diff options
| author | nsfisis <nsfisis@gmail.com> | 2025-08-04 00:35:32 +0900 |
|---|---|---|
| committer | nsfisis <nsfisis@gmail.com> | 2025-08-15 10:06:21 +0900 |
| commit | 6f880873fbc49f44df6d2d19a76ceeb4052890c0 (patch) | |
| tree | f96e2a2f8d4ade94ab60e2aa0d113db08090b79d | |
| parent | 5630b305cbc8537dfdd92ac9e3118605e9464e83 (diff) | |
| download | ducc-6f880873fbc49f44df6d2d19a76ceeb4052890c0.tar.gz ducc-6f880873fbc49f44df6d2d19a76ceeb4052890c0.tar.zst ducc-6f880873fbc49f44df6d2d19a76ceeb4052890c0.zip | |
feat: support union types
| -rw-r--r-- | ast.c | 135 | ||||
| -rw-r--r-- | parse.c | 82 | ||||
| -rw-r--r-- | preprocess.c | 5 | ||||
| -rw-r--r-- | tests/082.sh | 24 |
4 files changed, 216 insertions, 30 deletions
@@ -2,17 +2,45 @@ enum TypeKind { TypeKind_unknown, TypeKind_char, - TypeKind_int, TypeKind_short, + TypeKind_int, TypeKind_long, TypeKind_void, TypeKind_ptr, TypeKind_array, TypeKind_enum, TypeKind_struct, + TypeKind_union, }; typedef enum TypeKind TypeKind; +const char* type_kind_stringify(TypeKind k) { + if (k == TypeKind_unknown) + return "<unknown>"; + else if (k == TypeKind_char) + return "char"; + else if (k == TypeKind_short) + return "short"; + else if (k == TypeKind_int) + return "int"; + else if (k == TypeKind_long) + return "long"; + else if (k == TypeKind_void) + return "void"; + else if (k == TypeKind_ptr) + return "<pointer>"; + else if (k == TypeKind_array) + return "<array>"; + else if (k == TypeKind_enum) + return "enum"; + else if (k == TypeKind_struct) + return "struct"; + else if (k == TypeKind_union) + return "union"; + else + unreachable(); +} + struct AstNode; struct Type { @@ -24,6 +52,12 @@ struct Type { }; typedef struct Type Type; +void type_dump(Type* ty) { + fprintf(stderr, "Type {\n"); + fprintf(stderr, " kind = %s\n", type_kind_stringify(ty->kind)); + fprintf(stderr, "}\n"); +} + Type* type_new(TypeKind kind) { Type* ty = calloc(1, sizeof(Type)); ty->kind = kind; @@ -54,60 +88,68 @@ Type* type_array_to_ptr(Type* ty) { } int type_is_unsized(Type* ty) { - return ty->kind != TypeKind_void; + return ty->kind == TypeKind_void; } int type_sizeof_struct(Type* ty); +int type_sizeof_union(Type* ty); int type_alignof_struct(Type* ty); +int type_alignof_union(Type* ty); int type_offsetof(Type* ty, const String* name); Type* type_member_typeof(Type* ty, const String* name); int type_sizeof(Type* ty) { - if (!type_is_unsized(ty)) { + if (type_is_unsized(ty)) { fatal_error("type_sizeof: type size cannot be determined"); } - if (ty->kind == TypeKind_ptr) { + if (ty->kind == TypeKind_ptr) return 8; - } else if (ty->kind == TypeKind_char) { + else if (ty->kind == TypeKind_char) return 1; - } else if (ty->kind == TypeKind_short) { + else if (ty->kind == TypeKind_short) return 2; - } else if (ty->kind == TypeKind_int) { + else if (ty->kind == TypeKind_int) return 4; - } else if (ty->kind == TypeKind_long) { + else if (ty->kind == TypeKind_long) return 8; - } else if (ty->kind == TypeKind_enum) { + else if (ty->kind == TypeKind_enum) return 4; - } else if (ty->kind == TypeKind_array) { + else if (ty->kind == TypeKind_array) return type_sizeof(ty->base) * ty->array_size; - } else { + else if (ty->kind == TypeKind_struct) return type_sizeof_struct(ty); - } + else if (ty->kind == TypeKind_union) + return type_sizeof_union(ty); + else + unreachable(); } int type_alignof(Type* ty) { - if (!type_is_unsized(ty)) { + if (type_is_unsized(ty)) { fatal_error("type_alignof: type size cannot be determined"); } - if (ty->kind == TypeKind_ptr) { + if (ty->kind == TypeKind_ptr) return 8; - } else if (ty->kind == TypeKind_char) { + else if (ty->kind == TypeKind_char) return 1; - } else if (ty->kind == TypeKind_short) { + else if (ty->kind == TypeKind_short) return 2; - } else if (ty->kind == TypeKind_int) { + else if (ty->kind == TypeKind_int) return 4; - } else if (ty->kind == TypeKind_long) { + else if (ty->kind == TypeKind_long) return 8; - } else if (ty->kind == TypeKind_enum) { + else if (ty->kind == TypeKind_enum) return 4; - } else if (ty->kind == TypeKind_array) { + else if (ty->kind == TypeKind_array) return type_alignof(ty->base); - } else { + else if (ty->kind == TypeKind_struct) return type_alignof_struct(ty); - } + else if (ty->kind == TypeKind_union) + return type_alignof_union(ty); + else + unreachable(); } int to_aligned(int n, int a) { @@ -149,6 +191,9 @@ enum AstNodeKind { AstNodeKind_type, AstNodeKind_typedef_decl, AstNodeKind_unary_expr, + AstNodeKind_union_decl, + AstNodeKind_union_def, + AstNodeKind_union_member, }; typedef enum AstNodeKind AstNodeKind; @@ -310,7 +355,6 @@ AstNode* ast_new_member_access_expr(AstNode* obj, const String* name) { int type_sizeof_struct(Type* ty) { int next_offset = 0; int struct_align = 0; - int padding; int i; for (i = 0; i < ty->def->node_members->node_len; ++i) { @@ -327,6 +371,27 @@ int type_sizeof_struct(Type* ty) { return to_aligned(next_offset, struct_align); } +int type_sizeof_union(Type* ty) { + int union_size = 0; + int union_align = 0; + + int i; + for (i = 0; i < ty->def->node_members->node_len; ++i) { + AstNode* member = ty->def->node_members->node_items + i; + int size = type_sizeof(member->ty); + int align = type_alignof(member->ty); + + size = to_aligned(size, align); + if (union_size < size) { + union_size = size; + } + if (union_align < align) { + union_align = align; + } + } + return to_aligned(union_size, union_align); +} + int type_alignof_struct(Type* ty) { int struct_align = 0; @@ -342,9 +407,27 @@ int type_alignof_struct(Type* ty) { return struct_align; } +int type_alignof_union(Type* ty) { + int union_align = 0; + + int i; + for (i = 0; i < ty->def->node_members->node_len; ++i) { + AstNode* member = ty->def->node_members->node_items + i; + int align = type_alignof(member->ty); + + if (union_align < align) { + union_align = align; + } + } + return union_align; +} + int type_offsetof(Type* ty, const String* name) { + if (ty->kind == TypeKind_union) { + return 0; + } if (ty->kind != TypeKind_struct) { - fatal_error("type_offsetof: type is not a struct"); + fatal_error("type_offsetof: type is neither a struct nor a union"); } int next_offset = 0; @@ -366,8 +449,8 @@ int type_offsetof(Type* ty, const String* name) { } Type* type_member_typeof(Type* ty, const String* name) { - if (ty->kind != TypeKind_struct) { - fatal_error("type_offsetof: type is not a struct"); + if (ty->kind != TypeKind_struct && ty->kind != TypeKind_union) { + fatal_error("type_member_typeof: type is neither a struct nor a union"); } int i; @@ -30,6 +30,8 @@ struct Parser { int n_funcs; AstNode* structs; int n_structs; + AstNode* unions; + int n_unions; AstNode* enums; int n_enums; AstNode* typedefs; @@ -45,6 +47,7 @@ Parser* parser_new(Token* tokens) { p->gvars = calloc(128, sizeof(GlobalVar)); p->funcs = calloc(256, sizeof(Func)); p->structs = calloc(64, sizeof(AstNode)); + p->unions = calloc(64, sizeof(AstNode)); p->enums = calloc(16, sizeof(AstNode)); p->typedefs = calloc(64, sizeof(AstNode)); p->str_literals = calloc(1024, sizeof(char*)); @@ -172,6 +175,16 @@ int find_struct(Parser* p, const String* name) { return -1; } +int find_union(Parser* p, const String* name) { + int i; + for (i = 0; i < p->n_unions; ++i) { + if (string_equals(&p->unions[i].name, name)) { + return i; + } + } + return -1; +} + int find_enum(Parser* p, const String* name) { int i; for (i = 0; i < p->n_enums; ++i) { @@ -361,7 +374,8 @@ int is_type_token(Parser* p, Token* token) { if (token->kind == TokenKind_keyword_int || token->kind == TokenKind_keyword_short || token->kind == TokenKind_keyword_long || token->kind == TokenKind_keyword_char || token->kind == TokenKind_keyword_void || token->kind == TokenKind_keyword_enum || - token->kind == TokenKind_keyword_struct || token->kind == TokenKind_keyword_const) { + token->kind == TokenKind_keyword_struct || token->kind == TokenKind_keyword_union || + token->kind == TokenKind_keyword_const) { return 1; } if (token->kind != TokenKind_ident) { @@ -414,6 +428,14 @@ Type* parse_type(Parser* p) { fatal_error("parse_type: unknown struct, %.*s", name->len, name->data); } ty->def = p->structs + struct_idx; + } else if (t->kind == TokenKind_keyword_union) { + ty->kind = TypeKind_union; + name = parse_ident(p); + int union_idx = find_union(p, name); + if (union_idx == -1) { + fatal_error("parse_type: unknown union, %.*s", name->len, name->data); + } + ty->def = p->unions + union_idx; } else { unreachable(); } @@ -761,7 +783,7 @@ AstNode* parse_continue_stmt(Parser* p) { AstNode* parse_var_decl(Parser* p) { Type* ty = parse_type(p); - if (!type_is_unsized(ty)) { + if (type_is_unsized(ty)) { fatal_error("parse_var_decl: invalid type for variable"); } String* name = parse_ident(p); @@ -951,7 +973,7 @@ AstNode* parse_struct_member(Parser* p) { } AstNode* parse_struct_members(Parser* p) { - AstNode* list = ast_new_list(16); + AstNode* list = ast_new_list(32); while (peek_token(p)->kind != TokenKind_brace_r) { AstNode* member = parse_struct_member(p); ast_append(list, member); @@ -990,6 +1012,56 @@ AstNode* parse_struct_decl_or_def(Parser* p) { return p->structs + struct_idx; } +AstNode* parse_union_member(Parser* p) { + Type* ty = parse_type(p); + String* name = parse_ident(p); + expect(p, TokenKind_semicolon); + AstNode* member = ast_new(AstNodeKind_union_member); + member->name = *name; + member->ty = ty; + return member; +} + +AstNode* parse_union_members(Parser* p) { + AstNode* list = ast_new_list(16); + while (peek_token(p)->kind != TokenKind_brace_r) { + AstNode* member = parse_union_member(p); + ast_append(list, member); + } + return list; +} + +AstNode* parse_union_decl_or_def(Parser* p) { + expect(p, TokenKind_keyword_union); + String* name = parse_ident(p); + + if (peek_token(p)->kind != TokenKind_semicolon && peek_token(p)->kind != TokenKind_brace_l) { + p->pos = p->pos - 2; + return parse_func_decl_or_def(p); + } + + int union_idx = find_union(p, name); + if (union_idx == -1) { + union_idx = p->n_unions; + p->unions[union_idx].kind = AstNodeKind_union_def; + p->unions[union_idx].name = *name; + ++p->n_unions; + } + if (peek_token(p)->kind == TokenKind_semicolon) { + next_token(p); + return ast_new(AstNodeKind_union_decl); + } + if (p->unions[union_idx].node_members) { + fatal_error("parse_union_decl_or_def: union %.*s redefined", name->len, name->data); + } + expect(p, TokenKind_brace_l); + AstNode* members = parse_union_members(p); + expect(p, TokenKind_brace_r); + expect(p, TokenKind_semicolon); + p->unions[union_idx].node_members = members; + return p->unions + union_idx; +} + AstNode* parse_enum_member(Parser* p) { String* name = parse_ident(p); AstNode* member = ast_new(AstNodeKind_enum_member); @@ -1056,7 +1128,7 @@ AstNode* parse_typedef_decl(Parser* p) { AstNode* parse_extern_var_decl(Parser* p) { expect(p, TokenKind_keyword_extern); Type* ty = parse_type(p); - if (!type_is_unsized(ty)) { + if (type_is_unsized(ty)) { fatal_error("parse_extern_var_decl: invalid type for variable"); } String* name = parse_ident(p); @@ -1076,6 +1148,8 @@ AstNode* parse_toplevel(Parser* p) { TokenKind tk = peek_token(p)->kind; if (tk == TokenKind_keyword_struct) { return parse_struct_decl_or_def(p); + } else if (tk == TokenKind_keyword_union) { + return parse_union_decl_or_def(p); } else if (tk == TokenKind_keyword_enum) { return parse_enum_def(p); } else if (tk == TokenKind_keyword_typedef) { diff --git a/preprocess.c b/preprocess.c index 9a44394..ea900fe 100644 --- a/preprocess.c +++ b/preprocess.c @@ -42,6 +42,7 @@ enum TokenKind { TokenKind_keyword_sizeof, TokenKind_keyword_struct, TokenKind_keyword_typedef, + TokenKind_keyword_union, TokenKind_keyword_void, TokenKind_keyword_while, TokenKind_le, @@ -149,6 +150,8 @@ const char* token_kind_stringify(TokenKind k) { return "struct"; else if (k == TokenKind_keyword_typedef) return "typedef"; + else if (k == TokenKind_keyword_union) + return "union"; else if (k == TokenKind_keyword_void) return "void"; else if (k == TokenKind_keyword_while) @@ -592,6 +595,8 @@ void pp_tokenize_all(Preprocessor* pp) { tok->kind = TokenKind_keyword_struct; } else if (string_equals_cstr(&tok->raw, "typedef")) { tok->kind = TokenKind_keyword_typedef; + } else if (string_equals_cstr(&tok->raw, "union")) { + tok->kind = TokenKind_keyword_union; } else if (string_equals_cstr(&tok->raw, "void")) { tok->kind = TokenKind_keyword_void; } else if (string_equals_cstr(&tok->raw, "while")) { diff --git a/tests/082.sh b/tests/082.sh new file mode 100644 index 0000000..1d7727f --- /dev/null +++ b/tests/082.sh @@ -0,0 +1,24 @@ +set -e + +cat <<'EOF' > expected +8 +42 +42 +EOF + +bash ../../test_diff.sh <<'EOF' +int printf(); + +union U { + int i; + long l; +}; + +int main() { + union U u; + printf("%zu\n", sizeof(u)); + u.l = 42; + printf("%d\n", u.i); + printf("%ld\n", u.l); +} +EOF |
