about summary refs log tree commit diff homepage
diff options
context:
space:
mode:
author nsfisis <nsfisis@gmail.com> 2025-08-04 00:35:32 +0900
committer nsfisis <nsfisis@gmail.com> 2025-08-15 10:06:21 +0900
commit 6f880873fbc49f44df6d2d19a76ceeb4052890c0 (patch)
tree f96e2a2f8d4ade94ab60e2aa0d113db08090b79d
parent 5630b305cbc8537dfdd92ac9e3118605e9464e83 (diff)
download ducc-6f880873fbc49f44df6d2d19a76ceeb4052890c0.tar.gz
ducc-6f880873fbc49f44df6d2d19a76ceeb4052890c0.tar.zst
ducc-6f880873fbc49f44df6d2d19a76ceeb4052890c0.zip
feat: support union types
-rw-r--r-- ast.c 135
-rw-r--r-- parse.c 82
-rw-r--r-- preprocess.c 5
-rw-r--r-- tests/082.sh 24
4 files changed, 216 insertions, 30 deletions
diff --git a/ast.c b/ast.c
index e1c9ba6..3eafa94 100644
--- a/ast.c
+++ b/ast.c
@@ -2,17 +2,45 @@ enum TypeKind {
TypeKind_unknown,
TypeKind_char,
- TypeKind_int,
TypeKind_short,
+ TypeKind_int,
TypeKind_long,
TypeKind_void,
TypeKind_ptr,
TypeKind_array,
TypeKind_enum,
TypeKind_struct,
+ TypeKind_union,
};
typedef enum TypeKind TypeKind;
+const char* type_kind_stringify(TypeKind k) {
+ if (k == TypeKind_unknown)
+ return "<unknown>";
+ else if (k == TypeKind_char)
+ return "char";
+ else if (k == TypeKind_short)
+ return "short";
+ else if (k == TypeKind_int)
+ return "int";
+ else if (k == TypeKind_long)
+ return "long";
+ else if (k == TypeKind_void)
+ return "void";
+ else if (k == TypeKind_ptr)
+ return "<pointer>";
+ else if (k == TypeKind_array)
+ return "<array>";
+ else if (k == TypeKind_enum)
+ return "enum";
+ else if (k == TypeKind_struct)
+ return "struct";
+ else if (k == TypeKind_union)
+ return "union";
+ else
+ unreachable();
+}
+
struct AstNode;
struct Type {
@@ -24,6 +52,12 @@ struct Type {
};
typedef struct Type Type;
+void type_dump(Type* ty) {
+ fprintf(stderr, "Type {\n");
+ fprintf(stderr, " kind = %s\n", type_kind_stringify(ty->kind));
+ fprintf(stderr, "}\n");
+}
+
Type* type_new(TypeKind kind) {
Type* ty = calloc(1, sizeof(Type));
ty->kind = kind;
@@ -54,60 +88,68 @@ Type* type_array_to_ptr(Type* ty) {
}
int type_is_unsized(Type* ty) {
- return ty->kind != TypeKind_void;
+ return ty->kind == TypeKind_void;
}
int type_sizeof_struct(Type* ty);
+int type_sizeof_union(Type* ty);
int type_alignof_struct(Type* ty);
+int type_alignof_union(Type* ty);
int type_offsetof(Type* ty, const String* name);
Type* type_member_typeof(Type* ty, const String* name);
int type_sizeof(Type* ty) {
- if (!type_is_unsized(ty)) {
+ if (type_is_unsized(ty)) {
fatal_error("type_sizeof: type size cannot be determined");
}
- if (ty->kind == TypeKind_ptr) {
+ if (ty->kind == TypeKind_ptr)
return 8;
- } else if (ty->kind == TypeKind_char) {
+ else if (ty->kind == TypeKind_char)
return 1;
- } else if (ty->kind == TypeKind_short) {
+ else if (ty->kind == TypeKind_short)
return 2;
- } else if (ty->kind == TypeKind_int) {
+ else if (ty->kind == TypeKind_int)
return 4;
- } else if (ty->kind == TypeKind_long) {
+ else if (ty->kind == TypeKind_long)
return 8;
- } else if (ty->kind == TypeKind_enum) {
+ else if (ty->kind == TypeKind_enum)
return 4;
- } else if (ty->kind == TypeKind_array) {
+ else if (ty->kind == TypeKind_array)
return type_sizeof(ty->base) * ty->array_size;
- } else {
+ else if (ty->kind == TypeKind_struct)
return type_sizeof_struct(ty);
- }
+ else if (ty->kind == TypeKind_union)
+ return type_sizeof_union(ty);
+ else
+ unreachable();
}
int type_alignof(Type* ty) {
- if (!type_is_unsized(ty)) {
+ if (type_is_unsized(ty)) {
fatal_error("type_alignof: type size cannot be determined");
}
- if (ty->kind == TypeKind_ptr) {
+ if (ty->kind == TypeKind_ptr)
return 8;
- } else if (ty->kind == TypeKind_char) {
+ else if (ty->kind == TypeKind_char)
return 1;
- } else if (ty->kind == TypeKind_short) {
+ else if (ty->kind == TypeKind_short)
return 2;
- } else if (ty->kind == TypeKind_int) {
+ else if (ty->kind == TypeKind_int)
return 4;
- } else if (ty->kind == TypeKind_long) {
+ else if (ty->kind == TypeKind_long)
return 8;
- } else if (ty->kind == TypeKind_enum) {
+ else if (ty->kind == TypeKind_enum)
return 4;
- } else if (ty->kind == TypeKind_array) {
+ else if (ty->kind == TypeKind_array)
return type_alignof(ty->base);
- } else {
+ else if (ty->kind == TypeKind_struct)
return type_alignof_struct(ty);
- }
+ else if (ty->kind == TypeKind_union)
+ return type_alignof_union(ty);
+ else
+ unreachable();
}
int to_aligned(int n, int a) {
@@ -149,6 +191,9 @@ enum AstNodeKind {
AstNodeKind_type,
AstNodeKind_typedef_decl,
AstNodeKind_unary_expr,
+ AstNodeKind_union_decl,
+ AstNodeKind_union_def,
+ AstNodeKind_union_member,
};
typedef enum AstNodeKind AstNodeKind;
@@ -310,7 +355,6 @@ AstNode* ast_new_member_access_expr(AstNode* obj, const String* name) {
int type_sizeof_struct(Type* ty) {
int next_offset = 0;
int struct_align = 0;
- int padding;
int i;
for (i = 0; i < ty->def->node_members->node_len; ++i) {
@@ -327,6 +371,27 @@ int type_sizeof_struct(Type* ty) {
return to_aligned(next_offset, struct_align);
}
+int type_sizeof_union(Type* ty) {
+ int union_size = 0;
+ int union_align = 0;
+
+ int i;
+ for (i = 0; i < ty->def->node_members->node_len; ++i) {
+ AstNode* member = ty->def->node_members->node_items + i;
+ int size = type_sizeof(member->ty);
+ int align = type_alignof(member->ty);
+
+ size = to_aligned(size, align);
+ if (union_size < size) {
+ union_size = size;
+ }
+ if (union_align < align) {
+ union_align = align;
+ }
+ }
+ return to_aligned(union_size, union_align);
+}
+
int type_alignof_struct(Type* ty) {
int struct_align = 0;
@@ -342,9 +407,27 @@ int type_alignof_struct(Type* ty) {
return struct_align;
}
+int type_alignof_union(Type* ty) {
+ int union_align = 0;
+
+ int i;
+ for (i = 0; i < ty->def->node_members->node_len; ++i) {
+ AstNode* member = ty->def->node_members->node_items + i;
+ int align = type_alignof(member->ty);
+
+ if (union_align < align) {
+ union_align = align;
+ }
+ }
+ return union_align;
+}
+
int type_offsetof(Type* ty, const String* name) {
+ if (ty->kind == TypeKind_union) {
+ return 0;
+ }
if (ty->kind != TypeKind_struct) {
- fatal_error("type_offsetof: type is not a struct");
+ fatal_error("type_offsetof: type is neither a struct nor a union");
}
int next_offset = 0;
@@ -366,8 +449,8 @@ int type_offsetof(Type* ty, const String* name) {
}
Type* type_member_typeof(Type* ty, const String* name) {
- if (ty->kind != TypeKind_struct) {
- fatal_error("type_offsetof: type is not a struct");
+ if (ty->kind != TypeKind_struct && ty->kind != TypeKind_union) {
+ fatal_error("type_member_typeof: type is neither a struct nor a union");
}
int i;
diff --git a/parse.c b/parse.c
index 1695c8f..2f856bd 100644
--- a/parse.c
+++ b/parse.c
@@ -30,6 +30,8 @@ struct Parser {
int n_funcs;
AstNode* structs;
int n_structs;
+ AstNode* unions;
+ int n_unions;
AstNode* enums;
int n_enums;
AstNode* typedefs;
@@ -45,6 +47,7 @@ Parser* parser_new(Token* tokens) {
p->gvars = calloc(128, sizeof(GlobalVar));
p->funcs = calloc(256, sizeof(Func));
p->structs = calloc(64, sizeof(AstNode));
+ p->unions = calloc(64, sizeof(AstNode));
p->enums = calloc(16, sizeof(AstNode));
p->typedefs = calloc(64, sizeof(AstNode));
p->str_literals = calloc(1024, sizeof(char*));
@@ -172,6 +175,16 @@ int find_struct(Parser* p, const String* name) {
return -1;
}
+int find_union(Parser* p, const String* name) {
+ int i;
+ for (i = 0; i < p->n_unions; ++i) {
+ if (string_equals(&p->unions[i].name, name)) {
+ return i;
+ }
+ }
+ return -1;
+}
+
int find_enum(Parser* p, const String* name) {
int i;
for (i = 0; i < p->n_enums; ++i) {
@@ -361,7 +374,8 @@ int is_type_token(Parser* p, Token* token) {
if (token->kind == TokenKind_keyword_int || token->kind == TokenKind_keyword_short ||
token->kind == TokenKind_keyword_long || token->kind == TokenKind_keyword_char ||
token->kind == TokenKind_keyword_void || token->kind == TokenKind_keyword_enum ||
- token->kind == TokenKind_keyword_struct || token->kind == TokenKind_keyword_const) {
+ token->kind == TokenKind_keyword_struct || token->kind == TokenKind_keyword_union ||
+ token->kind == TokenKind_keyword_const) {
return 1;
}
if (token->kind != TokenKind_ident) {
@@ -414,6 +428,14 @@ Type* parse_type(Parser* p) {
fatal_error("parse_type: unknown struct, %.*s", name->len, name->data);
}
ty->def = p->structs + struct_idx;
+ } else if (t->kind == TokenKind_keyword_union) {
+ ty->kind = TypeKind_union;
+ name = parse_ident(p);
+ int union_idx = find_union(p, name);
+ if (union_idx == -1) {
+ fatal_error("parse_type: unknown union, %.*s", name->len, name->data);
+ }
+ ty->def = p->unions + union_idx;
} else {
unreachable();
}
@@ -761,7 +783,7 @@ AstNode* parse_continue_stmt(Parser* p) {
AstNode* parse_var_decl(Parser* p) {
Type* ty = parse_type(p);
- if (!type_is_unsized(ty)) {
+ if (type_is_unsized(ty)) {
fatal_error("parse_var_decl: invalid type for variable");
}
String* name = parse_ident(p);
@@ -951,7 +973,7 @@ AstNode* parse_struct_member(Parser* p) {
}
AstNode* parse_struct_members(Parser* p) {
- AstNode* list = ast_new_list(16);
+ AstNode* list = ast_new_list(32);
while (peek_token(p)->kind != TokenKind_brace_r) {
AstNode* member = parse_struct_member(p);
ast_append(list, member);
@@ -990,6 +1012,56 @@ AstNode* parse_struct_decl_or_def(Parser* p) {
return p->structs + struct_idx;
}
+AstNode* parse_union_member(Parser* p) {
+ Type* ty = parse_type(p);
+ String* name = parse_ident(p);
+ expect(p, TokenKind_semicolon);
+ AstNode* member = ast_new(AstNodeKind_union_member);
+ member->name = *name;
+ member->ty = ty;
+ return member;
+}
+
+AstNode* parse_union_members(Parser* p) {
+ AstNode* list = ast_new_list(16);
+ while (peek_token(p)->kind != TokenKind_brace_r) {
+ AstNode* member = parse_union_member(p);
+ ast_append(list, member);
+ }
+ return list;
+}
+
+AstNode* parse_union_decl_or_def(Parser* p) {
+ expect(p, TokenKind_keyword_union);
+ String* name = parse_ident(p);
+
+ if (peek_token(p)->kind != TokenKind_semicolon && peek_token(p)->kind != TokenKind_brace_l) {
+ p->pos = p->pos - 2;
+ return parse_func_decl_or_def(p);
+ }
+
+ int union_idx = find_union(p, name);
+ if (union_idx == -1) {
+ union_idx = p->n_unions;
+ p->unions[union_idx].kind = AstNodeKind_union_def;
+ p->unions[union_idx].name = *name;
+ ++p->n_unions;
+ }
+ if (peek_token(p)->kind == TokenKind_semicolon) {
+ next_token(p);
+ return ast_new(AstNodeKind_union_decl);
+ }
+ if (p->unions[union_idx].node_members) {
+ fatal_error("parse_union_decl_or_def: union %.*s redefined", name->len, name->data);
+ }
+ expect(p, TokenKind_brace_l);
+ AstNode* members = parse_union_members(p);
+ expect(p, TokenKind_brace_r);
+ expect(p, TokenKind_semicolon);
+ p->unions[union_idx].node_members = members;
+ return p->unions + union_idx;
+}
+
AstNode* parse_enum_member(Parser* p) {
String* name = parse_ident(p);
AstNode* member = ast_new(AstNodeKind_enum_member);
@@ -1056,7 +1128,7 @@ AstNode* parse_typedef_decl(Parser* p) {
AstNode* parse_extern_var_decl(Parser* p) {
expect(p, TokenKind_keyword_extern);
Type* ty = parse_type(p);
- if (!type_is_unsized(ty)) {
+ if (type_is_unsized(ty)) {
fatal_error("parse_extern_var_decl: invalid type for variable");
}
String* name = parse_ident(p);
@@ -1076,6 +1148,8 @@ AstNode* parse_toplevel(Parser* p) {
TokenKind tk = peek_token(p)->kind;
if (tk == TokenKind_keyword_struct) {
return parse_struct_decl_or_def(p);
+ } else if (tk == TokenKind_keyword_union) {
+ return parse_union_decl_or_def(p);
} else if (tk == TokenKind_keyword_enum) {
return parse_enum_def(p);
} else if (tk == TokenKind_keyword_typedef) {
diff --git a/preprocess.c b/preprocess.c
index 9a44394..ea900fe 100644
--- a/preprocess.c
+++ b/preprocess.c
@@ -42,6 +42,7 @@ enum TokenKind {
TokenKind_keyword_sizeof,
TokenKind_keyword_struct,
TokenKind_keyword_typedef,
+ TokenKind_keyword_union,
TokenKind_keyword_void,
TokenKind_keyword_while,
TokenKind_le,
@@ -149,6 +150,8 @@ const char* token_kind_stringify(TokenKind k) {
return "struct";
else if (k == TokenKind_keyword_typedef)
return "typedef";
+ else if (k == TokenKind_keyword_union)
+ return "union";
else if (k == TokenKind_keyword_void)
return "void";
else if (k == TokenKind_keyword_while)
@@ -592,6 +595,8 @@ void pp_tokenize_all(Preprocessor* pp) {
tok->kind = TokenKind_keyword_struct;
} else if (string_equals_cstr(&tok->raw, "typedef")) {
tok->kind = TokenKind_keyword_typedef;
+ } else if (string_equals_cstr(&tok->raw, "union")) {
+ tok->kind = TokenKind_keyword_union;
} else if (string_equals_cstr(&tok->raw, "void")) {
tok->kind = TokenKind_keyword_void;
} else if (string_equals_cstr(&tok->raw, "while")) {
diff --git a/tests/082.sh b/tests/082.sh
new file mode 100644
index 0000000..1d7727f
--- /dev/null
+++ b/tests/082.sh
@@ -0,0 +1,24 @@
+set -e
+
+cat <<'EOF' > expected
+8
+42
+42
+EOF
+
+bash ../../test_diff.sh <<'EOF'
+int printf();
+
+union U {
+ int i;
+ long l;
+};
+
+int main() {
+ union U u;
+ printf("%zu\n", sizeof(u));
+ u.l = 42;
+ printf("%d\n", u.i);
+ printf("%ld\n", u.l);
+}
+EOF