diff options
| author | nsfisis <nsfisis@gmail.com> | 2026-05-03 17:29:12 +0900 |
|---|---|---|
| committer | nsfisis <nsfisis@gmail.com> | 2026-05-03 18:42:58 +0900 |
| commit | 3654ce578e6fff53950874adf7e0e4ae0a6eb956 (patch) | |
| tree | 5b6c04273de38dba70b7c25e55da144f5f7c37da /src/cc1 | |
| parent | 1b406b13b03055d2b2d08e8279a4a80c41ca7c20 (diff) | |
| download | ducc-3654ce578e6fff53950874adf7e0e4ae0a6eb956.tar.gz ducc-3654ce578e6fff53950874adf7e0e4ae0a6eb956.tar.zst ducc-3654ce578e6fff53950874adf7e0e4ae0a6eb956.zip | |
Diffstat (limited to 'src/cc1')
| -rw-r--r-- | src/cc1/ast.c | 787 | ||||
| -rw-r--r-- | src/cc1/ast.h | 433 | ||||
| -rw-r--r-- | src/cc1/codegen.c | 925 | ||||
| -rw-r--r-- | src/cc1/codegen.h | 8 | ||||
| -rw-r--r-- | src/cc1/codegen_wasm.c | 164 | ||||
| -rw-r--r-- | src/cc1/codegen_wasm.h | 8 | ||||
| -rw-r--r-- | src/cc1/fs.c | 27 | ||||
| -rw-r--r-- | src/cc1/fs.h | 7 | ||||
| -rw-r--r-- | src/cc1/io.c | 127 | ||||
| -rw-r--r-- | src/cc1/io.h | 26 | ||||
| -rw-r--r-- | src/cc1/parse.c | 3018 | ||||
| -rw-r--r-- | src/cc1/parse.h | 43 | ||||
| -rw-r--r-- | src/cc1/preprocess.c | 1559 | ||||
| -rw-r--r-- | src/cc1/preprocess.h | 13 | ||||
| -rw-r--r-- | src/cc1/sys.c | 21 | ||||
| -rw-r--r-- | src/cc1/sys.h | 7 | ||||
| -rw-r--r-- | src/cc1/token.c | 387 | ||||
| -rw-r--r-- | src/cc1/token.h | 183 | ||||
| -rw-r--r-- | src/cc1/tokenize.c | 597 | ||||
| -rw-r--r-- | src/cc1/tokenize.h | 10 |
20 files changed, 8350 insertions, 0 deletions
diff --git a/src/cc1/ast.c b/src/cc1/ast.c new file mode 100644 index 0000000..efd16ae --- /dev/null +++ b/src/cc1/ast.c @@ -0,0 +1,787 @@ +#include "ast.h" +#include "../lib/common.h" +#include "preprocess.h" + +const char* storageclass_stringify(StorageClass s) { + if (s == StorageClass_unspecified) + return ""; + if (s == StorageClass_auto) + return "auto"; + if (s == StorageClass_constexpr) + return "constexpr"; + if (s == StorageClass_extern) + return "extern"; + if (s == StorageClass_register) + return "register"; + if (s == StorageClass_static) + return "static"; + if (s == StorageClass_thread_local) + return "thread_local"; + if (s == StorageClass_typedef) + return "typedef"; + else + unreachable(); +} + +const char* type_kind_stringify(TypeKind k) { + if (k == TypeKind_unknown) + return "<unknown>"; + else if (k == TypeKind_void) + return "void"; + else if (k == TypeKind_char) + return "char"; + else if (k == TypeKind_schar) + return "signed char"; + else if (k == TypeKind_uchar) + return "unsigned char"; + else if (k == TypeKind_short) + return "short"; + else if (k == TypeKind_ushort) + return "unsigned short"; + else if (k == TypeKind_int) + return "int"; + else if (k == TypeKind_uint) + return "unsigned int"; + else if (k == TypeKind_long) + return "long"; + else if (k == TypeKind_ulong) + return "unsigned long"; + else if (k == TypeKind_llong) + return "long long"; + else if (k == TypeKind_ullong) + return "unsigned long long"; + else if (k == TypeKind_float) + return "float"; + else if (k == TypeKind_double) + return "double"; + else if (k == TypeKind_ldouble) + return "long double"; + else if (k == TypeKind_bool) + return "bool"; + else if (k == TypeKind_struct) + return "struct"; + else if (k == TypeKind_union) + return "union"; + else if (k == TypeKind_enum) + return "enum"; + else if (k == TypeKind_ptr) + return "<pointer>"; + else if (k == TypeKind_array) + return "<array>"; + else if (k == TypeKind_func) + return "<function>"; + else + unreachable(); +} + +static AstNode* members_of(Type* ty) { + AstNode* def = &ty->ref.defs->as.list->items[ty->ref.index]; + if (def->kind == AstNodeKind_struct_def) + return def->as.struct_def->members; + else + return def->as.union_def->members; +} + +Type* type_new(TypeKind kind) { + Type* ty = calloc(1, sizeof(Type)); + ty->kind = kind; + return ty; +} + +Type* type_dup(Type* src) { + Type* ty = malloc(sizeof(Type)); + memcpy(ty, src, sizeof(Type)); + return ty; +} + +Type* type_new_ptr(Type* base) { + Type* ty = type_new(TypeKind_ptr); + ty->base = base; + return ty; +} + +Type* type_new_array(Type* base, int size) { + Type* ty = type_new(TypeKind_array); + ty->base = base; + ty->array_size = size; + return ty; +} + +Type* type_new_static_string(int len) { + return type_new_array(type_new(TypeKind_char), len + 1); +} + +Type* type_array_to_ptr(Type* ty) { + return type_new_ptr(ty->base); +} + +Type* type_new_func(Type* result, AstNode* params) { + Type* ty = type_new(TypeKind_func); + ty->result = result; + ty->params = params; + return ty; +} + +bool type_is_unsized(Type* ty) { + return ty->kind == TypeKind_void; +} + +bool type_is_unsigned(Type* ty) { + return ty->kind == TypeKind_uchar || ty->kind == TypeKind_ushort || ty->kind == TypeKind_uint || + ty->kind == TypeKind_ulong || ty->kind == TypeKind_ullong || ty->kind == TypeKind_bool; +} + +int type_sizeof(Type* ty) { + if (type_is_unsized(ty)) { + fatal_error("type_sizeof: type size cannot be determined"); + } + + if (ty->kind == TypeKind_char || ty->kind == TypeKind_schar || ty->kind == TypeKind_uchar || + ty->kind == TypeKind_bool) + return 1; + else if (ty->kind == TypeKind_short || ty->kind == TypeKind_ushort) + return 2; + else if (ty->kind == TypeKind_int || ty->kind == TypeKind_uint || ty->kind == TypeKind_float) + return 4; + else if (ty->kind == TypeKind_long || ty->kind == TypeKind_ulong || ty->kind == TypeKind_llong || + ty->kind == TypeKind_ullong || ty->kind == TypeKind_double) + return 8; + else if (ty->kind == TypeKind_struct) + return type_sizeof_struct(ty); + else if (ty->kind == TypeKind_union) + return type_sizeof_union(ty); + else if (ty->kind == TypeKind_enum) + return 4; + else if (ty->kind == TypeKind_ptr) + return 8; + else if (ty->kind == TypeKind_array) + return type_sizeof(ty->base) * ty->array_size; + else if (ty->kind == TypeKind_func) + return 8; + else + unreachable(); +} + +int type_alignof(Type* ty) { + if (type_is_unsized(ty)) { + fatal_error("type_alignof: type size cannot be determined"); + } + + if (ty->kind == TypeKind_char || ty->kind == TypeKind_schar || ty->kind == TypeKind_uchar || + ty->kind == TypeKind_bool) + return 1; + else if (ty->kind == TypeKind_short || ty->kind == TypeKind_ushort) + return 2; + else if (ty->kind == TypeKind_int || ty->kind == TypeKind_uint || ty->kind == TypeKind_float) + return 4; + else if (ty->kind == TypeKind_long || ty->kind == TypeKind_ulong || ty->kind == TypeKind_llong || + ty->kind == TypeKind_ullong || ty->kind == TypeKind_double) + return 8; + else if (ty->kind == TypeKind_struct) + return type_alignof_struct(ty); + else if (ty->kind == TypeKind_union) + return type_alignof_union(ty); + else if (ty->kind == TypeKind_enum) + return 4; + else if (ty->kind == TypeKind_ptr) + return 8; + else if (ty->kind == TypeKind_array) + return type_alignof(ty->base); + else if (ty->kind == TypeKind_func) + return 8; + else + unreachable(); +} + +int to_aligned(int n, int a) { + return (n + a - 1) / a * a; +} + +const char* astnode_kind_stringify(AstNodeKind k) { + switch (k) { + case AstNodeKind_unknown: + return "unknown"; + case AstNodeKind_nop: + return "nop"; + case AstNodeKind_array_initializer: + return "array_initializer"; + case AstNodeKind_assign_expr: + return "assign_expr"; + case AstNodeKind_binary_expr: + return "binary_expr"; + case AstNodeKind_break_stmt: + return "break_stmt"; + case AstNodeKind_case_label: + return "case_label"; + case AstNodeKind_cast_expr: + return "cast_expr"; + case AstNodeKind_cond_expr: + return "cond_expr"; + case AstNodeKind_continue_stmt: + return "continue_stmt"; + case AstNodeKind_default_label: + return "default_label"; + case AstNodeKind_deref_expr: + return "deref_expr"; + case AstNodeKind_do_while_stmt: + return "do_while_stmt"; + case AstNodeKind_enum_def: + return "enum_def"; + case AstNodeKind_enum_member: + return "enum_member"; + case AstNodeKind_expr_stmt: + return "expr_stmt"; + case AstNodeKind_for_stmt: + return "for_stmt"; + case AstNodeKind_func: + return "func"; + case AstNodeKind_func_call: + return "func_call"; + case AstNodeKind_func_decl: + return "func_decl"; + case AstNodeKind_func_def: + return "func_def"; + case AstNodeKind_goto_stmt: + return "goto_stmt"; + case AstNodeKind_gvar: + return "gvar"; + case AstNodeKind_gvar_decl: + return "gvar_decl"; + case AstNodeKind_if_stmt: + return "if_stmt"; + case AstNodeKind_int_expr: + return "int_expr"; + case AstNodeKind_double_expr: + return "double_expr"; + case AstNodeKind_label_stmt: + return "label_stmt"; + case AstNodeKind_list: + return "list"; + case AstNodeKind_logical_expr: + return "logical_expr"; + case AstNodeKind_lvar: + return "lvar"; + case AstNodeKind_lvar_decl: + return "lvar_decl"; + case AstNodeKind_param: + return "param"; + case AstNodeKind_ref_expr: + return "ref_expr"; + case AstNodeKind_return_stmt: + return "return_stmt"; + case AstNodeKind_str_expr: + return "str_expr"; + case AstNodeKind_struct_decl: + return "struct_decl"; + case AstNodeKind_struct_def: + return "struct_def"; + case AstNodeKind_struct_member: + return "struct_member"; + case AstNodeKind_switch_stmt: + return "switch_stmt"; + case AstNodeKind_type: + return "type"; + case AstNodeKind_typedef_decl: + return "typedef_decl"; + case AstNodeKind_unary_expr: + return "unary_expr"; + case AstNodeKind_union_decl: + return "union_decl"; + case AstNodeKind_union_def: + return "union_def"; + case AstNodeKind_declarator: + return "declarator"; + default: + unreachable(); + } +} + +AstNode* ast_new(AstNodeKind kind) { + AstNode* ast = calloc(1, sizeof(AstNode)); + ast->kind = kind; + return ast; +} + +AstNode* ast_new_list(int capacity) { + if (capacity == 0) + unreachable(); + AstNode* list = ast_new(AstNodeKind_list); + list->as.list = calloc(1, sizeof(ListNode)); + list->as.list->cap = capacity; + list->as.list->len = 0; + list->as.list->items = calloc(list->as.list->cap, sizeof(AstNode)); + return list; +} + +void ast_append(AstNode* list, AstNode* item) { + if (list->kind != AstNodeKind_list) { + fatal_error("ast_append: ast is not a list"); + } + if (!item) { + return; + } + if (list->as.list->cap <= list->as.list->len) { + list->as.list->cap *= 2; + list->as.list->items = realloc(list->as.list->items, sizeof(AstNode) * list->as.list->cap); + memset(list->as.list->items + list->as.list->len, 0, + sizeof(AstNode) * (list->as.list->cap - list->as.list->len)); + } + memcpy(list->as.list->items + list->as.list->len, item, sizeof(AstNode)); + ++list->as.list->len; +} + +AstNode* ast_new_int(int v) { + AstNode* e = ast_new(AstNodeKind_int_expr); + e->as.int_expr = calloc(1, sizeof(IntExprNode)); + e->as.int_expr->value = v; + e->ty = type_new(TypeKind_int); + return e; +} + +AstNode* ast_new_double(double v) { + AstNode* e = ast_new(AstNodeKind_double_expr); + e->as.double_expr = calloc(1, sizeof(DoubleExprNode)); + e->as.double_expr->value = v; + e->ty = type_new(TypeKind_double); + return e; +} + +AstNode* ast_new_unary_expr(int op, AstNode* operand) { + AstNode* e = ast_new(AstNodeKind_unary_expr); + e->as.unary_expr = calloc(1, sizeof(UnaryExprNode)); + e->as.unary_expr->op = op; + e->as.unary_expr->operand = operand; + e->ty = type_new(TypeKind_int); + return e; +} + +AstNode* ast_new_binary_expr(int op, AstNode* lhs, AstNode* rhs) { + AstNode* e = ast_new(AstNodeKind_binary_expr); + e->as.binary_expr = calloc(1, sizeof(BinaryExprNode)); + e->as.binary_expr->op = op; + e->as.binary_expr->lhs = lhs; + e->as.binary_expr->rhs = rhs; + if (op == TokenKind_plus) { + if (lhs->ty->kind == TypeKind_ptr) { + e->ty = lhs->ty; + } else if (lhs->ty->kind == TypeKind_array) { + e->ty = type_array_to_ptr(lhs->ty); + } else if (rhs->ty->kind == TypeKind_ptr) { + e->ty = rhs->ty; + } else if (rhs->ty->kind == TypeKind_array) { + e->ty = type_array_to_ptr(rhs->ty); + } else { + e->ty = type_new(TypeKind_int); + } + } else if (op == TokenKind_minus) { + if (lhs->ty->kind == TypeKind_ptr) { + e->ty = lhs->ty; + } else if (lhs->ty->kind == TypeKind_array) { + e->ty = type_array_to_ptr(lhs->ty); + } else { + e->ty = type_new(TypeKind_int); + } + } else { + e->ty = type_new(TypeKind_int); + } + return e; +} + +AstNode* ast_new_assign_expr(int op, AstNode* lhs, AstNode* rhs) { + AstNode* e = ast_new(AstNodeKind_assign_expr); + e->as.assign_expr = calloc(1, sizeof(AssignExprNode)); + e->as.assign_expr->op = op; + e->as.assign_expr->lhs = lhs; + e->as.assign_expr->rhs = rhs; + e->ty = lhs->ty; + return e; +} + +AstNode* ast_new_assign_add_expr(AstNode* lhs, AstNode* rhs) { + if (lhs->ty->base) { + rhs = ast_new_binary_expr(TokenKind_star, rhs, ast_new_int(type_sizeof(lhs->ty->base))); + } else if (rhs->ty->base) { + lhs = ast_new_binary_expr(TokenKind_star, lhs, ast_new_int(type_sizeof(rhs->ty->base))); + } + return ast_new_assign_expr(TokenKind_assign_add, lhs, rhs); +} + +AstNode* ast_new_assign_sub_expr(AstNode* lhs, AstNode* rhs) { + if (lhs->ty->base) { + rhs = ast_new_binary_expr(TokenKind_star, rhs, ast_new_int(type_sizeof(lhs->ty->base))); + } + return ast_new_assign_expr(TokenKind_assign_sub, lhs, rhs); +} + +AstNode* ast_new_ref_expr(AstNode* operand) { + AstNode* e = ast_new(AstNodeKind_ref_expr); + e->as.ref_expr = calloc(1, sizeof(RefExprNode)); + e->as.ref_expr->operand = operand; + e->ty = type_new_ptr(operand->ty); + return e; +} + +AstNode* ast_new_deref_expr(AstNode* operand) { + AstNode* e = ast_new(AstNodeKind_deref_expr); + e->as.deref_expr = calloc(1, sizeof(DerefExprNode)); + e->as.deref_expr->operand = operand; + e->ty = operand->ty->base; + return e; +} + +AstNode* ast_new_member_access_expr(AstNode* obj, const char* name) { + AstNode* e = ast_new(AstNodeKind_deref_expr); + e->as.deref_expr = calloc(1, sizeof(DerefExprNode)); + e->as.deref_expr->operand = + ast_new_binary_expr(TokenKind_plus, obj, ast_new_int(type_offsetof(obj->ty->base, name))); + e->ty = type_member_typeof(obj->ty->base, name); + e->as.deref_expr->operand->ty = type_new_ptr(e->ty); + return e; +} + +AstNode* ast_new_cast_expr(AstNode* operand, Type* result_ty) { + AstNode* e = ast_new(AstNodeKind_cast_expr); + e->as.cast_expr = calloc(1, sizeof(CastExprNode)); + e->as.cast_expr->operand = operand; + e->ty = result_ty; + return e; +} + +AstNode* ast_new_logical_expr(int op, AstNode* lhs, AstNode* rhs) { + AstNode* e = ast_new(AstNodeKind_logical_expr); + e->as.logical_expr = calloc(1, sizeof(LogicalExprNode)); + e->as.logical_expr->op = op; + e->as.logical_expr->lhs = lhs; + e->as.logical_expr->rhs = rhs; + e->ty = type_new(TypeKind_int); + return e; +} + +AstNode* ast_new_cond_expr(AstNode* cond, AstNode* then, AstNode* else_) { + AstNode* e = ast_new(AstNodeKind_cond_expr); + e->as.cond_expr = calloc(1, sizeof(CondExprNode)); + e->as.cond_expr->cond = cond; + e->as.cond_expr->then = then; + e->as.cond_expr->else_ = else_; + e->ty = then->ty; + return e; +} + +AstNode* ast_new_str_expr(int idx, Type* ty) { + AstNode* e = ast_new(AstNodeKind_str_expr); + e->as.str_expr = calloc(1, sizeof(StrExprNode)); + e->as.str_expr->idx = idx; + e->ty = ty; + return e; +} + +AstNode* ast_new_func_call(AstNode* func, AstNode* args) { + AstNode* e = ast_new(AstNodeKind_func_call); + e->as.func_call = calloc(1, sizeof(FuncCallNode)); + e->as.func_call->func = func; + e->as.func_call->args = args; + return e; +} + +AstNode* ast_new_func(const char* name, Type* ty) { + AstNode* e = ast_new(AstNodeKind_func); + e->as.func = calloc(1, sizeof(FuncNode)); + e->as.func->name = name; + e->ty = ty; + return e; +} + +AstNode* ast_new_gvar(const char* name, Type* ty) { + AstNode* e = ast_new(AstNodeKind_gvar); + e->as.gvar = calloc(1, sizeof(GvarNode)); + e->as.gvar->name = name; + e->ty = ty; + return e; +} + +AstNode* ast_new_lvar(const char* name, int stack_offset, Type* ty) { + AstNode* e = ast_new(AstNodeKind_lvar); + e->as.lvar = calloc(1, sizeof(LvarNode)); + e->as.lvar->name = name; + e->as.lvar->stack_offset = stack_offset; + e->ty = ty; + return e; +} + +AstNode* ast_new_nop(void) { + return ast_new(AstNodeKind_nop); +} + +AstNode* ast_new_break_stmt(void) { + return ast_new(AstNodeKind_break_stmt); +} + +AstNode* ast_new_continue_stmt(void) { + return ast_new(AstNodeKind_continue_stmt); +} + +AstNode* ast_new_return_stmt(AstNode* expr) { + AstNode* e = ast_new(AstNodeKind_return_stmt); + e->as.return_stmt = calloc(1, sizeof(ReturnStmtNode)); + e->as.return_stmt->expr = expr; + return e; +} + +AstNode* ast_new_expr_stmt(AstNode* expr) { + AstNode* e = ast_new(AstNodeKind_expr_stmt); + e->as.expr_stmt = calloc(1, sizeof(ExprStmtNode)); + e->as.expr_stmt->expr = expr; + return e; +} + +AstNode* ast_new_if_stmt(AstNode* cond, AstNode* then, AstNode* else_) { + AstNode* e = ast_new(AstNodeKind_if_stmt); + e->as.if_stmt = calloc(1, sizeof(IfStmtNode)); + e->as.if_stmt->cond = cond; + e->as.if_stmt->then = then; + e->as.if_stmt->else_ = else_; + return e; +} + +AstNode* ast_new_for_stmt(AstNode* init, AstNode* cond, AstNode* update, AstNode* body) { + AstNode* e = ast_new(AstNodeKind_for_stmt); + e->as.for_stmt = calloc(1, sizeof(ForStmtNode)); + e->as.for_stmt->init = init; + e->as.for_stmt->cond = cond; + e->as.for_stmt->update = update; + e->as.for_stmt->body = body; + return e; +} + +AstNode* ast_new_do_while_stmt(AstNode* cond, AstNode* body) { + AstNode* e = ast_new(AstNodeKind_do_while_stmt); + e->as.do_while_stmt = calloc(1, sizeof(DoWhileStmtNode)); + e->as.do_while_stmt->cond = cond; + e->as.do_while_stmt->body = body; + return e; +} + +AstNode* ast_new_switch_stmt(AstNode* expr) { + AstNode* e = ast_new(AstNodeKind_switch_stmt); + e->as.switch_stmt = calloc(1, sizeof(SwitchStmtNode)); + e->as.switch_stmt->expr = expr; + return e; +} + +AstNode* ast_new_case_label(int value, AstNode* body) { + AstNode* e = ast_new(AstNodeKind_case_label); + e->as.case_label = calloc(1, sizeof(CaseLabelNode)); + e->as.case_label->value = value; + e->as.case_label->body = body; + return e; +} + +AstNode* ast_new_default_label(AstNode* body) { + AstNode* e = ast_new(AstNodeKind_default_label); + e->as.default_label = calloc(1, sizeof(DefaultLabelNode)); + e->as.default_label->body = body; + return e; +} + +AstNode* ast_new_goto_stmt(const char* label) { + AstNode* e = ast_new(AstNodeKind_goto_stmt); + e->as.goto_stmt = calloc(1, sizeof(GotoStmtNode)); + e->as.goto_stmt->label = label; + return e; +} + +AstNode* ast_new_label_stmt(const char* name, AstNode* body) { + AstNode* e = ast_new(AstNodeKind_label_stmt); + e->as.label_stmt = calloc(1, sizeof(LabelStmtNode)); + e->as.label_stmt->name = name; + e->as.label_stmt->body = body; + return e; +} + +AstNode* ast_new_declarator(const char* name, Type* ty) { + AstNode* e = ast_new(AstNodeKind_declarator); + e->as.declarator = calloc(1, sizeof(DeclaratorNode)); + e->as.declarator->name = name; + e->ty = ty; + return e; +} + +AstNode* ast_new_func_def(const char* name, Type* ty, AstNode* params, AstNode* body, int stack_size) { + AstNode* e = ast_new(AstNodeKind_func_def); + e->as.func_def = calloc(1, sizeof(FuncDefNode)); + e->as.func_def->name = name; + e->as.func_def->params = params; + e->as.func_def->body = body; + e->as.func_def->stack_size = stack_size; + e->ty = ty; + return e; +} + +AstNode* ast_new_enum_member(const char* name, int value) { + AstNode* e = ast_new(AstNodeKind_enum_member); + e->as.enum_member = calloc(1, sizeof(EnumMemberNode)); + e->as.enum_member->name = name; + e->as.enum_member->value = value; + return e; +} + +AstNode* ast_new_typedef_decl(const char* name, Type* ty) { + AstNode* e = ast_new(AstNodeKind_typedef_decl); + e->as.typedef_decl = calloc(1, sizeof(TypedefDeclNode)); + e->as.typedef_decl->name = name; + e->ty = ty; + return e; +} + +AstNode* ast_new_struct_def(const char* name) { + AstNode* e = ast_new(AstNodeKind_struct_def); + e->as.struct_def = calloc(1, sizeof(StructDefNode)); + e->as.struct_def->name = name; + return e; +} + +AstNode* ast_new_union_def(const char* name) { + AstNode* e = ast_new(AstNodeKind_union_def); + e->as.union_def = calloc(1, sizeof(UnionDefNode)); + e->as.union_def->name = name; + return e; +} + +AstNode* ast_new_enum_def(const char* name) { + AstNode* e = ast_new(AstNodeKind_enum_def); + e->as.enum_def = calloc(1, sizeof(EnumDefNode)); + e->as.enum_def->name = name; + return e; +} + +AstNode* ast_new_array_initializer(AstNode* list) { + AstNode* e = ast_new(AstNodeKind_array_initializer); + e->as.array_initializer = calloc(1, sizeof(ArrayInitializerNode)); + e->as.array_initializer->list = list; + return e; +} + +int type_sizeof_struct(Type* ty) { + int next_offset = 0; + int struct_align = 0; + + for (int i = 0; i < members_of(ty)->as.list->len; ++i) { + AstNode* member = &members_of(ty)->as.list->items[i]; + int size, align; + if (member->as.struct_member->is_bitfield) { + // TODO + size = member->as.struct_member->bitfield_width / 8; + align = 1; + } else { + size = type_sizeof(member->ty); + align = type_alignof(member->ty); + } + + next_offset = to_aligned(next_offset, align); + next_offset += size; + if (struct_align < align) { + struct_align = align; + } + } + return to_aligned(next_offset, struct_align); +} + +int type_sizeof_union(Type* ty) { + int union_size = 0; + int union_align = 0; + + for (int i = 0; i < members_of(ty)->as.list->len; ++i) { + AstNode* member = &members_of(ty)->as.list->items[i]; + int size = type_sizeof(member->ty); + int align = type_alignof(member->ty); + + size = to_aligned(size, align); + if (union_size < size) { + union_size = size; + } + if (union_align < align) { + union_align = align; + } + } + return to_aligned(union_size, union_align); +} + +int type_alignof_struct(Type* ty) { + int struct_align = 0; + + for (int i = 0; i < members_of(ty)->as.list->len; ++i) { + AstNode* member = &members_of(ty)->as.list->items[i]; + int align = type_alignof(member->ty); + + if (struct_align < align) { + struct_align = align; + } + } + return struct_align; +} + +int type_alignof_union(Type* ty) { + int union_align = 0; + + for (int i = 0; i < members_of(ty)->as.list->len; ++i) { + AstNode* member = &members_of(ty)->as.list->items[i]; + int align = type_alignof(member->ty); + + if (union_align < align) { + union_align = align; + } + } + return union_align; +} + +int type_offsetof(Type* ty, const char* name) { + if (ty->kind == TypeKind_union) { + return 0; + } + if (ty->kind != TypeKind_struct) { + fatal_error("type_offsetof: type is neither a struct nor a union"); + } + + int next_offset = 0; + + for (int i = 0; i < members_of(ty)->as.list->len; ++i) { + AstNode* member = &members_of(ty)->as.list->items[i]; + int size, align; + if (member->as.struct_member->is_bitfield) { + // TODO + size = member->as.struct_member->bitfield_width / 8; + align = 1; + } else { + size = type_sizeof(member->ty); + align = type_alignof(member->ty); + } + + next_offset = to_aligned(next_offset, align); + if (strcmp(member->as.struct_member->name, name) == 0) { + if (member->as.struct_member->is_bitfield) { + // C23: 7.21.4 + // if the specified member is a bit-field, the behavior is undefined. + fatal_error("type_offsetof: the result of offsetof(bit-field) is undefined"); + } + return next_offset; + } + next_offset += size; + } + + fatal_error("type_offsetof: member not found"); +} + +Type* type_member_typeof(Type* ty, const char* name) { + if (ty->kind != TypeKind_struct && ty->kind != TypeKind_union) { + fatal_error("type_member_typeof: type is neither a struct nor a union"); + } + + for (int i = 0; i < members_of(ty)->as.list->len; ++i) { + AstNode* member = &members_of(ty)->as.list->items[i]; + if (strcmp(member->as.struct_member->name, name) == 0) { + return member->ty; + } + } + + fatal_error("type_offsetof: member not found"); +} diff --git a/src/cc1/ast.h b/src/cc1/ast.h new file mode 100644 index 0000000..ef984a9 --- /dev/null +++ b/src/cc1/ast.h @@ -0,0 +1,433 @@ +#ifndef DUCC_AST_H +#define DUCC_AST_H + +#include "../lib/ducc.h" +#include "io.h" + +typedef enum { + StorageClass_unspecified, + StorageClass_auto, + StorageClass_constexpr, + StorageClass_extern, + StorageClass_register, + StorageClass_static, + StorageClass_thread_local, + StorageClass_typedef, +} StorageClass; + +const char* storageclass_stringify(StorageClass s); + +typedef enum { + TypeKind_unknown, + + TypeKind_void, + TypeKind_char, + TypeKind_schar, + TypeKind_uchar, + TypeKind_short, + TypeKind_ushort, + TypeKind_int, + TypeKind_uint, + TypeKind_long, + TypeKind_ulong, + TypeKind_llong, + TypeKind_ullong, + TypeKind_float, + TypeKind_double, + TypeKind_ldouble, + TypeKind_bool, + TypeKind_struct, + TypeKind_union, + TypeKind_enum, + TypeKind_ptr, + TypeKind_array, + TypeKind_func, +} TypeKind; + +const char* type_kind_stringify(TypeKind k); + +struct AstNode; +typedef struct AstNode AstNode; + +typedef struct { + AstNode* defs; + size_t index; +} TypeRef; + +typedef struct Type { + TypeKind kind; + StorageClass storage_class; + // Check `base` instead of `kind` to test if the type is an array or a pointer. + struct Type* base; + int array_size; + TypeRef ref; + struct Type* result; + AstNode* params; +} Type; + +Type* type_new(TypeKind kind); +Type* type_dup(Type* src); +Type* type_new_ptr(Type* base); +Type* type_new_array(Type* elem, int size); +Type* type_new_static_string(int len); +Type* type_array_to_ptr(Type* ty); +Type* type_new_func(Type* result, AstNode* params); +bool type_is_unsized(Type* ty); +bool type_is_unsigned(Type* ty); + +int type_sizeof_struct(Type* ty); +int type_sizeof_union(Type* ty); +int type_alignof_struct(Type* ty); +int type_alignof_union(Type* ty); +int type_offsetof(Type* ty, const char* name); +Type* type_member_typeof(Type* ty, const char* name); + +int type_sizeof(Type* ty); +int type_alignof(Type* ty); + +int to_aligned(int n, int a); + +typedef enum { + AstNodeKind_unknown, + AstNodeKind_nop, + + AstNodeKind_array_initializer, + AstNodeKind_assign_expr, + AstNodeKind_binary_expr, + AstNodeKind_break_stmt, + AstNodeKind_case_label, + AstNodeKind_cast_expr, + AstNodeKind_cond_expr, + AstNodeKind_continue_stmt, + AstNodeKind_default_label, + AstNodeKind_deref_expr, + AstNodeKind_do_while_stmt, + AstNodeKind_enum_def, + AstNodeKind_enum_member, + AstNodeKind_expr_stmt, + AstNodeKind_for_stmt, + AstNodeKind_func, + AstNodeKind_func_call, + AstNodeKind_func_decl, + AstNodeKind_func_def, + AstNodeKind_goto_stmt, + AstNodeKind_gvar, + AstNodeKind_gvar_decl, + AstNodeKind_if_stmt, + AstNodeKind_int_expr, + AstNodeKind_double_expr, + AstNodeKind_label_stmt, + AstNodeKind_list, + AstNodeKind_logical_expr, + AstNodeKind_lvar, + AstNodeKind_lvar_decl, + AstNodeKind_param, + AstNodeKind_ref_expr, + AstNodeKind_return_stmt, + AstNodeKind_str_expr, + AstNodeKind_struct_decl, + AstNodeKind_struct_def, + AstNodeKind_struct_member, + AstNodeKind_switch_stmt, + AstNodeKind_type, + AstNodeKind_typedef_decl, + AstNodeKind_unary_expr, + AstNodeKind_union_decl, + AstNodeKind_union_def, + + // Intermediate ASTs: they are used only in parsing, not for parse result. + AstNodeKind_declarator, +} AstNodeKind; + +const char* astnode_kind_stringify(AstNodeKind k); + +// Expression nodes +typedef struct { + int value; +} IntExprNode; + +typedef struct { + double value; +} DoubleExprNode; + +typedef struct { + int idx; +} StrExprNode; + +typedef struct { + int op; + AstNode* operand; +} UnaryExprNode; + +typedef struct { + int op; + AstNode* lhs; + AstNode* rhs; +} BinaryExprNode; + +typedef struct { + int op; + AstNode* lhs; + AstNode* rhs; +} LogicalExprNode; + +typedef struct { + int op; + AstNode* lhs; + AstNode* rhs; +} AssignExprNode; + +typedef struct { + AstNode* operand; +} CastExprNode; + +typedef struct { + AstNode* cond; + AstNode* then; + AstNode* else_; +} CondExprNode; + +typedef struct { + AstNode* operand; +} DerefExprNode; + +typedef struct { + AstNode* operand; +} RefExprNode; + +typedef struct { + AstNode* func; + AstNode* args; +} FuncCallNode; + +// Statement nodes +typedef struct { + AstNode* cond; + AstNode* then; + AstNode* else_; +} IfStmtNode; + +typedef struct { + AstNode* init; + AstNode* cond; + AstNode* update; + AstNode* body; +} ForStmtNode; + +typedef struct { + AstNode* cond; + AstNode* body; +} DoWhileStmtNode; + +typedef struct { + AstNode* expr; + AstNode* body; +} SwitchStmtNode; + +typedef struct { + int value; + AstNode* body; +} CaseLabelNode; + +typedef struct { + AstNode* body; +} DefaultLabelNode; + +typedef struct { + const char* name; + AstNode* body; +} LabelStmtNode; + +typedef struct { + AstNode* expr; +} ReturnStmtNode; + +typedef struct { + const char* label; +} GotoStmtNode; + +typedef struct { + AstNode* expr; +} ExprStmtNode; + +// Declaration nodes +typedef struct { + const char* name; + AstNode* params; + AstNode* body; + int stack_size; +} FuncDefNode; + +typedef struct { + const char* name; + int stack_offset; +} LvarNode; + +typedef struct { + AstNode* expr; +} LvarDeclNode; + +typedef struct { + const char* name; + int stack_offset; +} ParamNode; + +typedef struct { + const char* name; + AstNode* expr; +} GvarDeclNode; + +typedef struct { + const char* name; + AstNode* members; +} StructDefNode; + +typedef struct { + const char* name; + AstNode* members; +} UnionDefNode; + +typedef struct { + const char* name; + AstNode* members; +} EnumDefNode; + +typedef struct { + const char* name; + int value; +} EnumMemberNode; + +typedef struct { + const char* name; + AstNode* init; +} DeclaratorNode; + +typedef struct { + const char* name; +} FuncNode; + +typedef struct { + const char* name; +} GvarNode; + +typedef struct { + const char* name; + bool is_bitfield; + int bitfield_offset; + int bitfield_width; +} StructMemberNode; + +typedef struct { + const char* name; +} TypedefDeclNode; + +typedef struct { + AstNode* list; +} ArrayInitializerNode; + +typedef struct { + AstNode* items; + int len; + int cap; +} ListNode; + +struct AstNode { + AstNodeKind kind; + // TODO: currently only limited set of ast nodes have loc field. + SourceLocation loc; + Type* ty; + union { + IntExprNode* int_expr; + DoubleExprNode* double_expr; + StrExprNode* str_expr; + UnaryExprNode* unary_expr; + BinaryExprNode* binary_expr; + LogicalExprNode* logical_expr; + AssignExprNode* assign_expr; + CastExprNode* cast_expr; + CondExprNode* cond_expr; + DerefExprNode* deref_expr; + RefExprNode* ref_expr; + FuncCallNode* func_call; + IfStmtNode* if_stmt; + ForStmtNode* for_stmt; + DoWhileStmtNode* do_while_stmt; + SwitchStmtNode* switch_stmt; + CaseLabelNode* case_label; + DefaultLabelNode* default_label; + LabelStmtNode* label_stmt; + ReturnStmtNode* return_stmt; + GotoStmtNode* goto_stmt; + ExprStmtNode* expr_stmt; + FuncDefNode* func_def; + LvarNode* lvar; + LvarDeclNode* lvar_decl; + ParamNode* param; + GvarDeclNode* gvar_decl; + StructDefNode* struct_def; + UnionDefNode* union_def; + EnumDefNode* enum_def; + EnumMemberNode* enum_member; + DeclaratorNode* declarator; + FuncNode* func; + GvarNode* gvar; + StructMemberNode* struct_member; + TypedefDeclNode* typedef_decl; + ArrayInitializerNode* array_initializer; + ListNode* list; + } as; +}; + +typedef struct { + AstNode* funcs; + AstNode* vars; + const char** str_literals; +} Program; + +AstNode* ast_new(AstNodeKind kind); +AstNode* ast_new_list(int capacity); +void ast_append(AstNode* list, AstNode* item); +AstNode* ast_new_int(int v); +AstNode* ast_new_double(double v); +AstNode* ast_new_unary_expr(int op, AstNode* operand); +AstNode* ast_new_binary_expr(int op, AstNode* lhs, AstNode* rhs); + +AstNode* ast_new_assign_expr(int op, AstNode* lhs, AstNode* rhs); +AstNode* ast_new_assign_add_expr(AstNode* lhs, AstNode* rhs); +AstNode* ast_new_assign_sub_expr(AstNode* lhs, AstNode* rhs); +AstNode* ast_new_ref_expr(AstNode* operand); +AstNode* ast_new_deref_expr(AstNode* operand); +AstNode* ast_new_member_access_expr(AstNode* obj, const char* name); +AstNode* ast_new_cast_expr(AstNode* operand, Type* result_ty); +AstNode* ast_new_logical_expr(int op, AstNode* lhs, AstNode* rhs); +AstNode* ast_new_cond_expr(AstNode* cond, AstNode* then, AstNode* else_); +AstNode* ast_new_str_expr(int idx, Type* ty); +AstNode* ast_new_func_call(AstNode* func, AstNode* args); +AstNode* ast_new_func(const char* name, Type* ty); +AstNode* ast_new_gvar(const char* name, Type* ty); +AstNode* ast_new_lvar(const char* name, int stack_offset, Type* ty); + +AstNode* ast_new_nop(void); +AstNode* ast_new_break_stmt(void); +AstNode* ast_new_continue_stmt(void); +AstNode* ast_new_return_stmt(AstNode* expr); +AstNode* ast_new_expr_stmt(AstNode* expr); +AstNode* ast_new_if_stmt(AstNode* cond, AstNode* then, AstNode* else_); +AstNode* ast_new_for_stmt(AstNode* init, AstNode* cond, AstNode* update, AstNode* body); +AstNode* ast_new_do_while_stmt(AstNode* cond, AstNode* body); +AstNode* ast_new_switch_stmt(AstNode* expr); +AstNode* ast_new_case_label(int value, AstNode* body); +AstNode* ast_new_default_label(AstNode* body); +AstNode* ast_new_goto_stmt(const char* label); +AstNode* ast_new_label_stmt(const char* name, AstNode* body); + +AstNode* ast_new_declarator(const char* name, Type* ty); +AstNode* ast_new_func_def(const char* name, Type* ty, AstNode* params, AstNode* body, int stack_size); +AstNode* ast_new_enum_member(const char* name, int value); +AstNode* ast_new_typedef_decl(const char* name, Type* ty); +AstNode* ast_new_struct_def(const char* name); +AstNode* ast_new_union_def(const char* name); +AstNode* ast_new_enum_def(const char* name); +AstNode* ast_new_array_initializer(AstNode* list); + +#endif diff --git a/src/cc1/codegen.c b/src/cc1/codegen.c new file mode 100644 index 0000000..a08f6ad --- /dev/null +++ b/src/cc1/codegen.c @@ -0,0 +1,925 @@ +#include "codegen.h" +#include <stdlib.h> +#include <string.h> +#include "../lib/common.h" +#include "parse.h" +#include "preprocess.h" + +typedef enum { + GenMode_lval, + GenMode_rval, +} GenMode; + +typedef struct { + Program* prog; + FILE* out; + int next_label; + int* loop_labels; + AstNode* current_func; + int switch_label; +} CodeGen; + +static CodeGen* codegen_new(Program* prog, FILE* out) { + CodeGen* g = calloc(1, sizeof(CodeGen)); + g->prog = prog; + g->out = out; + g->next_label = 1; + g->loop_labels = calloc(1024, sizeof(int)); + g->switch_label = -1; + return g; +} + +static int codegen_new_label(CodeGen* g) { + int new_label = g->next_label; + ++g->next_label; + return new_label; +} + +static void codegen_expr(CodeGen* g, AstNode* ast, GenMode gen_mode); +static void codegen_stmt(CodeGen* g, AstNode* ast); + +static const char* param_reg(int n) { + if (n == 0) { + return "rdi"; + } else if (n == 1) { + return "rsi"; + } else if (n == 2) { + return "rdx"; + } else if (n == 3) { + return "rcx"; + } else if (n == 4) { + return "r8"; + } else if (n == 5) { + return "r9"; + } else { + unreachable(); + } +} + +static void codegen_func_prologue(CodeGen* g, FuncDefNode* func_def) { + fprintf(g->out, " push rbp\n"); + fprintf(g->out, " mov rbp, rsp\n"); + for (int i = 0, j = 0; i < func_def->params->as.list->len; ++i) { + AstNode* param = &func_def->params->as.list->items[i]; + if (param->as.param->stack_offset >= 0) { + fprintf(g->out, " push %s\n", param_reg(j++)); + } + } + // Note: rsp must be aligned to 8. + fprintf(g->out, " sub rsp, %d\n", to_aligned(func_def->stack_size, 8)); +} + +static void codegen_func_epilogue(CodeGen* g) { + fprintf(g->out, " mov rsp, rbp\n"); + fprintf(g->out, " pop rbp\n"); + fprintf(g->out, " ret\n"); +} + +static void codegen_int_expr(CodeGen* g, IntExprNode* expr) { + fprintf(g->out, " mov rax, %d\n", expr->value); +} + +static void codegen_str_expr(CodeGen* g, StrExprNode* expr) { + fprintf(g->out, " lea rax, .Lstr__%d[rip]\n", expr->idx); +} + +static void codegen_unary_expr(CodeGen* g, UnaryExprNode* expr) { + codegen_expr(g, expr->operand, GenMode_rval); + if (expr->op == TokenKind_not) { + fprintf(g->out, " mov rdi, 0\n"); + fprintf(g->out, " cmp rax, rdi\n"); + fprintf(g->out, " sete al\n"); + fprintf(g->out, " movzb rax, al\n"); + } else if (expr->op == TokenKind_tilde) { + fprintf(g->out, " not rax\n"); + } else { + unreachable(); + } +} + +static void codegen_ref_expr(CodeGen* g, RefExprNode* expr) { + codegen_expr(g, expr->operand, GenMode_lval); +} + +static void codegen_lval2rval(CodeGen* g, Type* ty) { + if (ty->kind == TypeKind_array || ty->kind == TypeKind_func) { + return; + } + + int size = type_sizeof(ty); + if (size == 1) { + fprintf(g->out, " movsx rax, BYTE PTR [rax]\n"); + } else if (size == 2) { + fprintf(g->out, " movsx rax, WORD PTR [rax]\n"); + } else if (size == 4) { + fprintf(g->out, " movsxd rax, DWORD PTR [rax]\n"); + } else if (size == 8) { + fprintf(g->out, " mov rax, [rax]\n"); + } else { + // Do nothing. + } +} + +static void codegen_deref_expr(CodeGen* g, DerefExprNode* expr, GenMode gen_mode) { + codegen_expr(g, expr->operand, GenMode_rval); + if (gen_mode == GenMode_rval) { + codegen_lval2rval(g, expr->operand->ty->base); + } +} + +static void codegen_cast_expr(CodeGen* g, CastExprNode* expr, Type* ty) { + codegen_expr(g, expr->operand, GenMode_rval); + + // (void) cast does nothing. + if (ty->kind == TypeKind_void) + return; + + int src_size = type_sizeof(expr->operand->ty); + int dst_size = type_sizeof(ty); + + if (src_size == dst_size) + return; + + if (dst_size == 1) { + fprintf(g->out, " movsx rax, al\n"); + } else if (dst_size == 2) { + if (src_size == 1) { + fprintf(g->out, " movsx rax, al\n"); + } else { + fprintf(g->out, " movsx rax, ax\n"); + } + } else if (dst_size == 4) { + if (src_size == 1) { + fprintf(g->out, " movsx rax, al\n"); + } else if (src_size == 2) { + fprintf(g->out, " movsx rax, ax\n"); + } else { + fprintf(g->out, " movsxd rax, eax\n"); + } + } else if (dst_size == 8) { + if (src_size == 1) { + fprintf(g->out, " movsx rax, al\n"); + } else if (src_size == 2) { + fprintf(g->out, " movsx rax, ax\n"); + } else if (src_size == 4) { + fprintf(g->out, " movsxd rax, eax\n"); + } + } +} + +static void codegen_logical_expr(CodeGen* g, LogicalExprNode* expr) { + int label = codegen_new_label(g); + + if (expr->op == TokenKind_andand) { + codegen_expr(g, expr->lhs, GenMode_rval); + fprintf(g->out, " cmp rax, 0\n"); + fprintf(g->out, " je .Lelse%d\n", label); + codegen_expr(g, expr->rhs, GenMode_rval); + fprintf(g->out, " jmp .Lend%d\n", label); + fprintf(g->out, ".Lelse%d:\n", label); + fprintf(g->out, " mov rax, 0\n"); + fprintf(g->out, ".Lend%d:\n", label); + } else { + codegen_expr(g, expr->lhs, GenMode_rval); + fprintf(g->out, " cmp rax, 0\n"); + fprintf(g->out, " je .Lelse%d\n", label); + fprintf(g->out, " mov rax, 1\n"); + fprintf(g->out, " jmp .Lend%d\n", label); + fprintf(g->out, ".Lelse%d:\n", label); + codegen_expr(g, expr->rhs, GenMode_rval); + fprintf(g->out, ".Lend%d:\n", label); + } +} + +static void codegen_binary_expr(CodeGen* g, BinaryExprNode* expr, GenMode gen_mode) { + codegen_expr(g, expr->lhs, gen_mode); + fprintf(g->out, " push rax\n"); + codegen_expr(g, expr->rhs, gen_mode); + fprintf(g->out, " mov rdi, rax\n"); + fprintf(g->out, " pop rax\n"); + // rax=lhs, rdi=rhs + if (expr->op == TokenKind_plus) { + fprintf(g->out, " add rax, rdi\n"); + } else if (expr->op == TokenKind_minus) { + fprintf(g->out, " sub rax, rdi\n"); + } else if (expr->op == TokenKind_star) { + fprintf(g->out, " imul rax, rdi\n"); + } else if (expr->op == TokenKind_slash) { + if (type_is_unsigned(expr->lhs->ty)) { + fprintf(g->out, " xor rdx, rdx\n"); + fprintf(g->out, " div rdi\n"); + } else { + fprintf(g->out, " cqo\n"); + fprintf(g->out, " idiv rdi\n"); + } + } else if (expr->op == TokenKind_percent) { + if (type_is_unsigned(expr->lhs->ty)) { + fprintf(g->out, " xor rdx, rdx\n"); + fprintf(g->out, " div rdi\n"); + } else { + fprintf(g->out, " cqo\n"); + fprintf(g->out, " idiv rdi\n"); + } + fprintf(g->out, " mov rax, rdx\n"); + } else if (expr->op == TokenKind_and) { + fprintf(g->out, " and rax, rdi\n"); + } else if (expr->op == TokenKind_or) { + fprintf(g->out, " or rax, rdi\n"); + } else if (expr->op == TokenKind_xor) { + fprintf(g->out, " xor rax, rdi\n"); + } else if (expr->op == TokenKind_lshift) { + fprintf(g->out, " mov rcx, rdi\n"); + fprintf(g->out, " shl rax, cl\n"); + } else if (expr->op == TokenKind_rshift) { + fprintf(g->out, " mov rcx, rdi\n"); + if (type_is_unsigned(expr->lhs->ty)) { + fprintf(g->out, " shr rax, cl\n"); + } else { + fprintf(g->out, " sar rax, cl\n"); + } + } else if (expr->op == TokenKind_eq) { + fprintf(g->out, " cmp rax, rdi\n"); + fprintf(g->out, " sete al\n"); + fprintf(g->out, " movzb rax, al\n"); + } else if (expr->op == TokenKind_ne) { + fprintf(g->out, " cmp rax, rdi\n"); + fprintf(g->out, " setne al\n"); + fprintf(g->out, " movzb rax, al\n"); + } else if (expr->op == TokenKind_lt) { + fprintf(g->out, " cmp rax, rdi\n"); + fprintf(g->out, " setl al\n"); + fprintf(g->out, " movzb rax, al\n"); + } else if (expr->op == TokenKind_le) { + fprintf(g->out, " cmp rax, rdi\n"); + fprintf(g->out, " setle al\n"); + fprintf(g->out, " movzb rax, al\n"); + } else { + unreachable(); + } +} + +static void codegen_cond_expr(CodeGen* g, CondExprNode* expr, GenMode gen_mode) { + int label = codegen_new_label(g); + + codegen_expr(g, expr->cond, GenMode_rval); + fprintf(g->out, " cmp rax, 0\n"); + fprintf(g->out, " je .Lelse%d\n", label); + codegen_expr(g, expr->then, gen_mode); + fprintf(g->out, " jmp .Lend%d\n", label); + fprintf(g->out, ".Lelse%d:\n", label); + codegen_expr(g, expr->else_, gen_mode); + fprintf(g->out, ".Lend%d:\n", label); +} + +static void codegen_assign_expr_helper(CodeGen* g, AssignExprNode* expr) { + if (expr->op == TokenKind_assign) { + return; + } + + fprintf(g->out, " mov rdi, rax\n"); + fprintf(g->out, " mov rax, [rsp]\n"); + codegen_lval2rval(g, expr->lhs->ty); + + if (expr->op == TokenKind_assign_add) { + fprintf(g->out, " add rax, rdi\n"); + } else if (expr->op == TokenKind_assign_sub) { + fprintf(g->out, " sub rax, rdi\n"); + } else if (expr->op == TokenKind_assign_mul) { + fprintf(g->out, " imul rax, rdi\n"); + } else if (expr->op == TokenKind_assign_div) { + if (type_is_unsigned(expr->lhs->ty)) { + fprintf(g->out, " xor rdx, rdx\n"); + fprintf(g->out, " div rdi\n"); + } else { + fprintf(g->out, " cqo\n"); + fprintf(g->out, " idiv rdi\n"); + } + } else if (expr->op == TokenKind_assign_mod) { + if (type_is_unsigned(expr->lhs->ty)) { + fprintf(g->out, " xor rdx, rdx\n"); + fprintf(g->out, " div rdi\n"); + } else { + fprintf(g->out, " cqo\n"); + fprintf(g->out, " idiv rdi\n"); + } + fprintf(g->out, " mov rax, rdx\n"); + } else if (expr->op == TokenKind_assign_or) { + fprintf(g->out, " or rax, rdi\n"); + } else if (expr->op == TokenKind_assign_and) { + fprintf(g->out, " and rax, rdi\n"); + } else if (expr->op == TokenKind_assign_xor) { + fprintf(g->out, " xor rax, rdi\n"); + } else if (expr->op == TokenKind_assign_lshift) { + fprintf(g->out, " mov rcx, rdi\n"); + fprintf(g->out, " shl rax, cl\n"); + } else if (expr->op == TokenKind_assign_rshift) { + fprintf(g->out, " mov rcx, rdi\n"); + if (type_is_unsigned(expr->lhs->ty)) { + fprintf(g->out, " shr rax, cl\n"); + } else { + fprintf(g->out, " sar rax, cl\n"); + } + } else { + unreachable(); + } +} + +static void codegen_assign_expr(CodeGen* g, AssignExprNode* expr) { + int sizeof_lhs = type_sizeof(expr->lhs->ty); + + codegen_expr(g, expr->lhs, GenMode_lval); + fprintf(g->out, " push rax\n"); + codegen_expr(g, expr->rhs, GenMode_rval); + codegen_assign_expr_helper(g, expr); + fprintf(g->out, " pop rdi\n"); + if (sizeof_lhs == 1) { + fprintf(g->out, " mov BYTE PTR [rdi], al\n"); + } else if (sizeof_lhs == 2) { + fprintf(g->out, " mov WORD PTR [rdi], ax\n"); + } else if (sizeof_lhs == 4) { + fprintf(g->out, " mov DWORD PTR [rdi], eax\n"); + } else if (sizeof_lhs == 8) { + fprintf(g->out, " mov [rdi], rax\n"); + } else { + if (expr->op != TokenKind_assign) { + unimplemented(); + } + // rax: address of rhs + // rdi: address of lhs + // Perform byte-wise copy. Use r10 register as temporary space. + for (int i = 0; i < sizeof_lhs; ++i) { + fprintf(g->out, " mov r10b, %d[rax]\n", i); + fprintf(g->out, " mov %d[rdi], r10b\n", i); + } + } +} + +static void codegen_args(CodeGen* g, AstNode* args) { + int* required_gp_regs_for_each_arg = calloc(args->as.list->len, sizeof(int)); + + int gp_regs = 6; + for (int i = 0; i < args->as.list->len; ++i) { + AstNode* arg = &args->as.list->items[i]; + int ty_size = type_sizeof(arg->ty); + // TODO + if (arg->ty->kind == TypeKind_array) { + ty_size = 8; + } + int required_gp_regs; + if (ty_size <= 8) { + required_gp_regs = 1; + } else if (ty_size <= 16) { + required_gp_regs = 2; + } else { + required_gp_regs = 0; + } + if (required_gp_regs <= gp_regs) { + gp_regs -= required_gp_regs; + } else { + required_gp_regs = 0; + } + required_gp_regs_for_each_arg[i] = required_gp_regs; + } + + // Evaluate arguments in the reverse order (right to left). + // Arguments passed by stack. + for (int i = args->as.list->len - 1; i >= 0; --i) { + AstNode* arg = &args->as.list->items[i]; + if (required_gp_regs_for_each_arg[i] == 0) { + codegen_expr(g, arg, GenMode_rval); + int ty_size = type_sizeof(arg->ty); + if (ty_size <= 8) { + fprintf(g->out, " push rax\n"); + } else { + // Perform byte-wise copy onto stack. Use r10 register as temporary space. + // NOTE: rsp must be aligned to 8. + fprintf(g->out, " sub rsp, %d\n", to_aligned(ty_size, 8)); + for (int i = 0; i < ty_size; ++i) { + // Copy a sinle byte from the address that rax points to to the stack via r10 register. + fprintf(g->out, " mov r10b, %d[rax]\n", i); + fprintf(g->out, " mov %d[rsp], r10b\n", i); + } + } + } + } + // Arguments passed by registers. + for (int i = args->as.list->len - 1; i >= 0; --i) { + AstNode* arg = &args->as.list->items[i]; + if (required_gp_regs_for_each_arg[i] != 0) { + codegen_expr(g, arg, GenMode_rval); + if (required_gp_regs_for_each_arg[i] == 1) { + fprintf(g->out, " push rax\n"); + } else { + assert(required_gp_regs_for_each_arg[i] == 2); + fprintf(g->out, " push [rax]\n"); + fprintf(g->out, " push [rax+8]\n"); + } + } + } + // Pop pushed arguments onto registers. + for (int i = 0, j = 0; i < args->as.list->len; ++i) { + for (int k = 0; k < required_gp_regs_for_each_arg[i]; ++k) { + fprintf(g->out, " pop %s\n", param_reg(j++)); + } + } +} + +static void codegen_func_call(CodeGen* g, FuncCallNode* call) { + const char* func_name = NULL; + if (call->func->kind == AstNodeKind_func) { + func_name = call->func->as.func->name; + } + + if (func_name && strcmp(func_name, "__ducc_va_start") == 0) { + fprintf(g->out, " # __ducc_va_start BEGIN\n"); + AstNode* va_list_args = &call->args->as.list->items[0]; + codegen_expr(g, va_list_args, GenMode_rval); + fprintf(g->out, " mov rdi, rax\n"); + + // Allocate save area. + fprintf(g->out, " sub rsp, 48\n"); + + fprintf(g->out, " mov [rsp+ 0], rdi\n"); + fprintf(g->out, " mov [rsp+ 8], rsi\n"); + fprintf(g->out, " mov [rsp+16], rdx\n"); + fprintf(g->out, " mov [rsp+24], rcx\n"); + fprintf(g->out, " mov [rsp+32], r8\n"); + fprintf(g->out, " mov [rsp+40], r9\n"); + + // Initialize va_list. + fprintf(g->out, " mov DWORD PTR [rdi], 8\n"); // gp_offset + fprintf(g->out, " mov DWORD PTR [rdi+4], 0\n"); // fp_offset + fprintf(g->out, " lea rax, [rbp+16]\n"); // overflow_arg_area + fprintf(g->out, " mov QWORD PTR [rdi+8], rax\n"); + fprintf(g->out, " mov QWORD PTR [rdi+16], rsp\n"); // reg_save_area + + fprintf(g->out, " mov rax, 0\n"); // dummy return value + fprintf(g->out, " # __ducc_va_start END\n"); + return; + } + + if (func_name && strcmp(func_name, "__ducc_va_arg") == 0) { + fprintf(g->out, " # __ducc_va_arg BEGIN\n"); + + // Evaluate va_list argument (first argument) + AstNode* va_list_arg = &call->args->as.list->items[0]; + codegen_expr(g, va_list_arg, GenMode_rval); + fprintf(g->out, " mov rdi, rax\n"); // rdi = pointer to va_list + + // Evaluate size argument (second argument) + AstNode* size_arg = &call->args->as.list->items[1]; + codegen_expr(g, size_arg, GenMode_rval); + fprintf(g->out, " mov rsi, rax\n"); // rsi = size + + int label = codegen_new_label(g); + + // Check if gp_offset < 48 (6 registers * 8 bytes) + fprintf(g->out, " mov eax, DWORD PTR [rdi]\n"); // eax = gp_offset + fprintf(g->out, " cmp eax, 48\n"); + fprintf(g->out, " jae .Lva_arg_overflow%d\n", label); + + // Fetch from register save area + fprintf(g->out, " mov rcx, QWORD PTR [rdi+16]\n"); // rcx = reg_save_area + fprintf(g->out, " movsx rdx, eax\n"); // rdx = gp_offset (sign-extended) + fprintf(g->out, " add rcx, rdx\n"); // rcx = reg_save_area + gp_offset + fprintf(g->out, " add eax, 8\n"); // gp_offset += 8 + fprintf(g->out, " mov DWORD PTR [rdi], eax\n"); // store updated gp_offset + fprintf(g->out, " mov rax, rcx\n"); // return pointer to argument + fprintf(g->out, " jmp .Lva_arg_end%d\n", label); + + // Fetch from overflow area (stack) + fprintf(g->out, ".Lva_arg_overflow%d:\n", label); + fprintf(g->out, " mov rcx, QWORD PTR [rdi+8]\n"); // rcx = overflow_arg_area + fprintf(g->out, " mov rax, rcx\n"); // return pointer to argument + fprintf(g->out, " add rcx, 8\n"); // overflow_arg_area += 8 + fprintf(g->out, " mov QWORD PTR [rdi+8], rcx\n"); // store updated overflow_arg_area + + fprintf(g->out, ".Lva_arg_end%d:\n", label); + fprintf(g->out, " # __ducc_va_arg END\n"); + return; + } + + AstNode* args = call->args; + + int gp_regs = 6; + int pass_by_stack_offset = -16; + for (int i = 0; i < args->as.list->len; ++i) { + AstNode* arg = &args->as.list->items[i]; + int ty_size = type_sizeof(arg->ty); + // TODO + if (arg->ty->kind == TypeKind_array) { + ty_size = 8; + } + int required_gp_regs; + if (ty_size <= 8) { + required_gp_regs = 1; + } else if (ty_size <= 16) { + required_gp_regs = 2; + } else { + required_gp_regs = 0; + } + if (required_gp_regs <= gp_regs) { + gp_regs -= required_gp_regs; + } else { + required_gp_regs = 0; + } + if (required_gp_regs == 0) { + pass_by_stack_offset -= to_aligned(ty_size, 8); + } + } + + int label = codegen_new_label(g); + + fprintf(g->out, " mov rax, rsp\n"); + if (-pass_by_stack_offset % 16 != 0) { + fprintf(g->out, " add rax, 8\n"); + } + fprintf(g->out, " and rax, 15\n"); + fprintf(g->out, " cmp rax, 0\n"); + fprintf(g->out, " je .Laligned%d\n", label); + + fprintf(g->out, " sub rsp, 8\n"); + codegen_args(g, args); + fprintf(g->out, " mov rax, 0\n"); + if (func_name) { + fprintf(g->out, " call %s\n", func_name); + } else { + codegen_expr(g, call->func, GenMode_rval); + fprintf(g->out, " call rax\n"); + } + fprintf(g->out, " add rsp, 8\n"); + + fprintf(g->out, " jmp .Lend%d\n", label); + fprintf(g->out, ".Laligned%d:\n", label); + + codegen_args(g, args); + fprintf(g->out, " mov rax, 0\n"); + if (func_name) { + fprintf(g->out, " call %s\n", func_name); + } else { + codegen_expr(g, call->func, GenMode_rval); + fprintf(g->out, " call rax\n"); + } + + fprintf(g->out, ".Lend%d:\n", label); + // Pop pass-by-stack arguments. + fprintf(g->out, " add rsp, %d\n", -pass_by_stack_offset - 16); +} + +static void codegen_lvar(CodeGen* g, LvarNode* lvar, Type* ty, GenMode gen_mode) { + fprintf(g->out, " lea rax, %d[rbp]\n", -lvar->stack_offset); + if (gen_mode == GenMode_rval) { + codegen_lval2rval(g, ty); + } +} + +static void codegen_gvar(CodeGen* g, GvarNode* gvar, Type* ty, GenMode gen_mode) { + fprintf(g->out, " lea rax, %s[rip]\n", gvar->name); + if (gen_mode == GenMode_rval) { + codegen_lval2rval(g, ty); + } +} + +static void codegen_func_ref(CodeGen* g, FuncNode* func) { + fprintf(g->out, " lea rax, %s[rip]\n", func->name); +} + +static void codegen_composite_expr(CodeGen* g, AstNode* ast) { + // Standard C does not have composite expression, but ducc internally has. + for (int i = 0; i < ast->as.list->len; ++i) { + AstNode* expr = ast->as.list->items + i; + codegen_expr(g, expr, GenMode_rval); + } +} + +static void codegen_expr(CodeGen* g, AstNode* ast, GenMode gen_mode) { + if (ast->kind == AstNodeKind_int_expr) { + codegen_int_expr(g, ast->as.int_expr); + } else if (ast->kind == AstNodeKind_str_expr) { + codegen_str_expr(g, ast->as.str_expr); + } else if (ast->kind == AstNodeKind_unary_expr) { + codegen_unary_expr(g, ast->as.unary_expr); + } else if (ast->kind == AstNodeKind_ref_expr) { + codegen_ref_expr(g, ast->as.ref_expr); + } else if (ast->kind == AstNodeKind_deref_expr) { + codegen_deref_expr(g, ast->as.deref_expr, gen_mode); + } else if (ast->kind == AstNodeKind_cast_expr) { + codegen_cast_expr(g, ast->as.cast_expr, ast->ty); + } else if (ast->kind == AstNodeKind_binary_expr) { + codegen_binary_expr(g, ast->as.binary_expr, gen_mode); + } else if (ast->kind == AstNodeKind_cond_expr) { + codegen_cond_expr(g, ast->as.cond_expr, gen_mode); + } else if (ast->kind == AstNodeKind_logical_expr) { + codegen_logical_expr(g, ast->as.logical_expr); + } else if (ast->kind == AstNodeKind_assign_expr) { + codegen_assign_expr(g, ast->as.assign_expr); + } else if (ast->kind == AstNodeKind_func_call) { + codegen_func_call(g, ast->as.func_call); + } else if (ast->kind == AstNodeKind_lvar) { + codegen_lvar(g, ast->as.lvar, ast->ty, gen_mode); + } else if (ast->kind == AstNodeKind_gvar) { + codegen_gvar(g, ast->as.gvar, ast->ty, gen_mode); + } else if (ast->kind == AstNodeKind_func) { + codegen_func_ref(g, ast->as.func); + } else if (ast->kind == AstNodeKind_list) { + codegen_composite_expr(g, ast); + } else { + unreachable(); + } +} + +static void codegen_return_stmt(CodeGen* g, ReturnStmtNode* stmt) { + if (stmt->expr) { + codegen_expr(g, stmt->expr, GenMode_rval); + } + codegen_func_epilogue(g); +} + +static void codegen_if_stmt(CodeGen* g, IfStmtNode* stmt) { + int label = codegen_new_label(g); + + codegen_expr(g, stmt->cond, GenMode_rval); + fprintf(g->out, " cmp rax, 0\n"); + fprintf(g->out, " je .Lelse%d\n", label); + codegen_stmt(g, stmt->then); + fprintf(g->out, " jmp .Lend%d\n", label); + fprintf(g->out, ".Lelse%d:\n", label); + if (stmt->else_) { + codegen_stmt(g, stmt->else_); + } + fprintf(g->out, ".Lend%d:\n", label); +} + +static void codegen_for_stmt(CodeGen* g, ForStmtNode* stmt) { + int label = codegen_new_label(g); + ++g->loop_labels; + *g->loop_labels = label; + + if (stmt->init) { + codegen_expr(g, stmt->init, GenMode_rval); + } + fprintf(g->out, ".Lbegin%d:\n", label); + codegen_expr(g, stmt->cond, GenMode_rval); + fprintf(g->out, " cmp rax, 0\n"); + fprintf(g->out, " je .Lend%d\n", label); + codegen_stmt(g, stmt->body); + fprintf(g->out, ".Lcontinue%d:\n", label); + if (stmt->update) { + codegen_expr(g, stmt->update, GenMode_rval); + } + fprintf(g->out, " jmp .Lbegin%d\n", label); + fprintf(g->out, ".Lend%d:\n", label); + + --g->loop_labels; +} + +static void codegen_do_while_stmt(CodeGen* g, DoWhileStmtNode* stmt) { + int label = codegen_new_label(g); + ++g->loop_labels; + *g->loop_labels = label; + + fprintf(g->out, ".Lbegin%d:\n", label); + codegen_stmt(g, stmt->body); + fprintf(g->out, ".Lcontinue%d:\n", label); + codegen_expr(g, stmt->cond, GenMode_rval); + fprintf(g->out, " cmp rax, 0\n"); + fprintf(g->out, " je .Lend%d\n", label); + fprintf(g->out, " jmp .Lbegin%d\n", label); + fprintf(g->out, ".Lend%d:\n", label); + + --g->loop_labels; +} + +static void codegen_break_stmt(CodeGen* g) { + if (g->switch_label != -1) { + fprintf(g->out, " jmp .Lend%d\n", g->switch_label); + } else { + int label = *g->loop_labels; + fprintf(g->out, " jmp .Lend%d\n", label); + } +} + +static void codegen_continue_stmt(CodeGen* g) { + int label = *g->loop_labels; + fprintf(g->out, " jmp .Lcontinue%d\n", label); +} + +static void codegen_goto_stmt(CodeGen* g, GotoStmtNode* stmt) { + fprintf(g->out, " jmp .L%s__%s\n", g->current_func->as.func_def->name, stmt->label); +} + +static void codegen_label_stmt(CodeGen* g, LabelStmtNode* stmt) { + fprintf(g->out, ".L%s__%s:\n", g->current_func->as.func_def->name, stmt->name); + codegen_stmt(g, stmt->body); +} + +// Helper to collect case values from the switch body +static void collect_cases(AstNode* stmt, int* case_values, int* case_labels, int* n_cases) { + if (!stmt) + return; + + if (stmt->kind == AstNodeKind_case_label) { + case_values[*n_cases] = stmt->as.case_label->value; + case_labels[*n_cases] = *n_cases + 1; + (*n_cases)++; + collect_cases(stmt->as.case_label->body, case_values, case_labels, n_cases); + } else if (stmt->kind == AstNodeKind_default_label) { + collect_cases(stmt->as.default_label->body, case_values, case_labels, n_cases); + } else if (stmt->kind == AstNodeKind_list) { + for (int i = 0; i < stmt->as.list->len; i++) { + collect_cases(stmt->as.list->items + i, case_values, case_labels, n_cases); + } + } +} + +static bool codegen_switch_body(CodeGen* g, AstNode* stmt, int* case_values, int* case_labels, int n_cases) { + if (!stmt) + return false; + + if (stmt->kind == AstNodeKind_case_label) { + int value = stmt->as.case_label->value; + for (int i = 0; i < n_cases; i++) { + if (case_values[i] == value) { + fprintf(g->out, ".Lcase%d_%d:\n", g->switch_label, case_labels[i]); + break; + } + } + return codegen_switch_body(g, stmt->as.case_label->body, case_values, case_labels, n_cases); + } else if (stmt->kind == AstNodeKind_default_label) { + fprintf(g->out, ".Ldefault%d:\n", g->switch_label); + codegen_switch_body(g, stmt->as.default_label->body, case_values, case_labels, n_cases); + return true; + } else if (stmt->kind == AstNodeKind_list) { + bool default_label_emitted = false; + for (int i = 0; i < stmt->as.list->len; i++) { + default_label_emitted |= + codegen_switch_body(g, stmt->as.list->items + i, case_values, case_labels, n_cases); + } + return default_label_emitted; + } else { + codegen_stmt(g, stmt); + return false; + } +} + +static void codegen_switch_stmt(CodeGen* g, SwitchStmtNode* stmt) { + int switch_label = codegen_new_label(g); + int prev_switch_label = g->switch_label; + g->switch_label = switch_label; + + // Collect all case values and assign labels + int case_values[256]; + int case_labels[256]; + int n_cases = 0; + collect_cases(stmt->body, case_values, case_labels, &n_cases); + + // Generate jump instructions. + codegen_expr(g, stmt->expr, GenMode_rval); + for (int i = 0; i < n_cases; i++) { + fprintf(g->out, " cmp rax, %d\n", case_values[i]); + fprintf(g->out, " je .Lcase%d_%d\n", switch_label, case_labels[i]); + } + fprintf(g->out, " jmp .Ldefault%d\n", switch_label); + + // Generate the switch body with labels. + bool default_label_emitted = codegen_switch_body(g, stmt->body, case_values, case_labels, n_cases); + + if (!default_label_emitted) { + fprintf(g->out, ".Ldefault%d:\n", switch_label); + } + fprintf(g->out, ".Lend%d:\n", switch_label); + + g->switch_label = prev_switch_label; +} + +static void codegen_expr_stmt(CodeGen* g, ExprStmtNode* stmt) { + codegen_expr(g, stmt->expr, GenMode_rval); +} + +static void codegen_block_stmt(CodeGen* g, AstNode* ast) { + for (int i = 0; i < ast->as.list->len; ++i) { + AstNode* stmt = ast->as.list->items + i; + codegen_stmt(g, stmt); + } +} + +static void codegen_stmt(CodeGen* g, AstNode* ast) { + // TODO: support multiple files. + fprintf(g->out, " .loc 1 %d 0\n", ast->loc.line); + + if (ast->kind == AstNodeKind_list) { + codegen_block_stmt(g, ast); + } else if (ast->kind == AstNodeKind_return_stmt) { + codegen_return_stmt(g, ast->as.return_stmt); + } else if (ast->kind == AstNodeKind_if_stmt) { + codegen_if_stmt(g, ast->as.if_stmt); + } else if (ast->kind == AstNodeKind_switch_stmt) { + codegen_switch_stmt(g, ast->as.switch_stmt); + } else if (ast->kind == AstNodeKind_for_stmt) { + codegen_for_stmt(g, ast->as.for_stmt); + } else if (ast->kind == AstNodeKind_do_while_stmt) { + codegen_do_while_stmt(g, ast->as.do_while_stmt); + } else if (ast->kind == AstNodeKind_break_stmt) { + codegen_break_stmt(g); + } else if (ast->kind == AstNodeKind_continue_stmt) { + codegen_continue_stmt(g); + } else if (ast->kind == AstNodeKind_goto_stmt) { + codegen_goto_stmt(g, ast->as.goto_stmt); + } else if (ast->kind == AstNodeKind_label_stmt) { + codegen_label_stmt(g, ast->as.label_stmt); + } else if (ast->kind == AstNodeKind_expr_stmt) { + codegen_expr_stmt(g, ast->as.expr_stmt); + } else if (ast->kind == AstNodeKind_lvar_decl) { + // Do nothing. + } else if (ast->kind == AstNodeKind_nop) { + // Do nothing. + } else if (ast->kind == AstNodeKind_case_label || ast->kind == AstNodeKind_default_label) { + // They are handled by codegen_switch_stmt(). + unreachable(); + } else { + unreachable(); + } +} + +static void codegen_func(CodeGen* g, AstNode* ast) { + g->current_func = ast; + + if (ast->ty->storage_class != StorageClass_static) { + fprintf(g->out, ".globl %s\n", ast->as.func_def->name); + } + fprintf(g->out, "%s:\n", ast->as.func_def->name); + + codegen_func_prologue(g, ast->as.func_def); + codegen_stmt(g, ast->as.func_def->body); + if (strcmp(ast->as.func_def->name, "main") == 0) { + // C99: 5.1.2.2.3 + fprintf(g->out, " mov rax, 0\n"); + } + codegen_func_epilogue(g); + + fprintf(g->out, "\n"); + g->current_func = NULL; +} + +static void codegen_global_var(CodeGen* g, AstNode* var) { + if (var->ty->storage_class == StorageClass_extern) { + return; + } + if (var->ty->storage_class != StorageClass_static) { + fprintf(g->out, ".globl %s\n", var->as.gvar_decl->name); + } + fprintf(g->out, " %s:\n", var->as.gvar_decl->name); + if (!var->as.gvar_decl->expr) { + fprintf(g->out, " .zero %d\n", type_sizeof(var->ty)); + return; + } + + if (var->ty->kind == TypeKind_array && var->as.gvar_decl->expr->kind == AstNodeKind_str_expr) { + const char* str = g->prog->str_literals[var->as.gvar_decl->expr->as.str_expr->idx - 1]; + fprintf(g->out, " .string \"%s\"\n", str); + return; + } + + InitData* data = eval_init_expr(var->as.gvar_decl->expr, var->ty); + + for (size_t i = 0; i < data->len; ++i) { + InitDataBlock* block = &data->blocks[i]; + if (block->kind == InitDataBlockKind_addr) { + fprintf(g->out, " .quad %s\n", block->as.addr.label); + } else { + for (size_t j = 0; j < block->as.bytes.len; ++j) { + fprintf(g->out, " .byte %d\n", block->as.bytes.buf[j]); + } + } + } +} + +void codegen(Program* prog, const char* input_filename, FILE* out) { + CodeGen* g = codegen_new(prog, out); + + fprintf(g->out, ".intel_syntax noprefix\n\n"); + + // TODO: support multiple files. + fprintf(g->out, ".file 1 \"%s\"\n\n", input_filename); + + // For GNU ld: + // https://sourceware.org/binutils/docs/ld/Options.html + fprintf(g->out, ".section .note.GNU-stack,\"\",@progbits\n\n"); + + fprintf(g->out, ".section .rodata\n\n"); + for (int i = 0; prog->str_literals[i]; ++i) { + fprintf(g->out, ".Lstr__%d:\n", i + 1); + fprintf(g->out, " .string \"%s\"\n\n", prog->str_literals[i]); + } + + fprintf(g->out, ".data\n\n"); + for (int i = 0; i < prog->vars->as.list->len; ++i) { + codegen_global_var(g, &prog->vars->as.list->items[i]); + } + + fprintf(g->out, ".text\n\n"); + for (int i = 0; i < prog->funcs->as.list->len; ++i) { + AstNode* func = &prog->funcs->as.list->items[i]; + codegen_func(g, func); + } +} diff --git a/src/cc1/codegen.h b/src/cc1/codegen.h new file mode 100644 index 0000000..2c1d7b3 --- /dev/null +++ b/src/cc1/codegen.h @@ -0,0 +1,8 @@ +#ifndef DUCC_CODEGEN_H +#define DUCC_CODEGEN_H + +#include "ast.h" + +void codegen(Program* prog, const char* input_filename, FILE* out); + +#endif diff --git a/src/cc1/codegen_wasm.c b/src/cc1/codegen_wasm.c new file mode 100644 index 0000000..c7f8870 --- /dev/null +++ b/src/cc1/codegen_wasm.c @@ -0,0 +1,164 @@ +#include <stdlib.h> +#include <string.h> +#include "../lib/common.h" +#include "codegen.h" +#include "preprocess.h" + +typedef enum { + GenMode_lval, + GenMode_rval, +} GenMode; + +typedef struct { + FILE* out; + int next_label; + int* loop_labels; + AstNode* current_func; + int switch_label; +} CodeGen; + +static CodeGen* codegen_new(FILE* out) { + CodeGen* g = calloc(1, sizeof(CodeGen)); + g->out = out; + g->next_label = 1; + g->loop_labels = calloc(1024, sizeof(int)); + g->switch_label = -1; + return g; +} + +static void codegen_expr(CodeGen* g, AstNode* ast, GenMode gen_mode); +static void codegen_stmt(CodeGen* g, AstNode* ast); + +static void codegen_func_prologue(CodeGen* g, FuncDefNode* func_def) { + for (int i = 0; i < func_def->params->as.list->len; ++i) { + fprintf(g->out, " (param $l_%s i32)", func_def->params->as.list->items[i].as.param->name); + } + fprintf(g->out, " (result i32)\n"); +} + +static void codegen_func_epilogue(CodeGen*) { +} + +static void codegen_int_expr(CodeGen* g, IntExprNode* expr) { + fprintf(g->out, " i32.const %d\n", expr->value); +} + +static void codegen_binary_expr(CodeGen* g, BinaryExprNode* expr, GenMode gen_mode) { + codegen_expr(g, expr->lhs, gen_mode); + codegen_expr(g, expr->rhs, gen_mode); + if (expr->op == TokenKind_plus) { + fprintf(g->out, " i32.add\n"); + } else if (expr->op == TokenKind_minus) { + fprintf(g->out, " i32.sub\n"); + } else if (expr->op == TokenKind_le) { + fprintf(g->out, " i32.le_s\n"); + } else { + unreachable(); + } +} + +static void codegen_lvar(CodeGen* g, LvarNode* lvar, GenMode) { + fprintf(g->out, " local.get $l_%s\n", lvar->name); +} + +static void codegen_func_call(CodeGen* g, FuncCallNode* call) { + const char* func_name; + if (call->func->kind == AstNodeKind_func) { + func_name = call->func->as.func->name; + } else { + unimplemented(); + } + + AstNode* args = call->args; + for (int i = 0; i < args->as.list->len; ++i) { + AstNode* arg = args->as.list->items + i; + codegen_expr(g, arg, GenMode_rval); + } + fprintf(g->out, " call $%s\n", func_name); +} + +static void codegen_expr(CodeGen* g, AstNode* ast, GenMode gen_mode) { + if (ast->kind == AstNodeKind_int_expr) { + codegen_int_expr(g, ast->as.int_expr); + } else if (ast->kind == AstNodeKind_binary_expr) { + codegen_binary_expr(g, ast->as.binary_expr, gen_mode); + } else if (ast->kind == AstNodeKind_lvar) { + codegen_lvar(g, ast->as.lvar, gen_mode); + } else if (ast->kind == AstNodeKind_func_call) { + codegen_func_call(g, ast->as.func_call); + } else { + unreachable(); + } +} + +static void codegen_return_stmt(CodeGen* g, ReturnStmtNode* stmt) { + if (stmt->expr) { + codegen_expr(g, stmt->expr, GenMode_rval); + } + fprintf(g->out, " return\n"); +} + +static void codegen_if_stmt(CodeGen* g, IfStmtNode* stmt) { + codegen_expr(g, stmt->cond, GenMode_rval); + fprintf(g->out, " (if (result i32)\n"); + fprintf(g->out, " (then\n"); + codegen_stmt(g, stmt->then); + fprintf(g->out, " )\n"); + if (stmt->else_) { + fprintf(g->out, " (else\n"); + codegen_stmt(g, stmt->else_); + fprintf(g->out, " )\n"); + } else { + fprintf(g->out, " (else\n"); + fprintf(g->out, " i32.const 0\n"); + fprintf(g->out, " )\n"); + } + fprintf(g->out, " )\n"); +} + +static void codegen_block_stmt(CodeGen* g, AstNode* ast) { + for (int i = 0; i < ast->as.list->len; ++i) { + AstNode* stmt = ast->as.list->items + i; + codegen_stmt(g, stmt); + } +} + +static void codegen_stmt(CodeGen* g, AstNode* ast) { + if (ast->kind == AstNodeKind_list) { + codegen_block_stmt(g, ast); + } else if (ast->kind == AstNodeKind_return_stmt) { + codegen_return_stmt(g, ast->as.return_stmt); + } else if (ast->kind == AstNodeKind_if_stmt) { + codegen_if_stmt(g, ast->as.if_stmt); + } else if (ast->kind == AstNodeKind_nop) { + // Do nothing. + } else { + unreachable(); + } +} + +static void codegen_func(CodeGen* g, AstNode* ast) { + g->current_func = ast; + + fprintf(g->out, "(func $%s (export \"%s\")", ast->as.func_def->name, ast->as.func_def->name); + + codegen_func_prologue(g, ast->as.func_def); + codegen_stmt(g, ast->as.func_def->body); + codegen_func_epilogue(g); + + fprintf(g->out, ")\n"); + g->current_func = NULL; +} + +void codegen_wasm(Program* prog, FILE* out) { + CodeGen* g = codegen_new(out); + + fprintf(g->out, "(module\n"); + + for (int i = 0; i < prog->funcs->as.list->len; ++i) { + AstNode* func = prog->funcs->as.list->items + i; + codegen_func(g, func); + } + + fprintf(g->out, ")\n"); +} diff --git a/src/cc1/codegen_wasm.h b/src/cc1/codegen_wasm.h new file mode 100644 index 0000000..1089154 --- /dev/null +++ b/src/cc1/codegen_wasm.h @@ -0,0 +1,8 @@ +#ifndef DUCC_CODEGEN_WASM_H +#define DUCC_CODEGEN_WASM_H + +#include "ast.h" + +void codegen_wasm(Program* prog, FILE* out); + +#endif diff --git a/src/cc1/fs.c b/src/cc1/fs.c new file mode 100644 index 0000000..c702fa4 --- /dev/null +++ b/src/cc1/fs.c @@ -0,0 +1,27 @@ +#include "fs.h" +#include <stdlib.h> +#include <string.h> + +// 'ext' must include '.'. +char* replace_extension(const char* file_name, const char* ext) { + size_t len = strlen(file_name); + const char* last_slash = strrchr(file_name, '/'); + const char* last_dot = strrchr(file_name, '.'); + + size_t ext_len = strlen(ext); + size_t base_len; + // !last_slash: foo.c + // last_slash < last_dot: ./bar/foo.c + if (last_dot && (!last_slash || last_slash < last_dot)) { + base_len = last_dot - file_name; + } else { + base_len = len; + } + + char* result = calloc(base_len + ext_len + 1, sizeof(char)); + memcpy(result, file_name, base_len); + memcpy(result + base_len, ext, ext_len); + result[base_len + ext_len] = '\0'; + + return result; +} diff --git a/src/cc1/fs.h b/src/cc1/fs.h new file mode 100644 index 0000000..b71889f --- /dev/null +++ b/src/cc1/fs.h @@ -0,0 +1,7 @@ +#ifndef DUCC_FS_H +#define DUCC_FS_H + +// 'ext' must include '.'. +char* replace_extension(const char* file_name, const char* ext); + +#endif diff --git a/src/cc1/io.c b/src/cc1/io.c new file mode 100644 index 0000000..65cd448 --- /dev/null +++ b/src/cc1/io.c @@ -0,0 +1,127 @@ +#include "io.h" +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include "../lib/common.h" +#include "../lib/json.h" + +void sourcelocation_build_json(JsonBuilder* builder, SourceLocation* loc) { + jsonbuilder_object_start(builder); + jsonbuilder_object_member_start(builder, "filename"); + jsonbuilder_string(builder, loc->filename); + jsonbuilder_object_member_end(builder); + jsonbuilder_object_member_start(builder, "line"); + jsonbuilder_integer(builder, loc->line); + jsonbuilder_object_member_end(builder); + jsonbuilder_object_end(builder); +} + +InFile* infile_open(const char* filename) { + FILE* in = fopen(filename, "rb"); + if (!in) { + return NULL; + } + + size_t buf_size = 1024 * 10; + char* buf = calloc(buf_size, sizeof(char)); + char* cur = buf; + char* tmp = calloc(1024, sizeof(char)); + + while (fgets(tmp, 1024, in)) { + size_t len = strlen(tmp); + size_t used_size = cur - buf; + + if (buf_size <= used_size + len) { + size_t old_size = buf_size; + buf_size *= 2; + buf = realloc(buf, buf_size); + memset(buf + old_size, 0, buf_size - old_size); + cur = buf + used_size; + } + + memcpy(cur, tmp, len); + cur += len; + } + fclose(in); + + InFile* in_file = calloc(1, sizeof(InFile)); + in_file->buf = buf; + in_file->loc.filename = filename; + in_file->loc.line = 1; + return in_file; +} + +bool infile_eof(InFile* f) { + return f->buf[f->pos] == '\0'; +} + +char infile_peek_char(InFile* f) { + char c = f->buf[f->pos]; + + // Skip a pair of backslash and new-line. + if (c == '\\') { + char c2 = f->buf[f->pos + 1]; + // C23: 5.1.1.2 + // A source file that is not empty shall end in a new-line character, which shall not be immediately preceded by + // a backslash character before any such splicing takes place. + if (c2 == '\0') { + fatal_error("%s:%d: <new-line> expected, but got <eof>", f->loc.filename, f->loc.line); + } + // Handle line continuation. + if (c2 == '\r') { + if (f->buf[f->pos + 2] == '\n') { + f->pos += 3; // Backslash + CRLF + } else { + f->pos += 2; // Backslash + CR + } + ++f->loc.line; + return infile_peek_char(f); + } else if (c2 == '\n') { + f->pos += 2; // Backslash + LF + ++f->loc.line; + return infile_peek_char(f); + } + } + + // Normalize new-line. + if (c == '\r') + c = '\n'; + return c; +} + +char infile_next_char(InFile* f) { + char c = infile_peek_char(f); + + if (f->buf[f->pos] == '\r') { + ++f->pos; + if (f->buf[f->pos] == '\n') { + ++f->pos; // CRLF + } + c = '\n'; + } else { + ++f->pos; + } + + if (c == '\n') + ++f->loc.line; + return c; +} + +char infile_peek_char2(InFile* f) { + int saved_pos = f->pos; + int saved_line = f->loc.line; + infile_next_char(f); + char c = infile_peek_char(f); + f->pos = saved_pos; + f->loc.line = saved_line; + return c; +} + +bool infile_consume_if(InFile* f, char expected) { + if (infile_peek_char(f) == expected) { + infile_next_char(f); + return true; + } else { + return false; + } +} diff --git a/src/cc1/io.h b/src/cc1/io.h new file mode 100644 index 0000000..be48b51 --- /dev/null +++ b/src/cc1/io.h @@ -0,0 +1,26 @@ +#ifndef DUCC_IO_H +#define DUCC_IO_H + +#include "../lib/json.h" + +typedef struct { + const char* filename; + int line; +} SourceLocation; + +void sourcelocation_build_json(JsonBuilder* builder, SourceLocation* loc); + +typedef struct { + const char* buf; + int pos; + SourceLocation loc; +} InFile; + +InFile* infile_open(const char* filename); +bool infile_eof(InFile* f); +char infile_peek_char(InFile* f); +char infile_peek_char2(InFile* f); +char infile_next_char(InFile* f); +bool infile_consume_if(InFile* f, char expected); + +#endif diff --git a/src/cc1/parse.c b/src/cc1/parse.c new file mode 100644 index 0000000..52167c1 --- /dev/null +++ b/src/cc1/parse.c @@ -0,0 +1,3018 @@ +#include "parse.h" +#include "../lib/common.h" +#include "tokenize.h" + +typedef struct { + const char* name; + Type* ty; + int stack_offset; +} LocalVar; + +typedef struct { + size_t len; + size_t capacity; + LocalVar* data; +} LocalVarArray; + +static void lvars_init(LocalVarArray* lvars) { + lvars->len = 0; + lvars->capacity = 4; + lvars->data = calloc(lvars->capacity, sizeof(LocalVar)); +} + +static void lvars_reserve(LocalVarArray* lvars, size_t size) { + if (size <= lvars->capacity) + return; + while (lvars->capacity < size) { + lvars->capacity *= 2; + } + lvars->data = realloc(lvars->data, lvars->capacity * sizeof(LocalVar)); + memset(lvars->data + lvars->len, 0, (lvars->capacity - lvars->len) * sizeof(LocalVar)); +} + +static LocalVar* lvars_push_new(LocalVarArray* lvars) { + lvars_reserve(lvars, lvars->len + 1); + return &lvars->data[lvars->len++]; +} + +typedef struct { + const char* name; + int index; +} ScopedSymbol; + +typedef struct { + size_t len; + size_t capacity; + ScopedSymbol* data; +} ScopedSymbolArray; + +static void scopedsymbols_init(ScopedSymbolArray* syms) { + syms->len = 0; + syms->capacity = 4; + syms->data = calloc(syms->capacity, sizeof(ScopedSymbol)); +} + +static void scopedsymbols_reserve(ScopedSymbolArray* syms, size_t size) { + if (size <= syms->capacity) + return; + while (syms->capacity < size) { + syms->capacity *= 2; + } + syms->data = realloc(syms->data, syms->capacity * sizeof(ScopedSymbol)); + memset(syms->data + syms->len, 0, (syms->capacity - syms->len) * sizeof(ScopedSymbol)); +} + +static ScopedSymbol* scopedsymbols_push_new(ScopedSymbolArray* syms) { + scopedsymbols_reserve(syms, syms->len + 1); + return &syms->data[syms->len++]; +} + +typedef struct Scope { + struct Scope* outer; + ScopedSymbolArray syms; +} Scope; + +typedef struct { + const char* name; + Type* ty; +} GlobalVar; + +typedef struct { + size_t len; + size_t capacity; + GlobalVar* data; +} GlobalVarArray; + +static void gvars_init(GlobalVarArray* gvars) { + gvars->len = 0; + gvars->capacity = 4; + gvars->data = calloc(gvars->capacity, sizeof(GlobalVar)); +} + +static void gvars_reserve(GlobalVarArray* gvars, size_t size) { + if (size <= gvars->capacity) + return; + while (gvars->capacity < size) { + gvars->capacity *= 2; + } + gvars->data = realloc(gvars->data, gvars->capacity * sizeof(GlobalVar)); + memset(gvars->data + gvars->len, 0, (gvars->capacity - gvars->len) * sizeof(GlobalVar)); +} + +static GlobalVar* gvars_push_new(GlobalVarArray* gvars) { + gvars_reserve(gvars, gvars->len + 1); + return &gvars->data[gvars->len++]; +} + +typedef struct { + const char* name; + Type* ty; +} Func; + +typedef struct { + size_t len; + size_t capacity; + Func* data; +} FuncArray; + +static void funcs_init(FuncArray* funcs) { + funcs->len = 0; + funcs->capacity = 32; + funcs->data = calloc(funcs->capacity, sizeof(Func)); +} + +static void funcs_reserve(FuncArray* funcs, size_t size) { + if (size <= funcs->capacity) + return; + while (funcs->capacity < size) { + funcs->capacity *= 2; + } + funcs->data = realloc(funcs->data, funcs->capacity * sizeof(Func)); + memset(funcs->data + funcs->len, 0, (funcs->capacity - funcs->len) * sizeof(Func)); +} + +static Func* funcs_push_new(FuncArray* funcs) { + funcs_reserve(funcs, funcs->len + 1); + return &funcs->data[funcs->len++]; +} + +typedef struct { + TokenArray* tokens; + int pos; + LocalVarArray lvars; + Scope* scope; + GlobalVarArray gvars; + FuncArray funcs; + AstNode* structs; + AstNode* unions; + AstNode* enums; + AstNode* typedefs; + StrArray str_literals; + int anonymous_user_type_counter; + AstNode* current_switch; +} Parser; + +static Parser* parser_new(TokenArray* tokens) { + Parser* p = calloc(1, sizeof(Parser)); + p->tokens = tokens; + gvars_init(&p->gvars); + funcs_init(&p->funcs); + p->structs = ast_new_list(4); + p->unions = ast_new_list(4); + p->enums = ast_new_list(4); + p->typedefs = ast_new_list(16); + strings_init(&p->str_literals); + + Func* va_start_func = funcs_push_new(&p->funcs); + va_start_func->name = "__ducc_va_start"; + va_start_func->ty = type_new_func(type_new(TypeKind_void), NULL); + + Func* va_arg_func = funcs_push_new(&p->funcs); + va_arg_func->name = "__ducc_va_arg"; + va_arg_func->ty = type_new_func(type_new_ptr(type_new(TypeKind_void)), NULL); + + return p; +} + +static Token* peek_token(Parser* p) { + return &p->tokens->data[p->pos]; +} + +static Token* peek_token2(Parser* p) { + return &p->tokens->data[p->pos + 1]; +} + +static SourceLocation current_location(Parser* p) { + return peek_token(p)->loc; +} + +static Token* next_token(Parser* p) { + return &p->tokens->data[p->pos++]; +} + +static Token* consume_token_if(Parser* p, TokenKind expected) { + if (peek_token(p)->kind == expected) { + return next_token(p); + } else { + return NULL; + } +} + +static bool eof(Parser* p) { + return peek_token(p)->kind != TokenKind_eof; +} + +static Token* expect(Parser* p, TokenKind expected) { + Token* t = next_token(p); + if (t->kind == expected) { + return t; + } + fatal_error("%s:%d: expected '%s', but got '%s'", t->loc.filename, t->loc.line, token_kind_stringify(expected), + token_stringify(t)); +} + +static int find_lvar_in_scope(Parser*, Scope* scope, const char* name) { + for (size_t i = 0; i < scope->syms.len; ++i) { + ScopedSymbol* sym = &scope->syms.data[i]; + if (sym->name && strcmp(sym->name, name) == 0) { + return sym->index; + } + } + return -1; +} + +static int find_lvar_in_current_scope(Parser* p, const char* name) { + return find_lvar_in_scope(p, p->scope, name); +} + +static int find_lvar(Parser* p, const char* name) { + Scope* scope = p->scope; + while (scope) { + int idx = find_lvar_in_scope(p, scope, name); + if (idx != -1) + return idx; + scope = scope->outer; + } + return -1; +} + +static int calc_lvar_stack_offset(Parser* p, Type* ty) { + int last_offset = 0; + for (int i = p->lvars.len - 1; i >= 0; i--) { + int offset = p->lvars.data[i].stack_offset; + // Skip a passed-by-stack parameter. + if (offset <= 0) + continue; + last_offset = offset; + break; + } + return to_aligned(last_offset + type_sizeof(ty), type_alignof(ty)); +} + +static int add_lvar(Parser* p, const char* name, Type* ty, int stack_offset) { + LocalVar* lvar = lvars_push_new(&p->lvars); + lvar->name = name; + lvar->ty = ty; + lvar->stack_offset = stack_offset; + ScopedSymbol* sym = scopedsymbols_push_new(&p->scope->syms); + sym->name = name; + sym->index = p->lvars.len - 1; + return stack_offset; +} + +static AstNode* generate_temporary_lvar(Parser* p, Type* ty) { + int stack_offset = add_lvar(p, NULL, ty, calc_lvar_stack_offset(p, ty)); + return ast_new_lvar(NULL, stack_offset, ty); +} + +static int find_gvar(Parser* p, const char* name) { + for (size_t i = 0; i < p->gvars.len; ++i) { + if (strcmp(p->gvars.data[i].name, name) == 0) { + return i; + } + } + return -1; +} + +static int find_func(Parser* p, const char* name) { + for (size_t i = 0; i < p->funcs.len; ++i) { + if (strcmp(p->funcs.data[i].name, name) == 0) { + return i; + } + } + return -1; +} + +static int find_struct(Parser* p, const char* name) { + for (int i = 0; i < p->structs->as.list->len; ++i) { + if (strcmp(p->structs->as.list->items[i].as.struct_def->name, name) == 0) { + return i; + } + } + return -1; +} + +static int find_union(Parser* p, const char* name) { + for (int i = 0; i < p->unions->as.list->len; ++i) { + if (strcmp(p->unions->as.list->items[i].as.union_def->name, name) == 0) { + return i; + } + } + return -1; +} + +static int find_enum(Parser* p, const char* name) { + for (int i = 0; i < p->enums->as.list->len; ++i) { + if (strcmp(p->enums->as.list->items[i].as.enum_def->name, name) == 0) { + return i; + } + } + return -1; +} + +static int find_enum_member(Parser* p, const char* name) { + for (int i = 0; i < p->enums->as.list->len; ++i) { + AstNode* members = p->enums->as.list->items[i].as.enum_def->members; + if (!members) + continue; + for (int j = 0; j < members->as.list->len; ++j) { + if (strcmp(members->as.list->items[j].as.enum_member->name, name) == 0) { + return i * 1000 + j; + } + } + } + return -1; +} + +static int find_typedef(Parser* p, const char* name) { + for (int i = 0; i < p->typedefs->as.list->len; ++i) { + if (strcmp(p->typedefs->as.list->items[i].as.typedef_decl->name, name) == 0) { + return i; + } + } + return -1; +} + +static void enter_scope(Parser* p) { + Scope* outer_scope = p->scope; + p->scope = calloc(1, sizeof(Scope)); + p->scope->outer = outer_scope; + scopedsymbols_init(&p->scope->syms); +} + +static void leave_scope(Parser* p) { + p->scope = p->scope->outer; +} + +static void enter_func(Parser* p) { + lvars_init(&p->lvars); + enter_scope(p); +} + +static void leave_func(Parser* p) { + leave_scope(p); +} + +static Token* parse_ident(Parser*); +static AstNode* parse_primary_expr(Parser*); +static AstNode* parse_postfix_expr(Parser*); +static AstNode* parse_argument_expr_list(Parser*); +static AstNode* parse_unary_expr(Parser*); +static AstNode* parse_cast_expr(Parser*); +static AstNode* parse_multiplicative_expr(Parser*); +static AstNode* parse_additive_expr(Parser*); +static AstNode* parse_shift_expr(Parser*); +static AstNode* parse_relational_expr(Parser*); +static AstNode* parse_equality_expr(Parser*); +static AstNode* parse_bitwise_and_expr(Parser*); +static AstNode* parse_bitwise_xor_expr(Parser*); +static AstNode* parse_bitwise_or_expr(Parser*); +static AstNode* parse_logical_and_expr(Parser*); +static AstNode* parse_logical_or_expr(Parser*); +static AstNode* parse_conditional_expr(Parser*); +static AstNode* parse_assignment_expr(Parser*); +static AstNode* parse_expr(Parser*); +static AstNode* parse_constant_expr(Parser*); +static Type* parse_pointer_opt(Parser*, Type*); +static void parse_type_qualifier_list_opt(Parser*); +static AstNode* parse_declarator_or_abstract_declarator_opt(Parser*, Type*); +static AstNode* parse_parameter_type_list(Parser*); +static AstNode* parse_parameter_list(Parser*); +static AstNode* parse_parameter_declaration(Parser*); +static Type* parse_array_declarator_suffix(Parser*, Type*); +static Type* parse_function_declarator_suffix(Parser*, Type*); +static AstNode* parse_direct_declarator(Parser*, Type*); +static AstNode* parse_declarator(Parser*, Type*); +static AstNode* parse_init_declarator_list(Parser*, Type*); +static AstNode* parse_declaration(Parser*); +static AstNode* parse_function_definition(Parser*, AstNode*); +static Type* parse_declaration_specifiers(Parser*); +static AstNode* parse_init_declarator(Parser*, Type*); +static Type* parse_struct_specifier(Parser*); +static Type* parse_union_specifier(Parser*); +static AstNode* parse_member_declaration_list(Parser*); +static AstNode* parse_member_declaration(Parser*); +static Type* parse_specifier_qualifier_list(Parser*); +static AstNode* parse_member_declarator_list(Parser*, Type*); +static AstNode* parse_member_declarator(Parser*, Type*); +static Type* parse_enum_specifier(Parser*); +static void parse_enum_members(Parser*, int); +static AstNode* parse_enum_member(Parser*, int); +static Type* parse_type_name(Parser*); +static Type* parse_abstract_declarator_opt(Parser*, Type*); +static AstNode* parse_braced_initializer(Parser*); +static AstNode* parse_initializer(Parser*); +static AstNode* parse_attribute_specifier_sequence_opt(Parser*); +static AstNode* parse_attribute_specifier_opt(Parser*); +static AstNode* parse_attribute_list(Parser*); +static AstNode* parse_attribute(Parser*); +static AstNode* parse_attribute_argument_clause(Parser*); +static AstNode* parse_balanced_token_sequence(Parser*); +static AstNode* parse_stmt(Parser*); +static AstNode* parse_unlabaled_stmt(Parser*); +static AstNode* parse_compound_stmt(Parser*); +static AstNode* parse_block_item(Parser*); +static AstNode* parse_expr_stmt(Parser*); +static AstNode* parse_if_stmt(Parser*); +static AstNode* parse_switch_stmt(Parser*); +static AstNode* parse_while_stmt(Parser*); +static AstNode* parse_do_while_stmt(Parser*); +static AstNode* parse_for_stmt(Parser*); +static AstNode* parse_goto_stmt(Parser*); +static AstNode* parse_continue_stmt(Parser*); +static AstNode* parse_break_stmt(Parser*); +static AstNode* parse_return_stmt(Parser*); +static AstNode* parse_static_assert_declaration(Parser*); +static AstNode* parse_external_declaration(Parser*); +static int eval(AstNode*); +static void process_declarations(Parser*, AstNode*); + +static Token* parse_ident(Parser* p) { + return expect(p, TokenKind_ident); +} + +static int register_str_literal(Parser* p, const char* s) { + return strings_push(&p->str_literals, s); +} + +static void register_params(Parser* p, AstNode* params) { + int gp_regs = 6; + int pass_by_reg_offset = 8; + int pass_by_stack_offset = -16; + for (int i = 0; i < params->as.list->len; ++i) { + AstNode* param = ¶ms->as.list->items[i]; + int ty_size = type_sizeof(param->ty); + int required_gp_regs; + if (ty_size <= 8) { + required_gp_regs = 1; + } else if (ty_size <= 16) { + required_gp_regs = 2; + } else { + required_gp_regs = 0; + } + if (required_gp_regs <= gp_regs) { + gp_regs -= required_gp_regs; + } else { + required_gp_regs = 0; + } + int stack_offset; + if (required_gp_regs == 0) { + stack_offset = pass_by_stack_offset; + pass_by_stack_offset -= to_aligned(ty_size, 8); + } else { + stack_offset = pass_by_reg_offset; + pass_by_reg_offset += to_aligned(ty_size, 8); + } + const char* name = param->as.declarator->name; + param->kind = AstNodeKind_param; + param->as.param = calloc(1, sizeof(ParamNode)); + param->as.param->name = name; + param->as.param->stack_offset = stack_offset; + add_lvar(p, name, param->ty, stack_offset); + } +} + +static void register_func(Parser* p, const char* name, Type* ty) { + Func* func = funcs_push_new(&p->funcs); + func->name = name; + func->ty = ty; +} + +typedef enum { + TypeSpecifierMask_void = 1 << 0, + TypeSpecifierMask_char = 1 << 1, + TypeSpecifierMask_short = 1 << 2, + TypeSpecifierMask_int = 1 << 3, + TypeSpecifierMask_long = 1 << 4, + // 1 << 5 is used for second 'long'. + TypeSpecifierMask_float = 1 << 6, + TypeSpecifierMask_double = 1 << 7, + TypeSpecifierMask_signed = 1 << 8, + TypeSpecifierMask_unsigned = 1 << 9, + TypeSpecifierMask__BitInt = 1 << 10, + TypeSpecifierMask_bool = 1 << 11, + TypeSpecifierMask__Complex = 1 << 12, + TypeSpecifierMask__Decimal32 = 1 << 13, + TypeSpecifierMask__Decimal64 = 1 << 14, + TypeSpecifierMask__Decimal128 = 1 << 15, + TypeSpecifierMask__Atomic = 1 << 16, + TypeSpecifierMask_struct = 1 << 17, + TypeSpecifierMask_union = 1 << 18, + TypeSpecifierMask_enum = 1 << 19, + TypeSpecifierMask_typeof = 1 << 20, + TypeSpecifierMask_typeof_unqual = 1 << 21, + TypeSpecifierMask_typedef_name = 1 << 22, +} TypeSpecifierMask; + +static Type* distinguish_type_from_type_specifiers(int type_specifiers) { + if (type_specifiers == TypeSpecifierMask_void) + return type_new(TypeKind_void); + else if (type_specifiers == TypeSpecifierMask_char) + return type_new(TypeKind_char); + else if (type_specifiers == (TypeSpecifierMask_signed + TypeSpecifierMask_char)) + return type_new(TypeKind_schar); + else if (type_specifiers == (TypeSpecifierMask_unsigned + TypeSpecifierMask_char)) + return type_new(TypeKind_uchar); + else if (type_specifiers == TypeSpecifierMask_short || + type_specifiers == (TypeSpecifierMask_signed + TypeSpecifierMask_short) || + type_specifiers == (TypeSpecifierMask_short + TypeSpecifierMask_int) || + type_specifiers == (TypeSpecifierMask_signed + TypeSpecifierMask_short + TypeSpecifierMask_int)) + return type_new(TypeKind_short); + else if (type_specifiers == (TypeSpecifierMask_unsigned + TypeSpecifierMask_short) || + type_specifiers == (TypeSpecifierMask_unsigned + TypeSpecifierMask_short + TypeSpecifierMask_int)) + return type_new(TypeKind_ushort); + else if (type_specifiers == TypeSpecifierMask_int || type_specifiers == TypeSpecifierMask_signed || + type_specifiers == (TypeSpecifierMask_signed + TypeSpecifierMask_int)) + return type_new(TypeKind_int); + else if (type_specifiers == TypeSpecifierMask_unsigned || + type_specifiers == (TypeSpecifierMask_unsigned + TypeSpecifierMask_int)) + return type_new(TypeKind_uint); + else if (type_specifiers == TypeSpecifierMask_long || + type_specifiers == (TypeSpecifierMask_signed + TypeSpecifierMask_long) || + type_specifiers == (TypeSpecifierMask_long + TypeSpecifierMask_int) || + type_specifiers == (TypeSpecifierMask_signed + TypeSpecifierMask_long + TypeSpecifierMask_int)) + return type_new(TypeKind_long); + else if (type_specifiers == (TypeSpecifierMask_unsigned + TypeSpecifierMask_long) || + type_specifiers == (TypeSpecifierMask_unsigned + TypeSpecifierMask_long + TypeSpecifierMask_int)) + return type_new(TypeKind_ulong); + else if (type_specifiers == (TypeSpecifierMask_long + TypeSpecifierMask_long) || + type_specifiers == (TypeSpecifierMask_signed + TypeSpecifierMask_long + TypeSpecifierMask_long) || + type_specifiers == (TypeSpecifierMask_long + TypeSpecifierMask_long + TypeSpecifierMask_int) || + type_specifiers == + (TypeSpecifierMask_signed + TypeSpecifierMask_long + TypeSpecifierMask_long + TypeSpecifierMask_int)) + return type_new(TypeKind_llong); + else if (type_specifiers == (TypeSpecifierMask_unsigned + TypeSpecifierMask_long + TypeSpecifierMask_long) || + type_specifiers == + (TypeSpecifierMask_unsigned + TypeSpecifierMask_long + TypeSpecifierMask_long + TypeSpecifierMask_int)) + return type_new(TypeKind_ullong); + else if (type_specifiers == TypeSpecifierMask_float) + return type_new(TypeKind_float); + else if (type_specifiers == TypeSpecifierMask_double) + return type_new(TypeKind_double); + else if (type_specifiers == TypeSpecifierMask_long + TypeSpecifierMask_double) + return type_new(TypeKind_ldouble); + else if (type_specifiers == TypeSpecifierMask_bool) + return type_new(TypeKind_bool); + else if (type_specifiers == TypeSpecifierMask_struct) + return NULL; + else if (type_specifiers == TypeSpecifierMask_union) + return NULL; + else if (type_specifiers == TypeSpecifierMask_enum) + return NULL; + else if (type_specifiers == TypeSpecifierMask_typedef_name) + return NULL; + else if (type_specifiers == 0) + return NULL; + else + unimplemented(); +} + +// e++ +// tmp1 = &e; tmp2 = *tmp1; *tmp1 += 1; tmp2 +// e-- +// tmp1 = &e; tmp2 = *tmp1; *tmp1 -= 1; tmp2 +static AstNode* create_new_postfix_inc_or_dec(Parser* p, AstNode* e, TokenKind op) { + AstNode* tmp1_lvar = generate_temporary_lvar(p, type_new_ptr(e->ty)); + AstNode* tmp2_lvar = generate_temporary_lvar(p, e->ty); + + AstNode* expr1 = ast_new_assign_expr(TokenKind_assign, tmp1_lvar, ast_new_ref_expr(e)); + AstNode* expr2 = ast_new_assign_expr(TokenKind_assign, tmp2_lvar, ast_new_deref_expr(tmp1_lvar)); + AstNode* expr3; + if (op == TokenKind_plusplus) { + expr3 = ast_new_assign_add_expr(ast_new_deref_expr(tmp1_lvar), ast_new_int(1)); + } else { + expr3 = ast_new_assign_sub_expr(ast_new_deref_expr(tmp1_lvar), ast_new_int(1)); + } + AstNode* expr4 = tmp2_lvar; + + AstNode* ret = ast_new_list(4); + ast_append(ret, expr1); + ast_append(ret, expr2); + ast_append(ret, expr3); + ast_append(ret, expr4); + ret->ty = expr4->ty; + return ret; +} + +static bool is_typedef_name(Parser* p, Token* tok) { + return tok->kind == TokenKind_ident && find_typedef(p, tok->value.string) != -1; +} + +static bool is_type_token(Parser* p, Token* tok) { + if (tok->kind == TokenKind_keyword_void || tok->kind == TokenKind_keyword_char || + tok->kind == TokenKind_keyword_short || tok->kind == TokenKind_keyword_int || + tok->kind == TokenKind_keyword_long || tok->kind == TokenKind_keyword_float || + tok->kind == TokenKind_keyword_double || tok->kind == TokenKind_keyword_signed || + tok->kind == TokenKind_keyword_unsigned || tok->kind == TokenKind_keyword__BitInt || + tok->kind == TokenKind_keyword_bool || tok->kind == TokenKind_keyword__Complex || + tok->kind == TokenKind_keyword__Decimal32 || tok->kind == TokenKind_keyword__Decimal64 || + tok->kind == TokenKind_keyword__Decimal128 || tok->kind == TokenKind_keyword__Atomic || + tok->kind == TokenKind_keyword_struct || tok->kind == TokenKind_keyword_union || + tok->kind == TokenKind_keyword_enum || tok->kind == TokenKind_keyword_typeof || + tok->kind == TokenKind_keyword_typeof_unqual || tok->kind == TokenKind_keyword_const || + tok->kind == TokenKind_keyword_restrict || tok->kind == TokenKind_keyword_volatile || + tok->kind == TokenKind_keyword__Atomic || tok->kind == TokenKind_keyword_alignas || + tok->kind == TokenKind_keyword_inline || tok->kind == TokenKind_keyword__Noreturn || + tok->kind == TokenKind_keyword_static) { + return true; + } + if (tok->kind != TokenKind_ident) { + return false; + } + return find_typedef(p, tok->value.string) != -1; +} + +// primary-expr: +// identifier +// constant +// string-literal +// '(' expr ')' +// TODO generic-selection +static AstNode* parse_primary_expr(Parser* p) { + Token* t = next_token(p); + if (t->kind == TokenKind_literal_int) { + return ast_new_int(t->value.integer); + } else if (t->kind == TokenKind_literal_double) { + return ast_new_double(t->value.floating); + } else if (t->kind == TokenKind_keyword_true) { + return ast_new_int(1); + } else if (t->kind == TokenKind_keyword_false) { + return ast_new_int(0); + } else if (t->kind == TokenKind_literal_str) { + return ast_new_str_expr(register_str_literal(p, t->value.string), + type_new_static_string(strlen(t->value.string))); + } else if (t->kind == TokenKind_paren_l) { + AstNode* e = parse_expr(p); + expect(p, TokenKind_paren_r); + return e; + } else if (t->kind == TokenKind_ident) { + const char* name = t->value.string; + + int lvar_idx = find_lvar(p, name); + if (lvar_idx == -1) { + int gvar_idx = find_gvar(p, name); + if (gvar_idx == -1) { + int enum_member_idx = find_enum_member(p, name); + if (enum_member_idx == -1) { + int func_idx = find_func(p, name); + if (func_idx == -1) { + fatal_error("%s:%d: undefined variable: %s", t->loc.filename, t->loc.line, name); + } + return ast_new_func(name, p->funcs.data[func_idx].ty); + } + int enum_idx = enum_member_idx / 1000; + int n = enum_member_idx % 1000; + AstNode* e = ast_new_int( + p->enums->as.list->items[enum_idx].as.enum_def->members->as.list->items[n].as.enum_member->value); + e->ty = type_new(TypeKind_enum); + e->ty->ref.defs = p->enums; + e->ty->ref.index = enum_idx; + return e; + } + return ast_new_gvar(name, p->gvars.data[gvar_idx].ty); + } + + return ast_new_lvar(name, p->lvars.data[lvar_idx].stack_offset, p->lvars.data[lvar_idx].ty); + } else { + fatal_error("%s:%d: expected an expression, but got '%s'", t->loc.filename, t->loc.line, token_stringify(t)); + } +} + +// postfix-expr: +// postfix-expr-stem { postfix-expr-postfix }* +// +// postfix-expr-stem: +// primary-expr +// TODO compound-literal +// +// postfix-expr-postfix: +// '[' expr ']' +// '(' argument-expr-list? ')' +// '.' identifier +// '->' identifier +// '++' +// '--' +static AstNode* parse_postfix_expr(Parser* p) { + AstNode* ret = parse_primary_expr(p); + while (1) { + if (consume_token_if(p, TokenKind_paren_l)) { + AstNode* args = parse_argument_expr_list(p); + expect(p, TokenKind_paren_r); + ret = ast_new_func_call(ret, args); + Type* func_type = ret->as.func_call->func->ty; + if (func_type->kind == TypeKind_ptr) { + func_type = func_type->base; + } + ret->ty = func_type->result; + } else if (consume_token_if(p, TokenKind_bracket_l)) { + AstNode* idx = parse_expr(p); + expect(p, TokenKind_bracket_r); + idx = ast_new_binary_expr(TokenKind_star, idx, ast_new_int(type_sizeof(ret->ty->base))); + ret = ast_new_deref_expr(ast_new_binary_expr(TokenKind_plus, ret, idx)); + } else if (consume_token_if(p, TokenKind_dot)) { + const Token* name = parse_ident(p); + ret = ast_new_member_access_expr(ast_new_ref_expr(ret), name->value.string); + } else if (consume_token_if(p, TokenKind_arrow)) { + const Token* name = parse_ident(p); + ret = ast_new_member_access_expr(ret, name->value.string); + } else if (consume_token_if(p, TokenKind_plusplus)) { + ret = create_new_postfix_inc_or_dec(p, ret, TokenKind_plusplus); + } else if (consume_token_if(p, TokenKind_minusminus)) { + ret = create_new_postfix_inc_or_dec(p, ret, TokenKind_minusminus); + } else { + break; + } + } + return ret; +} + +// argument-expr-list: +// { assignment-expr | ',' }+ +static AstNode* parse_argument_expr_list(Parser* p) { + AstNode* list = ast_new_list(6); + while (peek_token(p)->kind != TokenKind_paren_r) { + AstNode* arg = parse_assignment_expr(p); + ast_append(list, arg); + if (!consume_token_if(p, TokenKind_comma)) { + break; + } + } + return list; +} + +// unary-expr: +// postfix-expr +// '++' unary-expr +// '--' unary-expr +// unary-operator cast-expr +// 'sizeof' unary-expr +// 'sizeof' '(' type-name ')' +// TODO 'alignof' '(' type-name ')' +static AstNode* parse_unary_expr(Parser* p) { + TokenKind op = peek_token(p)->kind; + if (consume_token_if(p, TokenKind_plus)) { + return parse_cast_expr(p); + } else if (consume_token_if(p, TokenKind_minus)) { + AstNode* operand = parse_cast_expr(p); + return ast_new_binary_expr(op, ast_new_int(0), operand); + } else if (consume_token_if(p, TokenKind_not)) { + AstNode* operand = parse_cast_expr(p); + return ast_new_unary_expr(op, operand); + } else if (consume_token_if(p, TokenKind_tilde)) { + AstNode* operand = parse_cast_expr(p); + return ast_new_unary_expr(op, operand); + } else if (consume_token_if(p, TokenKind_and)) { + AstNode* operand = parse_cast_expr(p); + return ast_new_ref_expr(operand); + } else if (consume_token_if(p, TokenKind_star)) { + AstNode* operand = parse_cast_expr(p); + if (operand->ty->kind == TypeKind_func) { + // dereference operator against function pointers are no-op. + return operand; + } else { + return ast_new_deref_expr(operand); + } + } else if (consume_token_if(p, TokenKind_plusplus)) { + AstNode* operand = parse_unary_expr(p); + return ast_new_assign_add_expr(operand, ast_new_int(1)); + } else if (consume_token_if(p, TokenKind_minusminus)) { + AstNode* operand = parse_unary_expr(p); + return ast_new_assign_sub_expr(operand, ast_new_int(1)); + } else if (consume_token_if(p, TokenKind_keyword_sizeof)) { + if (peek_token(p)->kind == TokenKind_paren_l && is_type_token(p, peek_token2(p))) { + next_token(p); + Type* ty = parse_type_name(p); + expect(p, TokenKind_paren_r); + return ast_new_int(type_sizeof(ty)); + } else { + AstNode* expr = parse_unary_expr(p); + return ast_new_int(type_sizeof(expr->ty)); + } + } + return parse_postfix_expr(p); +} + +// cast-expr: +// unary-expr +// '(' type-name ')' cast-expr +static AstNode* parse_cast_expr(Parser* p) { + if (peek_token(p)->kind == TokenKind_paren_l && is_type_token(p, peek_token2(p))) { + next_token(p); + Type* ty = parse_type_name(p); + expect(p, TokenKind_paren_r); + + // TODO: check whether the original type can be casted to the result type. + AstNode* e = parse_cast_expr(p); + return ast_new_cast_expr(e, ty); + } + return parse_unary_expr(p); +} + +// multiplicative-expr: +// cast-expr { ( '*' / '/' / '%' ) cast-expr }* +static AstNode* parse_multiplicative_expr(Parser* p) { + AstNode* lhs = parse_cast_expr(p); + while (1) { + TokenKind op = peek_token(p)->kind; + if (op == TokenKind_star || op == TokenKind_slash || op == TokenKind_percent) { + next_token(p); + AstNode* rhs = parse_cast_expr(p); + lhs = ast_new_binary_expr(op, lhs, rhs); + } else { + break; + } + } + return lhs; +} + +// additive-expr: +// multiplicative-expr { ( '+' / '-' ) multiplicative-expr }* +static AstNode* parse_additive_expr(Parser* p) { + AstNode* lhs = parse_multiplicative_expr(p); + while (1) { + if (consume_token_if(p, TokenKind_plus)) { + AstNode* rhs = parse_multiplicative_expr(p); + if (lhs->ty->base) { + lhs = ast_new_binary_expr( + TokenKind_plus, lhs, + ast_new_binary_expr(TokenKind_star, rhs, ast_new_int(type_sizeof(lhs->ty->base)))); + } else if (rhs->ty->base) { + lhs = ast_new_binary_expr( + TokenKind_plus, ast_new_binary_expr(TokenKind_star, lhs, ast_new_int(type_sizeof(rhs->ty->base))), + rhs); + } else { + lhs = ast_new_binary_expr(TokenKind_plus, lhs, rhs); + } + } else if (consume_token_if(p, TokenKind_minus)) { + AstNode* rhs = parse_multiplicative_expr(p); + if (lhs->ty->base) { + if (rhs->ty->base) { + // (a - b) / sizeof(a) + lhs = ast_new_binary_expr(TokenKind_slash, ast_new_binary_expr(TokenKind_minus, lhs, rhs), + ast_new_int(type_sizeof(lhs->ty->base))); + } else { + // a - b*sizeof(a) + lhs = ast_new_binary_expr( + TokenKind_minus, lhs, + ast_new_binary_expr(TokenKind_star, rhs, ast_new_int(type_sizeof(lhs->ty->base)))); + } + } else { + lhs = ast_new_binary_expr(TokenKind_minus, lhs, rhs); + } + } else { + break; + } + } + return lhs; +} + +// shift-expr: +// additive-expr { ( '<<' / '>>' ) additive-expr }* +static AstNode* parse_shift_expr(Parser* p) { + AstNode* lhs = parse_additive_expr(p); + while (1) { + TokenKind op = peek_token(p)->kind; + if (op == TokenKind_lshift || op == TokenKind_rshift) { + next_token(p); + AstNode* rhs = parse_additive_expr(p); + lhs = ast_new_binary_expr(op, lhs, rhs); + } else { + break; + } + } + return lhs; +} + +// relational-expr: +// shift-expr { ( '<' / '>' / '<=' / '>=' ) shift-expr }* +static AstNode* parse_relational_expr(Parser* p) { + AstNode* lhs = parse_shift_expr(p); + while (1) { + TokenKind op = peek_token(p)->kind; + if (op == TokenKind_lt || op == TokenKind_le) { + next_token(p); + AstNode* rhs = parse_shift_expr(p); + lhs = ast_new_binary_expr(op, lhs, rhs); + } else if (consume_token_if(p, TokenKind_gt)) { + AstNode* rhs = parse_shift_expr(p); + lhs = ast_new_binary_expr(TokenKind_lt, rhs, lhs); + } else if (consume_token_if(p, TokenKind_ge)) { + AstNode* rhs = parse_shift_expr(p); + lhs = ast_new_binary_expr(TokenKind_le, rhs, lhs); + } else { + break; + } + } + return lhs; +} + +// equality-expr: +// relational-expr { ( '==' / '!=' ) relational-expr }* +static AstNode* parse_equality_expr(Parser* p) { + AstNode* lhs = parse_relational_expr(p); + while (1) { + TokenKind op = peek_token(p)->kind; + if (op == TokenKind_eq || op == TokenKind_ne) { + next_token(p); + AstNode* rhs = parse_relational_expr(p); + lhs = ast_new_binary_expr(op, lhs, rhs); + } else { + break; + } + } + return lhs; +} + +// bitwise-and-expr: +// equality-expr { '&' equality-expr }* +static AstNode* parse_bitwise_and_expr(Parser* p) { + AstNode* lhs = parse_equality_expr(p); + while (1) { + if (consume_token_if(p, TokenKind_and)) { + AstNode* rhs = parse_equality_expr(p); + lhs = ast_new_binary_expr(TokenKind_and, lhs, rhs); + lhs->ty = type_new(TypeKind_int); + } else { + break; + } + } + return lhs; +} + +// bitwise-xor-expr: +// bitwise-and-expr { '^' bitwise-and-expr }* +static AstNode* parse_bitwise_xor_expr(Parser* p) { + AstNode* lhs = parse_bitwise_and_expr(p); + while (1) { + if (consume_token_if(p, TokenKind_xor)) { + AstNode* rhs = parse_bitwise_and_expr(p); + lhs = ast_new_binary_expr(TokenKind_xor, lhs, rhs); + lhs->ty = type_new(TypeKind_int); + } else { + break; + } + } + return lhs; +} + +// bitwise-or-expr: +// bitwise-xor-expr { '|' bitwise-xor-expr }* +static AstNode* parse_bitwise_or_expr(Parser* p) { + AstNode* lhs = parse_bitwise_xor_expr(p); + while (1) { + if (consume_token_if(p, TokenKind_or)) { + AstNode* rhs = parse_bitwise_xor_expr(p); + lhs = ast_new_binary_expr(TokenKind_or, lhs, rhs); + lhs->ty = type_new(TypeKind_int); + } else { + break; + } + } + return lhs; +} + +// logical-and-expr: +// bitwise-or-expr { '&&' bitwise-or-expr }* +static AstNode* parse_logical_and_expr(Parser* p) { + AstNode* lhs = parse_bitwise_or_expr(p); + while (1) { + if (consume_token_if(p, TokenKind_andand)) { + AstNode* rhs = parse_bitwise_or_expr(p); + lhs = ast_new_logical_expr(TokenKind_andand, lhs, rhs); + } else { + break; + } + } + return lhs; +} + +// logical-or-expr: +// logical-and-expr { '||' logical-and-expr }* +static AstNode* parse_logical_or_expr(Parser* p) { + AstNode* lhs = parse_logical_and_expr(p); + while (1) { + if (consume_token_if(p, TokenKind_oror)) { + AstNode* rhs = parse_logical_and_expr(p); + lhs = ast_new_logical_expr(TokenKind_oror, lhs, rhs); + } else { + break; + } + } + return lhs; +} + +// conditional-expr: +// logical-or-expr ( '?' expr ':' conditional-expr )? +static AstNode* parse_conditional_expr(Parser* p) { + AstNode* e = parse_logical_or_expr(p); + if (consume_token_if(p, TokenKind_question)) { + AstNode* then_expr = parse_expr(p); + expect(p, TokenKind_colon); + AstNode* else_expr = parse_assignment_expr(p); + return ast_new_cond_expr(e, then_expr, else_expr); + } else { + return e; + } +} + +// assignment-expr: +// conditional-expr +// unary-expr assignment-operator assignment-expr +static AstNode* parse_assignment_expr(Parser* p) { + AstNode* lhs = parse_conditional_expr(p); + while (1) { + TokenKind op = peek_token(p)->kind; + // TODO: check if the lhs is unary expression. + if (op == TokenKind_assign || op == TokenKind_assign_mul || op == TokenKind_assign_div || + op == TokenKind_assign_mod || op == TokenKind_assign_or || op == TokenKind_assign_and || + op == TokenKind_assign_xor || op == TokenKind_assign_lshift || op == TokenKind_assign_rshift) { + next_token(p); + AstNode* rhs = parse_assignment_expr(p); + lhs = ast_new_assign_expr(op, lhs, rhs); + } else if (consume_token_if(p, TokenKind_assign_add)) { + AstNode* rhs = parse_assignment_expr(p); + lhs = ast_new_assign_add_expr(lhs, rhs); + } else if (consume_token_if(p, TokenKind_assign_sub)) { + AstNode* rhs = parse_assignment_expr(p); + lhs = ast_new_assign_sub_expr(lhs, rhs); + } else { + break; + } + } + return lhs; +} + +// expr: +// assignment-expr { ',' assignment-expr }* +static AstNode* parse_expr(Parser* p) { + AstNode* lhs = parse_assignment_expr(p); + while (1) { + if (consume_token_if(p, TokenKind_comma)) { + AstNode* rhs = parse_assignment_expr(p); + AstNode* list = ast_new_list(2); + ast_append(list, lhs); + ast_append(list, rhs); + list->ty = rhs->ty; + lhs = list; + } else { + break; + } + } + return lhs; +} + +// constant-expr: +// conditional-expr +static AstNode* parse_constant_expr(Parser* p) { + return parse_conditional_expr(p); +} + +// pointer: +// { '*' TODO attribute-specifier-sequence? type-qualifier-list? }+ +static Type* parse_pointer_opt(Parser* p, Type* ty) { + while (peek_token(p)->kind == TokenKind_star) { + next_token(p); + ty = type_new_ptr(ty); + parse_type_qualifier_list_opt(p); + } + return ty; +} + +// type-qualifier-list: +// { type-qualifier }+ +static void parse_type_qualifier_list_opt(Parser* p) { + while (1) { + Token* tok = peek_token(p); + if (tok->kind == TokenKind_keyword_const || tok->kind == TokenKind_keyword_restrict || + tok->kind == TokenKind_keyword_volatile || tok->kind == TokenKind_keyword__Atomic) { + // TODO: currently type qualifiers are just ignored. + next_token(p); + } else { + break; + } + } +} + +// declarator | abstract-declarator?: +// pointer? identifier TODO attribute-specifier-sequence? { direct-declarator-suffix }* +// pointer? '(' declarator ')' { direct-declarator-suffix }* +// pointer? '(' abstract-declarator ')' { direct-declarator-suffix }* +// pointer? { direct-declarator-suffix }* +// +// direct-declarator-suffix: +// array-declarator-suffix TODO attribute-specifier-sequence? +// function-declarator-suffix TODO attribute-specifier-sequence? +static AstNode* parse_declarator_or_abstract_declarator_opt(Parser* p, Type* ty) { + ty = parse_pointer_opt(p, ty); + + AstNode* decl; + if (peek_token(p)->kind == TokenKind_ident) { + decl = ast_new_declarator(parse_ident(p)->value.string, ty); + } else if (peek_token(p)->kind == TokenKind_paren_l && !is_type_token(p, peek_token2(p))) { + next_token(p); + if (peek_token(p)->kind == TokenKind_paren_r) { + Token* tok = peek_token(p); + fatal_error("%s:%d: expected declarator, but got '%s'", tok->loc.filename, tok->loc.line, + token_stringify(tok)); + } + decl = parse_declarator_or_abstract_declarator_opt(p, ty); + expect(p, TokenKind_paren_r); + } else { + decl = ast_new_declarator(NULL, ty); + } + + while (1) { + if (peek_token(p)->kind == TokenKind_bracket_l) { + decl->ty = parse_array_declarator_suffix(p, decl->ty); + } else if (peek_token(p)->kind == TokenKind_paren_l) { + decl->ty = parse_function_declarator_suffix(p, decl->ty); + } else { + break; + } + } + + return decl; +} + +// parameter-type-list: +// parameter-list +// parameter-list ',' '...' +// '...' +static AstNode* parse_parameter_type_list(Parser* p) { + AstNode* params = parse_parameter_list(p); + + bool has_void = false; + for (int i = 0; i < params->as.list->len; ++i) { + if (params->as.list->items[i].as.declarator->name && + strcmp(params->as.list->items[i].as.declarator->name, "...") == 0) { + if (i != params->as.list->len - 1) { + fatal_error("..."); + } + --params->as.list->len; + break; + } + has_void |= params->as.list->items[i].ty->kind == TypeKind_void; + } + + if (has_void) { + if (params->as.list->len != 1) { + fatal_error("invalid use of void param"); + } + params->as.list->len = 0; + } + + return params; +} + +// parameter-list: +// { parameter-declaration | ',' } +static AstNode* parse_parameter_list(Parser* p) { + AstNode* params = ast_new_list(4); + + while (1) { + AstNode* param = parse_parameter_declaration(p); + ast_append(params, param); + + if (!consume_token_if(p, TokenKind_comma)) { + break; + } + } + + return params; +} + +// parameter-declaration: +// TODO attribute-specifier-sequence? declaration-specifiers declarator +// TODO attribute-specifier-sequence? declaration-specifiers abstract-declarator? +static AstNode* parse_parameter_declaration(Parser* p) { + if (consume_token_if(p, TokenKind_ellipsis)) { + return ast_new_declarator("...", NULL); + } + + Type* base_ty = parse_declaration_specifiers(p); + return parse_declarator_or_abstract_declarator_opt(p, base_ty); +} + +// array-declarator: +// direct-declarator '[' TODO type-qualifier-list? TODO assignment-expr? ']' +// TODO direct-declarator '[' 'static' type-qualifier-list? assignment-expr? ']' +// TODO direct-declarator '[' type-qualifier-list 'static' assignment-expr? ']' +// TODO direct-declarator '[' type-qualifier-list? '*' ']' +static Type* parse_array_declarator_suffix(Parser* p, Type* ty) { + next_token(p); // skip '[' + + int size = -1; + if (peek_token(p)->kind != TokenKind_bracket_r) { + AstNode* size_expr = parse_assignment_expr(p); + size = eval(size_expr); + } + expect(p, TokenKind_bracket_r); + + return type_new_array(ty, size); +} + +// function-declarator: +// direct-declarator '(' parameter-type-list? ')' +static Type* parse_function_declarator_suffix(Parser* p, Type* ty) { + next_token(p); // skip '(' + AstNode* params; + + if (consume_token_if(p, TokenKind_paren_r)) { + params = ast_new_list(1); + } else { + params = parse_parameter_type_list(p); + expect(p, TokenKind_paren_r); + } + + return type_new_func(ty, params); +} + +// direct-declarator: +// identifier TODO attribute-specifier-sequence? +// '(' declarator ')' +// array-declarator TODO attribute-specifier-sequence? +// function-declarator TODO attribute-specifier-sequence? +static AstNode* parse_direct_declarator(Parser* p, Type* ty) { + AstNode* decl; + if (peek_token(p)->kind == TokenKind_ident) { + decl = ast_new_declarator(parse_ident(p)->value.string, ty); + } else if (peek_token(p)->kind == TokenKind_paren_l && !is_type_token(p, peek_token2(p))) { + next_token(p); + // 1st pass: skip content inside parentheses. + int saved_pos = p->pos; + Type* dummy = type_new(TypeKind_void); + parse_declarator(p, dummy); + expect(p, TokenKind_paren_r); + // Parse suffixes. + while (1) { + if (peek_token(p)->kind == TokenKind_bracket_l) { + ty = parse_array_declarator_suffix(p, ty); + } else if (peek_token(p)->kind == TokenKind_paren_l) { + ty = parse_function_declarator_suffix(p, ty); + } else { + break; + } + } + // 2nd pass: re-parse inner content. + int end_pos = p->pos; + p->pos = saved_pos; + decl = parse_declarator(p, ty); + expect(p, TokenKind_paren_r); + p->pos = end_pos; + return ast_new_declarator(decl->as.declarator->name, decl->ty); + } else { + decl = ast_new_declarator(NULL, ty); + } + + while (1) { + if (peek_token(p)->kind == TokenKind_bracket_l) { + decl->ty = parse_array_declarator_suffix(p, decl->ty); + } else if (peek_token(p)->kind == TokenKind_paren_l) { + decl->ty = parse_function_declarator_suffix(p, decl->ty); + } else { + break; + } + } + + return ast_new_declarator(decl->as.declarator->name, decl->ty); +} + +// declarator: +// pointer? direct-declarator +static AstNode* parse_declarator(Parser* p, Type* ty) { + ty = parse_pointer_opt(p, ty); + return parse_direct_declarator(p, ty); +} + +// init-declarator-list: +// init-declarator +// init-declarator-list ',' init-declarator +static AstNode* parse_init_declarator_list(Parser* p, Type* ty) { + AstNode* list = ast_new_list(1); + while (1) { + AstNode* d = parse_init_declarator(p, ty); + ast_append(list, d); + if (!consume_token_if(p, TokenKind_comma)) { + break; + } + + if (ty->storage_class == StorageClass_typedef) { + continue; + } + // Immediately declare to allow following initializer to access previous variables. For example, + // int a = 1, b = a; + process_declarations(p, &list->as.list->items[list->as.list->len - 1]); + } + return list; +} + +// declaration: +// static_assert-declaration +// TODO attribute-specifier-sequence ';' +// TODO attribute-specifier-sequence declaration-specifiers init-declarator-list ';' +// declaration-specifiers init-declarator-list ';' +static AstNode* parse_declaration(Parser* p) { + if (peek_token(p)->kind == TokenKind_keyword_static_assert) { + return parse_static_assert_declaration(p); + } + + Type* ty = parse_declaration_specifiers(p); + AstNode* decls = parse_init_declarator_list(p, ty); + expect(p, TokenKind_semicolon); + process_declarations(p, &decls->as.list->items[decls->as.list->len - 1]); + return decls; +} + +static void create_local_initializer(AstNode* list, AstNode* lhs, AstNode* init, Type* ty) { + if (init->kind == AstNodeKind_array_initializer) { + AstNode* items = init->as.array_initializer->list; + if (ty->kind == TypeKind_array) { + for (int i = 0; i < items->as.list->len; ++i) { + AstNode* idx = ast_new_binary_expr(TokenKind_star, ast_new_int(i), ast_new_int(type_sizeof(ty->base))); + AstNode* elem = ast_new_deref_expr(ast_new_binary_expr(TokenKind_plus, lhs, idx)); + create_local_initializer(list, elem, &items->as.list->items[i], ty->base); + } + } else if (ty->kind == TypeKind_struct) { + AstNode* def = &ty->ref.defs->as.list->items[ty->ref.index]; + AstNode* members = def->as.struct_def->members; + for (int i = 0; i < items->as.list->len; ++i) { + const char* member_name = members->as.list->items[i].as.struct_member->name; + AstNode* member_lhs = ast_new_member_access_expr(ast_new_ref_expr(lhs), member_name); + create_local_initializer(list, member_lhs, &items->as.list->items[i], members->as.list->items[i].ty); + } + } else if (ty->kind == TypeKind_union) { + AstNode* def = &ty->ref.defs->as.list->items[ty->ref.index]; + AstNode* members = def->as.union_def->members; + const char* member_name = members->as.list->items[0].as.struct_member->name; + AstNode* member_lhs = ast_new_member_access_expr(ast_new_ref_expr(lhs), member_name); + create_local_initializer(list, member_lhs, &items->as.list->items[0], members->as.list->items[0].ty); + } + } else { + AstNode* assign = ast_new_assign_expr(TokenKind_assign, lhs, init); + ast_append(list, ast_new_expr_stmt(assign)); + } +} + +static void process_declarations(Parser* p, AstNode* decl) { + const char* name = decl->as.declarator->name; + if (decl->ty->kind == TypeKind_func) { + // TODO: refactor + decl->ty->storage_class = decl->ty->result->storage_class; + decl->ty->result->storage_class = StorageClass_unspecified; + register_func(p, name, decl->ty); + decl->kind = AstNodeKind_func_decl; + } else { + if (type_is_unsized(decl->ty)) { + fatal_error("process_declarations: invalid type for variable"); + } + + if (p->scope) { + if (find_lvar_in_current_scope(p, name) != -1) { + // TODO: use name's location. + fatal_error("%s:%d: '%s' redeclared", peek_token(p)->loc.filename, peek_token(p)->loc.line, name); + } + int stack_offset = add_lvar(p, name, decl->ty, calc_lvar_stack_offset(p, decl->ty)); + + if (decl->as.declarator->init) { + if (decl->as.declarator->init->kind == AstNodeKind_array_initializer) { + AstNode* lhs_base = ast_new_lvar(name, stack_offset, decl->ty); + AstNode* init = decl->as.declarator->init; + AstNode* stmt_list = ast_new_list(4); + create_local_initializer(stmt_list, lhs_base, init, decl->ty); + *decl = *stmt_list; + } else { + AstNode* lhs = ast_new_lvar(name, stack_offset, decl->ty); + AstNode* assign = ast_new_assign_expr(TokenKind_assign, lhs, decl->as.declarator->init); + decl->kind = AstNodeKind_expr_stmt; + decl->as.expr_stmt = calloc(1, sizeof(ExprStmtNode)); + decl->as.expr_stmt->expr = assign; + } + } else { + decl->kind = AstNodeKind_nop; + } + } else { + if (find_gvar(p, name) != -1) { + // TODO + // fatal_error("process_declarations: %s redeclared", name); + } + // TODO: refactor + Type* base_ty = decl->ty; + while (base_ty->base) { + base_ty = base_ty->base; + } + if (base_ty != decl->ty) { + decl->ty->storage_class = base_ty->storage_class; + base_ty->storage_class = StorageClass_unspecified; + } + + // TODO: refactor + if (decl->ty->kind == TypeKind_array && decl->ty->array_size == -1 && decl->as.declarator && + decl->as.declarator->init && decl->as.declarator->init->kind == AstNodeKind_array_initializer) { + decl->ty->array_size = decl->as.declarator->init->as.array_initializer->list->as.list->len; + } + + GlobalVar* gvar = gvars_push_new(&p->gvars); + gvar->name = name; + gvar->ty = decl->ty; + + if (decl->ty->storage_class == StorageClass_extern) { + decl->kind = AstNodeKind_nop; + } else { + AstNode* init_expr = decl->as.declarator ? decl->as.declarator->init : NULL; + decl->kind = AstNodeKind_gvar_decl; + decl->as.gvar_decl = calloc(1, sizeof(GvarDeclNode)); + decl->as.gvar_decl->name = name; + decl->as.gvar_decl->expr = init_expr; + } + } + } +} + +// function-definition: +// declaration-specifiers init-declarator-list compound-stmt +static AstNode* parse_function_definition(Parser* p, AstNode* decls) { + if (decls->as.list->len != 1) { + fatal_error("parse_function_definition: invalid syntax"); + } + + Type* ty = decls->as.list->items[0].ty; + Type* base_ty = ty->result; + while (base_ty->base) { + base_ty = base_ty->base; + } + ty->storage_class = base_ty->storage_class; + base_ty->storage_class = StorageClass_unspecified; + const char* name = decls->as.list->items[0].as.declarator->name; + AstNode* params = ty->params; + + register_func(p, name, ty); + enter_func(p); + register_params(p, params); + AstNode* body = parse_compound_stmt(p); + leave_func(p); + + int stack_size = 0; + if (p->lvars.len != 0) { + stack_size = p->lvars.data[p->lvars.len - 1].stack_offset + type_sizeof(p->lvars.data[p->lvars.len - 1].ty); + if (stack_size < 0) { + stack_size = 0; + } + } + return ast_new_func_def(name, ty, params, body, stack_size); +} + +static void process_typedefs(Parser* p, AstNode* decls) { + for (int i = 0; i < decls->as.list->len; ++i) { + AstNode* decl = &decls->as.list->items[i]; + + AstNode* typedef_ = ast_new_typedef_decl(decl->as.declarator->name, decl->ty); + ast_append(p->typedefs, typedef_); + } +} + +static char* generate_anonymous_name(Parser* p) { + char* buf = calloc(32, sizeof(char)); + sprintf(buf, "__anonymous_%d__", p->anonymous_user_type_counter++); + return buf; +} + +// declaration-specifiers: +// { declaration-specifier }+ TODO attribute-specifier-sequence? +// +// declaration-specifier: +// storage-class-specifier +// type-specifier-qualifier +// function-speicifier +// +// storage-class-specifier: +// 'auto' +// 'constexpr' +// 'extern' +// 'register' +// 'static' +// 'thread_local' +// 'typedef' +// +// type-specifier-qualifier: +// type-specifier +// type-qualifier +// alignment-specifier +// +// function-specifier: +// 'inline' +// '_Noreturn' +// +// type-specifier: +// 'void' +// 'char' +// 'short' +// 'int' +// 'long' +// 'float' +// 'double' +// 'signed' +// 'unsigned' +// TODO '_BitInt' '(' constant-expr ')' +// 'bool' +// '_Complex' +// '_Decimal32' +// '_Decimal64' +// '_Decimal128' +// atomic-type-specifier +// struct-or-union-specifier +// enum-specifier +// typeof-specifier +// typedef-name +// +// type-qualifier: +// 'const' +// 'restrict' +// 'volatile' +// '_Atomic' +// +// alignment-specifier: +// TODO 'alignas' '(' type-name ')' +// TODO 'alignas' '(' constant-expr ')' +// +// atomic-type-specifier: +// TODO '_Atomic' '(' type-name ')' +// +// struct-or-union-specifier: +// 'struct' ... +// 'union' ... +// +// enum-specifier: +// 'enum' ... +// +// typeof-specifier: +// TODO 'typeof' '(' typeof-specifier-argument ')' +// TODO 'typeof_unqual' '(' typeof-specifier-argument ')' +// +// typedef-name: +// identifier +static Type* parse_declaration_specifiers(Parser* p) { + StorageClass storage_class = StorageClass_unspecified; + int type_specifiers = 0; + Type* ty = NULL; + + Token* tok; + while (1) { + tok = peek_token(p); + + // storage-class-spciefier + if (tok->kind == TokenKind_keyword_auto) { + unimplemented(); + } else if (tok->kind == TokenKind_keyword_constexpr) { + unimplemented(); + } else if (tok->kind == TokenKind_keyword_extern) { + next_token(p); + storage_class = StorageClass_extern; + } else if (tok->kind == TokenKind_keyword_register) { + unimplemented(); + } else if (tok->kind == TokenKind_keyword_static) { + next_token(p); + storage_class = StorageClass_static; + } else if (tok->kind == TokenKind_keyword_thread_local) { + unimplemented(); + } else if (tok->kind == TokenKind_keyword_typedef) { + next_token(p); + storage_class = StorageClass_typedef; + } + // type-specifier-qualifier > type-specifier + else if (tok->kind == TokenKind_keyword_void) { + if (type_specifiers & TypeSpecifierMask_void) { + fatal_error("%s:%d: duplicate '%s'", tok->loc.filename, tok->loc.line, token_stringify(tok)); + } + next_token(p); + type_specifiers += TypeSpecifierMask_void; + } else if (tok->kind == TokenKind_keyword_char) { + if (type_specifiers & TypeSpecifierMask_char) { + fatal_error("%s:%d: duplicate '%s'", tok->loc.filename, tok->loc.line, token_stringify(tok)); + } + next_token(p); + type_specifiers += TypeSpecifierMask_char; + } else if (tok->kind == TokenKind_keyword_short) { + if (type_specifiers & TypeSpecifierMask_short) { + fatal_error("%s:%d: duplicate '%s'", tok->loc.filename, tok->loc.line, token_stringify(tok)); + } + next_token(p); + type_specifiers += TypeSpecifierMask_short; + } else if (tok->kind == TokenKind_keyword_int) { + if (type_specifiers & TypeSpecifierMask_int) { + fatal_error("%s:%d: duplicate '%s'", tok->loc.filename, tok->loc.line, token_stringify(tok)); + } + next_token(p); + type_specifiers += TypeSpecifierMask_int; + } else if (tok->kind == TokenKind_keyword_long) { + if (type_specifiers & (TypeSpecifierMask_long + TypeSpecifierMask_long)) { + fatal_error("%s:%d: too looong!", tok->loc.filename, tok->loc.line); + } + next_token(p); + type_specifiers += TypeSpecifierMask_long; + } else if (tok->kind == TokenKind_keyword_float) { + if (type_specifiers & TypeSpecifierMask_float) { + fatal_error("%s:%d: duplicate '%s'", tok->loc.filename, tok->loc.line, token_stringify(tok)); + } + next_token(p); + type_specifiers += TypeSpecifierMask_float; + } else if (tok->kind == TokenKind_keyword_double) { + if (type_specifiers & TypeSpecifierMask_double) { + fatal_error("%s:%d: duplicate '%s'", tok->loc.filename, tok->loc.line, token_stringify(tok)); + } + next_token(p); + type_specifiers += TypeSpecifierMask_double; + } else if (tok->kind == TokenKind_keyword_signed) { + if (type_specifiers & TypeSpecifierMask_signed) { + fatal_error("%s:%d: duplicate '%s'", tok->loc.filename, tok->loc.line, token_stringify(tok)); + } + next_token(p); + type_specifiers += TypeSpecifierMask_signed; + } else if (tok->kind == TokenKind_keyword_unsigned) { + if (type_specifiers & TypeSpecifierMask_unsigned) { + fatal_error("%s:%d: duplicate '%s'", tok->loc.filename, tok->loc.line, token_stringify(tok)); + } + next_token(p); + type_specifiers += TypeSpecifierMask_unsigned; + } else if (tok->kind == TokenKind_keyword__BitInt) { + if (type_specifiers & TypeSpecifierMask__BitInt) { + fatal_error("%s:%d: duplicate '%s'", tok->loc.filename, tok->loc.line, token_stringify(tok)); + } + next_token(p); + type_specifiers += TypeSpecifierMask__BitInt; + } else if (tok->kind == TokenKind_keyword_bool) { + if (type_specifiers & TypeSpecifierMask_bool) { + fatal_error("%s:%d: duplicate '%s'", tok->loc.filename, tok->loc.line, token_stringify(tok)); + } + next_token(p); + type_specifiers += TypeSpecifierMask_bool; + } else if (tok->kind == TokenKind_keyword__Complex) { + if (type_specifiers & TypeSpecifierMask__Complex) { + fatal_error("%s:%d: duplicate '%s'", tok->loc.filename, tok->loc.line, token_stringify(tok)); + } + next_token(p); + type_specifiers += TypeSpecifierMask__Complex; + } else if (tok->kind == TokenKind_keyword__Decimal32) { + if (type_specifiers & TypeSpecifierMask__Decimal32) { + fatal_error("%s:%d: duplicate '%s'", tok->loc.filename, tok->loc.line, token_stringify(tok)); + } + next_token(p); + type_specifiers += TypeSpecifierMask__Decimal32; + } else if (tok->kind == TokenKind_keyword__Decimal64) { + if (type_specifiers & TypeSpecifierMask__Decimal64) { + fatal_error("%s:%d: duplicate '%s'", tok->loc.filename, tok->loc.line, token_stringify(tok)); + } + next_token(p); + type_specifiers += TypeSpecifierMask__Decimal64; + } else if (tok->kind == TokenKind_keyword__Decimal128) { + if (type_specifiers & TypeSpecifierMask__Decimal128) { + fatal_error("%s:%d: duplicate '%s'", tok->loc.filename, tok->loc.line, token_stringify(tok)); + } + next_token(p); + type_specifiers += TypeSpecifierMask__Decimal128; + } else if (tok->kind == TokenKind_keyword__Atomic) { + if (type_specifiers & TypeSpecifierMask__Atomic) { + fatal_error("%s:%d: duplicate '%s'", tok->loc.filename, tok->loc.line, token_stringify(tok)); + } + next_token(p); + type_specifiers += TypeSpecifierMask__Atomic; + } else if (tok->kind == TokenKind_keyword_struct) { + if (type_specifiers & TypeSpecifierMask_struct) { + fatal_error("%s:%d: duplicate '%s'", tok->loc.filename, tok->loc.line, token_stringify(tok)); + } + ty = parse_struct_specifier(p); + type_specifiers += TypeSpecifierMask_struct; + } else if (tok->kind == TokenKind_keyword_union) { + if (type_specifiers & TypeSpecifierMask_union) { + fatal_error("%s:%d: duplicate '%s'", tok->loc.filename, tok->loc.line, token_stringify(tok)); + } + ty = parse_union_specifier(p); + type_specifiers += TypeSpecifierMask_union; + } else if (tok->kind == TokenKind_keyword_enum) { + if (type_specifiers & TypeSpecifierMask_enum) { + fatal_error("%s:%d: duplicate '%s'", tok->loc.filename, tok->loc.line, token_stringify(tok)); + } + ty = parse_enum_specifier(p); + type_specifiers += TypeSpecifierMask_enum; + } else if (tok->kind == TokenKind_keyword_typeof) { + unimplemented(); + } else if (tok->kind == TokenKind_keyword_typeof_unqual) { + unimplemented(); + } else if (is_typedef_name(p, tok)) { + if (type_specifiers & TypeSpecifierMask_typedef_name) { + fatal_error("%s:%d: duplicate '%s'", tok->loc.filename, tok->loc.line, token_stringify(tok)); + } + next_token(p); + int typedef_idx = find_typedef(p, tok->value.string); + ty = type_dup(p->typedefs->as.list->items[typedef_idx].ty); + type_specifiers += TypeSpecifierMask_typedef_name; + } + // type-specifier-qualifier > type-qualifier + else if (tok->kind == TokenKind_keyword_const) { + // TODO + next_token(p); + } else if (tok->kind == TokenKind_keyword_restrict) { + // TODO + next_token(p); + } else if (tok->kind == TokenKind_keyword_volatile) { + // TODO + next_token(p); + } else if (tok->kind == TokenKind_keyword__Atomic) { + unimplemented(); + } + // type-specifier-qualifier > alignment-specifier + else if (tok->kind == TokenKind_keyword_alignas) { + unimplemented(); + } + // function-specifier + else if (tok->kind == TokenKind_keyword_inline) { + // TODO + next_token(p); + } else if (tok->kind == TokenKind_keyword__Noreturn) { + // TODO + next_token(p); + } else { + break; + } + } + + Type* ty_ = distinguish_type_from_type_specifiers(type_specifiers); + if (ty_) { + ty = ty_; + } + if (!ty) { + fatal_error("%s:%d: no type specifiers", tok->loc.filename, tok->loc.line); + } + + ty->storage_class = storage_class; + return ty; +} + +// init-declarator: +// declarator +// declarator '=' initializer +static AstNode* parse_init_declarator(Parser* p, Type* ty) { + AstNode* decl = parse_declarator(p, ty); + if (consume_token_if(p, TokenKind_assign)) { + decl->as.declarator->init = parse_initializer(p); + } + return decl; +} + +// struct-specifier: +// 'struct' TODO attribute-specifier-sequence? identifier? '{' member-declaration-list '}' +// 'struct' TODO attribute-specifier-sequence? identifier +static Type* parse_struct_specifier(Parser* p) { + SourceLocation struct_kw_loc = current_location(p); + next_token(p); + + const Token* name; + char* anonymous_name = NULL; + if (peek_token(p)->kind == TokenKind_brace_l) { + anonymous_name = generate_anonymous_name(p); + Token* anonymous_token = calloc(1, sizeof(Token)); + anonymous_token->kind = TokenKind_ident; + anonymous_token->value.string = anonymous_name; + anonymous_token->loc = struct_kw_loc; + name = anonymous_token; + } else { + name = parse_ident(p); + } + + if (!consume_token_if(p, TokenKind_brace_l)) { + int struct_idx = find_struct(p, name->value.string); + if (struct_idx != -1) { + Type* ty = type_new(TypeKind_struct); + ty->ref.defs = p->structs; + ty->ref.index = struct_idx; + return ty; + } else { + // TODO + AstNode* new_struct = ast_new_struct_def(name->value.string); + ast_append(p->structs, new_struct); + struct_idx = p->structs->as.list->len - 1; + + Type* ty = type_new(TypeKind_struct); + ty->ref.defs = p->structs; + ty->ref.index = struct_idx; + return ty; + } + } + + int struct_idx = find_struct(p, name->value.string); + if (struct_idx != -1 && p->structs->as.list->items[struct_idx].as.struct_def->members) { + fatal_error("%s:%d: struct %s redefined", name->loc.filename, name->loc.line, name->value.string); + } + + if (struct_idx == -1) { + AstNode* new_struct = ast_new_struct_def(name->value.string); + ast_append(p->structs, new_struct); + struct_idx = p->structs->as.list->len - 1; + } + + AstNode* members = parse_member_declaration_list(p); + expect(p, TokenKind_brace_r); + p->structs->as.list->items[struct_idx].as.struct_def->members = members; + + Type* ty = type_new(TypeKind_struct); + ty->ref.defs = p->structs; + ty->ref.index = struct_idx; + return ty; +} + +// union-specifier: +// 'union' TODO attribute-specifier-sequence? identifier? '{' member-declaration-list '}' +// 'union' TODO attribute-specifier-sequence? identifier +static Type* parse_union_specifier(Parser* p) { + SourceLocation union_kw_loc = current_location(p); + next_token(p); + + const Token* name; + char* anonymous_name = NULL; + if (peek_token(p)->kind == TokenKind_brace_l) { + anonymous_name = generate_anonymous_name(p); + Token* anonymous_token = calloc(1, sizeof(Token)); + anonymous_token->kind = TokenKind_ident; + anonymous_token->value.string = anonymous_name; + anonymous_token->loc = union_kw_loc; + name = anonymous_token; + } else { + name = parse_ident(p); + } + + if (!consume_token_if(p, TokenKind_brace_l)) { + int union_idx = find_union(p, name->value.string); + if (union_idx != -1) { + Type* ty = type_new(TypeKind_union); + ty->ref.defs = p->unions; + ty->ref.index = union_idx; + return ty; + } else { + // TODO + AstNode* new_union = ast_new_union_def(name->value.string); + ast_append(p->unions, new_union); + union_idx = p->unions->as.list->len - 1; + + Type* ty = type_new(TypeKind_union); + ty->ref.defs = p->unions; + ty->ref.index = union_idx; + return ty; + } + } + + int union_idx = find_union(p, name->value.string); + if (union_idx != -1 && p->unions->as.list->items[union_idx].as.union_def->members) { + fatal_error("%s:%d: union %s redefined", name->loc.filename, name->loc.line, name->value.string); + } + + if (union_idx == -1) { + AstNode* new_union = ast_new_union_def(name->value.string); + ast_append(p->unions, new_union); + union_idx = p->unions->as.list->len - 1; + } + + AstNode* members = parse_member_declaration_list(p); + expect(p, TokenKind_brace_r); + p->unions->as.list->items[union_idx].as.union_def->members = members; + + Type* ty = type_new(TypeKind_union); + ty->ref.defs = p->unions; + ty->ref.index = union_idx; + return ty; +} + +// member-declaration-list: +// { member-declaration }+ +static AstNode* parse_member_declaration_list(Parser* p) { + AstNode* members = ast_new_list(4); + while (peek_token(p)->kind != TokenKind_brace_r) { + AstNode* decls = parse_member_declaration(p); + for (int i = 0; i < decls->as.list->len; i++) { + ast_append(members, &decls->as.list->items[i]); + } + } + return members; +} + +// member-declaration: +// TODO attribute-specifier-sequence? specifier-qualifier-list member-declaration-list? ';' +// TODO static_assert-declaration +static AstNode* parse_member_declaration(Parser* p) { + Type* base_ty = parse_specifier_qualifier_list(p); + + if (consume_token_if(p, TokenKind_semicolon)) { + char* name = generate_anonymous_name(p); + AstNode* decls = ast_new_list(1); + AstNode* member = ast_new(AstNodeKind_struct_member); + member->ty = base_ty; + member->as.struct_member = calloc(1, sizeof(StructMemberNode)); + member->as.struct_member->name = name; + ast_append(decls, member); + return decls; + } + AstNode* decls = parse_member_declarator_list(p, base_ty); + expect(p, TokenKind_semicolon); + return decls; +} + +// specifier-qualifier-list: +// { type-specifier-qualifier }+ TODO attribute-specifier-sequence? +static Type* parse_specifier_qualifier_list(Parser* p) { + int type_specifiers = 0; + Type* ty = NULL; + + while (1) { + Token* tok = peek_token(p); + + // type-specifier-qualifier > type-specifier + if (tok->kind == TokenKind_keyword_void) { + if (type_specifiers & TypeSpecifierMask_void) { + fatal_error("%s:%d: duplicate '%s'", tok->loc.filename, tok->loc.line, token_stringify(tok)); + } + next_token(p); + type_specifiers += TypeSpecifierMask_void; + } else if (tok->kind == TokenKind_keyword_char) { + if (type_specifiers & TypeSpecifierMask_char) { + fatal_error("%s:%d: duplicate '%s'", tok->loc.filename, tok->loc.line, token_stringify(tok)); + } + next_token(p); + type_specifiers += TypeSpecifierMask_char; + } else if (tok->kind == TokenKind_keyword_short) { + if (type_specifiers & TypeSpecifierMask_short) { + fatal_error("%s:%d: duplicate '%s'", tok->loc.filename, tok->loc.line, token_stringify(tok)); + } + next_token(p); + type_specifiers += TypeSpecifierMask_short; + } else if (tok->kind == TokenKind_keyword_int) { + if (type_specifiers & TypeSpecifierMask_int) { + fatal_error("%s:%d: duplicate '%s'", tok->loc.filename, tok->loc.line, token_stringify(tok)); + } + next_token(p); + type_specifiers += TypeSpecifierMask_int; + } else if (tok->kind == TokenKind_keyword_long) { + if (type_specifiers & (TypeSpecifierMask_long + TypeSpecifierMask_long)) { + fatal_error("%s:%d: too looong!", tok->loc.filename, tok->loc.line); + } + next_token(p); + type_specifiers += TypeSpecifierMask_long; + } else if (tok->kind == TokenKind_keyword_float) { + if (type_specifiers & TypeSpecifierMask_float) { + fatal_error("%s:%d: duplicate '%s'", tok->loc.filename, tok->loc.line, token_stringify(tok)); + } + next_token(p); + type_specifiers += TypeSpecifierMask_float; + } else if (tok->kind == TokenKind_keyword_double) { + if (type_specifiers & TypeSpecifierMask_double) { + fatal_error("%s:%d: duplicate '%s'", tok->loc.filename, tok->loc.line, token_stringify(tok)); + } + next_token(p); + type_specifiers += TypeSpecifierMask_double; + } else if (tok->kind == TokenKind_keyword_signed) { + if (type_specifiers & TypeSpecifierMask_signed) { + fatal_error("%s:%d: duplicate '%s'", tok->loc.filename, tok->loc.line, token_stringify(tok)); + } + next_token(p); + type_specifiers += TypeSpecifierMask_signed; + } else if (tok->kind == TokenKind_keyword_unsigned) { + if (type_specifiers & TypeSpecifierMask_unsigned) { + fatal_error("%s:%d: duplicate '%s'", tok->loc.filename, tok->loc.line, token_stringify(tok)); + } + next_token(p); + type_specifiers += TypeSpecifierMask_unsigned; + } else if (tok->kind == TokenKind_keyword__BitInt) { + if (type_specifiers & TypeSpecifierMask__BitInt) { + fatal_error("%s:%d: duplicate '%s'", tok->loc.filename, tok->loc.line, token_stringify(tok)); + } + next_token(p); + type_specifiers += TypeSpecifierMask__BitInt; + } else if (tok->kind == TokenKind_keyword_bool) { + if (type_specifiers & TypeSpecifierMask_bool) { + fatal_error("%s:%d: duplicate '%s'", tok->loc.filename, tok->loc.line, token_stringify(tok)); + } + next_token(p); + type_specifiers += TypeSpecifierMask_bool; + } else if (tok->kind == TokenKind_keyword__Complex) { + if (type_specifiers & TypeSpecifierMask__Complex) { + fatal_error("%s:%d: duplicate '%s'", tok->loc.filename, tok->loc.line, token_stringify(tok)); + } + next_token(p); + type_specifiers += TypeSpecifierMask__Complex; + } else if (tok->kind == TokenKind_keyword__Decimal32) { + if (type_specifiers & TypeSpecifierMask__Decimal32) { + fatal_error("%s:%d: duplicate '%s'", tok->loc.filename, tok->loc.line, token_stringify(tok)); + } + next_token(p); + type_specifiers += TypeSpecifierMask__Decimal32; + } else if (tok->kind == TokenKind_keyword__Decimal64) { + if (type_specifiers & TypeSpecifierMask__Decimal64) { + fatal_error("%s:%d: duplicate '%s'", tok->loc.filename, tok->loc.line, token_stringify(tok)); + } + next_token(p); + type_specifiers += TypeSpecifierMask__Decimal64; + } else if (tok->kind == TokenKind_keyword__Decimal128) { + if (type_specifiers & TypeSpecifierMask__Decimal128) { + fatal_error("%s:%d: duplicate '%s'", tok->loc.filename, tok->loc.line, token_stringify(tok)); + } + next_token(p); + type_specifiers += TypeSpecifierMask__Decimal128; + } else if (tok->kind == TokenKind_keyword__Atomic) { + if (type_specifiers & TypeSpecifierMask__Atomic) { + fatal_error("%s:%d: duplicate '%s'", tok->loc.filename, tok->loc.line, token_stringify(tok)); + } + next_token(p); + type_specifiers += TypeSpecifierMask__Atomic; + } else if (tok->kind == TokenKind_keyword_struct) { + if (type_specifiers & TypeSpecifierMask_struct) { + fatal_error("%s:%d: duplicate '%s'", tok->loc.filename, tok->loc.line, token_stringify(tok)); + } + ty = parse_struct_specifier(p); + type_specifiers += TypeSpecifierMask_struct; + } else if (tok->kind == TokenKind_keyword_union) { + if (type_specifiers & TypeSpecifierMask_union) { + fatal_error("%s:%d: duplicate '%s'", tok->loc.filename, tok->loc.line, token_stringify(tok)); + } + ty = parse_union_specifier(p); + type_specifiers += TypeSpecifierMask_union; + } else if (tok->kind == TokenKind_keyword_enum) { + if (type_specifiers & TypeSpecifierMask_enum) { + fatal_error("%s:%d: duplicate '%s'", tok->loc.filename, tok->loc.line, token_stringify(tok)); + } + ty = parse_enum_specifier(p); + type_specifiers += TypeSpecifierMask_enum; + } else if (tok->kind == TokenKind_keyword_typeof) { + unimplemented(); + } else if (tok->kind == TokenKind_keyword_typeof_unqual) { + unimplemented(); + } else if (is_typedef_name(p, tok)) { + if (type_specifiers & TypeSpecifierMask_typedef_name) { + fatal_error("%s:%d: duplicate '%s'", tok->loc.filename, tok->loc.line, token_stringify(tok)); + } + next_token(p); + int typedef_idx = find_typedef(p, tok->value.string); + ty = type_dup(p->typedefs->as.list->items[typedef_idx].ty); + type_specifiers += TypeSpecifierMask_typedef_name; + } + // type-specifier-qualifier > type-qualifier + else if (tok->kind == TokenKind_keyword_const) { + // TODO + next_token(p); + } else if (tok->kind == TokenKind_keyword_restrict) { + unimplemented(); + } else if (tok->kind == TokenKind_keyword_volatile) { + // TODO + next_token(p); + } else if (tok->kind == TokenKind_keyword__Atomic) { + unimplemented(); + } + // type-specifier-qualifier > alignment-specifier + else if (tok->kind == TokenKind_keyword_alignas) { + unimplemented(); + } else { + break; + } + } + + Type* ty_ = distinguish_type_from_type_specifiers(type_specifiers); + if (ty_) { + ty = ty_; + } + + ty->storage_class = StorageClass_unspecified; + return ty; +} + +// member-declarator-list: +// { member-declarator | ',' }+ +static AstNode* parse_member_declarator_list(Parser* p, Type* base_ty) { + AstNode* list = ast_new_list(1); + while (1) { + AstNode* d = parse_member_declarator(p, base_ty); + ast_append(list, d); + if (!consume_token_if(p, TokenKind_comma)) { + break; + } + } + return list; +} + +// member-declarator: +// declarator +// declarator ':' constant-expr +// TODO ':' constant-expr +static AstNode* parse_member_declarator(Parser* p, Type* base_ty) { + AstNode* decl = parse_declarator(p, base_ty); + + const char* name = decl->as.declarator->name; + decl->kind = AstNodeKind_struct_member; + decl->as.struct_member = calloc(1, sizeof(StructMemberNode)); + decl->as.struct_member->name = name; + + if (consume_token_if(p, TokenKind_colon)) { + AstNode* bit_width_node = parse_constant_expr(p); + InitData* bit_width_evaluated = eval_init_expr(bit_width_node, type_new(TypeKind_int)); + if (bit_width_evaluated->len != 1) + fatal_error("parse_member_declarator: invalid bit-field"); + if (bit_width_evaluated->blocks[0].kind != InitDataBlockKind_bytes) + fatal_error("parse_member_declarator: invalid bit-field"); + if (bit_width_evaluated->blocks[0].as.bytes.len != sizeof(int)) + fatal_error("parse_member_declarator: invalid bit-field"); + int bit_width; + memcpy(&bit_width, bit_width_evaluated->blocks[0].as.bytes.buf, sizeof(int)); + if (bit_width < 0) + fatal_error("parse_member_declarator: invalid bit-field"); + if (bit_width % 8 != 0) + fatal_error("parse_member_declarator: unimplemented"); + decl->as.struct_member->is_bitfield = true; + decl->as.struct_member->bitfield_offset = -1; // TODO + decl->as.struct_member->bitfield_width = bit_width; + } + + return decl; +} + +// enum-specifier: +// 'enum' TODO attribute-specifier-sequence? identifier? TODO enum-type-specifier? '{' enumerator-list ','? '}' +// 'enum' identifier TODO enum-type-specifier? +static Type* parse_enum_specifier(Parser* p) { + SourceLocation enum_kw_loc = current_location(p); + next_token(p); + + const Token* name; + char* anonymous_name = NULL; + if (peek_token(p)->kind == TokenKind_brace_l) { + anonymous_name = generate_anonymous_name(p); + Token* anonymous_token = calloc(1, sizeof(Token)); + anonymous_token->kind = TokenKind_ident; + anonymous_token->value.string = anonymous_name; + anonymous_token->loc = enum_kw_loc; + name = anonymous_token; + } else { + name = parse_ident(p); + } + + if (!consume_token_if(p, TokenKind_brace_l)) { + int enum_idx = find_enum(p, name->value.string); + if (enum_idx != -1) { + Type* ty = type_new(TypeKind_enum); + ty->ref.defs = p->enums; + ty->ref.index = enum_idx; + return ty; + } else { + // TODO + AstNode* new_enum = ast_new_enum_def(name->value.string); + ast_append(p->enums, new_enum); + enum_idx = p->enums->as.list->len - 1; + + Type* ty = type_new(TypeKind_enum); + ty->ref.defs = p->enums; + ty->ref.index = enum_idx; + return ty; + } + } + + int enum_idx = find_enum(p, name->value.string); + if (enum_idx != -1 && p->enums->as.list->items[enum_idx].as.enum_def->members) { + fatal_error("%s:%d: enum %s redefined", name->loc.filename, name->loc.line, name->value.string); + } + + if (enum_idx == -1) { + AstNode* new_enum = ast_new_enum_def(name->value.string); + ast_append(p->enums, new_enum); + enum_idx = p->enums->as.list->len - 1; + } + + parse_enum_members(p, enum_idx); + expect(p, TokenKind_brace_r); + + Type* ty = type_new(TypeKind_enum); + ty->ref.defs = p->enums; + ty->ref.index = enum_idx; + return ty; +} + +// enumerator-list: +// { enumerator |? ',' }+ +static void parse_enum_members(Parser* p, int enum_idx) { + int next_value = 0; + AstNode* list = ast_new_list(16); + + if (!p->enums->as.list->items[enum_idx].as.enum_def->members) { + p->enums->as.list->items[enum_idx].as.enum_def->members = list; + } + + while (peek_token(p)->kind != TokenKind_brace_r) { + AstNode* member = parse_enum_member(p, next_value); + next_value = member->as.enum_member->value + 1; + + ast_append(list, member); + + if (!consume_token_if(p, TokenKind_comma)) { + break; + } + } +} + +// enumerator: +// enumeration-constant TODO attribute-specifier-sequence? ( '=' constant-expr )? +static AstNode* parse_enum_member(Parser* p, int next_value) { + const Token* name = parse_ident(p); + int value; + if (consume_token_if(p, TokenKind_assign)) { + AstNode* expr = parse_constant_expr(p); + value = eval(expr); + } else { + value = next_value; + } + return ast_new_enum_member(name->value.string, value); +} + +// type-name: +// specifier-qualifier-list abstract-declarator? +static Type* parse_type_name(Parser* p) { + Type* ty = parse_specifier_qualifier_list(p); + ty = parse_abstract_declarator_opt(p, ty); + return ty; +} + +// abstract-declarator: +// pointer +// pointer? TODO direct-abstract-declarator +// +// abstract-declarator?: +// pointer? TODO direct-abstract-declarator? +static Type* parse_abstract_declarator_opt(Parser* p, Type* ty) { + return parse_pointer_opt(p, ty); +} + +// braced-initializer: +// '{' { designation? initializer |? ',' }* '}' +static AstNode* parse_braced_initializer(Parser* p) { + AstNode* inits = ast_new_list(4); + expect(p, TokenKind_brace_l); + while (peek_token(p)->kind != TokenKind_brace_r) { + // TODO: support designation + AstNode* init = parse_initializer(p); + ast_append(inits, init); + if (!consume_token_if(p, TokenKind_comma)) { + break; + } + } + expect(p, TokenKind_brace_r); + return ast_new_array_initializer(inits); +} + +// initializer: +// braced-initializer +// assignment-expr +static AstNode* parse_initializer(Parser* p) { + if (peek_token(p)->kind == TokenKind_brace_l) { + return parse_braced_initializer(p); + } else { + return parse_assignment_expr(p); + } +} + +#ifndef __ducc__ +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wunused-function" +#endif +// attribute-specifier-sequence: +// { attribute-specifier }+ +static AstNode* parse_attribute_specifier_sequence_opt(Parser* p) { + AstNode* attrs = NULL; + AstNode* attr; + while ((attr = parse_attribute_specifier_opt(p))) { + unimplemented(); + } + return attrs; +} +#ifndef __ducc__ +#pragma GCC diagnostic pop +#endif + +// attribute-specifier: +// '[' '[' attribute-list ']' ']' +static AstNode* parse_attribute_specifier_opt(Parser* p) { + if (peek_token(p)->kind != TokenKind_bracket_l || peek_token2(p)->kind != TokenKind_bracket_l) { + return NULL; + } + next_token(p); // skip '[' + next_token(p); // skip '[' + AstNode* ret = parse_attribute_list(p); + expect(p, TokenKind_bracket_r); + expect(p, TokenKind_bracket_r); + return ret; +} + +// attribute-list: +// { attribute? | ',' } +static AstNode* parse_attribute_list(Parser* p) { + while (1) { + if (peek_token(p)->kind == TokenKind_ident) { + parse_attribute(p); + } + if (!consume_token_if(p, TokenKind_comma)) { + break; + } + } + unimplemented(); + return NULL; +} + +// attribute: +// attribute-token attribute-argument-clause? +// +// attribute-token: +// identifier ( '::' identifier )? +static AstNode* parse_attribute(Parser* p) { + expect(p, TokenKind_ident); + if (peek_token(p)->kind == TokenKind_colon && peek_token2(p)->kind == TokenKind_colon) { + next_token(p); + next_token(p); + expect(p, TokenKind_ident); + } + if (peek_token(p)->kind == TokenKind_paren_l) { + parse_attribute_argument_clause(p); + } + unimplemented(); + return NULL; +} + +// attribute-argument-clause: +// '(' balanced-token-sequence? ')' +static AstNode* parse_attribute_argument_clause(Parser* p) { + expect(p, TokenKind_paren_l); + if (peek_token(p)->kind != TokenKind_paren_r) { + parse_balanced_token_sequence(p); + } + expect(p, TokenKind_paren_r); + unimplemented(); + return NULL; +} + +// balanced-token-sequence: +// { balanced-token }+ +// +// balanced-token: +// '(' balanced-token-sequence? ')' +// '[' balanced-token-sequence? ']' +// '{' balanced-token-sequence? '}' +// any token other than a parenthesis, a bracket, or a brace +static AstNode* parse_balanced_token_sequence(Parser* p) { + int paren_depth = 0; + int bracket_depth = 0; + int brace_depth = 0; + while (1) { + Token* tok = peek_token(p); + if (tok->kind == TokenKind_paren_l) { + paren_depth++; + next_token(p); + } else if (tok->kind == TokenKind_paren_r) { + if (paren_depth == 0) + break; + paren_depth--; + next_token(p); + } else if (tok->kind == TokenKind_bracket_l) { + bracket_depth++; + next_token(p); + } else if (tok->kind == TokenKind_bracket_r) { + if (bracket_depth == 0) + break; + bracket_depth--; + next_token(p); + } else if (tok->kind == TokenKind_brace_l) { + brace_depth++; + next_token(p); + } else if (tok->kind == TokenKind_brace_r) { + if (brace_depth == 0) + break; + brace_depth--; + next_token(p); + } else if (tok->kind == TokenKind_eof) { + break; + } else { + next_token(p); + } + } + unimplemented(); + return NULL; +} + +// unlabaled-stmt: +// attribute-specifier-sequence? compound-stmt +// attribute-specifier-sequence? if-stmt +// attribute-specifier-sequence? switch-stmt +// attribute-specifier-sequence? while-stmt +// attribute-specifier-sequence? do-while-stmt +// attribute-specifier-sequence? for-stmt +// attribute-specifier-sequence? goto-stmt +// attribute-specifier-sequence? continue-stmt +// attribute-specifier-sequence? break-stmt +// attribute-specifier-sequence? return-stmt +// attribute-specifier-sequence? expr-stmt +static AstNode* parse_unlabaled_stmt(Parser* p) { + switch (peek_token(p)->kind) { + case TokenKind_brace_l: + return parse_compound_stmt(p); + case TokenKind_keyword_if: + return parse_if_stmt(p); + case TokenKind_keyword_switch: + return parse_switch_stmt(p); + case TokenKind_keyword_while: + return parse_while_stmt(p); + case TokenKind_keyword_do: + return parse_do_while_stmt(p); + case TokenKind_keyword_for: + return parse_for_stmt(p); + case TokenKind_keyword_goto: + return parse_goto_stmt(p); + case TokenKind_keyword_continue: + return parse_continue_stmt(p); + case TokenKind_keyword_break: + return parse_break_stmt(p); + case TokenKind_keyword_return: + return parse_return_stmt(p); + default: + return parse_expr_stmt(p); + } +} + +// label: +// TODO attribute-specifier-sequence? identifier ':' +// TODO attribute-specifier-sequence? 'case' constant-expr ':' +// TODO attribute-specifier-sequence? 'default' ':' +static AstNode* parse_label(Parser* p) { + SourceLocation loc = current_location(p); + Token* t = peek_token(p); + + AstNode* node; + if (t->kind == TokenKind_keyword_case) { + if (!p->current_switch) { + fatal_error("%s:%d: 'case' label not within a switch statement", t->loc.filename, t->loc.line); + } + expect(p, TokenKind_keyword_case); + AstNode* value = parse_constant_expr(p); + expect(p, TokenKind_colon); + node = ast_new_case_label(eval(value), ast_new_nop()); + } else if (t->kind == TokenKind_keyword_default) { + if (!p->current_switch) { + fatal_error("%s:%d: 'default' label not within a switch statement", t->loc.filename, t->loc.line); + } + expect(p, TokenKind_keyword_default); + expect(p, TokenKind_colon); + node = ast_new_default_label(ast_new_nop()); + } else { + // identifier ':' + Token* label_token = expect(p, TokenKind_ident); + expect(p, TokenKind_colon); + node = ast_new_label_stmt(label_token->value.string, ast_new_nop()); + } + node->loc = loc; + return node; +} + +static bool is_label_token(Parser* p) { + Token* t = peek_token(p); + return t->kind == TokenKind_keyword_case || t->kind == TokenKind_keyword_default || + (t->kind == TokenKind_ident && peek_token2(p)->kind == TokenKind_colon); +} + +// stmt: +// label stmt +// unlabaled-stmt +static AstNode* parse_stmt(Parser* p) { + if (is_label_token(p)) { + AstNode* label = parse_label(p); + AstNode* stmt = parse_stmt(p); + switch (label->kind) { + case AstNodeKind_case_label: + label->as.case_label->body = stmt; + break; + case AstNodeKind_default_label: + label->as.default_label->body = stmt; + break; + case AstNodeKind_label_stmt: + label->as.label_stmt->body = stmt; + break; + default: + unreachable(); + break; + } + return label; + } + return parse_unlabaled_stmt(p); +} + +// compound-stmt: +// '{' { block-item }* '}' +static AstNode* parse_compound_stmt(Parser* p) { + SourceLocation loc = current_location(p); + AstNode* list = ast_new_list(4); + expect(p, TokenKind_brace_l); + enter_scope(p); + while (peek_token(p)->kind != TokenKind_brace_r) { + AstNode* stmt = parse_block_item(p); + ast_append(list, stmt); + } + leave_scope(p); + expect(p, TokenKind_brace_r); + list->loc = loc; + return list; +} + +// block-item: +// declaration +// unlabaled-stmt +// label +static AstNode* parse_block_item(Parser* p) { + Token* t = peek_token(p); + + if (t->kind == TokenKind_keyword_static_assert || is_type_token(p, t)) { + return parse_declaration(p); + } else if (is_label_token(p)) { + return parse_label(p); + } else { + return parse_unlabaled_stmt(p); + } +} + +// expr-stmt: +// TODO attribute-specifier-sequence? expr? ';' +static AstNode* parse_expr_stmt(Parser* p) { + SourceLocation loc = current_location(p); + if (consume_token_if(p, TokenKind_semicolon)) { + AstNode* node = ast_new_nop(); + node->loc = loc; + return node; + } + + AstNode* e = parse_expr(p); + expect(p, TokenKind_semicolon); + AstNode* node = ast_new_expr_stmt(e); + node->loc = loc; + return node; +} + +// if-stmt: +// 'if' '(' expr ')' stmt ( 'else' stmt )? +static AstNode* parse_if_stmt(Parser* p) { + SourceLocation loc = current_location(p); + expect(p, TokenKind_keyword_if); + expect(p, TokenKind_paren_l); + AstNode* cond = parse_expr(p); + expect(p, TokenKind_paren_r); + AstNode* then_body = parse_stmt(p); + AstNode* else_body = NULL; + if (consume_token_if(p, TokenKind_keyword_else)) { + else_body = parse_stmt(p); + } + + AstNode* node = ast_new_if_stmt(cond, then_body, else_body); + node->loc = loc; + return node; +} + +// switch-stmt: +// 'switch' '(' expr ')' stmt +static AstNode* parse_switch_stmt(Parser* p) { + SourceLocation loc = current_location(p); + expect(p, TokenKind_keyword_switch); + expect(p, TokenKind_paren_l); + AstNode* expr = parse_expr(p); + expect(p, TokenKind_paren_r); + + AstNode* tmp_var = generate_temporary_lvar(p, expr->ty); + AstNode* assignment = ast_new_assign_expr(TokenKind_assign, tmp_var, expr); + AstNode* assign_stmt = ast_new_expr_stmt(assignment); + assign_stmt->loc = loc; + + AstNode* switch_stmt = ast_new_switch_stmt(tmp_var); + switch_stmt->loc = loc; + + AstNode* prev_switch = p->current_switch; + p->current_switch = switch_stmt; + switch_stmt->as.switch_stmt->body = parse_stmt(p); + p->current_switch = prev_switch; + + AstNode* list = ast_new_list(2); + list->loc = loc; + ast_append(list, assign_stmt); + ast_append(list, switch_stmt); + return list; +} + +// while-stmt: +// 'while' '(' expr ')' stmt +static AstNode* parse_while_stmt(Parser* p) { + SourceLocation loc = current_location(p); + expect(p, TokenKind_keyword_while); + expect(p, TokenKind_paren_l); + AstNode* cond = parse_expr(p); + expect(p, TokenKind_paren_r); + AstNode* body = parse_stmt(p); + AstNode* node = ast_new_for_stmt(NULL, cond, NULL, body); + node->loc = loc; + return node; +} + +// do-while-stmt: +// 'do' stmt 'while' '(' expr ')' ';' +static AstNode* parse_do_while_stmt(Parser* p) { + SourceLocation loc = current_location(p); + expect(p, TokenKind_keyword_do); + AstNode* body = parse_stmt(p); + expect(p, TokenKind_keyword_while); + expect(p, TokenKind_paren_l); + AstNode* cond = parse_expr(p); + expect(p, TokenKind_paren_r); + expect(p, TokenKind_semicolon); + + AstNode* node = ast_new_do_while_stmt(cond, body); + node->loc = loc; + return node; +} + +// for-stmt: +// 'for' '(' expr? ';' expr? ';' expr? ')' stmt +// 'for' '(' declaration expr? ';' expr? ')' stmt +static AstNode* parse_for_stmt(Parser* p) { + SourceLocation loc = current_location(p); + expect(p, TokenKind_keyword_for); + expect(p, TokenKind_paren_l); + AstNode* init = NULL; + AstNode* cond = NULL; + AstNode* update = NULL; + enter_scope(p); + if (peek_token(p)->kind != TokenKind_semicolon) { + if (is_type_token(p, peek_token(p))) { + AstNode* decls = parse_declaration(p); + AstNode* initializers = ast_new_list(1); + for (int i = 0; i < decls->as.list->len; i++) { + AstNode* item = &decls->as.list->items[i]; + if (item->kind == AstNodeKind_expr_stmt) { + ast_append(initializers, item->as.expr_stmt->expr); + } + } + init = initializers; + } else { + init = parse_expr(p); + expect(p, TokenKind_semicolon); + } + } else { + expect(p, TokenKind_semicolon); + } + if (peek_token(p)->kind != TokenKind_semicolon) { + cond = parse_expr(p); + } else { + cond = ast_new_int(1); + } + expect(p, TokenKind_semicolon); + if (peek_token(p)->kind != TokenKind_paren_r) { + update = parse_expr(p); + } + expect(p, TokenKind_paren_r); + AstNode* body = parse_stmt(p); + leave_scope(p); + AstNode* node = ast_new_for_stmt(init, cond, update, body); + node->loc = loc; + return node; +} + +// goto-stmt: +// 'goto' identifier ';' +static AstNode* parse_goto_stmt(Parser* p) { + SourceLocation loc = current_location(p); + expect(p, TokenKind_keyword_goto); + Token* label_token = expect(p, TokenKind_ident); + expect(p, TokenKind_semicolon); + AstNode* node = ast_new_goto_stmt(label_token->value.string); + node->loc = loc; + return node; +} + +// continue-stmt: +// 'continue' ';' +static AstNode* parse_continue_stmt(Parser* p) { + SourceLocation loc = current_location(p); + expect(p, TokenKind_keyword_continue); + expect(p, TokenKind_semicolon); + AstNode* node = ast_new_continue_stmt(); + node->loc = loc; + return node; +} + +// break-stmt: +// 'break' ';' +static AstNode* parse_break_stmt(Parser* p) { + SourceLocation loc = current_location(p); + expect(p, TokenKind_keyword_break); + expect(p, TokenKind_semicolon); + AstNode* node = ast_new_break_stmt(); + node->loc = loc; + return node; +} + +// return-stmt: +// 'return' expr? ';' +static AstNode* parse_return_stmt(Parser* p) { + SourceLocation loc = current_location(p); + expect(p, TokenKind_keyword_return); + if (consume_token_if(p, TokenKind_semicolon)) { + AstNode* node = ast_new_return_stmt(NULL); + node->loc = loc; + return node; + } + + AstNode* expr = parse_expr(p); + expect(p, TokenKind_semicolon); + AstNode* node = ast_new_return_stmt(expr); + node->loc = loc; + return node; +} + +// static_assert-declaration: +// 'static_assert' '(' constant-expr ( ',' string-literal )? ')' ';' +static AstNode* parse_static_assert_declaration(Parser* p) { + expect(p, TokenKind_keyword_static_assert); + expect(p, TokenKind_paren_l); + AstNode* predicate = parse_constant_expr(p); + Token* message = NULL; + if (consume_token_if(p, TokenKind_comma)) { + message = expect(p, TokenKind_literal_str); + } + // TODO: validate the assertion. + (void)predicate; + (void)message; + expect(p, TokenKind_paren_r); + expect(p, TokenKind_semicolon); + return NULL; +} + +// external-declaration: +// static_assert-declaration +// TODO attribute-specifier-sequence ';' +// TODO attribute-specifier-sequence function-definition-or-declaration-rest +// function-definition-or-declaration-rest +// +// function-definition-or-declaration-rest: +// declaration-specifiers init-declarator-list ';' +// declaration-specifiers init-declarator-list compound-stmt +static AstNode* parse_external_declaration(Parser* p) { + if (peek_token(p)->kind == TokenKind_keyword_static_assert) { + return parse_static_assert_declaration(p); + } + + Type* ty = parse_declaration_specifiers(p); + if (consume_token_if(p, TokenKind_semicolon)) { + // Type declaration. + return NULL; + } + + AstNode* decls = parse_init_declarator_list(p, ty); + + if (peek_token(p)->kind == TokenKind_brace_l) { + return parse_function_definition(p, decls); + } + + expect(p, TokenKind_semicolon); + if (ty->storage_class == StorageClass_typedef) { + process_typedefs(p, decls); + return NULL; + } else { + process_declarations(p, &decls->as.list->items[decls->as.list->len - 1]); + return decls; + } +} + +// translation-unit: +// { external-declaration }+ +Program* parse(TokenArray* tokens) { + Parser* p = parser_new(tokens); + AstNode* funcs = ast_new_list(32); + AstNode* vars = ast_new_list(16); + while (eof(p)) { + AstNode* n = parse_external_declaration(p); + if (!n) + continue; + if (n->kind == AstNodeKind_func_def) { + ast_append(funcs, n); + } else if (n->kind == AstNodeKind_gvar_decl) { + ast_append(vars, n); + } else if (n->kind == AstNodeKind_list) { + for (int i = 0; i < n->as.list->len; ++i) { + if (n->as.list->items[i].kind == AstNodeKind_gvar_decl) + ast_append(vars, &n->as.list->items[i]); + } + } + } + Program* prog = calloc(1, sizeof(Program)); + prog->funcs = funcs; + prog->vars = vars; + prog->str_literals = p->str_literals.data; + return prog; +} + +static int eval_binary_expr(int op, int v1, int v2) { + if (op == TokenKind_andand) { + return v1 && v2; + } else if (op == TokenKind_oror) { + return v1 || v2; + } else if (op == TokenKind_plus) { + return v1 + v2; + } else if (op == TokenKind_minus) { + return v1 - v2; + } else if (op == TokenKind_star) { + return v1 * v2; + } else if (op == TokenKind_slash) { + if (v2 == 0) { + fatal_error("eval: division by zero"); + } + return v1 / v2; + } else if (op == TokenKind_percent) { + if (v2 == 0) { + fatal_error("eval: division by zero"); + } + return v1 % v2; + } else if (op == TokenKind_eq) { + return v1 == v2; + } else if (op == TokenKind_ne) { + return v1 != v2; + } else if (op == TokenKind_lt) { + return v1 < v2; + } else if (op == TokenKind_le) { + return v1 <= v2; + } else if (op == TokenKind_lshift) { + return v1 << v2; + } else if (op == TokenKind_rshift) { + return v1 >> v2; + } else if (op == TokenKind_and) { + return v1 & v2; + } else if (op == TokenKind_or) { + return v1 | v2; + } else if (op == TokenKind_xor) { + return v1 ^ v2; + } else { + unimplemented(); + } +} + +static int eval(AstNode* e) { + if (e->kind == AstNodeKind_int_expr) { + return e->as.int_expr->value; + } else if (e->kind == AstNodeKind_unary_expr) { + int v = eval(e->as.unary_expr->operand); + if (e->as.unary_expr->op == TokenKind_not) { + return !v; + } else if (e->as.unary_expr->op == TokenKind_minus) { + return -v; + } else { + unimplemented(); + } + } else if (e->kind == AstNodeKind_binary_expr) { + int v1 = eval(e->as.binary_expr->lhs); + int v2 = eval(e->as.binary_expr->rhs); + return eval_binary_expr(e->as.binary_expr->op, v1, v2); + } else if (e->kind == AstNodeKind_logical_expr) { + int v1 = eval(e->as.logical_expr->lhs); + int v2 = eval(e->as.logical_expr->rhs); + return eval_binary_expr(e->as.logical_expr->op, v1, v2); + } else if (e->kind == AstNodeKind_cond_expr) { + int cond = eval(e->as.cond_expr->cond); + if (cond) { + return eval(e->as.cond_expr->then); + } else { + return eval(e->as.cond_expr->else_); + } + } else if (e->kind == AstNodeKind_cast_expr) { + return eval(e->as.cast_expr->operand); + } else if (e->kind == AstNodeKind_deref_expr) { + return eval(e->as.deref_expr->operand); + } else if (e->kind == AstNodeKind_ref_expr) { + return eval(e->as.ref_expr->operand); + } else { + unimplemented(); + } +} + +bool pp_eval_constant_expr(TokenArray* pp_tokens) { + TokenArray* tokens = convert_pp_tokens_to_tokens(pp_tokens); + Parser* p = parser_new(tokens); + AstNode* e = parse_constant_expr(p); + return eval(e) != 0; +} + +static InitData* initdata_new() { + InitData* init_data = calloc(1, sizeof(InitData)); + init_data->len = 0; + init_data->capacity = 1; + init_data->blocks = calloc(init_data->capacity, sizeof(InitDataBlock)); + return init_data; +} + +static void initdata_reserve(InitData* init_data, size_t size) { + if (size <= init_data->capacity) + return; + while (init_data->capacity < size) { + init_data->capacity *= 2; + } + init_data->blocks = realloc(init_data->blocks, init_data->capacity * sizeof(InitDataBlock)); + memset(init_data->blocks + init_data->len, 0, (init_data->capacity - init_data->len) * sizeof(InitDataBlock)); +} + +static InitDataBlock* initdata_push_new(InitData* init_data) { + initdata_reserve(init_data, init_data->len + 1); + return &init_data->blocks[init_data->len++]; +} + +static void initdata_append_addr(InitData* buf, const char* label) { + InitDataBlock* block = initdata_push_new(buf); + block->kind = InitDataBlockKind_addr; + block->as.addr.label = label; +} + +static void initdata_append_zeros(InitData* buf, size_t len) { + if (len == 0) + return; + char* data = calloc(len, sizeof(char)); + InitDataBlock* block = initdata_push_new(buf); + block->kind = InitDataBlockKind_bytes; + block->as.bytes.len = len; + block->as.bytes.buf = data; +} + +static void initdata_append_bytes(InitData* buf, const char* data, size_t len) { + char* copy = calloc(len, sizeof(char)); + memcpy(copy, data, len); + InitDataBlock* block = initdata_push_new(buf); + block->kind = InitDataBlockKind_bytes; + block->as.bytes.len = len; + block->as.bytes.buf = copy; +} + +static void do_eval_init_expr(InitData* buf, AstNode* expr, Type* ty) { + if (expr->kind == AstNodeKind_array_initializer) { + AstNode* list = expr->as.array_initializer->list; + if (ty->kind == TypeKind_array) { + for (int i = 0; i < list->as.list->len; ++i) { + do_eval_init_expr(buf, &list->as.list->items[i], ty->base); + } + } else if (ty->kind == TypeKind_struct) { + AstNode* def = &ty->ref.defs->as.list->items[ty->ref.index]; + AstNode* members = def->as.struct_def->members; + int offset = 0; + for (int i = 0; i < list->as.list->len; ++i) { + AstNode* member = &members->as.list->items[i]; + int align = type_alignof(member->ty); + int aligned_offset = to_aligned(offset, align); + initdata_append_zeros(buf, aligned_offset - offset); // padding + offset = aligned_offset; + do_eval_init_expr(buf, &list->as.list->items[i], member->ty); + offset += type_sizeof(member->ty); + } + int total = type_sizeof(ty); + initdata_append_zeros(buf, total - offset); // padding + } else if (ty->kind == TypeKind_union) { + AstNode* def = &ty->ref.defs->as.list->items[ty->ref.index]; + AstNode* members = def->as.union_def->members; + AstNode* member = &members->as.list->items[0]; + do_eval_init_expr(buf, &list->as.list->items[0], member->ty); + int member_size = type_sizeof(member->ty); + int total = type_sizeof(ty); + initdata_append_zeros(buf, total - member_size); // padding + } else { + unimplemented(); + } + } else if (ty->kind == TypeKind_ptr) { + if (expr->kind == AstNodeKind_str_expr) { + char label[32]; + sprintf(label, ".Lstr__%d", expr->as.str_expr->idx); + initdata_append_addr(buf, strdup(label)); + } else if (expr->kind == AstNodeKind_ref_expr) { + if (expr->as.ref_expr->operand->kind != AstNodeKind_gvar) { + unimplemented(); + } + initdata_append_addr(buf, expr->as.ref_expr->operand->as.gvar->name); + } else if (expr->kind == AstNodeKind_gvar) { + initdata_append_addr(buf, expr->as.gvar->name); + } else if (expr->kind == AstNodeKind_func) { + initdata_append_addr(buf, expr->as.func->name); + } else if (expr->kind == AstNodeKind_int_expr && expr->as.int_expr->value == 0) { + initdata_append_zeros(buf, type_sizeof(ty)); + } else { + unimplemented(); + } + } else { + int value = eval(expr); + int size = type_sizeof(ty); + char bytes[8]; + for (int i = 0; i < size; ++i) { + bytes[i] = (value >> (i * 8)) & 0xff; + } + initdata_append_bytes(buf, bytes, size); + } +} + +InitData* eval_init_expr(AstNode* expr, Type* ty) { + InitData* buf = initdata_new(); + do_eval_init_expr(buf, expr, ty); + return buf; +} diff --git a/src/cc1/parse.h b/src/cc1/parse.h new file mode 100644 index 0000000..f229b77 --- /dev/null +++ b/src/cc1/parse.h @@ -0,0 +1,43 @@ +#ifndef DUCC_PARSE_H +#define DUCC_PARSE_H + +#include "../lib/common.h" +#include "ast.h" +#include "preprocess.h" + +Program* parse(TokenArray* tokens); +bool pp_eval_constant_expr(TokenArray* pp_tokens); + +typedef enum { + InitDataBlockKind_addr, + InitDataBlockKind_bytes, +} InitDataBlockKind; + +// Static address to global variable or ROM area. +typedef struct { + const char* label; +} InitDataBlockAddr; + +// Static byte array. +typedef struct { + size_t len; + const char* buf; +} InitDataBlockBytes; + +typedef struct { + InitDataBlockKind kind; + union { + InitDataBlockAddr addr; + InitDataBlockBytes bytes; + } as; +} InitDataBlock; + +typedef struct { + size_t len; + size_t capacity; + InitDataBlock* blocks; +} InitData; + +InitData* eval_init_expr(AstNode* expr, Type* ty); + +#endif diff --git a/src/cc1/preprocess.c b/src/cc1/preprocess.c new file mode 100644 index 0000000..d050119 --- /dev/null +++ b/src/cc1/preprocess.c @@ -0,0 +1,1559 @@ +#include "preprocess.h" +#include <libgen.h> +#include <unistd.h> +#include "../lib/common.h" +#include "parse.h" +#include "sys.h" +#include "tokenize.h" + +typedef enum { + MacroKind_undef, + MacroKind_obj, + MacroKind_func, + MacroKind_builtin_file, + MacroKind_builtin_line, +} MacroKind; + +const char* macro_kind_stringify(MacroKind kind) { + if (kind == MacroKind_undef) + return "undef"; + else if (kind == MacroKind_obj) + return "object-like"; + else if (kind == MacroKind_func) + return "function-like"; + else if (kind == MacroKind_builtin_file) + return "__FILE__"; + else if (kind == MacroKind_builtin_line) + return "__LINE__"; + else + unreachable(); +} + +typedef struct { + MacroKind kind; + const char* name; + TokenArray parameters; + TokenArray replacements; +} Macro; + +void macro_build_json(JsonBuilder* builder, Macro* macro) { + jsonbuilder_object_start(builder); + + jsonbuilder_object_member_start(builder, "kind"); + jsonbuilder_string(builder, macro_kind_stringify(macro->kind)); + jsonbuilder_object_member_end(builder); + jsonbuilder_object_member_start(builder, "name"); + jsonbuilder_string(builder, macro->name); + jsonbuilder_object_member_end(builder); + jsonbuilder_object_member_start(builder, "parameters"); + tokens_build_json(builder, ¯o->parameters); + jsonbuilder_object_member_end(builder); + jsonbuilder_object_member_start(builder, "replacements"); + tokens_build_json(builder, ¯o->replacements); + jsonbuilder_object_member_end(builder); + + jsonbuilder_object_end(builder); +} + +static int macro_find_param(Macro* macro, Token* tok) { + if (tok->kind != TokenKind_ident) + return -1; + + for (size_t i = 0; i < macro->parameters.len; ++i) { + if (strcmp(macro->parameters.data[i].value.string, tok->value.string) == 0) { + return i; + } + } + return -1; +} + +typedef struct { + size_t len; + size_t capacity; + Macro* data; +} MacroArray; + +static MacroArray* macros_new() { + MacroArray* macros = calloc(1, sizeof(MacroArray)); + macros->len = 0; + macros->capacity = 8; + macros->data = calloc(macros->capacity, sizeof(Macro)); + return macros; +} + +static void macros_reserve(MacroArray* macros, size_t size) { + if (size <= macros->capacity) + return; + while (macros->capacity < size) { + macros->capacity *= 2; + } + macros->data = realloc(macros->data, macros->capacity * sizeof(Macro)); + memset(macros->data + macros->len, 0, (macros->capacity - macros->len) * sizeof(Macro)); +} + +static Macro* macros_push_new(MacroArray* macros) { + macros_reserve(macros, macros->len + 1); + return ¯os->data[macros->len++]; +} + +static void define_macro_to_number(MacroArray* macros, const char* name, int n) { + Macro* m = macros_push_new(macros); + m->kind = MacroKind_obj; + m->name = name; + tokens_init(&m->replacements, 1); + Token* tok = tokens_push_new(&m->replacements); + tok->kind = TokenKind_literal_int; + tok->value.integer = n; +} + +static void add_predefined_macros(MacroArray* macros) { + Macro* m; + + m = macros_push_new(macros); + m->kind = MacroKind_builtin_file; + m->name = "__FILE__"; + + m = macros_push_new(macros); + m->kind = MacroKind_builtin_line; + m->name = "__LINE__"; + + // Non-standard pre-defined macros. + define_macro_to_number(macros, "__ducc__", 1); + define_macro_to_number(macros, "__x86_64__", 1); + define_macro_to_number(macros, "__LP64__", 1); + + // GCC's predefined macros. Glibc depends on these macros. + // https://gcc.gnu.org/onlinedocs/cpp/Common-Predefined-Macros.html + // TODO: uncomment out __LONG_MAX__, etc. once ducc supports 64-bit integer literals. + define_macro_to_number(macros, "__CHAR_BIT__", __CHAR_BIT__); + define_macro_to_number(macros, "__SCHAR_MAX__", __SCHAR_MAX__); + define_macro_to_number(macros, "__WCHAR_MAX__", __WCHAR_MAX__); + define_macro_to_number(macros, "__SHRT_MAX__", __SHRT_MAX__); + define_macro_to_number(macros, "__INT_MAX__", __INT_MAX__); + // define_macro_to_number(macros, "__LONG_MAX__", __LONG_MAX__); + // define_macro_to_number(macros, "__LONG_LONG_MAX__", __LONG_LONG_MAX__); + define_macro_to_number(macros, "__WINT_MAX__", __WINT_MAX__); + // define_macro_to_number(macros, "__SIZE_MAX__", __SIZE_MAX__); + // define_macro_to_number(macros, "__PTRDIFF_MAX__", __PTRDIFF_MAX__); + // define_macro_to_number(macros, "__INTMAX_MAX__", __INTMAX_MAX__); + // define_macro_to_number(macros, "__UINTMAX_MAX__", __UINTMAX_MAX__); + define_macro_to_number(macros, "__SIG_ATOMIC_MAX__", __SIG_ATOMIC_MAX__); + define_macro_to_number(macros, "__INT8_MAX__", __INT8_MAX__); + define_macro_to_number(macros, "__INT16_MAX__", __INT16_MAX__); + define_macro_to_number(macros, "__INT32_MAX__", __INT32_MAX__); + // define_macro_to_number(macros, "__INT64_MAX__", __INT64_MAX__); + define_macro_to_number(macros, "__UINT8_MAX__", __UINT8_MAX__); + define_macro_to_number(macros, "__UINT16_MAX__", __UINT16_MAX__); + define_macro_to_number(macros, "__UINT32_MAX__", __UINT32_MAX__); + // define_macro_to_number(macros, "__UINT64_MAX__", __UINT64_MAX__); + define_macro_to_number(macros, "__INT_LEAST8_MAX__", __INT_LEAST8_MAX__); + define_macro_to_number(macros, "__INT_LEAST16_MAX__", __INT_LEAST16_MAX__); + define_macro_to_number(macros, "__INT_LEAST32_MAX__", __INT_LEAST32_MAX__); + // define_macro_to_number(macros, "__INT_LEAST64_MAX__", __INT_LEAST64_MAX__); + define_macro_to_number(macros, "__UINT_LEAST8_MAX__", __UINT_LEAST8_MAX__); + define_macro_to_number(macros, "__UINT_LEAST16_MAX__", __UINT_LEAST16_MAX__); + define_macro_to_number(macros, "__UINT_LEAST32_MAX__", __UINT_LEAST32_MAX__); + // define_macro_to_number(macros, "__UINT_LEAST64_MAX__", __UINT_LEAST64_MAX__); + define_macro_to_number(macros, "__INT_FAST8_MAX__", __INT_FAST8_MAX__); + // define_macro_to_number(macros, "__INT_FAST16_MAX__", __INT_FAST16_MAX__); + // define_macro_to_number(macros, "__INT_FAST32_MAX__", __INT_FAST32_MAX__); + // define_macro_to_number(macros, "__INT_FAST64_MAX__", __INT_FAST64_MAX__); + define_macro_to_number(macros, "__UINT_FAST8_MAX__", __UINT_FAST8_MAX__); + // define_macro_to_number(macros, "__UINT_FAST16_MAX__", __UINT_FAST16_MAX__); + // define_macro_to_number(macros, "__UINT_FAST32_MAX__", __UINT_FAST32_MAX__); + // define_macro_to_number(macros, "__UINT_FAST64_MAX__", __UINT_FAST64_MAX__); + // define_macro_to_number(macros, "__INTPTR_MAX__", __INTPTR_MAX__); + // define_macro_to_number(macros, "__UINTPTR_MAX__", __UINTPTR_MAX__); + define_macro_to_number(macros, "__WCHAR_MIN__", __WCHAR_MIN__); + define_macro_to_number(macros, "__WINT_MIN__", __WINT_MIN__); + define_macro_to_number(macros, "__SIG_ATOMIC_MIN__", __SIG_ATOMIC_MIN__); + + // GCC's predefined macros not listed in Common Predefined Macros page. + define_macro_to_number(macros, "__DBL_DIG__", __DBL_DIG__); + define_macro_to_number(macros, "__DBL_MANT_DIG__", __DBL_MANT_DIG__); + define_macro_to_number(macros, "__DBL_MAX_10_EXP__", __DBL_MAX_10_EXP__); + define_macro_to_number(macros, "__FLT_DIG__", __FLT_DIG__); + define_macro_to_number(macros, "__FLT_MANT_DIG__", __FLT_MANT_DIG__); + define_macro_to_number(macros, "__FLT_MAX_10_EXP__", __FLT_MAX_10_EXP__); + define_macro_to_number(macros, "__LDBL_DIG__", __LDBL_DIG__); + define_macro_to_number(macros, "__LDBL_MANT_DIG__", __LDBL_MANT_DIG__); + define_macro_to_number(macros, "__LDBL_MAX_10_EXP__", __LDBL_MAX_10_EXP__); +} + +// Accept "FOO" or "FOO=value" +static void define_macro_from_string(MacroArray* macros, const char* def) { + Macro* m = macros_push_new(macros); + m->kind = MacroKind_obj; + + const char* eq = strchr(def, '='); + if (eq) { + // FOO=value format + size_t name_len = eq - def; + char* name = calloc(name_len + 1, sizeof(char)); + memcpy(name, def, name_len); + m->name = name; + + const char* value = eq + 1; + tokens_init(&m->replacements, 1); + Token* tok = tokens_push_new(&m->replacements); + + // Try to parse as integer + char* num_end; + long int_val = strtol(value, &num_end, 10); + if (value[0] != '\0' && *num_end == '\0') { + tok->kind = TokenKind_literal_int; + tok->value.integer = int_val; + } else { + tok->kind = TokenKind_ident; + tok->value.string = value; + } + } else { + // FOO format (equivalent to FOO=1) + m->name = def; + tokens_init(&m->replacements, 1); + Token* tok = tokens_push_new(&m->replacements); + tok->kind = TokenKind_literal_int; + tok->value.integer = 1; + } +} + +static void add_user_defines(MacroArray* macros, StrArray* user_defines) { + for (size_t i = 0; i < user_defines->len; ++i) { + define_macro_from_string(macros, user_defines->data[i]); + } +} + +void macros_build_json(JsonBuilder* builder, MacroArray* macros) { + jsonbuilder_array_start(builder); + for (size_t i = 0; i < macros->len; ++i) { + jsonbuilder_array_element_start(builder); + macro_build_json(builder, ¯os->data[i]); + jsonbuilder_array_element_end(builder); + } + jsonbuilder_array_end(builder); +} + +typedef struct { + TokenArray tokens; +} MacroArg; + +void macroarg_build_json(JsonBuilder* builder, MacroArg* arg) { + jsonbuilder_object_start(builder); + jsonbuilder_object_member_start(builder, "tokens"); + tokens_build_json(builder, &arg->tokens); + jsonbuilder_object_member_end(builder); + jsonbuilder_object_end(builder); +} + +typedef struct { + size_t len; + size_t capacity; + MacroArg* data; +} MacroArgArray; + +static MacroArgArray* macroargs_new() { + MacroArgArray* macroargs = calloc(1, sizeof(MacroArgArray)); + macroargs->len = 0; + macroargs->capacity = 2; + macroargs->data = calloc(macroargs->capacity, sizeof(MacroArg)); + return macroargs; +} + +static void macroargs_reserve(MacroArgArray* macroargs, size_t size) { + if (size <= macroargs->capacity) + return; + while (macroargs->capacity < size) { + macroargs->capacity *= 2; + } + macroargs->data = realloc(macroargs->data, macroargs->capacity * sizeof(MacroArg)); + memset(macroargs->data + macroargs->len, 0, (macroargs->capacity - macroargs->len) * sizeof(MacroArg)); +} + +static MacroArg* macroargs_push_new(MacroArgArray* macroargs) { + macroargs_reserve(macroargs, macroargs->len + 1); + return ¯oargs->data[macroargs->len++]; +} + +void macroargs_build_json(JsonBuilder* builder, MacroArgArray* macroargs) { + jsonbuilder_object_start(builder); + jsonbuilder_object_member_start(builder, "len"); + jsonbuilder_integer(builder, macroargs->len); + jsonbuilder_object_member_end(builder); + jsonbuilder_object_member_start(builder, "data"); + jsonbuilder_array_start(builder); + for (size_t i = 0; i < macroargs->len; ++i) { + jsonbuilder_array_element_start(builder); + macroarg_build_json(builder, ¯oargs->data[i]); + jsonbuilder_array_element_end(builder); + } + jsonbuilder_array_end(builder); + jsonbuilder_object_member_end(builder); + jsonbuilder_object_end(builder); +} + +typedef struct { + TokenArray* pp_tokens; + int pos; + MacroArray* macros; + int include_depth; + StrArray* include_paths; + StrArray* included_files; + bool generate_system_deps; + bool generate_user_deps; +} Preprocessor; + +static TokenArray* do_preprocess(InFile* src, int depth, MacroArray* macros, StrArray* include_paths, + StrArray* included_files, bool generate_system_deps, bool generate_user_deps); + +static Preprocessor* preprocessor_new(TokenArray* pp_tokens, int include_depth, MacroArray* macros, + StrArray* include_paths, StrArray* included_files, bool generate_system_deps, + bool generate_user_deps) { + if (include_depth >= 32) { + fatal_error("include depth limit exceeded"); + } + + Preprocessor* pp = calloc(1, sizeof(Preprocessor)); + pp->pp_tokens = pp_tokens; + pp->macros = macros; + pp->include_depth = include_depth; + pp->include_paths = include_paths; + pp->included_files = included_files; + pp->generate_system_deps = generate_system_deps; + pp->generate_user_deps = generate_user_deps; + + return pp; +} + +static Token* pp_token_at(Preprocessor* pp, int i) { + return &pp->pp_tokens->data[i]; +} + +static Token* peek_pp_token(Preprocessor* pp) { + return pp_token_at(pp, pp->pos); +} + +static Token* next_pp_token(Preprocessor* pp) { + return pp_token_at(pp, pp->pos++); +} + +static void skip_pp_token(Preprocessor* pp, TokenKind expected) { + Token* tok = next_pp_token(pp); + assert(tok->kind == expected); +} + +static Token* consume_pp_token_if(Preprocessor* pp, TokenKind expected) { + if (peek_pp_token(pp)->kind == expected) { + return next_pp_token(pp); + } else { + return NULL; + } +} + +static Token* consume_pp_token_if_not(Preprocessor* pp, TokenKind non_expected) { + if (peek_pp_token(pp)->kind == non_expected) { + return NULL; + } else { + return next_pp_token(pp); + } +} + +static Token* expect_pp_token(Preprocessor* pp, TokenKind expected) { + Token* tok = next_pp_token(pp); + if (tok->kind == expected) { + return tok; + } + fatal_error("%s:%d: expected '%s', but got '%s'", tok->loc.filename, tok->loc.line, token_kind_stringify(expected), + token_stringify(tok)); +} + +static bool pp_eof(Preprocessor* pp) { + return peek_pp_token(pp)->kind == TokenKind_eof; +} + +static int find_macro(Preprocessor* pp, const char* name) { + for (size_t i = 0; i < pp->macros->len; ++i) { + if (pp->macros->data[i].kind == MacroKind_undef) + continue; + if (strcmp(pp->macros->data[i].name, name) == 0) { + return i; + } + } + return -1; +} + +static void undef_macro(Preprocessor* pp, int idx) { + pp->macros->data[idx].kind = MacroKind_undef; + // TODO: Can predefined macro like __FILE__ be undefined? +} + +static void skip_whitespaces(Preprocessor* pp) { + while (!pp_eof(pp) && consume_pp_token_if(pp, TokenKind_whitespace)) + ; +} + +static void skip_whitespaces_or_newlines(Preprocessor* pp, bool skip_newline) { + while (!pp_eof(pp) && (consume_pp_token_if(pp, TokenKind_whitespace) || + (skip_newline && consume_pp_token_if(pp, TokenKind_newline)))) + ; +} + +// It will not consume a new-line token. +static void seek_to_next_newline(Preprocessor* pp) { + while (!pp_eof(pp) && consume_pp_token_if_not(pp, TokenKind_newline)) + ; +} + +static void make_tokens_removed(Preprocessor* pp, int start, int end) { + for (int i = start; i < end; ++i) { + Token* tok = pp_token_at(pp, i); + tok->kind = TokenKind_removed; + tok->value.string = NULL; + } +} + +static Token* read_include_header_name(Preprocessor* pp) { + Token* tok = next_pp_token(pp); + if (tok->kind != TokenKind_header_name) { + fatal_error("%s:%d: invalid #include, %s", tok->loc.filename, tok->loc.line, token_stringify(tok)); + } + return tok; +} + +static const char* resolve_include_name(Preprocessor* pp, const Token* include_name_token) { + const char* include_name = include_name_token->value.string; + if (include_name[0] == '"') { + char* current_filename = strdup(include_name_token->loc.filename); + const char* current_dir = dirname(current_filename); + char* buf = calloc(strlen(include_name) - 2 + 1 + strlen(current_dir) + 1, sizeof(char)); + sprintf(buf, "%s/%.*s", current_dir, (int)(strlen(include_name) - 2), include_name + 1); + return buf; + } else { + for (size_t i = 0; i < pp->include_paths->len; ++i) { + char* buf = calloc(strlen(include_name) - 2 + 1 + strlen(pp->include_paths->data[i]) + 1, sizeof(char)); + sprintf(buf, "%s/%.*s", pp->include_paths->data[i], (int)(strlen(include_name) - 2), include_name + 1); + if (access(buf, F_OK | R_OK) == 0) { + return buf; + } + } + return NULL; + } +} + +static const char* resolve_next_include_name(Preprocessor* pp, const Token* include_name_token) { + const char* include_name = include_name_token->value.string; + char* current_filename = strdup(include_name_token->loc.filename); + for (size_t i = 0; i < pp->include_paths->len; ++i) { + char* buf = calloc(strlen(include_name) - 2 + 1 + strlen(pp->include_paths->data[i]) + 1, sizeof(char)); + sprintf(buf, "%s/%.*s", pp->include_paths->data[i], (int)(strlen(include_name) - 2), include_name + 1); + if (strcmp(buf, current_filename) == 0) { + // #include_next skips the same file. + continue; + } + if (access(buf, F_OK | R_OK) == 0) { + return buf; + } + } + return NULL; +} + +static int replace_pp_tokens(Preprocessor* pp, int dest_start, int dest_end, TokenArray* source_tokens) { + size_t n_tokens_to_remove = dest_end - dest_start; + size_t n_tokens_after_dest = pp->pp_tokens->len - dest_end; + size_t shift_amount; + + if (n_tokens_to_remove < source_tokens->len) { + // Move existing tokens backward to make room. + shift_amount = source_tokens->len - n_tokens_to_remove; + tokens_reserve(pp->pp_tokens, pp->pp_tokens->len + shift_amount); + memmove(pp_token_at(pp, dest_end + shift_amount), pp_token_at(pp, dest_end), + n_tokens_after_dest * sizeof(Token)); + pp->pp_tokens->len += shift_amount; + } else if (source_tokens->len < n_tokens_to_remove) { + // Move existing tokens forward to reduce room. + shift_amount = n_tokens_to_remove - source_tokens->len; + memmove(pp_token_at(pp, dest_start + source_tokens->len), pp_token_at(pp, dest_end), + n_tokens_after_dest * sizeof(Token)); + pp->pp_tokens->len -= shift_amount; + memset(pp_token_at(pp, pp->pp_tokens->len), 0, shift_amount * sizeof(Token)); + } + + memcpy(pp_token_at(pp, dest_start), source_tokens->data, source_tokens->len * sizeof(Token)); + + return dest_start + source_tokens->len; +} + +static int insert_pp_tokens(Preprocessor* pp, int dest_pos, TokenArray* source_tokens) { + return replace_pp_tokens(pp, dest_pos, dest_pos, source_tokens); +} + +static void replace_single_pp_token(Preprocessor* pp, int dest, Token* source_tok) { + TokenArray tokens; + tokens_init(&tokens, 1); + *tokens_push_new(&tokens) = *source_tok; + replace_pp_tokens(pp, dest, dest + 1, &tokens); +} + +static void expand_include_directive(Preprocessor* pp, const char* include_name, Token* original_include_name_tok) { + InFile* include_source = infile_open(include_name); + if (!include_source) { + fatal_error("%s:%d: cannot open include file: %s", original_include_name_tok->loc.filename, + original_include_name_tok->loc.line, token_stringify(original_include_name_tok)); + } + + TokenArray* include_pp_tokens = do_preprocess(include_source, pp->include_depth + 1, pp->macros, pp->include_paths, + pp->included_files, pp->generate_system_deps, pp->generate_user_deps); + tokens_pop(include_pp_tokens); // pop EOF token + pp->pos = insert_pp_tokens(pp, pp->pos, include_pp_tokens); +} + +// ws ::= many0(<whitespace>) +// macro-parameters ::= '(' <ws> opt(<identifier> <ws> many0(',' <ws> <identifier> <ws>)) ')' +static TokenArray* pp_parse_macro_parameters(Preprocessor* pp) { + TokenArray* parameters = calloc(1, sizeof(TokenArray)); + tokens_init(parameters, 2); + + // '(' is consumed by caller. + skip_whitespaces(pp); + Token* tok = consume_pp_token_if(pp, TokenKind_ident); + if (tok) { + *tokens_push_new(parameters) = *tok; + skip_whitespaces(pp); + while (consume_pp_token_if(pp, TokenKind_comma)) { + skip_whitespaces(pp); + tok = next_pp_token(pp); + if (tok->kind != TokenKind_ident) { + fatal_error("%s:%d: invalid macro syntax", tok->loc.filename, tok->loc.line); + } + *tokens_push_new(parameters) = *tok; + } + } + expect_pp_token(pp, TokenKind_paren_r); + + return parameters; +} + +// ws ::= many0(<whitespace>) +// macro-arguments ::= <ws> '(' <ws> opt(<any-token> <ws> many0(',' <ws> <any-token> <ws>)) ')' +static MacroArgArray* pp_parse_macro_arguments(Preprocessor* pp, bool skip_newline) { + MacroArgArray* args = macroargs_new(); + + skip_whitespaces_or_newlines(pp, skip_newline); + if (!consume_pp_token_if(pp, TokenKind_paren_l)) { + return NULL; + } + skip_whitespaces_or_newlines(pp, skip_newline); + Token* tok = peek_pp_token(pp); + if (!skip_newline && tok->kind == TokenKind_newline) { + expect_pp_token(pp, TokenKind_paren_r); + } + if (tok->kind != TokenKind_paren_r) { + MacroArg* arg = macroargs_push_new(args); + tokens_init(&arg->tokens, 4); + + // Parse argument tokens, handling nested parentheses. + int nesting = 0; + while (true) { + tok = peek_pp_token(pp); + + if (nesting == 0) { + if (tok->kind == TokenKind_paren_r) { + break; + } + if (tok->kind == TokenKind_comma) { + next_pp_token(pp); + skip_whitespaces_or_newlines(pp, skip_newline); + + arg = macroargs_push_new(args); + tokens_init(&arg->tokens, 4); + continue; + } + } + + if (tok->kind == TokenKind_paren_l) { + nesting++; + } else if (tok->kind == TokenKind_paren_r) { + nesting--; + if (nesting < 0) { + break; + } + } + + tok = next_pp_token(pp); + if (tok->kind != TokenKind_removed) { + *tokens_push_new(&arg->tokens) = *tok; + } + + skip_whitespaces_or_newlines(pp, skip_newline); + } + } + expect_pp_token(pp, TokenKind_paren_r); + + return args; +} + +static Token* concat_two_tokens(Token* left, Token* right) { + StrBuilder builder; + strbuilder_init(&builder); + + // Left + if (left->kind == TokenKind_ident) { + strbuilder_append_string(&builder, left->value.string); + } else if (left->kind == TokenKind_literal_int) { + char buf[32]; + sprintf(buf, "%d", left->value.integer); + strbuilder_append_string(&builder, buf); + } else { + strbuilder_append_string(&builder, token_stringify(left)); + } + + // Right + if (right->kind == TokenKind_ident) { + strbuilder_append_string(&builder, right->value.string); + } else if (right->kind == TokenKind_literal_int) { + char buf[32]; + sprintf(buf, "%d", right->value.integer); + strbuilder_append_string(&builder, buf); + } else { + strbuilder_append_string(&builder, token_stringify(right)); + } + + // Concat + Token* result = calloc(1, sizeof(Token)); + + char* endptr; + int val = strtol(builder.buf, &endptr, 10); + if (*endptr == '\0') { + result->kind = TokenKind_literal_int; + result->value.integer = val; + } else { + result->kind = TokenKind_ident; + result->value.string = builder.buf; + } + + return result; +} + +static Token* stringify_tokens(TokenArray* tokens) { + StrBuilder builder; + strbuilder_init(&builder); + + bool prev_whitespace = false; + for (size_t i = 0; i < tokens->len; ++i) { + Token* tok = &tokens->data[i]; + if (tok->kind == TokenKind_whitespace) { + prev_whitespace = true; + continue; + } + + if (prev_whitespace && builder.len > 0) { + strbuilder_append_char(&builder, ' '); + } + prev_whitespace = false; + + const char* str = token_stringify(tok); + + // For string literals and char constants, we need to escape quotes. + if (tok->kind == TokenKind_literal_str || tok->kind == TokenKind_character_constant) { + for (const char* p = str; *p; ++p) { + if (*p == '\\' || *p == '"') { + strbuilder_append_char(&builder, '\\'); + } + strbuilder_append_char(&builder, *p); + } + } else { + strbuilder_append_string(&builder, str); + } + } + + Token* result = calloc(1, sizeof(Token)); + result->kind = TokenKind_literal_str; + result->value.string = builder.buf; + return result; +} + +typedef struct MacroExpansionContext { + // Stack of macro names that have been already expanded. + StrArray already_expanded; +} MacroExpansionContext; + +MacroExpansionContext* macroexpansioncontext_new() { + MacroExpansionContext* ctx = calloc(1, sizeof(MacroExpansionContext)); + strings_init(&ctx->already_expanded); + return ctx; +} + +static int expand_macro(Preprocessor* pp, bool skip_newline, MacroExpansionContext* ctx); + +static void expand_macro_arg(Preprocessor* pp, MacroArg* arg, bool skip_newline, MacroExpansionContext* ctx) { + tokens_push_new(&arg->tokens)->kind = TokenKind_eof; + + Preprocessor* pp2 = preprocessor_new(&arg->tokens, pp->include_depth, pp->macros, pp->include_paths, + pp->included_files, pp->generate_system_deps, pp->generate_user_deps); + + size_t arg_token_count = arg->tokens.len; + size_t processed_token_count = 0; + while (processed_token_count < arg_token_count) { + if (peek_pp_token(pp2)->kind == TokenKind_ident) { + processed_token_count += expand_macro(pp2, skip_newline, ctx); + } else { + next_pp_token(pp2); + processed_token_count += 1; + } + } + + tokens_pop(&arg->tokens); +} + +static int expand_macro(Preprocessor* pp, bool skip_newline, MacroExpansionContext* ctx) { + if (ctx == NULL) { + ctx = macroexpansioncontext_new(); + } + + int macro_name_pos = pp->pos; + Token* macro_name = peek_pp_token(pp); + const char* macro_name_str = macro_name->value.string; + + // Supress expansion if the macro has already been expanded. + for (size_t i = 0; i < ctx->already_expanded.len; ++i) { + if (strcmp(ctx->already_expanded.data[i], macro_name->value.string) == 0) { + next_pp_token(pp); + return 1; + } + } + + int macro_idx = find_macro(pp, macro_name->value.string); + if (macro_idx == -1) { + next_pp_token(pp); + return 1; + } + + SourceLocation original_loc = macro_name->loc; + size_t token_count_before_expansion; + size_t token_count_after_expansion; + Macro* macro = &pp->macros->data[macro_idx]; + if (macro->kind == MacroKind_func) { + next_pp_token(pp); + MacroArgArray* args = pp_parse_macro_arguments(pp, skip_newline); + if (!args) { + // If function-like macro name is not followed by opening parenthesis, it is not a macro invocation. + return pp->pos - macro_name_pos; + } + token_count_before_expansion = pp->pos - macro_name_pos; + replace_pp_tokens(pp, macro_name_pos, pp->pos, ¯o->replacements); + + // Operands of # and ## operators should not be expanded. + bool* no_expand = calloc(macro->parameters.len, sizeof(bool)); + for (size_t i = 0; i < macro->replacements.len; ++i) { + TokenKind kind = macro->replacements.data[i].kind; + if (kind == TokenKind_hashhash) { + Token* lhs = NULL; + for (int j = i - 1; j >= 0; --j) { + if (macro->replacements.data[j].kind != TokenKind_whitespace) { + lhs = ¯o->replacements.data[j]; + break; + } + } + Token* rhs = NULL; + for (size_t j = i + 1; j < macro->replacements.len; ++j) { + if (macro->replacements.data[j].kind != TokenKind_whitespace) { + rhs = ¯o->replacements.data[j]; + break; + } + } + if (lhs) { + int param1 = macro_find_param(macro, lhs); + if (param1 != -1) { + no_expand[param1] = true; + } + } + if (rhs) { + int param2 = macro_find_param(macro, rhs); + if (param2 != -1) { + no_expand[param2] = true; + } + } + } else if (kind == TokenKind_hash) { + Token* operand = NULL; + for (size_t j = i + 1; j < macro->replacements.len; ++j) { + if (macro->replacements.data[j].kind != TokenKind_whitespace) { + operand = ¯o->replacements.data[j]; + break; + } + } + if (operand) { + int param = macro_find_param(macro, operand); + if (param != -1) { + no_expand[param] = true; + } + } + } + } + + // Argument expansion + for (size_t i = 0; i < args->len; ++i) { + if (no_expand[i]) + continue; + MacroArg* arg = &args->data[i]; + expand_macro_arg(pp, arg, skip_newline, ctx); + } + + // Parameter substitution + size_t token_count = 0; + size_t offset = 0; + for (size_t i = 0; i < macro->replacements.len; ++i) { + Token* tok = pp_token_at(pp, macro_name_pos + i + offset); + + // Handle # operator (stringification) + if (tok->kind == TokenKind_hash) { + size_t param_idx_in_replacements = 0; + for (size_t j = i + 1; j < macro->replacements.len; ++j) { + if (macro->replacements.data[j].kind != TokenKind_whitespace) { + param_idx_in_replacements = j; + break; + } + } + + Token* param_tok = ¯o->replacements.data[param_idx_in_replacements]; + int macro_param_idx = macro_find_param(macro, param_tok); + if (macro_param_idx != -1) { + Token* stringified = stringify_tokens(&args->data[macro_param_idx].tokens); + + // Replace ('#' <whitespace>* <param>) with stringified token. + TokenArray single_token; + tokens_init(&single_token, 1); + *tokens_push_new(&single_token) = *stringified; + + size_t tokens_to_replace = param_idx_in_replacements - i + 1; + replace_pp_tokens(pp, macro_name_pos + i + offset, macro_name_pos + i + offset + tokens_to_replace, + &single_token); + token_count += 1; + offset += 1 - tokens_to_replace; + i = param_idx_in_replacements; // Skip to after the parameter + continue; + } + } + + int macro_param_idx = macro_find_param(macro, tok); + if (macro_param_idx != -1) { + size_t arg_token_count = args->data[macro_param_idx].tokens.len; + if (arg_token_count == 0) { + // Empty argument: insert a placemarker token + TokenArray placemarker_token; + tokens_init(&placemarker_token, 1); + Token* pm = tokens_push_new(&placemarker_token); + pm->kind = TokenKind_placemarker; + pm->loc = tok->loc; + replace_pp_tokens(pp, macro_name_pos + i + offset, macro_name_pos + i + offset + 1, + &placemarker_token); + token_count += 1; + // offset stays the same (1 - 1 = 0) + } else { + replace_pp_tokens(pp, macro_name_pos + i + offset, macro_name_pos + i + offset + 1, + &args->data[macro_param_idx].tokens); + token_count += arg_token_count; + offset += arg_token_count - 1; + } + } else { + ++token_count; + } + } + + // Handle ## operator + size_t token_count2 = 0; + for (size_t i = 0; i < token_count; ++i) { + int pos = macro_name_pos + i; + Token* tok = pp_token_at(pp, pos); + if (tok->kind == TokenKind_hashhash) { + // Concatenate previous and next tokens + int lhs_pos = -1; + for (int j = pos - 1; j >= macro_name_pos; --j) { + if (pp_token_at(pp, j)->kind != TokenKind_whitespace) { + lhs_pos = j; + break; + } + } + int rhs_pos = -1; + for (size_t j = pos + 1; j < macro_name_pos + token_count; ++j) { + if (pp_token_at(pp, j)->kind != TokenKind_whitespace) { + rhs_pos = j; + break; + } + } + if (lhs_pos == -1 || rhs_pos == -1) { + fatal_error("%s:%d: invalid usage of ## operator", tok->loc.filename, tok->loc.line); + } + + Token* lhs_tok = pp_token_at(pp, lhs_pos); + Token* rhs_tok = pp_token_at(pp, rhs_pos); + bool lhs_is_placemarker = lhs_tok->kind == TokenKind_placemarker; + bool rhs_is_placemarker = rhs_tok->kind == TokenKind_placemarker; + + TokenArray result_tokens; + tokens_init(&result_tokens, 1); + + if (lhs_is_placemarker && rhs_is_placemarker) { + // Both are placemarkers: result is a placemarker + Token* pm = tokens_push_new(&result_tokens); + pm->kind = TokenKind_placemarker; + pm->loc = tok->loc; + } else if (lhs_is_placemarker) { + // Left is placemarker: result is the right token + *tokens_push_new(&result_tokens) = *rhs_tok; + } else if (rhs_is_placemarker) { + // Right is placemarker: result is the left token + *tokens_push_new(&result_tokens) = *lhs_tok; + } else { + // Neither is placemarker: concatenate them + Token* concatenated = concat_two_tokens(lhs_tok, rhs_tok); + *tokens_push_new(&result_tokens) = *concatenated; + } + + replace_pp_tokens(pp, lhs_pos, rhs_pos + 1, &result_tokens); + token_count -= rhs_pos - lhs_pos; + i -= pos - lhs_pos; + token_count2 -= pos - lhs_pos - 1; + } else { + ++token_count2; + } + } + + // Remove placemarker tokens after ## processing + for (size_t i = 0; i < token_count2; ++i) { + Token* tok = pp_token_at(pp, macro_name_pos + i); + if (tok->kind == TokenKind_placemarker) { + tok->kind = TokenKind_removed; + } + } + + // Inherit a source location from the original macro token. + for (size_t i = 0; i < token_count2; ++i) { + pp_token_at(pp, macro_name_pos + i)->loc = original_loc; + } + token_count_after_expansion = token_count2; + } else if (macro->kind == MacroKind_obj) { + replace_pp_tokens(pp, macro_name_pos, macro_name_pos + 1, ¯o->replacements); + // Inherit a source location from the original macro token. + for (size_t i = 0; i < macro->replacements.len; ++i) { + pp_token_at(pp, macro_name_pos + i)->loc = original_loc; + } + token_count_before_expansion = 1; + token_count_after_expansion = macro->replacements.len; + } else if (macro->kind == MacroKind_builtin_file) { + Token file_tok; + file_tok.kind = TokenKind_literal_str; + file_tok.value.string = macro_name->loc.filename; + file_tok.loc.filename = NULL; + file_tok.loc.line = 0; + replace_single_pp_token(pp, macro_name_pos, &file_tok); + token_count_before_expansion = 1; + token_count_after_expansion = 1; + } else if (macro->kind == MacroKind_builtin_line) { + Token line_tok; + line_tok.kind = TokenKind_literal_int; + line_tok.value.integer = macro_name->loc.line; + line_tok.loc.filename = NULL; + line_tok.loc.line = 0; + replace_single_pp_token(pp, macro_name_pos, &line_tok); + token_count_before_expansion = 1; + token_count_after_expansion = 1; + } else { + unreachable(); + } + + // Recursive expansion. + strings_push(&ctx->already_expanded, macro_name_str); + pp->pos = macro_name_pos; + size_t processed_token_count = 0; + while (processed_token_count < token_count_after_expansion) { + if (peek_pp_token(pp)->kind == TokenKind_ident) { + processed_token_count += expand_macro(pp, skip_newline, ctx); + } else { + next_pp_token(pp); + processed_token_count += 1; + } + } + strings_pop(&ctx->already_expanded); + + return token_count_before_expansion; +} + +typedef enum { + GroupDelimiterKind_normal, + GroupDelimiterKind_after_if_directive, + GroupDelimiterKind_after_else_directive, +} GroupDelimiterKind; + +static bool is_delimiter_of_current_group(GroupDelimiterKind delimiter_kind, TokenKind token_kind) { + if (delimiter_kind == GroupDelimiterKind_normal) { + return token_kind == TokenKind_eof; + } else if (delimiter_kind == GroupDelimiterKind_after_if_directive) { + return token_kind == TokenKind_pp_directive_elif || token_kind == TokenKind_pp_directive_elifdef || + token_kind == TokenKind_pp_directive_elifndef || token_kind == TokenKind_pp_directive_else || + token_kind == TokenKind_pp_directive_endif; + } else if (delimiter_kind == GroupDelimiterKind_after_else_directive) { + return token_kind == TokenKind_pp_directive_endif; + } else { + unreachable(); + } +} + +static int replace_pp_tokens(Preprocessor*, int, int, TokenArray*); +static void include_conditionally(Preprocessor* pp, GroupDelimiterKind delimiter_kind, bool do_include); + +static bool preprocess_if_group_or_elif_group(Preprocessor* pp, bool did_include) { + Token* directive = next_pp_token(pp); + + if (directive->kind == TokenKind_pp_directive_if || directive->kind == TokenKind_pp_directive_elif) { + int condition_expr_start_pos = pp->pos; + + while (!pp_eof(pp)) { + Token* tok = peek_pp_token(pp); + if (tok->kind == TokenKind_newline) { + break; + } else if (tok->kind == TokenKind_ident) { + if (strcmp(tok->value.string, "defined") == 0) { + int defined_pos = pp->pos; + // 'defined' <ws>* '(' <ws>* <ident> <ws>* ')' + // 'defined' <ws>* <ident> + skip_pp_token(pp, TokenKind_ident); + skip_whitespaces(pp); + Token* macro_name; + if (consume_pp_token_if(pp, TokenKind_paren_l)) { + skip_whitespaces(pp); + macro_name = expect_pp_token(pp, TokenKind_ident); + skip_whitespaces(pp); + expect_pp_token(pp, TokenKind_paren_r); + } else { + macro_name = expect_pp_token(pp, TokenKind_ident); + } + bool is_defined = find_macro(pp, macro_name->value.string) != -1; + TokenArray defined_results; + tokens_init(&defined_results, 1); + Token* defined_result = tokens_push_new(&defined_results); + defined_result->kind = TokenKind_literal_int; + defined_result->value.integer = is_defined; + pp->pos = replace_pp_tokens(pp, defined_pos, pp->pos, &defined_results); + } else { + expand_macro(pp, false, NULL); + } + } else { + next_pp_token(pp); + } + } + + // all remaining identifiers other than true (including those lexically identical to keywords such as false) are + // replaced with the pp-number 0, true is replaced with pp-number 1, and then each preprocessing token is + // converted into a token. + for (int pos = condition_expr_start_pos; pos < pp->pos; ++pos) { + Token* tok = pp_token_at(pp, pos); + if (tok->kind == TokenKind_ident) { + bool is_true = strcmp(tok->value.string, "true") == 0; + tok->kind = TokenKind_literal_int; + tok->value.integer = is_true; + } + } + + int condition_expr_tokens_len = pp->pos - condition_expr_start_pos; + TokenArray condition_expr_tokens; + // +1 to add EOF token at the end. + tokens_init(&condition_expr_tokens, condition_expr_tokens_len + 1); + for (int i = 0; i < condition_expr_tokens_len; ++i) { + *tokens_push_new(&condition_expr_tokens) = *pp_token_at(pp, condition_expr_start_pos + i); + } + Token* eof_tok = tokens_push_new(&condition_expr_tokens); + eof_tok->kind = TokenKind_eof; + + bool do_include = pp_eval_constant_expr(&condition_expr_tokens) && !did_include; + include_conditionally(pp, GroupDelimiterKind_after_if_directive, do_include); + return do_include; + } else if (directive->kind == TokenKind_pp_directive_ifdef || directive->kind == TokenKind_pp_directive_elifdef) { + skip_whitespaces(pp); + Token* macro_name = consume_pp_token_if(pp, TokenKind_ident); + if (!macro_name) { + fatal_error(""); + } + + bool do_include = !did_include && find_macro(pp, macro_name->value.string) != -1; + include_conditionally(pp, GroupDelimiterKind_after_if_directive, do_include); + return do_include; + } else if (directive->kind == TokenKind_pp_directive_ifndef || directive->kind == TokenKind_pp_directive_elifndef) { + skip_whitespaces(pp); + Token* macro_name = consume_pp_token_if(pp, TokenKind_ident); + if (!macro_name) { + fatal_error(""); + } + + bool do_include = !did_include && find_macro(pp, macro_name->value.string) == -1; + include_conditionally(pp, GroupDelimiterKind_after_if_directive, do_include); + return do_include; + } else { + unreachable(); + } +} + +static bool preprocess_if_group(Preprocessor* pp) { + return preprocess_if_group_or_elif_group(pp, false); +} + +static bool preprocess_elif_group(Preprocessor* pp, bool did_include) { + return preprocess_if_group_or_elif_group(pp, did_include); +} + +// elif-groups: +// { elif-group }+ +static bool preprocess_elif_groups_opt(Preprocessor* pp, bool did_include) { + while (!pp_eof(pp)) { + Token* tok = peek_pp_token(pp); + if (tok->kind == TokenKind_pp_directive_elif || tok->kind == TokenKind_pp_directive_elifdef || + tok->kind == TokenKind_pp_directive_elifndef) { + did_include |= preprocess_elif_group(pp, did_include); + } else { + break; + } + } + return did_include; +} + +// else-group: +// '#' 'else' group? +static void preprocess_else_group(Preprocessor* pp, bool did_include) { + skip_pp_token(pp, TokenKind_pp_directive_else); + skip_whitespaces(pp); + expect_pp_token(pp, TokenKind_newline); + + include_conditionally(pp, GroupDelimiterKind_after_else_directive, !did_include); +} + +// endif-line: +// '#' 'endif' new-line +static void preprocess_endif_directive(Preprocessor* pp) { + skip_pp_token(pp, TokenKind_pp_directive_endif); + skip_whitespaces(pp); + expect_pp_token(pp, TokenKind_newline); +} + +// if-section: +// if-group elif-groups? else-group? endif-line +static void preprocess_if_section(Preprocessor* pp) { + bool did_include = preprocess_if_group(pp); + did_include = preprocess_elif_groups_opt(pp, did_include); + if (peek_pp_token(pp)->kind == TokenKind_pp_directive_else) { + preprocess_else_group(pp, did_include); + } + preprocess_endif_directive(pp); +} + +static void preprocess_include_directive(Preprocessor* pp) { + skip_pp_token(pp, TokenKind_pp_directive_include); + skip_whitespaces(pp); + Token* include_name = read_include_header_name(pp); + const char* include_name_resolved = resolve_include_name(pp, include_name); + if (include_name_resolved == NULL) { + fatal_error("%s:%d: cannot resolve include file name: %s", include_name->loc.filename, include_name->loc.line, + token_stringify(include_name)); + } + + if ((pp->generate_system_deps && include_name->value.string[0] == '<') || + (pp->generate_user_deps && include_name->value.string[0] == '"')) { + bool already_included = false; + for (size_t i = 0; i < pp->included_files->len; ++i) { + if (strcmp(pp->included_files->data[i], include_name_resolved) == 0) { + already_included = true; + break; + } + } + if (!already_included) { + strings_push(pp->included_files, include_name_resolved); + } + } + + skip_whitespaces(pp); + expect_pp_token(pp, TokenKind_newline); + expand_include_directive(pp, include_name_resolved, include_name); +} + +// #include_next is a part of GNU extension. +// https://gcc.gnu.org/onlinedocs/cpp/Wrapper-Headers.html +static void preprocess_include_next_directive(Preprocessor* pp) { + skip_pp_token(pp, TokenKind_pp_directive_include_next); + skip_whitespaces(pp); + Token* include_name = read_include_header_name(pp); + const char* include_name_resolved = resolve_next_include_name(pp, include_name); + if (include_name_resolved == NULL) { + fatal_error("%s:%d: cannot resolve include file name: %s", include_name->loc.filename, include_name->loc.line, + token_stringify(include_name)); + } + + if (include_name->value.string[0] == '"') { + bool already_included = false; + for (size_t i = 0; i < pp->included_files->len; ++i) { + if (strcmp(pp->included_files->data[i], include_name_resolved) == 0) { + already_included = true; + break; + } + } + if (!already_included) { + strings_push(pp->included_files, include_name_resolved); + } + } + + skip_whitespaces(pp); + expect_pp_token(pp, TokenKind_newline); + expand_include_directive(pp, include_name_resolved, include_name); +} + +static void preprocess_embed_directive(Preprocessor*) { + unimplemented(); +} + +static void preprocess_define_directive(Preprocessor* pp) { + skip_pp_token(pp, TokenKind_pp_directive_define); + skip_whitespaces(pp); + Token* macro_name = expect_pp_token(pp, TokenKind_ident); + + if (consume_pp_token_if(pp, TokenKind_paren_l)) { + TokenArray* parameters = pp_parse_macro_parameters(pp); + skip_whitespaces(pp); + int replacements_start_pos = pp->pos; + seek_to_next_newline(pp); + Macro* macro = macros_push_new(pp->macros); + macro->kind = MacroKind_func; + macro->name = macro_name->value.string; + macro->parameters = *parameters; + int n_replacements = pp->pos - replacements_start_pos; + tokens_init(¯o->replacements, n_replacements); + for (int i = 0; i < n_replacements; ++i) { + *tokens_push_new(¯o->replacements) = *pp_token_at(pp, replacements_start_pos + i); + } + // Remove trailing whitespaces. + for (int i = n_replacements - 1; i >= 0; --i) { + if (macro->replacements.data[i].kind == TokenKind_whitespace) { + tokens_pop(¯o->replacements); + } else { + break; + } + } + } else { + skip_whitespaces(pp); + int replacements_start_pos = pp->pos; + seek_to_next_newline(pp); + Macro* macro = macros_push_new(pp->macros); + macro->kind = MacroKind_obj; + macro->name = macro_name->value.string; + int n_replacements = pp->pos - replacements_start_pos; + tokens_init(¯o->replacements, n_replacements); + for (int i = 0; i < n_replacements; ++i) { + *tokens_push_new(¯o->replacements) = *pp_token_at(pp, replacements_start_pos + i); + } + // Remove trailing whitespaces. + for (int i = n_replacements - 1; i >= 0; --i) { + if (macro->replacements.data[i].kind == TokenKind_whitespace) { + tokens_pop(¯o->replacements); + } else { + break; + } + } + } + expect_pp_token(pp, TokenKind_newline); +} + +static void preprocess_undef_directive(Preprocessor* pp) { + skip_pp_token(pp, TokenKind_pp_directive_undef); + skip_whitespaces(pp); + Token* macro_name = consume_pp_token_if(pp, TokenKind_ident); + if (macro_name) { + int macro_idx = find_macro(pp, macro_name->value.string); + if (macro_idx != -1) { + undef_macro(pp, macro_idx); + } + } +} + +static void preprocess_line_directive(Preprocessor*) { + unimplemented(); +} + +// control-line: +// ... +// '#' 'error' pp-tokens? new-line +static void preprocess_error_directive(Preprocessor* pp) { + // The C23 standard does not specify format of diagnostic message caused by #error. + // Ducc assumes that #error takes exactly one argument consisting of a string literal. + // TODO: output some general message or something else if not. + skip_pp_token(pp, TokenKind_pp_directive_error); + skip_whitespaces(pp); + Token* msg = expect_pp_token(pp, TokenKind_literal_str); + skip_whitespaces(pp); + expect_pp_token(pp, TokenKind_newline); + fatal_error("%s:%d: %s", msg->loc.filename, msg->loc.line, msg->value.string); +} + +// control-line: +// ... +// '#' 'warning' pp-tokens? new-line +static void preprocess_warning_directive(Preprocessor* pp) { + // The C23 standard does not specify format of diagnostic message caused by #warning. + // Ducc assumes that #warning takes exactly one argument consisting of a string literal. + // TODO: output some general message or something else if not. + skip_pp_token(pp, TokenKind_pp_directive_warning); + skip_whitespaces(pp); + Token* msg = expect_pp_token(pp, TokenKind_literal_str); + skip_whitespaces(pp); + expect_pp_token(pp, TokenKind_newline); + fprintf(stderr, "%s:%d: %s", msg->loc.filename, msg->loc.line, msg->value.string); +} + +static void preprocess_pragma_directive(Preprocessor* pp) { + // Ignore all #pragma directives for now. + skip_pp_token(pp, TokenKind_pp_directive_pragma); + seek_to_next_newline(pp); + skip_pp_token(pp, TokenKind_newline); +} + +static void preprocess_nop_directive(Preprocessor* pp) { + skip_pp_token(pp, TokenKind_pp_directive_nop); +} + +static void preprocess_non_directive_directive(Preprocessor* pp) { + Token* tok = peek_pp_token(pp); + // C23 6.10.1.13: + // The execution of a non-directive preprocessing directive results in undefined behavior. + fatal_error("%s:%d: invalid preprocessing directive, '%s'", tok->loc.filename, tok->loc.line, token_stringify(tok)); +} + +static void preprocess_text_line(Preprocessor* pp) { + while (!pp_eof(pp)) { + if (consume_pp_token_if(pp, TokenKind_newline)) { + return; + } + if (consume_pp_token_if_not(pp, TokenKind_ident)) { + continue; + } + + expand_macro(pp, true, NULL); + } + expect_pp_token(pp, TokenKind_newline); +} + +// group-part: +// if-section +// control-line +// '#' non-directive +// text-line +// +// control-line: +// '#' 'include' ... +// '#' 'embed' ... +// '#' 'define' ... +// '#' 'undef' ... +// '#' 'line' ... +// '#' 'error' ... +// '#' 'warning' ... +// '#' 'pragma' ... +// '#' new-line +static void preprocess_group_part(Preprocessor* pp) { + Token* tok = peek_pp_token(pp); + if (tok->kind == TokenKind_pp_directive_if || tok->kind == TokenKind_pp_directive_ifdef || + tok->kind == TokenKind_pp_directive_ifndef) { + preprocess_if_section(pp); + } else if (tok->kind == TokenKind_pp_directive_elif || tok->kind == TokenKind_pp_directive_elifdef || + tok->kind == TokenKind_pp_directive_elifndef || tok->kind == TokenKind_pp_directive_else || + tok->kind == TokenKind_pp_directive_endif) { + fatal_error("%s:%d: unexpected '%s'; no corresponding '#if'*", tok->loc.filename, tok->loc.line, + token_kind_stringify(tok->kind)); + } else if (tok->kind == TokenKind_pp_directive_include) { + preprocess_include_directive(pp); + } else if (tok->kind == TokenKind_pp_directive_include_next) { + preprocess_include_next_directive(pp); + } else if (tok->kind == TokenKind_pp_directive_embed) { + preprocess_embed_directive(pp); + } else if (tok->kind == TokenKind_pp_directive_define) { + preprocess_define_directive(pp); + } else if (tok->kind == TokenKind_pp_directive_undef) { + preprocess_undef_directive(pp); + } else if (tok->kind == TokenKind_pp_directive_line) { + preprocess_line_directive(pp); + } else if (tok->kind == TokenKind_pp_directive_error) { + preprocess_error_directive(pp); + } else if (tok->kind == TokenKind_pp_directive_warning) { + preprocess_warning_directive(pp); + } else if (tok->kind == TokenKind_pp_directive_pragma) { + preprocess_pragma_directive(pp); + } else if (tok->kind == TokenKind_pp_directive_nop) { + preprocess_nop_directive(pp); + } else if (tok->kind == TokenKind_pp_directive_non_directive) { + preprocess_non_directive_directive(pp); + } else { + preprocess_text_line(pp); + } +} + +// group: +// { group-part }+ +static void preprocess_group_opt(Preprocessor* pp, GroupDelimiterKind delimiter_kind) { + while (!pp_eof(pp)) { + Token* tok = peek_pp_token(pp); + if (is_delimiter_of_current_group(delimiter_kind, tok->kind)) + return; + preprocess_group_part(pp); + } + + if (delimiter_kind != GroupDelimiterKind_normal) { + expect_pp_token(pp, TokenKind_pp_directive_endif); + } +} + +static void skip_group_opt(Preprocessor* pp, GroupDelimiterKind delimiter_kind) { + assert(delimiter_kind != GroupDelimiterKind_normal); + int nesting = 0; + + while (!pp_eof(pp)) { + Token* tok = peek_pp_token(pp); + if (nesting == 0 && is_delimiter_of_current_group(delimiter_kind, tok->kind)) { + return; + } + if (tok->kind == TokenKind_pp_directive_if || tok->kind == TokenKind_pp_directive_ifdef || + tok->kind == TokenKind_pp_directive_ifndef) { + ++nesting; + } else if (tok->kind == TokenKind_pp_directive_endif) { + --nesting; + } + int first_pos = pp->pos; + seek_to_next_newline(pp); + expect_pp_token(pp, TokenKind_newline); + make_tokens_removed(pp, first_pos, pp->pos); + } + + expect_pp_token(pp, TokenKind_pp_directive_endif); +} + +static void include_conditionally(Preprocessor* pp, GroupDelimiterKind delimiter_kind, bool do_include) { + if (do_include) { + preprocess_group_opt(pp, delimiter_kind); + } else { + skip_group_opt(pp, delimiter_kind); + } +} + +// preprocessing-file: +// group? +static void preprocess_preprocessing_file(Preprocessor* pp) { + preprocess_group_opt(pp, GroupDelimiterKind_normal); +} + +static void remove_pp_directive(Preprocessor* pp, int directive_token_pos) { + seek_to_next_newline(pp); + skip_pp_token(pp, TokenKind_newline); + make_tokens_removed(pp, directive_token_pos, pp->pos); +} + +static void remove_pp_directives(Preprocessor* pp) { + pp->pos = 0; + while (!pp_eof(pp)) { + if (is_pp_directive(peek_pp_token(pp)->kind)) { + remove_pp_directive(pp, pp->pos); + } else { + next_pp_token(pp); + } + } +} + +static char* get_ducc_include_path() { + const char* self_dir = get_self_dir(); + char* buf = calloc(strlen(self_dir) + strlen("/../include") + 1, sizeof(char)); + sprintf(buf, "%s/../include", self_dir); + return buf; +} + +static TokenArray* do_preprocess(InFile* src, int depth, MacroArray* macros, StrArray* include_paths, + StrArray* included_files, bool generate_system_deps, bool generate_user_deps) { + TokenArray* pp_tokens = tokenize(src); + Preprocessor* pp = preprocessor_new(pp_tokens, depth, macros, include_paths, included_files, generate_system_deps, + generate_user_deps); + + preprocess_preprocessing_file(pp); + remove_pp_directives(pp); + return pp->pp_tokens; +} + +TokenArray* preprocess(InFile* src, StrArray* user_defines, StrArray* user_include_dirs, StrArray* included_files, + bool generate_system_deps, bool generate_user_deps) { + MacroArray* macros = macros_new(); + add_predefined_macros(macros); + add_user_defines(macros, user_defines); + strings_push(included_files, src->loc.filename); + + StrArray* include_paths = calloc(1, sizeof(StrArray)); + strings_init(include_paths); + + // Ducc's built-in headers has highest priority. + strings_push(include_paths, get_ducc_include_path()); + + for (size_t i = 0; i < user_include_dirs->len; ++i) { + strings_push(include_paths, user_include_dirs->data[i]); + } + strings_push(include_paths, "/usr/local/include"); + strings_push(include_paths, "/usr/include/x86_64-linux-gnu"); + strings_push(include_paths, "/usr/include"); + + return do_preprocess(src, 0, macros, include_paths, included_files, generate_system_deps, generate_user_deps); +} + +void concat_adjacent_string_literals(TokenArray* pp_tokens) { + size_t last_nonempty_token_pos = 0; + TokenKind last_nonempty_token_kind = TokenKind_eof; + for (size_t pos = 0; pos < pp_tokens->len; ++pos) { + Token* pp_tok = &pp_tokens->data[pos]; + TokenKind k = pp_tok->kind; + if (k == TokenKind_removed || k == TokenKind_whitespace || k == TokenKind_newline) { + continue; + } + if (k == TokenKind_literal_str && last_nonempty_token_kind == TokenKind_literal_str) { + // Concatenate adjacent string literals. + Token* last_pp_tok = &pp_tokens->data[last_nonempty_token_pos]; + const char* s1 = last_pp_tok->value.string; + size_t l1 = strlen(s1); + const char* s2 = pp_tok->value.string; + size_t l2 = strlen(s2); + char* buf = calloc(l1 + l2 + 1, sizeof(char)); + memcpy(buf, s1, l1); + memcpy(buf + l1, s2, l2); + last_pp_tok->value.string = buf; + pp_tok->kind = TokenKind_removed; + } else { + last_nonempty_token_pos = pos; + last_nonempty_token_kind = k; + } + } +} + +void print_token_to_file(FILE* out, TokenArray* pp_tokens) { + for (size_t i = 0; i < pp_tokens->len; ++i) { + Token* tok = &pp_tokens->data[i]; + + if (tok->kind == TokenKind_whitespace) { + // TODO: preserve indent? + fprintf(out, " "); + } else if (tok->kind == TokenKind_removed) { + // Output nothing for removed tokens + } else if (tok->kind == TokenKind_newline) { + // TODO: remove adjacent newlines? + fprintf(out, "\n"); + } else if (tok->kind != TokenKind_eof) { + // TODO: string literal + fprintf(out, "%s", token_stringify(tok)); + // Add space after token if next token is not punctuation + // TODO: apply stricter approach + if (i + 1 < pp_tokens->len) { + Token* next = &pp_tokens->data[i + 1]; + if (next->kind != TokenKind_newline && next->kind != TokenKind_whitespace && + next->kind != TokenKind_removed && next->kind != TokenKind_eof && next->kind != TokenKind_comma && + next->kind != TokenKind_semicolon && next->kind != TokenKind_paren_r && + next->kind != TokenKind_bracket_r && next->kind != TokenKind_brace_r && + next->kind != TokenKind_dot) { + fprintf(out, " "); + } + } + } + } +} diff --git a/src/cc1/preprocess.h b/src/cc1/preprocess.h new file mode 100644 index 0000000..8d86fe5 --- /dev/null +++ b/src/cc1/preprocess.h @@ -0,0 +1,13 @@ +#ifndef DUCC_PREPROCESS_H +#define DUCC_PREPROCESS_H + +#include "../lib/common.h" +#include "io.h" +#include "token.h" + +TokenArray* preprocess(InFile* src, StrArray* user_defines, StrArray* user_include_dirs, StrArray* included_files, + bool generate_system_deps, bool generate_user_deps); +void concat_adjacent_string_literals(TokenArray* pp_tokens); +void print_token_to_file(FILE* output_file, TokenArray* pp_tokens); + +#endif diff --git a/src/cc1/sys.c b/src/cc1/sys.c new file mode 100644 index 0000000..693062a --- /dev/null +++ b/src/cc1/sys.c @@ -0,0 +1,21 @@ +#include "sys.h" +#include <libgen.h> +#include <linux/limits.h> +#include <unistd.h> +#include "../lib/ducc.h" + +static char* get_self_path() { + char* buf = calloc(PATH_MAX, sizeof(char)); + ssize_t len = readlink("/proc/self/exe", buf, PATH_MAX - 1); + if (len == -1 || len == PATH_MAX - 1) { + return NULL; + } + buf[len] = '\0'; + return buf; +} + +// It returns a path not including final / except for root directory. +char* get_self_dir() { + char* path = get_self_path(); + return dirname(path); +} diff --git a/src/cc1/sys.h b/src/cc1/sys.h new file mode 100644 index 0000000..2527724 --- /dev/null +++ b/src/cc1/sys.h @@ -0,0 +1,7 @@ +#ifndef DUCC_SYS_H +#define DUCC_SYS_H + +// It returns a path not including final / except for root directory. +char* get_self_dir(); + +#endif diff --git a/src/cc1/token.c b/src/cc1/token.c new file mode 100644 index 0000000..480a9a5 --- /dev/null +++ b/src/cc1/token.c @@ -0,0 +1,387 @@ +#include "token.h" +#include "../lib/common.h" +#include "../lib/json.h" + +const char* token_kind_stringify(TokenKind k) { + if (k == TokenKind_eof) + return "<eof>"; + else if (k == TokenKind_hash) + return "#"; + else if (k == TokenKind_hashhash) + return "##"; + else if (k == TokenKind_whitespace) + return "<whitespace>"; + else if (k == TokenKind_removed) + return "<removed>"; + else if (k == TokenKind_placemarker) + return "<placemarker>"; + else if (k == TokenKind_newline) + return "<new-line>"; + else if (k == TokenKind_other) + return "<other>"; + else if (k == TokenKind_character_constant) + return "<character-constant>"; + else if (k == TokenKind_header_name) + return "<header-name>"; + else if (k == TokenKind_pp_directive_define) + return "#define"; + else if (k == TokenKind_pp_directive_elif) + return "#elif"; + else if (k == TokenKind_pp_directive_elifdef) + return "#elifdef"; + else if (k == TokenKind_pp_directive_elifndef) + return "#elifndef"; + else if (k == TokenKind_pp_directive_else) + return "#else"; + else if (k == TokenKind_pp_directive_embed) + return "#embed"; + else if (k == TokenKind_pp_directive_endif) + return "#endif"; + else if (k == TokenKind_pp_directive_error) + return "#error"; + else if (k == TokenKind_pp_directive_if) + return "#if"; + else if (k == TokenKind_pp_directive_ifdef) + return "#ifdef"; + else if (k == TokenKind_pp_directive_ifndef) + return "#ifndef"; + else if (k == TokenKind_pp_directive_include) + return "#include"; + else if (k == TokenKind_pp_directive_include_next) + return "#include_next"; + else if (k == TokenKind_pp_directive_line) + return "#line"; + else if (k == TokenKind_pp_directive_non_directive) + return "#<non-directive>"; + else if (k == TokenKind_pp_directive_nop) + return "#"; + else if (k == TokenKind_pp_directive_pragma) + return "#pragma"; + else if (k == TokenKind_pp_directive_undef) + return "#undef"; + else if (k == TokenKind_pp_directive_warning) + return "#warning"; + else if (k == TokenKind_pp_operator_defined) + return "defined"; + else if (k == TokenKind_pp_operator___has_c_attribute) + return "__has_c_attribute"; + else if (k == TokenKind_pp_operator___has_embed) + return "__has_embed"; + else if (k == TokenKind_pp_operator___has_include) + return "__has_include"; + else if (k == TokenKind_keyword_alignas) + return "alignas"; + else if (k == TokenKind_keyword_alignof) + return "alignof"; + else if (k == TokenKind_keyword_auto) + return "auto"; + else if (k == TokenKind_keyword_bool) + return "bool"; + else if (k == TokenKind_keyword_break) + return "break"; + else if (k == TokenKind_keyword_case) + return "case"; + else if (k == TokenKind_keyword_char) + return "char"; + else if (k == TokenKind_keyword_const) + return "const"; + else if (k == TokenKind_keyword_constexpr) + return "constexpr"; + else if (k == TokenKind_keyword_continue) + return "continue"; + else if (k == TokenKind_keyword_default) + return "default"; + else if (k == TokenKind_keyword_do) + return "do"; + else if (k == TokenKind_keyword_double) + return "double"; + else if (k == TokenKind_keyword_else) + return "else"; + else if (k == TokenKind_keyword_enum) + return "enum"; + else if (k == TokenKind_keyword_extern) + return "extern"; + else if (k == TokenKind_keyword_false) + return "false"; + else if (k == TokenKind_keyword_float) + return "float"; + else if (k == TokenKind_keyword_for) + return "for"; + else if (k == TokenKind_keyword_goto) + return "goto"; + else if (k == TokenKind_keyword_if) + return "if"; + else if (k == TokenKind_keyword_inline) + return "inline"; + else if (k == TokenKind_keyword_int) + return "int"; + else if (k == TokenKind_keyword_long) + return "long"; + else if (k == TokenKind_keyword_nullptr) + return "nullptr"; + else if (k == TokenKind_keyword_register) + return "register"; + else if (k == TokenKind_keyword_restrict) + return "restrict"; + else if (k == TokenKind_keyword_return) + return "return"; + else if (k == TokenKind_keyword_short) + return "short"; + else if (k == TokenKind_keyword_signed) + return "signed"; + else if (k == TokenKind_keyword_sizeof) + return "sizeof"; + else if (k == TokenKind_keyword_static) + return "static"; + else if (k == TokenKind_keyword_static_assert) + return "static_assert"; + else if (k == TokenKind_keyword_struct) + return "struct"; + else if (k == TokenKind_keyword_switch) + return "switch"; + else if (k == TokenKind_keyword_thread_local) + return "thread_local"; + else if (k == TokenKind_keyword_true) + return "true"; + else if (k == TokenKind_keyword_typedef) + return "typedef"; + else if (k == TokenKind_keyword_typeof) + return "typeof"; + else if (k == TokenKind_keyword_typeof_unqual) + return "typeof_unqual"; + else if (k == TokenKind_keyword_union) + return "union"; + else if (k == TokenKind_keyword_unsigned) + return "unsigned"; + else if (k == TokenKind_keyword_void) + return "void"; + else if (k == TokenKind_keyword_volatile) + return "volatile"; + else if (k == TokenKind_keyword_while) + return "while"; + else if (k == TokenKind_keyword__Atomic) + return "_Atomic"; + else if (k == TokenKind_keyword__BitInt) + return "_BitInt"; + else if (k == TokenKind_keyword__Complex) + return "_Complex"; + else if (k == TokenKind_keyword__Decimal128) + return "_Decimal128"; + else if (k == TokenKind_keyword__Decimal32) + return "_Decimal32"; + else if (k == TokenKind_keyword__Decimal64) + return "_Decimal64"; + else if (k == TokenKind_keyword__Generic) + return "_Generic"; + else if (k == TokenKind_keyword__Imaginary) + return "_Imaginary"; + else if (k == TokenKind_keyword__Noreturn) + return "_Noreturn"; + else if (k == TokenKind_and) + return "&"; + else if (k == TokenKind_andand) + return "&&"; + else if (k == TokenKind_arrow) + return "->"; + else if (k == TokenKind_assign) + return "="; + else if (k == TokenKind_assign_add) + return "+="; + else if (k == TokenKind_assign_and) + return "&="; + else if (k == TokenKind_assign_div) + return "/="; + else if (k == TokenKind_assign_lshift) + return "<<="; + else if (k == TokenKind_assign_mod) + return "%="; + else if (k == TokenKind_assign_mul) + return "*="; + else if (k == TokenKind_assign_or) + return "|="; + else if (k == TokenKind_assign_rshift) + return ">>="; + else if (k == TokenKind_assign_sub) + return "-="; + else if (k == TokenKind_assign_xor) + return "^="; + else if (k == TokenKind_brace_l) + return "{"; + else if (k == TokenKind_brace_r) + return "}"; + else if (k == TokenKind_bracket_l) + return "["; + else if (k == TokenKind_bracket_r) + return "]"; + else if (k == TokenKind_colon) + return ":"; + else if (k == TokenKind_comma) + return ","; + else if (k == TokenKind_dot) + return "."; + else if (k == TokenKind_ellipsis) + return "..."; + else if (k == TokenKind_eq) + return "=="; + else if (k == TokenKind_ge) + return ">="; + else if (k == TokenKind_gt) + return ">"; + else if (k == TokenKind_ident) + return "<identifier>"; + else if (k == TokenKind_le) + return "<="; + else if (k == TokenKind_literal_int) + return "<integer>"; + else if (k == TokenKind_literal_double) + return "<double>"; + else if (k == TokenKind_literal_str) + return "<string>"; + else if (k == TokenKind_lshift) + return "<<"; + else if (k == TokenKind_lt) + return "<"; + else if (k == TokenKind_minus) + return "-"; + else if (k == TokenKind_minusminus) + return "--"; + else if (k == TokenKind_ne) + return "!="; + else if (k == TokenKind_not) + return "!"; + else if (k == TokenKind_or) + return "|"; + else if (k == TokenKind_oror) + return "||"; + else if (k == TokenKind_paren_l) + return "("; + else if (k == TokenKind_paren_r) + return ")"; + else if (k == TokenKind_percent) + return "%"; + else if (k == TokenKind_plus) + return "+"; + else if (k == TokenKind_plusplus) + return "++"; + else if (k == TokenKind_question) + return "?"; + else if (k == TokenKind_rshift) + return ">>"; + else if (k == TokenKind_semicolon) + return ";"; + else if (k == TokenKind_slash) + return "/"; + else if (k == TokenKind_star) + return "*"; + else if (k == TokenKind_tilde) + return "~"; + else if (k == TokenKind_xor) + return "^"; + else + unreachable(); +} + +bool is_pp_directive(TokenKind k) { + return k == TokenKind_pp_directive_define || k == TokenKind_pp_directive_elif || + k == TokenKind_pp_directive_elifdef || k == TokenKind_pp_directive_elifndef || + k == TokenKind_pp_directive_else || k == TokenKind_pp_directive_embed || k == TokenKind_pp_directive_endif || + k == TokenKind_pp_directive_error || k == TokenKind_pp_directive_if || k == TokenKind_pp_directive_ifdef || + k == TokenKind_pp_directive_ifndef || k == TokenKind_pp_directive_include || + k == TokenKind_pp_directive_include_next || k == TokenKind_pp_directive_line || + k == TokenKind_pp_directive_non_directive || k == TokenKind_pp_directive_nop || + k == TokenKind_pp_directive_pragma || k == TokenKind_pp_directive_undef || + k == TokenKind_pp_directive_warning; +} + +const char* token_stringify(Token* tok) { + TokenKind k = tok->kind; + if (k == TokenKind_pp_directive_non_directive) { + char* buf = calloc(strlen(tok->value.string) + 1 + 1, sizeof(char)); + sprintf(buf, "#%s", tok->value.string); + return buf; + } else if (k == TokenKind_literal_int) { + char* buf = calloc(10, sizeof(char)); + sprintf(buf, "%d", tok->value.integer); + return buf; + } else if (k == TokenKind_literal_double) { + char* buf = calloc(32, sizeof(char)); + sprintf(buf, "%g", tok->value.floating); + return buf; + } else if (k == TokenKind_other || k == TokenKind_character_constant || k == TokenKind_ident || + k == TokenKind_literal_str || k == TokenKind_header_name) { + return tok->value.string; + } else { + return token_kind_stringify(k); + } +} + +void token_build_json(JsonBuilder* builder, Token* tok) { + jsonbuilder_object_start(builder); + jsonbuilder_object_member_start(builder, "kind"); + jsonbuilder_string(builder, token_kind_stringify(tok->kind)); + jsonbuilder_object_member_end(builder); + jsonbuilder_object_member_start(builder, "value"); + if (tok->kind == TokenKind_pp_directive_non_directive) { + char* buf = calloc(strlen(tok->value.string) + 1 + 1, sizeof(char)); + sprintf(buf, "#%s", tok->value.string); + jsonbuilder_string(builder, buf); + } else if (tok->kind == TokenKind_literal_int) { + jsonbuilder_integer(builder, tok->value.integer); + } else if (tok->kind == TokenKind_other || tok->kind == TokenKind_character_constant || + tok->kind == TokenKind_ident || tok->kind == TokenKind_literal_str) { + jsonbuilder_string(builder, tok->value.string); + } else { + jsonbuilder_null(builder); + } + jsonbuilder_object_member_end(builder); + jsonbuilder_object_member_start(builder, "loc"); + sourcelocation_build_json(builder, &tok->loc); + jsonbuilder_object_member_end(builder); + jsonbuilder_object_end(builder); +} + +void tokens_init(TokenArray* tokens, size_t capacity) { + tokens->len = 0; + tokens->capacity = capacity; + tokens->data = calloc(tokens->capacity, sizeof(Token)); +} + +void tokens_reserve(TokenArray* tokens, size_t size) { + if (size <= tokens->capacity) + return; + while (tokens->capacity < size) { + tokens->capacity *= 2; + } + tokens->data = realloc(tokens->data, tokens->capacity * sizeof(Token)); + memset(tokens->data + tokens->len, 0, (tokens->capacity - tokens->len) * sizeof(Token)); +} + +Token* tokens_push_new(TokenArray* tokens) { + tokens_reserve(tokens, tokens->len + 1); + return &tokens->data[tokens->len++]; +} + +Token* tokens_pop(TokenArray* tokens) { + if (tokens->len == 0) { + return NULL; + } else { + return &tokens->data[--tokens->len]; + } +} + +void tokens_build_json(JsonBuilder* builder, TokenArray* tokens) { + jsonbuilder_object_start(builder); + jsonbuilder_object_member_start(builder, "len"); + jsonbuilder_integer(builder, tokens->len); + jsonbuilder_object_member_end(builder); + jsonbuilder_object_member_start(builder, "data"); + jsonbuilder_array_start(builder); + for (size_t i = 0; i < tokens->len; ++i) { + jsonbuilder_array_element_start(builder); + token_build_json(builder, &tokens->data[i]); + jsonbuilder_array_element_end(builder); + } + jsonbuilder_array_end(builder); + jsonbuilder_object_member_end(builder); + jsonbuilder_object_end(builder); +} diff --git a/src/cc1/token.h b/src/cc1/token.h new file mode 100644 index 0000000..46490c1 --- /dev/null +++ b/src/cc1/token.h @@ -0,0 +1,183 @@ +#ifndef DUCC_TOKEN_H +#define DUCC_TOKEN_H + +#include "../lib/ducc.h" +#include "io.h" + +typedef enum { + TokenKind_eof, + + // Only preprocessing phase. + TokenKind_hash, + TokenKind_hashhash, + TokenKind_whitespace, + TokenKind_removed, + TokenKind_placemarker, + TokenKind_newline, + TokenKind_other, + TokenKind_character_constant, + TokenKind_header_name, + TokenKind_pp_directive_define, + TokenKind_pp_directive_elif, + TokenKind_pp_directive_elifdef, + TokenKind_pp_directive_elifndef, + TokenKind_pp_directive_else, + TokenKind_pp_directive_embed, + TokenKind_pp_directive_endif, + TokenKind_pp_directive_error, + TokenKind_pp_directive_if, + TokenKind_pp_directive_ifdef, + TokenKind_pp_directive_ifndef, + TokenKind_pp_directive_include, + TokenKind_pp_directive_include_next, // GNU extension + TokenKind_pp_directive_line, + TokenKind_pp_directive_non_directive, + TokenKind_pp_directive_nop, + TokenKind_pp_directive_pragma, + TokenKind_pp_directive_undef, + TokenKind_pp_directive_warning, + TokenKind_pp_operator_defined, + TokenKind_pp_operator___has_c_attribute, + TokenKind_pp_operator___has_embed, + TokenKind_pp_operator___has_include, + + // C23: 6.4.1 + TokenKind_keyword_alignas, + TokenKind_keyword_alignof, + TokenKind_keyword_auto, + TokenKind_keyword_bool, + TokenKind_keyword_break, + TokenKind_keyword_case, + TokenKind_keyword_char, + TokenKind_keyword_const, + TokenKind_keyword_constexpr, + TokenKind_keyword_continue, + TokenKind_keyword_default, + TokenKind_keyword_do, + TokenKind_keyword_double, + TokenKind_keyword_else, + TokenKind_keyword_enum, + TokenKind_keyword_extern, + TokenKind_keyword_false, + TokenKind_keyword_float, + TokenKind_keyword_for, + TokenKind_keyword_goto, + TokenKind_keyword_if, + TokenKind_keyword_inline, + TokenKind_keyword_int, + TokenKind_keyword_long, + TokenKind_keyword_nullptr, + TokenKind_keyword_register, + TokenKind_keyword_restrict, + TokenKind_keyword_return, + TokenKind_keyword_short, + TokenKind_keyword_signed, + TokenKind_keyword_sizeof, + TokenKind_keyword_static, + TokenKind_keyword_static_assert, + TokenKind_keyword_struct, + TokenKind_keyword_switch, + TokenKind_keyword_thread_local, + TokenKind_keyword_true, + TokenKind_keyword_typedef, + TokenKind_keyword_typeof, + TokenKind_keyword_typeof_unqual, + TokenKind_keyword_union, + TokenKind_keyword_unsigned, + TokenKind_keyword_void, + TokenKind_keyword_volatile, + TokenKind_keyword_while, + TokenKind_keyword__Atomic, + TokenKind_keyword__BitInt, + TokenKind_keyword__Complex, + TokenKind_keyword__Decimal128, + TokenKind_keyword__Decimal32, + TokenKind_keyword__Decimal64, + TokenKind_keyword__Generic, + TokenKind_keyword__Imaginary, + TokenKind_keyword__Noreturn, + + TokenKind_and, + TokenKind_andand, + TokenKind_arrow, + TokenKind_assign, + TokenKind_assign_add, + TokenKind_assign_and, + TokenKind_assign_div, + TokenKind_assign_lshift, + TokenKind_assign_mod, + TokenKind_assign_mul, + TokenKind_assign_or, + TokenKind_assign_rshift, + TokenKind_assign_sub, + TokenKind_assign_xor, + TokenKind_brace_l, + TokenKind_brace_r, + TokenKind_bracket_l, + TokenKind_bracket_r, + TokenKind_colon, + TokenKind_comma, + TokenKind_dot, + TokenKind_ellipsis, + TokenKind_eq, + TokenKind_ge, + TokenKind_gt, + TokenKind_ident, + TokenKind_le, + TokenKind_literal_int, + TokenKind_literal_double, + TokenKind_literal_str, + TokenKind_lshift, + TokenKind_lt, + TokenKind_minus, + TokenKind_minusminus, + TokenKind_ne, + TokenKind_not, + TokenKind_or, + TokenKind_oror, + TokenKind_paren_l, + TokenKind_paren_r, + TokenKind_percent, + TokenKind_plus, + TokenKind_plusplus, + TokenKind_question, + TokenKind_rshift, + TokenKind_semicolon, + TokenKind_slash, + TokenKind_star, + TokenKind_tilde, + TokenKind_xor, +} TokenKind; + +const char* token_kind_stringify(TokenKind k); +bool is_pp_directive(TokenKind k); + +// TokenValue is externally tagged by Token's kind. +typedef union { + const char* string; + int integer; + double floating; +} TokenValue; + +typedef struct { + TokenKind kind; + TokenValue value; + SourceLocation loc; +} Token; + +const char* token_stringify(Token* tok); +void token_build_json(JsonBuilder* builder, Token* tok); + +typedef struct { + size_t len; + size_t capacity; + Token* data; +} TokenArray; + +void tokens_init(TokenArray* tokens, size_t capacity); +void tokens_reserve(TokenArray* tokens, size_t size); +Token* tokens_push_new(TokenArray* tokens); +Token* tokens_pop(TokenArray* tokens); +void tokens_build_json(JsonBuilder* builder, TokenArray* tokens); + +#endif diff --git a/src/cc1/tokenize.c b/src/cc1/tokenize.c new file mode 100644 index 0000000..78b1acb --- /dev/null +++ b/src/cc1/tokenize.c @@ -0,0 +1,597 @@ +#include "tokenize.h" +#include <ctype.h> +#include "../lib/common.h" + +typedef struct { + InFile* src; + bool at_bol; + bool expect_header_name; + TokenArray* tokens; +} Lexer; + +static Lexer* lexer_new(InFile* src) { + Lexer* l = calloc(1, sizeof(Lexer)); + + l->src = src; + l->at_bol = true; + l->expect_header_name = false; + l->tokens = calloc(1, sizeof(TokenArray)); + tokens_init(l->tokens, 1024 * 16); + + return l; +} + +static void pplexer_tokenize_pp_directive(Lexer* l, Token* tok) { + // Skip whitespaces after '#'. + char c; + while (isspace((c = infile_peek_char(l->src)))) { + if (c == '\n') + break; + infile_next_char(l->src); + } + // '#' new-line + if (c == '\n') { + tok->kind = TokenKind_pp_directive_nop; + return; + } + + StrBuilder builder; + strbuilder_init(&builder); + while (isalnum(infile_peek_char(l->src)) || infile_peek_char(l->src) == '_') { + strbuilder_append_char(&builder, infile_peek_char(l->src)); + infile_next_char(l->src); + } + const char* pp_directive_name = builder.buf; + + if (builder.len == 0) { + tok->kind = TokenKind_hash; + } else if (strcmp(pp_directive_name, "define") == 0) { + tok->kind = TokenKind_pp_directive_define; + } else if (strcmp(pp_directive_name, "elif") == 0) { + tok->kind = TokenKind_pp_directive_elif; + } else if (strcmp(pp_directive_name, "elifdef") == 0) { + tok->kind = TokenKind_pp_directive_elifdef; + } else if (strcmp(pp_directive_name, "elifndef") == 0) { + tok->kind = TokenKind_pp_directive_elifndef; + } else if (strcmp(pp_directive_name, "else") == 0) { + tok->kind = TokenKind_pp_directive_else; + } else if (strcmp(pp_directive_name, "embed") == 0) { + tok->kind = TokenKind_pp_directive_embed; + } else if (strcmp(pp_directive_name, "endif") == 0) { + tok->kind = TokenKind_pp_directive_endif; + } else if (strcmp(pp_directive_name, "error") == 0) { + tok->kind = TokenKind_pp_directive_error; + } else if (strcmp(pp_directive_name, "if") == 0) { + tok->kind = TokenKind_pp_directive_if; + } else if (strcmp(pp_directive_name, "ifdef") == 0) { + tok->kind = TokenKind_pp_directive_ifdef; + } else if (strcmp(pp_directive_name, "ifndef") == 0) { + tok->kind = TokenKind_pp_directive_ifndef; + } else if (strcmp(pp_directive_name, "include") == 0) { + l->expect_header_name = true; + tok->kind = TokenKind_pp_directive_include; + } else if (strcmp(pp_directive_name, "include_next") == 0) { + l->expect_header_name = true; + tok->kind = TokenKind_pp_directive_include_next; + } else if (strcmp(pp_directive_name, "line") == 0) { + tok->kind = TokenKind_pp_directive_line; + } else if (strcmp(pp_directive_name, "pragma") == 0) { + tok->kind = TokenKind_pp_directive_pragma; + } else if (strcmp(pp_directive_name, "undef") == 0) { + tok->kind = TokenKind_pp_directive_undef; + } else if (strcmp(pp_directive_name, "warning") == 0) { + tok->kind = TokenKind_pp_directive_warning; + } else { + tok->kind = TokenKind_pp_directive_non_directive; + tok->value.string = pp_directive_name; + } +} + +static void do_tokenize_all(Lexer* l) { + while (!infile_eof(l->src)) { + Token* tok = tokens_push_new(l->tokens); + tok->loc = l->src->loc; + char c = infile_peek_char(l->src); + + if (l->expect_header_name && c == '"') { + infile_next_char(l->src); + StrBuilder builder; + strbuilder_init(&builder); + strbuilder_append_char(&builder, '"'); + while (1) { + char ch = infile_peek_char(l->src); + if (ch == '"') + break; + strbuilder_append_char(&builder, ch); + if (ch == '\\') { + infile_next_char(l->src); + strbuilder_append_char(&builder, infile_peek_char(l->src)); + } + infile_next_char(l->src); + } + strbuilder_append_char(&builder, '"'); + infile_next_char(l->src); + tok->kind = TokenKind_header_name; + tok->value.string = builder.buf; + l->expect_header_name = false; + } else if (l->expect_header_name && c == '<') { + infile_next_char(l->src); + StrBuilder builder; + strbuilder_init(&builder); + strbuilder_append_char(&builder, '<'); + while (1) { + char ch = infile_peek_char(l->src); + if (ch == '>') + break; + strbuilder_append_char(&builder, ch); + infile_next_char(l->src); + } + strbuilder_append_char(&builder, '>'); + infile_next_char(l->src); + tok->kind = TokenKind_header_name; + tok->value.string = builder.buf; + l->expect_header_name = false; + } else if (c == '(') { + infile_next_char(l->src); + tok->kind = TokenKind_paren_l; + } else if (c == ')') { + infile_next_char(l->src); + tok->kind = TokenKind_paren_r; + } else if (c == '{') { + infile_next_char(l->src); + tok->kind = TokenKind_brace_l; + } else if (c == '}') { + infile_next_char(l->src); + tok->kind = TokenKind_brace_r; + } else if (c == '[') { + infile_next_char(l->src); + tok->kind = TokenKind_bracket_l; + } else if (c == ']') { + infile_next_char(l->src); + tok->kind = TokenKind_bracket_r; + } else if (c == ',') { + infile_next_char(l->src); + tok->kind = TokenKind_comma; + } else if (c == ':') { + infile_next_char(l->src); + tok->kind = TokenKind_colon; + } else if (c == ';') { + infile_next_char(l->src); + tok->kind = TokenKind_semicolon; + } else if (c == '^') { + infile_next_char(l->src); + if (infile_consume_if(l->src, '=')) { + tok->kind = TokenKind_assign_xor; + } else { + tok->kind = TokenKind_xor; + } + } else if (c == '?') { + infile_next_char(l->src); + tok->kind = TokenKind_question; + } else if (c == '~') { + infile_next_char(l->src); + tok->kind = TokenKind_tilde; + } else if (c == '+') { + infile_next_char(l->src); + if (infile_consume_if(l->src, '=')) { + tok->kind = TokenKind_assign_add; + } else if (infile_consume_if(l->src, '+')) { + tok->kind = TokenKind_plusplus; + } else { + tok->kind = TokenKind_plus; + } + } else if (c == '|') { + infile_next_char(l->src); + if (infile_consume_if(l->src, '=')) { + tok->kind = TokenKind_assign_or; + } else if (infile_consume_if(l->src, '|')) { + tok->kind = TokenKind_oror; + } else { + tok->kind = TokenKind_or; + } + } else if (c == '&') { + infile_next_char(l->src); + if (infile_consume_if(l->src, '=')) { + tok->kind = TokenKind_assign_and; + } else if (infile_consume_if(l->src, '&')) { + tok->kind = TokenKind_andand; + } else { + tok->kind = TokenKind_and; + } + } else if (c == '-') { + infile_next_char(l->src); + if (infile_consume_if(l->src, '>')) { + tok->kind = TokenKind_arrow; + } else if (infile_consume_if(l->src, '=')) { + tok->kind = TokenKind_assign_sub; + } else if (infile_consume_if(l->src, '-')) { + tok->kind = TokenKind_minusminus; + } else { + tok->kind = TokenKind_minus; + } + } else if (c == '*') { + infile_next_char(l->src); + if (infile_consume_if(l->src, '=')) { + tok->kind = TokenKind_assign_mul; + } else { + tok->kind = TokenKind_star; + } + } else if (c == '/') { + infile_next_char(l->src); + if (infile_consume_if(l->src, '=')) { + tok->kind = TokenKind_assign_div; + } else if (infile_consume_if(l->src, '/')) { + while (!infile_eof(l->src) && infile_peek_char(l->src) != '\n') { + infile_next_char(l->src); + } + tok->kind = TokenKind_whitespace; + } else if (infile_consume_if(l->src, '*')) { + while (infile_peek_char(l->src)) { + if (infile_consume_if(l->src, '*')) { + if (infile_consume_if(l->src, '/')) { + break; + } + continue; + } + infile_next_char(l->src); + } + tok->kind = TokenKind_whitespace; + } else { + tok->kind = TokenKind_slash; + } + } else if (c == '%') { + infile_next_char(l->src); + if (infile_consume_if(l->src, '=')) { + tok->kind = TokenKind_assign_mod; + } else { + tok->kind = TokenKind_percent; + } + } else if (c == '.') { + infile_next_char(l->src); + if (infile_consume_if(l->src, '.')) { + if (infile_consume_if(l->src, '.')) { + tok->kind = TokenKind_ellipsis; + } else { + tok->kind = TokenKind_other; + tok->value.string = ".."; + } + } else { + tok->kind = TokenKind_dot; + } + } else if (c == '!') { + infile_next_char(l->src); + if (infile_consume_if(l->src, '=')) { + tok->kind = TokenKind_ne; + } else { + tok->kind = TokenKind_not; + } + } else if (c == '=') { + infile_next_char(l->src); + if (infile_consume_if(l->src, '=')) { + tok->kind = TokenKind_eq; + } else { + tok->kind = TokenKind_assign; + } + } else if (c == '<') { + infile_next_char(l->src); + if (infile_consume_if(l->src, '=')) { + tok->kind = TokenKind_le; + } else if (infile_consume_if(l->src, '<')) { + if (infile_consume_if(l->src, '=')) { + tok->kind = TokenKind_assign_lshift; + } else { + tok->kind = TokenKind_lshift; + } + } else { + tok->kind = TokenKind_lt; + } + } else if (c == '>') { + infile_next_char(l->src); + if (infile_consume_if(l->src, '=')) { + tok->kind = TokenKind_ge; + } else if (infile_consume_if(l->src, '>')) { + if (infile_consume_if(l->src, '=')) { + tok->kind = TokenKind_assign_rshift; + } else { + tok->kind = TokenKind_rshift; + } + } else { + tok->kind = TokenKind_gt; + } + } else if (c == '#') { + infile_next_char(l->src); + if (infile_consume_if(l->src, '#')) { + tok->kind = TokenKind_hashhash; + } else { + if (l->at_bol) { + pplexer_tokenize_pp_directive(l, tok); + } else { + tok->kind = TokenKind_hash; + } + } + } else if (c == '\'') { + infile_next_char(l->src); + StrBuilder builder; + strbuilder_init(&builder); + strbuilder_append_char(&builder, '\''); + strbuilder_append_char(&builder, infile_peek_char(l->src)); + if (infile_peek_char(l->src) == '\\') { + infile_next_char(l->src); + strbuilder_append_char(&builder, infile_peek_char(l->src)); + } + strbuilder_append_char(&builder, '\''); + infile_next_char(l->src); + infile_next_char(l->src); + tok->kind = TokenKind_character_constant; + tok->value.string = builder.buf; + } else if (c == '"') { + infile_next_char(l->src); + StrBuilder builder; + strbuilder_init(&builder); + while (1) { + char ch = infile_peek_char(l->src); + if (ch == '"') + break; + strbuilder_append_char(&builder, ch); + if (ch == '\\') { + infile_next_char(l->src); + strbuilder_append_char(&builder, infile_peek_char(l->src)); + } + infile_next_char(l->src); + } + infile_next_char(l->src); + tok->kind = TokenKind_literal_str; + tok->value.string = builder.buf; + } else if (isdigit(c)) { + // TODO: implement tokenization of pp-number. + StrBuilder builder; + strbuilder_init(&builder); + while (isalnum(infile_peek_char(l->src))) { + strbuilder_append_char(&builder, infile_peek_char(l->src)); + infile_next_char(l->src); + } + if (infile_peek_char(l->src) == '.' && isdigit(infile_peek_char2(l->src))) { + strbuilder_append_char(&builder, infile_peek_char(l->src)); + infile_next_char(l->src); + while (isdigit(infile_peek_char(l->src))) { + strbuilder_append_char(&builder, infile_peek_char(l->src)); + infile_next_char(l->src); + } + tok->kind = TokenKind_literal_double; + tok->value.floating = strtod(builder.buf, NULL); + } else { + tok->kind = TokenKind_literal_int; + tok->value.integer = strtol(builder.buf, NULL, 0); + } + } else if (isalpha(c) || c == '_') { + StrBuilder builder; + strbuilder_init(&builder); + while (isalnum(infile_peek_char(l->src)) || infile_peek_char(l->src) == '_') { + strbuilder_append_char(&builder, infile_peek_char(l->src)); + infile_next_char(l->src); + } + tok->kind = TokenKind_ident; + tok->value.string = builder.buf; + } else if (c == '\n') { + infile_next_char(l->src); + tok->kind = TokenKind_newline; + + // Reset expect_header_name at the end of line. It handles cases like: + // + // #ifdef ADDITIONAL_HEADER + // #include ADDITIONAL_HEADER + // #endif + // + // Even if ADDITIONAL_HEADER is undefined, this include directive line is tokenized. If the flag were not + // reset, the next occurrence of '<' or '"' would be recognized as part of a header name. + l->expect_header_name = false; + } else if (isspace(c)) { + while (isspace((c = infile_peek_char(l->src)))) { + if (c == '\n') + break; + infile_next_char(l->src); + } + if (l->at_bol && infile_peek_char(l->src) == '#') { + infile_next_char(l->src); + pplexer_tokenize_pp_directive(l, tok); + } else { + tok->kind = TokenKind_whitespace; + } + } else { + infile_next_char(l->src); + tok->kind = TokenKind_other; + char* buf = calloc(2, sizeof(char)); + buf[0] = c; + tok->value.string = buf; + } + l->at_bol = tok->kind == TokenKind_newline; + } + Token* eof_tok = tokens_push_new(l->tokens); + eof_tok->loc = l->src->loc; + eof_tok->kind = TokenKind_eof; +} + +TokenArray* tokenize(InFile* src) { + Lexer* l = lexer_new(src); + do_tokenize_all(l); + return l->tokens; +} + +TokenArray* convert_pp_tokens_to_tokens(TokenArray* pp_tokens) { + TokenArray* tokens = calloc(1, sizeof(TokenArray)); + // tokens need not store whitespace tokens. + tokens_init(tokens, pp_tokens->len / 2); + + for (size_t pos = 0; pos < pp_tokens->len; ++pos) { + Token* pp_tok = &pp_tokens->data[pos]; + TokenKind k = pp_tok->kind; + if (k == TokenKind_removed || k == TokenKind_whitespace || k == TokenKind_newline) { + continue; + } + Token* tok = tokens_push_new(tokens); + tok->loc = pp_tok->loc; + if (k == TokenKind_character_constant) { + tok->kind = TokenKind_literal_int; + int ch = pp_tok->value.string[1]; + if (ch == '\\') { + ch = pp_tok->value.string[2]; + if (ch == 'a') { + ch = '\a'; + } else if (ch == 'b') { + ch = '\b'; + } else if (ch == 'f') { + ch = '\f'; + } else if (ch == 'n') { + ch = '\n'; + } else if (ch == 'r') { + ch = '\r'; + } else if (ch == 't') { + ch = '\t'; + } else if (ch == 'v') { + ch = '\v'; + } else if (ch == '0') { + ch = '\0'; + } else if (ch == 'e') { + // \e is not a part of Standard C, but commonly supported. + ch = 27; + } + } + tok->value.integer = ch; + } else if (k == TokenKind_literal_str) { + tok->kind = pp_tok->kind; + + size_t len = strlen(pp_tok->value.string); + char* buf = calloc(len + 1, sizeof(char)); + for (size_t i = 0, j = 0; i < len; i++, j++) { + if (pp_tok->value.string[i] == '\\' && pp_tok->value.string[i + 1] == 'e') { + // \e is not a part of Standard C, but commonly supported. + buf[j] = 033; + i++; + } else { + buf[j] = pp_tok->value.string[i]; + } + } + tok->value.string = buf; + } else if (k == TokenKind_ident) { + if (strcmp(pp_tok->value.string, "alignas") == 0) { + tok->kind = TokenKind_keyword_alignas; + } else if (strcmp(pp_tok->value.string, "alignof") == 0) { + tok->kind = TokenKind_keyword_alignof; + } else if (strcmp(pp_tok->value.string, "auto") == 0) { + tok->kind = TokenKind_keyword_auto; + } else if (strcmp(pp_tok->value.string, "bool") == 0) { + tok->kind = TokenKind_keyword_bool; + } else if (strcmp(pp_tok->value.string, "break") == 0) { + tok->kind = TokenKind_keyword_break; + } else if (strcmp(pp_tok->value.string, "case") == 0) { + tok->kind = TokenKind_keyword_case; + } else if (strcmp(pp_tok->value.string, "char") == 0) { + tok->kind = TokenKind_keyword_char; + } else if (strcmp(pp_tok->value.string, "const") == 0) { + tok->kind = TokenKind_keyword_const; + } else if (strcmp(pp_tok->value.string, "constexpr") == 0) { + tok->kind = TokenKind_keyword_constexpr; + } else if (strcmp(pp_tok->value.string, "continue") == 0) { + tok->kind = TokenKind_keyword_continue; + } else if (strcmp(pp_tok->value.string, "default") == 0) { + tok->kind = TokenKind_keyword_default; + } else if (strcmp(pp_tok->value.string, "do") == 0) { + tok->kind = TokenKind_keyword_do; + } else if (strcmp(pp_tok->value.string, "double") == 0) { + tok->kind = TokenKind_keyword_double; + } else if (strcmp(pp_tok->value.string, "else") == 0) { + tok->kind = TokenKind_keyword_else; + } else if (strcmp(pp_tok->value.string, "enum") == 0) { + tok->kind = TokenKind_keyword_enum; + } else if (strcmp(pp_tok->value.string, "extern") == 0) { + tok->kind = TokenKind_keyword_extern; + } else if (strcmp(pp_tok->value.string, "false") == 0) { + tok->kind = TokenKind_keyword_false; + } else if (strcmp(pp_tok->value.string, "float") == 0) { + tok->kind = TokenKind_keyword_float; + } else if (strcmp(pp_tok->value.string, "for") == 0) { + tok->kind = TokenKind_keyword_for; + } else if (strcmp(pp_tok->value.string, "goto") == 0) { + tok->kind = TokenKind_keyword_goto; + } else if (strcmp(pp_tok->value.string, "if") == 0) { + tok->kind = TokenKind_keyword_if; + } else if (strcmp(pp_tok->value.string, "inline") == 0) { + tok->kind = TokenKind_keyword_inline; + } else if (strcmp(pp_tok->value.string, "int") == 0) { + tok->kind = TokenKind_keyword_int; + } else if (strcmp(pp_tok->value.string, "long") == 0) { + tok->kind = TokenKind_keyword_long; + } else if (strcmp(pp_tok->value.string, "nullptr") == 0) { + tok->kind = TokenKind_keyword_nullptr; + } else if (strcmp(pp_tok->value.string, "register") == 0) { + tok->kind = TokenKind_keyword_register; + } else if (strcmp(pp_tok->value.string, "restrict") == 0) { + tok->kind = TokenKind_keyword_restrict; + } else if (strcmp(pp_tok->value.string, "return") == 0) { + tok->kind = TokenKind_keyword_return; + } else if (strcmp(pp_tok->value.string, "short") == 0) { + tok->kind = TokenKind_keyword_short; + } else if (strcmp(pp_tok->value.string, "signed") == 0) { + tok->kind = TokenKind_keyword_signed; + } else if (strcmp(pp_tok->value.string, "sizeof") == 0) { + tok->kind = TokenKind_keyword_sizeof; + } else if (strcmp(pp_tok->value.string, "static") == 0) { + tok->kind = TokenKind_keyword_static; + } else if (strcmp(pp_tok->value.string, "static_assert") == 0) { + tok->kind = TokenKind_keyword_static_assert; + } else if (strcmp(pp_tok->value.string, "struct") == 0) { + tok->kind = TokenKind_keyword_struct; + } else if (strcmp(pp_tok->value.string, "switch") == 0) { + tok->kind = TokenKind_keyword_switch; + } else if (strcmp(pp_tok->value.string, "thread_local") == 0) { + tok->kind = TokenKind_keyword_thread_local; + } else if (strcmp(pp_tok->value.string, "true") == 0) { + tok->kind = TokenKind_keyword_true; + } else if (strcmp(pp_tok->value.string, "typedef") == 0) { + tok->kind = TokenKind_keyword_typedef; + } else if (strcmp(pp_tok->value.string, "typeof") == 0) { + tok->kind = TokenKind_keyword_typeof; + } else if (strcmp(pp_tok->value.string, "typeof_unqual") == 0) { + tok->kind = TokenKind_keyword_typeof_unqual; + } else if (strcmp(pp_tok->value.string, "union") == 0) { + tok->kind = TokenKind_keyword_union; + } else if (strcmp(pp_tok->value.string, "unsigned") == 0) { + tok->kind = TokenKind_keyword_unsigned; + } else if (strcmp(pp_tok->value.string, "void") == 0) { + tok->kind = TokenKind_keyword_void; + } else if (strcmp(pp_tok->value.string, "volatile") == 0) { + tok->kind = TokenKind_keyword_volatile; + } else if (strcmp(pp_tok->value.string, "while") == 0) { + tok->kind = TokenKind_keyword_while; + } else if (strcmp(pp_tok->value.string, "_Atomic") == 0) { + tok->kind = TokenKind_keyword__Atomic; + } else if (strcmp(pp_tok->value.string, "_BitInt") == 0) { + tok->kind = TokenKind_keyword__BitInt; + } else if (strcmp(pp_tok->value.string, "_Complex") == 0) { + tok->kind = TokenKind_keyword__Complex; + } else if (strcmp(pp_tok->value.string, "_Decimal128") == 0) { + tok->kind = TokenKind_keyword__Decimal128; + } else if (strcmp(pp_tok->value.string, "_Decimal32") == 0) { + tok->kind = TokenKind_keyword__Decimal32; + } else if (strcmp(pp_tok->value.string, "_Decimal64") == 0) { + tok->kind = TokenKind_keyword__Decimal64; + } else if (strcmp(pp_tok->value.string, "_Generic") == 0) { + tok->kind = TokenKind_keyword__Generic; + } else if (strcmp(pp_tok->value.string, "_Imaginary") == 0) { + tok->kind = TokenKind_keyword__Imaginary; + } else if (strcmp(pp_tok->value.string, "_Noreturn") == 0) { + tok->kind = TokenKind_keyword__Noreturn; + } else { + tok->kind = TokenKind_ident; + tok->value = pp_tok->value; + } + } else if (k == TokenKind_other) { + unreachable(); + } else { + tok->kind = pp_tok->kind; + tok->value = pp_tok->value; + } + } + + return tokens; +} diff --git a/src/cc1/tokenize.h b/src/cc1/tokenize.h new file mode 100644 index 0000000..fd334a1 --- /dev/null +++ b/src/cc1/tokenize.h @@ -0,0 +1,10 @@ +#ifndef DUCC_TOKENIZE_H +#define DUCC_TOKENIZE_H + +#include "io.h" +#include "token.h" + +TokenArray* tokenize(InFile* src); +TokenArray* convert_pp_tokens_to_tokens(TokenArray* pp_tokens); + +#endif |
