aboutsummaryrefslogtreecommitdiffhomepage
path: root/src/cc1
diff options
context:
space:
mode:
authornsfisis <nsfisis@gmail.com>2026-05-03 17:29:12 +0900
committernsfisis <nsfisis@gmail.com>2026-05-03 18:42:58 +0900
commit3654ce578e6fff53950874adf7e0e4ae0a6eb956 (patch)
tree5b6c04273de38dba70b7c25e55da144f5f7c37da /src/cc1
parent1b406b13b03055d2b2d08e8279a4a80c41ca7c20 (diff)
downloadducc-3654ce578e6fff53950874adf7e0e4ae0a6eb956.tar.gz
ducc-3654ce578e6fff53950874adf7e0e4ae0a6eb956.tar.zst
ducc-3654ce578e6fff53950874adf7e0e4ae0a6eb956.zip
refactor: organize directory structureHEADmain
Diffstat (limited to 'src/cc1')
-rw-r--r--src/cc1/ast.c787
-rw-r--r--src/cc1/ast.h433
-rw-r--r--src/cc1/codegen.c925
-rw-r--r--src/cc1/codegen.h8
-rw-r--r--src/cc1/codegen_wasm.c164
-rw-r--r--src/cc1/codegen_wasm.h8
-rw-r--r--src/cc1/fs.c27
-rw-r--r--src/cc1/fs.h7
-rw-r--r--src/cc1/io.c127
-rw-r--r--src/cc1/io.h26
-rw-r--r--src/cc1/parse.c3018
-rw-r--r--src/cc1/parse.h43
-rw-r--r--src/cc1/preprocess.c1559
-rw-r--r--src/cc1/preprocess.h13
-rw-r--r--src/cc1/sys.c21
-rw-r--r--src/cc1/sys.h7
-rw-r--r--src/cc1/token.c387
-rw-r--r--src/cc1/token.h183
-rw-r--r--src/cc1/tokenize.c597
-rw-r--r--src/cc1/tokenize.h10
20 files changed, 8350 insertions, 0 deletions
diff --git a/src/cc1/ast.c b/src/cc1/ast.c
new file mode 100644
index 0000000..efd16ae
--- /dev/null
+++ b/src/cc1/ast.c
@@ -0,0 +1,787 @@
+#include "ast.h"
+#include "../lib/common.h"
+#include "preprocess.h"
+
+const char* storageclass_stringify(StorageClass s) {
+ if (s == StorageClass_unspecified)
+ return "";
+ if (s == StorageClass_auto)
+ return "auto";
+ if (s == StorageClass_constexpr)
+ return "constexpr";
+ if (s == StorageClass_extern)
+ return "extern";
+ if (s == StorageClass_register)
+ return "register";
+ if (s == StorageClass_static)
+ return "static";
+ if (s == StorageClass_thread_local)
+ return "thread_local";
+ if (s == StorageClass_typedef)
+ return "typedef";
+ else
+ unreachable();
+}
+
+const char* type_kind_stringify(TypeKind k) {
+ if (k == TypeKind_unknown)
+ return "<unknown>";
+ else if (k == TypeKind_void)
+ return "void";
+ else if (k == TypeKind_char)
+ return "char";
+ else if (k == TypeKind_schar)
+ return "signed char";
+ else if (k == TypeKind_uchar)
+ return "unsigned char";
+ else if (k == TypeKind_short)
+ return "short";
+ else if (k == TypeKind_ushort)
+ return "unsigned short";
+ else if (k == TypeKind_int)
+ return "int";
+ else if (k == TypeKind_uint)
+ return "unsigned int";
+ else if (k == TypeKind_long)
+ return "long";
+ else if (k == TypeKind_ulong)
+ return "unsigned long";
+ else if (k == TypeKind_llong)
+ return "long long";
+ else if (k == TypeKind_ullong)
+ return "unsigned long long";
+ else if (k == TypeKind_float)
+ return "float";
+ else if (k == TypeKind_double)
+ return "double";
+ else if (k == TypeKind_ldouble)
+ return "long double";
+ else if (k == TypeKind_bool)
+ return "bool";
+ else if (k == TypeKind_struct)
+ return "struct";
+ else if (k == TypeKind_union)
+ return "union";
+ else if (k == TypeKind_enum)
+ return "enum";
+ else if (k == TypeKind_ptr)
+ return "<pointer>";
+ else if (k == TypeKind_array)
+ return "<array>";
+ else if (k == TypeKind_func)
+ return "<function>";
+ else
+ unreachable();
+}
+
+static AstNode* members_of(Type* ty) {
+ AstNode* def = &ty->ref.defs->as.list->items[ty->ref.index];
+ if (def->kind == AstNodeKind_struct_def)
+ return def->as.struct_def->members;
+ else
+ return def->as.union_def->members;
+}
+
+Type* type_new(TypeKind kind) {
+ Type* ty = calloc(1, sizeof(Type));
+ ty->kind = kind;
+ return ty;
+}
+
+Type* type_dup(Type* src) {
+ Type* ty = malloc(sizeof(Type));
+ memcpy(ty, src, sizeof(Type));
+ return ty;
+}
+
+Type* type_new_ptr(Type* base) {
+ Type* ty = type_new(TypeKind_ptr);
+ ty->base = base;
+ return ty;
+}
+
+Type* type_new_array(Type* base, int size) {
+ Type* ty = type_new(TypeKind_array);
+ ty->base = base;
+ ty->array_size = size;
+ return ty;
+}
+
+Type* type_new_static_string(int len) {
+ return type_new_array(type_new(TypeKind_char), len + 1);
+}
+
+Type* type_array_to_ptr(Type* ty) {
+ return type_new_ptr(ty->base);
+}
+
+Type* type_new_func(Type* result, AstNode* params) {
+ Type* ty = type_new(TypeKind_func);
+ ty->result = result;
+ ty->params = params;
+ return ty;
+}
+
+bool type_is_unsized(Type* ty) {
+ return ty->kind == TypeKind_void;
+}
+
+bool type_is_unsigned(Type* ty) {
+ return ty->kind == TypeKind_uchar || ty->kind == TypeKind_ushort || ty->kind == TypeKind_uint ||
+ ty->kind == TypeKind_ulong || ty->kind == TypeKind_ullong || ty->kind == TypeKind_bool;
+}
+
+int type_sizeof(Type* ty) {
+ if (type_is_unsized(ty)) {
+ fatal_error("type_sizeof: type size cannot be determined");
+ }
+
+ if (ty->kind == TypeKind_char || ty->kind == TypeKind_schar || ty->kind == TypeKind_uchar ||
+ ty->kind == TypeKind_bool)
+ return 1;
+ else if (ty->kind == TypeKind_short || ty->kind == TypeKind_ushort)
+ return 2;
+ else if (ty->kind == TypeKind_int || ty->kind == TypeKind_uint || ty->kind == TypeKind_float)
+ return 4;
+ else if (ty->kind == TypeKind_long || ty->kind == TypeKind_ulong || ty->kind == TypeKind_llong ||
+ ty->kind == TypeKind_ullong || ty->kind == TypeKind_double)
+ return 8;
+ else if (ty->kind == TypeKind_struct)
+ return type_sizeof_struct(ty);
+ else if (ty->kind == TypeKind_union)
+ return type_sizeof_union(ty);
+ else if (ty->kind == TypeKind_enum)
+ return 4;
+ else if (ty->kind == TypeKind_ptr)
+ return 8;
+ else if (ty->kind == TypeKind_array)
+ return type_sizeof(ty->base) * ty->array_size;
+ else if (ty->kind == TypeKind_func)
+ return 8;
+ else
+ unreachable();
+}
+
+int type_alignof(Type* ty) {
+ if (type_is_unsized(ty)) {
+ fatal_error("type_alignof: type size cannot be determined");
+ }
+
+ if (ty->kind == TypeKind_char || ty->kind == TypeKind_schar || ty->kind == TypeKind_uchar ||
+ ty->kind == TypeKind_bool)
+ return 1;
+ else if (ty->kind == TypeKind_short || ty->kind == TypeKind_ushort)
+ return 2;
+ else if (ty->kind == TypeKind_int || ty->kind == TypeKind_uint || ty->kind == TypeKind_float)
+ return 4;
+ else if (ty->kind == TypeKind_long || ty->kind == TypeKind_ulong || ty->kind == TypeKind_llong ||
+ ty->kind == TypeKind_ullong || ty->kind == TypeKind_double)
+ return 8;
+ else if (ty->kind == TypeKind_struct)
+ return type_alignof_struct(ty);
+ else if (ty->kind == TypeKind_union)
+ return type_alignof_union(ty);
+ else if (ty->kind == TypeKind_enum)
+ return 4;
+ else if (ty->kind == TypeKind_ptr)
+ return 8;
+ else if (ty->kind == TypeKind_array)
+ return type_alignof(ty->base);
+ else if (ty->kind == TypeKind_func)
+ return 8;
+ else
+ unreachable();
+}
+
+int to_aligned(int n, int a) {
+ return (n + a - 1) / a * a;
+}
+
+const char* astnode_kind_stringify(AstNodeKind k) {
+ switch (k) {
+ case AstNodeKind_unknown:
+ return "unknown";
+ case AstNodeKind_nop:
+ return "nop";
+ case AstNodeKind_array_initializer:
+ return "array_initializer";
+ case AstNodeKind_assign_expr:
+ return "assign_expr";
+ case AstNodeKind_binary_expr:
+ return "binary_expr";
+ case AstNodeKind_break_stmt:
+ return "break_stmt";
+ case AstNodeKind_case_label:
+ return "case_label";
+ case AstNodeKind_cast_expr:
+ return "cast_expr";
+ case AstNodeKind_cond_expr:
+ return "cond_expr";
+ case AstNodeKind_continue_stmt:
+ return "continue_stmt";
+ case AstNodeKind_default_label:
+ return "default_label";
+ case AstNodeKind_deref_expr:
+ return "deref_expr";
+ case AstNodeKind_do_while_stmt:
+ return "do_while_stmt";
+ case AstNodeKind_enum_def:
+ return "enum_def";
+ case AstNodeKind_enum_member:
+ return "enum_member";
+ case AstNodeKind_expr_stmt:
+ return "expr_stmt";
+ case AstNodeKind_for_stmt:
+ return "for_stmt";
+ case AstNodeKind_func:
+ return "func";
+ case AstNodeKind_func_call:
+ return "func_call";
+ case AstNodeKind_func_decl:
+ return "func_decl";
+ case AstNodeKind_func_def:
+ return "func_def";
+ case AstNodeKind_goto_stmt:
+ return "goto_stmt";
+ case AstNodeKind_gvar:
+ return "gvar";
+ case AstNodeKind_gvar_decl:
+ return "gvar_decl";
+ case AstNodeKind_if_stmt:
+ return "if_stmt";
+ case AstNodeKind_int_expr:
+ return "int_expr";
+ case AstNodeKind_double_expr:
+ return "double_expr";
+ case AstNodeKind_label_stmt:
+ return "label_stmt";
+ case AstNodeKind_list:
+ return "list";
+ case AstNodeKind_logical_expr:
+ return "logical_expr";
+ case AstNodeKind_lvar:
+ return "lvar";
+ case AstNodeKind_lvar_decl:
+ return "lvar_decl";
+ case AstNodeKind_param:
+ return "param";
+ case AstNodeKind_ref_expr:
+ return "ref_expr";
+ case AstNodeKind_return_stmt:
+ return "return_stmt";
+ case AstNodeKind_str_expr:
+ return "str_expr";
+ case AstNodeKind_struct_decl:
+ return "struct_decl";
+ case AstNodeKind_struct_def:
+ return "struct_def";
+ case AstNodeKind_struct_member:
+ return "struct_member";
+ case AstNodeKind_switch_stmt:
+ return "switch_stmt";
+ case AstNodeKind_type:
+ return "type";
+ case AstNodeKind_typedef_decl:
+ return "typedef_decl";
+ case AstNodeKind_unary_expr:
+ return "unary_expr";
+ case AstNodeKind_union_decl:
+ return "union_decl";
+ case AstNodeKind_union_def:
+ return "union_def";
+ case AstNodeKind_declarator:
+ return "declarator";
+ default:
+ unreachable();
+ }
+}
+
+AstNode* ast_new(AstNodeKind kind) {
+ AstNode* ast = calloc(1, sizeof(AstNode));
+ ast->kind = kind;
+ return ast;
+}
+
+AstNode* ast_new_list(int capacity) {
+ if (capacity == 0)
+ unreachable();
+ AstNode* list = ast_new(AstNodeKind_list);
+ list->as.list = calloc(1, sizeof(ListNode));
+ list->as.list->cap = capacity;
+ list->as.list->len = 0;
+ list->as.list->items = calloc(list->as.list->cap, sizeof(AstNode));
+ return list;
+}
+
+void ast_append(AstNode* list, AstNode* item) {
+ if (list->kind != AstNodeKind_list) {
+ fatal_error("ast_append: ast is not a list");
+ }
+ if (!item) {
+ return;
+ }
+ if (list->as.list->cap <= list->as.list->len) {
+ list->as.list->cap *= 2;
+ list->as.list->items = realloc(list->as.list->items, sizeof(AstNode) * list->as.list->cap);
+ memset(list->as.list->items + list->as.list->len, 0,
+ sizeof(AstNode) * (list->as.list->cap - list->as.list->len));
+ }
+ memcpy(list->as.list->items + list->as.list->len, item, sizeof(AstNode));
+ ++list->as.list->len;
+}
+
+AstNode* ast_new_int(int v) {
+ AstNode* e = ast_new(AstNodeKind_int_expr);
+ e->as.int_expr = calloc(1, sizeof(IntExprNode));
+ e->as.int_expr->value = v;
+ e->ty = type_new(TypeKind_int);
+ return e;
+}
+
+AstNode* ast_new_double(double v) {
+ AstNode* e = ast_new(AstNodeKind_double_expr);
+ e->as.double_expr = calloc(1, sizeof(DoubleExprNode));
+ e->as.double_expr->value = v;
+ e->ty = type_new(TypeKind_double);
+ return e;
+}
+
+AstNode* ast_new_unary_expr(int op, AstNode* operand) {
+ AstNode* e = ast_new(AstNodeKind_unary_expr);
+ e->as.unary_expr = calloc(1, sizeof(UnaryExprNode));
+ e->as.unary_expr->op = op;
+ e->as.unary_expr->operand = operand;
+ e->ty = type_new(TypeKind_int);
+ return e;
+}
+
+AstNode* ast_new_binary_expr(int op, AstNode* lhs, AstNode* rhs) {
+ AstNode* e = ast_new(AstNodeKind_binary_expr);
+ e->as.binary_expr = calloc(1, sizeof(BinaryExprNode));
+ e->as.binary_expr->op = op;
+ e->as.binary_expr->lhs = lhs;
+ e->as.binary_expr->rhs = rhs;
+ if (op == TokenKind_plus) {
+ if (lhs->ty->kind == TypeKind_ptr) {
+ e->ty = lhs->ty;
+ } else if (lhs->ty->kind == TypeKind_array) {
+ e->ty = type_array_to_ptr(lhs->ty);
+ } else if (rhs->ty->kind == TypeKind_ptr) {
+ e->ty = rhs->ty;
+ } else if (rhs->ty->kind == TypeKind_array) {
+ e->ty = type_array_to_ptr(rhs->ty);
+ } else {
+ e->ty = type_new(TypeKind_int);
+ }
+ } else if (op == TokenKind_minus) {
+ if (lhs->ty->kind == TypeKind_ptr) {
+ e->ty = lhs->ty;
+ } else if (lhs->ty->kind == TypeKind_array) {
+ e->ty = type_array_to_ptr(lhs->ty);
+ } else {
+ e->ty = type_new(TypeKind_int);
+ }
+ } else {
+ e->ty = type_new(TypeKind_int);
+ }
+ return e;
+}
+
+AstNode* ast_new_assign_expr(int op, AstNode* lhs, AstNode* rhs) {
+ AstNode* e = ast_new(AstNodeKind_assign_expr);
+ e->as.assign_expr = calloc(1, sizeof(AssignExprNode));
+ e->as.assign_expr->op = op;
+ e->as.assign_expr->lhs = lhs;
+ e->as.assign_expr->rhs = rhs;
+ e->ty = lhs->ty;
+ return e;
+}
+
+AstNode* ast_new_assign_add_expr(AstNode* lhs, AstNode* rhs) {
+ if (lhs->ty->base) {
+ rhs = ast_new_binary_expr(TokenKind_star, rhs, ast_new_int(type_sizeof(lhs->ty->base)));
+ } else if (rhs->ty->base) {
+ lhs = ast_new_binary_expr(TokenKind_star, lhs, ast_new_int(type_sizeof(rhs->ty->base)));
+ }
+ return ast_new_assign_expr(TokenKind_assign_add, lhs, rhs);
+}
+
+AstNode* ast_new_assign_sub_expr(AstNode* lhs, AstNode* rhs) {
+ if (lhs->ty->base) {
+ rhs = ast_new_binary_expr(TokenKind_star, rhs, ast_new_int(type_sizeof(lhs->ty->base)));
+ }
+ return ast_new_assign_expr(TokenKind_assign_sub, lhs, rhs);
+}
+
+AstNode* ast_new_ref_expr(AstNode* operand) {
+ AstNode* e = ast_new(AstNodeKind_ref_expr);
+ e->as.ref_expr = calloc(1, sizeof(RefExprNode));
+ e->as.ref_expr->operand = operand;
+ e->ty = type_new_ptr(operand->ty);
+ return e;
+}
+
+AstNode* ast_new_deref_expr(AstNode* operand) {
+ AstNode* e = ast_new(AstNodeKind_deref_expr);
+ e->as.deref_expr = calloc(1, sizeof(DerefExprNode));
+ e->as.deref_expr->operand = operand;
+ e->ty = operand->ty->base;
+ return e;
+}
+
+AstNode* ast_new_member_access_expr(AstNode* obj, const char* name) {
+ AstNode* e = ast_new(AstNodeKind_deref_expr);
+ e->as.deref_expr = calloc(1, sizeof(DerefExprNode));
+ e->as.deref_expr->operand =
+ ast_new_binary_expr(TokenKind_plus, obj, ast_new_int(type_offsetof(obj->ty->base, name)));
+ e->ty = type_member_typeof(obj->ty->base, name);
+ e->as.deref_expr->operand->ty = type_new_ptr(e->ty);
+ return e;
+}
+
+AstNode* ast_new_cast_expr(AstNode* operand, Type* result_ty) {
+ AstNode* e = ast_new(AstNodeKind_cast_expr);
+ e->as.cast_expr = calloc(1, sizeof(CastExprNode));
+ e->as.cast_expr->operand = operand;
+ e->ty = result_ty;
+ return e;
+}
+
+AstNode* ast_new_logical_expr(int op, AstNode* lhs, AstNode* rhs) {
+ AstNode* e = ast_new(AstNodeKind_logical_expr);
+ e->as.logical_expr = calloc(1, sizeof(LogicalExprNode));
+ e->as.logical_expr->op = op;
+ e->as.logical_expr->lhs = lhs;
+ e->as.logical_expr->rhs = rhs;
+ e->ty = type_new(TypeKind_int);
+ return e;
+}
+
+AstNode* ast_new_cond_expr(AstNode* cond, AstNode* then, AstNode* else_) {
+ AstNode* e = ast_new(AstNodeKind_cond_expr);
+ e->as.cond_expr = calloc(1, sizeof(CondExprNode));
+ e->as.cond_expr->cond = cond;
+ e->as.cond_expr->then = then;
+ e->as.cond_expr->else_ = else_;
+ e->ty = then->ty;
+ return e;
+}
+
+AstNode* ast_new_str_expr(int idx, Type* ty) {
+ AstNode* e = ast_new(AstNodeKind_str_expr);
+ e->as.str_expr = calloc(1, sizeof(StrExprNode));
+ e->as.str_expr->idx = idx;
+ e->ty = ty;
+ return e;
+}
+
+AstNode* ast_new_func_call(AstNode* func, AstNode* args) {
+ AstNode* e = ast_new(AstNodeKind_func_call);
+ e->as.func_call = calloc(1, sizeof(FuncCallNode));
+ e->as.func_call->func = func;
+ e->as.func_call->args = args;
+ return e;
+}
+
+AstNode* ast_new_func(const char* name, Type* ty) {
+ AstNode* e = ast_new(AstNodeKind_func);
+ e->as.func = calloc(1, sizeof(FuncNode));
+ e->as.func->name = name;
+ e->ty = ty;
+ return e;
+}
+
+AstNode* ast_new_gvar(const char* name, Type* ty) {
+ AstNode* e = ast_new(AstNodeKind_gvar);
+ e->as.gvar = calloc(1, sizeof(GvarNode));
+ e->as.gvar->name = name;
+ e->ty = ty;
+ return e;
+}
+
+AstNode* ast_new_lvar(const char* name, int stack_offset, Type* ty) {
+ AstNode* e = ast_new(AstNodeKind_lvar);
+ e->as.lvar = calloc(1, sizeof(LvarNode));
+ e->as.lvar->name = name;
+ e->as.lvar->stack_offset = stack_offset;
+ e->ty = ty;
+ return e;
+}
+
+AstNode* ast_new_nop(void) {
+ return ast_new(AstNodeKind_nop);
+}
+
+AstNode* ast_new_break_stmt(void) {
+ return ast_new(AstNodeKind_break_stmt);
+}
+
+AstNode* ast_new_continue_stmt(void) {
+ return ast_new(AstNodeKind_continue_stmt);
+}
+
+AstNode* ast_new_return_stmt(AstNode* expr) {
+ AstNode* e = ast_new(AstNodeKind_return_stmt);
+ e->as.return_stmt = calloc(1, sizeof(ReturnStmtNode));
+ e->as.return_stmt->expr = expr;
+ return e;
+}
+
+AstNode* ast_new_expr_stmt(AstNode* expr) {
+ AstNode* e = ast_new(AstNodeKind_expr_stmt);
+ e->as.expr_stmt = calloc(1, sizeof(ExprStmtNode));
+ e->as.expr_stmt->expr = expr;
+ return e;
+}
+
+AstNode* ast_new_if_stmt(AstNode* cond, AstNode* then, AstNode* else_) {
+ AstNode* e = ast_new(AstNodeKind_if_stmt);
+ e->as.if_stmt = calloc(1, sizeof(IfStmtNode));
+ e->as.if_stmt->cond = cond;
+ e->as.if_stmt->then = then;
+ e->as.if_stmt->else_ = else_;
+ return e;
+}
+
+AstNode* ast_new_for_stmt(AstNode* init, AstNode* cond, AstNode* update, AstNode* body) {
+ AstNode* e = ast_new(AstNodeKind_for_stmt);
+ e->as.for_stmt = calloc(1, sizeof(ForStmtNode));
+ e->as.for_stmt->init = init;
+ e->as.for_stmt->cond = cond;
+ e->as.for_stmt->update = update;
+ e->as.for_stmt->body = body;
+ return e;
+}
+
+AstNode* ast_new_do_while_stmt(AstNode* cond, AstNode* body) {
+ AstNode* e = ast_new(AstNodeKind_do_while_stmt);
+ e->as.do_while_stmt = calloc(1, sizeof(DoWhileStmtNode));
+ e->as.do_while_stmt->cond = cond;
+ e->as.do_while_stmt->body = body;
+ return e;
+}
+
+AstNode* ast_new_switch_stmt(AstNode* expr) {
+ AstNode* e = ast_new(AstNodeKind_switch_stmt);
+ e->as.switch_stmt = calloc(1, sizeof(SwitchStmtNode));
+ e->as.switch_stmt->expr = expr;
+ return e;
+}
+
+AstNode* ast_new_case_label(int value, AstNode* body) {
+ AstNode* e = ast_new(AstNodeKind_case_label);
+ e->as.case_label = calloc(1, sizeof(CaseLabelNode));
+ e->as.case_label->value = value;
+ e->as.case_label->body = body;
+ return e;
+}
+
+AstNode* ast_new_default_label(AstNode* body) {
+ AstNode* e = ast_new(AstNodeKind_default_label);
+ e->as.default_label = calloc(1, sizeof(DefaultLabelNode));
+ e->as.default_label->body = body;
+ return e;
+}
+
+AstNode* ast_new_goto_stmt(const char* label) {
+ AstNode* e = ast_new(AstNodeKind_goto_stmt);
+ e->as.goto_stmt = calloc(1, sizeof(GotoStmtNode));
+ e->as.goto_stmt->label = label;
+ return e;
+}
+
+AstNode* ast_new_label_stmt(const char* name, AstNode* body) {
+ AstNode* e = ast_new(AstNodeKind_label_stmt);
+ e->as.label_stmt = calloc(1, sizeof(LabelStmtNode));
+ e->as.label_stmt->name = name;
+ e->as.label_stmt->body = body;
+ return e;
+}
+
+AstNode* ast_new_declarator(const char* name, Type* ty) {
+ AstNode* e = ast_new(AstNodeKind_declarator);
+ e->as.declarator = calloc(1, sizeof(DeclaratorNode));
+ e->as.declarator->name = name;
+ e->ty = ty;
+ return e;
+}
+
+AstNode* ast_new_func_def(const char* name, Type* ty, AstNode* params, AstNode* body, int stack_size) {
+ AstNode* e = ast_new(AstNodeKind_func_def);
+ e->as.func_def = calloc(1, sizeof(FuncDefNode));
+ e->as.func_def->name = name;
+ e->as.func_def->params = params;
+ e->as.func_def->body = body;
+ e->as.func_def->stack_size = stack_size;
+ e->ty = ty;
+ return e;
+}
+
+AstNode* ast_new_enum_member(const char* name, int value) {
+ AstNode* e = ast_new(AstNodeKind_enum_member);
+ e->as.enum_member = calloc(1, sizeof(EnumMemberNode));
+ e->as.enum_member->name = name;
+ e->as.enum_member->value = value;
+ return e;
+}
+
+AstNode* ast_new_typedef_decl(const char* name, Type* ty) {
+ AstNode* e = ast_new(AstNodeKind_typedef_decl);
+ e->as.typedef_decl = calloc(1, sizeof(TypedefDeclNode));
+ e->as.typedef_decl->name = name;
+ e->ty = ty;
+ return e;
+}
+
+AstNode* ast_new_struct_def(const char* name) {
+ AstNode* e = ast_new(AstNodeKind_struct_def);
+ e->as.struct_def = calloc(1, sizeof(StructDefNode));
+ e->as.struct_def->name = name;
+ return e;
+}
+
+AstNode* ast_new_union_def(const char* name) {
+ AstNode* e = ast_new(AstNodeKind_union_def);
+ e->as.union_def = calloc(1, sizeof(UnionDefNode));
+ e->as.union_def->name = name;
+ return e;
+}
+
+AstNode* ast_new_enum_def(const char* name) {
+ AstNode* e = ast_new(AstNodeKind_enum_def);
+ e->as.enum_def = calloc(1, sizeof(EnumDefNode));
+ e->as.enum_def->name = name;
+ return e;
+}
+
+AstNode* ast_new_array_initializer(AstNode* list) {
+ AstNode* e = ast_new(AstNodeKind_array_initializer);
+ e->as.array_initializer = calloc(1, sizeof(ArrayInitializerNode));
+ e->as.array_initializer->list = list;
+ return e;
+}
+
+int type_sizeof_struct(Type* ty) {
+ int next_offset = 0;
+ int struct_align = 0;
+
+ for (int i = 0; i < members_of(ty)->as.list->len; ++i) {
+ AstNode* member = &members_of(ty)->as.list->items[i];
+ int size, align;
+ if (member->as.struct_member->is_bitfield) {
+ // TODO
+ size = member->as.struct_member->bitfield_width / 8;
+ align = 1;
+ } else {
+ size = type_sizeof(member->ty);
+ align = type_alignof(member->ty);
+ }
+
+ next_offset = to_aligned(next_offset, align);
+ next_offset += size;
+ if (struct_align < align) {
+ struct_align = align;
+ }
+ }
+ return to_aligned(next_offset, struct_align);
+}
+
+int type_sizeof_union(Type* ty) {
+ int union_size = 0;
+ int union_align = 0;
+
+ for (int i = 0; i < members_of(ty)->as.list->len; ++i) {
+ AstNode* member = &members_of(ty)->as.list->items[i];
+ int size = type_sizeof(member->ty);
+ int align = type_alignof(member->ty);
+
+ size = to_aligned(size, align);
+ if (union_size < size) {
+ union_size = size;
+ }
+ if (union_align < align) {
+ union_align = align;
+ }
+ }
+ return to_aligned(union_size, union_align);
+}
+
+int type_alignof_struct(Type* ty) {
+ int struct_align = 0;
+
+ for (int i = 0; i < members_of(ty)->as.list->len; ++i) {
+ AstNode* member = &members_of(ty)->as.list->items[i];
+ int align = type_alignof(member->ty);
+
+ if (struct_align < align) {
+ struct_align = align;
+ }
+ }
+ return struct_align;
+}
+
+int type_alignof_union(Type* ty) {
+ int union_align = 0;
+
+ for (int i = 0; i < members_of(ty)->as.list->len; ++i) {
+ AstNode* member = &members_of(ty)->as.list->items[i];
+ int align = type_alignof(member->ty);
+
+ if (union_align < align) {
+ union_align = align;
+ }
+ }
+ return union_align;
+}
+
+int type_offsetof(Type* ty, const char* name) {
+ if (ty->kind == TypeKind_union) {
+ return 0;
+ }
+ if (ty->kind != TypeKind_struct) {
+ fatal_error("type_offsetof: type is neither a struct nor a union");
+ }
+
+ int next_offset = 0;
+
+ for (int i = 0; i < members_of(ty)->as.list->len; ++i) {
+ AstNode* member = &members_of(ty)->as.list->items[i];
+ int size, align;
+ if (member->as.struct_member->is_bitfield) {
+ // TODO
+ size = member->as.struct_member->bitfield_width / 8;
+ align = 1;
+ } else {
+ size = type_sizeof(member->ty);
+ align = type_alignof(member->ty);
+ }
+
+ next_offset = to_aligned(next_offset, align);
+ if (strcmp(member->as.struct_member->name, name) == 0) {
+ if (member->as.struct_member->is_bitfield) {
+ // C23: 7.21.4
+ // if the specified member is a bit-field, the behavior is undefined.
+ fatal_error("type_offsetof: the result of offsetof(bit-field) is undefined");
+ }
+ return next_offset;
+ }
+ next_offset += size;
+ }
+
+ fatal_error("type_offsetof: member not found");
+}
+
+Type* type_member_typeof(Type* ty, const char* name) {
+ if (ty->kind != TypeKind_struct && ty->kind != TypeKind_union) {
+ fatal_error("type_member_typeof: type is neither a struct nor a union");
+ }
+
+ for (int i = 0; i < members_of(ty)->as.list->len; ++i) {
+ AstNode* member = &members_of(ty)->as.list->items[i];
+ if (strcmp(member->as.struct_member->name, name) == 0) {
+ return member->ty;
+ }
+ }
+
+ fatal_error("type_offsetof: member not found");
+}
diff --git a/src/cc1/ast.h b/src/cc1/ast.h
new file mode 100644
index 0000000..ef984a9
--- /dev/null
+++ b/src/cc1/ast.h
@@ -0,0 +1,433 @@
+#ifndef DUCC_AST_H
+#define DUCC_AST_H
+
+#include "../lib/ducc.h"
+#include "io.h"
+
+typedef enum {
+ StorageClass_unspecified,
+ StorageClass_auto,
+ StorageClass_constexpr,
+ StorageClass_extern,
+ StorageClass_register,
+ StorageClass_static,
+ StorageClass_thread_local,
+ StorageClass_typedef,
+} StorageClass;
+
+const char* storageclass_stringify(StorageClass s);
+
+typedef enum {
+ TypeKind_unknown,
+
+ TypeKind_void,
+ TypeKind_char,
+ TypeKind_schar,
+ TypeKind_uchar,
+ TypeKind_short,
+ TypeKind_ushort,
+ TypeKind_int,
+ TypeKind_uint,
+ TypeKind_long,
+ TypeKind_ulong,
+ TypeKind_llong,
+ TypeKind_ullong,
+ TypeKind_float,
+ TypeKind_double,
+ TypeKind_ldouble,
+ TypeKind_bool,
+ TypeKind_struct,
+ TypeKind_union,
+ TypeKind_enum,
+ TypeKind_ptr,
+ TypeKind_array,
+ TypeKind_func,
+} TypeKind;
+
+const char* type_kind_stringify(TypeKind k);
+
+struct AstNode;
+typedef struct AstNode AstNode;
+
+typedef struct {
+ AstNode* defs;
+ size_t index;
+} TypeRef;
+
+typedef struct Type {
+ TypeKind kind;
+ StorageClass storage_class;
+ // Check `base` instead of `kind` to test if the type is an array or a pointer.
+ struct Type* base;
+ int array_size;
+ TypeRef ref;
+ struct Type* result;
+ AstNode* params;
+} Type;
+
+Type* type_new(TypeKind kind);
+Type* type_dup(Type* src);
+Type* type_new_ptr(Type* base);
+Type* type_new_array(Type* elem, int size);
+Type* type_new_static_string(int len);
+Type* type_array_to_ptr(Type* ty);
+Type* type_new_func(Type* result, AstNode* params);
+bool type_is_unsized(Type* ty);
+bool type_is_unsigned(Type* ty);
+
+int type_sizeof_struct(Type* ty);
+int type_sizeof_union(Type* ty);
+int type_alignof_struct(Type* ty);
+int type_alignof_union(Type* ty);
+int type_offsetof(Type* ty, const char* name);
+Type* type_member_typeof(Type* ty, const char* name);
+
+int type_sizeof(Type* ty);
+int type_alignof(Type* ty);
+
+int to_aligned(int n, int a);
+
+typedef enum {
+ AstNodeKind_unknown,
+ AstNodeKind_nop,
+
+ AstNodeKind_array_initializer,
+ AstNodeKind_assign_expr,
+ AstNodeKind_binary_expr,
+ AstNodeKind_break_stmt,
+ AstNodeKind_case_label,
+ AstNodeKind_cast_expr,
+ AstNodeKind_cond_expr,
+ AstNodeKind_continue_stmt,
+ AstNodeKind_default_label,
+ AstNodeKind_deref_expr,
+ AstNodeKind_do_while_stmt,
+ AstNodeKind_enum_def,
+ AstNodeKind_enum_member,
+ AstNodeKind_expr_stmt,
+ AstNodeKind_for_stmt,
+ AstNodeKind_func,
+ AstNodeKind_func_call,
+ AstNodeKind_func_decl,
+ AstNodeKind_func_def,
+ AstNodeKind_goto_stmt,
+ AstNodeKind_gvar,
+ AstNodeKind_gvar_decl,
+ AstNodeKind_if_stmt,
+ AstNodeKind_int_expr,
+ AstNodeKind_double_expr,
+ AstNodeKind_label_stmt,
+ AstNodeKind_list,
+ AstNodeKind_logical_expr,
+ AstNodeKind_lvar,
+ AstNodeKind_lvar_decl,
+ AstNodeKind_param,
+ AstNodeKind_ref_expr,
+ AstNodeKind_return_stmt,
+ AstNodeKind_str_expr,
+ AstNodeKind_struct_decl,
+ AstNodeKind_struct_def,
+ AstNodeKind_struct_member,
+ AstNodeKind_switch_stmt,
+ AstNodeKind_type,
+ AstNodeKind_typedef_decl,
+ AstNodeKind_unary_expr,
+ AstNodeKind_union_decl,
+ AstNodeKind_union_def,
+
+ // Intermediate ASTs: they are used only in parsing, not for parse result.
+ AstNodeKind_declarator,
+} AstNodeKind;
+
+const char* astnode_kind_stringify(AstNodeKind k);
+
+// Expression nodes
+typedef struct {
+ int value;
+} IntExprNode;
+
+typedef struct {
+ double value;
+} DoubleExprNode;
+
+typedef struct {
+ int idx;
+} StrExprNode;
+
+typedef struct {
+ int op;
+ AstNode* operand;
+} UnaryExprNode;
+
+typedef struct {
+ int op;
+ AstNode* lhs;
+ AstNode* rhs;
+} BinaryExprNode;
+
+typedef struct {
+ int op;
+ AstNode* lhs;
+ AstNode* rhs;
+} LogicalExprNode;
+
+typedef struct {
+ int op;
+ AstNode* lhs;
+ AstNode* rhs;
+} AssignExprNode;
+
+typedef struct {
+ AstNode* operand;
+} CastExprNode;
+
+typedef struct {
+ AstNode* cond;
+ AstNode* then;
+ AstNode* else_;
+} CondExprNode;
+
+typedef struct {
+ AstNode* operand;
+} DerefExprNode;
+
+typedef struct {
+ AstNode* operand;
+} RefExprNode;
+
+typedef struct {
+ AstNode* func;
+ AstNode* args;
+} FuncCallNode;
+
+// Statement nodes
+typedef struct {
+ AstNode* cond;
+ AstNode* then;
+ AstNode* else_;
+} IfStmtNode;
+
+typedef struct {
+ AstNode* init;
+ AstNode* cond;
+ AstNode* update;
+ AstNode* body;
+} ForStmtNode;
+
+typedef struct {
+ AstNode* cond;
+ AstNode* body;
+} DoWhileStmtNode;
+
+typedef struct {
+ AstNode* expr;
+ AstNode* body;
+} SwitchStmtNode;
+
+typedef struct {
+ int value;
+ AstNode* body;
+} CaseLabelNode;
+
+typedef struct {
+ AstNode* body;
+} DefaultLabelNode;
+
+typedef struct {
+ const char* name;
+ AstNode* body;
+} LabelStmtNode;
+
+typedef struct {
+ AstNode* expr;
+} ReturnStmtNode;
+
+typedef struct {
+ const char* label;
+} GotoStmtNode;
+
+typedef struct {
+ AstNode* expr;
+} ExprStmtNode;
+
+// Declaration nodes
+typedef struct {
+ const char* name;
+ AstNode* params;
+ AstNode* body;
+ int stack_size;
+} FuncDefNode;
+
+typedef struct {
+ const char* name;
+ int stack_offset;
+} LvarNode;
+
+typedef struct {
+ AstNode* expr;
+} LvarDeclNode;
+
+typedef struct {
+ const char* name;
+ int stack_offset;
+} ParamNode;
+
+typedef struct {
+ const char* name;
+ AstNode* expr;
+} GvarDeclNode;
+
+typedef struct {
+ const char* name;
+ AstNode* members;
+} StructDefNode;
+
+typedef struct {
+ const char* name;
+ AstNode* members;
+} UnionDefNode;
+
+typedef struct {
+ const char* name;
+ AstNode* members;
+} EnumDefNode;
+
+typedef struct {
+ const char* name;
+ int value;
+} EnumMemberNode;
+
+typedef struct {
+ const char* name;
+ AstNode* init;
+} DeclaratorNode;
+
+typedef struct {
+ const char* name;
+} FuncNode;
+
+typedef struct {
+ const char* name;
+} GvarNode;
+
+typedef struct {
+ const char* name;
+ bool is_bitfield;
+ int bitfield_offset;
+ int bitfield_width;
+} StructMemberNode;
+
+typedef struct {
+ const char* name;
+} TypedefDeclNode;
+
+typedef struct {
+ AstNode* list;
+} ArrayInitializerNode;
+
+typedef struct {
+ AstNode* items;
+ int len;
+ int cap;
+} ListNode;
+
+struct AstNode {
+ AstNodeKind kind;
+ // TODO: currently only limited set of ast nodes have loc field.
+ SourceLocation loc;
+ Type* ty;
+ union {
+ IntExprNode* int_expr;
+ DoubleExprNode* double_expr;
+ StrExprNode* str_expr;
+ UnaryExprNode* unary_expr;
+ BinaryExprNode* binary_expr;
+ LogicalExprNode* logical_expr;
+ AssignExprNode* assign_expr;
+ CastExprNode* cast_expr;
+ CondExprNode* cond_expr;
+ DerefExprNode* deref_expr;
+ RefExprNode* ref_expr;
+ FuncCallNode* func_call;
+ IfStmtNode* if_stmt;
+ ForStmtNode* for_stmt;
+ DoWhileStmtNode* do_while_stmt;
+ SwitchStmtNode* switch_stmt;
+ CaseLabelNode* case_label;
+ DefaultLabelNode* default_label;
+ LabelStmtNode* label_stmt;
+ ReturnStmtNode* return_stmt;
+ GotoStmtNode* goto_stmt;
+ ExprStmtNode* expr_stmt;
+ FuncDefNode* func_def;
+ LvarNode* lvar;
+ LvarDeclNode* lvar_decl;
+ ParamNode* param;
+ GvarDeclNode* gvar_decl;
+ StructDefNode* struct_def;
+ UnionDefNode* union_def;
+ EnumDefNode* enum_def;
+ EnumMemberNode* enum_member;
+ DeclaratorNode* declarator;
+ FuncNode* func;
+ GvarNode* gvar;
+ StructMemberNode* struct_member;
+ TypedefDeclNode* typedef_decl;
+ ArrayInitializerNode* array_initializer;
+ ListNode* list;
+ } as;
+};
+
+typedef struct {
+ AstNode* funcs;
+ AstNode* vars;
+ const char** str_literals;
+} Program;
+
+AstNode* ast_new(AstNodeKind kind);
+AstNode* ast_new_list(int capacity);
+void ast_append(AstNode* list, AstNode* item);
+AstNode* ast_new_int(int v);
+AstNode* ast_new_double(double v);
+AstNode* ast_new_unary_expr(int op, AstNode* operand);
+AstNode* ast_new_binary_expr(int op, AstNode* lhs, AstNode* rhs);
+
+AstNode* ast_new_assign_expr(int op, AstNode* lhs, AstNode* rhs);
+AstNode* ast_new_assign_add_expr(AstNode* lhs, AstNode* rhs);
+AstNode* ast_new_assign_sub_expr(AstNode* lhs, AstNode* rhs);
+AstNode* ast_new_ref_expr(AstNode* operand);
+AstNode* ast_new_deref_expr(AstNode* operand);
+AstNode* ast_new_member_access_expr(AstNode* obj, const char* name);
+AstNode* ast_new_cast_expr(AstNode* operand, Type* result_ty);
+AstNode* ast_new_logical_expr(int op, AstNode* lhs, AstNode* rhs);
+AstNode* ast_new_cond_expr(AstNode* cond, AstNode* then, AstNode* else_);
+AstNode* ast_new_str_expr(int idx, Type* ty);
+AstNode* ast_new_func_call(AstNode* func, AstNode* args);
+AstNode* ast_new_func(const char* name, Type* ty);
+AstNode* ast_new_gvar(const char* name, Type* ty);
+AstNode* ast_new_lvar(const char* name, int stack_offset, Type* ty);
+
+AstNode* ast_new_nop(void);
+AstNode* ast_new_break_stmt(void);
+AstNode* ast_new_continue_stmt(void);
+AstNode* ast_new_return_stmt(AstNode* expr);
+AstNode* ast_new_expr_stmt(AstNode* expr);
+AstNode* ast_new_if_stmt(AstNode* cond, AstNode* then, AstNode* else_);
+AstNode* ast_new_for_stmt(AstNode* init, AstNode* cond, AstNode* update, AstNode* body);
+AstNode* ast_new_do_while_stmt(AstNode* cond, AstNode* body);
+AstNode* ast_new_switch_stmt(AstNode* expr);
+AstNode* ast_new_case_label(int value, AstNode* body);
+AstNode* ast_new_default_label(AstNode* body);
+AstNode* ast_new_goto_stmt(const char* label);
+AstNode* ast_new_label_stmt(const char* name, AstNode* body);
+
+AstNode* ast_new_declarator(const char* name, Type* ty);
+AstNode* ast_new_func_def(const char* name, Type* ty, AstNode* params, AstNode* body, int stack_size);
+AstNode* ast_new_enum_member(const char* name, int value);
+AstNode* ast_new_typedef_decl(const char* name, Type* ty);
+AstNode* ast_new_struct_def(const char* name);
+AstNode* ast_new_union_def(const char* name);
+AstNode* ast_new_enum_def(const char* name);
+AstNode* ast_new_array_initializer(AstNode* list);
+
+#endif
diff --git a/src/cc1/codegen.c b/src/cc1/codegen.c
new file mode 100644
index 0000000..a08f6ad
--- /dev/null
+++ b/src/cc1/codegen.c
@@ -0,0 +1,925 @@
+#include "codegen.h"
+#include <stdlib.h>
+#include <string.h>
+#include "../lib/common.h"
+#include "parse.h"
+#include "preprocess.h"
+
+typedef enum {
+ GenMode_lval,
+ GenMode_rval,
+} GenMode;
+
+typedef struct {
+ Program* prog;
+ FILE* out;
+ int next_label;
+ int* loop_labels;
+ AstNode* current_func;
+ int switch_label;
+} CodeGen;
+
+static CodeGen* codegen_new(Program* prog, FILE* out) {
+ CodeGen* g = calloc(1, sizeof(CodeGen));
+ g->prog = prog;
+ g->out = out;
+ g->next_label = 1;
+ g->loop_labels = calloc(1024, sizeof(int));
+ g->switch_label = -1;
+ return g;
+}
+
+static int codegen_new_label(CodeGen* g) {
+ int new_label = g->next_label;
+ ++g->next_label;
+ return new_label;
+}
+
+static void codegen_expr(CodeGen* g, AstNode* ast, GenMode gen_mode);
+static void codegen_stmt(CodeGen* g, AstNode* ast);
+
+static const char* param_reg(int n) {
+ if (n == 0) {
+ return "rdi";
+ } else if (n == 1) {
+ return "rsi";
+ } else if (n == 2) {
+ return "rdx";
+ } else if (n == 3) {
+ return "rcx";
+ } else if (n == 4) {
+ return "r8";
+ } else if (n == 5) {
+ return "r9";
+ } else {
+ unreachable();
+ }
+}
+
+static void codegen_func_prologue(CodeGen* g, FuncDefNode* func_def) {
+ fprintf(g->out, " push rbp\n");
+ fprintf(g->out, " mov rbp, rsp\n");
+ for (int i = 0, j = 0; i < func_def->params->as.list->len; ++i) {
+ AstNode* param = &func_def->params->as.list->items[i];
+ if (param->as.param->stack_offset >= 0) {
+ fprintf(g->out, " push %s\n", param_reg(j++));
+ }
+ }
+ // Note: rsp must be aligned to 8.
+ fprintf(g->out, " sub rsp, %d\n", to_aligned(func_def->stack_size, 8));
+}
+
+static void codegen_func_epilogue(CodeGen* g) {
+ fprintf(g->out, " mov rsp, rbp\n");
+ fprintf(g->out, " pop rbp\n");
+ fprintf(g->out, " ret\n");
+}
+
+static void codegen_int_expr(CodeGen* g, IntExprNode* expr) {
+ fprintf(g->out, " mov rax, %d\n", expr->value);
+}
+
+static void codegen_str_expr(CodeGen* g, StrExprNode* expr) {
+ fprintf(g->out, " lea rax, .Lstr__%d[rip]\n", expr->idx);
+}
+
+static void codegen_unary_expr(CodeGen* g, UnaryExprNode* expr) {
+ codegen_expr(g, expr->operand, GenMode_rval);
+ if (expr->op == TokenKind_not) {
+ fprintf(g->out, " mov rdi, 0\n");
+ fprintf(g->out, " cmp rax, rdi\n");
+ fprintf(g->out, " sete al\n");
+ fprintf(g->out, " movzb rax, al\n");
+ } else if (expr->op == TokenKind_tilde) {
+ fprintf(g->out, " not rax\n");
+ } else {
+ unreachable();
+ }
+}
+
+static void codegen_ref_expr(CodeGen* g, RefExprNode* expr) {
+ codegen_expr(g, expr->operand, GenMode_lval);
+}
+
+static void codegen_lval2rval(CodeGen* g, Type* ty) {
+ if (ty->kind == TypeKind_array || ty->kind == TypeKind_func) {
+ return;
+ }
+
+ int size = type_sizeof(ty);
+ if (size == 1) {
+ fprintf(g->out, " movsx rax, BYTE PTR [rax]\n");
+ } else if (size == 2) {
+ fprintf(g->out, " movsx rax, WORD PTR [rax]\n");
+ } else if (size == 4) {
+ fprintf(g->out, " movsxd rax, DWORD PTR [rax]\n");
+ } else if (size == 8) {
+ fprintf(g->out, " mov rax, [rax]\n");
+ } else {
+ // Do nothing.
+ }
+}
+
+static void codegen_deref_expr(CodeGen* g, DerefExprNode* expr, GenMode gen_mode) {
+ codegen_expr(g, expr->operand, GenMode_rval);
+ if (gen_mode == GenMode_rval) {
+ codegen_lval2rval(g, expr->operand->ty->base);
+ }
+}
+
+static void codegen_cast_expr(CodeGen* g, CastExprNode* expr, Type* ty) {
+ codegen_expr(g, expr->operand, GenMode_rval);
+
+ // (void) cast does nothing.
+ if (ty->kind == TypeKind_void)
+ return;
+
+ int src_size = type_sizeof(expr->operand->ty);
+ int dst_size = type_sizeof(ty);
+
+ if (src_size == dst_size)
+ return;
+
+ if (dst_size == 1) {
+ fprintf(g->out, " movsx rax, al\n");
+ } else if (dst_size == 2) {
+ if (src_size == 1) {
+ fprintf(g->out, " movsx rax, al\n");
+ } else {
+ fprintf(g->out, " movsx rax, ax\n");
+ }
+ } else if (dst_size == 4) {
+ if (src_size == 1) {
+ fprintf(g->out, " movsx rax, al\n");
+ } else if (src_size == 2) {
+ fprintf(g->out, " movsx rax, ax\n");
+ } else {
+ fprintf(g->out, " movsxd rax, eax\n");
+ }
+ } else if (dst_size == 8) {
+ if (src_size == 1) {
+ fprintf(g->out, " movsx rax, al\n");
+ } else if (src_size == 2) {
+ fprintf(g->out, " movsx rax, ax\n");
+ } else if (src_size == 4) {
+ fprintf(g->out, " movsxd rax, eax\n");
+ }
+ }
+}
+
+static void codegen_logical_expr(CodeGen* g, LogicalExprNode* expr) {
+ int label = codegen_new_label(g);
+
+ if (expr->op == TokenKind_andand) {
+ codegen_expr(g, expr->lhs, GenMode_rval);
+ fprintf(g->out, " cmp rax, 0\n");
+ fprintf(g->out, " je .Lelse%d\n", label);
+ codegen_expr(g, expr->rhs, GenMode_rval);
+ fprintf(g->out, " jmp .Lend%d\n", label);
+ fprintf(g->out, ".Lelse%d:\n", label);
+ fprintf(g->out, " mov rax, 0\n");
+ fprintf(g->out, ".Lend%d:\n", label);
+ } else {
+ codegen_expr(g, expr->lhs, GenMode_rval);
+ fprintf(g->out, " cmp rax, 0\n");
+ fprintf(g->out, " je .Lelse%d\n", label);
+ fprintf(g->out, " mov rax, 1\n");
+ fprintf(g->out, " jmp .Lend%d\n", label);
+ fprintf(g->out, ".Lelse%d:\n", label);
+ codegen_expr(g, expr->rhs, GenMode_rval);
+ fprintf(g->out, ".Lend%d:\n", label);
+ }
+}
+
+static void codegen_binary_expr(CodeGen* g, BinaryExprNode* expr, GenMode gen_mode) {
+ codegen_expr(g, expr->lhs, gen_mode);
+ fprintf(g->out, " push rax\n");
+ codegen_expr(g, expr->rhs, gen_mode);
+ fprintf(g->out, " mov rdi, rax\n");
+ fprintf(g->out, " pop rax\n");
+ // rax=lhs, rdi=rhs
+ if (expr->op == TokenKind_plus) {
+ fprintf(g->out, " add rax, rdi\n");
+ } else if (expr->op == TokenKind_minus) {
+ fprintf(g->out, " sub rax, rdi\n");
+ } else if (expr->op == TokenKind_star) {
+ fprintf(g->out, " imul rax, rdi\n");
+ } else if (expr->op == TokenKind_slash) {
+ if (type_is_unsigned(expr->lhs->ty)) {
+ fprintf(g->out, " xor rdx, rdx\n");
+ fprintf(g->out, " div rdi\n");
+ } else {
+ fprintf(g->out, " cqo\n");
+ fprintf(g->out, " idiv rdi\n");
+ }
+ } else if (expr->op == TokenKind_percent) {
+ if (type_is_unsigned(expr->lhs->ty)) {
+ fprintf(g->out, " xor rdx, rdx\n");
+ fprintf(g->out, " div rdi\n");
+ } else {
+ fprintf(g->out, " cqo\n");
+ fprintf(g->out, " idiv rdi\n");
+ }
+ fprintf(g->out, " mov rax, rdx\n");
+ } else if (expr->op == TokenKind_and) {
+ fprintf(g->out, " and rax, rdi\n");
+ } else if (expr->op == TokenKind_or) {
+ fprintf(g->out, " or rax, rdi\n");
+ } else if (expr->op == TokenKind_xor) {
+ fprintf(g->out, " xor rax, rdi\n");
+ } else if (expr->op == TokenKind_lshift) {
+ fprintf(g->out, " mov rcx, rdi\n");
+ fprintf(g->out, " shl rax, cl\n");
+ } else if (expr->op == TokenKind_rshift) {
+ fprintf(g->out, " mov rcx, rdi\n");
+ if (type_is_unsigned(expr->lhs->ty)) {
+ fprintf(g->out, " shr rax, cl\n");
+ } else {
+ fprintf(g->out, " sar rax, cl\n");
+ }
+ } else if (expr->op == TokenKind_eq) {
+ fprintf(g->out, " cmp rax, rdi\n");
+ fprintf(g->out, " sete al\n");
+ fprintf(g->out, " movzb rax, al\n");
+ } else if (expr->op == TokenKind_ne) {
+ fprintf(g->out, " cmp rax, rdi\n");
+ fprintf(g->out, " setne al\n");
+ fprintf(g->out, " movzb rax, al\n");
+ } else if (expr->op == TokenKind_lt) {
+ fprintf(g->out, " cmp rax, rdi\n");
+ fprintf(g->out, " setl al\n");
+ fprintf(g->out, " movzb rax, al\n");
+ } else if (expr->op == TokenKind_le) {
+ fprintf(g->out, " cmp rax, rdi\n");
+ fprintf(g->out, " setle al\n");
+ fprintf(g->out, " movzb rax, al\n");
+ } else {
+ unreachable();
+ }
+}
+
+static void codegen_cond_expr(CodeGen* g, CondExprNode* expr, GenMode gen_mode) {
+ int label = codegen_new_label(g);
+
+ codegen_expr(g, expr->cond, GenMode_rval);
+ fprintf(g->out, " cmp rax, 0\n");
+ fprintf(g->out, " je .Lelse%d\n", label);
+ codegen_expr(g, expr->then, gen_mode);
+ fprintf(g->out, " jmp .Lend%d\n", label);
+ fprintf(g->out, ".Lelse%d:\n", label);
+ codegen_expr(g, expr->else_, gen_mode);
+ fprintf(g->out, ".Lend%d:\n", label);
+}
+
+static void codegen_assign_expr_helper(CodeGen* g, AssignExprNode* expr) {
+ if (expr->op == TokenKind_assign) {
+ return;
+ }
+
+ fprintf(g->out, " mov rdi, rax\n");
+ fprintf(g->out, " mov rax, [rsp]\n");
+ codegen_lval2rval(g, expr->lhs->ty);
+
+ if (expr->op == TokenKind_assign_add) {
+ fprintf(g->out, " add rax, rdi\n");
+ } else if (expr->op == TokenKind_assign_sub) {
+ fprintf(g->out, " sub rax, rdi\n");
+ } else if (expr->op == TokenKind_assign_mul) {
+ fprintf(g->out, " imul rax, rdi\n");
+ } else if (expr->op == TokenKind_assign_div) {
+ if (type_is_unsigned(expr->lhs->ty)) {
+ fprintf(g->out, " xor rdx, rdx\n");
+ fprintf(g->out, " div rdi\n");
+ } else {
+ fprintf(g->out, " cqo\n");
+ fprintf(g->out, " idiv rdi\n");
+ }
+ } else if (expr->op == TokenKind_assign_mod) {
+ if (type_is_unsigned(expr->lhs->ty)) {
+ fprintf(g->out, " xor rdx, rdx\n");
+ fprintf(g->out, " div rdi\n");
+ } else {
+ fprintf(g->out, " cqo\n");
+ fprintf(g->out, " idiv rdi\n");
+ }
+ fprintf(g->out, " mov rax, rdx\n");
+ } else if (expr->op == TokenKind_assign_or) {
+ fprintf(g->out, " or rax, rdi\n");
+ } else if (expr->op == TokenKind_assign_and) {
+ fprintf(g->out, " and rax, rdi\n");
+ } else if (expr->op == TokenKind_assign_xor) {
+ fprintf(g->out, " xor rax, rdi\n");
+ } else if (expr->op == TokenKind_assign_lshift) {
+ fprintf(g->out, " mov rcx, rdi\n");
+ fprintf(g->out, " shl rax, cl\n");
+ } else if (expr->op == TokenKind_assign_rshift) {
+ fprintf(g->out, " mov rcx, rdi\n");
+ if (type_is_unsigned(expr->lhs->ty)) {
+ fprintf(g->out, " shr rax, cl\n");
+ } else {
+ fprintf(g->out, " sar rax, cl\n");
+ }
+ } else {
+ unreachable();
+ }
+}
+
+static void codegen_assign_expr(CodeGen* g, AssignExprNode* expr) {
+ int sizeof_lhs = type_sizeof(expr->lhs->ty);
+
+ codegen_expr(g, expr->lhs, GenMode_lval);
+ fprintf(g->out, " push rax\n");
+ codegen_expr(g, expr->rhs, GenMode_rval);
+ codegen_assign_expr_helper(g, expr);
+ fprintf(g->out, " pop rdi\n");
+ if (sizeof_lhs == 1) {
+ fprintf(g->out, " mov BYTE PTR [rdi], al\n");
+ } else if (sizeof_lhs == 2) {
+ fprintf(g->out, " mov WORD PTR [rdi], ax\n");
+ } else if (sizeof_lhs == 4) {
+ fprintf(g->out, " mov DWORD PTR [rdi], eax\n");
+ } else if (sizeof_lhs == 8) {
+ fprintf(g->out, " mov [rdi], rax\n");
+ } else {
+ if (expr->op != TokenKind_assign) {
+ unimplemented();
+ }
+ // rax: address of rhs
+ // rdi: address of lhs
+ // Perform byte-wise copy. Use r10 register as temporary space.
+ for (int i = 0; i < sizeof_lhs; ++i) {
+ fprintf(g->out, " mov r10b, %d[rax]\n", i);
+ fprintf(g->out, " mov %d[rdi], r10b\n", i);
+ }
+ }
+}
+
+static void codegen_args(CodeGen* g, AstNode* args) {
+ int* required_gp_regs_for_each_arg = calloc(args->as.list->len, sizeof(int));
+
+ int gp_regs = 6;
+ for (int i = 0; i < args->as.list->len; ++i) {
+ AstNode* arg = &args->as.list->items[i];
+ int ty_size = type_sizeof(arg->ty);
+ // TODO
+ if (arg->ty->kind == TypeKind_array) {
+ ty_size = 8;
+ }
+ int required_gp_regs;
+ if (ty_size <= 8) {
+ required_gp_regs = 1;
+ } else if (ty_size <= 16) {
+ required_gp_regs = 2;
+ } else {
+ required_gp_regs = 0;
+ }
+ if (required_gp_regs <= gp_regs) {
+ gp_regs -= required_gp_regs;
+ } else {
+ required_gp_regs = 0;
+ }
+ required_gp_regs_for_each_arg[i] = required_gp_regs;
+ }
+
+ // Evaluate arguments in the reverse order (right to left).
+ // Arguments passed by stack.
+ for (int i = args->as.list->len - 1; i >= 0; --i) {
+ AstNode* arg = &args->as.list->items[i];
+ if (required_gp_regs_for_each_arg[i] == 0) {
+ codegen_expr(g, arg, GenMode_rval);
+ int ty_size = type_sizeof(arg->ty);
+ if (ty_size <= 8) {
+ fprintf(g->out, " push rax\n");
+ } else {
+ // Perform byte-wise copy onto stack. Use r10 register as temporary space.
+ // NOTE: rsp must be aligned to 8.
+ fprintf(g->out, " sub rsp, %d\n", to_aligned(ty_size, 8));
+ for (int i = 0; i < ty_size; ++i) {
+ // Copy a sinle byte from the address that rax points to to the stack via r10 register.
+ fprintf(g->out, " mov r10b, %d[rax]\n", i);
+ fprintf(g->out, " mov %d[rsp], r10b\n", i);
+ }
+ }
+ }
+ }
+ // Arguments passed by registers.
+ for (int i = args->as.list->len - 1; i >= 0; --i) {
+ AstNode* arg = &args->as.list->items[i];
+ if (required_gp_regs_for_each_arg[i] != 0) {
+ codegen_expr(g, arg, GenMode_rval);
+ if (required_gp_regs_for_each_arg[i] == 1) {
+ fprintf(g->out, " push rax\n");
+ } else {
+ assert(required_gp_regs_for_each_arg[i] == 2);
+ fprintf(g->out, " push [rax]\n");
+ fprintf(g->out, " push [rax+8]\n");
+ }
+ }
+ }
+ // Pop pushed arguments onto registers.
+ for (int i = 0, j = 0; i < args->as.list->len; ++i) {
+ for (int k = 0; k < required_gp_regs_for_each_arg[i]; ++k) {
+ fprintf(g->out, " pop %s\n", param_reg(j++));
+ }
+ }
+}
+
+static void codegen_func_call(CodeGen* g, FuncCallNode* call) {
+ const char* func_name = NULL;
+ if (call->func->kind == AstNodeKind_func) {
+ func_name = call->func->as.func->name;
+ }
+
+ if (func_name && strcmp(func_name, "__ducc_va_start") == 0) {
+ fprintf(g->out, " # __ducc_va_start BEGIN\n");
+ AstNode* va_list_args = &call->args->as.list->items[0];
+ codegen_expr(g, va_list_args, GenMode_rval);
+ fprintf(g->out, " mov rdi, rax\n");
+
+ // Allocate save area.
+ fprintf(g->out, " sub rsp, 48\n");
+
+ fprintf(g->out, " mov [rsp+ 0], rdi\n");
+ fprintf(g->out, " mov [rsp+ 8], rsi\n");
+ fprintf(g->out, " mov [rsp+16], rdx\n");
+ fprintf(g->out, " mov [rsp+24], rcx\n");
+ fprintf(g->out, " mov [rsp+32], r8\n");
+ fprintf(g->out, " mov [rsp+40], r9\n");
+
+ // Initialize va_list.
+ fprintf(g->out, " mov DWORD PTR [rdi], 8\n"); // gp_offset
+ fprintf(g->out, " mov DWORD PTR [rdi+4], 0\n"); // fp_offset
+ fprintf(g->out, " lea rax, [rbp+16]\n"); // overflow_arg_area
+ fprintf(g->out, " mov QWORD PTR [rdi+8], rax\n");
+ fprintf(g->out, " mov QWORD PTR [rdi+16], rsp\n"); // reg_save_area
+
+ fprintf(g->out, " mov rax, 0\n"); // dummy return value
+ fprintf(g->out, " # __ducc_va_start END\n");
+ return;
+ }
+
+ if (func_name && strcmp(func_name, "__ducc_va_arg") == 0) {
+ fprintf(g->out, " # __ducc_va_arg BEGIN\n");
+
+ // Evaluate va_list argument (first argument)
+ AstNode* va_list_arg = &call->args->as.list->items[0];
+ codegen_expr(g, va_list_arg, GenMode_rval);
+ fprintf(g->out, " mov rdi, rax\n"); // rdi = pointer to va_list
+
+ // Evaluate size argument (second argument)
+ AstNode* size_arg = &call->args->as.list->items[1];
+ codegen_expr(g, size_arg, GenMode_rval);
+ fprintf(g->out, " mov rsi, rax\n"); // rsi = size
+
+ int label = codegen_new_label(g);
+
+ // Check if gp_offset < 48 (6 registers * 8 bytes)
+ fprintf(g->out, " mov eax, DWORD PTR [rdi]\n"); // eax = gp_offset
+ fprintf(g->out, " cmp eax, 48\n");
+ fprintf(g->out, " jae .Lva_arg_overflow%d\n", label);
+
+ // Fetch from register save area
+ fprintf(g->out, " mov rcx, QWORD PTR [rdi+16]\n"); // rcx = reg_save_area
+ fprintf(g->out, " movsx rdx, eax\n"); // rdx = gp_offset (sign-extended)
+ fprintf(g->out, " add rcx, rdx\n"); // rcx = reg_save_area + gp_offset
+ fprintf(g->out, " add eax, 8\n"); // gp_offset += 8
+ fprintf(g->out, " mov DWORD PTR [rdi], eax\n"); // store updated gp_offset
+ fprintf(g->out, " mov rax, rcx\n"); // return pointer to argument
+ fprintf(g->out, " jmp .Lva_arg_end%d\n", label);
+
+ // Fetch from overflow area (stack)
+ fprintf(g->out, ".Lva_arg_overflow%d:\n", label);
+ fprintf(g->out, " mov rcx, QWORD PTR [rdi+8]\n"); // rcx = overflow_arg_area
+ fprintf(g->out, " mov rax, rcx\n"); // return pointer to argument
+ fprintf(g->out, " add rcx, 8\n"); // overflow_arg_area += 8
+ fprintf(g->out, " mov QWORD PTR [rdi+8], rcx\n"); // store updated overflow_arg_area
+
+ fprintf(g->out, ".Lva_arg_end%d:\n", label);
+ fprintf(g->out, " # __ducc_va_arg END\n");
+ return;
+ }
+
+ AstNode* args = call->args;
+
+ int gp_regs = 6;
+ int pass_by_stack_offset = -16;
+ for (int i = 0; i < args->as.list->len; ++i) {
+ AstNode* arg = &args->as.list->items[i];
+ int ty_size = type_sizeof(arg->ty);
+ // TODO
+ if (arg->ty->kind == TypeKind_array) {
+ ty_size = 8;
+ }
+ int required_gp_regs;
+ if (ty_size <= 8) {
+ required_gp_regs = 1;
+ } else if (ty_size <= 16) {
+ required_gp_regs = 2;
+ } else {
+ required_gp_regs = 0;
+ }
+ if (required_gp_regs <= gp_regs) {
+ gp_regs -= required_gp_regs;
+ } else {
+ required_gp_regs = 0;
+ }
+ if (required_gp_regs == 0) {
+ pass_by_stack_offset -= to_aligned(ty_size, 8);
+ }
+ }
+
+ int label = codegen_new_label(g);
+
+ fprintf(g->out, " mov rax, rsp\n");
+ if (-pass_by_stack_offset % 16 != 0) {
+ fprintf(g->out, " add rax, 8\n");
+ }
+ fprintf(g->out, " and rax, 15\n");
+ fprintf(g->out, " cmp rax, 0\n");
+ fprintf(g->out, " je .Laligned%d\n", label);
+
+ fprintf(g->out, " sub rsp, 8\n");
+ codegen_args(g, args);
+ fprintf(g->out, " mov rax, 0\n");
+ if (func_name) {
+ fprintf(g->out, " call %s\n", func_name);
+ } else {
+ codegen_expr(g, call->func, GenMode_rval);
+ fprintf(g->out, " call rax\n");
+ }
+ fprintf(g->out, " add rsp, 8\n");
+
+ fprintf(g->out, " jmp .Lend%d\n", label);
+ fprintf(g->out, ".Laligned%d:\n", label);
+
+ codegen_args(g, args);
+ fprintf(g->out, " mov rax, 0\n");
+ if (func_name) {
+ fprintf(g->out, " call %s\n", func_name);
+ } else {
+ codegen_expr(g, call->func, GenMode_rval);
+ fprintf(g->out, " call rax\n");
+ }
+
+ fprintf(g->out, ".Lend%d:\n", label);
+ // Pop pass-by-stack arguments.
+ fprintf(g->out, " add rsp, %d\n", -pass_by_stack_offset - 16);
+}
+
+static void codegen_lvar(CodeGen* g, LvarNode* lvar, Type* ty, GenMode gen_mode) {
+ fprintf(g->out, " lea rax, %d[rbp]\n", -lvar->stack_offset);
+ if (gen_mode == GenMode_rval) {
+ codegen_lval2rval(g, ty);
+ }
+}
+
+static void codegen_gvar(CodeGen* g, GvarNode* gvar, Type* ty, GenMode gen_mode) {
+ fprintf(g->out, " lea rax, %s[rip]\n", gvar->name);
+ if (gen_mode == GenMode_rval) {
+ codegen_lval2rval(g, ty);
+ }
+}
+
+static void codegen_func_ref(CodeGen* g, FuncNode* func) {
+ fprintf(g->out, " lea rax, %s[rip]\n", func->name);
+}
+
+static void codegen_composite_expr(CodeGen* g, AstNode* ast) {
+ // Standard C does not have composite expression, but ducc internally has.
+ for (int i = 0; i < ast->as.list->len; ++i) {
+ AstNode* expr = ast->as.list->items + i;
+ codegen_expr(g, expr, GenMode_rval);
+ }
+}
+
+static void codegen_expr(CodeGen* g, AstNode* ast, GenMode gen_mode) {
+ if (ast->kind == AstNodeKind_int_expr) {
+ codegen_int_expr(g, ast->as.int_expr);
+ } else if (ast->kind == AstNodeKind_str_expr) {
+ codegen_str_expr(g, ast->as.str_expr);
+ } else if (ast->kind == AstNodeKind_unary_expr) {
+ codegen_unary_expr(g, ast->as.unary_expr);
+ } else if (ast->kind == AstNodeKind_ref_expr) {
+ codegen_ref_expr(g, ast->as.ref_expr);
+ } else if (ast->kind == AstNodeKind_deref_expr) {
+ codegen_deref_expr(g, ast->as.deref_expr, gen_mode);
+ } else if (ast->kind == AstNodeKind_cast_expr) {
+ codegen_cast_expr(g, ast->as.cast_expr, ast->ty);
+ } else if (ast->kind == AstNodeKind_binary_expr) {
+ codegen_binary_expr(g, ast->as.binary_expr, gen_mode);
+ } else if (ast->kind == AstNodeKind_cond_expr) {
+ codegen_cond_expr(g, ast->as.cond_expr, gen_mode);
+ } else if (ast->kind == AstNodeKind_logical_expr) {
+ codegen_logical_expr(g, ast->as.logical_expr);
+ } else if (ast->kind == AstNodeKind_assign_expr) {
+ codegen_assign_expr(g, ast->as.assign_expr);
+ } else if (ast->kind == AstNodeKind_func_call) {
+ codegen_func_call(g, ast->as.func_call);
+ } else if (ast->kind == AstNodeKind_lvar) {
+ codegen_lvar(g, ast->as.lvar, ast->ty, gen_mode);
+ } else if (ast->kind == AstNodeKind_gvar) {
+ codegen_gvar(g, ast->as.gvar, ast->ty, gen_mode);
+ } else if (ast->kind == AstNodeKind_func) {
+ codegen_func_ref(g, ast->as.func);
+ } else if (ast->kind == AstNodeKind_list) {
+ codegen_composite_expr(g, ast);
+ } else {
+ unreachable();
+ }
+}
+
+static void codegen_return_stmt(CodeGen* g, ReturnStmtNode* stmt) {
+ if (stmt->expr) {
+ codegen_expr(g, stmt->expr, GenMode_rval);
+ }
+ codegen_func_epilogue(g);
+}
+
+static void codegen_if_stmt(CodeGen* g, IfStmtNode* stmt) {
+ int label = codegen_new_label(g);
+
+ codegen_expr(g, stmt->cond, GenMode_rval);
+ fprintf(g->out, " cmp rax, 0\n");
+ fprintf(g->out, " je .Lelse%d\n", label);
+ codegen_stmt(g, stmt->then);
+ fprintf(g->out, " jmp .Lend%d\n", label);
+ fprintf(g->out, ".Lelse%d:\n", label);
+ if (stmt->else_) {
+ codegen_stmt(g, stmt->else_);
+ }
+ fprintf(g->out, ".Lend%d:\n", label);
+}
+
+static void codegen_for_stmt(CodeGen* g, ForStmtNode* stmt) {
+ int label = codegen_new_label(g);
+ ++g->loop_labels;
+ *g->loop_labels = label;
+
+ if (stmt->init) {
+ codegen_expr(g, stmt->init, GenMode_rval);
+ }
+ fprintf(g->out, ".Lbegin%d:\n", label);
+ codegen_expr(g, stmt->cond, GenMode_rval);
+ fprintf(g->out, " cmp rax, 0\n");
+ fprintf(g->out, " je .Lend%d\n", label);
+ codegen_stmt(g, stmt->body);
+ fprintf(g->out, ".Lcontinue%d:\n", label);
+ if (stmt->update) {
+ codegen_expr(g, stmt->update, GenMode_rval);
+ }
+ fprintf(g->out, " jmp .Lbegin%d\n", label);
+ fprintf(g->out, ".Lend%d:\n", label);
+
+ --g->loop_labels;
+}
+
+static void codegen_do_while_stmt(CodeGen* g, DoWhileStmtNode* stmt) {
+ int label = codegen_new_label(g);
+ ++g->loop_labels;
+ *g->loop_labels = label;
+
+ fprintf(g->out, ".Lbegin%d:\n", label);
+ codegen_stmt(g, stmt->body);
+ fprintf(g->out, ".Lcontinue%d:\n", label);
+ codegen_expr(g, stmt->cond, GenMode_rval);
+ fprintf(g->out, " cmp rax, 0\n");
+ fprintf(g->out, " je .Lend%d\n", label);
+ fprintf(g->out, " jmp .Lbegin%d\n", label);
+ fprintf(g->out, ".Lend%d:\n", label);
+
+ --g->loop_labels;
+}
+
+static void codegen_break_stmt(CodeGen* g) {
+ if (g->switch_label != -1) {
+ fprintf(g->out, " jmp .Lend%d\n", g->switch_label);
+ } else {
+ int label = *g->loop_labels;
+ fprintf(g->out, " jmp .Lend%d\n", label);
+ }
+}
+
+static void codegen_continue_stmt(CodeGen* g) {
+ int label = *g->loop_labels;
+ fprintf(g->out, " jmp .Lcontinue%d\n", label);
+}
+
+static void codegen_goto_stmt(CodeGen* g, GotoStmtNode* stmt) {
+ fprintf(g->out, " jmp .L%s__%s\n", g->current_func->as.func_def->name, stmt->label);
+}
+
+static void codegen_label_stmt(CodeGen* g, LabelStmtNode* stmt) {
+ fprintf(g->out, ".L%s__%s:\n", g->current_func->as.func_def->name, stmt->name);
+ codegen_stmt(g, stmt->body);
+}
+
+// Helper to collect case values from the switch body
+static void collect_cases(AstNode* stmt, int* case_values, int* case_labels, int* n_cases) {
+ if (!stmt)
+ return;
+
+ if (stmt->kind == AstNodeKind_case_label) {
+ case_values[*n_cases] = stmt->as.case_label->value;
+ case_labels[*n_cases] = *n_cases + 1;
+ (*n_cases)++;
+ collect_cases(stmt->as.case_label->body, case_values, case_labels, n_cases);
+ } else if (stmt->kind == AstNodeKind_default_label) {
+ collect_cases(stmt->as.default_label->body, case_values, case_labels, n_cases);
+ } else if (stmt->kind == AstNodeKind_list) {
+ for (int i = 0; i < stmt->as.list->len; i++) {
+ collect_cases(stmt->as.list->items + i, case_values, case_labels, n_cases);
+ }
+ }
+}
+
+static bool codegen_switch_body(CodeGen* g, AstNode* stmt, int* case_values, int* case_labels, int n_cases) {
+ if (!stmt)
+ return false;
+
+ if (stmt->kind == AstNodeKind_case_label) {
+ int value = stmt->as.case_label->value;
+ for (int i = 0; i < n_cases; i++) {
+ if (case_values[i] == value) {
+ fprintf(g->out, ".Lcase%d_%d:\n", g->switch_label, case_labels[i]);
+ break;
+ }
+ }
+ return codegen_switch_body(g, stmt->as.case_label->body, case_values, case_labels, n_cases);
+ } else if (stmt->kind == AstNodeKind_default_label) {
+ fprintf(g->out, ".Ldefault%d:\n", g->switch_label);
+ codegen_switch_body(g, stmt->as.default_label->body, case_values, case_labels, n_cases);
+ return true;
+ } else if (stmt->kind == AstNodeKind_list) {
+ bool default_label_emitted = false;
+ for (int i = 0; i < stmt->as.list->len; i++) {
+ default_label_emitted |=
+ codegen_switch_body(g, stmt->as.list->items + i, case_values, case_labels, n_cases);
+ }
+ return default_label_emitted;
+ } else {
+ codegen_stmt(g, stmt);
+ return false;
+ }
+}
+
+static void codegen_switch_stmt(CodeGen* g, SwitchStmtNode* stmt) {
+ int switch_label = codegen_new_label(g);
+ int prev_switch_label = g->switch_label;
+ g->switch_label = switch_label;
+
+ // Collect all case values and assign labels
+ int case_values[256];
+ int case_labels[256];
+ int n_cases = 0;
+ collect_cases(stmt->body, case_values, case_labels, &n_cases);
+
+ // Generate jump instructions.
+ codegen_expr(g, stmt->expr, GenMode_rval);
+ for (int i = 0; i < n_cases; i++) {
+ fprintf(g->out, " cmp rax, %d\n", case_values[i]);
+ fprintf(g->out, " je .Lcase%d_%d\n", switch_label, case_labels[i]);
+ }
+ fprintf(g->out, " jmp .Ldefault%d\n", switch_label);
+
+ // Generate the switch body with labels.
+ bool default_label_emitted = codegen_switch_body(g, stmt->body, case_values, case_labels, n_cases);
+
+ if (!default_label_emitted) {
+ fprintf(g->out, ".Ldefault%d:\n", switch_label);
+ }
+ fprintf(g->out, ".Lend%d:\n", switch_label);
+
+ g->switch_label = prev_switch_label;
+}
+
+static void codegen_expr_stmt(CodeGen* g, ExprStmtNode* stmt) {
+ codegen_expr(g, stmt->expr, GenMode_rval);
+}
+
+static void codegen_block_stmt(CodeGen* g, AstNode* ast) {
+ for (int i = 0; i < ast->as.list->len; ++i) {
+ AstNode* stmt = ast->as.list->items + i;
+ codegen_stmt(g, stmt);
+ }
+}
+
+static void codegen_stmt(CodeGen* g, AstNode* ast) {
+ // TODO: support multiple files.
+ fprintf(g->out, " .loc 1 %d 0\n", ast->loc.line);
+
+ if (ast->kind == AstNodeKind_list) {
+ codegen_block_stmt(g, ast);
+ } else if (ast->kind == AstNodeKind_return_stmt) {
+ codegen_return_stmt(g, ast->as.return_stmt);
+ } else if (ast->kind == AstNodeKind_if_stmt) {
+ codegen_if_stmt(g, ast->as.if_stmt);
+ } else if (ast->kind == AstNodeKind_switch_stmt) {
+ codegen_switch_stmt(g, ast->as.switch_stmt);
+ } else if (ast->kind == AstNodeKind_for_stmt) {
+ codegen_for_stmt(g, ast->as.for_stmt);
+ } else if (ast->kind == AstNodeKind_do_while_stmt) {
+ codegen_do_while_stmt(g, ast->as.do_while_stmt);
+ } else if (ast->kind == AstNodeKind_break_stmt) {
+ codegen_break_stmt(g);
+ } else if (ast->kind == AstNodeKind_continue_stmt) {
+ codegen_continue_stmt(g);
+ } else if (ast->kind == AstNodeKind_goto_stmt) {
+ codegen_goto_stmt(g, ast->as.goto_stmt);
+ } else if (ast->kind == AstNodeKind_label_stmt) {
+ codegen_label_stmt(g, ast->as.label_stmt);
+ } else if (ast->kind == AstNodeKind_expr_stmt) {
+ codegen_expr_stmt(g, ast->as.expr_stmt);
+ } else if (ast->kind == AstNodeKind_lvar_decl) {
+ // Do nothing.
+ } else if (ast->kind == AstNodeKind_nop) {
+ // Do nothing.
+ } else if (ast->kind == AstNodeKind_case_label || ast->kind == AstNodeKind_default_label) {
+ // They are handled by codegen_switch_stmt().
+ unreachable();
+ } else {
+ unreachable();
+ }
+}
+
+static void codegen_func(CodeGen* g, AstNode* ast) {
+ g->current_func = ast;
+
+ if (ast->ty->storage_class != StorageClass_static) {
+ fprintf(g->out, ".globl %s\n", ast->as.func_def->name);
+ }
+ fprintf(g->out, "%s:\n", ast->as.func_def->name);
+
+ codegen_func_prologue(g, ast->as.func_def);
+ codegen_stmt(g, ast->as.func_def->body);
+ if (strcmp(ast->as.func_def->name, "main") == 0) {
+ // C99: 5.1.2.2.3
+ fprintf(g->out, " mov rax, 0\n");
+ }
+ codegen_func_epilogue(g);
+
+ fprintf(g->out, "\n");
+ g->current_func = NULL;
+}
+
+static void codegen_global_var(CodeGen* g, AstNode* var) {
+ if (var->ty->storage_class == StorageClass_extern) {
+ return;
+ }
+ if (var->ty->storage_class != StorageClass_static) {
+ fprintf(g->out, ".globl %s\n", var->as.gvar_decl->name);
+ }
+ fprintf(g->out, " %s:\n", var->as.gvar_decl->name);
+ if (!var->as.gvar_decl->expr) {
+ fprintf(g->out, " .zero %d\n", type_sizeof(var->ty));
+ return;
+ }
+
+ if (var->ty->kind == TypeKind_array && var->as.gvar_decl->expr->kind == AstNodeKind_str_expr) {
+ const char* str = g->prog->str_literals[var->as.gvar_decl->expr->as.str_expr->idx - 1];
+ fprintf(g->out, " .string \"%s\"\n", str);
+ return;
+ }
+
+ InitData* data = eval_init_expr(var->as.gvar_decl->expr, var->ty);
+
+ for (size_t i = 0; i < data->len; ++i) {
+ InitDataBlock* block = &data->blocks[i];
+ if (block->kind == InitDataBlockKind_addr) {
+ fprintf(g->out, " .quad %s\n", block->as.addr.label);
+ } else {
+ for (size_t j = 0; j < block->as.bytes.len; ++j) {
+ fprintf(g->out, " .byte %d\n", block->as.bytes.buf[j]);
+ }
+ }
+ }
+}
+
+void codegen(Program* prog, const char* input_filename, FILE* out) {
+ CodeGen* g = codegen_new(prog, out);
+
+ fprintf(g->out, ".intel_syntax noprefix\n\n");
+
+ // TODO: support multiple files.
+ fprintf(g->out, ".file 1 \"%s\"\n\n", input_filename);
+
+ // For GNU ld:
+ // https://sourceware.org/binutils/docs/ld/Options.html
+ fprintf(g->out, ".section .note.GNU-stack,\"\",@progbits\n\n");
+
+ fprintf(g->out, ".section .rodata\n\n");
+ for (int i = 0; prog->str_literals[i]; ++i) {
+ fprintf(g->out, ".Lstr__%d:\n", i + 1);
+ fprintf(g->out, " .string \"%s\"\n\n", prog->str_literals[i]);
+ }
+
+ fprintf(g->out, ".data\n\n");
+ for (int i = 0; i < prog->vars->as.list->len; ++i) {
+ codegen_global_var(g, &prog->vars->as.list->items[i]);
+ }
+
+ fprintf(g->out, ".text\n\n");
+ for (int i = 0; i < prog->funcs->as.list->len; ++i) {
+ AstNode* func = &prog->funcs->as.list->items[i];
+ codegen_func(g, func);
+ }
+}
diff --git a/src/cc1/codegen.h b/src/cc1/codegen.h
new file mode 100644
index 0000000..2c1d7b3
--- /dev/null
+++ b/src/cc1/codegen.h
@@ -0,0 +1,8 @@
+#ifndef DUCC_CODEGEN_H
+#define DUCC_CODEGEN_H
+
+#include "ast.h"
+
+void codegen(Program* prog, const char* input_filename, FILE* out);
+
+#endif
diff --git a/src/cc1/codegen_wasm.c b/src/cc1/codegen_wasm.c
new file mode 100644
index 0000000..c7f8870
--- /dev/null
+++ b/src/cc1/codegen_wasm.c
@@ -0,0 +1,164 @@
+#include <stdlib.h>
+#include <string.h>
+#include "../lib/common.h"
+#include "codegen.h"
+#include "preprocess.h"
+
+typedef enum {
+ GenMode_lval,
+ GenMode_rval,
+} GenMode;
+
+typedef struct {
+ FILE* out;
+ int next_label;
+ int* loop_labels;
+ AstNode* current_func;
+ int switch_label;
+} CodeGen;
+
+static CodeGen* codegen_new(FILE* out) {
+ CodeGen* g = calloc(1, sizeof(CodeGen));
+ g->out = out;
+ g->next_label = 1;
+ g->loop_labels = calloc(1024, sizeof(int));
+ g->switch_label = -1;
+ return g;
+}
+
+static void codegen_expr(CodeGen* g, AstNode* ast, GenMode gen_mode);
+static void codegen_stmt(CodeGen* g, AstNode* ast);
+
+static void codegen_func_prologue(CodeGen* g, FuncDefNode* func_def) {
+ for (int i = 0; i < func_def->params->as.list->len; ++i) {
+ fprintf(g->out, " (param $l_%s i32)", func_def->params->as.list->items[i].as.param->name);
+ }
+ fprintf(g->out, " (result i32)\n");
+}
+
+static void codegen_func_epilogue(CodeGen*) {
+}
+
+static void codegen_int_expr(CodeGen* g, IntExprNode* expr) {
+ fprintf(g->out, " i32.const %d\n", expr->value);
+}
+
+static void codegen_binary_expr(CodeGen* g, BinaryExprNode* expr, GenMode gen_mode) {
+ codegen_expr(g, expr->lhs, gen_mode);
+ codegen_expr(g, expr->rhs, gen_mode);
+ if (expr->op == TokenKind_plus) {
+ fprintf(g->out, " i32.add\n");
+ } else if (expr->op == TokenKind_minus) {
+ fprintf(g->out, " i32.sub\n");
+ } else if (expr->op == TokenKind_le) {
+ fprintf(g->out, " i32.le_s\n");
+ } else {
+ unreachable();
+ }
+}
+
+static void codegen_lvar(CodeGen* g, LvarNode* lvar, GenMode) {
+ fprintf(g->out, " local.get $l_%s\n", lvar->name);
+}
+
+static void codegen_func_call(CodeGen* g, FuncCallNode* call) {
+ const char* func_name;
+ if (call->func->kind == AstNodeKind_func) {
+ func_name = call->func->as.func->name;
+ } else {
+ unimplemented();
+ }
+
+ AstNode* args = call->args;
+ for (int i = 0; i < args->as.list->len; ++i) {
+ AstNode* arg = args->as.list->items + i;
+ codegen_expr(g, arg, GenMode_rval);
+ }
+ fprintf(g->out, " call $%s\n", func_name);
+}
+
+static void codegen_expr(CodeGen* g, AstNode* ast, GenMode gen_mode) {
+ if (ast->kind == AstNodeKind_int_expr) {
+ codegen_int_expr(g, ast->as.int_expr);
+ } else if (ast->kind == AstNodeKind_binary_expr) {
+ codegen_binary_expr(g, ast->as.binary_expr, gen_mode);
+ } else if (ast->kind == AstNodeKind_lvar) {
+ codegen_lvar(g, ast->as.lvar, gen_mode);
+ } else if (ast->kind == AstNodeKind_func_call) {
+ codegen_func_call(g, ast->as.func_call);
+ } else {
+ unreachable();
+ }
+}
+
+static void codegen_return_stmt(CodeGen* g, ReturnStmtNode* stmt) {
+ if (stmt->expr) {
+ codegen_expr(g, stmt->expr, GenMode_rval);
+ }
+ fprintf(g->out, " return\n");
+}
+
+static void codegen_if_stmt(CodeGen* g, IfStmtNode* stmt) {
+ codegen_expr(g, stmt->cond, GenMode_rval);
+ fprintf(g->out, " (if (result i32)\n");
+ fprintf(g->out, " (then\n");
+ codegen_stmt(g, stmt->then);
+ fprintf(g->out, " )\n");
+ if (stmt->else_) {
+ fprintf(g->out, " (else\n");
+ codegen_stmt(g, stmt->else_);
+ fprintf(g->out, " )\n");
+ } else {
+ fprintf(g->out, " (else\n");
+ fprintf(g->out, " i32.const 0\n");
+ fprintf(g->out, " )\n");
+ }
+ fprintf(g->out, " )\n");
+}
+
+static void codegen_block_stmt(CodeGen* g, AstNode* ast) {
+ for (int i = 0; i < ast->as.list->len; ++i) {
+ AstNode* stmt = ast->as.list->items + i;
+ codegen_stmt(g, stmt);
+ }
+}
+
+static void codegen_stmt(CodeGen* g, AstNode* ast) {
+ if (ast->kind == AstNodeKind_list) {
+ codegen_block_stmt(g, ast);
+ } else if (ast->kind == AstNodeKind_return_stmt) {
+ codegen_return_stmt(g, ast->as.return_stmt);
+ } else if (ast->kind == AstNodeKind_if_stmt) {
+ codegen_if_stmt(g, ast->as.if_stmt);
+ } else if (ast->kind == AstNodeKind_nop) {
+ // Do nothing.
+ } else {
+ unreachable();
+ }
+}
+
+static void codegen_func(CodeGen* g, AstNode* ast) {
+ g->current_func = ast;
+
+ fprintf(g->out, "(func $%s (export \"%s\")", ast->as.func_def->name, ast->as.func_def->name);
+
+ codegen_func_prologue(g, ast->as.func_def);
+ codegen_stmt(g, ast->as.func_def->body);
+ codegen_func_epilogue(g);
+
+ fprintf(g->out, ")\n");
+ g->current_func = NULL;
+}
+
+void codegen_wasm(Program* prog, FILE* out) {
+ CodeGen* g = codegen_new(out);
+
+ fprintf(g->out, "(module\n");
+
+ for (int i = 0; i < prog->funcs->as.list->len; ++i) {
+ AstNode* func = prog->funcs->as.list->items + i;
+ codegen_func(g, func);
+ }
+
+ fprintf(g->out, ")\n");
+}
diff --git a/src/cc1/codegen_wasm.h b/src/cc1/codegen_wasm.h
new file mode 100644
index 0000000..1089154
--- /dev/null
+++ b/src/cc1/codegen_wasm.h
@@ -0,0 +1,8 @@
+#ifndef DUCC_CODEGEN_WASM_H
+#define DUCC_CODEGEN_WASM_H
+
+#include "ast.h"
+
+void codegen_wasm(Program* prog, FILE* out);
+
+#endif
diff --git a/src/cc1/fs.c b/src/cc1/fs.c
new file mode 100644
index 0000000..c702fa4
--- /dev/null
+++ b/src/cc1/fs.c
@@ -0,0 +1,27 @@
+#include "fs.h"
+#include <stdlib.h>
+#include <string.h>
+
+// 'ext' must include '.'.
+char* replace_extension(const char* file_name, const char* ext) {
+ size_t len = strlen(file_name);
+ const char* last_slash = strrchr(file_name, '/');
+ const char* last_dot = strrchr(file_name, '.');
+
+ size_t ext_len = strlen(ext);
+ size_t base_len;
+ // !last_slash: foo.c
+ // last_slash < last_dot: ./bar/foo.c
+ if (last_dot && (!last_slash || last_slash < last_dot)) {
+ base_len = last_dot - file_name;
+ } else {
+ base_len = len;
+ }
+
+ char* result = calloc(base_len + ext_len + 1, sizeof(char));
+ memcpy(result, file_name, base_len);
+ memcpy(result + base_len, ext, ext_len);
+ result[base_len + ext_len] = '\0';
+
+ return result;
+}
diff --git a/src/cc1/fs.h b/src/cc1/fs.h
new file mode 100644
index 0000000..b71889f
--- /dev/null
+++ b/src/cc1/fs.h
@@ -0,0 +1,7 @@
+#ifndef DUCC_FS_H
+#define DUCC_FS_H
+
+// 'ext' must include '.'.
+char* replace_extension(const char* file_name, const char* ext);
+
+#endif
diff --git a/src/cc1/io.c b/src/cc1/io.c
new file mode 100644
index 0000000..65cd448
--- /dev/null
+++ b/src/cc1/io.c
@@ -0,0 +1,127 @@
+#include "io.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "../lib/common.h"
+#include "../lib/json.h"
+
+void sourcelocation_build_json(JsonBuilder* builder, SourceLocation* loc) {
+ jsonbuilder_object_start(builder);
+ jsonbuilder_object_member_start(builder, "filename");
+ jsonbuilder_string(builder, loc->filename);
+ jsonbuilder_object_member_end(builder);
+ jsonbuilder_object_member_start(builder, "line");
+ jsonbuilder_integer(builder, loc->line);
+ jsonbuilder_object_member_end(builder);
+ jsonbuilder_object_end(builder);
+}
+
+InFile* infile_open(const char* filename) {
+ FILE* in = fopen(filename, "rb");
+ if (!in) {
+ return NULL;
+ }
+
+ size_t buf_size = 1024 * 10;
+ char* buf = calloc(buf_size, sizeof(char));
+ char* cur = buf;
+ char* tmp = calloc(1024, sizeof(char));
+
+ while (fgets(tmp, 1024, in)) {
+ size_t len = strlen(tmp);
+ size_t used_size = cur - buf;
+
+ if (buf_size <= used_size + len) {
+ size_t old_size = buf_size;
+ buf_size *= 2;
+ buf = realloc(buf, buf_size);
+ memset(buf + old_size, 0, buf_size - old_size);
+ cur = buf + used_size;
+ }
+
+ memcpy(cur, tmp, len);
+ cur += len;
+ }
+ fclose(in);
+
+ InFile* in_file = calloc(1, sizeof(InFile));
+ in_file->buf = buf;
+ in_file->loc.filename = filename;
+ in_file->loc.line = 1;
+ return in_file;
+}
+
+bool infile_eof(InFile* f) {
+ return f->buf[f->pos] == '\0';
+}
+
+char infile_peek_char(InFile* f) {
+ char c = f->buf[f->pos];
+
+ // Skip a pair of backslash and new-line.
+ if (c == '\\') {
+ char c2 = f->buf[f->pos + 1];
+ // C23: 5.1.1.2
+ // A source file that is not empty shall end in a new-line character, which shall not be immediately preceded by
+ // a backslash character before any such splicing takes place.
+ if (c2 == '\0') {
+ fatal_error("%s:%d: <new-line> expected, but got <eof>", f->loc.filename, f->loc.line);
+ }
+ // Handle line continuation.
+ if (c2 == '\r') {
+ if (f->buf[f->pos + 2] == '\n') {
+ f->pos += 3; // Backslash + CRLF
+ } else {
+ f->pos += 2; // Backslash + CR
+ }
+ ++f->loc.line;
+ return infile_peek_char(f);
+ } else if (c2 == '\n') {
+ f->pos += 2; // Backslash + LF
+ ++f->loc.line;
+ return infile_peek_char(f);
+ }
+ }
+
+ // Normalize new-line.
+ if (c == '\r')
+ c = '\n';
+ return c;
+}
+
+char infile_next_char(InFile* f) {
+ char c = infile_peek_char(f);
+
+ if (f->buf[f->pos] == '\r') {
+ ++f->pos;
+ if (f->buf[f->pos] == '\n') {
+ ++f->pos; // CRLF
+ }
+ c = '\n';
+ } else {
+ ++f->pos;
+ }
+
+ if (c == '\n')
+ ++f->loc.line;
+ return c;
+}
+
+char infile_peek_char2(InFile* f) {
+ int saved_pos = f->pos;
+ int saved_line = f->loc.line;
+ infile_next_char(f);
+ char c = infile_peek_char(f);
+ f->pos = saved_pos;
+ f->loc.line = saved_line;
+ return c;
+}
+
+bool infile_consume_if(InFile* f, char expected) {
+ if (infile_peek_char(f) == expected) {
+ infile_next_char(f);
+ return true;
+ } else {
+ return false;
+ }
+}
diff --git a/src/cc1/io.h b/src/cc1/io.h
new file mode 100644
index 0000000..be48b51
--- /dev/null
+++ b/src/cc1/io.h
@@ -0,0 +1,26 @@
+#ifndef DUCC_IO_H
+#define DUCC_IO_H
+
+#include "../lib/json.h"
+
+typedef struct {
+ const char* filename;
+ int line;
+} SourceLocation;
+
+void sourcelocation_build_json(JsonBuilder* builder, SourceLocation* loc);
+
+typedef struct {
+ const char* buf;
+ int pos;
+ SourceLocation loc;
+} InFile;
+
+InFile* infile_open(const char* filename);
+bool infile_eof(InFile* f);
+char infile_peek_char(InFile* f);
+char infile_peek_char2(InFile* f);
+char infile_next_char(InFile* f);
+bool infile_consume_if(InFile* f, char expected);
+
+#endif
diff --git a/src/cc1/parse.c b/src/cc1/parse.c
new file mode 100644
index 0000000..52167c1
--- /dev/null
+++ b/src/cc1/parse.c
@@ -0,0 +1,3018 @@
+#include "parse.h"
+#include "../lib/common.h"
+#include "tokenize.h"
+
+typedef struct {
+ const char* name;
+ Type* ty;
+ int stack_offset;
+} LocalVar;
+
+typedef struct {
+ size_t len;
+ size_t capacity;
+ LocalVar* data;
+} LocalVarArray;
+
+static void lvars_init(LocalVarArray* lvars) {
+ lvars->len = 0;
+ lvars->capacity = 4;
+ lvars->data = calloc(lvars->capacity, sizeof(LocalVar));
+}
+
+static void lvars_reserve(LocalVarArray* lvars, size_t size) {
+ if (size <= lvars->capacity)
+ return;
+ while (lvars->capacity < size) {
+ lvars->capacity *= 2;
+ }
+ lvars->data = realloc(lvars->data, lvars->capacity * sizeof(LocalVar));
+ memset(lvars->data + lvars->len, 0, (lvars->capacity - lvars->len) * sizeof(LocalVar));
+}
+
+static LocalVar* lvars_push_new(LocalVarArray* lvars) {
+ lvars_reserve(lvars, lvars->len + 1);
+ return &lvars->data[lvars->len++];
+}
+
+typedef struct {
+ const char* name;
+ int index;
+} ScopedSymbol;
+
+typedef struct {
+ size_t len;
+ size_t capacity;
+ ScopedSymbol* data;
+} ScopedSymbolArray;
+
+static void scopedsymbols_init(ScopedSymbolArray* syms) {
+ syms->len = 0;
+ syms->capacity = 4;
+ syms->data = calloc(syms->capacity, sizeof(ScopedSymbol));
+}
+
+static void scopedsymbols_reserve(ScopedSymbolArray* syms, size_t size) {
+ if (size <= syms->capacity)
+ return;
+ while (syms->capacity < size) {
+ syms->capacity *= 2;
+ }
+ syms->data = realloc(syms->data, syms->capacity * sizeof(ScopedSymbol));
+ memset(syms->data + syms->len, 0, (syms->capacity - syms->len) * sizeof(ScopedSymbol));
+}
+
+static ScopedSymbol* scopedsymbols_push_new(ScopedSymbolArray* syms) {
+ scopedsymbols_reserve(syms, syms->len + 1);
+ return &syms->data[syms->len++];
+}
+
+typedef struct Scope {
+ struct Scope* outer;
+ ScopedSymbolArray syms;
+} Scope;
+
+typedef struct {
+ const char* name;
+ Type* ty;
+} GlobalVar;
+
+typedef struct {
+ size_t len;
+ size_t capacity;
+ GlobalVar* data;
+} GlobalVarArray;
+
+static void gvars_init(GlobalVarArray* gvars) {
+ gvars->len = 0;
+ gvars->capacity = 4;
+ gvars->data = calloc(gvars->capacity, sizeof(GlobalVar));
+}
+
+static void gvars_reserve(GlobalVarArray* gvars, size_t size) {
+ if (size <= gvars->capacity)
+ return;
+ while (gvars->capacity < size) {
+ gvars->capacity *= 2;
+ }
+ gvars->data = realloc(gvars->data, gvars->capacity * sizeof(GlobalVar));
+ memset(gvars->data + gvars->len, 0, (gvars->capacity - gvars->len) * sizeof(GlobalVar));
+}
+
+static GlobalVar* gvars_push_new(GlobalVarArray* gvars) {
+ gvars_reserve(gvars, gvars->len + 1);
+ return &gvars->data[gvars->len++];
+}
+
+typedef struct {
+ const char* name;
+ Type* ty;
+} Func;
+
+typedef struct {
+ size_t len;
+ size_t capacity;
+ Func* data;
+} FuncArray;
+
+static void funcs_init(FuncArray* funcs) {
+ funcs->len = 0;
+ funcs->capacity = 32;
+ funcs->data = calloc(funcs->capacity, sizeof(Func));
+}
+
+static void funcs_reserve(FuncArray* funcs, size_t size) {
+ if (size <= funcs->capacity)
+ return;
+ while (funcs->capacity < size) {
+ funcs->capacity *= 2;
+ }
+ funcs->data = realloc(funcs->data, funcs->capacity * sizeof(Func));
+ memset(funcs->data + funcs->len, 0, (funcs->capacity - funcs->len) * sizeof(Func));
+}
+
+static Func* funcs_push_new(FuncArray* funcs) {
+ funcs_reserve(funcs, funcs->len + 1);
+ return &funcs->data[funcs->len++];
+}
+
+typedef struct {
+ TokenArray* tokens;
+ int pos;
+ LocalVarArray lvars;
+ Scope* scope;
+ GlobalVarArray gvars;
+ FuncArray funcs;
+ AstNode* structs;
+ AstNode* unions;
+ AstNode* enums;
+ AstNode* typedefs;
+ StrArray str_literals;
+ int anonymous_user_type_counter;
+ AstNode* current_switch;
+} Parser;
+
+static Parser* parser_new(TokenArray* tokens) {
+ Parser* p = calloc(1, sizeof(Parser));
+ p->tokens = tokens;
+ gvars_init(&p->gvars);
+ funcs_init(&p->funcs);
+ p->structs = ast_new_list(4);
+ p->unions = ast_new_list(4);
+ p->enums = ast_new_list(4);
+ p->typedefs = ast_new_list(16);
+ strings_init(&p->str_literals);
+
+ Func* va_start_func = funcs_push_new(&p->funcs);
+ va_start_func->name = "__ducc_va_start";
+ va_start_func->ty = type_new_func(type_new(TypeKind_void), NULL);
+
+ Func* va_arg_func = funcs_push_new(&p->funcs);
+ va_arg_func->name = "__ducc_va_arg";
+ va_arg_func->ty = type_new_func(type_new_ptr(type_new(TypeKind_void)), NULL);
+
+ return p;
+}
+
+static Token* peek_token(Parser* p) {
+ return &p->tokens->data[p->pos];
+}
+
+static Token* peek_token2(Parser* p) {
+ return &p->tokens->data[p->pos + 1];
+}
+
+static SourceLocation current_location(Parser* p) {
+ return peek_token(p)->loc;
+}
+
+static Token* next_token(Parser* p) {
+ return &p->tokens->data[p->pos++];
+}
+
+static Token* consume_token_if(Parser* p, TokenKind expected) {
+ if (peek_token(p)->kind == expected) {
+ return next_token(p);
+ } else {
+ return NULL;
+ }
+}
+
+static bool eof(Parser* p) {
+ return peek_token(p)->kind != TokenKind_eof;
+}
+
+static Token* expect(Parser* p, TokenKind expected) {
+ Token* t = next_token(p);
+ if (t->kind == expected) {
+ return t;
+ }
+ fatal_error("%s:%d: expected '%s', but got '%s'", t->loc.filename, t->loc.line, token_kind_stringify(expected),
+ token_stringify(t));
+}
+
+static int find_lvar_in_scope(Parser*, Scope* scope, const char* name) {
+ for (size_t i = 0; i < scope->syms.len; ++i) {
+ ScopedSymbol* sym = &scope->syms.data[i];
+ if (sym->name && strcmp(sym->name, name) == 0) {
+ return sym->index;
+ }
+ }
+ return -1;
+}
+
+static int find_lvar_in_current_scope(Parser* p, const char* name) {
+ return find_lvar_in_scope(p, p->scope, name);
+}
+
+static int find_lvar(Parser* p, const char* name) {
+ Scope* scope = p->scope;
+ while (scope) {
+ int idx = find_lvar_in_scope(p, scope, name);
+ if (idx != -1)
+ return idx;
+ scope = scope->outer;
+ }
+ return -1;
+}
+
+static int calc_lvar_stack_offset(Parser* p, Type* ty) {
+ int last_offset = 0;
+ for (int i = p->lvars.len - 1; i >= 0; i--) {
+ int offset = p->lvars.data[i].stack_offset;
+ // Skip a passed-by-stack parameter.
+ if (offset <= 0)
+ continue;
+ last_offset = offset;
+ break;
+ }
+ return to_aligned(last_offset + type_sizeof(ty), type_alignof(ty));
+}
+
+static int add_lvar(Parser* p, const char* name, Type* ty, int stack_offset) {
+ LocalVar* lvar = lvars_push_new(&p->lvars);
+ lvar->name = name;
+ lvar->ty = ty;
+ lvar->stack_offset = stack_offset;
+ ScopedSymbol* sym = scopedsymbols_push_new(&p->scope->syms);
+ sym->name = name;
+ sym->index = p->lvars.len - 1;
+ return stack_offset;
+}
+
+static AstNode* generate_temporary_lvar(Parser* p, Type* ty) {
+ int stack_offset = add_lvar(p, NULL, ty, calc_lvar_stack_offset(p, ty));
+ return ast_new_lvar(NULL, stack_offset, ty);
+}
+
+static int find_gvar(Parser* p, const char* name) {
+ for (size_t i = 0; i < p->gvars.len; ++i) {
+ if (strcmp(p->gvars.data[i].name, name) == 0) {
+ return i;
+ }
+ }
+ return -1;
+}
+
+static int find_func(Parser* p, const char* name) {
+ for (size_t i = 0; i < p->funcs.len; ++i) {
+ if (strcmp(p->funcs.data[i].name, name) == 0) {
+ return i;
+ }
+ }
+ return -1;
+}
+
+static int find_struct(Parser* p, const char* name) {
+ for (int i = 0; i < p->structs->as.list->len; ++i) {
+ if (strcmp(p->structs->as.list->items[i].as.struct_def->name, name) == 0) {
+ return i;
+ }
+ }
+ return -1;
+}
+
+static int find_union(Parser* p, const char* name) {
+ for (int i = 0; i < p->unions->as.list->len; ++i) {
+ if (strcmp(p->unions->as.list->items[i].as.union_def->name, name) == 0) {
+ return i;
+ }
+ }
+ return -1;
+}
+
+static int find_enum(Parser* p, const char* name) {
+ for (int i = 0; i < p->enums->as.list->len; ++i) {
+ if (strcmp(p->enums->as.list->items[i].as.enum_def->name, name) == 0) {
+ return i;
+ }
+ }
+ return -1;
+}
+
+static int find_enum_member(Parser* p, const char* name) {
+ for (int i = 0; i < p->enums->as.list->len; ++i) {
+ AstNode* members = p->enums->as.list->items[i].as.enum_def->members;
+ if (!members)
+ continue;
+ for (int j = 0; j < members->as.list->len; ++j) {
+ if (strcmp(members->as.list->items[j].as.enum_member->name, name) == 0) {
+ return i * 1000 + j;
+ }
+ }
+ }
+ return -1;
+}
+
+static int find_typedef(Parser* p, const char* name) {
+ for (int i = 0; i < p->typedefs->as.list->len; ++i) {
+ if (strcmp(p->typedefs->as.list->items[i].as.typedef_decl->name, name) == 0) {
+ return i;
+ }
+ }
+ return -1;
+}
+
+static void enter_scope(Parser* p) {
+ Scope* outer_scope = p->scope;
+ p->scope = calloc(1, sizeof(Scope));
+ p->scope->outer = outer_scope;
+ scopedsymbols_init(&p->scope->syms);
+}
+
+static void leave_scope(Parser* p) {
+ p->scope = p->scope->outer;
+}
+
+static void enter_func(Parser* p) {
+ lvars_init(&p->lvars);
+ enter_scope(p);
+}
+
+static void leave_func(Parser* p) {
+ leave_scope(p);
+}
+
+static Token* parse_ident(Parser*);
+static AstNode* parse_primary_expr(Parser*);
+static AstNode* parse_postfix_expr(Parser*);
+static AstNode* parse_argument_expr_list(Parser*);
+static AstNode* parse_unary_expr(Parser*);
+static AstNode* parse_cast_expr(Parser*);
+static AstNode* parse_multiplicative_expr(Parser*);
+static AstNode* parse_additive_expr(Parser*);
+static AstNode* parse_shift_expr(Parser*);
+static AstNode* parse_relational_expr(Parser*);
+static AstNode* parse_equality_expr(Parser*);
+static AstNode* parse_bitwise_and_expr(Parser*);
+static AstNode* parse_bitwise_xor_expr(Parser*);
+static AstNode* parse_bitwise_or_expr(Parser*);
+static AstNode* parse_logical_and_expr(Parser*);
+static AstNode* parse_logical_or_expr(Parser*);
+static AstNode* parse_conditional_expr(Parser*);
+static AstNode* parse_assignment_expr(Parser*);
+static AstNode* parse_expr(Parser*);
+static AstNode* parse_constant_expr(Parser*);
+static Type* parse_pointer_opt(Parser*, Type*);
+static void parse_type_qualifier_list_opt(Parser*);
+static AstNode* parse_declarator_or_abstract_declarator_opt(Parser*, Type*);
+static AstNode* parse_parameter_type_list(Parser*);
+static AstNode* parse_parameter_list(Parser*);
+static AstNode* parse_parameter_declaration(Parser*);
+static Type* parse_array_declarator_suffix(Parser*, Type*);
+static Type* parse_function_declarator_suffix(Parser*, Type*);
+static AstNode* parse_direct_declarator(Parser*, Type*);
+static AstNode* parse_declarator(Parser*, Type*);
+static AstNode* parse_init_declarator_list(Parser*, Type*);
+static AstNode* parse_declaration(Parser*);
+static AstNode* parse_function_definition(Parser*, AstNode*);
+static Type* parse_declaration_specifiers(Parser*);
+static AstNode* parse_init_declarator(Parser*, Type*);
+static Type* parse_struct_specifier(Parser*);
+static Type* parse_union_specifier(Parser*);
+static AstNode* parse_member_declaration_list(Parser*);
+static AstNode* parse_member_declaration(Parser*);
+static Type* parse_specifier_qualifier_list(Parser*);
+static AstNode* parse_member_declarator_list(Parser*, Type*);
+static AstNode* parse_member_declarator(Parser*, Type*);
+static Type* parse_enum_specifier(Parser*);
+static void parse_enum_members(Parser*, int);
+static AstNode* parse_enum_member(Parser*, int);
+static Type* parse_type_name(Parser*);
+static Type* parse_abstract_declarator_opt(Parser*, Type*);
+static AstNode* parse_braced_initializer(Parser*);
+static AstNode* parse_initializer(Parser*);
+static AstNode* parse_attribute_specifier_sequence_opt(Parser*);
+static AstNode* parse_attribute_specifier_opt(Parser*);
+static AstNode* parse_attribute_list(Parser*);
+static AstNode* parse_attribute(Parser*);
+static AstNode* parse_attribute_argument_clause(Parser*);
+static AstNode* parse_balanced_token_sequence(Parser*);
+static AstNode* parse_stmt(Parser*);
+static AstNode* parse_unlabaled_stmt(Parser*);
+static AstNode* parse_compound_stmt(Parser*);
+static AstNode* parse_block_item(Parser*);
+static AstNode* parse_expr_stmt(Parser*);
+static AstNode* parse_if_stmt(Parser*);
+static AstNode* parse_switch_stmt(Parser*);
+static AstNode* parse_while_stmt(Parser*);
+static AstNode* parse_do_while_stmt(Parser*);
+static AstNode* parse_for_stmt(Parser*);
+static AstNode* parse_goto_stmt(Parser*);
+static AstNode* parse_continue_stmt(Parser*);
+static AstNode* parse_break_stmt(Parser*);
+static AstNode* parse_return_stmt(Parser*);
+static AstNode* parse_static_assert_declaration(Parser*);
+static AstNode* parse_external_declaration(Parser*);
+static int eval(AstNode*);
+static void process_declarations(Parser*, AstNode*);
+
+static Token* parse_ident(Parser* p) {
+ return expect(p, TokenKind_ident);
+}
+
+static int register_str_literal(Parser* p, const char* s) {
+ return strings_push(&p->str_literals, s);
+}
+
+static void register_params(Parser* p, AstNode* params) {
+ int gp_regs = 6;
+ int pass_by_reg_offset = 8;
+ int pass_by_stack_offset = -16;
+ for (int i = 0; i < params->as.list->len; ++i) {
+ AstNode* param = &params->as.list->items[i];
+ int ty_size = type_sizeof(param->ty);
+ int required_gp_regs;
+ if (ty_size <= 8) {
+ required_gp_regs = 1;
+ } else if (ty_size <= 16) {
+ required_gp_regs = 2;
+ } else {
+ required_gp_regs = 0;
+ }
+ if (required_gp_regs <= gp_regs) {
+ gp_regs -= required_gp_regs;
+ } else {
+ required_gp_regs = 0;
+ }
+ int stack_offset;
+ if (required_gp_regs == 0) {
+ stack_offset = pass_by_stack_offset;
+ pass_by_stack_offset -= to_aligned(ty_size, 8);
+ } else {
+ stack_offset = pass_by_reg_offset;
+ pass_by_reg_offset += to_aligned(ty_size, 8);
+ }
+ const char* name = param->as.declarator->name;
+ param->kind = AstNodeKind_param;
+ param->as.param = calloc(1, sizeof(ParamNode));
+ param->as.param->name = name;
+ param->as.param->stack_offset = stack_offset;
+ add_lvar(p, name, param->ty, stack_offset);
+ }
+}
+
+static void register_func(Parser* p, const char* name, Type* ty) {
+ Func* func = funcs_push_new(&p->funcs);
+ func->name = name;
+ func->ty = ty;
+}
+
+typedef enum {
+ TypeSpecifierMask_void = 1 << 0,
+ TypeSpecifierMask_char = 1 << 1,
+ TypeSpecifierMask_short = 1 << 2,
+ TypeSpecifierMask_int = 1 << 3,
+ TypeSpecifierMask_long = 1 << 4,
+ // 1 << 5 is used for second 'long'.
+ TypeSpecifierMask_float = 1 << 6,
+ TypeSpecifierMask_double = 1 << 7,
+ TypeSpecifierMask_signed = 1 << 8,
+ TypeSpecifierMask_unsigned = 1 << 9,
+ TypeSpecifierMask__BitInt = 1 << 10,
+ TypeSpecifierMask_bool = 1 << 11,
+ TypeSpecifierMask__Complex = 1 << 12,
+ TypeSpecifierMask__Decimal32 = 1 << 13,
+ TypeSpecifierMask__Decimal64 = 1 << 14,
+ TypeSpecifierMask__Decimal128 = 1 << 15,
+ TypeSpecifierMask__Atomic = 1 << 16,
+ TypeSpecifierMask_struct = 1 << 17,
+ TypeSpecifierMask_union = 1 << 18,
+ TypeSpecifierMask_enum = 1 << 19,
+ TypeSpecifierMask_typeof = 1 << 20,
+ TypeSpecifierMask_typeof_unqual = 1 << 21,
+ TypeSpecifierMask_typedef_name = 1 << 22,
+} TypeSpecifierMask;
+
+static Type* distinguish_type_from_type_specifiers(int type_specifiers) {
+ if (type_specifiers == TypeSpecifierMask_void)
+ return type_new(TypeKind_void);
+ else if (type_specifiers == TypeSpecifierMask_char)
+ return type_new(TypeKind_char);
+ else if (type_specifiers == (TypeSpecifierMask_signed + TypeSpecifierMask_char))
+ return type_new(TypeKind_schar);
+ else if (type_specifiers == (TypeSpecifierMask_unsigned + TypeSpecifierMask_char))
+ return type_new(TypeKind_uchar);
+ else if (type_specifiers == TypeSpecifierMask_short ||
+ type_specifiers == (TypeSpecifierMask_signed + TypeSpecifierMask_short) ||
+ type_specifiers == (TypeSpecifierMask_short + TypeSpecifierMask_int) ||
+ type_specifiers == (TypeSpecifierMask_signed + TypeSpecifierMask_short + TypeSpecifierMask_int))
+ return type_new(TypeKind_short);
+ else if (type_specifiers == (TypeSpecifierMask_unsigned + TypeSpecifierMask_short) ||
+ type_specifiers == (TypeSpecifierMask_unsigned + TypeSpecifierMask_short + TypeSpecifierMask_int))
+ return type_new(TypeKind_ushort);
+ else if (type_specifiers == TypeSpecifierMask_int || type_specifiers == TypeSpecifierMask_signed ||
+ type_specifiers == (TypeSpecifierMask_signed + TypeSpecifierMask_int))
+ return type_new(TypeKind_int);
+ else if (type_specifiers == TypeSpecifierMask_unsigned ||
+ type_specifiers == (TypeSpecifierMask_unsigned + TypeSpecifierMask_int))
+ return type_new(TypeKind_uint);
+ else if (type_specifiers == TypeSpecifierMask_long ||
+ type_specifiers == (TypeSpecifierMask_signed + TypeSpecifierMask_long) ||
+ type_specifiers == (TypeSpecifierMask_long + TypeSpecifierMask_int) ||
+ type_specifiers == (TypeSpecifierMask_signed + TypeSpecifierMask_long + TypeSpecifierMask_int))
+ return type_new(TypeKind_long);
+ else if (type_specifiers == (TypeSpecifierMask_unsigned + TypeSpecifierMask_long) ||
+ type_specifiers == (TypeSpecifierMask_unsigned + TypeSpecifierMask_long + TypeSpecifierMask_int))
+ return type_new(TypeKind_ulong);
+ else if (type_specifiers == (TypeSpecifierMask_long + TypeSpecifierMask_long) ||
+ type_specifiers == (TypeSpecifierMask_signed + TypeSpecifierMask_long + TypeSpecifierMask_long) ||
+ type_specifiers == (TypeSpecifierMask_long + TypeSpecifierMask_long + TypeSpecifierMask_int) ||
+ type_specifiers ==
+ (TypeSpecifierMask_signed + TypeSpecifierMask_long + TypeSpecifierMask_long + TypeSpecifierMask_int))
+ return type_new(TypeKind_llong);
+ else if (type_specifiers == (TypeSpecifierMask_unsigned + TypeSpecifierMask_long + TypeSpecifierMask_long) ||
+ type_specifiers ==
+ (TypeSpecifierMask_unsigned + TypeSpecifierMask_long + TypeSpecifierMask_long + TypeSpecifierMask_int))
+ return type_new(TypeKind_ullong);
+ else if (type_specifiers == TypeSpecifierMask_float)
+ return type_new(TypeKind_float);
+ else if (type_specifiers == TypeSpecifierMask_double)
+ return type_new(TypeKind_double);
+ else if (type_specifiers == TypeSpecifierMask_long + TypeSpecifierMask_double)
+ return type_new(TypeKind_ldouble);
+ else if (type_specifiers == TypeSpecifierMask_bool)
+ return type_new(TypeKind_bool);
+ else if (type_specifiers == TypeSpecifierMask_struct)
+ return NULL;
+ else if (type_specifiers == TypeSpecifierMask_union)
+ return NULL;
+ else if (type_specifiers == TypeSpecifierMask_enum)
+ return NULL;
+ else if (type_specifiers == TypeSpecifierMask_typedef_name)
+ return NULL;
+ else if (type_specifiers == 0)
+ return NULL;
+ else
+ unimplemented();
+}
+
+// e++
+// tmp1 = &e; tmp2 = *tmp1; *tmp1 += 1; tmp2
+// e--
+// tmp1 = &e; tmp2 = *tmp1; *tmp1 -= 1; tmp2
+static AstNode* create_new_postfix_inc_or_dec(Parser* p, AstNode* e, TokenKind op) {
+ AstNode* tmp1_lvar = generate_temporary_lvar(p, type_new_ptr(e->ty));
+ AstNode* tmp2_lvar = generate_temporary_lvar(p, e->ty);
+
+ AstNode* expr1 = ast_new_assign_expr(TokenKind_assign, tmp1_lvar, ast_new_ref_expr(e));
+ AstNode* expr2 = ast_new_assign_expr(TokenKind_assign, tmp2_lvar, ast_new_deref_expr(tmp1_lvar));
+ AstNode* expr3;
+ if (op == TokenKind_plusplus) {
+ expr3 = ast_new_assign_add_expr(ast_new_deref_expr(tmp1_lvar), ast_new_int(1));
+ } else {
+ expr3 = ast_new_assign_sub_expr(ast_new_deref_expr(tmp1_lvar), ast_new_int(1));
+ }
+ AstNode* expr4 = tmp2_lvar;
+
+ AstNode* ret = ast_new_list(4);
+ ast_append(ret, expr1);
+ ast_append(ret, expr2);
+ ast_append(ret, expr3);
+ ast_append(ret, expr4);
+ ret->ty = expr4->ty;
+ return ret;
+}
+
+static bool is_typedef_name(Parser* p, Token* tok) {
+ return tok->kind == TokenKind_ident && find_typedef(p, tok->value.string) != -1;
+}
+
+static bool is_type_token(Parser* p, Token* tok) {
+ if (tok->kind == TokenKind_keyword_void || tok->kind == TokenKind_keyword_char ||
+ tok->kind == TokenKind_keyword_short || tok->kind == TokenKind_keyword_int ||
+ tok->kind == TokenKind_keyword_long || tok->kind == TokenKind_keyword_float ||
+ tok->kind == TokenKind_keyword_double || tok->kind == TokenKind_keyword_signed ||
+ tok->kind == TokenKind_keyword_unsigned || tok->kind == TokenKind_keyword__BitInt ||
+ tok->kind == TokenKind_keyword_bool || tok->kind == TokenKind_keyword__Complex ||
+ tok->kind == TokenKind_keyword__Decimal32 || tok->kind == TokenKind_keyword__Decimal64 ||
+ tok->kind == TokenKind_keyword__Decimal128 || tok->kind == TokenKind_keyword__Atomic ||
+ tok->kind == TokenKind_keyword_struct || tok->kind == TokenKind_keyword_union ||
+ tok->kind == TokenKind_keyword_enum || tok->kind == TokenKind_keyword_typeof ||
+ tok->kind == TokenKind_keyword_typeof_unqual || tok->kind == TokenKind_keyword_const ||
+ tok->kind == TokenKind_keyword_restrict || tok->kind == TokenKind_keyword_volatile ||
+ tok->kind == TokenKind_keyword__Atomic || tok->kind == TokenKind_keyword_alignas ||
+ tok->kind == TokenKind_keyword_inline || tok->kind == TokenKind_keyword__Noreturn ||
+ tok->kind == TokenKind_keyword_static) {
+ return true;
+ }
+ if (tok->kind != TokenKind_ident) {
+ return false;
+ }
+ return find_typedef(p, tok->value.string) != -1;
+}
+
+// primary-expr:
+// identifier
+// constant
+// string-literal
+// '(' expr ')'
+// TODO generic-selection
+static AstNode* parse_primary_expr(Parser* p) {
+ Token* t = next_token(p);
+ if (t->kind == TokenKind_literal_int) {
+ return ast_new_int(t->value.integer);
+ } else if (t->kind == TokenKind_literal_double) {
+ return ast_new_double(t->value.floating);
+ } else if (t->kind == TokenKind_keyword_true) {
+ return ast_new_int(1);
+ } else if (t->kind == TokenKind_keyword_false) {
+ return ast_new_int(0);
+ } else if (t->kind == TokenKind_literal_str) {
+ return ast_new_str_expr(register_str_literal(p, t->value.string),
+ type_new_static_string(strlen(t->value.string)));
+ } else if (t->kind == TokenKind_paren_l) {
+ AstNode* e = parse_expr(p);
+ expect(p, TokenKind_paren_r);
+ return e;
+ } else if (t->kind == TokenKind_ident) {
+ const char* name = t->value.string;
+
+ int lvar_idx = find_lvar(p, name);
+ if (lvar_idx == -1) {
+ int gvar_idx = find_gvar(p, name);
+ if (gvar_idx == -1) {
+ int enum_member_idx = find_enum_member(p, name);
+ if (enum_member_idx == -1) {
+ int func_idx = find_func(p, name);
+ if (func_idx == -1) {
+ fatal_error("%s:%d: undefined variable: %s", t->loc.filename, t->loc.line, name);
+ }
+ return ast_new_func(name, p->funcs.data[func_idx].ty);
+ }
+ int enum_idx = enum_member_idx / 1000;
+ int n = enum_member_idx % 1000;
+ AstNode* e = ast_new_int(
+ p->enums->as.list->items[enum_idx].as.enum_def->members->as.list->items[n].as.enum_member->value);
+ e->ty = type_new(TypeKind_enum);
+ e->ty->ref.defs = p->enums;
+ e->ty->ref.index = enum_idx;
+ return e;
+ }
+ return ast_new_gvar(name, p->gvars.data[gvar_idx].ty);
+ }
+
+ return ast_new_lvar(name, p->lvars.data[lvar_idx].stack_offset, p->lvars.data[lvar_idx].ty);
+ } else {
+ fatal_error("%s:%d: expected an expression, but got '%s'", t->loc.filename, t->loc.line, token_stringify(t));
+ }
+}
+
+// postfix-expr:
+// postfix-expr-stem { postfix-expr-postfix }*
+//
+// postfix-expr-stem:
+// primary-expr
+// TODO compound-literal
+//
+// postfix-expr-postfix:
+// '[' expr ']'
+// '(' argument-expr-list? ')'
+// '.' identifier
+// '->' identifier
+// '++'
+// '--'
+static AstNode* parse_postfix_expr(Parser* p) {
+ AstNode* ret = parse_primary_expr(p);
+ while (1) {
+ if (consume_token_if(p, TokenKind_paren_l)) {
+ AstNode* args = parse_argument_expr_list(p);
+ expect(p, TokenKind_paren_r);
+ ret = ast_new_func_call(ret, args);
+ Type* func_type = ret->as.func_call->func->ty;
+ if (func_type->kind == TypeKind_ptr) {
+ func_type = func_type->base;
+ }
+ ret->ty = func_type->result;
+ } else if (consume_token_if(p, TokenKind_bracket_l)) {
+ AstNode* idx = parse_expr(p);
+ expect(p, TokenKind_bracket_r);
+ idx = ast_new_binary_expr(TokenKind_star, idx, ast_new_int(type_sizeof(ret->ty->base)));
+ ret = ast_new_deref_expr(ast_new_binary_expr(TokenKind_plus, ret, idx));
+ } else if (consume_token_if(p, TokenKind_dot)) {
+ const Token* name = parse_ident(p);
+ ret = ast_new_member_access_expr(ast_new_ref_expr(ret), name->value.string);
+ } else if (consume_token_if(p, TokenKind_arrow)) {
+ const Token* name = parse_ident(p);
+ ret = ast_new_member_access_expr(ret, name->value.string);
+ } else if (consume_token_if(p, TokenKind_plusplus)) {
+ ret = create_new_postfix_inc_or_dec(p, ret, TokenKind_plusplus);
+ } else if (consume_token_if(p, TokenKind_minusminus)) {
+ ret = create_new_postfix_inc_or_dec(p, ret, TokenKind_minusminus);
+ } else {
+ break;
+ }
+ }
+ return ret;
+}
+
+// argument-expr-list:
+// { assignment-expr | ',' }+
+static AstNode* parse_argument_expr_list(Parser* p) {
+ AstNode* list = ast_new_list(6);
+ while (peek_token(p)->kind != TokenKind_paren_r) {
+ AstNode* arg = parse_assignment_expr(p);
+ ast_append(list, arg);
+ if (!consume_token_if(p, TokenKind_comma)) {
+ break;
+ }
+ }
+ return list;
+}
+
+// unary-expr:
+// postfix-expr
+// '++' unary-expr
+// '--' unary-expr
+// unary-operator cast-expr
+// 'sizeof' unary-expr
+// 'sizeof' '(' type-name ')'
+// TODO 'alignof' '(' type-name ')'
+static AstNode* parse_unary_expr(Parser* p) {
+ TokenKind op = peek_token(p)->kind;
+ if (consume_token_if(p, TokenKind_plus)) {
+ return parse_cast_expr(p);
+ } else if (consume_token_if(p, TokenKind_minus)) {
+ AstNode* operand = parse_cast_expr(p);
+ return ast_new_binary_expr(op, ast_new_int(0), operand);
+ } else if (consume_token_if(p, TokenKind_not)) {
+ AstNode* operand = parse_cast_expr(p);
+ return ast_new_unary_expr(op, operand);
+ } else if (consume_token_if(p, TokenKind_tilde)) {
+ AstNode* operand = parse_cast_expr(p);
+ return ast_new_unary_expr(op, operand);
+ } else if (consume_token_if(p, TokenKind_and)) {
+ AstNode* operand = parse_cast_expr(p);
+ return ast_new_ref_expr(operand);
+ } else if (consume_token_if(p, TokenKind_star)) {
+ AstNode* operand = parse_cast_expr(p);
+ if (operand->ty->kind == TypeKind_func) {
+ // dereference operator against function pointers are no-op.
+ return operand;
+ } else {
+ return ast_new_deref_expr(operand);
+ }
+ } else if (consume_token_if(p, TokenKind_plusplus)) {
+ AstNode* operand = parse_unary_expr(p);
+ return ast_new_assign_add_expr(operand, ast_new_int(1));
+ } else if (consume_token_if(p, TokenKind_minusminus)) {
+ AstNode* operand = parse_unary_expr(p);
+ return ast_new_assign_sub_expr(operand, ast_new_int(1));
+ } else if (consume_token_if(p, TokenKind_keyword_sizeof)) {
+ if (peek_token(p)->kind == TokenKind_paren_l && is_type_token(p, peek_token2(p))) {
+ next_token(p);
+ Type* ty = parse_type_name(p);
+ expect(p, TokenKind_paren_r);
+ return ast_new_int(type_sizeof(ty));
+ } else {
+ AstNode* expr = parse_unary_expr(p);
+ return ast_new_int(type_sizeof(expr->ty));
+ }
+ }
+ return parse_postfix_expr(p);
+}
+
+// cast-expr:
+// unary-expr
+// '(' type-name ')' cast-expr
+static AstNode* parse_cast_expr(Parser* p) {
+ if (peek_token(p)->kind == TokenKind_paren_l && is_type_token(p, peek_token2(p))) {
+ next_token(p);
+ Type* ty = parse_type_name(p);
+ expect(p, TokenKind_paren_r);
+
+ // TODO: check whether the original type can be casted to the result type.
+ AstNode* e = parse_cast_expr(p);
+ return ast_new_cast_expr(e, ty);
+ }
+ return parse_unary_expr(p);
+}
+
+// multiplicative-expr:
+// cast-expr { ( '*' / '/' / '%' ) cast-expr }*
+static AstNode* parse_multiplicative_expr(Parser* p) {
+ AstNode* lhs = parse_cast_expr(p);
+ while (1) {
+ TokenKind op = peek_token(p)->kind;
+ if (op == TokenKind_star || op == TokenKind_slash || op == TokenKind_percent) {
+ next_token(p);
+ AstNode* rhs = parse_cast_expr(p);
+ lhs = ast_new_binary_expr(op, lhs, rhs);
+ } else {
+ break;
+ }
+ }
+ return lhs;
+}
+
+// additive-expr:
+// multiplicative-expr { ( '+' / '-' ) multiplicative-expr }*
+static AstNode* parse_additive_expr(Parser* p) {
+ AstNode* lhs = parse_multiplicative_expr(p);
+ while (1) {
+ if (consume_token_if(p, TokenKind_plus)) {
+ AstNode* rhs = parse_multiplicative_expr(p);
+ if (lhs->ty->base) {
+ lhs = ast_new_binary_expr(
+ TokenKind_plus, lhs,
+ ast_new_binary_expr(TokenKind_star, rhs, ast_new_int(type_sizeof(lhs->ty->base))));
+ } else if (rhs->ty->base) {
+ lhs = ast_new_binary_expr(
+ TokenKind_plus, ast_new_binary_expr(TokenKind_star, lhs, ast_new_int(type_sizeof(rhs->ty->base))),
+ rhs);
+ } else {
+ lhs = ast_new_binary_expr(TokenKind_plus, lhs, rhs);
+ }
+ } else if (consume_token_if(p, TokenKind_minus)) {
+ AstNode* rhs = parse_multiplicative_expr(p);
+ if (lhs->ty->base) {
+ if (rhs->ty->base) {
+ // (a - b) / sizeof(a)
+ lhs = ast_new_binary_expr(TokenKind_slash, ast_new_binary_expr(TokenKind_minus, lhs, rhs),
+ ast_new_int(type_sizeof(lhs->ty->base)));
+ } else {
+ // a - b*sizeof(a)
+ lhs = ast_new_binary_expr(
+ TokenKind_minus, lhs,
+ ast_new_binary_expr(TokenKind_star, rhs, ast_new_int(type_sizeof(lhs->ty->base))));
+ }
+ } else {
+ lhs = ast_new_binary_expr(TokenKind_minus, lhs, rhs);
+ }
+ } else {
+ break;
+ }
+ }
+ return lhs;
+}
+
+// shift-expr:
+// additive-expr { ( '<<' / '>>' ) additive-expr }*
+static AstNode* parse_shift_expr(Parser* p) {
+ AstNode* lhs = parse_additive_expr(p);
+ while (1) {
+ TokenKind op = peek_token(p)->kind;
+ if (op == TokenKind_lshift || op == TokenKind_rshift) {
+ next_token(p);
+ AstNode* rhs = parse_additive_expr(p);
+ lhs = ast_new_binary_expr(op, lhs, rhs);
+ } else {
+ break;
+ }
+ }
+ return lhs;
+}
+
+// relational-expr:
+// shift-expr { ( '<' / '>' / '<=' / '>=' ) shift-expr }*
+static AstNode* parse_relational_expr(Parser* p) {
+ AstNode* lhs = parse_shift_expr(p);
+ while (1) {
+ TokenKind op = peek_token(p)->kind;
+ if (op == TokenKind_lt || op == TokenKind_le) {
+ next_token(p);
+ AstNode* rhs = parse_shift_expr(p);
+ lhs = ast_new_binary_expr(op, lhs, rhs);
+ } else if (consume_token_if(p, TokenKind_gt)) {
+ AstNode* rhs = parse_shift_expr(p);
+ lhs = ast_new_binary_expr(TokenKind_lt, rhs, lhs);
+ } else if (consume_token_if(p, TokenKind_ge)) {
+ AstNode* rhs = parse_shift_expr(p);
+ lhs = ast_new_binary_expr(TokenKind_le, rhs, lhs);
+ } else {
+ break;
+ }
+ }
+ return lhs;
+}
+
+// equality-expr:
+// relational-expr { ( '==' / '!=' ) relational-expr }*
+static AstNode* parse_equality_expr(Parser* p) {
+ AstNode* lhs = parse_relational_expr(p);
+ while (1) {
+ TokenKind op = peek_token(p)->kind;
+ if (op == TokenKind_eq || op == TokenKind_ne) {
+ next_token(p);
+ AstNode* rhs = parse_relational_expr(p);
+ lhs = ast_new_binary_expr(op, lhs, rhs);
+ } else {
+ break;
+ }
+ }
+ return lhs;
+}
+
+// bitwise-and-expr:
+// equality-expr { '&' equality-expr }*
+static AstNode* parse_bitwise_and_expr(Parser* p) {
+ AstNode* lhs = parse_equality_expr(p);
+ while (1) {
+ if (consume_token_if(p, TokenKind_and)) {
+ AstNode* rhs = parse_equality_expr(p);
+ lhs = ast_new_binary_expr(TokenKind_and, lhs, rhs);
+ lhs->ty = type_new(TypeKind_int);
+ } else {
+ break;
+ }
+ }
+ return lhs;
+}
+
+// bitwise-xor-expr:
+// bitwise-and-expr { '^' bitwise-and-expr }*
+static AstNode* parse_bitwise_xor_expr(Parser* p) {
+ AstNode* lhs = parse_bitwise_and_expr(p);
+ while (1) {
+ if (consume_token_if(p, TokenKind_xor)) {
+ AstNode* rhs = parse_bitwise_and_expr(p);
+ lhs = ast_new_binary_expr(TokenKind_xor, lhs, rhs);
+ lhs->ty = type_new(TypeKind_int);
+ } else {
+ break;
+ }
+ }
+ return lhs;
+}
+
+// bitwise-or-expr:
+// bitwise-xor-expr { '|' bitwise-xor-expr }*
+static AstNode* parse_bitwise_or_expr(Parser* p) {
+ AstNode* lhs = parse_bitwise_xor_expr(p);
+ while (1) {
+ if (consume_token_if(p, TokenKind_or)) {
+ AstNode* rhs = parse_bitwise_xor_expr(p);
+ lhs = ast_new_binary_expr(TokenKind_or, lhs, rhs);
+ lhs->ty = type_new(TypeKind_int);
+ } else {
+ break;
+ }
+ }
+ return lhs;
+}
+
+// logical-and-expr:
+// bitwise-or-expr { '&&' bitwise-or-expr }*
+static AstNode* parse_logical_and_expr(Parser* p) {
+ AstNode* lhs = parse_bitwise_or_expr(p);
+ while (1) {
+ if (consume_token_if(p, TokenKind_andand)) {
+ AstNode* rhs = parse_bitwise_or_expr(p);
+ lhs = ast_new_logical_expr(TokenKind_andand, lhs, rhs);
+ } else {
+ break;
+ }
+ }
+ return lhs;
+}
+
+// logical-or-expr:
+// logical-and-expr { '||' logical-and-expr }*
+static AstNode* parse_logical_or_expr(Parser* p) {
+ AstNode* lhs = parse_logical_and_expr(p);
+ while (1) {
+ if (consume_token_if(p, TokenKind_oror)) {
+ AstNode* rhs = parse_logical_and_expr(p);
+ lhs = ast_new_logical_expr(TokenKind_oror, lhs, rhs);
+ } else {
+ break;
+ }
+ }
+ return lhs;
+}
+
+// conditional-expr:
+// logical-or-expr ( '?' expr ':' conditional-expr )?
+static AstNode* parse_conditional_expr(Parser* p) {
+ AstNode* e = parse_logical_or_expr(p);
+ if (consume_token_if(p, TokenKind_question)) {
+ AstNode* then_expr = parse_expr(p);
+ expect(p, TokenKind_colon);
+ AstNode* else_expr = parse_assignment_expr(p);
+ return ast_new_cond_expr(e, then_expr, else_expr);
+ } else {
+ return e;
+ }
+}
+
+// assignment-expr:
+// conditional-expr
+// unary-expr assignment-operator assignment-expr
+static AstNode* parse_assignment_expr(Parser* p) {
+ AstNode* lhs = parse_conditional_expr(p);
+ while (1) {
+ TokenKind op = peek_token(p)->kind;
+ // TODO: check if the lhs is unary expression.
+ if (op == TokenKind_assign || op == TokenKind_assign_mul || op == TokenKind_assign_div ||
+ op == TokenKind_assign_mod || op == TokenKind_assign_or || op == TokenKind_assign_and ||
+ op == TokenKind_assign_xor || op == TokenKind_assign_lshift || op == TokenKind_assign_rshift) {
+ next_token(p);
+ AstNode* rhs = parse_assignment_expr(p);
+ lhs = ast_new_assign_expr(op, lhs, rhs);
+ } else if (consume_token_if(p, TokenKind_assign_add)) {
+ AstNode* rhs = parse_assignment_expr(p);
+ lhs = ast_new_assign_add_expr(lhs, rhs);
+ } else if (consume_token_if(p, TokenKind_assign_sub)) {
+ AstNode* rhs = parse_assignment_expr(p);
+ lhs = ast_new_assign_sub_expr(lhs, rhs);
+ } else {
+ break;
+ }
+ }
+ return lhs;
+}
+
+// expr:
+// assignment-expr { ',' assignment-expr }*
+static AstNode* parse_expr(Parser* p) {
+ AstNode* lhs = parse_assignment_expr(p);
+ while (1) {
+ if (consume_token_if(p, TokenKind_comma)) {
+ AstNode* rhs = parse_assignment_expr(p);
+ AstNode* list = ast_new_list(2);
+ ast_append(list, lhs);
+ ast_append(list, rhs);
+ list->ty = rhs->ty;
+ lhs = list;
+ } else {
+ break;
+ }
+ }
+ return lhs;
+}
+
+// constant-expr:
+// conditional-expr
+static AstNode* parse_constant_expr(Parser* p) {
+ return parse_conditional_expr(p);
+}
+
+// pointer:
+// { '*' TODO attribute-specifier-sequence? type-qualifier-list? }+
+static Type* parse_pointer_opt(Parser* p, Type* ty) {
+ while (peek_token(p)->kind == TokenKind_star) {
+ next_token(p);
+ ty = type_new_ptr(ty);
+ parse_type_qualifier_list_opt(p);
+ }
+ return ty;
+}
+
+// type-qualifier-list:
+// { type-qualifier }+
+static void parse_type_qualifier_list_opt(Parser* p) {
+ while (1) {
+ Token* tok = peek_token(p);
+ if (tok->kind == TokenKind_keyword_const || tok->kind == TokenKind_keyword_restrict ||
+ tok->kind == TokenKind_keyword_volatile || tok->kind == TokenKind_keyword__Atomic) {
+ // TODO: currently type qualifiers are just ignored.
+ next_token(p);
+ } else {
+ break;
+ }
+ }
+}
+
+// declarator | abstract-declarator?:
+// pointer? identifier TODO attribute-specifier-sequence? { direct-declarator-suffix }*
+// pointer? '(' declarator ')' { direct-declarator-suffix }*
+// pointer? '(' abstract-declarator ')' { direct-declarator-suffix }*
+// pointer? { direct-declarator-suffix }*
+//
+// direct-declarator-suffix:
+// array-declarator-suffix TODO attribute-specifier-sequence?
+// function-declarator-suffix TODO attribute-specifier-sequence?
+static AstNode* parse_declarator_or_abstract_declarator_opt(Parser* p, Type* ty) {
+ ty = parse_pointer_opt(p, ty);
+
+ AstNode* decl;
+ if (peek_token(p)->kind == TokenKind_ident) {
+ decl = ast_new_declarator(parse_ident(p)->value.string, ty);
+ } else if (peek_token(p)->kind == TokenKind_paren_l && !is_type_token(p, peek_token2(p))) {
+ next_token(p);
+ if (peek_token(p)->kind == TokenKind_paren_r) {
+ Token* tok = peek_token(p);
+ fatal_error("%s:%d: expected declarator, but got '%s'", tok->loc.filename, tok->loc.line,
+ token_stringify(tok));
+ }
+ decl = parse_declarator_or_abstract_declarator_opt(p, ty);
+ expect(p, TokenKind_paren_r);
+ } else {
+ decl = ast_new_declarator(NULL, ty);
+ }
+
+ while (1) {
+ if (peek_token(p)->kind == TokenKind_bracket_l) {
+ decl->ty = parse_array_declarator_suffix(p, decl->ty);
+ } else if (peek_token(p)->kind == TokenKind_paren_l) {
+ decl->ty = parse_function_declarator_suffix(p, decl->ty);
+ } else {
+ break;
+ }
+ }
+
+ return decl;
+}
+
+// parameter-type-list:
+// parameter-list
+// parameter-list ',' '...'
+// '...'
+static AstNode* parse_parameter_type_list(Parser* p) {
+ AstNode* params = parse_parameter_list(p);
+
+ bool has_void = false;
+ for (int i = 0; i < params->as.list->len; ++i) {
+ if (params->as.list->items[i].as.declarator->name &&
+ strcmp(params->as.list->items[i].as.declarator->name, "...") == 0) {
+ if (i != params->as.list->len - 1) {
+ fatal_error("...");
+ }
+ --params->as.list->len;
+ break;
+ }
+ has_void |= params->as.list->items[i].ty->kind == TypeKind_void;
+ }
+
+ if (has_void) {
+ if (params->as.list->len != 1) {
+ fatal_error("invalid use of void param");
+ }
+ params->as.list->len = 0;
+ }
+
+ return params;
+}
+
+// parameter-list:
+// { parameter-declaration | ',' }
+static AstNode* parse_parameter_list(Parser* p) {
+ AstNode* params = ast_new_list(4);
+
+ while (1) {
+ AstNode* param = parse_parameter_declaration(p);
+ ast_append(params, param);
+
+ if (!consume_token_if(p, TokenKind_comma)) {
+ break;
+ }
+ }
+
+ return params;
+}
+
+// parameter-declaration:
+// TODO attribute-specifier-sequence? declaration-specifiers declarator
+// TODO attribute-specifier-sequence? declaration-specifiers abstract-declarator?
+static AstNode* parse_parameter_declaration(Parser* p) {
+ if (consume_token_if(p, TokenKind_ellipsis)) {
+ return ast_new_declarator("...", NULL);
+ }
+
+ Type* base_ty = parse_declaration_specifiers(p);
+ return parse_declarator_or_abstract_declarator_opt(p, base_ty);
+}
+
+// array-declarator:
+// direct-declarator '[' TODO type-qualifier-list? TODO assignment-expr? ']'
+// TODO direct-declarator '[' 'static' type-qualifier-list? assignment-expr? ']'
+// TODO direct-declarator '[' type-qualifier-list 'static' assignment-expr? ']'
+// TODO direct-declarator '[' type-qualifier-list? '*' ']'
+static Type* parse_array_declarator_suffix(Parser* p, Type* ty) {
+ next_token(p); // skip '['
+
+ int size = -1;
+ if (peek_token(p)->kind != TokenKind_bracket_r) {
+ AstNode* size_expr = parse_assignment_expr(p);
+ size = eval(size_expr);
+ }
+ expect(p, TokenKind_bracket_r);
+
+ return type_new_array(ty, size);
+}
+
+// function-declarator:
+// direct-declarator '(' parameter-type-list? ')'
+static Type* parse_function_declarator_suffix(Parser* p, Type* ty) {
+ next_token(p); // skip '('
+ AstNode* params;
+
+ if (consume_token_if(p, TokenKind_paren_r)) {
+ params = ast_new_list(1);
+ } else {
+ params = parse_parameter_type_list(p);
+ expect(p, TokenKind_paren_r);
+ }
+
+ return type_new_func(ty, params);
+}
+
+// direct-declarator:
+// identifier TODO attribute-specifier-sequence?
+// '(' declarator ')'
+// array-declarator TODO attribute-specifier-sequence?
+// function-declarator TODO attribute-specifier-sequence?
+static AstNode* parse_direct_declarator(Parser* p, Type* ty) {
+ AstNode* decl;
+ if (peek_token(p)->kind == TokenKind_ident) {
+ decl = ast_new_declarator(parse_ident(p)->value.string, ty);
+ } else if (peek_token(p)->kind == TokenKind_paren_l && !is_type_token(p, peek_token2(p))) {
+ next_token(p);
+ // 1st pass: skip content inside parentheses.
+ int saved_pos = p->pos;
+ Type* dummy = type_new(TypeKind_void);
+ parse_declarator(p, dummy);
+ expect(p, TokenKind_paren_r);
+ // Parse suffixes.
+ while (1) {
+ if (peek_token(p)->kind == TokenKind_bracket_l) {
+ ty = parse_array_declarator_suffix(p, ty);
+ } else if (peek_token(p)->kind == TokenKind_paren_l) {
+ ty = parse_function_declarator_suffix(p, ty);
+ } else {
+ break;
+ }
+ }
+ // 2nd pass: re-parse inner content.
+ int end_pos = p->pos;
+ p->pos = saved_pos;
+ decl = parse_declarator(p, ty);
+ expect(p, TokenKind_paren_r);
+ p->pos = end_pos;
+ return ast_new_declarator(decl->as.declarator->name, decl->ty);
+ } else {
+ decl = ast_new_declarator(NULL, ty);
+ }
+
+ while (1) {
+ if (peek_token(p)->kind == TokenKind_bracket_l) {
+ decl->ty = parse_array_declarator_suffix(p, decl->ty);
+ } else if (peek_token(p)->kind == TokenKind_paren_l) {
+ decl->ty = parse_function_declarator_suffix(p, decl->ty);
+ } else {
+ break;
+ }
+ }
+
+ return ast_new_declarator(decl->as.declarator->name, decl->ty);
+}
+
+// declarator:
+// pointer? direct-declarator
+static AstNode* parse_declarator(Parser* p, Type* ty) {
+ ty = parse_pointer_opt(p, ty);
+ return parse_direct_declarator(p, ty);
+}
+
+// init-declarator-list:
+// init-declarator
+// init-declarator-list ',' init-declarator
+static AstNode* parse_init_declarator_list(Parser* p, Type* ty) {
+ AstNode* list = ast_new_list(1);
+ while (1) {
+ AstNode* d = parse_init_declarator(p, ty);
+ ast_append(list, d);
+ if (!consume_token_if(p, TokenKind_comma)) {
+ break;
+ }
+
+ if (ty->storage_class == StorageClass_typedef) {
+ continue;
+ }
+ // Immediately declare to allow following initializer to access previous variables. For example,
+ // int a = 1, b = a;
+ process_declarations(p, &list->as.list->items[list->as.list->len - 1]);
+ }
+ return list;
+}
+
+// declaration:
+// static_assert-declaration
+// TODO attribute-specifier-sequence ';'
+// TODO attribute-specifier-sequence declaration-specifiers init-declarator-list ';'
+// declaration-specifiers init-declarator-list ';'
+static AstNode* parse_declaration(Parser* p) {
+ if (peek_token(p)->kind == TokenKind_keyword_static_assert) {
+ return parse_static_assert_declaration(p);
+ }
+
+ Type* ty = parse_declaration_specifiers(p);
+ AstNode* decls = parse_init_declarator_list(p, ty);
+ expect(p, TokenKind_semicolon);
+ process_declarations(p, &decls->as.list->items[decls->as.list->len - 1]);
+ return decls;
+}
+
+static void create_local_initializer(AstNode* list, AstNode* lhs, AstNode* init, Type* ty) {
+ if (init->kind == AstNodeKind_array_initializer) {
+ AstNode* items = init->as.array_initializer->list;
+ if (ty->kind == TypeKind_array) {
+ for (int i = 0; i < items->as.list->len; ++i) {
+ AstNode* idx = ast_new_binary_expr(TokenKind_star, ast_new_int(i), ast_new_int(type_sizeof(ty->base)));
+ AstNode* elem = ast_new_deref_expr(ast_new_binary_expr(TokenKind_plus, lhs, idx));
+ create_local_initializer(list, elem, &items->as.list->items[i], ty->base);
+ }
+ } else if (ty->kind == TypeKind_struct) {
+ AstNode* def = &ty->ref.defs->as.list->items[ty->ref.index];
+ AstNode* members = def->as.struct_def->members;
+ for (int i = 0; i < items->as.list->len; ++i) {
+ const char* member_name = members->as.list->items[i].as.struct_member->name;
+ AstNode* member_lhs = ast_new_member_access_expr(ast_new_ref_expr(lhs), member_name);
+ create_local_initializer(list, member_lhs, &items->as.list->items[i], members->as.list->items[i].ty);
+ }
+ } else if (ty->kind == TypeKind_union) {
+ AstNode* def = &ty->ref.defs->as.list->items[ty->ref.index];
+ AstNode* members = def->as.union_def->members;
+ const char* member_name = members->as.list->items[0].as.struct_member->name;
+ AstNode* member_lhs = ast_new_member_access_expr(ast_new_ref_expr(lhs), member_name);
+ create_local_initializer(list, member_lhs, &items->as.list->items[0], members->as.list->items[0].ty);
+ }
+ } else {
+ AstNode* assign = ast_new_assign_expr(TokenKind_assign, lhs, init);
+ ast_append(list, ast_new_expr_stmt(assign));
+ }
+}
+
+static void process_declarations(Parser* p, AstNode* decl) {
+ const char* name = decl->as.declarator->name;
+ if (decl->ty->kind == TypeKind_func) {
+ // TODO: refactor
+ decl->ty->storage_class = decl->ty->result->storage_class;
+ decl->ty->result->storage_class = StorageClass_unspecified;
+ register_func(p, name, decl->ty);
+ decl->kind = AstNodeKind_func_decl;
+ } else {
+ if (type_is_unsized(decl->ty)) {
+ fatal_error("process_declarations: invalid type for variable");
+ }
+
+ if (p->scope) {
+ if (find_lvar_in_current_scope(p, name) != -1) {
+ // TODO: use name's location.
+ fatal_error("%s:%d: '%s' redeclared", peek_token(p)->loc.filename, peek_token(p)->loc.line, name);
+ }
+ int stack_offset = add_lvar(p, name, decl->ty, calc_lvar_stack_offset(p, decl->ty));
+
+ if (decl->as.declarator->init) {
+ if (decl->as.declarator->init->kind == AstNodeKind_array_initializer) {
+ AstNode* lhs_base = ast_new_lvar(name, stack_offset, decl->ty);
+ AstNode* init = decl->as.declarator->init;
+ AstNode* stmt_list = ast_new_list(4);
+ create_local_initializer(stmt_list, lhs_base, init, decl->ty);
+ *decl = *stmt_list;
+ } else {
+ AstNode* lhs = ast_new_lvar(name, stack_offset, decl->ty);
+ AstNode* assign = ast_new_assign_expr(TokenKind_assign, lhs, decl->as.declarator->init);
+ decl->kind = AstNodeKind_expr_stmt;
+ decl->as.expr_stmt = calloc(1, sizeof(ExprStmtNode));
+ decl->as.expr_stmt->expr = assign;
+ }
+ } else {
+ decl->kind = AstNodeKind_nop;
+ }
+ } else {
+ if (find_gvar(p, name) != -1) {
+ // TODO
+ // fatal_error("process_declarations: %s redeclared", name);
+ }
+ // TODO: refactor
+ Type* base_ty = decl->ty;
+ while (base_ty->base) {
+ base_ty = base_ty->base;
+ }
+ if (base_ty != decl->ty) {
+ decl->ty->storage_class = base_ty->storage_class;
+ base_ty->storage_class = StorageClass_unspecified;
+ }
+
+ // TODO: refactor
+ if (decl->ty->kind == TypeKind_array && decl->ty->array_size == -1 && decl->as.declarator &&
+ decl->as.declarator->init && decl->as.declarator->init->kind == AstNodeKind_array_initializer) {
+ decl->ty->array_size = decl->as.declarator->init->as.array_initializer->list->as.list->len;
+ }
+
+ GlobalVar* gvar = gvars_push_new(&p->gvars);
+ gvar->name = name;
+ gvar->ty = decl->ty;
+
+ if (decl->ty->storage_class == StorageClass_extern) {
+ decl->kind = AstNodeKind_nop;
+ } else {
+ AstNode* init_expr = decl->as.declarator ? decl->as.declarator->init : NULL;
+ decl->kind = AstNodeKind_gvar_decl;
+ decl->as.gvar_decl = calloc(1, sizeof(GvarDeclNode));
+ decl->as.gvar_decl->name = name;
+ decl->as.gvar_decl->expr = init_expr;
+ }
+ }
+ }
+}
+
+// function-definition:
+// declaration-specifiers init-declarator-list compound-stmt
+static AstNode* parse_function_definition(Parser* p, AstNode* decls) {
+ if (decls->as.list->len != 1) {
+ fatal_error("parse_function_definition: invalid syntax");
+ }
+
+ Type* ty = decls->as.list->items[0].ty;
+ Type* base_ty = ty->result;
+ while (base_ty->base) {
+ base_ty = base_ty->base;
+ }
+ ty->storage_class = base_ty->storage_class;
+ base_ty->storage_class = StorageClass_unspecified;
+ const char* name = decls->as.list->items[0].as.declarator->name;
+ AstNode* params = ty->params;
+
+ register_func(p, name, ty);
+ enter_func(p);
+ register_params(p, params);
+ AstNode* body = parse_compound_stmt(p);
+ leave_func(p);
+
+ int stack_size = 0;
+ if (p->lvars.len != 0) {
+ stack_size = p->lvars.data[p->lvars.len - 1].stack_offset + type_sizeof(p->lvars.data[p->lvars.len - 1].ty);
+ if (stack_size < 0) {
+ stack_size = 0;
+ }
+ }
+ return ast_new_func_def(name, ty, params, body, stack_size);
+}
+
+static void process_typedefs(Parser* p, AstNode* decls) {
+ for (int i = 0; i < decls->as.list->len; ++i) {
+ AstNode* decl = &decls->as.list->items[i];
+
+ AstNode* typedef_ = ast_new_typedef_decl(decl->as.declarator->name, decl->ty);
+ ast_append(p->typedefs, typedef_);
+ }
+}
+
+static char* generate_anonymous_name(Parser* p) {
+ char* buf = calloc(32, sizeof(char));
+ sprintf(buf, "__anonymous_%d__", p->anonymous_user_type_counter++);
+ return buf;
+}
+
+// declaration-specifiers:
+// { declaration-specifier }+ TODO attribute-specifier-sequence?
+//
+// declaration-specifier:
+// storage-class-specifier
+// type-specifier-qualifier
+// function-speicifier
+//
+// storage-class-specifier:
+// 'auto'
+// 'constexpr'
+// 'extern'
+// 'register'
+// 'static'
+// 'thread_local'
+// 'typedef'
+//
+// type-specifier-qualifier:
+// type-specifier
+// type-qualifier
+// alignment-specifier
+//
+// function-specifier:
+// 'inline'
+// '_Noreturn'
+//
+// type-specifier:
+// 'void'
+// 'char'
+// 'short'
+// 'int'
+// 'long'
+// 'float'
+// 'double'
+// 'signed'
+// 'unsigned'
+// TODO '_BitInt' '(' constant-expr ')'
+// 'bool'
+// '_Complex'
+// '_Decimal32'
+// '_Decimal64'
+// '_Decimal128'
+// atomic-type-specifier
+// struct-or-union-specifier
+// enum-specifier
+// typeof-specifier
+// typedef-name
+//
+// type-qualifier:
+// 'const'
+// 'restrict'
+// 'volatile'
+// '_Atomic'
+//
+// alignment-specifier:
+// TODO 'alignas' '(' type-name ')'
+// TODO 'alignas' '(' constant-expr ')'
+//
+// atomic-type-specifier:
+// TODO '_Atomic' '(' type-name ')'
+//
+// struct-or-union-specifier:
+// 'struct' ...
+// 'union' ...
+//
+// enum-specifier:
+// 'enum' ...
+//
+// typeof-specifier:
+// TODO 'typeof' '(' typeof-specifier-argument ')'
+// TODO 'typeof_unqual' '(' typeof-specifier-argument ')'
+//
+// typedef-name:
+// identifier
+static Type* parse_declaration_specifiers(Parser* p) {
+ StorageClass storage_class = StorageClass_unspecified;
+ int type_specifiers = 0;
+ Type* ty = NULL;
+
+ Token* tok;
+ while (1) {
+ tok = peek_token(p);
+
+ // storage-class-spciefier
+ if (tok->kind == TokenKind_keyword_auto) {
+ unimplemented();
+ } else if (tok->kind == TokenKind_keyword_constexpr) {
+ unimplemented();
+ } else if (tok->kind == TokenKind_keyword_extern) {
+ next_token(p);
+ storage_class = StorageClass_extern;
+ } else if (tok->kind == TokenKind_keyword_register) {
+ unimplemented();
+ } else if (tok->kind == TokenKind_keyword_static) {
+ next_token(p);
+ storage_class = StorageClass_static;
+ } else if (tok->kind == TokenKind_keyword_thread_local) {
+ unimplemented();
+ } else if (tok->kind == TokenKind_keyword_typedef) {
+ next_token(p);
+ storage_class = StorageClass_typedef;
+ }
+ // type-specifier-qualifier > type-specifier
+ else if (tok->kind == TokenKind_keyword_void) {
+ if (type_specifiers & TypeSpecifierMask_void) {
+ fatal_error("%s:%d: duplicate '%s'", tok->loc.filename, tok->loc.line, token_stringify(tok));
+ }
+ next_token(p);
+ type_specifiers += TypeSpecifierMask_void;
+ } else if (tok->kind == TokenKind_keyword_char) {
+ if (type_specifiers & TypeSpecifierMask_char) {
+ fatal_error("%s:%d: duplicate '%s'", tok->loc.filename, tok->loc.line, token_stringify(tok));
+ }
+ next_token(p);
+ type_specifiers += TypeSpecifierMask_char;
+ } else if (tok->kind == TokenKind_keyword_short) {
+ if (type_specifiers & TypeSpecifierMask_short) {
+ fatal_error("%s:%d: duplicate '%s'", tok->loc.filename, tok->loc.line, token_stringify(tok));
+ }
+ next_token(p);
+ type_specifiers += TypeSpecifierMask_short;
+ } else if (tok->kind == TokenKind_keyword_int) {
+ if (type_specifiers & TypeSpecifierMask_int) {
+ fatal_error("%s:%d: duplicate '%s'", tok->loc.filename, tok->loc.line, token_stringify(tok));
+ }
+ next_token(p);
+ type_specifiers += TypeSpecifierMask_int;
+ } else if (tok->kind == TokenKind_keyword_long) {
+ if (type_specifiers & (TypeSpecifierMask_long + TypeSpecifierMask_long)) {
+ fatal_error("%s:%d: too looong!", tok->loc.filename, tok->loc.line);
+ }
+ next_token(p);
+ type_specifiers += TypeSpecifierMask_long;
+ } else if (tok->kind == TokenKind_keyword_float) {
+ if (type_specifiers & TypeSpecifierMask_float) {
+ fatal_error("%s:%d: duplicate '%s'", tok->loc.filename, tok->loc.line, token_stringify(tok));
+ }
+ next_token(p);
+ type_specifiers += TypeSpecifierMask_float;
+ } else if (tok->kind == TokenKind_keyword_double) {
+ if (type_specifiers & TypeSpecifierMask_double) {
+ fatal_error("%s:%d: duplicate '%s'", tok->loc.filename, tok->loc.line, token_stringify(tok));
+ }
+ next_token(p);
+ type_specifiers += TypeSpecifierMask_double;
+ } else if (tok->kind == TokenKind_keyword_signed) {
+ if (type_specifiers & TypeSpecifierMask_signed) {
+ fatal_error("%s:%d: duplicate '%s'", tok->loc.filename, tok->loc.line, token_stringify(tok));
+ }
+ next_token(p);
+ type_specifiers += TypeSpecifierMask_signed;
+ } else if (tok->kind == TokenKind_keyword_unsigned) {
+ if (type_specifiers & TypeSpecifierMask_unsigned) {
+ fatal_error("%s:%d: duplicate '%s'", tok->loc.filename, tok->loc.line, token_stringify(tok));
+ }
+ next_token(p);
+ type_specifiers += TypeSpecifierMask_unsigned;
+ } else if (tok->kind == TokenKind_keyword__BitInt) {
+ if (type_specifiers & TypeSpecifierMask__BitInt) {
+ fatal_error("%s:%d: duplicate '%s'", tok->loc.filename, tok->loc.line, token_stringify(tok));
+ }
+ next_token(p);
+ type_specifiers += TypeSpecifierMask__BitInt;
+ } else if (tok->kind == TokenKind_keyword_bool) {
+ if (type_specifiers & TypeSpecifierMask_bool) {
+ fatal_error("%s:%d: duplicate '%s'", tok->loc.filename, tok->loc.line, token_stringify(tok));
+ }
+ next_token(p);
+ type_specifiers += TypeSpecifierMask_bool;
+ } else if (tok->kind == TokenKind_keyword__Complex) {
+ if (type_specifiers & TypeSpecifierMask__Complex) {
+ fatal_error("%s:%d: duplicate '%s'", tok->loc.filename, tok->loc.line, token_stringify(tok));
+ }
+ next_token(p);
+ type_specifiers += TypeSpecifierMask__Complex;
+ } else if (tok->kind == TokenKind_keyword__Decimal32) {
+ if (type_specifiers & TypeSpecifierMask__Decimal32) {
+ fatal_error("%s:%d: duplicate '%s'", tok->loc.filename, tok->loc.line, token_stringify(tok));
+ }
+ next_token(p);
+ type_specifiers += TypeSpecifierMask__Decimal32;
+ } else if (tok->kind == TokenKind_keyword__Decimal64) {
+ if (type_specifiers & TypeSpecifierMask__Decimal64) {
+ fatal_error("%s:%d: duplicate '%s'", tok->loc.filename, tok->loc.line, token_stringify(tok));
+ }
+ next_token(p);
+ type_specifiers += TypeSpecifierMask__Decimal64;
+ } else if (tok->kind == TokenKind_keyword__Decimal128) {
+ if (type_specifiers & TypeSpecifierMask__Decimal128) {
+ fatal_error("%s:%d: duplicate '%s'", tok->loc.filename, tok->loc.line, token_stringify(tok));
+ }
+ next_token(p);
+ type_specifiers += TypeSpecifierMask__Decimal128;
+ } else if (tok->kind == TokenKind_keyword__Atomic) {
+ if (type_specifiers & TypeSpecifierMask__Atomic) {
+ fatal_error("%s:%d: duplicate '%s'", tok->loc.filename, tok->loc.line, token_stringify(tok));
+ }
+ next_token(p);
+ type_specifiers += TypeSpecifierMask__Atomic;
+ } else if (tok->kind == TokenKind_keyword_struct) {
+ if (type_specifiers & TypeSpecifierMask_struct) {
+ fatal_error("%s:%d: duplicate '%s'", tok->loc.filename, tok->loc.line, token_stringify(tok));
+ }
+ ty = parse_struct_specifier(p);
+ type_specifiers += TypeSpecifierMask_struct;
+ } else if (tok->kind == TokenKind_keyword_union) {
+ if (type_specifiers & TypeSpecifierMask_union) {
+ fatal_error("%s:%d: duplicate '%s'", tok->loc.filename, tok->loc.line, token_stringify(tok));
+ }
+ ty = parse_union_specifier(p);
+ type_specifiers += TypeSpecifierMask_union;
+ } else if (tok->kind == TokenKind_keyword_enum) {
+ if (type_specifiers & TypeSpecifierMask_enum) {
+ fatal_error("%s:%d: duplicate '%s'", tok->loc.filename, tok->loc.line, token_stringify(tok));
+ }
+ ty = parse_enum_specifier(p);
+ type_specifiers += TypeSpecifierMask_enum;
+ } else if (tok->kind == TokenKind_keyword_typeof) {
+ unimplemented();
+ } else if (tok->kind == TokenKind_keyword_typeof_unqual) {
+ unimplemented();
+ } else if (is_typedef_name(p, tok)) {
+ if (type_specifiers & TypeSpecifierMask_typedef_name) {
+ fatal_error("%s:%d: duplicate '%s'", tok->loc.filename, tok->loc.line, token_stringify(tok));
+ }
+ next_token(p);
+ int typedef_idx = find_typedef(p, tok->value.string);
+ ty = type_dup(p->typedefs->as.list->items[typedef_idx].ty);
+ type_specifiers += TypeSpecifierMask_typedef_name;
+ }
+ // type-specifier-qualifier > type-qualifier
+ else if (tok->kind == TokenKind_keyword_const) {
+ // TODO
+ next_token(p);
+ } else if (tok->kind == TokenKind_keyword_restrict) {
+ // TODO
+ next_token(p);
+ } else if (tok->kind == TokenKind_keyword_volatile) {
+ // TODO
+ next_token(p);
+ } else if (tok->kind == TokenKind_keyword__Atomic) {
+ unimplemented();
+ }
+ // type-specifier-qualifier > alignment-specifier
+ else if (tok->kind == TokenKind_keyword_alignas) {
+ unimplemented();
+ }
+ // function-specifier
+ else if (tok->kind == TokenKind_keyword_inline) {
+ // TODO
+ next_token(p);
+ } else if (tok->kind == TokenKind_keyword__Noreturn) {
+ // TODO
+ next_token(p);
+ } else {
+ break;
+ }
+ }
+
+ Type* ty_ = distinguish_type_from_type_specifiers(type_specifiers);
+ if (ty_) {
+ ty = ty_;
+ }
+ if (!ty) {
+ fatal_error("%s:%d: no type specifiers", tok->loc.filename, tok->loc.line);
+ }
+
+ ty->storage_class = storage_class;
+ return ty;
+}
+
+// init-declarator:
+// declarator
+// declarator '=' initializer
+static AstNode* parse_init_declarator(Parser* p, Type* ty) {
+ AstNode* decl = parse_declarator(p, ty);
+ if (consume_token_if(p, TokenKind_assign)) {
+ decl->as.declarator->init = parse_initializer(p);
+ }
+ return decl;
+}
+
+// struct-specifier:
+// 'struct' TODO attribute-specifier-sequence? identifier? '{' member-declaration-list '}'
+// 'struct' TODO attribute-specifier-sequence? identifier
+static Type* parse_struct_specifier(Parser* p) {
+ SourceLocation struct_kw_loc = current_location(p);
+ next_token(p);
+
+ const Token* name;
+ char* anonymous_name = NULL;
+ if (peek_token(p)->kind == TokenKind_brace_l) {
+ anonymous_name = generate_anonymous_name(p);
+ Token* anonymous_token = calloc(1, sizeof(Token));
+ anonymous_token->kind = TokenKind_ident;
+ anonymous_token->value.string = anonymous_name;
+ anonymous_token->loc = struct_kw_loc;
+ name = anonymous_token;
+ } else {
+ name = parse_ident(p);
+ }
+
+ if (!consume_token_if(p, TokenKind_brace_l)) {
+ int struct_idx = find_struct(p, name->value.string);
+ if (struct_idx != -1) {
+ Type* ty = type_new(TypeKind_struct);
+ ty->ref.defs = p->structs;
+ ty->ref.index = struct_idx;
+ return ty;
+ } else {
+ // TODO
+ AstNode* new_struct = ast_new_struct_def(name->value.string);
+ ast_append(p->structs, new_struct);
+ struct_idx = p->structs->as.list->len - 1;
+
+ Type* ty = type_new(TypeKind_struct);
+ ty->ref.defs = p->structs;
+ ty->ref.index = struct_idx;
+ return ty;
+ }
+ }
+
+ int struct_idx = find_struct(p, name->value.string);
+ if (struct_idx != -1 && p->structs->as.list->items[struct_idx].as.struct_def->members) {
+ fatal_error("%s:%d: struct %s redefined", name->loc.filename, name->loc.line, name->value.string);
+ }
+
+ if (struct_idx == -1) {
+ AstNode* new_struct = ast_new_struct_def(name->value.string);
+ ast_append(p->structs, new_struct);
+ struct_idx = p->structs->as.list->len - 1;
+ }
+
+ AstNode* members = parse_member_declaration_list(p);
+ expect(p, TokenKind_brace_r);
+ p->structs->as.list->items[struct_idx].as.struct_def->members = members;
+
+ Type* ty = type_new(TypeKind_struct);
+ ty->ref.defs = p->structs;
+ ty->ref.index = struct_idx;
+ return ty;
+}
+
+// union-specifier:
+// 'union' TODO attribute-specifier-sequence? identifier? '{' member-declaration-list '}'
+// 'union' TODO attribute-specifier-sequence? identifier
+static Type* parse_union_specifier(Parser* p) {
+ SourceLocation union_kw_loc = current_location(p);
+ next_token(p);
+
+ const Token* name;
+ char* anonymous_name = NULL;
+ if (peek_token(p)->kind == TokenKind_brace_l) {
+ anonymous_name = generate_anonymous_name(p);
+ Token* anonymous_token = calloc(1, sizeof(Token));
+ anonymous_token->kind = TokenKind_ident;
+ anonymous_token->value.string = anonymous_name;
+ anonymous_token->loc = union_kw_loc;
+ name = anonymous_token;
+ } else {
+ name = parse_ident(p);
+ }
+
+ if (!consume_token_if(p, TokenKind_brace_l)) {
+ int union_idx = find_union(p, name->value.string);
+ if (union_idx != -1) {
+ Type* ty = type_new(TypeKind_union);
+ ty->ref.defs = p->unions;
+ ty->ref.index = union_idx;
+ return ty;
+ } else {
+ // TODO
+ AstNode* new_union = ast_new_union_def(name->value.string);
+ ast_append(p->unions, new_union);
+ union_idx = p->unions->as.list->len - 1;
+
+ Type* ty = type_new(TypeKind_union);
+ ty->ref.defs = p->unions;
+ ty->ref.index = union_idx;
+ return ty;
+ }
+ }
+
+ int union_idx = find_union(p, name->value.string);
+ if (union_idx != -1 && p->unions->as.list->items[union_idx].as.union_def->members) {
+ fatal_error("%s:%d: union %s redefined", name->loc.filename, name->loc.line, name->value.string);
+ }
+
+ if (union_idx == -1) {
+ AstNode* new_union = ast_new_union_def(name->value.string);
+ ast_append(p->unions, new_union);
+ union_idx = p->unions->as.list->len - 1;
+ }
+
+ AstNode* members = parse_member_declaration_list(p);
+ expect(p, TokenKind_brace_r);
+ p->unions->as.list->items[union_idx].as.union_def->members = members;
+
+ Type* ty = type_new(TypeKind_union);
+ ty->ref.defs = p->unions;
+ ty->ref.index = union_idx;
+ return ty;
+}
+
+// member-declaration-list:
+// { member-declaration }+
+static AstNode* parse_member_declaration_list(Parser* p) {
+ AstNode* members = ast_new_list(4);
+ while (peek_token(p)->kind != TokenKind_brace_r) {
+ AstNode* decls = parse_member_declaration(p);
+ for (int i = 0; i < decls->as.list->len; i++) {
+ ast_append(members, &decls->as.list->items[i]);
+ }
+ }
+ return members;
+}
+
+// member-declaration:
+// TODO attribute-specifier-sequence? specifier-qualifier-list member-declaration-list? ';'
+// TODO static_assert-declaration
+static AstNode* parse_member_declaration(Parser* p) {
+ Type* base_ty = parse_specifier_qualifier_list(p);
+
+ if (consume_token_if(p, TokenKind_semicolon)) {
+ char* name = generate_anonymous_name(p);
+ AstNode* decls = ast_new_list(1);
+ AstNode* member = ast_new(AstNodeKind_struct_member);
+ member->ty = base_ty;
+ member->as.struct_member = calloc(1, sizeof(StructMemberNode));
+ member->as.struct_member->name = name;
+ ast_append(decls, member);
+ return decls;
+ }
+ AstNode* decls = parse_member_declarator_list(p, base_ty);
+ expect(p, TokenKind_semicolon);
+ return decls;
+}
+
+// specifier-qualifier-list:
+// { type-specifier-qualifier }+ TODO attribute-specifier-sequence?
+static Type* parse_specifier_qualifier_list(Parser* p) {
+ int type_specifiers = 0;
+ Type* ty = NULL;
+
+ while (1) {
+ Token* tok = peek_token(p);
+
+ // type-specifier-qualifier > type-specifier
+ if (tok->kind == TokenKind_keyword_void) {
+ if (type_specifiers & TypeSpecifierMask_void) {
+ fatal_error("%s:%d: duplicate '%s'", tok->loc.filename, tok->loc.line, token_stringify(tok));
+ }
+ next_token(p);
+ type_specifiers += TypeSpecifierMask_void;
+ } else if (tok->kind == TokenKind_keyword_char) {
+ if (type_specifiers & TypeSpecifierMask_char) {
+ fatal_error("%s:%d: duplicate '%s'", tok->loc.filename, tok->loc.line, token_stringify(tok));
+ }
+ next_token(p);
+ type_specifiers += TypeSpecifierMask_char;
+ } else if (tok->kind == TokenKind_keyword_short) {
+ if (type_specifiers & TypeSpecifierMask_short) {
+ fatal_error("%s:%d: duplicate '%s'", tok->loc.filename, tok->loc.line, token_stringify(tok));
+ }
+ next_token(p);
+ type_specifiers += TypeSpecifierMask_short;
+ } else if (tok->kind == TokenKind_keyword_int) {
+ if (type_specifiers & TypeSpecifierMask_int) {
+ fatal_error("%s:%d: duplicate '%s'", tok->loc.filename, tok->loc.line, token_stringify(tok));
+ }
+ next_token(p);
+ type_specifiers += TypeSpecifierMask_int;
+ } else if (tok->kind == TokenKind_keyword_long) {
+ if (type_specifiers & (TypeSpecifierMask_long + TypeSpecifierMask_long)) {
+ fatal_error("%s:%d: too looong!", tok->loc.filename, tok->loc.line);
+ }
+ next_token(p);
+ type_specifiers += TypeSpecifierMask_long;
+ } else if (tok->kind == TokenKind_keyword_float) {
+ if (type_specifiers & TypeSpecifierMask_float) {
+ fatal_error("%s:%d: duplicate '%s'", tok->loc.filename, tok->loc.line, token_stringify(tok));
+ }
+ next_token(p);
+ type_specifiers += TypeSpecifierMask_float;
+ } else if (tok->kind == TokenKind_keyword_double) {
+ if (type_specifiers & TypeSpecifierMask_double) {
+ fatal_error("%s:%d: duplicate '%s'", tok->loc.filename, tok->loc.line, token_stringify(tok));
+ }
+ next_token(p);
+ type_specifiers += TypeSpecifierMask_double;
+ } else if (tok->kind == TokenKind_keyword_signed) {
+ if (type_specifiers & TypeSpecifierMask_signed) {
+ fatal_error("%s:%d: duplicate '%s'", tok->loc.filename, tok->loc.line, token_stringify(tok));
+ }
+ next_token(p);
+ type_specifiers += TypeSpecifierMask_signed;
+ } else if (tok->kind == TokenKind_keyword_unsigned) {
+ if (type_specifiers & TypeSpecifierMask_unsigned) {
+ fatal_error("%s:%d: duplicate '%s'", tok->loc.filename, tok->loc.line, token_stringify(tok));
+ }
+ next_token(p);
+ type_specifiers += TypeSpecifierMask_unsigned;
+ } else if (tok->kind == TokenKind_keyword__BitInt) {
+ if (type_specifiers & TypeSpecifierMask__BitInt) {
+ fatal_error("%s:%d: duplicate '%s'", tok->loc.filename, tok->loc.line, token_stringify(tok));
+ }
+ next_token(p);
+ type_specifiers += TypeSpecifierMask__BitInt;
+ } else if (tok->kind == TokenKind_keyword_bool) {
+ if (type_specifiers & TypeSpecifierMask_bool) {
+ fatal_error("%s:%d: duplicate '%s'", tok->loc.filename, tok->loc.line, token_stringify(tok));
+ }
+ next_token(p);
+ type_specifiers += TypeSpecifierMask_bool;
+ } else if (tok->kind == TokenKind_keyword__Complex) {
+ if (type_specifiers & TypeSpecifierMask__Complex) {
+ fatal_error("%s:%d: duplicate '%s'", tok->loc.filename, tok->loc.line, token_stringify(tok));
+ }
+ next_token(p);
+ type_specifiers += TypeSpecifierMask__Complex;
+ } else if (tok->kind == TokenKind_keyword__Decimal32) {
+ if (type_specifiers & TypeSpecifierMask__Decimal32) {
+ fatal_error("%s:%d: duplicate '%s'", tok->loc.filename, tok->loc.line, token_stringify(tok));
+ }
+ next_token(p);
+ type_specifiers += TypeSpecifierMask__Decimal32;
+ } else if (tok->kind == TokenKind_keyword__Decimal64) {
+ if (type_specifiers & TypeSpecifierMask__Decimal64) {
+ fatal_error("%s:%d: duplicate '%s'", tok->loc.filename, tok->loc.line, token_stringify(tok));
+ }
+ next_token(p);
+ type_specifiers += TypeSpecifierMask__Decimal64;
+ } else if (tok->kind == TokenKind_keyword__Decimal128) {
+ if (type_specifiers & TypeSpecifierMask__Decimal128) {
+ fatal_error("%s:%d: duplicate '%s'", tok->loc.filename, tok->loc.line, token_stringify(tok));
+ }
+ next_token(p);
+ type_specifiers += TypeSpecifierMask__Decimal128;
+ } else if (tok->kind == TokenKind_keyword__Atomic) {
+ if (type_specifiers & TypeSpecifierMask__Atomic) {
+ fatal_error("%s:%d: duplicate '%s'", tok->loc.filename, tok->loc.line, token_stringify(tok));
+ }
+ next_token(p);
+ type_specifiers += TypeSpecifierMask__Atomic;
+ } else if (tok->kind == TokenKind_keyword_struct) {
+ if (type_specifiers & TypeSpecifierMask_struct) {
+ fatal_error("%s:%d: duplicate '%s'", tok->loc.filename, tok->loc.line, token_stringify(tok));
+ }
+ ty = parse_struct_specifier(p);
+ type_specifiers += TypeSpecifierMask_struct;
+ } else if (tok->kind == TokenKind_keyword_union) {
+ if (type_specifiers & TypeSpecifierMask_union) {
+ fatal_error("%s:%d: duplicate '%s'", tok->loc.filename, tok->loc.line, token_stringify(tok));
+ }
+ ty = parse_union_specifier(p);
+ type_specifiers += TypeSpecifierMask_union;
+ } else if (tok->kind == TokenKind_keyword_enum) {
+ if (type_specifiers & TypeSpecifierMask_enum) {
+ fatal_error("%s:%d: duplicate '%s'", tok->loc.filename, tok->loc.line, token_stringify(tok));
+ }
+ ty = parse_enum_specifier(p);
+ type_specifiers += TypeSpecifierMask_enum;
+ } else if (tok->kind == TokenKind_keyword_typeof) {
+ unimplemented();
+ } else if (tok->kind == TokenKind_keyword_typeof_unqual) {
+ unimplemented();
+ } else if (is_typedef_name(p, tok)) {
+ if (type_specifiers & TypeSpecifierMask_typedef_name) {
+ fatal_error("%s:%d: duplicate '%s'", tok->loc.filename, tok->loc.line, token_stringify(tok));
+ }
+ next_token(p);
+ int typedef_idx = find_typedef(p, tok->value.string);
+ ty = type_dup(p->typedefs->as.list->items[typedef_idx].ty);
+ type_specifiers += TypeSpecifierMask_typedef_name;
+ }
+ // type-specifier-qualifier > type-qualifier
+ else if (tok->kind == TokenKind_keyword_const) {
+ // TODO
+ next_token(p);
+ } else if (tok->kind == TokenKind_keyword_restrict) {
+ unimplemented();
+ } else if (tok->kind == TokenKind_keyword_volatile) {
+ // TODO
+ next_token(p);
+ } else if (tok->kind == TokenKind_keyword__Atomic) {
+ unimplemented();
+ }
+ // type-specifier-qualifier > alignment-specifier
+ else if (tok->kind == TokenKind_keyword_alignas) {
+ unimplemented();
+ } else {
+ break;
+ }
+ }
+
+ Type* ty_ = distinguish_type_from_type_specifiers(type_specifiers);
+ if (ty_) {
+ ty = ty_;
+ }
+
+ ty->storage_class = StorageClass_unspecified;
+ return ty;
+}
+
+// member-declarator-list:
+// { member-declarator | ',' }+
+static AstNode* parse_member_declarator_list(Parser* p, Type* base_ty) {
+ AstNode* list = ast_new_list(1);
+ while (1) {
+ AstNode* d = parse_member_declarator(p, base_ty);
+ ast_append(list, d);
+ if (!consume_token_if(p, TokenKind_comma)) {
+ break;
+ }
+ }
+ return list;
+}
+
+// member-declarator:
+// declarator
+// declarator ':' constant-expr
+// TODO ':' constant-expr
+static AstNode* parse_member_declarator(Parser* p, Type* base_ty) {
+ AstNode* decl = parse_declarator(p, base_ty);
+
+ const char* name = decl->as.declarator->name;
+ decl->kind = AstNodeKind_struct_member;
+ decl->as.struct_member = calloc(1, sizeof(StructMemberNode));
+ decl->as.struct_member->name = name;
+
+ if (consume_token_if(p, TokenKind_colon)) {
+ AstNode* bit_width_node = parse_constant_expr(p);
+ InitData* bit_width_evaluated = eval_init_expr(bit_width_node, type_new(TypeKind_int));
+ if (bit_width_evaluated->len != 1)
+ fatal_error("parse_member_declarator: invalid bit-field");
+ if (bit_width_evaluated->blocks[0].kind != InitDataBlockKind_bytes)
+ fatal_error("parse_member_declarator: invalid bit-field");
+ if (bit_width_evaluated->blocks[0].as.bytes.len != sizeof(int))
+ fatal_error("parse_member_declarator: invalid bit-field");
+ int bit_width;
+ memcpy(&bit_width, bit_width_evaluated->blocks[0].as.bytes.buf, sizeof(int));
+ if (bit_width < 0)
+ fatal_error("parse_member_declarator: invalid bit-field");
+ if (bit_width % 8 != 0)
+ fatal_error("parse_member_declarator: unimplemented");
+ decl->as.struct_member->is_bitfield = true;
+ decl->as.struct_member->bitfield_offset = -1; // TODO
+ decl->as.struct_member->bitfield_width = bit_width;
+ }
+
+ return decl;
+}
+
+// enum-specifier:
+// 'enum' TODO attribute-specifier-sequence? identifier? TODO enum-type-specifier? '{' enumerator-list ','? '}'
+// 'enum' identifier TODO enum-type-specifier?
+static Type* parse_enum_specifier(Parser* p) {
+ SourceLocation enum_kw_loc = current_location(p);
+ next_token(p);
+
+ const Token* name;
+ char* anonymous_name = NULL;
+ if (peek_token(p)->kind == TokenKind_brace_l) {
+ anonymous_name = generate_anonymous_name(p);
+ Token* anonymous_token = calloc(1, sizeof(Token));
+ anonymous_token->kind = TokenKind_ident;
+ anonymous_token->value.string = anonymous_name;
+ anonymous_token->loc = enum_kw_loc;
+ name = anonymous_token;
+ } else {
+ name = parse_ident(p);
+ }
+
+ if (!consume_token_if(p, TokenKind_brace_l)) {
+ int enum_idx = find_enum(p, name->value.string);
+ if (enum_idx != -1) {
+ Type* ty = type_new(TypeKind_enum);
+ ty->ref.defs = p->enums;
+ ty->ref.index = enum_idx;
+ return ty;
+ } else {
+ // TODO
+ AstNode* new_enum = ast_new_enum_def(name->value.string);
+ ast_append(p->enums, new_enum);
+ enum_idx = p->enums->as.list->len - 1;
+
+ Type* ty = type_new(TypeKind_enum);
+ ty->ref.defs = p->enums;
+ ty->ref.index = enum_idx;
+ return ty;
+ }
+ }
+
+ int enum_idx = find_enum(p, name->value.string);
+ if (enum_idx != -1 && p->enums->as.list->items[enum_idx].as.enum_def->members) {
+ fatal_error("%s:%d: enum %s redefined", name->loc.filename, name->loc.line, name->value.string);
+ }
+
+ if (enum_idx == -1) {
+ AstNode* new_enum = ast_new_enum_def(name->value.string);
+ ast_append(p->enums, new_enum);
+ enum_idx = p->enums->as.list->len - 1;
+ }
+
+ parse_enum_members(p, enum_idx);
+ expect(p, TokenKind_brace_r);
+
+ Type* ty = type_new(TypeKind_enum);
+ ty->ref.defs = p->enums;
+ ty->ref.index = enum_idx;
+ return ty;
+}
+
+// enumerator-list:
+// { enumerator |? ',' }+
+static void parse_enum_members(Parser* p, int enum_idx) {
+ int next_value = 0;
+ AstNode* list = ast_new_list(16);
+
+ if (!p->enums->as.list->items[enum_idx].as.enum_def->members) {
+ p->enums->as.list->items[enum_idx].as.enum_def->members = list;
+ }
+
+ while (peek_token(p)->kind != TokenKind_brace_r) {
+ AstNode* member = parse_enum_member(p, next_value);
+ next_value = member->as.enum_member->value + 1;
+
+ ast_append(list, member);
+
+ if (!consume_token_if(p, TokenKind_comma)) {
+ break;
+ }
+ }
+}
+
+// enumerator:
+// enumeration-constant TODO attribute-specifier-sequence? ( '=' constant-expr )?
+static AstNode* parse_enum_member(Parser* p, int next_value) {
+ const Token* name = parse_ident(p);
+ int value;
+ if (consume_token_if(p, TokenKind_assign)) {
+ AstNode* expr = parse_constant_expr(p);
+ value = eval(expr);
+ } else {
+ value = next_value;
+ }
+ return ast_new_enum_member(name->value.string, value);
+}
+
+// type-name:
+// specifier-qualifier-list abstract-declarator?
+static Type* parse_type_name(Parser* p) {
+ Type* ty = parse_specifier_qualifier_list(p);
+ ty = parse_abstract_declarator_opt(p, ty);
+ return ty;
+}
+
+// abstract-declarator:
+// pointer
+// pointer? TODO direct-abstract-declarator
+//
+// abstract-declarator?:
+// pointer? TODO direct-abstract-declarator?
+static Type* parse_abstract_declarator_opt(Parser* p, Type* ty) {
+ return parse_pointer_opt(p, ty);
+}
+
+// braced-initializer:
+// '{' { designation? initializer |? ',' }* '}'
+static AstNode* parse_braced_initializer(Parser* p) {
+ AstNode* inits = ast_new_list(4);
+ expect(p, TokenKind_brace_l);
+ while (peek_token(p)->kind != TokenKind_brace_r) {
+ // TODO: support designation
+ AstNode* init = parse_initializer(p);
+ ast_append(inits, init);
+ if (!consume_token_if(p, TokenKind_comma)) {
+ break;
+ }
+ }
+ expect(p, TokenKind_brace_r);
+ return ast_new_array_initializer(inits);
+}
+
+// initializer:
+// braced-initializer
+// assignment-expr
+static AstNode* parse_initializer(Parser* p) {
+ if (peek_token(p)->kind == TokenKind_brace_l) {
+ return parse_braced_initializer(p);
+ } else {
+ return parse_assignment_expr(p);
+ }
+}
+
+#ifndef __ducc__
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wunused-function"
+#endif
+// attribute-specifier-sequence:
+// { attribute-specifier }+
+static AstNode* parse_attribute_specifier_sequence_opt(Parser* p) {
+ AstNode* attrs = NULL;
+ AstNode* attr;
+ while ((attr = parse_attribute_specifier_opt(p))) {
+ unimplemented();
+ }
+ return attrs;
+}
+#ifndef __ducc__
+#pragma GCC diagnostic pop
+#endif
+
+// attribute-specifier:
+// '[' '[' attribute-list ']' ']'
+static AstNode* parse_attribute_specifier_opt(Parser* p) {
+ if (peek_token(p)->kind != TokenKind_bracket_l || peek_token2(p)->kind != TokenKind_bracket_l) {
+ return NULL;
+ }
+ next_token(p); // skip '['
+ next_token(p); // skip '['
+ AstNode* ret = parse_attribute_list(p);
+ expect(p, TokenKind_bracket_r);
+ expect(p, TokenKind_bracket_r);
+ return ret;
+}
+
+// attribute-list:
+// { attribute? | ',' }
+static AstNode* parse_attribute_list(Parser* p) {
+ while (1) {
+ if (peek_token(p)->kind == TokenKind_ident) {
+ parse_attribute(p);
+ }
+ if (!consume_token_if(p, TokenKind_comma)) {
+ break;
+ }
+ }
+ unimplemented();
+ return NULL;
+}
+
+// attribute:
+// attribute-token attribute-argument-clause?
+//
+// attribute-token:
+// identifier ( '::' identifier )?
+static AstNode* parse_attribute(Parser* p) {
+ expect(p, TokenKind_ident);
+ if (peek_token(p)->kind == TokenKind_colon && peek_token2(p)->kind == TokenKind_colon) {
+ next_token(p);
+ next_token(p);
+ expect(p, TokenKind_ident);
+ }
+ if (peek_token(p)->kind == TokenKind_paren_l) {
+ parse_attribute_argument_clause(p);
+ }
+ unimplemented();
+ return NULL;
+}
+
+// attribute-argument-clause:
+// '(' balanced-token-sequence? ')'
+static AstNode* parse_attribute_argument_clause(Parser* p) {
+ expect(p, TokenKind_paren_l);
+ if (peek_token(p)->kind != TokenKind_paren_r) {
+ parse_balanced_token_sequence(p);
+ }
+ expect(p, TokenKind_paren_r);
+ unimplemented();
+ return NULL;
+}
+
+// balanced-token-sequence:
+// { balanced-token }+
+//
+// balanced-token:
+// '(' balanced-token-sequence? ')'
+// '[' balanced-token-sequence? ']'
+// '{' balanced-token-sequence? '}'
+// any token other than a parenthesis, a bracket, or a brace
+static AstNode* parse_balanced_token_sequence(Parser* p) {
+ int paren_depth = 0;
+ int bracket_depth = 0;
+ int brace_depth = 0;
+ while (1) {
+ Token* tok = peek_token(p);
+ if (tok->kind == TokenKind_paren_l) {
+ paren_depth++;
+ next_token(p);
+ } else if (tok->kind == TokenKind_paren_r) {
+ if (paren_depth == 0)
+ break;
+ paren_depth--;
+ next_token(p);
+ } else if (tok->kind == TokenKind_bracket_l) {
+ bracket_depth++;
+ next_token(p);
+ } else if (tok->kind == TokenKind_bracket_r) {
+ if (bracket_depth == 0)
+ break;
+ bracket_depth--;
+ next_token(p);
+ } else if (tok->kind == TokenKind_brace_l) {
+ brace_depth++;
+ next_token(p);
+ } else if (tok->kind == TokenKind_brace_r) {
+ if (brace_depth == 0)
+ break;
+ brace_depth--;
+ next_token(p);
+ } else if (tok->kind == TokenKind_eof) {
+ break;
+ } else {
+ next_token(p);
+ }
+ }
+ unimplemented();
+ return NULL;
+}
+
+// unlabaled-stmt:
+// attribute-specifier-sequence? compound-stmt
+// attribute-specifier-sequence? if-stmt
+// attribute-specifier-sequence? switch-stmt
+// attribute-specifier-sequence? while-stmt
+// attribute-specifier-sequence? do-while-stmt
+// attribute-specifier-sequence? for-stmt
+// attribute-specifier-sequence? goto-stmt
+// attribute-specifier-sequence? continue-stmt
+// attribute-specifier-sequence? break-stmt
+// attribute-specifier-sequence? return-stmt
+// attribute-specifier-sequence? expr-stmt
+static AstNode* parse_unlabaled_stmt(Parser* p) {
+ switch (peek_token(p)->kind) {
+ case TokenKind_brace_l:
+ return parse_compound_stmt(p);
+ case TokenKind_keyword_if:
+ return parse_if_stmt(p);
+ case TokenKind_keyword_switch:
+ return parse_switch_stmt(p);
+ case TokenKind_keyword_while:
+ return parse_while_stmt(p);
+ case TokenKind_keyword_do:
+ return parse_do_while_stmt(p);
+ case TokenKind_keyword_for:
+ return parse_for_stmt(p);
+ case TokenKind_keyword_goto:
+ return parse_goto_stmt(p);
+ case TokenKind_keyword_continue:
+ return parse_continue_stmt(p);
+ case TokenKind_keyword_break:
+ return parse_break_stmt(p);
+ case TokenKind_keyword_return:
+ return parse_return_stmt(p);
+ default:
+ return parse_expr_stmt(p);
+ }
+}
+
+// label:
+// TODO attribute-specifier-sequence? identifier ':'
+// TODO attribute-specifier-sequence? 'case' constant-expr ':'
+// TODO attribute-specifier-sequence? 'default' ':'
+static AstNode* parse_label(Parser* p) {
+ SourceLocation loc = current_location(p);
+ Token* t = peek_token(p);
+
+ AstNode* node;
+ if (t->kind == TokenKind_keyword_case) {
+ if (!p->current_switch) {
+ fatal_error("%s:%d: 'case' label not within a switch statement", t->loc.filename, t->loc.line);
+ }
+ expect(p, TokenKind_keyword_case);
+ AstNode* value = parse_constant_expr(p);
+ expect(p, TokenKind_colon);
+ node = ast_new_case_label(eval(value), ast_new_nop());
+ } else if (t->kind == TokenKind_keyword_default) {
+ if (!p->current_switch) {
+ fatal_error("%s:%d: 'default' label not within a switch statement", t->loc.filename, t->loc.line);
+ }
+ expect(p, TokenKind_keyword_default);
+ expect(p, TokenKind_colon);
+ node = ast_new_default_label(ast_new_nop());
+ } else {
+ // identifier ':'
+ Token* label_token = expect(p, TokenKind_ident);
+ expect(p, TokenKind_colon);
+ node = ast_new_label_stmt(label_token->value.string, ast_new_nop());
+ }
+ node->loc = loc;
+ return node;
+}
+
+static bool is_label_token(Parser* p) {
+ Token* t = peek_token(p);
+ return t->kind == TokenKind_keyword_case || t->kind == TokenKind_keyword_default ||
+ (t->kind == TokenKind_ident && peek_token2(p)->kind == TokenKind_colon);
+}
+
+// stmt:
+// label stmt
+// unlabaled-stmt
+static AstNode* parse_stmt(Parser* p) {
+ if (is_label_token(p)) {
+ AstNode* label = parse_label(p);
+ AstNode* stmt = parse_stmt(p);
+ switch (label->kind) {
+ case AstNodeKind_case_label:
+ label->as.case_label->body = stmt;
+ break;
+ case AstNodeKind_default_label:
+ label->as.default_label->body = stmt;
+ break;
+ case AstNodeKind_label_stmt:
+ label->as.label_stmt->body = stmt;
+ break;
+ default:
+ unreachable();
+ break;
+ }
+ return label;
+ }
+ return parse_unlabaled_stmt(p);
+}
+
+// compound-stmt:
+// '{' { block-item }* '}'
+static AstNode* parse_compound_stmt(Parser* p) {
+ SourceLocation loc = current_location(p);
+ AstNode* list = ast_new_list(4);
+ expect(p, TokenKind_brace_l);
+ enter_scope(p);
+ while (peek_token(p)->kind != TokenKind_brace_r) {
+ AstNode* stmt = parse_block_item(p);
+ ast_append(list, stmt);
+ }
+ leave_scope(p);
+ expect(p, TokenKind_brace_r);
+ list->loc = loc;
+ return list;
+}
+
+// block-item:
+// declaration
+// unlabaled-stmt
+// label
+static AstNode* parse_block_item(Parser* p) {
+ Token* t = peek_token(p);
+
+ if (t->kind == TokenKind_keyword_static_assert || is_type_token(p, t)) {
+ return parse_declaration(p);
+ } else if (is_label_token(p)) {
+ return parse_label(p);
+ } else {
+ return parse_unlabaled_stmt(p);
+ }
+}
+
+// expr-stmt:
+// TODO attribute-specifier-sequence? expr? ';'
+static AstNode* parse_expr_stmt(Parser* p) {
+ SourceLocation loc = current_location(p);
+ if (consume_token_if(p, TokenKind_semicolon)) {
+ AstNode* node = ast_new_nop();
+ node->loc = loc;
+ return node;
+ }
+
+ AstNode* e = parse_expr(p);
+ expect(p, TokenKind_semicolon);
+ AstNode* node = ast_new_expr_stmt(e);
+ node->loc = loc;
+ return node;
+}
+
+// if-stmt:
+// 'if' '(' expr ')' stmt ( 'else' stmt )?
+static AstNode* parse_if_stmt(Parser* p) {
+ SourceLocation loc = current_location(p);
+ expect(p, TokenKind_keyword_if);
+ expect(p, TokenKind_paren_l);
+ AstNode* cond = parse_expr(p);
+ expect(p, TokenKind_paren_r);
+ AstNode* then_body = parse_stmt(p);
+ AstNode* else_body = NULL;
+ if (consume_token_if(p, TokenKind_keyword_else)) {
+ else_body = parse_stmt(p);
+ }
+
+ AstNode* node = ast_new_if_stmt(cond, then_body, else_body);
+ node->loc = loc;
+ return node;
+}
+
+// switch-stmt:
+// 'switch' '(' expr ')' stmt
+static AstNode* parse_switch_stmt(Parser* p) {
+ SourceLocation loc = current_location(p);
+ expect(p, TokenKind_keyword_switch);
+ expect(p, TokenKind_paren_l);
+ AstNode* expr = parse_expr(p);
+ expect(p, TokenKind_paren_r);
+
+ AstNode* tmp_var = generate_temporary_lvar(p, expr->ty);
+ AstNode* assignment = ast_new_assign_expr(TokenKind_assign, tmp_var, expr);
+ AstNode* assign_stmt = ast_new_expr_stmt(assignment);
+ assign_stmt->loc = loc;
+
+ AstNode* switch_stmt = ast_new_switch_stmt(tmp_var);
+ switch_stmt->loc = loc;
+
+ AstNode* prev_switch = p->current_switch;
+ p->current_switch = switch_stmt;
+ switch_stmt->as.switch_stmt->body = parse_stmt(p);
+ p->current_switch = prev_switch;
+
+ AstNode* list = ast_new_list(2);
+ list->loc = loc;
+ ast_append(list, assign_stmt);
+ ast_append(list, switch_stmt);
+ return list;
+}
+
+// while-stmt:
+// 'while' '(' expr ')' stmt
+static AstNode* parse_while_stmt(Parser* p) {
+ SourceLocation loc = current_location(p);
+ expect(p, TokenKind_keyword_while);
+ expect(p, TokenKind_paren_l);
+ AstNode* cond = parse_expr(p);
+ expect(p, TokenKind_paren_r);
+ AstNode* body = parse_stmt(p);
+ AstNode* node = ast_new_for_stmt(NULL, cond, NULL, body);
+ node->loc = loc;
+ return node;
+}
+
+// do-while-stmt:
+// 'do' stmt 'while' '(' expr ')' ';'
+static AstNode* parse_do_while_stmt(Parser* p) {
+ SourceLocation loc = current_location(p);
+ expect(p, TokenKind_keyword_do);
+ AstNode* body = parse_stmt(p);
+ expect(p, TokenKind_keyword_while);
+ expect(p, TokenKind_paren_l);
+ AstNode* cond = parse_expr(p);
+ expect(p, TokenKind_paren_r);
+ expect(p, TokenKind_semicolon);
+
+ AstNode* node = ast_new_do_while_stmt(cond, body);
+ node->loc = loc;
+ return node;
+}
+
+// for-stmt:
+// 'for' '(' expr? ';' expr? ';' expr? ')' stmt
+// 'for' '(' declaration expr? ';' expr? ')' stmt
+static AstNode* parse_for_stmt(Parser* p) {
+ SourceLocation loc = current_location(p);
+ expect(p, TokenKind_keyword_for);
+ expect(p, TokenKind_paren_l);
+ AstNode* init = NULL;
+ AstNode* cond = NULL;
+ AstNode* update = NULL;
+ enter_scope(p);
+ if (peek_token(p)->kind != TokenKind_semicolon) {
+ if (is_type_token(p, peek_token(p))) {
+ AstNode* decls = parse_declaration(p);
+ AstNode* initializers = ast_new_list(1);
+ for (int i = 0; i < decls->as.list->len; i++) {
+ AstNode* item = &decls->as.list->items[i];
+ if (item->kind == AstNodeKind_expr_stmt) {
+ ast_append(initializers, item->as.expr_stmt->expr);
+ }
+ }
+ init = initializers;
+ } else {
+ init = parse_expr(p);
+ expect(p, TokenKind_semicolon);
+ }
+ } else {
+ expect(p, TokenKind_semicolon);
+ }
+ if (peek_token(p)->kind != TokenKind_semicolon) {
+ cond = parse_expr(p);
+ } else {
+ cond = ast_new_int(1);
+ }
+ expect(p, TokenKind_semicolon);
+ if (peek_token(p)->kind != TokenKind_paren_r) {
+ update = parse_expr(p);
+ }
+ expect(p, TokenKind_paren_r);
+ AstNode* body = parse_stmt(p);
+ leave_scope(p);
+ AstNode* node = ast_new_for_stmt(init, cond, update, body);
+ node->loc = loc;
+ return node;
+}
+
+// goto-stmt:
+// 'goto' identifier ';'
+static AstNode* parse_goto_stmt(Parser* p) {
+ SourceLocation loc = current_location(p);
+ expect(p, TokenKind_keyword_goto);
+ Token* label_token = expect(p, TokenKind_ident);
+ expect(p, TokenKind_semicolon);
+ AstNode* node = ast_new_goto_stmt(label_token->value.string);
+ node->loc = loc;
+ return node;
+}
+
+// continue-stmt:
+// 'continue' ';'
+static AstNode* parse_continue_stmt(Parser* p) {
+ SourceLocation loc = current_location(p);
+ expect(p, TokenKind_keyword_continue);
+ expect(p, TokenKind_semicolon);
+ AstNode* node = ast_new_continue_stmt();
+ node->loc = loc;
+ return node;
+}
+
+// break-stmt:
+// 'break' ';'
+static AstNode* parse_break_stmt(Parser* p) {
+ SourceLocation loc = current_location(p);
+ expect(p, TokenKind_keyword_break);
+ expect(p, TokenKind_semicolon);
+ AstNode* node = ast_new_break_stmt();
+ node->loc = loc;
+ return node;
+}
+
+// return-stmt:
+// 'return' expr? ';'
+static AstNode* parse_return_stmt(Parser* p) {
+ SourceLocation loc = current_location(p);
+ expect(p, TokenKind_keyword_return);
+ if (consume_token_if(p, TokenKind_semicolon)) {
+ AstNode* node = ast_new_return_stmt(NULL);
+ node->loc = loc;
+ return node;
+ }
+
+ AstNode* expr = parse_expr(p);
+ expect(p, TokenKind_semicolon);
+ AstNode* node = ast_new_return_stmt(expr);
+ node->loc = loc;
+ return node;
+}
+
+// static_assert-declaration:
+// 'static_assert' '(' constant-expr ( ',' string-literal )? ')' ';'
+static AstNode* parse_static_assert_declaration(Parser* p) {
+ expect(p, TokenKind_keyword_static_assert);
+ expect(p, TokenKind_paren_l);
+ AstNode* predicate = parse_constant_expr(p);
+ Token* message = NULL;
+ if (consume_token_if(p, TokenKind_comma)) {
+ message = expect(p, TokenKind_literal_str);
+ }
+ // TODO: validate the assertion.
+ (void)predicate;
+ (void)message;
+ expect(p, TokenKind_paren_r);
+ expect(p, TokenKind_semicolon);
+ return NULL;
+}
+
+// external-declaration:
+// static_assert-declaration
+// TODO attribute-specifier-sequence ';'
+// TODO attribute-specifier-sequence function-definition-or-declaration-rest
+// function-definition-or-declaration-rest
+//
+// function-definition-or-declaration-rest:
+// declaration-specifiers init-declarator-list ';'
+// declaration-specifiers init-declarator-list compound-stmt
+static AstNode* parse_external_declaration(Parser* p) {
+ if (peek_token(p)->kind == TokenKind_keyword_static_assert) {
+ return parse_static_assert_declaration(p);
+ }
+
+ Type* ty = parse_declaration_specifiers(p);
+ if (consume_token_if(p, TokenKind_semicolon)) {
+ // Type declaration.
+ return NULL;
+ }
+
+ AstNode* decls = parse_init_declarator_list(p, ty);
+
+ if (peek_token(p)->kind == TokenKind_brace_l) {
+ return parse_function_definition(p, decls);
+ }
+
+ expect(p, TokenKind_semicolon);
+ if (ty->storage_class == StorageClass_typedef) {
+ process_typedefs(p, decls);
+ return NULL;
+ } else {
+ process_declarations(p, &decls->as.list->items[decls->as.list->len - 1]);
+ return decls;
+ }
+}
+
+// translation-unit:
+// { external-declaration }+
+Program* parse(TokenArray* tokens) {
+ Parser* p = parser_new(tokens);
+ AstNode* funcs = ast_new_list(32);
+ AstNode* vars = ast_new_list(16);
+ while (eof(p)) {
+ AstNode* n = parse_external_declaration(p);
+ if (!n)
+ continue;
+ if (n->kind == AstNodeKind_func_def) {
+ ast_append(funcs, n);
+ } else if (n->kind == AstNodeKind_gvar_decl) {
+ ast_append(vars, n);
+ } else if (n->kind == AstNodeKind_list) {
+ for (int i = 0; i < n->as.list->len; ++i) {
+ if (n->as.list->items[i].kind == AstNodeKind_gvar_decl)
+ ast_append(vars, &n->as.list->items[i]);
+ }
+ }
+ }
+ Program* prog = calloc(1, sizeof(Program));
+ prog->funcs = funcs;
+ prog->vars = vars;
+ prog->str_literals = p->str_literals.data;
+ return prog;
+}
+
+static int eval_binary_expr(int op, int v1, int v2) {
+ if (op == TokenKind_andand) {
+ return v1 && v2;
+ } else if (op == TokenKind_oror) {
+ return v1 || v2;
+ } else if (op == TokenKind_plus) {
+ return v1 + v2;
+ } else if (op == TokenKind_minus) {
+ return v1 - v2;
+ } else if (op == TokenKind_star) {
+ return v1 * v2;
+ } else if (op == TokenKind_slash) {
+ if (v2 == 0) {
+ fatal_error("eval: division by zero");
+ }
+ return v1 / v2;
+ } else if (op == TokenKind_percent) {
+ if (v2 == 0) {
+ fatal_error("eval: division by zero");
+ }
+ return v1 % v2;
+ } else if (op == TokenKind_eq) {
+ return v1 == v2;
+ } else if (op == TokenKind_ne) {
+ return v1 != v2;
+ } else if (op == TokenKind_lt) {
+ return v1 < v2;
+ } else if (op == TokenKind_le) {
+ return v1 <= v2;
+ } else if (op == TokenKind_lshift) {
+ return v1 << v2;
+ } else if (op == TokenKind_rshift) {
+ return v1 >> v2;
+ } else if (op == TokenKind_and) {
+ return v1 & v2;
+ } else if (op == TokenKind_or) {
+ return v1 | v2;
+ } else if (op == TokenKind_xor) {
+ return v1 ^ v2;
+ } else {
+ unimplemented();
+ }
+}
+
+static int eval(AstNode* e) {
+ if (e->kind == AstNodeKind_int_expr) {
+ return e->as.int_expr->value;
+ } else if (e->kind == AstNodeKind_unary_expr) {
+ int v = eval(e->as.unary_expr->operand);
+ if (e->as.unary_expr->op == TokenKind_not) {
+ return !v;
+ } else if (e->as.unary_expr->op == TokenKind_minus) {
+ return -v;
+ } else {
+ unimplemented();
+ }
+ } else if (e->kind == AstNodeKind_binary_expr) {
+ int v1 = eval(e->as.binary_expr->lhs);
+ int v2 = eval(e->as.binary_expr->rhs);
+ return eval_binary_expr(e->as.binary_expr->op, v1, v2);
+ } else if (e->kind == AstNodeKind_logical_expr) {
+ int v1 = eval(e->as.logical_expr->lhs);
+ int v2 = eval(e->as.logical_expr->rhs);
+ return eval_binary_expr(e->as.logical_expr->op, v1, v2);
+ } else if (e->kind == AstNodeKind_cond_expr) {
+ int cond = eval(e->as.cond_expr->cond);
+ if (cond) {
+ return eval(e->as.cond_expr->then);
+ } else {
+ return eval(e->as.cond_expr->else_);
+ }
+ } else if (e->kind == AstNodeKind_cast_expr) {
+ return eval(e->as.cast_expr->operand);
+ } else if (e->kind == AstNodeKind_deref_expr) {
+ return eval(e->as.deref_expr->operand);
+ } else if (e->kind == AstNodeKind_ref_expr) {
+ return eval(e->as.ref_expr->operand);
+ } else {
+ unimplemented();
+ }
+}
+
+bool pp_eval_constant_expr(TokenArray* pp_tokens) {
+ TokenArray* tokens = convert_pp_tokens_to_tokens(pp_tokens);
+ Parser* p = parser_new(tokens);
+ AstNode* e = parse_constant_expr(p);
+ return eval(e) != 0;
+}
+
+static InitData* initdata_new() {
+ InitData* init_data = calloc(1, sizeof(InitData));
+ init_data->len = 0;
+ init_data->capacity = 1;
+ init_data->blocks = calloc(init_data->capacity, sizeof(InitDataBlock));
+ return init_data;
+}
+
+static void initdata_reserve(InitData* init_data, size_t size) {
+ if (size <= init_data->capacity)
+ return;
+ while (init_data->capacity < size) {
+ init_data->capacity *= 2;
+ }
+ init_data->blocks = realloc(init_data->blocks, init_data->capacity * sizeof(InitDataBlock));
+ memset(init_data->blocks + init_data->len, 0, (init_data->capacity - init_data->len) * sizeof(InitDataBlock));
+}
+
+static InitDataBlock* initdata_push_new(InitData* init_data) {
+ initdata_reserve(init_data, init_data->len + 1);
+ return &init_data->blocks[init_data->len++];
+}
+
+static void initdata_append_addr(InitData* buf, const char* label) {
+ InitDataBlock* block = initdata_push_new(buf);
+ block->kind = InitDataBlockKind_addr;
+ block->as.addr.label = label;
+}
+
+static void initdata_append_zeros(InitData* buf, size_t len) {
+ if (len == 0)
+ return;
+ char* data = calloc(len, sizeof(char));
+ InitDataBlock* block = initdata_push_new(buf);
+ block->kind = InitDataBlockKind_bytes;
+ block->as.bytes.len = len;
+ block->as.bytes.buf = data;
+}
+
+static void initdata_append_bytes(InitData* buf, const char* data, size_t len) {
+ char* copy = calloc(len, sizeof(char));
+ memcpy(copy, data, len);
+ InitDataBlock* block = initdata_push_new(buf);
+ block->kind = InitDataBlockKind_bytes;
+ block->as.bytes.len = len;
+ block->as.bytes.buf = copy;
+}
+
+static void do_eval_init_expr(InitData* buf, AstNode* expr, Type* ty) {
+ if (expr->kind == AstNodeKind_array_initializer) {
+ AstNode* list = expr->as.array_initializer->list;
+ if (ty->kind == TypeKind_array) {
+ for (int i = 0; i < list->as.list->len; ++i) {
+ do_eval_init_expr(buf, &list->as.list->items[i], ty->base);
+ }
+ } else if (ty->kind == TypeKind_struct) {
+ AstNode* def = &ty->ref.defs->as.list->items[ty->ref.index];
+ AstNode* members = def->as.struct_def->members;
+ int offset = 0;
+ for (int i = 0; i < list->as.list->len; ++i) {
+ AstNode* member = &members->as.list->items[i];
+ int align = type_alignof(member->ty);
+ int aligned_offset = to_aligned(offset, align);
+ initdata_append_zeros(buf, aligned_offset - offset); // padding
+ offset = aligned_offset;
+ do_eval_init_expr(buf, &list->as.list->items[i], member->ty);
+ offset += type_sizeof(member->ty);
+ }
+ int total = type_sizeof(ty);
+ initdata_append_zeros(buf, total - offset); // padding
+ } else if (ty->kind == TypeKind_union) {
+ AstNode* def = &ty->ref.defs->as.list->items[ty->ref.index];
+ AstNode* members = def->as.union_def->members;
+ AstNode* member = &members->as.list->items[0];
+ do_eval_init_expr(buf, &list->as.list->items[0], member->ty);
+ int member_size = type_sizeof(member->ty);
+ int total = type_sizeof(ty);
+ initdata_append_zeros(buf, total - member_size); // padding
+ } else {
+ unimplemented();
+ }
+ } else if (ty->kind == TypeKind_ptr) {
+ if (expr->kind == AstNodeKind_str_expr) {
+ char label[32];
+ sprintf(label, ".Lstr__%d", expr->as.str_expr->idx);
+ initdata_append_addr(buf, strdup(label));
+ } else if (expr->kind == AstNodeKind_ref_expr) {
+ if (expr->as.ref_expr->operand->kind != AstNodeKind_gvar) {
+ unimplemented();
+ }
+ initdata_append_addr(buf, expr->as.ref_expr->operand->as.gvar->name);
+ } else if (expr->kind == AstNodeKind_gvar) {
+ initdata_append_addr(buf, expr->as.gvar->name);
+ } else if (expr->kind == AstNodeKind_func) {
+ initdata_append_addr(buf, expr->as.func->name);
+ } else if (expr->kind == AstNodeKind_int_expr && expr->as.int_expr->value == 0) {
+ initdata_append_zeros(buf, type_sizeof(ty));
+ } else {
+ unimplemented();
+ }
+ } else {
+ int value = eval(expr);
+ int size = type_sizeof(ty);
+ char bytes[8];
+ for (int i = 0; i < size; ++i) {
+ bytes[i] = (value >> (i * 8)) & 0xff;
+ }
+ initdata_append_bytes(buf, bytes, size);
+ }
+}
+
+InitData* eval_init_expr(AstNode* expr, Type* ty) {
+ InitData* buf = initdata_new();
+ do_eval_init_expr(buf, expr, ty);
+ return buf;
+}
diff --git a/src/cc1/parse.h b/src/cc1/parse.h
new file mode 100644
index 0000000..f229b77
--- /dev/null
+++ b/src/cc1/parse.h
@@ -0,0 +1,43 @@
+#ifndef DUCC_PARSE_H
+#define DUCC_PARSE_H
+
+#include "../lib/common.h"
+#include "ast.h"
+#include "preprocess.h"
+
+Program* parse(TokenArray* tokens);
+bool pp_eval_constant_expr(TokenArray* pp_tokens);
+
+typedef enum {
+ InitDataBlockKind_addr,
+ InitDataBlockKind_bytes,
+} InitDataBlockKind;
+
+// Static address to global variable or ROM area.
+typedef struct {
+ const char* label;
+} InitDataBlockAddr;
+
+// Static byte array.
+typedef struct {
+ size_t len;
+ const char* buf;
+} InitDataBlockBytes;
+
+typedef struct {
+ InitDataBlockKind kind;
+ union {
+ InitDataBlockAddr addr;
+ InitDataBlockBytes bytes;
+ } as;
+} InitDataBlock;
+
+typedef struct {
+ size_t len;
+ size_t capacity;
+ InitDataBlock* blocks;
+} InitData;
+
+InitData* eval_init_expr(AstNode* expr, Type* ty);
+
+#endif
diff --git a/src/cc1/preprocess.c b/src/cc1/preprocess.c
new file mode 100644
index 0000000..d050119
--- /dev/null
+++ b/src/cc1/preprocess.c
@@ -0,0 +1,1559 @@
+#include "preprocess.h"
+#include <libgen.h>
+#include <unistd.h>
+#include "../lib/common.h"
+#include "parse.h"
+#include "sys.h"
+#include "tokenize.h"
+
+typedef enum {
+ MacroKind_undef,
+ MacroKind_obj,
+ MacroKind_func,
+ MacroKind_builtin_file,
+ MacroKind_builtin_line,
+} MacroKind;
+
+const char* macro_kind_stringify(MacroKind kind) {
+ if (kind == MacroKind_undef)
+ return "undef";
+ else if (kind == MacroKind_obj)
+ return "object-like";
+ else if (kind == MacroKind_func)
+ return "function-like";
+ else if (kind == MacroKind_builtin_file)
+ return "__FILE__";
+ else if (kind == MacroKind_builtin_line)
+ return "__LINE__";
+ else
+ unreachable();
+}
+
+typedef struct {
+ MacroKind kind;
+ const char* name;
+ TokenArray parameters;
+ TokenArray replacements;
+} Macro;
+
+void macro_build_json(JsonBuilder* builder, Macro* macro) {
+ jsonbuilder_object_start(builder);
+
+ jsonbuilder_object_member_start(builder, "kind");
+ jsonbuilder_string(builder, macro_kind_stringify(macro->kind));
+ jsonbuilder_object_member_end(builder);
+ jsonbuilder_object_member_start(builder, "name");
+ jsonbuilder_string(builder, macro->name);
+ jsonbuilder_object_member_end(builder);
+ jsonbuilder_object_member_start(builder, "parameters");
+ tokens_build_json(builder, &macro->parameters);
+ jsonbuilder_object_member_end(builder);
+ jsonbuilder_object_member_start(builder, "replacements");
+ tokens_build_json(builder, &macro->replacements);
+ jsonbuilder_object_member_end(builder);
+
+ jsonbuilder_object_end(builder);
+}
+
+static int macro_find_param(Macro* macro, Token* tok) {
+ if (tok->kind != TokenKind_ident)
+ return -1;
+
+ for (size_t i = 0; i < macro->parameters.len; ++i) {
+ if (strcmp(macro->parameters.data[i].value.string, tok->value.string) == 0) {
+ return i;
+ }
+ }
+ return -1;
+}
+
+typedef struct {
+ size_t len;
+ size_t capacity;
+ Macro* data;
+} MacroArray;
+
+static MacroArray* macros_new() {
+ MacroArray* macros = calloc(1, sizeof(MacroArray));
+ macros->len = 0;
+ macros->capacity = 8;
+ macros->data = calloc(macros->capacity, sizeof(Macro));
+ return macros;
+}
+
+static void macros_reserve(MacroArray* macros, size_t size) {
+ if (size <= macros->capacity)
+ return;
+ while (macros->capacity < size) {
+ macros->capacity *= 2;
+ }
+ macros->data = realloc(macros->data, macros->capacity * sizeof(Macro));
+ memset(macros->data + macros->len, 0, (macros->capacity - macros->len) * sizeof(Macro));
+}
+
+static Macro* macros_push_new(MacroArray* macros) {
+ macros_reserve(macros, macros->len + 1);
+ return &macros->data[macros->len++];
+}
+
+static void define_macro_to_number(MacroArray* macros, const char* name, int n) {
+ Macro* m = macros_push_new(macros);
+ m->kind = MacroKind_obj;
+ m->name = name;
+ tokens_init(&m->replacements, 1);
+ Token* tok = tokens_push_new(&m->replacements);
+ tok->kind = TokenKind_literal_int;
+ tok->value.integer = n;
+}
+
+static void add_predefined_macros(MacroArray* macros) {
+ Macro* m;
+
+ m = macros_push_new(macros);
+ m->kind = MacroKind_builtin_file;
+ m->name = "__FILE__";
+
+ m = macros_push_new(macros);
+ m->kind = MacroKind_builtin_line;
+ m->name = "__LINE__";
+
+ // Non-standard pre-defined macros.
+ define_macro_to_number(macros, "__ducc__", 1);
+ define_macro_to_number(macros, "__x86_64__", 1);
+ define_macro_to_number(macros, "__LP64__", 1);
+
+ // GCC's predefined macros. Glibc depends on these macros.
+ // https://gcc.gnu.org/onlinedocs/cpp/Common-Predefined-Macros.html
+ // TODO: uncomment out __LONG_MAX__, etc. once ducc supports 64-bit integer literals.
+ define_macro_to_number(macros, "__CHAR_BIT__", __CHAR_BIT__);
+ define_macro_to_number(macros, "__SCHAR_MAX__", __SCHAR_MAX__);
+ define_macro_to_number(macros, "__WCHAR_MAX__", __WCHAR_MAX__);
+ define_macro_to_number(macros, "__SHRT_MAX__", __SHRT_MAX__);
+ define_macro_to_number(macros, "__INT_MAX__", __INT_MAX__);
+ // define_macro_to_number(macros, "__LONG_MAX__", __LONG_MAX__);
+ // define_macro_to_number(macros, "__LONG_LONG_MAX__", __LONG_LONG_MAX__);
+ define_macro_to_number(macros, "__WINT_MAX__", __WINT_MAX__);
+ // define_macro_to_number(macros, "__SIZE_MAX__", __SIZE_MAX__);
+ // define_macro_to_number(macros, "__PTRDIFF_MAX__", __PTRDIFF_MAX__);
+ // define_macro_to_number(macros, "__INTMAX_MAX__", __INTMAX_MAX__);
+ // define_macro_to_number(macros, "__UINTMAX_MAX__", __UINTMAX_MAX__);
+ define_macro_to_number(macros, "__SIG_ATOMIC_MAX__", __SIG_ATOMIC_MAX__);
+ define_macro_to_number(macros, "__INT8_MAX__", __INT8_MAX__);
+ define_macro_to_number(macros, "__INT16_MAX__", __INT16_MAX__);
+ define_macro_to_number(macros, "__INT32_MAX__", __INT32_MAX__);
+ // define_macro_to_number(macros, "__INT64_MAX__", __INT64_MAX__);
+ define_macro_to_number(macros, "__UINT8_MAX__", __UINT8_MAX__);
+ define_macro_to_number(macros, "__UINT16_MAX__", __UINT16_MAX__);
+ define_macro_to_number(macros, "__UINT32_MAX__", __UINT32_MAX__);
+ // define_macro_to_number(macros, "__UINT64_MAX__", __UINT64_MAX__);
+ define_macro_to_number(macros, "__INT_LEAST8_MAX__", __INT_LEAST8_MAX__);
+ define_macro_to_number(macros, "__INT_LEAST16_MAX__", __INT_LEAST16_MAX__);
+ define_macro_to_number(macros, "__INT_LEAST32_MAX__", __INT_LEAST32_MAX__);
+ // define_macro_to_number(macros, "__INT_LEAST64_MAX__", __INT_LEAST64_MAX__);
+ define_macro_to_number(macros, "__UINT_LEAST8_MAX__", __UINT_LEAST8_MAX__);
+ define_macro_to_number(macros, "__UINT_LEAST16_MAX__", __UINT_LEAST16_MAX__);
+ define_macro_to_number(macros, "__UINT_LEAST32_MAX__", __UINT_LEAST32_MAX__);
+ // define_macro_to_number(macros, "__UINT_LEAST64_MAX__", __UINT_LEAST64_MAX__);
+ define_macro_to_number(macros, "__INT_FAST8_MAX__", __INT_FAST8_MAX__);
+ // define_macro_to_number(macros, "__INT_FAST16_MAX__", __INT_FAST16_MAX__);
+ // define_macro_to_number(macros, "__INT_FAST32_MAX__", __INT_FAST32_MAX__);
+ // define_macro_to_number(macros, "__INT_FAST64_MAX__", __INT_FAST64_MAX__);
+ define_macro_to_number(macros, "__UINT_FAST8_MAX__", __UINT_FAST8_MAX__);
+ // define_macro_to_number(macros, "__UINT_FAST16_MAX__", __UINT_FAST16_MAX__);
+ // define_macro_to_number(macros, "__UINT_FAST32_MAX__", __UINT_FAST32_MAX__);
+ // define_macro_to_number(macros, "__UINT_FAST64_MAX__", __UINT_FAST64_MAX__);
+ // define_macro_to_number(macros, "__INTPTR_MAX__", __INTPTR_MAX__);
+ // define_macro_to_number(macros, "__UINTPTR_MAX__", __UINTPTR_MAX__);
+ define_macro_to_number(macros, "__WCHAR_MIN__", __WCHAR_MIN__);
+ define_macro_to_number(macros, "__WINT_MIN__", __WINT_MIN__);
+ define_macro_to_number(macros, "__SIG_ATOMIC_MIN__", __SIG_ATOMIC_MIN__);
+
+ // GCC's predefined macros not listed in Common Predefined Macros page.
+ define_macro_to_number(macros, "__DBL_DIG__", __DBL_DIG__);
+ define_macro_to_number(macros, "__DBL_MANT_DIG__", __DBL_MANT_DIG__);
+ define_macro_to_number(macros, "__DBL_MAX_10_EXP__", __DBL_MAX_10_EXP__);
+ define_macro_to_number(macros, "__FLT_DIG__", __FLT_DIG__);
+ define_macro_to_number(macros, "__FLT_MANT_DIG__", __FLT_MANT_DIG__);
+ define_macro_to_number(macros, "__FLT_MAX_10_EXP__", __FLT_MAX_10_EXP__);
+ define_macro_to_number(macros, "__LDBL_DIG__", __LDBL_DIG__);
+ define_macro_to_number(macros, "__LDBL_MANT_DIG__", __LDBL_MANT_DIG__);
+ define_macro_to_number(macros, "__LDBL_MAX_10_EXP__", __LDBL_MAX_10_EXP__);
+}
+
+// Accept "FOO" or "FOO=value"
+static void define_macro_from_string(MacroArray* macros, const char* def) {
+ Macro* m = macros_push_new(macros);
+ m->kind = MacroKind_obj;
+
+ const char* eq = strchr(def, '=');
+ if (eq) {
+ // FOO=value format
+ size_t name_len = eq - def;
+ char* name = calloc(name_len + 1, sizeof(char));
+ memcpy(name, def, name_len);
+ m->name = name;
+
+ const char* value = eq + 1;
+ tokens_init(&m->replacements, 1);
+ Token* tok = tokens_push_new(&m->replacements);
+
+ // Try to parse as integer
+ char* num_end;
+ long int_val = strtol(value, &num_end, 10);
+ if (value[0] != '\0' && *num_end == '\0') {
+ tok->kind = TokenKind_literal_int;
+ tok->value.integer = int_val;
+ } else {
+ tok->kind = TokenKind_ident;
+ tok->value.string = value;
+ }
+ } else {
+ // FOO format (equivalent to FOO=1)
+ m->name = def;
+ tokens_init(&m->replacements, 1);
+ Token* tok = tokens_push_new(&m->replacements);
+ tok->kind = TokenKind_literal_int;
+ tok->value.integer = 1;
+ }
+}
+
+static void add_user_defines(MacroArray* macros, StrArray* user_defines) {
+ for (size_t i = 0; i < user_defines->len; ++i) {
+ define_macro_from_string(macros, user_defines->data[i]);
+ }
+}
+
+void macros_build_json(JsonBuilder* builder, MacroArray* macros) {
+ jsonbuilder_array_start(builder);
+ for (size_t i = 0; i < macros->len; ++i) {
+ jsonbuilder_array_element_start(builder);
+ macro_build_json(builder, &macros->data[i]);
+ jsonbuilder_array_element_end(builder);
+ }
+ jsonbuilder_array_end(builder);
+}
+
+typedef struct {
+ TokenArray tokens;
+} MacroArg;
+
+void macroarg_build_json(JsonBuilder* builder, MacroArg* arg) {
+ jsonbuilder_object_start(builder);
+ jsonbuilder_object_member_start(builder, "tokens");
+ tokens_build_json(builder, &arg->tokens);
+ jsonbuilder_object_member_end(builder);
+ jsonbuilder_object_end(builder);
+}
+
+typedef struct {
+ size_t len;
+ size_t capacity;
+ MacroArg* data;
+} MacroArgArray;
+
+static MacroArgArray* macroargs_new() {
+ MacroArgArray* macroargs = calloc(1, sizeof(MacroArgArray));
+ macroargs->len = 0;
+ macroargs->capacity = 2;
+ macroargs->data = calloc(macroargs->capacity, sizeof(MacroArg));
+ return macroargs;
+}
+
+static void macroargs_reserve(MacroArgArray* macroargs, size_t size) {
+ if (size <= macroargs->capacity)
+ return;
+ while (macroargs->capacity < size) {
+ macroargs->capacity *= 2;
+ }
+ macroargs->data = realloc(macroargs->data, macroargs->capacity * sizeof(MacroArg));
+ memset(macroargs->data + macroargs->len, 0, (macroargs->capacity - macroargs->len) * sizeof(MacroArg));
+}
+
+static MacroArg* macroargs_push_new(MacroArgArray* macroargs) {
+ macroargs_reserve(macroargs, macroargs->len + 1);
+ return &macroargs->data[macroargs->len++];
+}
+
+void macroargs_build_json(JsonBuilder* builder, MacroArgArray* macroargs) {
+ jsonbuilder_object_start(builder);
+ jsonbuilder_object_member_start(builder, "len");
+ jsonbuilder_integer(builder, macroargs->len);
+ jsonbuilder_object_member_end(builder);
+ jsonbuilder_object_member_start(builder, "data");
+ jsonbuilder_array_start(builder);
+ for (size_t i = 0; i < macroargs->len; ++i) {
+ jsonbuilder_array_element_start(builder);
+ macroarg_build_json(builder, &macroargs->data[i]);
+ jsonbuilder_array_element_end(builder);
+ }
+ jsonbuilder_array_end(builder);
+ jsonbuilder_object_member_end(builder);
+ jsonbuilder_object_end(builder);
+}
+
+typedef struct {
+ TokenArray* pp_tokens;
+ int pos;
+ MacroArray* macros;
+ int include_depth;
+ StrArray* include_paths;
+ StrArray* included_files;
+ bool generate_system_deps;
+ bool generate_user_deps;
+} Preprocessor;
+
+static TokenArray* do_preprocess(InFile* src, int depth, MacroArray* macros, StrArray* include_paths,
+ StrArray* included_files, bool generate_system_deps, bool generate_user_deps);
+
+static Preprocessor* preprocessor_new(TokenArray* pp_tokens, int include_depth, MacroArray* macros,
+ StrArray* include_paths, StrArray* included_files, bool generate_system_deps,
+ bool generate_user_deps) {
+ if (include_depth >= 32) {
+ fatal_error("include depth limit exceeded");
+ }
+
+ Preprocessor* pp = calloc(1, sizeof(Preprocessor));
+ pp->pp_tokens = pp_tokens;
+ pp->macros = macros;
+ pp->include_depth = include_depth;
+ pp->include_paths = include_paths;
+ pp->included_files = included_files;
+ pp->generate_system_deps = generate_system_deps;
+ pp->generate_user_deps = generate_user_deps;
+
+ return pp;
+}
+
+static Token* pp_token_at(Preprocessor* pp, int i) {
+ return &pp->pp_tokens->data[i];
+}
+
+static Token* peek_pp_token(Preprocessor* pp) {
+ return pp_token_at(pp, pp->pos);
+}
+
+static Token* next_pp_token(Preprocessor* pp) {
+ return pp_token_at(pp, pp->pos++);
+}
+
+static void skip_pp_token(Preprocessor* pp, TokenKind expected) {
+ Token* tok = next_pp_token(pp);
+ assert(tok->kind == expected);
+}
+
+static Token* consume_pp_token_if(Preprocessor* pp, TokenKind expected) {
+ if (peek_pp_token(pp)->kind == expected) {
+ return next_pp_token(pp);
+ } else {
+ return NULL;
+ }
+}
+
+static Token* consume_pp_token_if_not(Preprocessor* pp, TokenKind non_expected) {
+ if (peek_pp_token(pp)->kind == non_expected) {
+ return NULL;
+ } else {
+ return next_pp_token(pp);
+ }
+}
+
+static Token* expect_pp_token(Preprocessor* pp, TokenKind expected) {
+ Token* tok = next_pp_token(pp);
+ if (tok->kind == expected) {
+ return tok;
+ }
+ fatal_error("%s:%d: expected '%s', but got '%s'", tok->loc.filename, tok->loc.line, token_kind_stringify(expected),
+ token_stringify(tok));
+}
+
+static bool pp_eof(Preprocessor* pp) {
+ return peek_pp_token(pp)->kind == TokenKind_eof;
+}
+
+static int find_macro(Preprocessor* pp, const char* name) {
+ for (size_t i = 0; i < pp->macros->len; ++i) {
+ if (pp->macros->data[i].kind == MacroKind_undef)
+ continue;
+ if (strcmp(pp->macros->data[i].name, name) == 0) {
+ return i;
+ }
+ }
+ return -1;
+}
+
+static void undef_macro(Preprocessor* pp, int idx) {
+ pp->macros->data[idx].kind = MacroKind_undef;
+ // TODO: Can predefined macro like __FILE__ be undefined?
+}
+
+static void skip_whitespaces(Preprocessor* pp) {
+ while (!pp_eof(pp) && consume_pp_token_if(pp, TokenKind_whitespace))
+ ;
+}
+
+static void skip_whitespaces_or_newlines(Preprocessor* pp, bool skip_newline) {
+ while (!pp_eof(pp) && (consume_pp_token_if(pp, TokenKind_whitespace) ||
+ (skip_newline && consume_pp_token_if(pp, TokenKind_newline))))
+ ;
+}
+
+// It will not consume a new-line token.
+static void seek_to_next_newline(Preprocessor* pp) {
+ while (!pp_eof(pp) && consume_pp_token_if_not(pp, TokenKind_newline))
+ ;
+}
+
+static void make_tokens_removed(Preprocessor* pp, int start, int end) {
+ for (int i = start; i < end; ++i) {
+ Token* tok = pp_token_at(pp, i);
+ tok->kind = TokenKind_removed;
+ tok->value.string = NULL;
+ }
+}
+
+static Token* read_include_header_name(Preprocessor* pp) {
+ Token* tok = next_pp_token(pp);
+ if (tok->kind != TokenKind_header_name) {
+ fatal_error("%s:%d: invalid #include, %s", tok->loc.filename, tok->loc.line, token_stringify(tok));
+ }
+ return tok;
+}
+
+static const char* resolve_include_name(Preprocessor* pp, const Token* include_name_token) {
+ const char* include_name = include_name_token->value.string;
+ if (include_name[0] == '"') {
+ char* current_filename = strdup(include_name_token->loc.filename);
+ const char* current_dir = dirname(current_filename);
+ char* buf = calloc(strlen(include_name) - 2 + 1 + strlen(current_dir) + 1, sizeof(char));
+ sprintf(buf, "%s/%.*s", current_dir, (int)(strlen(include_name) - 2), include_name + 1);
+ return buf;
+ } else {
+ for (size_t i = 0; i < pp->include_paths->len; ++i) {
+ char* buf = calloc(strlen(include_name) - 2 + 1 + strlen(pp->include_paths->data[i]) + 1, sizeof(char));
+ sprintf(buf, "%s/%.*s", pp->include_paths->data[i], (int)(strlen(include_name) - 2), include_name + 1);
+ if (access(buf, F_OK | R_OK) == 0) {
+ return buf;
+ }
+ }
+ return NULL;
+ }
+}
+
+static const char* resolve_next_include_name(Preprocessor* pp, const Token* include_name_token) {
+ const char* include_name = include_name_token->value.string;
+ char* current_filename = strdup(include_name_token->loc.filename);
+ for (size_t i = 0; i < pp->include_paths->len; ++i) {
+ char* buf = calloc(strlen(include_name) - 2 + 1 + strlen(pp->include_paths->data[i]) + 1, sizeof(char));
+ sprintf(buf, "%s/%.*s", pp->include_paths->data[i], (int)(strlen(include_name) - 2), include_name + 1);
+ if (strcmp(buf, current_filename) == 0) {
+ // #include_next skips the same file.
+ continue;
+ }
+ if (access(buf, F_OK | R_OK) == 0) {
+ return buf;
+ }
+ }
+ return NULL;
+}
+
+static int replace_pp_tokens(Preprocessor* pp, int dest_start, int dest_end, TokenArray* source_tokens) {
+ size_t n_tokens_to_remove = dest_end - dest_start;
+ size_t n_tokens_after_dest = pp->pp_tokens->len - dest_end;
+ size_t shift_amount;
+
+ if (n_tokens_to_remove < source_tokens->len) {
+ // Move existing tokens backward to make room.
+ shift_amount = source_tokens->len - n_tokens_to_remove;
+ tokens_reserve(pp->pp_tokens, pp->pp_tokens->len + shift_amount);
+ memmove(pp_token_at(pp, dest_end + shift_amount), pp_token_at(pp, dest_end),
+ n_tokens_after_dest * sizeof(Token));
+ pp->pp_tokens->len += shift_amount;
+ } else if (source_tokens->len < n_tokens_to_remove) {
+ // Move existing tokens forward to reduce room.
+ shift_amount = n_tokens_to_remove - source_tokens->len;
+ memmove(pp_token_at(pp, dest_start + source_tokens->len), pp_token_at(pp, dest_end),
+ n_tokens_after_dest * sizeof(Token));
+ pp->pp_tokens->len -= shift_amount;
+ memset(pp_token_at(pp, pp->pp_tokens->len), 0, shift_amount * sizeof(Token));
+ }
+
+ memcpy(pp_token_at(pp, dest_start), source_tokens->data, source_tokens->len * sizeof(Token));
+
+ return dest_start + source_tokens->len;
+}
+
+static int insert_pp_tokens(Preprocessor* pp, int dest_pos, TokenArray* source_tokens) {
+ return replace_pp_tokens(pp, dest_pos, dest_pos, source_tokens);
+}
+
+static void replace_single_pp_token(Preprocessor* pp, int dest, Token* source_tok) {
+ TokenArray tokens;
+ tokens_init(&tokens, 1);
+ *tokens_push_new(&tokens) = *source_tok;
+ replace_pp_tokens(pp, dest, dest + 1, &tokens);
+}
+
+static void expand_include_directive(Preprocessor* pp, const char* include_name, Token* original_include_name_tok) {
+ InFile* include_source = infile_open(include_name);
+ if (!include_source) {
+ fatal_error("%s:%d: cannot open include file: %s", original_include_name_tok->loc.filename,
+ original_include_name_tok->loc.line, token_stringify(original_include_name_tok));
+ }
+
+ TokenArray* include_pp_tokens = do_preprocess(include_source, pp->include_depth + 1, pp->macros, pp->include_paths,
+ pp->included_files, pp->generate_system_deps, pp->generate_user_deps);
+ tokens_pop(include_pp_tokens); // pop EOF token
+ pp->pos = insert_pp_tokens(pp, pp->pos, include_pp_tokens);
+}
+
+// ws ::= many0(<whitespace>)
+// macro-parameters ::= '(' <ws> opt(<identifier> <ws> many0(',' <ws> <identifier> <ws>)) ')'
+static TokenArray* pp_parse_macro_parameters(Preprocessor* pp) {
+ TokenArray* parameters = calloc(1, sizeof(TokenArray));
+ tokens_init(parameters, 2);
+
+ // '(' is consumed by caller.
+ skip_whitespaces(pp);
+ Token* tok = consume_pp_token_if(pp, TokenKind_ident);
+ if (tok) {
+ *tokens_push_new(parameters) = *tok;
+ skip_whitespaces(pp);
+ while (consume_pp_token_if(pp, TokenKind_comma)) {
+ skip_whitespaces(pp);
+ tok = next_pp_token(pp);
+ if (tok->kind != TokenKind_ident) {
+ fatal_error("%s:%d: invalid macro syntax", tok->loc.filename, tok->loc.line);
+ }
+ *tokens_push_new(parameters) = *tok;
+ }
+ }
+ expect_pp_token(pp, TokenKind_paren_r);
+
+ return parameters;
+}
+
+// ws ::= many0(<whitespace>)
+// macro-arguments ::= <ws> '(' <ws> opt(<any-token> <ws> many0(',' <ws> <any-token> <ws>)) ')'
+static MacroArgArray* pp_parse_macro_arguments(Preprocessor* pp, bool skip_newline) {
+ MacroArgArray* args = macroargs_new();
+
+ skip_whitespaces_or_newlines(pp, skip_newline);
+ if (!consume_pp_token_if(pp, TokenKind_paren_l)) {
+ return NULL;
+ }
+ skip_whitespaces_or_newlines(pp, skip_newline);
+ Token* tok = peek_pp_token(pp);
+ if (!skip_newline && tok->kind == TokenKind_newline) {
+ expect_pp_token(pp, TokenKind_paren_r);
+ }
+ if (tok->kind != TokenKind_paren_r) {
+ MacroArg* arg = macroargs_push_new(args);
+ tokens_init(&arg->tokens, 4);
+
+ // Parse argument tokens, handling nested parentheses.
+ int nesting = 0;
+ while (true) {
+ tok = peek_pp_token(pp);
+
+ if (nesting == 0) {
+ if (tok->kind == TokenKind_paren_r) {
+ break;
+ }
+ if (tok->kind == TokenKind_comma) {
+ next_pp_token(pp);
+ skip_whitespaces_or_newlines(pp, skip_newline);
+
+ arg = macroargs_push_new(args);
+ tokens_init(&arg->tokens, 4);
+ continue;
+ }
+ }
+
+ if (tok->kind == TokenKind_paren_l) {
+ nesting++;
+ } else if (tok->kind == TokenKind_paren_r) {
+ nesting--;
+ if (nesting < 0) {
+ break;
+ }
+ }
+
+ tok = next_pp_token(pp);
+ if (tok->kind != TokenKind_removed) {
+ *tokens_push_new(&arg->tokens) = *tok;
+ }
+
+ skip_whitespaces_or_newlines(pp, skip_newline);
+ }
+ }
+ expect_pp_token(pp, TokenKind_paren_r);
+
+ return args;
+}
+
+static Token* concat_two_tokens(Token* left, Token* right) {
+ StrBuilder builder;
+ strbuilder_init(&builder);
+
+ // Left
+ if (left->kind == TokenKind_ident) {
+ strbuilder_append_string(&builder, left->value.string);
+ } else if (left->kind == TokenKind_literal_int) {
+ char buf[32];
+ sprintf(buf, "%d", left->value.integer);
+ strbuilder_append_string(&builder, buf);
+ } else {
+ strbuilder_append_string(&builder, token_stringify(left));
+ }
+
+ // Right
+ if (right->kind == TokenKind_ident) {
+ strbuilder_append_string(&builder, right->value.string);
+ } else if (right->kind == TokenKind_literal_int) {
+ char buf[32];
+ sprintf(buf, "%d", right->value.integer);
+ strbuilder_append_string(&builder, buf);
+ } else {
+ strbuilder_append_string(&builder, token_stringify(right));
+ }
+
+ // Concat
+ Token* result = calloc(1, sizeof(Token));
+
+ char* endptr;
+ int val = strtol(builder.buf, &endptr, 10);
+ if (*endptr == '\0') {
+ result->kind = TokenKind_literal_int;
+ result->value.integer = val;
+ } else {
+ result->kind = TokenKind_ident;
+ result->value.string = builder.buf;
+ }
+
+ return result;
+}
+
+static Token* stringify_tokens(TokenArray* tokens) {
+ StrBuilder builder;
+ strbuilder_init(&builder);
+
+ bool prev_whitespace = false;
+ for (size_t i = 0; i < tokens->len; ++i) {
+ Token* tok = &tokens->data[i];
+ if (tok->kind == TokenKind_whitespace) {
+ prev_whitespace = true;
+ continue;
+ }
+
+ if (prev_whitespace && builder.len > 0) {
+ strbuilder_append_char(&builder, ' ');
+ }
+ prev_whitespace = false;
+
+ const char* str = token_stringify(tok);
+
+ // For string literals and char constants, we need to escape quotes.
+ if (tok->kind == TokenKind_literal_str || tok->kind == TokenKind_character_constant) {
+ for (const char* p = str; *p; ++p) {
+ if (*p == '\\' || *p == '"') {
+ strbuilder_append_char(&builder, '\\');
+ }
+ strbuilder_append_char(&builder, *p);
+ }
+ } else {
+ strbuilder_append_string(&builder, str);
+ }
+ }
+
+ Token* result = calloc(1, sizeof(Token));
+ result->kind = TokenKind_literal_str;
+ result->value.string = builder.buf;
+ return result;
+}
+
+typedef struct MacroExpansionContext {
+ // Stack of macro names that have been already expanded.
+ StrArray already_expanded;
+} MacroExpansionContext;
+
+MacroExpansionContext* macroexpansioncontext_new() {
+ MacroExpansionContext* ctx = calloc(1, sizeof(MacroExpansionContext));
+ strings_init(&ctx->already_expanded);
+ return ctx;
+}
+
+static int expand_macro(Preprocessor* pp, bool skip_newline, MacroExpansionContext* ctx);
+
+static void expand_macro_arg(Preprocessor* pp, MacroArg* arg, bool skip_newline, MacroExpansionContext* ctx) {
+ tokens_push_new(&arg->tokens)->kind = TokenKind_eof;
+
+ Preprocessor* pp2 = preprocessor_new(&arg->tokens, pp->include_depth, pp->macros, pp->include_paths,
+ pp->included_files, pp->generate_system_deps, pp->generate_user_deps);
+
+ size_t arg_token_count = arg->tokens.len;
+ size_t processed_token_count = 0;
+ while (processed_token_count < arg_token_count) {
+ if (peek_pp_token(pp2)->kind == TokenKind_ident) {
+ processed_token_count += expand_macro(pp2, skip_newline, ctx);
+ } else {
+ next_pp_token(pp2);
+ processed_token_count += 1;
+ }
+ }
+
+ tokens_pop(&arg->tokens);
+}
+
+static int expand_macro(Preprocessor* pp, bool skip_newline, MacroExpansionContext* ctx) {
+ if (ctx == NULL) {
+ ctx = macroexpansioncontext_new();
+ }
+
+ int macro_name_pos = pp->pos;
+ Token* macro_name = peek_pp_token(pp);
+ const char* macro_name_str = macro_name->value.string;
+
+ // Supress expansion if the macro has already been expanded.
+ for (size_t i = 0; i < ctx->already_expanded.len; ++i) {
+ if (strcmp(ctx->already_expanded.data[i], macro_name->value.string) == 0) {
+ next_pp_token(pp);
+ return 1;
+ }
+ }
+
+ int macro_idx = find_macro(pp, macro_name->value.string);
+ if (macro_idx == -1) {
+ next_pp_token(pp);
+ return 1;
+ }
+
+ SourceLocation original_loc = macro_name->loc;
+ size_t token_count_before_expansion;
+ size_t token_count_after_expansion;
+ Macro* macro = &pp->macros->data[macro_idx];
+ if (macro->kind == MacroKind_func) {
+ next_pp_token(pp);
+ MacroArgArray* args = pp_parse_macro_arguments(pp, skip_newline);
+ if (!args) {
+ // If function-like macro name is not followed by opening parenthesis, it is not a macro invocation.
+ return pp->pos - macro_name_pos;
+ }
+ token_count_before_expansion = pp->pos - macro_name_pos;
+ replace_pp_tokens(pp, macro_name_pos, pp->pos, &macro->replacements);
+
+ // Operands of # and ## operators should not be expanded.
+ bool* no_expand = calloc(macro->parameters.len, sizeof(bool));
+ for (size_t i = 0; i < macro->replacements.len; ++i) {
+ TokenKind kind = macro->replacements.data[i].kind;
+ if (kind == TokenKind_hashhash) {
+ Token* lhs = NULL;
+ for (int j = i - 1; j >= 0; --j) {
+ if (macro->replacements.data[j].kind != TokenKind_whitespace) {
+ lhs = &macro->replacements.data[j];
+ break;
+ }
+ }
+ Token* rhs = NULL;
+ for (size_t j = i + 1; j < macro->replacements.len; ++j) {
+ if (macro->replacements.data[j].kind != TokenKind_whitespace) {
+ rhs = &macro->replacements.data[j];
+ break;
+ }
+ }
+ if (lhs) {
+ int param1 = macro_find_param(macro, lhs);
+ if (param1 != -1) {
+ no_expand[param1] = true;
+ }
+ }
+ if (rhs) {
+ int param2 = macro_find_param(macro, rhs);
+ if (param2 != -1) {
+ no_expand[param2] = true;
+ }
+ }
+ } else if (kind == TokenKind_hash) {
+ Token* operand = NULL;
+ for (size_t j = i + 1; j < macro->replacements.len; ++j) {
+ if (macro->replacements.data[j].kind != TokenKind_whitespace) {
+ operand = &macro->replacements.data[j];
+ break;
+ }
+ }
+ if (operand) {
+ int param = macro_find_param(macro, operand);
+ if (param != -1) {
+ no_expand[param] = true;
+ }
+ }
+ }
+ }
+
+ // Argument expansion
+ for (size_t i = 0; i < args->len; ++i) {
+ if (no_expand[i])
+ continue;
+ MacroArg* arg = &args->data[i];
+ expand_macro_arg(pp, arg, skip_newline, ctx);
+ }
+
+ // Parameter substitution
+ size_t token_count = 0;
+ size_t offset = 0;
+ for (size_t i = 0; i < macro->replacements.len; ++i) {
+ Token* tok = pp_token_at(pp, macro_name_pos + i + offset);
+
+ // Handle # operator (stringification)
+ if (tok->kind == TokenKind_hash) {
+ size_t param_idx_in_replacements = 0;
+ for (size_t j = i + 1; j < macro->replacements.len; ++j) {
+ if (macro->replacements.data[j].kind != TokenKind_whitespace) {
+ param_idx_in_replacements = j;
+ break;
+ }
+ }
+
+ Token* param_tok = &macro->replacements.data[param_idx_in_replacements];
+ int macro_param_idx = macro_find_param(macro, param_tok);
+ if (macro_param_idx != -1) {
+ Token* stringified = stringify_tokens(&args->data[macro_param_idx].tokens);
+
+ // Replace ('#' <whitespace>* <param>) with stringified token.
+ TokenArray single_token;
+ tokens_init(&single_token, 1);
+ *tokens_push_new(&single_token) = *stringified;
+
+ size_t tokens_to_replace = param_idx_in_replacements - i + 1;
+ replace_pp_tokens(pp, macro_name_pos + i + offset, macro_name_pos + i + offset + tokens_to_replace,
+ &single_token);
+ token_count += 1;
+ offset += 1 - tokens_to_replace;
+ i = param_idx_in_replacements; // Skip to after the parameter
+ continue;
+ }
+ }
+
+ int macro_param_idx = macro_find_param(macro, tok);
+ if (macro_param_idx != -1) {
+ size_t arg_token_count = args->data[macro_param_idx].tokens.len;
+ if (arg_token_count == 0) {
+ // Empty argument: insert a placemarker token
+ TokenArray placemarker_token;
+ tokens_init(&placemarker_token, 1);
+ Token* pm = tokens_push_new(&placemarker_token);
+ pm->kind = TokenKind_placemarker;
+ pm->loc = tok->loc;
+ replace_pp_tokens(pp, macro_name_pos + i + offset, macro_name_pos + i + offset + 1,
+ &placemarker_token);
+ token_count += 1;
+ // offset stays the same (1 - 1 = 0)
+ } else {
+ replace_pp_tokens(pp, macro_name_pos + i + offset, macro_name_pos + i + offset + 1,
+ &args->data[macro_param_idx].tokens);
+ token_count += arg_token_count;
+ offset += arg_token_count - 1;
+ }
+ } else {
+ ++token_count;
+ }
+ }
+
+ // Handle ## operator
+ size_t token_count2 = 0;
+ for (size_t i = 0; i < token_count; ++i) {
+ int pos = macro_name_pos + i;
+ Token* tok = pp_token_at(pp, pos);
+ if (tok->kind == TokenKind_hashhash) {
+ // Concatenate previous and next tokens
+ int lhs_pos = -1;
+ for (int j = pos - 1; j >= macro_name_pos; --j) {
+ if (pp_token_at(pp, j)->kind != TokenKind_whitespace) {
+ lhs_pos = j;
+ break;
+ }
+ }
+ int rhs_pos = -1;
+ for (size_t j = pos + 1; j < macro_name_pos + token_count; ++j) {
+ if (pp_token_at(pp, j)->kind != TokenKind_whitespace) {
+ rhs_pos = j;
+ break;
+ }
+ }
+ if (lhs_pos == -1 || rhs_pos == -1) {
+ fatal_error("%s:%d: invalid usage of ## operator", tok->loc.filename, tok->loc.line);
+ }
+
+ Token* lhs_tok = pp_token_at(pp, lhs_pos);
+ Token* rhs_tok = pp_token_at(pp, rhs_pos);
+ bool lhs_is_placemarker = lhs_tok->kind == TokenKind_placemarker;
+ bool rhs_is_placemarker = rhs_tok->kind == TokenKind_placemarker;
+
+ TokenArray result_tokens;
+ tokens_init(&result_tokens, 1);
+
+ if (lhs_is_placemarker && rhs_is_placemarker) {
+ // Both are placemarkers: result is a placemarker
+ Token* pm = tokens_push_new(&result_tokens);
+ pm->kind = TokenKind_placemarker;
+ pm->loc = tok->loc;
+ } else if (lhs_is_placemarker) {
+ // Left is placemarker: result is the right token
+ *tokens_push_new(&result_tokens) = *rhs_tok;
+ } else if (rhs_is_placemarker) {
+ // Right is placemarker: result is the left token
+ *tokens_push_new(&result_tokens) = *lhs_tok;
+ } else {
+ // Neither is placemarker: concatenate them
+ Token* concatenated = concat_two_tokens(lhs_tok, rhs_tok);
+ *tokens_push_new(&result_tokens) = *concatenated;
+ }
+
+ replace_pp_tokens(pp, lhs_pos, rhs_pos + 1, &result_tokens);
+ token_count -= rhs_pos - lhs_pos;
+ i -= pos - lhs_pos;
+ token_count2 -= pos - lhs_pos - 1;
+ } else {
+ ++token_count2;
+ }
+ }
+
+ // Remove placemarker tokens after ## processing
+ for (size_t i = 0; i < token_count2; ++i) {
+ Token* tok = pp_token_at(pp, macro_name_pos + i);
+ if (tok->kind == TokenKind_placemarker) {
+ tok->kind = TokenKind_removed;
+ }
+ }
+
+ // Inherit a source location from the original macro token.
+ for (size_t i = 0; i < token_count2; ++i) {
+ pp_token_at(pp, macro_name_pos + i)->loc = original_loc;
+ }
+ token_count_after_expansion = token_count2;
+ } else if (macro->kind == MacroKind_obj) {
+ replace_pp_tokens(pp, macro_name_pos, macro_name_pos + 1, &macro->replacements);
+ // Inherit a source location from the original macro token.
+ for (size_t i = 0; i < macro->replacements.len; ++i) {
+ pp_token_at(pp, macro_name_pos + i)->loc = original_loc;
+ }
+ token_count_before_expansion = 1;
+ token_count_after_expansion = macro->replacements.len;
+ } else if (macro->kind == MacroKind_builtin_file) {
+ Token file_tok;
+ file_tok.kind = TokenKind_literal_str;
+ file_tok.value.string = macro_name->loc.filename;
+ file_tok.loc.filename = NULL;
+ file_tok.loc.line = 0;
+ replace_single_pp_token(pp, macro_name_pos, &file_tok);
+ token_count_before_expansion = 1;
+ token_count_after_expansion = 1;
+ } else if (macro->kind == MacroKind_builtin_line) {
+ Token line_tok;
+ line_tok.kind = TokenKind_literal_int;
+ line_tok.value.integer = macro_name->loc.line;
+ line_tok.loc.filename = NULL;
+ line_tok.loc.line = 0;
+ replace_single_pp_token(pp, macro_name_pos, &line_tok);
+ token_count_before_expansion = 1;
+ token_count_after_expansion = 1;
+ } else {
+ unreachable();
+ }
+
+ // Recursive expansion.
+ strings_push(&ctx->already_expanded, macro_name_str);
+ pp->pos = macro_name_pos;
+ size_t processed_token_count = 0;
+ while (processed_token_count < token_count_after_expansion) {
+ if (peek_pp_token(pp)->kind == TokenKind_ident) {
+ processed_token_count += expand_macro(pp, skip_newline, ctx);
+ } else {
+ next_pp_token(pp);
+ processed_token_count += 1;
+ }
+ }
+ strings_pop(&ctx->already_expanded);
+
+ return token_count_before_expansion;
+}
+
+typedef enum {
+ GroupDelimiterKind_normal,
+ GroupDelimiterKind_after_if_directive,
+ GroupDelimiterKind_after_else_directive,
+} GroupDelimiterKind;
+
+static bool is_delimiter_of_current_group(GroupDelimiterKind delimiter_kind, TokenKind token_kind) {
+ if (delimiter_kind == GroupDelimiterKind_normal) {
+ return token_kind == TokenKind_eof;
+ } else if (delimiter_kind == GroupDelimiterKind_after_if_directive) {
+ return token_kind == TokenKind_pp_directive_elif || token_kind == TokenKind_pp_directive_elifdef ||
+ token_kind == TokenKind_pp_directive_elifndef || token_kind == TokenKind_pp_directive_else ||
+ token_kind == TokenKind_pp_directive_endif;
+ } else if (delimiter_kind == GroupDelimiterKind_after_else_directive) {
+ return token_kind == TokenKind_pp_directive_endif;
+ } else {
+ unreachable();
+ }
+}
+
+static int replace_pp_tokens(Preprocessor*, int, int, TokenArray*);
+static void include_conditionally(Preprocessor* pp, GroupDelimiterKind delimiter_kind, bool do_include);
+
+static bool preprocess_if_group_or_elif_group(Preprocessor* pp, bool did_include) {
+ Token* directive = next_pp_token(pp);
+
+ if (directive->kind == TokenKind_pp_directive_if || directive->kind == TokenKind_pp_directive_elif) {
+ int condition_expr_start_pos = pp->pos;
+
+ while (!pp_eof(pp)) {
+ Token* tok = peek_pp_token(pp);
+ if (tok->kind == TokenKind_newline) {
+ break;
+ } else if (tok->kind == TokenKind_ident) {
+ if (strcmp(tok->value.string, "defined") == 0) {
+ int defined_pos = pp->pos;
+ // 'defined' <ws>* '(' <ws>* <ident> <ws>* ')'
+ // 'defined' <ws>* <ident>
+ skip_pp_token(pp, TokenKind_ident);
+ skip_whitespaces(pp);
+ Token* macro_name;
+ if (consume_pp_token_if(pp, TokenKind_paren_l)) {
+ skip_whitespaces(pp);
+ macro_name = expect_pp_token(pp, TokenKind_ident);
+ skip_whitespaces(pp);
+ expect_pp_token(pp, TokenKind_paren_r);
+ } else {
+ macro_name = expect_pp_token(pp, TokenKind_ident);
+ }
+ bool is_defined = find_macro(pp, macro_name->value.string) != -1;
+ TokenArray defined_results;
+ tokens_init(&defined_results, 1);
+ Token* defined_result = tokens_push_new(&defined_results);
+ defined_result->kind = TokenKind_literal_int;
+ defined_result->value.integer = is_defined;
+ pp->pos = replace_pp_tokens(pp, defined_pos, pp->pos, &defined_results);
+ } else {
+ expand_macro(pp, false, NULL);
+ }
+ } else {
+ next_pp_token(pp);
+ }
+ }
+
+ // all remaining identifiers other than true (including those lexically identical to keywords such as false) are
+ // replaced with the pp-number 0, true is replaced with pp-number 1, and then each preprocessing token is
+ // converted into a token.
+ for (int pos = condition_expr_start_pos; pos < pp->pos; ++pos) {
+ Token* tok = pp_token_at(pp, pos);
+ if (tok->kind == TokenKind_ident) {
+ bool is_true = strcmp(tok->value.string, "true") == 0;
+ tok->kind = TokenKind_literal_int;
+ tok->value.integer = is_true;
+ }
+ }
+
+ int condition_expr_tokens_len = pp->pos - condition_expr_start_pos;
+ TokenArray condition_expr_tokens;
+ // +1 to add EOF token at the end.
+ tokens_init(&condition_expr_tokens, condition_expr_tokens_len + 1);
+ for (int i = 0; i < condition_expr_tokens_len; ++i) {
+ *tokens_push_new(&condition_expr_tokens) = *pp_token_at(pp, condition_expr_start_pos + i);
+ }
+ Token* eof_tok = tokens_push_new(&condition_expr_tokens);
+ eof_tok->kind = TokenKind_eof;
+
+ bool do_include = pp_eval_constant_expr(&condition_expr_tokens) && !did_include;
+ include_conditionally(pp, GroupDelimiterKind_after_if_directive, do_include);
+ return do_include;
+ } else if (directive->kind == TokenKind_pp_directive_ifdef || directive->kind == TokenKind_pp_directive_elifdef) {
+ skip_whitespaces(pp);
+ Token* macro_name = consume_pp_token_if(pp, TokenKind_ident);
+ if (!macro_name) {
+ fatal_error("");
+ }
+
+ bool do_include = !did_include && find_macro(pp, macro_name->value.string) != -1;
+ include_conditionally(pp, GroupDelimiterKind_after_if_directive, do_include);
+ return do_include;
+ } else if (directive->kind == TokenKind_pp_directive_ifndef || directive->kind == TokenKind_pp_directive_elifndef) {
+ skip_whitespaces(pp);
+ Token* macro_name = consume_pp_token_if(pp, TokenKind_ident);
+ if (!macro_name) {
+ fatal_error("");
+ }
+
+ bool do_include = !did_include && find_macro(pp, macro_name->value.string) == -1;
+ include_conditionally(pp, GroupDelimiterKind_after_if_directive, do_include);
+ return do_include;
+ } else {
+ unreachable();
+ }
+}
+
+static bool preprocess_if_group(Preprocessor* pp) {
+ return preprocess_if_group_or_elif_group(pp, false);
+}
+
+static bool preprocess_elif_group(Preprocessor* pp, bool did_include) {
+ return preprocess_if_group_or_elif_group(pp, did_include);
+}
+
+// elif-groups:
+// { elif-group }+
+static bool preprocess_elif_groups_opt(Preprocessor* pp, bool did_include) {
+ while (!pp_eof(pp)) {
+ Token* tok = peek_pp_token(pp);
+ if (tok->kind == TokenKind_pp_directive_elif || tok->kind == TokenKind_pp_directive_elifdef ||
+ tok->kind == TokenKind_pp_directive_elifndef) {
+ did_include |= preprocess_elif_group(pp, did_include);
+ } else {
+ break;
+ }
+ }
+ return did_include;
+}
+
+// else-group:
+// '#' 'else' group?
+static void preprocess_else_group(Preprocessor* pp, bool did_include) {
+ skip_pp_token(pp, TokenKind_pp_directive_else);
+ skip_whitespaces(pp);
+ expect_pp_token(pp, TokenKind_newline);
+
+ include_conditionally(pp, GroupDelimiterKind_after_else_directive, !did_include);
+}
+
+// endif-line:
+// '#' 'endif' new-line
+static void preprocess_endif_directive(Preprocessor* pp) {
+ skip_pp_token(pp, TokenKind_pp_directive_endif);
+ skip_whitespaces(pp);
+ expect_pp_token(pp, TokenKind_newline);
+}
+
+// if-section:
+// if-group elif-groups? else-group? endif-line
+static void preprocess_if_section(Preprocessor* pp) {
+ bool did_include = preprocess_if_group(pp);
+ did_include = preprocess_elif_groups_opt(pp, did_include);
+ if (peek_pp_token(pp)->kind == TokenKind_pp_directive_else) {
+ preprocess_else_group(pp, did_include);
+ }
+ preprocess_endif_directive(pp);
+}
+
+static void preprocess_include_directive(Preprocessor* pp) {
+ skip_pp_token(pp, TokenKind_pp_directive_include);
+ skip_whitespaces(pp);
+ Token* include_name = read_include_header_name(pp);
+ const char* include_name_resolved = resolve_include_name(pp, include_name);
+ if (include_name_resolved == NULL) {
+ fatal_error("%s:%d: cannot resolve include file name: %s", include_name->loc.filename, include_name->loc.line,
+ token_stringify(include_name));
+ }
+
+ if ((pp->generate_system_deps && include_name->value.string[0] == '<') ||
+ (pp->generate_user_deps && include_name->value.string[0] == '"')) {
+ bool already_included = false;
+ for (size_t i = 0; i < pp->included_files->len; ++i) {
+ if (strcmp(pp->included_files->data[i], include_name_resolved) == 0) {
+ already_included = true;
+ break;
+ }
+ }
+ if (!already_included) {
+ strings_push(pp->included_files, include_name_resolved);
+ }
+ }
+
+ skip_whitespaces(pp);
+ expect_pp_token(pp, TokenKind_newline);
+ expand_include_directive(pp, include_name_resolved, include_name);
+}
+
+// #include_next is a part of GNU extension.
+// https://gcc.gnu.org/onlinedocs/cpp/Wrapper-Headers.html
+static void preprocess_include_next_directive(Preprocessor* pp) {
+ skip_pp_token(pp, TokenKind_pp_directive_include_next);
+ skip_whitespaces(pp);
+ Token* include_name = read_include_header_name(pp);
+ const char* include_name_resolved = resolve_next_include_name(pp, include_name);
+ if (include_name_resolved == NULL) {
+ fatal_error("%s:%d: cannot resolve include file name: %s", include_name->loc.filename, include_name->loc.line,
+ token_stringify(include_name));
+ }
+
+ if (include_name->value.string[0] == '"') {
+ bool already_included = false;
+ for (size_t i = 0; i < pp->included_files->len; ++i) {
+ if (strcmp(pp->included_files->data[i], include_name_resolved) == 0) {
+ already_included = true;
+ break;
+ }
+ }
+ if (!already_included) {
+ strings_push(pp->included_files, include_name_resolved);
+ }
+ }
+
+ skip_whitespaces(pp);
+ expect_pp_token(pp, TokenKind_newline);
+ expand_include_directive(pp, include_name_resolved, include_name);
+}
+
+static void preprocess_embed_directive(Preprocessor*) {
+ unimplemented();
+}
+
+static void preprocess_define_directive(Preprocessor* pp) {
+ skip_pp_token(pp, TokenKind_pp_directive_define);
+ skip_whitespaces(pp);
+ Token* macro_name = expect_pp_token(pp, TokenKind_ident);
+
+ if (consume_pp_token_if(pp, TokenKind_paren_l)) {
+ TokenArray* parameters = pp_parse_macro_parameters(pp);
+ skip_whitespaces(pp);
+ int replacements_start_pos = pp->pos;
+ seek_to_next_newline(pp);
+ Macro* macro = macros_push_new(pp->macros);
+ macro->kind = MacroKind_func;
+ macro->name = macro_name->value.string;
+ macro->parameters = *parameters;
+ int n_replacements = pp->pos - replacements_start_pos;
+ tokens_init(&macro->replacements, n_replacements);
+ for (int i = 0; i < n_replacements; ++i) {
+ *tokens_push_new(&macro->replacements) = *pp_token_at(pp, replacements_start_pos + i);
+ }
+ // Remove trailing whitespaces.
+ for (int i = n_replacements - 1; i >= 0; --i) {
+ if (macro->replacements.data[i].kind == TokenKind_whitespace) {
+ tokens_pop(&macro->replacements);
+ } else {
+ break;
+ }
+ }
+ } else {
+ skip_whitespaces(pp);
+ int replacements_start_pos = pp->pos;
+ seek_to_next_newline(pp);
+ Macro* macro = macros_push_new(pp->macros);
+ macro->kind = MacroKind_obj;
+ macro->name = macro_name->value.string;
+ int n_replacements = pp->pos - replacements_start_pos;
+ tokens_init(&macro->replacements, n_replacements);
+ for (int i = 0; i < n_replacements; ++i) {
+ *tokens_push_new(&macro->replacements) = *pp_token_at(pp, replacements_start_pos + i);
+ }
+ // Remove trailing whitespaces.
+ for (int i = n_replacements - 1; i >= 0; --i) {
+ if (macro->replacements.data[i].kind == TokenKind_whitespace) {
+ tokens_pop(&macro->replacements);
+ } else {
+ break;
+ }
+ }
+ }
+ expect_pp_token(pp, TokenKind_newline);
+}
+
+static void preprocess_undef_directive(Preprocessor* pp) {
+ skip_pp_token(pp, TokenKind_pp_directive_undef);
+ skip_whitespaces(pp);
+ Token* macro_name = consume_pp_token_if(pp, TokenKind_ident);
+ if (macro_name) {
+ int macro_idx = find_macro(pp, macro_name->value.string);
+ if (macro_idx != -1) {
+ undef_macro(pp, macro_idx);
+ }
+ }
+}
+
+static void preprocess_line_directive(Preprocessor*) {
+ unimplemented();
+}
+
+// control-line:
+// ...
+// '#' 'error' pp-tokens? new-line
+static void preprocess_error_directive(Preprocessor* pp) {
+ // The C23 standard does not specify format of diagnostic message caused by #error.
+ // Ducc assumes that #error takes exactly one argument consisting of a string literal.
+ // TODO: output some general message or something else if not.
+ skip_pp_token(pp, TokenKind_pp_directive_error);
+ skip_whitespaces(pp);
+ Token* msg = expect_pp_token(pp, TokenKind_literal_str);
+ skip_whitespaces(pp);
+ expect_pp_token(pp, TokenKind_newline);
+ fatal_error("%s:%d: %s", msg->loc.filename, msg->loc.line, msg->value.string);
+}
+
+// control-line:
+// ...
+// '#' 'warning' pp-tokens? new-line
+static void preprocess_warning_directive(Preprocessor* pp) {
+ // The C23 standard does not specify format of diagnostic message caused by #warning.
+ // Ducc assumes that #warning takes exactly one argument consisting of a string literal.
+ // TODO: output some general message or something else if not.
+ skip_pp_token(pp, TokenKind_pp_directive_warning);
+ skip_whitespaces(pp);
+ Token* msg = expect_pp_token(pp, TokenKind_literal_str);
+ skip_whitespaces(pp);
+ expect_pp_token(pp, TokenKind_newline);
+ fprintf(stderr, "%s:%d: %s", msg->loc.filename, msg->loc.line, msg->value.string);
+}
+
+static void preprocess_pragma_directive(Preprocessor* pp) {
+ // Ignore all #pragma directives for now.
+ skip_pp_token(pp, TokenKind_pp_directive_pragma);
+ seek_to_next_newline(pp);
+ skip_pp_token(pp, TokenKind_newline);
+}
+
+static void preprocess_nop_directive(Preprocessor* pp) {
+ skip_pp_token(pp, TokenKind_pp_directive_nop);
+}
+
+static void preprocess_non_directive_directive(Preprocessor* pp) {
+ Token* tok = peek_pp_token(pp);
+ // C23 6.10.1.13:
+ // The execution of a non-directive preprocessing directive results in undefined behavior.
+ fatal_error("%s:%d: invalid preprocessing directive, '%s'", tok->loc.filename, tok->loc.line, token_stringify(tok));
+}
+
+static void preprocess_text_line(Preprocessor* pp) {
+ while (!pp_eof(pp)) {
+ if (consume_pp_token_if(pp, TokenKind_newline)) {
+ return;
+ }
+ if (consume_pp_token_if_not(pp, TokenKind_ident)) {
+ continue;
+ }
+
+ expand_macro(pp, true, NULL);
+ }
+ expect_pp_token(pp, TokenKind_newline);
+}
+
+// group-part:
+// if-section
+// control-line
+// '#' non-directive
+// text-line
+//
+// control-line:
+// '#' 'include' ...
+// '#' 'embed' ...
+// '#' 'define' ...
+// '#' 'undef' ...
+// '#' 'line' ...
+// '#' 'error' ...
+// '#' 'warning' ...
+// '#' 'pragma' ...
+// '#' new-line
+static void preprocess_group_part(Preprocessor* pp) {
+ Token* tok = peek_pp_token(pp);
+ if (tok->kind == TokenKind_pp_directive_if || tok->kind == TokenKind_pp_directive_ifdef ||
+ tok->kind == TokenKind_pp_directive_ifndef) {
+ preprocess_if_section(pp);
+ } else if (tok->kind == TokenKind_pp_directive_elif || tok->kind == TokenKind_pp_directive_elifdef ||
+ tok->kind == TokenKind_pp_directive_elifndef || tok->kind == TokenKind_pp_directive_else ||
+ tok->kind == TokenKind_pp_directive_endif) {
+ fatal_error("%s:%d: unexpected '%s'; no corresponding '#if'*", tok->loc.filename, tok->loc.line,
+ token_kind_stringify(tok->kind));
+ } else if (tok->kind == TokenKind_pp_directive_include) {
+ preprocess_include_directive(pp);
+ } else if (tok->kind == TokenKind_pp_directive_include_next) {
+ preprocess_include_next_directive(pp);
+ } else if (tok->kind == TokenKind_pp_directive_embed) {
+ preprocess_embed_directive(pp);
+ } else if (tok->kind == TokenKind_pp_directive_define) {
+ preprocess_define_directive(pp);
+ } else if (tok->kind == TokenKind_pp_directive_undef) {
+ preprocess_undef_directive(pp);
+ } else if (tok->kind == TokenKind_pp_directive_line) {
+ preprocess_line_directive(pp);
+ } else if (tok->kind == TokenKind_pp_directive_error) {
+ preprocess_error_directive(pp);
+ } else if (tok->kind == TokenKind_pp_directive_warning) {
+ preprocess_warning_directive(pp);
+ } else if (tok->kind == TokenKind_pp_directive_pragma) {
+ preprocess_pragma_directive(pp);
+ } else if (tok->kind == TokenKind_pp_directive_nop) {
+ preprocess_nop_directive(pp);
+ } else if (tok->kind == TokenKind_pp_directive_non_directive) {
+ preprocess_non_directive_directive(pp);
+ } else {
+ preprocess_text_line(pp);
+ }
+}
+
+// group:
+// { group-part }+
+static void preprocess_group_opt(Preprocessor* pp, GroupDelimiterKind delimiter_kind) {
+ while (!pp_eof(pp)) {
+ Token* tok = peek_pp_token(pp);
+ if (is_delimiter_of_current_group(delimiter_kind, tok->kind))
+ return;
+ preprocess_group_part(pp);
+ }
+
+ if (delimiter_kind != GroupDelimiterKind_normal) {
+ expect_pp_token(pp, TokenKind_pp_directive_endif);
+ }
+}
+
+static void skip_group_opt(Preprocessor* pp, GroupDelimiterKind delimiter_kind) {
+ assert(delimiter_kind != GroupDelimiterKind_normal);
+ int nesting = 0;
+
+ while (!pp_eof(pp)) {
+ Token* tok = peek_pp_token(pp);
+ if (nesting == 0 && is_delimiter_of_current_group(delimiter_kind, tok->kind)) {
+ return;
+ }
+ if (tok->kind == TokenKind_pp_directive_if || tok->kind == TokenKind_pp_directive_ifdef ||
+ tok->kind == TokenKind_pp_directive_ifndef) {
+ ++nesting;
+ } else if (tok->kind == TokenKind_pp_directive_endif) {
+ --nesting;
+ }
+ int first_pos = pp->pos;
+ seek_to_next_newline(pp);
+ expect_pp_token(pp, TokenKind_newline);
+ make_tokens_removed(pp, first_pos, pp->pos);
+ }
+
+ expect_pp_token(pp, TokenKind_pp_directive_endif);
+}
+
+static void include_conditionally(Preprocessor* pp, GroupDelimiterKind delimiter_kind, bool do_include) {
+ if (do_include) {
+ preprocess_group_opt(pp, delimiter_kind);
+ } else {
+ skip_group_opt(pp, delimiter_kind);
+ }
+}
+
+// preprocessing-file:
+// group?
+static void preprocess_preprocessing_file(Preprocessor* pp) {
+ preprocess_group_opt(pp, GroupDelimiterKind_normal);
+}
+
+static void remove_pp_directive(Preprocessor* pp, int directive_token_pos) {
+ seek_to_next_newline(pp);
+ skip_pp_token(pp, TokenKind_newline);
+ make_tokens_removed(pp, directive_token_pos, pp->pos);
+}
+
+static void remove_pp_directives(Preprocessor* pp) {
+ pp->pos = 0;
+ while (!pp_eof(pp)) {
+ if (is_pp_directive(peek_pp_token(pp)->kind)) {
+ remove_pp_directive(pp, pp->pos);
+ } else {
+ next_pp_token(pp);
+ }
+ }
+}
+
+static char* get_ducc_include_path() {
+ const char* self_dir = get_self_dir();
+ char* buf = calloc(strlen(self_dir) + strlen("/../include") + 1, sizeof(char));
+ sprintf(buf, "%s/../include", self_dir);
+ return buf;
+}
+
+static TokenArray* do_preprocess(InFile* src, int depth, MacroArray* macros, StrArray* include_paths,
+ StrArray* included_files, bool generate_system_deps, bool generate_user_deps) {
+ TokenArray* pp_tokens = tokenize(src);
+ Preprocessor* pp = preprocessor_new(pp_tokens, depth, macros, include_paths, included_files, generate_system_deps,
+ generate_user_deps);
+
+ preprocess_preprocessing_file(pp);
+ remove_pp_directives(pp);
+ return pp->pp_tokens;
+}
+
+TokenArray* preprocess(InFile* src, StrArray* user_defines, StrArray* user_include_dirs, StrArray* included_files,
+ bool generate_system_deps, bool generate_user_deps) {
+ MacroArray* macros = macros_new();
+ add_predefined_macros(macros);
+ add_user_defines(macros, user_defines);
+ strings_push(included_files, src->loc.filename);
+
+ StrArray* include_paths = calloc(1, sizeof(StrArray));
+ strings_init(include_paths);
+
+ // Ducc's built-in headers has highest priority.
+ strings_push(include_paths, get_ducc_include_path());
+
+ for (size_t i = 0; i < user_include_dirs->len; ++i) {
+ strings_push(include_paths, user_include_dirs->data[i]);
+ }
+ strings_push(include_paths, "/usr/local/include");
+ strings_push(include_paths, "/usr/include/x86_64-linux-gnu");
+ strings_push(include_paths, "/usr/include");
+
+ return do_preprocess(src, 0, macros, include_paths, included_files, generate_system_deps, generate_user_deps);
+}
+
+void concat_adjacent_string_literals(TokenArray* pp_tokens) {
+ size_t last_nonempty_token_pos = 0;
+ TokenKind last_nonempty_token_kind = TokenKind_eof;
+ for (size_t pos = 0; pos < pp_tokens->len; ++pos) {
+ Token* pp_tok = &pp_tokens->data[pos];
+ TokenKind k = pp_tok->kind;
+ if (k == TokenKind_removed || k == TokenKind_whitespace || k == TokenKind_newline) {
+ continue;
+ }
+ if (k == TokenKind_literal_str && last_nonempty_token_kind == TokenKind_literal_str) {
+ // Concatenate adjacent string literals.
+ Token* last_pp_tok = &pp_tokens->data[last_nonempty_token_pos];
+ const char* s1 = last_pp_tok->value.string;
+ size_t l1 = strlen(s1);
+ const char* s2 = pp_tok->value.string;
+ size_t l2 = strlen(s2);
+ char* buf = calloc(l1 + l2 + 1, sizeof(char));
+ memcpy(buf, s1, l1);
+ memcpy(buf + l1, s2, l2);
+ last_pp_tok->value.string = buf;
+ pp_tok->kind = TokenKind_removed;
+ } else {
+ last_nonempty_token_pos = pos;
+ last_nonempty_token_kind = k;
+ }
+ }
+}
+
+void print_token_to_file(FILE* out, TokenArray* pp_tokens) {
+ for (size_t i = 0; i < pp_tokens->len; ++i) {
+ Token* tok = &pp_tokens->data[i];
+
+ if (tok->kind == TokenKind_whitespace) {
+ // TODO: preserve indent?
+ fprintf(out, " ");
+ } else if (tok->kind == TokenKind_removed) {
+ // Output nothing for removed tokens
+ } else if (tok->kind == TokenKind_newline) {
+ // TODO: remove adjacent newlines?
+ fprintf(out, "\n");
+ } else if (tok->kind != TokenKind_eof) {
+ // TODO: string literal
+ fprintf(out, "%s", token_stringify(tok));
+ // Add space after token if next token is not punctuation
+ // TODO: apply stricter approach
+ if (i + 1 < pp_tokens->len) {
+ Token* next = &pp_tokens->data[i + 1];
+ if (next->kind != TokenKind_newline && next->kind != TokenKind_whitespace &&
+ next->kind != TokenKind_removed && next->kind != TokenKind_eof && next->kind != TokenKind_comma &&
+ next->kind != TokenKind_semicolon && next->kind != TokenKind_paren_r &&
+ next->kind != TokenKind_bracket_r && next->kind != TokenKind_brace_r &&
+ next->kind != TokenKind_dot) {
+ fprintf(out, " ");
+ }
+ }
+ }
+ }
+}
diff --git a/src/cc1/preprocess.h b/src/cc1/preprocess.h
new file mode 100644
index 0000000..8d86fe5
--- /dev/null
+++ b/src/cc1/preprocess.h
@@ -0,0 +1,13 @@
+#ifndef DUCC_PREPROCESS_H
+#define DUCC_PREPROCESS_H
+
+#include "../lib/common.h"
+#include "io.h"
+#include "token.h"
+
+TokenArray* preprocess(InFile* src, StrArray* user_defines, StrArray* user_include_dirs, StrArray* included_files,
+ bool generate_system_deps, bool generate_user_deps);
+void concat_adjacent_string_literals(TokenArray* pp_tokens);
+void print_token_to_file(FILE* output_file, TokenArray* pp_tokens);
+
+#endif
diff --git a/src/cc1/sys.c b/src/cc1/sys.c
new file mode 100644
index 0000000..693062a
--- /dev/null
+++ b/src/cc1/sys.c
@@ -0,0 +1,21 @@
+#include "sys.h"
+#include <libgen.h>
+#include <linux/limits.h>
+#include <unistd.h>
+#include "../lib/ducc.h"
+
+static char* get_self_path() {
+ char* buf = calloc(PATH_MAX, sizeof(char));
+ ssize_t len = readlink("/proc/self/exe", buf, PATH_MAX - 1);
+ if (len == -1 || len == PATH_MAX - 1) {
+ return NULL;
+ }
+ buf[len] = '\0';
+ return buf;
+}
+
+// It returns a path not including final / except for root directory.
+char* get_self_dir() {
+ char* path = get_self_path();
+ return dirname(path);
+}
diff --git a/src/cc1/sys.h b/src/cc1/sys.h
new file mode 100644
index 0000000..2527724
--- /dev/null
+++ b/src/cc1/sys.h
@@ -0,0 +1,7 @@
+#ifndef DUCC_SYS_H
+#define DUCC_SYS_H
+
+// It returns a path not including final / except for root directory.
+char* get_self_dir();
+
+#endif
diff --git a/src/cc1/token.c b/src/cc1/token.c
new file mode 100644
index 0000000..480a9a5
--- /dev/null
+++ b/src/cc1/token.c
@@ -0,0 +1,387 @@
+#include "token.h"
+#include "../lib/common.h"
+#include "../lib/json.h"
+
+const char* token_kind_stringify(TokenKind k) {
+ if (k == TokenKind_eof)
+ return "<eof>";
+ else if (k == TokenKind_hash)
+ return "#";
+ else if (k == TokenKind_hashhash)
+ return "##";
+ else if (k == TokenKind_whitespace)
+ return "<whitespace>";
+ else if (k == TokenKind_removed)
+ return "<removed>";
+ else if (k == TokenKind_placemarker)
+ return "<placemarker>";
+ else if (k == TokenKind_newline)
+ return "<new-line>";
+ else if (k == TokenKind_other)
+ return "<other>";
+ else if (k == TokenKind_character_constant)
+ return "<character-constant>";
+ else if (k == TokenKind_header_name)
+ return "<header-name>";
+ else if (k == TokenKind_pp_directive_define)
+ return "#define";
+ else if (k == TokenKind_pp_directive_elif)
+ return "#elif";
+ else if (k == TokenKind_pp_directive_elifdef)
+ return "#elifdef";
+ else if (k == TokenKind_pp_directive_elifndef)
+ return "#elifndef";
+ else if (k == TokenKind_pp_directive_else)
+ return "#else";
+ else if (k == TokenKind_pp_directive_embed)
+ return "#embed";
+ else if (k == TokenKind_pp_directive_endif)
+ return "#endif";
+ else if (k == TokenKind_pp_directive_error)
+ return "#error";
+ else if (k == TokenKind_pp_directive_if)
+ return "#if";
+ else if (k == TokenKind_pp_directive_ifdef)
+ return "#ifdef";
+ else if (k == TokenKind_pp_directive_ifndef)
+ return "#ifndef";
+ else if (k == TokenKind_pp_directive_include)
+ return "#include";
+ else if (k == TokenKind_pp_directive_include_next)
+ return "#include_next";
+ else if (k == TokenKind_pp_directive_line)
+ return "#line";
+ else if (k == TokenKind_pp_directive_non_directive)
+ return "#<non-directive>";
+ else if (k == TokenKind_pp_directive_nop)
+ return "#";
+ else if (k == TokenKind_pp_directive_pragma)
+ return "#pragma";
+ else if (k == TokenKind_pp_directive_undef)
+ return "#undef";
+ else if (k == TokenKind_pp_directive_warning)
+ return "#warning";
+ else if (k == TokenKind_pp_operator_defined)
+ return "defined";
+ else if (k == TokenKind_pp_operator___has_c_attribute)
+ return "__has_c_attribute";
+ else if (k == TokenKind_pp_operator___has_embed)
+ return "__has_embed";
+ else if (k == TokenKind_pp_operator___has_include)
+ return "__has_include";
+ else if (k == TokenKind_keyword_alignas)
+ return "alignas";
+ else if (k == TokenKind_keyword_alignof)
+ return "alignof";
+ else if (k == TokenKind_keyword_auto)
+ return "auto";
+ else if (k == TokenKind_keyword_bool)
+ return "bool";
+ else if (k == TokenKind_keyword_break)
+ return "break";
+ else if (k == TokenKind_keyword_case)
+ return "case";
+ else if (k == TokenKind_keyword_char)
+ return "char";
+ else if (k == TokenKind_keyword_const)
+ return "const";
+ else if (k == TokenKind_keyword_constexpr)
+ return "constexpr";
+ else if (k == TokenKind_keyword_continue)
+ return "continue";
+ else if (k == TokenKind_keyword_default)
+ return "default";
+ else if (k == TokenKind_keyword_do)
+ return "do";
+ else if (k == TokenKind_keyword_double)
+ return "double";
+ else if (k == TokenKind_keyword_else)
+ return "else";
+ else if (k == TokenKind_keyword_enum)
+ return "enum";
+ else if (k == TokenKind_keyword_extern)
+ return "extern";
+ else if (k == TokenKind_keyword_false)
+ return "false";
+ else if (k == TokenKind_keyword_float)
+ return "float";
+ else if (k == TokenKind_keyword_for)
+ return "for";
+ else if (k == TokenKind_keyword_goto)
+ return "goto";
+ else if (k == TokenKind_keyword_if)
+ return "if";
+ else if (k == TokenKind_keyword_inline)
+ return "inline";
+ else if (k == TokenKind_keyword_int)
+ return "int";
+ else if (k == TokenKind_keyword_long)
+ return "long";
+ else if (k == TokenKind_keyword_nullptr)
+ return "nullptr";
+ else if (k == TokenKind_keyword_register)
+ return "register";
+ else if (k == TokenKind_keyword_restrict)
+ return "restrict";
+ else if (k == TokenKind_keyword_return)
+ return "return";
+ else if (k == TokenKind_keyword_short)
+ return "short";
+ else if (k == TokenKind_keyword_signed)
+ return "signed";
+ else if (k == TokenKind_keyword_sizeof)
+ return "sizeof";
+ else if (k == TokenKind_keyword_static)
+ return "static";
+ else if (k == TokenKind_keyword_static_assert)
+ return "static_assert";
+ else if (k == TokenKind_keyword_struct)
+ return "struct";
+ else if (k == TokenKind_keyword_switch)
+ return "switch";
+ else if (k == TokenKind_keyword_thread_local)
+ return "thread_local";
+ else if (k == TokenKind_keyword_true)
+ return "true";
+ else if (k == TokenKind_keyword_typedef)
+ return "typedef";
+ else if (k == TokenKind_keyword_typeof)
+ return "typeof";
+ else if (k == TokenKind_keyword_typeof_unqual)
+ return "typeof_unqual";
+ else if (k == TokenKind_keyword_union)
+ return "union";
+ else if (k == TokenKind_keyword_unsigned)
+ return "unsigned";
+ else if (k == TokenKind_keyword_void)
+ return "void";
+ else if (k == TokenKind_keyword_volatile)
+ return "volatile";
+ else if (k == TokenKind_keyword_while)
+ return "while";
+ else if (k == TokenKind_keyword__Atomic)
+ return "_Atomic";
+ else if (k == TokenKind_keyword__BitInt)
+ return "_BitInt";
+ else if (k == TokenKind_keyword__Complex)
+ return "_Complex";
+ else if (k == TokenKind_keyword__Decimal128)
+ return "_Decimal128";
+ else if (k == TokenKind_keyword__Decimal32)
+ return "_Decimal32";
+ else if (k == TokenKind_keyword__Decimal64)
+ return "_Decimal64";
+ else if (k == TokenKind_keyword__Generic)
+ return "_Generic";
+ else if (k == TokenKind_keyword__Imaginary)
+ return "_Imaginary";
+ else if (k == TokenKind_keyword__Noreturn)
+ return "_Noreturn";
+ else if (k == TokenKind_and)
+ return "&";
+ else if (k == TokenKind_andand)
+ return "&&";
+ else if (k == TokenKind_arrow)
+ return "->";
+ else if (k == TokenKind_assign)
+ return "=";
+ else if (k == TokenKind_assign_add)
+ return "+=";
+ else if (k == TokenKind_assign_and)
+ return "&=";
+ else if (k == TokenKind_assign_div)
+ return "/=";
+ else if (k == TokenKind_assign_lshift)
+ return "<<=";
+ else if (k == TokenKind_assign_mod)
+ return "%=";
+ else if (k == TokenKind_assign_mul)
+ return "*=";
+ else if (k == TokenKind_assign_or)
+ return "|=";
+ else if (k == TokenKind_assign_rshift)
+ return ">>=";
+ else if (k == TokenKind_assign_sub)
+ return "-=";
+ else if (k == TokenKind_assign_xor)
+ return "^=";
+ else if (k == TokenKind_brace_l)
+ return "{";
+ else if (k == TokenKind_brace_r)
+ return "}";
+ else if (k == TokenKind_bracket_l)
+ return "[";
+ else if (k == TokenKind_bracket_r)
+ return "]";
+ else if (k == TokenKind_colon)
+ return ":";
+ else if (k == TokenKind_comma)
+ return ",";
+ else if (k == TokenKind_dot)
+ return ".";
+ else if (k == TokenKind_ellipsis)
+ return "...";
+ else if (k == TokenKind_eq)
+ return "==";
+ else if (k == TokenKind_ge)
+ return ">=";
+ else if (k == TokenKind_gt)
+ return ">";
+ else if (k == TokenKind_ident)
+ return "<identifier>";
+ else if (k == TokenKind_le)
+ return "<=";
+ else if (k == TokenKind_literal_int)
+ return "<integer>";
+ else if (k == TokenKind_literal_double)
+ return "<double>";
+ else if (k == TokenKind_literal_str)
+ return "<string>";
+ else if (k == TokenKind_lshift)
+ return "<<";
+ else if (k == TokenKind_lt)
+ return "<";
+ else if (k == TokenKind_minus)
+ return "-";
+ else if (k == TokenKind_minusminus)
+ return "--";
+ else if (k == TokenKind_ne)
+ return "!=";
+ else if (k == TokenKind_not)
+ return "!";
+ else if (k == TokenKind_or)
+ return "|";
+ else if (k == TokenKind_oror)
+ return "||";
+ else if (k == TokenKind_paren_l)
+ return "(";
+ else if (k == TokenKind_paren_r)
+ return ")";
+ else if (k == TokenKind_percent)
+ return "%";
+ else if (k == TokenKind_plus)
+ return "+";
+ else if (k == TokenKind_plusplus)
+ return "++";
+ else if (k == TokenKind_question)
+ return "?";
+ else if (k == TokenKind_rshift)
+ return ">>";
+ else if (k == TokenKind_semicolon)
+ return ";";
+ else if (k == TokenKind_slash)
+ return "/";
+ else if (k == TokenKind_star)
+ return "*";
+ else if (k == TokenKind_tilde)
+ return "~";
+ else if (k == TokenKind_xor)
+ return "^";
+ else
+ unreachable();
+}
+
+bool is_pp_directive(TokenKind k) {
+ return k == TokenKind_pp_directive_define || k == TokenKind_pp_directive_elif ||
+ k == TokenKind_pp_directive_elifdef || k == TokenKind_pp_directive_elifndef ||
+ k == TokenKind_pp_directive_else || k == TokenKind_pp_directive_embed || k == TokenKind_pp_directive_endif ||
+ k == TokenKind_pp_directive_error || k == TokenKind_pp_directive_if || k == TokenKind_pp_directive_ifdef ||
+ k == TokenKind_pp_directive_ifndef || k == TokenKind_pp_directive_include ||
+ k == TokenKind_pp_directive_include_next || k == TokenKind_pp_directive_line ||
+ k == TokenKind_pp_directive_non_directive || k == TokenKind_pp_directive_nop ||
+ k == TokenKind_pp_directive_pragma || k == TokenKind_pp_directive_undef ||
+ k == TokenKind_pp_directive_warning;
+}
+
+const char* token_stringify(Token* tok) {
+ TokenKind k = tok->kind;
+ if (k == TokenKind_pp_directive_non_directive) {
+ char* buf = calloc(strlen(tok->value.string) + 1 + 1, sizeof(char));
+ sprintf(buf, "#%s", tok->value.string);
+ return buf;
+ } else if (k == TokenKind_literal_int) {
+ char* buf = calloc(10, sizeof(char));
+ sprintf(buf, "%d", tok->value.integer);
+ return buf;
+ } else if (k == TokenKind_literal_double) {
+ char* buf = calloc(32, sizeof(char));
+ sprintf(buf, "%g", tok->value.floating);
+ return buf;
+ } else if (k == TokenKind_other || k == TokenKind_character_constant || k == TokenKind_ident ||
+ k == TokenKind_literal_str || k == TokenKind_header_name) {
+ return tok->value.string;
+ } else {
+ return token_kind_stringify(k);
+ }
+}
+
+void token_build_json(JsonBuilder* builder, Token* tok) {
+ jsonbuilder_object_start(builder);
+ jsonbuilder_object_member_start(builder, "kind");
+ jsonbuilder_string(builder, token_kind_stringify(tok->kind));
+ jsonbuilder_object_member_end(builder);
+ jsonbuilder_object_member_start(builder, "value");
+ if (tok->kind == TokenKind_pp_directive_non_directive) {
+ char* buf = calloc(strlen(tok->value.string) + 1 + 1, sizeof(char));
+ sprintf(buf, "#%s", tok->value.string);
+ jsonbuilder_string(builder, buf);
+ } else if (tok->kind == TokenKind_literal_int) {
+ jsonbuilder_integer(builder, tok->value.integer);
+ } else if (tok->kind == TokenKind_other || tok->kind == TokenKind_character_constant ||
+ tok->kind == TokenKind_ident || tok->kind == TokenKind_literal_str) {
+ jsonbuilder_string(builder, tok->value.string);
+ } else {
+ jsonbuilder_null(builder);
+ }
+ jsonbuilder_object_member_end(builder);
+ jsonbuilder_object_member_start(builder, "loc");
+ sourcelocation_build_json(builder, &tok->loc);
+ jsonbuilder_object_member_end(builder);
+ jsonbuilder_object_end(builder);
+}
+
+void tokens_init(TokenArray* tokens, size_t capacity) {
+ tokens->len = 0;
+ tokens->capacity = capacity;
+ tokens->data = calloc(tokens->capacity, sizeof(Token));
+}
+
+void tokens_reserve(TokenArray* tokens, size_t size) {
+ if (size <= tokens->capacity)
+ return;
+ while (tokens->capacity < size) {
+ tokens->capacity *= 2;
+ }
+ tokens->data = realloc(tokens->data, tokens->capacity * sizeof(Token));
+ memset(tokens->data + tokens->len, 0, (tokens->capacity - tokens->len) * sizeof(Token));
+}
+
+Token* tokens_push_new(TokenArray* tokens) {
+ tokens_reserve(tokens, tokens->len + 1);
+ return &tokens->data[tokens->len++];
+}
+
+Token* tokens_pop(TokenArray* tokens) {
+ if (tokens->len == 0) {
+ return NULL;
+ } else {
+ return &tokens->data[--tokens->len];
+ }
+}
+
+void tokens_build_json(JsonBuilder* builder, TokenArray* tokens) {
+ jsonbuilder_object_start(builder);
+ jsonbuilder_object_member_start(builder, "len");
+ jsonbuilder_integer(builder, tokens->len);
+ jsonbuilder_object_member_end(builder);
+ jsonbuilder_object_member_start(builder, "data");
+ jsonbuilder_array_start(builder);
+ for (size_t i = 0; i < tokens->len; ++i) {
+ jsonbuilder_array_element_start(builder);
+ token_build_json(builder, &tokens->data[i]);
+ jsonbuilder_array_element_end(builder);
+ }
+ jsonbuilder_array_end(builder);
+ jsonbuilder_object_member_end(builder);
+ jsonbuilder_object_end(builder);
+}
diff --git a/src/cc1/token.h b/src/cc1/token.h
new file mode 100644
index 0000000..46490c1
--- /dev/null
+++ b/src/cc1/token.h
@@ -0,0 +1,183 @@
+#ifndef DUCC_TOKEN_H
+#define DUCC_TOKEN_H
+
+#include "../lib/ducc.h"
+#include "io.h"
+
+typedef enum {
+ TokenKind_eof,
+
+ // Only preprocessing phase.
+ TokenKind_hash,
+ TokenKind_hashhash,
+ TokenKind_whitespace,
+ TokenKind_removed,
+ TokenKind_placemarker,
+ TokenKind_newline,
+ TokenKind_other,
+ TokenKind_character_constant,
+ TokenKind_header_name,
+ TokenKind_pp_directive_define,
+ TokenKind_pp_directive_elif,
+ TokenKind_pp_directive_elifdef,
+ TokenKind_pp_directive_elifndef,
+ TokenKind_pp_directive_else,
+ TokenKind_pp_directive_embed,
+ TokenKind_pp_directive_endif,
+ TokenKind_pp_directive_error,
+ TokenKind_pp_directive_if,
+ TokenKind_pp_directive_ifdef,
+ TokenKind_pp_directive_ifndef,
+ TokenKind_pp_directive_include,
+ TokenKind_pp_directive_include_next, // GNU extension
+ TokenKind_pp_directive_line,
+ TokenKind_pp_directive_non_directive,
+ TokenKind_pp_directive_nop,
+ TokenKind_pp_directive_pragma,
+ TokenKind_pp_directive_undef,
+ TokenKind_pp_directive_warning,
+ TokenKind_pp_operator_defined,
+ TokenKind_pp_operator___has_c_attribute,
+ TokenKind_pp_operator___has_embed,
+ TokenKind_pp_operator___has_include,
+
+ // C23: 6.4.1
+ TokenKind_keyword_alignas,
+ TokenKind_keyword_alignof,
+ TokenKind_keyword_auto,
+ TokenKind_keyword_bool,
+ TokenKind_keyword_break,
+ TokenKind_keyword_case,
+ TokenKind_keyword_char,
+ TokenKind_keyword_const,
+ TokenKind_keyword_constexpr,
+ TokenKind_keyword_continue,
+ TokenKind_keyword_default,
+ TokenKind_keyword_do,
+ TokenKind_keyword_double,
+ TokenKind_keyword_else,
+ TokenKind_keyword_enum,
+ TokenKind_keyword_extern,
+ TokenKind_keyword_false,
+ TokenKind_keyword_float,
+ TokenKind_keyword_for,
+ TokenKind_keyword_goto,
+ TokenKind_keyword_if,
+ TokenKind_keyword_inline,
+ TokenKind_keyword_int,
+ TokenKind_keyword_long,
+ TokenKind_keyword_nullptr,
+ TokenKind_keyword_register,
+ TokenKind_keyword_restrict,
+ TokenKind_keyword_return,
+ TokenKind_keyword_short,
+ TokenKind_keyword_signed,
+ TokenKind_keyword_sizeof,
+ TokenKind_keyword_static,
+ TokenKind_keyword_static_assert,
+ TokenKind_keyword_struct,
+ TokenKind_keyword_switch,
+ TokenKind_keyword_thread_local,
+ TokenKind_keyword_true,
+ TokenKind_keyword_typedef,
+ TokenKind_keyword_typeof,
+ TokenKind_keyword_typeof_unqual,
+ TokenKind_keyword_union,
+ TokenKind_keyword_unsigned,
+ TokenKind_keyword_void,
+ TokenKind_keyword_volatile,
+ TokenKind_keyword_while,
+ TokenKind_keyword__Atomic,
+ TokenKind_keyword__BitInt,
+ TokenKind_keyword__Complex,
+ TokenKind_keyword__Decimal128,
+ TokenKind_keyword__Decimal32,
+ TokenKind_keyword__Decimal64,
+ TokenKind_keyword__Generic,
+ TokenKind_keyword__Imaginary,
+ TokenKind_keyword__Noreturn,
+
+ TokenKind_and,
+ TokenKind_andand,
+ TokenKind_arrow,
+ TokenKind_assign,
+ TokenKind_assign_add,
+ TokenKind_assign_and,
+ TokenKind_assign_div,
+ TokenKind_assign_lshift,
+ TokenKind_assign_mod,
+ TokenKind_assign_mul,
+ TokenKind_assign_or,
+ TokenKind_assign_rshift,
+ TokenKind_assign_sub,
+ TokenKind_assign_xor,
+ TokenKind_brace_l,
+ TokenKind_brace_r,
+ TokenKind_bracket_l,
+ TokenKind_bracket_r,
+ TokenKind_colon,
+ TokenKind_comma,
+ TokenKind_dot,
+ TokenKind_ellipsis,
+ TokenKind_eq,
+ TokenKind_ge,
+ TokenKind_gt,
+ TokenKind_ident,
+ TokenKind_le,
+ TokenKind_literal_int,
+ TokenKind_literal_double,
+ TokenKind_literal_str,
+ TokenKind_lshift,
+ TokenKind_lt,
+ TokenKind_minus,
+ TokenKind_minusminus,
+ TokenKind_ne,
+ TokenKind_not,
+ TokenKind_or,
+ TokenKind_oror,
+ TokenKind_paren_l,
+ TokenKind_paren_r,
+ TokenKind_percent,
+ TokenKind_plus,
+ TokenKind_plusplus,
+ TokenKind_question,
+ TokenKind_rshift,
+ TokenKind_semicolon,
+ TokenKind_slash,
+ TokenKind_star,
+ TokenKind_tilde,
+ TokenKind_xor,
+} TokenKind;
+
+const char* token_kind_stringify(TokenKind k);
+bool is_pp_directive(TokenKind k);
+
+// TokenValue is externally tagged by Token's kind.
+typedef union {
+ const char* string;
+ int integer;
+ double floating;
+} TokenValue;
+
+typedef struct {
+ TokenKind kind;
+ TokenValue value;
+ SourceLocation loc;
+} Token;
+
+const char* token_stringify(Token* tok);
+void token_build_json(JsonBuilder* builder, Token* tok);
+
+typedef struct {
+ size_t len;
+ size_t capacity;
+ Token* data;
+} TokenArray;
+
+void tokens_init(TokenArray* tokens, size_t capacity);
+void tokens_reserve(TokenArray* tokens, size_t size);
+Token* tokens_push_new(TokenArray* tokens);
+Token* tokens_pop(TokenArray* tokens);
+void tokens_build_json(JsonBuilder* builder, TokenArray* tokens);
+
+#endif
diff --git a/src/cc1/tokenize.c b/src/cc1/tokenize.c
new file mode 100644
index 0000000..78b1acb
--- /dev/null
+++ b/src/cc1/tokenize.c
@@ -0,0 +1,597 @@
+#include "tokenize.h"
+#include <ctype.h>
+#include "../lib/common.h"
+
+typedef struct {
+ InFile* src;
+ bool at_bol;
+ bool expect_header_name;
+ TokenArray* tokens;
+} Lexer;
+
+static Lexer* lexer_new(InFile* src) {
+ Lexer* l = calloc(1, sizeof(Lexer));
+
+ l->src = src;
+ l->at_bol = true;
+ l->expect_header_name = false;
+ l->tokens = calloc(1, sizeof(TokenArray));
+ tokens_init(l->tokens, 1024 * 16);
+
+ return l;
+}
+
+static void pplexer_tokenize_pp_directive(Lexer* l, Token* tok) {
+ // Skip whitespaces after '#'.
+ char c;
+ while (isspace((c = infile_peek_char(l->src)))) {
+ if (c == '\n')
+ break;
+ infile_next_char(l->src);
+ }
+ // '#' new-line
+ if (c == '\n') {
+ tok->kind = TokenKind_pp_directive_nop;
+ return;
+ }
+
+ StrBuilder builder;
+ strbuilder_init(&builder);
+ while (isalnum(infile_peek_char(l->src)) || infile_peek_char(l->src) == '_') {
+ strbuilder_append_char(&builder, infile_peek_char(l->src));
+ infile_next_char(l->src);
+ }
+ const char* pp_directive_name = builder.buf;
+
+ if (builder.len == 0) {
+ tok->kind = TokenKind_hash;
+ } else if (strcmp(pp_directive_name, "define") == 0) {
+ tok->kind = TokenKind_pp_directive_define;
+ } else if (strcmp(pp_directive_name, "elif") == 0) {
+ tok->kind = TokenKind_pp_directive_elif;
+ } else if (strcmp(pp_directive_name, "elifdef") == 0) {
+ tok->kind = TokenKind_pp_directive_elifdef;
+ } else if (strcmp(pp_directive_name, "elifndef") == 0) {
+ tok->kind = TokenKind_pp_directive_elifndef;
+ } else if (strcmp(pp_directive_name, "else") == 0) {
+ tok->kind = TokenKind_pp_directive_else;
+ } else if (strcmp(pp_directive_name, "embed") == 0) {
+ tok->kind = TokenKind_pp_directive_embed;
+ } else if (strcmp(pp_directive_name, "endif") == 0) {
+ tok->kind = TokenKind_pp_directive_endif;
+ } else if (strcmp(pp_directive_name, "error") == 0) {
+ tok->kind = TokenKind_pp_directive_error;
+ } else if (strcmp(pp_directive_name, "if") == 0) {
+ tok->kind = TokenKind_pp_directive_if;
+ } else if (strcmp(pp_directive_name, "ifdef") == 0) {
+ tok->kind = TokenKind_pp_directive_ifdef;
+ } else if (strcmp(pp_directive_name, "ifndef") == 0) {
+ tok->kind = TokenKind_pp_directive_ifndef;
+ } else if (strcmp(pp_directive_name, "include") == 0) {
+ l->expect_header_name = true;
+ tok->kind = TokenKind_pp_directive_include;
+ } else if (strcmp(pp_directive_name, "include_next") == 0) {
+ l->expect_header_name = true;
+ tok->kind = TokenKind_pp_directive_include_next;
+ } else if (strcmp(pp_directive_name, "line") == 0) {
+ tok->kind = TokenKind_pp_directive_line;
+ } else if (strcmp(pp_directive_name, "pragma") == 0) {
+ tok->kind = TokenKind_pp_directive_pragma;
+ } else if (strcmp(pp_directive_name, "undef") == 0) {
+ tok->kind = TokenKind_pp_directive_undef;
+ } else if (strcmp(pp_directive_name, "warning") == 0) {
+ tok->kind = TokenKind_pp_directive_warning;
+ } else {
+ tok->kind = TokenKind_pp_directive_non_directive;
+ tok->value.string = pp_directive_name;
+ }
+}
+
+static void do_tokenize_all(Lexer* l) {
+ while (!infile_eof(l->src)) {
+ Token* tok = tokens_push_new(l->tokens);
+ tok->loc = l->src->loc;
+ char c = infile_peek_char(l->src);
+
+ if (l->expect_header_name && c == '"') {
+ infile_next_char(l->src);
+ StrBuilder builder;
+ strbuilder_init(&builder);
+ strbuilder_append_char(&builder, '"');
+ while (1) {
+ char ch = infile_peek_char(l->src);
+ if (ch == '"')
+ break;
+ strbuilder_append_char(&builder, ch);
+ if (ch == '\\') {
+ infile_next_char(l->src);
+ strbuilder_append_char(&builder, infile_peek_char(l->src));
+ }
+ infile_next_char(l->src);
+ }
+ strbuilder_append_char(&builder, '"');
+ infile_next_char(l->src);
+ tok->kind = TokenKind_header_name;
+ tok->value.string = builder.buf;
+ l->expect_header_name = false;
+ } else if (l->expect_header_name && c == '<') {
+ infile_next_char(l->src);
+ StrBuilder builder;
+ strbuilder_init(&builder);
+ strbuilder_append_char(&builder, '<');
+ while (1) {
+ char ch = infile_peek_char(l->src);
+ if (ch == '>')
+ break;
+ strbuilder_append_char(&builder, ch);
+ infile_next_char(l->src);
+ }
+ strbuilder_append_char(&builder, '>');
+ infile_next_char(l->src);
+ tok->kind = TokenKind_header_name;
+ tok->value.string = builder.buf;
+ l->expect_header_name = false;
+ } else if (c == '(') {
+ infile_next_char(l->src);
+ tok->kind = TokenKind_paren_l;
+ } else if (c == ')') {
+ infile_next_char(l->src);
+ tok->kind = TokenKind_paren_r;
+ } else if (c == '{') {
+ infile_next_char(l->src);
+ tok->kind = TokenKind_brace_l;
+ } else if (c == '}') {
+ infile_next_char(l->src);
+ tok->kind = TokenKind_brace_r;
+ } else if (c == '[') {
+ infile_next_char(l->src);
+ tok->kind = TokenKind_bracket_l;
+ } else if (c == ']') {
+ infile_next_char(l->src);
+ tok->kind = TokenKind_bracket_r;
+ } else if (c == ',') {
+ infile_next_char(l->src);
+ tok->kind = TokenKind_comma;
+ } else if (c == ':') {
+ infile_next_char(l->src);
+ tok->kind = TokenKind_colon;
+ } else if (c == ';') {
+ infile_next_char(l->src);
+ tok->kind = TokenKind_semicolon;
+ } else if (c == '^') {
+ infile_next_char(l->src);
+ if (infile_consume_if(l->src, '=')) {
+ tok->kind = TokenKind_assign_xor;
+ } else {
+ tok->kind = TokenKind_xor;
+ }
+ } else if (c == '?') {
+ infile_next_char(l->src);
+ tok->kind = TokenKind_question;
+ } else if (c == '~') {
+ infile_next_char(l->src);
+ tok->kind = TokenKind_tilde;
+ } else if (c == '+') {
+ infile_next_char(l->src);
+ if (infile_consume_if(l->src, '=')) {
+ tok->kind = TokenKind_assign_add;
+ } else if (infile_consume_if(l->src, '+')) {
+ tok->kind = TokenKind_plusplus;
+ } else {
+ tok->kind = TokenKind_plus;
+ }
+ } else if (c == '|') {
+ infile_next_char(l->src);
+ if (infile_consume_if(l->src, '=')) {
+ tok->kind = TokenKind_assign_or;
+ } else if (infile_consume_if(l->src, '|')) {
+ tok->kind = TokenKind_oror;
+ } else {
+ tok->kind = TokenKind_or;
+ }
+ } else if (c == '&') {
+ infile_next_char(l->src);
+ if (infile_consume_if(l->src, '=')) {
+ tok->kind = TokenKind_assign_and;
+ } else if (infile_consume_if(l->src, '&')) {
+ tok->kind = TokenKind_andand;
+ } else {
+ tok->kind = TokenKind_and;
+ }
+ } else if (c == '-') {
+ infile_next_char(l->src);
+ if (infile_consume_if(l->src, '>')) {
+ tok->kind = TokenKind_arrow;
+ } else if (infile_consume_if(l->src, '=')) {
+ tok->kind = TokenKind_assign_sub;
+ } else if (infile_consume_if(l->src, '-')) {
+ tok->kind = TokenKind_minusminus;
+ } else {
+ tok->kind = TokenKind_minus;
+ }
+ } else if (c == '*') {
+ infile_next_char(l->src);
+ if (infile_consume_if(l->src, '=')) {
+ tok->kind = TokenKind_assign_mul;
+ } else {
+ tok->kind = TokenKind_star;
+ }
+ } else if (c == '/') {
+ infile_next_char(l->src);
+ if (infile_consume_if(l->src, '=')) {
+ tok->kind = TokenKind_assign_div;
+ } else if (infile_consume_if(l->src, '/')) {
+ while (!infile_eof(l->src) && infile_peek_char(l->src) != '\n') {
+ infile_next_char(l->src);
+ }
+ tok->kind = TokenKind_whitespace;
+ } else if (infile_consume_if(l->src, '*')) {
+ while (infile_peek_char(l->src)) {
+ if (infile_consume_if(l->src, '*')) {
+ if (infile_consume_if(l->src, '/')) {
+ break;
+ }
+ continue;
+ }
+ infile_next_char(l->src);
+ }
+ tok->kind = TokenKind_whitespace;
+ } else {
+ tok->kind = TokenKind_slash;
+ }
+ } else if (c == '%') {
+ infile_next_char(l->src);
+ if (infile_consume_if(l->src, '=')) {
+ tok->kind = TokenKind_assign_mod;
+ } else {
+ tok->kind = TokenKind_percent;
+ }
+ } else if (c == '.') {
+ infile_next_char(l->src);
+ if (infile_consume_if(l->src, '.')) {
+ if (infile_consume_if(l->src, '.')) {
+ tok->kind = TokenKind_ellipsis;
+ } else {
+ tok->kind = TokenKind_other;
+ tok->value.string = "..";
+ }
+ } else {
+ tok->kind = TokenKind_dot;
+ }
+ } else if (c == '!') {
+ infile_next_char(l->src);
+ if (infile_consume_if(l->src, '=')) {
+ tok->kind = TokenKind_ne;
+ } else {
+ tok->kind = TokenKind_not;
+ }
+ } else if (c == '=') {
+ infile_next_char(l->src);
+ if (infile_consume_if(l->src, '=')) {
+ tok->kind = TokenKind_eq;
+ } else {
+ tok->kind = TokenKind_assign;
+ }
+ } else if (c == '<') {
+ infile_next_char(l->src);
+ if (infile_consume_if(l->src, '=')) {
+ tok->kind = TokenKind_le;
+ } else if (infile_consume_if(l->src, '<')) {
+ if (infile_consume_if(l->src, '=')) {
+ tok->kind = TokenKind_assign_lshift;
+ } else {
+ tok->kind = TokenKind_lshift;
+ }
+ } else {
+ tok->kind = TokenKind_lt;
+ }
+ } else if (c == '>') {
+ infile_next_char(l->src);
+ if (infile_consume_if(l->src, '=')) {
+ tok->kind = TokenKind_ge;
+ } else if (infile_consume_if(l->src, '>')) {
+ if (infile_consume_if(l->src, '=')) {
+ tok->kind = TokenKind_assign_rshift;
+ } else {
+ tok->kind = TokenKind_rshift;
+ }
+ } else {
+ tok->kind = TokenKind_gt;
+ }
+ } else if (c == '#') {
+ infile_next_char(l->src);
+ if (infile_consume_if(l->src, '#')) {
+ tok->kind = TokenKind_hashhash;
+ } else {
+ if (l->at_bol) {
+ pplexer_tokenize_pp_directive(l, tok);
+ } else {
+ tok->kind = TokenKind_hash;
+ }
+ }
+ } else if (c == '\'') {
+ infile_next_char(l->src);
+ StrBuilder builder;
+ strbuilder_init(&builder);
+ strbuilder_append_char(&builder, '\'');
+ strbuilder_append_char(&builder, infile_peek_char(l->src));
+ if (infile_peek_char(l->src) == '\\') {
+ infile_next_char(l->src);
+ strbuilder_append_char(&builder, infile_peek_char(l->src));
+ }
+ strbuilder_append_char(&builder, '\'');
+ infile_next_char(l->src);
+ infile_next_char(l->src);
+ tok->kind = TokenKind_character_constant;
+ tok->value.string = builder.buf;
+ } else if (c == '"') {
+ infile_next_char(l->src);
+ StrBuilder builder;
+ strbuilder_init(&builder);
+ while (1) {
+ char ch = infile_peek_char(l->src);
+ if (ch == '"')
+ break;
+ strbuilder_append_char(&builder, ch);
+ if (ch == '\\') {
+ infile_next_char(l->src);
+ strbuilder_append_char(&builder, infile_peek_char(l->src));
+ }
+ infile_next_char(l->src);
+ }
+ infile_next_char(l->src);
+ tok->kind = TokenKind_literal_str;
+ tok->value.string = builder.buf;
+ } else if (isdigit(c)) {
+ // TODO: implement tokenization of pp-number.
+ StrBuilder builder;
+ strbuilder_init(&builder);
+ while (isalnum(infile_peek_char(l->src))) {
+ strbuilder_append_char(&builder, infile_peek_char(l->src));
+ infile_next_char(l->src);
+ }
+ if (infile_peek_char(l->src) == '.' && isdigit(infile_peek_char2(l->src))) {
+ strbuilder_append_char(&builder, infile_peek_char(l->src));
+ infile_next_char(l->src);
+ while (isdigit(infile_peek_char(l->src))) {
+ strbuilder_append_char(&builder, infile_peek_char(l->src));
+ infile_next_char(l->src);
+ }
+ tok->kind = TokenKind_literal_double;
+ tok->value.floating = strtod(builder.buf, NULL);
+ } else {
+ tok->kind = TokenKind_literal_int;
+ tok->value.integer = strtol(builder.buf, NULL, 0);
+ }
+ } else if (isalpha(c) || c == '_') {
+ StrBuilder builder;
+ strbuilder_init(&builder);
+ while (isalnum(infile_peek_char(l->src)) || infile_peek_char(l->src) == '_') {
+ strbuilder_append_char(&builder, infile_peek_char(l->src));
+ infile_next_char(l->src);
+ }
+ tok->kind = TokenKind_ident;
+ tok->value.string = builder.buf;
+ } else if (c == '\n') {
+ infile_next_char(l->src);
+ tok->kind = TokenKind_newline;
+
+ // Reset expect_header_name at the end of line. It handles cases like:
+ //
+ // #ifdef ADDITIONAL_HEADER
+ // #include ADDITIONAL_HEADER
+ // #endif
+ //
+ // Even if ADDITIONAL_HEADER is undefined, this include directive line is tokenized. If the flag were not
+ // reset, the next occurrence of '<' or '"' would be recognized as part of a header name.
+ l->expect_header_name = false;
+ } else if (isspace(c)) {
+ while (isspace((c = infile_peek_char(l->src)))) {
+ if (c == '\n')
+ break;
+ infile_next_char(l->src);
+ }
+ if (l->at_bol && infile_peek_char(l->src) == '#') {
+ infile_next_char(l->src);
+ pplexer_tokenize_pp_directive(l, tok);
+ } else {
+ tok->kind = TokenKind_whitespace;
+ }
+ } else {
+ infile_next_char(l->src);
+ tok->kind = TokenKind_other;
+ char* buf = calloc(2, sizeof(char));
+ buf[0] = c;
+ tok->value.string = buf;
+ }
+ l->at_bol = tok->kind == TokenKind_newline;
+ }
+ Token* eof_tok = tokens_push_new(l->tokens);
+ eof_tok->loc = l->src->loc;
+ eof_tok->kind = TokenKind_eof;
+}
+
+TokenArray* tokenize(InFile* src) {
+ Lexer* l = lexer_new(src);
+ do_tokenize_all(l);
+ return l->tokens;
+}
+
+TokenArray* convert_pp_tokens_to_tokens(TokenArray* pp_tokens) {
+ TokenArray* tokens = calloc(1, sizeof(TokenArray));
+ // tokens need not store whitespace tokens.
+ tokens_init(tokens, pp_tokens->len / 2);
+
+ for (size_t pos = 0; pos < pp_tokens->len; ++pos) {
+ Token* pp_tok = &pp_tokens->data[pos];
+ TokenKind k = pp_tok->kind;
+ if (k == TokenKind_removed || k == TokenKind_whitespace || k == TokenKind_newline) {
+ continue;
+ }
+ Token* tok = tokens_push_new(tokens);
+ tok->loc = pp_tok->loc;
+ if (k == TokenKind_character_constant) {
+ tok->kind = TokenKind_literal_int;
+ int ch = pp_tok->value.string[1];
+ if (ch == '\\') {
+ ch = pp_tok->value.string[2];
+ if (ch == 'a') {
+ ch = '\a';
+ } else if (ch == 'b') {
+ ch = '\b';
+ } else if (ch == 'f') {
+ ch = '\f';
+ } else if (ch == 'n') {
+ ch = '\n';
+ } else if (ch == 'r') {
+ ch = '\r';
+ } else if (ch == 't') {
+ ch = '\t';
+ } else if (ch == 'v') {
+ ch = '\v';
+ } else if (ch == '0') {
+ ch = '\0';
+ } else if (ch == 'e') {
+ // \e is not a part of Standard C, but commonly supported.
+ ch = 27;
+ }
+ }
+ tok->value.integer = ch;
+ } else if (k == TokenKind_literal_str) {
+ tok->kind = pp_tok->kind;
+
+ size_t len = strlen(pp_tok->value.string);
+ char* buf = calloc(len + 1, sizeof(char));
+ for (size_t i = 0, j = 0; i < len; i++, j++) {
+ if (pp_tok->value.string[i] == '\\' && pp_tok->value.string[i + 1] == 'e') {
+ // \e is not a part of Standard C, but commonly supported.
+ buf[j] = 033;
+ i++;
+ } else {
+ buf[j] = pp_tok->value.string[i];
+ }
+ }
+ tok->value.string = buf;
+ } else if (k == TokenKind_ident) {
+ if (strcmp(pp_tok->value.string, "alignas") == 0) {
+ tok->kind = TokenKind_keyword_alignas;
+ } else if (strcmp(pp_tok->value.string, "alignof") == 0) {
+ tok->kind = TokenKind_keyword_alignof;
+ } else if (strcmp(pp_tok->value.string, "auto") == 0) {
+ tok->kind = TokenKind_keyword_auto;
+ } else if (strcmp(pp_tok->value.string, "bool") == 0) {
+ tok->kind = TokenKind_keyword_bool;
+ } else if (strcmp(pp_tok->value.string, "break") == 0) {
+ tok->kind = TokenKind_keyword_break;
+ } else if (strcmp(pp_tok->value.string, "case") == 0) {
+ tok->kind = TokenKind_keyword_case;
+ } else if (strcmp(pp_tok->value.string, "char") == 0) {
+ tok->kind = TokenKind_keyword_char;
+ } else if (strcmp(pp_tok->value.string, "const") == 0) {
+ tok->kind = TokenKind_keyword_const;
+ } else if (strcmp(pp_tok->value.string, "constexpr") == 0) {
+ tok->kind = TokenKind_keyword_constexpr;
+ } else if (strcmp(pp_tok->value.string, "continue") == 0) {
+ tok->kind = TokenKind_keyword_continue;
+ } else if (strcmp(pp_tok->value.string, "default") == 0) {
+ tok->kind = TokenKind_keyword_default;
+ } else if (strcmp(pp_tok->value.string, "do") == 0) {
+ tok->kind = TokenKind_keyword_do;
+ } else if (strcmp(pp_tok->value.string, "double") == 0) {
+ tok->kind = TokenKind_keyword_double;
+ } else if (strcmp(pp_tok->value.string, "else") == 0) {
+ tok->kind = TokenKind_keyword_else;
+ } else if (strcmp(pp_tok->value.string, "enum") == 0) {
+ tok->kind = TokenKind_keyword_enum;
+ } else if (strcmp(pp_tok->value.string, "extern") == 0) {
+ tok->kind = TokenKind_keyword_extern;
+ } else if (strcmp(pp_tok->value.string, "false") == 0) {
+ tok->kind = TokenKind_keyword_false;
+ } else if (strcmp(pp_tok->value.string, "float") == 0) {
+ tok->kind = TokenKind_keyword_float;
+ } else if (strcmp(pp_tok->value.string, "for") == 0) {
+ tok->kind = TokenKind_keyword_for;
+ } else if (strcmp(pp_tok->value.string, "goto") == 0) {
+ tok->kind = TokenKind_keyword_goto;
+ } else if (strcmp(pp_tok->value.string, "if") == 0) {
+ tok->kind = TokenKind_keyword_if;
+ } else if (strcmp(pp_tok->value.string, "inline") == 0) {
+ tok->kind = TokenKind_keyword_inline;
+ } else if (strcmp(pp_tok->value.string, "int") == 0) {
+ tok->kind = TokenKind_keyword_int;
+ } else if (strcmp(pp_tok->value.string, "long") == 0) {
+ tok->kind = TokenKind_keyword_long;
+ } else if (strcmp(pp_tok->value.string, "nullptr") == 0) {
+ tok->kind = TokenKind_keyword_nullptr;
+ } else if (strcmp(pp_tok->value.string, "register") == 0) {
+ tok->kind = TokenKind_keyword_register;
+ } else if (strcmp(pp_tok->value.string, "restrict") == 0) {
+ tok->kind = TokenKind_keyword_restrict;
+ } else if (strcmp(pp_tok->value.string, "return") == 0) {
+ tok->kind = TokenKind_keyword_return;
+ } else if (strcmp(pp_tok->value.string, "short") == 0) {
+ tok->kind = TokenKind_keyword_short;
+ } else if (strcmp(pp_tok->value.string, "signed") == 0) {
+ tok->kind = TokenKind_keyword_signed;
+ } else if (strcmp(pp_tok->value.string, "sizeof") == 0) {
+ tok->kind = TokenKind_keyword_sizeof;
+ } else if (strcmp(pp_tok->value.string, "static") == 0) {
+ tok->kind = TokenKind_keyword_static;
+ } else if (strcmp(pp_tok->value.string, "static_assert") == 0) {
+ tok->kind = TokenKind_keyword_static_assert;
+ } else if (strcmp(pp_tok->value.string, "struct") == 0) {
+ tok->kind = TokenKind_keyword_struct;
+ } else if (strcmp(pp_tok->value.string, "switch") == 0) {
+ tok->kind = TokenKind_keyword_switch;
+ } else if (strcmp(pp_tok->value.string, "thread_local") == 0) {
+ tok->kind = TokenKind_keyword_thread_local;
+ } else if (strcmp(pp_tok->value.string, "true") == 0) {
+ tok->kind = TokenKind_keyword_true;
+ } else if (strcmp(pp_tok->value.string, "typedef") == 0) {
+ tok->kind = TokenKind_keyword_typedef;
+ } else if (strcmp(pp_tok->value.string, "typeof") == 0) {
+ tok->kind = TokenKind_keyword_typeof;
+ } else if (strcmp(pp_tok->value.string, "typeof_unqual") == 0) {
+ tok->kind = TokenKind_keyword_typeof_unqual;
+ } else if (strcmp(pp_tok->value.string, "union") == 0) {
+ tok->kind = TokenKind_keyword_union;
+ } else if (strcmp(pp_tok->value.string, "unsigned") == 0) {
+ tok->kind = TokenKind_keyword_unsigned;
+ } else if (strcmp(pp_tok->value.string, "void") == 0) {
+ tok->kind = TokenKind_keyword_void;
+ } else if (strcmp(pp_tok->value.string, "volatile") == 0) {
+ tok->kind = TokenKind_keyword_volatile;
+ } else if (strcmp(pp_tok->value.string, "while") == 0) {
+ tok->kind = TokenKind_keyword_while;
+ } else if (strcmp(pp_tok->value.string, "_Atomic") == 0) {
+ tok->kind = TokenKind_keyword__Atomic;
+ } else if (strcmp(pp_tok->value.string, "_BitInt") == 0) {
+ tok->kind = TokenKind_keyword__BitInt;
+ } else if (strcmp(pp_tok->value.string, "_Complex") == 0) {
+ tok->kind = TokenKind_keyword__Complex;
+ } else if (strcmp(pp_tok->value.string, "_Decimal128") == 0) {
+ tok->kind = TokenKind_keyword__Decimal128;
+ } else if (strcmp(pp_tok->value.string, "_Decimal32") == 0) {
+ tok->kind = TokenKind_keyword__Decimal32;
+ } else if (strcmp(pp_tok->value.string, "_Decimal64") == 0) {
+ tok->kind = TokenKind_keyword__Decimal64;
+ } else if (strcmp(pp_tok->value.string, "_Generic") == 0) {
+ tok->kind = TokenKind_keyword__Generic;
+ } else if (strcmp(pp_tok->value.string, "_Imaginary") == 0) {
+ tok->kind = TokenKind_keyword__Imaginary;
+ } else if (strcmp(pp_tok->value.string, "_Noreturn") == 0) {
+ tok->kind = TokenKind_keyword__Noreturn;
+ } else {
+ tok->kind = TokenKind_ident;
+ tok->value = pp_tok->value;
+ }
+ } else if (k == TokenKind_other) {
+ unreachable();
+ } else {
+ tok->kind = pp_tok->kind;
+ tok->value = pp_tok->value;
+ }
+ }
+
+ return tokens;
+}
diff --git a/src/cc1/tokenize.h b/src/cc1/tokenize.h
new file mode 100644
index 0000000..fd334a1
--- /dev/null
+++ b/src/cc1/tokenize.h
@@ -0,0 +1,10 @@
+#ifndef DUCC_TOKENIZE_H
+#define DUCC_TOKENIZE_H
+
+#include "io.h"
+#include "token.h"
+
+TokenArray* tokenize(InFile* src);
+TokenArray* convert_pp_tokens_to_tokens(TokenArray* pp_tokens);
+
+#endif