aboutsummaryrefslogtreecommitdiffhomepage
path: root/src
diff options
context:
space:
mode:
authornsfisis <nsfisis@gmail.com>2025-08-22 23:28:25 +0900
committernsfisis <nsfisis@gmail.com>2025-08-22 23:28:25 +0900
commit9c202a496e75903fe37e5c19cb97c98eba6e35f2 (patch)
tree52de494a4717a3c30c4bacb9dd9b91980be2a575 /src
parent0ac6ac95283735dd70ebf55b26ef78a4c32c31de (diff)
downloadducc-9c202a496e75903fe37e5c19cb97c98eba6e35f2.tar.gz
ducc-9c202a496e75903fe37e5c19cb97c98eba6e35f2.tar.zst
ducc-9c202a496e75903fe37e5c19cb97c98eba6e35f2.zip
chore: move *.c and *.h files to src/
Diffstat (limited to 'src')
-rw-r--r--src/ast.c475
-rw-r--r--src/codegen.c595
-rw-r--r--src/common.c39
-rw-r--r--src/io.c99
-rw-r--r--src/parse.c1404
-rw-r--r--src/preprocess.c1557
-rw-r--r--src/std.h50
-rw-r--r--src/sys.c15
-rw-r--r--src/tokenize.c175
9 files changed, 4409 insertions, 0 deletions
diff --git a/src/ast.c b/src/ast.c
new file mode 100644
index 0000000..66f7cc2
--- /dev/null
+++ b/src/ast.c
@@ -0,0 +1,475 @@
+enum TypeKind {
+ TypeKind_unknown,
+
+ TypeKind_char,
+ TypeKind_short,
+ TypeKind_int,
+ TypeKind_long,
+ TypeKind_void,
+ TypeKind_ptr,
+ TypeKind_array,
+ TypeKind_enum,
+ TypeKind_struct,
+ TypeKind_union,
+};
+typedef enum TypeKind TypeKind;
+
+const char* type_kind_stringify(TypeKind k) {
+ if (k == TypeKind_unknown)
+ return "<unknown>";
+ else if (k == TypeKind_char)
+ return "char";
+ else if (k == TypeKind_short)
+ return "short";
+ else if (k == TypeKind_int)
+ return "int";
+ else if (k == TypeKind_long)
+ return "long";
+ else if (k == TypeKind_void)
+ return "void";
+ else if (k == TypeKind_ptr)
+ return "<pointer>";
+ else if (k == TypeKind_array)
+ return "<array>";
+ else if (k == TypeKind_enum)
+ return "enum";
+ else if (k == TypeKind_struct)
+ return "struct";
+ else if (k == TypeKind_union)
+ return "union";
+ else
+ unreachable();
+}
+
+struct AstNode;
+
+struct Type {
+ TypeKind kind;
+ // Check `base` instead of `kind` to test if the type is an array or a pointer.
+ struct Type* base;
+ int array_size;
+ struct AstNode* def;
+};
+typedef struct Type Type;
+
+void type_dump(Type* ty) {
+ fprintf(stderr, "Type {\n");
+ fprintf(stderr, " kind = %s\n", type_kind_stringify(ty->kind));
+ fprintf(stderr, "}\n");
+}
+
+Type* type_new(TypeKind kind) {
+ Type* ty = calloc(1, sizeof(Type));
+ ty->kind = kind;
+ return ty;
+}
+
+Type* type_new_ptr(Type* base) {
+ Type* ty = calloc(1, sizeof(Type));
+ ty->kind = TypeKind_ptr;
+ ty->base = base;
+ return ty;
+}
+
+Type* type_new_array(Type* elem, int size) {
+ Type* ty = calloc(1, sizeof(Type));
+ ty->kind = TypeKind_array;
+ ty->base = elem;
+ ty->array_size = size;
+ return ty;
+}
+
+Type* type_new_static_string(int len) {
+ return type_new_array(type_new(TypeKind_char), len + 1);
+}
+
+Type* type_array_to_ptr(Type* ty) {
+ return type_new_ptr(ty->base);
+}
+
+BOOL type_is_unsized(Type* ty) {
+ return ty->kind == TypeKind_void;
+}
+
+int type_sizeof_struct(Type* ty);
+int type_sizeof_union(Type* ty);
+int type_alignof_struct(Type* ty);
+int type_alignof_union(Type* ty);
+int type_offsetof(Type* ty, const char* name);
+Type* type_member_typeof(Type* ty, const char* name);
+
+int type_sizeof(Type* ty) {
+ if (type_is_unsized(ty)) {
+ fatal_error("type_sizeof: type size cannot be determined");
+ }
+
+ if (ty->kind == TypeKind_ptr)
+ return 8;
+ else if (ty->kind == TypeKind_char)
+ return 1;
+ else if (ty->kind == TypeKind_short)
+ return 2;
+ else if (ty->kind == TypeKind_int)
+ return 4;
+ else if (ty->kind == TypeKind_long)
+ return 8;
+ else if (ty->kind == TypeKind_enum)
+ return 4;
+ else if (ty->kind == TypeKind_array)
+ return type_sizeof(ty->base) * ty->array_size;
+ else if (ty->kind == TypeKind_struct)
+ return type_sizeof_struct(ty);
+ else if (ty->kind == TypeKind_union)
+ return type_sizeof_union(ty);
+ else
+ unreachable();
+}
+
+int type_alignof(Type* ty) {
+ if (type_is_unsized(ty)) {
+ fatal_error("type_alignof: type size cannot be determined");
+ }
+
+ if (ty->kind == TypeKind_ptr)
+ return 8;
+ else if (ty->kind == TypeKind_char)
+ return 1;
+ else if (ty->kind == TypeKind_short)
+ return 2;
+ else if (ty->kind == TypeKind_int)
+ return 4;
+ else if (ty->kind == TypeKind_long)
+ return 8;
+ else if (ty->kind == TypeKind_enum)
+ return 4;
+ else if (ty->kind == TypeKind_array)
+ return type_alignof(ty->base);
+ else if (ty->kind == TypeKind_struct)
+ return type_alignof_struct(ty);
+ else if (ty->kind == TypeKind_union)
+ return type_alignof_union(ty);
+ else
+ unreachable();
+}
+
+int to_aligned(int n, int a) {
+ return (n + a - 1) / a * a;
+}
+
+enum AstNodeKind {
+ AstNodeKind_unknown,
+ AstNodeKind_nop,
+
+ AstNodeKind_assign_expr,
+ AstNodeKind_binary_expr,
+ AstNodeKind_break_stmt,
+ AstNodeKind_cond_expr,
+ AstNodeKind_continue_stmt,
+ AstNodeKind_deref_expr,
+ AstNodeKind_do_while_stmt,
+ AstNodeKind_enum_def,
+ AstNodeKind_enum_member,
+ AstNodeKind_expr_stmt,
+ AstNodeKind_for_stmt,
+ AstNodeKind_func_call,
+ AstNodeKind_func_decl,
+ AstNodeKind_func_def,
+ AstNodeKind_gvar,
+ AstNodeKind_gvar_decl,
+ AstNodeKind_if_stmt,
+ AstNodeKind_int_expr,
+ AstNodeKind_list,
+ AstNodeKind_logical_expr,
+ AstNodeKind_lvar,
+ AstNodeKind_lvar_decl,
+ AstNodeKind_param,
+ AstNodeKind_ref_expr,
+ AstNodeKind_return_stmt,
+ AstNodeKind_str_expr,
+ AstNodeKind_struct_decl,
+ AstNodeKind_struct_def,
+ AstNodeKind_struct_member,
+ AstNodeKind_type,
+ AstNodeKind_typedef_decl,
+ AstNodeKind_unary_expr,
+ AstNodeKind_union_decl,
+ AstNodeKind_union_def,
+ AstNodeKind_union_member,
+
+ // Intermediate ASTs: they are used only in parsing, not for parse result.
+ AstNodeKind_declarator,
+};
+typedef enum AstNodeKind AstNodeKind;
+
+#define node_items __n1
+#define node_len __i1
+#define node_cap __i2
+#define node_expr __n1
+#define node_lhs __n1
+#define node_rhs __n2
+#define node_operand __n1
+#define node_cond __n1
+#define node_init __n2
+#define node_update __n3
+#define node_then __n2
+#define node_else __n3
+#define node_body __n4
+#define node_members __n1
+#define node_params __n1
+#define node_args __n1
+#define node_int_value __i1
+#define node_idx __i1
+#define node_op __i1
+#define node_stack_offset __i1
+#define node_stack_size __i1
+
+struct AstNode {
+ AstNodeKind kind;
+ const char* name;
+ Type* ty;
+ struct AstNode* __n1;
+ struct AstNode* __n2;
+ struct AstNode* __n3;
+ struct AstNode* __n4;
+ int __i1;
+ int __i2;
+};
+typedef struct AstNode AstNode;
+
+struct Program {
+ AstNode* funcs;
+ AstNode* vars;
+ const char** str_literals;
+};
+typedef struct Program Program;
+
+AstNode* ast_new(AstNodeKind kind) {
+ AstNode* ast = calloc(1, sizeof(AstNode));
+ ast->kind = kind;
+ return ast;
+}
+
+AstNode* ast_new_list(int capacity) {
+ if (capacity == 0)
+ unreachable();
+ AstNode* list = ast_new(AstNodeKind_list);
+ list->node_cap = capacity;
+ list->node_len = 0;
+ list->node_items = calloc(list->node_cap, sizeof(AstNode));
+ return list;
+}
+
+void ast_append(AstNode* list, AstNode* item) {
+ if (list->kind != AstNodeKind_list) {
+ fatal_error("ast_append: ast is not a list");
+ }
+ if (!item) {
+ return;
+ }
+ if (list->node_cap <= list->node_len) {
+ list->node_cap *= 2;
+ list->node_items = realloc(list->node_items, sizeof(AstNode) * list->node_cap);
+ memset(list->node_items + list->node_len, 0, sizeof(AstNode) * (list->node_cap - list->node_len));
+ }
+ memcpy(list->node_items + list->node_len, item, sizeof(AstNode));
+ ++list->node_len;
+}
+
+AstNode* ast_new_int(int v) {
+ AstNode* e = ast_new(AstNodeKind_int_expr);
+ e->node_int_value = v;
+ e->ty = type_new(TypeKind_int);
+ return e;
+}
+
+AstNode* ast_new_unary_expr(int op, AstNode* operand) {
+ AstNode* e = ast_new(AstNodeKind_unary_expr);
+ e->node_op = op;
+ e->node_operand = operand;
+ e->ty = type_new(TypeKind_int);
+ return e;
+}
+
+AstNode* ast_new_binary_expr(int op, AstNode* lhs, AstNode* rhs) {
+ AstNode* e = ast_new(AstNodeKind_binary_expr);
+ e->node_op = op;
+ e->node_lhs = lhs;
+ e->node_rhs = rhs;
+ if (op == TokenKind_plus) {
+ if (lhs->ty->kind == TypeKind_ptr) {
+ e->ty = lhs->ty;
+ } else if (lhs->ty->kind == TypeKind_array) {
+ e->ty = type_array_to_ptr(lhs->ty);
+ } else if (rhs->ty->kind == TypeKind_ptr) {
+ e->ty = rhs->ty;
+ } else if (rhs->ty->kind == TypeKind_array) {
+ e->ty = type_array_to_ptr(rhs->ty);
+ } else {
+ e->ty = type_new(TypeKind_int);
+ }
+ } else if (op == TokenKind_minus) {
+ if (lhs->ty->kind == TypeKind_ptr) {
+ e->ty = lhs->ty;
+ } else if (lhs->ty->kind == TypeKind_array) {
+ e->ty = type_array_to_ptr(lhs->ty);
+ } else {
+ e->ty = type_new(TypeKind_int);
+ }
+ } else {
+ e->ty = type_new(TypeKind_int);
+ }
+ return e;
+}
+
+AstNode* ast_new_assign_expr(int op, AstNode* lhs, AstNode* rhs) {
+ AstNode* e = ast_new(AstNodeKind_assign_expr);
+ e->node_op = op;
+ e->node_lhs = lhs;
+ e->node_rhs = rhs;
+ e->ty = lhs->ty;
+ return e;
+}
+
+AstNode* ast_new_assign_add_expr(AstNode* lhs, AstNode* rhs) {
+ if (lhs->ty->base) {
+ rhs = ast_new_binary_expr(TokenKind_star, rhs, ast_new_int(type_sizeof(lhs->ty->base)));
+ } else if (rhs->ty->base) {
+ lhs = ast_new_binary_expr(TokenKind_star, lhs, ast_new_int(type_sizeof(rhs->ty->base)));
+ }
+ return ast_new_assign_expr(TokenKind_assign_add, lhs, rhs);
+}
+
+AstNode* ast_new_assign_sub_expr(AstNode* lhs, AstNode* rhs) {
+ if (lhs->ty->base) {
+ rhs = ast_new_binary_expr(TokenKind_star, rhs, ast_new_int(type_sizeof(lhs->ty->base)));
+ }
+ return ast_new_assign_expr(TokenKind_assign_sub, lhs, rhs);
+}
+
+AstNode* ast_new_ref_expr(AstNode* operand) {
+ AstNode* e = ast_new(AstNodeKind_ref_expr);
+ e->node_operand = operand;
+ e->ty = type_new_ptr(operand->ty);
+ return e;
+}
+
+AstNode* ast_new_deref_expr(AstNode* operand) {
+ AstNode* e = ast_new(AstNodeKind_deref_expr);
+ e->node_operand = operand;
+ e->ty = operand->ty->base;
+ return e;
+}
+
+AstNode* ast_new_member_access_expr(AstNode* obj, const char* name) {
+ AstNode* e = ast_new(AstNodeKind_deref_expr);
+ e->node_operand = ast_new_binary_expr(TokenKind_plus, obj, ast_new_int(type_offsetof(obj->ty->base, name)));
+ e->ty = type_member_typeof(obj->ty->base, name);
+ e->node_operand->ty = type_new_ptr(e->ty);
+ return e;
+}
+
+int type_sizeof_struct(Type* ty) {
+ int next_offset = 0;
+ int struct_align = 0;
+
+ for (int i = 0; i < ty->def->node_members->node_len; ++i) {
+ AstNode* member = ty->def->node_members->node_items + i;
+ int size = type_sizeof(member->ty);
+ int align = type_alignof(member->ty);
+
+ next_offset = to_aligned(next_offset, align);
+ next_offset += size;
+ if (struct_align < align) {
+ struct_align = align;
+ }
+ }
+ return to_aligned(next_offset, struct_align);
+}
+
+int type_sizeof_union(Type* ty) {
+ int union_size = 0;
+ int union_align = 0;
+
+ for (int i = 0; i < ty->def->node_members->node_len; ++i) {
+ AstNode* member = ty->def->node_members->node_items + i;
+ int size = type_sizeof(member->ty);
+ int align = type_alignof(member->ty);
+
+ size = to_aligned(size, align);
+ if (union_size < size) {
+ union_size = size;
+ }
+ if (union_align < align) {
+ union_align = align;
+ }
+ }
+ return to_aligned(union_size, union_align);
+}
+
+int type_alignof_struct(Type* ty) {
+ int struct_align = 0;
+
+ for (int i = 0; i < ty->def->node_members->node_len; ++i) {
+ AstNode* member = ty->def->node_members->node_items + i;
+ int align = type_alignof(member->ty);
+
+ if (struct_align < align) {
+ struct_align = align;
+ }
+ }
+ return struct_align;
+}
+
+int type_alignof_union(Type* ty) {
+ int union_align = 0;
+
+ for (int i = 0; i < ty->def->node_members->node_len; ++i) {
+ AstNode* member = ty->def->node_members->node_items + i;
+ int align = type_alignof(member->ty);
+
+ if (union_align < align) {
+ union_align = align;
+ }
+ }
+ return union_align;
+}
+
+int type_offsetof(Type* ty, const char* name) {
+ if (ty->kind == TypeKind_union) {
+ return 0;
+ }
+ if (ty->kind != TypeKind_struct) {
+ fatal_error("type_offsetof: type is neither a struct nor a union");
+ }
+
+ int next_offset = 0;
+
+ for (int i = 0; i < ty->def->node_members->node_len; ++i) {
+ AstNode* member = ty->def->node_members->node_items + i;
+ int size = type_sizeof(member->ty);
+ int align = type_alignof(member->ty);
+
+ next_offset = to_aligned(next_offset, align);
+ if (strcmp(member->name, name) == 0) {
+ return next_offset;
+ }
+ next_offset += size;
+ }
+
+ fatal_error("type_offsetof: member not found");
+}
+
+Type* type_member_typeof(Type* ty, const char* name) {
+ if (ty->kind != TypeKind_struct && ty->kind != TypeKind_union) {
+ fatal_error("type_member_typeof: type is neither a struct nor a union");
+ }
+
+ for (int i = 0; i < ty->def->node_members->node_len; ++i) {
+ AstNode* member = ty->def->node_members->node_items + i;
+ if (strcmp(member->name, name) == 0) {
+ return member->ty;
+ }
+ }
+
+ fatal_error("type_offsetof: member not found");
+}
diff --git a/src/codegen.c b/src/codegen.c
new file mode 100644
index 0000000..a1167d4
--- /dev/null
+++ b/src/codegen.c
@@ -0,0 +1,595 @@
+enum GenMode {
+ GenMode_lval,
+ GenMode_rval,
+};
+typedef enum GenMode GenMode;
+
+struct CodeGen {
+ int next_label;
+ int* loop_labels;
+ AstNode* current_func;
+};
+typedef struct CodeGen CodeGen;
+
+CodeGen* codegen_new() {
+ CodeGen* g = calloc(1, sizeof(CodeGen));
+ g->next_label = 1;
+ g->loop_labels = calloc(1024, sizeof(int));
+ return g;
+}
+
+int codegen_new_label(CodeGen* g) {
+ int new_label = g->next_label;
+ ++g->next_label;
+ return new_label;
+}
+
+void codegen_expr(CodeGen* g, AstNode* ast, GenMode gen_mode);
+void codegen_stmt(CodeGen* g, AstNode* ast);
+
+const char* param_reg(int n) {
+ if (n == 0) {
+ return "rdi";
+ } else if (n == 1) {
+ return "rsi";
+ } else if (n == 2) {
+ return "rdx";
+ } else if (n == 3) {
+ return "rcx";
+ } else if (n == 4) {
+ return "r8";
+ } else if (n == 5) {
+ return "r9";
+ } else {
+ unreachable();
+ }
+}
+
+void codegen_func_prologue(CodeGen* g, AstNode* ast) {
+ printf(" push rbp\n");
+ printf(" mov rbp, rsp\n");
+ for (int i = 0; i < ast->node_params->node_len; ++i) {
+ printf(" push %s\n", param_reg(i));
+ }
+ printf(" sub rsp, %d\n", ast->node_stack_size);
+}
+
+void codegen_func_epilogue(CodeGen* g, AstNode* ast) {
+ printf(" mov rsp, rbp\n");
+ printf(" pop rbp\n");
+ printf(" ret\n");
+}
+
+void codegen_int_expr(CodeGen* g, AstNode* ast) {
+ printf(" push %d\n", ast->node_int_value);
+}
+
+void codegen_str_expr(CodeGen* g, AstNode* ast) {
+ printf(" mov rax, OFFSET FLAG:.Lstr__%d\n", ast->node_idx);
+ printf(" push rax\n");
+}
+
+void codegen_unary_expr(CodeGen* g, AstNode* ast) {
+ codegen_expr(g, ast->node_operand, GenMode_rval);
+ if (ast->node_op == TokenKind_not) {
+ printf(" pop rax\n");
+ printf(" mov rdi, 0\n");
+ printf(" cmp rax, rdi\n");
+ printf(" sete al\n");
+ printf(" movzb rax, al\n");
+ printf(" push rax\n");
+ } else {
+ unreachable();
+ }
+}
+
+void codegen_ref_expr(CodeGen* g, AstNode* ast, GenMode gen_mode) {
+ codegen_expr(g, ast->node_operand, GenMode_lval);
+}
+
+// "reg" stores the address of the expression to be pushed.
+void codegen_push_expr(const char* reg, int size) {
+ if (size == 1) {
+ printf(" movsx %s, BYTE PTR [%s]\n", reg, reg);
+ printf(" push %s\n", reg);
+ } else if (size == 2) {
+ printf(" movsx %s, WORD PTR [%s]\n", reg, reg);
+ printf(" push %s\n", reg);
+ } else if (size == 4) {
+ printf(" movsxd %s, DWORD PTR [%s]\n", reg, reg);
+ printf(" push %s\n", reg);
+ } else if (size == 8) {
+ printf(" mov %s, [%s]\n", reg, reg);
+ printf(" push %s\n", reg);
+ } else {
+ // Perform bitwise copy. Use r10 register as temporary space.
+ // Note: rsp must be aligned to 8.
+ printf(" sub rsp, %d\n", to_aligned(size, 8));
+ for (int i = 0; i < size; ++i) {
+ // Copy a sinle byte from the address that "reg" points to to the stack via r10 register.
+ printf(" mov r10b, [%s+%d]\n", reg, i);
+ printf(" mov [rsp+%d], r10b\n", i);
+ }
+ }
+}
+
+void codegen_lval2rval(Type* ty) {
+ if (ty->kind == TypeKind_array) {
+ return;
+ }
+
+ printf(" pop rax\n");
+ codegen_push_expr("rax", type_sizeof(ty));
+}
+
+void codegen_deref_expr(CodeGen* g, AstNode* ast, GenMode gen_mode) {
+ codegen_expr(g, ast->node_operand, GenMode_rval);
+ if (gen_mode == GenMode_rval) {
+ codegen_lval2rval(ast->node_operand->ty->base);
+ }
+}
+
+void codegen_logical_expr(CodeGen* g, AstNode* ast) {
+ int label = codegen_new_label(g);
+
+ if (ast->node_op == TokenKind_andand) {
+ codegen_expr(g, ast->node_lhs, GenMode_rval);
+ printf(" pop rax\n");
+ printf(" cmp rax, 0\n");
+ printf(" je .Lelse%d\n", label);
+ codegen_expr(g, ast->node_rhs, GenMode_rval);
+ printf(" jmp .Lend%d\n", label);
+ printf(".Lelse%d:\n", label);
+ printf(" push 0\n");
+ printf(".Lend%d:\n", label);
+ } else {
+ codegen_expr(g, ast->node_lhs, GenMode_rval);
+ printf(" pop rax\n");
+ printf(" cmp rax, 0\n");
+ printf(" je .Lelse%d\n", label);
+ printf(" push 1\n");
+ printf(" jmp .Lend%d\n", label);
+ printf(".Lelse%d:\n", label);
+ codegen_expr(g, ast->node_rhs, GenMode_rval);
+ printf(".Lend%d:\n", label);
+ }
+}
+
+void codegen_binary_expr(CodeGen* g, AstNode* ast, GenMode gen_mode) {
+ codegen_expr(g, ast->node_lhs, gen_mode);
+ codegen_expr(g, ast->node_rhs, gen_mode);
+ printf(" pop rdi\n");
+ printf(" pop rax\n");
+ if (ast->node_op == TokenKind_plus) {
+ printf(" add rax, rdi\n");
+ } else if (ast->node_op == TokenKind_minus) {
+ printf(" sub rax, rdi\n");
+ } else if (ast->node_op == TokenKind_star) {
+ printf(" imul rax, rdi\n");
+ } else if (ast->node_op == TokenKind_slash) {
+ printf(" cqo\n");
+ printf(" idiv rdi\n");
+ } else if (ast->node_op == TokenKind_percent) {
+ printf(" cqo\n");
+ printf(" idiv rdi\n");
+ printf(" mov rax, rdx\n");
+ } else if (ast->node_op == TokenKind_or) {
+ printf(" or rax, rdi\n");
+ } else if (ast->node_op == TokenKind_lshift) {
+ printf(" mov rcx, rdi\n");
+ printf(" shl rax, cl\n");
+ } else if (ast->node_op == TokenKind_rshift) {
+ // TODO: check if the operand is signed or unsigned
+ printf(" mov rcx, rdi\n");
+ printf(" sar rax, cl\n");
+ } else if (ast->node_op == TokenKind_eq) {
+ printf(" cmp rax, rdi\n");
+ printf(" sete al\n");
+ printf(" movzb rax, al\n");
+ } else if (ast->node_op == TokenKind_ne) {
+ printf(" cmp rax, rdi\n");
+ printf(" setne al\n");
+ printf(" movzb rax, al\n");
+ } else if (ast->node_op == TokenKind_lt) {
+ printf(" cmp rax, rdi\n");
+ printf(" setl al\n");
+ printf(" movzb rax, al\n");
+ } else if (ast->node_op == TokenKind_le) {
+ printf(" cmp rax, rdi\n");
+ printf(" setle al\n");
+ printf(" movzb rax, al\n");
+ } else {
+ unreachable();
+ }
+ printf(" push rax\n");
+}
+
+void codegen_cond_expr(CodeGen* g, AstNode* ast, GenMode gen_mode) {
+ int label = codegen_new_label(g);
+
+ codegen_expr(g, ast->node_cond, GenMode_rval);
+ printf(" pop rax\n");
+ printf(" cmp rax, 0\n");
+ printf(" je .Lelse%d\n", label);
+ codegen_expr(g, ast->node_then, gen_mode);
+ printf(" jmp .Lend%d\n", label);
+ printf(".Lelse%d:\n", label);
+ codegen_expr(g, ast->node_else, gen_mode);
+ printf(".Lend%d:\n", label);
+}
+
+void codegen_assign_expr_helper(CodeGen* g, AstNode* ast) {
+ if (ast->node_op == TokenKind_assign) {
+ return;
+ }
+
+ printf(" pop rdi\n");
+ printf(" push [rsp]\n");
+ codegen_lval2rval(ast->node_lhs->ty);
+ printf(" pop rax\n");
+
+ if (ast->node_op == TokenKind_assign_add) {
+ printf(" add rax, rdi\n");
+ } else if (ast->node_op == TokenKind_assign_sub) {
+ printf(" sub rax, rdi\n");
+ } else if (ast->node_op == TokenKind_assign_mul) {
+ printf(" imul rax, rdi\n");
+ } else if (ast->node_op == TokenKind_assign_div) {
+ printf(" cqo\n");
+ printf(" idiv rdi\n");
+ } else if (ast->node_op == TokenKind_assign_mod) {
+ printf(" cqo\n");
+ printf(" idiv rdi\n");
+ printf(" mov rax, rdx\n");
+ } else {
+ unreachable();
+ }
+
+ printf(" push rax\n");
+}
+
+void codegen_assign_expr(CodeGen* g, AstNode* ast) {
+ int sizeof_lhs = type_sizeof(ast->node_lhs->ty);
+ int sizeof_rhs = type_sizeof(ast->node_rhs->ty);
+
+ codegen_expr(g, ast->node_lhs, GenMode_lval);
+ codegen_expr(g, ast->node_rhs, GenMode_rval);
+ codegen_assign_expr_helper(g, ast);
+ if (sizeof_lhs == 1) {
+ printf(" pop rdi\n");
+ printf(" pop rax\n");
+ printf(" mov BYTE PTR [rax], dil\n");
+ printf(" push rdi\n");
+ } else if (sizeof_lhs == 2) {
+ printf(" pop rdi\n");
+ printf(" pop rax\n");
+ printf(" mov WORD PTR [rax], di\n");
+ printf(" push rdi\n");
+ } else if (sizeof_lhs == 4) {
+ printf(" pop rdi\n");
+ printf(" pop rax\n");
+ printf(" mov DWORD PTR [rax], edi\n");
+ printf(" push rdi\n");
+ } else if (sizeof_lhs == 8) {
+ printf(" pop rdi\n");
+ printf(" pop rax\n");
+ printf(" mov [rax], rdi\n");
+ printf(" push rdi\n");
+ } else {
+ if (ast->node_op != TokenKind_assign) {
+ unimplemented();
+ }
+ // Note: rsp must be aligned to 8.
+ int aligned_size = to_aligned(sizeof_lhs, 8);
+ printf(" mov rax, [rsp+%d]\n", aligned_size);
+ // Perform bitwise copy. Use r10 register as temporary space.
+ for (int i = 0; i < aligned_size; ++i) {
+ // Copy a sinle byte from the stack to the address that rax points to via r10 register.
+ printf(" mov r10b, [rsp+%d]\n", i);
+ printf(" mov [rax+%d], r10b\n", i);
+ }
+ // Pop the RHS value and the LHS address.
+ printf(" add rsp, %d\n", aligned_size + 8);
+ // TODO: dummy
+ printf(" push 0\n");
+ }
+}
+
+void codegen_func_call(CodeGen* g, AstNode* ast) {
+ const char* func_name = ast->name;
+
+ if (strcmp(func_name, "__ducc_va_start") == 0) {
+ int stack_size = g->current_func->node_stack_size;
+ printf(" # __ducc_va_start BEGIN\n");
+ for (int i = 0; i < 6; ++i) {
+ printf(" mov rax, %s\n", param_reg(i));
+ printf(" mov [rbp-%d], rax\n", 8 + (stack_size - 4 - i) * 8);
+ }
+ AstNode* va_list_args = ast->node_args->node_items;
+ codegen_expr(g, va_list_args, GenMode_lval);
+ printf(" pop rdi\n");
+ printf(" mov rax, rbp\n");
+ printf(" sub rax, %d\n", 8 + (stack_size - 1) * 8);
+ printf(" mov [rdi], rax\n");
+ printf(" mov DWORD PTR [rax], 8\n");
+ printf(" mov DWORD PTR [rax+4], 0\n");
+ printf(" mov QWORD PTR [rax+8], 0\n");
+ printf(" mov rdi, rbp\n");
+ printf(" sub rdi, %d\n", 8 + (stack_size - 4) * 8);
+ printf(" mov QWORD PTR [rax+16], rdi\n");
+ printf(" # __ducc_va_start END\n");
+ return;
+ }
+
+ AstNode* args = ast->node_args;
+ for (int i = 0; i < args->node_len; ++i) {
+ AstNode* arg = args->node_items + i;
+ codegen_expr(g, arg, GenMode_rval);
+ }
+ for (int i = args->node_len - 1; i >= 0; --i) {
+ printf(" pop %s\n", param_reg(i));
+ }
+
+ int label = codegen_new_label(g);
+
+ printf(" mov rax, rsp\n");
+ printf(" and rax, 15\n");
+ printf(" cmp rax, 0\n");
+ printf(" je .Laligned%d\n", label);
+
+ printf(" mov rax, 0\n");
+ printf(" sub rsp, 8\n");
+ printf(" call %s\n", func_name);
+ printf(" add rsp, 8\n");
+ printf(" push rax\n");
+
+ printf(" jmp .Lend%d\n", label);
+ printf(".Laligned%d:\n", label);
+
+ printf(" mov rax, 0\n");
+ printf(" call %s\n", func_name);
+ printf(" push rax\n");
+
+ printf(".Lend%d:\n", label);
+}
+
+void codegen_lvar(CodeGen* g, AstNode* ast, GenMode gen_mode) {
+ printf(" mov rax, rbp\n");
+ printf(" sub rax, %d\n", ast->node_stack_offset);
+ printf(" push rax\n");
+ if (gen_mode == GenMode_rval) {
+ codegen_lval2rval(ast->ty);
+ }
+}
+
+void codegen_gvar(CodeGen* g, AstNode* ast, GenMode gen_mode) {
+ printf(" lea rax, %s[rip]\n", ast->name);
+ printf(" push rax\n");
+ if (gen_mode == GenMode_rval) {
+ codegen_lval2rval(ast->ty);
+ }
+}
+
+void codegen_composite_expr(CodeGen* g, AstNode* ast) {
+ // Standard C does not have composite expression, but ducc internally has.
+ for (int i = 0; i < ast->node_len; ++i) {
+ AstNode* expr = ast->node_items + i;
+ codegen_expr(g, expr, GenMode_rval);
+ if (i != ast->node_len - 1) {
+ // TODO: the expression on the stack can be more than 8 bytes.
+ printf(" pop rax\n");
+ }
+ }
+}
+
+void codegen_expr(CodeGen* g, AstNode* ast, GenMode gen_mode) {
+ if (ast->kind == AstNodeKind_int_expr) {
+ codegen_int_expr(g, ast);
+ } else if (ast->kind == AstNodeKind_str_expr) {
+ codegen_str_expr(g, ast);
+ } else if (ast->kind == AstNodeKind_unary_expr) {
+ codegen_unary_expr(g, ast);
+ } else if (ast->kind == AstNodeKind_ref_expr) {
+ codegen_ref_expr(g, ast, gen_mode);
+ } else if (ast->kind == AstNodeKind_deref_expr) {
+ codegen_deref_expr(g, ast, gen_mode);
+ } else if (ast->kind == AstNodeKind_binary_expr) {
+ codegen_binary_expr(g, ast, gen_mode);
+ } else if (ast->kind == AstNodeKind_cond_expr) {
+ codegen_cond_expr(g, ast, gen_mode);
+ } else if (ast->kind == AstNodeKind_logical_expr) {
+ codegen_logical_expr(g, ast);
+ } else if (ast->kind == AstNodeKind_assign_expr) {
+ codegen_assign_expr(g, ast);
+ } else if (ast->kind == AstNodeKind_func_call) {
+ codegen_func_call(g, ast);
+ } else if (ast->kind == AstNodeKind_lvar) {
+ codegen_lvar(g, ast, gen_mode);
+ } else if (ast->kind == AstNodeKind_gvar) {
+ codegen_gvar(g, ast, gen_mode);
+ } else if (ast->kind == AstNodeKind_list) {
+ codegen_composite_expr(g, ast);
+ } else {
+ unreachable();
+ }
+}
+
+void codegen_return_stmt(CodeGen* g, AstNode* ast) {
+ if (ast->node_expr) {
+ codegen_expr(g, ast->node_expr, GenMode_rval);
+ printf(" pop rax\n");
+ }
+ codegen_func_epilogue(g, ast);
+}
+
+void codegen_if_stmt(CodeGen* g, AstNode* ast) {
+ int label = codegen_new_label(g);
+
+ codegen_expr(g, ast->node_cond, GenMode_rval);
+ printf(" pop rax\n");
+ printf(" cmp rax, 0\n");
+ printf(" je .Lelse%d\n", label);
+ codegen_stmt(g, ast->node_then);
+ printf(" jmp .Lend%d\n", label);
+ printf(".Lelse%d:\n", label);
+ if (ast->node_else) {
+ codegen_stmt(g, ast->node_else);
+ }
+ printf(".Lend%d:\n", label);
+}
+
+void codegen_for_stmt(CodeGen* g, AstNode* ast) {
+ int label = codegen_new_label(g);
+ ++g->loop_labels;
+ *g->loop_labels = label;
+
+ if (ast->node_init) {
+ codegen_expr(g, ast->node_init, GenMode_rval);
+ printf(" pop rax\n");
+ }
+ printf(".Lbegin%d:\n", label);
+ codegen_expr(g, ast->node_cond, GenMode_rval);
+ printf(" pop rax\n");
+ printf(" cmp rax, 0\n");
+ printf(" je .Lend%d\n", label);
+ codegen_stmt(g, ast->node_body);
+ printf(".Lcontinue%d:\n", label);
+ if (ast->node_update) {
+ codegen_expr(g, ast->node_update, GenMode_rval);
+ printf(" pop rax\n");
+ }
+ printf(" jmp .Lbegin%d\n", label);
+ printf(".Lend%d:\n", label);
+
+ --g->loop_labels;
+}
+
+void codegen_do_while_stmt(CodeGen* g, AstNode* ast) {
+ int label = codegen_new_label(g);
+ ++g->loop_labels;
+ *g->loop_labels = label;
+
+ printf(".Lbegin%d:\n", label);
+ codegen_stmt(g, ast->node_body);
+ printf(".Lcontinue%d:\n", label);
+ codegen_expr(g, ast->node_cond, GenMode_rval);
+ printf(" pop rax\n");
+ printf(" cmp rax, 0\n");
+ printf(" je .Lend%d\n", label);
+ printf(" jmp .Lbegin%d\n", label);
+ printf(".Lend%d:\n", label);
+
+ --g->loop_labels;
+}
+
+void codegen_break_stmt(CodeGen* g, AstNode* ast) {
+ int label = *g->loop_labels;
+ printf(" jmp .Lend%d\n", label);
+}
+
+void codegen_continue_stmt(CodeGen* g, AstNode* ast) {
+ int label = *g->loop_labels;
+ printf(" jmp .Lcontinue%d\n", label);
+}
+
+void codegen_expr_stmt(CodeGen* g, AstNode* ast) {
+ codegen_expr(g, ast->node_expr, GenMode_rval);
+ // TODO: the expression on the stack can be more than 8 bytes.
+ printf(" pop rax\n");
+}
+
+void codegen_var_decl(CodeGen* g, AstNode* ast) {
+}
+
+void codegen_nop(CodeGen* g, AstNode* ast) {
+}
+
+void codegen_block_stmt(CodeGen* g, AstNode* ast) {
+ for (int i = 0; i < ast->node_len; ++i) {
+ AstNode* stmt = ast->node_items + i;
+ codegen_stmt(g, stmt);
+ }
+}
+
+void codegen_stmt(CodeGen* g, AstNode* ast) {
+ if (ast->kind == AstNodeKind_list) {
+ codegen_block_stmt(g, ast);
+ } else if (ast->kind == AstNodeKind_return_stmt) {
+ codegen_return_stmt(g, ast);
+ } else if (ast->kind == AstNodeKind_if_stmt) {
+ codegen_if_stmt(g, ast);
+ } else if (ast->kind == AstNodeKind_for_stmt) {
+ codegen_for_stmt(g, ast);
+ } else if (ast->kind == AstNodeKind_do_while_stmt) {
+ codegen_do_while_stmt(g, ast);
+ } else if (ast->kind == AstNodeKind_break_stmt) {
+ codegen_break_stmt(g, ast);
+ } else if (ast->kind == AstNodeKind_continue_stmt) {
+ codegen_continue_stmt(g, ast);
+ } else if (ast->kind == AstNodeKind_expr_stmt) {
+ codegen_expr_stmt(g, ast);
+ } else if (ast->kind == AstNodeKind_lvar_decl) {
+ codegen_var_decl(g, ast);
+ } else if (ast->kind == AstNodeKind_nop) {
+ codegen_nop(g, ast);
+ } else {
+ unreachable();
+ }
+}
+
+void codegen_func(CodeGen* g, AstNode* ast) {
+ g->current_func = ast;
+ printf("%s:\n", ast->name);
+
+ codegen_func_prologue(g, ast);
+ codegen_stmt(g, ast->node_body);
+ if (strcmp(ast->name, "main") == 0) {
+ // C99: 5.1.2.2.3
+ printf(" mov rax, 0\n");
+ }
+ codegen_func_epilogue(g, ast);
+
+ printf("\n");
+ g->current_func = NULL;
+}
+
+void codegen(Program* prog) {
+ CodeGen* g = codegen_new();
+
+ printf(".intel_syntax noprefix\n\n");
+
+ // For GNU ld:
+ // https://sourceware.org/binutils/docs/ld/Options.html
+ printf(".section .note.GNU-stack,\"\",@progbits\n\n");
+
+ printf(".section .rodata\n\n");
+ for (int i = 0; prog->str_literals[i]; ++i) {
+ printf(".Lstr__%d:\n", i + 1);
+ printf(" .string \"%s\"\n\n", prog->str_literals[i]);
+ }
+
+ printf(".data\n\n");
+ for (int i = 0; i < prog->vars->node_len; ++i) {
+ AstNode* var = prog->vars->node_items + i;
+ if (var->node_expr) {
+ if (var->ty->kind == TypeKind_char)
+ printf(" %s: .byte %d\n", var->name, var->node_expr->node_int_value);
+ else if (var->ty->kind == TypeKind_short)
+ printf(" %s: .word %d\n", var->name, var->node_expr->node_int_value);
+ else if (var->ty->kind == TypeKind_int)
+ printf(" %s: .int %d\n", var->name, var->node_expr->node_int_value);
+ else
+ unimplemented();
+ } else {
+ printf(" %s: .zero %d\n", var->name, type_sizeof(var->ty));
+ }
+ }
+
+ printf(".globl main\n\n");
+
+ printf(".text\n\n");
+ for (int i = 0; i < prog->funcs->node_len; ++i) {
+ AstNode* func = prog->funcs->node_items + i;
+ codegen_func(g, func);
+ }
+}
diff --git a/src/common.c b/src/common.c
new file mode 100644
index 0000000..1b89f8c
--- /dev/null
+++ b/src/common.c
@@ -0,0 +1,39 @@
+void fatal_error(const char* msg, ...) {
+ va_list args;
+ va_start(args, msg);
+ vfprintf(stderr, msg, args);
+ va_end(args);
+ fprintf(stderr, "\n");
+ exit(1);
+}
+
+#define unreachable() fatal_error("%s:%d: unreachable", __FILE__, __LINE__)
+
+#define unimplemented() fatal_error("%s:%d: unimplemented", __FILE__, __LINE__)
+
+struct StrBuilder {
+ size_t len;
+ size_t capacity;
+ char* buf;
+};
+typedef struct StrBuilder StrBuilder;
+
+void strbuilder_init(StrBuilder* b) {
+ b->len = 0;
+ b->capacity = 16;
+ b->buf = calloc(b->capacity, sizeof(char));
+}
+
+// `size` must include a trailing null byte.
+void strbuilder_reserve(StrBuilder* b, size_t size) {
+ if (size <= b->capacity)
+ return;
+ b->capacity *= 2;
+ b->buf = realloc(b->buf, b->capacity * sizeof(char));
+ memset(b->buf + b->len, 0, (b->capacity - b->len) * sizeof(char));
+}
+
+void strbuilder_append_char(StrBuilder* b, int c) {
+ strbuilder_reserve(b, b->len + 1 + 1);
+ b->buf[b->len++] = c;
+}
diff --git a/src/io.c b/src/io.c
new file mode 100644
index 0000000..5cd5e86
--- /dev/null
+++ b/src/io.c
@@ -0,0 +1,99 @@
+struct SourceLocation {
+ const char* filename;
+ int line;
+};
+typedef struct SourceLocation SourceLocation;
+
+struct InFile {
+ const char* buf;
+ int pos;
+ SourceLocation loc;
+};
+typedef struct InFile InFile;
+
+InFile* infile_open(const char* filename) {
+ FILE* in;
+ if (strcmp(filename, "-") == 0) {
+ in = stdin;
+ } else {
+ in = fopen(filename, "rb");
+ }
+ if (!in) {
+ return NULL;
+ }
+
+ size_t buf_size = 1024 * 10;
+ char* buf = calloc(buf_size, sizeof(char));
+ char* cur = buf;
+ char* tmp = calloc(1024, sizeof(char));
+
+ while (fgets(tmp, 1024, in)) {
+ size_t len = strlen(tmp);
+ size_t used_size = cur - buf;
+
+ if (buf_size <= used_size + len) {
+ size_t old_size = buf_size;
+ buf_size *= 2;
+ buf = realloc(buf, buf_size);
+ memset(buf + old_size, 0, buf_size - old_size);
+ cur = buf + used_size;
+ }
+
+ memcpy(cur, tmp, len);
+ cur += len;
+ }
+ fclose(in);
+
+ InFile* in_file = calloc(1, sizeof(InFile));
+ in_file->buf = buf;
+ in_file->loc.filename = filename;
+ in_file->loc.line = 1;
+ return in_file;
+}
+
+BOOL infile_eof(InFile* f) {
+ return f->buf[f->pos] == '\0';
+}
+
+char infile_peek_char(InFile* f) {
+ char c = f->buf[f->pos];
+
+ // Skip a pair of backslash and new-line.
+ if (c == '\\') {
+ char c2 = f->buf[f->pos + 1];
+ // C23: 5.1.1.2
+ // A source file that is not empty shall end in a new-line character, which shall not be immediately preceded by
+ // a backslash character before any such splicing takes place.
+ if (c2 == '\0') {
+ fatal_error("%s:%d: <new-line> expected, but got <eof>", f->loc.filename, f->loc.line);
+ }
+ // TODO: crlf
+ if (c2 == '\r' || c2 == '\n') {
+ f->pos += 2;
+ return infile_peek_char(f);
+ }
+ }
+
+ // Normalize new-line.
+ // TODO: crlf
+ if (c == '\r')
+ c = '\n';
+ return c;
+}
+
+char infile_next_char(InFile* f) {
+ char c = infile_peek_char(f);
+ ++f->pos;
+ if (c == '\n')
+ ++f->loc.line;
+ return c;
+}
+
+BOOL infile_consume_if(InFile* f, char expected) {
+ if (infile_peek_char(f) == expected) {
+ infile_next_char(f);
+ return TRUE;
+ } else {
+ return FALSE;
+ }
+}
diff --git a/src/parse.c b/src/parse.c
new file mode 100644
index 0000000..8f06a83
--- /dev/null
+++ b/src/parse.c
@@ -0,0 +1,1404 @@
+#define LVAR_MAX 32
+
+struct LocalVar {
+ const char* name;
+ Type* ty;
+ int stack_offset;
+};
+typedef struct LocalVar LocalVar;
+
+struct Scope {
+ struct Scope* outer;
+ const char** lvar_names;
+ int* lvar_indices;
+};
+typedef struct Scope Scope;
+
+struct GlobalVar {
+ const char* name;
+ Type* ty;
+};
+typedef struct GlobalVar GlobalVar;
+
+struct Func {
+ const char* name;
+ Type* ty;
+};
+typedef struct Func Func;
+
+struct Parser {
+ TokenArray* tokens;
+ int pos;
+ LocalVar* lvars;
+ int n_lvars;
+ Scope* scope;
+ GlobalVar* gvars;
+ int n_gvars;
+ Func* funcs;
+ int n_funcs;
+ AstNode* structs;
+ int n_structs;
+ AstNode* unions;
+ int n_unions;
+ AstNode* enums;
+ int n_enums;
+ AstNode* typedefs;
+ int n_typedefs;
+ const char** str_literals;
+ int n_str_literals;
+};
+typedef struct Parser Parser;
+
+Parser* parser_new(TokenArray* tokens) {
+ Parser* p = calloc(1, sizeof(Parser));
+ p->tokens = tokens;
+ p->gvars = calloc(128, sizeof(GlobalVar));
+ p->funcs = calloc(256, sizeof(Func));
+ p->structs = calloc(64, sizeof(AstNode));
+ p->unions = calloc(64, sizeof(AstNode));
+ p->enums = calloc(16, sizeof(AstNode));
+ p->typedefs = calloc(64, sizeof(AstNode));
+ p->str_literals = calloc(1024, sizeof(char*));
+
+ p->funcs[p->n_funcs].name = "__ducc_va_start";
+ p->funcs[p->n_funcs].ty = calloc(1, sizeof(Type));
+ p->funcs[p->n_funcs].ty->kind = TypeKind_void;
+ ++p->n_funcs;
+
+ return p;
+}
+
+Token* peek_token(Parser* p) {
+ return &p->tokens->data[p->pos];
+}
+
+Token* next_token(Parser* p) {
+ return &p->tokens->data[p->pos++];
+}
+
+Token* consume_token_if(Parser* p, TokenKind expected) {
+ if (peek_token(p)->kind == expected) {
+ return next_token(p);
+ } else {
+ return NULL;
+ }
+}
+
+BOOL eof(Parser* p) {
+ return peek_token(p)->kind != TokenKind_eof;
+}
+
+Token* expect(Parser* p, TokenKind expected) {
+ Token* t = next_token(p);
+ if (t->kind == expected) {
+ return t;
+ }
+ fatal_error("%s:%d: expected '%s', but got '%s'", t->loc.filename, t->loc.line, token_kind_stringify(expected),
+ token_stringify(t));
+}
+
+int find_lvar_in_scope(Parser* p, Scope* scope, const char* name) {
+ for (int i = 0; i < LVAR_MAX; ++i) {
+ if (scope->lvar_names[i] && strcmp(scope->lvar_names[i], name) == 0) {
+ return scope->lvar_indices[i];
+ }
+ }
+ return -1;
+}
+
+int find_lvar_in_current_scope(Parser* p, const char* name) {
+ return find_lvar_in_scope(p, p->scope, name);
+}
+
+int find_lvar(Parser* p, const char* name) {
+ Scope* scope = p->scope;
+ while (scope) {
+ int idx = find_lvar_in_scope(p, scope, name);
+ if (idx != -1)
+ return idx;
+ scope = scope->outer;
+ }
+ return -1;
+}
+
+int calc_stack_offset(Parser* p, Type* ty, BOOL is_param) {
+ int align;
+ if (is_param) {
+ if (8 < type_sizeof(ty) || 8 < type_alignof(ty)) {
+ fatal_error("too large");
+ }
+ align = 8;
+ } else {
+ align = type_alignof(ty);
+ }
+
+ int offset;
+ if (p->n_lvars == 0) {
+ offset = 0;
+ } else {
+ offset = p->lvars[p->n_lvars - 1].stack_offset;
+ }
+
+ offset += type_sizeof(ty);
+ return to_aligned(offset, align);
+}
+
+int add_lvar(Parser* p, const char* name, Type* ty, BOOL is_param) {
+ int stack_offset = calc_stack_offset(p, ty, is_param);
+ p->lvars[p->n_lvars].name = name;
+ p->lvars[p->n_lvars].ty = ty;
+ p->lvars[p->n_lvars].stack_offset = stack_offset;
+ for (int i = 0; i < LVAR_MAX; ++i) {
+ if (p->scope->lvar_names[i] == NULL) {
+ p->scope->lvar_names[i] = name;
+ p->scope->lvar_indices[i] = p->n_lvars;
+ break;
+ }
+ }
+ ++p->n_lvars;
+ return stack_offset;
+}
+
+AstNode* generate_temporary_lvar(Parser* p, Type* ty) {
+ int stack_offset = add_lvar(p, NULL, ty, FALSE);
+ AstNode* lvar = ast_new(AstNodeKind_lvar);
+ lvar->name = NULL;
+ lvar->node_stack_offset = stack_offset;
+ lvar->ty = ty;
+ return lvar;
+}
+
+int find_gvar(Parser* p, const char* name) {
+ for (int i = 0; i < p->n_gvars; ++i) {
+ if (strcmp(p->gvars[i].name, name) == 0) {
+ return i;
+ }
+ }
+ return -1;
+}
+
+int find_func(Parser* p, const char* name) {
+ for (int i = 0; i < p->n_funcs; ++i) {
+ if (strcmp(p->funcs[i].name, name) == 0) {
+ return i;
+ }
+ }
+ return -1;
+}
+
+int find_struct(Parser* p, const char* name) {
+ for (int i = 0; i < p->n_structs; ++i) {
+ if (strcmp(p->structs[i].name, name) == 0) {
+ return i;
+ }
+ }
+ return -1;
+}
+
+int find_union(Parser* p, const char* name) {
+ for (int i = 0; i < p->n_unions; ++i) {
+ if (strcmp(p->unions[i].name, name) == 0) {
+ return i;
+ }
+ }
+ return -1;
+}
+
+int find_enum(Parser* p, const char* name) {
+ for (int i = 0; i < p->n_enums; ++i) {
+ if (strcmp(p->enums[i].name, name) == 0) {
+ return i;
+ }
+ }
+ return -1;
+}
+
+int find_enum_member(Parser* p, const char* name) {
+ for (int i = 0; i < p->n_enums; ++i) {
+ for (int j = 0; j < p->enums[i].node_members->node_len; ++j) {
+ if (strcmp(p->enums[i].node_members->node_items[j].name, name) == 0) {
+ return i * 1000 + j;
+ }
+ }
+ }
+ return -1;
+}
+
+int find_typedef(Parser* p, const char* name) {
+ for (int i = 0; i < p->n_typedefs; ++i) {
+ if (strcmp(p->typedefs[i].name, name) == 0) {
+ return i;
+ }
+ }
+ return -1;
+}
+
+void enter_scope(Parser* p) {
+ Scope* outer_scope = p->scope;
+ p->scope = calloc(1, sizeof(Scope));
+ p->scope->outer = outer_scope;
+ p->scope->lvar_names = calloc(LVAR_MAX, sizeof(char*));
+ p->scope->lvar_indices = calloc(LVAR_MAX, sizeof(int));
+}
+
+void leave_scope(Parser* p) {
+ p->scope = p->scope->outer;
+}
+
+void enter_func(Parser* p) {
+ p->lvars = calloc(LVAR_MAX, sizeof(LocalVar));
+ p->n_lvars = 0;
+ enter_scope(p);
+}
+
+void leave_func(Parser* p) {
+ leave_scope(p);
+}
+
+AstNode* parse_assignment_expr(Parser* p);
+AstNode* parse_expr(Parser* p);
+AstNode* parse_stmt(Parser* p);
+
+const char* parse_ident(Parser* p) {
+ return expect(p, TokenKind_ident)->value.string;
+}
+
+int register_str_literal(Parser* p, const char* s) {
+ p->str_literals[p->n_str_literals] = s;
+ ++p->n_str_literals;
+ return p->n_str_literals;
+}
+
+AstNode* parse_primary_expr(Parser* p) {
+ Token* t = next_token(p);
+ if (t->kind == TokenKind_literal_int) {
+ return ast_new_int(t->value.integer);
+ } else if (t->kind == TokenKind_literal_str) {
+ AstNode* e = ast_new(AstNodeKind_str_expr);
+ e->node_idx = register_str_literal(p, t->value.string);
+ e->ty = type_new_static_string(strlen(t->value.string));
+ return e;
+ } else if (t->kind == TokenKind_paren_l) {
+ AstNode* e = parse_expr(p);
+ expect(p, TokenKind_paren_r);
+ return e;
+ } else if (t->kind == TokenKind_ident) {
+ const char* name = t->value.string;
+
+ if (peek_token(p)->kind == TokenKind_paren_l) {
+ AstNode* e = ast_new(AstNodeKind_func_call);
+ int func_idx = find_func(p, name);
+ if (func_idx == -1) {
+ fatal_error("undefined function: %s", name);
+ }
+ e->name = name;
+ e->ty = p->funcs[func_idx].ty;
+ return e;
+ }
+
+ int lvar_idx = find_lvar(p, name);
+ if (lvar_idx == -1) {
+ int gvar_idx = find_gvar(p, name);
+ if (gvar_idx == -1) {
+ int enum_member_idx = find_enum_member(p, name);
+ if (enum_member_idx == -1) {
+ fatal_error("undefined variable: %s", name);
+ }
+ int enum_idx = enum_member_idx / 1000;
+ int n = enum_member_idx % 1000;
+ AstNode* e = ast_new_int(p->enums[enum_idx].node_members->node_items[n].node_int_value);
+ e->ty = type_new(TypeKind_enum);
+ e->ty->def = p->enums + enum_idx;
+ return e;
+ }
+ AstNode* e = ast_new(AstNodeKind_gvar);
+ e->name = name;
+ e->ty = p->gvars[gvar_idx].ty;
+ return e;
+ }
+
+ AstNode* e = ast_new(AstNodeKind_lvar);
+ e->name = name;
+ e->node_stack_offset = p->lvars[lvar_idx].stack_offset;
+ e->ty = p->lvars[lvar_idx].ty;
+ return e;
+ } else {
+ fatal_error("%s:%d: expected an expression, but got '%s'", t->loc.filename, t->loc.line, token_stringify(t));
+ }
+}
+
+AstNode* parse_arg_list(Parser* p) {
+ AstNode* list = ast_new_list(6);
+ while (peek_token(p)->kind != TokenKind_paren_r) {
+ AstNode* arg = parse_assignment_expr(p);
+ ast_append(list, arg);
+ if (!consume_token_if(p, TokenKind_comma)) {
+ break;
+ }
+ }
+ if (list->node_len > 6) {
+ fatal_error("too many arguments");
+ }
+ return list;
+}
+
+// e++
+// tmp1 = &e; tmp2 = *tmp1; *tmp1 += 1; tmp2
+// e--
+// tmp1 = &e; tmp2 = *tmp1; *tmp1 -= 1; tmp2
+AstNode* create_new_postfix_inc_or_dec(Parser* p, AstNode* e, TokenKind op) {
+ AstNode* tmp1_lvar = generate_temporary_lvar(p, type_new_ptr(e->ty));
+ AstNode* tmp2_lvar = generate_temporary_lvar(p, e->ty);
+
+ AstNode* expr1 = ast_new_assign_expr(TokenKind_assign, tmp1_lvar, ast_new_ref_expr(e));
+ AstNode* expr2 = ast_new_assign_expr(TokenKind_assign, tmp2_lvar, ast_new_deref_expr(tmp1_lvar));
+ AstNode* expr3;
+ if (op == TokenKind_plusplus) {
+ expr3 = ast_new_assign_add_expr(ast_new_deref_expr(tmp1_lvar), ast_new_int(1));
+ } else {
+ expr3 = ast_new_assign_sub_expr(ast_new_deref_expr(tmp1_lvar), ast_new_int(1));
+ }
+ AstNode* expr4 = tmp2_lvar;
+
+ AstNode* ret = ast_new_list(4);
+ ast_append(ret, expr1);
+ ast_append(ret, expr2);
+ ast_append(ret, expr3);
+ ast_append(ret, expr4);
+ ret->ty = expr4->ty;
+ return ret;
+}
+
+AstNode* parse_postfix_expr(Parser* p) {
+ AstNode* ret = parse_primary_expr(p);
+ while (1) {
+ TokenKind tk = peek_token(p)->kind;
+ if (consume_token_if(p, TokenKind_paren_l)) {
+ AstNode* args = parse_arg_list(p);
+ expect(p, TokenKind_paren_r);
+ ret->node_args = args;
+ } else if (consume_token_if(p, TokenKind_bracket_l)) {
+ AstNode* idx = parse_expr(p);
+ expect(p, TokenKind_bracket_r);
+ idx = ast_new_binary_expr(TokenKind_star, idx, ast_new_int(type_sizeof(ret->ty->base)));
+ ret = ast_new_deref_expr(ast_new_binary_expr(TokenKind_plus, ret, idx));
+ } else if (consume_token_if(p, TokenKind_dot)) {
+ const char* name = parse_ident(p);
+ ret = ast_new_member_access_expr(ast_new_ref_expr(ret), name);
+ } else if (consume_token_if(p, TokenKind_arrow)) {
+ const char* name = parse_ident(p);
+ ret = ast_new_member_access_expr(ret, name);
+ } else if (consume_token_if(p, TokenKind_plusplus)) {
+ ret = create_new_postfix_inc_or_dec(p, ret, TokenKind_plusplus);
+ } else if (consume_token_if(p, TokenKind_minusminus)) {
+ ret = create_new_postfix_inc_or_dec(p, ret, TokenKind_minusminus);
+ } else {
+ break;
+ }
+ }
+ return ret;
+}
+
+BOOL is_type_token(Parser* p, Token* token) {
+ if (token->kind == TokenKind_keyword_int || token->kind == TokenKind_keyword_short ||
+ token->kind == TokenKind_keyword_long || token->kind == TokenKind_keyword_char ||
+ token->kind == TokenKind_keyword_void || token->kind == TokenKind_keyword_enum ||
+ token->kind == TokenKind_keyword_struct || token->kind == TokenKind_keyword_union ||
+ token->kind == TokenKind_keyword_const) {
+ return TRUE;
+ }
+ if (token->kind != TokenKind_ident) {
+ return FALSE;
+ }
+ return find_typedef(p, token->value.string) != -1;
+}
+
+Type* parse_type(Parser* p) {
+ Token* t = next_token(p);
+ if (t->kind == TokenKind_keyword_const) {
+ t = next_token(p);
+ }
+ if (!is_type_token(p, t)) {
+ fatal_error("%s:%d: parse_type: expected type, but got '%s'", t->loc.filename, t->loc.line, token_stringify(t));
+ }
+ Type* ty;
+ if (t->kind == TokenKind_ident) {
+ int typedef_idx = find_typedef(p, t->value.string);
+ if (typedef_idx == -1) {
+ fatal_error("parse_type: unknown typedef, %s", t->value.string);
+ }
+ ty = p->typedefs[typedef_idx].ty;
+ } else {
+ ty = type_new(TypeKind_unknown);
+ if (t->kind == TokenKind_keyword_char) {
+ ty->kind = TypeKind_char;
+ } else if (t->kind == TokenKind_keyword_short) {
+ ty->kind = TypeKind_short;
+ } else if (t->kind == TokenKind_keyword_int) {
+ ty->kind = TypeKind_int;
+ } else if (t->kind == TokenKind_keyword_long) {
+ ty->kind = TypeKind_long;
+ } else if (t->kind == TokenKind_keyword_void) {
+ ty->kind = TypeKind_void;
+ } else if (t->kind == TokenKind_keyword_enum) {
+ ty->kind = TypeKind_enum;
+ const char* name = parse_ident(p);
+ int enum_idx = find_enum(p, name);
+ if (enum_idx == -1) {
+ fatal_error("parse_type: unknown enum, %s", name);
+ }
+ ty->def = p->enums + enum_idx;
+ } else if (t->kind == TokenKind_keyword_struct) {
+ ty->kind = TypeKind_struct;
+ const char* name = parse_ident(p);
+ int struct_idx = find_struct(p, name);
+ if (struct_idx == -1) {
+ fatal_error("parse_type: unknown struct, %s", name);
+ }
+ ty->def = p->structs + struct_idx;
+ } else if (t->kind == TokenKind_keyword_union) {
+ ty->kind = TypeKind_union;
+ const char* name = parse_ident(p);
+ int union_idx = find_union(p, name);
+ if (union_idx == -1) {
+ fatal_error("parse_type: unknown union, %s", name);
+ }
+ ty->def = p->unions + union_idx;
+ } else {
+ unreachable();
+ }
+ }
+ while (1) {
+ if (!consume_token_if(p, TokenKind_star)) {
+ break;
+ }
+ ty = type_new_ptr(ty);
+ }
+ return ty;
+}
+
+AstNode* parse_prefix_expr(Parser* p) {
+ TokenKind op = peek_token(p)->kind;
+ if (consume_token_if(p, TokenKind_minus)) {
+ AstNode* operand = parse_prefix_expr(p);
+ return ast_new_binary_expr(op, ast_new_int(0), operand);
+ } else if (consume_token_if(p, TokenKind_not)) {
+ AstNode* operand = parse_prefix_expr(p);
+ return ast_new_unary_expr(op, operand);
+ } else if (consume_token_if(p, TokenKind_and)) {
+ AstNode* operand = parse_prefix_expr(p);
+ return ast_new_ref_expr(operand);
+ } else if (consume_token_if(p, TokenKind_star)) {
+ AstNode* operand = parse_prefix_expr(p);
+ return ast_new_deref_expr(operand);
+ } else if (consume_token_if(p, TokenKind_plusplus)) {
+ AstNode* operand = parse_prefix_expr(p);
+ return ast_new_assign_add_expr(operand, ast_new_int(1));
+ } else if (consume_token_if(p, TokenKind_minusminus)) {
+ AstNode* operand = parse_prefix_expr(p);
+ return ast_new_assign_sub_expr(operand, ast_new_int(1));
+ } else if (consume_token_if(p, TokenKind_keyword_sizeof)) {
+ expect(p, TokenKind_paren_l);
+ Token* next_tok = peek_token(p);
+ Type* ty = NULL;
+ if (next_tok->kind == TokenKind_ident) {
+ int lvar_idx = find_lvar(p, next_tok->value.string);
+ if (lvar_idx != -1) {
+ next_token(p);
+ ty = p->lvars[lvar_idx].ty;
+ }
+ int gvar_idx = find_gvar(p, next_tok->value.string);
+ if (gvar_idx != -1) {
+ next_token(p);
+ ty = p->gvars[gvar_idx].ty;
+ }
+ }
+ if (!ty) {
+ ty = parse_type(p);
+ }
+ expect(p, TokenKind_paren_r);
+ return ast_new_int(type_sizeof(ty));
+ }
+ return parse_postfix_expr(p);
+}
+
+AstNode* parse_multiplicative_expr(Parser* p) {
+ AstNode* lhs = parse_prefix_expr(p);
+ while (1) {
+ TokenKind op = peek_token(p)->kind;
+ if (op == TokenKind_star || op == TokenKind_slash || op == TokenKind_percent) {
+ next_token(p);
+ AstNode* rhs = parse_prefix_expr(p);
+ lhs = ast_new_binary_expr(op, lhs, rhs);
+ } else {
+ break;
+ }
+ }
+ return lhs;
+}
+
+AstNode* parse_additive_expr(Parser* p) {
+ AstNode* lhs = parse_multiplicative_expr(p);
+ while (1) {
+ if (consume_token_if(p, TokenKind_plus)) {
+ AstNode* rhs = parse_multiplicative_expr(p);
+ if (lhs->ty->base) {
+ lhs = ast_new_binary_expr(
+ TokenKind_plus, lhs,
+ ast_new_binary_expr(TokenKind_star, rhs, ast_new_int(type_sizeof(lhs->ty->base))));
+ } else if (rhs->ty->base) {
+ lhs = ast_new_binary_expr(
+ TokenKind_plus, ast_new_binary_expr(TokenKind_star, lhs, ast_new_int(type_sizeof(rhs->ty->base))),
+ rhs);
+ } else {
+ lhs = ast_new_binary_expr(TokenKind_plus, lhs, rhs);
+ }
+ } else if (consume_token_if(p, TokenKind_minus)) {
+ AstNode* rhs = parse_multiplicative_expr(p);
+ if (lhs->ty->base) {
+ if (rhs->ty->base) {
+ // (a - b) / sizeof(a)
+ lhs = ast_new_binary_expr(TokenKind_slash, ast_new_binary_expr(TokenKind_minus, lhs, rhs),
+ ast_new_int(type_sizeof(lhs->ty->base)));
+ } else {
+ // a - b*sizeof(a)
+ lhs = ast_new_binary_expr(
+ TokenKind_minus, lhs,
+ ast_new_binary_expr(TokenKind_star, rhs, ast_new_int(type_sizeof(lhs->ty->base))));
+ }
+ } else {
+ lhs = ast_new_binary_expr(TokenKind_minus, lhs, rhs);
+ }
+ } else {
+ break;
+ }
+ }
+ return lhs;
+}
+
+AstNode* parse_shift_expr(Parser* p) {
+ AstNode* lhs = parse_additive_expr(p);
+ while (1) {
+ TokenKind op = peek_token(p)->kind;
+ if (op == TokenKind_lshift || op == TokenKind_rshift) {
+ next_token(p);
+ AstNode* rhs = parse_additive_expr(p);
+ lhs = ast_new_binary_expr(op, lhs, rhs);
+ } else {
+ break;
+ }
+ }
+ return lhs;
+}
+
+AstNode* parse_relational_expr(Parser* p) {
+ AstNode* lhs = parse_shift_expr(p);
+ while (1) {
+ TokenKind op = peek_token(p)->kind;
+ if (op == TokenKind_lt || op == TokenKind_le) {
+ next_token(p);
+ AstNode* rhs = parse_shift_expr(p);
+ lhs = ast_new_binary_expr(op, lhs, rhs);
+ } else if (consume_token_if(p, TokenKind_gt)) {
+ AstNode* rhs = parse_shift_expr(p);
+ lhs = ast_new_binary_expr(TokenKind_lt, rhs, lhs);
+ } else if (consume_token_if(p, TokenKind_ge)) {
+ AstNode* rhs = parse_shift_expr(p);
+ lhs = ast_new_binary_expr(TokenKind_le, rhs, lhs);
+ } else {
+ break;
+ }
+ }
+ return lhs;
+}
+
+AstNode* parse_equality_expr(Parser* p) {
+ AstNode* lhs = parse_relational_expr(p);
+ while (1) {
+ TokenKind op = peek_token(p)->kind;
+ if (op == TokenKind_eq || op == TokenKind_ne) {
+ next_token(p);
+ AstNode* rhs = parse_relational_expr(p);
+ lhs = ast_new_binary_expr(op, lhs, rhs);
+ } else {
+ break;
+ }
+ }
+ return lhs;
+}
+
+AstNode* parse_bitwise_or_expr(Parser* p) {
+ AstNode* lhs = parse_equality_expr(p);
+ while (1) {
+ TokenKind op = peek_token(p)->kind;
+ if (consume_token_if(p, TokenKind_or)) {
+ AstNode* rhs = parse_equality_expr(p);
+ lhs = ast_new_binary_expr(op, lhs, rhs);
+ lhs->ty = type_new(TypeKind_int);
+ } else {
+ break;
+ }
+ }
+ return lhs;
+}
+
+AstNode* parse_logical_and_expr(Parser* p) {
+ AstNode* lhs = parse_bitwise_or_expr(p);
+ while (1) {
+ TokenKind op = peek_token(p)->kind;
+ if (consume_token_if(p, TokenKind_andand)) {
+ AstNode* rhs = parse_bitwise_or_expr(p);
+ AstNode* e = ast_new(AstNodeKind_logical_expr);
+ e->node_op = op;
+ e->node_lhs = lhs;
+ e->node_rhs = rhs;
+ e->ty = type_new(TypeKind_int);
+ lhs = e;
+ } else {
+ break;
+ }
+ }
+ return lhs;
+}
+
+AstNode* parse_logical_or_expr(Parser* p) {
+ AstNode* lhs = parse_logical_and_expr(p);
+ while (1) {
+ TokenKind op = peek_token(p)->kind;
+ if (consume_token_if(p, TokenKind_oror)) {
+ AstNode* rhs = parse_logical_and_expr(p);
+ AstNode* e = ast_new(AstNodeKind_logical_expr);
+ e->node_op = op;
+ e->node_lhs = lhs;
+ e->node_rhs = rhs;
+ e->ty = type_new(TypeKind_int);
+ lhs = e;
+ } else {
+ break;
+ }
+ }
+ return lhs;
+}
+
+AstNode* parse_conditional_expr(Parser* p) {
+ AstNode* e = parse_logical_or_expr(p);
+ if (consume_token_if(p, TokenKind_question)) {
+ AstNode* then_expr = parse_expr(p);
+ expect(p, TokenKind_colon);
+ AstNode* else_expr = parse_assignment_expr(p);
+ AstNode* ret = ast_new(AstNodeKind_cond_expr);
+ ret->node_cond = e;
+ ret->node_then = then_expr;
+ ret->node_else = else_expr;
+ ret->ty = then_expr->ty;
+ return ret;
+ } else {
+ return e;
+ }
+}
+
+// constant-expression:
+// conditional-expression
+AstNode* parse_constant_expression(Parser* p) {
+ return parse_conditional_expr(p);
+}
+
+AstNode* parse_assignment_expr(Parser* p) {
+ AstNode* lhs = parse_conditional_expr(p);
+ while (1) {
+ TokenKind op = peek_token(p)->kind;
+ // TODO: check if the lhs is unary expression.
+ if (op == TokenKind_assign || op == TokenKind_assign_mul || op == TokenKind_assign_div ||
+ op == TokenKind_assign_mod) {
+ next_token(p);
+ AstNode* rhs = parse_assignment_expr(p);
+ lhs = ast_new_assign_expr(op, lhs, rhs);
+ } else if (consume_token_if(p, TokenKind_assign_add)) {
+ AstNode* rhs = parse_assignment_expr(p);
+ lhs = ast_new_assign_add_expr(lhs, rhs);
+ } else if (consume_token_if(p, TokenKind_assign_sub)) {
+ AstNode* rhs = parse_assignment_expr(p);
+ lhs = ast_new_assign_sub_expr(lhs, rhs);
+ } else {
+ break;
+ }
+ }
+ return lhs;
+}
+
+AstNode* parse_comma_expr(Parser* p) {
+ AstNode* lhs = parse_assignment_expr(p);
+ while (1) {
+ TokenKind op = peek_token(p)->kind;
+ if (consume_token_if(p, TokenKind_comma)) {
+ AstNode* rhs = parse_assignment_expr(p);
+ AstNode* list = ast_new_list(2);
+ ast_append(list, lhs);
+ ast_append(list, rhs);
+ list->ty = rhs->ty;
+ lhs = list;
+ } else {
+ break;
+ }
+ }
+ return lhs;
+}
+
+AstNode* parse_expr(Parser* p) {
+ return parse_comma_expr(p);
+}
+
+AstNode* parse_return_stmt(Parser* p) {
+ expect(p, TokenKind_keyword_return);
+ if (consume_token_if(p, TokenKind_semicolon)) {
+ return ast_new(AstNodeKind_return_stmt);
+ }
+
+ AstNode* expr = parse_expr(p);
+ expect(p, TokenKind_semicolon);
+
+ AstNode* ret = ast_new(AstNodeKind_return_stmt);
+ ret->node_expr = expr;
+ return ret;
+}
+
+AstNode* parse_if_stmt(Parser* p) {
+ expect(p, TokenKind_keyword_if);
+ expect(p, TokenKind_paren_l);
+ AstNode* cond = parse_expr(p);
+ expect(p, TokenKind_paren_r);
+ AstNode* then_body = parse_stmt(p);
+ AstNode* else_body = NULL;
+ if (consume_token_if(p, TokenKind_keyword_else)) {
+ else_body = parse_stmt(p);
+ }
+
+ AstNode* stmt = ast_new(AstNodeKind_if_stmt);
+ stmt->node_cond = cond;
+ stmt->node_then = then_body;
+ stmt->node_else = else_body;
+ return stmt;
+}
+
+// pointer:
+// '*' TODO attribute-specifier-sequence_opt TODO type-qualifier-list_opt
+// '*' TODO attribute-specifier-sequence_opt TODO type-qualifier-list_opt pointer
+Type* parse_pointer_opt(Parser* p, Type* ty) {
+ while (peek_token(p)->kind == TokenKind_star) {
+ next_token(p);
+ ty = type_new_ptr(ty);
+ }
+ return ty;
+}
+
+// array-declarator:
+// direct-declarator '[' TODO type-qualifier-list_opt TODO assignment-expression_opt ']'
+// TODO direct-declarator '[' 'static' type-qualifier-list_opt assignment-expression_opt ']'
+// TODO direct-declarator '[' type-qualifier-list 'static' assignment-expression_opt ']'
+// TODO direct-declarator '[' type-qualifier-list_opt '*' ']'
+Type* parse_array_declarator_suffix(Parser* p, Type* ty) {
+ next_token(p); // skip '['
+
+ AstNode* size_expr = parse_expr(p);
+ if (size_expr->kind != AstNodeKind_int_expr) {
+ fatal_error("parse_var_decl: invalid array size");
+ }
+ int size = size_expr->node_int_value;
+ expect(p, TokenKind_bracket_r);
+
+ return type_new_array(ty, size);
+}
+
+// direct-declarator:
+// identifier TODO attribute-specifier-sequence_opt
+// TODO '(' declarator ')'
+// array-declarator TODO attribute-specifier-sequence_opt
+// TODO function-declarator TODO attribute-specifier-sequence_opt
+AstNode* parse_direct_declarator(Parser* p, Type* ty) {
+ const char* name = parse_ident(p);
+ while (1) {
+ if (peek_token(p)->kind == TokenKind_bracket_l) {
+ ty = parse_array_declarator_suffix(p, ty);
+ } else {
+ break;
+ }
+ }
+
+ AstNode* ret = ast_new(AstNodeKind_declarator);
+ ret->name = name;
+ ret->ty = ty;
+ return ret;
+}
+
+// declarator:
+// pointer_opt direct-declarator
+AstNode* parse_declarator(Parser* p, Type* ty) {
+ ty = parse_pointer_opt(p, ty);
+ return parse_direct_declarator(p, ty);
+}
+
+// initializer:
+// assignment-expression
+// TODO braced-initializer
+AstNode* parse_initializer(Parser* p) {
+ return parse_assignment_expr(p);
+}
+
+// init-declarator:
+// declarator
+// declarator '=' initializer
+AstNode* parse_init_declarator(Parser* p, Type* ty) {
+ AstNode* decl = parse_declarator(p, ty);
+ AstNode* init = NULL;
+ if (consume_token_if(p, TokenKind_assign)) {
+ init = parse_initializer(p);
+ }
+ decl->node_init = init;
+ return decl;
+}
+
+// init-declarator-list:
+// init-declarator
+// init-declarator-list ',' init-declarator
+AstNode* parse_init_declarator_list(Parser* p, Type* ty) {
+ AstNode* list = ast_new_list(1);
+ while (1) {
+ AstNode* d = parse_init_declarator(p, ty);
+ ast_append(list, d);
+ if (!consume_token_if(p, TokenKind_comma)) {
+ break;
+ }
+ }
+ return list;
+}
+
+AstNode* parse_var_decl(Parser* p) {
+ Type* base_ty = parse_type(p);
+ if (type_is_unsized(base_ty)) {
+ fatal_error("parse_var_decl: invalid type for variable");
+ }
+
+ AstNode* decls = parse_init_declarator_list(p, base_ty);
+ expect(p, TokenKind_semicolon);
+
+ for (int i = 0; i < decls->node_len; ++i) {
+ AstNode* decl = &decls->node_items[i];
+
+ if (find_lvar_in_current_scope(p, decl->name) != -1) {
+ // TODO: use name's location.
+ fatal_error("%s:%d: '%s' redeclared", peek_token(p)->loc.filename, peek_token(p)->loc.line, decl->name);
+ }
+ int stack_offset = add_lvar(p, decl->name, decl->ty, FALSE);
+
+ if (decl->node_init) {
+ AstNode* lhs = ast_new(AstNodeKind_lvar);
+ lhs->name = decl->name;
+ lhs->node_stack_offset = stack_offset;
+ lhs->ty = decl->ty;
+ AstNode* assign = ast_new_assign_expr(TokenKind_assign, lhs, decl->node_init);
+ decl->kind = AstNodeKind_expr_stmt;
+ decl->node_expr = assign;
+ } else {
+ decl->kind = AstNodeKind_nop;
+ }
+ }
+ return decls;
+}
+
+AstNode* parse_for_stmt(Parser* p) {
+ expect(p, TokenKind_keyword_for);
+ expect(p, TokenKind_paren_l);
+ AstNode* init = NULL;
+ AstNode* cond = NULL;
+ AstNode* update = NULL;
+ enter_scope(p);
+ if (peek_token(p)->kind != TokenKind_semicolon) {
+ if (is_type_token(p, peek_token(p))) {
+ init = parse_var_decl(p)->node_items[0].node_expr;
+ } else {
+ init = parse_expr(p);
+ expect(p, TokenKind_semicolon);
+ }
+ } else {
+ expect(p, TokenKind_semicolon);
+ }
+ if (peek_token(p)->kind != TokenKind_semicolon) {
+ cond = parse_expr(p);
+ } else {
+ cond = ast_new_int(1);
+ }
+ expect(p, TokenKind_semicolon);
+ if (peek_token(p)->kind != TokenKind_paren_r) {
+ update = parse_expr(p);
+ }
+ expect(p, TokenKind_paren_r);
+ AstNode* body = parse_stmt(p);
+ leave_scope(p);
+
+ AstNode* stmt = ast_new(AstNodeKind_for_stmt);
+ stmt->node_cond = cond;
+ stmt->node_init = init;
+ stmt->node_update = update;
+ stmt->node_body = body;
+ return stmt;
+}
+
+AstNode* parse_while_stmt(Parser* p) {
+ expect(p, TokenKind_keyword_while);
+ expect(p, TokenKind_paren_l);
+ AstNode* cond = parse_expr(p);
+ expect(p, TokenKind_paren_r);
+ AstNode* body = parse_stmt(p);
+
+ AstNode* stmt = ast_new(AstNodeKind_for_stmt);
+ stmt->node_cond = cond;
+ stmt->node_body = body;
+ return stmt;
+}
+
+AstNode* parse_do_while_stmt(Parser* p) {
+ expect(p, TokenKind_keyword_do);
+ AstNode* body = parse_stmt(p);
+ expect(p, TokenKind_keyword_while);
+ expect(p, TokenKind_paren_l);
+ AstNode* cond = parse_expr(p);
+ expect(p, TokenKind_paren_r);
+ expect(p, TokenKind_semicolon);
+
+ AstNode* stmt = ast_new(AstNodeKind_do_while_stmt);
+ stmt->node_cond = cond;
+ stmt->node_body = body;
+ return stmt;
+}
+
+AstNode* parse_break_stmt(Parser* p) {
+ expect(p, TokenKind_keyword_break);
+ expect(p, TokenKind_semicolon);
+ return ast_new(AstNodeKind_break_stmt);
+}
+
+AstNode* parse_continue_stmt(Parser* p) {
+ expect(p, TokenKind_keyword_continue);
+ expect(p, TokenKind_semicolon);
+ return ast_new(AstNodeKind_continue_stmt);
+}
+
+AstNode* parse_expr_stmt(Parser* p) {
+ AstNode* e = parse_expr(p);
+ expect(p, TokenKind_semicolon);
+ AstNode* stmt = ast_new(AstNodeKind_expr_stmt);
+ stmt->node_expr = e;
+ return stmt;
+}
+
+AstNode* parse_block_stmt(Parser* p) {
+ AstNode* list = ast_new_list(4);
+ expect(p, TokenKind_brace_l);
+ enter_scope(p);
+ while (peek_token(p)->kind != TokenKind_brace_r) {
+ AstNode* stmt = parse_stmt(p);
+ ast_append(list, stmt);
+ }
+ leave_scope(p);
+ expect(p, TokenKind_brace_r);
+ return list;
+}
+
+AstNode* parse_empty_stmt(Parser* p) {
+ consume_token_if(p, TokenKind_semicolon);
+ return ast_new(AstNodeKind_nop);
+}
+
+AstNode* parse_stmt(Parser* p) {
+ Token* t = peek_token(p);
+ if (t->kind == TokenKind_keyword_return) {
+ return parse_return_stmt(p);
+ } else if (t->kind == TokenKind_keyword_if) {
+ return parse_if_stmt(p);
+ } else if (t->kind == TokenKind_keyword_for) {
+ return parse_for_stmt(p);
+ } else if (t->kind == TokenKind_keyword_while) {
+ return parse_while_stmt(p);
+ } else if (t->kind == TokenKind_keyword_do) {
+ return parse_do_while_stmt(p);
+ } else if (t->kind == TokenKind_keyword_break) {
+ return parse_break_stmt(p);
+ } else if (t->kind == TokenKind_keyword_continue) {
+ return parse_continue_stmt(p);
+ } else if (t->kind == TokenKind_brace_l) {
+ return parse_block_stmt(p);
+ } else if (t->kind == TokenKind_semicolon) {
+ return parse_empty_stmt(p);
+ } else if (is_type_token(p, t)) {
+ return parse_var_decl(p);
+ } else {
+ return parse_expr_stmt(p);
+ }
+}
+
+void register_params(Parser* p, AstNode* params) {
+ for (int i = 0; i < params->node_len; ++i) {
+ AstNode* param = params->node_items + i;
+ add_lvar(p, param->name, param->ty, TRUE);
+ }
+}
+
+void register_func(Parser* p, const char* name, Type* ty) {
+ p->funcs[p->n_funcs].name = name;
+ p->funcs[p->n_funcs].ty = ty;
+ ++p->n_funcs;
+}
+
+AstNode* parse_param(Parser* p) {
+ Type* ty = parse_type(p);
+ const char* name = NULL;
+ TokenKind tk = peek_token(p)->kind;
+ if (tk != TokenKind_comma && tk != TokenKind_paren_r) {
+ name = parse_ident(p);
+ }
+ AstNode* param = ast_new(AstNodeKind_param);
+ param->ty = ty;
+ if (name) {
+ param->name = name;
+ }
+ return param;
+}
+
+AstNode* parse_param_list(Parser* p) {
+ BOOL has_void = FALSE;
+ AstNode* list = ast_new_list(6);
+ while (peek_token(p)->kind != TokenKind_paren_r) {
+ if (consume_token_if(p, TokenKind_ellipsis)) {
+ break;
+ }
+ AstNode* param = parse_param(p);
+ has_void = has_void || param->ty->kind == TypeKind_void;
+ ast_append(list, param);
+ if (!consume_token_if(p, TokenKind_comma)) {
+ break;
+ }
+ }
+ if (list->node_len > 6) {
+ fatal_error("too many parameters");
+ }
+ if (has_void) {
+ if (list->node_len != 1) {
+ fatal_error("invalid use of void param");
+ }
+ list->node_len = 0;
+ }
+ return list;
+}
+
+AstNode* parse_global_var_decl(Parser* p) {
+ Type* base_ty = parse_type(p);
+ if (type_is_unsized(base_ty)) {
+ fatal_error("parse_global_var_decl: invalid type for variable");
+ }
+
+ AstNode* decls = parse_init_declarator_list(p, base_ty);
+ expect(p, TokenKind_semicolon);
+
+ for (int i = 0; i < decls->node_len; ++i) {
+ AstNode* decl = &decls->node_items[i];
+
+ if (find_gvar(p, decl->name) != -1) {
+ fatal_error("parse_global_var_decl: %s redeclared", decl->name);
+ }
+ p->gvars[p->n_gvars].name = decl->name;
+ p->gvars[p->n_gvars].ty = decl->ty;
+ ++p->n_gvars;
+
+ decl->kind = AstNodeKind_gvar_decl;
+ decl->node_expr = decl->node_init;
+ }
+ return decls;
+}
+
+AstNode* parse_func_decl_or_def(Parser* p) {
+ // TODO: remove it.
+ int start_pos = p->pos;
+
+ Type* ty = parse_type(p);
+ const char* name = parse_ident(p);
+
+ if (peek_token(p)->kind != TokenKind_paren_l) {
+ // TODO: avoid backtracking
+ p->pos = start_pos;
+ return parse_global_var_decl(p);
+ }
+
+ expect(p, TokenKind_paren_l);
+ register_func(p, name, ty);
+ AstNode* params = parse_param_list(p);
+ expect(p, TokenKind_paren_r);
+ if (consume_token_if(p, TokenKind_semicolon)) {
+ return ast_new(AstNodeKind_func_decl);
+ }
+ enter_func(p);
+ register_params(p, params);
+ AstNode* body = parse_block_stmt(p);
+ leave_func(p);
+ AstNode* func = ast_new(AstNodeKind_func_def);
+ func->ty = ty;
+ func->name = name;
+ func->node_params = params;
+ func->node_body = body;
+ if (p->n_lvars == 0) {
+ func->node_stack_size = 0;
+ } else {
+ func->node_stack_size = p->lvars[p->n_lvars - 1].stack_offset + type_sizeof(p->lvars[p->n_lvars - 1].ty);
+ }
+ return func;
+}
+
+AstNode* parse_struct_member(Parser* p) {
+ Type* ty = parse_type(p);
+ const char* name = parse_ident(p);
+ expect(p, TokenKind_semicolon);
+ AstNode* member = ast_new(AstNodeKind_struct_member);
+ member->name = name;
+ member->ty = ty;
+ return member;
+}
+
+AstNode* parse_struct_members(Parser* p) {
+ AstNode* list = ast_new_list(4);
+ while (peek_token(p)->kind != TokenKind_brace_r) {
+ AstNode* member = parse_struct_member(p);
+ ast_append(list, member);
+ }
+ return list;
+}
+
+AstNode* parse_struct_decl_or_def(Parser* p) {
+ expect(p, TokenKind_keyword_struct);
+ const char* name = parse_ident(p);
+
+ if (peek_token(p)->kind != TokenKind_semicolon && peek_token(p)->kind != TokenKind_brace_l) {
+ p->pos = p->pos - 2;
+ return parse_func_decl_or_def(p);
+ }
+
+ int struct_idx = find_struct(p, name);
+ if (struct_idx == -1) {
+ struct_idx = p->n_structs;
+ p->structs[struct_idx].kind = AstNodeKind_struct_def;
+ p->structs[struct_idx].name = name;
+ ++p->n_structs;
+ }
+ if (consume_token_if(p, TokenKind_semicolon)) {
+ return ast_new(AstNodeKind_struct_decl);
+ }
+ if (p->structs[struct_idx].node_members) {
+ fatal_error("parse_struct_decl_or_def: struct %s redefined", name);
+ }
+ expect(p, TokenKind_brace_l);
+ AstNode* members = parse_struct_members(p);
+ expect(p, TokenKind_brace_r);
+ expect(p, TokenKind_semicolon);
+ p->structs[struct_idx].node_members = members;
+ return p->structs + struct_idx;
+}
+
+AstNode* parse_union_member(Parser* p) {
+ Type* ty = parse_type(p);
+ const char* name = parse_ident(p);
+ expect(p, TokenKind_semicolon);
+ AstNode* member = ast_new(AstNodeKind_union_member);
+ member->name = name;
+ member->ty = ty;
+ return member;
+}
+
+AstNode* parse_union_members(Parser* p) {
+ AstNode* list = ast_new_list(4);
+ while (peek_token(p)->kind != TokenKind_brace_r) {
+ AstNode* member = parse_union_member(p);
+ ast_append(list, member);
+ }
+ return list;
+}
+
+AstNode* parse_union_decl_or_def(Parser* p) {
+ expect(p, TokenKind_keyword_union);
+ const char* name = parse_ident(p);
+
+ if (peek_token(p)->kind != TokenKind_semicolon && peek_token(p)->kind != TokenKind_brace_l) {
+ p->pos = p->pos - 2;
+ return parse_func_decl_or_def(p);
+ }
+
+ int union_idx = find_union(p, name);
+ if (union_idx == -1) {
+ union_idx = p->n_unions;
+ p->unions[union_idx].kind = AstNodeKind_union_def;
+ p->unions[union_idx].name = name;
+ ++p->n_unions;
+ }
+ if (consume_token_if(p, TokenKind_semicolon)) {
+ return ast_new(AstNodeKind_union_decl);
+ }
+ if (p->unions[union_idx].node_members) {
+ fatal_error("parse_union_decl_or_def: union %s redefined", name);
+ }
+ expect(p, TokenKind_brace_l);
+ AstNode* members = parse_union_members(p);
+ expect(p, TokenKind_brace_r);
+ expect(p, TokenKind_semicolon);
+ p->unions[union_idx].node_members = members;
+ return p->unions + union_idx;
+}
+
+AstNode* parse_enum_member(Parser* p) {
+ const char* name = parse_ident(p);
+ AstNode* member = ast_new(AstNodeKind_enum_member);
+ member->name = name;
+ return member;
+}
+
+AstNode* parse_enum_members(Parser* p) {
+ int next_value = 0;
+ AstNode* list = ast_new_list(16);
+ while (peek_token(p)->kind != TokenKind_brace_r) {
+ AstNode* member = parse_enum_member(p);
+ member->node_int_value = next_value;
+ ++next_value;
+ ast_append(list, member);
+ if (!consume_token_if(p, TokenKind_comma)) {
+ break;
+ }
+ }
+ return list;
+}
+
+AstNode* parse_enum_def(Parser* p) {
+ expect(p, TokenKind_keyword_enum);
+ const char* name = parse_ident(p);
+
+ if (peek_token(p)->kind != TokenKind_brace_l) {
+ p->pos = p->pos - 2;
+ return parse_func_decl_or_def(p);
+ }
+
+ int enum_idx = find_enum(p, name);
+ if (enum_idx == -1) {
+ enum_idx = p->n_enums;
+ p->enums[enum_idx].kind = AstNodeKind_enum_def;
+ p->enums[enum_idx].name = name;
+ ++p->n_enums;
+ } else {
+ fatal_error("parse_enum_def: enum %s redefined", name);
+ }
+ expect(p, TokenKind_brace_l);
+ AstNode* members = parse_enum_members(p);
+ expect(p, TokenKind_brace_r);
+ expect(p, TokenKind_semicolon);
+ p->enums[enum_idx].node_members = members;
+ return p->enums + enum_idx;
+}
+
+AstNode* parse_typedef_decl(Parser* p) {
+ expect(p, TokenKind_keyword_typedef);
+ Type* ty = parse_type(p);
+ const char* name = parse_ident(p);
+ expect(p, TokenKind_semicolon);
+ AstNode* decl = ast_new(AstNodeKind_typedef_decl);
+ decl->name = name;
+ decl->ty = ty;
+ p->typedefs[p->n_typedefs].name = name;
+ p->typedefs[p->n_typedefs].ty = ty;
+ ++p->n_typedefs;
+ return decl;
+}
+
+AstNode* parse_extern_var_decl(Parser* p) {
+ expect(p, TokenKind_keyword_extern);
+ Type* ty = parse_type(p);
+ if (type_is_unsized(ty)) {
+ fatal_error("parse_extern_var_decl: invalid type for variable");
+ }
+ const char* name = parse_ident(p);
+ expect(p, TokenKind_semicolon);
+
+ if (find_lvar(p, name) != -1 || find_gvar(p, name) != -1) {
+ fatal_error("parse_extern_var_decl: %s redeclared", name);
+ }
+ p->gvars[p->n_gvars].name = name;
+ p->gvars[p->n_gvars].ty = ty;
+ ++p->n_gvars;
+
+ return ast_new(AstNodeKind_gvar_decl);
+}
+
+AstNode* parse_toplevel(Parser* p) {
+ TokenKind tk = peek_token(p)->kind;
+ if (tk == TokenKind_keyword_struct) {
+ return parse_struct_decl_or_def(p);
+ } else if (tk == TokenKind_keyword_union) {
+ return parse_union_decl_or_def(p);
+ } else if (tk == TokenKind_keyword_enum) {
+ return parse_enum_def(p);
+ } else if (tk == TokenKind_keyword_typedef) {
+ return parse_typedef_decl(p);
+ } else if (tk == TokenKind_keyword_extern) {
+ return parse_extern_var_decl(p);
+ } else {
+ return parse_func_decl_or_def(p);
+ }
+}
+
+Program* parse(TokenArray* tokens) {
+ Parser* p = parser_new(tokens);
+ AstNode* funcs = ast_new_list(32);
+ AstNode* vars = ast_new_list(16);
+ while (eof(p)) {
+ AstNode* n = parse_toplevel(p);
+ if (n->kind == AstNodeKind_func_def) {
+ ast_append(funcs, n);
+ } else if (n->kind == AstNodeKind_gvar_decl && n->ty) {
+ ast_append(vars, n);
+ } else if (n->kind == AstNodeKind_list) {
+ for (int i = 0; i < n->node_len; ++i) {
+ ast_append(vars, &n->node_items[i]);
+ }
+ }
+ }
+ Program* prog = calloc(1, sizeof(Program));
+ prog->funcs = funcs;
+ prog->vars = vars;
+ prog->str_literals = p->str_literals;
+ return prog;
+}
+
+int eval(AstNode* e) {
+ if (e->kind == AstNodeKind_int_expr) {
+ return e->node_int_value;
+ } else if (e->kind == AstNodeKind_unary_expr) {
+ int v = eval(e->node_operand);
+ if (e->node_op == TokenKind_not) {
+ return !v;
+ } else {
+ unimplemented();
+ }
+ } else if (e->kind == AstNodeKind_binary_expr || e->kind == AstNodeKind_logical_expr) {
+ int v1 = eval(e->node_lhs);
+ int v2 = eval(e->node_rhs);
+ if (e->node_op == TokenKind_andand) {
+ return v1 && v2;
+ } else if (e->node_op == TokenKind_oror) {
+ return v1 || v2;
+ } else {
+ unimplemented();
+ }
+ } else {
+ unimplemented();
+ }
+}
+
+BOOL pp_eval_constant_expression(TokenArray* pp_tokens) {
+ TokenArray* tokens = tokenize(pp_tokens);
+ Parser* p = parser_new(tokens);
+ AstNode* e = parse_constant_expression(p);
+ return eval(e) != 0;
+}
diff --git a/src/preprocess.c b/src/preprocess.c
new file mode 100644
index 0000000..b1810cd
--- /dev/null
+++ b/src/preprocess.c
@@ -0,0 +1,1557 @@
+enum TokenKind {
+ TokenKind_eof,
+
+ // Only preprocessing phase.
+ TokenKind_hash,
+ TokenKind_hashhash,
+ TokenKind_whitespace,
+ TokenKind_newline,
+ TokenKind_other,
+ TokenKind_character_constant,
+ TokenKind_header_name,
+ TokenKind_pp_directive_define,
+ TokenKind_pp_directive_elif,
+ TokenKind_pp_directive_elifdef,
+ TokenKind_pp_directive_elifndef,
+ TokenKind_pp_directive_else,
+ TokenKind_pp_directive_embed,
+ TokenKind_pp_directive_endif,
+ TokenKind_pp_directive_error,
+ TokenKind_pp_directive_if,
+ TokenKind_pp_directive_ifdef,
+ TokenKind_pp_directive_ifndef,
+ TokenKind_pp_directive_include,
+ TokenKind_pp_directive_line,
+ TokenKind_pp_directive_pragma,
+ TokenKind_pp_directive_undef,
+ TokenKind_pp_directive_warning,
+ TokenKind_pp_operator_defined,
+ TokenKind_pp_operator___has_c_attribute,
+ TokenKind_pp_operator___has_embed,
+ TokenKind_pp_operator___has_include,
+
+ // C23: 6.4.1
+ TokenKind_keyword_alignas,
+ TokenKind_keyword_alignof,
+ TokenKind_keyword_auto,
+ TokenKind_keyword_bool,
+ TokenKind_keyword_break,
+ TokenKind_keyword_case,
+ TokenKind_keyword_char,
+ TokenKind_keyword_const,
+ TokenKind_keyword_constexpr,
+ TokenKind_keyword_continue,
+ TokenKind_keyword_default,
+ TokenKind_keyword_do,
+ TokenKind_keyword_double,
+ TokenKind_keyword_else,
+ TokenKind_keyword_enum,
+ TokenKind_keyword_extern,
+ TokenKind_keyword_false,
+ TokenKind_keyword_float,
+ TokenKind_keyword_for,
+ TokenKind_keyword_goto,
+ TokenKind_keyword_if,
+ TokenKind_keyword_inline,
+ TokenKind_keyword_int,
+ TokenKind_keyword_long,
+ TokenKind_keyword_nullptr,
+ TokenKind_keyword_register,
+ TokenKind_keyword_restrict,
+ TokenKind_keyword_return,
+ TokenKind_keyword_short,
+ TokenKind_keyword_signed,
+ TokenKind_keyword_sizeof,
+ TokenKind_keyword_static,
+ TokenKind_keyword_static_assert,
+ TokenKind_keyword_struct,
+ TokenKind_keyword_switch,
+ TokenKind_keyword_thread_local,
+ TokenKind_keyword_true,
+ TokenKind_keyword_typedef,
+ TokenKind_keyword_typeof,
+ TokenKind_keyword_typeof_unqual,
+ TokenKind_keyword_union,
+ TokenKind_keyword_unsigned,
+ TokenKind_keyword_void,
+ TokenKind_keyword_volatile,
+ TokenKind_keyword_while,
+ TokenKind_keyword__Atomic,
+ TokenKind_keyword__BitInt,
+ TokenKind_keyword__Complex,
+ TokenKind_keyword__Decimal128,
+ TokenKind_keyword__Decimal32,
+ TokenKind_keyword__Decimal64,
+ TokenKind_keyword__Generic,
+ TokenKind_keyword__Imaginary,
+ TokenKind_keyword__Noreturn,
+
+ TokenKind_and,
+ TokenKind_andand,
+ TokenKind_arrow,
+ TokenKind_assign,
+ TokenKind_assign_add,
+ TokenKind_assign_and,
+ TokenKind_assign_div,
+ TokenKind_assign_lshift,
+ TokenKind_assign_mod,
+ TokenKind_assign_mul,
+ TokenKind_assign_or,
+ TokenKind_assign_rshift,
+ TokenKind_assign_sub,
+ TokenKind_assign_xor,
+ TokenKind_brace_l,
+ TokenKind_brace_r,
+ TokenKind_bracket_l,
+ TokenKind_bracket_r,
+ TokenKind_colon,
+ TokenKind_comma,
+ TokenKind_dot,
+ TokenKind_ellipsis,
+ TokenKind_eq,
+ TokenKind_ge,
+ TokenKind_gt,
+ TokenKind_ident,
+ TokenKind_le,
+ TokenKind_literal_int,
+ TokenKind_literal_str,
+ TokenKind_lshift,
+ TokenKind_lt,
+ TokenKind_minus,
+ TokenKind_minusminus,
+ TokenKind_ne,
+ TokenKind_not,
+ TokenKind_or,
+ TokenKind_oror,
+ TokenKind_paren_l,
+ TokenKind_paren_r,
+ TokenKind_percent,
+ TokenKind_plus,
+ TokenKind_plusplus,
+ TokenKind_question,
+ TokenKind_rshift,
+ TokenKind_semicolon,
+ TokenKind_slash,
+ TokenKind_star,
+ TokenKind_tilde,
+ TokenKind_xor,
+};
+typedef enum TokenKind TokenKind;
+
+const char* token_kind_stringify(TokenKind k) {
+ if (k == TokenKind_eof)
+ return "<eof>";
+ else if (k == TokenKind_hash)
+ return "#";
+ else if (k == TokenKind_hashhash)
+ return "##";
+ else if (k == TokenKind_whitespace)
+ return "<whitespace>";
+ else if (k == TokenKind_newline)
+ return "<new-line>";
+ else if (k == TokenKind_other)
+ return "<other>";
+ else if (k == TokenKind_character_constant)
+ return "<character-constant>";
+ else if (k == TokenKind_header_name)
+ return "<header-name>";
+ else if (k == TokenKind_pp_directive_define)
+ return "#define";
+ else if (k == TokenKind_pp_directive_elif)
+ return "#elif";
+ else if (k == TokenKind_pp_directive_elifdef)
+ return "#elifdef";
+ else if (k == TokenKind_pp_directive_elifndef)
+ return "#elifndef";
+ else if (k == TokenKind_pp_directive_else)
+ return "#else";
+ else if (k == TokenKind_pp_directive_embed)
+ return "#embed";
+ else if (k == TokenKind_pp_directive_endif)
+ return "#endif";
+ else if (k == TokenKind_pp_directive_error)
+ return "#error";
+ else if (k == TokenKind_pp_directive_if)
+ return "#if";
+ else if (k == TokenKind_pp_directive_ifdef)
+ return "#ifdef";
+ else if (k == TokenKind_pp_directive_ifndef)
+ return "#ifndef";
+ else if (k == TokenKind_pp_directive_include)
+ return "#include";
+ else if (k == TokenKind_pp_directive_line)
+ return "#line";
+ else if (k == TokenKind_pp_directive_pragma)
+ return "#pragma";
+ else if (k == TokenKind_pp_directive_undef)
+ return "#undef";
+ else if (k == TokenKind_pp_directive_warning)
+ return "#warning";
+ else if (k == TokenKind_pp_operator_defined)
+ return "defined";
+ else if (k == TokenKind_pp_operator___has_c_attribute)
+ return "__has_c_attribute";
+ else if (k == TokenKind_pp_operator___has_embed)
+ return "__has_embed";
+ else if (k == TokenKind_pp_operator___has_include)
+ return "__has_include";
+ else if (k == TokenKind_keyword_alignas)
+ return "alignas";
+ else if (k == TokenKind_keyword_alignof)
+ return "alignof";
+ else if (k == TokenKind_keyword_auto)
+ return "auto";
+ else if (k == TokenKind_keyword_bool)
+ return "bool";
+ else if (k == TokenKind_keyword_break)
+ return "break";
+ else if (k == TokenKind_keyword_case)
+ return "case";
+ else if (k == TokenKind_keyword_char)
+ return "char";
+ else if (k == TokenKind_keyword_const)
+ return "const";
+ else if (k == TokenKind_keyword_constexpr)
+ return "constexpr";
+ else if (k == TokenKind_keyword_continue)
+ return "continue";
+ else if (k == TokenKind_keyword_default)
+ return "default";
+ else if (k == TokenKind_keyword_do)
+ return "do";
+ else if (k == TokenKind_keyword_double)
+ return "double";
+ else if (k == TokenKind_keyword_else)
+ return "else";
+ else if (k == TokenKind_keyword_enum)
+ return "enum";
+ else if (k == TokenKind_keyword_extern)
+ return "extern";
+ else if (k == TokenKind_keyword_false)
+ return "false";
+ else if (k == TokenKind_keyword_float)
+ return "float";
+ else if (k == TokenKind_keyword_for)
+ return "for";
+ else if (k == TokenKind_keyword_goto)
+ return "goto";
+ else if (k == TokenKind_keyword_if)
+ return "if";
+ else if (k == TokenKind_keyword_inline)
+ return "inline";
+ else if (k == TokenKind_keyword_int)
+ return "int";
+ else if (k == TokenKind_keyword_long)
+ return "long";
+ else if (k == TokenKind_keyword_nullptr)
+ return "nullptr";
+ else if (k == TokenKind_keyword_register)
+ return "register";
+ else if (k == TokenKind_keyword_restrict)
+ return "restrict";
+ else if (k == TokenKind_keyword_return)
+ return "return";
+ else if (k == TokenKind_keyword_short)
+ return "short";
+ else if (k == TokenKind_keyword_signed)
+ return "signed";
+ else if (k == TokenKind_keyword_sizeof)
+ return "sizeof";
+ else if (k == TokenKind_keyword_static)
+ return "static";
+ else if (k == TokenKind_keyword_static_assert)
+ return "static_assert";
+ else if (k == TokenKind_keyword_struct)
+ return "struct";
+ else if (k == TokenKind_keyword_switch)
+ return "switch";
+ else if (k == TokenKind_keyword_thread_local)
+ return "thread_local";
+ else if (k == TokenKind_keyword_true)
+ return "true";
+ else if (k == TokenKind_keyword_typedef)
+ return "typedef";
+ else if (k == TokenKind_keyword_typeof)
+ return "typeof";
+ else if (k == TokenKind_keyword_typeof_unqual)
+ return "typeof_unqual";
+ else if (k == TokenKind_keyword_union)
+ return "union";
+ else if (k == TokenKind_keyword_unsigned)
+ return "unsigned";
+ else if (k == TokenKind_keyword_void)
+ return "void";
+ else if (k == TokenKind_keyword_volatile)
+ return "volatile";
+ else if (k == TokenKind_keyword_while)
+ return "while";
+ else if (k == TokenKind_keyword__Atomic)
+ return "_Atomic";
+ else if (k == TokenKind_keyword__BitInt)
+ return "_BitInt";
+ else if (k == TokenKind_keyword__Complex)
+ return "_Complex";
+ else if (k == TokenKind_keyword__Decimal128)
+ return "_Decimal128";
+ else if (k == TokenKind_keyword__Decimal32)
+ return "_Decimal32";
+ else if (k == TokenKind_keyword__Decimal64)
+ return "_Decimal64";
+ else if (k == TokenKind_keyword__Generic)
+ return "_Generic";
+ else if (k == TokenKind_keyword__Imaginary)
+ return "_Imaginary";
+ else if (k == TokenKind_keyword__Noreturn)
+ return "_Noreturn";
+ else if (k == TokenKind_and)
+ return "&";
+ else if (k == TokenKind_andand)
+ return "&&";
+ else if (k == TokenKind_arrow)
+ return "->";
+ else if (k == TokenKind_assign)
+ return "=";
+ else if (k == TokenKind_assign_add)
+ return "+=";
+ else if (k == TokenKind_assign_and)
+ return "&=";
+ else if (k == TokenKind_assign_div)
+ return "/=";
+ else if (k == TokenKind_assign_lshift)
+ return "<<=";
+ else if (k == TokenKind_assign_mod)
+ return "%=";
+ else if (k == TokenKind_assign_mul)
+ return "*=";
+ else if (k == TokenKind_assign_or)
+ return "|=";
+ else if (k == TokenKind_assign_rshift)
+ return ">>=";
+ else if (k == TokenKind_assign_sub)
+ return "-=";
+ else if (k == TokenKind_assign_xor)
+ return "^=";
+ else if (k == TokenKind_brace_l)
+ return "{";
+ else if (k == TokenKind_brace_r)
+ return "}";
+ else if (k == TokenKind_bracket_l)
+ return "[";
+ else if (k == TokenKind_bracket_r)
+ return "]";
+ else if (k == TokenKind_colon)
+ return ":";
+ else if (k == TokenKind_comma)
+ return ",";
+ else if (k == TokenKind_dot)
+ return ".";
+ else if (k == TokenKind_ellipsis)
+ return "...";
+ else if (k == TokenKind_eq)
+ return "==";
+ else if (k == TokenKind_ge)
+ return ">=";
+ else if (k == TokenKind_gt)
+ return ">";
+ else if (k == TokenKind_ident)
+ return "<identifier>";
+ else if (k == TokenKind_le)
+ return "le";
+ else if (k == TokenKind_literal_int)
+ return "<integer>";
+ else if (k == TokenKind_literal_str)
+ return "<string>";
+ else if (k == TokenKind_lshift)
+ return "<<";
+ else if (k == TokenKind_lt)
+ return "lt";
+ else if (k == TokenKind_minus)
+ return "-";
+ else if (k == TokenKind_minusminus)
+ return "--";
+ else if (k == TokenKind_ne)
+ return "!=";
+ else if (k == TokenKind_not)
+ return "!";
+ else if (k == TokenKind_or)
+ return "|";
+ else if (k == TokenKind_oror)
+ return "||";
+ else if (k == TokenKind_paren_l)
+ return "(";
+ else if (k == TokenKind_paren_r)
+ return ")";
+ else if (k == TokenKind_percent)
+ return "%";
+ else if (k == TokenKind_plus)
+ return "+";
+ else if (k == TokenKind_plusplus)
+ return "++";
+ else if (k == TokenKind_question)
+ return "?";
+ else if (k == TokenKind_rshift)
+ return ">>";
+ else if (k == TokenKind_semicolon)
+ return ";";
+ else if (k == TokenKind_slash)
+ return "/";
+ else if (k == TokenKind_star)
+ return "*";
+ else if (k == TokenKind_tilde)
+ return "~";
+ else if (k == TokenKind_xor)
+ return "^";
+ else
+ unreachable();
+}
+
+// TokenValue is externally tagged by Token's kind.
+union TokenValue {
+ const char* string;
+ int integer;
+};
+typedef union TokenValue TokenValue;
+
+struct Token {
+ TokenKind kind;
+ TokenValue value;
+ SourceLocation loc;
+};
+typedef struct Token Token;
+
+const char* token_stringify(Token* t) {
+ TokenKind k = t->kind;
+ if (k == TokenKind_literal_int) {
+ const char* kind_str = token_kind_stringify(k);
+ char* buf = calloc(10 + strlen(kind_str) + 3 + 1, sizeof(char));
+ sprintf(buf, "%d (%s)", t->value.integer, kind_str);
+ return buf;
+ } else if (k == TokenKind_other || k == TokenKind_character_constant || k == TokenKind_ident ||
+ k == TokenKind_literal_int || k == TokenKind_literal_str) {
+ const char* kind_str = token_kind_stringify(k);
+ char* buf = calloc(strlen(t->value.string) + strlen(kind_str) + 3 + 1, sizeof(char));
+ sprintf(buf, "%s (%s)", t->value.string, kind_str);
+ return buf;
+ } else {
+ return token_kind_stringify(k);
+ }
+}
+
+struct TokenArray {
+ size_t len;
+ size_t capacity;
+ Token* data;
+};
+typedef struct TokenArray TokenArray;
+
+void tokens_init(TokenArray* tokens, size_t capacity) {
+ tokens->len = 0;
+ tokens->capacity = capacity;
+ tokens->data = calloc(tokens->capacity, sizeof(Token));
+}
+
+void tokens_reserve(TokenArray* tokens, size_t size) {
+ if (size <= tokens->capacity)
+ return;
+ tokens->capacity *= 2;
+ tokens->data = realloc(tokens->data, tokens->capacity * sizeof(Token));
+ memset(tokens->data + tokens->len, 0, (tokens->capacity - tokens->len) * sizeof(Token));
+}
+
+Token* tokens_push_new(TokenArray* tokens) {
+ tokens_reserve(tokens, tokens->len + 1);
+ return &tokens->data[tokens->len++];
+}
+
+Token* tokens_pop(TokenArray* tokens) {
+ if (tokens->len != 0)
+ tokens->len--;
+}
+
+enum MacroKind {
+ MacroKind_undef,
+ MacroKind_obj,
+ MacroKind_func,
+ MacroKind_builtin_file,
+ MacroKind_builtin_line,
+};
+typedef enum MacroKind MacroKind;
+
+const char* macro_kind_stringify(MacroKind kind) {
+ if (kind == MacroKind_undef)
+ return "undef";
+ else if (kind == MacroKind_obj)
+ return "object-like";
+ else if (kind == MacroKind_func)
+ return "function-like";
+ else if (kind == MacroKind_builtin_file)
+ return "__FILE__";
+ else if (kind == MacroKind_builtin_line)
+ return "__LINE__";
+ else
+ unreachable();
+}
+
+struct Macro {
+ MacroKind kind;
+ const char* name;
+ TokenArray parameters;
+ TokenArray replacements;
+};
+typedef struct Macro Macro;
+
+int macro_find_param(Macro* macro, Token* tok) {
+ if (tok->kind != TokenKind_ident)
+ return -1;
+
+ for (int i = 0; i < macro->parameters.len; ++i) {
+ if (strcmp(macro->parameters.data[i].value.string, tok->value.string) == 0) {
+ return i;
+ }
+ }
+ return -1;
+}
+
+struct MacroArray {
+ size_t len;
+ size_t capacity;
+ Macro* data;
+};
+typedef struct MacroArray MacroArray;
+
+MacroArray* macros_new() {
+ MacroArray* macros = calloc(1, sizeof(MacroArray));
+ macros->len = 0;
+ macros->capacity = 8;
+ macros->data = calloc(macros->capacity, sizeof(Macro));
+ return macros;
+}
+
+void macros_reserve(MacroArray* macros, size_t size) {
+ if (size <= macros->capacity)
+ return;
+ macros->capacity *= 2;
+ macros->data = realloc(macros->data, macros->capacity * sizeof(Macro));
+ memset(macros->data + macros->len, 0, (macros->capacity - macros->len) * sizeof(Macro));
+}
+
+Macro* macros_push_new(MacroArray* macros) {
+ macros_reserve(macros, macros->len + 1);
+ return &macros->data[macros->len++];
+}
+
+void macros_dump(MacroArray* macros) {
+ fprintf(stderr, "MacroArray {\n");
+ fprintf(stderr, " len = %zu\n", macros->len);
+ fprintf(stderr, " data = [\n");
+ for (int i = 0; i < macros->len; ++i) {
+ Macro* m = &macros->data[i];
+ fprintf(stderr, " Macro {\n");
+ fprintf(stderr, " kind = %s\n", macro_kind_stringify(m->kind));
+ fprintf(stderr, " name = %s\n", m->name);
+ fprintf(stderr, " replacements = TODO\n");
+ fprintf(stderr, " }\n");
+ }
+ fprintf(stderr, " ]\n");
+ fprintf(stderr, "}\n");
+}
+
+void add_predefined_macros(MacroArray* macros) {
+ Macro* m;
+
+ m = macros_push_new(macros);
+ m->kind = MacroKind_obj;
+ m->name = "__ducc__";
+ tokens_init(&m->replacements, 1);
+ Token* tok = tokens_push_new(&m->replacements);
+ tok->kind = TokenKind_literal_int;
+ tok->value.integer = 1;
+
+ m = macros_push_new(macros);
+ m->kind = MacroKind_builtin_file;
+ m->name = "__FILE__";
+
+ m = macros_push_new(macros);
+ m->kind = MacroKind_builtin_line;
+ m->name = "__LINE__";
+}
+
+struct MacroArg {
+ TokenArray tokens;
+};
+typedef struct MacroArg MacroArg;
+
+struct MacroArgArray {
+ size_t len;
+ size_t capacity;
+ MacroArg* data;
+};
+typedef struct MacroArgArray MacroArgArray;
+
+MacroArgArray* macroargs_new() {
+ MacroArgArray* macroargs = calloc(1, sizeof(MacroArgArray));
+ macroargs->len = 0;
+ macroargs->capacity = 2;
+ macroargs->data = calloc(macroargs->capacity, sizeof(MacroArg));
+ return macroargs;
+}
+
+void macroargs_reserve(MacroArgArray* macroargs, size_t size) {
+ if (size <= macroargs->capacity)
+ return;
+ macroargs->capacity *= 2;
+ macroargs->data = realloc(macroargs->data, macroargs->capacity * sizeof(MacroArg));
+ memset(macroargs->data + macroargs->len, 0, (macroargs->capacity - macroargs->len) * sizeof(MacroArg));
+}
+
+MacroArg* macroargs_push_new(MacroArgArray* macroargs) {
+ macroargs_reserve(macroargs, macroargs->len + 1);
+ return &macroargs->data[macroargs->len++];
+}
+
+struct PpLexer {
+ InFile* src;
+ BOOL at_bol;
+ BOOL expect_header_name;
+ TokenArray* pp_tokens;
+};
+typedef struct PpLexer PpLexer;
+
+PpLexer* pplexer_new(InFile* src) {
+ PpLexer* ppl = calloc(1, sizeof(PpLexer));
+
+ ppl->src = src;
+ ppl->at_bol = TRUE;
+ ppl->expect_header_name = FALSE;
+ ppl->pp_tokens = calloc(1, sizeof(TokenArray));
+ tokens_init(ppl->pp_tokens, 1024 * 16);
+
+ return ppl;
+}
+
+TokenKind pplexer_tokenize_pp_directive(PpLexer* ppl) {
+ // Skip whitespaces after '#'.
+ char c;
+ while (isspace((c = infile_peek_char(ppl->src)))) {
+ if (c == '\n')
+ break;
+ infile_next_char(ppl->src);
+ }
+
+ SourceLocation pp_directive_name_start_loc = ppl->src->loc;
+
+ StrBuilder builder;
+ strbuilder_init(&builder);
+ while (isalnum(infile_peek_char(ppl->src))) {
+ strbuilder_append_char(&builder, infile_peek_char(ppl->src));
+ infile_next_char(ppl->src);
+ }
+ const char* pp_directive_name = builder.buf;
+
+ if (builder.len == 0) {
+ return TokenKind_hash;
+ } else if (strcmp(pp_directive_name, "define") == 0) {
+ return TokenKind_pp_directive_define;
+ } else if (strcmp(pp_directive_name, "elif") == 0) {
+ return TokenKind_pp_directive_elif;
+ } else if (strcmp(pp_directive_name, "elifdef") == 0) {
+ return TokenKind_pp_directive_elifdef;
+ } else if (strcmp(pp_directive_name, "elifndef") == 0) {
+ return TokenKind_pp_directive_elifndef;
+ } else if (strcmp(pp_directive_name, "else") == 0) {
+ return TokenKind_pp_directive_else;
+ } else if (strcmp(pp_directive_name, "embed") == 0) {
+ return TokenKind_pp_directive_embed;
+ } else if (strcmp(pp_directive_name, "endif") == 0) {
+ return TokenKind_pp_directive_endif;
+ } else if (strcmp(pp_directive_name, "error") == 0) {
+ return TokenKind_pp_directive_error;
+ } else if (strcmp(pp_directive_name, "if") == 0) {
+ return TokenKind_pp_directive_if;
+ } else if (strcmp(pp_directive_name, "ifdef") == 0) {
+ return TokenKind_pp_directive_ifdef;
+ } else if (strcmp(pp_directive_name, "ifndef") == 0) {
+ return TokenKind_pp_directive_ifndef;
+ } else if (strcmp(pp_directive_name, "include") == 0) {
+ ppl->expect_header_name = TRUE;
+ return TokenKind_pp_directive_include;
+ } else if (strcmp(pp_directive_name, "line") == 0) {
+ return TokenKind_pp_directive_line;
+ } else if (strcmp(pp_directive_name, "pragma") == 0) {
+ return TokenKind_pp_directive_pragma;
+ } else if (strcmp(pp_directive_name, "undef") == 0) {
+ return TokenKind_pp_directive_undef;
+ } else if (strcmp(pp_directive_name, "warning") == 0) {
+ return TokenKind_pp_directive_warning;
+ } else {
+ fatal_error("%s:%d: unknown preprocessor directive (%s)", pp_directive_name_start_loc.filename,
+ pp_directive_name_start_loc.line, pp_directive_name);
+ }
+}
+
+void pplexer_tokenize_all(PpLexer* ppl) {
+ while (!infile_eof(ppl->src)) {
+ Token* tok = tokens_push_new(ppl->pp_tokens);
+ tok->loc = ppl->src->loc;
+ char c = infile_peek_char(ppl->src);
+
+ if (ppl->expect_header_name && c == '"') {
+ infile_next_char(ppl->src);
+ StrBuilder builder;
+ strbuilder_init(&builder);
+ strbuilder_append_char(&builder, '"');
+ while (1) {
+ char ch = infile_peek_char(ppl->src);
+ if (ch == '"')
+ break;
+ strbuilder_append_char(&builder, ch);
+ if (ch == '\\') {
+ infile_next_char(ppl->src);
+ strbuilder_append_char(&builder, infile_peek_char(ppl->src));
+ }
+ infile_next_char(ppl->src);
+ }
+ strbuilder_append_char(&builder, '"');
+ infile_next_char(ppl->src);
+ tok->kind = TokenKind_header_name;
+ tok->value.string = builder.buf;
+ ppl->expect_header_name = FALSE;
+ } else if (ppl->expect_header_name && c == '<') {
+ infile_next_char(ppl->src);
+ StrBuilder builder;
+ strbuilder_init(&builder);
+ strbuilder_append_char(&builder, '<');
+ while (1) {
+ char ch = infile_peek_char(ppl->src);
+ if (ch == '>')
+ break;
+ strbuilder_append_char(&builder, ch);
+ infile_next_char(ppl->src);
+ }
+ strbuilder_append_char(&builder, '>');
+ infile_next_char(ppl->src);
+ tok->kind = TokenKind_header_name;
+ tok->value.string = builder.buf;
+ ppl->expect_header_name = FALSE;
+ } else if (c == '(') {
+ infile_next_char(ppl->src);
+ tok->kind = TokenKind_paren_l;
+ } else if (c == ')') {
+ infile_next_char(ppl->src);
+ tok->kind = TokenKind_paren_r;
+ } else if (c == '{') {
+ infile_next_char(ppl->src);
+ tok->kind = TokenKind_brace_l;
+ } else if (c == '}') {
+ infile_next_char(ppl->src);
+ tok->kind = TokenKind_brace_r;
+ } else if (c == '[') {
+ infile_next_char(ppl->src);
+ tok->kind = TokenKind_bracket_l;
+ } else if (c == ']') {
+ infile_next_char(ppl->src);
+ tok->kind = TokenKind_bracket_r;
+ } else if (c == ',') {
+ infile_next_char(ppl->src);
+ tok->kind = TokenKind_comma;
+ } else if (c == ':') {
+ infile_next_char(ppl->src);
+ tok->kind = TokenKind_colon;
+ } else if (c == ';') {
+ infile_next_char(ppl->src);
+ tok->kind = TokenKind_semicolon;
+ } else if (c == '^') {
+ infile_next_char(ppl->src);
+ if (infile_consume_if(ppl->src, '=')) {
+ tok->kind = TokenKind_assign_xor;
+ } else {
+ tok->kind = TokenKind_xor;
+ }
+ } else if (c == '?') {
+ infile_next_char(ppl->src);
+ tok->kind = TokenKind_question;
+ } else if (c == '~') {
+ infile_next_char(ppl->src);
+ tok->kind = TokenKind_tilde;
+ } else if (c == '+') {
+ infile_next_char(ppl->src);
+ if (infile_consume_if(ppl->src, '=')) {
+ tok->kind = TokenKind_assign_add;
+ } else if (infile_consume_if(ppl->src, '+')) {
+ tok->kind = TokenKind_plusplus;
+ } else {
+ tok->kind = TokenKind_plus;
+ }
+ } else if (c == '|') {
+ infile_next_char(ppl->src);
+ if (infile_consume_if(ppl->src, '=')) {
+ tok->kind = TokenKind_assign_or;
+ } else if (infile_consume_if(ppl->src, '|')) {
+ tok->kind = TokenKind_oror;
+ } else {
+ tok->kind = TokenKind_or;
+ }
+ } else if (c == '&') {
+ infile_next_char(ppl->src);
+ if (infile_consume_if(ppl->src, '=')) {
+ tok->kind = TokenKind_assign_and;
+ } else if (infile_consume_if(ppl->src, '&')) {
+ tok->kind = TokenKind_andand;
+ } else {
+ tok->kind = TokenKind_and;
+ }
+ } else if (c == '-') {
+ infile_next_char(ppl->src);
+ if (infile_consume_if(ppl->src, '>')) {
+ tok->kind = TokenKind_arrow;
+ } else if (infile_consume_if(ppl->src, '=')) {
+ tok->kind = TokenKind_assign_sub;
+ } else if (infile_consume_if(ppl->src, '-')) {
+ tok->kind = TokenKind_minusminus;
+ } else {
+ tok->kind = TokenKind_minus;
+ }
+ } else if (c == '*') {
+ infile_next_char(ppl->src);
+ if (infile_consume_if(ppl->src, '=')) {
+ tok->kind = TokenKind_assign_mul;
+ } else {
+ tok->kind = TokenKind_star;
+ }
+ } else if (c == '/') {
+ infile_next_char(ppl->src);
+ if (infile_consume_if(ppl->src, '=')) {
+ tok->kind = TokenKind_assign_div;
+ } else if (infile_consume_if(ppl->src, '/')) {
+ while (!infile_eof(ppl->src) && infile_peek_char(ppl->src) != '\n') {
+ infile_next_char(ppl->src);
+ }
+ tok->kind = TokenKind_whitespace;
+ } else if (infile_consume_if(ppl->src, '*')) {
+ while (infile_peek_char(ppl->src)) {
+ if (infile_consume_if(ppl->src, '*')) {
+ if (infile_consume_if(ppl->src, '/')) {
+ break;
+ }
+ continue;
+ }
+ infile_next_char(ppl->src);
+ }
+ tok->kind = TokenKind_whitespace;
+ } else {
+ tok->kind = TokenKind_slash;
+ }
+ } else if (c == '%') {
+ infile_next_char(ppl->src);
+ if (infile_consume_if(ppl->src, '=')) {
+ tok->kind = TokenKind_assign_mod;
+ } else {
+ tok->kind = TokenKind_percent;
+ }
+ } else if (c == '.') {
+ infile_next_char(ppl->src);
+ if (infile_consume_if(ppl->src, '.')) {
+ if (infile_consume_if(ppl->src, '.')) {
+ tok->kind = TokenKind_ellipsis;
+ } else {
+ tok->kind = TokenKind_other;
+ tok->value.string = "..";
+ }
+ } else {
+ tok->kind = TokenKind_dot;
+ }
+ } else if (c == '!') {
+ infile_next_char(ppl->src);
+ if (infile_consume_if(ppl->src, '=')) {
+ tok->kind = TokenKind_ne;
+ } else {
+ tok->kind = TokenKind_not;
+ }
+ } else if (c == '=') {
+ infile_next_char(ppl->src);
+ if (infile_consume_if(ppl->src, '=')) {
+ tok->kind = TokenKind_eq;
+ } else {
+ tok->kind = TokenKind_assign;
+ }
+ } else if (c == '<') {
+ infile_next_char(ppl->src);
+ if (infile_consume_if(ppl->src, '=')) {
+ tok->kind = TokenKind_le;
+ } else if (infile_consume_if(ppl->src, '<')) {
+ if (infile_consume_if(ppl->src, '=')) {
+ tok->kind = TokenKind_assign_lshift;
+ } else {
+ tok->kind = TokenKind_lshift;
+ }
+ } else {
+ tok->kind = TokenKind_lt;
+ }
+ } else if (c == '>') {
+ infile_next_char(ppl->src);
+ if (infile_consume_if(ppl->src, '=')) {
+ tok->kind = TokenKind_ge;
+ } else if (infile_consume_if(ppl->src, '>')) {
+ if (infile_consume_if(ppl->src, '=')) {
+ tok->kind = TokenKind_assign_rshift;
+ } else {
+ tok->kind = TokenKind_rshift;
+ }
+ } else {
+ tok->kind = TokenKind_gt;
+ }
+ } else if (c == '#') {
+ infile_next_char(ppl->src);
+ if (infile_consume_if(ppl->src, '#')) {
+ tok->kind = TokenKind_hashhash;
+ } else {
+ tok->kind = ppl->at_bol ? pplexer_tokenize_pp_directive(ppl) : TokenKind_hash;
+ }
+ } else if (c == '\'') {
+ infile_next_char(ppl->src);
+ StrBuilder builder;
+ strbuilder_init(&builder);
+ strbuilder_append_char(&builder, '\'');
+ strbuilder_append_char(&builder, infile_peek_char(ppl->src));
+ if (infile_peek_char(ppl->src) == '\\') {
+ infile_next_char(ppl->src);
+ strbuilder_append_char(&builder, infile_peek_char(ppl->src));
+ }
+ strbuilder_append_char(&builder, '\'');
+ infile_next_char(ppl->src);
+ infile_next_char(ppl->src);
+ tok->kind = TokenKind_character_constant;
+ tok->value.string = builder.buf;
+ } else if (c == '"') {
+ infile_next_char(ppl->src);
+ StrBuilder builder;
+ strbuilder_init(&builder);
+ while (1) {
+ char ch = infile_peek_char(ppl->src);
+ if (ch == '"')
+ break;
+ strbuilder_append_char(&builder, ch);
+ if (ch == '\\') {
+ infile_next_char(ppl->src);
+ strbuilder_append_char(&builder, infile_peek_char(ppl->src));
+ }
+ infile_next_char(ppl->src);
+ }
+ infile_next_char(ppl->src);
+ tok->kind = TokenKind_literal_str;
+ tok->value.string = builder.buf;
+ } else if (isdigit(c)) {
+ StrBuilder builder;
+ strbuilder_init(&builder);
+ while (isdigit(infile_peek_char(ppl->src))) {
+ strbuilder_append_char(&builder, infile_peek_char(ppl->src));
+ infile_next_char(ppl->src);
+ }
+ tok->kind = TokenKind_literal_int;
+ tok->value.integer = atoi(builder.buf);
+ } else if (isalpha(c) || c == '_') {
+ StrBuilder builder;
+ strbuilder_init(&builder);
+ while (isalnum(infile_peek_char(ppl->src)) || infile_peek_char(ppl->src) == '_') {
+ strbuilder_append_char(&builder, infile_peek_char(ppl->src));
+ infile_next_char(ppl->src);
+ }
+ tok->kind = TokenKind_ident;
+ tok->value.string = builder.buf;
+ } else if (c == '\n') {
+ infile_next_char(ppl->src);
+ tok->kind = TokenKind_newline;
+ } else if (isspace(c)) {
+ while (isspace((c = infile_peek_char(ppl->src)))) {
+ if (c == '\n')
+ break;
+ infile_next_char(ppl->src);
+ }
+ if (ppl->at_bol && infile_peek_char(ppl->src) == '#') {
+ infile_next_char(ppl->src);
+ tok->kind = pplexer_tokenize_pp_directive(ppl);
+ } else {
+ tok->kind = TokenKind_whitespace;
+ }
+ } else {
+ infile_next_char(ppl->src);
+ tok->kind = TokenKind_other;
+ char* buf = calloc(2, sizeof(char));
+ buf[0] = c;
+ tok->value.string = buf;
+ }
+ ppl->at_bol = tok->kind == TokenKind_newline;
+ }
+ Token* eof_tok = tokens_push_new(ppl->pp_tokens);
+ eof_tok->loc = ppl->src->loc;
+ eof_tok->kind = TokenKind_eof;
+}
+
+TokenArray* pp_tokenize(InFile* src) {
+ PpLexer* ppl = pplexer_new(src);
+ pplexer_tokenize_all(ppl);
+ return ppl->pp_tokens;
+}
+
+struct Preprocessor {
+ TokenArray* pp_tokens;
+ int pos;
+ MacroArray* macros;
+ int include_depth;
+ BOOL skip_pp_tokens;
+ char** include_paths;
+ int n_include_paths;
+};
+typedef struct Preprocessor Preprocessor;
+
+TokenArray* do_preprocess(InFile* src, int depth, MacroArray* macros);
+
+Preprocessor* preprocessor_new(TokenArray* pp_tokens, int include_depth, MacroArray* macros) {
+ if (include_depth >= 32) {
+ fatal_error("include depth limit exceeded");
+ }
+
+ Preprocessor* pp = calloc(1, sizeof(Preprocessor));
+ pp->pp_tokens = pp_tokens;
+ pp->macros = macros;
+ pp->include_depth = include_depth;
+ pp->include_paths = calloc(16, sizeof(char*));
+
+ return pp;
+}
+
+Token* pp_token_at(Preprocessor* pp, int i) {
+ return &pp->pp_tokens->data[i];
+}
+
+Token* peek_pp_token(Preprocessor* pp) {
+ return pp_token_at(pp, pp->pos);
+}
+
+Token* next_pp_token(Preprocessor* pp) {
+ return pp_token_at(pp, pp->pos++);
+}
+
+BOOL pp_eof(Preprocessor* pp) {
+ return peek_pp_token(pp)->kind == TokenKind_eof;
+}
+
+int find_macro(Preprocessor* pp, const char* name) {
+ for (int i = 0; i < pp->macros->len; ++i) {
+ if (pp->macros->data[i].kind == MacroKind_undef)
+ continue;
+ if (strcmp(pp->macros->data[i].name, name) == 0) {
+ return i;
+ }
+ }
+ return -1;
+}
+
+void undef_macro(Preprocessor* pp, int idx) {
+ pp->macros->data[idx].kind = MacroKind_undef;
+ // TODO: Can predefined macro like __FILE__ be undefined?
+}
+
+void add_include_path(Preprocessor* pp, char* include_path) {
+ pp->include_paths[pp->n_include_paths] = include_path;
+ ++pp->n_include_paths;
+}
+
+BOOL skip_pp_tokens(Preprocessor* pp) {
+ // TODO: support nested #if
+ return pp->skip_pp_tokens;
+}
+
+void skip_whitespaces(Preprocessor* pp) {
+ while (!pp_eof(pp) && peek_pp_token(pp)->kind == TokenKind_whitespace) {
+ next_pp_token(pp);
+ }
+}
+
+void seek_to_next_newline(Preprocessor* pp) {
+ while (!pp_eof(pp)) {
+ Token* tok = peek_pp_token(pp);
+ if (tok->kind == TokenKind_newline) {
+ break;
+ }
+ next_pp_token(pp);
+ }
+}
+
+void make_token_whitespace(Token* tok) {
+ tok->kind = TokenKind_whitespace;
+ tok->value.string = NULL;
+}
+
+void remove_directive_tokens(Preprocessor* pp, int start, int end) {
+ for (int i = start; i < end; ++i) {
+ make_token_whitespace(pp_token_at(pp, i));
+ }
+}
+
+void process_endif_directive(Preprocessor* pp, int directive_token_pos) {
+ next_pp_token(pp);
+ pp->skip_pp_tokens = FALSE;
+ remove_directive_tokens(pp, directive_token_pos, pp->pos);
+}
+
+void process_else_directive(Preprocessor* pp, int directive_token_pos) {
+ next_pp_token(pp);
+ pp->skip_pp_tokens = !pp->skip_pp_tokens;
+ remove_directive_tokens(pp, directive_token_pos, pp->pos);
+}
+
+void process_elif_directive(Preprocessor* pp, int directive_token_pos) {
+ unimplemented();
+}
+
+BOOL pp_eval_constant_expression(TokenArray*);
+int replace_pp_tokens(Preprocessor*, int, int, TokenArray*);
+BOOL expand_macro(Preprocessor*);
+
+void process_if_directive(Preprocessor* pp, int directive_token_pos) {
+ next_pp_token(pp);
+ int condition_expression_start_pos = pp->pos;
+
+ while (!pp_eof(pp)) {
+ Token* tok = peek_pp_token(pp);
+ if (tok->kind == TokenKind_newline) {
+ break;
+ } else if (tok->kind == TokenKind_ident) {
+ if (strcmp(tok->value.string, "defined") == 0) {
+ int defined_pos = pp->pos;
+ // 'defined' <ws>* '(' <ws>* <ident> <ws>* ')'
+ // 'defined' <ws>* <ident>
+ next_pp_token(pp);
+ skip_whitespaces(pp);
+ Token* macro_name;
+ if (peek_pp_token(pp)->kind == TokenKind_paren_l) {
+ next_pp_token(pp);
+ skip_whitespaces(pp);
+ macro_name = next_pp_token(pp);
+ if (macro_name->kind != TokenKind_ident) {
+ fatal_error("invalid defined");
+ }
+ skip_whitespaces(pp);
+ if (next_pp_token(pp)->kind != TokenKind_paren_r) {
+ fatal_error("invalid defined");
+ }
+ } else {
+ macro_name = next_pp_token(pp);
+ if (macro_name->kind != TokenKind_ident) {
+ fatal_error("invalid defined");
+ }
+ }
+ BOOL is_defined = find_macro(pp, macro_name->value.string) != -1;
+ TokenArray defined_results;
+ tokens_init(&defined_results, 1);
+ Token* defined_result = tokens_push_new(&defined_results);
+ defined_result->kind = TokenKind_literal_int;
+ defined_result->value.integer = is_defined;
+ pp->pos = replace_pp_tokens(pp, defined_pos, pp->pos, &defined_results);
+ } else {
+ BOOL expanded = expand_macro(pp);
+ if (expanded) {
+ // A macro may expand to another macro. Re-scan the expanded tokens.
+ // TODO: if the macro is defined recursively, it causes infinite loop.
+ } else {
+ next_pp_token(pp);
+ }
+ }
+ } else {
+ next_pp_token(pp);
+ }
+ }
+
+ // all remaining identifiers other than true (including those lexically identical to keywords such as false) are
+ // replaced with the pp-number 0, true is replaced with pp-number 1, and then each preprocessing token is converted
+ // into a token.
+ for (int pos = condition_expression_start_pos; pos < pp->pos; ++pos) {
+ Token* tok = pp_token_at(pp, pos);
+ if (tok->kind == TokenKind_ident) {
+ BOOL is_true = strcmp(tok->value.string, "true") == 0;
+ tok->kind = TokenKind_literal_int;
+ tok->value.integer = is_true;
+ }
+ }
+
+ int condition_expression_tokens_len = pp->pos - condition_expression_start_pos;
+ TokenArray condition_expression_tokens;
+ // +1 to add EOF token at the end.
+ tokens_init(&condition_expression_tokens, condition_expression_tokens_len + 1);
+ for (int i = 0; i < condition_expression_tokens_len; ++i) {
+ *tokens_push_new(&condition_expression_tokens) = *pp_token_at(pp, condition_expression_start_pos + i);
+ }
+ Token* eof_tok = tokens_push_new(&condition_expression_tokens);
+ eof_tok->kind = TokenKind_eof;
+
+ BOOL result = pp_eval_constant_expression(&condition_expression_tokens);
+
+ pp->skip_pp_tokens = !result;
+
+ remove_directive_tokens(pp, directive_token_pos, pp->pos);
+}
+
+void process_ifdef_directive(Preprocessor* pp, int directive_token_pos) {
+ next_pp_token(pp);
+ skip_whitespaces(pp);
+ Token* macro_name = peek_pp_token(pp);
+ if (macro_name->kind == TokenKind_ident) {
+ next_pp_token(pp);
+ pp->skip_pp_tokens = find_macro(pp, macro_name->value.string) == -1;
+ }
+ remove_directive_tokens(pp, directive_token_pos, pp->pos);
+}
+
+void process_ifndef_directive(Preprocessor* pp, int directive_token_pos) {
+ next_pp_token(pp);
+ skip_whitespaces(pp);
+ Token* macro_name = peek_pp_token(pp);
+ if (macro_name->kind == TokenKind_ident) {
+ next_pp_token(pp);
+ pp->skip_pp_tokens = find_macro(pp, macro_name->value.string) != -1;
+ }
+ remove_directive_tokens(pp, directive_token_pos, pp->pos);
+}
+
+const char* read_include_header_name(Preprocessor* pp) {
+ Token* tok = next_pp_token(pp);
+ if (tok->kind != TokenKind_header_name) {
+ fatal_error("%s:%d: invalid #include", tok->loc.filename, tok->loc.line);
+ }
+
+ return tok->value.string;
+}
+
+const char* resolve_include_name(Preprocessor* pp, const char* include_name) {
+ if (include_name[0] == '"') {
+ return strndup(include_name + 1, strlen(include_name) - 2);
+ } else {
+ for (int i = 0; i < pp->n_include_paths; ++i) {
+ char* buf = calloc(strlen(include_name) - 2 + 1 + strlen(pp->include_paths[i]) + 1, sizeof(char));
+ sprintf(buf, "%s/%.*s", pp->include_paths[i], strlen(include_name) - 2, include_name + 1);
+ if (access(buf, F_OK | R_OK) == 0) {
+ return buf;
+ }
+ }
+ return NULL;
+ }
+}
+
+int replace_pp_tokens(Preprocessor* pp, int dest_start, int dest_end, TokenArray* source_tokens) {
+ int n_tokens_to_remove = dest_end - dest_start;
+ int n_tokens_after_dest = pp->pp_tokens->len - dest_end;
+ int shift_amount;
+
+ if (n_tokens_to_remove < source_tokens->len) {
+ // Move existing tokens backward to make room.
+ shift_amount = source_tokens->len - n_tokens_to_remove;
+ tokens_reserve(pp->pp_tokens, pp->pp_tokens->len + shift_amount);
+ memmove(pp_token_at(pp, dest_end + shift_amount), pp_token_at(pp, dest_end),
+ n_tokens_after_dest * sizeof(Token));
+ pp->pp_tokens->len += shift_amount;
+ } else if (source_tokens->len < n_tokens_to_remove) {
+ // Move existing tokens forward to reduce room.
+ shift_amount = n_tokens_to_remove - source_tokens->len;
+ memmove(pp_token_at(pp, dest_start + source_tokens->len), pp_token_at(pp, dest_end),
+ n_tokens_after_dest * sizeof(Token));
+ pp->pp_tokens->len -= shift_amount;
+ memset(pp_token_at(pp, pp->pp_tokens->len), 0, shift_amount * sizeof(Token));
+ }
+
+ memcpy(pp_token_at(pp, dest_start), source_tokens->data, source_tokens->len * sizeof(Token));
+
+ return dest_start + source_tokens->len;
+}
+
+int replace_single_pp_token(Preprocessor* pp, int dest, Token* source_tok) {
+ TokenArray tokens;
+ tokens_init(&tokens, 1);
+ *tokens_push_new(&tokens) = *source_tok;
+ replace_pp_tokens(pp, dest, dest + 1, &tokens);
+}
+
+void expand_include_directive(Preprocessor* pp, int directive_token_pos, const char* include_name) {
+ InFile* include_source = infile_open(include_name);
+ if (!include_source) {
+ fatal_error("cannot open include file: %s", include_name);
+ }
+
+ TokenArray* include_pp_tokens = do_preprocess(include_source, pp->include_depth + 1, pp->macros);
+ tokens_pop(include_pp_tokens); // pop EOF token
+ pp->pos = replace_pp_tokens(pp, directive_token_pos, pp->pos, include_pp_tokens);
+}
+
+void process_include_directive(Preprocessor* pp, int directive_token_pos) {
+ next_pp_token(pp);
+ skip_whitespaces(pp);
+ const char* include_name = read_include_header_name(pp);
+ const char* include_name_resolved = resolve_include_name(pp, include_name);
+ if (include_name_resolved == NULL) {
+ fatal_error("cannot resolve include file name: %s", include_name);
+ }
+ expand_include_directive(pp, directive_token_pos, include_name_resolved);
+}
+
+// ws ::= many0(<whitespace>)
+// macro-parameters ::= '(' <ws> opt(<identifier> <ws> many0(',' <ws> <identifier> <ws>)) ')'
+TokenArray* pp_parse_macro_parameters(Preprocessor* pp) {
+ TokenArray* parameters = calloc(1, sizeof(TokenArray));
+ tokens_init(parameters, 2);
+
+ // '(' is consumed by caller.
+ skip_whitespaces(pp);
+ Token* tok = next_pp_token(pp);
+ if (tok->kind == TokenKind_ident) {
+ *tokens_push_new(parameters) = *tok;
+ skip_whitespaces(pp);
+ while (peek_pp_token(pp)->kind == TokenKind_comma) {
+ next_pp_token(pp);
+ skip_whitespaces(pp);
+ tok = next_pp_token(pp);
+ if (tok->kind != TokenKind_ident) {
+ fatal_error("%s:%d: invalid macro syntax", tok->loc.filename, tok->loc.line);
+ }
+ *tokens_push_new(parameters) = *tok;
+ }
+ tok = next_pp_token(pp);
+ }
+ if (tok->kind != TokenKind_paren_r) {
+ fatal_error("%s:%d: invalid macro syntax", tok->loc.filename, tok->loc.line);
+ }
+
+ return parameters;
+}
+
+void process_define_directive(Preprocessor* pp, int directive_token_pos) {
+ next_pp_token(pp);
+ skip_whitespaces(pp);
+ Token* macro_name = next_pp_token(pp);
+
+ if (macro_name->kind != TokenKind_ident) {
+ fatal_error("%s:%d: invalid #define syntax", macro_name->loc.filename, macro_name->loc.line);
+ }
+
+ if (peek_pp_token(pp)->kind == TokenKind_paren_l) {
+ next_pp_token(pp);
+ TokenArray* parameters = pp_parse_macro_parameters(pp);
+ int replacements_start_pos = pp->pos;
+ seek_to_next_newline(pp);
+ if (pp_eof(pp)) {
+ fatal_error("%s:%d: invalid #define syntax", macro_name->loc.filename, macro_name->loc.line);
+ }
+ Macro* macro = macros_push_new(pp->macros);
+ macro->kind = MacroKind_func;
+ macro->name = macro_name->value.string;
+ macro->parameters = *parameters;
+ int n_replacements = pp->pos - replacements_start_pos;
+ tokens_init(&macro->replacements, n_replacements);
+ for (int i = 0; i < n_replacements; ++i) {
+ *tokens_push_new(&macro->replacements) = *pp_token_at(pp, replacements_start_pos + i);
+ }
+ } else {
+ int replacements_start_pos = pp->pos;
+ seek_to_next_newline(pp);
+ if (pp_eof(pp)) {
+ fatal_error("%s:%d: invalid #define syntax", macro_name->loc.filename, macro_name->loc.line);
+ }
+ Macro* macro = macros_push_new(pp->macros);
+ macro->kind = MacroKind_obj;
+ macro->name = macro_name->value.string;
+ int n_replacements = pp->pos - replacements_start_pos;
+ tokens_init(&macro->replacements, n_replacements);
+ for (int i = 0; i < n_replacements; ++i) {
+ *tokens_push_new(&macro->replacements) = *pp_token_at(pp, replacements_start_pos + i);
+ }
+ }
+ remove_directive_tokens(pp, directive_token_pos, pp->pos);
+}
+
+void process_undef_directive(Preprocessor* pp, int directive_token_pos) {
+ next_pp_token(pp);
+ skip_whitespaces(pp);
+ Token* macro_name = peek_pp_token(pp);
+ if (macro_name->kind == TokenKind_ident) {
+ next_pp_token(pp);
+ int macro_idx = find_macro(pp, macro_name->value.string);
+ if (macro_idx != -1) {
+ undef_macro(pp, macro_idx);
+ }
+ }
+ remove_directive_tokens(pp, directive_token_pos, pp->pos);
+}
+
+void process_line_directive(Preprocessor* pp, int directive_token_pos) {
+ unimplemented();
+}
+
+void process_error_directive(Preprocessor* pp, int directive_token_pos) {
+ unimplemented();
+}
+
+void process_pragma_directive(Preprocessor* pp, int directive_token_pos) {
+ unimplemented();
+}
+
+// ws ::= many0(<Whitespace>)
+// macro-arguments ::= '(' <ws> opt(<any-token> <ws> many0(',' <ws> <any-token> <ws>)) ')'
+MacroArgArray* pp_parse_macro_arguments(Preprocessor* pp) {
+ MacroArgArray* args = macroargs_new();
+
+ Token* tok = next_pp_token(pp);
+ if (tok->kind != TokenKind_paren_l) {
+ fatal_error("%s:%d: invalid macro syntax", tok->loc.filename, tok->loc.line);
+ }
+ skip_whitespaces(pp);
+ tok = next_pp_token(pp);
+ if (tok->kind != TokenKind_paren_r) {
+ MacroArg* arg = macroargs_push_new(args);
+ tokens_init(&arg->tokens, 1);
+ *tokens_push_new(&arg->tokens) = *tok;
+ skip_whitespaces(pp);
+ while (peek_pp_token(pp)->kind == TokenKind_comma) {
+ next_pp_token(pp);
+ skip_whitespaces(pp);
+ tok = next_pp_token(pp);
+ arg = macroargs_push_new(args);
+ tokens_init(&arg->tokens, 1);
+ *tokens_push_new(&arg->tokens) = *tok;
+ }
+ tok = next_pp_token(pp);
+ }
+ if (tok->kind != TokenKind_paren_r) {
+ fatal_error("%s:%d: invalid macro syntax", tok->loc.filename, tok->loc.line);
+ }
+
+ return args;
+}
+
+BOOL expand_macro(Preprocessor* pp) {
+ int macro_name_pos = pp->pos;
+ Token* macro_name = next_pp_token(pp);
+ int macro_idx = find_macro(pp, macro_name->value.string);
+ if (macro_idx == -1) {
+ return FALSE;
+ }
+
+ SourceLocation original_loc = macro_name->loc;
+ Macro* macro = &pp->macros->data[macro_idx];
+ if (macro->kind == MacroKind_func) {
+ MacroArgArray* args = pp_parse_macro_arguments(pp);
+ replace_pp_tokens(pp, macro_name_pos, pp->pos, &macro->replacements);
+ for (int i = 0; i < macro->replacements.len; ++i) {
+ Token* tok = pp_token_at(pp, macro_name_pos + i);
+ int macro_param_idx = macro_find_param(macro, tok);
+ if (macro_param_idx != -1) {
+ replace_pp_tokens(pp, macro_name_pos + i, macro_name_pos + i + 1, &args->data[macro_param_idx].tokens);
+ }
+ }
+ // Inherit a source location from the original macro token.
+ for (int i = 0; i < macro->replacements.len; ++i) {
+ pp_token_at(pp, macro_name_pos + i)->loc = original_loc;
+ }
+ } else if (macro->kind == MacroKind_obj) {
+ replace_pp_tokens(pp, macro_name_pos, macro_name_pos + 1, &macro->replacements);
+ // Inherit a source location from the original macro token.
+ for (int i = 0; i < macro->replacements.len; ++i) {
+ pp_token_at(pp, macro_name_pos + i)->loc = original_loc;
+ }
+ } else if (macro->kind == MacroKind_builtin_file) {
+ Token file_tok;
+ file_tok.kind = TokenKind_literal_str;
+ file_tok.value.string = macro_name->loc.filename;
+ file_tok.loc.filename = NULL;
+ file_tok.loc.line = 0;
+ replace_single_pp_token(pp, macro_name_pos, &file_tok);
+ } else if (macro->kind == MacroKind_builtin_line) {
+ Token line_tok;
+ line_tok.kind = TokenKind_literal_int;
+ line_tok.value.integer = macro_name->loc.line;
+ line_tok.loc.filename = NULL;
+ line_tok.loc.line = 0;
+ replace_single_pp_token(pp, macro_name_pos, &line_tok);
+ } else {
+ unreachable();
+ }
+ return TRUE;
+}
+
+void process_pp_directive(Preprocessor* pp) {
+ int first_token_pos = pp->pos;
+ Token* tok = peek_pp_token(pp);
+ if (tok->kind == TokenKind_pp_directive_endif) {
+ process_endif_directive(pp, first_token_pos);
+ } else if (tok->kind == TokenKind_pp_directive_else) {
+ process_else_directive(pp, first_token_pos);
+ } else if (tok->kind == TokenKind_pp_directive_elif) {
+ process_elif_directive(pp, first_token_pos);
+ } else if (skip_pp_tokens(pp)) {
+ make_token_whitespace(next_pp_token(pp));
+ } else if (tok->kind == TokenKind_pp_directive_if) {
+ process_if_directive(pp, first_token_pos);
+ } else if (tok->kind == TokenKind_pp_directive_ifdef) {
+ process_ifdef_directive(pp, first_token_pos);
+ } else if (tok->kind == TokenKind_pp_directive_ifndef) {
+ process_ifndef_directive(pp, first_token_pos);
+ } else if (tok->kind == TokenKind_pp_directive_include) {
+ process_include_directive(pp, first_token_pos);
+ } else if (tok->kind == TokenKind_pp_directive_define) {
+ process_define_directive(pp, first_token_pos);
+ } else if (tok->kind == TokenKind_pp_directive_undef) {
+ process_undef_directive(pp, first_token_pos);
+ } else if (tok->kind == TokenKind_pp_directive_line) {
+ process_line_directive(pp, first_token_pos);
+ } else if (tok->kind == TokenKind_pp_directive_error) {
+ process_error_directive(pp, first_token_pos);
+ } else if (tok->kind == TokenKind_pp_directive_pragma) {
+ process_pragma_directive(pp, first_token_pos);
+ } else if (tok->kind == TokenKind_ident) {
+ BOOL expanded = expand_macro(pp);
+ if (expanded) {
+ // A macro may expand to another macro. Re-scan the expanded tokens.
+ // TODO: if the macro is defined recursively, it causes infinite loop.
+ } else {
+ next_pp_token(pp);
+ }
+ } else {
+ next_pp_token(pp);
+ }
+}
+
+void process_pp_directives(Preprocessor* pp) {
+ while (!pp_eof(pp)) {
+ process_pp_directive(pp);
+ }
+}
+
+void pp_dump(Token* t, BOOL include_whitespace) {
+ for (; t->kind != TokenKind_eof; ++t) {
+ if (t->kind == TokenKind_whitespace && !include_whitespace) {
+ continue;
+ }
+ fprintf(stderr, "%s\n", token_stringify(t));
+ }
+}
+
+char* get_ducc_include_path() {
+ const char* self_dir = get_self_dir();
+ char* buf = calloc(strlen(self_dir) + strlen("/include") + 1, sizeof(char));
+ sprintf(buf, "%s/include", self_dir);
+ return buf;
+}
+
+TokenArray* do_preprocess(InFile* src, int depth, MacroArray* macros) {
+ TokenArray* pp_tokens = pp_tokenize(src);
+ Preprocessor* pp = preprocessor_new(pp_tokens, depth, macros);
+ add_include_path(pp, get_ducc_include_path());
+ add_include_path(pp, "/usr/include/x86_64-linux-gnu");
+ add_include_path(pp, "/usr/include");
+ process_pp_directives(pp);
+ return pp->pp_tokens;
+}
+
+TokenArray* preprocess(InFile* src) {
+ MacroArray* macros = macros_new();
+ add_predefined_macros(macros);
+ return do_preprocess(src, 0, macros);
+}
diff --git a/src/std.h b/src/std.h
new file mode 100644
index 0000000..c1dcd66
--- /dev/null
+++ b/src/std.h
@@ -0,0 +1,50 @@
+#include <stddef.h>
+
+struct FILE;
+typedef struct FILE FILE;
+
+extern FILE* stdin;
+extern FILE* stdout;
+extern FILE* stderr;
+
+int atoi(const char*);
+void* calloc(size_t, size_t);
+void exit(int);
+int fclose(FILE*);
+int fprintf(FILE*, const char*, ...);
+char* fgets(char*, int, FILE*);
+FILE* fopen(const char*, const char*);
+int getchar(void);
+int isalnum(int);
+int isalpha(int);
+int isdigit(int);
+int isspace(int);
+void* memcpy(void*, const void*, size_t);
+void* memmove(void*, const void*, size_t);
+void* memset(void*, int, size_t);
+int printf(const char*, ...);
+void* realloc(void*, size_t);
+int sprintf(char*, const char*, ...);
+int strcmp(const char*, const char*);
+size_t strlen(const char*);
+int strncmp(const char*, const char*, size_t);
+char* strndup(const char*, size_t);
+char* strstr(const char*, const char*);
+
+#include <stdarg.h>
+
+int vfprintf(FILE*, const char*, va_list);
+
+#define F_OK 0
+#define R_OK 4
+int access(const char*, int);
+
+#define PATH_MAX 4096
+
+typedef long ssize_t;
+ssize_t readlink(const char*, char*, size_t);
+char* dirname(char*);
+
+#define BOOL int
+#define TRUE 1
+#define FALSE 0
diff --git a/src/sys.c b/src/sys.c
new file mode 100644
index 0000000..aa7b13d
--- /dev/null
+++ b/src/sys.c
@@ -0,0 +1,15 @@
+char* get_self_path() {
+ char* buf = calloc(PATH_MAX, sizeof(char));
+ ssize_t len = readlink("/proc/self/exe", buf, PATH_MAX - 1);
+ if (len == -1 || len == PATH_MAX - 1) {
+ return NULL;
+ }
+ buf[len] = '\0';
+ return buf;
+}
+
+// It returns a path not including final / except for root directory.
+char* get_self_dir() {
+ char* path = get_self_path();
+ return dirname(path);
+}
diff --git a/src/tokenize.c b/src/tokenize.c
new file mode 100644
index 0000000..a7e99b2
--- /dev/null
+++ b/src/tokenize.c
@@ -0,0 +1,175 @@
+struct Lexer {
+ TokenArray* src;
+ TokenArray* tokens;
+};
+typedef struct Lexer Lexer;
+
+Lexer* lexer_new(TokenArray* pp_tokens) {
+ Lexer* l = calloc(1, sizeof(Lexer));
+ l->src = pp_tokens;
+ l->tokens = calloc(1, sizeof(TokenArray));
+ // l->tokens need not store whitespace tokens.
+ tokens_init(l->tokens, pp_tokens->len / 2);
+ return l;
+}
+
+void tokenize_all(Lexer* l) {
+ for (int pos = 0; pos < l->src->len; ++pos) {
+ Token* pp_tok = &l->src->data[pos];
+ TokenKind k = pp_tok->kind;
+ if (k == TokenKind_whitespace || k == TokenKind_newline) {
+ continue;
+ }
+ Token* tok = tokens_push_new(l->tokens);
+ tok->loc = pp_tok->loc;
+ if (k == TokenKind_character_constant) {
+ tok->kind = TokenKind_literal_int;
+ int ch = pp_tok->value.string[1];
+ if (ch == '\\') {
+ ch = pp_tok->value.string[2];
+ if (ch == 'a') {
+ ch = '\a';
+ } else if (ch == 'b') {
+ ch = '\b';
+ } else if (ch == 'f') {
+ ch = '\f';
+ } else if (ch == 'n') {
+ ch = '\n';
+ } else if (ch == 'r') {
+ ch = '\r';
+ } else if (ch == 't') {
+ ch = '\t';
+ } else if (ch == 'v') {
+ ch = '\v';
+ } else if (ch == '0') {
+ ch = '\0';
+ }
+ }
+ tok->value.integer = ch;
+ } else if (k == TokenKind_ident) {
+ if (strcmp(pp_tok->value.string, "alignas") == 0) {
+ tok->kind = TokenKind_keyword_alignas;
+ } else if (strcmp(pp_tok->value.string, "alignof") == 0) {
+ tok->kind = TokenKind_keyword_alignof;
+ } else if (strcmp(pp_tok->value.string, "auto") == 0) {
+ tok->kind = TokenKind_keyword_auto;
+ } else if (strcmp(pp_tok->value.string, "bool") == 0) {
+ tok->kind = TokenKind_keyword_bool;
+ } else if (strcmp(pp_tok->value.string, "break") == 0) {
+ tok->kind = TokenKind_keyword_break;
+ } else if (strcmp(pp_tok->value.string, "case") == 0) {
+ tok->kind = TokenKind_keyword_case;
+ } else if (strcmp(pp_tok->value.string, "char") == 0) {
+ tok->kind = TokenKind_keyword_char;
+ } else if (strcmp(pp_tok->value.string, "const") == 0) {
+ tok->kind = TokenKind_keyword_const;
+ } else if (strcmp(pp_tok->value.string, "constexpr") == 0) {
+ tok->kind = TokenKind_keyword_constexpr;
+ } else if (strcmp(pp_tok->value.string, "continue") == 0) {
+ tok->kind = TokenKind_keyword_continue;
+ } else if (strcmp(pp_tok->value.string, "default") == 0) {
+ tok->kind = TokenKind_keyword_default;
+ } else if (strcmp(pp_tok->value.string, "do") == 0) {
+ tok->kind = TokenKind_keyword_do;
+ } else if (strcmp(pp_tok->value.string, "double") == 0) {
+ tok->kind = TokenKind_keyword_double;
+ } else if (strcmp(pp_tok->value.string, "else") == 0) {
+ tok->kind = TokenKind_keyword_else;
+ } else if (strcmp(pp_tok->value.string, "enum") == 0) {
+ tok->kind = TokenKind_keyword_enum;
+ } else if (strcmp(pp_tok->value.string, "extern") == 0) {
+ tok->kind = TokenKind_keyword_extern;
+ } else if (strcmp(pp_tok->value.string, "false") == 0) {
+ tok->kind = TokenKind_keyword_false;
+ } else if (strcmp(pp_tok->value.string, "float") == 0) {
+ tok->kind = TokenKind_keyword_float;
+ } else if (strcmp(pp_tok->value.string, "for") == 0) {
+ tok->kind = TokenKind_keyword_for;
+ } else if (strcmp(pp_tok->value.string, "goto") == 0) {
+ tok->kind = TokenKind_keyword_goto;
+ } else if (strcmp(pp_tok->value.string, "if") == 0) {
+ tok->kind = TokenKind_keyword_if;
+ } else if (strcmp(pp_tok->value.string, "inline") == 0) {
+ tok->kind = TokenKind_keyword_inline;
+ } else if (strcmp(pp_tok->value.string, "int") == 0) {
+ tok->kind = TokenKind_keyword_int;
+ } else if (strcmp(pp_tok->value.string, "long") == 0) {
+ tok->kind = TokenKind_keyword_long;
+ } else if (strcmp(pp_tok->value.string, "nullptr") == 0) {
+ tok->kind = TokenKind_keyword_nullptr;
+ } else if (strcmp(pp_tok->value.string, "register") == 0) {
+ tok->kind = TokenKind_keyword_register;
+ } else if (strcmp(pp_tok->value.string, "restrict") == 0) {
+ tok->kind = TokenKind_keyword_restrict;
+ } else if (strcmp(pp_tok->value.string, "return") == 0) {
+ tok->kind = TokenKind_keyword_return;
+ } else if (strcmp(pp_tok->value.string, "short") == 0) {
+ tok->kind = TokenKind_keyword_short;
+ } else if (strcmp(pp_tok->value.string, "signed") == 0) {
+ tok->kind = TokenKind_keyword_signed;
+ } else if (strcmp(pp_tok->value.string, "sizeof") == 0) {
+ tok->kind = TokenKind_keyword_sizeof;
+ } else if (strcmp(pp_tok->value.string, "static") == 0) {
+ tok->kind = TokenKind_keyword_static;
+ } else if (strcmp(pp_tok->value.string, "static_assert") == 0) {
+ tok->kind = TokenKind_keyword_static_assert;
+ } else if (strcmp(pp_tok->value.string, "struct") == 0) {
+ tok->kind = TokenKind_keyword_struct;
+ } else if (strcmp(pp_tok->value.string, "switch") == 0) {
+ tok->kind = TokenKind_keyword_switch;
+ } else if (strcmp(pp_tok->value.string, "thread_local") == 0) {
+ tok->kind = TokenKind_keyword_thread_local;
+ } else if (strcmp(pp_tok->value.string, "true") == 0) {
+ tok->kind = TokenKind_keyword_true;
+ } else if (strcmp(pp_tok->value.string, "typedef") == 0) {
+ tok->kind = TokenKind_keyword_typedef;
+ } else if (strcmp(pp_tok->value.string, "typeof") == 0) {
+ tok->kind = TokenKind_keyword_typeof;
+ } else if (strcmp(pp_tok->value.string, "typeof_unqual") == 0) {
+ tok->kind = TokenKind_keyword_typeof_unqual;
+ } else if (strcmp(pp_tok->value.string, "union") == 0) {
+ tok->kind = TokenKind_keyword_union;
+ } else if (strcmp(pp_tok->value.string, "unsigned") == 0) {
+ tok->kind = TokenKind_keyword_unsigned;
+ } else if (strcmp(pp_tok->value.string, "void") == 0) {
+ tok->kind = TokenKind_keyword_void;
+ } else if (strcmp(pp_tok->value.string, "volatile") == 0) {
+ tok->kind = TokenKind_keyword_volatile;
+ } else if (strcmp(pp_tok->value.string, "while") == 0) {
+ tok->kind = TokenKind_keyword_while;
+ } else if (strcmp(pp_tok->value.string, "_Atomic") == 0) {
+ tok->kind = TokenKind_keyword__Atomic;
+ } else if (strcmp(pp_tok->value.string, "_BitInt") == 0) {
+ tok->kind = TokenKind_keyword__BitInt;
+ } else if (strcmp(pp_tok->value.string, "_Complex") == 0) {
+ tok->kind = TokenKind_keyword__Complex;
+ } else if (strcmp(pp_tok->value.string, "_Decimal128") == 0) {
+ tok->kind = TokenKind_keyword__Decimal128;
+ } else if (strcmp(pp_tok->value.string, "_Decimal32") == 0) {
+ tok->kind = TokenKind_keyword__Decimal32;
+ } else if (strcmp(pp_tok->value.string, "_Decimal64") == 0) {
+ tok->kind = TokenKind_keyword__Decimal64;
+ } else if (strcmp(pp_tok->value.string, "_Generic") == 0) {
+ tok->kind = TokenKind_keyword__Generic;
+ } else if (strcmp(pp_tok->value.string, "_Imaginary") == 0) {
+ tok->kind = TokenKind_keyword__Imaginary;
+ } else if (strcmp(pp_tok->value.string, "_Noreturn") == 0) {
+ tok->kind = TokenKind_keyword__Noreturn;
+ } else {
+ tok->kind = TokenKind_ident;
+ tok->value = pp_tok->value;
+ }
+ } else if (k == TokenKind_other) {
+ unreachable();
+ } else {
+ tok->kind = pp_tok->kind;
+ tok->value = pp_tok->value;
+ }
+ }
+}
+
+TokenArray* tokenize(TokenArray* pp_tokens) {
+ Lexer* l = lexer_new(pp_tokens);
+ tokenize_all(l);
+ return l->tokens;
+}