aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authornsfisis <nsfisis@gmail.com>2025-07-21 10:09:00 +0900
committernsfisis <nsfisis@gmail.com>2025-08-15 10:04:28 +0900
commit6daa56323634e1142f2d22a756a77a74382cf3a7 (patch)
tree85355ad1ad0cf5dca837f8b4cd7e6b09eaa7bd9d
parent16bbc2e0d72f94f8061ba294d1776edfe5bf8a55 (diff)
downloadducc-6daa56323634e1142f2d22a756a77a74382cf3a7.tar.gz
ducc-6daa56323634e1142f2d22a756a77a74382cf3a7.tar.zst
ducc-6daa56323634e1142f2d22a756a77a74382cf3a7.zip
feat: separate main.c
-rw-r--r--analyze.c2
-rw-r--r--ast.c352
-rw-r--r--codegen.c439
-rw-r--r--common.c41
-rw-r--r--main.c2679
-rw-r--r--parse.c1009
-rw-r--r--preprocess.c431
-rw-r--r--std.h31
-rw-r--r--tests/060.sh20
-rw-r--r--tokenize.c368
10 files changed, 2704 insertions, 2668 deletions
diff --git a/analyze.c b/analyze.c
new file mode 100644
index 0000000..53ce1b4
--- /dev/null
+++ b/analyze.c
@@ -0,0 +1,2 @@
+void analyze(Program* prog) {
+}
diff --git a/ast.c b/ast.c
new file mode 100644
index 0000000..ad5b83f
--- /dev/null
+++ b/ast.c
@@ -0,0 +1,352 @@
+enum TypeKind {
+ TypeKind_unknown,
+
+ TypeKind_char,
+ TypeKind_int,
+ TypeKind_long,
+ TypeKind_void,
+ TypeKind_ptr,
+ TypeKind_enum,
+ TypeKind_struct,
+};
+typedef enum TypeKind TypeKind;
+
+struct AstNode;
+
+struct Type {
+ TypeKind kind;
+ struct Type* to;
+ struct AstNode* def;
+};
+typedef struct Type Type;
+
+Type* type_new(TypeKind kind) {
+ Type* ty = calloc(1, sizeof(Type));
+ ty->kind = kind;
+ return ty;
+}
+
+Type* type_new_ptr(Type* to) {
+ Type* ty = calloc(1, sizeof(Type));
+ ty->kind = TypeKind_ptr;
+ ty->to = to;
+ return ty;
+}
+
+int type_is_unsized(Type* ty) {
+ return ty->kind != TypeKind_void;
+}
+
+int type_sizeof_struct(Type* ty);
+int type_alignof_struct(Type* ty);
+int type_offsetof(Type* ty, const String* name);
+Type* type_member_typeof(Type* ty, const String* name);
+
+int type_sizeof(Type* ty) {
+ if (!type_is_unsized(ty)) {
+ fatal_error("type_sizeof: type size cannot be determined");
+ }
+
+ if (ty->kind == TypeKind_ptr) {
+ return 8;
+ } else if (ty->kind == TypeKind_char) {
+ return 1;
+ } else if (ty->kind == TypeKind_int) {
+ return 4;
+ } else if (ty->kind == TypeKind_long) {
+ return 8;
+ } else if (ty->kind == TypeKind_enum) {
+ return 4;
+ } else {
+ return type_sizeof_struct(ty);
+ }
+}
+
+int type_alignof(Type* ty) {
+ if (!type_is_unsized(ty)) {
+ fatal_error("type_alignof: type size cannot be determined");
+ }
+
+ if (ty->kind == TypeKind_ptr) {
+ return 8;
+ } else if (ty->kind == TypeKind_char) {
+ return 1;
+ } else if (ty->kind == TypeKind_int) {
+ return 4;
+ } else if (ty->kind == TypeKind_long) {
+ return 8;
+ } else if (ty->kind == TypeKind_enum) {
+ return 4;
+ } else {
+ return type_alignof_struct(ty);
+ }
+}
+
+enum AstNodeKind {
+ AstNodeKind_unknown,
+
+ AstNodeKind_assign_expr,
+ AstNodeKind_binary_expr,
+ AstNodeKind_break_stmt,
+ AstNodeKind_continue_stmt,
+ AstNodeKind_deref_expr,
+ AstNodeKind_do_while_stmt,
+ AstNodeKind_enum_def,
+ AstNodeKind_enum_member,
+ AstNodeKind_expr_stmt,
+ AstNodeKind_for_stmt,
+ AstNodeKind_func_call,
+ AstNodeKind_func_decl,
+ AstNodeKind_func_def,
+ AstNodeKind_gvar,
+ AstNodeKind_gvar_decl,
+ AstNodeKind_if_stmt,
+ AstNodeKind_int_expr,
+ AstNodeKind_list,
+ AstNodeKind_logical_expr,
+ AstNodeKind_lvar,
+ AstNodeKind_lvar_decl,
+ AstNodeKind_param,
+ AstNodeKind_ref_expr,
+ AstNodeKind_return_stmt,
+ AstNodeKind_str_expr,
+ AstNodeKind_struct_decl,
+ AstNodeKind_struct_def,
+ AstNodeKind_struct_member,
+ AstNodeKind_type,
+ AstNodeKind_typedef_decl,
+ AstNodeKind_unary_expr,
+};
+typedef enum AstNodeKind AstNodeKind;
+
+#define node_items __n1
+#define node_len __i
+#define node_expr __n1
+#define node_lhs __n1
+#define node_rhs __n2
+#define node_operand __n1
+#define node_cond __n1
+#define node_init __n2
+#define node_update __n3
+#define node_then __n2
+#define node_else __n3
+#define node_body __n4
+#define node_members __n1
+#define node_params __n1
+#define node_args __n1
+#define node_int_value __i
+#define node_idx __i
+#define node_op __i
+
+struct AstNode {
+ AstNodeKind kind;
+ String name;
+ Type* ty;
+ struct AstNode* __n1;
+ struct AstNode* __n2;
+ struct AstNode* __n3;
+ struct AstNode* __n4;
+ int __i;
+};
+typedef struct AstNode AstNode;
+
+struct Program {
+ AstNode* funcs;
+ char** str_literals;
+};
+typedef struct Program Program;
+
+AstNode* ast_new(AstNodeKind kind) {
+ AstNode* ast = calloc(1, sizeof(AstNode));
+ ast->kind = kind;
+ return ast;
+}
+
+AstNode* ast_new_list(int capacity) {
+ AstNode* list = ast_new(AstNodeKind_list);
+ list->node_items = calloc(capacity, sizeof(AstNode));
+ list->node_len = 0;
+ return list;
+}
+
+void ast_append(AstNode* list, AstNode* item) {
+ if (list->kind != AstNodeKind_list) {
+ fatal_error("ast_append: ast is not a list");
+ }
+ if (!item) {
+ return;
+ }
+ memcpy(list->node_items + list->node_len, item, sizeof(AstNode));
+ ++list->node_len;
+}
+
+AstNode* ast_new_int(int v) {
+ AstNode* e = ast_new(AstNodeKind_int_expr);
+ e->node_int_value = v;
+ e->ty = type_new(TypeKind_int);
+ return e;
+}
+
+AstNode* ast_new_unary_expr(int op, AstNode* operand) {
+ AstNode* e = ast_new(AstNodeKind_unary_expr);
+ e->node_op = op;
+ e->node_operand = operand;
+ e->ty = type_new(TypeKind_int);
+ return e;
+}
+
+AstNode* ast_new_binary_expr(int op, AstNode* lhs, AstNode* rhs) {
+ AstNode* e = ast_new(AstNodeKind_binary_expr);
+ e->node_op = op;
+ e->node_lhs = lhs;
+ e->node_rhs = rhs;
+ if (op == TokenKind_plus) {
+ if (lhs->ty->kind == TypeKind_ptr) {
+ e->ty = lhs->ty;
+ } else if (rhs->ty->kind == TypeKind_ptr) {
+ e->ty = rhs->ty;
+ } else {
+ e->ty = type_new(TypeKind_int);
+ }
+ } else if (op == TokenKind_minus) {
+ if (lhs->ty->kind == TypeKind_ptr) {
+ e->ty = lhs->ty;
+ } else {
+ e->ty = type_new(TypeKind_int);
+ }
+ } else {
+ e->ty = type_new(TypeKind_int);
+ }
+ return e;
+}
+
+AstNode* ast_new_assign_expr(int op, AstNode* lhs, AstNode* rhs) {
+ AstNode* e = ast_new(AstNodeKind_assign_expr);
+ e->node_op = op;
+ e->node_lhs = lhs;
+ e->node_rhs = rhs;
+ e->ty = lhs->ty;
+ return e;
+}
+
+AstNode* ast_new_assign_add_expr(AstNode* lhs, AstNode* rhs) {
+ if (lhs->ty->kind == TypeKind_ptr) {
+ rhs = ast_new_binary_expr(TokenKind_star, rhs, ast_new_int(type_sizeof(lhs->ty->to)));
+ } else if (rhs->ty->kind == TypeKind_ptr) {
+ lhs = ast_new_binary_expr(TokenKind_star, lhs, ast_new_int(type_sizeof(rhs->ty->to)));
+ }
+ return ast_new_assign_expr(TokenKind_assign_add, lhs, rhs);
+}
+
+AstNode* ast_new_assign_sub_expr(AstNode* lhs, AstNode* rhs) {
+ if (lhs->ty->kind == TypeKind_ptr) {
+ rhs = ast_new_binary_expr(TokenKind_star, rhs, ast_new_int(type_sizeof(lhs->ty->to)));
+ }
+ return ast_new_assign_expr(TokenKind_assign_sub, lhs, rhs);
+}
+
+AstNode* ast_new_ref_expr(AstNode* operand) {
+ AstNode* e = ast_new(AstNodeKind_ref_expr);
+ e->node_operand = operand;
+ e->ty = type_new_ptr(operand->ty);
+ return e;
+}
+
+AstNode* ast_new_deref_expr(AstNode* operand) {
+ AstNode* e = ast_new(AstNodeKind_deref_expr);
+ e->node_operand = operand;
+ e->ty = operand->ty->to;
+ return e;
+}
+
+AstNode* ast_new_member_access_expr(AstNode* obj, const String* name) {
+ AstNode* e = ast_new(AstNodeKind_deref_expr);
+ e->node_operand = ast_new_binary_expr(TokenKind_plus, obj, ast_new_int(type_offsetof(obj->ty->to, name)));
+ e->ty = type_member_typeof(obj->ty->to, name);
+ e->node_operand->ty = type_new_ptr(e->ty);
+ return e;
+}
+
+int type_sizeof_struct(Type* ty) {
+ int next_offset = 0;
+ int struct_align = 0;
+ int padding;
+
+ int i;
+ for (i = 0; i < ty->def->node_members->node_len; ++i) {
+ AstNode* member = ty->def->node_members->node_items + i;
+ int size = type_sizeof(member->ty);
+ int align = type_alignof(member->ty);
+
+ if (next_offset % align != 0) {
+ padding = align - next_offset % align;
+ next_offset += padding;
+ }
+ next_offset += size;
+ if (struct_align < align) {
+ struct_align = align;
+ }
+ }
+ if (next_offset % struct_align != 0) {
+ padding = struct_align - next_offset % struct_align;
+ next_offset += padding;
+ }
+ return next_offset;
+}
+
+int type_alignof_struct(Type* ty) {
+ int struct_align = 0;
+
+ int i;
+ for (i = 0; i < ty->def->node_members->node_len; ++i) {
+ AstNode* member = ty->def->node_members->node_items + i;
+ int align = type_alignof(member->ty);
+
+ if (struct_align < align) {
+ struct_align = align;
+ }
+ }
+ return struct_align;
+}
+
+int type_offsetof(Type* ty, const String* name) {
+ if (ty->kind != TypeKind_struct) {
+ fatal_error("type_offsetof: type is not a struct");
+ }
+
+ int next_offset = 0;
+
+ int i;
+ for (i = 0; i < ty->def->node_members->node_len; ++i) {
+ AstNode* member = ty->def->node_members->node_items + i;
+ int size = type_sizeof(member->ty);
+ int align = type_alignof(member->ty);
+
+ if (next_offset % align != 0) {
+ int padding = align - next_offset % align;
+ next_offset += padding;
+ }
+ if (string_equals(&member->name, name)) {
+ return next_offset;
+ }
+ next_offset += size;
+ }
+
+ fatal_error("type_offsetof: member not found");
+}
+
+Type* type_member_typeof(Type* ty, const String* name) {
+ if (ty->kind != TypeKind_struct) {
+ fatal_error("type_offsetof: type is not a struct");
+ }
+
+ int i;
+ for (i = 0; i < ty->def->node_members->node_len; ++i) {
+ AstNode* member = ty->def->node_members->node_items + i;
+ if (string_equals(&member->name, name)) {
+ return member->ty;
+ }
+ }
+
+ fatal_error("type_offsetof: member not found");
+}
diff --git a/codegen.c b/codegen.c
new file mode 100644
index 0000000..26e7604
--- /dev/null
+++ b/codegen.c
@@ -0,0 +1,439 @@
+enum GenMode {
+ GenMode_lval,
+ GenMode_rval,
+};
+typedef enum GenMode GenMode;
+
+struct CodeGen {
+ int next_label;
+ int* loop_labels;
+};
+typedef struct CodeGen CodeGen;
+
+CodeGen* codegen_new() {
+ CodeGen* g = calloc(1, sizeof(CodeGen));
+ g->next_label = 1;
+ g->loop_labels = calloc(1024, sizeof(int));
+ return g;
+}
+
+int codegen_new_label(CodeGen* g) {
+ int new_label = g->next_label;
+ ++g->next_label;
+ return new_label;
+}
+
+void codegen_expr(CodeGen* g, AstNode* ast, GenMode gen_mode);
+void codegen_stmt(CodeGen* g, AstNode* ast);
+
+const char* param_reg(int n) {
+ if (n == 0) {
+ return "rdi";
+ } else if (n == 1) {
+ return "rsi";
+ } else if (n == 2) {
+ return "rdx";
+ } else if (n == 3) {
+ return "rcx";
+ } else if (n == 4) {
+ return "r8";
+ } else if (n == 5) {
+ return "r9";
+ } else {
+ unreachable();
+ }
+}
+
+void codegen_func_prologue(CodeGen* g, AstNode* ast) {
+ printf(" push rbp\n");
+ printf(" mov rbp, rsp\n");
+ int i;
+ for (i = 0; i < ast->node_params->node_len; ++i) {
+ printf(" push %s\n", param_reg(i));
+ }
+ printf(" sub rsp, %d\n", 8 * LVAR_MAX);
+}
+
+void codegen_func_epilogue(CodeGen* g, AstNode* ast) {
+ printf(" mov rsp, rbp\n");
+ printf(" pop rbp\n");
+ printf(" ret\n");
+}
+
+void codegen_int_expr(CodeGen* g, AstNode* ast) {
+ printf(" push %d\n", ast->node_int_value);
+}
+
+void codegen_str_expr(CodeGen* g, AstNode* ast) {
+ printf(" mov rax, OFFSET FLAG:.Lstr__%d\n", ast->node_idx);
+ printf(" push rax\n");
+}
+
+void codegen_unary_expr(CodeGen* g, AstNode* ast) {
+ codegen_expr(g, ast->node_operand, GenMode_rval);
+ if (ast->node_op == TokenKind_not) {
+ printf(" pop rax\n");
+ printf(" mov rdi, 0\n");
+ printf(" cmp rax, rdi\n");
+ printf(" sete al\n");
+ printf(" movzb rax, al\n");
+ printf(" push rax\n");
+ } else {
+ unreachable();
+ }
+}
+
+void codegen_ref_expr(CodeGen* g, AstNode* ast, GenMode gen_mode) {
+ codegen_expr(g, ast->node_operand, GenMode_lval);
+}
+
+void codegen_lval2rval(Type* ty) {
+ int size = type_sizeof(ty);
+
+ printf(" pop rax\n");
+ if (size == 1) {
+ printf(" movsx rax, BYTE PTR [rax]\n");
+ } else if (size == 4) {
+ printf(" movsxd rax, DWORD PTR [rax]\n");
+ } else {
+ printf(" mov rax, [rax]\n");
+ }
+ printf(" push rax\n");
+}
+
+void codegen_deref_expr(CodeGen* g, AstNode* ast, GenMode gen_mode) {
+ codegen_expr(g, ast->node_operand, GenMode_rval);
+ if (gen_mode == GenMode_rval) {
+ codegen_lval2rval(ast->node_operand->ty->to);
+ }
+}
+
+void codegen_logical_expr(CodeGen* g, AstNode* ast) {
+ int label = codegen_new_label(g);
+
+ if (ast->node_op == TokenKind_andand) {
+ codegen_expr(g, ast->node_lhs, GenMode_rval);
+ printf(" pop rax\n");
+ printf(" cmp rax, 0\n");
+ printf(" je .Lelse%d\n", label);
+ codegen_expr(g, ast->node_rhs, GenMode_rval);
+ printf(" jmp .Lend%d\n", label);
+ printf(".Lelse%d:\n", label);
+ printf(" push 0\n");
+ printf(".Lend%d:\n", label);
+ } else {
+ codegen_expr(g, ast->node_lhs, GenMode_rval);
+ printf(" pop rax\n");
+ printf(" cmp rax, 0\n");
+ printf(" je .Lelse%d\n", label);
+ printf(" push 1\n");
+ printf(" jmp .Lend%d\n", label);
+ printf(".Lelse%d:\n", label);
+ codegen_expr(g, ast->node_rhs, GenMode_rval);
+ printf(".Lend%d:\n", label);
+ }
+}
+
+void codegen_binary_expr(CodeGen* g, AstNode* ast, GenMode gen_mode) {
+ codegen_expr(g, ast->node_lhs, gen_mode);
+ codegen_expr(g, ast->node_rhs, gen_mode);
+ printf(" pop rdi\n");
+ printf(" pop rax\n");
+ if (ast->node_op == TokenKind_plus) {
+ printf(" add rax, rdi\n");
+ } else if (ast->node_op == TokenKind_minus) {
+ printf(" sub rax, rdi\n");
+ } else if (ast->node_op == TokenKind_star) {
+ printf(" imul rax, rdi\n");
+ } else if (ast->node_op == TokenKind_slash) {
+ printf(" cqo\n");
+ printf(" idiv rdi\n");
+ } else if (ast->node_op == TokenKind_percent) {
+ printf(" cqo\n");
+ printf(" idiv rdi\n");
+ printf(" mov rax, rdx\n");
+ } else if (ast->node_op == TokenKind_eq) {
+ printf(" cmp rax, rdi\n");
+ printf(" sete al\n");
+ printf(" movzb rax, al\n");
+ } else if (ast->node_op == TokenKind_ne) {
+ printf(" cmp rax, rdi\n");
+ printf(" setne al\n");
+ printf(" movzb rax, al\n");
+ } else if (ast->node_op == TokenKind_lt) {
+ printf(" cmp rax, rdi\n");
+ printf(" setl al\n");
+ printf(" movzb rax, al\n");
+ } else if (ast->node_op == TokenKind_le) {
+ printf(" cmp rax, rdi\n");
+ printf(" setle al\n");
+ printf(" movzb rax, al\n");
+ } else {
+ unreachable();
+ }
+ printf(" push rax\n");
+}
+
+void codegen_assign_expr(CodeGen* g, AstNode* ast) {
+ codegen_expr(g, ast->node_lhs, GenMode_lval);
+ codegen_expr(g, ast->node_rhs, GenMode_rval);
+ if (ast->node_op == TokenKind_assign) {
+ } else if (ast->node_op == TokenKind_assign_add) {
+ printf(" pop rdi\n");
+ printf(" push [rsp]\n");
+ codegen_lval2rval(ast->node_lhs->ty);
+ printf(" pop rax\n");
+ printf(" add rax, rdi\n");
+ printf(" push rax\n");
+ } else if (ast->node_op == TokenKind_assign_sub) {
+ printf(" pop rdi\n");
+ printf(" push [rsp]\n");
+ codegen_lval2rval(ast->node_lhs->ty);
+ printf(" pop rax\n");
+ printf(" sub rax, rdi\n");
+ printf(" push rax\n");
+ } else {
+ unreachable();
+ }
+ printf(" pop rdi\n");
+ printf(" pop rax\n");
+ if (type_sizeof(ast->node_lhs->ty) == 1) {
+ printf(" mov BYTE PTR [rax], dil\n");
+ } else if (type_sizeof(ast->node_lhs->ty) == 4) {
+ printf(" mov DWORD PTR [rax], edi\n");
+ } else {
+ printf(" mov [rax], rdi\n");
+ }
+ printf(" push rdi\n");
+}
+
+void codegen_func_call(CodeGen* g, AstNode* ast) {
+ String* func_name = &ast->name;
+ AstNode* args = ast->node_args;
+ int i;
+ for (i = 0; i < args->node_len; ++i) {
+ AstNode* arg = args->node_items + i;
+ codegen_expr(g, arg, GenMode_rval);
+ }
+ for (i = args->node_len - 1; i >= 0; --i) {
+ printf(" pop %s\n", param_reg(i));
+ }
+
+ int label = codegen_new_label(g);
+
+ printf(" mov rax, rsp\n");
+ printf(" and rax, 15\n");
+ printf(" cmp rax, 0\n");
+ printf(" je .Laligned%d\n", label);
+
+ printf(" mov rax, 0\n");
+ printf(" sub rsp, 8\n");
+ printf(" call %.*s\n", func_name->len, func_name->data);
+ printf(" add rsp, 8\n");
+ printf(" push rax\n");
+
+ printf(" jmp .Lend%d\n", label);
+ printf(".Laligned%d:\n", label);
+
+ printf(" mov rax, 0\n");
+ printf(" call %.*s\n", func_name->len, func_name->data);
+ printf(" push rax\n");
+
+ printf(".Lend%d:\n", label);
+}
+
+void codegen_lvar(CodeGen* g, AstNode* ast, GenMode gen_mode) {
+ int offset = 8 + ast->node_idx * 8;
+ printf(" mov rax, rbp\n");
+ printf(" sub rax, %d\n", offset);
+ printf(" push rax\n");
+ if (gen_mode == GenMode_rval) {
+ codegen_lval2rval(ast->ty);
+ }
+}
+
+void codegen_gvar(CodeGen* g, AstNode* ast, GenMode gen_mode) {
+ if (gen_mode == GenMode_lval) {
+ fatal_error("unimplemented");
+ }
+ if (ast->ty->kind != TypeKind_ptr) {
+ fatal_error("unimplemented");
+ }
+ printf(" mov rax, QWORD PTR %.*s[rip]\n", ast->name.len, ast->name.data);
+ printf(" push rax\n");
+}
+
+void codegen_expr(CodeGen* g, AstNode* ast, GenMode gen_mode) {
+ if (ast->kind == AstNodeKind_int_expr) {
+ codegen_int_expr(g, ast);
+ } else if (ast->kind == AstNodeKind_str_expr) {
+ codegen_str_expr(g, ast);
+ } else if (ast->kind == AstNodeKind_unary_expr) {
+ codegen_unary_expr(g, ast);
+ } else if (ast->kind == AstNodeKind_ref_expr) {
+ codegen_ref_expr(g, ast, gen_mode);
+ } else if (ast->kind == AstNodeKind_deref_expr) {
+ codegen_deref_expr(g, ast, gen_mode);
+ } else if (ast->kind == AstNodeKind_binary_expr) {
+ codegen_binary_expr(g, ast, gen_mode);
+ } else if (ast->kind == AstNodeKind_logical_expr) {
+ codegen_logical_expr(g, ast);
+ } else if (ast->kind == AstNodeKind_assign_expr) {
+ codegen_assign_expr(g, ast);
+ } else if (ast->kind == AstNodeKind_func_call) {
+ codegen_func_call(g, ast);
+ } else if (ast->kind == AstNodeKind_lvar) {
+ codegen_lvar(g, ast, gen_mode);
+ } else if (ast->kind == AstNodeKind_gvar) {
+ codegen_gvar(g, ast, gen_mode);
+ } else {
+ unreachable();
+ }
+}
+
+void codegen_return_stmt(CodeGen* g, AstNode* ast) {
+ if (ast->node_expr) {
+ codegen_expr(g, ast->node_expr, GenMode_rval);
+ printf(" pop rax\n");
+ }
+ codegen_func_epilogue(g, ast);
+}
+
+void codegen_if_stmt(CodeGen* g, AstNode* ast) {
+ int label = codegen_new_label(g);
+
+ codegen_expr(g, ast->node_cond, GenMode_rval);
+ printf(" pop rax\n");
+ printf(" cmp rax, 0\n");
+ printf(" je .Lelse%d\n", label);
+ codegen_stmt(g, ast->node_then);
+ printf(" jmp .Lend%d\n", label);
+ printf(".Lelse%d:\n", label);
+ if (ast->node_else) {
+ codegen_stmt(g, ast->node_else);
+ }
+ printf(".Lend%d:\n", label);
+}
+
+void codegen_for_stmt(CodeGen* g, AstNode* ast) {
+ int label = codegen_new_label(g);
+ ++g->loop_labels;
+ *g->loop_labels = label;
+
+ if (ast->node_init) {
+ codegen_expr(g, ast->node_init, GenMode_rval);
+ printf(" pop rax\n");
+ }
+ printf(".Lbegin%d:\n", label);
+ codegen_expr(g, ast->node_cond, GenMode_rval);
+ printf(" pop rax\n");
+ printf(" cmp rax, 0\n");
+ printf(" je .Lend%d\n", label);
+ codegen_stmt(g, ast->node_body);
+ printf(".Lcontinue%d:\n", label);
+ if (ast->node_update) {
+ codegen_expr(g, ast->node_update, GenMode_rval);
+ printf(" pop rax\n");
+ }
+ printf(" jmp .Lbegin%d\n", label);
+ printf(".Lend%d:\n", label);
+
+ --g->loop_labels;
+}
+
+void codegen_do_while_stmt(CodeGen* g, AstNode* ast) {
+ int label = codegen_new_label(g);
+ ++g->loop_labels;
+ *g->loop_labels = label;
+
+ printf(".Lbegin%d:\n", label);
+ codegen_stmt(g, ast->node_body);
+ printf(".Lcontinue%d:\n", label);
+ codegen_expr(g, ast->node_cond, GenMode_rval);
+ printf(" pop rax\n");
+ printf(" cmp rax, 0\n");
+ printf(" je .Lend%d\n", label);
+ printf(" jmp .Lbegin%d\n", label);
+ printf(".Lend%d:\n", label);
+
+ --g->loop_labels;
+}
+
+void codegen_break_stmt(CodeGen* g, AstNode* ast) {
+ int label = *g->loop_labels;
+ printf(" jmp .Lend%d\n", label);
+}
+
+void codegen_continue_stmt(CodeGen* g, AstNode* ast) {
+ int label = *g->loop_labels;
+ printf(" jmp .Lcontinue%d\n", label);
+}
+
+void codegen_expr_stmt(CodeGen* g, AstNode* ast) {
+ codegen_expr(g, ast->node_expr, GenMode_rval);
+ printf(" pop rax\n");
+}
+
+void codegen_var_decl(CodeGen* g, AstNode* ast) {
+}
+
+void codegen_block_stmt(CodeGen* g, AstNode* ast) {
+ int i;
+ for (i = 0; i < ast->node_len; ++i) {
+ AstNode* stmt = ast->node_items + i;
+ codegen_stmt(g, stmt);
+ }
+}
+
+void codegen_stmt(CodeGen* g, AstNode* ast) {
+ if (ast->kind == AstNodeKind_list) {
+ codegen_block_stmt(g, ast);
+ } else if (ast->kind == AstNodeKind_return_stmt) {
+ codegen_return_stmt(g, ast);
+ } else if (ast->kind == AstNodeKind_if_stmt) {
+ codegen_if_stmt(g, ast);
+ } else if (ast->kind == AstNodeKind_for_stmt) {
+ codegen_for_stmt(g, ast);
+ } else if (ast->kind == AstNodeKind_do_while_stmt) {
+ codegen_do_while_stmt(g, ast);
+ } else if (ast->kind == AstNodeKind_break_stmt) {
+ codegen_break_stmt(g, ast);
+ } else if (ast->kind == AstNodeKind_continue_stmt) {
+ codegen_continue_stmt(g, ast);
+ } else if (ast->kind == AstNodeKind_expr_stmt) {
+ codegen_expr_stmt(g, ast);
+ } else if (ast->kind == AstNodeKind_lvar_decl) {
+ codegen_var_decl(g, ast);
+ } else {
+ unreachable();
+ }
+}
+
+void codegen_func(CodeGen* g, AstNode* ast) {
+ printf("%.*s:\n", ast->name.len, ast->name.data);
+
+ codegen_func_prologue(g, ast);
+ codegen_stmt(g, ast->node_body);
+ codegen_func_epilogue(g, ast);
+
+ printf("\n");
+}
+
+void codegen(Program* prog) {
+ CodeGen* g = codegen_new();
+
+ printf(".intel_syntax noprefix\n\n");
+
+ int i;
+ for (i = 0; prog->str_literals[i]; ++i) {
+ printf(".Lstr__%d:\n", i + 1);
+ printf(" .string \"%s\"\n\n", prog->str_literals[i]);
+ }
+
+ printf(".globl main\n\n");
+
+ for (i = 0; i < prog->funcs->node_len; ++i) {
+ AstNode* func = prog->funcs->node_items + i;
+ codegen_func(g, func);
+ }
+}
diff --git a/common.c b/common.c
new file mode 100644
index 0000000..fcc0790
--- /dev/null
+++ b/common.c
@@ -0,0 +1,41 @@
+void fatal_error(const char* msg) {
+ fprintf(stderr, "%s\n", msg);
+ exit(1);
+}
+
+void unreachable() {
+ fatal_error("unreachable");
+}
+
+char* read_all(FILE* in) {
+ char* buf = calloc(1024 * 1024, sizeof(char));
+ char* cur = buf;
+ char* tmp = calloc(1024, sizeof(char));
+ while (fgets(tmp, 1024, in)) {
+ size_t len = strlen(tmp);
+ memcpy(cur, tmp, len);
+ cur += len;
+ }
+ return buf;
+}
+
+struct String {
+ char* data;
+ size_t len;
+};
+typedef struct String String;
+
+char* string_to_cstr(const String* s) {
+ char* buf = calloc(s->len + 1, sizeof(char));
+ memcpy(buf, s->data, s->len);
+ return buf;
+}
+
+int string_equals(const String* s1, const String* s2) {
+ return s1->len == s2->len && strncmp(s1->data, s2->data, s1->len) == 0;
+}
+
+int string_equals_cstr(const String* s1, const char* s2) {
+ size_t s2_len = strlen(s2);
+ return s1->len == s2_len && strncmp(s1->data, s2, s1->len) == 0;
+}
diff --git a/main.c b/main.c
index 463d4a1..e9a4fa0 100644
--- a/main.c
+++ b/main.c
@@ -1,2671 +1,14 @@
-typedef long size_t;
-
-struct FILE;
-typedef struct FILE FILE;
-
-extern FILE* stdin;
-extern FILE* stdout;
-extern FILE* stderr;
-
-int atoi(const char*);
-void* calloc(size_t, size_t);
-void exit(int);
-int fclose(FILE*);
-int fprintf(FILE*, const char*, ...);
-char* fgets(char*, int, FILE*);
-FILE* fopen(const char*, const char*);
-int getchar(void);
-int isalnum(int);
-int isalpha(int);
-int isdigit(int);
-int isspace(int);
-void* memcpy(void*, void*, size_t);
-void* memmove(void*, void*, size_t);
-int printf(const char*, ...);
-int sprintf(char*, const char*, ...);
-int strcmp(const char*, const char*);
-size_t strlen(const char*);
-int strncmp(const char*, const char*, size_t);
-char* strstr(const char*, const char*);
-
-#define NULL 0
-
-void fatal_error(const char* msg) {
- fprintf(stderr, "%s\n", msg);
- exit(1);
-}
-
-void unreachable() {
- fatal_error("unreachable");
-}
-
-char* read_all(FILE* in) {
- char* buf = calloc(1024 * 1024, sizeof(char));
- char* cur = buf;
- char* tmp = calloc(1024, sizeof(char));
- while (fgets(tmp, 1024, in)) {
- size_t len = strlen(tmp);
- memcpy(cur, tmp, len);
- cur += len;
- }
- return buf;
-}
-
-struct String {
- char* data;
- size_t len;
-};
-typedef struct String String;
-
-char* string_to_cstr(const String* s) {
- char* buf = calloc(s->len + 1, sizeof(char));
- memcpy(buf, s->data, s->len);
- return buf;
-}
-
-int string_equals(const String* s1, const String* s2) {
- return s1->len == s2->len && strncmp(s1->data, s2->data, s1->len) == 0;
-}
-
-int string_equals_cstr(const String* s1, const char* s2) {
- size_t s2_len = strlen(s2);
- return s1->len == s2_len && strncmp(s1->data, s2, s1->len) == 0;
-}
-
-enum PpTokenKind {
- PpTokenKind_eof,
-
- PpTokenKind_header_name,
- PpTokenKind_identifier,
- PpTokenKind_pp_number,
- PpTokenKind_character_constant,
- PpTokenKind_string_literal,
- PpTokenKind_punctuator,
- PpTokenKind_other,
- PpTokenKind_whitespace,
-};
-typedef enum PpTokenKind PpTokenKind;
-
-struct PpToken {
- PpTokenKind kind;
- String raw;
-};
-typedef struct PpToken PpToken;
-
-struct PpDefine {
- String name;
- PpToken* tokens;
-};
-typedef struct PpDefine PpDefine;
-
-struct Preprocessor {
- char* src;
- int pos;
- PpToken* pp_tokens;
- int n_pp_tokens;
- PpDefine* pp_defines;
- int n_pp_defines;
- int include_depth;
-};
-typedef struct Preprocessor Preprocessor;
-
-PpToken* do_preprocess(char* src, int depth);
-
-Preprocessor* preprocessor_new(char* src, int include_depth) {
- if (include_depth >= 32) {
- fatal_error("include depth limit exceeded");
- }
-
- Preprocessor* pp = calloc(1, sizeof(Preprocessor));
- pp->src = src;
- pp->pp_tokens = calloc(1024 * 1024, sizeof(PpToken));
- pp->pp_defines = calloc(1024, sizeof(PpDefine));
- pp->include_depth = include_depth;
-
- return pp;
-}
-
-int find_pp_define(Preprocessor* pp, String* name) {
- int i;
- for (i = 0; i < pp->n_pp_defines; ++i) {
- if (string_equals(&pp->pp_defines[i].name, name)) {
- return i;
- }
- }
- return -1;
-}
-
-void pp_tokenize_all(Preprocessor* pp) {
- char* buf;
- int ch;
- int start;
- while (pp->src[pp->pos]) {
- PpToken* tok = pp->pp_tokens + pp->n_pp_tokens;
- char c = pp->src[pp->pos];
- ++pp->pos;
- if (c == '(') {
- tok->kind = PpTokenKind_punctuator;
- tok->raw.len = 1;
- tok->raw.data = pp->src + pp->pos - tok->raw.len;
- } else if (c == ')') {
- tok->kind = PpTokenKind_punctuator;
- tok->raw.len = 1;
- tok->raw.data = pp->src + pp->pos - tok->raw.len;
- } else if (c == '{') {
- tok->kind = PpTokenKind_punctuator;
- tok->raw.len = 1;
- tok->raw.data = pp->src + pp->pos - tok->raw.len;
- } else if (c == '}') {
- tok->kind = PpTokenKind_punctuator;
- tok->raw.len = 1;
- tok->raw.data = pp->src + pp->pos - tok->raw.len;
- } else if (c == '[') {
- tok->kind = PpTokenKind_punctuator;
- tok->raw.len = 1;
- tok->raw.data = pp->src + pp->pos - tok->raw.len;
- } else if (c == ']') {
- tok->kind = PpTokenKind_punctuator;
- tok->raw.len = 1;
- tok->raw.data = pp->src + pp->pos - tok->raw.len;
- } else if (c == ',') {
- tok->kind = PpTokenKind_punctuator;
- tok->raw.len = 1;
- tok->raw.data = pp->src + pp->pos - tok->raw.len;
- } else if (c == ';') {
- tok->kind = PpTokenKind_punctuator;
- tok->raw.len = 1;
- tok->raw.data = pp->src + pp->pos - tok->raw.len;
- } else if (c == '+') {
- if (pp->src[pp->pos] == '=') {
- ++pp->pos;
- tok->kind = PpTokenKind_punctuator;
- tok->raw.len = 2;
- tok->raw.data = pp->src + pp->pos - tok->raw.len;
- } else if (pp->src[pp->pos] == '+') {
- ++pp->pos;
- tok->kind = PpTokenKind_punctuator;
- tok->raw.len = 2;
- tok->raw.data = pp->src + pp->pos - tok->raw.len;
- } else {
- tok->kind = PpTokenKind_punctuator;
- tok->raw.len = 1;
- tok->raw.data = pp->src + pp->pos - tok->raw.len;
- }
- } else if (c == '|') {
- ++pp->pos;
- tok->kind = PpTokenKind_punctuator;
- tok->raw.len = 2;
- tok->raw.data = pp->src + pp->pos - tok->raw.len;
- } else if (c == '&') {
- if (pp->src[pp->pos] == '&') {
- ++pp->pos;
- tok->kind = PpTokenKind_punctuator;
- tok->raw.len = 2;
- tok->raw.data = pp->src + pp->pos - tok->raw.len;
- } else {
- tok->kind = PpTokenKind_punctuator;
- tok->raw.len = 1;
- tok->raw.data = pp->src + pp->pos - tok->raw.len;
- }
- } else if (c == '-') {
- if (pp->src[pp->pos] == '>') {
- ++pp->pos;
- tok->kind = PpTokenKind_punctuator;
- tok->raw.len = 2;
- tok->raw.data = pp->src + pp->pos - tok->raw.len;
- } else if (pp->src[pp->pos] == '=') {
- ++pp->pos;
- tok->kind = PpTokenKind_punctuator;
- tok->raw.len = 2;
- tok->raw.data = pp->src + pp->pos - tok->raw.len;
- } else if (pp->src[pp->pos] == '-') {
- ++pp->pos;
- tok->kind = PpTokenKind_punctuator;
- tok->raw.len = 2;
- tok->raw.data = pp->src + pp->pos - tok->raw.len;
- } else {
- tok->kind = PpTokenKind_punctuator;
- tok->raw.len = 1;
- tok->raw.data = pp->src + pp->pos - tok->raw.len;
- }
- } else if (c == '*') {
- tok->kind = PpTokenKind_punctuator;
- tok->raw.len = 1;
- tok->raw.data = pp->src + pp->pos - tok->raw.len;
- } else if (c == '/') {
- if (pp->src[pp->pos] == '/') {
- start = pp->pos - 1;
- ++pp->pos;
- while (pp->src[pp->pos] && pp->src[pp->pos] != '\n' && pp->src[pp->pos] != '\r') {
- ++pp->pos;
- }
- tok->kind = PpTokenKind_whitespace;
- tok->raw.len = pp->pos - start;
- tok->raw.data = pp->src + pp->pos - tok->raw.len;
- } else if (pp->src[pp->pos] == '*') {
- fatal_error("unimplemented");
- } else {
- tok->kind = PpTokenKind_punctuator;
- tok->raw.len = 1;
- tok->raw.data = pp->src + pp->pos - tok->raw.len;
- }
- } else if (c == '%') {
- tok->kind = PpTokenKind_punctuator;
- tok->raw.len = 1;
- tok->raw.data = pp->src + pp->pos - tok->raw.len;
- } else if (c == '.') {
- if (pp->src[pp->pos] == '.') {
- ++pp->pos;
- if (pp->src[pp->pos] == '.') {
- ++pp->pos;
- tok->kind = PpTokenKind_punctuator;
- tok->raw.len = 3;
- tok->raw.data = pp->src + pp->pos - tok->raw.len;
- } else {
- --pp->pos;
- tok->kind = PpTokenKind_punctuator;
- tok->raw.len = 1;
- tok->raw.data = pp->src + pp->pos - tok->raw.len;
- }
- } else {
- tok->kind = PpTokenKind_punctuator;
- tok->raw.len = 1;
- tok->raw.data = pp->src + pp->pos - tok->raw.len;
- }
- } else if (c == '!') {
- if (pp->src[pp->pos] == '=') {
- ++pp->pos;
- tok->kind = PpTokenKind_punctuator;
- tok->raw.len = 2;
- tok->raw.data = pp->src + pp->pos - tok->raw.len;
- } else {
- tok->kind = PpTokenKind_punctuator;
- tok->raw.len = 1;
- tok->raw.data = pp->src + pp->pos - tok->raw.len;
- }
- } else if (c == '=') {
- if (pp->src[pp->pos] == '=') {
- ++pp->pos;
- tok->kind = PpTokenKind_punctuator;
- tok->raw.len = 2;
- tok->raw.data = pp->src + pp->pos - tok->raw.len;
- } else {
- tok->kind = PpTokenKind_punctuator;
- tok->raw.len = 1;
- tok->raw.data = pp->src + pp->pos - tok->raw.len;
- }
- } else if (c == '<') {
- if (pp->src[pp->pos] == '=') {
- ++pp->pos;
- tok->kind = PpTokenKind_punctuator;
- tok->raw.len = 2;
- tok->raw.data = pp->src + pp->pos - tok->raw.len;
- } else {
- tok->kind = PpTokenKind_punctuator;
- tok->raw.len = 1;
- tok->raw.data = pp->src + pp->pos - tok->raw.len;
- }
- } else if (c == '>') {
- if (pp->src[pp->pos] == '=') {
- ++pp->pos;
- tok->kind = PpTokenKind_punctuator;
- tok->raw.len = 2;
- tok->raw.data = pp->src + pp->pos - tok->raw.len;
- } else {
- tok->kind = PpTokenKind_punctuator;
- tok->raw.len = 1;
- tok->raw.data = pp->src + pp->pos - tok->raw.len;
- }
- } else if (c == '#') {
- if (pp->src[pp->pos] == '#') {
- ++pp->pos;
- tok->kind = PpTokenKind_punctuator;
- tok->raw.len = 2;
- tok->raw.data = pp->src + pp->pos - tok->raw.len;
- } else {
- tok->kind = PpTokenKind_punctuator;
- tok->raw.len = 1;
- tok->raw.data = pp->src + pp->pos - tok->raw.len;
- }
- } else if (c == '\'') {
- start = pp->pos - 1;
- if (pp->src[pp->pos] == '\\') {
- ++pp->pos;
- }
- pp->pos += 2;
- tok->kind = PpTokenKind_character_constant;
- tok->raw.data = pp->src + start;
- tok->raw.len = pp->pos - start;
- } else if (c == '"') {
- start = pp->pos - 1;
- while (1) {
- ch = pp->src[pp->pos];
- if (ch == '\\') {
- ++pp->pos;
- } else if (ch == '"') {
- break;
- }
- ++pp->pos;
- }
- ++pp->pos;
- tok->kind = PpTokenKind_string_literal;
- tok->raw.data = pp->src + start;
- tok->raw.len = pp->pos - start;
- } else if (isdigit(c)) {
- --pp->pos;
- start = pp->pos;
- while (isdigit(pp->src[pp->pos])) {
- ++pp->pos;
- }
- tok->kind = PpTokenKind_pp_number;
- tok->raw.data = pp->src + start;
- tok->raw.len = pp->pos - start;
- } else if (isalpha(c) || c == '_') {
- --pp->pos;
- start = pp->pos;
- while (isalnum(pp->src[pp->pos]) || pp->src[pp->pos] == '_') {
- ++pp->pos;
- }
- tok->raw.data = pp->src + start;
- tok->raw.len = pp->pos - start;
- tok->kind = PpTokenKind_identifier;
- } else if (isspace(c)) {
- tok->raw.data = pp->src;
- tok->raw.len = 1;
- tok->kind = PpTokenKind_whitespace;
- } else {
- tok->raw.data = pp->src;
- tok->raw.len = 1;
- tok->kind = PpTokenKind_other;
- }
- ++pp->n_pp_tokens;
- }
-}
-
-void process_pp_directives(Preprocessor* pp) {
- PpToken* tok = pp->pp_tokens;
- PpToken* define_dest;
- while (tok->kind != PpTokenKind_eof) {
- // TODO: check if the token is at the beginning of line.
- // TODO: check if skipped whitespaces do not contain line breaks.
- if (tok->kind == PpTokenKind_punctuator && string_equals_cstr(&tok->raw, "#")) {
- PpToken* tok2 = tok + 1;
- while (tok2->kind != PpTokenKind_eof && tok2->kind == PpTokenKind_whitespace)
- ++tok2;
- if (tok2->kind == PpTokenKind_identifier && string_equals_cstr(&tok2->raw, "define")) {
- ++tok2;
- while (tok2->kind != PpTokenKind_eof && tok2->kind == PpTokenKind_whitespace)
- ++tok2;
- if (tok2->kind == PpTokenKind_identifier) {
- PpToken* define_name = tok2;
- ++tok2;
- while (tok2->kind != PpTokenKind_eof && tok2->kind == PpTokenKind_whitespace)
- ++tok2;
- if (tok2->kind == PpTokenKind_identifier || tok2->kind == PpTokenKind_pp_number) {
- define_dest = tok2;
-
- pp->pp_defines[pp->n_pp_defines].name.len = define_name->raw.len;
- pp->pp_defines[pp->n_pp_defines].name.data = define_name->raw.data;
- pp->pp_defines[pp->n_pp_defines].tokens = calloc(1, sizeof(PpToken));
- pp->pp_defines[pp->n_pp_defines].tokens[0].kind = define_dest->kind;
- pp->pp_defines[pp->n_pp_defines].tokens[0].raw.len = define_dest->raw.len;
- pp->pp_defines[pp->n_pp_defines].tokens[0].raw.data = define_dest->raw.data;
- ++pp->n_pp_defines;
- }
- }
- // Remove #define directive.
- while (tok != tok2 + 1) {
- tok->kind = PpTokenKind_whitespace;
- tok->raw.len = 0;
- tok->raw.data = NULL;
- ++tok;
- }
- } else if (tok2->kind == PpTokenKind_identifier && string_equals_cstr(&tok2->raw, "include")) {
- ++tok2;
- while (tok2->kind != PpTokenKind_eof && tok2->kind == PpTokenKind_whitespace)
- ++tok2;
- if (tok2->kind == PpTokenKind_string_literal) {
- // Process #include directive.
- PpToken* include_name = tok2;
- ++tok2;
- char* include_name_buf = calloc(include_name->raw.len - 2 + 1, sizeof(char));
- sprintf(include_name_buf, "%.*s", include_name->raw.len - 2, include_name->raw.data + 1);
-
- // Remove #include directive itself.
- while (tok != tok2 + 1) {
- tok->kind = PpTokenKind_whitespace;
- tok->raw.len = 0;
- tok->raw.data = NULL;
- ++tok;
- }
-
- // Read and preprocess included file.
- FILE* include_file = fopen(include_name_buf, "rb");
- if (include_file == NULL) {
- char* buf = calloc(1024, sizeof(char));
- sprintf(buf, "cannot open include file: %s", include_name_buf);
- fatal_error(buf);
- }
- char* include_source = read_all(include_file);
- fclose(include_file);
-
- PpToken* include_pp_tokens = do_preprocess(include_source, pp->include_depth + 1);
-
- // Insert preprocessed tokens into the current token stream.
- int n_include_pp_tokens = 0;
- while (include_pp_tokens[n_include_pp_tokens].kind != PpTokenKind_eof) {
- ++n_include_pp_tokens;
- }
-
- // Shift existing tokens to make room for include tokens
- int n_pp_tokens_after_include = pp->n_pp_tokens - (tok - pp->pp_tokens);
-
- memmove(tok + n_include_pp_tokens, tok, n_pp_tokens_after_include * sizeof(PpToken));
-
- // Copy include tokens into the current position
- memcpy(tok, include_pp_tokens, n_include_pp_tokens * sizeof(PpToken));
-
- pp->n_pp_tokens += n_include_pp_tokens;
- }
- }
- } else if (tok->kind == PpTokenKind_identifier) {
- int pp_define_idx = find_pp_define(pp, &tok->raw);
- if (pp_define_idx != -1) {
- define_dest = pp->pp_defines[pp_define_idx].tokens;
- tok->kind = define_dest->kind;
- tok->raw.data = define_dest->raw.data;
- tok->raw.len = define_dest->raw.len;
- }
- }
- ++tok;
- }
-}
-
-PpToken* do_preprocess(char* src, int depth) {
- Preprocessor* pp = preprocessor_new(src, depth);
- pp_tokenize_all(pp);
- process_pp_directives(pp);
- return pp->pp_tokens;
-}
-
-PpToken* preprocess(char* src) {
- return do_preprocess(src, 0);
-}
-
-enum TokenKind {
- TokenKind_eof,
-
- TokenKind_and,
- TokenKind_andand,
- TokenKind_arrow,
- TokenKind_assign,
- TokenKind_assign_add,
- TokenKind_assign_sub,
- TokenKind_brace_l,
- TokenKind_brace_r,
- TokenKind_bracket_l,
- TokenKind_bracket_r,
- TokenKind_comma,
- TokenKind_dot,
- TokenKind_ellipsis,
- TokenKind_eq,
- TokenKind_ge,
- TokenKind_gt,
- TokenKind_ident,
- TokenKind_keyword_break,
- TokenKind_keyword_char,
- TokenKind_keyword_const,
- TokenKind_keyword_continue,
- TokenKind_keyword_do,
- TokenKind_keyword_else,
- TokenKind_keyword_enum,
- TokenKind_keyword_extern,
- TokenKind_keyword_for,
- TokenKind_keyword_if,
- TokenKind_keyword_int,
- TokenKind_keyword_long,
- TokenKind_keyword_return,
- TokenKind_keyword_sizeof,
- TokenKind_keyword_struct,
- TokenKind_keyword_typeof,
- TokenKind_keyword_void,
- TokenKind_keyword_while,
- TokenKind_le,
- TokenKind_lt,
- TokenKind_literal_int,
- TokenKind_literal_str,
- TokenKind_minus,
- TokenKind_minusminus,
- TokenKind_ne,
- TokenKind_not,
- TokenKind_oror,
- TokenKind_paren_l,
- TokenKind_paren_r,
- TokenKind_percent,
- TokenKind_plus,
- TokenKind_plusplus,
- TokenKind_semicolon,
- TokenKind_slash,
- TokenKind_star,
-};
-typedef enum TokenKind TokenKind;
-
-struct Token {
- TokenKind kind;
- String raw;
-};
-typedef struct Token Token;
-
-const char* token_kind_stringify(TokenKind k) {
- if (k == TokenKind_eof)
- return "<eof>";
- else if (k == TokenKind_and)
- return "&";
- else if (k == TokenKind_andand)
- return "&&";
- else if (k == TokenKind_arrow)
- return "->";
- else if (k == TokenKind_assign)
- return "=";
- else if (k == TokenKind_assign_add)
- return "+=";
- else if (k == TokenKind_assign_sub)
- return "-=";
- else if (k == TokenKind_brace_l)
- return "{";
- else if (k == TokenKind_brace_r)
- return "}";
- else if (k == TokenKind_bracket_l)
- return "[";
- else if (k == TokenKind_bracket_r)
- return "]";
- else if (k == TokenKind_comma)
- return ",";
- else if (k == TokenKind_dot)
- return ".";
- else if (k == TokenKind_ellipsis)
- return "...";
- else if (k == TokenKind_eq)
- return "==";
- else if (k == TokenKind_ge)
- return ">=";
- else if (k == TokenKind_gt)
- return ">";
- else if (k == TokenKind_ident)
- return "<identifier>";
- else if (k == TokenKind_keyword_break)
- return "break";
- else if (k == TokenKind_keyword_char)
- return "char";
- else if (k == TokenKind_keyword_const)
- return "const";
- else if (k == TokenKind_keyword_continue)
- return "continue";
- else if (k == TokenKind_keyword_do)
- return "do";
- else if (k == TokenKind_keyword_else)
- return "else";
- else if (k == TokenKind_keyword_enum)
- return "enum";
- else if (k == TokenKind_keyword_extern)
- return "extern";
- else if (k == TokenKind_keyword_for)
- return "for";
- else if (k == TokenKind_keyword_if)
- return "if";
- else if (k == TokenKind_keyword_int)
- return "int";
- else if (k == TokenKind_keyword_long)
- return "long";
- else if (k == TokenKind_keyword_return)
- return "return";
- else if (k == TokenKind_keyword_sizeof)
- return "sizeof";
- else if (k == TokenKind_keyword_struct)
- return "struct";
- else if (k == TokenKind_keyword_typeof)
- return "typeof";
- else if (k == TokenKind_keyword_void)
- return "void";
- else if (k == TokenKind_keyword_while)
- return "while";
- else if (k == TokenKind_le)
- return "le";
- else if (k == TokenKind_lt)
- return "lt";
- else if (k == TokenKind_literal_int)
- return "<integer>";
- else if (k == TokenKind_literal_str)
- return "<string>";
- else if (k == TokenKind_minus)
- return "-";
- else if (k == TokenKind_minusminus)
- return "--";
- else if (k == TokenKind_ne)
- return "!=";
- else if (k == TokenKind_not)
- return "!";
- else if (k == TokenKind_oror)
- return "||";
- else if (k == TokenKind_paren_l)
- return "(";
- else if (k == TokenKind_paren_r)
- return ")";
- else if (k == TokenKind_percent)
- return "%";
- else if (k == TokenKind_plus)
- return "+";
- else if (k == TokenKind_plusplus)
- return "++";
- else if (k == TokenKind_semicolon)
- return ";";
- else if (k == TokenKind_slash)
- return "/";
- else if (k == TokenKind_star)
- return "*";
- else
- unreachable();
-}
-
-const char* token_stringify(Token* t) {
- TokenKind k = t->kind;
- if (k == TokenKind_ident || k == TokenKind_literal_int || k == TokenKind_literal_str) {
- char* buf = calloc(t->raw.len + 1, sizeof(char));
- sprintf(buf, "%.*s (%s)", t->raw.len, t->raw.data, token_kind_stringify(k));
- return buf;
- } else {
- return token_kind_stringify(k);
- }
-}
-
-struct Lexer {
- PpToken* src;
- int pos;
- Token* tokens;
- int n_tokens;
-};
-typedef struct Lexer Lexer;
-
-Lexer* lexer_new(PpToken* pp_tokens) {
- Lexer* l = calloc(1, sizeof(Lexer));
- l->src = pp_tokens;
- l->tokens = calloc(1024 * 1024, sizeof(Token));
- return l;
-}
-
-void tokenize_all(Lexer* l) {
- char* buf;
- int ch;
- int start;
- while (l->src[l->pos].kind != PpTokenKind_eof) {
- PpToken* pp_tok = l->src + l->pos;
- Token* tok = l->tokens + l->n_tokens;
- PpTokenKind k = pp_tok->kind;
- ++l->pos;
- if (k == PpTokenKind_header_name) {
- fatal_error("not implemented yet");
- } else if (k == PpTokenKind_identifier) {
- if (string_equals_cstr(&pp_tok->raw, "break")) {
- tok->kind = TokenKind_keyword_break;
- } else if (string_equals_cstr(&pp_tok->raw, "char")) {
- tok->kind = TokenKind_keyword_char;
- } else if (string_equals_cstr(&pp_tok->raw, "const")) {
- tok->kind = TokenKind_keyword_const;
- } else if (string_equals_cstr(&pp_tok->raw, "continue")) {
- tok->kind = TokenKind_keyword_continue;
- } else if (string_equals_cstr(&pp_tok->raw, "do")) {
- tok->kind = TokenKind_keyword_do;
- } else if (string_equals_cstr(&pp_tok->raw, "else")) {
- tok->kind = TokenKind_keyword_else;
- } else if (string_equals_cstr(&pp_tok->raw, "enum")) {
- tok->kind = TokenKind_keyword_enum;
- } else if (string_equals_cstr(&pp_tok->raw, "extern")) {
- tok->kind = TokenKind_keyword_extern;
- } else if (string_equals_cstr(&pp_tok->raw, "for")) {
- tok->kind = TokenKind_keyword_for;
- } else if (string_equals_cstr(&pp_tok->raw, "if")) {
- tok->kind = TokenKind_keyword_if;
- } else if (string_equals_cstr(&pp_tok->raw, "int")) {
- tok->kind = TokenKind_keyword_int;
- } else if (string_equals_cstr(&pp_tok->raw, "long")) {
- tok->kind = TokenKind_keyword_long;
- } else if (string_equals_cstr(&pp_tok->raw, "return")) {
- tok->kind = TokenKind_keyword_return;
- } else if (string_equals_cstr(&pp_tok->raw, "sizeof")) {
- tok->kind = TokenKind_keyword_sizeof;
- } else if (string_equals_cstr(&pp_tok->raw, "struct")) {
- tok->kind = TokenKind_keyword_struct;
- } else if (string_equals_cstr(&pp_tok->raw, "typedef")) {
- tok->kind = TokenKind_keyword_typeof;
- } else if (string_equals_cstr(&pp_tok->raw, "void")) {
- tok->kind = TokenKind_keyword_void;
- } else if (string_equals_cstr(&pp_tok->raw, "while")) {
- tok->kind = TokenKind_keyword_while;
- } else {
- tok->kind = TokenKind_ident;
- }
- tok->raw.data = pp_tok->raw.data;
- tok->raw.len = pp_tok->raw.len;
- } else if (k == PpTokenKind_pp_number) {
- tok->kind = TokenKind_literal_int;
- tok->raw.data = pp_tok->raw.data;
- tok->raw.len = pp_tok->raw.len;
- } else if (k == PpTokenKind_character_constant) {
- tok->kind = TokenKind_literal_int;
- ch = pp_tok->raw.data[1];
- if (ch == '\\') {
- ch = pp_tok->raw.data[2];
- if (ch == 'a') {
- ch = '\a';
- } else if (ch == 'b') {
- ch = '\b';
- } else if (ch == 'f') {
- ch = '\f';
- } else if (ch == 'n') {
- ch = '\n';
- } else if (ch == 'r') {
- ch = '\r';
- } else if (ch == 't') {
- ch = '\t';
- } else if (ch == 'v') {
- ch = '\v';
- }
- }
- buf = calloc(4, sizeof(char));
- sprintf(buf, "%d", ch);
- tok->raw.data = buf;
- tok->raw.len = strlen(buf);
- } else if (k == PpTokenKind_string_literal) {
- tok->kind = TokenKind_literal_str;
- tok->raw.data = pp_tok->raw.data + 1;
- tok->raw.len = pp_tok->raw.len - 2;
- } else if (k == PpTokenKind_punctuator || k == PpTokenKind_other) {
- if (string_equals_cstr(&pp_tok->raw, "(")) {
- tok->kind = TokenKind_paren_l;
- } else if (string_equals_cstr(&pp_tok->raw, ")")) {
- tok->kind = TokenKind_paren_r;
- } else if (string_equals_cstr(&pp_tok->raw, "{")) {
- tok->kind = TokenKind_brace_l;
- } else if (string_equals_cstr(&pp_tok->raw, "}")) {
- tok->kind = TokenKind_brace_r;
- } else if (string_equals_cstr(&pp_tok->raw, "[")) {
- tok->kind = TokenKind_bracket_l;
- } else if (string_equals_cstr(&pp_tok->raw, "]")) {
- tok->kind = TokenKind_bracket_r;
- } else if (string_equals_cstr(&pp_tok->raw, ",")) {
- tok->kind = TokenKind_comma;
- } else if (string_equals_cstr(&pp_tok->raw, ";")) {
- tok->kind = TokenKind_semicolon;
- } else if (string_equals_cstr(&pp_tok->raw, "+=")) {
- tok->kind = TokenKind_assign_add;
- } else if (string_equals_cstr(&pp_tok->raw, "++")) {
- tok->kind = TokenKind_plusplus;
- } else if (string_equals_cstr(&pp_tok->raw, "+")) {
- tok->kind = TokenKind_plus;
- } else if (string_equals_cstr(&pp_tok->raw, "||")) {
- tok->kind = TokenKind_oror;
- } else if (string_equals_cstr(&pp_tok->raw, "&&")) {
- tok->kind = TokenKind_andand;
- } else if (string_equals_cstr(&pp_tok->raw, "&")) {
- tok->kind = TokenKind_and;
- } else if (string_equals_cstr(&pp_tok->raw, "->")) {
- tok->kind = TokenKind_arrow;
- } else if (string_equals_cstr(&pp_tok->raw, "-=")) {
- tok->kind = TokenKind_assign_sub;
- } else if (string_equals_cstr(&pp_tok->raw, "--")) {
- tok->kind = TokenKind_minusminus;
- } else if (string_equals_cstr(&pp_tok->raw, "-")) {
- tok->kind = TokenKind_minus;
- } else if (string_equals_cstr(&pp_tok->raw, "*")) {
- tok->kind = TokenKind_star;
- } else if (string_equals_cstr(&pp_tok->raw, "/")) {
- tok->kind = TokenKind_slash;
- } else if (string_equals_cstr(&pp_tok->raw, "%")) {
- tok->kind = TokenKind_percent;
- } else if (string_equals_cstr(&pp_tok->raw, "...")) {
- tok->kind = TokenKind_ellipsis;
- } else if (string_equals_cstr(&pp_tok->raw, ".")) {
- tok->kind = TokenKind_dot;
- } else if (string_equals_cstr(&pp_tok->raw, "!=")) {
- tok->kind = TokenKind_ne;
- } else if (string_equals_cstr(&pp_tok->raw, "!")) {
- tok->kind = TokenKind_not;
- } else if (string_equals_cstr(&pp_tok->raw, "==")) {
- tok->kind = TokenKind_eq;
- } else if (string_equals_cstr(&pp_tok->raw, "=")) {
- tok->kind = TokenKind_assign;
- } else if (string_equals_cstr(&pp_tok->raw, "<=")) {
- tok->kind = TokenKind_le;
- } else if (string_equals_cstr(&pp_tok->raw, "<")) {
- tok->kind = TokenKind_lt;
- } else if (string_equals_cstr(&pp_tok->raw, ">=")) {
- tok->kind = TokenKind_ge;
- } else if (string_equals_cstr(&pp_tok->raw, ">")) {
- tok->kind = TokenKind_gt;
- } else {
- sprintf(buf, "unknown token: %.*s", pp_tok->raw.len, pp_tok->raw.data);
- fatal_error(buf);
- }
- tok->raw.data = pp_tok->raw.data;
- tok->raw.len = pp_tok->raw.len;
- } else if (k == PpTokenKind_whitespace) {
- continue;
- }
- ++l->n_tokens;
- }
-}
-
-Token* tokenize(PpToken* pp_tokens) {
- Lexer* l = lexer_new(pp_tokens);
- tokenize_all(l);
- return l->tokens;
-}
-
-enum TypeKind {
- TypeKind_unknown,
-
- TypeKind_char,
- TypeKind_int,
- TypeKind_long,
- TypeKind_void,
- TypeKind_ptr,
- TypeKind_enum,
- TypeKind_struct,
-};
-typedef enum TypeKind TypeKind;
-
-struct AstNode;
-
-struct Type {
- TypeKind kind;
- struct Type* to;
- struct AstNode* def;
-};
-typedef struct Type Type;
-
-Type* type_new(TypeKind kind) {
- Type* ty = calloc(1, sizeof(Type));
- ty->kind = kind;
- return ty;
-}
-
-Type* type_new_ptr(Type* to) {
- Type* ty = calloc(1, sizeof(Type));
- ty->kind = TypeKind_ptr;
- ty->to = to;
- return ty;
-}
-
-int type_is_unsized(Type* ty) {
- return ty->kind != TypeKind_void;
-}
-
-int type_sizeof_struct(Type* ty);
-int type_alignof_struct(Type* ty);
-int type_offsetof(Type* ty, const String* name);
-Type* type_member_typeof(Type* ty, const String* name);
-
-int type_sizeof(Type* ty) {
- if (!type_is_unsized(ty)) {
- fatal_error("type_sizeof: type size cannot be determined");
- }
-
- if (ty->kind == TypeKind_ptr) {
- return 8;
- } else if (ty->kind == TypeKind_char) {
- return 1;
- } else if (ty->kind == TypeKind_int) {
- return 4;
- } else if (ty->kind == TypeKind_long) {
- return 8;
- } else if (ty->kind == TypeKind_enum) {
- return 4;
- } else {
- return type_sizeof_struct(ty);
- }
-}
-
-int type_alignof(Type* ty) {
- if (!type_is_unsized(ty)) {
- fatal_error("type_alignof: type size cannot be determined");
- }
-
- if (ty->kind == TypeKind_ptr) {
- return 8;
- } else if (ty->kind == TypeKind_char) {
- return 1;
- } else if (ty->kind == TypeKind_int) {
- return 4;
- } else if (ty->kind == TypeKind_long) {
- return 8;
- } else if (ty->kind == TypeKind_enum) {
- return 4;
- } else {
- return type_alignof_struct(ty);
- }
-}
-
-enum AstNodeKind {
- AstNodeKind_unknown,
-
- AstNodeKind_assign_expr,
- AstNodeKind_binary_expr,
- AstNodeKind_break_stmt,
- AstNodeKind_continue_stmt,
- AstNodeKind_deref_expr,
- AstNodeKind_do_while_stmt,
- AstNodeKind_enum_def,
- AstNodeKind_enum_member,
- AstNodeKind_expr_stmt,
- AstNodeKind_for_stmt,
- AstNodeKind_func_call,
- AstNodeKind_func_decl,
- AstNodeKind_func_def,
- AstNodeKind_gvar,
- AstNodeKind_gvar_decl,
- AstNodeKind_if_stmt,
- AstNodeKind_int_expr,
- AstNodeKind_list,
- AstNodeKind_logical_expr,
- AstNodeKind_lvar,
- AstNodeKind_lvar_decl,
- AstNodeKind_param,
- AstNodeKind_ref_expr,
- AstNodeKind_return_stmt,
- AstNodeKind_str_expr,
- AstNodeKind_struct_decl,
- AstNodeKind_struct_def,
- AstNodeKind_struct_member,
- AstNodeKind_type,
- AstNodeKind_typedef_decl,
- AstNodeKind_unary_expr,
-};
-typedef enum AstNodeKind AstNodeKind;
-
-#define node_items __n1
-#define node_len __i
-#define node_expr __n1
-#define node_lhs __n1
-#define node_rhs __n2
-#define node_operand __n1
-#define node_cond __n1
-#define node_init __n2
-#define node_update __n3
-#define node_then __n2
-#define node_else __n3
-#define node_body __n4
-#define node_members __n1
-#define node_params __n1
-#define node_args __n1
-#define node_int_value __i
-#define node_idx __i
-#define node_op __i
-
-struct AstNode {
- AstNodeKind kind;
- String name;
- Type* ty;
- struct AstNode* __n1;
- struct AstNode* __n2;
- struct AstNode* __n3;
- struct AstNode* __n4;
- int __i;
-};
-typedef struct AstNode AstNode;
-
-struct Program {
- AstNode* funcs;
- char** str_literals;
-};
-typedef struct Program Program;
-
-AstNode* ast_new(AstNodeKind kind) {
- AstNode* ast = calloc(1, sizeof(AstNode));
- ast->kind = kind;
- return ast;
-}
-
-AstNode* ast_new_list(int capacity) {
- AstNode* list = ast_new(AstNodeKind_list);
- list->node_items = calloc(capacity, sizeof(AstNode));
- list->node_len = 0;
- return list;
-}
-
-void ast_append(AstNode* list, AstNode* item) {
- if (list->kind != AstNodeKind_list) {
- fatal_error("ast_append: ast is not a list");
- }
- if (!item) {
- return;
- }
- memcpy(list->node_items + list->node_len, item, sizeof(AstNode));
- ++list->node_len;
-}
-
-AstNode* ast_new_int(int v) {
- AstNode* e = ast_new(AstNodeKind_int_expr);
- e->node_int_value = v;
- e->ty = type_new(TypeKind_int);
- return e;
-}
-
-AstNode* ast_new_unary_expr(int op, AstNode* operand) {
- AstNode* e = ast_new(AstNodeKind_unary_expr);
- e->node_op = op;
- e->node_operand = operand;
- e->ty = type_new(TypeKind_int);
- return e;
-}
-
-AstNode* ast_new_binary_expr(int op, AstNode* lhs, AstNode* rhs) {
- AstNode* e = ast_new(AstNodeKind_binary_expr);
- e->node_op = op;
- e->node_lhs = lhs;
- e->node_rhs = rhs;
- if (op == TokenKind_plus) {
- if (lhs->ty->kind == TypeKind_ptr) {
- e->ty = lhs->ty;
- } else if (rhs->ty->kind == TypeKind_ptr) {
- e->ty = rhs->ty;
- } else {
- e->ty = type_new(TypeKind_int);
- }
- } else if (op == TokenKind_minus) {
- if (lhs->ty->kind == TypeKind_ptr) {
- e->ty = lhs->ty;
- } else {
- e->ty = type_new(TypeKind_int);
- }
- } else {
- e->ty = type_new(TypeKind_int);
- }
- return e;
-}
-
-AstNode* ast_new_assign_expr(int op, AstNode* lhs, AstNode* rhs) {
- AstNode* e = ast_new(AstNodeKind_assign_expr);
- e->node_op = op;
- e->node_lhs = lhs;
- e->node_rhs = rhs;
- e->ty = lhs->ty;
- return e;
-}
-
-AstNode* ast_new_assign_add_expr(AstNode* lhs, AstNode* rhs) {
- if (lhs->ty->kind == TypeKind_ptr) {
- rhs = ast_new_binary_expr(TokenKind_star, rhs, ast_new_int(type_sizeof(lhs->ty->to)));
- } else if (rhs->ty->kind == TypeKind_ptr) {
- lhs = ast_new_binary_expr(TokenKind_star, lhs, ast_new_int(type_sizeof(rhs->ty->to)));
- }
- return ast_new_assign_expr(TokenKind_assign_add, lhs, rhs);
-}
-
-AstNode* ast_new_assign_sub_expr(AstNode* lhs, AstNode* rhs) {
- if (lhs->ty->kind == TypeKind_ptr) {
- rhs = ast_new_binary_expr(TokenKind_star, rhs, ast_new_int(type_sizeof(lhs->ty->to)));
- }
- return ast_new_assign_expr(TokenKind_assign_sub, lhs, rhs);
-}
-
-AstNode* ast_new_ref_expr(AstNode* operand) {
- AstNode* e = ast_new(AstNodeKind_ref_expr);
- e->node_operand = operand;
- e->ty = type_new_ptr(operand->ty);
- return e;
-}
-
-AstNode* ast_new_deref_expr(AstNode* operand) {
- AstNode* e = ast_new(AstNodeKind_deref_expr);
- e->node_operand = operand;
- e->ty = operand->ty->to;
- return e;
-}
-
-AstNode* ast_new_member_access_expr(AstNode* obj, const String* name) {
- AstNode* e = ast_new(AstNodeKind_deref_expr);
- e->node_operand = ast_new_binary_expr(TokenKind_plus, obj, ast_new_int(type_offsetof(obj->ty->to, name)));
- e->ty = type_member_typeof(obj->ty->to, name);
- e->node_operand->ty = type_new_ptr(e->ty);
- return e;
-}
-
-int type_sizeof_struct(Type* ty) {
- int next_offset = 0;
- int struct_align = 0;
- int padding;
-
- int i;
- for (i = 0; i < ty->def->node_members->node_len; ++i) {
- AstNode* member = ty->def->node_members->node_items + i;
- int size = type_sizeof(member->ty);
- int align = type_alignof(member->ty);
-
- if (next_offset % align != 0) {
- padding = align - next_offset % align;
- next_offset += padding;
- }
- next_offset += size;
- if (struct_align < align) {
- struct_align = align;
- }
- }
- if (next_offset % struct_align != 0) {
- padding = struct_align - next_offset % struct_align;
- next_offset += padding;
- }
- return next_offset;
-}
-
-int type_alignof_struct(Type* ty) {
- int struct_align = 0;
-
- int i;
- for (i = 0; i < ty->def->node_members->node_len; ++i) {
- AstNode* member = ty->def->node_members->node_items + i;
- int align = type_alignof(member->ty);
-
- if (struct_align < align) {
- struct_align = align;
- }
- }
- return struct_align;
-}
-
-int type_offsetof(Type* ty, const String* name) {
- if (ty->kind != TypeKind_struct) {
- fatal_error("type_offsetof: type is not a struct");
- }
-
- int next_offset = 0;
-
- int i;
- for (i = 0; i < ty->def->node_members->node_len; ++i) {
- AstNode* member = ty->def->node_members->node_items + i;
- int size = type_sizeof(member->ty);
- int align = type_alignof(member->ty);
-
- if (next_offset % align != 0) {
- int padding = align - next_offset % align;
- next_offset += padding;
- }
- if (string_equals(&member->name, name)) {
- return next_offset;
- }
- next_offset += size;
- }
-
- fatal_error("type_offsetof: member not found");
-}
-
-Type* type_member_typeof(Type* ty, const String* name) {
- if (ty->kind != TypeKind_struct) {
- fatal_error("type_offsetof: type is not a struct");
- }
-
- int i;
- for (i = 0; i < ty->def->node_members->node_len; ++i) {
- AstNode* member = ty->def->node_members->node_items + i;
- if (string_equals(&member->name, name)) {
- return member->ty;
- }
- }
-
- fatal_error("type_offsetof: member not found");
-}
-
-#define LVAR_MAX 32
-
-struct LocalVar {
- String name;
- Type* ty;
-};
-typedef struct LocalVar LocalVar;
-
-struct GlobalVar {
- String name;
- Type* ty;
-};
-typedef struct GlobalVar GlobalVar;
-
-struct Func {
- String name;
- Type* ty;
-};
-typedef struct Func Func;
-
-struct Parser {
- Token* tokens;
- int pos;
- LocalVar* lvars;
- int n_lvars;
- GlobalVar* gvars;
- int n_gvars;
- Func* funcs;
- int n_funcs;
- AstNode* structs;
- int n_structs;
- AstNode* enums;
- int n_enums;
- AstNode* typedefs;
- int n_typedefs;
- char** str_literals;
- int n_str_literals;
-};
-typedef struct Parser Parser;
-
-Parser* parser_new(Token* tokens) {
- Parser* p = calloc(1, sizeof(Parser));
- p->tokens = tokens;
- p->gvars = calloc(128, sizeof(GlobalVar));
- p->funcs = calloc(256, sizeof(Func));
- p->structs = calloc(64, sizeof(AstNode));
- p->enums = calloc(16, sizeof(AstNode));
- p->typedefs = calloc(64, sizeof(AstNode));
- p->str_literals = calloc(1024, sizeof(char*));
- return p;
-}
-
-Token* peek_token(Parser* p) {
- return p->tokens + p->pos;
-}
-
-Token* next_token(Parser* p) {
- ++p->pos;
- return p->tokens + p->pos - 1;
-}
-
-int eof(Parser* p) {
- return peek_token(p)->kind != TokenKind_eof;
-}
-
-Token* expect(Parser* p, int expected) {
- Token* t = next_token(p);
- if (t->kind == expected) {
- return t;
- }
-
- char* buf = calloc(1024, sizeof(char));
- sprintf(buf, "expected '%s', but got '%s'", token_kind_stringify(expected), token_stringify(t));
- fatal_error(buf);
-}
-
-int find_lvar(Parser* p, const String* name) {
- int i;
- for (i = 0; i < p->n_lvars; ++i) {
- if (string_equals(&p->lvars[i].name, name)) {
- return i;
- }
- }
- return -1;
-}
-
-int find_gvar(Parser* p, const String* name) {
- int i;
- for (i = 0; i < p->n_gvars; ++i) {
- if (string_equals(&p->gvars[i].name, name)) {
- return i;
- }
- }
- return -1;
-}
-
-int find_func(Parser* p, const String* name) {
- int i;
- for (i = 0; i < p->n_funcs; ++i) {
- if (string_equals(&p->funcs[i].name, name)) {
- return i;
- }
- }
- return -1;
-}
-
-int find_struct(Parser* p, const String* name) {
- int i;
- for (i = 0; i < p->n_structs; ++i) {
- if (string_equals(&p->structs[i].name, name)) {
- return i;
- }
- }
- return -1;
-}
-
-int find_enum(Parser* p, const String* name) {
- int i;
- for (i = 0; i < p->n_enums; ++i) {
- if (string_equals(&p->enums[i].name, name)) {
- return i;
- }
- }
- return -1;
-}
-
-int find_enum_member(Parser* p, const String* name) {
- int i;
- int j;
- for (i = 0; i < p->n_enums; ++i) {
- for (j = 0; j < p->enums[i].node_members->node_len; ++j) {
- if (string_equals(&p->enums[i].node_members->node_items[j].name, name)) {
- return i * 1000 + j;
- }
- }
- }
- return -1;
-}
-
-int find_typedef(Parser* p, const String* name) {
- int i;
- for (i = 0; i < p->n_typedefs; ++i) {
- if (string_equals(&p->typedefs[i].name, name)) {
- return i;
- }
- }
- return -1;
-}
-
-AstNode* parse_expr(Parser* p);
-AstNode* parse_stmt(Parser* p);
-
-String* parse_ident(Parser* p) {
- return &expect(p, TokenKind_ident)->raw;
-}
-
-int register_str_literal(Parser* p, char* s) {
- p->str_literals[p->n_str_literals] = s;
- ++p->n_str_literals;
- return p->n_str_literals;
-}
-
-AstNode* parse_primary_expr(Parser* p) {
- Token* t = next_token(p);
- AstNode* e;
- char* buf;
- if (t->kind == TokenKind_literal_int) {
- return ast_new_int(atoi(string_to_cstr(&t->raw)));
- } else if (t->kind == TokenKind_literal_str) {
- e = ast_new(AstNodeKind_str_expr);
- e->node_idx = register_str_literal(p, string_to_cstr(&t->raw));
- return e;
- } else if (t->kind == TokenKind_paren_l) {
- e = parse_expr(p);
- expect(p, TokenKind_paren_r);
- return e;
- } else if (t->kind == TokenKind_ident) {
- String* name = &t->raw;
-
- if (peek_token(p)->kind == TokenKind_paren_l) {
- e = ast_new(AstNodeKind_func_call);
- int func_idx = find_func(p, name);
- if (func_idx == -1) {
- buf = calloc(1024, sizeof(char));
- sprintf(buf, "undefined function: %.*s", name->len, name->data);
- fatal_error(buf);
- }
- e->name.data = name->data;
- e->name.len = name->len;
- e->ty = p->funcs[func_idx].ty;
- return e;
- }
-
- int lvar_idx = find_lvar(p, name);
- if (lvar_idx == -1) {
- int gvar_idx = find_gvar(p, name);
- if (gvar_idx == -1) {
- int enum_member_idx = find_enum_member(p, name);
- if (enum_member_idx == -1) {
- buf = calloc(1024, sizeof(char));
- sprintf(buf, "undefined variable: %.*s", name->len, name->data);
- fatal_error(buf);
- }
- int enum_idx = enum_member_idx / 1000;
- int n = enum_member_idx % 1000;
- e = ast_new_int(p->enums[enum_idx].node_members->node_items[n].node_int_value);
- e->ty = type_new(TypeKind_enum);
- e->ty->def = p->enums + enum_idx;
- return e;
- }
- e = ast_new(AstNodeKind_gvar);
- e->name.data = name->data;
- e->name.len = name->len;
- e->ty = p->gvars[gvar_idx].ty;
- return e;
- }
-
- e = ast_new(AstNodeKind_lvar);
- e->name.data = name->data;
- e->name.len = name->len;
- e->node_idx = lvar_idx;
- e->ty = p->lvars[lvar_idx].ty;
- return e;
- } else {
- buf = calloc(1024, sizeof(char));
- sprintf(buf, "expected primary expression, but got '%s'", token_stringify(t));
- fatal_error(buf);
- }
-}
-
-AstNode* parse_arg_list(Parser* p) {
- AstNode* list = ast_new_list(6);
- while (peek_token(p)->kind != TokenKind_paren_r) {
- AstNode* arg = parse_expr(p);
- ast_append(list, arg);
- if (peek_token(p)->kind == TokenKind_comma) {
- next_token(p);
- } else {
- break;
- }
- }
- if (list->node_len > 6) {
- fatal_error("too many arguments");
- }
- return list;
-}
-
-AstNode* parse_postfix_expr(Parser* p) {
- AstNode* ret = parse_primary_expr(p);
- String* name;
- while (1) {
- TokenKind tk = peek_token(p)->kind;
- if (tk == TokenKind_paren_l) {
- next_token(p);
- AstNode* args = parse_arg_list(p);
- expect(p, TokenKind_paren_r);
- ret->node_args = args;
- } else if (tk == TokenKind_bracket_l) {
- next_token(p);
- AstNode* idx = parse_expr(p);
- expect(p, TokenKind_bracket_r);
- idx = ast_new_binary_expr(TokenKind_star, idx, ast_new_int(type_sizeof(ret->ty->to)));
- ret = ast_new_deref_expr(ast_new_binary_expr(TokenKind_plus, ret, idx));
- } else if (tk == TokenKind_dot) {
- next_token(p);
- name = parse_ident(p);
- ret = ast_new_member_access_expr(ast_new_ref_expr(ret), name);
- } else if (tk == TokenKind_arrow) {
- next_token(p);
- name = parse_ident(p);
- ret = ast_new_member_access_expr(ret, name);
- } else {
- break;
- }
- }
- return ret;
-}
-
-int is_type_token(Parser* p, Token* token) {
- if (token->kind == TokenKind_keyword_int || token->kind == TokenKind_keyword_long ||
- token->kind == TokenKind_keyword_char || token->kind == TokenKind_keyword_void ||
- token->kind == TokenKind_keyword_enum || token->kind == TokenKind_keyword_struct ||
- token->kind == TokenKind_keyword_const) {
- return 1;
- }
- if (token->kind != TokenKind_ident) {
- return 0;
- }
- return find_typedef(p, &token->raw) != -1;
-}
-
-Type* parse_type(Parser* p) {
- Token* t = next_token(p);
- char* buf;
- String* name;
- if (t->kind == TokenKind_keyword_const) {
- t = next_token(p);
- }
- if (!is_type_token(p, t)) {
- buf = calloc(1024, sizeof(char));
- sprintf(buf, "parse_type: expected type, but got '%s'", token_stringify(t));
- fatal_error(buf);
- }
- Type* ty;
- if (t->kind == TokenKind_ident) {
- int typedef_idx = find_typedef(p, &t->raw);
- if (typedef_idx == -1) {
- buf = calloc(1024, sizeof(char));
- sprintf(buf, "parse_type: unknown typedef, %.*s", t->raw.len, t->raw.data);
- fatal_error(buf);
- }
- ty = p->typedefs[typedef_idx].ty;
- } else {
- ty = type_new(TypeKind_unknown);
- if (t->kind == TokenKind_keyword_int) {
- ty->kind = TypeKind_int;
- } else if (t->kind == TokenKind_keyword_long) {
- ty->kind = TypeKind_long;
- } else if (t->kind == TokenKind_keyword_char) {
- ty->kind = TypeKind_char;
- } else if (t->kind == TokenKind_keyword_void) {
- ty->kind = TypeKind_void;
- } else if (t->kind == TokenKind_keyword_enum) {
- ty->kind = TypeKind_enum;
- name = parse_ident(p);
- int enum_idx = find_enum(p, name);
- if (enum_idx == -1) {
- buf = calloc(1024, sizeof(char));
- sprintf(buf, "parse_type: unknown enum, %.*s", name->len, name->data);
- fatal_error(buf);
- }
- ty->def = p->enums + enum_idx;
- } else if (t->kind == TokenKind_keyword_struct) {
- ty->kind = TypeKind_struct;
- name = parse_ident(p);
- int struct_idx = find_struct(p, name);
- if (struct_idx == -1) {
- buf = calloc(1024, sizeof(char));
- sprintf(buf, "parse_type: unknown struct, %.*s", name->len, name->data);
- fatal_error(buf);
- }
- ty->def = p->structs + struct_idx;
- } else {
- unreachable();
- }
- }
- while (1) {
- if (peek_token(p)->kind == TokenKind_star) {
- next_token(p);
- ty = type_new_ptr(ty);
- } else {
- break;
- }
- }
- return ty;
-}
-
-AstNode* parse_prefix_expr(Parser* p) {
- AstNode* operand;
- TokenKind op = peek_token(p)->kind;
- if (op == TokenKind_minus) {
- next_token(p);
- operand = parse_prefix_expr(p);
- return ast_new_binary_expr(op, ast_new_int(0), operand);
- } else if (op == TokenKind_not) {
- next_token(p);
- operand = parse_prefix_expr(p);
- return ast_new_unary_expr(op, operand);
- } else if (op == TokenKind_and) {
- next_token(p);
- operand = parse_prefix_expr(p);
- return ast_new_ref_expr(operand);
- } else if (op == TokenKind_star) {
- next_token(p);
- operand = parse_prefix_expr(p);
- return ast_new_deref_expr(operand);
- } else if (op == TokenKind_plusplus) {
- next_token(p);
- operand = parse_prefix_expr(p);
- return ast_new_assign_add_expr(operand, ast_new_int(1));
- } else if (op == TokenKind_minusminus) {
- next_token(p);
- operand = parse_prefix_expr(p);
- return ast_new_assign_sub_expr(operand, ast_new_int(1));
- } else if (op == TokenKind_keyword_sizeof) {
- next_token(p);
- expect(p, TokenKind_paren_l);
- Type* ty = parse_type(p);
- expect(p, TokenKind_paren_r);
- return ast_new_int(type_sizeof(ty));
- }
- return parse_postfix_expr(p);
-}
-
-AstNode* parse_multiplicative_expr(Parser* p) {
- AstNode* lhs = parse_prefix_expr(p);
- while (1) {
- TokenKind op = peek_token(p)->kind;
- if (op == TokenKind_star || op == TokenKind_slash || op == TokenKind_percent) {
- next_token(p);
- AstNode* rhs = parse_prefix_expr(p);
- lhs = ast_new_binary_expr(op, lhs, rhs);
- } else {
- break;
- }
- }
- return lhs;
-}
-
-AstNode* parse_additive_expr(Parser* p) {
- AstNode* lhs = parse_multiplicative_expr(p);
- AstNode* rhs;
- while (1) {
- TokenKind op = peek_token(p)->kind;
- if (op == TokenKind_plus) {
- next_token(p);
- rhs = parse_multiplicative_expr(p);
- if (lhs->ty->kind == TypeKind_ptr) {
- lhs = ast_new_binary_expr(
- op, lhs, ast_new_binary_expr(TokenKind_star, rhs, ast_new_int(type_sizeof(lhs->ty->to))));
- } else if (rhs->ty->kind == TypeKind_ptr) {
- lhs = ast_new_binary_expr(
- op, ast_new_binary_expr(TokenKind_star, lhs, ast_new_int(type_sizeof(rhs->ty->to))), rhs);
- } else {
- lhs = ast_new_binary_expr(op, lhs, rhs);
- }
- } else if (op == TokenKind_minus) {
- next_token(p);
- rhs = parse_multiplicative_expr(p);
- if (lhs->ty->kind == TypeKind_ptr) {
- if (rhs->ty->kind == TypeKind_ptr) {
- // (a - b) / sizeof(a)
- lhs = ast_new_binary_expr(TokenKind_slash, ast_new_binary_expr(op, lhs, rhs),
- ast_new_int(type_sizeof(lhs->ty->to)));
- } else {
- // a - b*sizeof(a)
- lhs = ast_new_binary_expr(
- op, lhs, ast_new_binary_expr(TokenKind_star, rhs, ast_new_int(type_sizeof(lhs->ty->to))));
- }
- } else {
- lhs = ast_new_binary_expr(op, lhs, rhs);
- }
- } else {
- break;
- }
- }
- return lhs;
-}
-
-AstNode* parse_relational_expr(Parser* p) {
- AstNode* lhs = parse_additive_expr(p);
- AstNode* rhs;
- while (1) {
- TokenKind op = peek_token(p)->kind;
- if (op == TokenKind_lt || op == TokenKind_le) {
- next_token(p);
- rhs = parse_additive_expr(p);
- lhs = ast_new_binary_expr(op, lhs, rhs);
- } else if (op == TokenKind_gt) {
- next_token(p);
- rhs = parse_additive_expr(p);
- lhs = ast_new_binary_expr(TokenKind_lt, rhs, lhs);
- } else if (op == TokenKind_ge) {
- next_token(p);
- rhs = parse_additive_expr(p);
- lhs = ast_new_binary_expr(TokenKind_le, rhs, lhs);
- } else {
- break;
- }
- }
- return lhs;
-}
-
-AstNode* parse_equality_expr(Parser* p) {
- AstNode* lhs = parse_relational_expr(p);
- while (1) {
- TokenKind op = peek_token(p)->kind;
- if (op == TokenKind_eq || op == TokenKind_ne) {
- next_token(p);
- AstNode* rhs = parse_relational_expr(p);
- lhs = ast_new_binary_expr(op, lhs, rhs);
- } else {
- break;
- }
- }
- return lhs;
-}
-
-AstNode* parse_logical_and_expr(Parser* p) {
- AstNode* lhs = parse_equality_expr(p);
- while (1) {
- TokenKind op = peek_token(p)->kind;
- if (op == TokenKind_andand) {
- next_token(p);
- AstNode* rhs = parse_equality_expr(p);
- AstNode* e = ast_new(AstNodeKind_logical_expr);
- e->node_op = op;
- e->node_lhs = lhs;
- e->node_rhs = rhs;
- e->ty = type_new(TypeKind_int);
- lhs = e;
- } else {
- break;
- }
- }
- return lhs;
-}
-
-AstNode* parse_logical_or_expr(Parser* p) {
- AstNode* lhs = parse_logical_and_expr(p);
- while (1) {
- TokenKind op = peek_token(p)->kind;
- if (op == TokenKind_oror) {
- next_token(p);
- AstNode* rhs = parse_logical_and_expr(p);
- AstNode* e = ast_new(AstNodeKind_logical_expr);
- e->node_op = op;
- e->node_lhs = lhs;
- e->node_rhs = rhs;
- e->ty = type_new(TypeKind_int);
- lhs = e;
- } else {
- break;
- }
- }
- return lhs;
-}
-
-AstNode* parse_assignment_expr(Parser* p) {
- AstNode* lhs = parse_logical_or_expr(p);
- AstNode* rhs;
- while (1) {
- TokenKind op = peek_token(p)->kind;
- if (op == TokenKind_assign) {
- next_token(p);
- rhs = parse_logical_or_expr(p);
- lhs = ast_new_assign_expr(op, lhs, rhs);
- } else if (op == TokenKind_assign_add) {
- next_token(p);
- rhs = parse_logical_or_expr(p);
- lhs = ast_new_assign_add_expr(lhs, rhs);
- } else if (op == TokenKind_assign_sub) {
- next_token(p);
- rhs = parse_logical_or_expr(p);
- lhs = ast_new_assign_sub_expr(lhs, rhs);
- } else {
- break;
- }
- }
- return lhs;
-}
-
-AstNode* parse_expr(Parser* p) {
- return parse_assignment_expr(p);
-}
-
-AstNode* parse_return_stmt(Parser* p) {
- expect(p, TokenKind_keyword_return);
- if (peek_token(p)->kind == TokenKind_semicolon) {
- next_token(p);
- return ast_new(AstNodeKind_return_stmt);
- }
-
- AstNode* expr = parse_expr(p);
- expect(p, TokenKind_semicolon);
-
- AstNode* ret = ast_new(AstNodeKind_return_stmt);
- ret->node_expr = expr;
- return ret;
-}
-
-AstNode* parse_if_stmt(Parser* p) {
- expect(p, TokenKind_keyword_if);
- expect(p, TokenKind_paren_l);
- AstNode* cond = parse_expr(p);
- expect(p, TokenKind_paren_r);
- AstNode* then_body = parse_stmt(p);
- AstNode* else_body = NULL;
- if (peek_token(p)->kind == TokenKind_keyword_else) {
- next_token(p);
- else_body = parse_stmt(p);
- }
-
- AstNode* stmt = ast_new(AstNodeKind_if_stmt);
- stmt->node_cond = cond;
- stmt->node_then = then_body;
- stmt->node_else = else_body;
- return stmt;
-}
-
-AstNode* parse_for_stmt(Parser* p) {
- expect(p, TokenKind_keyword_for);
- expect(p, TokenKind_paren_l);
- AstNode* init = NULL;
- AstNode* cond = NULL;
- AstNode* update = NULL;
- if (peek_token(p)->kind != TokenKind_semicolon) {
- init = parse_expr(p);
- }
- expect(p, TokenKind_semicolon);
- if (peek_token(p)->kind != TokenKind_semicolon) {
- cond = parse_expr(p);
- } else {
- cond = ast_new_int(1);
- }
- expect(p, TokenKind_semicolon);
- if (peek_token(p)->kind != TokenKind_paren_r) {
- update = parse_expr(p);
- }
- expect(p, TokenKind_paren_r);
- AstNode* body = parse_stmt(p);
-
- AstNode* stmt = ast_new(AstNodeKind_for_stmt);
- stmt->node_cond = cond;
- stmt->node_init = init;
- stmt->node_update = update;
- stmt->node_body = body;
- return stmt;
-}
-
-AstNode* parse_while_stmt(Parser* p) {
- expect(p, TokenKind_keyword_while);
- expect(p, TokenKind_paren_l);
- AstNode* cond = parse_expr(p);
- expect(p, TokenKind_paren_r);
- AstNode* body = parse_stmt(p);
-
- AstNode* stmt = ast_new(AstNodeKind_for_stmt);
- stmt->node_cond = cond;
- stmt->node_body = body;
- return stmt;
-}
-
-AstNode* parse_do_while_stmt(Parser* p) {
- expect(p, TokenKind_keyword_do);
- AstNode* body = parse_stmt(p);
- expect(p, TokenKind_keyword_while);
- expect(p, TokenKind_paren_l);
- AstNode* cond = parse_expr(p);
- expect(p, TokenKind_paren_r);
- expect(p, TokenKind_semicolon);
-
- AstNode* stmt = ast_new(AstNodeKind_do_while_stmt);
- stmt->node_cond = cond;
- stmt->node_body = body;
- return stmt;
-}
-
-AstNode* parse_break_stmt(Parser* p) {
- expect(p, TokenKind_keyword_break);
- expect(p, TokenKind_semicolon);
- return ast_new(AstNodeKind_break_stmt);
-}
-
-AstNode* parse_continue_stmt(Parser* p) {
- expect(p, TokenKind_keyword_continue);
- expect(p, TokenKind_semicolon);
- return ast_new(AstNodeKind_continue_stmt);
-}
-
-AstNode* parse_var_decl(Parser* p) {
- Type* ty = parse_type(p);
- if (!type_is_unsized(ty)) {
- fatal_error("parse_var_decl: invalid type for variable");
- }
- String* name = parse_ident(p);
-
- AstNode* init = NULL;
- if (peek_token(p)->kind == TokenKind_assign) {
- next_token(p);
- init = parse_expr(p);
- }
- expect(p, TokenKind_semicolon);
-
- if (find_lvar(p, name) != -1 || find_gvar(p, name) != -1) {
- char* buf = calloc(1024, sizeof(char));
- sprintf(buf, "parse_var_decl: %.*s redeclared", name->len, name->data);
- fatal_error(buf);
- }
- p->lvars[p->n_lvars].name.data = name->data;
- p->lvars[p->n_lvars].name.len = name->len;
- p->lvars[p->n_lvars].ty = ty;
- ++p->n_lvars;
-
- AstNode* ret;
- if (init) {
- AstNode* lhs = ast_new(AstNodeKind_lvar);
- lhs->name.data = name->data;
- lhs->name.len = name->len;
- lhs->node_idx = p->n_lvars - 1;
- lhs->ty = ty;
- AstNode* assign = ast_new_assign_expr(TokenKind_assign, lhs, init);
- ret = ast_new(AstNodeKind_expr_stmt);
- ret->node_expr = assign;
- } else {
- ret = ast_new(AstNodeKind_lvar_decl);
- }
- return ret;
-}
-
-AstNode* parse_expr_stmt(Parser* p) {
- AstNode* e = parse_expr(p);
- expect(p, TokenKind_semicolon);
- AstNode* stmt = ast_new(AstNodeKind_expr_stmt);
- stmt->node_expr = e;
- return stmt;
-}
-
-AstNode* parse_block_stmt(Parser* p) {
- AstNode* list = ast_new_list(1024);
- expect(p, TokenKind_brace_l);
- while (peek_token(p)->kind != TokenKind_brace_r) {
- AstNode* stmt = parse_stmt(p);
- ast_append(list, stmt);
- }
- expect(p, TokenKind_brace_r);
- return list;
-}
-
-AstNode* parse_stmt(Parser* p) {
- Token* t = peek_token(p);
- if (t->kind == TokenKind_keyword_return) {
- return parse_return_stmt(p);
- } else if (t->kind == TokenKind_keyword_if) {
- return parse_if_stmt(p);
- } else if (t->kind == TokenKind_keyword_for) {
- return parse_for_stmt(p);
- } else if (t->kind == TokenKind_keyword_while) {
- return parse_while_stmt(p);
- } else if (t->kind == TokenKind_keyword_do) {
- return parse_do_while_stmt(p);
- } else if (t->kind == TokenKind_keyword_break) {
- return parse_break_stmt(p);
- } else if (t->kind == TokenKind_keyword_continue) {
- return parse_continue_stmt(p);
- } else if (t->kind == TokenKind_brace_l) {
- return parse_block_stmt(p);
- } else if (is_type_token(p, t)) {
- return parse_var_decl(p);
- } else {
- return parse_expr_stmt(p);
- }
-}
-
-void enter_func(Parser* p) {
- p->lvars = calloc(LVAR_MAX, sizeof(LocalVar));
- p->n_lvars = 0;
-}
-
-void register_params(Parser* p, AstNode* params) {
- int i;
- for (i = 0; i < params->node_len; ++i) {
- AstNode* param = params->node_items + i;
- p->lvars[p->n_lvars].name.data = param->name.data;
- p->lvars[p->n_lvars].name.len = param->name.len;
- p->lvars[p->n_lvars].ty = param->ty;
- ++p->n_lvars;
- }
-}
-
-void register_func(Parser* p, const String* name, Type* ty) {
- p->funcs[p->n_funcs].name.data = name->data;
- p->funcs[p->n_funcs].name.len = name->len;
- p->funcs[p->n_funcs].ty = ty;
- ++p->n_funcs;
-}
-
-AstNode* parse_param(Parser* p) {
- Type* ty = parse_type(p);
- String* name = NULL;
- TokenKind tk = peek_token(p)->kind;
- if (tk != TokenKind_comma && tk != TokenKind_paren_r) {
- name = parse_ident(p);
- }
- AstNode* param = ast_new(AstNodeKind_param);
- param->ty = ty;
- if (name) {
- param->name.data = name->data;
- param->name.len = name->len;
- }
- return param;
-}
-
-AstNode* parse_param_list(Parser* p) {
- int has_void = 0;
- AstNode* list = ast_new_list(6);
- while (peek_token(p)->kind != TokenKind_paren_r) {
- if (peek_token(p)->kind == TokenKind_ellipsis) {
- next_token(p);
- break;
- }
- AstNode* param = parse_param(p);
- has_void = has_void || param->ty->kind == TypeKind_void;
- ast_append(list, param);
- if (peek_token(p)->kind == TokenKind_comma) {
- next_token(p);
- } else {
- break;
- }
- }
- if (list->node_len > 6) {
- fatal_error("too many parameters");
- }
- if (has_void) {
- if (list->node_len != 1) {
- fatal_error("invalid use of void param");
- }
- list->node_len = 0;
- }
- return list;
-}
-
-AstNode* parse_func_decl_or_def(Parser* p) {
- Type* ty = parse_type(p);
- String* name = parse_ident(p);
- register_func(p, name, ty);
- expect(p, TokenKind_paren_l);
- AstNode* params = parse_param_list(p);
- expect(p, TokenKind_paren_r);
- if (peek_token(p)->kind == TokenKind_semicolon) {
- next_token(p);
- return ast_new(AstNodeKind_func_decl);
- }
- enter_func(p);
- register_params(p, params);
- AstNode* body = parse_block_stmt(p);
- AstNode* func = ast_new(AstNodeKind_func_def);
- func->ty = ty;
- func->name.data = name->data;
- func->name.len = name->len;
- func->node_params = params;
- func->node_body = body;
- return func;
-}
-
-AstNode* parse_struct_member(Parser* p) {
- Type* ty = parse_type(p);
- String* name = parse_ident(p);
- expect(p, TokenKind_semicolon);
- AstNode* member = ast_new(AstNodeKind_struct_member);
- member->name.data = name->data;
- member->name.len = name->len;
- member->ty = ty;
- return member;
-}
-
-AstNode* parse_struct_members(Parser* p) {
- AstNode* list = ast_new_list(16);
- while (peek_token(p)->kind != TokenKind_brace_r) {
- AstNode* member = parse_struct_member(p);
- ast_append(list, member);
- }
- return list;
-}
-
-AstNode* parse_struct_decl_or_def(Parser* p) {
- expect(p, TokenKind_keyword_struct);
- String* name = parse_ident(p);
-
- if (peek_token(p)->kind != TokenKind_semicolon && peek_token(p)->kind != TokenKind_brace_l) {
- p->pos = p->pos - 2;
- return parse_func_decl_or_def(p);
- }
-
- int struct_idx = find_struct(p, name);
- if (struct_idx == -1) {
- struct_idx = p->n_structs;
- p->structs[struct_idx].kind = AstNodeKind_struct_def;
- p->structs[struct_idx].name.data = name->data;
- p->structs[struct_idx].name.len = name->len;
- ++p->n_structs;
- }
- if (peek_token(p)->kind == TokenKind_semicolon) {
- next_token(p);
- return ast_new(AstNodeKind_struct_decl);
- }
- if (p->structs[struct_idx].node_members) {
- char* buf = calloc(1024, sizeof(char));
- sprintf(buf, "parse_struct_decl_or_def: struct %.*s redefined", name->len, name->data);
- fatal_error(buf);
- }
- expect(p, TokenKind_brace_l);
- AstNode* members = parse_struct_members(p);
- expect(p, TokenKind_brace_r);
- expect(p, TokenKind_semicolon);
- p->structs[struct_idx].node_members = members;
- return p->structs + struct_idx;
-}
-
-AstNode* parse_enum_member(Parser* p) {
- String* name = parse_ident(p);
- AstNode* member = ast_new(AstNodeKind_enum_member);
- member->name.data = name->data;
- member->name.len = name->len;
- return member;
-}
-
-AstNode* parse_enum_members(Parser* p) {
- int next_value = 0;
- AstNode* list = ast_new_list(256);
- while (peek_token(p)->kind != TokenKind_brace_r) {
- AstNode* member = parse_enum_member(p);
- member->node_int_value = next_value;
- ++next_value;
- ast_append(list, member);
- if (peek_token(p)->kind != TokenKind_comma) {
- break;
- }
- next_token(p);
- }
- return list;
-}
-
-AstNode* parse_enum_def(Parser* p) {
- expect(p, TokenKind_keyword_enum);
- String* name = parse_ident(p);
-
- if (peek_token(p)->kind != TokenKind_brace_l) {
- p->pos = p->pos - 2;
- return parse_func_decl_or_def(p);
- }
-
- int enum_idx = find_enum(p, name);
- if (enum_idx == -1) {
- enum_idx = p->n_enums;
- p->enums[enum_idx].kind = AstNodeKind_enum_def;
- p->enums[enum_idx].name.data = name->data;
- p->enums[enum_idx].name.len = name->len;
- ++p->n_enums;
- } else {
- char* buf = calloc(1024, sizeof(char));
- sprintf(buf, "parse_enum_def: enum %.*s redefined", name->len, name->data);
- fatal_error(buf);
- }
- expect(p, TokenKind_brace_l);
- AstNode* members = parse_enum_members(p);
- expect(p, TokenKind_brace_r);
- expect(p, TokenKind_semicolon);
- p->enums[enum_idx].node_members = members;
- return p->enums + enum_idx;
-}
-
-AstNode* parse_typedef_decl(Parser* p) {
- expect(p, TokenKind_keyword_typeof);
- Type* ty = parse_type(p);
- String* name = parse_ident(p);
- expect(p, TokenKind_semicolon);
- AstNode* decl = ast_new(AstNodeKind_typedef_decl);
- decl->name.data = name->data;
- decl->name.len = name->len;
- decl->ty = ty;
- p->typedefs[p->n_typedefs].name.data = name->data;
- p->typedefs[p->n_typedefs].name.len = name->len;
- p->typedefs[p->n_typedefs].ty = ty;
- ++p->n_typedefs;
- return decl;
-}
-
-AstNode* parse_extern_var_decl(Parser* p) {
- expect(p, TokenKind_keyword_extern);
- Type* ty = parse_type(p);
- if (!type_is_unsized(ty)) {
- fatal_error("parse_extern_var_decl: invalid type for variable");
- }
- String* name = parse_ident(p);
- expect(p, TokenKind_semicolon);
-
- if (find_lvar(p, name) != -1 || find_gvar(p, name) != -1) {
- char* buf = calloc(1024, sizeof(char));
- sprintf(buf, "parse_extern_var_decl: %.*s redeclared", name->len, name->data);
- fatal_error(buf);
- }
- p->gvars[p->n_gvars].name.data = name->data;
- p->gvars[p->n_gvars].name.len = name->len;
- p->gvars[p->n_gvars].ty = ty;
- ++p->n_gvars;
-
- return ast_new(AstNodeKind_gvar_decl);
-}
-
-AstNode* parse_toplevel(Parser* p) {
- TokenKind tk = peek_token(p)->kind;
- if (tk == TokenKind_keyword_struct) {
- return parse_struct_decl_or_def(p);
- } else if (tk == TokenKind_keyword_enum) {
- return parse_enum_def(p);
- } else if (tk == TokenKind_keyword_typeof) {
- return parse_typedef_decl(p);
- } else if (tk == TokenKind_keyword_extern) {
- return parse_extern_var_decl(p);
- } else {
- return parse_func_decl_or_def(p);
- }
-}
-
-Program* parse(Token* tokens) {
- Parser* p = parser_new(tokens);
- AstNode* funcs = ast_new_list(1024);
- while (eof(p)) {
- AstNode* n = parse_toplevel(p);
- if (n->kind == AstNodeKind_func_def) {
- ast_append(funcs, n);
- }
- }
- Program* prog = calloc(1, sizeof(Program));
- prog->funcs = funcs;
- prog->str_literals = p->str_literals;
- return prog;
-}
-
-void analyze(Program* prog) {
-}
-
-enum GenMode {
- GenMode_lval,
- GenMode_rval,
-};
-typedef enum GenMode GenMode;
-
-struct CodeGen {
- int next_label;
- int* loop_labels;
-};
-typedef struct CodeGen CodeGen;
-
-CodeGen* codegen_new() {
- CodeGen* g = calloc(1, sizeof(CodeGen));
- g->next_label = 1;
- g->loop_labels = calloc(1024, sizeof(int));
- return g;
-}
-
-int codegen_new_label(CodeGen* g) {
- int new_label = g->next_label;
- ++g->next_label;
- return new_label;
-}
-
-void codegen_expr(CodeGen* g, AstNode* ast, GenMode gen_mode);
-void codegen_stmt(CodeGen* g, AstNode* ast);
-
-const char* param_reg(int n) {
- if (n == 0) {
- return "rdi";
- } else if (n == 1) {
- return "rsi";
- } else if (n == 2) {
- return "rdx";
- } else if (n == 3) {
- return "rcx";
- } else if (n == 4) {
- return "r8";
- } else if (n == 5) {
- return "r9";
- } else {
- unreachable();
- }
-}
-
-void codegen_func_prologue(CodeGen* g, AstNode* ast) {
- printf(" push rbp\n");
- printf(" mov rbp, rsp\n");
- int i;
- for (i = 0; i < ast->node_params->node_len; ++i) {
- printf(" push %s\n", param_reg(i));
- }
- printf(" sub rsp, %d\n", 8 * LVAR_MAX);
-}
-
-void codegen_func_epilogue(CodeGen* g, AstNode* ast) {
- printf(" mov rsp, rbp\n");
- printf(" pop rbp\n");
- printf(" ret\n");
-}
-
-void codegen_int_expr(CodeGen* g, AstNode* ast) {
- printf(" push %d\n", ast->node_int_value);
-}
-
-void codegen_str_expr(CodeGen* g, AstNode* ast) {
- printf(" mov rax, OFFSET FLAG:.Lstr__%d\n", ast->node_idx);
- printf(" push rax\n");
-}
-
-void codegen_unary_expr(CodeGen* g, AstNode* ast) {
- codegen_expr(g, ast->node_operand, GenMode_rval);
- if (ast->node_op == TokenKind_not) {
- printf(" pop rax\n");
- printf(" mov rdi, 0\n");
- printf(" cmp rax, rdi\n");
- printf(" sete al\n");
- printf(" movzb rax, al\n");
- printf(" push rax\n");
- } else {
- unreachable();
- }
-}
-
-void codegen_ref_expr(CodeGen* g, AstNode* ast, GenMode gen_mode) {
- codegen_expr(g, ast->node_operand, GenMode_lval);
-}
-
-void codegen_lval2rval(Type* ty) {
- int size = type_sizeof(ty);
-
- printf(" pop rax\n");
- if (size == 1) {
- printf(" movsx rax, BYTE PTR [rax]\n");
- } else if (size == 4) {
- printf(" movsxd rax, DWORD PTR [rax]\n");
- } else {
- printf(" mov rax, [rax]\n");
- }
- printf(" push rax\n");
-}
-
-void codegen_deref_expr(CodeGen* g, AstNode* ast, GenMode gen_mode) {
- codegen_expr(g, ast->node_operand, GenMode_rval);
- if (gen_mode == GenMode_rval) {
- codegen_lval2rval(ast->node_operand->ty->to);
- }
-}
-
-void codegen_logical_expr(CodeGen* g, AstNode* ast) {
- int label = codegen_new_label(g);
-
- if (ast->node_op == TokenKind_andand) {
- codegen_expr(g, ast->node_lhs, GenMode_rval);
- printf(" pop rax\n");
- printf(" cmp rax, 0\n");
- printf(" je .Lelse%d\n", label);
- codegen_expr(g, ast->node_rhs, GenMode_rval);
- printf(" jmp .Lend%d\n", label);
- printf(".Lelse%d:\n", label);
- printf(" push 0\n");
- printf(".Lend%d:\n", label);
- } else {
- codegen_expr(g, ast->node_lhs, GenMode_rval);
- printf(" pop rax\n");
- printf(" cmp rax, 0\n");
- printf(" je .Lelse%d\n", label);
- printf(" push 1\n");
- printf(" jmp .Lend%d\n", label);
- printf(".Lelse%d:\n", label);
- codegen_expr(g, ast->node_rhs, GenMode_rval);
- printf(".Lend%d:\n", label);
- }
-}
-
-void codegen_binary_expr(CodeGen* g, AstNode* ast, GenMode gen_mode) {
- codegen_expr(g, ast->node_lhs, gen_mode);
- codegen_expr(g, ast->node_rhs, gen_mode);
- printf(" pop rdi\n");
- printf(" pop rax\n");
- if (ast->node_op == TokenKind_plus) {
- printf(" add rax, rdi\n");
- } else if (ast->node_op == TokenKind_minus) {
- printf(" sub rax, rdi\n");
- } else if (ast->node_op == TokenKind_star) {
- printf(" imul rax, rdi\n");
- } else if (ast->node_op == TokenKind_slash) {
- printf(" cqo\n");
- printf(" idiv rdi\n");
- } else if (ast->node_op == TokenKind_percent) {
- printf(" cqo\n");
- printf(" idiv rdi\n");
- printf(" mov rax, rdx\n");
- } else if (ast->node_op == TokenKind_eq) {
- printf(" cmp rax, rdi\n");
- printf(" sete al\n");
- printf(" movzb rax, al\n");
- } else if (ast->node_op == TokenKind_ne) {
- printf(" cmp rax, rdi\n");
- printf(" setne al\n");
- printf(" movzb rax, al\n");
- } else if (ast->node_op == TokenKind_lt) {
- printf(" cmp rax, rdi\n");
- printf(" setl al\n");
- printf(" movzb rax, al\n");
- } else if (ast->node_op == TokenKind_le) {
- printf(" cmp rax, rdi\n");
- printf(" setle al\n");
- printf(" movzb rax, al\n");
- } else {
- unreachable();
- }
- printf(" push rax\n");
-}
-
-void codegen_assign_expr(CodeGen* g, AstNode* ast) {
- codegen_expr(g, ast->node_lhs, GenMode_lval);
- codegen_expr(g, ast->node_rhs, GenMode_rval);
- if (ast->node_op == TokenKind_assign) {
- } else if (ast->node_op == TokenKind_assign_add) {
- printf(" pop rdi\n");
- printf(" push [rsp]\n");
- codegen_lval2rval(ast->node_lhs->ty);
- printf(" pop rax\n");
- printf(" add rax, rdi\n");
- printf(" push rax\n");
- } else if (ast->node_op == TokenKind_assign_sub) {
- printf(" pop rdi\n");
- printf(" push [rsp]\n");
- codegen_lval2rval(ast->node_lhs->ty);
- printf(" pop rax\n");
- printf(" sub rax, rdi\n");
- printf(" push rax\n");
- } else {
- unreachable();
- }
- printf(" pop rdi\n");
- printf(" pop rax\n");
- if (type_sizeof(ast->node_lhs->ty) == 1) {
- printf(" mov BYTE PTR [rax], dil\n");
- } else if (type_sizeof(ast->node_lhs->ty) == 4) {
- printf(" mov DWORD PTR [rax], edi\n");
- } else {
- printf(" mov [rax], rdi\n");
- }
- printf(" push rdi\n");
-}
-
-void codegen_func_call(CodeGen* g, AstNode* ast) {
- String* func_name = &ast->name;
- AstNode* args = ast->node_args;
- int i;
- for (i = 0; i < args->node_len; ++i) {
- AstNode* arg = args->node_items + i;
- codegen_expr(g, arg, GenMode_rval);
- }
- for (i = args->node_len - 1; i >= 0; --i) {
- printf(" pop %s\n", param_reg(i));
- }
-
- int label = codegen_new_label(g);
-
- printf(" mov rax, rsp\n");
- printf(" and rax, 15\n");
- printf(" cmp rax, 0\n");
- printf(" je .Laligned%d\n", label);
-
- printf(" mov rax, 0\n");
- printf(" sub rsp, 8\n");
- printf(" call %.*s\n", func_name->len, func_name->data);
- printf(" add rsp, 8\n");
- printf(" push rax\n");
-
- printf(" jmp .Lend%d\n", label);
- printf(".Laligned%d:\n", label);
-
- printf(" mov rax, 0\n");
- printf(" call %.*s\n", func_name->len, func_name->data);
- printf(" push rax\n");
-
- printf(".Lend%d:\n", label);
-}
-
-void codegen_lvar(CodeGen* g, AstNode* ast, GenMode gen_mode) {
- int offset = 8 + ast->node_idx * 8;
- printf(" mov rax, rbp\n");
- printf(" sub rax, %d\n", offset);
- printf(" push rax\n");
- if (gen_mode == GenMode_rval) {
- codegen_lval2rval(ast->ty);
- }
-}
-
-void codegen_gvar(CodeGen* g, AstNode* ast, GenMode gen_mode) {
- if (gen_mode == GenMode_lval) {
- fatal_error("unimplemented");
- }
- if (ast->ty->kind != TypeKind_ptr) {
- fatal_error("unimplemented");
- }
- printf(" mov rax, QWORD PTR %.*s[rip]\n", ast->name.len, ast->name.data);
- printf(" push rax\n");
-}
-
-void codegen_expr(CodeGen* g, AstNode* ast, GenMode gen_mode) {
- if (ast->kind == AstNodeKind_int_expr) {
- codegen_int_expr(g, ast);
- } else if (ast->kind == AstNodeKind_str_expr) {
- codegen_str_expr(g, ast);
- } else if (ast->kind == AstNodeKind_unary_expr) {
- codegen_unary_expr(g, ast);
- } else if (ast->kind == AstNodeKind_ref_expr) {
- codegen_ref_expr(g, ast, gen_mode);
- } else if (ast->kind == AstNodeKind_deref_expr) {
- codegen_deref_expr(g, ast, gen_mode);
- } else if (ast->kind == AstNodeKind_binary_expr) {
- codegen_binary_expr(g, ast, gen_mode);
- } else if (ast->kind == AstNodeKind_logical_expr) {
- codegen_logical_expr(g, ast);
- } else if (ast->kind == AstNodeKind_assign_expr) {
- codegen_assign_expr(g, ast);
- } else if (ast->kind == AstNodeKind_func_call) {
- codegen_func_call(g, ast);
- } else if (ast->kind == AstNodeKind_lvar) {
- codegen_lvar(g, ast, gen_mode);
- } else if (ast->kind == AstNodeKind_gvar) {
- codegen_gvar(g, ast, gen_mode);
- } else {
- unreachable();
- }
-}
-
-void codegen_return_stmt(CodeGen* g, AstNode* ast) {
- if (ast->node_expr) {
- codegen_expr(g, ast->node_expr, GenMode_rval);
- printf(" pop rax\n");
- }
- codegen_func_epilogue(g, ast);
-}
-
-void codegen_if_stmt(CodeGen* g, AstNode* ast) {
- int label = codegen_new_label(g);
-
- codegen_expr(g, ast->node_cond, GenMode_rval);
- printf(" pop rax\n");
- printf(" cmp rax, 0\n");
- printf(" je .Lelse%d\n", label);
- codegen_stmt(g, ast->node_then);
- printf(" jmp .Lend%d\n", label);
- printf(".Lelse%d:\n", label);
- if (ast->node_else) {
- codegen_stmt(g, ast->node_else);
- }
- printf(".Lend%d:\n", label);
-}
-
-void codegen_for_stmt(CodeGen* g, AstNode* ast) {
- int label = codegen_new_label(g);
- ++g->loop_labels;
- *g->loop_labels = label;
-
- if (ast->node_init) {
- codegen_expr(g, ast->node_init, GenMode_rval);
- printf(" pop rax\n");
- }
- printf(".Lbegin%d:\n", label);
- codegen_expr(g, ast->node_cond, GenMode_rval);
- printf(" pop rax\n");
- printf(" cmp rax, 0\n");
- printf(" je .Lend%d\n", label);
- codegen_stmt(g, ast->node_body);
- printf(".Lcontinue%d:\n", label);
- if (ast->node_update) {
- codegen_expr(g, ast->node_update, GenMode_rval);
- printf(" pop rax\n");
- }
- printf(" jmp .Lbegin%d\n", label);
- printf(".Lend%d:\n", label);
-
- --g->loop_labels;
-}
-
-void codegen_do_while_stmt(CodeGen* g, AstNode* ast) {
- int label = codegen_new_label(g);
- ++g->loop_labels;
- *g->loop_labels = label;
-
- printf(".Lbegin%d:\n", label);
- codegen_stmt(g, ast->node_body);
- printf(".Lcontinue%d:\n", label);
- codegen_expr(g, ast->node_cond, GenMode_rval);
- printf(" pop rax\n");
- printf(" cmp rax, 0\n");
- printf(" je .Lend%d\n", label);
- printf(" jmp .Lbegin%d\n", label);
- printf(".Lend%d:\n", label);
-
- --g->loop_labels;
-}
-
-void codegen_break_stmt(CodeGen* g, AstNode* ast) {
- int label = *g->loop_labels;
- printf(" jmp .Lend%d\n", label);
-}
-
-void codegen_continue_stmt(CodeGen* g, AstNode* ast) {
- int label = *g->loop_labels;
- printf(" jmp .Lcontinue%d\n", label);
-}
-
-void codegen_expr_stmt(CodeGen* g, AstNode* ast) {
- codegen_expr(g, ast->node_expr, GenMode_rval);
- printf(" pop rax\n");
-}
-
-void codegen_var_decl(CodeGen* g, AstNode* ast) {
-}
-
-void codegen_block_stmt(CodeGen* g, AstNode* ast) {
- int i;
- for (i = 0; i < ast->node_len; ++i) {
- AstNode* stmt = ast->node_items + i;
- codegen_stmt(g, stmt);
- }
-}
-
-void codegen_stmt(CodeGen* g, AstNode* ast) {
- if (ast->kind == AstNodeKind_list) {
- codegen_block_stmt(g, ast);
- } else if (ast->kind == AstNodeKind_return_stmt) {
- codegen_return_stmt(g, ast);
- } else if (ast->kind == AstNodeKind_if_stmt) {
- codegen_if_stmt(g, ast);
- } else if (ast->kind == AstNodeKind_for_stmt) {
- codegen_for_stmt(g, ast);
- } else if (ast->kind == AstNodeKind_do_while_stmt) {
- codegen_do_while_stmt(g, ast);
- } else if (ast->kind == AstNodeKind_break_stmt) {
- codegen_break_stmt(g, ast);
- } else if (ast->kind == AstNodeKind_continue_stmt) {
- codegen_continue_stmt(g, ast);
- } else if (ast->kind == AstNodeKind_expr_stmt) {
- codegen_expr_stmt(g, ast);
- } else if (ast->kind == AstNodeKind_lvar_decl) {
- codegen_var_decl(g, ast);
- } else {
- unreachable();
- }
-}
-
-void codegen_func(CodeGen* g, AstNode* ast) {
- printf("%.*s:\n", ast->name.len, ast->name.data);
-
- codegen_func_prologue(g, ast);
- codegen_stmt(g, ast->node_body);
- codegen_func_epilogue(g, ast);
-
- printf("\n");
-}
-
-void codegen(Program* prog) {
- CodeGen* g = codegen_new();
-
- printf(".intel_syntax noprefix\n\n");
-
- int i;
- for (i = 0; prog->str_literals[i]; ++i) {
- printf(".Lstr__%d:\n", i + 1);
- printf(" .string \"%s\"\n\n", prog->str_literals[i]);
- }
-
- printf(".globl main\n\n");
-
- for (i = 0; i < prog->funcs->node_len; ++i) {
- AstNode* func = prog->funcs->node_items + i;
- codegen_func(g, func);
- }
-}
+// Currently the source code depends on the #include order.
+// clang-format off
+#include "std.h"
+#include "common.c"
+#include "preprocess.c"
+#include "tokenize.c"
+#include "ast.c"
+#include "parse.c"
+#include "codegen.c"
+#include "analyze.c"
+// clang-format on
int main(int argc, char** argv) {
if (argc == 1) {
diff --git a/parse.c b/parse.c
new file mode 100644
index 0000000..b056077
--- /dev/null
+++ b/parse.c
@@ -0,0 +1,1009 @@
+#define LVAR_MAX 32
+
+struct LocalVar {
+ String name;
+ Type* ty;
+};
+typedef struct LocalVar LocalVar;
+
+struct GlobalVar {
+ String name;
+ Type* ty;
+};
+typedef struct GlobalVar GlobalVar;
+
+struct Func {
+ String name;
+ Type* ty;
+};
+typedef struct Func Func;
+
+struct Parser {
+ Token* tokens;
+ int pos;
+ LocalVar* lvars;
+ int n_lvars;
+ GlobalVar* gvars;
+ int n_gvars;
+ Func* funcs;
+ int n_funcs;
+ AstNode* structs;
+ int n_structs;
+ AstNode* enums;
+ int n_enums;
+ AstNode* typedefs;
+ int n_typedefs;
+ char** str_literals;
+ int n_str_literals;
+};
+typedef struct Parser Parser;
+
+Parser* parser_new(Token* tokens) {
+ Parser* p = calloc(1, sizeof(Parser));
+ p->tokens = tokens;
+ p->gvars = calloc(128, sizeof(GlobalVar));
+ p->funcs = calloc(256, sizeof(Func));
+ p->structs = calloc(64, sizeof(AstNode));
+ p->enums = calloc(16, sizeof(AstNode));
+ p->typedefs = calloc(64, sizeof(AstNode));
+ p->str_literals = calloc(1024, sizeof(char*));
+ return p;
+}
+
+Token* peek_token(Parser* p) {
+ return p->tokens + p->pos;
+}
+
+Token* next_token(Parser* p) {
+ ++p->pos;
+ return p->tokens + p->pos - 1;
+}
+
+int eof(Parser* p) {
+ return peek_token(p)->kind != TokenKind_eof;
+}
+
+Token* expect(Parser* p, int expected) {
+ Token* t = next_token(p);
+ if (t->kind == expected) {
+ return t;
+ }
+
+ char* buf = calloc(1024, sizeof(char));
+ sprintf(buf, "expected '%s', but got '%s'", token_kind_stringify(expected), token_stringify(t));
+ fatal_error(buf);
+}
+
+int find_lvar(Parser* p, const String* name) {
+ int i;
+ for (i = 0; i < p->n_lvars; ++i) {
+ if (string_equals(&p->lvars[i].name, name)) {
+ return i;
+ }
+ }
+ return -1;
+}
+
+int find_gvar(Parser* p, const String* name) {
+ int i;
+ for (i = 0; i < p->n_gvars; ++i) {
+ if (string_equals(&p->gvars[i].name, name)) {
+ return i;
+ }
+ }
+ return -1;
+}
+
+int find_func(Parser* p, const String* name) {
+ int i;
+ for (i = 0; i < p->n_funcs; ++i) {
+ if (string_equals(&p->funcs[i].name, name)) {
+ return i;
+ }
+ }
+ return -1;
+}
+
+int find_struct(Parser* p, const String* name) {
+ int i;
+ for (i = 0; i < p->n_structs; ++i) {
+ if (string_equals(&p->structs[i].name, name)) {
+ return i;
+ }
+ }
+ return -1;
+}
+
+int find_enum(Parser* p, const String* name) {
+ int i;
+ for (i = 0; i < p->n_enums; ++i) {
+ if (string_equals(&p->enums[i].name, name)) {
+ return i;
+ }
+ }
+ return -1;
+}
+
+int find_enum_member(Parser* p, const String* name) {
+ int i;
+ int j;
+ for (i = 0; i < p->n_enums; ++i) {
+ for (j = 0; j < p->enums[i].node_members->node_len; ++j) {
+ if (string_equals(&p->enums[i].node_members->node_items[j].name, name)) {
+ return i * 1000 + j;
+ }
+ }
+ }
+ return -1;
+}
+
+int find_typedef(Parser* p, const String* name) {
+ int i;
+ for (i = 0; i < p->n_typedefs; ++i) {
+ if (string_equals(&p->typedefs[i].name, name)) {
+ return i;
+ }
+ }
+ return -1;
+}
+
+AstNode* parse_expr(Parser* p);
+AstNode* parse_stmt(Parser* p);
+
+String* parse_ident(Parser* p) {
+ return &expect(p, TokenKind_ident)->raw;
+}
+
+int register_str_literal(Parser* p, char* s) {
+ p->str_literals[p->n_str_literals] = s;
+ ++p->n_str_literals;
+ return p->n_str_literals;
+}
+
+AstNode* parse_primary_expr(Parser* p) {
+ Token* t = next_token(p);
+ AstNode* e;
+ char* buf;
+ if (t->kind == TokenKind_literal_int) {
+ return ast_new_int(atoi(string_to_cstr(&t->raw)));
+ } else if (t->kind == TokenKind_literal_str) {
+ e = ast_new(AstNodeKind_str_expr);
+ e->node_idx = register_str_literal(p, string_to_cstr(&t->raw));
+ return e;
+ } else if (t->kind == TokenKind_paren_l) {
+ e = parse_expr(p);
+ expect(p, TokenKind_paren_r);
+ return e;
+ } else if (t->kind == TokenKind_ident) {
+ String* name = &t->raw;
+
+ if (peek_token(p)->kind == TokenKind_paren_l) {
+ e = ast_new(AstNodeKind_func_call);
+ int func_idx = find_func(p, name);
+ if (func_idx == -1) {
+ buf = calloc(1024, sizeof(char));
+ sprintf(buf, "undefined function: %.*s", name->len, name->data);
+ fatal_error(buf);
+ }
+ e->name.data = name->data;
+ e->name.len = name->len;
+ e->ty = p->funcs[func_idx].ty;
+ return e;
+ }
+
+ int lvar_idx = find_lvar(p, name);
+ if (lvar_idx == -1) {
+ int gvar_idx = find_gvar(p, name);
+ if (gvar_idx == -1) {
+ int enum_member_idx = find_enum_member(p, name);
+ if (enum_member_idx == -1) {
+ buf = calloc(1024, sizeof(char));
+ sprintf(buf, "undefined variable: %.*s", name->len, name->data);
+ fatal_error(buf);
+ }
+ int enum_idx = enum_member_idx / 1000;
+ int n = enum_member_idx % 1000;
+ e = ast_new_int(p->enums[enum_idx].node_members->node_items[n].node_int_value);
+ e->ty = type_new(TypeKind_enum);
+ e->ty->def = p->enums + enum_idx;
+ return e;
+ }
+ e = ast_new(AstNodeKind_gvar);
+ e->name.data = name->data;
+ e->name.len = name->len;
+ e->ty = p->gvars[gvar_idx].ty;
+ return e;
+ }
+
+ e = ast_new(AstNodeKind_lvar);
+ e->name.data = name->data;
+ e->name.len = name->len;
+ e->node_idx = lvar_idx;
+ e->ty = p->lvars[lvar_idx].ty;
+ return e;
+ } else {
+ buf = calloc(1024, sizeof(char));
+ sprintf(buf, "expected primary expression, but got '%s'", token_stringify(t));
+ fatal_error(buf);
+ }
+}
+
+AstNode* parse_arg_list(Parser* p) {
+ AstNode* list = ast_new_list(6);
+ while (peek_token(p)->kind != TokenKind_paren_r) {
+ AstNode* arg = parse_expr(p);
+ ast_append(list, arg);
+ if (peek_token(p)->kind == TokenKind_comma) {
+ next_token(p);
+ } else {
+ break;
+ }
+ }
+ if (list->node_len > 6) {
+ fatal_error("too many arguments");
+ }
+ return list;
+}
+
+AstNode* parse_postfix_expr(Parser* p) {
+ AstNode* ret = parse_primary_expr(p);
+ String* name;
+ while (1) {
+ TokenKind tk = peek_token(p)->kind;
+ if (tk == TokenKind_paren_l) {
+ next_token(p);
+ AstNode* args = parse_arg_list(p);
+ expect(p, TokenKind_paren_r);
+ ret->node_args = args;
+ } else if (tk == TokenKind_bracket_l) {
+ next_token(p);
+ AstNode* idx = parse_expr(p);
+ expect(p, TokenKind_bracket_r);
+ idx = ast_new_binary_expr(TokenKind_star, idx, ast_new_int(type_sizeof(ret->ty->to)));
+ ret = ast_new_deref_expr(ast_new_binary_expr(TokenKind_plus, ret, idx));
+ } else if (tk == TokenKind_dot) {
+ next_token(p);
+ name = parse_ident(p);
+ ret = ast_new_member_access_expr(ast_new_ref_expr(ret), name);
+ } else if (tk == TokenKind_arrow) {
+ next_token(p);
+ name = parse_ident(p);
+ ret = ast_new_member_access_expr(ret, name);
+ } else {
+ break;
+ }
+ }
+ return ret;
+}
+
+int is_type_token(Parser* p, Token* token) {
+ if (token->kind == TokenKind_keyword_int || token->kind == TokenKind_keyword_long ||
+ token->kind == TokenKind_keyword_char || token->kind == TokenKind_keyword_void ||
+ token->kind == TokenKind_keyword_enum || token->kind == TokenKind_keyword_struct ||
+ token->kind == TokenKind_keyword_const) {
+ return 1;
+ }
+ if (token->kind != TokenKind_ident) {
+ return 0;
+ }
+ return find_typedef(p, &token->raw) != -1;
+}
+
+Type* parse_type(Parser* p) {
+ Token* t = next_token(p);
+ char* buf;
+ String* name;
+ if (t->kind == TokenKind_keyword_const) {
+ t = next_token(p);
+ }
+ if (!is_type_token(p, t)) {
+ buf = calloc(1024, sizeof(char));
+ sprintf(buf, "parse_type: expected type, but got '%s'", token_stringify(t));
+ fatal_error(buf);
+ }
+ Type* ty;
+ if (t->kind == TokenKind_ident) {
+ int typedef_idx = find_typedef(p, &t->raw);
+ if (typedef_idx == -1) {
+ buf = calloc(1024, sizeof(char));
+ sprintf(buf, "parse_type: unknown typedef, %.*s", t->raw.len, t->raw.data);
+ fatal_error(buf);
+ }
+ ty = p->typedefs[typedef_idx].ty;
+ } else {
+ ty = type_new(TypeKind_unknown);
+ if (t->kind == TokenKind_keyword_int) {
+ ty->kind = TypeKind_int;
+ } else if (t->kind == TokenKind_keyword_long) {
+ ty->kind = TypeKind_long;
+ } else if (t->kind == TokenKind_keyword_char) {
+ ty->kind = TypeKind_char;
+ } else if (t->kind == TokenKind_keyword_void) {
+ ty->kind = TypeKind_void;
+ } else if (t->kind == TokenKind_keyword_enum) {
+ ty->kind = TypeKind_enum;
+ name = parse_ident(p);
+ int enum_idx = find_enum(p, name);
+ if (enum_idx == -1) {
+ buf = calloc(1024, sizeof(char));
+ sprintf(buf, "parse_type: unknown enum, %.*s", name->len, name->data);
+ fatal_error(buf);
+ }
+ ty->def = p->enums + enum_idx;
+ } else if (t->kind == TokenKind_keyword_struct) {
+ ty->kind = TypeKind_struct;
+ name = parse_ident(p);
+ int struct_idx = find_struct(p, name);
+ if (struct_idx == -1) {
+ buf = calloc(1024, sizeof(char));
+ sprintf(buf, "parse_type: unknown struct, %.*s", name->len, name->data);
+ fatal_error(buf);
+ }
+ ty->def = p->structs + struct_idx;
+ } else {
+ unreachable();
+ }
+ }
+ while (1) {
+ if (peek_token(p)->kind == TokenKind_star) {
+ next_token(p);
+ ty = type_new_ptr(ty);
+ } else {
+ break;
+ }
+ }
+ return ty;
+}
+
+AstNode* parse_prefix_expr(Parser* p) {
+ AstNode* operand;
+ TokenKind op = peek_token(p)->kind;
+ if (op == TokenKind_minus) {
+ next_token(p);
+ operand = parse_prefix_expr(p);
+ return ast_new_binary_expr(op, ast_new_int(0), operand);
+ } else if (op == TokenKind_not) {
+ next_token(p);
+ operand = parse_prefix_expr(p);
+ return ast_new_unary_expr(op, operand);
+ } else if (op == TokenKind_and) {
+ next_token(p);
+ operand = parse_prefix_expr(p);
+ return ast_new_ref_expr(operand);
+ } else if (op == TokenKind_star) {
+ next_token(p);
+ operand = parse_prefix_expr(p);
+ return ast_new_deref_expr(operand);
+ } else if (op == TokenKind_plusplus) {
+ next_token(p);
+ operand = parse_prefix_expr(p);
+ return ast_new_assign_add_expr(operand, ast_new_int(1));
+ } else if (op == TokenKind_minusminus) {
+ next_token(p);
+ operand = parse_prefix_expr(p);
+ return ast_new_assign_sub_expr(operand, ast_new_int(1));
+ } else if (op == TokenKind_keyword_sizeof) {
+ next_token(p);
+ expect(p, TokenKind_paren_l);
+ Type* ty = parse_type(p);
+ expect(p, TokenKind_paren_r);
+ return ast_new_int(type_sizeof(ty));
+ }
+ return parse_postfix_expr(p);
+}
+
+AstNode* parse_multiplicative_expr(Parser* p) {
+ AstNode* lhs = parse_prefix_expr(p);
+ while (1) {
+ TokenKind op = peek_token(p)->kind;
+ if (op == TokenKind_star || op == TokenKind_slash || op == TokenKind_percent) {
+ next_token(p);
+ AstNode* rhs = parse_prefix_expr(p);
+ lhs = ast_new_binary_expr(op, lhs, rhs);
+ } else {
+ break;
+ }
+ }
+ return lhs;
+}
+
+AstNode* parse_additive_expr(Parser* p) {
+ AstNode* lhs = parse_multiplicative_expr(p);
+ AstNode* rhs;
+ while (1) {
+ TokenKind op = peek_token(p)->kind;
+ if (op == TokenKind_plus) {
+ next_token(p);
+ rhs = parse_multiplicative_expr(p);
+ if (lhs->ty->kind == TypeKind_ptr) {
+ lhs = ast_new_binary_expr(
+ op, lhs, ast_new_binary_expr(TokenKind_star, rhs, ast_new_int(type_sizeof(lhs->ty->to))));
+ } else if (rhs->ty->kind == TypeKind_ptr) {
+ lhs = ast_new_binary_expr(
+ op, ast_new_binary_expr(TokenKind_star, lhs, ast_new_int(type_sizeof(rhs->ty->to))), rhs);
+ } else {
+ lhs = ast_new_binary_expr(op, lhs, rhs);
+ }
+ } else if (op == TokenKind_minus) {
+ next_token(p);
+ rhs = parse_multiplicative_expr(p);
+ if (lhs->ty->kind == TypeKind_ptr) {
+ if (rhs->ty->kind == TypeKind_ptr) {
+ // (a - b) / sizeof(a)
+ lhs = ast_new_binary_expr(TokenKind_slash, ast_new_binary_expr(op, lhs, rhs),
+ ast_new_int(type_sizeof(lhs->ty->to)));
+ } else {
+ // a - b*sizeof(a)
+ lhs = ast_new_binary_expr(
+ op, lhs, ast_new_binary_expr(TokenKind_star, rhs, ast_new_int(type_sizeof(lhs->ty->to))));
+ }
+ } else {
+ lhs = ast_new_binary_expr(op, lhs, rhs);
+ }
+ } else {
+ break;
+ }
+ }
+ return lhs;
+}
+
+AstNode* parse_relational_expr(Parser* p) {
+ AstNode* lhs = parse_additive_expr(p);
+ AstNode* rhs;
+ while (1) {
+ TokenKind op = peek_token(p)->kind;
+ if (op == TokenKind_lt || op == TokenKind_le) {
+ next_token(p);
+ rhs = parse_additive_expr(p);
+ lhs = ast_new_binary_expr(op, lhs, rhs);
+ } else if (op == TokenKind_gt) {
+ next_token(p);
+ rhs = parse_additive_expr(p);
+ lhs = ast_new_binary_expr(TokenKind_lt, rhs, lhs);
+ } else if (op == TokenKind_ge) {
+ next_token(p);
+ rhs = parse_additive_expr(p);
+ lhs = ast_new_binary_expr(TokenKind_le, rhs, lhs);
+ } else {
+ break;
+ }
+ }
+ return lhs;
+}
+
+AstNode* parse_equality_expr(Parser* p) {
+ AstNode* lhs = parse_relational_expr(p);
+ while (1) {
+ TokenKind op = peek_token(p)->kind;
+ if (op == TokenKind_eq || op == TokenKind_ne) {
+ next_token(p);
+ AstNode* rhs = parse_relational_expr(p);
+ lhs = ast_new_binary_expr(op, lhs, rhs);
+ } else {
+ break;
+ }
+ }
+ return lhs;
+}
+
+AstNode* parse_logical_and_expr(Parser* p) {
+ AstNode* lhs = parse_equality_expr(p);
+ while (1) {
+ TokenKind op = peek_token(p)->kind;
+ if (op == TokenKind_andand) {
+ next_token(p);
+ AstNode* rhs = parse_equality_expr(p);
+ AstNode* e = ast_new(AstNodeKind_logical_expr);
+ e->node_op = op;
+ e->node_lhs = lhs;
+ e->node_rhs = rhs;
+ e->ty = type_new(TypeKind_int);
+ lhs = e;
+ } else {
+ break;
+ }
+ }
+ return lhs;
+}
+
+AstNode* parse_logical_or_expr(Parser* p) {
+ AstNode* lhs = parse_logical_and_expr(p);
+ while (1) {
+ TokenKind op = peek_token(p)->kind;
+ if (op == TokenKind_oror) {
+ next_token(p);
+ AstNode* rhs = parse_logical_and_expr(p);
+ AstNode* e = ast_new(AstNodeKind_logical_expr);
+ e->node_op = op;
+ e->node_lhs = lhs;
+ e->node_rhs = rhs;
+ e->ty = type_new(TypeKind_int);
+ lhs = e;
+ } else {
+ break;
+ }
+ }
+ return lhs;
+}
+
+AstNode* parse_assignment_expr(Parser* p) {
+ AstNode* lhs = parse_logical_or_expr(p);
+ AstNode* rhs;
+ while (1) {
+ TokenKind op = peek_token(p)->kind;
+ if (op == TokenKind_assign) {
+ next_token(p);
+ rhs = parse_logical_or_expr(p);
+ lhs = ast_new_assign_expr(op, lhs, rhs);
+ } else if (op == TokenKind_assign_add) {
+ next_token(p);
+ rhs = parse_logical_or_expr(p);
+ lhs = ast_new_assign_add_expr(lhs, rhs);
+ } else if (op == TokenKind_assign_sub) {
+ next_token(p);
+ rhs = parse_logical_or_expr(p);
+ lhs = ast_new_assign_sub_expr(lhs, rhs);
+ } else {
+ break;
+ }
+ }
+ return lhs;
+}
+
+AstNode* parse_expr(Parser* p) {
+ return parse_assignment_expr(p);
+}
+
+AstNode* parse_return_stmt(Parser* p) {
+ expect(p, TokenKind_keyword_return);
+ if (peek_token(p)->kind == TokenKind_semicolon) {
+ next_token(p);
+ return ast_new(AstNodeKind_return_stmt);
+ }
+
+ AstNode* expr = parse_expr(p);
+ expect(p, TokenKind_semicolon);
+
+ AstNode* ret = ast_new(AstNodeKind_return_stmt);
+ ret->node_expr = expr;
+ return ret;
+}
+
+AstNode* parse_if_stmt(Parser* p) {
+ expect(p, TokenKind_keyword_if);
+ expect(p, TokenKind_paren_l);
+ AstNode* cond = parse_expr(p);
+ expect(p, TokenKind_paren_r);
+ AstNode* then_body = parse_stmt(p);
+ AstNode* else_body = NULL;
+ if (peek_token(p)->kind == TokenKind_keyword_else) {
+ next_token(p);
+ else_body = parse_stmt(p);
+ }
+
+ AstNode* stmt = ast_new(AstNodeKind_if_stmt);
+ stmt->node_cond = cond;
+ stmt->node_then = then_body;
+ stmt->node_else = else_body;
+ return stmt;
+}
+
+AstNode* parse_for_stmt(Parser* p) {
+ expect(p, TokenKind_keyword_for);
+ expect(p, TokenKind_paren_l);
+ AstNode* init = NULL;
+ AstNode* cond = NULL;
+ AstNode* update = NULL;
+ if (peek_token(p)->kind != TokenKind_semicolon) {
+ init = parse_expr(p);
+ }
+ expect(p, TokenKind_semicolon);
+ if (peek_token(p)->kind != TokenKind_semicolon) {
+ cond = parse_expr(p);
+ } else {
+ cond = ast_new_int(1);
+ }
+ expect(p, TokenKind_semicolon);
+ if (peek_token(p)->kind != TokenKind_paren_r) {
+ update = parse_expr(p);
+ }
+ expect(p, TokenKind_paren_r);
+ AstNode* body = parse_stmt(p);
+
+ AstNode* stmt = ast_new(AstNodeKind_for_stmt);
+ stmt->node_cond = cond;
+ stmt->node_init = init;
+ stmt->node_update = update;
+ stmt->node_body = body;
+ return stmt;
+}
+
+AstNode* parse_while_stmt(Parser* p) {
+ expect(p, TokenKind_keyword_while);
+ expect(p, TokenKind_paren_l);
+ AstNode* cond = parse_expr(p);
+ expect(p, TokenKind_paren_r);
+ AstNode* body = parse_stmt(p);
+
+ AstNode* stmt = ast_new(AstNodeKind_for_stmt);
+ stmt->node_cond = cond;
+ stmt->node_body = body;
+ return stmt;
+}
+
+AstNode* parse_do_while_stmt(Parser* p) {
+ expect(p, TokenKind_keyword_do);
+ AstNode* body = parse_stmt(p);
+ expect(p, TokenKind_keyword_while);
+ expect(p, TokenKind_paren_l);
+ AstNode* cond = parse_expr(p);
+ expect(p, TokenKind_paren_r);
+ expect(p, TokenKind_semicolon);
+
+ AstNode* stmt = ast_new(AstNodeKind_do_while_stmt);
+ stmt->node_cond = cond;
+ stmt->node_body = body;
+ return stmt;
+}
+
+AstNode* parse_break_stmt(Parser* p) {
+ expect(p, TokenKind_keyword_break);
+ expect(p, TokenKind_semicolon);
+ return ast_new(AstNodeKind_break_stmt);
+}
+
+AstNode* parse_continue_stmt(Parser* p) {
+ expect(p, TokenKind_keyword_continue);
+ expect(p, TokenKind_semicolon);
+ return ast_new(AstNodeKind_continue_stmt);
+}
+
+AstNode* parse_var_decl(Parser* p) {
+ Type* ty = parse_type(p);
+ if (!type_is_unsized(ty)) {
+ fatal_error("parse_var_decl: invalid type for variable");
+ }
+ String* name = parse_ident(p);
+
+ AstNode* init = NULL;
+ if (peek_token(p)->kind == TokenKind_assign) {
+ next_token(p);
+ init = parse_expr(p);
+ }
+ expect(p, TokenKind_semicolon);
+
+ if (find_lvar(p, name) != -1 || find_gvar(p, name) != -1) {
+ char* buf = calloc(1024, sizeof(char));
+ sprintf(buf, "parse_var_decl: %.*s redeclared", name->len, name->data);
+ fatal_error(buf);
+ }
+ p->lvars[p->n_lvars].name.data = name->data;
+ p->lvars[p->n_lvars].name.len = name->len;
+ p->lvars[p->n_lvars].ty = ty;
+ ++p->n_lvars;
+
+ AstNode* ret;
+ if (init) {
+ AstNode* lhs = ast_new(AstNodeKind_lvar);
+ lhs->name.data = name->data;
+ lhs->name.len = name->len;
+ lhs->node_idx = p->n_lvars - 1;
+ lhs->ty = ty;
+ AstNode* assign = ast_new_assign_expr(TokenKind_assign, lhs, init);
+ ret = ast_new(AstNodeKind_expr_stmt);
+ ret->node_expr = assign;
+ } else {
+ ret = ast_new(AstNodeKind_lvar_decl);
+ }
+ return ret;
+}
+
+AstNode* parse_expr_stmt(Parser* p) {
+ AstNode* e = parse_expr(p);
+ expect(p, TokenKind_semicolon);
+ AstNode* stmt = ast_new(AstNodeKind_expr_stmt);
+ stmt->node_expr = e;
+ return stmt;
+}
+
+AstNode* parse_block_stmt(Parser* p) {
+ AstNode* list = ast_new_list(1024);
+ expect(p, TokenKind_brace_l);
+ while (peek_token(p)->kind != TokenKind_brace_r) {
+ AstNode* stmt = parse_stmt(p);
+ ast_append(list, stmt);
+ }
+ expect(p, TokenKind_brace_r);
+ return list;
+}
+
+AstNode* parse_stmt(Parser* p) {
+ Token* t = peek_token(p);
+ if (t->kind == TokenKind_keyword_return) {
+ return parse_return_stmt(p);
+ } else if (t->kind == TokenKind_keyword_if) {
+ return parse_if_stmt(p);
+ } else if (t->kind == TokenKind_keyword_for) {
+ return parse_for_stmt(p);
+ } else if (t->kind == TokenKind_keyword_while) {
+ return parse_while_stmt(p);
+ } else if (t->kind == TokenKind_keyword_do) {
+ return parse_do_while_stmt(p);
+ } else if (t->kind == TokenKind_keyword_break) {
+ return parse_break_stmt(p);
+ } else if (t->kind == TokenKind_keyword_continue) {
+ return parse_continue_stmt(p);
+ } else if (t->kind == TokenKind_brace_l) {
+ return parse_block_stmt(p);
+ } else if (is_type_token(p, t)) {
+ return parse_var_decl(p);
+ } else {
+ return parse_expr_stmt(p);
+ }
+}
+
+void enter_func(Parser* p) {
+ p->lvars = calloc(LVAR_MAX, sizeof(LocalVar));
+ p->n_lvars = 0;
+}
+
+void register_params(Parser* p, AstNode* params) {
+ int i;
+ for (i = 0; i < params->node_len; ++i) {
+ AstNode* param = params->node_items + i;
+ p->lvars[p->n_lvars].name.data = param->name.data;
+ p->lvars[p->n_lvars].name.len = param->name.len;
+ p->lvars[p->n_lvars].ty = param->ty;
+ ++p->n_lvars;
+ }
+}
+
+void register_func(Parser* p, const String* name, Type* ty) {
+ p->funcs[p->n_funcs].name.data = name->data;
+ p->funcs[p->n_funcs].name.len = name->len;
+ p->funcs[p->n_funcs].ty = ty;
+ ++p->n_funcs;
+}
+
+AstNode* parse_param(Parser* p) {
+ Type* ty = parse_type(p);
+ String* name = NULL;
+ TokenKind tk = peek_token(p)->kind;
+ if (tk != TokenKind_comma && tk != TokenKind_paren_r) {
+ name = parse_ident(p);
+ }
+ AstNode* param = ast_new(AstNodeKind_param);
+ param->ty = ty;
+ if (name) {
+ param->name.data = name->data;
+ param->name.len = name->len;
+ }
+ return param;
+}
+
+AstNode* parse_param_list(Parser* p) {
+ int has_void = 0;
+ AstNode* list = ast_new_list(6);
+ while (peek_token(p)->kind != TokenKind_paren_r) {
+ if (peek_token(p)->kind == TokenKind_ellipsis) {
+ next_token(p);
+ break;
+ }
+ AstNode* param = parse_param(p);
+ has_void = has_void || param->ty->kind == TypeKind_void;
+ ast_append(list, param);
+ if (peek_token(p)->kind == TokenKind_comma) {
+ next_token(p);
+ } else {
+ break;
+ }
+ }
+ if (list->node_len > 6) {
+ fatal_error("too many parameters");
+ }
+ if (has_void) {
+ if (list->node_len != 1) {
+ fatal_error("invalid use of void param");
+ }
+ list->node_len = 0;
+ }
+ return list;
+}
+
+AstNode* parse_func_decl_or_def(Parser* p) {
+ Type* ty = parse_type(p);
+ String* name = parse_ident(p);
+ register_func(p, name, ty);
+ expect(p, TokenKind_paren_l);
+ AstNode* params = parse_param_list(p);
+ expect(p, TokenKind_paren_r);
+ if (peek_token(p)->kind == TokenKind_semicolon) {
+ next_token(p);
+ return ast_new(AstNodeKind_func_decl);
+ }
+ enter_func(p);
+ register_params(p, params);
+ AstNode* body = parse_block_stmt(p);
+ AstNode* func = ast_new(AstNodeKind_func_def);
+ func->ty = ty;
+ func->name.data = name->data;
+ func->name.len = name->len;
+ func->node_params = params;
+ func->node_body = body;
+ return func;
+}
+
+AstNode* parse_struct_member(Parser* p) {
+ Type* ty = parse_type(p);
+ String* name = parse_ident(p);
+ expect(p, TokenKind_semicolon);
+ AstNode* member = ast_new(AstNodeKind_struct_member);
+ member->name.data = name->data;
+ member->name.len = name->len;
+ member->ty = ty;
+ return member;
+}
+
+AstNode* parse_struct_members(Parser* p) {
+ AstNode* list = ast_new_list(16);
+ while (peek_token(p)->kind != TokenKind_brace_r) {
+ AstNode* member = parse_struct_member(p);
+ ast_append(list, member);
+ }
+ return list;
+}
+
+AstNode* parse_struct_decl_or_def(Parser* p) {
+ expect(p, TokenKind_keyword_struct);
+ String* name = parse_ident(p);
+
+ if (peek_token(p)->kind != TokenKind_semicolon && peek_token(p)->kind != TokenKind_brace_l) {
+ p->pos = p->pos - 2;
+ return parse_func_decl_or_def(p);
+ }
+
+ int struct_idx = find_struct(p, name);
+ if (struct_idx == -1) {
+ struct_idx = p->n_structs;
+ p->structs[struct_idx].kind = AstNodeKind_struct_def;
+ p->structs[struct_idx].name.data = name->data;
+ p->structs[struct_idx].name.len = name->len;
+ ++p->n_structs;
+ }
+ if (peek_token(p)->kind == TokenKind_semicolon) {
+ next_token(p);
+ return ast_new(AstNodeKind_struct_decl);
+ }
+ if (p->structs[struct_idx].node_members) {
+ char* buf = calloc(1024, sizeof(char));
+ sprintf(buf, "parse_struct_decl_or_def: struct %.*s redefined", name->len, name->data);
+ fatal_error(buf);
+ }
+ expect(p, TokenKind_brace_l);
+ AstNode* members = parse_struct_members(p);
+ expect(p, TokenKind_brace_r);
+ expect(p, TokenKind_semicolon);
+ p->structs[struct_idx].node_members = members;
+ return p->structs + struct_idx;
+}
+
+AstNode* parse_enum_member(Parser* p) {
+ String* name = parse_ident(p);
+ AstNode* member = ast_new(AstNodeKind_enum_member);
+ member->name.data = name->data;
+ member->name.len = name->len;
+ return member;
+}
+
+AstNode* parse_enum_members(Parser* p) {
+ int next_value = 0;
+ AstNode* list = ast_new_list(256);
+ while (peek_token(p)->kind != TokenKind_brace_r) {
+ AstNode* member = parse_enum_member(p);
+ member->node_int_value = next_value;
+ ++next_value;
+ ast_append(list, member);
+ if (peek_token(p)->kind != TokenKind_comma) {
+ break;
+ }
+ next_token(p);
+ }
+ return list;
+}
+
+AstNode* parse_enum_def(Parser* p) {
+ expect(p, TokenKind_keyword_enum);
+ String* name = parse_ident(p);
+
+ if (peek_token(p)->kind != TokenKind_brace_l) {
+ p->pos = p->pos - 2;
+ return parse_func_decl_or_def(p);
+ }
+
+ int enum_idx = find_enum(p, name);
+ if (enum_idx == -1) {
+ enum_idx = p->n_enums;
+ p->enums[enum_idx].kind = AstNodeKind_enum_def;
+ p->enums[enum_idx].name.data = name->data;
+ p->enums[enum_idx].name.len = name->len;
+ ++p->n_enums;
+ } else {
+ char* buf = calloc(1024, sizeof(char));
+ sprintf(buf, "parse_enum_def: enum %.*s redefined", name->len, name->data);
+ fatal_error(buf);
+ }
+ expect(p, TokenKind_brace_l);
+ AstNode* members = parse_enum_members(p);
+ expect(p, TokenKind_brace_r);
+ expect(p, TokenKind_semicolon);
+ p->enums[enum_idx].node_members = members;
+ return p->enums + enum_idx;
+}
+
+AstNode* parse_typedef_decl(Parser* p) {
+ expect(p, TokenKind_keyword_typeof);
+ Type* ty = parse_type(p);
+ String* name = parse_ident(p);
+ expect(p, TokenKind_semicolon);
+ AstNode* decl = ast_new(AstNodeKind_typedef_decl);
+ decl->name.data = name->data;
+ decl->name.len = name->len;
+ decl->ty = ty;
+ p->typedefs[p->n_typedefs].name.data = name->data;
+ p->typedefs[p->n_typedefs].name.len = name->len;
+ p->typedefs[p->n_typedefs].ty = ty;
+ ++p->n_typedefs;
+ return decl;
+}
+
+AstNode* parse_extern_var_decl(Parser* p) {
+ expect(p, TokenKind_keyword_extern);
+ Type* ty = parse_type(p);
+ if (!type_is_unsized(ty)) {
+ fatal_error("parse_extern_var_decl: invalid type for variable");
+ }
+ String* name = parse_ident(p);
+ expect(p, TokenKind_semicolon);
+
+ if (find_lvar(p, name) != -1 || find_gvar(p, name) != -1) {
+ char* buf = calloc(1024, sizeof(char));
+ sprintf(buf, "parse_extern_var_decl: %.*s redeclared", name->len, name->data);
+ fatal_error(buf);
+ }
+ p->gvars[p->n_gvars].name.data = name->data;
+ p->gvars[p->n_gvars].name.len = name->len;
+ p->gvars[p->n_gvars].ty = ty;
+ ++p->n_gvars;
+
+ return ast_new(AstNodeKind_gvar_decl);
+}
+
+AstNode* parse_toplevel(Parser* p) {
+ TokenKind tk = peek_token(p)->kind;
+ if (tk == TokenKind_keyword_struct) {
+ return parse_struct_decl_or_def(p);
+ } else if (tk == TokenKind_keyword_enum) {
+ return parse_enum_def(p);
+ } else if (tk == TokenKind_keyword_typeof) {
+ return parse_typedef_decl(p);
+ } else if (tk == TokenKind_keyword_extern) {
+ return parse_extern_var_decl(p);
+ } else {
+ return parse_func_decl_or_def(p);
+ }
+}
+
+Program* parse(Token* tokens) {
+ Parser* p = parser_new(tokens);
+ AstNode* funcs = ast_new_list(1024);
+ while (eof(p)) {
+ AstNode* n = parse_toplevel(p);
+ if (n->kind == AstNodeKind_func_def) {
+ ast_append(funcs, n);
+ }
+ }
+ Program* prog = calloc(1, sizeof(Program));
+ prog->funcs = funcs;
+ prog->str_literals = p->str_literals;
+ return prog;
+}
diff --git a/preprocess.c b/preprocess.c
new file mode 100644
index 0000000..202ff40
--- /dev/null
+++ b/preprocess.c
@@ -0,0 +1,431 @@
+enum PpTokenKind {
+ PpTokenKind_eof,
+
+ PpTokenKind_header_name,
+ PpTokenKind_identifier,
+ PpTokenKind_pp_number,
+ PpTokenKind_character_constant,
+ PpTokenKind_string_literal,
+ PpTokenKind_punctuator,
+ PpTokenKind_other,
+ PpTokenKind_whitespace,
+};
+typedef enum PpTokenKind PpTokenKind;
+
+struct PpToken {
+ PpTokenKind kind;
+ String raw;
+};
+typedef struct PpToken PpToken;
+
+struct PpDefine {
+ String name;
+ PpToken* tokens;
+};
+typedef struct PpDefine PpDefine;
+
+struct PpDefines {
+ PpDefine* data;
+ size_t len;
+};
+typedef struct PpDefines PpDefines;
+
+struct Preprocessor {
+ char* src;
+ int pos;
+ PpToken* pp_tokens;
+ int n_pp_tokens;
+ PpDefines* pp_defines;
+ int include_depth;
+};
+typedef struct Preprocessor Preprocessor;
+
+PpToken* do_preprocess(char* src, int depth, PpDefines* pp_defines);
+
+PpDefines* pp_defines_new() {
+ PpDefines* pp_defines = calloc(1, sizeof(PpDefines));
+ pp_defines->data = calloc(1024, sizeof(PpDefine));
+ return pp_defines;
+}
+
+Preprocessor* preprocessor_new(char* src, int include_depth, PpDefines* pp_defines) {
+ if (include_depth >= 32) {
+ fatal_error("include depth limit exceeded");
+ }
+
+ Preprocessor* pp = calloc(1, sizeof(Preprocessor));
+ pp->src = src;
+ pp->pp_tokens = calloc(1024 * 1024, sizeof(PpToken));
+ pp->pp_defines = pp_defines;
+ pp->include_depth = include_depth;
+
+ return pp;
+}
+
+int find_pp_define(Preprocessor* pp, String* name) {
+ int i;
+ for (i = 0; i < pp->pp_defines->len; ++i) {
+ if (string_equals(&pp->pp_defines->data[i].name, name)) {
+ return i;
+ }
+ }
+ return -1;
+}
+
+void pp_tokenize_all(Preprocessor* pp) {
+ char* buf;
+ int ch;
+ int start;
+ while (pp->src[pp->pos]) {
+ PpToken* tok = pp->pp_tokens + pp->n_pp_tokens;
+ char c = pp->src[pp->pos];
+ ++pp->pos;
+ if (c == '(') {
+ tok->kind = PpTokenKind_punctuator;
+ tok->raw.len = 1;
+ tok->raw.data = pp->src + pp->pos - tok->raw.len;
+ } else if (c == ')') {
+ tok->kind = PpTokenKind_punctuator;
+ tok->raw.len = 1;
+ tok->raw.data = pp->src + pp->pos - tok->raw.len;
+ } else if (c == '{') {
+ tok->kind = PpTokenKind_punctuator;
+ tok->raw.len = 1;
+ tok->raw.data = pp->src + pp->pos - tok->raw.len;
+ } else if (c == '}') {
+ tok->kind = PpTokenKind_punctuator;
+ tok->raw.len = 1;
+ tok->raw.data = pp->src + pp->pos - tok->raw.len;
+ } else if (c == '[') {
+ tok->kind = PpTokenKind_punctuator;
+ tok->raw.len = 1;
+ tok->raw.data = pp->src + pp->pos - tok->raw.len;
+ } else if (c == ']') {
+ tok->kind = PpTokenKind_punctuator;
+ tok->raw.len = 1;
+ tok->raw.data = pp->src + pp->pos - tok->raw.len;
+ } else if (c == ',') {
+ tok->kind = PpTokenKind_punctuator;
+ tok->raw.len = 1;
+ tok->raw.data = pp->src + pp->pos - tok->raw.len;
+ } else if (c == ';') {
+ tok->kind = PpTokenKind_punctuator;
+ tok->raw.len = 1;
+ tok->raw.data = pp->src + pp->pos - tok->raw.len;
+ } else if (c == '+') {
+ if (pp->src[pp->pos] == '=') {
+ ++pp->pos;
+ tok->kind = PpTokenKind_punctuator;
+ tok->raw.len = 2;
+ tok->raw.data = pp->src + pp->pos - tok->raw.len;
+ } else if (pp->src[pp->pos] == '+') {
+ ++pp->pos;
+ tok->kind = PpTokenKind_punctuator;
+ tok->raw.len = 2;
+ tok->raw.data = pp->src + pp->pos - tok->raw.len;
+ } else {
+ tok->kind = PpTokenKind_punctuator;
+ tok->raw.len = 1;
+ tok->raw.data = pp->src + pp->pos - tok->raw.len;
+ }
+ } else if (c == '|') {
+ ++pp->pos;
+ tok->kind = PpTokenKind_punctuator;
+ tok->raw.len = 2;
+ tok->raw.data = pp->src + pp->pos - tok->raw.len;
+ } else if (c == '&') {
+ if (pp->src[pp->pos] == '&') {
+ ++pp->pos;
+ tok->kind = PpTokenKind_punctuator;
+ tok->raw.len = 2;
+ tok->raw.data = pp->src + pp->pos - tok->raw.len;
+ } else {
+ tok->kind = PpTokenKind_punctuator;
+ tok->raw.len = 1;
+ tok->raw.data = pp->src + pp->pos - tok->raw.len;
+ }
+ } else if (c == '-') {
+ if (pp->src[pp->pos] == '>') {
+ ++pp->pos;
+ tok->kind = PpTokenKind_punctuator;
+ tok->raw.len = 2;
+ tok->raw.data = pp->src + pp->pos - tok->raw.len;
+ } else if (pp->src[pp->pos] == '=') {
+ ++pp->pos;
+ tok->kind = PpTokenKind_punctuator;
+ tok->raw.len = 2;
+ tok->raw.data = pp->src + pp->pos - tok->raw.len;
+ } else if (pp->src[pp->pos] == '-') {
+ ++pp->pos;
+ tok->kind = PpTokenKind_punctuator;
+ tok->raw.len = 2;
+ tok->raw.data = pp->src + pp->pos - tok->raw.len;
+ } else {
+ tok->kind = PpTokenKind_punctuator;
+ tok->raw.len = 1;
+ tok->raw.data = pp->src + pp->pos - tok->raw.len;
+ }
+ } else if (c == '*') {
+ tok->kind = PpTokenKind_punctuator;
+ tok->raw.len = 1;
+ tok->raw.data = pp->src + pp->pos - tok->raw.len;
+ } else if (c == '/') {
+ if (pp->src[pp->pos] == '/') {
+ start = pp->pos - 1;
+ ++pp->pos;
+ while (pp->src[pp->pos] && pp->src[pp->pos] != '\n' && pp->src[pp->pos] != '\r') {
+ ++pp->pos;
+ }
+ tok->kind = PpTokenKind_whitespace;
+ tok->raw.len = pp->pos - start;
+ tok->raw.data = pp->src + pp->pos - tok->raw.len;
+ } else if (pp->src[pp->pos] == '*') {
+ fatal_error("unimplemented");
+ } else {
+ tok->kind = PpTokenKind_punctuator;
+ tok->raw.len = 1;
+ tok->raw.data = pp->src + pp->pos - tok->raw.len;
+ }
+ } else if (c == '%') {
+ tok->kind = PpTokenKind_punctuator;
+ tok->raw.len = 1;
+ tok->raw.data = pp->src + pp->pos - tok->raw.len;
+ } else if (c == '.') {
+ if (pp->src[pp->pos] == '.') {
+ ++pp->pos;
+ if (pp->src[pp->pos] == '.') {
+ ++pp->pos;
+ tok->kind = PpTokenKind_punctuator;
+ tok->raw.len = 3;
+ tok->raw.data = pp->src + pp->pos - tok->raw.len;
+ } else {
+ --pp->pos;
+ tok->kind = PpTokenKind_punctuator;
+ tok->raw.len = 1;
+ tok->raw.data = pp->src + pp->pos - tok->raw.len;
+ }
+ } else {
+ tok->kind = PpTokenKind_punctuator;
+ tok->raw.len = 1;
+ tok->raw.data = pp->src + pp->pos - tok->raw.len;
+ }
+ } else if (c == '!') {
+ if (pp->src[pp->pos] == '=') {
+ ++pp->pos;
+ tok->kind = PpTokenKind_punctuator;
+ tok->raw.len = 2;
+ tok->raw.data = pp->src + pp->pos - tok->raw.len;
+ } else {
+ tok->kind = PpTokenKind_punctuator;
+ tok->raw.len = 1;
+ tok->raw.data = pp->src + pp->pos - tok->raw.len;
+ }
+ } else if (c == '=') {
+ if (pp->src[pp->pos] == '=') {
+ ++pp->pos;
+ tok->kind = PpTokenKind_punctuator;
+ tok->raw.len = 2;
+ tok->raw.data = pp->src + pp->pos - tok->raw.len;
+ } else {
+ tok->kind = PpTokenKind_punctuator;
+ tok->raw.len = 1;
+ tok->raw.data = pp->src + pp->pos - tok->raw.len;
+ }
+ } else if (c == '<') {
+ if (pp->src[pp->pos] == '=') {
+ ++pp->pos;
+ tok->kind = PpTokenKind_punctuator;
+ tok->raw.len = 2;
+ tok->raw.data = pp->src + pp->pos - tok->raw.len;
+ } else {
+ tok->kind = PpTokenKind_punctuator;
+ tok->raw.len = 1;
+ tok->raw.data = pp->src + pp->pos - tok->raw.len;
+ }
+ } else if (c == '>') {
+ if (pp->src[pp->pos] == '=') {
+ ++pp->pos;
+ tok->kind = PpTokenKind_punctuator;
+ tok->raw.len = 2;
+ tok->raw.data = pp->src + pp->pos - tok->raw.len;
+ } else {
+ tok->kind = PpTokenKind_punctuator;
+ tok->raw.len = 1;
+ tok->raw.data = pp->src + pp->pos - tok->raw.len;
+ }
+ } else if (c == '#') {
+ if (pp->src[pp->pos] == '#') {
+ ++pp->pos;
+ tok->kind = PpTokenKind_punctuator;
+ tok->raw.len = 2;
+ tok->raw.data = pp->src + pp->pos - tok->raw.len;
+ } else {
+ tok->kind = PpTokenKind_punctuator;
+ tok->raw.len = 1;
+ tok->raw.data = pp->src + pp->pos - tok->raw.len;
+ }
+ } else if (c == '\'') {
+ start = pp->pos - 1;
+ if (pp->src[pp->pos] == '\\') {
+ ++pp->pos;
+ }
+ pp->pos += 2;
+ tok->kind = PpTokenKind_character_constant;
+ tok->raw.data = pp->src + start;
+ tok->raw.len = pp->pos - start;
+ } else if (c == '"') {
+ start = pp->pos - 1;
+ while (1) {
+ ch = pp->src[pp->pos];
+ if (ch == '\\') {
+ ++pp->pos;
+ } else if (ch == '"') {
+ break;
+ }
+ ++pp->pos;
+ }
+ ++pp->pos;
+ tok->kind = PpTokenKind_string_literal;
+ tok->raw.data = pp->src + start;
+ tok->raw.len = pp->pos - start;
+ } else if (isdigit(c)) {
+ --pp->pos;
+ start = pp->pos;
+ while (isdigit(pp->src[pp->pos])) {
+ ++pp->pos;
+ }
+ tok->kind = PpTokenKind_pp_number;
+ tok->raw.data = pp->src + start;
+ tok->raw.len = pp->pos - start;
+ } else if (isalpha(c) || c == '_') {
+ --pp->pos;
+ start = pp->pos;
+ while (isalnum(pp->src[pp->pos]) || pp->src[pp->pos] == '_') {
+ ++pp->pos;
+ }
+ tok->raw.data = pp->src + start;
+ tok->raw.len = pp->pos - start;
+ tok->kind = PpTokenKind_identifier;
+ } else if (isspace(c)) {
+ tok->raw.data = pp->src;
+ tok->raw.len = 1;
+ tok->kind = PpTokenKind_whitespace;
+ } else {
+ tok->raw.data = pp->src;
+ tok->raw.len = 1;
+ tok->kind = PpTokenKind_other;
+ }
+ ++pp->n_pp_tokens;
+ }
+}
+
+void process_pp_directives(Preprocessor* pp) {
+ PpToken* tok = pp->pp_tokens;
+ PpToken* define_dest;
+ while (tok->kind != PpTokenKind_eof) {
+ // TODO: check if the token is at the beginning of line.
+ // TODO: check if skipped whitespaces do not contain line breaks.
+ if (tok->kind == PpTokenKind_punctuator && string_equals_cstr(&tok->raw, "#")) {
+ PpToken* tok2 = tok + 1;
+ while (tok2->kind != PpTokenKind_eof && tok2->kind == PpTokenKind_whitespace)
+ ++tok2;
+ if (tok2->kind == PpTokenKind_identifier && string_equals_cstr(&tok2->raw, "define")) {
+ ++tok2;
+ while (tok2->kind != PpTokenKind_eof && tok2->kind == PpTokenKind_whitespace)
+ ++tok2;
+ if (tok2->kind == PpTokenKind_identifier) {
+ PpToken* define_name = tok2;
+ ++tok2;
+ while (tok2->kind != PpTokenKind_eof && tok2->kind == PpTokenKind_whitespace)
+ ++tok2;
+ if (tok2->kind == PpTokenKind_identifier || tok2->kind == PpTokenKind_pp_number) {
+ define_dest = tok2;
+
+ PpDefine* pp_define = pp->pp_defines->data + pp->pp_defines->len;
+ pp_define->name.len = define_name->raw.len;
+ pp_define->name.data = define_name->raw.data;
+ pp_define->tokens = calloc(1, sizeof(PpToken));
+ pp_define->tokens[0].kind = define_dest->kind;
+ pp_define->tokens[0].raw.len = define_dest->raw.len;
+ pp_define->tokens[0].raw.data = define_dest->raw.data;
+ ++pp->pp_defines->len;
+ }
+ }
+ // Remove #define directive.
+ while (tok != tok2 + 1) {
+ tok->kind = PpTokenKind_whitespace;
+ tok->raw.len = 0;
+ tok->raw.data = NULL;
+ ++tok;
+ }
+ } else if (tok2->kind == PpTokenKind_identifier && string_equals_cstr(&tok2->raw, "include")) {
+ ++tok2;
+ while (tok2->kind != PpTokenKind_eof && tok2->kind == PpTokenKind_whitespace)
+ ++tok2;
+ if (tok2->kind == PpTokenKind_string_literal) {
+ // Process #include directive.
+ PpToken* include_name = tok2;
+ ++tok2;
+ char* include_name_buf = calloc(include_name->raw.len - 2 + 1, sizeof(char));
+ sprintf(include_name_buf, "%.*s", include_name->raw.len - 2, include_name->raw.data + 1);
+
+ // Remove #include directive itself.
+ while (tok != tok2 + 1) {
+ tok->kind = PpTokenKind_whitespace;
+ tok->raw.len = 0;
+ tok->raw.data = NULL;
+ ++tok;
+ }
+
+ // Read and preprocess included file.
+ FILE* include_file = fopen(include_name_buf, "rb");
+ if (include_file == NULL) {
+ char* buf = calloc(1024, sizeof(char));
+ sprintf(buf, "cannot open include file: %s", include_name_buf);
+ fatal_error(buf);
+ }
+ char* include_source = read_all(include_file);
+ fclose(include_file);
+
+ PpToken* include_pp_tokens = do_preprocess(include_source, pp->include_depth + 1, pp->pp_defines);
+
+ // Insert preprocessed tokens into the current token stream.
+ int n_include_pp_tokens = 0;
+ while (include_pp_tokens[n_include_pp_tokens].kind != PpTokenKind_eof) {
+ ++n_include_pp_tokens;
+ }
+
+ // Shift existing tokens to make room for include tokens
+ int n_pp_tokens_after_include = pp->n_pp_tokens - (tok - pp->pp_tokens);
+
+ memmove(tok + n_include_pp_tokens, tok, n_pp_tokens_after_include * sizeof(PpToken));
+
+ // Copy include tokens into the current position
+ memcpy(tok, include_pp_tokens, n_include_pp_tokens * sizeof(PpToken));
+
+ pp->n_pp_tokens += n_include_pp_tokens;
+ }
+ }
+ } else if (tok->kind == PpTokenKind_identifier) {
+ int pp_define_idx = find_pp_define(pp, &tok->raw);
+ if (pp_define_idx != -1) {
+ define_dest = pp->pp_defines->data[pp_define_idx].tokens;
+ tok->kind = define_dest->kind;
+ tok->raw.data = define_dest->raw.data;
+ tok->raw.len = define_dest->raw.len;
+ }
+ }
+ ++tok;
+ }
+}
+
+PpToken* do_preprocess(char* src, int depth, PpDefines* pp_defines) {
+ Preprocessor* pp = preprocessor_new(src, depth, pp_defines);
+ pp_tokenize_all(pp);
+ process_pp_directives(pp);
+ return pp->pp_tokens;
+}
+
+PpToken* preprocess(char* src) {
+ return do_preprocess(src, 0, pp_defines_new());
+}
diff --git a/std.h b/std.h
new file mode 100644
index 0000000..23a2e9a
--- /dev/null
+++ b/std.h
@@ -0,0 +1,31 @@
+typedef long size_t;
+
+struct FILE;
+typedef struct FILE FILE;
+
+extern FILE* stdin;
+extern FILE* stdout;
+extern FILE* stderr;
+
+int atoi(const char*);
+void* calloc(size_t, size_t);
+void exit(int);
+int fclose(FILE*);
+int fprintf(FILE*, const char*, ...);
+char* fgets(char*, int, FILE*);
+FILE* fopen(const char*, const char*);
+int getchar(void);
+int isalnum(int);
+int isalpha(int);
+int isdigit(int);
+int isspace(int);
+void* memcpy(void*, void*, size_t);
+void* memmove(void*, void*, size_t);
+int printf(const char*, ...);
+int sprintf(char*, const char*, ...);
+int strcmp(const char*, const char*);
+size_t strlen(const char*);
+int strncmp(const char*, const char*, size_t);
+char* strstr(const char*, const char*);
+
+#define NULL 0
diff --git a/tests/060.sh b/tests/060.sh
new file mode 100644
index 0000000..03d65b7
--- /dev/null
+++ b/tests/060.sh
@@ -0,0 +1,20 @@
+set -e
+
+cat <<'EOF' > expected
+42
+EOF
+
+cat <<'EOF' > header.h
+#define A 42
+EOF
+
+bash ../../test_diff.sh <<'EOF'
+#include "header.h"
+
+int printf();
+
+int main() {
+ printf("%d\n", A);
+ return 0;
+}
+EOF
diff --git a/tokenize.c b/tokenize.c
new file mode 100644
index 0000000..71427d0
--- /dev/null
+++ b/tokenize.c
@@ -0,0 +1,368 @@
+enum TokenKind {
+ TokenKind_eof,
+
+ TokenKind_and,
+ TokenKind_andand,
+ TokenKind_arrow,
+ TokenKind_assign,
+ TokenKind_assign_add,
+ TokenKind_assign_sub,
+ TokenKind_brace_l,
+ TokenKind_brace_r,
+ TokenKind_bracket_l,
+ TokenKind_bracket_r,
+ TokenKind_comma,
+ TokenKind_dot,
+ TokenKind_ellipsis,
+ TokenKind_eq,
+ TokenKind_ge,
+ TokenKind_gt,
+ TokenKind_ident,
+ TokenKind_keyword_break,
+ TokenKind_keyword_char,
+ TokenKind_keyword_const,
+ TokenKind_keyword_continue,
+ TokenKind_keyword_do,
+ TokenKind_keyword_else,
+ TokenKind_keyword_enum,
+ TokenKind_keyword_extern,
+ TokenKind_keyword_for,
+ TokenKind_keyword_if,
+ TokenKind_keyword_int,
+ TokenKind_keyword_long,
+ TokenKind_keyword_return,
+ TokenKind_keyword_sizeof,
+ TokenKind_keyword_struct,
+ TokenKind_keyword_typeof,
+ TokenKind_keyword_void,
+ TokenKind_keyword_while,
+ TokenKind_le,
+ TokenKind_lt,
+ TokenKind_literal_int,
+ TokenKind_literal_str,
+ TokenKind_minus,
+ TokenKind_minusminus,
+ TokenKind_ne,
+ TokenKind_not,
+ TokenKind_oror,
+ TokenKind_paren_l,
+ TokenKind_paren_r,
+ TokenKind_percent,
+ TokenKind_plus,
+ TokenKind_plusplus,
+ TokenKind_semicolon,
+ TokenKind_slash,
+ TokenKind_star,
+};
+typedef enum TokenKind TokenKind;
+
+struct Token {
+ TokenKind kind;
+ String raw;
+};
+typedef struct Token Token;
+
+const char* token_kind_stringify(TokenKind k) {
+ if (k == TokenKind_eof)
+ return "<eof>";
+ else if (k == TokenKind_and)
+ return "&";
+ else if (k == TokenKind_andand)
+ return "&&";
+ else if (k == TokenKind_arrow)
+ return "->";
+ else if (k == TokenKind_assign)
+ return "=";
+ else if (k == TokenKind_assign_add)
+ return "+=";
+ else if (k == TokenKind_assign_sub)
+ return "-=";
+ else if (k == TokenKind_brace_l)
+ return "{";
+ else if (k == TokenKind_brace_r)
+ return "}";
+ else if (k == TokenKind_bracket_l)
+ return "[";
+ else if (k == TokenKind_bracket_r)
+ return "]";
+ else if (k == TokenKind_comma)
+ return ",";
+ else if (k == TokenKind_dot)
+ return ".";
+ else if (k == TokenKind_ellipsis)
+ return "...";
+ else if (k == TokenKind_eq)
+ return "==";
+ else if (k == TokenKind_ge)
+ return ">=";
+ else if (k == TokenKind_gt)
+ return ">";
+ else if (k == TokenKind_ident)
+ return "<identifier>";
+ else if (k == TokenKind_keyword_break)
+ return "break";
+ else if (k == TokenKind_keyword_char)
+ return "char";
+ else if (k == TokenKind_keyword_const)
+ return "const";
+ else if (k == TokenKind_keyword_continue)
+ return "continue";
+ else if (k == TokenKind_keyword_do)
+ return "do";
+ else if (k == TokenKind_keyword_else)
+ return "else";
+ else if (k == TokenKind_keyword_enum)
+ return "enum";
+ else if (k == TokenKind_keyword_extern)
+ return "extern";
+ else if (k == TokenKind_keyword_for)
+ return "for";
+ else if (k == TokenKind_keyword_if)
+ return "if";
+ else if (k == TokenKind_keyword_int)
+ return "int";
+ else if (k == TokenKind_keyword_long)
+ return "long";
+ else if (k == TokenKind_keyword_return)
+ return "return";
+ else if (k == TokenKind_keyword_sizeof)
+ return "sizeof";
+ else if (k == TokenKind_keyword_struct)
+ return "struct";
+ else if (k == TokenKind_keyword_typeof)
+ return "typeof";
+ else if (k == TokenKind_keyword_void)
+ return "void";
+ else if (k == TokenKind_keyword_while)
+ return "while";
+ else if (k == TokenKind_le)
+ return "le";
+ else if (k == TokenKind_lt)
+ return "lt";
+ else if (k == TokenKind_literal_int)
+ return "<integer>";
+ else if (k == TokenKind_literal_str)
+ return "<string>";
+ else if (k == TokenKind_minus)
+ return "-";
+ else if (k == TokenKind_minusminus)
+ return "--";
+ else if (k == TokenKind_ne)
+ return "!=";
+ else if (k == TokenKind_not)
+ return "!";
+ else if (k == TokenKind_oror)
+ return "||";
+ else if (k == TokenKind_paren_l)
+ return "(";
+ else if (k == TokenKind_paren_r)
+ return ")";
+ else if (k == TokenKind_percent)
+ return "%";
+ else if (k == TokenKind_plus)
+ return "+";
+ else if (k == TokenKind_plusplus)
+ return "++";
+ else if (k == TokenKind_semicolon)
+ return ";";
+ else if (k == TokenKind_slash)
+ return "/";
+ else if (k == TokenKind_star)
+ return "*";
+ else
+ unreachable();
+}
+
+const char* token_stringify(Token* t) {
+ TokenKind k = t->kind;
+ if (k == TokenKind_ident || k == TokenKind_literal_int || k == TokenKind_literal_str) {
+ char* buf = calloc(t->raw.len + 1, sizeof(char));
+ sprintf(buf, "%.*s (%s)", t->raw.len, t->raw.data, token_kind_stringify(k));
+ return buf;
+ } else {
+ return token_kind_stringify(k);
+ }
+}
+
+struct Lexer {
+ PpToken* src;
+ int pos;
+ Token* tokens;
+ int n_tokens;
+};
+typedef struct Lexer Lexer;
+
+Lexer* lexer_new(PpToken* pp_tokens) {
+ Lexer* l = calloc(1, sizeof(Lexer));
+ l->src = pp_tokens;
+ l->tokens = calloc(1024 * 1024, sizeof(Token));
+ return l;
+}
+
+void tokenize_all(Lexer* l) {
+ char* buf;
+ int ch;
+ int start;
+ while (l->src[l->pos].kind != PpTokenKind_eof) {
+ PpToken* pp_tok = l->src + l->pos;
+ Token* tok = l->tokens + l->n_tokens;
+ PpTokenKind k = pp_tok->kind;
+ ++l->pos;
+ if (k == PpTokenKind_header_name) {
+ fatal_error("not implemented yet");
+ } else if (k == PpTokenKind_identifier) {
+ if (string_equals_cstr(&pp_tok->raw, "break")) {
+ tok->kind = TokenKind_keyword_break;
+ } else if (string_equals_cstr(&pp_tok->raw, "char")) {
+ tok->kind = TokenKind_keyword_char;
+ } else if (string_equals_cstr(&pp_tok->raw, "const")) {
+ tok->kind = TokenKind_keyword_const;
+ } else if (string_equals_cstr(&pp_tok->raw, "continue")) {
+ tok->kind = TokenKind_keyword_continue;
+ } else if (string_equals_cstr(&pp_tok->raw, "do")) {
+ tok->kind = TokenKind_keyword_do;
+ } else if (string_equals_cstr(&pp_tok->raw, "else")) {
+ tok->kind = TokenKind_keyword_else;
+ } else if (string_equals_cstr(&pp_tok->raw, "enum")) {
+ tok->kind = TokenKind_keyword_enum;
+ } else if (string_equals_cstr(&pp_tok->raw, "extern")) {
+ tok->kind = TokenKind_keyword_extern;
+ } else if (string_equals_cstr(&pp_tok->raw, "for")) {
+ tok->kind = TokenKind_keyword_for;
+ } else if (string_equals_cstr(&pp_tok->raw, "if")) {
+ tok->kind = TokenKind_keyword_if;
+ } else if (string_equals_cstr(&pp_tok->raw, "int")) {
+ tok->kind = TokenKind_keyword_int;
+ } else if (string_equals_cstr(&pp_tok->raw, "long")) {
+ tok->kind = TokenKind_keyword_long;
+ } else if (string_equals_cstr(&pp_tok->raw, "return")) {
+ tok->kind = TokenKind_keyword_return;
+ } else if (string_equals_cstr(&pp_tok->raw, "sizeof")) {
+ tok->kind = TokenKind_keyword_sizeof;
+ } else if (string_equals_cstr(&pp_tok->raw, "struct")) {
+ tok->kind = TokenKind_keyword_struct;
+ } else if (string_equals_cstr(&pp_tok->raw, "typedef")) {
+ tok->kind = TokenKind_keyword_typeof;
+ } else if (string_equals_cstr(&pp_tok->raw, "void")) {
+ tok->kind = TokenKind_keyword_void;
+ } else if (string_equals_cstr(&pp_tok->raw, "while")) {
+ tok->kind = TokenKind_keyword_while;
+ } else {
+ tok->kind = TokenKind_ident;
+ }
+ tok->raw.data = pp_tok->raw.data;
+ tok->raw.len = pp_tok->raw.len;
+ } else if (k == PpTokenKind_pp_number) {
+ tok->kind = TokenKind_literal_int;
+ tok->raw.data = pp_tok->raw.data;
+ tok->raw.len = pp_tok->raw.len;
+ } else if (k == PpTokenKind_character_constant) {
+ tok->kind = TokenKind_literal_int;
+ ch = pp_tok->raw.data[1];
+ if (ch == '\\') {
+ ch = pp_tok->raw.data[2];
+ if (ch == 'a') {
+ ch = '\a';
+ } else if (ch == 'b') {
+ ch = '\b';
+ } else if (ch == 'f') {
+ ch = '\f';
+ } else if (ch == 'n') {
+ ch = '\n';
+ } else if (ch == 'r') {
+ ch = '\r';
+ } else if (ch == 't') {
+ ch = '\t';
+ } else if (ch == 'v') {
+ ch = '\v';
+ }
+ }
+ buf = calloc(4, sizeof(char));
+ sprintf(buf, "%d", ch);
+ tok->raw.data = buf;
+ tok->raw.len = strlen(buf);
+ } else if (k == PpTokenKind_string_literal) {
+ tok->kind = TokenKind_literal_str;
+ tok->raw.data = pp_tok->raw.data + 1;
+ tok->raw.len = pp_tok->raw.len - 2;
+ } else if (k == PpTokenKind_punctuator || k == PpTokenKind_other) {
+ if (string_equals_cstr(&pp_tok->raw, "(")) {
+ tok->kind = TokenKind_paren_l;
+ } else if (string_equals_cstr(&pp_tok->raw, ")")) {
+ tok->kind = TokenKind_paren_r;
+ } else if (string_equals_cstr(&pp_tok->raw, "{")) {
+ tok->kind = TokenKind_brace_l;
+ } else if (string_equals_cstr(&pp_tok->raw, "}")) {
+ tok->kind = TokenKind_brace_r;
+ } else if (string_equals_cstr(&pp_tok->raw, "[")) {
+ tok->kind = TokenKind_bracket_l;
+ } else if (string_equals_cstr(&pp_tok->raw, "]")) {
+ tok->kind = TokenKind_bracket_r;
+ } else if (string_equals_cstr(&pp_tok->raw, ",")) {
+ tok->kind = TokenKind_comma;
+ } else if (string_equals_cstr(&pp_tok->raw, ";")) {
+ tok->kind = TokenKind_semicolon;
+ } else if (string_equals_cstr(&pp_tok->raw, "+=")) {
+ tok->kind = TokenKind_assign_add;
+ } else if (string_equals_cstr(&pp_tok->raw, "++")) {
+ tok->kind = TokenKind_plusplus;
+ } else if (string_equals_cstr(&pp_tok->raw, "+")) {
+ tok->kind = TokenKind_plus;
+ } else if (string_equals_cstr(&pp_tok->raw, "||")) {
+ tok->kind = TokenKind_oror;
+ } else if (string_equals_cstr(&pp_tok->raw, "&&")) {
+ tok->kind = TokenKind_andand;
+ } else if (string_equals_cstr(&pp_tok->raw, "&")) {
+ tok->kind = TokenKind_and;
+ } else if (string_equals_cstr(&pp_tok->raw, "->")) {
+ tok->kind = TokenKind_arrow;
+ } else if (string_equals_cstr(&pp_tok->raw, "-=")) {
+ tok->kind = TokenKind_assign_sub;
+ } else if (string_equals_cstr(&pp_tok->raw, "--")) {
+ tok->kind = TokenKind_minusminus;
+ } else if (string_equals_cstr(&pp_tok->raw, "-")) {
+ tok->kind = TokenKind_minus;
+ } else if (string_equals_cstr(&pp_tok->raw, "*")) {
+ tok->kind = TokenKind_star;
+ } else if (string_equals_cstr(&pp_tok->raw, "/")) {
+ tok->kind = TokenKind_slash;
+ } else if (string_equals_cstr(&pp_tok->raw, "%")) {
+ tok->kind = TokenKind_percent;
+ } else if (string_equals_cstr(&pp_tok->raw, "...")) {
+ tok->kind = TokenKind_ellipsis;
+ } else if (string_equals_cstr(&pp_tok->raw, ".")) {
+ tok->kind = TokenKind_dot;
+ } else if (string_equals_cstr(&pp_tok->raw, "!=")) {
+ tok->kind = TokenKind_ne;
+ } else if (string_equals_cstr(&pp_tok->raw, "!")) {
+ tok->kind = TokenKind_not;
+ } else if (string_equals_cstr(&pp_tok->raw, "==")) {
+ tok->kind = TokenKind_eq;
+ } else if (string_equals_cstr(&pp_tok->raw, "=")) {
+ tok->kind = TokenKind_assign;
+ } else if (string_equals_cstr(&pp_tok->raw, "<=")) {
+ tok->kind = TokenKind_le;
+ } else if (string_equals_cstr(&pp_tok->raw, "<")) {
+ tok->kind = TokenKind_lt;
+ } else if (string_equals_cstr(&pp_tok->raw, ">=")) {
+ tok->kind = TokenKind_ge;
+ } else if (string_equals_cstr(&pp_tok->raw, ">")) {
+ tok->kind = TokenKind_gt;
+ } else {
+ sprintf(buf, "unknown token: %.*s", pp_tok->raw.len, pp_tok->raw.data);
+ fatal_error(buf);
+ }
+ tok->raw.data = pp_tok->raw.data;
+ tok->raw.len = pp_tok->raw.len;
+ } else if (k == PpTokenKind_whitespace) {
+ continue;
+ }
+ ++l->n_tokens;
+ }
+}
+
+Token* tokenize(PpToken* pp_tokens) {
+ Lexer* l = lexer_new(pp_tokens);
+ tokenize_all(l);
+ return l->tokens;
+}