diff options
| author | nsfisis <nsfisis@gmail.com> | 2025-08-22 23:28:25 +0900 |
|---|---|---|
| committer | nsfisis <nsfisis@gmail.com> | 2025-08-22 23:28:25 +0900 |
| commit | 9c202a496e75903fe37e5c19cb97c98eba6e35f2 (patch) | |
| tree | 52de494a4717a3c30c4bacb9dd9b91980be2a575 /src | |
| parent | 0ac6ac95283735dd70ebf55b26ef78a4c32c31de (diff) | |
| download | ducc-9c202a496e75903fe37e5c19cb97c98eba6e35f2.tar.gz ducc-9c202a496e75903fe37e5c19cb97c98eba6e35f2.tar.zst ducc-9c202a496e75903fe37e5c19cb97c98eba6e35f2.zip | |
chore: move *.c and *.h files to src/
Diffstat (limited to 'src')
| -rw-r--r-- | src/ast.c | 475 | ||||
| -rw-r--r-- | src/codegen.c | 595 | ||||
| -rw-r--r-- | src/common.c | 39 | ||||
| -rw-r--r-- | src/io.c | 99 | ||||
| -rw-r--r-- | src/parse.c | 1404 | ||||
| -rw-r--r-- | src/preprocess.c | 1557 | ||||
| -rw-r--r-- | src/std.h | 50 | ||||
| -rw-r--r-- | src/sys.c | 15 | ||||
| -rw-r--r-- | src/tokenize.c | 175 |
9 files changed, 4409 insertions, 0 deletions
diff --git a/src/ast.c b/src/ast.c new file mode 100644 index 0000000..66f7cc2 --- /dev/null +++ b/src/ast.c @@ -0,0 +1,475 @@ +enum TypeKind { + TypeKind_unknown, + + TypeKind_char, + TypeKind_short, + TypeKind_int, + TypeKind_long, + TypeKind_void, + TypeKind_ptr, + TypeKind_array, + TypeKind_enum, + TypeKind_struct, + TypeKind_union, +}; +typedef enum TypeKind TypeKind; + +const char* type_kind_stringify(TypeKind k) { + if (k == TypeKind_unknown) + return "<unknown>"; + else if (k == TypeKind_char) + return "char"; + else if (k == TypeKind_short) + return "short"; + else if (k == TypeKind_int) + return "int"; + else if (k == TypeKind_long) + return "long"; + else if (k == TypeKind_void) + return "void"; + else if (k == TypeKind_ptr) + return "<pointer>"; + else if (k == TypeKind_array) + return "<array>"; + else if (k == TypeKind_enum) + return "enum"; + else if (k == TypeKind_struct) + return "struct"; + else if (k == TypeKind_union) + return "union"; + else + unreachable(); +} + +struct AstNode; + +struct Type { + TypeKind kind; + // Check `base` instead of `kind` to test if the type is an array or a pointer. + struct Type* base; + int array_size; + struct AstNode* def; +}; +typedef struct Type Type; + +void type_dump(Type* ty) { + fprintf(stderr, "Type {\n"); + fprintf(stderr, " kind = %s\n", type_kind_stringify(ty->kind)); + fprintf(stderr, "}\n"); +} + +Type* type_new(TypeKind kind) { + Type* ty = calloc(1, sizeof(Type)); + ty->kind = kind; + return ty; +} + +Type* type_new_ptr(Type* base) { + Type* ty = calloc(1, sizeof(Type)); + ty->kind = TypeKind_ptr; + ty->base = base; + return ty; +} + +Type* type_new_array(Type* elem, int size) { + Type* ty = calloc(1, sizeof(Type)); + ty->kind = TypeKind_array; + ty->base = elem; + ty->array_size = size; + return ty; +} + +Type* type_new_static_string(int len) { + return type_new_array(type_new(TypeKind_char), len + 1); +} + +Type* type_array_to_ptr(Type* ty) { + return type_new_ptr(ty->base); +} + +BOOL type_is_unsized(Type* ty) { + return ty->kind == TypeKind_void; +} + +int type_sizeof_struct(Type* ty); +int type_sizeof_union(Type* ty); +int type_alignof_struct(Type* ty); +int type_alignof_union(Type* ty); +int type_offsetof(Type* ty, const char* name); +Type* type_member_typeof(Type* ty, const char* name); + +int type_sizeof(Type* ty) { + if (type_is_unsized(ty)) { + fatal_error("type_sizeof: type size cannot be determined"); + } + + if (ty->kind == TypeKind_ptr) + return 8; + else if (ty->kind == TypeKind_char) + return 1; + else if (ty->kind == TypeKind_short) + return 2; + else if (ty->kind == TypeKind_int) + return 4; + else if (ty->kind == TypeKind_long) + return 8; + else if (ty->kind == TypeKind_enum) + return 4; + else if (ty->kind == TypeKind_array) + return type_sizeof(ty->base) * ty->array_size; + else if (ty->kind == TypeKind_struct) + return type_sizeof_struct(ty); + else if (ty->kind == TypeKind_union) + return type_sizeof_union(ty); + else + unreachable(); +} + +int type_alignof(Type* ty) { + if (type_is_unsized(ty)) { + fatal_error("type_alignof: type size cannot be determined"); + } + + if (ty->kind == TypeKind_ptr) + return 8; + else if (ty->kind == TypeKind_char) + return 1; + else if (ty->kind == TypeKind_short) + return 2; + else if (ty->kind == TypeKind_int) + return 4; + else if (ty->kind == TypeKind_long) + return 8; + else if (ty->kind == TypeKind_enum) + return 4; + else if (ty->kind == TypeKind_array) + return type_alignof(ty->base); + else if (ty->kind == TypeKind_struct) + return type_alignof_struct(ty); + else if (ty->kind == TypeKind_union) + return type_alignof_union(ty); + else + unreachable(); +} + +int to_aligned(int n, int a) { + return (n + a - 1) / a * a; +} + +enum AstNodeKind { + AstNodeKind_unknown, + AstNodeKind_nop, + + AstNodeKind_assign_expr, + AstNodeKind_binary_expr, + AstNodeKind_break_stmt, + AstNodeKind_cond_expr, + AstNodeKind_continue_stmt, + AstNodeKind_deref_expr, + AstNodeKind_do_while_stmt, + AstNodeKind_enum_def, + AstNodeKind_enum_member, + AstNodeKind_expr_stmt, + AstNodeKind_for_stmt, + AstNodeKind_func_call, + AstNodeKind_func_decl, + AstNodeKind_func_def, + AstNodeKind_gvar, + AstNodeKind_gvar_decl, + AstNodeKind_if_stmt, + AstNodeKind_int_expr, + AstNodeKind_list, + AstNodeKind_logical_expr, + AstNodeKind_lvar, + AstNodeKind_lvar_decl, + AstNodeKind_param, + AstNodeKind_ref_expr, + AstNodeKind_return_stmt, + AstNodeKind_str_expr, + AstNodeKind_struct_decl, + AstNodeKind_struct_def, + AstNodeKind_struct_member, + AstNodeKind_type, + AstNodeKind_typedef_decl, + AstNodeKind_unary_expr, + AstNodeKind_union_decl, + AstNodeKind_union_def, + AstNodeKind_union_member, + + // Intermediate ASTs: they are used only in parsing, not for parse result. + AstNodeKind_declarator, +}; +typedef enum AstNodeKind AstNodeKind; + +#define node_items __n1 +#define node_len __i1 +#define node_cap __i2 +#define node_expr __n1 +#define node_lhs __n1 +#define node_rhs __n2 +#define node_operand __n1 +#define node_cond __n1 +#define node_init __n2 +#define node_update __n3 +#define node_then __n2 +#define node_else __n3 +#define node_body __n4 +#define node_members __n1 +#define node_params __n1 +#define node_args __n1 +#define node_int_value __i1 +#define node_idx __i1 +#define node_op __i1 +#define node_stack_offset __i1 +#define node_stack_size __i1 + +struct AstNode { + AstNodeKind kind; + const char* name; + Type* ty; + struct AstNode* __n1; + struct AstNode* __n2; + struct AstNode* __n3; + struct AstNode* __n4; + int __i1; + int __i2; +}; +typedef struct AstNode AstNode; + +struct Program { + AstNode* funcs; + AstNode* vars; + const char** str_literals; +}; +typedef struct Program Program; + +AstNode* ast_new(AstNodeKind kind) { + AstNode* ast = calloc(1, sizeof(AstNode)); + ast->kind = kind; + return ast; +} + +AstNode* ast_new_list(int capacity) { + if (capacity == 0) + unreachable(); + AstNode* list = ast_new(AstNodeKind_list); + list->node_cap = capacity; + list->node_len = 0; + list->node_items = calloc(list->node_cap, sizeof(AstNode)); + return list; +} + +void ast_append(AstNode* list, AstNode* item) { + if (list->kind != AstNodeKind_list) { + fatal_error("ast_append: ast is not a list"); + } + if (!item) { + return; + } + if (list->node_cap <= list->node_len) { + list->node_cap *= 2; + list->node_items = realloc(list->node_items, sizeof(AstNode) * list->node_cap); + memset(list->node_items + list->node_len, 0, sizeof(AstNode) * (list->node_cap - list->node_len)); + } + memcpy(list->node_items + list->node_len, item, sizeof(AstNode)); + ++list->node_len; +} + +AstNode* ast_new_int(int v) { + AstNode* e = ast_new(AstNodeKind_int_expr); + e->node_int_value = v; + e->ty = type_new(TypeKind_int); + return e; +} + +AstNode* ast_new_unary_expr(int op, AstNode* operand) { + AstNode* e = ast_new(AstNodeKind_unary_expr); + e->node_op = op; + e->node_operand = operand; + e->ty = type_new(TypeKind_int); + return e; +} + +AstNode* ast_new_binary_expr(int op, AstNode* lhs, AstNode* rhs) { + AstNode* e = ast_new(AstNodeKind_binary_expr); + e->node_op = op; + e->node_lhs = lhs; + e->node_rhs = rhs; + if (op == TokenKind_plus) { + if (lhs->ty->kind == TypeKind_ptr) { + e->ty = lhs->ty; + } else if (lhs->ty->kind == TypeKind_array) { + e->ty = type_array_to_ptr(lhs->ty); + } else if (rhs->ty->kind == TypeKind_ptr) { + e->ty = rhs->ty; + } else if (rhs->ty->kind == TypeKind_array) { + e->ty = type_array_to_ptr(rhs->ty); + } else { + e->ty = type_new(TypeKind_int); + } + } else if (op == TokenKind_minus) { + if (lhs->ty->kind == TypeKind_ptr) { + e->ty = lhs->ty; + } else if (lhs->ty->kind == TypeKind_array) { + e->ty = type_array_to_ptr(lhs->ty); + } else { + e->ty = type_new(TypeKind_int); + } + } else { + e->ty = type_new(TypeKind_int); + } + return e; +} + +AstNode* ast_new_assign_expr(int op, AstNode* lhs, AstNode* rhs) { + AstNode* e = ast_new(AstNodeKind_assign_expr); + e->node_op = op; + e->node_lhs = lhs; + e->node_rhs = rhs; + e->ty = lhs->ty; + return e; +} + +AstNode* ast_new_assign_add_expr(AstNode* lhs, AstNode* rhs) { + if (lhs->ty->base) { + rhs = ast_new_binary_expr(TokenKind_star, rhs, ast_new_int(type_sizeof(lhs->ty->base))); + } else if (rhs->ty->base) { + lhs = ast_new_binary_expr(TokenKind_star, lhs, ast_new_int(type_sizeof(rhs->ty->base))); + } + return ast_new_assign_expr(TokenKind_assign_add, lhs, rhs); +} + +AstNode* ast_new_assign_sub_expr(AstNode* lhs, AstNode* rhs) { + if (lhs->ty->base) { + rhs = ast_new_binary_expr(TokenKind_star, rhs, ast_new_int(type_sizeof(lhs->ty->base))); + } + return ast_new_assign_expr(TokenKind_assign_sub, lhs, rhs); +} + +AstNode* ast_new_ref_expr(AstNode* operand) { + AstNode* e = ast_new(AstNodeKind_ref_expr); + e->node_operand = operand; + e->ty = type_new_ptr(operand->ty); + return e; +} + +AstNode* ast_new_deref_expr(AstNode* operand) { + AstNode* e = ast_new(AstNodeKind_deref_expr); + e->node_operand = operand; + e->ty = operand->ty->base; + return e; +} + +AstNode* ast_new_member_access_expr(AstNode* obj, const char* name) { + AstNode* e = ast_new(AstNodeKind_deref_expr); + e->node_operand = ast_new_binary_expr(TokenKind_plus, obj, ast_new_int(type_offsetof(obj->ty->base, name))); + e->ty = type_member_typeof(obj->ty->base, name); + e->node_operand->ty = type_new_ptr(e->ty); + return e; +} + +int type_sizeof_struct(Type* ty) { + int next_offset = 0; + int struct_align = 0; + + for (int i = 0; i < ty->def->node_members->node_len; ++i) { + AstNode* member = ty->def->node_members->node_items + i; + int size = type_sizeof(member->ty); + int align = type_alignof(member->ty); + + next_offset = to_aligned(next_offset, align); + next_offset += size; + if (struct_align < align) { + struct_align = align; + } + } + return to_aligned(next_offset, struct_align); +} + +int type_sizeof_union(Type* ty) { + int union_size = 0; + int union_align = 0; + + for (int i = 0; i < ty->def->node_members->node_len; ++i) { + AstNode* member = ty->def->node_members->node_items + i; + int size = type_sizeof(member->ty); + int align = type_alignof(member->ty); + + size = to_aligned(size, align); + if (union_size < size) { + union_size = size; + } + if (union_align < align) { + union_align = align; + } + } + return to_aligned(union_size, union_align); +} + +int type_alignof_struct(Type* ty) { + int struct_align = 0; + + for (int i = 0; i < ty->def->node_members->node_len; ++i) { + AstNode* member = ty->def->node_members->node_items + i; + int align = type_alignof(member->ty); + + if (struct_align < align) { + struct_align = align; + } + } + return struct_align; +} + +int type_alignof_union(Type* ty) { + int union_align = 0; + + for (int i = 0; i < ty->def->node_members->node_len; ++i) { + AstNode* member = ty->def->node_members->node_items + i; + int align = type_alignof(member->ty); + + if (union_align < align) { + union_align = align; + } + } + return union_align; +} + +int type_offsetof(Type* ty, const char* name) { + if (ty->kind == TypeKind_union) { + return 0; + } + if (ty->kind != TypeKind_struct) { + fatal_error("type_offsetof: type is neither a struct nor a union"); + } + + int next_offset = 0; + + for (int i = 0; i < ty->def->node_members->node_len; ++i) { + AstNode* member = ty->def->node_members->node_items + i; + int size = type_sizeof(member->ty); + int align = type_alignof(member->ty); + + next_offset = to_aligned(next_offset, align); + if (strcmp(member->name, name) == 0) { + return next_offset; + } + next_offset += size; + } + + fatal_error("type_offsetof: member not found"); +} + +Type* type_member_typeof(Type* ty, const char* name) { + if (ty->kind != TypeKind_struct && ty->kind != TypeKind_union) { + fatal_error("type_member_typeof: type is neither a struct nor a union"); + } + + for (int i = 0; i < ty->def->node_members->node_len; ++i) { + AstNode* member = ty->def->node_members->node_items + i; + if (strcmp(member->name, name) == 0) { + return member->ty; + } + } + + fatal_error("type_offsetof: member not found"); +} diff --git a/src/codegen.c b/src/codegen.c new file mode 100644 index 0000000..a1167d4 --- /dev/null +++ b/src/codegen.c @@ -0,0 +1,595 @@ +enum GenMode { + GenMode_lval, + GenMode_rval, +}; +typedef enum GenMode GenMode; + +struct CodeGen { + int next_label; + int* loop_labels; + AstNode* current_func; +}; +typedef struct CodeGen CodeGen; + +CodeGen* codegen_new() { + CodeGen* g = calloc(1, sizeof(CodeGen)); + g->next_label = 1; + g->loop_labels = calloc(1024, sizeof(int)); + return g; +} + +int codegen_new_label(CodeGen* g) { + int new_label = g->next_label; + ++g->next_label; + return new_label; +} + +void codegen_expr(CodeGen* g, AstNode* ast, GenMode gen_mode); +void codegen_stmt(CodeGen* g, AstNode* ast); + +const char* param_reg(int n) { + if (n == 0) { + return "rdi"; + } else if (n == 1) { + return "rsi"; + } else if (n == 2) { + return "rdx"; + } else if (n == 3) { + return "rcx"; + } else if (n == 4) { + return "r8"; + } else if (n == 5) { + return "r9"; + } else { + unreachable(); + } +} + +void codegen_func_prologue(CodeGen* g, AstNode* ast) { + printf(" push rbp\n"); + printf(" mov rbp, rsp\n"); + for (int i = 0; i < ast->node_params->node_len; ++i) { + printf(" push %s\n", param_reg(i)); + } + printf(" sub rsp, %d\n", ast->node_stack_size); +} + +void codegen_func_epilogue(CodeGen* g, AstNode* ast) { + printf(" mov rsp, rbp\n"); + printf(" pop rbp\n"); + printf(" ret\n"); +} + +void codegen_int_expr(CodeGen* g, AstNode* ast) { + printf(" push %d\n", ast->node_int_value); +} + +void codegen_str_expr(CodeGen* g, AstNode* ast) { + printf(" mov rax, OFFSET FLAG:.Lstr__%d\n", ast->node_idx); + printf(" push rax\n"); +} + +void codegen_unary_expr(CodeGen* g, AstNode* ast) { + codegen_expr(g, ast->node_operand, GenMode_rval); + if (ast->node_op == TokenKind_not) { + printf(" pop rax\n"); + printf(" mov rdi, 0\n"); + printf(" cmp rax, rdi\n"); + printf(" sete al\n"); + printf(" movzb rax, al\n"); + printf(" push rax\n"); + } else { + unreachable(); + } +} + +void codegen_ref_expr(CodeGen* g, AstNode* ast, GenMode gen_mode) { + codegen_expr(g, ast->node_operand, GenMode_lval); +} + +// "reg" stores the address of the expression to be pushed. +void codegen_push_expr(const char* reg, int size) { + if (size == 1) { + printf(" movsx %s, BYTE PTR [%s]\n", reg, reg); + printf(" push %s\n", reg); + } else if (size == 2) { + printf(" movsx %s, WORD PTR [%s]\n", reg, reg); + printf(" push %s\n", reg); + } else if (size == 4) { + printf(" movsxd %s, DWORD PTR [%s]\n", reg, reg); + printf(" push %s\n", reg); + } else if (size == 8) { + printf(" mov %s, [%s]\n", reg, reg); + printf(" push %s\n", reg); + } else { + // Perform bitwise copy. Use r10 register as temporary space. + // Note: rsp must be aligned to 8. + printf(" sub rsp, %d\n", to_aligned(size, 8)); + for (int i = 0; i < size; ++i) { + // Copy a sinle byte from the address that "reg" points to to the stack via r10 register. + printf(" mov r10b, [%s+%d]\n", reg, i); + printf(" mov [rsp+%d], r10b\n", i); + } + } +} + +void codegen_lval2rval(Type* ty) { + if (ty->kind == TypeKind_array) { + return; + } + + printf(" pop rax\n"); + codegen_push_expr("rax", type_sizeof(ty)); +} + +void codegen_deref_expr(CodeGen* g, AstNode* ast, GenMode gen_mode) { + codegen_expr(g, ast->node_operand, GenMode_rval); + if (gen_mode == GenMode_rval) { + codegen_lval2rval(ast->node_operand->ty->base); + } +} + +void codegen_logical_expr(CodeGen* g, AstNode* ast) { + int label = codegen_new_label(g); + + if (ast->node_op == TokenKind_andand) { + codegen_expr(g, ast->node_lhs, GenMode_rval); + printf(" pop rax\n"); + printf(" cmp rax, 0\n"); + printf(" je .Lelse%d\n", label); + codegen_expr(g, ast->node_rhs, GenMode_rval); + printf(" jmp .Lend%d\n", label); + printf(".Lelse%d:\n", label); + printf(" push 0\n"); + printf(".Lend%d:\n", label); + } else { + codegen_expr(g, ast->node_lhs, GenMode_rval); + printf(" pop rax\n"); + printf(" cmp rax, 0\n"); + printf(" je .Lelse%d\n", label); + printf(" push 1\n"); + printf(" jmp .Lend%d\n", label); + printf(".Lelse%d:\n", label); + codegen_expr(g, ast->node_rhs, GenMode_rval); + printf(".Lend%d:\n", label); + } +} + +void codegen_binary_expr(CodeGen* g, AstNode* ast, GenMode gen_mode) { + codegen_expr(g, ast->node_lhs, gen_mode); + codegen_expr(g, ast->node_rhs, gen_mode); + printf(" pop rdi\n"); + printf(" pop rax\n"); + if (ast->node_op == TokenKind_plus) { + printf(" add rax, rdi\n"); + } else if (ast->node_op == TokenKind_minus) { + printf(" sub rax, rdi\n"); + } else if (ast->node_op == TokenKind_star) { + printf(" imul rax, rdi\n"); + } else if (ast->node_op == TokenKind_slash) { + printf(" cqo\n"); + printf(" idiv rdi\n"); + } else if (ast->node_op == TokenKind_percent) { + printf(" cqo\n"); + printf(" idiv rdi\n"); + printf(" mov rax, rdx\n"); + } else if (ast->node_op == TokenKind_or) { + printf(" or rax, rdi\n"); + } else if (ast->node_op == TokenKind_lshift) { + printf(" mov rcx, rdi\n"); + printf(" shl rax, cl\n"); + } else if (ast->node_op == TokenKind_rshift) { + // TODO: check if the operand is signed or unsigned + printf(" mov rcx, rdi\n"); + printf(" sar rax, cl\n"); + } else if (ast->node_op == TokenKind_eq) { + printf(" cmp rax, rdi\n"); + printf(" sete al\n"); + printf(" movzb rax, al\n"); + } else if (ast->node_op == TokenKind_ne) { + printf(" cmp rax, rdi\n"); + printf(" setne al\n"); + printf(" movzb rax, al\n"); + } else if (ast->node_op == TokenKind_lt) { + printf(" cmp rax, rdi\n"); + printf(" setl al\n"); + printf(" movzb rax, al\n"); + } else if (ast->node_op == TokenKind_le) { + printf(" cmp rax, rdi\n"); + printf(" setle al\n"); + printf(" movzb rax, al\n"); + } else { + unreachable(); + } + printf(" push rax\n"); +} + +void codegen_cond_expr(CodeGen* g, AstNode* ast, GenMode gen_mode) { + int label = codegen_new_label(g); + + codegen_expr(g, ast->node_cond, GenMode_rval); + printf(" pop rax\n"); + printf(" cmp rax, 0\n"); + printf(" je .Lelse%d\n", label); + codegen_expr(g, ast->node_then, gen_mode); + printf(" jmp .Lend%d\n", label); + printf(".Lelse%d:\n", label); + codegen_expr(g, ast->node_else, gen_mode); + printf(".Lend%d:\n", label); +} + +void codegen_assign_expr_helper(CodeGen* g, AstNode* ast) { + if (ast->node_op == TokenKind_assign) { + return; + } + + printf(" pop rdi\n"); + printf(" push [rsp]\n"); + codegen_lval2rval(ast->node_lhs->ty); + printf(" pop rax\n"); + + if (ast->node_op == TokenKind_assign_add) { + printf(" add rax, rdi\n"); + } else if (ast->node_op == TokenKind_assign_sub) { + printf(" sub rax, rdi\n"); + } else if (ast->node_op == TokenKind_assign_mul) { + printf(" imul rax, rdi\n"); + } else if (ast->node_op == TokenKind_assign_div) { + printf(" cqo\n"); + printf(" idiv rdi\n"); + } else if (ast->node_op == TokenKind_assign_mod) { + printf(" cqo\n"); + printf(" idiv rdi\n"); + printf(" mov rax, rdx\n"); + } else { + unreachable(); + } + + printf(" push rax\n"); +} + +void codegen_assign_expr(CodeGen* g, AstNode* ast) { + int sizeof_lhs = type_sizeof(ast->node_lhs->ty); + int sizeof_rhs = type_sizeof(ast->node_rhs->ty); + + codegen_expr(g, ast->node_lhs, GenMode_lval); + codegen_expr(g, ast->node_rhs, GenMode_rval); + codegen_assign_expr_helper(g, ast); + if (sizeof_lhs == 1) { + printf(" pop rdi\n"); + printf(" pop rax\n"); + printf(" mov BYTE PTR [rax], dil\n"); + printf(" push rdi\n"); + } else if (sizeof_lhs == 2) { + printf(" pop rdi\n"); + printf(" pop rax\n"); + printf(" mov WORD PTR [rax], di\n"); + printf(" push rdi\n"); + } else if (sizeof_lhs == 4) { + printf(" pop rdi\n"); + printf(" pop rax\n"); + printf(" mov DWORD PTR [rax], edi\n"); + printf(" push rdi\n"); + } else if (sizeof_lhs == 8) { + printf(" pop rdi\n"); + printf(" pop rax\n"); + printf(" mov [rax], rdi\n"); + printf(" push rdi\n"); + } else { + if (ast->node_op != TokenKind_assign) { + unimplemented(); + } + // Note: rsp must be aligned to 8. + int aligned_size = to_aligned(sizeof_lhs, 8); + printf(" mov rax, [rsp+%d]\n", aligned_size); + // Perform bitwise copy. Use r10 register as temporary space. + for (int i = 0; i < aligned_size; ++i) { + // Copy a sinle byte from the stack to the address that rax points to via r10 register. + printf(" mov r10b, [rsp+%d]\n", i); + printf(" mov [rax+%d], r10b\n", i); + } + // Pop the RHS value and the LHS address. + printf(" add rsp, %d\n", aligned_size + 8); + // TODO: dummy + printf(" push 0\n"); + } +} + +void codegen_func_call(CodeGen* g, AstNode* ast) { + const char* func_name = ast->name; + + if (strcmp(func_name, "__ducc_va_start") == 0) { + int stack_size = g->current_func->node_stack_size; + printf(" # __ducc_va_start BEGIN\n"); + for (int i = 0; i < 6; ++i) { + printf(" mov rax, %s\n", param_reg(i)); + printf(" mov [rbp-%d], rax\n", 8 + (stack_size - 4 - i) * 8); + } + AstNode* va_list_args = ast->node_args->node_items; + codegen_expr(g, va_list_args, GenMode_lval); + printf(" pop rdi\n"); + printf(" mov rax, rbp\n"); + printf(" sub rax, %d\n", 8 + (stack_size - 1) * 8); + printf(" mov [rdi], rax\n"); + printf(" mov DWORD PTR [rax], 8\n"); + printf(" mov DWORD PTR [rax+4], 0\n"); + printf(" mov QWORD PTR [rax+8], 0\n"); + printf(" mov rdi, rbp\n"); + printf(" sub rdi, %d\n", 8 + (stack_size - 4) * 8); + printf(" mov QWORD PTR [rax+16], rdi\n"); + printf(" # __ducc_va_start END\n"); + return; + } + + AstNode* args = ast->node_args; + for (int i = 0; i < args->node_len; ++i) { + AstNode* arg = args->node_items + i; + codegen_expr(g, arg, GenMode_rval); + } + for (int i = args->node_len - 1; i >= 0; --i) { + printf(" pop %s\n", param_reg(i)); + } + + int label = codegen_new_label(g); + + printf(" mov rax, rsp\n"); + printf(" and rax, 15\n"); + printf(" cmp rax, 0\n"); + printf(" je .Laligned%d\n", label); + + printf(" mov rax, 0\n"); + printf(" sub rsp, 8\n"); + printf(" call %s\n", func_name); + printf(" add rsp, 8\n"); + printf(" push rax\n"); + + printf(" jmp .Lend%d\n", label); + printf(".Laligned%d:\n", label); + + printf(" mov rax, 0\n"); + printf(" call %s\n", func_name); + printf(" push rax\n"); + + printf(".Lend%d:\n", label); +} + +void codegen_lvar(CodeGen* g, AstNode* ast, GenMode gen_mode) { + printf(" mov rax, rbp\n"); + printf(" sub rax, %d\n", ast->node_stack_offset); + printf(" push rax\n"); + if (gen_mode == GenMode_rval) { + codegen_lval2rval(ast->ty); + } +} + +void codegen_gvar(CodeGen* g, AstNode* ast, GenMode gen_mode) { + printf(" lea rax, %s[rip]\n", ast->name); + printf(" push rax\n"); + if (gen_mode == GenMode_rval) { + codegen_lval2rval(ast->ty); + } +} + +void codegen_composite_expr(CodeGen* g, AstNode* ast) { + // Standard C does not have composite expression, but ducc internally has. + for (int i = 0; i < ast->node_len; ++i) { + AstNode* expr = ast->node_items + i; + codegen_expr(g, expr, GenMode_rval); + if (i != ast->node_len - 1) { + // TODO: the expression on the stack can be more than 8 bytes. + printf(" pop rax\n"); + } + } +} + +void codegen_expr(CodeGen* g, AstNode* ast, GenMode gen_mode) { + if (ast->kind == AstNodeKind_int_expr) { + codegen_int_expr(g, ast); + } else if (ast->kind == AstNodeKind_str_expr) { + codegen_str_expr(g, ast); + } else if (ast->kind == AstNodeKind_unary_expr) { + codegen_unary_expr(g, ast); + } else if (ast->kind == AstNodeKind_ref_expr) { + codegen_ref_expr(g, ast, gen_mode); + } else if (ast->kind == AstNodeKind_deref_expr) { + codegen_deref_expr(g, ast, gen_mode); + } else if (ast->kind == AstNodeKind_binary_expr) { + codegen_binary_expr(g, ast, gen_mode); + } else if (ast->kind == AstNodeKind_cond_expr) { + codegen_cond_expr(g, ast, gen_mode); + } else if (ast->kind == AstNodeKind_logical_expr) { + codegen_logical_expr(g, ast); + } else if (ast->kind == AstNodeKind_assign_expr) { + codegen_assign_expr(g, ast); + } else if (ast->kind == AstNodeKind_func_call) { + codegen_func_call(g, ast); + } else if (ast->kind == AstNodeKind_lvar) { + codegen_lvar(g, ast, gen_mode); + } else if (ast->kind == AstNodeKind_gvar) { + codegen_gvar(g, ast, gen_mode); + } else if (ast->kind == AstNodeKind_list) { + codegen_composite_expr(g, ast); + } else { + unreachable(); + } +} + +void codegen_return_stmt(CodeGen* g, AstNode* ast) { + if (ast->node_expr) { + codegen_expr(g, ast->node_expr, GenMode_rval); + printf(" pop rax\n"); + } + codegen_func_epilogue(g, ast); +} + +void codegen_if_stmt(CodeGen* g, AstNode* ast) { + int label = codegen_new_label(g); + + codegen_expr(g, ast->node_cond, GenMode_rval); + printf(" pop rax\n"); + printf(" cmp rax, 0\n"); + printf(" je .Lelse%d\n", label); + codegen_stmt(g, ast->node_then); + printf(" jmp .Lend%d\n", label); + printf(".Lelse%d:\n", label); + if (ast->node_else) { + codegen_stmt(g, ast->node_else); + } + printf(".Lend%d:\n", label); +} + +void codegen_for_stmt(CodeGen* g, AstNode* ast) { + int label = codegen_new_label(g); + ++g->loop_labels; + *g->loop_labels = label; + + if (ast->node_init) { + codegen_expr(g, ast->node_init, GenMode_rval); + printf(" pop rax\n"); + } + printf(".Lbegin%d:\n", label); + codegen_expr(g, ast->node_cond, GenMode_rval); + printf(" pop rax\n"); + printf(" cmp rax, 0\n"); + printf(" je .Lend%d\n", label); + codegen_stmt(g, ast->node_body); + printf(".Lcontinue%d:\n", label); + if (ast->node_update) { + codegen_expr(g, ast->node_update, GenMode_rval); + printf(" pop rax\n"); + } + printf(" jmp .Lbegin%d\n", label); + printf(".Lend%d:\n", label); + + --g->loop_labels; +} + +void codegen_do_while_stmt(CodeGen* g, AstNode* ast) { + int label = codegen_new_label(g); + ++g->loop_labels; + *g->loop_labels = label; + + printf(".Lbegin%d:\n", label); + codegen_stmt(g, ast->node_body); + printf(".Lcontinue%d:\n", label); + codegen_expr(g, ast->node_cond, GenMode_rval); + printf(" pop rax\n"); + printf(" cmp rax, 0\n"); + printf(" je .Lend%d\n", label); + printf(" jmp .Lbegin%d\n", label); + printf(".Lend%d:\n", label); + + --g->loop_labels; +} + +void codegen_break_stmt(CodeGen* g, AstNode* ast) { + int label = *g->loop_labels; + printf(" jmp .Lend%d\n", label); +} + +void codegen_continue_stmt(CodeGen* g, AstNode* ast) { + int label = *g->loop_labels; + printf(" jmp .Lcontinue%d\n", label); +} + +void codegen_expr_stmt(CodeGen* g, AstNode* ast) { + codegen_expr(g, ast->node_expr, GenMode_rval); + // TODO: the expression on the stack can be more than 8 bytes. + printf(" pop rax\n"); +} + +void codegen_var_decl(CodeGen* g, AstNode* ast) { +} + +void codegen_nop(CodeGen* g, AstNode* ast) { +} + +void codegen_block_stmt(CodeGen* g, AstNode* ast) { + for (int i = 0; i < ast->node_len; ++i) { + AstNode* stmt = ast->node_items + i; + codegen_stmt(g, stmt); + } +} + +void codegen_stmt(CodeGen* g, AstNode* ast) { + if (ast->kind == AstNodeKind_list) { + codegen_block_stmt(g, ast); + } else if (ast->kind == AstNodeKind_return_stmt) { + codegen_return_stmt(g, ast); + } else if (ast->kind == AstNodeKind_if_stmt) { + codegen_if_stmt(g, ast); + } else if (ast->kind == AstNodeKind_for_stmt) { + codegen_for_stmt(g, ast); + } else if (ast->kind == AstNodeKind_do_while_stmt) { + codegen_do_while_stmt(g, ast); + } else if (ast->kind == AstNodeKind_break_stmt) { + codegen_break_stmt(g, ast); + } else if (ast->kind == AstNodeKind_continue_stmt) { + codegen_continue_stmt(g, ast); + } else if (ast->kind == AstNodeKind_expr_stmt) { + codegen_expr_stmt(g, ast); + } else if (ast->kind == AstNodeKind_lvar_decl) { + codegen_var_decl(g, ast); + } else if (ast->kind == AstNodeKind_nop) { + codegen_nop(g, ast); + } else { + unreachable(); + } +} + +void codegen_func(CodeGen* g, AstNode* ast) { + g->current_func = ast; + printf("%s:\n", ast->name); + + codegen_func_prologue(g, ast); + codegen_stmt(g, ast->node_body); + if (strcmp(ast->name, "main") == 0) { + // C99: 5.1.2.2.3 + printf(" mov rax, 0\n"); + } + codegen_func_epilogue(g, ast); + + printf("\n"); + g->current_func = NULL; +} + +void codegen(Program* prog) { + CodeGen* g = codegen_new(); + + printf(".intel_syntax noprefix\n\n"); + + // For GNU ld: + // https://sourceware.org/binutils/docs/ld/Options.html + printf(".section .note.GNU-stack,\"\",@progbits\n\n"); + + printf(".section .rodata\n\n"); + for (int i = 0; prog->str_literals[i]; ++i) { + printf(".Lstr__%d:\n", i + 1); + printf(" .string \"%s\"\n\n", prog->str_literals[i]); + } + + printf(".data\n\n"); + for (int i = 0; i < prog->vars->node_len; ++i) { + AstNode* var = prog->vars->node_items + i; + if (var->node_expr) { + if (var->ty->kind == TypeKind_char) + printf(" %s: .byte %d\n", var->name, var->node_expr->node_int_value); + else if (var->ty->kind == TypeKind_short) + printf(" %s: .word %d\n", var->name, var->node_expr->node_int_value); + else if (var->ty->kind == TypeKind_int) + printf(" %s: .int %d\n", var->name, var->node_expr->node_int_value); + else + unimplemented(); + } else { + printf(" %s: .zero %d\n", var->name, type_sizeof(var->ty)); + } + } + + printf(".globl main\n\n"); + + printf(".text\n\n"); + for (int i = 0; i < prog->funcs->node_len; ++i) { + AstNode* func = prog->funcs->node_items + i; + codegen_func(g, func); + } +} diff --git a/src/common.c b/src/common.c new file mode 100644 index 0000000..1b89f8c --- /dev/null +++ b/src/common.c @@ -0,0 +1,39 @@ +void fatal_error(const char* msg, ...) { + va_list args; + va_start(args, msg); + vfprintf(stderr, msg, args); + va_end(args); + fprintf(stderr, "\n"); + exit(1); +} + +#define unreachable() fatal_error("%s:%d: unreachable", __FILE__, __LINE__) + +#define unimplemented() fatal_error("%s:%d: unimplemented", __FILE__, __LINE__) + +struct StrBuilder { + size_t len; + size_t capacity; + char* buf; +}; +typedef struct StrBuilder StrBuilder; + +void strbuilder_init(StrBuilder* b) { + b->len = 0; + b->capacity = 16; + b->buf = calloc(b->capacity, sizeof(char)); +} + +// `size` must include a trailing null byte. +void strbuilder_reserve(StrBuilder* b, size_t size) { + if (size <= b->capacity) + return; + b->capacity *= 2; + b->buf = realloc(b->buf, b->capacity * sizeof(char)); + memset(b->buf + b->len, 0, (b->capacity - b->len) * sizeof(char)); +} + +void strbuilder_append_char(StrBuilder* b, int c) { + strbuilder_reserve(b, b->len + 1 + 1); + b->buf[b->len++] = c; +} diff --git a/src/io.c b/src/io.c new file mode 100644 index 0000000..5cd5e86 --- /dev/null +++ b/src/io.c @@ -0,0 +1,99 @@ +struct SourceLocation { + const char* filename; + int line; +}; +typedef struct SourceLocation SourceLocation; + +struct InFile { + const char* buf; + int pos; + SourceLocation loc; +}; +typedef struct InFile InFile; + +InFile* infile_open(const char* filename) { + FILE* in; + if (strcmp(filename, "-") == 0) { + in = stdin; + } else { + in = fopen(filename, "rb"); + } + if (!in) { + return NULL; + } + + size_t buf_size = 1024 * 10; + char* buf = calloc(buf_size, sizeof(char)); + char* cur = buf; + char* tmp = calloc(1024, sizeof(char)); + + while (fgets(tmp, 1024, in)) { + size_t len = strlen(tmp); + size_t used_size = cur - buf; + + if (buf_size <= used_size + len) { + size_t old_size = buf_size; + buf_size *= 2; + buf = realloc(buf, buf_size); + memset(buf + old_size, 0, buf_size - old_size); + cur = buf + used_size; + } + + memcpy(cur, tmp, len); + cur += len; + } + fclose(in); + + InFile* in_file = calloc(1, sizeof(InFile)); + in_file->buf = buf; + in_file->loc.filename = filename; + in_file->loc.line = 1; + return in_file; +} + +BOOL infile_eof(InFile* f) { + return f->buf[f->pos] == '\0'; +} + +char infile_peek_char(InFile* f) { + char c = f->buf[f->pos]; + + // Skip a pair of backslash and new-line. + if (c == '\\') { + char c2 = f->buf[f->pos + 1]; + // C23: 5.1.1.2 + // A source file that is not empty shall end in a new-line character, which shall not be immediately preceded by + // a backslash character before any such splicing takes place. + if (c2 == '\0') { + fatal_error("%s:%d: <new-line> expected, but got <eof>", f->loc.filename, f->loc.line); + } + // TODO: crlf + if (c2 == '\r' || c2 == '\n') { + f->pos += 2; + return infile_peek_char(f); + } + } + + // Normalize new-line. + // TODO: crlf + if (c == '\r') + c = '\n'; + return c; +} + +char infile_next_char(InFile* f) { + char c = infile_peek_char(f); + ++f->pos; + if (c == '\n') + ++f->loc.line; + return c; +} + +BOOL infile_consume_if(InFile* f, char expected) { + if (infile_peek_char(f) == expected) { + infile_next_char(f); + return TRUE; + } else { + return FALSE; + } +} diff --git a/src/parse.c b/src/parse.c new file mode 100644 index 0000000..8f06a83 --- /dev/null +++ b/src/parse.c @@ -0,0 +1,1404 @@ +#define LVAR_MAX 32 + +struct LocalVar { + const char* name; + Type* ty; + int stack_offset; +}; +typedef struct LocalVar LocalVar; + +struct Scope { + struct Scope* outer; + const char** lvar_names; + int* lvar_indices; +}; +typedef struct Scope Scope; + +struct GlobalVar { + const char* name; + Type* ty; +}; +typedef struct GlobalVar GlobalVar; + +struct Func { + const char* name; + Type* ty; +}; +typedef struct Func Func; + +struct Parser { + TokenArray* tokens; + int pos; + LocalVar* lvars; + int n_lvars; + Scope* scope; + GlobalVar* gvars; + int n_gvars; + Func* funcs; + int n_funcs; + AstNode* structs; + int n_structs; + AstNode* unions; + int n_unions; + AstNode* enums; + int n_enums; + AstNode* typedefs; + int n_typedefs; + const char** str_literals; + int n_str_literals; +}; +typedef struct Parser Parser; + +Parser* parser_new(TokenArray* tokens) { + Parser* p = calloc(1, sizeof(Parser)); + p->tokens = tokens; + p->gvars = calloc(128, sizeof(GlobalVar)); + p->funcs = calloc(256, sizeof(Func)); + p->structs = calloc(64, sizeof(AstNode)); + p->unions = calloc(64, sizeof(AstNode)); + p->enums = calloc(16, sizeof(AstNode)); + p->typedefs = calloc(64, sizeof(AstNode)); + p->str_literals = calloc(1024, sizeof(char*)); + + p->funcs[p->n_funcs].name = "__ducc_va_start"; + p->funcs[p->n_funcs].ty = calloc(1, sizeof(Type)); + p->funcs[p->n_funcs].ty->kind = TypeKind_void; + ++p->n_funcs; + + return p; +} + +Token* peek_token(Parser* p) { + return &p->tokens->data[p->pos]; +} + +Token* next_token(Parser* p) { + return &p->tokens->data[p->pos++]; +} + +Token* consume_token_if(Parser* p, TokenKind expected) { + if (peek_token(p)->kind == expected) { + return next_token(p); + } else { + return NULL; + } +} + +BOOL eof(Parser* p) { + return peek_token(p)->kind != TokenKind_eof; +} + +Token* expect(Parser* p, TokenKind expected) { + Token* t = next_token(p); + if (t->kind == expected) { + return t; + } + fatal_error("%s:%d: expected '%s', but got '%s'", t->loc.filename, t->loc.line, token_kind_stringify(expected), + token_stringify(t)); +} + +int find_lvar_in_scope(Parser* p, Scope* scope, const char* name) { + for (int i = 0; i < LVAR_MAX; ++i) { + if (scope->lvar_names[i] && strcmp(scope->lvar_names[i], name) == 0) { + return scope->lvar_indices[i]; + } + } + return -1; +} + +int find_lvar_in_current_scope(Parser* p, const char* name) { + return find_lvar_in_scope(p, p->scope, name); +} + +int find_lvar(Parser* p, const char* name) { + Scope* scope = p->scope; + while (scope) { + int idx = find_lvar_in_scope(p, scope, name); + if (idx != -1) + return idx; + scope = scope->outer; + } + return -1; +} + +int calc_stack_offset(Parser* p, Type* ty, BOOL is_param) { + int align; + if (is_param) { + if (8 < type_sizeof(ty) || 8 < type_alignof(ty)) { + fatal_error("too large"); + } + align = 8; + } else { + align = type_alignof(ty); + } + + int offset; + if (p->n_lvars == 0) { + offset = 0; + } else { + offset = p->lvars[p->n_lvars - 1].stack_offset; + } + + offset += type_sizeof(ty); + return to_aligned(offset, align); +} + +int add_lvar(Parser* p, const char* name, Type* ty, BOOL is_param) { + int stack_offset = calc_stack_offset(p, ty, is_param); + p->lvars[p->n_lvars].name = name; + p->lvars[p->n_lvars].ty = ty; + p->lvars[p->n_lvars].stack_offset = stack_offset; + for (int i = 0; i < LVAR_MAX; ++i) { + if (p->scope->lvar_names[i] == NULL) { + p->scope->lvar_names[i] = name; + p->scope->lvar_indices[i] = p->n_lvars; + break; + } + } + ++p->n_lvars; + return stack_offset; +} + +AstNode* generate_temporary_lvar(Parser* p, Type* ty) { + int stack_offset = add_lvar(p, NULL, ty, FALSE); + AstNode* lvar = ast_new(AstNodeKind_lvar); + lvar->name = NULL; + lvar->node_stack_offset = stack_offset; + lvar->ty = ty; + return lvar; +} + +int find_gvar(Parser* p, const char* name) { + for (int i = 0; i < p->n_gvars; ++i) { + if (strcmp(p->gvars[i].name, name) == 0) { + return i; + } + } + return -1; +} + +int find_func(Parser* p, const char* name) { + for (int i = 0; i < p->n_funcs; ++i) { + if (strcmp(p->funcs[i].name, name) == 0) { + return i; + } + } + return -1; +} + +int find_struct(Parser* p, const char* name) { + for (int i = 0; i < p->n_structs; ++i) { + if (strcmp(p->structs[i].name, name) == 0) { + return i; + } + } + return -1; +} + +int find_union(Parser* p, const char* name) { + for (int i = 0; i < p->n_unions; ++i) { + if (strcmp(p->unions[i].name, name) == 0) { + return i; + } + } + return -1; +} + +int find_enum(Parser* p, const char* name) { + for (int i = 0; i < p->n_enums; ++i) { + if (strcmp(p->enums[i].name, name) == 0) { + return i; + } + } + return -1; +} + +int find_enum_member(Parser* p, const char* name) { + for (int i = 0; i < p->n_enums; ++i) { + for (int j = 0; j < p->enums[i].node_members->node_len; ++j) { + if (strcmp(p->enums[i].node_members->node_items[j].name, name) == 0) { + return i * 1000 + j; + } + } + } + return -1; +} + +int find_typedef(Parser* p, const char* name) { + for (int i = 0; i < p->n_typedefs; ++i) { + if (strcmp(p->typedefs[i].name, name) == 0) { + return i; + } + } + return -1; +} + +void enter_scope(Parser* p) { + Scope* outer_scope = p->scope; + p->scope = calloc(1, sizeof(Scope)); + p->scope->outer = outer_scope; + p->scope->lvar_names = calloc(LVAR_MAX, sizeof(char*)); + p->scope->lvar_indices = calloc(LVAR_MAX, sizeof(int)); +} + +void leave_scope(Parser* p) { + p->scope = p->scope->outer; +} + +void enter_func(Parser* p) { + p->lvars = calloc(LVAR_MAX, sizeof(LocalVar)); + p->n_lvars = 0; + enter_scope(p); +} + +void leave_func(Parser* p) { + leave_scope(p); +} + +AstNode* parse_assignment_expr(Parser* p); +AstNode* parse_expr(Parser* p); +AstNode* parse_stmt(Parser* p); + +const char* parse_ident(Parser* p) { + return expect(p, TokenKind_ident)->value.string; +} + +int register_str_literal(Parser* p, const char* s) { + p->str_literals[p->n_str_literals] = s; + ++p->n_str_literals; + return p->n_str_literals; +} + +AstNode* parse_primary_expr(Parser* p) { + Token* t = next_token(p); + if (t->kind == TokenKind_literal_int) { + return ast_new_int(t->value.integer); + } else if (t->kind == TokenKind_literal_str) { + AstNode* e = ast_new(AstNodeKind_str_expr); + e->node_idx = register_str_literal(p, t->value.string); + e->ty = type_new_static_string(strlen(t->value.string)); + return e; + } else if (t->kind == TokenKind_paren_l) { + AstNode* e = parse_expr(p); + expect(p, TokenKind_paren_r); + return e; + } else if (t->kind == TokenKind_ident) { + const char* name = t->value.string; + + if (peek_token(p)->kind == TokenKind_paren_l) { + AstNode* e = ast_new(AstNodeKind_func_call); + int func_idx = find_func(p, name); + if (func_idx == -1) { + fatal_error("undefined function: %s", name); + } + e->name = name; + e->ty = p->funcs[func_idx].ty; + return e; + } + + int lvar_idx = find_lvar(p, name); + if (lvar_idx == -1) { + int gvar_idx = find_gvar(p, name); + if (gvar_idx == -1) { + int enum_member_idx = find_enum_member(p, name); + if (enum_member_idx == -1) { + fatal_error("undefined variable: %s", name); + } + int enum_idx = enum_member_idx / 1000; + int n = enum_member_idx % 1000; + AstNode* e = ast_new_int(p->enums[enum_idx].node_members->node_items[n].node_int_value); + e->ty = type_new(TypeKind_enum); + e->ty->def = p->enums + enum_idx; + return e; + } + AstNode* e = ast_new(AstNodeKind_gvar); + e->name = name; + e->ty = p->gvars[gvar_idx].ty; + return e; + } + + AstNode* e = ast_new(AstNodeKind_lvar); + e->name = name; + e->node_stack_offset = p->lvars[lvar_idx].stack_offset; + e->ty = p->lvars[lvar_idx].ty; + return e; + } else { + fatal_error("%s:%d: expected an expression, but got '%s'", t->loc.filename, t->loc.line, token_stringify(t)); + } +} + +AstNode* parse_arg_list(Parser* p) { + AstNode* list = ast_new_list(6); + while (peek_token(p)->kind != TokenKind_paren_r) { + AstNode* arg = parse_assignment_expr(p); + ast_append(list, arg); + if (!consume_token_if(p, TokenKind_comma)) { + break; + } + } + if (list->node_len > 6) { + fatal_error("too many arguments"); + } + return list; +} + +// e++ +// tmp1 = &e; tmp2 = *tmp1; *tmp1 += 1; tmp2 +// e-- +// tmp1 = &e; tmp2 = *tmp1; *tmp1 -= 1; tmp2 +AstNode* create_new_postfix_inc_or_dec(Parser* p, AstNode* e, TokenKind op) { + AstNode* tmp1_lvar = generate_temporary_lvar(p, type_new_ptr(e->ty)); + AstNode* tmp2_lvar = generate_temporary_lvar(p, e->ty); + + AstNode* expr1 = ast_new_assign_expr(TokenKind_assign, tmp1_lvar, ast_new_ref_expr(e)); + AstNode* expr2 = ast_new_assign_expr(TokenKind_assign, tmp2_lvar, ast_new_deref_expr(tmp1_lvar)); + AstNode* expr3; + if (op == TokenKind_plusplus) { + expr3 = ast_new_assign_add_expr(ast_new_deref_expr(tmp1_lvar), ast_new_int(1)); + } else { + expr3 = ast_new_assign_sub_expr(ast_new_deref_expr(tmp1_lvar), ast_new_int(1)); + } + AstNode* expr4 = tmp2_lvar; + + AstNode* ret = ast_new_list(4); + ast_append(ret, expr1); + ast_append(ret, expr2); + ast_append(ret, expr3); + ast_append(ret, expr4); + ret->ty = expr4->ty; + return ret; +} + +AstNode* parse_postfix_expr(Parser* p) { + AstNode* ret = parse_primary_expr(p); + while (1) { + TokenKind tk = peek_token(p)->kind; + if (consume_token_if(p, TokenKind_paren_l)) { + AstNode* args = parse_arg_list(p); + expect(p, TokenKind_paren_r); + ret->node_args = args; + } else if (consume_token_if(p, TokenKind_bracket_l)) { + AstNode* idx = parse_expr(p); + expect(p, TokenKind_bracket_r); + idx = ast_new_binary_expr(TokenKind_star, idx, ast_new_int(type_sizeof(ret->ty->base))); + ret = ast_new_deref_expr(ast_new_binary_expr(TokenKind_plus, ret, idx)); + } else if (consume_token_if(p, TokenKind_dot)) { + const char* name = parse_ident(p); + ret = ast_new_member_access_expr(ast_new_ref_expr(ret), name); + } else if (consume_token_if(p, TokenKind_arrow)) { + const char* name = parse_ident(p); + ret = ast_new_member_access_expr(ret, name); + } else if (consume_token_if(p, TokenKind_plusplus)) { + ret = create_new_postfix_inc_or_dec(p, ret, TokenKind_plusplus); + } else if (consume_token_if(p, TokenKind_minusminus)) { + ret = create_new_postfix_inc_or_dec(p, ret, TokenKind_minusminus); + } else { + break; + } + } + return ret; +} + +BOOL is_type_token(Parser* p, Token* token) { + if (token->kind == TokenKind_keyword_int || token->kind == TokenKind_keyword_short || + token->kind == TokenKind_keyword_long || token->kind == TokenKind_keyword_char || + token->kind == TokenKind_keyword_void || token->kind == TokenKind_keyword_enum || + token->kind == TokenKind_keyword_struct || token->kind == TokenKind_keyword_union || + token->kind == TokenKind_keyword_const) { + return TRUE; + } + if (token->kind != TokenKind_ident) { + return FALSE; + } + return find_typedef(p, token->value.string) != -1; +} + +Type* parse_type(Parser* p) { + Token* t = next_token(p); + if (t->kind == TokenKind_keyword_const) { + t = next_token(p); + } + if (!is_type_token(p, t)) { + fatal_error("%s:%d: parse_type: expected type, but got '%s'", t->loc.filename, t->loc.line, token_stringify(t)); + } + Type* ty; + if (t->kind == TokenKind_ident) { + int typedef_idx = find_typedef(p, t->value.string); + if (typedef_idx == -1) { + fatal_error("parse_type: unknown typedef, %s", t->value.string); + } + ty = p->typedefs[typedef_idx].ty; + } else { + ty = type_new(TypeKind_unknown); + if (t->kind == TokenKind_keyword_char) { + ty->kind = TypeKind_char; + } else if (t->kind == TokenKind_keyword_short) { + ty->kind = TypeKind_short; + } else if (t->kind == TokenKind_keyword_int) { + ty->kind = TypeKind_int; + } else if (t->kind == TokenKind_keyword_long) { + ty->kind = TypeKind_long; + } else if (t->kind == TokenKind_keyword_void) { + ty->kind = TypeKind_void; + } else if (t->kind == TokenKind_keyword_enum) { + ty->kind = TypeKind_enum; + const char* name = parse_ident(p); + int enum_idx = find_enum(p, name); + if (enum_idx == -1) { + fatal_error("parse_type: unknown enum, %s", name); + } + ty->def = p->enums + enum_idx; + } else if (t->kind == TokenKind_keyword_struct) { + ty->kind = TypeKind_struct; + const char* name = parse_ident(p); + int struct_idx = find_struct(p, name); + if (struct_idx == -1) { + fatal_error("parse_type: unknown struct, %s", name); + } + ty->def = p->structs + struct_idx; + } else if (t->kind == TokenKind_keyword_union) { + ty->kind = TypeKind_union; + const char* name = parse_ident(p); + int union_idx = find_union(p, name); + if (union_idx == -1) { + fatal_error("parse_type: unknown union, %s", name); + } + ty->def = p->unions + union_idx; + } else { + unreachable(); + } + } + while (1) { + if (!consume_token_if(p, TokenKind_star)) { + break; + } + ty = type_new_ptr(ty); + } + return ty; +} + +AstNode* parse_prefix_expr(Parser* p) { + TokenKind op = peek_token(p)->kind; + if (consume_token_if(p, TokenKind_minus)) { + AstNode* operand = parse_prefix_expr(p); + return ast_new_binary_expr(op, ast_new_int(0), operand); + } else if (consume_token_if(p, TokenKind_not)) { + AstNode* operand = parse_prefix_expr(p); + return ast_new_unary_expr(op, operand); + } else if (consume_token_if(p, TokenKind_and)) { + AstNode* operand = parse_prefix_expr(p); + return ast_new_ref_expr(operand); + } else if (consume_token_if(p, TokenKind_star)) { + AstNode* operand = parse_prefix_expr(p); + return ast_new_deref_expr(operand); + } else if (consume_token_if(p, TokenKind_plusplus)) { + AstNode* operand = parse_prefix_expr(p); + return ast_new_assign_add_expr(operand, ast_new_int(1)); + } else if (consume_token_if(p, TokenKind_minusminus)) { + AstNode* operand = parse_prefix_expr(p); + return ast_new_assign_sub_expr(operand, ast_new_int(1)); + } else if (consume_token_if(p, TokenKind_keyword_sizeof)) { + expect(p, TokenKind_paren_l); + Token* next_tok = peek_token(p); + Type* ty = NULL; + if (next_tok->kind == TokenKind_ident) { + int lvar_idx = find_lvar(p, next_tok->value.string); + if (lvar_idx != -1) { + next_token(p); + ty = p->lvars[lvar_idx].ty; + } + int gvar_idx = find_gvar(p, next_tok->value.string); + if (gvar_idx != -1) { + next_token(p); + ty = p->gvars[gvar_idx].ty; + } + } + if (!ty) { + ty = parse_type(p); + } + expect(p, TokenKind_paren_r); + return ast_new_int(type_sizeof(ty)); + } + return parse_postfix_expr(p); +} + +AstNode* parse_multiplicative_expr(Parser* p) { + AstNode* lhs = parse_prefix_expr(p); + while (1) { + TokenKind op = peek_token(p)->kind; + if (op == TokenKind_star || op == TokenKind_slash || op == TokenKind_percent) { + next_token(p); + AstNode* rhs = parse_prefix_expr(p); + lhs = ast_new_binary_expr(op, lhs, rhs); + } else { + break; + } + } + return lhs; +} + +AstNode* parse_additive_expr(Parser* p) { + AstNode* lhs = parse_multiplicative_expr(p); + while (1) { + if (consume_token_if(p, TokenKind_plus)) { + AstNode* rhs = parse_multiplicative_expr(p); + if (lhs->ty->base) { + lhs = ast_new_binary_expr( + TokenKind_plus, lhs, + ast_new_binary_expr(TokenKind_star, rhs, ast_new_int(type_sizeof(lhs->ty->base)))); + } else if (rhs->ty->base) { + lhs = ast_new_binary_expr( + TokenKind_plus, ast_new_binary_expr(TokenKind_star, lhs, ast_new_int(type_sizeof(rhs->ty->base))), + rhs); + } else { + lhs = ast_new_binary_expr(TokenKind_plus, lhs, rhs); + } + } else if (consume_token_if(p, TokenKind_minus)) { + AstNode* rhs = parse_multiplicative_expr(p); + if (lhs->ty->base) { + if (rhs->ty->base) { + // (a - b) / sizeof(a) + lhs = ast_new_binary_expr(TokenKind_slash, ast_new_binary_expr(TokenKind_minus, lhs, rhs), + ast_new_int(type_sizeof(lhs->ty->base))); + } else { + // a - b*sizeof(a) + lhs = ast_new_binary_expr( + TokenKind_minus, lhs, + ast_new_binary_expr(TokenKind_star, rhs, ast_new_int(type_sizeof(lhs->ty->base)))); + } + } else { + lhs = ast_new_binary_expr(TokenKind_minus, lhs, rhs); + } + } else { + break; + } + } + return lhs; +} + +AstNode* parse_shift_expr(Parser* p) { + AstNode* lhs = parse_additive_expr(p); + while (1) { + TokenKind op = peek_token(p)->kind; + if (op == TokenKind_lshift || op == TokenKind_rshift) { + next_token(p); + AstNode* rhs = parse_additive_expr(p); + lhs = ast_new_binary_expr(op, lhs, rhs); + } else { + break; + } + } + return lhs; +} + +AstNode* parse_relational_expr(Parser* p) { + AstNode* lhs = parse_shift_expr(p); + while (1) { + TokenKind op = peek_token(p)->kind; + if (op == TokenKind_lt || op == TokenKind_le) { + next_token(p); + AstNode* rhs = parse_shift_expr(p); + lhs = ast_new_binary_expr(op, lhs, rhs); + } else if (consume_token_if(p, TokenKind_gt)) { + AstNode* rhs = parse_shift_expr(p); + lhs = ast_new_binary_expr(TokenKind_lt, rhs, lhs); + } else if (consume_token_if(p, TokenKind_ge)) { + AstNode* rhs = parse_shift_expr(p); + lhs = ast_new_binary_expr(TokenKind_le, rhs, lhs); + } else { + break; + } + } + return lhs; +} + +AstNode* parse_equality_expr(Parser* p) { + AstNode* lhs = parse_relational_expr(p); + while (1) { + TokenKind op = peek_token(p)->kind; + if (op == TokenKind_eq || op == TokenKind_ne) { + next_token(p); + AstNode* rhs = parse_relational_expr(p); + lhs = ast_new_binary_expr(op, lhs, rhs); + } else { + break; + } + } + return lhs; +} + +AstNode* parse_bitwise_or_expr(Parser* p) { + AstNode* lhs = parse_equality_expr(p); + while (1) { + TokenKind op = peek_token(p)->kind; + if (consume_token_if(p, TokenKind_or)) { + AstNode* rhs = parse_equality_expr(p); + lhs = ast_new_binary_expr(op, lhs, rhs); + lhs->ty = type_new(TypeKind_int); + } else { + break; + } + } + return lhs; +} + +AstNode* parse_logical_and_expr(Parser* p) { + AstNode* lhs = parse_bitwise_or_expr(p); + while (1) { + TokenKind op = peek_token(p)->kind; + if (consume_token_if(p, TokenKind_andand)) { + AstNode* rhs = parse_bitwise_or_expr(p); + AstNode* e = ast_new(AstNodeKind_logical_expr); + e->node_op = op; + e->node_lhs = lhs; + e->node_rhs = rhs; + e->ty = type_new(TypeKind_int); + lhs = e; + } else { + break; + } + } + return lhs; +} + +AstNode* parse_logical_or_expr(Parser* p) { + AstNode* lhs = parse_logical_and_expr(p); + while (1) { + TokenKind op = peek_token(p)->kind; + if (consume_token_if(p, TokenKind_oror)) { + AstNode* rhs = parse_logical_and_expr(p); + AstNode* e = ast_new(AstNodeKind_logical_expr); + e->node_op = op; + e->node_lhs = lhs; + e->node_rhs = rhs; + e->ty = type_new(TypeKind_int); + lhs = e; + } else { + break; + } + } + return lhs; +} + +AstNode* parse_conditional_expr(Parser* p) { + AstNode* e = parse_logical_or_expr(p); + if (consume_token_if(p, TokenKind_question)) { + AstNode* then_expr = parse_expr(p); + expect(p, TokenKind_colon); + AstNode* else_expr = parse_assignment_expr(p); + AstNode* ret = ast_new(AstNodeKind_cond_expr); + ret->node_cond = e; + ret->node_then = then_expr; + ret->node_else = else_expr; + ret->ty = then_expr->ty; + return ret; + } else { + return e; + } +} + +// constant-expression: +// conditional-expression +AstNode* parse_constant_expression(Parser* p) { + return parse_conditional_expr(p); +} + +AstNode* parse_assignment_expr(Parser* p) { + AstNode* lhs = parse_conditional_expr(p); + while (1) { + TokenKind op = peek_token(p)->kind; + // TODO: check if the lhs is unary expression. + if (op == TokenKind_assign || op == TokenKind_assign_mul || op == TokenKind_assign_div || + op == TokenKind_assign_mod) { + next_token(p); + AstNode* rhs = parse_assignment_expr(p); + lhs = ast_new_assign_expr(op, lhs, rhs); + } else if (consume_token_if(p, TokenKind_assign_add)) { + AstNode* rhs = parse_assignment_expr(p); + lhs = ast_new_assign_add_expr(lhs, rhs); + } else if (consume_token_if(p, TokenKind_assign_sub)) { + AstNode* rhs = parse_assignment_expr(p); + lhs = ast_new_assign_sub_expr(lhs, rhs); + } else { + break; + } + } + return lhs; +} + +AstNode* parse_comma_expr(Parser* p) { + AstNode* lhs = parse_assignment_expr(p); + while (1) { + TokenKind op = peek_token(p)->kind; + if (consume_token_if(p, TokenKind_comma)) { + AstNode* rhs = parse_assignment_expr(p); + AstNode* list = ast_new_list(2); + ast_append(list, lhs); + ast_append(list, rhs); + list->ty = rhs->ty; + lhs = list; + } else { + break; + } + } + return lhs; +} + +AstNode* parse_expr(Parser* p) { + return parse_comma_expr(p); +} + +AstNode* parse_return_stmt(Parser* p) { + expect(p, TokenKind_keyword_return); + if (consume_token_if(p, TokenKind_semicolon)) { + return ast_new(AstNodeKind_return_stmt); + } + + AstNode* expr = parse_expr(p); + expect(p, TokenKind_semicolon); + + AstNode* ret = ast_new(AstNodeKind_return_stmt); + ret->node_expr = expr; + return ret; +} + +AstNode* parse_if_stmt(Parser* p) { + expect(p, TokenKind_keyword_if); + expect(p, TokenKind_paren_l); + AstNode* cond = parse_expr(p); + expect(p, TokenKind_paren_r); + AstNode* then_body = parse_stmt(p); + AstNode* else_body = NULL; + if (consume_token_if(p, TokenKind_keyword_else)) { + else_body = parse_stmt(p); + } + + AstNode* stmt = ast_new(AstNodeKind_if_stmt); + stmt->node_cond = cond; + stmt->node_then = then_body; + stmt->node_else = else_body; + return stmt; +} + +// pointer: +// '*' TODO attribute-specifier-sequence_opt TODO type-qualifier-list_opt +// '*' TODO attribute-specifier-sequence_opt TODO type-qualifier-list_opt pointer +Type* parse_pointer_opt(Parser* p, Type* ty) { + while (peek_token(p)->kind == TokenKind_star) { + next_token(p); + ty = type_new_ptr(ty); + } + return ty; +} + +// array-declarator: +// direct-declarator '[' TODO type-qualifier-list_opt TODO assignment-expression_opt ']' +// TODO direct-declarator '[' 'static' type-qualifier-list_opt assignment-expression_opt ']' +// TODO direct-declarator '[' type-qualifier-list 'static' assignment-expression_opt ']' +// TODO direct-declarator '[' type-qualifier-list_opt '*' ']' +Type* parse_array_declarator_suffix(Parser* p, Type* ty) { + next_token(p); // skip '[' + + AstNode* size_expr = parse_expr(p); + if (size_expr->kind != AstNodeKind_int_expr) { + fatal_error("parse_var_decl: invalid array size"); + } + int size = size_expr->node_int_value; + expect(p, TokenKind_bracket_r); + + return type_new_array(ty, size); +} + +// direct-declarator: +// identifier TODO attribute-specifier-sequence_opt +// TODO '(' declarator ')' +// array-declarator TODO attribute-specifier-sequence_opt +// TODO function-declarator TODO attribute-specifier-sequence_opt +AstNode* parse_direct_declarator(Parser* p, Type* ty) { + const char* name = parse_ident(p); + while (1) { + if (peek_token(p)->kind == TokenKind_bracket_l) { + ty = parse_array_declarator_suffix(p, ty); + } else { + break; + } + } + + AstNode* ret = ast_new(AstNodeKind_declarator); + ret->name = name; + ret->ty = ty; + return ret; +} + +// declarator: +// pointer_opt direct-declarator +AstNode* parse_declarator(Parser* p, Type* ty) { + ty = parse_pointer_opt(p, ty); + return parse_direct_declarator(p, ty); +} + +// initializer: +// assignment-expression +// TODO braced-initializer +AstNode* parse_initializer(Parser* p) { + return parse_assignment_expr(p); +} + +// init-declarator: +// declarator +// declarator '=' initializer +AstNode* parse_init_declarator(Parser* p, Type* ty) { + AstNode* decl = parse_declarator(p, ty); + AstNode* init = NULL; + if (consume_token_if(p, TokenKind_assign)) { + init = parse_initializer(p); + } + decl->node_init = init; + return decl; +} + +// init-declarator-list: +// init-declarator +// init-declarator-list ',' init-declarator +AstNode* parse_init_declarator_list(Parser* p, Type* ty) { + AstNode* list = ast_new_list(1); + while (1) { + AstNode* d = parse_init_declarator(p, ty); + ast_append(list, d); + if (!consume_token_if(p, TokenKind_comma)) { + break; + } + } + return list; +} + +AstNode* parse_var_decl(Parser* p) { + Type* base_ty = parse_type(p); + if (type_is_unsized(base_ty)) { + fatal_error("parse_var_decl: invalid type for variable"); + } + + AstNode* decls = parse_init_declarator_list(p, base_ty); + expect(p, TokenKind_semicolon); + + for (int i = 0; i < decls->node_len; ++i) { + AstNode* decl = &decls->node_items[i]; + + if (find_lvar_in_current_scope(p, decl->name) != -1) { + // TODO: use name's location. + fatal_error("%s:%d: '%s' redeclared", peek_token(p)->loc.filename, peek_token(p)->loc.line, decl->name); + } + int stack_offset = add_lvar(p, decl->name, decl->ty, FALSE); + + if (decl->node_init) { + AstNode* lhs = ast_new(AstNodeKind_lvar); + lhs->name = decl->name; + lhs->node_stack_offset = stack_offset; + lhs->ty = decl->ty; + AstNode* assign = ast_new_assign_expr(TokenKind_assign, lhs, decl->node_init); + decl->kind = AstNodeKind_expr_stmt; + decl->node_expr = assign; + } else { + decl->kind = AstNodeKind_nop; + } + } + return decls; +} + +AstNode* parse_for_stmt(Parser* p) { + expect(p, TokenKind_keyword_for); + expect(p, TokenKind_paren_l); + AstNode* init = NULL; + AstNode* cond = NULL; + AstNode* update = NULL; + enter_scope(p); + if (peek_token(p)->kind != TokenKind_semicolon) { + if (is_type_token(p, peek_token(p))) { + init = parse_var_decl(p)->node_items[0].node_expr; + } else { + init = parse_expr(p); + expect(p, TokenKind_semicolon); + } + } else { + expect(p, TokenKind_semicolon); + } + if (peek_token(p)->kind != TokenKind_semicolon) { + cond = parse_expr(p); + } else { + cond = ast_new_int(1); + } + expect(p, TokenKind_semicolon); + if (peek_token(p)->kind != TokenKind_paren_r) { + update = parse_expr(p); + } + expect(p, TokenKind_paren_r); + AstNode* body = parse_stmt(p); + leave_scope(p); + + AstNode* stmt = ast_new(AstNodeKind_for_stmt); + stmt->node_cond = cond; + stmt->node_init = init; + stmt->node_update = update; + stmt->node_body = body; + return stmt; +} + +AstNode* parse_while_stmt(Parser* p) { + expect(p, TokenKind_keyword_while); + expect(p, TokenKind_paren_l); + AstNode* cond = parse_expr(p); + expect(p, TokenKind_paren_r); + AstNode* body = parse_stmt(p); + + AstNode* stmt = ast_new(AstNodeKind_for_stmt); + stmt->node_cond = cond; + stmt->node_body = body; + return stmt; +} + +AstNode* parse_do_while_stmt(Parser* p) { + expect(p, TokenKind_keyword_do); + AstNode* body = parse_stmt(p); + expect(p, TokenKind_keyword_while); + expect(p, TokenKind_paren_l); + AstNode* cond = parse_expr(p); + expect(p, TokenKind_paren_r); + expect(p, TokenKind_semicolon); + + AstNode* stmt = ast_new(AstNodeKind_do_while_stmt); + stmt->node_cond = cond; + stmt->node_body = body; + return stmt; +} + +AstNode* parse_break_stmt(Parser* p) { + expect(p, TokenKind_keyword_break); + expect(p, TokenKind_semicolon); + return ast_new(AstNodeKind_break_stmt); +} + +AstNode* parse_continue_stmt(Parser* p) { + expect(p, TokenKind_keyword_continue); + expect(p, TokenKind_semicolon); + return ast_new(AstNodeKind_continue_stmt); +} + +AstNode* parse_expr_stmt(Parser* p) { + AstNode* e = parse_expr(p); + expect(p, TokenKind_semicolon); + AstNode* stmt = ast_new(AstNodeKind_expr_stmt); + stmt->node_expr = e; + return stmt; +} + +AstNode* parse_block_stmt(Parser* p) { + AstNode* list = ast_new_list(4); + expect(p, TokenKind_brace_l); + enter_scope(p); + while (peek_token(p)->kind != TokenKind_brace_r) { + AstNode* stmt = parse_stmt(p); + ast_append(list, stmt); + } + leave_scope(p); + expect(p, TokenKind_brace_r); + return list; +} + +AstNode* parse_empty_stmt(Parser* p) { + consume_token_if(p, TokenKind_semicolon); + return ast_new(AstNodeKind_nop); +} + +AstNode* parse_stmt(Parser* p) { + Token* t = peek_token(p); + if (t->kind == TokenKind_keyword_return) { + return parse_return_stmt(p); + } else if (t->kind == TokenKind_keyword_if) { + return parse_if_stmt(p); + } else if (t->kind == TokenKind_keyword_for) { + return parse_for_stmt(p); + } else if (t->kind == TokenKind_keyword_while) { + return parse_while_stmt(p); + } else if (t->kind == TokenKind_keyword_do) { + return parse_do_while_stmt(p); + } else if (t->kind == TokenKind_keyword_break) { + return parse_break_stmt(p); + } else if (t->kind == TokenKind_keyword_continue) { + return parse_continue_stmt(p); + } else if (t->kind == TokenKind_brace_l) { + return parse_block_stmt(p); + } else if (t->kind == TokenKind_semicolon) { + return parse_empty_stmt(p); + } else if (is_type_token(p, t)) { + return parse_var_decl(p); + } else { + return parse_expr_stmt(p); + } +} + +void register_params(Parser* p, AstNode* params) { + for (int i = 0; i < params->node_len; ++i) { + AstNode* param = params->node_items + i; + add_lvar(p, param->name, param->ty, TRUE); + } +} + +void register_func(Parser* p, const char* name, Type* ty) { + p->funcs[p->n_funcs].name = name; + p->funcs[p->n_funcs].ty = ty; + ++p->n_funcs; +} + +AstNode* parse_param(Parser* p) { + Type* ty = parse_type(p); + const char* name = NULL; + TokenKind tk = peek_token(p)->kind; + if (tk != TokenKind_comma && tk != TokenKind_paren_r) { + name = parse_ident(p); + } + AstNode* param = ast_new(AstNodeKind_param); + param->ty = ty; + if (name) { + param->name = name; + } + return param; +} + +AstNode* parse_param_list(Parser* p) { + BOOL has_void = FALSE; + AstNode* list = ast_new_list(6); + while (peek_token(p)->kind != TokenKind_paren_r) { + if (consume_token_if(p, TokenKind_ellipsis)) { + break; + } + AstNode* param = parse_param(p); + has_void = has_void || param->ty->kind == TypeKind_void; + ast_append(list, param); + if (!consume_token_if(p, TokenKind_comma)) { + break; + } + } + if (list->node_len > 6) { + fatal_error("too many parameters"); + } + if (has_void) { + if (list->node_len != 1) { + fatal_error("invalid use of void param"); + } + list->node_len = 0; + } + return list; +} + +AstNode* parse_global_var_decl(Parser* p) { + Type* base_ty = parse_type(p); + if (type_is_unsized(base_ty)) { + fatal_error("parse_global_var_decl: invalid type for variable"); + } + + AstNode* decls = parse_init_declarator_list(p, base_ty); + expect(p, TokenKind_semicolon); + + for (int i = 0; i < decls->node_len; ++i) { + AstNode* decl = &decls->node_items[i]; + + if (find_gvar(p, decl->name) != -1) { + fatal_error("parse_global_var_decl: %s redeclared", decl->name); + } + p->gvars[p->n_gvars].name = decl->name; + p->gvars[p->n_gvars].ty = decl->ty; + ++p->n_gvars; + + decl->kind = AstNodeKind_gvar_decl; + decl->node_expr = decl->node_init; + } + return decls; +} + +AstNode* parse_func_decl_or_def(Parser* p) { + // TODO: remove it. + int start_pos = p->pos; + + Type* ty = parse_type(p); + const char* name = parse_ident(p); + + if (peek_token(p)->kind != TokenKind_paren_l) { + // TODO: avoid backtracking + p->pos = start_pos; + return parse_global_var_decl(p); + } + + expect(p, TokenKind_paren_l); + register_func(p, name, ty); + AstNode* params = parse_param_list(p); + expect(p, TokenKind_paren_r); + if (consume_token_if(p, TokenKind_semicolon)) { + return ast_new(AstNodeKind_func_decl); + } + enter_func(p); + register_params(p, params); + AstNode* body = parse_block_stmt(p); + leave_func(p); + AstNode* func = ast_new(AstNodeKind_func_def); + func->ty = ty; + func->name = name; + func->node_params = params; + func->node_body = body; + if (p->n_lvars == 0) { + func->node_stack_size = 0; + } else { + func->node_stack_size = p->lvars[p->n_lvars - 1].stack_offset + type_sizeof(p->lvars[p->n_lvars - 1].ty); + } + return func; +} + +AstNode* parse_struct_member(Parser* p) { + Type* ty = parse_type(p); + const char* name = parse_ident(p); + expect(p, TokenKind_semicolon); + AstNode* member = ast_new(AstNodeKind_struct_member); + member->name = name; + member->ty = ty; + return member; +} + +AstNode* parse_struct_members(Parser* p) { + AstNode* list = ast_new_list(4); + while (peek_token(p)->kind != TokenKind_brace_r) { + AstNode* member = parse_struct_member(p); + ast_append(list, member); + } + return list; +} + +AstNode* parse_struct_decl_or_def(Parser* p) { + expect(p, TokenKind_keyword_struct); + const char* name = parse_ident(p); + + if (peek_token(p)->kind != TokenKind_semicolon && peek_token(p)->kind != TokenKind_brace_l) { + p->pos = p->pos - 2; + return parse_func_decl_or_def(p); + } + + int struct_idx = find_struct(p, name); + if (struct_idx == -1) { + struct_idx = p->n_structs; + p->structs[struct_idx].kind = AstNodeKind_struct_def; + p->structs[struct_idx].name = name; + ++p->n_structs; + } + if (consume_token_if(p, TokenKind_semicolon)) { + return ast_new(AstNodeKind_struct_decl); + } + if (p->structs[struct_idx].node_members) { + fatal_error("parse_struct_decl_or_def: struct %s redefined", name); + } + expect(p, TokenKind_brace_l); + AstNode* members = parse_struct_members(p); + expect(p, TokenKind_brace_r); + expect(p, TokenKind_semicolon); + p->structs[struct_idx].node_members = members; + return p->structs + struct_idx; +} + +AstNode* parse_union_member(Parser* p) { + Type* ty = parse_type(p); + const char* name = parse_ident(p); + expect(p, TokenKind_semicolon); + AstNode* member = ast_new(AstNodeKind_union_member); + member->name = name; + member->ty = ty; + return member; +} + +AstNode* parse_union_members(Parser* p) { + AstNode* list = ast_new_list(4); + while (peek_token(p)->kind != TokenKind_brace_r) { + AstNode* member = parse_union_member(p); + ast_append(list, member); + } + return list; +} + +AstNode* parse_union_decl_or_def(Parser* p) { + expect(p, TokenKind_keyword_union); + const char* name = parse_ident(p); + + if (peek_token(p)->kind != TokenKind_semicolon && peek_token(p)->kind != TokenKind_brace_l) { + p->pos = p->pos - 2; + return parse_func_decl_or_def(p); + } + + int union_idx = find_union(p, name); + if (union_idx == -1) { + union_idx = p->n_unions; + p->unions[union_idx].kind = AstNodeKind_union_def; + p->unions[union_idx].name = name; + ++p->n_unions; + } + if (consume_token_if(p, TokenKind_semicolon)) { + return ast_new(AstNodeKind_union_decl); + } + if (p->unions[union_idx].node_members) { + fatal_error("parse_union_decl_or_def: union %s redefined", name); + } + expect(p, TokenKind_brace_l); + AstNode* members = parse_union_members(p); + expect(p, TokenKind_brace_r); + expect(p, TokenKind_semicolon); + p->unions[union_idx].node_members = members; + return p->unions + union_idx; +} + +AstNode* parse_enum_member(Parser* p) { + const char* name = parse_ident(p); + AstNode* member = ast_new(AstNodeKind_enum_member); + member->name = name; + return member; +} + +AstNode* parse_enum_members(Parser* p) { + int next_value = 0; + AstNode* list = ast_new_list(16); + while (peek_token(p)->kind != TokenKind_brace_r) { + AstNode* member = parse_enum_member(p); + member->node_int_value = next_value; + ++next_value; + ast_append(list, member); + if (!consume_token_if(p, TokenKind_comma)) { + break; + } + } + return list; +} + +AstNode* parse_enum_def(Parser* p) { + expect(p, TokenKind_keyword_enum); + const char* name = parse_ident(p); + + if (peek_token(p)->kind != TokenKind_brace_l) { + p->pos = p->pos - 2; + return parse_func_decl_or_def(p); + } + + int enum_idx = find_enum(p, name); + if (enum_idx == -1) { + enum_idx = p->n_enums; + p->enums[enum_idx].kind = AstNodeKind_enum_def; + p->enums[enum_idx].name = name; + ++p->n_enums; + } else { + fatal_error("parse_enum_def: enum %s redefined", name); + } + expect(p, TokenKind_brace_l); + AstNode* members = parse_enum_members(p); + expect(p, TokenKind_brace_r); + expect(p, TokenKind_semicolon); + p->enums[enum_idx].node_members = members; + return p->enums + enum_idx; +} + +AstNode* parse_typedef_decl(Parser* p) { + expect(p, TokenKind_keyword_typedef); + Type* ty = parse_type(p); + const char* name = parse_ident(p); + expect(p, TokenKind_semicolon); + AstNode* decl = ast_new(AstNodeKind_typedef_decl); + decl->name = name; + decl->ty = ty; + p->typedefs[p->n_typedefs].name = name; + p->typedefs[p->n_typedefs].ty = ty; + ++p->n_typedefs; + return decl; +} + +AstNode* parse_extern_var_decl(Parser* p) { + expect(p, TokenKind_keyword_extern); + Type* ty = parse_type(p); + if (type_is_unsized(ty)) { + fatal_error("parse_extern_var_decl: invalid type for variable"); + } + const char* name = parse_ident(p); + expect(p, TokenKind_semicolon); + + if (find_lvar(p, name) != -1 || find_gvar(p, name) != -1) { + fatal_error("parse_extern_var_decl: %s redeclared", name); + } + p->gvars[p->n_gvars].name = name; + p->gvars[p->n_gvars].ty = ty; + ++p->n_gvars; + + return ast_new(AstNodeKind_gvar_decl); +} + +AstNode* parse_toplevel(Parser* p) { + TokenKind tk = peek_token(p)->kind; + if (tk == TokenKind_keyword_struct) { + return parse_struct_decl_or_def(p); + } else if (tk == TokenKind_keyword_union) { + return parse_union_decl_or_def(p); + } else if (tk == TokenKind_keyword_enum) { + return parse_enum_def(p); + } else if (tk == TokenKind_keyword_typedef) { + return parse_typedef_decl(p); + } else if (tk == TokenKind_keyword_extern) { + return parse_extern_var_decl(p); + } else { + return parse_func_decl_or_def(p); + } +} + +Program* parse(TokenArray* tokens) { + Parser* p = parser_new(tokens); + AstNode* funcs = ast_new_list(32); + AstNode* vars = ast_new_list(16); + while (eof(p)) { + AstNode* n = parse_toplevel(p); + if (n->kind == AstNodeKind_func_def) { + ast_append(funcs, n); + } else if (n->kind == AstNodeKind_gvar_decl && n->ty) { + ast_append(vars, n); + } else if (n->kind == AstNodeKind_list) { + for (int i = 0; i < n->node_len; ++i) { + ast_append(vars, &n->node_items[i]); + } + } + } + Program* prog = calloc(1, sizeof(Program)); + prog->funcs = funcs; + prog->vars = vars; + prog->str_literals = p->str_literals; + return prog; +} + +int eval(AstNode* e) { + if (e->kind == AstNodeKind_int_expr) { + return e->node_int_value; + } else if (e->kind == AstNodeKind_unary_expr) { + int v = eval(e->node_operand); + if (e->node_op == TokenKind_not) { + return !v; + } else { + unimplemented(); + } + } else if (e->kind == AstNodeKind_binary_expr || e->kind == AstNodeKind_logical_expr) { + int v1 = eval(e->node_lhs); + int v2 = eval(e->node_rhs); + if (e->node_op == TokenKind_andand) { + return v1 && v2; + } else if (e->node_op == TokenKind_oror) { + return v1 || v2; + } else { + unimplemented(); + } + } else { + unimplemented(); + } +} + +BOOL pp_eval_constant_expression(TokenArray* pp_tokens) { + TokenArray* tokens = tokenize(pp_tokens); + Parser* p = parser_new(tokens); + AstNode* e = parse_constant_expression(p); + return eval(e) != 0; +} diff --git a/src/preprocess.c b/src/preprocess.c new file mode 100644 index 0000000..b1810cd --- /dev/null +++ b/src/preprocess.c @@ -0,0 +1,1557 @@ +enum TokenKind { + TokenKind_eof, + + // Only preprocessing phase. + TokenKind_hash, + TokenKind_hashhash, + TokenKind_whitespace, + TokenKind_newline, + TokenKind_other, + TokenKind_character_constant, + TokenKind_header_name, + TokenKind_pp_directive_define, + TokenKind_pp_directive_elif, + TokenKind_pp_directive_elifdef, + TokenKind_pp_directive_elifndef, + TokenKind_pp_directive_else, + TokenKind_pp_directive_embed, + TokenKind_pp_directive_endif, + TokenKind_pp_directive_error, + TokenKind_pp_directive_if, + TokenKind_pp_directive_ifdef, + TokenKind_pp_directive_ifndef, + TokenKind_pp_directive_include, + TokenKind_pp_directive_line, + TokenKind_pp_directive_pragma, + TokenKind_pp_directive_undef, + TokenKind_pp_directive_warning, + TokenKind_pp_operator_defined, + TokenKind_pp_operator___has_c_attribute, + TokenKind_pp_operator___has_embed, + TokenKind_pp_operator___has_include, + + // C23: 6.4.1 + TokenKind_keyword_alignas, + TokenKind_keyword_alignof, + TokenKind_keyword_auto, + TokenKind_keyword_bool, + TokenKind_keyword_break, + TokenKind_keyword_case, + TokenKind_keyword_char, + TokenKind_keyword_const, + TokenKind_keyword_constexpr, + TokenKind_keyword_continue, + TokenKind_keyword_default, + TokenKind_keyword_do, + TokenKind_keyword_double, + TokenKind_keyword_else, + TokenKind_keyword_enum, + TokenKind_keyword_extern, + TokenKind_keyword_false, + TokenKind_keyword_float, + TokenKind_keyword_for, + TokenKind_keyword_goto, + TokenKind_keyword_if, + TokenKind_keyword_inline, + TokenKind_keyword_int, + TokenKind_keyword_long, + TokenKind_keyword_nullptr, + TokenKind_keyword_register, + TokenKind_keyword_restrict, + TokenKind_keyword_return, + TokenKind_keyword_short, + TokenKind_keyword_signed, + TokenKind_keyword_sizeof, + TokenKind_keyword_static, + TokenKind_keyword_static_assert, + TokenKind_keyword_struct, + TokenKind_keyword_switch, + TokenKind_keyword_thread_local, + TokenKind_keyword_true, + TokenKind_keyword_typedef, + TokenKind_keyword_typeof, + TokenKind_keyword_typeof_unqual, + TokenKind_keyword_union, + TokenKind_keyword_unsigned, + TokenKind_keyword_void, + TokenKind_keyword_volatile, + TokenKind_keyword_while, + TokenKind_keyword__Atomic, + TokenKind_keyword__BitInt, + TokenKind_keyword__Complex, + TokenKind_keyword__Decimal128, + TokenKind_keyword__Decimal32, + TokenKind_keyword__Decimal64, + TokenKind_keyword__Generic, + TokenKind_keyword__Imaginary, + TokenKind_keyword__Noreturn, + + TokenKind_and, + TokenKind_andand, + TokenKind_arrow, + TokenKind_assign, + TokenKind_assign_add, + TokenKind_assign_and, + TokenKind_assign_div, + TokenKind_assign_lshift, + TokenKind_assign_mod, + TokenKind_assign_mul, + TokenKind_assign_or, + TokenKind_assign_rshift, + TokenKind_assign_sub, + TokenKind_assign_xor, + TokenKind_brace_l, + TokenKind_brace_r, + TokenKind_bracket_l, + TokenKind_bracket_r, + TokenKind_colon, + TokenKind_comma, + TokenKind_dot, + TokenKind_ellipsis, + TokenKind_eq, + TokenKind_ge, + TokenKind_gt, + TokenKind_ident, + TokenKind_le, + TokenKind_literal_int, + TokenKind_literal_str, + TokenKind_lshift, + TokenKind_lt, + TokenKind_minus, + TokenKind_minusminus, + TokenKind_ne, + TokenKind_not, + TokenKind_or, + TokenKind_oror, + TokenKind_paren_l, + TokenKind_paren_r, + TokenKind_percent, + TokenKind_plus, + TokenKind_plusplus, + TokenKind_question, + TokenKind_rshift, + TokenKind_semicolon, + TokenKind_slash, + TokenKind_star, + TokenKind_tilde, + TokenKind_xor, +}; +typedef enum TokenKind TokenKind; + +const char* token_kind_stringify(TokenKind k) { + if (k == TokenKind_eof) + return "<eof>"; + else if (k == TokenKind_hash) + return "#"; + else if (k == TokenKind_hashhash) + return "##"; + else if (k == TokenKind_whitespace) + return "<whitespace>"; + else if (k == TokenKind_newline) + return "<new-line>"; + else if (k == TokenKind_other) + return "<other>"; + else if (k == TokenKind_character_constant) + return "<character-constant>"; + else if (k == TokenKind_header_name) + return "<header-name>"; + else if (k == TokenKind_pp_directive_define) + return "#define"; + else if (k == TokenKind_pp_directive_elif) + return "#elif"; + else if (k == TokenKind_pp_directive_elifdef) + return "#elifdef"; + else if (k == TokenKind_pp_directive_elifndef) + return "#elifndef"; + else if (k == TokenKind_pp_directive_else) + return "#else"; + else if (k == TokenKind_pp_directive_embed) + return "#embed"; + else if (k == TokenKind_pp_directive_endif) + return "#endif"; + else if (k == TokenKind_pp_directive_error) + return "#error"; + else if (k == TokenKind_pp_directive_if) + return "#if"; + else if (k == TokenKind_pp_directive_ifdef) + return "#ifdef"; + else if (k == TokenKind_pp_directive_ifndef) + return "#ifndef"; + else if (k == TokenKind_pp_directive_include) + return "#include"; + else if (k == TokenKind_pp_directive_line) + return "#line"; + else if (k == TokenKind_pp_directive_pragma) + return "#pragma"; + else if (k == TokenKind_pp_directive_undef) + return "#undef"; + else if (k == TokenKind_pp_directive_warning) + return "#warning"; + else if (k == TokenKind_pp_operator_defined) + return "defined"; + else if (k == TokenKind_pp_operator___has_c_attribute) + return "__has_c_attribute"; + else if (k == TokenKind_pp_operator___has_embed) + return "__has_embed"; + else if (k == TokenKind_pp_operator___has_include) + return "__has_include"; + else if (k == TokenKind_keyword_alignas) + return "alignas"; + else if (k == TokenKind_keyword_alignof) + return "alignof"; + else if (k == TokenKind_keyword_auto) + return "auto"; + else if (k == TokenKind_keyword_bool) + return "bool"; + else if (k == TokenKind_keyword_break) + return "break"; + else if (k == TokenKind_keyword_case) + return "case"; + else if (k == TokenKind_keyword_char) + return "char"; + else if (k == TokenKind_keyword_const) + return "const"; + else if (k == TokenKind_keyword_constexpr) + return "constexpr"; + else if (k == TokenKind_keyword_continue) + return "continue"; + else if (k == TokenKind_keyword_default) + return "default"; + else if (k == TokenKind_keyword_do) + return "do"; + else if (k == TokenKind_keyword_double) + return "double"; + else if (k == TokenKind_keyword_else) + return "else"; + else if (k == TokenKind_keyword_enum) + return "enum"; + else if (k == TokenKind_keyword_extern) + return "extern"; + else if (k == TokenKind_keyword_false) + return "false"; + else if (k == TokenKind_keyword_float) + return "float"; + else if (k == TokenKind_keyword_for) + return "for"; + else if (k == TokenKind_keyword_goto) + return "goto"; + else if (k == TokenKind_keyword_if) + return "if"; + else if (k == TokenKind_keyword_inline) + return "inline"; + else if (k == TokenKind_keyword_int) + return "int"; + else if (k == TokenKind_keyword_long) + return "long"; + else if (k == TokenKind_keyword_nullptr) + return "nullptr"; + else if (k == TokenKind_keyword_register) + return "register"; + else if (k == TokenKind_keyword_restrict) + return "restrict"; + else if (k == TokenKind_keyword_return) + return "return"; + else if (k == TokenKind_keyword_short) + return "short"; + else if (k == TokenKind_keyword_signed) + return "signed"; + else if (k == TokenKind_keyword_sizeof) + return "sizeof"; + else if (k == TokenKind_keyword_static) + return "static"; + else if (k == TokenKind_keyword_static_assert) + return "static_assert"; + else if (k == TokenKind_keyword_struct) + return "struct"; + else if (k == TokenKind_keyword_switch) + return "switch"; + else if (k == TokenKind_keyword_thread_local) + return "thread_local"; + else if (k == TokenKind_keyword_true) + return "true"; + else if (k == TokenKind_keyword_typedef) + return "typedef"; + else if (k == TokenKind_keyword_typeof) + return "typeof"; + else if (k == TokenKind_keyword_typeof_unqual) + return "typeof_unqual"; + else if (k == TokenKind_keyword_union) + return "union"; + else if (k == TokenKind_keyword_unsigned) + return "unsigned"; + else if (k == TokenKind_keyword_void) + return "void"; + else if (k == TokenKind_keyword_volatile) + return "volatile"; + else if (k == TokenKind_keyword_while) + return "while"; + else if (k == TokenKind_keyword__Atomic) + return "_Atomic"; + else if (k == TokenKind_keyword__BitInt) + return "_BitInt"; + else if (k == TokenKind_keyword__Complex) + return "_Complex"; + else if (k == TokenKind_keyword__Decimal128) + return "_Decimal128"; + else if (k == TokenKind_keyword__Decimal32) + return "_Decimal32"; + else if (k == TokenKind_keyword__Decimal64) + return "_Decimal64"; + else if (k == TokenKind_keyword__Generic) + return "_Generic"; + else if (k == TokenKind_keyword__Imaginary) + return "_Imaginary"; + else if (k == TokenKind_keyword__Noreturn) + return "_Noreturn"; + else if (k == TokenKind_and) + return "&"; + else if (k == TokenKind_andand) + return "&&"; + else if (k == TokenKind_arrow) + return "->"; + else if (k == TokenKind_assign) + return "="; + else if (k == TokenKind_assign_add) + return "+="; + else if (k == TokenKind_assign_and) + return "&="; + else if (k == TokenKind_assign_div) + return "/="; + else if (k == TokenKind_assign_lshift) + return "<<="; + else if (k == TokenKind_assign_mod) + return "%="; + else if (k == TokenKind_assign_mul) + return "*="; + else if (k == TokenKind_assign_or) + return "|="; + else if (k == TokenKind_assign_rshift) + return ">>="; + else if (k == TokenKind_assign_sub) + return "-="; + else if (k == TokenKind_assign_xor) + return "^="; + else if (k == TokenKind_brace_l) + return "{"; + else if (k == TokenKind_brace_r) + return "}"; + else if (k == TokenKind_bracket_l) + return "["; + else if (k == TokenKind_bracket_r) + return "]"; + else if (k == TokenKind_colon) + return ":"; + else if (k == TokenKind_comma) + return ","; + else if (k == TokenKind_dot) + return "."; + else if (k == TokenKind_ellipsis) + return "..."; + else if (k == TokenKind_eq) + return "=="; + else if (k == TokenKind_ge) + return ">="; + else if (k == TokenKind_gt) + return ">"; + else if (k == TokenKind_ident) + return "<identifier>"; + else if (k == TokenKind_le) + return "le"; + else if (k == TokenKind_literal_int) + return "<integer>"; + else if (k == TokenKind_literal_str) + return "<string>"; + else if (k == TokenKind_lshift) + return "<<"; + else if (k == TokenKind_lt) + return "lt"; + else if (k == TokenKind_minus) + return "-"; + else if (k == TokenKind_minusminus) + return "--"; + else if (k == TokenKind_ne) + return "!="; + else if (k == TokenKind_not) + return "!"; + else if (k == TokenKind_or) + return "|"; + else if (k == TokenKind_oror) + return "||"; + else if (k == TokenKind_paren_l) + return "("; + else if (k == TokenKind_paren_r) + return ")"; + else if (k == TokenKind_percent) + return "%"; + else if (k == TokenKind_plus) + return "+"; + else if (k == TokenKind_plusplus) + return "++"; + else if (k == TokenKind_question) + return "?"; + else if (k == TokenKind_rshift) + return ">>"; + else if (k == TokenKind_semicolon) + return ";"; + else if (k == TokenKind_slash) + return "/"; + else if (k == TokenKind_star) + return "*"; + else if (k == TokenKind_tilde) + return "~"; + else if (k == TokenKind_xor) + return "^"; + else + unreachable(); +} + +// TokenValue is externally tagged by Token's kind. +union TokenValue { + const char* string; + int integer; +}; +typedef union TokenValue TokenValue; + +struct Token { + TokenKind kind; + TokenValue value; + SourceLocation loc; +}; +typedef struct Token Token; + +const char* token_stringify(Token* t) { + TokenKind k = t->kind; + if (k == TokenKind_literal_int) { + const char* kind_str = token_kind_stringify(k); + char* buf = calloc(10 + strlen(kind_str) + 3 + 1, sizeof(char)); + sprintf(buf, "%d (%s)", t->value.integer, kind_str); + return buf; + } else if (k == TokenKind_other || k == TokenKind_character_constant || k == TokenKind_ident || + k == TokenKind_literal_int || k == TokenKind_literal_str) { + const char* kind_str = token_kind_stringify(k); + char* buf = calloc(strlen(t->value.string) + strlen(kind_str) + 3 + 1, sizeof(char)); + sprintf(buf, "%s (%s)", t->value.string, kind_str); + return buf; + } else { + return token_kind_stringify(k); + } +} + +struct TokenArray { + size_t len; + size_t capacity; + Token* data; +}; +typedef struct TokenArray TokenArray; + +void tokens_init(TokenArray* tokens, size_t capacity) { + tokens->len = 0; + tokens->capacity = capacity; + tokens->data = calloc(tokens->capacity, sizeof(Token)); +} + +void tokens_reserve(TokenArray* tokens, size_t size) { + if (size <= tokens->capacity) + return; + tokens->capacity *= 2; + tokens->data = realloc(tokens->data, tokens->capacity * sizeof(Token)); + memset(tokens->data + tokens->len, 0, (tokens->capacity - tokens->len) * sizeof(Token)); +} + +Token* tokens_push_new(TokenArray* tokens) { + tokens_reserve(tokens, tokens->len + 1); + return &tokens->data[tokens->len++]; +} + +Token* tokens_pop(TokenArray* tokens) { + if (tokens->len != 0) + tokens->len--; +} + +enum MacroKind { + MacroKind_undef, + MacroKind_obj, + MacroKind_func, + MacroKind_builtin_file, + MacroKind_builtin_line, +}; +typedef enum MacroKind MacroKind; + +const char* macro_kind_stringify(MacroKind kind) { + if (kind == MacroKind_undef) + return "undef"; + else if (kind == MacroKind_obj) + return "object-like"; + else if (kind == MacroKind_func) + return "function-like"; + else if (kind == MacroKind_builtin_file) + return "__FILE__"; + else if (kind == MacroKind_builtin_line) + return "__LINE__"; + else + unreachable(); +} + +struct Macro { + MacroKind kind; + const char* name; + TokenArray parameters; + TokenArray replacements; +}; +typedef struct Macro Macro; + +int macro_find_param(Macro* macro, Token* tok) { + if (tok->kind != TokenKind_ident) + return -1; + + for (int i = 0; i < macro->parameters.len; ++i) { + if (strcmp(macro->parameters.data[i].value.string, tok->value.string) == 0) { + return i; + } + } + return -1; +} + +struct MacroArray { + size_t len; + size_t capacity; + Macro* data; +}; +typedef struct MacroArray MacroArray; + +MacroArray* macros_new() { + MacroArray* macros = calloc(1, sizeof(MacroArray)); + macros->len = 0; + macros->capacity = 8; + macros->data = calloc(macros->capacity, sizeof(Macro)); + return macros; +} + +void macros_reserve(MacroArray* macros, size_t size) { + if (size <= macros->capacity) + return; + macros->capacity *= 2; + macros->data = realloc(macros->data, macros->capacity * sizeof(Macro)); + memset(macros->data + macros->len, 0, (macros->capacity - macros->len) * sizeof(Macro)); +} + +Macro* macros_push_new(MacroArray* macros) { + macros_reserve(macros, macros->len + 1); + return ¯os->data[macros->len++]; +} + +void macros_dump(MacroArray* macros) { + fprintf(stderr, "MacroArray {\n"); + fprintf(stderr, " len = %zu\n", macros->len); + fprintf(stderr, " data = [\n"); + for (int i = 0; i < macros->len; ++i) { + Macro* m = ¯os->data[i]; + fprintf(stderr, " Macro {\n"); + fprintf(stderr, " kind = %s\n", macro_kind_stringify(m->kind)); + fprintf(stderr, " name = %s\n", m->name); + fprintf(stderr, " replacements = TODO\n"); + fprintf(stderr, " }\n"); + } + fprintf(stderr, " ]\n"); + fprintf(stderr, "}\n"); +} + +void add_predefined_macros(MacroArray* macros) { + Macro* m; + + m = macros_push_new(macros); + m->kind = MacroKind_obj; + m->name = "__ducc__"; + tokens_init(&m->replacements, 1); + Token* tok = tokens_push_new(&m->replacements); + tok->kind = TokenKind_literal_int; + tok->value.integer = 1; + + m = macros_push_new(macros); + m->kind = MacroKind_builtin_file; + m->name = "__FILE__"; + + m = macros_push_new(macros); + m->kind = MacroKind_builtin_line; + m->name = "__LINE__"; +} + +struct MacroArg { + TokenArray tokens; +}; +typedef struct MacroArg MacroArg; + +struct MacroArgArray { + size_t len; + size_t capacity; + MacroArg* data; +}; +typedef struct MacroArgArray MacroArgArray; + +MacroArgArray* macroargs_new() { + MacroArgArray* macroargs = calloc(1, sizeof(MacroArgArray)); + macroargs->len = 0; + macroargs->capacity = 2; + macroargs->data = calloc(macroargs->capacity, sizeof(MacroArg)); + return macroargs; +} + +void macroargs_reserve(MacroArgArray* macroargs, size_t size) { + if (size <= macroargs->capacity) + return; + macroargs->capacity *= 2; + macroargs->data = realloc(macroargs->data, macroargs->capacity * sizeof(MacroArg)); + memset(macroargs->data + macroargs->len, 0, (macroargs->capacity - macroargs->len) * sizeof(MacroArg)); +} + +MacroArg* macroargs_push_new(MacroArgArray* macroargs) { + macroargs_reserve(macroargs, macroargs->len + 1); + return ¯oargs->data[macroargs->len++]; +} + +struct PpLexer { + InFile* src; + BOOL at_bol; + BOOL expect_header_name; + TokenArray* pp_tokens; +}; +typedef struct PpLexer PpLexer; + +PpLexer* pplexer_new(InFile* src) { + PpLexer* ppl = calloc(1, sizeof(PpLexer)); + + ppl->src = src; + ppl->at_bol = TRUE; + ppl->expect_header_name = FALSE; + ppl->pp_tokens = calloc(1, sizeof(TokenArray)); + tokens_init(ppl->pp_tokens, 1024 * 16); + + return ppl; +} + +TokenKind pplexer_tokenize_pp_directive(PpLexer* ppl) { + // Skip whitespaces after '#'. + char c; + while (isspace((c = infile_peek_char(ppl->src)))) { + if (c == '\n') + break; + infile_next_char(ppl->src); + } + + SourceLocation pp_directive_name_start_loc = ppl->src->loc; + + StrBuilder builder; + strbuilder_init(&builder); + while (isalnum(infile_peek_char(ppl->src))) { + strbuilder_append_char(&builder, infile_peek_char(ppl->src)); + infile_next_char(ppl->src); + } + const char* pp_directive_name = builder.buf; + + if (builder.len == 0) { + return TokenKind_hash; + } else if (strcmp(pp_directive_name, "define") == 0) { + return TokenKind_pp_directive_define; + } else if (strcmp(pp_directive_name, "elif") == 0) { + return TokenKind_pp_directive_elif; + } else if (strcmp(pp_directive_name, "elifdef") == 0) { + return TokenKind_pp_directive_elifdef; + } else if (strcmp(pp_directive_name, "elifndef") == 0) { + return TokenKind_pp_directive_elifndef; + } else if (strcmp(pp_directive_name, "else") == 0) { + return TokenKind_pp_directive_else; + } else if (strcmp(pp_directive_name, "embed") == 0) { + return TokenKind_pp_directive_embed; + } else if (strcmp(pp_directive_name, "endif") == 0) { + return TokenKind_pp_directive_endif; + } else if (strcmp(pp_directive_name, "error") == 0) { + return TokenKind_pp_directive_error; + } else if (strcmp(pp_directive_name, "if") == 0) { + return TokenKind_pp_directive_if; + } else if (strcmp(pp_directive_name, "ifdef") == 0) { + return TokenKind_pp_directive_ifdef; + } else if (strcmp(pp_directive_name, "ifndef") == 0) { + return TokenKind_pp_directive_ifndef; + } else if (strcmp(pp_directive_name, "include") == 0) { + ppl->expect_header_name = TRUE; + return TokenKind_pp_directive_include; + } else if (strcmp(pp_directive_name, "line") == 0) { + return TokenKind_pp_directive_line; + } else if (strcmp(pp_directive_name, "pragma") == 0) { + return TokenKind_pp_directive_pragma; + } else if (strcmp(pp_directive_name, "undef") == 0) { + return TokenKind_pp_directive_undef; + } else if (strcmp(pp_directive_name, "warning") == 0) { + return TokenKind_pp_directive_warning; + } else { + fatal_error("%s:%d: unknown preprocessor directive (%s)", pp_directive_name_start_loc.filename, + pp_directive_name_start_loc.line, pp_directive_name); + } +} + +void pplexer_tokenize_all(PpLexer* ppl) { + while (!infile_eof(ppl->src)) { + Token* tok = tokens_push_new(ppl->pp_tokens); + tok->loc = ppl->src->loc; + char c = infile_peek_char(ppl->src); + + if (ppl->expect_header_name && c == '"') { + infile_next_char(ppl->src); + StrBuilder builder; + strbuilder_init(&builder); + strbuilder_append_char(&builder, '"'); + while (1) { + char ch = infile_peek_char(ppl->src); + if (ch == '"') + break; + strbuilder_append_char(&builder, ch); + if (ch == '\\') { + infile_next_char(ppl->src); + strbuilder_append_char(&builder, infile_peek_char(ppl->src)); + } + infile_next_char(ppl->src); + } + strbuilder_append_char(&builder, '"'); + infile_next_char(ppl->src); + tok->kind = TokenKind_header_name; + tok->value.string = builder.buf; + ppl->expect_header_name = FALSE; + } else if (ppl->expect_header_name && c == '<') { + infile_next_char(ppl->src); + StrBuilder builder; + strbuilder_init(&builder); + strbuilder_append_char(&builder, '<'); + while (1) { + char ch = infile_peek_char(ppl->src); + if (ch == '>') + break; + strbuilder_append_char(&builder, ch); + infile_next_char(ppl->src); + } + strbuilder_append_char(&builder, '>'); + infile_next_char(ppl->src); + tok->kind = TokenKind_header_name; + tok->value.string = builder.buf; + ppl->expect_header_name = FALSE; + } else if (c == '(') { + infile_next_char(ppl->src); + tok->kind = TokenKind_paren_l; + } else if (c == ')') { + infile_next_char(ppl->src); + tok->kind = TokenKind_paren_r; + } else if (c == '{') { + infile_next_char(ppl->src); + tok->kind = TokenKind_brace_l; + } else if (c == '}') { + infile_next_char(ppl->src); + tok->kind = TokenKind_brace_r; + } else if (c == '[') { + infile_next_char(ppl->src); + tok->kind = TokenKind_bracket_l; + } else if (c == ']') { + infile_next_char(ppl->src); + tok->kind = TokenKind_bracket_r; + } else if (c == ',') { + infile_next_char(ppl->src); + tok->kind = TokenKind_comma; + } else if (c == ':') { + infile_next_char(ppl->src); + tok->kind = TokenKind_colon; + } else if (c == ';') { + infile_next_char(ppl->src); + tok->kind = TokenKind_semicolon; + } else if (c == '^') { + infile_next_char(ppl->src); + if (infile_consume_if(ppl->src, '=')) { + tok->kind = TokenKind_assign_xor; + } else { + tok->kind = TokenKind_xor; + } + } else if (c == '?') { + infile_next_char(ppl->src); + tok->kind = TokenKind_question; + } else if (c == '~') { + infile_next_char(ppl->src); + tok->kind = TokenKind_tilde; + } else if (c == '+') { + infile_next_char(ppl->src); + if (infile_consume_if(ppl->src, '=')) { + tok->kind = TokenKind_assign_add; + } else if (infile_consume_if(ppl->src, '+')) { + tok->kind = TokenKind_plusplus; + } else { + tok->kind = TokenKind_plus; + } + } else if (c == '|') { + infile_next_char(ppl->src); + if (infile_consume_if(ppl->src, '=')) { + tok->kind = TokenKind_assign_or; + } else if (infile_consume_if(ppl->src, '|')) { + tok->kind = TokenKind_oror; + } else { + tok->kind = TokenKind_or; + } + } else if (c == '&') { + infile_next_char(ppl->src); + if (infile_consume_if(ppl->src, '=')) { + tok->kind = TokenKind_assign_and; + } else if (infile_consume_if(ppl->src, '&')) { + tok->kind = TokenKind_andand; + } else { + tok->kind = TokenKind_and; + } + } else if (c == '-') { + infile_next_char(ppl->src); + if (infile_consume_if(ppl->src, '>')) { + tok->kind = TokenKind_arrow; + } else if (infile_consume_if(ppl->src, '=')) { + tok->kind = TokenKind_assign_sub; + } else if (infile_consume_if(ppl->src, '-')) { + tok->kind = TokenKind_minusminus; + } else { + tok->kind = TokenKind_minus; + } + } else if (c == '*') { + infile_next_char(ppl->src); + if (infile_consume_if(ppl->src, '=')) { + tok->kind = TokenKind_assign_mul; + } else { + tok->kind = TokenKind_star; + } + } else if (c == '/') { + infile_next_char(ppl->src); + if (infile_consume_if(ppl->src, '=')) { + tok->kind = TokenKind_assign_div; + } else if (infile_consume_if(ppl->src, '/')) { + while (!infile_eof(ppl->src) && infile_peek_char(ppl->src) != '\n') { + infile_next_char(ppl->src); + } + tok->kind = TokenKind_whitespace; + } else if (infile_consume_if(ppl->src, '*')) { + while (infile_peek_char(ppl->src)) { + if (infile_consume_if(ppl->src, '*')) { + if (infile_consume_if(ppl->src, '/')) { + break; + } + continue; + } + infile_next_char(ppl->src); + } + tok->kind = TokenKind_whitespace; + } else { + tok->kind = TokenKind_slash; + } + } else if (c == '%') { + infile_next_char(ppl->src); + if (infile_consume_if(ppl->src, '=')) { + tok->kind = TokenKind_assign_mod; + } else { + tok->kind = TokenKind_percent; + } + } else if (c == '.') { + infile_next_char(ppl->src); + if (infile_consume_if(ppl->src, '.')) { + if (infile_consume_if(ppl->src, '.')) { + tok->kind = TokenKind_ellipsis; + } else { + tok->kind = TokenKind_other; + tok->value.string = ".."; + } + } else { + tok->kind = TokenKind_dot; + } + } else if (c == '!') { + infile_next_char(ppl->src); + if (infile_consume_if(ppl->src, '=')) { + tok->kind = TokenKind_ne; + } else { + tok->kind = TokenKind_not; + } + } else if (c == '=') { + infile_next_char(ppl->src); + if (infile_consume_if(ppl->src, '=')) { + tok->kind = TokenKind_eq; + } else { + tok->kind = TokenKind_assign; + } + } else if (c == '<') { + infile_next_char(ppl->src); + if (infile_consume_if(ppl->src, '=')) { + tok->kind = TokenKind_le; + } else if (infile_consume_if(ppl->src, '<')) { + if (infile_consume_if(ppl->src, '=')) { + tok->kind = TokenKind_assign_lshift; + } else { + tok->kind = TokenKind_lshift; + } + } else { + tok->kind = TokenKind_lt; + } + } else if (c == '>') { + infile_next_char(ppl->src); + if (infile_consume_if(ppl->src, '=')) { + tok->kind = TokenKind_ge; + } else if (infile_consume_if(ppl->src, '>')) { + if (infile_consume_if(ppl->src, '=')) { + tok->kind = TokenKind_assign_rshift; + } else { + tok->kind = TokenKind_rshift; + } + } else { + tok->kind = TokenKind_gt; + } + } else if (c == '#') { + infile_next_char(ppl->src); + if (infile_consume_if(ppl->src, '#')) { + tok->kind = TokenKind_hashhash; + } else { + tok->kind = ppl->at_bol ? pplexer_tokenize_pp_directive(ppl) : TokenKind_hash; + } + } else if (c == '\'') { + infile_next_char(ppl->src); + StrBuilder builder; + strbuilder_init(&builder); + strbuilder_append_char(&builder, '\''); + strbuilder_append_char(&builder, infile_peek_char(ppl->src)); + if (infile_peek_char(ppl->src) == '\\') { + infile_next_char(ppl->src); + strbuilder_append_char(&builder, infile_peek_char(ppl->src)); + } + strbuilder_append_char(&builder, '\''); + infile_next_char(ppl->src); + infile_next_char(ppl->src); + tok->kind = TokenKind_character_constant; + tok->value.string = builder.buf; + } else if (c == '"') { + infile_next_char(ppl->src); + StrBuilder builder; + strbuilder_init(&builder); + while (1) { + char ch = infile_peek_char(ppl->src); + if (ch == '"') + break; + strbuilder_append_char(&builder, ch); + if (ch == '\\') { + infile_next_char(ppl->src); + strbuilder_append_char(&builder, infile_peek_char(ppl->src)); + } + infile_next_char(ppl->src); + } + infile_next_char(ppl->src); + tok->kind = TokenKind_literal_str; + tok->value.string = builder.buf; + } else if (isdigit(c)) { + StrBuilder builder; + strbuilder_init(&builder); + while (isdigit(infile_peek_char(ppl->src))) { + strbuilder_append_char(&builder, infile_peek_char(ppl->src)); + infile_next_char(ppl->src); + } + tok->kind = TokenKind_literal_int; + tok->value.integer = atoi(builder.buf); + } else if (isalpha(c) || c == '_') { + StrBuilder builder; + strbuilder_init(&builder); + while (isalnum(infile_peek_char(ppl->src)) || infile_peek_char(ppl->src) == '_') { + strbuilder_append_char(&builder, infile_peek_char(ppl->src)); + infile_next_char(ppl->src); + } + tok->kind = TokenKind_ident; + tok->value.string = builder.buf; + } else if (c == '\n') { + infile_next_char(ppl->src); + tok->kind = TokenKind_newline; + } else if (isspace(c)) { + while (isspace((c = infile_peek_char(ppl->src)))) { + if (c == '\n') + break; + infile_next_char(ppl->src); + } + if (ppl->at_bol && infile_peek_char(ppl->src) == '#') { + infile_next_char(ppl->src); + tok->kind = pplexer_tokenize_pp_directive(ppl); + } else { + tok->kind = TokenKind_whitespace; + } + } else { + infile_next_char(ppl->src); + tok->kind = TokenKind_other; + char* buf = calloc(2, sizeof(char)); + buf[0] = c; + tok->value.string = buf; + } + ppl->at_bol = tok->kind == TokenKind_newline; + } + Token* eof_tok = tokens_push_new(ppl->pp_tokens); + eof_tok->loc = ppl->src->loc; + eof_tok->kind = TokenKind_eof; +} + +TokenArray* pp_tokenize(InFile* src) { + PpLexer* ppl = pplexer_new(src); + pplexer_tokenize_all(ppl); + return ppl->pp_tokens; +} + +struct Preprocessor { + TokenArray* pp_tokens; + int pos; + MacroArray* macros; + int include_depth; + BOOL skip_pp_tokens; + char** include_paths; + int n_include_paths; +}; +typedef struct Preprocessor Preprocessor; + +TokenArray* do_preprocess(InFile* src, int depth, MacroArray* macros); + +Preprocessor* preprocessor_new(TokenArray* pp_tokens, int include_depth, MacroArray* macros) { + if (include_depth >= 32) { + fatal_error("include depth limit exceeded"); + } + + Preprocessor* pp = calloc(1, sizeof(Preprocessor)); + pp->pp_tokens = pp_tokens; + pp->macros = macros; + pp->include_depth = include_depth; + pp->include_paths = calloc(16, sizeof(char*)); + + return pp; +} + +Token* pp_token_at(Preprocessor* pp, int i) { + return &pp->pp_tokens->data[i]; +} + +Token* peek_pp_token(Preprocessor* pp) { + return pp_token_at(pp, pp->pos); +} + +Token* next_pp_token(Preprocessor* pp) { + return pp_token_at(pp, pp->pos++); +} + +BOOL pp_eof(Preprocessor* pp) { + return peek_pp_token(pp)->kind == TokenKind_eof; +} + +int find_macro(Preprocessor* pp, const char* name) { + for (int i = 0; i < pp->macros->len; ++i) { + if (pp->macros->data[i].kind == MacroKind_undef) + continue; + if (strcmp(pp->macros->data[i].name, name) == 0) { + return i; + } + } + return -1; +} + +void undef_macro(Preprocessor* pp, int idx) { + pp->macros->data[idx].kind = MacroKind_undef; + // TODO: Can predefined macro like __FILE__ be undefined? +} + +void add_include_path(Preprocessor* pp, char* include_path) { + pp->include_paths[pp->n_include_paths] = include_path; + ++pp->n_include_paths; +} + +BOOL skip_pp_tokens(Preprocessor* pp) { + // TODO: support nested #if + return pp->skip_pp_tokens; +} + +void skip_whitespaces(Preprocessor* pp) { + while (!pp_eof(pp) && peek_pp_token(pp)->kind == TokenKind_whitespace) { + next_pp_token(pp); + } +} + +void seek_to_next_newline(Preprocessor* pp) { + while (!pp_eof(pp)) { + Token* tok = peek_pp_token(pp); + if (tok->kind == TokenKind_newline) { + break; + } + next_pp_token(pp); + } +} + +void make_token_whitespace(Token* tok) { + tok->kind = TokenKind_whitespace; + tok->value.string = NULL; +} + +void remove_directive_tokens(Preprocessor* pp, int start, int end) { + for (int i = start; i < end; ++i) { + make_token_whitespace(pp_token_at(pp, i)); + } +} + +void process_endif_directive(Preprocessor* pp, int directive_token_pos) { + next_pp_token(pp); + pp->skip_pp_tokens = FALSE; + remove_directive_tokens(pp, directive_token_pos, pp->pos); +} + +void process_else_directive(Preprocessor* pp, int directive_token_pos) { + next_pp_token(pp); + pp->skip_pp_tokens = !pp->skip_pp_tokens; + remove_directive_tokens(pp, directive_token_pos, pp->pos); +} + +void process_elif_directive(Preprocessor* pp, int directive_token_pos) { + unimplemented(); +} + +BOOL pp_eval_constant_expression(TokenArray*); +int replace_pp_tokens(Preprocessor*, int, int, TokenArray*); +BOOL expand_macro(Preprocessor*); + +void process_if_directive(Preprocessor* pp, int directive_token_pos) { + next_pp_token(pp); + int condition_expression_start_pos = pp->pos; + + while (!pp_eof(pp)) { + Token* tok = peek_pp_token(pp); + if (tok->kind == TokenKind_newline) { + break; + } else if (tok->kind == TokenKind_ident) { + if (strcmp(tok->value.string, "defined") == 0) { + int defined_pos = pp->pos; + // 'defined' <ws>* '(' <ws>* <ident> <ws>* ')' + // 'defined' <ws>* <ident> + next_pp_token(pp); + skip_whitespaces(pp); + Token* macro_name; + if (peek_pp_token(pp)->kind == TokenKind_paren_l) { + next_pp_token(pp); + skip_whitespaces(pp); + macro_name = next_pp_token(pp); + if (macro_name->kind != TokenKind_ident) { + fatal_error("invalid defined"); + } + skip_whitespaces(pp); + if (next_pp_token(pp)->kind != TokenKind_paren_r) { + fatal_error("invalid defined"); + } + } else { + macro_name = next_pp_token(pp); + if (macro_name->kind != TokenKind_ident) { + fatal_error("invalid defined"); + } + } + BOOL is_defined = find_macro(pp, macro_name->value.string) != -1; + TokenArray defined_results; + tokens_init(&defined_results, 1); + Token* defined_result = tokens_push_new(&defined_results); + defined_result->kind = TokenKind_literal_int; + defined_result->value.integer = is_defined; + pp->pos = replace_pp_tokens(pp, defined_pos, pp->pos, &defined_results); + } else { + BOOL expanded = expand_macro(pp); + if (expanded) { + // A macro may expand to another macro. Re-scan the expanded tokens. + // TODO: if the macro is defined recursively, it causes infinite loop. + } else { + next_pp_token(pp); + } + } + } else { + next_pp_token(pp); + } + } + + // all remaining identifiers other than true (including those lexically identical to keywords such as false) are + // replaced with the pp-number 0, true is replaced with pp-number 1, and then each preprocessing token is converted + // into a token. + for (int pos = condition_expression_start_pos; pos < pp->pos; ++pos) { + Token* tok = pp_token_at(pp, pos); + if (tok->kind == TokenKind_ident) { + BOOL is_true = strcmp(tok->value.string, "true") == 0; + tok->kind = TokenKind_literal_int; + tok->value.integer = is_true; + } + } + + int condition_expression_tokens_len = pp->pos - condition_expression_start_pos; + TokenArray condition_expression_tokens; + // +1 to add EOF token at the end. + tokens_init(&condition_expression_tokens, condition_expression_tokens_len + 1); + for (int i = 0; i < condition_expression_tokens_len; ++i) { + *tokens_push_new(&condition_expression_tokens) = *pp_token_at(pp, condition_expression_start_pos + i); + } + Token* eof_tok = tokens_push_new(&condition_expression_tokens); + eof_tok->kind = TokenKind_eof; + + BOOL result = pp_eval_constant_expression(&condition_expression_tokens); + + pp->skip_pp_tokens = !result; + + remove_directive_tokens(pp, directive_token_pos, pp->pos); +} + +void process_ifdef_directive(Preprocessor* pp, int directive_token_pos) { + next_pp_token(pp); + skip_whitespaces(pp); + Token* macro_name = peek_pp_token(pp); + if (macro_name->kind == TokenKind_ident) { + next_pp_token(pp); + pp->skip_pp_tokens = find_macro(pp, macro_name->value.string) == -1; + } + remove_directive_tokens(pp, directive_token_pos, pp->pos); +} + +void process_ifndef_directive(Preprocessor* pp, int directive_token_pos) { + next_pp_token(pp); + skip_whitespaces(pp); + Token* macro_name = peek_pp_token(pp); + if (macro_name->kind == TokenKind_ident) { + next_pp_token(pp); + pp->skip_pp_tokens = find_macro(pp, macro_name->value.string) != -1; + } + remove_directive_tokens(pp, directive_token_pos, pp->pos); +} + +const char* read_include_header_name(Preprocessor* pp) { + Token* tok = next_pp_token(pp); + if (tok->kind != TokenKind_header_name) { + fatal_error("%s:%d: invalid #include", tok->loc.filename, tok->loc.line); + } + + return tok->value.string; +} + +const char* resolve_include_name(Preprocessor* pp, const char* include_name) { + if (include_name[0] == '"') { + return strndup(include_name + 1, strlen(include_name) - 2); + } else { + for (int i = 0; i < pp->n_include_paths; ++i) { + char* buf = calloc(strlen(include_name) - 2 + 1 + strlen(pp->include_paths[i]) + 1, sizeof(char)); + sprintf(buf, "%s/%.*s", pp->include_paths[i], strlen(include_name) - 2, include_name + 1); + if (access(buf, F_OK | R_OK) == 0) { + return buf; + } + } + return NULL; + } +} + +int replace_pp_tokens(Preprocessor* pp, int dest_start, int dest_end, TokenArray* source_tokens) { + int n_tokens_to_remove = dest_end - dest_start; + int n_tokens_after_dest = pp->pp_tokens->len - dest_end; + int shift_amount; + + if (n_tokens_to_remove < source_tokens->len) { + // Move existing tokens backward to make room. + shift_amount = source_tokens->len - n_tokens_to_remove; + tokens_reserve(pp->pp_tokens, pp->pp_tokens->len + shift_amount); + memmove(pp_token_at(pp, dest_end + shift_amount), pp_token_at(pp, dest_end), + n_tokens_after_dest * sizeof(Token)); + pp->pp_tokens->len += shift_amount; + } else if (source_tokens->len < n_tokens_to_remove) { + // Move existing tokens forward to reduce room. + shift_amount = n_tokens_to_remove - source_tokens->len; + memmove(pp_token_at(pp, dest_start + source_tokens->len), pp_token_at(pp, dest_end), + n_tokens_after_dest * sizeof(Token)); + pp->pp_tokens->len -= shift_amount; + memset(pp_token_at(pp, pp->pp_tokens->len), 0, shift_amount * sizeof(Token)); + } + + memcpy(pp_token_at(pp, dest_start), source_tokens->data, source_tokens->len * sizeof(Token)); + + return dest_start + source_tokens->len; +} + +int replace_single_pp_token(Preprocessor* pp, int dest, Token* source_tok) { + TokenArray tokens; + tokens_init(&tokens, 1); + *tokens_push_new(&tokens) = *source_tok; + replace_pp_tokens(pp, dest, dest + 1, &tokens); +} + +void expand_include_directive(Preprocessor* pp, int directive_token_pos, const char* include_name) { + InFile* include_source = infile_open(include_name); + if (!include_source) { + fatal_error("cannot open include file: %s", include_name); + } + + TokenArray* include_pp_tokens = do_preprocess(include_source, pp->include_depth + 1, pp->macros); + tokens_pop(include_pp_tokens); // pop EOF token + pp->pos = replace_pp_tokens(pp, directive_token_pos, pp->pos, include_pp_tokens); +} + +void process_include_directive(Preprocessor* pp, int directive_token_pos) { + next_pp_token(pp); + skip_whitespaces(pp); + const char* include_name = read_include_header_name(pp); + const char* include_name_resolved = resolve_include_name(pp, include_name); + if (include_name_resolved == NULL) { + fatal_error("cannot resolve include file name: %s", include_name); + } + expand_include_directive(pp, directive_token_pos, include_name_resolved); +} + +// ws ::= many0(<whitespace>) +// macro-parameters ::= '(' <ws> opt(<identifier> <ws> many0(',' <ws> <identifier> <ws>)) ')' +TokenArray* pp_parse_macro_parameters(Preprocessor* pp) { + TokenArray* parameters = calloc(1, sizeof(TokenArray)); + tokens_init(parameters, 2); + + // '(' is consumed by caller. + skip_whitespaces(pp); + Token* tok = next_pp_token(pp); + if (tok->kind == TokenKind_ident) { + *tokens_push_new(parameters) = *tok; + skip_whitespaces(pp); + while (peek_pp_token(pp)->kind == TokenKind_comma) { + next_pp_token(pp); + skip_whitespaces(pp); + tok = next_pp_token(pp); + if (tok->kind != TokenKind_ident) { + fatal_error("%s:%d: invalid macro syntax", tok->loc.filename, tok->loc.line); + } + *tokens_push_new(parameters) = *tok; + } + tok = next_pp_token(pp); + } + if (tok->kind != TokenKind_paren_r) { + fatal_error("%s:%d: invalid macro syntax", tok->loc.filename, tok->loc.line); + } + + return parameters; +} + +void process_define_directive(Preprocessor* pp, int directive_token_pos) { + next_pp_token(pp); + skip_whitespaces(pp); + Token* macro_name = next_pp_token(pp); + + if (macro_name->kind != TokenKind_ident) { + fatal_error("%s:%d: invalid #define syntax", macro_name->loc.filename, macro_name->loc.line); + } + + if (peek_pp_token(pp)->kind == TokenKind_paren_l) { + next_pp_token(pp); + TokenArray* parameters = pp_parse_macro_parameters(pp); + int replacements_start_pos = pp->pos; + seek_to_next_newline(pp); + if (pp_eof(pp)) { + fatal_error("%s:%d: invalid #define syntax", macro_name->loc.filename, macro_name->loc.line); + } + Macro* macro = macros_push_new(pp->macros); + macro->kind = MacroKind_func; + macro->name = macro_name->value.string; + macro->parameters = *parameters; + int n_replacements = pp->pos - replacements_start_pos; + tokens_init(¯o->replacements, n_replacements); + for (int i = 0; i < n_replacements; ++i) { + *tokens_push_new(¯o->replacements) = *pp_token_at(pp, replacements_start_pos + i); + } + } else { + int replacements_start_pos = pp->pos; + seek_to_next_newline(pp); + if (pp_eof(pp)) { + fatal_error("%s:%d: invalid #define syntax", macro_name->loc.filename, macro_name->loc.line); + } + Macro* macro = macros_push_new(pp->macros); + macro->kind = MacroKind_obj; + macro->name = macro_name->value.string; + int n_replacements = pp->pos - replacements_start_pos; + tokens_init(¯o->replacements, n_replacements); + for (int i = 0; i < n_replacements; ++i) { + *tokens_push_new(¯o->replacements) = *pp_token_at(pp, replacements_start_pos + i); + } + } + remove_directive_tokens(pp, directive_token_pos, pp->pos); +} + +void process_undef_directive(Preprocessor* pp, int directive_token_pos) { + next_pp_token(pp); + skip_whitespaces(pp); + Token* macro_name = peek_pp_token(pp); + if (macro_name->kind == TokenKind_ident) { + next_pp_token(pp); + int macro_idx = find_macro(pp, macro_name->value.string); + if (macro_idx != -1) { + undef_macro(pp, macro_idx); + } + } + remove_directive_tokens(pp, directive_token_pos, pp->pos); +} + +void process_line_directive(Preprocessor* pp, int directive_token_pos) { + unimplemented(); +} + +void process_error_directive(Preprocessor* pp, int directive_token_pos) { + unimplemented(); +} + +void process_pragma_directive(Preprocessor* pp, int directive_token_pos) { + unimplemented(); +} + +// ws ::= many0(<Whitespace>) +// macro-arguments ::= '(' <ws> opt(<any-token> <ws> many0(',' <ws> <any-token> <ws>)) ')' +MacroArgArray* pp_parse_macro_arguments(Preprocessor* pp) { + MacroArgArray* args = macroargs_new(); + + Token* tok = next_pp_token(pp); + if (tok->kind != TokenKind_paren_l) { + fatal_error("%s:%d: invalid macro syntax", tok->loc.filename, tok->loc.line); + } + skip_whitespaces(pp); + tok = next_pp_token(pp); + if (tok->kind != TokenKind_paren_r) { + MacroArg* arg = macroargs_push_new(args); + tokens_init(&arg->tokens, 1); + *tokens_push_new(&arg->tokens) = *tok; + skip_whitespaces(pp); + while (peek_pp_token(pp)->kind == TokenKind_comma) { + next_pp_token(pp); + skip_whitespaces(pp); + tok = next_pp_token(pp); + arg = macroargs_push_new(args); + tokens_init(&arg->tokens, 1); + *tokens_push_new(&arg->tokens) = *tok; + } + tok = next_pp_token(pp); + } + if (tok->kind != TokenKind_paren_r) { + fatal_error("%s:%d: invalid macro syntax", tok->loc.filename, tok->loc.line); + } + + return args; +} + +BOOL expand_macro(Preprocessor* pp) { + int macro_name_pos = pp->pos; + Token* macro_name = next_pp_token(pp); + int macro_idx = find_macro(pp, macro_name->value.string); + if (macro_idx == -1) { + return FALSE; + } + + SourceLocation original_loc = macro_name->loc; + Macro* macro = &pp->macros->data[macro_idx]; + if (macro->kind == MacroKind_func) { + MacroArgArray* args = pp_parse_macro_arguments(pp); + replace_pp_tokens(pp, macro_name_pos, pp->pos, ¯o->replacements); + for (int i = 0; i < macro->replacements.len; ++i) { + Token* tok = pp_token_at(pp, macro_name_pos + i); + int macro_param_idx = macro_find_param(macro, tok); + if (macro_param_idx != -1) { + replace_pp_tokens(pp, macro_name_pos + i, macro_name_pos + i + 1, &args->data[macro_param_idx].tokens); + } + } + // Inherit a source location from the original macro token. + for (int i = 0; i < macro->replacements.len; ++i) { + pp_token_at(pp, macro_name_pos + i)->loc = original_loc; + } + } else if (macro->kind == MacroKind_obj) { + replace_pp_tokens(pp, macro_name_pos, macro_name_pos + 1, ¯o->replacements); + // Inherit a source location from the original macro token. + for (int i = 0; i < macro->replacements.len; ++i) { + pp_token_at(pp, macro_name_pos + i)->loc = original_loc; + } + } else if (macro->kind == MacroKind_builtin_file) { + Token file_tok; + file_tok.kind = TokenKind_literal_str; + file_tok.value.string = macro_name->loc.filename; + file_tok.loc.filename = NULL; + file_tok.loc.line = 0; + replace_single_pp_token(pp, macro_name_pos, &file_tok); + } else if (macro->kind == MacroKind_builtin_line) { + Token line_tok; + line_tok.kind = TokenKind_literal_int; + line_tok.value.integer = macro_name->loc.line; + line_tok.loc.filename = NULL; + line_tok.loc.line = 0; + replace_single_pp_token(pp, macro_name_pos, &line_tok); + } else { + unreachable(); + } + return TRUE; +} + +void process_pp_directive(Preprocessor* pp) { + int first_token_pos = pp->pos; + Token* tok = peek_pp_token(pp); + if (tok->kind == TokenKind_pp_directive_endif) { + process_endif_directive(pp, first_token_pos); + } else if (tok->kind == TokenKind_pp_directive_else) { + process_else_directive(pp, first_token_pos); + } else if (tok->kind == TokenKind_pp_directive_elif) { + process_elif_directive(pp, first_token_pos); + } else if (skip_pp_tokens(pp)) { + make_token_whitespace(next_pp_token(pp)); + } else if (tok->kind == TokenKind_pp_directive_if) { + process_if_directive(pp, first_token_pos); + } else if (tok->kind == TokenKind_pp_directive_ifdef) { + process_ifdef_directive(pp, first_token_pos); + } else if (tok->kind == TokenKind_pp_directive_ifndef) { + process_ifndef_directive(pp, first_token_pos); + } else if (tok->kind == TokenKind_pp_directive_include) { + process_include_directive(pp, first_token_pos); + } else if (tok->kind == TokenKind_pp_directive_define) { + process_define_directive(pp, first_token_pos); + } else if (tok->kind == TokenKind_pp_directive_undef) { + process_undef_directive(pp, first_token_pos); + } else if (tok->kind == TokenKind_pp_directive_line) { + process_line_directive(pp, first_token_pos); + } else if (tok->kind == TokenKind_pp_directive_error) { + process_error_directive(pp, first_token_pos); + } else if (tok->kind == TokenKind_pp_directive_pragma) { + process_pragma_directive(pp, first_token_pos); + } else if (tok->kind == TokenKind_ident) { + BOOL expanded = expand_macro(pp); + if (expanded) { + // A macro may expand to another macro. Re-scan the expanded tokens. + // TODO: if the macro is defined recursively, it causes infinite loop. + } else { + next_pp_token(pp); + } + } else { + next_pp_token(pp); + } +} + +void process_pp_directives(Preprocessor* pp) { + while (!pp_eof(pp)) { + process_pp_directive(pp); + } +} + +void pp_dump(Token* t, BOOL include_whitespace) { + for (; t->kind != TokenKind_eof; ++t) { + if (t->kind == TokenKind_whitespace && !include_whitespace) { + continue; + } + fprintf(stderr, "%s\n", token_stringify(t)); + } +} + +char* get_ducc_include_path() { + const char* self_dir = get_self_dir(); + char* buf = calloc(strlen(self_dir) + strlen("/include") + 1, sizeof(char)); + sprintf(buf, "%s/include", self_dir); + return buf; +} + +TokenArray* do_preprocess(InFile* src, int depth, MacroArray* macros) { + TokenArray* pp_tokens = pp_tokenize(src); + Preprocessor* pp = preprocessor_new(pp_tokens, depth, macros); + add_include_path(pp, get_ducc_include_path()); + add_include_path(pp, "/usr/include/x86_64-linux-gnu"); + add_include_path(pp, "/usr/include"); + process_pp_directives(pp); + return pp->pp_tokens; +} + +TokenArray* preprocess(InFile* src) { + MacroArray* macros = macros_new(); + add_predefined_macros(macros); + return do_preprocess(src, 0, macros); +} diff --git a/src/std.h b/src/std.h new file mode 100644 index 0000000..c1dcd66 --- /dev/null +++ b/src/std.h @@ -0,0 +1,50 @@ +#include <stddef.h> + +struct FILE; +typedef struct FILE FILE; + +extern FILE* stdin; +extern FILE* stdout; +extern FILE* stderr; + +int atoi(const char*); +void* calloc(size_t, size_t); +void exit(int); +int fclose(FILE*); +int fprintf(FILE*, const char*, ...); +char* fgets(char*, int, FILE*); +FILE* fopen(const char*, const char*); +int getchar(void); +int isalnum(int); +int isalpha(int); +int isdigit(int); +int isspace(int); +void* memcpy(void*, const void*, size_t); +void* memmove(void*, const void*, size_t); +void* memset(void*, int, size_t); +int printf(const char*, ...); +void* realloc(void*, size_t); +int sprintf(char*, const char*, ...); +int strcmp(const char*, const char*); +size_t strlen(const char*); +int strncmp(const char*, const char*, size_t); +char* strndup(const char*, size_t); +char* strstr(const char*, const char*); + +#include <stdarg.h> + +int vfprintf(FILE*, const char*, va_list); + +#define F_OK 0 +#define R_OK 4 +int access(const char*, int); + +#define PATH_MAX 4096 + +typedef long ssize_t; +ssize_t readlink(const char*, char*, size_t); +char* dirname(char*); + +#define BOOL int +#define TRUE 1 +#define FALSE 0 diff --git a/src/sys.c b/src/sys.c new file mode 100644 index 0000000..aa7b13d --- /dev/null +++ b/src/sys.c @@ -0,0 +1,15 @@ +char* get_self_path() { + char* buf = calloc(PATH_MAX, sizeof(char)); + ssize_t len = readlink("/proc/self/exe", buf, PATH_MAX - 1); + if (len == -1 || len == PATH_MAX - 1) { + return NULL; + } + buf[len] = '\0'; + return buf; +} + +// It returns a path not including final / except for root directory. +char* get_self_dir() { + char* path = get_self_path(); + return dirname(path); +} diff --git a/src/tokenize.c b/src/tokenize.c new file mode 100644 index 0000000..a7e99b2 --- /dev/null +++ b/src/tokenize.c @@ -0,0 +1,175 @@ +struct Lexer { + TokenArray* src; + TokenArray* tokens; +}; +typedef struct Lexer Lexer; + +Lexer* lexer_new(TokenArray* pp_tokens) { + Lexer* l = calloc(1, sizeof(Lexer)); + l->src = pp_tokens; + l->tokens = calloc(1, sizeof(TokenArray)); + // l->tokens need not store whitespace tokens. + tokens_init(l->tokens, pp_tokens->len / 2); + return l; +} + +void tokenize_all(Lexer* l) { + for (int pos = 0; pos < l->src->len; ++pos) { + Token* pp_tok = &l->src->data[pos]; + TokenKind k = pp_tok->kind; + if (k == TokenKind_whitespace || k == TokenKind_newline) { + continue; + } + Token* tok = tokens_push_new(l->tokens); + tok->loc = pp_tok->loc; + if (k == TokenKind_character_constant) { + tok->kind = TokenKind_literal_int; + int ch = pp_tok->value.string[1]; + if (ch == '\\') { + ch = pp_tok->value.string[2]; + if (ch == 'a') { + ch = '\a'; + } else if (ch == 'b') { + ch = '\b'; + } else if (ch == 'f') { + ch = '\f'; + } else if (ch == 'n') { + ch = '\n'; + } else if (ch == 'r') { + ch = '\r'; + } else if (ch == 't') { + ch = '\t'; + } else if (ch == 'v') { + ch = '\v'; + } else if (ch == '0') { + ch = '\0'; + } + } + tok->value.integer = ch; + } else if (k == TokenKind_ident) { + if (strcmp(pp_tok->value.string, "alignas") == 0) { + tok->kind = TokenKind_keyword_alignas; + } else if (strcmp(pp_tok->value.string, "alignof") == 0) { + tok->kind = TokenKind_keyword_alignof; + } else if (strcmp(pp_tok->value.string, "auto") == 0) { + tok->kind = TokenKind_keyword_auto; + } else if (strcmp(pp_tok->value.string, "bool") == 0) { + tok->kind = TokenKind_keyword_bool; + } else if (strcmp(pp_tok->value.string, "break") == 0) { + tok->kind = TokenKind_keyword_break; + } else if (strcmp(pp_tok->value.string, "case") == 0) { + tok->kind = TokenKind_keyword_case; + } else if (strcmp(pp_tok->value.string, "char") == 0) { + tok->kind = TokenKind_keyword_char; + } else if (strcmp(pp_tok->value.string, "const") == 0) { + tok->kind = TokenKind_keyword_const; + } else if (strcmp(pp_tok->value.string, "constexpr") == 0) { + tok->kind = TokenKind_keyword_constexpr; + } else if (strcmp(pp_tok->value.string, "continue") == 0) { + tok->kind = TokenKind_keyword_continue; + } else if (strcmp(pp_tok->value.string, "default") == 0) { + tok->kind = TokenKind_keyword_default; + } else if (strcmp(pp_tok->value.string, "do") == 0) { + tok->kind = TokenKind_keyword_do; + } else if (strcmp(pp_tok->value.string, "double") == 0) { + tok->kind = TokenKind_keyword_double; + } else if (strcmp(pp_tok->value.string, "else") == 0) { + tok->kind = TokenKind_keyword_else; + } else if (strcmp(pp_tok->value.string, "enum") == 0) { + tok->kind = TokenKind_keyword_enum; + } else if (strcmp(pp_tok->value.string, "extern") == 0) { + tok->kind = TokenKind_keyword_extern; + } else if (strcmp(pp_tok->value.string, "false") == 0) { + tok->kind = TokenKind_keyword_false; + } else if (strcmp(pp_tok->value.string, "float") == 0) { + tok->kind = TokenKind_keyword_float; + } else if (strcmp(pp_tok->value.string, "for") == 0) { + tok->kind = TokenKind_keyword_for; + } else if (strcmp(pp_tok->value.string, "goto") == 0) { + tok->kind = TokenKind_keyword_goto; + } else if (strcmp(pp_tok->value.string, "if") == 0) { + tok->kind = TokenKind_keyword_if; + } else if (strcmp(pp_tok->value.string, "inline") == 0) { + tok->kind = TokenKind_keyword_inline; + } else if (strcmp(pp_tok->value.string, "int") == 0) { + tok->kind = TokenKind_keyword_int; + } else if (strcmp(pp_tok->value.string, "long") == 0) { + tok->kind = TokenKind_keyword_long; + } else if (strcmp(pp_tok->value.string, "nullptr") == 0) { + tok->kind = TokenKind_keyword_nullptr; + } else if (strcmp(pp_tok->value.string, "register") == 0) { + tok->kind = TokenKind_keyword_register; + } else if (strcmp(pp_tok->value.string, "restrict") == 0) { + tok->kind = TokenKind_keyword_restrict; + } else if (strcmp(pp_tok->value.string, "return") == 0) { + tok->kind = TokenKind_keyword_return; + } else if (strcmp(pp_tok->value.string, "short") == 0) { + tok->kind = TokenKind_keyword_short; + } else if (strcmp(pp_tok->value.string, "signed") == 0) { + tok->kind = TokenKind_keyword_signed; + } else if (strcmp(pp_tok->value.string, "sizeof") == 0) { + tok->kind = TokenKind_keyword_sizeof; + } else if (strcmp(pp_tok->value.string, "static") == 0) { + tok->kind = TokenKind_keyword_static; + } else if (strcmp(pp_tok->value.string, "static_assert") == 0) { + tok->kind = TokenKind_keyword_static_assert; + } else if (strcmp(pp_tok->value.string, "struct") == 0) { + tok->kind = TokenKind_keyword_struct; + } else if (strcmp(pp_tok->value.string, "switch") == 0) { + tok->kind = TokenKind_keyword_switch; + } else if (strcmp(pp_tok->value.string, "thread_local") == 0) { + tok->kind = TokenKind_keyword_thread_local; + } else if (strcmp(pp_tok->value.string, "true") == 0) { + tok->kind = TokenKind_keyword_true; + } else if (strcmp(pp_tok->value.string, "typedef") == 0) { + tok->kind = TokenKind_keyword_typedef; + } else if (strcmp(pp_tok->value.string, "typeof") == 0) { + tok->kind = TokenKind_keyword_typeof; + } else if (strcmp(pp_tok->value.string, "typeof_unqual") == 0) { + tok->kind = TokenKind_keyword_typeof_unqual; + } else if (strcmp(pp_tok->value.string, "union") == 0) { + tok->kind = TokenKind_keyword_union; + } else if (strcmp(pp_tok->value.string, "unsigned") == 0) { + tok->kind = TokenKind_keyword_unsigned; + } else if (strcmp(pp_tok->value.string, "void") == 0) { + tok->kind = TokenKind_keyword_void; + } else if (strcmp(pp_tok->value.string, "volatile") == 0) { + tok->kind = TokenKind_keyword_volatile; + } else if (strcmp(pp_tok->value.string, "while") == 0) { + tok->kind = TokenKind_keyword_while; + } else if (strcmp(pp_tok->value.string, "_Atomic") == 0) { + tok->kind = TokenKind_keyword__Atomic; + } else if (strcmp(pp_tok->value.string, "_BitInt") == 0) { + tok->kind = TokenKind_keyword__BitInt; + } else if (strcmp(pp_tok->value.string, "_Complex") == 0) { + tok->kind = TokenKind_keyword__Complex; + } else if (strcmp(pp_tok->value.string, "_Decimal128") == 0) { + tok->kind = TokenKind_keyword__Decimal128; + } else if (strcmp(pp_tok->value.string, "_Decimal32") == 0) { + tok->kind = TokenKind_keyword__Decimal32; + } else if (strcmp(pp_tok->value.string, "_Decimal64") == 0) { + tok->kind = TokenKind_keyword__Decimal64; + } else if (strcmp(pp_tok->value.string, "_Generic") == 0) { + tok->kind = TokenKind_keyword__Generic; + } else if (strcmp(pp_tok->value.string, "_Imaginary") == 0) { + tok->kind = TokenKind_keyword__Imaginary; + } else if (strcmp(pp_tok->value.string, "_Noreturn") == 0) { + tok->kind = TokenKind_keyword__Noreturn; + } else { + tok->kind = TokenKind_ident; + tok->value = pp_tok->value; + } + } else if (k == TokenKind_other) { + unreachable(); + } else { + tok->kind = pp_tok->kind; + tok->value = pp_tok->value; + } + } +} + +TokenArray* tokenize(TokenArray* pp_tokens) { + Lexer* l = lexer_new(pp_tokens); + tokenize_all(l); + return l->tokens; +} |
