diff options
Diffstat (limited to 'src/codegen.c')
| -rw-r--r-- | src/codegen.c | 595 |
1 files changed, 595 insertions, 0 deletions
diff --git a/src/codegen.c b/src/codegen.c new file mode 100644 index 0000000..a1167d4 --- /dev/null +++ b/src/codegen.c @@ -0,0 +1,595 @@ +enum GenMode { + GenMode_lval, + GenMode_rval, +}; +typedef enum GenMode GenMode; + +struct CodeGen { + int next_label; + int* loop_labels; + AstNode* current_func; +}; +typedef struct CodeGen CodeGen; + +CodeGen* codegen_new() { + CodeGen* g = calloc(1, sizeof(CodeGen)); + g->next_label = 1; + g->loop_labels = calloc(1024, sizeof(int)); + return g; +} + +int codegen_new_label(CodeGen* g) { + int new_label = g->next_label; + ++g->next_label; + return new_label; +} + +void codegen_expr(CodeGen* g, AstNode* ast, GenMode gen_mode); +void codegen_stmt(CodeGen* g, AstNode* ast); + +const char* param_reg(int n) { + if (n == 0) { + return "rdi"; + } else if (n == 1) { + return "rsi"; + } else if (n == 2) { + return "rdx"; + } else if (n == 3) { + return "rcx"; + } else if (n == 4) { + return "r8"; + } else if (n == 5) { + return "r9"; + } else { + unreachable(); + } +} + +void codegen_func_prologue(CodeGen* g, AstNode* ast) { + printf(" push rbp\n"); + printf(" mov rbp, rsp\n"); + for (int i = 0; i < ast->node_params->node_len; ++i) { + printf(" push %s\n", param_reg(i)); + } + printf(" sub rsp, %d\n", ast->node_stack_size); +} + +void codegen_func_epilogue(CodeGen* g, AstNode* ast) { + printf(" mov rsp, rbp\n"); + printf(" pop rbp\n"); + printf(" ret\n"); +} + +void codegen_int_expr(CodeGen* g, AstNode* ast) { + printf(" push %d\n", ast->node_int_value); +} + +void codegen_str_expr(CodeGen* g, AstNode* ast) { + printf(" mov rax, OFFSET FLAG:.Lstr__%d\n", ast->node_idx); + printf(" push rax\n"); +} + +void codegen_unary_expr(CodeGen* g, AstNode* ast) { + codegen_expr(g, ast->node_operand, GenMode_rval); + if (ast->node_op == TokenKind_not) { + printf(" pop rax\n"); + printf(" mov rdi, 0\n"); + printf(" cmp rax, rdi\n"); + printf(" sete al\n"); + printf(" movzb rax, al\n"); + printf(" push rax\n"); + } else { + unreachable(); + } +} + +void codegen_ref_expr(CodeGen* g, AstNode* ast, GenMode gen_mode) { + codegen_expr(g, ast->node_operand, GenMode_lval); +} + +// "reg" stores the address of the expression to be pushed. +void codegen_push_expr(const char* reg, int size) { + if (size == 1) { + printf(" movsx %s, BYTE PTR [%s]\n", reg, reg); + printf(" push %s\n", reg); + } else if (size == 2) { + printf(" movsx %s, WORD PTR [%s]\n", reg, reg); + printf(" push %s\n", reg); + } else if (size == 4) { + printf(" movsxd %s, DWORD PTR [%s]\n", reg, reg); + printf(" push %s\n", reg); + } else if (size == 8) { + printf(" mov %s, [%s]\n", reg, reg); + printf(" push %s\n", reg); + } else { + // Perform bitwise copy. Use r10 register as temporary space. + // Note: rsp must be aligned to 8. + printf(" sub rsp, %d\n", to_aligned(size, 8)); + for (int i = 0; i < size; ++i) { + // Copy a sinle byte from the address that "reg" points to to the stack via r10 register. + printf(" mov r10b, [%s+%d]\n", reg, i); + printf(" mov [rsp+%d], r10b\n", i); + } + } +} + +void codegen_lval2rval(Type* ty) { + if (ty->kind == TypeKind_array) { + return; + } + + printf(" pop rax\n"); + codegen_push_expr("rax", type_sizeof(ty)); +} + +void codegen_deref_expr(CodeGen* g, AstNode* ast, GenMode gen_mode) { + codegen_expr(g, ast->node_operand, GenMode_rval); + if (gen_mode == GenMode_rval) { + codegen_lval2rval(ast->node_operand->ty->base); + } +} + +void codegen_logical_expr(CodeGen* g, AstNode* ast) { + int label = codegen_new_label(g); + + if (ast->node_op == TokenKind_andand) { + codegen_expr(g, ast->node_lhs, GenMode_rval); + printf(" pop rax\n"); + printf(" cmp rax, 0\n"); + printf(" je .Lelse%d\n", label); + codegen_expr(g, ast->node_rhs, GenMode_rval); + printf(" jmp .Lend%d\n", label); + printf(".Lelse%d:\n", label); + printf(" push 0\n"); + printf(".Lend%d:\n", label); + } else { + codegen_expr(g, ast->node_lhs, GenMode_rval); + printf(" pop rax\n"); + printf(" cmp rax, 0\n"); + printf(" je .Lelse%d\n", label); + printf(" push 1\n"); + printf(" jmp .Lend%d\n", label); + printf(".Lelse%d:\n", label); + codegen_expr(g, ast->node_rhs, GenMode_rval); + printf(".Lend%d:\n", label); + } +} + +void codegen_binary_expr(CodeGen* g, AstNode* ast, GenMode gen_mode) { + codegen_expr(g, ast->node_lhs, gen_mode); + codegen_expr(g, ast->node_rhs, gen_mode); + printf(" pop rdi\n"); + printf(" pop rax\n"); + if (ast->node_op == TokenKind_plus) { + printf(" add rax, rdi\n"); + } else if (ast->node_op == TokenKind_minus) { + printf(" sub rax, rdi\n"); + } else if (ast->node_op == TokenKind_star) { + printf(" imul rax, rdi\n"); + } else if (ast->node_op == TokenKind_slash) { + printf(" cqo\n"); + printf(" idiv rdi\n"); + } else if (ast->node_op == TokenKind_percent) { + printf(" cqo\n"); + printf(" idiv rdi\n"); + printf(" mov rax, rdx\n"); + } else if (ast->node_op == TokenKind_or) { + printf(" or rax, rdi\n"); + } else if (ast->node_op == TokenKind_lshift) { + printf(" mov rcx, rdi\n"); + printf(" shl rax, cl\n"); + } else if (ast->node_op == TokenKind_rshift) { + // TODO: check if the operand is signed or unsigned + printf(" mov rcx, rdi\n"); + printf(" sar rax, cl\n"); + } else if (ast->node_op == TokenKind_eq) { + printf(" cmp rax, rdi\n"); + printf(" sete al\n"); + printf(" movzb rax, al\n"); + } else if (ast->node_op == TokenKind_ne) { + printf(" cmp rax, rdi\n"); + printf(" setne al\n"); + printf(" movzb rax, al\n"); + } else if (ast->node_op == TokenKind_lt) { + printf(" cmp rax, rdi\n"); + printf(" setl al\n"); + printf(" movzb rax, al\n"); + } else if (ast->node_op == TokenKind_le) { + printf(" cmp rax, rdi\n"); + printf(" setle al\n"); + printf(" movzb rax, al\n"); + } else { + unreachable(); + } + printf(" push rax\n"); +} + +void codegen_cond_expr(CodeGen* g, AstNode* ast, GenMode gen_mode) { + int label = codegen_new_label(g); + + codegen_expr(g, ast->node_cond, GenMode_rval); + printf(" pop rax\n"); + printf(" cmp rax, 0\n"); + printf(" je .Lelse%d\n", label); + codegen_expr(g, ast->node_then, gen_mode); + printf(" jmp .Lend%d\n", label); + printf(".Lelse%d:\n", label); + codegen_expr(g, ast->node_else, gen_mode); + printf(".Lend%d:\n", label); +} + +void codegen_assign_expr_helper(CodeGen* g, AstNode* ast) { + if (ast->node_op == TokenKind_assign) { + return; + } + + printf(" pop rdi\n"); + printf(" push [rsp]\n"); + codegen_lval2rval(ast->node_lhs->ty); + printf(" pop rax\n"); + + if (ast->node_op == TokenKind_assign_add) { + printf(" add rax, rdi\n"); + } else if (ast->node_op == TokenKind_assign_sub) { + printf(" sub rax, rdi\n"); + } else if (ast->node_op == TokenKind_assign_mul) { + printf(" imul rax, rdi\n"); + } else if (ast->node_op == TokenKind_assign_div) { + printf(" cqo\n"); + printf(" idiv rdi\n"); + } else if (ast->node_op == TokenKind_assign_mod) { + printf(" cqo\n"); + printf(" idiv rdi\n"); + printf(" mov rax, rdx\n"); + } else { + unreachable(); + } + + printf(" push rax\n"); +} + +void codegen_assign_expr(CodeGen* g, AstNode* ast) { + int sizeof_lhs = type_sizeof(ast->node_lhs->ty); + int sizeof_rhs = type_sizeof(ast->node_rhs->ty); + + codegen_expr(g, ast->node_lhs, GenMode_lval); + codegen_expr(g, ast->node_rhs, GenMode_rval); + codegen_assign_expr_helper(g, ast); + if (sizeof_lhs == 1) { + printf(" pop rdi\n"); + printf(" pop rax\n"); + printf(" mov BYTE PTR [rax], dil\n"); + printf(" push rdi\n"); + } else if (sizeof_lhs == 2) { + printf(" pop rdi\n"); + printf(" pop rax\n"); + printf(" mov WORD PTR [rax], di\n"); + printf(" push rdi\n"); + } else if (sizeof_lhs == 4) { + printf(" pop rdi\n"); + printf(" pop rax\n"); + printf(" mov DWORD PTR [rax], edi\n"); + printf(" push rdi\n"); + } else if (sizeof_lhs == 8) { + printf(" pop rdi\n"); + printf(" pop rax\n"); + printf(" mov [rax], rdi\n"); + printf(" push rdi\n"); + } else { + if (ast->node_op != TokenKind_assign) { + unimplemented(); + } + // Note: rsp must be aligned to 8. + int aligned_size = to_aligned(sizeof_lhs, 8); + printf(" mov rax, [rsp+%d]\n", aligned_size); + // Perform bitwise copy. Use r10 register as temporary space. + for (int i = 0; i < aligned_size; ++i) { + // Copy a sinle byte from the stack to the address that rax points to via r10 register. + printf(" mov r10b, [rsp+%d]\n", i); + printf(" mov [rax+%d], r10b\n", i); + } + // Pop the RHS value and the LHS address. + printf(" add rsp, %d\n", aligned_size + 8); + // TODO: dummy + printf(" push 0\n"); + } +} + +void codegen_func_call(CodeGen* g, AstNode* ast) { + const char* func_name = ast->name; + + if (strcmp(func_name, "__ducc_va_start") == 0) { + int stack_size = g->current_func->node_stack_size; + printf(" # __ducc_va_start BEGIN\n"); + for (int i = 0; i < 6; ++i) { + printf(" mov rax, %s\n", param_reg(i)); + printf(" mov [rbp-%d], rax\n", 8 + (stack_size - 4 - i) * 8); + } + AstNode* va_list_args = ast->node_args->node_items; + codegen_expr(g, va_list_args, GenMode_lval); + printf(" pop rdi\n"); + printf(" mov rax, rbp\n"); + printf(" sub rax, %d\n", 8 + (stack_size - 1) * 8); + printf(" mov [rdi], rax\n"); + printf(" mov DWORD PTR [rax], 8\n"); + printf(" mov DWORD PTR [rax+4], 0\n"); + printf(" mov QWORD PTR [rax+8], 0\n"); + printf(" mov rdi, rbp\n"); + printf(" sub rdi, %d\n", 8 + (stack_size - 4) * 8); + printf(" mov QWORD PTR [rax+16], rdi\n"); + printf(" # __ducc_va_start END\n"); + return; + } + + AstNode* args = ast->node_args; + for (int i = 0; i < args->node_len; ++i) { + AstNode* arg = args->node_items + i; + codegen_expr(g, arg, GenMode_rval); + } + for (int i = args->node_len - 1; i >= 0; --i) { + printf(" pop %s\n", param_reg(i)); + } + + int label = codegen_new_label(g); + + printf(" mov rax, rsp\n"); + printf(" and rax, 15\n"); + printf(" cmp rax, 0\n"); + printf(" je .Laligned%d\n", label); + + printf(" mov rax, 0\n"); + printf(" sub rsp, 8\n"); + printf(" call %s\n", func_name); + printf(" add rsp, 8\n"); + printf(" push rax\n"); + + printf(" jmp .Lend%d\n", label); + printf(".Laligned%d:\n", label); + + printf(" mov rax, 0\n"); + printf(" call %s\n", func_name); + printf(" push rax\n"); + + printf(".Lend%d:\n", label); +} + +void codegen_lvar(CodeGen* g, AstNode* ast, GenMode gen_mode) { + printf(" mov rax, rbp\n"); + printf(" sub rax, %d\n", ast->node_stack_offset); + printf(" push rax\n"); + if (gen_mode == GenMode_rval) { + codegen_lval2rval(ast->ty); + } +} + +void codegen_gvar(CodeGen* g, AstNode* ast, GenMode gen_mode) { + printf(" lea rax, %s[rip]\n", ast->name); + printf(" push rax\n"); + if (gen_mode == GenMode_rval) { + codegen_lval2rval(ast->ty); + } +} + +void codegen_composite_expr(CodeGen* g, AstNode* ast) { + // Standard C does not have composite expression, but ducc internally has. + for (int i = 0; i < ast->node_len; ++i) { + AstNode* expr = ast->node_items + i; + codegen_expr(g, expr, GenMode_rval); + if (i != ast->node_len - 1) { + // TODO: the expression on the stack can be more than 8 bytes. + printf(" pop rax\n"); + } + } +} + +void codegen_expr(CodeGen* g, AstNode* ast, GenMode gen_mode) { + if (ast->kind == AstNodeKind_int_expr) { + codegen_int_expr(g, ast); + } else if (ast->kind == AstNodeKind_str_expr) { + codegen_str_expr(g, ast); + } else if (ast->kind == AstNodeKind_unary_expr) { + codegen_unary_expr(g, ast); + } else if (ast->kind == AstNodeKind_ref_expr) { + codegen_ref_expr(g, ast, gen_mode); + } else if (ast->kind == AstNodeKind_deref_expr) { + codegen_deref_expr(g, ast, gen_mode); + } else if (ast->kind == AstNodeKind_binary_expr) { + codegen_binary_expr(g, ast, gen_mode); + } else if (ast->kind == AstNodeKind_cond_expr) { + codegen_cond_expr(g, ast, gen_mode); + } else if (ast->kind == AstNodeKind_logical_expr) { + codegen_logical_expr(g, ast); + } else if (ast->kind == AstNodeKind_assign_expr) { + codegen_assign_expr(g, ast); + } else if (ast->kind == AstNodeKind_func_call) { + codegen_func_call(g, ast); + } else if (ast->kind == AstNodeKind_lvar) { + codegen_lvar(g, ast, gen_mode); + } else if (ast->kind == AstNodeKind_gvar) { + codegen_gvar(g, ast, gen_mode); + } else if (ast->kind == AstNodeKind_list) { + codegen_composite_expr(g, ast); + } else { + unreachable(); + } +} + +void codegen_return_stmt(CodeGen* g, AstNode* ast) { + if (ast->node_expr) { + codegen_expr(g, ast->node_expr, GenMode_rval); + printf(" pop rax\n"); + } + codegen_func_epilogue(g, ast); +} + +void codegen_if_stmt(CodeGen* g, AstNode* ast) { + int label = codegen_new_label(g); + + codegen_expr(g, ast->node_cond, GenMode_rval); + printf(" pop rax\n"); + printf(" cmp rax, 0\n"); + printf(" je .Lelse%d\n", label); + codegen_stmt(g, ast->node_then); + printf(" jmp .Lend%d\n", label); + printf(".Lelse%d:\n", label); + if (ast->node_else) { + codegen_stmt(g, ast->node_else); + } + printf(".Lend%d:\n", label); +} + +void codegen_for_stmt(CodeGen* g, AstNode* ast) { + int label = codegen_new_label(g); + ++g->loop_labels; + *g->loop_labels = label; + + if (ast->node_init) { + codegen_expr(g, ast->node_init, GenMode_rval); + printf(" pop rax\n"); + } + printf(".Lbegin%d:\n", label); + codegen_expr(g, ast->node_cond, GenMode_rval); + printf(" pop rax\n"); + printf(" cmp rax, 0\n"); + printf(" je .Lend%d\n", label); + codegen_stmt(g, ast->node_body); + printf(".Lcontinue%d:\n", label); + if (ast->node_update) { + codegen_expr(g, ast->node_update, GenMode_rval); + printf(" pop rax\n"); + } + printf(" jmp .Lbegin%d\n", label); + printf(".Lend%d:\n", label); + + --g->loop_labels; +} + +void codegen_do_while_stmt(CodeGen* g, AstNode* ast) { + int label = codegen_new_label(g); + ++g->loop_labels; + *g->loop_labels = label; + + printf(".Lbegin%d:\n", label); + codegen_stmt(g, ast->node_body); + printf(".Lcontinue%d:\n", label); + codegen_expr(g, ast->node_cond, GenMode_rval); + printf(" pop rax\n"); + printf(" cmp rax, 0\n"); + printf(" je .Lend%d\n", label); + printf(" jmp .Lbegin%d\n", label); + printf(".Lend%d:\n", label); + + --g->loop_labels; +} + +void codegen_break_stmt(CodeGen* g, AstNode* ast) { + int label = *g->loop_labels; + printf(" jmp .Lend%d\n", label); +} + +void codegen_continue_stmt(CodeGen* g, AstNode* ast) { + int label = *g->loop_labels; + printf(" jmp .Lcontinue%d\n", label); +} + +void codegen_expr_stmt(CodeGen* g, AstNode* ast) { + codegen_expr(g, ast->node_expr, GenMode_rval); + // TODO: the expression on the stack can be more than 8 bytes. + printf(" pop rax\n"); +} + +void codegen_var_decl(CodeGen* g, AstNode* ast) { +} + +void codegen_nop(CodeGen* g, AstNode* ast) { +} + +void codegen_block_stmt(CodeGen* g, AstNode* ast) { + for (int i = 0; i < ast->node_len; ++i) { + AstNode* stmt = ast->node_items + i; + codegen_stmt(g, stmt); + } +} + +void codegen_stmt(CodeGen* g, AstNode* ast) { + if (ast->kind == AstNodeKind_list) { + codegen_block_stmt(g, ast); + } else if (ast->kind == AstNodeKind_return_stmt) { + codegen_return_stmt(g, ast); + } else if (ast->kind == AstNodeKind_if_stmt) { + codegen_if_stmt(g, ast); + } else if (ast->kind == AstNodeKind_for_stmt) { + codegen_for_stmt(g, ast); + } else if (ast->kind == AstNodeKind_do_while_stmt) { + codegen_do_while_stmt(g, ast); + } else if (ast->kind == AstNodeKind_break_stmt) { + codegen_break_stmt(g, ast); + } else if (ast->kind == AstNodeKind_continue_stmt) { + codegen_continue_stmt(g, ast); + } else if (ast->kind == AstNodeKind_expr_stmt) { + codegen_expr_stmt(g, ast); + } else if (ast->kind == AstNodeKind_lvar_decl) { + codegen_var_decl(g, ast); + } else if (ast->kind == AstNodeKind_nop) { + codegen_nop(g, ast); + } else { + unreachable(); + } +} + +void codegen_func(CodeGen* g, AstNode* ast) { + g->current_func = ast; + printf("%s:\n", ast->name); + + codegen_func_prologue(g, ast); + codegen_stmt(g, ast->node_body); + if (strcmp(ast->name, "main") == 0) { + // C99: 5.1.2.2.3 + printf(" mov rax, 0\n"); + } + codegen_func_epilogue(g, ast); + + printf("\n"); + g->current_func = NULL; +} + +void codegen(Program* prog) { + CodeGen* g = codegen_new(); + + printf(".intel_syntax noprefix\n\n"); + + // For GNU ld: + // https://sourceware.org/binutils/docs/ld/Options.html + printf(".section .note.GNU-stack,\"\",@progbits\n\n"); + + printf(".section .rodata\n\n"); + for (int i = 0; prog->str_literals[i]; ++i) { + printf(".Lstr__%d:\n", i + 1); + printf(" .string \"%s\"\n\n", prog->str_literals[i]); + } + + printf(".data\n\n"); + for (int i = 0; i < prog->vars->node_len; ++i) { + AstNode* var = prog->vars->node_items + i; + if (var->node_expr) { + if (var->ty->kind == TypeKind_char) + printf(" %s: .byte %d\n", var->name, var->node_expr->node_int_value); + else if (var->ty->kind == TypeKind_short) + printf(" %s: .word %d\n", var->name, var->node_expr->node_int_value); + else if (var->ty->kind == TypeKind_int) + printf(" %s: .int %d\n", var->name, var->node_expr->node_int_value); + else + unimplemented(); + } else { + printf(" %s: .zero %d\n", var->name, type_sizeof(var->ty)); + } + } + + printf(".globl main\n\n"); + + printf(".text\n\n"); + for (int i = 0; i < prog->funcs->node_len; ++i) { + AstNode* func = prog->funcs->node_items + i; + codegen_func(g, func); + } +} |
