diff options
Diffstat (limited to 'codegen.c')
| -rw-r--r-- | codegen.c | 595 |
1 files changed, 0 insertions, 595 deletions
diff --git a/codegen.c b/codegen.c deleted file mode 100644 index a1167d4..0000000 --- a/codegen.c +++ /dev/null @@ -1,595 +0,0 @@ -enum GenMode { - GenMode_lval, - GenMode_rval, -}; -typedef enum GenMode GenMode; - -struct CodeGen { - int next_label; - int* loop_labels; - AstNode* current_func; -}; -typedef struct CodeGen CodeGen; - -CodeGen* codegen_new() { - CodeGen* g = calloc(1, sizeof(CodeGen)); - g->next_label = 1; - g->loop_labels = calloc(1024, sizeof(int)); - return g; -} - -int codegen_new_label(CodeGen* g) { - int new_label = g->next_label; - ++g->next_label; - return new_label; -} - -void codegen_expr(CodeGen* g, AstNode* ast, GenMode gen_mode); -void codegen_stmt(CodeGen* g, AstNode* ast); - -const char* param_reg(int n) { - if (n == 0) { - return "rdi"; - } else if (n == 1) { - return "rsi"; - } else if (n == 2) { - return "rdx"; - } else if (n == 3) { - return "rcx"; - } else if (n == 4) { - return "r8"; - } else if (n == 5) { - return "r9"; - } else { - unreachable(); - } -} - -void codegen_func_prologue(CodeGen* g, AstNode* ast) { - printf(" push rbp\n"); - printf(" mov rbp, rsp\n"); - for (int i = 0; i < ast->node_params->node_len; ++i) { - printf(" push %s\n", param_reg(i)); - } - printf(" sub rsp, %d\n", ast->node_stack_size); -} - -void codegen_func_epilogue(CodeGen* g, AstNode* ast) { - printf(" mov rsp, rbp\n"); - printf(" pop rbp\n"); - printf(" ret\n"); -} - -void codegen_int_expr(CodeGen* g, AstNode* ast) { - printf(" push %d\n", ast->node_int_value); -} - -void codegen_str_expr(CodeGen* g, AstNode* ast) { - printf(" mov rax, OFFSET FLAG:.Lstr__%d\n", ast->node_idx); - printf(" push rax\n"); -} - -void codegen_unary_expr(CodeGen* g, AstNode* ast) { - codegen_expr(g, ast->node_operand, GenMode_rval); - if (ast->node_op == TokenKind_not) { - printf(" pop rax\n"); - printf(" mov rdi, 0\n"); - printf(" cmp rax, rdi\n"); - printf(" sete al\n"); - printf(" movzb rax, al\n"); - printf(" push rax\n"); - } else { - unreachable(); - } -} - -void codegen_ref_expr(CodeGen* g, AstNode* ast, GenMode gen_mode) { - codegen_expr(g, ast->node_operand, GenMode_lval); -} - -// "reg" stores the address of the expression to be pushed. -void codegen_push_expr(const char* reg, int size) { - if (size == 1) { - printf(" movsx %s, BYTE PTR [%s]\n", reg, reg); - printf(" push %s\n", reg); - } else if (size == 2) { - printf(" movsx %s, WORD PTR [%s]\n", reg, reg); - printf(" push %s\n", reg); - } else if (size == 4) { - printf(" movsxd %s, DWORD PTR [%s]\n", reg, reg); - printf(" push %s\n", reg); - } else if (size == 8) { - printf(" mov %s, [%s]\n", reg, reg); - printf(" push %s\n", reg); - } else { - // Perform bitwise copy. Use r10 register as temporary space. - // Note: rsp must be aligned to 8. - printf(" sub rsp, %d\n", to_aligned(size, 8)); - for (int i = 0; i < size; ++i) { - // Copy a sinle byte from the address that "reg" points to to the stack via r10 register. - printf(" mov r10b, [%s+%d]\n", reg, i); - printf(" mov [rsp+%d], r10b\n", i); - } - } -} - -void codegen_lval2rval(Type* ty) { - if (ty->kind == TypeKind_array) { - return; - } - - printf(" pop rax\n"); - codegen_push_expr("rax", type_sizeof(ty)); -} - -void codegen_deref_expr(CodeGen* g, AstNode* ast, GenMode gen_mode) { - codegen_expr(g, ast->node_operand, GenMode_rval); - if (gen_mode == GenMode_rval) { - codegen_lval2rval(ast->node_operand->ty->base); - } -} - -void codegen_logical_expr(CodeGen* g, AstNode* ast) { - int label = codegen_new_label(g); - - if (ast->node_op == TokenKind_andand) { - codegen_expr(g, ast->node_lhs, GenMode_rval); - printf(" pop rax\n"); - printf(" cmp rax, 0\n"); - printf(" je .Lelse%d\n", label); - codegen_expr(g, ast->node_rhs, GenMode_rval); - printf(" jmp .Lend%d\n", label); - printf(".Lelse%d:\n", label); - printf(" push 0\n"); - printf(".Lend%d:\n", label); - } else { - codegen_expr(g, ast->node_lhs, GenMode_rval); - printf(" pop rax\n"); - printf(" cmp rax, 0\n"); - printf(" je .Lelse%d\n", label); - printf(" push 1\n"); - printf(" jmp .Lend%d\n", label); - printf(".Lelse%d:\n", label); - codegen_expr(g, ast->node_rhs, GenMode_rval); - printf(".Lend%d:\n", label); - } -} - -void codegen_binary_expr(CodeGen* g, AstNode* ast, GenMode gen_mode) { - codegen_expr(g, ast->node_lhs, gen_mode); - codegen_expr(g, ast->node_rhs, gen_mode); - printf(" pop rdi\n"); - printf(" pop rax\n"); - if (ast->node_op == TokenKind_plus) { - printf(" add rax, rdi\n"); - } else if (ast->node_op == TokenKind_minus) { - printf(" sub rax, rdi\n"); - } else if (ast->node_op == TokenKind_star) { - printf(" imul rax, rdi\n"); - } else if (ast->node_op == TokenKind_slash) { - printf(" cqo\n"); - printf(" idiv rdi\n"); - } else if (ast->node_op == TokenKind_percent) { - printf(" cqo\n"); - printf(" idiv rdi\n"); - printf(" mov rax, rdx\n"); - } else if (ast->node_op == TokenKind_or) { - printf(" or rax, rdi\n"); - } else if (ast->node_op == TokenKind_lshift) { - printf(" mov rcx, rdi\n"); - printf(" shl rax, cl\n"); - } else if (ast->node_op == TokenKind_rshift) { - // TODO: check if the operand is signed or unsigned - printf(" mov rcx, rdi\n"); - printf(" sar rax, cl\n"); - } else if (ast->node_op == TokenKind_eq) { - printf(" cmp rax, rdi\n"); - printf(" sete al\n"); - printf(" movzb rax, al\n"); - } else if (ast->node_op == TokenKind_ne) { - printf(" cmp rax, rdi\n"); - printf(" setne al\n"); - printf(" movzb rax, al\n"); - } else if (ast->node_op == TokenKind_lt) { - printf(" cmp rax, rdi\n"); - printf(" setl al\n"); - printf(" movzb rax, al\n"); - } else if (ast->node_op == TokenKind_le) { - printf(" cmp rax, rdi\n"); - printf(" setle al\n"); - printf(" movzb rax, al\n"); - } else { - unreachable(); - } - printf(" push rax\n"); -} - -void codegen_cond_expr(CodeGen* g, AstNode* ast, GenMode gen_mode) { - int label = codegen_new_label(g); - - codegen_expr(g, ast->node_cond, GenMode_rval); - printf(" pop rax\n"); - printf(" cmp rax, 0\n"); - printf(" je .Lelse%d\n", label); - codegen_expr(g, ast->node_then, gen_mode); - printf(" jmp .Lend%d\n", label); - printf(".Lelse%d:\n", label); - codegen_expr(g, ast->node_else, gen_mode); - printf(".Lend%d:\n", label); -} - -void codegen_assign_expr_helper(CodeGen* g, AstNode* ast) { - if (ast->node_op == TokenKind_assign) { - return; - } - - printf(" pop rdi\n"); - printf(" push [rsp]\n"); - codegen_lval2rval(ast->node_lhs->ty); - printf(" pop rax\n"); - - if (ast->node_op == TokenKind_assign_add) { - printf(" add rax, rdi\n"); - } else if (ast->node_op == TokenKind_assign_sub) { - printf(" sub rax, rdi\n"); - } else if (ast->node_op == TokenKind_assign_mul) { - printf(" imul rax, rdi\n"); - } else if (ast->node_op == TokenKind_assign_div) { - printf(" cqo\n"); - printf(" idiv rdi\n"); - } else if (ast->node_op == TokenKind_assign_mod) { - printf(" cqo\n"); - printf(" idiv rdi\n"); - printf(" mov rax, rdx\n"); - } else { - unreachable(); - } - - printf(" push rax\n"); -} - -void codegen_assign_expr(CodeGen* g, AstNode* ast) { - int sizeof_lhs = type_sizeof(ast->node_lhs->ty); - int sizeof_rhs = type_sizeof(ast->node_rhs->ty); - - codegen_expr(g, ast->node_lhs, GenMode_lval); - codegen_expr(g, ast->node_rhs, GenMode_rval); - codegen_assign_expr_helper(g, ast); - if (sizeof_lhs == 1) { - printf(" pop rdi\n"); - printf(" pop rax\n"); - printf(" mov BYTE PTR [rax], dil\n"); - printf(" push rdi\n"); - } else if (sizeof_lhs == 2) { - printf(" pop rdi\n"); - printf(" pop rax\n"); - printf(" mov WORD PTR [rax], di\n"); - printf(" push rdi\n"); - } else if (sizeof_lhs == 4) { - printf(" pop rdi\n"); - printf(" pop rax\n"); - printf(" mov DWORD PTR [rax], edi\n"); - printf(" push rdi\n"); - } else if (sizeof_lhs == 8) { - printf(" pop rdi\n"); - printf(" pop rax\n"); - printf(" mov [rax], rdi\n"); - printf(" push rdi\n"); - } else { - if (ast->node_op != TokenKind_assign) { - unimplemented(); - } - // Note: rsp must be aligned to 8. - int aligned_size = to_aligned(sizeof_lhs, 8); - printf(" mov rax, [rsp+%d]\n", aligned_size); - // Perform bitwise copy. Use r10 register as temporary space. - for (int i = 0; i < aligned_size; ++i) { - // Copy a sinle byte from the stack to the address that rax points to via r10 register. - printf(" mov r10b, [rsp+%d]\n", i); - printf(" mov [rax+%d], r10b\n", i); - } - // Pop the RHS value and the LHS address. - printf(" add rsp, %d\n", aligned_size + 8); - // TODO: dummy - printf(" push 0\n"); - } -} - -void codegen_func_call(CodeGen* g, AstNode* ast) { - const char* func_name = ast->name; - - if (strcmp(func_name, "__ducc_va_start") == 0) { - int stack_size = g->current_func->node_stack_size; - printf(" # __ducc_va_start BEGIN\n"); - for (int i = 0; i < 6; ++i) { - printf(" mov rax, %s\n", param_reg(i)); - printf(" mov [rbp-%d], rax\n", 8 + (stack_size - 4 - i) * 8); - } - AstNode* va_list_args = ast->node_args->node_items; - codegen_expr(g, va_list_args, GenMode_lval); - printf(" pop rdi\n"); - printf(" mov rax, rbp\n"); - printf(" sub rax, %d\n", 8 + (stack_size - 1) * 8); - printf(" mov [rdi], rax\n"); - printf(" mov DWORD PTR [rax], 8\n"); - printf(" mov DWORD PTR [rax+4], 0\n"); - printf(" mov QWORD PTR [rax+8], 0\n"); - printf(" mov rdi, rbp\n"); - printf(" sub rdi, %d\n", 8 + (stack_size - 4) * 8); - printf(" mov QWORD PTR [rax+16], rdi\n"); - printf(" # __ducc_va_start END\n"); - return; - } - - AstNode* args = ast->node_args; - for (int i = 0; i < args->node_len; ++i) { - AstNode* arg = args->node_items + i; - codegen_expr(g, arg, GenMode_rval); - } - for (int i = args->node_len - 1; i >= 0; --i) { - printf(" pop %s\n", param_reg(i)); - } - - int label = codegen_new_label(g); - - printf(" mov rax, rsp\n"); - printf(" and rax, 15\n"); - printf(" cmp rax, 0\n"); - printf(" je .Laligned%d\n", label); - - printf(" mov rax, 0\n"); - printf(" sub rsp, 8\n"); - printf(" call %s\n", func_name); - printf(" add rsp, 8\n"); - printf(" push rax\n"); - - printf(" jmp .Lend%d\n", label); - printf(".Laligned%d:\n", label); - - printf(" mov rax, 0\n"); - printf(" call %s\n", func_name); - printf(" push rax\n"); - - printf(".Lend%d:\n", label); -} - -void codegen_lvar(CodeGen* g, AstNode* ast, GenMode gen_mode) { - printf(" mov rax, rbp\n"); - printf(" sub rax, %d\n", ast->node_stack_offset); - printf(" push rax\n"); - if (gen_mode == GenMode_rval) { - codegen_lval2rval(ast->ty); - } -} - -void codegen_gvar(CodeGen* g, AstNode* ast, GenMode gen_mode) { - printf(" lea rax, %s[rip]\n", ast->name); - printf(" push rax\n"); - if (gen_mode == GenMode_rval) { - codegen_lval2rval(ast->ty); - } -} - -void codegen_composite_expr(CodeGen* g, AstNode* ast) { - // Standard C does not have composite expression, but ducc internally has. - for (int i = 0; i < ast->node_len; ++i) { - AstNode* expr = ast->node_items + i; - codegen_expr(g, expr, GenMode_rval); - if (i != ast->node_len - 1) { - // TODO: the expression on the stack can be more than 8 bytes. - printf(" pop rax\n"); - } - } -} - -void codegen_expr(CodeGen* g, AstNode* ast, GenMode gen_mode) { - if (ast->kind == AstNodeKind_int_expr) { - codegen_int_expr(g, ast); - } else if (ast->kind == AstNodeKind_str_expr) { - codegen_str_expr(g, ast); - } else if (ast->kind == AstNodeKind_unary_expr) { - codegen_unary_expr(g, ast); - } else if (ast->kind == AstNodeKind_ref_expr) { - codegen_ref_expr(g, ast, gen_mode); - } else if (ast->kind == AstNodeKind_deref_expr) { - codegen_deref_expr(g, ast, gen_mode); - } else if (ast->kind == AstNodeKind_binary_expr) { - codegen_binary_expr(g, ast, gen_mode); - } else if (ast->kind == AstNodeKind_cond_expr) { - codegen_cond_expr(g, ast, gen_mode); - } else if (ast->kind == AstNodeKind_logical_expr) { - codegen_logical_expr(g, ast); - } else if (ast->kind == AstNodeKind_assign_expr) { - codegen_assign_expr(g, ast); - } else if (ast->kind == AstNodeKind_func_call) { - codegen_func_call(g, ast); - } else if (ast->kind == AstNodeKind_lvar) { - codegen_lvar(g, ast, gen_mode); - } else if (ast->kind == AstNodeKind_gvar) { - codegen_gvar(g, ast, gen_mode); - } else if (ast->kind == AstNodeKind_list) { - codegen_composite_expr(g, ast); - } else { - unreachable(); - } -} - -void codegen_return_stmt(CodeGen* g, AstNode* ast) { - if (ast->node_expr) { - codegen_expr(g, ast->node_expr, GenMode_rval); - printf(" pop rax\n"); - } - codegen_func_epilogue(g, ast); -} - -void codegen_if_stmt(CodeGen* g, AstNode* ast) { - int label = codegen_new_label(g); - - codegen_expr(g, ast->node_cond, GenMode_rval); - printf(" pop rax\n"); - printf(" cmp rax, 0\n"); - printf(" je .Lelse%d\n", label); - codegen_stmt(g, ast->node_then); - printf(" jmp .Lend%d\n", label); - printf(".Lelse%d:\n", label); - if (ast->node_else) { - codegen_stmt(g, ast->node_else); - } - printf(".Lend%d:\n", label); -} - -void codegen_for_stmt(CodeGen* g, AstNode* ast) { - int label = codegen_new_label(g); - ++g->loop_labels; - *g->loop_labels = label; - - if (ast->node_init) { - codegen_expr(g, ast->node_init, GenMode_rval); - printf(" pop rax\n"); - } - printf(".Lbegin%d:\n", label); - codegen_expr(g, ast->node_cond, GenMode_rval); - printf(" pop rax\n"); - printf(" cmp rax, 0\n"); - printf(" je .Lend%d\n", label); - codegen_stmt(g, ast->node_body); - printf(".Lcontinue%d:\n", label); - if (ast->node_update) { - codegen_expr(g, ast->node_update, GenMode_rval); - printf(" pop rax\n"); - } - printf(" jmp .Lbegin%d\n", label); - printf(".Lend%d:\n", label); - - --g->loop_labels; -} - -void codegen_do_while_stmt(CodeGen* g, AstNode* ast) { - int label = codegen_new_label(g); - ++g->loop_labels; - *g->loop_labels = label; - - printf(".Lbegin%d:\n", label); - codegen_stmt(g, ast->node_body); - printf(".Lcontinue%d:\n", label); - codegen_expr(g, ast->node_cond, GenMode_rval); - printf(" pop rax\n"); - printf(" cmp rax, 0\n"); - printf(" je .Lend%d\n", label); - printf(" jmp .Lbegin%d\n", label); - printf(".Lend%d:\n", label); - - --g->loop_labels; -} - -void codegen_break_stmt(CodeGen* g, AstNode* ast) { - int label = *g->loop_labels; - printf(" jmp .Lend%d\n", label); -} - -void codegen_continue_stmt(CodeGen* g, AstNode* ast) { - int label = *g->loop_labels; - printf(" jmp .Lcontinue%d\n", label); -} - -void codegen_expr_stmt(CodeGen* g, AstNode* ast) { - codegen_expr(g, ast->node_expr, GenMode_rval); - // TODO: the expression on the stack can be more than 8 bytes. - printf(" pop rax\n"); -} - -void codegen_var_decl(CodeGen* g, AstNode* ast) { -} - -void codegen_nop(CodeGen* g, AstNode* ast) { -} - -void codegen_block_stmt(CodeGen* g, AstNode* ast) { - for (int i = 0; i < ast->node_len; ++i) { - AstNode* stmt = ast->node_items + i; - codegen_stmt(g, stmt); - } -} - -void codegen_stmt(CodeGen* g, AstNode* ast) { - if (ast->kind == AstNodeKind_list) { - codegen_block_stmt(g, ast); - } else if (ast->kind == AstNodeKind_return_stmt) { - codegen_return_stmt(g, ast); - } else if (ast->kind == AstNodeKind_if_stmt) { - codegen_if_stmt(g, ast); - } else if (ast->kind == AstNodeKind_for_stmt) { - codegen_for_stmt(g, ast); - } else if (ast->kind == AstNodeKind_do_while_stmt) { - codegen_do_while_stmt(g, ast); - } else if (ast->kind == AstNodeKind_break_stmt) { - codegen_break_stmt(g, ast); - } else if (ast->kind == AstNodeKind_continue_stmt) { - codegen_continue_stmt(g, ast); - } else if (ast->kind == AstNodeKind_expr_stmt) { - codegen_expr_stmt(g, ast); - } else if (ast->kind == AstNodeKind_lvar_decl) { - codegen_var_decl(g, ast); - } else if (ast->kind == AstNodeKind_nop) { - codegen_nop(g, ast); - } else { - unreachable(); - } -} - -void codegen_func(CodeGen* g, AstNode* ast) { - g->current_func = ast; - printf("%s:\n", ast->name); - - codegen_func_prologue(g, ast); - codegen_stmt(g, ast->node_body); - if (strcmp(ast->name, "main") == 0) { - // C99: 5.1.2.2.3 - printf(" mov rax, 0\n"); - } - codegen_func_epilogue(g, ast); - - printf("\n"); - g->current_func = NULL; -} - -void codegen(Program* prog) { - CodeGen* g = codegen_new(); - - printf(".intel_syntax noprefix\n\n"); - - // For GNU ld: - // https://sourceware.org/binutils/docs/ld/Options.html - printf(".section .note.GNU-stack,\"\",@progbits\n\n"); - - printf(".section .rodata\n\n"); - for (int i = 0; prog->str_literals[i]; ++i) { - printf(".Lstr__%d:\n", i + 1); - printf(" .string \"%s\"\n\n", prog->str_literals[i]); - } - - printf(".data\n\n"); - for (int i = 0; i < prog->vars->node_len; ++i) { - AstNode* var = prog->vars->node_items + i; - if (var->node_expr) { - if (var->ty->kind == TypeKind_char) - printf(" %s: .byte %d\n", var->name, var->node_expr->node_int_value); - else if (var->ty->kind == TypeKind_short) - printf(" %s: .word %d\n", var->name, var->node_expr->node_int_value); - else if (var->ty->kind == TypeKind_int) - printf(" %s: .int %d\n", var->name, var->node_expr->node_int_value); - else - unimplemented(); - } else { - printf(" %s: .zero %d\n", var->name, type_sizeof(var->ty)); - } - } - - printf(".globl main\n\n"); - - printf(".text\n\n"); - for (int i = 0; i < prog->funcs->node_len; ++i) { - AstNode* func = prog->funcs->node_items + i; - codegen_func(g, func); - } -} |
