aboutsummaryrefslogtreecommitdiffhomepage
path: root/src/codegen.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/codegen.c')
-rw-r--r--src/codegen.c595
1 files changed, 595 insertions, 0 deletions
diff --git a/src/codegen.c b/src/codegen.c
new file mode 100644
index 0000000..a1167d4
--- /dev/null
+++ b/src/codegen.c
@@ -0,0 +1,595 @@
+enum GenMode {
+ GenMode_lval,
+ GenMode_rval,
+};
+typedef enum GenMode GenMode;
+
+struct CodeGen {
+ int next_label;
+ int* loop_labels;
+ AstNode* current_func;
+};
+typedef struct CodeGen CodeGen;
+
+CodeGen* codegen_new() {
+ CodeGen* g = calloc(1, sizeof(CodeGen));
+ g->next_label = 1;
+ g->loop_labels = calloc(1024, sizeof(int));
+ return g;
+}
+
+int codegen_new_label(CodeGen* g) {
+ int new_label = g->next_label;
+ ++g->next_label;
+ return new_label;
+}
+
+void codegen_expr(CodeGen* g, AstNode* ast, GenMode gen_mode);
+void codegen_stmt(CodeGen* g, AstNode* ast);
+
+const char* param_reg(int n) {
+ if (n == 0) {
+ return "rdi";
+ } else if (n == 1) {
+ return "rsi";
+ } else if (n == 2) {
+ return "rdx";
+ } else if (n == 3) {
+ return "rcx";
+ } else if (n == 4) {
+ return "r8";
+ } else if (n == 5) {
+ return "r9";
+ } else {
+ unreachable();
+ }
+}
+
+void codegen_func_prologue(CodeGen* g, AstNode* ast) {
+ printf(" push rbp\n");
+ printf(" mov rbp, rsp\n");
+ for (int i = 0; i < ast->node_params->node_len; ++i) {
+ printf(" push %s\n", param_reg(i));
+ }
+ printf(" sub rsp, %d\n", ast->node_stack_size);
+}
+
+void codegen_func_epilogue(CodeGen* g, AstNode* ast) {
+ printf(" mov rsp, rbp\n");
+ printf(" pop rbp\n");
+ printf(" ret\n");
+}
+
+void codegen_int_expr(CodeGen* g, AstNode* ast) {
+ printf(" push %d\n", ast->node_int_value);
+}
+
+void codegen_str_expr(CodeGen* g, AstNode* ast) {
+ printf(" mov rax, OFFSET FLAG:.Lstr__%d\n", ast->node_idx);
+ printf(" push rax\n");
+}
+
+void codegen_unary_expr(CodeGen* g, AstNode* ast) {
+ codegen_expr(g, ast->node_operand, GenMode_rval);
+ if (ast->node_op == TokenKind_not) {
+ printf(" pop rax\n");
+ printf(" mov rdi, 0\n");
+ printf(" cmp rax, rdi\n");
+ printf(" sete al\n");
+ printf(" movzb rax, al\n");
+ printf(" push rax\n");
+ } else {
+ unreachable();
+ }
+}
+
+void codegen_ref_expr(CodeGen* g, AstNode* ast, GenMode gen_mode) {
+ codegen_expr(g, ast->node_operand, GenMode_lval);
+}
+
+// "reg" stores the address of the expression to be pushed.
+void codegen_push_expr(const char* reg, int size) {
+ if (size == 1) {
+ printf(" movsx %s, BYTE PTR [%s]\n", reg, reg);
+ printf(" push %s\n", reg);
+ } else if (size == 2) {
+ printf(" movsx %s, WORD PTR [%s]\n", reg, reg);
+ printf(" push %s\n", reg);
+ } else if (size == 4) {
+ printf(" movsxd %s, DWORD PTR [%s]\n", reg, reg);
+ printf(" push %s\n", reg);
+ } else if (size == 8) {
+ printf(" mov %s, [%s]\n", reg, reg);
+ printf(" push %s\n", reg);
+ } else {
+ // Perform bitwise copy. Use r10 register as temporary space.
+ // Note: rsp must be aligned to 8.
+ printf(" sub rsp, %d\n", to_aligned(size, 8));
+ for (int i = 0; i < size; ++i) {
+ // Copy a sinle byte from the address that "reg" points to to the stack via r10 register.
+ printf(" mov r10b, [%s+%d]\n", reg, i);
+ printf(" mov [rsp+%d], r10b\n", i);
+ }
+ }
+}
+
+void codegen_lval2rval(Type* ty) {
+ if (ty->kind == TypeKind_array) {
+ return;
+ }
+
+ printf(" pop rax\n");
+ codegen_push_expr("rax", type_sizeof(ty));
+}
+
+void codegen_deref_expr(CodeGen* g, AstNode* ast, GenMode gen_mode) {
+ codegen_expr(g, ast->node_operand, GenMode_rval);
+ if (gen_mode == GenMode_rval) {
+ codegen_lval2rval(ast->node_operand->ty->base);
+ }
+}
+
+void codegen_logical_expr(CodeGen* g, AstNode* ast) {
+ int label = codegen_new_label(g);
+
+ if (ast->node_op == TokenKind_andand) {
+ codegen_expr(g, ast->node_lhs, GenMode_rval);
+ printf(" pop rax\n");
+ printf(" cmp rax, 0\n");
+ printf(" je .Lelse%d\n", label);
+ codegen_expr(g, ast->node_rhs, GenMode_rval);
+ printf(" jmp .Lend%d\n", label);
+ printf(".Lelse%d:\n", label);
+ printf(" push 0\n");
+ printf(".Lend%d:\n", label);
+ } else {
+ codegen_expr(g, ast->node_lhs, GenMode_rval);
+ printf(" pop rax\n");
+ printf(" cmp rax, 0\n");
+ printf(" je .Lelse%d\n", label);
+ printf(" push 1\n");
+ printf(" jmp .Lend%d\n", label);
+ printf(".Lelse%d:\n", label);
+ codegen_expr(g, ast->node_rhs, GenMode_rval);
+ printf(".Lend%d:\n", label);
+ }
+}
+
+void codegen_binary_expr(CodeGen* g, AstNode* ast, GenMode gen_mode) {
+ codegen_expr(g, ast->node_lhs, gen_mode);
+ codegen_expr(g, ast->node_rhs, gen_mode);
+ printf(" pop rdi\n");
+ printf(" pop rax\n");
+ if (ast->node_op == TokenKind_plus) {
+ printf(" add rax, rdi\n");
+ } else if (ast->node_op == TokenKind_minus) {
+ printf(" sub rax, rdi\n");
+ } else if (ast->node_op == TokenKind_star) {
+ printf(" imul rax, rdi\n");
+ } else if (ast->node_op == TokenKind_slash) {
+ printf(" cqo\n");
+ printf(" idiv rdi\n");
+ } else if (ast->node_op == TokenKind_percent) {
+ printf(" cqo\n");
+ printf(" idiv rdi\n");
+ printf(" mov rax, rdx\n");
+ } else if (ast->node_op == TokenKind_or) {
+ printf(" or rax, rdi\n");
+ } else if (ast->node_op == TokenKind_lshift) {
+ printf(" mov rcx, rdi\n");
+ printf(" shl rax, cl\n");
+ } else if (ast->node_op == TokenKind_rshift) {
+ // TODO: check if the operand is signed or unsigned
+ printf(" mov rcx, rdi\n");
+ printf(" sar rax, cl\n");
+ } else if (ast->node_op == TokenKind_eq) {
+ printf(" cmp rax, rdi\n");
+ printf(" sete al\n");
+ printf(" movzb rax, al\n");
+ } else if (ast->node_op == TokenKind_ne) {
+ printf(" cmp rax, rdi\n");
+ printf(" setne al\n");
+ printf(" movzb rax, al\n");
+ } else if (ast->node_op == TokenKind_lt) {
+ printf(" cmp rax, rdi\n");
+ printf(" setl al\n");
+ printf(" movzb rax, al\n");
+ } else if (ast->node_op == TokenKind_le) {
+ printf(" cmp rax, rdi\n");
+ printf(" setle al\n");
+ printf(" movzb rax, al\n");
+ } else {
+ unreachable();
+ }
+ printf(" push rax\n");
+}
+
+void codegen_cond_expr(CodeGen* g, AstNode* ast, GenMode gen_mode) {
+ int label = codegen_new_label(g);
+
+ codegen_expr(g, ast->node_cond, GenMode_rval);
+ printf(" pop rax\n");
+ printf(" cmp rax, 0\n");
+ printf(" je .Lelse%d\n", label);
+ codegen_expr(g, ast->node_then, gen_mode);
+ printf(" jmp .Lend%d\n", label);
+ printf(".Lelse%d:\n", label);
+ codegen_expr(g, ast->node_else, gen_mode);
+ printf(".Lend%d:\n", label);
+}
+
+void codegen_assign_expr_helper(CodeGen* g, AstNode* ast) {
+ if (ast->node_op == TokenKind_assign) {
+ return;
+ }
+
+ printf(" pop rdi\n");
+ printf(" push [rsp]\n");
+ codegen_lval2rval(ast->node_lhs->ty);
+ printf(" pop rax\n");
+
+ if (ast->node_op == TokenKind_assign_add) {
+ printf(" add rax, rdi\n");
+ } else if (ast->node_op == TokenKind_assign_sub) {
+ printf(" sub rax, rdi\n");
+ } else if (ast->node_op == TokenKind_assign_mul) {
+ printf(" imul rax, rdi\n");
+ } else if (ast->node_op == TokenKind_assign_div) {
+ printf(" cqo\n");
+ printf(" idiv rdi\n");
+ } else if (ast->node_op == TokenKind_assign_mod) {
+ printf(" cqo\n");
+ printf(" idiv rdi\n");
+ printf(" mov rax, rdx\n");
+ } else {
+ unreachable();
+ }
+
+ printf(" push rax\n");
+}
+
+void codegen_assign_expr(CodeGen* g, AstNode* ast) {
+ int sizeof_lhs = type_sizeof(ast->node_lhs->ty);
+ int sizeof_rhs = type_sizeof(ast->node_rhs->ty);
+
+ codegen_expr(g, ast->node_lhs, GenMode_lval);
+ codegen_expr(g, ast->node_rhs, GenMode_rval);
+ codegen_assign_expr_helper(g, ast);
+ if (sizeof_lhs == 1) {
+ printf(" pop rdi\n");
+ printf(" pop rax\n");
+ printf(" mov BYTE PTR [rax], dil\n");
+ printf(" push rdi\n");
+ } else if (sizeof_lhs == 2) {
+ printf(" pop rdi\n");
+ printf(" pop rax\n");
+ printf(" mov WORD PTR [rax], di\n");
+ printf(" push rdi\n");
+ } else if (sizeof_lhs == 4) {
+ printf(" pop rdi\n");
+ printf(" pop rax\n");
+ printf(" mov DWORD PTR [rax], edi\n");
+ printf(" push rdi\n");
+ } else if (sizeof_lhs == 8) {
+ printf(" pop rdi\n");
+ printf(" pop rax\n");
+ printf(" mov [rax], rdi\n");
+ printf(" push rdi\n");
+ } else {
+ if (ast->node_op != TokenKind_assign) {
+ unimplemented();
+ }
+ // Note: rsp must be aligned to 8.
+ int aligned_size = to_aligned(sizeof_lhs, 8);
+ printf(" mov rax, [rsp+%d]\n", aligned_size);
+ // Perform bitwise copy. Use r10 register as temporary space.
+ for (int i = 0; i < aligned_size; ++i) {
+ // Copy a sinle byte from the stack to the address that rax points to via r10 register.
+ printf(" mov r10b, [rsp+%d]\n", i);
+ printf(" mov [rax+%d], r10b\n", i);
+ }
+ // Pop the RHS value and the LHS address.
+ printf(" add rsp, %d\n", aligned_size + 8);
+ // TODO: dummy
+ printf(" push 0\n");
+ }
+}
+
+void codegen_func_call(CodeGen* g, AstNode* ast) {
+ const char* func_name = ast->name;
+
+ if (strcmp(func_name, "__ducc_va_start") == 0) {
+ int stack_size = g->current_func->node_stack_size;
+ printf(" # __ducc_va_start BEGIN\n");
+ for (int i = 0; i < 6; ++i) {
+ printf(" mov rax, %s\n", param_reg(i));
+ printf(" mov [rbp-%d], rax\n", 8 + (stack_size - 4 - i) * 8);
+ }
+ AstNode* va_list_args = ast->node_args->node_items;
+ codegen_expr(g, va_list_args, GenMode_lval);
+ printf(" pop rdi\n");
+ printf(" mov rax, rbp\n");
+ printf(" sub rax, %d\n", 8 + (stack_size - 1) * 8);
+ printf(" mov [rdi], rax\n");
+ printf(" mov DWORD PTR [rax], 8\n");
+ printf(" mov DWORD PTR [rax+4], 0\n");
+ printf(" mov QWORD PTR [rax+8], 0\n");
+ printf(" mov rdi, rbp\n");
+ printf(" sub rdi, %d\n", 8 + (stack_size - 4) * 8);
+ printf(" mov QWORD PTR [rax+16], rdi\n");
+ printf(" # __ducc_va_start END\n");
+ return;
+ }
+
+ AstNode* args = ast->node_args;
+ for (int i = 0; i < args->node_len; ++i) {
+ AstNode* arg = args->node_items + i;
+ codegen_expr(g, arg, GenMode_rval);
+ }
+ for (int i = args->node_len - 1; i >= 0; --i) {
+ printf(" pop %s\n", param_reg(i));
+ }
+
+ int label = codegen_new_label(g);
+
+ printf(" mov rax, rsp\n");
+ printf(" and rax, 15\n");
+ printf(" cmp rax, 0\n");
+ printf(" je .Laligned%d\n", label);
+
+ printf(" mov rax, 0\n");
+ printf(" sub rsp, 8\n");
+ printf(" call %s\n", func_name);
+ printf(" add rsp, 8\n");
+ printf(" push rax\n");
+
+ printf(" jmp .Lend%d\n", label);
+ printf(".Laligned%d:\n", label);
+
+ printf(" mov rax, 0\n");
+ printf(" call %s\n", func_name);
+ printf(" push rax\n");
+
+ printf(".Lend%d:\n", label);
+}
+
+void codegen_lvar(CodeGen* g, AstNode* ast, GenMode gen_mode) {
+ printf(" mov rax, rbp\n");
+ printf(" sub rax, %d\n", ast->node_stack_offset);
+ printf(" push rax\n");
+ if (gen_mode == GenMode_rval) {
+ codegen_lval2rval(ast->ty);
+ }
+}
+
+void codegen_gvar(CodeGen* g, AstNode* ast, GenMode gen_mode) {
+ printf(" lea rax, %s[rip]\n", ast->name);
+ printf(" push rax\n");
+ if (gen_mode == GenMode_rval) {
+ codegen_lval2rval(ast->ty);
+ }
+}
+
+void codegen_composite_expr(CodeGen* g, AstNode* ast) {
+ // Standard C does not have composite expression, but ducc internally has.
+ for (int i = 0; i < ast->node_len; ++i) {
+ AstNode* expr = ast->node_items + i;
+ codegen_expr(g, expr, GenMode_rval);
+ if (i != ast->node_len - 1) {
+ // TODO: the expression on the stack can be more than 8 bytes.
+ printf(" pop rax\n");
+ }
+ }
+}
+
+void codegen_expr(CodeGen* g, AstNode* ast, GenMode gen_mode) {
+ if (ast->kind == AstNodeKind_int_expr) {
+ codegen_int_expr(g, ast);
+ } else if (ast->kind == AstNodeKind_str_expr) {
+ codegen_str_expr(g, ast);
+ } else if (ast->kind == AstNodeKind_unary_expr) {
+ codegen_unary_expr(g, ast);
+ } else if (ast->kind == AstNodeKind_ref_expr) {
+ codegen_ref_expr(g, ast, gen_mode);
+ } else if (ast->kind == AstNodeKind_deref_expr) {
+ codegen_deref_expr(g, ast, gen_mode);
+ } else if (ast->kind == AstNodeKind_binary_expr) {
+ codegen_binary_expr(g, ast, gen_mode);
+ } else if (ast->kind == AstNodeKind_cond_expr) {
+ codegen_cond_expr(g, ast, gen_mode);
+ } else if (ast->kind == AstNodeKind_logical_expr) {
+ codegen_logical_expr(g, ast);
+ } else if (ast->kind == AstNodeKind_assign_expr) {
+ codegen_assign_expr(g, ast);
+ } else if (ast->kind == AstNodeKind_func_call) {
+ codegen_func_call(g, ast);
+ } else if (ast->kind == AstNodeKind_lvar) {
+ codegen_lvar(g, ast, gen_mode);
+ } else if (ast->kind == AstNodeKind_gvar) {
+ codegen_gvar(g, ast, gen_mode);
+ } else if (ast->kind == AstNodeKind_list) {
+ codegen_composite_expr(g, ast);
+ } else {
+ unreachable();
+ }
+}
+
+void codegen_return_stmt(CodeGen* g, AstNode* ast) {
+ if (ast->node_expr) {
+ codegen_expr(g, ast->node_expr, GenMode_rval);
+ printf(" pop rax\n");
+ }
+ codegen_func_epilogue(g, ast);
+}
+
+void codegen_if_stmt(CodeGen* g, AstNode* ast) {
+ int label = codegen_new_label(g);
+
+ codegen_expr(g, ast->node_cond, GenMode_rval);
+ printf(" pop rax\n");
+ printf(" cmp rax, 0\n");
+ printf(" je .Lelse%d\n", label);
+ codegen_stmt(g, ast->node_then);
+ printf(" jmp .Lend%d\n", label);
+ printf(".Lelse%d:\n", label);
+ if (ast->node_else) {
+ codegen_stmt(g, ast->node_else);
+ }
+ printf(".Lend%d:\n", label);
+}
+
+void codegen_for_stmt(CodeGen* g, AstNode* ast) {
+ int label = codegen_new_label(g);
+ ++g->loop_labels;
+ *g->loop_labels = label;
+
+ if (ast->node_init) {
+ codegen_expr(g, ast->node_init, GenMode_rval);
+ printf(" pop rax\n");
+ }
+ printf(".Lbegin%d:\n", label);
+ codegen_expr(g, ast->node_cond, GenMode_rval);
+ printf(" pop rax\n");
+ printf(" cmp rax, 0\n");
+ printf(" je .Lend%d\n", label);
+ codegen_stmt(g, ast->node_body);
+ printf(".Lcontinue%d:\n", label);
+ if (ast->node_update) {
+ codegen_expr(g, ast->node_update, GenMode_rval);
+ printf(" pop rax\n");
+ }
+ printf(" jmp .Lbegin%d\n", label);
+ printf(".Lend%d:\n", label);
+
+ --g->loop_labels;
+}
+
+void codegen_do_while_stmt(CodeGen* g, AstNode* ast) {
+ int label = codegen_new_label(g);
+ ++g->loop_labels;
+ *g->loop_labels = label;
+
+ printf(".Lbegin%d:\n", label);
+ codegen_stmt(g, ast->node_body);
+ printf(".Lcontinue%d:\n", label);
+ codegen_expr(g, ast->node_cond, GenMode_rval);
+ printf(" pop rax\n");
+ printf(" cmp rax, 0\n");
+ printf(" je .Lend%d\n", label);
+ printf(" jmp .Lbegin%d\n", label);
+ printf(".Lend%d:\n", label);
+
+ --g->loop_labels;
+}
+
+void codegen_break_stmt(CodeGen* g, AstNode* ast) {
+ int label = *g->loop_labels;
+ printf(" jmp .Lend%d\n", label);
+}
+
+void codegen_continue_stmt(CodeGen* g, AstNode* ast) {
+ int label = *g->loop_labels;
+ printf(" jmp .Lcontinue%d\n", label);
+}
+
+void codegen_expr_stmt(CodeGen* g, AstNode* ast) {
+ codegen_expr(g, ast->node_expr, GenMode_rval);
+ // TODO: the expression on the stack can be more than 8 bytes.
+ printf(" pop rax\n");
+}
+
+void codegen_var_decl(CodeGen* g, AstNode* ast) {
+}
+
+void codegen_nop(CodeGen* g, AstNode* ast) {
+}
+
+void codegen_block_stmt(CodeGen* g, AstNode* ast) {
+ for (int i = 0; i < ast->node_len; ++i) {
+ AstNode* stmt = ast->node_items + i;
+ codegen_stmt(g, stmt);
+ }
+}
+
+void codegen_stmt(CodeGen* g, AstNode* ast) {
+ if (ast->kind == AstNodeKind_list) {
+ codegen_block_stmt(g, ast);
+ } else if (ast->kind == AstNodeKind_return_stmt) {
+ codegen_return_stmt(g, ast);
+ } else if (ast->kind == AstNodeKind_if_stmt) {
+ codegen_if_stmt(g, ast);
+ } else if (ast->kind == AstNodeKind_for_stmt) {
+ codegen_for_stmt(g, ast);
+ } else if (ast->kind == AstNodeKind_do_while_stmt) {
+ codegen_do_while_stmt(g, ast);
+ } else if (ast->kind == AstNodeKind_break_stmt) {
+ codegen_break_stmt(g, ast);
+ } else if (ast->kind == AstNodeKind_continue_stmt) {
+ codegen_continue_stmt(g, ast);
+ } else if (ast->kind == AstNodeKind_expr_stmt) {
+ codegen_expr_stmt(g, ast);
+ } else if (ast->kind == AstNodeKind_lvar_decl) {
+ codegen_var_decl(g, ast);
+ } else if (ast->kind == AstNodeKind_nop) {
+ codegen_nop(g, ast);
+ } else {
+ unreachable();
+ }
+}
+
+void codegen_func(CodeGen* g, AstNode* ast) {
+ g->current_func = ast;
+ printf("%s:\n", ast->name);
+
+ codegen_func_prologue(g, ast);
+ codegen_stmt(g, ast->node_body);
+ if (strcmp(ast->name, "main") == 0) {
+ // C99: 5.1.2.2.3
+ printf(" mov rax, 0\n");
+ }
+ codegen_func_epilogue(g, ast);
+
+ printf("\n");
+ g->current_func = NULL;
+}
+
+void codegen(Program* prog) {
+ CodeGen* g = codegen_new();
+
+ printf(".intel_syntax noprefix\n\n");
+
+ // For GNU ld:
+ // https://sourceware.org/binutils/docs/ld/Options.html
+ printf(".section .note.GNU-stack,\"\",@progbits\n\n");
+
+ printf(".section .rodata\n\n");
+ for (int i = 0; prog->str_literals[i]; ++i) {
+ printf(".Lstr__%d:\n", i + 1);
+ printf(" .string \"%s\"\n\n", prog->str_literals[i]);
+ }
+
+ printf(".data\n\n");
+ for (int i = 0; i < prog->vars->node_len; ++i) {
+ AstNode* var = prog->vars->node_items + i;
+ if (var->node_expr) {
+ if (var->ty->kind == TypeKind_char)
+ printf(" %s: .byte %d\n", var->name, var->node_expr->node_int_value);
+ else if (var->ty->kind == TypeKind_short)
+ printf(" %s: .word %d\n", var->name, var->node_expr->node_int_value);
+ else if (var->ty->kind == TypeKind_int)
+ printf(" %s: .int %d\n", var->name, var->node_expr->node_int_value);
+ else
+ unimplemented();
+ } else {
+ printf(" %s: .zero %d\n", var->name, type_sizeof(var->ty));
+ }
+ }
+
+ printf(".globl main\n\n");
+
+ printf(".text\n\n");
+ for (int i = 0; i < prog->funcs->node_len; ++i) {
+ AstNode* func = prog->funcs->node_items + i;
+ codegen_func(g, func);
+ }
+}