From 3654ce578e6fff53950874adf7e0e4ae0a6eb956 Mon Sep 17 00:00:00 2001 From: nsfisis Date: Sun, 3 May 2026 17:29:12 +0900 Subject: refactor: organize directory structure --- Makefile | 32 +- src/ast.c | 787 ------------- src/ast.h | 433 ------- src/cc1/ast.c | 787 +++++++++++++ src/cc1/ast.h | 433 +++++++ src/cc1/codegen.c | 925 +++++++++++++++ src/cc1/codegen.h | 8 + src/cc1/codegen_wasm.c | 164 +++ src/cc1/codegen_wasm.h | 8 + src/cc1/fs.c | 27 + src/cc1/fs.h | 7 + src/cc1/io.c | 127 ++ src/cc1/io.h | 26 + src/cc1/parse.c | 3018 ++++++++++++++++++++++++++++++++++++++++++++++++ src/cc1/parse.h | 43 + src/cc1/preprocess.c | 1559 +++++++++++++++++++++++++ src/cc1/preprocess.h | 13 + src/cc1/sys.c | 21 + src/cc1/sys.h | 7 + src/cc1/token.c | 387 +++++++ src/cc1/token.h | 183 +++ src/cc1/tokenize.c | 597 ++++++++++ src/cc1/tokenize.h | 10 + src/cli.c | 133 --- src/cli.h | 24 - src/codegen.c | 925 --------------- src/codegen.h | 8 - src/codegen_wasm.c | 164 --- src/codegen_wasm.h | 8 - src/common.c | 88 -- src/common.h | 41 - src/ducc.h | 11 - src/ducc/cli.c | 133 +++ src/ducc/cli.h | 24 + src/ducc/main.c | 88 ++ src/ducc/version.h | 6 + src/fs.c | 27 - src/fs.h | 7 - src/io.c | 127 -- src/io.h | 26 - src/json.c | 112 -- src/json.h | 32 - src/lib/common.c | 88 ++ src/lib/common.h | 41 + src/lib/ducc.h | 11 + src/lib/json.c | 112 ++ src/lib/json.h | 32 + src/main.c | 88 -- src/parse.c | 3018 ------------------------------------------------ src/parse.h | 43 - src/preprocess.c | 1559 ------------------------- src/preprocess.h | 13 - src/sys.c | 21 - src/sys.h | 7 - src/token.c | 387 ------- src/token.h | 183 --- src/tokenize.c | 597 ---------- src/tokenize.h | 10 - src/version.h | 6 - 59 files changed, 8902 insertions(+), 8900 deletions(-) delete mode 100644 src/ast.c delete mode 100644 src/ast.h create mode 100644 src/cc1/ast.c create mode 100644 src/cc1/ast.h create mode 100644 src/cc1/codegen.c create mode 100644 src/cc1/codegen.h create mode 100644 src/cc1/codegen_wasm.c create mode 100644 src/cc1/codegen_wasm.h create mode 100644 src/cc1/fs.c create mode 100644 src/cc1/fs.h create mode 100644 src/cc1/io.c create mode 100644 src/cc1/io.h create mode 100644 src/cc1/parse.c create mode 100644 src/cc1/parse.h create mode 100644 src/cc1/preprocess.c create mode 100644 src/cc1/preprocess.h create mode 100644 src/cc1/sys.c create mode 100644 src/cc1/sys.h create mode 100644 src/cc1/token.c create mode 100644 src/cc1/token.h create mode 100644 src/cc1/tokenize.c create mode 100644 src/cc1/tokenize.h delete mode 100644 src/cli.c delete mode 100644 src/cli.h delete mode 100644 src/codegen.c delete mode 100644 src/codegen.h delete mode 100644 src/codegen_wasm.c delete mode 100644 src/codegen_wasm.h delete mode 100644 src/common.c delete mode 100644 src/common.h delete mode 100644 src/ducc.h create mode 100644 src/ducc/cli.c create mode 100644 src/ducc/cli.h create mode 100644 src/ducc/main.c create mode 100644 src/ducc/version.h delete mode 100644 src/fs.c delete mode 100644 src/fs.h delete mode 100644 src/io.c delete mode 100644 src/io.h delete mode 100644 src/json.c delete mode 100644 src/json.h create mode 100644 src/lib/common.c create mode 100644 src/lib/common.h create mode 100644 src/lib/ducc.h create mode 100644 src/lib/json.c create mode 100644 src/lib/json.h delete mode 100644 src/main.c delete mode 100644 src/parse.c delete mode 100644 src/parse.h delete mode 100644 src/preprocess.c delete mode 100644 src/preprocess.h delete mode 100644 src/sys.c delete mode 100644 src/sys.h delete mode 100644 src/token.c delete mode 100644 src/token.h delete mode 100644 src/tokenize.c delete mode 100644 src/tokenize.h delete mode 100644 src/version.h diff --git a/Makefile b/Makefile index c4eb6bb..c8c27cd 100644 --- a/Makefile +++ b/Makefile @@ -4,26 +4,28 @@ BUILD_ROOT_DIR := build BUILD_DIR := $(BUILD_ROOT_DIR)/.$(TARGET) OBJECTS := \ - $(BUILD_DIR)/main.o \ - $(BUILD_DIR)/ast.o \ - $(BUILD_DIR)/cli.o \ - $(BUILD_DIR)/codegen.o \ - $(BUILD_DIR)/codegen_wasm.o \ - $(BUILD_DIR)/common.o \ - $(BUILD_DIR)/fs.o \ - $(BUILD_DIR)/io.o \ - $(BUILD_DIR)/json.o \ - $(BUILD_DIR)/parse.o \ - $(BUILD_DIR)/preprocess.o \ - $(BUILD_DIR)/sys.o \ - $(BUILD_DIR)/token.o \ - $(BUILD_DIR)/tokenize.o + $(BUILD_DIR)/cc1/ast.o \ + $(BUILD_DIR)/cc1/codegen.o \ + $(BUILD_DIR)/cc1/codegen_wasm.o \ + $(BUILD_DIR)/cc1/fs.o \ + $(BUILD_DIR)/cc1/io.o \ + $(BUILD_DIR)/cc1/parse.o \ + $(BUILD_DIR)/cc1/preprocess.o \ + $(BUILD_DIR)/cc1/sys.o \ + $(BUILD_DIR)/cc1/token.o \ + $(BUILD_DIR)/cc1/tokenize.o \ + $(BUILD_DIR)/ducc/cli.o \ + $(BUILD_DIR)/ducc/main.o \ + $(BUILD_DIR)/lib/common.o \ + $(BUILD_DIR)/lib/json.o .PHONY: all all: $(BUILD_DIR) $(BUILD_ROOT_DIR)/$(TARGET) $(BUILD_DIR): - @mkdir -p $(BUILD_DIR) + @mkdir -p $(BUILD_DIR)/cc1 + @mkdir -p $(BUILD_DIR)/ducc + @mkdir -p $(BUILD_DIR)/lib # TODO: provide release build? # TODO: use -std=c23 instead of -std=gnu23 diff --git a/src/ast.c b/src/ast.c deleted file mode 100644 index 0dcec0d..0000000 --- a/src/ast.c +++ /dev/null @@ -1,787 +0,0 @@ -#include "ast.h" -#include "common.h" -#include "preprocess.h" - -const char* storageclass_stringify(StorageClass s) { - if (s == StorageClass_unspecified) - return ""; - if (s == StorageClass_auto) - return "auto"; - if (s == StorageClass_constexpr) - return "constexpr"; - if (s == StorageClass_extern) - return "extern"; - if (s == StorageClass_register) - return "register"; - if (s == StorageClass_static) - return "static"; - if (s == StorageClass_thread_local) - return "thread_local"; - if (s == StorageClass_typedef) - return "typedef"; - else - unreachable(); -} - -const char* type_kind_stringify(TypeKind k) { - if (k == TypeKind_unknown) - return ""; - else if (k == TypeKind_void) - return "void"; - else if (k == TypeKind_char) - return "char"; - else if (k == TypeKind_schar) - return "signed char"; - else if (k == TypeKind_uchar) - return "unsigned char"; - else if (k == TypeKind_short) - return "short"; - else if (k == TypeKind_ushort) - return "unsigned short"; - else if (k == TypeKind_int) - return "int"; - else if (k == TypeKind_uint) - return "unsigned int"; - else if (k == TypeKind_long) - return "long"; - else if (k == TypeKind_ulong) - return "unsigned long"; - else if (k == TypeKind_llong) - return "long long"; - else if (k == TypeKind_ullong) - return "unsigned long long"; - else if (k == TypeKind_float) - return "float"; - else if (k == TypeKind_double) - return "double"; - else if (k == TypeKind_ldouble) - return "long double"; - else if (k == TypeKind_bool) - return "bool"; - else if (k == TypeKind_struct) - return "struct"; - else if (k == TypeKind_union) - return "union"; - else if (k == TypeKind_enum) - return "enum"; - else if (k == TypeKind_ptr) - return ""; - else if (k == TypeKind_array) - return ""; - else if (k == TypeKind_func) - return ""; - else - unreachable(); -} - -static AstNode* members_of(Type* ty) { - AstNode* def = &ty->ref.defs->as.list->items[ty->ref.index]; - if (def->kind == AstNodeKind_struct_def) - return def->as.struct_def->members; - else - return def->as.union_def->members; -} - -Type* type_new(TypeKind kind) { - Type* ty = calloc(1, sizeof(Type)); - ty->kind = kind; - return ty; -} - -Type* type_dup(Type* src) { - Type* ty = malloc(sizeof(Type)); - memcpy(ty, src, sizeof(Type)); - return ty; -} - -Type* type_new_ptr(Type* base) { - Type* ty = type_new(TypeKind_ptr); - ty->base = base; - return ty; -} - -Type* type_new_array(Type* base, int size) { - Type* ty = type_new(TypeKind_array); - ty->base = base; - ty->array_size = size; - return ty; -} - -Type* type_new_static_string(int len) { - return type_new_array(type_new(TypeKind_char), len + 1); -} - -Type* type_array_to_ptr(Type* ty) { - return type_new_ptr(ty->base); -} - -Type* type_new_func(Type* result, AstNode* params) { - Type* ty = type_new(TypeKind_func); - ty->result = result; - ty->params = params; - return ty; -} - -bool type_is_unsized(Type* ty) { - return ty->kind == TypeKind_void; -} - -bool type_is_unsigned(Type* ty) { - return ty->kind == TypeKind_uchar || ty->kind == TypeKind_ushort || ty->kind == TypeKind_uint || - ty->kind == TypeKind_ulong || ty->kind == TypeKind_ullong || ty->kind == TypeKind_bool; -} - -int type_sizeof(Type* ty) { - if (type_is_unsized(ty)) { - fatal_error("type_sizeof: type size cannot be determined"); - } - - if (ty->kind == TypeKind_char || ty->kind == TypeKind_schar || ty->kind == TypeKind_uchar || - ty->kind == TypeKind_bool) - return 1; - else if (ty->kind == TypeKind_short || ty->kind == TypeKind_ushort) - return 2; - else if (ty->kind == TypeKind_int || ty->kind == TypeKind_uint || ty->kind == TypeKind_float) - return 4; - else if (ty->kind == TypeKind_long || ty->kind == TypeKind_ulong || ty->kind == TypeKind_llong || - ty->kind == TypeKind_ullong || ty->kind == TypeKind_double) - return 8; - else if (ty->kind == TypeKind_struct) - return type_sizeof_struct(ty); - else if (ty->kind == TypeKind_union) - return type_sizeof_union(ty); - else if (ty->kind == TypeKind_enum) - return 4; - else if (ty->kind == TypeKind_ptr) - return 8; - else if (ty->kind == TypeKind_array) - return type_sizeof(ty->base) * ty->array_size; - else if (ty->kind == TypeKind_func) - return 8; - else - unreachable(); -} - -int type_alignof(Type* ty) { - if (type_is_unsized(ty)) { - fatal_error("type_alignof: type size cannot be determined"); - } - - if (ty->kind == TypeKind_char || ty->kind == TypeKind_schar || ty->kind == TypeKind_uchar || - ty->kind == TypeKind_bool) - return 1; - else if (ty->kind == TypeKind_short || ty->kind == TypeKind_ushort) - return 2; - else if (ty->kind == TypeKind_int || ty->kind == TypeKind_uint || ty->kind == TypeKind_float) - return 4; - else if (ty->kind == TypeKind_long || ty->kind == TypeKind_ulong || ty->kind == TypeKind_llong || - ty->kind == TypeKind_ullong || ty->kind == TypeKind_double) - return 8; - else if (ty->kind == TypeKind_struct) - return type_alignof_struct(ty); - else if (ty->kind == TypeKind_union) - return type_alignof_union(ty); - else if (ty->kind == TypeKind_enum) - return 4; - else if (ty->kind == TypeKind_ptr) - return 8; - else if (ty->kind == TypeKind_array) - return type_alignof(ty->base); - else if (ty->kind == TypeKind_func) - return 8; - else - unreachable(); -} - -int to_aligned(int n, int a) { - return (n + a - 1) / a * a; -} - -const char* astnode_kind_stringify(AstNodeKind k) { - switch (k) { - case AstNodeKind_unknown: - return "unknown"; - case AstNodeKind_nop: - return "nop"; - case AstNodeKind_array_initializer: - return "array_initializer"; - case AstNodeKind_assign_expr: - return "assign_expr"; - case AstNodeKind_binary_expr: - return "binary_expr"; - case AstNodeKind_break_stmt: - return "break_stmt"; - case AstNodeKind_case_label: - return "case_label"; - case AstNodeKind_cast_expr: - return "cast_expr"; - case AstNodeKind_cond_expr: - return "cond_expr"; - case AstNodeKind_continue_stmt: - return "continue_stmt"; - case AstNodeKind_default_label: - return "default_label"; - case AstNodeKind_deref_expr: - return "deref_expr"; - case AstNodeKind_do_while_stmt: - return "do_while_stmt"; - case AstNodeKind_enum_def: - return "enum_def"; - case AstNodeKind_enum_member: - return "enum_member"; - case AstNodeKind_expr_stmt: - return "expr_stmt"; - case AstNodeKind_for_stmt: - return "for_stmt"; - case AstNodeKind_func: - return "func"; - case AstNodeKind_func_call: - return "func_call"; - case AstNodeKind_func_decl: - return "func_decl"; - case AstNodeKind_func_def: - return "func_def"; - case AstNodeKind_goto_stmt: - return "goto_stmt"; - case AstNodeKind_gvar: - return "gvar"; - case AstNodeKind_gvar_decl: - return "gvar_decl"; - case AstNodeKind_if_stmt: - return "if_stmt"; - case AstNodeKind_int_expr: - return "int_expr"; - case AstNodeKind_double_expr: - return "double_expr"; - case AstNodeKind_label_stmt: - return "label_stmt"; - case AstNodeKind_list: - return "list"; - case AstNodeKind_logical_expr: - return "logical_expr"; - case AstNodeKind_lvar: - return "lvar"; - case AstNodeKind_lvar_decl: - return "lvar_decl"; - case AstNodeKind_param: - return "param"; - case AstNodeKind_ref_expr: - return "ref_expr"; - case AstNodeKind_return_stmt: - return "return_stmt"; - case AstNodeKind_str_expr: - return "str_expr"; - case AstNodeKind_struct_decl: - return "struct_decl"; - case AstNodeKind_struct_def: - return "struct_def"; - case AstNodeKind_struct_member: - return "struct_member"; - case AstNodeKind_switch_stmt: - return "switch_stmt"; - case AstNodeKind_type: - return "type"; - case AstNodeKind_typedef_decl: - return "typedef_decl"; - case AstNodeKind_unary_expr: - return "unary_expr"; - case AstNodeKind_union_decl: - return "union_decl"; - case AstNodeKind_union_def: - return "union_def"; - case AstNodeKind_declarator: - return "declarator"; - default: - unreachable(); - } -} - -AstNode* ast_new(AstNodeKind kind) { - AstNode* ast = calloc(1, sizeof(AstNode)); - ast->kind = kind; - return ast; -} - -AstNode* ast_new_list(int capacity) { - if (capacity == 0) - unreachable(); - AstNode* list = ast_new(AstNodeKind_list); - list->as.list = calloc(1, sizeof(ListNode)); - list->as.list->cap = capacity; - list->as.list->len = 0; - list->as.list->items = calloc(list->as.list->cap, sizeof(AstNode)); - return list; -} - -void ast_append(AstNode* list, AstNode* item) { - if (list->kind != AstNodeKind_list) { - fatal_error("ast_append: ast is not a list"); - } - if (!item) { - return; - } - if (list->as.list->cap <= list->as.list->len) { - list->as.list->cap *= 2; - list->as.list->items = realloc(list->as.list->items, sizeof(AstNode) * list->as.list->cap); - memset(list->as.list->items + list->as.list->len, 0, - sizeof(AstNode) * (list->as.list->cap - list->as.list->len)); - } - memcpy(list->as.list->items + list->as.list->len, item, sizeof(AstNode)); - ++list->as.list->len; -} - -AstNode* ast_new_int(int v) { - AstNode* e = ast_new(AstNodeKind_int_expr); - e->as.int_expr = calloc(1, sizeof(IntExprNode)); - e->as.int_expr->value = v; - e->ty = type_new(TypeKind_int); - return e; -} - -AstNode* ast_new_double(double v) { - AstNode* e = ast_new(AstNodeKind_double_expr); - e->as.double_expr = calloc(1, sizeof(DoubleExprNode)); - e->as.double_expr->value = v; - e->ty = type_new(TypeKind_double); - return e; -} - -AstNode* ast_new_unary_expr(int op, AstNode* operand) { - AstNode* e = ast_new(AstNodeKind_unary_expr); - e->as.unary_expr = calloc(1, sizeof(UnaryExprNode)); - e->as.unary_expr->op = op; - e->as.unary_expr->operand = operand; - e->ty = type_new(TypeKind_int); - return e; -} - -AstNode* ast_new_binary_expr(int op, AstNode* lhs, AstNode* rhs) { - AstNode* e = ast_new(AstNodeKind_binary_expr); - e->as.binary_expr = calloc(1, sizeof(BinaryExprNode)); - e->as.binary_expr->op = op; - e->as.binary_expr->lhs = lhs; - e->as.binary_expr->rhs = rhs; - if (op == TokenKind_plus) { - if (lhs->ty->kind == TypeKind_ptr) { - e->ty = lhs->ty; - } else if (lhs->ty->kind == TypeKind_array) { - e->ty = type_array_to_ptr(lhs->ty); - } else if (rhs->ty->kind == TypeKind_ptr) { - e->ty = rhs->ty; - } else if (rhs->ty->kind == TypeKind_array) { - e->ty = type_array_to_ptr(rhs->ty); - } else { - e->ty = type_new(TypeKind_int); - } - } else if (op == TokenKind_minus) { - if (lhs->ty->kind == TypeKind_ptr) { - e->ty = lhs->ty; - } else if (lhs->ty->kind == TypeKind_array) { - e->ty = type_array_to_ptr(lhs->ty); - } else { - e->ty = type_new(TypeKind_int); - } - } else { - e->ty = type_new(TypeKind_int); - } - return e; -} - -AstNode* ast_new_assign_expr(int op, AstNode* lhs, AstNode* rhs) { - AstNode* e = ast_new(AstNodeKind_assign_expr); - e->as.assign_expr = calloc(1, sizeof(AssignExprNode)); - e->as.assign_expr->op = op; - e->as.assign_expr->lhs = lhs; - e->as.assign_expr->rhs = rhs; - e->ty = lhs->ty; - return e; -} - -AstNode* ast_new_assign_add_expr(AstNode* lhs, AstNode* rhs) { - if (lhs->ty->base) { - rhs = ast_new_binary_expr(TokenKind_star, rhs, ast_new_int(type_sizeof(lhs->ty->base))); - } else if (rhs->ty->base) { - lhs = ast_new_binary_expr(TokenKind_star, lhs, ast_new_int(type_sizeof(rhs->ty->base))); - } - return ast_new_assign_expr(TokenKind_assign_add, lhs, rhs); -} - -AstNode* ast_new_assign_sub_expr(AstNode* lhs, AstNode* rhs) { - if (lhs->ty->base) { - rhs = ast_new_binary_expr(TokenKind_star, rhs, ast_new_int(type_sizeof(lhs->ty->base))); - } - return ast_new_assign_expr(TokenKind_assign_sub, lhs, rhs); -} - -AstNode* ast_new_ref_expr(AstNode* operand) { - AstNode* e = ast_new(AstNodeKind_ref_expr); - e->as.ref_expr = calloc(1, sizeof(RefExprNode)); - e->as.ref_expr->operand = operand; - e->ty = type_new_ptr(operand->ty); - return e; -} - -AstNode* ast_new_deref_expr(AstNode* operand) { - AstNode* e = ast_new(AstNodeKind_deref_expr); - e->as.deref_expr = calloc(1, sizeof(DerefExprNode)); - e->as.deref_expr->operand = operand; - e->ty = operand->ty->base; - return e; -} - -AstNode* ast_new_member_access_expr(AstNode* obj, const char* name) { - AstNode* e = ast_new(AstNodeKind_deref_expr); - e->as.deref_expr = calloc(1, sizeof(DerefExprNode)); - e->as.deref_expr->operand = - ast_new_binary_expr(TokenKind_plus, obj, ast_new_int(type_offsetof(obj->ty->base, name))); - e->ty = type_member_typeof(obj->ty->base, name); - e->as.deref_expr->operand->ty = type_new_ptr(e->ty); - return e; -} - -AstNode* ast_new_cast_expr(AstNode* operand, Type* result_ty) { - AstNode* e = ast_new(AstNodeKind_cast_expr); - e->as.cast_expr = calloc(1, sizeof(CastExprNode)); - e->as.cast_expr->operand = operand; - e->ty = result_ty; - return e; -} - -AstNode* ast_new_logical_expr(int op, AstNode* lhs, AstNode* rhs) { - AstNode* e = ast_new(AstNodeKind_logical_expr); - e->as.logical_expr = calloc(1, sizeof(LogicalExprNode)); - e->as.logical_expr->op = op; - e->as.logical_expr->lhs = lhs; - e->as.logical_expr->rhs = rhs; - e->ty = type_new(TypeKind_int); - return e; -} - -AstNode* ast_new_cond_expr(AstNode* cond, AstNode* then, AstNode* else_) { - AstNode* e = ast_new(AstNodeKind_cond_expr); - e->as.cond_expr = calloc(1, sizeof(CondExprNode)); - e->as.cond_expr->cond = cond; - e->as.cond_expr->then = then; - e->as.cond_expr->else_ = else_; - e->ty = then->ty; - return e; -} - -AstNode* ast_new_str_expr(int idx, Type* ty) { - AstNode* e = ast_new(AstNodeKind_str_expr); - e->as.str_expr = calloc(1, sizeof(StrExprNode)); - e->as.str_expr->idx = idx; - e->ty = ty; - return e; -} - -AstNode* ast_new_func_call(AstNode* func, AstNode* args) { - AstNode* e = ast_new(AstNodeKind_func_call); - e->as.func_call = calloc(1, sizeof(FuncCallNode)); - e->as.func_call->func = func; - e->as.func_call->args = args; - return e; -} - -AstNode* ast_new_func(const char* name, Type* ty) { - AstNode* e = ast_new(AstNodeKind_func); - e->as.func = calloc(1, sizeof(FuncNode)); - e->as.func->name = name; - e->ty = ty; - return e; -} - -AstNode* ast_new_gvar(const char* name, Type* ty) { - AstNode* e = ast_new(AstNodeKind_gvar); - e->as.gvar = calloc(1, sizeof(GvarNode)); - e->as.gvar->name = name; - e->ty = ty; - return e; -} - -AstNode* ast_new_lvar(const char* name, int stack_offset, Type* ty) { - AstNode* e = ast_new(AstNodeKind_lvar); - e->as.lvar = calloc(1, sizeof(LvarNode)); - e->as.lvar->name = name; - e->as.lvar->stack_offset = stack_offset; - e->ty = ty; - return e; -} - -AstNode* ast_new_nop(void) { - return ast_new(AstNodeKind_nop); -} - -AstNode* ast_new_break_stmt(void) { - return ast_new(AstNodeKind_break_stmt); -} - -AstNode* ast_new_continue_stmt(void) { - return ast_new(AstNodeKind_continue_stmt); -} - -AstNode* ast_new_return_stmt(AstNode* expr) { - AstNode* e = ast_new(AstNodeKind_return_stmt); - e->as.return_stmt = calloc(1, sizeof(ReturnStmtNode)); - e->as.return_stmt->expr = expr; - return e; -} - -AstNode* ast_new_expr_stmt(AstNode* expr) { - AstNode* e = ast_new(AstNodeKind_expr_stmt); - e->as.expr_stmt = calloc(1, sizeof(ExprStmtNode)); - e->as.expr_stmt->expr = expr; - return e; -} - -AstNode* ast_new_if_stmt(AstNode* cond, AstNode* then, AstNode* else_) { - AstNode* e = ast_new(AstNodeKind_if_stmt); - e->as.if_stmt = calloc(1, sizeof(IfStmtNode)); - e->as.if_stmt->cond = cond; - e->as.if_stmt->then = then; - e->as.if_stmt->else_ = else_; - return e; -} - -AstNode* ast_new_for_stmt(AstNode* init, AstNode* cond, AstNode* update, AstNode* body) { - AstNode* e = ast_new(AstNodeKind_for_stmt); - e->as.for_stmt = calloc(1, sizeof(ForStmtNode)); - e->as.for_stmt->init = init; - e->as.for_stmt->cond = cond; - e->as.for_stmt->update = update; - e->as.for_stmt->body = body; - return e; -} - -AstNode* ast_new_do_while_stmt(AstNode* cond, AstNode* body) { - AstNode* e = ast_new(AstNodeKind_do_while_stmt); - e->as.do_while_stmt = calloc(1, sizeof(DoWhileStmtNode)); - e->as.do_while_stmt->cond = cond; - e->as.do_while_stmt->body = body; - return e; -} - -AstNode* ast_new_switch_stmt(AstNode* expr) { - AstNode* e = ast_new(AstNodeKind_switch_stmt); - e->as.switch_stmt = calloc(1, sizeof(SwitchStmtNode)); - e->as.switch_stmt->expr = expr; - return e; -} - -AstNode* ast_new_case_label(int value, AstNode* body) { - AstNode* e = ast_new(AstNodeKind_case_label); - e->as.case_label = calloc(1, sizeof(CaseLabelNode)); - e->as.case_label->value = value; - e->as.case_label->body = body; - return e; -} - -AstNode* ast_new_default_label(AstNode* body) { - AstNode* e = ast_new(AstNodeKind_default_label); - e->as.default_label = calloc(1, sizeof(DefaultLabelNode)); - e->as.default_label->body = body; - return e; -} - -AstNode* ast_new_goto_stmt(const char* label) { - AstNode* e = ast_new(AstNodeKind_goto_stmt); - e->as.goto_stmt = calloc(1, sizeof(GotoStmtNode)); - e->as.goto_stmt->label = label; - return e; -} - -AstNode* ast_new_label_stmt(const char* name, AstNode* body) { - AstNode* e = ast_new(AstNodeKind_label_stmt); - e->as.label_stmt = calloc(1, sizeof(LabelStmtNode)); - e->as.label_stmt->name = name; - e->as.label_stmt->body = body; - return e; -} - -AstNode* ast_new_declarator(const char* name, Type* ty) { - AstNode* e = ast_new(AstNodeKind_declarator); - e->as.declarator = calloc(1, sizeof(DeclaratorNode)); - e->as.declarator->name = name; - e->ty = ty; - return e; -} - -AstNode* ast_new_func_def(const char* name, Type* ty, AstNode* params, AstNode* body, int stack_size) { - AstNode* e = ast_new(AstNodeKind_func_def); - e->as.func_def = calloc(1, sizeof(FuncDefNode)); - e->as.func_def->name = name; - e->as.func_def->params = params; - e->as.func_def->body = body; - e->as.func_def->stack_size = stack_size; - e->ty = ty; - return e; -} - -AstNode* ast_new_enum_member(const char* name, int value) { - AstNode* e = ast_new(AstNodeKind_enum_member); - e->as.enum_member = calloc(1, sizeof(EnumMemberNode)); - e->as.enum_member->name = name; - e->as.enum_member->value = value; - return e; -} - -AstNode* ast_new_typedef_decl(const char* name, Type* ty) { - AstNode* e = ast_new(AstNodeKind_typedef_decl); - e->as.typedef_decl = calloc(1, sizeof(TypedefDeclNode)); - e->as.typedef_decl->name = name; - e->ty = ty; - return e; -} - -AstNode* ast_new_struct_def(const char* name) { - AstNode* e = ast_new(AstNodeKind_struct_def); - e->as.struct_def = calloc(1, sizeof(StructDefNode)); - e->as.struct_def->name = name; - return e; -} - -AstNode* ast_new_union_def(const char* name) { - AstNode* e = ast_new(AstNodeKind_union_def); - e->as.union_def = calloc(1, sizeof(UnionDefNode)); - e->as.union_def->name = name; - return e; -} - -AstNode* ast_new_enum_def(const char* name) { - AstNode* e = ast_new(AstNodeKind_enum_def); - e->as.enum_def = calloc(1, sizeof(EnumDefNode)); - e->as.enum_def->name = name; - return e; -} - -AstNode* ast_new_array_initializer(AstNode* list) { - AstNode* e = ast_new(AstNodeKind_array_initializer); - e->as.array_initializer = calloc(1, sizeof(ArrayInitializerNode)); - e->as.array_initializer->list = list; - return e; -} - -int type_sizeof_struct(Type* ty) { - int next_offset = 0; - int struct_align = 0; - - for (int i = 0; i < members_of(ty)->as.list->len; ++i) { - AstNode* member = &members_of(ty)->as.list->items[i]; - int size, align; - if (member->as.struct_member->is_bitfield) { - // TODO - size = member->as.struct_member->bitfield_width / 8; - align = 1; - } else { - size = type_sizeof(member->ty); - align = type_alignof(member->ty); - } - - next_offset = to_aligned(next_offset, align); - next_offset += size; - if (struct_align < align) { - struct_align = align; - } - } - return to_aligned(next_offset, struct_align); -} - -int type_sizeof_union(Type* ty) { - int union_size = 0; - int union_align = 0; - - for (int i = 0; i < members_of(ty)->as.list->len; ++i) { - AstNode* member = &members_of(ty)->as.list->items[i]; - int size = type_sizeof(member->ty); - int align = type_alignof(member->ty); - - size = to_aligned(size, align); - if (union_size < size) { - union_size = size; - } - if (union_align < align) { - union_align = align; - } - } - return to_aligned(union_size, union_align); -} - -int type_alignof_struct(Type* ty) { - int struct_align = 0; - - for (int i = 0; i < members_of(ty)->as.list->len; ++i) { - AstNode* member = &members_of(ty)->as.list->items[i]; - int align = type_alignof(member->ty); - - if (struct_align < align) { - struct_align = align; - } - } - return struct_align; -} - -int type_alignof_union(Type* ty) { - int union_align = 0; - - for (int i = 0; i < members_of(ty)->as.list->len; ++i) { - AstNode* member = &members_of(ty)->as.list->items[i]; - int align = type_alignof(member->ty); - - if (union_align < align) { - union_align = align; - } - } - return union_align; -} - -int type_offsetof(Type* ty, const char* name) { - if (ty->kind == TypeKind_union) { - return 0; - } - if (ty->kind != TypeKind_struct) { - fatal_error("type_offsetof: type is neither a struct nor a union"); - } - - int next_offset = 0; - - for (int i = 0; i < members_of(ty)->as.list->len; ++i) { - AstNode* member = &members_of(ty)->as.list->items[i]; - int size, align; - if (member->as.struct_member->is_bitfield) { - // TODO - size = member->as.struct_member->bitfield_width / 8; - align = 1; - } else { - size = type_sizeof(member->ty); - align = type_alignof(member->ty); - } - - next_offset = to_aligned(next_offset, align); - if (strcmp(member->as.struct_member->name, name) == 0) { - if (member->as.struct_member->is_bitfield) { - // C23: 7.21.4 - // if the specified member is a bit-field, the behavior is undefined. - fatal_error("type_offsetof: the result of offsetof(bit-field) is undefined"); - } - return next_offset; - } - next_offset += size; - } - - fatal_error("type_offsetof: member not found"); -} - -Type* type_member_typeof(Type* ty, const char* name) { - if (ty->kind != TypeKind_struct && ty->kind != TypeKind_union) { - fatal_error("type_member_typeof: type is neither a struct nor a union"); - } - - for (int i = 0; i < members_of(ty)->as.list->len; ++i) { - AstNode* member = &members_of(ty)->as.list->items[i]; - if (strcmp(member->as.struct_member->name, name) == 0) { - return member->ty; - } - } - - fatal_error("type_offsetof: member not found"); -} diff --git a/src/ast.h b/src/ast.h deleted file mode 100644 index c6fd0e2..0000000 --- a/src/ast.h +++ /dev/null @@ -1,433 +0,0 @@ -#ifndef DUCC_AST_H -#define DUCC_AST_H - -#include "ducc.h" -#include "io.h" - -typedef enum { - StorageClass_unspecified, - StorageClass_auto, - StorageClass_constexpr, - StorageClass_extern, - StorageClass_register, - StorageClass_static, - StorageClass_thread_local, - StorageClass_typedef, -} StorageClass; - -const char* storageclass_stringify(StorageClass s); - -typedef enum { - TypeKind_unknown, - - TypeKind_void, - TypeKind_char, - TypeKind_schar, - TypeKind_uchar, - TypeKind_short, - TypeKind_ushort, - TypeKind_int, - TypeKind_uint, - TypeKind_long, - TypeKind_ulong, - TypeKind_llong, - TypeKind_ullong, - TypeKind_float, - TypeKind_double, - TypeKind_ldouble, - TypeKind_bool, - TypeKind_struct, - TypeKind_union, - TypeKind_enum, - TypeKind_ptr, - TypeKind_array, - TypeKind_func, -} TypeKind; - -const char* type_kind_stringify(TypeKind k); - -struct AstNode; -typedef struct AstNode AstNode; - -typedef struct { - AstNode* defs; - size_t index; -} TypeRef; - -typedef struct Type { - TypeKind kind; - StorageClass storage_class; - // Check `base` instead of `kind` to test if the type is an array or a pointer. - struct Type* base; - int array_size; - TypeRef ref; - struct Type* result; - AstNode* params; -} Type; - -Type* type_new(TypeKind kind); -Type* type_dup(Type* src); -Type* type_new_ptr(Type* base); -Type* type_new_array(Type* elem, int size); -Type* type_new_static_string(int len); -Type* type_array_to_ptr(Type* ty); -Type* type_new_func(Type* result, AstNode* params); -bool type_is_unsized(Type* ty); -bool type_is_unsigned(Type* ty); - -int type_sizeof_struct(Type* ty); -int type_sizeof_union(Type* ty); -int type_alignof_struct(Type* ty); -int type_alignof_union(Type* ty); -int type_offsetof(Type* ty, const char* name); -Type* type_member_typeof(Type* ty, const char* name); - -int type_sizeof(Type* ty); -int type_alignof(Type* ty); - -int to_aligned(int n, int a); - -typedef enum { - AstNodeKind_unknown, - AstNodeKind_nop, - - AstNodeKind_array_initializer, - AstNodeKind_assign_expr, - AstNodeKind_binary_expr, - AstNodeKind_break_stmt, - AstNodeKind_case_label, - AstNodeKind_cast_expr, - AstNodeKind_cond_expr, - AstNodeKind_continue_stmt, - AstNodeKind_default_label, - AstNodeKind_deref_expr, - AstNodeKind_do_while_stmt, - AstNodeKind_enum_def, - AstNodeKind_enum_member, - AstNodeKind_expr_stmt, - AstNodeKind_for_stmt, - AstNodeKind_func, - AstNodeKind_func_call, - AstNodeKind_func_decl, - AstNodeKind_func_def, - AstNodeKind_goto_stmt, - AstNodeKind_gvar, - AstNodeKind_gvar_decl, - AstNodeKind_if_stmt, - AstNodeKind_int_expr, - AstNodeKind_double_expr, - AstNodeKind_label_stmt, - AstNodeKind_list, - AstNodeKind_logical_expr, - AstNodeKind_lvar, - AstNodeKind_lvar_decl, - AstNodeKind_param, - AstNodeKind_ref_expr, - AstNodeKind_return_stmt, - AstNodeKind_str_expr, - AstNodeKind_struct_decl, - AstNodeKind_struct_def, - AstNodeKind_struct_member, - AstNodeKind_switch_stmt, - AstNodeKind_type, - AstNodeKind_typedef_decl, - AstNodeKind_unary_expr, - AstNodeKind_union_decl, - AstNodeKind_union_def, - - // Intermediate ASTs: they are used only in parsing, not for parse result. - AstNodeKind_declarator, -} AstNodeKind; - -const char* astnode_kind_stringify(AstNodeKind k); - -// Expression nodes -typedef struct { - int value; -} IntExprNode; - -typedef struct { - double value; -} DoubleExprNode; - -typedef struct { - int idx; -} StrExprNode; - -typedef struct { - int op; - AstNode* operand; -} UnaryExprNode; - -typedef struct { - int op; - AstNode* lhs; - AstNode* rhs; -} BinaryExprNode; - -typedef struct { - int op; - AstNode* lhs; - AstNode* rhs; -} LogicalExprNode; - -typedef struct { - int op; - AstNode* lhs; - AstNode* rhs; -} AssignExprNode; - -typedef struct { - AstNode* operand; -} CastExprNode; - -typedef struct { - AstNode* cond; - AstNode* then; - AstNode* else_; -} CondExprNode; - -typedef struct { - AstNode* operand; -} DerefExprNode; - -typedef struct { - AstNode* operand; -} RefExprNode; - -typedef struct { - AstNode* func; - AstNode* args; -} FuncCallNode; - -// Statement nodes -typedef struct { - AstNode* cond; - AstNode* then; - AstNode* else_; -} IfStmtNode; - -typedef struct { - AstNode* init; - AstNode* cond; - AstNode* update; - AstNode* body; -} ForStmtNode; - -typedef struct { - AstNode* cond; - AstNode* body; -} DoWhileStmtNode; - -typedef struct { - AstNode* expr; - AstNode* body; -} SwitchStmtNode; - -typedef struct { - int value; - AstNode* body; -} CaseLabelNode; - -typedef struct { - AstNode* body; -} DefaultLabelNode; - -typedef struct { - const char* name; - AstNode* body; -} LabelStmtNode; - -typedef struct { - AstNode* expr; -} ReturnStmtNode; - -typedef struct { - const char* label; -} GotoStmtNode; - -typedef struct { - AstNode* expr; -} ExprStmtNode; - -// Declaration nodes -typedef struct { - const char* name; - AstNode* params; - AstNode* body; - int stack_size; -} FuncDefNode; - -typedef struct { - const char* name; - int stack_offset; -} LvarNode; - -typedef struct { - AstNode* expr; -} LvarDeclNode; - -typedef struct { - const char* name; - int stack_offset; -} ParamNode; - -typedef struct { - const char* name; - AstNode* expr; -} GvarDeclNode; - -typedef struct { - const char* name; - AstNode* members; -} StructDefNode; - -typedef struct { - const char* name; - AstNode* members; -} UnionDefNode; - -typedef struct { - const char* name; - AstNode* members; -} EnumDefNode; - -typedef struct { - const char* name; - int value; -} EnumMemberNode; - -typedef struct { - const char* name; - AstNode* init; -} DeclaratorNode; - -typedef struct { - const char* name; -} FuncNode; - -typedef struct { - const char* name; -} GvarNode; - -typedef struct { - const char* name; - bool is_bitfield; - int bitfield_offset; - int bitfield_width; -} StructMemberNode; - -typedef struct { - const char* name; -} TypedefDeclNode; - -typedef struct { - AstNode* list; -} ArrayInitializerNode; - -typedef struct { - AstNode* items; - int len; - int cap; -} ListNode; - -struct AstNode { - AstNodeKind kind; - // TODO: currently only limited set of ast nodes have loc field. - SourceLocation loc; - Type* ty; - union { - IntExprNode* int_expr; - DoubleExprNode* double_expr; - StrExprNode* str_expr; - UnaryExprNode* unary_expr; - BinaryExprNode* binary_expr; - LogicalExprNode* logical_expr; - AssignExprNode* assign_expr; - CastExprNode* cast_expr; - CondExprNode* cond_expr; - DerefExprNode* deref_expr; - RefExprNode* ref_expr; - FuncCallNode* func_call; - IfStmtNode* if_stmt; - ForStmtNode* for_stmt; - DoWhileStmtNode* do_while_stmt; - SwitchStmtNode* switch_stmt; - CaseLabelNode* case_label; - DefaultLabelNode* default_label; - LabelStmtNode* label_stmt; - ReturnStmtNode* return_stmt; - GotoStmtNode* goto_stmt; - ExprStmtNode* expr_stmt; - FuncDefNode* func_def; - LvarNode* lvar; - LvarDeclNode* lvar_decl; - ParamNode* param; - GvarDeclNode* gvar_decl; - StructDefNode* struct_def; - UnionDefNode* union_def; - EnumDefNode* enum_def; - EnumMemberNode* enum_member; - DeclaratorNode* declarator; - FuncNode* func; - GvarNode* gvar; - StructMemberNode* struct_member; - TypedefDeclNode* typedef_decl; - ArrayInitializerNode* array_initializer; - ListNode* list; - } as; -}; - -typedef struct { - AstNode* funcs; - AstNode* vars; - const char** str_literals; -} Program; - -AstNode* ast_new(AstNodeKind kind); -AstNode* ast_new_list(int capacity); -void ast_append(AstNode* list, AstNode* item); -AstNode* ast_new_int(int v); -AstNode* ast_new_double(double v); -AstNode* ast_new_unary_expr(int op, AstNode* operand); -AstNode* ast_new_binary_expr(int op, AstNode* lhs, AstNode* rhs); - -AstNode* ast_new_assign_expr(int op, AstNode* lhs, AstNode* rhs); -AstNode* ast_new_assign_add_expr(AstNode* lhs, AstNode* rhs); -AstNode* ast_new_assign_sub_expr(AstNode* lhs, AstNode* rhs); -AstNode* ast_new_ref_expr(AstNode* operand); -AstNode* ast_new_deref_expr(AstNode* operand); -AstNode* ast_new_member_access_expr(AstNode* obj, const char* name); -AstNode* ast_new_cast_expr(AstNode* operand, Type* result_ty); -AstNode* ast_new_logical_expr(int op, AstNode* lhs, AstNode* rhs); -AstNode* ast_new_cond_expr(AstNode* cond, AstNode* then, AstNode* else_); -AstNode* ast_new_str_expr(int idx, Type* ty); -AstNode* ast_new_func_call(AstNode* func, AstNode* args); -AstNode* ast_new_func(const char* name, Type* ty); -AstNode* ast_new_gvar(const char* name, Type* ty); -AstNode* ast_new_lvar(const char* name, int stack_offset, Type* ty); - -AstNode* ast_new_nop(void); -AstNode* ast_new_break_stmt(void); -AstNode* ast_new_continue_stmt(void); -AstNode* ast_new_return_stmt(AstNode* expr); -AstNode* ast_new_expr_stmt(AstNode* expr); -AstNode* ast_new_if_stmt(AstNode* cond, AstNode* then, AstNode* else_); -AstNode* ast_new_for_stmt(AstNode* init, AstNode* cond, AstNode* update, AstNode* body); -AstNode* ast_new_do_while_stmt(AstNode* cond, AstNode* body); -AstNode* ast_new_switch_stmt(AstNode* expr); -AstNode* ast_new_case_label(int value, AstNode* body); -AstNode* ast_new_default_label(AstNode* body); -AstNode* ast_new_goto_stmt(const char* label); -AstNode* ast_new_label_stmt(const char* name, AstNode* body); - -AstNode* ast_new_declarator(const char* name, Type* ty); -AstNode* ast_new_func_def(const char* name, Type* ty, AstNode* params, AstNode* body, int stack_size); -AstNode* ast_new_enum_member(const char* name, int value); -AstNode* ast_new_typedef_decl(const char* name, Type* ty); -AstNode* ast_new_struct_def(const char* name); -AstNode* ast_new_union_def(const char* name); -AstNode* ast_new_enum_def(const char* name); -AstNode* ast_new_array_initializer(AstNode* list); - -#endif diff --git a/src/cc1/ast.c b/src/cc1/ast.c new file mode 100644 index 0000000..efd16ae --- /dev/null +++ b/src/cc1/ast.c @@ -0,0 +1,787 @@ +#include "ast.h" +#include "../lib/common.h" +#include "preprocess.h" + +const char* storageclass_stringify(StorageClass s) { + if (s == StorageClass_unspecified) + return ""; + if (s == StorageClass_auto) + return "auto"; + if (s == StorageClass_constexpr) + return "constexpr"; + if (s == StorageClass_extern) + return "extern"; + if (s == StorageClass_register) + return "register"; + if (s == StorageClass_static) + return "static"; + if (s == StorageClass_thread_local) + return "thread_local"; + if (s == StorageClass_typedef) + return "typedef"; + else + unreachable(); +} + +const char* type_kind_stringify(TypeKind k) { + if (k == TypeKind_unknown) + return ""; + else if (k == TypeKind_void) + return "void"; + else if (k == TypeKind_char) + return "char"; + else if (k == TypeKind_schar) + return "signed char"; + else if (k == TypeKind_uchar) + return "unsigned char"; + else if (k == TypeKind_short) + return "short"; + else if (k == TypeKind_ushort) + return "unsigned short"; + else if (k == TypeKind_int) + return "int"; + else if (k == TypeKind_uint) + return "unsigned int"; + else if (k == TypeKind_long) + return "long"; + else if (k == TypeKind_ulong) + return "unsigned long"; + else if (k == TypeKind_llong) + return "long long"; + else if (k == TypeKind_ullong) + return "unsigned long long"; + else if (k == TypeKind_float) + return "float"; + else if (k == TypeKind_double) + return "double"; + else if (k == TypeKind_ldouble) + return "long double"; + else if (k == TypeKind_bool) + return "bool"; + else if (k == TypeKind_struct) + return "struct"; + else if (k == TypeKind_union) + return "union"; + else if (k == TypeKind_enum) + return "enum"; + else if (k == TypeKind_ptr) + return ""; + else if (k == TypeKind_array) + return ""; + else if (k == TypeKind_func) + return ""; + else + unreachable(); +} + +static AstNode* members_of(Type* ty) { + AstNode* def = &ty->ref.defs->as.list->items[ty->ref.index]; + if (def->kind == AstNodeKind_struct_def) + return def->as.struct_def->members; + else + return def->as.union_def->members; +} + +Type* type_new(TypeKind kind) { + Type* ty = calloc(1, sizeof(Type)); + ty->kind = kind; + return ty; +} + +Type* type_dup(Type* src) { + Type* ty = malloc(sizeof(Type)); + memcpy(ty, src, sizeof(Type)); + return ty; +} + +Type* type_new_ptr(Type* base) { + Type* ty = type_new(TypeKind_ptr); + ty->base = base; + return ty; +} + +Type* type_new_array(Type* base, int size) { + Type* ty = type_new(TypeKind_array); + ty->base = base; + ty->array_size = size; + return ty; +} + +Type* type_new_static_string(int len) { + return type_new_array(type_new(TypeKind_char), len + 1); +} + +Type* type_array_to_ptr(Type* ty) { + return type_new_ptr(ty->base); +} + +Type* type_new_func(Type* result, AstNode* params) { + Type* ty = type_new(TypeKind_func); + ty->result = result; + ty->params = params; + return ty; +} + +bool type_is_unsized(Type* ty) { + return ty->kind == TypeKind_void; +} + +bool type_is_unsigned(Type* ty) { + return ty->kind == TypeKind_uchar || ty->kind == TypeKind_ushort || ty->kind == TypeKind_uint || + ty->kind == TypeKind_ulong || ty->kind == TypeKind_ullong || ty->kind == TypeKind_bool; +} + +int type_sizeof(Type* ty) { + if (type_is_unsized(ty)) { + fatal_error("type_sizeof: type size cannot be determined"); + } + + if (ty->kind == TypeKind_char || ty->kind == TypeKind_schar || ty->kind == TypeKind_uchar || + ty->kind == TypeKind_bool) + return 1; + else if (ty->kind == TypeKind_short || ty->kind == TypeKind_ushort) + return 2; + else if (ty->kind == TypeKind_int || ty->kind == TypeKind_uint || ty->kind == TypeKind_float) + return 4; + else if (ty->kind == TypeKind_long || ty->kind == TypeKind_ulong || ty->kind == TypeKind_llong || + ty->kind == TypeKind_ullong || ty->kind == TypeKind_double) + return 8; + else if (ty->kind == TypeKind_struct) + return type_sizeof_struct(ty); + else if (ty->kind == TypeKind_union) + return type_sizeof_union(ty); + else if (ty->kind == TypeKind_enum) + return 4; + else if (ty->kind == TypeKind_ptr) + return 8; + else if (ty->kind == TypeKind_array) + return type_sizeof(ty->base) * ty->array_size; + else if (ty->kind == TypeKind_func) + return 8; + else + unreachable(); +} + +int type_alignof(Type* ty) { + if (type_is_unsized(ty)) { + fatal_error("type_alignof: type size cannot be determined"); + } + + if (ty->kind == TypeKind_char || ty->kind == TypeKind_schar || ty->kind == TypeKind_uchar || + ty->kind == TypeKind_bool) + return 1; + else if (ty->kind == TypeKind_short || ty->kind == TypeKind_ushort) + return 2; + else if (ty->kind == TypeKind_int || ty->kind == TypeKind_uint || ty->kind == TypeKind_float) + return 4; + else if (ty->kind == TypeKind_long || ty->kind == TypeKind_ulong || ty->kind == TypeKind_llong || + ty->kind == TypeKind_ullong || ty->kind == TypeKind_double) + return 8; + else if (ty->kind == TypeKind_struct) + return type_alignof_struct(ty); + else if (ty->kind == TypeKind_union) + return type_alignof_union(ty); + else if (ty->kind == TypeKind_enum) + return 4; + else if (ty->kind == TypeKind_ptr) + return 8; + else if (ty->kind == TypeKind_array) + return type_alignof(ty->base); + else if (ty->kind == TypeKind_func) + return 8; + else + unreachable(); +} + +int to_aligned(int n, int a) { + return (n + a - 1) / a * a; +} + +const char* astnode_kind_stringify(AstNodeKind k) { + switch (k) { + case AstNodeKind_unknown: + return "unknown"; + case AstNodeKind_nop: + return "nop"; + case AstNodeKind_array_initializer: + return "array_initializer"; + case AstNodeKind_assign_expr: + return "assign_expr"; + case AstNodeKind_binary_expr: + return "binary_expr"; + case AstNodeKind_break_stmt: + return "break_stmt"; + case AstNodeKind_case_label: + return "case_label"; + case AstNodeKind_cast_expr: + return "cast_expr"; + case AstNodeKind_cond_expr: + return "cond_expr"; + case AstNodeKind_continue_stmt: + return "continue_stmt"; + case AstNodeKind_default_label: + return "default_label"; + case AstNodeKind_deref_expr: + return "deref_expr"; + case AstNodeKind_do_while_stmt: + return "do_while_stmt"; + case AstNodeKind_enum_def: + return "enum_def"; + case AstNodeKind_enum_member: + return "enum_member"; + case AstNodeKind_expr_stmt: + return "expr_stmt"; + case AstNodeKind_for_stmt: + return "for_stmt"; + case AstNodeKind_func: + return "func"; + case AstNodeKind_func_call: + return "func_call"; + case AstNodeKind_func_decl: + return "func_decl"; + case AstNodeKind_func_def: + return "func_def"; + case AstNodeKind_goto_stmt: + return "goto_stmt"; + case AstNodeKind_gvar: + return "gvar"; + case AstNodeKind_gvar_decl: + return "gvar_decl"; + case AstNodeKind_if_stmt: + return "if_stmt"; + case AstNodeKind_int_expr: + return "int_expr"; + case AstNodeKind_double_expr: + return "double_expr"; + case AstNodeKind_label_stmt: + return "label_stmt"; + case AstNodeKind_list: + return "list"; + case AstNodeKind_logical_expr: + return "logical_expr"; + case AstNodeKind_lvar: + return "lvar"; + case AstNodeKind_lvar_decl: + return "lvar_decl"; + case AstNodeKind_param: + return "param"; + case AstNodeKind_ref_expr: + return "ref_expr"; + case AstNodeKind_return_stmt: + return "return_stmt"; + case AstNodeKind_str_expr: + return "str_expr"; + case AstNodeKind_struct_decl: + return "struct_decl"; + case AstNodeKind_struct_def: + return "struct_def"; + case AstNodeKind_struct_member: + return "struct_member"; + case AstNodeKind_switch_stmt: + return "switch_stmt"; + case AstNodeKind_type: + return "type"; + case AstNodeKind_typedef_decl: + return "typedef_decl"; + case AstNodeKind_unary_expr: + return "unary_expr"; + case AstNodeKind_union_decl: + return "union_decl"; + case AstNodeKind_union_def: + return "union_def"; + case AstNodeKind_declarator: + return "declarator"; + default: + unreachable(); + } +} + +AstNode* ast_new(AstNodeKind kind) { + AstNode* ast = calloc(1, sizeof(AstNode)); + ast->kind = kind; + return ast; +} + +AstNode* ast_new_list(int capacity) { + if (capacity == 0) + unreachable(); + AstNode* list = ast_new(AstNodeKind_list); + list->as.list = calloc(1, sizeof(ListNode)); + list->as.list->cap = capacity; + list->as.list->len = 0; + list->as.list->items = calloc(list->as.list->cap, sizeof(AstNode)); + return list; +} + +void ast_append(AstNode* list, AstNode* item) { + if (list->kind != AstNodeKind_list) { + fatal_error("ast_append: ast is not a list"); + } + if (!item) { + return; + } + if (list->as.list->cap <= list->as.list->len) { + list->as.list->cap *= 2; + list->as.list->items = realloc(list->as.list->items, sizeof(AstNode) * list->as.list->cap); + memset(list->as.list->items + list->as.list->len, 0, + sizeof(AstNode) * (list->as.list->cap - list->as.list->len)); + } + memcpy(list->as.list->items + list->as.list->len, item, sizeof(AstNode)); + ++list->as.list->len; +} + +AstNode* ast_new_int(int v) { + AstNode* e = ast_new(AstNodeKind_int_expr); + e->as.int_expr = calloc(1, sizeof(IntExprNode)); + e->as.int_expr->value = v; + e->ty = type_new(TypeKind_int); + return e; +} + +AstNode* ast_new_double(double v) { + AstNode* e = ast_new(AstNodeKind_double_expr); + e->as.double_expr = calloc(1, sizeof(DoubleExprNode)); + e->as.double_expr->value = v; + e->ty = type_new(TypeKind_double); + return e; +} + +AstNode* ast_new_unary_expr(int op, AstNode* operand) { + AstNode* e = ast_new(AstNodeKind_unary_expr); + e->as.unary_expr = calloc(1, sizeof(UnaryExprNode)); + e->as.unary_expr->op = op; + e->as.unary_expr->operand = operand; + e->ty = type_new(TypeKind_int); + return e; +} + +AstNode* ast_new_binary_expr(int op, AstNode* lhs, AstNode* rhs) { + AstNode* e = ast_new(AstNodeKind_binary_expr); + e->as.binary_expr = calloc(1, sizeof(BinaryExprNode)); + e->as.binary_expr->op = op; + e->as.binary_expr->lhs = lhs; + e->as.binary_expr->rhs = rhs; + if (op == TokenKind_plus) { + if (lhs->ty->kind == TypeKind_ptr) { + e->ty = lhs->ty; + } else if (lhs->ty->kind == TypeKind_array) { + e->ty = type_array_to_ptr(lhs->ty); + } else if (rhs->ty->kind == TypeKind_ptr) { + e->ty = rhs->ty; + } else if (rhs->ty->kind == TypeKind_array) { + e->ty = type_array_to_ptr(rhs->ty); + } else { + e->ty = type_new(TypeKind_int); + } + } else if (op == TokenKind_minus) { + if (lhs->ty->kind == TypeKind_ptr) { + e->ty = lhs->ty; + } else if (lhs->ty->kind == TypeKind_array) { + e->ty = type_array_to_ptr(lhs->ty); + } else { + e->ty = type_new(TypeKind_int); + } + } else { + e->ty = type_new(TypeKind_int); + } + return e; +} + +AstNode* ast_new_assign_expr(int op, AstNode* lhs, AstNode* rhs) { + AstNode* e = ast_new(AstNodeKind_assign_expr); + e->as.assign_expr = calloc(1, sizeof(AssignExprNode)); + e->as.assign_expr->op = op; + e->as.assign_expr->lhs = lhs; + e->as.assign_expr->rhs = rhs; + e->ty = lhs->ty; + return e; +} + +AstNode* ast_new_assign_add_expr(AstNode* lhs, AstNode* rhs) { + if (lhs->ty->base) { + rhs = ast_new_binary_expr(TokenKind_star, rhs, ast_new_int(type_sizeof(lhs->ty->base))); + } else if (rhs->ty->base) { + lhs = ast_new_binary_expr(TokenKind_star, lhs, ast_new_int(type_sizeof(rhs->ty->base))); + } + return ast_new_assign_expr(TokenKind_assign_add, lhs, rhs); +} + +AstNode* ast_new_assign_sub_expr(AstNode* lhs, AstNode* rhs) { + if (lhs->ty->base) { + rhs = ast_new_binary_expr(TokenKind_star, rhs, ast_new_int(type_sizeof(lhs->ty->base))); + } + return ast_new_assign_expr(TokenKind_assign_sub, lhs, rhs); +} + +AstNode* ast_new_ref_expr(AstNode* operand) { + AstNode* e = ast_new(AstNodeKind_ref_expr); + e->as.ref_expr = calloc(1, sizeof(RefExprNode)); + e->as.ref_expr->operand = operand; + e->ty = type_new_ptr(operand->ty); + return e; +} + +AstNode* ast_new_deref_expr(AstNode* operand) { + AstNode* e = ast_new(AstNodeKind_deref_expr); + e->as.deref_expr = calloc(1, sizeof(DerefExprNode)); + e->as.deref_expr->operand = operand; + e->ty = operand->ty->base; + return e; +} + +AstNode* ast_new_member_access_expr(AstNode* obj, const char* name) { + AstNode* e = ast_new(AstNodeKind_deref_expr); + e->as.deref_expr = calloc(1, sizeof(DerefExprNode)); + e->as.deref_expr->operand = + ast_new_binary_expr(TokenKind_plus, obj, ast_new_int(type_offsetof(obj->ty->base, name))); + e->ty = type_member_typeof(obj->ty->base, name); + e->as.deref_expr->operand->ty = type_new_ptr(e->ty); + return e; +} + +AstNode* ast_new_cast_expr(AstNode* operand, Type* result_ty) { + AstNode* e = ast_new(AstNodeKind_cast_expr); + e->as.cast_expr = calloc(1, sizeof(CastExprNode)); + e->as.cast_expr->operand = operand; + e->ty = result_ty; + return e; +} + +AstNode* ast_new_logical_expr(int op, AstNode* lhs, AstNode* rhs) { + AstNode* e = ast_new(AstNodeKind_logical_expr); + e->as.logical_expr = calloc(1, sizeof(LogicalExprNode)); + e->as.logical_expr->op = op; + e->as.logical_expr->lhs = lhs; + e->as.logical_expr->rhs = rhs; + e->ty = type_new(TypeKind_int); + return e; +} + +AstNode* ast_new_cond_expr(AstNode* cond, AstNode* then, AstNode* else_) { + AstNode* e = ast_new(AstNodeKind_cond_expr); + e->as.cond_expr = calloc(1, sizeof(CondExprNode)); + e->as.cond_expr->cond = cond; + e->as.cond_expr->then = then; + e->as.cond_expr->else_ = else_; + e->ty = then->ty; + return e; +} + +AstNode* ast_new_str_expr(int idx, Type* ty) { + AstNode* e = ast_new(AstNodeKind_str_expr); + e->as.str_expr = calloc(1, sizeof(StrExprNode)); + e->as.str_expr->idx = idx; + e->ty = ty; + return e; +} + +AstNode* ast_new_func_call(AstNode* func, AstNode* args) { + AstNode* e = ast_new(AstNodeKind_func_call); + e->as.func_call = calloc(1, sizeof(FuncCallNode)); + e->as.func_call->func = func; + e->as.func_call->args = args; + return e; +} + +AstNode* ast_new_func(const char* name, Type* ty) { + AstNode* e = ast_new(AstNodeKind_func); + e->as.func = calloc(1, sizeof(FuncNode)); + e->as.func->name = name; + e->ty = ty; + return e; +} + +AstNode* ast_new_gvar(const char* name, Type* ty) { + AstNode* e = ast_new(AstNodeKind_gvar); + e->as.gvar = calloc(1, sizeof(GvarNode)); + e->as.gvar->name = name; + e->ty = ty; + return e; +} + +AstNode* ast_new_lvar(const char* name, int stack_offset, Type* ty) { + AstNode* e = ast_new(AstNodeKind_lvar); + e->as.lvar = calloc(1, sizeof(LvarNode)); + e->as.lvar->name = name; + e->as.lvar->stack_offset = stack_offset; + e->ty = ty; + return e; +} + +AstNode* ast_new_nop(void) { + return ast_new(AstNodeKind_nop); +} + +AstNode* ast_new_break_stmt(void) { + return ast_new(AstNodeKind_break_stmt); +} + +AstNode* ast_new_continue_stmt(void) { + return ast_new(AstNodeKind_continue_stmt); +} + +AstNode* ast_new_return_stmt(AstNode* expr) { + AstNode* e = ast_new(AstNodeKind_return_stmt); + e->as.return_stmt = calloc(1, sizeof(ReturnStmtNode)); + e->as.return_stmt->expr = expr; + return e; +} + +AstNode* ast_new_expr_stmt(AstNode* expr) { + AstNode* e = ast_new(AstNodeKind_expr_stmt); + e->as.expr_stmt = calloc(1, sizeof(ExprStmtNode)); + e->as.expr_stmt->expr = expr; + return e; +} + +AstNode* ast_new_if_stmt(AstNode* cond, AstNode* then, AstNode* else_) { + AstNode* e = ast_new(AstNodeKind_if_stmt); + e->as.if_stmt = calloc(1, sizeof(IfStmtNode)); + e->as.if_stmt->cond = cond; + e->as.if_stmt->then = then; + e->as.if_stmt->else_ = else_; + return e; +} + +AstNode* ast_new_for_stmt(AstNode* init, AstNode* cond, AstNode* update, AstNode* body) { + AstNode* e = ast_new(AstNodeKind_for_stmt); + e->as.for_stmt = calloc(1, sizeof(ForStmtNode)); + e->as.for_stmt->init = init; + e->as.for_stmt->cond = cond; + e->as.for_stmt->update = update; + e->as.for_stmt->body = body; + return e; +} + +AstNode* ast_new_do_while_stmt(AstNode* cond, AstNode* body) { + AstNode* e = ast_new(AstNodeKind_do_while_stmt); + e->as.do_while_stmt = calloc(1, sizeof(DoWhileStmtNode)); + e->as.do_while_stmt->cond = cond; + e->as.do_while_stmt->body = body; + return e; +} + +AstNode* ast_new_switch_stmt(AstNode* expr) { + AstNode* e = ast_new(AstNodeKind_switch_stmt); + e->as.switch_stmt = calloc(1, sizeof(SwitchStmtNode)); + e->as.switch_stmt->expr = expr; + return e; +} + +AstNode* ast_new_case_label(int value, AstNode* body) { + AstNode* e = ast_new(AstNodeKind_case_label); + e->as.case_label = calloc(1, sizeof(CaseLabelNode)); + e->as.case_label->value = value; + e->as.case_label->body = body; + return e; +} + +AstNode* ast_new_default_label(AstNode* body) { + AstNode* e = ast_new(AstNodeKind_default_label); + e->as.default_label = calloc(1, sizeof(DefaultLabelNode)); + e->as.default_label->body = body; + return e; +} + +AstNode* ast_new_goto_stmt(const char* label) { + AstNode* e = ast_new(AstNodeKind_goto_stmt); + e->as.goto_stmt = calloc(1, sizeof(GotoStmtNode)); + e->as.goto_stmt->label = label; + return e; +} + +AstNode* ast_new_label_stmt(const char* name, AstNode* body) { + AstNode* e = ast_new(AstNodeKind_label_stmt); + e->as.label_stmt = calloc(1, sizeof(LabelStmtNode)); + e->as.label_stmt->name = name; + e->as.label_stmt->body = body; + return e; +} + +AstNode* ast_new_declarator(const char* name, Type* ty) { + AstNode* e = ast_new(AstNodeKind_declarator); + e->as.declarator = calloc(1, sizeof(DeclaratorNode)); + e->as.declarator->name = name; + e->ty = ty; + return e; +} + +AstNode* ast_new_func_def(const char* name, Type* ty, AstNode* params, AstNode* body, int stack_size) { + AstNode* e = ast_new(AstNodeKind_func_def); + e->as.func_def = calloc(1, sizeof(FuncDefNode)); + e->as.func_def->name = name; + e->as.func_def->params = params; + e->as.func_def->body = body; + e->as.func_def->stack_size = stack_size; + e->ty = ty; + return e; +} + +AstNode* ast_new_enum_member(const char* name, int value) { + AstNode* e = ast_new(AstNodeKind_enum_member); + e->as.enum_member = calloc(1, sizeof(EnumMemberNode)); + e->as.enum_member->name = name; + e->as.enum_member->value = value; + return e; +} + +AstNode* ast_new_typedef_decl(const char* name, Type* ty) { + AstNode* e = ast_new(AstNodeKind_typedef_decl); + e->as.typedef_decl = calloc(1, sizeof(TypedefDeclNode)); + e->as.typedef_decl->name = name; + e->ty = ty; + return e; +} + +AstNode* ast_new_struct_def(const char* name) { + AstNode* e = ast_new(AstNodeKind_struct_def); + e->as.struct_def = calloc(1, sizeof(StructDefNode)); + e->as.struct_def->name = name; + return e; +} + +AstNode* ast_new_union_def(const char* name) { + AstNode* e = ast_new(AstNodeKind_union_def); + e->as.union_def = calloc(1, sizeof(UnionDefNode)); + e->as.union_def->name = name; + return e; +} + +AstNode* ast_new_enum_def(const char* name) { + AstNode* e = ast_new(AstNodeKind_enum_def); + e->as.enum_def = calloc(1, sizeof(EnumDefNode)); + e->as.enum_def->name = name; + return e; +} + +AstNode* ast_new_array_initializer(AstNode* list) { + AstNode* e = ast_new(AstNodeKind_array_initializer); + e->as.array_initializer = calloc(1, sizeof(ArrayInitializerNode)); + e->as.array_initializer->list = list; + return e; +} + +int type_sizeof_struct(Type* ty) { + int next_offset = 0; + int struct_align = 0; + + for (int i = 0; i < members_of(ty)->as.list->len; ++i) { + AstNode* member = &members_of(ty)->as.list->items[i]; + int size, align; + if (member->as.struct_member->is_bitfield) { + // TODO + size = member->as.struct_member->bitfield_width / 8; + align = 1; + } else { + size = type_sizeof(member->ty); + align = type_alignof(member->ty); + } + + next_offset = to_aligned(next_offset, align); + next_offset += size; + if (struct_align < align) { + struct_align = align; + } + } + return to_aligned(next_offset, struct_align); +} + +int type_sizeof_union(Type* ty) { + int union_size = 0; + int union_align = 0; + + for (int i = 0; i < members_of(ty)->as.list->len; ++i) { + AstNode* member = &members_of(ty)->as.list->items[i]; + int size = type_sizeof(member->ty); + int align = type_alignof(member->ty); + + size = to_aligned(size, align); + if (union_size < size) { + union_size = size; + } + if (union_align < align) { + union_align = align; + } + } + return to_aligned(union_size, union_align); +} + +int type_alignof_struct(Type* ty) { + int struct_align = 0; + + for (int i = 0; i < members_of(ty)->as.list->len; ++i) { + AstNode* member = &members_of(ty)->as.list->items[i]; + int align = type_alignof(member->ty); + + if (struct_align < align) { + struct_align = align; + } + } + return struct_align; +} + +int type_alignof_union(Type* ty) { + int union_align = 0; + + for (int i = 0; i < members_of(ty)->as.list->len; ++i) { + AstNode* member = &members_of(ty)->as.list->items[i]; + int align = type_alignof(member->ty); + + if (union_align < align) { + union_align = align; + } + } + return union_align; +} + +int type_offsetof(Type* ty, const char* name) { + if (ty->kind == TypeKind_union) { + return 0; + } + if (ty->kind != TypeKind_struct) { + fatal_error("type_offsetof: type is neither a struct nor a union"); + } + + int next_offset = 0; + + for (int i = 0; i < members_of(ty)->as.list->len; ++i) { + AstNode* member = &members_of(ty)->as.list->items[i]; + int size, align; + if (member->as.struct_member->is_bitfield) { + // TODO + size = member->as.struct_member->bitfield_width / 8; + align = 1; + } else { + size = type_sizeof(member->ty); + align = type_alignof(member->ty); + } + + next_offset = to_aligned(next_offset, align); + if (strcmp(member->as.struct_member->name, name) == 0) { + if (member->as.struct_member->is_bitfield) { + // C23: 7.21.4 + // if the specified member is a bit-field, the behavior is undefined. + fatal_error("type_offsetof: the result of offsetof(bit-field) is undefined"); + } + return next_offset; + } + next_offset += size; + } + + fatal_error("type_offsetof: member not found"); +} + +Type* type_member_typeof(Type* ty, const char* name) { + if (ty->kind != TypeKind_struct && ty->kind != TypeKind_union) { + fatal_error("type_member_typeof: type is neither a struct nor a union"); + } + + for (int i = 0; i < members_of(ty)->as.list->len; ++i) { + AstNode* member = &members_of(ty)->as.list->items[i]; + if (strcmp(member->as.struct_member->name, name) == 0) { + return member->ty; + } + } + + fatal_error("type_offsetof: member not found"); +} diff --git a/src/cc1/ast.h b/src/cc1/ast.h new file mode 100644 index 0000000..ef984a9 --- /dev/null +++ b/src/cc1/ast.h @@ -0,0 +1,433 @@ +#ifndef DUCC_AST_H +#define DUCC_AST_H + +#include "../lib/ducc.h" +#include "io.h" + +typedef enum { + StorageClass_unspecified, + StorageClass_auto, + StorageClass_constexpr, + StorageClass_extern, + StorageClass_register, + StorageClass_static, + StorageClass_thread_local, + StorageClass_typedef, +} StorageClass; + +const char* storageclass_stringify(StorageClass s); + +typedef enum { + TypeKind_unknown, + + TypeKind_void, + TypeKind_char, + TypeKind_schar, + TypeKind_uchar, + TypeKind_short, + TypeKind_ushort, + TypeKind_int, + TypeKind_uint, + TypeKind_long, + TypeKind_ulong, + TypeKind_llong, + TypeKind_ullong, + TypeKind_float, + TypeKind_double, + TypeKind_ldouble, + TypeKind_bool, + TypeKind_struct, + TypeKind_union, + TypeKind_enum, + TypeKind_ptr, + TypeKind_array, + TypeKind_func, +} TypeKind; + +const char* type_kind_stringify(TypeKind k); + +struct AstNode; +typedef struct AstNode AstNode; + +typedef struct { + AstNode* defs; + size_t index; +} TypeRef; + +typedef struct Type { + TypeKind kind; + StorageClass storage_class; + // Check `base` instead of `kind` to test if the type is an array or a pointer. + struct Type* base; + int array_size; + TypeRef ref; + struct Type* result; + AstNode* params; +} Type; + +Type* type_new(TypeKind kind); +Type* type_dup(Type* src); +Type* type_new_ptr(Type* base); +Type* type_new_array(Type* elem, int size); +Type* type_new_static_string(int len); +Type* type_array_to_ptr(Type* ty); +Type* type_new_func(Type* result, AstNode* params); +bool type_is_unsized(Type* ty); +bool type_is_unsigned(Type* ty); + +int type_sizeof_struct(Type* ty); +int type_sizeof_union(Type* ty); +int type_alignof_struct(Type* ty); +int type_alignof_union(Type* ty); +int type_offsetof(Type* ty, const char* name); +Type* type_member_typeof(Type* ty, const char* name); + +int type_sizeof(Type* ty); +int type_alignof(Type* ty); + +int to_aligned(int n, int a); + +typedef enum { + AstNodeKind_unknown, + AstNodeKind_nop, + + AstNodeKind_array_initializer, + AstNodeKind_assign_expr, + AstNodeKind_binary_expr, + AstNodeKind_break_stmt, + AstNodeKind_case_label, + AstNodeKind_cast_expr, + AstNodeKind_cond_expr, + AstNodeKind_continue_stmt, + AstNodeKind_default_label, + AstNodeKind_deref_expr, + AstNodeKind_do_while_stmt, + AstNodeKind_enum_def, + AstNodeKind_enum_member, + AstNodeKind_expr_stmt, + AstNodeKind_for_stmt, + AstNodeKind_func, + AstNodeKind_func_call, + AstNodeKind_func_decl, + AstNodeKind_func_def, + AstNodeKind_goto_stmt, + AstNodeKind_gvar, + AstNodeKind_gvar_decl, + AstNodeKind_if_stmt, + AstNodeKind_int_expr, + AstNodeKind_double_expr, + AstNodeKind_label_stmt, + AstNodeKind_list, + AstNodeKind_logical_expr, + AstNodeKind_lvar, + AstNodeKind_lvar_decl, + AstNodeKind_param, + AstNodeKind_ref_expr, + AstNodeKind_return_stmt, + AstNodeKind_str_expr, + AstNodeKind_struct_decl, + AstNodeKind_struct_def, + AstNodeKind_struct_member, + AstNodeKind_switch_stmt, + AstNodeKind_type, + AstNodeKind_typedef_decl, + AstNodeKind_unary_expr, + AstNodeKind_union_decl, + AstNodeKind_union_def, + + // Intermediate ASTs: they are used only in parsing, not for parse result. + AstNodeKind_declarator, +} AstNodeKind; + +const char* astnode_kind_stringify(AstNodeKind k); + +// Expression nodes +typedef struct { + int value; +} IntExprNode; + +typedef struct { + double value; +} DoubleExprNode; + +typedef struct { + int idx; +} StrExprNode; + +typedef struct { + int op; + AstNode* operand; +} UnaryExprNode; + +typedef struct { + int op; + AstNode* lhs; + AstNode* rhs; +} BinaryExprNode; + +typedef struct { + int op; + AstNode* lhs; + AstNode* rhs; +} LogicalExprNode; + +typedef struct { + int op; + AstNode* lhs; + AstNode* rhs; +} AssignExprNode; + +typedef struct { + AstNode* operand; +} CastExprNode; + +typedef struct { + AstNode* cond; + AstNode* then; + AstNode* else_; +} CondExprNode; + +typedef struct { + AstNode* operand; +} DerefExprNode; + +typedef struct { + AstNode* operand; +} RefExprNode; + +typedef struct { + AstNode* func; + AstNode* args; +} FuncCallNode; + +// Statement nodes +typedef struct { + AstNode* cond; + AstNode* then; + AstNode* else_; +} IfStmtNode; + +typedef struct { + AstNode* init; + AstNode* cond; + AstNode* update; + AstNode* body; +} ForStmtNode; + +typedef struct { + AstNode* cond; + AstNode* body; +} DoWhileStmtNode; + +typedef struct { + AstNode* expr; + AstNode* body; +} SwitchStmtNode; + +typedef struct { + int value; + AstNode* body; +} CaseLabelNode; + +typedef struct { + AstNode* body; +} DefaultLabelNode; + +typedef struct { + const char* name; + AstNode* body; +} LabelStmtNode; + +typedef struct { + AstNode* expr; +} ReturnStmtNode; + +typedef struct { + const char* label; +} GotoStmtNode; + +typedef struct { + AstNode* expr; +} ExprStmtNode; + +// Declaration nodes +typedef struct { + const char* name; + AstNode* params; + AstNode* body; + int stack_size; +} FuncDefNode; + +typedef struct { + const char* name; + int stack_offset; +} LvarNode; + +typedef struct { + AstNode* expr; +} LvarDeclNode; + +typedef struct { + const char* name; + int stack_offset; +} ParamNode; + +typedef struct { + const char* name; + AstNode* expr; +} GvarDeclNode; + +typedef struct { + const char* name; + AstNode* members; +} StructDefNode; + +typedef struct { + const char* name; + AstNode* members; +} UnionDefNode; + +typedef struct { + const char* name; + AstNode* members; +} EnumDefNode; + +typedef struct { + const char* name; + int value; +} EnumMemberNode; + +typedef struct { + const char* name; + AstNode* init; +} DeclaratorNode; + +typedef struct { + const char* name; +} FuncNode; + +typedef struct { + const char* name; +} GvarNode; + +typedef struct { + const char* name; + bool is_bitfield; + int bitfield_offset; + int bitfield_width; +} StructMemberNode; + +typedef struct { + const char* name; +} TypedefDeclNode; + +typedef struct { + AstNode* list; +} ArrayInitializerNode; + +typedef struct { + AstNode* items; + int len; + int cap; +} ListNode; + +struct AstNode { + AstNodeKind kind; + // TODO: currently only limited set of ast nodes have loc field. + SourceLocation loc; + Type* ty; + union { + IntExprNode* int_expr; + DoubleExprNode* double_expr; + StrExprNode* str_expr; + UnaryExprNode* unary_expr; + BinaryExprNode* binary_expr; + LogicalExprNode* logical_expr; + AssignExprNode* assign_expr; + CastExprNode* cast_expr; + CondExprNode* cond_expr; + DerefExprNode* deref_expr; + RefExprNode* ref_expr; + FuncCallNode* func_call; + IfStmtNode* if_stmt; + ForStmtNode* for_stmt; + DoWhileStmtNode* do_while_stmt; + SwitchStmtNode* switch_stmt; + CaseLabelNode* case_label; + DefaultLabelNode* default_label; + LabelStmtNode* label_stmt; + ReturnStmtNode* return_stmt; + GotoStmtNode* goto_stmt; + ExprStmtNode* expr_stmt; + FuncDefNode* func_def; + LvarNode* lvar; + LvarDeclNode* lvar_decl; + ParamNode* param; + GvarDeclNode* gvar_decl; + StructDefNode* struct_def; + UnionDefNode* union_def; + EnumDefNode* enum_def; + EnumMemberNode* enum_member; + DeclaratorNode* declarator; + FuncNode* func; + GvarNode* gvar; + StructMemberNode* struct_member; + TypedefDeclNode* typedef_decl; + ArrayInitializerNode* array_initializer; + ListNode* list; + } as; +}; + +typedef struct { + AstNode* funcs; + AstNode* vars; + const char** str_literals; +} Program; + +AstNode* ast_new(AstNodeKind kind); +AstNode* ast_new_list(int capacity); +void ast_append(AstNode* list, AstNode* item); +AstNode* ast_new_int(int v); +AstNode* ast_new_double(double v); +AstNode* ast_new_unary_expr(int op, AstNode* operand); +AstNode* ast_new_binary_expr(int op, AstNode* lhs, AstNode* rhs); + +AstNode* ast_new_assign_expr(int op, AstNode* lhs, AstNode* rhs); +AstNode* ast_new_assign_add_expr(AstNode* lhs, AstNode* rhs); +AstNode* ast_new_assign_sub_expr(AstNode* lhs, AstNode* rhs); +AstNode* ast_new_ref_expr(AstNode* operand); +AstNode* ast_new_deref_expr(AstNode* operand); +AstNode* ast_new_member_access_expr(AstNode* obj, const char* name); +AstNode* ast_new_cast_expr(AstNode* operand, Type* result_ty); +AstNode* ast_new_logical_expr(int op, AstNode* lhs, AstNode* rhs); +AstNode* ast_new_cond_expr(AstNode* cond, AstNode* then, AstNode* else_); +AstNode* ast_new_str_expr(int idx, Type* ty); +AstNode* ast_new_func_call(AstNode* func, AstNode* args); +AstNode* ast_new_func(const char* name, Type* ty); +AstNode* ast_new_gvar(const char* name, Type* ty); +AstNode* ast_new_lvar(const char* name, int stack_offset, Type* ty); + +AstNode* ast_new_nop(void); +AstNode* ast_new_break_stmt(void); +AstNode* ast_new_continue_stmt(void); +AstNode* ast_new_return_stmt(AstNode* expr); +AstNode* ast_new_expr_stmt(AstNode* expr); +AstNode* ast_new_if_stmt(AstNode* cond, AstNode* then, AstNode* else_); +AstNode* ast_new_for_stmt(AstNode* init, AstNode* cond, AstNode* update, AstNode* body); +AstNode* ast_new_do_while_stmt(AstNode* cond, AstNode* body); +AstNode* ast_new_switch_stmt(AstNode* expr); +AstNode* ast_new_case_label(int value, AstNode* body); +AstNode* ast_new_default_label(AstNode* body); +AstNode* ast_new_goto_stmt(const char* label); +AstNode* ast_new_label_stmt(const char* name, AstNode* body); + +AstNode* ast_new_declarator(const char* name, Type* ty); +AstNode* ast_new_func_def(const char* name, Type* ty, AstNode* params, AstNode* body, int stack_size); +AstNode* ast_new_enum_member(const char* name, int value); +AstNode* ast_new_typedef_decl(const char* name, Type* ty); +AstNode* ast_new_struct_def(const char* name); +AstNode* ast_new_union_def(const char* name); +AstNode* ast_new_enum_def(const char* name); +AstNode* ast_new_array_initializer(AstNode* list); + +#endif diff --git a/src/cc1/codegen.c b/src/cc1/codegen.c new file mode 100644 index 0000000..a08f6ad --- /dev/null +++ b/src/cc1/codegen.c @@ -0,0 +1,925 @@ +#include "codegen.h" +#include +#include +#include "../lib/common.h" +#include "parse.h" +#include "preprocess.h" + +typedef enum { + GenMode_lval, + GenMode_rval, +} GenMode; + +typedef struct { + Program* prog; + FILE* out; + int next_label; + int* loop_labels; + AstNode* current_func; + int switch_label; +} CodeGen; + +static CodeGen* codegen_new(Program* prog, FILE* out) { + CodeGen* g = calloc(1, sizeof(CodeGen)); + g->prog = prog; + g->out = out; + g->next_label = 1; + g->loop_labels = calloc(1024, sizeof(int)); + g->switch_label = -1; + return g; +} + +static int codegen_new_label(CodeGen* g) { + int new_label = g->next_label; + ++g->next_label; + return new_label; +} + +static void codegen_expr(CodeGen* g, AstNode* ast, GenMode gen_mode); +static void codegen_stmt(CodeGen* g, AstNode* ast); + +static const char* param_reg(int n) { + if (n == 0) { + return "rdi"; + } else if (n == 1) { + return "rsi"; + } else if (n == 2) { + return "rdx"; + } else if (n == 3) { + return "rcx"; + } else if (n == 4) { + return "r8"; + } else if (n == 5) { + return "r9"; + } else { + unreachable(); + } +} + +static void codegen_func_prologue(CodeGen* g, FuncDefNode* func_def) { + fprintf(g->out, " push rbp\n"); + fprintf(g->out, " mov rbp, rsp\n"); + for (int i = 0, j = 0; i < func_def->params->as.list->len; ++i) { + AstNode* param = &func_def->params->as.list->items[i]; + if (param->as.param->stack_offset >= 0) { + fprintf(g->out, " push %s\n", param_reg(j++)); + } + } + // Note: rsp must be aligned to 8. + fprintf(g->out, " sub rsp, %d\n", to_aligned(func_def->stack_size, 8)); +} + +static void codegen_func_epilogue(CodeGen* g) { + fprintf(g->out, " mov rsp, rbp\n"); + fprintf(g->out, " pop rbp\n"); + fprintf(g->out, " ret\n"); +} + +static void codegen_int_expr(CodeGen* g, IntExprNode* expr) { + fprintf(g->out, " mov rax, %d\n", expr->value); +} + +static void codegen_str_expr(CodeGen* g, StrExprNode* expr) { + fprintf(g->out, " lea rax, .Lstr__%d[rip]\n", expr->idx); +} + +static void codegen_unary_expr(CodeGen* g, UnaryExprNode* expr) { + codegen_expr(g, expr->operand, GenMode_rval); + if (expr->op == TokenKind_not) { + fprintf(g->out, " mov rdi, 0\n"); + fprintf(g->out, " cmp rax, rdi\n"); + fprintf(g->out, " sete al\n"); + fprintf(g->out, " movzb rax, al\n"); + } else if (expr->op == TokenKind_tilde) { + fprintf(g->out, " not rax\n"); + } else { + unreachable(); + } +} + +static void codegen_ref_expr(CodeGen* g, RefExprNode* expr) { + codegen_expr(g, expr->operand, GenMode_lval); +} + +static void codegen_lval2rval(CodeGen* g, Type* ty) { + if (ty->kind == TypeKind_array || ty->kind == TypeKind_func) { + return; + } + + int size = type_sizeof(ty); + if (size == 1) { + fprintf(g->out, " movsx rax, BYTE PTR [rax]\n"); + } else if (size == 2) { + fprintf(g->out, " movsx rax, WORD PTR [rax]\n"); + } else if (size == 4) { + fprintf(g->out, " movsxd rax, DWORD PTR [rax]\n"); + } else if (size == 8) { + fprintf(g->out, " mov rax, [rax]\n"); + } else { + // Do nothing. + } +} + +static void codegen_deref_expr(CodeGen* g, DerefExprNode* expr, GenMode gen_mode) { + codegen_expr(g, expr->operand, GenMode_rval); + if (gen_mode == GenMode_rval) { + codegen_lval2rval(g, expr->operand->ty->base); + } +} + +static void codegen_cast_expr(CodeGen* g, CastExprNode* expr, Type* ty) { + codegen_expr(g, expr->operand, GenMode_rval); + + // (void) cast does nothing. + if (ty->kind == TypeKind_void) + return; + + int src_size = type_sizeof(expr->operand->ty); + int dst_size = type_sizeof(ty); + + if (src_size == dst_size) + return; + + if (dst_size == 1) { + fprintf(g->out, " movsx rax, al\n"); + } else if (dst_size == 2) { + if (src_size == 1) { + fprintf(g->out, " movsx rax, al\n"); + } else { + fprintf(g->out, " movsx rax, ax\n"); + } + } else if (dst_size == 4) { + if (src_size == 1) { + fprintf(g->out, " movsx rax, al\n"); + } else if (src_size == 2) { + fprintf(g->out, " movsx rax, ax\n"); + } else { + fprintf(g->out, " movsxd rax, eax\n"); + } + } else if (dst_size == 8) { + if (src_size == 1) { + fprintf(g->out, " movsx rax, al\n"); + } else if (src_size == 2) { + fprintf(g->out, " movsx rax, ax\n"); + } else if (src_size == 4) { + fprintf(g->out, " movsxd rax, eax\n"); + } + } +} + +static void codegen_logical_expr(CodeGen* g, LogicalExprNode* expr) { + int label = codegen_new_label(g); + + if (expr->op == TokenKind_andand) { + codegen_expr(g, expr->lhs, GenMode_rval); + fprintf(g->out, " cmp rax, 0\n"); + fprintf(g->out, " je .Lelse%d\n", label); + codegen_expr(g, expr->rhs, GenMode_rval); + fprintf(g->out, " jmp .Lend%d\n", label); + fprintf(g->out, ".Lelse%d:\n", label); + fprintf(g->out, " mov rax, 0\n"); + fprintf(g->out, ".Lend%d:\n", label); + } else { + codegen_expr(g, expr->lhs, GenMode_rval); + fprintf(g->out, " cmp rax, 0\n"); + fprintf(g->out, " je .Lelse%d\n", label); + fprintf(g->out, " mov rax, 1\n"); + fprintf(g->out, " jmp .Lend%d\n", label); + fprintf(g->out, ".Lelse%d:\n", label); + codegen_expr(g, expr->rhs, GenMode_rval); + fprintf(g->out, ".Lend%d:\n", label); + } +} + +static void codegen_binary_expr(CodeGen* g, BinaryExprNode* expr, GenMode gen_mode) { + codegen_expr(g, expr->lhs, gen_mode); + fprintf(g->out, " push rax\n"); + codegen_expr(g, expr->rhs, gen_mode); + fprintf(g->out, " mov rdi, rax\n"); + fprintf(g->out, " pop rax\n"); + // rax=lhs, rdi=rhs + if (expr->op == TokenKind_plus) { + fprintf(g->out, " add rax, rdi\n"); + } else if (expr->op == TokenKind_minus) { + fprintf(g->out, " sub rax, rdi\n"); + } else if (expr->op == TokenKind_star) { + fprintf(g->out, " imul rax, rdi\n"); + } else if (expr->op == TokenKind_slash) { + if (type_is_unsigned(expr->lhs->ty)) { + fprintf(g->out, " xor rdx, rdx\n"); + fprintf(g->out, " div rdi\n"); + } else { + fprintf(g->out, " cqo\n"); + fprintf(g->out, " idiv rdi\n"); + } + } else if (expr->op == TokenKind_percent) { + if (type_is_unsigned(expr->lhs->ty)) { + fprintf(g->out, " xor rdx, rdx\n"); + fprintf(g->out, " div rdi\n"); + } else { + fprintf(g->out, " cqo\n"); + fprintf(g->out, " idiv rdi\n"); + } + fprintf(g->out, " mov rax, rdx\n"); + } else if (expr->op == TokenKind_and) { + fprintf(g->out, " and rax, rdi\n"); + } else if (expr->op == TokenKind_or) { + fprintf(g->out, " or rax, rdi\n"); + } else if (expr->op == TokenKind_xor) { + fprintf(g->out, " xor rax, rdi\n"); + } else if (expr->op == TokenKind_lshift) { + fprintf(g->out, " mov rcx, rdi\n"); + fprintf(g->out, " shl rax, cl\n"); + } else if (expr->op == TokenKind_rshift) { + fprintf(g->out, " mov rcx, rdi\n"); + if (type_is_unsigned(expr->lhs->ty)) { + fprintf(g->out, " shr rax, cl\n"); + } else { + fprintf(g->out, " sar rax, cl\n"); + } + } else if (expr->op == TokenKind_eq) { + fprintf(g->out, " cmp rax, rdi\n"); + fprintf(g->out, " sete al\n"); + fprintf(g->out, " movzb rax, al\n"); + } else if (expr->op == TokenKind_ne) { + fprintf(g->out, " cmp rax, rdi\n"); + fprintf(g->out, " setne al\n"); + fprintf(g->out, " movzb rax, al\n"); + } else if (expr->op == TokenKind_lt) { + fprintf(g->out, " cmp rax, rdi\n"); + fprintf(g->out, " setl al\n"); + fprintf(g->out, " movzb rax, al\n"); + } else if (expr->op == TokenKind_le) { + fprintf(g->out, " cmp rax, rdi\n"); + fprintf(g->out, " setle al\n"); + fprintf(g->out, " movzb rax, al\n"); + } else { + unreachable(); + } +} + +static void codegen_cond_expr(CodeGen* g, CondExprNode* expr, GenMode gen_mode) { + int label = codegen_new_label(g); + + codegen_expr(g, expr->cond, GenMode_rval); + fprintf(g->out, " cmp rax, 0\n"); + fprintf(g->out, " je .Lelse%d\n", label); + codegen_expr(g, expr->then, gen_mode); + fprintf(g->out, " jmp .Lend%d\n", label); + fprintf(g->out, ".Lelse%d:\n", label); + codegen_expr(g, expr->else_, gen_mode); + fprintf(g->out, ".Lend%d:\n", label); +} + +static void codegen_assign_expr_helper(CodeGen* g, AssignExprNode* expr) { + if (expr->op == TokenKind_assign) { + return; + } + + fprintf(g->out, " mov rdi, rax\n"); + fprintf(g->out, " mov rax, [rsp]\n"); + codegen_lval2rval(g, expr->lhs->ty); + + if (expr->op == TokenKind_assign_add) { + fprintf(g->out, " add rax, rdi\n"); + } else if (expr->op == TokenKind_assign_sub) { + fprintf(g->out, " sub rax, rdi\n"); + } else if (expr->op == TokenKind_assign_mul) { + fprintf(g->out, " imul rax, rdi\n"); + } else if (expr->op == TokenKind_assign_div) { + if (type_is_unsigned(expr->lhs->ty)) { + fprintf(g->out, " xor rdx, rdx\n"); + fprintf(g->out, " div rdi\n"); + } else { + fprintf(g->out, " cqo\n"); + fprintf(g->out, " idiv rdi\n"); + } + } else if (expr->op == TokenKind_assign_mod) { + if (type_is_unsigned(expr->lhs->ty)) { + fprintf(g->out, " xor rdx, rdx\n"); + fprintf(g->out, " div rdi\n"); + } else { + fprintf(g->out, " cqo\n"); + fprintf(g->out, " idiv rdi\n"); + } + fprintf(g->out, " mov rax, rdx\n"); + } else if (expr->op == TokenKind_assign_or) { + fprintf(g->out, " or rax, rdi\n"); + } else if (expr->op == TokenKind_assign_and) { + fprintf(g->out, " and rax, rdi\n"); + } else if (expr->op == TokenKind_assign_xor) { + fprintf(g->out, " xor rax, rdi\n"); + } else if (expr->op == TokenKind_assign_lshift) { + fprintf(g->out, " mov rcx, rdi\n"); + fprintf(g->out, " shl rax, cl\n"); + } else if (expr->op == TokenKind_assign_rshift) { + fprintf(g->out, " mov rcx, rdi\n"); + if (type_is_unsigned(expr->lhs->ty)) { + fprintf(g->out, " shr rax, cl\n"); + } else { + fprintf(g->out, " sar rax, cl\n"); + } + } else { + unreachable(); + } +} + +static void codegen_assign_expr(CodeGen* g, AssignExprNode* expr) { + int sizeof_lhs = type_sizeof(expr->lhs->ty); + + codegen_expr(g, expr->lhs, GenMode_lval); + fprintf(g->out, " push rax\n"); + codegen_expr(g, expr->rhs, GenMode_rval); + codegen_assign_expr_helper(g, expr); + fprintf(g->out, " pop rdi\n"); + if (sizeof_lhs == 1) { + fprintf(g->out, " mov BYTE PTR [rdi], al\n"); + } else if (sizeof_lhs == 2) { + fprintf(g->out, " mov WORD PTR [rdi], ax\n"); + } else if (sizeof_lhs == 4) { + fprintf(g->out, " mov DWORD PTR [rdi], eax\n"); + } else if (sizeof_lhs == 8) { + fprintf(g->out, " mov [rdi], rax\n"); + } else { + if (expr->op != TokenKind_assign) { + unimplemented(); + } + // rax: address of rhs + // rdi: address of lhs + // Perform byte-wise copy. Use r10 register as temporary space. + for (int i = 0; i < sizeof_lhs; ++i) { + fprintf(g->out, " mov r10b, %d[rax]\n", i); + fprintf(g->out, " mov %d[rdi], r10b\n", i); + } + } +} + +static void codegen_args(CodeGen* g, AstNode* args) { + int* required_gp_regs_for_each_arg = calloc(args->as.list->len, sizeof(int)); + + int gp_regs = 6; + for (int i = 0; i < args->as.list->len; ++i) { + AstNode* arg = &args->as.list->items[i]; + int ty_size = type_sizeof(arg->ty); + // TODO + if (arg->ty->kind == TypeKind_array) { + ty_size = 8; + } + int required_gp_regs; + if (ty_size <= 8) { + required_gp_regs = 1; + } else if (ty_size <= 16) { + required_gp_regs = 2; + } else { + required_gp_regs = 0; + } + if (required_gp_regs <= gp_regs) { + gp_regs -= required_gp_regs; + } else { + required_gp_regs = 0; + } + required_gp_regs_for_each_arg[i] = required_gp_regs; + } + + // Evaluate arguments in the reverse order (right to left). + // Arguments passed by stack. + for (int i = args->as.list->len - 1; i >= 0; --i) { + AstNode* arg = &args->as.list->items[i]; + if (required_gp_regs_for_each_arg[i] == 0) { + codegen_expr(g, arg, GenMode_rval); + int ty_size = type_sizeof(arg->ty); + if (ty_size <= 8) { + fprintf(g->out, " push rax\n"); + } else { + // Perform byte-wise copy onto stack. Use r10 register as temporary space. + // NOTE: rsp must be aligned to 8. + fprintf(g->out, " sub rsp, %d\n", to_aligned(ty_size, 8)); + for (int i = 0; i < ty_size; ++i) { + // Copy a sinle byte from the address that rax points to to the stack via r10 register. + fprintf(g->out, " mov r10b, %d[rax]\n", i); + fprintf(g->out, " mov %d[rsp], r10b\n", i); + } + } + } + } + // Arguments passed by registers. + for (int i = args->as.list->len - 1; i >= 0; --i) { + AstNode* arg = &args->as.list->items[i]; + if (required_gp_regs_for_each_arg[i] != 0) { + codegen_expr(g, arg, GenMode_rval); + if (required_gp_regs_for_each_arg[i] == 1) { + fprintf(g->out, " push rax\n"); + } else { + assert(required_gp_regs_for_each_arg[i] == 2); + fprintf(g->out, " push [rax]\n"); + fprintf(g->out, " push [rax+8]\n"); + } + } + } + // Pop pushed arguments onto registers. + for (int i = 0, j = 0; i < args->as.list->len; ++i) { + for (int k = 0; k < required_gp_regs_for_each_arg[i]; ++k) { + fprintf(g->out, " pop %s\n", param_reg(j++)); + } + } +} + +static void codegen_func_call(CodeGen* g, FuncCallNode* call) { + const char* func_name = NULL; + if (call->func->kind == AstNodeKind_func) { + func_name = call->func->as.func->name; + } + + if (func_name && strcmp(func_name, "__ducc_va_start") == 0) { + fprintf(g->out, " # __ducc_va_start BEGIN\n"); + AstNode* va_list_args = &call->args->as.list->items[0]; + codegen_expr(g, va_list_args, GenMode_rval); + fprintf(g->out, " mov rdi, rax\n"); + + // Allocate save area. + fprintf(g->out, " sub rsp, 48\n"); + + fprintf(g->out, " mov [rsp+ 0], rdi\n"); + fprintf(g->out, " mov [rsp+ 8], rsi\n"); + fprintf(g->out, " mov [rsp+16], rdx\n"); + fprintf(g->out, " mov [rsp+24], rcx\n"); + fprintf(g->out, " mov [rsp+32], r8\n"); + fprintf(g->out, " mov [rsp+40], r9\n"); + + // Initialize va_list. + fprintf(g->out, " mov DWORD PTR [rdi], 8\n"); // gp_offset + fprintf(g->out, " mov DWORD PTR [rdi+4], 0\n"); // fp_offset + fprintf(g->out, " lea rax, [rbp+16]\n"); // overflow_arg_area + fprintf(g->out, " mov QWORD PTR [rdi+8], rax\n"); + fprintf(g->out, " mov QWORD PTR [rdi+16], rsp\n"); // reg_save_area + + fprintf(g->out, " mov rax, 0\n"); // dummy return value + fprintf(g->out, " # __ducc_va_start END\n"); + return; + } + + if (func_name && strcmp(func_name, "__ducc_va_arg") == 0) { + fprintf(g->out, " # __ducc_va_arg BEGIN\n"); + + // Evaluate va_list argument (first argument) + AstNode* va_list_arg = &call->args->as.list->items[0]; + codegen_expr(g, va_list_arg, GenMode_rval); + fprintf(g->out, " mov rdi, rax\n"); // rdi = pointer to va_list + + // Evaluate size argument (second argument) + AstNode* size_arg = &call->args->as.list->items[1]; + codegen_expr(g, size_arg, GenMode_rval); + fprintf(g->out, " mov rsi, rax\n"); // rsi = size + + int label = codegen_new_label(g); + + // Check if gp_offset < 48 (6 registers * 8 bytes) + fprintf(g->out, " mov eax, DWORD PTR [rdi]\n"); // eax = gp_offset + fprintf(g->out, " cmp eax, 48\n"); + fprintf(g->out, " jae .Lva_arg_overflow%d\n", label); + + // Fetch from register save area + fprintf(g->out, " mov rcx, QWORD PTR [rdi+16]\n"); // rcx = reg_save_area + fprintf(g->out, " movsx rdx, eax\n"); // rdx = gp_offset (sign-extended) + fprintf(g->out, " add rcx, rdx\n"); // rcx = reg_save_area + gp_offset + fprintf(g->out, " add eax, 8\n"); // gp_offset += 8 + fprintf(g->out, " mov DWORD PTR [rdi], eax\n"); // store updated gp_offset + fprintf(g->out, " mov rax, rcx\n"); // return pointer to argument + fprintf(g->out, " jmp .Lva_arg_end%d\n", label); + + // Fetch from overflow area (stack) + fprintf(g->out, ".Lva_arg_overflow%d:\n", label); + fprintf(g->out, " mov rcx, QWORD PTR [rdi+8]\n"); // rcx = overflow_arg_area + fprintf(g->out, " mov rax, rcx\n"); // return pointer to argument + fprintf(g->out, " add rcx, 8\n"); // overflow_arg_area += 8 + fprintf(g->out, " mov QWORD PTR [rdi+8], rcx\n"); // store updated overflow_arg_area + + fprintf(g->out, ".Lva_arg_end%d:\n", label); + fprintf(g->out, " # __ducc_va_arg END\n"); + return; + } + + AstNode* args = call->args; + + int gp_regs = 6; + int pass_by_stack_offset = -16; + for (int i = 0; i < args->as.list->len; ++i) { + AstNode* arg = &args->as.list->items[i]; + int ty_size = type_sizeof(arg->ty); + // TODO + if (arg->ty->kind == TypeKind_array) { + ty_size = 8; + } + int required_gp_regs; + if (ty_size <= 8) { + required_gp_regs = 1; + } else if (ty_size <= 16) { + required_gp_regs = 2; + } else { + required_gp_regs = 0; + } + if (required_gp_regs <= gp_regs) { + gp_regs -= required_gp_regs; + } else { + required_gp_regs = 0; + } + if (required_gp_regs == 0) { + pass_by_stack_offset -= to_aligned(ty_size, 8); + } + } + + int label = codegen_new_label(g); + + fprintf(g->out, " mov rax, rsp\n"); + if (-pass_by_stack_offset % 16 != 0) { + fprintf(g->out, " add rax, 8\n"); + } + fprintf(g->out, " and rax, 15\n"); + fprintf(g->out, " cmp rax, 0\n"); + fprintf(g->out, " je .Laligned%d\n", label); + + fprintf(g->out, " sub rsp, 8\n"); + codegen_args(g, args); + fprintf(g->out, " mov rax, 0\n"); + if (func_name) { + fprintf(g->out, " call %s\n", func_name); + } else { + codegen_expr(g, call->func, GenMode_rval); + fprintf(g->out, " call rax\n"); + } + fprintf(g->out, " add rsp, 8\n"); + + fprintf(g->out, " jmp .Lend%d\n", label); + fprintf(g->out, ".Laligned%d:\n", label); + + codegen_args(g, args); + fprintf(g->out, " mov rax, 0\n"); + if (func_name) { + fprintf(g->out, " call %s\n", func_name); + } else { + codegen_expr(g, call->func, GenMode_rval); + fprintf(g->out, " call rax\n"); + } + + fprintf(g->out, ".Lend%d:\n", label); + // Pop pass-by-stack arguments. + fprintf(g->out, " add rsp, %d\n", -pass_by_stack_offset - 16); +} + +static void codegen_lvar(CodeGen* g, LvarNode* lvar, Type* ty, GenMode gen_mode) { + fprintf(g->out, " lea rax, %d[rbp]\n", -lvar->stack_offset); + if (gen_mode == GenMode_rval) { + codegen_lval2rval(g, ty); + } +} + +static void codegen_gvar(CodeGen* g, GvarNode* gvar, Type* ty, GenMode gen_mode) { + fprintf(g->out, " lea rax, %s[rip]\n", gvar->name); + if (gen_mode == GenMode_rval) { + codegen_lval2rval(g, ty); + } +} + +static void codegen_func_ref(CodeGen* g, FuncNode* func) { + fprintf(g->out, " lea rax, %s[rip]\n", func->name); +} + +static void codegen_composite_expr(CodeGen* g, AstNode* ast) { + // Standard C does not have composite expression, but ducc internally has. + for (int i = 0; i < ast->as.list->len; ++i) { + AstNode* expr = ast->as.list->items + i; + codegen_expr(g, expr, GenMode_rval); + } +} + +static void codegen_expr(CodeGen* g, AstNode* ast, GenMode gen_mode) { + if (ast->kind == AstNodeKind_int_expr) { + codegen_int_expr(g, ast->as.int_expr); + } else if (ast->kind == AstNodeKind_str_expr) { + codegen_str_expr(g, ast->as.str_expr); + } else if (ast->kind == AstNodeKind_unary_expr) { + codegen_unary_expr(g, ast->as.unary_expr); + } else if (ast->kind == AstNodeKind_ref_expr) { + codegen_ref_expr(g, ast->as.ref_expr); + } else if (ast->kind == AstNodeKind_deref_expr) { + codegen_deref_expr(g, ast->as.deref_expr, gen_mode); + } else if (ast->kind == AstNodeKind_cast_expr) { + codegen_cast_expr(g, ast->as.cast_expr, ast->ty); + } else if (ast->kind == AstNodeKind_binary_expr) { + codegen_binary_expr(g, ast->as.binary_expr, gen_mode); + } else if (ast->kind == AstNodeKind_cond_expr) { + codegen_cond_expr(g, ast->as.cond_expr, gen_mode); + } else if (ast->kind == AstNodeKind_logical_expr) { + codegen_logical_expr(g, ast->as.logical_expr); + } else if (ast->kind == AstNodeKind_assign_expr) { + codegen_assign_expr(g, ast->as.assign_expr); + } else if (ast->kind == AstNodeKind_func_call) { + codegen_func_call(g, ast->as.func_call); + } else if (ast->kind == AstNodeKind_lvar) { + codegen_lvar(g, ast->as.lvar, ast->ty, gen_mode); + } else if (ast->kind == AstNodeKind_gvar) { + codegen_gvar(g, ast->as.gvar, ast->ty, gen_mode); + } else if (ast->kind == AstNodeKind_func) { + codegen_func_ref(g, ast->as.func); + } else if (ast->kind == AstNodeKind_list) { + codegen_composite_expr(g, ast); + } else { + unreachable(); + } +} + +static void codegen_return_stmt(CodeGen* g, ReturnStmtNode* stmt) { + if (stmt->expr) { + codegen_expr(g, stmt->expr, GenMode_rval); + } + codegen_func_epilogue(g); +} + +static void codegen_if_stmt(CodeGen* g, IfStmtNode* stmt) { + int label = codegen_new_label(g); + + codegen_expr(g, stmt->cond, GenMode_rval); + fprintf(g->out, " cmp rax, 0\n"); + fprintf(g->out, " je .Lelse%d\n", label); + codegen_stmt(g, stmt->then); + fprintf(g->out, " jmp .Lend%d\n", label); + fprintf(g->out, ".Lelse%d:\n", label); + if (stmt->else_) { + codegen_stmt(g, stmt->else_); + } + fprintf(g->out, ".Lend%d:\n", label); +} + +static void codegen_for_stmt(CodeGen* g, ForStmtNode* stmt) { + int label = codegen_new_label(g); + ++g->loop_labels; + *g->loop_labels = label; + + if (stmt->init) { + codegen_expr(g, stmt->init, GenMode_rval); + } + fprintf(g->out, ".Lbegin%d:\n", label); + codegen_expr(g, stmt->cond, GenMode_rval); + fprintf(g->out, " cmp rax, 0\n"); + fprintf(g->out, " je .Lend%d\n", label); + codegen_stmt(g, stmt->body); + fprintf(g->out, ".Lcontinue%d:\n", label); + if (stmt->update) { + codegen_expr(g, stmt->update, GenMode_rval); + } + fprintf(g->out, " jmp .Lbegin%d\n", label); + fprintf(g->out, ".Lend%d:\n", label); + + --g->loop_labels; +} + +static void codegen_do_while_stmt(CodeGen* g, DoWhileStmtNode* stmt) { + int label = codegen_new_label(g); + ++g->loop_labels; + *g->loop_labels = label; + + fprintf(g->out, ".Lbegin%d:\n", label); + codegen_stmt(g, stmt->body); + fprintf(g->out, ".Lcontinue%d:\n", label); + codegen_expr(g, stmt->cond, GenMode_rval); + fprintf(g->out, " cmp rax, 0\n"); + fprintf(g->out, " je .Lend%d\n", label); + fprintf(g->out, " jmp .Lbegin%d\n", label); + fprintf(g->out, ".Lend%d:\n", label); + + --g->loop_labels; +} + +static void codegen_break_stmt(CodeGen* g) { + if (g->switch_label != -1) { + fprintf(g->out, " jmp .Lend%d\n", g->switch_label); + } else { + int label = *g->loop_labels; + fprintf(g->out, " jmp .Lend%d\n", label); + } +} + +static void codegen_continue_stmt(CodeGen* g) { + int label = *g->loop_labels; + fprintf(g->out, " jmp .Lcontinue%d\n", label); +} + +static void codegen_goto_stmt(CodeGen* g, GotoStmtNode* stmt) { + fprintf(g->out, " jmp .L%s__%s\n", g->current_func->as.func_def->name, stmt->label); +} + +static void codegen_label_stmt(CodeGen* g, LabelStmtNode* stmt) { + fprintf(g->out, ".L%s__%s:\n", g->current_func->as.func_def->name, stmt->name); + codegen_stmt(g, stmt->body); +} + +// Helper to collect case values from the switch body +static void collect_cases(AstNode* stmt, int* case_values, int* case_labels, int* n_cases) { + if (!stmt) + return; + + if (stmt->kind == AstNodeKind_case_label) { + case_values[*n_cases] = stmt->as.case_label->value; + case_labels[*n_cases] = *n_cases + 1; + (*n_cases)++; + collect_cases(stmt->as.case_label->body, case_values, case_labels, n_cases); + } else if (stmt->kind == AstNodeKind_default_label) { + collect_cases(stmt->as.default_label->body, case_values, case_labels, n_cases); + } else if (stmt->kind == AstNodeKind_list) { + for (int i = 0; i < stmt->as.list->len; i++) { + collect_cases(stmt->as.list->items + i, case_values, case_labels, n_cases); + } + } +} + +static bool codegen_switch_body(CodeGen* g, AstNode* stmt, int* case_values, int* case_labels, int n_cases) { + if (!stmt) + return false; + + if (stmt->kind == AstNodeKind_case_label) { + int value = stmt->as.case_label->value; + for (int i = 0; i < n_cases; i++) { + if (case_values[i] == value) { + fprintf(g->out, ".Lcase%d_%d:\n", g->switch_label, case_labels[i]); + break; + } + } + return codegen_switch_body(g, stmt->as.case_label->body, case_values, case_labels, n_cases); + } else if (stmt->kind == AstNodeKind_default_label) { + fprintf(g->out, ".Ldefault%d:\n", g->switch_label); + codegen_switch_body(g, stmt->as.default_label->body, case_values, case_labels, n_cases); + return true; + } else if (stmt->kind == AstNodeKind_list) { + bool default_label_emitted = false; + for (int i = 0; i < stmt->as.list->len; i++) { + default_label_emitted |= + codegen_switch_body(g, stmt->as.list->items + i, case_values, case_labels, n_cases); + } + return default_label_emitted; + } else { + codegen_stmt(g, stmt); + return false; + } +} + +static void codegen_switch_stmt(CodeGen* g, SwitchStmtNode* stmt) { + int switch_label = codegen_new_label(g); + int prev_switch_label = g->switch_label; + g->switch_label = switch_label; + + // Collect all case values and assign labels + int case_values[256]; + int case_labels[256]; + int n_cases = 0; + collect_cases(stmt->body, case_values, case_labels, &n_cases); + + // Generate jump instructions. + codegen_expr(g, stmt->expr, GenMode_rval); + for (int i = 0; i < n_cases; i++) { + fprintf(g->out, " cmp rax, %d\n", case_values[i]); + fprintf(g->out, " je .Lcase%d_%d\n", switch_label, case_labels[i]); + } + fprintf(g->out, " jmp .Ldefault%d\n", switch_label); + + // Generate the switch body with labels. + bool default_label_emitted = codegen_switch_body(g, stmt->body, case_values, case_labels, n_cases); + + if (!default_label_emitted) { + fprintf(g->out, ".Ldefault%d:\n", switch_label); + } + fprintf(g->out, ".Lend%d:\n", switch_label); + + g->switch_label = prev_switch_label; +} + +static void codegen_expr_stmt(CodeGen* g, ExprStmtNode* stmt) { + codegen_expr(g, stmt->expr, GenMode_rval); +} + +static void codegen_block_stmt(CodeGen* g, AstNode* ast) { + for (int i = 0; i < ast->as.list->len; ++i) { + AstNode* stmt = ast->as.list->items + i; + codegen_stmt(g, stmt); + } +} + +static void codegen_stmt(CodeGen* g, AstNode* ast) { + // TODO: support multiple files. + fprintf(g->out, " .loc 1 %d 0\n", ast->loc.line); + + if (ast->kind == AstNodeKind_list) { + codegen_block_stmt(g, ast); + } else if (ast->kind == AstNodeKind_return_stmt) { + codegen_return_stmt(g, ast->as.return_stmt); + } else if (ast->kind == AstNodeKind_if_stmt) { + codegen_if_stmt(g, ast->as.if_stmt); + } else if (ast->kind == AstNodeKind_switch_stmt) { + codegen_switch_stmt(g, ast->as.switch_stmt); + } else if (ast->kind == AstNodeKind_for_stmt) { + codegen_for_stmt(g, ast->as.for_stmt); + } else if (ast->kind == AstNodeKind_do_while_stmt) { + codegen_do_while_stmt(g, ast->as.do_while_stmt); + } else if (ast->kind == AstNodeKind_break_stmt) { + codegen_break_stmt(g); + } else if (ast->kind == AstNodeKind_continue_stmt) { + codegen_continue_stmt(g); + } else if (ast->kind == AstNodeKind_goto_stmt) { + codegen_goto_stmt(g, ast->as.goto_stmt); + } else if (ast->kind == AstNodeKind_label_stmt) { + codegen_label_stmt(g, ast->as.label_stmt); + } else if (ast->kind == AstNodeKind_expr_stmt) { + codegen_expr_stmt(g, ast->as.expr_stmt); + } else if (ast->kind == AstNodeKind_lvar_decl) { + // Do nothing. + } else if (ast->kind == AstNodeKind_nop) { + // Do nothing. + } else if (ast->kind == AstNodeKind_case_label || ast->kind == AstNodeKind_default_label) { + // They are handled by codegen_switch_stmt(). + unreachable(); + } else { + unreachable(); + } +} + +static void codegen_func(CodeGen* g, AstNode* ast) { + g->current_func = ast; + + if (ast->ty->storage_class != StorageClass_static) { + fprintf(g->out, ".globl %s\n", ast->as.func_def->name); + } + fprintf(g->out, "%s:\n", ast->as.func_def->name); + + codegen_func_prologue(g, ast->as.func_def); + codegen_stmt(g, ast->as.func_def->body); + if (strcmp(ast->as.func_def->name, "main") == 0) { + // C99: 5.1.2.2.3 + fprintf(g->out, " mov rax, 0\n"); + } + codegen_func_epilogue(g); + + fprintf(g->out, "\n"); + g->current_func = NULL; +} + +static void codegen_global_var(CodeGen* g, AstNode* var) { + if (var->ty->storage_class == StorageClass_extern) { + return; + } + if (var->ty->storage_class != StorageClass_static) { + fprintf(g->out, ".globl %s\n", var->as.gvar_decl->name); + } + fprintf(g->out, " %s:\n", var->as.gvar_decl->name); + if (!var->as.gvar_decl->expr) { + fprintf(g->out, " .zero %d\n", type_sizeof(var->ty)); + return; + } + + if (var->ty->kind == TypeKind_array && var->as.gvar_decl->expr->kind == AstNodeKind_str_expr) { + const char* str = g->prog->str_literals[var->as.gvar_decl->expr->as.str_expr->idx - 1]; + fprintf(g->out, " .string \"%s\"\n", str); + return; + } + + InitData* data = eval_init_expr(var->as.gvar_decl->expr, var->ty); + + for (size_t i = 0; i < data->len; ++i) { + InitDataBlock* block = &data->blocks[i]; + if (block->kind == InitDataBlockKind_addr) { + fprintf(g->out, " .quad %s\n", block->as.addr.label); + } else { + for (size_t j = 0; j < block->as.bytes.len; ++j) { + fprintf(g->out, " .byte %d\n", block->as.bytes.buf[j]); + } + } + } +} + +void codegen(Program* prog, const char* input_filename, FILE* out) { + CodeGen* g = codegen_new(prog, out); + + fprintf(g->out, ".intel_syntax noprefix\n\n"); + + // TODO: support multiple files. + fprintf(g->out, ".file 1 \"%s\"\n\n", input_filename); + + // For GNU ld: + // https://sourceware.org/binutils/docs/ld/Options.html + fprintf(g->out, ".section .note.GNU-stack,\"\",@progbits\n\n"); + + fprintf(g->out, ".section .rodata\n\n"); + for (int i = 0; prog->str_literals[i]; ++i) { + fprintf(g->out, ".Lstr__%d:\n", i + 1); + fprintf(g->out, " .string \"%s\"\n\n", prog->str_literals[i]); + } + + fprintf(g->out, ".data\n\n"); + for (int i = 0; i < prog->vars->as.list->len; ++i) { + codegen_global_var(g, &prog->vars->as.list->items[i]); + } + + fprintf(g->out, ".text\n\n"); + for (int i = 0; i < prog->funcs->as.list->len; ++i) { + AstNode* func = &prog->funcs->as.list->items[i]; + codegen_func(g, func); + } +} diff --git a/src/cc1/codegen.h b/src/cc1/codegen.h new file mode 100644 index 0000000..2c1d7b3 --- /dev/null +++ b/src/cc1/codegen.h @@ -0,0 +1,8 @@ +#ifndef DUCC_CODEGEN_H +#define DUCC_CODEGEN_H + +#include "ast.h" + +void codegen(Program* prog, const char* input_filename, FILE* out); + +#endif diff --git a/src/cc1/codegen_wasm.c b/src/cc1/codegen_wasm.c new file mode 100644 index 0000000..c7f8870 --- /dev/null +++ b/src/cc1/codegen_wasm.c @@ -0,0 +1,164 @@ +#include +#include +#include "../lib/common.h" +#include "codegen.h" +#include "preprocess.h" + +typedef enum { + GenMode_lval, + GenMode_rval, +} GenMode; + +typedef struct { + FILE* out; + int next_label; + int* loop_labels; + AstNode* current_func; + int switch_label; +} CodeGen; + +static CodeGen* codegen_new(FILE* out) { + CodeGen* g = calloc(1, sizeof(CodeGen)); + g->out = out; + g->next_label = 1; + g->loop_labels = calloc(1024, sizeof(int)); + g->switch_label = -1; + return g; +} + +static void codegen_expr(CodeGen* g, AstNode* ast, GenMode gen_mode); +static void codegen_stmt(CodeGen* g, AstNode* ast); + +static void codegen_func_prologue(CodeGen* g, FuncDefNode* func_def) { + for (int i = 0; i < func_def->params->as.list->len; ++i) { + fprintf(g->out, " (param $l_%s i32)", func_def->params->as.list->items[i].as.param->name); + } + fprintf(g->out, " (result i32)\n"); +} + +static void codegen_func_epilogue(CodeGen*) { +} + +static void codegen_int_expr(CodeGen* g, IntExprNode* expr) { + fprintf(g->out, " i32.const %d\n", expr->value); +} + +static void codegen_binary_expr(CodeGen* g, BinaryExprNode* expr, GenMode gen_mode) { + codegen_expr(g, expr->lhs, gen_mode); + codegen_expr(g, expr->rhs, gen_mode); + if (expr->op == TokenKind_plus) { + fprintf(g->out, " i32.add\n"); + } else if (expr->op == TokenKind_minus) { + fprintf(g->out, " i32.sub\n"); + } else if (expr->op == TokenKind_le) { + fprintf(g->out, " i32.le_s\n"); + } else { + unreachable(); + } +} + +static void codegen_lvar(CodeGen* g, LvarNode* lvar, GenMode) { + fprintf(g->out, " local.get $l_%s\n", lvar->name); +} + +static void codegen_func_call(CodeGen* g, FuncCallNode* call) { + const char* func_name; + if (call->func->kind == AstNodeKind_func) { + func_name = call->func->as.func->name; + } else { + unimplemented(); + } + + AstNode* args = call->args; + for (int i = 0; i < args->as.list->len; ++i) { + AstNode* arg = args->as.list->items + i; + codegen_expr(g, arg, GenMode_rval); + } + fprintf(g->out, " call $%s\n", func_name); +} + +static void codegen_expr(CodeGen* g, AstNode* ast, GenMode gen_mode) { + if (ast->kind == AstNodeKind_int_expr) { + codegen_int_expr(g, ast->as.int_expr); + } else if (ast->kind == AstNodeKind_binary_expr) { + codegen_binary_expr(g, ast->as.binary_expr, gen_mode); + } else if (ast->kind == AstNodeKind_lvar) { + codegen_lvar(g, ast->as.lvar, gen_mode); + } else if (ast->kind == AstNodeKind_func_call) { + codegen_func_call(g, ast->as.func_call); + } else { + unreachable(); + } +} + +static void codegen_return_stmt(CodeGen* g, ReturnStmtNode* stmt) { + if (stmt->expr) { + codegen_expr(g, stmt->expr, GenMode_rval); + } + fprintf(g->out, " return\n"); +} + +static void codegen_if_stmt(CodeGen* g, IfStmtNode* stmt) { + codegen_expr(g, stmt->cond, GenMode_rval); + fprintf(g->out, " (if (result i32)\n"); + fprintf(g->out, " (then\n"); + codegen_stmt(g, stmt->then); + fprintf(g->out, " )\n"); + if (stmt->else_) { + fprintf(g->out, " (else\n"); + codegen_stmt(g, stmt->else_); + fprintf(g->out, " )\n"); + } else { + fprintf(g->out, " (else\n"); + fprintf(g->out, " i32.const 0\n"); + fprintf(g->out, " )\n"); + } + fprintf(g->out, " )\n"); +} + +static void codegen_block_stmt(CodeGen* g, AstNode* ast) { + for (int i = 0; i < ast->as.list->len; ++i) { + AstNode* stmt = ast->as.list->items + i; + codegen_stmt(g, stmt); + } +} + +static void codegen_stmt(CodeGen* g, AstNode* ast) { + if (ast->kind == AstNodeKind_list) { + codegen_block_stmt(g, ast); + } else if (ast->kind == AstNodeKind_return_stmt) { + codegen_return_stmt(g, ast->as.return_stmt); + } else if (ast->kind == AstNodeKind_if_stmt) { + codegen_if_stmt(g, ast->as.if_stmt); + } else if (ast->kind == AstNodeKind_nop) { + // Do nothing. + } else { + unreachable(); + } +} + +static void codegen_func(CodeGen* g, AstNode* ast) { + g->current_func = ast; + + fprintf(g->out, "(func $%s (export \"%s\")", ast->as.func_def->name, ast->as.func_def->name); + + codegen_func_prologue(g, ast->as.func_def); + codegen_stmt(g, ast->as.func_def->body); + codegen_func_epilogue(g); + + fprintf(g->out, ")\n"); + g->current_func = NULL; +} + +void codegen_wasm(Program* prog, FILE* out) { + CodeGen* g = codegen_new(out); + + fprintf(g->out, "(module\n"); + + for (int i = 0; i < prog->funcs->as.list->len; ++i) { + AstNode* func = prog->funcs->as.list->items + i; + codegen_func(g, func); + } + + fprintf(g->out, ")\n"); +} diff --git a/src/cc1/codegen_wasm.h b/src/cc1/codegen_wasm.h new file mode 100644 index 0000000..1089154 --- /dev/null +++ b/src/cc1/codegen_wasm.h @@ -0,0 +1,8 @@ +#ifndef DUCC_CODEGEN_WASM_H +#define DUCC_CODEGEN_WASM_H + +#include "ast.h" + +void codegen_wasm(Program* prog, FILE* out); + +#endif diff --git a/src/cc1/fs.c b/src/cc1/fs.c new file mode 100644 index 0000000..c702fa4 --- /dev/null +++ b/src/cc1/fs.c @@ -0,0 +1,27 @@ +#include "fs.h" +#include +#include + +// 'ext' must include '.'. +char* replace_extension(const char* file_name, const char* ext) { + size_t len = strlen(file_name); + const char* last_slash = strrchr(file_name, '/'); + const char* last_dot = strrchr(file_name, '.'); + + size_t ext_len = strlen(ext); + size_t base_len; + // !last_slash: foo.c + // last_slash < last_dot: ./bar/foo.c + if (last_dot && (!last_slash || last_slash < last_dot)) { + base_len = last_dot - file_name; + } else { + base_len = len; + } + + char* result = calloc(base_len + ext_len + 1, sizeof(char)); + memcpy(result, file_name, base_len); + memcpy(result + base_len, ext, ext_len); + result[base_len + ext_len] = '\0'; + + return result; +} diff --git a/src/cc1/fs.h b/src/cc1/fs.h new file mode 100644 index 0000000..b71889f --- /dev/null +++ b/src/cc1/fs.h @@ -0,0 +1,7 @@ +#ifndef DUCC_FS_H +#define DUCC_FS_H + +// 'ext' must include '.'. +char* replace_extension(const char* file_name, const char* ext); + +#endif diff --git a/src/cc1/io.c b/src/cc1/io.c new file mode 100644 index 0000000..65cd448 --- /dev/null +++ b/src/cc1/io.c @@ -0,0 +1,127 @@ +#include "io.h" +#include +#include +#include +#include "../lib/common.h" +#include "../lib/json.h" + +void sourcelocation_build_json(JsonBuilder* builder, SourceLocation* loc) { + jsonbuilder_object_start(builder); + jsonbuilder_object_member_start(builder, "filename"); + jsonbuilder_string(builder, loc->filename); + jsonbuilder_object_member_end(builder); + jsonbuilder_object_member_start(builder, "line"); + jsonbuilder_integer(builder, loc->line); + jsonbuilder_object_member_end(builder); + jsonbuilder_object_end(builder); +} + +InFile* infile_open(const char* filename) { + FILE* in = fopen(filename, "rb"); + if (!in) { + return NULL; + } + + size_t buf_size = 1024 * 10; + char* buf = calloc(buf_size, sizeof(char)); + char* cur = buf; + char* tmp = calloc(1024, sizeof(char)); + + while (fgets(tmp, 1024, in)) { + size_t len = strlen(tmp); + size_t used_size = cur - buf; + + if (buf_size <= used_size + len) { + size_t old_size = buf_size; + buf_size *= 2; + buf = realloc(buf, buf_size); + memset(buf + old_size, 0, buf_size - old_size); + cur = buf + used_size; + } + + memcpy(cur, tmp, len); + cur += len; + } + fclose(in); + + InFile* in_file = calloc(1, sizeof(InFile)); + in_file->buf = buf; + in_file->loc.filename = filename; + in_file->loc.line = 1; + return in_file; +} + +bool infile_eof(InFile* f) { + return f->buf[f->pos] == '\0'; +} + +char infile_peek_char(InFile* f) { + char c = f->buf[f->pos]; + + // Skip a pair of backslash and new-line. + if (c == '\\') { + char c2 = f->buf[f->pos + 1]; + // C23: 5.1.1.2 + // A source file that is not empty shall end in a new-line character, which shall not be immediately preceded by + // a backslash character before any such splicing takes place. + if (c2 == '\0') { + fatal_error("%s:%d: expected, but got ", f->loc.filename, f->loc.line); + } + // Handle line continuation. + if (c2 == '\r') { + if (f->buf[f->pos + 2] == '\n') { + f->pos += 3; // Backslash + CRLF + } else { + f->pos += 2; // Backslash + CR + } + ++f->loc.line; + return infile_peek_char(f); + } else if (c2 == '\n') { + f->pos += 2; // Backslash + LF + ++f->loc.line; + return infile_peek_char(f); + } + } + + // Normalize new-line. + if (c == '\r') + c = '\n'; + return c; +} + +char infile_next_char(InFile* f) { + char c = infile_peek_char(f); + + if (f->buf[f->pos] == '\r') { + ++f->pos; + if (f->buf[f->pos] == '\n') { + ++f->pos; // CRLF + } + c = '\n'; + } else { + ++f->pos; + } + + if (c == '\n') + ++f->loc.line; + return c; +} + +char infile_peek_char2(InFile* f) { + int saved_pos = f->pos; + int saved_line = f->loc.line; + infile_next_char(f); + char c = infile_peek_char(f); + f->pos = saved_pos; + f->loc.line = saved_line; + return c; +} + +bool infile_consume_if(InFile* f, char expected) { + if (infile_peek_char(f) == expected) { + infile_next_char(f); + return true; + } else { + return false; + } +} diff --git a/src/cc1/io.h b/src/cc1/io.h new file mode 100644 index 0000000..be48b51 --- /dev/null +++ b/src/cc1/io.h @@ -0,0 +1,26 @@ +#ifndef DUCC_IO_H +#define DUCC_IO_H + +#include "../lib/json.h" + +typedef struct { + const char* filename; + int line; +} SourceLocation; + +void sourcelocation_build_json(JsonBuilder* builder, SourceLocation* loc); + +typedef struct { + const char* buf; + int pos; + SourceLocation loc; +} InFile; + +InFile* infile_open(const char* filename); +bool infile_eof(InFile* f); +char infile_peek_char(InFile* f); +char infile_peek_char2(InFile* f); +char infile_next_char(InFile* f); +bool infile_consume_if(InFile* f, char expected); + +#endif diff --git a/src/cc1/parse.c b/src/cc1/parse.c new file mode 100644 index 0000000..52167c1 --- /dev/null +++ b/src/cc1/parse.c @@ -0,0 +1,3018 @@ +#include "parse.h" +#include "../lib/common.h" +#include "tokenize.h" + +typedef struct { + const char* name; + Type* ty; + int stack_offset; +} LocalVar; + +typedef struct { + size_t len; + size_t capacity; + LocalVar* data; +} LocalVarArray; + +static void lvars_init(LocalVarArray* lvars) { + lvars->len = 0; + lvars->capacity = 4; + lvars->data = calloc(lvars->capacity, sizeof(LocalVar)); +} + +static void lvars_reserve(LocalVarArray* lvars, size_t size) { + if (size <= lvars->capacity) + return; + while (lvars->capacity < size) { + lvars->capacity *= 2; + } + lvars->data = realloc(lvars->data, lvars->capacity * sizeof(LocalVar)); + memset(lvars->data + lvars->len, 0, (lvars->capacity - lvars->len) * sizeof(LocalVar)); +} + +static LocalVar* lvars_push_new(LocalVarArray* lvars) { + lvars_reserve(lvars, lvars->len + 1); + return &lvars->data[lvars->len++]; +} + +typedef struct { + const char* name; + int index; +} ScopedSymbol; + +typedef struct { + size_t len; + size_t capacity; + ScopedSymbol* data; +} ScopedSymbolArray; + +static void scopedsymbols_init(ScopedSymbolArray* syms) { + syms->len = 0; + syms->capacity = 4; + syms->data = calloc(syms->capacity, sizeof(ScopedSymbol)); +} + +static void scopedsymbols_reserve(ScopedSymbolArray* syms, size_t size) { + if (size <= syms->capacity) + return; + while (syms->capacity < size) { + syms->capacity *= 2; + } + syms->data = realloc(syms->data, syms->capacity * sizeof(ScopedSymbol)); + memset(syms->data + syms->len, 0, (syms->capacity - syms->len) * sizeof(ScopedSymbol)); +} + +static ScopedSymbol* scopedsymbols_push_new(ScopedSymbolArray* syms) { + scopedsymbols_reserve(syms, syms->len + 1); + return &syms->data[syms->len++]; +} + +typedef struct Scope { + struct Scope* outer; + ScopedSymbolArray syms; +} Scope; + +typedef struct { + const char* name; + Type* ty; +} GlobalVar; + +typedef struct { + size_t len; + size_t capacity; + GlobalVar* data; +} GlobalVarArray; + +static void gvars_init(GlobalVarArray* gvars) { + gvars->len = 0; + gvars->capacity = 4; + gvars->data = calloc(gvars->capacity, sizeof(GlobalVar)); +} + +static void gvars_reserve(GlobalVarArray* gvars, size_t size) { + if (size <= gvars->capacity) + return; + while (gvars->capacity < size) { + gvars->capacity *= 2; + } + gvars->data = realloc(gvars->data, gvars->capacity * sizeof(GlobalVar)); + memset(gvars->data + gvars->len, 0, (gvars->capacity - gvars->len) * sizeof(GlobalVar)); +} + +static GlobalVar* gvars_push_new(GlobalVarArray* gvars) { + gvars_reserve(gvars, gvars->len + 1); + return &gvars->data[gvars->len++]; +} + +typedef struct { + const char* name; + Type* ty; +} Func; + +typedef struct { + size_t len; + size_t capacity; + Func* data; +} FuncArray; + +static void funcs_init(FuncArray* funcs) { + funcs->len = 0; + funcs->capacity = 32; + funcs->data = calloc(funcs->capacity, sizeof(Func)); +} + +static void funcs_reserve(FuncArray* funcs, size_t size) { + if (size <= funcs->capacity) + return; + while (funcs->capacity < size) { + funcs->capacity *= 2; + } + funcs->data = realloc(funcs->data, funcs->capacity * sizeof(Func)); + memset(funcs->data + funcs->len, 0, (funcs->capacity - funcs->len) * sizeof(Func)); +} + +static Func* funcs_push_new(FuncArray* funcs) { + funcs_reserve(funcs, funcs->len + 1); + return &funcs->data[funcs->len++]; +} + +typedef struct { + TokenArray* tokens; + int pos; + LocalVarArray lvars; + Scope* scope; + GlobalVarArray gvars; + FuncArray funcs; + AstNode* structs; + AstNode* unions; + AstNode* enums; + AstNode* typedefs; + StrArray str_literals; + int anonymous_user_type_counter; + AstNode* current_switch; +} Parser; + +static Parser* parser_new(TokenArray* tokens) { + Parser* p = calloc(1, sizeof(Parser)); + p->tokens = tokens; + gvars_init(&p->gvars); + funcs_init(&p->funcs); + p->structs = ast_new_list(4); + p->unions = ast_new_list(4); + p->enums = ast_new_list(4); + p->typedefs = ast_new_list(16); + strings_init(&p->str_literals); + + Func* va_start_func = funcs_push_new(&p->funcs); + va_start_func->name = "__ducc_va_start"; + va_start_func->ty = type_new_func(type_new(TypeKind_void), NULL); + + Func* va_arg_func = funcs_push_new(&p->funcs); + va_arg_func->name = "__ducc_va_arg"; + va_arg_func->ty = type_new_func(type_new_ptr(type_new(TypeKind_void)), NULL); + + return p; +} + +static Token* peek_token(Parser* p) { + return &p->tokens->data[p->pos]; +} + +static Token* peek_token2(Parser* p) { + return &p->tokens->data[p->pos + 1]; +} + +static SourceLocation current_location(Parser* p) { + return peek_token(p)->loc; +} + +static Token* next_token(Parser* p) { + return &p->tokens->data[p->pos++]; +} + +static Token* consume_token_if(Parser* p, TokenKind expected) { + if (peek_token(p)->kind == expected) { + return next_token(p); + } else { + return NULL; + } +} + +static bool eof(Parser* p) { + return peek_token(p)->kind != TokenKind_eof; +} + +static Token* expect(Parser* p, TokenKind expected) { + Token* t = next_token(p); + if (t->kind == expected) { + return t; + } + fatal_error("%s:%d: expected '%s', but got '%s'", t->loc.filename, t->loc.line, token_kind_stringify(expected), + token_stringify(t)); +} + +static int find_lvar_in_scope(Parser*, Scope* scope, const char* name) { + for (size_t i = 0; i < scope->syms.len; ++i) { + ScopedSymbol* sym = &scope->syms.data[i]; + if (sym->name && strcmp(sym->name, name) == 0) { + return sym->index; + } + } + return -1; +} + +static int find_lvar_in_current_scope(Parser* p, const char* name) { + return find_lvar_in_scope(p, p->scope, name); +} + +static int find_lvar(Parser* p, const char* name) { + Scope* scope = p->scope; + while (scope) { + int idx = find_lvar_in_scope(p, scope, name); + if (idx != -1) + return idx; + scope = scope->outer; + } + return -1; +} + +static int calc_lvar_stack_offset(Parser* p, Type* ty) { + int last_offset = 0; + for (int i = p->lvars.len - 1; i >= 0; i--) { + int offset = p->lvars.data[i].stack_offset; + // Skip a passed-by-stack parameter. + if (offset <= 0) + continue; + last_offset = offset; + break; + } + return to_aligned(last_offset + type_sizeof(ty), type_alignof(ty)); +} + +static int add_lvar(Parser* p, const char* name, Type* ty, int stack_offset) { + LocalVar* lvar = lvars_push_new(&p->lvars); + lvar->name = name; + lvar->ty = ty; + lvar->stack_offset = stack_offset; + ScopedSymbol* sym = scopedsymbols_push_new(&p->scope->syms); + sym->name = name; + sym->index = p->lvars.len - 1; + return stack_offset; +} + +static AstNode* generate_temporary_lvar(Parser* p, Type* ty) { + int stack_offset = add_lvar(p, NULL, ty, calc_lvar_stack_offset(p, ty)); + return ast_new_lvar(NULL, stack_offset, ty); +} + +static int find_gvar(Parser* p, const char* name) { + for (size_t i = 0; i < p->gvars.len; ++i) { + if (strcmp(p->gvars.data[i].name, name) == 0) { + return i; + } + } + return -1; +} + +static int find_func(Parser* p, const char* name) { + for (size_t i = 0; i < p->funcs.len; ++i) { + if (strcmp(p->funcs.data[i].name, name) == 0) { + return i; + } + } + return -1; +} + +static int find_struct(Parser* p, const char* name) { + for (int i = 0; i < p->structs->as.list->len; ++i) { + if (strcmp(p->structs->as.list->items[i].as.struct_def->name, name) == 0) { + return i; + } + } + return -1; +} + +static int find_union(Parser* p, const char* name) { + for (int i = 0; i < p->unions->as.list->len; ++i) { + if (strcmp(p->unions->as.list->items[i].as.union_def->name, name) == 0) { + return i; + } + } + return -1; +} + +static int find_enum(Parser* p, const char* name) { + for (int i = 0; i < p->enums->as.list->len; ++i) { + if (strcmp(p->enums->as.list->items[i].as.enum_def->name, name) == 0) { + return i; + } + } + return -1; +} + +static int find_enum_member(Parser* p, const char* name) { + for (int i = 0; i < p->enums->as.list->len; ++i) { + AstNode* members = p->enums->as.list->items[i].as.enum_def->members; + if (!members) + continue; + for (int j = 0; j < members->as.list->len; ++j) { + if (strcmp(members->as.list->items[j].as.enum_member->name, name) == 0) { + return i * 1000 + j; + } + } + } + return -1; +} + +static int find_typedef(Parser* p, const char* name) { + for (int i = 0; i < p->typedefs->as.list->len; ++i) { + if (strcmp(p->typedefs->as.list->items[i].as.typedef_decl->name, name) == 0) { + return i; + } + } + return -1; +} + +static void enter_scope(Parser* p) { + Scope* outer_scope = p->scope; + p->scope = calloc(1, sizeof(Scope)); + p->scope->outer = outer_scope; + scopedsymbols_init(&p->scope->syms); +} + +static void leave_scope(Parser* p) { + p->scope = p->scope->outer; +} + +static void enter_func(Parser* p) { + lvars_init(&p->lvars); + enter_scope(p); +} + +static void leave_func(Parser* p) { + leave_scope(p); +} + +static Token* parse_ident(Parser*); +static AstNode* parse_primary_expr(Parser*); +static AstNode* parse_postfix_expr(Parser*); +static AstNode* parse_argument_expr_list(Parser*); +static AstNode* parse_unary_expr(Parser*); +static AstNode* parse_cast_expr(Parser*); +static AstNode* parse_multiplicative_expr(Parser*); +static AstNode* parse_additive_expr(Parser*); +static AstNode* parse_shift_expr(Parser*); +static AstNode* parse_relational_expr(Parser*); +static AstNode* parse_equality_expr(Parser*); +static AstNode* parse_bitwise_and_expr(Parser*); +static AstNode* parse_bitwise_xor_expr(Parser*); +static AstNode* parse_bitwise_or_expr(Parser*); +static AstNode* parse_logical_and_expr(Parser*); +static AstNode* parse_logical_or_expr(Parser*); +static AstNode* parse_conditional_expr(Parser*); +static AstNode* parse_assignment_expr(Parser*); +static AstNode* parse_expr(Parser*); +static AstNode* parse_constant_expr(Parser*); +static Type* parse_pointer_opt(Parser*, Type*); +static void parse_type_qualifier_list_opt(Parser*); +static AstNode* parse_declarator_or_abstract_declarator_opt(Parser*, Type*); +static AstNode* parse_parameter_type_list(Parser*); +static AstNode* parse_parameter_list(Parser*); +static AstNode* parse_parameter_declaration(Parser*); +static Type* parse_array_declarator_suffix(Parser*, Type*); +static Type* parse_function_declarator_suffix(Parser*, Type*); +static AstNode* parse_direct_declarator(Parser*, Type*); +static AstNode* parse_declarator(Parser*, Type*); +static AstNode* parse_init_declarator_list(Parser*, Type*); +static AstNode* parse_declaration(Parser*); +static AstNode* parse_function_definition(Parser*, AstNode*); +static Type* parse_declaration_specifiers(Parser*); +static AstNode* parse_init_declarator(Parser*, Type*); +static Type* parse_struct_specifier(Parser*); +static Type* parse_union_specifier(Parser*); +static AstNode* parse_member_declaration_list(Parser*); +static AstNode* parse_member_declaration(Parser*); +static Type* parse_specifier_qualifier_list(Parser*); +static AstNode* parse_member_declarator_list(Parser*, Type*); +static AstNode* parse_member_declarator(Parser*, Type*); +static Type* parse_enum_specifier(Parser*); +static void parse_enum_members(Parser*, int); +static AstNode* parse_enum_member(Parser*, int); +static Type* parse_type_name(Parser*); +static Type* parse_abstract_declarator_opt(Parser*, Type*); +static AstNode* parse_braced_initializer(Parser*); +static AstNode* parse_initializer(Parser*); +static AstNode* parse_attribute_specifier_sequence_opt(Parser*); +static AstNode* parse_attribute_specifier_opt(Parser*); +static AstNode* parse_attribute_list(Parser*); +static AstNode* parse_attribute(Parser*); +static AstNode* parse_attribute_argument_clause(Parser*); +static AstNode* parse_balanced_token_sequence(Parser*); +static AstNode* parse_stmt(Parser*); +static AstNode* parse_unlabaled_stmt(Parser*); +static AstNode* parse_compound_stmt(Parser*); +static AstNode* parse_block_item(Parser*); +static AstNode* parse_expr_stmt(Parser*); +static AstNode* parse_if_stmt(Parser*); +static AstNode* parse_switch_stmt(Parser*); +static AstNode* parse_while_stmt(Parser*); +static AstNode* parse_do_while_stmt(Parser*); +static AstNode* parse_for_stmt(Parser*); +static AstNode* parse_goto_stmt(Parser*); +static AstNode* parse_continue_stmt(Parser*); +static AstNode* parse_break_stmt(Parser*); +static AstNode* parse_return_stmt(Parser*); +static AstNode* parse_static_assert_declaration(Parser*); +static AstNode* parse_external_declaration(Parser*); +static int eval(AstNode*); +static void process_declarations(Parser*, AstNode*); + +static Token* parse_ident(Parser* p) { + return expect(p, TokenKind_ident); +} + +static int register_str_literal(Parser* p, const char* s) { + return strings_push(&p->str_literals, s); +} + +static void register_params(Parser* p, AstNode* params) { + int gp_regs = 6; + int pass_by_reg_offset = 8; + int pass_by_stack_offset = -16; + for (int i = 0; i < params->as.list->len; ++i) { + AstNode* param = ¶ms->as.list->items[i]; + int ty_size = type_sizeof(param->ty); + int required_gp_regs; + if (ty_size <= 8) { + required_gp_regs = 1; + } else if (ty_size <= 16) { + required_gp_regs = 2; + } else { + required_gp_regs = 0; + } + if (required_gp_regs <= gp_regs) { + gp_regs -= required_gp_regs; + } else { + required_gp_regs = 0; + } + int stack_offset; + if (required_gp_regs == 0) { + stack_offset = pass_by_stack_offset; + pass_by_stack_offset -= to_aligned(ty_size, 8); + } else { + stack_offset = pass_by_reg_offset; + pass_by_reg_offset += to_aligned(ty_size, 8); + } + const char* name = param->as.declarator->name; + param->kind = AstNodeKind_param; + param->as.param = calloc(1, sizeof(ParamNode)); + param->as.param->name = name; + param->as.param->stack_offset = stack_offset; + add_lvar(p, name, param->ty, stack_offset); + } +} + +static void register_func(Parser* p, const char* name, Type* ty) { + Func* func = funcs_push_new(&p->funcs); + func->name = name; + func->ty = ty; +} + +typedef enum { + TypeSpecifierMask_void = 1 << 0, + TypeSpecifierMask_char = 1 << 1, + TypeSpecifierMask_short = 1 << 2, + TypeSpecifierMask_int = 1 << 3, + TypeSpecifierMask_long = 1 << 4, + // 1 << 5 is used for second 'long'. + TypeSpecifierMask_float = 1 << 6, + TypeSpecifierMask_double = 1 << 7, + TypeSpecifierMask_signed = 1 << 8, + TypeSpecifierMask_unsigned = 1 << 9, + TypeSpecifierMask__BitInt = 1 << 10, + TypeSpecifierMask_bool = 1 << 11, + TypeSpecifierMask__Complex = 1 << 12, + TypeSpecifierMask__Decimal32 = 1 << 13, + TypeSpecifierMask__Decimal64 = 1 << 14, + TypeSpecifierMask__Decimal128 = 1 << 15, + TypeSpecifierMask__Atomic = 1 << 16, + TypeSpecifierMask_struct = 1 << 17, + TypeSpecifierMask_union = 1 << 18, + TypeSpecifierMask_enum = 1 << 19, + TypeSpecifierMask_typeof = 1 << 20, + TypeSpecifierMask_typeof_unqual = 1 << 21, + TypeSpecifierMask_typedef_name = 1 << 22, +} TypeSpecifierMask; + +static Type* distinguish_type_from_type_specifiers(int type_specifiers) { + if (type_specifiers == TypeSpecifierMask_void) + return type_new(TypeKind_void); + else if (type_specifiers == TypeSpecifierMask_char) + return type_new(TypeKind_char); + else if (type_specifiers == (TypeSpecifierMask_signed + TypeSpecifierMask_char)) + return type_new(TypeKind_schar); + else if (type_specifiers == (TypeSpecifierMask_unsigned + TypeSpecifierMask_char)) + return type_new(TypeKind_uchar); + else if (type_specifiers == TypeSpecifierMask_short || + type_specifiers == (TypeSpecifierMask_signed + TypeSpecifierMask_short) || + type_specifiers == (TypeSpecifierMask_short + TypeSpecifierMask_int) || + type_specifiers == (TypeSpecifierMask_signed + TypeSpecifierMask_short + TypeSpecifierMask_int)) + return type_new(TypeKind_short); + else if (type_specifiers == (TypeSpecifierMask_unsigned + TypeSpecifierMask_short) || + type_specifiers == (TypeSpecifierMask_unsigned + TypeSpecifierMask_short + TypeSpecifierMask_int)) + return type_new(TypeKind_ushort); + else if (type_specifiers == TypeSpecifierMask_int || type_specifiers == TypeSpecifierMask_signed || + type_specifiers == (TypeSpecifierMask_signed + TypeSpecifierMask_int)) + return type_new(TypeKind_int); + else if (type_specifiers == TypeSpecifierMask_unsigned || + type_specifiers == (TypeSpecifierMask_unsigned + TypeSpecifierMask_int)) + return type_new(TypeKind_uint); + else if (type_specifiers == TypeSpecifierMask_long || + type_specifiers == (TypeSpecifierMask_signed + TypeSpecifierMask_long) || + type_specifiers == (TypeSpecifierMask_long + TypeSpecifierMask_int) || + type_specifiers == (TypeSpecifierMask_signed + TypeSpecifierMask_long + TypeSpecifierMask_int)) + return type_new(TypeKind_long); + else if (type_specifiers == (TypeSpecifierMask_unsigned + TypeSpecifierMask_long) || + type_specifiers == (TypeSpecifierMask_unsigned + TypeSpecifierMask_long + TypeSpecifierMask_int)) + return type_new(TypeKind_ulong); + else if (type_specifiers == (TypeSpecifierMask_long + TypeSpecifierMask_long) || + type_specifiers == (TypeSpecifierMask_signed + TypeSpecifierMask_long + TypeSpecifierMask_long) || + type_specifiers == (TypeSpecifierMask_long + TypeSpecifierMask_long + TypeSpecifierMask_int) || + type_specifiers == + (TypeSpecifierMask_signed + TypeSpecifierMask_long + TypeSpecifierMask_long + TypeSpecifierMask_int)) + return type_new(TypeKind_llong); + else if (type_specifiers == (TypeSpecifierMask_unsigned + TypeSpecifierMask_long + TypeSpecifierMask_long) || + type_specifiers == + (TypeSpecifierMask_unsigned + TypeSpecifierMask_long + TypeSpecifierMask_long + TypeSpecifierMask_int)) + return type_new(TypeKind_ullong); + else if (type_specifiers == TypeSpecifierMask_float) + return type_new(TypeKind_float); + else if (type_specifiers == TypeSpecifierMask_double) + return type_new(TypeKind_double); + else if (type_specifiers == TypeSpecifierMask_long + TypeSpecifierMask_double) + return type_new(TypeKind_ldouble); + else if (type_specifiers == TypeSpecifierMask_bool) + return type_new(TypeKind_bool); + else if (type_specifiers == TypeSpecifierMask_struct) + return NULL; + else if (type_specifiers == TypeSpecifierMask_union) + return NULL; + else if (type_specifiers == TypeSpecifierMask_enum) + return NULL; + else if (type_specifiers == TypeSpecifierMask_typedef_name) + return NULL; + else if (type_specifiers == 0) + return NULL; + else + unimplemented(); +} + +// e++ +// tmp1 = &e; tmp2 = *tmp1; *tmp1 += 1; tmp2 +// e-- +// tmp1 = &e; tmp2 = *tmp1; *tmp1 -= 1; tmp2 +static AstNode* create_new_postfix_inc_or_dec(Parser* p, AstNode* e, TokenKind op) { + AstNode* tmp1_lvar = generate_temporary_lvar(p, type_new_ptr(e->ty)); + AstNode* tmp2_lvar = generate_temporary_lvar(p, e->ty); + + AstNode* expr1 = ast_new_assign_expr(TokenKind_assign, tmp1_lvar, ast_new_ref_expr(e)); + AstNode* expr2 = ast_new_assign_expr(TokenKind_assign, tmp2_lvar, ast_new_deref_expr(tmp1_lvar)); + AstNode* expr3; + if (op == TokenKind_plusplus) { + expr3 = ast_new_assign_add_expr(ast_new_deref_expr(tmp1_lvar), ast_new_int(1)); + } else { + expr3 = ast_new_assign_sub_expr(ast_new_deref_expr(tmp1_lvar), ast_new_int(1)); + } + AstNode* expr4 = tmp2_lvar; + + AstNode* ret = ast_new_list(4); + ast_append(ret, expr1); + ast_append(ret, expr2); + ast_append(ret, expr3); + ast_append(ret, expr4); + ret->ty = expr4->ty; + return ret; +} + +static bool is_typedef_name(Parser* p, Token* tok) { + return tok->kind == TokenKind_ident && find_typedef(p, tok->value.string) != -1; +} + +static bool is_type_token(Parser* p, Token* tok) { + if (tok->kind == TokenKind_keyword_void || tok->kind == TokenKind_keyword_char || + tok->kind == TokenKind_keyword_short || tok->kind == TokenKind_keyword_int || + tok->kind == TokenKind_keyword_long || tok->kind == TokenKind_keyword_float || + tok->kind == TokenKind_keyword_double || tok->kind == TokenKind_keyword_signed || + tok->kind == TokenKind_keyword_unsigned || tok->kind == TokenKind_keyword__BitInt || + tok->kind == TokenKind_keyword_bool || tok->kind == TokenKind_keyword__Complex || + tok->kind == TokenKind_keyword__Decimal32 || tok->kind == TokenKind_keyword__Decimal64 || + tok->kind == TokenKind_keyword__Decimal128 || tok->kind == TokenKind_keyword__Atomic || + tok->kind == TokenKind_keyword_struct || tok->kind == TokenKind_keyword_union || + tok->kind == TokenKind_keyword_enum || tok->kind == TokenKind_keyword_typeof || + tok->kind == TokenKind_keyword_typeof_unqual || tok->kind == TokenKind_keyword_const || + tok->kind == TokenKind_keyword_restrict || tok->kind == TokenKind_keyword_volatile || + tok->kind == TokenKind_keyword__Atomic || tok->kind == TokenKind_keyword_alignas || + tok->kind == TokenKind_keyword_inline || tok->kind == TokenKind_keyword__Noreturn || + tok->kind == TokenKind_keyword_static) { + return true; + } + if (tok->kind != TokenKind_ident) { + return false; + } + return find_typedef(p, tok->value.string) != -1; +} + +// primary-expr: +// identifier +// constant +// string-literal +// '(' expr ')' +// TODO generic-selection +static AstNode* parse_primary_expr(Parser* p) { + Token* t = next_token(p); + if (t->kind == TokenKind_literal_int) { + return ast_new_int(t->value.integer); + } else if (t->kind == TokenKind_literal_double) { + return ast_new_double(t->value.floating); + } else if (t->kind == TokenKind_keyword_true) { + return ast_new_int(1); + } else if (t->kind == TokenKind_keyword_false) { + return ast_new_int(0); + } else if (t->kind == TokenKind_literal_str) { + return ast_new_str_expr(register_str_literal(p, t->value.string), + type_new_static_string(strlen(t->value.string))); + } else if (t->kind == TokenKind_paren_l) { + AstNode* e = parse_expr(p); + expect(p, TokenKind_paren_r); + return e; + } else if (t->kind == TokenKind_ident) { + const char* name = t->value.string; + + int lvar_idx = find_lvar(p, name); + if (lvar_idx == -1) { + int gvar_idx = find_gvar(p, name); + if (gvar_idx == -1) { + int enum_member_idx = find_enum_member(p, name); + if (enum_member_idx == -1) { + int func_idx = find_func(p, name); + if (func_idx == -1) { + fatal_error("%s:%d: undefined variable: %s", t->loc.filename, t->loc.line, name); + } + return ast_new_func(name, p->funcs.data[func_idx].ty); + } + int enum_idx = enum_member_idx / 1000; + int n = enum_member_idx % 1000; + AstNode* e = ast_new_int( + p->enums->as.list->items[enum_idx].as.enum_def->members->as.list->items[n].as.enum_member->value); + e->ty = type_new(TypeKind_enum); + e->ty->ref.defs = p->enums; + e->ty->ref.index = enum_idx; + return e; + } + return ast_new_gvar(name, p->gvars.data[gvar_idx].ty); + } + + return ast_new_lvar(name, p->lvars.data[lvar_idx].stack_offset, p->lvars.data[lvar_idx].ty); + } else { + fatal_error("%s:%d: expected an expression, but got '%s'", t->loc.filename, t->loc.line, token_stringify(t)); + } +} + +// postfix-expr: +// postfix-expr-stem { postfix-expr-postfix }* +// +// postfix-expr-stem: +// primary-expr +// TODO compound-literal +// +// postfix-expr-postfix: +// '[' expr ']' +// '(' argument-expr-list? ')' +// '.' identifier +// '->' identifier +// '++' +// '--' +static AstNode* parse_postfix_expr(Parser* p) { + AstNode* ret = parse_primary_expr(p); + while (1) { + if (consume_token_if(p, TokenKind_paren_l)) { + AstNode* args = parse_argument_expr_list(p); + expect(p, TokenKind_paren_r); + ret = ast_new_func_call(ret, args); + Type* func_type = ret->as.func_call->func->ty; + if (func_type->kind == TypeKind_ptr) { + func_type = func_type->base; + } + ret->ty = func_type->result; + } else if (consume_token_if(p, TokenKind_bracket_l)) { + AstNode* idx = parse_expr(p); + expect(p, TokenKind_bracket_r); + idx = ast_new_binary_expr(TokenKind_star, idx, ast_new_int(type_sizeof(ret->ty->base))); + ret = ast_new_deref_expr(ast_new_binary_expr(TokenKind_plus, ret, idx)); + } else if (consume_token_if(p, TokenKind_dot)) { + const Token* name = parse_ident(p); + ret = ast_new_member_access_expr(ast_new_ref_expr(ret), name->value.string); + } else if (consume_token_if(p, TokenKind_arrow)) { + const Token* name = parse_ident(p); + ret = ast_new_member_access_expr(ret, name->value.string); + } else if (consume_token_if(p, TokenKind_plusplus)) { + ret = create_new_postfix_inc_or_dec(p, ret, TokenKind_plusplus); + } else if (consume_token_if(p, TokenKind_minusminus)) { + ret = create_new_postfix_inc_or_dec(p, ret, TokenKind_minusminus); + } else { + break; + } + } + return ret; +} + +// argument-expr-list: +// { assignment-expr | ',' }+ +static AstNode* parse_argument_expr_list(Parser* p) { + AstNode* list = ast_new_list(6); + while (peek_token(p)->kind != TokenKind_paren_r) { + AstNode* arg = parse_assignment_expr(p); + ast_append(list, arg); + if (!consume_token_if(p, TokenKind_comma)) { + break; + } + } + return list; +} + +// unary-expr: +// postfix-expr +// '++' unary-expr +// '--' unary-expr +// unary-operator cast-expr +// 'sizeof' unary-expr +// 'sizeof' '(' type-name ')' +// TODO 'alignof' '(' type-name ')' +static AstNode* parse_unary_expr(Parser* p) { + TokenKind op = peek_token(p)->kind; + if (consume_token_if(p, TokenKind_plus)) { + return parse_cast_expr(p); + } else if (consume_token_if(p, TokenKind_minus)) { + AstNode* operand = parse_cast_expr(p); + return ast_new_binary_expr(op, ast_new_int(0), operand); + } else if (consume_token_if(p, TokenKind_not)) { + AstNode* operand = parse_cast_expr(p); + return ast_new_unary_expr(op, operand); + } else if (consume_token_if(p, TokenKind_tilde)) { + AstNode* operand = parse_cast_expr(p); + return ast_new_unary_expr(op, operand); + } else if (consume_token_if(p, TokenKind_and)) { + AstNode* operand = parse_cast_expr(p); + return ast_new_ref_expr(operand); + } else if (consume_token_if(p, TokenKind_star)) { + AstNode* operand = parse_cast_expr(p); + if (operand->ty->kind == TypeKind_func) { + // dereference operator against function pointers are no-op. + return operand; + } else { + return ast_new_deref_expr(operand); + } + } else if (consume_token_if(p, TokenKind_plusplus)) { + AstNode* operand = parse_unary_expr(p); + return ast_new_assign_add_expr(operand, ast_new_int(1)); + } else if (consume_token_if(p, TokenKind_minusminus)) { + AstNode* operand = parse_unary_expr(p); + return ast_new_assign_sub_expr(operand, ast_new_int(1)); + } else if (consume_token_if(p, TokenKind_keyword_sizeof)) { + if (peek_token(p)->kind == TokenKind_paren_l && is_type_token(p, peek_token2(p))) { + next_token(p); + Type* ty = parse_type_name(p); + expect(p, TokenKind_paren_r); + return ast_new_int(type_sizeof(ty)); + } else { + AstNode* expr = parse_unary_expr(p); + return ast_new_int(type_sizeof(expr->ty)); + } + } + return parse_postfix_expr(p); +} + +// cast-expr: +// unary-expr +// '(' type-name ')' cast-expr +static AstNode* parse_cast_expr(Parser* p) { + if (peek_token(p)->kind == TokenKind_paren_l && is_type_token(p, peek_token2(p))) { + next_token(p); + Type* ty = parse_type_name(p); + expect(p, TokenKind_paren_r); + + // TODO: check whether the original type can be casted to the result type. + AstNode* e = parse_cast_expr(p); + return ast_new_cast_expr(e, ty); + } + return parse_unary_expr(p); +} + +// multiplicative-expr: +// cast-expr { ( '*' / '/' / '%' ) cast-expr }* +static AstNode* parse_multiplicative_expr(Parser* p) { + AstNode* lhs = parse_cast_expr(p); + while (1) { + TokenKind op = peek_token(p)->kind; + if (op == TokenKind_star || op == TokenKind_slash || op == TokenKind_percent) { + next_token(p); + AstNode* rhs = parse_cast_expr(p); + lhs = ast_new_binary_expr(op, lhs, rhs); + } else { + break; + } + } + return lhs; +} + +// additive-expr: +// multiplicative-expr { ( '+' / '-' ) multiplicative-expr }* +static AstNode* parse_additive_expr(Parser* p) { + AstNode* lhs = parse_multiplicative_expr(p); + while (1) { + if (consume_token_if(p, TokenKind_plus)) { + AstNode* rhs = parse_multiplicative_expr(p); + if (lhs->ty->base) { + lhs = ast_new_binary_expr( + TokenKind_plus, lhs, + ast_new_binary_expr(TokenKind_star, rhs, ast_new_int(type_sizeof(lhs->ty->base)))); + } else if (rhs->ty->base) { + lhs = ast_new_binary_expr( + TokenKind_plus, ast_new_binary_expr(TokenKind_star, lhs, ast_new_int(type_sizeof(rhs->ty->base))), + rhs); + } else { + lhs = ast_new_binary_expr(TokenKind_plus, lhs, rhs); + } + } else if (consume_token_if(p, TokenKind_minus)) { + AstNode* rhs = parse_multiplicative_expr(p); + if (lhs->ty->base) { + if (rhs->ty->base) { + // (a - b) / sizeof(a) + lhs = ast_new_binary_expr(TokenKind_slash, ast_new_binary_expr(TokenKind_minus, lhs, rhs), + ast_new_int(type_sizeof(lhs->ty->base))); + } else { + // a - b*sizeof(a) + lhs = ast_new_binary_expr( + TokenKind_minus, lhs, + ast_new_binary_expr(TokenKind_star, rhs, ast_new_int(type_sizeof(lhs->ty->base)))); + } + } else { + lhs = ast_new_binary_expr(TokenKind_minus, lhs, rhs); + } + } else { + break; + } + } + return lhs; +} + +// shift-expr: +// additive-expr { ( '<<' / '>>' ) additive-expr }* +static AstNode* parse_shift_expr(Parser* p) { + AstNode* lhs = parse_additive_expr(p); + while (1) { + TokenKind op = peek_token(p)->kind; + if (op == TokenKind_lshift || op == TokenKind_rshift) { + next_token(p); + AstNode* rhs = parse_additive_expr(p); + lhs = ast_new_binary_expr(op, lhs, rhs); + } else { + break; + } + } + return lhs; +} + +// relational-expr: +// shift-expr { ( '<' / '>' / '<=' / '>=' ) shift-expr }* +static AstNode* parse_relational_expr(Parser* p) { + AstNode* lhs = parse_shift_expr(p); + while (1) { + TokenKind op = peek_token(p)->kind; + if (op == TokenKind_lt || op == TokenKind_le) { + next_token(p); + AstNode* rhs = parse_shift_expr(p); + lhs = ast_new_binary_expr(op, lhs, rhs); + } else if (consume_token_if(p, TokenKind_gt)) { + AstNode* rhs = parse_shift_expr(p); + lhs = ast_new_binary_expr(TokenKind_lt, rhs, lhs); + } else if (consume_token_if(p, TokenKind_ge)) { + AstNode* rhs = parse_shift_expr(p); + lhs = ast_new_binary_expr(TokenKind_le, rhs, lhs); + } else { + break; + } + } + return lhs; +} + +// equality-expr: +// relational-expr { ( '==' / '!=' ) relational-expr }* +static AstNode* parse_equality_expr(Parser* p) { + AstNode* lhs = parse_relational_expr(p); + while (1) { + TokenKind op = peek_token(p)->kind; + if (op == TokenKind_eq || op == TokenKind_ne) { + next_token(p); + AstNode* rhs = parse_relational_expr(p); + lhs = ast_new_binary_expr(op, lhs, rhs); + } else { + break; + } + } + return lhs; +} + +// bitwise-and-expr: +// equality-expr { '&' equality-expr }* +static AstNode* parse_bitwise_and_expr(Parser* p) { + AstNode* lhs = parse_equality_expr(p); + while (1) { + if (consume_token_if(p, TokenKind_and)) { + AstNode* rhs = parse_equality_expr(p); + lhs = ast_new_binary_expr(TokenKind_and, lhs, rhs); + lhs->ty = type_new(TypeKind_int); + } else { + break; + } + } + return lhs; +} + +// bitwise-xor-expr: +// bitwise-and-expr { '^' bitwise-and-expr }* +static AstNode* parse_bitwise_xor_expr(Parser* p) { + AstNode* lhs = parse_bitwise_and_expr(p); + while (1) { + if (consume_token_if(p, TokenKind_xor)) { + AstNode* rhs = parse_bitwise_and_expr(p); + lhs = ast_new_binary_expr(TokenKind_xor, lhs, rhs); + lhs->ty = type_new(TypeKind_int); + } else { + break; + } + } + return lhs; +} + +// bitwise-or-expr: +// bitwise-xor-expr { '|' bitwise-xor-expr }* +static AstNode* parse_bitwise_or_expr(Parser* p) { + AstNode* lhs = parse_bitwise_xor_expr(p); + while (1) { + if (consume_token_if(p, TokenKind_or)) { + AstNode* rhs = parse_bitwise_xor_expr(p); + lhs = ast_new_binary_expr(TokenKind_or, lhs, rhs); + lhs->ty = type_new(TypeKind_int); + } else { + break; + } + } + return lhs; +} + +// logical-and-expr: +// bitwise-or-expr { '&&' bitwise-or-expr }* +static AstNode* parse_logical_and_expr(Parser* p) { + AstNode* lhs = parse_bitwise_or_expr(p); + while (1) { + if (consume_token_if(p, TokenKind_andand)) { + AstNode* rhs = parse_bitwise_or_expr(p); + lhs = ast_new_logical_expr(TokenKind_andand, lhs, rhs); + } else { + break; + } + } + return lhs; +} + +// logical-or-expr: +// logical-and-expr { '||' logical-and-expr }* +static AstNode* parse_logical_or_expr(Parser* p) { + AstNode* lhs = parse_logical_and_expr(p); + while (1) { + if (consume_token_if(p, TokenKind_oror)) { + AstNode* rhs = parse_logical_and_expr(p); + lhs = ast_new_logical_expr(TokenKind_oror, lhs, rhs); + } else { + break; + } + } + return lhs; +} + +// conditional-expr: +// logical-or-expr ( '?' expr ':' conditional-expr )? +static AstNode* parse_conditional_expr(Parser* p) { + AstNode* e = parse_logical_or_expr(p); + if (consume_token_if(p, TokenKind_question)) { + AstNode* then_expr = parse_expr(p); + expect(p, TokenKind_colon); + AstNode* else_expr = parse_assignment_expr(p); + return ast_new_cond_expr(e, then_expr, else_expr); + } else { + return e; + } +} + +// assignment-expr: +// conditional-expr +// unary-expr assignment-operator assignment-expr +static AstNode* parse_assignment_expr(Parser* p) { + AstNode* lhs = parse_conditional_expr(p); + while (1) { + TokenKind op = peek_token(p)->kind; + // TODO: check if the lhs is unary expression. + if (op == TokenKind_assign || op == TokenKind_assign_mul || op == TokenKind_assign_div || + op == TokenKind_assign_mod || op == TokenKind_assign_or || op == TokenKind_assign_and || + op == TokenKind_assign_xor || op == TokenKind_assign_lshift || op == TokenKind_assign_rshift) { + next_token(p); + AstNode* rhs = parse_assignment_expr(p); + lhs = ast_new_assign_expr(op, lhs, rhs); + } else if (consume_token_if(p, TokenKind_assign_add)) { + AstNode* rhs = parse_assignment_expr(p); + lhs = ast_new_assign_add_expr(lhs, rhs); + } else if (consume_token_if(p, TokenKind_assign_sub)) { + AstNode* rhs = parse_assignment_expr(p); + lhs = ast_new_assign_sub_expr(lhs, rhs); + } else { + break; + } + } + return lhs; +} + +// expr: +// assignment-expr { ',' assignment-expr }* +static AstNode* parse_expr(Parser* p) { + AstNode* lhs = parse_assignment_expr(p); + while (1) { + if (consume_token_if(p, TokenKind_comma)) { + AstNode* rhs = parse_assignment_expr(p); + AstNode* list = ast_new_list(2); + ast_append(list, lhs); + ast_append(list, rhs); + list->ty = rhs->ty; + lhs = list; + } else { + break; + } + } + return lhs; +} + +// constant-expr: +// conditional-expr +static AstNode* parse_constant_expr(Parser* p) { + return parse_conditional_expr(p); +} + +// pointer: +// { '*' TODO attribute-specifier-sequence? type-qualifier-list? }+ +static Type* parse_pointer_opt(Parser* p, Type* ty) { + while (peek_token(p)->kind == TokenKind_star) { + next_token(p); + ty = type_new_ptr(ty); + parse_type_qualifier_list_opt(p); + } + return ty; +} + +// type-qualifier-list: +// { type-qualifier }+ +static void parse_type_qualifier_list_opt(Parser* p) { + while (1) { + Token* tok = peek_token(p); + if (tok->kind == TokenKind_keyword_const || tok->kind == TokenKind_keyword_restrict || + tok->kind == TokenKind_keyword_volatile || tok->kind == TokenKind_keyword__Atomic) { + // TODO: currently type qualifiers are just ignored. + next_token(p); + } else { + break; + } + } +} + +// declarator | abstract-declarator?: +// pointer? identifier TODO attribute-specifier-sequence? { direct-declarator-suffix }* +// pointer? '(' declarator ')' { direct-declarator-suffix }* +// pointer? '(' abstract-declarator ')' { direct-declarator-suffix }* +// pointer? { direct-declarator-suffix }* +// +// direct-declarator-suffix: +// array-declarator-suffix TODO attribute-specifier-sequence? +// function-declarator-suffix TODO attribute-specifier-sequence? +static AstNode* parse_declarator_or_abstract_declarator_opt(Parser* p, Type* ty) { + ty = parse_pointer_opt(p, ty); + + AstNode* decl; + if (peek_token(p)->kind == TokenKind_ident) { + decl = ast_new_declarator(parse_ident(p)->value.string, ty); + } else if (peek_token(p)->kind == TokenKind_paren_l && !is_type_token(p, peek_token2(p))) { + next_token(p); + if (peek_token(p)->kind == TokenKind_paren_r) { + Token* tok = peek_token(p); + fatal_error("%s:%d: expected declarator, but got '%s'", tok->loc.filename, tok->loc.line, + token_stringify(tok)); + } + decl = parse_declarator_or_abstract_declarator_opt(p, ty); + expect(p, TokenKind_paren_r); + } else { + decl = ast_new_declarator(NULL, ty); + } + + while (1) { + if (peek_token(p)->kind == TokenKind_bracket_l) { + decl->ty = parse_array_declarator_suffix(p, decl->ty); + } else if (peek_token(p)->kind == TokenKind_paren_l) { + decl->ty = parse_function_declarator_suffix(p, decl->ty); + } else { + break; + } + } + + return decl; +} + +// parameter-type-list: +// parameter-list +// parameter-list ',' '...' +// '...' +static AstNode* parse_parameter_type_list(Parser* p) { + AstNode* params = parse_parameter_list(p); + + bool has_void = false; + for (int i = 0; i < params->as.list->len; ++i) { + if (params->as.list->items[i].as.declarator->name && + strcmp(params->as.list->items[i].as.declarator->name, "...") == 0) { + if (i != params->as.list->len - 1) { + fatal_error("..."); + } + --params->as.list->len; + break; + } + has_void |= params->as.list->items[i].ty->kind == TypeKind_void; + } + + if (has_void) { + if (params->as.list->len != 1) { + fatal_error("invalid use of void param"); + } + params->as.list->len = 0; + } + + return params; +} + +// parameter-list: +// { parameter-declaration | ',' } +static AstNode* parse_parameter_list(Parser* p) { + AstNode* params = ast_new_list(4); + + while (1) { + AstNode* param = parse_parameter_declaration(p); + ast_append(params, param); + + if (!consume_token_if(p, TokenKind_comma)) { + break; + } + } + + return params; +} + +// parameter-declaration: +// TODO attribute-specifier-sequence? declaration-specifiers declarator +// TODO attribute-specifier-sequence? declaration-specifiers abstract-declarator? +static AstNode* parse_parameter_declaration(Parser* p) { + if (consume_token_if(p, TokenKind_ellipsis)) { + return ast_new_declarator("...", NULL); + } + + Type* base_ty = parse_declaration_specifiers(p); + return parse_declarator_or_abstract_declarator_opt(p, base_ty); +} + +// array-declarator: +// direct-declarator '[' TODO type-qualifier-list? TODO assignment-expr? ']' +// TODO direct-declarator '[' 'static' type-qualifier-list? assignment-expr? ']' +// TODO direct-declarator '[' type-qualifier-list 'static' assignment-expr? ']' +// TODO direct-declarator '[' type-qualifier-list? '*' ']' +static Type* parse_array_declarator_suffix(Parser* p, Type* ty) { + next_token(p); // skip '[' + + int size = -1; + if (peek_token(p)->kind != TokenKind_bracket_r) { + AstNode* size_expr = parse_assignment_expr(p); + size = eval(size_expr); + } + expect(p, TokenKind_bracket_r); + + return type_new_array(ty, size); +} + +// function-declarator: +// direct-declarator '(' parameter-type-list? ')' +static Type* parse_function_declarator_suffix(Parser* p, Type* ty) { + next_token(p); // skip '(' + AstNode* params; + + if (consume_token_if(p, TokenKind_paren_r)) { + params = ast_new_list(1); + } else { + params = parse_parameter_type_list(p); + expect(p, TokenKind_paren_r); + } + + return type_new_func(ty, params); +} + +// direct-declarator: +// identifier TODO attribute-specifier-sequence? +// '(' declarator ')' +// array-declarator TODO attribute-specifier-sequence? +// function-declarator TODO attribute-specifier-sequence? +static AstNode* parse_direct_declarator(Parser* p, Type* ty) { + AstNode* decl; + if (peek_token(p)->kind == TokenKind_ident) { + decl = ast_new_declarator(parse_ident(p)->value.string, ty); + } else if (peek_token(p)->kind == TokenKind_paren_l && !is_type_token(p, peek_token2(p))) { + next_token(p); + // 1st pass: skip content inside parentheses. + int saved_pos = p->pos; + Type* dummy = type_new(TypeKind_void); + parse_declarator(p, dummy); + expect(p, TokenKind_paren_r); + // Parse suffixes. + while (1) { + if (peek_token(p)->kind == TokenKind_bracket_l) { + ty = parse_array_declarator_suffix(p, ty); + } else if (peek_token(p)->kind == TokenKind_paren_l) { + ty = parse_function_declarator_suffix(p, ty); + } else { + break; + } + } + // 2nd pass: re-parse inner content. + int end_pos = p->pos; + p->pos = saved_pos; + decl = parse_declarator(p, ty); + expect(p, TokenKind_paren_r); + p->pos = end_pos; + return ast_new_declarator(decl->as.declarator->name, decl->ty); + } else { + decl = ast_new_declarator(NULL, ty); + } + + while (1) { + if (peek_token(p)->kind == TokenKind_bracket_l) { + decl->ty = parse_array_declarator_suffix(p, decl->ty); + } else if (peek_token(p)->kind == TokenKind_paren_l) { + decl->ty = parse_function_declarator_suffix(p, decl->ty); + } else { + break; + } + } + + return ast_new_declarator(decl->as.declarator->name, decl->ty); +} + +// declarator: +// pointer? direct-declarator +static AstNode* parse_declarator(Parser* p, Type* ty) { + ty = parse_pointer_opt(p, ty); + return parse_direct_declarator(p, ty); +} + +// init-declarator-list: +// init-declarator +// init-declarator-list ',' init-declarator +static AstNode* parse_init_declarator_list(Parser* p, Type* ty) { + AstNode* list = ast_new_list(1); + while (1) { + AstNode* d = parse_init_declarator(p, ty); + ast_append(list, d); + if (!consume_token_if(p, TokenKind_comma)) { + break; + } + + if (ty->storage_class == StorageClass_typedef) { + continue; + } + // Immediately declare to allow following initializer to access previous variables. For example, + // int a = 1, b = a; + process_declarations(p, &list->as.list->items[list->as.list->len - 1]); + } + return list; +} + +// declaration: +// static_assert-declaration +// TODO attribute-specifier-sequence ';' +// TODO attribute-specifier-sequence declaration-specifiers init-declarator-list ';' +// declaration-specifiers init-declarator-list ';' +static AstNode* parse_declaration(Parser* p) { + if (peek_token(p)->kind == TokenKind_keyword_static_assert) { + return parse_static_assert_declaration(p); + } + + Type* ty = parse_declaration_specifiers(p); + AstNode* decls = parse_init_declarator_list(p, ty); + expect(p, TokenKind_semicolon); + process_declarations(p, &decls->as.list->items[decls->as.list->len - 1]); + return decls; +} + +static void create_local_initializer(AstNode* list, AstNode* lhs, AstNode* init, Type* ty) { + if (init->kind == AstNodeKind_array_initializer) { + AstNode* items = init->as.array_initializer->list; + if (ty->kind == TypeKind_array) { + for (int i = 0; i < items->as.list->len; ++i) { + AstNode* idx = ast_new_binary_expr(TokenKind_star, ast_new_int(i), ast_new_int(type_sizeof(ty->base))); + AstNode* elem = ast_new_deref_expr(ast_new_binary_expr(TokenKind_plus, lhs, idx)); + create_local_initializer(list, elem, &items->as.list->items[i], ty->base); + } + } else if (ty->kind == TypeKind_struct) { + AstNode* def = &ty->ref.defs->as.list->items[ty->ref.index]; + AstNode* members = def->as.struct_def->members; + for (int i = 0; i < items->as.list->len; ++i) { + const char* member_name = members->as.list->items[i].as.struct_member->name; + AstNode* member_lhs = ast_new_member_access_expr(ast_new_ref_expr(lhs), member_name); + create_local_initializer(list, member_lhs, &items->as.list->items[i], members->as.list->items[i].ty); + } + } else if (ty->kind == TypeKind_union) { + AstNode* def = &ty->ref.defs->as.list->items[ty->ref.index]; + AstNode* members = def->as.union_def->members; + const char* member_name = members->as.list->items[0].as.struct_member->name; + AstNode* member_lhs = ast_new_member_access_expr(ast_new_ref_expr(lhs), member_name); + create_local_initializer(list, member_lhs, &items->as.list->items[0], members->as.list->items[0].ty); + } + } else { + AstNode* assign = ast_new_assign_expr(TokenKind_assign, lhs, init); + ast_append(list, ast_new_expr_stmt(assign)); + } +} + +static void process_declarations(Parser* p, AstNode* decl) { + const char* name = decl->as.declarator->name; + if (decl->ty->kind == TypeKind_func) { + // TODO: refactor + decl->ty->storage_class = decl->ty->result->storage_class; + decl->ty->result->storage_class = StorageClass_unspecified; + register_func(p, name, decl->ty); + decl->kind = AstNodeKind_func_decl; + } else { + if (type_is_unsized(decl->ty)) { + fatal_error("process_declarations: invalid type for variable"); + } + + if (p->scope) { + if (find_lvar_in_current_scope(p, name) != -1) { + // TODO: use name's location. + fatal_error("%s:%d: '%s' redeclared", peek_token(p)->loc.filename, peek_token(p)->loc.line, name); + } + int stack_offset = add_lvar(p, name, decl->ty, calc_lvar_stack_offset(p, decl->ty)); + + if (decl->as.declarator->init) { + if (decl->as.declarator->init->kind == AstNodeKind_array_initializer) { + AstNode* lhs_base = ast_new_lvar(name, stack_offset, decl->ty); + AstNode* init = decl->as.declarator->init; + AstNode* stmt_list = ast_new_list(4); + create_local_initializer(stmt_list, lhs_base, init, decl->ty); + *decl = *stmt_list; + } else { + AstNode* lhs = ast_new_lvar(name, stack_offset, decl->ty); + AstNode* assign = ast_new_assign_expr(TokenKind_assign, lhs, decl->as.declarator->init); + decl->kind = AstNodeKind_expr_stmt; + decl->as.expr_stmt = calloc(1, sizeof(ExprStmtNode)); + decl->as.expr_stmt->expr = assign; + } + } else { + decl->kind = AstNodeKind_nop; + } + } else { + if (find_gvar(p, name) != -1) { + // TODO + // fatal_error("process_declarations: %s redeclared", name); + } + // TODO: refactor + Type* base_ty = decl->ty; + while (base_ty->base) { + base_ty = base_ty->base; + } + if (base_ty != decl->ty) { + decl->ty->storage_class = base_ty->storage_class; + base_ty->storage_class = StorageClass_unspecified; + } + + // TODO: refactor + if (decl->ty->kind == TypeKind_array && decl->ty->array_size == -1 && decl->as.declarator && + decl->as.declarator->init && decl->as.declarator->init->kind == AstNodeKind_array_initializer) { + decl->ty->array_size = decl->as.declarator->init->as.array_initializer->list->as.list->len; + } + + GlobalVar* gvar = gvars_push_new(&p->gvars); + gvar->name = name; + gvar->ty = decl->ty; + + if (decl->ty->storage_class == StorageClass_extern) { + decl->kind = AstNodeKind_nop; + } else { + AstNode* init_expr = decl->as.declarator ? decl->as.declarator->init : NULL; + decl->kind = AstNodeKind_gvar_decl; + decl->as.gvar_decl = calloc(1, sizeof(GvarDeclNode)); + decl->as.gvar_decl->name = name; + decl->as.gvar_decl->expr = init_expr; + } + } + } +} + +// function-definition: +// declaration-specifiers init-declarator-list compound-stmt +static AstNode* parse_function_definition(Parser* p, AstNode* decls) { + if (decls->as.list->len != 1) { + fatal_error("parse_function_definition: invalid syntax"); + } + + Type* ty = decls->as.list->items[0].ty; + Type* base_ty = ty->result; + while (base_ty->base) { + base_ty = base_ty->base; + } + ty->storage_class = base_ty->storage_class; + base_ty->storage_class = StorageClass_unspecified; + const char* name = decls->as.list->items[0].as.declarator->name; + AstNode* params = ty->params; + + register_func(p, name, ty); + enter_func(p); + register_params(p, params); + AstNode* body = parse_compound_stmt(p); + leave_func(p); + + int stack_size = 0; + if (p->lvars.len != 0) { + stack_size = p->lvars.data[p->lvars.len - 1].stack_offset + type_sizeof(p->lvars.data[p->lvars.len - 1].ty); + if (stack_size < 0) { + stack_size = 0; + } + } + return ast_new_func_def(name, ty, params, body, stack_size); +} + +static void process_typedefs(Parser* p, AstNode* decls) { + for (int i = 0; i < decls->as.list->len; ++i) { + AstNode* decl = &decls->as.list->items[i]; + + AstNode* typedef_ = ast_new_typedef_decl(decl->as.declarator->name, decl->ty); + ast_append(p->typedefs, typedef_); + } +} + +static char* generate_anonymous_name(Parser* p) { + char* buf = calloc(32, sizeof(char)); + sprintf(buf, "__anonymous_%d__", p->anonymous_user_type_counter++); + return buf; +} + +// declaration-specifiers: +// { declaration-specifier }+ TODO attribute-specifier-sequence? +// +// declaration-specifier: +// storage-class-specifier +// type-specifier-qualifier +// function-speicifier +// +// storage-class-specifier: +// 'auto' +// 'constexpr' +// 'extern' +// 'register' +// 'static' +// 'thread_local' +// 'typedef' +// +// type-specifier-qualifier: +// type-specifier +// type-qualifier +// alignment-specifier +// +// function-specifier: +// 'inline' +// '_Noreturn' +// +// type-specifier: +// 'void' +// 'char' +// 'short' +// 'int' +// 'long' +// 'float' +// 'double' +// 'signed' +// 'unsigned' +// TODO '_BitInt' '(' constant-expr ')' +// 'bool' +// '_Complex' +// '_Decimal32' +// '_Decimal64' +// '_Decimal128' +// atomic-type-specifier +// struct-or-union-specifier +// enum-specifier +// typeof-specifier +// typedef-name +// +// type-qualifier: +// 'const' +// 'restrict' +// 'volatile' +// '_Atomic' +// +// alignment-specifier: +// TODO 'alignas' '(' type-name ')' +// TODO 'alignas' '(' constant-expr ')' +// +// atomic-type-specifier: +// TODO '_Atomic' '(' type-name ')' +// +// struct-or-union-specifier: +// 'struct' ... +// 'union' ... +// +// enum-specifier: +// 'enum' ... +// +// typeof-specifier: +// TODO 'typeof' '(' typeof-specifier-argument ')' +// TODO 'typeof_unqual' '(' typeof-specifier-argument ')' +// +// typedef-name: +// identifier +static Type* parse_declaration_specifiers(Parser* p) { + StorageClass storage_class = StorageClass_unspecified; + int type_specifiers = 0; + Type* ty = NULL; + + Token* tok; + while (1) { + tok = peek_token(p); + + // storage-class-spciefier + if (tok->kind == TokenKind_keyword_auto) { + unimplemented(); + } else if (tok->kind == TokenKind_keyword_constexpr) { + unimplemented(); + } else if (tok->kind == TokenKind_keyword_extern) { + next_token(p); + storage_class = StorageClass_extern; + } else if (tok->kind == TokenKind_keyword_register) { + unimplemented(); + } else if (tok->kind == TokenKind_keyword_static) { + next_token(p); + storage_class = StorageClass_static; + } else if (tok->kind == TokenKind_keyword_thread_local) { + unimplemented(); + } else if (tok->kind == TokenKind_keyword_typedef) { + next_token(p); + storage_class = StorageClass_typedef; + } + // type-specifier-qualifier > type-specifier + else if (tok->kind == TokenKind_keyword_void) { + if (type_specifiers & TypeSpecifierMask_void) { + fatal_error("%s:%d: duplicate '%s'", tok->loc.filename, tok->loc.line, token_stringify(tok)); + } + next_token(p); + type_specifiers += TypeSpecifierMask_void; + } else if (tok->kind == TokenKind_keyword_char) { + if (type_specifiers & TypeSpecifierMask_char) { + fatal_error("%s:%d: duplicate '%s'", tok->loc.filename, tok->loc.line, token_stringify(tok)); + } + next_token(p); + type_specifiers += TypeSpecifierMask_char; + } else if (tok->kind == TokenKind_keyword_short) { + if (type_specifiers & TypeSpecifierMask_short) { + fatal_error("%s:%d: duplicate '%s'", tok->loc.filename, tok->loc.line, token_stringify(tok)); + } + next_token(p); + type_specifiers += TypeSpecifierMask_short; + } else if (tok->kind == TokenKind_keyword_int) { + if (type_specifiers & TypeSpecifierMask_int) { + fatal_error("%s:%d: duplicate '%s'", tok->loc.filename, tok->loc.line, token_stringify(tok)); + } + next_token(p); + type_specifiers += TypeSpecifierMask_int; + } else if (tok->kind == TokenKind_keyword_long) { + if (type_specifiers & (TypeSpecifierMask_long + TypeSpecifierMask_long)) { + fatal_error("%s:%d: too looong!", tok->loc.filename, tok->loc.line); + } + next_token(p); + type_specifiers += TypeSpecifierMask_long; + } else if (tok->kind == TokenKind_keyword_float) { + if (type_specifiers & TypeSpecifierMask_float) { + fatal_error("%s:%d: duplicate '%s'", tok->loc.filename, tok->loc.line, token_stringify(tok)); + } + next_token(p); + type_specifiers += TypeSpecifierMask_float; + } else if (tok->kind == TokenKind_keyword_double) { + if (type_specifiers & TypeSpecifierMask_double) { + fatal_error("%s:%d: duplicate '%s'", tok->loc.filename, tok->loc.line, token_stringify(tok)); + } + next_token(p); + type_specifiers += TypeSpecifierMask_double; + } else if (tok->kind == TokenKind_keyword_signed) { + if (type_specifiers & TypeSpecifierMask_signed) { + fatal_error("%s:%d: duplicate '%s'", tok->loc.filename, tok->loc.line, token_stringify(tok)); + } + next_token(p); + type_specifiers += TypeSpecifierMask_signed; + } else if (tok->kind == TokenKind_keyword_unsigned) { + if (type_specifiers & TypeSpecifierMask_unsigned) { + fatal_error("%s:%d: duplicate '%s'", tok->loc.filename, tok->loc.line, token_stringify(tok)); + } + next_token(p); + type_specifiers += TypeSpecifierMask_unsigned; + } else if (tok->kind == TokenKind_keyword__BitInt) { + if (type_specifiers & TypeSpecifierMask__BitInt) { + fatal_error("%s:%d: duplicate '%s'", tok->loc.filename, tok->loc.line, token_stringify(tok)); + } + next_token(p); + type_specifiers += TypeSpecifierMask__BitInt; + } else if (tok->kind == TokenKind_keyword_bool) { + if (type_specifiers & TypeSpecifierMask_bool) { + fatal_error("%s:%d: duplicate '%s'", tok->loc.filename, tok->loc.line, token_stringify(tok)); + } + next_token(p); + type_specifiers += TypeSpecifierMask_bool; + } else if (tok->kind == TokenKind_keyword__Complex) { + if (type_specifiers & TypeSpecifierMask__Complex) { + fatal_error("%s:%d: duplicate '%s'", tok->loc.filename, tok->loc.line, token_stringify(tok)); + } + next_token(p); + type_specifiers += TypeSpecifierMask__Complex; + } else if (tok->kind == TokenKind_keyword__Decimal32) { + if (type_specifiers & TypeSpecifierMask__Decimal32) { + fatal_error("%s:%d: duplicate '%s'", tok->loc.filename, tok->loc.line, token_stringify(tok)); + } + next_token(p); + type_specifiers += TypeSpecifierMask__Decimal32; + } else if (tok->kind == TokenKind_keyword__Decimal64) { + if (type_specifiers & TypeSpecifierMask__Decimal64) { + fatal_error("%s:%d: duplicate '%s'", tok->loc.filename, tok->loc.line, token_stringify(tok)); + } + next_token(p); + type_specifiers += TypeSpecifierMask__Decimal64; + } else if (tok->kind == TokenKind_keyword__Decimal128) { + if (type_specifiers & TypeSpecifierMask__Decimal128) { + fatal_error("%s:%d: duplicate '%s'", tok->loc.filename, tok->loc.line, token_stringify(tok)); + } + next_token(p); + type_specifiers += TypeSpecifierMask__Decimal128; + } else if (tok->kind == TokenKind_keyword__Atomic) { + if (type_specifiers & TypeSpecifierMask__Atomic) { + fatal_error("%s:%d: duplicate '%s'", tok->loc.filename, tok->loc.line, token_stringify(tok)); + } + next_token(p); + type_specifiers += TypeSpecifierMask__Atomic; + } else if (tok->kind == TokenKind_keyword_struct) { + if (type_specifiers & TypeSpecifierMask_struct) { + fatal_error("%s:%d: duplicate '%s'", tok->loc.filename, tok->loc.line, token_stringify(tok)); + } + ty = parse_struct_specifier(p); + type_specifiers += TypeSpecifierMask_struct; + } else if (tok->kind == TokenKind_keyword_union) { + if (type_specifiers & TypeSpecifierMask_union) { + fatal_error("%s:%d: duplicate '%s'", tok->loc.filename, tok->loc.line, token_stringify(tok)); + } + ty = parse_union_specifier(p); + type_specifiers += TypeSpecifierMask_union; + } else if (tok->kind == TokenKind_keyword_enum) { + if (type_specifiers & TypeSpecifierMask_enum) { + fatal_error("%s:%d: duplicate '%s'", tok->loc.filename, tok->loc.line, token_stringify(tok)); + } + ty = parse_enum_specifier(p); + type_specifiers += TypeSpecifierMask_enum; + } else if (tok->kind == TokenKind_keyword_typeof) { + unimplemented(); + } else if (tok->kind == TokenKind_keyword_typeof_unqual) { + unimplemented(); + } else if (is_typedef_name(p, tok)) { + if (type_specifiers & TypeSpecifierMask_typedef_name) { + fatal_error("%s:%d: duplicate '%s'", tok->loc.filename, tok->loc.line, token_stringify(tok)); + } + next_token(p); + int typedef_idx = find_typedef(p, tok->value.string); + ty = type_dup(p->typedefs->as.list->items[typedef_idx].ty); + type_specifiers += TypeSpecifierMask_typedef_name; + } + // type-specifier-qualifier > type-qualifier + else if (tok->kind == TokenKind_keyword_const) { + // TODO + next_token(p); + } else if (tok->kind == TokenKind_keyword_restrict) { + // TODO + next_token(p); + } else if (tok->kind == TokenKind_keyword_volatile) { + // TODO + next_token(p); + } else if (tok->kind == TokenKind_keyword__Atomic) { + unimplemented(); + } + // type-specifier-qualifier > alignment-specifier + else if (tok->kind == TokenKind_keyword_alignas) { + unimplemented(); + } + // function-specifier + else if (tok->kind == TokenKind_keyword_inline) { + // TODO + next_token(p); + } else if (tok->kind == TokenKind_keyword__Noreturn) { + // TODO + next_token(p); + } else { + break; + } + } + + Type* ty_ = distinguish_type_from_type_specifiers(type_specifiers); + if (ty_) { + ty = ty_; + } + if (!ty) { + fatal_error("%s:%d: no type specifiers", tok->loc.filename, tok->loc.line); + } + + ty->storage_class = storage_class; + return ty; +} + +// init-declarator: +// declarator +// declarator '=' initializer +static AstNode* parse_init_declarator(Parser* p, Type* ty) { + AstNode* decl = parse_declarator(p, ty); + if (consume_token_if(p, TokenKind_assign)) { + decl->as.declarator->init = parse_initializer(p); + } + return decl; +} + +// struct-specifier: +// 'struct' TODO attribute-specifier-sequence? identifier? '{' member-declaration-list '}' +// 'struct' TODO attribute-specifier-sequence? identifier +static Type* parse_struct_specifier(Parser* p) { + SourceLocation struct_kw_loc = current_location(p); + next_token(p); + + const Token* name; + char* anonymous_name = NULL; + if (peek_token(p)->kind == TokenKind_brace_l) { + anonymous_name = generate_anonymous_name(p); + Token* anonymous_token = calloc(1, sizeof(Token)); + anonymous_token->kind = TokenKind_ident; + anonymous_token->value.string = anonymous_name; + anonymous_token->loc = struct_kw_loc; + name = anonymous_token; + } else { + name = parse_ident(p); + } + + if (!consume_token_if(p, TokenKind_brace_l)) { + int struct_idx = find_struct(p, name->value.string); + if (struct_idx != -1) { + Type* ty = type_new(TypeKind_struct); + ty->ref.defs = p->structs; + ty->ref.index = struct_idx; + return ty; + } else { + // TODO + AstNode* new_struct = ast_new_struct_def(name->value.string); + ast_append(p->structs, new_struct); + struct_idx = p->structs->as.list->len - 1; + + Type* ty = type_new(TypeKind_struct); + ty->ref.defs = p->structs; + ty->ref.index = struct_idx; + return ty; + } + } + + int struct_idx = find_struct(p, name->value.string); + if (struct_idx != -1 && p->structs->as.list->items[struct_idx].as.struct_def->members) { + fatal_error("%s:%d: struct %s redefined", name->loc.filename, name->loc.line, name->value.string); + } + + if (struct_idx == -1) { + AstNode* new_struct = ast_new_struct_def(name->value.string); + ast_append(p->structs, new_struct); + struct_idx = p->structs->as.list->len - 1; + } + + AstNode* members = parse_member_declaration_list(p); + expect(p, TokenKind_brace_r); + p->structs->as.list->items[struct_idx].as.struct_def->members = members; + + Type* ty = type_new(TypeKind_struct); + ty->ref.defs = p->structs; + ty->ref.index = struct_idx; + return ty; +} + +// union-specifier: +// 'union' TODO attribute-specifier-sequence? identifier? '{' member-declaration-list '}' +// 'union' TODO attribute-specifier-sequence? identifier +static Type* parse_union_specifier(Parser* p) { + SourceLocation union_kw_loc = current_location(p); + next_token(p); + + const Token* name; + char* anonymous_name = NULL; + if (peek_token(p)->kind == TokenKind_brace_l) { + anonymous_name = generate_anonymous_name(p); + Token* anonymous_token = calloc(1, sizeof(Token)); + anonymous_token->kind = TokenKind_ident; + anonymous_token->value.string = anonymous_name; + anonymous_token->loc = union_kw_loc; + name = anonymous_token; + } else { + name = parse_ident(p); + } + + if (!consume_token_if(p, TokenKind_brace_l)) { + int union_idx = find_union(p, name->value.string); + if (union_idx != -1) { + Type* ty = type_new(TypeKind_union); + ty->ref.defs = p->unions; + ty->ref.index = union_idx; + return ty; + } else { + // TODO + AstNode* new_union = ast_new_union_def(name->value.string); + ast_append(p->unions, new_union); + union_idx = p->unions->as.list->len - 1; + + Type* ty = type_new(TypeKind_union); + ty->ref.defs = p->unions; + ty->ref.index = union_idx; + return ty; + } + } + + int union_idx = find_union(p, name->value.string); + if (union_idx != -1 && p->unions->as.list->items[union_idx].as.union_def->members) { + fatal_error("%s:%d: union %s redefined", name->loc.filename, name->loc.line, name->value.string); + } + + if (union_idx == -1) { + AstNode* new_union = ast_new_union_def(name->value.string); + ast_append(p->unions, new_union); + union_idx = p->unions->as.list->len - 1; + } + + AstNode* members = parse_member_declaration_list(p); + expect(p, TokenKind_brace_r); + p->unions->as.list->items[union_idx].as.union_def->members = members; + + Type* ty = type_new(TypeKind_union); + ty->ref.defs = p->unions; + ty->ref.index = union_idx; + return ty; +} + +// member-declaration-list: +// { member-declaration }+ +static AstNode* parse_member_declaration_list(Parser* p) { + AstNode* members = ast_new_list(4); + while (peek_token(p)->kind != TokenKind_brace_r) { + AstNode* decls = parse_member_declaration(p); + for (int i = 0; i < decls->as.list->len; i++) { + ast_append(members, &decls->as.list->items[i]); + } + } + return members; +} + +// member-declaration: +// TODO attribute-specifier-sequence? specifier-qualifier-list member-declaration-list? ';' +// TODO static_assert-declaration +static AstNode* parse_member_declaration(Parser* p) { + Type* base_ty = parse_specifier_qualifier_list(p); + + if (consume_token_if(p, TokenKind_semicolon)) { + char* name = generate_anonymous_name(p); + AstNode* decls = ast_new_list(1); + AstNode* member = ast_new(AstNodeKind_struct_member); + member->ty = base_ty; + member->as.struct_member = calloc(1, sizeof(StructMemberNode)); + member->as.struct_member->name = name; + ast_append(decls, member); + return decls; + } + AstNode* decls = parse_member_declarator_list(p, base_ty); + expect(p, TokenKind_semicolon); + return decls; +} + +// specifier-qualifier-list: +// { type-specifier-qualifier }+ TODO attribute-specifier-sequence? +static Type* parse_specifier_qualifier_list(Parser* p) { + int type_specifiers = 0; + Type* ty = NULL; + + while (1) { + Token* tok = peek_token(p); + + // type-specifier-qualifier > type-specifier + if (tok->kind == TokenKind_keyword_void) { + if (type_specifiers & TypeSpecifierMask_void) { + fatal_error("%s:%d: duplicate '%s'", tok->loc.filename, tok->loc.line, token_stringify(tok)); + } + next_token(p); + type_specifiers += TypeSpecifierMask_void; + } else if (tok->kind == TokenKind_keyword_char) { + if (type_specifiers & TypeSpecifierMask_char) { + fatal_error("%s:%d: duplicate '%s'", tok->loc.filename, tok->loc.line, token_stringify(tok)); + } + next_token(p); + type_specifiers += TypeSpecifierMask_char; + } else if (tok->kind == TokenKind_keyword_short) { + if (type_specifiers & TypeSpecifierMask_short) { + fatal_error("%s:%d: duplicate '%s'", tok->loc.filename, tok->loc.line, token_stringify(tok)); + } + next_token(p); + type_specifiers += TypeSpecifierMask_short; + } else if (tok->kind == TokenKind_keyword_int) { + if (type_specifiers & TypeSpecifierMask_int) { + fatal_error("%s:%d: duplicate '%s'", tok->loc.filename, tok->loc.line, token_stringify(tok)); + } + next_token(p); + type_specifiers += TypeSpecifierMask_int; + } else if (tok->kind == TokenKind_keyword_long) { + if (type_specifiers & (TypeSpecifierMask_long + TypeSpecifierMask_long)) { + fatal_error("%s:%d: too looong!", tok->loc.filename, tok->loc.line); + } + next_token(p); + type_specifiers += TypeSpecifierMask_long; + } else if (tok->kind == TokenKind_keyword_float) { + if (type_specifiers & TypeSpecifierMask_float) { + fatal_error("%s:%d: duplicate '%s'", tok->loc.filename, tok->loc.line, token_stringify(tok)); + } + next_token(p); + type_specifiers += TypeSpecifierMask_float; + } else if (tok->kind == TokenKind_keyword_double) { + if (type_specifiers & TypeSpecifierMask_double) { + fatal_error("%s:%d: duplicate '%s'", tok->loc.filename, tok->loc.line, token_stringify(tok)); + } + next_token(p); + type_specifiers += TypeSpecifierMask_double; + } else if (tok->kind == TokenKind_keyword_signed) { + if (type_specifiers & TypeSpecifierMask_signed) { + fatal_error("%s:%d: duplicate '%s'", tok->loc.filename, tok->loc.line, token_stringify(tok)); + } + next_token(p); + type_specifiers += TypeSpecifierMask_signed; + } else if (tok->kind == TokenKind_keyword_unsigned) { + if (type_specifiers & TypeSpecifierMask_unsigned) { + fatal_error("%s:%d: duplicate '%s'", tok->loc.filename, tok->loc.line, token_stringify(tok)); + } + next_token(p); + type_specifiers += TypeSpecifierMask_unsigned; + } else if (tok->kind == TokenKind_keyword__BitInt) { + if (type_specifiers & TypeSpecifierMask__BitInt) { + fatal_error("%s:%d: duplicate '%s'", tok->loc.filename, tok->loc.line, token_stringify(tok)); + } + next_token(p); + type_specifiers += TypeSpecifierMask__BitInt; + } else if (tok->kind == TokenKind_keyword_bool) { + if (type_specifiers & TypeSpecifierMask_bool) { + fatal_error("%s:%d: duplicate '%s'", tok->loc.filename, tok->loc.line, token_stringify(tok)); + } + next_token(p); + type_specifiers += TypeSpecifierMask_bool; + } else if (tok->kind == TokenKind_keyword__Complex) { + if (type_specifiers & TypeSpecifierMask__Complex) { + fatal_error("%s:%d: duplicate '%s'", tok->loc.filename, tok->loc.line, token_stringify(tok)); + } + next_token(p); + type_specifiers += TypeSpecifierMask__Complex; + } else if (tok->kind == TokenKind_keyword__Decimal32) { + if (type_specifiers & TypeSpecifierMask__Decimal32) { + fatal_error("%s:%d: duplicate '%s'", tok->loc.filename, tok->loc.line, token_stringify(tok)); + } + next_token(p); + type_specifiers += TypeSpecifierMask__Decimal32; + } else if (tok->kind == TokenKind_keyword__Decimal64) { + if (type_specifiers & TypeSpecifierMask__Decimal64) { + fatal_error("%s:%d: duplicate '%s'", tok->loc.filename, tok->loc.line, token_stringify(tok)); + } + next_token(p); + type_specifiers += TypeSpecifierMask__Decimal64; + } else if (tok->kind == TokenKind_keyword__Decimal128) { + if (type_specifiers & TypeSpecifierMask__Decimal128) { + fatal_error("%s:%d: duplicate '%s'", tok->loc.filename, tok->loc.line, token_stringify(tok)); + } + next_token(p); + type_specifiers += TypeSpecifierMask__Decimal128; + } else if (tok->kind == TokenKind_keyword__Atomic) { + if (type_specifiers & TypeSpecifierMask__Atomic) { + fatal_error("%s:%d: duplicate '%s'", tok->loc.filename, tok->loc.line, token_stringify(tok)); + } + next_token(p); + type_specifiers += TypeSpecifierMask__Atomic; + } else if (tok->kind == TokenKind_keyword_struct) { + if (type_specifiers & TypeSpecifierMask_struct) { + fatal_error("%s:%d: duplicate '%s'", tok->loc.filename, tok->loc.line, token_stringify(tok)); + } + ty = parse_struct_specifier(p); + type_specifiers += TypeSpecifierMask_struct; + } else if (tok->kind == TokenKind_keyword_union) { + if (type_specifiers & TypeSpecifierMask_union) { + fatal_error("%s:%d: duplicate '%s'", tok->loc.filename, tok->loc.line, token_stringify(tok)); + } + ty = parse_union_specifier(p); + type_specifiers += TypeSpecifierMask_union; + } else if (tok->kind == TokenKind_keyword_enum) { + if (type_specifiers & TypeSpecifierMask_enum) { + fatal_error("%s:%d: duplicate '%s'", tok->loc.filename, tok->loc.line, token_stringify(tok)); + } + ty = parse_enum_specifier(p); + type_specifiers += TypeSpecifierMask_enum; + } else if (tok->kind == TokenKind_keyword_typeof) { + unimplemented(); + } else if (tok->kind == TokenKind_keyword_typeof_unqual) { + unimplemented(); + } else if (is_typedef_name(p, tok)) { + if (type_specifiers & TypeSpecifierMask_typedef_name) { + fatal_error("%s:%d: duplicate '%s'", tok->loc.filename, tok->loc.line, token_stringify(tok)); + } + next_token(p); + int typedef_idx = find_typedef(p, tok->value.string); + ty = type_dup(p->typedefs->as.list->items[typedef_idx].ty); + type_specifiers += TypeSpecifierMask_typedef_name; + } + // type-specifier-qualifier > type-qualifier + else if (tok->kind == TokenKind_keyword_const) { + // TODO + next_token(p); + } else if (tok->kind == TokenKind_keyword_restrict) { + unimplemented(); + } else if (tok->kind == TokenKind_keyword_volatile) { + // TODO + next_token(p); + } else if (tok->kind == TokenKind_keyword__Atomic) { + unimplemented(); + } + // type-specifier-qualifier > alignment-specifier + else if (tok->kind == TokenKind_keyword_alignas) { + unimplemented(); + } else { + break; + } + } + + Type* ty_ = distinguish_type_from_type_specifiers(type_specifiers); + if (ty_) { + ty = ty_; + } + + ty->storage_class = StorageClass_unspecified; + return ty; +} + +// member-declarator-list: +// { member-declarator | ',' }+ +static AstNode* parse_member_declarator_list(Parser* p, Type* base_ty) { + AstNode* list = ast_new_list(1); + while (1) { + AstNode* d = parse_member_declarator(p, base_ty); + ast_append(list, d); + if (!consume_token_if(p, TokenKind_comma)) { + break; + } + } + return list; +} + +// member-declarator: +// declarator +// declarator ':' constant-expr +// TODO ':' constant-expr +static AstNode* parse_member_declarator(Parser* p, Type* base_ty) { + AstNode* decl = parse_declarator(p, base_ty); + + const char* name = decl->as.declarator->name; + decl->kind = AstNodeKind_struct_member; + decl->as.struct_member = calloc(1, sizeof(StructMemberNode)); + decl->as.struct_member->name = name; + + if (consume_token_if(p, TokenKind_colon)) { + AstNode* bit_width_node = parse_constant_expr(p); + InitData* bit_width_evaluated = eval_init_expr(bit_width_node, type_new(TypeKind_int)); + if (bit_width_evaluated->len != 1) + fatal_error("parse_member_declarator: invalid bit-field"); + if (bit_width_evaluated->blocks[0].kind != InitDataBlockKind_bytes) + fatal_error("parse_member_declarator: invalid bit-field"); + if (bit_width_evaluated->blocks[0].as.bytes.len != sizeof(int)) + fatal_error("parse_member_declarator: invalid bit-field"); + int bit_width; + memcpy(&bit_width, bit_width_evaluated->blocks[0].as.bytes.buf, sizeof(int)); + if (bit_width < 0) + fatal_error("parse_member_declarator: invalid bit-field"); + if (bit_width % 8 != 0) + fatal_error("parse_member_declarator: unimplemented"); + decl->as.struct_member->is_bitfield = true; + decl->as.struct_member->bitfield_offset = -1; // TODO + decl->as.struct_member->bitfield_width = bit_width; + } + + return decl; +} + +// enum-specifier: +// 'enum' TODO attribute-specifier-sequence? identifier? TODO enum-type-specifier? '{' enumerator-list ','? '}' +// 'enum' identifier TODO enum-type-specifier? +static Type* parse_enum_specifier(Parser* p) { + SourceLocation enum_kw_loc = current_location(p); + next_token(p); + + const Token* name; + char* anonymous_name = NULL; + if (peek_token(p)->kind == TokenKind_brace_l) { + anonymous_name = generate_anonymous_name(p); + Token* anonymous_token = calloc(1, sizeof(Token)); + anonymous_token->kind = TokenKind_ident; + anonymous_token->value.string = anonymous_name; + anonymous_token->loc = enum_kw_loc; + name = anonymous_token; + } else { + name = parse_ident(p); + } + + if (!consume_token_if(p, TokenKind_brace_l)) { + int enum_idx = find_enum(p, name->value.string); + if (enum_idx != -1) { + Type* ty = type_new(TypeKind_enum); + ty->ref.defs = p->enums; + ty->ref.index = enum_idx; + return ty; + } else { + // TODO + AstNode* new_enum = ast_new_enum_def(name->value.string); + ast_append(p->enums, new_enum); + enum_idx = p->enums->as.list->len - 1; + + Type* ty = type_new(TypeKind_enum); + ty->ref.defs = p->enums; + ty->ref.index = enum_idx; + return ty; + } + } + + int enum_idx = find_enum(p, name->value.string); + if (enum_idx != -1 && p->enums->as.list->items[enum_idx].as.enum_def->members) { + fatal_error("%s:%d: enum %s redefined", name->loc.filename, name->loc.line, name->value.string); + } + + if (enum_idx == -1) { + AstNode* new_enum = ast_new_enum_def(name->value.string); + ast_append(p->enums, new_enum); + enum_idx = p->enums->as.list->len - 1; + } + + parse_enum_members(p, enum_idx); + expect(p, TokenKind_brace_r); + + Type* ty = type_new(TypeKind_enum); + ty->ref.defs = p->enums; + ty->ref.index = enum_idx; + return ty; +} + +// enumerator-list: +// { enumerator |? ',' }+ +static void parse_enum_members(Parser* p, int enum_idx) { + int next_value = 0; + AstNode* list = ast_new_list(16); + + if (!p->enums->as.list->items[enum_idx].as.enum_def->members) { + p->enums->as.list->items[enum_idx].as.enum_def->members = list; + } + + while (peek_token(p)->kind != TokenKind_brace_r) { + AstNode* member = parse_enum_member(p, next_value); + next_value = member->as.enum_member->value + 1; + + ast_append(list, member); + + if (!consume_token_if(p, TokenKind_comma)) { + break; + } + } +} + +// enumerator: +// enumeration-constant TODO attribute-specifier-sequence? ( '=' constant-expr )? +static AstNode* parse_enum_member(Parser* p, int next_value) { + const Token* name = parse_ident(p); + int value; + if (consume_token_if(p, TokenKind_assign)) { + AstNode* expr = parse_constant_expr(p); + value = eval(expr); + } else { + value = next_value; + } + return ast_new_enum_member(name->value.string, value); +} + +// type-name: +// specifier-qualifier-list abstract-declarator? +static Type* parse_type_name(Parser* p) { + Type* ty = parse_specifier_qualifier_list(p); + ty = parse_abstract_declarator_opt(p, ty); + return ty; +} + +// abstract-declarator: +// pointer +// pointer? TODO direct-abstract-declarator +// +// abstract-declarator?: +// pointer? TODO direct-abstract-declarator? +static Type* parse_abstract_declarator_opt(Parser* p, Type* ty) { + return parse_pointer_opt(p, ty); +} + +// braced-initializer: +// '{' { designation? initializer |? ',' }* '}' +static AstNode* parse_braced_initializer(Parser* p) { + AstNode* inits = ast_new_list(4); + expect(p, TokenKind_brace_l); + while (peek_token(p)->kind != TokenKind_brace_r) { + // TODO: support designation + AstNode* init = parse_initializer(p); + ast_append(inits, init); + if (!consume_token_if(p, TokenKind_comma)) { + break; + } + } + expect(p, TokenKind_brace_r); + return ast_new_array_initializer(inits); +} + +// initializer: +// braced-initializer +// assignment-expr +static AstNode* parse_initializer(Parser* p) { + if (peek_token(p)->kind == TokenKind_brace_l) { + return parse_braced_initializer(p); + } else { + return parse_assignment_expr(p); + } +} + +#ifndef __ducc__ +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wunused-function" +#endif +// attribute-specifier-sequence: +// { attribute-specifier }+ +static AstNode* parse_attribute_specifier_sequence_opt(Parser* p) { + AstNode* attrs = NULL; + AstNode* attr; + while ((attr = parse_attribute_specifier_opt(p))) { + unimplemented(); + } + return attrs; +} +#ifndef __ducc__ +#pragma GCC diagnostic pop +#endif + +// attribute-specifier: +// '[' '[' attribute-list ']' ']' +static AstNode* parse_attribute_specifier_opt(Parser* p) { + if (peek_token(p)->kind != TokenKind_bracket_l || peek_token2(p)->kind != TokenKind_bracket_l) { + return NULL; + } + next_token(p); // skip '[' + next_token(p); // skip '[' + AstNode* ret = parse_attribute_list(p); + expect(p, TokenKind_bracket_r); + expect(p, TokenKind_bracket_r); + return ret; +} + +// attribute-list: +// { attribute? | ',' } +static AstNode* parse_attribute_list(Parser* p) { + while (1) { + if (peek_token(p)->kind == TokenKind_ident) { + parse_attribute(p); + } + if (!consume_token_if(p, TokenKind_comma)) { + break; + } + } + unimplemented(); + return NULL; +} + +// attribute: +// attribute-token attribute-argument-clause? +// +// attribute-token: +// identifier ( '::' identifier )? +static AstNode* parse_attribute(Parser* p) { + expect(p, TokenKind_ident); + if (peek_token(p)->kind == TokenKind_colon && peek_token2(p)->kind == TokenKind_colon) { + next_token(p); + next_token(p); + expect(p, TokenKind_ident); + } + if (peek_token(p)->kind == TokenKind_paren_l) { + parse_attribute_argument_clause(p); + } + unimplemented(); + return NULL; +} + +// attribute-argument-clause: +// '(' balanced-token-sequence? ')' +static AstNode* parse_attribute_argument_clause(Parser* p) { + expect(p, TokenKind_paren_l); + if (peek_token(p)->kind != TokenKind_paren_r) { + parse_balanced_token_sequence(p); + } + expect(p, TokenKind_paren_r); + unimplemented(); + return NULL; +} + +// balanced-token-sequence: +// { balanced-token }+ +// +// balanced-token: +// '(' balanced-token-sequence? ')' +// '[' balanced-token-sequence? ']' +// '{' balanced-token-sequence? '}' +// any token other than a parenthesis, a bracket, or a brace +static AstNode* parse_balanced_token_sequence(Parser* p) { + int paren_depth = 0; + int bracket_depth = 0; + int brace_depth = 0; + while (1) { + Token* tok = peek_token(p); + if (tok->kind == TokenKind_paren_l) { + paren_depth++; + next_token(p); + } else if (tok->kind == TokenKind_paren_r) { + if (paren_depth == 0) + break; + paren_depth--; + next_token(p); + } else if (tok->kind == TokenKind_bracket_l) { + bracket_depth++; + next_token(p); + } else if (tok->kind == TokenKind_bracket_r) { + if (bracket_depth == 0) + break; + bracket_depth--; + next_token(p); + } else if (tok->kind == TokenKind_brace_l) { + brace_depth++; + next_token(p); + } else if (tok->kind == TokenKind_brace_r) { + if (brace_depth == 0) + break; + brace_depth--; + next_token(p); + } else if (tok->kind == TokenKind_eof) { + break; + } else { + next_token(p); + } + } + unimplemented(); + return NULL; +} + +// unlabaled-stmt: +// attribute-specifier-sequence? compound-stmt +// attribute-specifier-sequence? if-stmt +// attribute-specifier-sequence? switch-stmt +// attribute-specifier-sequence? while-stmt +// attribute-specifier-sequence? do-while-stmt +// attribute-specifier-sequence? for-stmt +// attribute-specifier-sequence? goto-stmt +// attribute-specifier-sequence? continue-stmt +// attribute-specifier-sequence? break-stmt +// attribute-specifier-sequence? return-stmt +// attribute-specifier-sequence? expr-stmt +static AstNode* parse_unlabaled_stmt(Parser* p) { + switch (peek_token(p)->kind) { + case TokenKind_brace_l: + return parse_compound_stmt(p); + case TokenKind_keyword_if: + return parse_if_stmt(p); + case TokenKind_keyword_switch: + return parse_switch_stmt(p); + case TokenKind_keyword_while: + return parse_while_stmt(p); + case TokenKind_keyword_do: + return parse_do_while_stmt(p); + case TokenKind_keyword_for: + return parse_for_stmt(p); + case TokenKind_keyword_goto: + return parse_goto_stmt(p); + case TokenKind_keyword_continue: + return parse_continue_stmt(p); + case TokenKind_keyword_break: + return parse_break_stmt(p); + case TokenKind_keyword_return: + return parse_return_stmt(p); + default: + return parse_expr_stmt(p); + } +} + +// label: +// TODO attribute-specifier-sequence? identifier ':' +// TODO attribute-specifier-sequence? 'case' constant-expr ':' +// TODO attribute-specifier-sequence? 'default' ':' +static AstNode* parse_label(Parser* p) { + SourceLocation loc = current_location(p); + Token* t = peek_token(p); + + AstNode* node; + if (t->kind == TokenKind_keyword_case) { + if (!p->current_switch) { + fatal_error("%s:%d: 'case' label not within a switch statement", t->loc.filename, t->loc.line); + } + expect(p, TokenKind_keyword_case); + AstNode* value = parse_constant_expr(p); + expect(p, TokenKind_colon); + node = ast_new_case_label(eval(value), ast_new_nop()); + } else if (t->kind == TokenKind_keyword_default) { + if (!p->current_switch) { + fatal_error("%s:%d: 'default' label not within a switch statement", t->loc.filename, t->loc.line); + } + expect(p, TokenKind_keyword_default); + expect(p, TokenKind_colon); + node = ast_new_default_label(ast_new_nop()); + } else { + // identifier ':' + Token* label_token = expect(p, TokenKind_ident); + expect(p, TokenKind_colon); + node = ast_new_label_stmt(label_token->value.string, ast_new_nop()); + } + node->loc = loc; + return node; +} + +static bool is_label_token(Parser* p) { + Token* t = peek_token(p); + return t->kind == TokenKind_keyword_case || t->kind == TokenKind_keyword_default || + (t->kind == TokenKind_ident && peek_token2(p)->kind == TokenKind_colon); +} + +// stmt: +// label stmt +// unlabaled-stmt +static AstNode* parse_stmt(Parser* p) { + if (is_label_token(p)) { + AstNode* label = parse_label(p); + AstNode* stmt = parse_stmt(p); + switch (label->kind) { + case AstNodeKind_case_label: + label->as.case_label->body = stmt; + break; + case AstNodeKind_default_label: + label->as.default_label->body = stmt; + break; + case AstNodeKind_label_stmt: + label->as.label_stmt->body = stmt; + break; + default: + unreachable(); + break; + } + return label; + } + return parse_unlabaled_stmt(p); +} + +// compound-stmt: +// '{' { block-item }* '}' +static AstNode* parse_compound_stmt(Parser* p) { + SourceLocation loc = current_location(p); + AstNode* list = ast_new_list(4); + expect(p, TokenKind_brace_l); + enter_scope(p); + while (peek_token(p)->kind != TokenKind_brace_r) { + AstNode* stmt = parse_block_item(p); + ast_append(list, stmt); + } + leave_scope(p); + expect(p, TokenKind_brace_r); + list->loc = loc; + return list; +} + +// block-item: +// declaration +// unlabaled-stmt +// label +static AstNode* parse_block_item(Parser* p) { + Token* t = peek_token(p); + + if (t->kind == TokenKind_keyword_static_assert || is_type_token(p, t)) { + return parse_declaration(p); + } else if (is_label_token(p)) { + return parse_label(p); + } else { + return parse_unlabaled_stmt(p); + } +} + +// expr-stmt: +// TODO attribute-specifier-sequence? expr? ';' +static AstNode* parse_expr_stmt(Parser* p) { + SourceLocation loc = current_location(p); + if (consume_token_if(p, TokenKind_semicolon)) { + AstNode* node = ast_new_nop(); + node->loc = loc; + return node; + } + + AstNode* e = parse_expr(p); + expect(p, TokenKind_semicolon); + AstNode* node = ast_new_expr_stmt(e); + node->loc = loc; + return node; +} + +// if-stmt: +// 'if' '(' expr ')' stmt ( 'else' stmt )? +static AstNode* parse_if_stmt(Parser* p) { + SourceLocation loc = current_location(p); + expect(p, TokenKind_keyword_if); + expect(p, TokenKind_paren_l); + AstNode* cond = parse_expr(p); + expect(p, TokenKind_paren_r); + AstNode* then_body = parse_stmt(p); + AstNode* else_body = NULL; + if (consume_token_if(p, TokenKind_keyword_else)) { + else_body = parse_stmt(p); + } + + AstNode* node = ast_new_if_stmt(cond, then_body, else_body); + node->loc = loc; + return node; +} + +// switch-stmt: +// 'switch' '(' expr ')' stmt +static AstNode* parse_switch_stmt(Parser* p) { + SourceLocation loc = current_location(p); + expect(p, TokenKind_keyword_switch); + expect(p, TokenKind_paren_l); + AstNode* expr = parse_expr(p); + expect(p, TokenKind_paren_r); + + AstNode* tmp_var = generate_temporary_lvar(p, expr->ty); + AstNode* assignment = ast_new_assign_expr(TokenKind_assign, tmp_var, expr); + AstNode* assign_stmt = ast_new_expr_stmt(assignment); + assign_stmt->loc = loc; + + AstNode* switch_stmt = ast_new_switch_stmt(tmp_var); + switch_stmt->loc = loc; + + AstNode* prev_switch = p->current_switch; + p->current_switch = switch_stmt; + switch_stmt->as.switch_stmt->body = parse_stmt(p); + p->current_switch = prev_switch; + + AstNode* list = ast_new_list(2); + list->loc = loc; + ast_append(list, assign_stmt); + ast_append(list, switch_stmt); + return list; +} + +// while-stmt: +// 'while' '(' expr ')' stmt +static AstNode* parse_while_stmt(Parser* p) { + SourceLocation loc = current_location(p); + expect(p, TokenKind_keyword_while); + expect(p, TokenKind_paren_l); + AstNode* cond = parse_expr(p); + expect(p, TokenKind_paren_r); + AstNode* body = parse_stmt(p); + AstNode* node = ast_new_for_stmt(NULL, cond, NULL, body); + node->loc = loc; + return node; +} + +// do-while-stmt: +// 'do' stmt 'while' '(' expr ')' ';' +static AstNode* parse_do_while_stmt(Parser* p) { + SourceLocation loc = current_location(p); + expect(p, TokenKind_keyword_do); + AstNode* body = parse_stmt(p); + expect(p, TokenKind_keyword_while); + expect(p, TokenKind_paren_l); + AstNode* cond = parse_expr(p); + expect(p, TokenKind_paren_r); + expect(p, TokenKind_semicolon); + + AstNode* node = ast_new_do_while_stmt(cond, body); + node->loc = loc; + return node; +} + +// for-stmt: +// 'for' '(' expr? ';' expr? ';' expr? ')' stmt +// 'for' '(' declaration expr? ';' expr? ')' stmt +static AstNode* parse_for_stmt(Parser* p) { + SourceLocation loc = current_location(p); + expect(p, TokenKind_keyword_for); + expect(p, TokenKind_paren_l); + AstNode* init = NULL; + AstNode* cond = NULL; + AstNode* update = NULL; + enter_scope(p); + if (peek_token(p)->kind != TokenKind_semicolon) { + if (is_type_token(p, peek_token(p))) { + AstNode* decls = parse_declaration(p); + AstNode* initializers = ast_new_list(1); + for (int i = 0; i < decls->as.list->len; i++) { + AstNode* item = &decls->as.list->items[i]; + if (item->kind == AstNodeKind_expr_stmt) { + ast_append(initializers, item->as.expr_stmt->expr); + } + } + init = initializers; + } else { + init = parse_expr(p); + expect(p, TokenKind_semicolon); + } + } else { + expect(p, TokenKind_semicolon); + } + if (peek_token(p)->kind != TokenKind_semicolon) { + cond = parse_expr(p); + } else { + cond = ast_new_int(1); + } + expect(p, TokenKind_semicolon); + if (peek_token(p)->kind != TokenKind_paren_r) { + update = parse_expr(p); + } + expect(p, TokenKind_paren_r); + AstNode* body = parse_stmt(p); + leave_scope(p); + AstNode* node = ast_new_for_stmt(init, cond, update, body); + node->loc = loc; + return node; +} + +// goto-stmt: +// 'goto' identifier ';' +static AstNode* parse_goto_stmt(Parser* p) { + SourceLocation loc = current_location(p); + expect(p, TokenKind_keyword_goto); + Token* label_token = expect(p, TokenKind_ident); + expect(p, TokenKind_semicolon); + AstNode* node = ast_new_goto_stmt(label_token->value.string); + node->loc = loc; + return node; +} + +// continue-stmt: +// 'continue' ';' +static AstNode* parse_continue_stmt(Parser* p) { + SourceLocation loc = current_location(p); + expect(p, TokenKind_keyword_continue); + expect(p, TokenKind_semicolon); + AstNode* node = ast_new_continue_stmt(); + node->loc = loc; + return node; +} + +// break-stmt: +// 'break' ';' +static AstNode* parse_break_stmt(Parser* p) { + SourceLocation loc = current_location(p); + expect(p, TokenKind_keyword_break); + expect(p, TokenKind_semicolon); + AstNode* node = ast_new_break_stmt(); + node->loc = loc; + return node; +} + +// return-stmt: +// 'return' expr? ';' +static AstNode* parse_return_stmt(Parser* p) { + SourceLocation loc = current_location(p); + expect(p, TokenKind_keyword_return); + if (consume_token_if(p, TokenKind_semicolon)) { + AstNode* node = ast_new_return_stmt(NULL); + node->loc = loc; + return node; + } + + AstNode* expr = parse_expr(p); + expect(p, TokenKind_semicolon); + AstNode* node = ast_new_return_stmt(expr); + node->loc = loc; + return node; +} + +// static_assert-declaration: +// 'static_assert' '(' constant-expr ( ',' string-literal )? ')' ';' +static AstNode* parse_static_assert_declaration(Parser* p) { + expect(p, TokenKind_keyword_static_assert); + expect(p, TokenKind_paren_l); + AstNode* predicate = parse_constant_expr(p); + Token* message = NULL; + if (consume_token_if(p, TokenKind_comma)) { + message = expect(p, TokenKind_literal_str); + } + // TODO: validate the assertion. + (void)predicate; + (void)message; + expect(p, TokenKind_paren_r); + expect(p, TokenKind_semicolon); + return NULL; +} + +// external-declaration: +// static_assert-declaration +// TODO attribute-specifier-sequence ';' +// TODO attribute-specifier-sequence function-definition-or-declaration-rest +// function-definition-or-declaration-rest +// +// function-definition-or-declaration-rest: +// declaration-specifiers init-declarator-list ';' +// declaration-specifiers init-declarator-list compound-stmt +static AstNode* parse_external_declaration(Parser* p) { + if (peek_token(p)->kind == TokenKind_keyword_static_assert) { + return parse_static_assert_declaration(p); + } + + Type* ty = parse_declaration_specifiers(p); + if (consume_token_if(p, TokenKind_semicolon)) { + // Type declaration. + return NULL; + } + + AstNode* decls = parse_init_declarator_list(p, ty); + + if (peek_token(p)->kind == TokenKind_brace_l) { + return parse_function_definition(p, decls); + } + + expect(p, TokenKind_semicolon); + if (ty->storage_class == StorageClass_typedef) { + process_typedefs(p, decls); + return NULL; + } else { + process_declarations(p, &decls->as.list->items[decls->as.list->len - 1]); + return decls; + } +} + +// translation-unit: +// { external-declaration }+ +Program* parse(TokenArray* tokens) { + Parser* p = parser_new(tokens); + AstNode* funcs = ast_new_list(32); + AstNode* vars = ast_new_list(16); + while (eof(p)) { + AstNode* n = parse_external_declaration(p); + if (!n) + continue; + if (n->kind == AstNodeKind_func_def) { + ast_append(funcs, n); + } else if (n->kind == AstNodeKind_gvar_decl) { + ast_append(vars, n); + } else if (n->kind == AstNodeKind_list) { + for (int i = 0; i < n->as.list->len; ++i) { + if (n->as.list->items[i].kind == AstNodeKind_gvar_decl) + ast_append(vars, &n->as.list->items[i]); + } + } + } + Program* prog = calloc(1, sizeof(Program)); + prog->funcs = funcs; + prog->vars = vars; + prog->str_literals = p->str_literals.data; + return prog; +} + +static int eval_binary_expr(int op, int v1, int v2) { + if (op == TokenKind_andand) { + return v1 && v2; + } else if (op == TokenKind_oror) { + return v1 || v2; + } else if (op == TokenKind_plus) { + return v1 + v2; + } else if (op == TokenKind_minus) { + return v1 - v2; + } else if (op == TokenKind_star) { + return v1 * v2; + } else if (op == TokenKind_slash) { + if (v2 == 0) { + fatal_error("eval: division by zero"); + } + return v1 / v2; + } else if (op == TokenKind_percent) { + if (v2 == 0) { + fatal_error("eval: division by zero"); + } + return v1 % v2; + } else if (op == TokenKind_eq) { + return v1 == v2; + } else if (op == TokenKind_ne) { + return v1 != v2; + } else if (op == TokenKind_lt) { + return v1 < v2; + } else if (op == TokenKind_le) { + return v1 <= v2; + } else if (op == TokenKind_lshift) { + return v1 << v2; + } else if (op == TokenKind_rshift) { + return v1 >> v2; + } else if (op == TokenKind_and) { + return v1 & v2; + } else if (op == TokenKind_or) { + return v1 | v2; + } else if (op == TokenKind_xor) { + return v1 ^ v2; + } else { + unimplemented(); + } +} + +static int eval(AstNode* e) { + if (e->kind == AstNodeKind_int_expr) { + return e->as.int_expr->value; + } else if (e->kind == AstNodeKind_unary_expr) { + int v = eval(e->as.unary_expr->operand); + if (e->as.unary_expr->op == TokenKind_not) { + return !v; + } else if (e->as.unary_expr->op == TokenKind_minus) { + return -v; + } else { + unimplemented(); + } + } else if (e->kind == AstNodeKind_binary_expr) { + int v1 = eval(e->as.binary_expr->lhs); + int v2 = eval(e->as.binary_expr->rhs); + return eval_binary_expr(e->as.binary_expr->op, v1, v2); + } else if (e->kind == AstNodeKind_logical_expr) { + int v1 = eval(e->as.logical_expr->lhs); + int v2 = eval(e->as.logical_expr->rhs); + return eval_binary_expr(e->as.logical_expr->op, v1, v2); + } else if (e->kind == AstNodeKind_cond_expr) { + int cond = eval(e->as.cond_expr->cond); + if (cond) { + return eval(e->as.cond_expr->then); + } else { + return eval(e->as.cond_expr->else_); + } + } else if (e->kind == AstNodeKind_cast_expr) { + return eval(e->as.cast_expr->operand); + } else if (e->kind == AstNodeKind_deref_expr) { + return eval(e->as.deref_expr->operand); + } else if (e->kind == AstNodeKind_ref_expr) { + return eval(e->as.ref_expr->operand); + } else { + unimplemented(); + } +} + +bool pp_eval_constant_expr(TokenArray* pp_tokens) { + TokenArray* tokens = convert_pp_tokens_to_tokens(pp_tokens); + Parser* p = parser_new(tokens); + AstNode* e = parse_constant_expr(p); + return eval(e) != 0; +} + +static InitData* initdata_new() { + InitData* init_data = calloc(1, sizeof(InitData)); + init_data->len = 0; + init_data->capacity = 1; + init_data->blocks = calloc(init_data->capacity, sizeof(InitDataBlock)); + return init_data; +} + +static void initdata_reserve(InitData* init_data, size_t size) { + if (size <= init_data->capacity) + return; + while (init_data->capacity < size) { + init_data->capacity *= 2; + } + init_data->blocks = realloc(init_data->blocks, init_data->capacity * sizeof(InitDataBlock)); + memset(init_data->blocks + init_data->len, 0, (init_data->capacity - init_data->len) * sizeof(InitDataBlock)); +} + +static InitDataBlock* initdata_push_new(InitData* init_data) { + initdata_reserve(init_data, init_data->len + 1); + return &init_data->blocks[init_data->len++]; +} + +static void initdata_append_addr(InitData* buf, const char* label) { + InitDataBlock* block = initdata_push_new(buf); + block->kind = InitDataBlockKind_addr; + block->as.addr.label = label; +} + +static void initdata_append_zeros(InitData* buf, size_t len) { + if (len == 0) + return; + char* data = calloc(len, sizeof(char)); + InitDataBlock* block = initdata_push_new(buf); + block->kind = InitDataBlockKind_bytes; + block->as.bytes.len = len; + block->as.bytes.buf = data; +} + +static void initdata_append_bytes(InitData* buf, const char* data, size_t len) { + char* copy = calloc(len, sizeof(char)); + memcpy(copy, data, len); + InitDataBlock* block = initdata_push_new(buf); + block->kind = InitDataBlockKind_bytes; + block->as.bytes.len = len; + block->as.bytes.buf = copy; +} + +static void do_eval_init_expr(InitData* buf, AstNode* expr, Type* ty) { + if (expr->kind == AstNodeKind_array_initializer) { + AstNode* list = expr->as.array_initializer->list; + if (ty->kind == TypeKind_array) { + for (int i = 0; i < list->as.list->len; ++i) { + do_eval_init_expr(buf, &list->as.list->items[i], ty->base); + } + } else if (ty->kind == TypeKind_struct) { + AstNode* def = &ty->ref.defs->as.list->items[ty->ref.index]; + AstNode* members = def->as.struct_def->members; + int offset = 0; + for (int i = 0; i < list->as.list->len; ++i) { + AstNode* member = &members->as.list->items[i]; + int align = type_alignof(member->ty); + int aligned_offset = to_aligned(offset, align); + initdata_append_zeros(buf, aligned_offset - offset); // padding + offset = aligned_offset; + do_eval_init_expr(buf, &list->as.list->items[i], member->ty); + offset += type_sizeof(member->ty); + } + int total = type_sizeof(ty); + initdata_append_zeros(buf, total - offset); // padding + } else if (ty->kind == TypeKind_union) { + AstNode* def = &ty->ref.defs->as.list->items[ty->ref.index]; + AstNode* members = def->as.union_def->members; + AstNode* member = &members->as.list->items[0]; + do_eval_init_expr(buf, &list->as.list->items[0], member->ty); + int member_size = type_sizeof(member->ty); + int total = type_sizeof(ty); + initdata_append_zeros(buf, total - member_size); // padding + } else { + unimplemented(); + } + } else if (ty->kind == TypeKind_ptr) { + if (expr->kind == AstNodeKind_str_expr) { + char label[32]; + sprintf(label, ".Lstr__%d", expr->as.str_expr->idx); + initdata_append_addr(buf, strdup(label)); + } else if (expr->kind == AstNodeKind_ref_expr) { + if (expr->as.ref_expr->operand->kind != AstNodeKind_gvar) { + unimplemented(); + } + initdata_append_addr(buf, expr->as.ref_expr->operand->as.gvar->name); + } else if (expr->kind == AstNodeKind_gvar) { + initdata_append_addr(buf, expr->as.gvar->name); + } else if (expr->kind == AstNodeKind_func) { + initdata_append_addr(buf, expr->as.func->name); + } else if (expr->kind == AstNodeKind_int_expr && expr->as.int_expr->value == 0) { + initdata_append_zeros(buf, type_sizeof(ty)); + } else { + unimplemented(); + } + } else { + int value = eval(expr); + int size = type_sizeof(ty); + char bytes[8]; + for (int i = 0; i < size; ++i) { + bytes[i] = (value >> (i * 8)) & 0xff; + } + initdata_append_bytes(buf, bytes, size); + } +} + +InitData* eval_init_expr(AstNode* expr, Type* ty) { + InitData* buf = initdata_new(); + do_eval_init_expr(buf, expr, ty); + return buf; +} diff --git a/src/cc1/parse.h b/src/cc1/parse.h new file mode 100644 index 0000000..f229b77 --- /dev/null +++ b/src/cc1/parse.h @@ -0,0 +1,43 @@ +#ifndef DUCC_PARSE_H +#define DUCC_PARSE_H + +#include "../lib/common.h" +#include "ast.h" +#include "preprocess.h" + +Program* parse(TokenArray* tokens); +bool pp_eval_constant_expr(TokenArray* pp_tokens); + +typedef enum { + InitDataBlockKind_addr, + InitDataBlockKind_bytes, +} InitDataBlockKind; + +// Static address to global variable or ROM area. +typedef struct { + const char* label; +} InitDataBlockAddr; + +// Static byte array. +typedef struct { + size_t len; + const char* buf; +} InitDataBlockBytes; + +typedef struct { + InitDataBlockKind kind; + union { + InitDataBlockAddr addr; + InitDataBlockBytes bytes; + } as; +} InitDataBlock; + +typedef struct { + size_t len; + size_t capacity; + InitDataBlock* blocks; +} InitData; + +InitData* eval_init_expr(AstNode* expr, Type* ty); + +#endif diff --git a/src/cc1/preprocess.c b/src/cc1/preprocess.c new file mode 100644 index 0000000..d050119 --- /dev/null +++ b/src/cc1/preprocess.c @@ -0,0 +1,1559 @@ +#include "preprocess.h" +#include +#include +#include "../lib/common.h" +#include "parse.h" +#include "sys.h" +#include "tokenize.h" + +typedef enum { + MacroKind_undef, + MacroKind_obj, + MacroKind_func, + MacroKind_builtin_file, + MacroKind_builtin_line, +} MacroKind; + +const char* macro_kind_stringify(MacroKind kind) { + if (kind == MacroKind_undef) + return "undef"; + else if (kind == MacroKind_obj) + return "object-like"; + else if (kind == MacroKind_func) + return "function-like"; + else if (kind == MacroKind_builtin_file) + return "__FILE__"; + else if (kind == MacroKind_builtin_line) + return "__LINE__"; + else + unreachable(); +} + +typedef struct { + MacroKind kind; + const char* name; + TokenArray parameters; + TokenArray replacements; +} Macro; + +void macro_build_json(JsonBuilder* builder, Macro* macro) { + jsonbuilder_object_start(builder); + + jsonbuilder_object_member_start(builder, "kind"); + jsonbuilder_string(builder, macro_kind_stringify(macro->kind)); + jsonbuilder_object_member_end(builder); + jsonbuilder_object_member_start(builder, "name"); + jsonbuilder_string(builder, macro->name); + jsonbuilder_object_member_end(builder); + jsonbuilder_object_member_start(builder, "parameters"); + tokens_build_json(builder, ¯o->parameters); + jsonbuilder_object_member_end(builder); + jsonbuilder_object_member_start(builder, "replacements"); + tokens_build_json(builder, ¯o->replacements); + jsonbuilder_object_member_end(builder); + + jsonbuilder_object_end(builder); +} + +static int macro_find_param(Macro* macro, Token* tok) { + if (tok->kind != TokenKind_ident) + return -1; + + for (size_t i = 0; i < macro->parameters.len; ++i) { + if (strcmp(macro->parameters.data[i].value.string, tok->value.string) == 0) { + return i; + } + } + return -1; +} + +typedef struct { + size_t len; + size_t capacity; + Macro* data; +} MacroArray; + +static MacroArray* macros_new() { + MacroArray* macros = calloc(1, sizeof(MacroArray)); + macros->len = 0; + macros->capacity = 8; + macros->data = calloc(macros->capacity, sizeof(Macro)); + return macros; +} + +static void macros_reserve(MacroArray* macros, size_t size) { + if (size <= macros->capacity) + return; + while (macros->capacity < size) { + macros->capacity *= 2; + } + macros->data = realloc(macros->data, macros->capacity * sizeof(Macro)); + memset(macros->data + macros->len, 0, (macros->capacity - macros->len) * sizeof(Macro)); +} + +static Macro* macros_push_new(MacroArray* macros) { + macros_reserve(macros, macros->len + 1); + return ¯os->data[macros->len++]; +} + +static void define_macro_to_number(MacroArray* macros, const char* name, int n) { + Macro* m = macros_push_new(macros); + m->kind = MacroKind_obj; + m->name = name; + tokens_init(&m->replacements, 1); + Token* tok = tokens_push_new(&m->replacements); + tok->kind = TokenKind_literal_int; + tok->value.integer = n; +} + +static void add_predefined_macros(MacroArray* macros) { + Macro* m; + + m = macros_push_new(macros); + m->kind = MacroKind_builtin_file; + m->name = "__FILE__"; + + m = macros_push_new(macros); + m->kind = MacroKind_builtin_line; + m->name = "__LINE__"; + + // Non-standard pre-defined macros. + define_macro_to_number(macros, "__ducc__", 1); + define_macro_to_number(macros, "__x86_64__", 1); + define_macro_to_number(macros, "__LP64__", 1); + + // GCC's predefined macros. Glibc depends on these macros. + // https://gcc.gnu.org/onlinedocs/cpp/Common-Predefined-Macros.html + // TODO: uncomment out __LONG_MAX__, etc. once ducc supports 64-bit integer literals. + define_macro_to_number(macros, "__CHAR_BIT__", __CHAR_BIT__); + define_macro_to_number(macros, "__SCHAR_MAX__", __SCHAR_MAX__); + define_macro_to_number(macros, "__WCHAR_MAX__", __WCHAR_MAX__); + define_macro_to_number(macros, "__SHRT_MAX__", __SHRT_MAX__); + define_macro_to_number(macros, "__INT_MAX__", __INT_MAX__); + // define_macro_to_number(macros, "__LONG_MAX__", __LONG_MAX__); + // define_macro_to_number(macros, "__LONG_LONG_MAX__", __LONG_LONG_MAX__); + define_macro_to_number(macros, "__WINT_MAX__", __WINT_MAX__); + // define_macro_to_number(macros, "__SIZE_MAX__", __SIZE_MAX__); + // define_macro_to_number(macros, "__PTRDIFF_MAX__", __PTRDIFF_MAX__); + // define_macro_to_number(macros, "__INTMAX_MAX__", __INTMAX_MAX__); + // define_macro_to_number(macros, "__UINTMAX_MAX__", __UINTMAX_MAX__); + define_macro_to_number(macros, "__SIG_ATOMIC_MAX__", __SIG_ATOMIC_MAX__); + define_macro_to_number(macros, "__INT8_MAX__", __INT8_MAX__); + define_macro_to_number(macros, "__INT16_MAX__", __INT16_MAX__); + define_macro_to_number(macros, "__INT32_MAX__", __INT32_MAX__); + // define_macro_to_number(macros, "__INT64_MAX__", __INT64_MAX__); + define_macro_to_number(macros, "__UINT8_MAX__", __UINT8_MAX__); + define_macro_to_number(macros, "__UINT16_MAX__", __UINT16_MAX__); + define_macro_to_number(macros, "__UINT32_MAX__", __UINT32_MAX__); + // define_macro_to_number(macros, "__UINT64_MAX__", __UINT64_MAX__); + define_macro_to_number(macros, "__INT_LEAST8_MAX__", __INT_LEAST8_MAX__); + define_macro_to_number(macros, "__INT_LEAST16_MAX__", __INT_LEAST16_MAX__); + define_macro_to_number(macros, "__INT_LEAST32_MAX__", __INT_LEAST32_MAX__); + // define_macro_to_number(macros, "__INT_LEAST64_MAX__", __INT_LEAST64_MAX__); + define_macro_to_number(macros, "__UINT_LEAST8_MAX__", __UINT_LEAST8_MAX__); + define_macro_to_number(macros, "__UINT_LEAST16_MAX__", __UINT_LEAST16_MAX__); + define_macro_to_number(macros, "__UINT_LEAST32_MAX__", __UINT_LEAST32_MAX__); + // define_macro_to_number(macros, "__UINT_LEAST64_MAX__", __UINT_LEAST64_MAX__); + define_macro_to_number(macros, "__INT_FAST8_MAX__", __INT_FAST8_MAX__); + // define_macro_to_number(macros, "__INT_FAST16_MAX__", __INT_FAST16_MAX__); + // define_macro_to_number(macros, "__INT_FAST32_MAX__", __INT_FAST32_MAX__); + // define_macro_to_number(macros, "__INT_FAST64_MAX__", __INT_FAST64_MAX__); + define_macro_to_number(macros, "__UINT_FAST8_MAX__", __UINT_FAST8_MAX__); + // define_macro_to_number(macros, "__UINT_FAST16_MAX__", __UINT_FAST16_MAX__); + // define_macro_to_number(macros, "__UINT_FAST32_MAX__", __UINT_FAST32_MAX__); + // define_macro_to_number(macros, "__UINT_FAST64_MAX__", __UINT_FAST64_MAX__); + // define_macro_to_number(macros, "__INTPTR_MAX__", __INTPTR_MAX__); + // define_macro_to_number(macros, "__UINTPTR_MAX__", __UINTPTR_MAX__); + define_macro_to_number(macros, "__WCHAR_MIN__", __WCHAR_MIN__); + define_macro_to_number(macros, "__WINT_MIN__", __WINT_MIN__); + define_macro_to_number(macros, "__SIG_ATOMIC_MIN__", __SIG_ATOMIC_MIN__); + + // GCC's predefined macros not listed in Common Predefined Macros page. + define_macro_to_number(macros, "__DBL_DIG__", __DBL_DIG__); + define_macro_to_number(macros, "__DBL_MANT_DIG__", __DBL_MANT_DIG__); + define_macro_to_number(macros, "__DBL_MAX_10_EXP__", __DBL_MAX_10_EXP__); + define_macro_to_number(macros, "__FLT_DIG__", __FLT_DIG__); + define_macro_to_number(macros, "__FLT_MANT_DIG__", __FLT_MANT_DIG__); + define_macro_to_number(macros, "__FLT_MAX_10_EXP__", __FLT_MAX_10_EXP__); + define_macro_to_number(macros, "__LDBL_DIG__", __LDBL_DIG__); + define_macro_to_number(macros, "__LDBL_MANT_DIG__", __LDBL_MANT_DIG__); + define_macro_to_number(macros, "__LDBL_MAX_10_EXP__", __LDBL_MAX_10_EXP__); +} + +// Accept "FOO" or "FOO=value" +static void define_macro_from_string(MacroArray* macros, const char* def) { + Macro* m = macros_push_new(macros); + m->kind = MacroKind_obj; + + const char* eq = strchr(def, '='); + if (eq) { + // FOO=value format + size_t name_len = eq - def; + char* name = calloc(name_len + 1, sizeof(char)); + memcpy(name, def, name_len); + m->name = name; + + const char* value = eq + 1; + tokens_init(&m->replacements, 1); + Token* tok = tokens_push_new(&m->replacements); + + // Try to parse as integer + char* num_end; + long int_val = strtol(value, &num_end, 10); + if (value[0] != '\0' && *num_end == '\0') { + tok->kind = TokenKind_literal_int; + tok->value.integer = int_val; + } else { + tok->kind = TokenKind_ident; + tok->value.string = value; + } + } else { + // FOO format (equivalent to FOO=1) + m->name = def; + tokens_init(&m->replacements, 1); + Token* tok = tokens_push_new(&m->replacements); + tok->kind = TokenKind_literal_int; + tok->value.integer = 1; + } +} + +static void add_user_defines(MacroArray* macros, StrArray* user_defines) { + for (size_t i = 0; i < user_defines->len; ++i) { + define_macro_from_string(macros, user_defines->data[i]); + } +} + +void macros_build_json(JsonBuilder* builder, MacroArray* macros) { + jsonbuilder_array_start(builder); + for (size_t i = 0; i < macros->len; ++i) { + jsonbuilder_array_element_start(builder); + macro_build_json(builder, ¯os->data[i]); + jsonbuilder_array_element_end(builder); + } + jsonbuilder_array_end(builder); +} + +typedef struct { + TokenArray tokens; +} MacroArg; + +void macroarg_build_json(JsonBuilder* builder, MacroArg* arg) { + jsonbuilder_object_start(builder); + jsonbuilder_object_member_start(builder, "tokens"); + tokens_build_json(builder, &arg->tokens); + jsonbuilder_object_member_end(builder); + jsonbuilder_object_end(builder); +} + +typedef struct { + size_t len; + size_t capacity; + MacroArg* data; +} MacroArgArray; + +static MacroArgArray* macroargs_new() { + MacroArgArray* macroargs = calloc(1, sizeof(MacroArgArray)); + macroargs->len = 0; + macroargs->capacity = 2; + macroargs->data = calloc(macroargs->capacity, sizeof(MacroArg)); + return macroargs; +} + +static void macroargs_reserve(MacroArgArray* macroargs, size_t size) { + if (size <= macroargs->capacity) + return; + while (macroargs->capacity < size) { + macroargs->capacity *= 2; + } + macroargs->data = realloc(macroargs->data, macroargs->capacity * sizeof(MacroArg)); + memset(macroargs->data + macroargs->len, 0, (macroargs->capacity - macroargs->len) * sizeof(MacroArg)); +} + +static MacroArg* macroargs_push_new(MacroArgArray* macroargs) { + macroargs_reserve(macroargs, macroargs->len + 1); + return ¯oargs->data[macroargs->len++]; +} + +void macroargs_build_json(JsonBuilder* builder, MacroArgArray* macroargs) { + jsonbuilder_object_start(builder); + jsonbuilder_object_member_start(builder, "len"); + jsonbuilder_integer(builder, macroargs->len); + jsonbuilder_object_member_end(builder); + jsonbuilder_object_member_start(builder, "data"); + jsonbuilder_array_start(builder); + for (size_t i = 0; i < macroargs->len; ++i) { + jsonbuilder_array_element_start(builder); + macroarg_build_json(builder, ¯oargs->data[i]); + jsonbuilder_array_element_end(builder); + } + jsonbuilder_array_end(builder); + jsonbuilder_object_member_end(builder); + jsonbuilder_object_end(builder); +} + +typedef struct { + TokenArray* pp_tokens; + int pos; + MacroArray* macros; + int include_depth; + StrArray* include_paths; + StrArray* included_files; + bool generate_system_deps; + bool generate_user_deps; +} Preprocessor; + +static TokenArray* do_preprocess(InFile* src, int depth, MacroArray* macros, StrArray* include_paths, + StrArray* included_files, bool generate_system_deps, bool generate_user_deps); + +static Preprocessor* preprocessor_new(TokenArray* pp_tokens, int include_depth, MacroArray* macros, + StrArray* include_paths, StrArray* included_files, bool generate_system_deps, + bool generate_user_deps) { + if (include_depth >= 32) { + fatal_error("include depth limit exceeded"); + } + + Preprocessor* pp = calloc(1, sizeof(Preprocessor)); + pp->pp_tokens = pp_tokens; + pp->macros = macros; + pp->include_depth = include_depth; + pp->include_paths = include_paths; + pp->included_files = included_files; + pp->generate_system_deps = generate_system_deps; + pp->generate_user_deps = generate_user_deps; + + return pp; +} + +static Token* pp_token_at(Preprocessor* pp, int i) { + return &pp->pp_tokens->data[i]; +} + +static Token* peek_pp_token(Preprocessor* pp) { + return pp_token_at(pp, pp->pos); +} + +static Token* next_pp_token(Preprocessor* pp) { + return pp_token_at(pp, pp->pos++); +} + +static void skip_pp_token(Preprocessor* pp, TokenKind expected) { + Token* tok = next_pp_token(pp); + assert(tok->kind == expected); +} + +static Token* consume_pp_token_if(Preprocessor* pp, TokenKind expected) { + if (peek_pp_token(pp)->kind == expected) { + return next_pp_token(pp); + } else { + return NULL; + } +} + +static Token* consume_pp_token_if_not(Preprocessor* pp, TokenKind non_expected) { + if (peek_pp_token(pp)->kind == non_expected) { + return NULL; + } else { + return next_pp_token(pp); + } +} + +static Token* expect_pp_token(Preprocessor* pp, TokenKind expected) { + Token* tok = next_pp_token(pp); + if (tok->kind == expected) { + return tok; + } + fatal_error("%s:%d: expected '%s', but got '%s'", tok->loc.filename, tok->loc.line, token_kind_stringify(expected), + token_stringify(tok)); +} + +static bool pp_eof(Preprocessor* pp) { + return peek_pp_token(pp)->kind == TokenKind_eof; +} + +static int find_macro(Preprocessor* pp, const char* name) { + for (size_t i = 0; i < pp->macros->len; ++i) { + if (pp->macros->data[i].kind == MacroKind_undef) + continue; + if (strcmp(pp->macros->data[i].name, name) == 0) { + return i; + } + } + return -1; +} + +static void undef_macro(Preprocessor* pp, int idx) { + pp->macros->data[idx].kind = MacroKind_undef; + // TODO: Can predefined macro like __FILE__ be undefined? +} + +static void skip_whitespaces(Preprocessor* pp) { + while (!pp_eof(pp) && consume_pp_token_if(pp, TokenKind_whitespace)) + ; +} + +static void skip_whitespaces_or_newlines(Preprocessor* pp, bool skip_newline) { + while (!pp_eof(pp) && (consume_pp_token_if(pp, TokenKind_whitespace) || + (skip_newline && consume_pp_token_if(pp, TokenKind_newline)))) + ; +} + +// It will not consume a new-line token. +static void seek_to_next_newline(Preprocessor* pp) { + while (!pp_eof(pp) && consume_pp_token_if_not(pp, TokenKind_newline)) + ; +} + +static void make_tokens_removed(Preprocessor* pp, int start, int end) { + for (int i = start; i < end; ++i) { + Token* tok = pp_token_at(pp, i); + tok->kind = TokenKind_removed; + tok->value.string = NULL; + } +} + +static Token* read_include_header_name(Preprocessor* pp) { + Token* tok = next_pp_token(pp); + if (tok->kind != TokenKind_header_name) { + fatal_error("%s:%d: invalid #include, %s", tok->loc.filename, tok->loc.line, token_stringify(tok)); + } + return tok; +} + +static const char* resolve_include_name(Preprocessor* pp, const Token* include_name_token) { + const char* include_name = include_name_token->value.string; + if (include_name[0] == '"') { + char* current_filename = strdup(include_name_token->loc.filename); + const char* current_dir = dirname(current_filename); + char* buf = calloc(strlen(include_name) - 2 + 1 + strlen(current_dir) + 1, sizeof(char)); + sprintf(buf, "%s/%.*s", current_dir, (int)(strlen(include_name) - 2), include_name + 1); + return buf; + } else { + for (size_t i = 0; i < pp->include_paths->len; ++i) { + char* buf = calloc(strlen(include_name) - 2 + 1 + strlen(pp->include_paths->data[i]) + 1, sizeof(char)); + sprintf(buf, "%s/%.*s", pp->include_paths->data[i], (int)(strlen(include_name) - 2), include_name + 1); + if (access(buf, F_OK | R_OK) == 0) { + return buf; + } + } + return NULL; + } +} + +static const char* resolve_next_include_name(Preprocessor* pp, const Token* include_name_token) { + const char* include_name = include_name_token->value.string; + char* current_filename = strdup(include_name_token->loc.filename); + for (size_t i = 0; i < pp->include_paths->len; ++i) { + char* buf = calloc(strlen(include_name) - 2 + 1 + strlen(pp->include_paths->data[i]) + 1, sizeof(char)); + sprintf(buf, "%s/%.*s", pp->include_paths->data[i], (int)(strlen(include_name) - 2), include_name + 1); + if (strcmp(buf, current_filename) == 0) { + // #include_next skips the same file. + continue; + } + if (access(buf, F_OK | R_OK) == 0) { + return buf; + } + } + return NULL; +} + +static int replace_pp_tokens(Preprocessor* pp, int dest_start, int dest_end, TokenArray* source_tokens) { + size_t n_tokens_to_remove = dest_end - dest_start; + size_t n_tokens_after_dest = pp->pp_tokens->len - dest_end; + size_t shift_amount; + + if (n_tokens_to_remove < source_tokens->len) { + // Move existing tokens backward to make room. + shift_amount = source_tokens->len - n_tokens_to_remove; + tokens_reserve(pp->pp_tokens, pp->pp_tokens->len + shift_amount); + memmove(pp_token_at(pp, dest_end + shift_amount), pp_token_at(pp, dest_end), + n_tokens_after_dest * sizeof(Token)); + pp->pp_tokens->len += shift_amount; + } else if (source_tokens->len < n_tokens_to_remove) { + // Move existing tokens forward to reduce room. + shift_amount = n_tokens_to_remove - source_tokens->len; + memmove(pp_token_at(pp, dest_start + source_tokens->len), pp_token_at(pp, dest_end), + n_tokens_after_dest * sizeof(Token)); + pp->pp_tokens->len -= shift_amount; + memset(pp_token_at(pp, pp->pp_tokens->len), 0, shift_amount * sizeof(Token)); + } + + memcpy(pp_token_at(pp, dest_start), source_tokens->data, source_tokens->len * sizeof(Token)); + + return dest_start + source_tokens->len; +} + +static int insert_pp_tokens(Preprocessor* pp, int dest_pos, TokenArray* source_tokens) { + return replace_pp_tokens(pp, dest_pos, dest_pos, source_tokens); +} + +static void replace_single_pp_token(Preprocessor* pp, int dest, Token* source_tok) { + TokenArray tokens; + tokens_init(&tokens, 1); + *tokens_push_new(&tokens) = *source_tok; + replace_pp_tokens(pp, dest, dest + 1, &tokens); +} + +static void expand_include_directive(Preprocessor* pp, const char* include_name, Token* original_include_name_tok) { + InFile* include_source = infile_open(include_name); + if (!include_source) { + fatal_error("%s:%d: cannot open include file: %s", original_include_name_tok->loc.filename, + original_include_name_tok->loc.line, token_stringify(original_include_name_tok)); + } + + TokenArray* include_pp_tokens = do_preprocess(include_source, pp->include_depth + 1, pp->macros, pp->include_paths, + pp->included_files, pp->generate_system_deps, pp->generate_user_deps); + tokens_pop(include_pp_tokens); // pop EOF token + pp->pos = insert_pp_tokens(pp, pp->pos, include_pp_tokens); +} + +// ws ::= many0() +// macro-parameters ::= '(' opt( many0(',' )) ')' +static TokenArray* pp_parse_macro_parameters(Preprocessor* pp) { + TokenArray* parameters = calloc(1, sizeof(TokenArray)); + tokens_init(parameters, 2); + + // '(' is consumed by caller. + skip_whitespaces(pp); + Token* tok = consume_pp_token_if(pp, TokenKind_ident); + if (tok) { + *tokens_push_new(parameters) = *tok; + skip_whitespaces(pp); + while (consume_pp_token_if(pp, TokenKind_comma)) { + skip_whitespaces(pp); + tok = next_pp_token(pp); + if (tok->kind != TokenKind_ident) { + fatal_error("%s:%d: invalid macro syntax", tok->loc.filename, tok->loc.line); + } + *tokens_push_new(parameters) = *tok; + } + } + expect_pp_token(pp, TokenKind_paren_r); + + return parameters; +} + +// ws ::= many0() +// macro-arguments ::= '(' opt( many0(',' )) ')' +static MacroArgArray* pp_parse_macro_arguments(Preprocessor* pp, bool skip_newline) { + MacroArgArray* args = macroargs_new(); + + skip_whitespaces_or_newlines(pp, skip_newline); + if (!consume_pp_token_if(pp, TokenKind_paren_l)) { + return NULL; + } + skip_whitespaces_or_newlines(pp, skip_newline); + Token* tok = peek_pp_token(pp); + if (!skip_newline && tok->kind == TokenKind_newline) { + expect_pp_token(pp, TokenKind_paren_r); + } + if (tok->kind != TokenKind_paren_r) { + MacroArg* arg = macroargs_push_new(args); + tokens_init(&arg->tokens, 4); + + // Parse argument tokens, handling nested parentheses. + int nesting = 0; + while (true) { + tok = peek_pp_token(pp); + + if (nesting == 0) { + if (tok->kind == TokenKind_paren_r) { + break; + } + if (tok->kind == TokenKind_comma) { + next_pp_token(pp); + skip_whitespaces_or_newlines(pp, skip_newline); + + arg = macroargs_push_new(args); + tokens_init(&arg->tokens, 4); + continue; + } + } + + if (tok->kind == TokenKind_paren_l) { + nesting++; + } else if (tok->kind == TokenKind_paren_r) { + nesting--; + if (nesting < 0) { + break; + } + } + + tok = next_pp_token(pp); + if (tok->kind != TokenKind_removed) { + *tokens_push_new(&arg->tokens) = *tok; + } + + skip_whitespaces_or_newlines(pp, skip_newline); + } + } + expect_pp_token(pp, TokenKind_paren_r); + + return args; +} + +static Token* concat_two_tokens(Token* left, Token* right) { + StrBuilder builder; + strbuilder_init(&builder); + + // Left + if (left->kind == TokenKind_ident) { + strbuilder_append_string(&builder, left->value.string); + } else if (left->kind == TokenKind_literal_int) { + char buf[32]; + sprintf(buf, "%d", left->value.integer); + strbuilder_append_string(&builder, buf); + } else { + strbuilder_append_string(&builder, token_stringify(left)); + } + + // Right + if (right->kind == TokenKind_ident) { + strbuilder_append_string(&builder, right->value.string); + } else if (right->kind == TokenKind_literal_int) { + char buf[32]; + sprintf(buf, "%d", right->value.integer); + strbuilder_append_string(&builder, buf); + } else { + strbuilder_append_string(&builder, token_stringify(right)); + } + + // Concat + Token* result = calloc(1, sizeof(Token)); + + char* endptr; + int val = strtol(builder.buf, &endptr, 10); + if (*endptr == '\0') { + result->kind = TokenKind_literal_int; + result->value.integer = val; + } else { + result->kind = TokenKind_ident; + result->value.string = builder.buf; + } + + return result; +} + +static Token* stringify_tokens(TokenArray* tokens) { + StrBuilder builder; + strbuilder_init(&builder); + + bool prev_whitespace = false; + for (size_t i = 0; i < tokens->len; ++i) { + Token* tok = &tokens->data[i]; + if (tok->kind == TokenKind_whitespace) { + prev_whitespace = true; + continue; + } + + if (prev_whitespace && builder.len > 0) { + strbuilder_append_char(&builder, ' '); + } + prev_whitespace = false; + + const char* str = token_stringify(tok); + + // For string literals and char constants, we need to escape quotes. + if (tok->kind == TokenKind_literal_str || tok->kind == TokenKind_character_constant) { + for (const char* p = str; *p; ++p) { + if (*p == '\\' || *p == '"') { + strbuilder_append_char(&builder, '\\'); + } + strbuilder_append_char(&builder, *p); + } + } else { + strbuilder_append_string(&builder, str); + } + } + + Token* result = calloc(1, sizeof(Token)); + result->kind = TokenKind_literal_str; + result->value.string = builder.buf; + return result; +} + +typedef struct MacroExpansionContext { + // Stack of macro names that have been already expanded. + StrArray already_expanded; +} MacroExpansionContext; + +MacroExpansionContext* macroexpansioncontext_new() { + MacroExpansionContext* ctx = calloc(1, sizeof(MacroExpansionContext)); + strings_init(&ctx->already_expanded); + return ctx; +} + +static int expand_macro(Preprocessor* pp, bool skip_newline, MacroExpansionContext* ctx); + +static void expand_macro_arg(Preprocessor* pp, MacroArg* arg, bool skip_newline, MacroExpansionContext* ctx) { + tokens_push_new(&arg->tokens)->kind = TokenKind_eof; + + Preprocessor* pp2 = preprocessor_new(&arg->tokens, pp->include_depth, pp->macros, pp->include_paths, + pp->included_files, pp->generate_system_deps, pp->generate_user_deps); + + size_t arg_token_count = arg->tokens.len; + size_t processed_token_count = 0; + while (processed_token_count < arg_token_count) { + if (peek_pp_token(pp2)->kind == TokenKind_ident) { + processed_token_count += expand_macro(pp2, skip_newline, ctx); + } else { + next_pp_token(pp2); + processed_token_count += 1; + } + } + + tokens_pop(&arg->tokens); +} + +static int expand_macro(Preprocessor* pp, bool skip_newline, MacroExpansionContext* ctx) { + if (ctx == NULL) { + ctx = macroexpansioncontext_new(); + } + + int macro_name_pos = pp->pos; + Token* macro_name = peek_pp_token(pp); + const char* macro_name_str = macro_name->value.string; + + // Supress expansion if the macro has already been expanded. + for (size_t i = 0; i < ctx->already_expanded.len; ++i) { + if (strcmp(ctx->already_expanded.data[i], macro_name->value.string) == 0) { + next_pp_token(pp); + return 1; + } + } + + int macro_idx = find_macro(pp, macro_name->value.string); + if (macro_idx == -1) { + next_pp_token(pp); + return 1; + } + + SourceLocation original_loc = macro_name->loc; + size_t token_count_before_expansion; + size_t token_count_after_expansion; + Macro* macro = &pp->macros->data[macro_idx]; + if (macro->kind == MacroKind_func) { + next_pp_token(pp); + MacroArgArray* args = pp_parse_macro_arguments(pp, skip_newline); + if (!args) { + // If function-like macro name is not followed by opening parenthesis, it is not a macro invocation. + return pp->pos - macro_name_pos; + } + token_count_before_expansion = pp->pos - macro_name_pos; + replace_pp_tokens(pp, macro_name_pos, pp->pos, ¯o->replacements); + + // Operands of # and ## operators should not be expanded. + bool* no_expand = calloc(macro->parameters.len, sizeof(bool)); + for (size_t i = 0; i < macro->replacements.len; ++i) { + TokenKind kind = macro->replacements.data[i].kind; + if (kind == TokenKind_hashhash) { + Token* lhs = NULL; + for (int j = i - 1; j >= 0; --j) { + if (macro->replacements.data[j].kind != TokenKind_whitespace) { + lhs = ¯o->replacements.data[j]; + break; + } + } + Token* rhs = NULL; + for (size_t j = i + 1; j < macro->replacements.len; ++j) { + if (macro->replacements.data[j].kind != TokenKind_whitespace) { + rhs = ¯o->replacements.data[j]; + break; + } + } + if (lhs) { + int param1 = macro_find_param(macro, lhs); + if (param1 != -1) { + no_expand[param1] = true; + } + } + if (rhs) { + int param2 = macro_find_param(macro, rhs); + if (param2 != -1) { + no_expand[param2] = true; + } + } + } else if (kind == TokenKind_hash) { + Token* operand = NULL; + for (size_t j = i + 1; j < macro->replacements.len; ++j) { + if (macro->replacements.data[j].kind != TokenKind_whitespace) { + operand = ¯o->replacements.data[j]; + break; + } + } + if (operand) { + int param = macro_find_param(macro, operand); + if (param != -1) { + no_expand[param] = true; + } + } + } + } + + // Argument expansion + for (size_t i = 0; i < args->len; ++i) { + if (no_expand[i]) + continue; + MacroArg* arg = &args->data[i]; + expand_macro_arg(pp, arg, skip_newline, ctx); + } + + // Parameter substitution + size_t token_count = 0; + size_t offset = 0; + for (size_t i = 0; i < macro->replacements.len; ++i) { + Token* tok = pp_token_at(pp, macro_name_pos + i + offset); + + // Handle # operator (stringification) + if (tok->kind == TokenKind_hash) { + size_t param_idx_in_replacements = 0; + for (size_t j = i + 1; j < macro->replacements.len; ++j) { + if (macro->replacements.data[j].kind != TokenKind_whitespace) { + param_idx_in_replacements = j; + break; + } + } + + Token* param_tok = ¯o->replacements.data[param_idx_in_replacements]; + int macro_param_idx = macro_find_param(macro, param_tok); + if (macro_param_idx != -1) { + Token* stringified = stringify_tokens(&args->data[macro_param_idx].tokens); + + // Replace ('#' * ) with stringified token. + TokenArray single_token; + tokens_init(&single_token, 1); + *tokens_push_new(&single_token) = *stringified; + + size_t tokens_to_replace = param_idx_in_replacements - i + 1; + replace_pp_tokens(pp, macro_name_pos + i + offset, macro_name_pos + i + offset + tokens_to_replace, + &single_token); + token_count += 1; + offset += 1 - tokens_to_replace; + i = param_idx_in_replacements; // Skip to after the parameter + continue; + } + } + + int macro_param_idx = macro_find_param(macro, tok); + if (macro_param_idx != -1) { + size_t arg_token_count = args->data[macro_param_idx].tokens.len; + if (arg_token_count == 0) { + // Empty argument: insert a placemarker token + TokenArray placemarker_token; + tokens_init(&placemarker_token, 1); + Token* pm = tokens_push_new(&placemarker_token); + pm->kind = TokenKind_placemarker; + pm->loc = tok->loc; + replace_pp_tokens(pp, macro_name_pos + i + offset, macro_name_pos + i + offset + 1, + &placemarker_token); + token_count += 1; + // offset stays the same (1 - 1 = 0) + } else { + replace_pp_tokens(pp, macro_name_pos + i + offset, macro_name_pos + i + offset + 1, + &args->data[macro_param_idx].tokens); + token_count += arg_token_count; + offset += arg_token_count - 1; + } + } else { + ++token_count; + } + } + + // Handle ## operator + size_t token_count2 = 0; + for (size_t i = 0; i < token_count; ++i) { + int pos = macro_name_pos + i; + Token* tok = pp_token_at(pp, pos); + if (tok->kind == TokenKind_hashhash) { + // Concatenate previous and next tokens + int lhs_pos = -1; + for (int j = pos - 1; j >= macro_name_pos; --j) { + if (pp_token_at(pp, j)->kind != TokenKind_whitespace) { + lhs_pos = j; + break; + } + } + int rhs_pos = -1; + for (size_t j = pos + 1; j < macro_name_pos + token_count; ++j) { + if (pp_token_at(pp, j)->kind != TokenKind_whitespace) { + rhs_pos = j; + break; + } + } + if (lhs_pos == -1 || rhs_pos == -1) { + fatal_error("%s:%d: invalid usage of ## operator", tok->loc.filename, tok->loc.line); + } + + Token* lhs_tok = pp_token_at(pp, lhs_pos); + Token* rhs_tok = pp_token_at(pp, rhs_pos); + bool lhs_is_placemarker = lhs_tok->kind == TokenKind_placemarker; + bool rhs_is_placemarker = rhs_tok->kind == TokenKind_placemarker; + + TokenArray result_tokens; + tokens_init(&result_tokens, 1); + + if (lhs_is_placemarker && rhs_is_placemarker) { + // Both are placemarkers: result is a placemarker + Token* pm = tokens_push_new(&result_tokens); + pm->kind = TokenKind_placemarker; + pm->loc = tok->loc; + } else if (lhs_is_placemarker) { + // Left is placemarker: result is the right token + *tokens_push_new(&result_tokens) = *rhs_tok; + } else if (rhs_is_placemarker) { + // Right is placemarker: result is the left token + *tokens_push_new(&result_tokens) = *lhs_tok; + } else { + // Neither is placemarker: concatenate them + Token* concatenated = concat_two_tokens(lhs_tok, rhs_tok); + *tokens_push_new(&result_tokens) = *concatenated; + } + + replace_pp_tokens(pp, lhs_pos, rhs_pos + 1, &result_tokens); + token_count -= rhs_pos - lhs_pos; + i -= pos - lhs_pos; + token_count2 -= pos - lhs_pos - 1; + } else { + ++token_count2; + } + } + + // Remove placemarker tokens after ## processing + for (size_t i = 0; i < token_count2; ++i) { + Token* tok = pp_token_at(pp, macro_name_pos + i); + if (tok->kind == TokenKind_placemarker) { + tok->kind = TokenKind_removed; + } + } + + // Inherit a source location from the original macro token. + for (size_t i = 0; i < token_count2; ++i) { + pp_token_at(pp, macro_name_pos + i)->loc = original_loc; + } + token_count_after_expansion = token_count2; + } else if (macro->kind == MacroKind_obj) { + replace_pp_tokens(pp, macro_name_pos, macro_name_pos + 1, ¯o->replacements); + // Inherit a source location from the original macro token. + for (size_t i = 0; i < macro->replacements.len; ++i) { + pp_token_at(pp, macro_name_pos + i)->loc = original_loc; + } + token_count_before_expansion = 1; + token_count_after_expansion = macro->replacements.len; + } else if (macro->kind == MacroKind_builtin_file) { + Token file_tok; + file_tok.kind = TokenKind_literal_str; + file_tok.value.string = macro_name->loc.filename; + file_tok.loc.filename = NULL; + file_tok.loc.line = 0; + replace_single_pp_token(pp, macro_name_pos, &file_tok); + token_count_before_expansion = 1; + token_count_after_expansion = 1; + } else if (macro->kind == MacroKind_builtin_line) { + Token line_tok; + line_tok.kind = TokenKind_literal_int; + line_tok.value.integer = macro_name->loc.line; + line_tok.loc.filename = NULL; + line_tok.loc.line = 0; + replace_single_pp_token(pp, macro_name_pos, &line_tok); + token_count_before_expansion = 1; + token_count_after_expansion = 1; + } else { + unreachable(); + } + + // Recursive expansion. + strings_push(&ctx->already_expanded, macro_name_str); + pp->pos = macro_name_pos; + size_t processed_token_count = 0; + while (processed_token_count < token_count_after_expansion) { + if (peek_pp_token(pp)->kind == TokenKind_ident) { + processed_token_count += expand_macro(pp, skip_newline, ctx); + } else { + next_pp_token(pp); + processed_token_count += 1; + } + } + strings_pop(&ctx->already_expanded); + + return token_count_before_expansion; +} + +typedef enum { + GroupDelimiterKind_normal, + GroupDelimiterKind_after_if_directive, + GroupDelimiterKind_after_else_directive, +} GroupDelimiterKind; + +static bool is_delimiter_of_current_group(GroupDelimiterKind delimiter_kind, TokenKind token_kind) { + if (delimiter_kind == GroupDelimiterKind_normal) { + return token_kind == TokenKind_eof; + } else if (delimiter_kind == GroupDelimiterKind_after_if_directive) { + return token_kind == TokenKind_pp_directive_elif || token_kind == TokenKind_pp_directive_elifdef || + token_kind == TokenKind_pp_directive_elifndef || token_kind == TokenKind_pp_directive_else || + token_kind == TokenKind_pp_directive_endif; + } else if (delimiter_kind == GroupDelimiterKind_after_else_directive) { + return token_kind == TokenKind_pp_directive_endif; + } else { + unreachable(); + } +} + +static int replace_pp_tokens(Preprocessor*, int, int, TokenArray*); +static void include_conditionally(Preprocessor* pp, GroupDelimiterKind delimiter_kind, bool do_include); + +static bool preprocess_if_group_or_elif_group(Preprocessor* pp, bool did_include) { + Token* directive = next_pp_token(pp); + + if (directive->kind == TokenKind_pp_directive_if || directive->kind == TokenKind_pp_directive_elif) { + int condition_expr_start_pos = pp->pos; + + while (!pp_eof(pp)) { + Token* tok = peek_pp_token(pp); + if (tok->kind == TokenKind_newline) { + break; + } else if (tok->kind == TokenKind_ident) { + if (strcmp(tok->value.string, "defined") == 0) { + int defined_pos = pp->pos; + // 'defined' * '(' * * ')' + // 'defined' * + skip_pp_token(pp, TokenKind_ident); + skip_whitespaces(pp); + Token* macro_name; + if (consume_pp_token_if(pp, TokenKind_paren_l)) { + skip_whitespaces(pp); + macro_name = expect_pp_token(pp, TokenKind_ident); + skip_whitespaces(pp); + expect_pp_token(pp, TokenKind_paren_r); + } else { + macro_name = expect_pp_token(pp, TokenKind_ident); + } + bool is_defined = find_macro(pp, macro_name->value.string) != -1; + TokenArray defined_results; + tokens_init(&defined_results, 1); + Token* defined_result = tokens_push_new(&defined_results); + defined_result->kind = TokenKind_literal_int; + defined_result->value.integer = is_defined; + pp->pos = replace_pp_tokens(pp, defined_pos, pp->pos, &defined_results); + } else { + expand_macro(pp, false, NULL); + } + } else { + next_pp_token(pp); + } + } + + // all remaining identifiers other than true (including those lexically identical to keywords such as false) are + // replaced with the pp-number 0, true is replaced with pp-number 1, and then each preprocessing token is + // converted into a token. + for (int pos = condition_expr_start_pos; pos < pp->pos; ++pos) { + Token* tok = pp_token_at(pp, pos); + if (tok->kind == TokenKind_ident) { + bool is_true = strcmp(tok->value.string, "true") == 0; + tok->kind = TokenKind_literal_int; + tok->value.integer = is_true; + } + } + + int condition_expr_tokens_len = pp->pos - condition_expr_start_pos; + TokenArray condition_expr_tokens; + // +1 to add EOF token at the end. + tokens_init(&condition_expr_tokens, condition_expr_tokens_len + 1); + for (int i = 0; i < condition_expr_tokens_len; ++i) { + *tokens_push_new(&condition_expr_tokens) = *pp_token_at(pp, condition_expr_start_pos + i); + } + Token* eof_tok = tokens_push_new(&condition_expr_tokens); + eof_tok->kind = TokenKind_eof; + + bool do_include = pp_eval_constant_expr(&condition_expr_tokens) && !did_include; + include_conditionally(pp, GroupDelimiterKind_after_if_directive, do_include); + return do_include; + } else if (directive->kind == TokenKind_pp_directive_ifdef || directive->kind == TokenKind_pp_directive_elifdef) { + skip_whitespaces(pp); + Token* macro_name = consume_pp_token_if(pp, TokenKind_ident); + if (!macro_name) { + fatal_error(""); + } + + bool do_include = !did_include && find_macro(pp, macro_name->value.string) != -1; + include_conditionally(pp, GroupDelimiterKind_after_if_directive, do_include); + return do_include; + } else if (directive->kind == TokenKind_pp_directive_ifndef || directive->kind == TokenKind_pp_directive_elifndef) { + skip_whitespaces(pp); + Token* macro_name = consume_pp_token_if(pp, TokenKind_ident); + if (!macro_name) { + fatal_error(""); + } + + bool do_include = !did_include && find_macro(pp, macro_name->value.string) == -1; + include_conditionally(pp, GroupDelimiterKind_after_if_directive, do_include); + return do_include; + } else { + unreachable(); + } +} + +static bool preprocess_if_group(Preprocessor* pp) { + return preprocess_if_group_or_elif_group(pp, false); +} + +static bool preprocess_elif_group(Preprocessor* pp, bool did_include) { + return preprocess_if_group_or_elif_group(pp, did_include); +} + +// elif-groups: +// { elif-group }+ +static bool preprocess_elif_groups_opt(Preprocessor* pp, bool did_include) { + while (!pp_eof(pp)) { + Token* tok = peek_pp_token(pp); + if (tok->kind == TokenKind_pp_directive_elif || tok->kind == TokenKind_pp_directive_elifdef || + tok->kind == TokenKind_pp_directive_elifndef) { + did_include |= preprocess_elif_group(pp, did_include); + } else { + break; + } + } + return did_include; +} + +// else-group: +// '#' 'else' group? +static void preprocess_else_group(Preprocessor* pp, bool did_include) { + skip_pp_token(pp, TokenKind_pp_directive_else); + skip_whitespaces(pp); + expect_pp_token(pp, TokenKind_newline); + + include_conditionally(pp, GroupDelimiterKind_after_else_directive, !did_include); +} + +// endif-line: +// '#' 'endif' new-line +static void preprocess_endif_directive(Preprocessor* pp) { + skip_pp_token(pp, TokenKind_pp_directive_endif); + skip_whitespaces(pp); + expect_pp_token(pp, TokenKind_newline); +} + +// if-section: +// if-group elif-groups? else-group? endif-line +static void preprocess_if_section(Preprocessor* pp) { + bool did_include = preprocess_if_group(pp); + did_include = preprocess_elif_groups_opt(pp, did_include); + if (peek_pp_token(pp)->kind == TokenKind_pp_directive_else) { + preprocess_else_group(pp, did_include); + } + preprocess_endif_directive(pp); +} + +static void preprocess_include_directive(Preprocessor* pp) { + skip_pp_token(pp, TokenKind_pp_directive_include); + skip_whitespaces(pp); + Token* include_name = read_include_header_name(pp); + const char* include_name_resolved = resolve_include_name(pp, include_name); + if (include_name_resolved == NULL) { + fatal_error("%s:%d: cannot resolve include file name: %s", include_name->loc.filename, include_name->loc.line, + token_stringify(include_name)); + } + + if ((pp->generate_system_deps && include_name->value.string[0] == '<') || + (pp->generate_user_deps && include_name->value.string[0] == '"')) { + bool already_included = false; + for (size_t i = 0; i < pp->included_files->len; ++i) { + if (strcmp(pp->included_files->data[i], include_name_resolved) == 0) { + already_included = true; + break; + } + } + if (!already_included) { + strings_push(pp->included_files, include_name_resolved); + } + } + + skip_whitespaces(pp); + expect_pp_token(pp, TokenKind_newline); + expand_include_directive(pp, include_name_resolved, include_name); +} + +// #include_next is a part of GNU extension. +// https://gcc.gnu.org/onlinedocs/cpp/Wrapper-Headers.html +static void preprocess_include_next_directive(Preprocessor* pp) { + skip_pp_token(pp, TokenKind_pp_directive_include_next); + skip_whitespaces(pp); + Token* include_name = read_include_header_name(pp); + const char* include_name_resolved = resolve_next_include_name(pp, include_name); + if (include_name_resolved == NULL) { + fatal_error("%s:%d: cannot resolve include file name: %s", include_name->loc.filename, include_name->loc.line, + token_stringify(include_name)); + } + + if (include_name->value.string[0] == '"') { + bool already_included = false; + for (size_t i = 0; i < pp->included_files->len; ++i) { + if (strcmp(pp->included_files->data[i], include_name_resolved) == 0) { + already_included = true; + break; + } + } + if (!already_included) { + strings_push(pp->included_files, include_name_resolved); + } + } + + skip_whitespaces(pp); + expect_pp_token(pp, TokenKind_newline); + expand_include_directive(pp, include_name_resolved, include_name); +} + +static void preprocess_embed_directive(Preprocessor*) { + unimplemented(); +} + +static void preprocess_define_directive(Preprocessor* pp) { + skip_pp_token(pp, TokenKind_pp_directive_define); + skip_whitespaces(pp); + Token* macro_name = expect_pp_token(pp, TokenKind_ident); + + if (consume_pp_token_if(pp, TokenKind_paren_l)) { + TokenArray* parameters = pp_parse_macro_parameters(pp); + skip_whitespaces(pp); + int replacements_start_pos = pp->pos; + seek_to_next_newline(pp); + Macro* macro = macros_push_new(pp->macros); + macro->kind = MacroKind_func; + macro->name = macro_name->value.string; + macro->parameters = *parameters; + int n_replacements = pp->pos - replacements_start_pos; + tokens_init(¯o->replacements, n_replacements); + for (int i = 0; i < n_replacements; ++i) { + *tokens_push_new(¯o->replacements) = *pp_token_at(pp, replacements_start_pos + i); + } + // Remove trailing whitespaces. + for (int i = n_replacements - 1; i >= 0; --i) { + if (macro->replacements.data[i].kind == TokenKind_whitespace) { + tokens_pop(¯o->replacements); + } else { + break; + } + } + } else { + skip_whitespaces(pp); + int replacements_start_pos = pp->pos; + seek_to_next_newline(pp); + Macro* macro = macros_push_new(pp->macros); + macro->kind = MacroKind_obj; + macro->name = macro_name->value.string; + int n_replacements = pp->pos - replacements_start_pos; + tokens_init(¯o->replacements, n_replacements); + for (int i = 0; i < n_replacements; ++i) { + *tokens_push_new(¯o->replacements) = *pp_token_at(pp, replacements_start_pos + i); + } + // Remove trailing whitespaces. + for (int i = n_replacements - 1; i >= 0; --i) { + if (macro->replacements.data[i].kind == TokenKind_whitespace) { + tokens_pop(¯o->replacements); + } else { + break; + } + } + } + expect_pp_token(pp, TokenKind_newline); +} + +static void preprocess_undef_directive(Preprocessor* pp) { + skip_pp_token(pp, TokenKind_pp_directive_undef); + skip_whitespaces(pp); + Token* macro_name = consume_pp_token_if(pp, TokenKind_ident); + if (macro_name) { + int macro_idx = find_macro(pp, macro_name->value.string); + if (macro_idx != -1) { + undef_macro(pp, macro_idx); + } + } +} + +static void preprocess_line_directive(Preprocessor*) { + unimplemented(); +} + +// control-line: +// ... +// '#' 'error' pp-tokens? new-line +static void preprocess_error_directive(Preprocessor* pp) { + // The C23 standard does not specify format of diagnostic message caused by #error. + // Ducc assumes that #error takes exactly one argument consisting of a string literal. + // TODO: output some general message or something else if not. + skip_pp_token(pp, TokenKind_pp_directive_error); + skip_whitespaces(pp); + Token* msg = expect_pp_token(pp, TokenKind_literal_str); + skip_whitespaces(pp); + expect_pp_token(pp, TokenKind_newline); + fatal_error("%s:%d: %s", msg->loc.filename, msg->loc.line, msg->value.string); +} + +// control-line: +// ... +// '#' 'warning' pp-tokens? new-line +static void preprocess_warning_directive(Preprocessor* pp) { + // The C23 standard does not specify format of diagnostic message caused by #warning. + // Ducc assumes that #warning takes exactly one argument consisting of a string literal. + // TODO: output some general message or something else if not. + skip_pp_token(pp, TokenKind_pp_directive_warning); + skip_whitespaces(pp); + Token* msg = expect_pp_token(pp, TokenKind_literal_str); + skip_whitespaces(pp); + expect_pp_token(pp, TokenKind_newline); + fprintf(stderr, "%s:%d: %s", msg->loc.filename, msg->loc.line, msg->value.string); +} + +static void preprocess_pragma_directive(Preprocessor* pp) { + // Ignore all #pragma directives for now. + skip_pp_token(pp, TokenKind_pp_directive_pragma); + seek_to_next_newline(pp); + skip_pp_token(pp, TokenKind_newline); +} + +static void preprocess_nop_directive(Preprocessor* pp) { + skip_pp_token(pp, TokenKind_pp_directive_nop); +} + +static void preprocess_non_directive_directive(Preprocessor* pp) { + Token* tok = peek_pp_token(pp); + // C23 6.10.1.13: + // The execution of a non-directive preprocessing directive results in undefined behavior. + fatal_error("%s:%d: invalid preprocessing directive, '%s'", tok->loc.filename, tok->loc.line, token_stringify(tok)); +} + +static void preprocess_text_line(Preprocessor* pp) { + while (!pp_eof(pp)) { + if (consume_pp_token_if(pp, TokenKind_newline)) { + return; + } + if (consume_pp_token_if_not(pp, TokenKind_ident)) { + continue; + } + + expand_macro(pp, true, NULL); + } + expect_pp_token(pp, TokenKind_newline); +} + +// group-part: +// if-section +// control-line +// '#' non-directive +// text-line +// +// control-line: +// '#' 'include' ... +// '#' 'embed' ... +// '#' 'define' ... +// '#' 'undef' ... +// '#' 'line' ... +// '#' 'error' ... +// '#' 'warning' ... +// '#' 'pragma' ... +// '#' new-line +static void preprocess_group_part(Preprocessor* pp) { + Token* tok = peek_pp_token(pp); + if (tok->kind == TokenKind_pp_directive_if || tok->kind == TokenKind_pp_directive_ifdef || + tok->kind == TokenKind_pp_directive_ifndef) { + preprocess_if_section(pp); + } else if (tok->kind == TokenKind_pp_directive_elif || tok->kind == TokenKind_pp_directive_elifdef || + tok->kind == TokenKind_pp_directive_elifndef || tok->kind == TokenKind_pp_directive_else || + tok->kind == TokenKind_pp_directive_endif) { + fatal_error("%s:%d: unexpected '%s'; no corresponding '#if'*", tok->loc.filename, tok->loc.line, + token_kind_stringify(tok->kind)); + } else if (tok->kind == TokenKind_pp_directive_include) { + preprocess_include_directive(pp); + } else if (tok->kind == TokenKind_pp_directive_include_next) { + preprocess_include_next_directive(pp); + } else if (tok->kind == TokenKind_pp_directive_embed) { + preprocess_embed_directive(pp); + } else if (tok->kind == TokenKind_pp_directive_define) { + preprocess_define_directive(pp); + } else if (tok->kind == TokenKind_pp_directive_undef) { + preprocess_undef_directive(pp); + } else if (tok->kind == TokenKind_pp_directive_line) { + preprocess_line_directive(pp); + } else if (tok->kind == TokenKind_pp_directive_error) { + preprocess_error_directive(pp); + } else if (tok->kind == TokenKind_pp_directive_warning) { + preprocess_warning_directive(pp); + } else if (tok->kind == TokenKind_pp_directive_pragma) { + preprocess_pragma_directive(pp); + } else if (tok->kind == TokenKind_pp_directive_nop) { + preprocess_nop_directive(pp); + } else if (tok->kind == TokenKind_pp_directive_non_directive) { + preprocess_non_directive_directive(pp); + } else { + preprocess_text_line(pp); + } +} + +// group: +// { group-part }+ +static void preprocess_group_opt(Preprocessor* pp, GroupDelimiterKind delimiter_kind) { + while (!pp_eof(pp)) { + Token* tok = peek_pp_token(pp); + if (is_delimiter_of_current_group(delimiter_kind, tok->kind)) + return; + preprocess_group_part(pp); + } + + if (delimiter_kind != GroupDelimiterKind_normal) { + expect_pp_token(pp, TokenKind_pp_directive_endif); + } +} + +static void skip_group_opt(Preprocessor* pp, GroupDelimiterKind delimiter_kind) { + assert(delimiter_kind != GroupDelimiterKind_normal); + int nesting = 0; + + while (!pp_eof(pp)) { + Token* tok = peek_pp_token(pp); + if (nesting == 0 && is_delimiter_of_current_group(delimiter_kind, tok->kind)) { + return; + } + if (tok->kind == TokenKind_pp_directive_if || tok->kind == TokenKind_pp_directive_ifdef || + tok->kind == TokenKind_pp_directive_ifndef) { + ++nesting; + } else if (tok->kind == TokenKind_pp_directive_endif) { + --nesting; + } + int first_pos = pp->pos; + seek_to_next_newline(pp); + expect_pp_token(pp, TokenKind_newline); + make_tokens_removed(pp, first_pos, pp->pos); + } + + expect_pp_token(pp, TokenKind_pp_directive_endif); +} + +static void include_conditionally(Preprocessor* pp, GroupDelimiterKind delimiter_kind, bool do_include) { + if (do_include) { + preprocess_group_opt(pp, delimiter_kind); + } else { + skip_group_opt(pp, delimiter_kind); + } +} + +// preprocessing-file: +// group? +static void preprocess_preprocessing_file(Preprocessor* pp) { + preprocess_group_opt(pp, GroupDelimiterKind_normal); +} + +static void remove_pp_directive(Preprocessor* pp, int directive_token_pos) { + seek_to_next_newline(pp); + skip_pp_token(pp, TokenKind_newline); + make_tokens_removed(pp, directive_token_pos, pp->pos); +} + +static void remove_pp_directives(Preprocessor* pp) { + pp->pos = 0; + while (!pp_eof(pp)) { + if (is_pp_directive(peek_pp_token(pp)->kind)) { + remove_pp_directive(pp, pp->pos); + } else { + next_pp_token(pp); + } + } +} + +static char* get_ducc_include_path() { + const char* self_dir = get_self_dir(); + char* buf = calloc(strlen(self_dir) + strlen("/../include") + 1, sizeof(char)); + sprintf(buf, "%s/../include", self_dir); + return buf; +} + +static TokenArray* do_preprocess(InFile* src, int depth, MacroArray* macros, StrArray* include_paths, + StrArray* included_files, bool generate_system_deps, bool generate_user_deps) { + TokenArray* pp_tokens = tokenize(src); + Preprocessor* pp = preprocessor_new(pp_tokens, depth, macros, include_paths, included_files, generate_system_deps, + generate_user_deps); + + preprocess_preprocessing_file(pp); + remove_pp_directives(pp); + return pp->pp_tokens; +} + +TokenArray* preprocess(InFile* src, StrArray* user_defines, StrArray* user_include_dirs, StrArray* included_files, + bool generate_system_deps, bool generate_user_deps) { + MacroArray* macros = macros_new(); + add_predefined_macros(macros); + add_user_defines(macros, user_defines); + strings_push(included_files, src->loc.filename); + + StrArray* include_paths = calloc(1, sizeof(StrArray)); + strings_init(include_paths); + + // Ducc's built-in headers has highest priority. + strings_push(include_paths, get_ducc_include_path()); + + for (size_t i = 0; i < user_include_dirs->len; ++i) { + strings_push(include_paths, user_include_dirs->data[i]); + } + strings_push(include_paths, "/usr/local/include"); + strings_push(include_paths, "/usr/include/x86_64-linux-gnu"); + strings_push(include_paths, "/usr/include"); + + return do_preprocess(src, 0, macros, include_paths, included_files, generate_system_deps, generate_user_deps); +} + +void concat_adjacent_string_literals(TokenArray* pp_tokens) { + size_t last_nonempty_token_pos = 0; + TokenKind last_nonempty_token_kind = TokenKind_eof; + for (size_t pos = 0; pos < pp_tokens->len; ++pos) { + Token* pp_tok = &pp_tokens->data[pos]; + TokenKind k = pp_tok->kind; + if (k == TokenKind_removed || k == TokenKind_whitespace || k == TokenKind_newline) { + continue; + } + if (k == TokenKind_literal_str && last_nonempty_token_kind == TokenKind_literal_str) { + // Concatenate adjacent string literals. + Token* last_pp_tok = &pp_tokens->data[last_nonempty_token_pos]; + const char* s1 = last_pp_tok->value.string; + size_t l1 = strlen(s1); + const char* s2 = pp_tok->value.string; + size_t l2 = strlen(s2); + char* buf = calloc(l1 + l2 + 1, sizeof(char)); + memcpy(buf, s1, l1); + memcpy(buf + l1, s2, l2); + last_pp_tok->value.string = buf; + pp_tok->kind = TokenKind_removed; + } else { + last_nonempty_token_pos = pos; + last_nonempty_token_kind = k; + } + } +} + +void print_token_to_file(FILE* out, TokenArray* pp_tokens) { + for (size_t i = 0; i < pp_tokens->len; ++i) { + Token* tok = &pp_tokens->data[i]; + + if (tok->kind == TokenKind_whitespace) { + // TODO: preserve indent? + fprintf(out, " "); + } else if (tok->kind == TokenKind_removed) { + // Output nothing for removed tokens + } else if (tok->kind == TokenKind_newline) { + // TODO: remove adjacent newlines? + fprintf(out, "\n"); + } else if (tok->kind != TokenKind_eof) { + // TODO: string literal + fprintf(out, "%s", token_stringify(tok)); + // Add space after token if next token is not punctuation + // TODO: apply stricter approach + if (i + 1 < pp_tokens->len) { + Token* next = &pp_tokens->data[i + 1]; + if (next->kind != TokenKind_newline && next->kind != TokenKind_whitespace && + next->kind != TokenKind_removed && next->kind != TokenKind_eof && next->kind != TokenKind_comma && + next->kind != TokenKind_semicolon && next->kind != TokenKind_paren_r && + next->kind != TokenKind_bracket_r && next->kind != TokenKind_brace_r && + next->kind != TokenKind_dot) { + fprintf(out, " "); + } + } + } + } +} diff --git a/src/cc1/preprocess.h b/src/cc1/preprocess.h new file mode 100644 index 0000000..8d86fe5 --- /dev/null +++ b/src/cc1/preprocess.h @@ -0,0 +1,13 @@ +#ifndef DUCC_PREPROCESS_H +#define DUCC_PREPROCESS_H + +#include "../lib/common.h" +#include "io.h" +#include "token.h" + +TokenArray* preprocess(InFile* src, StrArray* user_defines, StrArray* user_include_dirs, StrArray* included_files, + bool generate_system_deps, bool generate_user_deps); +void concat_adjacent_string_literals(TokenArray* pp_tokens); +void print_token_to_file(FILE* output_file, TokenArray* pp_tokens); + +#endif diff --git a/src/cc1/sys.c b/src/cc1/sys.c new file mode 100644 index 0000000..693062a --- /dev/null +++ b/src/cc1/sys.c @@ -0,0 +1,21 @@ +#include "sys.h" +#include +#include +#include +#include "../lib/ducc.h" + +static char* get_self_path() { + char* buf = calloc(PATH_MAX, sizeof(char)); + ssize_t len = readlink("/proc/self/exe", buf, PATH_MAX - 1); + if (len == -1 || len == PATH_MAX - 1) { + return NULL; + } + buf[len] = '\0'; + return buf; +} + +// It returns a path not including final / except for root directory. +char* get_self_dir() { + char* path = get_self_path(); + return dirname(path); +} diff --git a/src/cc1/sys.h b/src/cc1/sys.h new file mode 100644 index 0000000..2527724 --- /dev/null +++ b/src/cc1/sys.h @@ -0,0 +1,7 @@ +#ifndef DUCC_SYS_H +#define DUCC_SYS_H + +// It returns a path not including final / except for root directory. +char* get_self_dir(); + +#endif diff --git a/src/cc1/token.c b/src/cc1/token.c new file mode 100644 index 0000000..480a9a5 --- /dev/null +++ b/src/cc1/token.c @@ -0,0 +1,387 @@ +#include "token.h" +#include "../lib/common.h" +#include "../lib/json.h" + +const char* token_kind_stringify(TokenKind k) { + if (k == TokenKind_eof) + return ""; + else if (k == TokenKind_hash) + return "#"; + else if (k == TokenKind_hashhash) + return "##"; + else if (k == TokenKind_whitespace) + return ""; + else if (k == TokenKind_removed) + return ""; + else if (k == TokenKind_placemarker) + return ""; + else if (k == TokenKind_newline) + return ""; + else if (k == TokenKind_other) + return ""; + else if (k == TokenKind_character_constant) + return ""; + else if (k == TokenKind_header_name) + return ""; + else if (k == TokenKind_pp_directive_define) + return "#define"; + else if (k == TokenKind_pp_directive_elif) + return "#elif"; + else if (k == TokenKind_pp_directive_elifdef) + return "#elifdef"; + else if (k == TokenKind_pp_directive_elifndef) + return "#elifndef"; + else if (k == TokenKind_pp_directive_else) + return "#else"; + else if (k == TokenKind_pp_directive_embed) + return "#embed"; + else if (k == TokenKind_pp_directive_endif) + return "#endif"; + else if (k == TokenKind_pp_directive_error) + return "#error"; + else if (k == TokenKind_pp_directive_if) + return "#if"; + else if (k == TokenKind_pp_directive_ifdef) + return "#ifdef"; + else if (k == TokenKind_pp_directive_ifndef) + return "#ifndef"; + else if (k == TokenKind_pp_directive_include) + return "#include"; + else if (k == TokenKind_pp_directive_include_next) + return "#include_next"; + else if (k == TokenKind_pp_directive_line) + return "#line"; + else if (k == TokenKind_pp_directive_non_directive) + return "#"; + else if (k == TokenKind_pp_directive_nop) + return "#"; + else if (k == TokenKind_pp_directive_pragma) + return "#pragma"; + else if (k == TokenKind_pp_directive_undef) + return "#undef"; + else if (k == TokenKind_pp_directive_warning) + return "#warning"; + else if (k == TokenKind_pp_operator_defined) + return "defined"; + else if (k == TokenKind_pp_operator___has_c_attribute) + return "__has_c_attribute"; + else if (k == TokenKind_pp_operator___has_embed) + return "__has_embed"; + else if (k == TokenKind_pp_operator___has_include) + return "__has_include"; + else if (k == TokenKind_keyword_alignas) + return "alignas"; + else if (k == TokenKind_keyword_alignof) + return "alignof"; + else if (k == TokenKind_keyword_auto) + return "auto"; + else if (k == TokenKind_keyword_bool) + return "bool"; + else if (k == TokenKind_keyword_break) + return "break"; + else if (k == TokenKind_keyword_case) + return "case"; + else if (k == TokenKind_keyword_char) + return "char"; + else if (k == TokenKind_keyword_const) + return "const"; + else if (k == TokenKind_keyword_constexpr) + return "constexpr"; + else if (k == TokenKind_keyword_continue) + return "continue"; + else if (k == TokenKind_keyword_default) + return "default"; + else if (k == TokenKind_keyword_do) + return "do"; + else if (k == TokenKind_keyword_double) + return "double"; + else if (k == TokenKind_keyword_else) + return "else"; + else if (k == TokenKind_keyword_enum) + return "enum"; + else if (k == TokenKind_keyword_extern) + return "extern"; + else if (k == TokenKind_keyword_false) + return "false"; + else if (k == TokenKind_keyword_float) + return "float"; + else if (k == TokenKind_keyword_for) + return "for"; + else if (k == TokenKind_keyword_goto) + return "goto"; + else if (k == TokenKind_keyword_if) + return "if"; + else if (k == TokenKind_keyword_inline) + return "inline"; + else if (k == TokenKind_keyword_int) + return "int"; + else if (k == TokenKind_keyword_long) + return "long"; + else if (k == TokenKind_keyword_nullptr) + return "nullptr"; + else if (k == TokenKind_keyword_register) + return "register"; + else if (k == TokenKind_keyword_restrict) + return "restrict"; + else if (k == TokenKind_keyword_return) + return "return"; + else if (k == TokenKind_keyword_short) + return "short"; + else if (k == TokenKind_keyword_signed) + return "signed"; + else if (k == TokenKind_keyword_sizeof) + return "sizeof"; + else if (k == TokenKind_keyword_static) + return "static"; + else if (k == TokenKind_keyword_static_assert) + return "static_assert"; + else if (k == TokenKind_keyword_struct) + return "struct"; + else if (k == TokenKind_keyword_switch) + return "switch"; + else if (k == TokenKind_keyword_thread_local) + return "thread_local"; + else if (k == TokenKind_keyword_true) + return "true"; + else if (k == TokenKind_keyword_typedef) + return "typedef"; + else if (k == TokenKind_keyword_typeof) + return "typeof"; + else if (k == TokenKind_keyword_typeof_unqual) + return "typeof_unqual"; + else if (k == TokenKind_keyword_union) + return "union"; + else if (k == TokenKind_keyword_unsigned) + return "unsigned"; + else if (k == TokenKind_keyword_void) + return "void"; + else if (k == TokenKind_keyword_volatile) + return "volatile"; + else if (k == TokenKind_keyword_while) + return "while"; + else if (k == TokenKind_keyword__Atomic) + return "_Atomic"; + else if (k == TokenKind_keyword__BitInt) + return "_BitInt"; + else if (k == TokenKind_keyword__Complex) + return "_Complex"; + else if (k == TokenKind_keyword__Decimal128) + return "_Decimal128"; + else if (k == TokenKind_keyword__Decimal32) + return "_Decimal32"; + else if (k == TokenKind_keyword__Decimal64) + return "_Decimal64"; + else if (k == TokenKind_keyword__Generic) + return "_Generic"; + else if (k == TokenKind_keyword__Imaginary) + return "_Imaginary"; + else if (k == TokenKind_keyword__Noreturn) + return "_Noreturn"; + else if (k == TokenKind_and) + return "&"; + else if (k == TokenKind_andand) + return "&&"; + else if (k == TokenKind_arrow) + return "->"; + else if (k == TokenKind_assign) + return "="; + else if (k == TokenKind_assign_add) + return "+="; + else if (k == TokenKind_assign_and) + return "&="; + else if (k == TokenKind_assign_div) + return "/="; + else if (k == TokenKind_assign_lshift) + return "<<="; + else if (k == TokenKind_assign_mod) + return "%="; + else if (k == TokenKind_assign_mul) + return "*="; + else if (k == TokenKind_assign_or) + return "|="; + else if (k == TokenKind_assign_rshift) + return ">>="; + else if (k == TokenKind_assign_sub) + return "-="; + else if (k == TokenKind_assign_xor) + return "^="; + else if (k == TokenKind_brace_l) + return "{"; + else if (k == TokenKind_brace_r) + return "}"; + else if (k == TokenKind_bracket_l) + return "["; + else if (k == TokenKind_bracket_r) + return "]"; + else if (k == TokenKind_colon) + return ":"; + else if (k == TokenKind_comma) + return ","; + else if (k == TokenKind_dot) + return "."; + else if (k == TokenKind_ellipsis) + return "..."; + else if (k == TokenKind_eq) + return "=="; + else if (k == TokenKind_ge) + return ">="; + else if (k == TokenKind_gt) + return ">"; + else if (k == TokenKind_ident) + return ""; + else if (k == TokenKind_le) + return "<="; + else if (k == TokenKind_literal_int) + return ""; + else if (k == TokenKind_literal_double) + return ""; + else if (k == TokenKind_literal_str) + return ""; + else if (k == TokenKind_lshift) + return "<<"; + else if (k == TokenKind_lt) + return "<"; + else if (k == TokenKind_minus) + return "-"; + else if (k == TokenKind_minusminus) + return "--"; + else if (k == TokenKind_ne) + return "!="; + else if (k == TokenKind_not) + return "!"; + else if (k == TokenKind_or) + return "|"; + else if (k == TokenKind_oror) + return "||"; + else if (k == TokenKind_paren_l) + return "("; + else if (k == TokenKind_paren_r) + return ")"; + else if (k == TokenKind_percent) + return "%"; + else if (k == TokenKind_plus) + return "+"; + else if (k == TokenKind_plusplus) + return "++"; + else if (k == TokenKind_question) + return "?"; + else if (k == TokenKind_rshift) + return ">>"; + else if (k == TokenKind_semicolon) + return ";"; + else if (k == TokenKind_slash) + return "/"; + else if (k == TokenKind_star) + return "*"; + else if (k == TokenKind_tilde) + return "~"; + else if (k == TokenKind_xor) + return "^"; + else + unreachable(); +} + +bool is_pp_directive(TokenKind k) { + return k == TokenKind_pp_directive_define || k == TokenKind_pp_directive_elif || + k == TokenKind_pp_directive_elifdef || k == TokenKind_pp_directive_elifndef || + k == TokenKind_pp_directive_else || k == TokenKind_pp_directive_embed || k == TokenKind_pp_directive_endif || + k == TokenKind_pp_directive_error || k == TokenKind_pp_directive_if || k == TokenKind_pp_directive_ifdef || + k == TokenKind_pp_directive_ifndef || k == TokenKind_pp_directive_include || + k == TokenKind_pp_directive_include_next || k == TokenKind_pp_directive_line || + k == TokenKind_pp_directive_non_directive || k == TokenKind_pp_directive_nop || + k == TokenKind_pp_directive_pragma || k == TokenKind_pp_directive_undef || + k == TokenKind_pp_directive_warning; +} + +const char* token_stringify(Token* tok) { + TokenKind k = tok->kind; + if (k == TokenKind_pp_directive_non_directive) { + char* buf = calloc(strlen(tok->value.string) + 1 + 1, sizeof(char)); + sprintf(buf, "#%s", tok->value.string); + return buf; + } else if (k == TokenKind_literal_int) { + char* buf = calloc(10, sizeof(char)); + sprintf(buf, "%d", tok->value.integer); + return buf; + } else if (k == TokenKind_literal_double) { + char* buf = calloc(32, sizeof(char)); + sprintf(buf, "%g", tok->value.floating); + return buf; + } else if (k == TokenKind_other || k == TokenKind_character_constant || k == TokenKind_ident || + k == TokenKind_literal_str || k == TokenKind_header_name) { + return tok->value.string; + } else { + return token_kind_stringify(k); + } +} + +void token_build_json(JsonBuilder* builder, Token* tok) { + jsonbuilder_object_start(builder); + jsonbuilder_object_member_start(builder, "kind"); + jsonbuilder_string(builder, token_kind_stringify(tok->kind)); + jsonbuilder_object_member_end(builder); + jsonbuilder_object_member_start(builder, "value"); + if (tok->kind == TokenKind_pp_directive_non_directive) { + char* buf = calloc(strlen(tok->value.string) + 1 + 1, sizeof(char)); + sprintf(buf, "#%s", tok->value.string); + jsonbuilder_string(builder, buf); + } else if (tok->kind == TokenKind_literal_int) { + jsonbuilder_integer(builder, tok->value.integer); + } else if (tok->kind == TokenKind_other || tok->kind == TokenKind_character_constant || + tok->kind == TokenKind_ident || tok->kind == TokenKind_literal_str) { + jsonbuilder_string(builder, tok->value.string); + } else { + jsonbuilder_null(builder); + } + jsonbuilder_object_member_end(builder); + jsonbuilder_object_member_start(builder, "loc"); + sourcelocation_build_json(builder, &tok->loc); + jsonbuilder_object_member_end(builder); + jsonbuilder_object_end(builder); +} + +void tokens_init(TokenArray* tokens, size_t capacity) { + tokens->len = 0; + tokens->capacity = capacity; + tokens->data = calloc(tokens->capacity, sizeof(Token)); +} + +void tokens_reserve(TokenArray* tokens, size_t size) { + if (size <= tokens->capacity) + return; + while (tokens->capacity < size) { + tokens->capacity *= 2; + } + tokens->data = realloc(tokens->data, tokens->capacity * sizeof(Token)); + memset(tokens->data + tokens->len, 0, (tokens->capacity - tokens->len) * sizeof(Token)); +} + +Token* tokens_push_new(TokenArray* tokens) { + tokens_reserve(tokens, tokens->len + 1); + return &tokens->data[tokens->len++]; +} + +Token* tokens_pop(TokenArray* tokens) { + if (tokens->len == 0) { + return NULL; + } else { + return &tokens->data[--tokens->len]; + } +} + +void tokens_build_json(JsonBuilder* builder, TokenArray* tokens) { + jsonbuilder_object_start(builder); + jsonbuilder_object_member_start(builder, "len"); + jsonbuilder_integer(builder, tokens->len); + jsonbuilder_object_member_end(builder); + jsonbuilder_object_member_start(builder, "data"); + jsonbuilder_array_start(builder); + for (size_t i = 0; i < tokens->len; ++i) { + jsonbuilder_array_element_start(builder); + token_build_json(builder, &tokens->data[i]); + jsonbuilder_array_element_end(builder); + } + jsonbuilder_array_end(builder); + jsonbuilder_object_member_end(builder); + jsonbuilder_object_end(builder); +} diff --git a/src/cc1/token.h b/src/cc1/token.h new file mode 100644 index 0000000..46490c1 --- /dev/null +++ b/src/cc1/token.h @@ -0,0 +1,183 @@ +#ifndef DUCC_TOKEN_H +#define DUCC_TOKEN_H + +#include "../lib/ducc.h" +#include "io.h" + +typedef enum { + TokenKind_eof, + + // Only preprocessing phase. + TokenKind_hash, + TokenKind_hashhash, + TokenKind_whitespace, + TokenKind_removed, + TokenKind_placemarker, + TokenKind_newline, + TokenKind_other, + TokenKind_character_constant, + TokenKind_header_name, + TokenKind_pp_directive_define, + TokenKind_pp_directive_elif, + TokenKind_pp_directive_elifdef, + TokenKind_pp_directive_elifndef, + TokenKind_pp_directive_else, + TokenKind_pp_directive_embed, + TokenKind_pp_directive_endif, + TokenKind_pp_directive_error, + TokenKind_pp_directive_if, + TokenKind_pp_directive_ifdef, + TokenKind_pp_directive_ifndef, + TokenKind_pp_directive_include, + TokenKind_pp_directive_include_next, // GNU extension + TokenKind_pp_directive_line, + TokenKind_pp_directive_non_directive, + TokenKind_pp_directive_nop, + TokenKind_pp_directive_pragma, + TokenKind_pp_directive_undef, + TokenKind_pp_directive_warning, + TokenKind_pp_operator_defined, + TokenKind_pp_operator___has_c_attribute, + TokenKind_pp_operator___has_embed, + TokenKind_pp_operator___has_include, + + // C23: 6.4.1 + TokenKind_keyword_alignas, + TokenKind_keyword_alignof, + TokenKind_keyword_auto, + TokenKind_keyword_bool, + TokenKind_keyword_break, + TokenKind_keyword_case, + TokenKind_keyword_char, + TokenKind_keyword_const, + TokenKind_keyword_constexpr, + TokenKind_keyword_continue, + TokenKind_keyword_default, + TokenKind_keyword_do, + TokenKind_keyword_double, + TokenKind_keyword_else, + TokenKind_keyword_enum, + TokenKind_keyword_extern, + TokenKind_keyword_false, + TokenKind_keyword_float, + TokenKind_keyword_for, + TokenKind_keyword_goto, + TokenKind_keyword_if, + TokenKind_keyword_inline, + TokenKind_keyword_int, + TokenKind_keyword_long, + TokenKind_keyword_nullptr, + TokenKind_keyword_register, + TokenKind_keyword_restrict, + TokenKind_keyword_return, + TokenKind_keyword_short, + TokenKind_keyword_signed, + TokenKind_keyword_sizeof, + TokenKind_keyword_static, + TokenKind_keyword_static_assert, + TokenKind_keyword_struct, + TokenKind_keyword_switch, + TokenKind_keyword_thread_local, + TokenKind_keyword_true, + TokenKind_keyword_typedef, + TokenKind_keyword_typeof, + TokenKind_keyword_typeof_unqual, + TokenKind_keyword_union, + TokenKind_keyword_unsigned, + TokenKind_keyword_void, + TokenKind_keyword_volatile, + TokenKind_keyword_while, + TokenKind_keyword__Atomic, + TokenKind_keyword__BitInt, + TokenKind_keyword__Complex, + TokenKind_keyword__Decimal128, + TokenKind_keyword__Decimal32, + TokenKind_keyword__Decimal64, + TokenKind_keyword__Generic, + TokenKind_keyword__Imaginary, + TokenKind_keyword__Noreturn, + + TokenKind_and, + TokenKind_andand, + TokenKind_arrow, + TokenKind_assign, + TokenKind_assign_add, + TokenKind_assign_and, + TokenKind_assign_div, + TokenKind_assign_lshift, + TokenKind_assign_mod, + TokenKind_assign_mul, + TokenKind_assign_or, + TokenKind_assign_rshift, + TokenKind_assign_sub, + TokenKind_assign_xor, + TokenKind_brace_l, + TokenKind_brace_r, + TokenKind_bracket_l, + TokenKind_bracket_r, + TokenKind_colon, + TokenKind_comma, + TokenKind_dot, + TokenKind_ellipsis, + TokenKind_eq, + TokenKind_ge, + TokenKind_gt, + TokenKind_ident, + TokenKind_le, + TokenKind_literal_int, + TokenKind_literal_double, + TokenKind_literal_str, + TokenKind_lshift, + TokenKind_lt, + TokenKind_minus, + TokenKind_minusminus, + TokenKind_ne, + TokenKind_not, + TokenKind_or, + TokenKind_oror, + TokenKind_paren_l, + TokenKind_paren_r, + TokenKind_percent, + TokenKind_plus, + TokenKind_plusplus, + TokenKind_question, + TokenKind_rshift, + TokenKind_semicolon, + TokenKind_slash, + TokenKind_star, + TokenKind_tilde, + TokenKind_xor, +} TokenKind; + +const char* token_kind_stringify(TokenKind k); +bool is_pp_directive(TokenKind k); + +// TokenValue is externally tagged by Token's kind. +typedef union { + const char* string; + int integer; + double floating; +} TokenValue; + +typedef struct { + TokenKind kind; + TokenValue value; + SourceLocation loc; +} Token; + +const char* token_stringify(Token* tok); +void token_build_json(JsonBuilder* builder, Token* tok); + +typedef struct { + size_t len; + size_t capacity; + Token* data; +} TokenArray; + +void tokens_init(TokenArray* tokens, size_t capacity); +void tokens_reserve(TokenArray* tokens, size_t size); +Token* tokens_push_new(TokenArray* tokens); +Token* tokens_pop(TokenArray* tokens); +void tokens_build_json(JsonBuilder* builder, TokenArray* tokens); + +#endif diff --git a/src/cc1/tokenize.c b/src/cc1/tokenize.c new file mode 100644 index 0000000..78b1acb --- /dev/null +++ b/src/cc1/tokenize.c @@ -0,0 +1,597 @@ +#include "tokenize.h" +#include +#include "../lib/common.h" + +typedef struct { + InFile* src; + bool at_bol; + bool expect_header_name; + TokenArray* tokens; +} Lexer; + +static Lexer* lexer_new(InFile* src) { + Lexer* l = calloc(1, sizeof(Lexer)); + + l->src = src; + l->at_bol = true; + l->expect_header_name = false; + l->tokens = calloc(1, sizeof(TokenArray)); + tokens_init(l->tokens, 1024 * 16); + + return l; +} + +static void pplexer_tokenize_pp_directive(Lexer* l, Token* tok) { + // Skip whitespaces after '#'. + char c; + while (isspace((c = infile_peek_char(l->src)))) { + if (c == '\n') + break; + infile_next_char(l->src); + } + // '#' new-line + if (c == '\n') { + tok->kind = TokenKind_pp_directive_nop; + return; + } + + StrBuilder builder; + strbuilder_init(&builder); + while (isalnum(infile_peek_char(l->src)) || infile_peek_char(l->src) == '_') { + strbuilder_append_char(&builder, infile_peek_char(l->src)); + infile_next_char(l->src); + } + const char* pp_directive_name = builder.buf; + + if (builder.len == 0) { + tok->kind = TokenKind_hash; + } else if (strcmp(pp_directive_name, "define") == 0) { + tok->kind = TokenKind_pp_directive_define; + } else if (strcmp(pp_directive_name, "elif") == 0) { + tok->kind = TokenKind_pp_directive_elif; + } else if (strcmp(pp_directive_name, "elifdef") == 0) { + tok->kind = TokenKind_pp_directive_elifdef; + } else if (strcmp(pp_directive_name, "elifndef") == 0) { + tok->kind = TokenKind_pp_directive_elifndef; + } else if (strcmp(pp_directive_name, "else") == 0) { + tok->kind = TokenKind_pp_directive_else; + } else if (strcmp(pp_directive_name, "embed") == 0) { + tok->kind = TokenKind_pp_directive_embed; + } else if (strcmp(pp_directive_name, "endif") == 0) { + tok->kind = TokenKind_pp_directive_endif; + } else if (strcmp(pp_directive_name, "error") == 0) { + tok->kind = TokenKind_pp_directive_error; + } else if (strcmp(pp_directive_name, "if") == 0) { + tok->kind = TokenKind_pp_directive_if; + } else if (strcmp(pp_directive_name, "ifdef") == 0) { + tok->kind = TokenKind_pp_directive_ifdef; + } else if (strcmp(pp_directive_name, "ifndef") == 0) { + tok->kind = TokenKind_pp_directive_ifndef; + } else if (strcmp(pp_directive_name, "include") == 0) { + l->expect_header_name = true; + tok->kind = TokenKind_pp_directive_include; + } else if (strcmp(pp_directive_name, "include_next") == 0) { + l->expect_header_name = true; + tok->kind = TokenKind_pp_directive_include_next; + } else if (strcmp(pp_directive_name, "line") == 0) { + tok->kind = TokenKind_pp_directive_line; + } else if (strcmp(pp_directive_name, "pragma") == 0) { + tok->kind = TokenKind_pp_directive_pragma; + } else if (strcmp(pp_directive_name, "undef") == 0) { + tok->kind = TokenKind_pp_directive_undef; + } else if (strcmp(pp_directive_name, "warning") == 0) { + tok->kind = TokenKind_pp_directive_warning; + } else { + tok->kind = TokenKind_pp_directive_non_directive; + tok->value.string = pp_directive_name; + } +} + +static void do_tokenize_all(Lexer* l) { + while (!infile_eof(l->src)) { + Token* tok = tokens_push_new(l->tokens); + tok->loc = l->src->loc; + char c = infile_peek_char(l->src); + + if (l->expect_header_name && c == '"') { + infile_next_char(l->src); + StrBuilder builder; + strbuilder_init(&builder); + strbuilder_append_char(&builder, '"'); + while (1) { + char ch = infile_peek_char(l->src); + if (ch == '"') + break; + strbuilder_append_char(&builder, ch); + if (ch == '\\') { + infile_next_char(l->src); + strbuilder_append_char(&builder, infile_peek_char(l->src)); + } + infile_next_char(l->src); + } + strbuilder_append_char(&builder, '"'); + infile_next_char(l->src); + tok->kind = TokenKind_header_name; + tok->value.string = builder.buf; + l->expect_header_name = false; + } else if (l->expect_header_name && c == '<') { + infile_next_char(l->src); + StrBuilder builder; + strbuilder_init(&builder); + strbuilder_append_char(&builder, '<'); + while (1) { + char ch = infile_peek_char(l->src); + if (ch == '>') + break; + strbuilder_append_char(&builder, ch); + infile_next_char(l->src); + } + strbuilder_append_char(&builder, '>'); + infile_next_char(l->src); + tok->kind = TokenKind_header_name; + tok->value.string = builder.buf; + l->expect_header_name = false; + } else if (c == '(') { + infile_next_char(l->src); + tok->kind = TokenKind_paren_l; + } else if (c == ')') { + infile_next_char(l->src); + tok->kind = TokenKind_paren_r; + } else if (c == '{') { + infile_next_char(l->src); + tok->kind = TokenKind_brace_l; + } else if (c == '}') { + infile_next_char(l->src); + tok->kind = TokenKind_brace_r; + } else if (c == '[') { + infile_next_char(l->src); + tok->kind = TokenKind_bracket_l; + } else if (c == ']') { + infile_next_char(l->src); + tok->kind = TokenKind_bracket_r; + } else if (c == ',') { + infile_next_char(l->src); + tok->kind = TokenKind_comma; + } else if (c == ':') { + infile_next_char(l->src); + tok->kind = TokenKind_colon; + } else if (c == ';') { + infile_next_char(l->src); + tok->kind = TokenKind_semicolon; + } else if (c == '^') { + infile_next_char(l->src); + if (infile_consume_if(l->src, '=')) { + tok->kind = TokenKind_assign_xor; + } else { + tok->kind = TokenKind_xor; + } + } else if (c == '?') { + infile_next_char(l->src); + tok->kind = TokenKind_question; + } else if (c == '~') { + infile_next_char(l->src); + tok->kind = TokenKind_tilde; + } else if (c == '+') { + infile_next_char(l->src); + if (infile_consume_if(l->src, '=')) { + tok->kind = TokenKind_assign_add; + } else if (infile_consume_if(l->src, '+')) { + tok->kind = TokenKind_plusplus; + } else { + tok->kind = TokenKind_plus; + } + } else if (c == '|') { + infile_next_char(l->src); + if (infile_consume_if(l->src, '=')) { + tok->kind = TokenKind_assign_or; + } else if (infile_consume_if(l->src, '|')) { + tok->kind = TokenKind_oror; + } else { + tok->kind = TokenKind_or; + } + } else if (c == '&') { + infile_next_char(l->src); + if (infile_consume_if(l->src, '=')) { + tok->kind = TokenKind_assign_and; + } else if (infile_consume_if(l->src, '&')) { + tok->kind = TokenKind_andand; + } else { + tok->kind = TokenKind_and; + } + } else if (c == '-') { + infile_next_char(l->src); + if (infile_consume_if(l->src, '>')) { + tok->kind = TokenKind_arrow; + } else if (infile_consume_if(l->src, '=')) { + tok->kind = TokenKind_assign_sub; + } else if (infile_consume_if(l->src, '-')) { + tok->kind = TokenKind_minusminus; + } else { + tok->kind = TokenKind_minus; + } + } else if (c == '*') { + infile_next_char(l->src); + if (infile_consume_if(l->src, '=')) { + tok->kind = TokenKind_assign_mul; + } else { + tok->kind = TokenKind_star; + } + } else if (c == '/') { + infile_next_char(l->src); + if (infile_consume_if(l->src, '=')) { + tok->kind = TokenKind_assign_div; + } else if (infile_consume_if(l->src, '/')) { + while (!infile_eof(l->src) && infile_peek_char(l->src) != '\n') { + infile_next_char(l->src); + } + tok->kind = TokenKind_whitespace; + } else if (infile_consume_if(l->src, '*')) { + while (infile_peek_char(l->src)) { + if (infile_consume_if(l->src, '*')) { + if (infile_consume_if(l->src, '/')) { + break; + } + continue; + } + infile_next_char(l->src); + } + tok->kind = TokenKind_whitespace; + } else { + tok->kind = TokenKind_slash; + } + } else if (c == '%') { + infile_next_char(l->src); + if (infile_consume_if(l->src, '=')) { + tok->kind = TokenKind_assign_mod; + } else { + tok->kind = TokenKind_percent; + } + } else if (c == '.') { + infile_next_char(l->src); + if (infile_consume_if(l->src, '.')) { + if (infile_consume_if(l->src, '.')) { + tok->kind = TokenKind_ellipsis; + } else { + tok->kind = TokenKind_other; + tok->value.string = ".."; + } + } else { + tok->kind = TokenKind_dot; + } + } else if (c == '!') { + infile_next_char(l->src); + if (infile_consume_if(l->src, '=')) { + tok->kind = TokenKind_ne; + } else { + tok->kind = TokenKind_not; + } + } else if (c == '=') { + infile_next_char(l->src); + if (infile_consume_if(l->src, '=')) { + tok->kind = TokenKind_eq; + } else { + tok->kind = TokenKind_assign; + } + } else if (c == '<') { + infile_next_char(l->src); + if (infile_consume_if(l->src, '=')) { + tok->kind = TokenKind_le; + } else if (infile_consume_if(l->src, '<')) { + if (infile_consume_if(l->src, '=')) { + tok->kind = TokenKind_assign_lshift; + } else { + tok->kind = TokenKind_lshift; + } + } else { + tok->kind = TokenKind_lt; + } + } else if (c == '>') { + infile_next_char(l->src); + if (infile_consume_if(l->src, '=')) { + tok->kind = TokenKind_ge; + } else if (infile_consume_if(l->src, '>')) { + if (infile_consume_if(l->src, '=')) { + tok->kind = TokenKind_assign_rshift; + } else { + tok->kind = TokenKind_rshift; + } + } else { + tok->kind = TokenKind_gt; + } + } else if (c == '#') { + infile_next_char(l->src); + if (infile_consume_if(l->src, '#')) { + tok->kind = TokenKind_hashhash; + } else { + if (l->at_bol) { + pplexer_tokenize_pp_directive(l, tok); + } else { + tok->kind = TokenKind_hash; + } + } + } else if (c == '\'') { + infile_next_char(l->src); + StrBuilder builder; + strbuilder_init(&builder); + strbuilder_append_char(&builder, '\''); + strbuilder_append_char(&builder, infile_peek_char(l->src)); + if (infile_peek_char(l->src) == '\\') { + infile_next_char(l->src); + strbuilder_append_char(&builder, infile_peek_char(l->src)); + } + strbuilder_append_char(&builder, '\''); + infile_next_char(l->src); + infile_next_char(l->src); + tok->kind = TokenKind_character_constant; + tok->value.string = builder.buf; + } else if (c == '"') { + infile_next_char(l->src); + StrBuilder builder; + strbuilder_init(&builder); + while (1) { + char ch = infile_peek_char(l->src); + if (ch == '"') + break; + strbuilder_append_char(&builder, ch); + if (ch == '\\') { + infile_next_char(l->src); + strbuilder_append_char(&builder, infile_peek_char(l->src)); + } + infile_next_char(l->src); + } + infile_next_char(l->src); + tok->kind = TokenKind_literal_str; + tok->value.string = builder.buf; + } else if (isdigit(c)) { + // TODO: implement tokenization of pp-number. + StrBuilder builder; + strbuilder_init(&builder); + while (isalnum(infile_peek_char(l->src))) { + strbuilder_append_char(&builder, infile_peek_char(l->src)); + infile_next_char(l->src); + } + if (infile_peek_char(l->src) == '.' && isdigit(infile_peek_char2(l->src))) { + strbuilder_append_char(&builder, infile_peek_char(l->src)); + infile_next_char(l->src); + while (isdigit(infile_peek_char(l->src))) { + strbuilder_append_char(&builder, infile_peek_char(l->src)); + infile_next_char(l->src); + } + tok->kind = TokenKind_literal_double; + tok->value.floating = strtod(builder.buf, NULL); + } else { + tok->kind = TokenKind_literal_int; + tok->value.integer = strtol(builder.buf, NULL, 0); + } + } else if (isalpha(c) || c == '_') { + StrBuilder builder; + strbuilder_init(&builder); + while (isalnum(infile_peek_char(l->src)) || infile_peek_char(l->src) == '_') { + strbuilder_append_char(&builder, infile_peek_char(l->src)); + infile_next_char(l->src); + } + tok->kind = TokenKind_ident; + tok->value.string = builder.buf; + } else if (c == '\n') { + infile_next_char(l->src); + tok->kind = TokenKind_newline; + + // Reset expect_header_name at the end of line. It handles cases like: + // + // #ifdef ADDITIONAL_HEADER + // #include ADDITIONAL_HEADER + // #endif + // + // Even if ADDITIONAL_HEADER is undefined, this include directive line is tokenized. If the flag were not + // reset, the next occurrence of '<' or '"' would be recognized as part of a header name. + l->expect_header_name = false; + } else if (isspace(c)) { + while (isspace((c = infile_peek_char(l->src)))) { + if (c == '\n') + break; + infile_next_char(l->src); + } + if (l->at_bol && infile_peek_char(l->src) == '#') { + infile_next_char(l->src); + pplexer_tokenize_pp_directive(l, tok); + } else { + tok->kind = TokenKind_whitespace; + } + } else { + infile_next_char(l->src); + tok->kind = TokenKind_other; + char* buf = calloc(2, sizeof(char)); + buf[0] = c; + tok->value.string = buf; + } + l->at_bol = tok->kind == TokenKind_newline; + } + Token* eof_tok = tokens_push_new(l->tokens); + eof_tok->loc = l->src->loc; + eof_tok->kind = TokenKind_eof; +} + +TokenArray* tokenize(InFile* src) { + Lexer* l = lexer_new(src); + do_tokenize_all(l); + return l->tokens; +} + +TokenArray* convert_pp_tokens_to_tokens(TokenArray* pp_tokens) { + TokenArray* tokens = calloc(1, sizeof(TokenArray)); + // tokens need not store whitespace tokens. + tokens_init(tokens, pp_tokens->len / 2); + + for (size_t pos = 0; pos < pp_tokens->len; ++pos) { + Token* pp_tok = &pp_tokens->data[pos]; + TokenKind k = pp_tok->kind; + if (k == TokenKind_removed || k == TokenKind_whitespace || k == TokenKind_newline) { + continue; + } + Token* tok = tokens_push_new(tokens); + tok->loc = pp_tok->loc; + if (k == TokenKind_character_constant) { + tok->kind = TokenKind_literal_int; + int ch = pp_tok->value.string[1]; + if (ch == '\\') { + ch = pp_tok->value.string[2]; + if (ch == 'a') { + ch = '\a'; + } else if (ch == 'b') { + ch = '\b'; + } else if (ch == 'f') { + ch = '\f'; + } else if (ch == 'n') { + ch = '\n'; + } else if (ch == 'r') { + ch = '\r'; + } else if (ch == 't') { + ch = '\t'; + } else if (ch == 'v') { + ch = '\v'; + } else if (ch == '0') { + ch = '\0'; + } else if (ch == 'e') { + // \e is not a part of Standard C, but commonly supported. + ch = 27; + } + } + tok->value.integer = ch; + } else if (k == TokenKind_literal_str) { + tok->kind = pp_tok->kind; + + size_t len = strlen(pp_tok->value.string); + char* buf = calloc(len + 1, sizeof(char)); + for (size_t i = 0, j = 0; i < len; i++, j++) { + if (pp_tok->value.string[i] == '\\' && pp_tok->value.string[i + 1] == 'e') { + // \e is not a part of Standard C, but commonly supported. + buf[j] = 033; + i++; + } else { + buf[j] = pp_tok->value.string[i]; + } + } + tok->value.string = buf; + } else if (k == TokenKind_ident) { + if (strcmp(pp_tok->value.string, "alignas") == 0) { + tok->kind = TokenKind_keyword_alignas; + } else if (strcmp(pp_tok->value.string, "alignof") == 0) { + tok->kind = TokenKind_keyword_alignof; + } else if (strcmp(pp_tok->value.string, "auto") == 0) { + tok->kind = TokenKind_keyword_auto; + } else if (strcmp(pp_tok->value.string, "bool") == 0) { + tok->kind = TokenKind_keyword_bool; + } else if (strcmp(pp_tok->value.string, "break") == 0) { + tok->kind = TokenKind_keyword_break; + } else if (strcmp(pp_tok->value.string, "case") == 0) { + tok->kind = TokenKind_keyword_case; + } else if (strcmp(pp_tok->value.string, "char") == 0) { + tok->kind = TokenKind_keyword_char; + } else if (strcmp(pp_tok->value.string, "const") == 0) { + tok->kind = TokenKind_keyword_const; + } else if (strcmp(pp_tok->value.string, "constexpr") == 0) { + tok->kind = TokenKind_keyword_constexpr; + } else if (strcmp(pp_tok->value.string, "continue") == 0) { + tok->kind = TokenKind_keyword_continue; + } else if (strcmp(pp_tok->value.string, "default") == 0) { + tok->kind = TokenKind_keyword_default; + } else if (strcmp(pp_tok->value.string, "do") == 0) { + tok->kind = TokenKind_keyword_do; + } else if (strcmp(pp_tok->value.string, "double") == 0) { + tok->kind = TokenKind_keyword_double; + } else if (strcmp(pp_tok->value.string, "else") == 0) { + tok->kind = TokenKind_keyword_else; + } else if (strcmp(pp_tok->value.string, "enum") == 0) { + tok->kind = TokenKind_keyword_enum; + } else if (strcmp(pp_tok->value.string, "extern") == 0) { + tok->kind = TokenKind_keyword_extern; + } else if (strcmp(pp_tok->value.string, "false") == 0) { + tok->kind = TokenKind_keyword_false; + } else if (strcmp(pp_tok->value.string, "float") == 0) { + tok->kind = TokenKind_keyword_float; + } else if (strcmp(pp_tok->value.string, "for") == 0) { + tok->kind = TokenKind_keyword_for; + } else if (strcmp(pp_tok->value.string, "goto") == 0) { + tok->kind = TokenKind_keyword_goto; + } else if (strcmp(pp_tok->value.string, "if") == 0) { + tok->kind = TokenKind_keyword_if; + } else if (strcmp(pp_tok->value.string, "inline") == 0) { + tok->kind = TokenKind_keyword_inline; + } else if (strcmp(pp_tok->value.string, "int") == 0) { + tok->kind = TokenKind_keyword_int; + } else if (strcmp(pp_tok->value.string, "long") == 0) { + tok->kind = TokenKind_keyword_long; + } else if (strcmp(pp_tok->value.string, "nullptr") == 0) { + tok->kind = TokenKind_keyword_nullptr; + } else if (strcmp(pp_tok->value.string, "register") == 0) { + tok->kind = TokenKind_keyword_register; + } else if (strcmp(pp_tok->value.string, "restrict") == 0) { + tok->kind = TokenKind_keyword_restrict; + } else if (strcmp(pp_tok->value.string, "return") == 0) { + tok->kind = TokenKind_keyword_return; + } else if (strcmp(pp_tok->value.string, "short") == 0) { + tok->kind = TokenKind_keyword_short; + } else if (strcmp(pp_tok->value.string, "signed") == 0) { + tok->kind = TokenKind_keyword_signed; + } else if (strcmp(pp_tok->value.string, "sizeof") == 0) { + tok->kind = TokenKind_keyword_sizeof; + } else if (strcmp(pp_tok->value.string, "static") == 0) { + tok->kind = TokenKind_keyword_static; + } else if (strcmp(pp_tok->value.string, "static_assert") == 0) { + tok->kind = TokenKind_keyword_static_assert; + } else if (strcmp(pp_tok->value.string, "struct") == 0) { + tok->kind = TokenKind_keyword_struct; + } else if (strcmp(pp_tok->value.string, "switch") == 0) { + tok->kind = TokenKind_keyword_switch; + } else if (strcmp(pp_tok->value.string, "thread_local") == 0) { + tok->kind = TokenKind_keyword_thread_local; + } else if (strcmp(pp_tok->value.string, "true") == 0) { + tok->kind = TokenKind_keyword_true; + } else if (strcmp(pp_tok->value.string, "typedef") == 0) { + tok->kind = TokenKind_keyword_typedef; + } else if (strcmp(pp_tok->value.string, "typeof") == 0) { + tok->kind = TokenKind_keyword_typeof; + } else if (strcmp(pp_tok->value.string, "typeof_unqual") == 0) { + tok->kind = TokenKind_keyword_typeof_unqual; + } else if (strcmp(pp_tok->value.string, "union") == 0) { + tok->kind = TokenKind_keyword_union; + } else if (strcmp(pp_tok->value.string, "unsigned") == 0) { + tok->kind = TokenKind_keyword_unsigned; + } else if (strcmp(pp_tok->value.string, "void") == 0) { + tok->kind = TokenKind_keyword_void; + } else if (strcmp(pp_tok->value.string, "volatile") == 0) { + tok->kind = TokenKind_keyword_volatile; + } else if (strcmp(pp_tok->value.string, "while") == 0) { + tok->kind = TokenKind_keyword_while; + } else if (strcmp(pp_tok->value.string, "_Atomic") == 0) { + tok->kind = TokenKind_keyword__Atomic; + } else if (strcmp(pp_tok->value.string, "_BitInt") == 0) { + tok->kind = TokenKind_keyword__BitInt; + } else if (strcmp(pp_tok->value.string, "_Complex") == 0) { + tok->kind = TokenKind_keyword__Complex; + } else if (strcmp(pp_tok->value.string, "_Decimal128") == 0) { + tok->kind = TokenKind_keyword__Decimal128; + } else if (strcmp(pp_tok->value.string, "_Decimal32") == 0) { + tok->kind = TokenKind_keyword__Decimal32; + } else if (strcmp(pp_tok->value.string, "_Decimal64") == 0) { + tok->kind = TokenKind_keyword__Decimal64; + } else if (strcmp(pp_tok->value.string, "_Generic") == 0) { + tok->kind = TokenKind_keyword__Generic; + } else if (strcmp(pp_tok->value.string, "_Imaginary") == 0) { + tok->kind = TokenKind_keyword__Imaginary; + } else if (strcmp(pp_tok->value.string, "_Noreturn") == 0) { + tok->kind = TokenKind_keyword__Noreturn; + } else { + tok->kind = TokenKind_ident; + tok->value = pp_tok->value; + } + } else if (k == TokenKind_other) { + unreachable(); + } else { + tok->kind = pp_tok->kind; + tok->value = pp_tok->value; + } + } + + return tokens; +} diff --git a/src/cc1/tokenize.h b/src/cc1/tokenize.h new file mode 100644 index 0000000..fd334a1 --- /dev/null +++ b/src/cc1/tokenize.h @@ -0,0 +1,10 @@ +#ifndef DUCC_TOKENIZE_H +#define DUCC_TOKENIZE_H + +#include "io.h" +#include "token.h" + +TokenArray* tokenize(InFile* src); +TokenArray* convert_pp_tokens_to_tokens(TokenArray* pp_tokens); + +#endif diff --git a/src/cli.c b/src/cli.c deleted file mode 100644 index b890375..0000000 --- a/src/cli.c +++ /dev/null @@ -1,133 +0,0 @@ -#include "cli.h" -#include -#include -#include -#include "common.h" -#include "version.h" - -static void print_version() { - printf("ducc v%s\n", DUCC_VERSION); -} - -CliArgs* parse_cli_args(int argc, char** argv) { - const char* output_filename = NULL; - int positional_arguments_start = -1; - bool opt_c = false; - bool opt_E = false; - bool opt_wasm = false; - bool opt_MD = false; - bool opt_MMD = false; - bool opt_g = false; - StrArray include_dirs; - strings_init(&include_dirs); - StrArray defines; - strings_init(&defines); - - for (int i = 1; i < argc; ++i) { - if (argv[i][0] != '-') { - positional_arguments_start = i; - break; - } - char c = argv[i][1]; - if (c == 'f') { - // ignore - } else if (c == 'g') { - // ignore - } else if (c == 'm') { - // ignore - } else if (c == 'O') { - // ignore - } else if (c == 'W') { - // ignore - } else if (c == 'M' && argv[i][2] == '\0') { - // ignore -M - } else if (c == 'I') { - const char* dir = NULL; - if (argv[i][2] != '\0') { - // -Ipath format - dir = argv[i] + 2; - } else if (argc > i + 1) { - // -I path format - dir = argv[i + 1]; - ++i; - } else { - fatal_error("-I requires directory"); - } - strings_push(&include_dirs, dir); - } else if (c == 'D') { - const char* def = NULL; - if (argv[i][2] != '\0') { - // -DFOO or -DFOO=value format - def = argv[i] + 2; - } else if (argc > i + 1) { - // -D FOO or -D FOO=value format - def = argv[i + 1]; - ++i; - } else { - fatal_error("-D requires macro definition"); - } - strings_push(&defines, def); - } else if (c == 'o') { - if (argc <= i + 1) { - fatal_error("-o requires filename"); - } - output_filename = argv[i + 1]; - ++i; - } else if (c == 'c') { - opt_c = true; - } else if (c == 'E') { - opt_E = true; - } else if (c == 'g') { - opt_g = true; - } else if (strcmp(argv[i], "-MD") == 0) { - opt_MD = true; - } else if (strcmp(argv[i], "-MMD") == 0) { - opt_MMD = true; - } else if (strcmp(argv[i], "--version") == 0) { - print_version(); - exit(0); - } else if (str_starts_with(argv[i], "-std=")) { - // ignore -std=* - } else if (strcmp(argv[i], "--wasm") == 0) { - opt_wasm = true; - } else { - fatal_error("unknown option: %s", argv[i]); - } - } - if (positional_arguments_start == -1) { - fatal_error("usage: ducc "); - } - - CliArgs* a = calloc(1, sizeof(CliArgs)); - a->input_filename = argv[positional_arguments_start]; - a->output_filename = output_filename; - a->output_assembly = !output_filename || str_ends_with(output_filename, ".s") || opt_wasm; - a->only_compile = opt_c; - a->preprocess_only = opt_E; - a->totally_deligate_to_gcc = false; - a->wasm = opt_wasm; - a->gcc_command = NULL; - a->generate_system_deps = opt_MD; - a->generate_user_deps = opt_MD || opt_MMD; - a->generate_debug_info = opt_g; - a->include_dirs = include_dirs; - a->defines = defines; - - if (!a->only_compile && str_ends_with(a->input_filename, ".o")) { - a->totally_deligate_to_gcc = true; - StrBuilder builder; - strbuilder_init(&builder); - strbuilder_append_string(&builder, "gcc "); - for (int i = 1; i < argc; ++i) { - strbuilder_append_char(&builder, '\''); - strbuilder_append_string(&builder, argv[i]); - strbuilder_append_char(&builder, '\''); - if (i != argc - 1) { - strbuilder_append_char(&builder, ' '); - } - } - a->gcc_command = builder.buf; - } - - return a; -} diff --git a/src/cli.h b/src/cli.h deleted file mode 100644 index 7691323..0000000 --- a/src/cli.h +++ /dev/null @@ -1,24 +0,0 @@ -#ifndef DUCC_CLI_H -#define DUCC_CLI_H - -#include "common.h" - -typedef struct { - const char* input_filename; - const char* output_filename; - bool output_assembly; - bool only_compile; - bool preprocess_only; - bool generate_system_deps; - bool generate_user_deps; - bool generate_debug_info; - bool totally_deligate_to_gcc; - bool wasm; - const char* gcc_command; - StrArray include_dirs; - StrArray defines; -} CliArgs; - -CliArgs* parse_cli_args(int argc, char** argv); - -#endif diff --git a/src/codegen.c b/src/codegen.c deleted file mode 100644 index 10a0287..0000000 --- a/src/codegen.c +++ /dev/null @@ -1,925 +0,0 @@ -#include "codegen.h" -#include -#include -#include "common.h" -#include "parse.h" -#include "preprocess.h" - -typedef enum { - GenMode_lval, - GenMode_rval, -} GenMode; - -typedef struct { - Program* prog; - FILE* out; - int next_label; - int* loop_labels; - AstNode* current_func; - int switch_label; -} CodeGen; - -static CodeGen* codegen_new(Program* prog, FILE* out) { - CodeGen* g = calloc(1, sizeof(CodeGen)); - g->prog = prog; - g->out = out; - g->next_label = 1; - g->loop_labels = calloc(1024, sizeof(int)); - g->switch_label = -1; - return g; -} - -static int codegen_new_label(CodeGen* g) { - int new_label = g->next_label; - ++g->next_label; - return new_label; -} - -static void codegen_expr(CodeGen* g, AstNode* ast, GenMode gen_mode); -static void codegen_stmt(CodeGen* g, AstNode* ast); - -static const char* param_reg(int n) { - if (n == 0) { - return "rdi"; - } else if (n == 1) { - return "rsi"; - } else if (n == 2) { - return "rdx"; - } else if (n == 3) { - return "rcx"; - } else if (n == 4) { - return "r8"; - } else if (n == 5) { - return "r9"; - } else { - unreachable(); - } -} - -static void codegen_func_prologue(CodeGen* g, FuncDefNode* func_def) { - fprintf(g->out, " push rbp\n"); - fprintf(g->out, " mov rbp, rsp\n"); - for (int i = 0, j = 0; i < func_def->params->as.list->len; ++i) { - AstNode* param = &func_def->params->as.list->items[i]; - if (param->as.param->stack_offset >= 0) { - fprintf(g->out, " push %s\n", param_reg(j++)); - } - } - // Note: rsp must be aligned to 8. - fprintf(g->out, " sub rsp, %d\n", to_aligned(func_def->stack_size, 8)); -} - -static void codegen_func_epilogue(CodeGen* g) { - fprintf(g->out, " mov rsp, rbp\n"); - fprintf(g->out, " pop rbp\n"); - fprintf(g->out, " ret\n"); -} - -static void codegen_int_expr(CodeGen* g, IntExprNode* expr) { - fprintf(g->out, " mov rax, %d\n", expr->value); -} - -static void codegen_str_expr(CodeGen* g, StrExprNode* expr) { - fprintf(g->out, " lea rax, .Lstr__%d[rip]\n", expr->idx); -} - -static void codegen_unary_expr(CodeGen* g, UnaryExprNode* expr) { - codegen_expr(g, expr->operand, GenMode_rval); - if (expr->op == TokenKind_not) { - fprintf(g->out, " mov rdi, 0\n"); - fprintf(g->out, " cmp rax, rdi\n"); - fprintf(g->out, " sete al\n"); - fprintf(g->out, " movzb rax, al\n"); - } else if (expr->op == TokenKind_tilde) { - fprintf(g->out, " not rax\n"); - } else { - unreachable(); - } -} - -static void codegen_ref_expr(CodeGen* g, RefExprNode* expr) { - codegen_expr(g, expr->operand, GenMode_lval); -} - -static void codegen_lval2rval(CodeGen* g, Type* ty) { - if (ty->kind == TypeKind_array || ty->kind == TypeKind_func) { - return; - } - - int size = type_sizeof(ty); - if (size == 1) { - fprintf(g->out, " movsx rax, BYTE PTR [rax]\n"); - } else if (size == 2) { - fprintf(g->out, " movsx rax, WORD PTR [rax]\n"); - } else if (size == 4) { - fprintf(g->out, " movsxd rax, DWORD PTR [rax]\n"); - } else if (size == 8) { - fprintf(g->out, " mov rax, [rax]\n"); - } else { - // Do nothing. - } -} - -static void codegen_deref_expr(CodeGen* g, DerefExprNode* expr, GenMode gen_mode) { - codegen_expr(g, expr->operand, GenMode_rval); - if (gen_mode == GenMode_rval) { - codegen_lval2rval(g, expr->operand->ty->base); - } -} - -static void codegen_cast_expr(CodeGen* g, CastExprNode* expr, Type* ty) { - codegen_expr(g, expr->operand, GenMode_rval); - - // (void) cast does nothing. - if (ty->kind == TypeKind_void) - return; - - int src_size = type_sizeof(expr->operand->ty); - int dst_size = type_sizeof(ty); - - if (src_size == dst_size) - return; - - if (dst_size == 1) { - fprintf(g->out, " movsx rax, al\n"); - } else if (dst_size == 2) { - if (src_size == 1) { - fprintf(g->out, " movsx rax, al\n"); - } else { - fprintf(g->out, " movsx rax, ax\n"); - } - } else if (dst_size == 4) { - if (src_size == 1) { - fprintf(g->out, " movsx rax, al\n"); - } else if (src_size == 2) { - fprintf(g->out, " movsx rax, ax\n"); - } else { - fprintf(g->out, " movsxd rax, eax\n"); - } - } else if (dst_size == 8) { - if (src_size == 1) { - fprintf(g->out, " movsx rax, al\n"); - } else if (src_size == 2) { - fprintf(g->out, " movsx rax, ax\n"); - } else if (src_size == 4) { - fprintf(g->out, " movsxd rax, eax\n"); - } - } -} - -static void codegen_logical_expr(CodeGen* g, LogicalExprNode* expr) { - int label = codegen_new_label(g); - - if (expr->op == TokenKind_andand) { - codegen_expr(g, expr->lhs, GenMode_rval); - fprintf(g->out, " cmp rax, 0\n"); - fprintf(g->out, " je .Lelse%d\n", label); - codegen_expr(g, expr->rhs, GenMode_rval); - fprintf(g->out, " jmp .Lend%d\n", label); - fprintf(g->out, ".Lelse%d:\n", label); - fprintf(g->out, " mov rax, 0\n"); - fprintf(g->out, ".Lend%d:\n", label); - } else { - codegen_expr(g, expr->lhs, GenMode_rval); - fprintf(g->out, " cmp rax, 0\n"); - fprintf(g->out, " je .Lelse%d\n", label); - fprintf(g->out, " mov rax, 1\n"); - fprintf(g->out, " jmp .Lend%d\n", label); - fprintf(g->out, ".Lelse%d:\n", label); - codegen_expr(g, expr->rhs, GenMode_rval); - fprintf(g->out, ".Lend%d:\n", label); - } -} - -static void codegen_binary_expr(CodeGen* g, BinaryExprNode* expr, GenMode gen_mode) { - codegen_expr(g, expr->lhs, gen_mode); - fprintf(g->out, " push rax\n"); - codegen_expr(g, expr->rhs, gen_mode); - fprintf(g->out, " mov rdi, rax\n"); - fprintf(g->out, " pop rax\n"); - // rax=lhs, rdi=rhs - if (expr->op == TokenKind_plus) { - fprintf(g->out, " add rax, rdi\n"); - } else if (expr->op == TokenKind_minus) { - fprintf(g->out, " sub rax, rdi\n"); - } else if (expr->op == TokenKind_star) { - fprintf(g->out, " imul rax, rdi\n"); - } else if (expr->op == TokenKind_slash) { - if (type_is_unsigned(expr->lhs->ty)) { - fprintf(g->out, " xor rdx, rdx\n"); - fprintf(g->out, " div rdi\n"); - } else { - fprintf(g->out, " cqo\n"); - fprintf(g->out, " idiv rdi\n"); - } - } else if (expr->op == TokenKind_percent) { - if (type_is_unsigned(expr->lhs->ty)) { - fprintf(g->out, " xor rdx, rdx\n"); - fprintf(g->out, " div rdi\n"); - } else { - fprintf(g->out, " cqo\n"); - fprintf(g->out, " idiv rdi\n"); - } - fprintf(g->out, " mov rax, rdx\n"); - } else if (expr->op == TokenKind_and) { - fprintf(g->out, " and rax, rdi\n"); - } else if (expr->op == TokenKind_or) { - fprintf(g->out, " or rax, rdi\n"); - } else if (expr->op == TokenKind_xor) { - fprintf(g->out, " xor rax, rdi\n"); - } else if (expr->op == TokenKind_lshift) { - fprintf(g->out, " mov rcx, rdi\n"); - fprintf(g->out, " shl rax, cl\n"); - } else if (expr->op == TokenKind_rshift) { - fprintf(g->out, " mov rcx, rdi\n"); - if (type_is_unsigned(expr->lhs->ty)) { - fprintf(g->out, " shr rax, cl\n"); - } else { - fprintf(g->out, " sar rax, cl\n"); - } - } else if (expr->op == TokenKind_eq) { - fprintf(g->out, " cmp rax, rdi\n"); - fprintf(g->out, " sete al\n"); - fprintf(g->out, " movzb rax, al\n"); - } else if (expr->op == TokenKind_ne) { - fprintf(g->out, " cmp rax, rdi\n"); - fprintf(g->out, " setne al\n"); - fprintf(g->out, " movzb rax, al\n"); - } else if (expr->op == TokenKind_lt) { - fprintf(g->out, " cmp rax, rdi\n"); - fprintf(g->out, " setl al\n"); - fprintf(g->out, " movzb rax, al\n"); - } else if (expr->op == TokenKind_le) { - fprintf(g->out, " cmp rax, rdi\n"); - fprintf(g->out, " setle al\n"); - fprintf(g->out, " movzb rax, al\n"); - } else { - unreachable(); - } -} - -static void codegen_cond_expr(CodeGen* g, CondExprNode* expr, GenMode gen_mode) { - int label = codegen_new_label(g); - - codegen_expr(g, expr->cond, GenMode_rval); - fprintf(g->out, " cmp rax, 0\n"); - fprintf(g->out, " je .Lelse%d\n", label); - codegen_expr(g, expr->then, gen_mode); - fprintf(g->out, " jmp .Lend%d\n", label); - fprintf(g->out, ".Lelse%d:\n", label); - codegen_expr(g, expr->else_, gen_mode); - fprintf(g->out, ".Lend%d:\n", label); -} - -static void codegen_assign_expr_helper(CodeGen* g, AssignExprNode* expr) { - if (expr->op == TokenKind_assign) { - return; - } - - fprintf(g->out, " mov rdi, rax\n"); - fprintf(g->out, " mov rax, [rsp]\n"); - codegen_lval2rval(g, expr->lhs->ty); - - if (expr->op == TokenKind_assign_add) { - fprintf(g->out, " add rax, rdi\n"); - } else if (expr->op == TokenKind_assign_sub) { - fprintf(g->out, " sub rax, rdi\n"); - } else if (expr->op == TokenKind_assign_mul) { - fprintf(g->out, " imul rax, rdi\n"); - } else if (expr->op == TokenKind_assign_div) { - if (type_is_unsigned(expr->lhs->ty)) { - fprintf(g->out, " xor rdx, rdx\n"); - fprintf(g->out, " div rdi\n"); - } else { - fprintf(g->out, " cqo\n"); - fprintf(g->out, " idiv rdi\n"); - } - } else if (expr->op == TokenKind_assign_mod) { - if (type_is_unsigned(expr->lhs->ty)) { - fprintf(g->out, " xor rdx, rdx\n"); - fprintf(g->out, " div rdi\n"); - } else { - fprintf(g->out, " cqo\n"); - fprintf(g->out, " idiv rdi\n"); - } - fprintf(g->out, " mov rax, rdx\n"); - } else if (expr->op == TokenKind_assign_or) { - fprintf(g->out, " or rax, rdi\n"); - } else if (expr->op == TokenKind_assign_and) { - fprintf(g->out, " and rax, rdi\n"); - } else if (expr->op == TokenKind_assign_xor) { - fprintf(g->out, " xor rax, rdi\n"); - } else if (expr->op == TokenKind_assign_lshift) { - fprintf(g->out, " mov rcx, rdi\n"); - fprintf(g->out, " shl rax, cl\n"); - } else if (expr->op == TokenKind_assign_rshift) { - fprintf(g->out, " mov rcx, rdi\n"); - if (type_is_unsigned(expr->lhs->ty)) { - fprintf(g->out, " shr rax, cl\n"); - } else { - fprintf(g->out, " sar rax, cl\n"); - } - } else { - unreachable(); - } -} - -static void codegen_assign_expr(CodeGen* g, AssignExprNode* expr) { - int sizeof_lhs = type_sizeof(expr->lhs->ty); - - codegen_expr(g, expr->lhs, GenMode_lval); - fprintf(g->out, " push rax\n"); - codegen_expr(g, expr->rhs, GenMode_rval); - codegen_assign_expr_helper(g, expr); - fprintf(g->out, " pop rdi\n"); - if (sizeof_lhs == 1) { - fprintf(g->out, " mov BYTE PTR [rdi], al\n"); - } else if (sizeof_lhs == 2) { - fprintf(g->out, " mov WORD PTR [rdi], ax\n"); - } else if (sizeof_lhs == 4) { - fprintf(g->out, " mov DWORD PTR [rdi], eax\n"); - } else if (sizeof_lhs == 8) { - fprintf(g->out, " mov [rdi], rax\n"); - } else { - if (expr->op != TokenKind_assign) { - unimplemented(); - } - // rax: address of rhs - // rdi: address of lhs - // Perform byte-wise copy. Use r10 register as temporary space. - for (int i = 0; i < sizeof_lhs; ++i) { - fprintf(g->out, " mov r10b, %d[rax]\n", i); - fprintf(g->out, " mov %d[rdi], r10b\n", i); - } - } -} - -static void codegen_args(CodeGen* g, AstNode* args) { - int* required_gp_regs_for_each_arg = calloc(args->as.list->len, sizeof(int)); - - int gp_regs = 6; - for (int i = 0; i < args->as.list->len; ++i) { - AstNode* arg = &args->as.list->items[i]; - int ty_size = type_sizeof(arg->ty); - // TODO - if (arg->ty->kind == TypeKind_array) { - ty_size = 8; - } - int required_gp_regs; - if (ty_size <= 8) { - required_gp_regs = 1; - } else if (ty_size <= 16) { - required_gp_regs = 2; - } else { - required_gp_regs = 0; - } - if (required_gp_regs <= gp_regs) { - gp_regs -= required_gp_regs; - } else { - required_gp_regs = 0; - } - required_gp_regs_for_each_arg[i] = required_gp_regs; - } - - // Evaluate arguments in the reverse order (right to left). - // Arguments passed by stack. - for (int i = args->as.list->len - 1; i >= 0; --i) { - AstNode* arg = &args->as.list->items[i]; - if (required_gp_regs_for_each_arg[i] == 0) { - codegen_expr(g, arg, GenMode_rval); - int ty_size = type_sizeof(arg->ty); - if (ty_size <= 8) { - fprintf(g->out, " push rax\n"); - } else { - // Perform byte-wise copy onto stack. Use r10 register as temporary space. - // NOTE: rsp must be aligned to 8. - fprintf(g->out, " sub rsp, %d\n", to_aligned(ty_size, 8)); - for (int i = 0; i < ty_size; ++i) { - // Copy a sinle byte from the address that rax points to to the stack via r10 register. - fprintf(g->out, " mov r10b, %d[rax]\n", i); - fprintf(g->out, " mov %d[rsp], r10b\n", i); - } - } - } - } - // Arguments passed by registers. - for (int i = args->as.list->len - 1; i >= 0; --i) { - AstNode* arg = &args->as.list->items[i]; - if (required_gp_regs_for_each_arg[i] != 0) { - codegen_expr(g, arg, GenMode_rval); - if (required_gp_regs_for_each_arg[i] == 1) { - fprintf(g->out, " push rax\n"); - } else { - assert(required_gp_regs_for_each_arg[i] == 2); - fprintf(g->out, " push [rax]\n"); - fprintf(g->out, " push [rax+8]\n"); - } - } - } - // Pop pushed arguments onto registers. - for (int i = 0, j = 0; i < args->as.list->len; ++i) { - for (int k = 0; k < required_gp_regs_for_each_arg[i]; ++k) { - fprintf(g->out, " pop %s\n", param_reg(j++)); - } - } -} - -static void codegen_func_call(CodeGen* g, FuncCallNode* call) { - const char* func_name = NULL; - if (call->func->kind == AstNodeKind_func) { - func_name = call->func->as.func->name; - } - - if (func_name && strcmp(func_name, "__ducc_va_start") == 0) { - fprintf(g->out, " # __ducc_va_start BEGIN\n"); - AstNode* va_list_args = &call->args->as.list->items[0]; - codegen_expr(g, va_list_args, GenMode_rval); - fprintf(g->out, " mov rdi, rax\n"); - - // Allocate save area. - fprintf(g->out, " sub rsp, 48\n"); - - fprintf(g->out, " mov [rsp+ 0], rdi\n"); - fprintf(g->out, " mov [rsp+ 8], rsi\n"); - fprintf(g->out, " mov [rsp+16], rdx\n"); - fprintf(g->out, " mov [rsp+24], rcx\n"); - fprintf(g->out, " mov [rsp+32], r8\n"); - fprintf(g->out, " mov [rsp+40], r9\n"); - - // Initialize va_list. - fprintf(g->out, " mov DWORD PTR [rdi], 8\n"); // gp_offset - fprintf(g->out, " mov DWORD PTR [rdi+4], 0\n"); // fp_offset - fprintf(g->out, " lea rax, [rbp+16]\n"); // overflow_arg_area - fprintf(g->out, " mov QWORD PTR [rdi+8], rax\n"); - fprintf(g->out, " mov QWORD PTR [rdi+16], rsp\n"); // reg_save_area - - fprintf(g->out, " mov rax, 0\n"); // dummy return value - fprintf(g->out, " # __ducc_va_start END\n"); - return; - } - - if (func_name && strcmp(func_name, "__ducc_va_arg") == 0) { - fprintf(g->out, " # __ducc_va_arg BEGIN\n"); - - // Evaluate va_list argument (first argument) - AstNode* va_list_arg = &call->args->as.list->items[0]; - codegen_expr(g, va_list_arg, GenMode_rval); - fprintf(g->out, " mov rdi, rax\n"); // rdi = pointer to va_list - - // Evaluate size argument (second argument) - AstNode* size_arg = &call->args->as.list->items[1]; - codegen_expr(g, size_arg, GenMode_rval); - fprintf(g->out, " mov rsi, rax\n"); // rsi = size - - int label = codegen_new_label(g); - - // Check if gp_offset < 48 (6 registers * 8 bytes) - fprintf(g->out, " mov eax, DWORD PTR [rdi]\n"); // eax = gp_offset - fprintf(g->out, " cmp eax, 48\n"); - fprintf(g->out, " jae .Lva_arg_overflow%d\n", label); - - // Fetch from register save area - fprintf(g->out, " mov rcx, QWORD PTR [rdi+16]\n"); // rcx = reg_save_area - fprintf(g->out, " movsx rdx, eax\n"); // rdx = gp_offset (sign-extended) - fprintf(g->out, " add rcx, rdx\n"); // rcx = reg_save_area + gp_offset - fprintf(g->out, " add eax, 8\n"); // gp_offset += 8 - fprintf(g->out, " mov DWORD PTR [rdi], eax\n"); // store updated gp_offset - fprintf(g->out, " mov rax, rcx\n"); // return pointer to argument - fprintf(g->out, " jmp .Lva_arg_end%d\n", label); - - // Fetch from overflow area (stack) - fprintf(g->out, ".Lva_arg_overflow%d:\n", label); - fprintf(g->out, " mov rcx, QWORD PTR [rdi+8]\n"); // rcx = overflow_arg_area - fprintf(g->out, " mov rax, rcx\n"); // return pointer to argument - fprintf(g->out, " add rcx, 8\n"); // overflow_arg_area += 8 - fprintf(g->out, " mov QWORD PTR [rdi+8], rcx\n"); // store updated overflow_arg_area - - fprintf(g->out, ".Lva_arg_end%d:\n", label); - fprintf(g->out, " # __ducc_va_arg END\n"); - return; - } - - AstNode* args = call->args; - - int gp_regs = 6; - int pass_by_stack_offset = -16; - for (int i = 0; i < args->as.list->len; ++i) { - AstNode* arg = &args->as.list->items[i]; - int ty_size = type_sizeof(arg->ty); - // TODO - if (arg->ty->kind == TypeKind_array) { - ty_size = 8; - } - int required_gp_regs; - if (ty_size <= 8) { - required_gp_regs = 1; - } else if (ty_size <= 16) { - required_gp_regs = 2; - } else { - required_gp_regs = 0; - } - if (required_gp_regs <= gp_regs) { - gp_regs -= required_gp_regs; - } else { - required_gp_regs = 0; - } - if (required_gp_regs == 0) { - pass_by_stack_offset -= to_aligned(ty_size, 8); - } - } - - int label = codegen_new_label(g); - - fprintf(g->out, " mov rax, rsp\n"); - if (-pass_by_stack_offset % 16 != 0) { - fprintf(g->out, " add rax, 8\n"); - } - fprintf(g->out, " and rax, 15\n"); - fprintf(g->out, " cmp rax, 0\n"); - fprintf(g->out, " je .Laligned%d\n", label); - - fprintf(g->out, " sub rsp, 8\n"); - codegen_args(g, args); - fprintf(g->out, " mov rax, 0\n"); - if (func_name) { - fprintf(g->out, " call %s\n", func_name); - } else { - codegen_expr(g, call->func, GenMode_rval); - fprintf(g->out, " call rax\n"); - } - fprintf(g->out, " add rsp, 8\n"); - - fprintf(g->out, " jmp .Lend%d\n", label); - fprintf(g->out, ".Laligned%d:\n", label); - - codegen_args(g, args); - fprintf(g->out, " mov rax, 0\n"); - if (func_name) { - fprintf(g->out, " call %s\n", func_name); - } else { - codegen_expr(g, call->func, GenMode_rval); - fprintf(g->out, " call rax\n"); - } - - fprintf(g->out, ".Lend%d:\n", label); - // Pop pass-by-stack arguments. - fprintf(g->out, " add rsp, %d\n", -pass_by_stack_offset - 16); -} - -static void codegen_lvar(CodeGen* g, LvarNode* lvar, Type* ty, GenMode gen_mode) { - fprintf(g->out, " lea rax, %d[rbp]\n", -lvar->stack_offset); - if (gen_mode == GenMode_rval) { - codegen_lval2rval(g, ty); - } -} - -static void codegen_gvar(CodeGen* g, GvarNode* gvar, Type* ty, GenMode gen_mode) { - fprintf(g->out, " lea rax, %s[rip]\n", gvar->name); - if (gen_mode == GenMode_rval) { - codegen_lval2rval(g, ty); - } -} - -static void codegen_func_ref(CodeGen* g, FuncNode* func) { - fprintf(g->out, " lea rax, %s[rip]\n", func->name); -} - -static void codegen_composite_expr(CodeGen* g, AstNode* ast) { - // Standard C does not have composite expression, but ducc internally has. - for (int i = 0; i < ast->as.list->len; ++i) { - AstNode* expr = ast->as.list->items + i; - codegen_expr(g, expr, GenMode_rval); - } -} - -static void codegen_expr(CodeGen* g, AstNode* ast, GenMode gen_mode) { - if (ast->kind == AstNodeKind_int_expr) { - codegen_int_expr(g, ast->as.int_expr); - } else if (ast->kind == AstNodeKind_str_expr) { - codegen_str_expr(g, ast->as.str_expr); - } else if (ast->kind == AstNodeKind_unary_expr) { - codegen_unary_expr(g, ast->as.unary_expr); - } else if (ast->kind == AstNodeKind_ref_expr) { - codegen_ref_expr(g, ast->as.ref_expr); - } else if (ast->kind == AstNodeKind_deref_expr) { - codegen_deref_expr(g, ast->as.deref_expr, gen_mode); - } else if (ast->kind == AstNodeKind_cast_expr) { - codegen_cast_expr(g, ast->as.cast_expr, ast->ty); - } else if (ast->kind == AstNodeKind_binary_expr) { - codegen_binary_expr(g, ast->as.binary_expr, gen_mode); - } else if (ast->kind == AstNodeKind_cond_expr) { - codegen_cond_expr(g, ast->as.cond_expr, gen_mode); - } else if (ast->kind == AstNodeKind_logical_expr) { - codegen_logical_expr(g, ast->as.logical_expr); - } else if (ast->kind == AstNodeKind_assign_expr) { - codegen_assign_expr(g, ast->as.assign_expr); - } else if (ast->kind == AstNodeKind_func_call) { - codegen_func_call(g, ast->as.func_call); - } else if (ast->kind == AstNodeKind_lvar) { - codegen_lvar(g, ast->as.lvar, ast->ty, gen_mode); - } else if (ast->kind == AstNodeKind_gvar) { - codegen_gvar(g, ast->as.gvar, ast->ty, gen_mode); - } else if (ast->kind == AstNodeKind_func) { - codegen_func_ref(g, ast->as.func); - } else if (ast->kind == AstNodeKind_list) { - codegen_composite_expr(g, ast); - } else { - unreachable(); - } -} - -static void codegen_return_stmt(CodeGen* g, ReturnStmtNode* stmt) { - if (stmt->expr) { - codegen_expr(g, stmt->expr, GenMode_rval); - } - codegen_func_epilogue(g); -} - -static void codegen_if_stmt(CodeGen* g, IfStmtNode* stmt) { - int label = codegen_new_label(g); - - codegen_expr(g, stmt->cond, GenMode_rval); - fprintf(g->out, " cmp rax, 0\n"); - fprintf(g->out, " je .Lelse%d\n", label); - codegen_stmt(g, stmt->then); - fprintf(g->out, " jmp .Lend%d\n", label); - fprintf(g->out, ".Lelse%d:\n", label); - if (stmt->else_) { - codegen_stmt(g, stmt->else_); - } - fprintf(g->out, ".Lend%d:\n", label); -} - -static void codegen_for_stmt(CodeGen* g, ForStmtNode* stmt) { - int label = codegen_new_label(g); - ++g->loop_labels; - *g->loop_labels = label; - - if (stmt->init) { - codegen_expr(g, stmt->init, GenMode_rval); - } - fprintf(g->out, ".Lbegin%d:\n", label); - codegen_expr(g, stmt->cond, GenMode_rval); - fprintf(g->out, " cmp rax, 0\n"); - fprintf(g->out, " je .Lend%d\n", label); - codegen_stmt(g, stmt->body); - fprintf(g->out, ".Lcontinue%d:\n", label); - if (stmt->update) { - codegen_expr(g, stmt->update, GenMode_rval); - } - fprintf(g->out, " jmp .Lbegin%d\n", label); - fprintf(g->out, ".Lend%d:\n", label); - - --g->loop_labels; -} - -static void codegen_do_while_stmt(CodeGen* g, DoWhileStmtNode* stmt) { - int label = codegen_new_label(g); - ++g->loop_labels; - *g->loop_labels = label; - - fprintf(g->out, ".Lbegin%d:\n", label); - codegen_stmt(g, stmt->body); - fprintf(g->out, ".Lcontinue%d:\n", label); - codegen_expr(g, stmt->cond, GenMode_rval); - fprintf(g->out, " cmp rax, 0\n"); - fprintf(g->out, " je .Lend%d\n", label); - fprintf(g->out, " jmp .Lbegin%d\n", label); - fprintf(g->out, ".Lend%d:\n", label); - - --g->loop_labels; -} - -static void codegen_break_stmt(CodeGen* g) { - if (g->switch_label != -1) { - fprintf(g->out, " jmp .Lend%d\n", g->switch_label); - } else { - int label = *g->loop_labels; - fprintf(g->out, " jmp .Lend%d\n", label); - } -} - -static void codegen_continue_stmt(CodeGen* g) { - int label = *g->loop_labels; - fprintf(g->out, " jmp .Lcontinue%d\n", label); -} - -static void codegen_goto_stmt(CodeGen* g, GotoStmtNode* stmt) { - fprintf(g->out, " jmp .L%s__%s\n", g->current_func->as.func_def->name, stmt->label); -} - -static void codegen_label_stmt(CodeGen* g, LabelStmtNode* stmt) { - fprintf(g->out, ".L%s__%s:\n", g->current_func->as.func_def->name, stmt->name); - codegen_stmt(g, stmt->body); -} - -// Helper to collect case values from the switch body -static void collect_cases(AstNode* stmt, int* case_values, int* case_labels, int* n_cases) { - if (!stmt) - return; - - if (stmt->kind == AstNodeKind_case_label) { - case_values[*n_cases] = stmt->as.case_label->value; - case_labels[*n_cases] = *n_cases + 1; - (*n_cases)++; - collect_cases(stmt->as.case_label->body, case_values, case_labels, n_cases); - } else if (stmt->kind == AstNodeKind_default_label) { - collect_cases(stmt->as.default_label->body, case_values, case_labels, n_cases); - } else if (stmt->kind == AstNodeKind_list) { - for (int i = 0; i < stmt->as.list->len; i++) { - collect_cases(stmt->as.list->items + i, case_values, case_labels, n_cases); - } - } -} - -static bool codegen_switch_body(CodeGen* g, AstNode* stmt, int* case_values, int* case_labels, int n_cases) { - if (!stmt) - return false; - - if (stmt->kind == AstNodeKind_case_label) { - int value = stmt->as.case_label->value; - for (int i = 0; i < n_cases; i++) { - if (case_values[i] == value) { - fprintf(g->out, ".Lcase%d_%d:\n", g->switch_label, case_labels[i]); - break; - } - } - return codegen_switch_body(g, stmt->as.case_label->body, case_values, case_labels, n_cases); - } else if (stmt->kind == AstNodeKind_default_label) { - fprintf(g->out, ".Ldefault%d:\n", g->switch_label); - codegen_switch_body(g, stmt->as.default_label->body, case_values, case_labels, n_cases); - return true; - } else if (stmt->kind == AstNodeKind_list) { - bool default_label_emitted = false; - for (int i = 0; i < stmt->as.list->len; i++) { - default_label_emitted |= - codegen_switch_body(g, stmt->as.list->items + i, case_values, case_labels, n_cases); - } - return default_label_emitted; - } else { - codegen_stmt(g, stmt); - return false; - } -} - -static void codegen_switch_stmt(CodeGen* g, SwitchStmtNode* stmt) { - int switch_label = codegen_new_label(g); - int prev_switch_label = g->switch_label; - g->switch_label = switch_label; - - // Collect all case values and assign labels - int case_values[256]; - int case_labels[256]; - int n_cases = 0; - collect_cases(stmt->body, case_values, case_labels, &n_cases); - - // Generate jump instructions. - codegen_expr(g, stmt->expr, GenMode_rval); - for (int i = 0; i < n_cases; i++) { - fprintf(g->out, " cmp rax, %d\n", case_values[i]); - fprintf(g->out, " je .Lcase%d_%d\n", switch_label, case_labels[i]); - } - fprintf(g->out, " jmp .Ldefault%d\n", switch_label); - - // Generate the switch body with labels. - bool default_label_emitted = codegen_switch_body(g, stmt->body, case_values, case_labels, n_cases); - - if (!default_label_emitted) { - fprintf(g->out, ".Ldefault%d:\n", switch_label); - } - fprintf(g->out, ".Lend%d:\n", switch_label); - - g->switch_label = prev_switch_label; -} - -static void codegen_expr_stmt(CodeGen* g, ExprStmtNode* stmt) { - codegen_expr(g, stmt->expr, GenMode_rval); -} - -static void codegen_block_stmt(CodeGen* g, AstNode* ast) { - for (int i = 0; i < ast->as.list->len; ++i) { - AstNode* stmt = ast->as.list->items + i; - codegen_stmt(g, stmt); - } -} - -static void codegen_stmt(CodeGen* g, AstNode* ast) { - // TODO: support multiple files. - fprintf(g->out, " .loc 1 %d 0\n", ast->loc.line); - - if (ast->kind == AstNodeKind_list) { - codegen_block_stmt(g, ast); - } else if (ast->kind == AstNodeKind_return_stmt) { - codegen_return_stmt(g, ast->as.return_stmt); - } else if (ast->kind == AstNodeKind_if_stmt) { - codegen_if_stmt(g, ast->as.if_stmt); - } else if (ast->kind == AstNodeKind_switch_stmt) { - codegen_switch_stmt(g, ast->as.switch_stmt); - } else if (ast->kind == AstNodeKind_for_stmt) { - codegen_for_stmt(g, ast->as.for_stmt); - } else if (ast->kind == AstNodeKind_do_while_stmt) { - codegen_do_while_stmt(g, ast->as.do_while_stmt); - } else if (ast->kind == AstNodeKind_break_stmt) { - codegen_break_stmt(g); - } else if (ast->kind == AstNodeKind_continue_stmt) { - codegen_continue_stmt(g); - } else if (ast->kind == AstNodeKind_goto_stmt) { - codegen_goto_stmt(g, ast->as.goto_stmt); - } else if (ast->kind == AstNodeKind_label_stmt) { - codegen_label_stmt(g, ast->as.label_stmt); - } else if (ast->kind == AstNodeKind_expr_stmt) { - codegen_expr_stmt(g, ast->as.expr_stmt); - } else if (ast->kind == AstNodeKind_lvar_decl) { - // Do nothing. - } else if (ast->kind == AstNodeKind_nop) { - // Do nothing. - } else if (ast->kind == AstNodeKind_case_label || ast->kind == AstNodeKind_default_label) { - // They are handled by codegen_switch_stmt(). - unreachable(); - } else { - unreachable(); - } -} - -static void codegen_func(CodeGen* g, AstNode* ast) { - g->current_func = ast; - - if (ast->ty->storage_class != StorageClass_static) { - fprintf(g->out, ".globl %s\n", ast->as.func_def->name); - } - fprintf(g->out, "%s:\n", ast->as.func_def->name); - - codegen_func_prologue(g, ast->as.func_def); - codegen_stmt(g, ast->as.func_def->body); - if (strcmp(ast->as.func_def->name, "main") == 0) { - // C99: 5.1.2.2.3 - fprintf(g->out, " mov rax, 0\n"); - } - codegen_func_epilogue(g); - - fprintf(g->out, "\n"); - g->current_func = NULL; -} - -static void codegen_global_var(CodeGen* g, AstNode* var) { - if (var->ty->storage_class == StorageClass_extern) { - return; - } - if (var->ty->storage_class != StorageClass_static) { - fprintf(g->out, ".globl %s\n", var->as.gvar_decl->name); - } - fprintf(g->out, " %s:\n", var->as.gvar_decl->name); - if (!var->as.gvar_decl->expr) { - fprintf(g->out, " .zero %d\n", type_sizeof(var->ty)); - return; - } - - if (var->ty->kind == TypeKind_array && var->as.gvar_decl->expr->kind == AstNodeKind_str_expr) { - const char* str = g->prog->str_literals[var->as.gvar_decl->expr->as.str_expr->idx - 1]; - fprintf(g->out, " .string \"%s\"\n", str); - return; - } - - InitData* data = eval_init_expr(var->as.gvar_decl->expr, var->ty); - - for (size_t i = 0; i < data->len; ++i) { - InitDataBlock* block = &data->blocks[i]; - if (block->kind == InitDataBlockKind_addr) { - fprintf(g->out, " .quad %s\n", block->as.addr.label); - } else { - for (size_t j = 0; j < block->as.bytes.len; ++j) { - fprintf(g->out, " .byte %d\n", block->as.bytes.buf[j]); - } - } - } -} - -void codegen(Program* prog, const char* input_filename, FILE* out) { - CodeGen* g = codegen_new(prog, out); - - fprintf(g->out, ".intel_syntax noprefix\n\n"); - - // TODO: support multiple files. - fprintf(g->out, ".file 1 \"%s\"\n\n", input_filename); - - // For GNU ld: - // https://sourceware.org/binutils/docs/ld/Options.html - fprintf(g->out, ".section .note.GNU-stack,\"\",@progbits\n\n"); - - fprintf(g->out, ".section .rodata\n\n"); - for (int i = 0; prog->str_literals[i]; ++i) { - fprintf(g->out, ".Lstr__%d:\n", i + 1); - fprintf(g->out, " .string \"%s\"\n\n", prog->str_literals[i]); - } - - fprintf(g->out, ".data\n\n"); - for (int i = 0; i < prog->vars->as.list->len; ++i) { - codegen_global_var(g, &prog->vars->as.list->items[i]); - } - - fprintf(g->out, ".text\n\n"); - for (int i = 0; i < prog->funcs->as.list->len; ++i) { - AstNode* func = &prog->funcs->as.list->items[i]; - codegen_func(g, func); - } -} diff --git a/src/codegen.h b/src/codegen.h deleted file mode 100644 index 2c1d7b3..0000000 --- a/src/codegen.h +++ /dev/null @@ -1,8 +0,0 @@ -#ifndef DUCC_CODEGEN_H -#define DUCC_CODEGEN_H - -#include "ast.h" - -void codegen(Program* prog, const char* input_filename, FILE* out); - -#endif diff --git a/src/codegen_wasm.c b/src/codegen_wasm.c deleted file mode 100644 index 01dfe42..0000000 --- a/src/codegen_wasm.c +++ /dev/null @@ -1,164 +0,0 @@ -#include -#include -#include "codegen.h" -#include "common.h" -#include "preprocess.h" - -typedef enum { - GenMode_lval, - GenMode_rval, -} GenMode; - -typedef struct { - FILE* out; - int next_label; - int* loop_labels; - AstNode* current_func; - int switch_label; -} CodeGen; - -static CodeGen* codegen_new(FILE* out) { - CodeGen* g = calloc(1, sizeof(CodeGen)); - g->out = out; - g->next_label = 1; - g->loop_labels = calloc(1024, sizeof(int)); - g->switch_label = -1; - return g; -} - -static void codegen_expr(CodeGen* g, AstNode* ast, GenMode gen_mode); -static void codegen_stmt(CodeGen* g, AstNode* ast); - -static void codegen_func_prologue(CodeGen* g, FuncDefNode* func_def) { - for (int i = 0; i < func_def->params->as.list->len; ++i) { - fprintf(g->out, " (param $l_%s i32)", func_def->params->as.list->items[i].as.param->name); - } - fprintf(g->out, " (result i32)\n"); -} - -static void codegen_func_epilogue(CodeGen*) { -} - -static void codegen_int_expr(CodeGen* g, IntExprNode* expr) { - fprintf(g->out, " i32.const %d\n", expr->value); -} - -static void codegen_binary_expr(CodeGen* g, BinaryExprNode* expr, GenMode gen_mode) { - codegen_expr(g, expr->lhs, gen_mode); - codegen_expr(g, expr->rhs, gen_mode); - if (expr->op == TokenKind_plus) { - fprintf(g->out, " i32.add\n"); - } else if (expr->op == TokenKind_minus) { - fprintf(g->out, " i32.sub\n"); - } else if (expr->op == TokenKind_le) { - fprintf(g->out, " i32.le_s\n"); - } else { - unreachable(); - } -} - -static void codegen_lvar(CodeGen* g, LvarNode* lvar, GenMode) { - fprintf(g->out, " local.get $l_%s\n", lvar->name); -} - -static void codegen_func_call(CodeGen* g, FuncCallNode* call) { - const char* func_name; - if (call->func->kind == AstNodeKind_func) { - func_name = call->func->as.func->name; - } else { - unimplemented(); - } - - AstNode* args = call->args; - for (int i = 0; i < args->as.list->len; ++i) { - AstNode* arg = args->as.list->items + i; - codegen_expr(g, arg, GenMode_rval); - } - fprintf(g->out, " call $%s\n", func_name); -} - -static void codegen_expr(CodeGen* g, AstNode* ast, GenMode gen_mode) { - if (ast->kind == AstNodeKind_int_expr) { - codegen_int_expr(g, ast->as.int_expr); - } else if (ast->kind == AstNodeKind_binary_expr) { - codegen_binary_expr(g, ast->as.binary_expr, gen_mode); - } else if (ast->kind == AstNodeKind_lvar) { - codegen_lvar(g, ast->as.lvar, gen_mode); - } else if (ast->kind == AstNodeKind_func_call) { - codegen_func_call(g, ast->as.func_call); - } else { - unreachable(); - } -} - -static void codegen_return_stmt(CodeGen* g, ReturnStmtNode* stmt) { - if (stmt->expr) { - codegen_expr(g, stmt->expr, GenMode_rval); - } - fprintf(g->out, " return\n"); -} - -static void codegen_if_stmt(CodeGen* g, IfStmtNode* stmt) { - codegen_expr(g, stmt->cond, GenMode_rval); - fprintf(g->out, " (if (result i32)\n"); - fprintf(g->out, " (then\n"); - codegen_stmt(g, stmt->then); - fprintf(g->out, " )\n"); - if (stmt->else_) { - fprintf(g->out, " (else\n"); - codegen_stmt(g, stmt->else_); - fprintf(g->out, " )\n"); - } else { - fprintf(g->out, " (else\n"); - fprintf(g->out, " i32.const 0\n"); - fprintf(g->out, " )\n"); - } - fprintf(g->out, " )\n"); -} - -static void codegen_block_stmt(CodeGen* g, AstNode* ast) { - for (int i = 0; i < ast->as.list->len; ++i) { - AstNode* stmt = ast->as.list->items + i; - codegen_stmt(g, stmt); - } -} - -static void codegen_stmt(CodeGen* g, AstNode* ast) { - if (ast->kind == AstNodeKind_list) { - codegen_block_stmt(g, ast); - } else if (ast->kind == AstNodeKind_return_stmt) { - codegen_return_stmt(g, ast->as.return_stmt); - } else if (ast->kind == AstNodeKind_if_stmt) { - codegen_if_stmt(g, ast->as.if_stmt); - } else if (ast->kind == AstNodeKind_nop) { - // Do nothing. - } else { - unreachable(); - } -} - -static void codegen_func(CodeGen* g, AstNode* ast) { - g->current_func = ast; - - fprintf(g->out, "(func $%s (export \"%s\")", ast->as.func_def->name, ast->as.func_def->name); - - codegen_func_prologue(g, ast->as.func_def); - codegen_stmt(g, ast->as.func_def->body); - codegen_func_epilogue(g); - - fprintf(g->out, ")\n"); - g->current_func = NULL; -} - -void codegen_wasm(Program* prog, FILE* out) { - CodeGen* g = codegen_new(out); - - fprintf(g->out, "(module\n"); - - for (int i = 0; i < prog->funcs->as.list->len; ++i) { - AstNode* func = prog->funcs->as.list->items + i; - codegen_func(g, func); - } - - fprintf(g->out, ")\n"); -} diff --git a/src/codegen_wasm.h b/src/codegen_wasm.h deleted file mode 100644 index 1089154..0000000 --- a/src/codegen_wasm.h +++ /dev/null @@ -1,8 +0,0 @@ -#ifndef DUCC_CODEGEN_WASM_H -#define DUCC_CODEGEN_WASM_H - -#include "ast.h" - -void codegen_wasm(Program* prog, FILE* out); - -#endif diff --git a/src/common.c b/src/common.c deleted file mode 100644 index af35064..0000000 --- a/src/common.c +++ /dev/null @@ -1,88 +0,0 @@ -#include "common.h" -#include -#include -#include -#include - -void fatal_error(const char* msg, ...) { - va_list args; - va_start(args, msg); - vfprintf(stderr, msg, args); - va_end(args); - fprintf(stderr, "\n"); - exit(1); -} - -bool str_starts_with(const char* s, const char* prefix) { - size_t l1 = strlen(s); - size_t l2 = strlen(prefix); - if (l1 < l2) - return false; - return strncmp(s, prefix, l2) == 0; -} - -bool str_ends_with(const char* s, const char* suffix) { - size_t l1 = strlen(s); - size_t l2 = strlen(suffix); - if (l1 < l2) - return false; - return strcmp(s + l1 - l2, suffix) == 0; -} - -void strbuilder_init(StrBuilder* b) { - b->len = 0; - b->capacity = 16; - b->buf = calloc(b->capacity, sizeof(char)); -} - -// `size` must include a trailing null byte. -void strbuilder_reserve(StrBuilder* b, size_t size) { - if (size <= b->capacity) - return; - while (b->capacity < size) { - b->capacity *= 2; - } - b->buf = realloc(b->buf, b->capacity * sizeof(char)); - memset(b->buf + b->len, 0, (b->capacity - b->len) * sizeof(char)); -} - -void strbuilder_append_char(StrBuilder* b, int c) { - strbuilder_reserve(b, b->len + 1 + 1); - b->buf[b->len++] = c; -} - -void strbuilder_append_string(StrBuilder* b, const char* s) { - int len = strlen(s); - strbuilder_reserve(b, b->len + len + 1); - for (int i = 0; i < len; ++i) { - b->buf[b->len++] = s[i]; - } -} - -void strings_init(StrArray* strings) { - strings->len = 0; - strings->capacity = 32; - strings->data = calloc(strings->capacity, sizeof(const char*)); -} - -void strings_reserve(StrArray* strings, size_t size) { - if (size <= strings->capacity) - return; - while (strings->capacity < size) { - strings->capacity *= 2; - } - strings->data = realloc(strings->data, strings->capacity * sizeof(const char*)); - memset(strings->data + strings->len, 0, (strings->capacity - strings->len) * sizeof(const char*)); -} - -int strings_push(StrArray* strings, const char* str) { - strings_reserve(strings, strings->len + 1); - strings->data[strings->len] = str; - return ++strings->len; -} - -void strings_pop(StrArray* strings) { - if (strings->len > 0) { - strings->len--; - } -} diff --git a/src/common.h b/src/common.h deleted file mode 100644 index bb94e79..0000000 --- a/src/common.h +++ /dev/null @@ -1,41 +0,0 @@ -#ifndef DUCC_COMMON_H -#define DUCC_COMMON_H - -#include "ducc.h" - -_Noreturn void fatal_error(const char* msg, ...); - -// TODO -#ifdef __ducc__ -#define unreachable() fatal_error("%s:%d: unreachable", __FILE__, __LINE__) -#endif - -#define unimplemented() fatal_error("%s:%d: unimplemented", __FILE__, __LINE__) - -bool str_starts_with(const char* s, const char* prefix); -bool str_ends_with(const char* s, const char* suffix); - -typedef struct { - size_t len; - size_t capacity; - char* buf; -} StrBuilder; - -void strbuilder_init(StrBuilder* b); -// `size` must include a trailing null byte. -void strbuilder_reserve(StrBuilder* b, size_t size); -void strbuilder_append_char(StrBuilder* b, int c); -void strbuilder_append_string(StrBuilder* b, const char* s); - -typedef struct { - size_t len; - size_t capacity; - const char** data; -} StrArray; - -void strings_init(StrArray* strings); -void strings_reserve(StrArray* strings, size_t size); -int strings_push(StrArray* strings, const char* str); -void strings_pop(StrArray* strings); - -#endif diff --git a/src/ducc.h b/src/ducc.h deleted file mode 100644 index 31ee977..0000000 --- a/src/ducc.h +++ /dev/null @@ -1,11 +0,0 @@ -#ifndef DUCC_DUCC_H -#define DUCC_DUCC_H - -#include -#include -#include -#include -#include -#include - -#endif diff --git a/src/ducc/cli.c b/src/ducc/cli.c new file mode 100644 index 0000000..3ea363a --- /dev/null +++ b/src/ducc/cli.c @@ -0,0 +1,133 @@ +#include "cli.h" +#include +#include +#include +#include "../lib/common.h" +#include "version.h" + +static void print_version() { + printf("ducc v%s\n", DUCC_VERSION); +} + +CliArgs* parse_cli_args(int argc, char** argv) { + const char* output_filename = NULL; + int positional_arguments_start = -1; + bool opt_c = false; + bool opt_E = false; + bool opt_wasm = false; + bool opt_MD = false; + bool opt_MMD = false; + bool opt_g = false; + StrArray include_dirs; + strings_init(&include_dirs); + StrArray defines; + strings_init(&defines); + + for (int i = 1; i < argc; ++i) { + if (argv[i][0] != '-') { + positional_arguments_start = i; + break; + } + char c = argv[i][1]; + if (c == 'f') { + // ignore + } else if (c == 'g') { + // ignore + } else if (c == 'm') { + // ignore + } else if (c == 'O') { + // ignore + } else if (c == 'W') { + // ignore + } else if (c == 'M' && argv[i][2] == '\0') { + // ignore -M + } else if (c == 'I') { + const char* dir = NULL; + if (argv[i][2] != '\0') { + // -Ipath format + dir = argv[i] + 2; + } else if (argc > i + 1) { + // -I path format + dir = argv[i + 1]; + ++i; + } else { + fatal_error("-I requires directory"); + } + strings_push(&include_dirs, dir); + } else if (c == 'D') { + const char* def = NULL; + if (argv[i][2] != '\0') { + // -DFOO or -DFOO=value format + def = argv[i] + 2; + } else if (argc > i + 1) { + // -D FOO or -D FOO=value format + def = argv[i + 1]; + ++i; + } else { + fatal_error("-D requires macro definition"); + } + strings_push(&defines, def); + } else if (c == 'o') { + if (argc <= i + 1) { + fatal_error("-o requires filename"); + } + output_filename = argv[i + 1]; + ++i; + } else if (c == 'c') { + opt_c = true; + } else if (c == 'E') { + opt_E = true; + } else if (c == 'g') { + opt_g = true; + } else if (strcmp(argv[i], "-MD") == 0) { + opt_MD = true; + } else if (strcmp(argv[i], "-MMD") == 0) { + opt_MMD = true; + } else if (strcmp(argv[i], "--version") == 0) { + print_version(); + exit(0); + } else if (str_starts_with(argv[i], "-std=")) { + // ignore -std=* + } else if (strcmp(argv[i], "--wasm") == 0) { + opt_wasm = true; + } else { + fatal_error("unknown option: %s", argv[i]); + } + } + if (positional_arguments_start == -1) { + fatal_error("usage: ducc "); + } + + CliArgs* a = calloc(1, sizeof(CliArgs)); + a->input_filename = argv[positional_arguments_start]; + a->output_filename = output_filename; + a->output_assembly = !output_filename || str_ends_with(output_filename, ".s") || opt_wasm; + a->only_compile = opt_c; + a->preprocess_only = opt_E; + a->totally_deligate_to_gcc = false; + a->wasm = opt_wasm; + a->gcc_command = NULL; + a->generate_system_deps = opt_MD; + a->generate_user_deps = opt_MD || opt_MMD; + a->generate_debug_info = opt_g; + a->include_dirs = include_dirs; + a->defines = defines; + + if (!a->only_compile && str_ends_with(a->input_filename, ".o")) { + a->totally_deligate_to_gcc = true; + StrBuilder builder; + strbuilder_init(&builder); + strbuilder_append_string(&builder, "gcc "); + for (int i = 1; i < argc; ++i) { + strbuilder_append_char(&builder, '\''); + strbuilder_append_string(&builder, argv[i]); + strbuilder_append_char(&builder, '\''); + if (i != argc - 1) { + strbuilder_append_char(&builder, ' '); + } + } + a->gcc_command = builder.buf; + } + + return a; +} diff --git a/src/ducc/cli.h b/src/ducc/cli.h new file mode 100644 index 0000000..b9cfc87 --- /dev/null +++ b/src/ducc/cli.h @@ -0,0 +1,24 @@ +#ifndef DUCC_CLI_H +#define DUCC_CLI_H + +#include "../lib/common.h" + +typedef struct { + const char* input_filename; + const char* output_filename; + bool output_assembly; + bool only_compile; + bool preprocess_only; + bool generate_system_deps; + bool generate_user_deps; + bool generate_debug_info; + bool totally_deligate_to_gcc; + bool wasm; + const char* gcc_command; + StrArray include_dirs; + StrArray defines; +} CliArgs; + +CliArgs* parse_cli_args(int argc, char** argv); + +#endif diff --git a/src/ducc/main.c b/src/ducc/main.c new file mode 100644 index 0000000..18bdcbf --- /dev/null +++ b/src/ducc/main.c @@ -0,0 +1,88 @@ +#include "../cc1/ast.h" +#include "../cc1/codegen.h" +#include "../cc1/codegen_wasm.h" +#include "../cc1/fs.h" +#include "../cc1/io.h" +#include "../cc1/parse.h" +#include "../cc1/preprocess.h" +#include "../cc1/tokenize.h" +#include "../lib/common.h" +#include "cli.h" + +int main(int argc, char** argv) { + CliArgs* cli_args = parse_cli_args(argc, argv); + + if (cli_args->totally_deligate_to_gcc) { + return system(cli_args->gcc_command); + } + + InFile* source = infile_open(cli_args->input_filename); + + StrArray included_files; + strings_init(&included_files); + + TokenArray* pp_tokens = preprocess(source, &cli_args->defines, &cli_args->include_dirs, &included_files, + cli_args->generate_system_deps, cli_args->generate_user_deps); + + if (cli_args->preprocess_only) { + FILE* output_file = cli_args->output_filename ? fopen(cli_args->output_filename, "w") : stdout; + if (!output_file) { + fatal_error("Cannot open output file: %s", cli_args->output_filename); + } + print_token_to_file(output_file, pp_tokens); + return 0; + } + + concat_adjacent_string_literals(pp_tokens); + TokenArray* tokens = convert_pp_tokens_to_tokens(pp_tokens); + Program* prog = parse(tokens); + + const char* assembly_filename; + if (cli_args->output_assembly) { + assembly_filename = cli_args->output_filename; + } else { + char* temp_filename = calloc(19, sizeof(char)); + temp_filename = strdup("/tmp/ducc-XXXXXX.s"); + mkstemps(temp_filename, strlen(".s")); + assembly_filename = temp_filename; + } + FILE* assembly_file = assembly_filename ? fopen(assembly_filename, "wb") : stdout; + if (cli_args->wasm) { + codegen_wasm(prog, assembly_file); + } else { + codegen(prog, cli_args->input_filename, assembly_file); + } + fclose(assembly_file); + + if (cli_args->wasm) { + return 0; + } + + if (!cli_args->output_assembly) { + char cmd_buf[256]; + if (cli_args->only_compile) { + sprintf(cmd_buf, "gcc -c -s -o '%s' '%s'", cli_args->output_filename, assembly_filename); + } else { + sprintf(cmd_buf, "gcc -s -o '%s' '%s'", cli_args->output_filename, assembly_filename); + } + int result = system(cmd_buf); + if (result != 0) { + fatal_error("gcc failed: %d", result); + } + } + + if ((cli_args->generate_system_deps || cli_args->generate_user_deps) && cli_args->only_compile && + cli_args->output_filename) { + const char* dep_filename = replace_extension(cli_args->output_filename, ".d"); + + FILE* dep_file = fopen(dep_filename, "w"); + if (!dep_file) { + fatal_error("Cannot open dependency file: %s", dep_filename); + } + fprintf(dep_file, "%s:", cli_args->output_filename); + for (size_t i = 0; i < included_files.len; ++i) { + fprintf(dep_file, " \\\n %s", included_files.data[i]); + } + fprintf(dep_file, "\n"); + } +} diff --git a/src/ducc/version.h b/src/ducc/version.h new file mode 100644 index 0000000..67c2352 --- /dev/null +++ b/src/ducc/version.h @@ -0,0 +1,6 @@ +#ifndef DUCC_VERSION_H +#define DUCC_VERSION_H + +#define DUCC_VERSION "0.4.0" + +#endif diff --git a/src/fs.c b/src/fs.c deleted file mode 100644 index c702fa4..0000000 --- a/src/fs.c +++ /dev/null @@ -1,27 +0,0 @@ -#include "fs.h" -#include -#include - -// 'ext' must include '.'. -char* replace_extension(const char* file_name, const char* ext) { - size_t len = strlen(file_name); - const char* last_slash = strrchr(file_name, '/'); - const char* last_dot = strrchr(file_name, '.'); - - size_t ext_len = strlen(ext); - size_t base_len; - // !last_slash: foo.c - // last_slash < last_dot: ./bar/foo.c - if (last_dot && (!last_slash || last_slash < last_dot)) { - base_len = last_dot - file_name; - } else { - base_len = len; - } - - char* result = calloc(base_len + ext_len + 1, sizeof(char)); - memcpy(result, file_name, base_len); - memcpy(result + base_len, ext, ext_len); - result[base_len + ext_len] = '\0'; - - return result; -} diff --git a/src/fs.h b/src/fs.h deleted file mode 100644 index b71889f..0000000 --- a/src/fs.h +++ /dev/null @@ -1,7 +0,0 @@ -#ifndef DUCC_FS_H -#define DUCC_FS_H - -// 'ext' must include '.'. -char* replace_extension(const char* file_name, const char* ext); - -#endif diff --git a/src/io.c b/src/io.c deleted file mode 100644 index 469c868..0000000 --- a/src/io.c +++ /dev/null @@ -1,127 +0,0 @@ -#include "io.h" -#include -#include -#include -#include "common.h" -#include "json.h" - -void sourcelocation_build_json(JsonBuilder* builder, SourceLocation* loc) { - jsonbuilder_object_start(builder); - jsonbuilder_object_member_start(builder, "filename"); - jsonbuilder_string(builder, loc->filename); - jsonbuilder_object_member_end(builder); - jsonbuilder_object_member_start(builder, "line"); - jsonbuilder_integer(builder, loc->line); - jsonbuilder_object_member_end(builder); - jsonbuilder_object_end(builder); -} - -InFile* infile_open(const char* filename) { - FILE* in = fopen(filename, "rb"); - if (!in) { - return NULL; - } - - size_t buf_size = 1024 * 10; - char* buf = calloc(buf_size, sizeof(char)); - char* cur = buf; - char* tmp = calloc(1024, sizeof(char)); - - while (fgets(tmp, 1024, in)) { - size_t len = strlen(tmp); - size_t used_size = cur - buf; - - if (buf_size <= used_size + len) { - size_t old_size = buf_size; - buf_size *= 2; - buf = realloc(buf, buf_size); - memset(buf + old_size, 0, buf_size - old_size); - cur = buf + used_size; - } - - memcpy(cur, tmp, len); - cur += len; - } - fclose(in); - - InFile* in_file = calloc(1, sizeof(InFile)); - in_file->buf = buf; - in_file->loc.filename = filename; - in_file->loc.line = 1; - return in_file; -} - -bool infile_eof(InFile* f) { - return f->buf[f->pos] == '\0'; -} - -char infile_peek_char(InFile* f) { - char c = f->buf[f->pos]; - - // Skip a pair of backslash and new-line. - if (c == '\\') { - char c2 = f->buf[f->pos + 1]; - // C23: 5.1.1.2 - // A source file that is not empty shall end in a new-line character, which shall not be immediately preceded by - // a backslash character before any such splicing takes place. - if (c2 == '\0') { - fatal_error("%s:%d: expected, but got ", f->loc.filename, f->loc.line); - } - // Handle line continuation. - if (c2 == '\r') { - if (f->buf[f->pos + 2] == '\n') { - f->pos += 3; // Backslash + CRLF - } else { - f->pos += 2; // Backslash + CR - } - ++f->loc.line; - return infile_peek_char(f); - } else if (c2 == '\n') { - f->pos += 2; // Backslash + LF - ++f->loc.line; - return infile_peek_char(f); - } - } - - // Normalize new-line. - if (c == '\r') - c = '\n'; - return c; -} - -char infile_next_char(InFile* f) { - char c = infile_peek_char(f); - - if (f->buf[f->pos] == '\r') { - ++f->pos; - if (f->buf[f->pos] == '\n') { - ++f->pos; // CRLF - } - c = '\n'; - } else { - ++f->pos; - } - - if (c == '\n') - ++f->loc.line; - return c; -} - -char infile_peek_char2(InFile* f) { - int saved_pos = f->pos; - int saved_line = f->loc.line; - infile_next_char(f); - char c = infile_peek_char(f); - f->pos = saved_pos; - f->loc.line = saved_line; - return c; -} - -bool infile_consume_if(InFile* f, char expected) { - if (infile_peek_char(f) == expected) { - infile_next_char(f); - return true; - } else { - return false; - } -} diff --git a/src/io.h b/src/io.h deleted file mode 100644 index 71e25b0..0000000 --- a/src/io.h +++ /dev/null @@ -1,26 +0,0 @@ -#ifndef DUCC_IO_H -#define DUCC_IO_H - -#include "json.h" - -typedef struct { - const char* filename; - int line; -} SourceLocation; - -void sourcelocation_build_json(JsonBuilder* builder, SourceLocation* loc); - -typedef struct { - const char* buf; - int pos; - SourceLocation loc; -} InFile; - -InFile* infile_open(const char* filename); -bool infile_eof(InFile* f); -char infile_peek_char(InFile* f); -char infile_peek_char2(InFile* f); -char infile_next_char(InFile* f); -bool infile_consume_if(InFile* f, char expected); - -#endif diff --git a/src/json.c b/src/json.c deleted file mode 100644 index 0384516..0000000 --- a/src/json.c +++ /dev/null @@ -1,112 +0,0 @@ -#include "json.h" -#include -#include -#include "common.h" - -struct JsonBuilder { - StrBuilder buf; - // not supported by ducc for now - // bool has_output_element[256]; - bool* has_output_element; - int depth; -}; - -JsonBuilder* jsonbuilder_new() { - JsonBuilder* b = calloc(1, sizeof(JsonBuilder)); - strbuilder_init(&b->buf); - b->has_output_element = calloc(256, sizeof(bool)); - return b; -} - -const char* jsonbuilder_get_output(JsonBuilder* b) { - return b->buf.buf; -} - -static void emit_comma_if_needed(JsonBuilder* b) { - if (b->has_output_element[b->depth - 1]) { - strbuilder_append_char(&b->buf, ','); - } -} - -void jsonbuilder_object_start(JsonBuilder* b) { - strbuilder_append_char(&b->buf, '{'); - b->has_output_element[b->depth++] = false; -} - -void jsonbuilder_object_end(JsonBuilder* b) { - b->depth--; - strbuilder_append_char(&b->buf, '}'); -} - -void jsonbuilder_object_member_start(JsonBuilder* b, const char* key) { - emit_comma_if_needed(b); - jsonbuilder_string(b, key); - strbuilder_append_char(&b->buf, ':'); -} - -void jsonbuilder_object_member_end(JsonBuilder* b) { - b->has_output_element[b->depth - 1] = true; -} - -void jsonbuilder_array_start(JsonBuilder* b) { - strbuilder_append_char(&b->buf, '['); - b->has_output_element[b->depth++] = false; -} - -void jsonbuilder_array_end(JsonBuilder* b) { - b->depth--; - strbuilder_append_char(&b->buf, ']'); -} - -void jsonbuilder_array_element_start(JsonBuilder* b) { - emit_comma_if_needed(b); -} - -void jsonbuilder_array_element_end(JsonBuilder* b) { - b->has_output_element[b->depth - 1] = true; -} - -void jsonbuilder_null(JsonBuilder* b) { - strbuilder_append_string(&b->buf, "null"); -} - -void jsonbuilder_boolean(JsonBuilder* b, bool value) { - strbuilder_append_string(&b->buf, value ? "true" : "false"); -} - -void jsonbuilder_integer(JsonBuilder* b, int value) { - char buf[32]; - sprintf(buf, "%d", value); - strbuilder_append_string(&b->buf, buf); -} - -void jsonbuilder_string(JsonBuilder* b, const char* value) { - if (!value) { - jsonbuilder_null(b); - return; - } - - strbuilder_append_char(&b->buf, '"'); - - for (const char* p = value; *p; p++) { - if (*p == '"') { - strbuilder_append_string(&b->buf, "\\\""); - } else if (*p == '\\') { - strbuilder_append_string(&b->buf, "\\\\"); - } else if (*p == '\b') { - strbuilder_append_string(&b->buf, "\\b"); - } else if (*p == '\f') { - strbuilder_append_string(&b->buf, "\\f"); - } else if (*p == '\n') { - strbuilder_append_string(&b->buf, "\\n"); - } else if (*p == '\r') { - strbuilder_append_string(&b->buf, "\\r"); - } else if (*p == '\t') { - strbuilder_append_string(&b->buf, "\\t"); - } else { - strbuilder_append_char(&b->buf, *p); - } - } - - strbuilder_append_char(&b->buf, '"'); -} diff --git a/src/json.h b/src/json.h deleted file mode 100644 index 9322901..0000000 --- a/src/json.h +++ /dev/null @@ -1,32 +0,0 @@ -#ifndef DUCC_JSON_H -#define DUCC_JSON_H - -struct JsonBuilder; -typedef struct JsonBuilder JsonBuilder; - -JsonBuilder* jsonbuilder_new(); -const char* jsonbuilder_get_output(JsonBuilder* b); - -void jsonbuilder_object_start(JsonBuilder* b); -void jsonbuilder_object_end(JsonBuilder* b); -void jsonbuilder_object_member_start(JsonBuilder* b, const char* key); -void jsonbuilder_object_member_end(JsonBuilder* b); - -void jsonbuilder_array_start(JsonBuilder* b); -void jsonbuilder_array_end(JsonBuilder* b); -void jsonbuilder_array_element_start(JsonBuilder* b); -void jsonbuilder_array_element_end(JsonBuilder* b); - -void jsonbuilder_null(JsonBuilder* b); -void jsonbuilder_boolean(JsonBuilder* b, bool value); -void jsonbuilder_integer(JsonBuilder* b, int value); -void jsonbuilder_string(JsonBuilder* b, const char* value); - -#define JSON_DUMP(x, f) \ - do { \ - JsonBuilder* _builder = jsonbuilder_new(); \ - f(_builder, (x)); \ - fprintf(stderr, "%s\n", jsonbuilder_get_output(_builder)); \ - } while (0) - -#endif diff --git a/src/lib/common.c b/src/lib/common.c new file mode 100644 index 0000000..af35064 --- /dev/null +++ b/src/lib/common.c @@ -0,0 +1,88 @@ +#include "common.h" +#include +#include +#include +#include + +void fatal_error(const char* msg, ...) { + va_list args; + va_start(args, msg); + vfprintf(stderr, msg, args); + va_end(args); + fprintf(stderr, "\n"); + exit(1); +} + +bool str_starts_with(const char* s, const char* prefix) { + size_t l1 = strlen(s); + size_t l2 = strlen(prefix); + if (l1 < l2) + return false; + return strncmp(s, prefix, l2) == 0; +} + +bool str_ends_with(const char* s, const char* suffix) { + size_t l1 = strlen(s); + size_t l2 = strlen(suffix); + if (l1 < l2) + return false; + return strcmp(s + l1 - l2, suffix) == 0; +} + +void strbuilder_init(StrBuilder* b) { + b->len = 0; + b->capacity = 16; + b->buf = calloc(b->capacity, sizeof(char)); +} + +// `size` must include a trailing null byte. +void strbuilder_reserve(StrBuilder* b, size_t size) { + if (size <= b->capacity) + return; + while (b->capacity < size) { + b->capacity *= 2; + } + b->buf = realloc(b->buf, b->capacity * sizeof(char)); + memset(b->buf + b->len, 0, (b->capacity - b->len) * sizeof(char)); +} + +void strbuilder_append_char(StrBuilder* b, int c) { + strbuilder_reserve(b, b->len + 1 + 1); + b->buf[b->len++] = c; +} + +void strbuilder_append_string(StrBuilder* b, const char* s) { + int len = strlen(s); + strbuilder_reserve(b, b->len + len + 1); + for (int i = 0; i < len; ++i) { + b->buf[b->len++] = s[i]; + } +} + +void strings_init(StrArray* strings) { + strings->len = 0; + strings->capacity = 32; + strings->data = calloc(strings->capacity, sizeof(const char*)); +} + +void strings_reserve(StrArray* strings, size_t size) { + if (size <= strings->capacity) + return; + while (strings->capacity < size) { + strings->capacity *= 2; + } + strings->data = realloc(strings->data, strings->capacity * sizeof(const char*)); + memset(strings->data + strings->len, 0, (strings->capacity - strings->len) * sizeof(const char*)); +} + +int strings_push(StrArray* strings, const char* str) { + strings_reserve(strings, strings->len + 1); + strings->data[strings->len] = str; + return ++strings->len; +} + +void strings_pop(StrArray* strings) { + if (strings->len > 0) { + strings->len--; + } +} diff --git a/src/lib/common.h b/src/lib/common.h new file mode 100644 index 0000000..bb94e79 --- /dev/null +++ b/src/lib/common.h @@ -0,0 +1,41 @@ +#ifndef DUCC_COMMON_H +#define DUCC_COMMON_H + +#include "ducc.h" + +_Noreturn void fatal_error(const char* msg, ...); + +// TODO +#ifdef __ducc__ +#define unreachable() fatal_error("%s:%d: unreachable", __FILE__, __LINE__) +#endif + +#define unimplemented() fatal_error("%s:%d: unimplemented", __FILE__, __LINE__) + +bool str_starts_with(const char* s, const char* prefix); +bool str_ends_with(const char* s, const char* suffix); + +typedef struct { + size_t len; + size_t capacity; + char* buf; +} StrBuilder; + +void strbuilder_init(StrBuilder* b); +// `size` must include a trailing null byte. +void strbuilder_reserve(StrBuilder* b, size_t size); +void strbuilder_append_char(StrBuilder* b, int c); +void strbuilder_append_string(StrBuilder* b, const char* s); + +typedef struct { + size_t len; + size_t capacity; + const char** data; +} StrArray; + +void strings_init(StrArray* strings); +void strings_reserve(StrArray* strings, size_t size); +int strings_push(StrArray* strings, const char* str); +void strings_pop(StrArray* strings); + +#endif diff --git a/src/lib/ducc.h b/src/lib/ducc.h new file mode 100644 index 0000000..31ee977 --- /dev/null +++ b/src/lib/ducc.h @@ -0,0 +1,11 @@ +#ifndef DUCC_DUCC_H +#define DUCC_DUCC_H + +#include +#include +#include +#include +#include +#include + +#endif diff --git a/src/lib/json.c b/src/lib/json.c new file mode 100644 index 0000000..0384516 --- /dev/null +++ b/src/lib/json.c @@ -0,0 +1,112 @@ +#include "json.h" +#include +#include +#include "common.h" + +struct JsonBuilder { + StrBuilder buf; + // not supported by ducc for now + // bool has_output_element[256]; + bool* has_output_element; + int depth; +}; + +JsonBuilder* jsonbuilder_new() { + JsonBuilder* b = calloc(1, sizeof(JsonBuilder)); + strbuilder_init(&b->buf); + b->has_output_element = calloc(256, sizeof(bool)); + return b; +} + +const char* jsonbuilder_get_output(JsonBuilder* b) { + return b->buf.buf; +} + +static void emit_comma_if_needed(JsonBuilder* b) { + if (b->has_output_element[b->depth - 1]) { + strbuilder_append_char(&b->buf, ','); + } +} + +void jsonbuilder_object_start(JsonBuilder* b) { + strbuilder_append_char(&b->buf, '{'); + b->has_output_element[b->depth++] = false; +} + +void jsonbuilder_object_end(JsonBuilder* b) { + b->depth--; + strbuilder_append_char(&b->buf, '}'); +} + +void jsonbuilder_object_member_start(JsonBuilder* b, const char* key) { + emit_comma_if_needed(b); + jsonbuilder_string(b, key); + strbuilder_append_char(&b->buf, ':'); +} + +void jsonbuilder_object_member_end(JsonBuilder* b) { + b->has_output_element[b->depth - 1] = true; +} + +void jsonbuilder_array_start(JsonBuilder* b) { + strbuilder_append_char(&b->buf, '['); + b->has_output_element[b->depth++] = false; +} + +void jsonbuilder_array_end(JsonBuilder* b) { + b->depth--; + strbuilder_append_char(&b->buf, ']'); +} + +void jsonbuilder_array_element_start(JsonBuilder* b) { + emit_comma_if_needed(b); +} + +void jsonbuilder_array_element_end(JsonBuilder* b) { + b->has_output_element[b->depth - 1] = true; +} + +void jsonbuilder_null(JsonBuilder* b) { + strbuilder_append_string(&b->buf, "null"); +} + +void jsonbuilder_boolean(JsonBuilder* b, bool value) { + strbuilder_append_string(&b->buf, value ? "true" : "false"); +} + +void jsonbuilder_integer(JsonBuilder* b, int value) { + char buf[32]; + sprintf(buf, "%d", value); + strbuilder_append_string(&b->buf, buf); +} + +void jsonbuilder_string(JsonBuilder* b, const char* value) { + if (!value) { + jsonbuilder_null(b); + return; + } + + strbuilder_append_char(&b->buf, '"'); + + for (const char* p = value; *p; p++) { + if (*p == '"') { + strbuilder_append_string(&b->buf, "\\\""); + } else if (*p == '\\') { + strbuilder_append_string(&b->buf, "\\\\"); + } else if (*p == '\b') { + strbuilder_append_string(&b->buf, "\\b"); + } else if (*p == '\f') { + strbuilder_append_string(&b->buf, "\\f"); + } else if (*p == '\n') { + strbuilder_append_string(&b->buf, "\\n"); + } else if (*p == '\r') { + strbuilder_append_string(&b->buf, "\\r"); + } else if (*p == '\t') { + strbuilder_append_string(&b->buf, "\\t"); + } else { + strbuilder_append_char(&b->buf, *p); + } + } + + strbuilder_append_char(&b->buf, '"'); +} diff --git a/src/lib/json.h b/src/lib/json.h new file mode 100644 index 0000000..9322901 --- /dev/null +++ b/src/lib/json.h @@ -0,0 +1,32 @@ +#ifndef DUCC_JSON_H +#define DUCC_JSON_H + +struct JsonBuilder; +typedef struct JsonBuilder JsonBuilder; + +JsonBuilder* jsonbuilder_new(); +const char* jsonbuilder_get_output(JsonBuilder* b); + +void jsonbuilder_object_start(JsonBuilder* b); +void jsonbuilder_object_end(JsonBuilder* b); +void jsonbuilder_object_member_start(JsonBuilder* b, const char* key); +void jsonbuilder_object_member_end(JsonBuilder* b); + +void jsonbuilder_array_start(JsonBuilder* b); +void jsonbuilder_array_end(JsonBuilder* b); +void jsonbuilder_array_element_start(JsonBuilder* b); +void jsonbuilder_array_element_end(JsonBuilder* b); + +void jsonbuilder_null(JsonBuilder* b); +void jsonbuilder_boolean(JsonBuilder* b, bool value); +void jsonbuilder_integer(JsonBuilder* b, int value); +void jsonbuilder_string(JsonBuilder* b, const char* value); + +#define JSON_DUMP(x, f) \ + do { \ + JsonBuilder* _builder = jsonbuilder_new(); \ + f(_builder, (x)); \ + fprintf(stderr, "%s\n", jsonbuilder_get_output(_builder)); \ + } while (0) + +#endif diff --git a/src/main.c b/src/main.c deleted file mode 100644 index 63422b5..0000000 --- a/src/main.c +++ /dev/null @@ -1,88 +0,0 @@ -#include "ast.h" -#include "cli.h" -#include "codegen.h" -#include "codegen_wasm.h" -#include "common.h" -#include "fs.h" -#include "io.h" -#include "parse.h" -#include "preprocess.h" -#include "tokenize.h" - -int main(int argc, char** argv) { - CliArgs* cli_args = parse_cli_args(argc, argv); - - if (cli_args->totally_deligate_to_gcc) { - return system(cli_args->gcc_command); - } - - InFile* source = infile_open(cli_args->input_filename); - - StrArray included_files; - strings_init(&included_files); - - TokenArray* pp_tokens = preprocess(source, &cli_args->defines, &cli_args->include_dirs, &included_files, - cli_args->generate_system_deps, cli_args->generate_user_deps); - - if (cli_args->preprocess_only) { - FILE* output_file = cli_args->output_filename ? fopen(cli_args->output_filename, "w") : stdout; - if (!output_file) { - fatal_error("Cannot open output file: %s", cli_args->output_filename); - } - print_token_to_file(output_file, pp_tokens); - return 0; - } - - concat_adjacent_string_literals(pp_tokens); - TokenArray* tokens = convert_pp_tokens_to_tokens(pp_tokens); - Program* prog = parse(tokens); - - const char* assembly_filename; - if (cli_args->output_assembly) { - assembly_filename = cli_args->output_filename; - } else { - char* temp_filename = calloc(19, sizeof(char)); - temp_filename = strdup("/tmp/ducc-XXXXXX.s"); - mkstemps(temp_filename, strlen(".s")); - assembly_filename = temp_filename; - } - FILE* assembly_file = assembly_filename ? fopen(assembly_filename, "wb") : stdout; - if (cli_args->wasm) { - codegen_wasm(prog, assembly_file); - } else { - codegen(prog, cli_args->input_filename, assembly_file); - } - fclose(assembly_file); - - if (cli_args->wasm) { - return 0; - } - - if (!cli_args->output_assembly) { - char cmd_buf[256]; - if (cli_args->only_compile) { - sprintf(cmd_buf, "gcc -c -s -o '%s' '%s'", cli_args->output_filename, assembly_filename); - } else { - sprintf(cmd_buf, "gcc -s -o '%s' '%s'", cli_args->output_filename, assembly_filename); - } - int result = system(cmd_buf); - if (result != 0) { - fatal_error("gcc failed: %d", result); - } - } - - if ((cli_args->generate_system_deps || cli_args->generate_user_deps) && cli_args->only_compile && - cli_args->output_filename) { - const char* dep_filename = replace_extension(cli_args->output_filename, ".d"); - - FILE* dep_file = fopen(dep_filename, "w"); - if (!dep_file) { - fatal_error("Cannot open dependency file: %s", dep_filename); - } - fprintf(dep_file, "%s:", cli_args->output_filename); - for (size_t i = 0; i < included_files.len; ++i) { - fprintf(dep_file, " \\\n %s", included_files.data[i]); - } - fprintf(dep_file, "\n"); - } -} diff --git a/src/parse.c b/src/parse.c deleted file mode 100644 index b95bc1e..0000000 --- a/src/parse.c +++ /dev/null @@ -1,3018 +0,0 @@ -#include "parse.h" -#include "common.h" -#include "tokenize.h" - -typedef struct { - const char* name; - Type* ty; - int stack_offset; -} LocalVar; - -typedef struct { - size_t len; - size_t capacity; - LocalVar* data; -} LocalVarArray; - -static void lvars_init(LocalVarArray* lvars) { - lvars->len = 0; - lvars->capacity = 4; - lvars->data = calloc(lvars->capacity, sizeof(LocalVar)); -} - -static void lvars_reserve(LocalVarArray* lvars, size_t size) { - if (size <= lvars->capacity) - return; - while (lvars->capacity < size) { - lvars->capacity *= 2; - } - lvars->data = realloc(lvars->data, lvars->capacity * sizeof(LocalVar)); - memset(lvars->data + lvars->len, 0, (lvars->capacity - lvars->len) * sizeof(LocalVar)); -} - -static LocalVar* lvars_push_new(LocalVarArray* lvars) { - lvars_reserve(lvars, lvars->len + 1); - return &lvars->data[lvars->len++]; -} - -typedef struct { - const char* name; - int index; -} ScopedSymbol; - -typedef struct { - size_t len; - size_t capacity; - ScopedSymbol* data; -} ScopedSymbolArray; - -static void scopedsymbols_init(ScopedSymbolArray* syms) { - syms->len = 0; - syms->capacity = 4; - syms->data = calloc(syms->capacity, sizeof(ScopedSymbol)); -} - -static void scopedsymbols_reserve(ScopedSymbolArray* syms, size_t size) { - if (size <= syms->capacity) - return; - while (syms->capacity < size) { - syms->capacity *= 2; - } - syms->data = realloc(syms->data, syms->capacity * sizeof(ScopedSymbol)); - memset(syms->data + syms->len, 0, (syms->capacity - syms->len) * sizeof(ScopedSymbol)); -} - -static ScopedSymbol* scopedsymbols_push_new(ScopedSymbolArray* syms) { - scopedsymbols_reserve(syms, syms->len + 1); - return &syms->data[syms->len++]; -} - -typedef struct Scope { - struct Scope* outer; - ScopedSymbolArray syms; -} Scope; - -typedef struct { - const char* name; - Type* ty; -} GlobalVar; - -typedef struct { - size_t len; - size_t capacity; - GlobalVar* data; -} GlobalVarArray; - -static void gvars_init(GlobalVarArray* gvars) { - gvars->len = 0; - gvars->capacity = 4; - gvars->data = calloc(gvars->capacity, sizeof(GlobalVar)); -} - -static void gvars_reserve(GlobalVarArray* gvars, size_t size) { - if (size <= gvars->capacity) - return; - while (gvars->capacity < size) { - gvars->capacity *= 2; - } - gvars->data = realloc(gvars->data, gvars->capacity * sizeof(GlobalVar)); - memset(gvars->data + gvars->len, 0, (gvars->capacity - gvars->len) * sizeof(GlobalVar)); -} - -static GlobalVar* gvars_push_new(GlobalVarArray* gvars) { - gvars_reserve(gvars, gvars->len + 1); - return &gvars->data[gvars->len++]; -} - -typedef struct { - const char* name; - Type* ty; -} Func; - -typedef struct { - size_t len; - size_t capacity; - Func* data; -} FuncArray; - -static void funcs_init(FuncArray* funcs) { - funcs->len = 0; - funcs->capacity = 32; - funcs->data = calloc(funcs->capacity, sizeof(Func)); -} - -static void funcs_reserve(FuncArray* funcs, size_t size) { - if (size <= funcs->capacity) - return; - while (funcs->capacity < size) { - funcs->capacity *= 2; - } - funcs->data = realloc(funcs->data, funcs->capacity * sizeof(Func)); - memset(funcs->data + funcs->len, 0, (funcs->capacity - funcs->len) * sizeof(Func)); -} - -static Func* funcs_push_new(FuncArray* funcs) { - funcs_reserve(funcs, funcs->len + 1); - return &funcs->data[funcs->len++]; -} - -typedef struct { - TokenArray* tokens; - int pos; - LocalVarArray lvars; - Scope* scope; - GlobalVarArray gvars; - FuncArray funcs; - AstNode* structs; - AstNode* unions; - AstNode* enums; - AstNode* typedefs; - StrArray str_literals; - int anonymous_user_type_counter; - AstNode* current_switch; -} Parser; - -static Parser* parser_new(TokenArray* tokens) { - Parser* p = calloc(1, sizeof(Parser)); - p->tokens = tokens; - gvars_init(&p->gvars); - funcs_init(&p->funcs); - p->structs = ast_new_list(4); - p->unions = ast_new_list(4); - p->enums = ast_new_list(4); - p->typedefs = ast_new_list(16); - strings_init(&p->str_literals); - - Func* va_start_func = funcs_push_new(&p->funcs); - va_start_func->name = "__ducc_va_start"; - va_start_func->ty = type_new_func(type_new(TypeKind_void), NULL); - - Func* va_arg_func = funcs_push_new(&p->funcs); - va_arg_func->name = "__ducc_va_arg"; - va_arg_func->ty = type_new_func(type_new_ptr(type_new(TypeKind_void)), NULL); - - return p; -} - -static Token* peek_token(Parser* p) { - return &p->tokens->data[p->pos]; -} - -static Token* peek_token2(Parser* p) { - return &p->tokens->data[p->pos + 1]; -} - -static SourceLocation current_location(Parser* p) { - return peek_token(p)->loc; -} - -static Token* next_token(Parser* p) { - return &p->tokens->data[p->pos++]; -} - -static Token* consume_token_if(Parser* p, TokenKind expected) { - if (peek_token(p)->kind == expected) { - return next_token(p); - } else { - return NULL; - } -} - -static bool eof(Parser* p) { - return peek_token(p)->kind != TokenKind_eof; -} - -static Token* expect(Parser* p, TokenKind expected) { - Token* t = next_token(p); - if (t->kind == expected) { - return t; - } - fatal_error("%s:%d: expected '%s', but got '%s'", t->loc.filename, t->loc.line, token_kind_stringify(expected), - token_stringify(t)); -} - -static int find_lvar_in_scope(Parser*, Scope* scope, const char* name) { - for (size_t i = 0; i < scope->syms.len; ++i) { - ScopedSymbol* sym = &scope->syms.data[i]; - if (sym->name && strcmp(sym->name, name) == 0) { - return sym->index; - } - } - return -1; -} - -static int find_lvar_in_current_scope(Parser* p, const char* name) { - return find_lvar_in_scope(p, p->scope, name); -} - -static int find_lvar(Parser* p, const char* name) { - Scope* scope = p->scope; - while (scope) { - int idx = find_lvar_in_scope(p, scope, name); - if (idx != -1) - return idx; - scope = scope->outer; - } - return -1; -} - -static int calc_lvar_stack_offset(Parser* p, Type* ty) { - int last_offset = 0; - for (int i = p->lvars.len - 1; i >= 0; i--) { - int offset = p->lvars.data[i].stack_offset; - // Skip a passed-by-stack parameter. - if (offset <= 0) - continue; - last_offset = offset; - break; - } - return to_aligned(last_offset + type_sizeof(ty), type_alignof(ty)); -} - -static int add_lvar(Parser* p, const char* name, Type* ty, int stack_offset) { - LocalVar* lvar = lvars_push_new(&p->lvars); - lvar->name = name; - lvar->ty = ty; - lvar->stack_offset = stack_offset; - ScopedSymbol* sym = scopedsymbols_push_new(&p->scope->syms); - sym->name = name; - sym->index = p->lvars.len - 1; - return stack_offset; -} - -static AstNode* generate_temporary_lvar(Parser* p, Type* ty) { - int stack_offset = add_lvar(p, NULL, ty, calc_lvar_stack_offset(p, ty)); - return ast_new_lvar(NULL, stack_offset, ty); -} - -static int find_gvar(Parser* p, const char* name) { - for (size_t i = 0; i < p->gvars.len; ++i) { - if (strcmp(p->gvars.data[i].name, name) == 0) { - return i; - } - } - return -1; -} - -static int find_func(Parser* p, const char* name) { - for (size_t i = 0; i < p->funcs.len; ++i) { - if (strcmp(p->funcs.data[i].name, name) == 0) { - return i; - } - } - return -1; -} - -static int find_struct(Parser* p, const char* name) { - for (int i = 0; i < p->structs->as.list->len; ++i) { - if (strcmp(p->structs->as.list->items[i].as.struct_def->name, name) == 0) { - return i; - } - } - return -1; -} - -static int find_union(Parser* p, const char* name) { - for (int i = 0; i < p->unions->as.list->len; ++i) { - if (strcmp(p->unions->as.list->items[i].as.union_def->name, name) == 0) { - return i; - } - } - return -1; -} - -static int find_enum(Parser* p, const char* name) { - for (int i = 0; i < p->enums->as.list->len; ++i) { - if (strcmp(p->enums->as.list->items[i].as.enum_def->name, name) == 0) { - return i; - } - } - return -1; -} - -static int find_enum_member(Parser* p, const char* name) { - for (int i = 0; i < p->enums->as.list->len; ++i) { - AstNode* members = p->enums->as.list->items[i].as.enum_def->members; - if (!members) - continue; - for (int j = 0; j < members->as.list->len; ++j) { - if (strcmp(members->as.list->items[j].as.enum_member->name, name) == 0) { - return i * 1000 + j; - } - } - } - return -1; -} - -static int find_typedef(Parser* p, const char* name) { - for (int i = 0; i < p->typedefs->as.list->len; ++i) { - if (strcmp(p->typedefs->as.list->items[i].as.typedef_decl->name, name) == 0) { - return i; - } - } - return -1; -} - -static void enter_scope(Parser* p) { - Scope* outer_scope = p->scope; - p->scope = calloc(1, sizeof(Scope)); - p->scope->outer = outer_scope; - scopedsymbols_init(&p->scope->syms); -} - -static void leave_scope(Parser* p) { - p->scope = p->scope->outer; -} - -static void enter_func(Parser* p) { - lvars_init(&p->lvars); - enter_scope(p); -} - -static void leave_func(Parser* p) { - leave_scope(p); -} - -static Token* parse_ident(Parser*); -static AstNode* parse_primary_expr(Parser*); -static AstNode* parse_postfix_expr(Parser*); -static AstNode* parse_argument_expr_list(Parser*); -static AstNode* parse_unary_expr(Parser*); -static AstNode* parse_cast_expr(Parser*); -static AstNode* parse_multiplicative_expr(Parser*); -static AstNode* parse_additive_expr(Parser*); -static AstNode* parse_shift_expr(Parser*); -static AstNode* parse_relational_expr(Parser*); -static AstNode* parse_equality_expr(Parser*); -static AstNode* parse_bitwise_and_expr(Parser*); -static AstNode* parse_bitwise_xor_expr(Parser*); -static AstNode* parse_bitwise_or_expr(Parser*); -static AstNode* parse_logical_and_expr(Parser*); -static AstNode* parse_logical_or_expr(Parser*); -static AstNode* parse_conditional_expr(Parser*); -static AstNode* parse_assignment_expr(Parser*); -static AstNode* parse_expr(Parser*); -static AstNode* parse_constant_expr(Parser*); -static Type* parse_pointer_opt(Parser*, Type*); -static void parse_type_qualifier_list_opt(Parser*); -static AstNode* parse_declarator_or_abstract_declarator_opt(Parser*, Type*); -static AstNode* parse_parameter_type_list(Parser*); -static AstNode* parse_parameter_list(Parser*); -static AstNode* parse_parameter_declaration(Parser*); -static Type* parse_array_declarator_suffix(Parser*, Type*); -static Type* parse_function_declarator_suffix(Parser*, Type*); -static AstNode* parse_direct_declarator(Parser*, Type*); -static AstNode* parse_declarator(Parser*, Type*); -static AstNode* parse_init_declarator_list(Parser*, Type*); -static AstNode* parse_declaration(Parser*); -static AstNode* parse_function_definition(Parser*, AstNode*); -static Type* parse_declaration_specifiers(Parser*); -static AstNode* parse_init_declarator(Parser*, Type*); -static Type* parse_struct_specifier(Parser*); -static Type* parse_union_specifier(Parser*); -static AstNode* parse_member_declaration_list(Parser*); -static AstNode* parse_member_declaration(Parser*); -static Type* parse_specifier_qualifier_list(Parser*); -static AstNode* parse_member_declarator_list(Parser*, Type*); -static AstNode* parse_member_declarator(Parser*, Type*); -static Type* parse_enum_specifier(Parser*); -static void parse_enum_members(Parser*, int); -static AstNode* parse_enum_member(Parser*, int); -static Type* parse_type_name(Parser*); -static Type* parse_abstract_declarator_opt(Parser*, Type*); -static AstNode* parse_braced_initializer(Parser*); -static AstNode* parse_initializer(Parser*); -static AstNode* parse_attribute_specifier_sequence_opt(Parser*); -static AstNode* parse_attribute_specifier_opt(Parser*); -static AstNode* parse_attribute_list(Parser*); -static AstNode* parse_attribute(Parser*); -static AstNode* parse_attribute_argument_clause(Parser*); -static AstNode* parse_balanced_token_sequence(Parser*); -static AstNode* parse_stmt(Parser*); -static AstNode* parse_unlabaled_stmt(Parser*); -static AstNode* parse_compound_stmt(Parser*); -static AstNode* parse_block_item(Parser*); -static AstNode* parse_expr_stmt(Parser*); -static AstNode* parse_if_stmt(Parser*); -static AstNode* parse_switch_stmt(Parser*); -static AstNode* parse_while_stmt(Parser*); -static AstNode* parse_do_while_stmt(Parser*); -static AstNode* parse_for_stmt(Parser*); -static AstNode* parse_goto_stmt(Parser*); -static AstNode* parse_continue_stmt(Parser*); -static AstNode* parse_break_stmt(Parser*); -static AstNode* parse_return_stmt(Parser*); -static AstNode* parse_static_assert_declaration(Parser*); -static AstNode* parse_external_declaration(Parser*); -static int eval(AstNode*); -static void process_declarations(Parser*, AstNode*); - -static Token* parse_ident(Parser* p) { - return expect(p, TokenKind_ident); -} - -static int register_str_literal(Parser* p, const char* s) { - return strings_push(&p->str_literals, s); -} - -static void register_params(Parser* p, AstNode* params) { - int gp_regs = 6; - int pass_by_reg_offset = 8; - int pass_by_stack_offset = -16; - for (int i = 0; i < params->as.list->len; ++i) { - AstNode* param = ¶ms->as.list->items[i]; - int ty_size = type_sizeof(param->ty); - int required_gp_regs; - if (ty_size <= 8) { - required_gp_regs = 1; - } else if (ty_size <= 16) { - required_gp_regs = 2; - } else { - required_gp_regs = 0; - } - if (required_gp_regs <= gp_regs) { - gp_regs -= required_gp_regs; - } else { - required_gp_regs = 0; - } - int stack_offset; - if (required_gp_regs == 0) { - stack_offset = pass_by_stack_offset; - pass_by_stack_offset -= to_aligned(ty_size, 8); - } else { - stack_offset = pass_by_reg_offset; - pass_by_reg_offset += to_aligned(ty_size, 8); - } - const char* name = param->as.declarator->name; - param->kind = AstNodeKind_param; - param->as.param = calloc(1, sizeof(ParamNode)); - param->as.param->name = name; - param->as.param->stack_offset = stack_offset; - add_lvar(p, name, param->ty, stack_offset); - } -} - -static void register_func(Parser* p, const char* name, Type* ty) { - Func* func = funcs_push_new(&p->funcs); - func->name = name; - func->ty = ty; -} - -typedef enum { - TypeSpecifierMask_void = 1 << 0, - TypeSpecifierMask_char = 1 << 1, - TypeSpecifierMask_short = 1 << 2, - TypeSpecifierMask_int = 1 << 3, - TypeSpecifierMask_long = 1 << 4, - // 1 << 5 is used for second 'long'. - TypeSpecifierMask_float = 1 << 6, - TypeSpecifierMask_double = 1 << 7, - TypeSpecifierMask_signed = 1 << 8, - TypeSpecifierMask_unsigned = 1 << 9, - TypeSpecifierMask__BitInt = 1 << 10, - TypeSpecifierMask_bool = 1 << 11, - TypeSpecifierMask__Complex = 1 << 12, - TypeSpecifierMask__Decimal32 = 1 << 13, - TypeSpecifierMask__Decimal64 = 1 << 14, - TypeSpecifierMask__Decimal128 = 1 << 15, - TypeSpecifierMask__Atomic = 1 << 16, - TypeSpecifierMask_struct = 1 << 17, - TypeSpecifierMask_union = 1 << 18, - TypeSpecifierMask_enum = 1 << 19, - TypeSpecifierMask_typeof = 1 << 20, - TypeSpecifierMask_typeof_unqual = 1 << 21, - TypeSpecifierMask_typedef_name = 1 << 22, -} TypeSpecifierMask; - -static Type* distinguish_type_from_type_specifiers(int type_specifiers) { - if (type_specifiers == TypeSpecifierMask_void) - return type_new(TypeKind_void); - else if (type_specifiers == TypeSpecifierMask_char) - return type_new(TypeKind_char); - else if (type_specifiers == (TypeSpecifierMask_signed + TypeSpecifierMask_char)) - return type_new(TypeKind_schar); - else if (type_specifiers == (TypeSpecifierMask_unsigned + TypeSpecifierMask_char)) - return type_new(TypeKind_uchar); - else if (type_specifiers == TypeSpecifierMask_short || - type_specifiers == (TypeSpecifierMask_signed + TypeSpecifierMask_short) || - type_specifiers == (TypeSpecifierMask_short + TypeSpecifierMask_int) || - type_specifiers == (TypeSpecifierMask_signed + TypeSpecifierMask_short + TypeSpecifierMask_int)) - return type_new(TypeKind_short); - else if (type_specifiers == (TypeSpecifierMask_unsigned + TypeSpecifierMask_short) || - type_specifiers == (TypeSpecifierMask_unsigned + TypeSpecifierMask_short + TypeSpecifierMask_int)) - return type_new(TypeKind_ushort); - else if (type_specifiers == TypeSpecifierMask_int || type_specifiers == TypeSpecifierMask_signed || - type_specifiers == (TypeSpecifierMask_signed + TypeSpecifierMask_int)) - return type_new(TypeKind_int); - else if (type_specifiers == TypeSpecifierMask_unsigned || - type_specifiers == (TypeSpecifierMask_unsigned + TypeSpecifierMask_int)) - return type_new(TypeKind_uint); - else if (type_specifiers == TypeSpecifierMask_long || - type_specifiers == (TypeSpecifierMask_signed + TypeSpecifierMask_long) || - type_specifiers == (TypeSpecifierMask_long + TypeSpecifierMask_int) || - type_specifiers == (TypeSpecifierMask_signed + TypeSpecifierMask_long + TypeSpecifierMask_int)) - return type_new(TypeKind_long); - else if (type_specifiers == (TypeSpecifierMask_unsigned + TypeSpecifierMask_long) || - type_specifiers == (TypeSpecifierMask_unsigned + TypeSpecifierMask_long + TypeSpecifierMask_int)) - return type_new(TypeKind_ulong); - else if (type_specifiers == (TypeSpecifierMask_long + TypeSpecifierMask_long) || - type_specifiers == (TypeSpecifierMask_signed + TypeSpecifierMask_long + TypeSpecifierMask_long) || - type_specifiers == (TypeSpecifierMask_long + TypeSpecifierMask_long + TypeSpecifierMask_int) || - type_specifiers == - (TypeSpecifierMask_signed + TypeSpecifierMask_long + TypeSpecifierMask_long + TypeSpecifierMask_int)) - return type_new(TypeKind_llong); - else if (type_specifiers == (TypeSpecifierMask_unsigned + TypeSpecifierMask_long + TypeSpecifierMask_long) || - type_specifiers == - (TypeSpecifierMask_unsigned + TypeSpecifierMask_long + TypeSpecifierMask_long + TypeSpecifierMask_int)) - return type_new(TypeKind_ullong); - else if (type_specifiers == TypeSpecifierMask_float) - return type_new(TypeKind_float); - else if (type_specifiers == TypeSpecifierMask_double) - return type_new(TypeKind_double); - else if (type_specifiers == TypeSpecifierMask_long + TypeSpecifierMask_double) - return type_new(TypeKind_ldouble); - else if (type_specifiers == TypeSpecifierMask_bool) - return type_new(TypeKind_bool); - else if (type_specifiers == TypeSpecifierMask_struct) - return NULL; - else if (type_specifiers == TypeSpecifierMask_union) - return NULL; - else if (type_specifiers == TypeSpecifierMask_enum) - return NULL; - else if (type_specifiers == TypeSpecifierMask_typedef_name) - return NULL; - else if (type_specifiers == 0) - return NULL; - else - unimplemented(); -} - -// e++ -// tmp1 = &e; tmp2 = *tmp1; *tmp1 += 1; tmp2 -// e-- -// tmp1 = &e; tmp2 = *tmp1; *tmp1 -= 1; tmp2 -static AstNode* create_new_postfix_inc_or_dec(Parser* p, AstNode* e, TokenKind op) { - AstNode* tmp1_lvar = generate_temporary_lvar(p, type_new_ptr(e->ty)); - AstNode* tmp2_lvar = generate_temporary_lvar(p, e->ty); - - AstNode* expr1 = ast_new_assign_expr(TokenKind_assign, tmp1_lvar, ast_new_ref_expr(e)); - AstNode* expr2 = ast_new_assign_expr(TokenKind_assign, tmp2_lvar, ast_new_deref_expr(tmp1_lvar)); - AstNode* expr3; - if (op == TokenKind_plusplus) { - expr3 = ast_new_assign_add_expr(ast_new_deref_expr(tmp1_lvar), ast_new_int(1)); - } else { - expr3 = ast_new_assign_sub_expr(ast_new_deref_expr(tmp1_lvar), ast_new_int(1)); - } - AstNode* expr4 = tmp2_lvar; - - AstNode* ret = ast_new_list(4); - ast_append(ret, expr1); - ast_append(ret, expr2); - ast_append(ret, expr3); - ast_append(ret, expr4); - ret->ty = expr4->ty; - return ret; -} - -static bool is_typedef_name(Parser* p, Token* tok) { - return tok->kind == TokenKind_ident && find_typedef(p, tok->value.string) != -1; -} - -static bool is_type_token(Parser* p, Token* tok) { - if (tok->kind == TokenKind_keyword_void || tok->kind == TokenKind_keyword_char || - tok->kind == TokenKind_keyword_short || tok->kind == TokenKind_keyword_int || - tok->kind == TokenKind_keyword_long || tok->kind == TokenKind_keyword_float || - tok->kind == TokenKind_keyword_double || tok->kind == TokenKind_keyword_signed || - tok->kind == TokenKind_keyword_unsigned || tok->kind == TokenKind_keyword__BitInt || - tok->kind == TokenKind_keyword_bool || tok->kind == TokenKind_keyword__Complex || - tok->kind == TokenKind_keyword__Decimal32 || tok->kind == TokenKind_keyword__Decimal64 || - tok->kind == TokenKind_keyword__Decimal128 || tok->kind == TokenKind_keyword__Atomic || - tok->kind == TokenKind_keyword_struct || tok->kind == TokenKind_keyword_union || - tok->kind == TokenKind_keyword_enum || tok->kind == TokenKind_keyword_typeof || - tok->kind == TokenKind_keyword_typeof_unqual || tok->kind == TokenKind_keyword_const || - tok->kind == TokenKind_keyword_restrict || tok->kind == TokenKind_keyword_volatile || - tok->kind == TokenKind_keyword__Atomic || tok->kind == TokenKind_keyword_alignas || - tok->kind == TokenKind_keyword_inline || tok->kind == TokenKind_keyword__Noreturn || - tok->kind == TokenKind_keyword_static) { - return true; - } - if (tok->kind != TokenKind_ident) { - return false; - } - return find_typedef(p, tok->value.string) != -1; -} - -// primary-expr: -// identifier -// constant -// string-literal -// '(' expr ')' -// TODO generic-selection -static AstNode* parse_primary_expr(Parser* p) { - Token* t = next_token(p); - if (t->kind == TokenKind_literal_int) { - return ast_new_int(t->value.integer); - } else if (t->kind == TokenKind_literal_double) { - return ast_new_double(t->value.floating); - } else if (t->kind == TokenKind_keyword_true) { - return ast_new_int(1); - } else if (t->kind == TokenKind_keyword_false) { - return ast_new_int(0); - } else if (t->kind == TokenKind_literal_str) { - return ast_new_str_expr(register_str_literal(p, t->value.string), - type_new_static_string(strlen(t->value.string))); - } else if (t->kind == TokenKind_paren_l) { - AstNode* e = parse_expr(p); - expect(p, TokenKind_paren_r); - return e; - } else if (t->kind == TokenKind_ident) { - const char* name = t->value.string; - - int lvar_idx = find_lvar(p, name); - if (lvar_idx == -1) { - int gvar_idx = find_gvar(p, name); - if (gvar_idx == -1) { - int enum_member_idx = find_enum_member(p, name); - if (enum_member_idx == -1) { - int func_idx = find_func(p, name); - if (func_idx == -1) { - fatal_error("%s:%d: undefined variable: %s", t->loc.filename, t->loc.line, name); - } - return ast_new_func(name, p->funcs.data[func_idx].ty); - } - int enum_idx = enum_member_idx / 1000; - int n = enum_member_idx % 1000; - AstNode* e = ast_new_int( - p->enums->as.list->items[enum_idx].as.enum_def->members->as.list->items[n].as.enum_member->value); - e->ty = type_new(TypeKind_enum); - e->ty->ref.defs = p->enums; - e->ty->ref.index = enum_idx; - return e; - } - return ast_new_gvar(name, p->gvars.data[gvar_idx].ty); - } - - return ast_new_lvar(name, p->lvars.data[lvar_idx].stack_offset, p->lvars.data[lvar_idx].ty); - } else { - fatal_error("%s:%d: expected an expression, but got '%s'", t->loc.filename, t->loc.line, token_stringify(t)); - } -} - -// postfix-expr: -// postfix-expr-stem { postfix-expr-postfix }* -// -// postfix-expr-stem: -// primary-expr -// TODO compound-literal -// -// postfix-expr-postfix: -// '[' expr ']' -// '(' argument-expr-list? ')' -// '.' identifier -// '->' identifier -// '++' -// '--' -static AstNode* parse_postfix_expr(Parser* p) { - AstNode* ret = parse_primary_expr(p); - while (1) { - if (consume_token_if(p, TokenKind_paren_l)) { - AstNode* args = parse_argument_expr_list(p); - expect(p, TokenKind_paren_r); - ret = ast_new_func_call(ret, args); - Type* func_type = ret->as.func_call->func->ty; - if (func_type->kind == TypeKind_ptr) { - func_type = func_type->base; - } - ret->ty = func_type->result; - } else if (consume_token_if(p, TokenKind_bracket_l)) { - AstNode* idx = parse_expr(p); - expect(p, TokenKind_bracket_r); - idx = ast_new_binary_expr(TokenKind_star, idx, ast_new_int(type_sizeof(ret->ty->base))); - ret = ast_new_deref_expr(ast_new_binary_expr(TokenKind_plus, ret, idx)); - } else if (consume_token_if(p, TokenKind_dot)) { - const Token* name = parse_ident(p); - ret = ast_new_member_access_expr(ast_new_ref_expr(ret), name->value.string); - } else if (consume_token_if(p, TokenKind_arrow)) { - const Token* name = parse_ident(p); - ret = ast_new_member_access_expr(ret, name->value.string); - } else if (consume_token_if(p, TokenKind_plusplus)) { - ret = create_new_postfix_inc_or_dec(p, ret, TokenKind_plusplus); - } else if (consume_token_if(p, TokenKind_minusminus)) { - ret = create_new_postfix_inc_or_dec(p, ret, TokenKind_minusminus); - } else { - break; - } - } - return ret; -} - -// argument-expr-list: -// { assignment-expr | ',' }+ -static AstNode* parse_argument_expr_list(Parser* p) { - AstNode* list = ast_new_list(6); - while (peek_token(p)->kind != TokenKind_paren_r) { - AstNode* arg = parse_assignment_expr(p); - ast_append(list, arg); - if (!consume_token_if(p, TokenKind_comma)) { - break; - } - } - return list; -} - -// unary-expr: -// postfix-expr -// '++' unary-expr -// '--' unary-expr -// unary-operator cast-expr -// 'sizeof' unary-expr -// 'sizeof' '(' type-name ')' -// TODO 'alignof' '(' type-name ')' -static AstNode* parse_unary_expr(Parser* p) { - TokenKind op = peek_token(p)->kind; - if (consume_token_if(p, TokenKind_plus)) { - return parse_cast_expr(p); - } else if (consume_token_if(p, TokenKind_minus)) { - AstNode* operand = parse_cast_expr(p); - return ast_new_binary_expr(op, ast_new_int(0), operand); - } else if (consume_token_if(p, TokenKind_not)) { - AstNode* operand = parse_cast_expr(p); - return ast_new_unary_expr(op, operand); - } else if (consume_token_if(p, TokenKind_tilde)) { - AstNode* operand = parse_cast_expr(p); - return ast_new_unary_expr(op, operand); - } else if (consume_token_if(p, TokenKind_and)) { - AstNode* operand = parse_cast_expr(p); - return ast_new_ref_expr(operand); - } else if (consume_token_if(p, TokenKind_star)) { - AstNode* operand = parse_cast_expr(p); - if (operand->ty->kind == TypeKind_func) { - // dereference operator against function pointers are no-op. - return operand; - } else { - return ast_new_deref_expr(operand); - } - } else if (consume_token_if(p, TokenKind_plusplus)) { - AstNode* operand = parse_unary_expr(p); - return ast_new_assign_add_expr(operand, ast_new_int(1)); - } else if (consume_token_if(p, TokenKind_minusminus)) { - AstNode* operand = parse_unary_expr(p); - return ast_new_assign_sub_expr(operand, ast_new_int(1)); - } else if (consume_token_if(p, TokenKind_keyword_sizeof)) { - if (peek_token(p)->kind == TokenKind_paren_l && is_type_token(p, peek_token2(p))) { - next_token(p); - Type* ty = parse_type_name(p); - expect(p, TokenKind_paren_r); - return ast_new_int(type_sizeof(ty)); - } else { - AstNode* expr = parse_unary_expr(p); - return ast_new_int(type_sizeof(expr->ty)); - } - } - return parse_postfix_expr(p); -} - -// cast-expr: -// unary-expr -// '(' type-name ')' cast-expr -static AstNode* parse_cast_expr(Parser* p) { - if (peek_token(p)->kind == TokenKind_paren_l && is_type_token(p, peek_token2(p))) { - next_token(p); - Type* ty = parse_type_name(p); - expect(p, TokenKind_paren_r); - - // TODO: check whether the original type can be casted to the result type. - AstNode* e = parse_cast_expr(p); - return ast_new_cast_expr(e, ty); - } - return parse_unary_expr(p); -} - -// multiplicative-expr: -// cast-expr { ( '*' / '/' / '%' ) cast-expr }* -static AstNode* parse_multiplicative_expr(Parser* p) { - AstNode* lhs = parse_cast_expr(p); - while (1) { - TokenKind op = peek_token(p)->kind; - if (op == TokenKind_star || op == TokenKind_slash || op == TokenKind_percent) { - next_token(p); - AstNode* rhs = parse_cast_expr(p); - lhs = ast_new_binary_expr(op, lhs, rhs); - } else { - break; - } - } - return lhs; -} - -// additive-expr: -// multiplicative-expr { ( '+' / '-' ) multiplicative-expr }* -static AstNode* parse_additive_expr(Parser* p) { - AstNode* lhs = parse_multiplicative_expr(p); - while (1) { - if (consume_token_if(p, TokenKind_plus)) { - AstNode* rhs = parse_multiplicative_expr(p); - if (lhs->ty->base) { - lhs = ast_new_binary_expr( - TokenKind_plus, lhs, - ast_new_binary_expr(TokenKind_star, rhs, ast_new_int(type_sizeof(lhs->ty->base)))); - } else if (rhs->ty->base) { - lhs = ast_new_binary_expr( - TokenKind_plus, ast_new_binary_expr(TokenKind_star, lhs, ast_new_int(type_sizeof(rhs->ty->base))), - rhs); - } else { - lhs = ast_new_binary_expr(TokenKind_plus, lhs, rhs); - } - } else if (consume_token_if(p, TokenKind_minus)) { - AstNode* rhs = parse_multiplicative_expr(p); - if (lhs->ty->base) { - if (rhs->ty->base) { - // (a - b) / sizeof(a) - lhs = ast_new_binary_expr(TokenKind_slash, ast_new_binary_expr(TokenKind_minus, lhs, rhs), - ast_new_int(type_sizeof(lhs->ty->base))); - } else { - // a - b*sizeof(a) - lhs = ast_new_binary_expr( - TokenKind_minus, lhs, - ast_new_binary_expr(TokenKind_star, rhs, ast_new_int(type_sizeof(lhs->ty->base)))); - } - } else { - lhs = ast_new_binary_expr(TokenKind_minus, lhs, rhs); - } - } else { - break; - } - } - return lhs; -} - -// shift-expr: -// additive-expr { ( '<<' / '>>' ) additive-expr }* -static AstNode* parse_shift_expr(Parser* p) { - AstNode* lhs = parse_additive_expr(p); - while (1) { - TokenKind op = peek_token(p)->kind; - if (op == TokenKind_lshift || op == TokenKind_rshift) { - next_token(p); - AstNode* rhs = parse_additive_expr(p); - lhs = ast_new_binary_expr(op, lhs, rhs); - } else { - break; - } - } - return lhs; -} - -// relational-expr: -// shift-expr { ( '<' / '>' / '<=' / '>=' ) shift-expr }* -static AstNode* parse_relational_expr(Parser* p) { - AstNode* lhs = parse_shift_expr(p); - while (1) { - TokenKind op = peek_token(p)->kind; - if (op == TokenKind_lt || op == TokenKind_le) { - next_token(p); - AstNode* rhs = parse_shift_expr(p); - lhs = ast_new_binary_expr(op, lhs, rhs); - } else if (consume_token_if(p, TokenKind_gt)) { - AstNode* rhs = parse_shift_expr(p); - lhs = ast_new_binary_expr(TokenKind_lt, rhs, lhs); - } else if (consume_token_if(p, TokenKind_ge)) { - AstNode* rhs = parse_shift_expr(p); - lhs = ast_new_binary_expr(TokenKind_le, rhs, lhs); - } else { - break; - } - } - return lhs; -} - -// equality-expr: -// relational-expr { ( '==' / '!=' ) relational-expr }* -static AstNode* parse_equality_expr(Parser* p) { - AstNode* lhs = parse_relational_expr(p); - while (1) { - TokenKind op = peek_token(p)->kind; - if (op == TokenKind_eq || op == TokenKind_ne) { - next_token(p); - AstNode* rhs = parse_relational_expr(p); - lhs = ast_new_binary_expr(op, lhs, rhs); - } else { - break; - } - } - return lhs; -} - -// bitwise-and-expr: -// equality-expr { '&' equality-expr }* -static AstNode* parse_bitwise_and_expr(Parser* p) { - AstNode* lhs = parse_equality_expr(p); - while (1) { - if (consume_token_if(p, TokenKind_and)) { - AstNode* rhs = parse_equality_expr(p); - lhs = ast_new_binary_expr(TokenKind_and, lhs, rhs); - lhs->ty = type_new(TypeKind_int); - } else { - break; - } - } - return lhs; -} - -// bitwise-xor-expr: -// bitwise-and-expr { '^' bitwise-and-expr }* -static AstNode* parse_bitwise_xor_expr(Parser* p) { - AstNode* lhs = parse_bitwise_and_expr(p); - while (1) { - if (consume_token_if(p, TokenKind_xor)) { - AstNode* rhs = parse_bitwise_and_expr(p); - lhs = ast_new_binary_expr(TokenKind_xor, lhs, rhs); - lhs->ty = type_new(TypeKind_int); - } else { - break; - } - } - return lhs; -} - -// bitwise-or-expr: -// bitwise-xor-expr { '|' bitwise-xor-expr }* -static AstNode* parse_bitwise_or_expr(Parser* p) { - AstNode* lhs = parse_bitwise_xor_expr(p); - while (1) { - if (consume_token_if(p, TokenKind_or)) { - AstNode* rhs = parse_bitwise_xor_expr(p); - lhs = ast_new_binary_expr(TokenKind_or, lhs, rhs); - lhs->ty = type_new(TypeKind_int); - } else { - break; - } - } - return lhs; -} - -// logical-and-expr: -// bitwise-or-expr { '&&' bitwise-or-expr }* -static AstNode* parse_logical_and_expr(Parser* p) { - AstNode* lhs = parse_bitwise_or_expr(p); - while (1) { - if (consume_token_if(p, TokenKind_andand)) { - AstNode* rhs = parse_bitwise_or_expr(p); - lhs = ast_new_logical_expr(TokenKind_andand, lhs, rhs); - } else { - break; - } - } - return lhs; -} - -// logical-or-expr: -// logical-and-expr { '||' logical-and-expr }* -static AstNode* parse_logical_or_expr(Parser* p) { - AstNode* lhs = parse_logical_and_expr(p); - while (1) { - if (consume_token_if(p, TokenKind_oror)) { - AstNode* rhs = parse_logical_and_expr(p); - lhs = ast_new_logical_expr(TokenKind_oror, lhs, rhs); - } else { - break; - } - } - return lhs; -} - -// conditional-expr: -// logical-or-expr ( '?' expr ':' conditional-expr )? -static AstNode* parse_conditional_expr(Parser* p) { - AstNode* e = parse_logical_or_expr(p); - if (consume_token_if(p, TokenKind_question)) { - AstNode* then_expr = parse_expr(p); - expect(p, TokenKind_colon); - AstNode* else_expr = parse_assignment_expr(p); - return ast_new_cond_expr(e, then_expr, else_expr); - } else { - return e; - } -} - -// assignment-expr: -// conditional-expr -// unary-expr assignment-operator assignment-expr -static AstNode* parse_assignment_expr(Parser* p) { - AstNode* lhs = parse_conditional_expr(p); - while (1) { - TokenKind op = peek_token(p)->kind; - // TODO: check if the lhs is unary expression. - if (op == TokenKind_assign || op == TokenKind_assign_mul || op == TokenKind_assign_div || - op == TokenKind_assign_mod || op == TokenKind_assign_or || op == TokenKind_assign_and || - op == TokenKind_assign_xor || op == TokenKind_assign_lshift || op == TokenKind_assign_rshift) { - next_token(p); - AstNode* rhs = parse_assignment_expr(p); - lhs = ast_new_assign_expr(op, lhs, rhs); - } else if (consume_token_if(p, TokenKind_assign_add)) { - AstNode* rhs = parse_assignment_expr(p); - lhs = ast_new_assign_add_expr(lhs, rhs); - } else if (consume_token_if(p, TokenKind_assign_sub)) { - AstNode* rhs = parse_assignment_expr(p); - lhs = ast_new_assign_sub_expr(lhs, rhs); - } else { - break; - } - } - return lhs; -} - -// expr: -// assignment-expr { ',' assignment-expr }* -static AstNode* parse_expr(Parser* p) { - AstNode* lhs = parse_assignment_expr(p); - while (1) { - if (consume_token_if(p, TokenKind_comma)) { - AstNode* rhs = parse_assignment_expr(p); - AstNode* list = ast_new_list(2); - ast_append(list, lhs); - ast_append(list, rhs); - list->ty = rhs->ty; - lhs = list; - } else { - break; - } - } - return lhs; -} - -// constant-expr: -// conditional-expr -static AstNode* parse_constant_expr(Parser* p) { - return parse_conditional_expr(p); -} - -// pointer: -// { '*' TODO attribute-specifier-sequence? type-qualifier-list? }+ -static Type* parse_pointer_opt(Parser* p, Type* ty) { - while (peek_token(p)->kind == TokenKind_star) { - next_token(p); - ty = type_new_ptr(ty); - parse_type_qualifier_list_opt(p); - } - return ty; -} - -// type-qualifier-list: -// { type-qualifier }+ -static void parse_type_qualifier_list_opt(Parser* p) { - while (1) { - Token* tok = peek_token(p); - if (tok->kind == TokenKind_keyword_const || tok->kind == TokenKind_keyword_restrict || - tok->kind == TokenKind_keyword_volatile || tok->kind == TokenKind_keyword__Atomic) { - // TODO: currently type qualifiers are just ignored. - next_token(p); - } else { - break; - } - } -} - -// declarator | abstract-declarator?: -// pointer? identifier TODO attribute-specifier-sequence? { direct-declarator-suffix }* -// pointer? '(' declarator ')' { direct-declarator-suffix }* -// pointer? '(' abstract-declarator ')' { direct-declarator-suffix }* -// pointer? { direct-declarator-suffix }* -// -// direct-declarator-suffix: -// array-declarator-suffix TODO attribute-specifier-sequence? -// function-declarator-suffix TODO attribute-specifier-sequence? -static AstNode* parse_declarator_or_abstract_declarator_opt(Parser* p, Type* ty) { - ty = parse_pointer_opt(p, ty); - - AstNode* decl; - if (peek_token(p)->kind == TokenKind_ident) { - decl = ast_new_declarator(parse_ident(p)->value.string, ty); - } else if (peek_token(p)->kind == TokenKind_paren_l && !is_type_token(p, peek_token2(p))) { - next_token(p); - if (peek_token(p)->kind == TokenKind_paren_r) { - Token* tok = peek_token(p); - fatal_error("%s:%d: expected declarator, but got '%s'", tok->loc.filename, tok->loc.line, - token_stringify(tok)); - } - decl = parse_declarator_or_abstract_declarator_opt(p, ty); - expect(p, TokenKind_paren_r); - } else { - decl = ast_new_declarator(NULL, ty); - } - - while (1) { - if (peek_token(p)->kind == TokenKind_bracket_l) { - decl->ty = parse_array_declarator_suffix(p, decl->ty); - } else if (peek_token(p)->kind == TokenKind_paren_l) { - decl->ty = parse_function_declarator_suffix(p, decl->ty); - } else { - break; - } - } - - return decl; -} - -// parameter-type-list: -// parameter-list -// parameter-list ',' '...' -// '...' -static AstNode* parse_parameter_type_list(Parser* p) { - AstNode* params = parse_parameter_list(p); - - bool has_void = false; - for (int i = 0; i < params->as.list->len; ++i) { - if (params->as.list->items[i].as.declarator->name && - strcmp(params->as.list->items[i].as.declarator->name, "...") == 0) { - if (i != params->as.list->len - 1) { - fatal_error("..."); - } - --params->as.list->len; - break; - } - has_void |= params->as.list->items[i].ty->kind == TypeKind_void; - } - - if (has_void) { - if (params->as.list->len != 1) { - fatal_error("invalid use of void param"); - } - params->as.list->len = 0; - } - - return params; -} - -// parameter-list: -// { parameter-declaration | ',' } -static AstNode* parse_parameter_list(Parser* p) { - AstNode* params = ast_new_list(4); - - while (1) { - AstNode* param = parse_parameter_declaration(p); - ast_append(params, param); - - if (!consume_token_if(p, TokenKind_comma)) { - break; - } - } - - return params; -} - -// parameter-declaration: -// TODO attribute-specifier-sequence? declaration-specifiers declarator -// TODO attribute-specifier-sequence? declaration-specifiers abstract-declarator? -static AstNode* parse_parameter_declaration(Parser* p) { - if (consume_token_if(p, TokenKind_ellipsis)) { - return ast_new_declarator("...", NULL); - } - - Type* base_ty = parse_declaration_specifiers(p); - return parse_declarator_or_abstract_declarator_opt(p, base_ty); -} - -// array-declarator: -// direct-declarator '[' TODO type-qualifier-list? TODO assignment-expr? ']' -// TODO direct-declarator '[' 'static' type-qualifier-list? assignment-expr? ']' -// TODO direct-declarator '[' type-qualifier-list 'static' assignment-expr? ']' -// TODO direct-declarator '[' type-qualifier-list? '*' ']' -static Type* parse_array_declarator_suffix(Parser* p, Type* ty) { - next_token(p); // skip '[' - - int size = -1; - if (peek_token(p)->kind != TokenKind_bracket_r) { - AstNode* size_expr = parse_assignment_expr(p); - size = eval(size_expr); - } - expect(p, TokenKind_bracket_r); - - return type_new_array(ty, size); -} - -// function-declarator: -// direct-declarator '(' parameter-type-list? ')' -static Type* parse_function_declarator_suffix(Parser* p, Type* ty) { - next_token(p); // skip '(' - AstNode* params; - - if (consume_token_if(p, TokenKind_paren_r)) { - params = ast_new_list(1); - } else { - params = parse_parameter_type_list(p); - expect(p, TokenKind_paren_r); - } - - return type_new_func(ty, params); -} - -// direct-declarator: -// identifier TODO attribute-specifier-sequence? -// '(' declarator ')' -// array-declarator TODO attribute-specifier-sequence? -// function-declarator TODO attribute-specifier-sequence? -static AstNode* parse_direct_declarator(Parser* p, Type* ty) { - AstNode* decl; - if (peek_token(p)->kind == TokenKind_ident) { - decl = ast_new_declarator(parse_ident(p)->value.string, ty); - } else if (peek_token(p)->kind == TokenKind_paren_l && !is_type_token(p, peek_token2(p))) { - next_token(p); - // 1st pass: skip content inside parentheses. - int saved_pos = p->pos; - Type* dummy = type_new(TypeKind_void); - parse_declarator(p, dummy); - expect(p, TokenKind_paren_r); - // Parse suffixes. - while (1) { - if (peek_token(p)->kind == TokenKind_bracket_l) { - ty = parse_array_declarator_suffix(p, ty); - } else if (peek_token(p)->kind == TokenKind_paren_l) { - ty = parse_function_declarator_suffix(p, ty); - } else { - break; - } - } - // 2nd pass: re-parse inner content. - int end_pos = p->pos; - p->pos = saved_pos; - decl = parse_declarator(p, ty); - expect(p, TokenKind_paren_r); - p->pos = end_pos; - return ast_new_declarator(decl->as.declarator->name, decl->ty); - } else { - decl = ast_new_declarator(NULL, ty); - } - - while (1) { - if (peek_token(p)->kind == TokenKind_bracket_l) { - decl->ty = parse_array_declarator_suffix(p, decl->ty); - } else if (peek_token(p)->kind == TokenKind_paren_l) { - decl->ty = parse_function_declarator_suffix(p, decl->ty); - } else { - break; - } - } - - return ast_new_declarator(decl->as.declarator->name, decl->ty); -} - -// declarator: -// pointer? direct-declarator -static AstNode* parse_declarator(Parser* p, Type* ty) { - ty = parse_pointer_opt(p, ty); - return parse_direct_declarator(p, ty); -} - -// init-declarator-list: -// init-declarator -// init-declarator-list ',' init-declarator -static AstNode* parse_init_declarator_list(Parser* p, Type* ty) { - AstNode* list = ast_new_list(1); - while (1) { - AstNode* d = parse_init_declarator(p, ty); - ast_append(list, d); - if (!consume_token_if(p, TokenKind_comma)) { - break; - } - - if (ty->storage_class == StorageClass_typedef) { - continue; - } - // Immediately declare to allow following initializer to access previous variables. For example, - // int a = 1, b = a; - process_declarations(p, &list->as.list->items[list->as.list->len - 1]); - } - return list; -} - -// declaration: -// static_assert-declaration -// TODO attribute-specifier-sequence ';' -// TODO attribute-specifier-sequence declaration-specifiers init-declarator-list ';' -// declaration-specifiers init-declarator-list ';' -static AstNode* parse_declaration(Parser* p) { - if (peek_token(p)->kind == TokenKind_keyword_static_assert) { - return parse_static_assert_declaration(p); - } - - Type* ty = parse_declaration_specifiers(p); - AstNode* decls = parse_init_declarator_list(p, ty); - expect(p, TokenKind_semicolon); - process_declarations(p, &decls->as.list->items[decls->as.list->len - 1]); - return decls; -} - -static void create_local_initializer(AstNode* list, AstNode* lhs, AstNode* init, Type* ty) { - if (init->kind == AstNodeKind_array_initializer) { - AstNode* items = init->as.array_initializer->list; - if (ty->kind == TypeKind_array) { - for (int i = 0; i < items->as.list->len; ++i) { - AstNode* idx = ast_new_binary_expr(TokenKind_star, ast_new_int(i), ast_new_int(type_sizeof(ty->base))); - AstNode* elem = ast_new_deref_expr(ast_new_binary_expr(TokenKind_plus, lhs, idx)); - create_local_initializer(list, elem, &items->as.list->items[i], ty->base); - } - } else if (ty->kind == TypeKind_struct) { - AstNode* def = &ty->ref.defs->as.list->items[ty->ref.index]; - AstNode* members = def->as.struct_def->members; - for (int i = 0; i < items->as.list->len; ++i) { - const char* member_name = members->as.list->items[i].as.struct_member->name; - AstNode* member_lhs = ast_new_member_access_expr(ast_new_ref_expr(lhs), member_name); - create_local_initializer(list, member_lhs, &items->as.list->items[i], members->as.list->items[i].ty); - } - } else if (ty->kind == TypeKind_union) { - AstNode* def = &ty->ref.defs->as.list->items[ty->ref.index]; - AstNode* members = def->as.union_def->members; - const char* member_name = members->as.list->items[0].as.struct_member->name; - AstNode* member_lhs = ast_new_member_access_expr(ast_new_ref_expr(lhs), member_name); - create_local_initializer(list, member_lhs, &items->as.list->items[0], members->as.list->items[0].ty); - } - } else { - AstNode* assign = ast_new_assign_expr(TokenKind_assign, lhs, init); - ast_append(list, ast_new_expr_stmt(assign)); - } -} - -static void process_declarations(Parser* p, AstNode* decl) { - const char* name = decl->as.declarator->name; - if (decl->ty->kind == TypeKind_func) { - // TODO: refactor - decl->ty->storage_class = decl->ty->result->storage_class; - decl->ty->result->storage_class = StorageClass_unspecified; - register_func(p, name, decl->ty); - decl->kind = AstNodeKind_func_decl; - } else { - if (type_is_unsized(decl->ty)) { - fatal_error("process_declarations: invalid type for variable"); - } - - if (p->scope) { - if (find_lvar_in_current_scope(p, name) != -1) { - // TODO: use name's location. - fatal_error("%s:%d: '%s' redeclared", peek_token(p)->loc.filename, peek_token(p)->loc.line, name); - } - int stack_offset = add_lvar(p, name, decl->ty, calc_lvar_stack_offset(p, decl->ty)); - - if (decl->as.declarator->init) { - if (decl->as.declarator->init->kind == AstNodeKind_array_initializer) { - AstNode* lhs_base = ast_new_lvar(name, stack_offset, decl->ty); - AstNode* init = decl->as.declarator->init; - AstNode* stmt_list = ast_new_list(4); - create_local_initializer(stmt_list, lhs_base, init, decl->ty); - *decl = *stmt_list; - } else { - AstNode* lhs = ast_new_lvar(name, stack_offset, decl->ty); - AstNode* assign = ast_new_assign_expr(TokenKind_assign, lhs, decl->as.declarator->init); - decl->kind = AstNodeKind_expr_stmt; - decl->as.expr_stmt = calloc(1, sizeof(ExprStmtNode)); - decl->as.expr_stmt->expr = assign; - } - } else { - decl->kind = AstNodeKind_nop; - } - } else { - if (find_gvar(p, name) != -1) { - // TODO - // fatal_error("process_declarations: %s redeclared", name); - } - // TODO: refactor - Type* base_ty = decl->ty; - while (base_ty->base) { - base_ty = base_ty->base; - } - if (base_ty != decl->ty) { - decl->ty->storage_class = base_ty->storage_class; - base_ty->storage_class = StorageClass_unspecified; - } - - // TODO: refactor - if (decl->ty->kind == TypeKind_array && decl->ty->array_size == -1 && decl->as.declarator && - decl->as.declarator->init && decl->as.declarator->init->kind == AstNodeKind_array_initializer) { - decl->ty->array_size = decl->as.declarator->init->as.array_initializer->list->as.list->len; - } - - GlobalVar* gvar = gvars_push_new(&p->gvars); - gvar->name = name; - gvar->ty = decl->ty; - - if (decl->ty->storage_class == StorageClass_extern) { - decl->kind = AstNodeKind_nop; - } else { - AstNode* init_expr = decl->as.declarator ? decl->as.declarator->init : NULL; - decl->kind = AstNodeKind_gvar_decl; - decl->as.gvar_decl = calloc(1, sizeof(GvarDeclNode)); - decl->as.gvar_decl->name = name; - decl->as.gvar_decl->expr = init_expr; - } - } - } -} - -// function-definition: -// declaration-specifiers init-declarator-list compound-stmt -static AstNode* parse_function_definition(Parser* p, AstNode* decls) { - if (decls->as.list->len != 1) { - fatal_error("parse_function_definition: invalid syntax"); - } - - Type* ty = decls->as.list->items[0].ty; - Type* base_ty = ty->result; - while (base_ty->base) { - base_ty = base_ty->base; - } - ty->storage_class = base_ty->storage_class; - base_ty->storage_class = StorageClass_unspecified; - const char* name = decls->as.list->items[0].as.declarator->name; - AstNode* params = ty->params; - - register_func(p, name, ty); - enter_func(p); - register_params(p, params); - AstNode* body = parse_compound_stmt(p); - leave_func(p); - - int stack_size = 0; - if (p->lvars.len != 0) { - stack_size = p->lvars.data[p->lvars.len - 1].stack_offset + type_sizeof(p->lvars.data[p->lvars.len - 1].ty); - if (stack_size < 0) { - stack_size = 0; - } - } - return ast_new_func_def(name, ty, params, body, stack_size); -} - -static void process_typedefs(Parser* p, AstNode* decls) { - for (int i = 0; i < decls->as.list->len; ++i) { - AstNode* decl = &decls->as.list->items[i]; - - AstNode* typedef_ = ast_new_typedef_decl(decl->as.declarator->name, decl->ty); - ast_append(p->typedefs, typedef_); - } -} - -static char* generate_anonymous_name(Parser* p) { - char* buf = calloc(32, sizeof(char)); - sprintf(buf, "__anonymous_%d__", p->anonymous_user_type_counter++); - return buf; -} - -// declaration-specifiers: -// { declaration-specifier }+ TODO attribute-specifier-sequence? -// -// declaration-specifier: -// storage-class-specifier -// type-specifier-qualifier -// function-speicifier -// -// storage-class-specifier: -// 'auto' -// 'constexpr' -// 'extern' -// 'register' -// 'static' -// 'thread_local' -// 'typedef' -// -// type-specifier-qualifier: -// type-specifier -// type-qualifier -// alignment-specifier -// -// function-specifier: -// 'inline' -// '_Noreturn' -// -// type-specifier: -// 'void' -// 'char' -// 'short' -// 'int' -// 'long' -// 'float' -// 'double' -// 'signed' -// 'unsigned' -// TODO '_BitInt' '(' constant-expr ')' -// 'bool' -// '_Complex' -// '_Decimal32' -// '_Decimal64' -// '_Decimal128' -// atomic-type-specifier -// struct-or-union-specifier -// enum-specifier -// typeof-specifier -// typedef-name -// -// type-qualifier: -// 'const' -// 'restrict' -// 'volatile' -// '_Atomic' -// -// alignment-specifier: -// TODO 'alignas' '(' type-name ')' -// TODO 'alignas' '(' constant-expr ')' -// -// atomic-type-specifier: -// TODO '_Atomic' '(' type-name ')' -// -// struct-or-union-specifier: -// 'struct' ... -// 'union' ... -// -// enum-specifier: -// 'enum' ... -// -// typeof-specifier: -// TODO 'typeof' '(' typeof-specifier-argument ')' -// TODO 'typeof_unqual' '(' typeof-specifier-argument ')' -// -// typedef-name: -// identifier -static Type* parse_declaration_specifiers(Parser* p) { - StorageClass storage_class = StorageClass_unspecified; - int type_specifiers = 0; - Type* ty = NULL; - - Token* tok; - while (1) { - tok = peek_token(p); - - // storage-class-spciefier - if (tok->kind == TokenKind_keyword_auto) { - unimplemented(); - } else if (tok->kind == TokenKind_keyword_constexpr) { - unimplemented(); - } else if (tok->kind == TokenKind_keyword_extern) { - next_token(p); - storage_class = StorageClass_extern; - } else if (tok->kind == TokenKind_keyword_register) { - unimplemented(); - } else if (tok->kind == TokenKind_keyword_static) { - next_token(p); - storage_class = StorageClass_static; - } else if (tok->kind == TokenKind_keyword_thread_local) { - unimplemented(); - } else if (tok->kind == TokenKind_keyword_typedef) { - next_token(p); - storage_class = StorageClass_typedef; - } - // type-specifier-qualifier > type-specifier - else if (tok->kind == TokenKind_keyword_void) { - if (type_specifiers & TypeSpecifierMask_void) { - fatal_error("%s:%d: duplicate '%s'", tok->loc.filename, tok->loc.line, token_stringify(tok)); - } - next_token(p); - type_specifiers += TypeSpecifierMask_void; - } else if (tok->kind == TokenKind_keyword_char) { - if (type_specifiers & TypeSpecifierMask_char) { - fatal_error("%s:%d: duplicate '%s'", tok->loc.filename, tok->loc.line, token_stringify(tok)); - } - next_token(p); - type_specifiers += TypeSpecifierMask_char; - } else if (tok->kind == TokenKind_keyword_short) { - if (type_specifiers & TypeSpecifierMask_short) { - fatal_error("%s:%d: duplicate '%s'", tok->loc.filename, tok->loc.line, token_stringify(tok)); - } - next_token(p); - type_specifiers += TypeSpecifierMask_short; - } else if (tok->kind == TokenKind_keyword_int) { - if (type_specifiers & TypeSpecifierMask_int) { - fatal_error("%s:%d: duplicate '%s'", tok->loc.filename, tok->loc.line, token_stringify(tok)); - } - next_token(p); - type_specifiers += TypeSpecifierMask_int; - } else if (tok->kind == TokenKind_keyword_long) { - if (type_specifiers & (TypeSpecifierMask_long + TypeSpecifierMask_long)) { - fatal_error("%s:%d: too looong!", tok->loc.filename, tok->loc.line); - } - next_token(p); - type_specifiers += TypeSpecifierMask_long; - } else if (tok->kind == TokenKind_keyword_float) { - if (type_specifiers & TypeSpecifierMask_float) { - fatal_error("%s:%d: duplicate '%s'", tok->loc.filename, tok->loc.line, token_stringify(tok)); - } - next_token(p); - type_specifiers += TypeSpecifierMask_float; - } else if (tok->kind == TokenKind_keyword_double) { - if (type_specifiers & TypeSpecifierMask_double) { - fatal_error("%s:%d: duplicate '%s'", tok->loc.filename, tok->loc.line, token_stringify(tok)); - } - next_token(p); - type_specifiers += TypeSpecifierMask_double; - } else if (tok->kind == TokenKind_keyword_signed) { - if (type_specifiers & TypeSpecifierMask_signed) { - fatal_error("%s:%d: duplicate '%s'", tok->loc.filename, tok->loc.line, token_stringify(tok)); - } - next_token(p); - type_specifiers += TypeSpecifierMask_signed; - } else if (tok->kind == TokenKind_keyword_unsigned) { - if (type_specifiers & TypeSpecifierMask_unsigned) { - fatal_error("%s:%d: duplicate '%s'", tok->loc.filename, tok->loc.line, token_stringify(tok)); - } - next_token(p); - type_specifiers += TypeSpecifierMask_unsigned; - } else if (tok->kind == TokenKind_keyword__BitInt) { - if (type_specifiers & TypeSpecifierMask__BitInt) { - fatal_error("%s:%d: duplicate '%s'", tok->loc.filename, tok->loc.line, token_stringify(tok)); - } - next_token(p); - type_specifiers += TypeSpecifierMask__BitInt; - } else if (tok->kind == TokenKind_keyword_bool) { - if (type_specifiers & TypeSpecifierMask_bool) { - fatal_error("%s:%d: duplicate '%s'", tok->loc.filename, tok->loc.line, token_stringify(tok)); - } - next_token(p); - type_specifiers += TypeSpecifierMask_bool; - } else if (tok->kind == TokenKind_keyword__Complex) { - if (type_specifiers & TypeSpecifierMask__Complex) { - fatal_error("%s:%d: duplicate '%s'", tok->loc.filename, tok->loc.line, token_stringify(tok)); - } - next_token(p); - type_specifiers += TypeSpecifierMask__Complex; - } else if (tok->kind == TokenKind_keyword__Decimal32) { - if (type_specifiers & TypeSpecifierMask__Decimal32) { - fatal_error("%s:%d: duplicate '%s'", tok->loc.filename, tok->loc.line, token_stringify(tok)); - } - next_token(p); - type_specifiers += TypeSpecifierMask__Decimal32; - } else if (tok->kind == TokenKind_keyword__Decimal64) { - if (type_specifiers & TypeSpecifierMask__Decimal64) { - fatal_error("%s:%d: duplicate '%s'", tok->loc.filename, tok->loc.line, token_stringify(tok)); - } - next_token(p); - type_specifiers += TypeSpecifierMask__Decimal64; - } else if (tok->kind == TokenKind_keyword__Decimal128) { - if (type_specifiers & TypeSpecifierMask__Decimal128) { - fatal_error("%s:%d: duplicate '%s'", tok->loc.filename, tok->loc.line, token_stringify(tok)); - } - next_token(p); - type_specifiers += TypeSpecifierMask__Decimal128; - } else if (tok->kind == TokenKind_keyword__Atomic) { - if (type_specifiers & TypeSpecifierMask__Atomic) { - fatal_error("%s:%d: duplicate '%s'", tok->loc.filename, tok->loc.line, token_stringify(tok)); - } - next_token(p); - type_specifiers += TypeSpecifierMask__Atomic; - } else if (tok->kind == TokenKind_keyword_struct) { - if (type_specifiers & TypeSpecifierMask_struct) { - fatal_error("%s:%d: duplicate '%s'", tok->loc.filename, tok->loc.line, token_stringify(tok)); - } - ty = parse_struct_specifier(p); - type_specifiers += TypeSpecifierMask_struct; - } else if (tok->kind == TokenKind_keyword_union) { - if (type_specifiers & TypeSpecifierMask_union) { - fatal_error("%s:%d: duplicate '%s'", tok->loc.filename, tok->loc.line, token_stringify(tok)); - } - ty = parse_union_specifier(p); - type_specifiers += TypeSpecifierMask_union; - } else if (tok->kind == TokenKind_keyword_enum) { - if (type_specifiers & TypeSpecifierMask_enum) { - fatal_error("%s:%d: duplicate '%s'", tok->loc.filename, tok->loc.line, token_stringify(tok)); - } - ty = parse_enum_specifier(p); - type_specifiers += TypeSpecifierMask_enum; - } else if (tok->kind == TokenKind_keyword_typeof) { - unimplemented(); - } else if (tok->kind == TokenKind_keyword_typeof_unqual) { - unimplemented(); - } else if (is_typedef_name(p, tok)) { - if (type_specifiers & TypeSpecifierMask_typedef_name) { - fatal_error("%s:%d: duplicate '%s'", tok->loc.filename, tok->loc.line, token_stringify(tok)); - } - next_token(p); - int typedef_idx = find_typedef(p, tok->value.string); - ty = type_dup(p->typedefs->as.list->items[typedef_idx].ty); - type_specifiers += TypeSpecifierMask_typedef_name; - } - // type-specifier-qualifier > type-qualifier - else if (tok->kind == TokenKind_keyword_const) { - // TODO - next_token(p); - } else if (tok->kind == TokenKind_keyword_restrict) { - // TODO - next_token(p); - } else if (tok->kind == TokenKind_keyword_volatile) { - // TODO - next_token(p); - } else if (tok->kind == TokenKind_keyword__Atomic) { - unimplemented(); - } - // type-specifier-qualifier > alignment-specifier - else if (tok->kind == TokenKind_keyword_alignas) { - unimplemented(); - } - // function-specifier - else if (tok->kind == TokenKind_keyword_inline) { - // TODO - next_token(p); - } else if (tok->kind == TokenKind_keyword__Noreturn) { - // TODO - next_token(p); - } else { - break; - } - } - - Type* ty_ = distinguish_type_from_type_specifiers(type_specifiers); - if (ty_) { - ty = ty_; - } - if (!ty) { - fatal_error("%s:%d: no type specifiers", tok->loc.filename, tok->loc.line); - } - - ty->storage_class = storage_class; - return ty; -} - -// init-declarator: -// declarator -// declarator '=' initializer -static AstNode* parse_init_declarator(Parser* p, Type* ty) { - AstNode* decl = parse_declarator(p, ty); - if (consume_token_if(p, TokenKind_assign)) { - decl->as.declarator->init = parse_initializer(p); - } - return decl; -} - -// struct-specifier: -// 'struct' TODO attribute-specifier-sequence? identifier? '{' member-declaration-list '}' -// 'struct' TODO attribute-specifier-sequence? identifier -static Type* parse_struct_specifier(Parser* p) { - SourceLocation struct_kw_loc = current_location(p); - next_token(p); - - const Token* name; - char* anonymous_name = NULL; - if (peek_token(p)->kind == TokenKind_brace_l) { - anonymous_name = generate_anonymous_name(p); - Token* anonymous_token = calloc(1, sizeof(Token)); - anonymous_token->kind = TokenKind_ident; - anonymous_token->value.string = anonymous_name; - anonymous_token->loc = struct_kw_loc; - name = anonymous_token; - } else { - name = parse_ident(p); - } - - if (!consume_token_if(p, TokenKind_brace_l)) { - int struct_idx = find_struct(p, name->value.string); - if (struct_idx != -1) { - Type* ty = type_new(TypeKind_struct); - ty->ref.defs = p->structs; - ty->ref.index = struct_idx; - return ty; - } else { - // TODO - AstNode* new_struct = ast_new_struct_def(name->value.string); - ast_append(p->structs, new_struct); - struct_idx = p->structs->as.list->len - 1; - - Type* ty = type_new(TypeKind_struct); - ty->ref.defs = p->structs; - ty->ref.index = struct_idx; - return ty; - } - } - - int struct_idx = find_struct(p, name->value.string); - if (struct_idx != -1 && p->structs->as.list->items[struct_idx].as.struct_def->members) { - fatal_error("%s:%d: struct %s redefined", name->loc.filename, name->loc.line, name->value.string); - } - - if (struct_idx == -1) { - AstNode* new_struct = ast_new_struct_def(name->value.string); - ast_append(p->structs, new_struct); - struct_idx = p->structs->as.list->len - 1; - } - - AstNode* members = parse_member_declaration_list(p); - expect(p, TokenKind_brace_r); - p->structs->as.list->items[struct_idx].as.struct_def->members = members; - - Type* ty = type_new(TypeKind_struct); - ty->ref.defs = p->structs; - ty->ref.index = struct_idx; - return ty; -} - -// union-specifier: -// 'union' TODO attribute-specifier-sequence? identifier? '{' member-declaration-list '}' -// 'union' TODO attribute-specifier-sequence? identifier -static Type* parse_union_specifier(Parser* p) { - SourceLocation union_kw_loc = current_location(p); - next_token(p); - - const Token* name; - char* anonymous_name = NULL; - if (peek_token(p)->kind == TokenKind_brace_l) { - anonymous_name = generate_anonymous_name(p); - Token* anonymous_token = calloc(1, sizeof(Token)); - anonymous_token->kind = TokenKind_ident; - anonymous_token->value.string = anonymous_name; - anonymous_token->loc = union_kw_loc; - name = anonymous_token; - } else { - name = parse_ident(p); - } - - if (!consume_token_if(p, TokenKind_brace_l)) { - int union_idx = find_union(p, name->value.string); - if (union_idx != -1) { - Type* ty = type_new(TypeKind_union); - ty->ref.defs = p->unions; - ty->ref.index = union_idx; - return ty; - } else { - // TODO - AstNode* new_union = ast_new_union_def(name->value.string); - ast_append(p->unions, new_union); - union_idx = p->unions->as.list->len - 1; - - Type* ty = type_new(TypeKind_union); - ty->ref.defs = p->unions; - ty->ref.index = union_idx; - return ty; - } - } - - int union_idx = find_union(p, name->value.string); - if (union_idx != -1 && p->unions->as.list->items[union_idx].as.union_def->members) { - fatal_error("%s:%d: union %s redefined", name->loc.filename, name->loc.line, name->value.string); - } - - if (union_idx == -1) { - AstNode* new_union = ast_new_union_def(name->value.string); - ast_append(p->unions, new_union); - union_idx = p->unions->as.list->len - 1; - } - - AstNode* members = parse_member_declaration_list(p); - expect(p, TokenKind_brace_r); - p->unions->as.list->items[union_idx].as.union_def->members = members; - - Type* ty = type_new(TypeKind_union); - ty->ref.defs = p->unions; - ty->ref.index = union_idx; - return ty; -} - -// member-declaration-list: -// { member-declaration }+ -static AstNode* parse_member_declaration_list(Parser* p) { - AstNode* members = ast_new_list(4); - while (peek_token(p)->kind != TokenKind_brace_r) { - AstNode* decls = parse_member_declaration(p); - for (int i = 0; i < decls->as.list->len; i++) { - ast_append(members, &decls->as.list->items[i]); - } - } - return members; -} - -// member-declaration: -// TODO attribute-specifier-sequence? specifier-qualifier-list member-declaration-list? ';' -// TODO static_assert-declaration -static AstNode* parse_member_declaration(Parser* p) { - Type* base_ty = parse_specifier_qualifier_list(p); - - if (consume_token_if(p, TokenKind_semicolon)) { - char* name = generate_anonymous_name(p); - AstNode* decls = ast_new_list(1); - AstNode* member = ast_new(AstNodeKind_struct_member); - member->ty = base_ty; - member->as.struct_member = calloc(1, sizeof(StructMemberNode)); - member->as.struct_member->name = name; - ast_append(decls, member); - return decls; - } - AstNode* decls = parse_member_declarator_list(p, base_ty); - expect(p, TokenKind_semicolon); - return decls; -} - -// specifier-qualifier-list: -// { type-specifier-qualifier }+ TODO attribute-specifier-sequence? -static Type* parse_specifier_qualifier_list(Parser* p) { - int type_specifiers = 0; - Type* ty = NULL; - - while (1) { - Token* tok = peek_token(p); - - // type-specifier-qualifier > type-specifier - if (tok->kind == TokenKind_keyword_void) { - if (type_specifiers & TypeSpecifierMask_void) { - fatal_error("%s:%d: duplicate '%s'", tok->loc.filename, tok->loc.line, token_stringify(tok)); - } - next_token(p); - type_specifiers += TypeSpecifierMask_void; - } else if (tok->kind == TokenKind_keyword_char) { - if (type_specifiers & TypeSpecifierMask_char) { - fatal_error("%s:%d: duplicate '%s'", tok->loc.filename, tok->loc.line, token_stringify(tok)); - } - next_token(p); - type_specifiers += TypeSpecifierMask_char; - } else if (tok->kind == TokenKind_keyword_short) { - if (type_specifiers & TypeSpecifierMask_short) { - fatal_error("%s:%d: duplicate '%s'", tok->loc.filename, tok->loc.line, token_stringify(tok)); - } - next_token(p); - type_specifiers += TypeSpecifierMask_short; - } else if (tok->kind == TokenKind_keyword_int) { - if (type_specifiers & TypeSpecifierMask_int) { - fatal_error("%s:%d: duplicate '%s'", tok->loc.filename, tok->loc.line, token_stringify(tok)); - } - next_token(p); - type_specifiers += TypeSpecifierMask_int; - } else if (tok->kind == TokenKind_keyword_long) { - if (type_specifiers & (TypeSpecifierMask_long + TypeSpecifierMask_long)) { - fatal_error("%s:%d: too looong!", tok->loc.filename, tok->loc.line); - } - next_token(p); - type_specifiers += TypeSpecifierMask_long; - } else if (tok->kind == TokenKind_keyword_float) { - if (type_specifiers & TypeSpecifierMask_float) { - fatal_error("%s:%d: duplicate '%s'", tok->loc.filename, tok->loc.line, token_stringify(tok)); - } - next_token(p); - type_specifiers += TypeSpecifierMask_float; - } else if (tok->kind == TokenKind_keyword_double) { - if (type_specifiers & TypeSpecifierMask_double) { - fatal_error("%s:%d: duplicate '%s'", tok->loc.filename, tok->loc.line, token_stringify(tok)); - } - next_token(p); - type_specifiers += TypeSpecifierMask_double; - } else if (tok->kind == TokenKind_keyword_signed) { - if (type_specifiers & TypeSpecifierMask_signed) { - fatal_error("%s:%d: duplicate '%s'", tok->loc.filename, tok->loc.line, token_stringify(tok)); - } - next_token(p); - type_specifiers += TypeSpecifierMask_signed; - } else if (tok->kind == TokenKind_keyword_unsigned) { - if (type_specifiers & TypeSpecifierMask_unsigned) { - fatal_error("%s:%d: duplicate '%s'", tok->loc.filename, tok->loc.line, token_stringify(tok)); - } - next_token(p); - type_specifiers += TypeSpecifierMask_unsigned; - } else if (tok->kind == TokenKind_keyword__BitInt) { - if (type_specifiers & TypeSpecifierMask__BitInt) { - fatal_error("%s:%d: duplicate '%s'", tok->loc.filename, tok->loc.line, token_stringify(tok)); - } - next_token(p); - type_specifiers += TypeSpecifierMask__BitInt; - } else if (tok->kind == TokenKind_keyword_bool) { - if (type_specifiers & TypeSpecifierMask_bool) { - fatal_error("%s:%d: duplicate '%s'", tok->loc.filename, tok->loc.line, token_stringify(tok)); - } - next_token(p); - type_specifiers += TypeSpecifierMask_bool; - } else if (tok->kind == TokenKind_keyword__Complex) { - if (type_specifiers & TypeSpecifierMask__Complex) { - fatal_error("%s:%d: duplicate '%s'", tok->loc.filename, tok->loc.line, token_stringify(tok)); - } - next_token(p); - type_specifiers += TypeSpecifierMask__Complex; - } else if (tok->kind == TokenKind_keyword__Decimal32) { - if (type_specifiers & TypeSpecifierMask__Decimal32) { - fatal_error("%s:%d: duplicate '%s'", tok->loc.filename, tok->loc.line, token_stringify(tok)); - } - next_token(p); - type_specifiers += TypeSpecifierMask__Decimal32; - } else if (tok->kind == TokenKind_keyword__Decimal64) { - if (type_specifiers & TypeSpecifierMask__Decimal64) { - fatal_error("%s:%d: duplicate '%s'", tok->loc.filename, tok->loc.line, token_stringify(tok)); - } - next_token(p); - type_specifiers += TypeSpecifierMask__Decimal64; - } else if (tok->kind == TokenKind_keyword__Decimal128) { - if (type_specifiers & TypeSpecifierMask__Decimal128) { - fatal_error("%s:%d: duplicate '%s'", tok->loc.filename, tok->loc.line, token_stringify(tok)); - } - next_token(p); - type_specifiers += TypeSpecifierMask__Decimal128; - } else if (tok->kind == TokenKind_keyword__Atomic) { - if (type_specifiers & TypeSpecifierMask__Atomic) { - fatal_error("%s:%d: duplicate '%s'", tok->loc.filename, tok->loc.line, token_stringify(tok)); - } - next_token(p); - type_specifiers += TypeSpecifierMask__Atomic; - } else if (tok->kind == TokenKind_keyword_struct) { - if (type_specifiers & TypeSpecifierMask_struct) { - fatal_error("%s:%d: duplicate '%s'", tok->loc.filename, tok->loc.line, token_stringify(tok)); - } - ty = parse_struct_specifier(p); - type_specifiers += TypeSpecifierMask_struct; - } else if (tok->kind == TokenKind_keyword_union) { - if (type_specifiers & TypeSpecifierMask_union) { - fatal_error("%s:%d: duplicate '%s'", tok->loc.filename, tok->loc.line, token_stringify(tok)); - } - ty = parse_union_specifier(p); - type_specifiers += TypeSpecifierMask_union; - } else if (tok->kind == TokenKind_keyword_enum) { - if (type_specifiers & TypeSpecifierMask_enum) { - fatal_error("%s:%d: duplicate '%s'", tok->loc.filename, tok->loc.line, token_stringify(tok)); - } - ty = parse_enum_specifier(p); - type_specifiers += TypeSpecifierMask_enum; - } else if (tok->kind == TokenKind_keyword_typeof) { - unimplemented(); - } else if (tok->kind == TokenKind_keyword_typeof_unqual) { - unimplemented(); - } else if (is_typedef_name(p, tok)) { - if (type_specifiers & TypeSpecifierMask_typedef_name) { - fatal_error("%s:%d: duplicate '%s'", tok->loc.filename, tok->loc.line, token_stringify(tok)); - } - next_token(p); - int typedef_idx = find_typedef(p, tok->value.string); - ty = type_dup(p->typedefs->as.list->items[typedef_idx].ty); - type_specifiers += TypeSpecifierMask_typedef_name; - } - // type-specifier-qualifier > type-qualifier - else if (tok->kind == TokenKind_keyword_const) { - // TODO - next_token(p); - } else if (tok->kind == TokenKind_keyword_restrict) { - unimplemented(); - } else if (tok->kind == TokenKind_keyword_volatile) { - // TODO - next_token(p); - } else if (tok->kind == TokenKind_keyword__Atomic) { - unimplemented(); - } - // type-specifier-qualifier > alignment-specifier - else if (tok->kind == TokenKind_keyword_alignas) { - unimplemented(); - } else { - break; - } - } - - Type* ty_ = distinguish_type_from_type_specifiers(type_specifiers); - if (ty_) { - ty = ty_; - } - - ty->storage_class = StorageClass_unspecified; - return ty; -} - -// member-declarator-list: -// { member-declarator | ',' }+ -static AstNode* parse_member_declarator_list(Parser* p, Type* base_ty) { - AstNode* list = ast_new_list(1); - while (1) { - AstNode* d = parse_member_declarator(p, base_ty); - ast_append(list, d); - if (!consume_token_if(p, TokenKind_comma)) { - break; - } - } - return list; -} - -// member-declarator: -// declarator -// declarator ':' constant-expr -// TODO ':' constant-expr -static AstNode* parse_member_declarator(Parser* p, Type* base_ty) { - AstNode* decl = parse_declarator(p, base_ty); - - const char* name = decl->as.declarator->name; - decl->kind = AstNodeKind_struct_member; - decl->as.struct_member = calloc(1, sizeof(StructMemberNode)); - decl->as.struct_member->name = name; - - if (consume_token_if(p, TokenKind_colon)) { - AstNode* bit_width_node = parse_constant_expr(p); - InitData* bit_width_evaluated = eval_init_expr(bit_width_node, type_new(TypeKind_int)); - if (bit_width_evaluated->len != 1) - fatal_error("parse_member_declarator: invalid bit-field"); - if (bit_width_evaluated->blocks[0].kind != InitDataBlockKind_bytes) - fatal_error("parse_member_declarator: invalid bit-field"); - if (bit_width_evaluated->blocks[0].as.bytes.len != sizeof(int)) - fatal_error("parse_member_declarator: invalid bit-field"); - int bit_width; - memcpy(&bit_width, bit_width_evaluated->blocks[0].as.bytes.buf, sizeof(int)); - if (bit_width < 0) - fatal_error("parse_member_declarator: invalid bit-field"); - if (bit_width % 8 != 0) - fatal_error("parse_member_declarator: unimplemented"); - decl->as.struct_member->is_bitfield = true; - decl->as.struct_member->bitfield_offset = -1; // TODO - decl->as.struct_member->bitfield_width = bit_width; - } - - return decl; -} - -// enum-specifier: -// 'enum' TODO attribute-specifier-sequence? identifier? TODO enum-type-specifier? '{' enumerator-list ','? '}' -// 'enum' identifier TODO enum-type-specifier? -static Type* parse_enum_specifier(Parser* p) { - SourceLocation enum_kw_loc = current_location(p); - next_token(p); - - const Token* name; - char* anonymous_name = NULL; - if (peek_token(p)->kind == TokenKind_brace_l) { - anonymous_name = generate_anonymous_name(p); - Token* anonymous_token = calloc(1, sizeof(Token)); - anonymous_token->kind = TokenKind_ident; - anonymous_token->value.string = anonymous_name; - anonymous_token->loc = enum_kw_loc; - name = anonymous_token; - } else { - name = parse_ident(p); - } - - if (!consume_token_if(p, TokenKind_brace_l)) { - int enum_idx = find_enum(p, name->value.string); - if (enum_idx != -1) { - Type* ty = type_new(TypeKind_enum); - ty->ref.defs = p->enums; - ty->ref.index = enum_idx; - return ty; - } else { - // TODO - AstNode* new_enum = ast_new_enum_def(name->value.string); - ast_append(p->enums, new_enum); - enum_idx = p->enums->as.list->len - 1; - - Type* ty = type_new(TypeKind_enum); - ty->ref.defs = p->enums; - ty->ref.index = enum_idx; - return ty; - } - } - - int enum_idx = find_enum(p, name->value.string); - if (enum_idx != -1 && p->enums->as.list->items[enum_idx].as.enum_def->members) { - fatal_error("%s:%d: enum %s redefined", name->loc.filename, name->loc.line, name->value.string); - } - - if (enum_idx == -1) { - AstNode* new_enum = ast_new_enum_def(name->value.string); - ast_append(p->enums, new_enum); - enum_idx = p->enums->as.list->len - 1; - } - - parse_enum_members(p, enum_idx); - expect(p, TokenKind_brace_r); - - Type* ty = type_new(TypeKind_enum); - ty->ref.defs = p->enums; - ty->ref.index = enum_idx; - return ty; -} - -// enumerator-list: -// { enumerator |? ',' }+ -static void parse_enum_members(Parser* p, int enum_idx) { - int next_value = 0; - AstNode* list = ast_new_list(16); - - if (!p->enums->as.list->items[enum_idx].as.enum_def->members) { - p->enums->as.list->items[enum_idx].as.enum_def->members = list; - } - - while (peek_token(p)->kind != TokenKind_brace_r) { - AstNode* member = parse_enum_member(p, next_value); - next_value = member->as.enum_member->value + 1; - - ast_append(list, member); - - if (!consume_token_if(p, TokenKind_comma)) { - break; - } - } -} - -// enumerator: -// enumeration-constant TODO attribute-specifier-sequence? ( '=' constant-expr )? -static AstNode* parse_enum_member(Parser* p, int next_value) { - const Token* name = parse_ident(p); - int value; - if (consume_token_if(p, TokenKind_assign)) { - AstNode* expr = parse_constant_expr(p); - value = eval(expr); - } else { - value = next_value; - } - return ast_new_enum_member(name->value.string, value); -} - -// type-name: -// specifier-qualifier-list abstract-declarator? -static Type* parse_type_name(Parser* p) { - Type* ty = parse_specifier_qualifier_list(p); - ty = parse_abstract_declarator_opt(p, ty); - return ty; -} - -// abstract-declarator: -// pointer -// pointer? TODO direct-abstract-declarator -// -// abstract-declarator?: -// pointer? TODO direct-abstract-declarator? -static Type* parse_abstract_declarator_opt(Parser* p, Type* ty) { - return parse_pointer_opt(p, ty); -} - -// braced-initializer: -// '{' { designation? initializer |? ',' }* '}' -static AstNode* parse_braced_initializer(Parser* p) { - AstNode* inits = ast_new_list(4); - expect(p, TokenKind_brace_l); - while (peek_token(p)->kind != TokenKind_brace_r) { - // TODO: support designation - AstNode* init = parse_initializer(p); - ast_append(inits, init); - if (!consume_token_if(p, TokenKind_comma)) { - break; - } - } - expect(p, TokenKind_brace_r); - return ast_new_array_initializer(inits); -} - -// initializer: -// braced-initializer -// assignment-expr -static AstNode* parse_initializer(Parser* p) { - if (peek_token(p)->kind == TokenKind_brace_l) { - return parse_braced_initializer(p); - } else { - return parse_assignment_expr(p); - } -} - -#ifndef __ducc__ -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wunused-function" -#endif -// attribute-specifier-sequence: -// { attribute-specifier }+ -static AstNode* parse_attribute_specifier_sequence_opt(Parser* p) { - AstNode* attrs = NULL; - AstNode* attr; - while ((attr = parse_attribute_specifier_opt(p))) { - unimplemented(); - } - return attrs; -} -#ifndef __ducc__ -#pragma GCC diagnostic pop -#endif - -// attribute-specifier: -// '[' '[' attribute-list ']' ']' -static AstNode* parse_attribute_specifier_opt(Parser* p) { - if (peek_token(p)->kind != TokenKind_bracket_l || peek_token2(p)->kind != TokenKind_bracket_l) { - return NULL; - } - next_token(p); // skip '[' - next_token(p); // skip '[' - AstNode* ret = parse_attribute_list(p); - expect(p, TokenKind_bracket_r); - expect(p, TokenKind_bracket_r); - return ret; -} - -// attribute-list: -// { attribute? | ',' } -static AstNode* parse_attribute_list(Parser* p) { - while (1) { - if (peek_token(p)->kind == TokenKind_ident) { - parse_attribute(p); - } - if (!consume_token_if(p, TokenKind_comma)) { - break; - } - } - unimplemented(); - return NULL; -} - -// attribute: -// attribute-token attribute-argument-clause? -// -// attribute-token: -// identifier ( '::' identifier )? -static AstNode* parse_attribute(Parser* p) { - expect(p, TokenKind_ident); - if (peek_token(p)->kind == TokenKind_colon && peek_token2(p)->kind == TokenKind_colon) { - next_token(p); - next_token(p); - expect(p, TokenKind_ident); - } - if (peek_token(p)->kind == TokenKind_paren_l) { - parse_attribute_argument_clause(p); - } - unimplemented(); - return NULL; -} - -// attribute-argument-clause: -// '(' balanced-token-sequence? ')' -static AstNode* parse_attribute_argument_clause(Parser* p) { - expect(p, TokenKind_paren_l); - if (peek_token(p)->kind != TokenKind_paren_r) { - parse_balanced_token_sequence(p); - } - expect(p, TokenKind_paren_r); - unimplemented(); - return NULL; -} - -// balanced-token-sequence: -// { balanced-token }+ -// -// balanced-token: -// '(' balanced-token-sequence? ')' -// '[' balanced-token-sequence? ']' -// '{' balanced-token-sequence? '}' -// any token other than a parenthesis, a bracket, or a brace -static AstNode* parse_balanced_token_sequence(Parser* p) { - int paren_depth = 0; - int bracket_depth = 0; - int brace_depth = 0; - while (1) { - Token* tok = peek_token(p); - if (tok->kind == TokenKind_paren_l) { - paren_depth++; - next_token(p); - } else if (tok->kind == TokenKind_paren_r) { - if (paren_depth == 0) - break; - paren_depth--; - next_token(p); - } else if (tok->kind == TokenKind_bracket_l) { - bracket_depth++; - next_token(p); - } else if (tok->kind == TokenKind_bracket_r) { - if (bracket_depth == 0) - break; - bracket_depth--; - next_token(p); - } else if (tok->kind == TokenKind_brace_l) { - brace_depth++; - next_token(p); - } else if (tok->kind == TokenKind_brace_r) { - if (brace_depth == 0) - break; - brace_depth--; - next_token(p); - } else if (tok->kind == TokenKind_eof) { - break; - } else { - next_token(p); - } - } - unimplemented(); - return NULL; -} - -// unlabaled-stmt: -// attribute-specifier-sequence? compound-stmt -// attribute-specifier-sequence? if-stmt -// attribute-specifier-sequence? switch-stmt -// attribute-specifier-sequence? while-stmt -// attribute-specifier-sequence? do-while-stmt -// attribute-specifier-sequence? for-stmt -// attribute-specifier-sequence? goto-stmt -// attribute-specifier-sequence? continue-stmt -// attribute-specifier-sequence? break-stmt -// attribute-specifier-sequence? return-stmt -// attribute-specifier-sequence? expr-stmt -static AstNode* parse_unlabaled_stmt(Parser* p) { - switch (peek_token(p)->kind) { - case TokenKind_brace_l: - return parse_compound_stmt(p); - case TokenKind_keyword_if: - return parse_if_stmt(p); - case TokenKind_keyword_switch: - return parse_switch_stmt(p); - case TokenKind_keyword_while: - return parse_while_stmt(p); - case TokenKind_keyword_do: - return parse_do_while_stmt(p); - case TokenKind_keyword_for: - return parse_for_stmt(p); - case TokenKind_keyword_goto: - return parse_goto_stmt(p); - case TokenKind_keyword_continue: - return parse_continue_stmt(p); - case TokenKind_keyword_break: - return parse_break_stmt(p); - case TokenKind_keyword_return: - return parse_return_stmt(p); - default: - return parse_expr_stmt(p); - } -} - -// label: -// TODO attribute-specifier-sequence? identifier ':' -// TODO attribute-specifier-sequence? 'case' constant-expr ':' -// TODO attribute-specifier-sequence? 'default' ':' -static AstNode* parse_label(Parser* p) { - SourceLocation loc = current_location(p); - Token* t = peek_token(p); - - AstNode* node; - if (t->kind == TokenKind_keyword_case) { - if (!p->current_switch) { - fatal_error("%s:%d: 'case' label not within a switch statement", t->loc.filename, t->loc.line); - } - expect(p, TokenKind_keyword_case); - AstNode* value = parse_constant_expr(p); - expect(p, TokenKind_colon); - node = ast_new_case_label(eval(value), ast_new_nop()); - } else if (t->kind == TokenKind_keyword_default) { - if (!p->current_switch) { - fatal_error("%s:%d: 'default' label not within a switch statement", t->loc.filename, t->loc.line); - } - expect(p, TokenKind_keyword_default); - expect(p, TokenKind_colon); - node = ast_new_default_label(ast_new_nop()); - } else { - // identifier ':' - Token* label_token = expect(p, TokenKind_ident); - expect(p, TokenKind_colon); - node = ast_new_label_stmt(label_token->value.string, ast_new_nop()); - } - node->loc = loc; - return node; -} - -static bool is_label_token(Parser* p) { - Token* t = peek_token(p); - return t->kind == TokenKind_keyword_case || t->kind == TokenKind_keyword_default || - (t->kind == TokenKind_ident && peek_token2(p)->kind == TokenKind_colon); -} - -// stmt: -// label stmt -// unlabaled-stmt -static AstNode* parse_stmt(Parser* p) { - if (is_label_token(p)) { - AstNode* label = parse_label(p); - AstNode* stmt = parse_stmt(p); - switch (label->kind) { - case AstNodeKind_case_label: - label->as.case_label->body = stmt; - break; - case AstNodeKind_default_label: - label->as.default_label->body = stmt; - break; - case AstNodeKind_label_stmt: - label->as.label_stmt->body = stmt; - break; - default: - unreachable(); - break; - } - return label; - } - return parse_unlabaled_stmt(p); -} - -// compound-stmt: -// '{' { block-item }* '}' -static AstNode* parse_compound_stmt(Parser* p) { - SourceLocation loc = current_location(p); - AstNode* list = ast_new_list(4); - expect(p, TokenKind_brace_l); - enter_scope(p); - while (peek_token(p)->kind != TokenKind_brace_r) { - AstNode* stmt = parse_block_item(p); - ast_append(list, stmt); - } - leave_scope(p); - expect(p, TokenKind_brace_r); - list->loc = loc; - return list; -} - -// block-item: -// declaration -// unlabaled-stmt -// label -static AstNode* parse_block_item(Parser* p) { - Token* t = peek_token(p); - - if (t->kind == TokenKind_keyword_static_assert || is_type_token(p, t)) { - return parse_declaration(p); - } else if (is_label_token(p)) { - return parse_label(p); - } else { - return parse_unlabaled_stmt(p); - } -} - -// expr-stmt: -// TODO attribute-specifier-sequence? expr? ';' -static AstNode* parse_expr_stmt(Parser* p) { - SourceLocation loc = current_location(p); - if (consume_token_if(p, TokenKind_semicolon)) { - AstNode* node = ast_new_nop(); - node->loc = loc; - return node; - } - - AstNode* e = parse_expr(p); - expect(p, TokenKind_semicolon); - AstNode* node = ast_new_expr_stmt(e); - node->loc = loc; - return node; -} - -// if-stmt: -// 'if' '(' expr ')' stmt ( 'else' stmt )? -static AstNode* parse_if_stmt(Parser* p) { - SourceLocation loc = current_location(p); - expect(p, TokenKind_keyword_if); - expect(p, TokenKind_paren_l); - AstNode* cond = parse_expr(p); - expect(p, TokenKind_paren_r); - AstNode* then_body = parse_stmt(p); - AstNode* else_body = NULL; - if (consume_token_if(p, TokenKind_keyword_else)) { - else_body = parse_stmt(p); - } - - AstNode* node = ast_new_if_stmt(cond, then_body, else_body); - node->loc = loc; - return node; -} - -// switch-stmt: -// 'switch' '(' expr ')' stmt -static AstNode* parse_switch_stmt(Parser* p) { - SourceLocation loc = current_location(p); - expect(p, TokenKind_keyword_switch); - expect(p, TokenKind_paren_l); - AstNode* expr = parse_expr(p); - expect(p, TokenKind_paren_r); - - AstNode* tmp_var = generate_temporary_lvar(p, expr->ty); - AstNode* assignment = ast_new_assign_expr(TokenKind_assign, tmp_var, expr); - AstNode* assign_stmt = ast_new_expr_stmt(assignment); - assign_stmt->loc = loc; - - AstNode* switch_stmt = ast_new_switch_stmt(tmp_var); - switch_stmt->loc = loc; - - AstNode* prev_switch = p->current_switch; - p->current_switch = switch_stmt; - switch_stmt->as.switch_stmt->body = parse_stmt(p); - p->current_switch = prev_switch; - - AstNode* list = ast_new_list(2); - list->loc = loc; - ast_append(list, assign_stmt); - ast_append(list, switch_stmt); - return list; -} - -// while-stmt: -// 'while' '(' expr ')' stmt -static AstNode* parse_while_stmt(Parser* p) { - SourceLocation loc = current_location(p); - expect(p, TokenKind_keyword_while); - expect(p, TokenKind_paren_l); - AstNode* cond = parse_expr(p); - expect(p, TokenKind_paren_r); - AstNode* body = parse_stmt(p); - AstNode* node = ast_new_for_stmt(NULL, cond, NULL, body); - node->loc = loc; - return node; -} - -// do-while-stmt: -// 'do' stmt 'while' '(' expr ')' ';' -static AstNode* parse_do_while_stmt(Parser* p) { - SourceLocation loc = current_location(p); - expect(p, TokenKind_keyword_do); - AstNode* body = parse_stmt(p); - expect(p, TokenKind_keyword_while); - expect(p, TokenKind_paren_l); - AstNode* cond = parse_expr(p); - expect(p, TokenKind_paren_r); - expect(p, TokenKind_semicolon); - - AstNode* node = ast_new_do_while_stmt(cond, body); - node->loc = loc; - return node; -} - -// for-stmt: -// 'for' '(' expr? ';' expr? ';' expr? ')' stmt -// 'for' '(' declaration expr? ';' expr? ')' stmt -static AstNode* parse_for_stmt(Parser* p) { - SourceLocation loc = current_location(p); - expect(p, TokenKind_keyword_for); - expect(p, TokenKind_paren_l); - AstNode* init = NULL; - AstNode* cond = NULL; - AstNode* update = NULL; - enter_scope(p); - if (peek_token(p)->kind != TokenKind_semicolon) { - if (is_type_token(p, peek_token(p))) { - AstNode* decls = parse_declaration(p); - AstNode* initializers = ast_new_list(1); - for (int i = 0; i < decls->as.list->len; i++) { - AstNode* item = &decls->as.list->items[i]; - if (item->kind == AstNodeKind_expr_stmt) { - ast_append(initializers, item->as.expr_stmt->expr); - } - } - init = initializers; - } else { - init = parse_expr(p); - expect(p, TokenKind_semicolon); - } - } else { - expect(p, TokenKind_semicolon); - } - if (peek_token(p)->kind != TokenKind_semicolon) { - cond = parse_expr(p); - } else { - cond = ast_new_int(1); - } - expect(p, TokenKind_semicolon); - if (peek_token(p)->kind != TokenKind_paren_r) { - update = parse_expr(p); - } - expect(p, TokenKind_paren_r); - AstNode* body = parse_stmt(p); - leave_scope(p); - AstNode* node = ast_new_for_stmt(init, cond, update, body); - node->loc = loc; - return node; -} - -// goto-stmt: -// 'goto' identifier ';' -static AstNode* parse_goto_stmt(Parser* p) { - SourceLocation loc = current_location(p); - expect(p, TokenKind_keyword_goto); - Token* label_token = expect(p, TokenKind_ident); - expect(p, TokenKind_semicolon); - AstNode* node = ast_new_goto_stmt(label_token->value.string); - node->loc = loc; - return node; -} - -// continue-stmt: -// 'continue' ';' -static AstNode* parse_continue_stmt(Parser* p) { - SourceLocation loc = current_location(p); - expect(p, TokenKind_keyword_continue); - expect(p, TokenKind_semicolon); - AstNode* node = ast_new_continue_stmt(); - node->loc = loc; - return node; -} - -// break-stmt: -// 'break' ';' -static AstNode* parse_break_stmt(Parser* p) { - SourceLocation loc = current_location(p); - expect(p, TokenKind_keyword_break); - expect(p, TokenKind_semicolon); - AstNode* node = ast_new_break_stmt(); - node->loc = loc; - return node; -} - -// return-stmt: -// 'return' expr? ';' -static AstNode* parse_return_stmt(Parser* p) { - SourceLocation loc = current_location(p); - expect(p, TokenKind_keyword_return); - if (consume_token_if(p, TokenKind_semicolon)) { - AstNode* node = ast_new_return_stmt(NULL); - node->loc = loc; - return node; - } - - AstNode* expr = parse_expr(p); - expect(p, TokenKind_semicolon); - AstNode* node = ast_new_return_stmt(expr); - node->loc = loc; - return node; -} - -// static_assert-declaration: -// 'static_assert' '(' constant-expr ( ',' string-literal )? ')' ';' -static AstNode* parse_static_assert_declaration(Parser* p) { - expect(p, TokenKind_keyword_static_assert); - expect(p, TokenKind_paren_l); - AstNode* predicate = parse_constant_expr(p); - Token* message = NULL; - if (consume_token_if(p, TokenKind_comma)) { - message = expect(p, TokenKind_literal_str); - } - // TODO: validate the assertion. - (void)predicate; - (void)message; - expect(p, TokenKind_paren_r); - expect(p, TokenKind_semicolon); - return NULL; -} - -// external-declaration: -// static_assert-declaration -// TODO attribute-specifier-sequence ';' -// TODO attribute-specifier-sequence function-definition-or-declaration-rest -// function-definition-or-declaration-rest -// -// function-definition-or-declaration-rest: -// declaration-specifiers init-declarator-list ';' -// declaration-specifiers init-declarator-list compound-stmt -static AstNode* parse_external_declaration(Parser* p) { - if (peek_token(p)->kind == TokenKind_keyword_static_assert) { - return parse_static_assert_declaration(p); - } - - Type* ty = parse_declaration_specifiers(p); - if (consume_token_if(p, TokenKind_semicolon)) { - // Type declaration. - return NULL; - } - - AstNode* decls = parse_init_declarator_list(p, ty); - - if (peek_token(p)->kind == TokenKind_brace_l) { - return parse_function_definition(p, decls); - } - - expect(p, TokenKind_semicolon); - if (ty->storage_class == StorageClass_typedef) { - process_typedefs(p, decls); - return NULL; - } else { - process_declarations(p, &decls->as.list->items[decls->as.list->len - 1]); - return decls; - } -} - -// translation-unit: -// { external-declaration }+ -Program* parse(TokenArray* tokens) { - Parser* p = parser_new(tokens); - AstNode* funcs = ast_new_list(32); - AstNode* vars = ast_new_list(16); - while (eof(p)) { - AstNode* n = parse_external_declaration(p); - if (!n) - continue; - if (n->kind == AstNodeKind_func_def) { - ast_append(funcs, n); - } else if (n->kind == AstNodeKind_gvar_decl) { - ast_append(vars, n); - } else if (n->kind == AstNodeKind_list) { - for (int i = 0; i < n->as.list->len; ++i) { - if (n->as.list->items[i].kind == AstNodeKind_gvar_decl) - ast_append(vars, &n->as.list->items[i]); - } - } - } - Program* prog = calloc(1, sizeof(Program)); - prog->funcs = funcs; - prog->vars = vars; - prog->str_literals = p->str_literals.data; - return prog; -} - -static int eval_binary_expr(int op, int v1, int v2) { - if (op == TokenKind_andand) { - return v1 && v2; - } else if (op == TokenKind_oror) { - return v1 || v2; - } else if (op == TokenKind_plus) { - return v1 + v2; - } else if (op == TokenKind_minus) { - return v1 - v2; - } else if (op == TokenKind_star) { - return v1 * v2; - } else if (op == TokenKind_slash) { - if (v2 == 0) { - fatal_error("eval: division by zero"); - } - return v1 / v2; - } else if (op == TokenKind_percent) { - if (v2 == 0) { - fatal_error("eval: division by zero"); - } - return v1 % v2; - } else if (op == TokenKind_eq) { - return v1 == v2; - } else if (op == TokenKind_ne) { - return v1 != v2; - } else if (op == TokenKind_lt) { - return v1 < v2; - } else if (op == TokenKind_le) { - return v1 <= v2; - } else if (op == TokenKind_lshift) { - return v1 << v2; - } else if (op == TokenKind_rshift) { - return v1 >> v2; - } else if (op == TokenKind_and) { - return v1 & v2; - } else if (op == TokenKind_or) { - return v1 | v2; - } else if (op == TokenKind_xor) { - return v1 ^ v2; - } else { - unimplemented(); - } -} - -static int eval(AstNode* e) { - if (e->kind == AstNodeKind_int_expr) { - return e->as.int_expr->value; - } else if (e->kind == AstNodeKind_unary_expr) { - int v = eval(e->as.unary_expr->operand); - if (e->as.unary_expr->op == TokenKind_not) { - return !v; - } else if (e->as.unary_expr->op == TokenKind_minus) { - return -v; - } else { - unimplemented(); - } - } else if (e->kind == AstNodeKind_binary_expr) { - int v1 = eval(e->as.binary_expr->lhs); - int v2 = eval(e->as.binary_expr->rhs); - return eval_binary_expr(e->as.binary_expr->op, v1, v2); - } else if (e->kind == AstNodeKind_logical_expr) { - int v1 = eval(e->as.logical_expr->lhs); - int v2 = eval(e->as.logical_expr->rhs); - return eval_binary_expr(e->as.logical_expr->op, v1, v2); - } else if (e->kind == AstNodeKind_cond_expr) { - int cond = eval(e->as.cond_expr->cond); - if (cond) { - return eval(e->as.cond_expr->then); - } else { - return eval(e->as.cond_expr->else_); - } - } else if (e->kind == AstNodeKind_cast_expr) { - return eval(e->as.cast_expr->operand); - } else if (e->kind == AstNodeKind_deref_expr) { - return eval(e->as.deref_expr->operand); - } else if (e->kind == AstNodeKind_ref_expr) { - return eval(e->as.ref_expr->operand); - } else { - unimplemented(); - } -} - -bool pp_eval_constant_expr(TokenArray* pp_tokens) { - TokenArray* tokens = convert_pp_tokens_to_tokens(pp_tokens); - Parser* p = parser_new(tokens); - AstNode* e = parse_constant_expr(p); - return eval(e) != 0; -} - -static InitData* initdata_new() { - InitData* init_data = calloc(1, sizeof(InitData)); - init_data->len = 0; - init_data->capacity = 1; - init_data->blocks = calloc(init_data->capacity, sizeof(InitDataBlock)); - return init_data; -} - -static void initdata_reserve(InitData* init_data, size_t size) { - if (size <= init_data->capacity) - return; - while (init_data->capacity < size) { - init_data->capacity *= 2; - } - init_data->blocks = realloc(init_data->blocks, init_data->capacity * sizeof(InitDataBlock)); - memset(init_data->blocks + init_data->len, 0, (init_data->capacity - init_data->len) * sizeof(InitDataBlock)); -} - -static InitDataBlock* initdata_push_new(InitData* init_data) { - initdata_reserve(init_data, init_data->len + 1); - return &init_data->blocks[init_data->len++]; -} - -static void initdata_append_addr(InitData* buf, const char* label) { - InitDataBlock* block = initdata_push_new(buf); - block->kind = InitDataBlockKind_addr; - block->as.addr.label = label; -} - -static void initdata_append_zeros(InitData* buf, size_t len) { - if (len == 0) - return; - char* data = calloc(len, sizeof(char)); - InitDataBlock* block = initdata_push_new(buf); - block->kind = InitDataBlockKind_bytes; - block->as.bytes.len = len; - block->as.bytes.buf = data; -} - -static void initdata_append_bytes(InitData* buf, const char* data, size_t len) { - char* copy = calloc(len, sizeof(char)); - memcpy(copy, data, len); - InitDataBlock* block = initdata_push_new(buf); - block->kind = InitDataBlockKind_bytes; - block->as.bytes.len = len; - block->as.bytes.buf = copy; -} - -static void do_eval_init_expr(InitData* buf, AstNode* expr, Type* ty) { - if (expr->kind == AstNodeKind_array_initializer) { - AstNode* list = expr->as.array_initializer->list; - if (ty->kind == TypeKind_array) { - for (int i = 0; i < list->as.list->len; ++i) { - do_eval_init_expr(buf, &list->as.list->items[i], ty->base); - } - } else if (ty->kind == TypeKind_struct) { - AstNode* def = &ty->ref.defs->as.list->items[ty->ref.index]; - AstNode* members = def->as.struct_def->members; - int offset = 0; - for (int i = 0; i < list->as.list->len; ++i) { - AstNode* member = &members->as.list->items[i]; - int align = type_alignof(member->ty); - int aligned_offset = to_aligned(offset, align); - initdata_append_zeros(buf, aligned_offset - offset); // padding - offset = aligned_offset; - do_eval_init_expr(buf, &list->as.list->items[i], member->ty); - offset += type_sizeof(member->ty); - } - int total = type_sizeof(ty); - initdata_append_zeros(buf, total - offset); // padding - } else if (ty->kind == TypeKind_union) { - AstNode* def = &ty->ref.defs->as.list->items[ty->ref.index]; - AstNode* members = def->as.union_def->members; - AstNode* member = &members->as.list->items[0]; - do_eval_init_expr(buf, &list->as.list->items[0], member->ty); - int member_size = type_sizeof(member->ty); - int total = type_sizeof(ty); - initdata_append_zeros(buf, total - member_size); // padding - } else { - unimplemented(); - } - } else if (ty->kind == TypeKind_ptr) { - if (expr->kind == AstNodeKind_str_expr) { - char label[32]; - sprintf(label, ".Lstr__%d", expr->as.str_expr->idx); - initdata_append_addr(buf, strdup(label)); - } else if (expr->kind == AstNodeKind_ref_expr) { - if (expr->as.ref_expr->operand->kind != AstNodeKind_gvar) { - unimplemented(); - } - initdata_append_addr(buf, expr->as.ref_expr->operand->as.gvar->name); - } else if (expr->kind == AstNodeKind_gvar) { - initdata_append_addr(buf, expr->as.gvar->name); - } else if (expr->kind == AstNodeKind_func) { - initdata_append_addr(buf, expr->as.func->name); - } else if (expr->kind == AstNodeKind_int_expr && expr->as.int_expr->value == 0) { - initdata_append_zeros(buf, type_sizeof(ty)); - } else { - unimplemented(); - } - } else { - int value = eval(expr); - int size = type_sizeof(ty); - char bytes[8]; - for (int i = 0; i < size; ++i) { - bytes[i] = (value >> (i * 8)) & 0xff; - } - initdata_append_bytes(buf, bytes, size); - } -} - -InitData* eval_init_expr(AstNode* expr, Type* ty) { - InitData* buf = initdata_new(); - do_eval_init_expr(buf, expr, ty); - return buf; -} diff --git a/src/parse.h b/src/parse.h deleted file mode 100644 index d5a443c..0000000 --- a/src/parse.h +++ /dev/null @@ -1,43 +0,0 @@ -#ifndef DUCC_PARSE_H -#define DUCC_PARSE_H - -#include "ast.h" -#include "common.h" -#include "preprocess.h" - -Program* parse(TokenArray* tokens); -bool pp_eval_constant_expr(TokenArray* pp_tokens); - -typedef enum { - InitDataBlockKind_addr, - InitDataBlockKind_bytes, -} InitDataBlockKind; - -// Static address to global variable or ROM area. -typedef struct { - const char* label; -} InitDataBlockAddr; - -// Static byte array. -typedef struct { - size_t len; - const char* buf; -} InitDataBlockBytes; - -typedef struct { - InitDataBlockKind kind; - union { - InitDataBlockAddr addr; - InitDataBlockBytes bytes; - } as; -} InitDataBlock; - -typedef struct { - size_t len; - size_t capacity; - InitDataBlock* blocks; -} InitData; - -InitData* eval_init_expr(AstNode* expr, Type* ty); - -#endif diff --git a/src/preprocess.c b/src/preprocess.c deleted file mode 100644 index 1bdaaf9..0000000 --- a/src/preprocess.c +++ /dev/null @@ -1,1559 +0,0 @@ -#include "preprocess.h" -#include -#include -#include "common.h" -#include "parse.h" -#include "sys.h" -#include "tokenize.h" - -typedef enum { - MacroKind_undef, - MacroKind_obj, - MacroKind_func, - MacroKind_builtin_file, - MacroKind_builtin_line, -} MacroKind; - -const char* macro_kind_stringify(MacroKind kind) { - if (kind == MacroKind_undef) - return "undef"; - else if (kind == MacroKind_obj) - return "object-like"; - else if (kind == MacroKind_func) - return "function-like"; - else if (kind == MacroKind_builtin_file) - return "__FILE__"; - else if (kind == MacroKind_builtin_line) - return "__LINE__"; - else - unreachable(); -} - -typedef struct { - MacroKind kind; - const char* name; - TokenArray parameters; - TokenArray replacements; -} Macro; - -void macro_build_json(JsonBuilder* builder, Macro* macro) { - jsonbuilder_object_start(builder); - - jsonbuilder_object_member_start(builder, "kind"); - jsonbuilder_string(builder, macro_kind_stringify(macro->kind)); - jsonbuilder_object_member_end(builder); - jsonbuilder_object_member_start(builder, "name"); - jsonbuilder_string(builder, macro->name); - jsonbuilder_object_member_end(builder); - jsonbuilder_object_member_start(builder, "parameters"); - tokens_build_json(builder, ¯o->parameters); - jsonbuilder_object_member_end(builder); - jsonbuilder_object_member_start(builder, "replacements"); - tokens_build_json(builder, ¯o->replacements); - jsonbuilder_object_member_end(builder); - - jsonbuilder_object_end(builder); -} - -static int macro_find_param(Macro* macro, Token* tok) { - if (tok->kind != TokenKind_ident) - return -1; - - for (size_t i = 0; i < macro->parameters.len; ++i) { - if (strcmp(macro->parameters.data[i].value.string, tok->value.string) == 0) { - return i; - } - } - return -1; -} - -typedef struct { - size_t len; - size_t capacity; - Macro* data; -} MacroArray; - -static MacroArray* macros_new() { - MacroArray* macros = calloc(1, sizeof(MacroArray)); - macros->len = 0; - macros->capacity = 8; - macros->data = calloc(macros->capacity, sizeof(Macro)); - return macros; -} - -static void macros_reserve(MacroArray* macros, size_t size) { - if (size <= macros->capacity) - return; - while (macros->capacity < size) { - macros->capacity *= 2; - } - macros->data = realloc(macros->data, macros->capacity * sizeof(Macro)); - memset(macros->data + macros->len, 0, (macros->capacity - macros->len) * sizeof(Macro)); -} - -static Macro* macros_push_new(MacroArray* macros) { - macros_reserve(macros, macros->len + 1); - return ¯os->data[macros->len++]; -} - -static void define_macro_to_number(MacroArray* macros, const char* name, int n) { - Macro* m = macros_push_new(macros); - m->kind = MacroKind_obj; - m->name = name; - tokens_init(&m->replacements, 1); - Token* tok = tokens_push_new(&m->replacements); - tok->kind = TokenKind_literal_int; - tok->value.integer = n; -} - -static void add_predefined_macros(MacroArray* macros) { - Macro* m; - - m = macros_push_new(macros); - m->kind = MacroKind_builtin_file; - m->name = "__FILE__"; - - m = macros_push_new(macros); - m->kind = MacroKind_builtin_line; - m->name = "__LINE__"; - - // Non-standard pre-defined macros. - define_macro_to_number(macros, "__ducc__", 1); - define_macro_to_number(macros, "__x86_64__", 1); - define_macro_to_number(macros, "__LP64__", 1); - - // GCC's predefined macros. Glibc depends on these macros. - // https://gcc.gnu.org/onlinedocs/cpp/Common-Predefined-Macros.html - // TODO: uncomment out __LONG_MAX__, etc. once ducc supports 64-bit integer literals. - define_macro_to_number(macros, "__CHAR_BIT__", __CHAR_BIT__); - define_macro_to_number(macros, "__SCHAR_MAX__", __SCHAR_MAX__); - define_macro_to_number(macros, "__WCHAR_MAX__", __WCHAR_MAX__); - define_macro_to_number(macros, "__SHRT_MAX__", __SHRT_MAX__); - define_macro_to_number(macros, "__INT_MAX__", __INT_MAX__); - // define_macro_to_number(macros, "__LONG_MAX__", __LONG_MAX__); - // define_macro_to_number(macros, "__LONG_LONG_MAX__", __LONG_LONG_MAX__); - define_macro_to_number(macros, "__WINT_MAX__", __WINT_MAX__); - // define_macro_to_number(macros, "__SIZE_MAX__", __SIZE_MAX__); - // define_macro_to_number(macros, "__PTRDIFF_MAX__", __PTRDIFF_MAX__); - // define_macro_to_number(macros, "__INTMAX_MAX__", __INTMAX_MAX__); - // define_macro_to_number(macros, "__UINTMAX_MAX__", __UINTMAX_MAX__); - define_macro_to_number(macros, "__SIG_ATOMIC_MAX__", __SIG_ATOMIC_MAX__); - define_macro_to_number(macros, "__INT8_MAX__", __INT8_MAX__); - define_macro_to_number(macros, "__INT16_MAX__", __INT16_MAX__); - define_macro_to_number(macros, "__INT32_MAX__", __INT32_MAX__); - // define_macro_to_number(macros, "__INT64_MAX__", __INT64_MAX__); - define_macro_to_number(macros, "__UINT8_MAX__", __UINT8_MAX__); - define_macro_to_number(macros, "__UINT16_MAX__", __UINT16_MAX__); - define_macro_to_number(macros, "__UINT32_MAX__", __UINT32_MAX__); - // define_macro_to_number(macros, "__UINT64_MAX__", __UINT64_MAX__); - define_macro_to_number(macros, "__INT_LEAST8_MAX__", __INT_LEAST8_MAX__); - define_macro_to_number(macros, "__INT_LEAST16_MAX__", __INT_LEAST16_MAX__); - define_macro_to_number(macros, "__INT_LEAST32_MAX__", __INT_LEAST32_MAX__); - // define_macro_to_number(macros, "__INT_LEAST64_MAX__", __INT_LEAST64_MAX__); - define_macro_to_number(macros, "__UINT_LEAST8_MAX__", __UINT_LEAST8_MAX__); - define_macro_to_number(macros, "__UINT_LEAST16_MAX__", __UINT_LEAST16_MAX__); - define_macro_to_number(macros, "__UINT_LEAST32_MAX__", __UINT_LEAST32_MAX__); - // define_macro_to_number(macros, "__UINT_LEAST64_MAX__", __UINT_LEAST64_MAX__); - define_macro_to_number(macros, "__INT_FAST8_MAX__", __INT_FAST8_MAX__); - // define_macro_to_number(macros, "__INT_FAST16_MAX__", __INT_FAST16_MAX__); - // define_macro_to_number(macros, "__INT_FAST32_MAX__", __INT_FAST32_MAX__); - // define_macro_to_number(macros, "__INT_FAST64_MAX__", __INT_FAST64_MAX__); - define_macro_to_number(macros, "__UINT_FAST8_MAX__", __UINT_FAST8_MAX__); - // define_macro_to_number(macros, "__UINT_FAST16_MAX__", __UINT_FAST16_MAX__); - // define_macro_to_number(macros, "__UINT_FAST32_MAX__", __UINT_FAST32_MAX__); - // define_macro_to_number(macros, "__UINT_FAST64_MAX__", __UINT_FAST64_MAX__); - // define_macro_to_number(macros, "__INTPTR_MAX__", __INTPTR_MAX__); - // define_macro_to_number(macros, "__UINTPTR_MAX__", __UINTPTR_MAX__); - define_macro_to_number(macros, "__WCHAR_MIN__", __WCHAR_MIN__); - define_macro_to_number(macros, "__WINT_MIN__", __WINT_MIN__); - define_macro_to_number(macros, "__SIG_ATOMIC_MIN__", __SIG_ATOMIC_MIN__); - - // GCC's predefined macros not listed in Common Predefined Macros page. - define_macro_to_number(macros, "__DBL_DIG__", __DBL_DIG__); - define_macro_to_number(macros, "__DBL_MANT_DIG__", __DBL_MANT_DIG__); - define_macro_to_number(macros, "__DBL_MAX_10_EXP__", __DBL_MAX_10_EXP__); - define_macro_to_number(macros, "__FLT_DIG__", __FLT_DIG__); - define_macro_to_number(macros, "__FLT_MANT_DIG__", __FLT_MANT_DIG__); - define_macro_to_number(macros, "__FLT_MAX_10_EXP__", __FLT_MAX_10_EXP__); - define_macro_to_number(macros, "__LDBL_DIG__", __LDBL_DIG__); - define_macro_to_number(macros, "__LDBL_MANT_DIG__", __LDBL_MANT_DIG__); - define_macro_to_number(macros, "__LDBL_MAX_10_EXP__", __LDBL_MAX_10_EXP__); -} - -// Accept "FOO" or "FOO=value" -static void define_macro_from_string(MacroArray* macros, const char* def) { - Macro* m = macros_push_new(macros); - m->kind = MacroKind_obj; - - const char* eq = strchr(def, '='); - if (eq) { - // FOO=value format - size_t name_len = eq - def; - char* name = calloc(name_len + 1, sizeof(char)); - memcpy(name, def, name_len); - m->name = name; - - const char* value = eq + 1; - tokens_init(&m->replacements, 1); - Token* tok = tokens_push_new(&m->replacements); - - // Try to parse as integer - char* num_end; - long int_val = strtol(value, &num_end, 10); - if (value[0] != '\0' && *num_end == '\0') { - tok->kind = TokenKind_literal_int; - tok->value.integer = int_val; - } else { - tok->kind = TokenKind_ident; - tok->value.string = value; - } - } else { - // FOO format (equivalent to FOO=1) - m->name = def; - tokens_init(&m->replacements, 1); - Token* tok = tokens_push_new(&m->replacements); - tok->kind = TokenKind_literal_int; - tok->value.integer = 1; - } -} - -static void add_user_defines(MacroArray* macros, StrArray* user_defines) { - for (size_t i = 0; i < user_defines->len; ++i) { - define_macro_from_string(macros, user_defines->data[i]); - } -} - -void macros_build_json(JsonBuilder* builder, MacroArray* macros) { - jsonbuilder_array_start(builder); - for (size_t i = 0; i < macros->len; ++i) { - jsonbuilder_array_element_start(builder); - macro_build_json(builder, ¯os->data[i]); - jsonbuilder_array_element_end(builder); - } - jsonbuilder_array_end(builder); -} - -typedef struct { - TokenArray tokens; -} MacroArg; - -void macroarg_build_json(JsonBuilder* builder, MacroArg* arg) { - jsonbuilder_object_start(builder); - jsonbuilder_object_member_start(builder, "tokens"); - tokens_build_json(builder, &arg->tokens); - jsonbuilder_object_member_end(builder); - jsonbuilder_object_end(builder); -} - -typedef struct { - size_t len; - size_t capacity; - MacroArg* data; -} MacroArgArray; - -static MacroArgArray* macroargs_new() { - MacroArgArray* macroargs = calloc(1, sizeof(MacroArgArray)); - macroargs->len = 0; - macroargs->capacity = 2; - macroargs->data = calloc(macroargs->capacity, sizeof(MacroArg)); - return macroargs; -} - -static void macroargs_reserve(MacroArgArray* macroargs, size_t size) { - if (size <= macroargs->capacity) - return; - while (macroargs->capacity < size) { - macroargs->capacity *= 2; - } - macroargs->data = realloc(macroargs->data, macroargs->capacity * sizeof(MacroArg)); - memset(macroargs->data + macroargs->len, 0, (macroargs->capacity - macroargs->len) * sizeof(MacroArg)); -} - -static MacroArg* macroargs_push_new(MacroArgArray* macroargs) { - macroargs_reserve(macroargs, macroargs->len + 1); - return ¯oargs->data[macroargs->len++]; -} - -void macroargs_build_json(JsonBuilder* builder, MacroArgArray* macroargs) { - jsonbuilder_object_start(builder); - jsonbuilder_object_member_start(builder, "len"); - jsonbuilder_integer(builder, macroargs->len); - jsonbuilder_object_member_end(builder); - jsonbuilder_object_member_start(builder, "data"); - jsonbuilder_array_start(builder); - for (size_t i = 0; i < macroargs->len; ++i) { - jsonbuilder_array_element_start(builder); - macroarg_build_json(builder, ¯oargs->data[i]); - jsonbuilder_array_element_end(builder); - } - jsonbuilder_array_end(builder); - jsonbuilder_object_member_end(builder); - jsonbuilder_object_end(builder); -} - -typedef struct { - TokenArray* pp_tokens; - int pos; - MacroArray* macros; - int include_depth; - StrArray* include_paths; - StrArray* included_files; - bool generate_system_deps; - bool generate_user_deps; -} Preprocessor; - -static TokenArray* do_preprocess(InFile* src, int depth, MacroArray* macros, StrArray* include_paths, - StrArray* included_files, bool generate_system_deps, bool generate_user_deps); - -static Preprocessor* preprocessor_new(TokenArray* pp_tokens, int include_depth, MacroArray* macros, - StrArray* include_paths, StrArray* included_files, bool generate_system_deps, - bool generate_user_deps) { - if (include_depth >= 32) { - fatal_error("include depth limit exceeded"); - } - - Preprocessor* pp = calloc(1, sizeof(Preprocessor)); - pp->pp_tokens = pp_tokens; - pp->macros = macros; - pp->include_depth = include_depth; - pp->include_paths = include_paths; - pp->included_files = included_files; - pp->generate_system_deps = generate_system_deps; - pp->generate_user_deps = generate_user_deps; - - return pp; -} - -static Token* pp_token_at(Preprocessor* pp, int i) { - return &pp->pp_tokens->data[i]; -} - -static Token* peek_pp_token(Preprocessor* pp) { - return pp_token_at(pp, pp->pos); -} - -static Token* next_pp_token(Preprocessor* pp) { - return pp_token_at(pp, pp->pos++); -} - -static void skip_pp_token(Preprocessor* pp, TokenKind expected) { - Token* tok = next_pp_token(pp); - assert(tok->kind == expected); -} - -static Token* consume_pp_token_if(Preprocessor* pp, TokenKind expected) { - if (peek_pp_token(pp)->kind == expected) { - return next_pp_token(pp); - } else { - return NULL; - } -} - -static Token* consume_pp_token_if_not(Preprocessor* pp, TokenKind non_expected) { - if (peek_pp_token(pp)->kind == non_expected) { - return NULL; - } else { - return next_pp_token(pp); - } -} - -static Token* expect_pp_token(Preprocessor* pp, TokenKind expected) { - Token* tok = next_pp_token(pp); - if (tok->kind == expected) { - return tok; - } - fatal_error("%s:%d: expected '%s', but got '%s'", tok->loc.filename, tok->loc.line, token_kind_stringify(expected), - token_stringify(tok)); -} - -static bool pp_eof(Preprocessor* pp) { - return peek_pp_token(pp)->kind == TokenKind_eof; -} - -static int find_macro(Preprocessor* pp, const char* name) { - for (size_t i = 0; i < pp->macros->len; ++i) { - if (pp->macros->data[i].kind == MacroKind_undef) - continue; - if (strcmp(pp->macros->data[i].name, name) == 0) { - return i; - } - } - return -1; -} - -static void undef_macro(Preprocessor* pp, int idx) { - pp->macros->data[idx].kind = MacroKind_undef; - // TODO: Can predefined macro like __FILE__ be undefined? -} - -static void skip_whitespaces(Preprocessor* pp) { - while (!pp_eof(pp) && consume_pp_token_if(pp, TokenKind_whitespace)) - ; -} - -static void skip_whitespaces_or_newlines(Preprocessor* pp, bool skip_newline) { - while (!pp_eof(pp) && (consume_pp_token_if(pp, TokenKind_whitespace) || - (skip_newline && consume_pp_token_if(pp, TokenKind_newline)))) - ; -} - -// It will not consume a new-line token. -static void seek_to_next_newline(Preprocessor* pp) { - while (!pp_eof(pp) && consume_pp_token_if_not(pp, TokenKind_newline)) - ; -} - -static void make_tokens_removed(Preprocessor* pp, int start, int end) { - for (int i = start; i < end; ++i) { - Token* tok = pp_token_at(pp, i); - tok->kind = TokenKind_removed; - tok->value.string = NULL; - } -} - -static Token* read_include_header_name(Preprocessor* pp) { - Token* tok = next_pp_token(pp); - if (tok->kind != TokenKind_header_name) { - fatal_error("%s:%d: invalid #include, %s", tok->loc.filename, tok->loc.line, token_stringify(tok)); - } - return tok; -} - -static const char* resolve_include_name(Preprocessor* pp, const Token* include_name_token) { - const char* include_name = include_name_token->value.string; - if (include_name[0] == '"') { - char* current_filename = strdup(include_name_token->loc.filename); - const char* current_dir = dirname(current_filename); - char* buf = calloc(strlen(include_name) - 2 + 1 + strlen(current_dir) + 1, sizeof(char)); - sprintf(buf, "%s/%.*s", current_dir, (int)(strlen(include_name) - 2), include_name + 1); - return buf; - } else { - for (size_t i = 0; i < pp->include_paths->len; ++i) { - char* buf = calloc(strlen(include_name) - 2 + 1 + strlen(pp->include_paths->data[i]) + 1, sizeof(char)); - sprintf(buf, "%s/%.*s", pp->include_paths->data[i], (int)(strlen(include_name) - 2), include_name + 1); - if (access(buf, F_OK | R_OK) == 0) { - return buf; - } - } - return NULL; - } -} - -static const char* resolve_next_include_name(Preprocessor* pp, const Token* include_name_token) { - const char* include_name = include_name_token->value.string; - char* current_filename = strdup(include_name_token->loc.filename); - for (size_t i = 0; i < pp->include_paths->len; ++i) { - char* buf = calloc(strlen(include_name) - 2 + 1 + strlen(pp->include_paths->data[i]) + 1, sizeof(char)); - sprintf(buf, "%s/%.*s", pp->include_paths->data[i], (int)(strlen(include_name) - 2), include_name + 1); - if (strcmp(buf, current_filename) == 0) { - // #include_next skips the same file. - continue; - } - if (access(buf, F_OK | R_OK) == 0) { - return buf; - } - } - return NULL; -} - -static int replace_pp_tokens(Preprocessor* pp, int dest_start, int dest_end, TokenArray* source_tokens) { - size_t n_tokens_to_remove = dest_end - dest_start; - size_t n_tokens_after_dest = pp->pp_tokens->len - dest_end; - size_t shift_amount; - - if (n_tokens_to_remove < source_tokens->len) { - // Move existing tokens backward to make room. - shift_amount = source_tokens->len - n_tokens_to_remove; - tokens_reserve(pp->pp_tokens, pp->pp_tokens->len + shift_amount); - memmove(pp_token_at(pp, dest_end + shift_amount), pp_token_at(pp, dest_end), - n_tokens_after_dest * sizeof(Token)); - pp->pp_tokens->len += shift_amount; - } else if (source_tokens->len < n_tokens_to_remove) { - // Move existing tokens forward to reduce room. - shift_amount = n_tokens_to_remove - source_tokens->len; - memmove(pp_token_at(pp, dest_start + source_tokens->len), pp_token_at(pp, dest_end), - n_tokens_after_dest * sizeof(Token)); - pp->pp_tokens->len -= shift_amount; - memset(pp_token_at(pp, pp->pp_tokens->len), 0, shift_amount * sizeof(Token)); - } - - memcpy(pp_token_at(pp, dest_start), source_tokens->data, source_tokens->len * sizeof(Token)); - - return dest_start + source_tokens->len; -} - -static int insert_pp_tokens(Preprocessor* pp, int dest_pos, TokenArray* source_tokens) { - return replace_pp_tokens(pp, dest_pos, dest_pos, source_tokens); -} - -static void replace_single_pp_token(Preprocessor* pp, int dest, Token* source_tok) { - TokenArray tokens; - tokens_init(&tokens, 1); - *tokens_push_new(&tokens) = *source_tok; - replace_pp_tokens(pp, dest, dest + 1, &tokens); -} - -static void expand_include_directive(Preprocessor* pp, const char* include_name, Token* original_include_name_tok) { - InFile* include_source = infile_open(include_name); - if (!include_source) { - fatal_error("%s:%d: cannot open include file: %s", original_include_name_tok->loc.filename, - original_include_name_tok->loc.line, token_stringify(original_include_name_tok)); - } - - TokenArray* include_pp_tokens = do_preprocess(include_source, pp->include_depth + 1, pp->macros, pp->include_paths, - pp->included_files, pp->generate_system_deps, pp->generate_user_deps); - tokens_pop(include_pp_tokens); // pop EOF token - pp->pos = insert_pp_tokens(pp, pp->pos, include_pp_tokens); -} - -// ws ::= many0() -// macro-parameters ::= '(' opt( many0(',' )) ')' -static TokenArray* pp_parse_macro_parameters(Preprocessor* pp) { - TokenArray* parameters = calloc(1, sizeof(TokenArray)); - tokens_init(parameters, 2); - - // '(' is consumed by caller. - skip_whitespaces(pp); - Token* tok = consume_pp_token_if(pp, TokenKind_ident); - if (tok) { - *tokens_push_new(parameters) = *tok; - skip_whitespaces(pp); - while (consume_pp_token_if(pp, TokenKind_comma)) { - skip_whitespaces(pp); - tok = next_pp_token(pp); - if (tok->kind != TokenKind_ident) { - fatal_error("%s:%d: invalid macro syntax", tok->loc.filename, tok->loc.line); - } - *tokens_push_new(parameters) = *tok; - } - } - expect_pp_token(pp, TokenKind_paren_r); - - return parameters; -} - -// ws ::= many0() -// macro-arguments ::= '(' opt( many0(',' )) ')' -static MacroArgArray* pp_parse_macro_arguments(Preprocessor* pp, bool skip_newline) { - MacroArgArray* args = macroargs_new(); - - skip_whitespaces_or_newlines(pp, skip_newline); - if (!consume_pp_token_if(pp, TokenKind_paren_l)) { - return NULL; - } - skip_whitespaces_or_newlines(pp, skip_newline); - Token* tok = peek_pp_token(pp); - if (!skip_newline && tok->kind == TokenKind_newline) { - expect_pp_token(pp, TokenKind_paren_r); - } - if (tok->kind != TokenKind_paren_r) { - MacroArg* arg = macroargs_push_new(args); - tokens_init(&arg->tokens, 4); - - // Parse argument tokens, handling nested parentheses. - int nesting = 0; - while (true) { - tok = peek_pp_token(pp); - - if (nesting == 0) { - if (tok->kind == TokenKind_paren_r) { - break; - } - if (tok->kind == TokenKind_comma) { - next_pp_token(pp); - skip_whitespaces_or_newlines(pp, skip_newline); - - arg = macroargs_push_new(args); - tokens_init(&arg->tokens, 4); - continue; - } - } - - if (tok->kind == TokenKind_paren_l) { - nesting++; - } else if (tok->kind == TokenKind_paren_r) { - nesting--; - if (nesting < 0) { - break; - } - } - - tok = next_pp_token(pp); - if (tok->kind != TokenKind_removed) { - *tokens_push_new(&arg->tokens) = *tok; - } - - skip_whitespaces_or_newlines(pp, skip_newline); - } - } - expect_pp_token(pp, TokenKind_paren_r); - - return args; -} - -static Token* concat_two_tokens(Token* left, Token* right) { - StrBuilder builder; - strbuilder_init(&builder); - - // Left - if (left->kind == TokenKind_ident) { - strbuilder_append_string(&builder, left->value.string); - } else if (left->kind == TokenKind_literal_int) { - char buf[32]; - sprintf(buf, "%d", left->value.integer); - strbuilder_append_string(&builder, buf); - } else { - strbuilder_append_string(&builder, token_stringify(left)); - } - - // Right - if (right->kind == TokenKind_ident) { - strbuilder_append_string(&builder, right->value.string); - } else if (right->kind == TokenKind_literal_int) { - char buf[32]; - sprintf(buf, "%d", right->value.integer); - strbuilder_append_string(&builder, buf); - } else { - strbuilder_append_string(&builder, token_stringify(right)); - } - - // Concat - Token* result = calloc(1, sizeof(Token)); - - char* endptr; - int val = strtol(builder.buf, &endptr, 10); - if (*endptr == '\0') { - result->kind = TokenKind_literal_int; - result->value.integer = val; - } else { - result->kind = TokenKind_ident; - result->value.string = builder.buf; - } - - return result; -} - -static Token* stringify_tokens(TokenArray* tokens) { - StrBuilder builder; - strbuilder_init(&builder); - - bool prev_whitespace = false; - for (size_t i = 0; i < tokens->len; ++i) { - Token* tok = &tokens->data[i]; - if (tok->kind == TokenKind_whitespace) { - prev_whitespace = true; - continue; - } - - if (prev_whitespace && builder.len > 0) { - strbuilder_append_char(&builder, ' '); - } - prev_whitespace = false; - - const char* str = token_stringify(tok); - - // For string literals and char constants, we need to escape quotes. - if (tok->kind == TokenKind_literal_str || tok->kind == TokenKind_character_constant) { - for (const char* p = str; *p; ++p) { - if (*p == '\\' || *p == '"') { - strbuilder_append_char(&builder, '\\'); - } - strbuilder_append_char(&builder, *p); - } - } else { - strbuilder_append_string(&builder, str); - } - } - - Token* result = calloc(1, sizeof(Token)); - result->kind = TokenKind_literal_str; - result->value.string = builder.buf; - return result; -} - -typedef struct MacroExpansionContext { - // Stack of macro names that have been already expanded. - StrArray already_expanded; -} MacroExpansionContext; - -MacroExpansionContext* macroexpansioncontext_new() { - MacroExpansionContext* ctx = calloc(1, sizeof(MacroExpansionContext)); - strings_init(&ctx->already_expanded); - return ctx; -} - -static int expand_macro(Preprocessor* pp, bool skip_newline, MacroExpansionContext* ctx); - -static void expand_macro_arg(Preprocessor* pp, MacroArg* arg, bool skip_newline, MacroExpansionContext* ctx) { - tokens_push_new(&arg->tokens)->kind = TokenKind_eof; - - Preprocessor* pp2 = preprocessor_new(&arg->tokens, pp->include_depth, pp->macros, pp->include_paths, - pp->included_files, pp->generate_system_deps, pp->generate_user_deps); - - size_t arg_token_count = arg->tokens.len; - size_t processed_token_count = 0; - while (processed_token_count < arg_token_count) { - if (peek_pp_token(pp2)->kind == TokenKind_ident) { - processed_token_count += expand_macro(pp2, skip_newline, ctx); - } else { - next_pp_token(pp2); - processed_token_count += 1; - } - } - - tokens_pop(&arg->tokens); -} - -static int expand_macro(Preprocessor* pp, bool skip_newline, MacroExpansionContext* ctx) { - if (ctx == NULL) { - ctx = macroexpansioncontext_new(); - } - - int macro_name_pos = pp->pos; - Token* macro_name = peek_pp_token(pp); - const char* macro_name_str = macro_name->value.string; - - // Supress expansion if the macro has already been expanded. - for (size_t i = 0; i < ctx->already_expanded.len; ++i) { - if (strcmp(ctx->already_expanded.data[i], macro_name->value.string) == 0) { - next_pp_token(pp); - return 1; - } - } - - int macro_idx = find_macro(pp, macro_name->value.string); - if (macro_idx == -1) { - next_pp_token(pp); - return 1; - } - - SourceLocation original_loc = macro_name->loc; - size_t token_count_before_expansion; - size_t token_count_after_expansion; - Macro* macro = &pp->macros->data[macro_idx]; - if (macro->kind == MacroKind_func) { - next_pp_token(pp); - MacroArgArray* args = pp_parse_macro_arguments(pp, skip_newline); - if (!args) { - // If function-like macro name is not followed by opening parenthesis, it is not a macro invocation. - return pp->pos - macro_name_pos; - } - token_count_before_expansion = pp->pos - macro_name_pos; - replace_pp_tokens(pp, macro_name_pos, pp->pos, ¯o->replacements); - - // Operands of # and ## operators should not be expanded. - bool* no_expand = calloc(macro->parameters.len, sizeof(bool)); - for (size_t i = 0; i < macro->replacements.len; ++i) { - TokenKind kind = macro->replacements.data[i].kind; - if (kind == TokenKind_hashhash) { - Token* lhs = NULL; - for (int j = i - 1; j >= 0; --j) { - if (macro->replacements.data[j].kind != TokenKind_whitespace) { - lhs = ¯o->replacements.data[j]; - break; - } - } - Token* rhs = NULL; - for (size_t j = i + 1; j < macro->replacements.len; ++j) { - if (macro->replacements.data[j].kind != TokenKind_whitespace) { - rhs = ¯o->replacements.data[j]; - break; - } - } - if (lhs) { - int param1 = macro_find_param(macro, lhs); - if (param1 != -1) { - no_expand[param1] = true; - } - } - if (rhs) { - int param2 = macro_find_param(macro, rhs); - if (param2 != -1) { - no_expand[param2] = true; - } - } - } else if (kind == TokenKind_hash) { - Token* operand = NULL; - for (size_t j = i + 1; j < macro->replacements.len; ++j) { - if (macro->replacements.data[j].kind != TokenKind_whitespace) { - operand = ¯o->replacements.data[j]; - break; - } - } - if (operand) { - int param = macro_find_param(macro, operand); - if (param != -1) { - no_expand[param] = true; - } - } - } - } - - // Argument expansion - for (size_t i = 0; i < args->len; ++i) { - if (no_expand[i]) - continue; - MacroArg* arg = &args->data[i]; - expand_macro_arg(pp, arg, skip_newline, ctx); - } - - // Parameter substitution - size_t token_count = 0; - size_t offset = 0; - for (size_t i = 0; i < macro->replacements.len; ++i) { - Token* tok = pp_token_at(pp, macro_name_pos + i + offset); - - // Handle # operator (stringification) - if (tok->kind == TokenKind_hash) { - size_t param_idx_in_replacements = 0; - for (size_t j = i + 1; j < macro->replacements.len; ++j) { - if (macro->replacements.data[j].kind != TokenKind_whitespace) { - param_idx_in_replacements = j; - break; - } - } - - Token* param_tok = ¯o->replacements.data[param_idx_in_replacements]; - int macro_param_idx = macro_find_param(macro, param_tok); - if (macro_param_idx != -1) { - Token* stringified = stringify_tokens(&args->data[macro_param_idx].tokens); - - // Replace ('#' * ) with stringified token. - TokenArray single_token; - tokens_init(&single_token, 1); - *tokens_push_new(&single_token) = *stringified; - - size_t tokens_to_replace = param_idx_in_replacements - i + 1; - replace_pp_tokens(pp, macro_name_pos + i + offset, macro_name_pos + i + offset + tokens_to_replace, - &single_token); - token_count += 1; - offset += 1 - tokens_to_replace; - i = param_idx_in_replacements; // Skip to after the parameter - continue; - } - } - - int macro_param_idx = macro_find_param(macro, tok); - if (macro_param_idx != -1) { - size_t arg_token_count = args->data[macro_param_idx].tokens.len; - if (arg_token_count == 0) { - // Empty argument: insert a placemarker token - TokenArray placemarker_token; - tokens_init(&placemarker_token, 1); - Token* pm = tokens_push_new(&placemarker_token); - pm->kind = TokenKind_placemarker; - pm->loc = tok->loc; - replace_pp_tokens(pp, macro_name_pos + i + offset, macro_name_pos + i + offset + 1, - &placemarker_token); - token_count += 1; - // offset stays the same (1 - 1 = 0) - } else { - replace_pp_tokens(pp, macro_name_pos + i + offset, macro_name_pos + i + offset + 1, - &args->data[macro_param_idx].tokens); - token_count += arg_token_count; - offset += arg_token_count - 1; - } - } else { - ++token_count; - } - } - - // Handle ## operator - size_t token_count2 = 0; - for (size_t i = 0; i < token_count; ++i) { - int pos = macro_name_pos + i; - Token* tok = pp_token_at(pp, pos); - if (tok->kind == TokenKind_hashhash) { - // Concatenate previous and next tokens - int lhs_pos = -1; - for (int j = pos - 1; j >= macro_name_pos; --j) { - if (pp_token_at(pp, j)->kind != TokenKind_whitespace) { - lhs_pos = j; - break; - } - } - int rhs_pos = -1; - for (size_t j = pos + 1; j < macro_name_pos + token_count; ++j) { - if (pp_token_at(pp, j)->kind != TokenKind_whitespace) { - rhs_pos = j; - break; - } - } - if (lhs_pos == -1 || rhs_pos == -1) { - fatal_error("%s:%d: invalid usage of ## operator", tok->loc.filename, tok->loc.line); - } - - Token* lhs_tok = pp_token_at(pp, lhs_pos); - Token* rhs_tok = pp_token_at(pp, rhs_pos); - bool lhs_is_placemarker = lhs_tok->kind == TokenKind_placemarker; - bool rhs_is_placemarker = rhs_tok->kind == TokenKind_placemarker; - - TokenArray result_tokens; - tokens_init(&result_tokens, 1); - - if (lhs_is_placemarker && rhs_is_placemarker) { - // Both are placemarkers: result is a placemarker - Token* pm = tokens_push_new(&result_tokens); - pm->kind = TokenKind_placemarker; - pm->loc = tok->loc; - } else if (lhs_is_placemarker) { - // Left is placemarker: result is the right token - *tokens_push_new(&result_tokens) = *rhs_tok; - } else if (rhs_is_placemarker) { - // Right is placemarker: result is the left token - *tokens_push_new(&result_tokens) = *lhs_tok; - } else { - // Neither is placemarker: concatenate them - Token* concatenated = concat_two_tokens(lhs_tok, rhs_tok); - *tokens_push_new(&result_tokens) = *concatenated; - } - - replace_pp_tokens(pp, lhs_pos, rhs_pos + 1, &result_tokens); - token_count -= rhs_pos - lhs_pos; - i -= pos - lhs_pos; - token_count2 -= pos - lhs_pos - 1; - } else { - ++token_count2; - } - } - - // Remove placemarker tokens after ## processing - for (size_t i = 0; i < token_count2; ++i) { - Token* tok = pp_token_at(pp, macro_name_pos + i); - if (tok->kind == TokenKind_placemarker) { - tok->kind = TokenKind_removed; - } - } - - // Inherit a source location from the original macro token. - for (size_t i = 0; i < token_count2; ++i) { - pp_token_at(pp, macro_name_pos + i)->loc = original_loc; - } - token_count_after_expansion = token_count2; - } else if (macro->kind == MacroKind_obj) { - replace_pp_tokens(pp, macro_name_pos, macro_name_pos + 1, ¯o->replacements); - // Inherit a source location from the original macro token. - for (size_t i = 0; i < macro->replacements.len; ++i) { - pp_token_at(pp, macro_name_pos + i)->loc = original_loc; - } - token_count_before_expansion = 1; - token_count_after_expansion = macro->replacements.len; - } else if (macro->kind == MacroKind_builtin_file) { - Token file_tok; - file_tok.kind = TokenKind_literal_str; - file_tok.value.string = macro_name->loc.filename; - file_tok.loc.filename = NULL; - file_tok.loc.line = 0; - replace_single_pp_token(pp, macro_name_pos, &file_tok); - token_count_before_expansion = 1; - token_count_after_expansion = 1; - } else if (macro->kind == MacroKind_builtin_line) { - Token line_tok; - line_tok.kind = TokenKind_literal_int; - line_tok.value.integer = macro_name->loc.line; - line_tok.loc.filename = NULL; - line_tok.loc.line = 0; - replace_single_pp_token(pp, macro_name_pos, &line_tok); - token_count_before_expansion = 1; - token_count_after_expansion = 1; - } else { - unreachable(); - } - - // Recursive expansion. - strings_push(&ctx->already_expanded, macro_name_str); - pp->pos = macro_name_pos; - size_t processed_token_count = 0; - while (processed_token_count < token_count_after_expansion) { - if (peek_pp_token(pp)->kind == TokenKind_ident) { - processed_token_count += expand_macro(pp, skip_newline, ctx); - } else { - next_pp_token(pp); - processed_token_count += 1; - } - } - strings_pop(&ctx->already_expanded); - - return token_count_before_expansion; -} - -typedef enum { - GroupDelimiterKind_normal, - GroupDelimiterKind_after_if_directive, - GroupDelimiterKind_after_else_directive, -} GroupDelimiterKind; - -static bool is_delimiter_of_current_group(GroupDelimiterKind delimiter_kind, TokenKind token_kind) { - if (delimiter_kind == GroupDelimiterKind_normal) { - return token_kind == TokenKind_eof; - } else if (delimiter_kind == GroupDelimiterKind_after_if_directive) { - return token_kind == TokenKind_pp_directive_elif || token_kind == TokenKind_pp_directive_elifdef || - token_kind == TokenKind_pp_directive_elifndef || token_kind == TokenKind_pp_directive_else || - token_kind == TokenKind_pp_directive_endif; - } else if (delimiter_kind == GroupDelimiterKind_after_else_directive) { - return token_kind == TokenKind_pp_directive_endif; - } else { - unreachable(); - } -} - -static int replace_pp_tokens(Preprocessor*, int, int, TokenArray*); -static void include_conditionally(Preprocessor* pp, GroupDelimiterKind delimiter_kind, bool do_include); - -static bool preprocess_if_group_or_elif_group(Preprocessor* pp, bool did_include) { - Token* directive = next_pp_token(pp); - - if (directive->kind == TokenKind_pp_directive_if || directive->kind == TokenKind_pp_directive_elif) { - int condition_expr_start_pos = pp->pos; - - while (!pp_eof(pp)) { - Token* tok = peek_pp_token(pp); - if (tok->kind == TokenKind_newline) { - break; - } else if (tok->kind == TokenKind_ident) { - if (strcmp(tok->value.string, "defined") == 0) { - int defined_pos = pp->pos; - // 'defined' * '(' * * ')' - // 'defined' * - skip_pp_token(pp, TokenKind_ident); - skip_whitespaces(pp); - Token* macro_name; - if (consume_pp_token_if(pp, TokenKind_paren_l)) { - skip_whitespaces(pp); - macro_name = expect_pp_token(pp, TokenKind_ident); - skip_whitespaces(pp); - expect_pp_token(pp, TokenKind_paren_r); - } else { - macro_name = expect_pp_token(pp, TokenKind_ident); - } - bool is_defined = find_macro(pp, macro_name->value.string) != -1; - TokenArray defined_results; - tokens_init(&defined_results, 1); - Token* defined_result = tokens_push_new(&defined_results); - defined_result->kind = TokenKind_literal_int; - defined_result->value.integer = is_defined; - pp->pos = replace_pp_tokens(pp, defined_pos, pp->pos, &defined_results); - } else { - expand_macro(pp, false, NULL); - } - } else { - next_pp_token(pp); - } - } - - // all remaining identifiers other than true (including those lexically identical to keywords such as false) are - // replaced with the pp-number 0, true is replaced with pp-number 1, and then each preprocessing token is - // converted into a token. - for (int pos = condition_expr_start_pos; pos < pp->pos; ++pos) { - Token* tok = pp_token_at(pp, pos); - if (tok->kind == TokenKind_ident) { - bool is_true = strcmp(tok->value.string, "true") == 0; - tok->kind = TokenKind_literal_int; - tok->value.integer = is_true; - } - } - - int condition_expr_tokens_len = pp->pos - condition_expr_start_pos; - TokenArray condition_expr_tokens; - // +1 to add EOF token at the end. - tokens_init(&condition_expr_tokens, condition_expr_tokens_len + 1); - for (int i = 0; i < condition_expr_tokens_len; ++i) { - *tokens_push_new(&condition_expr_tokens) = *pp_token_at(pp, condition_expr_start_pos + i); - } - Token* eof_tok = tokens_push_new(&condition_expr_tokens); - eof_tok->kind = TokenKind_eof; - - bool do_include = pp_eval_constant_expr(&condition_expr_tokens) && !did_include; - include_conditionally(pp, GroupDelimiterKind_after_if_directive, do_include); - return do_include; - } else if (directive->kind == TokenKind_pp_directive_ifdef || directive->kind == TokenKind_pp_directive_elifdef) { - skip_whitespaces(pp); - Token* macro_name = consume_pp_token_if(pp, TokenKind_ident); - if (!macro_name) { - fatal_error(""); - } - - bool do_include = !did_include && find_macro(pp, macro_name->value.string) != -1; - include_conditionally(pp, GroupDelimiterKind_after_if_directive, do_include); - return do_include; - } else if (directive->kind == TokenKind_pp_directive_ifndef || directive->kind == TokenKind_pp_directive_elifndef) { - skip_whitespaces(pp); - Token* macro_name = consume_pp_token_if(pp, TokenKind_ident); - if (!macro_name) { - fatal_error(""); - } - - bool do_include = !did_include && find_macro(pp, macro_name->value.string) == -1; - include_conditionally(pp, GroupDelimiterKind_after_if_directive, do_include); - return do_include; - } else { - unreachable(); - } -} - -static bool preprocess_if_group(Preprocessor* pp) { - return preprocess_if_group_or_elif_group(pp, false); -} - -static bool preprocess_elif_group(Preprocessor* pp, bool did_include) { - return preprocess_if_group_or_elif_group(pp, did_include); -} - -// elif-groups: -// { elif-group }+ -static bool preprocess_elif_groups_opt(Preprocessor* pp, bool did_include) { - while (!pp_eof(pp)) { - Token* tok = peek_pp_token(pp); - if (tok->kind == TokenKind_pp_directive_elif || tok->kind == TokenKind_pp_directive_elifdef || - tok->kind == TokenKind_pp_directive_elifndef) { - did_include |= preprocess_elif_group(pp, did_include); - } else { - break; - } - } - return did_include; -} - -// else-group: -// '#' 'else' group? -static void preprocess_else_group(Preprocessor* pp, bool did_include) { - skip_pp_token(pp, TokenKind_pp_directive_else); - skip_whitespaces(pp); - expect_pp_token(pp, TokenKind_newline); - - include_conditionally(pp, GroupDelimiterKind_after_else_directive, !did_include); -} - -// endif-line: -// '#' 'endif' new-line -static void preprocess_endif_directive(Preprocessor* pp) { - skip_pp_token(pp, TokenKind_pp_directive_endif); - skip_whitespaces(pp); - expect_pp_token(pp, TokenKind_newline); -} - -// if-section: -// if-group elif-groups? else-group? endif-line -static void preprocess_if_section(Preprocessor* pp) { - bool did_include = preprocess_if_group(pp); - did_include = preprocess_elif_groups_opt(pp, did_include); - if (peek_pp_token(pp)->kind == TokenKind_pp_directive_else) { - preprocess_else_group(pp, did_include); - } - preprocess_endif_directive(pp); -} - -static void preprocess_include_directive(Preprocessor* pp) { - skip_pp_token(pp, TokenKind_pp_directive_include); - skip_whitespaces(pp); - Token* include_name = read_include_header_name(pp); - const char* include_name_resolved = resolve_include_name(pp, include_name); - if (include_name_resolved == NULL) { - fatal_error("%s:%d: cannot resolve include file name: %s", include_name->loc.filename, include_name->loc.line, - token_stringify(include_name)); - } - - if ((pp->generate_system_deps && include_name->value.string[0] == '<') || - (pp->generate_user_deps && include_name->value.string[0] == '"')) { - bool already_included = false; - for (size_t i = 0; i < pp->included_files->len; ++i) { - if (strcmp(pp->included_files->data[i], include_name_resolved) == 0) { - already_included = true; - break; - } - } - if (!already_included) { - strings_push(pp->included_files, include_name_resolved); - } - } - - skip_whitespaces(pp); - expect_pp_token(pp, TokenKind_newline); - expand_include_directive(pp, include_name_resolved, include_name); -} - -// #include_next is a part of GNU extension. -// https://gcc.gnu.org/onlinedocs/cpp/Wrapper-Headers.html -static void preprocess_include_next_directive(Preprocessor* pp) { - skip_pp_token(pp, TokenKind_pp_directive_include_next); - skip_whitespaces(pp); - Token* include_name = read_include_header_name(pp); - const char* include_name_resolved = resolve_next_include_name(pp, include_name); - if (include_name_resolved == NULL) { - fatal_error("%s:%d: cannot resolve include file name: %s", include_name->loc.filename, include_name->loc.line, - token_stringify(include_name)); - } - - if (include_name->value.string[0] == '"') { - bool already_included = false; - for (size_t i = 0; i < pp->included_files->len; ++i) { - if (strcmp(pp->included_files->data[i], include_name_resolved) == 0) { - already_included = true; - break; - } - } - if (!already_included) { - strings_push(pp->included_files, include_name_resolved); - } - } - - skip_whitespaces(pp); - expect_pp_token(pp, TokenKind_newline); - expand_include_directive(pp, include_name_resolved, include_name); -} - -static void preprocess_embed_directive(Preprocessor*) { - unimplemented(); -} - -static void preprocess_define_directive(Preprocessor* pp) { - skip_pp_token(pp, TokenKind_pp_directive_define); - skip_whitespaces(pp); - Token* macro_name = expect_pp_token(pp, TokenKind_ident); - - if (consume_pp_token_if(pp, TokenKind_paren_l)) { - TokenArray* parameters = pp_parse_macro_parameters(pp); - skip_whitespaces(pp); - int replacements_start_pos = pp->pos; - seek_to_next_newline(pp); - Macro* macro = macros_push_new(pp->macros); - macro->kind = MacroKind_func; - macro->name = macro_name->value.string; - macro->parameters = *parameters; - int n_replacements = pp->pos - replacements_start_pos; - tokens_init(¯o->replacements, n_replacements); - for (int i = 0; i < n_replacements; ++i) { - *tokens_push_new(¯o->replacements) = *pp_token_at(pp, replacements_start_pos + i); - } - // Remove trailing whitespaces. - for (int i = n_replacements - 1; i >= 0; --i) { - if (macro->replacements.data[i].kind == TokenKind_whitespace) { - tokens_pop(¯o->replacements); - } else { - break; - } - } - } else { - skip_whitespaces(pp); - int replacements_start_pos = pp->pos; - seek_to_next_newline(pp); - Macro* macro = macros_push_new(pp->macros); - macro->kind = MacroKind_obj; - macro->name = macro_name->value.string; - int n_replacements = pp->pos - replacements_start_pos; - tokens_init(¯o->replacements, n_replacements); - for (int i = 0; i < n_replacements; ++i) { - *tokens_push_new(¯o->replacements) = *pp_token_at(pp, replacements_start_pos + i); - } - // Remove trailing whitespaces. - for (int i = n_replacements - 1; i >= 0; --i) { - if (macro->replacements.data[i].kind == TokenKind_whitespace) { - tokens_pop(¯o->replacements); - } else { - break; - } - } - } - expect_pp_token(pp, TokenKind_newline); -} - -static void preprocess_undef_directive(Preprocessor* pp) { - skip_pp_token(pp, TokenKind_pp_directive_undef); - skip_whitespaces(pp); - Token* macro_name = consume_pp_token_if(pp, TokenKind_ident); - if (macro_name) { - int macro_idx = find_macro(pp, macro_name->value.string); - if (macro_idx != -1) { - undef_macro(pp, macro_idx); - } - } -} - -static void preprocess_line_directive(Preprocessor*) { - unimplemented(); -} - -// control-line: -// ... -// '#' 'error' pp-tokens? new-line -static void preprocess_error_directive(Preprocessor* pp) { - // The C23 standard does not specify format of diagnostic message caused by #error. - // Ducc assumes that #error takes exactly one argument consisting of a string literal. - // TODO: output some general message or something else if not. - skip_pp_token(pp, TokenKind_pp_directive_error); - skip_whitespaces(pp); - Token* msg = expect_pp_token(pp, TokenKind_literal_str); - skip_whitespaces(pp); - expect_pp_token(pp, TokenKind_newline); - fatal_error("%s:%d: %s", msg->loc.filename, msg->loc.line, msg->value.string); -} - -// control-line: -// ... -// '#' 'warning' pp-tokens? new-line -static void preprocess_warning_directive(Preprocessor* pp) { - // The C23 standard does not specify format of diagnostic message caused by #warning. - // Ducc assumes that #warning takes exactly one argument consisting of a string literal. - // TODO: output some general message or something else if not. - skip_pp_token(pp, TokenKind_pp_directive_warning); - skip_whitespaces(pp); - Token* msg = expect_pp_token(pp, TokenKind_literal_str); - skip_whitespaces(pp); - expect_pp_token(pp, TokenKind_newline); - fprintf(stderr, "%s:%d: %s", msg->loc.filename, msg->loc.line, msg->value.string); -} - -static void preprocess_pragma_directive(Preprocessor* pp) { - // Ignore all #pragma directives for now. - skip_pp_token(pp, TokenKind_pp_directive_pragma); - seek_to_next_newline(pp); - skip_pp_token(pp, TokenKind_newline); -} - -static void preprocess_nop_directive(Preprocessor* pp) { - skip_pp_token(pp, TokenKind_pp_directive_nop); -} - -static void preprocess_non_directive_directive(Preprocessor* pp) { - Token* tok = peek_pp_token(pp); - // C23 6.10.1.13: - // The execution of a non-directive preprocessing directive results in undefined behavior. - fatal_error("%s:%d: invalid preprocessing directive, '%s'", tok->loc.filename, tok->loc.line, token_stringify(tok)); -} - -static void preprocess_text_line(Preprocessor* pp) { - while (!pp_eof(pp)) { - if (consume_pp_token_if(pp, TokenKind_newline)) { - return; - } - if (consume_pp_token_if_not(pp, TokenKind_ident)) { - continue; - } - - expand_macro(pp, true, NULL); - } - expect_pp_token(pp, TokenKind_newline); -} - -// group-part: -// if-section -// control-line -// '#' non-directive -// text-line -// -// control-line: -// '#' 'include' ... -// '#' 'embed' ... -// '#' 'define' ... -// '#' 'undef' ... -// '#' 'line' ... -// '#' 'error' ... -// '#' 'warning' ... -// '#' 'pragma' ... -// '#' new-line -static void preprocess_group_part(Preprocessor* pp) { - Token* tok = peek_pp_token(pp); - if (tok->kind == TokenKind_pp_directive_if || tok->kind == TokenKind_pp_directive_ifdef || - tok->kind == TokenKind_pp_directive_ifndef) { - preprocess_if_section(pp); - } else if (tok->kind == TokenKind_pp_directive_elif || tok->kind == TokenKind_pp_directive_elifdef || - tok->kind == TokenKind_pp_directive_elifndef || tok->kind == TokenKind_pp_directive_else || - tok->kind == TokenKind_pp_directive_endif) { - fatal_error("%s:%d: unexpected '%s'; no corresponding '#if'*", tok->loc.filename, tok->loc.line, - token_kind_stringify(tok->kind)); - } else if (tok->kind == TokenKind_pp_directive_include) { - preprocess_include_directive(pp); - } else if (tok->kind == TokenKind_pp_directive_include_next) { - preprocess_include_next_directive(pp); - } else if (tok->kind == TokenKind_pp_directive_embed) { - preprocess_embed_directive(pp); - } else if (tok->kind == TokenKind_pp_directive_define) { - preprocess_define_directive(pp); - } else if (tok->kind == TokenKind_pp_directive_undef) { - preprocess_undef_directive(pp); - } else if (tok->kind == TokenKind_pp_directive_line) { - preprocess_line_directive(pp); - } else if (tok->kind == TokenKind_pp_directive_error) { - preprocess_error_directive(pp); - } else if (tok->kind == TokenKind_pp_directive_warning) { - preprocess_warning_directive(pp); - } else if (tok->kind == TokenKind_pp_directive_pragma) { - preprocess_pragma_directive(pp); - } else if (tok->kind == TokenKind_pp_directive_nop) { - preprocess_nop_directive(pp); - } else if (tok->kind == TokenKind_pp_directive_non_directive) { - preprocess_non_directive_directive(pp); - } else { - preprocess_text_line(pp); - } -} - -// group: -// { group-part }+ -static void preprocess_group_opt(Preprocessor* pp, GroupDelimiterKind delimiter_kind) { - while (!pp_eof(pp)) { - Token* tok = peek_pp_token(pp); - if (is_delimiter_of_current_group(delimiter_kind, tok->kind)) - return; - preprocess_group_part(pp); - } - - if (delimiter_kind != GroupDelimiterKind_normal) { - expect_pp_token(pp, TokenKind_pp_directive_endif); - } -} - -static void skip_group_opt(Preprocessor* pp, GroupDelimiterKind delimiter_kind) { - assert(delimiter_kind != GroupDelimiterKind_normal); - int nesting = 0; - - while (!pp_eof(pp)) { - Token* tok = peek_pp_token(pp); - if (nesting == 0 && is_delimiter_of_current_group(delimiter_kind, tok->kind)) { - return; - } - if (tok->kind == TokenKind_pp_directive_if || tok->kind == TokenKind_pp_directive_ifdef || - tok->kind == TokenKind_pp_directive_ifndef) { - ++nesting; - } else if (tok->kind == TokenKind_pp_directive_endif) { - --nesting; - } - int first_pos = pp->pos; - seek_to_next_newline(pp); - expect_pp_token(pp, TokenKind_newline); - make_tokens_removed(pp, first_pos, pp->pos); - } - - expect_pp_token(pp, TokenKind_pp_directive_endif); -} - -static void include_conditionally(Preprocessor* pp, GroupDelimiterKind delimiter_kind, bool do_include) { - if (do_include) { - preprocess_group_opt(pp, delimiter_kind); - } else { - skip_group_opt(pp, delimiter_kind); - } -} - -// preprocessing-file: -// group? -static void preprocess_preprocessing_file(Preprocessor* pp) { - preprocess_group_opt(pp, GroupDelimiterKind_normal); -} - -static void remove_pp_directive(Preprocessor* pp, int directive_token_pos) { - seek_to_next_newline(pp); - skip_pp_token(pp, TokenKind_newline); - make_tokens_removed(pp, directive_token_pos, pp->pos); -} - -static void remove_pp_directives(Preprocessor* pp) { - pp->pos = 0; - while (!pp_eof(pp)) { - if (is_pp_directive(peek_pp_token(pp)->kind)) { - remove_pp_directive(pp, pp->pos); - } else { - next_pp_token(pp); - } - } -} - -static char* get_ducc_include_path() { - const char* self_dir = get_self_dir(); - char* buf = calloc(strlen(self_dir) + strlen("/../include") + 1, sizeof(char)); - sprintf(buf, "%s/../include", self_dir); - return buf; -} - -static TokenArray* do_preprocess(InFile* src, int depth, MacroArray* macros, StrArray* include_paths, - StrArray* included_files, bool generate_system_deps, bool generate_user_deps) { - TokenArray* pp_tokens = tokenize(src); - Preprocessor* pp = preprocessor_new(pp_tokens, depth, macros, include_paths, included_files, generate_system_deps, - generate_user_deps); - - preprocess_preprocessing_file(pp); - remove_pp_directives(pp); - return pp->pp_tokens; -} - -TokenArray* preprocess(InFile* src, StrArray* user_defines, StrArray* user_include_dirs, StrArray* included_files, - bool generate_system_deps, bool generate_user_deps) { - MacroArray* macros = macros_new(); - add_predefined_macros(macros); - add_user_defines(macros, user_defines); - strings_push(included_files, src->loc.filename); - - StrArray* include_paths = calloc(1, sizeof(StrArray)); - strings_init(include_paths); - - // Ducc's built-in headers has highest priority. - strings_push(include_paths, get_ducc_include_path()); - - for (size_t i = 0; i < user_include_dirs->len; ++i) { - strings_push(include_paths, user_include_dirs->data[i]); - } - strings_push(include_paths, "/usr/local/include"); - strings_push(include_paths, "/usr/include/x86_64-linux-gnu"); - strings_push(include_paths, "/usr/include"); - - return do_preprocess(src, 0, macros, include_paths, included_files, generate_system_deps, generate_user_deps); -} - -void concat_adjacent_string_literals(TokenArray* pp_tokens) { - size_t last_nonempty_token_pos = 0; - TokenKind last_nonempty_token_kind = TokenKind_eof; - for (size_t pos = 0; pos < pp_tokens->len; ++pos) { - Token* pp_tok = &pp_tokens->data[pos]; - TokenKind k = pp_tok->kind; - if (k == TokenKind_removed || k == TokenKind_whitespace || k == TokenKind_newline) { - continue; - } - if (k == TokenKind_literal_str && last_nonempty_token_kind == TokenKind_literal_str) { - // Concatenate adjacent string literals. - Token* last_pp_tok = &pp_tokens->data[last_nonempty_token_pos]; - const char* s1 = last_pp_tok->value.string; - size_t l1 = strlen(s1); - const char* s2 = pp_tok->value.string; - size_t l2 = strlen(s2); - char* buf = calloc(l1 + l2 + 1, sizeof(char)); - memcpy(buf, s1, l1); - memcpy(buf + l1, s2, l2); - last_pp_tok->value.string = buf; - pp_tok->kind = TokenKind_removed; - } else { - last_nonempty_token_pos = pos; - last_nonempty_token_kind = k; - } - } -} - -void print_token_to_file(FILE* out, TokenArray* pp_tokens) { - for (size_t i = 0; i < pp_tokens->len; ++i) { - Token* tok = &pp_tokens->data[i]; - - if (tok->kind == TokenKind_whitespace) { - // TODO: preserve indent? - fprintf(out, " "); - } else if (tok->kind == TokenKind_removed) { - // Output nothing for removed tokens - } else if (tok->kind == TokenKind_newline) { - // TODO: remove adjacent newlines? - fprintf(out, "\n"); - } else if (tok->kind != TokenKind_eof) { - // TODO: string literal - fprintf(out, "%s", token_stringify(tok)); - // Add space after token if next token is not punctuation - // TODO: apply stricter approach - if (i + 1 < pp_tokens->len) { - Token* next = &pp_tokens->data[i + 1]; - if (next->kind != TokenKind_newline && next->kind != TokenKind_whitespace && - next->kind != TokenKind_removed && next->kind != TokenKind_eof && next->kind != TokenKind_comma && - next->kind != TokenKind_semicolon && next->kind != TokenKind_paren_r && - next->kind != TokenKind_bracket_r && next->kind != TokenKind_brace_r && - next->kind != TokenKind_dot) { - fprintf(out, " "); - } - } - } - } -} diff --git a/src/preprocess.h b/src/preprocess.h deleted file mode 100644 index 60672c6..0000000 --- a/src/preprocess.h +++ /dev/null @@ -1,13 +0,0 @@ -#ifndef DUCC_PREPROCESS_H -#define DUCC_PREPROCESS_H - -#include "common.h" -#include "io.h" -#include "token.h" - -TokenArray* preprocess(InFile* src, StrArray* user_defines, StrArray* user_include_dirs, StrArray* included_files, - bool generate_system_deps, bool generate_user_deps); -void concat_adjacent_string_literals(TokenArray* pp_tokens); -void print_token_to_file(FILE* output_file, TokenArray* pp_tokens); - -#endif diff --git a/src/sys.c b/src/sys.c deleted file mode 100644 index 7e0025a..0000000 --- a/src/sys.c +++ /dev/null @@ -1,21 +0,0 @@ -#include "sys.h" -#include -#include -#include -#include "ducc.h" - -static char* get_self_path() { - char* buf = calloc(PATH_MAX, sizeof(char)); - ssize_t len = readlink("/proc/self/exe", buf, PATH_MAX - 1); - if (len == -1 || len == PATH_MAX - 1) { - return NULL; - } - buf[len] = '\0'; - return buf; -} - -// It returns a path not including final / except for root directory. -char* get_self_dir() { - char* path = get_self_path(); - return dirname(path); -} diff --git a/src/sys.h b/src/sys.h deleted file mode 100644 index 2527724..0000000 --- a/src/sys.h +++ /dev/null @@ -1,7 +0,0 @@ -#ifndef DUCC_SYS_H -#define DUCC_SYS_H - -// It returns a path not including final / except for root directory. -char* get_self_dir(); - -#endif diff --git a/src/token.c b/src/token.c deleted file mode 100644 index 726a912..0000000 --- a/src/token.c +++ /dev/null @@ -1,387 +0,0 @@ -#include "token.h" -#include "common.h" -#include "json.h" - -const char* token_kind_stringify(TokenKind k) { - if (k == TokenKind_eof) - return ""; - else if (k == TokenKind_hash) - return "#"; - else if (k == TokenKind_hashhash) - return "##"; - else if (k == TokenKind_whitespace) - return ""; - else if (k == TokenKind_removed) - return ""; - else if (k == TokenKind_placemarker) - return ""; - else if (k == TokenKind_newline) - return ""; - else if (k == TokenKind_other) - return ""; - else if (k == TokenKind_character_constant) - return ""; - else if (k == TokenKind_header_name) - return ""; - else if (k == TokenKind_pp_directive_define) - return "#define"; - else if (k == TokenKind_pp_directive_elif) - return "#elif"; - else if (k == TokenKind_pp_directive_elifdef) - return "#elifdef"; - else if (k == TokenKind_pp_directive_elifndef) - return "#elifndef"; - else if (k == TokenKind_pp_directive_else) - return "#else"; - else if (k == TokenKind_pp_directive_embed) - return "#embed"; - else if (k == TokenKind_pp_directive_endif) - return "#endif"; - else if (k == TokenKind_pp_directive_error) - return "#error"; - else if (k == TokenKind_pp_directive_if) - return "#if"; - else if (k == TokenKind_pp_directive_ifdef) - return "#ifdef"; - else if (k == TokenKind_pp_directive_ifndef) - return "#ifndef"; - else if (k == TokenKind_pp_directive_include) - return "#include"; - else if (k == TokenKind_pp_directive_include_next) - return "#include_next"; - else if (k == TokenKind_pp_directive_line) - return "#line"; - else if (k == TokenKind_pp_directive_non_directive) - return "#"; - else if (k == TokenKind_pp_directive_nop) - return "#"; - else if (k == TokenKind_pp_directive_pragma) - return "#pragma"; - else if (k == TokenKind_pp_directive_undef) - return "#undef"; - else if (k == TokenKind_pp_directive_warning) - return "#warning"; - else if (k == TokenKind_pp_operator_defined) - return "defined"; - else if (k == TokenKind_pp_operator___has_c_attribute) - return "__has_c_attribute"; - else if (k == TokenKind_pp_operator___has_embed) - return "__has_embed"; - else if (k == TokenKind_pp_operator___has_include) - return "__has_include"; - else if (k == TokenKind_keyword_alignas) - return "alignas"; - else if (k == TokenKind_keyword_alignof) - return "alignof"; - else if (k == TokenKind_keyword_auto) - return "auto"; - else if (k == TokenKind_keyword_bool) - return "bool"; - else if (k == TokenKind_keyword_break) - return "break"; - else if (k == TokenKind_keyword_case) - return "case"; - else if (k == TokenKind_keyword_char) - return "char"; - else if (k == TokenKind_keyword_const) - return "const"; - else if (k == TokenKind_keyword_constexpr) - return "constexpr"; - else if (k == TokenKind_keyword_continue) - return "continue"; - else if (k == TokenKind_keyword_default) - return "default"; - else if (k == TokenKind_keyword_do) - return "do"; - else if (k == TokenKind_keyword_double) - return "double"; - else if (k == TokenKind_keyword_else) - return "else"; - else if (k == TokenKind_keyword_enum) - return "enum"; - else if (k == TokenKind_keyword_extern) - return "extern"; - else if (k == TokenKind_keyword_false) - return "false"; - else if (k == TokenKind_keyword_float) - return "float"; - else if (k == TokenKind_keyword_for) - return "for"; - else if (k == TokenKind_keyword_goto) - return "goto"; - else if (k == TokenKind_keyword_if) - return "if"; - else if (k == TokenKind_keyword_inline) - return "inline"; - else if (k == TokenKind_keyword_int) - return "int"; - else if (k == TokenKind_keyword_long) - return "long"; - else if (k == TokenKind_keyword_nullptr) - return "nullptr"; - else if (k == TokenKind_keyword_register) - return "register"; - else if (k == TokenKind_keyword_restrict) - return "restrict"; - else if (k == TokenKind_keyword_return) - return "return"; - else if (k == TokenKind_keyword_short) - return "short"; - else if (k == TokenKind_keyword_signed) - return "signed"; - else if (k == TokenKind_keyword_sizeof) - return "sizeof"; - else if (k == TokenKind_keyword_static) - return "static"; - else if (k == TokenKind_keyword_static_assert) - return "static_assert"; - else if (k == TokenKind_keyword_struct) - return "struct"; - else if (k == TokenKind_keyword_switch) - return "switch"; - else if (k == TokenKind_keyword_thread_local) - return "thread_local"; - else if (k == TokenKind_keyword_true) - return "true"; - else if (k == TokenKind_keyword_typedef) - return "typedef"; - else if (k == TokenKind_keyword_typeof) - return "typeof"; - else if (k == TokenKind_keyword_typeof_unqual) - return "typeof_unqual"; - else if (k == TokenKind_keyword_union) - return "union"; - else if (k == TokenKind_keyword_unsigned) - return "unsigned"; - else if (k == TokenKind_keyword_void) - return "void"; - else if (k == TokenKind_keyword_volatile) - return "volatile"; - else if (k == TokenKind_keyword_while) - return "while"; - else if (k == TokenKind_keyword__Atomic) - return "_Atomic"; - else if (k == TokenKind_keyword__BitInt) - return "_BitInt"; - else if (k == TokenKind_keyword__Complex) - return "_Complex"; - else if (k == TokenKind_keyword__Decimal128) - return "_Decimal128"; - else if (k == TokenKind_keyword__Decimal32) - return "_Decimal32"; - else if (k == TokenKind_keyword__Decimal64) - return "_Decimal64"; - else if (k == TokenKind_keyword__Generic) - return "_Generic"; - else if (k == TokenKind_keyword__Imaginary) - return "_Imaginary"; - else if (k == TokenKind_keyword__Noreturn) - return "_Noreturn"; - else if (k == TokenKind_and) - return "&"; - else if (k == TokenKind_andand) - return "&&"; - else if (k == TokenKind_arrow) - return "->"; - else if (k == TokenKind_assign) - return "="; - else if (k == TokenKind_assign_add) - return "+="; - else if (k == TokenKind_assign_and) - return "&="; - else if (k == TokenKind_assign_div) - return "/="; - else if (k == TokenKind_assign_lshift) - return "<<="; - else if (k == TokenKind_assign_mod) - return "%="; - else if (k == TokenKind_assign_mul) - return "*="; - else if (k == TokenKind_assign_or) - return "|="; - else if (k == TokenKind_assign_rshift) - return ">>="; - else if (k == TokenKind_assign_sub) - return "-="; - else if (k == TokenKind_assign_xor) - return "^="; - else if (k == TokenKind_brace_l) - return "{"; - else if (k == TokenKind_brace_r) - return "}"; - else if (k == TokenKind_bracket_l) - return "["; - else if (k == TokenKind_bracket_r) - return "]"; - else if (k == TokenKind_colon) - return ":"; - else if (k == TokenKind_comma) - return ","; - else if (k == TokenKind_dot) - return "."; - else if (k == TokenKind_ellipsis) - return "..."; - else if (k == TokenKind_eq) - return "=="; - else if (k == TokenKind_ge) - return ">="; - else if (k == TokenKind_gt) - return ">"; - else if (k == TokenKind_ident) - return ""; - else if (k == TokenKind_le) - return "<="; - else if (k == TokenKind_literal_int) - return ""; - else if (k == TokenKind_literal_double) - return ""; - else if (k == TokenKind_literal_str) - return ""; - else if (k == TokenKind_lshift) - return "<<"; - else if (k == TokenKind_lt) - return "<"; - else if (k == TokenKind_minus) - return "-"; - else if (k == TokenKind_minusminus) - return "--"; - else if (k == TokenKind_ne) - return "!="; - else if (k == TokenKind_not) - return "!"; - else if (k == TokenKind_or) - return "|"; - else if (k == TokenKind_oror) - return "||"; - else if (k == TokenKind_paren_l) - return "("; - else if (k == TokenKind_paren_r) - return ")"; - else if (k == TokenKind_percent) - return "%"; - else if (k == TokenKind_plus) - return "+"; - else if (k == TokenKind_plusplus) - return "++"; - else if (k == TokenKind_question) - return "?"; - else if (k == TokenKind_rshift) - return ">>"; - else if (k == TokenKind_semicolon) - return ";"; - else if (k == TokenKind_slash) - return "/"; - else if (k == TokenKind_star) - return "*"; - else if (k == TokenKind_tilde) - return "~"; - else if (k == TokenKind_xor) - return "^"; - else - unreachable(); -} - -bool is_pp_directive(TokenKind k) { - return k == TokenKind_pp_directive_define || k == TokenKind_pp_directive_elif || - k == TokenKind_pp_directive_elifdef || k == TokenKind_pp_directive_elifndef || - k == TokenKind_pp_directive_else || k == TokenKind_pp_directive_embed || k == TokenKind_pp_directive_endif || - k == TokenKind_pp_directive_error || k == TokenKind_pp_directive_if || k == TokenKind_pp_directive_ifdef || - k == TokenKind_pp_directive_ifndef || k == TokenKind_pp_directive_include || - k == TokenKind_pp_directive_include_next || k == TokenKind_pp_directive_line || - k == TokenKind_pp_directive_non_directive || k == TokenKind_pp_directive_nop || - k == TokenKind_pp_directive_pragma || k == TokenKind_pp_directive_undef || - k == TokenKind_pp_directive_warning; -} - -const char* token_stringify(Token* tok) { - TokenKind k = tok->kind; - if (k == TokenKind_pp_directive_non_directive) { - char* buf = calloc(strlen(tok->value.string) + 1 + 1, sizeof(char)); - sprintf(buf, "#%s", tok->value.string); - return buf; - } else if (k == TokenKind_literal_int) { - char* buf = calloc(10, sizeof(char)); - sprintf(buf, "%d", tok->value.integer); - return buf; - } else if (k == TokenKind_literal_double) { - char* buf = calloc(32, sizeof(char)); - sprintf(buf, "%g", tok->value.floating); - return buf; - } else if (k == TokenKind_other || k == TokenKind_character_constant || k == TokenKind_ident || - k == TokenKind_literal_str || k == TokenKind_header_name) { - return tok->value.string; - } else { - return token_kind_stringify(k); - } -} - -void token_build_json(JsonBuilder* builder, Token* tok) { - jsonbuilder_object_start(builder); - jsonbuilder_object_member_start(builder, "kind"); - jsonbuilder_string(builder, token_kind_stringify(tok->kind)); - jsonbuilder_object_member_end(builder); - jsonbuilder_object_member_start(builder, "value"); - if (tok->kind == TokenKind_pp_directive_non_directive) { - char* buf = calloc(strlen(tok->value.string) + 1 + 1, sizeof(char)); - sprintf(buf, "#%s", tok->value.string); - jsonbuilder_string(builder, buf); - } else if (tok->kind == TokenKind_literal_int) { - jsonbuilder_integer(builder, tok->value.integer); - } else if (tok->kind == TokenKind_other || tok->kind == TokenKind_character_constant || - tok->kind == TokenKind_ident || tok->kind == TokenKind_literal_str) { - jsonbuilder_string(builder, tok->value.string); - } else { - jsonbuilder_null(builder); - } - jsonbuilder_object_member_end(builder); - jsonbuilder_object_member_start(builder, "loc"); - sourcelocation_build_json(builder, &tok->loc); - jsonbuilder_object_member_end(builder); - jsonbuilder_object_end(builder); -} - -void tokens_init(TokenArray* tokens, size_t capacity) { - tokens->len = 0; - tokens->capacity = capacity; - tokens->data = calloc(tokens->capacity, sizeof(Token)); -} - -void tokens_reserve(TokenArray* tokens, size_t size) { - if (size <= tokens->capacity) - return; - while (tokens->capacity < size) { - tokens->capacity *= 2; - } - tokens->data = realloc(tokens->data, tokens->capacity * sizeof(Token)); - memset(tokens->data + tokens->len, 0, (tokens->capacity - tokens->len) * sizeof(Token)); -} - -Token* tokens_push_new(TokenArray* tokens) { - tokens_reserve(tokens, tokens->len + 1); - return &tokens->data[tokens->len++]; -} - -Token* tokens_pop(TokenArray* tokens) { - if (tokens->len == 0) { - return NULL; - } else { - return &tokens->data[--tokens->len]; - } -} - -void tokens_build_json(JsonBuilder* builder, TokenArray* tokens) { - jsonbuilder_object_start(builder); - jsonbuilder_object_member_start(builder, "len"); - jsonbuilder_integer(builder, tokens->len); - jsonbuilder_object_member_end(builder); - jsonbuilder_object_member_start(builder, "data"); - jsonbuilder_array_start(builder); - for (size_t i = 0; i < tokens->len; ++i) { - jsonbuilder_array_element_start(builder); - token_build_json(builder, &tokens->data[i]); - jsonbuilder_array_element_end(builder); - } - jsonbuilder_array_end(builder); - jsonbuilder_object_member_end(builder); - jsonbuilder_object_end(builder); -} diff --git a/src/token.h b/src/token.h deleted file mode 100644 index 6c4f20b..0000000 --- a/src/token.h +++ /dev/null @@ -1,183 +0,0 @@ -#ifndef DUCC_TOKEN_H -#define DUCC_TOKEN_H - -#include "ducc.h" -#include "io.h" - -typedef enum { - TokenKind_eof, - - // Only preprocessing phase. - TokenKind_hash, - TokenKind_hashhash, - TokenKind_whitespace, - TokenKind_removed, - TokenKind_placemarker, - TokenKind_newline, - TokenKind_other, - TokenKind_character_constant, - TokenKind_header_name, - TokenKind_pp_directive_define, - TokenKind_pp_directive_elif, - TokenKind_pp_directive_elifdef, - TokenKind_pp_directive_elifndef, - TokenKind_pp_directive_else, - TokenKind_pp_directive_embed, - TokenKind_pp_directive_endif, - TokenKind_pp_directive_error, - TokenKind_pp_directive_if, - TokenKind_pp_directive_ifdef, - TokenKind_pp_directive_ifndef, - TokenKind_pp_directive_include, - TokenKind_pp_directive_include_next, // GNU extension - TokenKind_pp_directive_line, - TokenKind_pp_directive_non_directive, - TokenKind_pp_directive_nop, - TokenKind_pp_directive_pragma, - TokenKind_pp_directive_undef, - TokenKind_pp_directive_warning, - TokenKind_pp_operator_defined, - TokenKind_pp_operator___has_c_attribute, - TokenKind_pp_operator___has_embed, - TokenKind_pp_operator___has_include, - - // C23: 6.4.1 - TokenKind_keyword_alignas, - TokenKind_keyword_alignof, - TokenKind_keyword_auto, - TokenKind_keyword_bool, - TokenKind_keyword_break, - TokenKind_keyword_case, - TokenKind_keyword_char, - TokenKind_keyword_const, - TokenKind_keyword_constexpr, - TokenKind_keyword_continue, - TokenKind_keyword_default, - TokenKind_keyword_do, - TokenKind_keyword_double, - TokenKind_keyword_else, - TokenKind_keyword_enum, - TokenKind_keyword_extern, - TokenKind_keyword_false, - TokenKind_keyword_float, - TokenKind_keyword_for, - TokenKind_keyword_goto, - TokenKind_keyword_if, - TokenKind_keyword_inline, - TokenKind_keyword_int, - TokenKind_keyword_long, - TokenKind_keyword_nullptr, - TokenKind_keyword_register, - TokenKind_keyword_restrict, - TokenKind_keyword_return, - TokenKind_keyword_short, - TokenKind_keyword_signed, - TokenKind_keyword_sizeof, - TokenKind_keyword_static, - TokenKind_keyword_static_assert, - TokenKind_keyword_struct, - TokenKind_keyword_switch, - TokenKind_keyword_thread_local, - TokenKind_keyword_true, - TokenKind_keyword_typedef, - TokenKind_keyword_typeof, - TokenKind_keyword_typeof_unqual, - TokenKind_keyword_union, - TokenKind_keyword_unsigned, - TokenKind_keyword_void, - TokenKind_keyword_volatile, - TokenKind_keyword_while, - TokenKind_keyword__Atomic, - TokenKind_keyword__BitInt, - TokenKind_keyword__Complex, - TokenKind_keyword__Decimal128, - TokenKind_keyword__Decimal32, - TokenKind_keyword__Decimal64, - TokenKind_keyword__Generic, - TokenKind_keyword__Imaginary, - TokenKind_keyword__Noreturn, - - TokenKind_and, - TokenKind_andand, - TokenKind_arrow, - TokenKind_assign, - TokenKind_assign_add, - TokenKind_assign_and, - TokenKind_assign_div, - TokenKind_assign_lshift, - TokenKind_assign_mod, - TokenKind_assign_mul, - TokenKind_assign_or, - TokenKind_assign_rshift, - TokenKind_assign_sub, - TokenKind_assign_xor, - TokenKind_brace_l, - TokenKind_brace_r, - TokenKind_bracket_l, - TokenKind_bracket_r, - TokenKind_colon, - TokenKind_comma, - TokenKind_dot, - TokenKind_ellipsis, - TokenKind_eq, - TokenKind_ge, - TokenKind_gt, - TokenKind_ident, - TokenKind_le, - TokenKind_literal_int, - TokenKind_literal_double, - TokenKind_literal_str, - TokenKind_lshift, - TokenKind_lt, - TokenKind_minus, - TokenKind_minusminus, - TokenKind_ne, - TokenKind_not, - TokenKind_or, - TokenKind_oror, - TokenKind_paren_l, - TokenKind_paren_r, - TokenKind_percent, - TokenKind_plus, - TokenKind_plusplus, - TokenKind_question, - TokenKind_rshift, - TokenKind_semicolon, - TokenKind_slash, - TokenKind_star, - TokenKind_tilde, - TokenKind_xor, -} TokenKind; - -const char* token_kind_stringify(TokenKind k); -bool is_pp_directive(TokenKind k); - -// TokenValue is externally tagged by Token's kind. -typedef union { - const char* string; - int integer; - double floating; -} TokenValue; - -typedef struct { - TokenKind kind; - TokenValue value; - SourceLocation loc; -} Token; - -const char* token_stringify(Token* tok); -void token_build_json(JsonBuilder* builder, Token* tok); - -typedef struct { - size_t len; - size_t capacity; - Token* data; -} TokenArray; - -void tokens_init(TokenArray* tokens, size_t capacity); -void tokens_reserve(TokenArray* tokens, size_t size); -Token* tokens_push_new(TokenArray* tokens); -Token* tokens_pop(TokenArray* tokens); -void tokens_build_json(JsonBuilder* builder, TokenArray* tokens); - -#endif diff --git a/src/tokenize.c b/src/tokenize.c deleted file mode 100644 index 6e9ad43..0000000 --- a/src/tokenize.c +++ /dev/null @@ -1,597 +0,0 @@ -#include "tokenize.h" -#include -#include "common.h" - -typedef struct { - InFile* src; - bool at_bol; - bool expect_header_name; - TokenArray* tokens; -} Lexer; - -static Lexer* lexer_new(InFile* src) { - Lexer* l = calloc(1, sizeof(Lexer)); - - l->src = src; - l->at_bol = true; - l->expect_header_name = false; - l->tokens = calloc(1, sizeof(TokenArray)); - tokens_init(l->tokens, 1024 * 16); - - return l; -} - -static void pplexer_tokenize_pp_directive(Lexer* l, Token* tok) { - // Skip whitespaces after '#'. - char c; - while (isspace((c = infile_peek_char(l->src)))) { - if (c == '\n') - break; - infile_next_char(l->src); - } - // '#' new-line - if (c == '\n') { - tok->kind = TokenKind_pp_directive_nop; - return; - } - - StrBuilder builder; - strbuilder_init(&builder); - while (isalnum(infile_peek_char(l->src)) || infile_peek_char(l->src) == '_') { - strbuilder_append_char(&builder, infile_peek_char(l->src)); - infile_next_char(l->src); - } - const char* pp_directive_name = builder.buf; - - if (builder.len == 0) { - tok->kind = TokenKind_hash; - } else if (strcmp(pp_directive_name, "define") == 0) { - tok->kind = TokenKind_pp_directive_define; - } else if (strcmp(pp_directive_name, "elif") == 0) { - tok->kind = TokenKind_pp_directive_elif; - } else if (strcmp(pp_directive_name, "elifdef") == 0) { - tok->kind = TokenKind_pp_directive_elifdef; - } else if (strcmp(pp_directive_name, "elifndef") == 0) { - tok->kind = TokenKind_pp_directive_elifndef; - } else if (strcmp(pp_directive_name, "else") == 0) { - tok->kind = TokenKind_pp_directive_else; - } else if (strcmp(pp_directive_name, "embed") == 0) { - tok->kind = TokenKind_pp_directive_embed; - } else if (strcmp(pp_directive_name, "endif") == 0) { - tok->kind = TokenKind_pp_directive_endif; - } else if (strcmp(pp_directive_name, "error") == 0) { - tok->kind = TokenKind_pp_directive_error; - } else if (strcmp(pp_directive_name, "if") == 0) { - tok->kind = TokenKind_pp_directive_if; - } else if (strcmp(pp_directive_name, "ifdef") == 0) { - tok->kind = TokenKind_pp_directive_ifdef; - } else if (strcmp(pp_directive_name, "ifndef") == 0) { - tok->kind = TokenKind_pp_directive_ifndef; - } else if (strcmp(pp_directive_name, "include") == 0) { - l->expect_header_name = true; - tok->kind = TokenKind_pp_directive_include; - } else if (strcmp(pp_directive_name, "include_next") == 0) { - l->expect_header_name = true; - tok->kind = TokenKind_pp_directive_include_next; - } else if (strcmp(pp_directive_name, "line") == 0) { - tok->kind = TokenKind_pp_directive_line; - } else if (strcmp(pp_directive_name, "pragma") == 0) { - tok->kind = TokenKind_pp_directive_pragma; - } else if (strcmp(pp_directive_name, "undef") == 0) { - tok->kind = TokenKind_pp_directive_undef; - } else if (strcmp(pp_directive_name, "warning") == 0) { - tok->kind = TokenKind_pp_directive_warning; - } else { - tok->kind = TokenKind_pp_directive_non_directive; - tok->value.string = pp_directive_name; - } -} - -static void do_tokenize_all(Lexer* l) { - while (!infile_eof(l->src)) { - Token* tok = tokens_push_new(l->tokens); - tok->loc = l->src->loc; - char c = infile_peek_char(l->src); - - if (l->expect_header_name && c == '"') { - infile_next_char(l->src); - StrBuilder builder; - strbuilder_init(&builder); - strbuilder_append_char(&builder, '"'); - while (1) { - char ch = infile_peek_char(l->src); - if (ch == '"') - break; - strbuilder_append_char(&builder, ch); - if (ch == '\\') { - infile_next_char(l->src); - strbuilder_append_char(&builder, infile_peek_char(l->src)); - } - infile_next_char(l->src); - } - strbuilder_append_char(&builder, '"'); - infile_next_char(l->src); - tok->kind = TokenKind_header_name; - tok->value.string = builder.buf; - l->expect_header_name = false; - } else if (l->expect_header_name && c == '<') { - infile_next_char(l->src); - StrBuilder builder; - strbuilder_init(&builder); - strbuilder_append_char(&builder, '<'); - while (1) { - char ch = infile_peek_char(l->src); - if (ch == '>') - break; - strbuilder_append_char(&builder, ch); - infile_next_char(l->src); - } - strbuilder_append_char(&builder, '>'); - infile_next_char(l->src); - tok->kind = TokenKind_header_name; - tok->value.string = builder.buf; - l->expect_header_name = false; - } else if (c == '(') { - infile_next_char(l->src); - tok->kind = TokenKind_paren_l; - } else if (c == ')') { - infile_next_char(l->src); - tok->kind = TokenKind_paren_r; - } else if (c == '{') { - infile_next_char(l->src); - tok->kind = TokenKind_brace_l; - } else if (c == '}') { - infile_next_char(l->src); - tok->kind = TokenKind_brace_r; - } else if (c == '[') { - infile_next_char(l->src); - tok->kind = TokenKind_bracket_l; - } else if (c == ']') { - infile_next_char(l->src); - tok->kind = TokenKind_bracket_r; - } else if (c == ',') { - infile_next_char(l->src); - tok->kind = TokenKind_comma; - } else if (c == ':') { - infile_next_char(l->src); - tok->kind = TokenKind_colon; - } else if (c == ';') { - infile_next_char(l->src); - tok->kind = TokenKind_semicolon; - } else if (c == '^') { - infile_next_char(l->src); - if (infile_consume_if(l->src, '=')) { - tok->kind = TokenKind_assign_xor; - } else { - tok->kind = TokenKind_xor; - } - } else if (c == '?') { - infile_next_char(l->src); - tok->kind = TokenKind_question; - } else if (c == '~') { - infile_next_char(l->src); - tok->kind = TokenKind_tilde; - } else if (c == '+') { - infile_next_char(l->src); - if (infile_consume_if(l->src, '=')) { - tok->kind = TokenKind_assign_add; - } else if (infile_consume_if(l->src, '+')) { - tok->kind = TokenKind_plusplus; - } else { - tok->kind = TokenKind_plus; - } - } else if (c == '|') { - infile_next_char(l->src); - if (infile_consume_if(l->src, '=')) { - tok->kind = TokenKind_assign_or; - } else if (infile_consume_if(l->src, '|')) { - tok->kind = TokenKind_oror; - } else { - tok->kind = TokenKind_or; - } - } else if (c == '&') { - infile_next_char(l->src); - if (infile_consume_if(l->src, '=')) { - tok->kind = TokenKind_assign_and; - } else if (infile_consume_if(l->src, '&')) { - tok->kind = TokenKind_andand; - } else { - tok->kind = TokenKind_and; - } - } else if (c == '-') { - infile_next_char(l->src); - if (infile_consume_if(l->src, '>')) { - tok->kind = TokenKind_arrow; - } else if (infile_consume_if(l->src, '=')) { - tok->kind = TokenKind_assign_sub; - } else if (infile_consume_if(l->src, '-')) { - tok->kind = TokenKind_minusminus; - } else { - tok->kind = TokenKind_minus; - } - } else if (c == '*') { - infile_next_char(l->src); - if (infile_consume_if(l->src, '=')) { - tok->kind = TokenKind_assign_mul; - } else { - tok->kind = TokenKind_star; - } - } else if (c == '/') { - infile_next_char(l->src); - if (infile_consume_if(l->src, '=')) { - tok->kind = TokenKind_assign_div; - } else if (infile_consume_if(l->src, '/')) { - while (!infile_eof(l->src) && infile_peek_char(l->src) != '\n') { - infile_next_char(l->src); - } - tok->kind = TokenKind_whitespace; - } else if (infile_consume_if(l->src, '*')) { - while (infile_peek_char(l->src)) { - if (infile_consume_if(l->src, '*')) { - if (infile_consume_if(l->src, '/')) { - break; - } - continue; - } - infile_next_char(l->src); - } - tok->kind = TokenKind_whitespace; - } else { - tok->kind = TokenKind_slash; - } - } else if (c == '%') { - infile_next_char(l->src); - if (infile_consume_if(l->src, '=')) { - tok->kind = TokenKind_assign_mod; - } else { - tok->kind = TokenKind_percent; - } - } else if (c == '.') { - infile_next_char(l->src); - if (infile_consume_if(l->src, '.')) { - if (infile_consume_if(l->src, '.')) { - tok->kind = TokenKind_ellipsis; - } else { - tok->kind = TokenKind_other; - tok->value.string = ".."; - } - } else { - tok->kind = TokenKind_dot; - } - } else if (c == '!') { - infile_next_char(l->src); - if (infile_consume_if(l->src, '=')) { - tok->kind = TokenKind_ne; - } else { - tok->kind = TokenKind_not; - } - } else if (c == '=') { - infile_next_char(l->src); - if (infile_consume_if(l->src, '=')) { - tok->kind = TokenKind_eq; - } else { - tok->kind = TokenKind_assign; - } - } else if (c == '<') { - infile_next_char(l->src); - if (infile_consume_if(l->src, '=')) { - tok->kind = TokenKind_le; - } else if (infile_consume_if(l->src, '<')) { - if (infile_consume_if(l->src, '=')) { - tok->kind = TokenKind_assign_lshift; - } else { - tok->kind = TokenKind_lshift; - } - } else { - tok->kind = TokenKind_lt; - } - } else if (c == '>') { - infile_next_char(l->src); - if (infile_consume_if(l->src, '=')) { - tok->kind = TokenKind_ge; - } else if (infile_consume_if(l->src, '>')) { - if (infile_consume_if(l->src, '=')) { - tok->kind = TokenKind_assign_rshift; - } else { - tok->kind = TokenKind_rshift; - } - } else { - tok->kind = TokenKind_gt; - } - } else if (c == '#') { - infile_next_char(l->src); - if (infile_consume_if(l->src, '#')) { - tok->kind = TokenKind_hashhash; - } else { - if (l->at_bol) { - pplexer_tokenize_pp_directive(l, tok); - } else { - tok->kind = TokenKind_hash; - } - } - } else if (c == '\'') { - infile_next_char(l->src); - StrBuilder builder; - strbuilder_init(&builder); - strbuilder_append_char(&builder, '\''); - strbuilder_append_char(&builder, infile_peek_char(l->src)); - if (infile_peek_char(l->src) == '\\') { - infile_next_char(l->src); - strbuilder_append_char(&builder, infile_peek_char(l->src)); - } - strbuilder_append_char(&builder, '\''); - infile_next_char(l->src); - infile_next_char(l->src); - tok->kind = TokenKind_character_constant; - tok->value.string = builder.buf; - } else if (c == '"') { - infile_next_char(l->src); - StrBuilder builder; - strbuilder_init(&builder); - while (1) { - char ch = infile_peek_char(l->src); - if (ch == '"') - break; - strbuilder_append_char(&builder, ch); - if (ch == '\\') { - infile_next_char(l->src); - strbuilder_append_char(&builder, infile_peek_char(l->src)); - } - infile_next_char(l->src); - } - infile_next_char(l->src); - tok->kind = TokenKind_literal_str; - tok->value.string = builder.buf; - } else if (isdigit(c)) { - // TODO: implement tokenization of pp-number. - StrBuilder builder; - strbuilder_init(&builder); - while (isalnum(infile_peek_char(l->src))) { - strbuilder_append_char(&builder, infile_peek_char(l->src)); - infile_next_char(l->src); - } - if (infile_peek_char(l->src) == '.' && isdigit(infile_peek_char2(l->src))) { - strbuilder_append_char(&builder, infile_peek_char(l->src)); - infile_next_char(l->src); - while (isdigit(infile_peek_char(l->src))) { - strbuilder_append_char(&builder, infile_peek_char(l->src)); - infile_next_char(l->src); - } - tok->kind = TokenKind_literal_double; - tok->value.floating = strtod(builder.buf, NULL); - } else { - tok->kind = TokenKind_literal_int; - tok->value.integer = strtol(builder.buf, NULL, 0); - } - } else if (isalpha(c) || c == '_') { - StrBuilder builder; - strbuilder_init(&builder); - while (isalnum(infile_peek_char(l->src)) || infile_peek_char(l->src) == '_') { - strbuilder_append_char(&builder, infile_peek_char(l->src)); - infile_next_char(l->src); - } - tok->kind = TokenKind_ident; - tok->value.string = builder.buf; - } else if (c == '\n') { - infile_next_char(l->src); - tok->kind = TokenKind_newline; - - // Reset expect_header_name at the end of line. It handles cases like: - // - // #ifdef ADDITIONAL_HEADER - // #include ADDITIONAL_HEADER - // #endif - // - // Even if ADDITIONAL_HEADER is undefined, this include directive line is tokenized. If the flag were not - // reset, the next occurrence of '<' or '"' would be recognized as part of a header name. - l->expect_header_name = false; - } else if (isspace(c)) { - while (isspace((c = infile_peek_char(l->src)))) { - if (c == '\n') - break; - infile_next_char(l->src); - } - if (l->at_bol && infile_peek_char(l->src) == '#') { - infile_next_char(l->src); - pplexer_tokenize_pp_directive(l, tok); - } else { - tok->kind = TokenKind_whitespace; - } - } else { - infile_next_char(l->src); - tok->kind = TokenKind_other; - char* buf = calloc(2, sizeof(char)); - buf[0] = c; - tok->value.string = buf; - } - l->at_bol = tok->kind == TokenKind_newline; - } - Token* eof_tok = tokens_push_new(l->tokens); - eof_tok->loc = l->src->loc; - eof_tok->kind = TokenKind_eof; -} - -TokenArray* tokenize(InFile* src) { - Lexer* l = lexer_new(src); - do_tokenize_all(l); - return l->tokens; -} - -TokenArray* convert_pp_tokens_to_tokens(TokenArray* pp_tokens) { - TokenArray* tokens = calloc(1, sizeof(TokenArray)); - // tokens need not store whitespace tokens. - tokens_init(tokens, pp_tokens->len / 2); - - for (size_t pos = 0; pos < pp_tokens->len; ++pos) { - Token* pp_tok = &pp_tokens->data[pos]; - TokenKind k = pp_tok->kind; - if (k == TokenKind_removed || k == TokenKind_whitespace || k == TokenKind_newline) { - continue; - } - Token* tok = tokens_push_new(tokens); - tok->loc = pp_tok->loc; - if (k == TokenKind_character_constant) { - tok->kind = TokenKind_literal_int; - int ch = pp_tok->value.string[1]; - if (ch == '\\') { - ch = pp_tok->value.string[2]; - if (ch == 'a') { - ch = '\a'; - } else if (ch == 'b') { - ch = '\b'; - } else if (ch == 'f') { - ch = '\f'; - } else if (ch == 'n') { - ch = '\n'; - } else if (ch == 'r') { - ch = '\r'; - } else if (ch == 't') { - ch = '\t'; - } else if (ch == 'v') { - ch = '\v'; - } else if (ch == '0') { - ch = '\0'; - } else if (ch == 'e') { - // \e is not a part of Standard C, but commonly supported. - ch = 27; - } - } - tok->value.integer = ch; - } else if (k == TokenKind_literal_str) { - tok->kind = pp_tok->kind; - - size_t len = strlen(pp_tok->value.string); - char* buf = calloc(len + 1, sizeof(char)); - for (size_t i = 0, j = 0; i < len; i++, j++) { - if (pp_tok->value.string[i] == '\\' && pp_tok->value.string[i + 1] == 'e') { - // \e is not a part of Standard C, but commonly supported. - buf[j] = 033; - i++; - } else { - buf[j] = pp_tok->value.string[i]; - } - } - tok->value.string = buf; - } else if (k == TokenKind_ident) { - if (strcmp(pp_tok->value.string, "alignas") == 0) { - tok->kind = TokenKind_keyword_alignas; - } else if (strcmp(pp_tok->value.string, "alignof") == 0) { - tok->kind = TokenKind_keyword_alignof; - } else if (strcmp(pp_tok->value.string, "auto") == 0) { - tok->kind = TokenKind_keyword_auto; - } else if (strcmp(pp_tok->value.string, "bool") == 0) { - tok->kind = TokenKind_keyword_bool; - } else if (strcmp(pp_tok->value.string, "break") == 0) { - tok->kind = TokenKind_keyword_break; - } else if (strcmp(pp_tok->value.string, "case") == 0) { - tok->kind = TokenKind_keyword_case; - } else if (strcmp(pp_tok->value.string, "char") == 0) { - tok->kind = TokenKind_keyword_char; - } else if (strcmp(pp_tok->value.string, "const") == 0) { - tok->kind = TokenKind_keyword_const; - } else if (strcmp(pp_tok->value.string, "constexpr") == 0) { - tok->kind = TokenKind_keyword_constexpr; - } else if (strcmp(pp_tok->value.string, "continue") == 0) { - tok->kind = TokenKind_keyword_continue; - } else if (strcmp(pp_tok->value.string, "default") == 0) { - tok->kind = TokenKind_keyword_default; - } else if (strcmp(pp_tok->value.string, "do") == 0) { - tok->kind = TokenKind_keyword_do; - } else if (strcmp(pp_tok->value.string, "double") == 0) { - tok->kind = TokenKind_keyword_double; - } else if (strcmp(pp_tok->value.string, "else") == 0) { - tok->kind = TokenKind_keyword_else; - } else if (strcmp(pp_tok->value.string, "enum") == 0) { - tok->kind = TokenKind_keyword_enum; - } else if (strcmp(pp_tok->value.string, "extern") == 0) { - tok->kind = TokenKind_keyword_extern; - } else if (strcmp(pp_tok->value.string, "false") == 0) { - tok->kind = TokenKind_keyword_false; - } else if (strcmp(pp_tok->value.string, "float") == 0) { - tok->kind = TokenKind_keyword_float; - } else if (strcmp(pp_tok->value.string, "for") == 0) { - tok->kind = TokenKind_keyword_for; - } else if (strcmp(pp_tok->value.string, "goto") == 0) { - tok->kind = TokenKind_keyword_goto; - } else if (strcmp(pp_tok->value.string, "if") == 0) { - tok->kind = TokenKind_keyword_if; - } else if (strcmp(pp_tok->value.string, "inline") == 0) { - tok->kind = TokenKind_keyword_inline; - } else if (strcmp(pp_tok->value.string, "int") == 0) { - tok->kind = TokenKind_keyword_int; - } else if (strcmp(pp_tok->value.string, "long") == 0) { - tok->kind = TokenKind_keyword_long; - } else if (strcmp(pp_tok->value.string, "nullptr") == 0) { - tok->kind = TokenKind_keyword_nullptr; - } else if (strcmp(pp_tok->value.string, "register") == 0) { - tok->kind = TokenKind_keyword_register; - } else if (strcmp(pp_tok->value.string, "restrict") == 0) { - tok->kind = TokenKind_keyword_restrict; - } else if (strcmp(pp_tok->value.string, "return") == 0) { - tok->kind = TokenKind_keyword_return; - } else if (strcmp(pp_tok->value.string, "short") == 0) { - tok->kind = TokenKind_keyword_short; - } else if (strcmp(pp_tok->value.string, "signed") == 0) { - tok->kind = TokenKind_keyword_signed; - } else if (strcmp(pp_tok->value.string, "sizeof") == 0) { - tok->kind = TokenKind_keyword_sizeof; - } else if (strcmp(pp_tok->value.string, "static") == 0) { - tok->kind = TokenKind_keyword_static; - } else if (strcmp(pp_tok->value.string, "static_assert") == 0) { - tok->kind = TokenKind_keyword_static_assert; - } else if (strcmp(pp_tok->value.string, "struct") == 0) { - tok->kind = TokenKind_keyword_struct; - } else if (strcmp(pp_tok->value.string, "switch") == 0) { - tok->kind = TokenKind_keyword_switch; - } else if (strcmp(pp_tok->value.string, "thread_local") == 0) { - tok->kind = TokenKind_keyword_thread_local; - } else if (strcmp(pp_tok->value.string, "true") == 0) { - tok->kind = TokenKind_keyword_true; - } else if (strcmp(pp_tok->value.string, "typedef") == 0) { - tok->kind = TokenKind_keyword_typedef; - } else if (strcmp(pp_tok->value.string, "typeof") == 0) { - tok->kind = TokenKind_keyword_typeof; - } else if (strcmp(pp_tok->value.string, "typeof_unqual") == 0) { - tok->kind = TokenKind_keyword_typeof_unqual; - } else if (strcmp(pp_tok->value.string, "union") == 0) { - tok->kind = TokenKind_keyword_union; - } else if (strcmp(pp_tok->value.string, "unsigned") == 0) { - tok->kind = TokenKind_keyword_unsigned; - } else if (strcmp(pp_tok->value.string, "void") == 0) { - tok->kind = TokenKind_keyword_void; - } else if (strcmp(pp_tok->value.string, "volatile") == 0) { - tok->kind = TokenKind_keyword_volatile; - } else if (strcmp(pp_tok->value.string, "while") == 0) { - tok->kind = TokenKind_keyword_while; - } else if (strcmp(pp_tok->value.string, "_Atomic") == 0) { - tok->kind = TokenKind_keyword__Atomic; - } else if (strcmp(pp_tok->value.string, "_BitInt") == 0) { - tok->kind = TokenKind_keyword__BitInt; - } else if (strcmp(pp_tok->value.string, "_Complex") == 0) { - tok->kind = TokenKind_keyword__Complex; - } else if (strcmp(pp_tok->value.string, "_Decimal128") == 0) { - tok->kind = TokenKind_keyword__Decimal128; - } else if (strcmp(pp_tok->value.string, "_Decimal32") == 0) { - tok->kind = TokenKind_keyword__Decimal32; - } else if (strcmp(pp_tok->value.string, "_Decimal64") == 0) { - tok->kind = TokenKind_keyword__Decimal64; - } else if (strcmp(pp_tok->value.string, "_Generic") == 0) { - tok->kind = TokenKind_keyword__Generic; - } else if (strcmp(pp_tok->value.string, "_Imaginary") == 0) { - tok->kind = TokenKind_keyword__Imaginary; - } else if (strcmp(pp_tok->value.string, "_Noreturn") == 0) { - tok->kind = TokenKind_keyword__Noreturn; - } else { - tok->kind = TokenKind_ident; - tok->value = pp_tok->value; - } - } else if (k == TokenKind_other) { - unreachable(); - } else { - tok->kind = pp_tok->kind; - tok->value = pp_tok->value; - } - } - - return tokens; -} diff --git a/src/tokenize.h b/src/tokenize.h deleted file mode 100644 index fd334a1..0000000 --- a/src/tokenize.h +++ /dev/null @@ -1,10 +0,0 @@ -#ifndef DUCC_TOKENIZE_H -#define DUCC_TOKENIZE_H - -#include "io.h" -#include "token.h" - -TokenArray* tokenize(InFile* src); -TokenArray* convert_pp_tokens_to_tokens(TokenArray* pp_tokens); - -#endif diff --git a/src/version.h b/src/version.h deleted file mode 100644 index 67c2352..0000000 --- a/src/version.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef DUCC_VERSION_H -#define DUCC_VERSION_H - -#define DUCC_VERSION "0.4.0" - -#endif -- cgit v1.3.1