diff options
| author | nsfisis <nsfisis@gmail.com> | 2025-08-24 23:55:22 +0900 |
|---|---|---|
| committer | nsfisis <nsfisis@gmail.com> | 2025-08-25 00:18:08 +0900 |
| commit | 36da139565202a9a39d1e4261ab8bf950041518b (patch) | |
| tree | c8e8867edc4e4fed94403331b8a7837e9951d9af | |
| parent | 5bbc4414b8178ff924444857ac05e5b06cf0e651 (diff) | |
| download | ducc-36da139565202a9a39d1e4261ab8bf950041518b.tar.gz ducc-36da139565202a9a39d1e4261ab8bf950041518b.tar.zst ducc-36da139565202a9a39d1e4261ab8bf950041518b.zip | |
feat: separate compilation
| -rw-r--r-- | Makefile | 19 | ||||
| -rw-r--r-- | src/ast.c | 123 | ||||
| -rw-r--r-- | src/ast.h | 156 | ||||
| -rw-r--r-- | src/cli.c | 33 | ||||
| -rw-r--r-- | src/cli.h | 18 | ||||
| -rw-r--r-- | src/codegen.c | 4 | ||||
| -rw-r--r-- | src/codegen.h | 8 | ||||
| -rw-r--r-- | src/common.c | 26 | ||||
| -rw-r--r-- | src/common.h | 37 | ||||
| -rw-r--r-- | src/io.c | 14 | ||||
| -rw-r--r-- | src/io.h | 25 | ||||
| -rw-r--r-- | src/main.c (renamed from main.c) | 41 | ||||
| -rw-r--r-- | src/parse.c | 4 | ||||
| -rw-r--r-- | src/parse.h | 10 | ||||
| -rw-r--r-- | src/preprocess.c | 163 | ||||
| -rw-r--r-- | src/preprocess.h | 178 | ||||
| -rw-r--r-- | src/sys.c | 3 | ||||
| -rw-r--r-- | src/sys.h | 7 | ||||
| -rw-r--r-- | src/tokenize.c | 3 | ||||
| -rw-r--r-- | src/tokenize.h | 8 |
20 files changed, 543 insertions, 337 deletions
@@ -2,13 +2,28 @@ SRC_DIR := src BUILD_DIR := build TARGET ?= ducc +OBJECTS := \ + $(BUILD_DIR)/main.o \ + $(BUILD_DIR)/ast.o \ + $(BUILD_DIR)/cli.o \ + $(BUILD_DIR)/codegen.o \ + $(BUILD_DIR)/common.o \ + $(BUILD_DIR)/io.o \ + $(BUILD_DIR)/parse.o \ + $(BUILD_DIR)/preprocess.o \ + $(BUILD_DIR)/sys.o \ + $(BUILD_DIR)/tokenize.o + .PHONY: all all: $(BUILD_DIR) $(BUILD_DIR)/$(TARGET) -$(BUILD_DIR)/$(TARGET): main.c - $(CC) -MD -g -O0 -o $@ $< +$(BUILD_DIR)/$(TARGET): $(OBJECTS) + $(CC) -MD -g -O0 -o $@ $^ $(BUILD_DIR): @mkdir -p $(BUILD_DIR) +$(BUILD_DIR)/%.o: src/%.c + $(CC) -c -MD -g -O0 -o $@ $< + -include $(BUILD_DIR)/*.d @@ -1,18 +1,6 @@ -enum TypeKind { - TypeKind_unknown, - - TypeKind_char, - TypeKind_short, - TypeKind_int, - TypeKind_long, - TypeKind_void, - TypeKind_ptr, - TypeKind_array, - TypeKind_enum, - TypeKind_struct, - TypeKind_union, -}; -typedef enum TypeKind TypeKind; +#include "ast.h" +#include "common.h" +#include "preprocess.h" const char* type_kind_stringify(TypeKind k) { if (k == TypeKind_unknown) @@ -41,17 +29,6 @@ const char* type_kind_stringify(TypeKind k) { unreachable(); } -struct AstNode; - -struct Type { - TypeKind kind; - // Check `base` instead of `kind` to test if the type is an array or a pointer. - struct Type* base; - int array_size; - struct AstNode* def; -}; -typedef struct Type Type; - void type_dump(Type* ty) { fprintf(stderr, "Type {\n"); fprintf(stderr, " kind = %s\n", type_kind_stringify(ty->kind)); @@ -91,13 +68,6 @@ BOOL type_is_unsized(Type* ty) { return ty->kind == TypeKind_void; } -int type_sizeof_struct(Type* ty); -int type_sizeof_union(Type* ty); -int type_alignof_struct(Type* ty); -int type_alignof_union(Type* ty); -int type_offsetof(Type* ty, const char* name); -Type* type_member_typeof(Type* ty, const char* name); - int type_sizeof(Type* ty) { if (type_is_unsized(ty)) { fatal_error("type_sizeof: type size cannot be determined"); @@ -156,93 +126,6 @@ int to_aligned(int n, int a) { return (n + a - 1) / a * a; } -enum AstNodeKind { - AstNodeKind_unknown, - AstNodeKind_nop, - - AstNodeKind_assign_expr, - AstNodeKind_binary_expr, - AstNodeKind_break_stmt, - AstNodeKind_cond_expr, - AstNodeKind_continue_stmt, - AstNodeKind_deref_expr, - AstNodeKind_do_while_stmt, - AstNodeKind_enum_def, - AstNodeKind_enum_member, - AstNodeKind_expr_stmt, - AstNodeKind_for_stmt, - AstNodeKind_func_call, - AstNodeKind_func_decl, - AstNodeKind_func_def, - AstNodeKind_gvar, - AstNodeKind_gvar_decl, - AstNodeKind_if_stmt, - AstNodeKind_int_expr, - AstNodeKind_list, - AstNodeKind_logical_expr, - AstNodeKind_lvar, - AstNodeKind_lvar_decl, - AstNodeKind_param, - AstNodeKind_ref_expr, - AstNodeKind_return_stmt, - AstNodeKind_str_expr, - AstNodeKind_struct_decl, - AstNodeKind_struct_def, - AstNodeKind_struct_member, - AstNodeKind_type, - AstNodeKind_typedef_decl, - AstNodeKind_unary_expr, - AstNodeKind_union_decl, - AstNodeKind_union_def, - AstNodeKind_union_member, - - // Intermediate ASTs: they are used only in parsing, not for parse result. - AstNodeKind_declarator, -}; -typedef enum AstNodeKind AstNodeKind; - -#define node_items __n1 -#define node_len __i1 -#define node_cap __i2 -#define node_expr __n1 -#define node_lhs __n1 -#define node_rhs __n2 -#define node_operand __n1 -#define node_cond __n1 -#define node_init __n2 -#define node_update __n3 -#define node_then __n2 -#define node_else __n3 -#define node_body __n4 -#define node_members __n1 -#define node_params __n1 -#define node_args __n1 -#define node_int_value __i1 -#define node_idx __i1 -#define node_op __i1 -#define node_stack_offset __i1 -#define node_stack_size __i1 - -struct AstNode { - AstNodeKind kind; - const char* name; - Type* ty; - struct AstNode* __n1; - struct AstNode* __n2; - struct AstNode* __n3; - struct AstNode* __n4; - int __i1; - int __i2; -}; -typedef struct AstNode AstNode; - -struct Program { - AstNode* funcs; - AstNode* vars; - const char** str_literals; -}; -typedef struct Program Program; - AstNode* ast_new(AstNodeKind kind) { AstNode* ast = calloc(1, sizeof(AstNode)); ast->kind = kind; diff --git a/src/ast.h b/src/ast.h new file mode 100644 index 0000000..0711e44 --- /dev/null +++ b/src/ast.h @@ -0,0 +1,156 @@ +#ifndef DUCC_AST_H +#define DUCC_AST_H + +#include "std.h" + +enum TypeKind { + TypeKind_unknown, + + TypeKind_char, + TypeKind_short, + TypeKind_int, + TypeKind_long, + TypeKind_void, + TypeKind_ptr, + TypeKind_array, + TypeKind_enum, + TypeKind_struct, + TypeKind_union, +}; +typedef enum TypeKind TypeKind; + +const char* type_kind_stringify(TypeKind k); + +struct AstNode; + +struct Type { + TypeKind kind; + // Check `base` instead of `kind` to test if the type is an array or a pointer. + struct Type* base; + int array_size; + struct AstNode* def; +}; +typedef struct Type Type; + +void type_dump(Type* ty); +Type* type_new(TypeKind kind); +Type* type_new_ptr(Type* base); +Type* type_new_array(Type* elem, int size); +Type* type_new_static_string(int len); +Type* type_array_to_ptr(Type* ty); +BOOL type_is_unsized(Type* ty); + +int type_sizeof_struct(Type* ty); +int type_sizeof_union(Type* ty); +int type_alignof_struct(Type* ty); +int type_alignof_union(Type* ty); +int type_offsetof(Type* ty, const char* name); +Type* type_member_typeof(Type* ty, const char* name); + +int type_sizeof(Type* ty); +int type_alignof(Type* ty); + +int to_aligned(int n, int a); + +enum AstNodeKind { + AstNodeKind_unknown, + AstNodeKind_nop, + + AstNodeKind_assign_expr, + AstNodeKind_binary_expr, + AstNodeKind_break_stmt, + AstNodeKind_cond_expr, + AstNodeKind_continue_stmt, + AstNodeKind_deref_expr, + AstNodeKind_do_while_stmt, + AstNodeKind_enum_def, + AstNodeKind_enum_member, + AstNodeKind_expr_stmt, + AstNodeKind_for_stmt, + AstNodeKind_func_call, + AstNodeKind_func_decl, + AstNodeKind_func_def, + AstNodeKind_gvar, + AstNodeKind_gvar_decl, + AstNodeKind_if_stmt, + AstNodeKind_int_expr, + AstNodeKind_list, + AstNodeKind_logical_expr, + AstNodeKind_lvar, + AstNodeKind_lvar_decl, + AstNodeKind_param, + AstNodeKind_ref_expr, + AstNodeKind_return_stmt, + AstNodeKind_str_expr, + AstNodeKind_struct_decl, + AstNodeKind_struct_def, + AstNodeKind_struct_member, + AstNodeKind_type, + AstNodeKind_typedef_decl, + AstNodeKind_unary_expr, + AstNodeKind_union_decl, + AstNodeKind_union_def, + AstNodeKind_union_member, + + // Intermediate ASTs: they are used only in parsing, not for parse result. + AstNodeKind_declarator, +}; +typedef enum AstNodeKind AstNodeKind; + +#define node_items __n1 +#define node_len __i1 +#define node_cap __i2 +#define node_expr __n1 +#define node_lhs __n1 +#define node_rhs __n2 +#define node_operand __n1 +#define node_cond __n1 +#define node_init __n2 +#define node_update __n3 +#define node_then __n2 +#define node_else __n3 +#define node_body __n4 +#define node_members __n1 +#define node_params __n1 +#define node_args __n1 +#define node_int_value __i1 +#define node_idx __i1 +#define node_op __i1 +#define node_stack_offset __i1 +#define node_stack_size __i1 + +struct AstNode { + AstNodeKind kind; + const char* name; + Type* ty; + struct AstNode* __n1; + struct AstNode* __n2; + struct AstNode* __n3; + struct AstNode* __n4; + int __i1; + int __i2; +}; +typedef struct AstNode AstNode; + +struct Program { + AstNode* funcs; + AstNode* vars; + const char** str_literals; +}; +typedef struct Program Program; + +AstNode* ast_new(AstNodeKind kind); +AstNode* ast_new_list(int capacity); +void ast_append(AstNode* list, AstNode* item); +AstNode* ast_new_int(int v); +AstNode* ast_new_unary_expr(int op, AstNode* operand); +AstNode* ast_new_binary_expr(int op, AstNode* lhs, AstNode* rhs); + +AstNode* ast_new_assign_expr(int op, AstNode* lhs, AstNode* rhs); +AstNode* ast_new_assign_add_expr(AstNode* lhs, AstNode* rhs); +AstNode* ast_new_assign_sub_expr(AstNode* lhs, AstNode* rhs); +AstNode* ast_new_ref_expr(AstNode* operand); +AstNode* ast_new_deref_expr(AstNode* operand); +AstNode* ast_new_member_access_expr(AstNode* obj, const char* name); + +#endif @@ -1,13 +1,10 @@ -struct CliArgs { - const char* input_filename; - const char* output_filename; - BOOL output_executable; -}; -typedef struct CliArgs CliArgs; +#include "cli.h" +#include "common.h" CliArgs* parse_cli_args(int argc, char** argv) { const char* output_filename = NULL; int positional_arguments_start = -1; + BOOL only_compile = FALSE; for (int i = 1; i < argc; ++i) { if (argv[i][0] != '-') { @@ -27,6 +24,8 @@ CliArgs* parse_cli_args(int argc, char** argv) { } output_filename = argv[i + 1]; ++i; + } else if (c == 'c') { + only_compile = TRUE; } else { fatal_error("unknown option: %s", argv[i]); } @@ -38,6 +37,26 @@ CliArgs* parse_cli_args(int argc, char** argv) { CliArgs* a = calloc(1, sizeof(CliArgs)); a->input_filename = argv[positional_arguments_start]; a->output_filename = output_filename; - a->output_executable = output_filename && !str_ends_with(output_filename, ".s"); + a->output_assembly = !output_filename || str_ends_with(output_filename, ".s"); + a->only_compile = only_compile; + a->totally_deligate_to_gcc = FALSE; + a->gcc_command = NULL; + + if (!a->only_compile && str_ends_with(a->input_filename, ".o")) { + a->totally_deligate_to_gcc = TRUE; + StrBuilder builder; + strbuilder_init(&builder); + strbuilder_append_string(&builder, "gcc "); + for (int i = 1; i < argc; ++i) { + strbuilder_append_char(&builder, '\''); + strbuilder_append_string(&builder, argv[i]); + strbuilder_append_char(&builder, '\''); + if (i != argc - 1) { + strbuilder_append_char(&builder, ' '); + } + } + a->gcc_command = builder.buf; + } + return a; } diff --git a/src/cli.h b/src/cli.h new file mode 100644 index 0000000..3288c10 --- /dev/null +++ b/src/cli.h @@ -0,0 +1,18 @@ +#ifndef DUCC_CLI_H +#define DUCC_CLI_H + +#include "std.h" + +struct CliArgs { + const char* input_filename; + const char* output_filename; + BOOL output_assembly; + BOOL only_compile; + BOOL totally_deligate_to_gcc; + const char* gcc_command; +}; +typedef struct CliArgs CliArgs; + +CliArgs* parse_cli_args(int argc, char** argv); + +#endif diff --git a/src/codegen.c b/src/codegen.c index b24fe38..da2cd17 100644 --- a/src/codegen.c +++ b/src/codegen.c @@ -1,3 +1,7 @@ +#include "codegen.h" +#include "common.h" +#include "preprocess.h" + enum GenMode { GenMode_lval, GenMode_rval, diff --git a/src/codegen.h b/src/codegen.h new file mode 100644 index 0000000..95ec069 --- /dev/null +++ b/src/codegen.h @@ -0,0 +1,8 @@ +#ifndef DUCC_CODEGEN_H +#define DUCC_CODEGEN_H + +#include "ast.h" + +void codegen(Program* prog, FILE* out); + +#endif diff --git a/src/common.c b/src/common.c index 8fb73f7..595f2c7 100644 --- a/src/common.c +++ b/src/common.c @@ -1,3 +1,5 @@ +#include "common.h" + void fatal_error(const char* msg, ...) { va_list args; va_start(args, msg); @@ -7,10 +9,6 @@ void fatal_error(const char* msg, ...) { exit(1); } -#define unreachable() fatal_error("%s:%d: unreachable", __FILE__, __LINE__) - -#define unimplemented() fatal_error("%s:%d: unimplemented", __FILE__, __LINE__) - BOOL str_ends_with(const char* s, const char* suffix) { size_t l1 = strlen(s); size_t l2 = strlen(suffix); @@ -19,13 +17,6 @@ BOOL str_ends_with(const char* s, const char* suffix) { return strcmp(s + l1 - l2, suffix) == 0; } -struct StrBuilder { - size_t len; - size_t capacity; - char* buf; -}; -typedef struct StrBuilder StrBuilder; - void strbuilder_init(StrBuilder* b) { b->len = 0; b->capacity = 16; @@ -48,12 +39,13 @@ void strbuilder_append_char(StrBuilder* b, int c) { b->buf[b->len++] = c; } -struct StrArray { - size_t len; - size_t capacity; - const char** data; -}; -typedef struct StrArray StrArray; +void strbuilder_append_string(StrBuilder* b, const char* s) { + int len = strlen(s); + strbuilder_reserve(b, b->len + len + 1); + for (int i = 0; i < len; ++i) { + b->buf[b->len++] = s[i]; + } +} void strings_init(StrArray* strings) { strings->len = 0; diff --git a/src/common.h b/src/common.h new file mode 100644 index 0000000..f8f6b1d --- /dev/null +++ b/src/common.h @@ -0,0 +1,37 @@ +#ifndef DUCC_COMMON_H +#define DUCC_COMMON_H + +#include "std.h" + +void fatal_error(const char* msg, ...); + +#define unreachable() fatal_error("%s:%d: unreachable", __FILE__, __LINE__) +#define unimplemented() fatal_error("%s:%d: unimplemented", __FILE__, __LINE__) + +BOOL str_ends_with(const char* s, const char* suffix); + +struct StrBuilder { + size_t len; + size_t capacity; + char* buf; +}; +typedef struct StrBuilder StrBuilder; + +void strbuilder_init(StrBuilder* b); +// `size` must include a trailing null byte. +void strbuilder_reserve(StrBuilder* b, size_t size); +void strbuilder_append_char(StrBuilder* b, int c); +void strbuilder_append_string(StrBuilder* b, const char* s); + +struct StrArray { + size_t len; + size_t capacity; + const char** data; +}; +typedef struct StrArray StrArray; + +void strings_init(StrArray* strings); +void strings_reserve(StrArray* strings, size_t size); +int strings_push(StrArray* strings, const char* str); + +#endif @@ -1,15 +1,5 @@ -struct SourceLocation { - const char* filename; - int line; -}; -typedef struct SourceLocation SourceLocation; - -struct InFile { - const char* buf; - int pos; - SourceLocation loc; -}; -typedef struct InFile InFile; +#include "io.h" +#include "common.h" InFile* infile_open(const char* filename) { FILE* in; diff --git a/src/io.h b/src/io.h new file mode 100644 index 0000000..8a96b59 --- /dev/null +++ b/src/io.h @@ -0,0 +1,25 @@ +#ifndef DUCC_IO_H +#define DUCC_IO_H + +#include "std.h" + +struct SourceLocation { + const char* filename; + int line; +}; +typedef struct SourceLocation SourceLocation; + +struct InFile { + const char* buf; + int pos; + SourceLocation loc; +}; +typedef struct InFile InFile; + +InFile* infile_open(const char* filename); +BOOL infile_eof(InFile* f); +char infile_peek_char(InFile* f); +char infile_next_char(InFile* f); +BOOL infile_consume_if(InFile* f, char expected); + +#endif @@ -1,40 +1,45 @@ -// Currently the source code depends on the #include order. -// clang-format off -#include "src/std.h" -#include "src/common.c" -#include "src/io.c" -#include "src/sys.c" -#include "src/preprocess.c" -#include "src/tokenize.c" -#include "src/ast.c" -#include "src/parse.c" -#include "src/codegen.c" -#include "src/cli.c" -// clang-format on +#include "ast.h" +#include "cli.h" +#include "codegen.h" +#include "common.h" +#include "io.h" +#include "parse.h" +#include "preprocess.h" +#include "std.h" +#include "tokenize.h" int main(int argc, char** argv) { CliArgs* cli_args = parse_cli_args(argc, argv); + + if (cli_args->totally_deligate_to_gcc) { + return system(cli_args->gcc_command); + } + InFile* source = infile_open(cli_args->input_filename); TokenArray* pp_tokens = preprocess(source); TokenArray* tokens = tokenize(pp_tokens); Program* prog = parse(tokens); const char* assembly_filename; - if (cli_args->output_executable) { + if (cli_args->output_assembly) { + assembly_filename = cli_args->output_filename; + } else { char* temp_filename = calloc(19, sizeof(char)); temp_filename = strdup("/tmp/ducc-XXXXXX.s"); mkstemps(temp_filename, strlen(".s")); assembly_filename = temp_filename; - } else { - assembly_filename = cli_args->output_filename; } FILE* assembly_file = assembly_filename ? fopen(assembly_filename, "wb") : stdout; codegen(prog, assembly_file); fclose(assembly_file); - if (cli_args->output_executable) { + if (!cli_args->output_assembly) { char cmd_buf[256]; - sprintf(cmd_buf, "gcc -s -o '%s' '%s'", cli_args->output_filename, assembly_filename); + if (cli_args->only_compile) { + sprintf(cmd_buf, "gcc -c -s -o '%s' '%s'", cli_args->output_filename, assembly_filename); + } else { + sprintf(cmd_buf, "gcc -s -o '%s' '%s'", cli_args->output_filename, assembly_filename); + } int result = system(cmd_buf); if (result != 0) { fatal_error("gcc failed: %d", result); diff --git a/src/parse.c b/src/parse.c index 6bb87ec..882d769 100644 --- a/src/parse.c +++ b/src/parse.c @@ -1,4 +1,6 @@ -#define LVAR_MAX 32 +#include "parse.h" +#include "common.h" +#include "tokenize.h" struct LocalVar { const char* name; diff --git a/src/parse.h b/src/parse.h new file mode 100644 index 0000000..fe67b75 --- /dev/null +++ b/src/parse.h @@ -0,0 +1,10 @@ +#ifndef DUCC_PARSE_H +#define DUCC_PARSE_H + +#include "ast.h" +#include "preprocess.h" + +Program* parse(TokenArray* tokens); +BOOL pp_eval_constant_expression(TokenArray* pp_tokens); + +#endif diff --git a/src/preprocess.c b/src/preprocess.c index 3ed9740..8c4d096 100644 --- a/src/preprocess.c +++ b/src/preprocess.c @@ -1,142 +1,6 @@ -enum TokenKind { - TokenKind_eof, - - // Only preprocessing phase. - TokenKind_hash, - TokenKind_hashhash, - TokenKind_whitespace, - TokenKind_newline, - TokenKind_other, - TokenKind_character_constant, - TokenKind_header_name, - TokenKind_pp_directive_define, - TokenKind_pp_directive_elif, - TokenKind_pp_directive_elifdef, - TokenKind_pp_directive_elifndef, - TokenKind_pp_directive_else, - TokenKind_pp_directive_embed, - TokenKind_pp_directive_endif, - TokenKind_pp_directive_error, - TokenKind_pp_directive_if, - TokenKind_pp_directive_ifdef, - TokenKind_pp_directive_ifndef, - TokenKind_pp_directive_include, - TokenKind_pp_directive_line, - TokenKind_pp_directive_pragma, - TokenKind_pp_directive_undef, - TokenKind_pp_directive_warning, - TokenKind_pp_operator_defined, - TokenKind_pp_operator___has_c_attribute, - TokenKind_pp_operator___has_embed, - TokenKind_pp_operator___has_include, - - // C23: 6.4.1 - TokenKind_keyword_alignas, - TokenKind_keyword_alignof, - TokenKind_keyword_auto, - TokenKind_keyword_bool, - TokenKind_keyword_break, - TokenKind_keyword_case, - TokenKind_keyword_char, - TokenKind_keyword_const, - TokenKind_keyword_constexpr, - TokenKind_keyword_continue, - TokenKind_keyword_default, - TokenKind_keyword_do, - TokenKind_keyword_double, - TokenKind_keyword_else, - TokenKind_keyword_enum, - TokenKind_keyword_extern, - TokenKind_keyword_false, - TokenKind_keyword_float, - TokenKind_keyword_for, - TokenKind_keyword_goto, - TokenKind_keyword_if, - TokenKind_keyword_inline, - TokenKind_keyword_int, - TokenKind_keyword_long, - TokenKind_keyword_nullptr, - TokenKind_keyword_register, - TokenKind_keyword_restrict, - TokenKind_keyword_return, - TokenKind_keyword_short, - TokenKind_keyword_signed, - TokenKind_keyword_sizeof, - TokenKind_keyword_static, - TokenKind_keyword_static_assert, - TokenKind_keyword_struct, - TokenKind_keyword_switch, - TokenKind_keyword_thread_local, - TokenKind_keyword_true, - TokenKind_keyword_typedef, - TokenKind_keyword_typeof, - TokenKind_keyword_typeof_unqual, - TokenKind_keyword_union, - TokenKind_keyword_unsigned, - TokenKind_keyword_void, - TokenKind_keyword_volatile, - TokenKind_keyword_while, - TokenKind_keyword__Atomic, - TokenKind_keyword__BitInt, - TokenKind_keyword__Complex, - TokenKind_keyword__Decimal128, - TokenKind_keyword__Decimal32, - TokenKind_keyword__Decimal64, - TokenKind_keyword__Generic, - TokenKind_keyword__Imaginary, - TokenKind_keyword__Noreturn, - - TokenKind_and, - TokenKind_andand, - TokenKind_arrow, - TokenKind_assign, - TokenKind_assign_add, - TokenKind_assign_and, - TokenKind_assign_div, - TokenKind_assign_lshift, - TokenKind_assign_mod, - TokenKind_assign_mul, - TokenKind_assign_or, - TokenKind_assign_rshift, - TokenKind_assign_sub, - TokenKind_assign_xor, - TokenKind_brace_l, - TokenKind_brace_r, - TokenKind_bracket_l, - TokenKind_bracket_r, - TokenKind_colon, - TokenKind_comma, - TokenKind_dot, - TokenKind_ellipsis, - TokenKind_eq, - TokenKind_ge, - TokenKind_gt, - TokenKind_ident, - TokenKind_le, - TokenKind_literal_int, - TokenKind_literal_str, - TokenKind_lshift, - TokenKind_lt, - TokenKind_minus, - TokenKind_minusminus, - TokenKind_ne, - TokenKind_not, - TokenKind_or, - TokenKind_oror, - TokenKind_paren_l, - TokenKind_paren_r, - TokenKind_percent, - TokenKind_plus, - TokenKind_plusplus, - TokenKind_question, - TokenKind_rshift, - TokenKind_semicolon, - TokenKind_slash, - TokenKind_star, - TokenKind_tilde, - TokenKind_xor, -}; -typedef enum TokenKind TokenKind; +#include "preprocess.h" +#include "common.h" +#include "sys.h" const char* token_kind_stringify(TokenKind k) { if (k == TokenKind_eof) @@ -405,20 +269,6 @@ const char* token_kind_stringify(TokenKind k) { unreachable(); } -// TokenValue is externally tagged by Token's kind. -union TokenValue { - const char* string; - int integer; -}; -typedef union TokenValue TokenValue; - -struct Token { - TokenKind kind; - TokenValue value; - SourceLocation loc; -}; -typedef struct Token Token; - const char* token_stringify(Token* t) { TokenKind k = t->kind; if (k == TokenKind_literal_int) { @@ -437,13 +287,6 @@ const char* token_stringify(Token* t) { } } -struct TokenArray { - size_t len; - size_t capacity; - Token* data; -}; -typedef struct TokenArray TokenArray; - void tokens_init(TokenArray* tokens, size_t capacity) { tokens->len = 0; tokens->capacity = capacity; diff --git a/src/preprocess.h b/src/preprocess.h new file mode 100644 index 0000000..8c5ade2 --- /dev/null +++ b/src/preprocess.h @@ -0,0 +1,178 @@ +#ifndef DUCC_PREPROCESS_H +#define DUCC_PREPROCESS_H + +#include "io.h" + +enum TokenKind { + TokenKind_eof, + + // Only preprocessing phase. + TokenKind_hash, + TokenKind_hashhash, + TokenKind_whitespace, + TokenKind_newline, + TokenKind_other, + TokenKind_character_constant, + TokenKind_header_name, + TokenKind_pp_directive_define, + TokenKind_pp_directive_elif, + TokenKind_pp_directive_elifdef, + TokenKind_pp_directive_elifndef, + TokenKind_pp_directive_else, + TokenKind_pp_directive_embed, + TokenKind_pp_directive_endif, + TokenKind_pp_directive_error, + TokenKind_pp_directive_if, + TokenKind_pp_directive_ifdef, + TokenKind_pp_directive_ifndef, + TokenKind_pp_directive_include, + TokenKind_pp_directive_line, + TokenKind_pp_directive_pragma, + TokenKind_pp_directive_undef, + TokenKind_pp_directive_warning, + TokenKind_pp_operator_defined, + TokenKind_pp_operator___has_c_attribute, + TokenKind_pp_operator___has_embed, + TokenKind_pp_operator___has_include, + + // C23: 6.4.1 + TokenKind_keyword_alignas, + TokenKind_keyword_alignof, + TokenKind_keyword_auto, + TokenKind_keyword_bool, + TokenKind_keyword_break, + TokenKind_keyword_case, + TokenKind_keyword_char, + TokenKind_keyword_const, + TokenKind_keyword_constexpr, + TokenKind_keyword_continue, + TokenKind_keyword_default, + TokenKind_keyword_do, + TokenKind_keyword_double, + TokenKind_keyword_else, + TokenKind_keyword_enum, + TokenKind_keyword_extern, + TokenKind_keyword_false, + TokenKind_keyword_float, + TokenKind_keyword_for, + TokenKind_keyword_goto, + TokenKind_keyword_if, + TokenKind_keyword_inline, + TokenKind_keyword_int, + TokenKind_keyword_long, + TokenKind_keyword_nullptr, + TokenKind_keyword_register, + TokenKind_keyword_restrict, + TokenKind_keyword_return, + TokenKind_keyword_short, + TokenKind_keyword_signed, + TokenKind_keyword_sizeof, + TokenKind_keyword_static, + TokenKind_keyword_static_assert, + TokenKind_keyword_struct, + TokenKind_keyword_switch, + TokenKind_keyword_thread_local, + TokenKind_keyword_true, + TokenKind_keyword_typedef, + TokenKind_keyword_typeof, + TokenKind_keyword_typeof_unqual, + TokenKind_keyword_union, + TokenKind_keyword_unsigned, + TokenKind_keyword_void, + TokenKind_keyword_volatile, + TokenKind_keyword_while, + TokenKind_keyword__Atomic, + TokenKind_keyword__BitInt, + TokenKind_keyword__Complex, + TokenKind_keyword__Decimal128, + TokenKind_keyword__Decimal32, + TokenKind_keyword__Decimal64, + TokenKind_keyword__Generic, + TokenKind_keyword__Imaginary, + TokenKind_keyword__Noreturn, + + TokenKind_and, + TokenKind_andand, + TokenKind_arrow, + TokenKind_assign, + TokenKind_assign_add, + TokenKind_assign_and, + TokenKind_assign_div, + TokenKind_assign_lshift, + TokenKind_assign_mod, + TokenKind_assign_mul, + TokenKind_assign_or, + TokenKind_assign_rshift, + TokenKind_assign_sub, + TokenKind_assign_xor, + TokenKind_brace_l, + TokenKind_brace_r, + TokenKind_bracket_l, + TokenKind_bracket_r, + TokenKind_colon, + TokenKind_comma, + TokenKind_dot, + TokenKind_ellipsis, + TokenKind_eq, + TokenKind_ge, + TokenKind_gt, + TokenKind_ident, + TokenKind_le, + TokenKind_literal_int, + TokenKind_literal_str, + TokenKind_lshift, + TokenKind_lt, + TokenKind_minus, + TokenKind_minusminus, + TokenKind_ne, + TokenKind_not, + TokenKind_or, + TokenKind_oror, + TokenKind_paren_l, + TokenKind_paren_r, + TokenKind_percent, + TokenKind_plus, + TokenKind_plusplus, + TokenKind_question, + TokenKind_rshift, + TokenKind_semicolon, + TokenKind_slash, + TokenKind_star, + TokenKind_tilde, + TokenKind_xor, +}; +typedef enum TokenKind TokenKind; + +const char* token_kind_stringify(TokenKind k); + +// TokenValue is externally tagged by Token's kind. +union TokenValue { + const char* string; + int integer; +}; +typedef union TokenValue TokenValue; + +struct Token { + TokenKind kind; + TokenValue value; + SourceLocation loc; +}; +typedef struct Token Token; + +const char* token_stringify(Token* t); + +struct TokenArray { + size_t len; + size_t capacity; + Token* data; +}; +typedef struct TokenArray TokenArray; + +void tokens_init(TokenArray* tokens, size_t capacity); +void tokens_reserve(TokenArray* tokens, size_t size); +Token* tokens_push_new(TokenArray* tokens); +Token* tokens_pop(TokenArray* tokens); + +TokenArray* preprocess(InFile* src); + +#endif @@ -1,3 +1,6 @@ +#include "sys.h" +#include "std.h" + char* get_self_path() { char* buf = calloc(PATH_MAX, sizeof(char)); ssize_t len = readlink("/proc/self/exe", buf, PATH_MAX - 1); diff --git a/src/sys.h b/src/sys.h new file mode 100644 index 0000000..2527724 --- /dev/null +++ b/src/sys.h @@ -0,0 +1,7 @@ +#ifndef DUCC_SYS_H +#define DUCC_SYS_H + +// It returns a path not including final / except for root directory. +char* get_self_dir(); + +#endif diff --git a/src/tokenize.c b/src/tokenize.c index a7e99b2..0fb126f 100644 --- a/src/tokenize.c +++ b/src/tokenize.c @@ -1,3 +1,6 @@ +#include "tokenize.h" +#include "common.h" + struct Lexer { TokenArray* src; TokenArray* tokens; diff --git a/src/tokenize.h b/src/tokenize.h new file mode 100644 index 0000000..2e28335 --- /dev/null +++ b/src/tokenize.h @@ -0,0 +1,8 @@ +#ifndef DUCC_TOKENIZE_H +#define DUCC_TOKENIZE_H + +#include "preprocess.h" + +TokenArray* tokenize(TokenArray* pp_tokens); + +#endif |
