diff options
| author | nsfisis <nsfisis@gmail.com> | 2025-08-22 23:28:25 +0900 |
|---|---|---|
| committer | nsfisis <nsfisis@gmail.com> | 2025-08-22 23:28:25 +0900 |
| commit | 9c202a496e75903fe37e5c19cb97c98eba6e35f2 (patch) | |
| tree | 52de494a4717a3c30c4bacb9dd9b91980be2a575 /src/preprocess.c | |
| parent | 0ac6ac95283735dd70ebf55b26ef78a4c32c31de (diff) | |
| download | ducc-9c202a496e75903fe37e5c19cb97c98eba6e35f2.tar.gz ducc-9c202a496e75903fe37e5c19cb97c98eba6e35f2.tar.zst ducc-9c202a496e75903fe37e5c19cb97c98eba6e35f2.zip | |
chore: move *.c and *.h files to src/
Diffstat (limited to 'src/preprocess.c')
| -rw-r--r-- | src/preprocess.c | 1557 |
1 files changed, 1557 insertions, 0 deletions
// diff --git a/src/preprocess.c b/src/preprocess.c
// new file mode 100644, index 0000000..b1810cd, hunk @@ -0,0 +1,1557 @@

// Kinds of tokens produced by the preprocessing lexer.
enum TokenKind {
    TokenKind_eof,

    // Only preprocessing phase.
    TokenKind_hash,
    TokenKind_hashhash,
    TokenKind_whitespace,
    TokenKind_newline,
    TokenKind_other,
    TokenKind_character_constant,
    TokenKind_header_name,
    TokenKind_pp_directive_define,
    TokenKind_pp_directive_elif,
    TokenKind_pp_directive_elifdef,
    TokenKind_pp_directive_elifndef,
    TokenKind_pp_directive_else,
    TokenKind_pp_directive_embed,
    TokenKind_pp_directive_endif,
    TokenKind_pp_directive_error,
    TokenKind_pp_directive_if,
    TokenKind_pp_directive_ifdef,
    TokenKind_pp_directive_ifndef,
    TokenKind_pp_directive_include,
    TokenKind_pp_directive_line,
    TokenKind_pp_directive_pragma,
    TokenKind_pp_directive_undef,
    TokenKind_pp_directive_warning,
    TokenKind_pp_operator_defined,
    TokenKind_pp_operator___has_c_attribute,
    TokenKind_pp_operator___has_embed,
    TokenKind_pp_operator___has_include,

    // C23: 6.4.1
    TokenKind_keyword_alignas,
    TokenKind_keyword_alignof,
    TokenKind_keyword_auto,
    TokenKind_keyword_bool,
    TokenKind_keyword_break,
    TokenKind_keyword_case,
    TokenKind_keyword_char,
    TokenKind_keyword_const,
    TokenKind_keyword_constexpr,
    TokenKind_keyword_continue,
    TokenKind_keyword_default,
    TokenKind_keyword_do,
    TokenKind_keyword_double,
    TokenKind_keyword_else,
    TokenKind_keyword_enum,
    TokenKind_keyword_extern,
    TokenKind_keyword_false,
    TokenKind_keyword_float,
    TokenKind_keyword_for,
    TokenKind_keyword_goto,
    TokenKind_keyword_if,
    TokenKind_keyword_inline,
    TokenKind_keyword_int,
    TokenKind_keyword_long,
    TokenKind_keyword_nullptr,
    TokenKind_keyword_register,
    TokenKind_keyword_restrict,
    TokenKind_keyword_return,
    TokenKind_keyword_short,
    TokenKind_keyword_signed,
    TokenKind_keyword_sizeof,
    TokenKind_keyword_static,
    TokenKind_keyword_static_assert,
    TokenKind_keyword_struct,
    TokenKind_keyword_switch,
    TokenKind_keyword_thread_local,
    TokenKind_keyword_true,
    TokenKind_keyword_typedef,
    TokenKind_keyword_typeof,
    TokenKind_keyword_typeof_unqual,
    TokenKind_keyword_union,
    TokenKind_keyword_unsigned,
    TokenKind_keyword_void,
    TokenKind_keyword_volatile,
    TokenKind_keyword_while,
    TokenKind_keyword__Atomic,
    TokenKind_keyword__BitInt,
    TokenKind_keyword__Complex,
    TokenKind_keyword__Decimal128,
    TokenKind_keyword__Decimal32,
    TokenKind_keyword__Decimal64,
    TokenKind_keyword__Generic,
    TokenKind_keyword__Imaginary,
    TokenKind_keyword__Noreturn,

    TokenKind_and,
    TokenKind_andand,
    TokenKind_arrow,
    TokenKind_assign,
    TokenKind_assign_add,
    TokenKind_assign_and,
    TokenKind_assign_div,
    TokenKind_assign_lshift,
    TokenKind_assign_mod,
    TokenKind_assign_mul,
    TokenKind_assign_or,
    TokenKind_assign_rshift,
    TokenKind_assign_sub,
    TokenKind_assign_xor,
    TokenKind_brace_l,
    TokenKind_brace_r,
    TokenKind_bracket_l,
    TokenKind_bracket_r,
    TokenKind_colon,
    TokenKind_comma,
    TokenKind_dot,
    TokenKind_ellipsis,
    TokenKind_eq,
    TokenKind_ge,
    TokenKind_gt,
    TokenKind_ident,
    TokenKind_le,
    TokenKind_literal_int,
    TokenKind_literal_str,
    TokenKind_lshift,
    TokenKind_lt,
    TokenKind_minus,
    TokenKind_minusminus,
    TokenKind_ne,
    TokenKind_not,
    TokenKind_or,
    TokenKind_oror,
    TokenKind_paren_l,
    TokenKind_paren_r,
    TokenKind_percent,
    TokenKind_plus,
    TokenKind_plusplus,
    TokenKind_question,
    TokenKind_rshift,
    TokenKind_semicolon,
    TokenKind_slash,
    TokenKind_star,
    TokenKind_tilde,
    TokenKind_xor,
};
typedef enum TokenKind TokenKind;

// Returns a string literal naming the token kind `k`.
//
// Directives, keywords and punctuators map to their source spelling; kinds
// without a fixed spelling map to a placeholder in angle brackets (for example
// "<identifier>"). The returned pointer must not be freed.
// A value that is not a TokenKind enumerator reaches unreachable().
const char* token_kind_stringify(TokenKind k) {
    if (k == TokenKind_eof)
        return "<eof>";
    else if (k == TokenKind_hash)
        return "#";
    else if (k == TokenKind_hashhash)
        return "##";
    else if (k == TokenKind_whitespace)
        return "<whitespace>";
    else if (k == TokenKind_newline)
        return "<new-line>";
    else if (k == TokenKind_other)
        return "<other>";
    else if (k == TokenKind_character_constant)
        return "<character-constant>";
    else if (k == TokenKind_header_name)
        return "<header-name>";
    else if (k == TokenKind_pp_directive_define)
        return "#define";
    else if (k == TokenKind_pp_directive_elif)
        return "#elif";
    else if (k == TokenKind_pp_directive_elifdef)
        return "#elifdef";
    else if (k == TokenKind_pp_directive_elifndef)
        return "#elifndef";
    else if (k == TokenKind_pp_directive_else)
        return "#else";
    else if (k == TokenKind_pp_directive_embed)
        return "#embed";
    else if (k == TokenKind_pp_directive_endif)
        return "#endif";
    else if (k == TokenKind_pp_directive_error)
        return "#error";
    else if (k == TokenKind_pp_directive_if)
        return "#if";
    else if (k == TokenKind_pp_directive_ifdef)
        return "#ifdef";
    else if (k == TokenKind_pp_directive_ifndef)
        return "#ifndef";
    else if (k == TokenKind_pp_directive_include)
        return "#include";
    else if (k == TokenKind_pp_directive_line)
        return "#line";
    else if (k == TokenKind_pp_directive_pragma)
        return "#pragma";
    else if (k == TokenKind_pp_directive_undef)
        return "#undef";
    else if (k == TokenKind_pp_directive_warning)
        return "#warning";
    else if (k == TokenKind_pp_operator_defined)
        return "defined";
    else if (k == TokenKind_pp_operator___has_c_attribute)
        return "__has_c_attribute";
    else if (k == TokenKind_pp_operator___has_embed)
        return "__has_embed";
    else if (k == TokenKind_pp_operator___has_include)
        return "__has_include";
    else if (k == TokenKind_keyword_alignas)
        return "alignas";
    else if (k == TokenKind_keyword_alignof)
        return "alignof";
    else if (k == TokenKind_keyword_auto)
        return "auto";
    else if (k == TokenKind_keyword_bool)
        return "bool";
    else if (k == TokenKind_keyword_break)
        return "break";
    else if (k == TokenKind_keyword_case)
        return "case";
    else if (k == TokenKind_keyword_char)
        return "char";
    else if (k == TokenKind_keyword_const)
        return "const";
    else if (k == TokenKind_keyword_constexpr)
        return "constexpr";
    else if (k == TokenKind_keyword_continue)
        return "continue";
    else if (k == TokenKind_keyword_default)
        return "default";
    else if (k == TokenKind_keyword_do)
        return "do";
    else if (k == TokenKind_keyword_double)
        return "double";
    else if (k == TokenKind_keyword_else)
        return "else";
    else if (k == TokenKind_keyword_enum)
        return "enum";
    else if (k == TokenKind_keyword_extern)
        return "extern";
    else if (k == TokenKind_keyword_false)
        return "false";
    else if (k == TokenKind_keyword_float)
        return "float";
    else if (k == TokenKind_keyword_for)
        return "for";
    else if (k == TokenKind_keyword_goto)
        return "goto";
    else if (k == TokenKind_keyword_if)
        return "if";
    else if (k == TokenKind_keyword_inline)
        return "inline";
    else if (k == TokenKind_keyword_int)
        return "int";
    else if (k == TokenKind_keyword_long)
        return "long";
    else if (k == TokenKind_keyword_nullptr)
        return "nullptr";
    else if (k == TokenKind_keyword_register)
        return "register";
    else if (k == TokenKind_keyword_restrict)
        return "restrict";
    else if (k == TokenKind_keyword_return)
        return "return";
    else if (k == TokenKind_keyword_short)
        return "short";
    else if (k == TokenKind_keyword_signed)
        return "signed";
    else if (k == TokenKind_keyword_sizeof)
        return "sizeof";
    else if (k == TokenKind_keyword_static)
        return "static";
    else if (k == TokenKind_keyword_static_assert)
        return "static_assert";
    else if (k == TokenKind_keyword_struct)
        return "struct";
    else if (k == TokenKind_keyword_switch)
        return "switch";
    else if (k == TokenKind_keyword_thread_local)
        return "thread_local";
    else if (k == TokenKind_keyword_true)
        return "true";
    else if (k == TokenKind_keyword_typedef)
        return "typedef";
    else if (k == TokenKind_keyword_typeof)
        return "typeof";
    else if (k == TokenKind_keyword_typeof_unqual)
        return "typeof_unqual";
    else if (k == TokenKind_keyword_union)
        return "union";
    else if (k == TokenKind_keyword_unsigned)
        return "unsigned";
    else if (k == TokenKind_keyword_void)
        return "void";
    else if (k == TokenKind_keyword_volatile)
        return "volatile";
    else if (k == TokenKind_keyword_while)
        return "while";
    else if (k == TokenKind_keyword__Atomic)
        return "_Atomic";
    else if (k == TokenKind_keyword__BitInt)
        return "_BitInt";
    else if (k == TokenKind_keyword__Complex)
        return "_Complex";
    else if (k == TokenKind_keyword__Decimal128)
        return "_Decimal128";
    else if (k == TokenKind_keyword__Decimal32)
        return "_Decimal32";
    else if (k == TokenKind_keyword__Decimal64)
        return "_Decimal64";
    else if (k == TokenKind_keyword__Generic)
        return "_Generic";
    else if (k == TokenKind_keyword__Imaginary)
        return "_Imaginary";
    else if (k == TokenKind_keyword__Noreturn)
        return "_Noreturn";
    else if (k == TokenKind_and)
        return "&";
    else if (k == TokenKind_andand)
        return "&&";
    else if (k == TokenKind_arrow)
        return "->";
    else if (k == TokenKind_assign)
        return "=";
    else if (k == TokenKind_assign_add)
        return "+=";
    else if (k == TokenKind_assign_and)
        return "&=";
    else if (k == TokenKind_assign_div)
        return "/=";
    else if (k == TokenKind_assign_lshift)
        return "<<=";
    else if (k == TokenKind_assign_mod)
        return "%=";
    else if (k == TokenKind_assign_mul)
        return "*=";
    else if (k == TokenKind_assign_or)
        return "|=";
    else if (k == TokenKind_assign_rshift)
        return ">>=";
    else if (k == TokenKind_assign_sub)
        return "-=";
    else if (k == TokenKind_assign_xor)
        return "^=";
    else if (k == TokenKind_brace_l)
        return "{";
    else if (k == TokenKind_brace_r)
        return "}";
    else if (k == TokenKind_bracket_l)
        return "[";
    else if (k == TokenKind_bracket_r)
        return "]";
    else if (k == TokenKind_colon)
        return ":";
    else if (k == TokenKind_comma)
        return ",";
    else if (k == TokenKind_dot)
        return ".";
    else if (k == TokenKind_ellipsis)
        return "...";
    else if (k == TokenKind_eq)
        return "==";
    else if (k == TokenKind_ge)
        return ">=";
    else if (k == TokenKind_gt)
        return ">";
    else if (k == TokenKind_ident)
        return "<identifier>";
    else if (k == TokenKind_le)
        // Spelled as the punctuator, like TokenKind_ge.
        return "<=";
    else if (k == TokenKind_literal_int)
        return "<integer>";
    else if (k == TokenKind_literal_str)
        return "<string>";
    else if (k == TokenKind_lshift)
        return "<<";
    else if (k == TokenKind_lt)
        // Spelled as the punctuator, like TokenKind_gt.
        return "<";
    else if (k == TokenKind_minus)
        return "-";
    else if (k == TokenKind_minusminus)
        return "--";
    else if (k == TokenKind_ne)
        return "!=";
    else if (k == TokenKind_not)
        return "!";
    else if (k == TokenKind_or)
        return "|";
    else if (k == TokenKind_oror)
        return "||";
    else if (k == TokenKind_paren_l)
        return "(";
    else if (k == TokenKind_paren_r)
        return ")";
    else if (k == TokenKind_percent)
        return "%";
    else if (k == TokenKind_plus)
        return "+";
    else if (k == TokenKind_plusplus)
        return "++";
    else if (k == TokenKind_question)
        return "?";
    else if (k == TokenKind_rshift)
        return ">>";
    else if (k == TokenKind_semicolon)
        return ";";
    else if (k == TokenKind_slash)
        return "/";
    else if (k == TokenKind_star)
        return "*";
    else if (k == TokenKind_tilde)
        return "~";
    else if (k == TokenKind_xor)
        return "^";
    else
        unreachable();
}

// TokenValue is externally tagged by Token's kind.
+union TokenValue { + const char* string; + int integer; +}; +typedef union TokenValue TokenValue; + +struct Token { + TokenKind kind; + TokenValue value; + SourceLocation loc; +}; +typedef struct Token Token; + +const char* token_stringify(Token* t) { + TokenKind k = t->kind; + if (k == TokenKind_literal_int) { + const char* kind_str = token_kind_stringify(k); + char* buf = calloc(10 + strlen(kind_str) + 3 + 1, sizeof(char)); + sprintf(buf, "%d (%s)", t->value.integer, kind_str); + return buf; + } else if (k == TokenKind_other || k == TokenKind_character_constant || k == TokenKind_ident || + k == TokenKind_literal_int || k == TokenKind_literal_str) { + const char* kind_str = token_kind_stringify(k); + char* buf = calloc(strlen(t->value.string) + strlen(kind_str) + 3 + 1, sizeof(char)); + sprintf(buf, "%s (%s)", t->value.string, kind_str); + return buf; + } else { + return token_kind_stringify(k); + } +} + +struct TokenArray { + size_t len; + size_t capacity; + Token* data; +}; +typedef struct TokenArray TokenArray; + +void tokens_init(TokenArray* tokens, size_t capacity) { + tokens->len = 0; + tokens->capacity = capacity; + tokens->data = calloc(tokens->capacity, sizeof(Token)); +} + +void tokens_reserve(TokenArray* tokens, size_t size) { + if (size <= tokens->capacity) + return; + tokens->capacity *= 2; + tokens->data = realloc(tokens->data, tokens->capacity * sizeof(Token)); + memset(tokens->data + tokens->len, 0, (tokens->capacity - tokens->len) * sizeof(Token)); +} + +Token* tokens_push_new(TokenArray* tokens) { + tokens_reserve(tokens, tokens->len + 1); + return &tokens->data[tokens->len++]; +} + +Token* tokens_pop(TokenArray* tokens) { + if (tokens->len != 0) + tokens->len--; +} + +enum MacroKind { + MacroKind_undef, + MacroKind_obj, + MacroKind_func, + MacroKind_builtin_file, + MacroKind_builtin_line, +}; +typedef enum MacroKind MacroKind; + +const char* macro_kind_stringify(MacroKind kind) { + if (kind == MacroKind_undef) + return "undef"; + else if (kind 
== MacroKind_obj) + return "object-like"; + else if (kind == MacroKind_func) + return "function-like"; + else if (kind == MacroKind_builtin_file) + return "__FILE__"; + else if (kind == MacroKind_builtin_line) + return "__LINE__"; + else + unreachable(); +} + +struct Macro { + MacroKind kind; + const char* name; + TokenArray parameters; + TokenArray replacements; +}; +typedef struct Macro Macro; + +int macro_find_param(Macro* macro, Token* tok) { + if (tok->kind != TokenKind_ident) + return -1; + + for (int i = 0; i < macro->parameters.len; ++i) { + if (strcmp(macro->parameters.data[i].value.string, tok->value.string) == 0) { + return i; + } + } + return -1; +} + +struct MacroArray { + size_t len; + size_t capacity; + Macro* data; +}; +typedef struct MacroArray MacroArray; + +MacroArray* macros_new() { + MacroArray* macros = calloc(1, sizeof(MacroArray)); + macros->len = 0; + macros->capacity = 8; + macros->data = calloc(macros->capacity, sizeof(Macro)); + return macros; +} + +void macros_reserve(MacroArray* macros, size_t size) { + if (size <= macros->capacity) + return; + macros->capacity *= 2; + macros->data = realloc(macros->data, macros->capacity * sizeof(Macro)); + memset(macros->data + macros->len, 0, (macros->capacity - macros->len) * sizeof(Macro)); +} + +Macro* macros_push_new(MacroArray* macros) { + macros_reserve(macros, macros->len + 1); + return ¯os->data[macros->len++]; +} + +void macros_dump(MacroArray* macros) { + fprintf(stderr, "MacroArray {\n"); + fprintf(stderr, " len = %zu\n", macros->len); + fprintf(stderr, " data = [\n"); + for (int i = 0; i < macros->len; ++i) { + Macro* m = ¯os->data[i]; + fprintf(stderr, " Macro {\n"); + fprintf(stderr, " kind = %s\n", macro_kind_stringify(m->kind)); + fprintf(stderr, " name = %s\n", m->name); + fprintf(stderr, " replacements = TODO\n"); + fprintf(stderr, " }\n"); + } + fprintf(stderr, " ]\n"); + fprintf(stderr, "}\n"); +} + +void add_predefined_macros(MacroArray* macros) { + Macro* m; + + m = 
macros_push_new(macros); + m->kind = MacroKind_obj; + m->name = "__ducc__"; + tokens_init(&m->replacements, 1); + Token* tok = tokens_push_new(&m->replacements); + tok->kind = TokenKind_literal_int; + tok->value.integer = 1; + + m = macros_push_new(macros); + m->kind = MacroKind_builtin_file; + m->name = "__FILE__"; + + m = macros_push_new(macros); + m->kind = MacroKind_builtin_line; + m->name = "__LINE__"; +} + +struct MacroArg { + TokenArray tokens; +}; +typedef struct MacroArg MacroArg; + +struct MacroArgArray { + size_t len; + size_t capacity; + MacroArg* data; +}; +typedef struct MacroArgArray MacroArgArray; + +MacroArgArray* macroargs_new() { + MacroArgArray* macroargs = calloc(1, sizeof(MacroArgArray)); + macroargs->len = 0; + macroargs->capacity = 2; + macroargs->data = calloc(macroargs->capacity, sizeof(MacroArg)); + return macroargs; +} + +void macroargs_reserve(MacroArgArray* macroargs, size_t size) { + if (size <= macroargs->capacity) + return; + macroargs->capacity *= 2; + macroargs->data = realloc(macroargs->data, macroargs->capacity * sizeof(MacroArg)); + memset(macroargs->data + macroargs->len, 0, (macroargs->capacity - macroargs->len) * sizeof(MacroArg)); +} + +MacroArg* macroargs_push_new(MacroArgArray* macroargs) { + macroargs_reserve(macroargs, macroargs->len + 1); + return ¯oargs->data[macroargs->len++]; +} + +struct PpLexer { + InFile* src; + BOOL at_bol; + BOOL expect_header_name; + TokenArray* pp_tokens; +}; +typedef struct PpLexer PpLexer; + +PpLexer* pplexer_new(InFile* src) { + PpLexer* ppl = calloc(1, sizeof(PpLexer)); + + ppl->src = src; + ppl->at_bol = TRUE; + ppl->expect_header_name = FALSE; + ppl->pp_tokens = calloc(1, sizeof(TokenArray)); + tokens_init(ppl->pp_tokens, 1024 * 16); + + return ppl; +} + +TokenKind pplexer_tokenize_pp_directive(PpLexer* ppl) { + // Skip whitespaces after '#'. 
+ char c; + while (isspace((c = infile_peek_char(ppl->src)))) { + if (c == '\n') + break; + infile_next_char(ppl->src); + } + + SourceLocation pp_directive_name_start_loc = ppl->src->loc; + + StrBuilder builder; + strbuilder_init(&builder); + while (isalnum(infile_peek_char(ppl->src))) { + strbuilder_append_char(&builder, infile_peek_char(ppl->src)); + infile_next_char(ppl->src); + } + const char* pp_directive_name = builder.buf; + + if (builder.len == 0) { + return TokenKind_hash; + } else if (strcmp(pp_directive_name, "define") == 0) { + return TokenKind_pp_directive_define; + } else if (strcmp(pp_directive_name, "elif") == 0) { + return TokenKind_pp_directive_elif; + } else if (strcmp(pp_directive_name, "elifdef") == 0) { + return TokenKind_pp_directive_elifdef; + } else if (strcmp(pp_directive_name, "elifndef") == 0) { + return TokenKind_pp_directive_elifndef; + } else if (strcmp(pp_directive_name, "else") == 0) { + return TokenKind_pp_directive_else; + } else if (strcmp(pp_directive_name, "embed") == 0) { + return TokenKind_pp_directive_embed; + } else if (strcmp(pp_directive_name, "endif") == 0) { + return TokenKind_pp_directive_endif; + } else if (strcmp(pp_directive_name, "error") == 0) { + return TokenKind_pp_directive_error; + } else if (strcmp(pp_directive_name, "if") == 0) { + return TokenKind_pp_directive_if; + } else if (strcmp(pp_directive_name, "ifdef") == 0) { + return TokenKind_pp_directive_ifdef; + } else if (strcmp(pp_directive_name, "ifndef") == 0) { + return TokenKind_pp_directive_ifndef; + } else if (strcmp(pp_directive_name, "include") == 0) { + ppl->expect_header_name = TRUE; + return TokenKind_pp_directive_include; + } else if (strcmp(pp_directive_name, "line") == 0) { + return TokenKind_pp_directive_line; + } else if (strcmp(pp_directive_name, "pragma") == 0) { + return TokenKind_pp_directive_pragma; + } else if (strcmp(pp_directive_name, "undef") == 0) { + return TokenKind_pp_directive_undef; + } else if (strcmp(pp_directive_name, 
"warning") == 0) { + return TokenKind_pp_directive_warning; + } else { + fatal_error("%s:%d: unknown preprocessor directive (%s)", pp_directive_name_start_loc.filename, + pp_directive_name_start_loc.line, pp_directive_name); + } +} + +void pplexer_tokenize_all(PpLexer* ppl) { + while (!infile_eof(ppl->src)) { + Token* tok = tokens_push_new(ppl->pp_tokens); + tok->loc = ppl->src->loc; + char c = infile_peek_char(ppl->src); + + if (ppl->expect_header_name && c == '"') { + infile_next_char(ppl->src); + StrBuilder builder; + strbuilder_init(&builder); + strbuilder_append_char(&builder, '"'); + while (1) { + char ch = infile_peek_char(ppl->src); + if (ch == '"') + break; + strbuilder_append_char(&builder, ch); + if (ch == '\\') { + infile_next_char(ppl->src); + strbuilder_append_char(&builder, infile_peek_char(ppl->src)); + } + infile_next_char(ppl->src); + } + strbuilder_append_char(&builder, '"'); + infile_next_char(ppl->src); + tok->kind = TokenKind_header_name; + tok->value.string = builder.buf; + ppl->expect_header_name = FALSE; + } else if (ppl->expect_header_name && c == '<') { + infile_next_char(ppl->src); + StrBuilder builder; + strbuilder_init(&builder); + strbuilder_append_char(&builder, '<'); + while (1) { + char ch = infile_peek_char(ppl->src); + if (ch == '>') + break; + strbuilder_append_char(&builder, ch); + infile_next_char(ppl->src); + } + strbuilder_append_char(&builder, '>'); + infile_next_char(ppl->src); + tok->kind = TokenKind_header_name; + tok->value.string = builder.buf; + ppl->expect_header_name = FALSE; + } else if (c == '(') { + infile_next_char(ppl->src); + tok->kind = TokenKind_paren_l; + } else if (c == ')') { + infile_next_char(ppl->src); + tok->kind = TokenKind_paren_r; + } else if (c == '{') { + infile_next_char(ppl->src); + tok->kind = TokenKind_brace_l; + } else if (c == '}') { + infile_next_char(ppl->src); + tok->kind = TokenKind_brace_r; + } else if (c == '[') { + infile_next_char(ppl->src); + tok->kind = TokenKind_bracket_l; + } 
else if (c == ']') { + infile_next_char(ppl->src); + tok->kind = TokenKind_bracket_r; + } else if (c == ',') { + infile_next_char(ppl->src); + tok->kind = TokenKind_comma; + } else if (c == ':') { + infile_next_char(ppl->src); + tok->kind = TokenKind_colon; + } else if (c == ';') { + infile_next_char(ppl->src); + tok->kind = TokenKind_semicolon; + } else if (c == '^') { + infile_next_char(ppl->src); + if (infile_consume_if(ppl->src, '=')) { + tok->kind = TokenKind_assign_xor; + } else { + tok->kind = TokenKind_xor; + } + } else if (c == '?') { + infile_next_char(ppl->src); + tok->kind = TokenKind_question; + } else if (c == '~') { + infile_next_char(ppl->src); + tok->kind = TokenKind_tilde; + } else if (c == '+') { + infile_next_char(ppl->src); + if (infile_consume_if(ppl->src, '=')) { + tok->kind = TokenKind_assign_add; + } else if (infile_consume_if(ppl->src, '+')) { + tok->kind = TokenKind_plusplus; + } else { + tok->kind = TokenKind_plus; + } + } else if (c == '|') { + infile_next_char(ppl->src); + if (infile_consume_if(ppl->src, '=')) { + tok->kind = TokenKind_assign_or; + } else if (infile_consume_if(ppl->src, '|')) { + tok->kind = TokenKind_oror; + } else { + tok->kind = TokenKind_or; + } + } else if (c == '&') { + infile_next_char(ppl->src); + if (infile_consume_if(ppl->src, '=')) { + tok->kind = TokenKind_assign_and; + } else if (infile_consume_if(ppl->src, '&')) { + tok->kind = TokenKind_andand; + } else { + tok->kind = TokenKind_and; + } + } else if (c == '-') { + infile_next_char(ppl->src); + if (infile_consume_if(ppl->src, '>')) { + tok->kind = TokenKind_arrow; + } else if (infile_consume_if(ppl->src, '=')) { + tok->kind = TokenKind_assign_sub; + } else if (infile_consume_if(ppl->src, '-')) { + tok->kind = TokenKind_minusminus; + } else { + tok->kind = TokenKind_minus; + } + } else if (c == '*') { + infile_next_char(ppl->src); + if (infile_consume_if(ppl->src, '=')) { + tok->kind = TokenKind_assign_mul; + } else { + tok->kind = TokenKind_star; + } + } 
else if (c == '/') { + infile_next_char(ppl->src); + if (infile_consume_if(ppl->src, '=')) { + tok->kind = TokenKind_assign_div; + } else if (infile_consume_if(ppl->src, '/')) { + while (!infile_eof(ppl->src) && infile_peek_char(ppl->src) != '\n') { + infile_next_char(ppl->src); + } + tok->kind = TokenKind_whitespace; + } else if (infile_consume_if(ppl->src, '*')) { + while (infile_peek_char(ppl->src)) { + if (infile_consume_if(ppl->src, '*')) { + if (infile_consume_if(ppl->src, '/')) { + break; + } + continue; + } + infile_next_char(ppl->src); + } + tok->kind = TokenKind_whitespace; + } else { + tok->kind = TokenKind_slash; + } + } else if (c == '%') { + infile_next_char(ppl->src); + if (infile_consume_if(ppl->src, '=')) { + tok->kind = TokenKind_assign_mod; + } else { + tok->kind = TokenKind_percent; + } + } else if (c == '.') { + infile_next_char(ppl->src); + if (infile_consume_if(ppl->src, '.')) { + if (infile_consume_if(ppl->src, '.')) { + tok->kind = TokenKind_ellipsis; + } else { + tok->kind = TokenKind_other; + tok->value.string = ".."; + } + } else { + tok->kind = TokenKind_dot; + } + } else if (c == '!') { + infile_next_char(ppl->src); + if (infile_consume_if(ppl->src, '=')) { + tok->kind = TokenKind_ne; + } else { + tok->kind = TokenKind_not; + } + } else if (c == '=') { + infile_next_char(ppl->src); + if (infile_consume_if(ppl->src, '=')) { + tok->kind = TokenKind_eq; + } else { + tok->kind = TokenKind_assign; + } + } else if (c == '<') { + infile_next_char(ppl->src); + if (infile_consume_if(ppl->src, '=')) { + tok->kind = TokenKind_le; + } else if (infile_consume_if(ppl->src, '<')) { + if (infile_consume_if(ppl->src, '=')) { + tok->kind = TokenKind_assign_lshift; + } else { + tok->kind = TokenKind_lshift; + } + } else { + tok->kind = TokenKind_lt; + } + } else if (c == '>') { + infile_next_char(ppl->src); + if (infile_consume_if(ppl->src, '=')) { + tok->kind = TokenKind_ge; + } else if (infile_consume_if(ppl->src, '>')) { + if 
(infile_consume_if(ppl->src, '=')) { + tok->kind = TokenKind_assign_rshift; + } else { + tok->kind = TokenKind_rshift; + } + } else { + tok->kind = TokenKind_gt; + } + } else if (c == '#') { + infile_next_char(ppl->src); + if (infile_consume_if(ppl->src, '#')) { + tok->kind = TokenKind_hashhash; + } else { + tok->kind = ppl->at_bol ? pplexer_tokenize_pp_directive(ppl) : TokenKind_hash; + } + } else if (c == '\'') { + infile_next_char(ppl->src); + StrBuilder builder; + strbuilder_init(&builder); + strbuilder_append_char(&builder, '\''); + strbuilder_append_char(&builder, infile_peek_char(ppl->src)); + if (infile_peek_char(ppl->src) == '\\') { + infile_next_char(ppl->src); + strbuilder_append_char(&builder, infile_peek_char(ppl->src)); + } + strbuilder_append_char(&builder, '\''); + infile_next_char(ppl->src); + infile_next_char(ppl->src); + tok->kind = TokenKind_character_constant; + tok->value.string = builder.buf; + } else if (c == '"') { + infile_next_char(ppl->src); + StrBuilder builder; + strbuilder_init(&builder); + while (1) { + char ch = infile_peek_char(ppl->src); + if (ch == '"') + break; + strbuilder_append_char(&builder, ch); + if (ch == '\\') { + infile_next_char(ppl->src); + strbuilder_append_char(&builder, infile_peek_char(ppl->src)); + } + infile_next_char(ppl->src); + } + infile_next_char(ppl->src); + tok->kind = TokenKind_literal_str; + tok->value.string = builder.buf; + } else if (isdigit(c)) { + StrBuilder builder; + strbuilder_init(&builder); + while (isdigit(infile_peek_char(ppl->src))) { + strbuilder_append_char(&builder, infile_peek_char(ppl->src)); + infile_next_char(ppl->src); + } + tok->kind = TokenKind_literal_int; + tok->value.integer = atoi(builder.buf); + } else if (isalpha(c) || c == '_') { + StrBuilder builder; + strbuilder_init(&builder); + while (isalnum(infile_peek_char(ppl->src)) || infile_peek_char(ppl->src) == '_') { + strbuilder_append_char(&builder, infile_peek_char(ppl->src)); + infile_next_char(ppl->src); + } + tok->kind = 
TokenKind_ident; + tok->value.string = builder.buf; + } else if (c == '\n') { + infile_next_char(ppl->src); + tok->kind = TokenKind_newline; + } else if (isspace(c)) { + while (isspace((c = infile_peek_char(ppl->src)))) { + if (c == '\n') + break; + infile_next_char(ppl->src); + } + if (ppl->at_bol && infile_peek_char(ppl->src) == '#') { + infile_next_char(ppl->src); + tok->kind = pplexer_tokenize_pp_directive(ppl); + } else { + tok->kind = TokenKind_whitespace; + } + } else { + infile_next_char(ppl->src); + tok->kind = TokenKind_other; + char* buf = calloc(2, sizeof(char)); + buf[0] = c; + tok->value.string = buf; + } + ppl->at_bol = tok->kind == TokenKind_newline; + } + Token* eof_tok = tokens_push_new(ppl->pp_tokens); + eof_tok->loc = ppl->src->loc; + eof_tok->kind = TokenKind_eof; +} + +TokenArray* pp_tokenize(InFile* src) { + PpLexer* ppl = pplexer_new(src); + pplexer_tokenize_all(ppl); + return ppl->pp_tokens; +} + +struct Preprocessor { + TokenArray* pp_tokens; + int pos; + MacroArray* macros; + int include_depth; + BOOL skip_pp_tokens; + char** include_paths; + int n_include_paths; +}; +typedef struct Preprocessor Preprocessor; + +TokenArray* do_preprocess(InFile* src, int depth, MacroArray* macros); + +Preprocessor* preprocessor_new(TokenArray* pp_tokens, int include_depth, MacroArray* macros) { + if (include_depth >= 32) { + fatal_error("include depth limit exceeded"); + } + + Preprocessor* pp = calloc(1, sizeof(Preprocessor)); + pp->pp_tokens = pp_tokens; + pp->macros = macros; + pp->include_depth = include_depth; + pp->include_paths = calloc(16, sizeof(char*)); + + return pp; +} + +Token* pp_token_at(Preprocessor* pp, int i) { + return &pp->pp_tokens->data[i]; +} + +Token* peek_pp_token(Preprocessor* pp) { + return pp_token_at(pp, pp->pos); +} + +Token* next_pp_token(Preprocessor* pp) { + return pp_token_at(pp, pp->pos++); +} + +BOOL pp_eof(Preprocessor* pp) { + return peek_pp_token(pp)->kind == TokenKind_eof; +} + +int find_macro(Preprocessor* pp, 
const char* name) { + for (int i = 0; i < pp->macros->len; ++i) { + if (pp->macros->data[i].kind == MacroKind_undef) + continue; + if (strcmp(pp->macros->data[i].name, name) == 0) { + return i; + } + } + return -1; +} + +void undef_macro(Preprocessor* pp, int idx) { + pp->macros->data[idx].kind = MacroKind_undef; + // TODO: Can predefined macro like __FILE__ be undefined? +} + +void add_include_path(Preprocessor* pp, char* include_path) { + pp->include_paths[pp->n_include_paths] = include_path; + ++pp->n_include_paths; +} + +BOOL skip_pp_tokens(Preprocessor* pp) { + // TODO: support nested #if + return pp->skip_pp_tokens; +} + +void skip_whitespaces(Preprocessor* pp) { + while (!pp_eof(pp) && peek_pp_token(pp)->kind == TokenKind_whitespace) { + next_pp_token(pp); + } +} + +void seek_to_next_newline(Preprocessor* pp) { + while (!pp_eof(pp)) { + Token* tok = peek_pp_token(pp); + if (tok->kind == TokenKind_newline) { + break; + } + next_pp_token(pp); + } +} + +void make_token_whitespace(Token* tok) { + tok->kind = TokenKind_whitespace; + tok->value.string = NULL; +} + +void remove_directive_tokens(Preprocessor* pp, int start, int end) { + for (int i = start; i < end; ++i) { + make_token_whitespace(pp_token_at(pp, i)); + } +} + +void process_endif_directive(Preprocessor* pp, int directive_token_pos) { + next_pp_token(pp); + pp->skip_pp_tokens = FALSE; + remove_directive_tokens(pp, directive_token_pos, pp->pos); +} + +void process_else_directive(Preprocessor* pp, int directive_token_pos) { + next_pp_token(pp); + pp->skip_pp_tokens = !pp->skip_pp_tokens; + remove_directive_tokens(pp, directive_token_pos, pp->pos); +} + +void process_elif_directive(Preprocessor* pp, int directive_token_pos) { + unimplemented(); +} + +BOOL pp_eval_constant_expression(TokenArray*); +int replace_pp_tokens(Preprocessor*, int, int, TokenArray*); +BOOL expand_macro(Preprocessor*); + +void process_if_directive(Preprocessor* pp, int directive_token_pos) { + next_pp_token(pp); + int 
condition_expression_start_pos = pp->pos; + + while (!pp_eof(pp)) { + Token* tok = peek_pp_token(pp); + if (tok->kind == TokenKind_newline) { + break; + } else if (tok->kind == TokenKind_ident) { + if (strcmp(tok->value.string, "defined") == 0) { + int defined_pos = pp->pos; + // 'defined' <ws>* '(' <ws>* <ident> <ws>* ')' + // 'defined' <ws>* <ident> + next_pp_token(pp); + skip_whitespaces(pp); + Token* macro_name; + if (peek_pp_token(pp)->kind == TokenKind_paren_l) { + next_pp_token(pp); + skip_whitespaces(pp); + macro_name = next_pp_token(pp); + if (macro_name->kind != TokenKind_ident) { + fatal_error("invalid defined"); + } + skip_whitespaces(pp); + if (next_pp_token(pp)->kind != TokenKind_paren_r) { + fatal_error("invalid defined"); + } + } else { + macro_name = next_pp_token(pp); + if (macro_name->kind != TokenKind_ident) { + fatal_error("invalid defined"); + } + } + BOOL is_defined = find_macro(pp, macro_name->value.string) != -1; + TokenArray defined_results; + tokens_init(&defined_results, 1); + Token* defined_result = tokens_push_new(&defined_results); + defined_result->kind = TokenKind_literal_int; + defined_result->value.integer = is_defined; + pp->pos = replace_pp_tokens(pp, defined_pos, pp->pos, &defined_results); + } else { + BOOL expanded = expand_macro(pp); + if (expanded) { + // A macro may expand to another macro. Re-scan the expanded tokens. + // TODO: if the macro is defined recursively, it causes infinite loop. + } else { + next_pp_token(pp); + } + } + } else { + next_pp_token(pp); + } + } + + // all remaining identifiers other than true (including those lexically identical to keywords such as false) are + // replaced with the pp-number 0, true is replaced with pp-number 1, and then each preprocessing token is converted + // into a token. 
+ for (int pos = condition_expression_start_pos; pos < pp->pos; ++pos) { + Token* tok = pp_token_at(pp, pos); + if (tok->kind == TokenKind_ident) { + BOOL is_true = strcmp(tok->value.string, "true") == 0; + tok->kind = TokenKind_literal_int; + tok->value.integer = is_true; + } + } + + int condition_expression_tokens_len = pp->pos - condition_expression_start_pos; + TokenArray condition_expression_tokens; + // +1 to add EOF token at the end. + tokens_init(&condition_expression_tokens, condition_expression_tokens_len + 1); + for (int i = 0; i < condition_expression_tokens_len; ++i) { + *tokens_push_new(&condition_expression_tokens) = *pp_token_at(pp, condition_expression_start_pos + i); + } + Token* eof_tok = tokens_push_new(&condition_expression_tokens); + eof_tok->kind = TokenKind_eof; + + BOOL result = pp_eval_constant_expression(&condition_expression_tokens); + + pp->skip_pp_tokens = !result; + + remove_directive_tokens(pp, directive_token_pos, pp->pos); +} + +void process_ifdef_directive(Preprocessor* pp, int directive_token_pos) { + next_pp_token(pp); + skip_whitespaces(pp); + Token* macro_name = peek_pp_token(pp); + if (macro_name->kind == TokenKind_ident) { + next_pp_token(pp); + pp->skip_pp_tokens = find_macro(pp, macro_name->value.string) == -1; + } + remove_directive_tokens(pp, directive_token_pos, pp->pos); +} + +void process_ifndef_directive(Preprocessor* pp, int directive_token_pos) { + next_pp_token(pp); + skip_whitespaces(pp); + Token* macro_name = peek_pp_token(pp); + if (macro_name->kind == TokenKind_ident) { + next_pp_token(pp); + pp->skip_pp_tokens = find_macro(pp, macro_name->value.string) != -1; + } + remove_directive_tokens(pp, directive_token_pos, pp->pos); +} + +const char* read_include_header_name(Preprocessor* pp) { + Token* tok = next_pp_token(pp); + if (tok->kind != TokenKind_header_name) { + fatal_error("%s:%d: invalid #include", tok->loc.filename, tok->loc.line); + } + + return tok->value.string; +} + +const char* 
resolve_include_name(Preprocessor* pp, const char* include_name) { + if (include_name[0] == '"') { + return strndup(include_name + 1, strlen(include_name) - 2); + } else { + for (int i = 0; i < pp->n_include_paths; ++i) { + char* buf = calloc(strlen(include_name) - 2 + 1 + strlen(pp->include_paths[i]) + 1, sizeof(char)); + sprintf(buf, "%s/%.*s", pp->include_paths[i], strlen(include_name) - 2, include_name + 1); + if (access(buf, F_OK | R_OK) == 0) { + return buf; + } + } + return NULL; + } +} + +int replace_pp_tokens(Preprocessor* pp, int dest_start, int dest_end, TokenArray* source_tokens) { + int n_tokens_to_remove = dest_end - dest_start; + int n_tokens_after_dest = pp->pp_tokens->len - dest_end; + int shift_amount; + + if (n_tokens_to_remove < source_tokens->len) { + // Move existing tokens backward to make room. + shift_amount = source_tokens->len - n_tokens_to_remove; + tokens_reserve(pp->pp_tokens, pp->pp_tokens->len + shift_amount); + memmove(pp_token_at(pp, dest_end + shift_amount), pp_token_at(pp, dest_end), + n_tokens_after_dest * sizeof(Token)); + pp->pp_tokens->len += shift_amount; + } else if (source_tokens->len < n_tokens_to_remove) { + // Move existing tokens forward to reduce room. 
+ shift_amount = n_tokens_to_remove - source_tokens->len; + memmove(pp_token_at(pp, dest_start + source_tokens->len), pp_token_at(pp, dest_end), + n_tokens_after_dest * sizeof(Token)); + pp->pp_tokens->len -= shift_amount; + memset(pp_token_at(pp, pp->pp_tokens->len), 0, shift_amount * sizeof(Token)); + } + + memcpy(pp_token_at(pp, dest_start), source_tokens->data, source_tokens->len * sizeof(Token)); + + return dest_start + source_tokens->len; +} + +int replace_single_pp_token(Preprocessor* pp, int dest, Token* source_tok) { + TokenArray tokens; + tokens_init(&tokens, 1); + *tokens_push_new(&tokens) = *source_tok; + replace_pp_tokens(pp, dest, dest + 1, &tokens); +} + +void expand_include_directive(Preprocessor* pp, int directive_token_pos, const char* include_name) { + InFile* include_source = infile_open(include_name); + if (!include_source) { + fatal_error("cannot open include file: %s", include_name); + } + + TokenArray* include_pp_tokens = do_preprocess(include_source, pp->include_depth + 1, pp->macros); + tokens_pop(include_pp_tokens); // pop EOF token + pp->pos = replace_pp_tokens(pp, directive_token_pos, pp->pos, include_pp_tokens); +} + +void process_include_directive(Preprocessor* pp, int directive_token_pos) { + next_pp_token(pp); + skip_whitespaces(pp); + const char* include_name = read_include_header_name(pp); + const char* include_name_resolved = resolve_include_name(pp, include_name); + if (include_name_resolved == NULL) { + fatal_error("cannot resolve include file name: %s", include_name); + } + expand_include_directive(pp, directive_token_pos, include_name_resolved); +} + +// ws ::= many0(<whitespace>) +// macro-parameters ::= '(' <ws> opt(<identifier> <ws> many0(',' <ws> <identifier> <ws>)) ')' +TokenArray* pp_parse_macro_parameters(Preprocessor* pp) { + TokenArray* parameters = calloc(1, sizeof(TokenArray)); + tokens_init(parameters, 2); + + // '(' is consumed by caller. 
+ skip_whitespaces(pp); + Token* tok = next_pp_token(pp); + if (tok->kind == TokenKind_ident) { + *tokens_push_new(parameters) = *tok; + skip_whitespaces(pp); + while (peek_pp_token(pp)->kind == TokenKind_comma) { + next_pp_token(pp); + skip_whitespaces(pp); + tok = next_pp_token(pp); + if (tok->kind != TokenKind_ident) { + fatal_error("%s:%d: invalid macro syntax", tok->loc.filename, tok->loc.line); + } + *tokens_push_new(parameters) = *tok; + } + tok = next_pp_token(pp); + } + if (tok->kind != TokenKind_paren_r) { + fatal_error("%s:%d: invalid macro syntax", tok->loc.filename, tok->loc.line); + } + + return parameters; +} + +void process_define_directive(Preprocessor* pp, int directive_token_pos) { + next_pp_token(pp); + skip_whitespaces(pp); + Token* macro_name = next_pp_token(pp); + + if (macro_name->kind != TokenKind_ident) { + fatal_error("%s:%d: invalid #define syntax", macro_name->loc.filename, macro_name->loc.line); + } + + if (peek_pp_token(pp)->kind == TokenKind_paren_l) { + next_pp_token(pp); + TokenArray* parameters = pp_parse_macro_parameters(pp); + int replacements_start_pos = pp->pos; + seek_to_next_newline(pp); + if (pp_eof(pp)) { + fatal_error("%s:%d: invalid #define syntax", macro_name->loc.filename, macro_name->loc.line); + } + Macro* macro = macros_push_new(pp->macros); + macro->kind = MacroKind_func; + macro->name = macro_name->value.string; + macro->parameters = *parameters; + int n_replacements = pp->pos - replacements_start_pos; + tokens_init(¯o->replacements, n_replacements); + for (int i = 0; i < n_replacements; ++i) { + *tokens_push_new(¯o->replacements) = *pp_token_at(pp, replacements_start_pos + i); + } + } else { + int replacements_start_pos = pp->pos; + seek_to_next_newline(pp); + if (pp_eof(pp)) { + fatal_error("%s:%d: invalid #define syntax", macro_name->loc.filename, macro_name->loc.line); + } + Macro* macro = macros_push_new(pp->macros); + macro->kind = MacroKind_obj; + macro->name = macro_name->value.string; + int n_replacements 
= pp->pos - replacements_start_pos; + tokens_init(¯o->replacements, n_replacements); + for (int i = 0; i < n_replacements; ++i) { + *tokens_push_new(¯o->replacements) = *pp_token_at(pp, replacements_start_pos + i); + } + } + remove_directive_tokens(pp, directive_token_pos, pp->pos); +} + +void process_undef_directive(Preprocessor* pp, int directive_token_pos) { + next_pp_token(pp); + skip_whitespaces(pp); + Token* macro_name = peek_pp_token(pp); + if (macro_name->kind == TokenKind_ident) { + next_pp_token(pp); + int macro_idx = find_macro(pp, macro_name->value.string); + if (macro_idx != -1) { + undef_macro(pp, macro_idx); + } + } + remove_directive_tokens(pp, directive_token_pos, pp->pos); +} + +void process_line_directive(Preprocessor* pp, int directive_token_pos) { + unimplemented(); +} + +void process_error_directive(Preprocessor* pp, int directive_token_pos) { + unimplemented(); +} + +void process_pragma_directive(Preprocessor* pp, int directive_token_pos) { + unimplemented(); +} + +// ws ::= many0(<Whitespace>) +// macro-arguments ::= '(' <ws> opt(<any-token> <ws> many0(',' <ws> <any-token> <ws>)) ')' +MacroArgArray* pp_parse_macro_arguments(Preprocessor* pp) { + MacroArgArray* args = macroargs_new(); + + Token* tok = next_pp_token(pp); + if (tok->kind != TokenKind_paren_l) { + fatal_error("%s:%d: invalid macro syntax", tok->loc.filename, tok->loc.line); + } + skip_whitespaces(pp); + tok = next_pp_token(pp); + if (tok->kind != TokenKind_paren_r) { + MacroArg* arg = macroargs_push_new(args); + tokens_init(&arg->tokens, 1); + *tokens_push_new(&arg->tokens) = *tok; + skip_whitespaces(pp); + while (peek_pp_token(pp)->kind == TokenKind_comma) { + next_pp_token(pp); + skip_whitespaces(pp); + tok = next_pp_token(pp); + arg = macroargs_push_new(args); + tokens_init(&arg->tokens, 1); + *tokens_push_new(&arg->tokens) = *tok; + } + tok = next_pp_token(pp); + } + if (tok->kind != TokenKind_paren_r) { + fatal_error("%s:%d: invalid macro syntax", tok->loc.filename, 
tok->loc.line); + } + + return args; +} + +BOOL expand_macro(Preprocessor* pp) { + int macro_name_pos = pp->pos; + Token* macro_name = next_pp_token(pp); + int macro_idx = find_macro(pp, macro_name->value.string); + if (macro_idx == -1) { + return FALSE; + } + + SourceLocation original_loc = macro_name->loc; + Macro* macro = &pp->macros->data[macro_idx]; + if (macro->kind == MacroKind_func) { + MacroArgArray* args = pp_parse_macro_arguments(pp); + replace_pp_tokens(pp, macro_name_pos, pp->pos, ¯o->replacements); + for (int i = 0; i < macro->replacements.len; ++i) { + Token* tok = pp_token_at(pp, macro_name_pos + i); + int macro_param_idx = macro_find_param(macro, tok); + if (macro_param_idx != -1) { + replace_pp_tokens(pp, macro_name_pos + i, macro_name_pos + i + 1, &args->data[macro_param_idx].tokens); + } + } + // Inherit a source location from the original macro token. + for (int i = 0; i < macro->replacements.len; ++i) { + pp_token_at(pp, macro_name_pos + i)->loc = original_loc; + } + } else if (macro->kind == MacroKind_obj) { + replace_pp_tokens(pp, macro_name_pos, macro_name_pos + 1, ¯o->replacements); + // Inherit a source location from the original macro token. 
+ for (int i = 0; i < macro->replacements.len; ++i) { + pp_token_at(pp, macro_name_pos + i)->loc = original_loc; + } + } else if (macro->kind == MacroKind_builtin_file) { + Token file_tok; + file_tok.kind = TokenKind_literal_str; + file_tok.value.string = macro_name->loc.filename; + file_tok.loc.filename = NULL; + file_tok.loc.line = 0; + replace_single_pp_token(pp, macro_name_pos, &file_tok); + } else if (macro->kind == MacroKind_builtin_line) { + Token line_tok; + line_tok.kind = TokenKind_literal_int; + line_tok.value.integer = macro_name->loc.line; + line_tok.loc.filename = NULL; + line_tok.loc.line = 0; + replace_single_pp_token(pp, macro_name_pos, &line_tok); + } else { + unreachable(); + } + return TRUE; +} + +void process_pp_directive(Preprocessor* pp) { + int first_token_pos = pp->pos; + Token* tok = peek_pp_token(pp); + if (tok->kind == TokenKind_pp_directive_endif) { + process_endif_directive(pp, first_token_pos); + } else if (tok->kind == TokenKind_pp_directive_else) { + process_else_directive(pp, first_token_pos); + } else if (tok->kind == TokenKind_pp_directive_elif) { + process_elif_directive(pp, first_token_pos); + } else if (skip_pp_tokens(pp)) { + make_token_whitespace(next_pp_token(pp)); + } else if (tok->kind == TokenKind_pp_directive_if) { + process_if_directive(pp, first_token_pos); + } else if (tok->kind == TokenKind_pp_directive_ifdef) { + process_ifdef_directive(pp, first_token_pos); + } else if (tok->kind == TokenKind_pp_directive_ifndef) { + process_ifndef_directive(pp, first_token_pos); + } else if (tok->kind == TokenKind_pp_directive_include) { + process_include_directive(pp, first_token_pos); + } else if (tok->kind == TokenKind_pp_directive_define) { + process_define_directive(pp, first_token_pos); + } else if (tok->kind == TokenKind_pp_directive_undef) { + process_undef_directive(pp, first_token_pos); + } else if (tok->kind == TokenKind_pp_directive_line) { + process_line_directive(pp, first_token_pos); + } else if (tok->kind == 
TokenKind_pp_directive_error) { + process_error_directive(pp, first_token_pos); + } else if (tok->kind == TokenKind_pp_directive_pragma) { + process_pragma_directive(pp, first_token_pos); + } else if (tok->kind == TokenKind_ident) { + BOOL expanded = expand_macro(pp); + if (expanded) { + // A macro may expand to another macro. Re-scan the expanded tokens. + // TODO: if the macro is defined recursively, it causes infinite loop. + } else { + next_pp_token(pp); + } + } else { + next_pp_token(pp); + } +} + +void process_pp_directives(Preprocessor* pp) { + while (!pp_eof(pp)) { + process_pp_directive(pp); + } +} + +void pp_dump(Token* t, BOOL include_whitespace) { + for (; t->kind != TokenKind_eof; ++t) { + if (t->kind == TokenKind_whitespace && !include_whitespace) { + continue; + } + fprintf(stderr, "%s\n", token_stringify(t)); + } +} + +char* get_ducc_include_path() { + const char* self_dir = get_self_dir(); + char* buf = calloc(strlen(self_dir) + strlen("/include") + 1, sizeof(char)); + sprintf(buf, "%s/include", self_dir); + return buf; +} + +TokenArray* do_preprocess(InFile* src, int depth, MacroArray* macros) { + TokenArray* pp_tokens = pp_tokenize(src); + Preprocessor* pp = preprocessor_new(pp_tokens, depth, macros); + add_include_path(pp, get_ducc_include_path()); + add_include_path(pp, "/usr/include/x86_64-linux-gnu"); + add_include_path(pp, "/usr/include"); + process_pp_directives(pp); + return pp->pp_tokens; +} + +TokenArray* preprocess(InFile* src) { + MacroArray* macros = macros_new(); + add_predefined_macros(macros); + return do_preprocess(src, 0, macros); +} |
