|  |  |  |
|---|---|---|
| author | nsfisis <nsfisis@gmail.com> | 2025-08-22 23:28:25 +0900 |
| committer | nsfisis <nsfisis@gmail.com> | 2025-08-22 23:28:25 +0900 |
| commit | 9c202a496e75903fe37e5c19cb97c98eba6e35f2 (patch) | |
| tree | 52de494a4717a3c30c4bacb9dd9b91980be2a575 /preprocess.c | |
| parent | 0ac6ac95283735dd70ebf55b26ef78a4c32c31de (diff) | |
| download | ducc-9c202a496e75903fe37e5c19cb97c98eba6e35f2.tar.gz ducc-9c202a496e75903fe37e5c19cb97c98eba6e35f2.tar.zst ducc-9c202a496e75903fe37e5c19cb97c98eba6e35f2.zip | |
chore: move *.c and *.h files to src/
Diffstat (limited to 'preprocess.c')
|  |  |  |
|---|---|---|
| -rw-r--r-- | preprocess.c | 1557 |

1 file changed, 0 insertions, 1557 deletions
diff --git a/preprocess.c b/preprocess.c deleted file mode 100644 index b1810cd..0000000 --- a/preprocess.c +++ /dev/null @@ -1,1557 +0,0 @@ -enum TokenKind { - TokenKind_eof, - - // Only preprocessing phase. - TokenKind_hash, - TokenKind_hashhash, - TokenKind_whitespace, - TokenKind_newline, - TokenKind_other, - TokenKind_character_constant, - TokenKind_header_name, - TokenKind_pp_directive_define, - TokenKind_pp_directive_elif, - TokenKind_pp_directive_elifdef, - TokenKind_pp_directive_elifndef, - TokenKind_pp_directive_else, - TokenKind_pp_directive_embed, - TokenKind_pp_directive_endif, - TokenKind_pp_directive_error, - TokenKind_pp_directive_if, - TokenKind_pp_directive_ifdef, - TokenKind_pp_directive_ifndef, - TokenKind_pp_directive_include, - TokenKind_pp_directive_line, - TokenKind_pp_directive_pragma, - TokenKind_pp_directive_undef, - TokenKind_pp_directive_warning, - TokenKind_pp_operator_defined, - TokenKind_pp_operator___has_c_attribute, - TokenKind_pp_operator___has_embed, - TokenKind_pp_operator___has_include, - - // C23: 6.4.1 - TokenKind_keyword_alignas, - TokenKind_keyword_alignof, - TokenKind_keyword_auto, - TokenKind_keyword_bool, - TokenKind_keyword_break, - TokenKind_keyword_case, - TokenKind_keyword_char, - TokenKind_keyword_const, - TokenKind_keyword_constexpr, - TokenKind_keyword_continue, - TokenKind_keyword_default, - TokenKind_keyword_do, - TokenKind_keyword_double, - TokenKind_keyword_else, - TokenKind_keyword_enum, - TokenKind_keyword_extern, - TokenKind_keyword_false, - TokenKind_keyword_float, - TokenKind_keyword_for, - TokenKind_keyword_goto, - TokenKind_keyword_if, - TokenKind_keyword_inline, - TokenKind_keyword_int, - TokenKind_keyword_long, - TokenKind_keyword_nullptr, - TokenKind_keyword_register, - TokenKind_keyword_restrict, - TokenKind_keyword_return, - TokenKind_keyword_short, - TokenKind_keyword_signed, - TokenKind_keyword_sizeof, - TokenKind_keyword_static, - TokenKind_keyword_static_assert, - TokenKind_keyword_struct, - TokenKind_keyword_switch, - TokenKind_keyword_thread_local, - TokenKind_keyword_true, - TokenKind_keyword_typedef, - TokenKind_keyword_typeof, - TokenKind_keyword_typeof_unqual, - TokenKind_keyword_union, - TokenKind_keyword_unsigned, - TokenKind_keyword_void, - TokenKind_keyword_volatile, - TokenKind_keyword_while, - TokenKind_keyword__Atomic, - TokenKind_keyword__BitInt, - TokenKind_keyword__Complex, - TokenKind_keyword__Decimal128, - TokenKind_keyword__Decimal32, - TokenKind_keyword__Decimal64, - TokenKind_keyword__Generic, - TokenKind_keyword__Imaginary, - TokenKind_keyword__Noreturn, - - TokenKind_and, - TokenKind_andand, - TokenKind_arrow, - TokenKind_assign, - TokenKind_assign_add, - TokenKind_assign_and, - TokenKind_assign_div, - TokenKind_assign_lshift, - TokenKind_assign_mod, - TokenKind_assign_mul, - TokenKind_assign_or, - TokenKind_assign_rshift, - TokenKind_assign_sub, - TokenKind_assign_xor, - TokenKind_brace_l, - TokenKind_brace_r, - TokenKind_bracket_l, - TokenKind_bracket_r, - TokenKind_colon, - TokenKind_comma, - TokenKind_dot, - TokenKind_ellipsis, - TokenKind_eq, - TokenKind_ge, - TokenKind_gt, - TokenKind_ident, - TokenKind_le, - TokenKind_literal_int, - TokenKind_literal_str, - TokenKind_lshift, - TokenKind_lt, - TokenKind_minus, - TokenKind_minusminus, - TokenKind_ne, - TokenKind_not, - TokenKind_or, - TokenKind_oror, - TokenKind_paren_l, - TokenKind_paren_r, - TokenKind_percent, - TokenKind_plus, - TokenKind_plusplus, - TokenKind_question, - TokenKind_rshift, - TokenKind_semicolon, - TokenKind_slash, - 
TokenKind_star, - TokenKind_tilde, - TokenKind_xor, -}; -typedef enum TokenKind TokenKind; - -const char* token_kind_stringify(TokenKind k) { - if (k == TokenKind_eof) - return "<eof>"; - else if (k == TokenKind_hash) - return "#"; - else if (k == TokenKind_hashhash) - return "##"; - else if (k == TokenKind_whitespace) - return "<whitespace>"; - else if (k == TokenKind_newline) - return "<new-line>"; - else if (k == TokenKind_other) - return "<other>"; - else if (k == TokenKind_character_constant) - return "<character-constant>"; - else if (k == TokenKind_header_name) - return "<header-name>"; - else if (k == TokenKind_pp_directive_define) - return "#define"; - else if (k == TokenKind_pp_directive_elif) - return "#elif"; - else if (k == TokenKind_pp_directive_elifdef) - return "#elifdef"; - else if (k == TokenKind_pp_directive_elifndef) - return "#elifndef"; - else if (k == TokenKind_pp_directive_else) - return "#else"; - else if (k == TokenKind_pp_directive_embed) - return "#embed"; - else if (k == TokenKind_pp_directive_endif) - return "#endif"; - else if (k == TokenKind_pp_directive_error) - return "#error"; - else if (k == TokenKind_pp_directive_if) - return "#if"; - else if (k == TokenKind_pp_directive_ifdef) - return "#ifdef"; - else if (k == TokenKind_pp_directive_ifndef) - return "#ifndef"; - else if (k == TokenKind_pp_directive_include) - return "#include"; - else if (k == TokenKind_pp_directive_line) - return "#line"; - else if (k == TokenKind_pp_directive_pragma) - return "#pragma"; - else if (k == TokenKind_pp_directive_undef) - return "#undef"; - else if (k == TokenKind_pp_directive_warning) - return "#warning"; - else if (k == TokenKind_pp_operator_defined) - return "defined"; - else if (k == TokenKind_pp_operator___has_c_attribute) - return "__has_c_attribute"; - else if (k == TokenKind_pp_operator___has_embed) - return "__has_embed"; - else if (k == TokenKind_pp_operator___has_include) - return "__has_include"; - else if (k == TokenKind_keyword_alignas) - return "alignas"; - else if (k == TokenKind_keyword_alignof) - return "alignof"; - else if (k == TokenKind_keyword_auto) - return "auto"; - else if (k == TokenKind_keyword_bool) - return "bool"; - else if (k == TokenKind_keyword_break) - return "break"; - else if (k == TokenKind_keyword_case) - return "case"; - else if (k == TokenKind_keyword_char) - return "char"; - else if (k == TokenKind_keyword_const) - return "const"; - else if (k == TokenKind_keyword_constexpr) - return "constexpr"; - else if (k == TokenKind_keyword_continue) - return "continue"; - else if (k == TokenKind_keyword_default) - return "default"; - else if (k == TokenKind_keyword_do) - return "do"; - else if (k == TokenKind_keyword_double) - return "double"; - else if (k == TokenKind_keyword_else) - return "else"; - else if (k == TokenKind_keyword_enum) - return "enum"; - else if (k == TokenKind_keyword_extern) - return "extern"; - else if (k == TokenKind_keyword_false) - return "false"; - else if (k == TokenKind_keyword_float) - return "float"; - else if (k == TokenKind_keyword_for) - return "for"; - else if (k == TokenKind_keyword_goto) - return "goto"; - else if (k == TokenKind_keyword_if) - return "if"; - else if (k == TokenKind_keyword_inline) - return "inline"; - else if (k == TokenKind_keyword_int) - return "int"; - else if (k == TokenKind_keyword_long) - return "long"; - else if (k == TokenKind_keyword_nullptr) - return "nullptr"; - else if (k == TokenKind_keyword_register) - return "register"; - else if (k == TokenKind_keyword_restrict) - return 
"restrict"; - else if (k == TokenKind_keyword_return) - return "return"; - else if (k == TokenKind_keyword_short) - return "short"; - else if (k == TokenKind_keyword_signed) - return "signed"; - else if (k == TokenKind_keyword_sizeof) - return "sizeof"; - else if (k == TokenKind_keyword_static) - return "static"; - else if (k == TokenKind_keyword_static_assert) - return "static_assert"; - else if (k == TokenKind_keyword_struct) - return "struct"; - else if (k == TokenKind_keyword_switch) - return "switch"; - else if (k == TokenKind_keyword_thread_local) - return "thread_local"; - else if (k == TokenKind_keyword_true) - return "true"; - else if (k == TokenKind_keyword_typedef) - return "typedef"; - else if (k == TokenKind_keyword_typeof) - return "typeof"; - else if (k == TokenKind_keyword_typeof_unqual) - return "typeof_unqual"; - else if (k == TokenKind_keyword_union) - return "union"; - else if (k == TokenKind_keyword_unsigned) - return "unsigned"; - else if (k == TokenKind_keyword_void) - return "void"; - else if (k == TokenKind_keyword_volatile) - return "volatile"; - else if (k == TokenKind_keyword_while) - return "while"; - else if (k == TokenKind_keyword__Atomic) - return "_Atomic"; - else if (k == TokenKind_keyword__BitInt) - return "_BitInt"; - else if (k == TokenKind_keyword__Complex) - return "_Complex"; - else if (k == TokenKind_keyword__Decimal128) - return "_Decimal128"; - else if (k == TokenKind_keyword__Decimal32) - return "_Decimal32"; - else if (k == TokenKind_keyword__Decimal64) - return "_Decimal64"; - else if (k == TokenKind_keyword__Generic) - return "_Generic"; - else if (k == TokenKind_keyword__Imaginary) - return "_Imaginary"; - else if (k == TokenKind_keyword__Noreturn) - return "_Noreturn"; - else if (k == TokenKind_and) - return "&"; - else if (k == TokenKind_andand) - return "&&"; - else if (k == TokenKind_arrow) - return "->"; - else if (k == TokenKind_assign) - return "="; - else if (k == TokenKind_assign_add) - return "+="; - else if (k == TokenKind_assign_and) - return "&="; - else if (k == TokenKind_assign_div) - return "/="; - else if (k == TokenKind_assign_lshift) - return "<<="; - else if (k == TokenKind_assign_mod) - return "%="; - else if (k == TokenKind_assign_mul) - return "*="; - else if (k == TokenKind_assign_or) - return "|="; - else if (k == TokenKind_assign_rshift) - return ">>="; - else if (k == TokenKind_assign_sub) - return "-="; - else if (k == TokenKind_assign_xor) - return "^="; - else if (k == TokenKind_brace_l) - return "{"; - else if (k == TokenKind_brace_r) - return "}"; - else if (k == TokenKind_bracket_l) - return "["; - else if (k == TokenKind_bracket_r) - return "]"; - else if (k == TokenKind_colon) - return ":"; - else if (k == TokenKind_comma) - return ","; - else if (k == TokenKind_dot) - return "."; - else if (k == TokenKind_ellipsis) - return "..."; - else if (k == TokenKind_eq) - return "=="; - else if (k == TokenKind_ge) - return ">="; - else if (k == TokenKind_gt) - return ">"; - else if (k == TokenKind_ident) - return "<identifier>"; - else if (k == TokenKind_le) - return "le"; - else if (k == TokenKind_literal_int) - return "<integer>"; - else if (k == TokenKind_literal_str) - return "<string>"; - else if (k == TokenKind_lshift) - return "<<"; - else if (k == TokenKind_lt) - return "lt"; - else if (k == TokenKind_minus) - return "-"; - else if (k == TokenKind_minusminus) - return "--"; - else if (k == TokenKind_ne) - return "!="; - else if (k == TokenKind_not) - return "!"; - else if (k == TokenKind_or) - return "|"; - 
else if (k == TokenKind_oror) - return "||"; - else if (k == TokenKind_paren_l) - return "("; - else if (k == TokenKind_paren_r) - return ")"; - else if (k == TokenKind_percent) - return "%"; - else if (k == TokenKind_plus) - return "+"; - else if (k == TokenKind_plusplus) - return "++"; - else if (k == TokenKind_question) - return "?"; - else if (k == TokenKind_rshift) - return ">>"; - else if (k == TokenKind_semicolon) - return ";"; - else if (k == TokenKind_slash) - return "/"; - else if (k == TokenKind_star) - return "*"; - else if (k == TokenKind_tilde) - return "~"; - else if (k == TokenKind_xor) - return "^"; - else - unreachable(); -} - -// TokenValue is externally tagged by Token's kind. -union TokenValue { - const char* string; - int integer; -}; -typedef union TokenValue TokenValue; - -struct Token { - TokenKind kind; - TokenValue value; - SourceLocation loc; -}; -typedef struct Token Token; - -const char* token_stringify(Token* t) { - TokenKind k = t->kind; - if (k == TokenKind_literal_int) { - const char* kind_str = token_kind_stringify(k); - char* buf = calloc(10 + strlen(kind_str) + 3 + 1, sizeof(char)); - sprintf(buf, "%d (%s)", t->value.integer, kind_str); - return buf; - } else if (k == TokenKind_other || k == TokenKind_character_constant || k == TokenKind_ident || - k == TokenKind_literal_int || k == TokenKind_literal_str) { - const char* kind_str = token_kind_stringify(k); - char* buf = calloc(strlen(t->value.string) + strlen(kind_str) + 3 + 1, sizeof(char)); - sprintf(buf, "%s (%s)", t->value.string, kind_str); - return buf; - } else { - return token_kind_stringify(k); - } -} - -struct TokenArray { - size_t len; - size_t capacity; - Token* data; -}; -typedef struct TokenArray TokenArray; - -void tokens_init(TokenArray* tokens, size_t capacity) { - tokens->len = 0; - tokens->capacity = capacity; - tokens->data = calloc(tokens->capacity, sizeof(Token)); -} - -void tokens_reserve(TokenArray* tokens, size_t size) { - if (size <= tokens->capacity) - return; - tokens->capacity *= 2; - tokens->data = realloc(tokens->data, tokens->capacity * sizeof(Token)); - memset(tokens->data + tokens->len, 0, (tokens->capacity - tokens->len) * sizeof(Token)); -} - -Token* tokens_push_new(TokenArray* tokens) { - tokens_reserve(tokens, tokens->len + 1); - return &tokens->data[tokens->len++]; -} - -Token* tokens_pop(TokenArray* tokens) { - if (tokens->len != 0) - tokens->len--; -} - -enum MacroKind { - MacroKind_undef, - MacroKind_obj, - MacroKind_func, - MacroKind_builtin_file, - MacroKind_builtin_line, -}; -typedef enum MacroKind MacroKind; - -const char* macro_kind_stringify(MacroKind kind) { - if (kind == MacroKind_undef) - return "undef"; - else if (kind == MacroKind_obj) - return "object-like"; - else if (kind == MacroKind_func) - return "function-like"; - else if (kind == MacroKind_builtin_file) - return "__FILE__"; - else if (kind == MacroKind_builtin_line) - return "__LINE__"; - else - unreachable(); -} - -struct Macro { - MacroKind kind; - const char* name; - TokenArray parameters; - TokenArray replacements; -}; -typedef struct Macro Macro; - -int macro_find_param(Macro* macro, Token* tok) { - if (tok->kind != TokenKind_ident) - return -1; - - for (int i = 0; i < macro->parameters.len; ++i) { - if (strcmp(macro->parameters.data[i].value.string, tok->value.string) == 0) { - return i; - } - } - return -1; -} - -struct MacroArray { - size_t len; - size_t capacity; - Macro* data; -}; -typedef struct MacroArray MacroArray; - -MacroArray* macros_new() { - MacroArray* macros = calloc(1, 
sizeof(MacroArray)); - macros->len = 0; - macros->capacity = 8; - macros->data = calloc(macros->capacity, sizeof(Macro)); - return macros; -} - -void macros_reserve(MacroArray* macros, size_t size) { - if (size <= macros->capacity) - return; - macros->capacity *= 2; - macros->data = realloc(macros->data, macros->capacity * sizeof(Macro)); - memset(macros->data + macros->len, 0, (macros->capacity - macros->len) * sizeof(Macro)); -} - -Macro* macros_push_new(MacroArray* macros) { - macros_reserve(macros, macros->len + 1); - return ¯os->data[macros->len++]; -} - -void macros_dump(MacroArray* macros) { - fprintf(stderr, "MacroArray {\n"); - fprintf(stderr, " len = %zu\n", macros->len); - fprintf(stderr, " data = [\n"); - for (int i = 0; i < macros->len; ++i) { - Macro* m = ¯os->data[i]; - fprintf(stderr, " Macro {\n"); - fprintf(stderr, " kind = %s\n", macro_kind_stringify(m->kind)); - fprintf(stderr, " name = %s\n", m->name); - fprintf(stderr, " replacements = TODO\n"); - fprintf(stderr, " }\n"); - } - fprintf(stderr, " ]\n"); - fprintf(stderr, "}\n"); -} - -void add_predefined_macros(MacroArray* macros) { - Macro* m; - - m = macros_push_new(macros); - m->kind = MacroKind_obj; - m->name = "__ducc__"; - tokens_init(&m->replacements, 1); - Token* tok = tokens_push_new(&m->replacements); - tok->kind = TokenKind_literal_int; - tok->value.integer = 1; - - m = macros_push_new(macros); - m->kind = MacroKind_builtin_file; - m->name = "__FILE__"; - - m = macros_push_new(macros); - m->kind = MacroKind_builtin_line; - m->name = "__LINE__"; -} - -struct MacroArg { - TokenArray tokens; -}; -typedef struct MacroArg MacroArg; - -struct MacroArgArray { - size_t len; - size_t capacity; - MacroArg* data; -}; -typedef struct MacroArgArray MacroArgArray; - -MacroArgArray* macroargs_new() { - MacroArgArray* macroargs = calloc(1, sizeof(MacroArgArray)); - macroargs->len = 0; - macroargs->capacity = 2; - macroargs->data = calloc(macroargs->capacity, sizeof(MacroArg)); - return macroargs; -} - -void macroargs_reserve(MacroArgArray* macroargs, size_t size) { - if (size <= macroargs->capacity) - return; - macroargs->capacity *= 2; - macroargs->data = realloc(macroargs->data, macroargs->capacity * sizeof(MacroArg)); - memset(macroargs->data + macroargs->len, 0, (macroargs->capacity - macroargs->len) * sizeof(MacroArg)); -} - -MacroArg* macroargs_push_new(MacroArgArray* macroargs) { - macroargs_reserve(macroargs, macroargs->len + 1); - return ¯oargs->data[macroargs->len++]; -} - -struct PpLexer { - InFile* src; - BOOL at_bol; - BOOL expect_header_name; - TokenArray* pp_tokens; -}; -typedef struct PpLexer PpLexer; - -PpLexer* pplexer_new(InFile* src) { - PpLexer* ppl = calloc(1, sizeof(PpLexer)); - - ppl->src = src; - ppl->at_bol = TRUE; - ppl->expect_header_name = FALSE; - ppl->pp_tokens = calloc(1, sizeof(TokenArray)); - tokens_init(ppl->pp_tokens, 1024 * 16); - - return ppl; -} - -TokenKind pplexer_tokenize_pp_directive(PpLexer* ppl) { - // Skip whitespaces after '#'. 
- char c; - while (isspace((c = infile_peek_char(ppl->src)))) { - if (c == '\n') - break; - infile_next_char(ppl->src); - } - - SourceLocation pp_directive_name_start_loc = ppl->src->loc; - - StrBuilder builder; - strbuilder_init(&builder); - while (isalnum(infile_peek_char(ppl->src))) { - strbuilder_append_char(&builder, infile_peek_char(ppl->src)); - infile_next_char(ppl->src); - } - const char* pp_directive_name = builder.buf; - - if (builder.len == 0) { - return TokenKind_hash; - } else if (strcmp(pp_directive_name, "define") == 0) { - return TokenKind_pp_directive_define; - } else if (strcmp(pp_directive_name, "elif") == 0) { - return TokenKind_pp_directive_elif; - } else if (strcmp(pp_directive_name, "elifdef") == 0) { - return TokenKind_pp_directive_elifdef; - } else if (strcmp(pp_directive_name, "elifndef") == 0) { - return TokenKind_pp_directive_elifndef; - } else if (strcmp(pp_directive_name, "else") == 0) { - return TokenKind_pp_directive_else; - } else if (strcmp(pp_directive_name, "embed") == 0) { - return TokenKind_pp_directive_embed; - } else if (strcmp(pp_directive_name, "endif") == 0) { - return TokenKind_pp_directive_endif; - } else if (strcmp(pp_directive_name, "error") == 0) { - return TokenKind_pp_directive_error; - } else if (strcmp(pp_directive_name, "if") == 0) { - return TokenKind_pp_directive_if; - } else if (strcmp(pp_directive_name, "ifdef") == 0) { - return TokenKind_pp_directive_ifdef; - } else if (strcmp(pp_directive_name, "ifndef") == 0) { - return TokenKind_pp_directive_ifndef; - } else if (strcmp(pp_directive_name, "include") == 0) { - ppl->expect_header_name = TRUE; - return TokenKind_pp_directive_include; - } else if (strcmp(pp_directive_name, "line") == 0) { - return TokenKind_pp_directive_line; - } else if (strcmp(pp_directive_name, "pragma") == 0) { - return TokenKind_pp_directive_pragma; - } else if (strcmp(pp_directive_name, "undef") == 0) { - return TokenKind_pp_directive_undef; - } else if (strcmp(pp_directive_name, "warning") == 0) { - return TokenKind_pp_directive_warning; - } else { - fatal_error("%s:%d: unknown preprocessor directive (%s)", pp_directive_name_start_loc.filename, - pp_directive_name_start_loc.line, pp_directive_name); - } -} - -void pplexer_tokenize_all(PpLexer* ppl) { - while (!infile_eof(ppl->src)) { - Token* tok = tokens_push_new(ppl->pp_tokens); - tok->loc = ppl->src->loc; - char c = infile_peek_char(ppl->src); - - if (ppl->expect_header_name && c == '"') { - infile_next_char(ppl->src); - StrBuilder builder; - strbuilder_init(&builder); - strbuilder_append_char(&builder, '"'); - while (1) { - char ch = infile_peek_char(ppl->src); - if (ch == '"') - break; - strbuilder_append_char(&builder, ch); - if (ch == '\\') { - infile_next_char(ppl->src); - strbuilder_append_char(&builder, infile_peek_char(ppl->src)); - } - infile_next_char(ppl->src); - } - strbuilder_append_char(&builder, '"'); - infile_next_char(ppl->src); - tok->kind = TokenKind_header_name; - tok->value.string = builder.buf; - ppl->expect_header_name = FALSE; - } else if (ppl->expect_header_name && c == '<') { - infile_next_char(ppl->src); - StrBuilder builder; - strbuilder_init(&builder); - strbuilder_append_char(&builder, '<'); - while (1) { - char ch = infile_peek_char(ppl->src); - if (ch == '>') - break; - strbuilder_append_char(&builder, ch); - infile_next_char(ppl->src); - } - strbuilder_append_char(&builder, '>'); - infile_next_char(ppl->src); - tok->kind = TokenKind_header_name; - tok->value.string = builder.buf; - ppl->expect_header_name = FALSE; - } else 
if (c == '(') { - infile_next_char(ppl->src); - tok->kind = TokenKind_paren_l; - } else if (c == ')') { - infile_next_char(ppl->src); - tok->kind = TokenKind_paren_r; - } else if (c == '{') { - infile_next_char(ppl->src); - tok->kind = TokenKind_brace_l; - } else if (c == '}') { - infile_next_char(ppl->src); - tok->kind = TokenKind_brace_r; - } else if (c == '[') { - infile_next_char(ppl->src); - tok->kind = TokenKind_bracket_l; - } else if (c == ']') { - infile_next_char(ppl->src); - tok->kind = TokenKind_bracket_r; - } else if (c == ',') { - infile_next_char(ppl->src); - tok->kind = TokenKind_comma; - } else if (c == ':') { - infile_next_char(ppl->src); - tok->kind = TokenKind_colon; - } else if (c == ';') { - infile_next_char(ppl->src); - tok->kind = TokenKind_semicolon; - } else if (c == '^') { - infile_next_char(ppl->src); - if (infile_consume_if(ppl->src, '=')) { - tok->kind = TokenKind_assign_xor; - } else { - tok->kind = TokenKind_xor; - } - } else if (c == '?') { - infile_next_char(ppl->src); - tok->kind = TokenKind_question; - } else if (c == '~') { - infile_next_char(ppl->src); - tok->kind = TokenKind_tilde; - } else if (c == '+') { - infile_next_char(ppl->src); - if (infile_consume_if(ppl->src, '=')) { - tok->kind = TokenKind_assign_add; - } else if (infile_consume_if(ppl->src, '+')) { - tok->kind = TokenKind_plusplus; - } else { - tok->kind = TokenKind_plus; - } - } else if (c == '|') { - infile_next_char(ppl->src); - if (infile_consume_if(ppl->src, '=')) { - tok->kind = TokenKind_assign_or; - } else if (infile_consume_if(ppl->src, '|')) { - tok->kind = TokenKind_oror; - } else { - tok->kind = TokenKind_or; - } - } else if (c == '&') { - infile_next_char(ppl->src); - if (infile_consume_if(ppl->src, '=')) { - tok->kind = TokenKind_assign_and; - } else if (infile_consume_if(ppl->src, '&')) { - tok->kind = TokenKind_andand; - } else { - tok->kind = TokenKind_and; - } - } else if (c == '-') { - infile_next_char(ppl->src); - if (infile_consume_if(ppl->src, '>')) { - tok->kind = TokenKind_arrow; - } else if (infile_consume_if(ppl->src, '=')) { - tok->kind = TokenKind_assign_sub; - } else if (infile_consume_if(ppl->src, '-')) { - tok->kind = TokenKind_minusminus; - } else { - tok->kind = TokenKind_minus; - } - } else if (c == '*') { - infile_next_char(ppl->src); - if (infile_consume_if(ppl->src, '=')) { - tok->kind = TokenKind_assign_mul; - } else { - tok->kind = TokenKind_star; - } - } else if (c == '/') { - infile_next_char(ppl->src); - if (infile_consume_if(ppl->src, '=')) { - tok->kind = TokenKind_assign_div; - } else if (infile_consume_if(ppl->src, '/')) { - while (!infile_eof(ppl->src) && infile_peek_char(ppl->src) != '\n') { - infile_next_char(ppl->src); - } - tok->kind = TokenKind_whitespace; - } else if (infile_consume_if(ppl->src, '*')) { - while (infile_peek_char(ppl->src)) { - if (infile_consume_if(ppl->src, '*')) { - if (infile_consume_if(ppl->src, '/')) { - break; - } - continue; - } - infile_next_char(ppl->src); - } - tok->kind = TokenKind_whitespace; - } else { - tok->kind = TokenKind_slash; - } - } else if (c == '%') { - infile_next_char(ppl->src); - if (infile_consume_if(ppl->src, '=')) { - tok->kind = TokenKind_assign_mod; - } else { - tok->kind = TokenKind_percent; - } - } else if (c == '.') { - infile_next_char(ppl->src); - if (infile_consume_if(ppl->src, '.')) { - if (infile_consume_if(ppl->src, '.')) { - tok->kind = TokenKind_ellipsis; - } else { - tok->kind = TokenKind_other; - tok->value.string = ".."; - } - } else { - tok->kind = TokenKind_dot; - } - } else 
if (c == '!') { - infile_next_char(ppl->src); - if (infile_consume_if(ppl->src, '=')) { - tok->kind = TokenKind_ne; - } else { - tok->kind = TokenKind_not; - } - } else if (c == '=') { - infile_next_char(ppl->src); - if (infile_consume_if(ppl->src, '=')) { - tok->kind = TokenKind_eq; - } else { - tok->kind = TokenKind_assign; - } - } else if (c == '<') { - infile_next_char(ppl->src); - if (infile_consume_if(ppl->src, '=')) { - tok->kind = TokenKind_le; - } else if (infile_consume_if(ppl->src, '<')) { - if (infile_consume_if(ppl->src, '=')) { - tok->kind = TokenKind_assign_lshift; - } else { - tok->kind = TokenKind_lshift; - } - } else { - tok->kind = TokenKind_lt; - } - } else if (c == '>') { - infile_next_char(ppl->src); - if (infile_consume_if(ppl->src, '=')) { - tok->kind = TokenKind_ge; - } else if (infile_consume_if(ppl->src, '>')) { - if (infile_consume_if(ppl->src, '=')) { - tok->kind = TokenKind_assign_rshift; - } else { - tok->kind = TokenKind_rshift; - } - } else { - tok->kind = TokenKind_gt; - } - } else if (c == '#') { - infile_next_char(ppl->src); - if (infile_consume_if(ppl->src, '#')) { - tok->kind = TokenKind_hashhash; - } else { - tok->kind = ppl->at_bol ? pplexer_tokenize_pp_directive(ppl) : TokenKind_hash; - } - } else if (c == '\'') { - infile_next_char(ppl->src); - StrBuilder builder; - strbuilder_init(&builder); - strbuilder_append_char(&builder, '\''); - strbuilder_append_char(&builder, infile_peek_char(ppl->src)); - if (infile_peek_char(ppl->src) == '\\') { - infile_next_char(ppl->src); - strbuilder_append_char(&builder, infile_peek_char(ppl->src)); - } - strbuilder_append_char(&builder, '\''); - infile_next_char(ppl->src); - infile_next_char(ppl->src); - tok->kind = TokenKind_character_constant; - tok->value.string = builder.buf; - } else if (c == '"') { - infile_next_char(ppl->src); - StrBuilder builder; - strbuilder_init(&builder); - while (1) { - char ch = infile_peek_char(ppl->src); - if (ch == '"') - break; - strbuilder_append_char(&builder, ch); - if (ch == '\\') { - infile_next_char(ppl->src); - strbuilder_append_char(&builder, infile_peek_char(ppl->src)); - } - infile_next_char(ppl->src); - } - infile_next_char(ppl->src); - tok->kind = TokenKind_literal_str; - tok->value.string = builder.buf; - } else if (isdigit(c)) { - StrBuilder builder; - strbuilder_init(&builder); - while (isdigit(infile_peek_char(ppl->src))) { - strbuilder_append_char(&builder, infile_peek_char(ppl->src)); - infile_next_char(ppl->src); - } - tok->kind = TokenKind_literal_int; - tok->value.integer = atoi(builder.buf); - } else if (isalpha(c) || c == '_') { - StrBuilder builder; - strbuilder_init(&builder); - while (isalnum(infile_peek_char(ppl->src)) || infile_peek_char(ppl->src) == '_') { - strbuilder_append_char(&builder, infile_peek_char(ppl->src)); - infile_next_char(ppl->src); - } - tok->kind = TokenKind_ident; - tok->value.string = builder.buf; - } else if (c == '\n') { - infile_next_char(ppl->src); - tok->kind = TokenKind_newline; - } else if (isspace(c)) { - while (isspace((c = infile_peek_char(ppl->src)))) { - if (c == '\n') - break; - infile_next_char(ppl->src); - } - if (ppl->at_bol && infile_peek_char(ppl->src) == '#') { - infile_next_char(ppl->src); - tok->kind = pplexer_tokenize_pp_directive(ppl); - } else { - tok->kind = TokenKind_whitespace; - } - } else { - infile_next_char(ppl->src); - tok->kind = TokenKind_other; - char* buf = calloc(2, sizeof(char)); - buf[0] = c; - tok->value.string = buf; - } - ppl->at_bol = tok->kind == TokenKind_newline; - } - Token* eof_tok = 
tokens_push_new(ppl->pp_tokens); - eof_tok->loc = ppl->src->loc; - eof_tok->kind = TokenKind_eof; -} - -TokenArray* pp_tokenize(InFile* src) { - PpLexer* ppl = pplexer_new(src); - pplexer_tokenize_all(ppl); - return ppl->pp_tokens; -} - -struct Preprocessor { - TokenArray* pp_tokens; - int pos; - MacroArray* macros; - int include_depth; - BOOL skip_pp_tokens; - char** include_paths; - int n_include_paths; -}; -typedef struct Preprocessor Preprocessor; - -TokenArray* do_preprocess(InFile* src, int depth, MacroArray* macros); - -Preprocessor* preprocessor_new(TokenArray* pp_tokens, int include_depth, MacroArray* macros) { - if (include_depth >= 32) { - fatal_error("include depth limit exceeded"); - } - - Preprocessor* pp = calloc(1, sizeof(Preprocessor)); - pp->pp_tokens = pp_tokens; - pp->macros = macros; - pp->include_depth = include_depth; - pp->include_paths = calloc(16, sizeof(char*)); - - return pp; -} - -Token* pp_token_at(Preprocessor* pp, int i) { - return &pp->pp_tokens->data[i]; -} - -Token* peek_pp_token(Preprocessor* pp) { - return pp_token_at(pp, pp->pos); -} - -Token* next_pp_token(Preprocessor* pp) { - return pp_token_at(pp, pp->pos++); -} - -BOOL pp_eof(Preprocessor* pp) { - return peek_pp_token(pp)->kind == TokenKind_eof; -} - -int find_macro(Preprocessor* pp, const char* name) { - for (int i = 0; i < pp->macros->len; ++i) { - if (pp->macros->data[i].kind == MacroKind_undef) - continue; - if (strcmp(pp->macros->data[i].name, name) == 0) { - return i; - } - } - return -1; -} - -void undef_macro(Preprocessor* pp, int idx) { - pp->macros->data[idx].kind = MacroKind_undef; - // TODO: Can predefined macro like __FILE__ be undefined? -} - -void add_include_path(Preprocessor* pp, char* include_path) { - pp->include_paths[pp->n_include_paths] = include_path; - ++pp->n_include_paths; -} - -BOOL skip_pp_tokens(Preprocessor* pp) { - // TODO: support nested #if - return pp->skip_pp_tokens; -} - -void skip_whitespaces(Preprocessor* pp) { - while (!pp_eof(pp) && peek_pp_token(pp)->kind == TokenKind_whitespace) { - next_pp_token(pp); - } -} - -void seek_to_next_newline(Preprocessor* pp) { - while (!pp_eof(pp)) { - Token* tok = peek_pp_token(pp); - if (tok->kind == TokenKind_newline) { - break; - } - next_pp_token(pp); - } -} - -void make_token_whitespace(Token* tok) { - tok->kind = TokenKind_whitespace; - tok->value.string = NULL; -} - -void remove_directive_tokens(Preprocessor* pp, int start, int end) { - for (int i = start; i < end; ++i) { - make_token_whitespace(pp_token_at(pp, i)); - } -} - -void process_endif_directive(Preprocessor* pp, int directive_token_pos) { - next_pp_token(pp); - pp->skip_pp_tokens = FALSE; - remove_directive_tokens(pp, directive_token_pos, pp->pos); -} - -void process_else_directive(Preprocessor* pp, int directive_token_pos) { - next_pp_token(pp); - pp->skip_pp_tokens = !pp->skip_pp_tokens; - remove_directive_tokens(pp, directive_token_pos, pp->pos); -} - -void process_elif_directive(Preprocessor* pp, int directive_token_pos) { - unimplemented(); -} - -BOOL pp_eval_constant_expression(TokenArray*); -int replace_pp_tokens(Preprocessor*, int, int, TokenArray*); -BOOL expand_macro(Preprocessor*); - -void process_if_directive(Preprocessor* pp, int directive_token_pos) { - next_pp_token(pp); - int condition_expression_start_pos = pp->pos; - - while (!pp_eof(pp)) { - Token* tok = peek_pp_token(pp); - if (tok->kind == TokenKind_newline) { - break; - } else if (tok->kind == TokenKind_ident) { - if (strcmp(tok->value.string, "defined") == 0) { - int defined_pos = 
pp->pos; - // 'defined' <ws>* '(' <ws>* <ident> <ws>* ')' - // 'defined' <ws>* <ident> - next_pp_token(pp); - skip_whitespaces(pp); - Token* macro_name; - if (peek_pp_token(pp)->kind == TokenKind_paren_l) { - next_pp_token(pp); - skip_whitespaces(pp); - macro_name = next_pp_token(pp); - if (macro_name->kind != TokenKind_ident) { - fatal_error("invalid defined"); - } - skip_whitespaces(pp); - if (next_pp_token(pp)->kind != TokenKind_paren_r) { - fatal_error("invalid defined"); - } - } else { - macro_name = next_pp_token(pp); - if (macro_name->kind != TokenKind_ident) { - fatal_error("invalid defined"); - } - } - BOOL is_defined = find_macro(pp, macro_name->value.string) != -1; - TokenArray defined_results; - tokens_init(&defined_results, 1); - Token* defined_result = tokens_push_new(&defined_results); - defined_result->kind = TokenKind_literal_int; - defined_result->value.integer = is_defined; - pp->pos = replace_pp_tokens(pp, defined_pos, pp->pos, &defined_results); - } else { - BOOL expanded = expand_macro(pp); - if (expanded) { - // A macro may expand to another macro. Re-scan the expanded tokens. - // TODO: if the macro is defined recursively, it causes infinite loop. - } else { - next_pp_token(pp); - } - } - } else { - next_pp_token(pp); - } - } - - // all remaining identifiers other than true (including those lexically identical to keywords such as false) are - // replaced with the pp-number 0, true is replaced with pp-number 1, and then each preprocessing token is converted - // into a token. - for (int pos = condition_expression_start_pos; pos < pp->pos; ++pos) { - Token* tok = pp_token_at(pp, pos); - if (tok->kind == TokenKind_ident) { - BOOL is_true = strcmp(tok->value.string, "true") == 0; - tok->kind = TokenKind_literal_int; - tok->value.integer = is_true; - } - } - - int condition_expression_tokens_len = pp->pos - condition_expression_start_pos; - TokenArray condition_expression_tokens; - // +1 to add EOF token at the end. 
- tokens_init(&condition_expression_tokens, condition_expression_tokens_len + 1); - for (int i = 0; i < condition_expression_tokens_len; ++i) { - *tokens_push_new(&condition_expression_tokens) = *pp_token_at(pp, condition_expression_start_pos + i); - } - Token* eof_tok = tokens_push_new(&condition_expression_tokens); - eof_tok->kind = TokenKind_eof; - - BOOL result = pp_eval_constant_expression(&condition_expression_tokens); - - pp->skip_pp_tokens = !result; - - remove_directive_tokens(pp, directive_token_pos, pp->pos); -} - -void process_ifdef_directive(Preprocessor* pp, int directive_token_pos) { - next_pp_token(pp); - skip_whitespaces(pp); - Token* macro_name = peek_pp_token(pp); - if (macro_name->kind == TokenKind_ident) { - next_pp_token(pp); - pp->skip_pp_tokens = find_macro(pp, macro_name->value.string) == -1; - } - remove_directive_tokens(pp, directive_token_pos, pp->pos); -} - -void process_ifndef_directive(Preprocessor* pp, int directive_token_pos) { - next_pp_token(pp); - skip_whitespaces(pp); - Token* macro_name = peek_pp_token(pp); - if (macro_name->kind == TokenKind_ident) { - next_pp_token(pp); - pp->skip_pp_tokens = find_macro(pp, macro_name->value.string) != -1; - } - remove_directive_tokens(pp, directive_token_pos, pp->pos); -} - -const char* read_include_header_name(Preprocessor* pp) { - Token* tok = next_pp_token(pp); - if (tok->kind != TokenKind_header_name) { - fatal_error("%s:%d: invalid #include", tok->loc.filename, tok->loc.line); - } - - return tok->value.string; -} - -const char* resolve_include_name(Preprocessor* pp, const char* include_name) { - if (include_name[0] == '"') { - return strndup(include_name + 1, strlen(include_name) - 2); - } else { - for (int i = 0; i < pp->n_include_paths; ++i) { - char* buf = calloc(strlen(include_name) - 2 + 1 + strlen(pp->include_paths[i]) + 1, sizeof(char)); - sprintf(buf, "%s/%.*s", pp->include_paths[i], strlen(include_name) - 2, include_name + 1); - if (access(buf, F_OK | R_OK) == 0) { - return buf; - } - } - return NULL; - } -} - -int replace_pp_tokens(Preprocessor* pp, int dest_start, int dest_end, TokenArray* source_tokens) { - int n_tokens_to_remove = dest_end - dest_start; - int n_tokens_after_dest = pp->pp_tokens->len - dest_end; - int shift_amount; - - if (n_tokens_to_remove < source_tokens->len) { - // Move existing tokens backward to make room. - shift_amount = source_tokens->len - n_tokens_to_remove; - tokens_reserve(pp->pp_tokens, pp->pp_tokens->len + shift_amount); - memmove(pp_token_at(pp, dest_end + shift_amount), pp_token_at(pp, dest_end), - n_tokens_after_dest * sizeof(Token)); - pp->pp_tokens->len += shift_amount; - } else if (source_tokens->len < n_tokens_to_remove) { - // Move existing tokens forward to reduce room. 
- shift_amount = n_tokens_to_remove - source_tokens->len; - memmove(pp_token_at(pp, dest_start + source_tokens->len), pp_token_at(pp, dest_end), - n_tokens_after_dest * sizeof(Token)); - pp->pp_tokens->len -= shift_amount; - memset(pp_token_at(pp, pp->pp_tokens->len), 0, shift_amount * sizeof(Token)); - } - - memcpy(pp_token_at(pp, dest_start), source_tokens->data, source_tokens->len * sizeof(Token)); - - return dest_start + source_tokens->len; -} - -int replace_single_pp_token(Preprocessor* pp, int dest, Token* source_tok) { - TokenArray tokens; - tokens_init(&tokens, 1); - *tokens_push_new(&tokens) = *source_tok; - replace_pp_tokens(pp, dest, dest + 1, &tokens); -} - -void expand_include_directive(Preprocessor* pp, int directive_token_pos, const char* include_name) { - InFile* include_source = infile_open(include_name); - if (!include_source) { - fatal_error("cannot open include file: %s", include_name); - } - - TokenArray* include_pp_tokens = do_preprocess(include_source, pp->include_depth + 1, pp->macros); - tokens_pop(include_pp_tokens); // pop EOF token - pp->pos = replace_pp_tokens(pp, directive_token_pos, pp->pos, include_pp_tokens); -} - -void process_include_directive(Preprocessor* pp, int directive_token_pos) { - next_pp_token(pp); - skip_whitespaces(pp); - const char* include_name = read_include_header_name(pp); - const char* include_name_resolved = resolve_include_name(pp, include_name); - if (include_name_resolved == NULL) { - fatal_error("cannot resolve include file name: %s", include_name); - } - expand_include_directive(pp, directive_token_pos, include_name_resolved); -} - -// ws ::= many0(<whitespace>) -// macro-parameters ::= '(' <ws> opt(<identifier> <ws> many0(',' <ws> <identifier> <ws>)) ')' -TokenArray* pp_parse_macro_parameters(Preprocessor* pp) { - TokenArray* parameters = calloc(1, sizeof(TokenArray)); - tokens_init(parameters, 2); - - // '(' is consumed by caller. 
- skip_whitespaces(pp); - Token* tok = next_pp_token(pp); - if (tok->kind == TokenKind_ident) { - *tokens_push_new(parameters) = *tok; - skip_whitespaces(pp); - while (peek_pp_token(pp)->kind == TokenKind_comma) { - next_pp_token(pp); - skip_whitespaces(pp); - tok = next_pp_token(pp); - if (tok->kind != TokenKind_ident) { - fatal_error("%s:%d: invalid macro syntax", tok->loc.filename, tok->loc.line); - } - *tokens_push_new(parameters) = *tok; - } - tok = next_pp_token(pp); - } - if (tok->kind != TokenKind_paren_r) { - fatal_error("%s:%d: invalid macro syntax", tok->loc.filename, tok->loc.line); - } - - return parameters; -} - -void process_define_directive(Preprocessor* pp, int directive_token_pos) { - next_pp_token(pp); - skip_whitespaces(pp); - Token* macro_name = next_pp_token(pp); - - if (macro_name->kind != TokenKind_ident) { - fatal_error("%s:%d: invalid #define syntax", macro_name->loc.filename, macro_name->loc.line); - } - - if (peek_pp_token(pp)->kind == TokenKind_paren_l) { - next_pp_token(pp); - TokenArray* parameters = pp_parse_macro_parameters(pp); - int replacements_start_pos = pp->pos; - seek_to_next_newline(pp); - if (pp_eof(pp)) { - fatal_error("%s:%d: invalid #define syntax", macro_name->loc.filename, macro_name->loc.line); - } - Macro* macro = macros_push_new(pp->macros); - macro->kind = MacroKind_func; - macro->name = macro_name->value.string; - macro->parameters = *parameters; - int n_replacements = pp->pos - replacements_start_pos; - tokens_init(¯o->replacements, n_replacements); - for (int i = 0; i < n_replacements; ++i) { - *tokens_push_new(¯o->replacements) = *pp_token_at(pp, replacements_start_pos + i); - } - } else { - int replacements_start_pos = pp->pos; - seek_to_next_newline(pp); - if (pp_eof(pp)) { - fatal_error("%s:%d: invalid #define syntax", macro_name->loc.filename, macro_name->loc.line); - } - Macro* macro = macros_push_new(pp->macros); - macro->kind = MacroKind_obj; - macro->name = macro_name->value.string; - int n_replacements = pp->pos - replacements_start_pos; - tokens_init(¯o->replacements, n_replacements); - for (int i = 0; i < n_replacements; ++i) { - *tokens_push_new(¯o->replacements) = *pp_token_at(pp, replacements_start_pos + i); - } - } - remove_directive_tokens(pp, directive_token_pos, pp->pos); -} - -void process_undef_directive(Preprocessor* pp, int directive_token_pos) { - next_pp_token(pp); - skip_whitespaces(pp); - Token* macro_name = peek_pp_token(pp); - if (macro_name->kind == TokenKind_ident) { - next_pp_token(pp); - int macro_idx = find_macro(pp, macro_name->value.string); - if (macro_idx != -1) { - undef_macro(pp, macro_idx); - } - } - remove_directive_tokens(pp, directive_token_pos, pp->pos); -} - -void process_line_directive(Preprocessor* pp, int directive_token_pos) { - unimplemented(); -} - -void process_error_directive(Preprocessor* pp, int directive_token_pos) { - unimplemented(); -} - -void process_pragma_directive(Preprocessor* pp, int directive_token_pos) { - unimplemented(); -} - -// ws ::= many0(<Whitespace>) -// macro-arguments ::= '(' <ws> opt(<any-token> <ws> many0(',' <ws> <any-token> <ws>)) ')' -MacroArgArray* pp_parse_macro_arguments(Preprocessor* pp) { - MacroArgArray* args = macroargs_new(); - - Token* tok = next_pp_token(pp); - if (tok->kind != TokenKind_paren_l) { - fatal_error("%s:%d: invalid macro syntax", tok->loc.filename, tok->loc.line); - } - skip_whitespaces(pp); - tok = next_pp_token(pp); - if (tok->kind != TokenKind_paren_r) { - MacroArg* arg = macroargs_push_new(args); - tokens_init(&arg->tokens, 1); 
- *tokens_push_new(&arg->tokens) = *tok; - skip_whitespaces(pp); - while (peek_pp_token(pp)->kind == TokenKind_comma) { - next_pp_token(pp); - skip_whitespaces(pp); - tok = next_pp_token(pp); - arg = macroargs_push_new(args); - tokens_init(&arg->tokens, 1); - *tokens_push_new(&arg->tokens) = *tok; - } - tok = next_pp_token(pp); - } - if (tok->kind != TokenKind_paren_r) { - fatal_error("%s:%d: invalid macro syntax", tok->loc.filename, tok->loc.line); - } - - return args; -} - -BOOL expand_macro(Preprocessor* pp) { - int macro_name_pos = pp->pos; - Token* macro_name = next_pp_token(pp); - int macro_idx = find_macro(pp, macro_name->value.string); - if (macro_idx == -1) { - return FALSE; - } - - SourceLocation original_loc = macro_name->loc; - Macro* macro = &pp->macros->data[macro_idx]; - if (macro->kind == MacroKind_func) { - MacroArgArray* args = pp_parse_macro_arguments(pp); - replace_pp_tokens(pp, macro_name_pos, pp->pos, ¯o->replacements); - for (int i = 0; i < macro->replacements.len; ++i) { - Token* tok = pp_token_at(pp, macro_name_pos + i); - int macro_param_idx = macro_find_param(macro, tok); - if (macro_param_idx != -1) { - replace_pp_tokens(pp, macro_name_pos + i, macro_name_pos + i + 1, &args->data[macro_param_idx].tokens); - } - } - // Inherit a source location from the original macro token. - for (int i = 0; i < macro->replacements.len; ++i) { - pp_token_at(pp, macro_name_pos + i)->loc = original_loc; - } - } else if (macro->kind == MacroKind_obj) { - replace_pp_tokens(pp, macro_name_pos, macro_name_pos + 1, ¯o->replacements); - // Inherit a source location from the original macro token. - for (int i = 0; i < macro->replacements.len; ++i) { - pp_token_at(pp, macro_name_pos + i)->loc = original_loc; - } - } else if (macro->kind == MacroKind_builtin_file) { - Token file_tok; - file_tok.kind = TokenKind_literal_str; - file_tok.value.string = macro_name->loc.filename; - file_tok.loc.filename = NULL; - file_tok.loc.line = 0; - replace_single_pp_token(pp, macro_name_pos, &file_tok); - } else if (macro->kind == MacroKind_builtin_line) { - Token line_tok; - line_tok.kind = TokenKind_literal_int; - line_tok.value.integer = macro_name->loc.line; - line_tok.loc.filename = NULL; - line_tok.loc.line = 0; - replace_single_pp_token(pp, macro_name_pos, &line_tok); - } else { - unreachable(); - } - return TRUE; -} - -void process_pp_directive(Preprocessor* pp) { - int first_token_pos = pp->pos; - Token* tok = peek_pp_token(pp); - if (tok->kind == TokenKind_pp_directive_endif) { - process_endif_directive(pp, first_token_pos); - } else if (tok->kind == TokenKind_pp_directive_else) { - process_else_directive(pp, first_token_pos); - } else if (tok->kind == TokenKind_pp_directive_elif) { - process_elif_directive(pp, first_token_pos); - } else if (skip_pp_tokens(pp)) { - make_token_whitespace(next_pp_token(pp)); - } else if (tok->kind == TokenKind_pp_directive_if) { - process_if_directive(pp, first_token_pos); - } else if (tok->kind == TokenKind_pp_directive_ifdef) { - process_ifdef_directive(pp, first_token_pos); - } else if (tok->kind == TokenKind_pp_directive_ifndef) { - process_ifndef_directive(pp, first_token_pos); - } else if (tok->kind == TokenKind_pp_directive_include) { - process_include_directive(pp, first_token_pos); - } else if (tok->kind == TokenKind_pp_directive_define) { - process_define_directive(pp, first_token_pos); - } else if (tok->kind == TokenKind_pp_directive_undef) { - process_undef_directive(pp, first_token_pos); - } else if (tok->kind == TokenKind_pp_directive_line) { - 
process_line_directive(pp, first_token_pos); - } else if (tok->kind == TokenKind_pp_directive_error) { - process_error_directive(pp, first_token_pos); - } else if (tok->kind == TokenKind_pp_directive_pragma) { - process_pragma_directive(pp, first_token_pos); - } else if (tok->kind == TokenKind_ident) { - BOOL expanded = expand_macro(pp); - if (expanded) { - // A macro may expand to another macro. Re-scan the expanded tokens. - // TODO: if the macro is defined recursively, it causes infinite loop. - } else { - next_pp_token(pp); - } - } else { - next_pp_token(pp); - } -} - -void process_pp_directives(Preprocessor* pp) { - while (!pp_eof(pp)) { - process_pp_directive(pp); - } -} - -void pp_dump(Token* t, BOOL include_whitespace) { - for (; t->kind != TokenKind_eof; ++t) { - if (t->kind == TokenKind_whitespace && !include_whitespace) { - continue; - } - fprintf(stderr, "%s\n", token_stringify(t)); - } -} - -char* get_ducc_include_path() { - const char* self_dir = get_self_dir(); - char* buf = calloc(strlen(self_dir) + strlen("/include") + 1, sizeof(char)); - sprintf(buf, "%s/include", self_dir); - return buf; -} - -TokenArray* do_preprocess(InFile* src, int depth, MacroArray* macros) { - TokenArray* pp_tokens = pp_tokenize(src); - Preprocessor* pp = preprocessor_new(pp_tokens, depth, macros); - add_include_path(pp, get_ducc_include_path()); - add_include_path(pp, "/usr/include/x86_64-linux-gnu"); - add_include_path(pp, "/usr/include"); - process_pp_directives(pp); - return pp->pp_tokens; -} - -TokenArray* preprocess(InFile* src) { - MacroArray* macros = macros_new(); - add_predefined_macros(macros); - return do_preprocess(src, 0, macros); -} |
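
The deleted preprocess.c grows its TokenArray, MacroArray, and MacroArgArray containers with the same init/reserve/push-new pattern. Below is a minimal, self-contained sketch of that pattern; the `Tok`/`TokArray` names are stand-ins rather than the original types, and unlike the original `tokens_reserve` (which doubles the capacity only once per call), this version loops until the requested size fits.

```c
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Hypothetical stand-in for ducc's Token; the real struct also carries
 * a value union and a source location. */
typedef struct { int kind; } Tok;

typedef struct {
    size_t len;
    size_t capacity;
    Tok* data;
} TokArray;

/* Mirrors tokens_init(): allocate zeroed storage for `capacity` items. */
static void tokarray_init(TokArray* a, size_t capacity) {
    a->len = 0;
    a->capacity = capacity;
    a->data = calloc(a->capacity, sizeof(Tok));
}

/* Mirrors tokens_reserve(): grow geometrically and zero the new tail.
 * This sketch loops so that a single large request always fits. */
static void tokarray_reserve(TokArray* a, size_t size) {
    if (size <= a->capacity)
        return;
    while (a->capacity < size)
        a->capacity *= 2;
    a->data = realloc(a->data, a->capacity * sizeof(Tok));
    memset(a->data + a->len, 0, (a->capacity - a->len) * sizeof(Tok));
}

/* Mirrors tokens_push_new(): return a pointer to a fresh zeroed slot. */
static Tok* tokarray_push_new(TokArray* a) {
    tokarray_reserve(a, a->len + 1);
    return &a->data[a->len++];
}

int main(void) {
    TokArray a;
    tokarray_init(&a, 2);
    for (int i = 0; i < 10; ++i)
        tokarray_push_new(&a)->kind = i;
    printf("%zu tokens, capacity %zu\n", a.len, a.capacity);
    free(a.data);
    return 0;
}
```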
