diff options
| author | nsfisis <nsfisis@gmail.com> | 2026-05-03 17:29:12 +0900 |
|---|---|---|
| committer | nsfisis <nsfisis@gmail.com> | 2026-05-03 18:42:58 +0900 |
| commit | 3654ce578e6fff53950874adf7e0e4ae0a6eb956 (patch) | |
| tree | 5b6c04273de38dba70b7c25e55da144f5f7c37da /src/cc1/preprocess.c | |
| parent | 1b406b13b03055d2b2d08e8279a4a80c41ca7c20 (diff) | |
| download | ducc-main.tar.gz ducc-main.tar.zst ducc-main.zip | |
Diffstat (limited to 'src/cc1/preprocess.c')
| -rw-r--r-- | src/cc1/preprocess.c | 1559 |
1 files changed, 1559 insertions, 0 deletions
diff --git a/src/cc1/preprocess.c b/src/cc1/preprocess.c new file mode 100644 index 0000000..d050119 --- /dev/null +++ b/src/cc1/preprocess.c @@ -0,0 +1,1559 @@ +#include "preprocess.h" +#include <libgen.h> +#include <unistd.h> +#include "../lib/common.h" +#include "parse.h" +#include "sys.h" +#include "tokenize.h" + +typedef enum { + MacroKind_undef, + MacroKind_obj, + MacroKind_func, + MacroKind_builtin_file, + MacroKind_builtin_line, +} MacroKind; + +const char* macro_kind_stringify(MacroKind kind) { + if (kind == MacroKind_undef) + return "undef"; + else if (kind == MacroKind_obj) + return "object-like"; + else if (kind == MacroKind_func) + return "function-like"; + else if (kind == MacroKind_builtin_file) + return "__FILE__"; + else if (kind == MacroKind_builtin_line) + return "__LINE__"; + else + unreachable(); +} + +typedef struct { + MacroKind kind; + const char* name; + TokenArray parameters; + TokenArray replacements; +} Macro; + +void macro_build_json(JsonBuilder* builder, Macro* macro) { + jsonbuilder_object_start(builder); + + jsonbuilder_object_member_start(builder, "kind"); + jsonbuilder_string(builder, macro_kind_stringify(macro->kind)); + jsonbuilder_object_member_end(builder); + jsonbuilder_object_member_start(builder, "name"); + jsonbuilder_string(builder, macro->name); + jsonbuilder_object_member_end(builder); + jsonbuilder_object_member_start(builder, "parameters"); + tokens_build_json(builder, ¯o->parameters); + jsonbuilder_object_member_end(builder); + jsonbuilder_object_member_start(builder, "replacements"); + tokens_build_json(builder, ¯o->replacements); + jsonbuilder_object_member_end(builder); + + jsonbuilder_object_end(builder); +} + +static int macro_find_param(Macro* macro, Token* tok) { + if (tok->kind != TokenKind_ident) + return -1; + + for (size_t i = 0; i < macro->parameters.len; ++i) { + if (strcmp(macro->parameters.data[i].value.string, tok->value.string) == 0) { + return i; + } + } + return -1; +} + +typedef struct { + size_t len; + size_t capacity; + Macro* data; +} MacroArray; + +static MacroArray* macros_new() { + MacroArray* macros = calloc(1, sizeof(MacroArray)); + macros->len = 0; + macros->capacity = 8; + macros->data = calloc(macros->capacity, sizeof(Macro)); + return macros; +} + +static void macros_reserve(MacroArray* macros, size_t size) { + if (size <= macros->capacity) + return; + while (macros->capacity < size) { + macros->capacity *= 2; + } + macros->data = realloc(macros->data, macros->capacity * sizeof(Macro)); + memset(macros->data + macros->len, 0, (macros->capacity - macros->len) * sizeof(Macro)); +} + +static Macro* macros_push_new(MacroArray* macros) { + macros_reserve(macros, macros->len + 1); + return ¯os->data[macros->len++]; +} + +static void define_macro_to_number(MacroArray* macros, const char* name, int n) { + Macro* m = macros_push_new(macros); + m->kind = MacroKind_obj; + m->name = name; + tokens_init(&m->replacements, 1); + Token* tok = tokens_push_new(&m->replacements); + tok->kind = TokenKind_literal_int; + tok->value.integer = n; +} + +static void add_predefined_macros(MacroArray* macros) { + Macro* m; + + m = macros_push_new(macros); + m->kind = MacroKind_builtin_file; + m->name = "__FILE__"; + + m = macros_push_new(macros); + m->kind = MacroKind_builtin_line; + m->name = "__LINE__"; + + // Non-standard pre-defined macros. + define_macro_to_number(macros, "__ducc__", 1); + define_macro_to_number(macros, "__x86_64__", 1); + define_macro_to_number(macros, "__LP64__", 1); + + // GCC's predefined macros. Glibc depends on these macros. + // https://gcc.gnu.org/onlinedocs/cpp/Common-Predefined-Macros.html + // TODO: uncomment out __LONG_MAX__, etc. once ducc supports 64-bit integer literals. + define_macro_to_number(macros, "__CHAR_BIT__", __CHAR_BIT__); + define_macro_to_number(macros, "__SCHAR_MAX__", __SCHAR_MAX__); + define_macro_to_number(macros, "__WCHAR_MAX__", __WCHAR_MAX__); + define_macro_to_number(macros, "__SHRT_MAX__", __SHRT_MAX__); + define_macro_to_number(macros, "__INT_MAX__", __INT_MAX__); + // define_macro_to_number(macros, "__LONG_MAX__", __LONG_MAX__); + // define_macro_to_number(macros, "__LONG_LONG_MAX__", __LONG_LONG_MAX__); + define_macro_to_number(macros, "__WINT_MAX__", __WINT_MAX__); + // define_macro_to_number(macros, "__SIZE_MAX__", __SIZE_MAX__); + // define_macro_to_number(macros, "__PTRDIFF_MAX__", __PTRDIFF_MAX__); + // define_macro_to_number(macros, "__INTMAX_MAX__", __INTMAX_MAX__); + // define_macro_to_number(macros, "__UINTMAX_MAX__", __UINTMAX_MAX__); + define_macro_to_number(macros, "__SIG_ATOMIC_MAX__", __SIG_ATOMIC_MAX__); + define_macro_to_number(macros, "__INT8_MAX__", __INT8_MAX__); + define_macro_to_number(macros, "__INT16_MAX__", __INT16_MAX__); + define_macro_to_number(macros, "__INT32_MAX__", __INT32_MAX__); + // define_macro_to_number(macros, "__INT64_MAX__", __INT64_MAX__); + define_macro_to_number(macros, "__UINT8_MAX__", __UINT8_MAX__); + define_macro_to_number(macros, "__UINT16_MAX__", __UINT16_MAX__); + define_macro_to_number(macros, "__UINT32_MAX__", __UINT32_MAX__); + // define_macro_to_number(macros, "__UINT64_MAX__", __UINT64_MAX__); + define_macro_to_number(macros, "__INT_LEAST8_MAX__", __INT_LEAST8_MAX__); + define_macro_to_number(macros, "__INT_LEAST16_MAX__", __INT_LEAST16_MAX__); + define_macro_to_number(macros, "__INT_LEAST32_MAX__", __INT_LEAST32_MAX__); + // define_macro_to_number(macros, "__INT_LEAST64_MAX__", __INT_LEAST64_MAX__); + define_macro_to_number(macros, "__UINT_LEAST8_MAX__", __UINT_LEAST8_MAX__); + define_macro_to_number(macros, "__UINT_LEAST16_MAX__", __UINT_LEAST16_MAX__); + define_macro_to_number(macros, "__UINT_LEAST32_MAX__", __UINT_LEAST32_MAX__); + // define_macro_to_number(macros, "__UINT_LEAST64_MAX__", __UINT_LEAST64_MAX__); + define_macro_to_number(macros, "__INT_FAST8_MAX__", __INT_FAST8_MAX__); + // define_macro_to_number(macros, "__INT_FAST16_MAX__", __INT_FAST16_MAX__); + // define_macro_to_number(macros, "__INT_FAST32_MAX__", __INT_FAST32_MAX__); + // define_macro_to_number(macros, "__INT_FAST64_MAX__", __INT_FAST64_MAX__); + define_macro_to_number(macros, "__UINT_FAST8_MAX__", __UINT_FAST8_MAX__); + // define_macro_to_number(macros, "__UINT_FAST16_MAX__", __UINT_FAST16_MAX__); + // define_macro_to_number(macros, "__UINT_FAST32_MAX__", __UINT_FAST32_MAX__); + // define_macro_to_number(macros, "__UINT_FAST64_MAX__", __UINT_FAST64_MAX__); + // define_macro_to_number(macros, "__INTPTR_MAX__", __INTPTR_MAX__); + // define_macro_to_number(macros, "__UINTPTR_MAX__", __UINTPTR_MAX__); + define_macro_to_number(macros, "__WCHAR_MIN__", __WCHAR_MIN__); + define_macro_to_number(macros, "__WINT_MIN__", __WINT_MIN__); + define_macro_to_number(macros, "__SIG_ATOMIC_MIN__", __SIG_ATOMIC_MIN__); + + // GCC's predefined macros not listed in Common Predefined Macros page. + define_macro_to_number(macros, "__DBL_DIG__", __DBL_DIG__); + define_macro_to_number(macros, "__DBL_MANT_DIG__", __DBL_MANT_DIG__); + define_macro_to_number(macros, "__DBL_MAX_10_EXP__", __DBL_MAX_10_EXP__); + define_macro_to_number(macros, "__FLT_DIG__", __FLT_DIG__); + define_macro_to_number(macros, "__FLT_MANT_DIG__", __FLT_MANT_DIG__); + define_macro_to_number(macros, "__FLT_MAX_10_EXP__", __FLT_MAX_10_EXP__); + define_macro_to_number(macros, "__LDBL_DIG__", __LDBL_DIG__); + define_macro_to_number(macros, "__LDBL_MANT_DIG__", __LDBL_MANT_DIG__); + define_macro_to_number(macros, "__LDBL_MAX_10_EXP__", __LDBL_MAX_10_EXP__); +} + +// Accept "FOO" or "FOO=value" +static void define_macro_from_string(MacroArray* macros, const char* def) { + Macro* m = macros_push_new(macros); + m->kind = MacroKind_obj; + + const char* eq = strchr(def, '='); + if (eq) { + // FOO=value format + size_t name_len = eq - def; + char* name = calloc(name_len + 1, sizeof(char)); + memcpy(name, def, name_len); + m->name = name; + + const char* value = eq + 1; + tokens_init(&m->replacements, 1); + Token* tok = tokens_push_new(&m->replacements); + + // Try to parse as integer + char* num_end; + long int_val = strtol(value, &num_end, 10); + if (value[0] != '\0' && *num_end == '\0') { + tok->kind = TokenKind_literal_int; + tok->value.integer = int_val; + } else { + tok->kind = TokenKind_ident; + tok->value.string = value; + } + } else { + // FOO format (equivalent to FOO=1) + m->name = def; + tokens_init(&m->replacements, 1); + Token* tok = tokens_push_new(&m->replacements); + tok->kind = TokenKind_literal_int; + tok->value.integer = 1; + } +} + +static void add_user_defines(MacroArray* macros, StrArray* user_defines) { + for (size_t i = 0; i < user_defines->len; ++i) { + define_macro_from_string(macros, user_defines->data[i]); + } +} + +void macros_build_json(JsonBuilder* builder, MacroArray* macros) { + jsonbuilder_array_start(builder); + for (size_t i = 0; i < macros->len; ++i) { + jsonbuilder_array_element_start(builder); + macro_build_json(builder, ¯os->data[i]); + jsonbuilder_array_element_end(builder); + } + jsonbuilder_array_end(builder); +} + +typedef struct { + TokenArray tokens; +} MacroArg; + +void macroarg_build_json(JsonBuilder* builder, MacroArg* arg) { + jsonbuilder_object_start(builder); + jsonbuilder_object_member_start(builder, "tokens"); + tokens_build_json(builder, &arg->tokens); + jsonbuilder_object_member_end(builder); + jsonbuilder_object_end(builder); +} + +typedef struct { + size_t len; + size_t capacity; + MacroArg* data; +} MacroArgArray; + +static MacroArgArray* macroargs_new() { + MacroArgArray* macroargs = calloc(1, sizeof(MacroArgArray)); + macroargs->len = 0; + macroargs->capacity = 2; + macroargs->data = calloc(macroargs->capacity, sizeof(MacroArg)); + return macroargs; +} + +static void macroargs_reserve(MacroArgArray* macroargs, size_t size) { + if (size <= macroargs->capacity) + return; + while (macroargs->capacity < size) { + macroargs->capacity *= 2; + } + macroargs->data = realloc(macroargs->data, macroargs->capacity * sizeof(MacroArg)); + memset(macroargs->data + macroargs->len, 0, (macroargs->capacity - macroargs->len) * sizeof(MacroArg)); +} + +static MacroArg* macroargs_push_new(MacroArgArray* macroargs) { + macroargs_reserve(macroargs, macroargs->len + 1); + return ¯oargs->data[macroargs->len++]; +} + +void macroargs_build_json(JsonBuilder* builder, MacroArgArray* macroargs) { + jsonbuilder_object_start(builder); + jsonbuilder_object_member_start(builder, "len"); + jsonbuilder_integer(builder, macroargs->len); + jsonbuilder_object_member_end(builder); + jsonbuilder_object_member_start(builder, "data"); + jsonbuilder_array_start(builder); + for (size_t i = 0; i < macroargs->len; ++i) { + jsonbuilder_array_element_start(builder); + macroarg_build_json(builder, ¯oargs->data[i]); + jsonbuilder_array_element_end(builder); + } + jsonbuilder_array_end(builder); + jsonbuilder_object_member_end(builder); + jsonbuilder_object_end(builder); +} + +typedef struct { + TokenArray* pp_tokens; + int pos; + MacroArray* macros; + int include_depth; + StrArray* include_paths; + StrArray* included_files; + bool generate_system_deps; + bool generate_user_deps; +} Preprocessor; + +static TokenArray* do_preprocess(InFile* src, int depth, MacroArray* macros, StrArray* include_paths, + StrArray* included_files, bool generate_system_deps, bool generate_user_deps); + +static Preprocessor* preprocessor_new(TokenArray* pp_tokens, int include_depth, MacroArray* macros, + StrArray* include_paths, StrArray* included_files, bool generate_system_deps, + bool generate_user_deps) { + if (include_depth >= 32) { + fatal_error("include depth limit exceeded"); + } + + Preprocessor* pp = calloc(1, sizeof(Preprocessor)); + pp->pp_tokens = pp_tokens; + pp->macros = macros; + pp->include_depth = include_depth; + pp->include_paths = include_paths; + pp->included_files = included_files; + pp->generate_system_deps = generate_system_deps; + pp->generate_user_deps = generate_user_deps; + + return pp; +} + +static Token* pp_token_at(Preprocessor* pp, int i) { + return &pp->pp_tokens->data[i]; +} + +static Token* peek_pp_token(Preprocessor* pp) { + return pp_token_at(pp, pp->pos); +} + +static Token* next_pp_token(Preprocessor* pp) { + return pp_token_at(pp, pp->pos++); +} + +static void skip_pp_token(Preprocessor* pp, TokenKind expected) { + Token* tok = next_pp_token(pp); + assert(tok->kind == expected); +} + +static Token* consume_pp_token_if(Preprocessor* pp, TokenKind expected) { + if (peek_pp_token(pp)->kind == expected) { + return next_pp_token(pp); + } else { + return NULL; + } +} + +static Token* consume_pp_token_if_not(Preprocessor* pp, TokenKind non_expected) { + if (peek_pp_token(pp)->kind == non_expected) { + return NULL; + } else { + return next_pp_token(pp); + } +} + +static Token* expect_pp_token(Preprocessor* pp, TokenKind expected) { + Token* tok = next_pp_token(pp); + if (tok->kind == expected) { + return tok; + } + fatal_error("%s:%d: expected '%s', but got '%s'", tok->loc.filename, tok->loc.line, token_kind_stringify(expected), + token_stringify(tok)); +} + +static bool pp_eof(Preprocessor* pp) { + return peek_pp_token(pp)->kind == TokenKind_eof; +} + +static int find_macro(Preprocessor* pp, const char* name) { + for (size_t i = 0; i < pp->macros->len; ++i) { + if (pp->macros->data[i].kind == MacroKind_undef) + continue; + if (strcmp(pp->macros->data[i].name, name) == 0) { + return i; + } + } + return -1; +} + +static void undef_macro(Preprocessor* pp, int idx) { + pp->macros->data[idx].kind = MacroKind_undef; + // TODO: Can predefined macro like __FILE__ be undefined? +} + +static void skip_whitespaces(Preprocessor* pp) { + while (!pp_eof(pp) && consume_pp_token_if(pp, TokenKind_whitespace)) + ; +} + +static void skip_whitespaces_or_newlines(Preprocessor* pp, bool skip_newline) { + while (!pp_eof(pp) && (consume_pp_token_if(pp, TokenKind_whitespace) || + (skip_newline && consume_pp_token_if(pp, TokenKind_newline)))) + ; +} + +// It will not consume a new-line token. +static void seek_to_next_newline(Preprocessor* pp) { + while (!pp_eof(pp) && consume_pp_token_if_not(pp, TokenKind_newline)) + ; +} + +static void make_tokens_removed(Preprocessor* pp, int start, int end) { + for (int i = start; i < end; ++i) { + Token* tok = pp_token_at(pp, i); + tok->kind = TokenKind_removed; + tok->value.string = NULL; + } +} + +static Token* read_include_header_name(Preprocessor* pp) { + Token* tok = next_pp_token(pp); + if (tok->kind != TokenKind_header_name) { + fatal_error("%s:%d: invalid #include, %s", tok->loc.filename, tok->loc.line, token_stringify(tok)); + } + return tok; +} + +static const char* resolve_include_name(Preprocessor* pp, const Token* include_name_token) { + const char* include_name = include_name_token->value.string; + if (include_name[0] == '"') { + char* current_filename = strdup(include_name_token->loc.filename); + const char* current_dir = dirname(current_filename); + char* buf = calloc(strlen(include_name) - 2 + 1 + strlen(current_dir) + 1, sizeof(char)); + sprintf(buf, "%s/%.*s", current_dir, (int)(strlen(include_name) - 2), include_name + 1); + return buf; + } else { + for (size_t i = 0; i < pp->include_paths->len; ++i) { + char* buf = calloc(strlen(include_name) - 2 + 1 + strlen(pp->include_paths->data[i]) + 1, sizeof(char)); + sprintf(buf, "%s/%.*s", pp->include_paths->data[i], (int)(strlen(include_name) - 2), include_name + 1); + if (access(buf, F_OK | R_OK) == 0) { + return buf; + } + } + return NULL; + } +} + +static const char* resolve_next_include_name(Preprocessor* pp, const Token* include_name_token) { + const char* include_name = include_name_token->value.string; + char* current_filename = strdup(include_name_token->loc.filename); + for (size_t i = 0; i < pp->include_paths->len; ++i) { + char* buf = calloc(strlen(include_name) - 2 + 1 + strlen(pp->include_paths->data[i]) + 1, sizeof(char)); + sprintf(buf, "%s/%.*s", pp->include_paths->data[i], (int)(strlen(include_name) - 2), include_name + 1); + if (strcmp(buf, current_filename) == 0) { + // #include_next skips the same file. + continue; + } + if (access(buf, F_OK | R_OK) == 0) { + return buf; + } + } + return NULL; +} + +static int replace_pp_tokens(Preprocessor* pp, int dest_start, int dest_end, TokenArray* source_tokens) { + size_t n_tokens_to_remove = dest_end - dest_start; + size_t n_tokens_after_dest = pp->pp_tokens->len - dest_end; + size_t shift_amount; + + if (n_tokens_to_remove < source_tokens->len) { + // Move existing tokens backward to make room. + shift_amount = source_tokens->len - n_tokens_to_remove; + tokens_reserve(pp->pp_tokens, pp->pp_tokens->len + shift_amount); + memmove(pp_token_at(pp, dest_end + shift_amount), pp_token_at(pp, dest_end), + n_tokens_after_dest * sizeof(Token)); + pp->pp_tokens->len += shift_amount; + } else if (source_tokens->len < n_tokens_to_remove) { + // Move existing tokens forward to reduce room. + shift_amount = n_tokens_to_remove - source_tokens->len; + memmove(pp_token_at(pp, dest_start + source_tokens->len), pp_token_at(pp, dest_end), + n_tokens_after_dest * sizeof(Token)); + pp->pp_tokens->len -= shift_amount; + memset(pp_token_at(pp, pp->pp_tokens->len), 0, shift_amount * sizeof(Token)); + } + + memcpy(pp_token_at(pp, dest_start), source_tokens->data, source_tokens->len * sizeof(Token)); + + return dest_start + source_tokens->len; +} + +static int insert_pp_tokens(Preprocessor* pp, int dest_pos, TokenArray* source_tokens) { + return replace_pp_tokens(pp, dest_pos, dest_pos, source_tokens); +} + +static void replace_single_pp_token(Preprocessor* pp, int dest, Token* source_tok) { + TokenArray tokens; + tokens_init(&tokens, 1); + *tokens_push_new(&tokens) = *source_tok; + replace_pp_tokens(pp, dest, dest + 1, &tokens); +} + +static void expand_include_directive(Preprocessor* pp, const char* include_name, Token* original_include_name_tok) { + InFile* include_source = infile_open(include_name); + if (!include_source) { + fatal_error("%s:%d: cannot open include file: %s", original_include_name_tok->loc.filename, + original_include_name_tok->loc.line, token_stringify(original_include_name_tok)); + } + + TokenArray* include_pp_tokens = do_preprocess(include_source, pp->include_depth + 1, pp->macros, pp->include_paths, + pp->included_files, pp->generate_system_deps, pp->generate_user_deps); + tokens_pop(include_pp_tokens); // pop EOF token + pp->pos = insert_pp_tokens(pp, pp->pos, include_pp_tokens); +} + +// ws ::= many0(<whitespace>) +// macro-parameters ::= '(' <ws> opt(<identifier> <ws> many0(',' <ws> <identifier> <ws>)) ')' +static TokenArray* pp_parse_macro_parameters(Preprocessor* pp) { + TokenArray* parameters = calloc(1, sizeof(TokenArray)); + tokens_init(parameters, 2); + + // '(' is consumed by caller. + skip_whitespaces(pp); + Token* tok = consume_pp_token_if(pp, TokenKind_ident); + if (tok) { + *tokens_push_new(parameters) = *tok; + skip_whitespaces(pp); + while (consume_pp_token_if(pp, TokenKind_comma)) { + skip_whitespaces(pp); + tok = next_pp_token(pp); + if (tok->kind != TokenKind_ident) { + fatal_error("%s:%d: invalid macro syntax", tok->loc.filename, tok->loc.line); + } + *tokens_push_new(parameters) = *tok; + } + } + expect_pp_token(pp, TokenKind_paren_r); + + return parameters; +} + +// ws ::= many0(<whitespace>) +// macro-arguments ::= <ws> '(' <ws> opt(<any-token> <ws> many0(',' <ws> <any-token> <ws>)) ')' +static MacroArgArray* pp_parse_macro_arguments(Preprocessor* pp, bool skip_newline) { + MacroArgArray* args = macroargs_new(); + + skip_whitespaces_or_newlines(pp, skip_newline); + if (!consume_pp_token_if(pp, TokenKind_paren_l)) { + return NULL; + } + skip_whitespaces_or_newlines(pp, skip_newline); + Token* tok = peek_pp_token(pp); + if (!skip_newline && tok->kind == TokenKind_newline) { + expect_pp_token(pp, TokenKind_paren_r); + } + if (tok->kind != TokenKind_paren_r) { + MacroArg* arg = macroargs_push_new(args); + tokens_init(&arg->tokens, 4); + + // Parse argument tokens, handling nested parentheses. + int nesting = 0; + while (true) { + tok = peek_pp_token(pp); + + if (nesting == 0) { + if (tok->kind == TokenKind_paren_r) { + break; + } + if (tok->kind == TokenKind_comma) { + next_pp_token(pp); + skip_whitespaces_or_newlines(pp, skip_newline); + + arg = macroargs_push_new(args); + tokens_init(&arg->tokens, 4); + continue; + } + } + + if (tok->kind == TokenKind_paren_l) { + nesting++; + } else if (tok->kind == TokenKind_paren_r) { + nesting--; + if (nesting < 0) { + break; + } + } + + tok = next_pp_token(pp); + if (tok->kind != TokenKind_removed) { + *tokens_push_new(&arg->tokens) = *tok; + } + + skip_whitespaces_or_newlines(pp, skip_newline); + } + } + expect_pp_token(pp, TokenKind_paren_r); + + return args; +} + +static Token* concat_two_tokens(Token* left, Token* right) { + StrBuilder builder; + strbuilder_init(&builder); + + // Left + if (left->kind == TokenKind_ident) { + strbuilder_append_string(&builder, left->value.string); + } else if (left->kind == TokenKind_literal_int) { + char buf[32]; + sprintf(buf, "%d", left->value.integer); + strbuilder_append_string(&builder, buf); + } else { + strbuilder_append_string(&builder, token_stringify(left)); + } + + // Right + if (right->kind == TokenKind_ident) { + strbuilder_append_string(&builder, right->value.string); + } else if (right->kind == TokenKind_literal_int) { + char buf[32]; + sprintf(buf, "%d", right->value.integer); + strbuilder_append_string(&builder, buf); + } else { + strbuilder_append_string(&builder, token_stringify(right)); + } + + // Concat + Token* result = calloc(1, sizeof(Token)); + + char* endptr; + int val = strtol(builder.buf, &endptr, 10); + if (*endptr == '\0') { + result->kind = TokenKind_literal_int; + result->value.integer = val; + } else { + result->kind = TokenKind_ident; + result->value.string = builder.buf; + } + + return result; +} + +static Token* stringify_tokens(TokenArray* tokens) { + StrBuilder builder; + strbuilder_init(&builder); + + bool prev_whitespace = false; + for (size_t i = 0; i < tokens->len; ++i) { + Token* tok = &tokens->data[i]; + if (tok->kind == TokenKind_whitespace) { + prev_whitespace = true; + continue; + } + + if (prev_whitespace && builder.len > 0) { + strbuilder_append_char(&builder, ' '); + } + prev_whitespace = false; + + const char* str = token_stringify(tok); + + // For string literals and char constants, we need to escape quotes. + if (tok->kind == TokenKind_literal_str || tok->kind == TokenKind_character_constant) { + for (const char* p = str; *p; ++p) { + if (*p == '\\' || *p == '"') { + strbuilder_append_char(&builder, '\\'); + } + strbuilder_append_char(&builder, *p); + } + } else { + strbuilder_append_string(&builder, str); + } + } + + Token* result = calloc(1, sizeof(Token)); + result->kind = TokenKind_literal_str; + result->value.string = builder.buf; + return result; +} + +typedef struct MacroExpansionContext { + // Stack of macro names that have been already expanded. + StrArray already_expanded; +} MacroExpansionContext; + +MacroExpansionContext* macroexpansioncontext_new() { + MacroExpansionContext* ctx = calloc(1, sizeof(MacroExpansionContext)); + strings_init(&ctx->already_expanded); + return ctx; +} + +static int expand_macro(Preprocessor* pp, bool skip_newline, MacroExpansionContext* ctx); + +static void expand_macro_arg(Preprocessor* pp, MacroArg* arg, bool skip_newline, MacroExpansionContext* ctx) { + tokens_push_new(&arg->tokens)->kind = TokenKind_eof; + + Preprocessor* pp2 = preprocessor_new(&arg->tokens, pp->include_depth, pp->macros, pp->include_paths, + pp->included_files, pp->generate_system_deps, pp->generate_user_deps); + + size_t arg_token_count = arg->tokens.len; + size_t processed_token_count = 0; + while (processed_token_count < arg_token_count) { + if (peek_pp_token(pp2)->kind == TokenKind_ident) { + processed_token_count += expand_macro(pp2, skip_newline, ctx); + } else { + next_pp_token(pp2); + processed_token_count += 1; + } + } + + tokens_pop(&arg->tokens); +} + +static int expand_macro(Preprocessor* pp, bool skip_newline, MacroExpansionContext* ctx) { + if (ctx == NULL) { + ctx = macroexpansioncontext_new(); + } + + int macro_name_pos = pp->pos; + Token* macro_name = peek_pp_token(pp); + const char* macro_name_str = macro_name->value.string; + + // Supress expansion if the macro has already been expanded. + for (size_t i = 0; i < ctx->already_expanded.len; ++i) { + if (strcmp(ctx->already_expanded.data[i], macro_name->value.string) == 0) { + next_pp_token(pp); + return 1; + } + } + + int macro_idx = find_macro(pp, macro_name->value.string); + if (macro_idx == -1) { + next_pp_token(pp); + return 1; + } + + SourceLocation original_loc = macro_name->loc; + size_t token_count_before_expansion; + size_t token_count_after_expansion; + Macro* macro = &pp->macros->data[macro_idx]; + if (macro->kind == MacroKind_func) { + next_pp_token(pp); + MacroArgArray* args = pp_parse_macro_arguments(pp, skip_newline); + if (!args) { + // If function-like macro name is not followed by opening parenthesis, it is not a macro invocation. + return pp->pos - macro_name_pos; + } + token_count_before_expansion = pp->pos - macro_name_pos; + replace_pp_tokens(pp, macro_name_pos, pp->pos, ¯o->replacements); + + // Operands of # and ## operators should not be expanded. + bool* no_expand = calloc(macro->parameters.len, sizeof(bool)); + for (size_t i = 0; i < macro->replacements.len; ++i) { + TokenKind kind = macro->replacements.data[i].kind; + if (kind == TokenKind_hashhash) { + Token* lhs = NULL; + for (int j = i - 1; j >= 0; --j) { + if (macro->replacements.data[j].kind != TokenKind_whitespace) { + lhs = ¯o->replacements.data[j]; + break; + } + } + Token* rhs = NULL; + for (size_t j = i + 1; j < macro->replacements.len; ++j) { + if (macro->replacements.data[j].kind != TokenKind_whitespace) { + rhs = ¯o->replacements.data[j]; + break; + } + } + if (lhs) { + int param1 = macro_find_param(macro, lhs); + if (param1 != -1) { + no_expand[param1] = true; + } + } + if (rhs) { + int param2 = macro_find_param(macro, rhs); + if (param2 != -1) { + no_expand[param2] = true; + } + } + } else if (kind == TokenKind_hash) { + Token* operand = NULL; + for (size_t j = i + 1; j < macro->replacements.len; ++j) { + if (macro->replacements.data[j].kind != TokenKind_whitespace) { + operand = ¯o->replacements.data[j]; + break; + } + } + if (operand) { + int param = macro_find_param(macro, operand); + if (param != -1) { + no_expand[param] = true; + } + } + } + } + + // Argument expansion + for (size_t i = 0; i < args->len; ++i) { + if (no_expand[i]) + continue; + MacroArg* arg = &args->data[i]; + expand_macro_arg(pp, arg, skip_newline, ctx); + } + + // Parameter substitution + size_t token_count = 0; + size_t offset = 0; + for (size_t i = 0; i < macro->replacements.len; ++i) { + Token* tok = pp_token_at(pp, macro_name_pos + i + offset); + + // Handle # operator (stringification) + if (tok->kind == TokenKind_hash) { + size_t param_idx_in_replacements = 0; + for (size_t j = i + 1; j < macro->replacements.len; ++j) { + if (macro->replacements.data[j].kind != TokenKind_whitespace) { + param_idx_in_replacements = j; + break; + } + } + + Token* param_tok = ¯o->replacements.data[param_idx_in_replacements]; + int macro_param_idx = macro_find_param(macro, param_tok); + if (macro_param_idx != -1) { + Token* stringified = stringify_tokens(&args->data[macro_param_idx].tokens); + + // Replace ('#' <whitespace>* <param>) with stringified token. + TokenArray single_token; + tokens_init(&single_token, 1); + *tokens_push_new(&single_token) = *stringified; + + size_t tokens_to_replace = param_idx_in_replacements - i + 1; + replace_pp_tokens(pp, macro_name_pos + i + offset, macro_name_pos + i + offset + tokens_to_replace, + &single_token); + token_count += 1; + offset += 1 - tokens_to_replace; + i = param_idx_in_replacements; // Skip to after the parameter + continue; + } + } + + int macro_param_idx = macro_find_param(macro, tok); + if (macro_param_idx != -1) { + size_t arg_token_count = args->data[macro_param_idx].tokens.len; + if (arg_token_count == 0) { + // Empty argument: insert a placemarker token + TokenArray placemarker_token; + tokens_init(&placemarker_token, 1); + Token* pm = tokens_push_new(&placemarker_token); + pm->kind = TokenKind_placemarker; + pm->loc = tok->loc; + replace_pp_tokens(pp, macro_name_pos + i + offset, macro_name_pos + i + offset + 1, + &placemarker_token); + token_count += 1; + // offset stays the same (1 - 1 = 0) + } else { + replace_pp_tokens(pp, macro_name_pos + i + offset, macro_name_pos + i + offset + 1, + &args->data[macro_param_idx].tokens); + token_count += arg_token_count; + offset += arg_token_count - 1; + } + } else { + ++token_count; + } + } + + // Handle ## operator + size_t token_count2 = 0; + for (size_t i = 0; i < token_count; ++i) { + int pos = macro_name_pos + i; + Token* tok = pp_token_at(pp, pos); + if (tok->kind == TokenKind_hashhash) { + // Concatenate previous and next tokens + int lhs_pos = -1; + for (int j = pos - 1; j >= macro_name_pos; --j) { + if (pp_token_at(pp, j)->kind != TokenKind_whitespace) { + lhs_pos = j; + break; + } + } + int rhs_pos = -1; + for (size_t j = pos + 1; j < macro_name_pos + token_count; ++j) { + if (pp_token_at(pp, j)->kind != TokenKind_whitespace) { + rhs_pos = j; + break; + } + } + if (lhs_pos == -1 || rhs_pos == -1) { + fatal_error("%s:%d: invalid usage of ## operator", tok->loc.filename, tok->loc.line); + } + + Token* lhs_tok = pp_token_at(pp, lhs_pos); + Token* rhs_tok = pp_token_at(pp, rhs_pos); + bool lhs_is_placemarker = lhs_tok->kind == TokenKind_placemarker; + bool rhs_is_placemarker = rhs_tok->kind == TokenKind_placemarker; + + TokenArray result_tokens; + tokens_init(&result_tokens, 1); + + if (lhs_is_placemarker && rhs_is_placemarker) { + // Both are placemarkers: result is a placemarker + Token* pm = tokens_push_new(&result_tokens); + pm->kind = TokenKind_placemarker; + pm->loc = tok->loc; + } else if (lhs_is_placemarker) { + // Left is placemarker: result is the right token + *tokens_push_new(&result_tokens) = *rhs_tok; + } else if (rhs_is_placemarker) { + // Right is placemarker: result is the left token + *tokens_push_new(&result_tokens) = *lhs_tok; + } else { + // Neither is placemarker: concatenate them + Token* concatenated = concat_two_tokens(lhs_tok, rhs_tok); + *tokens_push_new(&result_tokens) = *concatenated; + } + + replace_pp_tokens(pp, lhs_pos, rhs_pos + 1, &result_tokens); + token_count -= rhs_pos - lhs_pos; + i -= pos - lhs_pos; + token_count2 -= pos - lhs_pos - 1; + } else { + ++token_count2; + } + } + + // Remove placemarker tokens after ## processing + for (size_t i = 0; i < token_count2; ++i) { + Token* tok = pp_token_at(pp, macro_name_pos + i); + if (tok->kind == TokenKind_placemarker) { + tok->kind = TokenKind_removed; + } + } + + // Inherit a source location from the original macro token. + for (size_t i = 0; i < token_count2; ++i) { + pp_token_at(pp, macro_name_pos + i)->loc = original_loc; + } + token_count_after_expansion = token_count2; + } else if (macro->kind == MacroKind_obj) { + replace_pp_tokens(pp, macro_name_pos, macro_name_pos + 1, ¯o->replacements); + // Inherit a source location from the original macro token. + for (size_t i = 0; i < macro->replacements.len; ++i) { + pp_token_at(pp, macro_name_pos + i)->loc = original_loc; + } + token_count_before_expansion = 1; + token_count_after_expansion = macro->replacements.len; + } else if (macro->kind == MacroKind_builtin_file) { + Token file_tok; + file_tok.kind = TokenKind_literal_str; + file_tok.value.string = macro_name->loc.filename; + file_tok.loc.filename = NULL; + file_tok.loc.line = 0; + replace_single_pp_token(pp, macro_name_pos, &file_tok); + token_count_before_expansion = 1; + token_count_after_expansion = 1; + } else if (macro->kind == MacroKind_builtin_line) { + Token line_tok; + line_tok.kind = TokenKind_literal_int; + line_tok.value.integer = macro_name->loc.line; + line_tok.loc.filename = NULL; + line_tok.loc.line = 0; + replace_single_pp_token(pp, macro_name_pos, &line_tok); + token_count_before_expansion = 1; + token_count_after_expansion = 1; + } else { + unreachable(); + } + + // Recursive expansion. + strings_push(&ctx->already_expanded, macro_name_str); + pp->pos = macro_name_pos; + size_t processed_token_count = 0; + while (processed_token_count < token_count_after_expansion) { + if (peek_pp_token(pp)->kind == TokenKind_ident) { + processed_token_count += expand_macro(pp, skip_newline, ctx); + } else { + next_pp_token(pp); + processed_token_count += 1; + } + } + strings_pop(&ctx->already_expanded); + + return token_count_before_expansion; +} + +typedef enum { + GroupDelimiterKind_normal, + GroupDelimiterKind_after_if_directive, + GroupDelimiterKind_after_else_directive, +} GroupDelimiterKind; + +static bool is_delimiter_of_current_group(GroupDelimiterKind delimiter_kind, TokenKind token_kind) { + if (delimiter_kind == GroupDelimiterKind_normal) { + return token_kind == TokenKind_eof; + } else if (delimiter_kind == GroupDelimiterKind_after_if_directive) { + return token_kind == TokenKind_pp_directive_elif || token_kind == TokenKind_pp_directive_elifdef || + token_kind == TokenKind_pp_directive_elifndef || token_kind == TokenKind_pp_directive_else || + token_kind == TokenKind_pp_directive_endif; + } else if (delimiter_kind == GroupDelimiterKind_after_else_directive) { + return token_kind == TokenKind_pp_directive_endif; + } else { + unreachable(); + } +} + +static int replace_pp_tokens(Preprocessor*, int, int, TokenArray*); +static void include_conditionally(Preprocessor* pp, GroupDelimiterKind delimiter_kind, bool do_include); + +static bool preprocess_if_group_or_elif_group(Preprocessor* pp, bool did_include) { + Token* directive = next_pp_token(pp); + + if (directive->kind == TokenKind_pp_directive_if || directive->kind == TokenKind_pp_directive_elif) { + int condition_expr_start_pos = pp->pos; + + while (!pp_eof(pp)) { + Token* tok = peek_pp_token(pp); + if (tok->kind == TokenKind_newline) { + break; + } else if (tok->kind == TokenKind_ident) { + if (strcmp(tok->value.string, "defined") == 0) { + int defined_pos = pp->pos; + // 'defined' <ws>* '(' <ws>* <ident> <ws>* ')' + // 'defined' <ws>* <ident> + skip_pp_token(pp, TokenKind_ident); + skip_whitespaces(pp); + Token* macro_name; + if (consume_pp_token_if(pp, TokenKind_paren_l)) { + skip_whitespaces(pp); + macro_name = expect_pp_token(pp, TokenKind_ident); + skip_whitespaces(pp); + expect_pp_token(pp, TokenKind_paren_r); + } else { + macro_name = expect_pp_token(pp, TokenKind_ident); + } + bool is_defined = find_macro(pp, macro_name->value.string) != -1; + TokenArray defined_results; + tokens_init(&defined_results, 1); + Token* defined_result = tokens_push_new(&defined_results); + defined_result->kind = TokenKind_literal_int; + defined_result->value.integer = is_defined; + pp->pos = replace_pp_tokens(pp, defined_pos, pp->pos, &defined_results); + } else { + expand_macro(pp, false, NULL); + } + } else { + next_pp_token(pp); + } + } + + // all remaining identifiers other than true (including those lexically identical to keywords such as false) are + // replaced with the pp-number 0, true is replaced with pp-number 1, and then each preprocessing token is + // converted into a token. + for (int pos = condition_expr_start_pos; pos < pp->pos; ++pos) { + Token* tok = pp_token_at(pp, pos); + if (tok->kind == TokenKind_ident) { + bool is_true = strcmp(tok->value.string, "true") == 0; + tok->kind = TokenKind_literal_int; + tok->value.integer = is_true; + } + } + + int condition_expr_tokens_len = pp->pos - condition_expr_start_pos; + TokenArray condition_expr_tokens; + // +1 to add EOF token at the end. + tokens_init(&condition_expr_tokens, condition_expr_tokens_len + 1); + for (int i = 0; i < condition_expr_tokens_len; ++i) { + *tokens_push_new(&condition_expr_tokens) = *pp_token_at(pp, condition_expr_start_pos + i); + } + Token* eof_tok = tokens_push_new(&condition_expr_tokens); + eof_tok->kind = TokenKind_eof; + + bool do_include = pp_eval_constant_expr(&condition_expr_tokens) && !did_include; + include_conditionally(pp, GroupDelimiterKind_after_if_directive, do_include); + return do_include; + } else if (directive->kind == TokenKind_pp_directive_ifdef || directive->kind == TokenKind_pp_directive_elifdef) { + skip_whitespaces(pp); + Token* macro_name = consume_pp_token_if(pp, TokenKind_ident); + if (!macro_name) { + fatal_error(""); + } + + bool do_include = !did_include && find_macro(pp, macro_name->value.string) != -1; + include_conditionally(pp, GroupDelimiterKind_after_if_directive, do_include); + return do_include; + } else if (directive->kind == TokenKind_pp_directive_ifndef || directive->kind == TokenKind_pp_directive_elifndef) { + skip_whitespaces(pp); + Token* macro_name = consume_pp_token_if(pp, TokenKind_ident); + if (!macro_name) { + fatal_error(""); + } + + bool do_include = !did_include && find_macro(pp, macro_name->value.string) == -1; + include_conditionally(pp, GroupDelimiterKind_after_if_directive, do_include); + return do_include; + } else { + unreachable(); + } +} + +static bool preprocess_if_group(Preprocessor* pp) { + return preprocess_if_group_or_elif_group(pp, false); +} + +static bool preprocess_elif_group(Preprocessor* pp, bool did_include) { + return preprocess_if_group_or_elif_group(pp, did_include); +} + +// elif-groups: +// { elif-group }+ +static bool preprocess_elif_groups_opt(Preprocessor* pp, bool did_include) { + while (!pp_eof(pp)) { + Token* tok = peek_pp_token(pp); + if (tok->kind == TokenKind_pp_directive_elif || tok->kind == TokenKind_pp_directive_elifdef || + tok->kind == TokenKind_pp_directive_elifndef) { + did_include |= preprocess_elif_group(pp, did_include); + } else { + break; + } + } + return did_include; +} + +// else-group: +// '#' 'else' group? +static void preprocess_else_group(Preprocessor* pp, bool did_include) { + skip_pp_token(pp, TokenKind_pp_directive_else); + skip_whitespaces(pp); + expect_pp_token(pp, TokenKind_newline); + + include_conditionally(pp, GroupDelimiterKind_after_else_directive, !did_include); +} + +// endif-line: +// '#' 'endif' new-line +static void preprocess_endif_directive(Preprocessor* pp) { + skip_pp_token(pp, TokenKind_pp_directive_endif); + skip_whitespaces(pp); + expect_pp_token(pp, TokenKind_newline); +} + +// if-section: +// if-group elif-groups? else-group? endif-line +static void preprocess_if_section(Preprocessor* pp) { + bool did_include = preprocess_if_group(pp); + did_include = preprocess_elif_groups_opt(pp, did_include); + if (peek_pp_token(pp)->kind == TokenKind_pp_directive_else) { + preprocess_else_group(pp, did_include); + } + preprocess_endif_directive(pp); +} + +static void preprocess_include_directive(Preprocessor* pp) { + skip_pp_token(pp, TokenKind_pp_directive_include); + skip_whitespaces(pp); + Token* include_name = read_include_header_name(pp); + const char* include_name_resolved = resolve_include_name(pp, include_name); + if (include_name_resolved == NULL) { + fatal_error("%s:%d: cannot resolve include file name: %s", include_name->loc.filename, include_name->loc.line, + token_stringify(include_name)); + } + + if ((pp->generate_system_deps && include_name->value.string[0] == '<') || + (pp->generate_user_deps && include_name->value.string[0] == '"')) { + bool already_included = false; + for (size_t i = 0; i < pp->included_files->len; ++i) { + if (strcmp(pp->included_files->data[i], include_name_resolved) == 0) { + already_included = true; + break; + } + } + if (!already_included) { + strings_push(pp->included_files, include_name_resolved); + } + } + + skip_whitespaces(pp); + expect_pp_token(pp, TokenKind_newline); + expand_include_directive(pp, include_name_resolved, include_name); +} + +// #include_next is a part of GNU extension. +// https://gcc.gnu.org/onlinedocs/cpp/Wrapper-Headers.html +static void preprocess_include_next_directive(Preprocessor* pp) { + skip_pp_token(pp, TokenKind_pp_directive_include_next); + skip_whitespaces(pp); + Token* include_name = read_include_header_name(pp); + const char* include_name_resolved = resolve_next_include_name(pp, include_name); + if (include_name_resolved == NULL) { + fatal_error("%s:%d: cannot resolve include file name: %s", include_name->loc.filename, include_name->loc.line, + token_stringify(include_name)); + } + + if (include_name->value.string[0] == '"') { + bool already_included = false; + for (size_t i = 0; i < pp->included_files->len; ++i) { + if (strcmp(pp->included_files->data[i], include_name_resolved) == 0) { + already_included = true; + break; + } + } + if (!already_included) { + strings_push(pp->included_files, include_name_resolved); + } + } + + skip_whitespaces(pp); + expect_pp_token(pp, TokenKind_newline); + expand_include_directive(pp, include_name_resolved, include_name); +} + +static void preprocess_embed_directive(Preprocessor*) { + unimplemented(); +} + +static void preprocess_define_directive(Preprocessor* pp) { + skip_pp_token(pp, TokenKind_pp_directive_define); + skip_whitespaces(pp); + Token* macro_name = expect_pp_token(pp, TokenKind_ident); + + if (consume_pp_token_if(pp, TokenKind_paren_l)) { + TokenArray* parameters = pp_parse_macro_parameters(pp); + skip_whitespaces(pp); + int replacements_start_pos = pp->pos; + seek_to_next_newline(pp); + Macro* macro = macros_push_new(pp->macros); + macro->kind = MacroKind_func; + macro->name = macro_name->value.string; + macro->parameters = *parameters; + int n_replacements = pp->pos - replacements_start_pos; + tokens_init(¯o->replacements, n_replacements); + for (int i = 0; i < n_replacements; ++i) { + *tokens_push_new(¯o->replacements) = *pp_token_at(pp, replacements_start_pos + i); + } + // Remove trailing whitespaces. + for (int i = n_replacements - 1; i >= 0; --i) { + if (macro->replacements.data[i].kind == TokenKind_whitespace) { + tokens_pop(¯o->replacements); + } else { + break; + } + } + } else { + skip_whitespaces(pp); + int replacements_start_pos = pp->pos; + seek_to_next_newline(pp); + Macro* macro = macros_push_new(pp->macros); + macro->kind = MacroKind_obj; + macro->name = macro_name->value.string; + int n_replacements = pp->pos - replacements_start_pos; + tokens_init(¯o->replacements, n_replacements); + for (int i = 0; i < n_replacements; ++i) { + *tokens_push_new(¯o->replacements) = *pp_token_at(pp, replacements_start_pos + i); + } + // Remove trailing whitespaces. + for (int i = n_replacements - 1; i >= 0; --i) { + if (macro->replacements.data[i].kind == TokenKind_whitespace) { + tokens_pop(¯o->replacements); + } else { + break; + } + } + } + expect_pp_token(pp, TokenKind_newline); +} + +static void preprocess_undef_directive(Preprocessor* pp) { + skip_pp_token(pp, TokenKind_pp_directive_undef); + skip_whitespaces(pp); + Token* macro_name = consume_pp_token_if(pp, TokenKind_ident); + if (macro_name) { + int macro_idx = find_macro(pp, macro_name->value.string); + if (macro_idx != -1) { + undef_macro(pp, macro_idx); + } + } +} + +static void preprocess_line_directive(Preprocessor*) { + unimplemented(); +} + +// control-line: +// ... +// '#' 'error' pp-tokens? new-line +static void preprocess_error_directive(Preprocessor* pp) { + // The C23 standard does not specify format of diagnostic message caused by #error. + // Ducc assumes that #error takes exactly one argument consisting of a string literal. + // TODO: output some general message or something else if not. + skip_pp_token(pp, TokenKind_pp_directive_error); + skip_whitespaces(pp); + Token* msg = expect_pp_token(pp, TokenKind_literal_str); + skip_whitespaces(pp); + expect_pp_token(pp, TokenKind_newline); + fatal_error("%s:%d: %s", msg->loc.filename, msg->loc.line, msg->value.string); +} + +// control-line: +// ... +// '#' 'warning' pp-tokens? new-line +static void preprocess_warning_directive(Preprocessor* pp) { + // The C23 standard does not specify format of diagnostic message caused by #warning. + // Ducc assumes that #warning takes exactly one argument consisting of a string literal. + // TODO: output some general message or something else if not. + skip_pp_token(pp, TokenKind_pp_directive_warning); + skip_whitespaces(pp); + Token* msg = expect_pp_token(pp, TokenKind_literal_str); + skip_whitespaces(pp); + expect_pp_token(pp, TokenKind_newline); + fprintf(stderr, "%s:%d: %s", msg->loc.filename, msg->loc.line, msg->value.string); +} + +static void preprocess_pragma_directive(Preprocessor* pp) { + // Ignore all #pragma directives for now. + skip_pp_token(pp, TokenKind_pp_directive_pragma); + seek_to_next_newline(pp); + skip_pp_token(pp, TokenKind_newline); +} + +static void preprocess_nop_directive(Preprocessor* pp) { + skip_pp_token(pp, TokenKind_pp_directive_nop); +} + +static void preprocess_non_directive_directive(Preprocessor* pp) { + Token* tok = peek_pp_token(pp); + // C23 6.10.1.13: + // The execution of a non-directive preprocessing directive results in undefined behavior. + fatal_error("%s:%d: invalid preprocessing directive, '%s'", tok->loc.filename, tok->loc.line, token_stringify(tok)); +} + +static void preprocess_text_line(Preprocessor* pp) { + while (!pp_eof(pp)) { + if (consume_pp_token_if(pp, TokenKind_newline)) { + return; + } + if (consume_pp_token_if_not(pp, TokenKind_ident)) { + continue; + } + + expand_macro(pp, true, NULL); + } + expect_pp_token(pp, TokenKind_newline); +} + +// group-part: +// if-section +// control-line +// '#' non-directive +// text-line +// +// control-line: +// '#' 'include' ... +// '#' 'embed' ... +// '#' 'define' ... +// '#' 'undef' ... +// '#' 'line' ... +// '#' 'error' ... +// '#' 'warning' ... +// '#' 'pragma' ... +// '#' new-line +static void preprocess_group_part(Preprocessor* pp) { + Token* tok = peek_pp_token(pp); + if (tok->kind == TokenKind_pp_directive_if || tok->kind == TokenKind_pp_directive_ifdef || + tok->kind == TokenKind_pp_directive_ifndef) { + preprocess_if_section(pp); + } else if (tok->kind == TokenKind_pp_directive_elif || tok->kind == TokenKind_pp_directive_elifdef || + tok->kind == TokenKind_pp_directive_elifndef || tok->kind == TokenKind_pp_directive_else || + tok->kind == TokenKind_pp_directive_endif) { + fatal_error("%s:%d: unexpected '%s'; no corresponding '#if'*", tok->loc.filename, tok->loc.line, + token_kind_stringify(tok->kind)); + } else if (tok->kind == TokenKind_pp_directive_include) { + preprocess_include_directive(pp); + } else if (tok->kind == TokenKind_pp_directive_include_next) { + preprocess_include_next_directive(pp); + } else if (tok->kind == TokenKind_pp_directive_embed) { + preprocess_embed_directive(pp); + } else if (tok->kind == TokenKind_pp_directive_define) { + preprocess_define_directive(pp); + } else if (tok->kind == TokenKind_pp_directive_undef) { + preprocess_undef_directive(pp); + } else if (tok->kind == TokenKind_pp_directive_line) { + preprocess_line_directive(pp); + } else if (tok->kind == TokenKind_pp_directive_error) { + preprocess_error_directive(pp); + } else if (tok->kind == TokenKind_pp_directive_warning) { + preprocess_warning_directive(pp); + } else if (tok->kind == TokenKind_pp_directive_pragma) { + preprocess_pragma_directive(pp); + } else if (tok->kind == TokenKind_pp_directive_nop) { + preprocess_nop_directive(pp); + } else if (tok->kind == TokenKind_pp_directive_non_directive) { + preprocess_non_directive_directive(pp); + } else { + preprocess_text_line(pp); + } +} + +// group: +// { group-part }+ +static void preprocess_group_opt(Preprocessor* pp, GroupDelimiterKind delimiter_kind) { + while (!pp_eof(pp)) { + Token* tok = peek_pp_token(pp); + if (is_delimiter_of_current_group(delimiter_kind, tok->kind)) + return; + preprocess_group_part(pp); + } + + if (delimiter_kind != GroupDelimiterKind_normal) { + expect_pp_token(pp, TokenKind_pp_directive_endif); + } +} + +static void skip_group_opt(Preprocessor* pp, GroupDelimiterKind delimiter_kind) { + assert(delimiter_kind != GroupDelimiterKind_normal); + int nesting = 0; + + while (!pp_eof(pp)) { + Token* tok = peek_pp_token(pp); + if (nesting == 0 && is_delimiter_of_current_group(delimiter_kind, tok->kind)) { + return; + } + if (tok->kind == TokenKind_pp_directive_if || tok->kind == TokenKind_pp_directive_ifdef || + tok->kind == TokenKind_pp_directive_ifndef) { + ++nesting; + } else if (tok->kind == TokenKind_pp_directive_endif) { + --nesting; + } + int first_pos = pp->pos; + seek_to_next_newline(pp); + expect_pp_token(pp, TokenKind_newline); + make_tokens_removed(pp, first_pos, pp->pos); + } + + expect_pp_token(pp, TokenKind_pp_directive_endif); +} + +static void include_conditionally(Preprocessor* pp, GroupDelimiterKind delimiter_kind, bool do_include) { + if (do_include) { + preprocess_group_opt(pp, delimiter_kind); + } else { + skip_group_opt(pp, delimiter_kind); + } +} + +// preprocessing-file: +// group? +static void preprocess_preprocessing_file(Preprocessor* pp) { + preprocess_group_opt(pp, GroupDelimiterKind_normal); +} + +static void remove_pp_directive(Preprocessor* pp, int directive_token_pos) { + seek_to_next_newline(pp); + skip_pp_token(pp, TokenKind_newline); + make_tokens_removed(pp, directive_token_pos, pp->pos); +} + +static void remove_pp_directives(Preprocessor* pp) { + pp->pos = 0; + while (!pp_eof(pp)) { + if (is_pp_directive(peek_pp_token(pp)->kind)) { + remove_pp_directive(pp, pp->pos); + } else { + next_pp_token(pp); + } + } +} + +static char* get_ducc_include_path() { + const char* self_dir = get_self_dir(); + char* buf = calloc(strlen(self_dir) + strlen("/../include") + 1, sizeof(char)); + sprintf(buf, "%s/../include", self_dir); + return buf; +} + +static TokenArray* do_preprocess(InFile* src, int depth, MacroArray* macros, StrArray* include_paths, + StrArray* included_files, bool generate_system_deps, bool generate_user_deps) { + TokenArray* pp_tokens = tokenize(src); + Preprocessor* pp = preprocessor_new(pp_tokens, depth, macros, include_paths, included_files, generate_system_deps, + generate_user_deps); + + preprocess_preprocessing_file(pp); + remove_pp_directives(pp); + return pp->pp_tokens; +} + +TokenArray* preprocess(InFile* src, StrArray* user_defines, StrArray* user_include_dirs, StrArray* included_files, + bool generate_system_deps, bool generate_user_deps) { + MacroArray* macros = macros_new(); + add_predefined_macros(macros); + add_user_defines(macros, user_defines); + strings_push(included_files, src->loc.filename); + + StrArray* include_paths = calloc(1, sizeof(StrArray)); + strings_init(include_paths); + + // Ducc's built-in headers has highest priority. + strings_push(include_paths, get_ducc_include_path()); + + for (size_t i = 0; i < user_include_dirs->len; ++i) { + strings_push(include_paths, user_include_dirs->data[i]); + } + strings_push(include_paths, "/usr/local/include"); + strings_push(include_paths, "/usr/include/x86_64-linux-gnu"); + strings_push(include_paths, "/usr/include"); + + return do_preprocess(src, 0, macros, include_paths, included_files, generate_system_deps, generate_user_deps); +} + +void concat_adjacent_string_literals(TokenArray* pp_tokens) { + size_t last_nonempty_token_pos = 0; + TokenKind last_nonempty_token_kind = TokenKind_eof; + for (size_t pos = 0; pos < pp_tokens->len; ++pos) { + Token* pp_tok = &pp_tokens->data[pos]; + TokenKind k = pp_tok->kind; + if (k == TokenKind_removed || k == TokenKind_whitespace || k == TokenKind_newline) { + continue; + } + if (k == TokenKind_literal_str && last_nonempty_token_kind == TokenKind_literal_str) { + // Concatenate adjacent string literals. + Token* last_pp_tok = &pp_tokens->data[last_nonempty_token_pos]; + const char* s1 = last_pp_tok->value.string; + size_t l1 = strlen(s1); + const char* s2 = pp_tok->value.string; + size_t l2 = strlen(s2); + char* buf = calloc(l1 + l2 + 1, sizeof(char)); + memcpy(buf, s1, l1); + memcpy(buf + l1, s2, l2); + last_pp_tok->value.string = buf; + pp_tok->kind = TokenKind_removed; + } else { + last_nonempty_token_pos = pos; + last_nonempty_token_kind = k; + } + } +} + +void print_token_to_file(FILE* out, TokenArray* pp_tokens) { + for (size_t i = 0; i < pp_tokens->len; ++i) { + Token* tok = &pp_tokens->data[i]; + + if (tok->kind == TokenKind_whitespace) { + // TODO: preserve indent? + fprintf(out, " "); + } else if (tok->kind == TokenKind_removed) { + // Output nothing for removed tokens + } else if (tok->kind == TokenKind_newline) { + // TODO: remove adjacent newlines? + fprintf(out, "\n"); + } else if (tok->kind != TokenKind_eof) { + // TODO: string literal + fprintf(out, "%s", token_stringify(tok)); + // Add space after token if next token is not punctuation + // TODO: apply stricter approach + if (i + 1 < pp_tokens->len) { + Token* next = &pp_tokens->data[i + 1]; + if (next->kind != TokenKind_newline && next->kind != TokenKind_whitespace && + next->kind != TokenKind_removed && next->kind != TokenKind_eof && next->kind != TokenKind_comma && + next->kind != TokenKind_semicolon && next->kind != TokenKind_paren_r && + next->kind != TokenKind_bracket_r && next->kind != TokenKind_brace_r && + next->kind != TokenKind_dot) { + fprintf(out, " "); + } + } + } + } +} |
