diff options
| author | nsfisis <nsfisis@gmail.com> | 2025-08-03 14:02:54 +0900 |
|---|---|---|
| committer | nsfisis <nsfisis@gmail.com> | 2025-08-15 10:06:21 +0900 |
| commit | 1364b1303e96221c52568aed0726adc83aee1dc4 (patch) | |
| tree | d4569d48d543e83593fdd4a8fcfc919a65eb47cb /preprocess.c | |
| parent | e1de8fc36f11ac932707c7113eb4bf3ebc4b1f74 (diff) | |
| download | ducc-1364b1303e96221c52568aed0726adc83aee1dc4.tar.gz ducc-1364b1303e96221c52568aed0726adc83aee1dc4.tar.zst ducc-1364b1303e96221c52568aed0726adc83aee1dc4.zip | |
refactor: merge PpToken and Token
Diffstat (limited to 'preprocess.c')
| -rw-r--r-- | preprocess.c | 525 |
1 file changed, 366 insertions(+), 159 deletions(-)
diff --git a/preprocess.c b/preprocess.c index 0978dd3..2caa37c 100644 --- a/preprocess.c +++ b/preprocess.c @@ -1,36 +1,196 @@ -enum PpTokenKind { - PpTokenKind_eof, - - PpTokenKind_header_name, - PpTokenKind_identifier, - PpTokenKind_pp_number, - PpTokenKind_character_constant, - PpTokenKind_string_literal, - PpTokenKind_punctuator, - PpTokenKind_other, - PpTokenKind_whitespace, +enum TokenKind { + TokenKind_eof, + + // Only preprocessing phase. + TokenKind_hash, + TokenKind_hashhash, + TokenKind_whitespace, + TokenKind_other, + TokenKind_character_constant, + + TokenKind_and, + TokenKind_andand, + TokenKind_arrow, + TokenKind_assign, + TokenKind_assign_add, + TokenKind_assign_sub, + TokenKind_brace_l, + TokenKind_brace_r, + TokenKind_bracket_l, + TokenKind_bracket_r, + TokenKind_comma, + TokenKind_dot, + TokenKind_ellipsis, + TokenKind_eq, + TokenKind_ge, + TokenKind_gt, + TokenKind_ident, + TokenKind_keyword_break, + TokenKind_keyword_char, + TokenKind_keyword_const, + TokenKind_keyword_continue, + TokenKind_keyword_do, + TokenKind_keyword_else, + TokenKind_keyword_enum, + TokenKind_keyword_extern, + TokenKind_keyword_for, + TokenKind_keyword_if, + TokenKind_keyword_int, + TokenKind_keyword_long, + TokenKind_keyword_return, + TokenKind_keyword_short, + TokenKind_keyword_sizeof, + TokenKind_keyword_struct, + TokenKind_keyword_typeof, + TokenKind_keyword_void, + TokenKind_keyword_while, + TokenKind_le, + TokenKind_lt, + TokenKind_literal_int, + TokenKind_literal_str, + TokenKind_minus, + TokenKind_minusminus, + TokenKind_ne, + TokenKind_not, + TokenKind_or, + TokenKind_oror, + TokenKind_paren_l, + TokenKind_paren_r, + TokenKind_percent, + TokenKind_plus, + TokenKind_plusplus, + TokenKind_semicolon, + TokenKind_slash, + TokenKind_star, + + // va_start() is currently implemented as a special form due to the current limitation of #define macro. 
+ TokenKind_va_start, }; -typedef enum PpTokenKind PpTokenKind; +typedef enum TokenKind TokenKind; -const char* pp_token_kind_stringify(PpTokenKind kind) { - if (kind == PpTokenKind_eof) +const char* token_kind_stringify(TokenKind k) { + if (k == TokenKind_eof) return "<eof>"; - else if (kind == PpTokenKind_header_name) - return "<header-name>"; - else if (kind == PpTokenKind_identifier) - return "<identifier>"; - else if (kind == PpTokenKind_pp_number) - return "<pp-number>"; - else if (kind == PpTokenKind_character_constant) - return "<character-constant>"; - else if (kind == PpTokenKind_string_literal) - return "<string-literal>"; - else if (kind == PpTokenKind_punctuator) - return "<punctuator>"; - else if (kind == PpTokenKind_other) - return "<other>"; - else if (kind == PpTokenKind_whitespace) + else if (k == TokenKind_hash) + return "#"; + else if (k == TokenKind_hashhash) + return "##"; + else if (k == TokenKind_whitespace) return "<whitespace>"; + else if (k == TokenKind_other) + return "<other>"; + else if (k == TokenKind_character_constant) + return "<character-constant>"; + else if (k == TokenKind_and) + return "&"; + else if (k == TokenKind_andand) + return "&&"; + else if (k == TokenKind_arrow) + return "->"; + else if (k == TokenKind_assign) + return "="; + else if (k == TokenKind_assign_add) + return "+="; + else if (k == TokenKind_assign_sub) + return "-="; + else if (k == TokenKind_brace_l) + return "{"; + else if (k == TokenKind_brace_r) + return "}"; + else if (k == TokenKind_bracket_l) + return "["; + else if (k == TokenKind_bracket_r) + return "]"; + else if (k == TokenKind_comma) + return ","; + else if (k == TokenKind_dot) + return "."; + else if (k == TokenKind_ellipsis) + return "..."; + else if (k == TokenKind_eq) + return "=="; + else if (k == TokenKind_ge) + return ">="; + else if (k == TokenKind_gt) + return ">"; + else if (k == TokenKind_ident) + return "<identifier>"; + else if (k == TokenKind_keyword_break) + return "break"; + else 
if (k == TokenKind_keyword_char) + return "char"; + else if (k == TokenKind_keyword_const) + return "const"; + else if (k == TokenKind_keyword_continue) + return "continue"; + else if (k == TokenKind_keyword_do) + return "do"; + else if (k == TokenKind_keyword_else) + return "else"; + else if (k == TokenKind_keyword_enum) + return "enum"; + else if (k == TokenKind_keyword_extern) + return "extern"; + else if (k == TokenKind_keyword_for) + return "for"; + else if (k == TokenKind_keyword_if) + return "if"; + else if (k == TokenKind_keyword_int) + return "int"; + else if (k == TokenKind_keyword_long) + return "long"; + else if (k == TokenKind_keyword_return) + return "return"; + else if (k == TokenKind_keyword_short) + return "short"; + else if (k == TokenKind_keyword_sizeof) + return "sizeof"; + else if (k == TokenKind_keyword_struct) + return "struct"; + else if (k == TokenKind_keyword_typeof) + return "typeof"; + else if (k == TokenKind_keyword_void) + return "void"; + else if (k == TokenKind_keyword_while) + return "while"; + else if (k == TokenKind_le) + return "le"; + else if (k == TokenKind_lt) + return "lt"; + else if (k == TokenKind_literal_int) + return "<integer>"; + else if (k == TokenKind_literal_str) + return "<string>"; + else if (k == TokenKind_minus) + return "-"; + else if (k == TokenKind_minusminus) + return "--"; + else if (k == TokenKind_ne) + return "!="; + else if (k == TokenKind_not) + return "!"; + else if (k == TokenKind_or) + return "|"; + else if (k == TokenKind_oror) + return "||"; + else if (k == TokenKind_paren_l) + return "("; + else if (k == TokenKind_paren_r) + return ")"; + else if (k == TokenKind_percent) + return "%"; + else if (k == TokenKind_plus) + return "+"; + else if (k == TokenKind_plusplus) + return "++"; + else if (k == TokenKind_semicolon) + return ";"; + else if (k == TokenKind_slash) + return "/"; + else if (k == TokenKind_star) + return "*"; + else if (k == TokenKind_va_start) + return "va_start"; else unreachable(); } 
@@ -41,18 +201,24 @@ struct SourceLocation { }; typedef struct SourceLocation SourceLocation; -struct PpToken { - PpTokenKind kind; +struct Token { + TokenKind kind; String raw; SourceLocation loc; }; -typedef struct PpToken PpToken; +typedef struct Token Token; -const char* pp_token_stringify(PpToken* tok) { - const char* kind_str = pp_token_kind_stringify(tok->kind); - char* buf = calloc(tok->raw.len + strlen(kind_str) + 3 + 1, sizeof(char)); - sprintf(buf, "%.*s (%s)", tok->raw.len, tok->raw.data, kind_str); - return buf; +const char* token_stringify(Token* t) { + TokenKind k = t->kind; + if (k == TokenKind_other || k == TokenKind_character_constant || k == TokenKind_whitespace || + k == TokenKind_ident || k == TokenKind_literal_int || k == TokenKind_literal_str) { + const char* kind_str = token_kind_stringify(k); + char* buf = calloc(t->raw.len + strlen(kind_str) + 3 + 1, sizeof(char)); + sprintf(buf, "%.*s (%s)", t->raw.len, t->raw.data, kind_str); + return buf; + } else { + return token_kind_stringify(k); + } } enum PpMacroKind { @@ -80,7 +246,7 @@ struct PpMacro { PpMacroKind kind; String name; size_t n_replacements; - PpToken* replacements; + Token* replacements; }; typedef struct PpMacro PpMacro; @@ -95,7 +261,7 @@ struct Preprocessor { int line; char* src; int pos; - PpToken* pp_tokens; + Token* pp_tokens; int n_pp_tokens; PpMacros* pp_macros; int include_depth; @@ -105,7 +271,7 @@ struct Preprocessor { }; typedef struct Preprocessor Preprocessor; -PpToken* do_preprocess(InFile* src, int depth, PpMacros* pp_macros); +Token* do_preprocess(InFile* src, int depth, PpMacros* pp_macros); PpMacros* pp_macros_new() { PpMacros* pp_macros = calloc(1, sizeof(PpMacros)); @@ -139,8 +305,8 @@ void add_predefined_macros(PpMacros* pp_macros) { m->name.len = strlen("__ducc__"); m->name.data = "__ducc__"; m->n_replacements = 1; - m->replacements = calloc(1, sizeof(PpToken)); - m->replacements[0].kind = PpTokenKind_pp_number; + m->replacements = calloc(1, sizeof(Token)); + 
m->replacements[0].kind = TokenKind_literal_int; m->replacements[0].raw.len = strlen("1"); m->replacements[0].raw.data = "1"; pp_macros->len += 1; @@ -158,9 +324,9 @@ void add_predefined_macros(PpMacros* pp_macros) { pp_macros->len += 1; } -int count_pp_tokens(PpToken* pp_tokens) { +int count_pp_tokens(Token* pp_tokens) { int n = 0; - while (pp_tokens[n].kind != PpTokenKind_eof) { + while (pp_tokens[n].kind != TokenKind_eof) { ++n; } return n; @@ -175,7 +341,7 @@ Preprocessor* preprocessor_new(InFile* src, int include_depth, PpMacros* pp_macr pp->filename = src->filename; pp->line = 1; pp->src = src->buf; - pp->pp_tokens = calloc(1024 * 1024, sizeof(PpToken)); + pp->pp_tokens = calloc(1024 * 1024, sizeof(Token)); pp->pp_macros = pp_macros; pp->include_depth = include_depth; pp->include_paths = calloc(16, sizeof(String)); @@ -209,104 +375,104 @@ void pp_tokenize_all(Preprocessor* pp) { int ch; int start; while (pp->src[pp->pos]) { - PpToken* tok = pp->pp_tokens + pp->n_pp_tokens; + Token* tok = pp->pp_tokens + pp->n_pp_tokens; tok->loc.filename = pp->filename; tok->loc.line = pp->line; char c = pp->src[pp->pos]; ++pp->pos; if (c == '(') { - tok->kind = PpTokenKind_punctuator; + tok->kind = TokenKind_paren_l; tok->raw.len = 1; tok->raw.data = pp->src + pp->pos - tok->raw.len; } else if (c == ')') { - tok->kind = PpTokenKind_punctuator; + tok->kind = TokenKind_paren_r; tok->raw.len = 1; tok->raw.data = pp->src + pp->pos - tok->raw.len; } else if (c == '{') { - tok->kind = PpTokenKind_punctuator; + tok->kind = TokenKind_brace_l; tok->raw.len = 1; tok->raw.data = pp->src + pp->pos - tok->raw.len; } else if (c == '}') { - tok->kind = PpTokenKind_punctuator; + tok->kind = TokenKind_brace_r; tok->raw.len = 1; tok->raw.data = pp->src + pp->pos - tok->raw.len; } else if (c == '[') { - tok->kind = PpTokenKind_punctuator; + tok->kind = TokenKind_bracket_l; tok->raw.len = 1; tok->raw.data = pp->src + pp->pos - tok->raw.len; } else if (c == ']') { - tok->kind = 
PpTokenKind_punctuator; + tok->kind = TokenKind_bracket_r; tok->raw.len = 1; tok->raw.data = pp->src + pp->pos - tok->raw.len; } else if (c == ',') { - tok->kind = PpTokenKind_punctuator; + tok->kind = TokenKind_comma; tok->raw.len = 1; tok->raw.data = pp->src + pp->pos - tok->raw.len; } else if (c == ';') { - tok->kind = PpTokenKind_punctuator; + tok->kind = TokenKind_semicolon; tok->raw.len = 1; tok->raw.data = pp->src + pp->pos - tok->raw.len; } else if (c == '+') { if (pp->src[pp->pos] == '=') { ++pp->pos; - tok->kind = PpTokenKind_punctuator; + tok->kind = TokenKind_assign_add; tok->raw.len = 2; tok->raw.data = pp->src + pp->pos - tok->raw.len; } else if (pp->src[pp->pos] == '+') { ++pp->pos; - tok->kind = PpTokenKind_punctuator; + tok->kind = TokenKind_plusplus; tok->raw.len = 2; tok->raw.data = pp->src + pp->pos - tok->raw.len; } else { - tok->kind = PpTokenKind_punctuator; + tok->kind = TokenKind_plus; tok->raw.len = 1; tok->raw.data = pp->src + pp->pos - tok->raw.len; } } else if (c == '|') { if (pp->src[pp->pos] == '|') { ++pp->pos; - tok->kind = PpTokenKind_punctuator; + tok->kind = TokenKind_oror; tok->raw.len = 2; tok->raw.data = pp->src + pp->pos - tok->raw.len; } else { - tok->kind = PpTokenKind_punctuator; + tok->kind = TokenKind_or; tok->raw.len = 1; tok->raw.data = pp->src + pp->pos - tok->raw.len; } } else if (c == '&') { if (pp->src[pp->pos] == '&') { ++pp->pos; - tok->kind = PpTokenKind_punctuator; + tok->kind = TokenKind_andand; tok->raw.len = 2; tok->raw.data = pp->src + pp->pos - tok->raw.len; } else { - tok->kind = PpTokenKind_punctuator; + tok->kind = TokenKind_and; tok->raw.len = 1; tok->raw.data = pp->src + pp->pos - tok->raw.len; } } else if (c == '-') { if (pp->src[pp->pos] == '>') { ++pp->pos; - tok->kind = PpTokenKind_punctuator; + tok->kind = TokenKind_arrow; tok->raw.len = 2; tok->raw.data = pp->src + pp->pos - tok->raw.len; } else if (pp->src[pp->pos] == '=') { ++pp->pos; - tok->kind = PpTokenKind_punctuator; + tok->kind = 
TokenKind_assign_sub; tok->raw.len = 2; tok->raw.data = pp->src + pp->pos - tok->raw.len; } else if (pp->src[pp->pos] == '-') { ++pp->pos; - tok->kind = PpTokenKind_punctuator; + tok->kind = TokenKind_minusminus; tok->raw.len = 2; tok->raw.data = pp->src + pp->pos - tok->raw.len; } else { - tok->kind = PpTokenKind_punctuator; + tok->kind = TokenKind_minus; tok->raw.len = 1; tok->raw.data = pp->src + pp->pos - tok->raw.len; } } else if (c == '*') { - tok->kind = PpTokenKind_punctuator; + tok->kind = TokenKind_star; tok->raw.len = 1; tok->raw.data = pp->src + pp->pos - tok->raw.len; } else if (c == '/') { @@ -316,7 +482,7 @@ void pp_tokenize_all(Preprocessor* pp) { while (pp->src[pp->pos] && pp->src[pp->pos] != '\n' && pp->src[pp->pos] != '\r') { ++pp->pos; } - tok->kind = PpTokenKind_whitespace; + tok->kind = TokenKind_whitespace; tok->raw.len = pp->pos - start; tok->raw.data = pp->src + pp->pos - tok->raw.len; } else if (pp->src[pp->pos] == '*') { @@ -332,16 +498,16 @@ void pp_tokenize_all(Preprocessor* pp) { } ++pp->pos; } - tok->kind = PpTokenKind_whitespace; + tok->kind = TokenKind_whitespace; tok->raw.len = pp->pos - start; tok->raw.data = pp->src + pp->pos - tok->raw.len; } else { - tok->kind = PpTokenKind_punctuator; + tok->kind = TokenKind_slash; tok->raw.len = 1; tok->raw.data = pp->src + pp->pos - tok->raw.len; } } else if (c == '%') { - tok->kind = PpTokenKind_punctuator; + tok->kind = TokenKind_percent; tok->raw.len = 1; tok->raw.data = pp->src + pp->pos - tok->raw.len; } else if (c == '.') { @@ -349,72 +515,71 @@ void pp_tokenize_all(Preprocessor* pp) { ++pp->pos; if (pp->src[pp->pos] == '.') { ++pp->pos; - tok->kind = PpTokenKind_punctuator; + tok->kind = TokenKind_ellipsis; tok->raw.len = 3; tok->raw.data = pp->src + pp->pos - tok->raw.len; } else { - --pp->pos; - tok->kind = PpTokenKind_punctuator; - tok->raw.len = 1; + tok->kind = TokenKind_other; + tok->raw.len = 2; tok->raw.data = pp->src + pp->pos - tok->raw.len; } } else { - tok->kind = 
PpTokenKind_punctuator; + tok->kind = TokenKind_dot; tok->raw.len = 1; tok->raw.data = pp->src + pp->pos - tok->raw.len; } } else if (c == '!') { if (pp->src[pp->pos] == '=') { ++pp->pos; - tok->kind = PpTokenKind_punctuator; + tok->kind = TokenKind_ne; tok->raw.len = 2; tok->raw.data = pp->src + pp->pos - tok->raw.len; } else { - tok->kind = PpTokenKind_punctuator; + tok->kind = TokenKind_not; tok->raw.len = 1; tok->raw.data = pp->src + pp->pos - tok->raw.len; } } else if (c == '=') { if (pp->src[pp->pos] == '=') { ++pp->pos; - tok->kind = PpTokenKind_punctuator; + tok->kind = TokenKind_eq; tok->raw.len = 2; tok->raw.data = pp->src + pp->pos - tok->raw.len; } else { - tok->kind = PpTokenKind_punctuator; + tok->kind = TokenKind_assign; tok->raw.len = 1; tok->raw.data = pp->src + pp->pos - tok->raw.len; } } else if (c == '<') { if (pp->src[pp->pos] == '=') { ++pp->pos; - tok->kind = PpTokenKind_punctuator; + tok->kind = TokenKind_le; tok->raw.len = 2; tok->raw.data = pp->src + pp->pos - tok->raw.len; } else { - tok->kind = PpTokenKind_punctuator; + tok->kind = TokenKind_lt; tok->raw.len = 1; tok->raw.data = pp->src + pp->pos - tok->raw.len; } } else if (c == '>') { if (pp->src[pp->pos] == '=') { ++pp->pos; - tok->kind = PpTokenKind_punctuator; + tok->kind = TokenKind_ge; tok->raw.len = 2; tok->raw.data = pp->src + pp->pos - tok->raw.len; } else { - tok->kind = PpTokenKind_punctuator; + tok->kind = TokenKind_gt; tok->raw.len = 1; tok->raw.data = pp->src + pp->pos - tok->raw.len; } } else if (c == '#') { if (pp->src[pp->pos] == '#') { ++pp->pos; - tok->kind = PpTokenKind_punctuator; + tok->kind = TokenKind_hashhash; tok->raw.len = 2; tok->raw.data = pp->src + pp->pos - tok->raw.len; } else { - tok->kind = PpTokenKind_punctuator; + tok->kind = TokenKind_hash; tok->raw.len = 1; tok->raw.data = pp->src + pp->pos - tok->raw.len; } @@ -424,7 +589,7 @@ void pp_tokenize_all(Preprocessor* pp) { ++pp->pos; } pp->pos += 2; - tok->kind = PpTokenKind_character_constant; + 
tok->kind = TokenKind_character_constant; tok->raw.data = pp->src + start; tok->raw.len = pp->pos - start; } else if (c == '"') { @@ -439,7 +604,7 @@ void pp_tokenize_all(Preprocessor* pp) { ++pp->pos; } ++pp->pos; - tok->kind = PpTokenKind_string_literal; + tok->kind = TokenKind_literal_str; tok->raw.data = pp->src + start; tok->raw.len = pp->pos - start; } else if (isdigit(c)) { @@ -448,7 +613,7 @@ void pp_tokenize_all(Preprocessor* pp) { while (isdigit(pp->src[pp->pos])) { ++pp->pos; } - tok->kind = PpTokenKind_pp_number; + tok->kind = TokenKind_literal_int; tok->raw.data = pp->src + start; tok->raw.len = pp->pos - start; } else if (isalpha(c) || c == '_') { @@ -459,16 +624,58 @@ void pp_tokenize_all(Preprocessor* pp) { } tok->raw.data = pp->src + start; tok->raw.len = pp->pos - start; - tok->kind = PpTokenKind_identifier; + if (string_equals_cstr(&tok->raw, "break")) { + tok->kind = TokenKind_keyword_break; + } else if (string_equals_cstr(&tok->raw, "char")) { + tok->kind = TokenKind_keyword_char; + } else if (string_equals_cstr(&tok->raw, "const")) { + tok->kind = TokenKind_keyword_const; + } else if (string_equals_cstr(&tok->raw, "continue")) { + tok->kind = TokenKind_keyword_continue; + } else if (string_equals_cstr(&tok->raw, "do")) { + tok->kind = TokenKind_keyword_do; + } else if (string_equals_cstr(&tok->raw, "else")) { + tok->kind = TokenKind_keyword_else; + } else if (string_equals_cstr(&tok->raw, "enum")) { + tok->kind = TokenKind_keyword_enum; + } else if (string_equals_cstr(&tok->raw, "extern")) { + tok->kind = TokenKind_keyword_extern; + } else if (string_equals_cstr(&tok->raw, "for")) { + tok->kind = TokenKind_keyword_for; + } else if (string_equals_cstr(&tok->raw, "if")) { + tok->kind = TokenKind_keyword_if; + } else if (string_equals_cstr(&tok->raw, "int")) { + tok->kind = TokenKind_keyword_int; + } else if (string_equals_cstr(&tok->raw, "long")) { + tok->kind = TokenKind_keyword_long; + } else if (string_equals_cstr(&tok->raw, "return")) { + 
tok->kind = TokenKind_keyword_return; + } else if (string_equals_cstr(&tok->raw, "short")) { + tok->kind = TokenKind_keyword_short; + } else if (string_equals_cstr(&tok->raw, "sizeof")) { + tok->kind = TokenKind_keyword_sizeof; + } else if (string_equals_cstr(&tok->raw, "struct")) { + tok->kind = TokenKind_keyword_struct; + } else if (string_equals_cstr(&tok->raw, "typedef")) { + tok->kind = TokenKind_keyword_typeof; + } else if (string_equals_cstr(&tok->raw, "void")) { + tok->kind = TokenKind_keyword_void; + } else if (string_equals_cstr(&tok->raw, "while")) { + tok->kind = TokenKind_keyword_while; + } else if (string_equals_cstr(&tok->raw, "va_start")) { + tok->kind = TokenKind_va_start; + } else { + tok->kind = TokenKind_ident; + } } else if (isspace(c)) { if (c == '\n' || c == '\r') { ++pp->line; } - tok->kind = PpTokenKind_whitespace; + tok->kind = TokenKind_whitespace; tok->raw.len = 1; tok->raw.data = pp->src + pp->pos - tok->raw.len; } else { - tok->kind = PpTokenKind_other; + tok->kind = TokenKind_other; tok->raw.len = 1; tok->raw.data = pp->src + pp->pos - tok->raw.len; } @@ -476,8 +683,8 @@ void pp_tokenize_all(Preprocessor* pp) { } } -PpToken* skip_whitespace(PpToken* tok) { - while (tok->kind != PpTokenKind_eof && tok->kind == PpTokenKind_whitespace) { +Token* skip_whitespace(Token* tok) { + while (tok->kind != TokenKind_eof && tok->kind == TokenKind_whitespace) { ++tok; } return tok; @@ -493,9 +700,9 @@ int string_contains_newline(String* s) { return 0; } -PpToken* find_next_newline(PpToken* tok) { - while (tok->kind != PpTokenKind_eof) { - if (tok->kind == PpTokenKind_whitespace && string_contains_newline(&tok->raw)) { +Token* find_next_newline(Token* tok) { + while (tok->kind != TokenKind_eof) { + if (tok->kind == TokenKind_whitespace && string_contains_newline(&tok->raw)) { return tok; } ++tok; @@ -503,23 +710,23 @@ PpToken* find_next_newline(PpToken* tok) { return NULL; } -void make_token_whitespace(PpToken* tok) { - tok->kind = 
PpTokenKind_whitespace; +void make_token_whitespace(Token* tok) { + tok->kind = TokenKind_whitespace; tok->raw.len = 0; tok->raw.data = NULL; } -void remove_directive_tokens(PpToken* start, PpToken* end) { - PpToken* tok = start; +void remove_directive_tokens(Token* start, Token* end) { + Token* tok = start; while (tok != end) { make_token_whitespace(tok); ++tok; } } -PpToken* process_endif_directive(Preprocessor* pp, PpToken* tok) { - PpToken* tok2 = skip_whitespace(tok + 1); - if (tok2->kind == PpTokenKind_identifier && string_equals_cstr(&tok2->raw, "endif")) { +Token* process_endif_directive(Preprocessor* pp, Token* tok) { + Token* tok2 = skip_whitespace(tok + 1); + if (tok2->kind == TokenKind_ident && string_equals_cstr(&tok2->raw, "endif")) { ++tok2; pp->skip_pp_tokens = 0; remove_directive_tokens(tok, tok2); @@ -528,9 +735,9 @@ PpToken* process_endif_directive(Preprocessor* pp, PpToken* tok) { return NULL; } -PpToken* process_else_directive(Preprocessor* pp, PpToken* tok) { - PpToken* tok2 = skip_whitespace(tok + 1); - if (tok2->kind == PpTokenKind_identifier && string_equals_cstr(&tok2->raw, "else")) { +Token* process_else_directive(Preprocessor* pp, Token* tok) { + Token* tok2 = skip_whitespace(tok + 1); + if (tok2->kind == TokenKind_keyword_else) { ++tok2; pp->skip_pp_tokens = 1 - pp->skip_pp_tokens; remove_directive_tokens(tok, tok2); @@ -539,13 +746,13 @@ PpToken* process_else_directive(Preprocessor* pp, PpToken* tok) { return NULL; } -PpToken* process_ifdef_directive(Preprocessor* pp, PpToken* tok) { - PpToken* tok2 = skip_whitespace(tok + 1); - if (tok2->kind == PpTokenKind_identifier && string_equals_cstr(&tok2->raw, "ifdef")) { +Token* process_ifdef_directive(Preprocessor* pp, Token* tok) { + Token* tok2 = skip_whitespace(tok + 1); + if (tok2->kind == TokenKind_ident && string_equals_cstr(&tok2->raw, "ifdef")) { ++tok2; tok2 = skip_whitespace(tok2); - if (tok2->kind == PpTokenKind_identifier) { - PpToken* name = tok2; + if (tok2->kind == 
TokenKind_ident) { + Token* name = tok2; ++tok2; pp->skip_pp_tokens = find_pp_macro(pp, &name->raw) == -1; } @@ -555,13 +762,13 @@ PpToken* process_ifdef_directive(Preprocessor* pp, PpToken* tok) { return NULL; } -PpToken* process_ifndef_directive(Preprocessor* pp, PpToken* tok) { - PpToken* tok2 = skip_whitespace(tok + 1); - if (tok2->kind == PpTokenKind_identifier && string_equals_cstr(&tok2->raw, "ifndef")) { +Token* process_ifndef_directive(Preprocessor* pp, Token* tok) { + Token* tok2 = skip_whitespace(tok + 1); + if (tok2->kind == TokenKind_ident && string_equals_cstr(&tok2->raw, "ifndef")) { ++tok2; tok2 = skip_whitespace(tok2); - if (tok2->kind == PpTokenKind_identifier) { - PpToken* name = tok2; + if (tok2->kind == TokenKind_ident) { + Token* name = tok2; ++tok2; pp->skip_pp_tokens = find_pp_macro(pp, &name->raw) != -1; } @@ -571,23 +778,23 @@ PpToken* process_ifndef_directive(Preprocessor* pp, PpToken* tok) { return NULL; } -PpToken* read_include_header_name(PpToken* tok2, String* include_name) { - if (tok2->kind == PpTokenKind_string_literal) { +Token* read_include_header_name(Token* tok2, String* include_name) { + if (tok2->kind == TokenKind_literal_str) { *include_name = tok2->raw; ++tok2; return tok2; - } else if (tok2->kind == PpTokenKind_punctuator && string_equals_cstr(&tok2->raw, "<")) { + } else if (tok2->kind == TokenKind_lt) { char* include_name_start = tok2->raw.data; ++tok2; int include_name_len = 0; - while (tok2->kind != PpTokenKind_eof) { - if (tok2->kind == PpTokenKind_punctuator && string_equals_cstr(&tok2->raw, ">")) { + while (tok2->kind != TokenKind_eof) { + if (tok2->kind == TokenKind_gt) { break; } include_name_len += tok2->raw.len; ++tok2; } - if (tok2->kind == PpTokenKind_eof) { + if (tok2->kind == TokenKind_eof) { fatal_error("invalid #include: <> not balanced"); } ++tok2; @@ -616,8 +823,8 @@ const char* resolve_include_name(Preprocessor* pp, String* include_name) { } } -PpToken* replace_pp_tokens(Preprocessor* pp, PpToken* 
dest_start, PpToken* dest_end, int n_source_tokens, - PpToken* source_tokens) { +Token* replace_pp_tokens(Preprocessor* pp, Token* dest_start, Token* dest_end, int n_source_tokens, + Token* source_tokens) { int n_tokens_to_remove = dest_end - dest_start; int n_tokens_after_dest = (pp->pp_tokens + pp->n_pp_tokens) - dest_end; int shift_amount; @@ -625,35 +832,35 @@ PpToken* replace_pp_tokens(Preprocessor* pp, PpToken* dest_start, PpToken* dest_ if (n_tokens_to_remove < n_source_tokens) { // Move existing tokens backward to make room. shift_amount = n_source_tokens - n_tokens_to_remove; - memmove(dest_end + shift_amount, dest_end, n_tokens_after_dest * sizeof(PpToken)); + memmove(dest_end + shift_amount, dest_end, n_tokens_after_dest * sizeof(Token)); pp->n_pp_tokens += shift_amount; } else if (n_source_tokens < n_tokens_to_remove) { // Move existing tokens forward to reduce room. shift_amount = n_tokens_to_remove - n_source_tokens; - memmove(dest_start + n_source_tokens, dest_end, n_tokens_after_dest * sizeof(PpToken)); + memmove(dest_start + n_source_tokens, dest_end, n_tokens_after_dest * sizeof(Token)); pp->n_pp_tokens -= shift_amount; - memset(pp->pp_tokens + pp->n_pp_tokens, 0, shift_amount * sizeof(PpToken)); + memset(pp->pp_tokens + pp->n_pp_tokens, 0, shift_amount * sizeof(Token)); } - memcpy(dest_start, source_tokens, n_source_tokens * sizeof(PpToken)); + memcpy(dest_start, source_tokens, n_source_tokens * sizeof(Token)); return dest_start + n_source_tokens; } -PpToken* expand_include_directive(Preprocessor* pp, PpToken* tok, PpToken* tok2, const char* include_name_buf) { +Token* expand_include_directive(Preprocessor* pp, Token* tok, Token* tok2, const char* include_name_buf) { InFile* include_source = read_all(include_name_buf); if (!include_source) { fatal_error("cannot open include file: %s", include_name_buf); } - PpToken* include_pp_tokens = do_preprocess(include_source, pp->include_depth + 1, pp->pp_macros); + Token* include_pp_tokens = 
do_preprocess(include_source, pp->include_depth + 1, pp->pp_macros); return replace_pp_tokens(pp, tok, tok2 + 1, count_pp_tokens(include_pp_tokens), include_pp_tokens); } -PpToken* process_include_directive(Preprocessor* pp, PpToken* tok) { - PpToken* tok2 = skip_whitespace(tok + 1); +Token* process_include_directive(Preprocessor* pp, Token* tok) { + Token* tok2 = skip_whitespace(tok + 1); - if (tok2->kind == PpTokenKind_identifier && string_equals_cstr(&tok2->raw, "include")) { + if (tok2->kind == TokenKind_ident && string_equals_cstr(&tok2->raw, "include")) { ++tok2; tok2 = skip_whitespace(tok2); String* include_name = calloc(1, sizeof(String)); @@ -667,20 +874,20 @@ PpToken* process_include_directive(Preprocessor* pp, PpToken* tok) { return NULL; } -PpToken* process_define_directive(Preprocessor* pp, PpToken* tok) { - PpToken* tok2 = skip_whitespace(tok + 1); - PpToken* tok3 = NULL; +Token* process_define_directive(Preprocessor* pp, Token* tok) { + Token* tok2 = skip_whitespace(tok + 1); + Token* tok3 = NULL; PpMacro* pp_macro; int i; - if (tok2->kind == PpTokenKind_identifier && string_equals_cstr(&tok2->raw, "define")) { + if (tok2->kind == TokenKind_ident && string_equals_cstr(&tok2->raw, "define")) { ++tok2; tok2 = skip_whitespace(tok2); - if (tok2->kind == PpTokenKind_identifier) { - PpToken* macro_name = tok2; + if (tok2->kind == TokenKind_ident) { + Token* macro_name = tok2; ++tok2; - if (tok2->kind == PpTokenKind_punctuator && string_equals_cstr(&tok2->raw, "(")) { + if (tok2->kind == TokenKind_paren_l) { ++tok2; - if (tok2->kind == PpTokenKind_punctuator && string_equals_cstr(&tok2->raw, ")")) { + if (tok2->kind == TokenKind_paren_r) { ++tok2; } else { fatal_error("%s:%d: invalid function-like macro syntax (#define %.*s)", macro_name->loc.filename, @@ -692,7 +899,7 @@ PpToken* process_define_directive(Preprocessor* pp, PpToken* tok) { pp_macro->kind = PpMacroKind_func; pp_macro->name = macro_name->raw; pp_macro->n_replacements = tok3 - tok2; - 
pp_macro->replacements = calloc(pp_macro->n_replacements, sizeof(PpToken)); + pp_macro->replacements = calloc(pp_macro->n_replacements, sizeof(Token)); for (i = 0; i < pp_macro->n_replacements; ++i) { pp_macro->replacements[i] = tok2[i]; } @@ -705,7 +912,7 @@ PpToken* process_define_directive(Preprocessor* pp, PpToken* tok) { pp_macro->kind = PpMacroKind_obj; pp_macro->name = macro_name->raw; pp_macro->n_replacements = tok3 - tok2; - pp_macro->replacements = calloc(pp_macro->n_replacements, sizeof(PpToken)); + pp_macro->replacements = calloc(pp_macro->n_replacements, sizeof(Token)); for (i = 0; i < pp_macro->n_replacements; ++i) { pp_macro->replacements[i] = tok2[i]; } @@ -721,7 +928,7 @@ PpToken* process_define_directive(Preprocessor* pp, PpToken* tok) { return NULL; } -int expand_macro(Preprocessor* pp, PpToken* tok) { +int expand_macro(Preprocessor* pp, Token* tok) { int pp_macro_idx = find_pp_macro(pp, &tok->raw); if (pp_macro_idx == -1) { return 0; @@ -744,15 +951,15 @@ int expand_macro(Preprocessor* pp, PpToken* tok) { tok[i].loc = original_loc; } } else if (pp_macro->kind == PpMacroKind_builtin_file) { - PpToken* file_tok = calloc(1, sizeof(PpToken)); - file_tok->kind = PpTokenKind_string_literal; + Token* file_tok = calloc(1, sizeof(Token)); + file_tok->kind = TokenKind_literal_str; file_tok->raw.len = strlen(tok->loc.filename) + 2; file_tok->raw.data = calloc(file_tok->raw.len, sizeof(char)); sprintf(file_tok->raw.data, "\"%s\"", tok->loc.filename); replace_pp_tokens(pp, tok, tok + 1, 1, file_tok); } else if (pp_macro->kind == PpMacroKind_builtin_line) { - PpToken* line_tok = calloc(1, sizeof(PpToken)); - line_tok->kind = PpTokenKind_pp_number; + Token* line_tok = calloc(1, sizeof(Token)); + line_tok->kind = TokenKind_literal_int; line_tok->raw.data = calloc(10, sizeof(char)); sprintf(line_tok->raw.data, "%d", tok->loc.line); line_tok->raw.len = strlen(line_tok->raw.data); @@ -764,11 +971,11 @@ int expand_macro(Preprocessor* pp, PpToken* tok) { } void 
process_pp_directives(Preprocessor* pp) { - PpToken* tok = pp->pp_tokens; + Token* tok = pp->pp_tokens; - while (tok->kind != PpTokenKind_eof) { - if (tok->kind == PpTokenKind_punctuator && string_equals_cstr(&tok->raw, "#")) { - PpToken* next_tok; + while (tok->kind != TokenKind_eof) { + if (tok->kind == TokenKind_hash) { + Token* next_tok; if ((next_tok = process_endif_directive(pp, tok)) != NULL) { tok = next_tok; @@ -797,7 +1004,7 @@ void process_pp_directives(Preprocessor* pp) { } } else if (skip_pp_tokens(pp)) { make_token_whitespace(tok); - } else if (tok->kind == PpTokenKind_identifier) { + } else if (tok->kind == TokenKind_ident) { int expanded = expand_macro(pp, tok); if (expanded) { // A macro may expand to another macro. Re-scan the expanded tokens. @@ -809,12 +1016,12 @@ void process_pp_directives(Preprocessor* pp) { } } -void pp_dump(PpToken* t, int include_whitespace) { - for (; t->kind != PpTokenKind_eof; ++t) { - if (t->kind == PpTokenKind_whitespace && !include_whitespace) { +void pp_dump(Token* t, int include_whitespace) { + for (; t->kind != TokenKind_eof; ++t) { + if (t->kind == TokenKind_whitespace && !include_whitespace) { continue; } - fprintf(stderr, "%s\n", pp_token_stringify(t)); + fprintf(stderr, "%s\n", token_stringify(t)); } } @@ -825,7 +1032,7 @@ char* get_ducc_include_path() { return buf; } -PpToken* do_preprocess(InFile* src, int depth, PpMacros* pp_macros) { +Token* do_preprocess(InFile* src, int depth, PpMacros* pp_macros) { Preprocessor* pp = preprocessor_new(src, depth, pp_macros); add_include_path(pp, get_ducc_include_path()); add_include_path(pp, "/usr/include/x86_64-linux-gnu"); @@ -835,7 +1042,7 @@ PpToken* do_preprocess(InFile* src, int depth, PpMacros* pp_macros) { return pp->pp_tokens; } -PpToken* preprocess(InFile* src) { +Token* preprocess(InFile* src) { PpMacros* pp_macros = pp_macros_new(); add_predefined_macros(pp_macros); return do_preprocess(src, 0, pp_macros); |
