diff options
| author | nsfisis <nsfisis@gmail.com> | 2025-08-03 14:02:54 +0900 |
|---|---|---|
| committer | nsfisis <nsfisis@gmail.com> | 2025-08-15 10:06:21 +0900 |
| commit | 1364b1303e96221c52568aed0726adc83aee1dc4 (patch) | |
| tree | d4569d48d543e83593fdd4a8fcfc919a65eb47cb | |
| parent | e1de8fc36f11ac932707c7113eb4bf3ebc4b1f74 (diff) | |
| download | ducc-1364b1303e96221c52568aed0726adc83aee1dc4.tar.gz ducc-1364b1303e96221c52568aed0726adc83aee1dc4.tar.zst ducc-1364b1303e96221c52568aed0726adc83aee1dc4.zip | |
refactor: merge PpToken and Token
| -rw-r--r-- | main.c | 2 | ||||
| -rw-r--r-- | preprocess.c | 525 | ||||
| -rw-r--r-- | tokenize.c | 340 |
3 files changed, 381 insertions, 486 deletions
@@ -16,7 +16,7 @@ int main(int argc, char** argv) { fatal_error("usage: ducc <FILE>"); } InFile* source = read_all(argv[1]); - PpToken* pp_tokens = preprocess(source); + Token* pp_tokens = preprocess(source); Token* tokens = tokenize(pp_tokens); Program* prog = parse(tokens); codegen(prog); diff --git a/preprocess.c b/preprocess.c index 0978dd3..2caa37c 100644 --- a/preprocess.c +++ b/preprocess.c @@ -1,36 +1,196 @@ -enum PpTokenKind { - PpTokenKind_eof, - - PpTokenKind_header_name, - PpTokenKind_identifier, - PpTokenKind_pp_number, - PpTokenKind_character_constant, - PpTokenKind_string_literal, - PpTokenKind_punctuator, - PpTokenKind_other, - PpTokenKind_whitespace, +enum TokenKind { + TokenKind_eof, + + // Only preprocessing phase. + TokenKind_hash, + TokenKind_hashhash, + TokenKind_whitespace, + TokenKind_other, + TokenKind_character_constant, + + TokenKind_and, + TokenKind_andand, + TokenKind_arrow, + TokenKind_assign, + TokenKind_assign_add, + TokenKind_assign_sub, + TokenKind_brace_l, + TokenKind_brace_r, + TokenKind_bracket_l, + TokenKind_bracket_r, + TokenKind_comma, + TokenKind_dot, + TokenKind_ellipsis, + TokenKind_eq, + TokenKind_ge, + TokenKind_gt, + TokenKind_ident, + TokenKind_keyword_break, + TokenKind_keyword_char, + TokenKind_keyword_const, + TokenKind_keyword_continue, + TokenKind_keyword_do, + TokenKind_keyword_else, + TokenKind_keyword_enum, + TokenKind_keyword_extern, + TokenKind_keyword_for, + TokenKind_keyword_if, + TokenKind_keyword_int, + TokenKind_keyword_long, + TokenKind_keyword_return, + TokenKind_keyword_short, + TokenKind_keyword_sizeof, + TokenKind_keyword_struct, + TokenKind_keyword_typeof, + TokenKind_keyword_void, + TokenKind_keyword_while, + TokenKind_le, + TokenKind_lt, + TokenKind_literal_int, + TokenKind_literal_str, + TokenKind_minus, + TokenKind_minusminus, + TokenKind_ne, + TokenKind_not, + TokenKind_or, + TokenKind_oror, + TokenKind_paren_l, + TokenKind_paren_r, + TokenKind_percent, + TokenKind_plus, + TokenKind_plusplus, + TokenKind_semicolon, + TokenKind_slash, + TokenKind_star, + + // va_start() is currently implemented as a special form due to the current limitation of #define macro. + TokenKind_va_start, }; -typedef enum PpTokenKind PpTokenKind; +typedef enum TokenKind TokenKind; -const char* pp_token_kind_stringify(PpTokenKind kind) { - if (kind == PpTokenKind_eof) +const char* token_kind_stringify(TokenKind k) { + if (k == TokenKind_eof) return "<eof>"; - else if (kind == PpTokenKind_header_name) - return "<header-name>"; - else if (kind == PpTokenKind_identifier) - return "<identifier>"; - else if (kind == PpTokenKind_pp_number) - return "<pp-number>"; - else if (kind == PpTokenKind_character_constant) - return "<character-constant>"; - else if (kind == PpTokenKind_string_literal) - return "<string-literal>"; - else if (kind == PpTokenKind_punctuator) - return "<punctuator>"; - else if (kind == PpTokenKind_other) - return "<other>"; - else if (kind == PpTokenKind_whitespace) + else if (k == TokenKind_hash) + return "#"; + else if (k == TokenKind_hashhash) + return "##"; + else if (k == TokenKind_whitespace) return "<whitespace>"; + else if (k == TokenKind_other) + return "<other>"; + else if (k == TokenKind_character_constant) + return "<character-constant>"; + else if (k == TokenKind_and) + return "&"; + else if (k == TokenKind_andand) + return "&&"; + else if (k == TokenKind_arrow) + return "->"; + else if (k == TokenKind_assign) + return "="; + else if (k == TokenKind_assign_add) + return "+="; + else if (k == TokenKind_assign_sub) + return "-="; + else if (k == TokenKind_brace_l) + return "{"; + else if (k == TokenKind_brace_r) + return "}"; + else if (k == TokenKind_bracket_l) + return "["; + else if (k == TokenKind_bracket_r) + return "]"; + else if (k == TokenKind_comma) + return ","; + else if (k == TokenKind_dot) + return "."; + else if (k == TokenKind_ellipsis) + return "..."; + else if (k == TokenKind_eq) + return "=="; + else if (k == TokenKind_ge) + return ">="; + else if (k == TokenKind_gt) + return ">"; + else if (k == TokenKind_ident) + return "<identifier>"; + else if (k == TokenKind_keyword_break) + return "break"; + else if (k == TokenKind_keyword_char) + return "char"; + else if (k == TokenKind_keyword_const) + return "const"; + else if (k == TokenKind_keyword_continue) + return "continue"; + else if (k == TokenKind_keyword_do) + return "do"; + else if (k == TokenKind_keyword_else) + return "else"; + else if (k == TokenKind_keyword_enum) + return "enum"; + else if (k == TokenKind_keyword_extern) + return "extern"; + else if (k == TokenKind_keyword_for) + return "for"; + else if (k == TokenKind_keyword_if) + return "if"; + else if (k == TokenKind_keyword_int) + return "int"; + else if (k == TokenKind_keyword_long) + return "long"; + else if (k == TokenKind_keyword_return) + return "return"; + else if (k == TokenKind_keyword_short) + return "short"; + else if (k == TokenKind_keyword_sizeof) + return "sizeof"; + else if (k == TokenKind_keyword_struct) + return "struct"; + else if (k == TokenKind_keyword_typeof) + return "typeof"; + else if (k == TokenKind_keyword_void) + return "void"; + else if (k == TokenKind_keyword_while) + return "while"; + else if (k == TokenKind_le) + return "le"; + else if (k == TokenKind_lt) + return "lt"; + else if (k == TokenKind_literal_int) + return "<integer>"; + else if (k == TokenKind_literal_str) + return "<string>"; + else if (k == TokenKind_minus) + return "-"; + else if (k == TokenKind_minusminus) + return "--"; + else if (k == TokenKind_ne) + return "!="; + else if (k == TokenKind_not) + return "!"; + else if (k == TokenKind_or) + return "|"; + else if (k == TokenKind_oror) + return "||"; + else if (k == TokenKind_paren_l) + return "("; + else if (k == TokenKind_paren_r) + return ")"; + else if (k == TokenKind_percent) + return "%"; + else if (k == TokenKind_plus) + return "+"; + else if (k == TokenKind_plusplus) + return "++"; + else if (k == TokenKind_semicolon) + return ";"; + else if (k == TokenKind_slash) + return "/"; + else if (k == TokenKind_star) + return "*"; + else if (k == TokenKind_va_start) + return "va_start"; else unreachable(); } @@ -41,18 +201,24 @@ struct SourceLocation { }; typedef struct SourceLocation SourceLocation; -struct PpToken { - PpTokenKind kind; +struct Token { + TokenKind kind; String raw; SourceLocation loc; }; -typedef struct PpToken PpToken; +typedef struct Token Token; -const char* pp_token_stringify(PpToken* tok) { - const char* kind_str = pp_token_kind_stringify(tok->kind); - char* buf = calloc(tok->raw.len + strlen(kind_str) + 3 + 1, sizeof(char)); - sprintf(buf, "%.*s (%s)", tok->raw.len, tok->raw.data, kind_str); - return buf; +const char* token_stringify(Token* t) { + TokenKind k = t->kind; + if (k == TokenKind_other || k == TokenKind_character_constant || k == TokenKind_whitespace || + k == TokenKind_ident || k == TokenKind_literal_int || k == TokenKind_literal_str) { + const char* kind_str = token_kind_stringify(k); + char* buf = calloc(t->raw.len + strlen(kind_str) + 3 + 1, sizeof(char)); + sprintf(buf, "%.*s (%s)", t->raw.len, t->raw.data, kind_str); + return buf; + } else { + return token_kind_stringify(k); + } } enum PpMacroKind { @@ -80,7 +246,7 @@ struct PpMacro { PpMacroKind kind; String name; size_t n_replacements; - PpToken* replacements; + Token* replacements; }; typedef struct PpMacro PpMacro; @@ -95,7 +261,7 @@ struct Preprocessor { int line; char* src; int pos; - PpToken* pp_tokens; + Token* pp_tokens; int n_pp_tokens; PpMacros* pp_macros; int include_depth; @@ -105,7 +271,7 @@ struct Preprocessor { }; typedef struct Preprocessor Preprocessor; -PpToken* do_preprocess(InFile* src, int depth, PpMacros* pp_macros); +Token* do_preprocess(InFile* src, int depth, PpMacros* pp_macros); PpMacros* pp_macros_new() { PpMacros* pp_macros = calloc(1, sizeof(PpMacros)); @@ -139,8 +305,8 @@ void add_predefined_macros(PpMacros* pp_macros) { m->name.len = strlen("__ducc__"); m->name.data = "__ducc__"; m->n_replacements = 1; - m->replacements = calloc(1, sizeof(PpToken)); - m->replacements[0].kind = PpTokenKind_pp_number; + m->replacements = calloc(1, sizeof(Token)); + m->replacements[0].kind = TokenKind_literal_int; m->replacements[0].raw.len = strlen("1"); m->replacements[0].raw.data = "1"; pp_macros->len += 1; @@ -158,9 +324,9 @@ void add_predefined_macros(PpMacros* pp_macros) { pp_macros->len += 1; } -int count_pp_tokens(PpToken* pp_tokens) { +int count_pp_tokens(Token* pp_tokens) { int n = 0; - while (pp_tokens[n].kind != PpTokenKind_eof) { + while (pp_tokens[n].kind != TokenKind_eof) { ++n; } return n; @@ -175,7 +341,7 @@ Preprocessor* preprocessor_new(InFile* src, int include_depth, PpMacros* pp_macr pp->filename = src->filename; pp->line = 1; pp->src = src->buf; - pp->pp_tokens = calloc(1024 * 1024, sizeof(PpToken)); + pp->pp_tokens = calloc(1024 * 1024, sizeof(Token)); pp->pp_macros = pp_macros; pp->include_depth = include_depth; pp->include_paths = calloc(16, sizeof(String)); @@ -209,104 +375,104 @@ void pp_tokenize_all(Preprocessor* pp) { int ch; int start; while (pp->src[pp->pos]) { - PpToken* tok = pp->pp_tokens + pp->n_pp_tokens; + Token* tok = pp->pp_tokens + pp->n_pp_tokens; tok->loc.filename = pp->filename; tok->loc.line = pp->line; char c = pp->src[pp->pos]; ++pp->pos; if (c == '(') { - tok->kind = PpTokenKind_punctuator; + tok->kind = TokenKind_paren_l; tok->raw.len = 1; tok->raw.data = pp->src + pp->pos - tok->raw.len; } else if (c == ')') { - tok->kind = PpTokenKind_punctuator; + tok->kind = TokenKind_paren_r; tok->raw.len = 1; tok->raw.data = pp->src + pp->pos - tok->raw.len; } else if (c == '{') { - tok->kind = PpTokenKind_punctuator; + tok->kind = TokenKind_brace_l; tok->raw.len = 1; tok->raw.data = pp->src + pp->pos - tok->raw.len; } else if (c == '}') { - tok->kind = PpTokenKind_punctuator; + tok->kind = TokenKind_brace_r; tok->raw.len = 1; tok->raw.data = pp->src + pp->pos - tok->raw.len; } else if (c == '[') { - tok->kind = PpTokenKind_punctuator; + tok->kind = TokenKind_bracket_l; tok->raw.len = 1; tok->raw.data = pp->src + pp->pos - tok->raw.len; } else if (c == ']') { - tok->kind = PpTokenKind_punctuator; + tok->kind = TokenKind_bracket_r; tok->raw.len = 1; tok->raw.data = pp->src + pp->pos - tok->raw.len; } else if (c == ',') { - tok->kind = PpTokenKind_punctuator; + tok->kind = TokenKind_comma; tok->raw.len = 1; tok->raw.data = pp->src + pp->pos - tok->raw.len; } else if (c == ';') { - tok->kind = PpTokenKind_punctuator; + tok->kind = TokenKind_semicolon; tok->raw.len = 1; tok->raw.data = pp->src + pp->pos - tok->raw.len; } else if (c == '+') { if (pp->src[pp->pos] == '=') { ++pp->pos; - tok->kind = PpTokenKind_punctuator; + tok->kind = TokenKind_assign_add; tok->raw.len = 2; tok->raw.data = pp->src + pp->pos - tok->raw.len; } else if (pp->src[pp->pos] == '+') { ++pp->pos; - tok->kind = PpTokenKind_punctuator; + tok->kind = TokenKind_plusplus; tok->raw.len = 2; tok->raw.data = pp->src + pp->pos - tok->raw.len; } else { - tok->kind = PpTokenKind_punctuator; + tok->kind = TokenKind_plus; tok->raw.len = 1; tok->raw.data = pp->src + pp->pos - tok->raw.len; } } else if (c == '|') { if (pp->src[pp->pos] == '|') { ++pp->pos; - tok->kind = PpTokenKind_punctuator; + tok->kind = TokenKind_oror; tok->raw.len = 2; tok->raw.data = pp->src + pp->pos - tok->raw.len; } else { - tok->kind = PpTokenKind_punctuator; + tok->kind = TokenKind_or; tok->raw.len = 1; tok->raw.data = pp->src + pp->pos - tok->raw.len; } } else if (c == '&') { if (pp->src[pp->pos] == '&') { ++pp->pos; - tok->kind = PpTokenKind_punctuator; + tok->kind = TokenKind_andand; tok->raw.len = 2; tok->raw.data = pp->src + pp->pos - tok->raw.len; } else { - tok->kind = PpTokenKind_punctuator; + tok->kind = TokenKind_and; tok->raw.len = 1; tok->raw.data = pp->src + pp->pos - tok->raw.len; } } else if (c == '-') { if (pp->src[pp->pos] == '>') { ++pp->pos; - tok->kind = PpTokenKind_punctuator; + tok->kind = TokenKind_arrow; tok->raw.len = 2; tok->raw.data = pp->src + pp->pos - tok->raw.len; } else if (pp->src[pp->pos] == '=') { ++pp->pos; - tok->kind = PpTokenKind_punctuator; + tok->kind = TokenKind_assign_sub; tok->raw.len = 2; tok->raw.data = pp->src + pp->pos - tok->raw.len; } else if (pp->src[pp->pos] == '-') { ++pp->pos; - tok->kind = PpTokenKind_punctuator; + tok->kind = TokenKind_minusminus; tok->raw.len = 2; tok->raw.data = pp->src + pp->pos - tok->raw.len; } else { - tok->kind = PpTokenKind_punctuator; + tok->kind = TokenKind_minus; tok->raw.len = 1; tok->raw.data = pp->src + pp->pos - tok->raw.len; } } else if (c == '*') { - tok->kind = PpTokenKind_punctuator; + tok->kind = TokenKind_star; tok->raw.len = 1; tok->raw.data = pp->src + pp->pos - tok->raw.len; } else if (c == '/') { @@ -316,7 +482,7 @@ void pp_tokenize_all(Preprocessor* pp) { while (pp->src[pp->pos] && pp->src[pp->pos] != '\n' && pp->src[pp->pos] != '\r') { ++pp->pos; } - tok->kind = PpTokenKind_whitespace; + tok->kind = TokenKind_whitespace; tok->raw.len = pp->pos - start; tok->raw.data = pp->src + pp->pos - tok->raw.len; } else if (pp->src[pp->pos] == '*') { @@ -332,16 +498,16 @@ void pp_tokenize_all(Preprocessor* pp) { } ++pp->pos; } - tok->kind = PpTokenKind_whitespace; + tok->kind = TokenKind_whitespace; tok->raw.len = pp->pos - start; tok->raw.data = pp->src + pp->pos - tok->raw.len; } else { - tok->kind = PpTokenKind_punctuator; + tok->kind = TokenKind_slash; tok->raw.len = 1; tok->raw.data = pp->src + pp->pos - tok->raw.len; } } else if (c == '%') { - tok->kind = PpTokenKind_punctuator; + tok->kind = TokenKind_percent; tok->raw.len = 1; tok->raw.data = pp->src + pp->pos - tok->raw.len; } else if (c == '.') { @@ -349,72 +515,71 @@ void pp_tokenize_all(Preprocessor* pp) { ++pp->pos; if (pp->src[pp->pos] == '.') { ++pp->pos; - tok->kind = PpTokenKind_punctuator; + tok->kind = TokenKind_ellipsis; tok->raw.len = 3; tok->raw.data = pp->src + pp->pos - tok->raw.len; } else { - --pp->pos; - tok->kind = PpTokenKind_punctuator; - tok->raw.len = 1; + tok->kind = TokenKind_other; + tok->raw.len = 2; tok->raw.data = pp->src + pp->pos - tok->raw.len; } } else { - tok->kind = PpTokenKind_punctuator; + tok->kind = TokenKind_dot; tok->raw.len = 1; tok->raw.data = pp->src + pp->pos - tok->raw.len; } } else if (c == '!') { if (pp->src[pp->pos] == '=') { ++pp->pos; - tok->kind = PpTokenKind_punctuator; + tok->kind = TokenKind_ne; tok->raw.len = 2; tok->raw.data = pp->src + pp->pos - tok->raw.len; } else { - tok->kind = PpTokenKind_punctuator; + tok->kind = TokenKind_not; tok->raw.len = 1; tok->raw.data = pp->src + pp->pos - tok->raw.len; } } else if (c == '=') { if (pp->src[pp->pos] == '=') { ++pp->pos; - tok->kind = PpTokenKind_punctuator; + tok->kind = TokenKind_eq; tok->raw.len = 2; tok->raw.data = pp->src + pp->pos - tok->raw.len; } else { - tok->kind = PpTokenKind_punctuator; + tok->kind = TokenKind_assign; tok->raw.len = 1; tok->raw.data = pp->src + pp->pos - tok->raw.len; } } else if (c == '<') { if (pp->src[pp->pos] == '=') { ++pp->pos; - tok->kind = PpTokenKind_punctuator; + tok->kind = TokenKind_le; tok->raw.len = 2; tok->raw.data = pp->src + pp->pos - tok->raw.len; } else { - tok->kind = PpTokenKind_punctuator; + tok->kind = TokenKind_lt; tok->raw.len = 1; tok->raw.data = pp->src + pp->pos - tok->raw.len; } } else if (c == '>') { if (pp->src[pp->pos] == '=') { ++pp->pos; - tok->kind = PpTokenKind_punctuator; + tok->kind = TokenKind_ge; tok->raw.len = 2; tok->raw.data = pp->src + pp->pos - tok->raw.len; } else { - tok->kind = PpTokenKind_punctuator; + tok->kind = TokenKind_gt; tok->raw.len = 1; tok->raw.data = pp->src + pp->pos - tok->raw.len; } } else if (c == '#') { if (pp->src[pp->pos] == '#') { ++pp->pos; - tok->kind = PpTokenKind_punctuator; + tok->kind = TokenKind_hashhash; tok->raw.len = 2; tok->raw.data = pp->src + pp->pos - tok->raw.len; } else { - tok->kind = PpTokenKind_punctuator; + tok->kind = TokenKind_hash; tok->raw.len = 1; tok->raw.data = pp->src + pp->pos - tok->raw.len; } @@ -424,7 +589,7 @@ void pp_tokenize_all(Preprocessor* pp) { ++pp->pos; } pp->pos += 2; - tok->kind = PpTokenKind_character_constant; + tok->kind = TokenKind_character_constant; tok->raw.data = pp->src + start; tok->raw.len = pp->pos - start; } else if (c == '"') { @@ -439,7 +604,7 @@ void pp_tokenize_all(Preprocessor* pp) { ++pp->pos; } ++pp->pos; - tok->kind = PpTokenKind_string_literal; + tok->kind = TokenKind_literal_str; tok->raw.data = pp->src + start; tok->raw.len = pp->pos - start; } else if (isdigit(c)) { @@ -448,7 +613,7 @@ void pp_tokenize_all(Preprocessor* pp) { while (isdigit(pp->src[pp->pos])) { ++pp->pos; } - tok->kind = PpTokenKind_pp_number; + tok->kind = TokenKind_literal_int; tok->raw.data = pp->src + start; tok->raw.len = pp->pos - start; } else if (isalpha(c) || c == '_') { @@ -459,16 +624,58 @@ void pp_tokenize_all(Preprocessor* pp) { } tok->raw.data = pp->src + start; tok->raw.len = pp->pos - start; - tok->kind = PpTokenKind_identifier; + if (string_equals_cstr(&tok->raw, "break")) { + tok->kind = TokenKind_keyword_break; + } else if (string_equals_cstr(&tok->raw, "char")) { + tok->kind = TokenKind_keyword_char; + } else if (string_equals_cstr(&tok->raw, "const")) { + tok->kind = TokenKind_keyword_const; + } else if (string_equals_cstr(&tok->raw, "continue")) { + tok->kind = TokenKind_keyword_continue; + } else if (string_equals_cstr(&tok->raw, "do")) { + tok->kind = TokenKind_keyword_do; + } else if (string_equals_cstr(&tok->raw, "else")) { + tok->kind = TokenKind_keyword_else; + } else if (string_equals_cstr(&tok->raw, "enum")) { + tok->kind = TokenKind_keyword_enum; + } else if (string_equals_cstr(&tok->raw, "extern")) { + tok->kind = TokenKind_keyword_extern; + } else if (string_equals_cstr(&tok->raw, "for")) { + tok->kind = TokenKind_keyword_for; + } else if (string_equals_cstr(&tok->raw, "if")) { + tok->kind = TokenKind_keyword_if; + } else if (string_equals_cstr(&tok->raw, "int")) { + tok->kind = TokenKind_keyword_int; + } else if (string_equals_cstr(&tok->raw, "long")) { + tok->kind = TokenKind_keyword_long; + } else if (string_equals_cstr(&tok->raw, "return")) { + tok->kind = TokenKind_keyword_return; + } else if (string_equals_cstr(&tok->raw, "short")) { + tok->kind = TokenKind_keyword_short; + } else if (string_equals_cstr(&tok->raw, "sizeof")) { + tok->kind = TokenKind_keyword_sizeof; + } else if (string_equals_cstr(&tok->raw, "struct")) { + tok->kind = TokenKind_keyword_struct; + } else if (string_equals_cstr(&tok->raw, "typedef")) { + tok->kind = TokenKind_keyword_typeof; + } else if (string_equals_cstr(&tok->raw, "void")) { + tok->kind = TokenKind_keyword_void; + } else if (string_equals_cstr(&tok->raw, "while")) { + tok->kind = TokenKind_keyword_while; + } else if (string_equals_cstr(&tok->raw, "va_start")) { + tok->kind = TokenKind_va_start; + } else { + tok->kind = TokenKind_ident; + } } else if (isspace(c)) { if (c == '\n' || c == '\r') { ++pp->line; } - tok->kind = PpTokenKind_whitespace; + tok->kind = TokenKind_whitespace; tok->raw.len = 1; tok->raw.data = pp->src + pp->pos - tok->raw.len; } else { - tok->kind = PpTokenKind_other; + tok->kind = TokenKind_other; tok->raw.len = 1; tok->raw.data = pp->src + pp->pos - tok->raw.len; } @@ -476,8 +683,8 @@ void pp_tokenize_all(Preprocessor* pp) { } } -PpToken* skip_whitespace(PpToken* tok) { - while (tok->kind != PpTokenKind_eof && tok->kind == PpTokenKind_whitespace) { +Token* skip_whitespace(Token* tok) { + while (tok->kind != TokenKind_eof && tok->kind == TokenKind_whitespace) { ++tok; } return tok; @@ -493,9 +700,9 @@ int string_contains_newline(String* s) { return 0; } -PpToken* find_next_newline(PpToken* tok) { - while (tok->kind != PpTokenKind_eof) { - if (tok->kind == PpTokenKind_whitespace && string_contains_newline(&tok->raw)) { +Token* find_next_newline(Token* tok) { + while (tok->kind != TokenKind_eof) { + if (tok->kind == TokenKind_whitespace && string_contains_newline(&tok->raw)) { return tok; } ++tok; @@ -503,23 +710,23 @@ PpToken* find_next_newline(PpToken* tok) { return NULL; } -void make_token_whitespace(PpToken* tok) { - tok->kind = PpTokenKind_whitespace; +void make_token_whitespace(Token* tok) { + tok->kind = TokenKind_whitespace; tok->raw.len = 0; tok->raw.data = NULL; } -void remove_directive_tokens(PpToken* start, PpToken* end) { - PpToken* tok = start; +void remove_directive_tokens(Token* start, Token* end) { + Token* tok = start; while (tok != end) { make_token_whitespace(tok); ++tok; } } -PpToken* process_endif_directive(Preprocessor* pp, PpToken* tok) { - PpToken* tok2 = skip_whitespace(tok + 1); - if (tok2->kind == PpTokenKind_identifier && string_equals_cstr(&tok2->raw, "endif")) { +Token* process_endif_directive(Preprocessor* pp, Token* tok) { + Token* tok2 = skip_whitespace(tok + 1); + if (tok2->kind == TokenKind_ident && string_equals_cstr(&tok2->raw, "endif")) { ++tok2; pp->skip_pp_tokens = 0; remove_directive_tokens(tok, tok2); @@ -528,9 +735,9 @@ PpToken* process_endif_directive(Preprocessor* pp, PpToken* tok) { return NULL; } -PpToken* process_else_directive(Preprocessor* pp, PpToken* tok) { - PpToken* tok2 = skip_whitespace(tok + 1); - if (tok2->kind == PpTokenKind_identifier && string_equals_cstr(&tok2->raw, "else")) { +Token* process_else_directive(Preprocessor* pp, Token* tok) { + Token* tok2 = skip_whitespace(tok + 1); + if (tok2->kind == TokenKind_keyword_else) { ++tok2; pp->skip_pp_tokens = 1 - pp->skip_pp_tokens; remove_directive_tokens(tok, tok2); @@ -539,13 +746,13 @@ PpToken* process_else_directive(Preprocessor* pp, PpToken* tok) { return NULL; } -PpToken* process_ifdef_directive(Preprocessor* pp, PpToken* tok) { - PpToken* tok2 = skip_whitespace(tok + 1); - if (tok2->kind == PpTokenKind_identifier && string_equals_cstr(&tok2->raw, "ifdef")) { +Token* process_ifdef_directive(Preprocessor* pp, Token* tok) { + Token* tok2 = skip_whitespace(tok + 1); + if (tok2->kind == TokenKind_ident && string_equals_cstr(&tok2->raw, "ifdef")) { ++tok2; tok2 = skip_whitespace(tok2); - if (tok2->kind == PpTokenKind_identifier) { - PpToken* name = tok2; + if (tok2->kind == TokenKind_ident) { + Token* name = tok2; ++tok2; pp->skip_pp_tokens = find_pp_macro(pp, &name->raw) == -1; } @@ -555,13 +762,13 @@ PpToken* process_ifdef_directive(Preprocessor* pp, PpToken* tok) { return NULL; } -PpToken* process_ifndef_directive(Preprocessor* pp, PpToken* tok) { - PpToken* tok2 = skip_whitespace(tok + 1); - if (tok2->kind == PpTokenKind_identifier && string_equals_cstr(&tok2->raw, "ifndef")) { +Token* process_ifndef_directive(Preprocessor* pp, Token* tok) { + Token* tok2 = skip_whitespace(tok + 1); + if (tok2->kind == TokenKind_ident && string_equals_cstr(&tok2->raw, "ifndef")) { ++tok2; tok2 = skip_whitespace(tok2); - if (tok2->kind == PpTokenKind_identifier) { - PpToken* name = tok2; + if (tok2->kind == TokenKind_ident) { + Token* name = tok2; ++tok2; pp->skip_pp_tokens = find_pp_macro(pp, &name->raw) != -1; } @@ -571,23 +778,23 @@ PpToken* process_ifndef_directive(Preprocessor* pp, PpToken* tok) { return NULL; } -PpToken* read_include_header_name(PpToken* tok2, String* include_name) { - if (tok2->kind == PpTokenKind_string_literal) { +Token* read_include_header_name(Token* tok2, String* include_name) { + if (tok2->kind == TokenKind_literal_str) { *include_name = tok2->raw; ++tok2; return tok2; - } else if (tok2->kind == PpTokenKind_punctuator && string_equals_cstr(&tok2->raw, "<")) { + } else if (tok2->kind == TokenKind_lt) { char* include_name_start = tok2->raw.data; ++tok2; int include_name_len = 0; - while (tok2->kind != PpTokenKind_eof) { - if (tok2->kind == PpTokenKind_punctuator && string_equals_cstr(&tok2->raw, ">")) { + while (tok2->kind != TokenKind_eof) { + if (tok2->kind == TokenKind_gt) { break; } include_name_len += tok2->raw.len; ++tok2; } - if (tok2->kind == PpTokenKind_eof) { + if (tok2->kind == TokenKind_eof) { fatal_error("invalid #include: <> not balanced"); } ++tok2; @@ -616,8 +823,8 @@ const char* resolve_include_name(Preprocessor* pp, String* include_name) { } } -PpToken* replace_pp_tokens(Preprocessor* pp, PpToken* dest_start, PpToken* dest_end, int n_source_tokens, - PpToken* source_tokens) { +Token* replace_pp_tokens(Preprocessor* pp, Token* dest_start, Token* dest_end, int n_source_tokens, + Token* source_tokens) { int n_tokens_to_remove = dest_end - dest_start; int n_tokens_after_dest = (pp->pp_tokens + pp->n_pp_tokens) - dest_end; int shift_amount; @@ -625,35 +832,35 @@ PpToken* replace_pp_tokens(Preprocessor* pp, PpToken* dest_start, PpToken* dest_ if (n_tokens_to_remove < n_source_tokens) { // Move existing tokens backward to make room. shift_amount = n_source_tokens - n_tokens_to_remove; - memmove(dest_end + shift_amount, dest_end, n_tokens_after_dest * sizeof(PpToken)); + memmove(dest_end + shift_amount, dest_end, n_tokens_after_dest * sizeof(Token)); pp->n_pp_tokens += shift_amount; } else if (n_source_tokens < n_tokens_to_remove) { // Move existing tokens forward to reduce room. shift_amount = n_tokens_to_remove - n_source_tokens; - memmove(dest_start + n_source_tokens, dest_end, n_tokens_after_dest * sizeof(PpToken)); + memmove(dest_start + n_source_tokens, dest_end, n_tokens_after_dest * sizeof(Token)); pp->n_pp_tokens -= shift_amount; - memset(pp->pp_tokens + pp->n_pp_tokens, 0, shift_amount * sizeof(PpToken)); + memset(pp->pp_tokens + pp->n_pp_tokens, 0, shift_amount * sizeof(Token)); } - memcpy(dest_start, source_tokens, n_source_tokens * sizeof(PpToken)); + memcpy(dest_start, source_tokens, n_source_tokens * sizeof(Token)); return dest_start + n_source_tokens; } -PpToken* expand_include_directive(Preprocessor* pp, PpToken* tok, PpToken* tok2, const char* include_name_buf) { +Token* expand_include_directive(Preprocessor* pp, Token* tok, Token* tok2, const char* include_name_buf) { InFile* include_source = read_all(include_name_buf); if (!include_source) { fatal_error("cannot open include file: %s", include_name_buf); } - PpToken* include_pp_tokens = do_preprocess(include_source, pp->include_depth + 1, pp->pp_macros); + Token* include_pp_tokens = do_preprocess(include_source, pp->include_depth + 1, pp->pp_macros); return replace_pp_tokens(pp, tok, tok2 + 1, count_pp_tokens(include_pp_tokens), include_pp_tokens); } -PpToken* process_include_directive(Preprocessor* pp, PpToken* tok) { - PpToken* tok2 = skip_whitespace(tok + 1); +Token* process_include_directive(Preprocessor* pp, Token* tok) { + Token* tok2 = skip_whitespace(tok + 1); - if (tok2->kind == PpTokenKind_identifier && string_equals_cstr(&tok2->raw, "include")) { + if (tok2->kind == TokenKind_ident && string_equals_cstr(&tok2->raw, "include")) { ++tok2; tok2 = skip_whitespace(tok2); String* include_name = calloc(1, sizeof(String)); @@ -667,20 +874,20 @@ PpToken* process_include_directive(Preprocessor* pp, PpToken* tok) { return NULL; } -PpToken* process_define_directive(Preprocessor* pp, PpToken* tok) { - PpToken* tok2 = skip_whitespace(tok + 1); - PpToken* tok3 = NULL; +Token* process_define_directive(Preprocessor* pp, Token* tok) { + Token* tok2 = skip_whitespace(tok + 1); + Token* tok3 = NULL; PpMacro* pp_macro; int i; - if (tok2->kind == PpTokenKind_identifier && string_equals_cstr(&tok2->raw, "define")) { + if (tok2->kind == TokenKind_ident && string_equals_cstr(&tok2->raw, "define")) { ++tok2; tok2 = skip_whitespace(tok2); - if (tok2->kind == PpTokenKind_identifier) { - PpToken* macro_name = tok2; + if (tok2->kind == TokenKind_ident) { + Token* macro_name = tok2; ++tok2; - if (tok2->kind == PpTokenKind_punctuator && string_equals_cstr(&tok2->raw, "(")) { + if (tok2->kind == TokenKind_paren_l) { ++tok2; - if (tok2->kind == PpTokenKind_punctuator && string_equals_cstr(&tok2->raw, ")")) { + if (tok2->kind == TokenKind_paren_r) { ++tok2; } else { fatal_error("%s:%d: invalid function-like macro syntax (#define %.*s)", macro_name->loc.filename, @@ -692,7 +899,7 @@ PpToken* process_define_directive(Preprocessor* pp, PpToken* tok) { pp_macro->kind = PpMacroKind_func; pp_macro->name = macro_name->raw; pp_macro->n_replacements = tok3 - tok2; - pp_macro->replacements = calloc(pp_macro->n_replacements, sizeof(PpToken)); + pp_macro->replacements = calloc(pp_macro->n_replacements, sizeof(Token)); for (i = 0; i < pp_macro->n_replacements; ++i) { pp_macro->replacements[i] = tok2[i]; } @@ -705,7 +912,7 @@ PpToken* process_define_directive(Preprocessor* pp, PpToken* tok) { pp_macro->kind = PpMacroKind_obj; pp_macro->name = macro_name->raw; pp_macro->n_replacements = tok3 - tok2; - pp_macro->replacements = calloc(pp_macro->n_replacements, sizeof(PpToken)); + pp_macro->replacements = calloc(pp_macro->n_replacements, sizeof(Token)); for (i = 0; i < pp_macro->n_replacements; ++i) { pp_macro->replacements[i] = tok2[i]; } @@ -721,7 +928,7 @@ PpToken* process_define_directive(Preprocessor* pp, PpToken* tok) { return NULL; } -int expand_macro(Preprocessor* pp, PpToken* tok) { +int expand_macro(Preprocessor* pp, Token* tok) { int pp_macro_idx = find_pp_macro(pp, &tok->raw); if (pp_macro_idx == -1) { return 0; @@ -744,15 +951,15 @@ int expand_macro(Preprocessor* pp, PpToken* tok) { tok[i].loc = original_loc; } } else if (pp_macro->kind == PpMacroKind_builtin_file) { - PpToken* file_tok = calloc(1, sizeof(PpToken)); - file_tok->kind = PpTokenKind_string_literal; + Token* file_tok = calloc(1, sizeof(Token)); + file_tok->kind = TokenKind_literal_str; file_tok->raw.len = strlen(tok->loc.filename) + 2; file_tok->raw.data = calloc(file_tok->raw.len, sizeof(char)); sprintf(file_tok->raw.data, "\"%s\"", tok->loc.filename); replace_pp_tokens(pp, tok, tok + 1, 1, file_tok); } else if (pp_macro->kind == PpMacroKind_builtin_line) { - PpToken* line_tok = calloc(1, sizeof(PpToken)); - line_tok->kind = PpTokenKind_pp_number; + Token* line_tok = calloc(1, sizeof(Token)); + line_tok->kind = TokenKind_literal_int; line_tok->raw.data = calloc(10, sizeof(char)); sprintf(line_tok->raw.data, "%d", tok->loc.line); line_tok->raw.len = strlen(line_tok->raw.data); @@ -764,11 +971,11 @@ int expand_macro(Preprocessor* pp, PpToken* tok) { } void process_pp_directives(Preprocessor* pp) { - PpToken* tok = pp->pp_tokens; + Token* tok = pp->pp_tokens; - while (tok->kind != PpTokenKind_eof) { - if (tok->kind == PpTokenKind_punctuator && string_equals_cstr(&tok->raw, "#")) { - PpToken* next_tok; + while (tok->kind != TokenKind_eof) { + if (tok->kind == TokenKind_hash) { + Token* next_tok; if ((next_tok = process_endif_directive(pp, tok)) != NULL) { tok = next_tok; @@ -797,7 +1004,7 @@ void process_pp_directives(Preprocessor* pp) { } } else if (skip_pp_tokens(pp)) { make_token_whitespace(tok); - } else if (tok->kind == PpTokenKind_identifier) { + } else if (tok->kind == TokenKind_ident) { int expanded = expand_macro(pp, tok); if (expanded) { // A macro may expand to another macro. Re-scan the expanded tokens. @@ -809,12 +1016,12 @@ void process_pp_directives(Preprocessor* pp) { } } -void pp_dump(PpToken* t, int include_whitespace) { - for (; t->kind != PpTokenKind_eof; ++t) { - if (t->kind == PpTokenKind_whitespace && !include_whitespace) { +void pp_dump(Token* t, int include_whitespace) { + for (; t->kind != TokenKind_eof; ++t) { + if (t->kind == TokenKind_whitespace && !include_whitespace) { continue; } - fprintf(stderr, "%s\n", pp_token_stringify(t)); + fprintf(stderr, "%s\n", token_stringify(t)); } } @@ -825,7 +1032,7 @@ char* get_ducc_include_path() { return buf; } -PpToken* do_preprocess(InFile* src, int depth, PpMacros* pp_macros) { +Token* do_preprocess(InFile* src, int depth, PpMacros* pp_macros) { Preprocessor* pp = preprocessor_new(src, depth, pp_macros); add_include_path(pp, get_ducc_include_path()); add_include_path(pp, "/usr/include/x86_64-linux-gnu"); @@ -835,7 +1042,7 @@ PpToken* do_preprocess(InFile* src, int depth, PpMacros* pp_macros) { return pp->pp_tokens; } -PpToken* preprocess(InFile* src) { +Token* preprocess(InFile* src) { PpMacros* pp_macros = pp_macros_new(); add_predefined_macros(pp_macros); return do_preprocess(src, 0, pp_macros); @@ -1,210 +1,12 @@ -enum TokenKind { - TokenKind_eof, - - TokenKind_and, - TokenKind_andand, - TokenKind_arrow, - TokenKind_assign, - TokenKind_assign_add, - TokenKind_assign_sub, - TokenKind_brace_l, - TokenKind_brace_r, - TokenKind_bracket_l, - TokenKind_bracket_r, - TokenKind_comma, - TokenKind_dot, - TokenKind_ellipsis, - TokenKind_eq, - TokenKind_ge, - TokenKind_gt, - TokenKind_ident, - TokenKind_keyword_break, - TokenKind_keyword_char, - TokenKind_keyword_const, - TokenKind_keyword_continue, - TokenKind_keyword_do, - TokenKind_keyword_else, - TokenKind_keyword_enum, - TokenKind_keyword_extern, - TokenKind_keyword_for, - TokenKind_keyword_if, - TokenKind_keyword_int, - TokenKind_keyword_long, - TokenKind_keyword_return, - TokenKind_keyword_short, - TokenKind_keyword_sizeof, - TokenKind_keyword_struct, - TokenKind_keyword_typeof, - TokenKind_keyword_void, - TokenKind_keyword_while, - TokenKind_le, - TokenKind_lt, - TokenKind_literal_int, - TokenKind_literal_str, - TokenKind_minus, - TokenKind_minusminus, - TokenKind_ne, - TokenKind_not, - TokenKind_or, - TokenKind_oror, - TokenKind_paren_l, - TokenKind_paren_r, - TokenKind_percent, - TokenKind_plus, - TokenKind_plusplus, - TokenKind_semicolon, - TokenKind_slash, - TokenKind_star, - - // va_start() is currently implemented as a special form due to the current limitation of #define macro. - TokenKind_va_start, -}; -typedef enum TokenKind TokenKind; - -struct Token { - TokenKind kind; - String raw; -}; -typedef struct Token Token; - -const char* token_kind_stringify(TokenKind k) { - if (k == TokenKind_eof) - return "<eof>"; - else if (k == TokenKind_and) - return "&"; - else if (k == TokenKind_andand) - return "&&"; - else if (k == TokenKind_arrow) - return "->"; - else if (k == TokenKind_assign) - return "="; - else if (k == TokenKind_assign_add) - return "+="; - else if (k == TokenKind_assign_sub) - return "-="; - else if (k == TokenKind_brace_l) - return "{"; - else if (k == TokenKind_brace_r) - return "}"; - else if (k == TokenKind_bracket_l) - return "["; - else if (k == TokenKind_bracket_r) - return "]"; - else if (k == TokenKind_comma) - return ","; - else if (k == TokenKind_dot) - return "."; - else if (k == TokenKind_ellipsis) - return "..."; - else if (k == TokenKind_eq) - return "=="; - else if (k == TokenKind_ge) - return ">="; - else if (k == TokenKind_gt) - return ">"; - else if (k == TokenKind_ident) - return "<identifier>"; - else if (k == TokenKind_keyword_break) - return "break"; - else if (k == TokenKind_keyword_char) - return "char"; - else if (k == TokenKind_keyword_const) - return "const"; - else if (k == TokenKind_keyword_continue) - return "continue"; - else if (k == TokenKind_keyword_do) - return "do"; - else if (k == TokenKind_keyword_else) - return "else"; - else if (k == TokenKind_keyword_enum) - return "enum"; - else if (k == TokenKind_keyword_extern) - return "extern"; - else if (k == TokenKind_keyword_for) - return "for"; - else if (k == TokenKind_keyword_if) - return "if"; - else if (k == TokenKind_keyword_int) - return "int"; - else if (k == TokenKind_keyword_long) - return "long"; - else if (k == TokenKind_keyword_return) - return "return"; - else if (k == TokenKind_keyword_short) - return "short"; - else if (k == TokenKind_keyword_sizeof) - return "sizeof"; - else if (k == TokenKind_keyword_struct) - return "struct"; - else if (k == TokenKind_keyword_typeof) - return "typeof"; - else if (k == TokenKind_keyword_void) - return "void"; - else if (k == TokenKind_keyword_while) - return "while"; - else if (k == TokenKind_le) - return "le"; - else if (k == TokenKind_lt) - return "lt"; - else if (k == TokenKind_literal_int) - return "<integer>"; - else if (k == TokenKind_literal_str) - return "<string>"; - else if (k == TokenKind_minus) - return "-"; - else if (k == TokenKind_minusminus) - return "--"; - else if (k == TokenKind_ne) - return "!="; - else if (k == TokenKind_not) - return "!"; - else if (k == TokenKind_or) - return "|"; - else if (k == TokenKind_oror) - return "||"; - else if (k == TokenKind_paren_l) - return "("; - else if (k == TokenKind_paren_r) - return ")"; - else if (k == TokenKind_percent) - return "%"; - else if (k == TokenKind_plus) - return "+"; - else if (k == TokenKind_plusplus) - return "++"; - else if (k == TokenKind_semicolon) - return ";"; - else if (k == TokenKind_slash) - return "/"; - else if (k == TokenKind_star) - return "*"; - else if (k == TokenKind_va_start) - return "va_start"; - else - unreachable(); -} - -const char* token_stringify(Token* t) { - TokenKind k = t->kind; - if (k == TokenKind_ident || k == TokenKind_literal_int || k == TokenKind_literal_str) { - const char* kind_str = token_kind_stringify(k); - char* buf = calloc(t->raw.len + strlen(kind_str) + 3 + 1, sizeof(char)); - sprintf(buf, "%.*s (%s)", t->raw.len, t->raw.data, kind_str); - return buf; - } else { - return token_kind_stringify(k); - } -} - struct Lexer { - PpToken* src; + Token* src; int pos; Token* tokens; int n_tokens; }; typedef struct Lexer Lexer; -Lexer* lexer_new(PpToken* pp_tokens) { +Lexer* lexer_new(Token* pp_tokens) { Lexer* l = calloc(1, sizeof(Lexer)); l->src = pp_tokens; l->tokens = calloc(1024 * 1024, sizeof(Token)); @@ -214,62 +16,12 @@ Lexer* lexer_new(PpToken* pp_tokens) { void tokenize_all(Lexer* l) { int ch; int start; - while (l->src[l->pos].kind != PpTokenKind_eof) { - PpToken* pp_tok = l->src + l->pos; + while (l->src[l->pos].kind != TokenKind_eof) { + Token* pp_tok = l->src + l->pos; Token* tok = l->tokens + l->n_tokens; - PpTokenKind k = pp_tok->kind; + TokenKind k = pp_tok->kind; ++l->pos; - if (k == PpTokenKind_header_name) { - unimplemented(); - } else if (k == PpTokenKind_identifier) { - if (string_equals_cstr(&pp_tok->raw, "break")) { - tok->kind = TokenKind_keyword_break; - } else if (string_equals_cstr(&pp_tok->raw, "char")) { - tok->kind = TokenKind_keyword_char; - } else if (string_equals_cstr(&pp_tok->raw, "const")) { - tok->kind = TokenKind_keyword_const; - } else if (string_equals_cstr(&pp_tok->raw, "continue")) { - tok->kind = TokenKind_keyword_continue; - } else if (string_equals_cstr(&pp_tok->raw, "do")) { - tok->kind = TokenKind_keyword_do; - } else if (string_equals_cstr(&pp_tok->raw, "else")) { - tok->kind = TokenKind_keyword_else; - } else if (string_equals_cstr(&pp_tok->raw, "enum")) { - tok->kind = TokenKind_keyword_enum; - } else if (string_equals_cstr(&pp_tok->raw, "extern")) { - tok->kind = TokenKind_keyword_extern; - } else if (string_equals_cstr(&pp_tok->raw, "for")) { - tok->kind = TokenKind_keyword_for; - } else if (string_equals_cstr(&pp_tok->raw, "if")) { - tok->kind = TokenKind_keyword_if; - } else if (string_equals_cstr(&pp_tok->raw, "int")) { - tok->kind = TokenKind_keyword_int; - } else if (string_equals_cstr(&pp_tok->raw, "long")) { - tok->kind = TokenKind_keyword_long; - } else if (string_equals_cstr(&pp_tok->raw, "return")) { - tok->kind = TokenKind_keyword_return; - } else if (string_equals_cstr(&pp_tok->raw, "short")) { - tok->kind = TokenKind_keyword_short; - } else if (string_equals_cstr(&pp_tok->raw, "sizeof")) { - tok->kind = TokenKind_keyword_sizeof; - } else if (string_equals_cstr(&pp_tok->raw, "struct")) { - tok->kind = TokenKind_keyword_struct; - } else if (string_equals_cstr(&pp_tok->raw, "typedef")) { - tok->kind = TokenKind_keyword_typeof; - } else if (string_equals_cstr(&pp_tok->raw, "void")) { - tok->kind = TokenKind_keyword_void; - } else if (string_equals_cstr(&pp_tok->raw, "while")) { - tok->kind = TokenKind_keyword_while; - } else if (string_equals_cstr(&pp_tok->raw, "va_start")) { - tok->kind = TokenKind_va_start; - } else { - tok->kind = TokenKind_ident; - } - tok->raw = pp_tok->raw; - } else if (k == PpTokenKind_pp_number) { - tok->kind = TokenKind_literal_int; - tok->raw = pp_tok->raw; - } else if (k == PpTokenKind_character_constant) { + if (k == TokenKind_character_constant) { tok->kind = TokenKind_literal_int; ch = pp_tok->raw.data[1]; if (ch == '\\') { @@ -296,87 +48,23 @@ void tokenize_all(Lexer* l) { sprintf(buf, "%d", ch); tok->raw.data = buf; tok->raw.len = strlen(buf); - } else if (k == PpTokenKind_string_literal) { + } else if (k == TokenKind_literal_str) { tok->kind = TokenKind_literal_str; tok->raw.data = pp_tok->raw.data + 1; tok->raw.len = pp_tok->raw.len - 2; - } else if (k == PpTokenKind_punctuator || k == PpTokenKind_other) { - if (string_equals_cstr(&pp_tok->raw, "(")) { - tok->kind = TokenKind_paren_l; - } else if (string_equals_cstr(&pp_tok->raw, ")")) { - tok->kind = TokenKind_paren_r; - } else if (string_equals_cstr(&pp_tok->raw, "{")) { - tok->kind = TokenKind_brace_l; - } else if (string_equals_cstr(&pp_tok->raw, "}")) { - tok->kind = TokenKind_brace_r; - } else if (string_equals_cstr(&pp_tok->raw, "[")) { - tok->kind = TokenKind_bracket_l; - } else if (string_equals_cstr(&pp_tok->raw, "]")) { - tok->kind = TokenKind_bracket_r; - } else if (string_equals_cstr(&pp_tok->raw, ",")) { - tok->kind = TokenKind_comma; - } else if (string_equals_cstr(&pp_tok->raw, ";")) { - tok->kind = TokenKind_semicolon; - } else if (string_equals_cstr(&pp_tok->raw, "+=")) { - tok->kind = TokenKind_assign_add; - } else if (string_equals_cstr(&pp_tok->raw, "++")) { - tok->kind = TokenKind_plusplus; - } else if (string_equals_cstr(&pp_tok->raw, "+")) { - tok->kind = TokenKind_plus; - } else if (string_equals_cstr(&pp_tok->raw, "||")) { - tok->kind = TokenKind_oror; - } else if (string_equals_cstr(&pp_tok->raw, "|")) { - tok->kind = TokenKind_or; - } else if (string_equals_cstr(&pp_tok->raw, "&&")) { - tok->kind = TokenKind_andand; - } else if (string_equals_cstr(&pp_tok->raw, "&")) { - tok->kind = TokenKind_and; - } else if (string_equals_cstr(&pp_tok->raw, "->")) { - tok->kind = TokenKind_arrow; - } else if (string_equals_cstr(&pp_tok->raw, "-=")) { - tok->kind = TokenKind_assign_sub; - } else if (string_equals_cstr(&pp_tok->raw, "--")) { - tok->kind = TokenKind_minusminus; - } else if (string_equals_cstr(&pp_tok->raw, "-")) { - tok->kind = TokenKind_minus; - } else if (string_equals_cstr(&pp_tok->raw, "*")) { - tok->kind = TokenKind_star; - } else if (string_equals_cstr(&pp_tok->raw, "/")) { - tok->kind = TokenKind_slash; - } else if (string_equals_cstr(&pp_tok->raw, "%")) { - tok->kind = TokenKind_percent; - } else if (string_equals_cstr(&pp_tok->raw, "...")) { - tok->kind = TokenKind_ellipsis; - } else if (string_equals_cstr(&pp_tok->raw, ".")) { - tok->kind = TokenKind_dot; - } else if (string_equals_cstr(&pp_tok->raw, "!=")) { - tok->kind = TokenKind_ne; - } else if (string_equals_cstr(&pp_tok->raw, "!")) { - tok->kind = TokenKind_not; - } else if (string_equals_cstr(&pp_tok->raw, "==")) { - tok->kind = TokenKind_eq; - } else if (string_equals_cstr(&pp_tok->raw, "=")) { - tok->kind = TokenKind_assign; - } else if (string_equals_cstr(&pp_tok->raw, "<=")) { - tok->kind = TokenKind_le; - } else if (string_equals_cstr(&pp_tok->raw, "<")) { - tok->kind = TokenKind_lt; - } else if (string_equals_cstr(&pp_tok->raw, ">=")) { - tok->kind = TokenKind_ge; - } else if (string_equals_cstr(&pp_tok->raw, ">")) { - tok->kind = TokenKind_gt; - } else { - fatal_error("unknown token: %.*s", pp_tok->raw.len, pp_tok->raw.data); - } - tok->raw = pp_tok->raw; - } else if (k == PpTokenKind_whitespace) { + } else if (k == TokenKind_other) { + unreachable(); + } else if (k == TokenKind_whitespace) { continue; + } else { + tok->kind = pp_tok->kind; + tok->raw = pp_tok->raw; } ++l->n_tokens; } } -Token* tokenize(PpToken* pp_tokens) { +Token* tokenize(Token* pp_tokens) { Lexer* l = lexer_new(pp_tokens); tokenize_all(l); return l->tokens; |
