aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authornsfisis <nsfisis@gmail.com>2025-08-03 14:02:54 +0900
committernsfisis <nsfisis@gmail.com>2025-08-15 10:06:21 +0900
commit1364b1303e96221c52568aed0726adc83aee1dc4 (patch)
treed4569d48d543e83593fdd4a8fcfc919a65eb47cb
parente1de8fc36f11ac932707c7113eb4bf3ebc4b1f74 (diff)
downloadducc-1364b1303e96221c52568aed0726adc83aee1dc4.tar.gz
ducc-1364b1303e96221c52568aed0726adc83aee1dc4.tar.zst
ducc-1364b1303e96221c52568aed0726adc83aee1dc4.zip
refactor: merge PpToken and Token
-rw-r--r--main.c2
-rw-r--r--preprocess.c525
-rw-r--r--tokenize.c340
3 files changed, 381 insertions, 486 deletions
diff --git a/main.c b/main.c
index 4b2143d..733f6c1 100644
--- a/main.c
+++ b/main.c
@@ -16,7 +16,7 @@ int main(int argc, char** argv) {
fatal_error("usage: ducc <FILE>");
}
InFile* source = read_all(argv[1]);
- PpToken* pp_tokens = preprocess(source);
+ Token* pp_tokens = preprocess(source);
Token* tokens = tokenize(pp_tokens);
Program* prog = parse(tokens);
codegen(prog);
diff --git a/preprocess.c b/preprocess.c
index 0978dd3..2caa37c 100644
--- a/preprocess.c
+++ b/preprocess.c
@@ -1,36 +1,196 @@
-enum PpTokenKind {
- PpTokenKind_eof,
-
- PpTokenKind_header_name,
- PpTokenKind_identifier,
- PpTokenKind_pp_number,
- PpTokenKind_character_constant,
- PpTokenKind_string_literal,
- PpTokenKind_punctuator,
- PpTokenKind_other,
- PpTokenKind_whitespace,
+enum TokenKind {
+ TokenKind_eof,
+
+ // Only preprocessing phase.
+ TokenKind_hash,
+ TokenKind_hashhash,
+ TokenKind_whitespace,
+ TokenKind_other,
+ TokenKind_character_constant,
+
+ TokenKind_and,
+ TokenKind_andand,
+ TokenKind_arrow,
+ TokenKind_assign,
+ TokenKind_assign_add,
+ TokenKind_assign_sub,
+ TokenKind_brace_l,
+ TokenKind_brace_r,
+ TokenKind_bracket_l,
+ TokenKind_bracket_r,
+ TokenKind_comma,
+ TokenKind_dot,
+ TokenKind_ellipsis,
+ TokenKind_eq,
+ TokenKind_ge,
+ TokenKind_gt,
+ TokenKind_ident,
+ TokenKind_keyword_break,
+ TokenKind_keyword_char,
+ TokenKind_keyword_const,
+ TokenKind_keyword_continue,
+ TokenKind_keyword_do,
+ TokenKind_keyword_else,
+ TokenKind_keyword_enum,
+ TokenKind_keyword_extern,
+ TokenKind_keyword_for,
+ TokenKind_keyword_if,
+ TokenKind_keyword_int,
+ TokenKind_keyword_long,
+ TokenKind_keyword_return,
+ TokenKind_keyword_short,
+ TokenKind_keyword_sizeof,
+ TokenKind_keyword_struct,
+ TokenKind_keyword_typeof,
+ TokenKind_keyword_void,
+ TokenKind_keyword_while,
+ TokenKind_le,
+ TokenKind_lt,
+ TokenKind_literal_int,
+ TokenKind_literal_str,
+ TokenKind_minus,
+ TokenKind_minusminus,
+ TokenKind_ne,
+ TokenKind_not,
+ TokenKind_or,
+ TokenKind_oror,
+ TokenKind_paren_l,
+ TokenKind_paren_r,
+ TokenKind_percent,
+ TokenKind_plus,
+ TokenKind_plusplus,
+ TokenKind_semicolon,
+ TokenKind_slash,
+ TokenKind_star,
+
+ // va_start() is currently implemented as a special form due to the current limitation of #define macro.
+ TokenKind_va_start,
};
-typedef enum PpTokenKind PpTokenKind;
+typedef enum TokenKind TokenKind;
-const char* pp_token_kind_stringify(PpTokenKind kind) {
- if (kind == PpTokenKind_eof)
+const char* token_kind_stringify(TokenKind k) {
+ if (k == TokenKind_eof)
return "<eof>";
- else if (kind == PpTokenKind_header_name)
- return "<header-name>";
- else if (kind == PpTokenKind_identifier)
- return "<identifier>";
- else if (kind == PpTokenKind_pp_number)
- return "<pp-number>";
- else if (kind == PpTokenKind_character_constant)
- return "<character-constant>";
- else if (kind == PpTokenKind_string_literal)
- return "<string-literal>";
- else if (kind == PpTokenKind_punctuator)
- return "<punctuator>";
- else if (kind == PpTokenKind_other)
- return "<other>";
- else if (kind == PpTokenKind_whitespace)
+ else if (k == TokenKind_hash)
+ return "#";
+ else if (k == TokenKind_hashhash)
+ return "##";
+ else if (k == TokenKind_whitespace)
return "<whitespace>";
+ else if (k == TokenKind_other)
+ return "<other>";
+ else if (k == TokenKind_character_constant)
+ return "<character-constant>";
+ else if (k == TokenKind_and)
+ return "&";
+ else if (k == TokenKind_andand)
+ return "&&";
+ else if (k == TokenKind_arrow)
+ return "->";
+ else if (k == TokenKind_assign)
+ return "=";
+ else if (k == TokenKind_assign_add)
+ return "+=";
+ else if (k == TokenKind_assign_sub)
+ return "-=";
+ else if (k == TokenKind_brace_l)
+ return "{";
+ else if (k == TokenKind_brace_r)
+ return "}";
+ else if (k == TokenKind_bracket_l)
+ return "[";
+ else if (k == TokenKind_bracket_r)
+ return "]";
+ else if (k == TokenKind_comma)
+ return ",";
+ else if (k == TokenKind_dot)
+ return ".";
+ else if (k == TokenKind_ellipsis)
+ return "...";
+ else if (k == TokenKind_eq)
+ return "==";
+ else if (k == TokenKind_ge)
+ return ">=";
+ else if (k == TokenKind_gt)
+ return ">";
+ else if (k == TokenKind_ident)
+ return "<identifier>";
+ else if (k == TokenKind_keyword_break)
+ return "break";
+ else if (k == TokenKind_keyword_char)
+ return "char";
+ else if (k == TokenKind_keyword_const)
+ return "const";
+ else if (k == TokenKind_keyword_continue)
+ return "continue";
+ else if (k == TokenKind_keyword_do)
+ return "do";
+ else if (k == TokenKind_keyword_else)
+ return "else";
+ else if (k == TokenKind_keyword_enum)
+ return "enum";
+ else if (k == TokenKind_keyword_extern)
+ return "extern";
+ else if (k == TokenKind_keyword_for)
+ return "for";
+ else if (k == TokenKind_keyword_if)
+ return "if";
+ else if (k == TokenKind_keyword_int)
+ return "int";
+ else if (k == TokenKind_keyword_long)
+ return "long";
+ else if (k == TokenKind_keyword_return)
+ return "return";
+ else if (k == TokenKind_keyword_short)
+ return "short";
+ else if (k == TokenKind_keyword_sizeof)
+ return "sizeof";
+ else if (k == TokenKind_keyword_struct)
+ return "struct";
+ else if (k == TokenKind_keyword_typeof)
+ return "typedef";
+ else if (k == TokenKind_keyword_void)
+ return "void";
+ else if (k == TokenKind_keyword_while)
+ return "while";
+ else if (k == TokenKind_le)
+ return "<=";
+ else if (k == TokenKind_lt)
+ return "<";
+ else if (k == TokenKind_literal_int)
+ return "<integer>";
+ else if (k == TokenKind_literal_str)
+ return "<string>";
+ else if (k == TokenKind_minus)
+ return "-";
+ else if (k == TokenKind_minusminus)
+ return "--";
+ else if (k == TokenKind_ne)
+ return "!=";
+ else if (k == TokenKind_not)
+ return "!";
+ else if (k == TokenKind_or)
+ return "|";
+ else if (k == TokenKind_oror)
+ return "||";
+ else if (k == TokenKind_paren_l)
+ return "(";
+ else if (k == TokenKind_paren_r)
+ return ")";
+ else if (k == TokenKind_percent)
+ return "%";
+ else if (k == TokenKind_plus)
+ return "+";
+ else if (k == TokenKind_plusplus)
+ return "++";
+ else if (k == TokenKind_semicolon)
+ return ";";
+ else if (k == TokenKind_slash)
+ return "/";
+ else if (k == TokenKind_star)
+ return "*";
+ else if (k == TokenKind_va_start)
+ return "va_start";
else
unreachable();
}
@@ -41,18 +201,24 @@ struct SourceLocation {
};
typedef struct SourceLocation SourceLocation;
-struct PpToken {
- PpTokenKind kind;
+struct Token {
+ TokenKind kind;
String raw;
SourceLocation loc;
};
-typedef struct PpToken PpToken;
+typedef struct Token Token;
-const char* pp_token_stringify(PpToken* tok) {
- const char* kind_str = pp_token_kind_stringify(tok->kind);
- char* buf = calloc(tok->raw.len + strlen(kind_str) + 3 + 1, sizeof(char));
- sprintf(buf, "%.*s (%s)", tok->raw.len, tok->raw.data, kind_str);
- return buf;
+const char* token_stringify(Token* t) {
+ TokenKind k = t->kind;
+ if (k == TokenKind_other || k == TokenKind_character_constant || k == TokenKind_whitespace ||
+ k == TokenKind_ident || k == TokenKind_literal_int || k == TokenKind_literal_str) {
+ const char* kind_str = token_kind_stringify(k);
+ char* buf = calloc(t->raw.len + strlen(kind_str) + 3 + 1, sizeof(char));
+ sprintf(buf, "%.*s (%s)", t->raw.len, t->raw.data, kind_str);
+ return buf;
+ } else {
+ return token_kind_stringify(k);
+ }
}
enum PpMacroKind {
@@ -80,7 +246,7 @@ struct PpMacro {
PpMacroKind kind;
String name;
size_t n_replacements;
- PpToken* replacements;
+ Token* replacements;
};
typedef struct PpMacro PpMacro;
@@ -95,7 +261,7 @@ struct Preprocessor {
int line;
char* src;
int pos;
- PpToken* pp_tokens;
+ Token* pp_tokens;
int n_pp_tokens;
PpMacros* pp_macros;
int include_depth;
@@ -105,7 +271,7 @@ struct Preprocessor {
};
typedef struct Preprocessor Preprocessor;
-PpToken* do_preprocess(InFile* src, int depth, PpMacros* pp_macros);
+Token* do_preprocess(InFile* src, int depth, PpMacros* pp_macros);
PpMacros* pp_macros_new() {
PpMacros* pp_macros = calloc(1, sizeof(PpMacros));
@@ -139,8 +305,8 @@ void add_predefined_macros(PpMacros* pp_macros) {
m->name.len = strlen("__ducc__");
m->name.data = "__ducc__";
m->n_replacements = 1;
- m->replacements = calloc(1, sizeof(PpToken));
- m->replacements[0].kind = PpTokenKind_pp_number;
+ m->replacements = calloc(1, sizeof(Token));
+ m->replacements[0].kind = TokenKind_literal_int;
m->replacements[0].raw.len = strlen("1");
m->replacements[0].raw.data = "1";
pp_macros->len += 1;
@@ -158,9 +324,9 @@ void add_predefined_macros(PpMacros* pp_macros) {
pp_macros->len += 1;
}
-int count_pp_tokens(PpToken* pp_tokens) {
+int count_pp_tokens(Token* pp_tokens) {
int n = 0;
- while (pp_tokens[n].kind != PpTokenKind_eof) {
+ while (pp_tokens[n].kind != TokenKind_eof) {
++n;
}
return n;
@@ -175,7 +341,7 @@ Preprocessor* preprocessor_new(InFile* src, int include_depth, PpMacros* pp_macr
pp->filename = src->filename;
pp->line = 1;
pp->src = src->buf;
- pp->pp_tokens = calloc(1024 * 1024, sizeof(PpToken));
+ pp->pp_tokens = calloc(1024 * 1024, sizeof(Token));
pp->pp_macros = pp_macros;
pp->include_depth = include_depth;
pp->include_paths = calloc(16, sizeof(String));
@@ -209,104 +375,104 @@ void pp_tokenize_all(Preprocessor* pp) {
int ch;
int start;
while (pp->src[pp->pos]) {
- PpToken* tok = pp->pp_tokens + pp->n_pp_tokens;
+ Token* tok = pp->pp_tokens + pp->n_pp_tokens;
tok->loc.filename = pp->filename;
tok->loc.line = pp->line;
char c = pp->src[pp->pos];
++pp->pos;
if (c == '(') {
- tok->kind = PpTokenKind_punctuator;
+ tok->kind = TokenKind_paren_l;
tok->raw.len = 1;
tok->raw.data = pp->src + pp->pos - tok->raw.len;
} else if (c == ')') {
- tok->kind = PpTokenKind_punctuator;
+ tok->kind = TokenKind_paren_r;
tok->raw.len = 1;
tok->raw.data = pp->src + pp->pos - tok->raw.len;
} else if (c == '{') {
- tok->kind = PpTokenKind_punctuator;
+ tok->kind = TokenKind_brace_l;
tok->raw.len = 1;
tok->raw.data = pp->src + pp->pos - tok->raw.len;
} else if (c == '}') {
- tok->kind = PpTokenKind_punctuator;
+ tok->kind = TokenKind_brace_r;
tok->raw.len = 1;
tok->raw.data = pp->src + pp->pos - tok->raw.len;
} else if (c == '[') {
- tok->kind = PpTokenKind_punctuator;
+ tok->kind = TokenKind_bracket_l;
tok->raw.len = 1;
tok->raw.data = pp->src + pp->pos - tok->raw.len;
} else if (c == ']') {
- tok->kind = PpTokenKind_punctuator;
+ tok->kind = TokenKind_bracket_r;
tok->raw.len = 1;
tok->raw.data = pp->src + pp->pos - tok->raw.len;
} else if (c == ',') {
- tok->kind = PpTokenKind_punctuator;
+ tok->kind = TokenKind_comma;
tok->raw.len = 1;
tok->raw.data = pp->src + pp->pos - tok->raw.len;
} else if (c == ';') {
- tok->kind = PpTokenKind_punctuator;
+ tok->kind = TokenKind_semicolon;
tok->raw.len = 1;
tok->raw.data = pp->src + pp->pos - tok->raw.len;
} else if (c == '+') {
if (pp->src[pp->pos] == '=') {
++pp->pos;
- tok->kind = PpTokenKind_punctuator;
+ tok->kind = TokenKind_assign_add;
tok->raw.len = 2;
tok->raw.data = pp->src + pp->pos - tok->raw.len;
} else if (pp->src[pp->pos] == '+') {
++pp->pos;
- tok->kind = PpTokenKind_punctuator;
+ tok->kind = TokenKind_plusplus;
tok->raw.len = 2;
tok->raw.data = pp->src + pp->pos - tok->raw.len;
} else {
- tok->kind = PpTokenKind_punctuator;
+ tok->kind = TokenKind_plus;
tok->raw.len = 1;
tok->raw.data = pp->src + pp->pos - tok->raw.len;
}
} else if (c == '|') {
if (pp->src[pp->pos] == '|') {
++pp->pos;
- tok->kind = PpTokenKind_punctuator;
+ tok->kind = TokenKind_oror;
tok->raw.len = 2;
tok->raw.data = pp->src + pp->pos - tok->raw.len;
} else {
- tok->kind = PpTokenKind_punctuator;
+ tok->kind = TokenKind_or;
tok->raw.len = 1;
tok->raw.data = pp->src + pp->pos - tok->raw.len;
}
} else if (c == '&') {
if (pp->src[pp->pos] == '&') {
++pp->pos;
- tok->kind = PpTokenKind_punctuator;
+ tok->kind = TokenKind_andand;
tok->raw.len = 2;
tok->raw.data = pp->src + pp->pos - tok->raw.len;
} else {
- tok->kind = PpTokenKind_punctuator;
+ tok->kind = TokenKind_and;
tok->raw.len = 1;
tok->raw.data = pp->src + pp->pos - tok->raw.len;
}
} else if (c == '-') {
if (pp->src[pp->pos] == '>') {
++pp->pos;
- tok->kind = PpTokenKind_punctuator;
+ tok->kind = TokenKind_arrow;
tok->raw.len = 2;
tok->raw.data = pp->src + pp->pos - tok->raw.len;
} else if (pp->src[pp->pos] == '=') {
++pp->pos;
- tok->kind = PpTokenKind_punctuator;
+ tok->kind = TokenKind_assign_sub;
tok->raw.len = 2;
tok->raw.data = pp->src + pp->pos - tok->raw.len;
} else if (pp->src[pp->pos] == '-') {
++pp->pos;
- tok->kind = PpTokenKind_punctuator;
+ tok->kind = TokenKind_minusminus;
tok->raw.len = 2;
tok->raw.data = pp->src + pp->pos - tok->raw.len;
} else {
- tok->kind = PpTokenKind_punctuator;
+ tok->kind = TokenKind_minus;
tok->raw.len = 1;
tok->raw.data = pp->src + pp->pos - tok->raw.len;
}
} else if (c == '*') {
- tok->kind = PpTokenKind_punctuator;
+ tok->kind = TokenKind_star;
tok->raw.len = 1;
tok->raw.data = pp->src + pp->pos - tok->raw.len;
} else if (c == '/') {
@@ -316,7 +482,7 @@ void pp_tokenize_all(Preprocessor* pp) {
while (pp->src[pp->pos] && pp->src[pp->pos] != '\n' && pp->src[pp->pos] != '\r') {
++pp->pos;
}
- tok->kind = PpTokenKind_whitespace;
+ tok->kind = TokenKind_whitespace;
tok->raw.len = pp->pos - start;
tok->raw.data = pp->src + pp->pos - tok->raw.len;
} else if (pp->src[pp->pos] == '*') {
@@ -332,16 +498,16 @@ void pp_tokenize_all(Preprocessor* pp) {
}
++pp->pos;
}
- tok->kind = PpTokenKind_whitespace;
+ tok->kind = TokenKind_whitespace;
tok->raw.len = pp->pos - start;
tok->raw.data = pp->src + pp->pos - tok->raw.len;
} else {
- tok->kind = PpTokenKind_punctuator;
+ tok->kind = TokenKind_slash;
tok->raw.len = 1;
tok->raw.data = pp->src + pp->pos - tok->raw.len;
}
} else if (c == '%') {
- tok->kind = PpTokenKind_punctuator;
+ tok->kind = TokenKind_percent;
tok->raw.len = 1;
tok->raw.data = pp->src + pp->pos - tok->raw.len;
} else if (c == '.') {
@@ -349,72 +515,71 @@ void pp_tokenize_all(Preprocessor* pp) {
++pp->pos;
if (pp->src[pp->pos] == '.') {
++pp->pos;
- tok->kind = PpTokenKind_punctuator;
+ tok->kind = TokenKind_ellipsis;
tok->raw.len = 3;
tok->raw.data = pp->src + pp->pos - tok->raw.len;
} else {
- --pp->pos;
- tok->kind = PpTokenKind_punctuator;
- tok->raw.len = 1;
+ tok->kind = TokenKind_other;
+ tok->raw.len = 2;
tok->raw.data = pp->src + pp->pos - tok->raw.len;
}
} else {
- tok->kind = PpTokenKind_punctuator;
+ tok->kind = TokenKind_dot;
tok->raw.len = 1;
tok->raw.data = pp->src + pp->pos - tok->raw.len;
}
} else if (c == '!') {
if (pp->src[pp->pos] == '=') {
++pp->pos;
- tok->kind = PpTokenKind_punctuator;
+ tok->kind = TokenKind_ne;
tok->raw.len = 2;
tok->raw.data = pp->src + pp->pos - tok->raw.len;
} else {
- tok->kind = PpTokenKind_punctuator;
+ tok->kind = TokenKind_not;
tok->raw.len = 1;
tok->raw.data = pp->src + pp->pos - tok->raw.len;
}
} else if (c == '=') {
if (pp->src[pp->pos] == '=') {
++pp->pos;
- tok->kind = PpTokenKind_punctuator;
+ tok->kind = TokenKind_eq;
tok->raw.len = 2;
tok->raw.data = pp->src + pp->pos - tok->raw.len;
} else {
- tok->kind = PpTokenKind_punctuator;
+ tok->kind = TokenKind_assign;
tok->raw.len = 1;
tok->raw.data = pp->src + pp->pos - tok->raw.len;
}
} else if (c == '<') {
if (pp->src[pp->pos] == '=') {
++pp->pos;
- tok->kind = PpTokenKind_punctuator;
+ tok->kind = TokenKind_le;
tok->raw.len = 2;
tok->raw.data = pp->src + pp->pos - tok->raw.len;
} else {
- tok->kind = PpTokenKind_punctuator;
+ tok->kind = TokenKind_lt;
tok->raw.len = 1;
tok->raw.data = pp->src + pp->pos - tok->raw.len;
}
} else if (c == '>') {
if (pp->src[pp->pos] == '=') {
++pp->pos;
- tok->kind = PpTokenKind_punctuator;
+ tok->kind = TokenKind_ge;
tok->raw.len = 2;
tok->raw.data = pp->src + pp->pos - tok->raw.len;
} else {
- tok->kind = PpTokenKind_punctuator;
+ tok->kind = TokenKind_gt;
tok->raw.len = 1;
tok->raw.data = pp->src + pp->pos - tok->raw.len;
}
} else if (c == '#') {
if (pp->src[pp->pos] == '#') {
++pp->pos;
- tok->kind = PpTokenKind_punctuator;
+ tok->kind = TokenKind_hashhash;
tok->raw.len = 2;
tok->raw.data = pp->src + pp->pos - tok->raw.len;
} else {
- tok->kind = PpTokenKind_punctuator;
+ tok->kind = TokenKind_hash;
tok->raw.len = 1;
tok->raw.data = pp->src + pp->pos - tok->raw.len;
}
@@ -424,7 +589,7 @@ void pp_tokenize_all(Preprocessor* pp) {
++pp->pos;
}
pp->pos += 2;
- tok->kind = PpTokenKind_character_constant;
+ tok->kind = TokenKind_character_constant;
tok->raw.data = pp->src + start;
tok->raw.len = pp->pos - start;
} else if (c == '"') {
@@ -439,7 +604,7 @@ void pp_tokenize_all(Preprocessor* pp) {
++pp->pos;
}
++pp->pos;
- tok->kind = PpTokenKind_string_literal;
+ tok->kind = TokenKind_literal_str;
tok->raw.data = pp->src + start;
tok->raw.len = pp->pos - start;
} else if (isdigit(c)) {
@@ -448,7 +613,7 @@ void pp_tokenize_all(Preprocessor* pp) {
while (isdigit(pp->src[pp->pos])) {
++pp->pos;
}
- tok->kind = PpTokenKind_pp_number;
+ tok->kind = TokenKind_literal_int;
tok->raw.data = pp->src + start;
tok->raw.len = pp->pos - start;
} else if (isalpha(c) || c == '_') {
@@ -459,16 +624,58 @@ void pp_tokenize_all(Preprocessor* pp) {
}
tok->raw.data = pp->src + start;
tok->raw.len = pp->pos - start;
- tok->kind = PpTokenKind_identifier;
+ if (string_equals_cstr(&tok->raw, "break")) {
+ tok->kind = TokenKind_keyword_break;
+ } else if (string_equals_cstr(&tok->raw, "char")) {
+ tok->kind = TokenKind_keyword_char;
+ } else if (string_equals_cstr(&tok->raw, "const")) {
+ tok->kind = TokenKind_keyword_const;
+ } else if (string_equals_cstr(&tok->raw, "continue")) {
+ tok->kind = TokenKind_keyword_continue;
+ } else if (string_equals_cstr(&tok->raw, "do")) {
+ tok->kind = TokenKind_keyword_do;
+ } else if (string_equals_cstr(&tok->raw, "else")) {
+ tok->kind = TokenKind_keyword_else;
+ } else if (string_equals_cstr(&tok->raw, "enum")) {
+ tok->kind = TokenKind_keyword_enum;
+ } else if (string_equals_cstr(&tok->raw, "extern")) {
+ tok->kind = TokenKind_keyword_extern;
+ } else if (string_equals_cstr(&tok->raw, "for")) {
+ tok->kind = TokenKind_keyword_for;
+ } else if (string_equals_cstr(&tok->raw, "if")) {
+ tok->kind = TokenKind_keyword_if;
+ } else if (string_equals_cstr(&tok->raw, "int")) {
+ tok->kind = TokenKind_keyword_int;
+ } else if (string_equals_cstr(&tok->raw, "long")) {
+ tok->kind = TokenKind_keyword_long;
+ } else if (string_equals_cstr(&tok->raw, "return")) {
+ tok->kind = TokenKind_keyword_return;
+ } else if (string_equals_cstr(&tok->raw, "short")) {
+ tok->kind = TokenKind_keyword_short;
+ } else if (string_equals_cstr(&tok->raw, "sizeof")) {
+ tok->kind = TokenKind_keyword_sizeof;
+ } else if (string_equals_cstr(&tok->raw, "struct")) {
+ tok->kind = TokenKind_keyword_struct;
+ } else if (string_equals_cstr(&tok->raw, "typedef")) {
+ tok->kind = TokenKind_keyword_typeof;
+ } else if (string_equals_cstr(&tok->raw, "void")) {
+ tok->kind = TokenKind_keyword_void;
+ } else if (string_equals_cstr(&tok->raw, "while")) {
+ tok->kind = TokenKind_keyword_while;
+ } else if (string_equals_cstr(&tok->raw, "va_start")) {
+ tok->kind = TokenKind_va_start;
+ } else {
+ tok->kind = TokenKind_ident;
+ }
} else if (isspace(c)) {
if (c == '\n' || c == '\r') {
++pp->line;
}
- tok->kind = PpTokenKind_whitespace;
+ tok->kind = TokenKind_whitespace;
tok->raw.len = 1;
tok->raw.data = pp->src + pp->pos - tok->raw.len;
} else {
- tok->kind = PpTokenKind_other;
+ tok->kind = TokenKind_other;
tok->raw.len = 1;
tok->raw.data = pp->src + pp->pos - tok->raw.len;
}
@@ -476,8 +683,8 @@ void pp_tokenize_all(Preprocessor* pp) {
}
}
-PpToken* skip_whitespace(PpToken* tok) {
- while (tok->kind != PpTokenKind_eof && tok->kind == PpTokenKind_whitespace) {
+Token* skip_whitespace(Token* tok) {
+ while (tok->kind != TokenKind_eof && tok->kind == TokenKind_whitespace) {
++tok;
}
return tok;
@@ -493,9 +700,9 @@ int string_contains_newline(String* s) {
return 0;
}
-PpToken* find_next_newline(PpToken* tok) {
- while (tok->kind != PpTokenKind_eof) {
- if (tok->kind == PpTokenKind_whitespace && string_contains_newline(&tok->raw)) {
+Token* find_next_newline(Token* tok) {
+ while (tok->kind != TokenKind_eof) {
+ if (tok->kind == TokenKind_whitespace && string_contains_newline(&tok->raw)) {
return tok;
}
++tok;
@@ -503,23 +710,23 @@ PpToken* find_next_newline(PpToken* tok) {
return NULL;
}
-void make_token_whitespace(PpToken* tok) {
- tok->kind = PpTokenKind_whitespace;
+void make_token_whitespace(Token* tok) {
+ tok->kind = TokenKind_whitespace;
tok->raw.len = 0;
tok->raw.data = NULL;
}
-void remove_directive_tokens(PpToken* start, PpToken* end) {
- PpToken* tok = start;
+void remove_directive_tokens(Token* start, Token* end) {
+ Token* tok = start;
while (tok != end) {
make_token_whitespace(tok);
++tok;
}
}
-PpToken* process_endif_directive(Preprocessor* pp, PpToken* tok) {
- PpToken* tok2 = skip_whitespace(tok + 1);
- if (tok2->kind == PpTokenKind_identifier && string_equals_cstr(&tok2->raw, "endif")) {
+Token* process_endif_directive(Preprocessor* pp, Token* tok) {
+ Token* tok2 = skip_whitespace(tok + 1);
+ if (tok2->kind == TokenKind_ident && string_equals_cstr(&tok2->raw, "endif")) {
++tok2;
pp->skip_pp_tokens = 0;
remove_directive_tokens(tok, tok2);
@@ -528,9 +735,9 @@ PpToken* process_endif_directive(Preprocessor* pp, PpToken* tok) {
return NULL;
}
-PpToken* process_else_directive(Preprocessor* pp, PpToken* tok) {
- PpToken* tok2 = skip_whitespace(tok + 1);
- if (tok2->kind == PpTokenKind_identifier && string_equals_cstr(&tok2->raw, "else")) {
+Token* process_else_directive(Preprocessor* pp, Token* tok) {
+ Token* tok2 = skip_whitespace(tok + 1);
+ if (tok2->kind == TokenKind_keyword_else) {
++tok2;
pp->skip_pp_tokens = 1 - pp->skip_pp_tokens;
remove_directive_tokens(tok, tok2);
@@ -539,13 +746,13 @@ PpToken* process_else_directive(Preprocessor* pp, PpToken* tok) {
return NULL;
}
-PpToken* process_ifdef_directive(Preprocessor* pp, PpToken* tok) {
- PpToken* tok2 = skip_whitespace(tok + 1);
- if (tok2->kind == PpTokenKind_identifier && string_equals_cstr(&tok2->raw, "ifdef")) {
+Token* process_ifdef_directive(Preprocessor* pp, Token* tok) {
+ Token* tok2 = skip_whitespace(tok + 1);
+ if (tok2->kind == TokenKind_ident && string_equals_cstr(&tok2->raw, "ifdef")) {
++tok2;
tok2 = skip_whitespace(tok2);
- if (tok2->kind == PpTokenKind_identifier) {
- PpToken* name = tok2;
+ if (tok2->kind == TokenKind_ident) {
+ Token* name = tok2;
++tok2;
pp->skip_pp_tokens = find_pp_macro(pp, &name->raw) == -1;
}
@@ -555,13 +762,13 @@ PpToken* process_ifdef_directive(Preprocessor* pp, PpToken* tok) {
return NULL;
}
-PpToken* process_ifndef_directive(Preprocessor* pp, PpToken* tok) {
- PpToken* tok2 = skip_whitespace(tok + 1);
- if (tok2->kind == PpTokenKind_identifier && string_equals_cstr(&tok2->raw, "ifndef")) {
+Token* process_ifndef_directive(Preprocessor* pp, Token* tok) {
+ Token* tok2 = skip_whitespace(tok + 1);
+ if (tok2->kind == TokenKind_ident && string_equals_cstr(&tok2->raw, "ifndef")) {
++tok2;
tok2 = skip_whitespace(tok2);
- if (tok2->kind == PpTokenKind_identifier) {
- PpToken* name = tok2;
+ if (tok2->kind == TokenKind_ident) {
+ Token* name = tok2;
++tok2;
pp->skip_pp_tokens = find_pp_macro(pp, &name->raw) != -1;
}
@@ -571,23 +778,23 @@ PpToken* process_ifndef_directive(Preprocessor* pp, PpToken* tok) {
return NULL;
}
-PpToken* read_include_header_name(PpToken* tok2, String* include_name) {
- if (tok2->kind == PpTokenKind_string_literal) {
+Token* read_include_header_name(Token* tok2, String* include_name) {
+ if (tok2->kind == TokenKind_literal_str) {
*include_name = tok2->raw;
++tok2;
return tok2;
- } else if (tok2->kind == PpTokenKind_punctuator && string_equals_cstr(&tok2->raw, "<")) {
+ } else if (tok2->kind == TokenKind_lt) {
char* include_name_start = tok2->raw.data;
++tok2;
int include_name_len = 0;
- while (tok2->kind != PpTokenKind_eof) {
- if (tok2->kind == PpTokenKind_punctuator && string_equals_cstr(&tok2->raw, ">")) {
+ while (tok2->kind != TokenKind_eof) {
+ if (tok2->kind == TokenKind_gt) {
break;
}
include_name_len += tok2->raw.len;
++tok2;
}
- if (tok2->kind == PpTokenKind_eof) {
+ if (tok2->kind == TokenKind_eof) {
fatal_error("invalid #include: <> not balanced");
}
++tok2;
@@ -616,8 +823,8 @@ const char* resolve_include_name(Preprocessor* pp, String* include_name) {
}
}
-PpToken* replace_pp_tokens(Preprocessor* pp, PpToken* dest_start, PpToken* dest_end, int n_source_tokens,
- PpToken* source_tokens) {
+Token* replace_pp_tokens(Preprocessor* pp, Token* dest_start, Token* dest_end, int n_source_tokens,
+ Token* source_tokens) {
int n_tokens_to_remove = dest_end - dest_start;
int n_tokens_after_dest = (pp->pp_tokens + pp->n_pp_tokens) - dest_end;
int shift_amount;
@@ -625,35 +832,35 @@ PpToken* replace_pp_tokens(Preprocessor* pp, PpToken* dest_start, PpToken* dest_
if (n_tokens_to_remove < n_source_tokens) {
// Move existing tokens backward to make room.
shift_amount = n_source_tokens - n_tokens_to_remove;
- memmove(dest_end + shift_amount, dest_end, n_tokens_after_dest * sizeof(PpToken));
+ memmove(dest_end + shift_amount, dest_end, n_tokens_after_dest * sizeof(Token));
pp->n_pp_tokens += shift_amount;
} else if (n_source_tokens < n_tokens_to_remove) {
// Move existing tokens forward to reduce room.
shift_amount = n_tokens_to_remove - n_source_tokens;
- memmove(dest_start + n_source_tokens, dest_end, n_tokens_after_dest * sizeof(PpToken));
+ memmove(dest_start + n_source_tokens, dest_end, n_tokens_after_dest * sizeof(Token));
pp->n_pp_tokens -= shift_amount;
- memset(pp->pp_tokens + pp->n_pp_tokens, 0, shift_amount * sizeof(PpToken));
+ memset(pp->pp_tokens + pp->n_pp_tokens, 0, shift_amount * sizeof(Token));
}
- memcpy(dest_start, source_tokens, n_source_tokens * sizeof(PpToken));
+ memcpy(dest_start, source_tokens, n_source_tokens * sizeof(Token));
return dest_start + n_source_tokens;
}
-PpToken* expand_include_directive(Preprocessor* pp, PpToken* tok, PpToken* tok2, const char* include_name_buf) {
+Token* expand_include_directive(Preprocessor* pp, Token* tok, Token* tok2, const char* include_name_buf) {
InFile* include_source = read_all(include_name_buf);
if (!include_source) {
fatal_error("cannot open include file: %s", include_name_buf);
}
- PpToken* include_pp_tokens = do_preprocess(include_source, pp->include_depth + 1, pp->pp_macros);
+ Token* include_pp_tokens = do_preprocess(include_source, pp->include_depth + 1, pp->pp_macros);
return replace_pp_tokens(pp, tok, tok2 + 1, count_pp_tokens(include_pp_tokens), include_pp_tokens);
}
-PpToken* process_include_directive(Preprocessor* pp, PpToken* tok) {
- PpToken* tok2 = skip_whitespace(tok + 1);
+Token* process_include_directive(Preprocessor* pp, Token* tok) {
+ Token* tok2 = skip_whitespace(tok + 1);
- if (tok2->kind == PpTokenKind_identifier && string_equals_cstr(&tok2->raw, "include")) {
+ if (tok2->kind == TokenKind_ident && string_equals_cstr(&tok2->raw, "include")) {
++tok2;
tok2 = skip_whitespace(tok2);
String* include_name = calloc(1, sizeof(String));
@@ -667,20 +874,20 @@ PpToken* process_include_directive(Preprocessor* pp, PpToken* tok) {
return NULL;
}
-PpToken* process_define_directive(Preprocessor* pp, PpToken* tok) {
- PpToken* tok2 = skip_whitespace(tok + 1);
- PpToken* tok3 = NULL;
+Token* process_define_directive(Preprocessor* pp, Token* tok) {
+ Token* tok2 = skip_whitespace(tok + 1);
+ Token* tok3 = NULL;
PpMacro* pp_macro;
int i;
- if (tok2->kind == PpTokenKind_identifier && string_equals_cstr(&tok2->raw, "define")) {
+ if (tok2->kind == TokenKind_ident && string_equals_cstr(&tok2->raw, "define")) {
++tok2;
tok2 = skip_whitespace(tok2);
- if (tok2->kind == PpTokenKind_identifier) {
- PpToken* macro_name = tok2;
+ if (tok2->kind == TokenKind_ident) {
+ Token* macro_name = tok2;
++tok2;
- if (tok2->kind == PpTokenKind_punctuator && string_equals_cstr(&tok2->raw, "(")) {
+ if (tok2->kind == TokenKind_paren_l) {
++tok2;
- if (tok2->kind == PpTokenKind_punctuator && string_equals_cstr(&tok2->raw, ")")) {
+ if (tok2->kind == TokenKind_paren_r) {
++tok2;
} else {
fatal_error("%s:%d: invalid function-like macro syntax (#define %.*s)", macro_name->loc.filename,
@@ -692,7 +899,7 @@ PpToken* process_define_directive(Preprocessor* pp, PpToken* tok) {
pp_macro->kind = PpMacroKind_func;
pp_macro->name = macro_name->raw;
pp_macro->n_replacements = tok3 - tok2;
- pp_macro->replacements = calloc(pp_macro->n_replacements, sizeof(PpToken));
+ pp_macro->replacements = calloc(pp_macro->n_replacements, sizeof(Token));
for (i = 0; i < pp_macro->n_replacements; ++i) {
pp_macro->replacements[i] = tok2[i];
}
@@ -705,7 +912,7 @@ PpToken* process_define_directive(Preprocessor* pp, PpToken* tok) {
pp_macro->kind = PpMacroKind_obj;
pp_macro->name = macro_name->raw;
pp_macro->n_replacements = tok3 - tok2;
- pp_macro->replacements = calloc(pp_macro->n_replacements, sizeof(PpToken));
+ pp_macro->replacements = calloc(pp_macro->n_replacements, sizeof(Token));
for (i = 0; i < pp_macro->n_replacements; ++i) {
pp_macro->replacements[i] = tok2[i];
}
@@ -721,7 +928,7 @@ PpToken* process_define_directive(Preprocessor* pp, PpToken* tok) {
return NULL;
}
-int expand_macro(Preprocessor* pp, PpToken* tok) {
+int expand_macro(Preprocessor* pp, Token* tok) {
int pp_macro_idx = find_pp_macro(pp, &tok->raw);
if (pp_macro_idx == -1) {
return 0;
@@ -744,15 +951,15 @@ int expand_macro(Preprocessor* pp, PpToken* tok) {
tok[i].loc = original_loc;
}
} else if (pp_macro->kind == PpMacroKind_builtin_file) {
- PpToken* file_tok = calloc(1, sizeof(PpToken));
- file_tok->kind = PpTokenKind_string_literal;
+ Token* file_tok = calloc(1, sizeof(Token));
+ file_tok->kind = TokenKind_literal_str;
file_tok->raw.len = strlen(tok->loc.filename) + 2;
file_tok->raw.data = calloc(file_tok->raw.len, sizeof(char));
sprintf(file_tok->raw.data, "\"%s\"", tok->loc.filename);
replace_pp_tokens(pp, tok, tok + 1, 1, file_tok);
} else if (pp_macro->kind == PpMacroKind_builtin_line) {
- PpToken* line_tok = calloc(1, sizeof(PpToken));
- line_tok->kind = PpTokenKind_pp_number;
+ Token* line_tok = calloc(1, sizeof(Token));
+ line_tok->kind = TokenKind_literal_int;
line_tok->raw.data = calloc(10, sizeof(char));
sprintf(line_tok->raw.data, "%d", tok->loc.line);
line_tok->raw.len = strlen(line_tok->raw.data);
@@ -764,11 +971,11 @@ int expand_macro(Preprocessor* pp, PpToken* tok) {
}
void process_pp_directives(Preprocessor* pp) {
- PpToken* tok = pp->pp_tokens;
+ Token* tok = pp->pp_tokens;
- while (tok->kind != PpTokenKind_eof) {
- if (tok->kind == PpTokenKind_punctuator && string_equals_cstr(&tok->raw, "#")) {
- PpToken* next_tok;
+ while (tok->kind != TokenKind_eof) {
+ if (tok->kind == TokenKind_hash) {
+ Token* next_tok;
if ((next_tok = process_endif_directive(pp, tok)) != NULL) {
tok = next_tok;
@@ -797,7 +1004,7 @@ void process_pp_directives(Preprocessor* pp) {
}
} else if (skip_pp_tokens(pp)) {
make_token_whitespace(tok);
- } else if (tok->kind == PpTokenKind_identifier) {
+ } else if (tok->kind == TokenKind_ident) {
int expanded = expand_macro(pp, tok);
if (expanded) {
// A macro may expand to another macro. Re-scan the expanded tokens.
@@ -809,12 +1016,12 @@ void process_pp_directives(Preprocessor* pp) {
}
}
-void pp_dump(PpToken* t, int include_whitespace) {
- for (; t->kind != PpTokenKind_eof; ++t) {
- if (t->kind == PpTokenKind_whitespace && !include_whitespace) {
+void pp_dump(Token* t, int include_whitespace) {
+ for (; t->kind != TokenKind_eof; ++t) {
+ if (t->kind == TokenKind_whitespace && !include_whitespace) {
continue;
}
- fprintf(stderr, "%s\n", pp_token_stringify(t));
+ fprintf(stderr, "%s\n", token_stringify(t));
}
}
@@ -825,7 +1032,7 @@ char* get_ducc_include_path() {
return buf;
}
-PpToken* do_preprocess(InFile* src, int depth, PpMacros* pp_macros) {
+Token* do_preprocess(InFile* src, int depth, PpMacros* pp_macros) {
Preprocessor* pp = preprocessor_new(src, depth, pp_macros);
add_include_path(pp, get_ducc_include_path());
add_include_path(pp, "/usr/include/x86_64-linux-gnu");
@@ -835,7 +1042,7 @@ PpToken* do_preprocess(InFile* src, int depth, PpMacros* pp_macros) {
return pp->pp_tokens;
}
-PpToken* preprocess(InFile* src) {
+Token* preprocess(InFile* src) {
PpMacros* pp_macros = pp_macros_new();
add_predefined_macros(pp_macros);
return do_preprocess(src, 0, pp_macros);
diff --git a/tokenize.c b/tokenize.c
index 9bc14d6..ff66525 100644
--- a/tokenize.c
+++ b/tokenize.c
@@ -1,210 +1,12 @@
-enum TokenKind {
- TokenKind_eof,
-
- TokenKind_and,
- TokenKind_andand,
- TokenKind_arrow,
- TokenKind_assign,
- TokenKind_assign_add,
- TokenKind_assign_sub,
- TokenKind_brace_l,
- TokenKind_brace_r,
- TokenKind_bracket_l,
- TokenKind_bracket_r,
- TokenKind_comma,
- TokenKind_dot,
- TokenKind_ellipsis,
- TokenKind_eq,
- TokenKind_ge,
- TokenKind_gt,
- TokenKind_ident,
- TokenKind_keyword_break,
- TokenKind_keyword_char,
- TokenKind_keyword_const,
- TokenKind_keyword_continue,
- TokenKind_keyword_do,
- TokenKind_keyword_else,
- TokenKind_keyword_enum,
- TokenKind_keyword_extern,
- TokenKind_keyword_for,
- TokenKind_keyword_if,
- TokenKind_keyword_int,
- TokenKind_keyword_long,
- TokenKind_keyword_return,
- TokenKind_keyword_short,
- TokenKind_keyword_sizeof,
- TokenKind_keyword_struct,
- TokenKind_keyword_typeof,
- TokenKind_keyword_void,
- TokenKind_keyword_while,
- TokenKind_le,
- TokenKind_lt,
- TokenKind_literal_int,
- TokenKind_literal_str,
- TokenKind_minus,
- TokenKind_minusminus,
- TokenKind_ne,
- TokenKind_not,
- TokenKind_or,
- TokenKind_oror,
- TokenKind_paren_l,
- TokenKind_paren_r,
- TokenKind_percent,
- TokenKind_plus,
- TokenKind_plusplus,
- TokenKind_semicolon,
- TokenKind_slash,
- TokenKind_star,
-
- // va_start() is currently implemented as a special form due to the current limitation of #define macro.
- TokenKind_va_start,
-};
-typedef enum TokenKind TokenKind;
-
-struct Token {
- TokenKind kind;
- String raw;
-};
-typedef struct Token Token;
-
-const char* token_kind_stringify(TokenKind k) {
- if (k == TokenKind_eof)
- return "<eof>";
- else if (k == TokenKind_and)
- return "&";
- else if (k == TokenKind_andand)
- return "&&";
- else if (k == TokenKind_arrow)
- return "->";
- else if (k == TokenKind_assign)
- return "=";
- else if (k == TokenKind_assign_add)
- return "+=";
- else if (k == TokenKind_assign_sub)
- return "-=";
- else if (k == TokenKind_brace_l)
- return "{";
- else if (k == TokenKind_brace_r)
- return "}";
- else if (k == TokenKind_bracket_l)
- return "[";
- else if (k == TokenKind_bracket_r)
- return "]";
- else if (k == TokenKind_comma)
- return ",";
- else if (k == TokenKind_dot)
- return ".";
- else if (k == TokenKind_ellipsis)
- return "...";
- else if (k == TokenKind_eq)
- return "==";
- else if (k == TokenKind_ge)
- return ">=";
- else if (k == TokenKind_gt)
- return ">";
- else if (k == TokenKind_ident)
- return "<identifier>";
- else if (k == TokenKind_keyword_break)
- return "break";
- else if (k == TokenKind_keyword_char)
- return "char";
- else if (k == TokenKind_keyword_const)
- return "const";
- else if (k == TokenKind_keyword_continue)
- return "continue";
- else if (k == TokenKind_keyword_do)
- return "do";
- else if (k == TokenKind_keyword_else)
- return "else";
- else if (k == TokenKind_keyword_enum)
- return "enum";
- else if (k == TokenKind_keyword_extern)
- return "extern";
- else if (k == TokenKind_keyword_for)
- return "for";
- else if (k == TokenKind_keyword_if)
- return "if";
- else if (k == TokenKind_keyword_int)
- return "int";
- else if (k == TokenKind_keyword_long)
- return "long";
- else if (k == TokenKind_keyword_return)
- return "return";
- else if (k == TokenKind_keyword_short)
- return "short";
- else if (k == TokenKind_keyword_sizeof)
- return "sizeof";
- else if (k == TokenKind_keyword_struct)
- return "struct";
- else if (k == TokenKind_keyword_typeof)
- return "typeof";
- else if (k == TokenKind_keyword_void)
- return "void";
- else if (k == TokenKind_keyword_while)
- return "while";
- else if (k == TokenKind_le)
- return "le";
- else if (k == TokenKind_lt)
- return "lt";
- else if (k == TokenKind_literal_int)
- return "<integer>";
- else if (k == TokenKind_literal_str)
- return "<string>";
- else if (k == TokenKind_minus)
- return "-";
- else if (k == TokenKind_minusminus)
- return "--";
- else if (k == TokenKind_ne)
- return "!=";
- else if (k == TokenKind_not)
- return "!";
- else if (k == TokenKind_or)
- return "|";
- else if (k == TokenKind_oror)
- return "||";
- else if (k == TokenKind_paren_l)
- return "(";
- else if (k == TokenKind_paren_r)
- return ")";
- else if (k == TokenKind_percent)
- return "%";
- else if (k == TokenKind_plus)
- return "+";
- else if (k == TokenKind_plusplus)
- return "++";
- else if (k == TokenKind_semicolon)
- return ";";
- else if (k == TokenKind_slash)
- return "/";
- else if (k == TokenKind_star)
- return "*";
- else if (k == TokenKind_va_start)
- return "va_start";
- else
- unreachable();
-}
-
-const char* token_stringify(Token* t) {
- TokenKind k = t->kind;
- if (k == TokenKind_ident || k == TokenKind_literal_int || k == TokenKind_literal_str) {
- const char* kind_str = token_kind_stringify(k);
- char* buf = calloc(t->raw.len + strlen(kind_str) + 3 + 1, sizeof(char));
- sprintf(buf, "%.*s (%s)", t->raw.len, t->raw.data, kind_str);
- return buf;
- } else {
- return token_kind_stringify(k);
- }
-}
-
struct Lexer {
- PpToken* src;
+ Token* src;
int pos;
Token* tokens;
int n_tokens;
};
typedef struct Lexer Lexer;
-Lexer* lexer_new(PpToken* pp_tokens) {
+Lexer* lexer_new(Token* pp_tokens) {
Lexer* l = calloc(1, sizeof(Lexer));
l->src = pp_tokens;
l->tokens = calloc(1024 * 1024, sizeof(Token));
@@ -214,62 +16,12 @@ Lexer* lexer_new(PpToken* pp_tokens) {
void tokenize_all(Lexer* l) {
int ch;
int start;
- while (l->src[l->pos].kind != PpTokenKind_eof) {
- PpToken* pp_tok = l->src + l->pos;
+ while (l->src[l->pos].kind != TokenKind_eof) {
+ Token* pp_tok = l->src + l->pos;
Token* tok = l->tokens + l->n_tokens;
- PpTokenKind k = pp_tok->kind;
+ TokenKind k = pp_tok->kind;
++l->pos;
- if (k == PpTokenKind_header_name) {
- unimplemented();
- } else if (k == PpTokenKind_identifier) {
- if (string_equals_cstr(&pp_tok->raw, "break")) {
- tok->kind = TokenKind_keyword_break;
- } else if (string_equals_cstr(&pp_tok->raw, "char")) {
- tok->kind = TokenKind_keyword_char;
- } else if (string_equals_cstr(&pp_tok->raw, "const")) {
- tok->kind = TokenKind_keyword_const;
- } else if (string_equals_cstr(&pp_tok->raw, "continue")) {
- tok->kind = TokenKind_keyword_continue;
- } else if (string_equals_cstr(&pp_tok->raw, "do")) {
- tok->kind = TokenKind_keyword_do;
- } else if (string_equals_cstr(&pp_tok->raw, "else")) {
- tok->kind = TokenKind_keyword_else;
- } else if (string_equals_cstr(&pp_tok->raw, "enum")) {
- tok->kind = TokenKind_keyword_enum;
- } else if (string_equals_cstr(&pp_tok->raw, "extern")) {
- tok->kind = TokenKind_keyword_extern;
- } else if (string_equals_cstr(&pp_tok->raw, "for")) {
- tok->kind = TokenKind_keyword_for;
- } else if (string_equals_cstr(&pp_tok->raw, "if")) {
- tok->kind = TokenKind_keyword_if;
- } else if (string_equals_cstr(&pp_tok->raw, "int")) {
- tok->kind = TokenKind_keyword_int;
- } else if (string_equals_cstr(&pp_tok->raw, "long")) {
- tok->kind = TokenKind_keyword_long;
- } else if (string_equals_cstr(&pp_tok->raw, "return")) {
- tok->kind = TokenKind_keyword_return;
- } else if (string_equals_cstr(&pp_tok->raw, "short")) {
- tok->kind = TokenKind_keyword_short;
- } else if (string_equals_cstr(&pp_tok->raw, "sizeof")) {
- tok->kind = TokenKind_keyword_sizeof;
- } else if (string_equals_cstr(&pp_tok->raw, "struct")) {
- tok->kind = TokenKind_keyword_struct;
- } else if (string_equals_cstr(&pp_tok->raw, "typedef")) {
- tok->kind = TokenKind_keyword_typeof;
- } else if (string_equals_cstr(&pp_tok->raw, "void")) {
- tok->kind = TokenKind_keyword_void;
- } else if (string_equals_cstr(&pp_tok->raw, "while")) {
- tok->kind = TokenKind_keyword_while;
- } else if (string_equals_cstr(&pp_tok->raw, "va_start")) {
- tok->kind = TokenKind_va_start;
- } else {
- tok->kind = TokenKind_ident;
- }
- tok->raw = pp_tok->raw;
- } else if (k == PpTokenKind_pp_number) {
- tok->kind = TokenKind_literal_int;
- tok->raw = pp_tok->raw;
- } else if (k == PpTokenKind_character_constant) {
+ if (k == TokenKind_character_constant) {
tok->kind = TokenKind_literal_int;
ch = pp_tok->raw.data[1];
if (ch == '\\') {
@@ -296,87 +48,23 @@ void tokenize_all(Lexer* l) {
sprintf(buf, "%d", ch);
tok->raw.data = buf;
tok->raw.len = strlen(buf);
- } else if (k == PpTokenKind_string_literal) {
+ } else if (k == TokenKind_literal_str) {
tok->kind = TokenKind_literal_str;
tok->raw.data = pp_tok->raw.data + 1;
tok->raw.len = pp_tok->raw.len - 2;
- } else if (k == PpTokenKind_punctuator || k == PpTokenKind_other) {
- if (string_equals_cstr(&pp_tok->raw, "(")) {
- tok->kind = TokenKind_paren_l;
- } else if (string_equals_cstr(&pp_tok->raw, ")")) {
- tok->kind = TokenKind_paren_r;
- } else if (string_equals_cstr(&pp_tok->raw, "{")) {
- tok->kind = TokenKind_brace_l;
- } else if (string_equals_cstr(&pp_tok->raw, "}")) {
- tok->kind = TokenKind_brace_r;
- } else if (string_equals_cstr(&pp_tok->raw, "[")) {
- tok->kind = TokenKind_bracket_l;
- } else if (string_equals_cstr(&pp_tok->raw, "]")) {
- tok->kind = TokenKind_bracket_r;
- } else if (string_equals_cstr(&pp_tok->raw, ",")) {
- tok->kind = TokenKind_comma;
- } else if (string_equals_cstr(&pp_tok->raw, ";")) {
- tok->kind = TokenKind_semicolon;
- } else if (string_equals_cstr(&pp_tok->raw, "+=")) {
- tok->kind = TokenKind_assign_add;
- } else if (string_equals_cstr(&pp_tok->raw, "++")) {
- tok->kind = TokenKind_plusplus;
- } else if (string_equals_cstr(&pp_tok->raw, "+")) {
- tok->kind = TokenKind_plus;
- } else if (string_equals_cstr(&pp_tok->raw, "||")) {
- tok->kind = TokenKind_oror;
- } else if (string_equals_cstr(&pp_tok->raw, "|")) {
- tok->kind = TokenKind_or;
- } else if (string_equals_cstr(&pp_tok->raw, "&&")) {
- tok->kind = TokenKind_andand;
- } else if (string_equals_cstr(&pp_tok->raw, "&")) {
- tok->kind = TokenKind_and;
- } else if (string_equals_cstr(&pp_tok->raw, "->")) {
- tok->kind = TokenKind_arrow;
- } else if (string_equals_cstr(&pp_tok->raw, "-=")) {
- tok->kind = TokenKind_assign_sub;
- } else if (string_equals_cstr(&pp_tok->raw, "--")) {
- tok->kind = TokenKind_minusminus;
- } else if (string_equals_cstr(&pp_tok->raw, "-")) {
- tok->kind = TokenKind_minus;
- } else if (string_equals_cstr(&pp_tok->raw, "*")) {
- tok->kind = TokenKind_star;
- } else if (string_equals_cstr(&pp_tok->raw, "/")) {
- tok->kind = TokenKind_slash;
- } else if (string_equals_cstr(&pp_tok->raw, "%")) {
- tok->kind = TokenKind_percent;
- } else if (string_equals_cstr(&pp_tok->raw, "...")) {
- tok->kind = TokenKind_ellipsis;
- } else if (string_equals_cstr(&pp_tok->raw, ".")) {
- tok->kind = TokenKind_dot;
- } else if (string_equals_cstr(&pp_tok->raw, "!=")) {
- tok->kind = TokenKind_ne;
- } else if (string_equals_cstr(&pp_tok->raw, "!")) {
- tok->kind = TokenKind_not;
- } else if (string_equals_cstr(&pp_tok->raw, "==")) {
- tok->kind = TokenKind_eq;
- } else if (string_equals_cstr(&pp_tok->raw, "=")) {
- tok->kind = TokenKind_assign;
- } else if (string_equals_cstr(&pp_tok->raw, "<=")) {
- tok->kind = TokenKind_le;
- } else if (string_equals_cstr(&pp_tok->raw, "<")) {
- tok->kind = TokenKind_lt;
- } else if (string_equals_cstr(&pp_tok->raw, ">=")) {
- tok->kind = TokenKind_ge;
- } else if (string_equals_cstr(&pp_tok->raw, ">")) {
- tok->kind = TokenKind_gt;
- } else {
- fatal_error("unknown token: %.*s", pp_tok->raw.len, pp_tok->raw.data);
- }
- tok->raw = pp_tok->raw;
- } else if (k == PpTokenKind_whitespace) {
+ } else if (k == TokenKind_other) {
+ unreachable();
+ } else if (k == TokenKind_whitespace) {
continue;
+ } else {
+ tok->kind = pp_tok->kind;
+ tok->raw = pp_tok->raw;
}
++l->n_tokens;
}
}
-Token* tokenize(PpToken* pp_tokens) {
+Token* tokenize(Token* pp_tokens) {
Lexer* l = lexer_new(pp_tokens);
tokenize_all(l);
return l->tokens;