diff options
| -rw-r--r-- | Makefile | 1 | ||||
| -rw-r--r-- | src/preprocess.c | 311 | ||||
| -rw-r--r-- | src/preprocess.h | 171 | ||||
| -rw-r--r-- | src/token.c | 313 | ||||
| -rw-r--r-- | src/token.h | 176 |
5 files changed, 491 insertions, 481 deletions
@@ -12,6 +12,7 @@ OBJECTS := \ $(BUILD_DIR)/parse.o \ $(BUILD_DIR)/preprocess.o \ $(BUILD_DIR)/sys.o \ + $(BUILD_DIR)/token.o \ $(BUILD_DIR)/tokenize.o .PHONY: all diff --git a/src/preprocess.c b/src/preprocess.c index 85f7320..c96c194 100644 --- a/src/preprocess.c +++ b/src/preprocess.c @@ -3,317 +3,6 @@ #include "parse.h" #include "sys.h" -const char* token_kind_stringify(TokenKind k) { - if (k == TokenKind_eof) - return "<eof>"; - else if (k == TokenKind_hash) - return "#"; - else if (k == TokenKind_hashhash) - return "##"; - else if (k == TokenKind_whitespace) - return "<whitespace>"; - else if (k == TokenKind_newline) - return "<new-line>"; - else if (k == TokenKind_other) - return "<other>"; - else if (k == TokenKind_character_constant) - return "<character-constant>"; - else if (k == TokenKind_header_name) - return "<header-name>"; - else if (k == TokenKind_pp_directive_define) - return "#define"; - else if (k == TokenKind_pp_directive_elif) - return "#elif"; - else if (k == TokenKind_pp_directive_elifdef) - return "#elifdef"; - else if (k == TokenKind_pp_directive_elifndef) - return "#elifndef"; - else if (k == TokenKind_pp_directive_else) - return "#else"; - else if (k == TokenKind_pp_directive_embed) - return "#embed"; - else if (k == TokenKind_pp_directive_endif) - return "#endif"; - else if (k == TokenKind_pp_directive_error) - return "#error"; - else if (k == TokenKind_pp_directive_if) - return "#if"; - else if (k == TokenKind_pp_directive_ifdef) - return "#ifdef"; - else if (k == TokenKind_pp_directive_ifndef) - return "#ifndef"; - else if (k == TokenKind_pp_directive_include) - return "#include"; - else if (k == TokenKind_pp_directive_line) - return "#line"; - else if (k == TokenKind_pp_directive_pragma) - return "#pragma"; - else if (k == TokenKind_pp_directive_undef) - return "#undef"; - else if (k == TokenKind_pp_directive_warning) - return "#warning"; - else if (k == TokenKind_pp_operator_defined) - return "defined"; - else if (k == TokenKind_pp_operator___has_c_attribute) - return "__has_c_attribute"; - else if (k == TokenKind_pp_operator___has_embed) - return "__has_embed"; - else if (k == TokenKind_pp_operator___has_include) - return "__has_include"; - else if (k == TokenKind_keyword_alignas) - return "alignas"; - else if (k == TokenKind_keyword_alignof) - return "alignof"; - else if (k == TokenKind_keyword_auto) - return "auto"; - else if (k == TokenKind_keyword_bool) - return "bool"; - else if (k == TokenKind_keyword_break) - return "break"; - else if (k == TokenKind_keyword_case) - return "case"; - else if (k == TokenKind_keyword_char) - return "char"; - else if (k == TokenKind_keyword_const) - return "const"; - else if (k == TokenKind_keyword_constexpr) - return "constexpr"; - else if (k == TokenKind_keyword_continue) - return "continue"; - else if (k == TokenKind_keyword_default) - return "default"; - else if (k == TokenKind_keyword_do) - return "do"; - else if (k == TokenKind_keyword_double) - return "double"; - else if (k == TokenKind_keyword_else) - return "else"; - else if (k == TokenKind_keyword_enum) - return "enum"; - else if (k == TokenKind_keyword_extern) - return "extern"; - else if (k == TokenKind_keyword_false) - return "false"; - else if (k == TokenKind_keyword_float) - return "float"; - else if (k == TokenKind_keyword_for) - return "for"; - else if (k == TokenKind_keyword_goto) - return "goto"; - else if (k == TokenKind_keyword_if) - return "if"; - else if (k == TokenKind_keyword_inline) - return "inline"; - else if (k == TokenKind_keyword_int) - return "int"; - else if (k == TokenKind_keyword_long) - return "long"; - else if (k == TokenKind_keyword_nullptr) - return "nullptr"; - else if (k == TokenKind_keyword_register) - return "register"; - else if (k == TokenKind_keyword_restrict) - return "restrict"; - else if (k == TokenKind_keyword_return) - return "return"; - else if (k == TokenKind_keyword_short) - return "short"; - else if (k == TokenKind_keyword_signed) - return "signed"; - else if (k == TokenKind_keyword_sizeof) - return "sizeof"; - else if (k == TokenKind_keyword_static) - return "static"; - else if (k == TokenKind_keyword_static_assert) - return "static_assert"; - else if (k == TokenKind_keyword_struct) - return "struct"; - else if (k == TokenKind_keyword_switch) - return "switch"; - else if (k == TokenKind_keyword_thread_local) - return "thread_local"; - else if (k == TokenKind_keyword_true) - return "true"; - else if (k == TokenKind_keyword_typedef) - return "typedef"; - else if (k == TokenKind_keyword_typeof) - return "typeof"; - else if (k == TokenKind_keyword_typeof_unqual) - return "typeof_unqual"; - else if (k == TokenKind_keyword_union) - return "union"; - else if (k == TokenKind_keyword_unsigned) - return "unsigned"; - else if (k == TokenKind_keyword_void) - return "void"; - else if (k == TokenKind_keyword_volatile) - return "volatile"; - else if (k == TokenKind_keyword_while) - return "while"; - else if (k == TokenKind_keyword__Atomic) - return "_Atomic"; - else if (k == TokenKind_keyword__BitInt) - return "_BitInt"; - else if (k == TokenKind_keyword__Complex) - return "_Complex"; - else if (k == TokenKind_keyword__Decimal128) - return "_Decimal128"; - else if (k == TokenKind_keyword__Decimal32) - return "_Decimal32"; - else if (k == TokenKind_keyword__Decimal64) - return "_Decimal64"; - else if (k == TokenKind_keyword__Generic) - return "_Generic"; - else if (k == TokenKind_keyword__Imaginary) - return "_Imaginary"; - else if (k == TokenKind_keyword__Noreturn) - return "_Noreturn"; - else if (k == TokenKind_and) - return "&"; - else if (k == TokenKind_andand) - return "&&"; - else if (k == TokenKind_arrow) - return "->"; - else if (k == TokenKind_assign) - return "="; - else if (k == TokenKind_assign_add) - return "+="; - else if (k == TokenKind_assign_and) - return "&="; - else if (k == TokenKind_assign_div) - return "/="; - else if (k == TokenKind_assign_lshift) - return "<<="; - else if (k == TokenKind_assign_mod) - return "%="; - else if (k == TokenKind_assign_mul) - return "*="; - else if (k == TokenKind_assign_or) - return "|="; - else if (k == TokenKind_assign_rshift) - return ">>="; - else if (k == TokenKind_assign_sub) - return "-="; - else if (k == TokenKind_assign_xor) - return "^="; - else if (k == TokenKind_brace_l) - return "{"; - else if (k == TokenKind_brace_r) - return "}"; - else if (k == TokenKind_bracket_l) - return "["; - else if (k == TokenKind_bracket_r) - return "]"; - else if (k == TokenKind_colon) - return ":"; - else if (k == TokenKind_comma) - return ","; - else if (k == TokenKind_dot) - return "."; - else if (k == TokenKind_ellipsis) - return "..."; - else if (k == TokenKind_eq) - return "=="; - else if (k == TokenKind_ge) - return ">="; - else if (k == TokenKind_gt) - return ">"; - else if (k == TokenKind_ident) - return "<identifier>"; - else if (k == TokenKind_le) - return "le"; - else if (k == TokenKind_literal_int) - return "<integer>"; - else if (k == TokenKind_literal_str) - return "<string>"; - else if (k == TokenKind_lshift) - return "<<"; - else if (k == TokenKind_lt) - return "lt"; - else if (k == TokenKind_minus) - return "-"; - else if (k == TokenKind_minusminus) - return "--"; - else if (k == TokenKind_ne) - return "!="; - else if (k == TokenKind_not) - return "!"; - else if (k == TokenKind_or) - return "|"; - else if (k == TokenKind_oror) - return "||"; - else if (k == TokenKind_paren_l) - return "("; - else if (k == TokenKind_paren_r) - return ")"; - else if (k == TokenKind_percent) - return "%"; - else if (k == TokenKind_plus) - return "+"; - else if (k == TokenKind_plusplus) - return "++"; - else if (k == TokenKind_question) - return "?"; - else if (k == TokenKind_rshift) - return ">>"; - else if (k == TokenKind_semicolon) - return ";"; - else if (k == TokenKind_slash) - return "/"; - else if (k == TokenKind_star) - return "*"; - else if (k == TokenKind_tilde) - return "~"; - else if (k == TokenKind_xor) - return "^"; - else - unreachable(); -} - -const char* token_stringify(Token* t) { - TokenKind k = t->kind; - if (k == TokenKind_literal_int) { - const char* kind_str = token_kind_stringify(k); - char* buf = calloc(10 + strlen(kind_str) + 3 + 1, sizeof(char)); - sprintf(buf, "%d (%s)", t->value.integer, kind_str); - return buf; - } else if (k == TokenKind_other || k == TokenKind_character_constant || k == TokenKind_ident || - k == TokenKind_literal_int || k == TokenKind_literal_str) { - const char* kind_str = token_kind_stringify(k); - char* buf = calloc(strlen(t->value.string) + strlen(kind_str) + 3 + 1, sizeof(char)); - sprintf(buf, "%s (%s)", t->value.string, kind_str); - return buf; - } else { - return token_kind_stringify(k); - } -} - -void tokens_init(TokenArray* tokens, size_t capacity) { - tokens->len = 0; - tokens->capacity = capacity; - tokens->data = calloc(tokens->capacity, sizeof(Token)); -} - -void tokens_reserve(TokenArray* tokens, size_t size) { - if (size <= tokens->capacity) - return; - while (tokens->capacity < size) { - tokens->capacity *= 2; - } - tokens->data = realloc(tokens->data, tokens->capacity * sizeof(Token)); - memset(tokens->data + tokens->len, 0, (tokens->capacity - tokens->len) * sizeof(Token)); -} - -Token* tokens_push_new(TokenArray* tokens) { - tokens_reserve(tokens, tokens->len + 1); - return &tokens->data[tokens->len++]; -} - -Token* tokens_pop(TokenArray* tokens) { - if (tokens->len != 0) - tokens->len--; -} - enum MacroKind { MacroKind_undef, MacroKind_obj, diff --git a/src/preprocess.h b/src/preprocess.h index 8c5ade2..b43ec4c 100644 --- a/src/preprocess.h +++ b/src/preprocess.h @@ -2,176 +2,7 @@ #define DUCC_PREPROCESS_H #include "io.h" - -enum TokenKind { - TokenKind_eof, - - // Only preprocessing phase. - TokenKind_hash, - TokenKind_hashhash, - TokenKind_whitespace, - TokenKind_newline, - TokenKind_other, - TokenKind_character_constant, - TokenKind_header_name, - TokenKind_pp_directive_define, - TokenKind_pp_directive_elif, - TokenKind_pp_directive_elifdef, - TokenKind_pp_directive_elifndef, - TokenKind_pp_directive_else, - TokenKind_pp_directive_embed, - TokenKind_pp_directive_endif, - TokenKind_pp_directive_error, - TokenKind_pp_directive_if, - TokenKind_pp_directive_ifdef, - TokenKind_pp_directive_ifndef, - TokenKind_pp_directive_include, - TokenKind_pp_directive_line, - TokenKind_pp_directive_pragma, - TokenKind_pp_directive_undef, - TokenKind_pp_directive_warning, - TokenKind_pp_operator_defined, - TokenKind_pp_operator___has_c_attribute, - TokenKind_pp_operator___has_embed, - TokenKind_pp_operator___has_include, - - // C23: 6.4.1 - TokenKind_keyword_alignas, - TokenKind_keyword_alignof, - TokenKind_keyword_auto, - TokenKind_keyword_bool, - TokenKind_keyword_break, - TokenKind_keyword_case, - TokenKind_keyword_char, - TokenKind_keyword_const, - TokenKind_keyword_constexpr, - TokenKind_keyword_continue, - TokenKind_keyword_default, - TokenKind_keyword_do, - TokenKind_keyword_double, - TokenKind_keyword_else, - TokenKind_keyword_enum, - TokenKind_keyword_extern, - TokenKind_keyword_false, - TokenKind_keyword_float, - TokenKind_keyword_for, - TokenKind_keyword_goto, - TokenKind_keyword_if, - TokenKind_keyword_inline, - TokenKind_keyword_int, - TokenKind_keyword_long, - TokenKind_keyword_nullptr, - TokenKind_keyword_register, - TokenKind_keyword_restrict, - TokenKind_keyword_return, - TokenKind_keyword_short, - TokenKind_keyword_signed, - TokenKind_keyword_sizeof, - TokenKind_keyword_static, - TokenKind_keyword_static_assert, - TokenKind_keyword_struct, - TokenKind_keyword_switch, - TokenKind_keyword_thread_local, - TokenKind_keyword_true, - TokenKind_keyword_typedef, - TokenKind_keyword_typeof, - TokenKind_keyword_typeof_unqual, - TokenKind_keyword_union, - TokenKind_keyword_unsigned, - TokenKind_keyword_void, - TokenKind_keyword_volatile, - TokenKind_keyword_while, - TokenKind_keyword__Atomic, - TokenKind_keyword__BitInt, - TokenKind_keyword__Complex, - TokenKind_keyword__Decimal128, - TokenKind_keyword__Decimal32, - TokenKind_keyword__Decimal64, - TokenKind_keyword__Generic, - TokenKind_keyword__Imaginary, - TokenKind_keyword__Noreturn, - - TokenKind_and, - TokenKind_andand, - TokenKind_arrow, - TokenKind_assign, - TokenKind_assign_add, - TokenKind_assign_and, - TokenKind_assign_div, - TokenKind_assign_lshift, - TokenKind_assign_mod, - TokenKind_assign_mul, - TokenKind_assign_or, - TokenKind_assign_rshift, - TokenKind_assign_sub, - TokenKind_assign_xor, - TokenKind_brace_l, - TokenKind_brace_r, - TokenKind_bracket_l, - TokenKind_bracket_r, - TokenKind_colon, - TokenKind_comma, - TokenKind_dot, - TokenKind_ellipsis, - TokenKind_eq, - TokenKind_ge, - TokenKind_gt, - TokenKind_ident, - TokenKind_le, - TokenKind_literal_int, - TokenKind_literal_str, - TokenKind_lshift, - TokenKind_lt, - TokenKind_minus, - TokenKind_minusminus, - TokenKind_ne, - TokenKind_not, - TokenKind_or, - TokenKind_oror, - TokenKind_paren_l, - TokenKind_paren_r, - TokenKind_percent, - TokenKind_plus, - TokenKind_plusplus, - TokenKind_question, - TokenKind_rshift, - TokenKind_semicolon, - TokenKind_slash, - TokenKind_star, - TokenKind_tilde, - TokenKind_xor, -}; -typedef enum TokenKind TokenKind; - -const char* token_kind_stringify(TokenKind k); - -// TokenValue is externally tagged by Token's kind. -union TokenValue { - const char* string; - int integer; -}; -typedef union TokenValue TokenValue; - -struct Token { - TokenKind kind; - TokenValue value; - SourceLocation loc; -}; -typedef struct Token Token; - -const char* token_stringify(Token* t); - -struct TokenArray { - size_t len; - size_t capacity; - Token* data; -}; -typedef struct TokenArray TokenArray; - -void tokens_init(TokenArray* tokens, size_t capacity); -void tokens_reserve(TokenArray* tokens, size_t size); -Token* tokens_push_new(TokenArray* tokens); -Token* tokens_pop(TokenArray* tokens); +#include "token.h" TokenArray* preprocess(InFile* src); diff --git a/src/token.c b/src/token.c new file mode 100644 index 0000000..3ca34d8 --- /dev/null +++ b/src/token.c @@ -0,0 +1,313 @@ +#include "token.h" +#include "common.h" + +const char* token_kind_stringify(TokenKind k) { + if (k == TokenKind_eof) + return "<eof>"; + else if (k == TokenKind_hash) + return "#"; + else if (k == TokenKind_hashhash) + return "##"; + else if (k == TokenKind_whitespace) + return "<whitespace>"; + else if (k == TokenKind_newline) + return "<new-line>"; + else if (k == TokenKind_other) + return "<other>"; + else if (k == TokenKind_character_constant) + return "<character-constant>"; + else if (k == TokenKind_header_name) + return "<header-name>"; + else if (k == TokenKind_pp_directive_define) + return "#define"; + else if (k == TokenKind_pp_directive_elif) + return "#elif"; + else if (k == TokenKind_pp_directive_elifdef) + return "#elifdef"; + else if (k == TokenKind_pp_directive_elifndef) + return "#elifndef"; + else if (k == TokenKind_pp_directive_else) + return "#else"; + else if (k == TokenKind_pp_directive_embed) + return "#embed"; + else if (k == TokenKind_pp_directive_endif) + return "#endif"; + else if (k == TokenKind_pp_directive_error) + return "#error"; + else if (k == TokenKind_pp_directive_if) + return "#if"; + else if (k == TokenKind_pp_directive_ifdef) + return "#ifdef"; + else if (k == TokenKind_pp_directive_ifndef) + return "#ifndef"; + else if (k == TokenKind_pp_directive_include) + return "#include"; + else if (k == TokenKind_pp_directive_line) + return "#line"; + else if (k == TokenKind_pp_directive_pragma) + return "#pragma"; + else if (k == TokenKind_pp_directive_undef) + return "#undef"; + else if (k == TokenKind_pp_directive_warning) + return "#warning"; + else if (k == TokenKind_pp_operator_defined) + return "defined"; + else if (k == TokenKind_pp_operator___has_c_attribute) + return "__has_c_attribute"; + else if (k == TokenKind_pp_operator___has_embed) + return "__has_embed"; + else if (k == TokenKind_pp_operator___has_include) + return "__has_include"; + else if (k == TokenKind_keyword_alignas) + return "alignas"; + else if (k == TokenKind_keyword_alignof) + return "alignof"; + else if (k == TokenKind_keyword_auto) + return "auto"; + else if (k == TokenKind_keyword_bool) + return "bool"; + else if (k == TokenKind_keyword_break) + return "break"; + else if (k == TokenKind_keyword_case) + return "case"; + else if (k == TokenKind_keyword_char) + return "char"; + else if (k == TokenKind_keyword_const) + return "const"; + else if (k == TokenKind_keyword_constexpr) + return "constexpr"; + else if (k == TokenKind_keyword_continue) + return "continue"; + else if (k == TokenKind_keyword_default) + return "default"; + else if (k == TokenKind_keyword_do) + return "do"; + else if (k == TokenKind_keyword_double) + return "double"; + else if (k == TokenKind_keyword_else) + return "else"; + else if (k == TokenKind_keyword_enum) + return "enum"; + else if (k == TokenKind_keyword_extern) + return "extern"; + else if (k == TokenKind_keyword_false) + return "false"; + else if (k == TokenKind_keyword_float) + return "float"; + else if (k == TokenKind_keyword_for) + return "for"; + else if (k == TokenKind_keyword_goto) + return "goto"; + else if (k == TokenKind_keyword_if) + return "if"; + else if (k == TokenKind_keyword_inline) + return "inline"; + else if (k == TokenKind_keyword_int) + return "int"; + else if (k == TokenKind_keyword_long) + return "long"; + else if (k == TokenKind_keyword_nullptr) + return "nullptr"; + else if (k == TokenKind_keyword_register) + return "register"; + else if (k == TokenKind_keyword_restrict) + return "restrict"; + else if (k == TokenKind_keyword_return) + return "return"; + else if (k == TokenKind_keyword_short) + return "short"; + else if (k == TokenKind_keyword_signed) + return "signed"; + else if (k == TokenKind_keyword_sizeof) + return "sizeof"; + else if (k == TokenKind_keyword_static) + return "static"; + else if (k == TokenKind_keyword_static_assert) + return "static_assert"; + else if (k == TokenKind_keyword_struct) + return "struct"; + else if (k == TokenKind_keyword_switch) + return "switch"; + else if (k == TokenKind_keyword_thread_local) + return "thread_local"; + else if (k == TokenKind_keyword_true) + return "true"; + else if (k == TokenKind_keyword_typedef) + return "typedef"; + else if (k == TokenKind_keyword_typeof) + return "typeof"; + else if (k == TokenKind_keyword_typeof_unqual) + return "typeof_unqual"; + else if (k == TokenKind_keyword_union) + return "union"; + else if (k == TokenKind_keyword_unsigned) + return "unsigned"; + else if (k == TokenKind_keyword_void) + return "void"; + else if (k == TokenKind_keyword_volatile) + return "volatile"; + else if (k == TokenKind_keyword_while) + return "while"; + else if (k == TokenKind_keyword__Atomic) + return "_Atomic"; + else if (k == TokenKind_keyword__BitInt) + return "_BitInt"; + else if (k == TokenKind_keyword__Complex) + return "_Complex"; + else if (k == TokenKind_keyword__Decimal128) + return "_Decimal128"; + else if (k == TokenKind_keyword__Decimal32) + return "_Decimal32"; + else if (k == TokenKind_keyword__Decimal64) + return "_Decimal64"; + else if (k == TokenKind_keyword__Generic) + return "_Generic"; + else if (k == TokenKind_keyword__Imaginary) + return "_Imaginary"; + else if (k == TokenKind_keyword__Noreturn) + return "_Noreturn"; + else if (k == TokenKind_and) + return "&"; + else if (k == TokenKind_andand) + return "&&"; + else if (k == TokenKind_arrow) + return "->"; + else if (k == TokenKind_assign) + return "="; + else if (k == TokenKind_assign_add) + return "+="; + else if (k == TokenKind_assign_and) + return "&="; + else if (k == TokenKind_assign_div) + return "/="; + else if (k == TokenKind_assign_lshift) + return "<<="; + else if (k == TokenKind_assign_mod) + return "%="; + else if (k == TokenKind_assign_mul) + return "*="; + else if (k == TokenKind_assign_or) + return "|="; + else if (k == TokenKind_assign_rshift) + return ">>="; + else if (k == TokenKind_assign_sub) + return "-="; + else if (k == TokenKind_assign_xor) + return "^="; + else if (k == TokenKind_brace_l) + return "{"; + else if (k == TokenKind_brace_r) + return "}"; + else if (k == TokenKind_bracket_l) + return "["; + else if (k == TokenKind_bracket_r) + return "]"; + else if (k == TokenKind_colon) + return ":"; + else if (k == TokenKind_comma) + return ","; + else if (k == TokenKind_dot) + return "."; + else if (k == TokenKind_ellipsis) + return "..."; + else if (k == TokenKind_eq) + return "=="; + else if (k == TokenKind_ge) + return ">="; + else if (k == TokenKind_gt) + return ">"; + else if (k == TokenKind_ident) + return "<identifier>"; + else if (k == TokenKind_le) + return "le"; + else if (k == TokenKind_literal_int) + return "<integer>"; + else if (k == TokenKind_literal_str) + return "<string>"; + else if (k == TokenKind_lshift) + return "<<"; + else if (k == TokenKind_lt) + return "lt"; + else if (k == TokenKind_minus) + return "-"; + else if (k == TokenKind_minusminus) + return "--"; + else if (k == TokenKind_ne) + return "!="; + else if (k == TokenKind_not) + return "!"; + else if (k == TokenKind_or) + return "|"; + else if (k == TokenKind_oror) + return "||"; + else if (k == TokenKind_paren_l) + return "("; + else if (k == TokenKind_paren_r) + return ")"; + else if (k == TokenKind_percent) + return "%"; + else if (k == TokenKind_plus) + return "+"; + else if (k == TokenKind_plusplus) + return "++"; + else if (k == TokenKind_question) + return "?"; + else if (k == TokenKind_rshift) + return ">>"; + else if (k == TokenKind_semicolon) + return ";"; + else if (k == TokenKind_slash) + return "/"; + else if (k == TokenKind_star) + return "*"; + else if (k == TokenKind_tilde) + return "~"; + else if (k == TokenKind_xor) + return "^"; + else + unreachable(); +} + +const char* token_stringify(Token* t) { + TokenKind k = t->kind; + if (k == TokenKind_literal_int) { + const char* kind_str = token_kind_stringify(k); + char* buf = calloc(10 + strlen(kind_str) + 3 + 1, sizeof(char)); + sprintf(buf, "%d (%s)", t->value.integer, kind_str); + return buf; + } else if (k == TokenKind_other || k == TokenKind_character_constant || k == TokenKind_ident || + k == TokenKind_literal_int || k == TokenKind_literal_str) { + const char* kind_str = token_kind_stringify(k); + char* buf = calloc(strlen(t->value.string) + strlen(kind_str) + 3 + 1, sizeof(char)); + sprintf(buf, "%s (%s)", t->value.string, kind_str); + return buf; + } else { + return token_kind_stringify(k); + } +} + +void tokens_init(TokenArray* tokens, size_t capacity) { + tokens->len = 0; + tokens->capacity = capacity; + tokens->data = calloc(tokens->capacity, sizeof(Token)); +} + +void tokens_reserve(TokenArray* tokens, size_t size) { + if (size <= tokens->capacity) + return; + while (tokens->capacity < size) { + tokens->capacity *= 2; + } + tokens->data = realloc(tokens->data, tokens->capacity * sizeof(Token)); + memset(tokens->data + tokens->len, 0, (tokens->capacity - tokens->len) * sizeof(Token)); +} + +Token* tokens_push_new(TokenArray* tokens) { + tokens_reserve(tokens, tokens->len + 1); + return &tokens->data[tokens->len++]; +} + +Token* tokens_pop(TokenArray* tokens) { + if (tokens->len != 0) + tokens->len--; +} diff --git a/src/token.h b/src/token.h new file mode 100644 index 0000000..6f5fb2a --- /dev/null +++ b/src/token.h @@ -0,0 +1,176 @@ +#ifndef DUCC_TOKEN_H +#define DUCC_TOKEN_H + +#include "io.h" + +enum TokenKind { + TokenKind_eof, + + // Only preprocessing phase. + TokenKind_hash, + TokenKind_hashhash, + TokenKind_whitespace, + TokenKind_newline, + TokenKind_other, + TokenKind_character_constant, + TokenKind_header_name, + TokenKind_pp_directive_define, + TokenKind_pp_directive_elif, + TokenKind_pp_directive_elifdef, + TokenKind_pp_directive_elifndef, + TokenKind_pp_directive_else, + TokenKind_pp_directive_embed, + TokenKind_pp_directive_endif, + TokenKind_pp_directive_error, + TokenKind_pp_directive_if, + TokenKind_pp_directive_ifdef, + TokenKind_pp_directive_ifndef, + TokenKind_pp_directive_include, + TokenKind_pp_directive_line, + TokenKind_pp_directive_pragma, + TokenKind_pp_directive_undef, + TokenKind_pp_directive_warning, + TokenKind_pp_operator_defined, + TokenKind_pp_operator___has_c_attribute, + TokenKind_pp_operator___has_embed, + TokenKind_pp_operator___has_include, + + // C23: 6.4.1 + TokenKind_keyword_alignas, + TokenKind_keyword_alignof, + TokenKind_keyword_auto, + TokenKind_keyword_bool, + TokenKind_keyword_break, + TokenKind_keyword_case, + TokenKind_keyword_char, + TokenKind_keyword_const, + TokenKind_keyword_constexpr, + TokenKind_keyword_continue, + TokenKind_keyword_default, + TokenKind_keyword_do, + TokenKind_keyword_double, + TokenKind_keyword_else, + TokenKind_keyword_enum, + TokenKind_keyword_extern, + TokenKind_keyword_false, + TokenKind_keyword_float, + TokenKind_keyword_for, + TokenKind_keyword_goto, + TokenKind_keyword_if, + TokenKind_keyword_inline, + TokenKind_keyword_int, + TokenKind_keyword_long, + TokenKind_keyword_nullptr, + TokenKind_keyword_register, + TokenKind_keyword_restrict, + TokenKind_keyword_return, + TokenKind_keyword_short, + TokenKind_keyword_signed, + TokenKind_keyword_sizeof, + TokenKind_keyword_static, + TokenKind_keyword_static_assert, + TokenKind_keyword_struct, + TokenKind_keyword_switch, + TokenKind_keyword_thread_local, + TokenKind_keyword_true, + TokenKind_keyword_typedef, + TokenKind_keyword_typeof, + TokenKind_keyword_typeof_unqual, + TokenKind_keyword_union, + TokenKind_keyword_unsigned, + TokenKind_keyword_void, + TokenKind_keyword_volatile, + TokenKind_keyword_while, + TokenKind_keyword__Atomic, + TokenKind_keyword__BitInt, + TokenKind_keyword__Complex, + TokenKind_keyword__Decimal128, + TokenKind_keyword__Decimal32, + TokenKind_keyword__Decimal64, + TokenKind_keyword__Generic, + TokenKind_keyword__Imaginary, + TokenKind_keyword__Noreturn, + + TokenKind_and, + TokenKind_andand, + TokenKind_arrow, + TokenKind_assign, + TokenKind_assign_add, + TokenKind_assign_and, + TokenKind_assign_div, + TokenKind_assign_lshift, + TokenKind_assign_mod, + TokenKind_assign_mul, + TokenKind_assign_or, + TokenKind_assign_rshift, + TokenKind_assign_sub, + TokenKind_assign_xor, + TokenKind_brace_l, + TokenKind_brace_r, + TokenKind_bracket_l, + TokenKind_bracket_r, + TokenKind_colon, + TokenKind_comma, + TokenKind_dot, + TokenKind_ellipsis, + TokenKind_eq, + TokenKind_ge, + TokenKind_gt, + TokenKind_ident, + TokenKind_le, + TokenKind_literal_int, + TokenKind_literal_str, + TokenKind_lshift, + TokenKind_lt, + TokenKind_minus, + TokenKind_minusminus, + TokenKind_ne, + TokenKind_not, + TokenKind_or, + TokenKind_oror, + TokenKind_paren_l, + TokenKind_paren_r, + TokenKind_percent, + TokenKind_plus, + TokenKind_plusplus, + TokenKind_question, + TokenKind_rshift, + TokenKind_semicolon, + TokenKind_slash, + TokenKind_star, + TokenKind_tilde, + TokenKind_xor, +}; +typedef enum TokenKind TokenKind; + +const char* token_kind_stringify(TokenKind k); + +// TokenValue is externally tagged by Token's kind. +union TokenValue { + const char* string; + int integer; +}; +typedef union TokenValue TokenValue; + +struct Token { + TokenKind kind; + TokenValue value; + SourceLocation loc; +}; +typedef struct Token Token; + +const char* token_stringify(Token* t); + +struct TokenArray { + size_t len; + size_t capacity; + Token* data; +}; +typedef struct TokenArray TokenArray; + +void tokens_init(TokenArray* tokens, size_t capacity); +void tokens_reserve(TokenArray* tokens, size_t size); +Token* tokens_push_new(TokenArray* tokens); +Token* tokens_pop(TokenArray* tokens); + +#endif |
