about summary refs log tree commit diff homepage
diff options
context:
space:
mode:
-rw-r--r-- Makefile 1
-rw-r--r-- src/preprocess.c 311
-rw-r--r-- src/preprocess.h 171
-rw-r--r-- src/token.c 313
-rw-r--r-- src/token.h 176
5 files changed, 491 insertions, 481 deletions
diff --git a/Makefile b/Makefile
index 36b4825..19cbf45 100644
--- a/Makefile
+++ b/Makefile
@@ -12,6 +12,7 @@ OBJECTS := \
$(BUILD_DIR)/parse.o \
$(BUILD_DIR)/preprocess.o \
$(BUILD_DIR)/sys.o \
+ $(BUILD_DIR)/token.o \
$(BUILD_DIR)/tokenize.o
.PHONY: all
diff --git a/src/preprocess.c b/src/preprocess.c
index 85f7320..c96c194 100644
--- a/src/preprocess.c
+++ b/src/preprocess.c
@@ -3,317 +3,6 @@
#include "parse.h"
#include "sys.h"
-const char* token_kind_stringify(TokenKind k) {
- if (k == TokenKind_eof)
- return "<eof>";
- else if (k == TokenKind_hash)
- return "#";
- else if (k == TokenKind_hashhash)
- return "##";
- else if (k == TokenKind_whitespace)
- return "<whitespace>";
- else if (k == TokenKind_newline)
- return "<new-line>";
- else if (k == TokenKind_other)
- return "<other>";
- else if (k == TokenKind_character_constant)
- return "<character-constant>";
- else if (k == TokenKind_header_name)
- return "<header-name>";
- else if (k == TokenKind_pp_directive_define)
- return "#define";
- else if (k == TokenKind_pp_directive_elif)
- return "#elif";
- else if (k == TokenKind_pp_directive_elifdef)
- return "#elifdef";
- else if (k == TokenKind_pp_directive_elifndef)
- return "#elifndef";
- else if (k == TokenKind_pp_directive_else)
- return "#else";
- else if (k == TokenKind_pp_directive_embed)
- return "#embed";
- else if (k == TokenKind_pp_directive_endif)
- return "#endif";
- else if (k == TokenKind_pp_directive_error)
- return "#error";
- else if (k == TokenKind_pp_directive_if)
- return "#if";
- else if (k == TokenKind_pp_directive_ifdef)
- return "#ifdef";
- else if (k == TokenKind_pp_directive_ifndef)
- return "#ifndef";
- else if (k == TokenKind_pp_directive_include)
- return "#include";
- else if (k == TokenKind_pp_directive_line)
- return "#line";
- else if (k == TokenKind_pp_directive_pragma)
- return "#pragma";
- else if (k == TokenKind_pp_directive_undef)
- return "#undef";
- else if (k == TokenKind_pp_directive_warning)
- return "#warning";
- else if (k == TokenKind_pp_operator_defined)
- return "defined";
- else if (k == TokenKind_pp_operator___has_c_attribute)
- return "__has_c_attribute";
- else if (k == TokenKind_pp_operator___has_embed)
- return "__has_embed";
- else if (k == TokenKind_pp_operator___has_include)
- return "__has_include";
- else if (k == TokenKind_keyword_alignas)
- return "alignas";
- else if (k == TokenKind_keyword_alignof)
- return "alignof";
- else if (k == TokenKind_keyword_auto)
- return "auto";
- else if (k == TokenKind_keyword_bool)
- return "bool";
- else if (k == TokenKind_keyword_break)
- return "break";
- else if (k == TokenKind_keyword_case)
- return "case";
- else if (k == TokenKind_keyword_char)
- return "char";
- else if (k == TokenKind_keyword_const)
- return "const";
- else if (k == TokenKind_keyword_constexpr)
- return "constexpr";
- else if (k == TokenKind_keyword_continue)
- return "continue";
- else if (k == TokenKind_keyword_default)
- return "default";
- else if (k == TokenKind_keyword_do)
- return "do";
- else if (k == TokenKind_keyword_double)
- return "double";
- else if (k == TokenKind_keyword_else)
- return "else";
- else if (k == TokenKind_keyword_enum)
- return "enum";
- else if (k == TokenKind_keyword_extern)
- return "extern";
- else if (k == TokenKind_keyword_false)
- return "false";
- else if (k == TokenKind_keyword_float)
- return "float";
- else if (k == TokenKind_keyword_for)
- return "for";
- else if (k == TokenKind_keyword_goto)
- return "goto";
- else if (k == TokenKind_keyword_if)
- return "if";
- else if (k == TokenKind_keyword_inline)
- return "inline";
- else if (k == TokenKind_keyword_int)
- return "int";
- else if (k == TokenKind_keyword_long)
- return "long";
- else if (k == TokenKind_keyword_nullptr)
- return "nullptr";
- else if (k == TokenKind_keyword_register)
- return "register";
- else if (k == TokenKind_keyword_restrict)
- return "restrict";
- else if (k == TokenKind_keyword_return)
- return "return";
- else if (k == TokenKind_keyword_short)
- return "short";
- else if (k == TokenKind_keyword_signed)
- return "signed";
- else if (k == TokenKind_keyword_sizeof)
- return "sizeof";
- else if (k == TokenKind_keyword_static)
- return "static";
- else if (k == TokenKind_keyword_static_assert)
- return "static_assert";
- else if (k == TokenKind_keyword_struct)
- return "struct";
- else if (k == TokenKind_keyword_switch)
- return "switch";
- else if (k == TokenKind_keyword_thread_local)
- return "thread_local";
- else if (k == TokenKind_keyword_true)
- return "true";
- else if (k == TokenKind_keyword_typedef)
- return "typedef";
- else if (k == TokenKind_keyword_typeof)
- return "typeof";
- else if (k == TokenKind_keyword_typeof_unqual)
- return "typeof_unqual";
- else if (k == TokenKind_keyword_union)
- return "union";
- else if (k == TokenKind_keyword_unsigned)
- return "unsigned";
- else if (k == TokenKind_keyword_void)
- return "void";
- else if (k == TokenKind_keyword_volatile)
- return "volatile";
- else if (k == TokenKind_keyword_while)
- return "while";
- else if (k == TokenKind_keyword__Atomic)
- return "_Atomic";
- else if (k == TokenKind_keyword__BitInt)
- return "_BitInt";
- else if (k == TokenKind_keyword__Complex)
- return "_Complex";
- else if (k == TokenKind_keyword__Decimal128)
- return "_Decimal128";
- else if (k == TokenKind_keyword__Decimal32)
- return "_Decimal32";
- else if (k == TokenKind_keyword__Decimal64)
- return "_Decimal64";
- else if (k == TokenKind_keyword__Generic)
- return "_Generic";
- else if (k == TokenKind_keyword__Imaginary)
- return "_Imaginary";
- else if (k == TokenKind_keyword__Noreturn)
- return "_Noreturn";
- else if (k == TokenKind_and)
- return "&";
- else if (k == TokenKind_andand)
- return "&&";
- else if (k == TokenKind_arrow)
- return "->";
- else if (k == TokenKind_assign)
- return "=";
- else if (k == TokenKind_assign_add)
- return "+=";
- else if (k == TokenKind_assign_and)
- return "&=";
- else if (k == TokenKind_assign_div)
- return "/=";
- else if (k == TokenKind_assign_lshift)
- return "<<=";
- else if (k == TokenKind_assign_mod)
- return "%=";
- else if (k == TokenKind_assign_mul)
- return "*=";
- else if (k == TokenKind_assign_or)
- return "|=";
- else if (k == TokenKind_assign_rshift)
- return ">>=";
- else if (k == TokenKind_assign_sub)
- return "-=";
- else if (k == TokenKind_assign_xor)
- return "^=";
- else if (k == TokenKind_brace_l)
- return "{";
- else if (k == TokenKind_brace_r)
- return "}";
- else if (k == TokenKind_bracket_l)
- return "[";
- else if (k == TokenKind_bracket_r)
- return "]";
- else if (k == TokenKind_colon)
- return ":";
- else if (k == TokenKind_comma)
- return ",";
- else if (k == TokenKind_dot)
- return ".";
- else if (k == TokenKind_ellipsis)
- return "...";
- else if (k == TokenKind_eq)
- return "==";
- else if (k == TokenKind_ge)
- return ">=";
- else if (k == TokenKind_gt)
- return ">";
- else if (k == TokenKind_ident)
- return "<identifier>";
- else if (k == TokenKind_le)
- return "le";
- else if (k == TokenKind_literal_int)
- return "<integer>";
- else if (k == TokenKind_literal_str)
- return "<string>";
- else if (k == TokenKind_lshift)
- return "<<";
- else if (k == TokenKind_lt)
- return "lt";
- else if (k == TokenKind_minus)
- return "-";
- else if (k == TokenKind_minusminus)
- return "--";
- else if (k == TokenKind_ne)
- return "!=";
- else if (k == TokenKind_not)
- return "!";
- else if (k == TokenKind_or)
- return "|";
- else if (k == TokenKind_oror)
- return "||";
- else if (k == TokenKind_paren_l)
- return "(";
- else if (k == TokenKind_paren_r)
- return ")";
- else if (k == TokenKind_percent)
- return "%";
- else if (k == TokenKind_plus)
- return "+";
- else if (k == TokenKind_plusplus)
- return "++";
- else if (k == TokenKind_question)
- return "?";
- else if (k == TokenKind_rshift)
- return ">>";
- else if (k == TokenKind_semicolon)
- return ";";
- else if (k == TokenKind_slash)
- return "/";
- else if (k == TokenKind_star)
- return "*";
- else if (k == TokenKind_tilde)
- return "~";
- else if (k == TokenKind_xor)
- return "^";
- else
- unreachable();
-}
-
-const char* token_stringify(Token* t) {
- TokenKind k = t->kind;
- if (k == TokenKind_literal_int) {
- const char* kind_str = token_kind_stringify(k);
- char* buf = calloc(10 + strlen(kind_str) + 3 + 1, sizeof(char));
- sprintf(buf, "%d (%s)", t->value.integer, kind_str);
- return buf;
- } else if (k == TokenKind_other || k == TokenKind_character_constant || k == TokenKind_ident ||
- k == TokenKind_literal_int || k == TokenKind_literal_str) {
- const char* kind_str = token_kind_stringify(k);
- char* buf = calloc(strlen(t->value.string) + strlen(kind_str) + 3 + 1, sizeof(char));
- sprintf(buf, "%s (%s)", t->value.string, kind_str);
- return buf;
- } else {
- return token_kind_stringify(k);
- }
-}
-
-void tokens_init(TokenArray* tokens, size_t capacity) {
- tokens->len = 0;
- tokens->capacity = capacity;
- tokens->data = calloc(tokens->capacity, sizeof(Token));
-}
-
-void tokens_reserve(TokenArray* tokens, size_t size) {
- if (size <= tokens->capacity)
- return;
- while (tokens->capacity < size) {
- tokens->capacity *= 2;
- }
- tokens->data = realloc(tokens->data, tokens->capacity * sizeof(Token));
- memset(tokens->data + tokens->len, 0, (tokens->capacity - tokens->len) * sizeof(Token));
-}
-
-Token* tokens_push_new(TokenArray* tokens) {
- tokens_reserve(tokens, tokens->len + 1);
- return &tokens->data[tokens->len++];
-}
-
-Token* tokens_pop(TokenArray* tokens) {
- if (tokens->len != 0)
- tokens->len--;
-}
-
enum MacroKind {
MacroKind_undef,
MacroKind_obj,
diff --git a/src/preprocess.h b/src/preprocess.h
index 8c5ade2..b43ec4c 100644
--- a/src/preprocess.h
+++ b/src/preprocess.h
@@ -2,176 +2,7 @@
#define DUCC_PREPROCESS_H
#include "io.h"
-
-enum TokenKind {
- TokenKind_eof,
-
- // Only preprocessing phase.
- TokenKind_hash,
- TokenKind_hashhash,
- TokenKind_whitespace,
- TokenKind_newline,
- TokenKind_other,
- TokenKind_character_constant,
- TokenKind_header_name,
- TokenKind_pp_directive_define,
- TokenKind_pp_directive_elif,
- TokenKind_pp_directive_elifdef,
- TokenKind_pp_directive_elifndef,
- TokenKind_pp_directive_else,
- TokenKind_pp_directive_embed,
- TokenKind_pp_directive_endif,
- TokenKind_pp_directive_error,
- TokenKind_pp_directive_if,
- TokenKind_pp_directive_ifdef,
- TokenKind_pp_directive_ifndef,
- TokenKind_pp_directive_include,
- TokenKind_pp_directive_line,
- TokenKind_pp_directive_pragma,
- TokenKind_pp_directive_undef,
- TokenKind_pp_directive_warning,
- TokenKind_pp_operator_defined,
- TokenKind_pp_operator___has_c_attribute,
- TokenKind_pp_operator___has_embed,
- TokenKind_pp_operator___has_include,
-
- // C23: 6.4.1
- TokenKind_keyword_alignas,
- TokenKind_keyword_alignof,
- TokenKind_keyword_auto,
- TokenKind_keyword_bool,
- TokenKind_keyword_break,
- TokenKind_keyword_case,
- TokenKind_keyword_char,
- TokenKind_keyword_const,
- TokenKind_keyword_constexpr,
- TokenKind_keyword_continue,
- TokenKind_keyword_default,
- TokenKind_keyword_do,
- TokenKind_keyword_double,
- TokenKind_keyword_else,
- TokenKind_keyword_enum,
- TokenKind_keyword_extern,
- TokenKind_keyword_false,
- TokenKind_keyword_float,
- TokenKind_keyword_for,
- TokenKind_keyword_goto,
- TokenKind_keyword_if,
- TokenKind_keyword_inline,
- TokenKind_keyword_int,
- TokenKind_keyword_long,
- TokenKind_keyword_nullptr,
- TokenKind_keyword_register,
- TokenKind_keyword_restrict,
- TokenKind_keyword_return,
- TokenKind_keyword_short,
- TokenKind_keyword_signed,
- TokenKind_keyword_sizeof,
- TokenKind_keyword_static,
- TokenKind_keyword_static_assert,
- TokenKind_keyword_struct,
- TokenKind_keyword_switch,
- TokenKind_keyword_thread_local,
- TokenKind_keyword_true,
- TokenKind_keyword_typedef,
- TokenKind_keyword_typeof,
- TokenKind_keyword_typeof_unqual,
- TokenKind_keyword_union,
- TokenKind_keyword_unsigned,
- TokenKind_keyword_void,
- TokenKind_keyword_volatile,
- TokenKind_keyword_while,
- TokenKind_keyword__Atomic,
- TokenKind_keyword__BitInt,
- TokenKind_keyword__Complex,
- TokenKind_keyword__Decimal128,
- TokenKind_keyword__Decimal32,
- TokenKind_keyword__Decimal64,
- TokenKind_keyword__Generic,
- TokenKind_keyword__Imaginary,
- TokenKind_keyword__Noreturn,
-
- TokenKind_and,
- TokenKind_andand,
- TokenKind_arrow,
- TokenKind_assign,
- TokenKind_assign_add,
- TokenKind_assign_and,
- TokenKind_assign_div,
- TokenKind_assign_lshift,
- TokenKind_assign_mod,
- TokenKind_assign_mul,
- TokenKind_assign_or,
- TokenKind_assign_rshift,
- TokenKind_assign_sub,
- TokenKind_assign_xor,
- TokenKind_brace_l,
- TokenKind_brace_r,
- TokenKind_bracket_l,
- TokenKind_bracket_r,
- TokenKind_colon,
- TokenKind_comma,
- TokenKind_dot,
- TokenKind_ellipsis,
- TokenKind_eq,
- TokenKind_ge,
- TokenKind_gt,
- TokenKind_ident,
- TokenKind_le,
- TokenKind_literal_int,
- TokenKind_literal_str,
- TokenKind_lshift,
- TokenKind_lt,
- TokenKind_minus,
- TokenKind_minusminus,
- TokenKind_ne,
- TokenKind_not,
- TokenKind_or,
- TokenKind_oror,
- TokenKind_paren_l,
- TokenKind_paren_r,
- TokenKind_percent,
- TokenKind_plus,
- TokenKind_plusplus,
- TokenKind_question,
- TokenKind_rshift,
- TokenKind_semicolon,
- TokenKind_slash,
- TokenKind_star,
- TokenKind_tilde,
- TokenKind_xor,
-};
-typedef enum TokenKind TokenKind;
-
-const char* token_kind_stringify(TokenKind k);
-
-// TokenValue is externally tagged by Token's kind.
-union TokenValue {
- const char* string;
- int integer;
-};
-typedef union TokenValue TokenValue;
-
-struct Token {
- TokenKind kind;
- TokenValue value;
- SourceLocation loc;
-};
-typedef struct Token Token;
-
-const char* token_stringify(Token* t);
-
-struct TokenArray {
- size_t len;
- size_t capacity;
- Token* data;
-};
-typedef struct TokenArray TokenArray;
-
-void tokens_init(TokenArray* tokens, size_t capacity);
-void tokens_reserve(TokenArray* tokens, size_t size);
-Token* tokens_push_new(TokenArray* tokens);
-Token* tokens_pop(TokenArray* tokens);
+#include "token.h"
TokenArray* preprocess(InFile* src);
diff --git a/src/token.c b/src/token.c
new file mode 100644
index 0000000..3ca34d8
--- /dev/null
+++ b/src/token.c
@@ -0,0 +1,313 @@
+#include "token.h"
+#include "common.h"
+
+// Returns the spelling of token kind `k` as a string literal.
+//
+// Kinds with a fixed spelling (directives, preprocessing operators, keywords,
+// punctuators) map to that spelling. Kinds whose text varies from token to
+// token map to an angle-bracketed placeholder such as "<identifier>".
+// The result is a string literal: it must not be freed or modified.
+// A kind that is not listed below reaches unreachable().
+const char* token_kind_stringify(TokenKind k) {
+    if (k == TokenKind_eof)
+        return "<eof>";
+    else if (k == TokenKind_hash)
+        return "#";
+    else if (k == TokenKind_hashhash)
+        return "##";
+    else if (k == TokenKind_whitespace)
+        return "<whitespace>";
+    else if (k == TokenKind_newline)
+        return "<new-line>";
+    else if (k == TokenKind_other)
+        return "<other>";
+    else if (k == TokenKind_character_constant)
+        return "<character-constant>";
+    else if (k == TokenKind_header_name)
+        return "<header-name>";
+    else if (k == TokenKind_pp_directive_define)
+        return "#define";
+    else if (k == TokenKind_pp_directive_elif)
+        return "#elif";
+    else if (k == TokenKind_pp_directive_elifdef)
+        return "#elifdef";
+    else if (k == TokenKind_pp_directive_elifndef)
+        return "#elifndef";
+    else if (k == TokenKind_pp_directive_else)
+        return "#else";
+    else if (k == TokenKind_pp_directive_embed)
+        return "#embed";
+    else if (k == TokenKind_pp_directive_endif)
+        return "#endif";
+    else if (k == TokenKind_pp_directive_error)
+        return "#error";
+    else if (k == TokenKind_pp_directive_if)
+        return "#if";
+    else if (k == TokenKind_pp_directive_ifdef)
+        return "#ifdef";
+    else if (k == TokenKind_pp_directive_ifndef)
+        return "#ifndef";
+    else if (k == TokenKind_pp_directive_include)
+        return "#include";
+    else if (k == TokenKind_pp_directive_line)
+        return "#line";
+    else if (k == TokenKind_pp_directive_pragma)
+        return "#pragma";
+    else if (k == TokenKind_pp_directive_undef)
+        return "#undef";
+    else if (k == TokenKind_pp_directive_warning)
+        return "#warning";
+    else if (k == TokenKind_pp_operator_defined)
+        return "defined";
+    else if (k == TokenKind_pp_operator___has_c_attribute)
+        return "__has_c_attribute";
+    else if (k == TokenKind_pp_operator___has_embed)
+        return "__has_embed";
+    else if (k == TokenKind_pp_operator___has_include)
+        return "__has_include";
+    else if (k == TokenKind_keyword_alignas)
+        return "alignas";
+    else if (k == TokenKind_keyword_alignof)
+        return "alignof";
+    else if (k == TokenKind_keyword_auto)
+        return "auto";
+    else if (k == TokenKind_keyword_bool)
+        return "bool";
+    else if (k == TokenKind_keyword_break)
+        return "break";
+    else if (k == TokenKind_keyword_case)
+        return "case";
+    else if (k == TokenKind_keyword_char)
+        return "char";
+    else if (k == TokenKind_keyword_const)
+        return "const";
+    else if (k == TokenKind_keyword_constexpr)
+        return "constexpr";
+    else if (k == TokenKind_keyword_continue)
+        return "continue";
+    else if (k == TokenKind_keyword_default)
+        return "default";
+    else if (k == TokenKind_keyword_do)
+        return "do";
+    else if (k == TokenKind_keyword_double)
+        return "double";
+    else if (k == TokenKind_keyword_else)
+        return "else";
+    else if (k == TokenKind_keyword_enum)
+        return "enum";
+    else if (k == TokenKind_keyword_extern)
+        return "extern";
+    else if (k == TokenKind_keyword_false)
+        return "false";
+    else if (k == TokenKind_keyword_float)
+        return "float";
+    else if (k == TokenKind_keyword_for)
+        return "for";
+    else if (k == TokenKind_keyword_goto)
+        return "goto";
+    else if (k == TokenKind_keyword_if)
+        return "if";
+    else if (k == TokenKind_keyword_inline)
+        return "inline";
+    else if (k == TokenKind_keyword_int)
+        return "int";
+    else if (k == TokenKind_keyword_long)
+        return "long";
+    else if (k == TokenKind_keyword_nullptr)
+        return "nullptr";
+    else if (k == TokenKind_keyword_register)
+        return "register";
+    else if (k == TokenKind_keyword_restrict)
+        return "restrict";
+    else if (k == TokenKind_keyword_return)
+        return "return";
+    else if (k == TokenKind_keyword_short)
+        return "short";
+    else if (k == TokenKind_keyword_signed)
+        return "signed";
+    else if (k == TokenKind_keyword_sizeof)
+        return "sizeof";
+    else if (k == TokenKind_keyword_static)
+        return "static";
+    else if (k == TokenKind_keyword_static_assert)
+        return "static_assert";
+    else if (k == TokenKind_keyword_struct)
+        return "struct";
+    else if (k == TokenKind_keyword_switch)
+        return "switch";
+    else if (k == TokenKind_keyword_thread_local)
+        return "thread_local";
+    else if (k == TokenKind_keyword_true)
+        return "true";
+    else if (k == TokenKind_keyword_typedef)
+        return "typedef";
+    else if (k == TokenKind_keyword_typeof)
+        return "typeof";
+    else if (k == TokenKind_keyword_typeof_unqual)
+        return "typeof_unqual";
+    else if (k == TokenKind_keyword_union)
+        return "union";
+    else if (k == TokenKind_keyword_unsigned)
+        return "unsigned";
+    else if (k == TokenKind_keyword_void)
+        return "void";
+    else if (k == TokenKind_keyword_volatile)
+        return "volatile";
+    else if (k == TokenKind_keyword_while)
+        return "while";
+    else if (k == TokenKind_keyword__Atomic)
+        return "_Atomic";
+    else if (k == TokenKind_keyword__BitInt)
+        return "_BitInt";
+    else if (k == TokenKind_keyword__Complex)
+        return "_Complex";
+    else if (k == TokenKind_keyword__Decimal128)
+        return "_Decimal128";
+    else if (k == TokenKind_keyword__Decimal32)
+        return "_Decimal32";
+    else if (k == TokenKind_keyword__Decimal64)
+        return "_Decimal64";
+    else if (k == TokenKind_keyword__Generic)
+        return "_Generic";
+    else if (k == TokenKind_keyword__Imaginary)
+        return "_Imaginary";
+    else if (k == TokenKind_keyword__Noreturn)
+        return "_Noreturn";
+    else if (k == TokenKind_and)
+        return "&";
+    else if (k == TokenKind_andand)
+        return "&&";
+    else if (k == TokenKind_arrow)
+        return "->";
+    else if (k == TokenKind_assign)
+        return "=";
+    else if (k == TokenKind_assign_add)
+        return "+=";
+    else if (k == TokenKind_assign_and)
+        return "&=";
+    else if (k == TokenKind_assign_div)
+        return "/=";
+    else if (k == TokenKind_assign_lshift)
+        return "<<=";
+    else if (k == TokenKind_assign_mod)
+        return "%=";
+    else if (k == TokenKind_assign_mul)
+        return "*=";
+    else if (k == TokenKind_assign_or)
+        return "|=";
+    else if (k == TokenKind_assign_rshift)
+        return ">>=";
+    else if (k == TokenKind_assign_sub)
+        return "-=";
+    else if (k == TokenKind_assign_xor)
+        return "^=";
+    else if (k == TokenKind_brace_l)
+        return "{";
+    else if (k == TokenKind_brace_r)
+        return "}";
+    else if (k == TokenKind_bracket_l)
+        return "[";
+    else if (k == TokenKind_bracket_r)
+        return "]";
+    else if (k == TokenKind_colon)
+        return ":";
+    else if (k == TokenKind_comma)
+        return ",";
+    else if (k == TokenKind_dot)
+        return ".";
+    else if (k == TokenKind_ellipsis)
+        return "...";
+    else if (k == TokenKind_eq)
+        return "==";
+    else if (k == TokenKind_ge)
+        return ">=";
+    else if (k == TokenKind_gt)
+        return ">";
+    else if (k == TokenKind_ident)
+        return "<identifier>";
+    else if (k == TokenKind_le)
+        // The operator itself, in line with ">=" and ">" above.
+        return "<=";
+    else if (k == TokenKind_literal_int)
+        return "<integer>";
+    else if (k == TokenKind_literal_str)
+        return "<string>";
+    else if (k == TokenKind_lshift)
+        return "<<";
+    else if (k == TokenKind_lt)
+        // The operator itself, in line with ">=" and ">" above.
+        return "<";
+    else if (k == TokenKind_minus)
+        return "-";
+    else if (k == TokenKind_minusminus)
+        return "--";
+    else if (k == TokenKind_ne)
+        return "!=";
+    else if (k == TokenKind_not)
+        return "!";
+    else if (k == TokenKind_or)
+        return "|";
+    else if (k == TokenKind_oror)
+        return "||";
+    else if (k == TokenKind_paren_l)
+        return "(";
+    else if (k == TokenKind_paren_r)
+        return ")";
+    else if (k == TokenKind_percent)
+        return "%";
+    else if (k == TokenKind_plus)
+        return "+";
+    else if (k == TokenKind_plusplus)
+        return "++";
+    else if (k == TokenKind_question)
+        return "?";
+    else if (k == TokenKind_rshift)
+        return ">>";
+    else if (k == TokenKind_semicolon)
+        return ";";
+    else if (k == TokenKind_slash)
+        return "/";
+    else if (k == TokenKind_star)
+        return "*";
+    else if (k == TokenKind_tilde)
+        return "~";
+    else if (k == TokenKind_xor)
+        return "^";
+    else
+        unreachable();
+}
+
+// Formats token `t` for display.
+//
+// - Integer literals: "<value> (<integer>)", built from t->value.integer.
+// - Kinds that carry text (other, character constant, identifier, string
+//   literal): "<text> (<kind>)", built from t->value.string.
+// - Every other kind: the string literal from token_kind_stringify().
+//
+// The first two forms return a buffer obtained from calloc() that this
+// function never frees; the last form returns a string literal. The result of
+// calloc() is not checked.
+const char* token_stringify(Token* t) {
+    TokenKind k = t->kind;
+    const char* kind_str = token_kind_stringify(k);
+    if (k == TokenKind_literal_int) {
+        // "%d" needs up to 11 characters for a 32-bit int ("-2147483648"),
+        // then " (" and ")" (3) and the terminating NUL (1).
+        char* buf = calloc(11 + strlen(kind_str) + 3 + 1, sizeof(char));
+        sprintf(buf, "%d (%s)", t->value.integer, kind_str);
+        return buf;
+    }
+    if (k == TokenKind_other || k == TokenKind_character_constant || k == TokenKind_ident ||
+        k == TokenKind_literal_str) {
+        // Text, " (" and ")" (3), and the terminating NUL (1).
+        char* buf = calloc(strlen(t->value.string) + strlen(kind_str) + 3 + 1, sizeof(char));
+        sprintf(buf, "%s (%s)", t->value.string, kind_str);
+        return buf;
+    }
+    return kind_str;
+}
+
+// Sets up `tokens` as an empty array backed by zero-filled storage for
+// `capacity` elements. The result of calloc() is not checked.
+void tokens_init(TokenArray* tokens, size_t capacity) {
+    tokens->data = calloc(capacity, sizeof(Token));
+    tokens->capacity = capacity;
+    tokens->len = 0;
+}
+
+// Makes sure `tokens` has storage for at least `size` elements.
+//
+// Does nothing when the current capacity is already large enough. Otherwise
+// the capacity is doubled until it reaches `size`, the storage is reallocated,
+// and every slot from index `len` onward is zero-filled. The result of
+// realloc() is not checked.
+void tokens_reserve(TokenArray* tokens, size_t size) {
+    if (size <= tokens->capacity)
+        return;
+    // Doubling zero never grows, so an array with capacity 0 starts from 1.
+    if (tokens->capacity == 0)
+        tokens->capacity = 1;
+    while (tokens->capacity < size) {
+        tokens->capacity *= 2;
+    }
+    tokens->data = realloc(tokens->data, tokens->capacity * sizeof(Token));
+    memset(tokens->data + tokens->len, 0, (tokens->capacity - tokens->len) * sizeof(Token));
+}
+
+// Appends one element to `tokens` and returns a pointer to it.
+// tokens_reserve() is called first so that the new slot has storage.
+Token* tokens_push_new(TokenArray* tokens) {
+    size_t index = tokens->len;
+    tokens_reserve(tokens, index + 1);
+    tokens->len = index + 1;
+    return &tokens->data[index];
+}
+
+// Removes the last element of `tokens`, if there is one.
+//
+// Returns a pointer to the removed element, or NULL when the array is empty.
+// The element's storage stays inside the array, so the pointer is only
+// meaningful until that slot is reused by a later push or the storage is
+// reallocated.
+Token* tokens_pop(TokenArray* tokens) {
+    if (tokens->len == 0)
+        return NULL;
+    tokens->len--;
+    return &tokens->data[tokens->len];
+}
diff --git a/src/token.h b/src/token.h
new file mode 100644
index 0000000..6f5fb2a
--- /dev/null
+++ b/src/token.h
@@ -0,0 +1,176 @@
+#ifndef DUCC_TOKEN_H
+#define DUCC_TOKEN_H
+
+#include "io.h"
+
+// Identifies what a token is. The value is stored in Token.kind and also
+// tags which member of TokenValue is meaningful for that token.
+// No enumerator has an explicit value, so TokenKind_eof is 0 and the rest
+// count upward in declaration order.
+enum TokenKind {
+ TokenKind_eof,
+
+ // Kinds that only occur during the preprocessing phase.
+ TokenKind_hash,
+ TokenKind_hashhash,
+ TokenKind_whitespace,
+ TokenKind_newline,
+ TokenKind_other,
+ TokenKind_character_constant,
+ TokenKind_header_name,
+ TokenKind_pp_directive_define,
+ TokenKind_pp_directive_elif,
+ TokenKind_pp_directive_elifdef,
+ TokenKind_pp_directive_elifndef,
+ TokenKind_pp_directive_else,
+ TokenKind_pp_directive_embed,
+ TokenKind_pp_directive_endif,
+ TokenKind_pp_directive_error,
+ TokenKind_pp_directive_if,
+ TokenKind_pp_directive_ifdef,
+ TokenKind_pp_directive_ifndef,
+ TokenKind_pp_directive_include,
+ TokenKind_pp_directive_line,
+ TokenKind_pp_directive_pragma,
+ TokenKind_pp_directive_undef,
+ TokenKind_pp_directive_warning,
+ TokenKind_pp_operator_defined,
+ TokenKind_pp_operator___has_c_attribute,
+ TokenKind_pp_operator___has_embed,
+ TokenKind_pp_operator___has_include,
+
+ // Keywords (C23: 6.4.1).
+ TokenKind_keyword_alignas,
+ TokenKind_keyword_alignof,
+ TokenKind_keyword_auto,
+ TokenKind_keyword_bool,
+ TokenKind_keyword_break,
+ TokenKind_keyword_case,
+ TokenKind_keyword_char,
+ TokenKind_keyword_const,
+ TokenKind_keyword_constexpr,
+ TokenKind_keyword_continue,
+ TokenKind_keyword_default,
+ TokenKind_keyword_do,
+ TokenKind_keyword_double,
+ TokenKind_keyword_else,
+ TokenKind_keyword_enum,
+ TokenKind_keyword_extern,
+ TokenKind_keyword_false,
+ TokenKind_keyword_float,
+ TokenKind_keyword_for,
+ TokenKind_keyword_goto,
+ TokenKind_keyword_if,
+ TokenKind_keyword_inline,
+ TokenKind_keyword_int,
+ TokenKind_keyword_long,
+ TokenKind_keyword_nullptr,
+ TokenKind_keyword_register,
+ TokenKind_keyword_restrict,
+ TokenKind_keyword_return,
+ TokenKind_keyword_short,
+ TokenKind_keyword_signed,
+ TokenKind_keyword_sizeof,
+ TokenKind_keyword_static,
+ TokenKind_keyword_static_assert,
+ TokenKind_keyword_struct,
+ TokenKind_keyword_switch,
+ TokenKind_keyword_thread_local,
+ TokenKind_keyword_true,
+ TokenKind_keyword_typedef,
+ TokenKind_keyword_typeof,
+ TokenKind_keyword_typeof_unqual,
+ TokenKind_keyword_union,
+ TokenKind_keyword_unsigned,
+ TokenKind_keyword_void,
+ TokenKind_keyword_volatile,
+ TokenKind_keyword_while,
+ TokenKind_keyword__Atomic,
+ TokenKind_keyword__BitInt,
+ TokenKind_keyword__Complex,
+ TokenKind_keyword__Decimal128,
+ TokenKind_keyword__Decimal32,
+ TokenKind_keyword__Decimal64,
+ TokenKind_keyword__Generic,
+ TokenKind_keyword__Imaginary,
+ TokenKind_keyword__Noreturn,
+
+ // Punctuators, together with identifiers (ident) and literals
+ // (literal_int, literal_str), in alphabetical order of enumerator name.
+ TokenKind_and,
+ TokenKind_andand,
+ TokenKind_arrow,
+ TokenKind_assign,
+ TokenKind_assign_add,
+ TokenKind_assign_and,
+ TokenKind_assign_div,
+ TokenKind_assign_lshift,
+ TokenKind_assign_mod,
+ TokenKind_assign_mul,
+ TokenKind_assign_or,
+ TokenKind_assign_rshift,
+ TokenKind_assign_sub,
+ TokenKind_assign_xor,
+ TokenKind_brace_l,
+ TokenKind_brace_r,
+ TokenKind_bracket_l,
+ TokenKind_bracket_r,
+ TokenKind_colon,
+ TokenKind_comma,
+ TokenKind_dot,
+ TokenKind_ellipsis,
+ TokenKind_eq,
+ TokenKind_ge,
+ TokenKind_gt,
+ TokenKind_ident,
+ TokenKind_le,
+ TokenKind_literal_int,
+ TokenKind_literal_str,
+ TokenKind_lshift,
+ TokenKind_lt,
+ TokenKind_minus,
+ TokenKind_minusminus,
+ TokenKind_ne,
+ TokenKind_not,
+ TokenKind_or,
+ TokenKind_oror,
+ TokenKind_paren_l,
+ TokenKind_paren_r,
+ TokenKind_percent,
+ TokenKind_plus,
+ TokenKind_plusplus,
+ TokenKind_question,
+ TokenKind_rshift,
+ TokenKind_semicolon,
+ TokenKind_slash,
+ TokenKind_star,
+ TokenKind_tilde,
+ TokenKind_xor,
+};
+typedef enum TokenKind TokenKind;
+
+// Returns a string literal naming kind `k`: the fixed spelling for
+// directives, keywords and punctuators, or an angle-bracketed placeholder
+// (for example "<identifier>") for kinds whose text varies per token.
+const char* token_kind_stringify(TokenKind k);
+
+// TokenValue is externally tagged by Token's kind.
+// token_stringify() reads `integer` for TokenKind_literal_int and `string`
+// for TokenKind_other, TokenKind_character_constant, TokenKind_ident and
+// TokenKind_literal_str.
+union TokenValue {
+ const char* string;
+ int integer;
+};
+typedef union TokenValue TokenValue;
+
+// A single token: its kind, the payload selected by that kind, and a source
+// location.
+struct Token {
+ TokenKind kind;
+ // Which member is valid depends on `kind` (see TokenValue).
+ TokenValue value;
+ // SourceLocation is not declared in this header; presumably it comes from
+ // io.h and records where the token appeared. TODO confirm.
+ SourceLocation loc;
+};
+typedef struct Token Token;
+
+// Formats `t` for display. Integer literals and text-carrying kinds (other,
+// character constant, identifier, string literal) yield "<payload> (<kind>)"
+// in a buffer allocated with calloc(); every other kind yields the string
+// literal from token_kind_stringify(). The caller cannot tell the two cases
+// apart from the pointer alone, so only free the result after checking
+// t->kind.
+const char* token_stringify(Token* t);
+
+// Growable array of Token values, managed through the tokens_* functions.
+struct TokenArray {
+ // Number of elements currently in use.
+ size_t len;
+ // Number of elements `data` has storage for.
+ size_t capacity;
+ // Element storage, allocated by tokens_init() and regrown by tokens_reserve().
+ Token* data;
+};
+typedef struct TokenArray TokenArray;
+
+// Initializes `tokens` as empty, with zero-filled storage for `capacity`
+// elements.
+void tokens_init(TokenArray* tokens, size_t capacity);
+// Grows the storage, by repeated doubling of the capacity, until it holds at
+// least `size` elements; slots from index `len` onward are zero-filled after
+// growing. Does nothing when the capacity is already sufficient.
+void tokens_reserve(TokenArray* tokens, size_t size);
+// Appends one element (reserving storage first) and returns a pointer to it.
+Token* tokens_push_new(TokenArray* tokens);
+// Drops the last element when the array is non-empty.
+// NOTE(review): check the definition before relying on the returned pointer.
+Token* tokens_pop(TokenArray* tokens);
+
+#endif