aboutsummaryrefslogtreecommitdiffhomepage
path: root/preprocess.c
diff options
context:
space:
mode:
author: nsfisis <nsfisis@gmail.com> 2025-08-16 00:47:59 +0900
committer: nsfisis <nsfisis@gmail.com> 2025-08-16 02:01:20 +0900
commit: c1f7732c1902745180e77d0abcf73714cb2e2ead (patch)
tree: 7f93f3556fabff3d6d3dc1998bac0b900a99d76c /preprocess.c
parent: e2064554b6d653439fbbb2bcde00e6f1a1079cb4 (diff)
downloadducc-c1f7732c1902745180e77d0abcf73714cb2e2ead.tar.gz
ducc-c1f7732c1902745180e77d0abcf73714cb2e2ead.tar.zst
ducc-c1f7732c1902745180e77d0abcf73714cb2e2ead.zip
refactor: extract tokenization from Preprocessor
Diffstat (limited to 'preprocess.c')
-rw-r--r-- preprocess.c 692
1 files changed, 355 insertions, 337 deletions
diff --git a/preprocess.c b/preprocess.c
index d82bbc4..468ff3e 100644
--- a/preprocess.c
+++ b/preprocess.c
@@ -444,76 +444,34 @@ void add_predefined_macros(MacroArray* macros) {
m->name.data = "__LINE__";
}
-struct Preprocessor {
+// Lexer state for turning one source buffer into preprocessing tokens.
+// `pos` is an index into `src`; `line` is the line number stamped on each
+// token as it is created; produced tokens are appended to `pp_tokens`.
+struct PpLexer {
const char* filename;
int line;
char* src;
int pos;
TokenArray* pp_tokens;
- MacroArray* macros;
- int include_depth;
- BOOL skip_pp_tokens;
- String* include_paths;
- int n_include_paths;
};
-typedef struct Preprocessor Preprocessor;
-
-TokenArray* do_preprocess(InFile* src, int depth, MacroArray* macros);
-
-Preprocessor* preprocessor_new(InFile* src, int include_depth, MacroArray* macros) {
- if (include_depth >= 32) {
- fatal_error("include depth limit exceeded");
- }
-
- Preprocessor* pp = calloc(1, sizeof(Preprocessor));
- pp->filename = src->filename;
- pp->line = 1;
- pp->src = src->buf;
- pp->pp_tokens = calloc(1, sizeof(TokenArray));
- tokens_init(pp->pp_tokens, 1024 * 16);
- pp->macros = macros;
- pp->include_depth = include_depth;
- pp->include_paths = calloc(16, sizeof(String));
+typedef struct PpLexer PpLexer;
- return pp;
-}
+// Allocates a zeroed PpLexer for `src`: borrows src->filename and src->buf,
+// starts counting at line 1, and creates the output token array via
+// tokens_init(..., 1024 * 16). Allocation results are not checked.
+PpLexer* pplexer_new(InFile* src) {
+ PpLexer* ppl = calloc(1, sizeof(PpLexer));
-Token* pp_token_at(Preprocessor* pp, int i) {
- return &pp->pp_tokens->data[i];
-}
+ ppl->filename = src->filename;
+ ppl->line = 1;
+ ppl->src = src->buf;
+ ppl->pp_tokens = calloc(1, sizeof(TokenArray));
+ tokens_init(ppl->pp_tokens, 1024 * 16);
-int find_macro(Preprocessor* pp, String* name) {
- for (int i = 0; i < pp->macros->len; ++i) {
- if (string_equals(&pp->macros->data[i].name, name)) {
- return i;
- }
- }
- return -1;
+ return ppl;
}
-void undef_macro(Preprocessor* pp, int idx) {
- pp->macros->data[idx].name.len = 0;
- // TODO: Can predefined macro like __FILE__ be undefined?
-}
-
-void add_include_path(Preprocessor* pp, char* include_path) {
- pp->include_paths[pp->n_include_paths].data = include_path;
- pp->include_paths[pp->n_include_paths].len = strlen(include_path);
- ++pp->n_include_paths;
-}
-
-BOOL skip_pp_tokens(Preprocessor* pp) {
- // TODO: support nested #if
- return pp->skip_pp_tokens;
-}
-
-void pp_tokenize_all(Preprocessor* pp) {
- while (pp->src[pp->pos]) {
- Token* tok = tokens_push_new(pp->pp_tokens);
- tok->loc.filename = pp->filename;
- tok->loc.line = pp->line;
- char c = pp->src[pp->pos];
- ++pp->pos;
+void pplexer_tokenize_all(PpLexer* ppl) {
+ while (ppl->src[ppl->pos]) {
+ Token* tok = tokens_push_new(ppl->pp_tokens);
+ tok->loc.filename = ppl->filename;
+ tok->loc.line = ppl->line;
+ char c = ppl->src[ppl->pos];
+ ++ppl->pos;
if (c == '(') {
tok->kind = TokenKind_paren_l;
} else if (c == ')') {
@@ -533,8 +491,8 @@ void pp_tokenize_all(Preprocessor* pp) {
} else if (c == ';') {
tok->kind = TokenKind_semicolon;
} else if (c == '^') {
- if (pp->src[pp->pos] == '=') {
- ++pp->pos;
+ if (ppl->src[ppl->pos] == '=') {
+ ++ppl->pos;
tok->kind = TokenKind_assign_xor;
} else {
tok->kind = TokenKind_xor;
@@ -544,132 +502,132 @@ void pp_tokenize_all(Preprocessor* pp) {
} else if (c == '~') {
tok->kind = TokenKind_tilde;
} else if (c == '+') {
- if (pp->src[pp->pos] == '=') {
- ++pp->pos;
+ if (ppl->src[ppl->pos] == '=') {
+ ++ppl->pos;
tok->kind = TokenKind_assign_add;
- } else if (pp->src[pp->pos] == '+') {
- ++pp->pos;
+ } else if (ppl->src[ppl->pos] == '+') {
+ ++ppl->pos;
tok->kind = TokenKind_plusplus;
} else {
tok->kind = TokenKind_plus;
}
} else if (c == '|') {
- if (pp->src[pp->pos] == '=') {
- ++pp->pos;
+ if (ppl->src[ppl->pos] == '=') {
+ ++ppl->pos;
tok->kind = TokenKind_assign_or;
- } else if (pp->src[pp->pos] == '|') {
- ++pp->pos;
+ } else if (ppl->src[ppl->pos] == '|') {
+ ++ppl->pos;
tok->kind = TokenKind_oror;
} else {
tok->kind = TokenKind_or;
}
} else if (c == '&') {
- if (pp->src[pp->pos] == '=') {
- ++pp->pos;
+ if (ppl->src[ppl->pos] == '=') {
+ ++ppl->pos;
tok->kind = TokenKind_assign_and;
- } else if (pp->src[pp->pos] == '&') {
- ++pp->pos;
+ } else if (ppl->src[ppl->pos] == '&') {
+ ++ppl->pos;
tok->kind = TokenKind_andand;
} else {
tok->kind = TokenKind_and;
}
} else if (c == '-') {
- if (pp->src[pp->pos] == '>') {
- ++pp->pos;
+ if (ppl->src[ppl->pos] == '>') {
+ ++ppl->pos;
tok->kind = TokenKind_arrow;
- } else if (pp->src[pp->pos] == '=') {
- ++pp->pos;
+ } else if (ppl->src[ppl->pos] == '=') {
+ ++ppl->pos;
tok->kind = TokenKind_assign_sub;
- } else if (pp->src[pp->pos] == '-') {
- ++pp->pos;
+ } else if (ppl->src[ppl->pos] == '-') {
+ ++ppl->pos;
tok->kind = TokenKind_minusminus;
} else {
tok->kind = TokenKind_minus;
}
} else if (c == '*') {
- if (pp->src[pp->pos] == '=') {
- ++pp->pos;
+ if (ppl->src[ppl->pos] == '=') {
+ ++ppl->pos;
tok->kind = TokenKind_assign_mul;
} else {
tok->kind = TokenKind_star;
}
} else if (c == '/') {
- if (pp->src[pp->pos] == '=') {
- ++pp->pos;
+ if (ppl->src[ppl->pos] == '=') {
+ ++ppl->pos;
tok->kind = TokenKind_assign_div;
- } else if (pp->src[pp->pos] == '/') {
- int start = pp->pos - 1;
- ++pp->pos;
- while (pp->src[pp->pos] && pp->src[pp->pos] != '\n' && pp->src[pp->pos] != '\r') {
- ++pp->pos;
+ } else if (ppl->src[ppl->pos] == '/') {
+ int start = ppl->pos - 1;
+ ++ppl->pos;
+ while (ppl->src[ppl->pos] && ppl->src[ppl->pos] != '\n' && ppl->src[ppl->pos] != '\r') {
+ ++ppl->pos;
}
tok->kind = TokenKind_whitespace;
- tok->raw.len = pp->pos - start;
- tok->raw.data = pp->src + pp->pos - tok->raw.len;
- } else if (pp->src[pp->pos] == '*') {
- int start = pp->pos - 1;
- ++pp->pos;
- while (pp->src[pp->pos]) {
- if (pp->src[pp->pos] == '*' && pp->src[pp->pos + 1] == '/') {
- pp->pos += 2;
+ tok->raw.len = ppl->pos - start;
+ tok->raw.data = ppl->src + ppl->pos - tok->raw.len;
+ } else if (ppl->src[ppl->pos] == '*') {
+ int start = ppl->pos - 1;
+ ++ppl->pos;
+ while (ppl->src[ppl->pos]) {
+ if (ppl->src[ppl->pos] == '*' && ppl->src[ppl->pos + 1] == '/') {
+ ppl->pos += 2;
break;
}
- if (pp->src[pp->pos] == '\n') {
- ++pp->line;
+ if (ppl->src[ppl->pos] == '\n') {
+ ++ppl->line;
}
- ++pp->pos;
+ ++ppl->pos;
}
tok->kind = TokenKind_whitespace;
- tok->raw.len = pp->pos - start;
- tok->raw.data = pp->src + pp->pos - tok->raw.len;
+ tok->raw.len = ppl->pos - start;
+ tok->raw.data = ppl->src + ppl->pos - tok->raw.len;
} else {
tok->kind = TokenKind_slash;
}
} else if (c == '%') {
- if (pp->src[pp->pos] == '=') {
- ++pp->pos;
+ if (ppl->src[ppl->pos] == '=') {
+ ++ppl->pos;
tok->kind = TokenKind_assign_mod;
} else {
tok->kind = TokenKind_percent;
}
} else if (c == '.') {
- if (pp->src[pp->pos] == '.') {
- ++pp->pos;
- if (pp->src[pp->pos] == '.') {
- ++pp->pos;
+ if (ppl->src[ppl->pos] == '.') {
+ ++ppl->pos;
+ if (ppl->src[ppl->pos] == '.') {
+ ++ppl->pos;
tok->kind = TokenKind_ellipsis;
} else {
tok->kind = TokenKind_other;
tok->raw.len = 2;
- tok->raw.data = pp->src + pp->pos - tok->raw.len;
+ tok->raw.data = ppl->src + ppl->pos - tok->raw.len;
}
} else {
tok->kind = TokenKind_dot;
tok->raw.len = 1;
- tok->raw.data = pp->src + pp->pos - tok->raw.len;
+ tok->raw.data = ppl->src + ppl->pos - tok->raw.len;
}
} else if (c == '!') {
- if (pp->src[pp->pos] == '=') {
- ++pp->pos;
+ if (ppl->src[ppl->pos] == '=') {
+ ++ppl->pos;
tok->kind = TokenKind_ne;
} else {
tok->kind = TokenKind_not;
}
} else if (c == '=') {
- if (pp->src[pp->pos] == '=') {
- ++pp->pos;
+ if (ppl->src[ppl->pos] == '=') {
+ ++ppl->pos;
tok->kind = TokenKind_eq;
} else {
tok->kind = TokenKind_assign;
}
} else if (c == '<') {
- if (pp->src[pp->pos] == '=') {
- ++pp->pos;
+ if (ppl->src[ppl->pos] == '=') {
+ ++ppl->pos;
tok->kind = TokenKind_le;
- } else if (pp->src[pp->pos] == '<') {
- ++pp->pos;
- if (pp->src[pp->pos] == '=') {
- ++pp->pos;
+ } else if (ppl->src[ppl->pos] == '<') {
+ ++ppl->pos;
+ if (ppl->src[ppl->pos] == '=') {
+ ++ppl->pos;
tok->kind = TokenKind_assign_lshift;
} else {
tok->kind = TokenKind_lshift;
@@ -678,13 +636,13 @@ void pp_tokenize_all(Preprocessor* pp) {
tok->kind = TokenKind_lt;
}
} else if (c == '>') {
- if (pp->src[pp->pos] == '=') {
- ++pp->pos;
+ if (ppl->src[ppl->pos] == '=') {
+ ++ppl->pos;
tok->kind = TokenKind_ge;
- } else if (pp->src[pp->pos] == '>') {
- ++pp->pos;
- if (pp->src[pp->pos] == '=') {
- ++pp->pos;
+ } else if (ppl->src[ppl->pos] == '>') {
+ ++ppl->pos;
+ if (ppl->src[ppl->pos] == '=') {
+ ++ppl->pos;
tok->kind = TokenKind_assign_rshift;
} else {
tok->kind = TokenKind_rshift;
@@ -693,53 +651,53 @@ void pp_tokenize_all(Preprocessor* pp) {
tok->kind = TokenKind_gt;
}
} else if (c == '#') {
- if (pp->src[pp->pos] == '#') {
- ++pp->pos;
+ if (ppl->src[ppl->pos] == '#') {
+ ++ppl->pos;
tok->kind = TokenKind_hashhash;
} else {
tok->kind = TokenKind_hash;
}
} else if (c == '\'') {
- int start = pp->pos - 1;
- if (pp->src[pp->pos] == '\\') {
- ++pp->pos;
+ int start = ppl->pos - 1;
+ if (ppl->src[ppl->pos] == '\\') {
+ ++ppl->pos;
}
- pp->pos += 2;
+ ppl->pos += 2;
tok->kind = TokenKind_character_constant;
- tok->raw.data = pp->src + start;
- tok->raw.len = pp->pos - start;
+ tok->raw.data = ppl->src + start;
+ tok->raw.len = ppl->pos - start;
} else if (c == '"') {
- int start = pp->pos - 1;
+ int start = ppl->pos - 1;
while (1) {
- char ch = pp->src[pp->pos];
+ char ch = ppl->src[ppl->pos];
if (ch == '\\') {
- ++pp->pos;
+ ++ppl->pos;
} else if (ch == '"') {
break;
}
- ++pp->pos;
+ ++ppl->pos;
}
- ++pp->pos;
+ ++ppl->pos;
tok->kind = TokenKind_literal_str;
- tok->raw.data = pp->src + start;
- tok->raw.len = pp->pos - start;
+ tok->raw.data = ppl->src + start;
+ tok->raw.len = ppl->pos - start;
} else if (isdigit(c)) {
- --pp->pos;
- int start = pp->pos;
- while (isdigit(pp->src[pp->pos])) {
- ++pp->pos;
+ --ppl->pos;
+ int start = ppl->pos;
+ while (isdigit(ppl->src[ppl->pos])) {
+ ++ppl->pos;
}
tok->kind = TokenKind_literal_int;
- tok->raw.data = pp->src + start;
- tok->raw.len = pp->pos - start;
+ tok->raw.data = ppl->src + start;
+ tok->raw.len = ppl->pos - start;
} else if (isalpha(c) || c == '_') {
- --pp->pos;
- int start = pp->pos;
- while (isalnum(pp->src[pp->pos]) || pp->src[pp->pos] == '_') {
- ++pp->pos;
+ --ppl->pos;
+ int start = ppl->pos;
+ while (isalnum(ppl->src[ppl->pos]) || ppl->src[ppl->pos] == '_') {
+ ++ppl->pos;
}
- tok->raw.data = pp->src + start;
- tok->raw.len = pp->pos - start;
+ tok->raw.data = ppl->src + start;
+ tok->raw.len = ppl->pos - start;
if (string_equals_cstr(&tok->raw, "auto")) {
tok->kind = TokenKind_keyword_auto;
} else if (string_equals_cstr(&tok->raw, "break")) {
@@ -821,30 +779,101 @@ void pp_tokenize_all(Preprocessor* pp) {
}
} else if (isspace(c)) {
if (c == '\n' || c == '\r') {
- ++pp->line;
+ ++ppl->line;
}
tok->kind = TokenKind_whitespace;
tok->raw.len = 1;
- tok->raw.data = pp->src + pp->pos - tok->raw.len;
+ tok->raw.data = ppl->src + ppl->pos - tok->raw.len;
} else {
tok->kind = TokenKind_other;
tok->raw.len = 1;
- tok->raw.data = pp->src + pp->pos - tok->raw.len;
+ tok->raw.data = ppl->src + ppl->pos - tok->raw.len;
}
}
- Token* eof_tok = tokens_push_new(pp->pp_tokens);
- eof_tok->loc.filename = pp->filename;
- eof_tok->loc.line = pp->line;
+ Token* eof_tok = tokens_push_new(ppl->pp_tokens);
+ eof_tok->loc.filename = ppl->filename;
+ eof_tok->loc.line = ppl->line;
eof_tok->kind = TokenKind_eof;
}
-int skip_whitespace(Preprocessor* pp, int pos) {
- for (; pos < pp->pp_tokens->len; ++pos) {
- if (pp_token_at(pp, pos)->kind != TokenKind_whitespace) {
- break;
+// Tokenizes `src` and returns the resulting token array, whose last element
+// is an EOF token. The temporary PpLexer wrapper is not freed here.
+TokenArray* pp_tokenize(InFile* src) {
+ PpLexer* ppl = pplexer_new(src);
+ pplexer_tokenize_all(ppl);
+ return ppl->pp_tokens;
+}
+
+// Directive-processing state over an already tokenized source.
+struct Preprocessor {
+ TokenArray* pp_tokens; // tokens being processed (rewritten in place)
+ int pos; // index of the current token in pp_tokens
+ MacroArray* macros; // macro table supplied by the caller
+ int include_depth; // nesting depth passed to preprocessor_new()
+ BOOL skip_pp_tokens; // TRUE while tokens are being blanked (#ifdef/#ifndef/#else)
+ String* include_paths; // include path entries, filled by add_include_path()
+ int n_include_paths; // number of entries used in include_paths
+};
+typedef struct Preprocessor Preprocessor;
+
+TokenArray* do_preprocess(InFile* src, int depth, MacroArray* macros);
+
+// Creates a Preprocessor over `pp_tokens` using the caller's macro table.
+// Calls fatal_error() when include_depth is 32 or more. Allocates room for
+// 16 include path entries; none are registered yet. All other fields start
+// zeroed (calloc), so pos is 0 and skip_pp_tokens is 0.
+Preprocessor* preprocessor_new(TokenArray* pp_tokens, int include_depth, MacroArray* macros) {
+ if (include_depth >= 32) {
+ fatal_error("include depth limit exceeded");
+ }
+
+ Preprocessor* pp = calloc(1, sizeof(Preprocessor));
+ pp->pp_tokens = pp_tokens;
+ pp->macros = macros;
+ pp->include_depth = include_depth;
+ pp->include_paths = calloc(16, sizeof(String));
+
+ return pp;
+}
+
+// Returns a pointer to the token at index `i` (no bounds check).
+Token* pp_token_at(Preprocessor* pp, int i) {
+ return &pp->pp_tokens->data[i];
+}
+
+// Returns the token at pp->pos without advancing.
+Token* peek_pp_token(Preprocessor* pp) {
+ return pp_token_at(pp, pp->pos);
+}
+
+// Returns the token at pp->pos and then advances pp->pos by one.
+// There is no check against stepping past the EOF token.
+Token* next_pp_token(Preprocessor* pp) {
+ return pp_token_at(pp, pp->pos++);
+}
+
+// True when the current token is the EOF token.
+BOOL pp_eof(Preprocessor* pp) {
+ return peek_pp_token(pp)->kind == TokenKind_eof;
+}
+
+// Linear search of the macro table. Returns the index of the first macro
+// whose name equals `name`, or -1 when there is none.
+int find_macro(Preprocessor* pp, String* name) {
+ for (int i = 0; i < pp->macros->len; ++i) {
+ if (string_equals(&pp->macros->data[i].name, name)) {
+ return i;
}
}
- return pos;
+ return -1;
+}
+
+// Marks macro `idx` as undefined by setting its name length to 0.
+// The entry itself stays in the table.
+void undef_macro(Preprocessor* pp, int idx) {
+ pp->macros->data[idx].name.len = 0;
+ // TODO: Can predefined macro like __FILE__ be undefined?
+}
+
+// Appends `include_path` to the include path list. The pointer is stored,
+// not copied, and its length is taken with strlen().
+// NOTE(review): there is no capacity check here; preprocessor_new()
+// allocates room for 16 entries.
+void add_include_path(Preprocessor* pp, char* include_path) {
+ pp->include_paths[pp->n_include_paths].data = include_path;
+ pp->include_paths[pp->n_include_paths].len = strlen(include_path);
+ ++pp->n_include_paths;
+}
+
+// Returns the current skip flag (a single flag; nesting is not tracked).
+BOOL skip_pp_tokens(Preprocessor* pp) {
+ // TODO: support nested #if
+ return pp->skip_pp_tokens;
+}
+
+// Advances pp->pos past consecutive whitespace tokens; never moves past EOF.
+void skip_whitespaces(Preprocessor* pp) {
+ while (!pp_eof(pp) && peek_pp_token(pp)->kind == TokenKind_whitespace) {
+ next_pp_token(pp);
+ }
}
BOOL string_contains_newline(String* s) {
@@ -856,13 +885,14 @@ BOOL string_contains_newline(String* s) {
return FALSE;
}
-int find_next_newline(Preprocessor* pp, int pos) {
- for (; pos < pp->pp_tokens->len; ++pos) {
- if (pp_token_at(pp, pos)->kind == TokenKind_whitespace && string_contains_newline(&pp_token_at(pp, pos)->raw)) {
- return pos;
+// Advances pp->pos until it rests on a whitespace token whose text contains
+// a newline, or on the EOF token. The token it stops on is not consumed, so
+// callers can tell the two outcomes apart with pp_eof().
+void seek_to_next_newline(Preprocessor* pp) {
+ while (!pp_eof(pp)) {
+ Token* tok = peek_pp_token(pp);
+ if (tok->kind == TokenKind_whitespace && string_contains_newline(&tok->raw)) {
+ break;
}
+ next_pp_token(pp);
}
- return -1;
}
void make_token_whitespace(Token* tok) {
@@ -877,75 +907,72 @@ void remove_directive_tokens(Preprocessor* pp, int start, int end) {
}
}
-int process_endif_directive(Preprocessor* pp, int tok, int tok2) {
- ++tok2;
+// Handles `#endif`. On entry pp->pos is at the `endif` token and hash_pos is
+// the index of the `#`. Consumes `endif`, turns token skipping off, and hands
+// hash_pos and the new pp->pos to remove_directive_tokens().
+void process_endif_directive(Preprocessor* pp, int hash_pos) {
+ next_pp_token(pp);
pp->skip_pp_tokens = FALSE;
- remove_directive_tokens(pp, tok, tok2);
- return tok2;
+ remove_directive_tokens(pp, hash_pos, pp->pos);
}
-int process_else_directive(Preprocessor* pp, int tok, int tok2) {
- ++tok2;
+// Handles `#else`. On entry pp->pos is at the `else` token and hash_pos is
+// the index of the `#`. Consumes `else`, inverts the skip flag, and hands
+// hash_pos and the new pp->pos to remove_directive_tokens().
+void process_else_directive(Preprocessor* pp, int hash_pos) {
+ next_pp_token(pp);
pp->skip_pp_tokens = !pp->skip_pp_tokens;
- remove_directive_tokens(pp, tok, tok2);
- return tok2;
+ remove_directive_tokens(pp, hash_pos, pp->pos);
}
-int process_elif_directive(Preprocessor* pp, int tok, int tok2) {
+// `#elif` is not supported yet: this only calls unimplemented().
+void process_elif_directive(Preprocessor* pp, int hash_pos) {
unimplemented();
}
-int process_if_directive(Preprocessor* pp, int tok, int tok2) {
+// `#if` is not supported yet: this only calls unimplemented().
+void process_if_directive(Preprocessor* pp, int hash_pos) {
unimplemented();
}
-int process_ifdef_directive(Preprocessor* pp, int tok, int tok2) {
- ++tok2;
- tok2 = skip_whitespace(pp, tok2);
- if (pp_token_at(pp, tok2)->kind == TokenKind_ident) {
- Token* name = pp_token_at(pp, tok2);
- ++tok2;
- pp->skip_pp_tokens = find_macro(pp, &name->raw) == -1;
+// Handles `#ifdef NAME`. On entry pp->pos is at the `ifdef` token and
+// hash_pos is the index of the `#`. Skipping is turned on when NAME is not
+// found in the macro table and off when it is. If the token after the
+// whitespace is not an identifier, the skip flag is left unchanged and that
+// token is not consumed.
+void process_ifdef_directive(Preprocessor* pp, int hash_pos) {
+ next_pp_token(pp);
+ skip_whitespaces(pp);
+ Token* macro_name = peek_pp_token(pp);
+ if (macro_name->kind == TokenKind_ident) {
+ next_pp_token(pp);
+ pp->skip_pp_tokens = find_macro(pp, &macro_name->raw) == -1;
}
- remove_directive_tokens(pp, tok, tok2);
- return tok2;
+ remove_directive_tokens(pp, hash_pos, pp->pos);
}
-int process_ifndef_directive(Preprocessor* pp, int tok, int tok2) {
- ++tok2;
- tok2 = skip_whitespace(pp, tok2);
- if (pp_token_at(pp, tok2)->kind == TokenKind_ident) {
- Token* name = pp_token_at(pp, tok2);
- ++tok2;
- pp->skip_pp_tokens = find_macro(pp, &name->raw) != -1;
+// Handles `#ifndef NAME`. On entry pp->pos is at the `ifndef` token and
+// hash_pos is the index of the `#`. Skipping is turned on when NAME is found
+// in the macro table and off when it is not. If the token after the
+// whitespace is not an identifier, the skip flag is left unchanged and that
+// token is not consumed.
+void process_ifndef_directive(Preprocessor* pp, int hash_pos) {
+ next_pp_token(pp);
+ skip_whitespaces(pp);
+ Token* macro_name = peek_pp_token(pp);
+ if (macro_name->kind == TokenKind_ident) {
+ next_pp_token(pp);
+ pp->skip_pp_tokens = find_macro(pp, &macro_name->raw) != -1;
}
- remove_directive_tokens(pp, tok, tok2);
- return tok2;
+ remove_directive_tokens(pp, hash_pos, pp->pos);
}
-int read_include_header_name(Preprocessor* pp, int tok2, String* include_name) {
- if (pp_token_at(pp, tok2)->kind == TokenKind_literal_str) {
- *include_name = pp_token_at(pp, tok2)->raw;
- ++tok2;
- return tok2;
- } else if (pp_token_at(pp, tok2)->kind == TokenKind_lt) {
- ++tok2;
- char* include_name_start = pp_token_at(pp, tok2)->raw.data;
+// Reads the header name of an #include directive starting at pp->pos and
+// consumes its tokens.
+//
+// - String literal form: returns a pointer to that token's raw text (quotes
+//   included, pointing into the token array).
+// - `<...>` form: returns a newly allocated String that starts at the raw text
+//   of the first token after `<` and whose length is the sum of raw.len of
+//   every token up to (not including) `>`; the `>` is consumed.
+//   NOTE(review): this assumes every token between `<` and `>` carries its
+//   source text in `raw`; confirm for punctuator tokens such as `/`.
+// - Anything else (including EOF) is reported with fatal_error().
+String* read_include_header_name(Preprocessor* pp) {
+ Token* tok = next_pp_token(pp);
+ if (tok->kind == TokenKind_literal_str) {
+ return &tok->raw;
+ } else if (tok->kind == TokenKind_lt) {
+ char* include_name_start = peek_pp_token(pp)->raw.data;
int include_name_len = 0;
- while (pp_token_at(pp, tok2)->kind != TokenKind_eof) {
- if (pp_token_at(pp, tok2)->kind == TokenKind_gt) {
+ while (!pp_eof(pp)) {
+ if (peek_pp_token(pp)->kind == TokenKind_gt) {
break;
}
- include_name_len += pp_token_at(pp, tok2)->raw.len;
- ++tok2;
+ include_name_len += peek_pp_token(pp)->raw.len;
+ next_pp_token(pp);
}
- if (pp_token_at(pp, tok2)->kind == TokenKind_eof) {
+ if (pp_eof(pp)) {
fatal_error("invalid #include: <> not balanced");
}
- ++tok2;
+ next_pp_token(pp);
+ String* include_name = calloc(1, sizeof(String));
include_name->data = include_name_start;
include_name->len = include_name_len;
- return tok2;
+ return include_name;
+ } else {
+ // Malformed input is reachable from user source, so report it with a
+ // location instead of falling straight into unreachable().
+ fatal_error("%s:%d: invalid #include: expected \"file\" or <file>", tok->loc.filename, tok->loc.line);
+ // Not reached if fatal_error() terminates the process, as its other call
+ // sites assume.
+ unreachable();
}
}
@@ -975,24 +1002,24 @@ int replace_pp_tokens(Preprocessor* pp, int dest_start, int dest_end, TokenArray
// Move existing tokens backward to make room.
shift_amount = source_tokens->len - n_tokens_to_remove;
tokens_reserve(pp->pp_tokens, pp->pp_tokens->len + shift_amount);
- memmove(pp->pp_tokens->data + dest_end + shift_amount, pp->pp_tokens->data + dest_end,
+ memmove(pp_token_at(pp, dest_end + shift_amount), pp_token_at(pp, dest_end),
n_tokens_after_dest * sizeof(Token));
pp->pp_tokens->len += shift_amount;
} else if (source_tokens->len < n_tokens_to_remove) {
// Move existing tokens forward to reduce room.
shift_amount = n_tokens_to_remove - source_tokens->len;
- memmove(pp->pp_tokens->data + dest_start + source_tokens->len, pp->pp_tokens->data + dest_end,
+ memmove(pp_token_at(pp, dest_start + source_tokens->len), pp_token_at(pp, dest_end),
n_tokens_after_dest * sizeof(Token));
pp->pp_tokens->len -= shift_amount;
- memset(pp->pp_tokens->data + pp->pp_tokens->len, 0, shift_amount * sizeof(Token));
+ memset(pp_token_at(pp, pp->pp_tokens->len), 0, shift_amount * sizeof(Token));
}
- memcpy(pp->pp_tokens->data + dest_start, source_tokens->data, source_tokens->len * sizeof(Token));
+ memcpy(pp_token_at(pp, dest_start), source_tokens->data, source_tokens->len * sizeof(Token));
return dest_start + source_tokens->len;
}
-int expand_include_directive(Preprocessor* pp, int tok, int tok2, const char* include_name_buf) {
+void expand_include_directive(Preprocessor* pp, int hash_pos, const char* include_name_buf) {
InFile* include_source = read_all(include_name_buf);
if (!include_source) {
fatal_error("cannot open include file: %s", include_name_buf);
@@ -1000,137 +1027,133 @@ int expand_include_directive(Preprocessor* pp, int tok, int tok2, const char* in
TokenArray* include_pp_tokens = do_preprocess(include_source, pp->include_depth + 1, pp->macros);
tokens_pop(include_pp_tokens); // pop EOF token
- return replace_pp_tokens(pp, tok, tok2 + 1, include_pp_tokens);
+ pp->pos = replace_pp_tokens(pp, hash_pos, pp->pos, include_pp_tokens);
}
-int process_include_directive(Preprocessor* pp, int tok, int tok2) {
- ++tok2;
- tok2 = skip_whitespace(pp, tok2);
- String* include_name = calloc(1, sizeof(String));
- tok2 = read_include_header_name(pp, tok2, include_name);
+// Handles `#include`. On entry pp->pos is at the `include` token and
+// hash_pos is the index of the `#`. Reads the header name, resolves it with
+// resolve_include_name() (fatal error when that returns NULL), and lets
+// expand_include_directive() splice the included tokens in.
+void process_include_directive(Preprocessor* pp, int hash_pos) {
+ next_pp_token(pp);
+ skip_whitespaces(pp);
+ String* include_name = read_include_header_name(pp);
const char* include_name_buf = resolve_include_name(pp, include_name);
if (include_name_buf == NULL) {
fatal_error("cannot resolve include file name: %.*s", include_name->len, include_name->data);
}
- return expand_include_directive(pp, tok, tok2, include_name_buf);
+ expand_include_directive(pp, hash_pos, include_name_buf);
}
-int process_define_directive(Preprocessor* pp, int tok, int tok2) {
- int tok3 = -1;
- ++tok2;
- tok2 = skip_whitespace(pp, tok2);
- if (pp_token_at(pp, tok2)->kind != TokenKind_ident) {
- fatal_error("%s:%s: invalid #define syntax", pp_token_at(pp, tok2)->loc.filename,
- pp_token_at(pp, tok2)->loc.line);
+// Handles `#define`. On entry pp->pos is at the `define` token and hash_pos
+// is the index of the `#`.
+//
+// Registers either an object-like macro or a function-like macro with an
+// empty parameter list `()`; a non-empty parameter list hits unimplemented().
+// The replacement list is every token from just after the name (or after the
+// `)`) up to, but not including, the whitespace token that holds the next
+// newline. A directive that runs into EOF without a newline is a fatal error.
+// Finally hash_pos and pp->pos are handed to remove_directive_tokens().
+void process_define_directive(Preprocessor* pp, int hash_pos) {
+ next_pp_token(pp);
+ skip_whitespaces(pp);
+ Token* macro_name = next_pp_token(pp);
+
+ if (macro_name->kind != TokenKind_ident) {
+ // loc.line is an int, so it must be printed with %d.
+ fatal_error("%s:%d: invalid #define syntax", macro_name->loc.filename, macro_name->loc.line);
}
- Token* macro_name = pp_token_at(pp, tok2);
- ++tok2;
- if (pp_token_at(pp, tok2)->kind == TokenKind_paren_l) {
- ++tok2;
- if (pp_token_at(pp, tok2)->kind == TokenKind_paren_r) {
- ++tok2;
- } else {
- fatal_error("%s:%d: invalid function-like macro syntax (#define %.*s)", macro_name->loc.filename,
- macro_name->loc.line, macro_name->raw.len, macro_name->raw.data);
+ if (peek_pp_token(pp)->kind == TokenKind_paren_l) {
+ next_pp_token(pp);
+ if (peek_pp_token(pp)->kind != TokenKind_paren_r) {
+ unimplemented();
}
- tok3 = find_next_newline(pp, tok2);
- if (tok3 == -1) {
- fatal_error("%s:%s: invalid #define syntax", pp_token_at(pp, tok3)->loc.filename,
- pp_token_at(pp, tok3)->loc.line);
+ next_pp_token(pp);
+ int replacements_start_pos = pp->pos;
+ seek_to_next_newline(pp);
+ if (pp_eof(pp)) {
+ // Report at the macro name; the format arguments must be supplied.
+ fatal_error("%s:%d: invalid #define syntax", macro_name->loc.filename, macro_name->loc.line);
}
Macro* macro = macros_push_new(pp->macros);
macro->kind = MacroKind_func;
macro->name = macro_name->raw;
- int n_replacements = tok3 - tok2;
+ int n_replacements = pp->pos - replacements_start_pos;
tokens_init(&macro->replacements, n_replacements);
for (int i = 0; i < n_replacements; ++i) {
- *tokens_push_new(&macro->replacements) = *pp_token_at(pp, tok2 + i);
+ *tokens_push_new(&macro->replacements) = *pp_token_at(pp, replacements_start_pos + i);
}
} else {
- tok3 = find_next_newline(pp, tok2);
- if (tok3 == -1) {
- fatal_error("%s:%s: invalid #define syntax", pp_token_at(pp, tok3)->loc.filename,
- pp_token_at(pp, tok3)->loc.line);
+ int replacements_start_pos = pp->pos;
+ seek_to_next_newline(pp);
+ if (pp_eof(pp)) {
+ // Report at the macro name; the format arguments must be supplied.
+ fatal_error("%s:%d: invalid #define syntax", macro_name->loc.filename, macro_name->loc.line);
}
Macro* macro = macros_push_new(pp->macros);
macro->kind = MacroKind_obj;
macro->name = macro_name->raw;
- int n_replacements = tok3 - tok2;
+ int n_replacements = pp->pos - replacements_start_pos;
tokens_init(&macro->replacements, n_replacements);
for (int i = 0; i < n_replacements; ++i) {
- *tokens_push_new(&macro->replacements) = *pp_token_at(pp, tok2 + i);
+ *tokens_push_new(&macro->replacements) = *pp_token_at(pp, replacements_start_pos + i);
}
}
- remove_directive_tokens(pp, tok, tok3);
- return tok3;
+ remove_directive_tokens(pp, hash_pos, pp->pos);
}
-int process_undef_directive(Preprocessor* pp, int tok, int tok2) {
- tok2 = skip_whitespace(pp, tok2 + 1);
- if (pp_token_at(pp, tok2)->kind == TokenKind_ident) {
- Token* macro_name = pp_token_at(pp, tok2);
- ++tok2;
+// Handles `#undef NAME`. On entry pp->pos is at the `undef` token and
+// hash_pos is the index of the `#`. If NAME is an identifier present in the
+// macro table, it is undefined via undef_macro(); an unknown name or a
+// non-identifier token is silently ignored.
+void process_undef_directive(Preprocessor* pp, int hash_pos) {
+ next_pp_token(pp);
+ skip_whitespaces(pp);
+ Token* macro_name = peek_pp_token(pp);
+ if (macro_name->kind == TokenKind_ident) {
+ next_pp_token(pp);
int macro_idx = find_macro(pp, &macro_name->raw);
if (macro_idx != -1) {
undef_macro(pp, macro_idx);
}
}
- remove_directive_tokens(pp, tok, tok2);
- return tok2;
+ remove_directive_tokens(pp, hash_pos, pp->pos);
}
-int process_line_directive(Preprocessor* pp, int tok, int tok2) {
+// `#line` is not supported yet: this only calls unimplemented().
+void process_line_directive(Preprocessor* pp, int hash_pos) {
unimplemented();
}
-int process_error_directive(Preprocessor* pp, int tok, int tok2) {
+// `#error` is not supported yet: this only calls unimplemented().
+void process_error_directive(Preprocessor* pp, int hash_pos) {
unimplemented();
}
-int process_pragma_directive(Preprocessor* pp, int tok, int tok2) {
+// `#pragma` is not supported yet: this only calls unimplemented().
+void process_pragma_directive(Preprocessor* pp, int hash_pos) {
unimplemented();
}
-BOOL expand_macro(Preprocessor* pp, int tok) {
- int macro_idx = find_macro(pp, &pp_token_at(pp, tok)->raw);
+BOOL expand_macro(Preprocessor* pp) {
+ int macro_name_pos = pp->pos;
+ Token* macro_name = next_pp_token(pp);
+ int macro_idx = find_macro(pp, &macro_name->raw);
if (macro_idx == -1) {
return FALSE;
}
- SourceLocation original_loc = pp_token_at(pp, tok)->loc;
- Macro* macro = pp->macros->data + macro_idx;
+ SourceLocation original_loc = macro_name->loc;
+ Macro* macro = &pp->macros->data[macro_idx];
if (macro->kind == MacroKind_func) {
// also consume '(' and ')'
- replace_pp_tokens(pp, tok, tok + 3, &macro->replacements);
+ replace_pp_tokens(pp, macro_name_pos, macro_name_pos + 3, &macro->replacements);
// Inherit a source location from the original macro token.
for (int i = 0; i < macro->replacements.len; ++i) {
- pp_token_at(pp, tok + i)->loc = original_loc;
+ pp_token_at(pp, macro_name_pos + i)->loc = original_loc;
}
} else if (macro->kind == MacroKind_obj) {
- replace_pp_tokens(pp, tok, tok + 1, &macro->replacements);
+ replace_pp_tokens(pp, macro_name_pos, macro_name_pos + 1, &macro->replacements);
// Inherit a source location from the original macro token.
for (int i = 0; i < macro->replacements.len; ++i) {
- pp_token_at(pp, tok + i)->loc = original_loc;
+ pp_token_at(pp, macro_name_pos + i)->loc = original_loc;
}
} else if (macro->kind == MacroKind_builtin_file) {
TokenArray tokens;
tokens_init(&tokens, 1);
Token* file_tok = tokens_push_new(&tokens);
file_tok->kind = TokenKind_literal_str;
- file_tok->raw.len = strlen(pp_token_at(pp, tok)->loc.filename) + 2;
+ file_tok->raw.len = strlen(macro_name->loc.filename) + 2;
file_tok->raw.data = calloc(file_tok->raw.len, sizeof(char));
- sprintf(file_tok->raw.data, "\"%s\"", pp_token_at(pp, tok)->loc.filename);
- replace_pp_tokens(pp, tok, tok + 1, &tokens);
+ sprintf(file_tok->raw.data, "\"%s\"", macro_name->loc.filename);
+ replace_pp_tokens(pp, macro_name_pos, macro_name_pos + 1, &tokens);
} else if (macro->kind == MacroKind_builtin_line) {
TokenArray tokens;
tokens_init(&tokens, 1);
Token* line_tok = tokens_push_new(&tokens);
line_tok->kind = TokenKind_literal_int;
line_tok->raw.data = calloc(10, sizeof(char));
- sprintf(line_tok->raw.data, "%d", pp_token_at(pp, tok)->loc.line);
+ sprintf(line_tok->raw.data, "%d", macro_name->loc.line);
line_tok->raw.len = strlen(line_tok->raw.data);
- replace_pp_tokens(pp, tok, tok + 1, &tokens);
+ replace_pp_tokens(pp, macro_name_pos, macro_name_pos + 1, &tokens);
} else {
unreachable();
}
@@ -1142,68 +1165,63 @@ BOOL is_pp_hash(Token* t) {
return t->kind == TokenKind_hash;
}
-void process_pp_directives(Preprocessor* pp) {
- int tok = 0;
-
- while (pp_token_at(pp, tok)->kind != TokenKind_eof) {
- if (is_pp_hash(pp_token_at(pp, tok))) {
- // TODO: don't skip newline after '#'.
- int tok2 = skip_whitespace(pp, tok + 1);
- if (pp_token_at(pp, tok2)->kind == TokenKind_ident &&
- string_equals_cstr(&pp_token_at(pp, tok2)->raw, "endif")) {
- tok = process_endif_directive(pp, tok, tok2);
- } else if (pp_token_at(pp, tok2)->kind == TokenKind_keyword_else) {
- tok = process_else_directive(pp, tok, tok2);
- } else if (pp_token_at(pp, tok2)->kind == TokenKind_ident &&
- string_equals_cstr(&pp_token_at(pp, tok2)->raw, "elif")) {
- tok = process_elif_directive(pp, tok, tok2);
- } else if (skip_pp_tokens(pp)) {
- make_token_whitespace(pp_token_at(pp, tok));
- ++tok;
- } else if (pp_token_at(pp, tok2)->kind == TokenKind_keyword_if) {
- tok = process_if_directive(pp, tok, tok2);
- } else if (pp_token_at(pp, tok2)->kind == TokenKind_ident &&
- string_equals_cstr(&pp_token_at(pp, tok2)->raw, "ifdef")) {
- tok = process_ifdef_directive(pp, tok, tok2);
- } else if (pp_token_at(pp, tok2)->kind == TokenKind_ident &&
- string_equals_cstr(&pp_token_at(pp, tok2)->raw, "ifndef")) {
- tok = process_ifndef_directive(pp, tok, tok2);
- } else if (pp_token_at(pp, tok2)->kind == TokenKind_ident &&
- string_equals_cstr(&pp_token_at(pp, tok2)->raw, "include")) {
- tok = process_include_directive(pp, tok, tok2);
- } else if (pp_token_at(pp, tok2)->kind == TokenKind_ident &&
- string_equals_cstr(&pp_token_at(pp, tok2)->raw, "define")) {
- tok = process_define_directive(pp, tok, tok2);
- } else if (pp_token_at(pp, tok2)->kind == TokenKind_ident &&
- string_equals_cstr(&pp_token_at(pp, tok2)->raw, "undef")) {
- tok = process_undef_directive(pp, tok, tok2);
- } else if (pp_token_at(pp, tok2)->kind == TokenKind_ident &&
- string_equals_cstr(&pp_token_at(pp, tok2)->raw, "line")) {
- tok = process_line_directive(pp, tok, tok2);
- } else if (pp_token_at(pp, tok2)->kind == TokenKind_ident &&
- string_equals_cstr(&pp_token_at(pp, tok2)->raw, "error")) {
- tok = process_error_directive(pp, tok, tok2);
- } else if (pp_token_at(pp, tok2)->kind == TokenKind_ident &&
- string_equals_cstr(&pp_token_at(pp, tok2)->raw, "pragma")) {
- tok = process_pragma_directive(pp, tok, tok2);
- } else {
- fatal_error("%s:%d: unknown preprocessor directive (%s)", pp_token_at(pp, tok2)->loc.filename,
- pp_token_at(pp, tok2)->loc.line, token_stringify(pp_token_at(pp, tok2)));
- }
+// Processes the single token (or whole directive) at pp->pos and advances
+// pp->pos past whatever was handled.
+//
+// - `#` starts a directive: the name after optional whitespace selects a
+//   handler. `endif`, `else` and `elif` are recognised even while skipping;
+//   every other directive is ignored while skipping.
+// - While skipping, any other token is turned into whitespace.
+// - An identifier is offered to expand_macro(); after a successful expansion
+//   the position is rewound so the replacement tokens are scanned again.
+// - Everything else is stepped over unchanged.
+void process_pp_directive(Preprocessor* pp) {
+ int first_token_pos = pp->pos;
+ Token* first_token = peek_pp_token(pp);
+ if (is_pp_hash(first_token)) {
+ next_pp_token(pp);
+ // TODO: don't skip newline after '#'.
+ skip_whitespaces(pp);
+ Token* next_tok = peek_pp_token(pp);
+ if (next_tok->kind == TokenKind_ident && string_equals_cstr(&next_tok->raw, "endif")) {
+ process_endif_directive(pp, first_token_pos);
+ } else if (next_tok->kind == TokenKind_keyword_else) {
+ process_else_directive(pp, first_token_pos);
+ } else if (next_tok->kind == TokenKind_ident && string_equals_cstr(&next_tok->raw, "elif")) {
+ process_elif_directive(pp, first_token_pos);
} else if (skip_pp_tokens(pp)) {
- make_token_whitespace(pp_token_at(pp, tok));
- ++tok;
- } else if (pp_token_at(pp, tok)->kind == TokenKind_ident) {
- BOOL expanded = expand_macro(pp, tok);
- if (expanded) {
- // A macro may expand to another macro. Re-scan the expanded tokens.
- // TODO: if the macro is defined recursively, it causes infinite loop.
- } else {
- ++tok;
- }
+ // Inside a skipped region: blank only the '#'. pp->pos already rests on
+ // the token after it, which the next iteration handles. That token may be
+ // EOF, which must be neither blanked nor stepped over.
+ make_token_whitespace(pp_token_at(pp, first_token_pos));
+ } else if (next_tok->kind == TokenKind_keyword_if) {
+ process_if_directive(pp, first_token_pos);
+ } else if (next_tok->kind == TokenKind_ident && string_equals_cstr(&next_tok->raw, "ifdef")) {
+ process_ifdef_directive(pp, first_token_pos);
+ } else if (next_tok->kind == TokenKind_ident && string_equals_cstr(&next_tok->raw, "ifndef")) {
+ process_ifndef_directive(pp, first_token_pos);
+ } else if (next_tok->kind == TokenKind_ident && string_equals_cstr(&next_tok->raw, "include")) {
+ process_include_directive(pp, first_token_pos);
+ } else if (next_tok->kind == TokenKind_ident && string_equals_cstr(&next_tok->raw, "define")) {
+ process_define_directive(pp, first_token_pos);
+ } else if (next_tok->kind == TokenKind_ident && string_equals_cstr(&next_tok->raw, "undef")) {
+ process_undef_directive(pp, first_token_pos);
+ } else if (next_tok->kind == TokenKind_ident && string_equals_cstr(&next_tok->raw, "line")) {
+ process_line_directive(pp, first_token_pos);
+ } else if (next_tok->kind == TokenKind_ident && string_equals_cstr(&next_tok->raw, "error")) {
+ process_error_directive(pp, first_token_pos);
+ } else if (next_tok->kind == TokenKind_ident && string_equals_cstr(&next_tok->raw, "pragma")) {
+ process_pragma_directive(pp, first_token_pos);
} else {
- ++tok;
+ fatal_error("%s:%d: unknown preprocessor directive (%s)", next_tok->loc.filename, next_tok->loc.line,
+ token_stringify(next_tok));
}
+ } else if (skip_pp_tokens(pp)) {
+ make_token_whitespace(next_pp_token(pp));
+ } else if (first_token->kind == TokenKind_ident) {
+ BOOL expanded = expand_macro(pp);
+ if (expanded) {
+ // A macro may expand to another macro. Re-scan the expanded tokens.
+ // TODO: if the macro is defined recursively, it causes infinite loop.
+ pp->pos = first_token_pos;
+ }
+ // Otherwise expand_macro() has already consumed the identifier; advancing
+ // again here would skip the token that follows it.
+ } else {
+ next_pp_token(pp);
+ }
+}
+
+// Runs process_pp_directive() repeatedly until the current token is EOF.
+void process_pp_directives(Preprocessor* pp) {
+ while (!pp_eof(pp)) {
+ process_pp_directive(pp);
}
}
@@ -1224,11 +1242,11 @@ char* get_ducc_include_path() {
}
+// Preprocesses one source file: tokenizes `src`, builds a Preprocessor at
+// include depth `depth` that shares `macros`, registers three include paths
+// (the one from get_ducc_include_path() plus two fixed system directories),
+// processes all directives, and returns the resulting token array.
TokenArray* do_preprocess(InFile* src, int depth, MacroArray* macros) {
- Preprocessor* pp = preprocessor_new(src, depth, macros);
+ TokenArray* pp_tokens = pp_tokenize(src);
+ Preprocessor* pp = preprocessor_new(pp_tokens, depth, macros);
add_include_path(pp, get_ducc_include_path());
add_include_path(pp, "/usr/include/x86_64-linux-gnu");
add_include_path(pp, "/usr/include");
- pp_tokenize_all(pp);
process_pp_directives(pp);
return pp->pp_tokens;
}