author      nsfisis <nsfisis@gmail.com>    2025-08-17 13:18:15 +0900
committer   nsfisis <nsfisis@gmail.com>    2025-08-18 03:52:44 +0900
commit      2a7e1667f70c5381d3b939324cc647e51134b15c (patch)
tree        e6d6d5bc285aadbae20c99dbcf0a653f78dda4c3 /preprocess.c
parent      d90a9c83a253b71e7731a44657f998a361a41b97 (diff)
download    ducc-2a7e1667f70c5381d3b939324cc647e51134b15c.tar.gz
            ducc-2a7e1667f70c5381d3b939324cc647e51134b15c.tar.zst
            ducc-2a7e1667f70c5381d3b939324cc647e51134b15c.zip
refactor: replace direct array access with infile_*() helper functions
Diffstat (limited to 'preprocess.c')
-rw-r--r--  preprocess.c  303
1 file changed, 156 insertions, 147 deletions
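The hunks below replace direct indexing of a raw character buffer (ppl->src[ppl->pos]) with a small reader API around InFile. The helpers themselves are defined outside preprocess.c, so they do not appear in this diff; the following is a minimal sketch of what they might look like, inferred from the call sites below. The function names, the SourceLocation fields, and the filename/buf members are taken from the diff; the BOOL typedef, the struct layout, and the function bodies are assumptions.

    /* Sketch only: names and fields come from the call sites in the diff;
     * the layout and bodies are assumptions about code outside this file. */
    typedef int BOOL;          /* the project defines BOOL/TRUE/FALSE elsewhere */
    #define TRUE 1
    #define FALSE 0

    struct SourceLocation {
        const char* filename;
        int line;
    };
    typedef struct SourceLocation SourceLocation;

    struct InFile {
        SourceLocation loc;    /* current position, copied into each Token */
        char* buf;             /* whole file contents, NUL-terminated */
        int pos;               /* read cursor into buf */
    };
    typedef struct InFile InFile;

    /* Return the character under the cursor without consuming it ('\0' at EOF). */
    char infile_peek_char(InFile* f) {
        return f->buf[f->pos];
    }

    BOOL infile_eof(InFile* f) {
        return f->buf[f->pos] == '\0';
    }

    /* Advance the cursor; line tracking moves here, out of the lexer. */
    void infile_next_char(InFile* f) {
        if (f->buf[f->pos] == '\n') {
            ++f->loc.line;
        }
        ++f->pos;
    }

    /* Consume the next character only if it matches `expected`. */
    BOOL infile_consume_if(InFile* f, char expected) {
        if (infile_peek_char(f) != expected) {
            return FALSE;
        }
        infile_next_char(f);
        return TRUE;
    }

With infile_next_char() owning the line counting, the lexer no longer needs its own filename/line/pos fields, which is exactly what the first hunks below delete.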
diff --git a/preprocess.c b/preprocess.c
index 0b55131..056422f 100644
--- a/preprocess.c
+++ b/preprocess.c
@@ -333,12 +333,6 @@ const char* token_kind_stringify(TokenKind k) {
unreachable();
}
-struct SourceLocation {
- const char* filename;
- int line;
-};
-typedef struct SourceLocation SourceLocation;
-
// TokenValue is externally tagged by Token's kind.
union TokenValue {
const char* string;
@@ -544,10 +538,7 @@ MacroArg* macroargs_push_new(MacroArgArray* macroargs) {
}
struct PpLexer {
- const char* filename;
- int line;
- char* src;
- int pos;
+ InFile* src;
BOOL at_bol;
BOOL expect_header_name;
TokenArray* pp_tokens;
@@ -557,9 +548,7 @@ typedef struct PpLexer PpLexer;
PpLexer* pplexer_new(InFile* src) {
PpLexer* ppl = calloc(1, sizeof(PpLexer));
- ppl->filename = src->filename;
- ppl->line = 1;
- ppl->src = src->buf;
+ ppl->src = src;
ppl->at_bol = TRUE;
ppl->expect_header_name = FALSE;
ppl->pp_tokens = calloc(1, sizeof(TokenArray));
@@ -571,21 +560,23 @@ PpLexer* pplexer_new(InFile* src) {
TokenKind pplexer_tokenize_pp_directive(PpLexer* ppl) {
// Skip whitespaces after '#'.
char c;
- while (isspace((c = ppl->src[ppl->pos]))) {
- if (c == '\n' || c == '\r') {
+ while (isspace((c = infile_peek_char(ppl->src)))) {
+ if (c == '\n')
break;
- }
- ++ppl->pos;
+ infile_next_char(ppl->src);
}
- int pp_directive_name_start = ppl->pos;
- while (isalnum(ppl->src[ppl->pos])) {
- ++ppl->pos;
+ SourceLocation pp_directive_name_start_loc = ppl->src->loc;
+
+ StrBuilder builder;
+ strbuilder_init(&builder);
+ while (isalnum(infile_peek_char(ppl->src))) {
+ strbuilder_append_char(&builder, infile_peek_char(ppl->src));
+ infile_next_char(ppl->src);
}
- int pp_directive_name_len = ppl->pos - pp_directive_name_start;
- const char* pp_directive_name = strndup(&ppl->src[pp_directive_name_start], pp_directive_name_len);
+ const char* pp_directive_name = builder.buf;
- if (pp_directive_name_len == 0) {
+ if (builder.len == 0) {
return TokenKind_hash;
} else if (strcmp(pp_directive_name, "define") == 0) {
return TokenKind_pp_directive_define;
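The hunk above is the first of several that drop strndup() over buffer offsets in favour of building each lexeme incrementally with a StrBuilder. Only strbuilder_init(), strbuilder_append_char(), and the .buf/.len reads are visible in the diff; the sketch below fills in one plausible implementation, so the cap field, the growth policy, and the zero-initialised buffer are assumptions.

    #include <stdlib.h>
    #include <string.h>

    struct StrBuilder {
        char* buf;   /* NUL-terminated; handed directly to tok->value.string */
        int len;
        int cap;     /* assumed capacity field, not visible in the diff */
    };
    typedef struct StrBuilder StrBuilder;

    void strbuilder_init(StrBuilder* b) {
        b->cap = 16;
        b->len = 0;
        b->buf = calloc(b->cap, sizeof(char));   /* valid "" even if nothing is appended */
    }

    void strbuilder_append_char(StrBuilder* b, char c) {
        if (b->len + 1 >= b->cap) {              /* keep one spare byte for the NUL */
            b->cap = b->cap * 2;
            b->buf = realloc(b->buf, b->cap);
            memset(b->buf + b->len, 0, b->cap - b->len);
        }
        b->buf[b->len] = c;
        b->len = b->len + 1;
    }

Because builder.buf is stored straight into the token, and is read even when builder.len is 0 (as in the directive check above), strbuilder_init() has to produce a valid empty string, and the lexer never frees the buffer.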
@@ -613,195 +604,202 @@ TokenKind pplexer_tokenize_pp_directive(PpLexer* ppl) {
} else if (strcmp(pp_directive_name, "undef") == 0) {
return TokenKind_pp_directive_undef;
} else {
- fatal_error("%s:%d: unknown preprocessor directive (%s)", ppl->filename, ppl->line, pp_directive_name);
+ fatal_error("%s:%d: unknown preprocessor directive (%s)", pp_directive_name_start_loc.filename,
+ pp_directive_name_start_loc.line, pp_directive_name);
}
}
void pplexer_tokenize_all(PpLexer* ppl) {
- while (ppl->src[ppl->pos]) {
+ while (!infile_eof(ppl->src)) {
Token* tok = tokens_push_new(ppl->pp_tokens);
- tok->loc.filename = ppl->filename;
- tok->loc.line = ppl->line;
- char c = ppl->src[ppl->pos];
- ++ppl->pos;
+ tok->loc = ppl->src->loc;
+ char c = infile_peek_char(ppl->src);
if (ppl->expect_header_name && c == '"') {
- int start = ppl->pos - 1;
+ infile_next_char(ppl->src);
+ StrBuilder builder;
+ strbuilder_init(&builder);
+ strbuilder_append_char(&builder, '"');
while (1) {
- char ch = ppl->src[ppl->pos];
- if (ch == '\\') {
- ++ppl->pos;
- } else if (ch == '"') {
+ char ch = infile_peek_char(ppl->src);
+ if (ch == '"')
break;
+ strbuilder_append_char(&builder, ch);
+ if (ch == '\\') {
+ infile_next_char(ppl->src);
+ strbuilder_append_char(&builder, infile_peek_char(ppl->src));
}
- ++ppl->pos;
+ infile_next_char(ppl->src);
}
- ++ppl->pos;
+ strbuilder_append_char(&builder, '"');
+ infile_next_char(ppl->src);
tok->kind = TokenKind_header_name;
- tok->value.string = strndup(ppl->src + start, ppl->pos - start);
+ tok->value.string = builder.buf;
ppl->expect_header_name = FALSE;
} else if (ppl->expect_header_name && c == '<') {
- int start = ppl->pos - 1;
+ infile_next_char(ppl->src);
+ StrBuilder builder;
+ strbuilder_init(&builder);
+ strbuilder_append_char(&builder, '<');
while (1) {
- char ch = ppl->src[ppl->pos];
- if (ch == '>') {
+ char ch = infile_peek_char(ppl->src);
+ if (ch == '>')
break;
- }
- ++ppl->pos;
+ strbuilder_append_char(&builder, ch);
+ infile_next_char(ppl->src);
}
- ++ppl->pos;
+ strbuilder_append_char(&builder, '>');
+ infile_next_char(ppl->src);
tok->kind = TokenKind_header_name;
- tok->value.string = strndup(ppl->src + start, ppl->pos - start);
+ tok->value.string = builder.buf;
ppl->expect_header_name = FALSE;
} else if (c == '(') {
+ infile_next_char(ppl->src);
tok->kind = TokenKind_paren_l;
} else if (c == ')') {
+ infile_next_char(ppl->src);
tok->kind = TokenKind_paren_r;
} else if (c == '{') {
+ infile_next_char(ppl->src);
tok->kind = TokenKind_brace_l;
} else if (c == '}') {
+ infile_next_char(ppl->src);
tok->kind = TokenKind_brace_r;
} else if (c == '[') {
+ infile_next_char(ppl->src);
tok->kind = TokenKind_bracket_l;
} else if (c == ']') {
+ infile_next_char(ppl->src);
tok->kind = TokenKind_bracket_r;
} else if (c == ',') {
+ infile_next_char(ppl->src);
tok->kind = TokenKind_comma;
} else if (c == ':') {
+ infile_next_char(ppl->src);
tok->kind = TokenKind_colon;
} else if (c == ';') {
+ infile_next_char(ppl->src);
tok->kind = TokenKind_semicolon;
} else if (c == '^') {
- if (ppl->src[ppl->pos] == '=') {
- ++ppl->pos;
+ infile_next_char(ppl->src);
+ if (infile_consume_if(ppl->src, '=')) {
tok->kind = TokenKind_assign_xor;
} else {
tok->kind = TokenKind_xor;
}
} else if (c == '?') {
+ infile_next_char(ppl->src);
tok->kind = TokenKind_question;
} else if (c == '~') {
+ infile_next_char(ppl->src);
tok->kind = TokenKind_tilde;
} else if (c == '+') {
- if (ppl->src[ppl->pos] == '=') {
- ++ppl->pos;
+ infile_next_char(ppl->src);
+ if (infile_consume_if(ppl->src, '=')) {
tok->kind = TokenKind_assign_add;
- } else if (ppl->src[ppl->pos] == '+') {
- ++ppl->pos;
+ } else if (infile_consume_if(ppl->src, '+')) {
tok->kind = TokenKind_plusplus;
} else {
tok->kind = TokenKind_plus;
}
} else if (c == '|') {
- if (ppl->src[ppl->pos] == '=') {
- ++ppl->pos;
+ infile_next_char(ppl->src);
+ if (infile_consume_if(ppl->src, '=')) {
tok->kind = TokenKind_assign_or;
- } else if (ppl->src[ppl->pos] == '|') {
- ++ppl->pos;
+ } else if (infile_consume_if(ppl->src, '|')) {
tok->kind = TokenKind_oror;
} else {
tok->kind = TokenKind_or;
}
} else if (c == '&') {
- if (ppl->src[ppl->pos] == '=') {
- ++ppl->pos;
+ infile_next_char(ppl->src);
+ if (infile_consume_if(ppl->src, '=')) {
tok->kind = TokenKind_assign_and;
- } else if (ppl->src[ppl->pos] == '&') {
- ++ppl->pos;
+ } else if (infile_consume_if(ppl->src, '&')) {
tok->kind = TokenKind_andand;
} else {
tok->kind = TokenKind_and;
}
} else if (c == '-') {
- if (ppl->src[ppl->pos] == '>') {
- ++ppl->pos;
+ infile_next_char(ppl->src);
+ if (infile_consume_if(ppl->src, '>')) {
tok->kind = TokenKind_arrow;
- } else if (ppl->src[ppl->pos] == '=') {
- ++ppl->pos;
+ } else if (infile_consume_if(ppl->src, '=')) {
tok->kind = TokenKind_assign_sub;
- } else if (ppl->src[ppl->pos] == '-') {
- ++ppl->pos;
+ } else if (infile_consume_if(ppl->src, '-')) {
tok->kind = TokenKind_minusminus;
} else {
tok->kind = TokenKind_minus;
}
} else if (c == '*') {
- if (ppl->src[ppl->pos] == '=') {
- ++ppl->pos;
+ infile_next_char(ppl->src);
+ if (infile_consume_if(ppl->src, '=')) {
tok->kind = TokenKind_assign_mul;
} else {
tok->kind = TokenKind_star;
}
} else if (c == '/') {
- if (ppl->src[ppl->pos] == '=') {
- ++ppl->pos;
+ infile_next_char(ppl->src);
+ if (infile_consume_if(ppl->src, '=')) {
tok->kind = TokenKind_assign_div;
- } else if (ppl->src[ppl->pos] == '/') {
- int start = ppl->pos - 1;
- ++ppl->pos;
- while (ppl->src[ppl->pos] && ppl->src[ppl->pos] != '\n' && ppl->src[ppl->pos] != '\r') {
- ++ppl->pos;
+ } else if (infile_consume_if(ppl->src, '/')) {
+ while (!infile_eof(ppl->src) && infile_peek_char(ppl->src) != '\n') {
+ infile_next_char(ppl->src);
}
tok->kind = TokenKind_whitespace;
- } else if (ppl->src[ppl->pos] == '*') {
- int start = ppl->pos - 1;
- ++ppl->pos;
- while (ppl->src[ppl->pos]) {
- if (ppl->src[ppl->pos] == '*' && ppl->src[ppl->pos + 1] == '/') {
- ppl->pos += 2;
- break;
- }
- if (ppl->src[ppl->pos] == '\n') {
- ++ppl->line;
+ } else if (infile_consume_if(ppl->src, '*')) {
+ while (infile_peek_char(ppl->src)) {
+ if (infile_consume_if(ppl->src, '*')) {
+ if (infile_consume_if(ppl->src, '/')) {
+ break;
+ }
+ continue;
}
- ++ppl->pos;
+ infile_next_char(ppl->src);
}
tok->kind = TokenKind_whitespace;
} else {
tok->kind = TokenKind_slash;
}
} else if (c == '%') {
- if (ppl->src[ppl->pos] == '=') {
- ++ppl->pos;
+ infile_next_char(ppl->src);
+ if (infile_consume_if(ppl->src, '=')) {
tok->kind = TokenKind_assign_mod;
} else {
tok->kind = TokenKind_percent;
}
} else if (c == '.') {
- if (ppl->src[ppl->pos] == '.') {
- ++ppl->pos;
- if (ppl->src[ppl->pos] == '.') {
- ++ppl->pos;
+ infile_next_char(ppl->src);
+ if (infile_consume_if(ppl->src, '.')) {
+ if (infile_consume_if(ppl->src, '.')) {
tok->kind = TokenKind_ellipsis;
} else {
tok->kind = TokenKind_other;
- tok->value.string = strndup(ppl->src + ppl->pos - 2, 2);
+ tok->value.string = "..";
}
} else {
tok->kind = TokenKind_dot;
}
} else if (c == '!') {
- if (ppl->src[ppl->pos] == '=') {
- ++ppl->pos;
+ infile_next_char(ppl->src);
+ if (infile_consume_if(ppl->src, '=')) {
tok->kind = TokenKind_ne;
} else {
tok->kind = TokenKind_not;
}
} else if (c == '=') {
- if (ppl->src[ppl->pos] == '=') {
- ++ppl->pos;
+ infile_next_char(ppl->src);
+ if (infile_consume_if(ppl->src, '=')) {
tok->kind = TokenKind_eq;
} else {
tok->kind = TokenKind_assign;
}
} else if (c == '<') {
- if (ppl->src[ppl->pos] == '=') {
- ++ppl->pos;
+ infile_next_char(ppl->src);
+ if (infile_consume_if(ppl->src, '=')) {
tok->kind = TokenKind_le;
- } else if (ppl->src[ppl->pos] == '<') {
- ++ppl->pos;
- if (ppl->src[ppl->pos] == '=') {
- ++ppl->pos;
+ } else if (infile_consume_if(ppl->src, '<')) {
+ if (infile_consume_if(ppl->src, '=')) {
tok->kind = TokenKind_assign_lshift;
} else {
tok->kind = TokenKind_lshift;
@@ -810,13 +808,11 @@ void pplexer_tokenize_all(PpLexer* ppl) {
tok->kind = TokenKind_lt;
}
} else if (c == '>') {
- if (ppl->src[ppl->pos] == '=') {
- ++ppl->pos;
+ infile_next_char(ppl->src);
+ if (infile_consume_if(ppl->src, '=')) {
tok->kind = TokenKind_ge;
- } else if (ppl->src[ppl->pos] == '>') {
- ++ppl->pos;
- if (ppl->src[ppl->pos] == '=') {
- ++ppl->pos;
+ } else if (infile_consume_if(ppl->src, '>')) {
+ if (infile_consume_if(ppl->src, '=')) {
tok->kind = TokenKind_assign_rshift;
} else {
tok->kind = TokenKind_rshift;
@@ -825,76 +821,89 @@ void pplexer_tokenize_all(PpLexer* ppl) {
tok->kind = TokenKind_gt;
}
} else if (c == '#') {
- if (ppl->src[ppl->pos] == '#') {
- ++ppl->pos;
+ infile_next_char(ppl->src);
+ if (infile_consume_if(ppl->src, '#')) {
tok->kind = TokenKind_hashhash;
} else {
tok->kind = ppl->at_bol ? pplexer_tokenize_pp_directive(ppl) : TokenKind_hash;
}
} else if (c == '\'') {
- int start = ppl->pos - 1;
- if (ppl->src[ppl->pos] == '\\') {
- ++ppl->pos;
+ infile_next_char(ppl->src);
+ StrBuilder builder;
+ strbuilder_init(&builder);
+ strbuilder_append_char(&builder, '\'');
+ strbuilder_append_char(&builder, infile_peek_char(ppl->src));
+ if (infile_peek_char(ppl->src) == '\\') {
+ infile_next_char(ppl->src);
+ strbuilder_append_char(&builder, infile_peek_char(ppl->src));
}
- ppl->pos += 2;
+ strbuilder_append_char(&builder, '\'');
+ infile_next_char(ppl->src);
+ infile_next_char(ppl->src);
tok->kind = TokenKind_character_constant;
- tok->value.string = strndup(ppl->src + start, ppl->pos - start);
+ tok->value.string = builder.buf;
} else if (c == '"') {
- int start = ppl->pos - 1;
+ infile_next_char(ppl->src);
+ StrBuilder builder;
+ strbuilder_init(&builder);
while (1) {
- char ch = ppl->src[ppl->pos];
- if (ch == '\\') {
- ++ppl->pos;
- } else if (ch == '"') {
+ char ch = infile_peek_char(ppl->src);
+ if (ch == '"')
break;
+ strbuilder_append_char(&builder, ch);
+ if (ch == '\\') {
+ infile_next_char(ppl->src);
+ strbuilder_append_char(&builder, infile_peek_char(ppl->src));
}
- ++ppl->pos;
+ infile_next_char(ppl->src);
}
- ++ppl->pos;
+ infile_next_char(ppl->src);
tok->kind = TokenKind_literal_str;
- tok->value.string = strndup(ppl->src + start + 1, ppl->pos - start - 2);
+ tok->value.string = builder.buf;
} else if (isdigit(c)) {
- --ppl->pos;
- int start = ppl->pos;
- while (isdigit(ppl->src[ppl->pos])) {
- ++ppl->pos;
+ StrBuilder builder;
+ strbuilder_init(&builder);
+ while (isdigit(infile_peek_char(ppl->src))) {
+ strbuilder_append_char(&builder, infile_peek_char(ppl->src));
+ infile_next_char(ppl->src);
}
tok->kind = TokenKind_literal_int;
- tok->value.integer = atoi(ppl->src + start);
+ tok->value.integer = atoi(builder.buf);
} else if (isalpha(c) || c == '_') {
- --ppl->pos;
- int start = ppl->pos;
- while (isalnum(ppl->src[ppl->pos]) || ppl->src[ppl->pos] == '_') {
- ++ppl->pos;
+ StrBuilder builder;
+ strbuilder_init(&builder);
+ while (isalnum(infile_peek_char(ppl->src)) || infile_peek_char(ppl->src) == '_') {
+ strbuilder_append_char(&builder, infile_peek_char(ppl->src));
+ infile_next_char(ppl->src);
}
- tok->value.string = strndup(ppl->src + start, ppl->pos - start);
tok->kind = TokenKind_ident;
- } else if (c == '\n' || c == '\r') {
- ++ppl->line;
+ tok->value.string = builder.buf;
+ } else if (c == '\n') {
+ infile_next_char(ppl->src);
tok->kind = TokenKind_newline;
} else if (isspace(c)) {
- --ppl->pos;
- while (isspace((c = ppl->src[ppl->pos]))) {
- if (c == '\n' || c == '\r') {
+ while (isspace((c = infile_peek_char(ppl->src)))) {
+ if (c == '\n')
break;
- }
- ++ppl->pos;
+ infile_next_char(ppl->src);
}
- if (ppl->at_bol && ppl->src[ppl->pos] == '#') {
- ++ppl->pos;
+ if (ppl->at_bol && infile_peek_char(ppl->src) == '#') {
+ infile_next_char(ppl->src);
tok->kind = pplexer_tokenize_pp_directive(ppl);
} else {
tok->kind = TokenKind_whitespace;
}
} else {
+ infile_next_char(ppl->src);
tok->kind = TokenKind_other;
- tok->value.string = strndup(ppl->src + ppl->pos - 1, 1);
+ char* buf = calloc(2, sizeof(char));
+ buf[0] = c;
+ tok->value.string = buf;
}
ppl->at_bol = tok->kind == TokenKind_newline;
}
Token* eof_tok = tokens_push_new(ppl->pp_tokens);
- eof_tok->loc.filename = ppl->filename;
- eof_tok->loc.line = ppl->line;
+ eof_tok->loc = ppl->src->loc;
eof_tok->kind = TokenKind_eof;
}
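The final hunk below also replaces read_all() with infile_open(). Its definition is not part of this diff; the hunk only shows that it takes a path and returns NULL on failure (the caller then reports "cannot open include file"). A minimal sketch, reusing the hypothetical InFile layout given before the diff:

    #include <stdio.h>
    #include <stdlib.h>

    /* Assumes the InFile/SourceLocation sketch shown before the diff. */
    InFile* infile_open(const char* filename) {
        FILE* fp = fopen(filename, "rb");
        if (!fp) {
            return NULL;                         /* caller turns this into a fatal error */
        }
        fseek(fp, 0, SEEK_END);
        long size = ftell(fp);
        fseek(fp, 0, SEEK_SET);

        InFile* f = calloc(1, sizeof(InFile));
        f->buf = calloc(size + 1, sizeof(char)); /* trailing NUL doubles as the EOF marker */
        fread(f->buf, 1, size, fp);
        fclose(fp);

        f->loc.filename = filename;
        f->loc.line = 1;
        f->pos = 0;
        return f;
    }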
@@ -1099,10 +1108,10 @@ int replace_single_pp_token(Preprocessor* pp, int dest, Token* source_tok) {
replace_pp_tokens(pp, dest, dest + 1, &tokens);
}
-void expand_include_directive(Preprocessor* pp, int directive_token_pos, const char* include_name_buf) {
- InFile* include_source = read_all(include_name_buf);
+void expand_include_directive(Preprocessor* pp, int directive_token_pos, const char* include_name) {
+ InFile* include_source = infile_open(include_name);
if (!include_source) {
- fatal_error("cannot open include file: %s", include_name_buf);
+ fatal_error("cannot open include file: %s", include_name);
}
TokenArray* include_pp_tokens = do_preprocess(include_source, pp->include_depth + 1, pp->macros);