aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authornsfisis <nsfisis@gmail.com>2025-08-17 13:18:15 +0900
committernsfisis <nsfisis@gmail.com>2025-08-18 03:52:44 +0900
commit2a7e1667f70c5381d3b939324cc647e51134b15c (patch)
treee6d6d5bc285aadbae20c99dbcf0a653f78dda4c3
parentd90a9c83a253b71e7731a44657f998a361a41b97 (diff)
downloadducc-2a7e1667f70c5381d3b939324cc647e51134b15c.tar.gz
ducc-2a7e1667f70c5381d3b939324cc647e51134b15c.tar.zst
ducc-2a7e1667f70c5381d3b939324cc647e51134b15c.zip
refactor: direct array access with infile_*() helper functions
-rw-r--r--common.c27
-rw-r--r--io.c46
-rw-r--r--main.c2
-rw-r--r--preprocess.c303
4 files changed, 226 insertions, 152 deletions
diff --git a/common.c b/common.c
index 921b89b..1b89f8c 100644
--- a/common.c
+++ b/common.c
@@ -10,3 +10,30 @@ void fatal_error(const char* msg, ...) {
#define unreachable() fatal_error("%s:%d: unreachable", __FILE__, __LINE__)
#define unimplemented() fatal_error("%s:%d: unimplemented", __FILE__, __LINE__)
+
+struct StrBuilder {
+ size_t len;
+ size_t capacity;
+ char* buf;
+};
+typedef struct StrBuilder StrBuilder;
+
+void strbuilder_init(StrBuilder* b) {
+ b->len = 0;
+ b->capacity = 16;
+ b->buf = calloc(b->capacity, sizeof(char));
+}
+
+// `size` must include a trailing null byte.
+void strbuilder_reserve(StrBuilder* b, size_t size) {
+ if (size <= b->capacity)
+ return;
+ b->capacity *= 2;
+ b->buf = realloc(b->buf, b->capacity * sizeof(char));
+ memset(b->buf + b->len, 0, (b->capacity - b->len) * sizeof(char));
+}
+
+void strbuilder_append_char(StrBuilder* b, int c) {
+ strbuilder_reserve(b, b->len + 1 + 1);
+ b->buf[b->len++] = c;
+}
diff --git a/io.c b/io.c
index ae02a0d..c6e94d1 100644
--- a/io.c
+++ b/io.c
@@ -1,10 +1,17 @@
-struct InFile {
+struct SourceLocation {
const char* filename;
- char* buf;
+ int line;
+};
+typedef struct SourceLocation SourceLocation;
+
+struct InFile {
+ const char* buf;
+ int pos;
+ SourceLocation loc;
};
typedef struct InFile InFile;
-InFile* read_all(const char* filename) {
+InFile* infile_open(const char* filename) {
FILE* in;
if (strcmp(filename, "-") == 0) {
in = stdin;
@@ -38,7 +45,38 @@ InFile* read_all(const char* filename) {
fclose(in);
InFile* in_file = calloc(1, sizeof(InFile));
- in_file->filename = filename;
in_file->buf = buf;
+ in_file->loc.filename = filename;
+ in_file->loc.line = 1;
return in_file;
}
+
+BOOL infile_eof(InFile* f) {
+ return f->buf[f->pos] == '\0';
+}
+
+char infile_peek_char(InFile* f) {
+ char c = f->buf[f->pos];
+ // Normalize new-line.
+ // TODO: crlf
+ if (c == '\r')
+ c = '\n';
+ return c;
+}
+
+char infile_next_char(InFile* f) {
+ char c = infile_peek_char(f);
+ ++f->pos;
+ if (c == '\n')
+ ++f->loc.line;
+ return c;
+}
+
+BOOL infile_consume_if(InFile* f, char expected) {
+ if (infile_peek_char(f) == expected) {
+ infile_next_char(f);
+ return TRUE;
+ } else {
+ return FALSE;
+ }
+}
diff --git a/main.c b/main.c
index 4307dba..3f9be2e 100644
--- a/main.c
+++ b/main.c
@@ -15,7 +15,7 @@ int main(int argc, char** argv) {
if (argc == 1) {
fatal_error("usage: ducc <FILE>");
}
- InFile* source = read_all(argv[1]);
+ InFile* source = infile_open(argv[1]);
TokenArray* pp_tokens = preprocess(source);
TokenArray* tokens = tokenize(pp_tokens);
Program* prog = parse(tokens);
diff --git a/preprocess.c b/preprocess.c
index 0b55131..056422f 100644
--- a/preprocess.c
+++ b/preprocess.c
@@ -333,12 +333,6 @@ const char* token_kind_stringify(TokenKind k) {
unreachable();
}
-struct SourceLocation {
- const char* filename;
- int line;
-};
-typedef struct SourceLocation SourceLocation;
-
// TokenValue is externally tagged by Token's kind.
union TokenValue {
const char* string;
@@ -544,10 +538,7 @@ MacroArg* macroargs_push_new(MacroArgArray* macroargs) {
}
struct PpLexer {
- const char* filename;
- int line;
- char* src;
- int pos;
+ InFile* src;
BOOL at_bol;
BOOL expect_header_name;
TokenArray* pp_tokens;
@@ -557,9 +548,7 @@ typedef struct PpLexer PpLexer;
PpLexer* pplexer_new(InFile* src) {
PpLexer* ppl = calloc(1, sizeof(PpLexer));
- ppl->filename = src->filename;
- ppl->line = 1;
- ppl->src = src->buf;
+ ppl->src = src;
ppl->at_bol = TRUE;
ppl->expect_header_name = FALSE;
ppl->pp_tokens = calloc(1, sizeof(TokenArray));
@@ -571,21 +560,23 @@ PpLexer* pplexer_new(InFile* src) {
TokenKind pplexer_tokenize_pp_directive(PpLexer* ppl) {
// Skip whitespaces after '#'.
char c;
- while (isspace((c = ppl->src[ppl->pos]))) {
- if (c == '\n' || c == '\r') {
+ while (isspace((c = infile_peek_char(ppl->src)))) {
+ if (c == '\n')
break;
- }
- ++ppl->pos;
+ infile_next_char(ppl->src);
}
- int pp_directive_name_start = ppl->pos;
- while (isalnum(ppl->src[ppl->pos])) {
- ++ppl->pos;
+ SourceLocation pp_directive_name_start_loc = ppl->src->loc;
+
+ StrBuilder builder;
+ strbuilder_init(&builder);
+ while (isalnum(infile_peek_char(ppl->src))) {
+ strbuilder_append_char(&builder, infile_peek_char(ppl->src));
+ infile_next_char(ppl->src);
}
- int pp_directive_name_len = ppl->pos - pp_directive_name_start;
- const char* pp_directive_name = strndup(&ppl->src[pp_directive_name_start], pp_directive_name_len);
+ const char* pp_directive_name = builder.buf;
- if (pp_directive_name_len == 0) {
+ if (builder.len == 0) {
return TokenKind_hash;
} else if (strcmp(pp_directive_name, "define") == 0) {
return TokenKind_pp_directive_define;
@@ -613,195 +604,202 @@ TokenKind pplexer_tokenize_pp_directive(PpLexer* ppl) {
} else if (strcmp(pp_directive_name, "undef") == 0) {
return TokenKind_pp_directive_undef;
} else {
- fatal_error("%s:%d: unknown preprocessor directive (%s)", ppl->filename, ppl->line, pp_directive_name);
+ fatal_error("%s:%d: unknown preprocessor directive (%s)", pp_directive_name_start_loc.filename,
+ pp_directive_name_start_loc.line, pp_directive_name);
}
}
void pplexer_tokenize_all(PpLexer* ppl) {
- while (ppl->src[ppl->pos]) {
+ while (!infile_eof(ppl->src)) {
Token* tok = tokens_push_new(ppl->pp_tokens);
- tok->loc.filename = ppl->filename;
- tok->loc.line = ppl->line;
- char c = ppl->src[ppl->pos];
- ++ppl->pos;
+ tok->loc = ppl->src->loc;
+ char c = infile_peek_char(ppl->src);
if (ppl->expect_header_name && c == '"') {
- int start = ppl->pos - 1;
+ infile_next_char(ppl->src);
+ StrBuilder builder;
+ strbuilder_init(&builder);
+ strbuilder_append_char(&builder, '"');
while (1) {
- char ch = ppl->src[ppl->pos];
- if (ch == '\\') {
- ++ppl->pos;
- } else if (ch == '"') {
+ char ch = infile_peek_char(ppl->src);
+ if (ch == '"')
break;
+ strbuilder_append_char(&builder, ch);
+ if (ch == '\\') {
+ infile_next_char(ppl->src);
+ strbuilder_append_char(&builder, infile_peek_char(ppl->src));
}
- ++ppl->pos;
+ infile_next_char(ppl->src);
}
- ++ppl->pos;
+ strbuilder_append_char(&builder, '"');
+ infile_next_char(ppl->src);
tok->kind = TokenKind_header_name;
- tok->value.string = strndup(ppl->src + start, ppl->pos - start);
+ tok->value.string = builder.buf;
ppl->expect_header_name = FALSE;
} else if (ppl->expect_header_name && c == '<') {
- int start = ppl->pos - 1;
+ infile_next_char(ppl->src);
+ StrBuilder builder;
+ strbuilder_init(&builder);
+ strbuilder_append_char(&builder, '<');
while (1) {
- char ch = ppl->src[ppl->pos];
- if (ch == '>') {
+ char ch = infile_peek_char(ppl->src);
+ if (ch == '>')
break;
- }
- ++ppl->pos;
+ strbuilder_append_char(&builder, ch);
+ infile_next_char(ppl->src);
}
- ++ppl->pos;
+ strbuilder_append_char(&builder, '>');
+ infile_next_char(ppl->src);
tok->kind = TokenKind_header_name;
- tok->value.string = strndup(ppl->src + start, ppl->pos - start);
+ tok->value.string = builder.buf;
ppl->expect_header_name = FALSE;
} else if (c == '(') {
+ infile_next_char(ppl->src);
tok->kind = TokenKind_paren_l;
} else if (c == ')') {
+ infile_next_char(ppl->src);
tok->kind = TokenKind_paren_r;
} else if (c == '{') {
+ infile_next_char(ppl->src);
tok->kind = TokenKind_brace_l;
} else if (c == '}') {
+ infile_next_char(ppl->src);
tok->kind = TokenKind_brace_r;
} else if (c == '[') {
+ infile_next_char(ppl->src);
tok->kind = TokenKind_bracket_l;
} else if (c == ']') {
+ infile_next_char(ppl->src);
tok->kind = TokenKind_bracket_r;
} else if (c == ',') {
+ infile_next_char(ppl->src);
tok->kind = TokenKind_comma;
} else if (c == ':') {
+ infile_next_char(ppl->src);
tok->kind = TokenKind_colon;
} else if (c == ';') {
+ infile_next_char(ppl->src);
tok->kind = TokenKind_semicolon;
} else if (c == '^') {
- if (ppl->src[ppl->pos] == '=') {
- ++ppl->pos;
+ infile_next_char(ppl->src);
+ if (infile_consume_if(ppl->src, '=')) {
tok->kind = TokenKind_assign_xor;
} else {
tok->kind = TokenKind_xor;
}
} else if (c == '?') {
+ infile_next_char(ppl->src);
tok->kind = TokenKind_question;
} else if (c == '~') {
+ infile_next_char(ppl->src);
tok->kind = TokenKind_tilde;
} else if (c == '+') {
- if (ppl->src[ppl->pos] == '=') {
- ++ppl->pos;
+ infile_next_char(ppl->src);
+ if (infile_consume_if(ppl->src, '=')) {
tok->kind = TokenKind_assign_add;
- } else if (ppl->src[ppl->pos] == '+') {
- ++ppl->pos;
+ } else if (infile_consume_if(ppl->src, '+')) {
tok->kind = TokenKind_plusplus;
} else {
tok->kind = TokenKind_plus;
}
} else if (c == '|') {
- if (ppl->src[ppl->pos] == '=') {
- ++ppl->pos;
+ infile_next_char(ppl->src);
+ if (infile_consume_if(ppl->src, '=')) {
tok->kind = TokenKind_assign_or;
- } else if (ppl->src[ppl->pos] == '|') {
- ++ppl->pos;
+ } else if (infile_consume_if(ppl->src, '|')) {
tok->kind = TokenKind_oror;
} else {
tok->kind = TokenKind_or;
}
} else if (c == '&') {
- if (ppl->src[ppl->pos] == '=') {
- ++ppl->pos;
+ infile_next_char(ppl->src);
+ if (infile_consume_if(ppl->src, '=')) {
tok->kind = TokenKind_assign_and;
- } else if (ppl->src[ppl->pos] == '&') {
- ++ppl->pos;
+ } else if (infile_consume_if(ppl->src, '&')) {
tok->kind = TokenKind_andand;
} else {
tok->kind = TokenKind_and;
}
} else if (c == '-') {
- if (ppl->src[ppl->pos] == '>') {
- ++ppl->pos;
+ infile_next_char(ppl->src);
+ if (infile_consume_if(ppl->src, '>')) {
tok->kind = TokenKind_arrow;
- } else if (ppl->src[ppl->pos] == '=') {
- ++ppl->pos;
+ } else if (infile_consume_if(ppl->src, '=')) {
tok->kind = TokenKind_assign_sub;
- } else if (ppl->src[ppl->pos] == '-') {
- ++ppl->pos;
+ } else if (infile_consume_if(ppl->src, '-')) {
tok->kind = TokenKind_minusminus;
} else {
tok->kind = TokenKind_minus;
}
} else if (c == '*') {
- if (ppl->src[ppl->pos] == '=') {
- ++ppl->pos;
+ infile_next_char(ppl->src);
+ if (infile_consume_if(ppl->src, '=')) {
tok->kind = TokenKind_assign_mul;
} else {
tok->kind = TokenKind_star;
}
} else if (c == '/') {
- if (ppl->src[ppl->pos] == '=') {
- ++ppl->pos;
+ infile_next_char(ppl->src);
+ if (infile_consume_if(ppl->src, '=')) {
tok->kind = TokenKind_assign_div;
- } else if (ppl->src[ppl->pos] == '/') {
- int start = ppl->pos - 1;
- ++ppl->pos;
- while (ppl->src[ppl->pos] && ppl->src[ppl->pos] != '\n' && ppl->src[ppl->pos] != '\r') {
- ++ppl->pos;
+ } else if (infile_consume_if(ppl->src, '/')) {
+ while (!infile_eof(ppl->src) && infile_peek_char(ppl->src) != '\n') {
+ infile_next_char(ppl->src);
}
tok->kind = TokenKind_whitespace;
- } else if (ppl->src[ppl->pos] == '*') {
- int start = ppl->pos - 1;
- ++ppl->pos;
- while (ppl->src[ppl->pos]) {
- if (ppl->src[ppl->pos] == '*' && ppl->src[ppl->pos + 1] == '/') {
- ppl->pos += 2;
- break;
- }
- if (ppl->src[ppl->pos] == '\n') {
- ++ppl->line;
+ } else if (infile_consume_if(ppl->src, '*')) {
+ while (infile_peek_char(ppl->src)) {
+ if (infile_consume_if(ppl->src, '*')) {
+ if (infile_consume_if(ppl->src, '/')) {
+ break;
+ }
+ continue;
}
- ++ppl->pos;
+ infile_next_char(ppl->src);
}
tok->kind = TokenKind_whitespace;
} else {
tok->kind = TokenKind_slash;
}
} else if (c == '%') {
- if (ppl->src[ppl->pos] == '=') {
- ++ppl->pos;
+ infile_next_char(ppl->src);
+ if (infile_consume_if(ppl->src, '=')) {
tok->kind = TokenKind_assign_mod;
} else {
tok->kind = TokenKind_percent;
}
} else if (c == '.') {
- if (ppl->src[ppl->pos] == '.') {
- ++ppl->pos;
- if (ppl->src[ppl->pos] == '.') {
- ++ppl->pos;
+ infile_next_char(ppl->src);
+ if (infile_consume_if(ppl->src, '.')) {
+ if (infile_consume_if(ppl->src, '.')) {
tok->kind = TokenKind_ellipsis;
} else {
tok->kind = TokenKind_other;
- tok->value.string = strndup(ppl->src + ppl->pos - 2, 2);
+ tok->value.string = "..";
}
} else {
tok->kind = TokenKind_dot;
}
} else if (c == '!') {
- if (ppl->src[ppl->pos] == '=') {
- ++ppl->pos;
+ infile_next_char(ppl->src);
+ if (infile_consume_if(ppl->src, '=')) {
tok->kind = TokenKind_ne;
} else {
tok->kind = TokenKind_not;
}
} else if (c == '=') {
- if (ppl->src[ppl->pos] == '=') {
- ++ppl->pos;
+ infile_next_char(ppl->src);
+ if (infile_consume_if(ppl->src, '=')) {
tok->kind = TokenKind_eq;
} else {
tok->kind = TokenKind_assign;
}
} else if (c == '<') {
- if (ppl->src[ppl->pos] == '=') {
- ++ppl->pos;
+ infile_next_char(ppl->src);
+ if (infile_consume_if(ppl->src, '=')) {
tok->kind = TokenKind_le;
- } else if (ppl->src[ppl->pos] == '<') {
- ++ppl->pos;
- if (ppl->src[ppl->pos] == '=') {
- ++ppl->pos;
+ } else if (infile_consume_if(ppl->src, '<')) {
+ if (infile_consume_if(ppl->src, '=')) {
tok->kind = TokenKind_assign_lshift;
} else {
tok->kind = TokenKind_lshift;
@@ -810,13 +808,11 @@ void pplexer_tokenize_all(PpLexer* ppl) {
tok->kind = TokenKind_lt;
}
} else if (c == '>') {
- if (ppl->src[ppl->pos] == '=') {
- ++ppl->pos;
+ infile_next_char(ppl->src);
+ if (infile_consume_if(ppl->src, '=')) {
tok->kind = TokenKind_ge;
- } else if (ppl->src[ppl->pos] == '>') {
- ++ppl->pos;
- if (ppl->src[ppl->pos] == '=') {
- ++ppl->pos;
+ } else if (infile_consume_if(ppl->src, '>')) {
+ if (infile_consume_if(ppl->src, '=')) {
tok->kind = TokenKind_assign_rshift;
} else {
tok->kind = TokenKind_rshift;
@@ -825,76 +821,89 @@ void pplexer_tokenize_all(PpLexer* ppl) {
tok->kind = TokenKind_gt;
}
} else if (c == '#') {
- if (ppl->src[ppl->pos] == '#') {
- ++ppl->pos;
+ infile_next_char(ppl->src);
+ if (infile_consume_if(ppl->src, '#')) {
tok->kind = TokenKind_hashhash;
} else {
tok->kind = ppl->at_bol ? pplexer_tokenize_pp_directive(ppl) : TokenKind_hash;
}
} else if (c == '\'') {
- int start = ppl->pos - 1;
- if (ppl->src[ppl->pos] == '\\') {
- ++ppl->pos;
+ infile_next_char(ppl->src);
+ StrBuilder builder;
+ strbuilder_init(&builder);
+ strbuilder_append_char(&builder, '\'');
+ strbuilder_append_char(&builder, infile_peek_char(ppl->src));
+ if (infile_peek_char(ppl->src) == '\\') {
+ infile_next_char(ppl->src);
+ strbuilder_append_char(&builder, infile_peek_char(ppl->src));
}
- ppl->pos += 2;
+ strbuilder_append_char(&builder, '\'');
+ infile_next_char(ppl->src);
+ infile_next_char(ppl->src);
tok->kind = TokenKind_character_constant;
- tok->value.string = strndup(ppl->src + start, ppl->pos - start);
+ tok->value.string = builder.buf;
} else if (c == '"') {
- int start = ppl->pos - 1;
+ infile_next_char(ppl->src);
+ StrBuilder builder;
+ strbuilder_init(&builder);
while (1) {
- char ch = ppl->src[ppl->pos];
- if (ch == '\\') {
- ++ppl->pos;
- } else if (ch == '"') {
+ char ch = infile_peek_char(ppl->src);
+ if (ch == '"')
break;
+ strbuilder_append_char(&builder, ch);
+ if (ch == '\\') {
+ infile_next_char(ppl->src);
+ strbuilder_append_char(&builder, infile_peek_char(ppl->src));
}
- ++ppl->pos;
+ infile_next_char(ppl->src);
}
- ++ppl->pos;
+ infile_next_char(ppl->src);
tok->kind = TokenKind_literal_str;
- tok->value.string = strndup(ppl->src + start + 1, ppl->pos - start - 2);
+ tok->value.string = builder.buf;
} else if (isdigit(c)) {
- --ppl->pos;
- int start = ppl->pos;
- while (isdigit(ppl->src[ppl->pos])) {
- ++ppl->pos;
+ StrBuilder builder;
+ strbuilder_init(&builder);
+ while (isdigit(infile_peek_char(ppl->src))) {
+ strbuilder_append_char(&builder, infile_peek_char(ppl->src));
+ infile_next_char(ppl->src);
}
tok->kind = TokenKind_literal_int;
- tok->value.integer = atoi(ppl->src + start);
+ tok->value.integer = atoi(builder.buf);
} else if (isalpha(c) || c == '_') {
- --ppl->pos;
- int start = ppl->pos;
- while (isalnum(ppl->src[ppl->pos]) || ppl->src[ppl->pos] == '_') {
- ++ppl->pos;
+ StrBuilder builder;
+ strbuilder_init(&builder);
+ while (isalnum(infile_peek_char(ppl->src)) || infile_peek_char(ppl->src) == '_') {
+ strbuilder_append_char(&builder, infile_peek_char(ppl->src));
+ infile_next_char(ppl->src);
}
- tok->value.string = strndup(ppl->src + start, ppl->pos - start);
tok->kind = TokenKind_ident;
- } else if (c == '\n' || c == '\r') {
- ++ppl->line;
+ tok->value.string = builder.buf;
+ } else if (c == '\n') {
+ infile_next_char(ppl->src);
tok->kind = TokenKind_newline;
} else if (isspace(c)) {
- --ppl->pos;
- while (isspace((c = ppl->src[ppl->pos]))) {
- if (c == '\n' || c == '\r') {
+ while (isspace((c = infile_peek_char(ppl->src)))) {
+ if (c == '\n')
break;
- }
- ++ppl->pos;
+ infile_next_char(ppl->src);
}
- if (ppl->at_bol && ppl->src[ppl->pos] == '#') {
- ++ppl->pos;
+ if (ppl->at_bol && infile_peek_char(ppl->src) == '#') {
+ infile_next_char(ppl->src);
tok->kind = pplexer_tokenize_pp_directive(ppl);
} else {
tok->kind = TokenKind_whitespace;
}
} else {
+ infile_next_char(ppl->src);
tok->kind = TokenKind_other;
- tok->value.string = strndup(ppl->src + ppl->pos - 1, 1);
+ char* buf = calloc(2, sizeof(char));
+ buf[0] = c;
+ tok->value.string = buf;
}
ppl->at_bol = tok->kind == TokenKind_newline;
}
Token* eof_tok = tokens_push_new(ppl->pp_tokens);
- eof_tok->loc.filename = ppl->filename;
- eof_tok->loc.line = ppl->line;
+ eof_tok->loc = ppl->src->loc;
eof_tok->kind = TokenKind_eof;
}
@@ -1099,10 +1108,10 @@ int replace_single_pp_token(Preprocessor* pp, int dest, Token* source_tok) {
replace_pp_tokens(pp, dest, dest + 1, &tokens);
}
-void expand_include_directive(Preprocessor* pp, int directive_token_pos, const char* include_name_buf) {
- InFile* include_source = read_all(include_name_buf);
+void expand_include_directive(Preprocessor* pp, int directive_token_pos, const char* include_name) {
+ InFile* include_source = infile_open(include_name);
if (!include_source) {
- fatal_error("cannot open include file: %s", include_name_buf);
+ fatal_error("cannot open include file: %s", include_name);
}
TokenArray* include_pp_tokens = do_preprocess(include_source, pp->include_depth + 1, pp->macros);