| author | nsfisis <nsfisis@gmail.com> | 2025-08-17 13:18:15 +0900 |
|---|---|---|
| committer | nsfisis <nsfisis@gmail.com> | 2025-08-18 03:52:44 +0900 |
| commit | 2a7e1667f70c5381d3b939324cc647e51134b15c (patch) | |
| tree | e6d6d5bc285aadbae20c99dbcf0a653f78dda4c3 | |
| parent | d90a9c83a253b71e7731a44657f998a361a41b97 (diff) | |
| download | ducc-2a7e1667f70c5381d3b939324cc647e51134b15c.tar.gz ducc-2a7e1667f70c5381d3b939324cc647e51134b15c.tar.zst ducc-2a7e1667f70c5381d3b939324cc647e51134b15c.zip | |
refactor: replace direct array access with infile_*() helper functions
| -rw-r--r-- | common.c | 27 |
| -rw-r--r-- | io.c | 46 |
| -rw-r--r-- | main.c | 2 |
| -rw-r--r-- | preprocess.c | 303 |
4 files changed, 226 insertions, 152 deletions
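
The gist of the refactor: the preprocessor lexer no longer indexes `ppl->src[ppl->pos]` directly, but goes through a small cursor API on `InFile` (`infile_eof`, `infile_peek_char`, `infile_next_char`, `infile_consume_if`) that normalizes newlines and tracks the `SourceLocation` as a side effect, and it accumulates lexemes in a `StrBuilder` instead of slicing with `strndup()`. Below is a minimal, self-contained sketch of how the new helpers are used; the struct layouts and helper bodies mirror this diff, but the standalone `main()`, its sample input, and the fixed-size identifier buffer are illustrative only.

```c
/* Sketch of the cursor-style lexing introduced in this commit.
 * Types and helpers mirror io.c below; main() is illustrative only. */
#include <ctype.h>
#include <stdio.h>

typedef struct { const char* filename; int line; } SourceLocation;
typedef struct { const char* buf; int pos; SourceLocation loc; } InFile;

static int infile_eof(InFile* f) { return f->buf[f->pos] == '\0'; }

static char infile_peek_char(InFile* f) {
    char c = f->buf[f->pos];
    return c == '\r' ? '\n' : c; /* normalize new-line, as in io.c */
}

static char infile_next_char(InFile* f) {
    char c = infile_peek_char(f);
    ++f->pos;
    if (c == '\n')
        ++f->loc.line; /* location tracking moves with the cursor */
    return c;
}

static int infile_consume_if(InFile* f, char expected) {
    if (infile_peek_char(f) == expected) {
        infile_next_char(f);
        return 1;
    }
    return 0;
}

int main(void) {
    InFile f = { "foo_bar ->", 0, { "<example>", 1 } };

    /* Scan an identifier the way preprocess.c now does, with a fixed
     * buffer instead of StrBuilder to keep the sketch short. */
    char ident[64];
    int len = 0;
    while (!infile_eof(&f) &&
           (isalnum((unsigned char)infile_peek_char(&f)) || infile_peek_char(&f) == '_')) {
        ident[len++] = infile_next_char(&f);
    }
    ident[len] = '\0';
    printf("ident=%s line=%d\n", ident, f.loc.line); /* ident=foo_bar line=1 */

    /* Two-character operators collapse into infile_consume_if(). */
    while (!infile_eof(&f) && infile_peek_char(&f) == ' ')
        infile_next_char(&f);
    if (infile_consume_if(&f, '-'))
        puts(infile_consume_if(&f, '>') ? "arrow" : "minus");
    return 0;
}
```

Two-character operators such as `->` thus collapse from manual `ppl->src[ppl->pos] == '>'` checks into a single `infile_consume_if()` call, which is most of what the preprocess.c hunks below do.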
diff --git a/common.c b/common.c
--- a/common.c
+++ b/common.c
@@ -10,3 +10,30 @@ void fatal_error(const char* msg, ...) {
 #define unreachable() fatal_error("%s:%d: unreachable", __FILE__, __LINE__)
 #define unimplemented() fatal_error("%s:%d: unimplemented", __FILE__, __LINE__)
+
+struct StrBuilder {
+    size_t len;
+    size_t capacity;
+    char* buf;
+};
+typedef struct StrBuilder StrBuilder;
+
+void strbuilder_init(StrBuilder* b) {
+    b->len = 0;
+    b->capacity = 16;
+    b->buf = calloc(b->capacity, sizeof(char));
+}
+
+// `size` must include a trailing null byte.
+void strbuilder_reserve(StrBuilder* b, size_t size) {
+    if (size <= b->capacity)
+        return;
+    b->capacity *= 2;
+    b->buf = realloc(b->buf, b->capacity * sizeof(char));
+    memset(b->buf + b->len, 0, (b->capacity - b->len) * sizeof(char));
+}
+
+void strbuilder_append_char(StrBuilder* b, int c) {
+    strbuilder_reserve(b, b->len + 1 + 1);
+    b->buf[b->len++] = c;
+}
diff --git a/io.c b/io.c
--- a/io.c
+++ b/io.c
@@ -1,10 +1,17 @@
-struct InFile {
+struct SourceLocation {
     const char* filename;
-    char* buf;
+    int line;
+};
+typedef struct SourceLocation SourceLocation;
+
+struct InFile {
+    const char* buf;
+    int pos;
+    SourceLocation loc;
 };
 typedef struct InFile InFile;
 
-InFile* read_all(const char* filename) {
+InFile* infile_open(const char* filename) {
     FILE* in;
     if (strcmp(filename, "-") == 0) {
         in = stdin;
@@ -38,7 +45,38 @@ InFile* read_all(const char* filename) {
     fclose(in);
 
     InFile* in_file = calloc(1, sizeof(InFile));
-    in_file->filename = filename;
     in_file->buf = buf;
+    in_file->loc.filename = filename;
+    in_file->loc.line = 1;
     return in_file;
 }
+
+BOOL infile_eof(InFile* f) {
+    return f->buf[f->pos] == '\0';
+}
+
+char infile_peek_char(InFile* f) {
+    char c = f->buf[f->pos];
+    // Normalize new-line.
+    // TODO: crlf
+    if (c == '\r')
+        c = '\n';
+    return c;
+}
+
+char infile_next_char(InFile* f) {
+    char c = infile_peek_char(f);
+    ++f->pos;
+    if (c == '\n')
+        ++f->loc.line;
+    return c;
+}
+
+BOOL infile_consume_if(InFile* f, char expected) {
+    if (infile_peek_char(f) == expected) {
+        infile_next_char(f);
+        return TRUE;
+    } else {
+        return FALSE;
+    }
+}
diff --git a/main.c b/main.c
--- a/main.c
+++ b/main.c
@@ -15,7 +15,7 @@ int main(int argc, char** argv) {
     if (argc == 1) {
         fatal_error("usage: ducc <FILE>");
     }
-    InFile* source = read_all(argv[1]);
+    InFile* source = infile_open(argv[1]);
     TokenArray* pp_tokens = preprocess(source);
     TokenArray* tokens = tokenize(pp_tokens);
     Program* prog = parse(tokens);
diff --git a/preprocess.c b/preprocess.c
index 0b55131..056422f 100644
--- a/preprocess.c
+++ b/preprocess.c
@@ -333,12 +333,6 @@ const char* token_kind_stringify(TokenKind k) {
     unreachable();
 }
 
-struct SourceLocation {
-    const char* filename;
-    int line;
-};
-typedef struct SourceLocation SourceLocation;
-
 // TokenValue is externally tagged by Token's kind.
 union TokenValue {
     const char* string;
@@ -544,10 +538,7 @@ MacroArg* macroargs_push_new(MacroArgArray* macroargs) {
 }
 
 struct PpLexer {
-    const char* filename;
-    int line;
-    char* src;
-    int pos;
+    InFile* src;
     BOOL at_bol;
     BOOL expect_header_name;
     TokenArray* pp_tokens;
@@ -557,9 +548,7 @@ typedef struct PpLexer PpLexer;
 
 PpLexer* pplexer_new(InFile* src) {
     PpLexer* ppl = calloc(1, sizeof(PpLexer));
-    ppl->filename = src->filename;
-    ppl->line = 1;
-    ppl->src = src->buf;
+    ppl->src = src;
     ppl->at_bol = TRUE;
     ppl->expect_header_name = FALSE;
     ppl->pp_tokens = calloc(1, sizeof(TokenArray));
@@ -571,21 +560,23 @@ PpLexer* pplexer_new(InFile* src) {
 
 TokenKind pplexer_tokenize_pp_directive(PpLexer* ppl) {
     // Skip whitespaces after '#'.
     char c;
-    while (isspace((c = ppl->src[ppl->pos]))) {
-        if (c == '\n' || c == '\r') {
+    while (isspace((c = infile_peek_char(ppl->src)))) {
+        if (c == '\n')
             break;
-        }
-        ++ppl->pos;
+        infile_next_char(ppl->src);
     }
-    int pp_directive_name_start = ppl->pos;
-    while (isalnum(ppl->src[ppl->pos])) {
-        ++ppl->pos;
+    SourceLocation pp_directive_name_start_loc = ppl->src->loc;
+
+    StrBuilder builder;
+    strbuilder_init(&builder);
+    while (isalnum(infile_peek_char(ppl->src))) {
+        strbuilder_append_char(&builder, infile_peek_char(ppl->src));
+        infile_next_char(ppl->src);
     }
-    int pp_directive_name_len = ppl->pos - pp_directive_name_start;
-    const char* pp_directive_name = strndup(&ppl->src[pp_directive_name_start], pp_directive_name_len);
+    const char* pp_directive_name = builder.buf;
 
-    if (pp_directive_name_len == 0) {
+    if (builder.len == 0) {
         return TokenKind_hash;
     } else if (strcmp(pp_directive_name, "define") == 0) {
         return TokenKind_pp_directive_define;
@@ -613,195 +604,202 @@ TokenKind pplexer_tokenize_pp_directive(PpLexer* ppl) {
     } else if (strcmp(pp_directive_name, "undef") == 0) {
         return TokenKind_pp_directive_undef;
     } else {
-        fatal_error("%s:%d: unknown preprocessor directive (%s)", ppl->filename, ppl->line, pp_directive_name);
+        fatal_error("%s:%d: unknown preprocessor directive (%s)", pp_directive_name_start_loc.filename,
+                    pp_directive_name_start_loc.line, pp_directive_name);
     }
 }
 
 void pplexer_tokenize_all(PpLexer* ppl) {
-    while (ppl->src[ppl->pos]) {
+    while (!infile_eof(ppl->src)) {
         Token* tok = tokens_push_new(ppl->pp_tokens);
-        tok->loc.filename = ppl->filename;
-        tok->loc.line = ppl->line;
-        char c = ppl->src[ppl->pos];
-        ++ppl->pos;
+        tok->loc = ppl->src->loc;
+        char c = infile_peek_char(ppl->src);
         if (ppl->expect_header_name && c == '"') {
-            int start = ppl->pos - 1;
+            infile_next_char(ppl->src);
+            StrBuilder builder;
+            strbuilder_init(&builder);
+            strbuilder_append_char(&builder, '"');
             while (1) {
-                char ch = ppl->src[ppl->pos];
-                if (ch == '\\') {
-                    ++ppl->pos;
-                } else if (ch == '"') {
+                char ch = infile_peek_char(ppl->src);
+                if (ch == '"')
                     break;
+                strbuilder_append_char(&builder, ch);
+                if (ch == '\\') {
+                    infile_next_char(ppl->src);
+                    strbuilder_append_char(&builder, infile_peek_char(ppl->src));
                 }
-                ++ppl->pos;
+                infile_next_char(ppl->src);
             }
-            ++ppl->pos;
+            strbuilder_append_char(&builder, '"');
+            infile_next_char(ppl->src);
             tok->kind = TokenKind_header_name;
-            tok->value.string = strndup(ppl->src + start, ppl->pos - start);
+            tok->value.string = builder.buf;
             ppl->expect_header_name = FALSE;
         } else if (ppl->expect_header_name && c == '<') {
-            int start = ppl->pos - 1;
+            infile_next_char(ppl->src);
+            StrBuilder builder;
+            strbuilder_init(&builder);
+            strbuilder_append_char(&builder, '<');
             while (1) {
-                char ch = ppl->src[ppl->pos];
-                if (ch == '>') {
+                char ch = infile_peek_char(ppl->src);
+                if (ch == '>')
                     break;
-                }
-                ++ppl->pos;
+                strbuilder_append_char(&builder, ch);
+                infile_next_char(ppl->src);
             }
-            ++ppl->pos;
+            strbuilder_append_char(&builder, '>');
+            infile_next_char(ppl->src);
             tok->kind = TokenKind_header_name;
-            tok->value.string = strndup(ppl->src + start, ppl->pos - start);
+            tok->value.string = builder.buf;
             ppl->expect_header_name = FALSE;
         } else if (c == '(') {
+            infile_next_char(ppl->src);
             tok->kind = TokenKind_paren_l;
         } else if (c == ')') {
+            infile_next_char(ppl->src);
            tok->kind = TokenKind_paren_r;
         } else if (c == '{') {
+            infile_next_char(ppl->src);
            tok->kind = TokenKind_brace_l;
         } else if (c == '}') {
+            infile_next_char(ppl->src);
            tok->kind = TokenKind_brace_r;
         } else if (c == '[') {
+            infile_next_char(ppl->src);
            tok->kind = TokenKind_bracket_l;
         } else if (c == ']') {
+            infile_next_char(ppl->src);
            tok->kind = TokenKind_bracket_r;
         } else if (c == ',') {
+            infile_next_char(ppl->src);
            tok->kind = TokenKind_comma;
         } else if (c == ':') {
+            infile_next_char(ppl->src);
            tok->kind = TokenKind_colon;
         } else if (c == ';') {
+            infile_next_char(ppl->src);
            tok->kind = TokenKind_semicolon;
         } else if (c == '^') {
-            if (ppl->src[ppl->pos] == '=') {
-                ++ppl->pos;
+            infile_next_char(ppl->src);
+            if (infile_consume_if(ppl->src, '=')) {
                 tok->kind = TokenKind_assign_xor;
             } else {
                 tok->kind = TokenKind_xor;
             }
         } else if (c == '?') {
+            infile_next_char(ppl->src);
             tok->kind = TokenKind_question;
         } else if (c == '~') {
+            infile_next_char(ppl->src);
             tok->kind = TokenKind_tilde;
         } else if (c == '+') {
-            if (ppl->src[ppl->pos] == '=') {
-                ++ppl->pos;
+            infile_next_char(ppl->src);
+            if (infile_consume_if(ppl->src, '=')) {
                 tok->kind = TokenKind_assign_add;
-            } else if (ppl->src[ppl->pos] == '+') {
-                ++ppl->pos;
+            } else if (infile_consume_if(ppl->src, '+')) {
                 tok->kind = TokenKind_plusplus;
             } else {
                 tok->kind = TokenKind_plus;
             }
         } else if (c == '|') {
-            if (ppl->src[ppl->pos] == '=') {
-                ++ppl->pos;
+            infile_next_char(ppl->src);
+            if (infile_consume_if(ppl->src, '=')) {
                 tok->kind = TokenKind_assign_or;
-            } else if (ppl->src[ppl->pos] == '|') {
-                ++ppl->pos;
+            } else if (infile_consume_if(ppl->src, '|')) {
                 tok->kind = TokenKind_oror;
             } else {
                 tok->kind = TokenKind_or;
             }
         } else if (c == '&') {
-            if (ppl->src[ppl->pos] == '=') {
-                ++ppl->pos;
+            infile_next_char(ppl->src);
+            if (infile_consume_if(ppl->src, '=')) {
                 tok->kind = TokenKind_assign_and;
-            } else if (ppl->src[ppl->pos] == '&') {
-                ++ppl->pos;
+            } else if (infile_consume_if(ppl->src, '&')) {
                 tok->kind = TokenKind_andand;
             } else {
                 tok->kind = TokenKind_and;
             }
         } else if (c == '-') {
-            if (ppl->src[ppl->pos] == '>') {
-                ++ppl->pos;
+            infile_next_char(ppl->src);
+            if (infile_consume_if(ppl->src, '>')) {
                 tok->kind = TokenKind_arrow;
-            } else if (ppl->src[ppl->pos] == '=') {
-                ++ppl->pos;
+            } else if (infile_consume_if(ppl->src, '=')) {
                 tok->kind = TokenKind_assign_sub;
-            } else if (ppl->src[ppl->pos] == '-') {
-                ++ppl->pos;
+            } else if (infile_consume_if(ppl->src, '-')) {
                 tok->kind = TokenKind_minusminus;
             } else {
                 tok->kind = TokenKind_minus;
             }
         } else if (c == '*') {
-            if (ppl->src[ppl->pos] == '=') {
-                ++ppl->pos;
+            infile_next_char(ppl->src);
+            if (infile_consume_if(ppl->src, '=')) {
                 tok->kind = TokenKind_assign_mul;
             } else {
                 tok->kind = TokenKind_star;
             }
         } else if (c == '/') {
-            if (ppl->src[ppl->pos] == '=') {
-                ++ppl->pos;
+            infile_next_char(ppl->src);
+            if (infile_consume_if(ppl->src, '=')) {
                 tok->kind = TokenKind_assign_div;
-            } else if (ppl->src[ppl->pos] == '/') {
-                int start = ppl->pos - 1;
-                ++ppl->pos;
-                while (ppl->src[ppl->pos] && ppl->src[ppl->pos] != '\n' && ppl->src[ppl->pos] != '\r') {
-                    ++ppl->pos;
+            } else if (infile_consume_if(ppl->src, '/')) {
+                while (!infile_eof(ppl->src) && infile_peek_char(ppl->src) != '\n') {
+                    infile_next_char(ppl->src);
                 }
                 tok->kind = TokenKind_whitespace;
-            } else if (ppl->src[ppl->pos] == '*') {
-                int start = ppl->pos - 1;
-                ++ppl->pos;
-                while (ppl->src[ppl->pos]) {
-                    if (ppl->src[ppl->pos] == '*' && ppl->src[ppl->pos + 1] == '/') {
-                        ppl->pos += 2;
-                        break;
-                    }
-                    if (ppl->src[ppl->pos] == '\n') {
-                        ++ppl->line;
+            } else if (infile_consume_if(ppl->src, '*')) {
+                while (infile_peek_char(ppl->src)) {
+                    if (infile_consume_if(ppl->src, '*')) {
+                        if (infile_consume_if(ppl->src, '/')) {
+                            break;
+                        }
+                        continue;
                     }
-                    ++ppl->pos;
+                    infile_next_char(ppl->src);
                 }
                 tok->kind = TokenKind_whitespace;
             } else {
                 tok->kind = TokenKind_slash;
             }
         } else if (c == '%') {
-            if (ppl->src[ppl->pos] == '=') {
-                ++ppl->pos;
+            infile_next_char(ppl->src);
+            if (infile_consume_if(ppl->src, '=')) {
                 tok->kind = TokenKind_assign_mod;
             } else {
                 tok->kind = TokenKind_percent;
             }
         } else if (c == '.') {
-            if (ppl->src[ppl->pos] == '.') {
-                ++ppl->pos;
-                if (ppl->src[ppl->pos] == '.') {
-                    ++ppl->pos;
+            infile_next_char(ppl->src);
+            if (infile_consume_if(ppl->src, '.')) {
+                if (infile_consume_if(ppl->src, '.')) {
                     tok->kind = TokenKind_ellipsis;
                 } else {
                     tok->kind = TokenKind_other;
-                    tok->value.string = strndup(ppl->src + ppl->pos - 2, 2);
+                    tok->value.string = "..";
                 }
             } else {
                 tok->kind = TokenKind_dot;
             }
         } else if (c == '!') {
-            if (ppl->src[ppl->pos] == '=') {
-                ++ppl->pos;
+            infile_next_char(ppl->src);
+            if (infile_consume_if(ppl->src, '=')) {
                 tok->kind = TokenKind_ne;
             } else {
                 tok->kind = TokenKind_not;
             }
         } else if (c == '=') {
-            if (ppl->src[ppl->pos] == '=') {
-                ++ppl->pos;
+            infile_next_char(ppl->src);
+            if (infile_consume_if(ppl->src, '=')) {
                 tok->kind = TokenKind_eq;
             } else {
                 tok->kind = TokenKind_assign;
             }
         } else if (c == '<') {
-            if (ppl->src[ppl->pos] == '=') {
-                ++ppl->pos;
+            infile_next_char(ppl->src);
+            if (infile_consume_if(ppl->src, '=')) {
                 tok->kind = TokenKind_le;
-            } else if (ppl->src[ppl->pos] == '<') {
-                ++ppl->pos;
-                if (ppl->src[ppl->pos] == '=') {
-                    ++ppl->pos;
+            } else if (infile_consume_if(ppl->src, '<')) {
+                if (infile_consume_if(ppl->src, '=')) {
                     tok->kind = TokenKind_assign_lshift;
                 } else {
                     tok->kind = TokenKind_lshift;
@@ -810,13 +808,11 @@ void pplexer_tokenize_all(PpLexer* ppl) {
                 tok->kind = TokenKind_lt;
             }
         } else if (c == '>') {
-            if (ppl->src[ppl->pos] == '=') {
-                ++ppl->pos;
+            infile_next_char(ppl->src);
+            if (infile_consume_if(ppl->src, '=')) {
                 tok->kind = TokenKind_ge;
-            } else if (ppl->src[ppl->pos] == '>') {
-                ++ppl->pos;
-                if (ppl->src[ppl->pos] == '=') {
-                    ++ppl->pos;
+            } else if (infile_consume_if(ppl->src, '>')) {
+                if (infile_consume_if(ppl->src, '=')) {
                     tok->kind = TokenKind_assign_rshift;
                 } else {
                     tok->kind = TokenKind_rshift;
@@ -825,76 +821,89 @@ void pplexer_tokenize_all(PpLexer* ppl) {
                 tok->kind = TokenKind_gt;
             }
         } else if (c == '#') {
-            if (ppl->src[ppl->pos] == '#') {
-                ++ppl->pos;
+            infile_next_char(ppl->src);
+            if (infile_consume_if(ppl->src, '#')) {
                 tok->kind = TokenKind_hashhash;
             } else {
                 tok->kind = ppl->at_bol ? pplexer_tokenize_pp_directive(ppl) : TokenKind_hash;
            }
         } else if (c == '\'') {
-            int start = ppl->pos - 1;
-            if (ppl->src[ppl->pos] == '\\') {
-                ++ppl->pos;
+            infile_next_char(ppl->src);
+            StrBuilder builder;
+            strbuilder_init(&builder);
+            strbuilder_append_char(&builder, '\'');
+            strbuilder_append_char(&builder, infile_peek_char(ppl->src));
+            if (infile_peek_char(ppl->src) == '\\') {
+                infile_next_char(ppl->src);
+                strbuilder_append_char(&builder, infile_peek_char(ppl->src));
             }
-            ppl->pos += 2;
+            strbuilder_append_char(&builder, '\'');
+            infile_next_char(ppl->src);
+            infile_next_char(ppl->src);
             tok->kind = TokenKind_character_constant;
-            tok->value.string = strndup(ppl->src + start, ppl->pos - start);
+            tok->value.string = builder.buf;
         } else if (c == '"') {
-            int start = ppl->pos - 1;
+            infile_next_char(ppl->src);
+            StrBuilder builder;
+            strbuilder_init(&builder);
             while (1) {
-                char ch = ppl->src[ppl->pos];
-                if (ch == '\\') {
-                    ++ppl->pos;
-                } else if (ch == '"') {
+                char ch = infile_peek_char(ppl->src);
+                if (ch == '"')
                     break;
+                strbuilder_append_char(&builder, ch);
+                if (ch == '\\') {
+                    infile_next_char(ppl->src);
+                    strbuilder_append_char(&builder, infile_peek_char(ppl->src));
                 }
-                ++ppl->pos;
+                infile_next_char(ppl->src);
            }
-            ++ppl->pos;
+            infile_next_char(ppl->src);
             tok->kind = TokenKind_literal_str;
-            tok->value.string = strndup(ppl->src + start + 1, ppl->pos - start - 2);
+            tok->value.string = builder.buf;
         } else if (isdigit(c)) {
-            --ppl->pos;
-            int start = ppl->pos;
-            while (isdigit(ppl->src[ppl->pos])) {
-                ++ppl->pos;
+            StrBuilder builder;
+            strbuilder_init(&builder);
+            while (isdigit(infile_peek_char(ppl->src))) {
+                strbuilder_append_char(&builder, infile_peek_char(ppl->src));
+                infile_next_char(ppl->src);
            }
             tok->kind = TokenKind_literal_int;
-            tok->value.integer = atoi(ppl->src + start);
+            tok->value.integer = atoi(builder.buf);
         } else if (isalpha(c) || c == '_') {
-            --ppl->pos;
-            int start = ppl->pos;
-            while (isalnum(ppl->src[ppl->pos]) || ppl->src[ppl->pos] == '_') {
-                ++ppl->pos;
+            StrBuilder builder;
+            strbuilder_init(&builder);
+            while (isalnum(infile_peek_char(ppl->src)) || infile_peek_char(ppl->src) == '_') {
+                strbuilder_append_char(&builder, infile_peek_char(ppl->src));
+                infile_next_char(ppl->src);
            }
-            tok->value.string = strndup(ppl->src + start, ppl->pos - start);
             tok->kind = TokenKind_ident;
-        } else if (c == '\n' || c == '\r') {
-            ++ppl->line;
+            tok->value.string = builder.buf;
+        } else if (c == '\n') {
+            infile_next_char(ppl->src);
             tok->kind = TokenKind_newline;
         } else if (isspace(c)) {
-            --ppl->pos;
-            while (isspace((c = ppl->src[ppl->pos]))) {
-                if (c == '\n' || c == '\r') {
+            while (isspace((c = infile_peek_char(ppl->src)))) {
+                if (c == '\n')
                     break;
-                }
-                ++ppl->pos;
+                infile_next_char(ppl->src);
            }
-            if (ppl->at_bol && ppl->src[ppl->pos] == '#') {
-                ++ppl->pos;
+            if (ppl->at_bol && infile_peek_char(ppl->src) == '#') {
+                infile_next_char(ppl->src);
                 tok->kind = pplexer_tokenize_pp_directive(ppl);
             } else {
                 tok->kind = TokenKind_whitespace;
             }
         } else {
+            infile_next_char(ppl->src);
             tok->kind = TokenKind_other;
-            tok->value.string = strndup(ppl->src + ppl->pos - 1, 1);
+            char* buf = calloc(2, sizeof(char));
+            buf[0] = c;
+            tok->value.string = buf;
         }
         ppl->at_bol = tok->kind == TokenKind_newline;
     }
 
     Token* eof_tok = tokens_push_new(ppl->pp_tokens);
-    eof_tok->loc.filename = ppl->filename;
-    eof_tok->loc.line = ppl->line;
+    eof_tok->loc = ppl->src->loc;
     eof_tok->kind = TokenKind_eof;
 }
@@ -1099,10 +1108,10 @@ int replace_single_pp_token(Preprocessor* pp, int dest, Token* source_tok) {
     replace_pp_tokens(pp, dest, dest + 1, &tokens);
 }
 
-void expand_include_directive(Preprocessor* pp, int directive_token_pos, const char* include_name_buf) {
-    InFile* include_source = read_all(include_name_buf);
+void expand_include_directive(Preprocessor* pp, int directive_token_pos, const char* include_name) {
+    InFile* include_source = infile_open(include_name);
     if (!include_source) {
-        fatal_error("cannot open include file: %s", include_name_buf);
+        fatal_error("cannot open include file: %s", include_name);
     }
 
     TokenArray* include_pp_tokens = do_preprocess(include_source, pp->include_depth + 1, pp->macros);
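
The `StrBuilder` added to common.c is the other half of the refactor: instead of remembering a start offset and calling `strndup(ppl->src + start, ppl->pos - start)`, the lexer appends characters as it consumes them and hands the null-terminated `buf` to the token. A standalone sketch of that pattern follows; the `StrBuilder` functions are copied from this diff, while `main()` and its input string are illustrative only.

```c
/* Standalone sketch of the StrBuilder usage that replaces the old
 * strndup() slicing in preprocess.c. main() is illustrative only. */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

typedef struct { size_t len; size_t capacity; char* buf; } StrBuilder;

void strbuilder_init(StrBuilder* b) {
    b->len = 0;
    b->capacity = 16;
    b->buf = calloc(b->capacity, sizeof(char));
}

/* `size` must include the trailing null byte. */
void strbuilder_reserve(StrBuilder* b, size_t size) {
    if (size <= b->capacity)
        return;
    b->capacity *= 2;
    b->buf = realloc(b->buf, b->capacity * sizeof(char));
    memset(b->buf + b->len, 0, (b->capacity - b->len) * sizeof(char));
}

void strbuilder_append_char(StrBuilder* b, int c) {
    strbuilder_reserve(b, b->len + 1 + 1); /* char + null terminator */
    b->buf[b->len++] = c;
}

int main(void) {
    const char* src = "include";
    StrBuilder builder;
    strbuilder_init(&builder);
    for (const char* p = src; *p; ++p)
        strbuilder_append_char(&builder, *p);
    /* buf is always null-terminated, so it can be handed to a token as-is. */
    printf("%s (%zu chars)\n", builder.buf, builder.len);
    free(builder.buf);
    return 0;
}
```

Because `strbuilder_init()` uses `calloc()` and `strbuilder_append_char()` always reserves one byte beyond the character it writes, `buf` stays null-terminated after every append, so no explicit terminator is needed before using it as a C string.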
