diff options
| -rw-r--r-- | preprocess.c | 162 | ||||
| -rw-r--r-- | std.h | 1 | ||||
| -rw-r--r-- | tests/071.sh | 18 | ||||
| -rw-r--r-- | tokenize.c | 5 |
4 files changed, 152 insertions, 34 deletions
diff --git a/preprocess.c b/preprocess.c index 57c400d..560b6b7 100644 --- a/preprocess.c +++ b/preprocess.c @@ -12,29 +12,69 @@ enum PpTokenKind { }; typedef enum PpTokenKind PpTokenKind; +const char* pp_token_kind_stringify(PpTokenKind kind) { + if (kind == PpTokenKind_eof) + return "<eof>"; + else if (kind == PpTokenKind_header_name) + return "<header-name>"; + else if (kind == PpTokenKind_identifier) + return "<identifier>"; + else if (kind == PpTokenKind_pp_number) + return "<pp-number>"; + else if (kind == PpTokenKind_character_constant) + return "<character-constant>"; + else if (kind == PpTokenKind_string_literal) + return "<string-literal>"; + else if (kind == PpTokenKind_punctuator) + return "<punctuator>"; + else if (kind == PpTokenKind_other) + return "<other>"; + else if (kind == PpTokenKind_whitespace) + return "<whitespace>"; + else + unreachable(); +} + struct PpToken { PpTokenKind kind; String raw; }; typedef struct PpToken PpToken; +const char* pp_token_stringify(PpToken* tok) { + const char* kind_str = pp_token_kind_stringify(tok->kind); + char* buf = calloc(tok->raw.len + strlen(kind_str) + 3 + 1, sizeof(char)); + sprintf(buf, "%.*s (%s)", tok->raw.len, tok->raw.data, kind_str); + return buf; +} + enum PpMacroKind { PpMacroKind_obj, PpMacroKind_func, }; typedef enum PpMacroKind PpMacroKind; +const char* pp_macro_kind_stringify(PpMacroKind kind) { + if (kind == PpMacroKind_obj) { + return "object-like"; + } else if (kind == PpMacroKind_func) { + return "function-like"; + } else { + unreachable(); + } +} + struct PpMacro { PpMacroKind kind; String name; - int n_replacements; + size_t n_replacements; PpToken* replacements; }; typedef struct PpMacro PpMacro; struct PpMacros { - PpMacro* data; size_t len; + PpMacro* data; }; typedef struct PpMacros PpMacros; @@ -59,6 +99,24 @@ PpMacros* pp_macros_new() { return pp_macros; } +void pp_macros_dump(PpMacros* pp_macros) { + int i; + fprintf(stderr, "PpMacros {\n"); + fprintf(stderr, " len = %zu\n", pp_macros->len); + fprintf(stderr, " data = [\n"); + for (i = 0; i < pp_macros->len; ++i) { + PpMacro* m = &pp_macros->data[i]; + fprintf(stderr, " PpMacro {\n"); + fprintf(stderr, " kind = %s\n", pp_macro_kind_stringify(m->kind)); + fprintf(stderr, " name = %.*s\n", m->name.len, m->name.data); + fprintf(stderr, " n_replacements = %zu\n", m->n_replacements); + fprintf(stderr, " replacements = TODO\n"); + fprintf(stderr, " }\n"); + } + fprintf(stderr, " ]\n"); + fprintf(stderr, "}\n"); +} + void add_predefined_macros(PpMacros* pp_macros) { PpMacro* pp_macro = pp_macros->data + pp_macros->len; pp_macro->kind = PpMacroKind_obj; @@ -357,13 +415,13 @@ void pp_tokenize_all(Preprocessor* pp) { tok->raw.len = pp->pos - start; tok->kind = PpTokenKind_identifier; } else if (isspace(c)) { - tok->raw.data = pp->src; - tok->raw.len = 1; tok->kind = PpTokenKind_whitespace; - } else { - tok->raw.data = pp->src; tok->raw.len = 1; + tok->raw.data = pp->src + pp->pos - tok->raw.len; + } else { tok->kind = PpTokenKind_other; + tok->raw.len = 1; + tok->raw.data = pp->src + pp->pos - tok->raw.len; } ++pp->n_pp_tokens; } @@ -376,6 +434,26 @@ PpToken* skip_whitespace(PpToken* tok) { return tok; } +int string_contains_newline(String* s) { + int i; + for (i = 0; i < s->len; ++i) { + if (s->data[i] == '\n') { + return 1; + } + } + return 0; +} + +PpToken* find_next_newline(PpToken* tok) { + while (tok->kind != PpTokenKind_eof) { + if (tok->kind == PpTokenKind_whitespace && string_contains_newline(&tok->raw)) { + return tok; + } + ++tok; + } + return NULL; +} + void make_token_whitespace(PpToken* tok) { tok->kind = PpTokenKind_whitespace; tok->raw.len = 0; @@ -478,18 +556,19 @@ PpToken* replace_pp_tokens(Preprocessor* pp, PpToken* dest_start, PpToken* dest_ PpToken* source_tokens) { int n_tokens_to_remove = dest_end - dest_start; int n_tokens_after_dest = (pp->pp_tokens + pp->n_pp_tokens) - dest_end; - int n_moved; + int shift_amount; if (n_tokens_to_remove < n_source_tokens) { // Move existing tokens backward to make room. - n_moved = n_source_tokens - n_tokens_to_remove; - memmove(dest_end + n_moved, dest_end, n_tokens_after_dest * sizeof(PpToken)); - pp->n_pp_tokens += n_moved; + shift_amount = n_source_tokens - n_tokens_to_remove; + memmove(dest_end + shift_amount, dest_end, n_tokens_after_dest * sizeof(PpToken)); + pp->n_pp_tokens += shift_amount; } else if (n_source_tokens < n_tokens_to_remove) { // Move existing tokens forward to reduce room. - n_moved = n_tokens_to_remove - n_source_tokens; + shift_amount = n_tokens_to_remove - n_source_tokens; memmove(dest_start + n_source_tokens, dest_end, n_tokens_after_dest * sizeof(PpToken)); - pp->n_pp_tokens -= n_moved; + pp->n_pp_tokens -= shift_amount; + memset(pp->pp_tokens + pp->n_pp_tokens, 0, shift_amount * sizeof(PpToken)); } memcpy(dest_start, source_tokens, n_source_tokens * sizeof(PpToken)); @@ -528,8 +607,9 @@ PpToken* process_include_directive(Preprocessor* pp, PpToken* tok) { PpToken* process_define_directive(Preprocessor* pp, PpToken* tok) { PpToken* tok2 = skip_whitespace(tok + 1); - PpToken* macro_replacements; + PpToken* tok3 = NULL; PpMacro* pp_macro; + int i; if (tok2->kind == PpTokenKind_identifier && string_equals_cstr(&tok2->raw, "define")) { ++tok2; tok2 = skip_whitespace(tok2); @@ -544,40 +624,44 @@ PpToken* process_define_directive(Preprocessor* pp, PpToken* tok) { fatal_error("#define: invalid function-like macro syntax"); } tok2 = skip_whitespace(tok2); - if (tok2->kind == PpTokenKind_identifier || tok2->kind == PpTokenKind_pp_number) { - macro_replacements = tok2; - + tok3 = find_next_newline(tok2); + if (tok3) { pp_macro = pp->pp_macros->data + pp->pp_macros->len; pp_macro->kind = PpMacroKind_func; pp_macro->name.len = macro_name->raw.len; pp_macro->name.data = macro_name->raw.data; - pp_macro->n_replacements = 1; - pp_macro->replacements = calloc(1, sizeof(PpToken)); - pp_macro->replacements[0].kind = macro_replacements->kind; - pp_macro->replacements[0].raw.len = macro_replacements->raw.len; - pp_macro->replacements[0].raw.data = macro_replacements->raw.data; + pp_macro->n_replacements = tok3 - tok2; + pp_macro->replacements = calloc(pp_macro->n_replacements, sizeof(PpToken)); + for (i = 0; i < pp_macro->n_replacements; ++i) { + pp_macro->replacements[i].kind = tok2[i].kind; + pp_macro->replacements[i].raw.len = tok2[i].raw.len; + pp_macro->replacements[i].raw.data = tok2[i].raw.data; + } ++pp->pp_macros->len; } } else { tok2 = skip_whitespace(tok2); - if (tok2->kind == PpTokenKind_identifier || tok2->kind == PpTokenKind_pp_number) { - macro_replacements = tok2; - + tok3 = find_next_newline(tok2); + if (tok3) { pp_macro = pp->pp_macros->data + pp->pp_macros->len; pp_macro->kind = PpMacroKind_obj; pp_macro->name.len = macro_name->raw.len; pp_macro->name.data = macro_name->raw.data; - pp_macro->n_replacements = 1; - pp_macro->replacements = calloc(1, sizeof(PpToken)); - pp_macro->replacements[0].kind = macro_replacements->kind; - pp_macro->replacements[0].raw.len = macro_replacements->raw.len; - pp_macro->replacements[0].raw.data = macro_replacements->raw.data; + pp_macro->n_replacements = tok3 - tok2; + pp_macro->replacements = calloc(pp_macro->n_replacements, sizeof(PpToken)); + for (i = 0; i < pp_macro->n_replacements; ++i) { + pp_macro->replacements[i].kind = tok2[i].kind; + pp_macro->replacements[i].raw.len = tok2[i].raw.len; + pp_macro->replacements[i].raw.data = tok2[i].raw.data; + } ++pp->pp_macros->len; } } } - remove_directive_tokens(tok, tok2 + 1); - return tok2 + 1; + if (tok3) { + remove_directive_tokens(tok, tok3); + return tok3; + } } return NULL; } @@ -589,7 +673,12 @@ void expand_macro(Preprocessor* pp, PpToken* tok) { } PpMacro* pp_macro = pp->pp_macros->data + pp_macro_idx; - replace_pp_tokens(pp, tok, tok + 1, pp_macro->n_replacements, pp_macro->replacements); + if (pp_macro->kind == PpMacroKind_func) { + // also consume '(' and ')' + replace_pp_tokens(pp, tok, tok + 3, pp_macro->n_replacements, pp_macro->replacements); + } else { + replace_pp_tokens(pp, tok, tok + 1, pp_macro->n_replacements, pp_macro->replacements); + } } void process_pp_directives(Preprocessor* pp) { @@ -629,6 +718,15 @@ void process_pp_directives(Preprocessor* pp) { } } +void pp_dump(PpToken* t, int include_whitespace) { + for (; t->kind != PpTokenKind_eof; ++t) { + if (t->kind == PpTokenKind_whitespace && !include_whitespace) { + continue; + } + fprintf(stderr, "%s\n", pp_token_stringify(t)); + } +} + PpToken* do_preprocess(char* src, int depth, PpMacros* pp_macros) { Preprocessor* pp = preprocessor_new(src, depth, pp_macros); add_include_path(pp, "/home/ken/src/ducc/include"); @@ -21,6 +21,7 @@ int isdigit(int); int isspace(int); void* memcpy(void*, void*, size_t); void* memmove(void*, void*, size_t); +void* memset(void*, int, size_t); int printf(const char*, ...); int sprintf(char*, const char*, ...); int strcmp(const char*, const char*); diff --git a/tests/071.sh b/tests/071.sh new file mode 100644 index 0000000..203ac5e --- /dev/null +++ b/tests/071.sh @@ -0,0 +1,18 @@ +set -e + +cat <<'EOF' > expected +579 +975 +EOF + +bash ../../test_diff.sh <<'EOF' +#define foo 123 + 456 +#define bar() 321 + 654 + +int printf(); + +int main() { + printf("%d\n", foo); + printf("%d\n", bar()); +} +EOF @@ -184,8 +184,9 @@ const char* token_kind_stringify(TokenKind k) { const char* token_stringify(Token* t) { TokenKind k = t->kind; if (k == TokenKind_ident || k == TokenKind_literal_int || k == TokenKind_literal_str) { - char* buf = calloc(t->raw.len + 1, sizeof(char)); - sprintf(buf, "%.*s (%s)", t->raw.len, t->raw.data, token_kind_stringify(k)); + const char* kind_str = token_kind_stringify(k); + char* buf = calloc(t->raw.len + strlen(kind_str) + 3 + 1, sizeof(char)); + sprintf(buf, "%.*s (%s)", t->raw.len, t->raw.data, kind_str); return buf; } else { return token_kind_stringify(k); |
