From 0fe7524bd07365ceafa906a2ec3e9fb2d6650feb Mon Sep 17 00:00:00 2001 From: nsfisis Date: Mon, 5 Jan 2026 02:19:29 +0900 Subject: feat: concatenate adjacent string literals --- src/main.c | 1 + src/preprocess.c | 28 ++++++++++++++++++++++++++++ src/preprocess.h | 1 + tests/string_operations.sh | 16 +++++++++++++++- 4 files changed, 45 insertions(+), 1 deletion(-) diff --git a/src/main.c b/src/main.c index e8e1030..44a317c 100644 --- a/src/main.c +++ b/src/main.c @@ -32,6 +32,7 @@ int main(int argc, char** argv) { return 0; } + concat_adjacent_string_literals(pp_tokens); TokenArray* tokens = convert_pp_tokens_to_tokens(pp_tokens); Program* prog = parse(tokens); diff --git a/src/preprocess.c b/src/preprocess.c index 9605f69..b06de6d 100644 --- a/src/preprocess.c +++ b/src/preprocess.c @@ -1177,6 +1177,34 @@ TokenArray* preprocess(InFile* src, StrArray* included_files, StrArray* user_inc return do_preprocess(src, 0, macros, included_files, user_include_dirs); } +void concat_adjacent_string_literals(TokenArray* pp_tokens) { + size_t last_nonempty_token_pos = 0; + TokenKind last_nonempty_token_kind = TokenKind_eof; + for (size_t pos = 0; pos < pp_tokens->len; ++pos) { + Token* pp_tok = &pp_tokens->data[pos]; + TokenKind k = pp_tok->kind; + if (k == TokenKind_removed || k == TokenKind_whitespace || k == TokenKind_newline) { + continue; + } + if (k == TokenKind_literal_str && last_nonempty_token_kind == TokenKind_literal_str) { + // Concatenate adjacent string literals. + Token* last_pp_tok = &pp_tokens->data[last_nonempty_token_pos]; + const char* s1 = last_pp_tok->value.string; + size_t l1 = strlen(s1); + const char* s2 = pp_tok->value.string; + size_t l2 = strlen(s2); + char* buf = calloc(l1 + l2 + 1, sizeof(char)); + memcpy(buf, s1, l1); + memcpy(buf + l1, s2, l2); + last_pp_tok->value.string = buf; + pp_tok->kind = TokenKind_removed; + } else { + last_nonempty_token_pos = pos; + last_nonempty_token_kind = k; + } + } +} + void print_token_to_file(FILE* out, TokenArray* pp_tokens) { for (size_t i = 0; i < pp_tokens->len; ++i) { Token* tok = &pp_tokens->data[i]; diff --git a/src/preprocess.h b/src/preprocess.h index a39f01c..4bf9834 100644 --- a/src/preprocess.h +++ b/src/preprocess.h @@ -6,6 +6,7 @@ #include "token.h" TokenArray* preprocess(InFile* src, StrArray* included_files, StrArray* user_include_dirs); +void concat_adjacent_string_literals(TokenArray* pp_tokens); void print_token_to_file(FILE* output_file, TokenArray* pp_tokens); #endif diff --git a/tests/string_operations.sh b/tests/string_operations.sh index 0c9a07e..2466145 100644 --- a/tests/string_operations.sh +++ b/tests/string_operations.sh @@ -22,6 +22,21 @@ int main() { } EOF +cat <<'EOF' > expected +abc +defghijkl +EOF + +test_diff <<'EOF' +int printf(); + +int main() { + printf("abc\n"); + printf("def" "ghi" + "jkl\n"); +} +EOF + cat <<'EOF' > expected h l @@ -54,4 +69,3 @@ int main() { } } EOF - -- cgit v1.2.3-70-g09d2