diff options
| author | nsfisis <nsfisis@gmail.com> | 2025-09-13 00:05:12 +0900 |
|---|---|---|
| committer | nsfisis <nsfisis@gmail.com> | 2025-09-13 01:38:56 +0900 |
| commit | 8de7fa9da5fd8015f4fcc826b9270061b7b89478 (patch) | |
| tree | ec9c1e56f179be207c31a113a0a96210f7509431 | |
| parent | d41a97e957ef616d194f60b9b79820cd0162d920 (diff) | |
| download | ducc-8de7fa9da5fd8015f4fcc826b9270061b7b89478.tar.gz ducc-8de7fa9da5fd8015f4fcc826b9270061b7b89478.tar.zst ducc-8de7fa9da5fd8015f4fcc826b9270061b7b89478.zip | |
feat: implement -E flag (preprocess only)
| -rw-r--r-- | src/cli.c | 16 | ||||
| -rw-r--r-- | src/cli.h | 1 | ||||
| -rw-r--r-- | src/main.c | 10 | ||||
| -rw-r--r-- | src/preprocess.c | 39 | ||||
| -rw-r--r-- | src/preprocess.h | 2 | ||||
| -rw-r--r-- | src/token.c | 2 | ||||
| -rw-r--r-- | src/token.h | 1 | ||||
| -rw-r--r-- | src/tokenize.c | 2 | ||||
| -rw-r--r-- | tests/helpers.sh | 7 | ||||
| -rw-r--r-- | tests/test_function_macros.sh | 16 | ||||
| -rw-r--r-- | tests/test_if_elif_else.sh | 56 | ||||
| -rw-r--r-- | tests/test_ifdef_ifndef.sh | 39 | ||||
| -rw-r--r-- | tests/test_keywords.sh | 11 | ||||
| -rw-r--r-- | tests/test_macro_operators.sh | 45 | ||||
| -rw-r--r-- | tests/test_preprocess_flag.sh | 122 | ||||
| -rw-r--r-- | tests/test_undef.sh | 14 |
16 files changed, 323 insertions, 60 deletions
@@ -10,8 +10,9 @@ static void print_version() { CliArgs* parse_cli_args(int argc, char** argv) { const char* output_filename = NULL; int positional_arguments_start = -1; - bool only_compile = false; - bool generate_deps = false; + bool opt_c = false; + bool opt_E = false; + bool opt_MMD = false; for (int i = 1; i < argc; ++i) { if (argv[i][0] != '-') { @@ -34,9 +35,11 @@ CliArgs* parse_cli_args(int argc, char** argv) { output_filename = argv[i + 1]; ++i; } else if (c == 'c') { - only_compile = true; + opt_c = true; + } else if (c == 'E') { + opt_E = true; } else if (strcmp(argv[i], "-MMD") == 0) { - generate_deps = true; + opt_MMD = true; } else if (strcmp(argv[i], "--version") == 0) { print_version(); exit(0); @@ -54,10 +57,11 @@ CliArgs* parse_cli_args(int argc, char** argv) { a->input_filename = argv[positional_arguments_start]; a->output_filename = output_filename; a->output_assembly = !output_filename || str_ends_with(output_filename, ".s"); - a->only_compile = only_compile; + a->only_compile = opt_c; + a->preprocess_only = opt_E; a->totally_deligate_to_gcc = false; a->gcc_command = NULL; - a->generate_deps = generate_deps; + a->generate_deps = opt_MMD; if (!a->only_compile && str_ends_with(a->input_filename, ".o")) { a->totally_deligate_to_gcc = true; @@ -8,6 +8,7 @@ typedef struct { const char* output_filename; bool output_assembly; bool only_compile; + bool preprocess_only; bool generate_deps; bool totally_deligate_to_gcc; const char* gcc_command; @@ -22,6 +22,16 @@ int main(int argc, char** argv) { strings_init(&included_files); TokenArray* pp_tokens = preprocess(source, &included_files); + + if (cli_args->preprocess_only) { + FILE* output_file = cli_args->output_filename ? fopen(cli_args->output_filename, "w") : stdout; + if (!output_file) { + fatal_error("Cannot open output file: %s", cli_args->output_filename); + } + print_token_to_file(output_file, pp_tokens); + return 0; + } + TokenArray* tokens = tokenize(pp_tokens); Program* prog = parse(tokens); diff --git a/src/preprocess.c b/src/preprocess.c index 9ec582a..3965949 100644 --- a/src/preprocess.c +++ b/src/preprocess.c @@ -661,10 +661,10 @@ static void seek_to_next_newline(Preprocessor* pp) { ; } -static void make_tokens_whitespaces(Preprocessor* pp, int start, int end) { +static void make_tokens_removed(Preprocessor* pp, int start, int end) { for (int i = start; i < end; ++i) { Token* tok = pp_token_at(pp, i); - tok->kind = TokenKind_whitespace; + tok->kind = TokenKind_removed; tok->value.string = NULL; } } @@ -1468,7 +1468,7 @@ static void skip_group_opt(Preprocessor* pp, GroupDelimiterKind delimiter_kind) int first_pos = pp->pos; seek_to_next_newline(pp); expect_pp_token(pp, TokenKind_newline); - make_tokens_whitespaces(pp, first_pos, pp->pos); + make_tokens_removed(pp, first_pos, pp->pos); } expect_pp_token(pp, TokenKind_pp_directive_endif); @@ -1491,7 +1491,7 @@ static void preprocess_preprocessing_file(Preprocessor* pp) { static void remove_pp_directive(Preprocessor* pp, int directive_token_pos) { seek_to_next_newline(pp); skip_pp_token(pp, TokenKind_newline); - make_tokens_whitespaces(pp, directive_token_pos, pp->pos); + make_tokens_removed(pp, directive_token_pos, pp->pos); } static void remove_pp_directives(Preprocessor* pp) { @@ -1530,3 +1530,34 @@ TokenArray* preprocess(InFile* src, StrArray* included_files) { strings_push(included_files, src->loc.filename); return do_preprocess(src, 0, macros, included_files); } + +void print_token_to_file(FILE* out, TokenArray* pp_tokens) { + for (size_t i = 0; i < pp_tokens->len; ++i) { + Token* tok = &pp_tokens->data[i]; + + if (tok->kind == TokenKind_whitespace) { + // TODO: preserve indent? + fprintf(out, " "); + } else if (tok->kind == TokenKind_removed) { + // Output nothing for removed tokens + } else if (tok->kind == TokenKind_newline) { + // TODO: remove adjacent newlines? + fprintf(out, "\n"); + } else if (tok->kind != TokenKind_eof) { + // TODO: string literal + fprintf(out, "%s", token_stringify(tok)); + // Add space after token if next token is not punctuation + // TODO: apply stricter approach + if (i + 1 < pp_tokens->len) { + Token* next = &pp_tokens->data[i + 1]; + if (next->kind != TokenKind_newline && next->kind != TokenKind_whitespace && + next->kind != TokenKind_removed && next->kind != TokenKind_eof && next->kind != TokenKind_comma && + next->kind != TokenKind_semicolon && next->kind != TokenKind_paren_r && + next->kind != TokenKind_bracket_r && next->kind != TokenKind_brace_r && + next->kind != TokenKind_dot) { + fprintf(out, " "); + } + } + } + } +} diff --git a/src/preprocess.h b/src/preprocess.h index 81007c3..5449857 100644 --- a/src/preprocess.h +++ b/src/preprocess.h @@ -1,10 +1,12 @@ #ifndef DUCC_PREPROCESS_H #define DUCC_PREPROCESS_H +#include <stdio.h> #include "common.h" #include "io.h" #include "token.h" TokenArray* preprocess(InFile* src, StrArray* included_files); +void print_token_to_file(FILE* output_file, TokenArray* pp_tokens); #endif diff --git a/src/token.c b/src/token.c index 45ef2cd..e825de1 100644 --- a/src/token.c +++ b/src/token.c @@ -12,6 +12,8 @@ const char* token_kind_stringify(TokenKind k) { return "##"; else if (k == TokenKind_whitespace) return "<whitespace>"; + else if (k == TokenKind_removed) + return "<removed>"; else if (k == TokenKind_newline) return "<new-line>"; else if (k == TokenKind_other) diff --git a/src/token.h b/src/token.h index 0bf23f6..bbd8066 100644 --- a/src/token.h +++ b/src/token.h @@ -11,6 +11,7 @@ typedef enum { TokenKind_hash, TokenKind_hashhash, TokenKind_whitespace, + TokenKind_removed, TokenKind_newline, TokenKind_other, TokenKind_character_constant, diff --git a/src/tokenize.c b/src/tokenize.c index 7ba00cd..c897b02 100644 --- a/src/tokenize.c +++ b/src/tokenize.c @@ -19,7 +19,7 @@ static void tokenize_all(Lexer* l) { for (size_t pos = 0; pos < l->src->len; ++pos) { Token* pp_tok = &l->src->data[pos]; TokenKind k = pp_tok->kind; - if (k == TokenKind_whitespace || k == TokenKind_newline) { + if (k == TokenKind_removed || k == TokenKind_whitespace || k == TokenKind_newline) { continue; } Token* tok = tokens_push_new(l->tokens); diff --git a/tests/helpers.sh b/tests/helpers.sh index eaccd5c..a69e213 100644 --- a/tests/helpers.sh +++ b/tests/helpers.sh @@ -49,6 +49,13 @@ function test_compile_error() { diff -u expected output } +function test_cpp() { + cat > main.c + + "$ducc" -E main.c > output + diff -u -Z expected output +} + function test_example() { filename="../../../examples/$1.c" diff --git a/tests/test_function_macros.sh b/tests/test_function_macros.sh index 4bc0cc2..1108650 100644 --- a/tests/test_function_macros.sh +++ b/tests/test_function_macros.sh @@ -83,19 +83,24 @@ int main() { EOF cat <<'EOF' > expected -42 -123 +int printf ( const char *, ...); + +int main () { + int foo = 42; + printf ( %d\n, foo); + + int a = 123; + printf ( %d\n, a); +} EOF -test_diff <<'EOF' +test_cpp <<'EOF' int printf(const char*, ...); int main() { - // TODO: check if "foo" is expanded to "foo" or not once '#' operator is implemented. #define foo foo int foo = 42; printf("%d\n", foo); - // TODO: check if "a" is expanded to "a" or not once '#' operator is implemented. #define a b #define b c #define c a @@ -103,4 +108,3 @@ int main() { printf("%d\n", a); } EOF - diff --git a/tests/test_if_elif_else.sh b/tests/test_if_elif_else.sh index a07dd5c..e0adee0 100644 --- a/tests/test_if_elif_else.sh +++ b/tests/test_if_elif_else.sh @@ -1,8 +1,13 @@ cat <<'EOF' > expected -1 +int printf (); + + +int main () { + printf ( 1\n); +} EOF -test_diff <<'EOF' +test_cpp <<'EOF' int printf(); #define A @@ -18,13 +23,20 @@ int main() { EOF cat <<'EOF' > expected -1 -1 -2 -3 +int printf (); + +int main () { + printf ( 1\n); + + printf ( 1\n); + + printf ( 2\n); + + printf ( 3\n); +} EOF -test_diff <<'EOF' +test_cpp <<'EOF' int printf(); int main() { @@ -63,17 +75,28 @@ int main() { EOF cat <<'EOF' > expected -1 -1 -2 -2 -3 -4 -3 -4 +int printf (); + +int main () { + printf ( 1\n); + + printf ( 1\n); + + printf ( 2\n); + + printf ( 2\n); + + printf ( 3\n); + + printf ( 4\n); + + printf ( 3\n); + + printf ( 4\n); +} EOF -test_diff <<'EOF' +test_cpp <<'EOF' int printf(); int main() { @@ -190,4 +213,3 @@ int main() { #endif } EOF - diff --git a/tests/test_ifdef_ifndef.sh b/tests/test_ifdef_ifndef.sh index f59b87b..683a1dc 100644 --- a/tests/test_ifdef_ifndef.sh +++ b/tests/test_ifdef_ifndef.sh @@ -1,10 +1,21 @@ cat <<'EOF' > expected -__ducc__ is defined. -A is defined. -B is undefined. +int printf (); + + +int main () { + + printf ( __ducc__ is defined.\n); + + printf ( A is defined.\n); + + printf ( B is undefined.\n); + + + return 0; +} EOF -test_diff <<'EOF' +test_cpp <<'EOF' int printf(); #define A 123 @@ -36,12 +47,23 @@ int main() { EOF cat <<'EOF' > expected -__ducc__ is defined. -A is defined. -B is undefined. +int printf (); + + +int main () { + + printf ( __ducc__ is defined.\n); + + printf ( A is defined.\n); + + printf ( B is undefined.\n); + + + return 0; +} EOF -test_diff <<'EOF' +test_cpp <<'EOF' int printf(); #define A 123 @@ -71,4 +93,3 @@ int main() { return 0; } EOF - diff --git a/tests/test_keywords.sh b/tests/test_keywords.sh index 1b99b67..0bdb6f7 100644 --- a/tests/test_keywords.sh +++ b/tests/test_keywords.sh @@ -1,9 +1,14 @@ -touch expected -test_diff <<'EOF' +cat <<'EOF' > expected + + +int printf (); +int main () {} +EOF + +test_cpp <<'EOF' // A keyword is treated as a normal identifier in preprocessing phase. #define auto int auto printf(); auto main() {} EOF - diff --git a/tests/test_macro_operators.sh b/tests/test_macro_operators.sh index a77d3ca..f9cc385 100644 --- a/tests/test_macro_operators.sh +++ b/tests/test_macro_operators.sh @@ -1,14 +1,31 @@ cat <<'EOF' > expected -foobar=100 -prefix_test=200 -test_suffix=300 -var_1=10 -var_2=20 -var_A=30 -number_12=12 +int printf (); + + +int foobar = 100; +int prefix_test = 200; +int test_suffix = 300; + +int var_1 = 10; +int var_2 = 20; + +int var_A = 30; + +int number_12 = 12; + +int main () { + printf ( foobar=%d\n, foobar); + printf ( prefix_test=%d\n, prefix_test); + printf ( test_suffix=%d\n, test_suffix); + printf ( var_1=%d\n, var_1); + printf ( var_2=%d\n, var_2); + printf ( var_A=%d\n, var_A); + printf ( number_12=%d\n, number_12); + return 0; +} EOF -test_diff <<'EOF' +test_cpp <<'EOF' int printf(); #define CONCAT(a, b) a##b @@ -42,9 +59,17 @@ int main() { EOF cat <<'EOF' > expected -123 +int printf ( const char *, ...); + + +int H ( int n) { return n; } + +int main () { + printf ( %d\n, H ( 123)); +} EOF -test_diff <<'EOF' + +test_cpp <<'EOF' int printf(const char*, ...); #define F(x) CHECK(G(x)) diff --git a/tests/test_preprocess_flag.sh b/tests/test_preprocess_flag.sh new file mode 100644 index 0000000..3b52bc0 --- /dev/null +++ b/tests/test_preprocess_flag.sh @@ -0,0 +1,122 @@ +cat <<'EOF' > expected + +int main () { + int x = 100; + int y = 0; + return 0; +} +EOF + +test_cpp <<'EOF' +#define MAX 100 +#define MIN 0 + +int main() { + int x = MAX; + int y = MIN; + return 0; +} +EOF + +cat <<'EOF' > expected + +int main () { + int sum = ( ( 10) + ( 20)); + int product = ( ( 3) * ( 4)); + return 0; +} +EOF + +test_cpp <<'EOF' +#define ADD(a, b) ((a) + (b)) +#define MUL(x, y) ((x) * (y)) + +int main() { + int sum = ADD(10, 20); + int product = MUL(3, 4); + return 0; +} +EOF + +cat <<'EOF' > expected +int main () { + return 42; +} +EOF + +test_cpp <<'EOF' +#define FOO 42 +int main() { + return FOO; +} +EOF + +cat <<'EOF' > expected +int foo () { return 1; } +EOF + +test_cpp <<'EOF' +#define X 1 +#ifdef X +int foo() { return 1; } +#else +int foo() { return 0; } +#endif +EOF + +cat <<'EOF' > expected + +int main () { + int x = 1 + 2 * 3; + return 0; +} +EOF + +test_cpp <<'EOF' +#define A 1 +#define B A + 2 +#define C B * 3 + +int main() { + int x = C; + return 0; +} +EOF + +cat <<'EOF' > expected + +int main () { + int x = ( 5 + 3); + int y = ( ( 5) + ( 3)); + return 0; +} +EOF + +test_cpp <<'EOF' +#define SIMPLE (5 + 3) +#define COMPLEX(a, b) ((a) + (b)) + +int main() { + int x = SIMPLE; + int y = COMPLEX(5, 3); + return 0; +} +EOF + +cat <<'EOF' > expected + +int main () { + int bar = 0; + bar ++; +} +EOF + +test_cpp <<'EOF' +#define A(a) a +#define B(b) b + +int main() { + A(int)B(bar) = 0; + bar++; +} +EOF diff --git a/tests/test_undef.sh b/tests/test_undef.sh index 34c3dad..27d03c6 100644 --- a/tests/test_undef.sh +++ b/tests/test_undef.sh @@ -1,9 +1,16 @@ cat <<'EOF' > expected -A is defined -A is undefined +int printf (); + +int main () { + + printf ( A is defined\n); + + + printf ( A is undefined\n); +} EOF -test_diff <<'EOF' +test_cpp <<'EOF' int printf(); int main() { @@ -24,4 +31,3 @@ int main() { #endif } EOF - |
