about | summary | refs | log | tree | commit | diff | homepage
diff options
context:
space:
mode:
author nsfisis <nsfisis@gmail.com> 2025-09-13 00:05:12 +0900
committer nsfisis <nsfisis@gmail.com> 2025-09-13 01:38:56 +0900
commit 8de7fa9da5fd8015f4fcc826b9270061b7b89478 (patch)
tree ec9c1e56f179be207c31a113a0a96210f7509431
parent d41a97e957ef616d194f60b9b79820cd0162d920 (diff)
download ducc-8de7fa9da5fd8015f4fcc826b9270061b7b89478.tar.gz
ducc-8de7fa9da5fd8015f4fcc826b9270061b7b89478.tar.zst
ducc-8de7fa9da5fd8015f4fcc826b9270061b7b89478.zip
feat: implement -E flag (preprocess only)
-rw-r--r-- src/cli.c 16
-rw-r--r-- src/cli.h 1
-rw-r--r-- src/main.c 10
-rw-r--r-- src/preprocess.c 39
-rw-r--r-- src/preprocess.h 2
-rw-r--r-- src/token.c 2
-rw-r--r-- src/token.h 1
-rw-r--r-- src/tokenize.c 2
-rw-r--r-- tests/helpers.sh 7
-rw-r--r-- tests/test_function_macros.sh 16
-rw-r--r-- tests/test_if_elif_else.sh 56
-rw-r--r-- tests/test_ifdef_ifndef.sh 39
-rw-r--r-- tests/test_keywords.sh 11
-rw-r--r-- tests/test_macro_operators.sh 45
-rw-r--r-- tests/test_preprocess_flag.sh 122
-rw-r--r-- tests/test_undef.sh 14
16 files changed, 323 insertions, 60 deletions
diff --git a/src/cli.c b/src/cli.c
index 893c7f2..cd4cc42 100644
--- a/src/cli.c
+++ b/src/cli.c
@@ -10,8 +10,9 @@ static void print_version() {
CliArgs* parse_cli_args(int argc, char** argv) {
const char* output_filename = NULL;
int positional_arguments_start = -1;
- bool only_compile = false;
- bool generate_deps = false;
+ bool opt_c = false;
+ bool opt_E = false;
+ bool opt_MMD = false;
for (int i = 1; i < argc; ++i) {
if (argv[i][0] != '-') {
@@ -34,9 +35,11 @@ CliArgs* parse_cli_args(int argc, char** argv) {
output_filename = argv[i + 1];
++i;
} else if (c == 'c') {
- only_compile = true;
+ opt_c = true;
+ } else if (c == 'E') {
+ opt_E = true;
} else if (strcmp(argv[i], "-MMD") == 0) {
- generate_deps = true;
+ opt_MMD = true;
} else if (strcmp(argv[i], "--version") == 0) {
print_version();
exit(0);
@@ -54,10 +57,11 @@ CliArgs* parse_cli_args(int argc, char** argv) {
a->input_filename = argv[positional_arguments_start];
a->output_filename = output_filename;
a->output_assembly = !output_filename || str_ends_with(output_filename, ".s");
- a->only_compile = only_compile;
+ a->only_compile = opt_c;
+ a->preprocess_only = opt_E;
a->totally_deligate_to_gcc = false;
a->gcc_command = NULL;
- a->generate_deps = generate_deps;
+ a->generate_deps = opt_MMD;
if (!a->only_compile && str_ends_with(a->input_filename, ".o")) {
a->totally_deligate_to_gcc = true;
diff --git a/src/cli.h b/src/cli.h
index ec1fa50..3b1e7f5 100644
--- a/src/cli.h
+++ b/src/cli.h
@@ -8,6 +8,7 @@ typedef struct {
const char* output_filename;
bool output_assembly;
bool only_compile;
+ bool preprocess_only;
bool generate_deps;
bool totally_deligate_to_gcc;
const char* gcc_command;
diff --git a/src/main.c b/src/main.c
index b064f39..5a9fd56 100644
--- a/src/main.c
+++ b/src/main.c
@@ -22,6 +22,16 @@ int main(int argc, char** argv) {
strings_init(&included_files);
TokenArray* pp_tokens = preprocess(source, &included_files);
+
+ if (cli_args->preprocess_only) {
+ FILE* output_file = cli_args->output_filename ? fopen(cli_args->output_filename, "w") : stdout;
+ if (!output_file) {
+ fatal_error("Cannot open output file: %s", cli_args->output_filename);
+ }
+ print_token_to_file(output_file, pp_tokens);
+ return 0;
+ }
+
TokenArray* tokens = tokenize(pp_tokens);
Program* prog = parse(tokens);
diff --git a/src/preprocess.c b/src/preprocess.c
index 9ec582a..3965949 100644
--- a/src/preprocess.c
+++ b/src/preprocess.c
@@ -661,10 +661,10 @@ static void seek_to_next_newline(Preprocessor* pp) {
;
}
-static void make_tokens_whitespaces(Preprocessor* pp, int start, int end) {
+static void make_tokens_removed(Preprocessor* pp, int start, int end) {
for (int i = start; i < end; ++i) {
Token* tok = pp_token_at(pp, i);
- tok->kind = TokenKind_whitespace;
+ tok->kind = TokenKind_removed;
tok->value.string = NULL;
}
}
@@ -1468,7 +1468,7 @@ static void skip_group_opt(Preprocessor* pp, GroupDelimiterKind delimiter_kind)
int first_pos = pp->pos;
seek_to_next_newline(pp);
expect_pp_token(pp, TokenKind_newline);
- make_tokens_whitespaces(pp, first_pos, pp->pos);
+ make_tokens_removed(pp, first_pos, pp->pos);
}
expect_pp_token(pp, TokenKind_pp_directive_endif);
@@ -1491,7 +1491,7 @@ static void preprocess_preprocessing_file(Preprocessor* pp) {
static void remove_pp_directive(Preprocessor* pp, int directive_token_pos) {
seek_to_next_newline(pp);
skip_pp_token(pp, TokenKind_newline);
- make_tokens_whitespaces(pp, directive_token_pos, pp->pos);
+ make_tokens_removed(pp, directive_token_pos, pp->pos);
}
static void remove_pp_directives(Preprocessor* pp) {
@@ -1530,3 +1530,34 @@ TokenArray* preprocess(InFile* src, StrArray* included_files) {
strings_push(included_files, src->loc.filename);
return do_preprocess(src, 0, macros, included_files);
}
+
+void print_token_to_file(FILE* out, TokenArray* pp_tokens) {
+ for (size_t i = 0; i < pp_tokens->len; ++i) {
+ Token* tok = &pp_tokens->data[i];
+
+ if (tok->kind == TokenKind_whitespace) {
+ // TODO: preserve indent?
+ fprintf(out, " ");
+ } else if (tok->kind == TokenKind_removed) {
+ // Output nothing for removed tokens
+ } else if (tok->kind == TokenKind_newline) {
+ // TODO: remove adjacent newlines?
+ fprintf(out, "\n");
+ } else if (tok->kind != TokenKind_eof) {
+ // TODO: string literal
+ fprintf(out, "%s", token_stringify(tok));
+ // Add space after token if next token is not punctuation
+ // TODO: apply stricter approach
+ if (i + 1 < pp_tokens->len) {
+ Token* next = &pp_tokens->data[i + 1];
+ if (next->kind != TokenKind_newline && next->kind != TokenKind_whitespace &&
+ next->kind != TokenKind_removed && next->kind != TokenKind_eof && next->kind != TokenKind_comma &&
+ next->kind != TokenKind_semicolon && next->kind != TokenKind_paren_r &&
+ next->kind != TokenKind_bracket_r && next->kind != TokenKind_brace_r &&
+ next->kind != TokenKind_dot) {
+ fprintf(out, " ");
+ }
+ }
+ }
+ }
+}
diff --git a/src/preprocess.h b/src/preprocess.h
index 81007c3..5449857 100644
--- a/src/preprocess.h
+++ b/src/preprocess.h
@@ -1,10 +1,12 @@
#ifndef DUCC_PREPROCESS_H
#define DUCC_PREPROCESS_H
+#include <stdio.h>
#include "common.h"
#include "io.h"
#include "token.h"
TokenArray* preprocess(InFile* src, StrArray* included_files);
+void print_token_to_file(FILE* output_file, TokenArray* pp_tokens);
#endif
diff --git a/src/token.c b/src/token.c
index 45ef2cd..e825de1 100644
--- a/src/token.c
+++ b/src/token.c
@@ -12,6 +12,8 @@ const char* token_kind_stringify(TokenKind k) {
return "##";
else if (k == TokenKind_whitespace)
return "<whitespace>";
+ else if (k == TokenKind_removed)
+ return "<removed>";
else if (k == TokenKind_newline)
return "<new-line>";
else if (k == TokenKind_other)
diff --git a/src/token.h b/src/token.h
index 0bf23f6..bbd8066 100644
--- a/src/token.h
+++ b/src/token.h
@@ -11,6 +11,7 @@ typedef enum {
TokenKind_hash,
TokenKind_hashhash,
TokenKind_whitespace,
+ TokenKind_removed,
TokenKind_newline,
TokenKind_other,
TokenKind_character_constant,
diff --git a/src/tokenize.c b/src/tokenize.c
index 7ba00cd..c897b02 100644
--- a/src/tokenize.c
+++ b/src/tokenize.c
@@ -19,7 +19,7 @@ static void tokenize_all(Lexer* l) {
for (size_t pos = 0; pos < l->src->len; ++pos) {
Token* pp_tok = &l->src->data[pos];
TokenKind k = pp_tok->kind;
- if (k == TokenKind_whitespace || k == TokenKind_newline) {
+ if (k == TokenKind_removed || k == TokenKind_whitespace || k == TokenKind_newline) {
continue;
}
Token* tok = tokens_push_new(l->tokens);
diff --git a/tests/helpers.sh b/tests/helpers.sh
index eaccd5c..a69e213 100644
--- a/tests/helpers.sh
+++ b/tests/helpers.sh
@@ -49,6 +49,13 @@ function test_compile_error() {
diff -u expected output
}
+function test_cpp() {
+ cat > main.c
+
+ "$ducc" -E main.c > output
+ diff -u -Z expected output
+}
+
function test_example() {
filename="../../../examples/$1.c"
diff --git a/tests/test_function_macros.sh b/tests/test_function_macros.sh
index 4bc0cc2..1108650 100644
--- a/tests/test_function_macros.sh
+++ b/tests/test_function_macros.sh
@@ -83,19 +83,24 @@ int main() {
EOF
cat <<'EOF' > expected
-42
-123
+int printf ( const char *, ...);
+
+int main () {
+ int foo = 42;
+ printf ( %d\n, foo);
+
+ int a = 123;
+ printf ( %d\n, a);
+}
EOF
-test_diff <<'EOF'
+test_cpp <<'EOF'
int printf(const char*, ...);
int main() {
- // TODO: check if "foo" is expanded to "foo" or not once '#' operator is implemented.
#define foo foo
int foo = 42;
printf("%d\n", foo);
- // TODO: check if "a" is expanded to "a" or not once '#' operator is implemented.
#define a b
#define b c
#define c a
@@ -103,4 +108,3 @@ int main() {
printf("%d\n", a);
}
EOF
-
diff --git a/tests/test_if_elif_else.sh b/tests/test_if_elif_else.sh
index a07dd5c..e0adee0 100644
--- a/tests/test_if_elif_else.sh
+++ b/tests/test_if_elif_else.sh
@@ -1,8 +1,13 @@
cat <<'EOF' > expected
-1
+int printf ();
+
+
+int main () {
+ printf ( 1\n);
+}
EOF
-test_diff <<'EOF'
+test_cpp <<'EOF'
int printf();
#define A
@@ -18,13 +23,20 @@ int main() {
EOF
cat <<'EOF' > expected
-1
-1
-2
-3
+int printf ();
+
+int main () {
+ printf ( 1\n);
+
+ printf ( 1\n);
+
+ printf ( 2\n);
+
+ printf ( 3\n);
+}
EOF
-test_diff <<'EOF'
+test_cpp <<'EOF'
int printf();
int main() {
@@ -63,17 +75,28 @@ int main() {
EOF
cat <<'EOF' > expected
-1
-1
-2
-2
-3
-4
-3
-4
+int printf ();
+
+int main () {
+ printf ( 1\n);
+
+ printf ( 1\n);
+
+ printf ( 2\n);
+
+ printf ( 2\n);
+
+ printf ( 3\n);
+
+ printf ( 4\n);
+
+ printf ( 3\n);
+
+ printf ( 4\n);
+}
EOF
-test_diff <<'EOF'
+test_cpp <<'EOF'
int printf();
int main() {
@@ -190,4 +213,3 @@ int main() {
#endif
}
EOF
-
diff --git a/tests/test_ifdef_ifndef.sh b/tests/test_ifdef_ifndef.sh
index f59b87b..683a1dc 100644
--- a/tests/test_ifdef_ifndef.sh
+++ b/tests/test_ifdef_ifndef.sh
@@ -1,10 +1,21 @@
cat <<'EOF' > expected
-__ducc__ is defined.
-A is defined.
-B is undefined.
+int printf ();
+
+
+int main () {
+
+ printf ( __ducc__ is defined.\n);
+
+ printf ( A is defined.\n);
+
+ printf ( B is undefined.\n);
+
+
+ return 0;
+}
EOF
-test_diff <<'EOF'
+test_cpp <<'EOF'
int printf();
#define A 123
@@ -36,12 +47,23 @@ int main() {
EOF
cat <<'EOF' > expected
-__ducc__ is defined.
-A is defined.
-B is undefined.
+int printf ();
+
+
+int main () {
+
+ printf ( __ducc__ is defined.\n);
+
+ printf ( A is defined.\n);
+
+ printf ( B is undefined.\n);
+
+
+ return 0;
+}
EOF
-test_diff <<'EOF'
+test_cpp <<'EOF'
int printf();
#define A 123
@@ -71,4 +93,3 @@ int main() {
return 0;
}
EOF
-
diff --git a/tests/test_keywords.sh b/tests/test_keywords.sh
index 1b99b67..0bdb6f7 100644
--- a/tests/test_keywords.sh
+++ b/tests/test_keywords.sh
@@ -1,9 +1,14 @@
-touch expected
-test_diff <<'EOF'
+cat <<'EOF' > expected
+
+
+int printf ();
+int main () {}
+EOF
+
+test_cpp <<'EOF'
// A keyword is treated as a normal identifier in preprocessing phase.
#define auto int
auto printf();
auto main() {}
EOF
-
diff --git a/tests/test_macro_operators.sh b/tests/test_macro_operators.sh
index a77d3ca..f9cc385 100644
--- a/tests/test_macro_operators.sh
+++ b/tests/test_macro_operators.sh
@@ -1,14 +1,31 @@
cat <<'EOF' > expected
-foobar=100
-prefix_test=200
-test_suffix=300
-var_1=10
-var_2=20
-var_A=30
-number_12=12
+int printf ();
+
+
+int foobar = 100;
+int prefix_test = 200;
+int test_suffix = 300;
+
+int var_1 = 10;
+int var_2 = 20;
+
+int var_A = 30;
+
+int number_12 = 12;
+
+int main () {
+ printf ( foobar=%d\n, foobar);
+ printf ( prefix_test=%d\n, prefix_test);
+ printf ( test_suffix=%d\n, test_suffix);
+ printf ( var_1=%d\n, var_1);
+ printf ( var_2=%d\n, var_2);
+ printf ( var_A=%d\n, var_A);
+ printf ( number_12=%d\n, number_12);
+ return 0;
+}
EOF
-test_diff <<'EOF'
+test_cpp <<'EOF'
int printf();
#define CONCAT(a, b) a##b
@@ -42,9 +59,17 @@ int main() {
EOF
cat <<'EOF' > expected
-123
+int printf ( const char *, ...);
+
+
+int H ( int n) { return n; }
+
+int main () {
+ printf ( %d\n, H ( 123));
+}
EOF
-test_diff <<'EOF'
+
+test_cpp <<'EOF'
int printf(const char*, ...);
#define F(x) CHECK(G(x))
diff --git a/tests/test_preprocess_flag.sh b/tests/test_preprocess_flag.sh
new file mode 100644
index 0000000..3b52bc0
--- /dev/null
+++ b/tests/test_preprocess_flag.sh
@@ -0,0 +1,122 @@
+cat <<'EOF' > expected
+
+int main () {
+ int x = 100;
+ int y = 0;
+ return 0;
+}
+EOF
+
+test_cpp <<'EOF'
+#define MAX 100
+#define MIN 0
+
+int main() {
+ int x = MAX;
+ int y = MIN;
+ return 0;
+}
+EOF
+
+cat <<'EOF' > expected
+
+int main () {
+ int sum = ( ( 10) + ( 20));
+ int product = ( ( 3) * ( 4));
+ return 0;
+}
+EOF
+
+test_cpp <<'EOF'
+#define ADD(a, b) ((a) + (b))
+#define MUL(x, y) ((x) * (y))
+
+int main() {
+ int sum = ADD(10, 20);
+ int product = MUL(3, 4);
+ return 0;
+}
+EOF
+
+cat <<'EOF' > expected
+int main () {
+ return 42;
+}
+EOF
+
+test_cpp <<'EOF'
+#define FOO 42
+int main() {
+ return FOO;
+}
+EOF
+
+cat <<'EOF' > expected
+int foo () { return 1; }
+EOF
+
+test_cpp <<'EOF'
+#define X 1
+#ifdef X
+int foo() { return 1; }
+#else
+int foo() { return 0; }
+#endif
+EOF
+
+cat <<'EOF' > expected
+
+int main () {
+ int x = 1 + 2 * 3;
+ return 0;
+}
+EOF
+
+test_cpp <<'EOF'
+#define A 1
+#define B A + 2
+#define C B * 3
+
+int main() {
+ int x = C;
+ return 0;
+}
+EOF
+
+cat <<'EOF' > expected
+
+int main () {
+ int x = ( 5 + 3);
+ int y = ( ( 5) + ( 3));
+ return 0;
+}
+EOF
+
+test_cpp <<'EOF'
+#define SIMPLE (5 + 3)
+#define COMPLEX(a, b) ((a) + (b))
+
+int main() {
+ int x = SIMPLE;
+ int y = COMPLEX(5, 3);
+ return 0;
+}
+EOF
+
+cat <<'EOF' > expected
+
+int main () {
+ int bar = 0;
+ bar ++;
+}
+EOF
+
+test_cpp <<'EOF'
+#define A(a) a
+#define B(b) b
+
+int main() {
+ A(int)B(bar) = 0;
+ bar++;
+}
+EOF
diff --git a/tests/test_undef.sh b/tests/test_undef.sh
index 34c3dad..27d03c6 100644
--- a/tests/test_undef.sh
+++ b/tests/test_undef.sh
@@ -1,9 +1,16 @@
cat <<'EOF' > expected
-A is defined
-A is undefined
+int printf ();
+
+int main () {
+
+ printf ( A is defined\n);
+
+
+ printf ( A is undefined\n);
+}
EOF
-test_diff <<'EOF'
+test_cpp <<'EOF'
int printf();
int main() {
@@ -24,4 +31,3 @@ int main() {
#endif
}
EOF
-