feat: implement -E flag (preprocess only)

author: nsfisis <nsfisis@gmail.com> 2025-09-13 00:05:12 +0900
committer: nsfisis <nsfisis@gmail.com> 2025-09-13 01:38:56 +0900
commit: 8de7fa9da5fd8015f4fcc826b9270061b7b89478 (patch)
tree: ec9c1e56f179be207c31a113a0a96210f7509431 /src
parent: d41a97e957ef616d194f60b9b79820cd0162d920 (diff)
download: ducc-8de7fa9da5fd8015f4fcc826b9270061b7b89478.tar.gz
ducc-8de7fa9da5fd8015f4fcc826b9270061b7b89478.tar.zst
ducc-8de7fa9da5fd8015f4fcc826b9270061b7b89478.zip
8 files changed, 62 insertions, 11 deletions
diff --git a/src/cli.c b/src/cli.c
index 893c7f2..cd4cc42 100644
--- a/src/cli.c
+++ b/src/cli.c
@@ -10,8 +10,9 @@ static void print_version() {
 CliArgs* parse_cli_args(int argc, char** argv) {
     const char* output_filename = NULL;
     int positional_arguments_start = -1;
-    bool only_compile = false;
-    bool generate_deps = false;
+    bool opt_c = false;
+    bool opt_E = false;
+    bool opt_MMD = false;
 
     for (int i = 1; i < argc; ++i) {
         if (argv[i][0] != '-') {
@@ -34,9 +35,11 @@ CliArgs* parse_cli_args(int argc, char** argv) {
             output_filename = argv[i + 1];
             ++i;
         } else if (c == 'c') {
-            only_compile = true;
+            opt_c = true;
+        } else if (c == 'E') {
+            opt_E = true;
         } else if (strcmp(argv[i], "-MMD") == 0) {
-            generate_deps = true;
+            opt_MMD = true;
         } else if (strcmp(argv[i], "--version") == 0) {
             print_version();
             exit(0);
@@ -54,10 +57,11 @@ CliArgs* parse_cli_args(int argc, char** argv) {
     a->input_filename = argv[positional_arguments_start];
     a->output_filename = output_filename;
     a->output_assembly = !output_filename || str_ends_with(output_filename, ".s");
-    a->only_compile = only_compile;
+    a->only_compile = opt_c;
+    a->preprocess_only = opt_E;
     a->totally_deligate_to_gcc = false;
     a->gcc_command = NULL;
-    a->generate_deps = generate_deps;
+    a->generate_deps = opt_MMD;
 
     if (!a->only_compile && str_ends_with(a->input_filename, ".o")) {
         a->totally_deligate_to_gcc = true;
diff --git a/src/cli.h b/src/cli.h
index ec1fa50..3b1e7f5 100644
--- a/src/cli.h
+++ b/src/cli.h
@@ -8,6 +8,7 @@ typedef struct {
     const char* output_filename;
     bool output_assembly;
     bool only_compile;
+    bool preprocess_only;
     bool generate_deps;
     bool totally_deligate_to_gcc;
     const char* gcc_command;
diff --git a/src/main.c b/src/main.c
index b064f39..5a9fd56 100644
--- a/src/main.c
+++ b/src/main.c
@@ -22,6 +22,16 @@ int main(int argc, char** argv) {
     strings_init(&included_files);
 
     TokenArray* pp_tokens = preprocess(source, &included_files);
+
+    if (cli_args->preprocess_only) {
+        FILE* output_file = cli_args->output_filename ? fopen(cli_args->output_filename, "w") : stdout;
+        if (!output_file) {
+            fatal_error("Cannot open output file: %s", cli_args->output_filename);
+        }
+        print_token_to_file(output_file, pp_tokens);
+        return 0;
+    }
+
     TokenArray* tokens = tokenize(pp_tokens);
     Program* prog = parse(tokens);
 
diff --git a/src/preprocess.c b/src/preprocess.c
index 9ec582a..3965949 100644
--- a/src/preprocess.c
+++ b/src/preprocess.c
@@ -661,10 +661,10 @@ static void seek_to_next_newline(Preprocessor* pp) {
         ;
 }
 
-static void make_tokens_whitespaces(Preprocessor* pp, int start, int end) {
+static void make_tokens_removed(Preprocessor* pp, int start, int end) {
     for (int i = start; i < end; ++i) {
         Token* tok = pp_token_at(pp, i);
-        tok->kind = TokenKind_whitespace;
+        tok->kind = TokenKind_removed;
         tok->value.string = NULL;
     }
 }
@@ -1468,7 +1468,7 @@ static void skip_group_opt(Preprocessor* pp, GroupDelimiterKind delimiter_kind)
         int first_pos = pp->pos;
         seek_to_next_newline(pp);
         expect_pp_token(pp, TokenKind_newline);
-        make_tokens_whitespaces(pp, first_pos, pp->pos);
+        make_tokens_removed(pp, first_pos, pp->pos);
     }
 
     expect_pp_token(pp, TokenKind_pp_directive_endif);
@@ -1491,7 +1491,7 @@ static void preprocess_preprocessing_file(Preprocessor* pp) {
 static void remove_pp_directive(Preprocessor* pp, int directive_token_pos) {
     seek_to_next_newline(pp);
     skip_pp_token(pp, TokenKind_newline);
-    make_tokens_whitespaces(pp, directive_token_pos, pp->pos);
+    make_tokens_removed(pp, directive_token_pos, pp->pos);
 }
 
 static void remove_pp_directives(Preprocessor* pp) {
@@ -1530,3 +1530,34 @@ TokenArray* preprocess(InFile* src, StrArray* included_files) {
     strings_push(included_files, src->loc.filename);
     return do_preprocess(src, 0, macros, included_files);
 }
+
+void print_token_to_file(FILE* out, TokenArray* pp_tokens) {
+    for (size_t i = 0; i < pp_tokens->len; ++i) {
+        Token* tok = &pp_tokens->data[i];
+
+        if (tok->kind == TokenKind_whitespace) {
+            // TODO: preserve the original indentation? Every whitespace token is currently emitted as a single space.
+            fprintf(out, " ");
+        } else if (tok->kind == TokenKind_removed) {
+            // Output nothing for removed tokens
+        } else if (tok->kind == TokenKind_newline) {
+            // TODO: collapse runs of adjacent newlines? Every newline token is currently emitted as its own "\n".
+            fprintf(out, "\n");
+        } else if (tok->kind != TokenKind_eof) {
+            // TODO: string literal
+            fprintf(out, "%s", token_stringify(tok));
+            // Add a space after the token unless the next token's kind is one of those excluded in the condition below.
+            // TODO: apply a stricter rule for deciding when a separating space is needed
+            if (i + 1 < pp_tokens->len) {
+                Token* next = &pp_tokens->data[i + 1];
+                if (next->kind != TokenKind_newline && next->kind != TokenKind_whitespace &&
+                    next->kind != TokenKind_removed && next->kind != TokenKind_eof && next->kind != TokenKind_comma &&
+                    next->kind != TokenKind_semicolon && next->kind != TokenKind_paren_r &&
+                    next->kind != TokenKind_bracket_r && next->kind != TokenKind_brace_r &&
+                    next->kind != TokenKind_dot) {
+                    fprintf(out, " ");
+                }
+            }
+        }
+    }
+}
diff --git a/src/preprocess.h b/src/preprocess.h
index 81007c3..5449857 100644
--- a/src/preprocess.h
+++ b/src/preprocess.h
@@ -1,10 +1,12 @@
 #ifndef DUCC_PREPROCESS_H
 #define DUCC_PREPROCESS_H
 
+#include <stdio.h>
 #include "common.h"
 #include "io.h"
 #include "token.h"
 
 TokenArray* preprocess(InFile* src, StrArray* included_files);
+void print_token_to_file(FILE* output_file, TokenArray* pp_tokens);
 
 #endif
diff --git a/src/token.c b/src/token.c
index 45ef2cd..e825de1 100644
--- a/src/token.c
+++ b/src/token.c
@@ -12,6 +12,8 @@ const char* token_kind_stringify(TokenKind k) {
         return "##";
     else if (k == TokenKind_whitespace)
         return "<whitespace>";
+    else if (k == TokenKind_removed)
+        return "<removed>";
     else if (k == TokenKind_newline)
         return "<new-line>";
     else if (k == TokenKind_other)
diff --git a/src/token.h b/src/token.h
index 0bf23f6..bbd8066 100644
--- a/src/token.h
+++ b/src/token.h
@@ -11,6 +11,7 @@ typedef enum {
     TokenKind_hash,
     TokenKind_hashhash,
     TokenKind_whitespace,
+    TokenKind_removed,
     TokenKind_newline,
     TokenKind_other,
     TokenKind_character_constant,
diff --git a/src/tokenize.c b/src/tokenize.c
index 7ba00cd..c897b02 100644
--- a/src/tokenize.c
+++ b/src/tokenize.c
@@ -19,7 +19,7 @@ static void tokenize_all(Lexer* l) {
     for (size_t pos = 0; pos < l->src->len; ++pos) {
         Token* pp_tok = &l->src->data[pos];
         TokenKind k = pp_tok->kind;
-        if (k == TokenKind_whitespace || k == TokenKind_newline) {
+        if (k == TokenKind_removed || k == TokenKind_whitespace || k == TokenKind_newline) {
             continue;
         }
         Token* tok = tokens_push_new(l->tokens);
author	nsfisis <nsfisis@gmail.com>	2025-09-13 00:05:12 +0900
committer	nsfisis <nsfisis@gmail.com>	2025-09-13 01:38:56 +0900
commit	8de7fa9da5fd8015f4fcc826b9270061b7b89478 (patch)
tree	ec9c1e56f179be207c31a113a0a96210f7509431 /src
parent	d41a97e957ef616d194f60b9b79820cd0162d920 (diff)
download	ducc-8de7fa9da5fd8015f4fcc826b9270061b7b89478.tar.gz ducc-8de7fa9da5fd8015f4fcc826b9270061b7b89478.tar.zst ducc-8de7fa9da5fd8015f4fcc826b9270061b7b89478.zip