aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authornsfisis <nsfisis@gmail.com>2025-08-24 23:55:22 +0900
committernsfisis <nsfisis@gmail.com>2025-08-25 00:18:08 +0900
commit36da139565202a9a39d1e4261ab8bf950041518b (patch)
treec8e8867edc4e4fed94403331b8a7837e9951d9af
parent5bbc4414b8178ff924444857ac05e5b06cf0e651 (diff)
downloadducc-36da139565202a9a39d1e4261ab8bf950041518b.tar.gz
ducc-36da139565202a9a39d1e4261ab8bf950041518b.tar.zst
ducc-36da139565202a9a39d1e4261ab8bf950041518b.zip
feat: separate compilation
-rw-r--r--Makefile19
-rw-r--r--src/ast.c123
-rw-r--r--src/ast.h156
-rw-r--r--src/cli.c33
-rw-r--r--src/cli.h18
-rw-r--r--src/codegen.c4
-rw-r--r--src/codegen.h8
-rw-r--r--src/common.c26
-rw-r--r--src/common.h37
-rw-r--r--src/io.c14
-rw-r--r--src/io.h25
-rw-r--r--src/main.c (renamed from main.c)41
-rw-r--r--src/parse.c4
-rw-r--r--src/parse.h10
-rw-r--r--src/preprocess.c163
-rw-r--r--src/preprocess.h178
-rw-r--r--src/sys.c3
-rw-r--r--src/sys.h7
-rw-r--r--src/tokenize.c3
-rw-r--r--src/tokenize.h8
20 files changed, 543 insertions, 337 deletions
diff --git a/Makefile b/Makefile
index 6eb170a..36b4825 100644
--- a/Makefile
+++ b/Makefile
@@ -2,13 +2,28 @@ SRC_DIR := src
BUILD_DIR := build
TARGET ?= ducc
+OBJECTS := \
+ $(BUILD_DIR)/main.o \
+ $(BUILD_DIR)/ast.o \
+ $(BUILD_DIR)/cli.o \
+ $(BUILD_DIR)/codegen.o \
+ $(BUILD_DIR)/common.o \
+ $(BUILD_DIR)/io.o \
+ $(BUILD_DIR)/parse.o \
+ $(BUILD_DIR)/preprocess.o \
+ $(BUILD_DIR)/sys.o \
+ $(BUILD_DIR)/tokenize.o
+
.PHONY: all
all: $(BUILD_DIR) $(BUILD_DIR)/$(TARGET)
-$(BUILD_DIR)/$(TARGET): main.c
- $(CC) -MD -g -O0 -o $@ $<
+$(BUILD_DIR)/$(TARGET): $(OBJECTS)
+ $(CC) -MD -g -O0 -o $@ $^
$(BUILD_DIR):
@mkdir -p $(BUILD_DIR)
+$(BUILD_DIR)/%.o: $(SRC_DIR)/%.c | $(BUILD_DIR) # order-only prerequisite: the build dir must exist before compiling, but its timestamp never forces a rebuild
+ $(CC) -c -MD -g -O0 -o $@ $<
+
-include $(BUILD_DIR)/*.d
diff --git a/src/ast.c b/src/ast.c
index 66f7cc2..662052f 100644
--- a/src/ast.c
+++ b/src/ast.c
@@ -1,18 +1,6 @@
-enum TypeKind {
- TypeKind_unknown,
-
- TypeKind_char,
- TypeKind_short,
- TypeKind_int,
- TypeKind_long,
- TypeKind_void,
- TypeKind_ptr,
- TypeKind_array,
- TypeKind_enum,
- TypeKind_struct,
- TypeKind_union,
-};
-typedef enum TypeKind TypeKind;
+#include "ast.h"
+#include "common.h"
+#include "preprocess.h"
const char* type_kind_stringify(TypeKind k) {
if (k == TypeKind_unknown)
@@ -41,17 +29,6 @@ const char* type_kind_stringify(TypeKind k) {
unreachable();
}
-struct AstNode;
-
-struct Type {
- TypeKind kind;
- // Check `base` instead of `kind` to test if the type is an array or a pointer.
- struct Type* base;
- int array_size;
- struct AstNode* def;
-};
-typedef struct Type Type;
-
void type_dump(Type* ty) {
fprintf(stderr, "Type {\n");
fprintf(stderr, " kind = %s\n", type_kind_stringify(ty->kind));
@@ -91,13 +68,6 @@ BOOL type_is_unsized(Type* ty) {
return ty->kind == TypeKind_void;
}
-int type_sizeof_struct(Type* ty);
-int type_sizeof_union(Type* ty);
-int type_alignof_struct(Type* ty);
-int type_alignof_union(Type* ty);
-int type_offsetof(Type* ty, const char* name);
-Type* type_member_typeof(Type* ty, const char* name);
-
int type_sizeof(Type* ty) {
if (type_is_unsized(ty)) {
fatal_error("type_sizeof: type size cannot be determined");
@@ -156,93 +126,6 @@ int to_aligned(int n, int a) {
return (n + a - 1) / a * a;
}
-enum AstNodeKind {
- AstNodeKind_unknown,
- AstNodeKind_nop,
-
- AstNodeKind_assign_expr,
- AstNodeKind_binary_expr,
- AstNodeKind_break_stmt,
- AstNodeKind_cond_expr,
- AstNodeKind_continue_stmt,
- AstNodeKind_deref_expr,
- AstNodeKind_do_while_stmt,
- AstNodeKind_enum_def,
- AstNodeKind_enum_member,
- AstNodeKind_expr_stmt,
- AstNodeKind_for_stmt,
- AstNodeKind_func_call,
- AstNodeKind_func_decl,
- AstNodeKind_func_def,
- AstNodeKind_gvar,
- AstNodeKind_gvar_decl,
- AstNodeKind_if_stmt,
- AstNodeKind_int_expr,
- AstNodeKind_list,
- AstNodeKind_logical_expr,
- AstNodeKind_lvar,
- AstNodeKind_lvar_decl,
- AstNodeKind_param,
- AstNodeKind_ref_expr,
- AstNodeKind_return_stmt,
- AstNodeKind_str_expr,
- AstNodeKind_struct_decl,
- AstNodeKind_struct_def,
- AstNodeKind_struct_member,
- AstNodeKind_type,
- AstNodeKind_typedef_decl,
- AstNodeKind_unary_expr,
- AstNodeKind_union_decl,
- AstNodeKind_union_def,
- AstNodeKind_union_member,
-
- // Intermediate ASTs: they are used only in parsing, not for parse result.
- AstNodeKind_declarator,
-};
-typedef enum AstNodeKind AstNodeKind;
-
-#define node_items __n1
-#define node_len __i1
-#define node_cap __i2
-#define node_expr __n1
-#define node_lhs __n1
-#define node_rhs __n2
-#define node_operand __n1
-#define node_cond __n1
-#define node_init __n2
-#define node_update __n3
-#define node_then __n2
-#define node_else __n3
-#define node_body __n4
-#define node_members __n1
-#define node_params __n1
-#define node_args __n1
-#define node_int_value __i1
-#define node_idx __i1
-#define node_op __i1
-#define node_stack_offset __i1
-#define node_stack_size __i1
-
-struct AstNode {
- AstNodeKind kind;
- const char* name;
- Type* ty;
- struct AstNode* __n1;
- struct AstNode* __n2;
- struct AstNode* __n3;
- struct AstNode* __n4;
- int __i1;
- int __i2;
-};
-typedef struct AstNode AstNode;
-
-struct Program {
- AstNode* funcs;
- AstNode* vars;
- const char** str_literals;
-};
-typedef struct Program Program;
-
AstNode* ast_new(AstNodeKind kind) {
AstNode* ast = calloc(1, sizeof(AstNode));
ast->kind = kind;
diff --git a/src/ast.h b/src/ast.h
new file mode 100644
index 0000000..0711e44
--- /dev/null
+++ b/src/ast.h
@@ -0,0 +1,156 @@
+#ifndef DUCC_AST_H
+#define DUCC_AST_H
+
+#include "std.h"
+
+enum TypeKind {
+ TypeKind_unknown,
+
+ TypeKind_char,
+ TypeKind_short,
+ TypeKind_int,
+ TypeKind_long,
+ TypeKind_void,
+ TypeKind_ptr,
+ TypeKind_array,
+ TypeKind_enum,
+ TypeKind_struct,
+ TypeKind_union,
+};
+typedef enum TypeKind TypeKind;
+
+const char* type_kind_stringify(TypeKind k);
+
+struct AstNode;
+
+struct Type {
+ TypeKind kind;
+ // Check `base` instead of `kind` to test if the type is an array or a pointer.
+ struct Type* base;
+ int array_size;
+ struct AstNode* def;
+};
+typedef struct Type Type;
+
+void type_dump(Type* ty);
+Type* type_new(TypeKind kind);
+Type* type_new_ptr(Type* base);
+Type* type_new_array(Type* elem, int size);
+Type* type_new_static_string(int len);
+Type* type_array_to_ptr(Type* ty);
+BOOL type_is_unsized(Type* ty);
+
+int type_sizeof_struct(Type* ty);
+int type_sizeof_union(Type* ty);
+int type_alignof_struct(Type* ty);
+int type_alignof_union(Type* ty);
+int type_offsetof(Type* ty, const char* name);
+Type* type_member_typeof(Type* ty, const char* name);
+
+int type_sizeof(Type* ty);
+int type_alignof(Type* ty);
+
+int to_aligned(int n, int a);
+
+enum AstNodeKind {
+ AstNodeKind_unknown,
+ AstNodeKind_nop,
+
+ AstNodeKind_assign_expr,
+ AstNodeKind_binary_expr,
+ AstNodeKind_break_stmt,
+ AstNodeKind_cond_expr,
+ AstNodeKind_continue_stmt,
+ AstNodeKind_deref_expr,
+ AstNodeKind_do_while_stmt,
+ AstNodeKind_enum_def,
+ AstNodeKind_enum_member,
+ AstNodeKind_expr_stmt,
+ AstNodeKind_for_stmt,
+ AstNodeKind_func_call,
+ AstNodeKind_func_decl,
+ AstNodeKind_func_def,
+ AstNodeKind_gvar,
+ AstNodeKind_gvar_decl,
+ AstNodeKind_if_stmt,
+ AstNodeKind_int_expr,
+ AstNodeKind_list,
+ AstNodeKind_logical_expr,
+ AstNodeKind_lvar,
+ AstNodeKind_lvar_decl,
+ AstNodeKind_param,
+ AstNodeKind_ref_expr,
+ AstNodeKind_return_stmt,
+ AstNodeKind_str_expr,
+ AstNodeKind_struct_decl,
+ AstNodeKind_struct_def,
+ AstNodeKind_struct_member,
+ AstNodeKind_type,
+ AstNodeKind_typedef_decl,
+ AstNodeKind_unary_expr,
+ AstNodeKind_union_decl,
+ AstNodeKind_union_def,
+ AstNodeKind_union_member,
+
+ // Intermediate ASTs: they are used only in parsing, not for parse result.
+ AstNodeKind_declarator,
+};
+typedef enum AstNodeKind AstNodeKind;
+
+#define node_items __n1
+#define node_len __i1
+#define node_cap __i2
+#define node_expr __n1
+#define node_lhs __n1
+#define node_rhs __n2
+#define node_operand __n1
+#define node_cond __n1
+#define node_init __n2
+#define node_update __n3
+#define node_then __n2
+#define node_else __n3
+#define node_body __n4
+#define node_members __n1
+#define node_params __n1
+#define node_args __n1
+#define node_int_value __i1
+#define node_idx __i1
+#define node_op __i1
+#define node_stack_offset __i1
+#define node_stack_size __i1
+
+struct AstNode { // Single node layout shared by every AstNodeKind; the generic slots are read through the node_* macros defined above.
+ AstNodeKind kind; // node variant; presumably decides which node_* aliases are meaningful - confirm against the parser
+ const char* name;
+ Type* ty;
+ struct AstNode* __n1; // aliases: node_items, node_expr, node_lhs, node_operand, node_cond, node_members, node_params, node_args
+ struct AstNode* __n2; // aliases: node_rhs, node_init, node_then
+ struct AstNode* __n3; // aliases: node_update, node_else
+ struct AstNode* __n4; // alias: node_body
+ int __i1; // aliases: node_len, node_int_value, node_idx, node_op, node_stack_offset, node_stack_size
+ int __i2; // alias: node_cap. NOTE(review): identifiers containing `__` are reserved for the implementation in ISO C.
+};
+typedef struct AstNode AstNode;
+
+struct Program {
+ AstNode* funcs;
+ AstNode* vars;
+ const char** str_literals;
+};
+typedef struct Program Program;
+
+AstNode* ast_new(AstNodeKind kind);
+AstNode* ast_new_list(int capacity);
+void ast_append(AstNode* list, AstNode* item);
+AstNode* ast_new_int(int v);
+AstNode* ast_new_unary_expr(int op, AstNode* operand);
+AstNode* ast_new_binary_expr(int op, AstNode* lhs, AstNode* rhs);
+
+AstNode* ast_new_assign_expr(int op, AstNode* lhs, AstNode* rhs);
+AstNode* ast_new_assign_add_expr(AstNode* lhs, AstNode* rhs);
+AstNode* ast_new_assign_sub_expr(AstNode* lhs, AstNode* rhs);
+AstNode* ast_new_ref_expr(AstNode* operand);
+AstNode* ast_new_deref_expr(AstNode* operand);
+AstNode* ast_new_member_access_expr(AstNode* obj, const char* name);
+
+#endif
diff --git a/src/cli.c b/src/cli.c
index c8bb60c..ccc64b9 100644
--- a/src/cli.c
+++ b/src/cli.c
@@ -1,13 +1,10 @@
-struct CliArgs {
- const char* input_filename;
- const char* output_filename;
- BOOL output_executable;
-};
-typedef struct CliArgs CliArgs;
+#include "cli.h"
+#include "common.h"
CliArgs* parse_cli_args(int argc, char** argv) {
const char* output_filename = NULL;
int positional_arguments_start = -1;
+ BOOL only_compile = FALSE;
for (int i = 1; i < argc; ++i) {
if (argv[i][0] != '-') {
@@ -27,6 +24,8 @@ CliArgs* parse_cli_args(int argc, char** argv) {
}
output_filename = argv[i + 1];
++i;
+ } else if (c == 'c') {
+ only_compile = TRUE;
} else {
fatal_error("unknown option: %s", argv[i]);
}
@@ -38,6 +37,26 @@ CliArgs* parse_cli_args(int argc, char** argv) {
CliArgs* a = calloc(1, sizeof(CliArgs));
a->input_filename = argv[positional_arguments_start];
a->output_filename = output_filename;
- a->output_executable = output_filename && !str_ends_with(output_filename, ".s");
+ a->output_assembly = !output_filename || str_ends_with(output_filename, ".s");
+ a->only_compile = only_compile;
+ a->totally_deligate_to_gcc = FALSE;
+ a->gcc_command = NULL;
+
+ if (!a->only_compile && str_ends_with(a->input_filename, ".o")) {
+ a->totally_deligate_to_gcc = TRUE;
+ StrBuilder builder;
+ strbuilder_init(&builder);
+ strbuilder_append_string(&builder, "gcc ");
+ for (int i = 1; i < argc; ++i) {
+ strbuilder_append_char(&builder, '\'');
+ strbuilder_append_string(&builder, argv[i]);
+ strbuilder_append_char(&builder, '\'');
+ if (i != argc - 1) {
+ strbuilder_append_char(&builder, ' ');
+ }
+ }
+ a->gcc_command = builder.buf;
+ }
+
return a;
}
diff --git a/src/cli.h b/src/cli.h
new file mode 100644
index 0000000..3288c10
--- /dev/null
+++ b/src/cli.h
@@ -0,0 +1,18 @@
+#ifndef DUCC_CLI_H
+#define DUCC_CLI_H
+
+#include "std.h"
+
+struct CliArgs { // Parsed command line, as filled in by parse_cli_args().
+ const char* input_filename; // argv entry at the start of the positional arguments
+ const char* output_filename; // argument that follows the output-file option; NULL when the option was not given
+ BOOL output_assembly; // TRUE when no output file was given or its name ends with ".s"
+ BOOL only_compile; // TRUE when the `c` option was given; main then passes `-c` to gcc
+ BOOL totally_deligate_to_gcc; // TRUE when only_compile is FALSE and input_filename ends with ".o"; main then only runs gcc_command. (sic: "delegate"; a rename must touch cli.c and main.c too)
+ const char* gcc_command; // "gcc " followed by argv[1..] each wrapped in single quotes; NULL unless totally_deligate_to_gcc. NOTE(review): embedded single quotes are not escaped.
+};
+typedef struct CliArgs CliArgs;
+
+CliArgs* parse_cli_args(int argc, char** argv);
+
+#endif
diff --git a/src/codegen.c b/src/codegen.c
index b24fe38..da2cd17 100644
--- a/src/codegen.c
+++ b/src/codegen.c
@@ -1,3 +1,7 @@
+#include "codegen.h"
+#include "common.h"
+#include "preprocess.h"
+
enum GenMode {
GenMode_lval,
GenMode_rval,
diff --git a/src/codegen.h b/src/codegen.h
new file mode 100644
index 0000000..95ec069
--- /dev/null
+++ b/src/codegen.h
@@ -0,0 +1,8 @@
+#ifndef DUCC_CODEGEN_H
+#define DUCC_CODEGEN_H
+
+#include "ast.h"
+
+void codegen(Program* prog, FILE* out);
+
+#endif
diff --git a/src/common.c b/src/common.c
index 8fb73f7..595f2c7 100644
--- a/src/common.c
+++ b/src/common.c
@@ -1,3 +1,5 @@
+#include "common.h"
+
void fatal_error(const char* msg, ...) {
va_list args;
va_start(args, msg);
@@ -7,10 +9,6 @@ void fatal_error(const char* msg, ...) {
exit(1);
}
-#define unreachable() fatal_error("%s:%d: unreachable", __FILE__, __LINE__)
-
-#define unimplemented() fatal_error("%s:%d: unimplemented", __FILE__, __LINE__)
-
BOOL str_ends_with(const char* s, const char* suffix) {
size_t l1 = strlen(s);
size_t l2 = strlen(suffix);
@@ -19,13 +17,6 @@ BOOL str_ends_with(const char* s, const char* suffix) {
return strcmp(s + l1 - l2, suffix) == 0;
}
-struct StrBuilder {
- size_t len;
- size_t capacity;
- char* buf;
-};
-typedef struct StrBuilder StrBuilder;
-
void strbuilder_init(StrBuilder* b) {
b->len = 0;
b->capacity = 16;
@@ -48,12 +39,13 @@ void strbuilder_append_char(StrBuilder* b, int c) {
b->buf[b->len++] = c;
}
-struct StrArray {
- size_t len;
- size_t capacity;
- const char** data;
-};
-typedef struct StrArray StrArray;
+void strbuilder_append_string(StrBuilder* b, const char* s) { // Appends the bytes of `s` to `b`, excluding the terminator of `s`; no terminator is written here.
+    size_t len = strlen(s); // size_t rather than int: matches strlen's return type and the type of b->len, so nothing is narrowed
+    strbuilder_reserve(b, b->len + len + 1); // +1 because the size passed to reserve must include a trailing null byte
+    for (size_t i = 0; i < len; ++i) {
+        b->buf[b->len++] = s[i];
+    }
+}
void strings_init(StrArray* strings) {
strings->len = 0;
diff --git a/src/common.h b/src/common.h
new file mode 100644
index 0000000..f8f6b1d
--- /dev/null
+++ b/src/common.h
@@ -0,0 +1,37 @@
+#ifndef DUCC_COMMON_H
+#define DUCC_COMMON_H
+
+#include "std.h"
+
+void fatal_error(const char* msg, ...);
+
+#define unreachable() fatal_error("%s:%d: unreachable", __FILE__, __LINE__)
+#define unimplemented() fatal_error("%s:%d: unimplemented", __FILE__, __LINE__)
+
+BOOL str_ends_with(const char* s, const char* suffix);
+
+struct StrBuilder { // Character buffer filled through strbuilder_append_char / strbuilder_append_string.
+ size_t len; // number of bytes appended so far; strbuilder_init sets it to 0
+ size_t capacity; // strbuilder_init sets it to 16; presumably the allocated size of buf - confirm in strbuilder_reserve
+ char* buf; // storage; the append functions write at index len and then advance len
+};
+typedef struct StrBuilder StrBuilder;
+
+void strbuilder_init(StrBuilder* b);
+// `size` must include a trailing null byte.
+void strbuilder_reserve(StrBuilder* b, size_t size);
+void strbuilder_append_char(StrBuilder* b, int c);
+void strbuilder_append_string(StrBuilder* b, const char* s);
+
+struct StrArray {
+ size_t len;
+ size_t capacity;
+ const char** data;
+};
+typedef struct StrArray StrArray;
+
+void strings_init(StrArray* strings);
+void strings_reserve(StrArray* strings, size_t size);
+int strings_push(StrArray* strings, const char* str);
+
+#endif
diff --git a/src/io.c b/src/io.c
index 5cd5e86..fb21672 100644
--- a/src/io.c
+++ b/src/io.c
@@ -1,15 +1,5 @@
-struct SourceLocation {
- const char* filename;
- int line;
-};
-typedef struct SourceLocation SourceLocation;
-
-struct InFile {
- const char* buf;
- int pos;
- SourceLocation loc;
-};
-typedef struct InFile InFile;
+#include "io.h"
+#include "common.h"
InFile* infile_open(const char* filename) {
FILE* in;
diff --git a/src/io.h b/src/io.h
new file mode 100644
index 0000000..8a96b59
--- /dev/null
+++ b/src/io.h
@@ -0,0 +1,25 @@
+#ifndef DUCC_IO_H
+#define DUCC_IO_H
+
+#include "std.h"
+
+struct SourceLocation {
+ const char* filename;
+ int line;
+};
+typedef struct SourceLocation SourceLocation;
+
+struct InFile {
+ const char* buf;
+ int pos;
+ SourceLocation loc;
+};
+typedef struct InFile InFile;
+
+InFile* infile_open(const char* filename);
+BOOL infile_eof(InFile* f);
+char infile_peek_char(InFile* f);
+char infile_next_char(InFile* f);
+BOOL infile_consume_if(InFile* f, char expected);
+
+#endif
diff --git a/main.c b/src/main.c
index 434f304..5704367 100644
--- a/main.c
+++ b/src/main.c
@@ -1,40 +1,45 @@
-// Currently the source code depends on the #include order.
-// clang-format off
-#include "src/std.h"
-#include "src/common.c"
-#include "src/io.c"
-#include "src/sys.c"
-#include "src/preprocess.c"
-#include "src/tokenize.c"
-#include "src/ast.c"
-#include "src/parse.c"
-#include "src/codegen.c"
-#include "src/cli.c"
-// clang-format on
+#include "ast.h"
+#include "cli.h"
+#include "codegen.h"
+#include "common.h"
+#include "io.h"
+#include "parse.h"
+#include "preprocess.h"
+#include "std.h"
+#include "tokenize.h"
int main(int argc, char** argv) {
CliArgs* cli_args = parse_cli_args(argc, argv);
+
+ if (cli_args->totally_deligate_to_gcc) {
+ return system(cli_args->gcc_command);
+ }
+
InFile* source = infile_open(cli_args->input_filename);
TokenArray* pp_tokens = preprocess(source);
TokenArray* tokens = tokenize(pp_tokens);
Program* prog = parse(tokens);
const char* assembly_filename;
- if (cli_args->output_executable) {
+ if (cli_args->output_assembly) {
+ assembly_filename = cli_args->output_filename;
+ } else {
char* temp_filename = calloc(19, sizeof(char));
temp_filename = strdup("/tmp/ducc-XXXXXX.s");
mkstemps(temp_filename, strlen(".s"));
assembly_filename = temp_filename;
- } else {
- assembly_filename = cli_args->output_filename;
}
FILE* assembly_file = assembly_filename ? fopen(assembly_filename, "wb") : stdout;
codegen(prog, assembly_file);
fclose(assembly_file);
- if (cli_args->output_executable) {
+ if (!cli_args->output_assembly) {
char cmd_buf[256];
- sprintf(cmd_buf, "gcc -s -o '%s' '%s'", cli_args->output_filename, assembly_filename);
+ if (cli_args->only_compile) {
+ sprintf(cmd_buf, "gcc -c -s -o '%s' '%s'", cli_args->output_filename, assembly_filename);
+ } else {
+ sprintf(cmd_buf, "gcc -s -o '%s' '%s'", cli_args->output_filename, assembly_filename);
+ }
int result = system(cmd_buf);
if (result != 0) {
fatal_error("gcc failed: %d", result);
diff --git a/src/parse.c b/src/parse.c
index 6bb87ec..882d769 100644
--- a/src/parse.c
+++ b/src/parse.c
@@ -1,4 +1,6 @@
-#define LVAR_MAX 32
+#include "parse.h"
+#include "common.h"
+#include "tokenize.h"
struct LocalVar {
const char* name;
diff --git a/src/parse.h b/src/parse.h
new file mode 100644
index 0000000..fe67b75
--- /dev/null
+++ b/src/parse.h
@@ -0,0 +1,10 @@
+#ifndef DUCC_PARSE_H
+#define DUCC_PARSE_H
+
+#include "ast.h"
+#include "preprocess.h"
+
+Program* parse(TokenArray* tokens);
+BOOL pp_eval_constant_expression(TokenArray* pp_tokens);
+
+#endif
diff --git a/src/preprocess.c b/src/preprocess.c
index 3ed9740..8c4d096 100644
--- a/src/preprocess.c
+++ b/src/preprocess.c
@@ -1,142 +1,6 @@
-enum TokenKind {
- TokenKind_eof,
-
- // Only preprocessing phase.
- TokenKind_hash,
- TokenKind_hashhash,
- TokenKind_whitespace,
- TokenKind_newline,
- TokenKind_other,
- TokenKind_character_constant,
- TokenKind_header_name,
- TokenKind_pp_directive_define,
- TokenKind_pp_directive_elif,
- TokenKind_pp_directive_elifdef,
- TokenKind_pp_directive_elifndef,
- TokenKind_pp_directive_else,
- TokenKind_pp_directive_embed,
- TokenKind_pp_directive_endif,
- TokenKind_pp_directive_error,
- TokenKind_pp_directive_if,
- TokenKind_pp_directive_ifdef,
- TokenKind_pp_directive_ifndef,
- TokenKind_pp_directive_include,
- TokenKind_pp_directive_line,
- TokenKind_pp_directive_pragma,
- TokenKind_pp_directive_undef,
- TokenKind_pp_directive_warning,
- TokenKind_pp_operator_defined,
- TokenKind_pp_operator___has_c_attribute,
- TokenKind_pp_operator___has_embed,
- TokenKind_pp_operator___has_include,
-
- // C23: 6.4.1
- TokenKind_keyword_alignas,
- TokenKind_keyword_alignof,
- TokenKind_keyword_auto,
- TokenKind_keyword_bool,
- TokenKind_keyword_break,
- TokenKind_keyword_case,
- TokenKind_keyword_char,
- TokenKind_keyword_const,
- TokenKind_keyword_constexpr,
- TokenKind_keyword_continue,
- TokenKind_keyword_default,
- TokenKind_keyword_do,
- TokenKind_keyword_double,
- TokenKind_keyword_else,
- TokenKind_keyword_enum,
- TokenKind_keyword_extern,
- TokenKind_keyword_false,
- TokenKind_keyword_float,
- TokenKind_keyword_for,
- TokenKind_keyword_goto,
- TokenKind_keyword_if,
- TokenKind_keyword_inline,
- TokenKind_keyword_int,
- TokenKind_keyword_long,
- TokenKind_keyword_nullptr,
- TokenKind_keyword_register,
- TokenKind_keyword_restrict,
- TokenKind_keyword_return,
- TokenKind_keyword_short,
- TokenKind_keyword_signed,
- TokenKind_keyword_sizeof,
- TokenKind_keyword_static,
- TokenKind_keyword_static_assert,
- TokenKind_keyword_struct,
- TokenKind_keyword_switch,
- TokenKind_keyword_thread_local,
- TokenKind_keyword_true,
- TokenKind_keyword_typedef,
- TokenKind_keyword_typeof,
- TokenKind_keyword_typeof_unqual,
- TokenKind_keyword_union,
- TokenKind_keyword_unsigned,
- TokenKind_keyword_void,
- TokenKind_keyword_volatile,
- TokenKind_keyword_while,
- TokenKind_keyword__Atomic,
- TokenKind_keyword__BitInt,
- TokenKind_keyword__Complex,
- TokenKind_keyword__Decimal128,
- TokenKind_keyword__Decimal32,
- TokenKind_keyword__Decimal64,
- TokenKind_keyword__Generic,
- TokenKind_keyword__Imaginary,
- TokenKind_keyword__Noreturn,
-
- TokenKind_and,
- TokenKind_andand,
- TokenKind_arrow,
- TokenKind_assign,
- TokenKind_assign_add,
- TokenKind_assign_and,
- TokenKind_assign_div,
- TokenKind_assign_lshift,
- TokenKind_assign_mod,
- TokenKind_assign_mul,
- TokenKind_assign_or,
- TokenKind_assign_rshift,
- TokenKind_assign_sub,
- TokenKind_assign_xor,
- TokenKind_brace_l,
- TokenKind_brace_r,
- TokenKind_bracket_l,
- TokenKind_bracket_r,
- TokenKind_colon,
- TokenKind_comma,
- TokenKind_dot,
- TokenKind_ellipsis,
- TokenKind_eq,
- TokenKind_ge,
- TokenKind_gt,
- TokenKind_ident,
- TokenKind_le,
- TokenKind_literal_int,
- TokenKind_literal_str,
- TokenKind_lshift,
- TokenKind_lt,
- TokenKind_minus,
- TokenKind_minusminus,
- TokenKind_ne,
- TokenKind_not,
- TokenKind_or,
- TokenKind_oror,
- TokenKind_paren_l,
- TokenKind_paren_r,
- TokenKind_percent,
- TokenKind_plus,
- TokenKind_plusplus,
- TokenKind_question,
- TokenKind_rshift,
- TokenKind_semicolon,
- TokenKind_slash,
- TokenKind_star,
- TokenKind_tilde,
- TokenKind_xor,
-};
-typedef enum TokenKind TokenKind;
+#include "preprocess.h"
+#include "common.h"
+#include "sys.h"
const char* token_kind_stringify(TokenKind k) {
if (k == TokenKind_eof)
@@ -405,20 +269,6 @@ const char* token_kind_stringify(TokenKind k) {
unreachable();
}
-// TokenValue is externally tagged by Token's kind.
-union TokenValue {
- const char* string;
- int integer;
-};
-typedef union TokenValue TokenValue;
-
-struct Token {
- TokenKind kind;
- TokenValue value;
- SourceLocation loc;
-};
-typedef struct Token Token;
-
const char* token_stringify(Token* t) {
TokenKind k = t->kind;
if (k == TokenKind_literal_int) {
@@ -437,13 +287,6 @@ const char* token_stringify(Token* t) {
}
}
-struct TokenArray {
- size_t len;
- size_t capacity;
- Token* data;
-};
-typedef struct TokenArray TokenArray;
-
void tokens_init(TokenArray* tokens, size_t capacity) {
tokens->len = 0;
tokens->capacity = capacity;
diff --git a/src/preprocess.h b/src/preprocess.h
new file mode 100644
index 0000000..8c5ade2
--- /dev/null
+++ b/src/preprocess.h
@@ -0,0 +1,178 @@
+#ifndef DUCC_PREPROCESS_H
+#define DUCC_PREPROCESS_H
+
+#include "io.h"
+
+enum TokenKind {
+ TokenKind_eof,
+
+ // Only preprocessing phase.
+ TokenKind_hash,
+ TokenKind_hashhash,
+ TokenKind_whitespace,
+ TokenKind_newline,
+ TokenKind_other,
+ TokenKind_character_constant,
+ TokenKind_header_name,
+ TokenKind_pp_directive_define,
+ TokenKind_pp_directive_elif,
+ TokenKind_pp_directive_elifdef,
+ TokenKind_pp_directive_elifndef,
+ TokenKind_pp_directive_else,
+ TokenKind_pp_directive_embed,
+ TokenKind_pp_directive_endif,
+ TokenKind_pp_directive_error,
+ TokenKind_pp_directive_if,
+ TokenKind_pp_directive_ifdef,
+ TokenKind_pp_directive_ifndef,
+ TokenKind_pp_directive_include,
+ TokenKind_pp_directive_line,
+ TokenKind_pp_directive_pragma,
+ TokenKind_pp_directive_undef,
+ TokenKind_pp_directive_warning,
+ TokenKind_pp_operator_defined,
+ TokenKind_pp_operator___has_c_attribute,
+ TokenKind_pp_operator___has_embed,
+ TokenKind_pp_operator___has_include,
+
+ // C23: 6.4.1
+ TokenKind_keyword_alignas,
+ TokenKind_keyword_alignof,
+ TokenKind_keyword_auto,
+ TokenKind_keyword_bool,
+ TokenKind_keyword_break,
+ TokenKind_keyword_case,
+ TokenKind_keyword_char,
+ TokenKind_keyword_const,
+ TokenKind_keyword_constexpr,
+ TokenKind_keyword_continue,
+ TokenKind_keyword_default,
+ TokenKind_keyword_do,
+ TokenKind_keyword_double,
+ TokenKind_keyword_else,
+ TokenKind_keyword_enum,
+ TokenKind_keyword_extern,
+ TokenKind_keyword_false,
+ TokenKind_keyword_float,
+ TokenKind_keyword_for,
+ TokenKind_keyword_goto,
+ TokenKind_keyword_if,
+ TokenKind_keyword_inline,
+ TokenKind_keyword_int,
+ TokenKind_keyword_long,
+ TokenKind_keyword_nullptr,
+ TokenKind_keyword_register,
+ TokenKind_keyword_restrict,
+ TokenKind_keyword_return,
+ TokenKind_keyword_short,
+ TokenKind_keyword_signed,
+ TokenKind_keyword_sizeof,
+ TokenKind_keyword_static,
+ TokenKind_keyword_static_assert,
+ TokenKind_keyword_struct,
+ TokenKind_keyword_switch,
+ TokenKind_keyword_thread_local,
+ TokenKind_keyword_true,
+ TokenKind_keyword_typedef,
+ TokenKind_keyword_typeof,
+ TokenKind_keyword_typeof_unqual,
+ TokenKind_keyword_union,
+ TokenKind_keyword_unsigned,
+ TokenKind_keyword_void,
+ TokenKind_keyword_volatile,
+ TokenKind_keyword_while,
+ TokenKind_keyword__Atomic,
+ TokenKind_keyword__BitInt,
+ TokenKind_keyword__Complex,
+ TokenKind_keyword__Decimal128,
+ TokenKind_keyword__Decimal32,
+ TokenKind_keyword__Decimal64,
+ TokenKind_keyword__Generic,
+ TokenKind_keyword__Imaginary,
+ TokenKind_keyword__Noreturn,
+
+ TokenKind_and,
+ TokenKind_andand,
+ TokenKind_arrow,
+ TokenKind_assign,
+ TokenKind_assign_add,
+ TokenKind_assign_and,
+ TokenKind_assign_div,
+ TokenKind_assign_lshift,
+ TokenKind_assign_mod,
+ TokenKind_assign_mul,
+ TokenKind_assign_or,
+ TokenKind_assign_rshift,
+ TokenKind_assign_sub,
+ TokenKind_assign_xor,
+ TokenKind_brace_l,
+ TokenKind_brace_r,
+ TokenKind_bracket_l,
+ TokenKind_bracket_r,
+ TokenKind_colon,
+ TokenKind_comma,
+ TokenKind_dot,
+ TokenKind_ellipsis,
+ TokenKind_eq,
+ TokenKind_ge,
+ TokenKind_gt,
+ TokenKind_ident,
+ TokenKind_le,
+ TokenKind_literal_int,
+ TokenKind_literal_str,
+ TokenKind_lshift,
+ TokenKind_lt,
+ TokenKind_minus,
+ TokenKind_minusminus,
+ TokenKind_ne,
+ TokenKind_not,
+ TokenKind_or,
+ TokenKind_oror,
+ TokenKind_paren_l,
+ TokenKind_paren_r,
+ TokenKind_percent,
+ TokenKind_plus,
+ TokenKind_plusplus,
+ TokenKind_question,
+ TokenKind_rshift,
+ TokenKind_semicolon,
+ TokenKind_slash,
+ TokenKind_star,
+ TokenKind_tilde,
+ TokenKind_xor,
+};
+typedef enum TokenKind TokenKind;
+
+const char* token_kind_stringify(TokenKind k);
+
+// TokenValue is externally tagged by Token's kind.
+union TokenValue {
+ const char* string;
+ int integer;
+};
+typedef union TokenValue TokenValue;
+
+struct Token {
+ TokenKind kind;
+ TokenValue value;
+ SourceLocation loc;
+};
+typedef struct Token Token;
+
+const char* token_stringify(Token* t);
+
+struct TokenArray {
+ size_t len;
+ size_t capacity;
+ Token* data;
+};
+typedef struct TokenArray TokenArray;
+
+void tokens_init(TokenArray* tokens, size_t capacity);
+void tokens_reserve(TokenArray* tokens, size_t size);
+Token* tokens_push_new(TokenArray* tokens);
+Token* tokens_pop(TokenArray* tokens);
+
+TokenArray* preprocess(InFile* src);
+
+#endif
diff --git a/src/sys.c b/src/sys.c
index aa7b13d..5c9a4ea 100644
--- a/src/sys.c
+++ b/src/sys.c
@@ -1,3 +1,6 @@
+#include "sys.h"
+#include "std.h"
+
char* get_self_path() {
char* buf = calloc(PATH_MAX, sizeof(char));
ssize_t len = readlink("/proc/self/exe", buf, PATH_MAX - 1);
diff --git a/src/sys.h b/src/sys.h
new file mode 100644
index 0000000..2527724
--- /dev/null
+++ b/src/sys.h
@@ -0,0 +1,7 @@
+#ifndef DUCC_SYS_H
+#define DUCC_SYS_H
+
+// It returns a path not including final / except for root directory.
+char* get_self_dir();
+
+#endif
diff --git a/src/tokenize.c b/src/tokenize.c
index a7e99b2..0fb126f 100644
--- a/src/tokenize.c
+++ b/src/tokenize.c
@@ -1,3 +1,6 @@
+#include "tokenize.h"
+#include "common.h"
+
struct Lexer {
TokenArray* src;
TokenArray* tokens;
diff --git a/src/tokenize.h b/src/tokenize.h
new file mode 100644
index 0000000..2e28335
--- /dev/null
+++ b/src/tokenize.h
@@ -0,0 +1,8 @@
+#ifndef DUCC_TOKENIZE_H
+#define DUCC_TOKENIZE_H
+
+#include "preprocess.h"
+
+TokenArray* tokenize(TokenArray* pp_tokens);
+
+#endif