diff options
Diffstat (limited to 'src/preprocess.c')
| -rw-r--r-- | src/preprocess.c | 395 |
1 files changed, 2 insertions, 393 deletions
diff --git a/src/preprocess.c b/src/preprocess.c index 34c2fe0..0af146c 100644 --- a/src/preprocess.c +++ b/src/preprocess.c @@ -1,12 +1,12 @@ #include "preprocess.h" #include <assert.h> -#include <ctype.h> #include <stdio.h> #include <stdlib.h> #include <string.h> #include "common.h" #include "parse.h" #include "sys.h" +#include "tokenize.h" typedef enum { MacroKind_undef, @@ -165,397 +165,6 @@ void macroargs_build_json(JsonBuilder* builder, MacroArgArray* macroargs) { } typedef struct { - InFile* src; - bool at_bol; - bool expect_header_name; - TokenArray* pp_tokens; -} PpLexer; - -static PpLexer* pplexer_new(InFile* src) { - PpLexer* ppl = calloc(1, sizeof(PpLexer)); - - ppl->src = src; - ppl->at_bol = true; - ppl->expect_header_name = false; - ppl->pp_tokens = calloc(1, sizeof(TokenArray)); - tokens_init(ppl->pp_tokens, 1024 * 16); - - return ppl; -} - -static void pplexer_tokenize_pp_directive(PpLexer* ppl, Token* tok) { - // Skip whitespaces after '#'. - char c; - while (isspace((c = infile_peek_char(ppl->src)))) { - if (c == '\n') - break; - infile_next_char(ppl->src); - } - // '#' new-line - if (c == '\n') { - tok->kind = TokenKind_pp_directive_nop; - return; - } - - StrBuilder builder; - strbuilder_init(&builder); - while (isalnum(infile_peek_char(ppl->src))) { - strbuilder_append_char(&builder, infile_peek_char(ppl->src)); - infile_next_char(ppl->src); - } - const char* pp_directive_name = builder.buf; - - if (builder.len == 0) { - tok->kind = TokenKind_hash; - } else if (strcmp(pp_directive_name, "define") == 0) { - tok->kind = TokenKind_pp_directive_define; - } else if (strcmp(pp_directive_name, "elif") == 0) { - tok->kind = TokenKind_pp_directive_elif; - } else if (strcmp(pp_directive_name, "elifdef") == 0) { - tok->kind = TokenKind_pp_directive_elifdef; - } else if (strcmp(pp_directive_name, "elifndef") == 0) { - tok->kind = TokenKind_pp_directive_elifndef; - } else if (strcmp(pp_directive_name, "else") == 0) { - tok->kind = TokenKind_pp_directive_else; - } else if (strcmp(pp_directive_name, "embed") == 0) { - tok->kind = TokenKind_pp_directive_embed; - } else if (strcmp(pp_directive_name, "endif") == 0) { - tok->kind = TokenKind_pp_directive_endif; - } else if (strcmp(pp_directive_name, "error") == 0) { - tok->kind = TokenKind_pp_directive_error; - } else if (strcmp(pp_directive_name, "if") == 0) { - tok->kind = TokenKind_pp_directive_if; - } else if (strcmp(pp_directive_name, "ifdef") == 0) { - tok->kind = TokenKind_pp_directive_ifdef; - } else if (strcmp(pp_directive_name, "ifndef") == 0) { - tok->kind = TokenKind_pp_directive_ifndef; - } else if (strcmp(pp_directive_name, "include") == 0) { - ppl->expect_header_name = true; - tok->kind = TokenKind_pp_directive_include; - } else if (strcmp(pp_directive_name, "line") == 0) { - tok->kind = TokenKind_pp_directive_line; - } else if (strcmp(pp_directive_name, "pragma") == 0) { - tok->kind = TokenKind_pp_directive_pragma; - } else if (strcmp(pp_directive_name, "undef") == 0) { - tok->kind = TokenKind_pp_directive_undef; - } else if (strcmp(pp_directive_name, "warning") == 0) { - tok->kind = TokenKind_pp_directive_warning; - } else { - tok->kind = TokenKind_pp_directive_non_directive; - tok->value.string = pp_directive_name; - } -} - -static void pplexer_tokenize_all(PpLexer* ppl) { - while (!infile_eof(ppl->src)) { - Token* tok = tokens_push_new(ppl->pp_tokens); - tok->loc = ppl->src->loc; - char c = infile_peek_char(ppl->src); - - if (ppl->expect_header_name && c == '"') { - infile_next_char(ppl->src); - StrBuilder builder; - strbuilder_init(&builder); - strbuilder_append_char(&builder, '"'); - while (1) { - char ch = infile_peek_char(ppl->src); - if (ch == '"') - break; - strbuilder_append_char(&builder, ch); - if (ch == '\\') { - infile_next_char(ppl->src); - strbuilder_append_char(&builder, infile_peek_char(ppl->src)); - } - infile_next_char(ppl->src); - } - strbuilder_append_char(&builder, '"'); - infile_next_char(ppl->src); - tok->kind = TokenKind_header_name; - tok->value.string = builder.buf; - ppl->expect_header_name = false; - } else if (ppl->expect_header_name && c == '<') { - infile_next_char(ppl->src); - StrBuilder builder; - strbuilder_init(&builder); - strbuilder_append_char(&builder, '<'); - while (1) { - char ch = infile_peek_char(ppl->src); - if (ch == '>') - break; - strbuilder_append_char(&builder, ch); - infile_next_char(ppl->src); - } - strbuilder_append_char(&builder, '>'); - infile_next_char(ppl->src); - tok->kind = TokenKind_header_name; - tok->value.string = builder.buf; - ppl->expect_header_name = false; - } else if (c == '(') { - infile_next_char(ppl->src); - tok->kind = TokenKind_paren_l; - } else if (c == ')') { - infile_next_char(ppl->src); - tok->kind = TokenKind_paren_r; - } else if (c == '{') { - infile_next_char(ppl->src); - tok->kind = TokenKind_brace_l; - } else if (c == '}') { - infile_next_char(ppl->src); - tok->kind = TokenKind_brace_r; - } else if (c == '[') { - infile_next_char(ppl->src); - tok->kind = TokenKind_bracket_l; - } else if (c == ']') { - infile_next_char(ppl->src); - tok->kind = TokenKind_bracket_r; - } else if (c == ',') { - infile_next_char(ppl->src); - tok->kind = TokenKind_comma; - } else if (c == ':') { - infile_next_char(ppl->src); - tok->kind = TokenKind_colon; - } else if (c == ';') { - infile_next_char(ppl->src); - tok->kind = TokenKind_semicolon; - } else if (c == '^') { - infile_next_char(ppl->src); - if (infile_consume_if(ppl->src, '=')) { - tok->kind = TokenKind_assign_xor; - } else { - tok->kind = TokenKind_xor; - } - } else if (c == '?') { - infile_next_char(ppl->src); - tok->kind = TokenKind_question; - } else if (c == '~') { - infile_next_char(ppl->src); - tok->kind = TokenKind_tilde; - } else if (c == '+') { - infile_next_char(ppl->src); - if (infile_consume_if(ppl->src, '=')) { - tok->kind = TokenKind_assign_add; - } else if (infile_consume_if(ppl->src, '+')) { - tok->kind = TokenKind_plusplus; - } else { - tok->kind = TokenKind_plus; - } - } else if (c == '|') { - infile_next_char(ppl->src); - if (infile_consume_if(ppl->src, '=')) { - tok->kind = TokenKind_assign_or; - } else if (infile_consume_if(ppl->src, '|')) { - tok->kind = TokenKind_oror; - } else { - tok->kind = TokenKind_or; - } - } else if (c == '&') { - infile_next_char(ppl->src); - if (infile_consume_if(ppl->src, '=')) { - tok->kind = TokenKind_assign_and; - } else if (infile_consume_if(ppl->src, '&')) { - tok->kind = TokenKind_andand; - } else { - tok->kind = TokenKind_and; - } - } else if (c == '-') { - infile_next_char(ppl->src); - if (infile_consume_if(ppl->src, '>')) { - tok->kind = TokenKind_arrow; - } else if (infile_consume_if(ppl->src, '=')) { - tok->kind = TokenKind_assign_sub; - } else if (infile_consume_if(ppl->src, '-')) { - tok->kind = TokenKind_minusminus; - } else { - tok->kind = TokenKind_minus; - } - } else if (c == '*') { - infile_next_char(ppl->src); - if (infile_consume_if(ppl->src, '=')) { - tok->kind = TokenKind_assign_mul; - } else { - tok->kind = TokenKind_star; - } - } else if (c == '/') { - infile_next_char(ppl->src); - if (infile_consume_if(ppl->src, '=')) { - tok->kind = TokenKind_assign_div; - } else if (infile_consume_if(ppl->src, '/')) { - while (!infile_eof(ppl->src) && infile_peek_char(ppl->src) != '\n') { - infile_next_char(ppl->src); - } - tok->kind = TokenKind_whitespace; - } else if (infile_consume_if(ppl->src, '*')) { - while (infile_peek_char(ppl->src)) { - if (infile_consume_if(ppl->src, '*')) { - if (infile_consume_if(ppl->src, '/')) { - break; - } - continue; - } - infile_next_char(ppl->src); - } - tok->kind = TokenKind_whitespace; - } else { - tok->kind = TokenKind_slash; - } - } else if (c == '%') { - infile_next_char(ppl->src); - if (infile_consume_if(ppl->src, '=')) { - tok->kind = TokenKind_assign_mod; - } else { - tok->kind = TokenKind_percent; - } - } else if (c == '.') { - infile_next_char(ppl->src); - if (infile_consume_if(ppl->src, '.')) { - if (infile_consume_if(ppl->src, '.')) { - tok->kind = TokenKind_ellipsis; - } else { - tok->kind = TokenKind_other; - tok->value.string = ".."; - } - } else { - tok->kind = TokenKind_dot; - } - } else if (c == '!') { - infile_next_char(ppl->src); - if (infile_consume_if(ppl->src, '=')) { - tok->kind = TokenKind_ne; - } else { - tok->kind = TokenKind_not; - } - } else if (c == '=') { - infile_next_char(ppl->src); - if (infile_consume_if(ppl->src, '=')) { - tok->kind = TokenKind_eq; - } else { - tok->kind = TokenKind_assign; - } - } else if (c == '<') { - infile_next_char(ppl->src); - if (infile_consume_if(ppl->src, '=')) { - tok->kind = TokenKind_le; - } else if (infile_consume_if(ppl->src, '<')) { - if (infile_consume_if(ppl->src, '=')) { - tok->kind = TokenKind_assign_lshift; - } else { - tok->kind = TokenKind_lshift; - } - } else { - tok->kind = TokenKind_lt; - } - } else if (c == '>') { - infile_next_char(ppl->src); - if (infile_consume_if(ppl->src, '=')) { - tok->kind = TokenKind_ge; - } else if (infile_consume_if(ppl->src, '>')) { - if (infile_consume_if(ppl->src, '=')) { - tok->kind = TokenKind_assign_rshift; - } else { - tok->kind = TokenKind_rshift; - } - } else { - tok->kind = TokenKind_gt; - } - } else if (c == '#') { - infile_next_char(ppl->src); - if (infile_consume_if(ppl->src, '#')) { - tok->kind = TokenKind_hashhash; - } else { - if (ppl->at_bol) { - pplexer_tokenize_pp_directive(ppl, tok); - } else { - tok->kind = TokenKind_hash; - } - } - } else if (c == '\'') { - infile_next_char(ppl->src); - StrBuilder builder; - strbuilder_init(&builder); - strbuilder_append_char(&builder, '\''); - strbuilder_append_char(&builder, infile_peek_char(ppl->src)); - if (infile_peek_char(ppl->src) == '\\') { - infile_next_char(ppl->src); - strbuilder_append_char(&builder, infile_peek_char(ppl->src)); - } - strbuilder_append_char(&builder, '\''); - infile_next_char(ppl->src); - infile_next_char(ppl->src); - tok->kind = TokenKind_character_constant; - tok->value.string = builder.buf; - } else if (c == '"') { - infile_next_char(ppl->src); - StrBuilder builder; - strbuilder_init(&builder); - while (1) { - char ch = infile_peek_char(ppl->src); - if (ch == '"') - break; - strbuilder_append_char(&builder, ch); - if (ch == '\\') { - infile_next_char(ppl->src); - strbuilder_append_char(&builder, infile_peek_char(ppl->src)); - } - infile_next_char(ppl->src); - } - infile_next_char(ppl->src); - tok->kind = TokenKind_literal_str; - tok->value.string = builder.buf; - } else if (isdigit(c)) { - // TODO: implement tokenization of pp-number. - StrBuilder builder; - strbuilder_init(&builder); - while (isalnum(infile_peek_char(ppl->src))) { - strbuilder_append_char(&builder, infile_peek_char(ppl->src)); - infile_next_char(ppl->src); - } - tok->kind = TokenKind_literal_int; - tok->value.integer = atoi(builder.buf); - } else if (isalpha(c) || c == '_') { - StrBuilder builder; - strbuilder_init(&builder); - while (isalnum(infile_peek_char(ppl->src)) || infile_peek_char(ppl->src) == '_') { - strbuilder_append_char(&builder, infile_peek_char(ppl->src)); - infile_next_char(ppl->src); - } - tok->kind = TokenKind_ident; - tok->value.string = builder.buf; - } else if (c == '\n') { - infile_next_char(ppl->src); - tok->kind = TokenKind_newline; - } else if (isspace(c)) { - while (isspace((c = infile_peek_char(ppl->src)))) { - if (c == '\n') - break; - infile_next_char(ppl->src); - } - if (ppl->at_bol && infile_peek_char(ppl->src) == '#') { - infile_next_char(ppl->src); - pplexer_tokenize_pp_directive(ppl, tok); - } else { - tok->kind = TokenKind_whitespace; - } - } else { - infile_next_char(ppl->src); - tok->kind = TokenKind_other; - char* buf = calloc(2, sizeof(char)); - buf[0] = c; - tok->value.string = buf; - } - ppl->at_bol = tok->kind == TokenKind_newline; - } - Token* eof_tok = tokens_push_new(ppl->pp_tokens); - eof_tok->loc = ppl->src->loc; - eof_tok->kind = TokenKind_eof; -} - -static TokenArray* pp_tokenize(InFile* src) { - PpLexer* ppl = pplexer_new(src); - pplexer_tokenize_all(ppl); - return ppl->pp_tokens; -} - -typedef struct { TokenArray* pp_tokens; int pos; MacroArray* macros; @@ -1546,7 +1155,7 @@ static char* get_ducc_include_path() { static TokenArray* do_preprocess(InFile* src, int depth, MacroArray* macros, StrArray* included_files, StrArray* user_include_dirs) { - TokenArray* pp_tokens = pp_tokenize(src); + TokenArray* pp_tokens = tokenize(src); Preprocessor* pp = preprocessor_new(pp_tokens, depth, macros, included_files); // Ducc's built-in headers has highest priority. |
