diff options
| -rw-r--r-- | main.c | 365 |
1 files changed, 364 insertions, 1 deletions
@@ -1,6 +1,369 @@ +int atoi(); +void* calloc(); +void exit(); +int getchar(); +int isalnum(); +int isalpha(); +int isdigit(); +int isspace(); +void* malloc(); +void* memcpy(); +void* memset(); int printf(); +int putchar(); +int sprintf(); +int strcmp(); +char* strstr(); +long strtol(); + +int fatal_error(char* msg) { + printf("%s", msg); + exit(1); + return 0; +} + +int source_len; +char source[1024*1024]; + +int read_all() { + int c; + int n = 0; + while ((c = getchar()) != -1) { + source[n] = c; + n += 1; + } + return n; +} + +#define TK_EOF 0 +#define TK_K_BREAK 1 +#define TK_K_CHAR 2 +#define TK_K_CONTINUE 3 +#define TK_K_ELSE 4 +#define TK_K_FOR 5 +#define TK_K_IF 6 +#define TK_K_INT 7 +#define TK_K_LONG 8 +#define TK_K_RETURN 9 +#define TK_K_SIZEOF 10 +#define TK_K_STRUCT 11 +#define TK_K_VOID 12 +#define TK_IDENT 13 +#define TK_L_INT 14 +#define TK_L_STR 15 // TODO +#define TK_PAREN_L 16 +#define TK_PAREN_R 17 +#define TK_BRACE_L 18 +#define TK_BRACE_R 19 +#define TK_BRACKET_L 20 +#define TK_BRACKET_R 21 +#define TK_SEMICOLON 22 + +struct Token { + int kind; + char* value; + int len; +}; +typedef struct Token* TOKEN; + +struct Token tokens[1024]; +int token_len; + +int tokenize(char* src, int len) { + int pos = 0; + int token_len = 0; + while (pos < len) { + char c = src[pos]; + if (c == '(') { + pos += 1; + tokens[token_len].kind = TK_PAREN_L; + tokens[token_len].value = src + pos - 1; + tokens[token_len].len = 1; + token_len += 1; + } else if (c == ')') { + pos += 1; + tokens[token_len].kind = TK_PAREN_R; + tokens[token_len].value = src + pos - 1; + tokens[token_len].len = 1; + token_len += 1; + } else if (c == '{') { + pos += 1; + tokens[token_len].kind = TK_BRACE_L; + tokens[token_len].value = src + pos - 1; + tokens[token_len].len = 1; + token_len += 1; + } else if (c == '}') { + pos += 1; + tokens[token_len].kind = TK_BRACE_R; + tokens[token_len].value = src + pos - 1; + tokens[token_len].len = 1; + token_len += 1; + } else if (c == '[') { + pos += 1; + tokens[token_len].kind = TK_BRACKET_L; + tokens[token_len].value = src + pos - 1; + tokens[token_len].len = 1; + token_len += 1; + } else if (c == ']') { + pos += 1; + tokens[token_len].kind = TK_BRACKET_R; + tokens[token_len].value = src + pos - 1; + tokens[token_len].len = 1; + token_len += 1; + } else if (c == ';') { + pos += 1; + tokens[token_len].kind = TK_SEMICOLON; + tokens[token_len].value = src + pos - 1; + tokens[token_len].len = 1; + token_len += 1; + } else if (isdigit(c)) { + int start = pos; + while (isdigit(src[pos])) { + pos += 1; + } + tokens[token_len].kind = TK_L_INT; + tokens[token_len].value = src + start; + tokens[token_len].len = pos - start; + token_len += 1; + } else if (isalpha(c)) { + int start = pos; + while (isalnum(src[pos])) { + pos += 1; + } + int kind; + if (strstr(src + start, "break") == src + start) { + kind = TK_K_BREAK; + } else if (strstr(src + start, "char") == src + start) { + kind = TK_K_CHAR; + } else if (strstr(src + start, "continue") == src + start) { + kind = TK_K_CONTINUE; + } else if (strstr(src + start, "else") == src + start) { + kind = TK_K_ELSE; + } else if (strstr(src + start, "for") == src + start) { + kind = TK_K_FOR; + } else if (strstr(src + start, "if") == src + start) { + kind = TK_K_IF; + } else if (strstr(src + start, "int") == src + start) { + kind = TK_K_INT; + } else if (strstr(src + start, "long") == src + start) { + kind = TK_K_LONG; + } else if (strstr(src + start, "return") == src + start) { + kind = TK_K_RETURN; + } else if (strstr(src + start, "sizeof") == src + start) { + kind = TK_K_SIZEOF; + } else if (strstr(src + start, "struct") == src + start) { + kind = TK_K_STRUCT; + } else { + kind = TK_IDENT; + } + tokens[token_len].kind = kind; + tokens[token_len].value = src + start; + tokens[token_len].len = pos - start; + token_len += 1; + } else if (isspace(c)) { + pos += 1; + } else { + fatal_error("unknown token"); + } + } + return token_len; +} + +int token_pos; + +#define AST_UNKNOWN 0 +#define AST_PROGRAM 1 +#define AST_FUNC_DECL 2 +#define AST_FUNC_DEF 3 +#define AST_TYPE 4 +#define AST_BLOCK 5 +#define AST_RETURN_STMT 6 +#define AST_INT_LIT_EXPR 7 + +struct AstNode { + int kind; + struct AstNode* next; + struct AstNode* last; + TOKEN func_name; + struct AstNode* func_body; + struct AstNode* expr1; + int int_value; +}; +typedef struct AstNode* AST; + +int eof() { + return token_pos < token_len; +} + +TOKEN next_tok() { + return tokens + token_pos; +} + +TOKEN expect(int expected) { + TOKEN t = next_tok(); + if (t->kind == expected) { + token_pos += 1; + return tokens + token_pos - 1; + } else { + char buf[1024]; + sprintf(buf, "expected %d, but got %d", expected, t->kind); + fatal_error(buf); + } +} + +AST parse_expr(); + +AST parse_primitive_expr() { + TOKEN t = next_tok(); + if (t->kind == TK_L_INT) { + token_pos += 1; + AST e = calloc(1, sizeof(struct AstNode)); + e->kind = AST_INT_LIT_EXPR; + char buf[1024]; + memcpy(buf, t->value, t->len); + buf[t->len] = 0; + e->int_value = atoi(buf); + return e; + } else { + fatal_error("parse_expr"); + } +} + +AST parse_expr() { + return parse_primitive_expr(); +} + +AST parse_return_stmt() { + expect(TK_K_RETURN); + AST expr = parse_expr(); + expect(TK_SEMICOLON); + + AST ret = calloc(1, sizeof(struct AstNode)); + ret->kind = AST_RETURN_STMT; + ret->expr1 = expr; + return ret; +} + +AST parse_stmt() { + TOKEN t = next_tok(); + if (t->kind == TK_K_RETURN) { + return parse_return_stmt(); + } else { + fatal_error("parse_stmt"); + } +} + +AST parse_block_stmt() { + AST list = calloc(1, sizeof(struct AstNode)); + list->kind = AST_BLOCK; + list->last = list; + expect(TK_BRACE_L); + while (next_tok()->kind != TK_BRACE_R) { + AST n = parse_stmt(); + list->last->next = n; + list->last = n; + } + expect(TK_BRACE_R); + return list; +} + +AST parse_func_decl_or_def() { + TOKEN t = next_tok(); + if (t->kind == TK_K_INT) { + token_pos += 1; + TOKEN name = expect(TK_IDENT); + expect(TK_PAREN_L); + expect(TK_PAREN_R); + AST body = parse_block_stmt(); + AST func = calloc(1, sizeof(struct AstNode)); + func->kind = AST_FUNC_DEF; + func->func_name = name; + func->func_body = body; + return func; + } else { + fatal_error("parse_func_decl_or_def: expect type"); + } +} + +AST parse_toplevel() { + return parse_func_decl_or_def(); +} + +AST parse() { + AST list = calloc(1, sizeof(struct AstNode)); + list->kind = AST_PROGRAM; + list->last = list; + while (eof()) { + AST n = parse_toplevel(); + list->last->next = n; + list->last = n; + } + return list; +} + +void assert_ast_kind(AST ast, int kind) { + if (ast->kind != kind) { + char buf[1024]; + sprintf(buf, "invalid ast kind: expected %d, but got %d", kind, ast->kind); + fatal_error(buf); + } +} + +void gen_int_lit_expr(AST ast) { + assert_ast_kind(ast, AST_INT_LIT_EXPR); + printf(" mov rax, %d\n", ast->int_value); +} + +void gen_primitive_expr(AST ast) { + gen_int_lit_expr(ast); +} + +void gen_expr(AST ast) { + gen_primitive_expr(ast); +} + +void gen_return_stmt(AST ast) { + assert_ast_kind(ast, AST_RETURN_STMT); + gen_expr(ast->expr1); + printf(" ret\n"); +} + +void gen_block_stmt(AST ast) { + assert_ast_kind(ast, AST_BLOCK); + gen_return_stmt(ast->next); +} + +void gen_stmt(AST ast) { + gen_block_stmt(ast); +} + +void gen_func(AST ast) { + assert_ast_kind(ast, AST_FUNC_DEF); + gen_stmt(ast->func_body); + printf(" ret\n"); +} + +void gen(AST ast) { + assert_ast_kind(ast, AST_PROGRAM); + printf(".intel_syntax noprefix\n"); + printf(".globl main\n"); + printf("main:\n"); + gen_func(ast->next); +} int main() { - printf("Hello, World!\n"); + source_len = read_all(); + token_len = tokenize(source, source_len); + + // for (int i = 0; tokens[i].kind != TK_EOF; i++) { + // for (int j = 0; j < tokens[i].len; j++) { + // putchar(tokens[i].value[j]); + // } + // printf("\n"); + // } + + AST ast = parse(); + gen(ast); + return 0; } |
