From 1b406b13b03055d2b2d08e8279a4a80c41ca7c20 Mon Sep 17 00:00:00 2001 From: nsfisis Date: Sun, 3 May 2026 16:46:10 +0900 Subject: fix: div/mod/shift operations on unsigned integers --- src/ast.c | 5 +++++ src/ast.h | 1 + src/codegen.c | 49 ++++++++++++++++++++++++++++++++++++++----------- tests/operators.c | 45 +++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 89 insertions(+), 11 deletions(-) diff --git a/src/ast.c b/src/ast.c index 4d30a66..0dcec0d 100644 --- a/src/ast.c +++ b/src/ast.c @@ -126,6 +126,11 @@ bool type_is_unsized(Type* ty) { return ty->kind == TypeKind_void; } +bool type_is_unsigned(Type* ty) { + return ty->kind == TypeKind_uchar || ty->kind == TypeKind_ushort || ty->kind == TypeKind_uint || + ty->kind == TypeKind_ulong || ty->kind == TypeKind_ullong || ty->kind == TypeKind_bool; +} + int type_sizeof(Type* ty) { if (type_is_unsized(ty)) { fatal_error("type_sizeof: type size cannot be determined"); diff --git a/src/ast.h b/src/ast.h index f12fd83..c6fd0e2 100644 --- a/src/ast.h +++ b/src/ast.h @@ -73,6 +73,7 @@ Type* type_new_static_string(int len); Type* type_array_to_ptr(Type* ty); Type* type_new_func(Type* result, AstNode* params); bool type_is_unsized(Type* ty); +bool type_is_unsigned(Type* ty); int type_sizeof_struct(Type* ty); int type_sizeof_union(Type* ty); diff --git a/src/codegen.c b/src/codegen.c index 2da6851..10a0287 100644 --- a/src/codegen.c +++ b/src/codegen.c @@ -205,11 +205,21 @@ static void codegen_binary_expr(CodeGen* g, BinaryExprNode* expr, GenMode gen_mo } else if (expr->op == TokenKind_star) { fprintf(g->out, " imul rax, rdi\n"); } else if (expr->op == TokenKind_slash) { - fprintf(g->out, " cqo\n"); - fprintf(g->out, " idiv rdi\n"); + if (type_is_unsigned(expr->lhs->ty)) { + fprintf(g->out, " xor rdx, rdx\n"); + fprintf(g->out, " div rdi\n"); + } else { + fprintf(g->out, " cqo\n"); + fprintf(g->out, " idiv rdi\n"); + } } else if (expr->op == TokenKind_percent) { - fprintf(g->out, " cqo\n"); - fprintf(g->out, " idiv rdi\n"); + if (type_is_unsigned(expr->lhs->ty)) { + fprintf(g->out, " xor rdx, rdx\n"); + fprintf(g->out, " div rdi\n"); + } else { + fprintf(g->out, " cqo\n"); + fprintf(g->out, " idiv rdi\n"); + } fprintf(g->out, " mov rax, rdx\n"); } else if (expr->op == TokenKind_and) { fprintf(g->out, " and rax, rdi\n"); @@ -221,9 +231,12 @@ static void codegen_binary_expr(CodeGen* g, BinaryExprNode* expr, GenMode gen_mo fprintf(g->out, " mov rcx, rdi\n"); fprintf(g->out, " shl rax, cl\n"); } else if (expr->op == TokenKind_rshift) { - // TODO: check if the operand is signed or unsigned fprintf(g->out, " mov rcx, rdi\n"); - fprintf(g->out, " sar rax, cl\n"); + if (type_is_unsigned(expr->lhs->ty)) { + fprintf(g->out, " shr rax, cl\n"); + } else { + fprintf(g->out, " sar rax, cl\n"); + } } else if (expr->op == TokenKind_eq) { fprintf(g->out, " cmp rax, rdi\n"); fprintf(g->out, " sete al\n"); @@ -274,11 +287,21 @@ static void codegen_assign_expr_helper(CodeGen* g, AssignExprNode* expr) { } else if (expr->op == TokenKind_assign_mul) { fprintf(g->out, " imul rax, rdi\n"); } else if (expr->op == TokenKind_assign_div) { - fprintf(g->out, " cqo\n"); - fprintf(g->out, " idiv rdi\n"); + if (type_is_unsigned(expr->lhs->ty)) { + fprintf(g->out, " xor rdx, rdx\n"); + fprintf(g->out, " div rdi\n"); + } else { + fprintf(g->out, " cqo\n"); + fprintf(g->out, " idiv rdi\n"); + } } else if (expr->op == TokenKind_assign_mod) { - fprintf(g->out, " cqo\n"); - fprintf(g->out, " idiv rdi\n"); + if (type_is_unsigned(expr->lhs->ty)) { + fprintf(g->out, " xor rdx, rdx\n"); + fprintf(g->out, " div rdi\n"); + } else { + fprintf(g->out, " cqo\n"); + fprintf(g->out, " idiv rdi\n"); + } fprintf(g->out, " mov rax, rdx\n"); } else if (expr->op == TokenKind_assign_or) { fprintf(g->out, " or rax, rdi\n"); @@ -291,7 +314,11 @@ static void codegen_assign_expr_helper(CodeGen* g, AssignExprNode* expr) { fprintf(g->out, " shl rax, cl\n"); } else if (expr->op == TokenKind_assign_rshift) { fprintf(g->out, " mov rcx, rdi\n"); - fprintf(g->out, " sar rax, cl\n"); + if (type_is_unsigned(expr->lhs->ty)) { + fprintf(g->out, " shr rax, cl\n"); + } else { + fprintf(g->out, " sar rax, cl\n"); + } } else { unreachable(); } diff --git a/tests/operators.c b/tests/operators.c index e089477..5d969f2 100644 --- a/tests/operators.c +++ b/tests/operators.c @@ -116,6 +116,51 @@ int main() { k >>= 3; ASSERT_EQ(8, k); + // unsigned division / modulo + // (unsigned long)-1 = 0xFFFFFFFFFFFFFFFF + // 0xFFFFFFFFFFFFFFFF / 9 = 0x1C71C71C71C71C71 (huge positive) + // 0xFFFFFFFFFFFFFFFF % 9 = 6 + // signed: -1 / 9 == 0 and -1 % 9 == -1 + unsigned long u_max = -1L; + ASSERT_EQ(6L, u_max % 9); + ASSERT_EQ(1L, u_max % 7); + ASSERT_EQ(0, u_max / 9 == 0); + ASSERT_EQ(u_max, (u_max / 9) * 9 + u_max % 9); + + long s_neg = -1L; + ASSERT_EQ(0L, s_neg / 9); + ASSERT_EQ(-1L, s_neg % 9); + + // unsigned right shift (logical, fills 0) + // 0xFFFFFFFFFFFFFFFF >> 1 = 0x7FFFFFFFFFFFFFFF + unsigned long highbit = 1L; + for (int p = 0; p < 63; p += 1) + highbit *= 2; // 0x8000000000000000 + unsigned long lmax = highbit - 1; // 0x7FFFFFFFFFFFFFFF + ASSERT_EQ(lmax, ((unsigned long)-1L) >> 1); + + // signed right shift (arithmetic, preserves sign) + long s_neg2 = -1L; + ASSERT_EQ(-1L, s_neg2 >> 1); + ASSERT_EQ(-2L, (-8L) >> 2); + + // compound assignment + unsigned long u_div = -1L; + u_div /= 9; + ASSERT_EQ(0, u_div == 0); + + unsigned long u_mod = -1L; + u_mod %= 9; + ASSERT_EQ(6L, u_mod); + + unsigned long u_shr = -1L; + u_shr >>= 1; + ASSERT_EQ(lmax, u_shr); + + long s_shr = -1L; + s_shr >>= 1; + ASSERT_EQ(-1L, s_shr); + // ternary operator ASSERT_EQ(2, 1 ? 2 : 3); ASSERT_EQ(5, 0 ? 4 : 5); -- cgit v1.3.1