about summary refs log tree commit diff homepage
diff options
context:
space:
mode:
author nsfisis <nsfisis@gmail.com> 2026-01-25 15:47:26 +0900
committer nsfisis <nsfisis@gmail.com> 2026-01-25 15:47:26 +0900
commit 4c654156403c9660f0973605afa8d7b3645055c4 (patch)
tree 4d3d5137ed6456f02ac6b7ae2549cdb7c176ec18
parent 45f6c28a2e085da2a9d5d2331533e4cfdc0c0492 (diff)
download zgjq-4c654156403c9660f0973605afa8d7b3645055c4.tar.gz
zgjq-4c654156403c9660f0973605afa8d7b3645055c4.tar.zst
zgjq-4c654156403c9660f0973605afa8d7b3645055c4.zip
implement tokenization of floating-point numbers
-rw-r--r-- src/jq/parse.zig 10
-rw-r--r-- src/jq/tokenize.zig 146
2 files changed, 148 insertions, 8 deletions
diff --git a/src/jq/parse.zig b/src/jq/parse.zig
index 60ba89c..3ec9520 100644
--- a/src/jq/parse.zig
+++ b/src/jq/parse.zig
@@ -163,7 +163,13 @@ fn parseTerm(allocator: std.mem.Allocator, parse_allocator: std.mem.Allocator, t
if (first_token.kind() == .number) {
_ = try tokens.next();
const number_value = try allocator.create(jv.Value);
- number_value.* = .{ .integer = first_token.number };
+ const f = first_token.number;
+ const i: i64 = @intFromFloat(f);
+ if (@as(f64, @floatFromInt(i)) == f) {
+ number_value.* = .{ .integer = i };
+ } else {
+ number_value.* = .{ .float = f };
+ }
const number_node = try parse_allocator.create(Ast);
number_node.* = .{ .literal = number_value };
return number_node;
@@ -205,7 +211,7 @@ fn parseIndexAccess(allocator: std.mem.Allocator, parse_allocator: std.mem.Alloc
_ = try tokens.expect(.bracket_right);
const index_value = try allocator.create(jv.Value);
- index_value.* = .{ .integer = index_token.number };
+ index_value.* = .{ .integer = @intFromFloat(index_token.number) };
const index_node = try parse_allocator.create(Ast);
index_node.* = .{ .literal = index_value };
const ast = try parse_allocator.create(Ast);
diff --git a/src/jq/tokenize.zig b/src/jq/tokenize.zig
index 8c70665..3e00e4f 100644
--- a/src/jq/tokenize.zig
+++ b/src/jq/tokenize.zig
@@ -3,6 +3,7 @@ const std = @import("std");
pub const TokenizeError = error{
UnexpectedEnd,
InvalidCharacter,
+ InvalidNumber,
};
pub const TokenKind = enum {
@@ -132,7 +133,7 @@ pub const Token = union(TokenKind) {
keyword_try,
identifier: []const u8,
- number: i64,
+ number: f64,
pub fn kind(self: @This()) TokenKind {
return self;
@@ -234,19 +235,75 @@ fn tokenizeIdentifier(allocator: std.mem.Allocator, reader: *std.Io.Reader, firs
return buffer.toOwnedSlice(allocator);
}
-fn tokenizeNumber(reader: *std.Io.Reader, first: u8) error{ReadFailed}!i64 {
- var value: i64 = first - '0';
+fn tokenizeNumber(allocator: std.mem.Allocator, reader: *std.Io.Reader, first: u8) !f64 {
+ var buffer = try std.ArrayList(u8).initCapacity(allocator, 16);
+ try buffer.append(allocator, first);
+ // Integer part
while (try peekByte(reader)) |c| {
if (std.ascii.isDigit(c)) {
- value = value * 10 + (c - '0');
+ try buffer.append(allocator, c);
reader.toss(1);
} else {
break;
}
}
- return value;
+ // Fractional part
+ if (try peekByte(reader) == '.') {
+ const lookahead = reader.peek(2) catch |err| switch (err) {
+ error.EndOfStream => null,
+ error.ReadFailed => return error.ReadFailed,
+ };
+ if (lookahead) |bytes| {
+ if (std.ascii.isDigit(bytes[1])) {
+ try buffer.append(allocator, '.');
+ reader.toss(1);
+ while (try peekByte(reader)) |c| {
+ if (std.ascii.isDigit(c)) {
+ try buffer.append(allocator, c);
+ reader.toss(1);
+ } else {
+ break;
+ }
+ }
+ }
+ }
+ }
+
+ // Exponent part
+ if (try peekByte(reader)) |c| {
+ if (c == 'e' or c == 'E') {
+ try buffer.append(allocator, c);
+ reader.toss(1);
+
+ // Sign
+ if (try peekByte(reader)) |sign| {
+ if (sign == '+' or sign == '-') {
+ try buffer.append(allocator, sign);
+ reader.toss(1);
+ }
+ }
+
+ // Exponent
+ var has_exp_digits = false;
+ while (try peekByte(reader)) |d| {
+ if (std.ascii.isDigit(d)) {
+ try buffer.append(allocator, d);
+ reader.toss(1);
+ has_exp_digits = true;
+ } else {
+ break;
+ }
+ }
+ if (!has_exp_digits) {
+ return error.InvalidNumber;
+ }
+ }
+ }
+
+ const slice = buffer.toOwnedSlice(allocator) catch return error.OutOfMemory;
+ return std.fmt.parseFloat(f64, slice) catch return error.InvalidNumber;
}
fn tryConvertToKeywordToken(identifier: []const u8) ?Token {
@@ -328,7 +385,7 @@ pub fn tokenize(allocator: std.mem.Allocator, reader: *std.Io.Reader) ![]Token {
'}' => .brace_right,
else => blk: {
if (std.ascii.isDigit(c)) {
- break :blk .{ .number = try tokenizeNumber(reader, c) };
+ break :blk .{ .number = try tokenizeNumber(allocator, reader, c) };
} else if (isIdentifierStart(c)) {
const ident = try tokenizeIdentifier(allocator, reader, c);
break :blk tryConvertToKeywordToken(ident) orelse Token{ .identifier = ident };
@@ -693,3 +750,80 @@ test "tokenize comment with single CR does not end comment" {
try std.testing.expectEqualStrings("baz", tokens[3].identifier);
try std.testing.expectEqual(.end, tokens[4]);
}
+
+test "tokenize floating point numbers" {
+ var allocator = std.heap.ArenaAllocator.init(std.testing.allocator);
+ defer allocator.deinit();
+
+ var reader = std.Io.Reader.fixed("3.14 1.5 0.5");
+ const tokens = try tokenize(allocator.allocator(), &reader);
+
+ try std.testing.expectEqual(4, tokens.len);
+ try std.testing.expectEqual(Token{ .number = 3.14 }, tokens[0]);
+ try std.testing.expectEqual(Token{ .number = 1.5 }, tokens[1]);
+ try std.testing.expectEqual(Token{ .number = 0.5 }, tokens[2]);
+ try std.testing.expectEqual(.end, tokens[3]);
+}
+
+test "tokenize exponent notation" {
+ var allocator = std.heap.ArenaAllocator.init(std.testing.allocator);
+ defer allocator.deinit();
+
+ var reader = std.Io.Reader.fixed("1e10 1E10 1e+10 1e-10");
+ const tokens = try tokenize(allocator.allocator(), &reader);
+
+ try std.testing.expectEqual(5, tokens.len);
+ try std.testing.expectEqual(Token{ .number = 1e10 }, tokens[0]);
+ try std.testing.expectEqual(Token{ .number = 1e10 }, tokens[1]);
+ try std.testing.expectEqual(Token{ .number = 1e10 }, tokens[2]);
+ try std.testing.expectEqual(Token{ .number = 1e-10 }, tokens[3]);
+ try std.testing.expectEqual(.end, tokens[4]);
+}
+
+test "tokenize float with exponent" {
+ var allocator = std.heap.ArenaAllocator.init(std.testing.allocator);
+ defer allocator.deinit();
+
+ var reader = std.Io.Reader.fixed("1.5e-3 2.5E+2");
+ const tokens = try tokenize(allocator.allocator(), &reader);
+
+ try std.testing.expectEqual(3, tokens.len);
+ try std.testing.expectEqual(Token{ .number = 1.5e-3 }, tokens[0]);
+ try std.testing.expectEqual(Token{ .number = 2.5e+2 }, tokens[1]);
+ try std.testing.expectEqual(.end, tokens[2]);
+}
+
+test "tokenize number followed by dot dot" {
+ var allocator = std.heap.ArenaAllocator.init(std.testing.allocator);
+ defer allocator.deinit();
+
+ // "1..2" should be parsed as 1, .., 2 not 1. .2
+ var reader = std.Io.Reader.fixed("1..2");
+ const tokens = try tokenize(allocator.allocator(), &reader);
+
+ try std.testing.expectEqual(4, tokens.len);
+ try std.testing.expectEqual(Token{ .number = 1 }, tokens[0]);
+ try std.testing.expectEqual(.dot_dot, tokens[1]);
+ try std.testing.expectEqual(Token{ .number = 2 }, tokens[2]);
+ try std.testing.expectEqual(.end, tokens[3]);
+}
+
+test "tokenize invalid exponent" {
+ var allocator = std.heap.ArenaAllocator.init(std.testing.allocator);
+ defer allocator.deinit();
+
+ var reader = std.Io.Reader.fixed("1e");
+ const result = tokenize(allocator.allocator(), &reader);
+
+ try std.testing.expectError(error.InvalidNumber, result);
+}
+
+test "tokenize invalid exponent with sign only" {
+ var allocator = std.heap.ArenaAllocator.init(std.testing.allocator);
+ defer allocator.deinit();
+
+ var reader = std.Io.Reader.fixed("1e+");
+ const result = tokenize(allocator.allocator(), &reader);
+
+ try std.testing.expectError(error.InvalidNumber, result);
+}