From 48d9ec8aef4c3e7f3574346a6cf6a1fa3d725561 Mon Sep 17 00:00:00 2001
From: nsfisis <nsfisis@gmail.com>
Date: Sun, 25 Jan 2026 18:09:51 +0900
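Subject: refactor term parsing

Tokenize `.name` as a single `field` token instead of `dot` + `identifier`.
Split parseTerm into parsePrimary plus a loop that applies `[N]` suffixes
via parseSuffix. `array_index` now carries an explicit `base` operand, which
compileExpr emits ahead of the index subexpression.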

---
 src/jq/compile.zig  |   7 +-
 src/jq/parse.zig    | 141 ++++++++++++++++----------------------
 src/jq/tokenize.zig | 193 ++++++++++++++++++++++++++++++++++++----------------
 3 files changed, 197 insertions(+), 144 deletions(-)

(limited to 'src')

diff --git a/src/jq/compile.zig b/src/jq/compile.zig
index ec6ef63..fb2a691 100644
--- a/src/jq/compile.zig
+++ b/src/jq/compile.zig
@@ -68,9 +68,12 @@ fn compileExpr(allocator: std.mem.Allocator, compile_allocator: std.mem.Allocato
 
     switch (ast.*) {
         .identity => try instrs.append(allocator, .nop),
-        .array_index => |index| {
-            const index_instrs = try compileExpr(allocator, compile_allocator, index);
+        .array_index => |arr_idx| {
+            const base_instrs = try compileExpr(allocator, compile_allocator, arr_idx.base);
+            defer allocator.free(base_instrs);
+            const index_instrs = try compileExpr(allocator, compile_allocator, arr_idx.index);
             defer allocator.free(index_instrs);
+            try instrs.appendSlice(allocator, base_instrs);
             try instrs.append(allocator, .subexp_begin);
             try instrs.appendSlice(allocator, index_instrs);
             try instrs.append(allocator, .subexp_end);
diff --git a/src/jq/parse.zig b/src/jq/parse.zig
index 6a76861..5df1d14 100644
--- a/src/jq/parse.zig
+++ b/src/jq/parse.zig
@@ -45,7 +45,7 @@ pub const BinaryOp = enum {
 
 pub const Ast = union(AstKind) {
     identity,
-    array_index: *Ast,
+    array_index: struct { base: *Ast, index: *Ast },
     object_key: []const u8,
     literal: *jv.Value,
     binary_expr: struct { op: BinaryOp, lhs: *Ast, rhs: *Ast },
@@ -300,101 +300,75 @@ fn parseExpr7(allocator: std.mem.Allocator, parse_allocator: std.mem.Allocator,
 }
 
 fn parseTerm(allocator: std.mem.Allocator, parse_allocator: std.mem.Allocator, tokens: *TokenStream) !*Ast {
-    const first_token = try tokens.peek();
-    if (first_token.kind() == .number) {
-        _ = try tokens.next();
-        const number_value = try allocator.create(jv.Value);
-        const f = first_token.number;
-        const i: i64 = @intFromFloat(f);
-        if (@as(f64, @floatFromInt(i)) == f) {
-            number_value.* = .{ .integer = i };
+    var result = try parsePrimary(allocator, parse_allocator, tokens);
+    while (true) {
+        const token = tokens.peek() catch return result;
+        if (token.kind() == .bracket_left) {
+            result = try parseSuffix(allocator, parse_allocator, tokens, result);
         } else {
-            number_value.* = .{ .float = f };
+            break;
         }
-        const number_node = try parse_allocator.create(Ast);
-        number_node.* = .{ .literal = number_value };
-        return number_node;
-    }
-
-    if (first_token.kind() == .keyword_null) {
-        _ = try tokens.next();
-        const null_value = try allocator.create(jv.Value);
-        null_value.* = .null;
-        const null_node = try parse_allocator.create(Ast);
-        null_node.* = .{ .literal = null_value };
-        return null_node;
-    }
-
-    if (first_token.kind() == .keyword_true) {
-        _ = try tokens.next();
-        const true_value = try allocator.create(jv.Value);
-        true_value.* = .{ .bool = true };
-        const true_node = try parse_allocator.create(Ast);
-        true_node.* = .{ .literal = true_value };
-        return true_node;
-    }
-
-    if (first_token.kind() == .keyword_false) {
-        _ = try tokens.next();
-        const false_value = try allocator.create(jv.Value);
-        false_value.* = .{ .bool = false };
-        const false_node = try parse_allocator.create(Ast);
-        false_node.* = .{ .literal = false_value };
-        return false_node;
     }
+    return result;
+}
 
-    _ = try tokens.expect(.dot);
-
-    const next_token = try tokens.peek();
-    switch (next_token.kind()) {
-        .identifier => {
-            return parseFieldAccess(allocator, parse_allocator, tokens);
+fn parsePrimary(allocator: std.mem.Allocator, parse_allocator: std.mem.Allocator, tokens: *TokenStream) !*Ast {
+    const first_token = try tokens.peek();
+    switch (first_token) {
+        .keyword_null => {
+            _ = try tokens.next();
+            const null_value = try allocator.create(jv.Value);
+            null_value.* = .null;
+            const null_node = try parse_allocator.create(Ast);
+            null_node.* = .{ .literal = null_value };
+            return null_node;
+        },
+        .keyword_true => {
+            _ = try tokens.next();
+            const true_value = try allocator.create(jv.Value);
+            true_value.* = .{ .bool = true };
+            const true_node = try parse_allocator.create(Ast);
+            true_node.* = .{ .literal = true_value };
+            return true_node;
+        },
+        .keyword_false => {
+            _ = try tokens.next();
+            const false_value = try allocator.create(jv.Value);
+            false_value.* = .{ .bool = false };
+            const false_node = try parse_allocator.create(Ast);
+            false_node.* = .{ .literal = false_value };
+            return false_node;
         },
-        .bracket_left => {
-            return parseIndexAccess(allocator, parse_allocator, tokens);
+        .number => |f| {
+            _ = try tokens.next();
+            const number_value = try allocator.create(jv.Value);
+            const i: i64 = @intFromFloat(f);
+            if (@as(f64, @floatFromInt(i)) == f) {
+                number_value.* = .{ .integer = i };
+            } else {
+                number_value.* = .{ .float = f };
+            }
+            const number_node = try parse_allocator.create(Ast);
+            number_node.* = .{ .literal = number_value };
+            return number_node;
         },
-        .end,
-        .pipe,
-        .comma,
-        .slash_slash,
-        .equal,
-        .pipe_equal,
-        .slash_slash_equal,
-        .plus_equal,
-        .minus_equal,
-        .asterisk_equal,
-        .slash_equal,
-        .percent_equal,
-        .keyword_or,
-        .keyword_and,
-        .equal_equal,
-        .not_equal,
-        .less_than,
-        .greater_than,
-        .less_than_equal,
-        .greater_than_equal,
-        .plus,
-        .minus,
-        .asterisk,
-        .slash,
-        .percent,
-        => {
+        .dot => {
+            _ = try tokens.next();
             const ast = try parse_allocator.create(Ast);
             ast.* = .identity;
             return ast;
         },
+        .field => |name| {
+            _ = try tokens.next();
+            const ast = try parse_allocator.create(Ast);
+            ast.* = .{ .object_key = try allocator.dupe(u8, name) };
+            return ast;
+        },
         else => return error.InvalidQuery,
     }
 }
 
-fn parseFieldAccess(allocator: std.mem.Allocator, parse_allocator: std.mem.Allocator, tokens: *TokenStream) !*Ast {
-    const token = try tokens.expect(.identifier);
-    const ast = try parse_allocator.create(Ast);
-    ast.* = .{ .object_key = try allocator.dupe(u8, token.identifier) };
-    return ast;
-}
-
-fn parseIndexAccess(allocator: std.mem.Allocator, parse_allocator: std.mem.Allocator, tokens: *TokenStream) !*Ast {
+fn parseSuffix(allocator: std.mem.Allocator, parse_allocator: std.mem.Allocator, tokens: *TokenStream, base: *Ast) !*Ast {
     _ = try tokens.expect(.bracket_left);
     const index_token = try tokens.expect(.number);
     _ = try tokens.expect(.bracket_right);
@@ -403,7 +377,8 @@ fn parseIndexAccess(allocator: std.mem.Allocator, parse_allocator: std.mem.Alloc
     index_value.* = .{ .integer = @intFromFloat(index_token.number) };
     const index_node = try parse_allocator.create(Ast);
     index_node.* = .{ .literal = index_value };
+
     const ast = try parse_allocator.create(Ast);
-    ast.* = .{ .array_index = index_node };
+    ast.* = .{ .array_index = .{ .base = base, .index = index_node } };
     return ast;
 }
diff --git a/src/jq/tokenize.zig b/src/jq/tokenize.zig
index 60643de..f5e1a70 100644
--- a/src/jq/tokenize.zig
+++ b/src/jq/tokenize.zig
@@ -74,6 +74,7 @@ pub const TokenKind = enum {
     number,
     string,
     format,
+    field,
 };
 
 pub const Token = union(TokenKind) {
@@ -141,6 +142,7 @@ pub const Token = union(TokenKind) {
     number: f64,
     string: []const u8,
     format: []const u8,
+    field: []const u8,
 
     pub fn kind(self: @This()) TokenKind {
         return self;
@@ -242,6 +244,22 @@ fn tokenizeIdentifier(allocator: std.mem.Allocator, reader: *std.Io.Reader, firs
     return buffer.toOwnedSlice(allocator);
 }
 
+fn tokenizeField(allocator: std.mem.Allocator, reader: *std.Io.Reader, first: u8) ![]const u8 {
+    var buffer = try std.ArrayList(u8).initCapacity(allocator, 16);
+    try buffer.append(allocator, first);
+
+    while (try peekByte(reader)) |c| {
+        if (isIdentifierContinue(c)) {
+            try buffer.append(allocator, c);
+            reader.toss(1);
+        } else {
+            break;
+        }
+    }
+
+    return buffer.toOwnedSlice(allocator);
+}
+
 fn tokenizeNumber(allocator: std.mem.Allocator, reader: *std.Io.Reader, first: u8) !f64 {
     var buffer = try std.ArrayList(u8).initCapacity(allocator, 16);
     try buffer.append(allocator, first);
@@ -484,7 +502,18 @@ pub fn tokenize(allocator: std.mem.Allocator, reader: *std.Io.Reader) ![]Token {
             '+' => if (try takeByteIf(reader, '=')) .plus_equal else .plus,
             ',' => .comma,
             '-' => if (try takeByteIf(reader, '=')) .minus_equal else .minus,
-            '.' => if (try takeByteIf(reader, '.')) .dot_dot else .dot,
+            '.' => blk: {
+                if (try takeByteIf(reader, '.')) {
+                    break :blk .dot_dot;
+                }
+                if (try peekByte(reader)) |next| {
+                    if (isIdentifierStart(next)) {
+                        reader.toss(1);
+                        break :blk Token{ .field = try tokenizeField(allocator, reader, next) };
+                    }
+                }
+                break :blk .dot;
+            },
             '/' => if (try takeByteIf(reader, '/'))
                 if (try takeByteIf(reader, '=')) .slash_slash_equal else .slash_slash
             else if (try takeByteIf(reader, '='))
@@ -679,12 +708,11 @@ test "tokenize identifier in complex query" {
     var reader = std.Io.Reader.fixed(".foo | bar::baz");
     const tokens = try tokenize(allocator.allocator(), &reader);
 
-    try std.testing.expectEqual(5, tokens.len);
-    try std.testing.expectEqual(.dot, tokens[0]);
-    try std.testing.expectEqualStrings("foo", tokens[1].identifier);
-    try std.testing.expectEqual(.pipe, tokens[2]);
-    try std.testing.expectEqualStrings("bar::baz", tokens[3].identifier);
-    try std.testing.expectEqual(.end, tokens[4]);
+    try std.testing.expectEqual(4, tokens.len);
+    try std.testing.expectEqualStrings("foo", tokens[0].field);
+    try std.testing.expectEqual(.pipe, tokens[1]);
+    try std.testing.expectEqualStrings("bar::baz", tokens[2].identifier);
+    try std.testing.expectEqual(.end, tokens[3]);
 }
 
 test "tokenize keywords" {
@@ -751,12 +779,11 @@ test "tokenize with comments" {
     );
     const tokens = try tokenize(allocator.allocator(), &reader);
 
-    try std.testing.expectEqual(5, tokens.len);
-    try std.testing.expectEqual(.dot, tokens[0]);
-    try std.testing.expectEqualStrings("foo", tokens[1].identifier);
-    try std.testing.expectEqual(.pipe, tokens[2]);
-    try std.testing.expectEqualStrings("bar", tokens[3].identifier);
-    try std.testing.expectEqual(.end, tokens[4]);
+    try std.testing.expectEqual(4, tokens.len);
+    try std.testing.expectEqualStrings("foo", tokens[0].field);
+    try std.testing.expectEqual(.pipe, tokens[1]);
+    try std.testing.expectEqualStrings("bar", tokens[2].identifier);
+    try std.testing.expectEqual(.end, tokens[3]);
 }
 
 test "tokenize comment at end of input" {
@@ -766,10 +793,9 @@ test "tokenize comment at end of input" {
     var reader = std.Io.Reader.fixed(".foo # comment without newline");
     const tokens = try tokenize(allocator.allocator(), &reader);
 
-    try std.testing.expectEqual(3, tokens.len);
-    try std.testing.expectEqual(.dot, tokens[0]);
-    try std.testing.expectEqualStrings("foo", tokens[1].identifier);
-    try std.testing.expectEqual(.end, tokens[2]);
+    try std.testing.expectEqual(2, tokens.len);
+    try std.testing.expectEqualStrings("foo", tokens[0].field);
+    try std.testing.expectEqual(.end, tokens[1]);
 }
 
 test "tokenize comment with line continuation" {
@@ -783,12 +809,11 @@ test "tokenize comment with line continuation" {
     );
     const tokens = try tokenize(allocator.allocator(), &reader);
 
-    try std.testing.expectEqual(5, tokens.len);
-    try std.testing.expectEqual(.dot, tokens[0]);
-    try std.testing.expectEqualStrings("foo", tokens[1].identifier);
-    try std.testing.expectEqual(.pipe, tokens[2]);
-    try std.testing.expectEqualStrings("bar", tokens[3].identifier);
-    try std.testing.expectEqual(.end, tokens[4]);
+    try std.testing.expectEqual(4, tokens.len);
+    try std.testing.expectEqualStrings("foo", tokens[0].field);
+    try std.testing.expectEqual(.pipe, tokens[1]);
+    try std.testing.expectEqualStrings("bar", tokens[2].identifier);
+    try std.testing.expectEqual(.end, tokens[3]);
 }
 
 test "tokenize comment with escaped backslash before newline" {
@@ -802,12 +827,11 @@ test "tokenize comment with escaped backslash before newline" {
     );
     const tokens = try tokenize(allocator.allocator(), &reader);
 
-    try std.testing.expectEqual(5, tokens.len);
-    try std.testing.expectEqual(.dot, tokens[0]);
-    try std.testing.expectEqualStrings("foo", tokens[1].identifier);
-    try std.testing.expectEqual(.pipe, tokens[2]);
-    try std.testing.expectEqualStrings("bar", tokens[3].identifier);
-    try std.testing.expectEqual(.end, tokens[4]);
+    try std.testing.expectEqual(4, tokens.len);
+    try std.testing.expectEqualStrings("foo", tokens[0].field);
+    try std.testing.expectEqual(.pipe, tokens[1]);
+    try std.testing.expectEqualStrings("bar", tokens[2].identifier);
+    try std.testing.expectEqual(.end, tokens[3]);
 }
 
 test "tokenize comment with three backslashes before newline" {
@@ -822,12 +846,11 @@ test "tokenize comment with three backslashes before newline" {
     );
     const tokens = try tokenize(allocator.allocator(), &reader);
 
-    try std.testing.expectEqual(5, tokens.len);
-    try std.testing.expectEqual(.dot, tokens[0]);
-    try std.testing.expectEqualStrings("foo", tokens[1].identifier);
-    try std.testing.expectEqual(.pipe, tokens[2]);
-    try std.testing.expectEqualStrings("bar", tokens[3].identifier);
-    try std.testing.expectEqual(.end, tokens[4]);
+    try std.testing.expectEqual(4, tokens.len);
+    try std.testing.expectEqualStrings("foo", tokens[0].field);
+    try std.testing.expectEqual(.pipe, tokens[1]);
+    try std.testing.expectEqualStrings("bar", tokens[2].identifier);
+    try std.testing.expectEqual(.end, tokens[3]);
 }
 
 test "tokenize comment with CRLF" {
@@ -837,12 +860,11 @@ test "tokenize comment with CRLF" {
     var reader = std.Io.Reader.fixed(".foo # comment\r\n| bar");
     const tokens = try tokenize(allocator.allocator(), &reader);
 
-    try std.testing.expectEqual(5, tokens.len);
-    try std.testing.expectEqual(.dot, tokens[0]);
-    try std.testing.expectEqualStrings("foo", tokens[1].identifier);
-    try std.testing.expectEqual(.pipe, tokens[2]);
-    try std.testing.expectEqualStrings("bar", tokens[3].identifier);
-    try std.testing.expectEqual(.end, tokens[4]);
+    try std.testing.expectEqual(4, tokens.len);
+    try std.testing.expectEqualStrings("foo", tokens[0].field);
+    try std.testing.expectEqual(.pipe, tokens[1]);
+    try std.testing.expectEqualStrings("bar", tokens[2].identifier);
+    try std.testing.expectEqual(.end, tokens[3]);
 }
 
 test "tokenize comment with line continuation before CRLF" {
@@ -852,12 +874,11 @@ test "tokenize comment with line continuation before CRLF" {
     var reader = std.Io.Reader.fixed(".foo # comment \\\r\nthis is also comment\r\n| bar");
     const tokens = try tokenize(allocator.allocator(), &reader);
 
-    try std.testing.expectEqual(5, tokens.len);
-    try std.testing.expectEqual(.dot, tokens[0]);
-    try std.testing.expectEqualStrings("foo", tokens[1].identifier);
-    try std.testing.expectEqual(.pipe, tokens[2]);
-    try std.testing.expectEqualStrings("bar", tokens[3].identifier);
-    try std.testing.expectEqual(.end, tokens[4]);
+    try std.testing.expectEqual(4, tokens.len);
+    try std.testing.expectEqualStrings("foo", tokens[0].field);
+    try std.testing.expectEqual(.pipe, tokens[1]);
+    try std.testing.expectEqualStrings("bar", tokens[2].identifier);
+    try std.testing.expectEqual(.end, tokens[3]);
 }
 
 test "tokenize comment with single CR does not end comment" {
@@ -867,12 +888,11 @@ test "tokenize comment with single CR does not end comment" {
     var reader = std.Io.Reader.fixed(".foo # comment\r| bar\n| baz");
     const tokens = try tokenize(allocator.allocator(), &reader);
 
-    try std.testing.expectEqual(5, tokens.len);
-    try std.testing.expectEqual(.dot, tokens[0]);
-    try std.testing.expectEqualStrings("foo", tokens[1].identifier);
-    try std.testing.expectEqual(.pipe, tokens[2]);
-    try std.testing.expectEqualStrings("baz", tokens[3].identifier);
-    try std.testing.expectEqual(.end, tokens[4]);
+    try std.testing.expectEqual(4, tokens.len);
+    try std.testing.expectEqualStrings("foo", tokens[0].field);
+    try std.testing.expectEqual(.pipe, tokens[1]);
+    try std.testing.expectEqualStrings("baz", tokens[2].identifier);
+    try std.testing.expectEqual(.end, tokens[3]);
 }
 
 test "tokenize floating point numbers" {
@@ -986,12 +1006,11 @@ test "tokenize format in expression" {
     var reader = std.Io.Reader.fixed(".foo | @base64");
     const tokens = try tokenize(allocator.allocator(), &reader);
 
-    try std.testing.expectEqual(5, tokens.len);
-    try std.testing.expectEqual(.dot, tokens[0]);
-    try std.testing.expectEqualStrings("foo", tokens[1].identifier);
-    try std.testing.expectEqual(.pipe, tokens[2]);
-    try std.testing.expectEqualStrings("base64", tokens[3].format);
-    try std.testing.expectEqual(.end, tokens[4]);
+    try std.testing.expectEqual(4, tokens.len);
+    try std.testing.expectEqualStrings("foo", tokens[0].field);
+    try std.testing.expectEqual(.pipe, tokens[1]);
+    try std.testing.expectEqualStrings("base64", tokens[2].format);
+    try std.testing.expectEqual(.end, tokens[3]);
 }
 
 test "tokenize format invalid" {
@@ -1150,3 +1169,59 @@ test "tokenize lone low surrogate" {
 
     try std.testing.expectError(error.InvalidUnicodeEscape, result);
 }
+
+test "tokenize field" {
+    var allocator = std.heap.ArenaAllocator.init(std.testing.allocator);
+    defer allocator.deinit();
+
+    var reader = std.Io.Reader.fixed(".foo");
+    const tokens = try tokenize(allocator.allocator(), &reader);
+
+    try std.testing.expectEqual(2, tokens.len);
+    try std.testing.expectEqualStrings("foo", tokens[0].field);
+    try std.testing.expectEqual(.end, tokens[1]);
+}
+
+test "tokenize chained fields" {
+    var allocator = std.heap.ArenaAllocator.init(std.testing.allocator);
+    defer allocator.deinit();
+
+    var reader = std.Io.Reader.fixed(".foo.bar.baz");
+    const tokens = try tokenize(allocator.allocator(), &reader);
+
+    try std.testing.expectEqual(4, tokens.len);
+    try std.testing.expectEqualStrings("foo", tokens[0].field);
+    try std.testing.expectEqualStrings("bar", tokens[1].field);
+    try std.testing.expectEqualStrings("baz", tokens[2].field);
+    try std.testing.expectEqual(.end, tokens[3]);
+}
+
+test "tokenize field does not support namespace" {
+    var allocator = std.heap.ArenaAllocator.init(std.testing.allocator);
+    defer allocator.deinit();
+
+    // Unlike identifiers, field access does not support namespace syntax
+    var reader = std.Io.Reader.fixed(".foo::bar");
+    const tokens = try tokenize(allocator.allocator(), &reader);
+
+    try std.testing.expectEqual(5, tokens.len);
+    try std.testing.expectEqualStrings("foo", tokens[0].field);
+    try std.testing.expectEqual(.colon, tokens[1]);
+    try std.testing.expectEqual(.colon, tokens[2]);
+    try std.testing.expectEqualStrings("bar", tokens[3].identifier);
+    try std.testing.expectEqual(.end, tokens[4]);
+}
+
+test "tokenize dot with space before identifier" {
+    var allocator = std.heap.ArenaAllocator.init(std.testing.allocator);
+    defer allocator.deinit();
+
+    // ". foo" should be [dot, identifier("foo")], not [field("foo")]
+    var reader = std.Io.Reader.fixed(". foo");
+    const tokens = try tokenize(allocator.allocator(), &reader);
+
+    try std.testing.expectEqual(3, tokens.len);
+    try std.testing.expectEqual(.dot, tokens[0]);
+    try std.testing.expectEqualStrings("foo", tokens[1].identifier);
+    try std.testing.expectEqual(.end, tokens[2]);
+}
-- 
cgit v1.3.1