author    nsfisis <nsfisis@gmail.com>  2026-01-25 18:09:51 +0900
committer nsfisis <nsfisis@gmail.com>  2026-01-25 18:09:51 +0900
commit    48d9ec8aef4c3e7f3574346a6cf6a1fa3d725561 (patch)
tree      37ab7baf3d4008497c72dc57a60c7c77b5153fd4
parent    20ff09460371b07d7e9683757657a5a3ead005a8 (diff)
refactor term parsing
-rw-r--r--  docs/jq_grammar.md   |  81
-rw-r--r--  src/jq/compile.zig   |   7
-rw-r--r--  src/jq/parse.zig     | 141
-rw-r--r--  src/jq/tokenize.zig  | 193
4 files changed, 277 insertions, 145 deletions
diff --git a/docs/jq_grammar.md b/docs/jq_grammar.md
index 339fac8..5405e95 100644
--- a/docs/jq_grammar.md
+++ b/docs/jq_grammar.md
@@ -21,7 +21,86 @@
* `# ...`: Additional constraints
-## Grammar
+## Implemented Grammar
+
+```
+program:
+ body
+
+body:
+ query
+
+query:
+ query2
+
+query2:
+ query3 '|' query3
+ query3
+
+query3:
+ expr ',' expr
+ expr
+
+expr:
+ expr2 '//' expr2
+ expr2
+
+expr2:
+ expr3 '=' expr3
+ expr3 '|=' expr3
+ expr3 '//=' expr3
+ expr3 '+=' expr3
+ expr3 '-=' expr3
+ expr3 '*=' expr3
+ expr3 '/=' expr3
+ expr3 '%=' expr3
+ expr3
+
+expr3:
+ expr4 'or' expr4
+ expr4
+
+expr4:
+ expr5 'and' expr5
+ expr5
+
+expr5:
+ expr6 '==' expr6
+ expr6 '!=' expr6
+ expr6 '<' expr6
+ expr6 '>' expr6
+ expr6 '<=' expr6
+ expr6 '>=' expr6
+ expr6
+
+expr6:
+ expr7 '+' expr7
+ expr7 '-' expr7
+ expr7
+
+expr7:
+ term '*' term
+ term '/' term
+ term '%' term
+ term
+
+term:
+ primary { suffix }*
+
+suffix:
+ '[' query ']'
+
+primary:
+ 'null'
+ 'true'
+ 'false'
+ NUMBER
+ '.'
+ FIELD
+```
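
As a worked example (an editorial sketch, not part of the committed file), the implemented grammar derives `.foo[0] + 1` as below, with `.foo` arriving from the tokenizer as a single FIELD token and the single-production levels (`query` → … → `expr6`) elided. Note that `parseSuffix` currently accepts only a NUMBER between the brackets, a subset of the `'[' query ']'` production:

```
.foo[0] + 1
expr6
├─ expr7 → term → primary (FIELD ".foo") suffix ('[' NUMBER 0 ']')
├─ '+'
└─ expr7 → term → primary (NUMBER 1)
```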
+
+
+## Complete Grammar
```
program:
diff --git a/src/jq/compile.zig b/src/jq/compile.zig
index ec6ef63..fb2a691 100644
--- a/src/jq/compile.zig
+++ b/src/jq/compile.zig
@@ -68,9 +68,12 @@ fn compileExpr(allocator: std.mem.Allocator, compile_allocator: std.mem.Allocato
switch (ast.*) {
.identity => try instrs.append(allocator, .nop),
- .array_index => |index| {
- const index_instrs = try compileExpr(allocator, compile_allocator, index);
+ .array_index => |arr_idx| {
+ const base_instrs = try compileExpr(allocator, compile_allocator, arr_idx.base);
+ defer allocator.free(base_instrs);
+ const index_instrs = try compileExpr(allocator, compile_allocator, arr_idx.index);
defer allocator.free(index_instrs);
+ try instrs.appendSlice(allocator, base_instrs);
try instrs.append(allocator, .subexp_begin);
try instrs.appendSlice(allocator, index_instrs);
try instrs.append(allocator, .subexp_end);
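
For orientation, a sketch of the instruction sequence this hunk now emits for `.foo[0]`. Only `subexp_begin`/`subexp_end` appear in the hunk itself; the `object_key`/`literal` opcode names and the trailing indexing instruction are assumptions about the surrounding code:

```
object_key "foo"   // base_instrs, compiled from arr_idx.base (assumed opcode)
subexp_begin       // open the index sub-expression
literal 0          // index_instrs, compiled from arr_idx.index (assumed opcode)
subexp_end         // close the sub-expression
...                // indexing instruction emitted after this hunk
```

The substance of the change is that the base is now compiled explicitly and emitted before the index sub-expression, instead of the index being applied to the implicit current input.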
diff --git a/src/jq/parse.zig b/src/jq/parse.zig
index 6a76861..5df1d14 100644
--- a/src/jq/parse.zig
+++ b/src/jq/parse.zig
@@ -45,7 +45,7 @@ pub const BinaryOp = enum {
pub const Ast = union(AstKind) {
identity,
- array_index: *Ast,
+ array_index: struct { base: *Ast, index: *Ast },
object_key: []const u8,
literal: *jv.Value,
binary_expr: struct { op: BinaryOp, lhs: *Ast, rhs: *Ast },
@@ -300,101 +300,75 @@ fn parseExpr7(allocator: std.mem.Allocator, parse_allocator: std.mem.Allocator,
}
fn parseTerm(allocator: std.mem.Allocator, parse_allocator: std.mem.Allocator, tokens: *TokenStream) !*Ast {
- const first_token = try tokens.peek();
- if (first_token.kind() == .number) {
- _ = try tokens.next();
- const number_value = try allocator.create(jv.Value);
- const f = first_token.number;
- const i: i64 = @intFromFloat(f);
- if (@as(f64, @floatFromInt(i)) == f) {
- number_value.* = .{ .integer = i };
+ var result = try parsePrimary(allocator, parse_allocator, tokens);
+ while (true) {
+ const token = tokens.peek() catch return result;
+ if (token.kind() == .bracket_left) {
+ result = try parseSuffix(allocator, parse_allocator, tokens, result);
} else {
- number_value.* = .{ .float = f };
+ break;
}
- const number_node = try parse_allocator.create(Ast);
- number_node.* = .{ .literal = number_value };
- return number_node;
- }
-
- if (first_token.kind() == .keyword_null) {
- _ = try tokens.next();
- const null_value = try allocator.create(jv.Value);
- null_value.* = .null;
- const null_node = try parse_allocator.create(Ast);
- null_node.* = .{ .literal = null_value };
- return null_node;
- }
-
- if (first_token.kind() == .keyword_true) {
- _ = try tokens.next();
- const true_value = try allocator.create(jv.Value);
- true_value.* = .{ .bool = true };
- const true_node = try parse_allocator.create(Ast);
- true_node.* = .{ .literal = true_value };
- return true_node;
- }
-
- if (first_token.kind() == .keyword_false) {
- _ = try tokens.next();
- const false_value = try allocator.create(jv.Value);
- false_value.* = .{ .bool = false };
- const false_node = try parse_allocator.create(Ast);
- false_node.* = .{ .literal = false_value };
- return false_node;
}
+ return result;
+}
- _ = try tokens.expect(.dot);
-
- const next_token = try tokens.peek();
- switch (next_token.kind()) {
- .identifier => {
- return parseFieldAccess(allocator, parse_allocator, tokens);
+fn parsePrimary(allocator: std.mem.Allocator, parse_allocator: std.mem.Allocator, tokens: *TokenStream) !*Ast {
+ const first_token = try tokens.peek();
+ switch (first_token) {
+ .keyword_null => {
+ _ = try tokens.next();
+ const null_value = try allocator.create(jv.Value);
+ null_value.* = .null;
+ const null_node = try parse_allocator.create(Ast);
+ null_node.* = .{ .literal = null_value };
+ return null_node;
+ },
+ .keyword_true => {
+ _ = try tokens.next();
+ const true_value = try allocator.create(jv.Value);
+ true_value.* = .{ .bool = true };
+ const true_node = try parse_allocator.create(Ast);
+ true_node.* = .{ .literal = true_value };
+ return true_node;
+ },
+ .keyword_false => {
+ _ = try tokens.next();
+ const false_value = try allocator.create(jv.Value);
+ false_value.* = .{ .bool = false };
+ const false_node = try parse_allocator.create(Ast);
+ false_node.* = .{ .literal = false_value };
+ return false_node;
},
- .bracket_left => {
- return parseIndexAccess(allocator, parse_allocator, tokens);
+ .number => |f| {
+ _ = try tokens.next();
+ const number_value = try allocator.create(jv.Value);
+ const i: i64 = @intFromFloat(f);
+ if (@as(f64, @floatFromInt(i)) == f) {
+ number_value.* = .{ .integer = i };
+ } else {
+ number_value.* = .{ .float = f };
+ }
+ const number_node = try parse_allocator.create(Ast);
+ number_node.* = .{ .literal = number_value };
+ return number_node;
},
- .end,
- .pipe,
- .comma,
- .slash_slash,
- .equal,
- .pipe_equal,
- .slash_slash_equal,
- .plus_equal,
- .minus_equal,
- .asterisk_equal,
- .slash_equal,
- .percent_equal,
- .keyword_or,
- .keyword_and,
- .equal_equal,
- .not_equal,
- .less_than,
- .greater_than,
- .less_than_equal,
- .greater_than_equal,
- .plus,
- .minus,
- .asterisk,
- .slash,
- .percent,
- => {
+ .dot => {
+ _ = try tokens.next();
const ast = try parse_allocator.create(Ast);
ast.* = .identity;
return ast;
},
+ .field => |name| {
+ _ = try tokens.next();
+ const ast = try parse_allocator.create(Ast);
+ ast.* = .{ .object_key = try allocator.dupe(u8, name) };
+ return ast;
+ },
else => return error.InvalidQuery,
}
}
-fn parseFieldAccess(allocator: std.mem.Allocator, parse_allocator: std.mem.Allocator, tokens: *TokenStream) !*Ast {
- const token = try tokens.expect(.identifier);
- const ast = try parse_allocator.create(Ast);
- ast.* = .{ .object_key = try allocator.dupe(u8, token.identifier) };
- return ast;
-}
-
-fn parseIndexAccess(allocator: std.mem.Allocator, parse_allocator: std.mem.Allocator, tokens: *TokenStream) !*Ast {
+fn parseSuffix(allocator: std.mem.Allocator, parse_allocator: std.mem.Allocator, tokens: *TokenStream, base: *Ast) !*Ast {
_ = try tokens.expect(.bracket_left);
const index_token = try tokens.expect(.number);
_ = try tokens.expect(.bracket_right);
@@ -403,7 +377,8 @@ fn parseIndexAccess(allocator: std.mem.Allocator, parse_allocator: std.mem.Alloc
index_value.* = .{ .integer = @intFromFloat(index_token.number) };
const index_node = try parse_allocator.create(Ast);
index_node.* = .{ .literal = index_value };
+
const ast = try parse_allocator.create(Ast);
- ast.* = .{ .array_index = index_node };
+ ast.* = .{ .array_index = .{ .base = base, .index = index_node } };
return ast;
}
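
With the suffix loop in `parseTerm`, chained indexing nests left-associatively. A sketch of the AST produced for `.foo[0][1]` under the new two-field `array_index` shape:

```
array_index
├─ base: array_index
│  ├─ base:  object_key "foo"
│  └─ index: literal 0
└─ index: literal 1
```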
diff --git a/src/jq/tokenize.zig b/src/jq/tokenize.zig
index 60643de..f5e1a70 100644
--- a/src/jq/tokenize.zig
+++ b/src/jq/tokenize.zig
@@ -74,6 +74,7 @@ pub const TokenKind = enum {
number,
string,
format,
+ field,
};
pub const Token = union(TokenKind) {
@@ -141,6 +142,7 @@ pub const Token = union(TokenKind) {
number: f64,
string: []const u8,
format: []const u8,
+ field: []const u8,
pub fn kind(self: @This()) TokenKind {
return self;
@@ -242,6 +244,22 @@ fn tokenizeIdentifier(allocator: std.mem.Allocator, reader: *std.Io.Reader, firs
return buffer.toOwnedSlice(allocator);
}
+fn tokenizeField(allocator: std.mem.Allocator, reader: *std.Io.Reader, first: u8) ![]const u8 {
+ var buffer = try std.ArrayList(u8).initCapacity(allocator, 16);
+ try buffer.append(allocator, first);
+
+ while (try peekByte(reader)) |c| {
+ if (isIdentifierContinue(c)) {
+ try buffer.append(allocator, c);
+ reader.toss(1);
+ } else {
+ break;
+ }
+ }
+
+ return buffer.toOwnedSlice(allocator);
+}
+
fn tokenizeNumber(allocator: std.mem.Allocator, reader: *std.Io.Reader, first: u8) !f64 {
var buffer = try std.ArrayList(u8).initCapacity(allocator, 16);
try buffer.append(allocator, first);
@@ -484,7 +502,18 @@ pub fn tokenize(allocator: std.mem.Allocator, reader: *std.Io.Reader) ![]Token {
'+' => if (try takeByteIf(reader, '=')) .plus_equal else .plus,
',' => .comma,
'-' => if (try takeByteIf(reader, '=')) .minus_equal else .minus,
- '.' => if (try takeByteIf(reader, '.')) .dot_dot else .dot,
+ '.' => blk: {
+ if (try takeByteIf(reader, '.')) {
+ break :blk .dot_dot;
+ }
+ if (try peekByte(reader)) |next| {
+ if (isIdentifierStart(next)) {
+ reader.toss(1);
+ break :blk Token{ .field = try tokenizeField(allocator, reader, next) };
+ }
+ }
+ break :blk .dot;
+ },
'/' => if (try takeByteIf(reader, '/'))
if (try takeByteIf(reader, '=')) .slash_slash_equal else .slash_slash
else if (try takeByteIf(reader, '='))
@@ -679,12 +708,11 @@ test "tokenize identifier in complex query" {
var reader = std.Io.Reader.fixed(".foo | bar::baz");
const tokens = try tokenize(allocator.allocator(), &reader);
- try std.testing.expectEqual(5, tokens.len);
- try std.testing.expectEqual(.dot, tokens[0]);
- try std.testing.expectEqualStrings("foo", tokens[1].identifier);
- try std.testing.expectEqual(.pipe, tokens[2]);
- try std.testing.expectEqualStrings("bar::baz", tokens[3].identifier);
- try std.testing.expectEqual(.end, tokens[4]);
+ try std.testing.expectEqual(4, tokens.len);
+ try std.testing.expectEqualStrings("foo", tokens[0].field);
+ try std.testing.expectEqual(.pipe, tokens[1]);
+ try std.testing.expectEqualStrings("bar::baz", tokens[2].identifier);
+ try std.testing.expectEqual(.end, tokens[3]);
}
test "tokenize keywords" {
@@ -751,12 +779,11 @@ test "tokenize with comments" {
);
const tokens = try tokenize(allocator.allocator(), &reader);
- try std.testing.expectEqual(5, tokens.len);
- try std.testing.expectEqual(.dot, tokens[0]);
- try std.testing.expectEqualStrings("foo", tokens[1].identifier);
- try std.testing.expectEqual(.pipe, tokens[2]);
- try std.testing.expectEqualStrings("bar", tokens[3].identifier);
- try std.testing.expectEqual(.end, tokens[4]);
+ try std.testing.expectEqual(4, tokens.len);
+ try std.testing.expectEqualStrings("foo", tokens[0].field);
+ try std.testing.expectEqual(.pipe, tokens[1]);
+ try std.testing.expectEqualStrings("bar", tokens[2].identifier);
+ try std.testing.expectEqual(.end, tokens[3]);
}
test "tokenize comment at end of input" {
@@ -766,10 +793,9 @@ test "tokenize comment at end of input" {
var reader = std.Io.Reader.fixed(".foo # comment without newline");
const tokens = try tokenize(allocator.allocator(), &reader);
- try std.testing.expectEqual(3, tokens.len);
- try std.testing.expectEqual(.dot, tokens[0]);
- try std.testing.expectEqualStrings("foo", tokens[1].identifier);
- try std.testing.expectEqual(.end, tokens[2]);
+ try std.testing.expectEqual(2, tokens.len);
+ try std.testing.expectEqualStrings("foo", tokens[0].field);
+ try std.testing.expectEqual(.end, tokens[1]);
}
test "tokenize comment with line continuation" {
@@ -783,12 +809,11 @@ test "tokenize comment with line continuation" {
);
const tokens = try tokenize(allocator.allocator(), &reader);
- try std.testing.expectEqual(5, tokens.len);
- try std.testing.expectEqual(.dot, tokens[0]);
- try std.testing.expectEqualStrings("foo", tokens[1].identifier);
- try std.testing.expectEqual(.pipe, tokens[2]);
- try std.testing.expectEqualStrings("bar", tokens[3].identifier);
- try std.testing.expectEqual(.end, tokens[4]);
+ try std.testing.expectEqual(4, tokens.len);
+ try std.testing.expectEqualStrings("foo", tokens[0].field);
+ try std.testing.expectEqual(.pipe, tokens[1]);
+ try std.testing.expectEqualStrings("bar", tokens[2].identifier);
+ try std.testing.expectEqual(.end, tokens[3]);
}
test "tokenize comment with escaped backslash before newline" {
@@ -802,12 +827,11 @@ test "tokenize comment with escaped backslash before newline" {
);
const tokens = try tokenize(allocator.allocator(), &reader);
- try std.testing.expectEqual(5, tokens.len);
- try std.testing.expectEqual(.dot, tokens[0]);
- try std.testing.expectEqualStrings("foo", tokens[1].identifier);
- try std.testing.expectEqual(.pipe, tokens[2]);
- try std.testing.expectEqualStrings("bar", tokens[3].identifier);
- try std.testing.expectEqual(.end, tokens[4]);
+ try std.testing.expectEqual(4, tokens.len);
+ try std.testing.expectEqualStrings("foo", tokens[0].field);
+ try std.testing.expectEqual(.pipe, tokens[1]);
+ try std.testing.expectEqualStrings("bar", tokens[2].identifier);
+ try std.testing.expectEqual(.end, tokens[3]);
}
test "tokenize comment with three backslashes before newline" {
@@ -822,12 +846,11 @@ test "tokenize comment with three backslashes before newline" {
);
const tokens = try tokenize(allocator.allocator(), &reader);
- try std.testing.expectEqual(5, tokens.len);
- try std.testing.expectEqual(.dot, tokens[0]);
- try std.testing.expectEqualStrings("foo", tokens[1].identifier);
- try std.testing.expectEqual(.pipe, tokens[2]);
- try std.testing.expectEqualStrings("bar", tokens[3].identifier);
- try std.testing.expectEqual(.end, tokens[4]);
+ try std.testing.expectEqual(4, tokens.len);
+ try std.testing.expectEqualStrings("foo", tokens[0].field);
+ try std.testing.expectEqual(.pipe, tokens[1]);
+ try std.testing.expectEqualStrings("bar", tokens[2].identifier);
+ try std.testing.expectEqual(.end, tokens[3]);
}
test "tokenize comment with CRLF" {
@@ -837,12 +860,11 @@ test "tokenize comment with CRLF" {
var reader = std.Io.Reader.fixed(".foo # comment\r\n| bar");
const tokens = try tokenize(allocator.allocator(), &reader);
- try std.testing.expectEqual(5, tokens.len);
- try std.testing.expectEqual(.dot, tokens[0]);
- try std.testing.expectEqualStrings("foo", tokens[1].identifier);
- try std.testing.expectEqual(.pipe, tokens[2]);
- try std.testing.expectEqualStrings("bar", tokens[3].identifier);
- try std.testing.expectEqual(.end, tokens[4]);
+ try std.testing.expectEqual(4, tokens.len);
+ try std.testing.expectEqualStrings("foo", tokens[0].field);
+ try std.testing.expectEqual(.pipe, tokens[1]);
+ try std.testing.expectEqualStrings("bar", tokens[2].identifier);
+ try std.testing.expectEqual(.end, tokens[3]);
}
test "tokenize comment with line continuation before CRLF" {
@@ -852,12 +874,11 @@ test "tokenize comment with line continuation before CRLF" {
var reader = std.Io.Reader.fixed(".foo # comment \\\r\nthis is also comment\r\n| bar");
const tokens = try tokenize(allocator.allocator(), &reader);
- try std.testing.expectEqual(5, tokens.len);
- try std.testing.expectEqual(.dot, tokens[0]);
- try std.testing.expectEqualStrings("foo", tokens[1].identifier);
- try std.testing.expectEqual(.pipe, tokens[2]);
- try std.testing.expectEqualStrings("bar", tokens[3].identifier);
- try std.testing.expectEqual(.end, tokens[4]);
+ try std.testing.expectEqual(4, tokens.len);
+ try std.testing.expectEqualStrings("foo", tokens[0].field);
+ try std.testing.expectEqual(.pipe, tokens[1]);
+ try std.testing.expectEqualStrings("bar", tokens[2].identifier);
+ try std.testing.expectEqual(.end, tokens[3]);
}
test "tokenize comment with single CR does not end comment" {
@@ -867,12 +888,11 @@ test "tokenize comment with single CR does not end comment" {
var reader = std.Io.Reader.fixed(".foo # comment\r| bar\n| baz");
const tokens = try tokenize(allocator.allocator(), &reader);
- try std.testing.expectEqual(5, tokens.len);
- try std.testing.expectEqual(.dot, tokens[0]);
- try std.testing.expectEqualStrings("foo", tokens[1].identifier);
- try std.testing.expectEqual(.pipe, tokens[2]);
- try std.testing.expectEqualStrings("baz", tokens[3].identifier);
- try std.testing.expectEqual(.end, tokens[4]);
+ try std.testing.expectEqual(4, tokens.len);
+ try std.testing.expectEqualStrings("foo", tokens[0].field);
+ try std.testing.expectEqual(.pipe, tokens[1]);
+ try std.testing.expectEqualStrings("baz", tokens[2].identifier);
+ try std.testing.expectEqual(.end, tokens[3]);
}
test "tokenize floating point numbers" {
@@ -986,12 +1006,11 @@ test "tokenize format in expression" {
var reader = std.Io.Reader.fixed(".foo | @base64");
const tokens = try tokenize(allocator.allocator(), &reader);
- try std.testing.expectEqual(5, tokens.len);
- try std.testing.expectEqual(.dot, tokens[0]);
- try std.testing.expectEqualStrings("foo", tokens[1].identifier);
- try std.testing.expectEqual(.pipe, tokens[2]);
- try std.testing.expectEqualStrings("base64", tokens[3].format);
- try std.testing.expectEqual(.end, tokens[4]);
+ try std.testing.expectEqual(4, tokens.len);
+ try std.testing.expectEqualStrings("foo", tokens[0].field);
+ try std.testing.expectEqual(.pipe, tokens[1]);
+ try std.testing.expectEqualStrings("base64", tokens[2].format);
+ try std.testing.expectEqual(.end, tokens[3]);
}
test "tokenize format invalid" {
@@ -1150,3 +1169,59 @@ test "tokenize lone low surrogate" {
try std.testing.expectError(error.InvalidUnicodeEscape, result);
}
+
+test "tokenize field" {
+ var allocator = std.heap.ArenaAllocator.init(std.testing.allocator);
+ defer allocator.deinit();
+
+ var reader = std.Io.Reader.fixed(".foo");
+ const tokens = try tokenize(allocator.allocator(), &reader);
+
+ try std.testing.expectEqual(2, tokens.len);
+ try std.testing.expectEqualStrings("foo", tokens[0].field);
+ try std.testing.expectEqual(.end, tokens[1]);
+}
+
+test "tokenize chained fields" {
+ var allocator = std.heap.ArenaAllocator.init(std.testing.allocator);
+ defer allocator.deinit();
+
+ var reader = std.Io.Reader.fixed(".foo.bar.baz");
+ const tokens = try tokenize(allocator.allocator(), &reader);
+
+ try std.testing.expectEqual(4, tokens.len);
+ try std.testing.expectEqualStrings("foo", tokens[0].field);
+ try std.testing.expectEqualStrings("bar", tokens[1].field);
+ try std.testing.expectEqualStrings("baz", tokens[2].field);
+ try std.testing.expectEqual(.end, tokens[3]);
+}
+
+test "tokenize field does not support namespace" {
+ var allocator = std.heap.ArenaAllocator.init(std.testing.allocator);
+ defer allocator.deinit();
+
+ // Unlike identifiers, field access does not support namespace syntax
+ var reader = std.Io.Reader.fixed(".foo::bar");
+ const tokens = try tokenize(allocator.allocator(), &reader);
+
+ try std.testing.expectEqual(5, tokens.len);
+ try std.testing.expectEqualStrings("foo", tokens[0].field);
+ try std.testing.expectEqual(.colon, tokens[1]);
+ try std.testing.expectEqual(.colon, tokens[2]);
+ try std.testing.expectEqualStrings("bar", tokens[3].identifier);
+ try std.testing.expectEqual(.end, tokens[4]);
+}
+
+test "tokenize dot with space before identifier" {
+ var allocator = std.heap.ArenaAllocator.init(std.testing.allocator);
+ defer allocator.deinit();
+
+ // ". foo" should be [dot, identifier("foo")], not [field("foo")]
+ var reader = std.Io.Reader.fixed(". foo");
+ const tokens = try tokenize(allocator.allocator(), &reader);
+
+ try std.testing.expectEqual(3, tokens.len);
+ try std.testing.expectEqual(.dot, tokens[0]);
+ try std.testing.expectEqualStrings("foo", tokens[1].identifier);
+ try std.testing.expectEqual(.end, tokens[2]);
+}
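
A minimal standalone sketch of the `.` disambiguation rule the tokenizer now applies (a hypothetical helper, not from the repository; it assumes identifiers start with `_` or an ASCII letter, matching the behavior of `isIdentifierStart` above):

```zig
const std = @import("std");

const DotKind = enum { dot, dot_dot, field };

// Classify a source slice that begins with '.': ".." is a dot_dot token,
// ".<ident-start>" begins a field token, and a bare "." (including ". foo",
// where whitespace intervenes) stays a plain dot.
fn classifyDot(src: []const u8) DotKind {
    std.debug.assert(src.len > 0 and src[0] == '.');
    if (src.len > 1 and src[1] == '.') return .dot_dot;
    if (src.len > 1 and isIdentStart(src[1])) return .field;
    return .dot;
}

fn isIdentStart(c: u8) bool {
    return c == '_' or std.ascii.isAlphabetic(c);
}

test "dot classification sketch" {
    try std.testing.expectEqual(DotKind.field, classifyDot(".foo"));
    try std.testing.expectEqual(DotKind.dot_dot, classifyDot(".."));
    try std.testing.expectEqual(DotKind.dot, classifyDot(". foo"));
}
```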