aboutsummaryrefslogtreecommitdiffhomepage
path: root/src/jq
diff options
context:
space:
mode:
author: nsfisis <nsfisis@gmail.com> 2026-01-17 16:16:34 +0900
committer: nsfisis <nsfisis@gmail.com> 2026-01-17 16:16:34 +0900
commit: 815661c081625ca513209aa80dc19846de2913e2 (patch)
tree: 910577462ed3389c7f5adff1052240996214ce69 /src/jq
parent: 6e81a4c7a56ea30cca61b7cc32697cc90d6a1c87 (diff)
download: zgjq-815661c081625ca513209aa80dc19846de2913e2.tar.gz
zgjq-815661c081625ca513209aa80dc19846de2913e2.tar.zst
zgjq-815661c081625ca513209aa80dc19846de2913e2.zip
implement identifier tokenization
Diffstat (limited to 'src/jq')
-rw-r--r-- src/jq/tokenize.zig 103
1 files changed, 103 insertions, 0 deletions
diff --git a/src/jq/tokenize.zig b/src/jq/tokenize.zig
index a178b50..8be92df 100644
--- a/src/jq/tokenize.zig
+++ b/src/jq/tokenize.zig
@@ -44,6 +44,7 @@ pub const TokenKind = enum {
slash_slash,
slash_slash_equal,
+ identifier,
number,
};
@@ -86,6 +87,7 @@ pub const Token = union(TokenKind) {
slash_slash,
slash_slash_equal,
+ identifier: []const u8,
number: i64,
pub fn kind(self: @This()) TokenKind {
@@ -100,6 +102,48 @@ fn peekByte(reader: *std.Io.Reader) error{ReadFailed}!?u8 {
};
}
+/// Reports whether `c` may begin an identifier: an ASCII letter or '_'.
+fn isIdentifierStart(c: u8) bool {
+    if (c == '_') return true;
+    return std.ascii.isAlphabetic(c);
+}
+
+/// Reports whether `c` may appear after the first byte of an identifier:
+/// an ASCII letter, an ASCII digit, or '_'.
+fn isIdentifierContinue(c: u8) bool {
+    return switch (c) {
+        '_' => true,
+        else => std.ascii.isAlphanumeric(c),
+    };
+}
+
+/// Reads the rest of an identifier whose first byte, `first`, the caller has
+/// already obtained, and returns the complete identifier text.
+///
+/// A namespaced identifier such as "foo::bar" is returned as one slice. A "::"
+/// is only absorbed when it is immediately followed by an identifier-start
+/// byte, so for input like "foo:bar" or a trailing "foo::" the colons are left
+/// unread in `reader`.
+///
+/// The returned slice is allocated with `allocator` and owned by the caller.
+/// Fails with error.ReadFailed when the reader fails, or with an allocation
+/// error; in either case the partially built identifier is freed.
+fn tokenizeIdentifier(allocator: std.mem.Allocator, reader: *std.Io.Reader, first: u8) ![]const u8 {
+    var buffer = try std.array_list.Aligned(u8, null).initCapacity(allocator, 16);
+    // Without this, any failure below would leak the buffer.
+    errdefer buffer.deinit(allocator);
+    try buffer.append(allocator, first);
+
+    while (true) {
+        // Read one plain identifier segment.
+        while (try peekByte(reader)) |c| {
+            if (!isIdentifierContinue(c)) break;
+            try buffer.append(allocator, c);
+            // The byte was just peeked successfully, so taking it cannot fail.
+            _ = reader.takeByte() catch unreachable;
+        }
+
+        // Check namespaced identifier (e.g., "foo::bar").
+        const lookahead = reader.peek(3) catch |err| switch (err) {
+            // Fewer than three bytes remain, so no "::x" can follow.
+            error.EndOfStream => break,
+            error.ReadFailed => return error.ReadFailed,
+        };
+        const is_namespace_separator = lookahead[0] == ':' and lookahead[1] == ':' and isIdentifierStart(lookahead[2]);
+        if (!is_namespace_separator) break;
+
+        // Copy "::" plus the first byte of the next segment before discarding
+        // them from the reader, then loop to read the rest of that segment.
+        try buffer.appendSlice(allocator, lookahead[0..3]);
+        reader.toss(3);
+    }
+
+    return buffer.toOwnedSlice(allocator);
+}
+
pub fn tokenize(allocator: std.mem.Allocator, reader: *std.Io.Reader) ![]Token {
var tokens = try std.array_list.Aligned(Token, null).initCapacity(allocator, 16);
@@ -194,6 +238,9 @@ pub fn tokenize(allocator: std.mem.Allocator, reader: *std.Io.Reader) ![]Token {
else => {
if (std.ascii.isDigit(c)) {
try tokens.append(allocator, .{ .number = (c - '0') });
+ } else if (isIdentifierStart(c)) {
+ const ident = try tokenizeIdentifier(allocator, reader, c);
+ try tokens.append(allocator, .{ .identifier = ident });
} else {
return error.InvalidCharacter;
}
@@ -309,3 +356,59 @@ test "tokenize invalid character returns error" {
try std.testing.expectError(error.InvalidCharacter, result);
}
+
+test "tokenize identifiers" {
+    // Plain identifiers: letters only, leading underscore, trailing digit.
+    var arena_state = std.heap.ArenaAllocator.init(std.testing.allocator);
+    defer arena_state.deinit();
+
+    var input = std.Io.Reader.fixed("foo _foo foo2");
+    const result = try tokenize(arena_state.allocator(), &input);
+
+    const expected_names = [_][]const u8{ "foo", "_foo", "foo2" };
+
+    // Three identifiers followed by the end token.
+    try std.testing.expectEqual(expected_names.len + 1, result.len);
+    for (expected_names, result[0..expected_names.len]) |name, token| {
+        try std.testing.expectEqualStrings(name, token.identifier);
+    }
+    try std.testing.expectEqual(.end, result[expected_names.len]);
+}
+
+test "tokenize namespaced identifier" {
+    // "::"-joined segments must come back as a single identifier token.
+    var arena_state = std.heap.ArenaAllocator.init(std.testing.allocator);
+    defer arena_state.deinit();
+    const gpa = arena_state.allocator();
+
+    var input = std.Io.Reader.fixed("foo::bar foo::bar::baz");
+    const result = try tokenize(gpa, &input);
+
+    try std.testing.expectEqual(3, result.len);
+
+    const one_separator = result[0];
+    const two_separators = result[1];
+    try std.testing.expectEqualStrings("foo::bar", one_separator.identifier);
+    try std.testing.expectEqualStrings("foo::bar::baz", two_separators.identifier);
+
+    try std.testing.expectEqual(.end, result[2]);
+}
+
+test "tokenize identifier followed by colon" {
+    // A single ':' is not a namespace separator: expect identifier, colon,
+    // identifier, end.
+    var arena_state = std.heap.ArenaAllocator.init(std.testing.allocator);
+    defer arena_state.deinit();
+    const gpa = arena_state.allocator();
+
+    var input = std.Io.Reader.fixed("foo:bar");
+    const result = try tokenize(gpa, &input);
+
+    try std.testing.expectEqual(4, result.len);
+
+    const left = result[0];
+    const right = result[2];
+    try std.testing.expectEqualStrings("foo", left.identifier);
+    try std.testing.expectEqual(.colon, result[1]);
+    try std.testing.expectEqualStrings("bar", right.identifier);
+    try std.testing.expectEqual(.end, result[3]);
+}
+
+test "tokenize identifier in complex query" {
+    // Identifiers mixed with punctuation: dot, identifier, pipe, namespaced
+    // identifier, end.
+    var arena_state = std.heap.ArenaAllocator.init(std.testing.allocator);
+    defer arena_state.deinit();
+    const gpa = arena_state.allocator();
+
+    var input = std.Io.Reader.fixed(".foo | bar::baz");
+    const result = try tokenize(gpa, &input);
+
+    try std.testing.expectEqual(5, result.len);
+
+    try std.testing.expectEqual(.dot, result[0]);
+    const field_name = result[1];
+    try std.testing.expectEqualStrings("foo", field_name.identifier);
+
+    try std.testing.expectEqual(.pipe, result[2]);
+    const namespaced_name = result[3];
+    try std.testing.expectEqualStrings("bar::baz", namespaced_name.identifier);
+
+    try std.testing.expectEqual(.end, result[4]);
+}