diff options
| author | nsfisis <nsfisis@gmail.com> | 2026-01-17 20:43:49 +0900 |
|---|---|---|
| committer | nsfisis <nsfisis@gmail.com> | 2026-01-17 20:43:49 +0900 |
| commit | 0295edea4c2d5e6f2e0a6761483cc9e2c64544f8 (patch) | |
| tree | 1f37433ef6a826ed5d34455be9832fb71f742c8a | |
| parent | 419a606657f4186ab243ed22d35ae173fa6ebaa6 (diff) | |
| download | zgjq-0295edea4c2d5e6f2e0a6761483cc9e2c64544f8.tar.gz zgjq-0295edea4c2d5e6f2e0a6761483cc9e2c64544f8.tar.zst zgjq-0295edea4c2d5e6f2e0a6761483cc9e2c64544f8.zip | |
implement comment
| -rw-r--r-- | src/jq/tokenize.zig | 175 |
1 files changed, 175 insertions, 0 deletions
diff --git a/src/jq/tokenize.zig b/src/jq/tokenize.zig index 2f002da..8c70665 100644 --- a/src/jq/tokenize.zig +++ b/src/jq/tokenize.zig @@ -155,6 +155,43 @@ fn takeByteIf(reader: *std.Io.Reader, expected: u8) error{ReadFailed}!bool { } } +fn skipComment(reader: *std.Io.Reader) error{ReadFailed}!void { + var is_last_character_unescaped_backslash = false; + + while (true) { + const c = reader.takeByte() catch |err| switch (err) { + error.EndOfStream => return, + error.ReadFailed => return error.ReadFailed, + }; + + if (c == '\n') { + if (is_last_character_unescaped_backslash) { + is_last_character_unescaped_backslash = false; + continue; // comment line continuation + } else { + return; + } + } else if (c == '\r') { + // Check CRLF. + if (try peekByte(reader) == '\n') { + reader.toss(1); + if (is_last_character_unescaped_backslash) { + is_last_character_unescaped_backslash = false; + continue; // comment line continuation + } else { + return; + } + } + is_last_character_unescaped_backslash = false; + // NOTE: single CR is not treated as line break, as the original jq does. + } else if (c == '\\') { + is_last_character_unescaped_backslash = !is_last_character_unescaped_backslash; + } else { + is_last_character_unescaped_backslash = false; + } + } +} + fn isIdentifierStart(c: u8) bool { return std.ascii.isAlphabetic(c) or c == '_'; } @@ -255,6 +292,10 @@ pub fn tokenize(allocator: std.mem.Allocator, reader: *std.Io.Reader) ![]Token { }; const token: Token = switch (c) { ' ', '\t', '\n', '\r' => continue, + '#' => { + try skipComment(reader); + continue; + }, '$' => .dollar, '%' => if (try takeByteIf(reader, '=')) .percent_equal else .percent, '(' => .paren_left, @@ -518,3 +559,137 @@ test "tokenize keyword-like identifiers" { try std.testing.expectEqualStrings("for", tokens[2].identifier); try std.testing.expectEqual(.end, tokens[3]); } + +test "tokenize with comments" { + var allocator = std.heap.ArenaAllocator.init(std.testing.allocator); + defer allocator.deinit(); + + var reader = std.Io.Reader.fixed( + \\.foo # this is a comment + \\| bar + ); + const tokens = try tokenize(allocator.allocator(), &reader); + + try std.testing.expectEqual(5, tokens.len); + try std.testing.expectEqual(.dot, tokens[0]); + try std.testing.expectEqualStrings("foo", tokens[1].identifier); + try std.testing.expectEqual(.pipe, tokens[2]); + try std.testing.expectEqualStrings("bar", tokens[3].identifier); + try std.testing.expectEqual(.end, tokens[4]); +} + +test "tokenize comment at end of input" { + var allocator = std.heap.ArenaAllocator.init(std.testing.allocator); + defer allocator.deinit(); + + var reader = std.Io.Reader.fixed(".foo # comment without newline"); + const tokens = try tokenize(allocator.allocator(), &reader); + + try std.testing.expectEqual(3, tokens.len); + try std.testing.expectEqual(.dot, tokens[0]); + try std.testing.expectEqualStrings("foo", tokens[1].identifier); + try std.testing.expectEqual(.end, tokens[2]); +} + +test "tokenize comment with line continuation" { + var allocator = std.heap.ArenaAllocator.init(std.testing.allocator); + defer allocator.deinit(); + + var reader = std.Io.Reader.fixed( + \\.foo # comment \ + \\this is also comment + \\| bar + ); + const tokens = try tokenize(allocator.allocator(), &reader); + + try std.testing.expectEqual(5, tokens.len); + try std.testing.expectEqual(.dot, tokens[0]); + try std.testing.expectEqualStrings("foo", tokens[1].identifier); + try std.testing.expectEqual(.pipe, tokens[2]); + try std.testing.expectEqualStrings("bar", tokens[3].identifier); + try std.testing.expectEqual(.end, tokens[4]); +} + +test "tokenize comment with escaped backslash before newline" { + var allocator = std.heap.ArenaAllocator.init(std.testing.allocator); + defer allocator.deinit(); + + // Two backslashes (even) before newline: comment ends + var reader = std.Io.Reader.fixed( + \\.foo # comment \\ + \\| bar + ); + const tokens = try tokenize(allocator.allocator(), &reader); + + try std.testing.expectEqual(5, tokens.len); + try std.testing.expectEqual(.dot, tokens[0]); + try std.testing.expectEqualStrings("foo", tokens[1].identifier); + try std.testing.expectEqual(.pipe, tokens[2]); + try std.testing.expectEqualStrings("bar", tokens[3].identifier); + try std.testing.expectEqual(.end, tokens[4]); +} + +test "tokenize comment with three backslashes before newline" { + var allocator = std.heap.ArenaAllocator.init(std.testing.allocator); + defer allocator.deinit(); + + // Three backslashes (odd) before newline: comment continues + var reader = std.Io.Reader.fixed( + \\.foo # comment \\\ + \\this is also comment + \\| bar + ); + const tokens = try tokenize(allocator.allocator(), &reader); + + try std.testing.expectEqual(5, tokens.len); + try std.testing.expectEqual(.dot, tokens[0]); + try std.testing.expectEqualStrings("foo", tokens[1].identifier); + try std.testing.expectEqual(.pipe, tokens[2]); + try std.testing.expectEqualStrings("bar", tokens[3].identifier); + try std.testing.expectEqual(.end, tokens[4]); +} + +test "tokenize comment with CRLF" { + var allocator = std.heap.ArenaAllocator.init(std.testing.allocator); + defer allocator.deinit(); + + var reader = std.Io.Reader.fixed(".foo # comment\r\n| bar"); + const tokens = try tokenize(allocator.allocator(), &reader); + + try std.testing.expectEqual(5, tokens.len); + try std.testing.expectEqual(.dot, tokens[0]); + try std.testing.expectEqualStrings("foo", tokens[1].identifier); + try std.testing.expectEqual(.pipe, tokens[2]); + try std.testing.expectEqualStrings("bar", tokens[3].identifier); + try std.testing.expectEqual(.end, tokens[4]); +} + +test "tokenize comment with line continuation before CRLF" { + var allocator = std.heap.ArenaAllocator.init(std.testing.allocator); + defer allocator.deinit(); + + var reader = std.Io.Reader.fixed(".foo # comment \\\r\nthis is also comment\r\n| bar"); + const tokens = try tokenize(allocator.allocator(), &reader); + + try std.testing.expectEqual(5, tokens.len); + try std.testing.expectEqual(.dot, tokens[0]); + try std.testing.expectEqualStrings("foo", tokens[1].identifier); + try std.testing.expectEqual(.pipe, tokens[2]); + try std.testing.expectEqualStrings("bar", tokens[3].identifier); + try std.testing.expectEqual(.end, tokens[4]); +} + +test "tokenize comment with single CR does not end comment" { + var allocator = std.heap.ArenaAllocator.init(std.testing.allocator); + defer allocator.deinit(); + + var reader = std.Io.Reader.fixed(".foo # comment\r| bar\n| baz"); + const tokens = try tokenize(allocator.allocator(), &reader); + + try std.testing.expectEqual(5, tokens.len); + try std.testing.expectEqual(.dot, tokens[0]); + try std.testing.expectEqualStrings("foo", tokens[1].identifier); + try std.testing.expectEqual(.pipe, tokens[2]); + try std.testing.expectEqualStrings("baz", tokens[3].identifier); + try std.testing.expectEqual(.end, tokens[4]); +} |
