about summary refs log tree commit diff homepage
diff options
context:
space:
mode:
-rw-r--r-- src/jq/tokenize.zig 210
1 files changed, 201 insertions, 9 deletions
diff --git a/src/jq/tokenize.zig b/src/jq/tokenize.zig
index 39cae29..a178b50 100644
--- a/src/jq/tokenize.zig
+++ b/src/jq/tokenize.zig
@@ -7,17 +7,85 @@ pub const TokenizeError = error{
+/// Tag type backing the `Token` union: one tag per kind of lexical token.
pub const TokenKind = enum {
+ // Appended as the last element of the token list (the tests expect `.end` last).
end,
- dot,
+
+ // Punctuation and operator tokens, kept in alphabetical order by tag name.
+ // The `_equal` variants are the two-or-three character forms ending in `=`.
+ asterisk,
+ asterisk_equal,
+ brace_left,
+ brace_right,
bracket_left,
bracket_right,
+ colon,
+ comma,
+ dollar,
+ dot,
+ dot_dot,
+ equal,
+ equal_equal,
+ greater_than,
+ greater_than_equal,
+ less_than,
+ less_than_equal,
+ minus,
+ minus_equal,
+ not_equal,
+ paren_left,
+ paren_right,
+ percent,
+ percent_equal,
+ pipe,
+ pipe_equal,
+ plus,
+ plus_equal,
+ question,
+ question_slash_slash,
+ semicolon,
+ slash,
+ slash_equal,
+ slash_slash,
+ slash_slash_equal,
+
+ // Numeric literal; the `Token` union attaches an `i64` payload to this tag.
number,
};
pub const Token = union(TokenKind) {
end,
- dot,
+
+ asterisk,
+ asterisk_equal,
+ brace_left,
+ brace_right,
bracket_left,
bracket_right,
+ colon,
+ comma,
+ dollar,
+ dot,
+ dot_dot,
+ equal,
+ equal_equal,
+ greater_than,
+ greater_than_equal,
+ less_than,
+ less_than_equal,
+ minus,
+ minus_equal,
+ not_equal,
+ paren_left,
+ paren_right,
+ percent,
+ percent_equal,
+ pipe,
+ pipe_equal,
+ plus,
+ plus_equal,
+ question,
+ question_slash_slash,
+ semicolon,
+ slash,
+ slash_equal,
+ slash_slash,
+ slash_slash_equal,
+
number: i64,
pub fn kind(self: @This()) TokenKind {
@@ -25,6 +93,13 @@ pub const Token = union(TokenKind) {
}
};
+/// Looks at the next byte of `reader`; callers consume it separately with
+/// `reader.takeByte()` once they decide it belongs to the current token.
+/// Returns `null` when the underlying peek reports `error.EndOfStream`, so the
+/// result can be compared directly against an expected byte (`== '='`).
+/// `error.ReadFailed` is passed through unchanged.
+fn peekByte(reader: *std.Io.Reader) error{ReadFailed}!?u8 {
+ return reader.peekByte() catch |err| switch (err) {
+ // End of input is not an error for lookahead: report "no byte" instead.
+ error.EndOfStream => null,
+ error.ReadFailed => error.ReadFailed,
+ };
+}
+
pub fn tokenize(allocator: std.mem.Allocator, reader: *std.Io.Reader) ![]Token {
var tokens = try std.array_list.Aligned(Token, null).initCapacity(allocator, 16);
@@ -34,9 +109,88 @@ pub fn tokenize(allocator: std.mem.Allocator, reader: *std.Io.Reader) ![]Token {
error.ReadFailed => return error.ReadFailed,
};
switch (c) {
- '.' => try tokens.append(allocator, .dot),
+ ' ', '\t', '\n', '\r' => continue,
+ '$' => try tokens.append(allocator, .dollar),
+ '%' => try tokens.append(allocator, if (try peekByte(reader) == '=') blk: {
+ _ = reader.takeByte() catch unreachable;
+ break :blk .percent_equal;
+ } else .percent),
+ '(' => try tokens.append(allocator, .paren_left),
+ ')' => try tokens.append(allocator, .paren_right),
+ '*' => try tokens.append(allocator, if (try peekByte(reader) == '=') blk: {
+ _ = reader.takeByte() catch unreachable;
+ break :blk .asterisk_equal;
+ } else .asterisk),
+ '+' => try tokens.append(allocator, if (try peekByte(reader) == '=') blk: {
+ _ = reader.takeByte() catch unreachable;
+ break :blk .plus_equal;
+ } else .plus),
+ ',' => try tokens.append(allocator, .comma),
+ '-' => try tokens.append(allocator, if (try peekByte(reader) == '=') blk: {
+ _ = reader.takeByte() catch unreachable;
+ break :blk .minus_equal;
+ } else .minus),
+ '.' => try tokens.append(allocator, if (try peekByte(reader) == '.') blk: {
+ _ = reader.takeByte() catch unreachable;
+ break :blk .dot_dot;
+ } else .dot),
+ '/' => {
+ if (try peekByte(reader) == '/') {
+ _ = reader.takeByte() catch unreachable;
+ try tokens.append(allocator, if (try peekByte(reader) == '=') blk: {
+ _ = reader.takeByte() catch unreachable;
+ break :blk .slash_slash_equal;
+ } else .slash_slash);
+ } else if (try peekByte(reader) == '=') {
+ _ = reader.takeByte() catch unreachable;
+ try tokens.append(allocator, .slash_equal);
+ } else {
+ try tokens.append(allocator, .slash);
+ }
+ },
+ ':' => try tokens.append(allocator, .colon),
+ ';' => try tokens.append(allocator, .semicolon),
+ '<' => try tokens.append(allocator, if (try peekByte(reader) == '=') blk: {
+ _ = reader.takeByte() catch unreachable;
+ break :blk .less_than_equal;
+ } else .less_than),
+ '=' => try tokens.append(allocator, if (try peekByte(reader) == '=') blk: {
+ _ = reader.takeByte() catch unreachable;
+ break :blk .equal_equal;
+ } else .equal),
+ '>' => try tokens.append(allocator, if (try peekByte(reader) == '=') blk: {
+ _ = reader.takeByte() catch unreachable;
+ break :blk .greater_than_equal;
+ } else .greater_than),
+ '!' => {
+ if (try peekByte(reader) == '=') {
+ _ = reader.takeByte() catch unreachable;
+ try tokens.append(allocator, .not_equal);
+ } else {
+ return error.InvalidCharacter;
+ }
+ },
+ '?' => {
+ if (try peekByte(reader) == '/') {
+ _ = reader.takeByte() catch unreachable;
+ if (try peekByte(reader) == '/') {
+ _ = reader.takeByte() catch unreachable;
+ try tokens.append(allocator, .question_slash_slash);
+ } else {
+ return error.InvalidCharacter;
+ }
+ } else {
+ try tokens.append(allocator, .question);
+ }
+ },
'[' => try tokens.append(allocator, .bracket_left),
']' => try tokens.append(allocator, .bracket_right),
+ '{' => try tokens.append(allocator, .brace_left),
+ '|' => try tokens.append(allocator, if (try peekByte(reader) == '=') blk: {
+ _ = reader.takeByte() catch unreachable;
+ break :blk .pipe_equal;
+ } else .pipe),
+ '}' => try tokens.append(allocator, .brace_right),
else => {
if (std.ascii.isDigit(c)) {
try tokens.append(allocator, .{ .number = (c - '0') });
@@ -60,15 +214,53 @@ test "tokenize symbols" {
defer allocator.deinit();
var reader = std.Io.Reader.fixed(
- \\.[]
+ \\* *= { } [ ] : , $ . .. = == > >= < <= - -= != ( ) % %=
+ \\| |= + += ? ?// ; / /= // //=
);
const tokens = try tokenize(allocator.allocator(), &reader);
- try std.testing.expectEqual(4, tokens.len);
- try std.testing.expectEqual(.dot, tokens[0]);
- try std.testing.expectEqual(.bracket_left, tokens[1]);
- try std.testing.expectEqual(.bracket_right, tokens[2]);
- try std.testing.expectEqual(.end, tokens[3]);
+ const expected = [_]Token{
+ .asterisk,
+ .asterisk_equal,
+ .brace_left,
+ .brace_right,
+ .bracket_left,
+ .bracket_right,
+ .colon,
+ .comma,
+ .dollar,
+ .dot,
+ .dot_dot,
+ .equal,
+ .equal_equal,
+ .greater_than,
+ .greater_than_equal,
+ .less_than,
+ .less_than_equal,
+ .minus,
+ .minus_equal,
+ .not_equal,
+ .paren_left,
+ .paren_right,
+ .percent,
+ .percent_equal,
+ .pipe,
+ .pipe_equal,
+ .plus,
+ .plus_equal,
+ .question,
+ .question_slash_slash,
+ .semicolon,
+ .slash,
+ .slash_equal,
+ .slash_slash,
+ .slash_slash_equal,
+ .end,
+ };
+ try std.testing.expectEqual(expected.len, tokens.len);
+ for (expected, tokens) |e, t| {
+ try std.testing.expectEqual(e, t);
+ }
}
test "tokenize number" {