diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/jq/tokenize.zig | 210 |
1 files changed, 201 insertions, 9 deletions
diff --git a/src/jq/tokenize.zig b/src/jq/tokenize.zig index 39cae29..a178b50 100644 --- a/src/jq/tokenize.zig +++ b/src/jq/tokenize.zig @@ -7,17 +7,85 @@ pub const TokenizeError = error{ pub const TokenKind = enum { end, - dot, + + asterisk, + asterisk_equal, + brace_left, + brace_right, bracket_left, bracket_right, + colon, + comma, + dollar, + dot, + dot_dot, + equal, + equal_equal, + greater_than, + greater_than_equal, + less_than, + less_than_equal, + minus, + minus_equal, + not_equal, + paren_left, + paren_right, + percent, + percent_equal, + pipe, + pipe_equal, + plus, + plus_equal, + question, + question_slash_slash, + semicolon, + slash, + slash_equal, + slash_slash, + slash_slash_equal, + number, }; pub const Token = union(TokenKind) { end, - dot, + + asterisk, + asterisk_equal, + brace_left, + brace_right, bracket_left, bracket_right, + colon, + comma, + dollar, + dot, + dot_dot, + equal, + equal_equal, + greater_than, + greater_than_equal, + less_than, + less_than_equal, + minus, + minus_equal, + not_equal, + paren_left, + paren_right, + percent, + percent_equal, + pipe, + pipe_equal, + plus, + plus_equal, + question, + question_slash_slash, + semicolon, + slash, + slash_equal, + slash_slash, + slash_slash_equal, + number: i64, pub fn kind(self: @This()) TokenKind { @@ -25,6 +93,13 @@ pub const Token = union(TokenKind) { } }; +fn peekByte(reader: *std.Io.Reader) error{ReadFailed}!?u8 { + return reader.peekByte() catch |err| switch (err) { + error.EndOfStream => null, + error.ReadFailed => error.ReadFailed, + }; +} + pub fn tokenize(allocator: std.mem.Allocator, reader: *std.Io.Reader) ![]Token { var tokens = try std.array_list.Aligned(Token, null).initCapacity(allocator, 16); @@ -34,9 +109,88 @@ pub fn tokenize(allocator: std.mem.Allocator, reader: *std.Io.Reader) ![]Token { error.ReadFailed => return error.ReadFailed, }; switch (c) { - '.' => try tokens.append(allocator, .dot), + ' ', '\t', '\n', '\r' => continue, + '$' => try tokens.append(allocator, .dollar), + '%' => try tokens.append(allocator, if (try peekByte(reader) == '=') blk: { + _ = reader.takeByte() catch unreachable; + break :blk .percent_equal; + } else .percent), + '(' => try tokens.append(allocator, .paren_left), + ')' => try tokens.append(allocator, .paren_right), + '*' => try tokens.append(allocator, if (try peekByte(reader) == '=') blk: { + _ = reader.takeByte() catch unreachable; + break :blk .asterisk_equal; + } else .asterisk), + '+' => try tokens.append(allocator, if (try peekByte(reader) == '=') blk: { + _ = reader.takeByte() catch unreachable; + break :blk .plus_equal; + } else .plus), + ',' => try tokens.append(allocator, .comma), + '-' => try tokens.append(allocator, if (try peekByte(reader) == '=') blk: { + _ = reader.takeByte() catch unreachable; + break :blk .minus_equal; + } else .minus), + '.' => try tokens.append(allocator, if (try peekByte(reader) == '.') blk: { + _ = reader.takeByte() catch unreachable; + break :blk .dot_dot; + } else .dot), + '/' => { + if (try peekByte(reader) == '/') { + _ = reader.takeByte() catch unreachable; + try tokens.append(allocator, if (try peekByte(reader) == '=') blk: { + _ = reader.takeByte() catch unreachable; + break :blk .slash_slash_equal; + } else .slash_slash); + } else if (try peekByte(reader) == '=') { + _ = reader.takeByte() catch unreachable; + try tokens.append(allocator, .slash_equal); + } else { + try tokens.append(allocator, .slash); + } + }, + ':' => try tokens.append(allocator, .colon), + ';' => try tokens.append(allocator, .semicolon), + '<' => try tokens.append(allocator, if (try peekByte(reader) == '=') blk: { + _ = reader.takeByte() catch unreachable; + break :blk .less_than_equal; + } else .less_than), + '=' => try tokens.append(allocator, if (try peekByte(reader) == '=') blk: { + _ = reader.takeByte() catch unreachable; + break :blk .equal_equal; + } else .equal), + '>' => try tokens.append(allocator, if (try peekByte(reader) == '=') blk: { + _ = reader.takeByte() catch unreachable; + break :blk .greater_than_equal; + } else .greater_than), + '!' => { + if (try peekByte(reader) == '=') { + _ = reader.takeByte() catch unreachable; + try tokens.append(allocator, .not_equal); + } else { + return error.InvalidCharacter; + } + }, + '?' => { + if (try peekByte(reader) == '/') { + _ = reader.takeByte() catch unreachable; + if (try peekByte(reader) == '/') { + _ = reader.takeByte() catch unreachable; + try tokens.append(allocator, .question_slash_slash); + } else { + return error.InvalidCharacter; + } + } else { + try tokens.append(allocator, .question); + } + }, '[' => try tokens.append(allocator, .bracket_left), ']' => try tokens.append(allocator, .bracket_right), + '{' => try tokens.append(allocator, .brace_left), + '|' => try tokens.append(allocator, if (try peekByte(reader) == '=') blk: { + _ = reader.takeByte() catch unreachable; + break :blk .pipe_equal; + } else .pipe), + '}' => try tokens.append(allocator, .brace_right), else => { if (std.ascii.isDigit(c)) { try tokens.append(allocator, .{ .number = (c - '0') }); @@ -60,15 +214,53 @@ test "tokenize symbols" { defer allocator.deinit(); var reader = std.Io.Reader.fixed( - \\.[] + \\* *= { } [ ] : , $ . .. = == > >= < <= - -= != ( ) % %= + \\| |= + += ? ?// ; / /= // //= ); const tokens = try tokenize(allocator.allocator(), &reader); - try std.testing.expectEqual(4, tokens.len); - try std.testing.expectEqual(.dot, tokens[0]); - try std.testing.expectEqual(.bracket_left, tokens[1]); - try std.testing.expectEqual(.bracket_right, tokens[2]); - try std.testing.expectEqual(.end, tokens[3]); + const expected = [_]Token{ + .asterisk, + .asterisk_equal, + .brace_left, + .brace_right, + .bracket_left, + .bracket_right, + .colon, + .comma, + .dollar, + .dot, + .dot_dot, + .equal, + .equal_equal, + .greater_than, + .greater_than_equal, + .less_than, + .less_than_equal, + .minus, + .minus_equal, + .not_equal, + .paren_left, + .paren_right, + .percent, + .percent_equal, + .pipe, + .pipe_equal, + .plus, + .plus_equal, + .question, + .question_slash_slash, + .semicolon, + .slash, + .slash_equal, + .slash_slash, + .slash_slash_equal, + .end, + }; + try std.testing.expectEqual(expected.len, tokens.len); + for (expected, tokens) |e, t| { + try std.testing.expectEqual(e, t); + } } test "tokenize number" { |
