diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/jq/parse.zig | 559 |
1 files changed, 287 insertions, 272 deletions
diff --git a/src/jq/parse.zig b/src/jq/parse.zig index 8b4e350..afcf0e6 100644 --- a/src/jq/parse.zig +++ b/src/jq/parse.zig @@ -58,20 +58,20 @@ pub const Ast = union(AstKind) { } }; -pub const TokenStream = struct { +const TokenStream = struct { const Self = @This(); tokens: []const Token, current_position: usize, - pub fn init(tokens: []const Token) Self { + fn init(tokens: []const Token) Self { return .{ .tokens = tokens, .current_position = 0, }; } - pub fn next(self: *Self) ParseError!Token { + fn next(self: *Self) ParseError!Token { if (self.current_position >= self.tokens.len) { return error.UnexpectedEnd; } @@ -80,14 +80,14 @@ pub const TokenStream = struct { return token; } - pub fn peek(self: *Self) ParseError!Token { + fn peek(self: *Self) ParseError!Token { if (self.current_position >= self.tokens.len) { return error.UnexpectedEnd; } return self.tokens[self.current_position]; } - pub fn expect(self: *Self, expected: TokenKind) ParseError!Token { + fn expect(self: *Self, expected: TokenKind) ParseError!Token { const token = try self.next(); if (token.kind() != expected) { return error.InvalidQuery; @@ -96,316 +96,331 @@ pub const TokenStream = struct { } }; -pub fn parse(allocator: std.mem.Allocator, parse_allocator: std.mem.Allocator, tokens: []const Token, constants: *std.ArrayList(jv.Value)) !*Ast { - var token_stream = TokenStream.init(tokens); - return parseQuery(allocator, parse_allocator, &token_stream, constants); -} +const Parser = struct { + const Self = @This(); -fn parseProgram(allocator: std.mem.Allocator, parse_allocator: std.mem.Allocator, tokens: *TokenStream, constants: *std.ArrayList(jv.Value)) !*Ast { - return parseBody(allocator, parse_allocator, tokens, constants); -} + allocator: std.mem.Allocator, + parse_allocator: std.mem.Allocator, + tokens: *TokenStream, + constants: *std.ArrayList(jv.Value), -fn parseBody(allocator: std.mem.Allocator, parse_allocator: std.mem.Allocator, tokens: *TokenStream, constants: *std.ArrayList(jv.Value)) !*Ast { - return parseQuery(allocator, parse_allocator, tokens, constants); -} - -fn parseQuery(allocator: std.mem.Allocator, parse_allocator: std.mem.Allocator, tokens: *TokenStream, constants: *std.ArrayList(jv.Value)) !*Ast { - return parseQuery2(allocator, parse_allocator, tokens, constants); -} + fn parseProgram(self: *Self) !*Ast { + return self.parseBody(); + } -fn parseQuery2(allocator: std.mem.Allocator, parse_allocator: std.mem.Allocator, tokens: *TokenStream, constants: *std.ArrayList(jv.Value)) !*Ast { - var lhs = try parseQuery3(allocator, parse_allocator, tokens, constants); - while (true) { - const token = tokens.peek() catch break; - if (token.kind() == .pipe) { - _ = try tokens.next(); - const rhs = try parseQuery3(allocator, parse_allocator, tokens, constants); - const ast = try parse_allocator.create(Ast); - ast.* = .{ .pipe = .{ - .lhs = lhs, - .rhs = rhs, - } }; - lhs = ast; - } else { - break; - } + fn parseBody(self: *Self) !*Ast { + return self.parseQuery(); } - _ = try tokens.expect(.end); - return lhs; -} -fn parseQuery3(allocator: std.mem.Allocator, parse_allocator: std.mem.Allocator, tokens: *TokenStream, constants: *std.ArrayList(jv.Value)) !*Ast { - var lhs = try parseExpr(allocator, parse_allocator, tokens, constants); - while (true) { - const token = tokens.peek() catch return lhs; - if (token.kind() == .comma) { - _ = try tokens.next(); - const rhs = try parseExpr(allocator, parse_allocator, tokens, constants); - const ast = try parse_allocator.create(Ast); - ast.* = .{ .comma = .{ - .lhs = lhs, - .rhs = rhs, - } }; - lhs = ast; - } else { - break; - } + fn parseQuery(self: *Self) !*Ast { + return self.parseQuery2(); } - return lhs; -} -fn parseExpr(allocator: std.mem.Allocator, parse_allocator: std.mem.Allocator, tokens: *TokenStream, constants: *std.ArrayList(jv.Value)) !*Ast { - var lhs = try parseExpr2(allocator, parse_allocator, tokens, constants); - while (true) { - const token = try tokens.peek(); - if (token.kind() == .slash_slash) { - _ = try tokens.next(); - const rhs = try parseExpr2(allocator, parse_allocator, tokens, constants); - const ast = try parse_allocator.create(Ast); - ast.* = .{ .binary_expr = .{ - .op = .alt, - .lhs = lhs, - .rhs = rhs, - } }; - lhs = ast; - } else { - break; + fn parseQuery2(self: *Self) !*Ast { + var lhs = try self.parseQuery3(); + while (true) { + const token = self.tokens.peek() catch break; + if (token.kind() == .pipe) { + _ = try self.tokens.next(); + const rhs = try self.parseQuery3(); + const ast = try self.parse_allocator.create(Ast); + ast.* = .{ .pipe = .{ + .lhs = lhs, + .rhs = rhs, + } }; + lhs = ast; + } else { + break; + } } + _ = try self.tokens.expect(.end); + return lhs; } - return lhs; -} - -fn parseExpr2(allocator: std.mem.Allocator, parse_allocator: std.mem.Allocator, tokens: *TokenStream, constants: *std.ArrayList(jv.Value)) !*Ast { - const lhs = try parseExpr3(allocator, parse_allocator, tokens, constants); - const token = tokens.peek() catch return lhs; - const op: BinaryOp = switch (token.kind()) { - .equal => .assign, - .pipe_equal => .update, - .slash_slash_equal => .alt_assign, - .plus_equal => .add_assign, - .minus_equal => .sub_assign, - .asterisk_equal => .mul_assign, - .slash_equal => .div_assign, - .percent_equal => .mod_assign, - else => return lhs, - }; - _ = try tokens.next(); - const rhs = try parseExpr3(allocator, parse_allocator, tokens, constants); - const ast = try parse_allocator.create(Ast); - ast.* = .{ .binary_expr = .{ - .op = op, - .lhs = lhs, - .rhs = rhs, - } }; - return ast; -} -fn parseExpr3(allocator: std.mem.Allocator, parse_allocator: std.mem.Allocator, tokens: *TokenStream, constants: *std.ArrayList(jv.Value)) !*Ast { - const lhs = try parseExpr4(allocator, parse_allocator, tokens, constants); - const token = tokens.peek() catch return lhs; - if (token.kind() != .keyword_or) { + fn parseQuery3(self: *Self) !*Ast { + var lhs = try self.parseExpr(); + while (true) { + const token = self.tokens.peek() catch return lhs; + if (token.kind() == .comma) { + _ = try self.tokens.next(); + const rhs = try self.parseExpr(); + const ast = try self.parse_allocator.create(Ast); + ast.* = .{ .comma = .{ + .lhs = lhs, + .rhs = rhs, + } }; + lhs = ast; + } else { + break; + } + } return lhs; } - _ = try tokens.next(); - const rhs = try parseExpr4(allocator, parse_allocator, tokens, constants); - const ast = try parse_allocator.create(Ast); - ast.* = .{ .binary_expr = .{ - .op = .@"or", - .lhs = lhs, - .rhs = rhs, - } }; - return ast; -} -fn parseExpr4(allocator: std.mem.Allocator, parse_allocator: std.mem.Allocator, tokens: *TokenStream, constants: *std.ArrayList(jv.Value)) !*Ast { - const lhs = try parseExpr5(allocator, parse_allocator, tokens, constants); - const token = tokens.peek() catch return lhs; - if (token.kind() != .keyword_and) { + fn parseExpr(self: *Self) !*Ast { + var lhs = try self.parseExpr2(); + while (true) { + const token = try self.tokens.peek(); + if (token.kind() == .slash_slash) { + _ = try self.tokens.next(); + const rhs = try self.parseExpr2(); + const ast = try self.parse_allocator.create(Ast); + ast.* = .{ .binary_expr = .{ + .op = .alt, + .lhs = lhs, + .rhs = rhs, + } }; + lhs = ast; + } else { + break; + } + } return lhs; } - _ = try tokens.next(); - const rhs = try parseExpr5(allocator, parse_allocator, tokens, constants); - const ast = try parse_allocator.create(Ast); - ast.* = .{ .binary_expr = .{ - .op = .@"and", - .lhs = lhs, - .rhs = rhs, - } }; - return ast; -} -fn parseExpr5(allocator: std.mem.Allocator, parse_allocator: std.mem.Allocator, tokens: *TokenStream, constants: *std.ArrayList(jv.Value)) !*Ast { - const lhs = try parseExpr6(allocator, parse_allocator, tokens, constants); - const token = tokens.peek() catch return lhs; - const op: BinaryOp = switch (token.kind()) { - .equal_equal => .eq, - .not_equal => .ne, - .less_than => .lt, - .greater_than => .gt, - .less_than_equal => .le, - .greater_than_equal => .ge, - else => return lhs, - }; - _ = try tokens.next(); - const rhs = try parseExpr6(allocator, parse_allocator, tokens, constants); - const ast = try parse_allocator.create(Ast); - ast.* = .{ .binary_expr = .{ - .op = op, - .lhs = lhs, - .rhs = rhs, - } }; - return ast; -} - -fn parseExpr6(allocator: std.mem.Allocator, parse_allocator: std.mem.Allocator, tokens: *TokenStream, constants: *std.ArrayList(jv.Value)) !*Ast { - var lhs = try parseExpr7(allocator, parse_allocator, tokens, constants); - while (true) { - const token = tokens.peek() catch return lhs; + fn parseExpr2(self: *Self) !*Ast { + const lhs = try self.parseExpr3(); + const token = self.tokens.peek() catch return lhs; const op: BinaryOp = switch (token.kind()) { - .plus => .add, - .minus => .sub, + .equal => .assign, + .pipe_equal => .update, + .slash_slash_equal => .alt_assign, + .plus_equal => .add_assign, + .minus_equal => .sub_assign, + .asterisk_equal => .mul_assign, + .slash_equal => .div_assign, + .percent_equal => .mod_assign, else => return lhs, }; - _ = try tokens.next(); - const rhs = try parseExpr7(allocator, parse_allocator, tokens, constants); - const ast = try parse_allocator.create(Ast); + _ = try self.tokens.next(); + const rhs = try self.parseExpr3(); + const ast = try self.parse_allocator.create(Ast); ast.* = .{ .binary_expr = .{ .op = op, .lhs = lhs, .rhs = rhs, } }; - lhs = ast; + return ast; } -} -fn parseExpr7(allocator: std.mem.Allocator, parse_allocator: std.mem.Allocator, tokens: *TokenStream, constants: *std.ArrayList(jv.Value)) !*Ast { - var lhs = try parseTerm(allocator, parse_allocator, tokens, constants); - while (true) { - const token = tokens.peek() catch return lhs; + fn parseExpr3(self: *Self) !*Ast { + const lhs = try self.parseExpr4(); + const token = self.tokens.peek() catch return lhs; + if (token.kind() != .keyword_or) { + return lhs; + } + _ = try self.tokens.next(); + const rhs = try self.parseExpr4(); + const ast = try self.parse_allocator.create(Ast); + ast.* = .{ .binary_expr = .{ + .op = .@"or", + .lhs = lhs, + .rhs = rhs, + } }; + return ast; + } + + fn parseExpr4(self: *Self) !*Ast { + const lhs = try self.parseExpr5(); + const token = self.tokens.peek() catch return lhs; + if (token.kind() != .keyword_and) { + return lhs; + } + _ = try self.tokens.next(); + const rhs = try self.parseExpr5(); + const ast = try self.parse_allocator.create(Ast); + ast.* = .{ .binary_expr = .{ + .op = .@"and", + .lhs = lhs, + .rhs = rhs, + } }; + return ast; + } + + fn parseExpr5(self: *Self) !*Ast { + const lhs = try self.parseExpr6(); + const token = self.tokens.peek() catch return lhs; const op: BinaryOp = switch (token.kind()) { - .asterisk => .mul, - .slash => .div, - .percent => .mod, + .equal_equal => .eq, + .not_equal => .ne, + .less_than => .lt, + .greater_than => .gt, + .less_than_equal => .le, + .greater_than_equal => .ge, else => return lhs, }; - _ = try tokens.next(); - const rhs = try parseTerm(allocator, parse_allocator, tokens, constants); - const ast = try parse_allocator.create(Ast); + _ = try self.tokens.next(); + const rhs = try self.parseExpr6(); + const ast = try self.parse_allocator.create(Ast); ast.* = .{ .binary_expr = .{ .op = op, .lhs = lhs, .rhs = rhs, } }; - lhs = ast; + return ast; + } + + fn parseExpr6(self: *Self) !*Ast { + var lhs = try self.parseExpr7(); + while (true) { + const token = self.tokens.peek() catch return lhs; + const op: BinaryOp = switch (token.kind()) { + .plus => .add, + .minus => .sub, + else => return lhs, + }; + _ = try self.tokens.next(); + const rhs = try self.parseExpr7(); + const ast = try self.parse_allocator.create(Ast); + ast.* = .{ .binary_expr = .{ + .op = op, + .lhs = lhs, + .rhs = rhs, + } }; + lhs = ast; + } } -} -fn parseTerm(allocator: std.mem.Allocator, parse_allocator: std.mem.Allocator, tokens: *TokenStream, constants: *std.ArrayList(jv.Value)) !*Ast { - var result = try parsePrimary(allocator, parse_allocator, tokens, constants); - while (true) { - const token = tokens.peek() catch return result; - if (token.kind() == .bracket_left) { - result = try parseSuffix(allocator, parse_allocator, tokens, constants, result); - } else { - break; + fn parseExpr7(self: *Self) !*Ast { + var lhs = try self.parseTerm(); + while (true) { + const token = self.tokens.peek() catch return lhs; + const op: BinaryOp = switch (token.kind()) { + .asterisk => .mul, + .slash => .div, + .percent => .mod, + else => return lhs, + }; + _ = try self.tokens.next(); + const rhs = try self.parseTerm(); + const ast = try self.parse_allocator.create(Ast); + ast.* = .{ .binary_expr = .{ + .op = op, + .lhs = lhs, + .rhs = rhs, + } }; + lhs = ast; } } - return result; -} -fn parsePrimary(allocator: std.mem.Allocator, parse_allocator: std.mem.Allocator, tokens: *TokenStream, constants: *std.ArrayList(jv.Value)) !*Ast { - const first_token = try tokens.peek(); - switch (first_token) { - .keyword_null => { - _ = try tokens.next(); - try constants.append(allocator, .null); - const idx: ConstIndex = @enumFromInt(constants.items.len - 1); - const null_node = try parse_allocator.create(Ast); - null_node.* = .{ .literal = idx }; - return null_node; - }, - .keyword_true => { - _ = try tokens.next(); - try constants.append(allocator, .{ .bool = true }); - const idx: ConstIndex = @enumFromInt(constants.items.len - 1); - const true_node = try parse_allocator.create(Ast); - true_node.* = .{ .literal = idx }; - return true_node; - }, - .keyword_false => { - _ = try tokens.next(); - try constants.append(allocator, .{ .bool = false }); - const idx: ConstIndex = @enumFromInt(constants.items.len - 1); - const false_node = try parse_allocator.create(Ast); - false_node.* = .{ .literal = idx }; - return false_node; - }, - .number => |f| { - _ = try tokens.next(); - const i: i64 = @intFromFloat(f); - if (@as(f64, @floatFromInt(i)) == f) { - try constants.append(allocator, .{ .integer = i }); + fn parseTerm(self: *Self) !*Ast { + var result = try self.parsePrimary(); + while (true) { + const token = self.tokens.peek() catch return result; + if (token.kind() == .bracket_left) { + result = try self.parseSuffix(result); } else { - try constants.append(allocator, .{ .float = f }); + break; } - const idx: ConstIndex = @enumFromInt(constants.items.len - 1); - const number_node = try parse_allocator.create(Ast); - number_node.* = .{ .literal = idx }; - return number_node; - }, - .string => |s| { - _ = try tokens.next(); - try constants.append(allocator, .{ .string = try allocator.dupe(u8, s) }); - const idx: ConstIndex = @enumFromInt(constants.items.len - 1); - const string_node = try parse_allocator.create(Ast); - string_node.* = .{ .literal = idx }; - return string_node; - }, - .dot => { - _ = try tokens.next(); - const ast = try parse_allocator.create(Ast); - ast.* = .identity; - return ast; - }, - .bracket_left => { - _ = try tokens.next(); - _ = try tokens.expect(.bracket_right); - try constants.append(allocator, .{ .array = jv.Array.init(allocator) }); - const idx: ConstIndex = @enumFromInt(constants.items.len - 1); - const array_node = try parse_allocator.create(Ast); - array_node.* = .{ .literal = idx }; - return array_node; - }, - .brace_left => { - _ = try tokens.next(); - _ = try tokens.expect(.brace_right); - try constants.append(allocator, .{ .object = jv.Object.init(allocator) }); - const idx: ConstIndex = @enumFromInt(constants.items.len - 1); - const object_node = try parse_allocator.create(Ast); - object_node.* = .{ .literal = idx }; - return object_node; - }, - .field => |name| { - _ = try tokens.next(); - const ast = try parse_allocator.create(Ast); - ast.* = .{ .object_key = try allocator.dupe(u8, name) }; - return ast; - }, - else => return error.InvalidQuery, + } + return result; } -} -fn parseSuffix(allocator: std.mem.Allocator, parse_allocator: std.mem.Allocator, tokens: *TokenStream, constants: *std.ArrayList(jv.Value), base: *Ast) !*Ast { - _ = try tokens.expect(.bracket_left); - const index_token = try tokens.expect(.number); - _ = try tokens.expect(.bracket_right); + fn parsePrimary(self: *Self) !*Ast { + const first_token = try self.tokens.peek(); + switch (first_token) { + .keyword_null => { + _ = try self.tokens.next(); + try self.constants.append(self.allocator, .null); + const idx: ConstIndex = @enumFromInt(self.constants.items.len - 1); + const null_node = try self.parse_allocator.create(Ast); + null_node.* = .{ .literal = idx }; + return null_node; + }, + .keyword_true => { + _ = try self.tokens.next(); + try self.constants.append(self.allocator, .{ .bool = true }); + const idx: ConstIndex = @enumFromInt(self.constants.items.len - 1); + const true_node = try self.parse_allocator.create(Ast); + true_node.* = .{ .literal = idx }; + return true_node; + }, + .keyword_false => { + _ = try self.tokens.next(); + try self.constants.append(self.allocator, .{ .bool = false }); + const idx: ConstIndex = @enumFromInt(self.constants.items.len - 1); + const false_node = try self.parse_allocator.create(Ast); + false_node.* = .{ .literal = idx }; + return false_node; + }, + .number => |f| { + _ = try self.tokens.next(); + const i: i64 = @intFromFloat(f); + if (@as(f64, @floatFromInt(i)) == f) { + try self.constants.append(self.allocator, .{ .integer = i }); + } else { + try self.constants.append(self.allocator, .{ .float = f }); + } + const idx: ConstIndex = @enumFromInt(self.constants.items.len - 1); + const number_node = try self.parse_allocator.create(Ast); + number_node.* = .{ .literal = idx }; + return number_node; + }, + .string => |s| { + _ = try self.tokens.next(); + try self.constants.append(self.allocator, .{ .string = try self.allocator.dupe(u8, s) }); + const idx: ConstIndex = @enumFromInt(self.constants.items.len - 1); + const string_node = try self.parse_allocator.create(Ast); + string_node.* = .{ .literal = idx }; + return string_node; + }, + .dot => { + _ = try self.tokens.next(); + const ast = try self.parse_allocator.create(Ast); + ast.* = .identity; + return ast; + }, + .bracket_left => { + _ = try self.tokens.next(); + _ = try self.tokens.expect(.bracket_right); + try self.constants.append(self.allocator, .{ .array = jv.Array.init(self.allocator) }); + const idx: ConstIndex = @enumFromInt(self.constants.items.len - 1); + const array_node = try self.parse_allocator.create(Ast); + array_node.* = .{ .literal = idx }; + return array_node; + }, + .brace_left => { + _ = try self.tokens.next(); + _ = try self.tokens.expect(.brace_right); + try self.constants.append(self.allocator, .{ .object = jv.Object.init(self.allocator) }); + const idx: ConstIndex = @enumFromInt(self.constants.items.len - 1); + const object_node = try self.parse_allocator.create(Ast); + object_node.* = .{ .literal = idx }; + return object_node; + }, + .field => |name| { + _ = try self.tokens.next(); + const ast = try self.parse_allocator.create(Ast); + ast.* = .{ .object_key = try self.allocator.dupe(u8, name) }; + return ast; + }, + else => return error.InvalidQuery, + } + } + + fn parseSuffix(self: *Self, base: *Ast) !*Ast { + _ = try self.tokens.expect(.bracket_left); + const index_token = try self.tokens.expect(.number); + _ = try self.tokens.expect(.bracket_right); + + try self.constants.append(self.allocator, .{ .integer = @intFromFloat(index_token.number) }); + const idx: ConstIndex = @enumFromInt(self.constants.items.len - 1); + const index_node = try self.parse_allocator.create(Ast); + index_node.* = .{ .literal = idx }; - try constants.append(allocator, .{ .integer = @intFromFloat(index_token.number) }); - const idx: ConstIndex = @enumFromInt(constants.items.len - 1); - const index_node = try parse_allocator.create(Ast); - index_node.* = .{ .literal = idx }; + const ast = try self.parse_allocator.create(Ast); + ast.* = .{ .array_index = .{ .base = base, .index = index_node } }; + return ast; + } +}; - const ast = try parse_allocator.create(Ast); - ast.* = .{ .array_index = .{ .base = base, .index = index_node } }; - return ast; +pub fn parse(allocator: std.mem.Allocator, parse_allocator: std.mem.Allocator, tokens: []const Token, constants: *std.ArrayList(jv.Value)) !*Ast { + var token_stream = TokenStream.init(tokens); + var parser = Parser{ + .allocator = allocator, + .parse_allocator = parse_allocator, + .tokens = &token_stream, + .constants = constants, + }; + return parser.parseQuery(); } |
