aboutsummaryrefslogtreecommitdiffhomepage
path: root/src/syntax/parse.rs
blob: d7933ce5e5f6feaae49b44c9ce32938ea3d68e9c (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
use crate::syntax::ast::{Pattern, Regex};

// SYNTAX
//
// regex ::= pattern
// pattern ::= alt-pattern
//
// alt-pattern ::= concat-pattern
//               | concat-pattern '|' alt-pattern
// concat-pattern ::= star-pattern
//                  | star-pattern concat-pattern
// star-pattern ::= primary-pattern
//                | primary-pattern '*'
// primary-pattern ::= <empty>
//                   | <non-meta-character>
//                   | '(' pattern ')'
/// Parses the regular-expression source text `s` into a [`Regex`].
///
/// The text is handed to the parser as raw bytes, so every non-meta byte
/// becomes its own literal.
///
/// # Errors
///
/// Returns a human-readable message when the parser rejects the input.
pub fn parse(s: &str) -> Result<Regex, String> {
    Parser::new(s.as_bytes()).parse_regex()
}

/// Recursive-descent parser over the raw bytes of a regex source string.
///
/// Each `parse_*` method implements one grammar production and advances
/// `pos` past the bytes it consumes.
struct Parser<'input> {
    /// The complete regex source, as bytes.
    input: &'input [u8],
    /// Index into `input` of the next unconsumed byte.
    pos: usize,
}

impl<'input> Parser<'input> {
    /// Creates a parser positioned at the first byte of `s`.
    fn new(s: &'input [u8]) -> Self {
        Self { input: s, pos: 0 }
    }

    /// Returns the next unconsumed byte without consuming it, or `None` at
    /// end of input.
    fn peek(&self) -> Option<u8> {
        self.input.get(self.pos).copied()
    }

    /// Consumes the next byte if it equals `expected` and reports whether it
    /// did so.
    fn eat(&mut self, expected: u8) -> bool {
        if self.peek() == Some(expected) {
            self.pos += 1;
            true
        } else {
            false
        }
    }

    /// regex ::= pattern
    ///
    /// Parses a whole regex and requires that every input byte was consumed.
    ///
    /// # Errors
    ///
    /// Returns `"unconsumed input: <pos>"` when parsing stops early (for
    /// example at an unmatched `)`), or whatever error a nested production
    /// reported.
    fn parse_regex(&mut self) -> Result<Regex, String> {
        let root = self.parse_pattern()?;
        if self.pos == self.input.len() {
            Ok(Regex { root })
        } else {
            Err(format!("unconsumed input: {}", self.pos))
        }
    }

    /// pattern ::= alt-pattern
    fn parse_pattern(&mut self) -> Result<Box<Pattern>, String> {
        self.parse_alt_pattern()
    }

    /// alt-pattern ::= concat-pattern ('|' concat-pattern)*
    ///
    /// Alternatives are folded to the left, so `a|b|c` becomes
    /// `Alt(Alt(a, b), c)`.
    fn parse_alt_pattern(&mut self) -> Result<Box<Pattern>, String> {
        let mut lhs = self.parse_concat_pattern()?;
        while self.eat(b'|') {
            let rhs = self.parse_concat_pattern()?;
            lhs = Box::new(Pattern::Alt(lhs, rhs));
        }
        Ok(lhs)
    }

    /// concat-pattern ::= star-pattern+
    ///
    /// Keeps appending star-patterns until end of input, `|` or `)`; those
    /// delimiters are left for the caller to handle.
    fn parse_concat_pattern(&mut self) -> Result<Box<Pattern>, String> {
        let mut lhs = self.parse_star_pattern()?;
        // Every iteration consumes at least one byte: the only byte for which
        // the primary parser consumes nothing and that can reach this loop is
        // `*`, and `parse_star_pattern` consumes it.
        while !matches!(self.peek(), None | Some(b'|') | Some(b')')) {
            let rhs = self.parse_star_pattern()?;
            lhs = Box::new(Pattern::Concat(lhs, rhs));
        }
        Ok(lhs)
    }

    /// star-pattern ::= primary-pattern | primary-pattern '*'
    fn parse_star_pattern(&mut self) -> Result<Box<Pattern>, String> {
        let inner = self.parse_primary_pattern()?;
        if self.eat(b'*') {
            Ok(Box::new(Pattern::Star(inner)))
        } else {
            Ok(inner)
        }
    }

    /// primary-pattern ::= <empty> | <non-meta-character> | '(' pattern ')'
    ///
    /// The meta bytes `|`, `*` and `)` are never consumed here: when one of
    /// them (or end of input) appears where a primary is expected, the
    /// primary is empty and the byte is left for the enclosing production.
    /// This is what makes `|a` parse as `Alt(Empty, a)` and a bare `*` parse
    /// as `Star(Empty)`.
    ///
    /// # Errors
    ///
    /// Returns `"paren not balanced"` when a `(` has no matching `)`.
    fn parse_primary_pattern(&mut self) -> Result<Box<Pattern>, String> {
        match self.peek() {
            Some(b'(') => {
                self.pos += 1;
                let inner = self.parse_pattern()?;
                if self.eat(b')') {
                    Ok(inner)
                } else {
                    Err("paren not balanced".into())
                }
            }
            None | Some(b')') | Some(b'|') | Some(b'*') => Ok(Box::new(Pattern::Empty)),
            Some(c) => {
                self.pos += 1;
                Ok(Box::new(Pattern::Literal(c)))
            }
        }
    }
}