diff options
| author | nsfisis <nsfisis@gmail.com> | 2025-07-19 15:24:30 +0900 |
|---|---|---|
| committer | nsfisis <nsfisis@gmail.com> | 2025-07-19 15:41:49 +0900 |
| commit | 145efc39d14c242266c574063ffcba329c62e8a3 (patch) | |
| tree | a6f6e2c78b14ed97486041cd853d2140f1b659e2 | |
| parent | 6cbd16ed14ac36f8e117937fc2dc73a8a9ddb698 (diff) | |
| download | regulus-145efc39d14c242266c574063ffcba329c62e8a3.tar.gz regulus-145efc39d14c242266c574063ffcba329c62e8a3.tar.zst regulus-145efc39d14c242266c574063ffcba329c62e8a3.zip | |
implement parsing
| -rw-r--r-- | src/lib.rs | 2 | ||||
| -rw-r--r-- | src/syntax.rs | 27 | ||||
| -rw-r--r-- | src/syntax/ast.rs | 11 | ||||
| -rw-r--r-- | src/syntax/parse.rs | 101 |
4 files changed, 141 insertions, 0 deletions
@@ -1,3 +1,5 @@ +mod syntax; + pub fn add(left: u64, right: u64) -> u64 { left + right } diff --git a/src/syntax.rs b/src/syntax.rs new file mode 100644 index 0000000..abf7309 --- /dev/null +++ b/src/syntax.rs @@ -0,0 +1,27 @@ +pub mod ast; +pub mod parse; + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn parse_valid() { + assert!(parse::parse("").is_ok()); + assert!(parse::parse("a").is_ok()); + assert!(parse::parse("ab").is_ok()); + assert!(parse::parse("abc").is_ok()); + assert!(parse::parse("a|b").is_ok()); + assert!(parse::parse("a|b*").is_ok()); + assert!(parse::parse("(a|b)*").is_ok()); + assert!(parse::parse("(((a|b)))*").is_ok()); + assert!(parse::parse("a*b*").is_ok()); + } + + #[test] + fn parse_invalid() { + assert!(parse::parse("(").is_err()); + assert!(parse::parse("()))").is_err()); + assert!(parse::parse("(((())").is_err()); + } +} diff --git a/src/syntax/ast.rs b/src/syntax/ast.rs new file mode 100644 index 0000000..bb12e81 --- /dev/null +++ b/src/syntax/ast.rs @@ -0,0 +1,11 @@ +pub struct Regex { + pub root: Box<Pattern>, +} + +pub enum Pattern { + Empty, + Literal(u8), + Concat(Box<Pattern>, Box<Pattern>), + Alt(Box<Pattern>, Box<Pattern>), + Star(Box<Pattern>), +} diff --git a/src/syntax/parse.rs b/src/syntax/parse.rs new file mode 100644 index 0000000..d7933ce --- /dev/null +++ b/src/syntax/parse.rs @@ -0,0 +1,101 @@ +use crate::syntax::ast::{Pattern, Regex}; + +// SYNTAX +// +// regex ::= pattern +// pattern ::= alt-pattern +// +// alt-pattern ::= concat-pattern +// | concat-pattern '|' alt-pattern +// concat-pattern ::= star-pattern +// | star-pattern concat-pattern +// star-pattern ::= primary-pattern +// | primary-pattern '*' +// primary-pattern ::= <empty> +// | <non-meta-character> +// | '(' pattern ')' +pub fn parse(s: &str) -> Result<Regex, String> { + let mut parser = Parser::new(s.as_bytes()); + parser.parse_regex() +} + +struct Parser<'input> { + str: &'input [u8], + pos: usize, +} + +impl<'input> Parser<'input> { + fn new(s: &'input [u8]) -> Self { + Self { str: s, pos: 0 } + } + + fn parse_regex(&mut self) -> Result<Regex, String> { + let p = self.parse_pattern()?; + if self.pos == self.str.len() { + Ok(Regex { root: p }) + } else { + Err(format!("unconsumed input: {}", self.pos)) + } + } + + fn parse_pattern(&mut self) -> Result<Box<Pattern>, String> { + self.parse_alt_pattern() + } + + fn parse_alt_pattern(&mut self) -> Result<Box<Pattern>, String> { + let mut p1 = self.parse_concat_pattern()?; + loop { + if matches!(self.str.get(self.pos), Some(b'|')) { + self.pos += 1; + let p2 = self.parse_concat_pattern()?; + p1 = Box::new(Pattern::Alt(p1, p2)); + } else { + return Ok(p1); + } + } + } + + fn parse_concat_pattern(&mut self) -> Result<Box<Pattern>, String> { + let mut p1 = self.parse_star_pattern()?; + loop { + match self.str.get(self.pos) { + None | Some(b'|') | Some(b')') => return Ok(p1), + _ => { + let p2 = self.parse_star_pattern()?; + p1 = Box::new(Pattern::Concat(p1, p2)); + } + } + } + } + + fn parse_star_pattern(&mut self) -> Result<Box<Pattern>, String> { + let pat = self.parse_primary_pattern()?; + if matches!(self.str.get(self.pos), Some(b'*')) { + self.pos += 1; + Ok(Box::new(Pattern::Star(pat))) + } else { + Ok(pat) + } + } + + fn parse_primary_pattern(&mut self) -> Result<Box<Pattern>, String> { + match self.str.get(self.pos) { + Some(b'(') => { + self.pos += 1; + let pat = self.parse_pattern()?; + if matches!(self.str.get(self.pos), Some(b')')) { + self.pos += 1; + Ok(pat) + } else { + Err("paren not balanced".into()) + } + } + Some(b')') => Ok(Box::new(Pattern::Empty)), + Some(c) => { + self.pos += 1; + Ok(Box::new(Pattern::Literal(*c))) + } + None => Ok(Box::new(Pattern::Empty)), + } + } +} |
