aboutsummaryrefslogtreecommitdiffhomepage
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/lib.rs2
-rw-r--r--src/syntax.rs27
-rw-r--r--src/syntax/ast.rs11
-rw-r--r--src/syntax/parse.rs101
4 files changed, 141 insertions, 0 deletions
diff --git a/src/lib.rs b/src/lib.rs
index b93cf3f..ecefcb8 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,3 +1,5 @@
+mod syntax;
+
pub fn add(left: u64, right: u64) -> u64 {
left + right
}
diff --git a/src/syntax.rs b/src/syntax.rs
new file mode 100644
index 0000000..abf7309
--- /dev/null
+++ b/src/syntax.rs
@@ -0,0 +1,27 @@
+pub mod ast;
+pub mod parse;
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ #[test]
+ fn parse_valid() {
+ assert!(parse::parse("").is_ok());
+ assert!(parse::parse("a").is_ok());
+ assert!(parse::parse("ab").is_ok());
+ assert!(parse::parse("abc").is_ok());
+ assert!(parse::parse("a|b").is_ok());
+ assert!(parse::parse("a|b*").is_ok());
+ assert!(parse::parse("(a|b)*").is_ok());
+ assert!(parse::parse("(((a|b)))*").is_ok());
+ assert!(parse::parse("a*b*").is_ok());
+ }
+
+ #[test]
+ fn parse_invalid() {
+ assert!(parse::parse("(").is_err());
+ assert!(parse::parse("()))").is_err());
+ assert!(parse::parse("(((())").is_err());
+ }
+}
diff --git a/src/syntax/ast.rs b/src/syntax/ast.rs
new file mode 100644
index 0000000..bb12e81
--- /dev/null
+++ b/src/syntax/ast.rs
@@ -0,0 +1,11 @@
+pub struct Regex {
+ pub root: Box<Pattern>,
+}
+
+pub enum Pattern {
+ Empty,
+ Literal(u8),
+ Concat(Box<Pattern>, Box<Pattern>),
+ Alt(Box<Pattern>, Box<Pattern>),
+ Star(Box<Pattern>),
+}
diff --git a/src/syntax/parse.rs b/src/syntax/parse.rs
new file mode 100644
index 0000000..d7933ce
--- /dev/null
+++ b/src/syntax/parse.rs
@@ -0,0 +1,101 @@
+use crate::syntax::ast::{Pattern, Regex};
+
+// SYNTAX
+//
+// regex ::= pattern
+// pattern ::= alt-pattern
+//
+// alt-pattern ::= concat-pattern
+// | concat-pattern '|' alt-pattern
+// concat-pattern ::= star-pattern
+// | star-pattern concat-pattern
+// star-pattern ::= primary-pattern
+// | primary-pattern '*'
+// primary-pattern ::= <empty>
+// | <non-meta-character>
+// | '(' pattern ')'
+pub fn parse(s: &str) -> Result<Regex, String> {
+ let mut parser = Parser::new(s.as_bytes());
+ parser.parse_regex()
+}
+
+struct Parser<'input> {
+ str: &'input [u8],
+ pos: usize,
+}
+
+impl<'input> Parser<'input> {
+ fn new(s: &'input [u8]) -> Self {
+ Self { str: s, pos: 0 }
+ }
+
+ fn parse_regex(&mut self) -> Result<Regex, String> {
+ let p = self.parse_pattern()?;
+ if self.pos == self.str.len() {
+ Ok(Regex { root: p })
+ } else {
+ Err(format!("unconsumed input: {}", self.pos))
+ }
+ }
+
+ fn parse_pattern(&mut self) -> Result<Box<Pattern>, String> {
+ self.parse_alt_pattern()
+ }
+
+ fn parse_alt_pattern(&mut self) -> Result<Box<Pattern>, String> {
+ let mut p1 = self.parse_concat_pattern()?;
+ loop {
+ if matches!(self.str.get(self.pos), Some(b'|')) {
+ self.pos += 1;
+ let p2 = self.parse_concat_pattern()?;
+ p1 = Box::new(Pattern::Alt(p1, p2));
+ } else {
+ return Ok(p1);
+ }
+ }
+ }
+
+ fn parse_concat_pattern(&mut self) -> Result<Box<Pattern>, String> {
+ let mut p1 = self.parse_star_pattern()?;
+ loop {
+ match self.str.get(self.pos) {
+ None | Some(b'|') | Some(b')') => return Ok(p1),
+ _ => {
+ let p2 = self.parse_star_pattern()?;
+ p1 = Box::new(Pattern::Concat(p1, p2));
+ }
+ }
+ }
+ }
+
+ fn parse_star_pattern(&mut self) -> Result<Box<Pattern>, String> {
+ let pat = self.parse_primary_pattern()?;
+ if matches!(self.str.get(self.pos), Some(b'*')) {
+ self.pos += 1;
+ Ok(Box::new(Pattern::Star(pat)))
+ } else {
+ Ok(pat)
+ }
+ }
+
+ fn parse_primary_pattern(&mut self) -> Result<Box<Pattern>, String> {
+ match self.str.get(self.pos) {
+ Some(b'(') => {
+ self.pos += 1;
+ let pat = self.parse_pattern()?;
+ if matches!(self.str.get(self.pos), Some(b')')) {
+ self.pos += 1;
+ Ok(pat)
+ } else {
+ Err("paren not balanced".into())
+ }
+ }
+ Some(b')') => Ok(Box::new(Pattern::Empty)),
+ Some(c) => {
+ self.pos += 1;
+ Ok(Box::new(Pattern::Literal(*c)))
+ }
+ None => Ok(Box::new(Pattern::Empty)),
+ }
+ }
+}