diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/lib.rs | 18 | ||||
| -rw-r--r-- | src/vm.rs | 36 | ||||
| -rw-r--r-- | src/vm/compile.rs | 90 | ||||
| -rw-r--r-- | src/vm/instr.rs | 7 | ||||
| -rw-r--r-- | src/vm/vm.rs | 68 |
5 files changed, 203 insertions, 16 deletions
@@ -1,16 +1,2 @@ -mod syntax; - -pub fn add(left: u64, right: u64) -> u64 { - left + right -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn it_works() { - let result = add(2, 2); - assert_eq!(result, 4); - } -} +pub mod syntax; +pub mod vm; diff --git a/src/vm.rs b/src/vm.rs new file mode 100644 index 0000000..e780e96 --- /dev/null +++ b/src/vm.rs @@ -0,0 +1,36 @@ +use crate::syntax::ast::Regex; + +pub mod compile; +pub mod instr; +pub mod vm; + +pub fn is_match(re: Regex, s: &str) -> bool { + let instrs = compile::compile(&re); + vm::run(&instrs, s.as_bytes()) +} + +#[cfg(test)] +mod tests { + use super::*; + + fn re(p: &str) -> Regex { + use crate::syntax::parse::parse; + parse(p).unwrap() + } + + #[test] + fn matches() { + assert!(is_match(re("a"), "a")); + assert!(is_match(re("a*"), "")); + assert!(is_match(re("a*"), "a")); + assert!(is_match(re("a|b*"), "a")); + assert!(is_match(re("a|b*"), "bb")); + assert!(is_match(re("(a|b)*"), "abababaa")); + assert!(is_match(re("a*b*"), "abb")); + } + + #[test] + fn does_not_match() { + assert!(!is_match(re("a"), "b")); + } +} diff --git a/src/vm/compile.rs b/src/vm/compile.rs new file mode 100644 index 0000000..a14143a --- /dev/null +++ b/src/vm/compile.rs @@ -0,0 +1,90 @@ +use crate::syntax::ast::{Pattern, Regex}; +use crate::vm::instr::Instr; + +pub fn compile(re: &Regex) -> Vec<Instr> { + let compiler = Compiler::default(); + compiler.compile(&re.root) +} + +#[derive(Default)] +struct Compiler { + instrs: Vec<Instr>, + labels: Vec<usize>, +} + +impl Compiler { + fn compile(mut self, pat: &Pattern) -> Vec<Instr> { + self.compile_pattern(pat); + self.resolve_labels(); + self.instrs.push(Instr::Match); + self.instrs + } + + fn compile_pattern(&mut self, pat: &Pattern) { + match pat { + Pattern::Empty => todo!(), + Pattern::Literal(c) => self.instrs.push(Instr::Char(*c)), + Pattern::Concat(a, b) => { + self.compile_pattern(&a); + self.compile_pattern(&b); + } + Pattern::Alt(a, b) => { + // split l1 l2 + // l1: compile(a) + // jmp l3 + // l2: compile(b) + // l3: + let label_1_index = self.create_new_label(); + let label_2_index = self.create_new_label(); + let label_3_index = self.create_new_label(); + self.instrs.push(Instr::Split(label_1_index, label_2_index)); + self.fix_label_address(label_1_index); + self.compile_pattern(&a); + self.instrs.push(Instr::Jmp(label_3_index)); + self.fix_label_address(label_2_index); + self.compile_pattern(&b); + self.fix_label_address(label_3_index); + } + Pattern::Star(a) => { + // l1: split l2 l3 + // l2: compile(a) + // jmp l1 + // l3: + let label_1_index = self.create_new_label(); + let label_2_index = self.create_new_label(); + let label_3_index = self.create_new_label(); + self.fix_label_address(label_1_index); + self.instrs.push(Instr::Split(label_2_index, label_3_index)); + self.fix_label_address(label_2_index); + self.compile_pattern(&a); + self.instrs.push(Instr::Jmp(label_1_index)); + self.fix_label_address(label_3_index); + } + } + } + + fn resolve_labels(&mut self) { + for instr in self.instrs.iter_mut() { + match instr { + &mut Instr::Jmp(ref mut a) => { + *a = self.labels[*a]; + } + &mut Instr::Split(ref mut a, ref mut b) => { + *a = self.labels[*a]; + *b = self.labels[*b]; + } + _ => (), + } + } + } + + fn create_new_label(&mut self) -> usize { + self.labels.push(0); + self.labels.len() - 1 + } + + fn fix_label_address(&mut self, label_index: usize) { + let address = self.instrs.len(); + self.labels[label_index] = address; + } +} diff --git a/src/vm/instr.rs b/src/vm/instr.rs new file mode 100644 index 0000000..b03cd11 --- /dev/null +++ b/src/vm/instr.rs @@ -0,0 +1,7 @@ +#[derive(Debug)] +pub enum Instr { + Char(u8), + Match, + Jmp(usize), + Split(usize, usize), +} diff --git a/src/vm/vm.rs b/src/vm/vm.rs new file mode 100644 index 0000000..367bbaf --- /dev/null +++ b/src/vm/vm.rs @@ -0,0 +1,68 @@ +use crate::vm::instr::Instr; + +pub fn run(instrs: &[Instr], s: &[u8]) -> bool { + let mut vm = Vm::new(instrs, s); + vm.run() +} + +struct Thread { + pc: usize, // program counter + sp: usize, // string pointer +} + +impl Thread { + fn new(pc: usize, sp: usize) -> Self { + Self { pc, sp } + } +} + +struct Vm<'instr, 'input> { + instrs: &'instr [Instr], + string: &'input [u8], + thread_stack: Vec<Thread>, +} + +impl<'instr, 'input> Vm<'instr, 'input> { + fn new(instrs: &'instr [Instr], string: &'input [u8]) -> Self { + let initial_thread = Thread::new(0, 0); + Self { + instrs, + string, + thread_stack: vec![initial_thread], + } + } + + fn run(&mut self) -> bool { + while let Some(t) = self.thread_stack.pop() { + let mut pc = t.pc; + let mut sp = t.sp; + loop { + match self.instrs[pc] { + Instr::Char(expected) => { + if let Some(c) = self.string.get(sp) { + if *c == expected { + pc += 1; + sp += 1; + } else { + break; + } + } else { + break; + } + } + Instr::Match => { + return true; + } + Instr::Jmp(a) => { + pc = a; + } + Instr::Split(a, b) => { + self.thread_stack.push(Thread::new(b, sp)); + pc = a; + } + } + } + } + false + } +} |
