aboutsummaryrefslogtreecommitdiffhomepage
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/lib.rs18
-rw-r--r--src/vm.rs36
-rw-r--r--src/vm/compile.rs90
-rw-r--r--src/vm/instr.rs7
-rw-r--r--src/vm/vm.rs68
5 files changed, 203 insertions, 16 deletions
diff --git a/src/lib.rs b/src/lib.rs
index ecefcb8..68a6bfc 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,16 +1,2 @@
-mod syntax;
-
-pub fn add(left: u64, right: u64) -> u64 {
- left + right
-}
-
-#[cfg(test)]
-mod tests {
- use super::*;
-
- #[test]
- fn it_works() {
- let result = add(2, 2);
- assert_eq!(result, 4);
- }
-}
+pub mod syntax;
+pub mod vm;
diff --git a/src/vm.rs b/src/vm.rs
new file mode 100644
index 0000000..e780e96
--- /dev/null
+++ b/src/vm.rs
@@ -0,0 +1,36 @@
+use crate::syntax::ast::Regex;
+
+pub mod compile;
+pub mod instr;
+pub mod vm;
+
+pub fn is_match(re: Regex, s: &str) -> bool {
+ let instrs = compile::compile(&re);
+ vm::run(&instrs, s.as_bytes())
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ fn re(p: &str) -> Regex {
+ use crate::syntax::parse::parse;
+ parse(p).unwrap()
+ }
+
+ #[test]
+ fn matches() {
+ assert!(is_match(re("a"), "a"));
+ assert!(is_match(re("a*"), ""));
+ assert!(is_match(re("a*"), "a"));
+ assert!(is_match(re("a|b*"), "a"));
+ assert!(is_match(re("a|b*"), "bb"));
+ assert!(is_match(re("(a|b)*"), "abababaa"));
+ assert!(is_match(re("a*b*"), "abb"));
+ }
+
+ #[test]
+ fn does_not_match() {
+ assert!(!is_match(re("a"), "b"));
+ }
+}
diff --git a/src/vm/compile.rs b/src/vm/compile.rs
new file mode 100644
index 0000000..a14143a
--- /dev/null
+++ b/src/vm/compile.rs
@@ -0,0 +1,90 @@
+use crate::syntax::ast::{Pattern, Regex};
+use crate::vm::instr::Instr;
+
+pub fn compile(re: &Regex) -> Vec<Instr> {
+ let compiler = Compiler::default();
+ compiler.compile(&re.root)
+}
+
+#[derive(Default)]
+struct Compiler {
+ instrs: Vec<Instr>,
+ labels: Vec<usize>,
+}
+
+impl Compiler {
+ fn compile(mut self, pat: &Pattern) -> Vec<Instr> {
+ self.compile_pattern(pat);
+ self.resolve_labels();
+ self.instrs.push(Instr::Match);
+ self.instrs
+ }
+
+ fn compile_pattern(&mut self, pat: &Pattern) {
+ match pat {
+ Pattern::Empty => todo!(),
+ Pattern::Literal(c) => self.instrs.push(Instr::Char(*c)),
+ Pattern::Concat(a, b) => {
+ self.compile_pattern(&a);
+ self.compile_pattern(&b);
+ }
+ Pattern::Alt(a, b) => {
+ // split l1 l2
+ // l1: compile(a)
+ // jmp l3
+ // l2: compile(b)
+ // l3:
+ let label_1_index = self.create_new_label();
+ let label_2_index = self.create_new_label();
+ let label_3_index = self.create_new_label();
+ self.instrs.push(Instr::Split(label_1_index, label_2_index));
+ self.fix_label_address(label_1_index);
+ self.compile_pattern(&a);
+ self.instrs.push(Instr::Jmp(label_3_index));
+ self.fix_label_address(label_2_index);
+ self.compile_pattern(&b);
+ self.fix_label_address(label_3_index);
+ }
+ Pattern::Star(a) => {
+ // l1: split l2 l3
+ // l2: compile(a)
+ // jmp l1
+ // l3:
+ let label_1_index = self.create_new_label();
+ let label_2_index = self.create_new_label();
+ let label_3_index = self.create_new_label();
+ self.fix_label_address(label_1_index);
+ self.instrs.push(Instr::Split(label_2_index, label_3_index));
+ self.fix_label_address(label_2_index);
+ self.compile_pattern(&a);
+ self.instrs.push(Instr::Jmp(label_1_index));
+ self.fix_label_address(label_3_index);
+ }
+ }
+ }
+
+ fn resolve_labels(&mut self) {
+ for instr in self.instrs.iter_mut() {
+ match instr {
+ &mut Instr::Jmp(ref mut a) => {
+ *a = self.labels[*a];
+ }
+ &mut Instr::Split(ref mut a, ref mut b) => {
+ *a = self.labels[*a];
+ *b = self.labels[*b];
+ }
+ _ => (),
+ }
+ }
+ }
+
+ fn create_new_label(&mut self) -> usize {
+ self.labels.push(0);
+ self.labels.len() - 1
+ }
+
+ fn fix_label_address(&mut self, label_index: usize) {
+ let address = self.instrs.len();
+ self.labels[label_index] = address;
+ }
+}
diff --git a/src/vm/instr.rs b/src/vm/instr.rs
new file mode 100644
index 0000000..b03cd11
--- /dev/null
+++ b/src/vm/instr.rs
@@ -0,0 +1,7 @@
+#[derive(Debug)]
+pub enum Instr {
+ Char(u8),
+ Match,
+ Jmp(usize),
+ Split(usize, usize),
+}
diff --git a/src/vm/vm.rs b/src/vm/vm.rs
new file mode 100644
index 0000000..367bbaf
--- /dev/null
+++ b/src/vm/vm.rs
@@ -0,0 +1,68 @@
+use crate::vm::instr::Instr;
+
+pub fn run(instrs: &[Instr], s: &[u8]) -> bool {
+ let mut vm = Vm::new(instrs, s);
+ vm.run()
+}
+
+struct Thread {
+ pc: usize, // program counter
+ sp: usize, // string pointer
+}
+
+impl Thread {
+ fn new(pc: usize, sp: usize) -> Self {
+ Self { pc, sp }
+ }
+}
+
+struct Vm<'instr, 'input> {
+ instrs: &'instr [Instr],
+ string: &'input [u8],
+ thread_stack: Vec<Thread>,
+}
+
+impl<'instr, 'input> Vm<'instr, 'input> {
+ fn new(instrs: &'instr [Instr], string: &'input [u8]) -> Self {
+ let initial_thread = Thread::new(0, 0);
+ Self {
+ instrs,
+ string,
+ thread_stack: vec![initial_thread],
+ }
+ }
+
+ fn run(&mut self) -> bool {
+ while let Some(t) = self.thread_stack.pop() {
+ let mut pc = t.pc;
+ let mut sp = t.sp;
+ loop {
+ match self.instrs[pc] {
+ Instr::Char(expected) => {
+ if let Some(c) = self.string.get(sp) {
+ if *c == expected {
+ pc += 1;
+ sp += 1;
+ } else {
+ break;
+ }
+ } else {
+ break;
+ }
+ }
+ Instr::Match => {
+ return true;
+ }
+ Instr::Jmp(a) => {
+ pc = a;
+ }
+ Instr::Split(a, b) => {
+ self.thread_stack.push(Thread::new(b, sp));
+ pc = a;
+ }
+ }
+ }
+ }
+ false
+ }
+}