diff options
| author | nsfisis <nsfisis@gmail.com> | 2024-07-15 13:58:32 +0900 |
|---|---|---|
| committer | nsfisis <nsfisis@gmail.com> | 2024-07-15 13:58:32 +0900 |
| commit | a245b635c9099448a00eea15cec5bc61dcf1d026 (patch) | |
| tree | 7bf7476fa386f3746f9ad9471f2fa00bb45fa41d /src | |
| parent | 8eae1719cd68929580ea2c8e795d238a1a03d81d (diff) | |
| download | reparojson-a245b635c9099448a00eea15cec5bc61dcf1d026.tar.gz reparojson-a245b635c9099448a00eea15cec5bc61dcf1d026.tar.zst reparojson-a245b635c9099448a00eea15cec5bc61dcf1d026.zip | |
initial implementation
Diffstat (limited to 'src')
| -rw-r--r-- | src/lib.rs | 636 | ||||
| -rw-r--r-- | src/main.rs | 77 |
2 files changed, 711 insertions, 2 deletions
diff --git a/src/lib.rs b/src/lib.rs new file mode 100644 index 0000000..5fca21c --- /dev/null +++ b/src/lib.rs @@ -0,0 +1,636 @@ +use std::io::{Read, Write}; +use std::iter::Peekable; + +pub type RepairResult = Result<RepairOk, RepairErr>; + +pub enum RepairOk { + Valid, + Repaired, +} + +pub enum RepairErr { + Invalid(SyntaxError), + IoErr(std::io::Error), +} + +impl From<std::io::Error> for RepairErr { + fn from(value: std::io::Error) -> Self { + Self::IoErr(value) + } +} + +pub enum SyntaxError { + UnexpectedEof, + InvalidValue, +} + +impl SyntaxError { + fn to_result(self) -> ParserResult { + Err(RepairErr::Invalid(self)) + } +} + +impl std::fmt::Display for SyntaxError { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + match self { + Self::UnexpectedEof => write!(f, "unexpected end of file"), + Self::InvalidValue => write!(f, "invalid value"), + } + } +} + +pub fn repair(r: impl Read, mut w: impl Write) -> RepairResult { + let mut p = Parser::new(); + match p.walk_json(&mut r.bytes().peekable(), &mut w) { + Ok(_) => Ok(if p.repaired() { + RepairOk::Repaired + } else { + RepairOk::Valid + }), + Err(err) => Err(err), + } +} + +struct Parser { + repaired: bool, +} + +type ParserResult = Result<(), RepairErr>; + +impl Parser { + fn new() -> Self { + Self { repaired: false } + } + + fn repaired(&self) -> bool { + self.repaired + } + + fn walk_json<I: Iterator<Item = std::io::Result<u8>>, W: Write>( + &mut self, + input: &mut Peekable<I>, + w: &mut W, + ) -> ParserResult { + self.walk_element(input, w) + } + + fn walk_value<I: Iterator<Item = std::io::Result<u8>>, W: Write>( + &mut self, + input: &mut Peekable<I>, + w: &mut W, + ) -> ParserResult { + let Some(c) = input.peek() else { + return SyntaxError::UnexpectedEof.to_result(); + }; + let Ok(c) = c else { + return Err(input.next().unwrap().unwrap_err().into()); + }; + + match c { + b'n' => { + input.next(); // => n + match input.next() { + Some(Ok(b'u')) => (), + Some(Ok(_)) => return SyntaxError::InvalidValue.to_result(), + Some(Err(err)) => return Err(err.into()), + None => return SyntaxError::UnexpectedEof.to_result(), + } + match input.next() { + Some(Ok(b'l')) => (), + Some(Ok(_)) => return SyntaxError::InvalidValue.to_result(), + Some(Err(err)) => return Err(err.into()), + None => return SyntaxError::UnexpectedEof.to_result(), + } + match input.next() { + Some(Ok(b'l')) => (), + Some(Ok(_)) => return SyntaxError::InvalidValue.to_result(), + Some(Err(err)) => return Err(err.into()), + None => return SyntaxError::UnexpectedEof.to_result(), + } + w.write_all(b"null")?; + Ok(()) + } + b't' => { + input.next(); // => t + match input.next() { + Some(Ok(b'r')) => (), + Some(Ok(_)) => return SyntaxError::InvalidValue.to_result(), + Some(Err(err)) => return Err(err.into()), + None => return SyntaxError::UnexpectedEof.to_result(), + } + match input.next() { + Some(Ok(b'u')) => (), + Some(Ok(_)) => return SyntaxError::InvalidValue.to_result(), + Some(Err(err)) => return Err(err.into()), + None => return SyntaxError::UnexpectedEof.to_result(), + } + match input.next() { + Some(Ok(b'e')) => (), + Some(Ok(_)) => return SyntaxError::InvalidValue.to_result(), + Some(Err(err)) => return Err(err.into()), + None => return SyntaxError::UnexpectedEof.to_result(), + } + w.write_all(b"true")?; + Ok(()) + } + b'f' => { + input.next(); // => f + match input.next() { + Some(Ok(b'a')) => (), + Some(Ok(_)) => return SyntaxError::InvalidValue.to_result(), + Some(Err(err)) => return Err(err.into()), + None => return SyntaxError::UnexpectedEof.to_result(), + } + match input.next() { + Some(Ok(b'l')) => (), + Some(Ok(_)) => return SyntaxError::InvalidValue.to_result(), + Some(Err(err)) => return Err(err.into()), + None => return SyntaxError::UnexpectedEof.to_result(), + } + match input.next() { + Some(Ok(b's')) => (), + Some(Ok(_)) => return SyntaxError::InvalidValue.to_result(), + Some(Err(err)) => return Err(err.into()), + None => return SyntaxError::UnexpectedEof.to_result(), + } + match input.next() { + Some(Ok(b'e')) => (), + Some(Ok(_)) => return SyntaxError::InvalidValue.to_result(), + Some(Err(err)) => return Err(err.into()), + None => return SyntaxError::UnexpectedEof.to_result(), + } + w.write_all(b"false")?; + Ok(()) + } + b'{' => self.walk_object(input, w), + b'[' => self.walk_array(input, w), + b'"' => self.walk_string(input, w), + b'-' | b'0' | b'1' | b'2' | b'3' | b'4' | b'5' | b'6' | b'7' | b'8' | b'9' => { + self.walk_number(input, w) + } + _ => SyntaxError::InvalidValue.to_result(), + } + } + + fn walk_object<I: Iterator<Item = std::io::Result<u8>>, W: Write>( + &mut self, + input: &mut Peekable<I>, + w: &mut W, + ) -> ParserResult { + w.write_all(b"{")?; + input.next(); // => { + + self.walk_ws(input, w)?; + + // members_opt + let Some(first) = input.peek() else { + return SyntaxError::UnexpectedEof.to_result(); + }; + let Ok(first) = first else { + return Err(input.next().unwrap().unwrap_err().into()); + }; + if *first == b'"' { + self.walk_members(input, w)?; + } + + // trailing_comma_opt + let Some(maybe_comma) = input.peek() else { + return SyntaxError::UnexpectedEof.to_result(); + }; + let Ok(maybe_comma) = maybe_comma else { + return Err(input.next().unwrap().unwrap_err().into()); + }; + if *maybe_comma == b',' { + self.repaired = true; + input.next(); + self.walk_ws(input, w)?; + } + + let Some(last) = input.next() else { + return SyntaxError::UnexpectedEof.to_result(); + }; + let last = last?; + if last != b'}' { + return SyntaxError::InvalidValue.to_result(); + } + w.write_all(b"}")?; + Ok(()) + } + + fn walk_members<I: Iterator<Item = std::io::Result<u8>>, W: Write>( + &mut self, + input: &mut Peekable<I>, + w: &mut W, + ) -> ParserResult { + loop { + self.walk_member(input, w)?; + + let mut ws = Vec::with_capacity(1024); + self.walk_ws(input, &mut ws)?; + + let Some(next) = input.peek() else { + return SyntaxError::UnexpectedEof.to_result(); + }; + let Ok(next) = next else { + return Err(input.next().unwrap().unwrap_err().into()); + }; + + match *next { + b'}' => { + w.write_all(&mut ws)?; + return Ok(()); + } + b',' => { + w.write_all(&mut ws)?; + + input.next(); + + self.walk_ws(input, &mut ws)?; + + let Some(c) = input.peek() else { + return SyntaxError::UnexpectedEof.to_result(); + }; + let Ok(c) = c else { + return Err(input.next().unwrap().unwrap_err().into()); + }; + match *c { + b'}' => { + self.repaired = true; + w.write_all(&mut ws)?; + return Ok(()); + } + _ => { + w.write_all(b",")?; + w.write_all(&mut ws)?; + } + } + } + _ => { + self.repaired = true; + w.write_all(b",")?; + w.write_all(&mut ws)?; + } + } + } + } + + fn walk_member<I: Iterator<Item = std::io::Result<u8>>, W: Write>( + &mut self, + input: &mut Peekable<I>, + w: &mut W, + ) -> ParserResult { + self.walk_string(input, w)?; + self.walk_ws(input, w)?; + let Some(colon) = input.next() else { + return SyntaxError::UnexpectedEof.to_result(); + }; + let colon = colon?; + if colon != b':' { + return SyntaxError::InvalidValue.to_result(); + } + w.write_all(b":")?; + self.walk_element(input, w) + } + + fn walk_array<I: Iterator<Item = std::io::Result<u8>>, W: Write>( + &mut self, + input: &mut Peekable<I>, + w: &mut W, + ) -> ParserResult { + w.write_all(b"[")?; + input.next(); // => [ + + self.walk_ws(input, w)?; + + // elements_opt + let Some(first) = input.peek() else { + return SyntaxError::UnexpectedEof.to_result(); + }; + let Ok(first) = first else { + return Err(input.next().unwrap().unwrap_err().into()); + }; + if *first != b',' && *first != b']' { + self.walk_elements(input, w)?; + } + + // trailing_comma_opt + let Some(maybe_comma) = input.peek() else { + return SyntaxError::UnexpectedEof.to_result(); + }; + let Ok(maybe_comma) = maybe_comma else { + return Err(input.next().unwrap().unwrap_err().into()); + }; + if *maybe_comma == b',' { + self.repaired = true; + input.next(); + self.walk_ws(input, w)?; + } + + let Some(last) = input.next() else { + return SyntaxError::UnexpectedEof.to_result(); + }; + let last = last?; + if last != b']' { + return SyntaxError::InvalidValue.to_result(); + } + w.write_all(b"]")?; + Ok(()) + } + + fn walk_elements<I: Iterator<Item = std::io::Result<u8>>, W: Write>( + &mut self, + input: &mut Peekable<I>, + w: &mut W, + ) -> ParserResult { + loop { + self.walk_value(input, w)?; + + let mut ws = Vec::with_capacity(1024); + self.walk_ws(input, &mut ws)?; + + let Some(next) = input.peek() else { + return SyntaxError::UnexpectedEof.to_result(); + }; + let Ok(next) = next else { + return Err(input.next().unwrap().unwrap_err().into()); + }; + + match *next { + b']' => { + w.write_all(&mut ws)?; + return Ok(()); + } + b',' => { + w.write_all(&mut ws)?; + + input.next(); + + self.walk_ws(input, &mut ws)?; + + let Some(c) = input.peek() else { + return SyntaxError::UnexpectedEof.to_result(); + }; + let Ok(c) = c else { + return Err(input.next().unwrap().unwrap_err().into()); + }; + match *c { + b']' => { + self.repaired = true; + w.write_all(&mut ws)?; + return Ok(()); + } + _ => { + w.write_all(b",")?; + w.write_all(&mut ws)?; + } + } + } + _ => { + self.repaired = true; + w.write_all(b",")?; + w.write_all(&mut ws)?; + } + } + } + } + + fn walk_element<I: Iterator<Item = std::io::Result<u8>>, W: Write>( + &mut self, + input: &mut Peekable<I>, + w: &mut W, + ) -> ParserResult { + self.walk_ws(input, w)?; + self.walk_value(input, w)?; + self.walk_ws(input, w) + } + + fn walk_string<I: Iterator<Item = std::io::Result<u8>>, W: Write>( + &mut self, + input: &mut Peekable<I>, + w: &mut W, + ) -> ParserResult { + w.write_all(b"\"")?; + input.next(); // => " + loop { + match input.next() { + Some(Ok(b'"')) => break, + Some(Ok(b'\\')) => { + self.walk_escape(input, w)?; + } + Some(Ok(c)) => { + w.write_all(&[c])?; + } + Some(Err(_)) => return Err(input.next().unwrap().unwrap_err().into()), + None => return SyntaxError::UnexpectedEof.to_result(), + } + } + w.write_all(b"\"")?; + Ok(()) + } + + fn walk_escape<I: Iterator<Item = std::io::Result<u8>>, W: Write>( + &mut self, + input: &mut Peekable<I>, + w: &mut W, + ) -> ParserResult { + let Some(c) = input.next() else { + return SyntaxError::UnexpectedEof.to_result(); + }; + let c = c?; + match c { + b'"' | b'\\' | b'/' | b'b' | b'f' | b'n' | b'r' | b't' => { + w.write_all(&[b'\\', c])?; + } + b'u' => { + let Some(u1) = input.next() else { + return SyntaxError::UnexpectedEof.to_result(); + }; + let u1 = u1?; + if !u1.is_ascii_hexdigit() { + return SyntaxError::InvalidValue.to_result(); + } + let Some(u2) = input.next() else { + return SyntaxError::UnexpectedEof.to_result(); + }; + let u2 = u2?; + if !u2.is_ascii_hexdigit() { + return SyntaxError::InvalidValue.to_result(); + } + let Some(u3) = input.next() else { + return SyntaxError::UnexpectedEof.to_result(); + }; + let u3 = u3?; + if !u3.is_ascii_hexdigit() { + return SyntaxError::InvalidValue.to_result(); + } + let Some(u4) = input.next() else { + return SyntaxError::UnexpectedEof.to_result(); + }; + let u4 = u4?; + if !u4.is_ascii_hexdigit() { + return SyntaxError::InvalidValue.to_result(); + } + w.write_all(&[b'\\', u1, u2, u3, u4])?; + } + _ => return SyntaxError::InvalidValue.to_result(), + } + Ok(()) + } + + fn walk_number<I: Iterator<Item = std::io::Result<u8>>, W: Write>( + &mut self, + input: &mut Peekable<I>, + w: &mut W, + ) -> ParserResult { + self.walk_integer(input, w)?; + self.walk_fraction(input, w)?; + self.walk_exponent(input, w) + } + + fn walk_integer<I: Iterator<Item = std::io::Result<u8>>, W: Write>( + &mut self, + input: &mut Peekable<I>, + w: &mut W, + ) -> ParserResult { + let Some(first) = input.next() else { + return SyntaxError::UnexpectedEof.to_result(); + }; + let first = first?; + match first { + b'-' => { + w.write_all(b"-")?; + return self.walk_integer(input, w); + } + b'0' => { + w.write_all(b"0")?; + return Ok(()); + } + b'1' | b'2' | b'3' | b'4' | b'5' | b'6' | b'7' | b'8' | b'9' => { + w.write_all(&[first])?; + loop { + match input.peek() { + Some(Ok(c @ b'0')) | Some(Ok(c @ b'1')) | Some(Ok(c @ b'2')) + | Some(Ok(c @ b'3')) | Some(Ok(c @ b'4')) | Some(Ok(c @ b'5')) + | Some(Ok(c @ b'6')) | Some(Ok(c @ b'7')) | Some(Ok(c @ b'8')) + | Some(Ok(c @ b'9')) => { + w.write_all(&[*c])?; + input.next(); + } + Some(Ok(_)) => break, + Some(Err(_)) => return Err(input.next().unwrap().unwrap_err().into()), + None => return Ok(()), + } + } + } + _ => return SyntaxError::InvalidValue.to_result(), + } + Ok(()) + } + + fn walk_digits<I: Iterator<Item = std::io::Result<u8>>, W: Write>( + &mut self, + input: &mut Peekable<I>, + w: &mut W, + ) -> ParserResult { + let mut has_digit = false; + loop { + match input.peek() { + Some(Ok(c @ b'0')) | Some(Ok(c @ b'1')) | Some(Ok(c @ b'2')) + | Some(Ok(c @ b'3')) | Some(Ok(c @ b'4')) | Some(Ok(c @ b'5')) + | Some(Ok(c @ b'6')) | Some(Ok(c @ b'7')) | Some(Ok(c @ b'8')) + | Some(Ok(c @ b'9')) => { + w.write_all(&[*c])?; + input.next(); + has_digit = true; + } + Some(Ok(_)) => break, + Some(Err(_)) => return Err(input.next().unwrap().unwrap_err().into()), + None => break, + } + } + if has_digit { + Ok(()) + } else { + match input.peek() { + Some(_) => SyntaxError::InvalidValue.to_result(), + None => SyntaxError::UnexpectedEof.to_result(), + } + } + } + + fn walk_fraction<I: Iterator<Item = std::io::Result<u8>>, W: Write>( + &mut self, + input: &mut Peekable<I>, + w: &mut W, + ) -> ParserResult { + let Some(first) = input.peek() else { + return Ok(()); + }; + let Ok(first) = first else { + return Err(input.next().unwrap().unwrap_err().into()); + }; + if *first != b'.' { + return Ok(()); + } + w.write_all(b".")?; + input.next(); + self.walk_digits(input, w) + } + + fn walk_exponent<I: Iterator<Item = std::io::Result<u8>>, W: Write>( + &mut self, + input: &mut Peekable<I>, + w: &mut W, + ) -> ParserResult { + let Some(first) = input.peek() else { + return Ok(()); + }; + let Ok(first) = first else { + return Err(input.next().unwrap().unwrap_err().into()); + }; + if *first != b'e' && *first != b'E' { + return Ok(()); + } + w.write_all(&[*first])?; + input.next(); + self.walk_sign(input, w)?; + self.walk_digits(input, w) + } + + fn walk_sign<I: Iterator<Item = std::io::Result<u8>>, W: Write>( + &mut self, + input: &mut Peekable<I>, + w: &mut W, + ) -> ParserResult { + let Some(c) = input.peek() else { + return SyntaxError::UnexpectedEof.to_result(); + }; + let Ok(c) = c else { + return Err(input.next().unwrap().unwrap_err().into()); + }; + if *c == b'+' || *c == b'-' { + w.write_all(&[*c])?; + input.next(); + } + Ok(()) + } + + fn walk_ws<I: Iterator<Item = std::io::Result<u8>>, W: Write>( + &mut self, + input: &mut Peekable<I>, + w: &mut W, + ) -> ParserResult { + loop { + match input.peek() { + Some(Ok(c @ 0x09)) | Some(Ok(c @ 0x0A)) | Some(Ok(c @ 0x0D)) + | Some(Ok(c @ 0x20)) => { + w.write_all(&[*c])?; + input.next(); + } + Some(Ok(_)) => return Ok(()), + Some(Err(_)) => return Err(input.next().unwrap().unwrap_err().into()), + None => return Ok(()), + } + } + } +} diff --git a/src/main.rs b/src/main.rs index 0672e51..c113039 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,3 +1,76 @@ -fn main() { - println!("Hello, World!"); +use reparojson::{self, RepairErr, RepairOk, RepairResult}; +use std::ffi::{OsStr, OsString}; +use std::fs::File; +use std::io::{stdin, stdout, BufReader, BufWriter, Write}; +use std::process::ExitCode; + +struct Config { + quiet: bool, + file_path: Option<OsString>, +} + +fn parse_args() -> std::io::Result<Config> { + use clap::{arg, command, value_parser}; + + let matches = command!() + .arg(arg!(-q --quiet "Successfully exit if the input JSON is repaired")) + .arg( + arg!([FILE] "The input JSON file (default: STDIN)") + .value_parser(value_parser!(OsString)), + ) + .get_matches(); + + let quiet = matches.get_flag("quiet"); + let file_path = matches.get_one("FILE").cloned(); + Ok(Config { quiet, file_path }) +} + +fn repair(input_file_path: Option<OsString>, mut w: impl Write) -> RepairResult { + match input_file_path.as_ref() { + None => { + let reader = stdin().lock(); + let reader = BufReader::new(reader); + reparojson::repair(reader, &mut w) + } + Some(file_path) => { + if file_path == OsStr::new("-") { + let reader = stdin().lock(); + let reader = BufReader::new(reader); + reparojson::repair(reader, &mut w) + } else { + let reader = File::open(file_path)?; + let reader = BufReader::new(reader); + reparojson::repair(reader, &mut w) + } + } + } +} + +fn main() -> std::io::Result<ExitCode> { + let config = parse_args()?; + + let writer = stdout().lock(); + let mut writer = BufWriter::new(writer); + + let exit_code = match repair(config.file_path, &mut writer) { + Ok(RepairOk::Valid) => ExitCode::SUCCESS, + Ok(RepairOk::Repaired) => { + if config.quiet { + ExitCode::SUCCESS + } else { + ExitCode::from(1) + } + } + Err(RepairErr::Invalid(err)) => { + eprintln!("{}", err); + ExitCode::from(2) + } + Err(RepairErr::IoErr(err)) => { + eprintln!("{}", err); + ExitCode::from(3) + } + }; + + writer.flush()?; + Ok(exit_code) } |
