commit 00bd55874c7eeca1fe8307ee6f440eb6d381993a Author: Jeremy Wall Date: Sun Sep 2 22:10:11 2018 -0500 Initial Commit. diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..626bd18 --- /dev/null +++ b/.gitignore @@ -0,0 +1,6 @@ +/target +**/*.rs.bk + +/target +**/*.rs.bk +Cargo.lock diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..ac4da0f --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,6 @@ +[package] +name = "abortable-parser" +version = "0.1.0" +authors = ["Jeremy Wall "] + +[dependencies] diff --git a/src/iter.rs b/src/iter.rs new file mode 100644 index 0000000..50af31a --- /dev/null +++ b/src/iter.rs @@ -0,0 +1,51 @@ +//! Contains implementations of `InputIter`. +use std::iter::Iterator; +use std::fmt::Debug; + +use super::InputIter; + +/// Implements `InputIter` for any slice of T. +#[derive(Debug)] +pub struct SliceIter<'a, T: Debug + 'a> { + source: &'a [T], + offset: usize, +} + +impl<'a, T: Debug + 'a> SliceIter<'a, T> { + /// new constructs a SliceIter from a Slice of T. + pub fn new(source: &'a [T]) -> Self { + SliceIter { + source: source, + offset: 0, + } + } +} + +impl<'a, T: Debug + 'a> Iterator for SliceIter<'a, T> { + type Item = &'a T; + + fn next(&mut self) -> Option { + match self.source.get(self.offset) { + Some(item) => { + self.offset += 1; + Some(item) + }, + None => None, + } + } +} + +impl<'a, T: Debug + 'a> InputIter for SliceIter<'a, T> { + fn get_offset(&self) -> usize { + self.offset + } +} + +impl<'a, T: Debug + 'a> Clone for SliceIter<'a, T> { + fn clone(&self) -> Self { + SliceIter { + source: self.source, + offset: self.offset, + } + } +} \ No newline at end of file diff --git a/src/lib.rs b/src/lib.rs new file mode 100644 index 0000000..ff73e1f --- /dev/null +++ b/src/lib.rs @@ -0,0 +1,64 @@ +//! A parser combinator library with a focus on fully abortable parsing and error handling. +use std::iter::Iterator; + +/// A Cloneable Iterator that can report an offset as a count of processed Items. +pub trait InputIter: Iterator + Clone { + fn get_offset(&self) -> usize; +} + +/// The result of a parsing attempt. +#[derive(Debug)] +pub enum Result { + /// Complete represents a successful match. + Complete(I, O), + /// Incomplete indicates input ended before a match could be completed. + /// It contains the offset at which the input ended before a match could be completed. + Incomplete(usize), + /// Fail represents a failed match. + Fail(E), + /// Abort represents a match failure that the parser cannot recover from. + Abort(E), +} + +impl Result { + /// Returns true if the Result is Complete. + pub fn is_complete(&self) -> bool { + if let &Result::Complete(_, _) = self { + return true; + } + return false; + } + + /// Returns true if the Result is Incomoplete. + pub fn is_incomplete(&self) -> bool { + if let &Result::Incomplete(_) = self { + return true; + } + return false; + } + + /// Returns true if the Result is Fail. + pub fn is_fail(&self) -> bool { + if let &Result::Fail(_) = self { + return true; + } + return false; + } + + /// Returns true if the Result is Abort. + pub fn is_abort(&self) -> bool { + if let &Result::Abort(_) = self { + return true; + } + return false; + } +} + +pub use iter::SliceIter; + +#[macro_use] +pub mod macros; +pub mod iter; + +#[cfg(test)] +mod test; \ No newline at end of file diff --git a/src/macros.rs b/src/macros.rs new file mode 100644 index 0000000..83e0ee5 --- /dev/null +++ b/src/macros.rs @@ -0,0 +1,275 @@ +//! Contains the helper macros for abortable-parser. + +/// Converts a function indentifier into a macro call. Useful when writing your own macro combinator. +#[macro_export] +macro_rules! run { + ($i:expr, $f:ident) => { + $f($i) + }; +} + +/// Turns Fails into Aborts. Allows you to turn any parse failure into a hard abort of the parser. +#[macro_export] +macro_rules! must { + ($i:expr, $f:ident!( $( $args:tt )* ) ) => { + match $f!($i, $($args)*) { + $crate::Result::Complete(i, o) => $crate::Result::Complete(i, o), + $crate::Result::Incomplete(offset) => $crate::Result::Incomplete(offset), + $crate::Result::Fail(e) => $crate::Result::Abort(e), + $crate::Result::Abort(e) => $crate::Result::Abort(e), + } + }; + + ($i:expr, $f:ident) => { + must!($i, run!($f)) + }; +} + +/// Turns Aborts into fails allowing you to trap and then convert an Abort into a normal Fail. +#[macro_export] +macro_rules! trap { + ($i:expr, $f:ident!( $( $args:tt )* ) ) => { + match $f!($i, $($args)*) { + $crate::Result::Complete(i, o) => $crate::Result::Complete(i, o), + $crate::Result::Incomplete(offset) => $crate::Result::Incomplete(offset), + $crate::Result::Fail(e) => $crate::Result::Fail(e), + $crate::Result::Abort(e) => $crate::Result::Fail(e), + } + }; + + ($i:expr, $f:ident) => { + trap!($i, run!($f)) + }; +} + +/// Turns Fails and Incompletes into Aborts. It uses an error factory +/// to construct the errors for the Incomplete case. +#[macro_export] +macro_rules! must_complete { + ($i:expr, $efn:expr, $f:ident!( $( $args:tt )* ) ) => { + match $f!($i, $($args)*) { + $crate::Result::Complete(i, o) => $crate::Result::Complete(i, o), + $crate::Result::Incomplete(offset) => $crate::Result::Abort($efn(offset)), + $crate::Result::Fail(e) => $crate::Result::Abort(e), + $crate::Result::Abort(e) => $crate::Result::Abort(e), + } + }; + + ($i:expr, $efn:expr, $f:ident) => { + must_complete!($i, $efn, run!($f)) + }; +} + +/// Captures a sequence of sub parsers output. +#[macro_export] +macro_rules! do_each { + ($i:expr, $val:ident => $f:ident) => { + // This is a compile failure. + compile_error!("do_each! must end with a tuple capturing the results") + }; + + ($i:expr, $val:ident => $f:ident!($( $args:tt )* ), $($rest:tt)* ) => { + // If any single one of these matchers fails then all of them are failures. + match $f!($i, $($args)*) { + $crate::Result::Complete(i, o) => { + let $val = o; + do_each!(i, $($rest)*) + } + $crate::Result::Incomplete(offset) => { + Result::Incomplete(offset) + } + $crate::Result::Fail(e) => Result::Fail(e), + $crate::Result::Abort(e) => Result::Abort(e), + } + }; + + ($i:expr, _ => $f:ident!($( $args:tt )* ), $($rest:tt)* ) => { + // If any single one of these matchers fails then all of them are failures. + match $f!($i, $($args)*) { + $crate::Result::Complete(i, _) => { + do_each!(i, $($rest)*) + } + $crate::Result::Incomplete(offset) => { + Result::Incomplete(offset) + } + $crate::Result::Fail(e) => Result::Fail(e), + $crate::Result::Abort(e) => Result::Abort(e), + } + }; + + ($i:expr, $val:ident => $f:ident, $($rest:tt)* ) => { + // If any single one of these matchers fails then all of them are failures. + do_each!($i, $val => run!($f), $( $rest )* ) + }; + + ($i:expr, _ => $f:ident, $($rest:tt)* ) => { + // If any single one of these matchers fails then all of them are failures. + do_each!($i, _ => run!($f), $( $rest )* ) + }; + + // FIXME(jwall): Make this internal only. + // Our Terminal condition + ($i:expr, ( $($rest:tt)* ) ) => { + Result::Complete($i, ($($rest)*)) + }; +} + +/// Returns the output of the first sub parser to succeed. +#[macro_export] +macro_rules! either { + // Initialization case. + ($i:expr, $f:ident!( $( $args:tt )* ), $( $rest:tt)* ) => { // 0 + either!(__impl $i, $f!( $($args)* ), $($rest)*) + }; + + // Initialization case. + ($i:expr, $f:ident, $($rest:tt)* ) => { // 1 + either!(__impl $i, run!($f), $($rest)*) + }; + + // Initialization failure case. + ($i:expr, $f:ident!( $( $args:tt )* )) => { // 2 + compile_error!("Either requires at least two sub matchers.") + }; + + // Initialization failure case. + ($i:expr, $f:ident) => { // 3 + either!($i, run!($f)) + }; + + // Termination clause + (__impl $i:expr, $f:ident) => { // 4 + either!(__impl $i, run!($f)) + }; + + // Termination clause + (__impl $i:expr, $f:ident,) => { // 5 + either!(__impl $i, run!($f)) + }; + + // Termination clause + (__impl $i:expr, $f:ident!( $( $args:tt )* ),) => { // 6 + either!(__impl $i, $f!($($args)*) __end) + }; + + // Termination clause + (__impl $i:expr, $f:ident!( $( $args:tt )* )) => {{ // 7 + match $f!($i, $($args)*) { + // The first one to match is our result. + $crate::Result::Complete(i, o) => { + Result::Complete(i, o) + } + // Incompletes may still be parseable. + $crate::Result::Incomplete(i) => { + Result::Incomplete(i) + } + // Fail means it didn't match so we are now done. + $crate::Result::Fail(e) => { + $crate::Result::Fail(e) + }, + // Aborts are hard failures that the parser can't recover from. + $crate::Result::Abort(e) => Result::Abort(e), + } + }}; + + // Internal Loop Implementation + (__impl $i:expr, $f:ident!( $( $args:tt )* ), $( $rest:tt )* ) => {{ // 8 + let _i = $i.clone(); + match $f!($i, $($args)*) { + // The first one to match is our result. + $crate::Result::Complete(i, o) => { + Result::Complete(i, o) + } + // Incompletes may still be parseable. + $crate::Result::Incomplete(i) => { + Result::Incomplete(i) + } + // Fail means it didn't match so continue to next one. + $crate::Result::Fail(_) => { + either!(__impl _i, $($rest)*) + }, + // Aborts are hard failures that the parser can't recover from. + $crate::Result::Abort(e) => Result::Abort(e), + } + }}; + + // Internal Loop Implementation + (__impl $i:expr, $f:ident, $( $rest:tt )* ) => { // 9 + either!(__impl $i, run!($f), $( $rest )* ) + } +} + +/// Treats a sub parser as optional. It returns Some(output) for a successful match +/// and None for Fails. +#[macro_export] +macro_rules! optional { + ($i:expr, $f:ident) => { + optional!(__impl $i, run!($f)) + }; + + ($i:expr, $f:ident!( $( $args:tt )* ) ) => { + optional!(__impl $i, $f!( $( $args )* )) + }; + + (__impl $i:expr, $f:ident!( $( $args:tt )* )) => {{ + let _i = $i.clone(); + match $f!($i, $($args)*) { + $crate::Result::Complete(i, o) => { + Result::Complete(i, Some(o)) + } + // Incomplete could still work possibly parse. + $crate::Result::Incomplete(i) => { + Result::Incomplete(i) + } + // Fail just means it didn't match. + $crate::Result::Fail(_) => { + Result::Complete(_i, None) + }, + // Aborts are hard failures that the parser can't recover from. + $crate::Result::Abort(e) => Result::Abort(e), + } + }}; +} + +/// Runs a single parser repeating 0 or mre times and returns a possibly empty +/// vector of the parsed results. +#[macro_export] +macro_rules! repeat { + ($i:expr, $f:ident!( $( $args:tt )* ) ) => {{ + let mut _i = $i.clone(); + let mut seq = Vec::new(); + let mut opt_error = None; + loop { + let __i = _i.clone(); + match $f!(_i, $($args)*) { + $crate::Result::Complete(i, o) => { + seq.push(o); + _i = i; + } + // Aborts are always a hard fail. + $crate::Result::Abort(e) => { + opt_error = Some($crate::Result::Abort(e)); + _i = $i.clone(); + break; + } + // Everything else just means we are finished parsing. + $crate::Result::Incomplete(_) => { + _i = __i; + break; + } + $crate::Result::Fail(_) => { + _i = __i; + break; + } + } + } + match opt_error { + Some(e) => e, + None => $crate::Result::Complete(_i, seq), + } + }}; + + ($i:expr, $f:ident) => { + repeat!($i, run!($f)) + }; +} \ No newline at end of file diff --git a/src/test.rs b/src/test.rs new file mode 100644 index 0000000..b6e3d1f --- /dev/null +++ b/src/test.rs @@ -0,0 +1,269 @@ +use super::iter::SliceIter; +use super::{Result, InputIter}; + +#[test] +fn test_slice_iter() { + let input_str = "foo"; + let mut iter = SliceIter::new(input_str.as_bytes()); + let cloned = iter.clone(); + assert_eq!(0, iter.get_offset()); + let mut out = Vec::new(); + loop { + let b = match iter.next() { + None => break, + Some(b) => b, + }; + out.push(b.clone()); + } + assert_eq!(3, out.len()); + assert_eq!('f' as u8, out[0]); + assert_eq!('o' as u8, out[1]); + assert_eq!('o' as u8, out[2]); + assert_eq!(3, iter.get_offset()); + + out = Vec::new(); + for b in cloned { + out.push(b.clone()); + } + assert_eq!(3, out.len()); + assert_eq!('f' as u8, out[0]); + assert_eq!('o' as u8, out[1]); + assert_eq!('o' as u8, out[2]); +} + +fn will_fail(_: SliceIter) -> Result, String, String> { + Result::Fail("AAAAHHH!!!".to_string()) +} + +fn parse_byte(mut i: SliceIter) -> Result, u8, String> { + match i.next() { + Some(b) => Result::Complete(i, *b), + None => Result::Incomplete(i.get_offset()), + } +} + +fn will_not_complete(_: SliceIter) -> Result, String, String> { + Result::Incomplete(0) +} + +fn parse_three(i: SliceIter) -> Result, String, String> { + let mut _i = i.clone(); + let mut out = String::new(); + loop { + let b = match _i.next() { + None => break, + Some(b) => *b, + }; + out.push(b as char); + if out.len() == 3 { + break; + } + } + if out.len() != 3 { + Result::Incomplete(_i.get_offset()) + } else { + Result::Complete(_i, out) + } +} + +#[test] +fn test_must_fails() { + let input_str = "foo"; + let iter = SliceIter::new(input_str.as_bytes()); + let result = must!(iter, will_fail); + assert!(result.is_abort()); +} + +#[test] +fn test_must_succeed() { + let input_str = "foo"; + let iter = SliceIter::new(input_str.as_bytes()); + let result = must!(iter, parse_byte); + assert!(result.is_complete()); +} + +#[test] +fn test_trap_abort() { + let input_str = "foo"; + let iter = SliceIter::new(input_str.as_bytes()); + let result = trap!(iter, must!(will_fail)); + assert!(result.is_fail(), format!("{:?}", result)); +} + +#[test] +fn test_trap_incomplete() { + let input_str = "foo"; + let iter = SliceIter::new(input_str.as_bytes()); + let result = trap!(iter, will_not_complete); + assert!(result.is_incomplete(), format!("{:?}", result)); +} + +#[test] +fn test_trap_fail() { + let input_str = "foo"; + let iter = SliceIter::new(input_str.as_bytes()); + let result = trap!(iter, will_fail); + assert!(result.is_fail(), format!("{:?}", result)); +} + +#[test] +fn test_trap_complete() { + let input_str = "foo"; + let iter = SliceIter::new(input_str.as_bytes()); + let result = trap!(iter, parse_byte); + assert!(result.is_complete(), format!("{:?}", result)); +} + +#[test] +fn test_must_complete() { + let input_str = "foo"; + let iter = SliceIter::new(input_str.as_bytes()); + let iter_fail = iter.clone(); + let mut result = must_complete!(iter, |_| "AHHH".to_string(), will_not_complete); + assert!(result.is_abort()); + result = must_complete!(iter_fail, |_| "AHHH".to_string(), will_fail); + assert!(result.is_abort()); +} + +#[test] +fn test_do_each() { + let input_str = "foo"; + let iter = SliceIter::new(input_str.as_bytes()); + let result = do_each!(iter, + b1 => parse_byte, + _ => parse_byte, + b3 => parse_byte, + (b1, b3) + ); + assert!(result.is_complete()); + if let Result::Complete(_, o) = result { + assert_eq!('f' as u8, o.0); + assert_eq!('o' as u8, o.1); + } else { + assert!(false, "did not get a tuple of 2 items"); + } +} + +#[test] +fn test_either_idents() { + let input_str = "foo"; + let iter = SliceIter::new(input_str.as_bytes()); + let result = either!(iter, + will_fail, + will_fail, + parse_three); + assert!(result.is_complete()); + if let Result::Complete(_, o) = result { + assert_eq!("foo".to_string(), o); + } else { + assert!(false, "Didn't not successfully match"); + } +} + +#[test] +fn test_either_macros() { + let input_str = "foo"; + let iter = SliceIter::new(input_str.as_bytes()); + let result = either!(iter, + run!(will_fail), + run!(will_fail), + run!(parse_three)); + assert!(result.is_complete()); + if let Result::Complete(_, o) = result { + assert_eq!("foo".to_string(), o); + } else { + assert!(false, "Didn't successfully match"); + } +} + +#[test] +fn test_either_fail() { + let input_str = "foo"; + let iter = SliceIter::new(input_str.as_bytes()); + let result = either!(iter, + run!(will_fail), + run!(will_fail)); + assert!(result.is_fail()); +} + +#[test] +fn test_either_abort() { + let input_str = "foo"; + let iter = SliceIter::new(input_str.as_bytes()); + let result = either!(iter, + must!(will_fail), + parse_three, + run!(will_fail)); + assert!(result.is_abort()); +} + +#[test] +fn test_optional_some() { + let input_str = "foo"; + let iter = SliceIter::new(input_str.as_bytes()); + let result = optional!(iter, parse_byte); + assert!(result.is_complete()); + if let Result::Complete(_, o) = result { + assert_eq!('f' as u8, o.unwrap()); + } else { + assert!(false, "optional! did not complete"); + } +} + +#[test] +fn test_optional_none() { + let input_str = "foo"; + let iter = SliceIter::new(input_str.as_bytes()); + let result = optional!(iter, will_fail); + assert!(result.is_complete()); + if let Result::Complete(_, o) = result { + assert!(o.is_none(), "output was not none"); + } else { + assert!(false, "optional! did not complete"); + } +} + +#[test] +fn test_optional_abort() { + let input_str = "foo"; + let iter = SliceIter::new(input_str.as_bytes()); + let result = optional!(iter, must!(will_fail)); + assert!(result.is_abort(), "optional did not abort"); +} + +#[test] +fn test_repeat() { + let input_str = "foo"; + let iter = SliceIter::new(input_str.as_bytes()); + let result = repeat!(iter, parse_byte); + assert!(result.is_complete()); + if let Result::Complete(_, o) = result { + assert_eq!(3, o.len()); + assert_eq!('f' as u8, o[0]); + assert_eq!('o' as u8, o[1]); + assert_eq!('o' as u8, o[2]); + } else { + assert!(false, "repeat did not parse succesfully"); + } +} + +#[test] +fn test_repeat_fail() { + let input_str = "foo"; + let iter = SliceIter::new(input_str.as_bytes()); + let result = repeat!(iter, will_fail); + assert!(result.is_complete()); + if let Result::Complete(_, o) = result { + assert_eq!(0, o.len()); + } else { + assert!(false, "repeat did not parse succesfully"); + } +} + +#[test] +fn test_repeat_abort() { + let input_str = "foo"; + let iter = SliceIter::new(input_str.as_bytes()); + let result = repeat!(iter, must!(will_fail)); + assert!(result.is_abort()); +} \ No newline at end of file