diff --git a/src/combinators.rs b/src/combinators.rs index 238423a..dcc2a89 100644 --- a/src/combinators.rs +++ b/src/combinators.rs @@ -13,7 +13,6 @@ // limitations under the License. //! Contains combinators that can assemble other matchers or combinators into more complex grammars. - use super::{Error, InputIter, Result}; /// Turns a `Result` to it's inverse. @@ -176,8 +175,8 @@ macro_rules! wrap_err { match $f!($i, $($args)*) { $crate::Result::Complete(i, o) => $crate::Result::Complete(i, o), $crate::Result::Incomplete(offset) => $crate::Result::Incomplete(offset), - $crate::Result::Fail(e) => $crate::Result::Fail($crate::Error::caused_by($e, &_i, e)), - $crate::Result::Abort(e) => $crate::Result::Abort($crate::Error::caused_by($e, &_i, e)), + $crate::Result::Fail(e) => $crate::Result::Fail($crate::Error::caused_by($e, &_i, Box::new(e))), + $crate::Result::Abort(e) => $crate::Result::Abort($crate::Error::caused_by($e, &_i, Box::new(e))), } }}; @@ -237,10 +236,9 @@ macro_rules! trap { /// You must specify the error message to use in case the matcher is incomplete. /// /// The must_complete! macro provides syntactic sugar for using this combinator. -pub fn must_complete(result: Result, msg: M) -> Result +pub fn must_complete(result: Result, msg: String) -> Result where I: InputIter, - M: Into, { match result { Result::Complete(i, o) => Result::Complete(i, o), @@ -250,6 +248,32 @@ where } } +/// Turns `Result::Incomplete` into `Result::Fail`. +pub fn complete(result: Result, msg: S) -> Result +where + I: InputIter, + S: Into, +{ + match result { + Result::Incomplete(offset) => Result::Fail(Error::new(msg.into(), &offset)), + Result::Complete(i, o) => Result::Complete(i, o), + Result::Fail(e) => Result::Fail(e), + Result::Abort(e) => Result::Abort(e), + } +} + +/// Turns `Result::Incomplete` into `Result::Fail`. +#[macro_export] +macro_rules! complete { + ($i:expr, $e:expr, $f:ident!( $( $args:tt )* ) ) => { + $crate::combinators::complete($f!($i, $($args)*), $e) + }; + + ($i:expr, $efn:expr, $f:ident) => { + complete!($i, $efn, run!($f)) + }; +} + /// Turns `Result::Fail` and `Result::Incomplete` into `Result::Abort`. /// /// You must specify the error message to use in case the matcher is incomplete. @@ -594,6 +618,53 @@ macro_rules! repeat { }; } +#[macro_export] +macro_rules! separated { + ($i:expr, $sep_rule:ident!( $( $sep_args:tt )* ), $item_rule:ident!( $( $item_args:tt )* ) ) => {{ + use $crate::Result; + let _i = $i.clone(); + // We require at least one item for our list + let head = $item_rule!($i.clone(), $($item_args)*); + match head { + Result::Incomplete(offset) => Result::Incomplete(offset), + Result::Fail(e) => Result::Fail(e), + Result::Abort(e) => Result::Abort(e), + Result::Complete(i,item) => { + let mut list = vec![item]; + // Now we parse a repeat of sep_rule and item_rule. + let tail_result = repeat!(i, + do_each!( + _ => $sep_rule!($($sep_args)*), + item => $item_rule!($($item_args)*), + (item) + ) + ); + match tail_result { + Result::Fail(e) => Result::Fail(e), + Result::Incomplete(offset) => Result::Incomplete(offset), + Result::Abort(e) => Result::Abort(e), + Result::Complete(i, mut tail) => { + list.extend(tail.drain(0..)); + Result::Complete(i, list) + } + } + } + } + }}; + + ($i:expr, $sep_rule:ident, $item_rule:ident ) => { + separated!($i, run!($sep_rule), run!($item_rule)) + }; + + ($i:expr, $sep_rule:ident!( $( $args:tt )* ), $item_rule:ident ) => { + separated!($i, $sep_rule!($($args)*), run!($item_rule)) + }; + + ($i:expr, $sep_rule:ident, $item_rule:ident!( $( $args:tt )* ) ) => { + separated!($i, run!($sep_rule), $item_rule!($($args)*)) + }; +} + /// Convenience macro for looking for a specific text token in a byte input stream. /// /// ``` @@ -683,7 +754,7 @@ macro_rules! until { }}; ($i:expr, $rule:ident) => { - consume_until!($i, run!($rule)) + until!($i, run!($rule)) }; } @@ -709,14 +780,14 @@ macro_rules! discard { /// Matches and returns any ascii charactar whitespace byte. pub fn ascii_ws<'a, I: InputIter>(mut i: I) -> Result { match i.next() { - Some(b) => match b { - b'\r' => Result::Complete(i, *b), - b'\n' => Result::Complete(i, *b), - b'\t' => Result::Complete(i, *b), - b' ' => Result::Complete(i, *b), - _ => Result::Fail(Error::new("Not whitespace", &i)), + Some(b) => { + if (*b as char).is_whitespace() { + Result::Complete(i, *b) + } else { + Result::Fail(Error::new("Not whitespace".to_string(), &i)) + } }, - None => Result::Fail(Error::new("Unexpected End Of Input", &i)), + None => Result::Fail(Error::new("Unexpected End Of Input".to_string(), &i)), } } @@ -725,7 +796,7 @@ pub fn ascii_ws<'a, I: InputIter>(mut i: I) -> Result { pub fn eoi(i: I) -> Result { let mut _i = i.clone(); match _i.next() { - Some(_) => Result::Fail(Error::new("Expected End Of Input", &i)), + Some(_) => Result::Fail(Error::new("Expected End Of Input".to_string(), &i)), None => Result::Complete(i, ()), } } @@ -753,34 +824,138 @@ pub fn eoi(i: I) -> Result { #[macro_export] macro_rules! make_fn { ($name:ident<$i:ty, $o:ty>, $rule:ident!($( $body:tt )* )) => { - fn $name(i: $i) -> $crate::Result<$i,$o> { + fn $name(i: $i) -> $crate::Result<$i, $o> { $rule!(i, $($body)*) } }; (pub $name:ident<$i:ty, $o:ty>, $rule:ident!($( $body:tt )* )) => { - pub fn $name(i: $i) -> $crate::Result<$i,$o> { + pub fn $name(i: $i) -> $crate::Result<$i, $o> { $rule!(i, $($body)*) } }; ($name:ident<$i:ty, $o:ty>, $rule:ident) => { - make_fn!($name<$i, $o>, run!($rule)) + make_fn!($name<$i, $o>, run!($rule)); }; (pub $name:ident<$i:ty, $o:ty>, $rule:ident) => { - make_fn!(pub $name<$i, $o>, run!($rule)) + make_fn!(pub $name<$i, $o>, run!($rule)); + }; +} + +/// Helper macro that returns the input without consuming it. +/// +/// Useful when you need to get the input and use it to retrieve +/// positional information like offset or line and column. +#[macro_export] +macro_rules! input { + ($i:expr) => { + input!($i,) }; -} - -/// For inputs that implement the TextPositionTracker trait returns the current -/// line and column position for this input. -#[macro_export] -macro_rules! pos { - ($i:expr) => {{ + ($i:expr,) => {{ let _i = $i.clone(); - use $crate::TextPositionTracker; - $crate::Result::Complete($i, (_i.line(), _i.column())) + $crate::Result::Complete($i, _i) }}; } + +/// Consumes the input until the $rule fails and then returns the consumed input as +/// a slice. +/// +/// ``` +/// # #[macro_use] extern crate abortable_parser; +/// use abortable_parser::iter; +/// # use abortable_parser::{Result, Offsetable}; +/// # use abortable_parser::combinators::ascii_alpha; +/// use std::convert::From; +/// # fn main() { +/// let iter: iter::StrIter = "foo;".into(); +/// let tok = consume_all!(iter, ascii_alpha); +/// # assert!(tok.is_complete()); +/// if let Result::Complete(i, o) = tok { +/// assert_eq!(i.get_offset(), 3); +/// assert_eq!(o, "foo"); +/// } +/// # } +/// ``` +#[macro_export] +macro_rules! consume_all { + ($i:expr, $rule:ident!( $( $args:tt )* ) ) => {{ + use $crate::{Result, Offsetable, Span, SpanRange}; + let start_offset = $i.get_offset(); + let mut _i = $i.clone(); + let pfn = || { + loop { + match $rule!(_i.clone(), $($args)*) { + Result::Complete(_, _) => { + // noop + }, + Result::Abort(e) => return Result::Abort(e), + Result::Incomplete(offset) => return Result::Incomplete(offset), + Result::Fail(_) => { + let range = SpanRange::Range(start_offset.._i.get_offset()); + return Result::Complete(_i, $i.span(range)); + } + } + if let None = _i.next() { + return Result::Incomplete(_i.get_offset()); + } + } + }; + pfn() + }}; + + ($i:expr, $rule:ident) => { + consume_all!($i, run!($rule)) + } +} + +/// ascii_digit parses a single ascii alphabetic or digit character from an InputIter of bytes. +#[inline(always)] +pub fn ascii_alphanumeric<'a, I: InputIter>(mut i: I) -> Result { + match i.next() { + Some(b) => { + let c = *b as char; + if c.is_ascii_alphabetic() || c.is_ascii_digit() { + Result::Complete(i, *b) + } else { + Result::Fail(Error::new("Not an alphanumeric character".to_string(), &i)) + } + }, + None => Result::Fail(Error::new("Unexpected End Of Input.".to_string(), &i)), + } +} + +/// ascii_digit parses a single ascii digit character from an InputIter of bytes. +#[inline(always)] +pub fn ascii_digit<'a, I: InputIter>(mut i: I) -> Result { + match i.next() { + Some(b) => { + if (*b as char).is_ascii_digit() { + Result::Complete(i, *b) + } else { + Result::Fail(Error::new("Not an digit character".to_string(), &i)) + } + }, + None => Result::Fail(Error::new("Unexpected End Of Input.".to_string(), &i)), + } +} + +/// ascii_alpha parses a single ascii alphabet character from an InputIter of bytes. +#[inline(always)] +pub fn ascii_alpha<'a, I: InputIter>(mut i: I) -> Result { + match i.next() { + Some(b) => { + if (*b as char).is_ascii_alphabetic() { + Result::Complete(i, *b) + } else { + Result::Fail(Error::new("Not an alpha character".to_string(), &i)) + } + }, + None => Result::Fail(Error::new("Unexpected End Of Input.".to_string(), &i)), + } +} + +// TODO(jwall): We need a helper to convert Optional into failures. +// TODO(jwall): We need a helper to convert std::result::Result into failures. \ No newline at end of file diff --git a/src/integration_tests.rs b/src/integration_tests.rs index b1db1d2..9543566 100644 --- a/src/integration_tests.rs +++ b/src/integration_tests.rs @@ -11,9 +11,8 @@ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. - use super::{ascii_ws, eoi, Result}; -use iter::StrIter; +use iter::{StrIter, SliceIter}; make_fn!(proto, do_each!( @@ -34,8 +33,25 @@ make_fn!(path, until!(either!(discard!(ascii_ws), eoi)) ); +make_fn!(sliceit, ()>, + do_each!( + _ => input!(), + end_of_input => eoi, + (end_of_input) + ) +); + +make_fn!(long_string_path, ()>, + do_each!( + _ => input!(), + end_of_input => eoi, + (end_of_input) + ) +); + make_fn!(pub url, Option<&str>, &str)>, do_each!( + _ => input!(), protocol => optional!(proto), domain => optional!(domain), path => path, @@ -54,3 +70,17 @@ fn test_url_parser() { assert_eq!(path, "/some/path"); } } + +#[test] +fn test_slice_iter_make_fn() { + let iter = SliceIter::from("yo!"); + let result = sliceit(iter); + assert!(result.is_fail()); +} + +#[test] +fn test_slice_iter_make_fn_long_error_path() { + let iter = SliceIter::from("yo!"); + let result = long_string_path(iter); + assert!(result.is_fail()); +} diff --git a/src/iter.rs b/src/iter.rs index 56689af..98def77 100644 --- a/src/iter.rs +++ b/src/iter.rs @@ -16,7 +16,7 @@ use std::fmt::Debug; use std::iter::Iterator; -use super::{InputIter, Offsetable, Span, SpanRange, TextPositionTracker}; +use super::{InputIter, Offsetable, Seekable, Span, SpanRange, TextPositionTracker}; /// Implements `InputIter` for any slice of T. #[derive(Debug)] @@ -95,6 +95,12 @@ impl<'a, T: Debug> From<&'a Vec> for SliceIter<'a, T> { } } +impl<'a, O: Debug> Peekable<&'a O> for SliceIter<'a, O> { + fn peek_next(&self) -> Option<&'a O> { + self.source.get(self.offset) + } +} + /// Implements `InputIter` for any slice of T. #[derive(Debug)] pub struct StrIter<'a> { @@ -184,3 +190,24 @@ impl<'a> Span<&'a str> for StrIter<'a> { } } } + +impl<'a> Seekable for StrIter<'a> { + fn seek(&mut self, to: usize) -> usize { + let self_len = self.source.len(); + let offset = if self_len > to { + to + } else { + self_len + }; + self.offset = offset; + self.offset + } +} + +use super::Peekable; + +impl<'a> Peekable<&'a u8> for StrIter<'a> { + fn peek_next(&self) -> Option<&'a u8> { + self.source.as_bytes().get(self.offset) + } +} \ No newline at end of file diff --git a/src/lib.rs b/src/lib.rs index 2137f32..97108cd 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -104,6 +104,7 @@ //! ``` use std::fmt::Display; use std::iter::Iterator; +use std::result; /// A trait for types that can have an offset as a count of processed items. pub trait Offsetable { @@ -116,6 +117,10 @@ impl Offsetable for usize { } } +pub trait Seekable { + fn seek(&mut self, usize) -> usize; +} + /// Trait for Inputs that can track lines and columns in a text input. pub trait TextPositionTracker { fn line(&self) -> usize; @@ -135,6 +140,10 @@ pub trait Span { fn span(&self, idx: SpanRange) -> O; } +pub trait Peekable { + fn peek_next(&self) -> Option; +} + /// A Cloneable Iterator that can report an offset as a count of processed Items. pub trait InputIter: Iterator + Clone + Offsetable {} @@ -142,7 +151,8 @@ pub trait InputIter: Iterator + Clone + Offsetable {} /// Stores a wrapped err that must implement Display as well as an offset and /// an optional cause. #[derive(Debug)] -pub struct Error { +pub struct Error +{ msg: String, offset: usize, cause: Option>, @@ -150,10 +160,9 @@ pub struct Error { impl Error { /// Constructs a new Error with an offset and no cause. - pub fn new(msg: M, offset: &S) -> Self + pub fn new>(msg: D, offset: &S) -> Self where S: Offsetable, - M: Into, { Error { msg: msg.into(), @@ -163,28 +172,27 @@ impl Error { } /// Constructs a new Error with an offset and a cause. - pub fn caused_by(msg: M, offset: &S, cause: Self) -> Self + pub fn caused_by<'a, S, D: Into>(msg: D, offset: &'a S, cause: Box) -> Self where S: Offsetable, - M: Into, { Error { msg: msg.into(), offset: offset.get_offset(), - cause: Some(Box::new(cause)), + cause: Some(cause), } } - /// Returns the contained err. - pub fn get_msg<'a>(&'a self) -> &'a str { - &self.msg + /// Returns the msg. + pub fn get_msg<'a>(&'a self) -> String { + format!("{}", &self.msg) } /// Returns `Some(cause)` if there is one, None otherwise. pub fn get_cause<'a>(&'a self) -> Option<&'a Error> { match self.cause { - Some(ref cause) => Some(cause), - None => None, + Some(ref e) => Some(e), + None => None } } @@ -195,7 +203,7 @@ impl Error { } impl Display for Error { - fn fmt(&self, f: &mut std::fmt::Formatter) -> std::result::Result<(), std::fmt::Error> { + fn fmt(&self, f: &mut std::fmt::Formatter) -> result::Result<(), std::fmt::Error> { try!(write!(f, "{}", self.msg)); match self.cause { Some(ref c) => write!(f, "\n\tCaused By:{}", c), @@ -204,9 +212,12 @@ impl Display for Error { } } +impl std::error::Error for Error {} + /// The result of a parsing attempt. #[derive(Debug)] -pub enum Result { +pub enum Result +{ /// Complete represents a successful match. Complete(I, O), /// Incomplete indicates input ended before a match could be completed. @@ -218,7 +229,8 @@ pub enum Result { Abort(Error), } -impl Result { +impl Result +{ /// Returns true if the Result is Complete. pub fn is_complete(&self) -> bool { if let &Result::Complete(_, _) = self { @@ -254,6 +266,7 @@ impl Result { pub use combinators::*; pub use iter::SliceIter; +pub use iter::StrIter; #[macro_use] pub mod combinators; diff --git a/src/test.rs b/src/test.rs index 87764a9..cb57892 100644 --- a/src/test.rs +++ b/src/test.rs @@ -14,7 +14,7 @@ use std::fmt::{Debug, Display}; -use super::{InputIter, Offsetable, Result}; +use super::{InputIter, Offsetable, Result, TextPositionTracker}; use combinators::*; use iter::{SliceIter, StrIter}; @@ -252,6 +252,23 @@ fn test_do_each() { } } +#[test] +fn test_do_each_input_and_token() { + let input_str = "foo"; + let iter = SliceIter::new(input_str.as_bytes()); + let result = do_each!(iter, + _ => input!(), + token => text_token!("foo"), + (token) + ); + assert!(result.is_complete()); + if let Result::Complete(_, o) = result { + assert_eq!("foo", o); + } else { + assert!(false, "did not get our token"); + } +} + #[test] fn test_either_idents() { let input_str = "foo"; @@ -477,7 +494,6 @@ fn test_ascii_ws_carriage_return() { assert!(result.is_complete()); } -use super::TextPositionTracker; #[test] fn test_position_tracking_striter() { let input_str = "\n"; @@ -487,10 +503,146 @@ fn test_position_tracking_striter() { iter.next(); assert_eq!(iter.line(), 2); assert_eq!(iter.column(), 1); - let pos_result = pos!(iter); + let pos_result: Result = input!(iter); assert!(pos_result.is_complete()); - if let Result::Complete(_, (line, column)) = pos_result { - assert_eq!(line, 2); - assert_eq!(column, 1); + if let Result::Complete(_, i) = pos_result { + assert_eq!(i.line(), 2); + assert_eq!(i.column(), 1); } } + +#[test] +fn test_separated_good() { + let input_str = "foo,foo,foo"; + let iter = StrIter::new(input_str); + let result = separated!(iter, text_token!(","), text_token!("foo")); + assert!(result.is_complete()); + if let Result::Complete(_, list) = result { + assert_eq!(list.len(), 3); + assert_eq!(list[0], "foo"); + assert_eq!(list[1], "foo"); + assert_eq!(list[2], "foo"); + } +} + +#[test] +fn test_separated_single_item() { + let input_str = "foo"; + let iter = StrIter::new(input_str); + let result = separated!(iter, text_token!(","), text_token!("foo")); + assert!(result.is_complete()); + if let Result::Complete(_, list) = result { + assert_eq!(list.len(), 1); + assert_eq!(list[0], "foo"); + } +} + +#[test] +fn test_separated_empty_list() { + let input_str = ""; + let iter = StrIter::new(input_str); + let result = separated!(iter, text_token!(","), text_token!("foo")); + assert!(result.is_fail()); +} + +#[test] +fn test_separated_bad() { + let input_str = "bar foo,foo"; + let iter = StrIter::new(input_str); + let result = separated!(iter, text_token!(","), text_token!("foo")); + assert!(result.is_fail()); +} + +#[test] +fn test_separated_trailing_comma() { + let input_str = "foo,foo,foo,"; + let iter = StrIter::new(input_str); + let result = separated!(iter, text_token!(","), text_token!("foo")); + assert!(result.is_complete()); + if let Result::Complete(i, list) = result { + assert_eq!(list.len(), 3); + assert_eq!(list[0], "foo"); + assert_eq!(list[1], "foo"); + assert_eq!(list[2], "foo"); + assert!(text_token!(i, ",").is_complete()); + } +} + +#[test] +fn test_ascii_alphanumeric() { + let input_str = "a1"; + let iter = StrIter::new(input_str); + let result = repeat!(iter, ascii_alphanumeric); + assert!(result.is_complete()); + if let Result::Complete(i,list) = result { + assert_eq!(list.len(), 2); + assert_eq!(list[0], b'a'); + assert_eq!(list[1], b'1'); + assert!(eoi(i).is_complete()); + } +} + +#[test] +fn test_ascii_alphanumeric_fail() { + let input_str = "-"; + let iter = StrIter::new(input_str); + let result = ascii_alphanumeric(iter); + assert!(result.is_fail()); +} + +#[test] +fn test_ascii_digit() { + let input_str = "12"; + let iter = StrIter::new(input_str); + let result = repeat!(iter, ascii_digit); + assert!(result.is_complete()); + if let Result::Complete(i,list) = result { + assert_eq!(list.len(), 2); + assert_eq!(list[0], b'1'); + assert_eq!(list[1], b'2'); + assert!(eoi(i).is_complete()); + } +} + +#[test] +fn test_ascii_digit_fail() { + let input_str = "-"; + let iter = StrIter::new(input_str); + let result = ascii_digit(iter); + assert!(result.is_fail()); +} + +#[test] +fn test_ascii_alpha() { + let input_str = "ab"; + let iter = StrIter::new(input_str); + let result = repeat!(iter, ascii_alpha); + assert!(result.is_complete()); + if let Result::Complete(i,list) = result { + assert_eq!(list.len(), 2); + assert_eq!(list[0], b'a'); + assert_eq!(list[1], b'b'); + assert!(eoi(i).is_complete()); + } +} + +#[test] +fn test_ascii_alpha_fail() { + let input_str = "1"; + let iter = StrIter::new(input_str); + let result = ascii_alpha(iter); + assert!(result.is_fail()); +} + +#[test] +fn test_consume_all() { + let input_str = "foo;"; + let iter = StrIter::new(input_str); + let result = consume_all!(iter, ascii_alpha); + assert!(result.is_complete()); + if let Result::Complete(i, o) = result { + assert_eq!(i.get_offset(), 3); + assert_eq!(o, "foo"); + assert!(text_token!(i, ";").is_complete()); + } +} \ No newline at end of file