FEATURE: A mass of bugfixes and ergonomic improvements.

This commit is contained in:
Jeremy Wall 2018-09-24 22:04:16 -05:00
parent 79ee6841e4
commit d9de070873
5 changed files with 447 additions and 50 deletions

View File

@ -13,7 +13,6 @@
// limitations under the License.
//! Contains combinators that can assemble other matchers or combinators into more complex grammars.
use super::{Error, InputIter, Result};
/// Turns a `Result` into its inverse.
@ -176,8 +175,8 @@ macro_rules! wrap_err {
match $f!($i, $($args)*) {
$crate::Result::Complete(i, o) => $crate::Result::Complete(i, o),
$crate::Result::Incomplete(offset) => $crate::Result::Incomplete(offset),
$crate::Result::Fail(e) => $crate::Result::Fail($crate::Error::caused_by($e, &_i, e)),
$crate::Result::Abort(e) => $crate::Result::Abort($crate::Error::caused_by($e, &_i, e)),
$crate::Result::Fail(e) => $crate::Result::Fail($crate::Error::caused_by($e, &_i, Box::new(e))),
$crate::Result::Abort(e) => $crate::Result::Abort($crate::Error::caused_by($e, &_i, Box::new(e))),
}
}};
@ -237,10 +236,9 @@ macro_rules! trap {
/// You must specify the error message to use in case the matcher is incomplete.
///
/// The must_complete! macro provides syntactic sugar for using this combinator.
pub fn must_complete<I, O, M>(result: Result<I, O>, msg: M) -> Result<I, O>
pub fn must_complete<I, O>(result: Result<I, O>, msg: String) -> Result<I, O>
where
I: InputIter,
M: Into<String>,
{
match result {
Result::Complete(i, o) => Result::Complete(i, o),
@ -250,6 +248,32 @@ where
}
}
/// Turns `Result::Incomplete` into `Result::Fail`.
pub fn complete<I, O, S>(result: Result<I, O>, msg: S) -> Result<I, O>
where
    I: InputIter,
    S: Into<String>,
{
    // Only the `Incomplete` variant is rewritten; `msg` is converted lazily so
    // no `String` is built when the result is passed through untouched.
    match result {
        Result::Incomplete(offset) => Result::Fail(Error::new(msg.into(), &offset)),
        // `Complete`, `Fail` and `Abort` are returned exactly as received.
        passthrough => passthrough,
    }
}
/// Turns `Result::Incomplete` into `Result::Fail`.
///
/// Syntactic sugar for `combinators::complete`: runs the given matcher against
/// `$i` and hands its result, together with the error message expression, to
/// that function.
#[macro_export]
macro_rules! complete {
// Form 1: the matcher is itself a macro invocation, e.g. `complete!(i, "msg", rule!(args))`.
($i:expr, $e:expr, $f:ident!( $( $args:tt )* ) ) => {
$crate::combinators::complete($f!($i, $($args)*), $e)
};
// Form 2: the matcher is a bare identifier; it is wrapped in `run!` and
// forwarded to form 1.
($i:expr, $efn:expr, $f:ident) => {
complete!($i, $efn, run!($f))
};
}
/// Turns `Result::Fail` and `Result::Incomplete` into `Result::Abort`.
///
/// You must specify the error message to use in case the matcher is incomplete.
@ -594,6 +618,53 @@ macro_rules! repeat {
};
}
/// Parses a list of one or more items divided by a separator.
///
/// The item rule must match at least once; if that first match does not
/// complete, its `Incomplete`, `Fail` or `Abort` result is returned as is.
/// After the first item, a `separator item` pair is matched repeatedly via
/// `repeat!` and every matched item is appended to the list. On success the
/// result is `Result::Complete(rest, items)` where `items` holds the matched
/// items in input order (separators are discarded).
///
/// Both the separator and the item rule may be given either as a macro
/// invocation (`rule!(args)`) or as a bare identifier, which is wrapped in
/// `run!`.
#[macro_export]
macro_rules! separated {
    ($i:expr, $sep_rule:ident!( $( $sep_args:tt )* ), $item_rule:ident!( $( $item_args:tt )* ) ) => {{
        use $crate::Result;
        // We require at least one item for our list.
        let head = $item_rule!($i.clone(), $($item_args)*);
        match head {
            Result::Incomplete(offset) => Result::Incomplete(offset),
            Result::Fail(e) => Result::Fail(e),
            Result::Abort(e) => Result::Abort(e),
            Result::Complete(i, item) => {
                let mut list = vec![item];
                // Now we parse a repeat of sep_rule followed by item_rule,
                // keeping only the item from each pair.
                let tail_result = repeat!(i,
                    do_each!(
                        _ => $sep_rule!($($sep_args)*),
                        item => $item_rule!($($item_args)*),
                        (item)
                    )
                );
                match tail_result {
                    Result::Fail(e) => Result::Fail(e),
                    Result::Incomplete(offset) => Result::Incomplete(offset),
                    Result::Abort(e) => Result::Abort(e),
                    Result::Complete(i, tail) => {
                        // Move the remaining items onto the end of the list.
                        list.extend(tail);
                        Result::Complete(i, list)
                    }
                }
            }
        }
    }};

    // Bare identifiers for both rules.
    ($i:expr, $sep_rule:ident, $item_rule:ident ) => {
        separated!($i, run!($sep_rule), run!($item_rule))
    };

    // Macro separator, bare identifier item rule.
    ($i:expr, $sep_rule:ident!( $( $args:tt )* ), $item_rule:ident ) => {
        separated!($i, $sep_rule!($($args)*), run!($item_rule))
    };

    // Bare identifier separator, macro item rule.
    ($i:expr, $sep_rule:ident, $item_rule:ident!( $( $args:tt )* ) ) => {
        separated!($i, run!($sep_rule), $item_rule!($($args)*))
    };
}
/// Convenience macro for looking for a specific text token in a byte input stream.
///
/// ```
@ -683,7 +754,7 @@ macro_rules! until {
}};
($i:expr, $rule:ident) => {
consume_until!($i, run!($rule))
until!($i, run!($rule))
};
}
@ -709,14 +780,14 @@ macro_rules! discard {
/// Matches and returns any ascii character whitespace byte.
pub fn ascii_ws<'a, I: InputIter<Item = &'a u8>>(mut i: I) -> Result<I, u8> {
match i.next() {
Some(b) => match b {
b'\r' => Result::Complete(i, *b),
b'\n' => Result::Complete(i, *b),
b'\t' => Result::Complete(i, *b),
b' ' => Result::Complete(i, *b),
_ => Result::Fail(Error::new("Not whitespace", &i)),
Some(b) => {
if (*b as char).is_whitespace() {
Result::Complete(i, *b)
} else {
Result::Fail(Error::new("Not whitespace".to_string(), &i))
}
},
None => Result::Fail(Error::new("Unexpected End Of Input", &i)),
None => Result::Fail(Error::new("Unexpected End Of Input".to_string(), &i)),
}
}
@ -725,7 +796,7 @@ pub fn ascii_ws<'a, I: InputIter<Item = &'a u8>>(mut i: I) -> Result<I, u8> {
pub fn eoi<I: InputIter>(i: I) -> Result<I, ()> {
let mut _i = i.clone();
match _i.next() {
Some(_) => Result::Fail(Error::new("Expected End Of Input", &i)),
Some(_) => Result::Fail(Error::new("Expected End Of Input".to_string(), &i)),
None => Result::Complete(i, ()),
}
}
@ -753,34 +824,138 @@ pub fn eoi<I: InputIter>(i: I) -> Result<I, ()> {
#[macro_export]
macro_rules! make_fn {
($name:ident<$i:ty, $o:ty>, $rule:ident!($( $body:tt )* )) => {
fn $name(i: $i) -> $crate::Result<$i,$o> {
fn $name(i: $i) -> $crate::Result<$i, $o> {
$rule!(i, $($body)*)
}
};
(pub $name:ident<$i:ty, $o:ty>, $rule:ident!($( $body:tt )* )) => {
pub fn $name(i: $i) -> $crate::Result<$i,$o> {
pub fn $name(i: $i) -> $crate::Result<$i, $o> {
$rule!(i, $($body)*)
}
};
($name:ident<$i:ty, $o:ty>, $rule:ident) => {
make_fn!($name<$i, $o>, run!($rule))
make_fn!($name<$i, $o>, run!($rule));
};
(pub $name:ident<$i:ty, $o:ty>, $rule:ident) => {
make_fn!(pub $name<$i, $o>, run!($rule))
make_fn!(pub $name<$i, $o>, run!($rule));
};
}
/// Helper macro that returns the input without consuming it.
///
/// Useful when you need to get the input and use it to retrieve
/// positional information like offset or line and column.
#[macro_export]
macro_rules! input {
($i:expr) => {
input!($i,)
};
}
/// For inputs that implement the TextPositionTracker trait returns the current
/// line and column position for this input.
#[macro_export]
macro_rules! pos {
($i:expr) => {{
($i:expr,) => {{
let _i = $i.clone();
use $crate::TextPositionTracker;
$crate::Result::Complete($i, (_i.line(), _i.column()))
$crate::Result::Complete($i, _i)
}};
}
/// Consumes the input until the $rule fails and then returns the consumed input as
/// a slice.
///
/// ```
/// # #[macro_use] extern crate abortable_parser;
/// use abortable_parser::iter;
/// # use abortable_parser::{Result, Offsetable};
/// # use abortable_parser::combinators::ascii_alpha;
/// use std::convert::From;
/// # fn main() {
/// let iter: iter::StrIter = "foo;".into();
/// let tok = consume_all!(iter, ascii_alpha);
/// # assert!(tok.is_complete());
/// if let Result::Complete(i, o) = tok {
/// assert_eq!(i.get_offset(), 3);
/// assert_eq!(o, "foo");
/// }
/// # }
/// ```
#[macro_export]
macro_rules! consume_all {
($i:expr, $rule:ident!( $( $args:tt )* ) ) => {{
use $crate::{Result, Offsetable, Span, SpanRange};
// Remember where the consumed span begins.
let start_offset = $i.get_offset();
// Working copy of the input; it is advanced one item per loop iteration.
let mut _i = $i.clone();
let pfn = || {
loop {
// The rule is tried against a clone, so it never advances `_i` itself.
match $rule!(_i.clone(), $($args)*) {
Result::Complete(_, _) => {
// noop
},
Result::Abort(e) => return Result::Abort(e),
Result::Incomplete(offset) => return Result::Incomplete(offset),
Result::Fail(_) => {
// The rule stopped matching: everything from the start offset up
// to the current position is the consumed span, and `_i` is
// returned positioned at the first item the rule rejected.
let range = SpanRange::Range(start_offset.._i.get_offset());
return Result::Complete(_i, $i.span(range));
}
}
// The rule matched at this position, so step over one item. If there
// is nothing left to step over the result is reported as Incomplete.
if let None = _i.next() {
return Result::Incomplete(_i.get_offset());
}
}
};
pfn()
}};
// Bare identifier form: wrap the rule in `run!` and forward to the arm above.
($i:expr, $rule:ident) => {
consume_all!($i, run!($rule))
}
}
/// ascii_alphanumeric parses a single ascii alphabetic or digit character from an InputIter of bytes.
///
/// Returns `Result::Complete` with the advanced input and the matched byte, or
/// `Result::Fail` when the next byte is not an ascii letter or digit or when
/// the input is already exhausted.
#[inline(always)]
pub fn ascii_alphanumeric<'a, I: InputIter<Item = &'a u8>>(mut i: I) -> Result<I, u8> {
    match i.next() {
        Some(b) => {
            if b.is_ascii_alphanumeric() {
                Result::Complete(i, *b)
            } else {
                // The rejected byte has already been consumed from `i`, so the
                // error offset is taken from the advanced iterator.
                Result::Fail(Error::new("Not an alphanumeric character".to_string(), &i))
            }
        }
        None => Result::Fail(Error::new("Unexpected End Of Input.".to_string(), &i)),
    }
}
/// ascii_digit parses a single ascii digit character from an InputIter of bytes.
#[inline(always)]
pub fn ascii_digit<'a, I: InputIter<Item = &'a u8>>(mut i: I) -> Result<I, u8> {
match i.next() {
Some(b) => {
if (*b as char).is_ascii_digit() {
Result::Complete(i, *b)
} else {
Result::Fail(Error::new("Not an digit character".to_string(), &i))
}
},
None => Result::Fail(Error::new("Unexpected End Of Input.".to_string(), &i)),
}
}
/// ascii_alpha parses a single ascii alphabet character from an InputIter of bytes.
#[inline(always)]
pub fn ascii_alpha<'a, I: InputIter<Item = &'a u8>>(mut i: I) -> Result<I, u8> {
match i.next() {
Some(b) => {
if (*b as char).is_ascii_alphabetic() {
Result::Complete(i, *b)
} else {
Result::Fail(Error::new("Not an alpha character".to_string(), &i))
}
},
None => Result::Fail(Error::new("Unexpected End Of Input.".to_string(), &i)),
}
}
// TODO(jwall): We need a helper to convert Optional into failures.
// TODO(jwall): We need a helper to convert std::result::Result into failures.

View File

@ -11,9 +11,8 @@
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use super::{ascii_ws, eoi, Result};
use iter::StrIter;
use iter::{StrIter, SliceIter};
make_fn!(proto<StrIter, &str>,
do_each!(
@ -34,8 +33,25 @@ make_fn!(path<StrIter, &str>,
until!(either!(discard!(ascii_ws), eoi))
);
// `sliceit` exercises `make_fn!` with a `SliceIter<u8>` input: `input!()` hands
// back the input without consuming it and `eoi` then requires end of input, so
// the parser only succeeds on input that is already exhausted.
make_fn!(sliceit<SliceIter<u8>, ()>,
do_each!(
_ => input!(),
end_of_input => eoi,
(end_of_input)
)
);
// `long_string_path` has the same body as `sliceit` (peek at the input, then
// require end of input) under a second name; it is used by the
// "long error path" test.
make_fn!(long_string_path<SliceIter<u8>, ()>,
do_each!(
_ => input!(),
end_of_input => eoi,
(end_of_input)
)
);
make_fn!(pub url<StrIter, (Option<&str>, Option<&str>, &str)>,
do_each!(
_ => input!(),
protocol => optional!(proto),
domain => optional!(domain),
path => path,
@ -54,3 +70,17 @@ fn test_url_parser() {
assert_eq!(path, "/some/path");
}
}
// A `make_fn!`-generated parser over `SliceIter` must fail on "yo!" because
// `sliceit` expects end of input and the input is not empty.
#[test]
fn test_slice_iter_make_fn() {
let iter = SliceIter::from("yo!");
let result = sliceit(iter);
assert!(result.is_fail());
}
// Same check as above for `long_string_path`: non-empty input cannot satisfy
// the end-of-input requirement, so the result is a failure.
#[test]
fn test_slice_iter_make_fn_long_error_path() {
let iter = SliceIter::from("yo!");
let result = long_string_path(iter);
assert!(result.is_fail());
}

View File

@ -16,7 +16,7 @@
use std::fmt::Debug;
use std::iter::Iterator;
use super::{InputIter, Offsetable, Span, SpanRange, TextPositionTracker};
use super::{InputIter, Offsetable, Seekable, Span, SpanRange, TextPositionTracker};
/// Implements `InputIter` for any slice of T.
#[derive(Debug)]
@ -95,6 +95,12 @@ impl<'a, T: Debug> From<&'a Vec<T>> for SliceIter<'a, T> {
}
}
impl<'a, O: Debug> Peekable<&'a O> for SliceIter<'a, O> {
/// Returns a reference to the item at the current offset without advancing
/// the iterator, or `None` when there is no item at that offset.
fn peek_next(&self) -> Option<&'a O> {
self.source.get(self.offset)
}
}
/// Implements `InputIter` for string slices (`&str`).
#[derive(Debug)]
pub struct StrIter<'a> {
@ -184,3 +190,24 @@ impl<'a> Span<&'a str> for StrIter<'a> {
}
}
}
impl<'a> Seekable for StrIter<'a> {
    /// Moves the iterator to offset `to`, clamped to the length of the
    /// underlying source, and returns the offset that was actually set.
    fn seek(&mut self, to: usize) -> usize {
        // Never position the iterator beyond the end of the source.
        self.offset = to.min(self.source.len());
        self.offset
    }
}
use super::Peekable;
impl<'a> Peekable<&'a u8> for StrIter<'a> {
/// Returns the byte of the source at the current offset without advancing
/// the iterator, or `None` when there is no byte at that offset.
fn peek_next(&self) -> Option<&'a u8> {
self.source.as_bytes().get(self.offset)
}
}

View File

@ -104,6 +104,7 @@
//! ```
use std::fmt::Display;
use std::iter::Iterator;
use std::result;
/// A trait for types that can have an offset as a count of processed items.
pub trait Offsetable {
@ -116,6 +117,10 @@ impl Offsetable for usize {
}
}
/// A trait for inputs that can be repositioned to a given offset.
pub trait Seekable {
    /// Moves the input to offset `to` and returns the offset the input ends
    /// up at. The returned offset may differ from `to`: the `StrIter`
    /// implementation clamps it to the length of its source.
    fn seek(&mut self, to: usize) -> usize;
}
/// Trait for Inputs that can track lines and columns in a text input.
pub trait TextPositionTracker {
fn line(&self) -> usize;
@ -135,6 +140,10 @@ pub trait Span<O> {
fn span(&self, idx: SpanRange) -> O;
}
/// A trait for inputs that can report their next item through a shared
/// reference, i.e. without being advanced.
pub trait Peekable<O> {
/// Returns the next item, or `None` if there is none.
fn peek_next(&self) -> Option<O>;
}
/// A Cloneable Iterator that can report an offset as a count of processed Items.
pub trait InputIter: Iterator + Clone + Offsetable {}
@ -142,7 +151,8 @@ pub trait InputIter: Iterator + Clone + Offsetable {}
/// Stores a wrapped err that must implement Display as well as an offset and
/// an optional cause.
#[derive(Debug)]
pub struct Error {
pub struct Error
{
msg: String,
offset: usize,
cause: Option<Box<Error>>,
@ -150,10 +160,9 @@ pub struct Error {
impl Error {
/// Constructs a new Error with an offset and no cause.
pub fn new<S, M>(msg: M, offset: &S) -> Self
pub fn new<S, D: Into<String>>(msg: D, offset: &S) -> Self
where
S: Offsetable,
M: Into<String>,
{
Error {
msg: msg.into(),
@ -163,28 +172,27 @@ impl Error {
}
/// Constructs a new Error with an offset and a cause.
pub fn caused_by<S, M>(msg: M, offset: &S, cause: Self) -> Self
pub fn caused_by<'a, S, D: Into<String>>(msg: D, offset: &'a S, cause: Box<Self>) -> Self
where
S: Offsetable,
M: Into<String>,
{
Error {
msg: msg.into(),
offset: offset.get_offset(),
cause: Some(Box::new(cause)),
cause: Some(cause),
}
}
/// Returns the contained err.
pub fn get_msg<'a>(&'a self) -> &'a str {
&self.msg
/// Returns the msg.
pub fn get_msg<'a>(&'a self) -> String {
format!("{}", &self.msg)
}
/// Returns `Some(cause)` if there is one, None otherwise.
pub fn get_cause<'a>(&'a self) -> Option<&'a Error> {
match self.cause {
Some(ref cause) => Some(cause),
None => None,
Some(ref e) => Some(e),
None => None
}
}
@ -195,7 +203,7 @@ impl Error {
}
impl Display for Error {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::result::Result<(), std::fmt::Error> {
fn fmt(&self, f: &mut std::fmt::Formatter) -> result::Result<(), std::fmt::Error> {
try!(write!(f, "{}", self.msg));
match self.cause {
Some(ref c) => write!(f, "\n\tCaused By:{}", c),
@ -204,9 +212,12 @@ impl Display for Error {
}
}
// Marker impl with no overridden methods: lets `Error` be used wherever a
// `std::error::Error` is expected.
impl std::error::Error for Error {}
/// The result of a parsing attempt.
#[derive(Debug)]
pub enum Result<I: InputIter, O> {
pub enum Result<I: InputIter, O>
{
/// Complete represents a successful match.
Complete(I, O),
/// Incomplete indicates input ended before a match could be completed.
@ -218,7 +229,8 @@ pub enum Result<I: InputIter, O> {
Abort(Error),
}
impl<I: InputIter, O> Result<I, O> {
impl<I: InputIter, O> Result<I, O>
{
/// Returns true if the Result is Complete.
pub fn is_complete(&self) -> bool {
if let &Result::Complete(_, _) = self {
@ -254,6 +266,7 @@ impl<I: InputIter, O> Result<I, O> {
pub use combinators::*;
pub use iter::SliceIter;
pub use iter::StrIter;
#[macro_use]
pub mod combinators;

View File

@ -14,7 +14,7 @@
use std::fmt::{Debug, Display};
use super::{InputIter, Offsetable, Result};
use super::{InputIter, Offsetable, Result, TextPositionTracker};
use combinators::*;
use iter::{SliceIter, StrIter};
@ -252,6 +252,23 @@ fn test_do_each() {
}
}
// `input!()` inside `do_each!` must not consume anything: the following
// `text_token!("foo")` still has to match the whole input.
#[test]
fn test_do_each_input_and_token() {
let input_str = "foo";
let iter = SliceIter::new(input_str.as_bytes());
let result = do_each!(iter,
_ => input!(),
token => text_token!("foo"),
(token)
);
assert!(result.is_complete());
if let Result::Complete(_, o) = result {
assert_eq!("foo", o);
} else {
assert!(false, "did not get our token");
}
}
#[test]
fn test_either_idents() {
let input_str = "foo";
@ -477,7 +494,6 @@ fn test_ascii_ws_carriage_return() {
assert!(result.is_complete());
}
use super::TextPositionTracker;
#[test]
fn test_position_tracking_striter() {
let input_str = "\n";
@ -487,10 +503,146 @@ fn test_position_tracking_striter() {
iter.next();
assert_eq!(iter.line(), 2);
assert_eq!(iter.column(), 1);
let pos_result = pos!(iter);
let pos_result: Result<StrIter, StrIter> = input!(iter);
assert!(pos_result.is_complete());
if let Result::Complete(_, (line, column)) = pos_result {
assert_eq!(line, 2);
assert_eq!(column, 1);
if let Result::Complete(_, i) = pos_result {
assert_eq!(i.line(), 2);
assert_eq!(i.column(), 1);
}
}
// Three comma separated items produce a three element list.
#[test]
fn test_separated_good() {
let input_str = "foo,foo,foo";
let iter = StrIter::new(input_str);
let result = separated!(iter, text_token!(","), text_token!("foo"));
assert!(result.is_complete());
if let Result::Complete(_, list) = result {
assert_eq!(list.len(), 3);
assert_eq!(list[0], "foo");
assert_eq!(list[1], "foo");
assert_eq!(list[2], "foo");
}
}
// A single item with no separator is still a valid one element list.
#[test]
fn test_separated_single_item() {
let input_str = "foo";
let iter = StrIter::new(input_str);
let result = separated!(iter, text_token!(","), text_token!("foo"));
assert!(result.is_complete());
if let Result::Complete(_, list) = result {
assert_eq!(list.len(), 1);
assert_eq!(list[0], "foo");
}
}
// `separated!` requires at least one item, so empty input fails.
#[test]
fn test_separated_empty_list() {
let input_str = "";
let iter = StrIter::new(input_str);
let result = separated!(iter, text_token!(","), text_token!("foo"));
assert!(result.is_fail());
}
// The input does not start with an item ("bar" instead of "foo"), so the
// required first item cannot be matched and the whole parse fails.
#[test]
fn test_separated_bad() {
let input_str = "bar foo,foo";
let iter = StrIter::new(input_str);
let result = separated!(iter, text_token!(","), text_token!("foo"));
assert!(result.is_fail());
}
// A trailing separator is not part of the list: three items are returned and
// the remaining input still starts with the unconsumed ",".
#[test]
fn test_separated_trailing_comma() {
let input_str = "foo,foo,foo,";
let iter = StrIter::new(input_str);
let result = separated!(iter, text_token!(","), text_token!("foo"));
assert!(result.is_complete());
if let Result::Complete(i, list) = result {
assert_eq!(list.len(), 3);
assert_eq!(list[0], "foo");
assert_eq!(list[1], "foo");
assert_eq!(list[2], "foo");
assert!(text_token!(i, ",").is_complete());
}
}
// Both a letter and a digit are accepted by `ascii_alphanumeric`; repeating it
// over "a1" consumes the whole input.
#[test]
fn test_ascii_alphanumeric() {
let input_str = "a1";
let iter = StrIter::new(input_str);
let result = repeat!(iter, ascii_alphanumeric);
assert!(result.is_complete());
if let Result::Complete(i,list) = result {
assert_eq!(list.len(), 2);
assert_eq!(list[0], b'a');
assert_eq!(list[1], b'1');
assert!(eoi(i).is_complete());
}
}
// "-" is neither a letter nor a digit, so `ascii_alphanumeric` fails.
#[test]
fn test_ascii_alphanumeric_fail() {
let input_str = "-";
let iter = StrIter::new(input_str);
let result = ascii_alphanumeric(iter);
assert!(result.is_fail());
}
// Repeating `ascii_digit` over "12" yields both digit bytes and consumes the
// whole input.
#[test]
fn test_ascii_digit() {
let input_str = "12";
let iter = StrIter::new(input_str);
let result = repeat!(iter, ascii_digit);
assert!(result.is_complete());
if let Result::Complete(i,list) = result {
assert_eq!(list.len(), 2);
assert_eq!(list[0], b'1');
assert_eq!(list[1], b'2');
assert!(eoi(i).is_complete());
}
}
// "-" is not a digit, so `ascii_digit` fails.
#[test]
fn test_ascii_digit_fail() {
let input_str = "-";
let iter = StrIter::new(input_str);
let result = ascii_digit(iter);
assert!(result.is_fail());
}
// Repeating `ascii_alpha` over "ab" yields both letter bytes and consumes the
// whole input.
#[test]
fn test_ascii_alpha() {
let input_str = "ab";
let iter = StrIter::new(input_str);
let result = repeat!(iter, ascii_alpha);
assert!(result.is_complete());
if let Result::Complete(i,list) = result {
assert_eq!(list.len(), 2);
assert_eq!(list[0], b'a');
assert_eq!(list[1], b'b');
assert!(eoi(i).is_complete());
}
}
// "1" is a digit, not a letter, so `ascii_alpha` fails.
#[test]
fn test_ascii_alpha_fail() {
let input_str = "1";
let iter = StrIter::new(input_str);
let result = ascii_alpha(iter);
assert!(result.is_fail());
}
// `consume_all!` stops at the first byte the rule rejects: the three letters
// are returned as "foo" and the remaining input starts at offset 3 with ";".
#[test]
fn test_consume_all() {
let input_str = "foo;";
let iter = StrIter::new(input_str);
let result = consume_all!(iter, ascii_alpha);
assert!(result.is_complete());
if let Result::Complete(i, o) = result {
assert_eq!(i.get_offset(), 3);
assert_eq!(o, "foo");
assert!(text_token!(i, ";").is_complete());
}
}