Initial Commit.

This commit is contained in:
Jeremy Wall 2018-09-02 22:10:11 -05:00
commit 00bd55874c
6 changed files with 671 additions and 0 deletions

6
.gitignore vendored Normal file
View File

@ -0,0 +1,6 @@
/target
**/*.rs.bk
/target
**/*.rs.bk
Cargo.lock

6
Cargo.toml Normal file
View File

@ -0,0 +1,6 @@
[package]
name = "abortable-parser"
version = "0.1.0"
authors = ["Jeremy Wall <jeremy@marzhillstudios.com>"]
[dependencies]

51
src/iter.rs Normal file
View File

@ -0,0 +1,51 @@
//! Contains implementations of `InputIter`.
use std::iter::Iterator;
use std::fmt::Debug;
use super::InputIter;
/// An `InputIter` implementation backed by a borrowed slice of `T`.
///
/// The iterator keeps the whole slice plus a running position, so the
/// position doubles as the count of items yielded so far.
#[derive(Debug)]
pub struct SliceIter<'a, T>
where
    T: Debug + 'a,
{
    /// The complete slice being iterated; it is never re-sliced.
    source: &'a [T],
    /// Index into `source` of the next item to yield.
    offset: usize,
}
impl<'a, T> SliceIter<'a, T>
where
    T: Debug + 'a,
{
    /// Creates a `SliceIter` over `source`, positioned at its first item.
    pub fn new(source: &'a [T]) -> Self {
        SliceIter { source, offset: 0 }
    }
}
impl<'a, T> Iterator for SliceIter<'a, T>
where
    T: Debug + 'a,
{
    type Item = &'a T;

    /// Yields the item at the current offset and advances past it.
    ///
    /// Returns `None`, leaving the offset untouched, once the offset is past
    /// the end of the slice.
    fn next(&mut self) -> Option<Self::Item> {
        let item = self.source.get(self.offset)?;
        self.offset += 1;
        Some(item)
    }
}
impl<'a, T> InputIter for SliceIter<'a, T>
where
    T: Debug + 'a,
{
    /// Reports the current position, i.e. how many items `next` has yielded.
    fn get_offset(&self) -> usize {
        self.offset
    }
}
impl<'a, T> Clone for SliceIter<'a, T>
where
    T: Debug + 'a,
{
    /// Copies the slice reference and the position.
    ///
    /// The items themselves are not cloned, which is why this impl is written
    /// by hand and does not require `T: Clone`.
    fn clone(&self) -> Self {
        let &SliceIter { source, offset } = self;
        SliceIter { source, offset }
    }
}

64
src/lib.rs Normal file
View File

@ -0,0 +1,64 @@
//! A parser combinator library with a focus on fully abortable parsing and error handling.
use std::iter::Iterator;
/// A cloneable `Iterator` that can report an offset as a count of processed items.
///
/// The `Clone` bound lets a caller keep a copy of the input before handing it
/// to a parser, so the earlier position is still available afterwards.
pub trait InputIter: Iterator + Clone {
/// Returns the offset of this iterator, counted in items already processed.
fn get_offset(&self) -> usize;
}
/// The result of a parsing attempt.
///
/// `I` is the input iterator type, `O` is the output type of a successful
/// match and `E` is the error type carried by `Fail` and `Abort`.
#[derive(Debug)]
pub enum Result<I: InputIter, O, E> {
/// Complete represents a successful match. It carries an input iterator and
/// the output of the match.
Complete(I, O),
/// Incomplete indicates input ended before a match could be completed.
/// It contains the offset at which the input ended before a match could be completed.
Incomplete(usize),
/// Fail represents a failed match.
Fail(E),
/// Abort represents a match failure that the parser cannot recover from.
Abort(E),
}
impl<I: InputIter, O, E> Result<I, O, E> {
    /// Reports whether this is the `Complete` variant.
    pub fn is_complete(&self) -> bool {
        match *self {
            Result::Complete(..) => true,
            _ => false,
        }
    }

    /// Reports whether this is the `Incomplete` variant.
    pub fn is_incomplete(&self) -> bool {
        match *self {
            Result::Incomplete(..) => true,
            _ => false,
        }
    }

    /// Reports whether this is the `Fail` variant.
    pub fn is_fail(&self) -> bool {
        match *self {
            Result::Fail(..) => true,
            _ => false,
        }
    }

    /// Reports whether this is the `Abort` variant.
    pub fn is_abort(&self) -> bool {
        match *self {
            Result::Abort(..) => true,
            _ => false,
        }
    }
}
pub use iter::SliceIter;
#[macro_use]
pub mod macros;
pub mod iter;
#[cfg(test)]
mod test;

275
src/macros.rs Normal file
View File

@ -0,0 +1,275 @@
//! Contains the helper macros for abortable-parser.
/// Converts a function identifier into a macro call. Useful when writing your own macro combinator.
///
/// `run!(input, parser)` expands to the plain call `parser(input)`, which lets
/// a function name be used wherever a `name!(args...)` sub parser form is
/// expected.
#[macro_export]
macro_rules! run {
($i:expr, $f:ident) => {
$f($i)
};
}
/// Promotes a sub parser's `Fail` to an `Abort`, turning any match failure
/// into a hard abort of the parser.
///
/// `Complete`, `Incomplete` and `Abort` results pass through unchanged. The
/// sub parser is either a function name or a `name!(args...)` macro call; the
/// input expression is handed to it as its first argument.
#[macro_export]
macro_rules! must {
    // A bare function name is wrapped in `run!` and handled by the arm below.
    ($input:expr, $parser:ident) => {
        must!($input, run!($parser))
    };
    ($input:expr, $parser:ident!( $( $args:tt )* ) ) => {
        match $parser!($input, $($args)*) {
            $crate::Result::Fail(err) | $crate::Result::Abort(err) => $crate::Result::Abort(err),
            $crate::Result::Incomplete(offset) => $crate::Result::Incomplete(offset),
            $crate::Result::Complete(rest, out) => $crate::Result::Complete(rest, out),
        }
    };
}
/// Demotes a sub parser's `Abort` to a `Fail`, letting a caller trap a hard
/// abort and treat it as an ordinary match failure.
///
/// `Complete`, `Incomplete` and `Fail` results pass through unchanged. The
/// sub parser is either a function name or a `name!(args...)` macro call; the
/// input expression is handed to it as its first argument.
#[macro_export]
macro_rules! trap {
    // A bare function name is wrapped in `run!` and handled by the arm below.
    ($input:expr, $parser:ident) => {
        trap!($input, run!($parser))
    };
    ($input:expr, $parser:ident!( $( $args:tt )* ) ) => {
        match $parser!($input, $($args)*) {
            $crate::Result::Abort(err) | $crate::Result::Fail(err) => $crate::Result::Fail(err),
            $crate::Result::Incomplete(offset) => $crate::Result::Incomplete(offset),
            $crate::Result::Complete(rest, out) => $crate::Result::Complete(rest, out),
        }
    };
}
/// Requires a sub parser to complete: both `Fail` and `Incomplete` become
/// `Abort`.
///
/// `Incomplete` carries only an offset, so the error factory `$efn` is called
/// with that offset to build the error for the resulting `Abort`. `Complete`
/// and `Abort` results pass through unchanged. The sub parser is either a
/// function name or a `name!(args...)` macro call.
#[macro_export]
macro_rules! must_complete {
    // A bare function name is wrapped in `run!` and handled by the arm below.
    ($input:expr, $efn:expr, $parser:ident) => {
        must_complete!($input, $efn, run!($parser))
    };
    ($input:expr, $efn:expr, $parser:ident!( $( $args:tt )* ) ) => {
        match $parser!($input, $($args)*) {
            $crate::Result::Incomplete(offset) => $crate::Result::Abort($efn(offset)),
            $crate::Result::Fail(err) | $crate::Result::Abort(err) => $crate::Result::Abort(err),
            $crate::Result::Complete(rest, out) => $crate::Result::Complete(rest, out),
        }
    };
}
/// Runs a sequence of sub parsers, feeding the remaining input of each one
/// into the next, and finishes with a tuple built from the captured outputs.
///
/// Each step is written `name => parser` to bind the parser's output to
/// `name`, or `_ => parser` to discard it. A parser is either a function name
/// or a `name!(args...)` macro call. The invocation must end with a
/// parenthesised tuple expression, which becomes the output of the final
/// `Complete` result.
///
/// The first step that returns `Incomplete`, `Fail` or `Abort` ends the whole
/// sequence with that result.
///
/// Every variant is named through `$crate::Result`, so the expansion does not
/// depend on which `Result` type happens to be in scope at the call site.
#[macro_export]
macro_rules! do_each {
    ($i:expr, $val:ident => $f:ident) => {
        // A sequence that ends on a step instead of a tuple is rejected at compile time.
        compile_error!("do_each! must end with a tuple capturing the results")
    };
    ($i:expr, $val:ident => $f:ident!($( $args:tt )* ), $($rest:tt)* ) => {
        // If any single one of these matchers fails then all of them are failures.
        match $f!($i, $($args)*) {
            $crate::Result::Complete(i, o) => {
                let $val = o;
                do_each!(i, $($rest)*)
            }
            $crate::Result::Incomplete(offset) => $crate::Result::Incomplete(offset),
            $crate::Result::Fail(e) => $crate::Result::Fail(e),
            $crate::Result::Abort(e) => $crate::Result::Abort(e),
        }
    };
    ($i:expr, _ => $f:ident!($( $args:tt )* ), $($rest:tt)* ) => {
        // Same as above, but the output of this step is discarded.
        match $f!($i, $($args)*) {
            $crate::Result::Complete(i, _) => {
                do_each!(i, $($rest)*)
            }
            $crate::Result::Incomplete(offset) => $crate::Result::Incomplete(offset),
            $crate::Result::Fail(e) => $crate::Result::Fail(e),
            $crate::Result::Abort(e) => $crate::Result::Abort(e),
        }
    };
    ($i:expr, $val:ident => $f:ident, $($rest:tt)* ) => {
        // A bare function name is wrapped in `run!` and re-dispatched.
        do_each!($i, $val => run!($f), $( $rest )* )
    };
    ($i:expr, _ => $f:ident, $($rest:tt)* ) => {
        // A bare function name is wrapped in `run!` and re-dispatched.
        do_each!($i, _ => run!($f), $( $rest )* )
    };
    // FIXME(jwall): Make this internal only.
    // Terminal condition: only the result tuple is left.
    ($i:expr, ( $($rest:tt)* ) ) => {
        $crate::Result::Complete($i, ($($rest)*))
    };
}
/// Returns the output of the first sub parser to succeed.
///
/// The alternatives are tried in order, each on its own clone of the input.
/// A `Fail` moves on to the next alternative; `Complete`, `Incomplete` and
/// `Abort` end the search immediately with that result. The result of the
/// last alternative is returned as is. Each alternative is either a function
/// name or a `name!(args...)` macro call, and a trailing comma after the last
/// alternative is accepted.
///
/// Every variant is named through `$crate::Result`, so the expansion does not
/// depend on which `Result` type happens to be in scope at the call site.
///
/// Arms whose pattern starts with `__impl` are implementation details.
#[macro_export]
macro_rules! either {
    // Entry point: the first alternative is a macro call.
    ($i:expr, $f:ident!( $( $args:tt )* ), $( $rest:tt)* ) => { // 0
        either!(__impl $i, $f!( $($args)* ), $($rest)*)
    };
    // Entry point: the first alternative is a function name.
    ($i:expr, $f:ident, $($rest:tt)* ) => { // 1
        either!(__impl $i, run!($f), $($rest)*)
    };
    // A lone alternative is rejected at compile time.
    ($i:expr, $f:ident!( $( $args:tt )* )) => { // 2
        compile_error!("Either requires at least two sub matchers.")
    };
    // A lone function name is routed to the rejection arm above.
    ($i:expr, $f:ident) => { // 3
        either!($i, run!($f))
    };
    // Last alternative, given as a function name.
    (__impl $i:expr, $f:ident) => { // 4
        either!(__impl $i, run!($f))
    };
    // Last alternative, given as a function name with a trailing comma.
    (__impl $i:expr, $f:ident,) => { // 5
        either!(__impl $i, run!($f))
    };
    // Last alternative, given as a macro call with a trailing comma: drop the
    // comma and fall through to the terminal arm.
    (__impl $i:expr, $f:ident!( $( $args:tt )* ),) => { // 6
        either!(__impl $i, $f!($($args)*))
    };
    // Terminal arm: nothing is left to fall back to, so every outcome of the
    // last alternative is returned unchanged.
    (__impl $i:expr, $f:ident!( $( $args:tt )* )) => {{ // 7
        match $f!($i, $($args)*) {
            $crate::Result::Complete(i, o) => $crate::Result::Complete(i, o),
            $crate::Result::Incomplete(offset) => $crate::Result::Incomplete(offset),
            $crate::Result::Fail(e) => $crate::Result::Fail(e),
            $crate::Result::Abort(e) => $crate::Result::Abort(e),
        }
    }};
    // Loop arm: try this alternative and fall back to the rest on `Fail`.
    (__impl $i:expr, $f:ident!( $( $args:tt )* ), $( $rest:tt )* ) => {{ // 8
        // The parser consumes its input, so keep a copy for the next alternative.
        let _i = $i.clone();
        match $f!($i, $($args)*) {
            // The first one to match is our result.
            $crate::Result::Complete(i, o) => $crate::Result::Complete(i, o),
            // Incompletes may still be parseable.
            $crate::Result::Incomplete(offset) => $crate::Result::Incomplete(offset),
            // Fail means it didn't match so continue to next one.
            $crate::Result::Fail(_) => {
                either!(__impl _i, $($rest)*)
            },
            // Aborts are hard failures that the parser can't recover from.
            $crate::Result::Abort(e) => $crate::Result::Abort(e),
        }
    }};
    // Loop arm for a function name: wrap it in `run!` and re-dispatch.
    (__impl $i:expr, $f:ident, $( $rest:tt )* ) => { // 9
        either!(__impl $i, run!($f), $( $rest )* )
    }
}
/// Treats a sub parser as optional.
///
/// A successful match yields `Complete` with `Some(output)`. A `Fail` yields
/// `Complete` with `None` and the input as it was before the attempt.
/// `Incomplete` and `Abort` pass through unchanged. The sub parser is either
/// a function name or a `name!(args...)` macro call.
///
/// Every variant is named through `$crate::Result`, so the expansion does not
/// depend on which `Result` type happens to be in scope at the call site.
///
/// The arm whose pattern starts with `__impl` is an implementation detail.
#[macro_export]
macro_rules! optional {
    ($i:expr, $f:ident) => {
        optional!(__impl $i, run!($f))
    };
    ($i:expr, $f:ident!( $( $args:tt )* ) ) => {
        optional!(__impl $i, $f!( $( $args )* ))
    };
    (__impl $i:expr, $f:ident!( $( $args:tt )* )) => {{
        // The parser consumes its input, so keep a copy to hand back on `Fail`.
        let _i = $i.clone();
        match $f!($i, $($args)*) {
            $crate::Result::Complete(i, o) => $crate::Result::Complete(i, Some(o)),
            // An Incomplete might still parse once more input is available.
            $crate::Result::Incomplete(offset) => $crate::Result::Incomplete(offset),
            // Fail just means it didn't match.
            $crate::Result::Fail(_) => $crate::Result::Complete(_i, None),
            // Aborts are hard failures that the parser can't recover from.
            $crate::Result::Abort(e) => $crate::Result::Abort(e),
        }
    }};
}
/// Runs a single parser repeating 0 or more times and returns a possibly empty
/// vector of the parsed results.
///
/// Outputs are collected for as long as the sub parser returns `Complete`.
/// The first `Fail` or `Incomplete` ends the repetition with a `Complete`
/// holding the outputs gathered so far and the input as it was before that
/// last attempt. An `Abort` ends the repetition with that `Abort`. The sub
/// parser is either a function name or a `name!(args...)` macro call.
///
/// Note that a sub parser which returns `Complete` on every call never lets
/// the loop end.
#[macro_export]
macro_rules! repeat {
    ($i:expr, $f:ident!( $( $args:tt )* ) ) => {{
        // Work on a clone so the caller's input expression is left untouched.
        let mut _i = $i.clone();
        let mut seq = Vec::new();
        loop {
            // The parser consumes `_i`, so keep the position before this attempt.
            let __i = _i.clone();
            match $f!(_i, $($args)*) {
                $crate::Result::Complete(i, o) => {
                    seq.push(o);
                    _i = i;
                }
                // Aborts are always a hard fail.
                $crate::Result::Abort(e) => break $crate::Result::Abort(e),
                // Everything else just means we are finished parsing.
                $crate::Result::Incomplete(_) | $crate::Result::Fail(_) => {
                    break $crate::Result::Complete(__i, seq);
                }
            }
        }
    }};
    ($i:expr, $f:ident) => {
        repeat!($i, run!($f))
    };
}

269
src/test.rs Normal file
View File

@ -0,0 +1,269 @@
use super::iter::SliceIter;
use super::{Result, InputIter};
/// Drains a `SliceIter` over the bytes of "foo" and checks the items and
/// offsets, then checks that a clone taken before draining replays the same
/// bytes.
#[test]
fn test_slice_iter() {
    let mut iter = SliceIter::new("foo".as_bytes());
    let snapshot = iter.clone();
    assert_eq!(0, iter.get_offset());

    let mut out = Vec::new();
    for b in iter.by_ref() {
        out.push(*b);
    }
    assert_eq!(3, out.len());
    assert_eq!(b'f', out[0]);
    assert_eq!(b'o', out[1]);
    assert_eq!(b'o', out[2]);
    assert_eq!(3, iter.get_offset());

    // The clone was taken at offset 0, so it starts from the beginning.
    let replay: Vec<u8> = snapshot.map(|b| *b).collect();
    assert_eq!(3, replay.len());
    assert_eq!(b'f', replay[0]);
    assert_eq!(b'o', replay[1]);
    assert_eq!(b'o', replay[2]);
}
/// Test parser that ignores its input and always returns `Fail`.
fn will_fail(_input: SliceIter<u8>) -> Result<SliceIter<u8>, String, String> {
    let message = String::from("AAAAHHH!!!");
    Result::Fail(message)
}
/// Test parser that consumes one byte and returns it, or reports `Incomplete`
/// with the current offset when no input is left.
fn parse_byte(mut i: SliceIter<u8>) -> Result<SliceIter<u8>, u8, String> {
    if let Some(&byte) = i.next() {
        Result::Complete(i, byte)
    } else {
        Result::Incomplete(i.get_offset())
    }
}
/// Test parser that ignores its input and always returns `Incomplete` at
/// offset 0.
fn will_not_complete(_input: SliceIter<u8>) -> Result<SliceIter<u8>, String, String> {
    let offset = 0;
    Result::Incomplete(offset)
}
/// Test parser that consumes exactly three bytes and returns them as a
/// `String`, one `char` per byte.
///
/// Returns `Incomplete` with the current offset when fewer than three bytes
/// are available.
fn parse_three(i: SliceIter<u8>) -> Result<SliceIter<u8>, String, String> {
    let mut rest = i;
    let mut out = String::new();
    // Count consumed bytes rather than `out.len()`: a byte of 0x80 or above
    // becomes a two-byte `char`, so the string length is not the byte count.
    for _ in 0..3 {
        match rest.next() {
            Some(b) => out.push(*b as char),
            None => return Result::Incomplete(rest.get_offset()),
        }
    }
    Result::Complete(rest, out)
}
/// `must!` reports the `Fail` from `will_fail` as an `Abort`.
#[test]
fn test_must_fails() {
    let iter = SliceIter::new("foo".as_bytes());
    let outcome = must!(iter, will_fail);
    assert!(outcome.is_abort());
}
/// `must!` leaves the `Complete` from `parse_byte` untouched.
#[test]
fn test_must_succeed() {
    let iter = SliceIter::new("foo".as_bytes());
    let outcome = must!(iter, parse_byte);
    assert!(outcome.is_complete());
}
/// `trap!` turns the `Abort` produced by `must!(will_fail)` back into a `Fail`.
#[test]
fn test_trap_abort() {
    let input_str = "foo";
    let iter = SliceIter::new(input_str.as_bytes());
    let result = trap!(iter, must!(will_fail));
    // Pass format arguments straight to `assert!`: a pre-built `String` as the
    // message is rejected on edition 2021 and is built even when the check passes.
    assert!(result.is_fail(), "{:?}", result);
}
/// `trap!` passes an `Incomplete` through unchanged.
#[test]
fn test_trap_incomplete() {
    let input_str = "foo";
    let iter = SliceIter::new(input_str.as_bytes());
    let result = trap!(iter, will_not_complete);
    // Pass format arguments straight to `assert!`: a pre-built `String` as the
    // message is rejected on edition 2021 and is built even when the check passes.
    assert!(result.is_incomplete(), "{:?}", result);
}
/// `trap!` passes a `Fail` through unchanged.
#[test]
fn test_trap_fail() {
    let input_str = "foo";
    let iter = SliceIter::new(input_str.as_bytes());
    let result = trap!(iter, will_fail);
    // Pass format arguments straight to `assert!`: a pre-built `String` as the
    // message is rejected on edition 2021 and is built even when the check passes.
    assert!(result.is_fail(), "{:?}", result);
}
/// `trap!` passes a `Complete` through unchanged.
#[test]
fn test_trap_complete() {
    let input_str = "foo";
    let iter = SliceIter::new(input_str.as_bytes());
    let result = trap!(iter, parse_byte);
    // Pass format arguments straight to `assert!`: a pre-built `String` as the
    // message is rejected on edition 2021 and is built even when the check passes.
    assert!(result.is_complete(), "{:?}", result);
}
/// `must_complete!` aborts both when the sub parser is `Incomplete` and when
/// it fails.
#[test]
fn test_must_complete() {
    let iter = SliceIter::new("foo".as_bytes());
    let iter_fail = iter.clone();

    let incomplete = must_complete!(iter, |_| "AHHH".to_string(), will_not_complete);
    assert!(incomplete.is_abort());

    let failed = must_complete!(iter_fail, |_| "AHHH".to_string(), will_fail);
    assert!(failed.is_abort());
}
/// `do_each!` runs three byte parsers in sequence and returns the first and
/// third outputs as a tuple.
#[test]
fn test_do_each() {
    let iter = SliceIter::new("foo".as_bytes());
    let result = do_each!(iter,
        b1 => parse_byte,
        _ => parse_byte,
        b3 => parse_byte,
        (b1, b3)
    );
    assert!(result.is_complete());
    match result {
        Result::Complete(_, (first, third)) => {
            assert_eq!(b'f', first);
            assert_eq!(b'o', third);
        }
        _ => panic!("did not get a tuple of 2 items"),
    }
}
/// `either!` with function names skips the failing alternatives and returns
/// the output of the first one that completes.
#[test]
fn test_either_idents() {
    let input_str = "foo";
    let iter = SliceIter::new(input_str.as_bytes());
    let result = either!(iter,
        will_fail,
        will_fail,
        parse_three);
    assert!(result.is_complete());
    if let Result::Complete(_, o) = result {
        assert_eq!("foo".to_string(), o);
    } else {
        // The old message was a double negative that stated the opposite of the failure.
        panic!("Didn't successfully match");
    }
}
/// `either!` with `run!(...)` alternatives skips the failing ones and returns
/// the output of the first one that completes.
#[test]
fn test_either_macros() {
    let iter = SliceIter::new("foo".as_bytes());
    let result = either!(iter,
        run!(will_fail),
        run!(will_fail),
        run!(parse_three));
    assert!(result.is_complete());
    match result {
        Result::Complete(_, o) => assert_eq!("foo".to_string(), o),
        _ => panic!("Didn't successfully match"),
    }
}
/// `either!` fails when every alternative fails.
#[test]
fn test_either_fail() {
    let iter = SliceIter::new("foo".as_bytes());
    let outcome = either!(iter,
        run!(will_fail),
        run!(will_fail));
    assert!(outcome.is_fail());
}
/// `either!` stops at an aborting alternative without trying the later ones.
#[test]
fn test_either_abort() {
    let iter = SliceIter::new("foo".as_bytes());
    let outcome = either!(iter,
        must!(will_fail),
        parse_three,
        run!(will_fail));
    assert!(outcome.is_abort());
}
/// `optional!` wraps the output of a matching sub parser in `Some`.
#[test]
fn test_optional_some() {
    let iter = SliceIter::new("foo".as_bytes());
    let result = optional!(iter, parse_byte);
    assert!(result.is_complete());
    match result {
        Result::Complete(_, o) => assert_eq!(b'f', o.unwrap()),
        _ => panic!("optional! did not complete"),
    }
}
/// `optional!` completes with `None` when the sub parser fails.
#[test]
fn test_optional_none() {
    let iter = SliceIter::new("foo".as_bytes());
    let result = optional!(iter, will_fail);
    assert!(result.is_complete());
    match result {
        Result::Complete(_, o) => assert!(o.is_none(), "output was not none"),
        _ => panic!("optional! did not complete"),
    }
}
/// `optional!` does not swallow an `Abort` from its sub parser.
#[test]
fn test_optional_abort() {
    let iter = SliceIter::new("foo".as_bytes());
    let outcome = optional!(iter, must!(will_fail));
    assert!(outcome.is_abort(), "optional did not abort");
}
/// `repeat!` collects one output per byte until the input runs out.
#[test]
fn test_repeat() {
    let iter = SliceIter::new("foo".as_bytes());
    let result = repeat!(iter, parse_byte);
    assert!(result.is_complete());
    match result {
        Result::Complete(_, o) => {
            assert_eq!(3, o.len());
            assert_eq!(b'f', o[0]);
            assert_eq!(b'o', o[1]);
            assert_eq!(b'o', o[2]);
        }
        _ => panic!("repeat did not parse succesfully"),
    }
}
/// `repeat!` completes with an empty vector when the sub parser never matches.
#[test]
fn test_repeat_fail() {
    let iter = SliceIter::new("foo".as_bytes());
    let result = repeat!(iter, will_fail);
    assert!(result.is_complete());
    match result {
        Result::Complete(_, o) => assert_eq!(0, o.len()),
        _ => panic!("repeat did not parse succesfully"),
    }
}
/// `repeat!` reports an `Abort` from its sub parser as an `Abort`.
#[test]
fn test_repeat_abort() {
    let iter = SliceIter::new("foo".as_bytes());
    let outcome = repeat!(iter, must!(will_fail));
    assert!(outcome.is_abort());
}