2018-09-12 21:00:03 -05:00
|
|
|
// Copyright 2017 Jeremy Wall <jeremy@marzhillstudios.com>
|
|
|
|
//
|
|
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
// you may not use this file except in compliance with the License.
|
|
|
|
// You may obtain a copy of the License at
|
|
|
|
//
|
|
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
//
|
|
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
// See the License for the specific language governing permissions and
|
|
|
|
// limitations under the License.
|
|
|
|
|
2018-09-07 22:09:45 -05:00
|
|
|
//! An opinionated parser combinator library with a focus on fully abortable parsing and error handling.
|
2018-09-12 19:55:14 -05:00
|
|
|
//!
|
2018-09-12 20:22:06 -05:00
|
|
|
//! The approach to macro composition is heavily inspired by nom. However we emphasize error
|
|
|
|
//! handling as a first class citizen. abortable_parser has the concept of an unrecoverable
|
|
|
|
//! parsing error as distinct from a general failure to match.
|
|
|
|
//!
|
|
|
|
//! We have a number of macros that assist in the generation or handling of each type
|
|
|
|
//! of error.
|
|
|
|
//!
|
|
|
|
//! # Simple parsing of a url.
|
2018-09-12 19:55:14 -05:00
|
|
|
//!
|
2018-09-12 19:33:11 -05:00
|
|
|
//! ```
|
|
|
|
//! #[macro_use]
|
|
|
|
//! extern crate abortable_parser;
|
|
|
|
//! use abortable_parser::iter::StrIter;
|
|
|
|
//! use abortable_parser::{Result, eoi, ascii_ws};
|
2018-09-12 19:55:14 -05:00
|
|
|
//!
|
2018-09-12 19:33:11 -05:00
|
|
|
//! make_fn!(proto<StrIter, &str>,
|
|
|
|
//! do_each!(
|
|
|
|
//! proto => until!(text_token!("://")),
|
|
|
|
//! _ => must!(text_token!("://")),
|
|
|
|
//! (proto)
|
|
|
|
//! )
|
|
|
|
//! );
|
|
|
|
//!
|
|
|
|
//! make_fn!(domain<StrIter, &str>,
|
2018-09-12 20:22:06 -05:00
|
|
|
//! do_each!(
|
|
|
|
//! // domains do not start with a slash
|
|
|
|
//! _ => peek!(not!(text_token!("/"))),
|
|
|
|
//! domain => until!(either!(
|
|
|
|
//! discard!(text_token!("/")),
|
|
|
|
//! discard!(ascii_ws),
|
|
|
|
//! eoi)),
|
|
|
|
//! (domain)
|
|
|
|
//! )
|
2018-09-12 19:33:11 -05:00
|
|
|
//! );
|
2018-09-12 19:55:14 -05:00
|
|
|
//!
|
2018-09-12 19:33:11 -05:00
|
|
|
//! make_fn!(path<StrIter, &str>,
|
|
|
|
//! until!(either!(discard!(ascii_ws), eoi))
|
|
|
|
//! );
|
2018-09-12 19:55:14 -05:00
|
|
|
//!
|
2018-09-12 20:22:06 -05:00
|
|
|
//! make_fn!(full_url<StrIter, (Option<&str>, Option<&str>, Option<&str>)>,
|
|
|
|
//! do_each!(
|
|
|
|
//! protocol => proto,
|
|
|
|
//! // If we match the protocol then we must have a domain.
|
|
|
|
//! // This is an example of an unrecoverable parsing error so we
|
|
|
|
//! // abort with the must! macro if it doesn't match.
|
|
|
|
//! domain => must!(domain),
|
|
|
|
//! path => optional!(path),
|
|
|
|
//! (Some(protocol), Some(domain), path)
|
|
|
|
//! )
|
|
|
|
//! );
|
|
|
|
//!
|
|
|
|
//! make_fn!(relative_url<StrIter, (Option<&str>, Option<&str>, Option<&str>)>,
|
2018-09-12 19:33:11 -05:00
|
|
|
//! do_each!(
|
2018-09-12 20:22:06 -05:00
|
|
|
//! _ => not!(either!(text_token!("//"), proto)),
|
|
|
|
//! // we require a valid path for relative urls.
|
2018-09-12 19:33:11 -05:00
|
|
|
//! path => path,
|
2018-09-12 20:22:06 -05:00
|
|
|
//! (None, None, Some(path))
|
|
|
|
//! )
|
|
|
|
//! );
|
|
|
|
//!
|
|
|
|
//! make_fn!(url<StrIter, (Option<&str>, Option<&str>, Option<&str>)>,
|
|
|
|
//! either!(
|
|
|
|
//! full_url,
|
|
|
|
//! relative_url,
|
2018-09-12 19:33:11 -05:00
|
|
|
//! )
|
|
|
|
//! );
|
2018-09-12 19:55:14 -05:00
|
|
|
//!
|
2018-09-12 19:33:11 -05:00
|
|
|
//! # fn main() {
|
|
|
|
//! let iter = StrIter::new("http://example.com/some/path ");
|
|
|
|
//! let result = url(iter);
|
|
|
|
//! assert!(result.is_complete());
|
|
|
|
//! if let Result::Complete(_, (proto, domain, path)) = result {
|
|
|
|
//! assert!(proto.is_some());
|
|
|
|
//! assert!(domain.is_some());
|
2018-09-12 20:22:06 -05:00
|
|
|
//! if let Some(domain) = domain {
|
|
|
|
//! assert_eq!(domain, "example.com");
|
|
|
|
//! }
|
|
|
|
//! assert!(path.is_some());
|
|
|
|
//! if let Some(path) = path {
|
|
|
|
//! assert_eq!(path, "/some/path");
|
|
|
|
//! }
|
2018-09-12 19:33:11 -05:00
|
|
|
//! }
|
2018-09-12 20:22:06 -05:00
|
|
|
//!
|
|
|
|
//! let bad_input = StrIter::new("http:///some/path");
|
|
|
|
//! let bad_result = url(bad_input);
|
|
|
|
//! assert!(bad_result.is_abort());
|
2018-09-12 19:33:11 -05:00
|
|
|
//! # }
|
|
|
|
//! ```
|
2018-09-07 22:09:45 -05:00
|
|
|
use std::fmt::Display;
|
2018-09-03 00:06:15 -05:00
|
|
|
use std::iter::Iterator;
|
2018-09-02 22:10:11 -05:00
|
|
|
|
2018-09-03 00:09:57 -05:00
|
|
|
/// A trait for types that can have an offset as a count of processed items.
pub trait Offsetable {
    /// Returns the offset reported by this value.
    fn get_offset(&self) -> usize;
}

/// A plain `usize` is its own offset.
impl Offsetable for usize {
    fn get_offset(&self) -> usize {
        *self
    }
}
|
|
|
|
|
2018-09-12 20:18:19 -05:00
|
|
|
/// Trait for inputs that can track lines and columns in a text input.
pub trait TextPositionTracker {
    /// Returns the line tracked by this input.
    fn line(&self) -> usize;

    /// Returns the column tracked by this input.
    fn column(&self) -> usize;
}
|
|
|
|
|
2018-09-12 20:22:06 -05:00
|
|
|
/// SpanRange encompasses the valid `std::ops` range types for use with the Span trait.
pub enum SpanRange {
    /// Wraps a `start..end` range.
    Range(std::ops::Range<usize>),
    /// Wraps a `..end` range.
    RangeTo(std::ops::RangeTo<usize>),
    /// Wraps a `start..` range.
    RangeFrom(std::ops::RangeFrom<usize>),
    /// Wraps the unbounded `..` range.
    RangeFull(std::ops::RangeFull),
}
|
|
|
|
|
2018-09-12 20:22:06 -05:00
|
|
|
/// An input that can provide a span of a range of the input.
|
2018-09-11 20:54:20 -05:00
|
|
|
pub trait Span<O> {
|
|
|
|
fn span(&self, idx: SpanRange) -> O;
|
|
|
|
}
|
|
|
|
|
2018-09-03 00:05:32 -05:00
|
|
|
/// A Cloneable Iterator that can report an offset as a count of processed Items.
|
|
|
|
pub trait InputIter: Iterator + Clone + Offsetable {}
|
|
|
|
|
2018-09-03 00:09:57 -05:00
|
|
|
/// The custom error type for use in `Result::{Fail, Abort}`.
|
|
|
|
/// Stores a wrapped err that must implement Display as well as an offset and
|
|
|
|
/// an optional cause.
|
2018-09-03 00:05:32 -05:00
|
|
|
#[derive(Debug)]
|
2018-09-07 22:09:45 -05:00
|
|
|
pub struct Error {
|
|
|
|
msg: String,
|
2018-09-03 00:05:32 -05:00
|
|
|
offset: usize,
|
2018-09-07 22:09:45 -05:00
|
|
|
cause: Option<Box<Error>>,
|
2018-09-03 00:05:32 -05:00
|
|
|
}
|
|
|
|
|
2018-09-07 22:09:45 -05:00
|
|
|
impl Error {
|
2018-09-03 00:09:57 -05:00
|
|
|
/// Constructs a new Error with an offset and no cause.
|
2018-09-12 19:55:14 -05:00
|
|
|
pub fn new<S, M>(msg: M, offset: &S) -> Self
|
2018-09-07 22:09:45 -05:00
|
|
|
where
|
|
|
|
S: Offsetable,
|
2018-09-12 19:55:14 -05:00
|
|
|
M: Into<String>,
|
|
|
|
{
|
2018-09-03 00:06:15 -05:00
|
|
|
Error {
|
2018-09-07 22:09:45 -05:00
|
|
|
msg: msg.into(),
|
2018-09-03 00:05:32 -05:00
|
|
|
offset: offset.get_offset(),
|
|
|
|
cause: None,
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-09-03 00:09:57 -05:00
|
|
|
/// Constructs a new Error with an offset and a cause.
|
2018-09-07 22:09:45 -05:00
|
|
|
pub fn caused_by<S, M>(msg: M, offset: &S, cause: Self) -> Self
|
|
|
|
where
|
|
|
|
S: Offsetable,
|
2018-09-12 19:55:14 -05:00
|
|
|
M: Into<String>,
|
|
|
|
{
|
2018-09-03 00:06:15 -05:00
|
|
|
Error {
|
2018-09-07 22:09:45 -05:00
|
|
|
msg: msg.into(),
|
2018-09-03 00:05:32 -05:00
|
|
|
offset: offset.get_offset(),
|
|
|
|
cause: Some(Box::new(cause)),
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-09-03 00:09:57 -05:00
|
|
|
/// Returns the contained err.
|
2018-09-07 22:09:45 -05:00
|
|
|
pub fn get_msg<'a>(&'a self) -> &'a str {
|
2018-09-07 20:46:17 -05:00
|
|
|
&self.msg
|
2018-09-03 00:05:32 -05:00
|
|
|
}
|
|
|
|
|
2018-09-03 00:09:57 -05:00
|
|
|
/// Returns `Some(cause)` if there is one, None otherwise.
|
2018-09-07 22:09:45 -05:00
|
|
|
pub fn get_cause<'a>(&'a self) -> Option<&'a Error> {
|
2018-09-03 00:05:32 -05:00
|
|
|
match self.cause {
|
|
|
|
Some(ref cause) => Some(cause),
|
|
|
|
None => None,
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-09-03 00:09:57 -05:00
|
|
|
// Returns the offset at which this Error happened.
|
2018-09-03 00:05:32 -05:00
|
|
|
pub fn get_offset(&self) -> usize {
|
|
|
|
self.offset
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-09-07 22:09:45 -05:00
|
|
|
impl Display for Error {
|
2018-09-03 00:05:32 -05:00
|
|
|
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::result::Result<(), std::fmt::Error> {
|
2018-09-07 20:46:17 -05:00
|
|
|
try!(write!(f, "{}", self.msg));
|
2018-09-03 00:05:32 -05:00
|
|
|
match self.cause {
|
|
|
|
Some(ref c) => write!(f, "\n\tCaused By:{}", c),
|
2018-09-03 00:06:15 -05:00
|
|
|
None => Ok(()),
|
2018-09-03 00:05:32 -05:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-09-02 22:10:11 -05:00
|
|
|
/// The result of a parsing attempt.
|
|
|
|
#[derive(Debug)]
|
2018-09-07 22:09:45 -05:00
|
|
|
pub enum Result<I: InputIter, O> {
|
2018-09-02 22:10:11 -05:00
|
|
|
/// Complete represents a successful match.
|
|
|
|
Complete(I, O),
|
|
|
|
/// Incomplete indicates input ended before a match could be completed.
|
|
|
|
/// It contains the offset at which the input ended before a match could be completed.
|
|
|
|
Incomplete(usize),
|
|
|
|
/// Fail represents a failed match.
|
2018-09-07 22:09:45 -05:00
|
|
|
Fail(Error),
|
2018-09-02 22:10:11 -05:00
|
|
|
/// Abort represents a match failure that the parser cannot recover from.
|
2018-09-07 22:09:45 -05:00
|
|
|
Abort(Error),
|
2018-09-02 22:10:11 -05:00
|
|
|
}
|
|
|
|
|
2018-09-07 22:09:45 -05:00
|
|
|
impl<I: InputIter, O> Result<I, O> {
|
2018-09-02 22:10:11 -05:00
|
|
|
/// Returns true if the Result is Complete.
|
|
|
|
pub fn is_complete(&self) -> bool {
|
|
|
|
if let &Result::Complete(_, _) = self {
|
2018-09-03 00:06:15 -05:00
|
|
|
return true;
|
2018-09-02 22:10:11 -05:00
|
|
|
}
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2018-09-03 00:06:15 -05:00
|
|
|
/// Returns true if the Result is Incomoplete.
|
2018-09-02 22:10:11 -05:00
|
|
|
pub fn is_incomplete(&self) -> bool {
|
|
|
|
if let &Result::Incomplete(_) = self {
|
2018-09-03 00:06:15 -05:00
|
|
|
return true;
|
2018-09-02 22:10:11 -05:00
|
|
|
}
|
|
|
|
return false;
|
|
|
|
}
|
2018-09-03 00:06:15 -05:00
|
|
|
|
|
|
|
/// Returns true if the Result is Fail.
|
2018-09-02 22:10:11 -05:00
|
|
|
pub fn is_fail(&self) -> bool {
|
|
|
|
if let &Result::Fail(_) = self {
|
2018-09-03 00:06:15 -05:00
|
|
|
return true;
|
2018-09-02 22:10:11 -05:00
|
|
|
}
|
|
|
|
return false;
|
|
|
|
}
|
2018-09-03 00:06:15 -05:00
|
|
|
|
|
|
|
/// Returns true if the Result is Abort.
|
2018-09-02 22:10:11 -05:00
|
|
|
pub fn is_abort(&self) -> bool {
|
|
|
|
if let &Result::Abort(_) = self {
|
2018-09-03 00:06:15 -05:00
|
|
|
return true;
|
2018-09-02 22:10:11 -05:00
|
|
|
}
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-09-12 19:33:11 -05:00
|
|
|
pub use combinators::*;
|
2018-09-12 19:55:14 -05:00
|
|
|
pub use iter::SliceIter;
|
2018-09-02 22:10:11 -05:00
|
|
|
|
|
|
|
#[macro_use]
|
2018-09-05 22:43:15 -05:00
|
|
|
pub mod combinators;
|
2018-09-02 22:10:11 -05:00
|
|
|
pub mod iter;
|
|
|
|
|
|
|
|
#[cfg(test)]
|
2018-09-12 19:55:14 -05:00
|
|
|
mod integration_tests;
|
2018-09-12 19:33:11 -05:00
|
|
|
#[cfg(test)]
|
2018-09-12 19:55:14 -05:00
|
|
|
mod test;
|