REFACTOR: Use abortable_parser.

This commit is contained in:
Jeremy Wall 2018-09-23 15:08:45 -05:00
parent 29aed2c997
commit 91d7ed690b
12 changed files with 1147 additions and 1302 deletions

37
Cargo.lock generated
View File

@ -1,3 +1,8 @@
[[package]]
name = "abortable_parser"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "ansi_term"
version = "0.9.0"
@ -131,31 +136,6 @@ name = "linked-hash-map"
version = "0.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "memchr"
version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"libc 0.2.43 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "nom"
version = "3.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"memchr 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "nom_locate"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"memchr 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)",
"nom 3.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "redox_syscall"
version = "0.1.40"
@ -248,11 +228,10 @@ dependencies = [
name = "ucg"
version = "0.2.0"
dependencies = [
"abortable_parser 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
"bencher 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)",
"clap 2.26.2 (registry+https://github.com/rust-lang/crates.io-index)",
"cpuprofiler 0.0.3 (registry+https://github.com/rust-lang/crates.io-index)",
"nom 3.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
"nom_locate 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
"serde_json 1.0.26 (registry+https://github.com/rust-lang/crates.io-index)",
"serde_yaml 0.8.1 (registry+https://github.com/rust-lang/crates.io-index)",
"simple-error 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)",
@ -306,6 +285,7 @@ dependencies = [
]
[metadata]
"checksum abortable_parser 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "a628e31269165eeea62b71b2555c6379d4fbadb3e34656b6e1445b0235247c0d"
"checksum ansi_term 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)" = "23ac7c30002a5accbf7e8987d0632fa6de155b7c3d39d0067317a391e00a2ef6"
"checksum atty 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)" = "9a7d5b8723950951411ee34d271d99dddcc2035a16ab25310ea2c8cfd4369652"
"checksum backtrace 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)" = "346d7644f0b5f9bc73082d3b2236b69a05fd35cce0cfa3724e184e6a5c9e2a2f"
@ -324,9 +304,6 @@ dependencies = [
"checksum lazy_static 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)" = "76f033c7ad61445c5b347c7382dd1237847eb1bce590fe50365dcb33d546be73"
"checksum libc 0.2.43 (registry+https://github.com/rust-lang/crates.io-index)" = "76e3a3ef172f1a0b9a9ff0dd1491ae5e6c948b94479a3021819ba7d860c8645d"
"checksum linked-hash-map 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)" = "70fb39025bc7cdd76305867c4eccf2f2dcf6e9a57f5b21a93e1c2d86cd03ec9e"
"checksum memchr 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "148fab2e51b4f1cfc66da2a7c32981d1d3c083a803978268bb11fe4b86925e7a"
"checksum nom 3.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "05aec50c70fd288702bcd93284a8444607f3292dbdf2a30de5ea5dcdbe72287b"
"checksum nom_locate 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "49b1c61eff39ab6b91ccedfc62aff196eae066d88355b4fe3e4100c23168f0df"
"checksum redox_syscall 0.1.40 (registry+https://github.com/rust-lang/crates.io-index)" = "c214e91d3ecf43e9a4e41e578973adeb14b474f2bee858742d127af75a0112b1"
"checksum redox_termios 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "7e891cfe48e9100a70a3b6eb652fef28920c117d366339687bd5576160db0f76"
"checksum rustc-demangle 0.1.9 (registry+https://github.com/rust-lang/crates.io-index)" = "bcfe5b13211b4d78e5c2cadfebd7769197d95c639c35a50057eb4c05de811395"

View File

@ -9,11 +9,8 @@ readme = "README.md"
keywords = ["compiler", "config"]
license = "Apache-2.0"
[dependencies.nom]
version = "^3.2"
[dependencies]
nom_locate = "^0.1.1"
abortable_parser = "~0.1.0"
clap = "~2.26.0"
serde_json = "~1.0.9"
simple-error = "0.1"

View File

@ -9,6 +9,8 @@
You should be able to ask the compiler to tell you any value or set of values in the
compiled configuration.
Inspect is probably the correct location for this.
## Shape equality as a form of type assertion?
# Minor Fixes and Polish

View File

@ -16,17 +16,20 @@
#[macro_use]
extern crate bencher;
extern crate abortable_parser;
extern crate cpuprofiler;
extern crate nom_locate;
extern crate ucglib;
use bencher::Bencher;
use abortable_parser::StrIter;
//use cpuprofiler::PROFILER;
use ucglib::parse::*;
fn do_parse(i: &str) {
parse(nom_locate::LocatedSpan::new(i));
parse(StrIter::new(i));
}
fn parse_int(b: &mut Bencher) {

View File

@ -25,11 +25,12 @@ use std::path::PathBuf;
use std::rc::Rc;
use std::string::ToString;
use abortable_parser::StrIter;
use ast::*;
use error;
use format;
use parse::parse;
use tokenizer::Span;
pub mod assets;
pub mod ir;
@ -159,8 +160,9 @@ impl<'a> Builder<'a> {
&Value::Int(ref i) => Ok(Rc::new(Val::Int(i.val))),
&Value::Float(ref f) => Ok(Rc::new(Val::Float(f.val))),
&Value::Str(ref s) => Ok(Rc::new(Val::Str(s.val.to_string()))),
&Value::Symbol(ref s) => self.lookup_sym(&(s.into())).ok_or(Box::new(
error::Error::new(
&Value::Symbol(ref s) => {
self.lookup_sym(&(s.into()))
.ok_or(Box::new(error::Error::new(
format!(
"Unable to find {} in file: {}",
s.val,
@ -168,8 +170,8 @@ impl<'a> Builder<'a> {
),
error::ErrorType::NoSuchSymbol,
v.pos().clone(),
),
)),
)))
}
&Value::List(ref def) => self.list_to_val(def),
&Value::Tuple(ref tuple) => self.tuple_to_val(&tuple.val),
&Value::Selector(ref selector_list_node) => {
@ -243,7 +245,7 @@ impl<'a> Builder<'a> {
Ok(())
}
fn eval_span(&mut self, input: Span) -> Result<Rc<Val>, Box<Error>> {
fn eval_span(&mut self, input: StrIter) -> Result<Rc<Val>, Box<Error>> {
match parse(input) {
Ok(stmts) => {
//panic!("Successfully parsed {}", input);
@ -256,20 +258,20 @@ impl<'a> Builder<'a> {
Some(val) => Ok(val),
}
}
Err(err) => Err(Box::new(error::Error::new_with_cause(
Err(err) => Err(Box::new(error::Error::new_with_boxed_cause(
format!(
"Error while parsing file: {}",
self.curr_file.unwrap_or("<eval>")
),
error::ErrorType::ParseError,
err,
Box::new(err),
))),
}
}
/// Evaluate an input string as UCG.
pub fn eval_string(&mut self, input: &str) -> Result<Rc<Val>, Box<Error>> {
self.eval_span(Span::new(input))
self.eval_span(StrIter::new(input))
}
/// Builds a ucg file at the named path.
@ -839,8 +841,7 @@ impl<'a> Builder<'a> {
let first = a.0.clone();
let t = a.1.clone();
(first, t.1)
})
.collect(),
}).collect(),
)));
}
Err(Box::new(error::Error::new(
@ -992,11 +993,7 @@ impl<'a> Builder<'a> {
let expr = &tok.fragment;
expr_as_stmt.push_str(expr);
expr_as_stmt.push_str(";");
let assert_input = Span {
fragment: &expr_as_stmt,
line: tok.pos.line as u32,
offset: tok.pos.column,
};
let assert_input = StrIter::new(&expr_as_stmt);
let ok = match self.eval_span(assert_input) {
Ok(v) => v,
Err(e) => {

View File

@ -18,8 +18,6 @@ use std::fmt;
use ast::*;
use nom;
/// ErrorType defines the various types of errors that can result from compiling UCG into an
/// output format.
pub enum ErrorType {
@ -62,7 +60,7 @@ pub struct Error {
pub err_type: ErrorType,
pub pos: Position,
pub msg: String,
pub cause: Option<Box<Error>>,
pub cause: Option<Box<error::Error>>,
_pkgonly: (),
}
@ -77,8 +75,12 @@ impl Error {
}
}
pub fn new_with_boxed_cause<S: Into<String>>(msg: S, t: ErrorType, cause: Box<Self>) -> Self {
let mut e = Self::new(msg, t, cause.pos.clone());
pub fn new_with_boxed_cause<S: Into<String>>(
msg: S,
t: ErrorType,
cause: Box<error::Error>,
) -> Self {
let mut e = Self::new(msg, t, Position { line: 0, column: 0 });
e.cause = Some(cause);
return e;
}
@ -87,22 +89,6 @@ impl Error {
Self::new_with_boxed_cause(msg, t, Box::new(cause))
}
pub fn new_with_errorkind<S: Into<String>>(
msg: S,
t: ErrorType,
pos: Position,
cause: nom::ErrorKind<Error>,
) -> Self {
match cause {
nom::ErrorKind::Custom(e) => Self::new_with_cause(msg, t, e),
e => Self::new_with_cause(
msg,
t,
Error::new(format!("ErrorKind: {}", e), ErrorType::Unsupported, pos),
),
}
}
fn render(&self, w: &mut fmt::Formatter) -> fmt::Result {
try!(write!(
w,

View File

@ -444,9 +444,7 @@
// to succeed.
#![recursion_limit = "128"]
#[macro_use]
extern crate nom;
#[macro_use]
extern crate nom_locate;
extern crate abortable_parser;
extern crate serde_json;
extern crate serde_yaml;
extern crate simple_error;

File diff suppressed because it is too large Load Diff

View File

@ -14,14 +14,11 @@
//! Bottom up parser for precedence parsing of expressions separated by binary
//! operators.
use std;
use abortable_parser::combinators::eoi;
use abortable_parser::{Error, Result, SliceIter};
use nom::{ErrorKind, IResult, InputIter, InputLength, Slice};
use super::{non_op_expression, NomResult, ParseResult};
use super::{non_op_expression, NomResult};
use ast::*;
use error;
use tokenizer::TokenIter;
/// Defines the intermediate stages of our bottom up parser for precedence parsing.
#[derive(Debug, PartialEq, Clone)]
@ -31,120 +28,116 @@ pub enum Element {
CompareOp(CompareType),
}
named!(math_op_type<TokenIter, Element, error::Error>,
alt!(
do_parse!(punct!("+") >> (Element::MathOp(BinaryExprType::Add))) |
do_parse!(punct!("-") >> (Element::MathOp(BinaryExprType::Sub))) |
do_parse!(punct!("*") >> (Element::MathOp(BinaryExprType::Mul))) |
do_parse!(punct!("/") >> (Element::MathOp(BinaryExprType::Div)))
make_fn!(
math_op_type<SliceIter<Token>, Element>,
either!(
do_each!(
_ => punct!("+"),
(Element::MathOp(BinaryExprType::Add))),
do_each!(
_ => punct!("-"),
(Element::MathOp(BinaryExprType::Sub))),
do_each!(
_ => punct!("*"),
(Element::MathOp(BinaryExprType::Mul))),
do_each!(
_ => punct!("/"),
(Element::MathOp(BinaryExprType::Div)))
)
);
fn parse_expression(i: OpListIter) -> IResult<OpListIter, Expression, error::Error> {
let i_ = i.clone();
if i_.input_len() == 0 {
return IResult::Error(ErrorKind::Custom(error::Error::new(
format!("Expected Expression found End Of Input"),
error::ErrorType::IncompleteParsing,
// TODO(jwall): This position information is incorrect.
Position { line: 0, column: 0 },
)));
fn parse_expression(i: SliceIter<Element>) -> Result<SliceIter<Element>, Expression> {
let mut i_ = i.clone();
if eoi(i_.clone()).is_complete() {
return Result::Abort(Error::new("Expected Expression found End Of Input", &i_));
}
let el = &(i_[0]);
if let &Element::Expr(ref expr) = el {
return IResult::Done(i.slice(1..), expr.clone());
let el = i_.next();
if let Some(&Element::Expr(ref expr)) = el {
return Result::Complete(i_.clone(), expr.clone());
}
return IResult::Error(ErrorKind::Custom(error::Error::new(
return Result::Fail(Error::new(
format!(
"Error while parsing Binary Expression Unexpected Operator {:?}",
"Error while parsing Binary Expression Expected Expression got {:?}",
el
),
error::ErrorType::ParseError,
// TODO(jwall): This position information is incorrect.
Position { line: 0, column: 0 },
)));
&i_,
));
}
fn parse_sum_operator(i: OpListIter) -> IResult<OpListIter, BinaryExprType, error::Error> {
let i_ = i.clone();
if i_.input_len() == 0 {
return IResult::Error(ErrorKind::Custom(error::Error::new(
fn parse_sum_operator(i: SliceIter<Element>) -> Result<SliceIter<Element>, BinaryExprType> {
let mut i_ = i.clone();
if eoi(i_.clone()).is_complete() {
return Result::Fail(Error::new(
format!("Expected Expression found End Of Input"),
error::ErrorType::IncompleteParsing,
// TODO(jwall): This position information is incorrect.
Position { line: 0, column: 0 },
)));
&i_,
));
}
let el = &(i_[0]);
if let &Element::MathOp(ref op) = el {
let el = i_.next();
if let Some(&Element::MathOp(ref op)) = el {
match op {
&BinaryExprType::Add => {
return IResult::Done(i.slice(1..), op.clone());
return Result::Complete(i_.clone(), op.clone());
}
&BinaryExprType::Sub => {
return IResult::Done(i.slice(1..), op.clone());
return Result::Complete(i_.clone(), op.clone());
}
_other => {
// noop
}
};
}
return IResult::Error(ErrorKind::Custom(error::Error::new(
return Result::Fail(Error::new(
format!(
"Error while parsing Binary Expression Unexpected Operator {:?}",
el
),
error::ErrorType::ParseError,
// TODO(jwall): This position information is incorrect.
Position { line: 0, column: 0 },
)));
&i_,
));
}
fn tuple_to_binary_expression(
tpl: (BinaryExprType, Expression, Expression),
) -> ParseResult<Expression> {
let pos = tpl.1.pos().clone();
Ok(Expression::Binary(BinaryOpDef {
kind: tpl.0,
left: Box::new(tpl.1),
right: Box::new(tpl.2),
kind: BinaryExprType,
left: Expression,
right: Expression,
) -> Expression {
let pos = left.pos().clone();
Expression::Binary(BinaryOpDef {
kind: kind,
left: Box::new(left),
right: Box::new(right),
pos: pos,
}))
})
}
fn parse_product_operator(i: OpListIter) -> IResult<OpListIter, BinaryExprType, error::Error> {
let i_ = i.clone();
if i_.input_len() == 0 {
return IResult::Error(ErrorKind::Custom(error::Error::new(
fn parse_product_operator(i: SliceIter<Element>) -> Result<SliceIter<Element>, BinaryExprType> {
let mut i_ = i.clone();
if eoi(i_.clone()).is_complete() {
return Result::Fail(Error::new(
format!("Expected Expression found End Of Input"),
error::ErrorType::IncompleteParsing,
// TODO(jwall): This position information is incorrect.
Position { line: 0, column: 0 },
)));
&i_,
));
}
let el = &(i_[0]);
if let &Element::MathOp(ref op) = el {
let el = i_.next();
if let Some(&Element::MathOp(ref op)) = el {
match op {
&BinaryExprType::Mul => {
return IResult::Done(i.slice(1..), op.clone());
return Result::Complete(i_.clone(), op.clone());
}
&BinaryExprType::Div => {
return IResult::Done(i.slice(1..), op.clone());
return Result::Complete(i_.clone(), op.clone());
}
_other => {
// noop
}
};
}
return IResult::Error(ErrorKind::Custom(error::Error::new(
return Result::Fail(Error::new(
format!(
"Error while parsing Binary Expression Unexpected Operator {:?}",
el
),
error::ErrorType::ParseError,
// TODO(jwall): This position information is incorrect.
Position { line: 0, column: 0 },
)));
&i_,
));
}
/// do_binary_expr implements precedence based parsing where the more tightly bound
@ -152,15 +145,15 @@ fn parse_product_operator(i: OpListIter) -> IResult<OpListIter, BinaryExprType,
/// as the most tightly bound expressions.
macro_rules! do_binary_expr {
($i:expr, $oprule:ident, $lowerrule:ident) => {
do_binary_expr!($i, call!($oprule), $lowerrule)
do_binary_expr!($i, run!($oprule), $lowerrule)
};
($i:expr, $oprule:ident, $lowerrule:ident!( $($lowerargs:tt)* )) => {
do_binary_expr!($i, call!($oprule), $lowerrule!($($lowerargs)*))
do_binary_expr!($i, run!($oprule), $lowerrule!($($lowerargs)*))
};
($i:expr, $oprule:ident) => {
do_binary_expr!($i, call!($oprule))
do_binary_expr!($i, run!($oprule))
};
($i:expr, $oprule:ident!( $($args:tt)* )) => {
@ -168,101 +161,97 @@ macro_rules! do_binary_expr {
};
($i:expr, $oprule:ident!( $($args:tt)* ), $lowerrule:ident) => {
do_binary_expr!($i, $oprule!($($args)*), call!($lowerrule))
do_binary_expr!($i, $oprule!($($args)*), run!($lowerrule))
};
($i:expr, $oprule:ident!( $($args:tt)* ), $lowerrule:ident!( $($lowerargs:tt)* )) => {
map_res!($i,
do_parse!(
left: $lowerrule!($($lowerargs)*) >>
typ: $oprule!($($args)*) >>
right: $lowerrule!($($lowerargs)*) >>
(typ, left, right)
),
tuple_to_binary_expression
do_each!($i,
left => $lowerrule!($($lowerargs)*),
typ => $oprule!($($args)*),
right => $lowerrule!($($lowerargs)*),
(tuple_to_binary_expression(typ, left, right))
)
};
}
named!(sum_expression<OpListIter, Expression, error::Error>,
make_fn!(
sum_expression<SliceIter<Element>, Expression>,
do_binary_expr!(
parse_sum_operator,
alt!(trace_nom!(product_expression) | trace_nom!(parse_expression)))
either!(trace_nom!(product_expression), trace_nom!(parse_expression))
)
);
named!(product_expression<OpListIter, Expression, error::Error>,
do_binary_expr!(
parse_product_operator,
trace_nom!(parse_expression))
make_fn!(
product_expression<SliceIter<Element>, Expression>,
do_binary_expr!(parse_product_operator, trace_nom!(parse_expression))
);
named!(math_expression<OpListIter, Expression, error::Error>,
alt!(trace_nom!(sum_expression) | trace_nom!(product_expression))
make_fn!(
math_expression<SliceIter<Element>, Expression>,
either!(trace_nom!(sum_expression), trace_nom!(product_expression))
);
// TODO(jwall): Change comparison operators to use the do_binary_expr! with precedence?
fn tuple_to_compare_expression(
tpl: (CompareType, Expression, Expression),
) -> ParseResult<Expression> {
let pos = tpl.1.pos().clone();
Ok(Expression::Compare(ComparisonDef {
kind: tpl.0,
left: Box::new(tpl.1),
right: Box::new(tpl.2),
kind: CompareType,
left: Expression,
right: Expression,
) -> Expression {
let pos = left.pos().clone();
Expression::Compare(ComparisonDef {
kind: kind,
left: Box::new(left),
right: Box::new(right),
pos: pos,
}))
})
}
named!(compare_op_type<TokenIter, Element, error::Error>,
alt!(
do_parse!(punct!("==") >> (Element::CompareOp(CompareType::Equal))) |
do_parse!(punct!("!=") >> (Element::CompareOp(CompareType::NotEqual))) |
do_parse!(punct!("<=") >> (Element::CompareOp(CompareType::LTEqual))) |
do_parse!(punct!(">=") >> (Element::CompareOp(CompareType::GTEqual))) |
do_parse!(punct!("<") >> (Element::CompareOp(CompareType::LT))) |
do_parse!(punct!(">") >> (Element::CompareOp(CompareType::GT)))
make_fn!(
compare_op_type<SliceIter<Token>, Element>,
either!(
do_each!(_ => punct!("=="), (Element::CompareOp(CompareType::Equal))),
do_each!(_ => punct!("!="), (Element::CompareOp(CompareType::NotEqual))),
do_each!(_ => punct!("<="), (Element::CompareOp(CompareType::LTEqual))),
do_each!(_ => punct!(">="), (Element::CompareOp(CompareType::GTEqual))),
do_each!(_ => punct!("<"), (Element::CompareOp(CompareType::LT))),
do_each!(_ => punct!(">"), (Element::CompareOp(CompareType::GT)))
)
);
fn parse_compare_operator(i: OpListIter) -> IResult<OpListIter, CompareType, error::Error> {
let i_ = i.clone();
if i_.input_len() == 0 {
return IResult::Error(ErrorKind::Custom(error::Error::new(
fn parse_compare_operator(i: SliceIter<Element>) -> Result<SliceIter<Element>, CompareType> {
let mut i_ = i.clone();
if eoi(i_.clone()).is_complete() {
return Result::Fail(Error::new(
format!("Expected Expression found End Of Input"),
error::ErrorType::IncompleteParsing,
// TODO(jwall): This position information is incorrect.
Position { line: 0, column: 0 },
)));
&i_,
));
}
let el = &(i_[0]);
if let &Element::CompareOp(ref op) = el {
return IResult::Done(i.slice(1..), op.clone());
let el = i_.next();
if let Some(&Element::CompareOp(ref op)) = el {
return Result::Complete(i_.clone(), op.clone());
}
return IResult::Error(ErrorKind::Custom(error::Error::new(
return Result::Fail(Error::new(
format!(
"Error while parsing Binary Expression Unexpected Operator {:?}",
el
),
error::ErrorType::ParseError,
// TODO(jwall): This position information is incorrect.
Position { line: 0, column: 0 },
)));
&i,
));
}
named!(compare_expression<OpListIter, Expression, error::Error>,
map_res!(
do_parse!(
left: alt!(trace_nom!(math_expression) | trace_nom!(parse_expression)) >>
typ: parse_compare_operator >>
right: alt!(trace_nom!(math_expression) | trace_nom!(parse_expression)) >>
(typ, left, right)
),
tuple_to_compare_expression
make_fn!(
compare_expression<SliceIter<Element>, Expression>,
do_each!(
left => either!(trace_nom!(math_expression), trace_nom!(parse_expression)),
typ => parse_compare_operator,
right => either!(trace_nom!(math_expression), trace_nom!(parse_expression)),
(tuple_to_compare_expression(typ, left, right))
)
);
/// Parse a list of expressions separated by operators into a Vec<Element>.
fn parse_operand_list(i: TokenIter) -> NomResult<Vec<Element>> {
fn parse_operand_list<'a>(i: SliceIter<'a, Token>) -> NomResult<'a, Vec<Element>> {
// 1. First try to parse a non_op_expression,
let mut _i = i.clone();
let mut list = Vec::new();
@ -271,148 +260,74 @@ fn parse_operand_list(i: TokenIter) -> NomResult<Vec<Element>> {
loop {
// 2. Parse a non_op_expression.
match non_op_expression(_i.clone()) {
IResult::Error(e) => {
Result::Fail(e) => {
// A failure to parse an expression
// is always an error.
return IResult::Error(e);
return Result::Fail(e);
}
IResult::Incomplete(i) => {
return IResult::Incomplete(i);
Result::Abort(e) => {
// A failure to parse an expression
// is always an error.
return Result::Abort(e);
}
IResult::Done(rest, expr) => {
Result::Incomplete(i) => {
return Result::Incomplete(i);
}
Result::Complete(rest, expr) => {
list.push(Element::Expr(expr));
_i = rest.clone();
}
}
// 3. Parse an operator.
match alt!(_i, math_op_type | compare_op_type) {
IResult::Error(e) => {
match either!(_i.clone(), math_op_type, compare_op_type) {
Result::Fail(e) => {
if firstrun {
// If we don't find an operator in our first
// run then this is not an operand list.
return IResult::Error(e);
return Result::Fail(e);
}
// if we don't find one on subsequent runs then
// that's the end of the operand list.
break;
}
IResult::Incomplete(i) => {
return IResult::Incomplete(i);
Result::Abort(e) => {
// A failure to parse an expression
// is always an error.
return Result::Abort(e);
}
IResult::Done(rest, el) => {
Result::Incomplete(i) => {
return Result::Incomplete(i);
}
Result::Complete(rest, el) => {
list.push(el);
_i = rest.clone();
}
}
firstrun = false;
}
return IResult::Done(_i, list);
}
#[derive(Clone, Debug, PartialEq)]
pub struct OpListIter<'a> {
pub source: &'a [Element],
}
impl<'a> OpListIter<'a> {
pub fn len(&self) -> usize {
self.source.len()
}
}
impl<'a> InputLength for OpListIter<'a> {
fn input_len(&self) -> usize {
self.source.input_len()
}
}
macro_rules! impl_op_iter_slice {
($r:ty) => {
impl<'a> Slice<$r> for OpListIter<'a> {
fn slice(&self, range: $r) -> Self {
OpListIter {
source: self.source.slice(range),
}
}
}
};
}
impl_op_iter_slice!(std::ops::Range<usize>);
impl_op_iter_slice!(std::ops::RangeTo<usize>);
impl_op_iter_slice!(std::ops::RangeFrom<usize>);
impl_op_iter_slice!(std::ops::RangeFull);
impl<'a> std::ops::Index<usize> for OpListIter<'a> {
type Output = Element;
fn index(&self, i: usize) -> &Self::Output {
&self.source[i]
}
}
impl<'a> InputIter for OpListIter<'a> {
type Item = &'a Element;
type RawItem = Element;
type Iter = std::iter::Enumerate<std::slice::Iter<'a, Self::RawItem>>;
type IterElem = std::slice::Iter<'a, Self::RawItem>;
fn iter_indices(&self) -> Self::Iter {
self.source.iter().enumerate()
}
fn iter_elements(&self) -> Self::IterElem {
self.source.iter()
}
fn position<P>(&self, predicate: P) -> Option<usize>
where
P: Fn(Self::RawItem) -> bool,
{
for (o, v) in self.iter_indices() {
if predicate(v.clone()) {
return Some(o);
}
}
None
}
fn slice_index(&self, count: usize) -> Option<usize> {
let mut cnt = 0;
for (index, _) in self.iter_indices() {
if cnt == count {
return Some(index);
}
cnt += 1;
}
if cnt == count {
return Some(self.len());
}
None
}
return Result::Complete(_i, list);
}
/// Parse a binary operator expression.
pub fn op_expression(i: TokenIter) -> NomResult<Expression> {
pub fn op_expression<'a>(i: SliceIter<'a, Token>) -> NomResult<'a, Expression> {
let preparse = parse_operand_list(i.clone());
match preparse {
IResult::Error(e) => IResult::Error(e),
IResult::Incomplete(i) => IResult::Incomplete(i),
IResult::Done(rest, oplist) => {
let mut i_ = OpListIter {
source: oplist.as_slice(),
};
let parse_result = alt!(
Result::Fail(e) => Result::Fail(e),
Result::Abort(e) => Result::Abort(e),
Result::Incomplete(i) => Result::Incomplete(i),
Result::Complete(rest, oplist) => {
let mut i_ = SliceIter::new(&oplist);
let parse_result = either!(
i_,
trace_nom!(compare_expression) | trace_nom!(math_expression)
trace_nom!(compare_expression),
trace_nom!(math_expression)
);
match parse_result {
IResult::Error(e) => IResult::Error(e),
IResult::Incomplete(i) => IResult::Incomplete(i),
IResult::Done(_, expr) => IResult::Done(rest.clone(), expr),
Result::Fail(e) => Result::Fail(e),
Result::Abort(e) => Result::Abort(e),
Result::Incomplete(i) => Result::Incomplete(i),
Result::Complete(_, expr) => Result::Complete(rest.clone(), expr),
}
}
}

View File

@ -12,23 +12,20 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use super::*;
use tokenizer::{tokenize, TokenIter};
use tokenizer::tokenize;
use nom::IResult;
use nom_locate::LocatedSpan;
use abortable_parser::{Result, SliceIter, StrIter};
macro_rules! assert_parse {
($parsemac:ident($i:expr), $out:expr) => {
assert_parse!($i, $parsemac, $out)
};
($i:expr, $f:expr, $out:expr) => {{
let input = LocatedSpan::new($i);
let input = StrIter::new($i);
match tokenize(input) {
Err(e) => assert!(false, format!("Tokenizer Error: {:?}", e)),
Ok(val) => match $f(TokenIter {
source: val.as_slice(),
}) {
IResult::Done(_, result) => assert_eq!(result, $out),
Ok(val) => match $f(SliceIter::new(val.as_slice())) {
Result::Complete(_, result) => assert_eq!(result, $out),
other => assert!(false, format!("Expected Done got {:?}", other)),
},
}
@ -40,14 +37,12 @@ macro_rules! assert_error {
assert_error!($i, $parsemac)
};
($i:expr, $f:expr) => {{
let input = LocatedSpan::new($i);
let input = StrIter::new($i);
match tokenize(input) {
Err(_) => assert!(true),
Ok(val) => {
let result = $f(TokenIter {
source: val.as_slice(),
});
assert!(result.is_err(), format!("Not an error: {:?}", result))
let result = $f(SliceIter::new(val.as_slice()));
assert!(result.is_fail(), format!("Not a fail: {:?}", result))
}
}
}};
@ -757,6 +752,24 @@ fn test_macro_expression_parsing() {
assert_error!(macro_expression("macro (arg1, arg2) => { foo"));
assert_error!(macro_expression("macro (arg1, arg2) => { foo ="));
assert_parse!(
macro_expression("macro () => {foo=1,bar=2}"),
Expression::Macro(MacroDef {
argdefs: Vec::new(),
fields: vec![
(
make_tok!("foo", 1, 14),
Expression::Simple(Value::Int(value_node!(1, 1, 18))),
),
(
make_tok!("bar", 1, 20),
Expression::Simple(Value::Int(value_node!(2, 1, 24))),
),
],
pos: Position::new(1, 1),
})
);
assert_parse!(
macro_expression("macro (arg1, arg2) => {foo=1,bar=2}"),
Expression::Macro(MacroDef {
@ -1223,12 +1236,12 @@ fn test_number_parsing() {
#[test]
fn test_parse() {
let bad_input = LocatedSpan::new("import mylib as lib;");
let bad_input = StrIter::new("import mylib as lib;");
let bad_result = parse(bad_input);
assert!(bad_result.is_err());
// Valid parsing tree
let input = LocatedSpan::new("import \"mylib\" as lib;let foo = 1;1+1;");
let input = StrIter::new("import \"mylib\" as lib;let foo = 1;1+1;");
let result = parse(input);
assert!(result.is_ok(), format!("Expected Ok, Got {:?}", result));
let tpl = result.unwrap();

View File

@ -12,333 +12,324 @@
// See the License for the specific language governing permissions and
// limitations under the License.
//! The tokenization stage of the ucg compiler.
use ast::*;
use error;
use nom;
use nom::{alpha, digit, is_alphanumeric, multispace};
use nom::{InputIter, InputLength, Slice};
use nom_locate::LocatedSpan;
//! The tokenization stage of the ucg compiler.
use std;
use std::result::Result;
pub type Span<'a> = LocatedSpan<&'a str>;
use abortable_parser::combinators::*;
use abortable_parser::iter::{SliceIter, StrIter};
use abortable_parser::{Error, Offsetable, Result, TextPositionTracker};
use ast::*;
impl<'a> From<Span<'a>> for Position {
fn from(s: Span) -> Position {
impl<'a> From<StrIter<'a>> for Position {
fn from(s: StrIter<'a>) -> Position {
Position {
line: s.line as usize,
column: s.get_column() as usize,
line: s.line(),
column: s.column(),
}
}
}
fn is_symbol_char(c: char) -> bool {
is_alphanumeric(c as u8) || c == '-' as char || c == '_' as char
fn is_symbol_char<'a>(i: StrIter<'a>) -> Result<StrIter<'a>, u8> {
let mut _i = i.clone();
let c = match _i.next() {
Some(c) => *c,
None => return Result::Fail(Error::new("Unexpected End of Input".to_string(), &_i)),
};
if (c as char).is_ascii_alphanumeric() || c == b'-' || c == b'_' {
Result::Complete(_i, c)
} else {
Result::Fail(Error::new("Not a symbol character".to_string(), &_i))
}
}
fn escapequoted(input: Span) -> nom::IResult<Span, String> {
fn escapequoted<'a>(input: StrIter<'a>) -> Result<StrIter<'a>, String> {
// loop until we find a " that is not preceded by \.
// Collapse all \<char> to just char for escaping.
let mut frag = String::new();
let mut escape = false;
for (i, c) in input.iter_indices() {
if c == '\\' && !escape {
let mut _input = input.clone();
loop {
let c = match _input.next() {
Some(c) => *c,
None => break,
};
if c == '\\' as u8 && !escape {
// eat this slash and set our escaping sentinel
escape = true;
} else if c == '"' && !escape {
} else if c == '"' as u8 && !escape {
// Bail if this is an unescaped "
// we exit here.
return nom::IResult::Done(input.slice(i..), frag);
return Result::Complete(_input, frag);
} else {
// we accumulate this character.
frag.push(c);
frag.push(c as char);
escape = false; // reset our escaping sentinel
}
}
return nom::IResult::Incomplete(nom::Needed::Unknown);
return Result::Incomplete(_input.get_offset());
}
named!(strtok( Span ) -> Token,
do_parse!(
span: position!() >>
tag!("\"") >>
frag: escapequoted >>
tag!("\"") >>
make_fn!(strtok<StrIter, Token>,
do_each!(
span => input!(),
_ => text_token!("\""),
frag => escapequoted,
(Token{
typ: TokenType::QUOTED,
pos: Position::from(span),
fragment: frag,
fragment: frag.to_string(),
})
)
);
named!(pipequotetok( Span ) -> Token,
do_parse!(
span: position!() >>
tag!("|") >>
frag: take_until!("|") >>
tag!("|") >>
make_fn!(pipequotetok<StrIter, Token>,
do_each!(
p => input!(),
_ => text_token!("|"),
frag => until!(text_token!("|")),
_ => text_token!("|"),
(Token{
typ: TokenType::PIPEQUOTE,
pos: Position::from(span),
fragment: frag.fragment.to_string(),
pos: Position::from(p),
fragment: frag.to_string(),
})
)
);
named!(barewordtok( Span ) -> Token,
do_parse!(
span: position!() >>
frag: preceded!(peek!(alpha), take_while!(is_symbol_char)) >>
make_fn!(barewordtok<StrIter, Token>,
do_each!(
span => input!(),
_ => peek!(ascii_alpha),
frag => consume_all!(is_symbol_char),
(Token{
typ: TokenType::BAREWORD,
pos: Position::from(span),
fragment: frag.fragment.to_string(),
fragment: frag.to_string(),
})
)
);
named!(digittok( Span ) -> Token,
do_parse!(
span: position!() >>
digits: digit >>
make_fn!(digittok<StrIter, Token>,
do_each!(
span => input!(),
_ => peek!(ascii_digit),
digits => consume_all!(ascii_digit),
(Token{
typ: TokenType::DIGIT,
pos: Position::from(span),
fragment: digits.fragment.to_string(),
fragment: digits.to_string(),
})
)
);
named!(booleantok( Span ) -> Token,
do_parse!(
span: position!() >>
b: alt!(
tag!("true") |
tag!("false")
) >>
make_fn!(booleantok<StrIter, Token>,
do_each!(
span => input!(),
token => either!(
text_token!("true"),
text_token!("false")
),
(Token{
typ: TokenType::BOOLEAN,
pos: Position::from(span),
fragment: b.fragment.to_string(),
fragment: token.to_string(),
})
)
);
/// do_tag_tok! is a helper macro to make building a simple tag token
/// do_text_token_tok! is a helper macro to make building a simple text token
/// less code.
macro_rules! do_tag_tok {
// NOTE(jwall): Nom macros do magic with their inputs. They in fact
// rewrite your macro argumets for you by adding an initial argument
// for all their sub-macros. Which means we require this $i paramater
// on the first macro invocation but not the rest.
($i:expr, $type:expr, $tag:expr,WS) => {
do_parse!(
$i,
span: position!() >> frag: tag!($tag) >> alt!(whitespace | comment) >> (Token {
macro_rules! do_text_token_tok {
($i:expr, $type:expr, $text_token:expr, WS) => {
do_each!($i,
span => input!(),
frag => text_token!($text_token),
_ => either!(whitespace, comment),
(Token {
typ: $type,
pos: Position::from(span),
fragment: frag.fragment.to_string(),
fragment: frag.to_string(),
})
)
};
($i:expr, $type:expr, $tag:expr) => {
do_parse!(
$i,
span: position!() >> frag: tag!($tag) >> (Token {
($i:expr, $type:expr, $text_token:expr) => {
do_each!($i,
span => input!(),
frag => text_token!($text_token),
(Token {
typ: $type,
pos: Position::from(span),
fragment: frag.fragment.to_string(),
fragment: frag.to_string(),
})
)
};
}
named!(emptytok( Span ) -> Token,
do_tag_tok!(TokenType::EMPTY, "NULL")
make_fn!(emptytok<StrIter, Token>,
do_text_token_tok!(TokenType::EMPTY, "NULL")
);
named!(commatok( Span ) -> Token,
do_tag_tok!(TokenType::PUNCT, ",")
make_fn!(commatok<StrIter, Token>,
do_text_token_tok!(TokenType::PUNCT, ",")
);
named!(lbracetok( Span ) -> Token,
do_tag_tok!(TokenType::PUNCT, "{")
make_fn!(lbracetok<StrIter, Token>,
do_text_token_tok!(TokenType::PUNCT, "{")
);
named!(rbracetok( Span ) -> Token,
do_tag_tok!(TokenType::PUNCT, "}")
make_fn!(rbracetok<StrIter, Token>,
do_text_token_tok!(TokenType::PUNCT, "}")
);
named!(lparentok( Span ) -> Token,
do_tag_tok!(TokenType::PUNCT, "(")
make_fn!(lparentok<StrIter, Token>,
do_text_token_tok!(TokenType::PUNCT, "(")
);
named!(rparentok( Span ) -> Token,
do_tag_tok!(TokenType::PUNCT, ")")
make_fn!(rparentok<StrIter, Token>,
do_text_token_tok!(TokenType::PUNCT, ")")
);
named!(dottok( Span ) -> Token,
do_tag_tok!(TokenType::PUNCT, ".")
make_fn!(dottok<StrIter, Token>,
do_text_token_tok!(TokenType::PUNCT, ".")
);
named!(plustok( Span ) -> Token,
do_tag_tok!(TokenType::PUNCT, "+")
make_fn!(plustok<StrIter, Token>,
do_text_token_tok!(TokenType::PUNCT, "+")
);
named!(dashtok( Span ) -> Token,
do_tag_tok!(TokenType::PUNCT, "-")
make_fn!(dashtok<StrIter, Token>,
do_text_token_tok!(TokenType::PUNCT, "-")
);
named!(startok( Span ) -> Token,
do_tag_tok!(TokenType::PUNCT, "*")
make_fn!(startok<StrIter, Token>,
do_text_token_tok!(TokenType::PUNCT, "*")
);
named!(slashtok( Span ) -> Token,
do_tag_tok!(TokenType::PUNCT, "/")
make_fn!(slashtok<StrIter, Token>,
do_text_token_tok!(TokenType::PUNCT, "/")
);
named!(pcttok( Span ) -> Token,
do_tag_tok!(TokenType::PUNCT, "%")
make_fn!(pcttok<StrIter, Token>,
do_text_token_tok!(TokenType::PUNCT, "%")
);
named!(eqeqtok( Span ) -> Token,
do_tag_tok!(TokenType::PUNCT, "==")
make_fn!(eqeqtok<StrIter, Token>,
do_text_token_tok!(TokenType::PUNCT, "==")
);
named!(notequaltok( Span ) -> Token,
do_tag_tok!(TokenType::PUNCT, "!=")
make_fn!(notequaltok<StrIter, Token>,
do_text_token_tok!(TokenType::PUNCT, "!=")
);
named!(gttok( Span ) -> Token,
do_tag_tok!(TokenType::PUNCT, ">")
make_fn!(gttok<StrIter, Token>,
do_text_token_tok!(TokenType::PUNCT, ">")
);
named!(gtequaltok( Span ) -> Token,
do_tag_tok!(TokenType::PUNCT, ">=")
make_fn!(gtequaltok<StrIter, Token>,
do_text_token_tok!(TokenType::PUNCT, ">=")
);
named!(ltequaltok( Span ) -> Token,
do_tag_tok!(TokenType::PUNCT, "<=")
make_fn!(ltequaltok<StrIter, Token>,
do_text_token_tok!(TokenType::PUNCT, "<=")
);
named!(lttok( Span ) -> Token,
do_tag_tok!(TokenType::PUNCT, "<")
make_fn!(lttok<StrIter, Token>,
do_text_token_tok!(TokenType::PUNCT, "<")
);
named!(equaltok( Span ) -> Token,
do_tag_tok!(TokenType::PUNCT, "=")
make_fn!(equaltok<StrIter, Token>,
do_text_token_tok!(TokenType::PUNCT, "=")
);
named!(semicolontok( Span ) -> Token,
do_tag_tok!(TokenType::PUNCT, ";")
make_fn!(semicolontok<StrIter, Token>,
do_text_token_tok!(TokenType::PUNCT, ";")
);
named!(leftsquarebracket( Span ) -> Token,
do_tag_tok!(TokenType::PUNCT, "[")
make_fn!(leftsquarebracket<StrIter, Token>,
do_text_token_tok!(TokenType::PUNCT, "[")
);
named!(rightsquarebracket( Span ) -> Token,
do_tag_tok!(TokenType::PUNCT, "]")
make_fn!(rightsquarebracket<StrIter, Token>,
do_text_token_tok!(TokenType::PUNCT, "]")
);
named!(fatcommatok( Span ) -> Token,
do_tag_tok!(TokenType::PUNCT, "=>")
make_fn!(fatcommatok<StrIter, Token>,
do_text_token_tok!(TokenType::PUNCT, "=>")
);
named!(selecttok( Span ) -> Token,
do_tag_tok!(TokenType::BAREWORD, "select", WS)
make_fn!(selecttok<StrIter, Token>,
do_text_token_tok!(TokenType::BAREWORD, "select", WS)
);
named!(macrotok( Span ) -> Token,
do_tag_tok!(TokenType::BAREWORD, "macro", WS)
make_fn!(macrotok<StrIter, Token>,
do_text_token_tok!(TokenType::BAREWORD, "macro", WS)
);
named!(lettok( Span ) -> Token,
do_tag_tok!(TokenType::BAREWORD, "let", WS)
make_fn!(lettok<StrIter, Token>,
do_text_token_tok!(TokenType::BAREWORD, "let", WS)
);
named!(importtok( Span ) -> Token,
do_tag_tok!(TokenType::BAREWORD, "import", WS)
make_fn!(importtok<StrIter, Token>,
do_text_token_tok!(TokenType::BAREWORD, "import", WS)
);
named!(asserttok( Span ) -> Token,
do_tag_tok!(TokenType::BAREWORD, "assert", WS)
make_fn!(asserttok<StrIter, Token>,
do_text_token_tok!(TokenType::BAREWORD, "assert", WS)
);
named!(outtok( Span ) -> Token,
do_tag_tok!(TokenType::BAREWORD, "out", WS)
make_fn!(outtok<StrIter, Token>,
do_text_token_tok!(TokenType::BAREWORD, "out", WS)
);
named!(astok( Span ) -> Token,
do_tag_tok!(TokenType::BAREWORD, "as", WS)
make_fn!(astok<StrIter, Token>,
do_text_token_tok!(TokenType::BAREWORD, "as", WS)
);
named!(maptok( Span ) -> Token,
do_tag_tok!(TokenType::BAREWORD, "map", WS)
make_fn!(maptok<StrIter, Token>,
do_text_token_tok!(TokenType::BAREWORD, "map", WS)
);
named!(filtertok( Span ) -> Token,
do_tag_tok!(TokenType::BAREWORD, "filter", WS)
make_fn!(filtertok<StrIter, Token>,
do_text_token_tok!(TokenType::BAREWORD, "filter", WS)
);
fn end_of_input(input: Span) -> nom::IResult<Span, Token> {
match eof!(input,) {
nom::IResult::Done(_, _) => {
return nom::IResult::Done(
input,
make_tok!(EOF => input.line as usize,
input.get_column() as usize),
);
}
nom::IResult::Incomplete(_) => {
return nom::IResult::Incomplete(nom::Needed::Unknown);
}
nom::IResult::Error(e) => {
return nom::IResult::Error(e);
}
}
}
fn comment(input: Span) -> nom::IResult<Span, Token> {
match tag!(input, "//") {
nom::IResult::Done(rest, _) => {
match alt!(
fn comment(input: StrIter) -> Result<StrIter, Token> {
match text_token!(input, "//") {
Result::Complete(rest, _) => {
match until!(
rest,
take_until_and_consume!("\r\n") | take_until_and_consume!("\n")
either!(
eoi,
discard!(text_token!("\r\n")),
discard!(text_token!("\n"))
)
) {
nom::IResult::Done(rest, cmt) => {
return nom::IResult::Done(
Result::Complete(rest, cmt) => {
return Result::Complete(
rest,
make_tok!(CMT => cmt.fragment.to_string(),
input.line as usize,
input.get_column() as usize),
make_tok!(CMT => cmt.to_string(),
input.line() as usize,
input.column() as usize),
);
}
// If we didn't find a new line then we just grab everything.
_ => {
let blen = rest.input_len();
let next = rest.slice(blen..);
let tok = rest.slice(..blen);
return nom::IResult::Done(
next,
make_tok!(CMT => tok.fragment.to_string(),
input.line as usize, input.get_column() as usize
),
);
return Result::Abort(Error::new("Unparsable comment".to_string(), &rest));
}
}
}
nom::IResult::Incomplete(i) => return nom::IResult::Incomplete(i),
nom::IResult::Error(e) => return nom::IResult::Error(e),
Result::Incomplete(offset) => return Result::Incomplete(offset),
Result::Fail(e) => return Result::Fail(e),
Result::Abort(e) => return Result::Abort(e),
}
}
named!(whitespace( Span ) -> Token,
do_parse!(
span: position!() >>
many1!(multispace) >>
make_fn!(whitespace<StrIter, Token>,
do_each!(
span => input!(),
_ => peek!(ascii_ws),
_ => repeat!(ascii_ws),
(Token{
typ: TokenType::WS,
pos: Position::from(span),
@ -347,80 +338,89 @@ named!(whitespace( Span ) -> Token,
)
);
named!(token( Span ) -> Token,
alt!(
strtok |
pipequotetok |
emptytok | // This must come before the barewordtok
digittok |
commatok |
rbracetok |
lbracetok |
lparentok |
rparentok |
dottok |
plustok |
dashtok |
startok |
comment | // Note comment must come before slashtok
slashtok |
pcttok |
eqeqtok |
notequaltok |
complete!(gtequaltok) |
complete!(ltequaltok) |
gttok |
lttok |
fatcommatok | // Note fatcommatok must come before equaltok
equaltok |
semicolontok |
leftsquarebracket |
rightsquarebracket |
booleantok |
lettok |
outtok |
selecttok |
asserttok |
macrotok |
importtok |
astok |
maptok |
filtertok |
barewordtok |
whitespace |
make_fn!(end_of_input<StrIter, Token>,
do_each!(
span => input!(),
_ => eoi,
(Token{
typ: TokenType::END,
pos: Position::from(span),
fragment: String::new(),
})
)
);
// Matches the next single token of any type from the input.
// NOTE: the order of alternatives is significant; see the inline comments for
// the orderings that must be preserved.
make_fn!(token<StrIter, Token>,
    either!(
        strtok,
        pipequotetok,
        emptytok, // This must come before the barewordtok
        digittok,
        commatok,
        rbracetok,
        lbracetok,
        lparentok,
        rparentok,
        dottok,
        plustok,
        dashtok,
        startok,
        comment, // Note comment must come before slashtok
        slashtok,
        pcttok,
        eqeqtok,
        notequaltok,
        complete!("Not >=".to_string(), gtequaltok),
        complete!("Not <=".to_string(), ltequaltok),
        gttok,
        lttok,
        fatcommatok, // Note fatcommatok must come before equaltok
        equaltok,
        semicolontok,
        leftsquarebracket,
        rightsquarebracket,
        booleantok,
        lettok,
        outtok,
        selecttok,
        asserttok,
        macrotok,
        importtok,
        astok,
        maptok,
        filtertok,
        barewordtok,
        whitespace,
        end_of_input)
);
/// Consumes an input Span and returns either a Vec<Token> or a nom::ErrorKind.
pub fn tokenize(input: Span) -> Result<Vec<Token>, error::Error> {
/// Consumes an input StrIter and returns either a Vec<Token> or a error::Error.
pub fn tokenize(input: StrIter) -> std::result::Result<Vec<Token>, Error> {
let mut out = Vec::new();
let mut i = input;
let mut i = input.clone();
loop {
if i.input_len() == 0 {
if let Result::Complete(_, _) = eoi(i.clone()) {
break;
}
match token(i) {
nom::IResult::Error(_e) => {
return Err(error::Error::new(
match token(i.clone()) {
Result::Abort(e) => {
return Err(Error::caused_by(
"Invalid Token encountered",
error::ErrorType::UnexpectedToken,
Position {
line: i.line as usize,
column: i.get_column() as usize,
},
&i,
Box::new(e),
));
}
nom::IResult::Incomplete(_) => {
return Err(error::Error::new(
"Unexepcted end of Input",
error::ErrorType::UnexpectedToken,
Position {
line: i.line as usize,
column: i.get_column() as usize,
},
Result::Fail(e) => {
return Err(Error::caused_by(
"Invalid Token encountered",
&i,
Box::new(e),
));
}
nom::IResult::Done(rest, tok) => {
Result::Incomplete(offset) => {
return Err(Error::new("Unexepcted end of Input", &offset));
}
Result::Complete(rest, tok) => {
i = rest;
if tok.typ == TokenType::COMMENT || tok.typ == TokenType::WS {
// we skip comments and whitespace
@ -435,8 +435,8 @@ pub fn tokenize(input: Span) -> Result<Vec<Token>, error::Error> {
fragment: String::new(),
typ: TokenType::END,
pos: Position {
line: i.line as usize,
column: i.get_column() as usize,
line: i.line(),
column: i.column(),
},
});
Ok(out)
@ -445,7 +445,7 @@ pub fn tokenize(input: Span) -> Result<Vec<Token>, error::Error> {
/// Clones a token.
///
/// This is necessary to allow the match_type and match_token macros to work.
pub fn token_clone(t: &Token) -> Result<Token, error::Error> {
pub fn token_clone(t: &Token) -> std::result::Result<Token, Error> {
Ok(t.clone())
}
@ -517,28 +517,28 @@ macro_rules! match_type {
};
($i:expr, $t:expr, $msg:expr, $h:expr) => {{
let i_ = $i.clone();
use nom::Slice;
use std::convert::Into;
if i_.input_len() == 0 {
nom::IResult::Error(nom::ErrorKind::Custom(error::Error::new(
format!("End of Input! {}", $msg),
error::ErrorType::IncompleteParsing,
Position { line: 0, column: 0 },
)))
use abortable_parser::combinators::eoi;
use abortable_parser::{Error, Result};
use std;
let mut _i = $i.clone();
if eoi(_i.clone()).is_complete() {
Result::Fail(Error::new(format!("End of Input! {}", $msg), &$i))
} else {
let tok = &(i_[0]);
match _i.next() {
Some(tok) => {
if tok.typ == $t {
match $h(tok) {
Result::Ok(v) => nom::IResult::Done($i.slice(1..), v),
Result::Err(e) => nom::IResult::Error(nom::ErrorKind::Custom(e.into())),
std::result::Result::Ok(v) => Result::Complete(_i.clone(), v),
std::result::Result::Err(e) => {
Result::Fail(Error::caused_by($msg, &_i, Box::new(e)))
}
}
} else {
nom::IResult::Error(nom::ErrorKind::Custom(error::Error::new(
$msg.to_string(),
error::ErrorType::UnexpectedToken,
tok.pos.clone(),
)))
Result::Fail(Error::new($msg.to_string(), &$i))
}
}
None => Result::Fail(Error::new($msg.to_string(), &$i)),
}
}
}};
@ -553,7 +553,7 @@ macro_rules! match_token {
}};
($i:expr,PUNCT => $f:expr, $h:expr) => {
match_token!($i, TokenType::PUNCT, $f, format!("Not PUNCT ({})", $f), $h)
match_token!($i, TokenType::PUNCT, $f, format!("({})", $f), $h)
};
($i:expr,BAREWORD => $f:expr) => {{
@ -572,22 +572,26 @@ macro_rules! match_token {
};
($i:expr, $t:expr, $f:expr, $msg:expr, $h:expr) => {{
let i_ = $i.clone();
use nom;
use nom::Slice;
use std::convert::Into;
let tok = &(i_[0]);
use abortable_parser::Result;
use std;
let mut i_ = $i.clone();
let tok = i_.next();
if let Some(tok) = tok {
if tok.typ == $t && &tok.fragment == $f {
match $h(tok) {
Result::Ok(v) => nom::IResult::Done($i.slice(1..), v),
Result::Err(e) => nom::IResult::Error(nom::ErrorKind::Custom(e.into())),
std::result::Result::Ok(v) => Result::Complete(i_.clone(), v),
std::result::Result::Err(e) => {
Result::Fail(Error::caused_by($msg, &i_, Box::new(e)))
}
}
} else {
nom::IResult::Error(nom::ErrorKind::Custom(error::Error::new(
format!("{} Instead is ({})", $msg, tok.fragment),
error::ErrorType::UnexpectedToken,
tok.pos.clone(),
)))
Result::Fail(Error::new(
format!("Expected {} Instead is ({})", $msg, tok.fragment),
&i_,
))
}
} else {
Result::Fail(Error::new("Unexpected End Of Input", &i_))
}
}};
}
@ -607,11 +611,12 @@ macro_rules! word {
}
/// pos gets the current position from a TokenIter input without consuming it.
pub fn pos(i: TokenIter) -> nom::IResult<TokenIter, Position, error::Error> {
let tok = &i[0];
pub fn pos<'a>(i: SliceIter<'a, Token>) -> Result<SliceIter<'a, Token>, Position> {
let mut _i = i.clone();
let tok = _i.next().unwrap();
let line = tok.pos.line;
let column = tok.pos.column;
nom::IResult::Done(
Result::Complete(
i.clone(),
Position {
line: line,
@ -620,91 +625,5 @@ pub fn pos(i: TokenIter) -> nom::IResult<TokenIter, Position, error::Error> {
)
}
// NOTE: The hand-rolled TokenIter wrapper (with its nom InputLength / Slice /
// InputIter / Index impls) was removed in the abortable_parser migration.
// abortable_parser::SliceIter<Token> now provides the token-stream iterator
// used by pos() and the match_* macros above.
#[cfg(test)]
mod test;

View File

@ -1,12 +1,15 @@
use super::*;
use nom;
use nom_locate::LocatedSpan;
use abortable_parser::{Result, SliceIter, StrIter};
// Verifies that "NULL" lexes as an EMPTY token.
#[test]
fn test_empty_token() {
    let result = emptytok(StrIter::new("NULL "));
    assert!(
        result.is_complete(),
        format!("result {:?} is not done", result)
    );
    if let Result::Complete(_, tok) = result {
        assert_eq!(tok.fragment, "NULL");
        assert_eq!(tok.typ, TokenType::EMPTY);
    }
}
@ -14,9 +17,12 @@ fn test_empty_token() {
// Verifies that the "assert" keyword lexes as a BAREWORD token.
#[test]
fn test_assert_token() {
    let result = asserttok(StrIter::new("assert "));
    assert!(
        result.is_complete(),
        format!("result {:?} is not done", result)
    );
    if let Result::Complete(_, tok) = result {
        assert_eq!(tok.fragment, "assert");
        assert_eq!(tok.typ, TokenType::BAREWORD);
    }
}
@ -24,29 +30,56 @@ fn test_assert_token() {
// Verifies that the "out" keyword followed by whitespace lexes as a BAREWORD.
#[test]
fn test_out_token() {
    let result = outtok(StrIter::new("out "));
    assert!(
        result.is_complete(),
        format!("result {:?} is not done", result)
    );
    if let Result::Complete(_, tok) = result {
        assert_eq!(tok.fragment, "out");
        assert_eq!(tok.typ, TokenType::BAREWORD);
    }
}
// A comment is an acceptable delimiter after a keyword (the WS variant of
// do_text_token_tok! accepts whitespace or a comment).
#[test]
fn test_out_token_with_comment() {
    let parsed = outtok(StrIter::new("out//comment"));
    assert!(
        parsed.is_complete(),
        format!("result {:?} is not done", parsed)
    );
    if let Result::Complete(_, tok) = parsed {
        assert_eq!(tok.typ, TokenType::BAREWORD);
        assert_eq!(tok.fragment, "out");
    }
}
// "output" must NOT lex as the keyword "out": the keyword requires a
// whitespace or comment delimiter after it.
#[test]
fn test_not_out_token() {
    let parsed = outtok(StrIter::new("output"));
    assert!(parsed.is_fail(), format!("result {:?} is not fail", parsed));
}
// Verifies that escapequoted unescapes \" inside the quoted body.
#[test]
fn test_escape_quoted() {
    let result = escapequoted(StrIter::new("foo \\\"bar\""));
    assert!(
        result.is_complete(),
        format!("result {:?} is not ok", result)
    );
    if let Result::Complete(_rest, frag) = result {
        assert_eq!(frag, "foo \"bar");
    }
}
// Verifies that |foo| lexes as a PIPEQUOTE token with the inner text as the
// fragment.
#[test]
fn test_pipe_quoted() {
    let result = pipequotetok(StrIter::new("|foo|"));
    assert!(
        result.is_complete(),
        format!("result {:?} is not ok", result)
    );
    if let Result::Complete(_, tok) = result {
        assert_eq!(tok.fragment, "foo".to_string());
        assert_eq!(tok.typ, TokenType::PIPEQUOTE);
    }
}
// Verifies that strtok unescapes \\ and \" inside a double-quoted string.
#[test]
fn test_string_with_escaping() {
    let result = strtok(StrIter::new("\"foo \\\\ \\\"bar\""));
    assert!(
        result.is_complete(),
        format!("result {:?} is not ok", result)
    );
    if let Result::Complete(_, tok) = result {
        assert_eq!(tok.fragment, "foo \\ \"bar".to_string());
    }
}
#[test]
fn test_tokenize_bareword_with_dash() {
let result = tokenize(LocatedSpan::new("foo-bar "));
let result = tokenize(StrIter::new("foo-bar "));
assert!(result.is_ok(), format!("result {:?} is not ok", result));
if let Ok(toks) = result {
assert_eq!(toks.len(), 2);
@ -73,18 +109,23 @@ fn test_tokenize_bareword_with_dash() {
// assert_token! lexes $input with token() and asserts it produces a single
// token of type $typ whose fragment is the whole input. $msg is used in the
// failure message.
macro_rules! assert_token {
    ($input:expr, $typ:expr, $msg:expr) => {
        let result = token(StrIter::new($input));
        assert!(
            result.is_complete(),
            format!("result {:?} is not a {}", result, $msg)
        );
        if let Result::Complete(_, tok) = result {
            assert_eq!(tok.typ, $typ);
            assert_eq!(tok.fragment, $input);
        }
    };
}
// Verifies that a single digit lexes as a DIGIT token.
#[test]
fn test_digittok() {
    assert_token!("1", TokenType::DIGIT, "1");
}
#[test]
fn test_boolean() {
assert_token!("true", TokenType::BOOLEAN, "boolean");
@ -122,7 +163,7 @@ fn test_lteqtok() {
#[test]
fn test_tokenize_one_of_each() {
let result = tokenize(LocatedSpan::new(
let result = tokenize(StrIter::new(
"map out filter assert let import macro select as => [ ] { } ; = % / * \
+ - . ( ) , 1 . foo \"bar\" // comment\n ; true false == < > <= >= !=",
));
@ -137,65 +178,67 @@ fn test_tokenize_one_of_each() {
// tokenize always appends a trailing END token after the real tokens.
#[test]
fn test_parse_has_end() {
    let result = tokenize(StrIter::new("foo"));
    assert!(result.is_ok());
    let v = result.unwrap();
    assert_eq!(v.len(), 2);
    assert_eq!(v[1].typ, TokenType::END);
}
// whitespace() consumes a full run of spaces and yields a WS token; the
// remaining iterator offset shows how much was consumed.
#[test]
fn test_whitespace() {
    assert!(whitespace(StrIter::new(" ")).is_complete());
    let parsed = whitespace(StrIter::new("  "));
    if let Result::Complete(remaining, tok) = parsed {
        assert_eq!(remaining.get_offset(), 2);
        assert_eq!(tok.typ, TokenType::WS);
    } else {
        assert!(false, "Not complete");
    }
}
// Exercises comment() with \n, \r\n, trailing-content, and no-newline
// terminators; the fragment always excludes the "//" and the newline.
#[test]
fn test_parse_comment() {
    assert!(comment(StrIter::new("// comment\n")).is_complete());
    assert!(comment(StrIter::new("// comment")).is_complete());
    let mut parsed = comment(StrIter::new("// comment\n"));
    assert!(parsed.is_complete());
    if let Result::Complete(_rest, cmt) = parsed {
        assert_eq!(
            cmt,
            Token {
                typ: TokenType::COMMENT,
                fragment: " comment".to_string(),
                pos: Position { line: 1, column: 1 },
            }
        );
    }
    assert!(comment(StrIter::new("// comment\r\n")).is_complete());
    parsed = comment(StrIter::new("// comment\r\n"));
    if let Result::Complete(_rest, cmt) = parsed {
        assert_eq!(
            cmt,
            Token {
                typ: TokenType::COMMENT,
                fragment: " comment".to_string(),
                pos: Position { column: 1, line: 1 },
            }
        );
    }
    assert!(comment(StrIter::new("// comment\r\n ")).is_complete());
    parsed = comment(StrIter::new("// comment\r\n "));
    if let Result::Complete(_rest, cmt) = parsed {
        assert_eq!(
            cmt,
            Token {
                typ: TokenType::COMMENT,
                fragment: " comment".to_string(),
                pos: Position { column: 1, line: 1 },
            }
        );
    }
    assert!(comment(StrIter::new("// comment")).is_complete());
}
// word! matches a BAREWORD token whose fragment equals the given text.
#[test]
fn test_match_word() {
    let input = vec![Token {
        fragment: "foo".to_string(),
        typ: TokenType::BAREWORD,
        pos: Position { line: 1, column: 1 },
    }];
    let result = word!(SliceIter::new(input.as_slice()), "foo");
    match result {
        Result::Complete(_, tok) => assert_eq!(tok, input[0]),
        res => assert!(false, format!("Fail: {:?}", res)),
    }
}
// word! against a stream holding only the END sentinel must Fail (not Abort
// and not Complete).
// NOTE(review): the Token construction header was truncated in the diff view;
// an empty fragment is assumed since END tokens elsewhere in this file use
// String::new() -- confirm against the committed file.
#[test]
fn test_match_word_empty_input() {
    let input = vec![Token {
        fragment: String::new(),
        typ: TokenType::END,
        pos: Position { line: 1, column: 1 },
    }];
    let result = word!(SliceIter::new(input.as_slice()), "foo");
    match result {
        Result::Complete(_, _) => assert!(false, "Should have been an error but was Done"),
        Result::Incomplete(_) => assert!(false, "Should have been a Fail but was Incomplete"),
        Result::Fail(_) => {
            // noop
        }
        Result::Abort(_) => assert!(false, "Should have been a Fail but was Abort"),
    }
}
// punct! matches a PUNCT token whose fragment equals the given text.
#[test]
fn test_match_punct() {
    let input = vec![Token {
        fragment: "!".to_string(),
        typ: TokenType::PUNCT,
        pos: Position { line: 1, column: 1 },
    }];
    let result = punct!(SliceIter::new(input.as_slice()), "!");
    match result {
        Result::Complete(_, tok) => assert_eq!(tok, input[0]),
        res => assert!(false, format!("Fail: {:?}", res)),
    }
}
// match_type! matches on token type alone, ignoring the fragment.
// NOTE(review): the fragment value was truncated in the diff view; "foo" is
// assumed (match_type! only inspects typ) -- confirm against the committed
// file.
#[test]
fn test_match_type() {
    let input = vec![Token {
        fragment: "foo".to_string(),
        typ: TokenType::BAREWORD,
        pos: Position { line: 1, column: 1 },
    }];
    let result = match_type!(SliceIter::new(input.as_slice()), BAREWORD);
    match result {
        Result::Complete(_, tok) => assert_eq!(tok, input[0]),
        res => assert!(false, format!("Fail: {:?}", res)),
    }
}