REFACTOR: Speed up precedence parsing.

Do less reparsing by first parsing the flat list of operators and operands,
and only then performing the bottom-up precedence parsing over that list.
This commit is contained in:
Jeremy Wall 2018-07-04 12:30:29 -05:00
parent b12ac18481
commit 921643972b
5 changed files with 419 additions and 125 deletions

View File

@ -703,6 +703,7 @@ pub enum Expression {
// Complex Expressions
Copy(CopyDef),
// TODO(jwall): This should really store its position :-(
Grouped(Box<Expression>),
Format(FormatDef),
Call(CallDef),

View File

@ -21,6 +21,7 @@ use nom::IResult;
use nom::InputLength;
use nom_locate::LocatedSpan;
use self::precedence::op_expression;
use ast::*;
use error;
use tokenizer::*;
@ -36,6 +37,7 @@ type ParseResult<O> = Result<O, error::Error>;
macro_rules! trace_nom {
($i:expr, $rule:ident!( $($args:tt)* )) => {
{
use parse::ENABLE_TRACE;
if ENABLE_TRACE {
eprintln!("Entering Rule: {:?} {:?}", stringify!($rule), $i);
}
@ -49,6 +51,7 @@ macro_rules! trace_nom {
($i:expr, $rule:ident) => {
{
use parse::ENABLE_TRACE;
if ENABLE_TRACE {
eprintln!("Entering Rule: {:?} {:?}", stringify!($rule), $i);
}
@ -204,7 +207,7 @@ macro_rules! alt_peek {
// This is our default termination case.
// If there is no fallback then we return an Error.
(__inner $i:expr, __end) => {
// FIXME(jwall): We should do a better custom error here.
// TODO(jwall): We should do a better custom error here.
nom::IResult::Error(error_position!(nom::ErrorKind::Alt,$i))
};
@ -366,115 +369,6 @@ named!(simple_expression<TokenIter, Expression, error::Error>,
)
);
/// Builds an `Expression::Binary` out of a `(position, operator, left, right)` tuple.
///
/// This never fails; it returns a `ParseResult` so that it can be used as the
/// conversion function of `map_res!`.
fn tuple_to_binary_expression(
    tpl: (Position, BinaryExprType, Expression, Expression),
) -> ParseResult<Expression> {
    let (start, kind, lhs, rhs) = tpl;
    let pos = Position::new(start.line as usize, start.column as usize);
    let def = BinaryOpDef {
        kind: kind,
        left: Box::new(lhs),
        right: Box::new(rhs),
        pos: pos,
    };
    Ok(Expression::Binary(def))
}
/// do_binary_expr implements precedence based parsing where the more tightly bound
/// parsers are passed in as lowerrule parsers. We default to any non_op_expression
/// as the most tightly bound expressions.
///
/// Every form eventually expands to the last arm, which parses
/// `<lowerrule> <oprule> <lowerrule>` and converts the pieces into an
/// Expression with tuple_to_binary_expression.
macro_rules! do_binary_expr {
// Only an operator rule was supplied: use non_op_expression for both operands.
($i:expr, $oprule:ident!( $($args:tt)* )) => {
do_binary_expr!($i, $oprule!($($args)*), non_op_expression)
};
// The operand rule is a plain function name: wrap it in call! so it can be
// handled by the macro-style arm below.
($i:expr, $oprule:ident!( $($args:tt)* ), $lowerrule:ident) => {
do_binary_expr!($i, $oprule!($($args)*), call!($lowerrule))
};
// Fully expanded form: both the operator and the operand rules are macro invocations.
($i:expr, $oprule:ident!( $($args:tt)* ), $lowerrule:ident!( $($lowerargs:tt)* )) => {
map_res!($i,
do_parse!(
// Record the position before parsing the left operand.
pos: pos >>
left: $lowerrule!($($lowerargs)*) >>
typ: $oprule!($($args)*) >>
right: $lowerrule!($($lowerargs)*) >>
(pos, typ, left, right)
),
tuple_to_binary_expression
)
};
}
// Matches an operator token to a BinaryExprType.
// Recognizes the punctuation tokens `+`, `-`, `*` and `/` and yields
// Add, Sub, Mul and Div respectively.
named!(math_op_type<TokenIter, BinaryExprType, error::Error>,
alt!(
do_parse!(punct!("+") >> (BinaryExprType::Add)) |
do_parse!(punct!("-") >> (BinaryExprType::Sub)) |
do_parse!(punct!("*") >> (BinaryExprType::Mul)) |
do_parse!(punct!("/") >> (BinaryExprType::Div))
)
);
// trace_macros!(true);
// Parses a binary `+` or `-` expression. Each operand may be a
// product_expression, a simple_expression or a grouped_expression, tried in
// that order.
named!(sum_expression<TokenIter, Expression, error::Error>,
do_binary_expr!(
alt_peek!(
punct!("+") => math_op_type |
punct!("-") => math_op_type),
alt!(trace_nom!(product_expression) | trace_nom!(simple_expression) | trace_nom!(grouped_expression)))
);
// Parses a binary `*` or `/` expression. No operand rule is passed, so
// do_binary_expr! falls back to non_op_expression for both operands.
named!(product_expression<TokenIter, Expression, error::Error>,
do_binary_expr!(
alt_peek!(
punct!("*") => math_op_type |
punct!("/") => math_op_type)
)
);
// Parses any math expression: sum_expression is tried first, then
// product_expression.
named!(math_expression<TokenIter, Expression, error::Error>,
alt!(trace_nom!(sum_expression) | trace_nom!(product_expression))
);
// TODO(jwall): Change comparison operators to use the do_binary_expr! with precedence?
/// Builds an `Expression::Compare` out of a `(position, comparison, left, right)` tuple.
///
/// This never fails; it returns a `ParseResult` so that it can be used as the
/// conversion function of `map_res!`.
fn tuple_to_compare_expression(
    tpl: (Position, CompareType, Expression, Expression),
) -> ParseResult<Expression> {
    let (start, kind, lhs, rhs) = tpl;
    let pos = Position::new(start.line as usize, start.column as usize);
    let def = ComparisonDef {
        kind: kind,
        left: Box::new(lhs),
        right: Box::new(rhs),
        pos: pos,
    };
    Ok(Expression::Compare(def))
}
// Matches a comparison operator token to a CompareType.
// Recognizes `==`, `!=`, `<=`, `>=`, `<` and `>`.
named!(compare_op_type<TokenIter, CompareType, error::Error>,
alt!(
do_parse!(punct!("==") >> (CompareType::Equal)) |
do_parse!(punct!("!=") >> (CompareType::NotEqual)) |
do_parse!(punct!("<=") >> (CompareType::LTEqual)) |
do_parse!(punct!(">=") >> (CompareType::GTEqual)) |
do_parse!(punct!("<") >> (CompareType::LT)) |
do_parse!(punct!(">") >> (CompareType::GT))
)
);
// Parses `<operand> <comparison operator> <operand>` into an
// Expression::Compare. Each operand is a math_expression if one matches,
// otherwise a non_op_expression.
named!(compare_expression<TokenIter, Expression, error::Error>,
map_res!(
do_parse!(
// Record the position before parsing the left operand.
pos: pos >>
left: alt!(trace_nom!(math_expression) | trace_nom!(non_op_expression)) >>
typ: compare_op_type >>
right: alt!(trace_nom!(math_expression) | trace_nom!(non_op_expression)) >>
(pos, typ, left, right)
),
tuple_to_compare_expression
)
);
// FIXME(jwall): This is really *really* slow.
// Parses any operator expression: math_expression is tried first, then
// compare_expression.
named!(op_expression<TokenIter, Expression, error::Error>,
alt!(trace_nom!(math_expression) | trace_nom!(compare_expression))
);
/// Wraps an already parsed expression in `Expression::Grouped`.
///
/// This never fails; it always returns `Ok`.
fn expression_to_grouped_expression(e: Expression) -> ParseResult<Expression> {
    let inner = Box::new(e);
    Ok(Expression::Grouped(inner))
}
@ -1022,5 +916,7 @@ pub fn parse(input: LocatedSpan<&str>) -> Result<Vec<Statement>, error::Error> {
}
}
pub mod precedence;
#[cfg(test)]
mod test;

403
src/parse/precedence.rs Normal file
View File

@ -0,0 +1,403 @@
use std;
use nom::{ErrorKind, IResult, InputIter, InputLength, Slice};
use super::{non_op_expression, NomResult, ParseResult};
use ast::*;
use error;
use tokenizer::TokenIter;
/// One entry of the flat operand/operator list built by parse_operand_list
/// and consumed by the parsers that operate on OpListIter.
#[derive(Debug, PartialEq, Clone)]
pub enum Element {
// An already parsed operand.
Expr(Expression),
// A math operator (produced by math_op_type).
MathOp(BinaryExprType),
// A comparison operator (produced by compare_op_type).
CompareOp(CompareType),
}
// Parses a single math operator token (`+`, `-`, `*` or `/`) from the token
// stream and wraps the matching BinaryExprType in Element::MathOp.
named!(pub math_op_type<TokenIter, Element, error::Error>,
alt!(
do_parse!(punct!("+") >> (Element::MathOp(BinaryExprType::Add))) |
do_parse!(punct!("-") >> (Element::MathOp(BinaryExprType::Sub))) |
do_parse!(punct!("*") >> (Element::MathOp(BinaryExprType::Mul))) |
do_parse!(punct!("/") >> (Element::MathOp(BinaryExprType::Div)))
)
);
fn parse_expression(i: OpListIter) -> IResult<OpListIter, Expression, error::Error> {
let i_ = i.clone();
if i_.input_len() == 0 {
return IResult::Error(ErrorKind::Custom(error::Error::new(
format!("Expected Expression found End Of Input"),
error::ErrorType::IncompleteParsing,
// TODO(jwall): This position information is incorrect.
Position { line: 0, column: 0 },
)));
}
let el = &(i_[0]);
if let &Element::Expr(ref expr) = el {
return IResult::Done(i.slice(1..), expr.clone());
}
return IResult::Error(ErrorKind::Custom(error::Error::new(
format!(
"Error while parsing Binary Expression Unexpected Operator {:?}",
el
),
error::ErrorType::ParseError,
// TODO(jwall): This position information is incorrect.
Position { line: 0, column: 0 },
)));
}
fn parse_sum_operator(i: OpListIter) -> IResult<OpListIter, BinaryExprType, error::Error> {
let i_ = i.clone();
if i_.input_len() == 0 {
return IResult::Error(ErrorKind::Custom(error::Error::new(
format!("Expected Expression found End Of Input"),
error::ErrorType::IncompleteParsing,
// TODO(jwall): This position information is incorrect.
Position { line: 0, column: 0 },
)));
}
let el = &(i_[0]);
if let &Element::MathOp(ref op) = el {
match op {
&BinaryExprType::Add => {
return IResult::Done(i.slice(1..), op.clone());
}
&BinaryExprType::Sub => {
return IResult::Done(i.slice(1..), op.clone());
}
_other => {
// noop
}
};
}
return IResult::Error(ErrorKind::Custom(error::Error::new(
format!(
"Error while parsing Binary Expression Unexpected Operator {:?}",
el
),
error::ErrorType::ParseError,
// TODO(jwall): This position information is incorrect.
Position { line: 0, column: 0 },
)));
}
/// Builds an `Expression::Binary` out of an `(operator, left, right)` tuple.
///
/// The resulting expression takes its position from the left operand. This
/// never fails; it returns a `ParseResult` so that it can be used as the
/// conversion function of `map_res!`.
fn tuple_to_binary_expression(
    tpl: (BinaryExprType, Expression, Expression),
) -> ParseResult<Expression> {
    let (kind, lhs, rhs) = tpl;
    // Grab the position before the left operand is moved into its Box.
    let pos = lhs.pos().clone();
    let def = BinaryOpDef {
        kind: kind,
        left: Box::new(lhs),
        right: Box::new(rhs),
        pos: pos,
    };
    Ok(Expression::Binary(def))
}
fn parse_product_operator(i: OpListIter) -> IResult<OpListIter, BinaryExprType, error::Error> {
let i_ = i.clone();
if i_.input_len() == 0 {
return IResult::Error(ErrorKind::Custom(error::Error::new(
format!("Expected Expression found End Of Input"),
error::ErrorType::IncompleteParsing,
// TODO(jwall): This position information is incorrect.
Position { line: 0, column: 0 },
)));
}
let el = &(i_[0]);
if let &Element::MathOp(ref op) = el {
match op {
&BinaryExprType::Mul => {
return IResult::Done(i.slice(1..), op.clone());
}
&BinaryExprType::Div => {
return IResult::Done(i.slice(1..), op.clone());
}
_other => {
// noop
}
};
}
return IResult::Error(ErrorKind::Custom(error::Error::new(
format!(
"Error while parsing Binary Expression Unexpected Operator {:?}",
el
),
error::ErrorType::ParseError,
// TODO(jwall): This position information is incorrect.
Position { line: 0, column: 0 },
)));
}
/// do_binary_expr implements precedence based parsing where the more tightly bound
/// parsers are passed in as lowerrule parsers. We default to parse_expression
/// as the most tightly bound expressions.
///
/// Every form eventually expands to the last arm, which parses
/// `<lowerrule> <oprule> <lowerrule>` and converts the pieces into an
/// Expression with tuple_to_binary_expression.
macro_rules! do_binary_expr {
// Operator rule given as a function name, operand rule as a function name.
($i:expr, $oprule:ident, $lowerrule:ident) => {
do_binary_expr!($i, call!($oprule), $lowerrule)
};
// Operator rule given as a function name, operand rule as a macro invocation.
($i:expr, $oprule:ident, $lowerrule:ident!( $($lowerargs:tt)* )) => {
do_binary_expr!($i, call!($oprule), $lowerrule!($($lowerargs)*))
};
// Operator rule given as a function name, no operand rule.
($i:expr, $oprule:ident) => {
do_binary_expr!($i, call!($oprule))
};
// Operator rule given as a macro invocation, no operand rule: use
// parse_expression for both operands.
($i:expr, $oprule:ident!( $($args:tt)* )) => {
do_binary_expr!($i, $oprule!($($args)*), parse_expression)
};
// Operand rule given as a function name: wrap it in call!.
($i:expr, $oprule:ident!( $($args:tt)* ), $lowerrule:ident) => {
do_binary_expr!($i, $oprule!($($args)*), call!($lowerrule))
};
// Fully expanded form: both rules are macro invocations.
($i:expr, $oprule:ident!( $($args:tt)* ), $lowerrule:ident!( $($lowerargs:tt)* )) => {
map_res!($i,
do_parse!(
left: $lowerrule!($($lowerargs)*) >>
typ: $oprule!($($args)*) >>
right: $lowerrule!($($lowerargs)*) >>
(typ, left, right)
),
tuple_to_binary_expression
)
};
}
// Parses `<operand> (+|-) <operand>` from the operand list. Each operand is a
// product_expression if one matches, otherwise a single parse_expression.
named!(pub sum_expression<OpListIter, Expression, error::Error>,
do_binary_expr!(
parse_sum_operator,
alt!(trace_nom!(product_expression) | trace_nom!(parse_expression)))
);
// Parses `<operand> (*|/) <operand>` from the operand list, where each
// operand is a single parse_expression.
named!(pub product_expression<OpListIter, Expression, error::Error>,
do_binary_expr!(
parse_product_operator,
trace_nom!(parse_expression))
);
// Parses any math expression from the operand list: sum_expression is tried
// first, then product_expression.
named!(pub math_expression<OpListIter, Expression, error::Error>,
alt!(trace_nom!(sum_expression) | trace_nom!(product_expression))
);
// TODO(jwall): Change comparison operators to use the do_binary_expr! with precedence?
/// Builds an `Expression::Compare` out of a `(comparison, left, right)` tuple.
///
/// The resulting expression takes its position from the left operand. This
/// never fails; it returns a `ParseResult` so that it can be used as the
/// conversion function of `map_res!`.
fn tuple_to_compare_expression(
    tpl: (CompareType, Expression, Expression),
) -> ParseResult<Expression> {
    let (kind, lhs, rhs) = tpl;
    // Grab the position before the left operand is moved into its Box.
    let pos = lhs.pos().clone();
    let def = ComparisonDef {
        kind: kind,
        left: Box::new(lhs),
        right: Box::new(rhs),
        pos: pos,
    };
    Ok(Expression::Compare(def))
}
// Parses a single comparison operator token (`==`, `!=`, `<=`, `>=`, `<` or
// `>`) from the token stream and wraps the matching CompareType in
// Element::CompareOp.
named!(pub compare_op_type<TokenIter, Element, error::Error>,
alt!(
do_parse!(punct!("==") >> (Element::CompareOp(CompareType::Equal))) |
do_parse!(punct!("!=") >> (Element::CompareOp(CompareType::NotEqual))) |
do_parse!(punct!("<=") >> (Element::CompareOp(CompareType::LTEqual))) |
do_parse!(punct!(">=") >> (Element::CompareOp(CompareType::GTEqual))) |
do_parse!(punct!("<") >> (Element::CompareOp(CompareType::LT))) |
do_parse!(punct!(">") >> (Element::CompareOp(CompareType::GT)))
)
);
fn parse_compare_operator(i: OpListIter) -> IResult<OpListIter, CompareType, error::Error> {
let i_ = i.clone();
if i_.input_len() == 0 {
return IResult::Error(ErrorKind::Custom(error::Error::new(
format!("Expected Expression found End Of Input"),
error::ErrorType::IncompleteParsing,
// TODO(jwall): This position information is incorrect.
Position { line: 0, column: 0 },
)));
}
let el = &(i_[0]);
if let &Element::CompareOp(ref op) = el {
return IResult::Done(i.slice(1..), op.clone());
}
return IResult::Error(ErrorKind::Custom(error::Error::new(
format!(
"Error while parsing Binary Expression Unexpected Operator {:?}",
el
),
error::ErrorType::ParseError,
// TODO(jwall): This position information is incorrect.
Position { line: 0, column: 0 },
)));
}
// Parses `<operand> <comparison operator> <operand>` from the operand list
// into an Expression::Compare. Each operand is a math_expression if one
// matches, otherwise a single parse_expression.
named!(pub compare_expression<OpListIter, Expression, error::Error>,
map_res!(
do_parse!(
left: alt!(trace_nom!(math_expression) | trace_nom!(parse_expression)) >>
// FIXME(jwall): Wrong type of combinator
typ: parse_compare_operator >>
right: alt!(trace_nom!(math_expression) | trace_nom!(parse_expression)) >>
(typ, left, right)
),
tuple_to_compare_expression
)
);
// NOTE: nom::InputLength and nom::Slice are implemented for OpListIter further down in this file.
pub fn parse_operand_list(i: TokenIter) -> NomResult<Vec<Element>> {
// 1. First try to parse a non_op_expression,
let mut _i = i.clone();
let mut list = Vec::new();
// 1. loop
let mut firstrun = true;
loop {
// 2. Parse a non_op_expression.
match non_op_expression(_i.clone()) {
IResult::Error(e) => {
// A failure to parse an expression
// is always an error.
return IResult::Error(e);
}
IResult::Incomplete(i) => {
return IResult::Incomplete(i);
}
IResult::Done(rest, expr) => {
list.push(Element::Expr(expr));
_i = rest.clone();
}
}
// 3. Parse an operator.
match alt!(_i, math_op_type | compare_op_type) {
IResult::Error(e) => {
if firstrun {
// If we don't find an operator in our first
// run then this is not an operand list.
return IResult::Error(e);
}
// if we don't find one on subsequent runs then
// that's the end of the operand list.
break;
}
IResult::Incomplete(i) => {
return IResult::Incomplete(i);
}
IResult::Done(rest, el) => {
list.push(el);
_i = rest.clone();
}
}
firstrun = false;
}
return IResult::Done(_i, list);
}
/// A borrowed view over a slice of Elements, used as the input type of the
/// precedence parsers in this module.
#[derive(Clone, Debug, PartialEq)]
pub struct OpListIter<'a> {
// The elements that have not been consumed yet.
pub source: &'a [Element],
}
impl<'a> OpListIter<'a> {
/// Returns the number of elements left in the view.
pub fn len(&self) -> usize {
self.source.len()
}
}
// nom::InputLength implementation; delegates to the underlying slice.
impl<'a> InputLength for OpListIter<'a> {
fn input_len(&self) -> usize {
self.source.input_len()
}
}
// Generates a nom::Slice implementation for OpListIter for the given range
// type by slicing the underlying element slice and re-wrapping it.
macro_rules! impl_op_iter_slice {
($r:ty) => {
impl<'a> Slice<$r> for OpListIter<'a> {
fn slice(&self, range: $r) -> Self {
OpListIter {
source: self.source.slice(range),
}
}
}
};
}
// Implement Slice for `a..b`, `..b`, `a..` and `..`.
impl_op_iter_slice!(std::ops::Range<usize>);
impl_op_iter_slice!(std::ops::RangeTo<usize>);
impl_op_iter_slice!(std::ops::RangeFrom<usize>);
impl_op_iter_slice!(std::ops::RangeFull);
// Allows `iter[i]` to access the i-th remaining element. Like slice
// indexing, this panics when `i` is out of bounds.
impl<'a> std::ops::Index<usize> for OpListIter<'a> {
type Output = Element;
fn index(&self, i: usize) -> &Self::Output {
&self.source[i]
}
}
/// nom::InputIter implementation backed by the underlying element slice.
impl<'a> InputIter for OpListIter<'a> {
    type Item = &'a Element;
    type RawItem = Element;
    type Iter = std::iter::Enumerate<std::slice::Iter<'a, Self::RawItem>>;
    type IterElem = std::slice::Iter<'a, Self::RawItem>;

    /// Iterates over `(index, element)` pairs.
    fn iter_indices(&self) -> Self::Iter {
        self.iter_elements().enumerate()
    }

    /// Iterates over the elements.
    fn iter_elements(&self) -> Self::IterElem {
        self.source.iter()
    }

    /// Returns the index of the first element for which `predicate` returns
    /// true, if any. The predicate receives a clone of each element.
    fn position<P>(&self, predicate: P) -> Option<usize>
    where
        P: Fn(Self::RawItem) -> bool,
    {
        self.source
            .iter()
            .position(|element| predicate(element.clone()))
    }

    /// Returns `Some(count)` when `count` does not exceed the number of
    /// elements, `None` otherwise. Each element occupies exactly one index.
    fn slice_index(&self, count: usize) -> Option<usize> {
        if count <= self.source.len() {
            Some(count)
        } else {
            None
        }
    }
}
pub fn op_expression(i: TokenIter) -> NomResult<Expression> {
let preparse = parse_operand_list(i.clone());
match preparse {
IResult::Error(e) => IResult::Error(e),
IResult::Incomplete(i) => IResult::Incomplete(i),
IResult::Done(rest, oplist) => {
// TODO run our binary parsing.
let mut i_ = OpListIter {
source: oplist.as_slice(),
};
let parse_result = alt!(
i_,
trace_nom!(math_expression) | trace_nom!(compare_expression)
);
match parse_result {
IResult::Error(e) => IResult::Error(e),
IResult::Incomplete(i) => IResult::Incomplete(i),
IResult::Done(_, expr) => IResult::Done(rest.clone(), expr),
}
}
}
}

View File

@ -172,7 +172,7 @@ fn test_statement_parse() {
},
)))),
right: Box::new(Expression::Simple(Value::Int(value_node!(2, 1, 21)))),
pos: Position::new(1, 11),
pos: Position::new(1, 12),
}),
})
);
@ -420,15 +420,6 @@ fn test_expression_parse() {
pos: Position::new(1, 1),
})
);
assert_parse!(
product_expression("1 * 1"),
Expression::Binary(BinaryOpDef {
kind: BinaryExprType::Mul,
left: Box::new(Expression::Simple(Value::Int(value_node!(1, 1, 1)))),
right: Box::new(Expression::Simple(Value::Int(value_node!(1, 1, 5)))),
pos: Position::new(1, 1),
})
);
assert_parse!(
expression("1 / 1"),
Expression::Binary(BinaryOpDef {
@ -474,7 +465,7 @@ fn test_expression_parse() {
}
)))),
right: Box::new(Expression::Simple(Value::Int(value_node!(1, 1, 11)))),
pos: Position::new(1, 1),
pos: Position::new(1, 2),
})
);
assert_parse!(

View File

@ -514,17 +514,19 @@ macro_rules! match_type {
/// nom style macro that matches various Tokens by type and value and allows optional
/// conversion handlers for the matched Token.
macro_rules! match_token {
($i:expr,PUNCT => $f:expr) => {
($i:expr,PUNCT => $f:expr) => {{
use tokenizer::token_clone;
match_token!($i, PUNCT => $f, token_clone)
};
}};
($i:expr,PUNCT => $f:expr, $h:expr) => {
match_token!($i, TokenType::PUNCT, $f, format!("Not PUNCT ({})", $f), $h)
};
($i:expr,BAREWORD => $f:expr) => {
($i:expr,BAREWORD => $f:expr) => {{
use tokenizer::token_clone;
match_token!($i, BAREWORD => $f, token_clone)
};
}};
($i:expr,BAREWORD => $f:expr, $h:expr) => {
match_token!(
@ -538,6 +540,7 @@ macro_rules! match_token {
($i:expr, $t:expr, $f:expr, $msg:expr, $h:expr) => {{
let i_ = $i.clone();
use nom;
use nom::Slice;
use std::convert::Into;
let tok = &(i_[0]);