REFACTOR: Speed up precedence parsing.

Do less reparsing by first parsing the flat list of operators and operands, and then performing the bottom-up precedence parsing over that list.
2025-07-22 18:19:54 -04:00 · 2018-07-04 12:30:29 -05:00 · 2018-07-04 12:30:29 -05:00 · 921643972b
commit 921643972b
parent b12ac18481
5 changed files with 419 additions and 125 deletions
--- a/src/ast/mod.rs
+++ b/src/ast/mod.rs
@ -703,6 +703,7 @@ pub enum Expression {

    // Complex Expressions
    Copy(CopyDef),
+    // TODO(jwall): This should really store its position :-(
    Grouped(Box<Expression>),
    Format(FormatDef),
    Call(CallDef),
--- a/src/parse/mod.rs
+++ b/src/parse/mod.rs
@ -21,6 +21,7 @@ use nom::IResult;
 use nom::InputLength;
 use nom_locate::LocatedSpan;

+use self::precedence::op_expression;
 use ast::*;
 use error;
 use tokenizer::*;
@ -36,6 +37,7 @@ type ParseResult<O> = Result<O, error::Error>;
 macro_rules! trace_nom {
    ($i:expr, $rule:ident!( $($args:tt)* )) => {
        {
+            use parse::ENABLE_TRACE;
            if ENABLE_TRACE {
                eprintln!("Entering Rule: {:?} {:?}", stringify!($rule), $i);
            }
@ -49,6 +51,7 @@ macro_rules! trace_nom {

    ($i:expr, $rule:ident) => {
        {
+            use parse::ENABLE_TRACE;
            if ENABLE_TRACE {
                eprintln!("Entering Rule: {:?} {:?}", stringify!($rule), $i);
            }
@ -204,7 +207,7 @@ macro_rules! alt_peek {
    // This is our default termination case.
    // If there is no fallback then we return an Error.
    (__inner $i:expr, __end) => {
-        // FIXME(jwall): We should do a better custom error here.
+        // TODO(jwall): We should do a better custom error here.
        nom::IResult::Error(error_position!(nom::ErrorKind::Alt,$i))
    };

@ -366,115 +369,6 @@ named!(simple_expression<TokenIter, Expression, error::Error>,
       )
 );

-fn tuple_to_binary_expression(
-    tpl: (Position, BinaryExprType, Expression, Expression),
-) -> ParseResult<Expression> {
-    Ok(Expression::Binary(BinaryOpDef {
-        kind: tpl.1,
-        left: Box::new(tpl.2),
-        right: Box::new(tpl.3),
-        pos: Position::new(tpl.0.line as usize, tpl.0.column as usize),
-    }))
-}
-
-/// do_binary_expr implements precedence based parsing where the more tightly bound
-/// parsers are passed in as lowerrule parsers. We default to any non_op_expression
-/// as the most tightly bound expressions.
-macro_rules! do_binary_expr {
-    ($i:expr, $oprule:ident!( $($args:tt)* )) => {
-        do_binary_expr!($i, $oprule!($($args)*), non_op_expression)
-    };
-
-    ($i:expr, $oprule:ident!( $($args:tt)* ), $lowerrule:ident) => {
-        do_binary_expr!($i, $oprule!($($args)*), call!($lowerrule))
-    };
-
-    ($i:expr, $oprule:ident!( $($args:tt)* ), $lowerrule:ident!( $($lowerargs:tt)* )) => {
-        map_res!($i,
-            do_parse!(
-                pos: pos >>
-                left: $lowerrule!($($lowerargs)*) >>
-                    typ: $oprule!($($args)*) >>
-                    right: $lowerrule!($($lowerargs)*) >>
-                    (pos, typ, left, right)
-            ),
-            tuple_to_binary_expression
-        )
-    };
-}
-
-// Matches an operator token to a BinaryExprType
-named!(math_op_type<TokenIter, BinaryExprType, error::Error>,
-    alt!(
-        do_parse!(punct!("+") >> (BinaryExprType::Add)) |
-        do_parse!(punct!("-") >> (BinaryExprType::Sub)) |
-        do_parse!(punct!("*") >> (BinaryExprType::Mul)) |
-        do_parse!(punct!("/") >> (BinaryExprType::Div))
-    )
-);
-
-// trace_macros!(true);
-named!(sum_expression<TokenIter, Expression, error::Error>,
-    do_binary_expr!(
-        alt_peek!(
-            punct!("+") => math_op_type |
-            punct!("-") => math_op_type),
-        alt!(trace_nom!(product_expression) | trace_nom!(simple_expression) | trace_nom!(grouped_expression)))
-);
-
-named!(product_expression<TokenIter, Expression, error::Error>,
-    do_binary_expr!(
-       alt_peek!(
-       punct!("*") => math_op_type |
-       punct!("/") => math_op_type)
-    )
-);
-
-named!(math_expression<TokenIter, Expression, error::Error>,
-    alt!(trace_nom!(sum_expression) | trace_nom!(product_expression))
-);
-
-// TODO(jwall): Change comparison operators to use the do_binary_expr! with precedence?
-fn tuple_to_compare_expression(
-    tpl: (Position, CompareType, Expression, Expression),
-) -> ParseResult<Expression> {
-    Ok(Expression::Compare(ComparisonDef {
-        kind: tpl.1,
-        left: Box::new(tpl.2),
-        right: Box::new(tpl.3),
-        pos: Position::new(tpl.0.line as usize, tpl.0.column as usize),
-    }))
-}
-
-named!(compare_op_type<TokenIter, CompareType, error::Error>,
-    alt!(
-        do_parse!(punct!("==") >> (CompareType::Equal)) |
-        do_parse!(punct!("!=") >> (CompareType::NotEqual)) |
-        do_parse!(punct!("<=") >> (CompareType::LTEqual)) |
-        do_parse!(punct!(">=") >> (CompareType::GTEqual)) |
-        do_parse!(punct!("<") >> (CompareType::LT)) |
-        do_parse!(punct!(">") >> (CompareType::GT))
-    )
-);
-
-named!(compare_expression<TokenIter, Expression, error::Error>,
-    map_res!(
-        do_parse!(
-            pos: pos >>
-            left: alt!(trace_nom!(math_expression) | trace_nom!(non_op_expression)) >>
-                typ: compare_op_type >>
-                right: alt!(trace_nom!(math_expression) | trace_nom!(non_op_expression)) >>
-                (pos, typ, left, right)
-        ),
-        tuple_to_compare_expression
-    )
-);
-
-// FIXME(jwall): This is really *really* slow.
-named!(op_expression<TokenIter, Expression, error::Error>,
-    alt!(trace_nom!(math_expression) | trace_nom!(compare_expression))
-);
-
 fn expression_to_grouped_expression(e: Expression) -> ParseResult<Expression> {
    Ok(Expression::Grouped(Box::new(e)))
 }
@ -1022,5 +916,7 @@ pub fn parse(input: LocatedSpan<&str>) -> Result<Vec<Statement>, error::Error> {
    }
 }

+pub mod precedence;
+
 #[cfg(test)]
 mod test;
--- a/src/parse/precedence.rs
+++ b/src/parse/precedence.rs
@ -0,0 +1,403 @@
+use std;
+
+use nom::{ErrorKind, IResult, InputIter, InputLength, Slice};
+
+use super::{non_op_expression, NomResult, ParseResult};
+use ast::*;
+use error;
+use tokenizer::TokenIter;
+
+/// A single item of the flattened operand/operator list that is built by
+/// `parse_operand_list` and walked through an `OpListIter`.
+#[derive(Debug, PartialEq, Clone)]
+pub enum Element {
+    /// An already parsed operand expression.
+    Expr(Expression),
+    /// A math operator, as produced by `math_op_type`.
+    MathOp(BinaryExprType),
+    /// A comparison operator, as produced by `compare_op_type`.
+    CompareOp(CompareType),
+}
+
+// Matches one of the math operator tokens (+, -, *, /) and wraps the
+// corresponding BinaryExprType in an Element::MathOp.
+named!(pub math_op_type<TokenIter, Element, error::Error>,
+    alt!(
+        do_parse!(punct!("+") >> (Element::MathOp(BinaryExprType::Add))) |
+        do_parse!(punct!("-") >> (Element::MathOp(BinaryExprType::Sub))) |
+        do_parse!(punct!("*") >> (Element::MathOp(BinaryExprType::Mul))) |
+        do_parse!(punct!("/") >> (Element::MathOp(BinaryExprType::Div)))
+    )
+);
+
+/// Consumes the leading element of the operand list when it is an expression.
+///
+/// On success a clone of that expression is returned together with the rest
+/// of the list. An empty list or a leading operator yields a custom error.
+fn parse_expression(i: OpListIter) -> IResult<OpListIter, Expression, error::Error> {
+    if i.input_len() == 0 {
+        return IResult::Error(ErrorKind::Custom(error::Error::new(
+            "Expected Expression found End Of Input".to_string(),
+            error::ErrorType::IncompleteParsing,
+            // TODO(jwall): This position information is incorrect.
+            Position { line: 0, column: 0 },
+        )));
+    }
+    match i[0] {
+        Element::Expr(ref expr) => IResult::Done(i.slice(1..), expr.clone()),
+        ref unexpected => IResult::Error(ErrorKind::Custom(error::Error::new(
+            format!(
+                "Error while parsing Binary Expression Unexpected Operator {:?}",
+                unexpected
+            ),
+            error::ErrorType::ParseError,
+            // TODO(jwall): This position information is incorrect.
+            Position { line: 0, column: 0 },
+        ))),
+    }
+}
+
+/// Consumes the leading element of the operand list when it is a sum level
+/// operator (`Add` or `Sub`).
+///
+/// On success the operator is returned together with the rest of the list.
+/// An empty list or any other leading element yields a custom error.
+fn parse_sum_operator(i: OpListIter) -> IResult<OpListIter, BinaryExprType, error::Error> {
+    if i.input_len() == 0 {
+        return IResult::Error(ErrorKind::Custom(error::Error::new(
+            // This parser wants an operator, so say so in the message.
+            "Expected Operator found End Of Input".to_string(),
+            error::ErrorType::IncompleteParsing,
+            // TODO(jwall): This position information is incorrect.
+            Position { line: 0, column: 0 },
+        )));
+    }
+    let el = &i[0];
+    if let &Element::MathOp(ref op) = el {
+        match op {
+            &BinaryExprType::Add | &BinaryExprType::Sub => {
+                return IResult::Done(i.slice(1..), op.clone());
+            }
+            // Product level operators are not handled here.
+            _ => {}
+        }
+    }
+    IResult::Error(ErrorKind::Custom(error::Error::new(
+        format!(
+            "Error while parsing Binary Expression Unexpected Operator {:?}",
+            el
+        ),
+        error::ErrorType::ParseError,
+        // TODO(jwall): This position information is incorrect.
+        Position { line: 0, column: 0 },
+    )))
+}
+
+/// Builds an `Expression::Binary` from an `(operator, left, right)` tuple.
+///
+/// The position of the new node is a clone of the left operand's `pos()`.
+fn tuple_to_binary_expression(
+    tpl: (BinaryExprType, Expression, Expression),
+) -> ParseResult<Expression> {
+    let (kind, left, right) = tpl;
+    // Grab the position before `left` is moved into its box.
+    let pos = left.pos().clone();
+    Ok(Expression::Binary(BinaryOpDef {
+        kind,
+        left: Box::new(left),
+        right: Box::new(right),
+        pos,
+    }))
+}
+
+/// Consumes the leading element of the operand list when it is a product
+/// level operator (`Mul` or `Div`).
+///
+/// On success the operator is returned together with the rest of the list.
+/// An empty list or any other leading element yields a custom error.
+fn parse_product_operator(i: OpListIter) -> IResult<OpListIter, BinaryExprType, error::Error> {
+    if i.input_len() == 0 {
+        return IResult::Error(ErrorKind::Custom(error::Error::new(
+            // This parser wants an operator, so say so in the message.
+            "Expected Operator found End Of Input".to_string(),
+            error::ErrorType::IncompleteParsing,
+            // TODO(jwall): This position information is incorrect.
+            Position { line: 0, column: 0 },
+        )));
+    }
+    let el = &i[0];
+    if let &Element::MathOp(ref op) = el {
+        match op {
+            &BinaryExprType::Mul | &BinaryExprType::Div => {
+                return IResult::Done(i.slice(1..), op.clone());
+            }
+            // Sum level operators are not handled here.
+            _ => {}
+        }
+    }
+    IResult::Error(ErrorKind::Custom(error::Error::new(
+        format!(
+            "Error while parsing Binary Expression Unexpected Operator {:?}",
+            el
+        ),
+        error::ErrorType::ParseError,
+        // TODO(jwall): This position information is incorrect.
+        Position { line: 0, column: 0 },
+    )))
+}
+
+/// do_binary_expr implements precedence based parsing where the more tightly bound
+/// parsers are passed in as lowerrule parsers. When no lowerrule is given we default
+/// to parse_expression as the most tightly bound parser.
+///
+/// The oprule and lowerrule may each be given as a bare identifier or as a macro
+/// invocation; bare identifiers are wrapped in call!.
+macro_rules! do_binary_expr {
+    // oprule as an identifier, lowerrule as an identifier.
+    ($i:expr, $oprule:ident, $lowerrule:ident) => {
+        do_binary_expr!($i, call!($oprule), $lowerrule)
+    };
+
+    // oprule as an identifier, lowerrule as a macro invocation.
+    ($i:expr, $oprule:ident, $lowerrule:ident!( $($lowerargs:tt)* )) => {
+        do_binary_expr!($i, call!($oprule), $lowerrule!($($lowerargs)*))
+    };
+
+    // oprule as an identifier with the default lowerrule.
+    ($i:expr, $oprule:ident) => {
+        do_binary_expr!($i, call!($oprule))
+    };
+
+    // oprule as a macro invocation with the default lowerrule.
+    ($i:expr, $oprule:ident!( $($args:tt)* )) => {
+        do_binary_expr!($i, $oprule!($($args)*), parse_expression)
+    };
+
+    // oprule as a macro invocation, lowerrule as an identifier.
+    ($i:expr, $oprule:ident!( $($args:tt)* ), $lowerrule:ident) => {
+        do_binary_expr!($i, $oprule!($($args)*), call!($lowerrule))
+    };
+
+    // The terminal form: parse `left op right` and build the binary expression.
+    ($i:expr, $oprule:ident!( $($args:tt)* ), $lowerrule:ident!( $($lowerargs:tt)* )) => {
+        map_res!($i,
+            do_parse!(
+                left: $lowerrule!($($lowerargs)*) >>
+                    typ: $oprule!($($args)*) >>
+                    right: $lowerrule!($($lowerargs)*) >>
+                    (typ, left, right)
+            ),
+            tuple_to_binary_expression
+        )
+    };
+}
+
+// Parses `left op right` where op is accepted by parse_sum_operator and each
+// side is either a product_expression or a single operand.
+named!(pub sum_expression<OpListIter, Expression, error::Error>,
+    do_binary_expr!(
+        parse_sum_operator,
+        alt!(trace_nom!(product_expression) | trace_nom!(parse_expression)))
+);
+
+// Parses `left op right` where op is accepted by parse_product_operator and
+// each side is a single operand.
+named!(pub product_expression<OpListIter, Expression, error::Error>,
+    do_binary_expr!(
+       parse_product_operator,
+    trace_nom!(parse_expression))
+);
+
+// Tries sum_expression first and falls back to product_expression.
+named!(pub math_expression<OpListIter, Expression, error::Error>,
+    alt!(trace_nom!(sum_expression) | trace_nom!(product_expression))
+);
+
+// TODO(jwall): Change comparison operators to use the do_binary_expr! with precedence?
+/// Builds an `Expression::Compare` from an `(operator, left, right)` tuple.
+///
+/// The position of the new node is a clone of the left operand's `pos()`.
+fn tuple_to_compare_expression(
+    tpl: (CompareType, Expression, Expression),
+) -> ParseResult<Expression> {
+    let (kind, left, right) = tpl;
+    // Grab the position before `left` is moved into its box.
+    let pos = left.pos().clone();
+    Ok(Expression::Compare(ComparisonDef {
+        kind,
+        left: Box::new(left),
+        right: Box::new(right),
+        pos,
+    }))
+}
+
+// Matches one of the comparison operator tokens and wraps the corresponding
+// CompareType in an Element::CompareOp. The two character operators are
+// listed before `<` and `>`.
+named!(pub compare_op_type<TokenIter, Element, error::Error>,
+    alt!(
+        do_parse!(punct!("==") >> (Element::CompareOp(CompareType::Equal))) |
+        do_parse!(punct!("!=") >> (Element::CompareOp(CompareType::NotEqual))) |
+        do_parse!(punct!("<=") >> (Element::CompareOp(CompareType::LTEqual))) |
+        do_parse!(punct!(">=") >> (Element::CompareOp(CompareType::GTEqual))) |
+        do_parse!(punct!("<") >>  (Element::CompareOp(CompareType::LT))) |
+        do_parse!(punct!(">") >>  (Element::CompareOp(CompareType::GT)))
+    )
+);
+
+/// Consumes the leading element of the operand list when it is a comparison
+/// operator.
+///
+/// On success the operator is returned together with the rest of the list.
+/// An empty list or any other leading element yields a custom error.
+fn parse_compare_operator(i: OpListIter) -> IResult<OpListIter, CompareType, error::Error> {
+    if i.input_len() == 0 {
+        return IResult::Error(ErrorKind::Custom(error::Error::new(
+            // This parser wants an operator, so say so in the message.
+            "Expected Operator found End Of Input".to_string(),
+            error::ErrorType::IncompleteParsing,
+            // TODO(jwall): This position information is incorrect.
+            Position { line: 0, column: 0 },
+        )));
+    }
+    let el = &i[0];
+    if let &Element::CompareOp(ref op) = el {
+        return IResult::Done(i.slice(1..), op.clone());
+    }
+    IResult::Error(ErrorKind::Custom(error::Error::new(
+        format!(
+            "Error while parsing Binary Expression Unexpected Operator {:?}",
+            el
+        ),
+        error::ErrorType::ParseError,
+        // TODO(jwall): This position information is incorrect.
+        Position { line: 0, column: 0 },
+    )))
+}
+
+// Parses `left op right` where op is a comparison operator and each side is
+// either a math_expression or a single operand.
+named!(pub compare_expression<OpListIter, Expression, error::Error>,
+    map_res!(
+        do_parse!(
+            left: alt!(trace_nom!(math_expression) | trace_nom!(parse_expression)) >>
+                // FIXME(jwall): Wrong type of combinator
+                typ: parse_compare_operator >>
+                right: alt!(trace_nom!(math_expression) | trace_nom!(parse_expression)) >>
+                (typ, left, right)
+        ),
+        tuple_to_compare_expression
+    )
+);
+
+// The nom::InputLength and nom::Slice implementations for OpListIter are defined below.
+/// Flattens a binary expression into an alternating list of operands and
+/// operators.
+///
+/// Repeatedly parses a `non_op_expression` followed by a math or comparison
+/// operator. A failed or incomplete operand parse is returned as is. A
+/// missing operator after the very first operand is an error, because the
+/// input is then not an operand list; after later operands it ends the list.
+pub fn parse_operand_list(i: TokenIter) -> NomResult<Vec<Element>> {
+    let mut cursor = i;
+    let mut elements = Vec::new();
+    loop {
+        // An operand is mandatory at this point.
+        cursor = match non_op_expression(cursor.clone()) {
+            IResult::Error(e) => return IResult::Error(e),
+            IResult::Incomplete(needed) => return IResult::Incomplete(needed),
+            IResult::Done(rest, expr) => {
+                elements.push(Element::Expr(expr));
+                rest
+            }
+        };
+        // An operator keeps the list going.
+        match alt!(cursor, math_op_type | compare_op_type) {
+            IResult::Error(e) => {
+                // Exactly one element means only the first operand has been
+                // seen so far, so this was never an operand list.
+                if elements.len() == 1 {
+                    return IResult::Error(e);
+                }
+                // Otherwise the list simply ends here.
+                break;
+            }
+            IResult::Incomplete(needed) => return IResult::Incomplete(needed),
+            IResult::Done(rest, operator) => {
+                elements.push(operator);
+                cursor = rest;
+            }
+        }
+    }
+    IResult::Done(cursor, elements)
+}
+
+/// A nom input type over a borrowed slice of `Element`s, used to run the
+/// precedence parsers over an already parsed operand list.
+#[derive(Clone, Debug, PartialEq)]
+pub struct OpListIter<'a> {
+    /// The elements that have not been consumed yet.
+    pub source: &'a [Element],
+}
+
+impl<'a> OpListIter<'a> {
+    /// Returns the number of elements in the underlying slice.
+    pub fn len(&self) -> usize {
+        self.source.len()
+    }
+}
+
+// Lets nom query how much input is left; delegates to the underlying slice.
+impl<'a> InputLength for OpListIter<'a> {
+    fn input_len(&self) -> usize {
+        self.source.input_len()
+    }
+}
+
+// Generates a nom Slice implementation for OpListIter for the given range
+// type by slicing the underlying source and wrapping the result again.
+macro_rules! impl_op_iter_slice {
+    ($r:ty) => {
+        impl<'a> Slice<$r> for OpListIter<'a> {
+            fn slice(&self, range: $r) -> Self {
+                OpListIter {
+                    source: self.source.slice(range),
+                }
+            }
+        }
+    };
+}
+
+// Instantiate Slice for each of the range types.
+impl_op_iter_slice!(std::ops::Range<usize>);
+impl_op_iter_slice!(std::ops::RangeTo<usize>);
+impl_op_iter_slice!(std::ops::RangeFrom<usize>);
+impl_op_iter_slice!(std::ops::RangeFull);
+
+// Allows `iter[n]` access to the nth remaining element. This indexes the
+// underlying slice directly, so callers check input_len() first.
+impl<'a> std::ops::Index<usize> for OpListIter<'a> {
+    type Output = Element;
+
+    fn index(&self, i: usize) -> &Self::Output {
+        &self.source[i]
+    }
+}
+
+// Lets nom iterate over the remaining elements of the operand list.
+impl<'a> InputIter for OpListIter<'a> {
+    type Item = &'a Element;
+    type RawItem = Element;
+
+    type Iter = std::iter::Enumerate<std::slice::Iter<'a, Self::RawItem>>;
+    type IterElem = std::slice::Iter<'a, Self::RawItem>;
+
+    /// Iterates over `(index, element)` pairs.
+    fn iter_indices(&self) -> Self::Iter {
+        self.source.iter().enumerate()
+    }
+
+    /// Iterates over the elements only.
+    fn iter_elements(&self) -> Self::IterElem {
+        self.source.iter()
+    }
+
+    /// Returns the index of the first element for which `predicate` holds.
+    /// The predicate receives a clone of each element it is asked about.
+    fn position<P>(&self, predicate: P) -> Option<usize>
+    where
+        P: Fn(Self::RawItem) -> bool,
+    {
+        self.source
+            .iter()
+            .position(|element| predicate(element.clone()))
+    }
+
+    /// Returns `count` when it is no larger than the number of elements,
+    /// since every element occupies exactly one index, and `None` otherwise.
+    fn slice_index(&self, count: usize) -> Option<usize> {
+        if count <= self.len() {
+            Some(count)
+        } else {
+            None
+        }
+    }
+}
+
+/// Parses a binary math or comparison expression from the token stream.
+///
+/// The tokens are first flattened into an operand/operator list with
+/// `parse_operand_list`; the precedence parsers then run over that list
+/// instead of reparsing tokens. On success the returned remaining input is
+/// the token position after the whole operand list.
+///
+/// Errors and incomplete results from either phase are passed through. It is
+/// also an error when the precedence parsers leave elements unconsumed, as
+/// the tokens behind those elements have already been consumed and the
+/// elements would otherwise be dropped silently.
+pub fn op_expression(i: TokenIter) -> NomResult<Expression> {
+    let (rest, oplist) = match parse_operand_list(i) {
+        IResult::Error(e) => return IResult::Error(e),
+        IResult::Incomplete(needed) => return IResult::Incomplete(needed),
+        IResult::Done(rest, oplist) => (rest, oplist),
+    };
+
+    let i_ = OpListIter {
+        source: oplist.as_slice(),
+    };
+
+    let parse_result = alt!(
+        i_,
+        trace_nom!(math_expression) | trace_nom!(compare_expression)
+    );
+
+    match parse_result {
+        IResult::Error(e) => IResult::Error(e),
+        IResult::Incomplete(needed) => IResult::Incomplete(needed),
+        IResult::Done(remaining, expr) => {
+            if remaining.input_len() != 0 {
+                // Returning `rest` here would skip over operators and
+                // operands that never made it into `expr`.
+                return IResult::Error(ErrorKind::Custom(error::Error::new(
+                    format!(
+                        "Error while parsing Binary Expression Unexpected trailing elements {:?}",
+                        remaining.source
+                    ),
+                    error::ErrorType::ParseError,
+                    // TODO(jwall): This position information is incorrect.
+                    Position { line: 0, column: 0 },
+                )));
+            }
+            IResult::Done(rest, expr)
+        }
+    }
+}
--- a/src/parse/test.rs
+++ b/src/parse/test.rs
@ -172,7 +172,7 @@ fn test_statement_parse() {
                    },
                )))),
                right: Box::new(Expression::Simple(Value::Int(value_node!(2, 1, 21)))),
-                pos: Position::new(1, 11),
+                pos: Position::new(1, 12),
            }),
        })
    );
@ -420,15 +420,6 @@ fn test_expression_parse() {
            pos: Position::new(1, 1),
        })
    );
-    assert_parse!(
-        product_expression("1 * 1"),
-        Expression::Binary(BinaryOpDef {
-            kind: BinaryExprType::Mul,
-            left: Box::new(Expression::Simple(Value::Int(value_node!(1, 1, 1)))),
-            right: Box::new(Expression::Simple(Value::Int(value_node!(1, 1, 5)))),
-            pos: Position::new(1, 1),
-        })
-    );
    assert_parse!(
        expression("1 / 1"),
        Expression::Binary(BinaryOpDef {
@ -474,7 +465,7 @@ fn test_expression_parse() {
                }
            )))),
            right: Box::new(Expression::Simple(Value::Int(value_node!(1, 1, 11)))),
-            pos: Position::new(1, 1),
+            pos: Position::new(1, 2),
        })
    );
    assert_parse!(
--- a/src/tokenizer/mod.rs
+++ b/src/tokenizer/mod.rs
@ -514,17 +514,19 @@ macro_rules! match_type {
 /// nom style macro that matches various Tokens by type and value and allows optional
 /// conversion handlers for the matched Token.
 macro_rules! match_token {
-    ($i:expr,PUNCT => $f:expr) => {
+    ($i:expr,PUNCT => $f:expr) => {{
+        use tokenizer::token_clone;
        match_token!($i, PUNCT => $f, token_clone)
-    };
+    }};

    ($i:expr,PUNCT => $f:expr, $h:expr) => {
        match_token!($i, TokenType::PUNCT, $f, format!("Not PUNCT ({})", $f), $h)
    };

-    ($i:expr,BAREWORD => $f:expr) => {
+    ($i:expr,BAREWORD => $f:expr) => {{
+        use tokenizer::token_clone;
        match_token!($i, BAREWORD => $f, token_clone)
-    };
+    }};

    ($i:expr,BAREWORD => $f:expr, $h:expr) => {
        match_token!(
@ -538,6 +540,7 @@ macro_rules! match_token {

    ($i:expr, $t:expr, $f:expr, $msg:expr, $h:expr) => {{
        let i_ = $i.clone();
+        use nom;
        use nom::Slice;
        use std::convert::Into;
        let tok = &(i_[0]);