From c925399adcedb9e04d5d57909f9c2558b539fc45 Mon Sep 17 00:00:00 2001 From: Jeremy Wall Date: Mon, 8 Oct 2018 23:03:52 -0500 Subject: [PATCH] REFACTOR: Move to nom4 for superior error handling. * Everything compiles but not all tests pass yet. --- src/error.rs | 6 +- src/parse/mod.rs | 409 +++++++++++++++++++++++----------------- src/parse/precedence.rs | 188 ++++++++++-------- src/parse/test.rs | 5 +- src/tokenizer/mod.rs | 145 ++++++++------ src/tokenizer/test.rs | 90 ++++++--- 6 files changed, 504 insertions(+), 339 deletions(-) diff --git a/src/error.rs b/src/error.rs index 77ff70a..3c8453c 100644 --- a/src/error.rs +++ b/src/error.rs @@ -98,7 +98,11 @@ impl Error { e => Self::new_with_cause( msg, t, - Error::new(format!("ErrorKind: {}", e), ErrorType::Unsupported, pos), + Error::new( + format!("ErrorKind: {}", e.description()), + ErrorType::Unsupported, + pos, + ), ), } } diff --git a/src/parse/mod.rs b/src/parse/mod.rs index 8b33a85..185dd14 100644 --- a/src/parse/mod.rs +++ b/src/parse/mod.rs @@ -17,7 +17,7 @@ use std::borrow::Borrow; use std::str::FromStr; use nom; -use nom::IResult; +use nom::Context::Code; use nom::InputLength; use nom_locate::LocatedSpan; @@ -42,16 +42,18 @@ macro_rules! wrap_err { ($i:expr, $submac:ident!( $($args:tt)* ), $msg:expr) => {{ let _i = $i.clone(); + use nom::Context::Code; match $submac!(_i, $($args)*) { Ok((rest, mac)) => Ok((rest, mac)), Err(e) => { let context = match e { nom::Err::Incomplete(i) => nom::Err::Incomplete(i), - nom::Err::Error(nom::Context::Code((i, e))) => { - let wrapper = error::Error::new_with_cause($msg, error::ErrorType::ParseError, e); - nom::Err::Error(nom::Context::Code((i, wrapper))) + nom::Err::Error(Code(i, e)) => { + // TODO(jwall): This is a little ugly. Can we fix the position handling? + let wrapper = error::Error::new_with_errorkind($msg, error::ErrorType::ParseError, try!(pos(i.clone())).1, e); + nom::Err::Error(Code(i, nom::ErrorKind::Custom(wrapper))) } - nom::Err::Failure(e) => nom::Err::Error(e), + nom::Err::Failure(ctx) => nom::Err::Error(ctx), }; Err(context) } @@ -113,6 +115,94 @@ named!(quoted_value, match_type!(STR => str_to_value) ); +/// alt_peek conditionally runs a combinator if a lookahead combinator matches. +macro_rules! alt_peek { + (__inner $i:expr, $peekrule:ident!( $($peekargs:tt)* ) => $parserule:ident | $($rest:tt)* ) => ( + alt_peek!(__inner $i, $peekrule!($($peekargs)*) => call!($parserule) | $($rest)* ) + ); + + (__inner $i:expr, $peekrule:ident => $($rest:tt)* ) => ( + alt_peek!(__inner $i, call!($peekrule) => $($rest)* ) + ); + + (__inner $i:expr, $peekrule:ident!( $($peekargs:tt)* ) => $parserule:ident!( $($parseargs:tt)* ) | $($rest:tt)* ) => ( + { + let _i = $i.clone(); + let pre_res = peek!(_i, $peekrule!($($peekargs)*)); + match pre_res { + // if the peek was incomplete then it might still match so return incomplete. + Err(nom::Err::Incomplete(i)) => Err(nom::Err::Incomplete(i)), + // If the peek was in error then try the next peek => parse pair. + Err(nom::Err::Error(_ctx)) => { + alt_peek!(__inner $i, $($rest)*) + }, + // Failures are a hard abort. Don't keep parsing. + Err(nom::Err::Failure(ctx)) => Err(nom::Err::Failure(ctx)), + // If the peek was successful then return the result of the parserule + // regardless of it's result. + Ok((_i, _)) => { + $parserule!(_i, $($parseargs)*) + }, + } + } + ); + + // These are our no fallback termination cases. + (__inner $i:expr, $peekrule:ident!( $($peekargs:tt)* ) => $parserule:ident, __end ) => ( + alt_peek!(__inner $i, $peekrule!($($peekargs)*) => call!($parserule), __end ) + ); + + (__inner $i:expr, $peekrule:ident!( $($peekargs:tt)* ) => $parserule:ident!( $($parseargs:tt)* ), __end ) => ( + { + let _i = $i.clone(); + let pre_res = peek!(_i, $peekrule!($($peekargs)*)); + match pre_res { + // if the peek was incomplete then it might still match so return incomplete. + Err(nom::Err::Incomplete(i)) => Err(nom::Err::Incomplete(i)), + // If the peek was in error then try the next peek => parse pair. + Err(nom::Err::Error(_)) => { + alt_peek!(__inner $i, __end) + }, + Err(nom::Err::Failure(ctx)) => Err(nom::Err::Failure(ctx)), + // If the peek was successful then return the result of the parserule + // regardless of it's result. + Ok((_i, _)) => { + $parserule!(_i, $($parseargs)*) + }, + } + } + ); + + // These are our fallback termination cases. + (__inner $i:expr, $fallback:ident, __end) => ( + { + let _i = $i.clone(); + call!(_i, $fallback) + } + ); + // In the case of a fallback rule with no peek we just return whatever + // the fallback rule returns. + (__inner $i:expr, $fallback:ident!( $($args:tt)* ), __end) => ( + { + let _i = $i.clone(); + $fallback!(_i, $($args)*) + } + ); + + // This is our default termination case. + // If there is no fallback then we return an Error. + (__inner $i:expr, __end) => { + // TODO(jwall): We should do a better custom error here. + Err(nom::Err::Error(error_position!($i, nom::ErrorKind::Alt))) + }; + + // alt_peek entry_point. + ($i:expr, $($rest:tt)*) => { + // We use __end to define the termination token the recursive rule should consume. + alt_peek!(__inner $i, $($rest)*, __end) + }; +} + // Helper function to make the return types work for down below. fn triple_to_number(v: (Option, Option, Option)) -> ParseResult { let (pref, mut pref_pos) = match v.0 { @@ -161,129 +251,68 @@ fn triple_to_number(v: (Option, Option, Option)) -> ParseRe return Ok(Value::Float(value_node!(f, pref_pos))); } -/// alt_peek conditionally runs a combinator if a lookahead combinator matches. -macro_rules! alt_peek { - (__inner $i:expr, $peekrule:ident!( $($peekargs:tt)* ) => $parserule:ident | $($rest:tt)* ) => ( - alt_peek!(__inner $i, $peekrule!($($peekargs)*) => call!($parserule) | $($rest)* ) - ); - - (__inner $i:expr, $peekrule:ident => $($rest:tt)* ) => ( - alt_peek!(__inner $i, call!($peekrule) => $($rest)* ) - ); - - (__inner $i:expr, $peekrule:ident!( $($peekargs:tt)* ) => $parserule:ident!( $($parseargs:tt)* ) | $($rest:tt)* ) => ( - { - let _i = $i.clone(); - let pre_res = peek!(_i, $peekrule!($($peekargs)*)); - match pre_res { - // if the peek was incomplete then it might still match so return incomplete. - nom::IResult::Incomplete(i) => nom::IResult::Incomplete(i), - // If the peek was in error then try the next peek => parse pair. - nom::IResult::Error(_) => { - alt_peek!(__inner $i, $($rest)*) - }, - // If the peek was successful then return the result of the parserule - // regardless of it's result. - nom::IResult::Done(_i, _) => { - $parserule!(_i, $($parseargs)*) - }, - } +macro_rules! try_number { + ($ctx:expr, $res:expr) => {{ + use nom::Context::Code; + // Failures abort processing and returned immediately. + if let Err(nom::Err::Failure(ctx)) = $res { + return Err(nom::Err::Failure(ctx)); } - ); - - // These are our no fallback termination cases. - (__inner $i:expr, $peekrule:ident!( $($peekargs:tt)* ) => $parserule:ident, __end ) => ( - alt_peek!(__inner $i, $peekrule!($($peekargs)*) => call!($parserule), __end ) - ); - - (__inner $i:expr, $peekrule:ident!( $($peekargs:tt)* ) => $parserule:ident!( $($parseargs:tt)* ), __end ) => ( - { - let _i = $i.clone(); - let pre_res = peek!(_i, $peekrule!($($peekargs)*)); - match pre_res { - // if the peek was incomplete then it might still match so return incomplete. - nom::IResult::Incomplete(i) => nom::IResult::Incomplete(i), - // If the peek was in error then try the next peek => parse pair. - nom::IResult::Error(_) => { - alt_peek!(__inner $i, __end) - }, - // If the peek was successful then return the result of the parserule - // regardless of it's result. - nom::IResult::Done(_i, _) => { - $parserule!(_i, $($parseargs)*) - }, - } + // Successes abort processing and return immediately. + if let Ok((rest, tpl)) = $res { + return match triple_to_number(tpl) { + Ok(val) => Ok((rest, val)), + Err(e) => Err(nom::Err::Error(Code( + $ctx.clone(), + nom::ErrorKind::Custom(e), + ))), + }; } - ); - - // These are our fallback termination cases. - (__inner $i:expr, $fallback:ident, __end) => ( - { - let _i = $i.clone(); - call!(_i, $fallback) - } - ); - // In the case of a fallback rule with no peek we just return whatever - // the fallback rule returns. - (__inner $i:expr, $fallback:ident!( $($args:tt)* ), __end) => ( - { - let _i = $i.clone(); - $fallback!(_i, $($args)*) - } - ); - - // This is our default termination case. - // If there is no fallback then we return an Error. - (__inner $i:expr, __end) => { - // TODO(jwall): We should do a better custom error here. - nom::IResult::Error(error_position!(nom::ErrorKind::Alt,$i)) - }; - - // alt_peek entry_point. - ($i:expr, $($rest:tt)*) => { - // We use __end to define the termination token the recursive rule should consume. - alt_peek!(__inner $i, $($rest)*, __end) - }; + // If we get an incomplete or an error we'll try the next one. + }}; } -// trace_macros!(true); - -// NOTE(jwall): HERE THERE BE DRAGONS. The order for these matters -// alot. We need to process alternatives in order of decreasing -// specificity. Unfortunately this means we are required to go in a -// decreasing size order which messes with alt!'s completion logic. To -// work around this we have to force Incomplete to be Error so that -// alt! will try the next in the series instead of aborting. -// -// *IMPORTANT* -// It also means this combinator is risky when used with partial -// inputs. So handle with care. -named!(number, - map_res!(alt!( - complete!(do_parse!( // 1.0 - prefix: match_type!(DIGIT) >> - has_dot: punct!(".") >> - suffix: match_type!(DIGIT) >> - (Some(prefix.clone()), Some(has_dot.clone()), Some(suffix.clone())) - )) | - complete!(do_parse!( // 1. - prefix: match_type!(DIGIT) >> - has_dot: punct!(".") >> - (Some(prefix.clone()), Some(has_dot.clone()), None) - )) | - complete!(do_parse!( // .1 - has_dot: punct!(".") >> - suffix: match_type!(DIGIT) >> - (None, Some(has_dot.clone()), Some(suffix.clone())) - )) | - do_parse!( // 1 - prefix: match_type!(DIGIT) >> - (Some(prefix.clone()), None, None) - )), - triple_to_number - ) -); -// trace_macros!(false); +fn number(i: TokenIter) -> NomResult { + let full = do_parse!( + i.clone(), // 1.0 + prefix: match_type!(DIGIT) + >> has_dot: punct!(".") + >> suffix: match_type!(DIGIT) + >> ( + Some(prefix.clone()), + Some(has_dot.clone()), + Some(suffix.clone()) + ) + ); + try_number!(i, full); + let left_partial = do_parse!( + i.clone(), // 1. + prefix: match_type!(DIGIT) + >> has_dot: punct!(".") + >> (Some(prefix.clone()), Some(has_dot.clone()), None) + ); + try_number!(i, left_partial); + let right_partial = do_parse!( + i.clone(), // .1 + has_dot: punct!(".") + >> suffix: match_type!(DIGIT) + >> (None, Some(has_dot.clone()), Some(suffix.clone())) + ); + try_number!(i, right_partial); + let int_num = do_parse!( + i.clone(), // 1 + prefix: match_type!(DIGIT) >> (Some(prefix.clone()), None, None) + ); + try_number!(i, int_num); + Err(nom::Err::Error(Code( + i.clone(), + nom::ErrorKind::Custom(error::Error::new( + "Not a Number", + error::ErrorType::ParseError, + i.token_pos(), + )), + ))) +} named!(boolean_value, do_parse!( @@ -406,34 +435,44 @@ fn symbol_or_expression(input: TokenIter) -> NomResult { let scalar_head = do_parse!(input, sym: alt!(symbol | compound_value) >> (sym)); match scalar_head { - IResult::Incomplete(i) => IResult::Incomplete(i), - IResult::Error(_) => grouped_expression(input), - IResult::Done(rest, val) => { - let res = peek!(rest.clone(), punct!(".")); + Err(nom::Err::Incomplete(i)) => Err(nom::Err::Incomplete(i)), + Err(nom::Err::Failure(ctx)) => Err(nom::Err::Failure(ctx)), + Err(nom::Err::Error(_)) => grouped_expression(input), + Ok((rest, val)) => { + let res: NomResult = peek!(rest.clone(), punct!(".")); + // NOTE(jwall): We ignore the failure case below because it's nonsensical + // for a peek on a single character. If the above ever becomes not a single + // character then we would want to handle the Failure state below. match val { Value::Tuple(_) => { - if res.is_done() { - IResult::Done(rest, Expression::Simple(val)) + if res.is_ok() { + Ok((rest, Expression::Simple(val))) } else { - return IResult::Error(nom::ErrorKind::Custom(error::Error::new( - "Expected (.) but no dot found".to_string(), - error::ErrorType::IncompleteParsing, - val.pos().clone(), + return Err(nom::Err::Error(Code( + rest, + nom::ErrorKind::Custom(error::Error::new( + "Expected (.) but no dot found".to_string(), + error::ErrorType::IncompleteParsing, + val.pos().clone(), + )), ))); } } Value::List(_) => { - if res.is_done() { - IResult::Done(rest, Expression::Simple(val)) + if res.is_ok() { + Ok((rest, Expression::Simple(val))) } else { - return IResult::Error(nom::ErrorKind::Custom(error::Error::new( - "Expected (.) but no dot found".to_string(), - error::ErrorType::IncompleteParsing, - val.pos().clone(), + return Err(nom::Err::Error(Code( + rest, + nom::ErrorKind::Custom(error::Error::new( + "Expected (.) but no dot found".to_string(), + error::ErrorType::IncompleteParsing, + val.pos().clone(), + )), ))); } } - _ => IResult::Done(rest, Expression::Simple(val)), + _ => Ok((rest, Expression::Simple(val))), } } } @@ -441,21 +480,23 @@ fn symbol_or_expression(input: TokenIter) -> NomResult { fn selector_list(input: TokenIter) -> NomResult { let (rest, head) = match symbol_or_expression(input) { - IResult::Done(rest, val) => (rest, val), - IResult::Error(e) => { - return IResult::Error(e); + Ok((rest, val)) => (rest, val), + Err(nom::Err::Error(ctx)) => { + return Err(nom::Err::Error(ctx)); } - IResult::Incomplete(i) => { - return IResult::Incomplete(i); + Err(nom::Err::Failure(ctx)) => { + return Err(nom::Err::Failure(ctx)); + } + Err(nom::Err::Incomplete(i)) => { + return Err(nom::Err::Incomplete(i)); } }; let (rest, is_dot) = match punct!(rest, ".") { - IResult::Done(rest, tok) => (rest, Some(tok)), - IResult::Incomplete(i) => { - return IResult::Incomplete(i); - } - IResult::Error(_) => (rest, None), + Ok((rest, tok)) => (rest, Some(tok)), + Err(nom::Err::Incomplete(i)) => return Err(nom::Err::Incomplete(i)), + Err(nom::Err::Error(_)) => (rest, None), + Err(nom::Err::Failure(ctx)) => return Err(nom::Err::Failure(ctx)), }; let (rest, list) = if is_dot.is_some() { @@ -464,20 +505,26 @@ fn selector_list(input: TokenIter) -> NomResult { punct!("."), alt!(match_type!(BAREWORD) | match_type!(DIGIT) | match_type!(STR)) ) { - IResult::Done(rest, val) => (rest, val), - IResult::Incomplete(i) => { - return IResult::Incomplete(i); + Ok((rest, val)) => (rest, val), + Err(nom::Err::Incomplete(i)) => { + return Err(nom::Err::Incomplete(i)); } - IResult::Error(e) => { - return IResult::Error(e); + Err(nom::Err::Error(ctx)) => { + return Err(nom::Err::Error(ctx)); + } + Err(nom::Err::Failure(ctx)) => { + return Err(nom::Err::Failure(ctx)); } }; if list.is_empty() { - return IResult::Error(nom::ErrorKind::Custom(error::Error::new( - "(.) with no selector fields after".to_string(), - error::ErrorType::IncompleteParsing, - is_dot.unwrap().pos, + return Err(nom::Err::Error(Code( + rest, + nom::ErrorKind::Custom(error::Error::new( + "(.) with no selector fields after".to_string(), + error::ErrorType::IncompleteParsing, + is_dot.unwrap().pos, + )), ))); } else { (rest, Some(list)) @@ -491,7 +538,7 @@ fn selector_list(input: TokenIter) -> NomResult { tail: list, }; - return IResult::Done(rest, sel_list); + return Ok((rest, sel_list)); } fn tuple_to_copy(t: (SelectorDef, FieldList)) -> ParseResult { @@ -749,9 +796,10 @@ fn unprefixed_expression(input: TokenIter) -> NomResult { trace_nom!(call_expression) | trace_nom!(copy_expression) | trace_nom!(format_expression) ); match attempt { - IResult::Incomplete(i) => IResult::Incomplete(i), - IResult::Done(rest, expr) => IResult::Done(rest, expr), - IResult::Error(_) => trace_nom!(_input, simple_expression), + Err(nom::Err::Incomplete(i)) => Err(nom::Err::Incomplete(i)), + Err(nom::Err::Failure(ctx)) => Err(nom::Err::Failure(ctx)), + Err(nom::Err::Error(_)) => trace_nom!(_input, simple_expression), + Ok((rest, expr)) => Ok((rest, expr)), } } @@ -767,9 +815,10 @@ named!(non_op_expression, fn expression(input: TokenIter) -> NomResult { let _input = input.clone(); match trace_nom!(_input, op_expression) { - IResult::Incomplete(i) => IResult::Incomplete(i), - IResult::Error(_) => trace_nom!(input, non_op_expression), - IResult::Done(rest, expr) => IResult::Done(rest, expr), + Err(nom::Err::Incomplete(i)) => Err(nom::Err::Incomplete(i)), + Err(nom::Err::Failure(ctx)) => Err(nom::Err::Failure(ctx)), + Err(nom::Err::Error(_)) => trace_nom!(input, non_op_expression), + Ok((rest, expr)) => Ok((rest, expr)), } } @@ -810,7 +859,6 @@ named!(let_stmt_body, named!(let_statement, wrap_err!(do_parse!( word!("let") >> - pos: pos >> stmt: trace_nom!(let_stmt_body) >> (stmt) ), "Invalid let statement") @@ -839,7 +887,6 @@ named!(import_statement, wrap_err!(do_parse!( word!("import") >> // past this point we know this is supposed to be an import statement. - pos: pos >> stmt: trace_nom!(import_stmt_body) >> (stmt) ), "Invalid import statement") @@ -848,7 +895,6 @@ named!(import_statement, named!(assert_statement, wrap_err!(do_parse!( word!("assert") >> - pos: pos >> tok: match_type!(PIPEQUOTE) >> punct!(";") >> (Statement::Assert(tok.clone())) @@ -858,7 +904,6 @@ named!(assert_statement, named!(out_statement, wrap_err!(do_parse!( word!("out") >> - pos: pos >> typ: match_type!(BAREWORD) >> expr: expression >> punct!(";") >> @@ -892,10 +937,13 @@ pub fn parse(input: LocatedSpan<&str>) -> Result, error::Error> { break; } match statement(i) { - IResult::Error(nom::ErrorKind::Custom(e)) => { + Err(nom::Err::Error(Code(_, nom::ErrorKind::Custom(e)))) => { return Err(e); } - IResult::Error(e) => { + Err(nom::Err::Failure(Code(_, nom::ErrorKind::Custom(e)))) => { + return Err(e); + } + Err(nom::Err::Error(Code(_, e))) => { return Err(error::Error::new_with_errorkind( "Statement Parse error", error::ErrorType::ParseError, @@ -906,7 +954,18 @@ pub fn parse(input: LocatedSpan<&str>) -> Result, error::Error> { e, )); } - IResult::Incomplete(ei) => { + Err(nom::Err::Failure(Code(_, e))) => { + return Err(error::Error::new_with_errorkind( + "Statement Parse error", + error::ErrorType::ParseError, + Position { + line: i_[0].pos.line, + column: i_[0].pos.column, + }, + e, + )); + } + Err(nom::Err::Incomplete(ei)) => { return Err(error::Error::new( format!("Unexpected end of parsing input: {:?}", ei), error::ErrorType::IncompleteParsing, @@ -916,7 +975,7 @@ pub fn parse(input: LocatedSpan<&str>) -> Result, error::Error> { }, )); } - IResult::Done(rest, stmt) => { + Ok((rest, stmt)) => { out.push(stmt); i_ = rest; if i_.input_len() == 0 { diff --git a/src/parse/precedence.rs b/src/parse/precedence.rs index 6d06e40..14bd137 100644 --- a/src/parse/precedence.rs +++ b/src/parse/precedence.rs @@ -16,6 +16,8 @@ //! operators. use std; +use nom; +use nom::Context::Code; use nom::{ErrorKind, IResult, InputIter, InputLength, Slice}; use super::{non_op_expression, NomResult, ParseResult}; @@ -43,60 +45,72 @@ named!(math_op_type, fn parse_expression(i: OpListIter) -> IResult { let i_ = i.clone(); if i_.input_len() == 0 { - return IResult::Error(ErrorKind::Custom(error::Error::new( - format!("Expected Expression found End Of Input"), - error::ErrorType::IncompleteParsing, - // TODO(jwall): This position information is incorrect. - Position { line: 0, column: 0 }, + return Err(nom::Err::Error(Code( + i_, + ErrorKind::Custom(error::Error::new( + format!("Expected Expression found End Of Input"), + error::ErrorType::IncompleteParsing, + // TODO(jwall): This position information is incorrect. + Position { line: 0, column: 0 }, + )), ))); } let el = &(i_[0]); if let &Element::Expr(ref expr) = el { - return IResult::Done(i.slice(1..), expr.clone()); + return Ok((i.slice(1..), expr.clone())); } - return IResult::Error(ErrorKind::Custom(error::Error::new( - format!( - "Error while parsing Binary Expression Unexpected Operator {:?}", - el - ), - error::ErrorType::ParseError, - // TODO(jwall): This position information is incorrect. - Position { line: 0, column: 0 }, + return Err(nom::Err::Error(Code( + i_.clone(), + ErrorKind::Custom(error::Error::new( + format!( + "Error while parsing Binary Expression Unexpected Operator {:?}", + el + ), + error::ErrorType::ParseError, + // TODO(jwall): This position information is incorrect. + Position { line: 0, column: 0 }, + )), ))); } fn parse_sum_operator(i: OpListIter) -> IResult { let i_ = i.clone(); if i_.input_len() == 0 { - return IResult::Error(ErrorKind::Custom(error::Error::new( - format!("Expected Expression found End Of Input"), - error::ErrorType::IncompleteParsing, - // TODO(jwall): This position information is incorrect. - Position { line: 0, column: 0 }, + return Err(nom::Err::Error(Code( + i_, + ErrorKind::Custom(error::Error::new( + format!("Expected Expression found End Of Input"), + error::ErrorType::IncompleteParsing, + // TODO(jwall): This position information is incorrect. + Position { line: 0, column: 0 }, + )), ))); } let el = &(i_[0]); if let &Element::MathOp(ref op) = el { match op { &BinaryExprType::Add => { - return IResult::Done(i.slice(1..), op.clone()); + return Ok((i.slice(1..), op.clone())); } &BinaryExprType::Sub => { - return IResult::Done(i.slice(1..), op.clone()); + return Ok((i.slice(1..), op.clone())); } _other => { // noop } }; } - return IResult::Error(ErrorKind::Custom(error::Error::new( - format!( - "Error while parsing Binary Expression Unexpected Operator {:?}", - el - ), - error::ErrorType::ParseError, - // TODO(jwall): This position information is incorrect. - Position { line: 0, column: 0 }, + return Err(nom::Err::Error(Code( + i_.clone(), + ErrorKind::Custom(error::Error::new( + format!( + "Error while parsing Binary Expression Unexpected Operator {:?}", + el + ), + error::ErrorType::ParseError, + // TODO(jwall): This position information is incorrect. + Position { line: 0, column: 0 }, + )), ))); } @@ -115,35 +129,41 @@ fn tuple_to_binary_expression( fn parse_product_operator(i: OpListIter) -> IResult { let i_ = i.clone(); if i_.input_len() == 0 { - return IResult::Error(ErrorKind::Custom(error::Error::new( - format!("Expected Expression found End Of Input"), - error::ErrorType::IncompleteParsing, - // TODO(jwall): This position information is incorrect. - Position { line: 0, column: 0 }, + return Err(nom::Err::Error(Code( + i_, + ErrorKind::Custom(error::Error::new( + format!("Expected Expression found End Of Input"), + error::ErrorType::IncompleteParsing, + // TODO(jwall): This position information is incorrect. + Position { line: 0, column: 0 }, + )), ))); } let el = &(i_[0]); if let &Element::MathOp(ref op) = el { match op { &BinaryExprType::Mul => { - return IResult::Done(i.slice(1..), op.clone()); + return Ok((i.slice(1..), op.clone())); } &BinaryExprType::Div => { - return IResult::Done(i.slice(1..), op.clone()); + return Ok((i.slice(1..), op.clone())); } _other => { // noop } }; } - return IResult::Error(ErrorKind::Custom(error::Error::new( - format!( - "Error while parsing Binary Expression Unexpected Operator {:?}", - el - ), - error::ErrorType::ParseError, - // TODO(jwall): This position information is incorrect. - Position { line: 0, column: 0 }, + return Err(nom::Err::Error(Code( + i_.clone(), + ErrorKind::Custom(error::Error::new( + format!( + "Error while parsing Binary Expression Unexpected Operator {:?}", + el + ), + error::ErrorType::ParseError, + // TODO(jwall): This position information is incorrect. + Position { line: 0, column: 0 }, + )), ))); } @@ -227,25 +247,31 @@ named!(compare_op_type, fn parse_compare_operator(i: OpListIter) -> IResult { let i_ = i.clone(); if i_.input_len() == 0 { - return IResult::Error(ErrorKind::Custom(error::Error::new( - format!("Expected Expression found End Of Input"), - error::ErrorType::IncompleteParsing, - // TODO(jwall): This position information is incorrect. - Position { line: 0, column: 0 }, + return Err(nom::Err::Error(Code( + i_, + ErrorKind::Custom(error::Error::new( + format!("Expected Expression found End Of Input"), + error::ErrorType::IncompleteParsing, + // TODO(jwall): This position information is incorrect. + Position { line: 0, column: 0 }, + )), ))); } let el = &(i_[0]); if let &Element::CompareOp(ref op) = el { - return IResult::Done(i.slice(1..), op.clone()); + return Ok((i.slice(1..), op.clone())); } - return IResult::Error(ErrorKind::Custom(error::Error::new( - format!( - "Error while parsing Binary Expression Unexpected Operator {:?}", - el - ), - error::ErrorType::ParseError, - // TODO(jwall): This position information is incorrect. - Position { line: 0, column: 0 }, + return Err(nom::Err::Error(Code( + i_.clone(), + ErrorKind::Custom(error::Error::new( + format!( + "Error while parsing Binary Expression Unexpected Operator {:?}", + el + ), + error::ErrorType::ParseError, + // TODO(jwall): This position information is incorrect. + Position { line: 0, column: 0 }, + )), ))); } @@ -271,42 +297,50 @@ fn parse_operand_list(i: TokenIter) -> NomResult> { loop { // 2. Parse a non_op_expression. match non_op_expression(_i.clone()) { - IResult::Error(e) => { + Err(nom::Err::Error(ctx)) => { // A failure to parse an expression // is always an error. - return IResult::Error(e); + return Err(nom::Err::Error(ctx)); } - IResult::Incomplete(i) => { - return IResult::Incomplete(i); + Err(nom::Err::Failure(ctx)) => { + // A failure to parse an expression + // is always an error. + return Err(nom::Err::Failure(ctx)); } - IResult::Done(rest, expr) => { + Err(nom::Err::Incomplete(i)) => { + return Err(nom::Err::Incomplete(i)); + } + Ok((rest, expr)) => { list.push(Element::Expr(expr)); _i = rest.clone(); } } // 3. Parse an operator. - match alt!(_i, math_op_type | compare_op_type) { - IResult::Error(e) => { + match alt!(_i.clone(), math_op_type | compare_op_type) { + Err(nom::Err::Error(ctx)) => { if firstrun { // If we don't find an operator in our first // run then this is not an operand list. - return IResult::Error(e); + return Err(nom::Err::Error(ctx)); } // if we don't find one on subsequent runs then // that's the end of the operand list. break; } - IResult::Incomplete(i) => { - return IResult::Incomplete(i); + Err(nom::Err::Failure(ctx)) => { + return Err(nom::Err::Failure(ctx)); } - IResult::Done(rest, el) => { + Err(nom::Err::Incomplete(i)) => { + return Err(nom::Err::Incomplete(i)); + } + Ok((rest, el)) => { list.push(el); _i = rest.clone(); } } firstrun = false; } - return IResult::Done(_i, list); + return Ok((_i, list)); } #[derive(Clone, Debug, PartialEq)] @@ -397,9 +431,10 @@ impl<'a> InputIter for OpListIter<'a> { pub fn op_expression(i: TokenIter) -> NomResult { let preparse = parse_operand_list(i.clone()); match preparse { - IResult::Error(e) => IResult::Error(e), - IResult::Incomplete(i) => IResult::Incomplete(i), - IResult::Done(rest, oplist) => { + Err(nom::Err::Error(ctx)) => Err(nom::Err::Error(ctx)), + Err(nom::Err::Failure(ctx)) => Err(nom::Err::Failure(ctx)), + Err(nom::Err::Incomplete(i)) => Err(nom::Err::Incomplete(i)), + Ok((rest, oplist)) => { let mut i_ = OpListIter { source: oplist.as_slice(), }; @@ -410,9 +445,10 @@ pub fn op_expression(i: TokenIter) -> NomResult { ); match parse_result { - IResult::Error(e) => IResult::Error(e), - IResult::Incomplete(i) => IResult::Incomplete(i), - IResult::Done(_, expr) => IResult::Done(rest.clone(), expr), + Err(nom::Err::Error(Code(_, e))) => Err(nom::Err::Error(Code(rest.clone(), e))), + Err(nom::Err::Failure(Code(_, e))) => Err(nom::Err::Failure(Code(rest.clone(), e))), + Err(nom::Err::Incomplete(i)) => Err(nom::Err::Incomplete(i)), + Ok((_, expr)) => Ok((rest.clone(), expr)), } } } diff --git a/src/parse/test.rs b/src/parse/test.rs index b1520eb..b644472 100644 --- a/src/parse/test.rs +++ b/src/parse/test.rs @@ -14,7 +14,6 @@ use super::*; use tokenizer::{tokenize, TokenIter}; -use nom::IResult; use nom_locate::LocatedSpan; macro_rules! assert_parse { @@ -28,8 +27,8 @@ macro_rules! assert_parse { Ok(val) => match $f(TokenIter { source: val.as_slice(), }) { - IResult::Done(_, result) => assert_eq!(result, $out), - other => assert!(false, format!("Expected Done got {:?}", other)), + Ok((_, result)) => assert_eq!(result, $out), + other => assert!(false, format!("Expected Ok got {:?}", other)), }, } };}; diff --git a/src/tokenizer/mod.rs b/src/tokenizer/mod.rs index 9ee360a..559948b 100644 --- a/src/tokenizer/mod.rs +++ b/src/tokenizer/mod.rs @@ -16,7 +16,7 @@ use ast::*; use error; use nom; -use nom::{alpha, digit, is_alphanumeric, multispace}; +use nom::{alpha, digit, is_alphanumeric}; use nom::{InputIter, InputLength, Slice}; use nom_locate::LocatedSpan; use std; @@ -49,14 +49,14 @@ fn escapequoted(input: Span) -> nom::IResult { } else if c == '"' && !escape { // Bail if this is an unescaped " // we exit here. - return nom::IResult::Done(input.slice(i..), frag); + return Ok((input.slice(i..), frag)); } else { // we accumulate this character. frag.push(c); escape = false; // reset our escaping sentinel } } - return nom::IResult::Incomplete(nom::Needed::Unknown); + return Err(nom::Err::Incomplete(nom::Needed::Unknown)); } named!(strtok( Span ) -> Token, @@ -136,21 +136,26 @@ macro_rules! do_tag_tok { ($i:expr, $type:expr, $tag:expr,WS) => { do_parse!( $i, - span: position!() >> frag: tag!($tag) >> alt!(whitespace | comment) >> (Token { - typ: $type, - pos: Position::from(span), - fragment: frag.fragment.to_string(), - }) + span: position!() + >> frag: tag!($tag) + >> alt!(whitespace | comment) + >> (Token { + typ: $type, + pos: Position::from(span), + fragment: frag.fragment.to_string(), + }) ) }; ($i:expr, $type:expr, $tag:expr) => { do_parse!( $i, - span: position!() >> frag: tag!($tag) >> (Token { - typ: $type, - pos: Position::from(span), - fragment: frag.fragment.to_string(), - }) + span: position!() + >> frag: tag!($tag) + >> (Token { + typ: $type, + pos: Position::from(span), + fragment: frag.fragment.to_string(), + }) ) }; } @@ -285,60 +290,59 @@ named!(filtertok( Span ) -> Token, fn end_of_input(input: Span) -> nom::IResult { match eof!(input,) { - nom::IResult::Done(_, _) => { - return nom::IResult::Done( + Ok((_, _)) => { + return Ok(( input, make_tok!(EOF => input.line as usize, input.get_column() as usize), - ); - } - nom::IResult::Incomplete(_) => { - return nom::IResult::Incomplete(nom::Needed::Unknown); - } - nom::IResult::Error(e) => { - return nom::IResult::Error(e); + )); } + Err(e) => Err(e), } } fn comment(input: Span) -> nom::IResult { match tag!(input, "//") { - nom::IResult::Done(rest, _) => { + Ok((rest, _)) => { match alt!( rest, take_until_and_consume!("\r\n") | take_until_and_consume!("\n") ) { - nom::IResult::Done(rest, cmt) => { - return nom::IResult::Done( + Ok((rest, cmt)) => { + return Ok(( rest, make_tok!(CMT => cmt.fragment.to_string(), input.line as usize, input.get_column() as usize), - ); + )); } // If we didn't find a new line then we just grab everything. _ => { let blen = rest.input_len(); let next = rest.slice(blen..); let tok = rest.slice(..blen); - return nom::IResult::Done( + return Ok(( next, make_tok!(CMT => tok.fragment.to_string(), input.line as usize, input.get_column() as usize ), - ); + )); } } } - nom::IResult::Incomplete(i) => return nom::IResult::Incomplete(i), - nom::IResult::Error(e) => return nom::IResult::Error(e), + Err(e) => Err(e), } } +pub fn is_ws(chr: char) -> bool { + chr.is_whitespace() +} + +// TODO(jwall): take_while and many don't work well with the end of input. named!(whitespace( Span ) -> Token, do_parse!( span: position!() >> - many1!(multispace) >> + take_while!(is_ws) >> (Token{ typ: TokenType::WS, pos: Position::from(span), @@ -351,7 +355,6 @@ named!(token( Span ) -> Token, alt!( strtok | pipequotetok | - emptytok | // This must come before the barewordtok digittok | commatok | rbracetok | @@ -376,6 +379,7 @@ named!(token( Span ) -> Token, semicolontok | leftsquarebracket | rightsquarebracket | + emptytok | // This must come before the barewordtok booleantok | lettok | outtok | @@ -400,7 +404,7 @@ pub fn tokenize(input: Span) -> Result, error::Error> { break; } match token(i) { - nom::IResult::Error(_e) => { + Err(nom::Err::Error(_e)) => { return Err(error::Error::new( "Invalid Token encountered", error::ErrorType::UnexpectedToken, @@ -410,7 +414,17 @@ pub fn tokenize(input: Span) -> Result, error::Error> { }, )); } - nom::IResult::Incomplete(_) => { + Err(nom::Err::Failure(_ctx)) => { + return Err(error::Error::new( + "Invalid Token encountered", + error::ErrorType::UnexpectedToken, + Position { + line: i.line as usize, + column: i.get_column() as usize, + }, + )); + } + Err(nom::Err::Incomplete(_)) => { return Err(error::Error::new( "Unexepcted end of Input", error::ErrorType::UnexpectedToken, @@ -420,7 +434,7 @@ pub fn tokenize(input: Span) -> Result, error::Error> { }, )); } - nom::IResult::Done(rest, tok) => { + Ok((rest, tok)) => { i = rest; if tok.typ == TokenType::COMMENT || tok.typ == TokenType::WS { // we skip comments and whitespace @@ -518,26 +532,36 @@ macro_rules! match_type { ($i:expr, $t:expr, $msg:expr, $h:expr) => {{ let i_ = $i.clone(); + use nom::Context::Code; use nom::Slice; use std::convert::Into; if i_.input_len() == 0 { - nom::IResult::Error(nom::ErrorKind::Custom(error::Error::new( - format!("End of Input! {}", $msg), - error::ErrorType::IncompleteParsing, - Position { line: 0, column: 0 }, + Err(nom::Err::Error(Code( + i_, + nom::ErrorKind::Custom(error::Error::new( + format!("End of Input! {}", $msg), + error::ErrorType::IncompleteParsing, + Position { line: 0, column: 0 }, + )), ))) } else { let tok = &(i_[0]); if tok.typ == $t { match $h(tok) { - Result::Ok(v) => nom::IResult::Done($i.slice(1..), v), - Result::Err(e) => nom::IResult::Error(nom::ErrorKind::Custom(e.into())), + Result::Ok(v) => Result::Ok(($i.slice(1..), v)), + Result::Err(e) => Err(nom::Err::Error(Code( + i_.clone(), + nom::ErrorKind::Custom(e.into()), + ))), } } else { - nom::IResult::Error(nom::ErrorKind::Custom(error::Error::new( - $msg.to_string(), - error::ErrorType::UnexpectedToken, - tok.pos.clone(), + Err(nom::Err::Error(Code( + i_.clone(), + nom::ErrorKind::Custom(error::Error::new( + $msg.to_string(), + error::ErrorType::UnexpectedToken, + tok.pos.clone(), + )), ))) } } @@ -574,19 +598,26 @@ macro_rules! match_token { ($i:expr, $t:expr, $f:expr, $msg:expr, $h:expr) => {{ let i_ = $i.clone(); use nom; + use nom::Context::Code; use nom::Slice; use std::convert::Into; let tok = &(i_[0]); if tok.typ == $t && &tok.fragment == $f { match $h(tok) { - Result::Ok(v) => nom::IResult::Done($i.slice(1..), v), - Result::Err(e) => nom::IResult::Error(nom::ErrorKind::Custom(e.into())), + Result::Ok(v) => Ok(($i.slice(1..), v)), + Result::Err(e) => Result::Err(nom::Err::Error(Code( + i_.clone(), + nom::ErrorKind::Custom(e.into()), + ))), } } else { - nom::IResult::Error(nom::ErrorKind::Custom(error::Error::new( - format!("{} Instead is ({})", $msg, tok.fragment), - error::ErrorType::UnexpectedToken, - tok.pos.clone(), + Err(nom::Err::Error(Code( + i_.clone(), + nom::ErrorKind::Custom(error::Error::new( + format!("{} Instead is ({})", $msg, tok.fragment), + error::ErrorType::UnexpectedToken, + tok.pos.clone(), + )), ))) } }}; @@ -611,13 +642,13 @@ pub fn pos(i: TokenIter) -> nom::IResult { let tok = &i[0]; let line = tok.pos.line; let column = tok.pos.column; - nom::IResult::Done( + Ok(( i.clone(), Position { line: line, column: column, }, - ) + )) } /// TokenIter wraps a slice of Tokens and implements the various necessary @@ -631,6 +662,14 @@ impl<'a> TokenIter<'a> { pub fn len(&self) -> usize { self.source.len() } + + pub fn token_pos(&self) -> Position { + let curr = &self.source[0]; + Position { + line: curr.pos.line, + column: curr.pos.column, + } + } } impl<'a> nom::InputLength for TokenIter<'a> { diff --git a/src/tokenizer/test.rs b/src/tokenizer/test.rs index 5f1dec2..200d27b 100644 --- a/src/tokenizer/test.rs +++ b/src/tokenizer/test.rs @@ -2,11 +2,18 @@ use super::*; use nom; use nom_locate::LocatedSpan; +#[test] +fn test_whitespace() { + //let result = whitespace(LocatedSpan::new(" \n\t")); + let result = take_while!(LocatedSpan::new(" f"), is_ws); + assert!(result.is_ok(), format!("result {:?} is not ok", result)); +} + #[test] fn test_empty_token() { let result = emptytok(LocatedSpan::new("NULL ")); - assert!(result.is_done(), format!("result {:?} is not done", result)); - if let nom::IResult::Done(_, tok) = result { + assert!(result.is_ok(), format!("result {:?} is not ok", result)); + if let Ok((_, tok)) = result { assert_eq!(tok.fragment, "NULL"); assert_eq!(tok.typ, TokenType::EMPTY); } @@ -15,18 +22,38 @@ fn test_empty_token() { #[test] fn test_assert_token() { let result = asserttok(LocatedSpan::new("assert ")); - assert!(result.is_done(), format!("result {:?} is not done", result)); - if let nom::IResult::Done(_, tok) = result { + assert!(result.is_ok(), format!("result {:?} is not ok", result)); + if let Ok((_, tok)) = result { assert_eq!(tok.fragment, "assert"); assert_eq!(tok.typ, TokenType::BAREWORD); } } +#[test] +fn test_let_token() { + let result = lettok(LocatedSpan::new("let ")); + assert!(result.is_ok(), format!("result {:?} is not ok", result)); + if let Ok((_, tok)) = result { + assert_eq!(tok.fragment, "let"); + assert_eq!(tok.typ, TokenType::BAREWORD); + } +} + +#[test] +fn test_filter_token() { + let result = filtertok(LocatedSpan::new("filter ")); + assert!(result.is_ok(), format!("result {:?} is not ok", result)); + if let Ok((_, tok)) = result { + assert_eq!(tok.fragment, "filter"); + assert_eq!(tok.typ, TokenType::BAREWORD); + } +} + #[test] fn test_out_token() { let result = outtok(LocatedSpan::new("out ")); - assert!(result.is_done(), format!("result {:?} is not done", result)); - if let nom::IResult::Done(_, tok) = result { + assert!(result.is_ok(), format!("result {:?} is not ok", result)); + if let Ok((_, tok)) = result { assert_eq!(tok.fragment, "out"); assert_eq!(tok.typ, TokenType::BAREWORD); } @@ -35,8 +62,8 @@ fn test_out_token() { #[test] fn test_escape_quoted() { let result = escapequoted(LocatedSpan::new("foo \\\"bar\"")); - assert!(result.is_done(), format!("result {:?} is not ok", result)); - if let nom::IResult::Done(rest, frag) = result { + assert!(result.is_ok(), format!("result {:?} is not ok", result)); + if let Ok((rest, frag)) = result { assert_eq!(frag, "foo \"bar"); assert_eq!(rest.fragment, "\""); } @@ -45,8 +72,8 @@ fn test_escape_quoted() { #[test] fn test_pipe_quoted() { let result = pipequotetok(LocatedSpan::new("|foo|")); - assert!(result.is_done(), format!("result {:?} is not ok", result)); - if let nom::IResult::Done(_, tok) = result { + assert!(result.is_ok(), format!("result {:?} is not ok", result)); + if let Ok((_, tok)) = result { assert_eq!(tok.fragment, "foo".to_string()); assert_eq!(tok.typ, TokenType::PIPEQUOTE); } @@ -55,8 +82,8 @@ fn test_pipe_quoted() { #[test] fn test_string_with_escaping() { let result = strtok(LocatedSpan::new("\"foo \\\\ \\\"bar\"")); - assert!(result.is_done(), format!("result {:?} is not ok", result)); - if let nom::IResult::Done(_, tok) = result { + assert!(result.is_ok(), format!("result {:?} is not ok", result)); + if let Ok((_, tok)) = result { assert_eq!(tok.fragment, "foo \\ \"bar".to_string()); } } @@ -75,10 +102,10 @@ macro_rules! assert_token { ($input:expr, $typ:expr, $msg:expr) => { let result = token(LocatedSpan::new($input)); assert!( - result.is_done(), + result.is_ok(), format!("result {:?} is not a {}", result, $msg) ); - if let nom::IResult::Done(_, tok) = result { + if let Ok((_, tok)) = result { assert_eq!(tok.fragment, $input); assert_eq!(tok.typ, $typ); } @@ -146,11 +173,11 @@ fn test_parse_has_end() { #[test] fn test_parse_comment() { - assert!(comment(LocatedSpan::new("// comment\n")).is_done()); - assert!(comment(LocatedSpan::new("// comment")).is_done()); + assert!(comment(LocatedSpan::new("// comment\n")).is_ok()); + assert!(comment(LocatedSpan::new("// comment")).is_ok()); assert_eq!( comment(LocatedSpan::new("// comment\n")), - nom::IResult::Done( + Ok(( LocatedSpan { fragment: "", offset: 11, @@ -161,12 +188,12 @@ fn test_parse_comment() { fragment: " comment".to_string(), pos: Position { line: 1, column: 1 }, } - ) + )) ); - assert!(comment(LocatedSpan::new("// comment\r\n")).is_done()); + assert!(comment(LocatedSpan::new("// comment\r\n")).is_ok()); assert_eq!( comment(LocatedSpan::new("// comment\r\n")), - nom::IResult::Done( + Ok(( LocatedSpan { fragment: "", offset: 12, @@ -177,12 +204,12 @@ fn test_parse_comment() { fragment: " comment".to_string(), pos: Position { column: 1, line: 1 }, } - ) + )) ); - assert!(comment(LocatedSpan::new("// comment\r\n ")).is_done()); + assert!(comment(LocatedSpan::new("// comment\r\n ")).is_ok()); assert_eq!( comment(LocatedSpan::new("// comment\r\n ")), - nom::IResult::Done( + Ok(( LocatedSpan { fragment: " ", offset: 12, @@ -193,9 +220,9 @@ fn test_parse_comment() { fragment: " comment".to_string(), pos: Position { column: 1, line: 1 }, } - ) + )) ); - assert!(comment(LocatedSpan::new("// comment")).is_done()); + assert!(comment(LocatedSpan::new("// comment")).is_ok()); } #[test] @@ -212,7 +239,7 @@ fn test_match_word() { "foo" ); match result { - nom::IResult::Done(_, tok) => assert_eq!(tok, input[0]), + Ok((_, tok)) => assert_eq!(tok, input[0]), res => assert!(false, format!("Fail: {:?}", res)), } } @@ -231,11 +258,12 @@ fn test_match_word_empty_input() { "foo" ); match result { - nom::IResult::Done(_, _) => assert!(false, "Should have been an error but was Done"), - nom::IResult::Incomplete(_) => { + Ok((_, _)) => assert!(false, "Should have been an error but was Done"), + Err(nom::Err::Incomplete(_)) => { assert!(false, "Should have been an error but was Incomplete") } - nom::IResult::Error(_) => { + Err(nom::Err::Failure(_)) => assert!(false, "Should have been an error but was Failure"), + Err(nom::Err::Error(_)) => { // noop } } @@ -255,7 +283,7 @@ fn test_match_punct() { "!" ); match result { - nom::IResult::Done(_, tok) => assert_eq!(tok, input[0]), + Ok((_, tok)) => assert_eq!(tok, input[0]), res => assert!(false, format!("Fail: {:?}", res)), } } @@ -274,7 +302,7 @@ fn test_match_type() { BAREWORD ); match result { - nom::IResult::Done(_, tok) => assert_eq!(tok, input[0]), + Ok((_, tok)) => assert_eq!(tok, input[0]), res => assert!(false, format!("Fail: {:?}", res)), } }