// Copyright 2017 Jeremy Wall // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. //! The Parsing stage of the ucg compiler. use std::borrow::Borrow; use std::str::FromStr; use nom; use nom::IResult; use nom::InputLength; use nom_locate::LocatedSpan; use ast::*; use error; use tokenizer::*; type NomResult<'a, O> = nom::IResult, O, error::Error>; type ParseResult = Result; fn symbol_to_value(s: &Token) -> ParseResult { Ok(Value::Symbol(value_node!( s.fragment.to_string(), s.pos.clone() ))) } // symbol is a bare unquoted field. named!(symbol, match_type!(BAREWORD => symbol_to_value) ); fn str_to_value(s: &Token) -> ParseResult { Ok(Value::String(value_node!( s.fragment.to_string(), s.pos.clone() ))) } // quoted_value is a quoted string. named!(quoted_value, match_type!(STR => str_to_value) ); // Helper function to make the return types work for down below. fn triple_to_number(v: (Option, Option, Option)) -> ParseResult { let (pref, mut pref_pos) = match v.0 { None => ("", Position::new(0, 0)), Some(ref bs) => (bs.fragment.borrow(), bs.pos.clone()), }; let has_dot = v.1.is_some(); if v.0.is_some() && !has_dot && v.2.is_none() { let i = match FromStr::from_str(pref) { Ok(i) => i, Err(_) => { return Err(error::Error::new( format!("Not an integer! {}", pref), error::ErrorType::UnexpectedToken, pref_pos, )) } }; return Ok(Value::Int(value_node!(i, pref_pos))); } if v.0.is_none() && has_dot { pref_pos = v.1.unwrap().pos; } let (maybepos, suf) = match v.2 { None => (None, "".to_string()), Some(bs) => (Some(bs.pos), bs.fragment), }; let to_parse = pref.to_string() + "." + &suf; // TODO(jwall): if there is an error we should report where that error occured. let f = match FromStr::from_str(&to_parse) { Ok(f) => f, Err(_) => { return Err(error::Error::new( format!("Not a float! {}", to_parse), error::ErrorType::UnexpectedToken, maybepos.unwrap(), )) } }; return Ok(Value::Float(value_node!(f, pref_pos))); } // trace_macros!(true); // NOTE(jwall): HERE THERE BE DRAGONS. The order for these matters // alot. We need to process alternatives in order of decreasing // specificity. Unfortunately this means we are required to go in a // decreasing size order which messes with alt!'s completion logic. To // work around this we have to force Incomplete to be Error so that // alt! will try the next in the series instead of aborting. // // *IMPORTANT* // It also means this combinator is risky when used with partial // inputs. So handle with care. named!(number, map_res!(alt!( complete!(do_parse!( // 1.0 prefix: match_type!(DIGIT) >> has_dot: punct!(".") >> suffix: match_type!(DIGIT) >> (Some(prefix.clone()), Some(has_dot.clone()), Some(suffix.clone())) )) | complete!(do_parse!( // 1. prefix: match_type!(DIGIT) >> has_dot: punct!(".") >> (Some(prefix.clone()), Some(has_dot.clone()), None) )) | complete!(do_parse!( // .1 has_dot: punct!(".") >> suffix: match_type!(DIGIT) >> (None, Some(has_dot.clone()), Some(suffix.clone())) )) | do_parse!( // 1 prefix: match_type!(DIGIT) >> (Some(prefix.clone()), None, None) )), triple_to_number ) ); // trace_macros!(false); named!(boolean_value, do_parse!( b: match_type!(BOOLEAN) >> (Value::Boolean(Positioned{ val: b.fragment == "true", pos: b.pos, })) ) ); named!( field_value, do_parse!( field: match_type!(BAREWORD) >> punct!("=") >> value: expression >> (field, value) ) ); // Helper function to make the return types work for down below. fn vec_to_tuple(t: (Position, Option)) -> ParseResult { Ok(Value::Tuple(value_node!( t.1.unwrap_or(Vec::new()), t.0.line as usize, t.0.column as usize ))) } named!(field_list, separated_list!(punct!(","), field_value) ); named!( #[doc="Capture a tuple of named fields with values. {=,...}"], tuple, map_res!( do_parse!( pos: pos >> punct!("{") >> v: field_list >> opt_res!(punct!(",")) >> // nom's opt! macro doesn't preserve error types properly but this one does. punct!("}") >> (pos, Some(v)) ), vec_to_tuple ) ); fn tuple_to_list>(t: (Sp, Vec)) -> ParseResult { return Ok(Value::List(ListDef { elems: t.1, pos: t.0.into(), })); } named!(list_value, map_res!( do_parse!( start: punct!("[") >> elements: separated_list!(punct!(","), expression) >> opt_res!(punct!(",")) >> // nom's opt! macro doesn't preserve error types properly but this one does. punct!("]") >> (start.pos, elements) ), tuple_to_list ) ); named!(empty_value, do_parse!( pos: pos >> match_type!(EMPTY) >> (Value::Empty(pos)) ) ); named!(compound_value, alt!(list_value | tuple) ); named!(scalar_value, alt!( boolean_value | empty_value | number | quoted_value ) ); named!(value, alt!( selector_value | compound_value | scalar_value ) ); fn value_to_expression(v: Value) -> ParseResult { Ok(Expression::Simple(v)) } named!(simple_expression, map_res!( value, value_to_expression ) ); fn tuple_to_binary_expression( tpl: (Position, BinaryExprType, Expression, Expression), ) -> ParseResult { Ok(Expression::Binary(BinaryOpDef { kind: tpl.1, left: Box::new(tpl.2), right: Box::new(tpl.3), pos: Position::new(tpl.0.line as usize, tpl.0.column as usize), })) } /// do_binary_expr implements precedence based parsing where the more tightly bound parsers /// are passed in as lowerrule parsers. We default to grouped_expression and simple_expression as /// the most tightly bound expressions. macro_rules! do_binary_expr { ($i:expr, $oprule:ident!( $($args:tt)* ), $typ:expr) => { do_binary_expr!($i, $oprule!($($args)*), $typ, non_op_expression) }; ($i:expr, $oprule:ident!( $($args:tt)* ), $typ:expr, $lowerrule:ident) => { do_binary_expr!($i, $oprule!($($args)*), $typ, call!($lowerrule)) }; ($i:expr, $oprule:ident!( $($args:tt)* ), $typ:expr, $lowerrule:ident!( $($lowerargs:tt)* )) => { map_res!($i, do_parse!( pos: pos >> left: $lowerrule!($($lowerargs)*) >> $oprule!($($args)*) >> right: $lowerrule!($($lowerargs)*) >> (pos, $typ, left, right) ), tuple_to_binary_expression ) }; } // trace_macros!(true); named!(add_expression, do_binary_expr!(punct!("+"), BinaryExprType::Add, alt!(product_expression | simple_expression | grouped_expression)) ); // trace_macros!(false); named!(sub_expression, do_binary_expr!(punct!("-"), BinaryExprType::Sub, alt!(product_expression | simple_expression | grouped_expression)) ); named!(sum_expression, alt!(add_expression | sub_expression) ); named!(mul_expression, do_binary_expr!(punct!("*"), BinaryExprType::Mul) ); named!(div_expression, do_binary_expr!(punct!("/"), BinaryExprType::Div) ); named!(product_expression, alt!(mul_expression | div_expression) ); named!(math_expression, alt!(sum_expression | product_expression) ); // TODO(jwall): Change comparison operators to use the do_binary_expr! with precedence? fn tuple_to_compare_expression( tpl: (Position, CompareType, Expression, Expression), ) -> ParseResult { Ok(Expression::Compare(ComparisonDef { kind: tpl.1, left: Box::new(tpl.2), right: Box::new(tpl.3), pos: Position::new(tpl.0.line as usize, tpl.0.column as usize), })) } macro_rules! do_compare_expr { ($i:expr, $subrule:ident!( $($args:tt)* ), $typ:expr) => { map_res!($i, do_parse!( pos: pos >> left: alt!(simple_expression | grouped_expression | math_expression) >> $subrule!($($args)*) >> right: expression >> (pos, $typ, left, right) ), tuple_to_compare_expression ) }; } named!(eqeq_expression, do_compare_expr!(punct!("=="), CompareType::Equal) ); named!(not_eqeq_expression, do_compare_expr!(punct!("!="), CompareType::NotEqual) ); named!(lt_eqeq_expression, do_compare_expr!(punct!("<="), CompareType::LTEqual) ); named!(gt_eqeq_expression, do_compare_expr!(punct!(">="), CompareType::GTEqual) ); named!(gt_expression, do_compare_expr!(punct!(">"), CompareType::GT) ); named!(lt_expression, do_compare_expr!(punct!("<"), CompareType::LT) ); named!(compare_expression, alt!( eqeq_expression | not_eqeq_expression | lt_eqeq_expression | gt_eqeq_expression | lt_expression | gt_expression) ); named!(op_expression, alt!(math_expression | compare_expression) ); fn expression_to_grouped_expression(e: Expression) -> ParseResult { Ok(Expression::Grouped(Box::new(e))) } named!(grouped_expression, map_res!( preceded!(punct!("("), terminated!(expression, punct!(")"))), expression_to_grouped_expression ) ); fn symbol_or_expression(input: TokenIter) -> NomResult { let scalar_head = do_parse!(input, sym: alt!(symbol | compound_value) >> (sym)); match scalar_head { IResult::Incomplete(i) => IResult::Incomplete(i), IResult::Error(_) => grouped_expression(input), IResult::Done(rest, val) => { let res = peek!(rest.clone(), punct!(".")); match val { Value::Tuple(_) => { if res.is_done() { IResult::Done(rest, Expression::Simple(val)) } else { return IResult::Error(nom::ErrorKind::Custom(error::Error::new( "Expected (.) but no dot found".to_string(), error::ErrorType::IncompleteParsing, val.pos().clone(), ))); } } Value::List(_) => { if res.is_done() { IResult::Done(rest, Expression::Simple(val)) } else { return IResult::Error(nom::ErrorKind::Custom(error::Error::new( "Expected (.) but no dot found".to_string(), error::ErrorType::IncompleteParsing, val.pos().clone(), ))); } } _ => IResult::Done(rest, Expression::Simple(val)), } } } } fn selector_list(input: TokenIter) -> NomResult { let (rest, head) = match symbol_or_expression(input) { IResult::Done(rest, val) => (rest, val), IResult::Error(e) => { return IResult::Error(e); } IResult::Incomplete(i) => { return IResult::Incomplete(i); } }; let (rest, is_dot) = match punct!(rest, ".") { IResult::Done(rest, tok) => (rest, Some(tok)), IResult::Incomplete(i) => { return IResult::Incomplete(i); } IResult::Error(_) => (rest, None), }; let (rest, list) = if is_dot.is_some() { let (rest, list) = match separated_list!( rest, punct!("."), alt!(match_type!(BAREWORD) | match_type!(DIGIT)) ) { IResult::Done(rest, val) => (rest, val), IResult::Incomplete(i) => { return IResult::Incomplete(i); } IResult::Error(e) => { return IResult::Error(e); } }; if list.is_empty() { return IResult::Error(nom::ErrorKind::Custom(error::Error::new( "(.) with no selector fields after".to_string(), error::ErrorType::IncompleteParsing, is_dot.unwrap().pos, ))); } else { (rest, Some(list)) } } else { (rest, None) }; let sel_list = SelectorList { head: Box::new(head), tail: list, }; return IResult::Done(rest, sel_list); } fn tuple_to_copy(t: (SelectorDef, FieldList)) -> ParseResult { let pos = t.0.pos.clone(); Ok(Expression::Copy(CopyDef { selector: t.0, fields: t.1, pos: pos, })) } named!(copy_expression, map_res!( do_parse!( pos: pos >> selector: selector_list >> punct!("{") >> fields: field_list >> opt_res!(punct!(",")) >> // noms opt! macro does not preserve error types properly but this one does. punct!("}") >> (SelectorDef::new(selector, pos.line, pos.column as usize), fields) ), tuple_to_copy ) ); fn tuple_to_macro(mut t: (Position, Vec, Value)) -> ParseResult { match t.2 { Value::Tuple(v) => Ok(Expression::Macro(MacroDef { argdefs: t.1 .drain(0..) .map(|s| Positioned { pos: s.pos().clone(), val: s.to_string(), }) .collect(), fields: v.val, pos: t.0, })), val => Err(error::Error::new( format!("Expected Tuple Got {:?}", val), error::ErrorType::UnexpectedToken, t.0, )), } } named!(arglist, error::Error>, separated_list!(punct!(","), symbol)); named!(macro_expression, map_res!( do_parse!( pos: pos >> word!("macro") >> punct!("(") >> arglist: arglist >> punct!(")") >> punct!("=>") >> map: tuple >> (pos, arglist, map) ), tuple_to_macro ) ); fn tuple_to_select(t: (Position, Expression, Expression, Value)) -> ParseResult { match t.3 { Value::Tuple(v) => Ok(Expression::Select(SelectDef { val: Box::new(t.1), default: Box::new(t.2), tuple: v.val, pos: t.0, })), val => Err(error::Error::new( format!("Expected Tuple Got {:?}", val), error::ErrorType::UnexpectedToken, t.0, )), } } named!(select_expression, map_res!( do_parse!( start: word!("select") >> val: terminated!(expression, punct!(",")) >> default: terminated!(expression, punct!(",")) >> map: tuple >> (start.pos.clone(), val, default, map) ), tuple_to_select ) ); fn tuple_to_format(t: (Token, Vec)) -> ParseResult { Ok(Expression::Format(FormatDef { template: t.0.fragment.to_string(), args: t.1, pos: t.0.pos, })) } named!(format_expression, map_res!( do_parse!( tmpl: match_type!(STR) >> punct!("%") >> punct!("(") >> args: separated_list!(punct!(","), expression) >> punct!(")") >> (tmpl, args) ), tuple_to_format ) ); fn tuple_to_call(t: (Position, Value, Vec)) -> ParseResult { if let Value::Selector(def) = t.1 { Ok(Expression::Call(CallDef { macroref: def, arglist: t.2, pos: Position::new(t.0.line as usize, t.0.column as usize), })) } else { Err(error::Error::new( format!("Expected Selector Got {:?}", t.0), error::ErrorType::UnexpectedToken, Position::new(t.0.line as usize, t.0.column as usize), )) } } fn vec_to_selector_value(t: (Position, SelectorList)) -> ParseResult { Ok(Value::Selector(SelectorDef::new( t.1, t.0.line as usize, t.0.column as usize, ))) } named!(selector_value, map_res!( do_parse!( sl: selector_list >> (sl.head.pos().clone(), sl) ), vec_to_selector_value ) ); named!(call_expression, map_res!( do_parse!( macroname: selector_value >> punct!("(") >> args: separated_list!(punct!(","), expression) >> punct!(")") >> (macroname.pos().clone(), macroname, args) ), tuple_to_call ) ); fn symbol_or_list(input: TokenIter) -> NomResult { let sym = do_parse!(input, sym: symbol >> (sym)); match sym { IResult::Incomplete(i) => { return IResult::Incomplete(i); } IResult::Error(_) => match list_value(input) { IResult::Incomplete(i) => { return IResult::Incomplete(i); } IResult::Error(e) => { return IResult::Error(e); } IResult::Done(i, val) => { return IResult::Done(i, val); } }, IResult::Done(rest, val) => { return IResult::Done(rest, val); } } } fn tuple_to_list_op(tpl: (Position, Token, Value, Value)) -> ParseResult { let pos = tpl.0; let t = if &tpl.1.fragment == "map" { ListOpType::Map } else if &tpl.1.fragment == "filter" { ListOpType::Filter } else { return Err(error::Error::new( format!( "Expected one of 'map' or 'filter' but got '{}'", tpl.1.fragment ), error::ErrorType::UnexpectedToken, pos, )); }; let macroname = tpl.2; let list = tpl.3; if let Value::Selector(mut def) = macroname { // First of all we need to assert that this is a selector of at least // two sections. let fieldname: String = match &mut def.sel.tail { &mut None => { return Err(error::Error::new( format!("Missing a result field for the macro"), error::ErrorType::IncompleteParsing, pos, )); } &mut Some(ref mut tl) => { if tl.len() < 1 { return Err(error::Error::new( format!("Missing a result field for the macro"), error::ErrorType::IncompleteParsing, def.pos.clone(), )); } let fname = tl.pop(); fname.unwrap().fragment } }; if let Value::List(ldef) = list { return Ok(Expression::ListOp(ListOpDef { typ: t, mac: def, field: fieldname, target: ldef, pos: pos, })); } return Err(error::Error::new( format!("Expected a list but got {}", list.type_name()), error::ErrorType::UnexpectedToken, pos, )); } return Err(error::Error::new( format!("Expected a selector but got {}", macroname.type_name()), error::ErrorType::UnexpectedToken, pos, )); } named!(list_op_expression, map_res!( do_parse!( pos: pos >> optype: alt!(word!("map") | word!("filter")) >> macroname: selector_value >> list: symbol_or_list >> (pos, optype, macroname, list) ), tuple_to_list_op ) ); named!(non_op_expression, alt!(list_op_expression | macro_expression | format_expression | select_expression | call_expression | copy_expression | grouped_expression | simple_expression) ); named!(expression, alt!(complete!(op_expression) | complete!(non_op_expression)) ); fn expression_to_statement(v: Expression) -> ParseResult { Ok(Statement::Expression(v)) } named!(expression_statement, map_res!( terminated!(expression, punct!(";")), expression_to_statement ) ); fn tuple_to_let(t: (Token, Expression)) -> ParseResult { Ok(Statement::Let(LetDef { name: t.0, value: t.1, })) } named!(let_stmt_body, map_res!( do_parse!( name: match_type!(BAREWORD) >> punct!("=") >> val: expression >> punct!(";") >> (name, val)), tuple_to_let ) ); named!(let_statement, do_parse!( word!("let") >> pos: pos >> stmt: add_return_error!( nom::ErrorKind::Custom( error::Error::new( "Invalid syntax for let binding", error::ErrorType::ParseError, pos)), let_stmt_body) >> (stmt) ) ); fn tuple_to_import(t: (Token, Token)) -> ParseResult { Ok(Statement::Import(ImportDef { path: t.0, name: t.1, })) } named!(import_stmt_body, map_res!( do_parse!( path: match_type!(STR) >> word!("as") >> name: match_type!(BAREWORD) >> punct!(";") >> (path, name)), tuple_to_import ) ); named!(import_statement, do_parse!( word!("import") >> // past this point we know this is supposed to be an import statement. pos: pos >> stmt: add_return_error!( nom::ErrorKind::Custom( error::Error::new( "Invalid syntax for import", error::ErrorType::ParseError, pos)), import_stmt_body) >> (stmt) ) ); named!(statement, do_parse!( stmt: alt_complete!( import_statement | let_statement | expression_statement ) >> (stmt) ) ); /// Parses a LocatedSpan into a list of Statements or an error::Error. pub fn parse(input: LocatedSpan<&str>) -> Result, error::Error> { match tokenize(input) { Ok(tokenized) => { let mut out = Vec::new(); let mut i_ = TokenIter { source: tokenized.as_slice(), }; loop { let i = i_.clone(); if i[0].typ == TokenType::END { break; } match statement(i) { IResult::Error(nom::ErrorKind::Custom(e)) => { return Err(e); } IResult::Error(e) => { return Err(error::Error::new_with_errorkind( format!("Statement Parse error: {:?} current token: {:?}", e, i_[0]), error::ErrorType::ParseError, Position { line: i_[0].pos.line, column: i_[0].pos.column, }, e, )); } IResult::Incomplete(ei) => { return Err(error::Error::new( format!("Unexpected end of parsing input: {:?}", ei), error::ErrorType::IncompleteParsing, Position { line: i_[0].pos.line, column: i_[0].pos.column, }, )); } IResult::Done(rest, stmt) => { out.push(stmt); i_ = rest; if i_.input_len() == 0 { break; } } } } return Ok(out); } Err(e) => { return Err(error::Error::new( format!("Tokenization Error {:?}", e.1), error::ErrorType::ParseError, e.0, )); } } } #[cfg(test)] mod test;