diff --git a/src/ast/printer.rs b/src/ast/printer.rs
index 6a2ebb2..b56194e 100644
--- a/src/ast/printer.rs
+++ b/src/ast/printer.rs
@@ -12,105 +12,381 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 use std::borrow::BorrowMut;
-use std::error::Error;
 use std::io::Write;
 
-use crate::ast::walk::Walker;
 use crate::ast::*;
 
 // TODO(jwall): We really need a way to preserve comments for these.
 // Perhaps for code formatting we actually want to work on the token stream instead?
 
-pub struct Printer {
-    indent: u8,
-    curr_indent: u8,
-    w: Box,
-    pub errs: Vec>,
+/// Pretty-printer that renders AST nodes back out as source text.
+///
+/// `indent` is the number of spaces added per nesting level and
+/// `curr_indent` the number of spaces currently in effect. `err` is set by
+/// `render` to the I/O error that made it stop, if any.
+pub struct AstPrinter<W>
+where
+    W: Write,
+{
+    indent: usize,
+    curr_indent: usize,
+    w: W,
+    pub err: Option<std::io::Error>,
 }
 
-impl Printer {
-    pub fn new(indent: u8, w: Box) -> Self {
-        Printer {
+impl<W> AstPrinter<W>
+where
+    W: Write,
+{
+    /// Creates a printer that writes to `w` and indents nested scopes by
+    /// `indent` spaces per level.
+    pub fn new(indent: usize, w: W) -> Self {
+        AstPrinter {
             indent: indent,
             curr_indent: 0,
             w: w,
-            errs: Vec::new(),
+            err: None,
         }
     }
 
-    pub fn render_list_def(&mut self, def: &ListDef) -> std::io::Result<()> {
-        panic!("Unimplemented");
-        Ok(())
-    }
-
-    pub fn render_tuple_def(&mut self, def: &Vec<(Token, Expression)>) -> std::io::Result<()> {
-        panic!("Unimplemented");
-        Ok(())
-    }
-
-    pub fn render_value(&mut self, v: &Value) {
-        // TODO
-        let w: &mut Write = self.w.borrow_mut();
-        let result = match v {
-            Value::Boolean(b) => write!(w, "{}", b),
-            Value::Empty(_) => write!(w, "NULL"),
-            // TODO(jwall): Should we maintain precision for floats?
-            Value::Float(f) => write!(w, "{}", f),
-            Value::Int(i) => write!(w, "{}", i),
-            // TODO(jwall): Make sure that we properly escape quotes here when rendering this?
-            Value::Str(s) => write!(w, "\"{}\"", s),
-            Value::Symbol(s) => write!(w, "{}", s),
-            Value::List(l) => self.render_list_def(l),
-            Value::Tuple(tpl) => self.render_tuple_def(&tpl.val),
+    /// Writes a single token to the output.
+    ///
+    /// Only barewords, booleans, digits, `NULL` and quoted strings are emitted;
+    /// the remaining token types are still placeholders and write nothing.
+    pub fn visit_token(&mut self, t: &Token) -> std::io::Result<()> {
+        let w: &mut dyn Write = self.w.borrow_mut();
+        // Do we care about line length?
+        match t.typ {
+            TokenType::BAREWORD | TokenType::BOOLEAN | TokenType::DIGIT => {
+                write!(w, "{}", t.fragment)?;
+            }
+            TokenType::EMPTY => {
+                write!(w, "NULL")?;
+            }
+            TokenType::PUNCT => {
+                // TODO(jwall): We need to identify the points at which we
+                // introduce new lines and new indentation scopes.
+            }
+            TokenType::COMMENT => {
+                // We need to track some state here probably.
+                // Do we leave comments untouched?
+            }
+            TokenType::PIPEQUOTE => {
+                // FIXME I think is supposed to be removed.
+            }
+            TokenType::QUOTED => {
+                write!(w, "\"{}\"", Self::escape_quotes(&t.fragment))?;
+            }
+            TokenType::WS => {
+                // TODO(jwall): Track some state around new lines here?
+            }
+            TokenType::END => {
+                // NOOP
+            }
         };
-        if let Err(e) = result {
-            self.errs.push(Box::new(e));
-        }
+        Ok(())
     }
 
-    fn render_expr(&mut self, expr: &Expression) {
+    /// Returns the whitespace prefix for the current indentation level.
+    fn make_indent(&self) -> String {
+        " ".repeat(self.curr_indent)
+    }
+
+    /// Renders a list literal with one comma-terminated element per line.
+    fn render_list_def(&mut self, def: &ListDef) -> std::io::Result<()> {
+        write!(self.w, "[\n")?;
+        self.curr_indent += self.indent;
+        // If the element list is just 1 we might be able to collapse the list.
+        let indent = self.make_indent();
+        for e in def.elems.iter() {
+            write!(self.w, "{}", indent)?;
+            self.render_expr(e)?;
+            // Without the comma the elements run together on re-parse.
+            write!(self.w, ",\n")?;
+        }
+        self.curr_indent -= self.indent;
+        // Close the bracket at the indentation of the enclosing scope.
+        let closing = self.make_indent();
+        write!(self.w, "{}]", closing)?;
+        Ok(())
+    }
+
+    /// Renders a tuple literal with one `name = value,` field per line.
+    fn render_tuple_def(&mut self, def: &Vec<(Token, Expression)>) -> std::io::Result<()> {
+        write!(self.w, "{{\n")?;
+        // If the field list is just 1 we might be able to collapse the tuple.
+        self.curr_indent += self.indent;
+        let indent = self.make_indent();
+        for &(ref t, ref expr) in def.iter() {
+            write!(self.w, "{}", indent)?;
+            // TODO(jwall): Detect if there are strings and render as a quoted string.
+            write!(self.w, "{} = ", t.fragment)?;
+            self.render_expr(expr)?;
+            write!(self.w, ",\n")?;
+        }
+        // Restore the enclosing indentation; otherwise every tuple would
+        // permanently shift the rest of the output to the right.
+        self.curr_indent -= self.indent;
+        let closing = self.make_indent();
+        write!(self.w, "{}}}", closing)?;
+        Ok(())
+    }
+
+    /// Escapes `"` and `\` in `s` so it can be emitted inside double quotes.
+    fn escape_quotes(s: &str) -> String {
+        let mut escaped = String::with_capacity(s.len());
+        for c in s.chars() {
+            match c {
+                '"' => escaped.push_str("\\\""),
+                '\\' => escaped.push_str("\\\\"),
+                _ => escaped.push(c),
+            }
+        }
+        escaped
+    }
+
+    /// Renders a literal value.
+    pub fn render_value(&mut self, v: &Value) -> std::io::Result<()> {
+        match v {
+            Value::Boolean(b) => write!(self.w, "{}", if b.val { "true" } else { "false" })?,
+            Value::Empty(_) => write!(self.w, "NULL")?,
+            // TODO(jwall): We should maintain precision for floats?
+            Value::Float(f) => write!(self.w, "{}", f.val)?,
+            Value::Int(i) => write!(self.w, "{}", i.val)?,
+            Value::Str(s) => write!(self.w, "\"{}\"", Self::escape_quotes(&s.val))?,
+            Value::Symbol(s) => write!(self.w, "{}", s.val)?,
+            Value::List(l) => self.render_list_def(l)?,
+            Value::Tuple(tpl) => self.render_tuple_def(&tpl.val)?,
+        };
+        Ok(())
+    }
+
+    /// Renders an expression, recursing into its sub-expressions.
+    fn render_expr(&mut self, expr: &Expression) -> std::io::Result<()> {
         match expr {
-            Expression::Binary(_def) => {}
-            Expression::Call(_def) => {}
-            Expression::Copy(_def) => {}
-            Expression::Debug(_def) => {}
-            Expression::Fail(_def) => {}
-            Expression::Format(_def) => {}
-            Expression::Func(_def) => {}
-            Expression::FuncOp(_def) => {}
-            Expression::Grouped(_expr, _) => {}
-            Expression::Import(_def) => {}
-            Expression::Include(_def) => {}
-            Expression::Module(_def) => {}
-            Expression::Not(_def) => {}
-            Expression::Range(_def) => {}
-            Expression::Select(_def) => {}
-            Expression::Simple(_def) => {}
-        }
+            Expression::Binary(_def) => {
+                let op = match _def.kind {
+                    BinaryExprType::AND => " && ",
+                    BinaryExprType::OR => " || ",
+                    BinaryExprType::DOT => ".",
+                    // Equality is spelled `==`; a bare `=` is assignment.
+                    BinaryExprType::Equal => " == ",
+                    BinaryExprType::NotEqual => " != ",
+                    BinaryExprType::GTEqual => " >= ",
+                    BinaryExprType::LTEqual => " <= ",
+                    BinaryExprType::GT => " > ",
+                    BinaryExprType::LT => " < ",
+                    BinaryExprType::Add => " + ",
+                    BinaryExprType::Sub => " - ",
+                    BinaryExprType::Mul => " * ",
+                    BinaryExprType::Div => " / ",
+                    BinaryExprType::Mod => " %% ",
+                    BinaryExprType::IN => " in ",
+                    BinaryExprType::IS => " is ",
+                    BinaryExprType::REMatch => " ~ ",
+                    BinaryExprType::NotREMatch => " !~ ",
+                };
+                self.render_expr(&_def.left)?;
+                self.w.write_all(op.as_bytes())?;
+                self.render_expr(&_def.right)?;
+            }
+            Expression::Call(_def) => {
+                self.render_value(&_def.funcref)?;
+                self.w.write_all(b"(")?;
+                self.curr_indent += self.indent;
+                let indent = self.make_indent();
+                // One argument per line, separated by commas.
+                let mut sep = "";
+                for e in _def.arglist.iter() {
+                    write!(self.w, "{}\n{}", sep, indent)?;
+                    self.render_expr(e)?;
+                    sep = ",";
+                }
+                self.curr_indent -= self.indent;
+                if !sep.is_empty() {
+                    let closing = self.make_indent();
+                    write!(self.w, "\n{}", closing)?;
+                }
+                self.w.write_all(b")")?;
+            }
+            Expression::Copy(_def) => {
+                self.render_value(&_def.selector)?;
+                self.render_tuple_def(&_def.fields)?;
+            }
+            Expression::Debug(_def) => {
+                self.w.write_all(b"TRACE ")?;
+                self.render_expr(&_def.expr)?;
+            }
+            Expression::Fail(_def) => {
+                self.w.write_all(b"fail ")?;
+                self.render_expr(&_def.message)?;
+            }
+            Expression::Format(_def) => {
+                // The template is a string literal, so it has to be quoted.
+                write!(self.w, "\"{}\" % ", Self::escape_quotes(&_def.template))?;
+                match _def.args {
+                    FormatArgs::Single(ref e) => {
+                        self.render_expr(e)?;
+                    }
+                    FormatArgs::List(ref es) => {
+                        self.w.write_all(b"(")?;
+                        self.curr_indent += self.indent;
+                        let indent = self.make_indent();
+                        let mut sep = "";
+                        for e in es.iter() {
+                            write!(self.w, "{}\n{}", sep, indent)?;
+                            self.render_expr(e)?;
+                            sep = ",";
+                        }
+                        self.curr_indent -= self.indent;
+                        if !sep.is_empty() {
+                            let closing = self.make_indent();
+                            write!(self.w, "\n{}", closing)?;
+                        }
+                        self.w.write_all(b")")?;
+                    }
+                }
+            }
+            Expression::Func(_def) => {
+                self.w.write_all(b"func (")?;
+                // Separate (rather than terminate) arguments with commas.
+                let mut sep = "";
+                for n in _def.argdefs.iter() {
+                    write!(self.w, "{}{}", sep, n.val)?;
+                    sep = ", ";
+                }
+                self.w.write_all(b") => ")?;
+                self.render_expr(&_def.fields)?;
+            }
+            Expression::FuncOp(_def) => match _def {
+                FuncOpDef::Filter(_def) => {
+                    write!(self.w, "filter(")?;
+                    self.render_expr(&_def.func)?;
+                    write!(self.w, ", ")?;
+                    self.render_expr(&_def.target)?;
+                    write!(self.w, ")")?;
+                }
+                FuncOpDef::Reduce(_def) => {
+                    write!(self.w, "reduce(")?;
+                    self.render_expr(&_def.func)?;
+                    write!(self.w, ", ")?;
+                    self.render_expr(&_def.acc)?;
+                    write!(self.w, ", ")?;
+                    self.render_expr(&_def.target)?;
+                    write!(self.w, ")")?;
+                }
+                FuncOpDef::Map(_def) => {
+                    write!(self.w, "map(")?;
+                    self.render_expr(&_def.func)?;
+                    write!(self.w, ", ")?;
+                    self.render_expr(&_def.target)?;
+                    write!(self.w, ")")?;
+                }
+            },
+            Expression::Grouped(ref expr, _) => {
+                write!(self.w, "(")?;
+                self.render_expr(expr)?;
+                write!(self.w, ")")?;
+            }
+            Expression::Import(_def) => {
+                write!(
+                    self.w,
+                    "import \"{}\"",
+                    Self::escape_quotes(&_def.path.fragment)
+                )?;
+            }
+            Expression::Include(_def) => {
+                write!(
+                    self.w,
+                    "include {} \"{}\"",
+                    _def.typ.fragment,
+                    Self::escape_quotes(&_def.path.fragment)
+                )?;
+            }
+            Expression::Module(_def) => {
+                write!(self.w, "module ")?;
+                self.render_tuple_def(&_def.arg_set)?;
+                write!(self.w, " => ")?;
+                if let Some(ref e) = _def.out_expr {
+                    write!(self.w, "(")?;
+                    self.render_expr(e)?;
+                    write!(self.w, ") ")?;
+                }
+                // Statements each take a full line, so the body opens on a fresh line.
+                write!(self.w, "{{\n")?;
+                self.curr_indent += self.indent;
+                let indent = self.make_indent();
+                for stmt in _def.statements.iter() {
+                    write!(self.w, "{}", indent)?;
+                    self.render_stmt(stmt)?;
+                }
+                self.curr_indent -= self.indent;
+                let closing = self.make_indent();
+                write!(self.w, "{}}}", closing)?;
+            }
+            Expression::Not(_def) => {
+                write!(self.w, "not ")?;
+                self.render_expr(&_def.expr)?;
+            }
+            Expression::Range(_def) => {
+                // Ranges are written `start:end` or `start:step:end`.
+                self.render_expr(&_def.start)?;
+                write!(self.w, ":")?;
+                if let Some(ref e) = _def.step {
+                    self.render_expr(e)?;
+                    write!(self.w, ":")?;
+                }
+                self.render_expr(&_def.end)?;
+            }
+            Expression::Select(_def) => {
+                write!(self.w, "select ")?;
+                self.render_expr(&_def.val)?;
+                write!(self.w, ", ")?;
+                if let Some(ref e) = _def.default {
+                    self.render_expr(e)?;
+                    write!(self.w, ", ")?;
+                }
+                self.render_tuple_def(&_def.tuple)?;
+            }
+            Expression::Simple(ref _def) => {
+                self.render_value(_def)?;
+            }
+        };
+        Ok(())
     }
 
-    fn render_stmt(&mut self, stmt: &Statement) {
+    /// Renders one statement followed by `;` and a newline.
+    fn render_stmt(&mut self, stmt: &Statement) -> std::io::Result<()> {
+        // All statements start at the beginning of a line.
         match stmt {
-            Statement::Let(_def) => {}
-            Statement::Expression(_expr) => {}
-            Statement::Assert(_def) => {}
-            Statement::Output(_, _tok, _expr) => {}
-        }
+            Statement::Let(def) => {
+                write!(self.w, "let {} = ", def.name.fragment)?;
+                self.render_expr(&def.value)?;
+            }
+            Statement::Expression(_expr) => {
+                self.render_expr(&_expr)?;
+            }
+            Statement::Assert(def) => {
+                write!(self.w, "assert ")?;
+                self.render_expr(&def)?;
+            }
+            Statement::Output(_, _tok, _expr) => {
+                write!(self.w, "out {} = ", _tok.fragment)?;
+                self.render_expr(&_expr)?;
+            }
+        };
+        write!(self.w, ";\n")?;
+        Ok(())
     }
 
+    /// Renders `stmts` in order, stopping at the first I/O error and
+    /// recording it in `self.err`.
     pub fn render(&mut self, stmts: Vec<&mut Statement>) {
-        self.walk_statement_list(stmts);
-    }
-}
-
-impl Walker for Printer {
-    fn visit_value(&mut self, val: &mut Value) {
-        self.render_value(val);
-    }
-    fn visit_expression(&mut self, expr: &mut Expression) {
-        self.render_expr(expr);
-    }
-    fn visit_statement(&mut self, stmt: &mut Statement) {
-        self.render_stmt(stmt);
+        for v in stmts {
+            if let Err(e) = self.render_stmt(v) {
+                self.err = Some(e);
+                return;
+            }
+        }
     }
 }
diff --git a/src/parse/mod.rs b/src/parse/mod.rs
index 4578022..96bec15 100644
--- a/src/parse/mod.rs
+++ b/src/parse/mod.rs
@@ -854,7 +854,7 @@ fn statement(i: SliceIter) -> Result, Statement> {
 
 /// Parses a LocatedSpan into a list of Statements or an `error::Error`.
pub fn parse<'a>(input: OffsetStrIter<'a>) -> std::result::Result, String> { - match tokenize(input.clone()) { + match tokenize(input.clone(), true) { Ok(tokenized) => { let mut out = Vec::new(); let mut i_ = SliceIter::new(&tokenized); diff --git a/src/tokenizer/mod.rs b/src/tokenizer/mod.rs index b191fe7..07d5a74 100644 --- a/src/tokenizer/mod.rs +++ b/src/tokenizer/mod.rs @@ -452,7 +452,10 @@ fn token<'a>(input: OffsetStrIter<'a>) -> Result, Token> { } /// Consumes an input OffsetStrIter and returns either a Vec or a error::Error. -pub fn tokenize<'a>(input: OffsetStrIter<'a>) -> std::result::Result, String> { +pub fn tokenize<'a>( + input: OffsetStrIter<'a>, + skip_comments: bool, +) -> std::result::Result, String> { let mut out = Vec::new(); let mut i = input.clone(); loop { @@ -486,7 +489,7 @@ pub fn tokenize<'a>(input: OffsetStrIter<'a>) -> std::result::Result, } Result::Complete(rest, tok) => { i = rest; - if tok.typ == TokenType::COMMENT || tok.typ == TokenType::WS { + if (skip_comments && tok.typ == TokenType::COMMENT) || tok.typ == TokenType::WS { // we skip comments and whitespace continue; } diff --git a/src/tokenizer/test.rs b/src/tokenizer/test.rs index 2458226..0ef757c 100644 --- a/src/tokenizer/test.rs +++ b/src/tokenizer/test.rs @@ -89,7 +89,7 @@ fn test_string_with_escaping() { #[test] fn test_tokenize_bareword_with_dash() { let input = OffsetStrIter::new("foo-bar "); - let result = tokenize(input.clone()); + let result = tokenize(input.clone(), true); assert!(result.is_ok(), format!("result {:?} is not ok", result)); if let Ok(toks) = result { assert_eq!(toks.len(), 2); @@ -157,7 +157,7 @@ fn test_tokenize_one_of_each() { "map out filter assert let import func select as => [ ] { } ; = % / * \ + - . ( ) , 1 . 
foo \"bar\" // comment\n ; true false == < > <= >= !=", ); - let result = tokenize(input.clone()); + let result = tokenize(input.clone(), true); assert!(result.is_ok(), format!("result {:?} is not ok", result)); let v = result.unwrap(); for (i, t) in v.iter().enumerate() { @@ -170,7 +170,7 @@ fn test_tokenize_one_of_each() { #[test] fn test_parse_has_end() { let input = OffsetStrIter::new("foo"); - let result = tokenize(input.clone()); + let result = tokenize(input.clone(), true); assert!(result.is_ok()); let v = result.unwrap(); assert_eq!(v.len(), 2);