FEATURE: Begin the work of pretty printing the AST.

This commit is contained in:
Jeremy Wall 2019-05-14 21:03:23 -05:00
parent 2821d0953b
commit e86827f613
4 changed files with 330 additions and 80 deletions

View File

@ -12,105 +12,352 @@
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
use std::borrow::BorrowMut; use std::borrow::BorrowMut;
use std::error::Error;
use std::io::Write; use std::io::Write;
use crate::ast::walk::Walker;
use crate::ast::*; use crate::ast::*;
// TODO(jwall): We really need a way to preserve comments for these. // TODO(jwall): We really need a way to preserve comments for these.
// Perhaps for code formatting we actually want to work on the token stream instead? // Perhaps for code formatting we actually want to work on the token stream instead?
/// Pretty-printer state: `indent` is the amount added to `curr_indent` per nesting level,
/// `curr_indent` is the current indentation width, `w` is the output sink, and `err` holds the
/// I/O error recorded by `render` when a statement fails to print.
// NOTE(review): each line here appears to carry the removed `Printer` text followed by the added
// `AstPrinter<W>` text (side-by-side diff columns) — confirm against the real file.
pub struct Printer { pub struct AstPrinter<W>
indent: u8, where
curr_indent: u8, W: Write,
w: Box<dyn Write>, {
pub errs: Vec<Box<dyn Error>>, indent: usize,
curr_indent: usize,
w: W,
pub err: Option<std::io::Error>,
} }
impl Printer { impl<W> AstPrinter<W>
pub fn new(indent: u8, w: Box<dyn Write>) -> Self { where
Printer { W: Write,
{
/// Builds a printer that writes to `w` and steps `indent` spaces per nesting level; the current
/// indentation starts at 0 and no error is recorded.
pub fn new(indent: usize, w: W) -> Self {
AstPrinter {
indent: indent, indent: indent,
curr_indent: 0, curr_indent: 0,
w: w, w: w,
errs: Vec::new(), err: None,
} }
} }
/// Writes a single token to the output: BAREWORD/BOOLEAN/DIGIT fragments verbatim, EMPTY as
/// `NULL`, and QUOTED fragments wrapped in double quotes after escaping. PUNCT, COMMENT,
/// PIPEQUOTE, WS and END currently emit nothing. I/O errors are propagated with `?`.
// NOTE(review): the left-hand text on these lines looks like the removed `Printer::render_*`
// bodies from the previous revision fused with the new method — confirm against the real file.
pub fn render_list_def(&mut self, def: &ListDef) -> std::io::Result<()> { pub fn visit_token(&mut self, t: &Token) -> std::io::Result<()> {
panic!("Unimplemented");
Ok(())
}
pub fn render_tuple_def(&mut self, def: &Vec<(Token, Expression)>) -> std::io::Result<()> {
panic!("Unimplemented");
Ok(())
}
pub fn render_value(&mut self, v: &Value) {
// TODO
let w: &mut Write = self.w.borrow_mut(); let w: &mut Write = self.w.borrow_mut();
let result = match v { // Do we care about line length?
Value::Boolean(b) => write!(w, "{}", b), match t.typ {
Value::Empty(_) => write!(w, "NULL"), TokenType::BAREWORD | TokenType::BOOLEAN | TokenType::DIGIT => {
// TODO(jwall): Should we maintain precision for floats? write!(w, "{}", t.fragment)?;
Value::Float(f) => write!(w, "{}", f), }
Value::Int(i) => write!(w, "{}", i), TokenType::EMPTY => {
// TODO(jwall): Make sure that we properly escape quotes here when rendering this? write!(w, "NULL")?;
Value::Str(s) => write!(w, "\"{}\"", s), }
Value::Symbol(s) => write!(w, "{}", s), TokenType::PUNCT => {
Value::List(l) => self.render_list_def(l), // TODO(jwall): We need to identify the points at which we
Value::Tuple(tpl) => self.render_tuple_def(&tpl.val), // introduce new lines and new indentation scopes.
}
TokenType::COMMENT => {
// We need to track some state here probably.
// Do we leave comments untouched?
}
TokenType::PIPEQUOTE => {
// FIXME I think is supposed to be removed.
}
TokenType::QUOTED => {
// NOTE(review): `Write::write` may write fewer bytes than requested and the returned
// count is discarded here; `write_all` looks intended — confirm.
w.write(&['"' as u8])?;
write!(w, "{}", Self::escape_quotes(&t.fragment))?;
w.write(&['"' as u8])?;
}
TokenType::WS => {
// TODO(jwall): Track some state around new lines here?
}
TokenType::END => {
// NOOP
}
}; };
if let Err(e) = result { Ok(())
self.errs.push(Box::new(e));
}
} }
/// Returns a string of `curr_indent` space characters, used as the line prefix for the
/// current nesting level.
// NOTE(review): the leading `fn render_expr(...) {` on the next line looks like the old diff
// column fused with the new `make_indent` signature — confirm against the real file.
fn render_expr(&mut self, expr: &Expression) { fn make_indent(&self) -> String {
// TODO(jwall): This is probably inefficient but we'll improve it after
// we get it correct.
let indent: Vec<u8> = std::iter::repeat(' ' as u8)
.take(self.curr_indent)
.collect();
String::from_utf8_lossy(&indent).to_string()
}
/// Renders a list literal, one element per line.
///
/// A non-empty list is written as `[`, a newline, then each element prefixed with one extra
/// level of indentation and followed by `,` and a newline, and finally `]` at the enclosing
/// indentation level. An empty list is written as `[]`.
///
/// Every element carries a trailing comma, matching what `render_tuple_def` does for fields;
/// without a separator the printed elements would run together.
///
/// # Errors
/// Returns the first I/O error reported by the underlying writer.
fn render_list_def(&mut self, def: &ListDef) -> std::io::Result<()> {
    if def.elems.is_empty() {
        return self.w.write_all(b"[]");
    }
    self.w.write_all(b"[\n")?;
    // Elements live one level deeper than the bracket that opens the list.
    self.curr_indent += self.indent;
    let indent = self.make_indent();
    for e in def.elems.iter() {
        write!(self.w, "{}", indent)?;
        self.render_expr(e)?;
        self.w.write_all(b",\n")?;
    }
    self.curr_indent -= self.indent;
    // `write_all` (not `write`) so a short write cannot silently drop the bracket.
    let closing = self.make_indent();
    write!(self.w, "{}]", closing)
}
fn render_tuple_def(&mut self, def: &Vec<(Token, Expression)>) -> std::io::Result<()> {
self.w.write(&['{' as u8])?;
// If the field list is just 1 we might be able to collapse the tuple.
self.curr_indent += self.indent;
let indent = self.make_indent();
for &(ref t, ref expr) in def.iter() {
write!(self.w, "{}", indent)?;
// TODO(jwall): Detect if there are strings and render as a quoted string.
write!(&mut self.w, "{} = ", t.fragment)?;
self.render_expr(expr)?;
write!(&mut self.w, ",")?;
write!(self.w, "\n")?;
}
self.w.write(&['}' as u8])?;
Ok(())
}
/// Returns a copy of `s` with every `"` replaced by `\"` and every `\` replaced by `\\`;
/// all other characters are copied through unchanged.
fn escape_quotes(s: &str) -> String {
    s.chars().fold(String::new(), |mut out, ch| {
        match ch {
            '"' => out.push_str("\\\""),
            '\\' => out.push_str("\\\\"),
            other => out.push(other),
        }
        out
    })
}
/// Writes the literal form of `v` to the output.
///
/// Booleans print as `true`/`false`, the empty value as `NULL`, floats, ints and symbols via
/// their `val` field, strings double-quoted with quotes and backslashes escaped, and lists and
/// tuples through `render_list_def` / `render_tuple_def`. Any I/O error is returned.
pub fn render_value(&mut self, v: &Value) -> std::io::Result<()> {
    match v {
        Value::Boolean(flag) => {
            let text = if flag.val { "true" } else { "false" };
            write!(self.w, "{}", text)
        }
        Value::Empty(_) => write!(self.w, "NULL"),
        // TODO(jwall): We should maintain precision for floats?
        Value::Float(num) => write!(self.w, "{}", num.val),
        Value::Int(num) => write!(self.w, "{}", num.val),
        Value::Str(text) => {
            let escaped = Self::escape_quotes(&text.val);
            write!(self.w, "\"{}\"", escaped)
        }
        Value::Symbol(name) => write!(self.w, "{}", name.val),
        Value::List(list) => self.render_list_def(list),
        Value::Tuple(fields) => self.render_tuple_def(&fields.val),
    }
}
/// Writes the textual form of `expr` to the output, recursing into sub-expressions and
/// delegating values and tuple bodies to `render_value` / `render_tuple_def`. Any I/O error is
/// propagated to the caller with `?`.
// NOTE(review): lines that carry two `Expression::…` arms look like the old (left) and new
// (right) diff columns fused together — confirm against the real file.
fn render_expr(&mut self, expr: &Expression) -> std::io::Result<()> {
match expr { match expr {
Expression::Binary(_def) => {} Expression::Binary(_def) => {
Expression::Call(_def) => {} let op = match _def.kind {
Expression::Copy(_def) => {} BinaryExprType::AND => " && ",
Expression::Debug(_def) => {} BinaryExprType::OR => " || ",
Expression::Fail(_def) => {} BinaryExprType::DOT => ".",
// NOTE(review): Equal is rendered as " = ", the same text `render_stmt` uses for a let
// binding — confirm that "==" is not intended.
Expression::Format(_def) => {} BinaryExprType::Equal => " = ",
Expression::Func(_def) => {} BinaryExprType::NotEqual => " != ",
Expression::FuncOp(_def) => {} BinaryExprType::GTEqual => " >= ",
Expression::Grouped(_expr, _) => {} BinaryExprType::LTEqual => " <= ",
Expression::Import(_def) => {} BinaryExprType::GT => " > ",
Expression::Include(_def) => {} BinaryExprType::LT => " < ",
Expression::Module(_def) => {} BinaryExprType::Add => " + ",
Expression::Not(_def) => {} BinaryExprType::Sub => " - ",
Expression::Range(_def) => {} BinaryExprType::Mul => " * ",
Expression::Select(_def) => {} BinaryExprType::Div => " / ",
// NOTE(review): `op` is emitted with `as_bytes()`, not through a format string, so both
// percent signs of " %% " reach the output — confirm a single "%" is not intended.
Expression::Simple(_def) => {} BinaryExprType::Mod => " %% ",
} BinaryExprType::IN => " in ",
BinaryExprType::IS => " is ",
BinaryExprType::REMatch => " ~ ",
BinaryExprType::NotREMatch => " !~ ",
};
self.render_expr(&_def.left)?;
self.w.write(op.as_bytes())?;
self.render_expr(&_def.right)?;
}
Expression::Call(_def) => {
self.render_value(&_def.funcref)?;
self.w.write("(".as_bytes())?;
self.curr_indent += self.indent;
let indent = self.make_indent();
// NOTE(review): arguments are written one per line with no "," between them and no
// newline after the opening "(" — confirm the intended layout.
for e in _def.arglist.iter() {
self.w.write(indent.as_bytes())?;
self.render_expr(e)?;
self.w.write("\n".as_bytes())?;
}
self.curr_indent -= self.indent;
// NOTE(review): a second "(" is written here; a closing ")" looks intended — confirm.
self.w.write("(".as_bytes())?;
}
Expression::Copy(_def) => {
self.render_value(&_def.selector)?;
self.render_tuple_def(&_def.fields)?;
}
Expression::Debug(_def) => {
self.w.write("TRACE ".as_bytes())?;
self.render_expr(&_def.expr)?;
}
Expression::Fail(_def) => {
self.w.write("fail ".as_bytes())?;
self.render_expr(&_def.message)?;
}
Expression::Format(_def) => {
// NOTE(review): the template is escaped but written without surrounding quotes — confirm.
self.w
.write(Self::escape_quotes(&_def.template).as_bytes())?;
write!(self.w, " % ")?;
match _def.args {
FormatArgs::Single(ref e) => {
self.render_expr(e)?;
}
FormatArgs::List(ref es) => {
self.w.write("(".as_bytes())?;
self.curr_indent += self.indent;
let indent = self.make_indent();
for e in es.iter() {
self.w.write(indent.as_bytes())?;
self.render_expr(e)?;
self.w.write("\n".as_bytes())?;
}
self.curr_indent -= self.indent;
self.w.write(")".as_bytes())?;
}
}
}
Expression::Func(_def) => {
self.w.write("func (".as_bytes())?;
// Every argument name is followed by ", ", including the last one.
for n in _def.argdefs.iter() {
write!(self.w, "{}, ", n.val)?;
}
self.w.write(") => ".as_bytes())?;
self.render_expr(&_def.fields)?;
}
Expression::FuncOp(_def) => match _def {
FuncOpDef::Filter(_def) => {
write!(self.w, "filter(")?;
self.render_expr(&_def.func)?;
write!(self.w, ", ")?;
self.render_expr(&_def.target)?;
write!(self.w, ")")?;
}
FuncOpDef::Reduce(_def) => {
write!(self.w, "reduce(")?;
self.render_expr(&_def.func)?;
write!(self.w, ", ")?;
self.render_expr(&_def.acc)?;
write!(self.w, ", ")?;
self.render_expr(&_def.target)?;
write!(self.w, ")")?;
}
FuncOpDef::Map(_def) => {
write!(self.w, "map(")?;
self.render_expr(&_def.func)?;
write!(self.w, ", ")?;
self.render_expr(&_def.target)?;
write!(self.w, ")")?;
}
},
Expression::Grouped(ref expr, _) => {
write!(self.w, "(")?;
self.render_expr(expr)?;
write!(self.w, ")")?;
}
Expression::Import(_def) => {
write!(
self.w,
"import \"{}\"",
Self::escape_quotes(&_def.path.fragment)
)?;
}
Expression::Include(_def) => {
write!(
self.w,
"include {} \"{}\"",
_def.typ.fragment,
Self::escape_quotes(&_def.path.fragment)
)?;
}
Expression::Module(_def) => {
write!(self.w, "module ")?;
self.render_tuple_def(&_def.arg_set)?;
write!(self.w, " => ")?;
if let Some(ref e) = _def.out_expr {
write!(self.w, "(")?;
self.render_expr(e)?;
write!(self.w, ") ")?;
}
// NOTE(review): no newline follows the "{" written below, so the first statement's
// indent starts on the same line — confirm the intended layout.
write!(self.w, "{{")?;
self.curr_indent += self.indent;
let indent = self.make_indent();
for stmt in _def.statements.iter() {
write!(self.w, "{}", indent)?;
self.render_stmt(stmt)?;
}
self.curr_indent -= self.indent;
write!(self.w, "}}")?;
}
Expression::Not(_def) => {
write!(self.w, "not ")?;
self.render_expr(&_def.expr)?;
}
Expression::Range(_def) => {
self.render_expr(&_def.start)?;
// NOTE(review): with a step this emits start, ":", ":", step, end — two colons before
// the step and none before the end. Confirm the separator placement.
write!(self.w, ":")?;
if let Some(ref e) = _def.step {
write!(self.w, ":")?;
self.render_expr(e)?;
}
self.render_expr(&_def.end)?;
}
Expression::Select(_def) => {
//
write!(self.w, "select ")?;
self.render_expr(&_def.val)?;
write!(self.w, ", ")?;
if let Some(ref e) = _def.default {
self.render_expr(e)?;
write!(self.w, ", ")?;
}
self.render_tuple_def(&_def.tuple)?;
}
Expression::Simple(ref _def) => {
self.render_value(_def)?;
}
};
Ok(())
} }
/// Writes one statement followed by ";" and a newline: `let <name> = <expr>`, a bare
/// expression, `assert <expr>`, or `out <token> = <expr>`. I/O errors are propagated with `?`.
// NOTE(review): lines carrying two `Statement::…` arms look like the old (left) and new (right)
// diff columns fused together — confirm against the real file.
fn render_stmt(&mut self, stmt: &Statement) { fn render_stmt(&mut self, stmt: &Statement) -> std::io::Result<()> {
// All statements start at the beginning of a line.
match stmt { match stmt {
Statement::Let(_def) => {} Statement::Let(def) => {
Statement::Expression(_expr) => {} write!(&mut self.w, "let {} = ", def.name.fragment)?;
Statement::Assert(_def) => {} self.render_expr(&def.value)?;
Statement::Output(_, _tok, _expr) => {} }
} Statement::Expression(_expr) => {
self.render_expr(&_expr)?;
//
}
Statement::Assert(def) => {
write!(&mut self.w, "assert ")?;
self.render_expr(&def)?;
//
}
Statement::Output(_, _tok, _expr) => {
write!(&mut self.w, "out {} = ", _tok.fragment)?;
self.render_expr(&_expr)?;
//
}
};
write!(self.w, ";\n")?;
Ok(())
} }
/// Renders each statement in order via `render_stmt`. On the first failure the error is stored
/// in `self.err` and rendering stops; nothing is returned to the caller.
// NOTE(review): the left-hand text below (`walk_statement_list`, `impl Walker for Printer`, the
// `visit_*` methods) looks like the removed old-revision column — confirm against the real file.
pub fn render(&mut self, stmts: Vec<&mut Statement>) { pub fn render(&mut self, stmts: Vec<&mut Statement>) {
self.walk_statement_list(stmts); for v in stmts {
} if let Err(e) = self.render_stmt(v) {
} self.err = Some(e);
return;
impl Walker for Printer { }
fn visit_value(&mut self, val: &mut Value) { }
self.render_value(val);
}
fn visit_expression(&mut self, expr: &mut Expression) {
self.render_expr(expr);
}
fn visit_statement(&mut self, stmt: &mut Statement) {
self.render_stmt(stmt);
} }
} }

View File

@ -854,7 +854,7 @@ fn statement(i: SliceIter<Token>) -> Result<SliceIter<Token>, Statement> {
/// Parses a LocatedSpan into a list of Statements or an `error::Error`. /// Parses a LocatedSpan into a list of Statements or an `error::Error`.
pub fn parse<'a>(input: OffsetStrIter<'a>) -> std::result::Result<Vec<Statement>, String> { pub fn parse<'a>(input: OffsetStrIter<'a>) -> std::result::Result<Vec<Statement>, String> {
match tokenize(input.clone()) { match tokenize(input.clone(), true) {
Ok(tokenized) => { Ok(tokenized) => {
let mut out = Vec::new(); let mut out = Vec::new();
let mut i_ = SliceIter::new(&tokenized); let mut i_ = SliceIter::new(&tokenized);

View File

@ -452,7 +452,10 @@ fn token<'a>(input: OffsetStrIter<'a>) -> Result<OffsetStrIter<'a>, Token> {
} }
/// Consumes an input OffsetStrIter and returns either a Vec<Token> or a error::Error. /// Consumes an input OffsetStrIter and returns either a Vec<Token> or a error::Error.
pub fn tokenize<'a>(input: OffsetStrIter<'a>) -> std::result::Result<Vec<Token>, String> { pub fn tokenize<'a>(
input: OffsetStrIter<'a>,
skip_comments: bool,
) -> std::result::Result<Vec<Token>, String> {
let mut out = Vec::new(); let mut out = Vec::new();
let mut i = input.clone(); let mut i = input.clone();
loop { loop {
@ -486,7 +489,7 @@ pub fn tokenize<'a>(input: OffsetStrIter<'a>) -> std::result::Result<Vec<Token>,
} }
Result::Complete(rest, tok) => { Result::Complete(rest, tok) => {
i = rest; i = rest;
if tok.typ == TokenType::COMMENT || tok.typ == TokenType::WS { if (skip_comments && tok.typ == TokenType::COMMENT) || tok.typ == TokenType::WS {
// we skip comments and whitespace // we skip comments and whitespace
continue; continue;
} }

View File

@ -89,7 +89,7 @@ fn test_string_with_escaping() {
#[test] #[test]
fn test_tokenize_bareword_with_dash() { fn test_tokenize_bareword_with_dash() {
let input = OffsetStrIter::new("foo-bar "); let input = OffsetStrIter::new("foo-bar ");
let result = tokenize(input.clone()); let result = tokenize(input.clone(), true);
assert!(result.is_ok(), format!("result {:?} is not ok", result)); assert!(result.is_ok(), format!("result {:?} is not ok", result));
if let Ok(toks) = result { if let Ok(toks) = result {
assert_eq!(toks.len(), 2); assert_eq!(toks.len(), 2);
@ -157,7 +157,7 @@ fn test_tokenize_one_of_each() {
"map out filter assert let import func select as => [ ] { } ; = % / * \ "map out filter assert let import func select as => [ ] { } ; = % / * \
+ - . ( ) , 1 . foo \"bar\" // comment\n ; true false == < > <= >= !=", + - . ( ) , 1 . foo \"bar\" // comment\n ; true false == < > <= >= !=",
); );
let result = tokenize(input.clone()); let result = tokenize(input.clone(), true);
assert!(result.is_ok(), format!("result {:?} is not ok", result)); assert!(result.is_ok(), format!("result {:?} is not ok", result));
let v = result.unwrap(); let v = result.unwrap();
for (i, t) in v.iter().enumerate() { for (i, t) in v.iter().enumerate() {
@ -170,7 +170,7 @@ fn test_tokenize_one_of_each() {
#[test] #[test]
fn test_parse_has_end() { fn test_parse_has_end() {
let input = OffsetStrIter::new("foo"); let input = OffsetStrIter::new("foo");
let result = tokenize(input.clone()); let result = tokenize(input.clone(), true);
assert!(result.is_ok()); assert!(result.is_ok());
let v = result.unwrap(); let v = result.unwrap();
assert_eq!(v.len(), 2); assert_eq!(v.len(), 2);