Merge pull request #44 from zaphar/ast_pretty_print

FEATURE: Adds an autoformatting command to the ucg compiler.
2025-07-22 18:19:54 -04:00 · 2019-05-24 15:56:34 -05:00 · 2019-05-24 15:56:34 -05:00 · 6d16365af0
commit 6d16365af0
parent 53f34d1647 69eb7a398e
12 changed files with 1227 additions and 61 deletions
--- a/README.md
+++ b/README.md
@ -44,6 +44,7 @@ SUBCOMMANDS:
    converters    list the available converters
    env           Describe the environment variables ucg uses.
    eval          Evaluate an expression with an optional ucg file as context.
+    fmt           Format ucg files automatically.
    help          Prints this message or the help of the given subcommand(s)
    importers     list the available importers for includes
    test          Check a list of ucg files for errors and run test assertions.
--- a/examples/module_example/modules/host_module.ucg
+++ b/examples/module_example/modules/host_module.ucg
@ -1,4 +1,4 @@
-let host_mod = module{
+let host_mod = TRACE module{
    hostname="",
    mem=2048,
    cpu=2,
--- a/src/ast/mod.rs
+++ b/src/ast/mod.rs
@ -31,8 +31,11 @@ use abortable_parser;
 use crate::build::scope::Scope;
 use crate::build::Val;

+pub mod printer;
 pub mod walk;

+pub use walk::Walker;
+
 macro_rules! enum_type_equality {
    ( $slf:ident, $r:expr, $( $l:pat ),* ) => {
        match $slf {
@ -593,7 +596,7 @@ impl ModuleDef {
                }
            }
        };
-        let walker = walk::AstWalker::new().with_expr_handler(&rewrite_import);
+        let mut walker = walk::AstWalker::new().with_expr_handler(&rewrite_import);
        for stmt in self.statements.iter_mut() {
            walker.walk_statement(stmt);
        }
@ -753,6 +756,7 @@ impl fmt::Display for Expression {
 /// Encodes a let statement in the UCG AST.
 #[derive(Debug, PartialEq, Clone)]
 pub struct LetDef {
+    pub pos: Position,
    pub name: Token,
    pub value: Expression,
 }
@ -767,8 +771,19 @@ pub enum Statement {
    Let(LetDef),

    // Assert statement
-    Assert(Expression),
+    Assert(Position, Expression),

    // Identify an Expression for output.
    Output(Position, Token, Expression),
 }
+
+impl Statement {
+    /// Returns the position recorded for this statement.
+    ///
+    /// Bare expression statements defer to the position of the wrapped
+    /// expression; every other variant stores its own position.
+    fn pos(&self) -> &Position {
+        match *self {
+            Statement::Let(ref let_def) => &let_def.pos,
+            Statement::Assert(ref position, _) | Statement::Output(ref position, _, _) => position,
+            Statement::Expression(ref expression) => expression.pos(),
+        }
+    }
+}
--- a/src/ast/printer/mod.rs
+++ b/src/ast/printer/mod.rs
@ -0,0 +1,573 @@
+// Copyright 2019 Jeremy Wall
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+use std::io::Write;
+
+use crate::ast::*;
+use crate::parse::CommentMap;
+
+// TODO(jwall): We really need a way to preserve comments for these.
+// Perhaps for code formatting we actually want to work on the token stream instead?
+
+/// Renders UCG statements and expressions as formatted source text into a
+/// writer, optionally interleaving comments taken from a `CommentMap`.
+pub struct AstPrinter<'a, W: Write> {
+    /// Number of spaces added per nesting level.
+    indent_size: usize,
+    /// Number of spaces currently written in front of indented lines.
+    curr_indent: usize,
+    /// Destination of the rendered output.
+    w: W,
+    /// Comment groups indexed by the line that the comment was on.
+    /// Used to determine when to print a comment.
+    comment_map: Option<&'a CommentMap>,
+    /// Line most recently passed to the comment/statement renderers.
+    last_line: usize,
+    /// Lines whose comment group has not been printed yet. The printer reads
+    /// and pops the next pending line from the end of this vector.
+    comment_group_lines: Vec<usize>,
+}
+
+// TODO(jwall): At some point we probably want to be more aware of line length
+// in our formatting. But not at the moment.
+impl<'a, W> AstPrinter<'a, W>
+where
+    W: Write,
+{
+    /// Creates a printer writing to `w` that indents nested constructs by
+    /// `indent` spaces per level. No comment map is attached and the current
+    /// indent starts at zero.
+    pub fn new(indent: usize, w: W) -> Self {
+        Self {
+            w,
+            comment_map: None,
+            comment_group_lines: Vec::new(),
+            indent_size: indent,
+            curr_indent: 0,
+            last_line: 0,
+        }
+    }
+
+    /// Attaches `map` so that its comment groups are emitted alongside the
+    /// rendered code, and returns the updated printer.
+    pub fn with_comment_map(mut self, map: &'a CommentMap) -> Self {
+        // Collect the keys back to front so that the first key of the map
+        // ends up at the end of the vector, where the printer pops from.
+        self.comment_group_lines = map.keys().rev().cloned().collect();
+        self.comment_map = Some(map);
+        self
+    }
+
+    /// Returns a string of `curr_indent` space characters.
+    fn make_indent(&self) -> String {
+        " ".repeat(self.curr_indent)
+    }
+
+    /// Returns `true` when `s` is non-empty and consists solely of ASCII
+    /// letters and underscores. Such field names are printed without quotes.
+    fn is_bareword(s: &str) -> bool {
+        let allowed = |c: char| c.is_ascii_alphabetic() || c == '_';
+        !s.is_empty() && s.chars().all(allowed)
+    }
+
+    /// Writes the comment group stored for `line`, one `//` comment per output
+    /// line at the current indent, then pops the entry at the end of
+    /// `comment_group_lines`.
+    ///
+    /// Does nothing when no comment map is attached. A `line` without a stored
+    /// group writes nothing but still pops the pending entry.
+    fn print_comment_group(&mut self, line: usize) -> std::io::Result<()> {
+        let map = match self.comment_map {
+            Some(map) => map,
+            None => return Ok(()),
+        };
+        let prefix = self.make_indent();
+        if let Some(group) = map.get(&line) {
+            for comment in group.iter() {
+                let body = comment.fragment.trim_end();
+                // Comments that already start with whitespace keep it as is;
+                // everything else gets one space after the slashes.
+                if comment.fragment.starts_with(char::is_whitespace) {
+                    write!(self.w, "{}//{}\n", prefix, body)?;
+                } else {
+                    write!(self.w, "{}// {}\n", prefix, body)?;
+                }
+            }
+        }
+        self.comment_group_lines.pop();
+        Ok(())
+    }
+
+    /// Prints every pending comment group whose line is at or before `line`.
+    ///
+    /// After each group a blank line is written when the group sits more than
+    /// one line above `line`, which keeps detached comment groups visually
+    /// separated from whatever is rendered next.
+    ///
+    /// # Errors
+    ///
+    /// Returns any I/O error reported by the underlying writer.
+    fn render_missed_comments(&mut self, line: usize) -> std::io::Result<()> {
+        while let Some(&next_comment_line) = self.comment_group_lines.last() {
+            if next_comment_line > line {
+                break;
+            }
+            self.print_comment_group(next_comment_line)?;
+            // `saturating_sub` instead of `line - 1`: a plain subtraction
+            // underflows `usize` when `line` is 0.
+            if next_comment_line < line.saturating_sub(1) {
+                write!(self.w, "\n")?;
+            }
+        }
+        Ok(())
+    }
+
+    /// Prints the pending comment groups located at or before `line` and, on
+    /// success, records `line` in `last_line`.
+    fn render_comment_if_needed(&mut self, line: usize) -> std::io::Result<()> {
+        let flushed = self.render_missed_comments(line);
+        if flushed.is_ok() {
+            self.last_line = line;
+        }
+        flushed
+    }
+
+    /// Reports whether the next pending comment group sits on a line strictly
+    /// before `line`. Returns `false` when no comment group is pending.
+    fn has_comment(&self, line: usize) -> bool {
+        match self.comment_group_lines.last() {
+            Some(&pending) => pending < line,
+            None => false,
+        }
+    }
+
+    /// Renders a list literal.
+    ///
+    /// An empty list is written as `[]`. Otherwise each element goes on its
+    /// own line, indented one level deeper and followed by a trailing comma,
+    /// with pending comments printed ahead of the element they precede.
+    ///
+    /// # Errors
+    ///
+    /// Returns any I/O error reported by the underlying writer.
+    fn render_list_def(&mut self, def: &ListDef) -> std::io::Result<()> {
+        write!(self.w, "[")?;
+        self.curr_indent += self.indent_size;
+        let indent = self.make_indent();
+        let has_fields = !def.elems.is_empty();
+        if has_fields {
+            write!(self.w, "\n")?;
+        }
+        for e in def.elems.iter() {
+            self.render_comment_if_needed(e.pos().line)?;
+            write!(self.w, "{}", indent)?;
+            self.render_expr(e)?;
+            write!(self.w, ",\n")?;
+        }
+        self.curr_indent -= self.indent_size;
+        if has_fields {
+            // The closing bracket lines up with the line that opened the list.
+            write!(self.w, "{}", self.make_indent())?;
+        }
+        // `write_all` rather than `write`: `write` may accept fewer bytes than
+        // it was given, and the returned count was being discarded.
+        self.w.write_all(b"]")
+    }
+
+    /// Renders a tuple body from its `(field name, value)` pairs.
+    ///
+    /// An empty tuple is written as `{}`. Otherwise each field goes on its own
+    /// line as `name = value,`, indented one level deeper. Field names that
+    /// are not barewords are written as quoted, escaped strings. Comments
+    /// pending up to the field's line, and up to the value's line when it
+    /// differs, are printed before the field.
+    ///
+    /// # Errors
+    ///
+    /// Returns any I/O error reported by the underlying writer.
+    fn render_tuple_def(&mut self, def: &Vec<(Token, Expression)>) -> std::io::Result<()> {
+        // `write_all` rather than `write`: `write` may accept fewer bytes than
+        // it was given, and the returned count was being discarded.
+        self.w.write_all(b"{")?;
+        // If the field list is just 1 we might be able to collapse the tuple.
+        self.curr_indent += self.indent_size;
+        let indent = self.make_indent();
+        let has_fields = !def.is_empty();
+        if has_fields {
+            write!(self.w, "\n")?;
+        }
+        for &(ref t, ref expr) in def.iter() {
+            let field_line = t.pos.line;
+            let expr_line = expr.pos().line;
+            self.render_comment_if_needed(field_line)?;
+            if expr_line != field_line {
+                self.render_comment_if_needed(expr_line)?;
+            }
+            write!(self.w, "{}", indent)?;
+            if Self::is_bareword(&t.fragment) {
+                write!(self.w, "{} = ", t.fragment)?;
+            } else {
+                write!(self.w, "\"{}\" = ", Self::escape_quotes(&t.fragment))?;
+            }
+            self.render_expr(expr)?;
+            write!(self.w, ",\n")?;
+        }
+        self.curr_indent -= self.indent_size;
+        if has_fields {
+            // The closing brace lines up with the line that opened the tuple.
+            write!(self.w, "{}", self.make_indent())?;
+        }
+        self.w.write_all(b"}")
+    }
+
+    /// Returns a copy of `s` in which every double quote and every backslash
+    /// is preceded by a backslash, ready to be placed between double quotes.
+    fn escape_quotes(s: &str) -> String {
+        s.chars().fold(String::new(), |mut out, ch| {
+            match ch {
+                '"' => out.push_str("\\\""),
+                '\\' => out.push_str("\\\\"),
+                other => out.push(other),
+            }
+            out
+        })
+    }
+
+    /// Renders a literal value.
+    ///
+    /// Scalars are written inline, strings are quoted and escaped, and lists
+    /// and tuples are handed to their dedicated multi-line renderers.
+    pub fn render_value(&mut self, v: &Value) -> std::io::Result<()> {
+        match v {
+            Value::Tuple(tuple) => self.render_tuple_def(&tuple.val),
+            Value::List(list) => self.render_list_def(list),
+            Value::Symbol(sym) => write!(self.w, "{}", sym.val),
+            Value::Str(text) => write!(self.w, "\"{}\"", Self::escape_quotes(&text.val)),
+            Value::Int(int) => write!(self.w, "{}", int.val),
+            // TODO(jwall): We should maintain precision for floats?
+            Value::Float(float) => write!(self.w, "{}", float.val),
+            Value::Empty(_) => write!(self.w, "NULL"),
+            Value::Boolean(flag) => {
+                let text = if flag.val { "true" } else { "false" };
+                write!(self.w, "{}", text)
+            }
+        }
+    }
+
+    /// Renders `expr` as UCG source text.
+    ///
+    /// Comment groups pending at or before the expression's line are printed
+    /// first. When a comment strictly precedes the expression, the current
+    /// indent is written again so the expression starts at the right column.
+    /// Sub-expressions are rendered recursively; constructs that have to break
+    /// the line to make room for a comment indent their operands by one level
+    /// and restore the indent before returning.
+    ///
+    /// Ranges are written as `start:end`, or `start:step:end` when a step is
+    /// present.
+    ///
+    /// # Errors
+    ///
+    /// Returns any I/O error reported by the underlying writer.
+    pub fn render_expr(&mut self, expr: &Expression) -> std::io::Result<()> {
+        let had_comment = self.has_comment(expr.pos().line);
+        self.render_comment_if_needed(expr.pos().line)?;
+        if had_comment {
+            // The comment ended with a newline, so re-establish the indent.
+            let indent = self.make_indent();
+            write!(self.w, "{}", indent)?;
+        }
+        // Set by branches that push one indent level for their operands; the
+        // level is popped again after the match.
+        let mut did_indent = false;
+        // NOTE: all literal output below goes through `write!`/`write_all`.
+        // `Write::write` may accept fewer bytes than it was given and must not
+        // be used with its result discarded.
+        match expr {
+            Expression::Binary(def) => {
+                let op = match def.kind {
+                    BinaryExprType::AND => " && ",
+                    BinaryExprType::OR => " || ",
+                    BinaryExprType::DOT => ".",
+                    BinaryExprType::Equal => " == ",
+                    BinaryExprType::NotEqual => " != ",
+                    BinaryExprType::GTEqual => " >= ",
+                    BinaryExprType::LTEqual => " <= ",
+                    BinaryExprType::GT => " > ",
+                    BinaryExprType::LT => " < ",
+                    BinaryExprType::Add => " + ",
+                    BinaryExprType::Sub => " - ",
+                    BinaryExprType::Mul => " * ",
+                    BinaryExprType::Div => " / ",
+                    // Written verbatim (not through a format string), so this
+                    // is the two-character `%%` operator.
+                    BinaryExprType::Mod => " %% ",
+                    BinaryExprType::IN => " in ",
+                    BinaryExprType::IS => " is ",
+                    BinaryExprType::REMatch => " ~ ",
+                    BinaryExprType::NotREMatch => " !~ ",
+                };
+                let right_line = def.right.pos().line;
+                self.render_expr(&def.left)?;
+                self.w.write_all(op.as_bytes())?;
+                if self.has_comment(right_line) {
+                    // if we'll be rendering a comment then we should
+                    // add a new line here
+                    self.w.write_all(b"\n")?;
+                }
+                self.render_expr(&def.right)?;
+            }
+            Expression::Call(def) => {
+                self.render_value(&def.funcref)?;
+                self.w.write_all(b"(")?;
+                self.curr_indent += self.indent_size;
+                let indent = self.make_indent();
+                // Zero or one argument stays on the same line as the call.
+                let has_args = def.arglist.len() > 1;
+                if has_args {
+                    write!(self.w, "\n")?;
+                }
+                for e in def.arglist.iter() {
+                    self.render_comment_if_needed(e.pos().line)?;
+                    if has_args {
+                        write!(self.w, "{}", indent)?;
+                    }
+                    self.render_expr(e)?;
+                    if has_args {
+                        self.w.write_all(b",\n")?;
+                    }
+                }
+                self.curr_indent -= self.indent_size;
+                if has_args {
+                    write!(self.w, "{}", self.make_indent())?;
+                }
+                self.w.write_all(b")")?;
+            }
+            Expression::Copy(def) => {
+                self.render_value(&def.selector)?;
+                self.render_tuple_def(&def.fields)?;
+            }
+            Expression::Debug(def) => {
+                self.w.write_all(b"TRACE ")?;
+                if self.has_comment(def.expr.pos().line) {
+                    self.w.write_all(b"\n")?;
+                }
+                self.render_expr(&def.expr)?;
+            }
+            Expression::Fail(def) => {
+                self.w.write_all(b"fail ")?;
+                if self.has_comment(def.message.pos().line) {
+                    self.w.write_all(b"\n")?;
+                }
+                self.render_expr(&def.message)?;
+            }
+            Expression::Format(def) => {
+                write!(self.w, "\"{}\"", Self::escape_quotes(&def.template))?;
+                write!(self.w, " % ")?;
+                match def.args {
+                    FormatArgs::Single(ref e) => {
+                        if self.has_comment(e.pos().line) {
+                            self.w.write_all(b"\n")?;
+                        }
+                        self.render_expr(e)?;
+                    }
+                    FormatArgs::List(ref es) => {
+                        self.w.write_all(b"(\n")?;
+                        self.curr_indent += self.indent_size;
+                        let indent = self.make_indent();
+                        // An extra newline goes in front of the first argument
+                        // when a comment is pending before it.
+                        let first_has_comment = es
+                            .first()
+                            .map(|e| self.has_comment(e.pos().line))
+                            .unwrap_or(false);
+                        let mut prefix = if first_has_comment { "\n" } else { "" };
+                        for e in es.iter() {
+                            write!(self.w, "{}{}", prefix, indent)?;
+                            self.render_expr(e)?;
+                            prefix = ",\n";
+                        }
+                        self.curr_indent -= self.indent_size;
+                        self.w.write_all(b")")?;
+                    }
+                }
+            }
+            Expression::Func(def) => {
+                self.w.write_all(b"func (")?;
+                // One loop covers zero, one and many arguments alike; every
+                // name is written from its `val`.
+                let mut prefix = "";
+                for n in def.argdefs.iter() {
+                    write!(self.w, "{}{}", prefix, n.val)?;
+                    prefix = ", ";
+                }
+                self.w.write_all(b") => ")?;
+                self.render_expr(&def.fields)?;
+            }
+            Expression::FuncOp(op_def) => match op_def {
+                FuncOpDef::Filter(def) => {
+                    write!(self.w, "filter(")?;
+                    if self.has_comment(def.func.pos().line) {
+                        self.curr_indent += self.indent_size;
+                        did_indent = true;
+                        write!(self.w, "\n")?;
+                    }
+                    self.render_expr(&def.func)?;
+                    if self.has_comment(def.target.pos().line) {
+                        write!(self.w, ",")?;
+                        if !did_indent {
+                            self.curr_indent += self.indent_size;
+                        }
+                        did_indent = true;
+                        self.w.write_all(b"\n")?;
+                    } else {
+                        write!(self.w, ", ")?;
+                    }
+                    self.render_expr(&def.target)?;
+                    write!(self.w, ")")?;
+                }
+                FuncOpDef::Reduce(def) => {
+                    write!(self.w, "reduce(")?;
+                    if self.has_comment(def.func.pos().line) {
+                        self.curr_indent += self.indent_size;
+                        did_indent = true;
+                        write!(self.w, "\n")?;
+                    }
+                    self.render_expr(&def.func)?;
+                    if self.has_comment(def.acc.pos().line) {
+                        write!(self.w, ",")?;
+                        if !did_indent {
+                            self.curr_indent += self.indent_size;
+                        }
+                        did_indent = true;
+                        self.w.write_all(b"\n")?;
+                    } else {
+                        write!(self.w, ", ")?;
+                    }
+                    self.render_expr(&def.acc)?;
+                    if self.has_comment(def.target.pos().line) {
+                        write!(self.w, ",")?;
+                        if !did_indent {
+                            self.curr_indent += self.indent_size;
+                        }
+                        did_indent = true;
+                        self.w.write_all(b"\n")?;
+                    } else {
+                        write!(self.w, ", ")?;
+                    }
+                    self.render_expr(&def.target)?;
+                    write!(self.w, ")")?;
+                }
+                FuncOpDef::Map(def) => {
+                    write!(self.w, "map(")?;
+                    if self.has_comment(def.func.pos().line) {
+                        self.curr_indent += self.indent_size;
+                        did_indent = true;
+                        write!(self.w, "\n")?;
+                    }
+                    self.render_expr(&def.func)?;
+                    if self.has_comment(def.target.pos().line) {
+                        write!(self.w, ",")?;
+                        if !did_indent {
+                            self.curr_indent += self.indent_size;
+                        }
+                        did_indent = true;
+                        self.w.write_all(b"\n")?;
+                    } else {
+                        write!(self.w, ", ")?;
+                    }
+                    self.render_expr(&def.target)?;
+                    write!(self.w, ")")?;
+                }
+            },
+            Expression::Grouped(inner, _) => {
+                write!(self.w, "(")?;
+                if self.has_comment(inner.pos().line) {
+                    self.curr_indent += self.indent_size;
+                    did_indent = true;
+                    write!(self.w, "\n")?;
+                }
+                self.render_expr(inner)?;
+                if did_indent {
+                    write!(self.w, "\n")?;
+                }
+                write!(self.w, ")")?;
+            }
+            Expression::Import(def) => {
+                if self.has_comment(def.path.pos.line) {
+                    self.render_missed_comments(def.path.pos.line)?;
+                }
+                write!(
+                    self.w,
+                    "import \"{}\"",
+                    Self::escape_quotes(&def.path.fragment)
+                )?;
+            }
+            Expression::Include(def) => {
+                if self.has_comment(def.path.pos.line) {
+                    self.render_missed_comments(def.path.pos.line)?;
+                }
+                write!(
+                    self.w,
+                    "include {} \"{}\"",
+                    def.typ.fragment,
+                    Self::escape_quotes(&def.path.fragment)
+                )?;
+            }
+            Expression::Module(def) => {
+                write!(self.w, "module ")?;
+                self.render_tuple_def(&def.arg_set)?;
+                write!(self.w, " => ")?;
+                if let Some(ref e) = def.out_expr {
+                    write!(self.w, "(")?;
+                    self.render_expr(e)?;
+                    write!(self.w, ") ")?;
+                }
+                write!(self.w, "{{\n")?;
+                self.curr_indent += self.indent_size;
+                let indent = self.make_indent();
+                for stmt in def.statements.iter() {
+                    // Print the statement's comments before its indent.
+                    // `render_stmt` prints them itself, but only after the
+                    // indent below has been written, which would leave the
+                    // comment double-indented and the statement unindented.
+                    self.render_comment_if_needed(stmt.pos().line)?;
+                    write!(self.w, "{}", indent)?;
+                    self.render_stmt(stmt)?;
+                }
+                self.curr_indent -= self.indent_size;
+                // Line the closing brace up with the enclosing indent, the
+                // same way tuples and lists close.
+                let closing_indent = self.make_indent();
+                write!(self.w, "{}}}", closing_indent)?;
+            }
+            Expression::Not(def) => {
+                if self.has_comment(def.pos.line) {
+                    self.render_missed_comments(def.pos.line)?;
+                }
+                write!(self.w, "not ")?;
+                self.render_expr(&def.expr)?;
+            }
+            Expression::Range(def) => {
+                // We print all of the comments we missed before the end of this
+                // expression before the entire range expression.
+                let end_line = def.end.pos().line;
+                if self.has_comment(end_line) {
+                    self.render_missed_comments(end_line)?;
+                }
+                self.render_expr(&def.start)?;
+                write!(self.w, ":")?;
+                if let Some(ref e) = def.step {
+                    // The step sits between start and end and needs its own
+                    // trailing separator: `start:step:end`.
+                    self.render_expr(e)?;
+                    write!(self.w, ":")?;
+                }
+                self.render_expr(&def.end)?;
+            }
+            Expression::Select(def) => {
+                let val_line = def.val.pos().line;
+                if self.has_comment(val_line) {
+                    self.render_missed_comments(val_line)?;
+                }
+                if let Some(ref e) = def.default {
+                    let default_line = e.pos().line;
+                    if self.has_comment(default_line) {
+                        self.render_missed_comments(default_line)?;
+                    }
+                }
+                write!(self.w, "select ")?;
+                self.render_expr(&def.val)?;
+                write!(self.w, ", ")?;
+                if let Some(ref e) = def.default {
+                    self.render_expr(e)?;
+                    write!(self.w, ", ")?;
+                }
+                self.render_tuple_def(&def.tuple)?;
+            }
+            Expression::Simple(value) => {
+                self.render_value(value)?;
+            }
+        };
+        if did_indent {
+            self.curr_indent -= self.indent_size;
+        }
+        Ok(())
+    }
+
+    /// Renders one statement followed by `;` and a blank line.
+    ///
+    /// Comment groups pending at or before the statement's line are printed
+    /// first; the statement keyword (if any) and its expression follow.
+    pub fn render_stmt(&mut self, stmt: &Statement) -> std::io::Result<()> {
+        // All statements start at the beginning of a line.
+        let line = stmt.pos().line;
+        self.render_comment_if_needed(line)?;
+        // Write the statement-specific prefix and pick the expression that
+        // forms the rest of the statement.
+        let body = match stmt {
+            Statement::Expression(expr) => expr,
+            Statement::Let(def) => {
+                write!(self.w, "let {} = ", def.name.fragment)?;
+                &def.value
+            }
+            Statement::Assert(_, expr) => {
+                write!(self.w, "assert ")?;
+                expr
+            }
+            Statement::Output(_, tok, expr) => {
+                write!(self.w, "out {} ", tok.fragment)?;
+                expr
+            }
+        };
+        self.render_expr(body)?;
+        write!(self.w, ";\n\n")?;
+        self.last_line = line;
+        Ok(())
+    }
+
+    /// Renders every statement in `stmts` in order, then prints any comment
+    /// groups that are still pending afterwards.
+    pub fn render(&mut self, stmts: &Vec<Statement>) -> std::io::Result<()> {
+        for stmt in stmts.iter() {
+            self.render_stmt(stmt)?;
+        }
+        // Asking for everything up to one line past the first entry of the
+        // pending list is what prints the remaining trailing comments.
+        match self.comment_group_lines.first().cloned() {
+            Some(final_comment_line) => self.render_missed_comments(final_comment_line + 1),
+            None => Ok(()),
+        }
+    }
+}
+
+#[cfg(test)]
+mod test;
--- a/src/ast/printer/test.rs
+++ b/src/ast/printer/test.rs
@ -0,0 +1,393 @@
+// Copyright 2019 Jeremy Wall
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+use std::collections::BTreeMap;
+
+use crate::ast::printer::*;
+use crate::iter::OffsetStrIter;
+use crate::parse::*;
+
+/// Parses `input` into statements, collecting comments into `comment_map`
+/// when one is supplied. Panics if parsing fails.
+fn assert_parse(input: &str, comment_map: Option<&mut CommentMap>) -> Vec<Statement> {
+    let source = OffsetStrIter::new(input);
+    parse(source, comment_map).unwrap()
+}
+
+/// Parses `input`, renders it with a two-space indent and the comments that
+/// were collected while parsing, and returns the rendered text.
+fn print_to_buffer(input: &str) -> String {
+    let mut comments = BTreeMap::new();
+    let statements = assert_parse(input, Some(&mut comments));
+    let mut out: Vec<u8> = Vec::new();
+    let rendered = AstPrinter::new(2, &mut out)
+        .with_comment_map(&comments)
+        .render(&statements);
+    assert!(rendered.is_ok());
+    String::from_utf8(out).unwrap()
+}
+
+#[test]
+fn test_simple_value_printing() {
+    let input = "1;";
+    assert_eq!(print_to_buffer(input), format!("{}\n\n", input));
+}
+
+#[test]
+fn test_simple_selector_printing() {
+    let input = "foo.bar.quux;";
+    assert_eq!(print_to_buffer(input), format!("{}\n\n", input));
+}
+
+#[test]
+fn test_simple_quoted_printing() {
+    let input = "\"foo\";";
+    assert_eq!(print_to_buffer(input), format!("{}\n\n", input));
+}
+
+#[test]
+fn test_escaped_quoted_printing() {
+    let input = "\"f\\\\o\\\"o\";";
+    assert_eq!(print_to_buffer(input), format!("{}\n\n", input));
+}
+
+#[test]
+fn test_empty_tuple_printing() {
+    let input = "{};";
+    assert_eq!(print_to_buffer(input), format!("{}\n\n", input));
+}
+
+#[test]
+fn test_empty_list_printing() {
+    let input = "[];";
+    assert_eq!(print_to_buffer(input), format!("{}\n\n", input));
+}
+
+#[test]
+fn test_non_empty_tuple_printing() {
+    let input = "{\n  foo = 1,\n};";
+    assert_eq!(print_to_buffer(input), format!("{}\n\n", input));
+}
+
+#[test]
+fn test_nested_empty_tuple_printing() {
+    let input = "{\n  foo = {},\n};";
+    assert_eq!(print_to_buffer(input), format!("{}\n\n", input));
+}
+
+#[test]
+fn test_list_nested_empty_tuple_printing() {
+    let input = "[\n  {},\n];";
+    assert_eq!(print_to_buffer(input), format!("{}\n\n", input));
+}
+
+#[test]
+fn test_nested_non_empty_tuple_printing() {
+    let input = "{\n  foo = {\n    bar = 1,\n  },\n};";
+    assert_eq!(print_to_buffer(input), format!("{}\n\n", input));
+}
+
+#[test]
+fn test_nested_non_empty_list_printing() {
+    let input = "[\n  [\n    1,\n  ],\n];";
+    assert_eq!(print_to_buffer(input), format!("{}\n\n", input));
+}
+
+#[test]
+fn test_simple_quoted_field_tuple_printing() {
+    let input = "{\n  \"foo\" = {\n    bar = 1,\n  },\n};";
+    assert_eq!(
+        print_to_buffer(input),
+        format!("{}\n\n", "{\n  foo = {\n    bar = 1,\n  },\n};")
+    );
+}
+
+#[test]
+fn test_special_quoted_field_tuple_printing() {
+    let input = "{\n  \"foo bar\" = {\n    bar = 1,\n  },\n};";
+    assert_eq!(print_to_buffer(input), format!("{}\n\n", input));
+}
+
+#[test]
+fn test_let_statement_printing() {
+    let input = "let tpl = {\n  \"foo bar\" = {\n    bar = 1,\n  },\n};";
+    assert_eq!(print_to_buffer(input), format!("{}\n\n", input));
+}
+
+#[test]
+fn test_call_expr_printing() {
+    let input = "call(\n  foo,\n  bar,\n);";
+    assert_eq!(print_to_buffer(input), format!("{}\n\n", input));
+}
+
+#[test]
+fn test_call_expr_one_arg_printing() {
+    let input = "call(foo);";
+    assert_eq!(print_to_buffer(input), format!("{}\n\n", input));
+}
+
+#[test]
+fn test_copy_expr_printing() {
+    let input = "copy{\n  foo = 1,\n  bar = 2,\n};";
+    assert_eq!(print_to_buffer(input), format!("{}\n\n", input));
+}
+
+#[test]
+fn test_copy_expr_one_arg_printing() {
+    let input = "copy{\n  foo = 1,\n};";
+    assert_eq!(print_to_buffer(input), format!("{}\n\n", input));
+}
+
+#[test]
+fn test_out_expr_printing() {
+    let input = "out json {\n  foo = 1,\n};";
+    assert_eq!(print_to_buffer(input), format!("{}\n\n", input));
+}
+
+#[test]
+fn test_select_expr_no_default_printing() {
+    let input = "select true, {\n  true = 1,\n  false = 2,\n};";
+    assert_eq!(print_to_buffer(input), format!("{}\n\n", input));
+}
+
+#[test]
+fn test_select_expr_with_default_printing() {
+    let input = "select true, 3, {\n  true = 1,\n  false = 2,\n};";
+    assert_eq!(print_to_buffer(input), format!("{}\n\n", input));
+}
+
+#[test]
+fn test_not_expr_printing() {
+    let input = "not true;";
+    assert_eq!(print_to_buffer(input), format!("{}\n\n", input));
+}
+
+#[test]
+fn test_fail_expr_printing() {
+    let input = "fail \"AHHh\";";
+    assert_eq!(print_to_buffer(input), format!("{}\n\n", input));
+}
+
+#[test]
+fn test_trace_expr_printing() {
+    let input = "TRACE \"AHHh\";";
+    assert_eq!(print_to_buffer(input), format!("{}\n\n", input));
+}
+
+#[test]
+fn test_module_no_out_expr_printing() {
+    let input = "let m = module {
+  hostname = \"\",
+  mem = 2048,
+  cpu = 2,
+} => {
+  let config = {
+    hostname = mod.hostname,
+    memory_size = mod.mem,
+    cpu_count = mod.cpu,
+  };
+
+};";
+    assert_eq!(print_to_buffer(input), format!("{}\n\n", input));
+}
+
+#[test]
+fn test_module_with_out_expr_printing() {
+    let input = "let m = module {
+  hostname = \"\",
+  mem = 2048,
+  cpu = 2,
+} => (config) {
+  let config = {
+    hostname = mod.hostname,
+    memory_size = mod.mem,
+    cpu_count = mod.cpu,
+  };
+
+};";
+    assert_eq!(print_to_buffer(input), format!("{}\n\n", input));
+}
+
+#[test]
+fn test_func_expr_printing() {
+    let input = "let f = func (foo, bar) => {
+  foo = foo,
+  bar = bar,
+};";
+    assert_eq!(print_to_buffer(input), format!("{}\n\n", input));
+}
+
+#[test]
+fn test_func_expr_single_arg_printing() {
+    let input = "let f = func (foo) => {
+  foo = foo,
+};";
+    assert_eq!(print_to_buffer(input), format!("{}\n\n", input));
+}
+
+#[test]
+fn test_format_expr_single_arg_printing() {
+    let input = "\"what? @{item.foo}\" % {
+  foo = 1,
+};";
+    assert_eq!(print_to_buffer(input), format!("{}\n\n", input));
+}
+
+#[test]
+fn test_format_expr_list_arg_printing() {
+    let input = "\"what? @ @\" % (
+  1,
+  2);";
+    assert_eq!(print_to_buffer(input), format!("{}\n\n", input));
+}
+
+#[test]
+fn test_statement_with_comment_printing() {
+    let input = "// add 1 + 1\n1 + 1;";
+    assert_eq!(print_to_buffer(input), format!("{}\n\n", input));
+}
+
+#[test]
+fn test_statement_with_comment_printing_groups() {
+    let input = "// add 1\n// and 1\n1 + 1;";
+    assert_eq!(print_to_buffer(input), format!("{}\n\n", input));
+}
+
+// Two comment groups separated by a blank line are both kept; the expected
+// output is the input with its surrounding whitespace trimmed.
+#[test]
+fn test_statement_with_comment_printing_multiple_groups() {
+    let source = "\n// group 1\n// more group 1\n\n// group 2\n// more group 2\n1 + 1;";
+    let expected = format!("{}\n\n", source.trim());
+    let rendered = print_to_buffer(source);
+    assert_eq!(rendered, expected);
+}
+
+// A comment group after the last statement is still emitted; the expected
+// output is the trimmed input followed by a single newline.
+#[test]
+fn test_statement_with_comment_printing_comments_at_end() {
+    let source = "// group 1\n1 + 1;\n\n// group 2\n\n";
+    let expected = format!("{}\n", source.trim());
+    let rendered = print_to_buffer(source);
+    assert_eq!(rendered, expected);
+}
+
+// A comment on its own line between two tuple fields is kept in place.
+#[test]
+fn test_tuple_expression_with_embedded_comment() {
+    let source = "{\n  foo = bar,\n  // a comment\n  bar = foo,\n};";
+    let rendered = print_to_buffer(source);
+    assert_eq!(rendered, format!("{}\n\n", source));
+}
+
+// A comment trailing a tuple field on the same line is expected to be
+// emitted on its own line above that field.
+#[test]
+fn test_tuple_expression_with_embedded_comment_same_line() {
+    let source = "{
+  foo = bar, // a comment
+  bar = foo,
+};";
+    let wanted = "{
+  // a comment
+  foo = bar,
+  bar = foo,
+};";
+    let rendered = print_to_buffer(source);
+    assert_eq!(rendered, format!("{}\n\n", wanted));
+}
+
+// A comment between a field's `=` and its value is expected to be emitted
+// above the field.
+#[test]
+fn test_tuple_expression_with_embedded_comment_mid_field_expr() {
+    let source = "{\n  foo = bar,\n  bar =\n// a comment\n   foo\n};";
+    let expected = "{\n  foo = bar,\n  // a comment\n  bar = foo,\n};\n\n";
+    assert_eq!(print_to_buffer(source), expected);
+}
+
+// A comment before a field and another inside the field's expression are
+// both expected above the field, in source order.
+#[test]
+fn test_tuple_expression_with_embedded_comment_and_mid_field_expr() {
+    let source = "{\n  foo = bar,\n// a comment\n  bar =\n// another comment\n   foo\n};";
+    let expected =
+        "{\n  foo = bar,\n  // a comment\n  // another comment\n  bar = foo,\n};\n\n";
+    assert_eq!(print_to_buffer(source), expected);
+}
+
+// A comment between two list elements is kept in place.
+#[test]
+fn test_list_expression_with_embedded_comment() {
+    let source = "[\n  bar,\n  // a comment\n  foo,\n];";
+    let expected = format!("{}\n\n", source);
+    assert_eq!(print_to_buffer(source), expected);
+}
+
+// A comment between a binary operator and its right operand is kept in place.
+#[test]
+fn test_binary_expression_with_embedded_comment() {
+    let source = "true == \n// false is not true\nfalse;";
+    let rendered = print_to_buffer(source);
+    assert_eq!(rendered, format!("{}\n\n", source));
+}
+
+// A comment above a call with no arguments is kept in place.
+#[test]
+fn test_empty_call_expression_with_comment() {
+    let source = "// a comment\nmyfunc();";
+    let expected = format!("{}\n\n", source);
+    assert_eq!(print_to_buffer(source), expected);
+}
+
+// A comment between call arguments is kept and the arguments are laid out
+// one per line.
+#[test]
+fn test_call_expression_with_embedded_comment_in_args() {
+    let source = "// a comment\nmyfunc(\n  arg1,\n  // another comment\n  arg2,\n);";
+    let rendered = print_to_buffer(source);
+    assert_eq!(rendered, format!("{}\n\n", source));
+}
+
+// A comment between the fields of a copy expression is kept in place.
+#[test]
+fn test_copy_expression_with_embedded_comment_in_args() {
+    let source = "// a comment\nmyfunc{\n  foo = arg1,\n  // another comment\n  bar = arg2,\n};";
+    let expected = format!("{}\n\n", source);
+    assert_eq!(print_to_buffer(source), expected);
+}
+
+// A comment between TRACE and its expression is kept in place.
+#[test]
+fn test_trace_expression_with_embedded_comment() {
+    let source = "// a comment\nTRACE \n// another comment\nfoo;";
+    let rendered = print_to_buffer(source);
+    assert_eq!(rendered, format!("{}\n\n", source));
+}
+
+// A comment between `fail` and its message expression is kept in place.
+#[test]
+fn test_fail_expression_with_embedded_comment() {
+    let source = "// a comment\nfail \n// another comment\nfoo;";
+    let expected = format!("{}\n\n", source);
+    assert_eq!(print_to_buffer(source), expected);
+}
+
+// A comment between `%` and the format argument is kept in place.
+#[test]
+fn test_format_expression_with_embedded_comment() {
+    let source = "// a comment\n\"@(item.bar)\" % \n// another comment\nfoo;";
+    let expected = format!("{}\n\n", source.trim());
+    assert_eq!(print_to_buffer(source), expected);
+}
+
+// Comments placed before each argument of `filter(...)` are kept in place.
+#[test]
+fn test_filter_func_operator_expression_with_embedded_comment() {
+    let source = "// a comment\nfilter(\n  // another comment\n  foo,\n  // one more\n  bar);";
+    let expected = format!("{}\n\n", source.trim());
+    let rendered = print_to_buffer(source);
+    assert_eq!(rendered, expected);
+}
+
+// Comments placed before each of the three `reduce(...)` arguments are kept
+// in place.
+#[test]
+fn test_reduce_func_operator_expression_with_embedded_comment() {
+    let source = "// a comment\nreduce(
+  // another comment
+  myfunc,
+  // one more
+  acc,
+  // and the last
+  target);";
+    let expected = format!("{}\n\n", source.trim());
+    let rendered = print_to_buffer(source);
+    assert_eq!(rendered, expected);
+}
+
+// Comments placed before each argument of `map(...)` are kept in place.
+#[test]
+fn test_map_func_operator_expression_with_embedded_comment() {
+    let source = "// a comment\nmap(\n  // another comment\n  foo,\n  // one more\n  bar);";
+    let expected = format!("{}\n\n", source.trim());
+    let rendered = print_to_buffer(source);
+    assert_eq!(rendered, expected);
+}
+
+// A comment inside a parenthesised (grouped) expression is kept in place.
+#[test]
+fn test_grouped_expression_with_embedded_comment() {
+    let source = "// a comment\n(\n  // a comment\n  foo\n);";
+    let expected = format!("{}\n\n", source.trim());
+    let rendered = print_to_buffer(source);
+    assert_eq!(rendered, expected);
+}
--- a/src/ast/walk.rs
+++ b/src/ast/walk.rs
@ -1,36 +1,13 @@
 use crate::ast::*;

-pub struct AstWalker<'a> {
-    handle_value: Option<&'a Fn(&mut Value)>,
-    handle_expression: Option<&'a Fn(&mut Expression)>,
-    handle_statment: Option<&'a Fn(&mut Statement)>,
-}
-
-impl<'a> AstWalker<'a> {
-    pub fn new() -> Self {
-        AstWalker {
-            handle_value: None,
-            handle_expression: None,
-            handle_statment: None,
+pub trait Walker {
+    fn walk_statement_list(&mut self, stmts: Vec<&mut Statement>) {
+        for v in stmts {
+            self.walk_statement(v);
        }
    }

-    pub fn with_value_handler(mut self, h: &'a Fn(&mut Value)) -> Self {
-        self.handle_value = Some(h);
-        self
-    }
-
-    pub fn with_expr_handler(mut self, h: &'a Fn(&mut Expression)) -> Self {
-        self.handle_expression = Some(h);
-        self
-    }
-
-    pub fn with_stmt_handler(mut self, h: &'a Fn(&mut Statement)) -> Self {
-        self.handle_statment = Some(h);
-        self
-    }
-
-    pub fn walk_statement(&self, stmt: &mut Statement) {
+    fn walk_statement(&mut self, stmt: &mut Statement) {
        self.visit_statement(stmt);
        match stmt {
            Statement::Let(ref mut def) => {
@ -39,7 +16,7 @@ impl<'a> AstWalker<'a> {
            Statement::Expression(ref mut expr) => {
                self.walk_expression(expr);
            }
-            Statement::Assert(ref mut expr) => {
+            Statement::Assert(_, ref mut expr) => {
                self.walk_expression(expr);
            }
            Statement::Output(_, _, ref mut expr) => {
@ -48,13 +25,13 @@ impl<'a> AstWalker<'a> {
        }
    }

-    fn walk_fieldset(&self, fs: &mut FieldList) {
+    fn walk_fieldset(&mut self, fs: &mut FieldList) {
        for &mut (_, ref mut expr) in fs.iter_mut() {
            self.walk_expression(expr);
        }
    }

-    pub fn walk_expression(&self, expr: &mut Expression) {
+    fn walk_expression(&mut self, expr: &mut Expression) {
        self.visit_expression(expr);
        match expr {
            Expression::Call(ref mut def) => {
@ -135,19 +112,59 @@ impl<'a> AstWalker<'a> {
        }
    }

-    fn visit_value(&self, val: &mut Value) {
+    fn visit_value(&mut self, val: &mut Value);
+
+    fn visit_expression(&mut self, expr: &mut Expression);
+
+    fn visit_statement(&mut self, stmt: &mut Statement);
+}
+
+/// A `Walker` whose visit hooks are supplied as borrowed closures.
+///
+/// Each handler is optional; a hook with no handler registered does nothing.
+/// Handlers are attached with the `with_*_handler` builder methods.
+// NOTE(review): the earlier "TODO this would be better implemented as a
+// Trait" appears to be addressed by the `Walker` trait in this module;
+// confirm and drop this note.
+pub struct AstWalker<'a> {
+    handle_value: Option<&'a dyn Fn(&mut Value)>,
+    handle_expression: Option<&'a dyn Fn(&mut Expression)>,
+    handle_statment: Option<&'a dyn Fn(&mut Statement)>,
+}
+
+impl<'a> AstWalker<'a> {
+    /// Creates a walker with no handlers registered.
+    pub fn new() -> Self {
+        AstWalker {
+            handle_value: None,
+            handle_expression: None,
+            handle_statment: None,
+        }
+    }
+
+    /// Registers `h` as the handler for values and returns the walker.
+    pub fn with_value_handler(mut self, h: &'a dyn Fn(&mut Value)) -> Self {
+        self.handle_value = Some(h);
+        self
+    }
+
+    /// Registers `h` as the handler for expressions and returns the walker.
+    pub fn with_expr_handler(mut self, h: &'a dyn Fn(&mut Expression)) -> Self {
+        self.handle_expression = Some(h);
+        self
+    }
+
+    /// Registers `h` as the handler for statements and returns the walker.
+    pub fn with_stmt_handler(mut self, h: &'a dyn Fn(&mut Statement)) -> Self {
+        self.handle_statment = Some(h);
+        self
+    }
+}
+
+impl<'a> Walker for AstWalker<'a> {
+    fn visit_value(&mut self, val: &mut Value) {
        if let Some(h) = self.handle_value {
            h(val);
        }
    }

-    fn visit_expression(&self, expr: &mut Expression) {
+    fn visit_expression(&mut self, expr: &mut Expression) {
        if let Some(h) = self.handle_expression {
            h(expr);
        }
    }

-    fn visit_statement(&self, stmt: &mut Statement) {
+    fn visit_statement(&mut self, stmt: &mut Statement) {
        if let Some(h) = self.handle_statment {
            h(stmt);
        }
--- a/src/benches/parse.rs
+++ b/src/benches/parse.rs
@ -26,7 +26,7 @@ use ucglib::iter::OffsetStrIter;
 use ucglib::parse::*;

 fn do_parse(i: &str) {
-    parse(OffsetStrIter::new(i));
+    parse(OffsetStrIter::new(i), None);
 }

 fn parse_int(b: &mut Bencher) {
--- a/src/build/mod.rs
+++ b/src/build/mod.rs
@ -303,7 +303,7 @@ impl<'a> FileBuilder<'a> {
    }

    fn eval_input(&mut self, input: OffsetStrIter) -> Result<Rc<Val>, Box<dyn Error>> {
-        match parse(input.clone()) {
+        match parse(input.clone(), None) {
            Ok(stmts) => {
                //panic!("Successfully parsed {}", input);
                let mut out: Option<Rc<Val>> = None;
@ -511,7 +511,7 @@ impl<'a> FileBuilder<'a> {
    fn eval_stmt(&mut self, stmt: &Statement) -> Result<Rc<Val>, Box<dyn Error>> {
        let child_scope = self.scope.clone();
        match stmt {
-            &Statement::Assert(ref expr) => self.eval_assert(&expr, &child_scope),
+            &Statement::Assert(_, ref expr) => self.eval_assert(&expr, &child_scope),
            &Statement::Let(ref def) => self.eval_let(def),
            &Statement::Expression(ref expr) => self.eval_expr(expr, &child_scope),
            // Only one output can be used per file. Right now we enforce this by
@ -1654,11 +1654,17 @@ impl<'a> FileBuilder<'a> {
            // we are not in validate_mode so build_asserts are noops.
            return Ok(Rc::new(Val::Empty));
        }
+        let mut buffer: Vec<u8> = Vec::new();
+        {
+            let mut printer = crate::ast::printer::AstPrinter::new(2, &mut buffer);
+            let _ = printer.render_expr(expr);
+        }
+        let expr_pretty = String::from_utf8(buffer).unwrap();
        let ok = match self.eval_expr(expr, scope) {
            Ok(v) => v,
            Err(e) => {
                // failure!
-                let msg = format!("CompileError: {}\n", e);
+                let msg = format!("CompileError: {}\nfor expression:\n{}\n", e, expr_pretty);
                self.record_assert_result(&msg, false);
                return Ok(Rc::new(Val::Empty));
            }
@ -1965,9 +1971,16 @@ impl<'a> FileBuilder<'a> {
                };
            }
            &Expression::Debug(ref def) => {
+                let mut buffer: Vec<u8> = Vec::new();
+                {
+                    let mut printer = crate::ast::printer::AstPrinter::new(2, &mut buffer);
+                    let _ = printer.render_expr(&def.expr);
+                }
+                let expr_pretty = String::from_utf8(buffer).unwrap();
+
                let val = self.eval_expr(&def.expr, scope);
                if let Ok(ref val) = val {
-                    eprintln!("TRACE: {} at {}", val, def.pos);
+                    eprintln!("TRACE: {} = {} at {}", expr_pretty, val, def.pos);
                }
                val
            }
--- a/src/main.rs
+++ b/src/main.rs
@ -17,9 +17,11 @@ extern crate dirs;
 extern crate ucglib;

 use std::cell::RefCell;
+use std::collections::BTreeMap;
 use std::error::Error;
 use std::fs::File;
 use std::io;
+use std::io::Read;
 use std::path::{Path, PathBuf};
 use std::process;
 use std::rc::Rc;
@ -29,6 +31,8 @@ use ucglib::build::assets::{Cache, MemoryCache};
 use ucglib::build::Val;
 use ucglib::convert::traits;
 use ucglib::convert::{ConverterRegistry, ImporterRegistry};
+use ucglib::iter::OffsetStrIter;
+use ucglib::parse::parse;

 fn do_flags<'a, 'b>() -> clap::App<'a, 'b> {
    clap_app!(
@ -53,6 +57,12 @@ fn do_flags<'a, 'b>() -> clap::App<'a, 'b> {
             (@arg recurse: -r "Whether we should recurse or not.")
             (@arg INPUT: ... "Input ucg files or directories to run test assertions for. If not provided it will scan the current directory for files with _test.ucg")
            )
+            (@subcommand fmt =>
+             (about: "Format ucg files automatically.")
+             (@arg recurse: -r "Whether we should recurse or not.")
+             (@arg indent: -i --indent "How many spaces to indent by. Defaults to 4")
+             (@arg INPUT: ... "Input ucg files or directories to format")
+            )
            (@subcommand converters =>
             (about: "list the available converters")
             (@arg converter: "Converter name to get help for.")
@ -346,6 +356,60 @@ fn build_command(
    }
 }

+/// Parses the ucg file at `p` and pretty-prints it to stdout.
+///
+/// Comments collected during parsing are handed to the printer so that they
+/// appear in the output. `indent` is passed through to the printer.
+/// Errors from opening, reading, parsing or rendering are returned.
+fn fmt_file(p: &Path, indent: usize) -> std::result::Result<(), Box<dyn Error>> {
+    let mut source = String::new();
+    File::open(p)?.read_to_string(&mut source)?;
+
+    // The parser fills this map with the comments it encounters.
+    let mut comments = BTreeMap::new();
+    let statements = parse(OffsetStrIter::new(&source), Some(&mut comments))?;
+
+    let out = std::io::stdout();
+    let mut printer =
+        ucglib::ast::printer::AstPrinter::new(indent, out).with_comment_map(&comments);
+    printer.render(&statements)?;
+    Ok(())
+}
+
+/// Formats the entries found directly under the directory `p`.
+///
+/// Subdirectories are descended into only when `recurse` is true; otherwise
+/// they are skipped. Every other entry is passed to `fmt_file` with `indent`.
+/// The first error from listing the directory or formatting an entry is
+/// returned.
+fn fmt_dir(p: &Path, recurse: bool, indent: usize) -> std::result::Result<(), Box<dyn Error>> {
+    // TODO(jwall): We should handle this error more gracefully
+    // for the user here.
+    for entry in std::fs::read_dir(p)? {
+        // Propagate listing failures to the caller instead of panicking.
+        let path = entry?.path();
+        if path.is_dir() {
+            // A directory can never be formatted as a file, so without
+            // `recurse` it is skipped rather than handed to `fmt_file`.
+            if recurse {
+                fmt_dir(&path, recurse, indent)?;
+            }
+        } else {
+            // NOTE(review): every non-directory entry is formatted regardless
+            // of its extension — confirm whether this should be limited to
+            // ucg files.
+            fmt_file(&path, indent)?;
+        }
+    }
+    Ok(())
+}
+
+/// Runs the `fmt` subcommand.
+///
+/// Formats every path given as `INPUT`, or the current directory when none
+/// is given. Directories go through `fmt_dir` (honouring `recurse`), all
+/// other paths through `fmt_file`. The indent defaults to 4 when the
+/// `indent` argument has no value; an unparsable value is returned as an
+/// error.
+// NOTE(review): the `indent` arg declaration visible in `do_flags` has no
+// takes-value marker — confirm `value_of("indent")` can actually yield a value.
+fn fmt_command(matches: &clap::ArgMatches) -> std::result::Result<(), Box<dyn Error>> {
+    let recurse = matches.is_present("recurse");
+    let indent = match matches.value_of("indent") {
+        Some(s) => s.parse::<usize>()?,
+        None => 4,
+    };
+
+    let paths: Vec<PathBuf> = match matches.values_of("INPUT") {
+        Some(files) => files.map(PathBuf::from).collect(),
+        None => vec![std::env::current_dir()?],
+    };
+    for p in paths {
+        if p.is_dir() {
+            fmt_dir(&p, recurse, indent)?;
+        } else {
+            fmt_file(&p, indent)?;
+        }
+    }
+    Ok(())
+}
+
 fn test_command(
    matches: &clap::ArgMatches,
    import_paths: &Vec<PathBuf>,
@ -482,6 +546,10 @@ fn main() {
        importers_command(&registry)
    } else if let Some(_) = app_matches.subcommand_matches("env") {
        env_help()
+    } else if let Some(matches) = app_matches.subcommand_matches("fmt") {
+        if let Err(e) = fmt_command(matches) {
+            eprintln!("{}", e);
+        }
    } else {
        app.print_help().unwrap();
        println!("");
--- a/src/parse/mod.rs
+++ b/src/parse/mod.rs
@ -28,6 +28,8 @@ use crate::error::StackPrinter;
 use crate::iter::OffsetStrIter;
 use crate::tokenizer::*;

+pub use crate::tokenizer::{CommentGroup, CommentMap};
+
 type ParseResult<'a, O> = Result<SliceIter<'a, Token>, O>;

 #[cfg(feature = "tracing")]
@ -791,21 +793,19 @@ make_fn!(
    )
 );

-fn tuple_to_let(tok: Token, expr: Expression) -> Statement {
-    Statement::Let(LetDef {
-        name: tok,
-        value: expr,
-    })
-}
-
 make_fn!(
    let_stmt_body<SliceIter<Token>, Statement>,
    do_each!(
+        pos => pos,
        name => wrap_err!(match_type!(BAREWORD), "Expected name for binding"),
        _ => punct!("="),
        val => wrap_err!(trace_parse!(expression), "Expected Expression"),
        _ => punct!(";"),
-        (tuple_to_let(name, val))
+        (Statement::Let(LetDef {
+            pos: pos,
+            name: name,
+            value: val,
+        }))
    )
 );

@ -821,10 +821,11 @@ make_fn!(
 make_fn!(
    assert_statement<SliceIter<Token>, Statement>,
    do_each!(
+        pos => pos,
        _ => word!("assert"),
        expr => wrap_err!(must!(expression), "Expected Tuple {ok=<bool>, desc=<str>}"),
        _ => must!(punct!(";")),
-        (Statement::Assert(expr))
+        (Statement::Assert(pos, expr))
    )
 );

@ -853,8 +854,11 @@ fn statement(i: SliceIter<Token>) -> Result<SliceIter<Token>, Statement> {
 //trace_macros!(false);

 /// Parses a LocatedSpan into a list of Statements or an `error::Error`.
-pub fn parse<'a>(input: OffsetStrIter<'a>) -> std::result::Result<Vec<Statement>, String> {
-    match tokenize(input.clone()) {
+pub fn parse<'a>(
+    input: OffsetStrIter<'a>,
+    comment_map: Option<&mut CommentMap>,
+) -> std::result::Result<Vec<Statement>, String> {
+    match tokenize(input.clone(), comment_map) {
        Ok(tokenized) => {
            let mut out = Vec::new();
            let mut i_ = SliceIter::new(&tokenized);
--- a/src/tokenizer/mod.rs
+++ b/src/tokenizer/mod.rs
@ -23,6 +23,9 @@ use crate::ast::*;
 use crate::error::StackPrinter;
 use crate::iter::OffsetStrIter;

+/// A run of comment tokens collected together by `tokenize`.
+pub type CommentGroup = Vec<Token>;
+/// Comment groups keyed by the line number of the last comment token in each group.
+pub type CommentMap = std::collections::BTreeMap<usize, CommentGroup>;
+
 fn is_symbol_char<'a>(i: OffsetStrIter<'a>) -> Result<OffsetStrIter<'a>, u8> {
    let mut _i = i.clone();
    let c = match _i.next() {
@ -350,6 +353,12 @@ fn comment(input: OffsetStrIter) -> Result<OffsetStrIter, Token> {
                )
            ) {
                Result::Complete(rest, cmt) => {
+                    // Eat the new lines here before continuing
+                    let rest =
+                        match optional!(rest, either!(text_token!("\r\n"), text_token!("\n"))) {
+                            Result::Complete(next_rest, _) => next_rest,
+                            _ => rest,
+                        };
                    return Result::Complete(rest, make_tok!(CMT => cmt.to_string(), input));
                }
                // If we didn't find a new line then we just grab everything.
@ -452,9 +461,16 @@ fn token<'a>(input: OffsetStrIter<'a>) -> Result<OffsetStrIter<'a>, Token> {
 }

 /// Consumes an input OffsetStrIter and returns either a Vec<Token> or a error::Error.
-pub fn tokenize<'a>(input: OffsetStrIter<'a>) -> std::result::Result<Vec<Token>, String> {
+/// If a comment_map is passed in then it will store the comments indexed by their
+/// line number.
+pub fn tokenize<'a>(
+    input: OffsetStrIter<'a>,
+    mut comment_map: Option<&mut CommentMap>,
+) -> std::result::Result<Vec<Token>, String> {
    let mut out = Vec::new();
    let mut i = input.clone();
+    let mut comment_group = Vec::new();
+    let mut comment_was_last: Option<Token> = None;
    loop {
        if let Result::Complete(_, _) = eoi(i.clone()) {
            break;
@ -486,12 +502,38 @@ pub fn tokenize<'a>(input: OffsetStrIter<'a>) -> std::result::Result<Vec<Token>,
            }
            Result::Complete(rest, tok) => {
                i = rest;
-                if tok.typ == TokenType::COMMENT || tok.typ == TokenType::WS {
-                    // we skip comments and whitespace
+                match (&mut comment_map, &tok.typ) {
+                    // variants with a comment_map
+                    (&mut Some(_), &TokenType::COMMENT) => {
+                        comment_group.push(tok.clone());
+                        comment_was_last = Some(tok.clone());
                        continue;
                    }
+                    (&mut Some(ref mut map), _) => {
+                        if tok.typ != TokenType::WS {
                            out.push(tok);
                        }
+                        if let Some(tok) = comment_was_last {
+                            map.insert(tok.pos.line, comment_group);
+                            comment_group = Vec::new();
+                        }
+                    }
+                    // variants without a comment_map
+                    (None, TokenType::WS) | (None, TokenType::COMMENT) => continue,
+                    (None, _) => {
+                        out.push(tok);
+                    }
+                }
+                comment_was_last = None;
+            }
+        }
+    }
+    // if we had comments at the end then we need to do a final
+    // insert into our map.
+    if let Some(ref mut map) = comment_map {
+        if let Some(ref tok) = comment_group.last() {
+            let line = tok.pos.line;
+            map.insert(line, comment_group);
        }
    }
    // ensure that we always have an END token to go off of.
--- a/src/tokenizer/test.rs
+++ b/src/tokenizer/test.rs
@ -1,3 +1,5 @@
+use std::collections::BTreeMap;
+
 use super::*;

 use abortable_parser::{Offsetable, Result, SliceIter};
@ -89,7 +91,7 @@ fn test_string_with_escaping() {
 #[test]
 fn test_tokenize_bareword_with_dash() {
    let input = OffsetStrIter::new("foo-bar ");
-    let result = tokenize(input.clone());
+    let result = tokenize(input.clone(), None);
    assert!(result.is_ok(), format!("result {:?} is not ok", result));
    if let Ok(toks) = result {
        assert_eq!(toks.len(), 2);
@ -157,7 +159,24 @@ fn test_tokenize_one_of_each() {
        "map out filter assert let import func select as => [ ] { } ; = % / * \
         + - . ( ) , 1 . foo \"bar\" // comment\n ; true false == < > <= >= !=",
    );
-    let result = tokenize(input.clone());
+    let result = tokenize(input.clone(), None);
+    assert!(result.is_ok(), format!("result {:?} is not ok", result));
+    let v = result.unwrap();
+    for (i, t) in v.iter().enumerate() {
+        println!("{}: {:?}", i, t);
+    }
+    assert_eq!(v.len(), 39);
+    assert_eq!(v[38].typ, TokenType::END);
+}
+
+#[test]
+fn test_tokenize_one_of_each_comment_map_path() {
+    let input = OffsetStrIter::new(
+        "map out filter assert let import func select as => [ ] { } ; = % / * \
+         + - . ( ) , 1 . foo \"bar\" // comment\n ; true false == < > <= >= !=",
+    );
+    let mut comment_map = BTreeMap::new();
+    let result = tokenize(input.clone(), Some(&mut comment_map));
    assert!(result.is_ok(), format!("result {:?} is not ok", result));
    let v = result.unwrap();
    for (i, t) in v.iter().enumerate() {
@ -170,7 +189,7 @@ fn test_tokenize_one_of_each() {
 #[test]
 fn test_parse_has_end() {
    let input = OffsetStrIter::new("foo");
-    let result = tokenize(input.clone());
+    let result = tokenize(input.clone(), None);
    assert!(result.is_ok());
    let v = result.unwrap();
    assert_eq!(v.len(), 2);
@ -327,3 +346,24 @@ fn test_match_type() {
        res => assert!(false, format!("Fail: {:?}", res)),
    }
 }
+
+// Two comments separated by a blank line are recorded as two separate
+// groups in the comment map.
+#[test]
+fn test_tokenize_builds_comment_map() {
+    let input = OffsetStrIter::new("// comment 1\n\n//comment 2");
+    let mut comment_map = BTreeMap::new();
+    let result = tokenize(input, Some(&mut comment_map));
+    // Pass the format arguments straight to `assert!` rather than a
+    // pre-built `String`, which is not a valid panic message literal.
+    assert!(result.is_ok(), "result {:?} is not ok", result);
+
+    assert_eq!(comment_map.len(), 2);
+}
+
+// Adjacent comment lines are grouped together: the first two comments form
+// one group stored under line 2, the last comment forms a second group.
+#[test]
+fn test_tokenize_builds_comment_map_groups() {
+    let input = OffsetStrIter::new("// first part\n// comment 1\n\n//comment 2");
+    let mut comment_map = BTreeMap::new();
+    let result = tokenize(input, Some(&mut comment_map));
+    // Pass the format arguments straight to `assert!` rather than a
+    // pre-built `String`, which is not a valid panic message literal.
+    assert!(result.is_ok(), "result {:?} is not ok", result);
+
+    assert_eq!(comment_map.len(), 2);
+    assert_eq!(comment_map[&2].len(), 2);
+}