From 6661e02a759e9ebca0e25b87065529c428ce4fb5 Mon Sep 17 00:00:00 2001 From: Jeremy Wall Date: Mon, 20 May 2019 21:02:51 -0500 Subject: [PATCH] DEV: Generate a comment map as part of our tokenization. The comment_map is optional but if passed in it will be populated during tokenization. --- src/ast/printer/mod.rs | 47 +++++++----------------------- src/ast/printer/test.rs | 64 ++++++++++++++++++++--------------------- src/benches/parse.rs | 2 +- src/build/mod.rs | 2 +- src/parse/mod.rs | 9 ++++-- src/tokenizer/mod.rs | 41 ++++++++++++++++++++++---- src/tokenizer/test.rs | 6 ++-- 7 files changed, 90 insertions(+), 81 deletions(-) diff --git a/src/ast/printer/mod.rs b/src/ast/printer/mod.rs index b0517c5..172ddd4 100644 --- a/src/ast/printer/mod.rs +++ b/src/ast/printer/mod.rs @@ -11,27 +11,30 @@ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. -use std::borrow::BorrowMut; use std::io::Write; use crate::ast::*; +use crate::parse::CommentMap; // TODO(jwall): We really need a way to preserve comments for these. // Perhaps for code formatting we actually want to work on the token stream instead? -pub struct AstPrinter +pub struct AstPrinter<'a, W> where W: Write, { indent: usize, curr_indent: usize, w: W, + // Indexed by line that the comment was on. + // We use this to determine when to print a comment in our AstPrinter + comment_map: Option<&'a CommentMap>, pub err: Option, } // TODO(jwall): At some point we probably want to be more aware of line length // in our formatting. But not at the moment. -impl AstPrinter +impl<'a, W> AstPrinter<'a, W> where W: Write, { @@ -39,45 +42,15 @@ where AstPrinter { indent: indent, curr_indent: 0, + comment_map: None, w: w, err: None, } } - pub fn visit_token(&mut self, t: &Token) -> std::io::Result<()> { - let w: &mut Write = self.w.borrow_mut(); - // Do we care about line length? - match t.typ { - TokenType::BAREWORD | TokenType::BOOLEAN | TokenType::DIGIT => { - write!(w, "{}", t.fragment)?; - } - TokenType::EMPTY => { - write!(w, "NULL")?; - } - TokenType::PUNCT => { - // TODO(jwall): We need to identify the points at which we - // introduce new lines and new indentation scopes. - } - TokenType::COMMENT => { - // We need to track some state here probably. - // Do we leave comments untouched? - } - TokenType::PIPEQUOTE => { - // FIXME I think is supposed to be removed. - } - TokenType::QUOTED => { - w.write(&['"' as u8])?; - write!(w, "{}", Self::escape_quotes(&t.fragment))?; - w.write(&['"' as u8])?; - } - TokenType::WS => { - // TODO(jwall): Track some state around new lines here? - } - TokenType::END => { - // NOOP - } - }; - Ok(()) + pub fn with_comment_map(mut self, map: &'a CommentMap) -> Self { + self.comment_map = Some(map); + self } fn make_indent(&self) -> String { diff --git a/src/ast/printer/test.rs b/src/ast/printer/test.rs index 41511e1..86db9ba 100644 --- a/src/ast/printer/test.rs +++ b/src/ast/printer/test.rs @@ -16,14 +16,14 @@ use crate::ast::printer::*; use crate::iter::OffsetStrIter; use crate::parse::*; -fn assert_parse(input: &str) -> Vec { - parse(OffsetStrIter::new(input)).unwrap() +fn assert_parse(input: &str, comment_map: Option<&mut CommentMap>) -> Vec { + parse(OffsetStrIter::new(input), comment_map).unwrap() } #[test] fn test_simple_value_printing() { let input = "1;"; - let stmts = assert_parse(input); + let stmts = assert_parse(input, None); let mut buffer: Vec = Vec::new(); let mut printer = AstPrinter::new(0, &mut buffer); printer.render(&stmts); @@ -34,7 +34,7 @@ fn test_simple_value_printing() { #[test] fn test_simple_selector_printing() { let input = "foo.bar.quux;"; - let stmts = assert_parse(input); + let stmts = assert_parse(input, None); let mut buffer: Vec = Vec::new(); let mut printer = AstPrinter::new(0, &mut buffer); printer.render(&stmts); @@ -45,7 +45,7 @@ fn test_simple_selector_printing() { #[test] fn test_simple_quoted_printing() { let input = "\"foo\";"; - let stmts = assert_parse(input); + let stmts = assert_parse(input, None); let mut buffer: Vec = Vec::new(); let mut printer = AstPrinter::new(0, &mut buffer); printer.render(&stmts); @@ -56,7 +56,7 @@ fn test_simple_quoted_printing() { #[test] fn test_escaped_quoted_printing() { let input = "\"f\\\\o\\\"o\";"; - let stmts = assert_parse(input); + let stmts = assert_parse(input, None); let mut buffer: Vec = Vec::new(); let mut printer = AstPrinter::new(0, &mut buffer); printer.render(&stmts); @@ -67,7 +67,7 @@ fn test_escaped_quoted_printing() { #[test] fn test_empty_tuple_printing() { let input = "{};"; - let stmts = assert_parse(input); + let stmts = assert_parse(input, None); let mut buffer: Vec = Vec::new(); let mut printer = AstPrinter::new(2, &mut buffer); printer.render(&stmts); @@ -78,7 +78,7 @@ fn test_empty_tuple_printing() { #[test] fn test_empty_list_printing() { let input = "[];"; - let stmts = assert_parse(input); + let stmts = assert_parse(input, None); let mut buffer: Vec = Vec::new(); let mut printer = AstPrinter::new(2, &mut buffer); printer.render(&stmts); @@ -89,7 +89,7 @@ fn test_empty_list_printing() { #[test] fn test_non_empty_tuple_printing() { let input = "{\n foo = 1,\n};"; - let stmts = assert_parse(input); + let stmts = assert_parse(input, None); let mut buffer: Vec = Vec::new(); let mut printer = AstPrinter::new(2, &mut buffer); printer.render(&stmts); @@ -100,7 +100,7 @@ fn test_non_empty_tuple_printing() { #[test] fn test_nested_empty_tuple_printing() { let input = "{\n foo = {},\n};"; - let stmts = assert_parse(input); + let stmts = assert_parse(input, None); let mut buffer: Vec = Vec::new(); let mut printer = AstPrinter::new(2, &mut buffer); printer.render(&stmts); @@ -111,7 +111,7 @@ fn test_nested_empty_tuple_printing() { #[test] fn test_list_nested_empty_tuple_printing() { let input = "[\n {},\n];"; - let stmts = assert_parse(input); + let stmts = assert_parse(input, None); let mut buffer: Vec = Vec::new(); let mut printer = AstPrinter::new(2, &mut buffer); printer.render(&stmts); @@ -122,7 +122,7 @@ fn test_list_nested_empty_tuple_printing() { #[test] fn test_nested_non_empty_tuple_printing() { let input = "{\n foo = {\n bar = 1,\n },\n};"; - let stmts = assert_parse(input); + let stmts = assert_parse(input, None); let mut buffer: Vec = Vec::new(); let mut printer = AstPrinter::new(2, &mut buffer); printer.render(&stmts); @@ -133,7 +133,7 @@ fn test_nested_non_empty_tuple_printing() { #[test] fn test_nested_non_empty_list_printing() { let input = "[\n [\n 1,\n ],\n];"; - let stmts = assert_parse(input); + let stmts = assert_parse(input, None); let mut buffer: Vec = Vec::new(); let mut printer = AstPrinter::new(2, &mut buffer); printer.render(&stmts); @@ -144,7 +144,7 @@ fn test_nested_non_empty_list_printing() { #[test] fn test_simple_quoted_field_tuple_printing() { let input = "{\n \"foo\" = {\n bar = 1,\n },\n};"; - let stmts = assert_parse(input); + let stmts = assert_parse(input, None); let mut buffer: Vec = Vec::new(); let mut printer = AstPrinter::new(2, &mut buffer); printer.render(&stmts); @@ -158,7 +158,7 @@ fn test_simple_quoted_field_tuple_printing() { #[test] fn test_special_quoted_field_tuple_printing() { let input = "{\n \"foo bar\" = {\n bar = 1,\n },\n};"; - let stmts = assert_parse(input); + let stmts = assert_parse(input, None); let mut buffer: Vec = Vec::new(); let mut printer = AstPrinter::new(2, &mut buffer); printer.render(&stmts); @@ -169,7 +169,7 @@ fn test_special_quoted_field_tuple_printing() { #[test] fn test_let_statement_printing() { let input = "let tpl = {\n \"foo bar\" = {\n bar = 1,\n },\n};"; - let stmts = assert_parse(input); + let stmts = assert_parse(input, None); let mut buffer: Vec = Vec::new(); let mut printer = AstPrinter::new(2, &mut buffer); printer.render(&stmts); @@ -180,7 +180,7 @@ fn test_let_statement_printing() { #[test] fn test_call_expr_printing() { let input = "call(\n foo,\n bar,\n);"; - let stmts = assert_parse(input); + let stmts = assert_parse(input, None); let mut buffer: Vec = Vec::new(); let mut printer = AstPrinter::new(2, &mut buffer); printer.render(&stmts); @@ -191,7 +191,7 @@ fn test_call_expr_printing() { #[test] fn test_call_expr_one_arg_printing() { let input = "call(foo);"; - let stmts = assert_parse(input); + let stmts = assert_parse(input, None); let mut buffer: Vec = Vec::new(); let mut printer = AstPrinter::new(2, &mut buffer); printer.render(&stmts); @@ -202,7 +202,7 @@ fn test_call_expr_one_arg_printing() { #[test] fn test_copy_expr_printing() { let input = "copy{\n foo = 1,\n bar = 2,\n};"; - let stmts = assert_parse(input); + let stmts = assert_parse(input, None); let mut buffer: Vec = Vec::new(); let mut printer = AstPrinter::new(2, &mut buffer); printer.render(&stmts); @@ -213,7 +213,7 @@ fn test_copy_expr_printing() { #[test] fn test_copy_expr_one_arg_printing() { let input = "copy{\n foo = 1,\n};"; - let stmts = assert_parse(input); + let stmts = assert_parse(input, None); let mut buffer: Vec = Vec::new(); let mut printer = AstPrinter::new(2, &mut buffer); printer.render(&stmts); @@ -224,7 +224,7 @@ fn test_copy_expr_one_arg_printing() { #[test] fn test_out_expr_printing() { let input = "out json {\n foo = 1,\n};"; - let stmts = assert_parse(input); + let stmts = assert_parse(input, None); let mut buffer: Vec = Vec::new(); let mut printer = AstPrinter::new(2, &mut buffer); printer.render(&stmts); @@ -235,7 +235,7 @@ fn test_out_expr_printing() { #[test] fn test_select_expr_no_default_printing() { let input = "select true, {\n true = 1,\n false = 2,\n};"; - let stmts = assert_parse(input); + let stmts = assert_parse(input, None); let mut buffer: Vec = Vec::new(); let mut printer = AstPrinter::new(2, &mut buffer); printer.render(&stmts); @@ -246,7 +246,7 @@ fn test_select_expr_no_default_printing() { #[test] fn test_select_expr_with_default_printing() { let input = "select true, 3, {\n true = 1,\n false = 2,\n};"; - let stmts = assert_parse(input); + let stmts = assert_parse(input, None); let mut buffer: Vec = Vec::new(); let mut printer = AstPrinter::new(2, &mut buffer); printer.render(&stmts); @@ -257,7 +257,7 @@ fn test_select_expr_with_default_printing() { #[test] fn test_not_expr_printing() { let input = "not true;"; - let stmts = assert_parse(input); + let stmts = assert_parse(input, None); let mut buffer: Vec = Vec::new(); let mut printer = AstPrinter::new(2, &mut buffer); printer.render(&stmts); @@ -268,7 +268,7 @@ fn test_not_expr_printing() { #[test] fn test_fail_expr_printing() { let input = "fail \"AHHh\";"; - let stmts = assert_parse(input); + let stmts = assert_parse(input, None); let mut buffer: Vec = Vec::new(); let mut printer = AstPrinter::new(2, &mut buffer); printer.render(&stmts); @@ -279,7 +279,7 @@ fn test_fail_expr_printing() { #[test] fn test_trace_expr_printing() { let input = "TRACE \"AHHh\";"; - let stmts = assert_parse(input); + let stmts = assert_parse(input, None); let mut buffer: Vec = Vec::new(); let mut printer = AstPrinter::new(2, &mut buffer); printer.render(&stmts); @@ -300,7 +300,7 @@ fn test_module_no_out_expr_printing() { \"cpu_count\" = mod.cpu, }; };"; - let stmts = assert_parse(input); + let stmts = assert_parse(input, None); let mut buffer: Vec = Vec::new(); let mut printer = AstPrinter::new(2, &mut buffer); printer.render(&stmts); @@ -321,7 +321,7 @@ fn test_module_with_out_expr_printing() { \"cpu_count\" = mod.cpu, }; };"; - let stmts = assert_parse(input); + let stmts = assert_parse(input, None); let mut buffer: Vec = Vec::new(); let mut printer = AstPrinter::new(2, &mut buffer); printer.render(&stmts); @@ -335,7 +335,7 @@ fn test_func_expr_printing() { foo = foo, bar = bar, };"; - let stmts = assert_parse(input); + let stmts = assert_parse(input, None); let mut buffer: Vec = Vec::new(); let mut printer = AstPrinter::new(2, &mut buffer); printer.render(&stmts); @@ -348,7 +348,7 @@ fn test_func_expr_single_arg_printing() { let input = "let f = func (foo) => { foo = foo, };"; - let stmts = assert_parse(input); + let stmts = assert_parse(input, None); let mut buffer: Vec = Vec::new(); let mut printer = AstPrinter::new(2, &mut buffer); printer.render(&stmts); @@ -361,7 +361,7 @@ fn test_format_expr_single_arg_printing() { let input = "\"what? @{item.foo}\" % { foo = 1, };"; - let stmts = assert_parse(input); + let stmts = assert_parse(input, None); let mut buffer: Vec = Vec::new(); let mut printer = AstPrinter::new(2, &mut buffer); printer.render(&stmts); @@ -374,7 +374,7 @@ fn test_format_expr_list_arg_printing() { let input = "\"what? @ @\" % ( 1, 2);"; - let stmts = assert_parse(input); + let stmts = assert_parse(input, None); let mut buffer: Vec = Vec::new(); let mut printer = AstPrinter::new(2, &mut buffer); printer.render(&stmts); diff --git a/src/benches/parse.rs b/src/benches/parse.rs index 61e81ff..782d7c7 100644 --- a/src/benches/parse.rs +++ b/src/benches/parse.rs @@ -26,7 +26,7 @@ use ucglib::iter::OffsetStrIter; use ucglib::parse::*; fn do_parse(i: &str) { - parse(OffsetStrIter::new(i)); + parse(OffsetStrIter::new(i), None); } fn parse_int(b: &mut Bencher) { diff --git a/src/build/mod.rs b/src/build/mod.rs index 4e23874..1864779 100644 --- a/src/build/mod.rs +++ b/src/build/mod.rs @@ -303,7 +303,7 @@ impl<'a> FileBuilder<'a> { } fn eval_input(&mut self, input: OffsetStrIter) -> Result, Box> { - match parse(input.clone()) { + match parse(input.clone(), None) { Ok(stmts) => { //panic!("Successfully parsed {}", input); let mut out: Option> = None; diff --git a/src/parse/mod.rs b/src/parse/mod.rs index 96bec15..a95054c 100644 --- a/src/parse/mod.rs +++ b/src/parse/mod.rs @@ -28,6 +28,8 @@ use crate::error::StackPrinter; use crate::iter::OffsetStrIter; use crate::tokenizer::*; +pub use crate::tokenizer::{CommentGroup, CommentMap}; + type ParseResult<'a, O> = Result, O>; #[cfg(feature = "tracing")] @@ -853,8 +855,11 @@ fn statement(i: SliceIter) -> Result, Statement> { //trace_macros!(false); /// Parses a LocatedSpan into a list of Statements or an `error::Error`. -pub fn parse<'a>(input: OffsetStrIter<'a>) -> std::result::Result, String> { - match tokenize(input.clone(), true) { +pub fn parse<'a>( + input: OffsetStrIter<'a>, + comment_map: Option<&mut CommentMap>, +) -> std::result::Result, String> { + match tokenize(input.clone(), comment_map) { Ok(tokenized) => { let mut out = Vec::new(); let mut i_ = SliceIter::new(&tokenized); diff --git a/src/tokenizer/mod.rs b/src/tokenizer/mod.rs index 07d5a74..e669342 100644 --- a/src/tokenizer/mod.rs +++ b/src/tokenizer/mod.rs @@ -23,6 +23,9 @@ use crate::ast::*; use crate::error::StackPrinter; use crate::iter::OffsetStrIter; +pub type CommentGroup = Vec; +pub type CommentMap = std::collections::HashMap; + fn is_symbol_char<'a>(i: OffsetStrIter<'a>) -> Result, u8> { let mut _i = i.clone(); let c = match _i.next() { @@ -452,12 +455,16 @@ fn token<'a>(input: OffsetStrIter<'a>) -> Result, Token> { } /// Consumes an input OffsetStrIter and returns either a Vec or a error::Error. +/// If a comment_map is passed in then it will store the comments indexed by their +/// line number. pub fn tokenize<'a>( input: OffsetStrIter<'a>, - skip_comments: bool, + mut comment_map: Option<&mut CommentMap>, ) -> std::result::Result, String> { let mut out = Vec::new(); let mut i = input.clone(); + let mut comment_group = Vec::new(); + let mut comment_was_last: Option = None; loop { if let Result::Complete(_, _) = eoi(i.clone()) { break; @@ -489,14 +496,38 @@ pub fn tokenize<'a>( } Result::Complete(rest, tok) => { i = rest; - if (skip_comments && tok.typ == TokenType::COMMENT) || tok.typ == TokenType::WS { - // we skip comments and whitespace - continue; + match (&mut comment_map, &tok.typ) { + // variants with a comment_map + (&mut Some(_), &TokenType::COMMENT) => { + comment_group.push(tok.clone()); + comment_was_last = Some(tok.clone()); + continue; + } + (&mut Some(ref mut map), _) => { + out.push(tok); + if let Some(tok) = comment_was_last { + map.insert(tok.pos.line, comment_group); + comment_group = Vec::new(); + } + } + // variants without a comment_map + (None, TokenType::WS) | (None, TokenType::COMMENT) => continue, + (None, _) => { + out.push(tok); + } } - out.push(tok); + comment_was_last = None; } } } + // if we had a comments at the end then we need to do a final + // insert into our map. + if let Some(ref mut map) = comment_map { + if let Some(ref tok) = comment_group.last() { + let line = tok.pos.line; + map.insert(line, comment_group); + } + } // ensure that we always have an END token to go off of. out.push(Token { fragment: String::new(), diff --git a/src/tokenizer/test.rs b/src/tokenizer/test.rs index 0ef757c..13d58e0 100644 --- a/src/tokenizer/test.rs +++ b/src/tokenizer/test.rs @@ -89,7 +89,7 @@ fn test_string_with_escaping() { #[test] fn test_tokenize_bareword_with_dash() { let input = OffsetStrIter::new("foo-bar "); - let result = tokenize(input.clone(), true); + let result = tokenize(input.clone(), None); assert!(result.is_ok(), format!("result {:?} is not ok", result)); if let Ok(toks) = result { assert_eq!(toks.len(), 2); @@ -157,7 +157,7 @@ fn test_tokenize_one_of_each() { "map out filter assert let import func select as => [ ] { } ; = % / * \ + - . ( ) , 1 . foo \"bar\" // comment\n ; true false == < > <= >= !=", ); - let result = tokenize(input.clone(), true); + let result = tokenize(input.clone(), None); assert!(result.is_ok(), format!("result {:?} is not ok", result)); let v = result.unwrap(); for (i, t) in v.iter().enumerate() { @@ -170,7 +170,7 @@ fn test_tokenize_one_of_each() { #[test] fn test_parse_has_end() { let input = OffsetStrIter::new("foo"); - let result = tokenize(input.clone(), true); + let result = tokenize(input.clone(), None); assert!(result.is_ok()); let v = result.unwrap(); assert_eq!(v.len(), 2);