DEV: Generate a comment map as part of our tokenization.

The comment_map is optional, but if it is passed in, it will be populated
during tokenization.
This commit is contained in:
Jeremy Wall 2019-05-20 21:02:51 -05:00
parent f3a08718dc
commit 6661e02a75
7 changed files with 90 additions and 81 deletions

View File

@ -11,27 +11,30 @@
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::borrow::BorrowMut;
use std::io::Write;
use crate::ast::*;
use crate::parse::CommentMap;
// TODO(jwall): We really need a way to preserve comments for these.
// Perhaps for code formatting we actually want to work on the token stream instead?
/// Holds the state used to print an AST to the writer `w`.
///
/// The `'a` lifetime is the lifetime of the optional borrowed `CommentMap`.
pub struct AstPrinter<W>
pub struct AstPrinter<'a, W>
where
W: Write,
{
// Indentation setting handed to the constructor.
// NOTE(review): presumably the number of spaces per nesting level -- confirm.
indent: usize,
// NOTE(review): presumably the indentation currently in effect; the
// constructor starts it at 0 -- confirm against the render code.
curr_indent: usize,
// Destination that the printed output is written to.
w: W,
// Indexed by line that the comment was on.
// We use this to determine when to print a comment in our AstPrinter
comment_map: Option<&'a CommentMap>,
// NOTE(review): looks like the slot for an I/O error hit while printing;
// the constructor initializes it to None -- confirm how it gets set.
pub err: Option<std::io::Error>,
}
// TODO(jwall): At some point we probably want to be more aware of line length
// in our formatting. But not at the moment.
impl<W> AstPrinter<W>
impl<'a, W> AstPrinter<'a, W>
where
W: Write,
{
@ -39,45 +42,15 @@ where
AstPrinter {
indent: indent,
curr_indent: 0,
comment_map: None,
w: w,
err: None,
}
}
pub fn visit_token(&mut self, t: &Token) -> std::io::Result<()> {
let w: &mut Write = self.w.borrow_mut();
// Do we care about line length?
match t.typ {
TokenType::BAREWORD | TokenType::BOOLEAN | TokenType::DIGIT => {
write!(w, "{}", t.fragment)?;
}
TokenType::EMPTY => {
write!(w, "NULL")?;
}
TokenType::PUNCT => {
// TODO(jwall): We need to identify the points at which we
// introduce new lines and new indentation scopes.
}
TokenType::COMMENT => {
// We need to track some state here probably.
// Do we leave comments untouched?
}
TokenType::PIPEQUOTE => {
// FIXME I think this is supposed to be removed.
}
TokenType::QUOTED => {
w.write(&['"' as u8])?;
write!(w, "{}", Self::escape_quotes(&t.fragment))?;
w.write(&['"' as u8])?;
}
TokenType::WS => {
// TODO(jwall): Track some state around new lines here?
}
TokenType::END => {
// NOOP
}
};
Ok(())
/// Builder-style setter: stores a borrow of `map` in this printer's
/// `comment_map` field and hands the printer back to the caller.
pub fn with_comment_map(mut self, map: &'a CommentMap) -> Self {
    let borrowed_comments: Option<&'a CommentMap> = Some(map);
    self.comment_map = borrowed_comments;
    self
}
fn make_indent(&self) -> String {

View File

@ -16,14 +16,14 @@ use crate::ast::printer::*;
use crate::iter::OffsetStrIter;
use crate::parse::*;
fn assert_parse(input: &str) -> Vec<Statement> {
parse(OffsetStrIter::new(input)).unwrap()
/// Test helper: runs `parse` over `input`, forwarding the optional
/// `comment_map`, and panics (via `unwrap`) if parsing returns an error.
fn assert_parse(input: &str, comment_map: Option<&mut CommentMap>) -> Vec<Statement> {
    let source = OffsetStrIter::new(input);
    let parsed = parse(source, comment_map);
    parsed.unwrap()
}
#[test]
fn test_simple_value_printing() {
let input = "1;";
let stmts = assert_parse(input);
let stmts = assert_parse(input, None);
let mut buffer: Vec<u8> = Vec::new();
let mut printer = AstPrinter::new(0, &mut buffer);
printer.render(&stmts);
@ -34,7 +34,7 @@ fn test_simple_value_printing() {
#[test]
fn test_simple_selector_printing() {
let input = "foo.bar.quux;";
let stmts = assert_parse(input);
let stmts = assert_parse(input, None);
let mut buffer: Vec<u8> = Vec::new();
let mut printer = AstPrinter::new(0, &mut buffer);
printer.render(&stmts);
@ -45,7 +45,7 @@ fn test_simple_selector_printing() {
#[test]
fn test_simple_quoted_printing() {
let input = "\"foo\";";
let stmts = assert_parse(input);
let stmts = assert_parse(input, None);
let mut buffer: Vec<u8> = Vec::new();
let mut printer = AstPrinter::new(0, &mut buffer);
printer.render(&stmts);
@ -56,7 +56,7 @@ fn test_simple_quoted_printing() {
#[test]
fn test_escaped_quoted_printing() {
let input = "\"f\\\\o\\\"o\";";
let stmts = assert_parse(input);
let stmts = assert_parse(input, None);
let mut buffer: Vec<u8> = Vec::new();
let mut printer = AstPrinter::new(0, &mut buffer);
printer.render(&stmts);
@ -67,7 +67,7 @@ fn test_escaped_quoted_printing() {
#[test]
fn test_empty_tuple_printing() {
let input = "{};";
let stmts = assert_parse(input);
let stmts = assert_parse(input, None);
let mut buffer: Vec<u8> = Vec::new();
let mut printer = AstPrinter::new(2, &mut buffer);
printer.render(&stmts);
@ -78,7 +78,7 @@ fn test_empty_tuple_printing() {
#[test]
fn test_empty_list_printing() {
let input = "[];";
let stmts = assert_parse(input);
let stmts = assert_parse(input, None);
let mut buffer: Vec<u8> = Vec::new();
let mut printer = AstPrinter::new(2, &mut buffer);
printer.render(&stmts);
@ -89,7 +89,7 @@ fn test_empty_list_printing() {
#[test]
fn test_non_empty_tuple_printing() {
let input = "{\n foo = 1,\n};";
let stmts = assert_parse(input);
let stmts = assert_parse(input, None);
let mut buffer: Vec<u8> = Vec::new();
let mut printer = AstPrinter::new(2, &mut buffer);
printer.render(&stmts);
@ -100,7 +100,7 @@ fn test_non_empty_tuple_printing() {
#[test]
fn test_nested_empty_tuple_printing() {
let input = "{\n foo = {},\n};";
let stmts = assert_parse(input);
let stmts = assert_parse(input, None);
let mut buffer: Vec<u8> = Vec::new();
let mut printer = AstPrinter::new(2, &mut buffer);
printer.render(&stmts);
@ -111,7 +111,7 @@ fn test_nested_empty_tuple_printing() {
#[test]
fn test_list_nested_empty_tuple_printing() {
let input = "[\n {},\n];";
let stmts = assert_parse(input);
let stmts = assert_parse(input, None);
let mut buffer: Vec<u8> = Vec::new();
let mut printer = AstPrinter::new(2, &mut buffer);
printer.render(&stmts);
@ -122,7 +122,7 @@ fn test_list_nested_empty_tuple_printing() {
#[test]
fn test_nested_non_empty_tuple_printing() {
let input = "{\n foo = {\n bar = 1,\n },\n};";
let stmts = assert_parse(input);
let stmts = assert_parse(input, None);
let mut buffer: Vec<u8> = Vec::new();
let mut printer = AstPrinter::new(2, &mut buffer);
printer.render(&stmts);
@ -133,7 +133,7 @@ fn test_nested_non_empty_tuple_printing() {
#[test]
fn test_nested_non_empty_list_printing() {
let input = "[\n [\n 1,\n ],\n];";
let stmts = assert_parse(input);
let stmts = assert_parse(input, None);
let mut buffer: Vec<u8> = Vec::new();
let mut printer = AstPrinter::new(2, &mut buffer);
printer.render(&stmts);
@ -144,7 +144,7 @@ fn test_nested_non_empty_list_printing() {
#[test]
fn test_simple_quoted_field_tuple_printing() {
let input = "{\n \"foo\" = {\n bar = 1,\n },\n};";
let stmts = assert_parse(input);
let stmts = assert_parse(input, None);
let mut buffer: Vec<u8> = Vec::new();
let mut printer = AstPrinter::new(2, &mut buffer);
printer.render(&stmts);
@ -158,7 +158,7 @@ fn test_simple_quoted_field_tuple_printing() {
#[test]
fn test_special_quoted_field_tuple_printing() {
let input = "{\n \"foo bar\" = {\n bar = 1,\n },\n};";
let stmts = assert_parse(input);
let stmts = assert_parse(input, None);
let mut buffer: Vec<u8> = Vec::new();
let mut printer = AstPrinter::new(2, &mut buffer);
printer.render(&stmts);
@ -169,7 +169,7 @@ fn test_special_quoted_field_tuple_printing() {
#[test]
fn test_let_statement_printing() {
let input = "let tpl = {\n \"foo bar\" = {\n bar = 1,\n },\n};";
let stmts = assert_parse(input);
let stmts = assert_parse(input, None);
let mut buffer: Vec<u8> = Vec::new();
let mut printer = AstPrinter::new(2, &mut buffer);
printer.render(&stmts);
@ -180,7 +180,7 @@ fn test_let_statement_printing() {
#[test]
fn test_call_expr_printing() {
let input = "call(\n foo,\n bar,\n);";
let stmts = assert_parse(input);
let stmts = assert_parse(input, None);
let mut buffer: Vec<u8> = Vec::new();
let mut printer = AstPrinter::new(2, &mut buffer);
printer.render(&stmts);
@ -191,7 +191,7 @@ fn test_call_expr_printing() {
#[test]
fn test_call_expr_one_arg_printing() {
let input = "call(foo);";
let stmts = assert_parse(input);
let stmts = assert_parse(input, None);
let mut buffer: Vec<u8> = Vec::new();
let mut printer = AstPrinter::new(2, &mut buffer);
printer.render(&stmts);
@ -202,7 +202,7 @@ fn test_call_expr_one_arg_printing() {
#[test]
fn test_copy_expr_printing() {
let input = "copy{\n foo = 1,\n bar = 2,\n};";
let stmts = assert_parse(input);
let stmts = assert_parse(input, None);
let mut buffer: Vec<u8> = Vec::new();
let mut printer = AstPrinter::new(2, &mut buffer);
printer.render(&stmts);
@ -213,7 +213,7 @@ fn test_copy_expr_printing() {
#[test]
fn test_copy_expr_one_arg_printing() {
let input = "copy{\n foo = 1,\n};";
let stmts = assert_parse(input);
let stmts = assert_parse(input, None);
let mut buffer: Vec<u8> = Vec::new();
let mut printer = AstPrinter::new(2, &mut buffer);
printer.render(&stmts);
@ -224,7 +224,7 @@ fn test_copy_expr_one_arg_printing() {
#[test]
fn test_out_expr_printing() {
let input = "out json {\n foo = 1,\n};";
let stmts = assert_parse(input);
let stmts = assert_parse(input, None);
let mut buffer: Vec<u8> = Vec::new();
let mut printer = AstPrinter::new(2, &mut buffer);
printer.render(&stmts);
@ -235,7 +235,7 @@ fn test_out_expr_printing() {
#[test]
fn test_select_expr_no_default_printing() {
let input = "select true, {\n true = 1,\n false = 2,\n};";
let stmts = assert_parse(input);
let stmts = assert_parse(input, None);
let mut buffer: Vec<u8> = Vec::new();
let mut printer = AstPrinter::new(2, &mut buffer);
printer.render(&stmts);
@ -246,7 +246,7 @@ fn test_select_expr_no_default_printing() {
#[test]
fn test_select_expr_with_default_printing() {
let input = "select true, 3, {\n true = 1,\n false = 2,\n};";
let stmts = assert_parse(input);
let stmts = assert_parse(input, None);
let mut buffer: Vec<u8> = Vec::new();
let mut printer = AstPrinter::new(2, &mut buffer);
printer.render(&stmts);
@ -257,7 +257,7 @@ fn test_select_expr_with_default_printing() {
#[test]
fn test_not_expr_printing() {
let input = "not true;";
let stmts = assert_parse(input);
let stmts = assert_parse(input, None);
let mut buffer: Vec<u8> = Vec::new();
let mut printer = AstPrinter::new(2, &mut buffer);
printer.render(&stmts);
@ -268,7 +268,7 @@ fn test_not_expr_printing() {
#[test]
fn test_fail_expr_printing() {
let input = "fail \"AHHh\";";
let stmts = assert_parse(input);
let stmts = assert_parse(input, None);
let mut buffer: Vec<u8> = Vec::new();
let mut printer = AstPrinter::new(2, &mut buffer);
printer.render(&stmts);
@ -279,7 +279,7 @@ fn test_fail_expr_printing() {
#[test]
fn test_trace_expr_printing() {
let input = "TRACE \"AHHh\";";
let stmts = assert_parse(input);
let stmts = assert_parse(input, None);
let mut buffer: Vec<u8> = Vec::new();
let mut printer = AstPrinter::new(2, &mut buffer);
printer.render(&stmts);
@ -300,7 +300,7 @@ fn test_module_no_out_expr_printing() {
\"cpu_count\" = mod.cpu,
};
};";
let stmts = assert_parse(input);
let stmts = assert_parse(input, None);
let mut buffer: Vec<u8> = Vec::new();
let mut printer = AstPrinter::new(2, &mut buffer);
printer.render(&stmts);
@ -321,7 +321,7 @@ fn test_module_with_out_expr_printing() {
\"cpu_count\" = mod.cpu,
};
};";
let stmts = assert_parse(input);
let stmts = assert_parse(input, None);
let mut buffer: Vec<u8> = Vec::new();
let mut printer = AstPrinter::new(2, &mut buffer);
printer.render(&stmts);
@ -335,7 +335,7 @@ fn test_func_expr_printing() {
foo = foo,
bar = bar,
};";
let stmts = assert_parse(input);
let stmts = assert_parse(input, None);
let mut buffer: Vec<u8> = Vec::new();
let mut printer = AstPrinter::new(2, &mut buffer);
printer.render(&stmts);
@ -348,7 +348,7 @@ fn test_func_expr_single_arg_printing() {
let input = "let f = func (foo) => {
foo = foo,
};";
let stmts = assert_parse(input);
let stmts = assert_parse(input, None);
let mut buffer: Vec<u8> = Vec::new();
let mut printer = AstPrinter::new(2, &mut buffer);
printer.render(&stmts);
@ -361,7 +361,7 @@ fn test_format_expr_single_arg_printing() {
let input = "\"what? @{item.foo}\" % {
foo = 1,
};";
let stmts = assert_parse(input);
let stmts = assert_parse(input, None);
let mut buffer: Vec<u8> = Vec::new();
let mut printer = AstPrinter::new(2, &mut buffer);
printer.render(&stmts);
@ -374,7 +374,7 @@ fn test_format_expr_list_arg_printing() {
let input = "\"what? @ @\" % (
1,
2);";
let stmts = assert_parse(input);
let stmts = assert_parse(input, None);
let mut buffer: Vec<u8> = Vec::new();
let mut printer = AstPrinter::new(2, &mut buffer);
printer.render(&stmts);

View File

@ -26,7 +26,7 @@ use ucglib::iter::OffsetStrIter;
use ucglib::parse::*;
/// Runs `parse` over `i` and discards the result.
/// NOTE(review): presumably exists so the `Bencher` functions in this file
/// can time `parse` -- confirm.
fn do_parse(i: &str) {
parse(OffsetStrIter::new(i));
parse(OffsetStrIter::new(i), None);
}
fn parse_int(b: &mut Bencher) {

View File

@ -303,7 +303,7 @@ impl<'a> FileBuilder<'a> {
}
fn eval_input(&mut self, input: OffsetStrIter) -> Result<Rc<Val>, Box<dyn Error>> {
match parse(input.clone()) {
match parse(input.clone(), None) {
Ok(stmts) => {
//panic!("Successfully parsed {}", input);
let mut out: Option<Rc<Val>> = None;

View File

@ -28,6 +28,8 @@ use crate::error::StackPrinter;
use crate::iter::OffsetStrIter;
use crate::tokenizer::*;
pub use crate::tokenizer::{CommentGroup, CommentMap};
type ParseResult<'a, O> = Result<SliceIter<'a, Token>, O>;
#[cfg(feature = "tracing")]
@ -853,8 +855,11 @@ fn statement(i: SliceIter<Token>) -> Result<SliceIter<Token>, Statement> {
//trace_macros!(false);
/// Parses an `OffsetStrIter` into a list of Statements or an error `String`.
pub fn parse<'a>(input: OffsetStrIter<'a>) -> std::result::Result<Vec<Statement>, String> {
match tokenize(input.clone(), true) {
pub fn parse<'a>(
input: OffsetStrIter<'a>,
comment_map: Option<&mut CommentMap>,
) -> std::result::Result<Vec<Statement>, String> {
match tokenize(input.clone(), comment_map) {
Ok(tokenized) => {
let mut out = Vec::new();
let mut i_ = SliceIter::new(&tokenized);

View File

@ -23,6 +23,9 @@ use crate::ast::*;
use crate::error::StackPrinter;
use crate::iter::OffsetStrIter;
/// A run of comment tokens collected together by `tokenize`.
pub type CommentGroup = Vec<Token>;
/// Comment groups keyed by line number. `tokenize` inserts each group under
/// the `pos.line` of the last comment token in that group.
pub type CommentMap = std::collections::HashMap<usize, CommentGroup>;
fn is_symbol_char<'a>(i: OffsetStrIter<'a>) -> Result<OffsetStrIter<'a>, u8> {
let mut _i = i.clone();
let c = match _i.next() {
@ -452,12 +455,16 @@ fn token<'a>(input: OffsetStrIter<'a>) -> Result<OffsetStrIter<'a>, Token> {
}
/// Consumes an input OffsetStrIter and returns either a Vec<Token> or an error `String`.
/// If a comment_map is passed in then it will store the comments indexed by their
/// line number.
pub fn tokenize<'a>(
input: OffsetStrIter<'a>,
skip_comments: bool,
mut comment_map: Option<&mut CommentMap>,
) -> std::result::Result<Vec<Token>, String> {
let mut out = Vec::new();
let mut i = input.clone();
let mut comment_group = Vec::new();
let mut comment_was_last: Option<Token> = None;
loop {
if let Result::Complete(_, _) = eoi(i.clone()) {
break;
@ -489,14 +496,38 @@ pub fn tokenize<'a>(
}
Result::Complete(rest, tok) => {
i = rest;
if (skip_comments && tok.typ == TokenType::COMMENT) || tok.typ == TokenType::WS {
// we skip comments and whitespace
continue;
match (&mut comment_map, &tok.typ) {
// variants with a comment_map
(&mut Some(_), &TokenType::COMMENT) => {
comment_group.push(tok.clone());
comment_was_last = Some(tok.clone());
continue;
}
(&mut Some(ref mut map), _) => {
out.push(tok);
if let Some(tok) = comment_was_last {
map.insert(tok.pos.line, comment_group);
comment_group = Vec::new();
}
}
// variants without a comment_map
(None, TokenType::WS) | (None, TokenType::COMMENT) => continue,
(None, _) => {
out.push(tok);
}
}
out.push(tok);
comment_was_last = None;
}
}
}
// If we had comments at the end then we need to do a final
// insert into our map.
if let Some(ref mut map) = comment_map {
if let Some(ref tok) = comment_group.last() {
let line = tok.pos.line;
map.insert(line, comment_group);
}
}
// ensure that we always have an END token to go off of.
out.push(Token {
fragment: String::new(),

View File

@ -89,7 +89,7 @@ fn test_string_with_escaping() {
#[test]
fn test_tokenize_bareword_with_dash() {
let input = OffsetStrIter::new("foo-bar ");
let result = tokenize(input.clone(), true);
let result = tokenize(input.clone(), None);
assert!(result.is_ok(), format!("result {:?} is not ok", result));
if let Ok(toks) = result {
assert_eq!(toks.len(), 2);
@ -157,7 +157,7 @@ fn test_tokenize_one_of_each() {
"map out filter assert let import func select as => [ ] { } ; = % / * \
+ - . ( ) , 1 . foo \"bar\" // comment\n ; true false == < > <= >= !=",
);
let result = tokenize(input.clone(), true);
let result = tokenize(input.clone(), None);
assert!(result.is_ok(), format!("result {:?} is not ok", result));
let v = result.unwrap();
for (i, t) in v.iter().enumerate() {
@ -170,7 +170,7 @@ fn test_tokenize_one_of_each() {
#[test]
fn test_parse_has_end() {
let input = OffsetStrIter::new("foo");
let result = tokenize(input.clone(), true);
let result = tokenize(input.clone(), None);
assert!(result.is_ok());
let v = result.unwrap();
assert_eq!(v.len(), 2);