2017-10-02 21:32:06 -05:00
|
|
|
// Copyright 2017 Jeremy Wall <jeremy@marzhillstudios.com>
|
|
|
|
//
|
|
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
// you may not use this file except in compliance with the License.
|
|
|
|
// You may obtain a copy of the License at
|
|
|
|
//
|
|
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
//
|
|
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
// See the License for the specific language governing permissions and
|
|
|
|
// limitations under the License.
|
2018-02-07 19:49:13 -06:00
|
|
|
|
2019-01-03 10:20:59 -06:00
|
|
|
//! The tokenization stage of the ucg compiler.
|
2018-02-02 15:27:33 -06:00
|
|
|
use std;
|
2017-10-02 21:32:06 -05:00
|
|
|
|
2018-11-05 21:34:12 -06:00
|
|
|
use abortable_parser::combinators::*;
|
|
|
|
use abortable_parser::iter::SliceIter;
|
2019-03-26 20:54:07 -04:00
|
|
|
use abortable_parser::{Error, Result};
|
2017-10-02 21:32:06 -05:00
|
|
|
|
2018-12-06 12:23:52 -06:00
|
|
|
use crate::ast::*;
|
2019-05-29 18:57:24 -05:00
|
|
|
use crate::error::BuildError;
|
2018-12-06 12:23:52 -06:00
|
|
|
use crate::iter::OffsetStrIter;
|
2018-11-05 21:34:12 -06:00
|
|
|
|
2019-05-20 21:02:51 -05:00
|
|
|
/// A run of adjacent COMMENT tokens collected by `tokenize`.
pub type CommentGroup = Vec<Token>;

/// Comment groups indexed by line number; a BTreeMap keeps them ordered by line.
pub type CommentMap = std::collections::BTreeMap<usize, CommentGroup>;
|
2019-05-20 21:02:51 -05:00
|
|
|
|
2018-11-05 21:34:12 -06:00
|
|
|
fn is_symbol_char<'a>(i: OffsetStrIter<'a>) -> Result<OffsetStrIter<'a>, u8> {
|
|
|
|
let mut _i = i.clone();
|
|
|
|
let c = match _i.next() {
|
|
|
|
Some(c) => *c,
|
|
|
|
None => {
|
|
|
|
return Result::Fail(Error::new(
|
|
|
|
"Unexpected End of Input".to_string(),
|
|
|
|
Box::new(_i.clone()),
|
2019-01-18 19:58:57 -06:00
|
|
|
));
|
2017-10-02 21:32:06 -05:00
|
|
|
}
|
2018-11-05 21:34:12 -06:00
|
|
|
};
|
|
|
|
if (c as char).is_ascii_alphanumeric() || c == b'-' || c == b'_' {
|
|
|
|
Result::Complete(_i, c)
|
|
|
|
} else {
|
|
|
|
Result::Fail(Error::new(
|
|
|
|
"Not a symbol character".to_string(),
|
|
|
|
Box::new(_i.clone()),
|
|
|
|
))
|
2017-10-02 21:32:06 -05:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-11-05 21:34:12 -06:00
|
|
|
fn escapequoted<'a>(input: OffsetStrIter<'a>) -> Result<OffsetStrIter<'a>, String> {
|
2018-02-05 19:35:17 -06:00
|
|
|
// loop until we find a " that is not preceded by \.
|
2019-08-12 18:10:48 -05:00
|
|
|
// Collapse all \<char> to just char for escaping exept for \n \r \t and \@.
|
2018-02-05 19:35:17 -06:00
|
|
|
let mut frag = String::new();
|
|
|
|
let mut escape = false;
|
2018-11-05 21:34:12 -06:00
|
|
|
let mut _input = input.clone();
|
|
|
|
loop {
|
|
|
|
let c = match _input.next() {
|
|
|
|
Some(c) => *c,
|
|
|
|
None => break,
|
|
|
|
};
|
2019-08-12 18:10:48 -05:00
|
|
|
if escape {
|
|
|
|
match c as char {
|
|
|
|
'n' => {
|
|
|
|
frag.push('\n');
|
|
|
|
escape = false;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
'r' => {
|
|
|
|
frag.push('\r');
|
|
|
|
escape = false;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
't' => {
|
|
|
|
frag.push('\t');
|
|
|
|
escape = false;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
_ => {
|
|
|
|
//noop
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2018-11-05 21:34:12 -06:00
|
|
|
if c == '\\' as u8 && !escape {
|
2018-02-05 19:41:47 -06:00
|
|
|
// eat this slash and set our escaping sentinel
|
2018-02-05 19:35:17 -06:00
|
|
|
escape = true;
|
2018-11-05 21:34:12 -06:00
|
|
|
} else if c == '"' as u8 && !escape {
|
2018-02-05 19:41:47 -06:00
|
|
|
// Bail if this is an unescaped "
|
2018-02-05 19:35:17 -06:00
|
|
|
// we exit here.
|
2018-11-05 21:34:12 -06:00
|
|
|
return Result::Complete(_input, frag);
|
2018-02-05 19:35:17 -06:00
|
|
|
} else {
|
|
|
|
// we accumulate this character.
|
2018-11-05 21:34:12 -06:00
|
|
|
frag.push(c as char);
|
2018-02-05 19:35:17 -06:00
|
|
|
escape = false; // reset our escaping sentinel
|
|
|
|
}
|
|
|
|
}
|
2018-11-05 21:34:12 -06:00
|
|
|
return Result::Incomplete(_input.clone());
|
2018-02-05 19:35:17 -06:00
|
|
|
}
|
|
|
|
|
2018-11-05 21:34:12 -06:00
|
|
|
/// Matches a double quoted string and produces a QUOTED token holding the
/// unescaped contents (the surrounding quotes are consumed but not included).
make_fn!(strtok<OffsetStrIter, Token>,
    do_each!(
        span => input!(),
        _ => text_token!("\""),
        frag => escapequoted,
        (Token{
            typ: TokenType::QUOTED,
            pos: Position::from(&span),
            fragment: frag.to_string(),
        })
    )
);

/// Matches a bareword (identifier): must start with an ascii letter, then
/// continues through symbol characters (alphanumeric, `-`, `_`).
make_fn!(barewordtok<OffsetStrIter, Token>,
    do_each!(
        span => input!(),
        _ => peek!(ascii_alpha),
        frag => consume_all!(is_symbol_char),
        (Token{
            typ: TokenType::BAREWORD,
            pos: Position::from(&span),
            fragment: frag.to_string(),
        })
    )
);

/// Matches a run of ascii digits and produces a DIGIT token.
make_fn!(digittok<OffsetStrIter, Token>,
    do_each!(
        span => input!(),
        _ => peek!(ascii_digit),
        digits => consume_all!(ascii_digit),
        (Token{
            typ: TokenType::DIGIT,
            pos: Position::from(&span),
            fragment: digits.to_string(),
        })
    )
);

/// Matches the literal `true` or `false` and produces a BOOLEAN token.
make_fn!(booleantok<OffsetStrIter, Token>,
    do_each!(
        span => input!(),
        token => either!(
            text_token!("true"),
            text_token!("false")
        ),
        (Token{
            typ: TokenType::BOOLEAN,
            pos: Position::from(&span),
            fragment: token.to_string(),
        })
    )
);
|
|
|
|
|
2018-11-05 21:34:12 -06:00
|
|
|
/// do_text_token_tok! is a helper macro to make building a simple text_token token
/// less code.
macro_rules! do_text_token_tok {
    // Variant with a trailing `WS` marker: the literal must be followed by
    // whitespace or a comment (which is consumed). Used below for keywords so
    // that a keyword prefix of a longer bareword does not match.
    ($i:expr, $type:expr, $text_token:expr, WS) => {
        do_each!($i,
            span => input!(),
            frag => text_token!($text_token),
            _ => either!(whitespace, comment),
            (Token {
                typ: $type,
                pos: Position::from(&span),
                fragment: frag.to_string(),
            })
        )
    };

    // Plain variant: match just the literal text.
    ($i:expr, $type:expr, $text_token:expr) => {
        do_each!($i,
            span => input!(),
            frag => text_token!($text_token),
            (Token {
                typ: $type,
                pos: Position::from(&span),
                fragment: frag.to_string(),
            })
        )
    };
}
|
|
|
|
|
2018-11-05 21:34:12 -06:00
|
|
|
// --- Fixed-text token parsers -----------------------------------------------
// Each parser below matches one fixed piece of text and produces a Token of
// the given type. Parsers built with the `WS` variant of do_text_token_tok!
// are keywords: they additionally require (and consume) trailing whitespace
// or a comment.

make_fn!(emptytok<OffsetStrIter, Token>,
    do_text_token_tok!(TokenType::EMPTY, "NULL")
);

make_fn!(commatok<OffsetStrIter, Token>,
    do_text_token_tok!(TokenType::PUNCT, ",")
);

make_fn!(lbracetok<OffsetStrIter, Token>,
    do_text_token_tok!(TokenType::PUNCT, "{")
);

make_fn!(rbracetok<OffsetStrIter, Token>,
    do_text_token_tok!(TokenType::PUNCT, "}")
);

make_fn!(lparentok<OffsetStrIter, Token>,
    do_text_token_tok!(TokenType::PUNCT, "(")
);

make_fn!(rparentok<OffsetStrIter, Token>,
    do_text_token_tok!(TokenType::PUNCT, ")")
);

make_fn!(dottok<OffsetStrIter, Token>,
    do_text_token_tok!(TokenType::PUNCT, ".")
);

make_fn!(plustok<OffsetStrIter, Token>,
    do_text_token_tok!(TokenType::PUNCT, "+")
);

make_fn!(dashtok<OffsetStrIter, Token>,
    do_text_token_tok!(TokenType::PUNCT, "-")
);

make_fn!(startok<OffsetStrIter, Token>,
    do_text_token_tok!(TokenType::PUNCT, "*")
);

make_fn!(slashtok<OffsetStrIter, Token>,
    do_text_token_tok!(TokenType::PUNCT, "/")
);

make_fn!(modulustok<OffsetStrIter, Token>,
    do_text_token_tok!(TokenType::PUNCT, "%%")
);

make_fn!(pcttok<OffsetStrIter, Token>,
    do_text_token_tok!(TokenType::PUNCT, "%")
);

make_fn!(eqeqtok<OffsetStrIter, Token>,
    do_text_token_tok!(TokenType::PUNCT, "==")
);

make_fn!(notequaltok<OffsetStrIter, Token>,
    do_text_token_tok!(TokenType::PUNCT, "!=")
);

make_fn!(matchtok<OffsetStrIter, Token>,
    do_text_token_tok!(TokenType::PUNCT, "~")
);

make_fn!(notmatchtok<OffsetStrIter, Token>,
    do_text_token_tok!(TokenType::PUNCT, "!~")
);

make_fn!(gttok<OffsetStrIter, Token>,
    do_text_token_tok!(TokenType::PUNCT, ">")
);

make_fn!(gtequaltok<OffsetStrIter, Token>,
    do_text_token_tok!(TokenType::PUNCT, ">=")
);

make_fn!(ltequaltok<OffsetStrIter, Token>,
    do_text_token_tok!(TokenType::PUNCT, "<=")
);

make_fn!(lttok<OffsetStrIter, Token>,
    do_text_token_tok!(TokenType::PUNCT, "<")
);

make_fn!(equaltok<OffsetStrIter, Token>,
    do_text_token_tok!(TokenType::PUNCT, "=")
);

make_fn!(semicolontok<OffsetStrIter, Token>,
    do_text_token_tok!(TokenType::PUNCT, ";")
);

make_fn!(colontok<OffsetStrIter, Token>,
    do_text_token_tok!(TokenType::PUNCT, ":")
);

make_fn!(leftsquarebracket<OffsetStrIter, Token>,
    do_text_token_tok!(TokenType::PUNCT, "[")
);

make_fn!(rightsquarebracket<OffsetStrIter, Token>,
    do_text_token_tok!(TokenType::PUNCT, "]")
);

make_fn!(fatcommatok<OffsetStrIter, Token>,
    do_text_token_tok!(TokenType::PUNCT, "=>")
);

make_fn!(andtok<OffsetStrIter, Token>,
    do_text_token_tok!(TokenType::PUNCT, "&&")
);

make_fn!(ortok<OffsetStrIter, Token>,
    do_text_token_tok!(TokenType::PUNCT, "||")
);

// --- Keyword parsers (require trailing whitespace or comment) ---------------

make_fn!(selecttok<OffsetStrIter, Token>,
    do_text_token_tok!(TokenType::BAREWORD, "select", WS)
);

make_fn!(intok<OffsetStrIter, Token>,
    do_text_token_tok!(TokenType::BAREWORD, "in", WS)
);

make_fn!(istok<OffsetStrIter, Token>,
    do_text_token_tok!(TokenType::BAREWORD, "is", WS)
);

make_fn!(nottok<OffsetStrIter, Token>,
    do_text_token_tok!(TokenType::BAREWORD, "not", WS)
);

make_fn!(tracetok<OffsetStrIter, Token>,
    do_text_token_tok!(TokenType::BAREWORD, "TRACE", WS)
);

make_fn!(failtok<OffsetStrIter, Token>,
    do_text_token_tok!(TokenType::BAREWORD, "fail", WS)
);

make_fn!(functok<OffsetStrIter, Token>,
    do_text_token_tok!(TokenType::BAREWORD, "func", WS)
);

make_fn!(moduletok<OffsetStrIter, Token>,
    do_text_token_tok!(TokenType::BAREWORD, "module", WS)
);

make_fn!(lettok<OffsetStrIter, Token>,
    do_text_token_tok!(TokenType::BAREWORD, "let", WS)
);

make_fn!(importtok<OffsetStrIter, Token>,
    do_text_token_tok!(TokenType::BAREWORD, "import", WS)
);

make_fn!(includetok<OffsetStrIter, Token>,
    do_text_token_tok!(TokenType::BAREWORD, "include", WS)
);

make_fn!(asserttok<OffsetStrIter, Token>,
    do_text_token_tok!(TokenType::BAREWORD, "assert", WS)
);

make_fn!(outtok<OffsetStrIter, Token>,
    do_text_token_tok!(TokenType::BAREWORD, "out", WS)
);

make_fn!(converttok<OffsetStrIter, Token>,
    do_text_token_tok!(TokenType::BAREWORD, "convert", WS)
);

make_fn!(astok<OffsetStrIter, Token>,
    do_text_token_tok!(TokenType::BAREWORD, "as", WS)
);

make_fn!(maptok<OffsetStrIter, Token>,
    do_text_token_tok!(TokenType::BAREWORD, "map", WS)
);

make_fn!(filtertok<OffsetStrIter, Token>,
    do_text_token_tok!(TokenType::BAREWORD, "filter", WS)
);

make_fn!(reducetok<OffsetStrIter, Token>,
    do_text_token_tok!(TokenType::BAREWORD, "reduce", WS)
);
|
|
|
|
|
2018-11-05 21:34:12 -06:00
|
|
|
/// Parses a `//` line comment and produces a COMMENT token whose fragment is
/// the comment text after `//` up to (but not including) the end of line.
/// Any trailing newline is consumed. The token position is taken from the
/// start of the comment (`input`), not from the end.
fn comment(input: OffsetStrIter) -> Result<OffsetStrIter, Token> {
    match text_token!(input, "//") {
        Result::Complete(rest, _) => {
            // Consume everything up to end-of-line or end-of-input.
            match until!(
                rest,
                either!(
                    eoi,
                    discard!(text_token!("\r\n")),
                    discard!(text_token!("\n"))
                )
            ) {
                Result::Complete(rest, cmt) => {
                    // Eat the new lines here before continuing
                    let rest =
                        match optional!(rest, either!(text_token!("\r\n"), text_token!("\n"))) {
                            Result::Complete(next_rest, _) => next_rest,
                            _ => rest,
                        };
                    return Result::Complete(rest, make_tok!(CMT => cmt.to_string(), input));
                }
                // If we didn't find a new line then we just grab everything.
                _ => {
                    return Result::Abort(Error::new(
                        "Unparsable comment".to_string(),
                        Box::new(rest.clone()),
                    ));
                }
            }
        }
        // No leading `//`: propagate the outcome unchanged.
        Result::Incomplete(ctx) => return Result::Incomplete(ctx),
        Result::Fail(e) => return Result::Fail(e),
        Result::Abort(e) => return Result::Abort(e),
    }
}
|
|
|
|
|
2018-11-05 21:34:12 -06:00
|
|
|
/// Matches one or more ascii whitespace characters and produces a WS token.
/// The fragment is left empty; the whitespace itself is discarded.
make_fn!(whitespace<OffsetStrIter, Token>,
    do_each!(
        span => input!(),
        _ => peek!(ascii_ws),
        _ => repeat!(ascii_ws),
        (Token{
            typ: TokenType::WS,
            pos: Position::from(&span),
            fragment: String::new(),
        })
    )
);

/// Matches end of input and produces an END sentinel token with an empty
/// fragment.
make_fn!(end_of_input<OffsetStrIter, Token>,
    do_each!(
        span => input!(),
        _ => eoi,
        (Token{
            typ: TokenType::END,
            pos: Position::from(&span),
            fragment: String::new(),
        })
    )
);
|
|
|
|
|
|
|
|
/// Matches a single token of any kind at the current input position.
///
/// The ordering of alternatives is load-bearing: longer or more specific
/// parsers must be tried before parsers that match a prefix of them (see
/// the inline notes below). Keywords come before `barewordtok` so they get
/// their own token identity.
fn token<'a>(input: OffsetStrIter<'a>) -> Result<OffsetStrIter<'a>, Token> {
    either!(
        input,
        strtok,
        emptytok, // This must come before the barewordtok
        digittok,
        commatok,
        rbracetok,
        lbracetok,
        lparentok,
        rparentok,
        dottok,
        andtok,
        ortok,
        plustok,
        dashtok,
        startok,
        comment, // Note comment must come before slashtok
        slashtok,
        modulustok, // Note "%%" must come before the single "%" pcttok
        pcttok,
        eqeqtok,
        notequaltok,
        matchtok,
        notmatchtok,
        complete!("Not >=".to_string(), gtequaltok),
        complete!("Not <=".to_string(), ltequaltok),
        gttok,
        lttok,
        fatcommatok, // Note fatcommatok must come before equaltok
        equaltok,
        semicolontok,
        colontok,
        leftsquarebracket,
        rightsquarebracket,
        booleantok,
        intok,
        istok,
        nottok,
        lettok,
        outtok,
        converttok,
        selecttok,
        asserttok,
        failtok,
        tracetok,
        functok,
        moduletok,
        importtok,
        includetok,
        astok,
        maptok,
        filtertok,
        reducetok,
        barewordtok,
        whitespace,
        end_of_input
    )
}
|
|
|
|
|
|
|
|
/// Consumes an input OffsetStrIter and returns either a Vec<Token> or a error::Error.
/// If a comment_map is passed in then it will store the comments indexed by their
/// line number.
///
/// Whitespace and comment tokens are not included in the returned token
/// stream. The stream always ends with an END sentinel token.
pub fn tokenize<'a>(
    input: OffsetStrIter<'a>,
    mut comment_map: Option<&mut CommentMap>,
) -> std::result::Result<Vec<Token>, BuildError> {
    let mut out = Vec::new();
    let mut i = input.clone();
    // Accumulates consecutive COMMENT tokens until a non-comment token is seen.
    let mut comment_group = Vec::new();
    // Holds the most recent token if (and only if) it was a comment; used to
    // decide when to flush comment_group into the map.
    let mut comment_was_last: Option<Token> = None;
    loop {
        if let Result::Complete(_, _) = eoi(i.clone()) {
            break;
        }
        match token(i.clone()) {
            Result::Abort(e) => {
                return Err(BuildError::from(e));
            }
            Result::Fail(e) => {
                return Err(BuildError::from(e));
            }
            Result::Incomplete(_offset) => {
                let err =
                    abortable_parser::Error::new("Invalid Token encountered", Box::new(i.clone()));
                return Err(BuildError::from(err));
            }
            Result::Complete(rest, tok) => {
                i = rest;
                match (&mut comment_map, &tok.typ) {
                    // variants with a comment_map
                    (&mut Some(_), &TokenType::COMMENT) => {
                        comment_group.push(tok.clone());
                        comment_was_last = Some(tok.clone());
                        continue;
                    }
                    (&mut Some(ref mut map), _) => {
                        // Whitespace is dropped from the output stream.
                        if tok.typ != TokenType::WS {
                            out.push(tok);
                        }
                        // A non-comment token ends the current comment group;
                        // store it under the last comment token's line number.
                        if let Some(tok) = comment_was_last {
                            map.insert(tok.pos.line, comment_group);
                            comment_group = Vec::new();
                        }
                    }
                    // variants without a comment_map
                    (None, TokenType::WS) | (None, TokenType::COMMENT) => continue,
                    (None, _) => {
                        out.push(tok);
                    }
                }
                comment_was_last = None;
            }
        }
    }
    // if we had a comments at the end then we need to do a final
    // insert into our map.
    if let Some(ref mut map) = comment_map {
        if let Some(ref tok) = comment_group.last() {
            let line = tok.pos.line;
            map.insert(line, comment_group);
        }
    }
    // ensure that we always have an END token to go off of.
    out.push(Token {
        fragment: String::new(),
        typ: TokenType::END,
        pos: Position::from(&i),
    });
    Ok(out)
}
|
|
|
|
|
2018-07-14 22:56:47 -05:00
|
|
|
/// Clones a token.
///
/// This is necessary to allow the match_type and match_token macros to work.
/// It serves as the default conversion handler and always succeeds.
pub fn token_clone(t: &Token) -> std::result::Result<Token, Error<SliceIter<Token>>> {
    Ok(t.clone())
}
|
|
|
|
|
2018-02-07 19:49:13 -06:00
|
|
|
/// nom macro that matches a Token by type and uses an optional conversion handler
/// for the matched Token.
///
/// The named shorthand arms (BOOLEAN, COMMENT, BAREWORD, EMPTY, STR, DIGIT,
/// PUNCT) expand to the general `($i, $t, $msg, $h)` arm with an appropriate
/// error message; the one-argument forms default the handler to `token_clone`.
macro_rules! match_type {
    ($i:expr,BOOLEAN => $h:expr) => {
        match_type!($i, TokenType::BOOLEAN, "Not a Boolean", $h)
    };

    ($i:expr,BOOLEAN) => {
        match_type!($i, BOOLEAN => token_clone)
    };

    ($i:expr,COMMENT => $h:expr) => {
        match_type!($i, TokenType::COMMENT, "Not a Comment", $h)
    };

    ($i:expr,COMMENT) => {
        match_type!($i, COMMENT => token_clone)
    };

    ($i:expr,BAREWORD => $h:expr) => {
        match_type!($i, TokenType::BAREWORD, "Not a Bareword", $h)
    };

    ($i:expr,BAREWORD) => {
        match_type!($i, BAREWORD => token_clone)
    };

    ($i:expr,EMPTY => $h:expr) => {
        match_type!($i, TokenType::EMPTY, "Not NULL", $h)
    };

    ($i:expr,EMPTY) => {
        match_type!($i, EMPTY => token_clone)
    };

    ($i:expr,STR => $h:expr) => {
        match_type!($i, TokenType::QUOTED, "Not a String", $h)
    };

    ($i:expr,STR) => {
        match_type!($i, STR => token_clone)
    };

    ($i:expr,DIGIT => $h:expr) => {
        match_type!($i, TokenType::DIGIT, "Not a DIGIT", $h)
    };

    ($i:expr,DIGIT) => {
        match_type!($i, DIGIT => token_clone)
    };

    ($i:expr,PUNCT => $h:expr) => {
        match_type!($i, TokenType::PUNCT, "Not PUNCTUATION", $h)
    };

    ($i:expr,PUNCT) => {
        match_type!($i, PUNCT => token_clone)
    };

    // General arm: match a token of type $t, then run the handler $h on it.
    // A handler error is wrapped in a Fail with $msg as context.
    ($i:expr, $t:expr, $msg:expr, $h:expr) => {{
        use abortable_parser::combinators::eoi;
        use abortable_parser::{Error, Result};
        use std;

        let mut _i = $i.clone();
        if eoi(_i.clone()).is_complete() {
            Result::Fail(Error::new(format!("End of Input! {}", $msg), Box::new(_i)))
        } else {
            match _i.next() {
                Some(tok) => {
                    if tok.typ == $t {
                        match $h(tok) {
                            std::result::Result::Ok(v) => Result::Complete(_i.clone(), v),
                            std::result::Result::Err(e) => {
                                Result::Fail(Error::caused_by($msg, Box::new(e), Box::new(_i)))
                            }
                        }
                    } else {
                        Result::Fail(Error::new($msg.to_string(), Box::new($i)))
                    }
                }
                None => Result::Fail(Error::new($msg.to_string(), Box::new($i))),
            }
        }
    }};
}
|
|
|
|
|
2018-02-07 19:49:13 -06:00
|
|
|
/// nom style macro that matches various Tokens by type and value and allows optional
/// conversion handlers for the matched Token.
///
/// The PUNCT/BAREWORD shorthand arms expand to the general
/// `($i, $t, $f, $msg, $h)` arm, defaulting the handler to `token_clone`.
macro_rules! match_token {
    ($i:expr,PUNCT => $f:expr) => {{
        use crate::tokenizer::token_clone;
        match_token!($i, PUNCT => $f, token_clone)
    }};

    ($i:expr,PUNCT => $f:expr, $h:expr) => {
        match_token!($i, TokenType::PUNCT, $f, format!("({})", $f), $h)
    };

    ($i:expr,BAREWORD => $f:expr) => {{
        use crate::tokenizer::token_clone;
        match_token!($i, BAREWORD => $f, token_clone)
    }};

    ($i:expr,BAREWORD => $f:expr, $h:expr) => {
        match_token!(
            $i,
            TokenType::BAREWORD,
            $f,
            format!("Expected BAREWORD but got ({})", $f),
            $h
        )
    };

    // General arm: match a token whose type is $t AND whose fragment equals $f,
    // then run the handler $h on it. Handler errors become a Fail with $msg
    // as context.
    ($i:expr, $t:expr, $f:expr, $msg:expr, $h:expr) => {{
        // Import Error alongside Result so the expansion does not depend on
        // the call site having `abortable_parser::Error` in scope (previously
        // only Result was imported even though Error is referenced below).
        use abortable_parser::{Error, Result};
        use std;
        let mut i_ = $i.clone();
        let tok = i_.next();
        if let Some(tok) = tok {
            if tok.typ == $t && &tok.fragment == $f {
                match $h(tok) {
                    std::result::Result::Ok(v) => Result::Complete(i_.clone(), v),
                    std::result::Result::Err(e) => {
                        Result::Fail(Error::caused_by($msg, Box::new(e), Box::new(i_)))
                    }
                }
            } else {
                Result::Fail(Error::new(
                    format!("Expected {} but got ({})", $msg, tok.fragment),
                    Box::new($i.clone()),
                ))
            }
        } else {
            Result::Fail(Error::new("Unexpected End Of Input", Box::new(i_)))
        }
    }};
}
|
|
|
|
|
2018-02-07 19:49:13 -06:00
|
|
|
/// nom style macro that matches punctuation Tokens.
/// Shorthand for `match_token!($i, PUNCT => $c)`.
macro_rules! punct {
    ($i:expr, $c:expr) => {
        match_token!($i, PUNCT => $c)
    };
}
|
|
|
|
|
2018-02-07 19:49:13 -06:00
|
|
|
/// nom style macro that matches any bareword Token.
/// Shorthand for `match_token!($i, BAREWORD => $w)`.
macro_rules! word {
    ($i:expr, $w:expr) => {
        match_token!($i, BAREWORD => $w)
    };
}
|
|
|
|
|
2018-02-07 19:49:13 -06:00
|
|
|
/// pos gets the current position from a TokenIter input without consuming it.
|
2018-11-05 21:34:12 -06:00
|
|
|
pub fn pos<'a>(i: SliceIter<'a, Token>) -> Result<SliceIter<'a, Token>, Position> {
|
|
|
|
let mut _i = i.clone();
|
|
|
|
let tok = _i.next().unwrap();
|
2019-03-26 20:54:07 -04:00
|
|
|
let pos = tok.pos.clone();
|
|
|
|
Result::Complete(i, pos)
|
2018-02-02 15:27:33 -06:00
|
|
|
}
|
|
|
|
|
|
|
|
#[cfg(test)]
|
2018-05-22 18:02:44 -05:00
|
|
|
mod test;
|