// Copyright 2017 Jeremy Wall <jeremy@marzhillstudios.com>
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//! The tokenization stage of the ucg compiler.
use nom_locate::LocatedSpan;
use nom;
use nom::{alpha, digit, is_alphanumeric, multispace};
use nom::{InputIter, InputLength, Slice};
use ast::*;
use std;
use std::result::Result;

pub type Span<'a> = LocatedSpan<&'a str>;

impl<'a> From<Span<'a>> for Position {
    fn from(s: Span) -> Position {
        Position {
            line: s.line as usize,
            column: s.get_column() as usize,
        }
    }
}
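// Illustrative note (not in the original source): converting a Span into a Position keeps
// only the line and column numbers; the borrowed string fragment is dropped, so Positions
// can be stored on Tokens that outlive the input text.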
fn is_symbol_char(c: char) -> bool {
    is_alphanumeric(c as u8) || c == '-' as char || c == '_' as char
}
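// Illustrative note (not in the original source): is_symbol_char accepts alphanumerics
// plus '-' and '_', so `foo-bar_1` lexes as a single bareword while `foo.bar` stops at
// the '.'.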
fn escapequoted(input: Span) -> nom::IResult<Span, String> {
    // loop until we find a " that is not preceded by \.
    // Collapse all \<char> to just char for escaping.
    let mut frag = String::new();
    let mut escape = false;
    for (i, c) in input.iter_indices() {
        if c == '\\' && !escape {
            // eat this slash and set our escaping sentinel
            escape = true;
        } else if c == '"' && !escape {
            // Bail if this is an unescaped "
            // we exit here.
            return nom::IResult::Done(input.slice(i..), frag);
        } else {
            // we accumulate this character.
            frag.push(c);
            escape = false; // reset our escaping sentinel
        }
    }
    return nom::IResult::Incomplete(nom::Needed::Unknown);
}
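// Behavior sketch (assumed, not in the original source): given the remaining input
// `foo \"bar" baz`, escapequoted accumulates the fragment `foo "bar` and stops at the
// first unescaped quote, leaving `" baz` for the caller (strtok) to consume; with no
// closing quote it reports Incomplete.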
// TODO(jwall): Handle escapes
named!(strtok( Span ) -> Token,
    do_parse!(
        span: position!() >>
        tag!("\"") >>
        frag: escapequoted >>
        tag!("\"") >>
        (Token{
            typ: TokenType::QUOTED,
            pos: Position::from(span),
            fragment: frag,
        })
    )
);

named!(barewordtok( Span ) -> Token,
    do_parse!(
        span: position!() >>
        frag: preceded!(peek!(alpha), take_while!(is_symbol_char)) >>
        (Token{
            typ: TokenType::BAREWORD,
            pos: Position::from(span),
            fragment: frag.fragment.to_string(),
        })
    )
);

named!(digittok( Span ) -> Token,
    do_parse!(
        span: position!() >>
        digits: digit >>
        (Token{
            typ: TokenType::DIGIT,
            pos: Position::from(span),
            fragment: digits.fragment.to_string(),
        })
    )
);

named!(booleantok( Span ) -> Token,
    do_parse!(
        span: position!() >>
        b: alt!(
            tag!("true") |
            tag!("false")
        ) >>
        (Token{
            typ: TokenType::BOOLEAN,
            pos: Position::from(span),
            fragment: b.fragment.to_string(),
        })
    )
);
/// do_tag_tok! is a helper macro that cuts down the boilerplate needed to build
/// a simple tag token.
macro_rules! do_tag_tok {
    // NOTE(jwall): Nom macros do magic with their inputs. They in fact
    // rewrite your macro arguments for you. Which means we require this $i
    // parameter even though we don't explicitly pass it below. I don't
    // particularly like this but I'm living with it for now.
    ($i:expr, $type:expr, $tag:expr) => {
        do_parse!($i,
            span: position!() >>
            frag: tag!($tag) >>
            (Token{
                typ: $type,
                pos: Position::from(span),
                fragment: frag.fragment.to_string(),
            })
        )
    }
}
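// Usage sketch (illustrative, not in the original source): `do_tag_tok!(TokenType::PUNCT, ",")`
// expands to a do_parse! chain that records the current position, matches the literal ",",
// and wraps the matched fragment in a PUNCT Token, which is how the named! parsers below use it.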
named!(emptytok( Span ) -> Token,
    do_tag_tok!(TokenType::EMPTY, "NULL")
);

named!(commatok( Span ) -> Token,
    do_tag_tok!(TokenType::PUNCT, ",")
);

named!(lbracetok( Span ) -> Token,
    do_tag_tok!(TokenType::PUNCT, "{")
);

named!(rbracetok( Span ) -> Token,
    do_tag_tok!(TokenType::PUNCT, "}")
);

named!(lparentok( Span ) -> Token,
    do_tag_tok!(TokenType::PUNCT, "(")
);

named!(rparentok( Span ) -> Token,
    do_tag_tok!(TokenType::PUNCT, ")")
);

named!(dottok( Span ) -> Token,
    do_tag_tok!(TokenType::PUNCT, ".")
);

named!(plustok( Span ) -> Token,
    do_tag_tok!(TokenType::PUNCT, "+")
);

named!(dashtok( Span ) -> Token,
    do_tag_tok!(TokenType::PUNCT, "-")
);

named!(startok( Span ) -> Token,
    do_tag_tok!(TokenType::PUNCT, "*")
);

named!(slashtok( Span ) -> Token,
    do_tag_tok!(TokenType::PUNCT, "/")
);

named!(pcttok( Span ) -> Token,
    do_tag_tok!(TokenType::PUNCT, "%")
);

named!(equaltok( Span ) -> Token,
    do_tag_tok!(TokenType::PUNCT, "=")
);

named!(semicolontok( Span ) -> Token,
    do_tag_tok!(TokenType::PUNCT, ";")
);

named!(leftsquarebracket( Span ) -> Token,
    do_tag_tok!(TokenType::PUNCT, "[")
);

named!(rightsquarebracket( Span ) -> Token,
    do_tag_tok!(TokenType::PUNCT, "]")
);

named!(fatcommatok( Span ) -> Token,
    do_tag_tok!(TokenType::PUNCT, "=>")
);

named!(lettok( Span ) -> Token,
    do_tag_tok!(TokenType::BAREWORD, "let")
);

named!(selecttok( Span ) -> Token,
    do_tag_tok!(TokenType::BAREWORD, "select")
);

named!(macrotok( Span ) -> Token,
    do_tag_tok!(TokenType::BAREWORD, "macro")
);

named!(importtok( Span ) -> Token,
    do_tag_tok!(TokenType::BAREWORD, "import")
);

named!(astok( Span ) -> Token,
    do_tag_tok!(TokenType::BAREWORD, "as")
);

named!(maptok( Span ) -> Token,
    do_tag_tok!(TokenType::BAREWORD, "map")
);

named!(filtertok( Span ) -> Token,
    do_tag_tok!(TokenType::BAREWORD, "filter")
);
fn end_of_input(input: Span) -> nom::IResult<Span, Token> {
    match eof!(input,) {
        nom::IResult::Done(_, _) => {
            return nom::IResult::Done(
                input,
                make_tok!(EOF => input.line as usize,
                          input.get_column() as usize),
            );
        }
        nom::IResult::Incomplete(_) => {
            return nom::IResult::Incomplete(nom::Needed::Unknown);
        }
        nom::IResult::Error(e) => {
            return nom::IResult::Error(e);
        }
    }
}
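// Observation (assumed, not in the original source): eof! only succeeds on an empty Span, so
// end_of_input yields an EOF token at the current position. tokenize() below checks for empty
// input before calling token(), so this branch mainly matters when token() is invoked directly.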
fn comment(input: Span) -> nom::IResult<Span, Token> {
    match tag!(input, "//") {
        nom::IResult::Done(rest, _) => {
            match alt!(
                rest,
                take_until_and_consume!("\r\n") | take_until_and_consume!("\n")
            ) {
                nom::IResult::Done(rest, cmt) => {
                    return nom::IResult::Done(
                        rest,
                        make_tok!(CMT => cmt.fragment.to_string(),
                                  input.line as usize,
                                  input.get_column() as usize),
                    );
                }
                // If we didn't find a newline then we just grab everything.
                _ => {
                    let blen = rest.input_len();
                    let next = rest.slice(blen..);
                    let tok = rest.slice(..blen);
                    return nom::IResult::Done(
                        next,
                        make_tok!(CMT => tok.fragment.to_string(),
                                  input.line as usize, input.get_column() as usize
                        ),
                    );
                }
            }
        }
        nom::IResult::Incomplete(i) => return nom::IResult::Incomplete(i),
        nom::IResult::Error(e) => return nom::IResult::Error(e),
    }
}
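// Behavior sketch (assumed, not in the original source): the input `// foo\nbar` yields a
// COMMENT token with fragment " foo" and leaves "bar" unconsumed, while a trailing comment
// with no newline consumes the rest of the input (the `_` arm above).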
named!(whitespace( Span ) -> Token,
    do_parse!(
        span: position!() >>
        many1!(multispace) >>
        (Token{
            typ: TokenType::WS,
            pos: Position::from(span),
            fragment: String::new(),
        })
    )
);

named!(token( Span ) -> Token,
    alt!(
        strtok |
        emptytok | // This must come before the barewordtok
        digittok |
        commatok |
        rbracetok |
        lbracetok |
        lparentok |
        rparentok |
        dottok |
        plustok |
        dashtok |
        startok |
        comment | // Note comment must come before slashtok
        slashtok |
        pcttok |
        fatcommatok | // Note fatcommatok must come before equaltok
        equaltok |
        semicolontok |
        leftsquarebracket |
        rightsquarebracket |
        booleantok |
        lettok |
        selecttok |
        macrotok |
        importtok |
        astok |
        maptok |
        filtertok |
        barewordtok |
        whitespace |
        end_of_input)
);
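// Ordering note (illustrative, not in the original source): alt! tries branches in order, so
// emptytok must run before barewordtok or "NULL" would lex as a BAREWORD, and comment must
// run before slashtok or "//" would lex as two PUNCT "/" tokens, as the inline comments note.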
// TODO(jwall): This should return a ParseError instead.

/// Consumes an input Span and returns either a Vec<Token> or the Position and
/// nom::ErrorKind at which tokenization failed.
pub fn tokenize(input: Span) -> Result<Vec<Token>, (Position, nom::ErrorKind)> {
    let mut out = Vec::new();
    let mut i = input;
    loop {
        if i.input_len() == 0 {
            break;
        }
        match token(i) {
            nom::IResult::Error(e) => {
                return Err((
                    Position {
                        line: i.line as usize,
                        column: i.get_column() as usize,
                    },
                    e,
                ));
            }
            nom::IResult::Incomplete(_) => {
                return Err((
                    Position {
                        line: i.line as usize,
                        column: i.get_column() as usize,
                    },
                    nom::ErrorKind::Complete,
                ));
            }
            nom::IResult::Done(rest, tok) => {
                i = rest;
                if tok.typ == TokenType::COMMENT || tok.typ == TokenType::WS {
                    // we skip comments and whitespace
                    continue;
                }
                out.push(tok);
            }
        }
    }
    // ensure that we always have an END token to go off of.
    out.push(Token {
        fragment: String::new(),
        typ: TokenType::END,
        pos: Position {
            line: i.line as usize,
            column: i.get_column() as usize,
        },
    });
    Ok(out)
}
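// Usage sketch (hypothetical, not in the original source):
//
//     let toks = tokenize(LocatedSpan::new("let foo = 1;")).unwrap();
//     // COMMENT and WS tokens are dropped and a synthetic END token is appended,
//     // so downstream parsers always see a terminator.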
pub fn token_clone(t: &Token) -> Result<Token, ParseError> {
    Ok(t.clone())
}

/// nom macro that matches a Token by type and uses an optional conversion handler
/// for the matched Token.
macro_rules! match_type {
    ($i:expr, BOOLEAN => $h:expr) => {
        match_type!($i, TokenType::BOOLEAN, "Not a Boolean", $h)
    };

    ($i:expr, BOOLEAN) => {
        match_type!($i, BOOLEAN => token_clone)
    };

    ($i:expr, COMMENT => $h:expr) => {
        match_type!($i, TokenType::COMMENT, "Not a Comment", $h)
    };

    ($i:expr, COMMENT) => {
        match_type!($i, COMMENT => token_clone)
    };

    ($i:expr, BAREWORD => $h:expr) => {
        match_type!($i, TokenType::BAREWORD, "Not a Bareword", $h)
    };

    ($i:expr, BAREWORD) => {
        match_type!($i, BAREWORD => token_clone)
    };

    ($i:expr, EMPTY => $h:expr) => {
        match_type!($i, TokenType::EMPTY, "Not NULL", $h)
    };

    ($i:expr, EMPTY) => {
        match_type!($i, EMPTY => token_clone)
    };

    ($i:expr, STR => $h:expr) => {
        match_type!($i, TokenType::QUOTED, "Not a String", $h)
    };

    ($i:expr, STR) => {
        match_type!($i, STR => token_clone)
    };

    ($i:expr, DIGIT => $h:expr) => {
        match_type!($i, TokenType::DIGIT, "Not a DIGIT", $h)
    };

    ($i:expr, DIGIT) => {
        match_type!($i, DIGIT => token_clone)
    };

    ($i:expr, PUNCT => $h:expr) => {
        match_type!($i, TokenType::PUNCT, "Not PUNCTUATION", $h)
    };

    ($i:expr, PUNCT) => {
        match_type!($i, PUNCT => token_clone)
    };

    ($i:expr, $t:expr, $msg:expr, $h:expr) => {
        {
            let i_ = $i.clone();
            use nom::Slice;
            use std::convert::Into;
            if i_.input_len() == 0 {
                nom::IResult::Error(
                    nom::ErrorKind::Custom(ParseError{
                        description: format!("End of Input! {}", $msg),
                        pos: Position{line: 0, column: 0}
                    }))
            } else {
                let tok = &(i_[0]);
                if tok.typ == $t {
                    match $h(tok) {
                        Result::Ok(v) => nom::IResult::Done($i.slice(1..), v),
                        Result::Err(e) => nom::IResult::Error(
                            nom::ErrorKind::Custom(e.into())),
                    }
                } else {
                    nom::IResult::Error(nom::ErrorKind::Custom(ParseError{
                        description: $msg.to_string(),
                        pos: tok.pos.clone()}))
                }
            }
        }
    };
}
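// Usage sketch (hypothetical, not in the original source): `match_type!(iter, DIGIT)` consumes
// one token if its type is DIGIT and returns a clone of it, while
// `match_type!(iter, DIGIT => handler)` passes the matched token through `handler`; a handler
// Err is surfaced as a nom::ErrorKind::Custom(ParseError).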
/// nom style macro that matches various Tokens by type and value and allows optional
/// conversion handlers for the matched Token.
macro_rules! match_token {
    ($i:expr, PUNCT => $f:expr) => {
        match_token!($i, PUNCT => $f, token_clone)
    };

    ($i:expr, PUNCT => $f:expr, $h:expr) => {
        match_token!($i, TokenType::PUNCT, $f, format!("Not PUNCT ({})", $f), $h)
    };

    ($i:expr, BAREWORD => $f:expr) => {
        match_token!($i, BAREWORD => $f, token_clone)
    };

    ($i:expr, BAREWORD => $f:expr, $h:expr) => {
        match_token!($i, TokenType::BAREWORD, $f, format!("Not a BAREWORD ({})", $f), $h)
    };

    ($i:expr, $t:expr, $f:expr, $msg:expr, $h:expr) => {
        {
            let i_ = $i.clone();
            use nom::Slice;
            use std::convert::Into;
            let tok = &(i_[0]);
            if tok.typ == $t && &tok.fragment == $f {
                match $h(tok) {
                    Result::Ok(v) => nom::IResult::Done($i.slice(1..), v),
                    Result::Err(e) => nom::IResult::Error(
                        nom::ErrorKind::Custom(e.into())),
                }
            } else {
                nom::IResult::Error(nom::ErrorKind::Custom(ParseError{
                    description: format!("{} Instead is ({})", $msg, tok.fragment),
                    pos: tok.pos.clone()}))
            }
        }
    };
}

/// nom style macro that matches a specific punctuation Token by fragment.
macro_rules! punct {
    ($i:expr, $c:expr) => {
        match_token!($i, PUNCT => $c)
    };
}

/// nom style macro that matches a specific bareword Token by fragment.
macro_rules! word {
    ($i:expr, $w:expr) => {
        match_token!($i, BAREWORD => $w)
    };
}
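// Usage sketch (hypothetical, not in the original source): `punct!(iter, ";")` and
// `word!(iter, "let")` each consume exactly one token whose type and fragment match,
// as exercised by test_match_punct and test_match_word below.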
/// pos gets the current position from a TokenIter input without consuming it.
pub fn pos(i: TokenIter) -> nom::IResult<TokenIter, Position, ParseError> {
    let tok = &i[0];
    let line = tok.pos.line;
    let column = tok.pos.column;
    nom::IResult::Done(
        i.clone(),
        Position {
            line: line,
            column: column,
        },
    )
}

/// TokenIter wraps a slice of Tokens and implements the various necessary
/// nom traits to use it as an input to nom parsers.
#[derive(Clone, Debug, PartialEq)]
pub struct TokenIter<'a> {
    pub source: &'a [Token],
}

impl<'a> TokenIter<'a> {
    pub fn len(&self) -> usize {
        self.source.len()
    }
}

impl<'a> nom::InputLength for TokenIter<'a> {
    fn input_len(&self) -> usize {
        self.source.input_len()
    }
}

macro_rules! impl_token_iter_slice {
    ($r:ty) => {
        impl<'a> nom::Slice<$r> for TokenIter<'a> {
            fn slice(&self, range: $r) -> Self {
                TokenIter {
                    source: self.source.slice(range),
                }
            }
        }
    }
}

impl_token_iter_slice!(std::ops::Range<usize>);
impl_token_iter_slice!(std::ops::RangeTo<usize>);
impl_token_iter_slice!(std::ops::RangeFrom<usize>);
impl_token_iter_slice!(std::ops::RangeFull);
impl<'a> std::ops::Index<usize> for TokenIter<'a> {
    type Output = Token;

    fn index(&self, i: usize) -> &Self::Output {
        &self.source[i]
    }
}

impl<'a> InputIter for TokenIter<'a> {
    type Item = &'a Token;
    type RawItem = Token;

    type Iter = std::iter::Enumerate<std::slice::Iter<'a, Self::RawItem>>;
    type IterElem = std::slice::Iter<'a, Self::RawItem>;

    fn iter_indices(&self) -> Self::Iter {
        self.source.iter().enumerate()
    }

    fn iter_elements(&self) -> Self::IterElem {
        self.source.iter()
    }

    fn position<P>(&self, predicate: P) -> Option<usize>
    where
        P: Fn(Self::RawItem) -> bool,
    {
        for (o, v) in self.iter_indices() {
            if predicate(v.clone()) {
                return Some(o);
            }
        }
        None
    }

    fn slice_index(&self, count: usize) -> Option<usize> {
        let mut cnt = 0;
        for (index, _) in self.iter_indices() {
            if cnt == count {
                return Some(index);
            }
            cnt += 1;
        }
        if cnt == count {
            return Some(self.len());
        }
        None
    }
}
#[cfg(test)]
mod tokenizer_test {
    use super::*;
    use nom;
    use nom_locate::LocatedSpan;

    #[test]
    fn test_empty_token() {
        let result = emptytok(LocatedSpan::new("NULL"));
        assert!(result.is_done(), format!("result {:?} is not done", result));
        if let nom::IResult::Done(_, tok) = result {
            assert_eq!(tok.fragment, "NULL");
            assert_eq!(tok.typ, TokenType::EMPTY);
        }
    }

    #[test]
    fn test_escape_quoted() {
        let result = escapequoted(LocatedSpan::new("foo \\\"bar\""));
        assert!(result.is_done(), format!("result {:?} is not ok", result));
        if let nom::IResult::Done(rest, frag) = result {
            assert_eq!(frag, "foo \"bar");
            assert_eq!(rest.fragment, "\"");
        }
    }

    #[test]
    fn test_string_with_escaping() {
        let result = strtok(LocatedSpan::new("\"foo \\\\ \\\"bar\""));
        assert!(result.is_done(), format!("result {:?} is not ok", result));
        if let nom::IResult::Done(_, tok) = result {
            assert_eq!(tok.fragment, "foo \\ \"bar".to_string());
        }
    }

    #[test]
    fn test_tokenize_bareword_with_dash() {
        let result = tokenize(LocatedSpan::new("foo-bar "));
        assert!(result.is_ok(), format!("result {:?} is not ok", result));
        if let Ok(toks) = result {
            assert_eq!(toks.len(), 2);
            assert_eq!(toks[0].fragment, "foo-bar");
        }
    }

    #[test]
    fn test_boolean() {
        let result = token(LocatedSpan::new("true"));
        assert!(
            result.is_done(),
            format!("result {:?} is not a boolean", result)
        );
        if let nom::IResult::Done(_, tok) = result {
            assert_eq!(tok.fragment, "true");
            assert_eq!(tok.typ, TokenType::BOOLEAN);
        }
    }

    #[test]
    fn test_tokenize_one_of_each() {
        let result = tokenize(LocatedSpan::new(
            "let import macro select as => [ ] { } ; = % / * \
             + - . ( ) , 1 . foo \"bar\" // comment\n ; true false",
        ));
        assert!(result.is_ok(), format!("result {:?} is not ok", result));
        let v = result.unwrap();
        for (i, t) in v.iter().enumerate() {
            println!("{}: {:?}", i, t);
        }
        assert_eq!(v.len(), 29);
        assert_eq!(v[28].typ, TokenType::END);
    }

    #[test]
    fn test_parse_has_end() {
        let result = tokenize(LocatedSpan::new("foo"));
        assert!(result.is_ok());
        let v = result.unwrap();
        assert_eq!(v.len(), 2);
        assert_eq!(v[1].typ, TokenType::END);
    }
    #[test]
    fn test_parse_comment() {
        assert!(comment(LocatedSpan::new("// comment\n")).is_done());
        assert!(comment(LocatedSpan::new("// comment")).is_done());
        assert_eq!(
            comment(LocatedSpan::new("// comment\n")),
            nom::IResult::Done(
                LocatedSpan {
                    fragment: "",
                    offset: 11,
                    line: 2,
                },
                Token {
                    typ: TokenType::COMMENT,
                    fragment: " comment".to_string(),
                    pos: Position { line: 1, column: 1 },
                }
            )
        );
        assert!(comment(LocatedSpan::new("// comment\r\n")).is_done());
        assert_eq!(
            comment(LocatedSpan::new("// comment\r\n")),
            nom::IResult::Done(
                LocatedSpan {
                    fragment: "",
                    offset: 12,
                    line: 2,
                },
                Token {
                    typ: TokenType::COMMENT,
                    fragment: " comment".to_string(),
                    pos: Position { column: 1, line: 1 },
                }
            )
        );
        assert!(comment(LocatedSpan::new("// comment\r\n ")).is_done());
        assert_eq!(
            comment(LocatedSpan::new("// comment\r\n ")),
            nom::IResult::Done(
                LocatedSpan {
                    fragment: " ",
                    offset: 12,
                    line: 2,
                },
                Token {
                    typ: TokenType::COMMENT,
                    fragment: " comment".to_string(),
                    pos: Position { column: 1, line: 1 },
                }
            )
        );
        assert!(comment(LocatedSpan::new("// comment")).is_done());
    }
    #[test]
    fn test_match_word() {
        let input = vec![
            Token {
                fragment: "foo".to_string(),
                typ: TokenType::BAREWORD,
                pos: Position { line: 1, column: 1 },
            },
        ];
        let result = word!(
            TokenIter {
                source: input.as_slice(),
            },
            "foo"
        );
        match result {
            nom::IResult::Done(_, tok) => assert_eq!(tok, input[0]),
            res => assert!(false, format!("Fail: {:?}", res)),
        }
    }

    #[test]
    fn test_match_word_empty_input() {
        let input = vec![
            Token {
                fragment: "".to_string(),
                typ: TokenType::END,
                pos: Position { line: 1, column: 1 },
            },
        ];
        let result = word!(
            TokenIter {
                source: input.as_slice(),
            },
            "foo"
        );
        match result {
            nom::IResult::Done(_, _) => assert!(false, "Should have been an error but was Done"),
            nom::IResult::Incomplete(_) => {
                assert!(false, "Should have been an error but was Incomplete")
            }
            nom::IResult::Error(_) => {
                // noop
            }
        }
    }

    #[test]
    fn test_match_punct() {
        let input = vec![
            Token {
                fragment: "!".to_string(),
                typ: TokenType::PUNCT,
                pos: Position { line: 1, column: 1 },
            },
        ];
        let result = punct!(
            TokenIter {
                source: input.as_slice(),
            },
            "!"
        );
        match result {
            nom::IResult::Done(_, tok) => assert_eq!(tok, input[0]),
            res => assert!(false, format!("Fail: {:?}", res)),
        }
    }

    #[test]
    fn test_match_type() {
        let input = vec![
            Token {
                fragment: "foo".to_string(),
                typ: TokenType::BAREWORD,
                pos: Position { line: 1, column: 1 },
            },
        ];
        let result = match_type!(
            TokenIter {
                source: input.as_slice(),
            },
            BAREWORD
        );
        match result {
            nom::IResult::Done(_, tok) => assert_eq!(tok, input[0]),
            res => assert!(false, format!("Fail: {:?}", res)),
        }
    }
}