mirror of https://github.com/zaphar/ucg.git
Finish splitting the Tokenization and parsing stages.

* Added some helper matcher macros for tokens.
* Added some helper assertion macros for tests.
* Tokenization and Parsing are two separate stages.

commit 97c97ced55 (parent b25e12608c)
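
For orientation, here is a minimal sketch of the two-stage flow this commit introduces (only items that appear in the diff below are used; the input string is illustrative):

    let result = tokenize(LocatedSpan::new("let foo = 1;"));
    let tokens = result.unwrap();
    // tokenize() always appends an END token, so downstream parsing can rely on it.
    assert_eq!(tokens.last().unwrap().typ, TokenType::END);

The parser then consumes the Vec<Token> through the TokenIter type added in src/tokenizer.rs instead of re-scanning raw text.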
Cargo.toml
@@ -8,8 +8,10 @@ readme = "README.md"
 keywords = ["compiler", "config"]
 license = "Apache-2.0"
 
+[dependencies.nom]
+version = "^3.2"
+
 [dependencies]
-nom = "^3.2"
 nom_locate = "^0.1.1"
 clap = "~2.26.0"
 
examples/flags.txt (new file, 1 line)
@@ -0,0 +1 @@
+--db_conn1 'db1.prod.net:3306/testdb' --db_conn2 'db2.prod.net:3306/testdb' --dbconn_list --tmpldir './templates'
@@ -20,7 +20,7 @@ let db_conns = [db_conn1.conn_string, db_conn2.conn_string];
 // Our server configuration.
 let server_config = {
     dbconn_list = db_conns,
-    db_conn1 = db_conns.0,
+    db_conn1 = db_conns.0, // connection one
     db_conn2 = db_conns.1,
     tmpldir = "./templates"
 };
src/ast.rs (94 changes)
@@ -11,6 +11,7 @@
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
+use std;
 use std::collections::HashSet;
 use std::borrow::Borrow;
 use std::convert::Into;
@@ -21,6 +22,29 @@ use std::cmp::PartialEq;
 use std::hash::Hasher;
 use std::hash::Hash;
 
+#[derive(Debug,PartialEq)]
+pub struct ParseError {
+    pub pos: Position,
+    pub description: String,
+}
+
+impl std::fmt::Display for ParseError {
+    fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> {
+        write!(f,
+               "Parsing Error {} at line: {} column: {}",
+               self.description,
+               self.pos.line,
+               self.pos.column)
+    }
+}
+
+impl std::error::Error for ParseError {
+    fn description(&self) -> &str {
+        &self.description
+    }
+}
+
+
 macro_rules! enum_type_equality {
     ( $slf:ident, $r:expr, $( $l:pat ),* ) => {
         match $slf {
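
As a quick illustration, the Display impl above renders a ParseError like this (the position and message are made up for the example):

    let err = ParseError {
        pos: Position::new(2, 5),
        description: "expected '='".to_string(),
    };
    // Prints: Parsing Error expected '=' at line: 2 column: 5
    println!("{}", err);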
@@ -52,19 +76,33 @@ impl Position {
     }
 }
 
+#[derive(Debug,PartialEq,Eq,Clone,PartialOrd,Ord,Hash)]
+pub enum TokenType {
+    END,
+    WS,
+    COMMENT,
+    QUOTED,
+    DIGIT,
+    BAREWORD,
+    PUNCT,
+}
+
+// FIXME(jwall): We should probably implement copy for this.
 #[derive(Debug,PartialEq,Eq,Clone,PartialOrd,Ord,Hash)]
 pub struct Token {
+    pub typ: TokenType,
     pub fragment: String,
     pub pos: Position,
 }
 
 impl Token {
-    pub fn new<S: Into<String>>(f: S, line: usize, col: usize) -> Self {
-        Self::new_with_pos(f, Position::new(line, col))
+    pub fn new<S: Into<String>>(f: S, typ: TokenType, line: usize, col: usize) -> Self {
+        Self::new_with_pos(f, typ, Position::new(line, col))
     }
 
-    pub fn new_with_pos<S: Into<String>>(f: S, pos: Position) -> Self {
+    pub fn new_with_pos<S: Into<String>>(f: S, typ: TokenType, pos: Position) -> Self {
         Token {
+            typ: typ,
             fragment: f.into(),
             pos: pos,
         }
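
With the extra TokenType argument, constructing a token by hand now looks like this (a small sketch using only the constructor above):

    let tok = Token::new("foo", TokenType::BAREWORD, 1, 1);
    assert_eq!(tok.typ, TokenType::BAREWORD);
    assert_eq!(tok.fragment, "foo");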
@@ -88,8 +126,32 @@ macro_rules! value_node {
 
 #[allow(unused_macros)]
 macro_rules! make_tok {
-    ( $e: expr, $l:expr, $c:expr ) => {
-        Token::new($e, $l, $c)
+    ( EOF => $l:expr, $c:expr ) => {
+        Token::new("", TokenType::END, $l, $c)
+    };
+
+    ( WS => $l:expr, $c:expr ) => {
+        Token::new("", TokenType::WS, $l, $c)
+    };
+
+    ( CMT => $e:expr, $l:expr, $c:expr ) => {
+        Token::new($e, TokenType::COMMENT, $l, $c)
+    };
+
+    ( QUOT => $e:expr, $l:expr, $c:expr ) => {
+        Token::new($e, TokenType::QUOTED, $l, $c)
+    };
+
+    ( PUNCT => $e:expr, $l:expr, $c:expr ) => {
+        Token::new($e, TokenType::PUNCT, $l, $c)
+    };
+
+    ( DIGIT => $e:expr, $l:expr, $c:expr ) => {
+        Token::new($e, TokenType::DIGIT, $l, $c)
+    };
+
+    ( $e:expr, $l:expr, $c:expr ) => {
+        Token::new($e, TokenType::BAREWORD, $l, $c)
     };
 }
 
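
Each arm of the helper macro above simply forwards to Token::new with the matching TokenType, so test code can write, for example:

    // expands to Token::new("=", TokenType::PUNCT, 1, 3)
    let eq = make_tok!(PUNCT => "=", 1, 3);
    // the bare form defaults to a BAREWORD token
    let word = make_tok!("foo", 1, 1);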
@@ -101,7 +163,11 @@ macro_rules! make_expr {
 
     ( $e:expr, $l:expr, $c:expr ) => {
         Expression::Simple(Value::Symbol(Positioned::new($e.to_string(), $l, $c)))
-    }
+    };
+
+    ( $e:expr => int, $l:expr, $c:expr ) => {
+        Expression::Simple(Value::Int(Positioned::new($e, $l, $c)))
+    };
 }
 
 /// Helper macro for making selectors.
@@ -160,7 +226,7 @@ macro_rules! make_selector {
         let mut list: Vec<Token> = Vec::new();
 
         $(
-            list.push(Token::new($item, 1, col));
+            list.push(make_tok!($item, 1, col));
             col += $item.len() + 1;
         )*
 
@@ -178,7 +244,7 @@ macro_rules! make_selector {
         let mut list: Vec<Token> = Vec::new();
 
         $(
-            list.push(Token::new($item, $l, col));
+            list.push(make_tok!($item, $l, col));
             col += $item.len() + 1;
         )*
 
@@ -601,7 +667,7 @@ mod ast_test {
         let def = MacroDef {
             argdefs: vec![value_node!("foo".to_string(), 1, 0)],
             fields: vec![
-                (Token::new("f1", 1, 1), Expression::Binary(BinaryOpDef{
+                (make_tok!("f1", 1, 1), Expression::Binary(BinaryOpDef{
                     kind: BinaryExprType::Add,
                     left: Value::Symbol(value_node!("foo".to_string(), 1, 1)),
                     right: Box::new(Expression::Simple(Value::Int(value_node!(1, 1, 1)))),
@@ -618,7 +684,7 @@ mod ast_test {
         let def = MacroDef {
             argdefs: vec![value_node!("foo".to_string(), 1, 0)],
             fields: vec![
-                (Token::new("f1", 1, 1), Expression::Binary(BinaryOpDef{
+                (make_tok!("f1", 1, 1), Expression::Binary(BinaryOpDef{
                     kind: BinaryExprType::Add,
                     left: Value::Symbol(value_node!("bar".to_string(), 1, 1)),
                     right: Box::new(Expression::Simple(Value::Int(value_node!(1, 1, 1)))),
@@ -637,10 +703,10 @@ mod ast_test {
         let def = MacroDef {
             argdefs: vec![value_node!("foo".to_string(), 1, 0)],
             fields: vec![
-                (Token::new("f1", 1, 1), Expression::Binary(BinaryOpDef{
+                (make_tok!("f1", 1, 1), Expression::Binary(BinaryOpDef{
                     kind: BinaryExprType::Add,
                     left: Value::Selector(make_selector!(make_expr!("foo", 1, 1) => [
-                        Token::new("quux", 1, 1) ] => 1, 1)),
+                        make_tok!("quux", 1, 1) ] => 1, 1)),
                     right: Box::new(Expression::Simple(Value::Int(value_node!(1, 1, 1)))),
                     pos: Position::new(1, 0),
                 })),
@@ -655,10 +721,10 @@ mod ast_test {
         let def = MacroDef {
             argdefs: vec![value_node!("foo".to_string(), 1, 0)],
             fields: vec![
-                (Token::new("f1", 1, 1), Expression::Binary(BinaryOpDef{
+                (make_tok!("f1", 1, 1), Expression::Binary(BinaryOpDef{
                     kind: BinaryExprType::Add,
                     left: Value::Selector(make_selector!(make_expr!("bar", 1, 1) => [
-                        Token::new("quux", 1, 1) ] => 1, 1)),
+                        make_tok!("quux", 1, 1) ] => 1, 1)),
                     right: Box::new(Expression::Simple(Value::Int(value_node!(1, 1, 1)))),
                     pos: Position::new(1, 0),
                 })),
src/build.rs (56 changes)
@@ -22,8 +22,6 @@ use std::ops::Deref;
 use std::rc::Rc;
 use std::convert::From;
 
-use nom;
-
 use tokenizer::Span;
 use ast::*;
 use format;
@@ -153,16 +151,14 @@ impl Display for Val {
                 }
                 write!(f, "]")
             }
-            &Val::Macro(_) => {
-                write!(f, "Macro(..)")
-            },
+            &Val::Macro(_) => write!(f, "Macro(..)"),
             &Val::Tuple(ref def) => {
                 try!(write!(f, "Tuple(\n"));
                 for v in def.iter() {
                     try!(write!(f, "\t{} = {},\n", v.0.val, v.1));
                 }
                 write!(f, ")")
-            },
+            }
         }
     }
 }
@@ -282,22 +278,15 @@ impl Builder {
         Ok(())
     }
 
-    pub fn build_file_string(&mut self, name: &str, input: String) -> BuildResult {
+    pub fn build_file_string(&mut self, _name: &str, input: String) -> BuildResult {
         match parse(Span::new(&input)) {
-            nom::IResult::Done(_span, stmts) => {
+            Ok(stmts) => {
                 for stmt in stmts.iter() {
                     try!(self.build_stmt(stmt));
                 }
                 Ok(())
            }
-            nom::IResult::Error(err) => Err(Box::new(err)),
-            nom::IResult::Incomplete(_) => {
-                Err(Box::new(error::Error::new(
-                    format!("Could not parse input from file: {}", name),
-                    error::ErrorType::IncompleteParsing,
-                    Position{line: 0, column: 0}
-                )))
-            }
+            Err(err) => Err(Box::new(err)),
         }
     }
 
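
Since parse() now returns a plain Result, callers no longer have to handle nom's Incomplete case themselves. A usage sketch (the file name and input string are hypothetical):

    let mut b = Builder::new();
    b.build_file_string("example.ucg", "let foo = 1;".to_string()).unwrap();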
@@ -433,11 +422,11 @@ impl Builder {
             &Val::Tuple(_) => {
                 stack.push_back(first.clone());
             }
-            &Val::List(_) =>{
+            &Val::List(_) => {
                 stack.push_back(first.clone());
             }
             _ => {
-                //noop
+                // noop
             }
         }
 
@@ -465,7 +454,8 @@ impl Builder {
             _ => {
                 return Err(Box::new(error::Error::new(format!("{} is not a Tuple or List",
                                                               vref),
-                                                      error::ErrorType::TypeFail, next.pos.clone())));
+                                                      error::ErrorType::TypeFail,
+                                                      next.pos.clone())));
             }
         }
     }
@@ -989,7 +979,7 @@ mod test {
         (Expression::Simple(Value::String(value_node!("foo".to_string(), 1, 1))),
          Val::String("foo".to_string())),
         (Expression::Simple(Value::Tuple(value_node!(vec![
-            (Token::new("bar", 1, 1), Expression::Simple(Value::Int(value_node!(1, 1, 1))))
+            (make_tok!("bar", 1, 1), Expression::Simple(Value::Int(value_node!(1, 1, 1))))
         ], 1, 1))),
          Val::Tuple(vec![(value_node!("bar".to_string(), 1, 1),
                           Rc::new(Val::Int(1)))])),
@@ -1124,7 +1114,7 @@ mod test {
         (Expression::Copy(
             CopyDef{
                 selector: make_selector!(make_expr!("tpl1")),
-                fields: vec![(Token::new("fld1", 1, 1),
+                fields: vec![(make_tok!("fld1", 1, 1),
                               Expression::Simple(Value::String(value_node!("2".to_string(), 1, 1))))],
                 pos: Position::new(1, 0)}),
          Val::Tuple(
@@ -1145,7 +1135,7 @@ mod test {
         (Expression::Copy(
             CopyDef{
                 selector: make_selector!(make_expr!("tpl1")),
-                fields: vec![(Token::new("fld2", 1, 1),
+                fields: vec![(make_tok!("fld2", 1, 1),
                               Expression::Simple(Value::String(value_node!("2".to_string(), 1, 1))))],
                 pos: Position::new(1, 0),
             }),
@@ -1164,9 +1154,9 @@ mod test {
             CopyDef{
                 selector: make_selector!(make_expr!("tpl1")),
                 fields: vec![
-                    (Token::new("fld1", 1, 1),
+                    (make_tok!("fld1", 1, 1),
                      Expression::Simple(Value::Int(value_node!(3, 1, 1)))),
-                    (Token::new("fld2", 1, 1),
+                    (make_tok!("fld2", 1, 1),
                      Expression::Simple(Value::String(value_node!("2".to_string(), 1, 1)))),
                 ],
                 pos: Position::new(1, 0),
|
|||||||
b.out.entry(value_node!("tstmac".to_string(), 1, 0)).or_insert(Rc::new(Val::Macro(MacroDef{
|
b.out.entry(value_node!("tstmac".to_string(), 1, 0)).or_insert(Rc::new(Val::Macro(MacroDef{
|
||||||
argdefs: vec![value_node!("arg1".to_string(), 1, 0)],
|
argdefs: vec![value_node!("arg1".to_string(), 1, 0)],
|
||||||
fields: vec![
|
fields: vec![
|
||||||
(Token::new("foo", 1, 1), Expression::Simple(Value::Symbol(value_node!("arg1".to_string(), 1, 1)))),
|
(make_tok!("foo", 1, 1), Expression::Simple(Value::Symbol(value_node!("arg1".to_string(), 1, 1)))),
|
||||||
],
|
],
|
||||||
pos: Position::new(1, 0),
|
pos: Position::new(1, 0),
|
||||||
})));
|
})));
|
||||||
@ -1220,7 +1210,7 @@ mod test {
|
|||||||
b.out.entry(value_node!("tstmac".to_string(), 1, 0)).or_insert(Rc::new(Val::Macro(MacroDef{
|
b.out.entry(value_node!("tstmac".to_string(), 1, 0)).or_insert(Rc::new(Val::Macro(MacroDef{
|
||||||
argdefs: vec![value_node!("arg2".to_string(), 1, 0)],
|
argdefs: vec![value_node!("arg2".to_string(), 1, 0)],
|
||||||
fields: vec![
|
fields: vec![
|
||||||
(Token::new("foo", 1, 1), Expression::Simple(Value::Symbol(value_node!("arg1".to_string(), 1, 1)))),
|
(make_tok!("foo", 1, 1), Expression::Simple(Value::Symbol(value_node!("arg1".to_string(), 1, 1)))),
|
||||||
],
|
],
|
||||||
pos: Position::new(1, 0),
|
pos: Position::new(1, 0),
|
||||||
})));
|
})));
|
||||||
@ -1250,8 +1240,8 @@ mod test {
|
|||||||
val: Box::new(Expression::Simple(Value::Symbol(value_node!("foo".to_string(), 1, 1)))),
|
val: Box::new(Expression::Simple(Value::Symbol(value_node!("foo".to_string(), 1, 1)))),
|
||||||
default: Box::new(Expression::Simple(Value::Int(value_node!(1, 1, 1)))),
|
default: Box::new(Expression::Simple(Value::Int(value_node!(1, 1, 1)))),
|
||||||
tuple: vec![
|
tuple: vec![
|
||||||
(Token::new("foo", 1, 1), Expression::Simple(Value::String(value_node!("2".to_string(), 1, 1)))),
|
(make_tok!("foo", 1, 1), Expression::Simple(Value::String(value_node!("2".to_string(), 1, 1)))),
|
||||||
(Token::new("bar", 1, 1), Expression::Simple(Value::Int(value_node!(2, 1, 1)))),
|
(make_tok!("bar", 1, 1), Expression::Simple(Value::Int(value_node!(2, 1, 1)))),
|
||||||
],
|
],
|
||||||
pos: Position::new(1, 0),
|
pos: Position::new(1, 0),
|
||||||
}),
|
}),
|
||||||
@@ -1260,8 +1250,8 @@ mod test {
             val: Box::new(Expression::Simple(Value::Symbol(value_node!("baz".to_string(), 1, 1)))),
             default: Box::new(Expression::Simple(Value::Int(value_node!(1, 1, 1)))),
             tuple: vec![
-                (Token::new("bar", 1, 1), Expression::Simple(Value::Int(value_node!(2, 1, 1)))),
-                (Token::new("quux", 1, 1), Expression::Simple(Value::String(value_node!("2".to_string(), 1, 1)))),
+                (make_tok!("bar", 1, 1), Expression::Simple(Value::Int(value_node!(2, 1, 1)))),
+                (make_tok!("quux", 1, 1), Expression::Simple(Value::String(value_node!("2".to_string(), 1, 1)))),
             ],
             pos: Position::new(1, 0),
         }),
@@ -1282,8 +1272,8 @@ mod test {
             val: Box::new(Expression::Simple(Value::Symbol(value_node!("foo".to_string(), 1, 1)))),
             default: Box::new(Expression::Simple(Value::Int(value_node!(1, 1, 1)))),
             tuple: vec![
-                (Token::new("bar", 1, 1), Expression::Simple(Value::Int(value_node!(2, 1, 1)))),
-                (Token::new("quux", 1, 1), Expression::Simple(Value::String(value_node!("2".to_string(), 1, 1)))),
+                (make_tok!("bar", 1, 1), Expression::Simple(Value::Int(value_node!(2, 1, 1)))),
+                (make_tok!("quux", 1, 1), Expression::Simple(Value::String(value_node!("2".to_string(), 1, 1)))),
             ],
             pos: Position::new(1, 0),
         }),
@@ -1295,7 +1285,7 @@ mod test {
     fn test_let_statement() {
         let mut b = Builder::new();
         let stmt = Statement::Let(LetDef {
-            name: Token::new("foo", 1, 1),
+            name: make_tok!("foo", 1, 1),
             value: Expression::Simple(Value::String(value_node!("bar".to_string(), 1, 1))),
         });
         b.build_stmt(&stmt).unwrap();
@@ -11,7 +11,7 @@
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
-//#![feature(trace_macros,log_syntax)]
+// #![feature(trace_macros,log_syntax)]
 
 #[macro_use]
 extern crate nom;
@@ -20,6 +20,7 @@ extern crate nom_locate;
 
 #[macro_use]
 pub mod ast;
+#[macro_use]
 pub mod tokenizer;
 pub mod parse;
 pub mod build;
src/parse.rs (1542 changes)
File diff suppressed because it is too large.
src/tokenizer.rs (587 changes)
@@ -13,9 +13,11 @@
 // limitations under the License.
 use nom_locate::LocatedSpan;
 use nom;
-use nom::{alpha, is_alphanumeric, digit, InputLength, sp};
+use nom::{alpha, is_alphanumeric, digit, multispace};
+use nom::{InputLength, Slice};
 use ast::*;
+use std;
+use std::result::Result;
 
 pub type Span<'a> = LocatedSpan<&'a str>;
 
@@ -32,35 +34,38 @@ fn is_symbol_char(c: char) -> bool {
     is_alphanumeric(c as u8) || c == '-' as char || c == '_' as char
 }
 
-named!(pub strtok( Span ) -> Token,
+named!(strtok( Span ) -> Token,
     do_parse!(
         span: position!() >>
         tag!("\"") >>
        frag: take_until!("\"") >>
         tag!("\"") >>
         (Token{
+            typ: TokenType::QUOTED,
             pos: Position::from(span),
             fragment: frag.fragment.to_string(),
         })
     )
 );
 
-named!(pub barewordtok( Span ) -> Token,
+named!(barewordtok( Span ) -> Token,
     do_parse!(
         span: position!() >>
         frag: preceded!(peek!(alpha), take_while!(is_symbol_char)) >>
         (Token{
+            typ: TokenType::BAREWORD,
             pos: Position::from(span),
             fragment: frag.fragment.to_string(),
         })
     )
 );
 
-named!(pub digittok( Span ) -> Token,
+named!(digittok( Span ) -> Token,
     do_parse!(
         span: position!() >>
         digits: digit >>
         (Token{
+            typ: TokenType::DIGIT,
             pos: Position::from(span),
             fragment: digits.fragment.to_string(),
         })
@@ -74,11 +79,12 @@ macro_rules! do_tag_tok {
     // rewrite your macro argumets for you. Which means we require this $i
     // paramater even though we don't explicitely pass it below. I don't
     // particularly like this but I'm living with it for now.
-    ($i:expr, $tag:expr) => {
+    ($i:expr, $type:expr, $tag:expr) => {
         do_parse!($i,
             span: position!() >>
             frag: tag!($tag) >>
             (Token{
+                typ: $type,
                 pos: Position::from(span),
                 fragment: frag.fragment.to_string(),
             })
@@ -86,126 +92,549 @@ macro_rules! do_tag_tok {
     }
 }
 
-named!(pub commatok( Span ) -> Token,
-    do_tag_tok!(",")
+named!(commatok( Span ) -> Token,
+    do_tag_tok!(TokenType::PUNCT, ",")
 );
 
-named!(pub lbracetok( Span ) -> Token,
-    do_tag_tok!("{")
+named!(lbracetok( Span ) -> Token,
+    do_tag_tok!(TokenType::PUNCT, "{")
 );
 
-named!(pub rbracetok( Span ) -> Token,
-    do_tag_tok!("}")
+named!(rbracetok( Span ) -> Token,
+    do_tag_tok!(TokenType::PUNCT, "}")
 );
 
-named!(pub lparentok( Span ) -> Token,
-    do_tag_tok!("(")
+named!(lparentok( Span ) -> Token,
+    do_tag_tok!(TokenType::PUNCT, "(")
 );
 
-named!(pub rparentok( Span ) -> Token,
-    do_tag_tok!(")")
+named!(rparentok( Span ) -> Token,
+    do_tag_tok!(TokenType::PUNCT, ")")
 );
 
-named!(pub dottok( Span ) -> Token,
-    do_tag_tok!(".")
+named!(dottok( Span ) -> Token,
+    do_tag_tok!(TokenType::PUNCT, ".")
 );
 
-named!(pub plustok( Span ) -> Token,
-    do_tag_tok!("+")
+named!(plustok( Span ) -> Token,
+    do_tag_tok!(TokenType::PUNCT, "+")
 );
 
-named!(pub dashtok( Span ) -> Token,
-    do_tag_tok!("-")
+named!(dashtok( Span ) -> Token,
+    do_tag_tok!(TokenType::PUNCT, "-")
 );
 
-named!(pub startok( Span ) -> Token,
-    do_tag_tok!("*")
+named!(startok( Span ) -> Token,
+    do_tag_tok!(TokenType::PUNCT, "*")
 );
 
-named!(pub slashtok( Span ) -> Token,
-    do_tag_tok!("/")
+named!(slashtok( Span ) -> Token,
+    do_tag_tok!(TokenType::PUNCT, "/")
 );
 
-named!(pub pcttok( Span ) -> Token,
-    do_tag_tok!("%")
+named!(pcttok( Span ) -> Token,
+    do_tag_tok!(TokenType::PUNCT, "%")
 );
 
-named!(pub equaltok( Span ) -> Token,
-    do_tag_tok!("=")
+named!(equaltok( Span ) -> Token,
+    do_tag_tok!(TokenType::PUNCT, "=")
 );
 
-named!(pub semicolontok( Span ) -> Token,
-    do_tag_tok!(";")
+named!(semicolontok( Span ) -> Token,
+    do_tag_tok!(TokenType::PUNCT, ";")
 );
 
-named!(pub leftsquarebracket( Span ) -> Token,
-    do_tag_tok!("[")
+named!(leftsquarebracket( Span ) -> Token,
+    do_tag_tok!(TokenType::PUNCT, "[")
 );
 
-named!(pub rightsquarebracket( Span ) -> Token,
-    do_tag_tok!("]")
+named!(rightsquarebracket( Span ) -> Token,
+    do_tag_tok!(TokenType::PUNCT, "]")
 );
 
-named!(pub commentprefix( Span ) -> Token,
-    do_tag_tok!("//")
+named!(fatcommatok( Span ) -> Token,
+    do_tag_tok!(TokenType::PUNCT, "=>")
 );
 
-named!(pub fatcommatok( Span ) -> Token,
-    do_tag_tok!("=>")
+named!(lettok( Span ) -> Token,
+    do_tag_tok!(TokenType::BAREWORD, "let")
 );
 
-named!(pub lettok( Span ) -> Token,
-    do_tag_tok!("let")
+named!(selecttok( Span ) -> Token,
+    do_tag_tok!(TokenType::BAREWORD, "select")
 );
 
-named!(pub selecttok( Span ) -> Token,
-    do_tag_tok!("select")
+named!(macrotok( Span ) -> Token,
+    do_tag_tok!(TokenType::BAREWORD, "macro")
 );
 
-named!(pub macrotok( Span ) -> Token,
-    do_tag_tok!("macro")
+named!(importtok( Span ) -> Token,
+    do_tag_tok!(TokenType::BAREWORD, "import")
 );
 
-named!(pub importtok( Span ) -> Token,
-    do_tag_tok!("import")
+named!(astok( Span ) -> Token,
+    do_tag_tok!(TokenType::BAREWORD, "as")
 );
 
-named!(pub astok( Span ) -> Token,
-    do_tag_tok!("as")
-);
-
-pub fn end_of_input(input: Span) -> nom::IResult<Span, Span> {
-    if input.input_len() == 0 {
-        return nom::IResult::Done(input, input);
-    } else {
-        return nom::IResult::Incomplete(nom::Needed::Unknown);
-    }
-}
-
-fn comment(input: Span) -> nom::IResult<Span, Span> {
-    match commentprefix(input) {
-        nom::IResult::Done(rest, _) => {
-            match alt!(rest, take_until!("\r\n") | take_until!("\n")) {
-                nom::IResult::Done(rest, cmt) => nom::IResult::Done(rest, cmt),
-                nom::IResult::Incomplete(i) => nom::IResult::Incomplete(i),
-                nom::IResult::Error(e) => {
-                    if let nom::ErrorKind::Eof = e {
-                        return nom::IResult::Done(input, input)
-                    } else {
-                        return nom::IResult::Error(e)
-                    }
-                }
-            }
+fn end_of_input(input: Span) -> nom::IResult<Span, Token> {
+    match eof!(input,) {
+        nom::IResult::Done(_, _) => {
+            return nom::IResult::Done(input,
+                                      make_tok!(EOF => input.line as usize,
+                                                input.get_column() as usize));
         }
-        nom::IResult::Incomplete(i) => {
-            return nom::IResult::Incomplete(i)
+        nom::IResult::Incomplete(_) => {
+            return nom::IResult::Incomplete(nom::Needed::Unknown);
         }
         nom::IResult::Error(e) => {
-            return nom::IResult::Error(e)
+            return nom::IResult::Error(e);
         }
     }
 }
 
-named!(pub emptyspace( Span ) -> Span,
-    alt!(sp | comment)
+fn comment(input: Span) -> nom::IResult<Span, Token> {
+    match tag!(input, "//") {
+        nom::IResult::Done(rest, _) => {
+            match alt!(rest, take_until_and_consume!("\r\n") | take_until_and_consume!("\n")) {
+                nom::IResult::Done(rest, cmt) => {
+                    return nom::IResult::Done(rest,
+                                              make_tok!(CMT => cmt.fragment.to_string(),
+                                                        input.line as usize,
+                                                        input.get_column() as usize));
+                }
+                // If we didn't find a new line then we just grab everything.
+                _ => {
+                    let blen = rest.input_len();
+                    let next = rest.slice(blen..);
+                    let tok = rest.slice(..blen);
+                    return nom::IResult::Done(next,
+                                              make_tok!(CMT => tok.fragment.to_string(),
+                                                        input.line as usize, input.get_column() as usize
+                                              ));
+                }
+            }
+        }
+        nom::IResult::Incomplete(i) => return nom::IResult::Incomplete(i),
+        nom::IResult::Error(e) => return nom::IResult::Error(e),
+    }
+}
+
+named!(whitespace( Span ) -> Token,
+    do_parse!(
+        span: position!() >>
+        many1!(multispace) >>
+        (Token{
+            typ: TokenType::WS,
+            pos: Position::from(span),
+            fragment: String::new(),
+        })
+    )
 );
 
+named!(token( Span ) -> Token,
+    alt!(
+        strtok |
+        barewordtok |
+        digittok |
+        commatok |
+        rbracetok |
+        lbracetok |
+        lparentok |
+        rparentok |
+        dottok |
+        plustok |
+        dashtok |
+        startok |
+        comment | // Note comment must come before slashtok
+        slashtok |
+        pcttok |
+        fatcommatok | // Note fatcommatok must come before equaltok
+        equaltok |
+        semicolontok |
+        leftsquarebracket |
+        rightsquarebracket |
+        lettok |
+        selecttok |
+        macrotok |
+        importtok |
+        astok |
+        whitespace |
+        end_of_input)
+);
+
+// TODO(jwall): This should return a ParseError instead.
+pub fn tokenize(input: Span) -> Result<Vec<Token>, nom::ErrorKind> {
+    let mut out = Vec::new();
+    let mut i = input;
+    loop {
+        if i.input_len() == 0 {
+            break;
+        }
+        match token(i) {
+            nom::IResult::Error(e) => {
+                return Err(e);
+            }
+            nom::IResult::Incomplete(_) => {
+                return Err(nom::ErrorKind::Complete);
+            }
+            nom::IResult::Done(rest, tok) => {
+                i = rest;
+                if tok.typ == TokenType::COMMENT || tok.typ == TokenType::WS {
+                    // we skip comments and whitespace
+                    continue;
+                }
+                out.push(tok);
+            }
+        }
+    }
+    // ensure that we always have an END token to go off of.
+    out.push(Token {
+        fragment: String::new(),
+        typ: TokenType::END,
+        pos: Position {
+            line: i.line as usize,
+            column: i.get_column() as usize,
+        },
+    });
+    Ok(out)
+}
+
+pub fn token_clone(t: &Token) -> Result<Token, ParseError> {
+    Ok(t.clone())
+}
+
+macro_rules! match_type {
+    ($i:expr, COMMENT => $h:expr) => {
+        match_type!($i, TokenType::COMMENT, "Not a Comment", $h)
+    };
+
+    ($i:expr, COMMENT) => {
+        match_type!($i, COMMENT => token_clone)
+    };
+
+    ($i:expr, BAREWORD => $h:expr) => {
+        match_type!($i, TokenType::BAREWORD, "Not a Bareword", $h)
+    };
+
+    ($i:expr, BAREWORD) => {
+        match_type!($i, BAREWORD => token_clone)
+    };
+
+    ($i:expr, STR => $h:expr) => {
+        match_type!($i, TokenType::QUOTED, "Not a String", $h)
+    };
+
+    ($i:expr, STR) => {
+        match_type!($i, STR => token_clone)
+    };
+
+    ($i:expr, DIGIT => $h:expr) => {
+        match_type!($i, TokenType::DIGIT, "Not a DIGIT", $h)
+    };
+
+    ($i:expr, DIGIT) => {
+        match_type!($i, DIGIT => token_clone)
+    };
+
+    ($i:expr, PUNCT => $h:expr) => {
+        match_type!($i, TokenType::PUNCT, "Not PUNCTUATION", $h)
+    };
+
+    ($i:expr, PUNCT) => {
+        match_type!($i, PUNCT => token_clone)
+    };
+
+    ($i:expr, $t:expr, $msg:expr, $h:expr) => {
+        {
+            let i_ = $i.clone();
+            use nom::Slice;
+            use std::convert::Into;
+            if i_.input_len() == 0 {
+                nom::IResult::Error(
+                    nom::ErrorKind::Custom(ParseError{
+                        description: format!("End of Input! {}", $msg),
+                        pos: Position{line: 0, column: 0}
+                    }))
+            } else {
+                let tok = &(i_[0]);
+                if tok.typ == $t {
+                    match $h(tok) {
+                        Result::Ok(v) => nom::IResult::Done($i.slice(1..), v),
+                        Result::Err(e) => nom::IResult::Error(
+                            nom::ErrorKind::Custom(e.into())),
+                    }
+                } else {
+                    nom::IResult::Error(nom::ErrorKind::Custom(ParseError{
+                        description: $msg.to_string(),
+                        pos: tok.pos.clone()}))
+                }
+            }
+        }
+    };
+}
+
+macro_rules! match_token {
+    ($i:expr, PUNCT => $f:expr) => {
+        match_token!($i, PUNCT => $f, token_clone)
+    };
+
+    ($i:expr, PUNCT => $f:expr, $h:expr) => {
+        match_token!($i, TokenType::PUNCT, $f, format!("Not PUNCT ({})", $f), $h)
+    };
+
+    ($i:expr, BAREWORD => $f:expr) => {
+        match_token!($i, BAREWORD => $f, token_clone)
+    };
+
+    ($i:expr, BAREWORD => $f:expr, $h:expr) => {
+        match_token!($i, TokenType::BAREWORD, $f, format!("Not a BAREWORD ({})", $f), $h)
+    };
+
+    ($i:expr, $t:expr, $f:expr, $msg:expr, $h:expr) => {
+        {
+            let i_ = $i.clone();
+            use nom::Slice;
+            use std::convert::Into;
+            let tok = &(i_[0]);
+            if tok.typ == $t && &tok.fragment == $f {
+                match $h(tok) {
+                    Result::Ok(v) => nom::IResult::Done($i.slice(1..), v),
+                    Result::Err(e) => nom::IResult::Error(
+                        nom::ErrorKind::Custom(e.into())),
+                }
+            } else {
+                nom::IResult::Error(nom::ErrorKind::Custom(ParseError{
+                    description: format!("{} Instead is ({})", $msg, tok.fragment),
+                    pos: tok.pos.clone()}))
+            }
+        }
+    };
+}
+
+macro_rules! punct {
+    ($i:expr, $c:expr) => {
+        match_token!($i, PUNCT => $c)
+    };
+}
+
+macro_rules! word {
+    ($i:expr, $w:expr) => {
+        match_token!($i, BAREWORD => $w)
+    };
+}
+
+pub fn pos(i: TokenIter) -> nom::IResult<TokenIter, Position, ParseError> {
+    let tok = &i[0];
+    let line = tok.pos.line;
+    let column = tok.pos.column;
+    nom::IResult::Done(i.clone(),
+                       Position {
+                           line: line,
+                           column: column,
+                       })
+}
+
+#[derive(Clone, Debug, PartialEq)]
+pub struct TokenIter<'a> {
+    pub source: &'a [Token],
+}
+
+impl<'a> TokenIter<'a> {
+    pub fn len(&self) -> usize {
+        self.source.len()
+    }
+}
+
+impl<'a> nom::InputLength for TokenIter<'a> {
+    fn input_len(&self) -> usize {
+        self.source.input_len()
+    }
+}
+
+macro_rules! impl_token_iter_slice {
+    ($r:ty) => {
+        impl<'a> nom::Slice<$r> for TokenIter<'a> {
+            fn slice(&self, range: $r) -> Self {
+                TokenIter {
+                    source: self.source.slice(range),
+                }
+            }
+        }
+    }
+}
+
+impl_token_iter_slice!(std::ops::Range<usize>);
+impl_token_iter_slice!(std::ops::RangeTo<usize>);
+impl_token_iter_slice!(std::ops::RangeFrom<usize>);
+impl_token_iter_slice!(std::ops::RangeFull);
+
+impl<'a> std::ops::Index<usize> for TokenIter<'a> {
+    type Output = Token;
+
+    fn index(&self, i: usize) -> &Self::Output {
+        &self.source[i]
+    }
+}
+
+impl<'a> nom::InputIter for TokenIter<'a> {
+    type Item = &'a Token;
+    type RawItem = Token;
+
+    type Iter = std::iter::Enumerate<std::slice::Iter<'a, Self::RawItem>>;
+    type IterElem = std::slice::Iter<'a, Self::RawItem>;
+
+    fn iter_indices(&self) -> Self::Iter {
+        self.source.iter().enumerate()
+    }
+
+    fn iter_elements(&self) -> Self::IterElem {
+        self.source.iter()
+    }
+
+    fn position<P>(&self, predicate: P) -> Option<usize>
+        where P: Fn(Self::RawItem) -> bool
+    {
+        for (o, v) in self.iter_indices() {
+            if predicate(v.clone()) {
+                return Some(o);
+            }
+        }
+        None
+    }
+
+    fn slice_index(&self, count: usize) -> Option<usize> {
+        let mut cnt = 0;
+        for (index, _) in self.iter_indices() {
+            if cnt == count {
+                return Some(index);
+            }
+            cnt += 1;
+        }
+        if cnt == count {
+            return Some(self.len());
+        }
+        None
+    }
+}
+
+#[cfg(test)]
+mod tokenizer_test {
+    use super::*;
+    use nom;
+    use nom_locate::LocatedSpan;
+
+    #[test]
+    fn test_tokenize_one_of_each() {
+        // 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2
+        // 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7
+        let result = tokenize(LocatedSpan::new("let import macro select as => [ ] { } ; = % / * \
+                                                + - . ( ) , 1 . foo \"bar\" // comment\n ;"));
+        assert!(result.is_ok(), format!("result {:?} is not ok", result));
+        let v = result.unwrap();
+        for (i, t) in v.iter().enumerate() {
+            println!("{}: {:?}", i, t);
+        }
+        assert_eq!(v.len(), 27);
+        assert_eq!(v[26].typ, TokenType::END);
+    }
+
+    #[test]
+    fn test_parse_has_end() {
+        let result = tokenize(LocatedSpan::new("foo"));
+        assert!(result.is_ok());
+        let v = result.unwrap();
+        assert_eq!(v.len(), 2);
+        assert_eq!(v[1].typ, TokenType::END);
+    }
+
+    #[test]
+    fn test_parse_comment() {
+        assert!(comment(LocatedSpan::new("// comment\n")).is_done());
+        assert!(comment(LocatedSpan::new("// comment")).is_done());
+        assert_eq!(comment(LocatedSpan::new("// comment\n")),
+                   nom::IResult::Done(LocatedSpan{fragment: "", offset: 11, line: 2},
+                                      Token{
+                                          typ: TokenType::COMMENT,
+                                          fragment: " comment".to_string(),
+                                          pos: Position{line: 1, column: 1},
+                                      }));
+        assert!(comment(LocatedSpan::new("// comment\r\n")).is_done());
+        assert_eq!(comment(LocatedSpan::new("// comment\r\n")),
+                   nom::IResult::Done(LocatedSpan{fragment: "", offset: 12, line: 2},
+                                      Token{
+                                          typ: TokenType::COMMENT,
+                                          fragment: " comment".to_string(),
+                                          pos: Position{column: 1, line: 1}
+                                      }));
+        assert!(comment(LocatedSpan::new("// comment\r\n ")).is_done());
+        assert_eq!(comment(LocatedSpan::new("// comment\r\n ")),
+                   nom::IResult::Done(LocatedSpan{fragment: " ", offset: 12, line: 2},
+                                      Token{
+                                          typ: TokenType::COMMENT,
+                                          fragment: " comment".to_string(),
+                                          pos: Position{column: 1, line: 1},
+                                      }));
+        // TODO(jwall): assert!(comment(LocatedSpan::new("// comment")).is_done());
+    }
+
+    #[test]
+    fn test_match_word() {
+        let input = vec![Token{
+            fragment: "foo".to_string(),
+            typ: TokenType::BAREWORD,
+            pos: Position{line: 1, column: 1}
+        }];
+        let result = word!(TokenIter{source: input.as_slice()}, "foo");
+        match result {
+            nom::IResult::Done(_, tok) => assert_eq!(tok, input[0]),
+            res => assert!(false, format!("Fail: {:?}", res)),
+        }
+    }
+
+    #[test]
+    fn test_match_word_empty_input() {
+        let input = vec![Token{
+            fragment: "".to_string(),
+            typ: TokenType::END,
+            pos: Position{line: 1, column: 1},
+        }];
+        let result = word!(TokenIter{source: input.as_slice()}, "foo");
+        match result {
+            nom::IResult::Done(_, _) => assert!(false, "Should have been an error but was Done"),
+            nom::IResult::Incomplete(_) => {
+                assert!(false, "Should have been an error but was Incomplete")
+            }
+            nom::IResult::Error(_) => {
+                // noop
+            }
+        }
+    }
+
+    #[test]
+    fn test_match_punct() {
+        let input = vec![Token{
+            fragment: "!".to_string(),
+            typ: TokenType::PUNCT,
+            pos: Position{line: 1, column: 1}
+        }];
+        let result = punct!(TokenIter{source: input.as_slice()}, "!");
+        match result {
+            nom::IResult::Done(_, tok) => assert_eq!(tok, input[0]),
+            res => assert!(false, format!("Fail: {:?}", res)),
+        }
+    }
+
+    #[test]
+    fn test_match_type() {
+        let input = vec![Token{
+            fragment: "foo".to_string(),
+            typ: TokenType::BAREWORD,
+            pos: Position{line: 1, column: 1}
+        }];
+        let result = match_type!(TokenIter{source: input.as_slice()}, BAREWORD);
+        match result {
+            nom::IResult::Done(_, tok) => assert_eq!(tok, input[0]),
+            res => assert!(false, format!("Fail: {:?}", res)),
+        }
+    }
+}
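
Putting the pieces together, a parser-side sketch of consuming the token stream with the new matcher macros (this mirrors the tests above; no new names are introduced):

    let toks = tokenize(LocatedSpan::new("let")).unwrap();
    let iter = TokenIter { source: toks.as_slice() };
    match word!(iter, "let") {
        nom::IResult::Done(_rest, tok) => assert_eq!(tok.fragment, "let"),
        res => panic!("unexpected result: {:?}", res),
    }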