// Copyright 2017 Jeremy Wall // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. //! The definitions of the ucg AST and Tokens. use std; use std::borrow::Borrow; use std::cmp::Eq; use std::cmp::Ordering; use std::cmp::PartialEq; use std::cmp::PartialOrd; use std::convert::{Into, TryFrom, TryInto}; use std::fmt; use std::hash::Hash; use std::hash::Hasher; use std::path::PathBuf; use std::rc::Rc; use abortable_parser; use crate::build::scope::Scope; use crate::build::Val; use crate::error::BuildError; pub mod printer; pub mod walk; pub use walk::Walker; #[derive(Debug, PartialEq, Clone)] pub enum TemplatePart { Str(Vec), PlaceHolder(usize), Expression(Expression), } macro_rules! enum_type_equality { ( $slf:ident, $r:expr, $( $l:pat ),* ) => { match $slf { $( $l => { if let $l = $r { true } else { false } } )* } } } /// Represents a line and a column position in UCG code. /// /// It is used for generating error messages mostly. Most all /// parts of the UCG AST have a positioned associated with them. #[derive(Debug, PartialEq, Eq, Clone, PartialOrd, Ord, Hash)] pub struct Position { pub file: Option, pub line: usize, pub column: usize, pub offset: usize, } impl Position { /// Construct a new Position. 
pub fn new(line: usize, column: usize, offset: usize) -> Self { Position { file: None, line: line, column: column, offset: offset, } } pub fn with_file>(mut self, file: P) -> Self { self.file = Some(file.into()); self } } impl<'a> From<&'a Position> for Position { fn from(source: &'a Position) -> Self { source.clone() } } impl std::fmt::Display for Position { fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> { if let Some(ref file) = self.file { write!(f, "file: {} ", file.to_string_lossy().to_string())?; } write!(f, "line: {} column: {}", self.line, self.column) } } /// Defines the types of tokens in UCG syntax. #[derive(Debug, PartialEq, Eq, Clone, PartialOrd, Ord, Hash)] pub enum TokenType { EMPTY, BOOLEAN, END, WS, COMMENT, QUOTED, PIPEQUOTE, DIGIT, BAREWORD, PUNCT, } /// Defines a Token representing a building block of UCG syntax. /// /// Token's are passed to the parser stage to be parsed into an AST. #[derive(Debug, PartialEq, Eq, Clone, PartialOrd, Ord, Hash)] pub struct Token { pub typ: TokenType, pub fragment: String, pub pos: Position, } impl Token { /// Constructs a new Token with a type and line and column information. pub fn new, P: Into>(f: S, typ: TokenType, p: P) -> Self { Self::new_with_pos(f, typ, p.into()) } // Constructs a new Token with a type and a Position. pub fn new_with_pos>(f: S, typ: TokenType, pos: Position) -> Self { Token { typ: typ, fragment: f.into(), pos: pos, } } } impl abortable_parser::Positioned for Token { fn line(&self) -> usize { self.pos.line } fn column(&self) -> usize { self.pos.column } } impl Borrow for Token { fn borrow(&self) -> &str { &self.fragment } } /// Helper macro for making a Positioned Value. macro_rules! value_node { ($v:expr, $p:expr) => { PositionedItem::new_with_pos($v, $p) }; } /// Helper macro for making a Token. #[allow(unused_macros)] macro_rules! 
make_tok { (EOF => $i:expr) => { Token::new("", TokenType::END, &$i) }; (WS => $i:expr) => { Token::new("", TokenType::WS, &$i) }; (CMT => $e:expr, $i:expr) => { Token::new($e, TokenType::COMMENT, &$i) }; (QUOT => $e:expr, $i:expr) => { Token::new($e, TokenType::QUOTED, &$i) }; (PUNCT => $e:expr, $i:expr) => { Token::new($e, TokenType::PUNCT, &$i) }; (DIGIT => $e:expr, $i:expr) => { Token::new($e, TokenType::DIGIT, &$i) }; ($e:expr, $i:expr) => { Token::new($e, TokenType::BAREWORD, &$i) }; } /// Helper macro for making expressions. #[allow(unused_macros)] macro_rules! make_expr { ($e:expr, $i:expr) => { Expression::Simple(Value::Symbol(PositionedItem::new_with_pos( $e.to_string(), $i, ))) }; ($e:expr => int, $i:expr) => { Expression::Simple(Value::Int(PositionedItem::new_with_pos($e, $i))) }; } /// An ordered list of Name = Value pairs. /// /// This is usually used as the body of a tuple in the UCG AST. pub type FieldList = Vec<(Token, Expression)>; // Token is expected to be a symbol pub type ShapeTuple = Vec<(Token, Shape)>; pub type ShapeList = Vec; #[derive(PartialEq, Debug, Clone)] pub struct FuncShapeDef { args: Vec, ret: Box, } #[derive(PartialEq, Debug, Clone)] pub struct ModuleShapeDef { items: ShapeTuple, ret: Box, } macro_rules! value_enum { ($doc:meta $i:tt, $t:ty, $l:ty, $($extra:tt)*) => { #[$doc] #[derive(PartialEq, Debug, Clone)] pub enum $i { // Simple Values Empty(Position), Boolean(PositionedItem), Int(PositionedItem), Float(PositionedItem), Str(PositionedItem), Symbol(PositionedItem), // Complex Values Tuple(PositionedItem<$t>), List($l), // Extra items $( $extra )* } } } value_enum!( doc="Value types represent the Values that UCG can have." Value, FieldList, ListDef, ); value_enum!( doc="Shapes represent the types that UCG values or expressions can have." Shape, ShapeTuple, PositionedItem, Func(FuncShapeDef), Module(ModuleShapeDef), ); impl Value { /// Returns the type name of the Value it is called on as a string. 
pub fn type_name(&self) -> String { match self { &Value::Empty(_) => "EmptyValue".to_string(), &Value::Boolean(_) => "Boolean".to_string(), &Value::Int(_) => "Integer".to_string(), &Value::Float(_) => "Float".to_string(), &Value::Str(_) => "String".to_string(), &Value::Symbol(_) => "Symbol".to_string(), &Value::Tuple(_) => "Tuple".to_string(), &Value::List(_) => "List".to_string(), } } fn fields_to_string(v: &FieldList) -> String { let mut buf = String::new(); buf.push_str("{\n"); for ref t in v.iter() { buf.push_str("\t"); buf.push_str(&t.0.fragment); buf.push_str("\n"); } buf.push_str("}"); return buf; } fn elems_to_string(v: &Vec) -> String { return format!("{}", v.len()); } /// Returns a stringified version of the Value. pub fn to_string(&self) -> String { match self { &Value::Empty(_) => "EmptyValue".to_string(), &Value::Boolean(ref b) => format!("{}", b.val), &Value::Int(ref i) => format!("{}", i.val), &Value::Float(ref f) => format!("{}", f.val), &Value::Str(ref s) => format!("{}", s.val), &Value::Symbol(ref s) => format!("{}", s.val), &Value::Tuple(ref fs) => format!("{}", Self::fields_to_string(&fs.val)), &Value::List(ref def) => format!("[{}]", Self::elems_to_string(&def.elems)), } } /// Returns the position for a Value. pub fn pos(&self) -> &Position { match self { &Value::Empty(ref pos) => pos, &Value::Boolean(ref b) => &b.pos, &Value::Int(ref i) => &i.pos, &Value::Float(ref f) => &f.pos, &Value::Str(ref s) => &s.pos, &Value::Symbol(ref s) => &s.pos, &Value::Tuple(ref fs) => &fs.pos, &Value::List(ref def) => &def.pos, } } /// Returns true if called on a Value that is the same type as itself. 
pub fn type_equal(&self, target: &Self) -> bool { enum_type_equality!( self, target, &Value::Empty(_), &Value::Boolean(_), &Value::Int(_), &Value::Float(_), &Value::Str(_), &Value::Symbol(_), &Value::Tuple(_), &Value::List(_) ) } fn derive_shape(&self) -> Result { let shape = match self { Value::Empty(p) => Shape::Empty(p.clone()), Value::Boolean(p) => Shape::Boolean(p.clone()), Value::Int(p) => Shape::Int(p.clone()), Value::Float(p) => Shape::Float(p.clone()), Value::Str(p) => Shape::Str(p.clone()), // Symbols in a shape are placeholder. They allow a form of genericity // in the shape. They can be any type and are only refined down. // by their presence in an expression. Value::Symbol(p) => Shape::Symbol(p.clone()), Value::Tuple(flds) => { let mut field_shapes = Vec::new(); for &(ref tok, ref expr) in &flds.val { field_shapes.push((tok.clone(), expr.try_into()?)); } Shape::Tuple(PositionedItem::new(field_shapes, flds.pos.clone())) } Value::List(flds) => { let mut field_shapes = Vec::new(); for f in &flds.elems { field_shapes.push(f.try_into()?); } Shape::List(PositionedItem::new(field_shapes, flds.pos.clone())) } }; Ok(shape) } } impl TryFrom<&Value> for Shape { type Error = crate::error::BuildError; fn try_from(v: &Value) -> Result { v.derive_shape() } } /// Represents an expansion of a Macro that is expected to already have been /// defined. #[derive(PartialEq, Debug, Clone)] pub struct CallDef { pub funcref: Value, pub arglist: Vec, pub pos: Position, } /// The allowable types to which you can perform a primitive cast. #[derive(PartialEq, Debug, Clone)] pub enum CastType { Int, Float, Str, Bool, } impl fmt::Display for CastType { fn fmt(&self, w: &mut fmt::Formatter) -> fmt::Result { write!( w, "{}", match self { CastType::Int => "int", CastType::Float => "float", CastType::Bool => "bool", CastType::Str => "str", } ) } } /// Represents a cast of a target to a primitive type. 
#[derive(PartialEq, Debug, Clone)]
pub struct CastDef {
    pub cast_type: CastType,
    pub target: Box<Expression>,
    pub pos: Position,
}

/// Encodes a select expression in the UCG AST.
#[derive(PartialEq, Debug, Clone)]
pub struct SelectDef {
    pub val: Box<Expression>,
    pub default: Option<Box<Expression>>,
    pub tuple: FieldList,
    pub pos: Position,
}

/// Adds position information to any type `T`.
///
/// Equality, ordering and hashing are all defined on `val` alone; `pos` is
/// deliberately ignored by the trait impls below.
#[derive(Debug, Clone)]
pub struct PositionedItem<T> {
    pub pos: Position,
    pub val: T,
}

impl<T: std::fmt::Display> std::fmt::Display for PositionedItem<T> {
    fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> {
        write!(f, "{}", self.val)
    }
}

impl<T> PositionedItem<T> {
    /// Constructs a new Positioned with a value and anything convertible into a Position.
    pub fn new<P: Into<Position>>(v: T, p: P) -> Self {
        Self::new_with_pos(v, p.into())
    }

    /// Constructs a new Positioned with a value and a Position.
    pub fn new_with_pos(v: T, pos: Position) -> Self {
        PositionedItem { pos, val: v }
    }
}

impl<T: PartialEq> PartialEq for PositionedItem<T> {
    fn eq(&self, other: &Self) -> bool {
        self.val == other.val
    }
}

impl<T: Eq> Eq for PositionedItem<T> {}

impl<T: Ord> Ord for PositionedItem<T> {
    fn cmp(&self, other: &Self) -> Ordering {
        self.val.cmp(&other.val)
    }
}

impl<T: PartialOrd> PartialOrd for PositionedItem<T> {
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        self.val.partial_cmp(&other.val)
    }
}

impl<T: Hash> Hash for PositionedItem<T> {
    // Hash only `val` so that hashing stays consistent with `eq` above.
    fn hash<H: Hasher>(&self, state: &mut H) {
        self.val.hash(state);
    }
}

impl<'a> From<&'a Token> for PositionedItem<String> {
    fn from(t: &'a Token) -> PositionedItem<String> {
        PositionedItem {
            pos: t.pos.clone(),
            val: t.fragment.clone(),
        }
    }
}

impl<'a> From<&'a PositionedItem<String>> for PositionedItem<String> {
    fn from(t: &PositionedItem<String>) -> PositionedItem<String> {
        PositionedItem {
            pos: t.pos.clone(),
            val: t.val.clone(),
        }
    }
}

/// Encodes a func expression in the UCG AST.
///
/// A func is a pure function over a tuple.
#[derive(PartialEq, Debug, Clone)]
pub struct FuncDef {
    pub scope: Option<Scope>,
    pub argdefs: Vec<PositionedItem<String>>,
    pub fields: Box<Expression>,
    pub pos: Position,
}

/// Specifies the types of binary operations supported in
/// UCG expression.
#[derive(Debug, PartialEq, Clone)] pub enum BinaryExprType { // Math Add, Sub, Mul, Div, Mod, // Boolean AND, OR, // Comparison Equal, GT, LT, NotEqual, GTEqual, LTEqual, REMatch, NotREMatch, IN, IS, // Selector operator DOT, } impl BinaryExprType { /// Returns the precedence level for the binary operator. /// /// Higher values bind tighter than lower values. pub fn precedence_level(&self) -> u32 { match self { // Equality operators are least tightly bound BinaryExprType::Equal => 1, BinaryExprType::NotEqual => 1, BinaryExprType::GTEqual => 1, BinaryExprType::LTEqual => 1, BinaryExprType::GT => 1, BinaryExprType::LT => 1, BinaryExprType::REMatch => 1, BinaryExprType::NotREMatch => 1, BinaryExprType::IN => 2, BinaryExprType::IS => 2, // Sum operators are next least tightly bound BinaryExprType::Add => 3, BinaryExprType::Sub => 3, // Product operators are next tightly bound BinaryExprType::Mul => 4, BinaryExprType::Div => 4, BinaryExprType::Mod => 4, // Boolean operators bind tighter than math BinaryExprType::AND => 5, BinaryExprType::OR => 5, // Dot operators are most tightly bound. BinaryExprType::DOT => 6, } } } /// Represents an expression with a left and a right side. #[derive(Debug, PartialEq, Clone)] pub struct BinaryOpDef { pub kind: BinaryExprType, pub left: Box, pub right: Box, pub pos: Position, } /// Encodes a tuple Copy expression in the UCG AST. #[derive(Debug, PartialEq, Clone)] pub struct CopyDef { pub selector: Value, pub fields: FieldList, pub pos: Position, } /// Encodes one of two possible forms for format expression arguments. #[derive(Debug, PartialEq, Clone)] pub enum FormatArgs { List(Vec), Single(Box), } /// Encodes a format expression in the UCG AST. #[derive(Debug, PartialEq, Clone)] pub struct FormatDef { pub template: String, pub args: FormatArgs, pub pos: Position, } /// Encodes an import statement in the UCG AST. 
#[derive(Debug, PartialEq, Clone)] pub struct IncludeDef { pub pos: Position, pub path: Token, pub typ: Token, } /// Encodes a list expression in the UCG AST. #[derive(Debug, PartialEq, Clone)] pub struct ListDef { pub elems: Vec, pub pos: Position, } #[derive(Debug, PartialEq, Clone)] pub enum FuncOpDef { Reduce(ReduceOpDef), Map(MapFilterOpDef), Filter(MapFilterOpDef), } #[derive(Debug, PartialEq, Clone)] pub struct ReduceOpDef { pub func: Box, pub acc: Box, pub target: Box, pub pos: Position, } /// MapFilterOpDef implements the list operations in the UCG AST. #[derive(Debug, PartialEq, Clone)] pub struct MapFilterOpDef { pub func: Box, pub target: Box, pub pos: Position, } impl FuncOpDef { pub fn pos(&self) -> &Position { match self { FuncOpDef::Map(def) => &def.pos, FuncOpDef::Filter(def) => &def.pos, FuncOpDef::Reduce(def) => &def.pos, } } } #[derive(Debug, PartialEq, Clone)] pub struct ModuleDef { pub scope: Option, pub pos: Position, pub arg_set: FieldList, pub out_expr: Option>, pub arg_tuple: Option>, pub statements: Vec, } impl ModuleDef { pub fn new>(arg_set: FieldList, stmts: Vec, pos: P) -> Self { ModuleDef { scope: None, pos: pos.into(), arg_set: arg_set, out_expr: None, arg_tuple: None, statements: stmts, } } pub fn set_out_expr(&mut self, expr: Expression) { self.out_expr = Some(Box::new(expr)); } } pub struct Rewriter { base: PathBuf, } impl Rewriter { pub fn new>(base: P) -> Self { Self { base: base.into() } } } impl walk::Walker for Rewriter { fn visit_expression(&mut self, expr: &mut Expression) { // Rewrite all paths except for stdlib paths to absolute. 
let main_separator = format!("{}", std::path::MAIN_SEPARATOR); if let Expression::Include(ref mut def) = expr { let path = PathBuf::from(&def.path.fragment); #[cfg(not(windows))] { if path.is_relative() { def.path.fragment = self .base .join(path) .canonicalize() .unwrap() .to_string_lossy() .to_string(); } } #[cfg(windows)] { if path.is_relative() { def.path.fragment = self.base.join(path).to_string_lossy().to_string(); } } } if let Expression::Import(ref mut def) = expr { let path = PathBuf::from( &def.path .fragment .replace("/", &main_separator) .replace("\\", &main_separator), ); // std/ paths are special and do not get made into absolute paths. if path.starts_with(format!("std{}", main_separator)) { return; } #[cfg(not(windows))] { if path.is_relative() { def.path.fragment = self .base .join(path) .canonicalize() .unwrap() .to_string_lossy() .to_string(); } } #[cfg(windows)] { if path.is_relative() { def.path.fragment = self.base.join(path).to_string_lossy().to_string(); } } } } } /// RangeDef defines a range with optional step. #[derive(Debug, PartialEq, Clone)] pub struct RangeDef { pub pos: Position, pub start: Box, pub step: Option>, pub end: Box, } /// Encodes an import expression in the UCG AST. #[derive(Debug, PartialEq, Clone)] pub struct ImportDef { pub pos: Position, pub path: Token, } #[derive(Debug, PartialEq, Clone)] pub struct IsDef { pub pos: Position, pub target: Box, pub typ: Token, } #[derive(Debug, PartialEq, Clone)] pub struct FailDef { pub pos: Position, pub message: Box, } #[derive(Debug, PartialEq, Clone)] pub struct NotDef { pub pos: Position, pub expr: Box, } #[derive(Debug, PartialEq, Clone)] pub struct DebugDef { pub pos: Position, pub expr: Box, } /// Encodes a ucg expression. Expressions compute a value from. 
#[derive(Debug, PartialEq, Clone)] pub enum Expression { // Base Expression Simple(Value), Not(NotDef), // Binary expressions Binary(BinaryOpDef), // Complex Expressions Copy(CopyDef), Range(RangeDef), Grouped(Box, Position), Format(FormatDef), Include(IncludeDef), Import(ImportDef), Call(CallDef), Cast(CastDef), Func(FuncDef), Select(SelectDef), FuncOp(FuncOpDef), Module(ModuleDef), // Declarative failure expressions Fail(FailDef), // Debugging assistance Debug(DebugDef), } impl Expression { /// Returns the position of the Expression. pub fn pos(&self) -> &Position { match self { &Expression::Simple(ref v) => v.pos(), &Expression::Binary(ref def) => &def.pos, &Expression::Copy(ref def) => &def.pos, &Expression::Range(ref def) => &def.pos, &Expression::Grouped(_, ref pos) => pos, &Expression::Format(ref def) => &def.pos, &Expression::Call(ref def) => &def.pos, &Expression::Cast(ref def) => &def.pos, &Expression::Func(ref def) => &def.pos, &Expression::Module(ref def) => &def.pos, &Expression::Select(ref def) => &def.pos, &Expression::FuncOp(ref def) => def.pos(), &Expression::Include(ref def) => &def.pos, &Expression::Import(ref def) => &def.pos, &Expression::Fail(ref def) => &def.pos, &Expression::Not(ref def) => &def.pos, &Expression::Debug(ref def) => &def.pos, } } fn derive_shape(&self) -> Result { // FIXME(jwall): Implement this let shape = match self { Expression::Simple(ref v) => v.try_into()?, Expression::Format(def) => { Shape::Str(PositionedItem::new("".to_owned(), def.pos.clone())) } Expression::Not(def) => Shape::Boolean(PositionedItem::new(true, def.pos.clone())), Expression::Grouped(v, _pos) => v.as_ref().try_into()?, _ => Shape::Empty(Position::new(0, 0, 0)), }; Ok(shape) } } impl TryFrom<&Expression> for Shape { type Error = crate::error::BuildError; fn try_from(e: &Expression) -> Result { e.derive_shape() } } impl fmt::Display for Expression { fn fmt(&self, w: &mut fmt::Formatter) -> fmt::Result { match self { &Expression::Simple(ref v) => { 
write!(w, "{}", v.to_string())?; } &Expression::Binary(_) => { write!(w, "")?; } &Expression::FuncOp(_) => { write!(w, "")?; } &Expression::Copy(_) => { write!(w, "")?; } &Expression::Range(_) => { write!(w, "")?; } &Expression::Grouped(_, _) => { write!(w, "()")?; } &Expression::Format(_) => { write!(w, "")?; } &Expression::Call(_) => { write!(w, "")?; } &Expression::Cast(_) => { write!(w, "")?; } &Expression::Func(_) => { write!(w, "")?; } &Expression::Module(_) => { write!(w, "")?; } &Expression::Select(_) => { write!(w, "