Parsing the map operations.

2025-07-22 18:19:54 -04:00 · 2018-03-15 19:08:33 -05:00 · 2018-03-15 19:08:33 -05:00 · eeac1ba599
commit eeac1ba599
parent 51edc6d15c
7 changed files with 245 additions and 21 deletions
--- a/TODO.md
+++ b/TODO.md
@ -1,18 +1,11 @@
 # Major Planned Features

-## Boolean operations
+## Boolean operations and type

 * equality (for everything)
 * contains (for lists or strings)
 * less than or greater than (for numeric types)

-## List processing
-
-* Map over lists
-* Filtering lists
-* Flags could automatically expand a list of values into a list of flags.
-* Joining a lists elements. (i.e. folds)
-
 ## Query Language (Experimental)

 You should be able to ask the compiler to tell you any value or set of values in the
--- a/examples/test.ucg
+++ b/examples/test.ucg
@ -15,7 +15,13 @@ let db_conn1 = mk_db_conn(dbhost1, 3306, dbname);
 let db_conn2 = mk_db_conn(dbhost2, 3306, dbname);

 // We have two database connections in a list
-let db_conns = [db_conn1.conn_string, db_conn2.conn_string];
+let db_conn_list = [db_conn1, db_conn2];
+
+let connstr_mapper = macro (item) => {
+    str = item.conn_string
+};
+
+let db_conns = map connstr_mapper.str [db_conn1, db_conn2];

 // Our server configuration.
 let server_config = {
--- a/src/ast.rs
+++ b/src/ast.rs
@ -599,6 +599,10 @@ impl MacroDef {
                        // noop
                        continue;
                    }
+                    &Expression::ListOp(_) => {
+                        // noop
+                        continue;
+                    }
                }
            }
        }
@ -651,6 +655,21 @@ pub struct ListDef {
    pub pos: Position,
 }

+#[derive(Debug, PartialEq, Clone)]
+pub enum ListOpType { // Which list operation a ListOpDef describes.
+    Map, // collect the macro's output field for every element
+    Filter, // skip elements whose macro output field evaluates to Val::Empty
+}
+
+#[derive(Debug, PartialEq, Clone)]
+pub struct ListOpDef { // A parsed `map` or `filter` list expression.
+    pub typ: ListOpType, // which list operation to perform
+    pub mac: SelectorDef, // selector the evaluator resolves to the macro to apply
+    pub field: String, // name of the macro output field that is read for each element
+    pub target: ListDef, // list whose elements are passed to the macro one at a time
+    pub pos: Position, // position returned by Expression::pos and attached to errors
+}
+
 /// Encodes a ucg expression. Expressions compute a value from.
 #[derive(Debug, PartialEq, Clone)]
 pub enum Expression {
@ -667,6 +686,7 @@ pub enum Expression {
    Call(CallDef),
    Macro(MacroDef),
    Select(SelectDef),
+    ListOp(ListOpDef),
 }

 impl Expression {
@ -681,6 +701,7 @@ impl Expression {
            &Expression::Call(ref def) => &def.pos,
            &Expression::Macro(ref def) => &def.pos,
            &Expression::Select(ref def) => &def.pos,
+            &Expression::ListOp(ref def) => &def.pos,
        }
    }
 }
--- a/src/build.rs
+++ b/src/build.rs
@ -493,12 +493,16 @@ impl Builder {
            &Val::List(_) => {
                stack.push_back(first.clone());
            }
-            _ => {
+            val => {
+                eprintln!("Not a tuple or list! {:?}", val)
                // noop
            }
        }

        if let &Some(ref tail) = &sl.tail {
+            if tail.len() == 0 {
+                return Ok(first);
+            }
            let mut it = tail.iter().peekable();
            loop {
                let vref = stack.pop_front().unwrap();
@ -839,6 +843,39 @@ impl Builder {
        }
    }

+    // FIXME(jwall): We still need to write unit tests for these.
+    //
+    // Evaluates a `map` or `filter` list operation.
+    //
+    // Resolves `def.mac` to a macro, applies it to each element of
+    // `def.target` and reads the `def.field` entry from each result:
+    // * Map collects the field values themselves.
+    // * Filter collects the original list elements whose field value is not
+    //   `Val::Empty`, matching the behaviour described in the crate docs.
+    //
+    // Returns a TypeFail error when the selector does not resolve to a macro.
+    // Errors from selector lookup, element evaluation and macro evaluation
+    // are propagated unchanged.
+    //
+    // NOTE(review): a macro result that lacks `def.field` is skipped silently
+    // here, although the crate docs describe that case as a compile error.
+    fn eval_list_op(&self, def: &ListOpDef) -> Result<Rc<Val>, Box<Error>> {
+        let l = &def.target.elems;
+        let mac = &def.mac;
+        if let &Val::Macro(ref macdef) = try!(self.lookup_selector(&mac.sel)).as_ref() {
+            let mut out = Vec::new();
+            for expr in l.iter() {
+                // Keep the evaluated element: Filter emits it, not the macro output.
+                let item = try!(self.eval_expr(expr));
+                let fields = try!(macdef.eval(vec![item.clone()]));
+                if let Some(v) = Self::find_in_fieldlist(&def.field, &fields) {
+                    match def.typ {
+                        ListOpType::Map => {
+                            out.push(v.clone());
+                        }
+                        ListOpType::Filter => {
+                            if let &Val::Empty = v.as_ref() {
+                                // The macro rejected this element.
+                                continue;
+                            }
+                            out.push(item.clone());
+                        }
+                    }
+                }
+            }
+            return Ok(Rc::new(Val::List(out)));
+        }
+        return Err(Box::new(error::Error::new(
+            format!("Expected macro but got {:?}", mac),
+            error::ErrorType::TypeFail,
+            def.pos.clone(),
+        )));
+    }
+
    // Evals a single Expression in the context of a running Builder.
    // It does not mutate the builders collected state at all.
    pub fn eval_expr(&self, expr: &Expression) -> Result<Rc<Val>, Box<Error>> {
@ -853,6 +890,7 @@ impl Builder {
            &Expression::Call(ref def) => self.eval_call(def),
            &Expression::Macro(ref def) => self.eval_macro_def(def),
            &Expression::Select(ref def) => self.eval_select(def),
+            &Expression::ListOp(ref def) => self.eval_list_op(def),
        }
    }
 }
--- a/src/lib.rs
+++ b/src/lib.rs
@ -54,19 +54,25 @@
 //! * select
 //! * macro
 //! * env
+//! * map
+//! * filter
 //! * NULL
 //!
 //! ### Primitive types
 //!
 //! ucg has a relatively simple syntax with 3 primitive types, Int, Float, and String.
 //!
-//! * An Int is any integer number.
+//! #### Int
+//!
+//! An Int is any integer number.
 //!
 //! ```ucg
 //! 1; // a single Integer
 //! ```
 //!
-//! * A Float is any number with a decimal point.
+//! #### Float
+//!
+//! A Float is any number with a decimal point.
 //!
 //! ```ucg
 //! 1.0; // A typical float.
@ -74,7 +80,9 @@
 //! .1 // the leading 0 is also optional.
 //! ```
 //!
-//! * A String is any quoted text. Backslashes within a string escape the next preceding
+//! #### String
+//!
+//! A String is any quoted text. Backslashes within a string escape the next
 //! character.
 //!
 //! ``` ucg
@ -82,7 +90,9 @@
 //! "I'm a \"fine\" looking string"; // escaped quotes in a string.
 //! ```
 //!
-//! * A NULL is an empty type. It represents no value.
+//! #### NULL or the Empty type.
+//!
+//!  A NULL is an empty type. It represents no value.
 //!
 //! ```ucg
 //! let empty = NULL;
@ -92,7 +102,9 @@
 //!
 //! ucg has two complex data types, Lists and Tuples.
 //!
-//! * Lists are surrounded with square brackets `[ ]` and have comma separated elements.
+//! #### Lists
+//!
+//! Lists are surrounded with square brackets `[ ]` and have comma separated elements.
 //!
 //! ```ucg
 //! [1, 2, 3]; // A simple list of numbers.
@ -108,7 +120,47 @@
 //! let zero = mylist.0;
 //! ```
 //!
-//! * Tuple's are an ordered collection of name, value pairs. They are bounded by curly braces `{ }`
+//! ##### List macros
+//!
+//! ucg supports a couple of ways to use macros for mapping or filtering a list to a new list.
+//!
+//! A map expression starts with the map keyword followed by the name of a macro with exactly
+//! one argument, a `.`, and the name of the output field for the macro. ucg will apply the macro
+//! to each element of the list and then take the output field from the resulting tuple and append
+//! it to the resulting list. If the output field does not exist in the macro it will be a compile
+//! error.
+//!
+//! ```ucg
+//! let list = [1, 2, 3, 4];
+//! let mapper = macro(item) => { result = item + 1 };
+//!
+//! // results in: [2, 3, 4, 5]
+//! let mapped = map mapper.result list;
+//! ```
+//!
+//!
+//! A filter expression starts with the filter keyword followed by the name of a macro with exactly
+//! one argument, a `.`, and the name of the output field for the macro. The filter will apply the
+//! macro to each element of the list and if the output field is a Value that is not NULL then the
+//! list element is appended to the output list. If the output field returns a NULL Value then the
+//! element is not appended to the output list. If the output field does not exist in the macro it
+//! will be a compile error.
+//!
+//! ```ucg
+//! let list = ["foo", "bar", "foo", "bar"];
+//! let filtrator = macro(item) => {
+//!   ok = select item NULL {
+//!     foo = 1
+//!   }
+//! };
+//!
+//! // results in: ["foo", "foo"]
+//! let filtered = filter filtrator.ok list;
+//! ```
+//!
+//! #### Tuple
+//!
+//! Tuples are an ordered collection of name, value pairs. They are bounded by curly braces `{ }`
 //! and contain name = value pairs separated by commas. Trailing commas are permitted. The name must
 //! be a bareword without quotes.
 //!
@ -223,10 +275,10 @@
 //!
 //! Macros look like functions but they are resolved at compile time and configurations don't execute so they never appear in output.
 //! They are useful for constructing tuples of a certain shape or otherwise promoting data reuse. You define a macro with the `macro`
-//! keyword followed by the arguments in parentheses and then a tuple.
+//! keyword followed by the arguments in parentheses, a `=>`, and then a tuple.
 //!
 //! ```ucg
-//! let myfunc = macro (arg1, arg2) {
+//! let myfunc = macro (arg1, arg2) => {
 //!     host = arg1,
 //!     port = arg2,
 //!     connstr = "couchdb://@:@" % (arg1, arg2),
@ -278,6 +330,10 @@
 //!
 //! let mysqlconf = dbconfigs.mysql;
 //! ```
+
+// The following is necessary to allow the macros in tokenizer and parse modules
+// to succeed.
+#![recursion_limit = "128"]
 #[macro_use]
 extern crate nom;
 #[macro_use]
--- a/src/parse.rs
+++ b/src/parse.rs
@ -404,7 +404,7 @@ named!(macro_expression<TokenIter, Expression, ParseError>,
       map_res!(
           do_parse!(
                pos: pos >>
-                start: word!("macro") >>
+                word!("macro") >>
                punct!("(") >>
                arglist: arglist >>
                punct!(")") >>
@ -512,6 +512,105 @@ named!(call_expression<TokenIter, Expression, ParseError>,
       )
 );

+// Parses the target of a list operation: a symbol if one is present,
+// otherwise whatever `list_value` accepts. An Incomplete from the symbol
+// parser is passed straight through without trying the list alternative.
+fn symbol_or_list(input: TokenIter) -> NomResult<Value> {
+    let symbol_attempt = do_parse!(input, sym: symbol >> (sym));
+
+    match symbol_attempt {
+        IResult::Done(remaining, value) => IResult::Done(remaining, value),
+        IResult::Incomplete(needed) => IResult::Incomplete(needed),
+        // TODO(jwall): Still missing some. But we need to avoid recursion
+        IResult::Error(_) => list_value(input),
+    }
+}
+
+// Converts the pieces parsed by `list_op_expression` into an
+// `Expression::ListOp`.
+//
+// The tuple holds the start position, the `map`/`filter` keyword token, the
+// macro selector and the target value. The last element of the selector's
+// tail is removed and used as the output field name; what remains of the
+// selector is stored as the macro reference.
+//
+// Returns a `ParseError` when the keyword is neither `map` nor `filter`, the
+// macro value is not a selector, the selector has no tail element to use as
+// the field, or the target is not a `Value::List`.
+//
+// NOTE(review): only `Value::List` targets are accepted, so a symbol target
+// parsed by `symbol_or_list` presumably ends in the "Expected a list" error.
+fn tuple_to_list_op(tpl: (Position, Token, Value, Value)) -> ParseResult<Expression> {
+    let (pos, keyword, macroname, list) = tpl;
+
+    let op_type = match keyword.fragment.as_str() {
+        "map" => ListOpType::Map,
+        "filter" => ListOpType::Filter,
+        _ => {
+            return Err(ParseError {
+                description: format!(
+                    "Expected one of 'map' or 'filter' but got '{}'",
+                    keyword.fragment
+                ),
+                pos: pos,
+            });
+        }
+    };
+
+    let mut def = match macroname {
+        Value::Selector(def) => def,
+        other => {
+            return Err(ParseError {
+                pos: pos,
+                description: format!("Expected a macro but got {:?}", other),
+            });
+        }
+    };
+
+    // The selector needs at least two sections: the final one names the
+    // result field and is split off here.
+    let fieldname: String = match def.sel.tail {
+        None => {
+            return Err(ParseError {
+                description: "Missing a result field for the macro".to_string(),
+                pos: pos,
+            });
+        }
+        Some(ref mut segments) => match segments.pop() {
+            Some(last) => last.fragment,
+            None => {
+                return Err(ParseError {
+                    description: "Missing a result field for the macro".to_string(),
+                    pos: def.pos.clone(),
+                });
+            }
+        },
+    };
+
+    match list {
+        Value::List(ldef) => Ok(Expression::ListOp(ListOpDef {
+            typ: op_type,
+            mac: def,
+            field: fieldname,
+            target: ldef,
+            pos: pos,
+        })),
+        // TODO(jwall): We should print a prettier message than debug formatting here.
+        other => Err(ParseError {
+            pos: pos,
+            description: format!("Expected a list but got {:?}", other),
+        }),
+    }
+}
+
+named!(list_op_expression<TokenIter, Expression, ParseError>, // parses a `map` or `filter` list operation
+    map_res!(
+        do_parse!(
+            pos: pos >> // presumably the position where the expression starts; confirm against `pos`
+            optype: alt!(word!("map") | word!("filter")) >> // the operation keyword token
+            macroname: selector_value >> // macro selector; its last segment is the output field
+            list: symbol_or_list >> // the target: a symbol or a list value
+            (pos, optype, macroname, list)
+        ),
+        tuple_to_list_op // turns the tuple into Expression::ListOp or a ParseError
+    )
+);
+
 // NOTE(jwall): HERE THERE BE DRAGONS. The order for these matters
 // alot. We need to process alternatives in order of decreasing
 // specificity.  Unfortunately this means we are required to go in a
@ -525,6 +624,7 @@ named!(call_expression<TokenIter, Expression, ParseError>,
 named!(expression<TokenIter, Expression, ParseError>,
    do_parse!(
        expr: alt!(
+           complete!(list_op_expression) |
           complete!(add_expression) |
           complete!(sub_expression) |
           complete!(mul_expression) |
@ -625,7 +725,7 @@ pub fn parse(input: LocatedSpan<&str>) -> Result<Vec<Statement>, ParseError> {
                    IResult::Error(e) => {
                        return Err(ParseError {
                            description: format!(
-                                "Tokenization error: {:?} current token: {:?}",
+                                "Statement Parse error: {:?} current token: {:?}",
                                e, i_[0]
                            ),
                            pos: Position {
--- a/src/tokenizer.rs
+++ b/src/tokenizer.rs
@ -205,6 +205,14 @@ named!(astok( Span ) -> Token,
       do_tag_tok!(TokenType::BAREWORD, "as")
 );

+named!(maptok( Span ) -> Token, // tokenizes the `map` keyword as a BAREWORD token
+       do_tag_tok!(TokenType::BAREWORD, "map") // NOTE(review): if this matches a bare prefix, identifiers like `mapper` may be split; confirm
+);
+
+named!(filtertok( Span ) -> Token, // tokenizes the `filter` keyword as a BAREWORD token
+       do_tag_tok!(TokenType::BAREWORD, "filter") // NOTE(review): if this matches a bare prefix, identifiers like `filtered` may be split; confirm
+);
+
 fn end_of_input(input: Span) -> nom::IResult<Span, Token> {
    match eof!(input,) {
        nom::IResult::Done(_, _) => {
@ -273,7 +281,6 @@ named!(token( Span ) -> Token,
    alt!(
        strtok |
        emptytok | // This must come before the barewordtok
-        barewordtok |
        digittok |
        commatok |
        rbracetok |
@ -297,6 +304,9 @@ named!(token( Span ) -> Token,
        macrotok |
        importtok |
        astok |
+        maptok |
+        filtertok |
+        barewordtok |
        whitespace |
        end_of_input)
 );