diff --git a/Cargo.lock b/Cargo.lock
index b8fd55e..7630e1c 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -499,10 +499,16 @@ dependencies = [
  "serde_yaml 0.8.1 (registry+https://github.com/rust-lang/crates.io-index)",
  "simple-error 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)",
  "toml 0.4.8 (registry+https://github.com/rust-lang/crates.io-index)",
+ "unicode-segmentation 1.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
  "walkdir 2.2.7 (registry+https://github.com/rust-lang/crates.io-index)",
  "xml-rs 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)",
 ]
 
+[[package]]
+name = "unicode-segmentation"
+version = "1.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+
 [[package]]
 name = "unicode-width"
 version = "0.1.5"
@@ -652,6 +658,7 @@ dependencies = [
 "checksum thread_local 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)" = "c6b53e329000edc2b34dbe8545fd20e55a333362d0a321909685a19bd28c3f1b"
 "checksum toml 0.4.8 (registry+https://github.com/rust-lang/crates.io-index)" = "4a2ecc31b0351ea18b3fe11274b8db6e4d82bce861bbb22e6dbed40417902c65"
 "checksum ucd-util 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "535c204ee4d8434478593480b8f86ab45ec9aae0e83c568ca81abf0fd0e88f86"
+"checksum unicode-segmentation 1.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "aa6024fc12ddfd1c6dbc14a80fa2324d4568849869b779f6bd37e5e4c03344d1"
 "checksum unicode-width 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "882386231c45df4700b275c7ff55b6f3698780a650026380e72dabe76fa46526"
 "checksum unicode-xid 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "fc72304796d0818e357ead4e000d19c9c174ab23dc11093ac919054d20a6a7fc"
 "checksum utf8-ranges 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "796f7e48bef87609f7ade7e06495a87d5cd06c7866e6a5cbfceffc558a243737"
diff --git a/Cargo.toml b/Cargo.toml
index 4ec4510..3fd9aac 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -29,6 +29,7 @@ xml-rs = "0.8.0"
 base64 = "0.10.0"
 regex = "1"
 dirs = "1.0.4"
+unicode-segmentation = "1.2.1"
 
 [build-dependencies]
 walkdir = "2.2.7"
diff --git a/TODO.md b/TODO.md
index f935e8f..73e0583 100644
--- a/TODO.md
+++ b/TODO.md
@@ -2,18 +2,6 @@
 
 ## Compile Errors as expression
 
-## String handling
-
-Mostly handled by ranges and indexing for strings?
-
-Type comparisons?
-
-```
-let foo = "foo";
-foo is str; // evaluates to true
-foo is int; // evaluates to false
-```
-
 ## Query Language (Experimental)
 
 You should be able to ask the compiler to tell you any value or set of values in the
diff --git a/docsite/site/content/reference/expressions.md b/docsite/site/content/reference/expressions.md
index 47dbd81..cb58a73 100644
--- a/docsite/site/content/reference/expressions.md
+++ b/docsite/site/content/reference/expressions.md
@@ -299,7 +299,7 @@ Functional processing expressions
 ---------------------------------
 
 UCG has a few functional processing expressions called `map`, `filter`, and
-`reduce`. All of them can process a list or tuple.
+`reduce`. All of them can process a string, list, or tuple.
 Their syntax starts with either `map`, `filter`, or `reduce` followed by a symbol
 that references a valid macro and finally an expression that resolves to either
diff --git a/integration_tests/functional_processing_test.ucg b/integration_tests/functional_processing_test.ucg
index 727cc84..01970e8 100644
--- a/integration_tests/functional_processing_test.ucg
+++ b/integration_tests/functional_processing_test.ucg
@@ -46,6 +46,13 @@ assert {
     desc = "map mapper [1, 2, 3, 4] == [2, 3, 4, 5]",
 };
 
+let s_mapper = macro(arg) => arg + ",";
+
+assert {
+    ok = map s_mapper "foo" == "f,o,o,",
+    desc = "we can map over each character",
+};
+
 assert {
     ok = filter filtrator list2 == ["foo", "foo"],
     desc = "filter filtrator list2 == [\"foo\", \"foo\"]",
@@ -102,6 +109,13 @@ assert {
     desc = "filter tpl_filter test_tpl == { quux = \"baz\" }",
 };
 
+let o_str_filter = macro(s) => s != "o";
+
+assert {
+    ok = filter o_str_filter "foobar" == "fbar",
+    desc = "We can strip out characters",
+};
+
 let tpl_reducer = macro(acc, name, val) => acc{
     keys = self.keys + [name],
     vals = self.vals + [val],
@@ -110,4 +124,18 @@ assert {
     ok = reduce tpl_reducer {keys = [], vals = []}, test_tpl == {keys = ["foo", "quux"], vals = ["bar", "baz"]},
     desc = "reduce tpl_reducer {keys = [], vals = []}, test_tpl == {keys = [\"foo\", \"quux\"], vals = [\"bar\", \"baz\"]}",
+};
+
+let str_identity_reducer = macro(acc, s) => acc + s;
+
+assert {
+    ok = reduce str_identity_reducer "", "foo" == "foo",
+    desc = "identity reducer copies string",
+};
+
+let char_iter = macro(acc, s) => acc + [s];
+
+assert {
+    ok = reduce char_iter [], "foo" == ["f", "o", "o"],
+    desc = "we can split a string into grapheme clusters",
 };
\ No newline at end of file
diff --git a/src/build/mod.rs b/src/build/mod.rs
index 218124e..425b248 100644
--- a/src/build/mod.rs
+++ b/src/build/mod.rs
@@ -27,6 +27,7 @@ use std::string::ToString;
 
 use regex;
 use simple_error;
+use unicode_segmentation::UnicodeSegmentation;
 
 use crate::ast::*;
 use crate::build::scope::{find_in_fieldlist, Scope, ValueMap};
@@ -1375,10 +1376,17 @@ impl<'a> FileBuilder<'a> {
                     acc = result;
                 }
             }
+            &Val::Str(ref s) => {
+                for gc in s.graphemes(true) {
+                    let argvals = vec![acc.clone(), Rc::new(Val::Str(gc.to_string()))];
+                    let result = macdef.eval(self.file.clone(), self, argvals)?;
+                    acc = result;
+                }
+            }
             other => {
                 return Err(Box::new(error::BuildError::new(
                     format!(
-                        "Expected List or Tuple as target but got {:?}",
+                        "Expected List, Str, or Tuple as target but got {:?}",
                         other.type_name()
                     ),
                     error::ErrorType::TypeFail,
@@ -1389,6 +1397,56 @@ impl<'a> FileBuilder<'a> {
         Ok(acc)
     }
 
+    fn eval_functional_string_processing(
+        &self,
+        s: &str,
+        def: &MacroDef,
+        typ: ProcessingOpType,
+    ) -> Result<Rc<Val>, Box<dyn Error>> {
+        let mut result = String::new();
+        for gc in s.graphemes(true) {
+            let arg = Rc::new(Val::Str(gc.to_string()));
+            let out = def.eval(self.file.clone(), self, vec![arg])?;
+            match typ {
+                ProcessingOpType::Filter => {
+                    match out.as_ref() {
+                        Val::Boolean(b) => {
+                            if *b {
+                                result.push_str(gc);
+                            }
+                        }
+                        Val::Empty => {
+                            // noop
+                        }
+                        _ => {
+                            return Err(Box::new(error::BuildError::new(
+                                format!(
+                                    "Expected boolean or NULL for filter return but got {}",
+                                    out.type_name()
+                                ),
+                                error::ErrorType::TypeFail,
+                                def.pos.clone(),
+                            )));
+                        }
+                    }
+                }
+                ProcessingOpType::Map => match out.as_ref() {
+                    Val::Str(s) => {
+                        result.push_str(&s);
+                    }
+                    _ => {
+                        return Err(Box::new(error::BuildError::new(
+                            format!("Expected string map return but got {}", out.type_name()),
+                            error::ErrorType::TypeFail,
+                            def.pos.clone(),
+                        )));
+                    }
+                },
+            }
+        }
+        Ok(Rc::new(Val::Str(result)))
+    }
+
     fn eval_functional_processing(
         &self,
         def: &MapFilterOpDef,
         typ: ProcessingOpType,
@@ -1410,6 +1468,8 @@ impl<'a> FileBuilder<'a> {
         return match maybe_target.as_ref() {
             &Val::List(ref elems) => self.eval_functional_list_processing(elems, macdef, typ),
             &Val::Tuple(ref fs) => self.eval_functional_tuple_processing(fs, macdef, typ),
+            // TODO(jwall): Strings?
+            &Val::Str(ref s) => self.eval_functional_string_processing(s, macdef, typ),
             other => Err(Box::new(error::BuildError::new(
                 format!(
                     "Expected List or Tuple as target but got {:?}",
diff --git a/src/build/scope.rs b/src/build/scope.rs
index 1af30eb..76cf31c 100644
--- a/src/build/scope.rs
+++ b/src/build/scope.rs
@@ -187,7 +187,7 @@ impl Scope {
                     format!("Invalid idx type {} for list lookup", field),
                     error::ErrorType::TypeFail,
                     pos.clone(),
-                )))
+                )));
             }
         };
         if idx < elems.len() {
diff --git a/src/convert/exec.rs b/src/convert/exec.rs
index b251713..b372fc2 100644
--- a/src/convert/exec.rs
+++ b/src/convert/exec.rs
@@ -158,7 +158,7 @@ impl ExecConverter {
                             "Exec args must be a list of strings or tuples of strings.",
                             ErrorType::TypeFail,
                             Position::new(0, 0, 0),
-                        )))
+                        )));
                     }
                 }
             }
diff --git a/src/lib.rs b/src/lib.rs
index 7859c7f..d079efb 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -28,6 +28,7 @@ extern crate serde_json;
 extern crate serde_yaml;
 extern crate simple_error;
 extern crate toml;
+extern crate unicode_segmentation;
 extern crate xml;
 
 #[macro_use]
diff --git a/src/tokenizer/mod.rs b/src/tokenizer/mod.rs
index 77301aa..3efe5a0 100644
--- a/src/tokenizer/mod.rs
+++ b/src/tokenizer/mod.rs
@@ -31,7 +31,7 @@ fn is_symbol_char<'a>(i: OffsetStrIter<'a>) -> Result<OffsetStrIter<'a>, u8> {
             return Result::Fail(Error::new(
                 "Unexpected End of Input".to_string(),
                 Box::new(_i.clone()),
-            ))
+            ));
         }
     };
     if (c as char).is_ascii_alphanumeric() || c == b'-' || c == b'_' {
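
For readers unfamiliar with the crate, here is a minimal standalone sketch (not part of the patch) of the grapheme-cluster iteration that the new string arms rely on. It assumes only the `unicode-segmentation` dependency added in Cargo.toml above, and the example inputs are illustrative:

```rust
// Illustration of UnicodeSegmentation::graphemes(true), the same call used by
// the new string handling in src/build/mod.rs.
extern crate unicode_segmentation;

use unicode_segmentation::UnicodeSegmentation;

fn main() {
    // graphemes(true) yields extended grapheme clusters, so a character like
    // "é" comes back as one unit rather than as individual bytes.
    let clusters: Vec<&str> = "héllo".graphemes(true).collect();
    assert_eq!(clusters, vec!["h", "é", "l", "l", "o"]);

    // This mirrors what `filter` now does over a string: the macro is invoked
    // once per grapheme cluster, and clusters that fail the test are dropped.
    let filtered: String = "foobar".graphemes(true).filter(|gc| *gc != "o").collect();
    assert_eq!(filtered, "fbar");
}
```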