From 83a641e740ce2026ee7b4271c1325b46ed69416d Mon Sep 17 00:00:00 2001 From: Jeremy Wall Date: Mon, 7 Jan 2019 19:27:29 -0600 Subject: [PATCH] FEATURE: Add regular expression comparison operators. --- Cargo.lock | 76 ++++++++++++++++++++++++++ Cargo.toml | 1 + integration_tests/comparisons_test.ucg | 8 +++ src/ast/mod.rs | 4 ++ src/build/mod.rs | 40 ++++++++++++++ src/lib.rs | 1 + src/parse/precedence.rs | 4 ++ src/tokenizer/mod.rs | 44 +++++++++------ 8 files changed, 161 insertions(+), 17 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 9be241e..820a56d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3,6 +3,14 @@ name = "abortable_parser" version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" +[[package]] +name = "aho-corasick" +version = "0.6.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "memchr 2.1.2 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "ansi_term" version = "0.9.0" @@ -139,6 +147,11 @@ name = "lazy_static" version = "0.2.11" source = "registry+https://github.com/rust-lang/crates.io-index" +[[package]] +name = "lazy_static" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" + [[package]] name = "libc" version = "0.2.43" @@ -149,6 +162,16 @@ name = "linked-hash-map" version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" +[[package]] +name = "memchr" +version = "2.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "cfg-if 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)", + "libc 0.2.43 (registry+https://github.com/rust-lang/crates.io-index)", + "version_check 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "redox_syscall" version = "0.1.40" @@ -162,6 +185,26 @@ dependencies = [ "redox_syscall 0.1.40 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "regex" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "aho-corasick 0.6.9 (registry+https://github.com/rust-lang/crates.io-index)", + "memchr 2.1.2 (registry+https://github.com/rust-lang/crates.io-index)", + "regex-syntax 0.6.4 (registry+https://github.com/rust-lang/crates.io-index)", + "thread_local 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)", + "utf8-ranges 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "regex-syntax" +version = "0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "ucd-util 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "rustc-demangle" version = "0.1.9" @@ -237,6 +280,14 @@ dependencies = [ "unicode-width 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "thread_local" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "lazy_static 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "toml" version = "0.4.8" @@ -245,6 +296,11 @@ dependencies = [ "serde 1.0.75 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "ucd-util" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" + [[package]] name = "ucg" version = "0.2.10" @@ -254,6 +310,7 @@ dependencies = [ "bencher 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)", "clap 2.26.2 (registry+https://github.com/rust-lang/crates.io-index)", "cpuprofiler 0.0.3 (registry+https://github.com/rust-lang/crates.io-index)", + "regex 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)", "serde_json 1.0.26 (registry+https://github.com/rust-lang/crates.io-index)", "serde_yaml 0.8.1 (registry+https://github.com/rust-lang/crates.io-index)", "simple-error 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)", @@ -266,11 +323,21 @@ name = "unicode-width" version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" +[[package]] +name = "utf8-ranges" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" + [[package]] name = "vec_map" version = "0.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" +[[package]] +name = "version_check" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" + [[package]] name = "winapi" version = "0.2.8" @@ -315,6 +382,7 @@ dependencies = [ [metadata] "checksum abortable_parser 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "685d99bbca3566d6b7f34b09d68039089ce4a36226f6f99f61ed8495850e3213" +"checksum aho-corasick 0.6.9 (registry+https://github.com/rust-lang/crates.io-index)" = "1e9a933f4e58658d7b12defcf96dc5c720f20832deebe3e0a19efd3b6aaeeb9e" "checksum ansi_term 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)" = "23ac7c30002a5accbf7e8987d0632fa6de155b7c3d39d0067317a391e00a2ef6" "checksum atty 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)" = "9a7d5b8723950951411ee34d271d99dddcc2035a16ab25310ea2c8cfd4369652" "checksum backtrace 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)" = "346d7644f0b5f9bc73082d3b2236b69a05fd35cce0cfa3724e184e6a5c9e2a2f" @@ -333,10 +401,14 @@ dependencies = [ "checksum itoa 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)" = "5adb58558dcd1d786b5f0bd15f3226ee23486e24b7b58304b60f64dc68e62606" "checksum kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "7507624b29483431c0ba2d82aece8ca6cdba9382bff4ddd0f7490560c056098d" "checksum lazy_static 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)" = "76f033c7ad61445c5b347c7382dd1237847eb1bce590fe50365dcb33d546be73" +"checksum lazy_static 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "a374c89b9db55895453a74c1e38861d9deec0b01b405a82516e9d5de4820dea1" "checksum libc 0.2.43 (registry+https://github.com/rust-lang/crates.io-index)" = "76e3a3ef172f1a0b9a9ff0dd1491ae5e6c948b94479a3021819ba7d860c8645d" "checksum linked-hash-map 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)" = "70fb39025bc7cdd76305867c4eccf2f2dcf6e9a57f5b21a93e1c2d86cd03ec9e" +"checksum memchr 2.1.2 (registry+https://github.com/rust-lang/crates.io-index)" = "db4c41318937f6e76648f42826b1d9ade5c09cafb5aef7e351240a70f39206e9" "checksum redox_syscall 0.1.40 (registry+https://github.com/rust-lang/crates.io-index)" = "c214e91d3ecf43e9a4e41e578973adeb14b474f2bee858742d127af75a0112b1" "checksum redox_termios 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "7e891cfe48e9100a70a3b6eb652fef28920c117d366339687bd5576160db0f76" +"checksum regex 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "37e7cbbd370869ce2e8dff25c7018702d10b21a20ef7135316f8daecd6c25b7f" +"checksum regex-syntax 0.6.4 (registry+https://github.com/rust-lang/crates.io-index)" = "4e47a2ed29da7a9e1960e1639e7a982e6edc6d49be308a3b02daf511504a16d1" "checksum rustc-demangle 0.1.9 (registry+https://github.com/rust-lang/crates.io-index)" = "bcfe5b13211b4d78e5c2cadfebd7769197d95c639c35a50057eb4c05de811395" "checksum ryu 0.2.5 (registry+https://github.com/rust-lang/crates.io-index)" = "e7c066b8e2923f05d4718a06d2622f189ff362bc642bfade6c6629f0440f3827" "checksum serde 1.0.75 (registry+https://github.com/rust-lang/crates.io-index)" = "22d340507cea0b7e6632900a176101fea959c7065d93ba555072da90aaaafc87" @@ -347,9 +419,13 @@ dependencies = [ "checksum term_size 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "9e5b9a66db815dcfd2da92db471106457082577c3c278d4138ab3e3b4e189327" "checksum termion 1.5.1 (registry+https://github.com/rust-lang/crates.io-index)" = "689a3bdfaab439fd92bc87df5c4c78417d3cbe537487274e9b0b2dce76e92096" "checksum textwrap 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)" = "df8e08afc40ae3459e4838f303e465aa50d823df8d7f83ca88108f6d3afe7edd" +"checksum thread_local 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)" = "c6b53e329000edc2b34dbe8545fd20e55a333362d0a321909685a19bd28c3f1b" "checksum toml 0.4.8 (registry+https://github.com/rust-lang/crates.io-index)" = "4a2ecc31b0351ea18b3fe11274b8db6e4d82bce861bbb22e6dbed40417902c65" +"checksum ucd-util 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "535c204ee4d8434478593480b8f86ab45ec9aae0e83c568ca81abf0fd0e88f86" "checksum unicode-width 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "882386231c45df4700b275c7ff55b6f3698780a650026380e72dabe76fa46526" +"checksum utf8-ranges 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "796f7e48bef87609f7ade7e06495a87d5cd06c7866e6a5cbfceffc558a243737" "checksum vec_map 0.8.1 (registry+https://github.com/rust-lang/crates.io-index)" = "05c78687fb1a80548ae3250346c3db86a80a7cdd77bda190189f2d0a0987c81a" +"checksum version_check 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "914b1a6776c4c929a602fafd8bc742e06365d4bcbe48c30f9cca5824f70dc9dd" "checksum winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)" = "167dc9d6949a9b857f3451275e911c3f44255842c1f7a76f33c55103a909087a" "checksum winapi 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)" = "773ef9dcc5f24b7d850d0ff101e542ff24c3b090a9768e03ff889fdef41f00fd" "checksum winapi-build 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "2d315eee3b34aca4797b2da6b13ed88266e6d612562a0c46390af8299fc699bc" diff --git a/Cargo.toml b/Cargo.toml index 69407e9..4ae8540 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -19,6 +19,7 @@ serde_yaml = "~0.8.1" toml = "~0.4.8" xml-rs = "0.8.0" base64 = "0.10.0" +regex = "1" [dev-dependencies] bencher = "~0.1.5" diff --git a/integration_tests/comparisons_test.ucg b/integration_tests/comparisons_test.ucg index daa277d..c047220 100644 --- a/integration_tests/comparisons_test.ucg +++ b/integration_tests/comparisons_test.ucg @@ -111,4 +111,12 @@ assert | |; assert | true in [ "foo" in {foo = 1}, false ]; +|; + +assert | + "foo" ~ "o+"; +|; + +assert | + "foo" !~ "bar"; |; \ No newline at end of file diff --git a/src/ast/mod.rs b/src/ast/mod.rs index aa23679..1fc1ec6 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -485,6 +485,8 @@ pub enum BinaryExprType { NotEqual, GTEqual, LTEqual, + REMatch, + NotREMatch, IN, // Selector operator DOT, @@ -503,6 +505,8 @@ impl BinaryExprType { BinaryExprType::LTEqual => 1, BinaryExprType::GT => 1, BinaryExprType::LT => 1, + BinaryExprType::REMatch => 1, + BinaryExprType::NotREMatch => 1, BinaryExprType::IN => 1, // Sum operators are next least tightly bound BinaryExprType::Add => 2, diff --git a/src/build/mod.rs b/src/build/mod.rs index c39d8d9..32f97d4 100644 --- a/src/build/mod.rs +++ b/src/build/mod.rs @@ -25,6 +25,7 @@ use std::path::PathBuf; use std::rc::Rc; use std::string::ToString; +use regex; use simple_error; use crate::ast::*; @@ -818,6 +819,39 @@ impl<'a> FileBuilder<'a> { } } + fn eval_re_match( + &self, + left: Rc, + left_pos: &Position, + right: Rc, + right_pos: &Position, + negate: bool, + ) -> Result, Box> { + let re = if let Val::Str(ref s) = right.as_ref() { + regex::Regex::new(s.as_ref())? + } else { + return Err(Box::new(error::BuildError::new( + format!("Expected string for regex but got {}", right.type_name()), + error::ErrorType::TypeFail, + right_pos.clone(), + ))); + }; + let tgt = if let Val::Str(ref s) = left.as_ref() { + s.as_ref() + } else { + return Err(Box::new(error::BuildError::new( + format!("Expected string but got {}", left.type_name()), + error::ErrorType::TypeFail, + left_pos.clone(), + ))); + }; + return if negate { + Ok(Rc::new(Val::Boolean(!re.is_match(tgt)))) + } else { + Ok(Rc::new(Val::Boolean(re.is_match(tgt)))) + }; + } + fn eval_binary(&self, def: &BinaryOpDef, scope: &Scope) -> Result, Box> { let kind = &def.kind; if let &BinaryExprType::IN = kind { @@ -848,6 +882,12 @@ impl<'a> FileBuilder<'a> { &BinaryExprType::GTEqual => self.do_gtequal(&def.pos, left, right), &BinaryExprType::LTEqual => self.do_ltequal(&def.pos, left, right), &BinaryExprType::NotEqual => self.do_not_deep_equal(&def.pos, left, right), + &BinaryExprType::REMatch => { + self.eval_re_match(left, def.left.pos(), right, def.right.pos(), false) + } + &BinaryExprType::NotREMatch => { + self.eval_re_match(left, def.left.pos(), right, def.right.pos(), true) + } &BinaryExprType::IN | &BinaryExprType::DOT => panic!("Unreachable"), } } diff --git a/src/lib.rs b/src/lib.rs index 493171e..7859c7f 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -23,6 +23,7 @@ #[macro_use] extern crate abortable_parser; extern crate base64; +extern crate regex; extern crate serde_json; extern crate serde_yaml; extern crate simple_error; diff --git a/src/parse/precedence.rs b/src/parse/precedence.rs index d28af85..dea52ce 100644 --- a/src/parse/precedence.rs +++ b/src/parse/precedence.rs @@ -168,6 +168,8 @@ make_fn!( either!( do_each!(_ => punct!("=="), (Element::Op(BinaryExprType::Equal))), do_each!(_ => punct!("!="), (Element::Op(BinaryExprType::NotEqual))), + do_each!(_ => punct!("~"), (Element::Op(BinaryExprType::REMatch))), + do_each!(_ => punct!("!~"), (Element::Op(BinaryExprType::NotREMatch))), do_each!(_ => punct!("<="), (Element::Op(BinaryExprType::LTEqual))), do_each!(_ => punct!(">="), (Element::Op(BinaryExprType::GTEqual))), do_each!(_ => punct!("<"), (Element::Op(BinaryExprType::LT))), @@ -192,6 +194,8 @@ fn parse_compare_operator(i: SliceIter) -> Result, B | &BinaryExprType::LT | &BinaryExprType::LTEqual | &BinaryExprType::NotEqual + | &BinaryExprType::REMatch + | &BinaryExprType::NotREMatch | &BinaryExprType::Equal | &BinaryExprType::IN => { return Result::Complete(i_.clone(), op.clone()); diff --git a/src/tokenizer/mod.rs b/src/tokenizer/mod.rs index 65aa18c..293ecc0 100644 --- a/src/tokenizer/mod.rs +++ b/src/tokenizer/mod.rs @@ -144,27 +144,27 @@ make_fn!(booleantok, macro_rules! do_text_token_tok { ($i:expr, $type:expr, $text_token:expr, WS) => { do_each!($i, - span => input!(), - frag => text_token!($text_token), - _ => either!(whitespace, comment), - (Token { - typ: $type, - pos: Position::from(&span), - fragment: frag.to_string(), - }) - ) + span => input!(), + frag => text_token!($text_token), + _ => either!(whitespace, comment), + (Token { + typ: $type, + pos: Position::from(&span), + fragment: frag.to_string(), + }) + ) }; ($i:expr, $type:expr, $text_token:expr) => { do_each!($i, - span => input!(), - frag => text_token!($text_token), - (Token { - typ: $type, - pos: Position::from(&span), - fragment: frag.to_string(), - }) - ) + span => input!(), + frag => text_token!($text_token), + (Token { + typ: $type, + pos: Position::from(&span), + fragment: frag.to_string(), + }) + ) }; } @@ -224,6 +224,14 @@ make_fn!(notequaltok, do_text_token_tok!(TokenType::PUNCT, "!=") ); +make_fn!(matchtok, + do_text_token_tok!(TokenType::PUNCT, "~") +); + +make_fn!(notmatchtok, + do_text_token_tok!(TokenType::PUNCT, "!~") +); + make_fn!(gttok, do_text_token_tok!(TokenType::PUNCT, ">") ); @@ -387,6 +395,8 @@ fn token<'a>(input: OffsetStrIter<'a>) -> Result, Token> { pcttok, eqeqtok, notequaltok, + matchtok, + notmatchtok, complete!("Not >=".to_string(), gtequaltok), complete!("Not <=".to_string(), ltequaltok), gttok,