Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 22 additions & 1 deletion aw-query/src/datatype.rs
Original file line number Diff line number Diff line change
Expand Up @@ -328,7 +328,28 @@ impl TryFrom<&DataType> for Rule {
))
}
};
let regex_rule = match RegexRule::new(regex_str, *ignore_case) {
let select_keys = match obj.get("select_keys") {
Some(DataType::List(keys)) => {
let mut select_keys = Vec::with_capacity(keys.len());
for key in keys {
match key {
DataType::String(key) => select_keys.push(key.clone()),
_ => return Err(QueryError::InvalidFunctionParameters(
"the select_keys field of the regex rule must contain only strings"
.to_string(),
)),
}
}
Some(select_keys)
}
Some(_) => {
return Err(QueryError::InvalidFunctionParameters(
"the select_keys field of the regex rule is not a list".to_string(),
))
}
None => None,
};
let regex_rule = match RegexRule::new(regex_str, *ignore_case, select_keys) {
Ok(regex_rule) => regex_rule,
Err(err) => {
return Err(QueryError::RegexCompileError(format!(
Expand Down
55 changes: 55 additions & 0 deletions aw-query/tests/query.rs
Original file line number Diff line number Diff line change
Expand Up @@ -412,6 +412,38 @@ mod query_tests {
assert_eq!(tags.len(), 2);
}

#[test]
fn test_tag_select_keys() {
    let datastore = setup_datastore_with_bucket();
    let period = TimeInterval::new_from_string(TIME_INTERVAL).unwrap();

    // A single event: only "title" ends in "test"; "pid" is a number, not a string.
    let sample = Event {
        id: None,
        timestamp: chrono::Utc::now(),
        duration: Duration::seconds(0),
        data: json_map! {
            "app": json!("terminal"),
            "title": json!("just a test"),
            "pid": json!(123)
        },
    };
    datastore.insert_events(BUCKET_ID, &[sample]).unwrap();

    // Four regex rules, each restricted through `select_keys` to one key:
    // "title", "app", "pid" (non-string value) and "missing" (absent key).
    let query_src = format!(
        r#"
events = query_bucket("{}");
events = tag(events, [["title-match", {{ "type": "regex", "regex": "test$", "select_keys": ["title"] }}], ["app-match", {{ "type": "regex", "regex": "test$", "select_keys": ["app"] }}], ["pid-match", {{ "type": "regex", "regex": "123", "select_keys": ["pid"] }}], ["missing-match", {{ "type": "regex", "regex": "test", "select_keys": ["missing"] }}]]);
return events;"#,
        BUCKET_ID
    );
    let output: DataType = aw_query::query(&query_src, &period, &datastore).unwrap();
    let tagged: Vec<Event> = Vec::try_from(&output).unwrap();

    // Only the rule looking at "title" is expected to have tagged the event.
    let tags = tagged.first().unwrap().data.get("$tags").unwrap();
    let expected = serde_json::json!(["title-match"]);
    assert_eq!(tags, &expected);
}

#[test]
fn test_rule_parsing() {
let ds = setup_datastore_populated();
Expand Down Expand Up @@ -479,6 +511,13 @@ mod query_tests {
return events;"#;
aw_query::query(code, &interval, &ds).unwrap();

// Test regex rule with valid select_keys field
let code = r#"
events = [];
events = tag(events, [["testtag", { "type": "regex", "regex": "test", "select_keys": ["key"] }]]);
return events;"#;
aw_query::query(code, &interval, &ds).unwrap();

// Test regex rule where ignore_case field is of invalid type
let code = r#"
events = [];
Expand All @@ -487,6 +526,22 @@ mod query_tests {
let res = aw_query::query(code, &interval, &ds);
assert_err_type!(res, QueryError::InvalidFunctionParameters(_));

// Test regex rule where select_keys field is of invalid type
let code = r#"
events = [];
events = tag(events, [["testtag", { "type": "regex", "regex": "test", "select_keys": "key" }]]);
return events;"#;
let res = aw_query::query(code, &interval, &ds);
assert_err_type!(res, QueryError::InvalidFunctionParameters(_));

// Test regex rule where select_keys contains non-string values
let code = r#"
events = [];
events = tag(events, [["testtag", { "type": "regex", "regex": "test", "select_keys": ["key", false] }]]);
return events;"#;
let res = aw_query::query(code, &interval, &ds);
assert_err_type!(res, QueryError::InvalidFunctionParameters(_));

// Test regex rule where uncompilable regex is supplied
let code = r#"
events = [];
Expand Down
79 changes: 70 additions & 9 deletions aw-transform/src/classify.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,15 @@ trait RuleTrait {

/// A rule that matches an event when its regex matches one of the event's
/// string data values.
pub struct RegexRule {
// Compiled pattern that is tested against string values in the event data.
regex: Regex,
// Optional restriction on which event data keys are inspected: with
// `Some(keys)` only the values stored under those keys are tested, with
// `None` every value in the event data is tested.
select_keys: Option<Vec<String>>,
}

impl RegexRule {
pub fn new(regex_str: &str, ignore_case: bool) -> Result<RegexRule, fancy_regex::Error> {
pub fn new(
regex_str: &str,
ignore_case: bool,
select_keys: Option<Vec<String>>,
) -> Result<RegexRule, fancy_regex::Error> {
// can't use `RegexBuilder::case_insensitive` because it's not supported by fancy_regex,
// so we need to prefix with `(?i)` to make it case insensitive.
let regex = if ignore_case {
Expand All @@ -37,7 +42,26 @@ impl RegexRule {
Regex::new(regex_str)?
};

Ok(RegexRule { regex })
// Validate that select_keys is not an empty list, which would silently never match.
if let Some(ref keys) = select_keys {
if keys.is_empty() {
return Err(fancy_regex::Error::ParseError(
0,
fancy_regex::ParseError::GeneralParseError(
"select_keys must not be empty".to_string(),
),
));
}
}

Ok(RegexRule { regex, select_keys })
}

/// Returns `true` when `value` is a JSON string that the rule's regex matches.
///
/// Non-string values never match, and an error returned by the regex while
/// matching is treated as "no match".
fn value_matches(&self, value: &serde_json::Value) -> bool {
    if let Some(text) = value.as_str() {
        self.regex.is_match(text).unwrap_or(false)
    } else {
        false
    }
}
Comment thread
TimeToBuildBob marked this conversation as resolved.
}

Expand All @@ -48,17 +72,22 @@ impl RegexRule {
/// compatibility (or have to maintain "old" query2 functions).
impl RuleTrait for RegexRule {
fn matches(&self, event: &Event) -> bool {
event
.data
.values()
.filter(|val| val.is_string())
.any(|val| self.regex.is_match(val.as_str().unwrap()).unwrap())
match &self.select_keys {
Some(select_keys) => select_keys
.iter()
.filter_map(|key| event.data.get(key))
.any(|val| self.value_matches(val)),
None => event.data.values().any(|val| self.value_matches(val)),
}
}
Comment thread
TimeToBuildBob marked this conversation as resolved.
}

impl From<Regex> for Rule {
fn from(re: Regex) -> Self {
Rule::Regex(RegexRule { regex: re })
Rule::Regex(RegexRule {
regex: re,
select_keys: None,
})
}
}

Expand Down Expand Up @@ -135,7 +164,7 @@ fn test_rule() {
.insert("nonono".into(), serde_json::json!("no match!"));

let rule_from_regex = Rule::from(Regex::new("test").unwrap());
let rule_from_new = Rule::Regex(RegexRule::new("test", false).unwrap());
let rule_from_new = Rule::Regex(RegexRule::new("test", false, None).unwrap());
let rule_none = Rule::None;
assert!(rule_from_regex.matches(&e_match));
assert!(rule_from_new.matches(&e_match));
Expand All @@ -157,6 +186,38 @@ fn test_rule_lookahead() {
assert!(!rule_from_regex.matches(&e_match));
}

#[test]
fn test_rule_select_keys() {
    // Event with two string fields and one numeric field.
    let mut event = Event::default();
    let fields = vec![
        ("app", serde_json::json!("terminal")),
        ("title", serde_json::json!("just a test")),
        ("pid", serde_json::json!(123)),
    ];
    for (name, value) in fields {
        event.data.insert(name.into(), value);
    }

    // Builds a case-sensitive regex rule that only looks at a single key.
    let rule_for = |pattern: &str, key: &str| {
        Rule::Regex(RegexRule::new(pattern, false, Some(vec![key.into()])).unwrap())
    };

    let title_only = rule_for("test", "title");
    let app_only = rule_for("test", "app");
    let missing_key = rule_for("test", "missing");
    let non_string_key = rule_for("123", "pid");

    // Only the selected key's string value may trigger a match; absent keys and
    // non-string values never do.
    assert!(title_only.matches(&event));
    assert!(!app_only.matches(&event));
    assert!(!missing_key.matches(&event));
    assert!(!non_string_key.matches(&event));
}

#[test]
fn test_rule_select_keys_empty_list() {
    // Constructing a rule with an empty key selection must fail up front;
    // otherwise the rule would silently never match any event.
    let no_keys: Vec<String> = Vec::new();
    let outcome = RegexRule::new("test", false, Some(no_keys));
    assert!(outcome.is_err());
}
#[test]
fn test_categorize() {
let mut e = Event::default();
Expand Down
Loading