Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
74 changes: 65 additions & 9 deletions quickwit/quickwit-query/src/elastic_query_dsl/regex_query.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,22 +18,43 @@ use crate::elastic_query_dsl::ConvertibleToQueryAst;
use crate::elastic_query_dsl::one_field_map::OneFieldMap;
use crate::query_ast::{QueryAst, RegexQuery as AstRegexQuery};

#[derive(Deserialize, Debug, Default, Eq, PartialEq, Clone)]
#[serde(deny_unknown_fields)]
pub struct RegexQueryParams {
value: String,
#[serde(default)]
case_insensitive: bool,
/// Parameters of an Elasticsearch `regexp` query for a single field.
///
/// Elasticsearch supports two formats for regexp queries:
/// - Shorthand: `{"regexp": {"field": "pattern"}}`
/// - Full: `{"regexp": {"field": {"value": "pattern", "case_insensitive": true}}}`
///
/// The enum is untagged, so the JSON shape (object vs. bare string) selects the variant.
// NOTE(review): nothing on this enum rejects unknown keys in the full form; presumably
// serde ignores them — confirm that this leniency is intended.
#[derive(Deserialize, Debug, Eq, PartialEq, Clone)]
#[serde(untagged)]
pub enum RegexQueryParams {
/// Object form: the JSON key `value` carries the regex pattern.
Full {
#[serde(rename = "value")]
pattern: String,
/// Optional flag; falls back to `bool::default()` (`false`) when the key is absent.
#[serde(default)]
case_insensitive: bool,
},
/// Bare-string form: the string itself is the regex pattern.
Shorthand(String),
}

impl RegexQueryParams {
fn into_tuple(self) -> (String, bool) {
match self {
RegexQueryParams::Full {
pattern,
case_insensitive,
} => (pattern, case_insensitive),
RegexQueryParams::Shorthand(pattern) => (pattern, false),
}
}
}

/// A `regexp` query clause: a [`OneFieldMap`] whose `value` is a [`RegexQueryParams`].
// NOTE(review): presumably the map's single key is the target field name — confirm
// against `OneFieldMap`.
pub type RegexQuery = OneFieldMap<RegexQueryParams>;

impl ConvertibleToQueryAst for RegexQuery {
fn convert_to_query_ast(self) -> anyhow::Result<QueryAst> {
let regex = if self.value.case_insensitive {
format!("(?i){}", self.value.value)
let (pattern, case_insensitive) = self.value.into_tuple();

let regex = if case_insensitive {
format!("(?i){pattern}")
} else {
self.value.value.clone()
pattern
};
Ok(AstRegexQuery {
field: self.field,
Expand All @@ -42,3 +63,38 @@ impl ConvertibleToQueryAst for RegexQuery {
.into())
}
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Regex pattern shared by every case in this module.
    const PATTERN: &str = ".*logs.*";

    /// Deserializes `json` into a [`RegexQuery`], panicking if it is malformed.
    fn parse(json: serde_json::Value) -> RegexQuery {
        serde_json::from_value(json).unwrap()
    }

    /// The shorthand form yields the pattern and is case-sensitive.
    #[test]
    fn test_regex_query_shorthand_format() {
        let query = parse(serde_json::json!({ "service": PATTERN }));
        assert_eq!(query.field, "service");
        assert_eq!(query.value.into_tuple(), (PATTERN.to_string(), false));
    }

    /// The full form honours an explicit `case_insensitive: true`.
    #[test]
    fn test_regex_query_full_format() {
        let query = parse(serde_json::json!({
            "service": { "value": PATTERN, "case_insensitive": true }
        }));
        assert_eq!(query.field, "service");
        assert_eq!(query.value.into_tuple(), (PATTERN.to_string(), true));
    }

    /// The full form without `case_insensitive` is case-sensitive.
    #[test]
    fn test_regex_query_full_format_default_case() {
        let query = parse(serde_json::json!({ "service": { "value": PATTERN } }));
        assert_eq!(query.field, "service");
        assert_eq!(query.value.into_tuple(), (PATTERN.to_string(), false));
    }
}
22 changes: 14 additions & 8 deletions quickwit/quickwit-serve/src/elasticsearch_api/model/mappings.rs
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,11 @@ fn build_properties(field_mappings: &[FieldMappingEntry]) -> HashMap<String, Fie

fn field_mapping_from_entry(entry: &FieldMappingEntry) -> Option<FieldMapping> {
match &entry.mapping_type {
FieldMappingType::Text(..) => Some(FieldMapping::Leaf { typ: "text" }),
// Quickwit text fields behave like ES keyword fields: they support exact
// match, prefix, and regexp queries. Reporting them as "keyword" enables
// downstream connectors (e.g. Trino ES connector) to push down filters and
// LIKE predicates, which they only do for keyword-typed fields.
FieldMappingType::Text(..) => Some(FieldMapping::Leaf { typ: "keyword" }),
FieldMappingType::I64(..) => Some(FieldMapping::Leaf { typ: "long" }),
FieldMappingType::U64(..) => Some(FieldMapping::Leaf { typ: "long" }),
FieldMappingType::F64(..) => Some(FieldMapping::Leaf { typ: "double" }),
Expand All @@ -115,7 +119,7 @@ fn field_mapping_from_entry(entry: &FieldMappingEntry) -> Option<FieldMapping> {
properties,
})
}
FieldMappingType::Concatenate(_) => None,
FieldMappingType::Concatenate(_) => Some(FieldMapping::Leaf { typ: "keyword" }),
}
}

Expand Down Expand Up @@ -178,7 +182,7 @@ mod tests {
let entry: FieldMappingEntry = serde_json::from_value(entry_json).unwrap();
let mapping = field_mapping_from_entry(&entry).unwrap();
let serialized = serde_json::to_value(&mapping).unwrap();
assert_eq!(serialized, json!({ "type": "text" }));
assert_eq!(serialized, json!({ "type": "keyword" }));
}

#[test]
Expand Down Expand Up @@ -209,21 +213,23 @@ mod tests {
"type": "object",
"properties": {
"id": { "type": "long" },
"label": { "type": "text" }
"label": { "type": "keyword" }
}
})
);
}

#[test]
fn test_field_mapping_from_entry_concatenate_skipped() {
fn test_field_mapping_from_entry_concatenate_exposed_as_keyword() {
let entry_json = json!({
"name": "concat_field",
"type": "concatenate",
"concatenate_fields": ["field_a", "field_b"]
});
let entry: FieldMappingEntry = serde_json::from_value(entry_json).unwrap();
assert!(field_mapping_from_entry(&entry).is_none());
let mapping = field_mapping_from_entry(&entry).unwrap();
let serialized = serde_json::to_value(&mapping).unwrap();
assert_eq!(serialized, json!({ "type": "keyword" }));
}

#[test]
Expand Down Expand Up @@ -251,7 +257,7 @@ mod tests {
let props = build_properties(&entries);
let to_json = |fm: &FieldMapping| serde_json::to_value(fm).unwrap();

assert_eq!(to_json(&props["title"]), json!({ "type": "text" }));
assert_eq!(to_json(&props["title"]), json!({ "type": "keyword" }));
assert_eq!(to_json(&props["count"]), json!({ "type": "long" }));
assert_eq!(to_json(&props["unsigned"]), json!({ "type": "long" }));
assert_eq!(to_json(&props["score"]), json!({ "type": "double" }));
Expand All @@ -263,7 +269,7 @@ mod tests {

let meta = to_json(&props["metadata"]);
assert_eq!(meta["type"], "object");
assert_eq!(meta["properties"]["source"]["type"], "text");
assert_eq!(meta["properties"]["source"]["type"], "keyword");
}

#[test]
Expand Down
Loading