diff --git a/docs/reference/es_compatible_api.md b/docs/reference/es_compatible_api.md index 15006970f44..c9ffa4f463b 100644 --- a/docs/reference/es_compatible_api.md +++ b/docs/reference/es_compatible_api.md @@ -741,6 +741,60 @@ Query matching only documents containing a non-null value for a given field. | -------- | ------ | ------------------------------------------------------- | ------- | | `field` | String | Only documents with a value for field will be returned. | - | +### `prefix` + +[Elasticsearch reference documentation](https://www.elastic.co/guide/en/elasticsearch/reference/8.8/query-dsl-prefix-query.html) + +Returns documents that contain a specific prefix in a provided field. + +#### Example + +```json +{ + "query": { + "prefix": { + "author.login": { + "value": "adm" + } + } + } +} +``` + +#### Supported Parameters + +| Variable | Type | Description | Default | +| -------- | ------ | ----------------------------------------------- | ------- | +| `value` | String | Beginning characters of terms you wish to find. | - | + +### `wildcard` + +[Elasticsearch reference documentation](https://www.elastic.co/guide/en/elasticsearch/reference/8.8/query-dsl-wildcard-query.html) + +Returns documents that contain terms matching a wildcard pattern: +* `?` replaces one and only one term character +* `*` replaces any number of term characters or an empty string + +#### Example + +```json +{ + "query": { + "wildcard": { + "author.login": { + "value": "adm?n*" + } + } + } +} +``` + +#### Supported Parameters + +| Variable | Type | Description | Default | +| -------- | ------ | -------------------------------------------- | ------- | +| `value` | String | Wildcard pattern for terms you wish to find. 
| - | + ### About the `lenient` argument diff --git a/quickwit/quickwit-query/src/elastic_query_dsl/mod.rs b/quickwit/quickwit-query/src/elastic_query_dsl/mod.rs index 96fac10c32c..871032951e2 100644 --- a/quickwit/quickwit-query/src/elastic_query_dsl/mod.rs +++ b/quickwit/quickwit-query/src/elastic_query_dsl/mod.rs @@ -22,16 +22,19 @@ mod match_query; mod multi_match; mod one_field_map; mod phrase_prefix_query; +mod prefix_query; mod query_string_query; mod range_query; mod regex_query; mod string_or_struct; mod term_query; mod terms_query; +mod wildcard_query; use bool_query::BoolQuery; pub use one_field_map::OneFieldMap; use phrase_prefix_query::MatchPhrasePrefixQuery; +use prefix_query::PrefixQuery; pub(crate) use query_string_query::QueryStringQuery; use range_query::RangeQuery; pub(crate) use string_or_struct::StringOrStructForSerialization; @@ -44,6 +47,7 @@ use crate::elastic_query_dsl::match_query::MatchQuery; use crate::elastic_query_dsl::multi_match::MultiMatchQuery; use crate::elastic_query_dsl::regex_query::RegexQuery; use crate::elastic_query_dsl::terms_query::TermsQuery; +use crate::elastic_query_dsl::wildcard_query::WildcardQuery; use crate::not_nan_f32::NotNaNf32; use crate::query_ast::QueryAst; @@ -85,6 +89,8 @@ pub(crate) enum ElasticQueryDslInner { Range(RangeQuery), Exists(ExistsQuery), Regexp(RegexQuery), + Wildcard(WildcardQuery), + Prefix(PrefixQuery), } #[derive(Deserialize, Debug, Eq, PartialEq, Clone)] @@ -133,6 +139,8 @@ impl ConvertibleToQueryAst for ElasticQueryDslInner { Self::Exists(exists_query) => exists_query.convert_to_query_ast(), Self::MultiMatch(multi_match_query) => multi_match_query.convert_to_query_ast(), Self::Regexp(regex_query) => regex_query.convert_to_query_ast(), + Self::Wildcard(wildcard_query) => wildcard_query.convert_to_query_ast(), + Self::Prefix(prefix_query) => prefix_query.convert_to_query_ast(), } } } diff --git a/quickwit/quickwit-query/src/elastic_query_dsl/prefix_query.rs 
b/quickwit/quickwit-query/src/elastic_query_dsl/prefix_query.rs new file mode 100644 index 00000000000..2baa9a499c3 --- /dev/null +++ b/quickwit/quickwit-query/src/elastic_query_dsl/prefix_query.rs @@ -0,0 +1,117 @@ +// Copyright 2021-Present Datadog, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use serde::Deserialize; + +use crate::elastic_query_dsl::one_field_map::OneFieldMap; +use crate::elastic_query_dsl::{ConvertibleToQueryAst, StringOrStructForSerialization}; +use crate::query_ast::{QueryAst, WildcardQuery as AstWildcardQuery}; + +/// Elasticsearch-compatible `prefix` query targeting a single field. +#[derive(Deserialize, Clone, Eq, PartialEq, Debug)] +#[serde(from = "OneFieldMap<StringOrStructForSerialization<PrefixQueryParams>>")] +pub(crate) struct PrefixQuery { + pub(crate) field: String, + pub(crate) params: PrefixQueryParams, +} + +/// Parameters of a `prefix` query; unknown keys are rejected at deserialization. +#[derive(Deserialize, Debug, Default, Eq, PartialEq, Clone)] +#[serde(deny_unknown_fields)] +pub struct PrefixQueryParams { + value: String, +} + +impl ConvertibleToQueryAst for PrefixQuery { + fn convert_to_query_ast(self) -> anyhow::Result<QueryAst> { + // Escape `\`, `*` and `?` so the user-supplied prefix is matched literally; + // only the `*` appended below acts as a wildcard. + let wildcard = format!( + "{}*", + self.params + .value + .replace(r"\", r"\\") + .replace("*", r"\*") + .replace("?", r"\?") + ); + Ok(AstWildcardQuery { + field: self.field, + value: wildcard, + lenient: true, + } + .into()) + } +} + +impl From<OneFieldMap<StringOrStructForSerialization<PrefixQueryParams>>> for PrefixQuery { + fn from( + match_query_params: OneFieldMap<StringOrStructForSerialization<PrefixQueryParams>>, + ) -> Self { + let OneFieldMap { field, value } = match_query_params; + PrefixQuery { + field, + params: value.inner, + } + } +} + +// Builds the parameters from a bare string, i.e. the shorthand form `{"field": "prefix"}`. +impl From<String> for PrefixQueryParams { + fn 
from(value: String) -> PrefixQueryParams { + PrefixQueryParams { value } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_prefix_query_convert_to_query_ast() { + let prefix_query_json = r#"{ + "user_name": { + "value": "john" + } + }"#; + let prefix_query: PrefixQuery = serde_json::from_str(prefix_query_json).unwrap(); + let query_ast = prefix_query.convert_to_query_ast().unwrap(); + + if let QueryAst::Wildcard(prefix) = query_ast { + assert_eq!(prefix.field, "user_name"); + assert_eq!(prefix.value, "john*"); + assert!(prefix.lenient); + } else { + panic!("Expected QueryAst::Wildcard, got {:?}", query_ast); + } + } + + #[test] + fn test_prefix_query_convert_to_query_ast_special_chars() { + let prefix_query_json = r#"{ + "user_name": { + "value": "a\\dm?n*" + } + }"#; + let prefix_query: PrefixQuery = serde_json::from_str(prefix_query_json).unwrap(); + let query_ast = prefix_query.convert_to_query_ast().unwrap(); + + if let QueryAst::Wildcard(prefix) = query_ast { + assert_eq!(prefix.field, "user_name"); + assert_eq!(prefix.value, r"a\\dm\?n\**"); + assert!(prefix.lenient); + } else { + panic!("Expected QueryAst::Wildcard, got {:?}", query_ast); + } + } +} diff --git a/quickwit/quickwit-query/src/elastic_query_dsl/wildcard_query.rs b/quickwit/quickwit-query/src/elastic_query_dsl/wildcard_query.rs new file mode 100644 index 00000000000..973c4d31cf1 --- /dev/null +++ b/quickwit/quickwit-query/src/elastic_query_dsl/wildcard_query.rs @@ -0,0 +1,114 @@ +// Copyright 2021-Present Datadog, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +use serde::Deserialize; + +use crate::NotNaNf32; +use crate::elastic_query_dsl::one_field_map::OneFieldMap; +use crate::elastic_query_dsl::{ConvertibleToQueryAst, StringOrStructForSerialization}; +use crate::query_ast::{QueryAst, WildcardQuery as AstWildcardQuery}; + +#[derive(Deserialize, Clone, Eq, PartialEq, Debug)] +#[serde(from = "OneFieldMap<StringOrStructForSerialization<WildcardQueryParams>>")] +pub(crate) struct WildcardQuery { + pub(crate) field: String, + pub(crate) params: WildcardQueryParams, +} + +#[derive(Deserialize, Debug, Default, Eq, PartialEq, Clone)] +#[serde(deny_unknown_fields)] // unknown keys are rejected at deserialization +pub struct WildcardQueryParams { + value: String, // wildcard pattern handed to the query AST + #[serde(default)] + pub boost: Option<NotNaNf32>, // optional; applied through `QueryAst::boost` +} + +impl ConvertibleToQueryAst for WildcardQuery { + fn convert_to_query_ast(self) -> anyhow::Result<QueryAst> { + let wildcard_ast: QueryAst = AstWildcardQuery { + field: self.field, + value: self.params.value, // forwarded untouched, no escaping is applied + lenient: true, + } + .into(); + Ok(wildcard_ast.boost(self.params.boost)) + } +} + +impl From<OneFieldMap<StringOrStructForSerialization<WildcardQueryParams>>> for WildcardQuery { + fn from( + match_query_params: OneFieldMap<StringOrStructForSerialization<WildcardQueryParams>>, + ) -> Self { + let OneFieldMap { field, value } = match_query_params; + WildcardQuery { + field, + params: value.inner, + } + } +} + +impl From<String> for WildcardQueryParams { + fn from(value: String) -> WildcardQueryParams { + WildcardQueryParams { value, boost: None } // bare-string form carries no boost + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_wildcard_query_convert_to_query_ast() { + let wildcard_query_json = r#"{ + "user_name": { + "value": "john*" + } + }"#; + let wildcard_query: WildcardQuery = serde_json::from_str(wildcard_query_json).unwrap(); + let query_ast = wildcard_query.convert_to_query_ast().unwrap(); + + if let QueryAst::Wildcard(wildcard) = query_ast { + assert_eq!(wildcard.field, "user_name"); + assert_eq!(wildcard.value, "john*"); + assert!(wildcard.lenient); + } else { + panic!("Expected QueryAst::Wildcard"); + } + } + + #[test] + fn 
test_boosted_wildcard_query_convert_to_query_ast() { + let wildcard_query_json = r#"{ + "user_name": { + "value": "john*", + "boost": 2.0 + } + }"#; + let wildcard_query: WildcardQuery = serde_json::from_str(wildcard_query_json).unwrap(); + let query_ast = wildcard_query.convert_to_query_ast().unwrap(); + + if let QueryAst::Boost { underlying, boost } = query_ast { + if let QueryAst::Wildcard(wildcard) = *underlying { + assert_eq!(wildcard.field, "user_name"); + assert_eq!(wildcard.value, "john*"); + assert!(wildcard.lenient); + } else { + panic!("Expected underlying QueryAst::Wildcard"); + } + assert_eq!(boost, NotNaNf32::try_from(2.0).unwrap()); + } else { + panic!("Expected QueryAst::Boost"); + } + } +} diff --git a/quickwit/quickwit-query/src/query_ast/wildcard_query.rs b/quickwit/quickwit-query/src/query_ast/wildcard_query.rs index 74e3acf0bc5..78253076b29 100644 --- a/quickwit/quickwit-query/src/query_ast/wildcard_query.rs +++ b/quickwit/quickwit-query/src/query_ast/wildcard_query.rs @@ -255,6 +255,48 @@ mod tests { } } + #[test] + fn test_wildcard_query_to_regex_on_escaped_text() { + let query = WildcardQuery { + field: "text_field".to_string(), + value: "MyString Wh1ch\\?a.nOrMal Tokenizer would\\*cut".to_string(), + lenient: false, + }; + + let tokenizer_manager = create_default_quickwit_tokenizer_manager(); + for tokenizer in ["raw", "whitespace"] { + let mut schema_builder = TantivySchema::builder(); + let text_options = TextOptions::default() + .set_indexing_options(TextFieldIndexing::default().set_tokenizer(tokenizer)); + schema_builder.add_text_field("text_field", text_options); + let schema = schema_builder.build(); + + let (_field, path, regex) = query.to_regex(&schema, &tokenizer_manager).unwrap(); + assert_eq!(regex, "MyString Wh1ch\\?a\\.nOrMal Tokenizer would\\*cut"); + assert!(path.is_none()); + } + + for tokenizer in [ + "raw_lowercase", + "lowercase", + "default", + "en_stem", + "chinese_compatible", + "source_code_default", 
+ "source_code_with_hex", + ] { + let mut schema_builder = TantivySchema::builder(); + let text_options = TextOptions::default() + .set_indexing_options(TextFieldIndexing::default().set_tokenizer(tokenizer)); + schema_builder.add_text_field("text_field", text_options); + let schema = schema_builder.build(); + + let (_field, path, regex) = query.to_regex(&schema, &tokenizer_manager).unwrap(); + assert_eq!(regex, "mystring wh1ch\\?a\\.normal tokenizer would\\*cut"); + assert!(path.is_none()); + } + } + #[test] fn test_wildcard_query_to_regex_on_json() { let query = WildcardQuery { diff --git a/quickwit/rest-api-tests/scenarii/es_compatibility/0029-wildcard.yaml b/quickwit/rest-api-tests/scenarii/es_compatibility/0029-wildcard.yaml new file mode 100644 index 00000000000..4bf35c0e18c --- /dev/null +++ b/quickwit/rest-api-tests/scenarii/es_compatibility/0029-wildcard.yaml @@ -0,0 +1,28 @@ +json: + query: + wildcard: + actor.login: + value: jad?nk +expected: + hits: + total: + value: 2 +--- +json: + query: + wildcard: + actor.login: + value: j*nk +expected: + hits: + total: + value: 2 +--- +json: + query: + wildcard: + actor.login: jad?nk +expected: + hits: + total: + value: 2 diff --git a/quickwit/rest-api-tests/scenarii/es_compatibility/0030-prefix.yaml b/quickwit/rest-api-tests/scenarii/es_compatibility/0030-prefix.yaml new file mode 100644 index 00000000000..d239be8f69c --- /dev/null +++ b/quickwit/rest-api-tests/scenarii/es_compatibility/0030-prefix.yaml @@ -0,0 +1,28 @@ +json: + query: + prefix: + actor.login: + value: jado +expected: + hits: + total: + value: 2 +--- +json: + query: + prefix: + actor.login: + value: j +expected: + hits: + total: + value: 10 +--- +json: + query: + prefix: + actor.login: jado +expected: + hits: + total: + value: 2