Skip to content
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 54 additions & 0 deletions docs/reference/es_compatible_api.md
Original file line number Diff line number Diff line change
Expand Up @@ -741,6 +741,60 @@ Query matching only documents containing a non-null value for a given field.
| -------- | ------ | ------------------------------------------------------- | ------- |
| `field` | String | Only documents with a value for field will be returned. | - |

### `prefix`

[Elasticsearch reference documentation](https://www.elastic.co/guide/en/elasticsearch/reference/8.8/query-dsl-prefix-query.html)

Returns documents that contain a specific prefix in a provided field.

#### Example

```json
{
  "query": {
    "prefix": {
      "author.login": {
        "value": "adm"
      }
    }
  }
}
```

#### Supported Parameters

| Variable | Type | Description | Default |
| -------- | ------ | ----------------------------------------------- | ------- |
| `value` | String | Beginning characters of terms you wish to find. | - |

### `wildcard`

[Elasticsearch reference documentation](https://www.elastic.co/guide/en/elasticsearch/reference/8.8/query-dsl-wildcard-query.html)

Returns documents that contain terms matching a wildcard pattern:
* `?` replaces one and only one term character
* `*` replaces any number of term characters or an empty string

#### Example

```json
{
  "query": {
    "wildcard": {
      "author.login": {
        "value": "adm?n*"
      }
    }
  }
}
```

#### Supported Parameters

| Variable | Type | Description | Default |
| -------- | ------ | -------------------------------------------- | ------- |
| `value` | String | Wildcard pattern for terms you wish to find. | - |


### About the `lenient` argument

Expand Down
8 changes: 8 additions & 0 deletions quickwit/quickwit-query/src/elastic_query_dsl/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,16 +22,19 @@ mod match_query;
mod multi_match;
mod one_field_map;
mod phrase_prefix_query;
mod prefix_query;
mod query_string_query;
mod range_query;
mod regex_query;
mod string_or_struct;
mod term_query;
mod terms_query;
mod wildcard_query;

use bool_query::BoolQuery;
pub use one_field_map::OneFieldMap;
use phrase_prefix_query::MatchPhrasePrefixQuery;
use prefix_query::PrefixQuery;
pub(crate) use query_string_query::QueryStringQuery;
use range_query::RangeQuery;
pub(crate) use string_or_struct::StringOrStructForSerialization;
Expand All @@ -44,6 +47,7 @@ use crate::elastic_query_dsl::match_query::MatchQuery;
use crate::elastic_query_dsl::multi_match::MultiMatchQuery;
use crate::elastic_query_dsl::regex_query::RegexQuery;
use crate::elastic_query_dsl::terms_query::TermsQuery;
use crate::elastic_query_dsl::wildcard_query::WildcardQuery;
use crate::not_nan_f32::NotNaNf32;
use crate::query_ast::QueryAst;

Expand Down Expand Up @@ -85,6 +89,8 @@ pub(crate) enum ElasticQueryDslInner {
Range(RangeQuery),
Exists(ExistsQuery),
Regexp(RegexQuery),
Wildcard(WildcardQuery),
Prefix(PrefixQuery),
}

#[derive(Deserialize, Debug, Eq, PartialEq, Clone)]
Expand Down Expand Up @@ -133,6 +139,8 @@ impl ConvertibleToQueryAst for ElasticQueryDslInner {
Self::Exists(exists_query) => exists_query.convert_to_query_ast(),
Self::MultiMatch(multi_match_query) => multi_match_query.convert_to_query_ast(),
Self::Regexp(regex_query) => regex_query.convert_to_query_ast(),
Self::Wildcard(wildcard_query) => wildcard_query.convert_to_query_ast(),
Self::Prefix(prefix_query) => prefix_query.convert_to_query_ast(),
}
}
}
Expand Down
112 changes: 112 additions & 0 deletions quickwit/quickwit-query/src/elastic_query_dsl/prefix_query.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
// Copyright 2021-Present Datadog, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use serde::Deserialize;

use crate::elastic_query_dsl::one_field_map::OneFieldMap;
use crate::elastic_query_dsl::{ConvertibleToQueryAst, StringOrStructForSerialization};
use crate::query_ast::{QueryAst, WildcardQuery as AstWildcardQuery};

/// Elasticsearch-compatible `prefix` query targeting a single field.
///
/// Deserialization goes through
/// `OneFieldMap<StringOrStructForSerialization<PrefixQueryParams>>` (see the
/// `serde(from = ...)` attribute) and is then converted into this struct.
#[derive(Deserialize, Clone, Eq, PartialEq, Debug)]
#[serde(from = "OneFieldMap<StringOrStructForSerialization<PrefixQueryParams>>")]
pub(crate) struct PrefixQuery {
/// Name of the field the prefix is matched against.
pub(crate) field: String,
/// Parameters of the query (currently only the prefix value).
pub(crate) params: PrefixQueryParams,
}

/// Parameters accepted by the `prefix` query.
///
/// Unknown keys are rejected at deserialization time (`deny_unknown_fields`).
#[derive(Deserialize, Debug, Default, Eq, PartialEq, Clone)]
#[serde(deny_unknown_fields)]
pub struct PrefixQueryParams {
/// Beginning characters of the terms to find.
value: String,
}

impl ConvertibleToQueryAst for PrefixQuery {
fn convert_to_query_ast(self) -> anyhow::Result<QueryAst> {
let wildcard = format!(
"{}*",
self.params
.value
.replace("\\", "\\\\")
.replace("*", "\\*")
.replace("?", "\\?")
);
Ok(AstWildcardQuery {
field: self.field,
value: wildcard,
lenient: true,
}
.into())
}
}

impl From<OneFieldMap<StringOrStructForSerialization<PrefixQueryParams>>> for PrefixQuery {
fn from(
match_query_params: OneFieldMap<StringOrStructForSerialization<PrefixQueryParams>>,
) -> Self {
let OneFieldMap { field, value } = match_query_params;
PrefixQuery {
field,
params: value.inner,
}
}
}

impl From<String> for PrefixQueryParams {
fn from(value: String) -> PrefixQueryParams {
PrefixQueryParams { value }
}
}

#[cfg(test)]
mod tests {
    use super::*;

    /// A plain prefix is converted into a lenient wildcard query ending with `*`.
    #[test]
    fn test_prefix_query_convert_to_query_ast() {
        let prefix_query_json = r#"{
            "user_name": {
                "value": "john"
            }
        }"#;
        let prefix_query: PrefixQuery = serde_json::from_str(prefix_query_json).unwrap();
        let query_ast = prefix_query.convert_to_query_ast().unwrap();

        if let QueryAst::Wildcard(prefix) = query_ast {
            assert_eq!(prefix.field, "user_name");
            assert_eq!(prefix.value, "john*");
            assert!(prefix.lenient);
        } else {
            // Prefix queries are lowered to the wildcard AST node, so that is the
            // variant we expect here.
            panic!("Expected QueryAst::Wildcard, got {:?}", query_ast);
        }
    }

    /// Wildcard metacharacters (`\`, `?`, `*`) in the prefix are escaped so that only
    /// the appended trailing `*` remains unescaped.
    #[test]
    fn test_prefix_query_convert_to_query_ast_special_chars() {
        let prefix_query_json = r#"{
            "user_name": {
                "value": "a\\dm?n*"
            }
        }"#;
        let prefix_query: PrefixQuery = serde_json::from_str(prefix_query_json).unwrap();
        let query_ast = prefix_query.convert_to_query_ast().unwrap();

        if let QueryAst::Wildcard(prefix) = query_ast {
            assert_eq!(prefix.field, "user_name");
            assert_eq!(prefix.value, "a\\\\dm\\?n\\**");
            assert!(prefix.lenient);
        } else {
            panic!("Expected QueryAst::Wildcard, got {:?}", query_ast);
        }
    }
}
85 changes: 85 additions & 0 deletions quickwit/quickwit-query/src/elastic_query_dsl/wildcard_query.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
// Copyright 2021-Present Datadog, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use serde::Deserialize;

use crate::elastic_query_dsl::one_field_map::OneFieldMap;
use crate::elastic_query_dsl::{ConvertibleToQueryAst, StringOrStructForSerialization};
use crate::query_ast::{QueryAst, WildcardQuery as AstWildcardQuery};

/// Elasticsearch-compatible `wildcard` query targeting a single field.
///
/// Deserialization goes through
/// `OneFieldMap<StringOrStructForSerialization<WildcardQueryParams>>` (see the
/// `serde(from = ...)` attribute) and is then converted into this struct.
#[derive(Deserialize, Clone, Eq, PartialEq, Debug)]
#[serde(from = "OneFieldMap<StringOrStructForSerialization<WildcardQueryParams>>")]
pub(crate) struct WildcardQuery {
/// Name of the field the wildcard pattern is matched against.
pub(crate) field: String,
/// Parameters of the query (currently only the wildcard pattern).
pub(crate) params: WildcardQueryParams,
}

/// Parameters accepted by the `wildcard` query.
///
/// Unknown keys are rejected at deserialization time (`deny_unknown_fields`).
#[derive(Deserialize, Debug, Default, Eq, PartialEq, Clone)]
#[serde(deny_unknown_fields)]
pub struct WildcardQueryParams {
/// Wildcard pattern for the terms to find.
value: String,
}

impl ConvertibleToQueryAst for WildcardQuery {
fn convert_to_query_ast(self) -> anyhow::Result<QueryAst> {
Ok(AstWildcardQuery {
field: self.field,
value: self.params.value,
lenient: true,
}
.into())
}
}

impl From<OneFieldMap<StringOrStructForSerialization<WildcardQueryParams>>> for WildcardQuery {
fn from(
match_query_params: OneFieldMap<StringOrStructForSerialization<WildcardQueryParams>>,
) -> Self {
let OneFieldMap { field, value } = match_query_params;
WildcardQuery {
field,
params: value.inner,
}
}
}

impl From<String> for WildcardQueryParams {
fn from(value: String) -> WildcardQueryParams {
WildcardQueryParams { value }
}
}

#[cfg(test)]
mod tests {
    use super::*;

    /// The wildcard pattern is forwarded untouched to a lenient wildcard AST query.
    #[test]
    fn test_wildcard_query_convert_to_query_ast() {
        let wildcard_query_json = r#"{
            "user_name": {
                "value": "john*"
            }
        }"#;
        let parsed_query: WildcardQuery = serde_json::from_str(wildcard_query_json).unwrap();

        match parsed_query.convert_to_query_ast().unwrap() {
            QueryAst::Wildcard(wildcard) => {
                assert_eq!(wildcard.field, "user_name");
                assert_eq!(wildcard.value, "john*");
                assert!(wildcard.lenient);
            }
            _ => panic!("Expected QueryAst::Wildcard"),
        }
    }
}
42 changes: 42 additions & 0 deletions quickwit/quickwit-query/src/query_ast/wildcard_query.rs
Original file line number Diff line number Diff line change
Expand Up @@ -255,6 +255,48 @@ mod tests {
}
}

#[test]
fn test_wildcard_query_to_regex_on_escaped_text() {
    let query = WildcardQuery {
        field: "text_field".to_string(),
        value: "MyString Wh1ch\\?a.nOrMal Tokenizer would\\*cut".to_string(),
        lenient: false,
    };
    let tokenizer_manager = create_default_quickwit_tokenizer_manager();

    // Each entry pairs a group of tokenizers with the regex all of them must yield:
    // the first group is expected to keep the original casing, the second one to
    // produce a lowercased pattern.
    let cases = [
        (
            &["raw", "whitespace"][..],
            "MyString Wh1ch\\?a\\.nOrMal Tokenizer would\\*cut",
        ),
        (
            &[
                "raw_lowercase",
                "lowercase",
                "default",
                "en_stem",
                "chinese_compatible",
                "source_code_default",
                "source_code_with_hex",
            ][..],
            "mystring wh1ch\\?a\\.normal tokenizer would\\*cut",
        ),
    ];

    for (tokenizers, expected_regex) in cases {
        for tokenizer in tokenizers.iter().copied() {
            let indexing = TextFieldIndexing::default().set_tokenizer(tokenizer);
            let text_options = TextOptions::default().set_indexing_options(indexing);
            let mut schema_builder = TantivySchema::builder();
            schema_builder.add_text_field("text_field", text_options);
            let schema = schema_builder.build();

            let (_field, path, regex) = query.to_regex(&schema, &tokenizer_manager).unwrap();
            assert_eq!(regex, expected_regex);
            assert!(path.is_none());
        }
    }
}

#[test]
fn test_wildcard_query_to_regex_on_json() {
let query = WildcardQuery {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
json:
query:
wildcard:
actor.login:
value: jad?nk
expected:
hits:
total:
value: 2
---
json:
query:
wildcard:
actor.login:
value: j*nk
expected:
hits:
total:
value: 2
---
json:
query:
wildcard:
actor.login: jad?nk
expected:
hits:
total:
value: 2
Loading