Skip to content

Commit

Permalink
Added support for minimum_should_match in quickwit es API
Browse files Browse the repository at this point in the history
Closes #4828
  • Loading branch information
fulmicoton committed Oct 11, 2024
1 parent 1579235 commit d1f0dce
Show file tree
Hide file tree
Showing 6 changed files with 124 additions and 29 deletions.
29 changes: 29 additions & 0 deletions quickwit/quickwit-query/src/elastic_query_dsl/bool_query.rs
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,8 @@ pub struct BoolQuery {
filter: Vec<ElasticQueryDslInner>,
#[serde(default)]
pub boost: Option<NotNaNf32>,
#[serde(default)]
pub minimum_should_match: Option<usize>,
}

impl BoolQuery {
Expand All @@ -57,6 +59,7 @@ impl BoolQuery {
should: children,
filter: Vec::new(),
boost: None,
minimum_should_match: None,
}
}
}
Expand All @@ -75,6 +78,7 @@ impl ConvertibleToQueryAst for BoolQuery {
must_not: convert_vec(self.must_not)?,
should: convert_vec(self.should)?,
filter: convert_vec(self.filter)?,
minimum_should_match: self.minimum_should_match,
};
Ok(bool_query_ast.into())
}
Expand All @@ -90,6 +94,8 @@ impl From<BoolQuery> for ElasticQueryDslInner {
mod tests {
use crate::elastic_query_dsl::bool_query::BoolQuery;
use crate::elastic_query_dsl::term_query::term_query_from_field_value;
use crate::elastic_query_dsl::ConvertibleToQueryAst;
use crate::query_ast::QueryAst;

#[test]
fn test_dsl_bool_query_deserialize_simple() {
Expand All @@ -111,6 +117,7 @@ mod tests {
should: Vec::new(),
filter: Vec::new(),
boost: None,
minimum_should_match: None
}
);
}
Expand All @@ -130,6 +137,7 @@ mod tests {
should: Vec::new(),
filter: vec![term_query_from_field_value("product_id", "2").into(),],
boost: None,
minimum_should_match: None,
}
);
}
Expand All @@ -152,7 +160,28 @@ mod tests {
should: Vec::new(),
filter: Vec::new(),
boost: None,
minimum_should_match: None,
}
);
}

#[test]
fn test_dsl_query_with_minimum_should_match() {
let bool_query_json = r#"{
"should": [
{ "term": {"product_id": {"value": "1" }} },
{ "term": {"product_id": {"value": "2" }} },
{ "term": {"product_id": {"value": "3" }} }
],
"minimum_should_match": 2
}"#;
let bool_query: BoolQuery = serde_json::from_str(bool_query_json).unwrap();
assert_eq!(bool_query.should.len(), 3);
assert_eq!(bool_query.minimum_should_match, Some(2));
let QueryAst::Bool(bool_query_ast) = bool_query.convert_to_query_ast().unwrap() else {
panic!();
};
assert_eq!(bool_query_ast.should.len(), 3);
assert_eq!(bool_query_ast.minimum_should_match, Some(2));
}
}
3 changes: 3 additions & 0 deletions quickwit/quickwit-query/src/query_ast/bool_query.rs
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,8 @@ pub struct BoolQuery {
pub should: Vec<QueryAst>,
#[serde(default, skip_serializing_if = "Vec::is_empty")]
pub filter: Vec<QueryAst>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub minimum_should_match: Option<usize>,
}

impl From<BoolQuery> for QueryAst {
Expand All @@ -65,6 +67,7 @@ impl BuildTantivyAst for BoolQuery {
with_validation: bool,
) -> Result<TantivyQueryAst, InvalidQuery> {
let mut boolean_query = super::tantivy_query_ast::TantivyBoolQuery::default();
boolean_query.minimum_should_match = self.minimum_should_match;
for must in &self.must {
let must_leaf = must.build_tantivy_ast_call(
schema,
Expand Down
2 changes: 2 additions & 0 deletions quickwit/quickwit-query/src/query_ast/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@ impl QueryAst {
must_not,
should,
filter,
minimum_should_match,
}) => {
let must = parse_user_query_in_asts(must, default_search_fields)?;
let must_not = parse_user_query_in_asts(must_not, default_search_fields)?;
Expand All @@ -92,6 +93,7 @@ impl QueryAst {
must_not,
should,
filter,
minimum_should_match,
}
.into())
}
Expand Down
75 changes: 46 additions & 29 deletions quickwit/quickwit-query/src/query_ast/tantivy_query_ast.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
// along with this program. If not, see <http://www.gnu.org/licenses/>.

use tantivy::query::{
AllQuery as TantivyAllQuery, ConstScoreQuery as TantivyConstScoreQuery,
AllQuery as TantivyAllQuery, BooleanQuery, ConstScoreQuery as TantivyConstScoreQuery,
EmptyQuery as TantivyEmptyQuery,
};
use tantivy::query_grammar::Occur;
Expand Down Expand Up @@ -160,6 +160,7 @@ pub(crate) struct TantivyBoolQuery {
pub must_not: Vec<TantivyQueryAst>,
pub should: Vec<TantivyQueryAst>,
pub filter: Vec<TantivyQueryAst>,
pub minimum_should_match: Option<usize>,
}

fn simplify_asts(asts: Vec<TantivyQueryAst>) -> Vec<TantivyQueryAst> {
Expand All @@ -186,6 +187,7 @@ impl TantivyBoolQuery {
self.should = simplify_asts(self.should);
self.must_not = simplify_asts(self.must_not);
self.filter = simplify_asts(self.filter);

for must_children in [&mut self.must, &mut self.filter] {
for child in must_children {
if child.const_predicate() == Some(MatchAllOrNone::MatchNone) {
Expand All @@ -197,6 +199,7 @@ impl TantivyBoolQuery {
&& self.must.is_empty()
&& self.filter.is_empty()
&& self.must_not.is_empty()
&& self.minimum_should_match.unwrap_or(0) == 0
{
// This is just a convention mimicking Elastic/Commonsearch's behavior.
return TantivyQueryAst::match_all();
Expand All @@ -211,7 +214,7 @@ impl TantivyBoolQuery {
continue;
}
};
if must_bool.should.is_empty() {
if must_bool.should.is_empty() && must_bool.minimum_should_match.is_none() {
new_must.append(&mut must_bool.must);
self.filter.append(&mut must_bool.filter);
self.must_not.append(&mut must_bool.must_not);
Expand All @@ -230,10 +233,10 @@ impl TantivyBoolQuery {
continue;
}
};
if filter_bool.should.is_empty() {
if filter_bool.should.is_empty() && filter_bool.minimum_should_match.is_none() {
new_filter.append(&mut filter_bool.must);
new_filter.append(&mut filter_bool.filter);
// must_not doeen't contribute to score, no need to move it to some filter_not kind
// must_not doesn't contribute to score, no need to move it to some filter_not kind
// of thing
self.must_not.append(&mut filter_bool.must_not);
} else {
Expand All @@ -243,24 +246,27 @@ impl TantivyBoolQuery {
self.filter = new_filter;

let mut new_should = Vec::with_capacity(self.should.len());
for should in self.should {
let mut should_bool = match should {
TantivyQueryAst::Bool(bool_query) => bool_query,
_ => {
new_should.push(should);
continue;
if self.minimum_should_match.is_none() {
for should in self.should {
let mut should_bool = match should {
TantivyQueryAst::Bool(bool_query) => bool_query,
_ => {
new_should.push(should);
continue;
}
};
if should_bool.must.is_empty()
&& should_bool.filter.is_empty()
&& should_bool.must_not.is_empty()
&& should_bool.minimum_should_match.is_none()
{
new_should.append(&mut should_bool.should);
} else {
new_should.push(TantivyQueryAst::Bool(should_bool));
}
};
if should_bool.must.is_empty()
&& should_bool.filter.is_empty()
&& should_bool.must_not.is_empty()
{
new_should.append(&mut should_bool.should);
} else {
new_should.push(TantivyQueryAst::Bool(should_bool));
}
self.should = new_should;
}
self.should = new_should;

// TODO we could turn must_not(must_not(abc, def)) into should(filter(abc), filter(def)),
// we can't simply have should(abc, def) because of scoring, and should(filter(abc, def))
Expand Down Expand Up @@ -317,14 +323,15 @@ impl TantivyBoolQuery {
} else {
let num_children =
self.must.len() + self.should.len() + self.must_not.len() + self.filter.len();
if num_children == 1 {
if num_children == 1 && self.minimum_should_match.is_none() {
if let Some(ast) = self.must.pop().or(self.should.pop()) {
return ast;
}
// We do not optimize a single filter clause for the moment.
// We do need a mechanism to make sure we keep the boost of 0.
}
}

TantivyQueryAst::Bool(self)
}
}
Expand Down Expand Up @@ -360,7 +367,13 @@ impl From<TantivyBoolQuery> for Box<dyn TantivyQuery> {
Box::new(TantivyConstScoreQuery::new(filter_query, 0.0f32)),
));
}
Box::new(tantivy::query::BooleanQuery::from(clause))
let tantivy_bool_query = if let Some(minimum_should_match) = bool_query.minimum_should_match
{
BooleanQuery::with_minimum_required_clauses(clause, minimum_should_match)
} else {
BooleanQuery::from(clause)
};
Box::new(tantivy::query::BooleanQuery::from(tantivy_bool_query))
}
}

Expand Down Expand Up @@ -881,14 +894,18 @@ mod tests {
let filter = proptest::collection::vec(element.clone(), 0..4);
let should = proptest::collection::vec(element.clone(), 0..4);
let must_not = proptest::collection::vec(element.clone(), 0..4);
(must, filter, should, must_not).prop_map(|(must, filter, should, must_not)| {
TantivyQueryAst::Bool(TantivyBoolQuery {
must,
filter,
should,
must_not,
})
})
let minimum_should_match = (0usize..2).prop_map(|n: usize| n.checked_sub(1));
(must, filter, should, must_not, minimum_should_match).prop_map(
|(must, filter, should, must_not, minimum_should_match)| {
TantivyQueryAst::Bool(TantivyBoolQuery {
must,
filter,
should,
must_not,
minimum_should_match,
})
},
)
})
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -284,6 +284,7 @@ fn build_request_for_es_api(
must_not: Vec::new(),
should: Vec::new(),
filter: queries,
minimum_should_match: None,
});
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,20 @@ expected:
total:
value: 100
---
json:
query:
bool:
should:
- {"query_string": {"query": "type:PushEvent"}}
- {"query_string": {"query": "actor.login:jadonk"}}
- {"query_string": {"query": "actor.login:teozfrank"}}
- {"query_string": {"query": "type:IssueCommentEvent"}}
minimum_should_match: 1
expected:
hits:
total:
value: 69
---
json:
query:
bool:
Expand All @@ -135,3 +149,32 @@ expected:
hits:
total:
value: 3
---
json:
query:
bool:
should:
- {"query_string": {"query": "type:PushEvent"}}
- {"query_string": {"query": "actor.login:jadonk"}}
- {"query_string": {"query": "actor.login:teozfrank"}}
- {"query_string": {"query": "type:IssueCommentEvent"}}
minimum_should_match: 3
expected:
hits:
total:
value: 0
---
json:
query:
bool:
must:
- {"query_string": {"query": "type:PushEvent"}}
should:
- {"query_string": {"query": "actor.login:jadonk"}}
- {"query_string": {"query": "actor.login:teozfrank"}}
- {"query_string": {"query": "type:IssueCommentEvent"}}
minimum_should_match: 1
expected:
hits:
total:
value: 2

0 comments on commit d1f0dce

Please sign in to comment.