diff --git a/docs/reference/es_compatible_api.md b/docs/reference/es_compatible_api.md index 2d5329c6af4..3ead0fa927c 100644 --- a/docs/reference/es_compatible_api.md +++ b/docs/reference/es_compatible_api.md @@ -434,7 +434,7 @@ The following query types are supported. | `should` | `JsonObject[]` (Optional) | Sub-queries that should match the documents. | [] | | `filter` | `JsonObject[]` | Like must queries, but the match does not influence the `_score`. | [] | | `boost` | `Number` | Multiplier boost for score computation. | 1.0 | -| `minimum_should_match` | `Number` or `Str` | If present, quickwit will only match documents for which at least `minimum_should_match` should clauses are matching. `2`, `-1`, `"10%"` are supported. | | +| `minimum_should_match` | `Number` or `Str` | If present, quickwit will only match documents for which at least `minimum_should_match` should clauses are matching. `2`, `-1`, `"10%"` and `"-10%"` are supported. | | ### `range` diff --git a/quickwit/quickwit-query/src/elastic_query_dsl/bool_query.rs b/quickwit/quickwit-query/src/elastic_query_dsl/bool_query.rs index a546f4e7c85..3949a55ed7f 100644 --- a/quickwit/quickwit-query/src/elastic_query_dsl/bool_query.rs +++ b/quickwit/quickwit-query/src/elastic_query_dsl/bool_query.rs @@ -64,12 +64,12 @@ impl MinimumShouldMatch { let Some(percentage) = parse_percentage(minimum_should_match_dsl) else { anyhow::bail!( "Unsupported minimum should match dsl {}. quickwit currently only \ - supports the format '35%'", + supports the format '35%' and `-35%`", minimum_should_match_dsl ); }; - let min_should_match = num_should_clauses * percentage as usize / 100; - Ok(MinimumShouldMatchResolved::Min(min_should_match)) + let min_should_match = percentage * num_should_clauses as isize / 100; + MinimumShouldMatch::Int(min_should_match).resolve(num_should_clauses) } MinimumShouldMatch::Int(neg_num_missing_should_clauses) if *neg_num_missing_should_clauses < 0 => @@ -88,9 +88,7 @@ impl MinimumShouldMatch { if num_required_should_clauses > num_should_clauses { Ok(MinimumShouldMatchResolved::NoMatch) } else { - Ok(MinimumShouldMatchResolved::Min( - num_required_should_clauses as usize, - )) + Ok(MinimumShouldMatchResolved::Min(num_required_should_clauses)) } } } @@ -104,13 +102,13 @@ enum MinimumShouldMatchResolved { NoMatch, } -fn parse_percentage(s: &str) -> Option { - let percentage_u32_str = s.strip_suffix('%')?; - let percentage_u32: u32 = percentage_u32_str.parse::().ok()?; - if percentage_u32 > 100 { +fn parse_percentage(s: &str) -> Option { + let percentage_str = s.strip_suffix('%')?; + let percentage_isize = percentage_str.parse::().ok()?; + if percentage_isize.abs() > 100 { return None; } - Some(percentage_u32) + Some(percentage_isize) } impl BoolQuery { @@ -301,7 +299,7 @@ mod tests { assert_eq!(parse_percentage("101%"), None); assert_eq!(parse_percentage("0%"), Some(0)); assert_eq!(parse_percentage("100%"), Some(100)); - assert_eq!(parse_percentage("-20%"), None); + assert_eq!(parse_percentage("-20%"), Some(-20)); assert_eq!(parse_percentage("20"), None); assert_eq!(parse_percentage("20a%"), None); } @@ -315,9 +313,12 @@ mod tests { MinimumShouldMatchResolved::Min(3) ); // not supported yet - assert!(MinimumShouldMatch::Str("-30%".to_string()) - .resolve(10) - .is_err()); + assert_eq!( + MinimumShouldMatch::Str("-30%".to_string()) + .resolve(10) + .unwrap(), + MinimumShouldMatchResolved::Min(7) + ); assert!(MinimumShouldMatch::Str("-30!".to_string()) .resolve(10) .is_err()); diff --git a/quickwit/quickwit-query/src/query_ast/bool_query.rs b/quickwit/quickwit-query/src/query_ast/bool_query.rs index 8a333bd850c..872b1782970 100644 --- a/quickwit/quickwit-query/src/query_ast/bool_query.rs +++ b/quickwit/quickwit-query/src/query_ast/bool_query.rs @@ -66,8 +66,10 @@ impl BuildTantivyAst for BoolQuery { search_fields: &[String], with_validation: bool, ) -> Result { - let mut boolean_query = super::tantivy_query_ast::TantivyBoolQuery::default(); - boolean_query.minimum_should_match = self.minimum_should_match; + let mut boolean_query = super::tantivy_query_ast::TantivyBoolQuery { + minimum_should_match: self.minimum_should_match, + ..Default::default() + }; for must in &self.must { let must_leaf = must.build_tantivy_ast_call( schema, diff --git a/quickwit/quickwit-query/src/query_ast/range_query.rs b/quickwit/quickwit-query/src/query_ast/range_query.rs index f8445d4bd8c..8fe9a8fa3b8 100644 --- a/quickwit/quickwit-query/src/query_ast/range_query.rs +++ b/quickwit/quickwit-query/src/query_ast/range_query.rs @@ -420,8 +420,8 @@ mod tests { type=I64, 1980)), upper_bound: Included(Term(field=6, type=Json, path=hello, \ type=I64, 1989)) } }), Leaf(FastFieldRangeQuery { bounds: BoundsRange { lower_bound: \ Included(Term(field=6, type=Json, path=hello, type=Str, \"1980\")), upper_bound: \ - Included(Term(field=6, type=Json, path=hello, type=Str, \"1989\")) } })], filter: [] \ - })" + Included(Term(field=6, type=Json, path=hello, type=Str, \"1989\")) } })], filter: [], \ + minimum_should_match: None })" ); } diff --git a/quickwit/quickwit-query/src/query_ast/tantivy_query_ast.rs b/quickwit/quickwit-query/src/query_ast/tantivy_query_ast.rs index dafbe1be456..9295324cd50 100644 --- a/quickwit/quickwit-query/src/query_ast/tantivy_query_ast.rs +++ b/quickwit/quickwit-query/src/query_ast/tantivy_query_ast.rs @@ -245,8 +245,8 @@ impl TantivyBoolQuery { } self.filter = new_filter; - let mut new_should = Vec::with_capacity(self.should.len()); if self.minimum_should_match.is_none() { + let mut new_should = Vec::with_capacity(self.should.len()); for should in self.should { let mut should_bool = match should { TantivyQueryAst::Bool(bool_query) => bool_query, @@ -311,7 +311,11 @@ impl TantivyBoolQuery { } let has_positive_children = !(self.must.is_empty() && self.should.is_empty() && self.filter.is_empty()); + if !has_positive_children { + if self.minimum_should_match.unwrap_or(0) > 0 { + return MatchAllOrNone::MatchNone.into(); + } if self .must_not .iter() @@ -373,7 +377,7 @@ impl From for Box { } else { BooleanQuery::from(clause) }; - Box::new(tantivy::query::BooleanQuery::from(tantivy_bool_query)) + Box::new(tantivy_bool_query) } } @@ -886,11 +890,22 @@ mod tests { { return None; } - let should_score: u32 = self - .should - .iter() - .filter_map(|should| should.evaluate_test()) - .sum(); + + let mut should_score = 0u32; + let mut matching_should_count = 0; + for should in &self.should { + if let Some(score) = should.evaluate_test() { + should_score += score; + matching_should_count += 1; + } + } + + if let Some(minimum_should_match) = self.minimum_should_match { + if minimum_should_match > matching_should_count { + return None; + } + } + if self.must.len() + self.filter.len() > 0 { if self .must @@ -937,7 +952,7 @@ mod tests { let filter = proptest::collection::vec(element.clone(), 0..4); let should = proptest::collection::vec(element.clone(), 0..4); let must_not = proptest::collection::vec(element.clone(), 0..4); - let minimum_should_match = (0usize..2).prop_map(|n: usize| n.checked_sub(1)); + let minimum_should_match = (0usize..=2).prop_map(|n: usize| n.checked_sub(1)); (must, filter, should, must_not, minimum_should_match).prop_map( |(must, filter, should, must_not, minimum_should_match)| { TantivyQueryAst::Bool(TantivyBoolQuery { @@ -952,14 +967,28 @@ mod tests { }) } + #[track_caller] + fn test_aux_simplify_never_change_result(ast: TantivyQueryAst) { + let simplified_ast = ast.clone().simplify(); + assert_eq!(dbg!(simplified_ast).evaluate_test(), ast.evaluate_test()); + } + proptest::proptest! { #![proptest_config(ProptestConfig { - cases: 10000, .. ProptestConfig::default() + cases: 100000, .. ProptestConfig::default() })] #[test] fn test_proptest_simplify_never_change_result(ast in ast_strategy()) { - let simplified_ast = ast.clone().simplify(); - assert_eq!(dbg!(simplified_ast).evaluate_test(), ast.evaluate_test()); + test_aux_simplify_never_change_result(ast); } } + + #[test] + fn test_simplify_never_change_result_simple_corner_case() { + let ast = TantivyQueryAst::Bool(TantivyBoolQuery { + minimum_should_match: Some(1), + ..Default::default() + }); + test_aux_simplify_never_change_result(ast); + } } diff --git a/quickwit/rest-api-tests/scenarii/es_compatibility/_setup.quickwit.yaml b/quickwit/rest-api-tests/scenarii/es_compatibility/_setup.quickwit.yaml index a306e4e7d06..fad920d1b38 100644 --- a/quickwit/rest-api-tests/scenarii/es_compatibility/_setup.quickwit.yaml +++ b/quickwit/rest-api-tests/scenarii/es_compatibility/_setup.quickwit.yaml @@ -43,13 +43,6 @@ json: type: text fast: true indexed: true - # - name: actor - # type: object - # field_mappings: - # - name: id - # type: u64 - # fast: true - # indexed: true - name: public type: bool fast: false