Skip to content

Commit

Permalink
CR comments
Browse files Browse the repository at this point in the history
  • Loading branch information
fulmicoton committed Oct 17, 2024
1 parent ba35e9c commit a835f4f
Show file tree
Hide file tree
Showing 6 changed files with 63 additions and 38 deletions.
2 changes: 1 addition & 1 deletion docs/reference/es_compatible_api.md
Original file line number Diff line number Diff line change
Expand Up @@ -434,7 +434,7 @@ The following query types are supported.
| `should` | `JsonObject[]` (Optional) | Sub-queries that should match the documents. | [] |
| `filter` | `JsonObject[]` | Like must queries, but the match does not influence the `_score`. | [] |
| `boost` | `Number` | Multiplier boost for score computation. | 1.0 |
| `minimum_should_match` | `Number` or `Str` | If present, quickwit will only match documents for which at least `minimum_should_match` should clauses are matching. `2`, `-1`, `"10%"` are supported. | |
| `minimum_should_match` | `Number` or `Str` | If present, quickwit will only match documents for which at least `minimum_should_match` should clauses are matching. `2`, `-1`, `"10%"` and `"-10%"` are supported. | |

### `range`

Expand Down
31 changes: 16 additions & 15 deletions quickwit/quickwit-query/src/elastic_query_dsl/bool_query.rs
Original file line number Diff line number Diff line change
Expand Up @@ -64,12 +64,12 @@ impl MinimumShouldMatch {
let Some(percentage) = parse_percentage(minimum_should_match_dsl) else {
anyhow::bail!(
"Unsupported minimum should match dsl {}. quickwit currently only \
supports the format '35%'",
supports the format '35%' and `-35%`",
minimum_should_match_dsl
);
};
let min_should_match = num_should_clauses * percentage as usize / 100;
Ok(MinimumShouldMatchResolved::Min(min_should_match))
let min_should_match = percentage * num_should_clauses as isize / 100;
MinimumShouldMatch::Int(min_should_match).resolve(num_should_clauses)
}
MinimumShouldMatch::Int(neg_num_missing_should_clauses)
if *neg_num_missing_should_clauses < 0 =>
Expand All @@ -88,9 +88,7 @@ impl MinimumShouldMatch {
if num_required_should_clauses > num_should_clauses {
Ok(MinimumShouldMatchResolved::NoMatch)
} else {
Ok(MinimumShouldMatchResolved::Min(
num_required_should_clauses as usize,
))
Ok(MinimumShouldMatchResolved::Min(num_required_should_clauses))
}
}
}
Expand All @@ -104,13 +102,13 @@ enum MinimumShouldMatchResolved {
NoMatch,
}

fn parse_percentage(s: &str) -> Option<u32> {
let percentage_u32_str = s.strip_suffix('%')?;
let percentage_u32: u32 = percentage_u32_str.parse::<u32>().ok()?;
if percentage_u32 > 100 {
fn parse_percentage(s: &str) -> Option<isize> {
let percentage_str = s.strip_suffix('%')?;
let percentage_isize = percentage_str.parse::<isize>().ok()?;
if percentage_isize.abs() > 100 {
return None;
}
Some(percentage_u32)
Some(percentage_isize)
}

impl BoolQuery {
Expand Down Expand Up @@ -301,7 +299,7 @@ mod tests {
assert_eq!(parse_percentage("101%"), None);
assert_eq!(parse_percentage("0%"), Some(0));
assert_eq!(parse_percentage("100%"), Some(100));
assert_eq!(parse_percentage("-20%"), None);
assert_eq!(parse_percentage("-20%"), Some(-20));
assert_eq!(parse_percentage("20"), None);
assert_eq!(parse_percentage("20a%"), None);
}
Expand All @@ -315,9 +313,12 @@ mod tests {
MinimumShouldMatchResolved::Min(3)
);
// not supported yet
assert!(MinimumShouldMatch::Str("-30%".to_string())
.resolve(10)
.is_err());
assert_eq!(
MinimumShouldMatch::Str("-30%".to_string())
.resolve(10)
.unwrap(),
MinimumShouldMatchResolved::Min(7)
);
assert!(MinimumShouldMatch::Str("-30!".to_string())
.resolve(10)
.is_err());
Expand Down
6 changes: 4 additions & 2 deletions quickwit/quickwit-query/src/query_ast/bool_query.rs
Original file line number Diff line number Diff line change
Expand Up @@ -66,8 +66,10 @@ impl BuildTantivyAst for BoolQuery {
search_fields: &[String],
with_validation: bool,
) -> Result<TantivyQueryAst, InvalidQuery> {
let mut boolean_query = super::tantivy_query_ast::TantivyBoolQuery::default();
boolean_query.minimum_should_match = self.minimum_should_match;
let mut boolean_query = super::tantivy_query_ast::TantivyBoolQuery {
minimum_should_match: self.minimum_should_match,
..Default::default()
};
for must in &self.must {
let must_leaf = must.build_tantivy_ast_call(
schema,
Expand Down
4 changes: 2 additions & 2 deletions quickwit/quickwit-query/src/query_ast/range_query.rs
Original file line number Diff line number Diff line change
Expand Up @@ -420,8 +420,8 @@ mod tests {
type=I64, 1980)), upper_bound: Included(Term(field=6, type=Json, path=hello, \
type=I64, 1989)) } }), Leaf(FastFieldRangeQuery { bounds: BoundsRange { lower_bound: \
Included(Term(field=6, type=Json, path=hello, type=Str, \"1980\")), upper_bound: \
Included(Term(field=6, type=Json, path=hello, type=Str, \"1989\")) } })], filter: [] \
})"
Included(Term(field=6, type=Json, path=hello, type=Str, \"1989\")) } })], filter: [], \
minimum_should_match: None })"
);
}

Expand Down
51 changes: 40 additions & 11 deletions quickwit/quickwit-query/src/query_ast/tantivy_query_ast.rs
Original file line number Diff line number Diff line change
Expand Up @@ -245,8 +245,8 @@ impl TantivyBoolQuery {
}
self.filter = new_filter;

let mut new_should = Vec::with_capacity(self.should.len());
if self.minimum_should_match.is_none() {
let mut new_should = Vec::with_capacity(self.should.len());
for should in self.should {
let mut should_bool = match should {
TantivyQueryAst::Bool(bool_query) => bool_query,
Expand Down Expand Up @@ -311,7 +311,11 @@ impl TantivyBoolQuery {
}
let has_positive_children =
!(self.must.is_empty() && self.should.is_empty() && self.filter.is_empty());

if !has_positive_children {
if self.minimum_should_match.unwrap_or(0) > 0 {
return MatchAllOrNone::MatchNone.into();
}
if self
.must_not
.iter()
Expand Down Expand Up @@ -373,7 +377,7 @@ impl From<TantivyBoolQuery> for Box<dyn TantivyQuery> {
} else {
BooleanQuery::from(clause)
};
Box::new(tantivy::query::BooleanQuery::from(tantivy_bool_query))
Box::new(tantivy_bool_query)
}
}

Expand Down Expand Up @@ -886,11 +890,22 @@ mod tests {
{
return None;
}
let should_score: u32 = self
.should
.iter()
.filter_map(|should| should.evaluate_test())
.sum();

let mut should_score = 0u32;
let mut matching_should_count = 0;
for should in &self.should {
if let Some(score) = should.evaluate_test() {
should_score += score;
matching_should_count += 1;
}
}

if let Some(minimum_should_match) = self.minimum_should_match {
if minimum_should_match > matching_should_count {
return None;
}
}

if self.must.len() + self.filter.len() > 0 {
if self
.must
Expand Down Expand Up @@ -937,7 +952,7 @@ mod tests {
let filter = proptest::collection::vec(element.clone(), 0..4);
let should = proptest::collection::vec(element.clone(), 0..4);
let must_not = proptest::collection::vec(element.clone(), 0..4);
let minimum_should_match = (0usize..2).prop_map(|n: usize| n.checked_sub(1));
let minimum_should_match = (0usize..=2).prop_map(|n: usize| n.checked_sub(1));
(must, filter, should, must_not, minimum_should_match).prop_map(
|(must, filter, should, must_not, minimum_should_match)| {
TantivyQueryAst::Bool(TantivyBoolQuery {
Expand All @@ -952,14 +967,28 @@ mod tests {
})
}

#[track_caller]
fn test_aux_simplify_never_change_result(ast: TantivyQueryAst) {
let simplified_ast = ast.clone().simplify();
assert_eq!(dbg!(simplified_ast).evaluate_test(), ast.evaluate_test());
}

proptest::proptest! {
#![proptest_config(ProptestConfig {
cases: 10000, .. ProptestConfig::default()
cases: 100000, .. ProptestConfig::default()
})]
#[test]
fn test_proptest_simplify_never_change_result(ast in ast_strategy()) {
let simplified_ast = ast.clone().simplify();
assert_eq!(dbg!(simplified_ast).evaluate_test(), ast.evaluate_test());
test_aux_simplify_never_change_result(ast);
}
}

#[test]
fn test_simplify_never_change_result_simple_corner_case() {
let ast = TantivyQueryAst::Bool(TantivyBoolQuery {
minimum_should_match: Some(1),
..Default::default()
});
test_aux_simplify_never_change_result(ast);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -43,13 +43,6 @@ json:
type: text
fast: true
indexed: true
# - name: actor
# type: object
# field_mappings:
# - name: id
# type: u64
# fast: true
# indexed: true
- name: public
type: bool
fast: false
Expand Down

0 comments on commit a835f4f

Please sign in to comment.