Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

4829 minimum should match #5488

Merged
merged 3 commits into from
Oct 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/reference/es_compatible_api.md
Original file line number Diff line number Diff line change
Expand Up @@ -434,6 +434,7 @@ The following query types are supported.
| `should` | `JsonObject[]` (Optional) | Sub-queries that should match the documents. | [] |
| `filter` | `JsonObject[]` | Like must queries, but the match does not influence the `_score`. | [] |
| `boost` | `Number` | Multiplier boost for score computation. | 1.0 |
| `minimum_should_match` | `Number` or `Str` | If present, quickwit will only match documents for which at least `minimum_should_match` should clauses are matching. `2`, `-1`, `"10%"` and `"-10%"` are supported. | |

### `range`

Expand Down
194 changes: 189 additions & 5 deletions quickwit/quickwit-query/src/elastic_query_dsl/bool_query.rs
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,79 @@ pub struct BoolQuery {
filter: Vec<ElasticQueryDslInner>,
#[serde(default)]
pub boost: Option<NotNaNf32>,
#[serde(default)]
pub minimum_should_match: Option<MinimumShouldMatch>,
}

#[derive(Deserialize, Debug, Eq, PartialEq, Clone)]
#[serde(untagged)]
pub enum MinimumShouldMatch {
Str(String),
Int(isize),
}

impl MinimumShouldMatch {
fn resolve(&self, num_should_clauses: usize) -> anyhow::Result<MinimumShouldMatchResolved> {
match self {
MinimumShouldMatch::Str(minimum_should_match_dsl) => {
let Some(percentage) = parse_percentage(minimum_should_match_dsl) else {
anyhow::bail!(
"Unsupported minimum should match dsl {}. quickwit currently only \
supports the format '35%' and `-35%`",
minimum_should_match_dsl
);
};
let min_should_match = percentage * num_should_clauses as isize / 100;
MinimumShouldMatch::Int(min_should_match).resolve(num_should_clauses)
}
MinimumShouldMatch::Int(neg_num_missing_should_clauses)
if *neg_num_missing_should_clauses < 0 =>
{
let num_missing_should_clauses = -neg_num_missing_should_clauses as usize;
if num_missing_should_clauses >= num_should_clauses {
Ok(MinimumShouldMatchResolved::Unspecified)
} else {
Ok(MinimumShouldMatchResolved::Min(
num_should_clauses - num_missing_should_clauses,
))
}
}
MinimumShouldMatch::Int(num_required_should_clauses) => {
let num_required_should_clauses: usize = *num_required_should_clauses as usize;
if num_required_should_clauses > num_should_clauses {
Ok(MinimumShouldMatchResolved::NoMatch)
} else {
Ok(MinimumShouldMatchResolved::Min(num_required_should_clauses))
}
}
}
}
}

#[derive(Deserialize, Debug, Copy, Clone, Eq, PartialEq)]
enum MinimumShouldMatchResolved {
Unspecified,
Min(usize),
NoMatch,
}

fn parse_percentage(s: &str) -> Option<isize> {
let percentage_str = s.strip_suffix('%')?;
let percentage_isize = percentage_str.parse::<isize>().ok()?;
if percentage_isize.abs() > 100 {
return None;
}
Some(percentage_isize)
}

impl BoolQuery {
fn resolve_minimum_should_match(&self) -> anyhow::Result<MinimumShouldMatchResolved> {
let num_should_clauses = self.should.len();
let Some(minimum_should_match) = &self.minimum_should_match else {
return Ok(MinimumShouldMatchResolved::Unspecified);
};
minimum_should_match.resolve(num_should_clauses)
}
}

impl BoolQuery {
Expand All @@ -57,6 +130,7 @@ impl BoolQuery {
should: children,
filter: Vec::new(),
boost: None,
minimum_should_match: None,
}
}
}
Expand All @@ -70,11 +144,25 @@ fn convert_vec(query_dsls: Vec<ElasticQueryDslInner>) -> anyhow::Result<Vec<Quer

impl ConvertibleToQueryAst for BoolQuery {
fn convert_to_query_ast(self) -> anyhow::Result<QueryAst> {
let minimum_should_match_resolved = self.resolve_minimum_should_match()?;
let must = convert_vec(self.must)?;
let must_not = convert_vec(self.must_not)?;
let should = convert_vec(self.should)?;
let filter = convert_vec(self.filter)?;

let minimum_should_match_opt = match minimum_should_match_resolved {
MinimumShouldMatchResolved::Unspecified => None,
MinimumShouldMatchResolved::Min(minimum_should_match) => Some(minimum_should_match),
MinimumShouldMatchResolved::NoMatch => {
return Ok(QueryAst::MatchNone);
}
};
let bool_query_ast = query_ast::BoolQuery {
must: convert_vec(self.must)?,
must_not: convert_vec(self.must_not)?,
should: convert_vec(self.should)?,
filter: convert_vec(self.filter)?,
must,
must_not,
should,
filter,
minimum_should_match: minimum_should_match_opt,
};
Ok(bool_query_ast.into())
}
Expand All @@ -88,8 +176,13 @@ impl From<BoolQuery> for ElasticQueryDslInner {

#[cfg(test)]
mod tests {
use crate::elastic_query_dsl::bool_query::BoolQuery;
use super::parse_percentage;
use crate::elastic_query_dsl::bool_query::{
BoolQuery, MinimumShouldMatch, MinimumShouldMatchResolved,
};
use crate::elastic_query_dsl::term_query::term_query_from_field_value;
use crate::elastic_query_dsl::ConvertibleToQueryAst;
use crate::query_ast::QueryAst;

#[test]
fn test_dsl_bool_query_deserialize_simple() {
Expand All @@ -111,6 +204,7 @@ mod tests {
should: Vec::new(),
filter: Vec::new(),
boost: None,
minimum_should_match: None
}
);
}
Expand All @@ -130,6 +224,7 @@ mod tests {
should: Vec::new(),
filter: vec![term_query_from_field_value("product_id", "2").into(),],
boost: None,
minimum_should_match: None,
}
);
}
Expand All @@ -152,7 +247,96 @@ mod tests {
should: Vec::new(),
filter: Vec::new(),
boost: None,
minimum_should_match: None,
}
);
}

#[test]
fn test_dsl_bool_query_deserialize_minimum_should_match() {
let bool_query: super::BoolQuery = serde_json::from_str(
r#"{
"must": [
{ "term": {"product_id": {"value": "1" }} },
{ "term": {"product_id": {"value": "2" }} }
],
"minimum_should_match": -2
}"#,
)
.unwrap();
assert_eq!(
bool_query.minimum_should_match.as_ref().unwrap(),
&MinimumShouldMatch::Int(-2)
);
}

#[test]
fn test_dsl_query_with_minimum_should_match() {
let bool_query_json = r#"{
"should": [
{ "term": {"product_id": {"value": "1" }} },
{ "term": {"product_id": {"value": "2" }} },
{ "term": {"product_id": {"value": "3" }} }
],
"minimum_should_match": 2
}"#;
let bool_query: BoolQuery = serde_json::from_str(bool_query_json).unwrap();
assert_eq!(bool_query.should.len(), 3);
assert_eq!(
bool_query.minimum_should_match.as_ref().unwrap(),
&super::MinimumShouldMatch::Int(2)
);
let QueryAst::Bool(bool_query_ast) = bool_query.convert_to_query_ast().unwrap() else {
panic!();
};
assert_eq!(bool_query_ast.should.len(), 3);
assert_eq!(bool_query_ast.minimum_should_match, Some(2));
}

#[test]
fn test_parse_percentage() {
assert_eq!(parse_percentage("10%"), Some(10));
assert_eq!(parse_percentage("101%"), None);
assert_eq!(parse_percentage("0%"), Some(0));
assert_eq!(parse_percentage("100%"), Some(100));
assert_eq!(parse_percentage("-20%"), Some(-20));
assert_eq!(parse_percentage("20"), None);
assert_eq!(parse_percentage("20a%"), None);
}

#[test]
fn test_resolve_minimum_should_match() {
assert_eq!(
MinimumShouldMatch::Str("30%".to_string())
.resolve(10)
.unwrap(),
MinimumShouldMatchResolved::Min(3)
);
// not supported yet
assert_eq!(
MinimumShouldMatch::Str("-30%".to_string())
.resolve(10)
.unwrap(),
MinimumShouldMatchResolved::Min(7)
);
assert!(MinimumShouldMatch::Str("-30!".to_string())
.resolve(10)
.is_err());
assert_eq!(
MinimumShouldMatch::Int(10).resolve(11).unwrap(),
MinimumShouldMatchResolved::Min(10)
);
assert_eq!(
MinimumShouldMatch::Int(-10).resolve(11).unwrap(),
MinimumShouldMatchResolved::Min(1)
);
assert_eq!(
MinimumShouldMatch::Int(-12).resolve(11).unwrap(),
MinimumShouldMatchResolved::Unspecified
);
assert_eq!(
MinimumShouldMatch::Int(12).resolve(11).unwrap(),
MinimumShouldMatchResolved::NoMatch
);
}
}
7 changes: 6 additions & 1 deletion quickwit/quickwit-query/src/query_ast/bool_query.rs
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,8 @@ pub struct BoolQuery {
pub should: Vec<QueryAst>,
#[serde(default, skip_serializing_if = "Vec::is_empty")]
pub filter: Vec<QueryAst>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub minimum_should_match: Option<usize>,
}

impl From<BoolQuery> for QueryAst {
Expand All @@ -64,7 +66,10 @@ impl BuildTantivyAst for BoolQuery {
search_fields: &[String],
with_validation: bool,
) -> Result<TantivyQueryAst, InvalidQuery> {
let mut boolean_query = super::tantivy_query_ast::TantivyBoolQuery::default();
let mut boolean_query = super::tantivy_query_ast::TantivyBoolQuery {
minimum_should_match: self.minimum_should_match,
..Default::default()
};
for must in &self.must {
let must_leaf = must.build_tantivy_ast_call(
schema,
Expand Down
2 changes: 2 additions & 0 deletions quickwit/quickwit-query/src/query_ast/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@ impl QueryAst {
must_not,
should,
filter,
minimum_should_match,
}) => {
let must = parse_user_query_in_asts(must, default_search_fields)?;
let must_not = parse_user_query_in_asts(must_not, default_search_fields)?;
Expand All @@ -92,6 +93,7 @@ impl QueryAst {
must_not,
should,
filter,
minimum_should_match,
}
.into())
}
Expand Down
4 changes: 2 additions & 2 deletions quickwit/quickwit-query/src/query_ast/range_query.rs
Original file line number Diff line number Diff line change
Expand Up @@ -420,8 +420,8 @@ mod tests {
type=I64, 1980)), upper_bound: Included(Term(field=6, type=Json, path=hello, \
type=I64, 1989)) } }), Leaf(FastFieldRangeQuery { bounds: BoundsRange { lower_bound: \
Included(Term(field=6, type=Json, path=hello, type=Str, \"1980\")), upper_bound: \
Included(Term(field=6, type=Json, path=hello, type=Str, \"1989\")) } })], filter: [] \
})"
Included(Term(field=6, type=Json, path=hello, type=Str, \"1989\")) } })], filter: \
[], minimum_should_match: None })"
);
}

Expand Down
Loading
Loading