Skip to content

Commit

Permalink
allow querying non existing fields (#5308)
Browse files Browse the repository at this point in the history
* make integration test idempotent

* ignore missing field on lenient query
  • Loading branch information
trinity-1686a authored Aug 30, 2024
1 parent 489c9d6 commit d684b92
Show file tree
Hide file tree
Showing 21 changed files with 123 additions and 38 deletions.
1 change: 1 addition & 0 deletions quickwit/quickwit-doc-mapper/src/doc_mapper.rs
Original file line number Diff line number Diff line change
Expand Up @@ -361,6 +361,7 @@ mod tests {
user_text: "json_field.toto.titi:hello".to_string(),
default_fields: None,
default_operator: BooleanOperand::And,
lenient: false,
}
.parse_user_query(&[])
.unwrap();
Expand Down
1 change: 1 addition & 0 deletions quickwit/quickwit-doc-mapper/src/tag_pruning.rs
Original file line number Diff line number Diff line change
Expand Up @@ -398,6 +398,7 @@ mod test {
user_text: user_query.to_string(),
default_fields: None,
default_operator: BooleanOperand::Or,
lenient: false,
}
.into();
let parsed_query_ast = query_ast.parse_user_query(&[]).unwrap();
Expand Down
3 changes: 2 additions & 1 deletion quickwit/quickwit-jaeger/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1182,7 +1182,8 @@ mod tests {
quickwit_query::query_ast::UserInputQuery {
user_text: "query".to_string(),
default_fields: None,
default_operator: quickwit_query::BooleanOperand::And
default_operator: quickwit_query::BooleanOperand::And,
lenient: false,
}
.into()
);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -190,7 +190,7 @@
"opstamp": 10,
"delete_query": {
"index_uid": "my-index:00000000000000000000000001",
"query_ast": "{\"type\":\"bool\",\"must\":[{\"type\":\"full_text\",\"field\":\"body\",\"text\":\"Harry\",\"params\":{\"mode\":{\"type\":\"phrase_fallback_to_intersection\"}}},{\"type\":\"full_text\",\"field\":\"body\",\"text\":\"Potter\",\"params\":{\"mode\":{\"type\":\"phrase_fallback_to_intersection\"}}}]}"
"query_ast": "{\"type\":\"bool\",\"must\":[{\"type\":\"full_text\",\"field\":\"body\",\"text\":\"Harry\",\"params\":{\"mode\":{\"type\":\"phrase_fallback_to_intersection\"}},\"lenient\":false},{\"type\":\"full_text\",\"field\":\"body\",\"text\":\"Potter\",\"params\":{\"mode\":{\"type\":\"phrase_fallback_to_intersection\"}},\"lenient\":false}]}"
}
}
]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -190,7 +190,7 @@
"opstamp": 10,
"delete_query": {
"index_uid": "my-index:00000000000000000000000001",
"query_ast": "{\"type\":\"bool\",\"must\":[{\"type\":\"full_text\",\"field\":\"body\",\"text\":\"Harry\",\"params\":{\"mode\":{\"type\":\"phrase_fallback_to_intersection\"}}},{\"type\":\"full_text\",\"field\":\"body\",\"text\":\"Potter\",\"params\":{\"mode\":{\"type\":\"phrase_fallback_to_intersection\"}}}]}"
"query_ast": "{\"type\":\"bool\",\"must\":[{\"type\":\"full_text\",\"field\":\"body\",\"text\":\"Harry\",\"params\":{\"mode\":{\"type\":\"phrase_fallback_to_intersection\"}},\"lenient\":false},{\"type\":\"full_text\",\"field\":\"body\",\"text\":\"Potter\",\"params\":{\"mode\":{\"type\":\"phrase_fallback_to_intersection\"}},\"lenient\":false}]}"
}
}
]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ impl ConvertibleToQueryAst for MatchBoolPrefixQuery {
field: self.field,
text: self.params.query,
params: full_text_params,
lenient: self.params.lenient,
}))
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ impl ConvertibleToQueryAst for MatchPhraseQuery {
field: self.field,
text: self.params.query,
params: full_text_params,
lenient: false,
}))
}
}
Expand Down Expand Up @@ -159,6 +160,7 @@ mod tests {
field,
text,
params,
lenient: _,
}) = ast
else {
panic!()
Expand Down
16 changes: 9 additions & 7 deletions quickwit/quickwit-query/src/elastic_query_dsl/match_query.rs
Original file line number Diff line number Diff line change
Expand Up @@ -42,11 +42,11 @@ pub(crate) struct MatchQueryParams {
pub(crate) operator: BooleanOperand,
#[serde(default)]
pub(crate) zero_terms_query: MatchAllOrNone,
// Regardless of this option Quickwit behaves in elasticsearch definition of
// lenient. We include this property here just to accept user queries containing
// this option.
#[serde(default, rename = "lenient")]
pub(crate) _lenient: bool,
// Quickwit and Elastic have different notions of lenient. For us, it means it's okay to
// disregard the part of the query which uses a non-existing column (which Elastic does by
// default). For Elastic, it covers type errors (searching text in an integer field).
#[serde(default)]
pub(crate) lenient: bool,
}

impl ConvertibleToQueryAst for MatchQuery {
Expand All @@ -60,6 +60,7 @@ impl ConvertibleToQueryAst for MatchQuery {
field: self.field,
text: self.params.query,
params: full_text_params,
lenient: self.params.lenient,
}))
}
}
Expand Down Expand Up @@ -88,7 +89,7 @@ impl From<String> for MatchQueryParams {
query,
zero_terms_query: Default::default(),
operator: Default::default(),
_lenient: false,
lenient: false,
}
}
}
Expand Down Expand Up @@ -137,14 +138,15 @@ mod tests {
query: "hello".to_string(),
operator: BooleanOperand::And,
zero_terms_query: crate::MatchAllOrNone::MatchAll,
_lenient: false,
lenient: false,
},
};
let ast = match_query.convert_to_query_ast().unwrap();
let QueryAst::FullText(FullTextQuery {
field,
text,
params,
lenient: _,
}) = ast
else {
panic!()
Expand Down
28 changes: 14 additions & 14 deletions quickwit/quickwit-query/src/elastic_query_dsl/multi_match.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ use crate::elastic_query_dsl::phrase_prefix_query::{
};
use crate::elastic_query_dsl::{ConvertibleToQueryAst, ElasticQueryDslInner};

/// Multi match queries are a bit odd. They end up being expanded into another type query of query.
/// Multi match queries are a bit odd. They end up being expanded into another type of query.
/// In Quickwit, we operate this expansion in generic way at the time of deserialization.
#[derive(Deserialize, Debug, Eq, PartialEq, Clone)]
#[serde(try_from = "MultiMatchQueryForDeserialization")]
Expand All @@ -48,11 +48,11 @@ struct MultiMatchQueryForDeserialization {
#[serde_as(deserialize_as = "OneOrMany<_, PreferMany>")]
#[serde(default)]
fields: Vec<String>,
// Regardless of this option Quickwit behaves in elasticsearch definition of
// lenient. We include this property here just to accept user queries containing
// this option.
#[serde(default, rename = "lenient")]
_lenient: bool,
// Quickwit and Elastic have different notions of lenient. For us, it means it's okay to
// disregard the part of the query which uses a non-existing column (which Elastic does by
// default). For Elastic, it covers type errors (searching text in an integer field).
#[serde(default)]
lenient: bool,
}

fn deserialize_match_query_for_one_field(
Expand Down Expand Up @@ -198,7 +198,7 @@ mod tests {
query: "quick brown fox".to_string(),
operator: crate::BooleanOperand::Or,
zero_terms_query: Default::default(),
_lenient: false,
lenient: false,
},
}
.into(),
Expand All @@ -208,7 +208,7 @@ mod tests {
query: "quick brown fox".to_string(),
operator: crate::BooleanOperand::Or,
zero_terms_query: Default::default(),
_lenient: false,
lenient: false,
},
}
.into(),
Expand All @@ -228,7 +228,7 @@ mod tests {
query: "quick brown fox".to_string(),
operator: crate::BooleanOperand::Or,
zero_terms_query: Default::default(),
_lenient: false,
lenient: false,
},
}
.into(),
Expand All @@ -238,7 +238,7 @@ mod tests {
query: "quick brown fox".to_string(),
operator: crate::BooleanOperand::Or,
zero_terms_query: Default::default(),
_lenient: false,
lenient: false,
},
}
.into(),
Expand All @@ -258,7 +258,7 @@ mod tests {
query: "quick brown fox".to_string(),
operator: crate::BooleanOperand::Or,
zero_terms_query: Default::default(),
_lenient: false,
lenient: false,
},
}
.into(),
Expand All @@ -268,7 +268,7 @@ mod tests {
query: "quick brown fox".to_string(),
operator: crate::BooleanOperand::Or,
zero_terms_query: Default::default(),
_lenient: false,
lenient: false,
},
}
.into(),
Expand Down Expand Up @@ -350,7 +350,7 @@ mod tests {
query: "quick brown".to_string(),
operator: crate::BooleanOperand::Or,
zero_terms_query: Default::default(),
_lenient: false,
lenient: false,
},
}
.into(),
Expand All @@ -360,7 +360,7 @@ mod tests {
query: "quick brown".to_string(),
operator: crate::BooleanOperand::Or,
zero_terms_query: Default::default(),
_lenient: false,
lenient: false,
},
}
.into(),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,8 +43,8 @@ pub(crate) struct QueryStringQuery {
// Quickwit and Elastic have different notions of lenient. For us, it means it's okay to
// disregard the part of the query which uses a non-existing column. For Elastic, it covers
// type errors. This option is forwarded to the resulting `UserInputQuery`.
#[serde(default, rename = "lenient")]
_lenient: bool,
#[serde(default)]
lenient: bool,
}

impl ConvertibleToQueryAst for QueryStringQuery {
Expand All @@ -60,6 +60,7 @@ impl ConvertibleToQueryAst for QueryStringQuery {
user_text: self.query,
default_fields,
default_operator: self.default_operator,
lenient: self.lenient,
};
Ok(user_text_query.into())
}
Expand All @@ -79,7 +80,7 @@ mod tests {
default_operator: crate::BooleanOperand::Or,
default_field: None,
boost: None,
_lenient: false,
lenient: false,
};
let QueryAst::UserInput(user_input_query) =
query_string_query.convert_to_query_ast().unwrap()
Expand All @@ -101,7 +102,7 @@ mod tests {
default_operator: crate::BooleanOperand::Or,
default_field: Some("hello".to_string()),
boost: None,
_lenient: false,
lenient: false,
};
let QueryAst::UserInput(user_input_query) =
query_string_query.convert_to_query_ast().unwrap()
Expand All @@ -123,7 +124,7 @@ mod tests {
default_operator: crate::BooleanOperand::Or,
default_field: Some("hello".to_string()),
boost: None,
_lenient: false,
lenient: false,
};
let err_msg = query_string_query
.convert_to_query_ast()
Expand All @@ -140,7 +141,7 @@ mod tests {
default_field: None,
default_operator: crate::BooleanOperand::And,
boost: None,
_lenient: false,
lenient: false,
};
let QueryAst::UserInput(user_input_query) =
query_string_query.convert_to_query_ast().unwrap()
Expand All @@ -158,7 +159,7 @@ mod tests {
default_field: None,
default_operator: crate::BooleanOperand::Or,
boost: None,
_lenient: false,
lenient: false,
};
let QueryAst::UserInput(user_input_query) =
query_string_query.convert_to_query_ast().unwrap()
Expand All @@ -177,7 +178,7 @@ mod tests {
default_field: None,
default_operator: crate::BooleanOperand::Or,
boost: None,
_lenient: false,
lenient: false,
};
let QueryAst::UserInput(user_input_query) =
query_string_query.convert_to_query_ast().unwrap()
Expand All @@ -200,7 +201,8 @@ mod tests {
assert!(matches!(query_ast, QueryAst::UserInput(UserInputQuery {
user_text,
default_fields,
default_operator
default_operator,
lenient: _,
}) if user_text == "hello world"
&& default_operator == BooleanOperand::Or
&& default_fields == Some(vec!["text".to_string()])));
Expand Down
6 changes: 6 additions & 0 deletions quickwit/quickwit-query/src/query_ast/full_text_query.rs
Original file line number Diff line number Diff line change
Expand Up @@ -227,6 +227,7 @@ pub struct FullTextQuery {
pub field: String,
pub text: String,
pub params: FullTextParams,
pub lenient: bool,
}

impl From<FullTextQuery> for QueryAst {
Expand All @@ -249,6 +250,7 @@ impl BuildTantivyAst for FullTextQuery {
&self.params,
schema,
tokenizer_manager,
self.lenient,
)
}
}
Expand Down Expand Up @@ -323,6 +325,7 @@ mod tests {
mode: BooleanOperand::And.into(),
zero_terms_query: crate::MatchAllOrNone::MatchAll,
},
lenient: false,
};
let mut schema_builder = Schema::builder();
schema_builder.add_text_field("body", TEXT);
Expand All @@ -348,6 +351,7 @@ mod tests {
mode: FullTextMode::Phrase { slop: 1 },
zero_terms_query: crate::MatchAllOrNone::MatchAll,
},
lenient: false,
};
let mut schema_builder = Schema::builder();
schema_builder.add_text_field("body", TEXT);
Expand Down Expand Up @@ -378,6 +382,7 @@ mod tests {
mode: FullTextMode::Phrase { slop: 1 },
zero_terms_query: crate::MatchAllOrNone::MatchAll,
},
lenient: false,
};
let mut schema_builder = Schema::builder();
schema_builder.add_text_field("body", TEXT);
Expand Down Expand Up @@ -407,6 +412,7 @@ mod tests {
mode: BooleanOperand::And.into(),
zero_terms_query: crate::MatchAllOrNone::MatchAll,
},
lenient: false,
};
let mut schema_builder = Schema::builder();
schema_builder.add_text_field("body", TEXT);
Expand Down
6 changes: 6 additions & 0 deletions quickwit/quickwit-query/src/query_ast/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -316,6 +316,7 @@ pub fn query_ast_from_user_text(user_text: &str, default_fields: Option<Vec<Stri
user_text: user_text.to_string(),
default_fields,
default_operator: BooleanOperand::And,
lenient: false,
}
.into()
}
Expand All @@ -334,6 +335,7 @@ mod tests {
user_text: "*".to_string(),
default_fields: Default::default(),
default_operator: Default::default(),
lenient: false,
}
.into();
let schema = tantivy::schema::Schema::builder().build();
Expand All @@ -357,6 +359,7 @@ mod tests {
user_text: "*".to_string(),
default_fields: Default::default(),
default_operator: Default::default(),
lenient: false,
}
.into();
let query_ast_with_parsed_user_query: QueryAst = query_ast.parse_user_query(&[]).unwrap();
Expand All @@ -378,6 +381,7 @@ mod tests {
user_text: "*".to_string(),
default_fields: Default::default(),
default_operator: Default::default(),
lenient: false,
}
.into();
let bool_query_ast: QueryAst = BoolQuery {
Expand Down Expand Up @@ -412,6 +416,7 @@ mod tests {
user_text: "field:hello field:toto".to_string(),
default_fields: None,
default_operator: crate::BooleanOperand::And,
lenient: false,
}
.parse_user_query(&[])
.unwrap();
Expand All @@ -427,6 +432,7 @@ mod tests {
user_text: "field:hello field:toto".to_string(),
default_fields: None,
default_operator: crate::BooleanOperand::Or,
lenient: false,
}
.parse_user_query(&[])
.unwrap();
Expand Down
Loading

0 comments on commit d684b92

Please sign in to comment.