Skip to content

Commit

Permalink
don't error on phrase query schema error for old splits (#5289)
Browse files Browse the repository at this point in the history
* fix running some kinds of queries on older splits
  • Loading branch information
trinity-1686a authored Aug 6, 2024
1 parent 1fa3ca1 commit e911e99
Show file tree
Hide file tree
Showing 3 changed files with 55 additions and 6 deletions.
6 changes: 5 additions & 1 deletion quickwit/quickwit-doc-mapper/src/query_builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -244,7 +244,11 @@ impl<'a, 'b: 'a> QueryAstVisitor<'a> for ExtractPrefixTermRanges<'b> {
&mut self,
phrase_prefix: &'a PhrasePrefixQuery,
) -> Result<(), Self::Err> {
let (_, terms) = phrase_prefix.get_terms(self.schema, self.tokenizer_manager)?;
let terms = match phrase_prefix.get_terms(self.schema, self.tokenizer_manager) {
Ok((_, terms)) => terms,
Err(InvalidQuery::SchemaError(_)) => return Ok(()), /* the query will be nullified when casting to a tantivy ast */
Err(e) => return Err(e),
};
if let Some((_, term)) = terms.last() {
self.add_prefix_term(term.clone(), phrase_prefix.max_expansions, terms.len() > 1);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -277,9 +277,7 @@ async fn test_update_doc_mapping_json_to_object() {
.await;
}

// TODO expected to be fix as part of #5084
#[tokio::test]
#[ignore]
async fn test_update_doc_mapping_tokenizer_default_to_raw() {
let index_id = "update-tokenizer-default-to-raw";
let original_doc_mappings = json!({
Expand All @@ -302,10 +300,11 @@ async fn test_update_doc_mapping_tokenizer_default_to_raw() {
ingest_after_update,
&[
("body:hello", Ok(&[json!({"body": "hello-world"})])),
("body:world", Ok(&[json!({"body": "bonjour-monde"})])),
("body:world", Ok(&[json!({"body": "hello-world"})])),
// phrase queries won't apply to older splits that didn't support them
("body:\"hello world\"", Ok(&[])),
("body:\"hello-world\"", Ok(&[])),
("body:\"hello-worl\"*", Ok(&[])),
("body:bonjour", Ok(&[])),
("body:monde", Ok(&[])),
// the raw tokenizer only returns exact matches
Expand All @@ -314,14 +313,16 @@ async fn test_update_doc_mapping_tokenizer_default_to_raw() {
"body:\"bonjour-monde\"",
Ok(&[json!({"body": "bonjour-monde"})]),
),
(
"body:\"bonjour-mond\"*",
Ok(&[json!({"body": "bonjour-monde"})]),
),
],
)
.await;
}

// TODO expected to be fix as part of #5084
#[tokio::test]
#[ignore]
async fn test_update_doc_mapping_tokenizer_add_position() {
let index_id = "update-tokenizer-add-position";
let original_doc_mappings = json!({
Expand All @@ -348,6 +349,7 @@ async fn test_update_doc_mapping_tokenizer_add_position() {
// phrase queries don't apply to older splits that didn't support them
("body:\"hello-world\"", Ok(&[])),
("body:\"hello world\"", Ok(&[])),
("body:\"hello-worl\"*", Ok(&[])),
("body:bonjour", Ok(&[json!({"body": "bonjour-monde"})])),
("body:monde", Ok(&[json!({"body": "bonjour-monde"})])),
(
Expand All @@ -358,6 +360,10 @@ async fn test_update_doc_mapping_tokenizer_add_position() {
"body:\"bonjour monde\"",
Ok(&[json!({"body": "bonjour-monde"})]),
),
(
"body:\"bonjour-mond\"*",
Ok(&[json!({"body": "bonjour-monde"})]),
),
],
)
.await;
Expand Down Expand Up @@ -408,6 +414,39 @@ async fn test_update_doc_mapping_tokenizer_raw_to_phrase() {
.await;
}

#[tokio::test]
async fn test_update_doc_mapping_unindexed_to_indexed() {
    // Start with `body` stored but not indexed, then update the mapping so it
    // is indexed with the raw tokenizer. Splits created before the update
    // carry no inverted index for `body`, so term queries on that field can
    // only match documents ingested after the mapping change.
    let index_id = "update-not-indexed-to-indexed";
    let docs_before_update = &[json!({"body": "hello"})];
    let docs_after_update = &[json!({"body": "bonjour"})];
    let mappings_before_update = json!({
        "field_mappings": [
            {"name": "body", "type": "text", "indexed": false}
        ]
    });
    let mappings_after_update = json!({
        "field_mappings": [
            {"name": "body", "type": "text", "tokenizer": "raw"}
        ]
    });
    validate_search_across_doc_mapping_updates(
        index_id,
        mappings_before_update,
        docs_before_update,
        mappings_after_update,
        docs_after_update,
        &[
            // term queries won't apply to the older split that wasn't indexed
            ("body:hello", Ok(&[])),
            ("body:IN [hello]", Ok(&[])),
            // they do apply to data ingested after the update
            ("body:bonjour", Ok(&[json!({"body": "bonjour"})])),
            ("body:IN [bonjour]", Ok(&[json!({"body": "bonjour"})])),
        ],
    )
    .await;
}

#[tokio::test]
#[ignore]
async fn test_update_doc_mapping_strict_to_dynamic() {
Expand Down
6 changes: 6 additions & 0 deletions quickwit/quickwit-query/src/query_ast/full_text_query.rs
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,12 @@ impl FullTextParams {
Ok(TantivyBoolQuery::build_clause(operator, leaf_queries).into())
}
FullTextMode::Phrase { slop } => {
if !index_record_option.has_positions() {
return Err(InvalidQuery::SchemaError(
"Applied phrase query on field which does not have positions indexed"
.to_string(),
));
}
let mut phrase_query = TantivyPhraseQuery::new_with_offset(terms);
phrase_query.set_slop(slop);
Ok(phrase_query.into())
Expand Down

0 comments on commit e911e99

Please sign in to comment.