From e911e9901f29097ff7adb6e7ea328ddcc410f7ce Mon Sep 17 00:00:00 2001
From: trinity-1686a
Date: Tue, 6 Aug 2024 18:17:59 +0200
Subject: [PATCH] don't error on phrase query schema error for old splits
 (#5289)

* fix running some kinds of queries on older splits
---
 .../quickwit-doc-mapper/src/query_builder.rs  |  6 ++-
 .../tests/update_tests/doc_mapping_tests.rs   | 49 +++++++++++++++++----
 .../src/query_ast/full_text_query.rs          |  6 +++
 3 files changed, 55 insertions(+), 6 deletions(-)

diff --git a/quickwit/quickwit-doc-mapper/src/query_builder.rs b/quickwit/quickwit-doc-mapper/src/query_builder.rs
index e244aed79cd..ca74afa2462 100644
--- a/quickwit/quickwit-doc-mapper/src/query_builder.rs
+++ b/quickwit/quickwit-doc-mapper/src/query_builder.rs
@@ -244,7 +244,11 @@ impl<'a, 'b: 'a> QueryAstVisitor<'a> for ExtractPrefixTermRanges<'b> {
         &mut self,
         phrase_prefix: &'a PhrasePrefixQuery,
     ) -> Result<(), Self::Err> {
-        let (_, terms) = phrase_prefix.get_terms(self.schema, self.tokenizer_manager)?;
+        let terms = match phrase_prefix.get_terms(self.schema, self.tokenizer_manager) {
+            Ok((_, terms)) => terms,
+            Err(InvalidQuery::SchemaError(_)) => return Ok(()), /* the query will be nullified when converted to a tantivy AST */
+            Err(e) => return Err(e),
+        };
         if let Some((_, term)) = terms.last() {
             self.add_prefix_term(term.clone(), phrase_prefix.max_expansions, terms.len() > 1);
         }
diff --git a/quickwit/quickwit-integration-tests/src/tests/update_tests/doc_mapping_tests.rs b/quickwit/quickwit-integration-tests/src/tests/update_tests/doc_mapping_tests.rs
index 422ab6e850f..2df523d54c1 100644
--- a/quickwit/quickwit-integration-tests/src/tests/update_tests/doc_mapping_tests.rs
+++ b/quickwit/quickwit-integration-tests/src/tests/update_tests/doc_mapping_tests.rs
@@ -277,9 +277,7 @@ async fn test_update_doc_mapping_json_to_object() {
     .await;
 }
 
-// TODO expected to be fix as part of #5084
 #[tokio::test]
-#[ignore]
 async fn test_update_doc_mapping_tokenizer_default_to_raw() {
     let index_id = "update-tokenizer-default-to-raw";
     let original_doc_mappings = json!({
@@ -302,10 +300,11 @@ async fn test_update_doc_mapping_tokenizer_default_to_raw() {
         ingest_after_update,
         &[
             ("body:hello", Ok(&[json!({"body": "hello-world"})])),
-            ("body:world", Ok(&[json!({"body": "bonjour-monde"})])),
+            ("body:world", Ok(&[json!({"body": "hello-world"})])),
             // phrases queries won't apply to older splits that didn't support them
             ("body:\"hello world\"", Ok(&[])),
             ("body:\"hello-world\"", Ok(&[])),
+            ("body:\"hello-worl\"*", Ok(&[])),
             ("body:bonjour", Ok(&[])),
             ("body:monde", Ok(&[])),
             // the raw tokenizer only returns exact matches
             (
                 "body:\"bonjour-monde\"",
                 Ok(&[json!({"body": "bonjour-monde"})]),
             ),
+            (
+                "body:\"bonjour-mond\"*",
+                Ok(&[json!({"body": "bonjour-monde"})]),
+            ),
         ],
     )
     .await;
 }
 
-// TODO expected to be fix as part of #5084
 #[tokio::test]
-#[ignore]
 async fn test_update_doc_mapping_tokenizer_add_position() {
     let index_id = "update-tokenizer-add-position";
     let original_doc_mappings = json!({
@@ -348,6 +349,7 @@
             // phrases queries don't apply to older splits that didn't support them
             ("body:\"hello-world\"", Ok(&[])),
             ("body:\"hello world\"", Ok(&[])),
+            ("body:\"hello-worl\"*", Ok(&[])),
             ("body:bonjour", Ok(&[json!({"body": "bonjour-monde"})])),
             ("body:monde", Ok(&[json!({"body": "bonjour-monde"})])),
             (
@@ -358,6 +360,10 @@
"body:\"bonjour monde\"", Ok(&[json!({"body": "bonjour-monde"})]), ), + ( + "body:\"bonjour-mond\"*", + Ok(&[json!({"body": "bonjour-monde"})]), + ), ], ) .await; @@ -408,6 +414,39 @@ async fn test_update_doc_mapping_tokenizer_raw_to_phrase() { .await; } +#[tokio::test] +async fn test_update_doc_mapping_unindexed_to_indexed() { + let index_id = "update-not-indexed-to-indexed"; + let original_doc_mappings = json!({ + "field_mappings": [ + {"name": "body", "type": "text", "indexed": false} + ] + }); + let ingest_before_update = &[json!({"body": "hello"})]; + let updated_doc_mappings = json!({ + "field_mappings": [ + {"name": "body", "type": "text", "tokenizer": "raw"} + ] + }); + let ingest_after_update = &[json!({"body": "bonjour"})]; + validate_search_across_doc_mapping_updates( + index_id, + original_doc_mappings, + ingest_before_update, + updated_doc_mappings, + ingest_after_update, + &[ + // term query won't apply to older splits that weren't indexed + ("body:hello", Ok(&[])), + ("body:IN [hello]", Ok(&[])), + // works on newer data + ("body:bonjour", Ok(&[json!({"body": "bonjour"})])), + ("body:IN [bonjour]", Ok(&[json!({"body": "bonjour"})])), + ], + ) + .await; +} + #[tokio::test] #[ignore] async fn test_update_doc_mapping_strict_to_dynamic() { diff --git a/quickwit/quickwit-query/src/query_ast/full_text_query.rs b/quickwit/quickwit-query/src/query_ast/full_text_query.rs index cf1142e5cf6..2e809cdd476 100644 --- a/quickwit/quickwit-query/src/query_ast/full_text_query.rs +++ b/quickwit/quickwit-query/src/query_ast/full_text_query.rs @@ -143,6 +143,12 @@ impl FullTextParams { Ok(TantivyBoolQuery::build_clause(operator, leaf_queries).into()) } FullTextMode::Phrase { slop } => { + if !index_record_option.has_positions() { + return Err(InvalidQuery::SchemaError( + "Applied phrase query on field which does not have positions indexed" + .to_string(), + )); + } let mut phrase_query = TantivyPhraseQuery::new_with_offset(terms); phrase_query.set_slop(slop); Ok(phrase_query.into())