From e911e9901f29097ff7adb6e7ea328ddcc410f7ce Mon Sep 17 00:00:00 2001
From: trinity-1686a
Date: Tue, 6 Aug 2024 18:17:59 +0200
Subject: [PATCH] don't error on phrase query schema error for old splits
 (#5289)

* fix running some kinds of queries on older splits
---
 .../quickwit-doc-mapper/src/query_builder.rs  |  6 ++-
 .../tests/update_tests/doc_mapping_tests.rs   | 49 +++++++++++++++++----
 .../src/query_ast/full_text_query.rs          |  6 +++
 3 files changed, 55 insertions(+), 6 deletions(-)

diff --git a/quickwit/quickwit-doc-mapper/src/query_builder.rs b/quickwit/quickwit-doc-mapper/src/query_builder.rs
index e244aed79cd..ca74afa2462 100644
--- a/quickwit/quickwit-doc-mapper/src/query_builder.rs
+++ b/quickwit/quickwit-doc-mapper/src/query_builder.rs
@@ -244,7 +244,11 @@ impl<'a, 'b: 'a> QueryAstVisitor<'a> for ExtractPrefixTermRanges<'b> {
         &mut self,
         phrase_prefix: &'a PhrasePrefixQuery,
     ) -> Result<(), Self::Err> {
-        let (_, terms) = phrase_prefix.get_terms(self.schema, self.tokenizer_manager)?;
+        let terms = match phrase_prefix.get_terms(self.schema, self.tokenizer_manager) {
+            Ok((_, terms)) => terms,
+            Err(InvalidQuery::SchemaError(_)) => return Ok(()), /* the query will be nullified when converted to a tantivy AST */
+            Err(e) => return Err(e),
+        };
         if let Some((_, term)) = terms.last() {
             self.add_prefix_term(term.clone(), phrase_prefix.max_expansions, terms.len() > 1);
         }
diff --git a/quickwit/quickwit-integration-tests/src/tests/update_tests/doc_mapping_tests.rs b/quickwit/quickwit-integration-tests/src/tests/update_tests/doc_mapping_tests.rs
index 422ab6e850f..2df523d54c1 100644
--- a/quickwit/quickwit-integration-tests/src/tests/update_tests/doc_mapping_tests.rs
+++ b/quickwit/quickwit-integration-tests/src/tests/update_tests/doc_mapping_tests.rs
@@ -277,9 +277,7 @@ async fn test_update_doc_mapping_json_to_object() {
     .await;
 }
 
-// TODO expected to be fix as part of #5084
 #[tokio::test]
-#[ignore]
 async fn test_update_doc_mapping_tokenizer_default_to_raw() {
     let index_id = "update-tokenizer-default-to-raw";
     let original_doc_mappings = json!({
@@ -302,10 +300,11 @@ async fn test_update_doc_mapping_tokenizer_default_to_raw() {
         ingest_after_update,
         &[
             ("body:hello", Ok(&[json!({"body": "hello-world"})])),
-            ("body:world", Ok(&[json!({"body": "bonjour-monde"})])),
+            ("body:world", Ok(&[json!({"body": "hello-world"})])),
             // phrases queries won't apply to older splits that didn't support them
             ("body:\"hello world\"", Ok(&[])),
             ("body:\"hello-world\"", Ok(&[])),
+            ("body:\"hello-worl\"*", Ok(&[])),
             ("body:bonjour", Ok(&[])),
             ("body:monde", Ok(&[])),
             // the raw tokenizer only returns exact matches
             (
                 "body:\"bonjour-monde\"",
                 Ok(&[json!({"body": "bonjour-monde"})]),
             ),
+            (
+                "body:\"bonjour-mond\"*",
+                Ok(&[json!({"body": "bonjour-monde"})]),
+            ),
         ],
     )
     .await;
 }
 
-// TODO expected to be fix as part of #5084
 #[tokio::test]
-#[ignore]
 async fn test_update_doc_mapping_tokenizer_add_position() {
     let index_id = "update-tokenizer-add-position";
     let original_doc_mappings = json!({
@@ -348,6 +349,7 @@
             // phrases queries don't apply to older splits that didn't support them
             ("body:\"hello-world\"", Ok(&[])),
             ("body:\"hello world\"", Ok(&[])),
+            ("body:\"hello-worl\"*", Ok(&[])),
             ("body:bonjour", Ok(&[json!({"body": "bonjour-monde"})])),
             ("body:monde", Ok(&[json!({"body": "bonjour-monde"})])),
             (
@@ -358,6 +360,10 @@
"body:\"bonjour monde\"", Ok(&[json!({"body": "bonjour-monde"})]), ), + ( + "body:\"bonjour-mond\"*", + Ok(&[json!({"body": "bonjour-monde"})]), + ), ], ) .await; @@ -408,6 +414,39 @@ async fn test_update_doc_mapping_tokenizer_raw_to_phrase() { .await; } +#[tokio::test] +async fn test_update_doc_mapping_unindexed_to_indexed() { + let index_id = "update-not-indexed-to-indexed"; + let original_doc_mappings = json!({ + "field_mappings": [ + {"name": "body", "type": "text", "indexed": false} + ] + }); + let ingest_before_update = &[json!({"body": "hello"})]; + let updated_doc_mappings = json!({ + "field_mappings": [ + {"name": "body", "type": "text", "tokenizer": "raw"} + ] + }); + let ingest_after_update = &[json!({"body": "bonjour"})]; + validate_search_across_doc_mapping_updates( + index_id, + original_doc_mappings, + ingest_before_update, + updated_doc_mappings, + ingest_after_update, + &[ + // term query won't apply to older splits that weren't indexed + ("body:hello", Ok(&[])), + ("body:IN [hello]", Ok(&[])), + // works on newer data + ("body:bonjour", Ok(&[json!({"body": "bonjour"})])), + ("body:IN [bonjour]", Ok(&[json!({"body": "bonjour"})])), + ], + ) + .await; +} + #[tokio::test] #[ignore] async fn test_update_doc_mapping_strict_to_dynamic() { diff --git a/quickwit/quickwit-query/src/query_ast/full_text_query.rs b/quickwit/quickwit-query/src/query_ast/full_text_query.rs index cf1142e5cf6..2e809cdd476 100644 --- a/quickwit/quickwit-query/src/query_ast/full_text_query.rs +++ b/quickwit/quickwit-query/src/query_ast/full_text_query.rs @@ -143,6 +143,12 @@ impl FullTextParams { Ok(TantivyBoolQuery::build_clause(operator, leaf_queries).into()) } FullTextMode::Phrase { slop } => { + if !index_record_option.has_positions() { + return Err(InvalidQuery::SchemaError( + "Applied phrase query on field which does not have positions indexed" + .to_string(), + )); + } let mut phrase_query = TantivyPhraseQuery::new_with_offset(terms); phrase_query.set_slop(slop); Ok(phrase_query.into())