Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

don't error on phrase query schema error for old splits #5289

Merged
merged 4 commits into from
Aug 6, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion quickwit/quickwit-doc-mapper/src/query_builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -244,7 +244,11 @@ impl<'a, 'b: 'a> QueryAstVisitor<'a> for ExtractPrefixTermRanges<'b> {
&mut self,
phrase_prefix: &'a PhrasePrefixQuery,
) -> Result<(), Self::Err> {
let (_, terms) = phrase_prefix.get_terms(self.schema, self.tokenizer_manager)?;
let terms = match phrase_prefix.get_terms(self.schema, self.tokenizer_manager) {
Ok((_, terms)) => terms,
Err(InvalidQuery::SchemaError(_)) => return Ok(()), /* the query will be nullified when casting to a tantivy ast */
Err(e) => return Err(e),
};
if let Some((_, term)) = terms.last() {
self.add_prefix_term(term.clone(), phrase_prefix.max_expansions, terms.len() > 1);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -277,9 +277,7 @@ async fn test_update_doc_mapping_json_to_object() {
.await;
}

// TODO expected to be fixed as part of #5084
#[tokio::test]
#[ignore]
async fn test_update_doc_mapping_tokenizer_default_to_raw() {
let index_id = "update-tokenizer-default-to-raw";
let original_doc_mappings = json!({
Expand All @@ -302,10 +300,11 @@ async fn test_update_doc_mapping_tokenizer_default_to_raw() {
ingest_after_update,
&[
("body:hello", Ok(&[json!({"body": "hello-world"})])),
("body:world", Ok(&[json!({"body": "bonjour-monde"})])),
("body:world", Ok(&[json!({"body": "hello-world"})])),
// phrase queries won't apply to older splits that didn't support them
("body:\"hello world\"", Ok(&[])),
("body:\"hello-world\"", Ok(&[])),
("body:\"hello-worl\"*", Ok(&[])),
("body:bonjour", Ok(&[])),
("body:monde", Ok(&[])),
// the raw tokenizer only returns exact matches
Expand All @@ -314,14 +313,16 @@ async fn test_update_doc_mapping_tokenizer_default_to_raw() {
"body:\"bonjour-monde\"",
Ok(&[json!({"body": "bonjour-monde"})]),
),
(
"body:\"bonjour-mond\"*",
Ok(&[json!({"body": "bonjour-monde"})]),
),
],
)
.await;
}

// TODO expected to be fixed as part of #5084
#[tokio::test]
#[ignore]
async fn test_update_doc_mapping_tokenizer_add_position() {
let index_id = "update-tokenizer-add-position";
let original_doc_mappings = json!({
Expand All @@ -348,6 +349,7 @@ async fn test_update_doc_mapping_tokenizer_add_position() {
// phrase queries don't apply to older splits that didn't support them
("body:\"hello-world\"", Ok(&[])),
("body:\"hello world\"", Ok(&[])),
("body:\"hello-worl\"*", Ok(&[])),
("body:bonjour", Ok(&[json!({"body": "bonjour-monde"})])),
("body:monde", Ok(&[json!({"body": "bonjour-monde"})])),
(
Expand All @@ -358,6 +360,10 @@ async fn test_update_doc_mapping_tokenizer_add_position() {
"body:\"bonjour monde\"",
Ok(&[json!({"body": "bonjour-monde"})]),
),
(
"body:\"bonjour-mond\"*",
Ok(&[json!({"body": "bonjour-monde"})]),
),
],
)
.await;
Expand Down Expand Up @@ -408,6 +414,39 @@ async fn test_update_doc_mapping_tokenizer_raw_to_phrase() {
.await;
}

#[tokio::test]
async fn test_update_doc_mapping_unindexed_to_indexed() {
    // Start with a text field that is not indexed at all, then update the
    // doc mapping to index it with the raw tokenizer. Splits written before
    // the update must simply not match, while data ingested afterwards is
    // fully searchable.
    let docs_before_update = &[json!({"body": "hello"})];
    let docs_after_update = &[json!({"body": "bonjour"})];
    validate_search_across_doc_mapping_updates(
        "update-not-indexed-to-indexed",
        json!({
            "field_mappings": [
                {"name": "body", "type": "text", "indexed": false}
            ]
        }),
        docs_before_update,
        json!({
            "field_mappings": [
                {"name": "body", "type": "text", "tokenizer": "raw"}
            ]
        }),
        docs_after_update,
        &[
            // term queries can't match splits whose field wasn't indexed
            ("body:hello", Ok(&[])),
            ("body:IN [hello]", Ok(&[])),
            // documents ingested after the update are found normally
            ("body:bonjour", Ok(&[json!({"body": "bonjour"})])),
            ("body:IN [bonjour]", Ok(&[json!({"body": "bonjour"})])),
        ],
    )
    .await;
}

#[tokio::test]
#[ignore]
async fn test_update_doc_mapping_strict_to_dynamic() {
Expand Down
6 changes: 6 additions & 0 deletions quickwit/quickwit-query/src/query_ast/full_text_query.rs
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,12 @@ impl FullTextParams {
Ok(TantivyBoolQuery::build_clause(operator, leaf_queries).into())
}
FullTextMode::Phrase { slop } => {
if !index_record_option.has_positions() {
return Err(InvalidQuery::SchemaError(
"Applied phrase query on field which does not have positions indexed"
.to_string(),
));
}
let mut phrase_query = TantivyPhraseQuery::new_with_offset(terms);
phrase_query.set_slop(slop);
Ok(phrase_query.into())
Expand Down
Loading