From fc4eabe22b47800d5795ba0db16525b39c26795b Mon Sep 17 00:00:00 2001 From: Tomoko Uchida Date: Sun, 7 Apr 2024 17:39:56 +0900 Subject: [PATCH 1/2] expose FuzzyTermQuery --- src/query.rs | 44 ++++++++++++++++++++++++++++++++++++++++++- tantivy/tantivy.pyi | 8 ++++++++ tests/tantivy_test.py | 24 +++++++++++++++++++++++ 3 files changed, 75 insertions(+), 1 deletion(-) diff --git a/src/query.rs b/src/query.rs index 06222cb9..f2e0c43f 100644 --- a/src/query.rs +++ b/src/query.rs @@ -1,5 +1,5 @@ use crate::{make_term, Schema}; -use pyo3::{exceptions, prelude::*, types::PyAny}; +use pyo3::{exceptions, prelude::*, types::PyAny, types::PyString}; use tantivy as tv; /// Tantivy's Query @@ -52,4 +52,46 @@ impl Query { inner: Box::new(inner), }) } + + /// Construct a Tantivy's FuzzyTermQuery + #[staticmethod] + #[pyo3(signature = (schema, field_name, text, distance = 1, transposition_cost_one = true))] + pub(crate) fn fuzzy_term_query( + schema: &Schema, + field_name: &str, + text: &PyString, + distance: u8, + transposition_cost_one: bool, + ) -> PyResult { + let term = make_term(&schema.inner, field_name, &text)?; + let inner = tv::query::FuzzyTermQuery::new( + term, + distance, + transposition_cost_one, + ); + Ok(Query { + inner: Box::new(inner), + }) + } + + /// Construct a Tantivy's FuzzyTermQuery of the term prefix + #[staticmethod] + #[pyo3(signature = (schema, field_name, text, distance = 1, transposition_cost_one = true))] + pub(crate) fn fuzzy_term_query_prefix( + schema: &Schema, + field_name: &str, + text: &PyString, + distance: u8, + transposition_cost_one: bool, + ) -> PyResult { + let term = make_term(&schema.inner, field_name, &text)?; + let inner = tv::query::FuzzyTermQuery::new_prefix( + term, + distance, + transposition_cost_one, + ); + Ok(Query { + inner: Box::new(inner), + }) + } } diff --git a/tantivy/tantivy.pyi b/tantivy/tantivy.pyi index 35b058f8..f8a544e0 100644 --- a/tantivy/tantivy.pyi +++ b/tantivy/tantivy.pyi @@ -197,6 +197,14 @@ class Query: def 
all_query() -> Query: pass + @staticmethod + def fuzzy_term_query(schema: Schema, field_name: str, text: str, distance: int = 1, transposition_cost_one: bool = True) -> Query: + pass + + @staticmethod + def fuzzy_term_query_prefix(schema: Schema, field_name: str, text: str, distance: int = 1, transposition_cost_one: bool = True) -> Query: + pass + class Order(Enum): Asc = 1 diff --git a/tests/tantivy_test.py b/tests/tantivy_test.py index a89376c4..eba1d2a0 100644 --- a/tests/tantivy_test.py +++ b/tests/tantivy_test.py @@ -771,3 +771,27 @@ def test_all_query(self, ram_index): result = index.searcher().search(query, 10) assert len(result.hits) == 3 + + def test_fuzzy_term_query(self, ram_index): + index = ram_index + query = Query.fuzzy_term_query(index.schema, "title", "ice") + + # the query "ice" should match "mice" + result = index.searcher().search(query, 10) + assert len(result.hits) == 1 + _, doc_address = result.hits[0] + searched_doc = index.searcher().doc(doc_address) + assert searched_doc["title"] == ["Of Mice and Men"] + + def test_fuzzy_term_query_prefix(self, ram_index): + index = ram_index + query = Query.fuzzy_term_query_prefix(index.schema, "title", "man") + + # the query "man" should match both "man" and "men" + result = index.searcher().search(query, 10) + assert len(result.hits) == 2 + titles = set() + for _, doc_address in result.hits: + titles.update(index.searcher().doc(doc_address)["title"]) + assert titles == {"The Old Man and the Sea", "Of Mice and Men"} + From cda597cb19b14d775a02c5c7c0c93c48ab344ab3 Mon Sep 17 00:00:00 2001 From: Tomoko Uchida Date: Sat, 13 Apr 2024 18:03:31 +0900 Subject: [PATCH 2/2] add prefix param to fuzzy_term_query(); remove fuzzy_term_query_prefix() --- src/query.rs | 51 ++++++++++++++++++++----------------------- tantivy/tantivy.pyi | 6 +---- tests/tantivy_test.py | 3 ++- 3 files changed, 27 insertions(+), 33 deletions(-) diff --git a/src/query.rs b/src/query.rs index f2e0c43f..bafbba2b 100644 --- a/src/query.rs +++ 
b/src/query.rs @@ -54,42 +54,39 @@ impl Query { } /// Construct a Tantivy's FuzzyTermQuery + /// + /// # Arguments + /// + /// * `schema` - Schema of the target index. + /// * `field_name` - Field name to be searched. + /// * `text` - String representation of the query term. + /// * `distance` - (Optional) Edit distance you are going to allow. When not specified, the default is 1. + /// * `transposition_cost_one` - (Optional) If true, a transposition cost will be 1; otherwise it will be 2. When not specified, the default is true. + /// * `prefix` - (Optional) If true, only prefix matched results are returned. When not specified, the default is false. #[staticmethod] - #[pyo3(signature = (schema, field_name, text, distance = 1, transposition_cost_one = true))] + #[pyo3(signature = (schema, field_name, text, distance = 1, transposition_cost_one = true, prefix = false))] pub(crate) fn fuzzy_term_query( schema: &Schema, field_name: &str, text: &PyString, distance: u8, transposition_cost_one: bool, + prefix: bool, ) -> PyResult { let term = make_term(&schema.inner, field_name, &text)?; - let inner = tv::query::FuzzyTermQuery::new( - term, - distance, - transposition_cost_one, - ); - Ok(Query { - inner: Box::new(inner), - }) - } - - /// Construct a Tantivy's FuzzyTermQuery of the term prefix - #[staticmethod] - #[pyo3(signature = (schema, field_name, text, distance = 1, transposition_cost_one = true))] - pub(crate) fn fuzzy_term_query_prefix( - schema: &Schema, - field_name: &str, - text: &PyString, - distance: u8, - transposition_cost_one: bool, - ) -> PyResult { - let term = make_term(&schema.inner, field_name, &text)?; - let inner = tv::query::FuzzyTermQuery::new_prefix( - term, - distance, - transposition_cost_one, - ); + let inner = if prefix { + tv::query::FuzzyTermQuery::new_prefix( + term, + distance, + transposition_cost_one, + ) + } else { + tv::query::FuzzyTermQuery::new( + term, + distance, + transposition_cost_one, + ) + }; Ok(Query { inner: Box::new(inner),
}) diff --git a/tantivy/tantivy.pyi b/tantivy/tantivy.pyi index f8a544e0..db952916 100644 --- a/tantivy/tantivy.pyi +++ b/tantivy/tantivy.pyi @@ -198,11 +198,7 @@ class Query: pass @staticmethod - def fuzzy_term_query(schema: Schema, field_name: str, text: str, distance: int = 1, transposition_cost_one: bool = True) -> Query: - pass - - @staticmethod - def fuzzy_term_query_prefix(schema: Schema, field_name: str, text: str, distance: int = 1, transposition_cost_one: bool = True) -> Query: + def fuzzy_term_query(schema: Schema, field_name: str, text: str, distance: int = 1, transposition_cost_one: bool = True, prefix = False) -> Query: pass diff --git a/tests/tantivy_test.py b/tests/tantivy_test.py index eba1d2a0..9338c8e1 100644 --- a/tests/tantivy_test.py +++ b/tests/tantivy_test.py @@ -785,7 +785,7 @@ def test_fuzzy_term_query(self, ram_index): def test_fuzzy_term_query_prefix(self, ram_index): index = ram_index - query = Query.fuzzy_term_query_prefix(index.schema, "title", "man") + query = Query.fuzzy_term_query(index.schema, "title", "man", prefix=True) # the query "man" should match both "man" and "men" result = index.searcher().search(query, 10) @@ -795,3 +795,4 @@ def test_fuzzy_term_query_prefix(self, ram_index): titles.update(index.searcher().doc(doc_address)["title"]) assert titles == {"The Old Man and the Sea", "Of Mice and Men"} +