Skip to content

Commit

Permalink
Expose Tantivy's PhraseQuery (#234)
Browse files Browse the repository at this point in the history
  • Loading branch information
mocobeta authored May 3, 2024
1 parent 3d39495 commit 03b1c89
Show file tree
Hide file tree
Showing 3 changed files with 77 additions and 9 deletions.
42 changes: 42 additions & 0 deletions src/query.rs
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,48 @@ impl Query {
})
}

/// Construct a Tantivy `PhraseQuery` with optional custom offsets and slop.
///
/// # Arguments
///
/// * `schema` - Schema of the target index.
/// * `field_name` - Field name to be searched.
/// * `words` - Word list that constructs the phrase. Each item is either a term
///   value or an `(offset, term value)` tuple giving the term's offset in the
///   phrase. Items without an explicit offset use their index in the list.
/// * `slop` - (Optional) The number of gaps permitted between the words in the
///   query phrase. Default is 0.
///
/// # Errors
///
/// Returns a `ValueError` when `words` is empty or holds a single word, and
/// propagates any error returned by `make_term` for an individual word.
#[staticmethod]
#[pyo3(signature = (schema, field_name, words, slop = 0))]
pub(crate) fn phrase_query(
    schema: &Schema,
    field_name: &str,
    words: Vec<&PyAny>,
    slop: u32,
) -> PyResult<Query> {
    // Tantivy's `PhraseQuery` constructors assert that there is more than one
    // term. Reject shorter input here so Python callers get a `ValueError`
    // instead of a Rust panic surfacing through the binding.
    match words.len() {
        0 => {
            return Err(exceptions::PyValueError::new_err(
                "words must not be empty.",
            ));
        }
        1 => {
            return Err(exceptions::PyValueError::new_err(
                "words must contain at least two terms.",
            ));
        }
        _ => {}
    }

    let mut terms_with_offset = Vec::with_capacity(words.len());
    for (idx, word) in words.into_iter().enumerate() {
        // An `(offset, value)` tuple carries a custom offset; any other item is
        // a bare term value positioned at its index in the list.
        let (offset, value) = word.extract().unwrap_or((idx, word));
        let term = make_term(&schema.inner, field_name, value)?;
        terms_with_offset.push((offset, term));
    }

    let inner = tv::query::PhraseQuery::new_with_offset_and_slop(
        terms_with_offset,
        slop,
    );
    Ok(Query {
        inner: Box::new(inner),
    })
}

/// Construct a Tantivy's BooleanQuery
#[staticmethod]
#[pyo3(signature = (subqueries))]
Expand Down
12 changes: 3 additions & 9 deletions tantivy/tantivy.pyi
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import datetime
from enum import Enum
from typing import Any, Optional, Sequence
from typing import Any, Optional, Sequence, Union

class Schema:
pass
Expand Down Expand Up @@ -206,16 +206,10 @@ class Query:
pass

@staticmethod
def fuzzy_term_query(
schema: Schema,
field_name: str,
text: str,
distance: int = 1,
transposition_cost_one: bool = True,
prefix=False,
) -> Query:
def phrase_query(
    schema: Schema,
    field_name: str,
    words: list[Union[str, tuple[int, str]]],
    slop: int = 0,
) -> Query:
    """Construct a Tantivy PhraseQuery.

    Args:
        schema: Schema of the target index.
        field_name: Field name to be searched.
        words: Words that make up the phrase. Each item is either a term
            text or an ``(offset, term text)`` tuple; items without an
            explicit offset use their index in the list.
        slop: Number of gaps permitted between the words in the phrase.

    Raises:
        ValueError: If ``words`` is empty.
    """
    pass


@staticmethod
def boolean_query(
    subqueries: Sequence[tuple[Occur, Query]],
) -> Query:
    """Construct a Tantivy BooleanQuery from ``(Occur, Query)`` pairs."""
    pass
Expand Down
32 changes: 32 additions & 0 deletions tests/tantivy_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -801,6 +801,38 @@ def test_all_query(self, ram_index):
result = index.searcher().search(query, 10)
assert len(result.hits) == 3

def test_phrase_query(self, ram_index):
    """Exercise Query.phrase_query: word order, custom offsets, slop, empty input."""
    index = ram_index
    searcher = index.searcher()

    def count_hits(words, **kwargs):
        # Build the phrase query on the "title" field and return the hit count.
        query = Query.phrase_query(index.schema, "title", words, **kwargs)
        return len(searcher.search(query, 10).hits)

    # Should match the title "The Old Man and the Sea".
    assert count_hits(["old", "man"]) == 1

    # Reversed word order shouldn't match any document.
    assert count_hits(["man", "old"]) == 0

    # Should match "The Old Man and the Sea" with the given offsets.
    assert count_hits([(1, "man"), (0, "old")]) == 1

    # Shouldn't match any document with the default slop of 0.
    assert count_hits(["man", "sea"]) == 0

    # Should match the title "The Old Man and the Sea" with slop 2.
    assert count_hits(["man", "sea"], slop=2) == 1

    # `match` is a regular expression, so the trailing dot is escaped to
    # require a literal "." rather than any character.
    with pytest.raises(ValueError, match=r"words must not be empty\."):
        Query.phrase_query(index.schema, "title", [])

def test_fuzzy_term_query(self, ram_index):
index = ram_index
query = Query.fuzzy_term_query(index.schema, "title", "ice")
Expand Down

0 comments on commit 03b1c89

Please sign in to comment.