-
Notifications
You must be signed in to change notification settings - Fork 0
Add Valkey backend support with conditional SORTBY handling #2
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,4 +1,4 @@ | ||
| from typing import List | ||
| from typing import List, Optional | ||
|
|
||
| import numpy as np | ||
|
|
||
|
|
@@ -13,10 +13,11 @@ | |
| from redis.commands.search.query import Query | ||
| from redis.commands.search.field import TagField, VectorField | ||
| from redis.client import Redis | ||
| from redis.exceptions import ResponseError | ||
|
|
||
|
|
||
| class RedisVectorStore(VectorBase): | ||
| """ vector store: Redis | ||
| """ vector store: Redis or Valkey | ||
|
|
||
| :param host: redis host, defaults to "localhost". | ||
| :type host: str | ||
|
|
@@ -40,6 +41,7 @@ class RedisVectorStore(VectorBase): | |
|
|
||
| vector_base = VectorBase("redis", dimension=10) | ||
| """ | ||
|
|
||
| def __init__( | ||
| self, | ||
| host: str = "localhost", | ||
|
|
@@ -61,6 +63,40 @@ def __init__( | |
| self.doc_prefix = f"{self.namespace}doc:" # Prefix with the specified namespace | ||
| self._create_collection(collection_name) | ||
|
|
||
| _sortby_supported: bool | None = None | ||
|
|
||
| def _check_sortby_support(self, index_name: str, sort_field: str) -> bool: | ||
| """ | ||
| Runtime capability detection for FT.SEARCH SORTBY. | ||
| Issues a zero-result query using SORTBY against the provided index. | ||
| - If the server rejects the keyword (e.g., Valkey builds without SORTBY), we cache False and return False. | ||
| - If the server *parses* SORTBY but complains about the field or schema (e.g., 'Property is not sortable' | ||
| or 'No such field'), we treat that as SORTBY being *supported* and bubble up the original error when the real | ||
| query runs. | ||
| NOTE: Valkey KNN results are ALWAYS sorted by distance. | ||
| 'The first name/value pair is for the distance computed' - | ||
| Ref: https://github.com/valkey-io/valkey-search/blob/main/COMMANDS.md | ||
| """ | ||
|
|
||
| if self._sortby_supported is not None: | ||
| return self._sortby_supported | ||
|
|
||
| try: | ||
| self._client.execute_command("FT.SEARCH", index_name, "*", "SORTBY", sort_field, "ASC", "LIMIT", 0, 0) | ||
| self._sortby_supported = True | ||
|
|
||
| except ResponseError as e: | ||
| if "SORTBY" in str(e): | ||
| try: | ||
| info = self._client.info("server") | ||
| is_valkey = info.get("server_name") == "valkey" or "valkey_version" in info | ||
| self._sortby_supported = not is_valkey | ||
| except Exception: | ||
| self._sortby_supported = False | ||
| else: | ||
| self._sortby_supported = True | ||
| return self._sortby_supported | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Suggestion: verify it's Valkey before skipping SORTBY, as it is uncertain if there might be other cases (not Valkey) where SORT BY is not supported and no default sorting is guaranteed. except ResponseError as e:
if "SORTBY" in str(e):
try:
info = self._client.info("server")
is_valkey = info.get("server_name") == "valkey" or "valkey_version" in info
self._sortby_supported = not is_valkey
except Exception:
self._sortby_supported = False
else:
self._sortby_supported = True
return self._sortby_supported
Owner
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. added verification |
||
|
|
||
| def _check_index_exists(self, index_name: str) -> bool: | ||
| """Check if Redis index exists.""" | ||
| try: | ||
|
|
@@ -115,11 +151,13 @@ def search(self, data: np.ndarray, top_k: int = -1): | |
| Query( | ||
| f"*=>[KNN {top_k if top_k > 0 else self.top_k} @vector $vec as score]" | ||
| ) | ||
| .sort_by("score") | ||
| .return_fields("id", "score") | ||
| .paging(0, top_k if top_k > 0 else self.top_k) | ||
| .dialect(2) | ||
| ) | ||
| if self._check_sortby_support(self.collection_name, "score"): | ||
| query = query.sort_by("score") | ||
|
|
||
| query_params = {"vec": data.astype(np.float32).tobytes()} | ||
| results = ( | ||
| self._client.ft(self.collection_name) | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,10 +1,49 @@ | ||
| import numpy as np | ||
| import pytest | ||
| import redis | ||
|
|
||
| from gptcache.embedding import Onnx | ||
| from gptcache.manager import VectorBase | ||
| from gptcache.manager.vector_data.base import VectorData | ||
|
|
||
|
|
||
| def _is_valkey() -> bool: | ||
| try: | ||
| r = redis.Redis(host="127.0.0.1", port=6379, db=0, decode_responses=True) | ||
| info = r.info("server") | ||
| # Valkey exposes server_name: valkey and valkey_version | ||
| return info.get("server_name") == "valkey" or "valkey_version" in info | ||
| except Exception: | ||
| return False | ||
|
|
||
|
|
||
| def extract_score(item): | ||
| assert isinstance(item, (list, tuple)) and len(item) >= 2, f"Unexpected item shape: {item}" | ||
| return float(item[0]) | ||
|
|
||
|
|
||
| def is_nondecreasing(seq, *, tol=1e-9): | ||
| return all(seq[i] <= seq[i + 1] + tol for i in range(len(seq) - 1)) | ||
|
|
||
|
|
||
| def _is_nonincreasing(seq, tol=1e-9): | ||
| return all(seq[i] >= seq[i + 1] - tol for i in range(len(seq) - 1)) | ||
|
|
||
|
|
||
| @pytest.fixture(autouse=True) | ||
| def clean_redis_db(): | ||
| r = redis.Redis(host="127.0.0.1", port=6379, db=0, decode_responses=True) | ||
| try: | ||
| r.flushdb() | ||
| except Exception: | ||
| pass | ||
| yield | ||
| try: | ||
| r.flushdb() | ||
| except Exception: | ||
| pass | ||
|
|
||
|
|
||
| def test_redis_vector_store(): | ||
| encoder = Onnx() | ||
| dim = encoder.dimension | ||
|
|
@@ -24,3 +63,71 @@ def test_redis_vector_store(): | |
| search_res = vector_base.search(np.random.rand(dim), top_k=10) | ||
| print(search_res) | ||
| assert len(search_res) == 5 | ||
|
|
||
|
|
||
| @pytest.mark.parametrize("ascending", [True]) | ||
| def test_redis_vector_store_sortby_supported(monkeypatch, ascending): | ||
| """ | ||
| Force the probe to say SORTBY is supported, ensuring the query adds `.sort_by("score")`. | ||
| With your return shape (score,id), verify scores are ascending (non-decreasing). | ||
| """ | ||
| encoder = Onnx() | ||
| dim = encoder.dimension | ||
| vector_base = VectorBase("redis", dimension=dim) | ||
| vector_base.mul_add([VectorData(id=i, data=np.random.rand(dim)) for i in range(30)]) | ||
|
|
||
| from gptcache.manager.vector_data.redis_vectorstore import RedisVectorStore | ||
|
|
||
| def _always_supports_sortby(self, index_name, field) -> bool: # noqa: ARG002 | ||
| return not _is_valkey() | ||
|
|
||
| monkeypatch.setattr( | ||
| RedisVectorStore, | ||
| "_check_sortby_support", | ||
| _always_supports_sortby, | ||
| raising=True, | ||
| ) | ||
|
|
||
| k = 10 | ||
| query_vec = np.random.rand(dim) | ||
| res = vector_base.search(query_vec, top_k=k) | ||
| assert isinstance(res, (list, tuple)) | ||
| assert len(res) == k | ||
|
|
||
| scores = [extract_score(item) for item in res] | ||
| assert is_nondecreasing(scores), f"Scores not sorted ASC: {scores}" | ||
|
|
||
|
|
||
| def test_redis_vector_store_sortby_unsupported(monkeypatch): | ||
| """ | ||
| Force the probe to say SORTBY is NOT supported (e.g., Valkey without SORTBY). | ||
| The search should still succeed and return k results; | ||
| """ | ||
| encoder = Onnx() | ||
| dim = encoder.dimension | ||
| vector_base = VectorBase("redis", dimension=dim) | ||
| vector_base.mul_add([VectorData(id=i, data=np.random.rand(dim)) for i in range(20)]) | ||
|
|
||
| from gptcache.manager.vector_data.redis_vectorstore import RedisVectorStore | ||
|
|
||
| def _never_supports_sortby(self, index_name, field): # noqa: ARG002 | ||
| return False | ||
|
|
||
| monkeypatch.setattr( | ||
| RedisVectorStore, | ||
| "_check_sortby_support", | ||
| _never_supports_sortby, | ||
| raising=True, | ||
| ) | ||
|
|
||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Suggestion: Add assertion/ test for sorting:
Owner
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. added assertion |
||
| k = 10 | ||
| query_vec = np.random.rand(dim) | ||
| res = vector_base.search(query_vec, top_k=k) | ||
| assert isinstance(res, (list, tuple)) | ||
| assert len(res) == k | ||
|
|
||
| scores = [extract_score(item) for item in res] | ||
| if _is_valkey(): | ||
| assert is_nondecreasing(scores), f"Scores not sorted ASC: {scores}" | ||
| else: | ||
| assert not is_nondecreasing(scores) and not _is_nonincreasing(scores), f"Scores sorted: {scores}" | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Update docstring: Add "NOTE: Valkey KNN results are ALWAYS sorted by distance. 'The first name/value pair is for the distance computed' - Ref: https://github.com/valkey-io/valkey-search/blob/main/COMMANDS.md"
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
added