From db5c57a21ac3ff77aa760f9d38914aa061d6b6b0 Mon Sep 17 00:00:00 2001 From: Ali Salimli <67149699+elisalimli@users.noreply.github.com> Date: Sat, 2 Mar 2024 09:05:39 +0400 Subject: [PATCH] fix: Qdrant's delete method (#73) * fix: qdrant delete method * chore: update .env.example * Fix formatting --------- Co-authored-by: Ismail Pelaseyed --- .env.example | 3 +++ .gitignore | 1 + vectordbs/qdrant.py | 25 +++++++++++++++++++++++++ 3 files changed, 29 insertions(+) diff --git a/.env.example b/.env.example index ab88b0e1..31ff8a25 100644 --- a/.env.example +++ b/.env.example @@ -16,3 +16,6 @@ PINECONE_API_KEY= PINECONE_HOST= PINECONE_INDEX= +# Unstructured API +UNSTRUCTURED_IO_API_KEY= +UNSTRUCTURED_IO_SERVER_URL= diff --git a/.gitignore b/.gitignore index 9e1098b5..c1337c83 100644 --- a/.gitignore +++ b/.gitignore @@ -2,3 +2,4 @@ venv .venv .env __pycache__/ +.DS_Store \ No newline at end of file diff --git a/vectordbs/qdrant.py b/vectordbs/qdrant.py index 5e3853e9..7d43dc36 100644 --- a/vectordbs/qdrant.py +++ b/vectordbs/qdrant.py @@ -5,6 +5,7 @@ from semantic_router.encoders import BaseEncoder from tqdm import tqdm +from models.delete import DeleteResponse from models.document import BaseDocumentChunk from vectordbs.base import BaseVectorDatabase @@ -89,6 +90,28 @@ async def query(self, input: str, top_k: int = MAX_QUERY_TOP_K) -> List: ] async def delete(self, file_url: str) -> None: + # client.count( + # collection_name="{collection_name}", + # count_filter=models.Filter( + # must=[ + # models.FieldCondition(key="color", match=models.MatchValue(value="red")), + # ] + # ), + # exact=True, + # ) + + deleted_chunks = self.client.count( + collection_name=self.index_name, + count_filter=rest.Filter( + must=[ + rest.FieldCondition( + key="file_url", match=rest.MatchValue(value=file_url) + ) + ] + ), + exact=True, + ) + self.client.delete( collection_name=self.index_name, points_selector=rest.FilterSelector( @@ -101,3 +124,5 @@ async def delete(self, file_url: str) -> None: ) ), ) + + return DeleteResponse(num_of_deleted_chunks=deleted_chunks.count)