From 1fbc8abdc360978eb2367f3f12201a492608da78 Mon Sep 17 00:00:00 2001 From: Ali Salimli <67149699+elisalimli@users.noreply.github.com> Date: Thu, 7 Mar 2024 21:14:43 +0400 Subject: [PATCH] Index Name issue when using Weaviate (#87) * fix small bugs * feat: make sure index name starts with a capital letter --- service/router.py | 4 +++- vectordbs/weaviate.py | 6 ++++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/service/router.py b/service/router.py index eddf1b9b..da3a5139 100644 --- a/service/router.py +++ b/service/router.py @@ -44,7 +44,9 @@ async def get_documents( if not len(chunks): logger.error(f"No documents found for query: {payload.input}") return [] - is_structured = chunks[0].metadata.get("filetype") in STRUCTURED_DATA + is_structured = ( + chunks[0].metadata and chunks[0].metadata.get("filetype") in STRUCTURED_DATA + ) reranked_chunks = [] if is_structured and payload.interpreter_mode: async with CodeInterpreterService( diff --git a/vectordbs/weaviate.py b/vectordbs/weaviate.py index 59cf0169..9cda042f 100644 --- a/vectordbs/weaviate.py +++ b/vectordbs/weaviate.py @@ -15,6 +15,8 @@ class WeaviateService(BaseVectorDatabase): def __init__( self, index_name: str, dimension: int, credentials: dict, encoder: BaseEncoder ): + # According to Weaviate's documentation, index names should start with a capital letter (https://weaviate.io/developers/weaviate/config-refs/schema#introduction) + index_name = index_name[0].upper() + index_name[1:] # TODO: create index if not exists super().__init__( index_name=index_name, @@ -77,7 +79,7 @@ async def query(self, input: str, top_k: int = 25) -> list[BaseDocumentChunk]: try: response = ( self.client.query.get( - class_name=self.index_name.capitalize(), + class_name=self.index_name, properties=["document_id", "text", "doc_url", "page_number"], ) .with_near_vector(vector) @@ -88,7 +90,7 @@ async def query(self, input: str, top_k: int = 25) -> list[BaseDocumentChunk]: logger.error(f"Missing 'data' in response: {response}") return [] - result_data = response["data"]["Get"][self.index_name.capitalize()] + result_data = response["data"]["Get"][self.index_name] document_chunks = [] for result in result_data: document_chunk = BaseDocumentChunk(