diff --git a/service/router.py b/service/router.py index eddf1b9b..da3a5139 100644 --- a/service/router.py +++ b/service/router.py @@ -44,7 +44,9 @@ async def get_documents( if not len(chunks): logger.error(f"No documents found for query: {payload.input}") return [] - is_structured = chunks[0].metadata.get("filetype") in STRUCTURED_DATA + is_structured = ( + chunks[0].metadata and chunks[0].metadata.get("filetype") in STRUCTURED_DATA + ) reranked_chunks = [] if is_structured and payload.interpreter_mode: async with CodeInterpreterService( diff --git a/vectordbs/weaviate.py b/vectordbs/weaviate.py index 59cf0169..9cda042f 100644 --- a/vectordbs/weaviate.py +++ b/vectordbs/weaviate.py @@ -15,6 +15,8 @@ class WeaviateService(BaseVectorDatabase): def __init__( self, index_name: str, dimension: int, credentials: dict, encoder: BaseEncoder ): + # According to Weaviate's documentation, index names should start with a capital letter (https://weaviate.io/developers/weaviate/config-refs/schema#introduction) + index_name = index_name[0].upper() + index_name[1:] # TODO: create index if not exists super().__init__( index_name=index_name, @@ -77,7 +79,7 @@ async def query(self, input: str, top_k: int = 25) -> list[BaseDocumentChunk]: try: response = ( self.client.query.get( - class_name=self.index_name.capitalize(), + class_name=self.index_name, properties=["document_id", "text", "doc_url", "page_number"], ) .with_near_vector(vector) @@ -88,7 +90,7 @@ async def query(self, input: str, top_k: int = 25) -> list[BaseDocumentChunk]: logger.error(f"Missing 'data' in response: {response}") return [] - result_data = response["data"]["Get"][self.index_name.capitalize()] + result_data = response["data"]["Get"][self.index_name] document_chunks = [] for result in result_data: document_chunk = BaseDocumentChunk(