Skip to content

Commit

Permalink
new hybrid document search
Browse files (browse the repository at this point in the history)
  • Loading branch information
emrgnt-cmplxty committed Nov 11, 2024
1 parent 4fa24b7 commit c2bf92c
Show file tree
Hide file tree
Showing 9 changed files with 197 additions and 92 deletions.
19 changes: 12 additions & 7 deletions py/core/base/providers/database.py
Original file line number Diff line number Diff line change
Expand Up @@ -521,11 +521,11 @@ async def full_text_search(
) -> list[VectorSearchResult]:
pass

- @abstractmethod
- async def search_documents(
- self, query_text: str, settings: DocumentSearchSettings
- ) -> list[dict]:
- pass
+ # @abstractmethod
+ # async def search_documents(
+ # self, query_text: str, settings: DocumentSearchSettings
+ # ) -> list[dict]:
+ # pass

@abstractmethod
async def hybrid_search(
Expand Down Expand Up @@ -1430,9 +1430,14 @@ async def hybrid_search(
)

async def search_documents(
- self, query_text: str, settings: DocumentSearchSettings
+ self,
+ query_text: str,
+ settings: DocumentSearchSettings,
+ query_embedding: Optional[list[float]] = None,
) -> list[dict]:
- return await self.vector_handler.search_documents(query_text, settings)
+ return await self.document_handler.search_documents(
+ query_text, query_embedding, settings
+ )

async def delete(
self, filters: dict[str, Any]
Expand Down
6 changes: 6 additions & 0 deletions py/core/main/api/retrieval_router.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,8 +127,14 @@ async def search_documents(
Allowed operators include `eq`, `neq`, `gt`, `gte`, `lt`, `lte`, `like`, `ilike`, `in`, and `nin`.
"""

+ query_embedding = (
+ await self.service.providers.embedding.async_get_embedding(
+ query
+ )
+ )
results = await self.service.search_documents(
query=query,
+ query_embedding=query_embedding,
settings=settings,
)
return results
Expand Down
2 changes: 1 addition & 1 deletion py/core/main/services/ingestion_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -252,7 +252,7 @@ async def augment_document_info(
document_info.summary = response.choices[0].message.content

embedding = await self.providers.embedding.async_get_embedding(
- text = document_info.summary,
+ text=document_info.summary,
)
document_info.summary_embedding = embedding
return
Expand Down
3 changes: 3 additions & 0 deletions py/core/main/services/retrieval_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import time
from typing import Optional
from uuid import UUID

from fastapi import HTTPException

from core import R2RStreamingRAGAgent
Expand Down Expand Up @@ -122,11 +123,13 @@ async def search_documents(
self,
query: str,
settings: DocumentSearchSettings,
+ query_embedding: Optional[list[float]] = None,
) -> list[dict]:

return await self.providers.database.search_documents(
query_text=query,
settings=settings,
+ query_embedding=query_embedding,
)

@telemetry_event("Completion")
Expand Down
2 changes: 1 addition & 1 deletion py/core/pipes/retrieval/vector_search_pipe.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ async def search( # type: ignore
message,
purpose=EmbeddingPurpose.QUERY,
)

search_results = await (
self.database_provider.hybrid_search(
query_vector=query_vector,
Expand Down
Loading

0 comments on commit c2bf92c

Please sign in to comment.