Skip to content

Commit

Permalink
add indices
Browse files Browse the repository at this point in the history
  • Loading branch information
emrgnt-cmplxty committed Nov 5, 2024
1 parent bf96b0b commit dfeaf47
Show file tree
Hide file tree
Showing 29 changed files with 1,509 additions and 250 deletions.
2 changes: 1 addition & 1 deletion py/core/base/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,7 @@
"generate_document_id",
"generate_extraction_id",
"generate_default_user_collection_id",
"generate_collection_id_from_name",
"generate_id_from_label",
"generate_user_id",
"increment_version",
"EntityType",
Expand Down
3 changes: 3 additions & 0 deletions py/core/base/api/models/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
WrappedTokenResponse,
WrappedUserResponse,
)
from shared.api.models.base import PaginatedResultsWrapper, ResultsWrapper
from shared.api.models.ingestion.responses import (
CreateVectorIndexResponse,
IngestionResponse,
Expand Down Expand Up @@ -147,4 +148,6 @@
"WrappedCompletionResponse",
"WrappedRAGResponse",
"WrappedRAGAgentResponse",
"PaginatedResultsWrapper",
"ResultsWrapper",
]
7 changes: 4 additions & 3 deletions py/core/base/providers/database.py
Original file line number Diff line number Diff line change
Expand Up @@ -564,6 +564,7 @@ async def get_chunk(self, chunk_id: UUID) -> Optional[dict[str, Any]]:
@abstractmethod
async def create_index(
self,
name: Optional[str] = None,
table_name: Optional[VectorTableName] = None,
index_measure: IndexMeasure = IndexMeasure.cosine_distance,
index_method: IndexMethod = IndexMethod.auto,
Expand All @@ -578,7 +579,7 @@ async def create_index(

@abstractmethod
async def list_indices(
self, table_name: Optional[VectorTableName] = None
self, offset: int = 0, limit: int = 10, filters: Optional[dict] = None
) -> list[dict]:
pass

Expand Down Expand Up @@ -1479,9 +1480,9 @@ async def create_index(
)

async def list_indices(
self, table_name: Optional[VectorTableName] = None
self, offset: int = 0, limit: int = 10, filters: Optional[dict] = None
) -> list[dict]:
return await self.vector_handler.list_indices(table_name)
return await self.vector_handler.list_indices(offset, limit, filters)

async def delete_index(
self,
Expand Down
4 changes: 2 additions & 2 deletions py/core/base/utils/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,12 @@
format_relations,
format_search_results_for_llm,
format_search_results_for_stream,
generate_collection_id_from_name,
generate_default_prompt_id,
generate_default_user_collection_id,
generate_document_id,
generate_extraction_id,
generate_id,
generate_id_from_label,
generate_user_id,
increment_version,
llm_cost_per_million_tokens,
Expand All @@ -35,7 +35,7 @@
"generate_document_id",
"generate_extraction_id",
"generate_user_id",
"generate_collection_id_from_name",
"generate_id_from_label",
"generate_default_prompt_id",
"RecursiveCharacterTextSplitter",
"TextSplitter",
Expand Down
2 changes: 1 addition & 1 deletion py/core/main/api/v2/ingestion_router.py
Original file line number Diff line number Diff line change
Expand Up @@ -572,7 +572,7 @@ async def list_vector_indices_app(
description=list_vector_indices_descriptions.get("table_name"),
),
auth_user=Depends(self.service.providers.auth.auth_wrapper),
) -> WrappedListVectorIndicesResponse:
):
indices = await self.service.providers.database.list_indices(
table_name=table_name
)
Expand Down
24 changes: 16 additions & 8 deletions py/core/main/api/v3/chunks_router.py
Original file line number Diff line number Diff line change
Expand Up @@ -182,14 +182,16 @@ async def create_chunks(
run_with_orchestration: Optional[bool] = Body(True),
auth_user=Depends(self.providers.auth.auth_wrapper),
) -> ResultsWrapper[list[ChunkIngestionResponse]]:
f"""
"""
Create multiple chunks and process them through the ingestion pipeline.
This endpoint allows creating multiple chunks at once, optionally associating them
with documents and collections. The chunks will be processed asynchronously if
run_with_orchestration is True.
Maximum of {MAX_CHUNKS_PER_REQUEST} chunks can be created in a single request.
Maximum of 100,000 chunks can be created in a single request.
Note, it is not yet possible to add chunks to an existing document using this endpoint.
"""
default_document_id = generate_id()
if len(raw_chunks) > MAX_CHUNKS_PER_REQUEST:
Expand All @@ -212,9 +214,10 @@ async def create_chunks(
document_id = document_id or default_document_id
# Convert UnprocessedChunks to RawChunks for ingestion
raw_chunks_for_doc = [
RawChunk(
UnprocessedChunk(
text=chunk.text if hasattr(chunk, "text") else "",
metadata=chunk.metadata,
id=chunk.id,
)
for chunk in doc_chunks
]
Expand All @@ -229,6 +232,9 @@ async def create_chunks(
"user": auth_user.model_dump_json(),
}

# TODO - Modify create_chunks so that we can add chunks to existing document
# TODO - Modify create_chunks so that we can add chunks to existing document

if run_with_orchestration:
# Run ingestion with orchestration
raw_message = (
Expand Down Expand Up @@ -384,9 +390,11 @@ async def retrieve_chunk(
client = R2RClient("http://localhost:7272")
result = client.chunks.update(
id="b4ac4dd6-5f27-596e-a55b-7cf242ca30aa",
text="Updated content",
metadata={"key": "new value"}
{
"id": first_chunk_id,
"text": "Updated content",
"metadata": {"key": "new value"}
}
)
""",
}
Expand Down Expand Up @@ -498,7 +506,7 @@ async def enrich_chunk(
@self.base_endpoint
async def delete_chunk(
id: Json[UUID] = Path(...),
) -> ResultsWrapper[ChunkResponse]:
) -> ResultsWrapper[bool]:
"""
Delete a specific chunk by ID.
Expand All @@ -512,7 +520,7 @@ async def delete_chunk(
raise R2RException(f"Chunk {id} not found", 404)

await self.services["management"].delete({"$eq": {"chunk_id": id}})
return None
return True

@self.router.get(
"/chunks",
Expand Down
7 changes: 4 additions & 3 deletions py/core/main/api/v3/collections_router.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

from core.base import R2RException, RunType
from core.base.api.models import (
ResultsWrapper,
WrappedAddUserResponse,
WrappedCollectionListResponse,
WrappedCollectionResponse,
Expand Down Expand Up @@ -348,7 +349,7 @@ async def delete_collection(
description="The unique identifier of the collection to delete",
),
auth_user=Depends(self.providers.auth.auth_wrapper),
) -> WrappedDeleteResponse:
) -> ResultsWrapper[bool]:
"""
Delete an existing collection.
Expand Down Expand Up @@ -566,7 +567,7 @@ async def remove_document_from_collection(
description="The unique identifier of the document to remove",
),
auth_user=Depends(self.providers.auth.auth_wrapper),
) -> WrappedDeleteResponse:
) -> ResultsWrapper[bool]:
"""
Remove a document from a collection.
Expand Down Expand Up @@ -774,7 +775,7 @@ async def remove_user_from_collection(
..., description="The unique identifier of the user to remove"
),
auth_user=Depends(self.providers.auth.auth_wrapper),
) -> WrappedDeleteResponse:
) -> ResultsWrapper[bool]:
"""
Remove a user from a collection.
Expand Down
Loading

0 comments on commit dfeaf47

Please sign in to comment.