diff --git a/py/core/main/api/v3/chunks_router.py b/py/core/main/api/v3/chunks_router.py index 2ba10bc15..2ba1c51d1 100644 --- a/py/core/main/api/v3/chunks_router.py +++ b/py/core/main/api/v3/chunks_router.py @@ -332,7 +332,9 @@ async def search_chunks( from r2r import R2RClient client = R2RClient("http://localhost:7272") -chunk = client.chunks.retrieve(id="b4ac4dd6-5f27-596e-a55b-7cf242ca30aa") +chunk = client.chunks.retrieve( + id="b4ac4dd6-5f27-596e-a55b-7cf242ca30aa" +) """, } ] diff --git a/py/core/main/api/v3/collections_router.py b/py/core/main/api/v3/collections_router.py index 1e937d08e..7fa95d09c 100644 --- a/py/core/main/api/v3/collections_router.py +++ b/py/core/main/api/v3/collections_router.py @@ -1,9 +1,10 @@ import logging +import textwrap from typing import List, Optional, Union from uuid import UUID from fastapi import Body, Depends, Path, Query -from pydantic import BaseModel +from pydantic import BaseModel, Field from core.base import R2RException, RunType from core.base.api.models import ( @@ -26,8 +27,10 @@ class CollectionConfig(BaseModel): - name: str - description: Optional[str] = None + name: str = Field(..., description="The name of the collection") + description: Optional[str] = Field( + None, description="An optional description of the collection" + ) class CollectionsRouter(BaseRouterV3): @@ -43,140 +46,751 @@ def __init__( super().__init__(providers, services, orchestration_provider, run_type) def _setup_routes(self): - @self.router.post("/collections") + @self.router.post( + "/collections", + summary="Create a new collection", + openapi_extra={ + "x-codeSamples": [ + { + "lang": "Python", + "source": textwrap.dedent( + """ + from r2r import R2RClient + + client = R2RClient("http://localhost:7272") + # when using auth, do client.login(...) + + result = client.collections.create( + name="My New Collection", + description="This is a sample collection" + ) + """ + ), + }, + { + "lang": "cURL", + "source": textwrap.dedent( + """ + curl -X POST "https://api.example.com/v3/collections" \\ + -H "Content-Type: application/json" \\ + -H "Authorization: Bearer YOUR_API_KEY" \\ + -d '{"name": "My New Collection", "description": "This is a sample collection"}' + """ + ), + }, + ] + }, + ) @self.base_endpoint async def create_collection( - config: CollectionConfig = Body(...), + config: CollectionConfig = Body( + ..., description="The configuration for the new collection" + ), auth_user=Depends(self.providers.auth.auth_wrapper), ) -> WrappedCollectionResponse: """ Create a new collection and automatically add the creating user to it. + + This endpoint allows authenticated users to create a new collection with a specified name + and optional description. The user creating the collection is automatically added as a member. + + Args: + config (CollectionConfig): The configuration for the new collection, including name and description. + auth_user: The authenticated user making the request. + + Returns: + WrappedCollectionResponse: Details of the newly created collection. + + Raises: + R2RException: If there's an error in creating the collection. """ pass - @self.router.get("/collections") + @self.router.get( + "/collections", + summary="List collections", + openapi_extra={ + "x-codeSamples": [ + { + "lang": "Python", + "source": textwrap.dedent( + """ + from r2r import R2RClient + + client = R2RClient("http://localhost:7272") + # when using auth, do client.login(...) + + result = client.collections.list( + offset=0, + limit=10, + name="Sample", + sort_by="created_at", + sort_order="desc" + ) + """ + ), + }, + { + "lang": "cURL", + "source": textwrap.dedent( + """ + curl -X GET "https://api.example.com/v3/collections?offset=0&limit=10&name=Sample&sort_by=created_at&sort_order=desc" \\ + -H "Authorization: Bearer YOUR_API_KEY" + """ + ), + }, + ] + }, + ) @self.base_endpoint async def list_collections( - offset: int = Query(0, ge=0), - limit: int = Query(100, ge=1, le=1000), - name: Optional[str] = Query(None), - sort_by: Optional[str] = Query(None), - sort_order: Optional[str] = Query("desc"), + offset: int = Query( + 0, + ge=0, + description="The number of collections to skip (for pagination)", + ), + limit: int = Query( + 100, + ge=1, + le=1000, + description="The maximum number of collections to return (1-1000)", + ), + name: Optional[str] = Query( + None, + description="Filter collections by name (case-insensitive partial match)", + ), + sort_by: Optional[str] = Query( + None, description="The field to sort the results by" + ), + sort_order: Optional[str] = Query( + "desc", + description="The order to sort the results ('asc' or 'desc')", + ), auth_user=Depends(self.providers.auth.auth_wrapper), ) -> WrappedCollectionListResponse: """ List collections the user has access to with pagination and filtering options. + + This endpoint returns a paginated list of collections that the authenticated user has access to. + It supports filtering by collection name and sorting options. + + Args: + offset (int): The number of collections to skip (for pagination). + limit (int): The maximum number of collections to return (1-1000). + name (str, optional): Filter collections by name (case-insensitive partial match). + sort_by (str, optional): The field to sort the results by. + sort_order (str, optional): The order to sort the results ("asc" or "desc"). + auth_user: The authenticated user making the request. + + Returns: + WrappedCollectionListResponse: A paginated list of collections and total count. + + Raises: + R2RException: If there's an error in retrieving the collections. """ pass - @self.router.get("/collections/{id}") + @self.router.get( + "/collections/{id}", + summary="Get collection details", + openapi_extra={ + "x-codeSamples": [ + { + "lang": "Python", + "source": textwrap.dedent( + """ + from r2r import R2RClient + + client = R2RClient("http://localhost:7272") + # when using auth, do client.login(...) + + result = client.collections.get("123e4567-e89b-12d3-a456-426614174000") + """ + ), + }, + { + "lang": "cURL", + "source": textwrap.dedent( + """ + curl -X GET "https://api.example.com/v3/collections/123e4567-e89b-12d3-a456-426614174000" \\ + -H "Authorization: Bearer YOUR_API_KEY" + """ + ), + }, + ] + }, + ) @self.base_endpoint async def get_collection( - id: UUID = Path(...), + id: UUID = Path( + ..., description="The unique identifier of the collection" + ), auth_user=Depends(self.providers.auth.auth_wrapper), ) -> WrappedCollectionResponse: """ Get details of a specific collection. + + This endpoint retrieves detailed information about a single collection identified by its UUID. + The user must have access to the collection to view its details. + + Args: + id (UUID): The unique identifier of the collection. + auth_user: The authenticated user making the request. + + Returns: + WrappedCollectionResponse: Detailed information about the requested collection. + + Raises: + R2RException: If the collection is not found or the user doesn't have access. """ pass - @self.router.post("/collections/{id}") + @self.router.post( + "/collections/{id}", + summary="Update collection", + openapi_extra={ + "x-codeSamples": [ + { + "lang": "Python", + "source": textwrap.dedent( + """ + from r2r import R2RClient + + client = R2RClient("http://localhost:7272") + # when using auth, do client.login(...) + + result = client.collections.update( + "123e4567-e89b-12d3-a456-426614174000", + name="Updated Collection Name", + description="Updated description" + ) + """ + ), + }, + { + "lang": "cURL", + "source": textwrap.dedent( + """ + curl -X POST "https://api.example.com/v3/collections/123e4567-e89b-12d3-a456-426614174000" \\ + -H "Content-Type: application/json" \\ + -H "Authorization: Bearer YOUR_API_KEY" \\ + -d '{"name": "Updated Collection Name", "description": "Updated description"}' + """ + ), + }, + ] + }, + ) @self.base_endpoint async def update_collection( - id: UUID = Path(...), - config: CollectionConfig = Body(...), + id: UUID = Path( + ..., + description="The unique identifier of the collection to update", + ), + config: CollectionConfig = Body( + ..., description="The new configuration for the collection" + ), auth_user=Depends(self.providers.auth.auth_wrapper), ) -> WrappedCollectionResponse: """ Update an existing collection's configuration. + + This endpoint allows updating the name and description of an existing collection. + The user must have appropriate permissions to modify the collection. + + Args: + id (UUID): The unique identifier of the collection to update. + config (CollectionConfig): The new configuration for the collection. + auth_user: The authenticated user making the request. + + Returns: + WrappedCollectionResponse: Updated details of the collection. + + Raises: + R2RException: If the collection is not found or the user doesn't have permission to update it. """ pass - @self.router.delete("/collections/{id}") + @self.router.delete( + "/collections/{id}", + summary="Delete collection", + openapi_extra={ + "x-codeSamples": [ + { + "lang": "Python", + "source": textwrap.dedent( + """ + from r2r import R2RClient + + client = R2RClient("http://localhost:7272") + # when using auth, do client.login(...) + + result = client.collections.delete("123e4567-e89b-12d3-a456-426614174000") + """ + ), + }, + { + "lang": "cURL", + "source": textwrap.dedent( + """ + curl -X DELETE "https://api.example.com/v3/collections/123e4567-e89b-12d3-a456-426614174000" \\ + -H "Authorization: Bearer YOUR_API_KEY" + """ + ), + }, + ] + }, + ) @self.base_endpoint async def delete_collection( - id: UUID = Path(...), + id: UUID = Path( + ..., + description="The unique identifier of the collection to delete", + ), auth_user=Depends(self.providers.auth.auth_wrapper), ) -> WrappedDeleteResponse: """ Delete an existing collection. + + This endpoint allows deletion of a collection identified by its UUID. + The user must have appropriate permissions to delete the collection. + Deleting a collection removes all associations but does not delete the documents within it. + + Args: + id (UUID): The unique identifier of the collection to delete. + auth_user: The authenticated user making the request. + + Returns: + WrappedDeleteResponse: Confirmation of the deletion. + + Raises: + R2RException: If the collection is not found or the user doesn't have permission to delete it. """ pass - @self.router.get("/collections/{id}/documents") + @self.router.post( + "/collections/{id}/documents/{document_id}", + summary="Add document to collection", + openapi_extra={ + "x-codeSamples": [ + { + "lang": "Python", + "source": textwrap.dedent( + """ + from r2r import R2RClient + + client = R2RClient("http://localhost:7272") + # when using auth, do client.login(...) + + result = client.collections.add_document( + "123e4567-e89b-12d3-a456-426614174000", + "456e789a-b12c-34d5-e678-901234567890" + ) + """ + ), + }, + { + "lang": "cURL", + "source": textwrap.dedent( + """ + curl -X POST "https://api.example.com/v3/collections/123e4567-e89b-12d3-a456-426614174000/documents/456e789a-b12c-34d5-e678-901234567890" \\ + -H "Authorization: Bearer YOUR_API_KEY" + """ + ), + }, + ] + }, + ) + async def add_document_to_collection( + id: UUID = Path(...), + document_id: UUID = Path(...), + auth_user=Depends(self.providers.auth.auth_wrapper), + ) -> WrappedAddUserResponse: + """ + Add a document to a collection. + """ + pass + + @self.router.get( + "/collections/{id}/documents", + summary="List documents in collection", + openapi_extra={ + "x-codeSamples": [ + { + "lang": "Python", + "source": textwrap.dedent( + """ + from r2r import R2RClient + + client = R2RClient("http://localhost:7272") + # when using auth, do client.login(...) + + result = client.collections.list_documents( + "123e4567-e89b-12d3-a456-426614174000", + offset=0, + limit=10, + sort_by="created_at", + sort_order="desc" + ) + """ + ), + }, + { + "lang": "cURL", + "source": textwrap.dedent( + """ + curl -X GET "https://api.example.com/v3/collections/123e4567-e89b-12d3-a456-426614174000/documents?offset=0&limit=10&sort_by=created_at&sort_order=desc" \\ + -H "Authorization: Bearer YOUR_API_KEY" + """ + ), + }, + ] + }, + ) @self.base_endpoint async def get_collection_documents( - id: UUID = Path(...), - offset: int = Query(0, ge=0), - limit: int = Query(100, ge=1, le=1000), - sort_by: Optional[str] = Query(None), - sort_order: Optional[str] = Query("desc"), + id: UUID = Path( + ..., description="The unique identifier of the collection" + ), + offset: int = Query( + 0, + ge=0, + description="The number of documents to skip (for pagination)", + ), + limit: int = Query( + 100, + ge=1, + le=1000, + description="The maximum number of documents to return (1-1000)", + ), + sort_by: Optional[str] = Query( + None, description="The field to sort the documents by" + ), + sort_order: Optional[str] = Query( + "desc", + description="The order to sort the documents ('asc' or 'desc')", + ), auth_user=Depends(self.providers.auth.auth_wrapper), ) -> WrappedDocumentOverviewResponse: """ Get all documents in a collection with pagination and sorting options. + + This endpoint retrieves a paginated list of documents associated with a specific collection. + It supports sorting options to customize the order of returned documents. + + Args: + id (UUID): The unique identifier of the collection. + offset (int): The number of documents to skip (for pagination). + limit (int): The maximum number of documents to return (1-1000). + sort_by (str, optional): The field to sort the documents by. + sort_order (str, optional): The order to sort the documents ("asc" or "desc"). + auth_user: The authenticated user making the request. + + Returns: + WrappedDocumentOverviewResponse: A paginated list of documents in the collection. + + Raises: + R2RException: If the collection is not found or the user doesn't have access. """ pass - @self.router.post("/collections/{id}/documents/{document_id}") @self.base_endpoint async def add_document_to_collection( - id: UUID = Path(...), - document_id: UUID = Path(...), + id: UUID = Path( + ..., description="The unique identifier of the collection" + ), + document_id: UUID = Path( + ..., description="The unique identifier of the document to add" + ), auth_user=Depends(self.providers.auth.auth_wrapper), ) -> WrappedAddUserResponse: """ Add a document to a collection. + + This endpoint associates an existing document with a collection. + The user must have permissions to modify the collection and access to the document. + + Args: + id (UUID): The unique identifier of the collection. + document_id (UUID): The unique identifier of the document to add. + auth_user: The authenticated user making the request. + + Returns: + WrappedAddUserResponse: Confirmation of the document addition to the collection. + + Raises: + R2RException: If the collection or document is not found, or if the user lacks necessary permissions. """ pass - @self.router.delete("/collections/{id}/documents/{document_id}") + @self.router.delete( + "/collections/{id}/documents/{document_id}", + summary="Remove document from collection", + openapi_extra={ + "x-codeSamples": [ + { + "lang": "Python", + "source": textwrap.dedent( + """ + from r2r import R2RClient + + client = R2RClient("http://localhost:7272") + # when using auth, do client.login(...) + + result = client.collections.remove_document( + "123e4567-e89b-12d3-a456-426614174000", + "456e789a-b12c-34d5-e678-901234567890" + ) + """ + ), + }, + { + "lang": "cURL", + "source": textwrap.dedent( + """ + curl -X DELETE "https://api.example.com/v3/collections/123e4567-e89b-12d3-a456-426614174000/documents/456e789a-b12c-34d5-e678-901234567890" \\ + -H "Authorization: Bearer YOUR_API_KEY" + """ + ), + }, + ] + }, + ) @self.base_endpoint async def remove_document_from_collection( - id: UUID = Path(...), - document_id: UUID = Path(...), + id: UUID = Path( + ..., description="The unique identifier of the collection" + ), + document_id: UUID = Path( + ..., + description="The unique identifier of the document to remove", + ), auth_user=Depends(self.providers.auth.auth_wrapper), ) -> WrappedDeleteResponse: """ Remove a document from a collection. + + This endpoint removes the association between a document and a collection. + It does not delete the document itself. The user must have permissions to modify the collection. + + Args: + id (UUID): The unique identifier of the collection. + document_id (UUID): The unique identifier of the document to remove. + auth_user: The authenticated user making the request. + + Returns: + WrappedDeleteResponse: Confirmation of the document removal from the collection. + + Raises: + R2RException: If the collection or document is not found, or if the user lacks necessary permissions. """ pass - @self.router.get("/collections/{id}/users") + @self.router.get( + "/collections/{id}/users", + summary="List users in collection", + openapi_extra={ + "x-codeSamples": [ + { + "lang": "Python", + "source": textwrap.dedent( + """ + from r2r import R2RClient + + client = R2RClient("http://localhost:7272") + # when using auth, do client.login(...) + + result = client.collections.list_users( + "123e4567-e89b-12d3-a456-426614174000", + offset=0, + limit=10, + sort_by="username", + sort_order="asc" + ) + """ + ), + }, + { + "lang": "cURL", + "source": textwrap.dedent( + """ + curl -X GET "https://api.example.com/v3/collections/123e4567-e89b-12d3-a456-426614174000/users?offset=0&limit=10&sort_by=username&sort_order=asc" \\ + -H "Authorization: Bearer YOUR_API_KEY" + """ + ), + }, + ] + }, + ) @self.base_endpoint async def get_collection_users( - id: UUID = Path(...), - offset: int = Query(0, ge=0), - limit: int = Query(100, ge=1, le=1000), - sort_by: Optional[str] = Query(None), - sort_order: Optional[str] = Query("desc"), + id: UUID = Path( + ..., description="The unique identifier of the collection" + ), + offset: int = Query( + 0, + ge=0, + description="The number of users to skip (for pagination)", + ), + limit: int = Query( + 100, + ge=1, + le=1000, + description="The maximum number of users to return (1-1000)", + ), + sort_by: Optional[str] = Query( + None, description="The field to sort the users by" + ), + sort_order: Optional[str] = Query( + "desc", + description="The order to sort the users ('asc' or 'desc')", + ), auth_user=Depends(self.providers.auth.auth_wrapper), ) -> WrappedUsersInCollectionResponse: """ Get all users in a collection with pagination and sorting options. + + This endpoint retrieves a paginated list of users who have access to a specific collection. + It supports sorting options to customize the order of returned users. + + Args: + id (UUID): The unique identifier of the collection. + offset (int): The number of users to skip (for pagination). + limit (int): The maximum number of users to return (1-1000). + sort_by (str, optional): The field to sort the users by. + sort_order (str, optional): The order to sort the users ("asc" or "desc"). + auth_user: The authenticated user making the request. + + Returns: + WrappedUsersInCollectionResponse: A paginated list of users with access to the collection. + + Raises: + R2RException: If the collection is not found or the user doesn't have permission to view its members. """ pass - @self.router.post("/collections/{id}/users/{user_id}") + @self.router.post( + "/collections/{id}/users/{user_id}", + summary="Add user to collection", + openapi_extra={ + "x-codeSamples": [ + { + "lang": "Python", + "source": textwrap.dedent( + """ + from r2r import R2RClient + + client = R2RClient("http://localhost:7272") + # when using auth, do client.login(...) + + result = client.collections.add_user( + "123e4567-e89b-12d3-a456-426614174000", + "789a012b-c34d-5e6f-g789-012345678901" + ) + """ + ), + }, + { + "lang": "cURL", + "source": textwrap.dedent( + """ + curl -X POST "https://api.example.com/v3/collections/123e4567-e89b-12d3-a456-426614174000/users/789a012b-c34d-5e6f-g789-012345678901" \\ + -H "Authorization: Bearer YOUR_API_KEY" + """ + ), + }, + ] + }, + ) @self.base_endpoint async def add_user_to_collection( - id: UUID = Path(...), - user_id: UUID = Path(...), + id: UUID = Path( + ..., description="The unique identifier of the collection" + ), + user_id: UUID = Path( + ..., description="The unique identifier of the user to add" + ), auth_user=Depends(self.providers.auth.auth_wrapper), ) -> WrappedAddUserResponse: """ Add a user to a collection. + + This endpoint grants a user access to a specific collection. + The authenticated user must have admin permissions for the collection to add new users. + + Args: + id (UUID): The unique identifier of the collection. + user_id (UUID): The unique identifier of the user to add. + auth_user: The authenticated user making the request. + + Returns: + WrappedAddUserResponse: Confirmation of the user addition to the collection. + + Raises: + R2RException: If the collection is not found, the user to be added doesn't exist, + or if the authenticated user lacks necessary permissions. """ pass - @self.router.delete("/collections/{id}/users/{user_id}") + @self.router.delete( + "/collections/{id}/users/{user_id}", + summary="Remove user from collection", + openapi_extra={ + "x-codeSamples": [ + { + "lang": "Python", + "source": textwrap.dedent( + """ + from r2r import R2RClient + + client = R2RClient("http://localhost:7272") + # when using auth, do client.login(...) + + result = client.collections.remove_user( + "123e4567-e89b-12d3-a456-426614174000", + "789a012b-c34d-5e6f-g789-012345678901" + ) + """ + ), + }, + { + "lang": "cURL", + "source": textwrap.dedent( + """ + curl -X DELETE "https://api.example.com/v3/collections/123e4567-e89b-12d3-a456-426614174000/users/789a012b-c34d-5e6f-g789-012345678901" \\ + -H "Authorization: Bearer YOUR_API_KEY" + """ + ), + }, + ] + }, + ) @self.base_endpoint async def remove_user_from_collection( - id: UUID = Path(...), - user_id: UUID = Path(...), + id: UUID = Path( + ..., description="The unique identifier of the collection" + ), + user_id: UUID = Path( + ..., description="The unique identifier of the user to remove" + ), auth_user=Depends(self.providers.auth.auth_wrapper), ) -> WrappedDeleteResponse: """ Remove a user from a collection. + + This endpoint revokes a user's access to a specific collection. + The authenticated user must have admin permissions for the collection to remove users. + + Args: + id (UUID): The unique identifier of the collection. + user_id (UUID): The unique identifier of the user to remove. + auth_user: The authenticated user making the request. + + Returns: + WrappedDeleteResponse: Confirmation of the user removal from the collection. + + Raises: + R2RException: If the collection is not found, the user to be removed doesn't exist, + or if the authenticated user lacks necessary permissions. """ pass diff --git a/py/core/main/api/v3/documents_router.py b/py/core/main/api/v3/documents_router.py index f36a4820f..ba877cd5d 100644 --- a/py/core/main/api/v3/documents_router.py +++ b/py/core/main/api/v3/documents_router.py @@ -351,91 +351,121 @@ async def update_document( Either a new file or text content must be provided, but not both. The update process runs asynchronously and its progress can be tracked using the returned task_id. + Metadata can be updated to change the document's title, description, or other fields. These changes are additive w.r.t. the existing metadata, but for chunks and knowledge graph data, the update is a full replacement. + Regular users can only update their own documents. Superusers can update any document. All previous document versions are preserved in the system. """ - if not file and not content: - raise R2RException( - status_code=422, - message="Either a file or content must be provided.", - ) if file and content: raise R2RException( status_code=422, message="Both a file and content cannot be provided.", ) - metadata = metadata or {} # type: ignore - - # Check if the user is a superuser - if not auth_user.is_superuser: - if "user_id" in metadata and metadata["user_id"] != str( - auth_user.id - ): - raise R2RException( - status_code=403, - message="Non-superusers cannot set user_id in metadata.", - ) - metadata["user_id"] = str(auth_user.id) - if file: - file_data = await self._process_file(file) - content_length = len(file_data["content"]) - file_content = BytesIO(base64.b64decode(file_data["content"])) - file_data.pop("content", None) - elif content: - content_length = len(content) - file_content = BytesIO(content.encode("utf-8")) - file_data = { - "filename": f"N/A", - "content_type": "text/plain", - } + if (not file and not content) and metadata: + pass + # metadata update only + ## TODO - Uncomment after merging in `main` + # workflow_input = { + # "document_id": str(id), + # "metadata": metadata, + # "user": auth_user.model_dump_json(), + # } + + # logger.info( + # "Running document metadata update without orchestration." + # ) + # from core.main.orchestration import simple_ingestion_factory + + # simple_ingestor = simple_ingestion_factory(self.service) + # await simple_ingestor["update-document-metadata"]( + # workflow_input + # ) + # return { # type: ignore + # "message": "Update metadata task completed successfully.", + # "id": str(document_id), + # "task_id": None, + # } - await self.providers.database.store_file( - id, - file_data["filename"], - file_content, - file_data["content_type"], - ) else: - raise R2RException( - status_code=422, - message="Either a file or content must be provided.", - ) + metadata = metadata or {} # type: ignore + + # Check if the user is a superuser + if not auth_user.is_superuser: + if "user_id" in metadata and metadata["user_id"] != str( + auth_user.id + ): + raise R2RException( + status_code=403, + message="Non-superusers cannot set user_id in metadata.", + ) + metadata["user_id"] = str(auth_user.id) + + if file: + file_data = await self._process_file(file) + content_length = len(file_data["content"]) + file_content = BytesIO( + base64.b64decode(file_data["content"]) + ) + file_data.pop("content", None) + elif content: + content_length = len(content) + file_content = BytesIO(content.encode("utf-8")) + file_data = { + "filename": f"N/A", + "content_type": "text/plain", + } - workflow_input = { - "file_datas": [file_data], - "document_ids": [str(id)], - "metadatas": [metadata], - "ingestion_config": ingestion_config, - "user": auth_user.model_dump_json(), - "file_sizes_in_bytes": [content_length], - "is_update": False, - "user": auth_user.model_dump_json(), - "is_update": True, - } + await self.providers.database.store_file( + id, + file_data["filename"], + file_content, + file_data["content_type"], + ) + else: + raise R2RException( + status_code=422, + message="Either a file or content must be provided.", + ) - if run_with_orchestration: - raw_message: dict[str, Union[str, None]] = await self.orchestration_provider.run_workflow( # type: ignore - "update-files", {"request": workflow_input}, {} - ) - raw_message["message"] = "Update task queued successfully." - raw_message["document_id"] = workflow_input["document_ids"][0] + workflow_input = { + "file_datas": [file_data], + "document_ids": [str(id)], + "metadatas": [metadata], + "ingestion_config": ingestion_config, + "user": auth_user.model_dump_json(), + "file_sizes_in_bytes": [content_length], + "is_update": False, + "user": auth_user.model_dump_json(), + "is_update": True, + } - return raw_message # type: ignore - else: - logger.info("Running update without orchestration.") - # TODO - Clean up implementation logic here to be more explicitly `synchronous` - from core.main.orchestration import simple_ingestion_factory + if run_with_orchestration: + raw_message: dict[str, Union[str, None]] = await self.orchestration_provider.run_workflow( # type: ignore + "update-files", {"request": workflow_input}, {} + ) + raw_message["message"] = "Update task queued successfully." + raw_message["document_id"] = workflow_input[ + "document_ids" + ][0] + + return raw_message # type: ignore + else: + logger.info("Running update without orchestration.") + # TODO - Clean up implementation logic here to be more explicitly `synchronous` + from core.main.orchestration import ( + simple_ingestion_factory, + ) - simple_ingestor = simple_ingestion_factory( - self.services["ingestion"] - ) - await simple_ingestor["update-files"](workflow_input) - return { # type: ignore - "message": "Update task completed successfully.", - "document_id": workflow_input["document_ids"], - "task_id": None, - } + simple_ingestor = simple_ingestion_factory( + self.services["ingestion"] + ) + await simple_ingestor["update-files"](workflow_input) + return { # type: ignore + "message": "Update task completed successfully.", + "document_id": workflow_input["document_ids"], + "task_id": None, + } @self.router.get( "/documents", @@ -806,7 +836,7 @@ async def file_stream(): async def delete_document_by_id( id: UUID = Path(..., description="Document ID"), auth_user=Depends(self.providers.auth.auth_wrapper), - ) -> ResultsWrapper[None]: + ) -> ResultsWrapper[Optional[bool]]: """ Delete a specific document. All chunks corresponding to the document are deleted, and all other references to the document are removed. @@ -824,12 +854,38 @@ async def delete_document_by_id( @self.router.delete( "/documents/by-filter", summary="Delete documents by filter", + openapi_extra={ + "x-codeSamples": [ + { + "lang": "Python", + "source": textwrap.dedent( + """ + from r2r import R2RClient + client = R2RClient("http://localhost:7272") + # when using auth, do client.login(...) + result = client.documents.delete_by_filter( + filters='{"document_type": {"$eq": "text"}, "created_at": {"$lt": "2023-01-01T00:00:00Z"}}' + ) + """ + ), + }, + { + "lang": "cURL", + "source": textwrap.dedent( + """ + curl -X DELETE "https://api.example.com/v3/documents/by-filter?filters=%7B%22document_type%22%3A%7B%22%24eq%22%3A%22text%22%7D%2C%22created_at%22%3A%7B%22%24lt%22%3A%222023-01-01T00%3A00%3A00Z%22%7D%7D" \\ + -H "Authorization: Bearer YOUR_API_KEY" + """ + ), + }, + ] + }, ) @self.base_endpoint async def delete_document_by_filter( filters: str = Query(..., description="JSON-encoded filters"), auth_user=Depends(self.providers.auth.auth_wrapper), - ) -> ResultsWrapper[None]: + ) -> ResultsWrapper[Optional[bool]]: """ Delete documents based on provided filters. Allowed operators include `eq`, `neq`, `gt`, `gte`, `lt`, `lte`, `like`, `ilike`, `in`, and `nin`. Deletion requests are limited to a user's own documents. """ diff --git a/py/sdk/v3/documents.py b/py/sdk/v3/documents.py index cd4162905..d26bc8ea2 100644 --- a/py/sdk/v3/documents.py +++ b/py/sdk/v3/documents.py @@ -275,7 +275,7 @@ async def delete_by_filter( """ filters_json = json.dumps(filters) await self.client._make_request( - "DELETE", "documents/filtered", params={"filters": filters_json} + "DELETE", "documents/by-filter", params={"filters": filters_json} )