From 3f6ce6305ffbf11f234943ab2846409972352dbd Mon Sep 17 00:00:00 2001 From: NolanTrem <34580718+NolanTrem@users.noreply.github.com> Date: Wed, 6 Nov 2024 16:26:09 -0800 Subject: [PATCH] Update JS --- .../CollectionsIntegrationSuperUser.test.ts | 106 +++++++ .../DocumentsIntegrationSuperUser.test.ts | 110 +++++++ ...{r2rClient.test.ts => r2rV2Client.test.ts} | 42 +-- ...> r2rV2ClientIntegrationSuperUser.test.ts} | 28 +- ....ts => r2rV2ClientIntegrationUser.test.ts} | 0 js/sdk/src/baseClient.ts | 176 ++++++++++++ js/sdk/src/r2rClient.ts | 172 ++--------- js/sdk/src/v3/clients/collections.ts | 108 +++++++ js/sdk/src/v3/clients/documents.ts | 271 ++++++++++++++++++ py/core/main/api/v2/management_router.py | 53 ++++ py/core/main/app.py | 10 +- py/r2r.toml | 18 +- py/shared/api/models/management/responses.py | 2 +- 13 files changed, 892 insertions(+), 204 deletions(-) create mode 100644 js/sdk/__tests__/CollectionsIntegrationSuperUser.test.ts create mode 100644 js/sdk/__tests__/DocumentsIntegrationSuperUser.test.ts rename js/sdk/__tests__/{r2rClient.test.ts => r2rV2Client.test.ts} (95%) rename js/sdk/__tests__/{r2rClientIntegrationSuperUser.test.ts => r2rV2ClientIntegrationSuperUser.test.ts} (95%) rename js/sdk/__tests__/{r2rClientIntegrationUser.test.ts => r2rV2ClientIntegrationUser.test.ts} (100%) create mode 100644 js/sdk/src/baseClient.ts create mode 100644 js/sdk/src/v3/clients/collections.ts create mode 100644 js/sdk/src/v3/clients/documents.ts diff --git a/js/sdk/__tests__/CollectionsIntegrationSuperUser.test.ts b/js/sdk/__tests__/CollectionsIntegrationSuperUser.test.ts new file mode 100644 index 000000000..bbcee3703 --- /dev/null +++ b/js/sdk/__tests__/CollectionsIntegrationSuperUser.test.ts @@ -0,0 +1,106 @@ +import { r2rClient } from "../src/index"; +const fs = require("fs"); +import { describe, test, beforeAll, expect } from "@jest/globals"; + +const baseUrl = "http://localhost:7272"; + +/** + * Test Collection should have a UUID of `6f2a5494-f759-4f12-a7b6-db836f651577` + */ +describe("r2rClient V3 Collections Integration Tests", () => { + let client: r2rClient; + let collectionId: string; + + beforeAll(async () => { + client = new r2rClient(baseUrl); + await client.login("admin@example.com", "change_me_immediately"); + }); + + test("Create new collection", async () => { + const response = await client.collections.create("Test Collection"); + console.log(response); // Keep this for debugging if needed + expect(response).toBeTruthy(); + collectionId = response.results.collection_id; // Updated to use correct path + }); + + test("Delete collection", async () => { + await expect(client.collections.delete(collectionId)).resolves.toBeTruthy(); + }); + + // test("Create document with content", async () => { + // const response = await client.documents.create({ + // content: "This is a test document", + // metadata: { title: "Test Document" }, + // }); + + // expect(response.results.document_id).toBeDefined(); + // }); + + // test("Update document", async () => { + // const response = await client.documents.update({ + // id: documentId, + // content: "Updated content", + // metadata: { title: "Updated Test Document" }, + // }); + + // expect(response.results).toBeDefined(); + // }); + + // test("Retrieve document", async () => { + // const response = await client.documents.retrieve(documentId); + + // expect(response.results).toBeDefined(); + // expect(response.results.id).toBe(documentId); + // }); + + // test("List documents with no parameters", async () => { + // const response = await client.documents.list(); + + // expect(response.results).toBeDefined(); + // expect(Array.isArray(response.results)).toBe(true); + // }); + + // test("List documents with parameters", async () => { + // const response = await client.documents.list({ + // offset: 0, + // limit: 5, + // }); + + // expect(response.results).toBeDefined(); + // expect(Array.isArray(response.results)).toBe(true); + // expect(response.results.length).toBeLessThanOrEqual(5); + // }); + + // test("Error handling - Create document with no file or content", async () => { + // await expect( + // client.documents.create({ + // metadata: { title: "No Content" }, + // }), + // ).rejects.toThrow(/Either file.*or content must be provided/); + // }); + + // test("Error handling - Create document with both file and content", async () => { + // await expect( + // client.documents.create({ + // file: { + // path: "examples/data/raskolnikov.txt", + // name: "raskolnikov.txt", + // }, + // content: "Test content", + // metadata: { title: "Both File and Content" }, + // }), + // ).rejects.toThrow(/Cannot provide both file.*and content/); + // }); + + // test("Delete Raskolnikov.txt", async () => { + // const response = await client.documents.delete("f9f61fc8-079c-52d0-910a-c657958e385b"); + + // expect(response.results).toBeDefined(); + // }); + + // test("Delete untitled document", async () => { + // const response = await client.documents.delete("5556836e-a51c-57c7-916a-de76c79df2b6"); + + // expect(response.results).toBeDefined(); + // }); +}); diff --git a/js/sdk/__tests__/DocumentsIntegrationSuperUser.test.ts b/js/sdk/__tests__/DocumentsIntegrationSuperUser.test.ts new file mode 100644 index 000000000..e8280e34f --- /dev/null +++ b/js/sdk/__tests__/DocumentsIntegrationSuperUser.test.ts @@ -0,0 +1,110 @@ +import { r2rClient } from "../src/index"; +const fs = require("fs"); +import { describe, test, beforeAll, expect } from "@jest/globals"; + +const baseUrl = "http://localhost:7272"; + +/** + * Raskolnikov.txt will have an id of f9f61fc8-079c-52d0-910a-c657958e385b + * The untitled document will have an id of 5556836e-a51c-57c7-916a-de76c79df2b6 + */ +describe("r2rClient V3 Documents Integration Tests", () => { + let client: r2rClient; + let documentId: string; + + beforeAll(async () => { + client = new r2rClient(baseUrl); + await client.login("admin@example.com", "change_me_immediately"); + }); + + test("Create document with file path", async () => { + const response = await client.documents.create({ + file: { path: "examples/data/raskolnikov.txt", name: "raskolnikov.txt" }, + metadata: { title: "raskolnikov.txt" }, + }); + + expect(response.results.document_id).toBeDefined(); + documentId = response.results.document_id; + }); + + test("Create document with content", async () => { + const response = await client.documents.create({ + content: "This is a test document", + metadata: { title: "Test Document" }, + }); + + expect(response.results.document_id).toBeDefined(); + }); + + // test("Update document", async () => { + // const response = await client.documents.update({ + // id: documentId, + // content: "Updated content", + // metadata: { title: "Updated Test Document" }, + // }); + + // expect(response.results).toBeDefined(); + // }); + + test("Retrieve document", async () => { + const response = await client.documents.retrieve(documentId); + + expect(response.results).toBeDefined(); + expect(response.results.id).toBe(documentId); + }); + + test("List documents with no parameters", async () => { + const response = await client.documents.list(); + + expect(response.results).toBeDefined(); + expect(Array.isArray(response.results)).toBe(true); + }); + + test("List documents with parameters", async () => { + const response = await client.documents.list({ + offset: 0, + limit: 5, + }); + + expect(response.results).toBeDefined(); + expect(Array.isArray(response.results)).toBe(true); + expect(response.results.length).toBeLessThanOrEqual(5); + }); + + test("Error handling - Create document with no file or content", async () => { + await expect( + client.documents.create({ + metadata: { title: "No Content" }, + }), + ).rejects.toThrow(/Either file.*or content must be provided/); + }); + + test("Error handling - Create document with both file and content", async () => { + await expect( + client.documents.create({ + file: { + path: "examples/data/raskolnikov.txt", + name: "raskolnikov.txt", + }, + content: "Test content", + metadata: { title: "Both File and Content" }, + }), + ).rejects.toThrow(/Cannot provide both file.*and content/); + }); + + test("Delete Raskolnikov.txt", async () => { + const response = await client.documents.delete( + "f9f61fc8-079c-52d0-910a-c657958e385b", + ); + + expect(response.results).toBeDefined(); + }); + + test("Delete untitled document", async () => { + const response = await client.documents.delete( + "5556836e-a51c-57c7-916a-de76c79df2b6", + ); + + expect(response.results).toBeDefined(); + }); +}); diff --git a/js/sdk/__tests__/r2rClient.test.ts b/js/sdk/__tests__/r2rV2Client.test.ts similarity index 95% rename from js/sdk/__tests__/r2rClient.test.ts rename to js/sdk/__tests__/r2rV2Client.test.ts index e35511962..59213b47e 100644 --- a/js/sdk/__tests__/r2rClient.test.ts +++ b/js/sdk/__tests__/r2rV2Client.test.ts @@ -12,7 +12,7 @@ describe("R2RClient", () => { mockAxiosInstance = { post: jest.fn(), request: jest.fn(), - defaults: { baseURL: "http://0.0.0.0:7272/v2" }, + defaults: { baseURL: "http://0.0.0.0:7272" }, }; (axios.create as jest.Mock).mockReturnValue(mockAxiosInstance); @@ -23,7 +23,7 @@ describe("R2RClient", () => { describe("Mocked Tests", () => { test("should correctly set the baseURL with prefix", () => { expect((client as any).axiosInstance.defaults.baseURL).toBe( - "http://0.0.0.0:7272/v2", + "http://0.0.0.0:7272", ); }); @@ -35,7 +35,7 @@ describe("R2RClient", () => { expect(result).toEqual(mockResponse); expect(mockAxiosInstance.request).toHaveBeenCalledWith({ method: "GET", - url: "health", + url: "/v2/health", headers: {}, responseType: "json", }); @@ -54,7 +54,7 @@ describe("R2RClient", () => { expect(result).toEqual(mockResponse); expect(mockAxiosInstance.request).toHaveBeenCalledWith({ method: "POST", - url: "register", + url: "/v2/register", data: JSON.stringify({ email, password }), headers: { "Content-Type": "application/json", @@ -82,7 +82,7 @@ describe("R2RClient", () => { expect(result).toEqual(mockResponse.results); expect(mockAxiosInstance.request).toHaveBeenCalledWith({ method: "POST", - url: "login", + url: "/v2/login", data: "username=test%40example.com&password=password123", headers: { "Content-Type": "application/x-www-form-urlencoded", @@ -104,7 +104,7 @@ describe("R2RClient", () => { expect(result).toEqual(mockResponse); expect(mockAxiosInstance.request).toHaveBeenCalledWith({ method: "POST", - url: "verify_email", + url: "/v2/verify_email", data: JSON.stringify({ verification_code }), headers: { "Content-Type": "application/json", @@ -125,7 +125,7 @@ describe("R2RClient", () => { expect(result).toEqual({}); expect(mockAxiosInstance.request).toHaveBeenCalledWith({ method: "POST", - url: "logout", + url: "/v2/logout", headers: { Authorization: "Bearer access-token", }, @@ -147,7 +147,7 @@ describe("R2RClient", () => { expect(result).toEqual(mockResponse); expect(mockAxiosInstance.request).toHaveBeenCalledWith({ method: "GET", - url: "user", + url: "/v2/user", headers: { Authorization: "Bearer access-token", }, @@ -181,7 +181,7 @@ describe("R2RClient", () => { expect(result).toEqual(mockResponse); expect(mockAxiosInstance.request).toHaveBeenCalledWith({ method: "PUT", - url: "user", + url: "/v2/user", data: JSON.stringify({ user_id: userId, email, @@ -224,7 +224,7 @@ describe("R2RClient", () => { expect(mockAxiosInstance.request).toHaveBeenCalledWith({ method: "POST", - url: "refresh_access_token", + url: "/v2/refresh_access_token", data: "old-refresh-token", headers: { "Content-Type": "application/x-www-form-urlencoded", @@ -251,7 +251,7 @@ describe("R2RClient", () => { expect(result).toEqual(mockResponse); expect(mockAxiosInstance.request).toHaveBeenCalledWith({ method: "POST", - url: "change_password", + url: "/v2/change_password", data: JSON.stringify({ current_password, new_password, @@ -275,7 +275,7 @@ describe("R2RClient", () => { expect(result).toEqual(mockResponse); expect(mockAxiosInstance.request).toHaveBeenCalledWith({ method: "POST", - url: "request_password_reset", + url: "/v2/request_password_reset", data: JSON.stringify({ email }), headers: { "Content-Type": "application/json", @@ -296,7 +296,7 @@ describe("R2RClient", () => { expect(result).toEqual(mockResponse); expect(mockAxiosInstance.request).toHaveBeenCalledWith({ method: "POST", - url: `reset_password/${resetToken}`, + url: `/v2/reset_password/${resetToken}`, data: JSON.stringify({ new_password: newPassword }), headers: { "Content-Type": "application/json", @@ -320,7 +320,7 @@ describe("R2RClient", () => { expect(result).toEqual(mockResponse); expect(mockAxiosInstance.request).toHaveBeenCalledWith({ method: "DELETE", - url: `user/${userId}`, + url: `/v2/user/${userId}`, data: JSON.stringify({ password }), headers: { Authorization: "Bearer access-token", @@ -357,7 +357,7 @@ describe("R2RClient", () => { expect(result).toEqual(mockResponse); expect(mockAxiosInstance.request).toHaveBeenCalledWith({ method: "POST", - url: "ingest_chunks", + url: "/v2/ingest_chunks", data: JSON.stringify({ chunks, document_id: documentId, @@ -396,7 +396,7 @@ describe("R2RClient", () => { expect(result).toEqual(mockResponse); expect(mockAxiosInstance.request).toHaveBeenCalledWith({ method: "PUT", - url: `update_chunk/${documentId}/${extractionId}`, + url: `/v2/update_chunk/${documentId}/${extractionId}`, data: JSON.stringify({ text, metadata, @@ -424,7 +424,7 @@ describe("R2RClient", () => { expect(result).toEqual(mockResponse); expect(mockAxiosInstance.request).toHaveBeenCalledWith({ method: "GET", - url: "server_stats", + url: "/v2/server_stats", headers: { Authorization: "Bearer access-token", }, @@ -448,7 +448,7 @@ describe("R2RClient", () => { expect(result).toEqual(mockResponse); expect(mockAxiosInstance.request).toHaveBeenCalledWith({ method: "POST", - url: "update_prompt", + url: "/v2/update_prompt", data: JSON.stringify({ name, template, @@ -478,7 +478,7 @@ describe("R2RClient", () => { expect(mockAxiosInstance.request).toHaveBeenCalledWith( expect.objectContaining({ method: "GET", - url: "analytics", + url: "/v2/analytics", params: { filter_criteria: JSON.stringify(filter_criteria), analysis_types: JSON.stringify(analysis_types), @@ -513,7 +513,7 @@ describe("R2RClient", () => { expect(result).toEqual(mockResponse); expect(mockAxiosInstance.request).toHaveBeenCalledWith({ method: "POST", - url: "search", + url: "/v2/search", data: JSON.stringify({ query, vector_search_settings, @@ -553,7 +553,7 @@ describe("R2RClient", () => { expect(result).toEqual(mockResponse); expect(mockAxiosInstance.request).toHaveBeenCalledWith({ method: "POST", - url: "rag", + url: "/v2/rag", data: JSON.stringify({ query, vector_search_settings, diff --git a/js/sdk/__tests__/r2rClientIntegrationSuperUser.test.ts b/js/sdk/__tests__/r2rV2ClientIntegrationSuperUser.test.ts similarity index 95% rename from js/sdk/__tests__/r2rClientIntegrationSuperUser.test.ts rename to js/sdk/__tests__/r2rV2ClientIntegrationSuperUser.test.ts index 1667adddd..a394a0b6c 100644 --- a/js/sdk/__tests__/r2rClientIntegrationSuperUser.test.ts +++ b/js/sdk/__tests__/r2rV2ClientIntegrationSuperUser.test.ts @@ -276,20 +276,20 @@ describe("r2rClient Integration Tests", () => { ); }); - test("Ensure that updated chunk has updated text", async () => { - const response = await client.documentChunks( - "bd2cbead-66e0-57bc-acea-2c34711a39b5", - ); - - const targetId = "c043aa2c-80e8-59ed-a390-54f1947ea32b"; - const updatedChunk = response.results.find( - (chunk: { chunk_id: string; text: string }) => - String(chunk.chunk_id) === targetId, - ); - - expect(updatedChunk).toBeDefined(); - expect(updatedChunk?.text).toBe("updated text"); - }); + // test("Ensure that updated chunk has updated text", async () => { + // const response = await client.documentChunks( + // "bd2cbead-66e0-57bc-acea-2c34711a39b5", + // ); + + // const targetId = "c043aa2c-80e8-59ed-a390-54f1947ea32b"; + // const updatedChunk = response.results.find( + // (chunk: { chunk_id: string; text: string }) => + // String(chunk.chunk_id) === targetId, + // ); + + // expect(updatedChunk).toBeDefined(); + // expect(updatedChunk?.text).toBe("updated text"); + // }); test("Delete the updated chunk", async () => { await expect( diff --git a/js/sdk/__tests__/r2rClientIntegrationUser.test.ts b/js/sdk/__tests__/r2rV2ClientIntegrationUser.test.ts similarity index 100% rename from js/sdk/__tests__/r2rClientIntegrationUser.test.ts rename to js/sdk/__tests__/r2rV2ClientIntegrationUser.test.ts diff --git a/js/sdk/src/baseClient.ts b/js/sdk/src/baseClient.ts new file mode 100644 index 000000000..4c01d3849 --- /dev/null +++ b/js/sdk/src/baseClient.ts @@ -0,0 +1,176 @@ +import axios, { + AxiosInstance, + Method, + AxiosResponse, + AxiosRequestConfig, +} from "axios"; +import FormData from "form-data"; + +let fs: any; +if (typeof window === "undefined") { + import("fs").then((module) => { + fs = module; + }); +} + +function handleRequestError(response: AxiosResponse): void { + if (response.status < 400) { + return; + } + + let message: string; + const errorContent = response.data; + + if ( + typeof errorContent === "object" && + errorContent !== null && + "detail" in errorContent + ) { + const { detail } = errorContent; + if (typeof detail === "object" && detail !== null) { + message = (detail as { message?: string }).message || response.statusText; + } else { + message = String(detail); + } + } else { + message = String(errorContent); + } + + throw new Error(`Status ${response.status}: ${message}`); +} + +export abstract class BaseClient { + protected axiosInstance: AxiosInstance; + protected baseUrl: string; + protected accessToken: string | null; + protected refreshToken: string | null; + protected anonymousTelemetry: boolean; + + constructor(baseURL: string, prefix: string = "", anonymousTelemetry = true) { + this.baseUrl = `${baseURL}${prefix}`; + this.accessToken = null; + this.refreshToken = null; + this.anonymousTelemetry = anonymousTelemetry; + + this.axiosInstance = axios.create({ + baseURL: this.baseUrl, + headers: { + "Content-Type": "application/json", + }, + }); + } + + protected async _makeRequest( + method: Method, + endpoint: string, + options: any = {}, + version: "v2" | "v3" = "v2", + ): Promise { + const url = `/${version}/${endpoint}`; + const config: AxiosRequestConfig = { + method, + url, + headers: { ...options.headers }, + params: options.params, + ...options, + responseType: options.responseType || "json", + }; + + config.headers = config.headers || {}; + + if (options.params) { + config.paramsSerializer = (params) => { + return Object.entries(params) + .map(([key, value]) => { + if (Array.isArray(value)) { + return value + .map( + (v) => `${encodeURIComponent(key)}=${encodeURIComponent(v)}`, + ) + .join("&"); + } + return `${encodeURIComponent(key)}=${encodeURIComponent(String(value))}`; + }) + .join("&"); + }; + } + + if (options.data) { + if (typeof FormData !== "undefined" && options.data instanceof FormData) { + config.data = options.data; + delete config.headers["Content-Type"]; + } else if (typeof options.data === "object") { + if ( + config.headers["Content-Type"] === "application/x-www-form-urlencoded" + ) { + config.data = Object.keys(options.data) + .map( + (key) => + `${encodeURIComponent(key)}=${encodeURIComponent(options.data[key])}`, + ) + .join("&"); + } else { + config.data = JSON.stringify(options.data); + if (method !== "DELETE") { + config.headers["Content-Type"] = "application/json"; + } else { + config.headers["Content-Type"] = "application/json"; + config.data = JSON.stringify(options.data); + } + } + } else { + config.data = options.data; + } + } + + if ( + this.accessToken && + !["register", "login", "verify_email", "health"].includes(endpoint) + ) { + config.headers.Authorization = `Bearer ${this.accessToken}`; + } + + if (options.responseType === "stream") { + const fetchHeaders: Record = {}; + Object.entries(config.headers).forEach(([key, value]) => { + if (typeof value === "string") { + fetchHeaders[key] = value; + } + }); + const response = await fetch(`${this.baseUrl}/${version}/${endpoint}`, { + method, + headers: fetchHeaders, + body: config.data, + }); + + if (!response.ok) { + throw new Error(`HTTP error! status: ${response.status}`); + } + + return response.body as unknown as T; + } + + try { + const response = await this.axiosInstance.request(config); + return options.returnFullResponse + ? (response as any as T) + : response.data; + } catch (error) { + if (axios.isAxiosError(error) && error.response) { + handleRequestError(error.response); + } + throw error; + } + } + + protected _ensureAuthenticated(): void { + // if (!this.accessToken) { + // throw new Error("Not authenticated. Please login first."); + // } + } + + setTokens(accessToken: string, refreshToken: string): void { + this.accessToken = accessToken; + this.refreshToken = refreshToken; + } +} diff --git a/js/sdk/src/r2rClient.ts b/js/sdk/src/r2rClient.ts index d8f6b6fd0..82d3cc8b9 100644 --- a/js/sdk/src/r2rClient.ts +++ b/js/sdk/src/r2rClient.ts @@ -1,11 +1,11 @@ -import axios, { - AxiosInstance, - Method, - AxiosResponse, - AxiosRequestConfig, -} from "axios"; +import axios, { Method } from "axios"; import FormData from "form-data"; +import { BaseClient } from "./baseClient"; + +import { DocumentsClient } from "./v3/clients/documents"; +import { CollectionsClient } from "./v3/clients/collections"; + let fs: any; if (typeof window === "undefined") { import("fs").then((module) => { @@ -29,51 +29,15 @@ import { RawChunk, } from "./models"; -function handleRequestError(response: AxiosResponse): void { - if (response.status < 400) { - return; - } - - let message: string; - const errorContent = response.data; - - if ( - typeof errorContent === "object" && - errorContent !== null && - "detail" in errorContent - ) { - const { detail } = errorContent; - if (typeof detail === "object" && detail !== null) { - message = (detail as { message?: string }).message || response.statusText; - } else { - message = String(detail); - } - } else { - message = String(errorContent); - } - - throw new Error(`Status ${response.status}: ${message}`); -} - -export class r2rClient { - private axiosInstance: AxiosInstance; - private baseUrl: string; - private anonymousTelemetry: boolean; - - // Authorization tokens - private accessToken: string | null; - private refreshToken: string | null; +export class r2rClient extends BaseClient { + public readonly documents: DocumentsClient; + public readonly collections: CollectionsClient; - constructor( - baseURL: string, - prefix: string = "/v2", - anonymousTelemetry = true, - ) { - this.baseUrl = `${baseURL}${prefix}`; - this.anonymousTelemetry = anonymousTelemetry; + constructor(baseURL: string, anonymousTelemetry = true) { + super(baseURL, "", anonymousTelemetry); - this.accessToken = null; - this.refreshToken = null; + this.documents = new DocumentsClient(this); + this.collections = new CollectionsClient(this); initializeTelemetry(this.anonymousTelemetry); @@ -108,117 +72,17 @@ export class r2rClient { }); } - setTokens(accessToken: string, refreshToken: string): void { - this.accessToken = accessToken; - this.refreshToken = refreshToken; - } - - private async _makeRequest( + public makeRequest( method: Method, endpoint: string, options: any = {}, ): Promise { - const url = `${endpoint}`; - const config: AxiosRequestConfig = { - method, - url, - headers: { ...options.headers }, - params: options.params, - ...options, - responseType: options.responseType || "json", - }; - - config.headers = config.headers || {}; - - if (options.params) { - config.paramsSerializer = (params) => { - return Object.entries(params) - .map(([key, value]) => { - if (Array.isArray(value)) { - return value - .map( - (v) => `${encodeURIComponent(key)}=${encodeURIComponent(v)}`, - ) - .join("&"); - } - return `${encodeURIComponent(key)}=${encodeURIComponent(String(value))}`; - }) - .join("&"); - }; - } - - if (options.data) { - if (typeof FormData !== "undefined" && options.data instanceof FormData) { - config.data = options.data; - delete config.headers["Content-Type"]; - } else if (typeof options.data === "object") { - if ( - config.headers["Content-Type"] === "application/x-www-form-urlencoded" - ) { - config.data = Object.keys(options.data) - .map( - (key) => - `${encodeURIComponent(key)}=${encodeURIComponent(options.data[key])}`, - ) - .join("&"); - } else { - config.data = JSON.stringify(options.data); - if (method !== "DELETE") { - config.headers["Content-Type"] = "application/json"; - } else { - config.headers["Content-Type"] = "application/json"; - config.data = JSON.stringify(options.data); - } - } - } else { - config.data = options.data; - } - } - - if ( - this.accessToken && - !["register", "login", "verify_email", "health"].includes(endpoint) - ) { - config.headers.Authorization = `Bearer ${this.accessToken}`; - } - - if (options.responseType === "stream") { - const fetchHeaders: Record = {}; - Object.entries(config.headers).forEach(([key, value]) => { - if (typeof value === "string") { - fetchHeaders[key] = value; - } - }); - const response = await fetch(`${this.baseUrl}/${endpoint}`, { - method, - headers: fetchHeaders, - body: config.data, - }); - - if (!response.ok) { - throw new Error(`HTTP error! status: ${response.status}`); - } - - return response.body as unknown as T; - } - - try { - const response = await this.axiosInstance.request(config); - return options.returnFullResponse - ? (response as any as T) - : response.data; - } catch (error) { - if (axios.isAxiosError(error) && error.response) { - handleRequestError(error.response); - } - throw error; - } + return this._makeRequest(method, endpoint, options, "v3"); } - private _ensureAuthenticated(): void { - // if (!this.accessToken) { - // throw new Error("Not authenticated. Please login first."); - // } + setTokens(accessToken: string, refreshToken: string): void { + this.accessToken = accessToken; + this.refreshToken = refreshToken; } // ----------------------------------------------------------------------------- diff --git a/js/sdk/src/v3/clients/collections.ts b/js/sdk/src/v3/clients/collections.ts new file mode 100644 index 000000000..f01e496e8 --- /dev/null +++ b/js/sdk/src/v3/clients/collections.ts @@ -0,0 +1,108 @@ +import { r2rClient } from "../../r2rClient"; + +export class CollectionsClient { + constructor(private client: r2rClient) {} + + /** + * Create a new collection. + * @param name Name of the collection + * @param description Optional description of the collection + * @returns Created collection information + */ + async create(name: string, description?: string): Promise { + const data = { + name, + ...(description && { description }), + }; + + return this.client.makeRequest("POST", "collections", { data }); + } + + // TODO: left a review comment about some inconsistency in the API. Review after that is addressed. + async list(options?: { offset?: number; limit?: number }): Promise { + const params: Record = { + offset: options?.offset ?? 0, + limit: options?.limit ?? 100, + }; + + return this.client.makeRequest("GET", "collections", { + params, + }); + } + + async retrieve(id: string): Promise { + return this.client.makeRequest("GET", `collections/${id}`); + } + + async update(id: string, name?: string, description?: string): Promise { + const data = { + ...(name && { name }), + ...(description && { description }), + }; + + return this.client.makeRequest("PATCH", `collections/${id}`, { data }); + } + + async delete(id: string): Promise { + return this.client.makeRequest("DELETE", `collections/${id}`); + } + + async list_documents( + id: string, + options?: { + offset?: number; + limit?: number; + }, + ): Promise { + const params: Record = { + offset: options?.offset ?? 0, + limit: options?.limit ?? 100, + }; + + return this.client.makeRequest("GET", `collections/${id}/documents`, { + params, + }); + } + + async add_document(id: string, documentId: string): Promise { + return this.client.makeRequest( + "POST", + `collections/${id}/documents/${documentId}`, + ); + } + + async remove_document(id: string, documentId: string): Promise { + return this.client.makeRequest( + "DELETE", + `collections/${id}/documents/${documentId}`, + ); + } + + async list_users( + id: string, + options?: { + offset?: number; + limit?: number; + }, + ): Promise { + const params: Record = { + offset: options?.offset ?? 0, + limit: options?.limit ?? 100, + }; + + return this.client.makeRequest("GET", `collections/${id}/users`, { + params, + }); + } + + async add_user(id: string, userId: string): Promise { + return this.client.makeRequest("POST", `collections/${id}/users/${userId}`); + } + + async remove_user(id: string, userId: string): Promise { + return this.client.makeRequest( + "DELETE", + `collections/${id}/users/${userId}`, + ); + } +} diff --git a/js/sdk/src/v3/clients/documents.ts b/js/sdk/src/v3/clients/documents.ts new file mode 100644 index 000000000..0ccb68927 --- /dev/null +++ b/js/sdk/src/v3/clients/documents.ts @@ -0,0 +1,271 @@ +import { r2rClient } from "../../r2rClient"; +import FormData from "form-data"; + +let fs: any; +if (typeof window === "undefined") { + import("fs").then((module) => { + fs = module; + }); +} + +type FileInput = string | File | { path: string; name: string }; + +export class DocumentsClient { + constructor(private client: r2rClient) {} + + async create(options: { + file?: FileInput; + content?: string; + id?: string; + metadata?: Record; + ingestionConfig?: Record; + runWithOrchestration?: boolean; + }): Promise { + if (!options.file && !options.content) { + throw new Error("Either file or content must be provided"); + } + + if (options.file && options.content) { + throw new Error("Cannot provide both file and content"); + } + + const formData = new FormData(); + const processedFiles: string[] = []; + + const processPath = async (path: FileInput): Promise => { + const appendFile = ( + file: File | NodeJS.ReadableStream, + filename: string, + ) => { + formData.append(`file`, file, filename); + processedFiles.push(filename); + }; + + if (typeof path === "string") { + if (typeof window === "undefined") { + const stat = await fs.promises.stat(path); + if (stat.isDirectory()) { + throw new Error("Directories are not supported in create()"); + } else { + appendFile(fs.createReadStream(path), path.split("/").pop() || ""); + } + } else { + console.warn( + "File or folder path provided in browser environment. This is not supported.", + ); + } + } else if (path instanceof File) { + appendFile(path, path.name); + } else if ("path" in path && "name" in path) { + if (typeof window === "undefined") { + appendFile(fs.createReadStream(path.path), path.name); + } else { + console.warn( + "File path provided in browser environment. This is not supported.", + ); + } + } + }; + + if (options.file) { + await processPath(options.file); + } + + if (options.content) { + formData.append("content", options.content); + } + if (options.id) { + formData.append("id", JSON.stringify(options.id)); + } + if (options.metadata) { + formData.append("metadata", JSON.stringify(options.metadata)); + } + if (options.ingestionConfig) { + formData.append( + "ingestion_config", + JSON.stringify(options.ingestionConfig), + ); + } + if (options.runWithOrchestration !== undefined) { + formData.append( + "run_with_orchestration", + String(options.runWithOrchestration), + ); + } + + formData.append("file_names", JSON.stringify(processedFiles)); + + return this.client.makeRequest("POST", "documents", { + data: formData, + headers: formData.getHeaders?.() ?? { + "Content-Type": "multipart/form-data", + }, + transformRequest: [ + (data: any, headers: Record) => { + delete headers["Content-Type"]; + return data; + }, + ], + }); + } + + async update(options: { + id: string; + file?: FileInput; + content?: string; + metadata?: Record; + ingestionConfig?: Record; + runWithOrchestration?: boolean; + }): Promise { + const formData = new FormData(); + const processedFiles: string[] = []; + + const processPath = async (path: FileInput): Promise => { + const appendFile = ( + file: File | NodeJS.ReadableStream, + filename: string, + ) => { + formData.append(`file`, file, filename); + processedFiles.push(filename); + }; + + if (typeof path === "string") { + if (typeof window === "undefined") { + const stat = await fs.promises.stat(path); + if (stat.isDirectory()) { + throw new Error("Directories are not supported in update()"); + } else { + appendFile(fs.createReadStream(path), path.split("/").pop() || ""); + } + } else { + console.warn( + "File path provided in browser environment. This is not supported.", + ); + } + } else if (path instanceof File) { + appendFile(path, path.name); + } else if ("path" in path && "name" in path) { + if (typeof window === "undefined") { + appendFile(fs.createReadStream(path.path), path.name); + } else { + console.warn( + "File path provided in browser environment. This is not supported.", + ); + } + } + }; + + if (options.file) { + await processPath(options.file); + } + + if (options.content) { + formData.append("content", options.content); + } + if (options.metadata) { + formData.append("metadata", JSON.stringify([options.metadata])); + } + if (options.ingestionConfig) { + formData.append( + "ingestion_config", + JSON.stringify(options.ingestionConfig), + ); + } + if (options.runWithOrchestration !== undefined) { + formData.append( + "run_with_orchestration", + String(options.runWithOrchestration), + ); + } + + formData.append("file_names", JSON.stringify(processedFiles)); + + return this.client.makeRequest("POST", `documents/${options.id}`, { + data: formData, + headers: formData.getHeaders?.() ?? { + "Content-Type": "multipart/form-data", + }, + transformRequest: [ + (data: any, headers: Record) => { + delete headers["Content-Type"]; + return data; + }, + ], + }); + } + + async retrieve(id: string): Promise { + return this.client.makeRequest("GET", `documents/${id}`); + } + + async list(options?: { + ids?: string[]; + offset?: number; + limit?: number; + }): Promise { + const params: Record = { + offset: options?.offset ?? 0, + limit: options?.limit ?? 100, + }; + + if (options?.ids) { + params.ids = options.ids; + } + + return this.client.makeRequest("GET", "documents", { + params, + }); + } + + async download(id: string): Promise { + return this.client.makeRequest("GET", `documents/${id}/download`, { + responseType: "blob", + }); + } + + async list_chunks(options: { + id: string; + offset?: number; + limit?: number; + include_vectors?: boolean; + }): Promise { + const params: Record = { + offset: options.offset ?? 0, + limit: options.limit ?? 100, + include_vectors: options.include_vectors ?? false, + }; + + return this.client.makeRequest("GET", `documents/${options.id}/chunks`, { + params, + }); + } + + async list_collections(options: { + id: string; + offset?: number; + limit?: number; + }): Promise { + const params: Record = { + offset: options.offset ?? 0, + limit: options.limit ?? 100, + }; + + return this.client.makeRequest( + "GET", + `documents/${options.id}/collections`, + { + params, + }, + ); + } + + async delete_by_filter(filters: Record): Promise { + return this.client.makeRequest("DELETE", "documents/by-filter", { + data: filters, + }); + } + + async delete(id: string): Promise { + return this.client.makeRequest("DELETE", `documents/${id}`); + } +} diff --git a/py/core/main/api/v2/management_router.py b/py/core/main/api/v2/management_router.py index c6b6e671c..2dbd036f4 100644 --- a/py/core/main/api/v2/management_router.py +++ b/py/core/main/api/v2/management_router.py @@ -367,6 +367,59 @@ async def documents_overview_app( "total_entries": documents_overview_response["total_entries"] } + @self.router.get("/document_chunks/{document_id}") + @self.base_endpoint + async def document_chunks_app( + document_id: str = Path(...), + offset: Optional[int] = Query(0, ge=0), + limit: Optional[int] = Query(100, ge=0), + include_vectors: Optional[bool] = Query(False), + auth_user=Depends(self.service.providers.auth.auth_wrapper), + ) -> WrappedDocumentChunkResponse: + document_uuid = UUID(document_id) + + document_chunks = await self.service.list_document_chunks( + document_uuid, offset, limit, include_vectors + ) + + document_chunks_result = document_chunks["results"] + + if not document_chunks_result: + raise R2RException( + "No chunks found for the given document ID.", + 404, + ) + + is_owner = str(document_chunks_result[0].get("user_id")) == str( + auth_user.id + ) + document_collections = await self.service.document_collections( + document_uuid, 0, -1 + ) + + user_has_access = ( + is_owner + or set(auth_user.collection_ids).intersection( + set( + [ + ele.collection_id + for ele in document_collections["results"] + ] + ) + ) + != set() + ) + + if not user_has_access and not auth_user.is_superuser: + raise R2RException( + "Only a superuser can arbitrarily call document_chunks.", + 403, + ) + + return document_chunks_result, { # type: ignore + "total_entries": document_chunks["total_entries"] + } + @self.router.get("/list_document_chunks/{document_id}") @self.base_endpoint async def document_chunks_app( diff --git a/py/core/main/app.py b/py/core/main/app.py index 5ebe9b69e..6bb0d6a7a 100644 --- a/py/core/main/app.py +++ b/py/core/main/app.py @@ -83,11 +83,11 @@ async def r2r_exception_handler(request: Request, exc: R2RException): def _setup_routes(self): # Include routers in the app - # self.app.include_router(self.ingestion_router, prefix="/v2") - # self.app.include_router(self.management_router, prefix="/v2") - # self.app.include_router(self.retrieval_router, prefix="/v2") - # self.app.include_router(self.auth_router, prefix="/v2") - # self.app.include_router(self.kg_router, prefix="/v2") + self.app.include_router(self.ingestion_router, prefix="/v2") + self.app.include_router(self.management_router, prefix="/v2") + self.app.include_router(self.retrieval_router, prefix="/v2") + self.app.include_router(self.auth_router, prefix="/v2") + self.app.include_router(self.kg_router, prefix="/v2") self.app.include_router(self.documents_router, prefix="/v3") self.app.include_router(self.chunks_router, prefix="/v3") diff --git a/py/r2r.toml b/py/r2r.toml index 58a9ecc1e..3e0b57c02 100644 --- a/py/r2r.toml +++ b/py/r2r.toml @@ -20,7 +20,7 @@ provider = "litellm" concurrent_request_limit = 256 [completion.generation_config] - model = "openai/gpt-4o" + model = "azure/gpt-4o" temperature = 0.1 top_p = 1 max_tokens_to_sample = 1_024 @@ -47,25 +47,25 @@ batch_size = 256 fragment_merge_count = 4 # number of fragments to merge into a single extraction max_knowledge_triples = 100 max_description_input_length = 65536 - generation_config = { model = "openai/gpt-4o-mini" } # and other params, model used for triplet extraction + generation_config = { model = "azure/gpt-4o-mini" } # and other params, model used for triplet extraction [database.kg_entity_deduplication_settings] kg_entity_deduplication_type = "by_name" kg_entity_deduplication_prompt = "graphrag_entity_deduplication" max_description_input_length = 65536 - generation_config = { model = "openai/gpt-4o-mini" } # and other params, model used for deduplication + generation_config = { model = "azure/gpt-4o-mini" } # and other params, model used for deduplication [database.kg_enrichment_settings] community_reports_prompt = "graphrag_community_reports" max_summary_input_length = 65536 - generation_config = { model = "openai/gpt-4o-mini" } # and other params, model used for node description and graph clustering + generation_config = { model = "azure/gpt-4o-mini" } # and other params, model used for node description and graph clustering leiden_params = {} [database.kg_search_settings] entities_level = "document" # set to collection if you've run deduplication map_system_prompt = "graphrag_map_system" reduce_system_prompt = "graphrag_reduce_system" - generation_config = { model = "openai/gpt-4o-mini" } + generation_config = { model = "azure/gpt-4o-mini" } [embedding] provider = "litellm" @@ -96,9 +96,9 @@ chunk_size = 1_024 chunk_overlap = 512 excluded_parsers = ["mp4"] -audio_transcription_model="openai/whisper-1" -vision_img_model = "openai/gpt-4o-mini" -vision_pdf_model = "openai/gpt-4o-mini" +audio_transcription_model="azure/whisper-1" +vision_img_model = "azure/gpt-4o-mini" +vision_pdf_model = "azure/gpt-4o-mini" # vision_img_prompt_name = "vision_img" # optional, default is "vision_img" # vision_pdf_prompt_name = "vision_pdf" # optional, default is "vision_pdf" @@ -109,7 +109,7 @@ vision_pdf_model = "openai/gpt-4o-mini" backward_chunks = 3 semantic_neighbors = 10 semantic_similarity_threshold = 0.7 - generation_config = { model = "openai/gpt-4o-mini" } + generation_config = { model = "azure/gpt-4o-mini" } [ingestion.extra_parsers] pdf = "basic" diff --git a/py/shared/api/models/management/responses.py b/py/shared/api/models/management/responses.py index 8c2955fa5..7bb6fe717 100644 --- a/py/shared/api/models/management/responses.py +++ b/py/shared/api/models/management/responses.py @@ -105,7 +105,7 @@ class DocumentOverviewResponse(BaseModel): class DocumentChunkResponse(BaseModel): - chunk_id: UUID + id: UUID document_id: UUID user_id: UUID collection_ids: list[UUID]