Skip to content

Commit

Permalink
js[minor]: Add clonePublicDataset and listSharedExamples (#937)
Browse files Browse the repository at this point in the history
  • Loading branch information
bracesproul authored Aug 22, 2024
2 parents ce8ec9e + 1651268 commit 90d7705
Show file tree
Hide file tree
Showing 4 changed files with 199 additions and 2 deletions.
2 changes: 1 addition & 1 deletion js/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -276,4 +276,4 @@
},
"./package.json": "./package.json"
}
}
}
156 changes: 156 additions & 0 deletions js/src/client.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1528,6 +1528,61 @@ export class Client {
return dataset as Dataset;
}

/**
 * List the examples of a publicly shared dataset.
 *
 * Issues a GET to `{apiUrl}/public/{shareToken}/examples`, optionally
 * filtered by example ID (sent as repeated `id` query parameters).
 *
 * @param shareToken The share token (UUID) identifying the shared dataset.
 * @param options Additional options for listing the examples.
 * @param options.exampleIds A list of example IDs to filter by.
 * @returns The shared examples, each augmented with `_hostUrl` taken from
 *   this client's `getHostUrl()`.
 * @throws {Error} If the server responds with a non-2xx status. The message
 *   includes the response's `detail` field when one is present.
 */
public async listSharedExamples(
  shareToken: string,
  options?: { exampleIds?: string[] }
): Promise<Example[]> {
  const urlParams = new URLSearchParams();
  for (const exampleId of options?.exampleIds ?? []) {
    urlParams.append("id", exampleId);
  }

  const response = await this.caller.call(
    fetch,
    `${this.apiUrl}/public/${shareToken}/examples?${urlParams.toString()}`,
    {
      method: "GET",
      headers: this.headers,
      signal: AbortSignal.timeout(this.timeout_ms),
      ...this.fetchOptions,
    }
  );

  if (!response.ok) {
    // Error bodies are not guaranteed to be JSON (e.g. a proxy's HTML page),
    // so parse defensively and never let a parse failure hide the status.
    let detail: unknown;
    try {
      const errorBody: unknown = await response.json();
      if (
        typeof errorBody === "object" &&
        errorBody !== null &&
        "detail" in errorBody
      ) {
        detail = (errorBody as { detail?: unknown }).detail;
      }
    } catch (_) {
      // Unparseable body: fall through to the generic message below.
    }

    if (detail !== undefined) {
      // `detail` may be a plain string or a list (of strings or objects).
      const stringify = (item: unknown): string =>
        typeof item === "string" ? item : JSON.stringify(item);
      const message = Array.isArray(detail)
        ? detail.map(stringify).join("\n")
        : stringify(detail);
      throw new Error(
        `Failed to list shared examples.\nStatus: ${response.status}\nMessage: ${message}`
      );
    }
    throw new Error(
      `Failed to list shared examples: ${response.status} ${response.statusText}`
    );
  }

  const examples: Example[] = await response.json();
  const hostUrl = this.getHostUrl();
  return examples.map((example: Example) => ({
    ...example,
    _hostUrl: hostUrl,
  }));
}

public async createProject({
projectName,
description = null,
Expand Down Expand Up @@ -3561,4 +3616,105 @@ export class Client {
});
return url;
}

/**
 * Clone a public dataset to your own LangSmith tenant.
 *
 * This operation is intended to be idempotent: if a dataset with the target
 * name already exists in your tenant, a message is logged and nothing else
 * is done.
 *
 * @param tokenOrUrl The share token (UUID) of the public dataset to clone,
 *   or a public dataset URL containing it.
 * @param options Additional options for cloning the dataset.
 * @param options.sourceApiUrl The URL of the LangSmith server where the data
 *   is hosted. Defaults to the API URL of the current client.
 * @param options.datasetName The name of the dataset to create in your
 *   tenant. Defaults to the name of the public dataset.
 * @returns Resolves once the dataset and its examples have been created, or
 *   immediately if the dataset already exists.
 * @throws Rethrows any error raised while creating the examples; in that
 *   case the (possibly empty) dataset has already been created and must be
 *   deleted manually.
 */
async clonePublicDataset(
  tokenOrUrl: string,
  options: {
    sourceApiUrl?: string;
    datasetName?: string;
  } = {}
): Promise<void> {
  const { sourceApiUrl = this.apiUrl, datasetName } = options;
  const [parsedApiUrl, tokenUuid] = this.parseTokenOrUrl(
    tokenOrUrl,
    sourceApiUrl
  );
  const sourceClient = new Client({
    apiUrl: parsedApiUrl,
    // Placeholder API key not needed anymore in most cases, but
    // some private deployments may have API key-based rate limiting
    // that would cause this to fail if we provide no value.
    apiKey: "placeholder",
  });

  const ds = await sourceClient.readSharedDataset(tokenUuid);
  const finalDatasetName = datasetName || ds.name;

  try {
    // Look the dataset up by *name*: `finalDatasetName` is not an ID, so
    // passing it as `datasetId` could never match an existing dataset.
    if (await this.hasDataset({ datasetName: finalDatasetName })) {
      console.log(
        `Dataset ${finalDatasetName} already exists in your tenant. Skipping.`
      );
      return;
    }
  } catch (_) {
    // Best-effort check: if the lookup itself fails, treat the dataset as
    // absent and proceed with the clone.
  }

  // Fetch examples first, then create the dataset, so a failure to read the
  // source does not leave an empty dataset behind.
  const examples = await sourceClient.listSharedExamples(tokenUuid);
  const dataset = await this.createDataset(finalDatasetName, {
    description: ds.description,
    dataType: ds.data_type || "kv",
    inputsSchema: ds.inputs_schema_definition ?? undefined,
    outputsSchema: ds.outputs_schema_definition ?? undefined,
  });
  try {
    await this.createExamples({
      inputs: examples.map((e) => e.inputs),
      // Keep `outputs` index-aligned with `inputs`. Dropping missing entries
      // would shift later outputs onto the wrong examples.
      outputs: examples.map((e) => e.outputs ?? {}),
      datasetId: dataset.id,
    });
  } catch (e) {
    console.error(
      `An error occurred while creating dataset ${finalDatasetName}. ` +
        "You should delete it manually."
    );
    throw e;
  }
}

/**
 * Extract a share token from either a bare UUID or a public URL.
 *
 * @param urlOrToken A UUID string, or a URL whose path contains the token.
 * @param apiUrl The API URL to pair with the token; returned unchanged.
 * @param numParts Position of the token counted from the end of the URL's
 *   non-empty path segments (2 means second-to-last).
 * @param kind Resource label used only in the error message.
 * @returns A tuple of `[apiUrl, token]`.
 * @throws {Error} If the input is neither a UUID nor a parseable URL with at
 *   least `numParts` path segments.
 */
private parseTokenOrUrl(
  urlOrToken: string,
  apiUrl: string,
  numParts = 2,
  kind = "dataset"
): [string, string] {
  // First interpretation: the input is already a bare UUID token.
  let looksLikeUuid = true;
  try {
    assertUuid(urlOrToken); // Throws when the input is not a UUID.
  } catch (_) {
    looksLikeUuid = false;
  }
  if (looksLikeUuid) {
    return [apiUrl, urlOrToken];
  }

  // Second interpretation: the input is a URL carrying the token in its path.
  try {
    const segments = new URL(urlOrToken).pathname
      .split("/")
      .filter((segment) => segment !== "");

    if (segments.length < numParts) {
      throw new Error(`Invalid public ${kind} URL: ${urlOrToken}`);
    }
    return [apiUrl, segments[segments.length - numParts]];
  } catch (_) {
    // Any failure above (unparseable URL or too few segments) surfaces with
    // this single message.
    throw new Error(`Invalid public ${kind} URL or token: ${urlOrToken}`);
  }
}
}
2 changes: 2 additions & 0 deletions js/src/schemas.ts
Original file line number Diff line number Diff line change
Expand Up @@ -261,6 +261,8 @@ export interface BaseDataset {
description: string;
tenant_id: string;
data_type?: DataType;
inputs_schema_definition?: KVMap;
outputs_schema_definition?: KVMap;
}

export interface Dataset extends BaseDataset {
Expand Down
41 changes: 40 additions & 1 deletion js/src/tests/client.int.test.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { Dataset, Run, TracerSession } from "../schemas.js";
import { Dataset, Example, Run, TracerSession } from "../schemas.js";
import {
FunctionMessage,
HumanMessage,
Expand Down Expand Up @@ -1074,3 +1074,42 @@ test("Test pull prompt include model", async () => {

await client.deletePrompt(promptName);
});

// Integration test: listing the examples of a known public dataset share
// token should return at least one example.
test("list shared examples can list shared examples", async () => {
  const shareToken = "620596ee-570b-4d2b-8c8f-f828adbe5242";
  const langsmith = new Client();

  const examples = await langsmith.listSharedExamples(shareToken);

  expect(examples.length).toBeGreaterThan(0);
});

// Integration test: cloning a public dataset by URL should create a dataset
// with the requested name that contains at least one example.
test("clonePublicDataset method can clone a dataset", async () => {
  const publicDatasetUrl =
    "https://smith.langchain.com/public/620596ee-570b-4d2b-8c8f-f828adbe5242/d";
  const datasetName = "multiverse_math_public_testing";
  const client = new Client();

  try {
    await client.clonePublicDataset(publicDatasetUrl, { datasetName });

    expect(await client.hasDataset({ datasetName })).toBe(true);

    const clonedExamples: Example[] = [];
    for await (const example of client.listExamples({ datasetName })) {
      clonedExamples.push(example);
    }
    expect(clonedExamples.length).toBeGreaterThan(0);
  } finally {
    // Best-effort cleanup of the cloned dataset; a failed delete must not
    // mask the test outcome.
    try {
      await client.deleteDataset({ datasetName });
    } catch (_) {
      // no-op if failure
    }
  }
});

0 comments on commit 90d7705

Please sign in to comment.