Skip to content

Commit

Permalink
Read OAI Ft Dataset Method (#245)
Browse files Browse the repository at this point in the history
Useful for a chat loader
  • Loading branch information
hinthornw authored Oct 6, 2023
1 parent 0ade270 commit 77081a8
Show file tree
Hide file tree
Showing 4 changed files with 71 additions and 6 deletions.
2 changes: 1 addition & 1 deletion js/package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "langsmith",
"version": "0.0.41",
"version": "0.0.42",
"description": "Client library to connect to the LangSmith LLM Tracing and Evaluation Platform.",
"files": [
"dist/",
Expand Down
42 changes: 38 additions & 4 deletions js/src/client.ts
Original file line number Diff line number Diff line change
@@ -1,27 +1,29 @@
import * as uuid from "uuid";

import { AsyncCaller, AsyncCallerParams } from "./utils/async_caller.js";
import {
DataType,
Dataset,
Example,
ExampleCreate,
ExampleUpdate,
Feedback,
KVMap,
LangChainBaseMessage,
Run,
RunCreate,
RunUpdate,
ScoreType,
TracerSession,
TracerSessionResult,
ValueType,
DataType,
LangChainBaseMessage,
} from "./schemas.js";
import {
convertLangChainMessageToExample,
isLangChainMessage,
} from "./utils/messages.js";
import { getEnvironmentVariable, getRuntimeEnvironment } from "./utils/env.js";

import { RunEvaluator } from "./evaluation/evaluator.js";

interface ClientConfig {
Expand Down Expand Up @@ -235,10 +237,10 @@ export class Client {
return headers;
}

private async _get<T>(
private async _getResponse(
path: string,
queryParams?: URLSearchParams
): Promise<T> {
): Promise<Response> {
const paramsString = queryParams?.toString() ?? "";
const url = `${this.apiUrl}${path}?${paramsString}`;
const response = await this.caller.call(fetch, url, {
Expand All @@ -251,6 +253,14 @@ export class Client {
`Failed to fetch ${path}: ${response.status} ${response.statusText}`
);
}
return response;
}

/**
 * Perform a GET request through `_getResponse` and decode the body as JSON.
 *
 * @param path - Path passed unchanged to `_getResponse`.
 * @param queryParams - Optional query parameters passed unchanged to `_getResponse`.
 * @returns The JSON-decoded response body, typed as `T` (not validated at runtime).
 */
private async _get<T>(
  path: string,
  queryParams?: URLSearchParams
): Promise<T> {
  const res = await this._getResponse(path, queryParams);
  // `json()` yields a promise; returning it from an async method resolves it.
  const payload = res.json() as T;
  return payload;
}
private async *_getPaginated<T>(
Expand Down Expand Up @@ -860,6 +870,30 @@ export class Client {
return result;
}

/**
 * Download a dataset from the `openai_ft` endpoint and parse it as JSON Lines.
 *
 * The dataset is addressed by ID. When only `datasetName` is supplied, the ID
 * is resolved first through `readDataset`.
 *
 * @param datasetId - ID of the dataset to download; takes precedence over `datasetName`.
 * @param datasetName - Name of the dataset; only used when `datasetId` is undefined.
 * @returns One parsed JSON value per non-blank line of the response body, or an
 *   empty array when the body contains no records.
 * @throws Error when neither `datasetId` nor `datasetName` is provided.
 * @throws SyntaxError (from `JSON.parse`) when a non-blank line is not valid JSON.
 */
public async readDatasetOpenaiFinetuning({
  datasetId,
  datasetName,
}: {
  datasetId?: string;
  datasetName?: string;
}): Promise<any[]> {
  if (datasetId === undefined && datasetName === undefined) {
    throw new Error("Must provide datasetName or datasetId");
  }
  const resolvedId =
    datasetId !== undefined
      ? datasetId
      : (await this.readDataset({ datasetName })).id;

  const response = await this._getResponse(
    `/datasets/${resolvedId}/openai_ft`
  );
  const datasetText = await response.text();

  // Skip blank lines so an empty body (or stray blank / CRLF-terminated
  // lines) yields no record instead of making JSON.parse("") throw.
  return datasetText
    .split("\n")
    .map((line: string) => line.trim())
    .filter((line: string) => line.length > 0)
    .map((line: string) => JSON.parse(line));
}

public async *listDatasets({
limit = 100,
offset = 0,
Expand Down
31 changes: 31 additions & 0 deletions python/langsmith/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -1212,6 +1212,37 @@ def read_dataset(
return ls_schemas.Dataset(**result[0], _host_url=self._host_url)
return ls_schemas.Dataset(**result, _host_url=self._host_url)

def read_dataset_openai_finetuning(
    self, dataset_id: Optional[str] = None, *, dataset_name: Optional[str] = None
) -> list:
    """
    Download a dataset in OpenAI JSONL format and load it as a list of dicts.

    Parameters
    ----------
    dataset_id : Optional[str]
        The ID of the dataset to download. Takes precedence over
        ``dataset_name`` when both are given.
    dataset_name : Optional[str]
        The name of the dataset to download. Only used (to look up the ID
        via ``read_dataset``) when ``dataset_id`` is None.

    Returns
    -------
    list
        One parsed JSON value per non-blank line of the response; an empty
        list when the response body contains no records.

    Raises
    ------
    ValueError
        If neither ``dataset_id`` nor ``dataset_name`` is provided.
    """
    if dataset_id is None:
        if dataset_name is None:
            raise ValueError("Must provide dataset_name or dataset_id")
        dataset_id = self.read_dataset(dataset_name=dataset_name).id
    response = self._get_with_retries(
        f"/datasets/{dataset_id}/openai_ft",
    )
    # Skip blank lines: "".split("\n") yields [""], and json.loads("") raises,
    # so an empty dataset (or a stray blank line) must not reach the parser.
    return [
        json.loads(line)
        for line in response.text.split("\n")
        if line.strip()
    ]

def list_datasets(
self,
*,
Expand Down
2 changes: 1 addition & 1 deletion python/pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "langsmith"
version = "0.0.42"
version = "0.0.43"
description = "Client library to connect to the LangSmith LLM Tracing and Evaluation Platform."
authors = ["LangChain <support@langchain.dev>"]
license = "MIT"
Expand Down

0 comments on commit 77081a8

Please sign in to comment.