From 54a7a3c3a00913e4ad423e0b46be0b7a7cbdee43 Mon Sep 17 00:00:00 2001 From: Jacob Lee Date: Mon, 20 Jan 2025 20:11:50 -0800 Subject: [PATCH] fix(js): Narrow evaluator types, fix example id generation (#1438) --- js/package.json | 2 +- js/src/index.ts | 2 +- js/src/tests/jestlike/jest.test.ts | 69 +++++++++++++++++++++ js/src/utils/jestlike/globals.ts | 6 +- js/src/utils/jestlike/index.ts | 18 +++--- js/src/utils/jestlike/reporter.ts | 4 +- js/src/utils/jestlike/types.ts | 7 +++ js/src/utils/jestlike/vendor/evaluatedBy.ts | 14 +++-- 8 files changed, 99 insertions(+), 23 deletions(-) diff --git a/js/package.json b/js/package.json index 38a09af56..b943bf8f0 100644 --- a/js/package.json +++ b/js/package.json @@ -1,6 +1,6 @@ { "name": "langsmith", - "version": "0.3.0", + "version": "0.3.1", "description": "Client library to connect to the LangSmith LLM Tracing and Evaluation Platform.", "packageManager": "yarn@1.22.19", "files": [ diff --git a/js/src/index.ts b/js/src/index.ts index ffe556a66..36ba8a594 100644 --- a/js/src/index.ts +++ b/js/src/index.ts @@ -18,4 +18,4 @@ export { RunTree, type RunTreeConfig } from "./run_trees.js"; export { overrideFetchImplementation } from "./singletons/fetch.js"; // Update using yarn bump-version -export const __version__ = "0.3.0"; +export const __version__ = "0.3.1"; diff --git a/js/src/tests/jestlike/jest.test.ts b/js/src/tests/jestlike/jest.test.ts index 94cfe7a85..958dd5e3d 100644 --- a/js/src/tests/jestlike/jest.test.ts +++ b/js/src/tests/jestlike/jest.test.ts @@ -184,3 +184,72 @@ ls.describe( }, } ); + +const scoreMarketingCopyAgent = async () => { + return { + key: "marketing_copy_score", + score: 0.5, + }; +}; + +ls.describe.only("Test Tweet", () => { + ls.test( + "should generate a tweet LS", + { + inputs: { + request: "Write a tweet about LLMs", + }, + referenceOutputs: {}, + }, + async ({ inputs: { request } }: { inputs: { request: string } }) => { + const result = request.repeat(2); + ls.logOutputs({ response: result }); + ls.logFeedback({ + key: "length", + score: result.length, + }); + ls.logFeedback({ + key: "twitter_length", + score: result.length <= 280, + }); + const wrappedEvaluator = ls.wrapEvaluator(scoreMarketingCopyAgent); + await wrappedEvaluator({ + content: result, + query_type: "tweet", + }); + } + ); +}); + +ls.describe("Test Linkedin Post", () => { + ls.test( + "should generate a linkedin post LS", + { + inputs: { + request: "Write a linkedin post about LLMs", + }, + referenceOutputs: {}, + }, + async ({ inputs: { request } }: { inputs: { request: string } }) => { + const result = request.repeat(2); + ls.logOutputs({ response: result }); + ls.logFeedback({ + key: "length", + score: result.length, + }); + ls.logFeedback({ + key: "linkedin_length", + score: result.length > 280, + }); + ls.logFeedback({ + key: "multiline", + score: result.split("\n").length > 2, + }); + const wrappedEvaluator = ls.wrapEvaluator(scoreMarketingCopyAgent); + await wrappedEvaluator({ + content: result, + query_type: "linkedin post", + }); + } + ); +}); diff --git a/js/src/utils/jestlike/globals.ts b/js/src/utils/jestlike/globals.ts index f4358c041..3a22c22eb 100644 --- a/js/src/utils/jestlike/globals.ts +++ b/js/src/utils/jestlike/globals.ts @@ -3,8 +3,8 @@ import { Dataset, TracerSession, Example } from "../../schemas.js"; import { Client, CreateProjectParams } from "../../client.js"; import { getEnvironmentVariable } from "../env.js"; import { isTracingEnabled } from "../../env.js"; -import { EvaluationResult } from "../../evaluation/evaluator.js"; import { RunTree } from "../../run_trees.js"; +import { SimpleEvaluationResult } from "./types.js"; export const DEFAULT_TEST_CLIENT = new Client(); @@ -15,7 +15,7 @@ export type TestWrapperAsyncLocalStorageData = { projectConfig?: Partial; project?: TracerSession; setLoggedOutput?: (value: Record) => void; - onFeedbackLogged?: (feedback: EvaluationResult) => void; + onFeedbackLogged?: (feedback: SimpleEvaluationResult) => void; currentExample?: Partial & { syncPromise?: Promise }; client: Client; suiteUuid: string; @@ -40,7 +40,7 @@ export const syncExamplePromises = new Map(); export function _logTestFeedback(params: { exampleId?: string; - feedback: EvaluationResult; + feedback: SimpleEvaluationResult; context: TestWrapperAsyncLocalStorageData; runTree?: RunTree; client: Client; diff --git a/js/src/utils/jestlike/index.ts b/js/src/utils/jestlike/index.ts index 6109e60a9..e78f44337 100644 --- a/js/src/utils/jestlike/index.ts +++ b/js/src/utils/jestlike/index.ts @@ -28,7 +28,7 @@ import { DEFAULT_TEST_CLIENT, } from "./globals.js"; import { wrapExpect } from "./vendor/chain.js"; -import { EvaluationResult } from "../../evaluation/evaluator.js"; +import { SimpleEvaluationResult } from "./types.js"; import type { LangSmithJestlikeWrapperConfig, LangSmithJestlikeWrapperParams, @@ -45,7 +45,7 @@ export const STRIP_ANSI_REGEX = export const TEST_ID_DELIMITER = ", test_id="; export function logFeedback( - feedback: EvaluationResult, + feedback: SimpleEvaluationResult, config?: { sourceRunId?: string } ) { const context = testWrapperAsyncLocalStorageInstance.getStore(); @@ -166,12 +166,12 @@ export function generateWrapperFromJestlikeMethods( const datasetSetupInfo = new Map(); function getExampleId( - datasetName: string, + datasetId: string, inputs: Record, outputs?: Record ) { const identifier = JSON.stringify({ - datasetName, + datasetId, inputsHash: objectHash(inputs), outputsHash: objectHash(outputs ?? {}), }); @@ -454,8 +454,8 @@ export function generateWrapperFromJestlikeMethods( datasetSetupInfo.get(context.suiteUuid); const testInput: I = inputs; const testOutput: O = referenceOutputs; - const testFeedback: EvaluationResult[] = []; - const onFeedbackLogged = (feedback: EvaluationResult) => + const testFeedback: SimpleEvaluationResult[] = []; + const onFeedbackLogged = (feedback: SimpleEvaluationResult) => testFeedback.push(feedback); let loggedOutput: Record | undefined; const setLoggedOutput = (value: Record) => { @@ -543,11 +543,7 @@ export function generateWrapperFromJestlikeMethods( )} while syncing to LangSmith. Please contact us for help.` ); } - exampleId = getExampleId( - dataset.name, - inputs, - referenceOutputs - ); + exampleId = getExampleId(dataset.id, inputs, referenceOutputs); // TODO: Create or update the example in the background // Currently run end time has to be after example modified time diff --git a/js/src/utils/jestlike/reporter.ts b/js/src/utils/jestlike/reporter.ts index d1f7c3e8a..d359d24a1 100644 --- a/js/src/utils/jestlike/reporter.ts +++ b/js/src/utils/jestlike/reporter.ts @@ -4,7 +4,7 @@ import chalk from "chalk"; import * as os from "node:os"; import * as path from "node:path"; import * as fs from "node:fs/promises"; -import { EvaluationResult } from "../../evaluation/evaluator.js"; +import { SimpleEvaluationResult } from "./types.js"; import { ScoreType } from "../../schemas.js"; import { STRIP_ANSI_REGEX, TEST_ID_DELIMITER } from "./index.js"; @@ -131,7 +131,7 @@ export async function printReporterTable( continue; } const feedback = fileContent.feedback.reduce( - (acc: Record, current: EvaluationResult) => { + (acc: Record, current: SimpleEvaluationResult) => { if ( !RESERVED_KEYS.includes(current.key) && current.score !== undefined diff --git a/js/src/utils/jestlike/types.ts b/js/src/utils/jestlike/types.ts index 597f954f7..4e96781b5 100644 --- a/js/src/utils/jestlike/types.ts +++ b/js/src/utils/jestlike/types.ts @@ -1,3 +1,4 @@ +import { EvaluationResult } from "../../evaluation/evaluator.js"; import type { RunTreeConfig } from "../../run_trees.js"; import type { SimpleEvaluator } from "./vendor/evaluatedBy.js"; @@ -21,3 +22,9 @@ export type LangSmithJestDescribeWrapper = ( fn: () => void | Promise, config?: Partial ) => void; + +export type SimpleEvaluationResult = { + key: EvaluationResult["key"]; + score: NonNullable; + comment?: EvaluationResult["comment"]; +}; diff --git a/js/src/utils/jestlike/vendor/evaluatedBy.ts b/js/src/utils/jestlike/vendor/evaluatedBy.ts index cb92e1d2e..3e9654fca 100644 --- a/js/src/utils/jestlike/vendor/evaluatedBy.ts +++ b/js/src/utils/jestlike/vendor/evaluatedBy.ts @@ -5,7 +5,7 @@ import { trackingEnabled, } from "../globals.js"; -import { EvaluationResult } from "../../../evaluation/evaluator.js"; +import { SimpleEvaluationResult } from "../types.js"; import { RunTree, RunTreeConfig } from "../../../run_trees.js"; import { v4 } from "uuid"; @@ -17,9 +17,9 @@ export type SimpleEvaluatorParams = { export type SimpleEvaluator = ( params: SimpleEvaluatorParams -) => EvaluationResult | Promise; +) => SimpleEvaluationResult | Promise; -function isEvaluationResult(x: unknown): x is EvaluationResult { +function isEvaluationResult(x: unknown): x is SimpleEvaluationResult { return ( x != null && typeof x === "object" && @@ -29,11 +29,15 @@ function isEvaluationResult(x: unknown): x is EvaluationResult { ); } -export function wrapEvaluator(evaluator: (input: I) => O | Promise) { +export function wrapEvaluator( + evaluator: ( + input: I + ) => SimpleEvaluationResult | Promise +) { return async ( input: I, config?: Partial & { runId?: string } - ): Promise => { + ): Promise => { const context = testWrapperAsyncLocalStorageInstance.getStore(); if (context === undefined || context.currentExample === undefined) { throw new Error(