Add RunEvaluator to Py and JS Libraries (#4)

See langchain-ai/langchain#5618 for example LangChain implementations. I think we'll want some simple non-LangChain completion-function evaluators if we want this core interface to be of much use outside the OSS project. I also don't think I want to land the StringEvaluator class, but I put it up as an example of one approach (that contrasts with the ones above). Co-authored-by: Nuno Campos <nuno@boringbits.io>
langchain-ai · Jun 2, 2023 · 0159529 · 0159529
1 parent c671c5a
commit 0159529
Show file tree

Hide file tree

Showing 24 changed files with 672 additions and 111 deletions.
diff --git a/.github/workflows/js_test.yml b/.github/workflows/js_test.yml
@@ -6,7 +6,12 @@ name: JS Client CI
 on:
   push:
     branches: ["main"]
+    paths:
+      - "js/**"
   pull_request:
+    paths:
+      - "js/**"
+  workflow_dispatch:
 
 jobs:
   format:

diff --git a/.github/workflows/python_test.yml b/.github/workflows/python_test.yml
@@ -2,8 +2,12 @@ name: Python CI
 
 on:
   push:
-    branches: [master]
+    branches: [main]
+    paths:
+      - "python/**"
   pull_request:
+    paths:
+      - "python/**"
   workflow_dispatch:
 
 env:

diff --git a/README.md b/README.md
@@ -3,11 +3,24 @@
 This repository contains the Python and Javascript clients for interacting with the [LangChainPlus platform](https://www.langchain.plus/).
 
 LangChainPlus helps you and your team develop and evaluate language models and intelligent agents. It works
-with any LLM Application, including a seamless integration with the [LangChain Python](https://github.com/hwchase17/langchain) and [LangChainJS](https://github.com/hwchase17/langchainjs) open source libraries.
+with any LLM Application, including a seamless integration with the [LangChain Python](https://github.com/hwchase17/langchain) and [LangChain JS](https://github.com/hwchase17/langchainjs) open source libraries.
 
 LangChainPlus is developed and maintained by [LangChain](https://langchain.com/), the company behind the LangChain framework.
 
+## Quick Start
+
+To get started with the Python SDK, [install the package](https://pypi.org/project/langchainplus-sdk/), then follow the instructions in the [Python README](python/README.md).
+
+```bash
+pip install langchainplus-sdk
+```
+
+To get started with the JavaScript / TypeScript SDK, [install the package](https://www.npmjs.com/package/langchainplus-sdk), then follow the instructions in the [JS README](js/README.md).
+
+```bash
+yarn add langchainplus-sdk
+```
 
 ## Documentation
 
-To learn more about the LangChainPlus platform, check out the [docs](https://docs.langchain.plus/docs/)
+To learn more about the LangChainPlus platform, check out the [docs](https://docs.langchain.plus/docs/)
diff --git a/js/.gitignore b/js/.gitignore
@@ -37,6 +37,15 @@ Chinook_Sqlite.sql
 /run_trees.cjs
 /run_trees.js
 /run_trees.d.ts
+/evaluation.cjs
+/evaluation.js
+/evaluation.d.ts
+/evaluation/evaluator.cjs
+/evaluation/evaluator.js
+/evaluation/evaluator.d.ts
+/evaluation/string_evaluator.cjs
+/evaluation/string_evaluator.js
+/evaluation/string_evaluator.d.ts
 /index.cjs
 /index.js
 /index.d.ts

diff --git a/js/README.md b/js/README.md
@@ -32,7 +32,7 @@ Note: Save the API Key in a secure location. It will not be shown again.
 
 You can log traces natively in your LangChain application or using a LangChainPlus RunTree.
 
-#### Logging Traces with LangChain
+### Logging Traces with LangChain
 
 LangChainPlus seamlessly integrates with the JavaScript LangChain library to record traces from your LLM applications.
 
@@ -67,7 +67,7 @@ const response = await chat.predict(
 console.log(response);
 ```
 
-#### Logging Traces Outside LangChain
+### Logging Traces Outside LangChain
 
 _Note: this API is experimental and may change in the future_
 
@@ -82,6 +82,7 @@ process.env["LANGCHAIN_ENDPOINT"] = "https://api.langchain.plus"; // or your own
 process.env["LANGCHAIN_API_KEY"] = "<YOUR-LANGCHAINPLUS-API-KEY>";
 // process.env["LANGCHAIN_SESSION"] = "My Session Name"; // Optional: "default" is used if not set
 ```
+
 2. **Log traces using a RunTree.**
 
 A RunTree tracks your application. Each RunTree object is required to have a name and run_type. These and other important attributes are as follows:
@@ -93,85 +94,82 @@ A RunTree tracks your application. Each RunTree object is required to have a nam
 - `error`: `Optional<string>` - Any error messages that may have arisen during the call
 
 ```typescript
-import {
-    RunTree,
-    RunTreeConfig
-} from 'langchainplus-sdk';
+import { RunTree, RunTreeConfig } from "langchainplus-sdk";
 
 const parentRunConfig: RunTreeConfig = {
-    name: "My Chat Bot",
-    run_type: "chain",
-    inputs: {
-        text: "Summarize this morning's meetings."
-    },
-    serialized: {}, // Serialized representation of this chain
-    // session_name: "Defaults to the LANGCHAIN_SESSION env var"
-    // apiUrl: "Defaults to the LANGCHAIN_ENDPOINT env var"
-    // apiKey: "Defaults to the LANGCHAIN_API_KEY env var"
+  name: "My Chat Bot",
+  run_type: "chain",
+  inputs: {
+    text: "Summarize this morning's meetings.",
+  },
+  serialized: {}, // Serialized representation of this chain
+  // session_name: "Defaults to the LANGCHAIN_SESSION env var"
+  // apiUrl: "Defaults to the LANGCHAIN_ENDPOINT env var"
+  // apiKey: "Defaults to the LANGCHAIN_API_KEY env var"
 };
 
 const parentRun = new RunTree(parentRunConfig);
 
 const childLlmRun = await parentRun.createChild({
-    name: "My Proprietary LLM",
-    run_type: "llm",
-    inputs: {
-        "prompts": [
-            "You are an AI Assistant. The time is XYZ." +
-            " Summarize this morning's meetings."
-        ]
-    },
+  name: "My Proprietary LLM",
+  run_type: "llm",
+  inputs: {
+    prompts: [
+      "You are an AI Assistant. The time is XYZ." +
+        " Summarize this morning's meetings.",
+    ],
+  },
 });
 
 await childLlmRun.end({
-    outputs: {
-        "generations": [
-            "I should use the transcript_loader tool" +
-            " to fetch meeting_transcripts from XYZ"
-        ]
-    }
+  outputs: {
+    generations: [
+      "I should use the transcript_loader tool" +
+        " to fetch meeting_transcripts from XYZ",
+    ],
+  },
 });
 
 const childToolRun = await parentRun.createChild({
-    name: "transcript_loader",
-    run_type: "tool",
-    inputs: {
-        date: "XYZ",
-        content_type: "meeting_transcripts"
-    },
+  name: "transcript_loader",
+  run_type: "tool",
+  inputs: {
+    date: "XYZ",
+    content_type: "meeting_transcripts",
+  },
 });
 
 await childToolRun.end({
-    outputs: {
-        meetings: ["Meeting1 notes.."]
-    }
+  outputs: {
+    meetings: ["Meeting1 notes.."],
+  },
 });
 
 const childChainRun = await parentRun.createChild({
-    name: "Unreliable Component",
-    run_type: "tool",
-    inputs: {
-        input: "Summarize these notes..."
-    },
+  name: "Unreliable Component",
+  run_type: "tool",
+  inputs: {
+    input: "Summarize these notes...",
+  },
 });
 
 try {
-    // .... the component does work
-    throw new Error("Something went wrong");
+  // .... the component does work
+  throw new Error("Something went wrong");
 } catch (e) {
-    await childChainRun.end({
-        error: `I errored again ${e.message}`
-    });
+  await childChainRun.end({
+    error: `I errored again ${e.message}`,
+  });
 }
 
 await parentRun.end({
-    outputs: {
-        output: ["The meeting notes are as follows:..."]
-    }
+  outputs: {
+    output: ["The meeting notes are as follows:..."],
+  },
 });
 
 await parentRun.postRun({
-    exclude_child_runs: false
+  exclude_child_runs: false,
 });
 ```
 
@@ -182,16 +180,15 @@ For this example, we will do so using the Client, but you can also do this using
 the web interface, as explained in the [LangChainPlus docs](https://docs.langchain.plus/docs/).
 
 ```typescript
-import { LangChainPlusClient } from 'langchainplus-sdk/client';
+import { LangChainPlusClient } from "langchainplus-sdk/client";
 const client = new LangChainPlusClient({
-    // apiUrl: "https://api.langchain.com", // Defaults to the LANGCHAIN_ENDPOINT env var
-    // apiKey: "my_api_key", // Defaults to the LANGCHAIN_API_KEY env var
-    /* callerOptions: {
+  // apiUrl: "https://api.langchain.com", // Defaults to the LANGCHAIN_ENDPOINT env var
+  // apiKey: "my_api_key", // Defaults to the LANGCHAIN_API_KEY env var
+  /* callerOptions: {
          maxConcurrency?: Infinity; // Maximum number of concurrent requests to make
          maxRetries?: 6; // Maximum number of retries to make
     */
-    }, 
-);
+});
 const datasetName = "Example Dataset";
 // We will only use examples from the top level AgentExecutor run here,
 // and exclude runs that errored.
@@ -212,6 +209,58 @@ for (const run of runs) {
 }
 ```
 
+## Evaluating Runs
+
+You can run evaluations directly using the LangChainPlus client.
+
+```ts
+import { StringEvaluator } from "langchainplus-sdk/evaluation/string_evaluator";
+
+function jaccardChars(output: string, answer: string): number {
+  const predictionChars = new Set(output.trim().toLowerCase());
+  const answerChars = new Set(answer.trim().toLowerCase());
+  const intersection = [...predictionChars].filter((x) => answerChars.has(x));
+  const union = new Set([...predictionChars, ...answerChars]);
+  return intersection.length / union.size;
+}
+
+async function grader(config: {
+  input: string;
+  prediction: string;
+  answer?: string;
+}): Promise<{ score: number; value: string }> {
+  let value: string;
+  let score: number;
+  if (config.answer === null || config.answer === undefined) {
+    value = "AMBIGUOUS";
+    score = 0.5;
+  } else {
+    score = jaccardChars(config.prediction, config.answer);
+    value = score > 0.9 ? "CORRECT" : "INCORRECT";
+  }
+  return { score: score, value: value };
+}
+
+const evaluator = new StringEvaluator({
+  evaluationName: "Jaccard",
+  gradingFunction: grader,
+});
+
+const runs = await client.listRuns({
+  sessionName: "my_session",
+  executionOrder: 1,
+  error: false,
+});
+
+for (const run of runs) {
+  client.evaluateRun(run, evaluator);
+}
+```
+
 ## Additional Documentation
 
 To learn more about the LangChainPlus platform, check out the [docs](https://docs.langchain.plus/docs/).
+
+```
+
+```
diff --git a/js/package.json b/js/package.json
@@ -1,6 +1,6 @@
 {
   "name": "langchainplus-sdk",
-  "version": "0.0.1-rc.5",
+  "version": "0.0.2",
   "description": "Client library to connect to the LangChainPlus LLM Tracing and Evaluation Platform.",
   "files": [
     "dist/",
@@ -10,6 +10,15 @@
     "run_trees.cjs",
     "run_trees.js",
     "run_trees.d.ts",
+    "evaluation.cjs",
+    "evaluation.js",
+    "evaluation.d.ts",
+    "evaluation/evaluator.cjs",
+    "evaluation/evaluator.js",
+    "evaluation/evaluator.d.ts",
+    "evaluation/string_evaluator.cjs",
+    "evaluation/string_evaluator.js",
+    "evaluation/string_evaluator.d.ts",
     "index.cjs",
     "index.js",
     "index.d.ts"
@@ -96,6 +105,21 @@
       "import": "./run_trees.js",
       "require": "./run_trees.cjs"
     },
+    "./evaluation": {
+      "types": "./evaluation.d.ts",
+      "import": "./evaluation.js",
+      "require": "./evaluation.cjs"
+    },
+    "./evaluation/evaluator": {
+      "types": "./evaluation/evaluator.d.ts",
+      "import": "./evaluation/evaluator.js",
+      "require": "./evaluation/evaluator.cjs"
+    },
+    "./evaluation/string_evaluator": {
+      "types": "./evaluation/string_evaluator.d.ts",
+      "import": "./evaluation/string_evaluator.js",
+      "require": "./evaluation/string_evaluator.cjs"
+    },
     "./package.json": "./package.json"
   }
-}
+}
-Original file line number
+Diff line change
@@ Expand Up / @@ -6,7 +6,12 @@ name: JS Client CI @@
     on:
       push:
         branches: ["main"]
+        paths:
+          - "js/**"
       pull_request:
+        paths:
+          - "js/**"
+      workflow_dispatch:
     jobs:
       format:
@@ Expand Down @@