Skip to content

Commit

Permalink
Merge pull request #37 from sshivaditya2019/evals
Browse files Browse the repository at this point in the history
Evals
  • Loading branch information
shiv810 authored Dec 14, 2024
2 parents 49243a9 + a5d9302 commit a3814c8
Show file tree
Hide file tree
Showing 14 changed files with 495 additions and 97 deletions.
5 changes: 4 additions & 1 deletion .cspell.json
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,10 @@
"dylib",
"mobileprovision",
"icns",
"hono"
"hono",
"braintrust",
"autoevals",
"SEPERATOR"
],
"dictionaries": ["typescript", "node", "software-terms"],
"import": ["@cspell/dict-typescript/cspell-ext.json", "@cspell/dict-node/cspell-ext.json", "@cspell/dict-software-terms"],
Expand Down
2 changes: 1 addition & 1 deletion .github/knip.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ const config: KnipConfig = {
ignore: ["src/types/config.ts", "**/__mocks__/**", "**/__fixtures__/**"],
ignoreExportsUsedInFile: true,
// eslint can also be safely ignored as per the docs: https://knip.dev/guides/handling-issues#eslint--jest
ignoreDependencies: ["eslint-config-prettier", "eslint-plugin-prettier", "ts-node", "hono", "cross-env"],
ignoreDependencies: ["eslint-config-prettier", "eslint-plugin-prettier", "hono", "ts-node"],
eslint: true,
};

Expand Down
47 changes: 47 additions & 0 deletions .github/workflows/evals-testing.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
# Runs the Braintrust evaluation suite (`bun eval`) whenever the "Knip" workflow
# completes, then publishes the generated eval-results.md to the job summary.
name: Run Braintrust Evals

on:
  workflow_run:
    workflows: ["Knip"]
    types:
      - completed

permissions: write-all

jobs:
  eval:
    name: Run evals
    runs-on: ubuntu-latest
    permissions: write-all

    steps:
      - uses: actions/setup-node@v4
        with:
          node-version: "20.10.0"

      # Pinned to a released major version instead of the moving `master` ref.
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Setup Bun
        uses: oven-sh/setup-bun@v2

      - name: Install toolchain
        run: bun install --frozen-lockfile

      - name: Run Evals
        id: evals
        run: bun eval
        env:
          SUPABASE_URL: ${{ secrets.SUPABASE_URL }}
          SUPABASE_KEY: ${{ secrets.SUPABASE_KEY }}
          VOYAGEAI_API_KEY: ${{ secrets.VOYAGEAI_API_KEY }}
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
          OPENROUTER_API_KEY: ${{ secrets.OPENROUTER_API_KEY }}
          UBIQUITY_OS_APP_NAME: ${{ secrets.UBIQUITY_OS_APP_NAME }}
          BRAINTRUST_API_KEY: ${{ secrets.BRAINTRUST_API_KEY }}
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

      # Runs even when the eval step fails so a partial report is still surfaced.
      - name: Add Evals Report to Job Summary
        if: always()
        run: |
          if [ -f eval-results.md ]; then
            cat eval-results.md >> "$GITHUB_STEP_SUMMARY"
          else
            echo "No eval-results.md was produced." >> "$GITHUB_STEP_SUMMARY"
          fi
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -15,4 +15,5 @@ junit.xml
cypress/screenshots
script.ts
.wrangler
test-dashboard.md
test-dashboard.md
eval-results.md
126 changes: 62 additions & 64 deletions CHANGELOG.md

Large diffs are not rendered by default.

Binary file modified bun.lockb
Binary file not shown.
8 changes: 4 additions & 4 deletions dist/index.js

Large diffs are not rendered by default.

25 changes: 25 additions & 0 deletions evals/data/eval-gold-responses.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
{
"issueResponses": [
{
"scenario": "manifest.name mismatch",
"issue": {
"body": "Manifests need to be updated so the name matches the intended name, which is the name of the repo it lives in.\n\nAny mismatch in manifest.name and the plugin repo, and we will not be able to install those plugins. The config will look like this:\n\nThis is because the worker URL contains the repo name, and we use that to match against manifest.name.",
"number": 27,
"html_url": "https://github.com/ubiquity-os/ubiquity-os-plugin-installer/issues/27/",
"question": "/ask could you please provide a summary of the issue?"
},
"expectedResponse": "The manifest.name should match the name of the repo it lives in. This is because the worker URL contains the repo name, and we use that to match against manifest.name.",
"sender": {
"login": "sshivaditya2019",
"type": "User"
},
"repository": {
"name": "ubiquity-os-plugin-installer",
"owner": {
"login": "ubiquity-os",
"type": "Organization"
}
}
}
]
}
114 changes: 114 additions & 0 deletions evals/handlers/setup-context.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
import { SupabaseClient } from "@supabase/supabase-js";
import { createAdapters } from "../../src/adapters";
import { CommentSimilaritySearchResult } from "../../src/adapters/supabase/helpers/comment";
import { IssueSimilaritySearchResult } from "../../src/adapters/supabase/helpers/issues";
import { fetchRepoLanguageStats, fetchRepoDependencies } from "../../src/handlers/ground-truths/chat-bot";
import { findGroundTruths } from "../../src/handlers/ground-truths/find-ground-truths";
import { logger } from "../../src/helpers/errors";
import { formatChatHistory } from "../../src/helpers/format-chat-history";
import { recursivelyFetchLinkedIssues } from "../../src/helpers/issue-fetching";
import { Context } from "../../src/types";
import { VoyageAIClient } from "voyageai";
import OpenAI from "openai";

// Divider line that formattedHistory appends after each section of its output.
// The identifier's spelling is kept as-is because other code in this module references it.
const SEPERATOR = "######################################################\n";

/**
 * Context material collected by fetchContext and rendered by formattedHistory.
 */
export interface FetchContext {
/** Similar comment/issue texts after reranking against the question; empty when nothing similar was found. */
rerankedText: string[];
/** Entries produced by formatChatHistory, with empty entries removed. */
formattedChat: string[];
/** Statements about the payload repository's languages, dependencies and devDependencies. */
groundTruths: string[];
}

/**
 * The external service clients that initAdapters hands to createAdapters.
 */
export interface EvalClients {
/** Supabase client. */
supabase: SupabaseClient;
/** VoyageAI client. */
voyage: VoyageAIClient;
/** OpenAI client. */
openai: OpenAI;
}

/**
 * Builds the adapter set for an eval run and attaches it to the given context.
 *
 * The context is mutated in place: `context.adapters` receives the result of
 * `createAdapters`, and every adapter object that exposes a `context` property
 * is pointed back at this same context instance.
 *
 * @param context - Context to attach the adapters to; mutated and returned.
 * @param clients - Supabase, VoyageAI and OpenAI clients passed to `createAdapters`.
 * @returns The same `context` instance, now carrying `adapters`.
 */
export function initAdapters(context: Context, clients: EvalClients): Context {
  const adapters = createAdapters(clients.supabase, clients.voyage, clients.openai, context);
  context.adapters = adapters;

  // Update adapter contexts
  for (const adapterGroup of Object.values(adapters)) {
    for (const adapter of Object.values(adapterGroup)) {
      if (adapter && typeof adapter === "object" && "context" in adapter) {
        adapter.context = context;
      }
    }
  }
  return context;
}

/**
 * Collects the context that accompanies a question during an eval run.
 *
 * Steps, in order:
 *  1. Fetch linked issues for the payload repository and format them as chat history.
 *  2. Look up similar comments and issues through the Supabase adapters.
 *  3. Rerank the similar texts against the question with the Voyage reranker.
 *  4. Derive ground truths from the repository's languages and dependencies.
 *
 * @param context - Context with `config`, `adapters` and `payload.repository` populated.
 * @param question - The question used for the similarity search and the reranking.
 * @returns The reranked similar texts, the non-empty chat history entries, and the ground truths.
 */
export async function fetchContext(context: Context, question: string): Promise<FetchContext> {
  const {
    config: { similarityThreshold },
    adapters: {
      supabase: { comment, issue },
      voyage: { reranker },
    },
  } = context;
  const { specAndBodies, streamlinedComments } = await recursivelyFetchLinkedIssues({
    context,
    owner: context.payload.repository.owner.login,
    repo: context.payload.repository.name,
  });
  let formattedChat = await formatChatHistory(context, streamlinedComments, specAndBodies);
  logger.info(formattedChat.join(""));
  // using db functions to find similar comments and issues
  // NOTE(review): the threshold is inverted (1 - similarityThreshold) before being passed on;
  // presumably the db functions expect a distance-style bound — confirm against the adapters.
  const [similarComments, similarIssues] = await Promise.all([
    comment.findSimilarComments(question, 1 - similarityThreshold, ""),
    issue.findSimilarIssues(question, 1 - similarityThreshold, ""),
  ]);
  // combine the similar comments and issues into a single array
  const similarText = [
    ...(similarComments?.map((comment: CommentSimilaritySearchResult) => comment.comment_plaintext) ?? []),
    ...(similarIssues?.map((issue: IssueSimilaritySearchResult) => issue.issue_plaintext) ?? []),
  ];
  // filter out any empty strings
  formattedChat = formattedChat.filter((text) => text);
  // rerank the similar text using voyageai
  const rerankedText = similarText.length > 0 ? await reranker.reRankResults(similarText, question) : [];
  // gather structural data about the payload repository
  const [languages, { dependencies, devDependencies }] = await Promise.all([fetchRepoLanguageStats(context), fetchRepoDependencies(context)]);
  let groundTruths: string[] = [];
  if (!languages.length) {
    groundTruths.push("No languages found in the repository");
  }
  if (!Reflect.ownKeys(dependencies).length) {
    groundTruths.push("No dependencies found in the repository");
  }
  if (!Reflect.ownKeys(devDependencies).length) {
    groundTruths.push("No devDependencies found in the repository");
  }
  // At most three placeholders can have been pushed above. The previous `> 3` check was
  // therefore never true and findGroundTruths was unreachable. Derive real ground truths
  // whenever at least one kind of repository data exists; only when all three are missing
  // do the placeholder messages stand in.
  if (groundTruths.length < 3) {
    groundTruths = await findGroundTruths(context, "chat-bot", { languages, dependencies, devDependencies });
  }
  return {
    rerankedText,
    formattedChat,
    groundTruths,
  };
}

/**
 * Renders a FetchContext as one string made of three banner-delimited sections,
 * in this order: chat history, reranked text, ground truths.
 *
 * Entries within a section are concatenated as-is (no delimiter is inserted
 * between them), and every section is closed with the module's divider line.
 *
 * @param fetchContext - The collected context to render.
 * @returns The concatenated, banner-delimited text.
 */
export function formattedHistory(fetchContext: FetchContext): string {
  // Each tuple pairs a section banner with the entries listed beneath it.
  const sections: [string, string[]][] = [
    ["#################### Chat History ####################\n", fetchContext.formattedChat],
    ["#################### Reranked Text ####################\n", fetchContext.rerankedText],
    ["#################### Ground Truths ####################\n", fetchContext.groundTruths],
  ];

  let rendered = "";
  for (const [banner, entries] of sections) {
    // Fold the entries onto the banner, then terminate the section with the divider.
    rendered += entries.reduce((text, entry) => text + entry, banner) + SEPERATOR;
  }
  return rendered;
}
Loading

0 comments on commit a3814c8

Please sign in to comment.