generated from ubiquity-os/plugin-template
-
Notifications
You must be signed in to change notification settings - Fork 8
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #37 from sshivaditya2019/evals
Evals
- Loading branch information
Showing
14 changed files
with
495 additions
and
97 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
name: Run Braintrust Evals

# Triggered whenever the "Knip" workflow completes (the `completed` type fires
# for any conclusion, success or failure).
on:
  workflow_run:
    workflows: ["Knip"]
    types:
      - completed

permissions: write-all

jobs:
  eval:
    name: Run evals
    runs-on: ubuntu-latest
    permissions: write-all

    steps:
      - uses: actions/setup-node@v4
        with:
          node-version: "20.10.0"

      # Pinned to a release tag instead of the moving `master` branch.
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Setup Bun
        uses: oven-sh/setup-bun@v2

      - name: Install toolchain
        run: bun install --frozen-lockfile

      - name: Run Evals
        id: evals
        run: bun eval
        env:
          SUPABASE_URL: ${{ secrets.SUPABASE_URL }}
          SUPABASE_KEY: ${{ secrets.SUPABASE_KEY }}
          VOYAGEAI_API_KEY: ${{ secrets.VOYAGEAI_API_KEY }}
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
          OPENROUTER_API_KEY: ${{ secrets.OPENROUTER_API_KEY }}
          UBIQUITY_OS_APP_NAME: ${{ secrets.UBIQUITY_OS_APP_NAME }}
          BRAINTRUST_API_KEY: ${{ secrets.BRAINTRUST_API_KEY }}
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

      # Runs even when the evals step fails, so the report file may be missing;
      # guard the read so this step does not add a second, unrelated failure.
      - name: Add Evals Report to Job Summary
        if: always()
        run: |
          if [ -f eval-results.md ]; then
            cat eval-results.md >> "$GITHUB_STEP_SUMMARY"
          else
            echo "No eval results were generated." >> "$GITHUB_STEP_SUMMARY"
          fi
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -15,4 +15,5 @@ junit.xml | |
cypress/screenshots | ||
script.ts | ||
.wrangler | ||
test-dashboard.md | ||
test-dashboard.md | ||
eval-results.md |
Large diffs are not rendered by default.
Oops, something went wrong.
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
{
  "issueResponses": [
    {
      "scenario": "manifest.name mismatch",
      "issue": {
        "body": "Manifests need to be updated so the name matches the intended name, which is the name of the repo it lives in.\n\nAny mismatch in manifest.name and the plugin repo, and we will not be able to install those plugins. The config will look like this:\n\nThis is because the worker URL contains the repo name, and we use that to match against manifest.name.",
        "number": 27,
        "html_url": "https://github.com/ubiquity-os/ubiquity-os-plugin-installer/issues/27/",
        "question": "/ask could you please provide a summary of the issue ?"
      },
      "expectedResponse": "The manifest.name should match the name of the repo it lives in. This is because the worker URL contains the repo name, and we use that to match against manifest.name.",
      "sender": {
        "login": "sshivaditya2019",
        "type": "User"
      },
      "repository": {
        "name": "ubiquity-os-plugin-installer",
        "owner": {
          "login": "ubiquity-os",
          "type": "Organization"
        }
      }
    }
  ]
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,114 @@ | ||
import { SupabaseClient } from "@supabase/supabase-js"; | ||
import { createAdapters } from "../../src/adapters"; | ||
import { CommentSimilaritySearchResult } from "../../src/adapters/supabase/helpers/comment"; | ||
import { IssueSimilaritySearchResult } from "../../src/adapters/supabase/helpers/issues"; | ||
import { fetchRepoLanguageStats, fetchRepoDependencies } from "../../src/handlers/ground-truths/chat-bot"; | ||
import { findGroundTruths } from "../../src/handlers/ground-truths/find-ground-truths"; | ||
import { logger } from "../../src/helpers/errors"; | ||
import { formatChatHistory } from "../../src/helpers/format-chat-history"; | ||
import { recursivelyFetchLinkedIssues } from "../../src/helpers/issue-fetching"; | ||
import { Context } from "../../src/types"; | ||
import { VoyageAIClient } from "voyageai"; | ||
import OpenAI from "openai"; | ||
|
||
/** Divider line written after each section of the formatted eval context. */
const SEPERATOR = "######################################################\n";
|
||
/**
 * Context collected for a single evaluated question, as returned by
 * `fetchContext` and rendered by `formattedHistory`.
 */
export interface FetchContext {
  /** Similar comment and issue texts after reranking against the question. */
  rerankedText: string[];
  /** Formatted chat history entries with empty entries removed. */
  formattedChat: string[];
  /** Ground-truth statements about the payload repository. */
  groundTruths: string[];
}
|
||
/** External service clients that `initAdapters` passes to `createAdapters`. */
export interface EvalClients {
  supabase: SupabaseClient;
  voyage: VoyageAIClient;
  openai: OpenAI;
}
|
||
/**
 * Creates the adapters for `context` from the supplied clients and attaches them.
 *
 * Mutates `context` in place: assigns `context.adapters`, then points every
 * adapter object that exposes a `context` property back at this same context.
 *
 * @param context - Plugin context to populate; modified in place.
 * @param clients - Service clients forwarded to `createAdapters`.
 * @returns The same `context` instance that was passed in.
 */
export const initAdapters = (context: Context, clients: EvalClients): Context => {
  const adapters = createAdapters(clients.supabase, clients.voyage, clients.openai, context);
  context.adapters = adapters;

  // Update adapter contexts
  for (const adapterGroup of Object.values(adapters)) {
    for (const adapter of Object.values(adapterGroup)) {
      if (adapter && typeof adapter === "object" && "context" in adapter) {
        adapter.context = context;
      }
    }
  }
  return context;
};
|
||
/**
 * Collects the context used to answer `question` during an eval run.
 *
 * Steps, in order:
 * 1. Fetches linked issues for the payload repository and formats them as chat history.
 * 2. Looks up similar comments and issues and reranks their plaintext against the question.
 * 3. Fetches the repository's language stats and dependencies and derives ground truths.
 *
 * @param context - Plugin context; its `adapters` must already be initialised.
 * @param question - The question being evaluated.
 * @returns Reranked similar texts, non-empty chat history entries and ground truths.
 */
export async function fetchContext(context: Context, question: string): Promise<FetchContext> {
  const {
    config: { similarityThreshold },
    adapters: {
      supabase: { comment, issue },
      voyage: { reranker },
    },
  } = context;

  const { specAndBodies, streamlinedComments } = await recursivelyFetchLinkedIssues({
    context,
    owner: context.payload.repository.owner.login,
    repo: context.payload.repository.name,
  });
  const chatHistory = await formatChatHistory(context, streamlinedComments, specAndBodies);
  logger.info(chatHistory.join(""));

  // using db functions to find similar comments and issues
  const threshold = 1 - similarityThreshold;
  const [similarComments, similarIssues] = await Promise.all([
    comment.findSimilarComments(question, threshold, ""),
    issue.findSimilarIssues(question, threshold, ""),
  ]);

  // combine the similar comments and issues into a single array
  const similarText = [
    ...(similarComments?.map((result: CommentSimilaritySearchResult) => result.comment_plaintext) ?? []),
    ...(similarIssues?.map((result: IssueSimilaritySearchResult) => result.issue_plaintext) ?? []),
  ];

  // filter out any empty strings
  const formattedChat = chatHistory.filter((text) => text);

  // rerank the similar text using voyageai (skipped when there is nothing to rank)
  const rerankedText = similarText.length > 0 ? await reranker.reRankResults(similarText, question) : [];

  // gather structural data about the payload repository
  const [languages, { dependencies, devDependencies }] = await Promise.all([fetchRepoLanguageStats(context), fetchRepoDependencies(context)]);

  const missingDataNotes: string[] = [];
  if (!languages.length) {
    missingDataNotes.push("No languages found in the repository");
  }
  if (!Reflect.ownKeys(dependencies).length) {
    missingDataNotes.push("No dependencies found in the repository");
  }
  if (!Reflect.ownKeys(devDependencies).length) {
    missingDataNotes.push("No devDependencies found in the repository");
  }

  // At most three notes can be collected, so the previous `> 3` check could never
  // pass and `findGroundTruths` was unreachable. Derive real ground truths whenever
  // at least one kind of structural data exists; only when all three are missing
  // do the notes themselves serve as the ground truths.
  const groundTruths =
    missingDataNotes.length < 3 ? await findGroundTruths(context, "chat-bot", { languages, dependencies, devDependencies }) : missingDataNotes;

  return {
    rerankedText,
    formattedChat,
    groundTruths,
  };
}
|
||
/**
 * Renders a `FetchContext` as one text block with three titled sections
 * (chat history, reranked text, ground truths), each closed by `SEPERATOR`.
 *
 * Chat history entries are concatenated exactly as given. Reranked texts and
 * ground truths are independent items, so each non-empty entry is terminated
 * with a newline to stop neighbouring entries from fusing into one line.
 *
 * @param fetchContext - The context to render.
 * @returns The concatenated, sectioned text.
 */
export function formattedHistory(fetchContext: FetchContext): string {
  // Ensures a non-empty entry ends its own line; already-terminated entries pass through.
  const terminated = (entry: string): string => (entry === "" || entry.endsWith("\n") ? entry : `${entry}\n`);

  return [
    "#################### Chat History ####################\n",
    ...fetchContext.formattedChat,
    SEPERATOR,
    "#################### Reranked Text ####################\n",
    ...fetchContext.rerankedText.map((entry) => terminated(entry)),
    SEPERATOR,
    "#################### Ground Truths ####################\n",
    ...fetchContext.groundTruths.map((entry) => terminated(entry)),
    SEPERATOR,
  ].join("");
}
Oops, something went wrong.