-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #2 from thisguymartin/update-prompt
feat: enhance encryption utilities and update database service for in…
- Loading branch information
Showing
8 changed files
with
185 additions
and
96 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,34 @@ | ||
[ | ||
{ | ||
"input": { | ||
"context": "Tesla's Model 3 was first unveiled on March 31, 2016, with the first deliveries beginning in July 2017. The base model has an EPA-rated range of 272 miles and can accelerate from 0-60 mph in 5.8 seconds.", | ||
"question": "When did Tesla start delivering the Model 3?" | ||
}, | ||
"output": "July 2017", | ||
"criteria": "Answer must be exactly 'July 2017' based on the provided context" | ||
}, | ||
{ | ||
"input": { | ||
"context": "The Great Barrier Reef is the world's largest coral reef system, stretching over 2,300 kilometers along the northeast coast of Australia. It consists of over 2,900 individual reefs and 900 islands. The reef was declared a World Heritage site in 1981.", | ||
"question": "How long is the Great Barrier Reef?" | ||
}, | ||
"output": "2,300 kilometers", | ||
"criteria": "Answer must specify '2,300 kilometers' from the context, with units included" | ||
}, | ||
{ | ||
"input": { | ||
"context": "Python was created by Guido van Rossum and was first released in 1991. The language emphasizes code readability with its notable use of significant whitespace. Python 3.0 was released in 2008, with major improvements in handling text strings.", | ||
"question": "Who created Python?" | ||
}, | ||
"output": "Guido van Rossum", | ||
"criteria": "Answer must be the full name 'Guido van Rossum' as stated in the context" | ||
}, | ||
{ | ||
"input": { | ||
"context": "The human brain consists of approximately 86 billion neurons. Each neuron can make connections with up to 10,000 other neurons, resulting in trillions of neural connections. The brain uses about 20% of the body's total energy consumption.", | ||
"question": "What percentage of the body's energy does the brain use?" | ||
}, | ||
"output": "20%", | ||
"criteria": "Answer must be '20%' based on the context, including the percentage symbol" | ||
} | ||
] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
[ | ||
{ | ||
"input": "List the first three prime numbers in ascending order, separated by commas.", | ||
"output": "2,3,5", | ||
"criteria": "Numbers must be in correct order, separated by commas with no spaces" | ||
}, | ||
{ | ||
"input": "What are the three states of matter, in alphabetical order?", | ||
"output": "gas,liquid,solid", | ||
"criteria": "States must be in alphabetical order, lowercase, separated by commas with no spaces" | ||
} | ||
] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,17 +1,48 @@ | ||
import { Command } from "https://deno.land/x/cliffy@v0.25.4/command/mod.ts"; | ||
import { Factuality, JSONDiff, ValidJSON } from "autoevals"; | ||
import { ApiKeyLoader } from "../services/openAi.ts"; | ||
import { FactualityEvaluator } from "../services/faculty-evaluator.ts"; | ||
import { FactualityEval, FactualityEvalItem } from "../services/faculty.ts"; | ||
import { CloseCaseQA, QAEvalItem } from "../services/close-qa.ts"; | ||
import { error, success } from "../utils/color.ts"; | ||
|
||
/**
 * `run-eval` CLI command.
 *
 * Loads the API key, runs the evaluation selected by `--type` against the
 * JSON file given as `<file>`, logs the results, and, when `--print` is set,
 * also writes them as pretty-printed JSON into the user's Downloads folder.
 *
 * Supported `--type` values: `"factuality"` and `"closeqa"`. Any other value
 * logs "Invalid evaluation type" and exits the action without evaluating.
 *
 * NOTE(review): `--print <bool>` declares an option that takes a value; confirm
 * how cliffy types that value, since any non-empty string would be truthy here.
 */
export const RunEval = new Command()
  .default("run-eval")
  .description("Run the evaluation")
  .option("-t, --type <string>", "Type of evaluation to run")
  .option("-p, --print <bool>", "Print the results")
  .arguments("<file>")
  .action(async ({ type, print }, file: string) => {
    const keyLoader = new ApiKeyLoader();
    await keyLoader.loadApiKey();

    // Both evaluators follow the same parse -> evaluate flow; only the class
    // and the item type differ, so the logging/serialization is shared below.
    let results: unknown;
    if (type === "factuality") {
      const factuality = new FactualityEval();
      const items = await factuality.parseFile<FactualityEvalItem[]>(file);
      results = await factuality.evaluateItems(items);
    } else if (type === "closeqa") {
      const closeQa = new CloseCaseQA();
      const items = await closeQa.parseFile<QAEvalItem[]>(file);
      results = await closeQa.evaluateItems(items);
    } else {
      console.error(error("Invalid evaluation type"));
      return;
    }

    console.log(results);
    const generalResult = JSON.stringify(results, null, 2);

    console.log(success("====================================="));

    if (print) {
      // HOME may be unset; without this guard the file would be written to a
      // literal "undefined/Downloads" directory.
      const home = Deno.env.get("HOME") ?? Deno.env.get("USERPROFILE");
      if (home === undefined) {
        console.error(
          error("Cannot save results: no home directory found in environment"),
        );
        return;
      }

      const downloads = `${home}/Downloads`;
      // Keep only the last path segment of the input file for the output name.
      const filename = file.split(/[\\/]/).pop() ?? file;
      const target = `${downloads}/${filename}-result.json`;

      await Deno.writeTextFile(target, generalResult);

      console.log(success(`Results saved to ${target}`));
    }
  });
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
import { ClosedQA } from "autoevals"; | ||
import { Evaluate } from "./evaluate.ts"; | ||
|
||
/**
 * One closed-QA evaluation case as read from a JSON fixture file.
 */
export interface QAEvalItem {
  /**
   * The task or question posed. Fixtures supply this either as a plain string
   * or as a structured object (for example `{ context, question }`).
   */
  input: string | Record<string, unknown>;
  /** The answer text to be graded. */
  output: string;
  /** The criterion the answer is judged against. */
  criteria: string;
}
|
||
/**
 * Shape each `ClosedQA` scorer result is cast to by `CloseCaseQA`.
 */
export interface QAEvalResult {
  /** Name reported by the scorer. */
  name: string;
  /** Numeric score reported by the scorer. */
  score: number;
  /** Extra details reported alongside the score. */
  metadata: {
    rationale: string;
    choice: string;
  };
}
|
||
/**
 * Evaluator that grades answers against a free-form criterion using the
 * autoevals `ClosedQA` scorer.
 */
export class CloseCaseQA extends Evaluate {
  /**
   * Scores every item with `ClosedQA`, one request at a time, and returns the
   * results in the same order as `items`.
   *
   * @param items Cases to grade.
   * @returns One scorer result per input item.
   */
  async evaluateItems(items: QAEvalItem[]): Promise<QAEvalResult[]> {
    const results: QAEvalResult[] = [];

    // `items` is a plain array, so a regular `for...of` is sufficient; each
    // scorer call is still awaited before the next one starts.
    for (const item of items) {
      // Fixture files may carry a structured input (e.g. context + question).
      // Serialize it so the scorer receives readable text, not an object.
      const rawInput: unknown = item.input;
      const input = typeof rawInput === "string"
        ? rawInput
        : JSON.stringify(rawInput);

      const result = await ClosedQA({
        input,
        // The scorer reads the submission from `output`.
        output: item.output,
        criteria: item.criteria,
        // NOTE(review): cast kept from the original call; confirm whether the
        // installed autoevals typings still require it.
      } as any);

      results.push(result as QAEvalResult);
    }

    return results;
  }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
/**
 * Base class for evaluators; provides loading of JSON fixture files.
 */
export class Evaluate {
  /**
   * Reads the file at `jsonPath` and parses its content as JSON.
   *
   * The parsed value is returned as `T` without any runtime validation.
   *
   * @param jsonPath Path of the JSON file to read.
   * @returns The parsed JSON value, typed as `T`.
   * @throws Error with message "Failed to evaluate from JSON" when reading or
   *   parsing fails; the underlying failure is logged to stderr first.
   */
  async parseFile<T>(jsonPath: string): Promise<T> {
    let parsed: T;

    try {
      const raw = await Deno.readTextFile(jsonPath);
      parsed = JSON.parse(raw) as T;
    } catch (cause) {
      console.error(`Failed to evaluate from JSON: ${cause}`);
      throw new Error(`Failed to evaluate from JSON`);
    }

    return parsed;
  }
}
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,56 @@ | ||
import { Factuality } from "autoevals"; | ||
import { Evaluate } from "./evaluate.ts"; | ||
|
||
/**
 * One factuality evaluation case as read from a JSON fixture file.
 * Each field is forwarded to the scorer under the same name.
 */
export interface FactualityEvalItem {
  /** Forwarded to the scorer as `input`. */
  input: string;

  /** Forwarded to the scorer as `output`. */
  output: string;

  /** Forwarded to the scorer as `expected`. */
  expected: string;
}
|
||
/**
 * Shape each `Factuality` scorer result is cast to by `FactualityEval`.
 *
 * Exported because it appears in the public signatures of `FactualityEval`.
 */
export interface FactualityEvalResult {
  /** Name reported by the scorer. */
  name: string;
  /** Numeric score reported by the scorer. */
  score: number;
  /** Extra details reported alongside the score. */
  metadata: {
    rationale: string;
    choice: string;
  };
}
|
||
/**
 * Evaluator that checks answers against an expected answer using the
 * autoevals `Factuality` scorer.
 */
export class FactualityEval extends Evaluate {
  /**
   * Prints each result followed by the average score to stdout.
   *
   * @param results Scorer results to print; may be empty.
   */
  static printResults(results: FactualityEvalResult[]): void {
    console.log("\nFactuality Evaluation Results:");
    console.log("=============================\n");

    results.forEach((result, index) => {
      console.log(`Item ${index + 1}:`);
      console.log(`Output: ${result.name}`);
      console.log(`Score: ${result.score}`);
      console.log(`Metadata: ${JSON.stringify(result.metadata, null, 2)}`);
      console.log("-----------------------------\n");
    });

    // Print summary statistics. An empty list has no meaningful average;
    // dividing by zero would print "NaN".
    if (results.length === 0) {
      console.log("Average Score: n/a (no results)");
      return;
    }

    const total = results.reduce((sum, r) => sum + r.score, 0);
    const avgScore = total / results.length;
    console.log(`Average Score: ${avgScore.toFixed(3)}`);
  }

  /**
   * Scores every item with `Factuality`, one request at a time, and returns
   * the results in the same order as `items`.
   *
   * @param items Cases to score.
   * @returns One scorer result per input item.
   */
  async evaluateItems(
    items: FactualityEvalItem[],
  ): Promise<FactualityEvalResult[]> {
    const results: FactualityEvalResult[] = [];

    // `items` is a plain array, so a regular `for...of` is sufficient; each
    // scorer call is still awaited before the next one starts.
    for (const item of items) {
      const result = await Factuality({
        input: item.input,
        output: item.output,
        expected: item.expected,
        // NOTE(review): cast kept from the original call; confirm whether the
        // installed autoevals typings still require it.
      } as any);

      results.push(result as FactualityEvalResult);
    }

    return results;
  }
}