Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feat: better evaluation results #165

Closed
wants to merge 18 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 11 additions & 8 deletions packages/backend/src/api/v1/evaluations/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -49,15 +49,20 @@ evaluations.post(
checks: [], // TODO: remove this legacy col from DB,
}

const [insertedEvaluation] =
const [evaluation] =
await sql`insert into evaluation ${sql(evaluationToInsert)} returning *`

const evaluation = await getEvaluation(insertedEvaluation.id)
const prompts = await sql`
select * from dataset_prompt where dataset_id = ${datasetId}
`

let count = 0

for (const prompt of evaluation.dataset.prompts) {
for (const variation of prompt.variations) {
for (const prompt of prompts) {
const variations = await sql`
select * from dataset_prompt_variation where prompt_id = ${prompt.id}
`
for (const variation of variations) {
for (const provider of evaluation.providers) {
count++
queue.add(() =>
Expand All @@ -66,7 +71,7 @@ evaluations.post(
promptId: prompt.id,
variation,
provider,
prompt: prompt.content,
prompt: prompt.messages,
checklistId,
}),
)
Expand Down Expand Up @@ -121,9 +126,7 @@ evaluations.get(
const results = await sql`
select
*,
p.id as prompt_id,
p.messages as prompt_content
--p.extra as prompt_extra
p.id as prompt_id
from
evaluation_result er
left join dataset_prompt p on p.id = er.prompt_id
Expand Down
68 changes: 0 additions & 68 deletions packages/backend/src/api/v1/evaluations/utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -133,71 +133,3 @@ export async function runEval({
console.error(error)
}
}

/**
 * Loads an evaluation together with its dataset, the dataset's prompts and
 * every prompt's variations, and reshapes the flat joined rows into a nested
 * object.
 *
 * The query left-joins evaluation -> dataset -> dataset_prompt ->
 * dataset_prompt_variation, so it yields one row per (prompt, variation)
 * pair. Rows are grouped by prompt id so that each prompt appears exactly
 * once, with its variations attached in row order.
 *
 * @param evaluationId - Id of the evaluation row to load.
 * @returns The evaluation fields plus
 *   `dataset: { id, slug, prompts: [{ id, content, variations }] }`.
 * @throws Error when no evaluation with the given id exists.
 */
export async function getEvaluation(evaluationId: string) {
  const rows = await sql`
    select
      e.id as id,
      e.created_at as created_at,
      e.name as name,
      e.project_id as project_id,
      e.owner_id as owner_id,
      e.providers as providers,
      e.checks as checks,
      d.id as dataset_id,
      d.slug as dataset_slug,
      p.id as prompt_id,
      p.messages as prompt_messages,
      pv.id as variation_id,
      pv.variables,
      pv.context,
      pv.ideal_output
    from
      evaluation e
      left join dataset d on e.dataset_id = d.id
      left join dataset_prompt p on d.id = p.dataset_id
      left join dataset_prompt_variation pv on pv.prompt_id = p.id
    where
      e.id = ${evaluationId}
  `

  // Fail with a clear message instead of a TypeError from destructuring
  // `undefined` when the id does not match any evaluation.
  if (rows.length === 0) {
    throw new Error(`Evaluation ${evaluationId} not found`)
  }

  // The evaluation/dataset columns are identical on every row, so the first
  // row is enough to read them.
  const {
    id,
    createdAt,
    name,
    ownerId,
    projectId,
    providers,
    checks,
    datasetId,
    datasetSlug,
  } = rows[0]

  // Group rows by prompt in a single pass. A Map keeps first-seen order, so
  // prompts come out in the order the query returned them.
  const promptsById = new Map()

  for (const row of rows) {
    // Left join: a dataset without prompts yields a row whose prompt side is
    // null; that is not a real prompt.
    if (row.promptId == null) continue

    let prompt = promptsById.get(row.promptId)
    if (prompt === undefined) {
      prompt = {
        id: row.promptId,
        content: row.promptMessages,
        variations: [],
      }
      promptsById.set(row.promptId, prompt)
    }

    // Left join: a prompt without variations yields a null variation side.
    if (row.variationId != null) {
      prompt.variations.push({
        id: row.variationId,
        variables: row.variables,
        context: row.context,
        idealOutput: row.idealOutput,
      })
    }
  }

  const evaluation = {
    id,
    createdAt,
    name,
    projectId,
    ownerId,
    providers,
    checks,
    dataset: {
      id: datasetId,
      slug: datasetSlug,
      prompts: Array.from(promptsById.values()),
    },
  }

  return evaluation
}
8 changes: 8 additions & 0 deletions packages/db/0009.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
-- Add an optional checklist reference to evaluation. The constraint is
-- dropped first so the migration can be re-run; deleting a checklist sets
-- the referencing evaluation.checklist_id to null.
alter table evaluation add column if not exists checklist_id uuid;
alter table evaluation drop constraint if exists evaluation_checklist_id_fkey;
alter table evaluation add constraint evaluation_checklist_id_fkey foreign key (checklist_id) references checklist(id) on delete set null;

-- Remove the evaluation_prompt and evaluation_prompt_variation tables, along
-- with any objects that depend on them (cascade).
drop table if exists evaluation_prompt cascade;
drop table if exists evaluation_prompt_variation cascade;

-- Tie evaluation results to dataset prompts; deleting a prompt deletes its
-- results. Dropped first so this statement is re-runnable like the ones above.
-- NOTE(review): adding the constraint will fail if evaluation_result already
-- contains prompt_id values with no matching dataset_prompt row; confirm the
-- data is clean before running.
alter table evaluation_result drop constraint if exists "fk_evaluation_result_prompt_id";
alter table evaluation_result add constraint "fk_evaluation_result_prompt_id" foreign key (prompt_id) references dataset_prompt(id) on delete cascade;
7 changes: 5 additions & 2 deletions packages/frontend/components/blocks/Feedback.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -34,9 +34,12 @@ export default function Feedback({
})

return (
<Tooltip label="Feedback from parent run" position="bottom">
<Tooltip
label="Feedback from parent run"
position="bottom"
disabled={!isFromParent}
>
<Indicator inline disabled={!isFromParent} color="red" size={10}>
{/* <Tooltip disabled={!isFromParent} label={"Feedback from parent run"}> */}
<Group
style={{
padding: isFromParent ? "3px 6px" : "",
Expand Down
Loading
Loading