Skip to content

Commit

Permalink
4.0.0
Browse files: browse the repository at this point in the history
  • Loading branch information
andrefs committed Nov 1, 2024
1 parent 25d5ec6 commit 6dd0763
Show file tree
Hide file tree
Showing 23 changed files with 77 additions and 67 deletions.
4 changes: 2 additions & 2 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "punuy-eval",
"version": "3.3.0",
"version": "4.0.0",
"description": "",
"main": "index.js",
"scripts": {
Expand Down
2 changes: 1 addition & 1 deletion src/lib/experiments/aux.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ export function normalizeScale(
return (
targetScale.min +
((value - sourceScale.min) * (targetScale.max - targetScale.min)) /
(sourceScale.max - sourceScale.min)
(sourceScale.max - sourceScale.min)
);
}

Expand Down
7 changes: 4 additions & 3 deletions src/lib/experiments/compare-prompts/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -273,7 +273,8 @@ async function performMulti(

const res = [];
logger.info(
`Preparing to run experiment ${name}, ${trials} times on each variable combination (${trials}x${varCombs.length
`Preparing to run experiment ${name}, ${trials} times on each variable combination (${trials}x${
varCombs.length
}):\n${varCombs
.map(vc => "\t" + JSON.stringify(getVarIds(vc)))
.join(",\n")}.`
Expand Down Expand Up @@ -380,8 +381,8 @@ async function evaluate(exps: ExperimentData<CPExpTypes>[]) {
`🆚 Comparing ${comp.variables
.map(v => `[${v}]`)
.join(" and ")} with fixed variables ${JSON.stringify(
comp.fixedValueConfig
)}\n${tablePP}`
comp.fixedValueConfig
)}\n${tablePP}`
);
}

Expand Down
3 changes: 2 additions & 1 deletion src/lib/experiments/experiment/aux.ts
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,8 @@ export async function saveExpVarCombData<T extends GenericExpTypes>(
`💾 Saving experiment ${name} with traceId ${traceId} to ${filename}.`
);
logger.info(
`🥇 It ran successfully ${data.results.raw.length}/${data.meta.trials
`🥇 It ran successfully ${data.results.raw.length}/${
data.meta.trials
} times with variables ${JSON.stringify(getVarIds(data.variables))}.`
);

Expand Down
2 changes: 1 addition & 1 deletion src/lib/experiments/experiment/conversation.ts
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ export async function getTurnResponse<T extends GenericExpTypes>(
const failedAttempts = [];
logger.info(
` 👥 ${prompt.pairs.length === 1 ? "pair" : "pairs"} ` +
prompt.pairs.map(p => `[${p[0]}, ${p[1]}]`).join(", ")
prompt.pairs.map(p => `[${p[0]}, ${p[1]}]`).join(", ")
);
while (failedAttempts.length < maxTurnAttempts) {
const faCount = failedAttempts.length;
Expand Down
11 changes: 6 additions & 5 deletions src/lib/experiments/experiment/perform.ts
Original file line number Diff line number Diff line change
Expand Up @@ -67,10 +67,10 @@ export async function performMulti<T extends GenericExpTypes>(
for (const [index, vc] of varCombs.entries()) {
logger.info(
"⚗️ " +
pc.inverse(
`Running experiment ${index + 1}/${varCombs.length}: ${this.name}`
) +
` with variables ${JSON.stringify(getVarIds(vc))}.`
pc.inverse(
`Running experiment ${index + 1}/${varCombs.length}: ${this.name}`
) +
` with variables ${JSON.stringify(getVarIds(vc))}.`
);
res.push(await this.perform(vc, trials, Date.now(), folder));
addUsage(this.totalUsage, res[res.length - 1].usage);
Expand All @@ -97,7 +97,8 @@ function startUpLogs(
throw "🧐 No variable combinations to run experiments with, aborting.";
}
logger.info(
`🔬 Preparing to run experiment ${name
`🔬 Preparing to run experiment ${
name
}, ${trials} times on each variable combination (${trials}x${varCombs.length}): \n${varCombs
.map(vc => "\t" + JSON.stringify(getVarIds(vc)))
.join(",\n")}.`
Expand Down
20 changes: 10 additions & 10 deletions src/lib/experiments/experiment/print.ts
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,11 @@ export function printUsage<T extends GenericExpTypes>(
}
logger.info(
"📈💸 " +
(final ? "Final usage" : "Usage") +
" estimate:\n" +
Object.values(usage)
.map(u => `\t${JSON.stringify(u)} `)
.join("\n")
(final ? "Final usage" : "Usage") +
" estimate:\n" +
Object.values(usage)
.map(u => `\t${JSON.stringify(u)} `)
.join("\n")
);
}

Expand Down Expand Up @@ -112,9 +112,9 @@ function varsToStr(vars: Set<string>, variables: ExpVars) {
return vars.size === 1
? variables[Array.from(vars)[0] as keyof ExpVars]!.id
: Array.from(vars)
.map(v => [v, variables[v as keyof ExpVars]!.id])
.map(([v, id]) => `${v}=${id}`)
.join(";");
.map(v => [v, variables[v as keyof ExpVars]!.id])
.map(([v, id]) => `${v}=${id}`)
.join(";");
}

export function generateComparisons(
Expand Down Expand Up @@ -229,8 +229,8 @@ export function printExpResTable<T extends GenericExpTypes>(
`🆚 Comparing ${comp.variables
.map(v => `[${v}]`)
.join(" and ")} with fixed variables ${JSON.stringify(
comp.fixedValueConfig
)} \n${tablePP} \n${csv} `
comp.fixedValueConfig
)} \n${tablePP} \n${csv} `
);
}
}
10 changes: 5 additions & 5 deletions src/lib/models/anthropic.ts
Original file line number Diff line number Diff line change
Expand Up @@ -87,11 +87,11 @@ const buildModel = (
dataObj: msg,
usage: msg.usage
? {
inputTokens: msg.usage.input_tokens,
outputTokens: msg.usage.output_tokens,
totalTokens: msg.usage.input_tokens + msg.usage.output_tokens,
modelId,
}
inputTokens: msg.usage.input_tokens,
outputTokens: msg.usage.output_tokens,
totalTokens: msg.usage.input_tokens + msg.usage.output_tokens,
modelId,
}
: undefined,
getDataText: () => {
let dataText;
Expand Down
16 changes: 8 additions & 8 deletions src/lib/models/cohere.ts
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ const buildModel = (
modelId: ModelId,
pricing?: ModelPricing
): Model => {
const makeRequest = async function(
const makeRequest = async function (
prompt: string,
toolParams: ModelTool
): Promise<CohereModelResponse> {
Expand All @@ -74,13 +74,13 @@ const buildModel = (
dataObj: prediction,
usage: prediction.usage?.billedUnits
? {
inputTokens: prediction.usage.billedUnits.inputTokens || 0,
outputTokens: prediction.usage.billedUnits.outputTokens || 0,
totalTokens:
(prediction.usage.billedUnits.inputTokens || 0) +
(prediction.usage.billedUnits.outputTokens || 0),
modelId,
}
inputTokens: prediction.usage.billedUnits.inputTokens || 0,
outputTokens: prediction.usage.billedUnits.outputTokens || 0,
totalTokens:
(prediction.usage.billedUnits.inputTokens || 0) +
(prediction.usage.billedUnits.outputTokens || 0),
modelId,
}
: undefined,
getDataText: () => {
let dataText;
Expand Down
10 changes: 5 additions & 5 deletions src/lib/models/google.ts
Original file line number Diff line number Diff line change
Expand Up @@ -129,11 +129,11 @@ const buildModel = (
dataObj: result.response.candidates![0],
usage: result.response.usageMetadata
? {
inputTokens: result.response.usageMetadata.promptTokenCount!,
outputTokens: result.response.usageMetadata.candidatesTokenCount!,
totalTokens: result.response.usageMetadata.totalTokenCount!,
modelId,
}
inputTokens: result.response.usageMetadata.promptTokenCount!,
outputTokens: result.response.usageMetadata.candidatesTokenCount!,
totalTokens: result.response.usageMetadata.totalTokenCount!,
modelId,
}
: undefined,
getDataText: () => {
let dataText;
Expand Down
10 changes: 5 additions & 5 deletions src/lib/models/mistral.ts
Original file line number Diff line number Diff line change
Expand Up @@ -84,11 +84,11 @@ const buildModel = (
dataObj: chatResponse,
usage: chatResponse.usage
? {
inputTokens: chatResponse.usage?.promptTokens,
outputTokens: chatResponse.usage?.completionTokens,
totalTokens: chatResponse.usage?.totalTokens,
modelId,
}
inputTokens: chatResponse.usage?.promptTokens,
outputTokens: chatResponse.usage?.completionTokens,
totalTokens: chatResponse.usage?.totalTokens,
modelId,
}
: undefined,
getDataText: () => {
let dataText;
Expand Down
12 changes: 6 additions & 6 deletions src/lib/models/openai.ts
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ const buildModel = (
modelId: ModelId,
pricing?: ModelPricing
) => {
const makeRequest = async function(prompt: string, toolParams: ModelTool) {
const makeRequest = async function (prompt: string, toolParams: ModelTool) {
const req = {
model: modelId,
messages: [
Expand Down Expand Up @@ -78,11 +78,11 @@ const buildModel = (
dataObj: completion,
usage: completion.usage
? {
inputTokens: completion.usage?.prompt_tokens,
outputTokens: completion.usage?.completion_tokens,
totalTokens: completion.usage?.total_tokens,
modelId,
}
inputTokens: completion.usage?.prompt_tokens,
outputTokens: completion.usage?.completion_tokens,
totalTokens: completion.usage?.total_tokens,
modelId,
}
: undefined,
getDataText: () => {
let dataText;
Expand Down
3 changes: 2 additions & 1 deletion src/scripts/batchVsSinglePair.ts
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,8 @@ const bvsp = async (vars: ExpVarMatrix) => {
for (const exp of res.experiments) {
logger.info(
{ ...exp.results.aggregated?.resultTypes },
`${exp.meta.name} ${JSON.stringify(getVarIds(exp.variables))} ${exp.results.aggregated?.allDataAvg
`${exp.meta.name} ${JSON.stringify(getVarIds(exp.variables))} ${
exp.results.aggregated?.allDataAvg
}`
);
logger.debug(
Expand Down
6 changes: 3 additions & 3 deletions src/scripts/compareMC30.ts
Original file line number Diff line number Diff line change
Expand Up @@ -45,9 +45,9 @@ const compareMC30 = async () => {
if (res.usage) {
logger.info(
"📈 Usage estimate:\n" +
Object.values(res.usage)
.map(u => `\t${JSON.stringify(u)}`)
.join("\n")
Object.values(res.usage)
.map(u => `\t${JSON.stringify(u)}`)
.join("\n")
);
}

Expand Down
6 changes: 3 additions & 3 deletions src/scripts/comparePrompts.ts
Original file line number Diff line number Diff line change
Expand Up @@ -34,9 +34,9 @@ const comparePromptsMain = async (vars: ExpVarMatrix) => {
if (res.usage) {
logger.info(
"📈 Usage estimate:\n" +
Object.values(res.usage)
.map(u => `\t${JSON.stringify(u)}`)
.join("\n")
Object.values(res.usage)
.map(u => `\t${JSON.stringify(u)}`)
.join("\n")
);
}

Expand Down
3 changes: 2 additions & 1 deletion src/scripts/dsNameFromDsSample.ts
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,8 @@ const nameFromSample = async (vars: ExpVarMatrix) => {
for (const r of res.experiments) {
logger.info(
{ ...r.results.aggregated?.resultTypes },
`${r.meta.name} ${JSON.stringify(getVarIds(r.variables))} ${r.results.aggregated?.okDataAvg
`${r.meta.name} ${JSON.stringify(getVarIds(r.variables))} ${
r.results.aggregated?.okDataAvg
}`
);
}
Expand Down
3 changes: 2 additions & 1 deletion src/scripts/dsPaperFromDsName.ts
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,8 @@ const paperFromName = async (vars: ExpVarMatrix) => {
for (const r of res.experiments) {
logger.info(
{ ...r.results.aggregated?.resultTypes },
`${r.meta.name} ${JSON.stringify(getVarIds(r.variables))} ${r.results.aggregated?.okDataAvg
`${r.meta.name} ${JSON.stringify(getVarIds(r.variables))} ${
r.results.aggregated?.okDataAvg
}`
);
}
Expand Down
3 changes: 2 additions & 1 deletion src/scripts/dsSampleFromDsName.ts
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,8 @@ const sampleFromName = async (vars: ExpVarMatrix) => {
for (const exp of res.experiments) {
logger.info(
{ ...exp.results.aggregated?.resultTypes },
`${exp.meta.name} ${JSON.stringify(getVarIds(exp.variables))} ${exp.results.aggregated?.okDataAvg
`${exp.meta.name} ${JSON.stringify(getVarIds(exp.variables))} ${
exp.results.aggregated?.okDataAvg
}`
);
logger.debug(
Expand Down
3 changes: 2 additions & 1 deletion src/scripts/dsSampleFromDsSample.ts
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,8 @@ const sampleFromSample = async (vars: ExpVarMatrix) => {
for (const r of res.experiments) {
logger.info(
{ ...r.results.aggregated?.resultTypes },
`${r.meta.name} ${JSON.stringify(getVarIds(r.variables))} ${r.results.aggregated?.okDataAvg
`${r.meta.name} ${JSON.stringify(getVarIds(r.variables))} ${
r.results.aggregated?.okDataAvg
}`
);
}
Expand Down
3 changes: 2 additions & 1 deletion src/scripts/dsValuesExactMatches.ts
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,8 @@ const valuesExactMatch = async (vars: ExpVarMatrix) => {
for (const exp of res.experiments) {
logger.info(
{ ...exp.results.aggregated?.resultTypes },
`${exp.meta.name} ${JSON.stringify(getVarIds(exp.variables))} ${exp.results.aggregated?.okDataAvg
`${exp.meta.name} ${JSON.stringify(getVarIds(exp.variables))} ${
exp.results.aggregated?.okDataAvg
}`
);
logger.debug(
Expand Down
2 changes: 1 addition & 1 deletion src/scripts/fix-eval.ts
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,7 @@ async function main(dirPath: string, expName?: string) {
const fixedExpVCData = await reEvalExperiment(expVCData, exp);
if (
expVCData.results.aggregated?.allDataAvg !==
fixedExpVCData.results.aggregated?.allDataAvg &&
fixedExpVCData.results.aggregated?.allDataAvg &&
isNumber(expVCData.results.aggregated?.allDataAvg) &&
isNumber(fixedExpVCData.results.aggregated?.allDataAvg)
) {
Expand Down
3 changes: 2 additions & 1 deletion src/scripts/predictionCorrelation.ts
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,8 @@ const predCorr = async (vars: ExpVarMatrix) => {
for (const exp of res.experiments) {
logger.info(
{ ...exp.results.aggregated?.resultTypes },
`${exp.meta.name} ${JSON.stringify(getVarIds(exp.variables))} ${exp.results.aggregated?.okDataAvg
`${exp.meta.name} ${JSON.stringify(getVarIds(exp.variables))} ${
exp.results.aggregated?.okDataAvg
}`
);
logger.debug(
Expand Down

0 comments on commit 6dd0763

Please sign in to comment.