Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin' into npour/use-browserbase-in-g…
Browse files Browse the repository at this point in the history
…ithub-action
  • Loading branch information
navidkpr committed Oct 3, 2024
2 parents 5d384ce + 46c379b commit 8b9727a
Show file tree
Hide file tree
Showing 6 changed files with 77 additions and 106 deletions.
21 changes: 14 additions & 7 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,13 @@ jobs:
run-evals:
runs-on: ubuntu-latest
timeout-minutes: 25
env:
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
BRAINTRUST_API_KEY: ${{ secrets.BRAINTRUST_API_KEY }}
BROWSERBASE_API_KEY: ${{ secrets.BROWSERBASE_API_KEY }}
HEADLESS: true
EVAL_ENV: browserbase

steps:
- name: Check out repository code
Expand All @@ -28,13 +35,13 @@ jobs:
- name: Install Playwright browsers
run: pnpm exec playwright install --with-deps

- name: Initialize Banana Analyzer Evals
run: ./evals/bananalyzer-ts/init.sh

- name: Run Evals
env:
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
BRAINTRUST_API_KEY: ${{ secrets.BRAINTRUST_API_KEY }}
BROWSERBASE_API_KEY: ${{ secrets.BROWSERBASE_API_KEY }}
HEADLESS: true
EVAL_ENV: browserbase
run: pnpm evals
timeout-minutes: 20

# Uncomment the lines below to ssh into the container for debugging
# - name: Install sshx
# run: curl -sSf https://sshx.io/get | sh -s run
4 changes: 4 additions & 0 deletions .vscode/settings.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
{
"editor.defaultFormatter": "esbenp.prettier-vscode",
"editor.formatOnSave": true
}
5 changes: 4 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,10 @@ See the API Reference below for more detail on the `act()`, `observe()`, and `ex
### `Stagehand()`

- `env`: `'LOCAL'` or `'BROWSERBASE'`.
- `verbose`: a `boolean` that enables more logging during automation
- `verbose`: an `integer` that enables several levels of logging during automation:
- `0`: limited to no logging
- `1`: SDK-level logging
- `2`: LLM-client level logging (most granular)
- `debugDom`: a `boolean` that draws bounding boxes around elements presented to the LLM during automation.

### Methods
Expand Down
6 changes: 5 additions & 1 deletion evals/bananalyzer-ts/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,11 @@ export async function evaluateExample(
return false;
}

const stagehand = new Stagehand({ env: "LOCAL", verbose: 1 });
const stagehand = new Stagehand({
env: "LOCAL",
verbose: 1,
headless: process.env.HEADLESS !== "false",
});
await stagehand.init();

let server: Server | null = null;
Expand Down
144 changes: 49 additions & 95 deletions evals/index.eval.ts
Original file line number Diff line number Diff line change
Expand Up @@ -134,119 +134,74 @@ const peeler_complex = async () => {
};

const extract_collaborators_from_github_repository = async () => {
const stagehand = new Stagehand({ env, verbose: 1 });
const stagehand = new Stagehand({
env: "LOCAL",
verbose: 1,
headless: process.env.HEADLESS !== "false",
});
await stagehand.init();

const timeoutPromise = new Promise((_, reject) =>
setTimeout(() => reject(new Error("Operation timed out")), 60000),
);

try {
const extractionPromise = (async () => {
await stagehand.page.goto("https://github.com/facebook/react");
await stagehand.act({
action: "find the contributors section",
});

await stagehand.waitForSettledDom();

const { contributors } = await stagehand.extract({
instruction: "Extract top 20 contributors of this repository",
schema: z.object({
contributors: z.array(
z.object({
github_username: z.string(),
information: z.string(),
}),
),
}),
modelName: "gpt-4o-2024-08-06",
});

console.log("Extracted collaborators:", contributors);
return contributors.length === 20;
})();

const result = await Promise.race([extractionPromise, timeoutPromise]);
await stagehand.context.close();
return result;
} catch (error) {
console.error("Error or timeout occurred:", error);
await stagehand.context.close();
return false;
}
};
await stagehand.page.goto("https://github.com/facebook/react");
await stagehand.act({
action: "find the contributors section",
});

const extract_last_twenty_github_commits = async () => {
const stagehand = new Stagehand({ env, verbose: 1 });
await stagehand.init();
await stagehand.waitForSettledDom();

const timeoutPromise = new Promise((_, reject) =>
setTimeout(() => reject(new Error("Operation timed out")), 60000),
);
const { contributors } = await stagehand.extract({
instruction: "Extract top 20 contributors of this repository",
schema: z.object({
contributors: z.array(
z.object({
github_username: z.string(),
information: z.string(),
}),
),
}),
modelName: "gpt-4o-2024-08-06",
});

try {
const extractionPromise = (async () => {
await stagehand.page.goto("https://github.com/facebook/react");

await stagehand.waitForSettledDom();

const { commits } = await stagehand.extract({
instruction: "Extract last 20 commits",
schema: z.object({
commits: z.array(
z.object({
commit_message: z.string(),
commit_url: z.string(),
commit_hash: z.string(),
}),
),
}),
modelName: "gpt-4o-2024-08-06",
});

console.log("Extracted commits:", commits);
return commits.length === 20;
})();

const result = await Promise.race([extractionPromise, timeoutPromise]);
console.log("Extracted collaborators:", contributors);
await stagehand.context.close();
return result;
return contributors.length === 20;
} catch (error) {
console.error("Error or timeout occurred:", error);
await stagehand.context.close();
return false;
}
};

const twitter_signup = async () => {
const stagehand = new Stagehand({ env, verbose: 1 });
const extract_last_twenty_github_commits = async () => {
const stagehand = new Stagehand({
env: "LOCAL",
verbose: 1,
headless: process.env.HEADLESS !== "false",
});
await stagehand.init();

const timeoutPromise = new Promise((_, reject) =>
setTimeout(() => reject(new Error("Operation timed out")), 120000),
);

try {
const signupPromise = (async () => {
await stagehand.page.goto("https://twitter.com");

await stagehand.act({
action:
'sign up with email "{random 12 digit number}@gmail.com", password "TEstTEst.1234". Use whatever else you want for all other fields. You can only stop if you have reached the verification stage.',
});
await stagehand.page.goto("https://github.com/facebook/react");

await stagehand.waitForSettledDom();

// Add a check here to verify if signup was successful
// For example, check if a certain element is visible after signup
await stagehand.waitForSettledDom();

return true; // Return true if signup was successful
})();
const { commits } = await stagehand.extract({
instruction: "Extract last 20 commits",
schema: z.object({
commits: z.array(
z.object({
commit_message: z.string(),
commit_url: z.string(),
commit_hash: z.string(),
}),
),
}),
modelName: "gpt-4o-2024-08-06",
});

const result = await Promise.race([signupPromise, timeoutPromise]);
console.log("Extracted commits:", commits);
await stagehand.context.close();
return result;
return commits.length === 20;
} catch (error) {
console.error("Error or timeout occurred:", error);
await stagehand.context.close();
Expand Down Expand Up @@ -406,7 +361,6 @@ const tasks = {
simple_google_search,
extract_collaborators_from_github_repository,
extract_last_twenty_github_commits,
twitter_signup,
costar,
google_jobs,
};
Expand All @@ -416,7 +370,7 @@ const exactMatch = (args: { input; output; expected? }) => {

return {
name: "Exact match",
score: Boolean(args.output) ? 1 : 0,
score: args.output === true || args.output?.success == true,
};
};

Expand All @@ -443,7 +397,6 @@ const testcases = [
{ input: { name: "simple_google_search" } },
{ input: { name: "extract_collaborators_from_github_repository" } },
{ input: { name: "extract_last_twenty_github_commits" } },
{ input: { name: "twitter_signup" } },
// { input: { name: "costar" } },
{ input: { name: "google_jobs" } },
...chosenBananalyzerEvals.map((evalItem: any) => ({
Expand Down Expand Up @@ -506,4 +459,5 @@ Eval("stagehand", {
}
},
scores: [exactMatch],
// trialCount: 3,
});
3 changes: 1 addition & 2 deletions evals/playground.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import { Eval } from "braintrust";
import { Stagehand } from "../lib";
import { z } from "zod";

Expand Down Expand Up @@ -116,7 +115,7 @@ async function main() {
// const [costarResult] = await Promise.all([costar()]);
const [homedepotResult] = await Promise.all([homedepot()]);

console.log("Homedepot result:", homedepotResult);
console.log("Result:", homedepotResult);
}

main().catch(console.error);

0 comments on commit 8b9727a

Please sign in to comment.