Skip to content

Commit bedb996

Browse files
committed
Merge remote-tracking branch 'origin' into npour/more-evals
2 parents d7ccb0e + ceaaed2 commit bedb996

File tree

8 files changed, with 194 additions and 156 deletions.

.env.example

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,5 @@
11
OPENAI_API_KEY=""
22
BROWSERBASE_API_KEY=""
33
BRAINTRUST_API_KEY=""
4-
ANTHROPIC_API_KEY=""
4+
ANTHROPIC_API_KEY=""
5+
HEADLESS=false

.github/workflows/ci.yml

Lines changed: 38 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,38 @@
1+
name: Continuous Integration
2+
3+
on:
4+
pull_request:
5+
branches:
6+
- main
7+
8+
jobs:
9+
run-evals:
10+
runs-on: ubuntu-latest
11+
timeout-minutes: 25
12+
13+
steps:
14+
- name: Check out repository code
15+
uses: actions/checkout@v4
16+
17+
- name: Set up Node.js
18+
uses: actions/setup-node@v4
19+
with:
20+
node-version: '20'
21+
22+
- name: Install pnpm
23+
run: npm install -g pnpm
24+
25+
- name: Install dependencies
26+
run: pnpm install --no-frozen-lockfile
27+
28+
- name: Install Playwright browsers
29+
run: pnpm exec playwright install --with-deps
30+
31+
- name: Run Evals
32+
env:
33+
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
34+
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
35+
BRAINTRUST_API_KEY: ${{ secrets.BRAINTRUST_API_KEY }}
36+
HEADLESS: true
37+
run: pnpm evals
38+
timeout-minutes: 20

evals/index.eval.ts

Lines changed: 75 additions & 29 deletions
Original file line number | Diff line number | Diff line change
@@ -3,9 +3,13 @@ import { Stagehand } from "../lib";
33
import { z } from "zod";
44
import { evaluateExample, chosenBananalyzerEvals } from "./bananalyzer-ts";
55
import { createExpressServer } from "./bananalyzer-ts/server/expressServer";
6+
import process from "process";
67

78
const vanta = async () => {
8-
const stagehand = new Stagehand({ env: "LOCAL" });
9+
const stagehand = new Stagehand({
10+
env: "LOCAL",
11+
headless: process.env.HEADLESS !== "false",
12+
});
913
await stagehand.init();
1014

1115
await stagehand.page.goto("https://www.vanta.com/");
@@ -33,7 +37,10 @@ const vanta = async () => {
3337
};
3438

3539
const vanta_h = async () => {
36-
const stagehand = new Stagehand({ env: "LOCAL" });
40+
const stagehand = new Stagehand({
41+
env: "LOCAL",
42+
headless: process.env.HEADLESS !== "false",
43+
});
3744
await stagehand.init();
3845

3946
await stagehand.page.goto("https://www.vanta.com/");
@@ -64,8 +71,31 @@ const simple_google_search = async () => {
6471
return currentUrl.startsWith(expectedUrl);
6572
};
6673

74+
const simple_google_search = async () => {
75+
const stagehand = new Stagehand({
76+
env: "LOCAL",
77+
headless: process.env.HEADLESS !== "false",
78+
});
79+
await stagehand.init();
80+
81+
await stagehand.page.goto("https://www.google.com");
82+
83+
await stagehand.act({
84+
action: 'Search for "OpenAI"',
85+
});
86+
87+
const expectedUrl = "https://www.google.com/search?q=OpenAI";
88+
const currentUrl = await stagehand.page.url();
89+
await stagehand.context.close();
90+
91+
return currentUrl.startsWith(expectedUrl);
92+
};
93+
6794
const peeler_simple = async () => {
68-
const stagehand = new Stagehand({ env: "LOCAL" });
95+
const stagehand = new Stagehand({
96+
env: "LOCAL",
97+
headless: process.env.HEADLESS !== "false",
98+
});
6999
await stagehand.init();
70100

71101
await stagehand.page.goto(`file://${process.cwd()}/evals/assets/peeler.html`);
@@ -85,7 +115,8 @@ const peeler_simple = async () => {
85115
const peeler_complex = async () => {
86116
const stagehand = new Stagehand({
87117
env: "LOCAL",
88-
verbose: true,
118+
verbose: 1,
119+
headless: process.env.HEADLESS !== "false",
89120
});
90121
await stagehand.init();
91122

@@ -233,7 +264,8 @@ const twitter_signup = async () => {
233264
const wikipedia = async () => {
234265
const stagehand = new Stagehand({
235266
env: "LOCAL",
236-
verbose: true,
267+
verbose: 2,
268+
headless: process.env.HEADLESS !== "false",
237269
});
238270
await stagehand.init();
239271

@@ -252,43 +284,57 @@ const wikipedia = async () => {
252284
const costar = async () => {
253285
const stagehand = new Stagehand({
254286
env: "LOCAL",
255-
verbose: true,
287+
verbose: 2,
256288
debugDom: true,
289+
headless: process.env.HEADLESS !== "false",
257290
});
258291
await stagehand.init();
292+
// TODO: fix this eval - does not work in headless mode
293+
try {
294+
await Promise.race([
295+
stagehand.page.goto("https://www.costar.com/"),
296+
new Promise((_, reject) =>
297+
setTimeout(() => reject(new Error("Navigation timeout")), 30000),
298+
),
299+
]);
300+
await stagehand.waitForSettledDom();
301+
302+
await stagehand.act({ action: "click on the first article" });
303+
304+
await stagehand.act({ action: "find the footer of the page" });
305+
306+
await stagehand.waitForSettledDom();
307+
const articleTitle = await stagehand.extract({
308+
instruction: "extract the title of the article",
309+
schema: z.object({
310+
title: z.string().describe("the title of the article").nullable(),
311+
}),
312+
modelName: "gpt-4o-2024-08-06",
313+
});
259314

260-
await stagehand.page.goto("https://www.costar.com/");
261-
await stagehand.waitForSettledDom();
262-
263-
await stagehand.act({ action: "click on the first article" });
264-
265-
await stagehand.act({ action: "find the footer of the page" });
266-
267-
await stagehand.waitForSettledDom();
268-
const articleTitle = await stagehand.extract({
269-
instruction: "extract the title of the article",
270-
schema: z.object({
271-
title: z.string().describe("the title of the article").nullable(),
272-
}),
273-
modelName: "gpt-4o-2024-08-06",
274-
});
275-
276-
console.log("articleTitle", articleTitle);
315+
console.log("articleTitle", articleTitle);
277316

278-
// Check if the title is more than 5 characters
279-
const isTitleValid =
280-
articleTitle.title !== null && articleTitle.title.length > 5;
317+
// Check if the title is more than 5 characters
318+
const isTitleValid =
319+
articleTitle.title !== null && articleTitle.title.length > 5;
281320

282-
await stagehand.context.close();
321+
await stagehand.context.close();
283322

284-
return isTitleValid;
323+
return isTitleValid;
324+
} catch (error) {
325+
console.error(`Error in costar function: ${error.message}`);
326+
return { title: null };
327+
} finally {
328+
await stagehand.context.close();
329+
}
285330
};
286331

287332
const google_jobs = async () => {
288333
const stagehand = new Stagehand({
289334
env: "LOCAL",
290-
verbose: true,
335+
verbose: 2,
291336
debugDom: true,
337+
headless: process.env.HEADLESS !== "false",
292338
});
293339
await stagehand.init({ modelName: "gpt-4o-2024-08-06" });
294340

evals/playground.ts

Lines changed: 39 additions & 65 deletions
Original file line number | Diff line number | Diff line change
@@ -2,84 +2,58 @@ import { Eval } from "braintrust";
22
import { Stagehand } from "../lib";
33
import { z } from "zod";
44

5-
const google_jobs = async () => {
5+
const costar = async () => {
66
const stagehand = new Stagehand({
77
env: "LOCAL",
8-
verbose: true,
8+
verbose: 2,
99
debugDom: true,
10+
headless: process.env.HEADLESS !== "false",
1011
});
11-
await stagehand.init({ modelName: "gpt-4o-2024-08-06" });
12-
13-
await stagehand.page.goto("https://www.google.com/");
14-
await stagehand.waitForSettledDom();
15-
16-
await stagehand.act({ action: "click on the about page" });
17-
18-
await stagehand.act({ action: "click on the careers page" });
19-
20-
await stagehand.act({ action: "input data scientist into role" });
21-
22-
await stagehand.act({ action: "input new york city into location" });
23-
24-
await stagehand.act({ action: "click on the search button" });
25-
26-
await stagehand.act({
27-
action: "click on the learn more button for the first job",
28-
});
29-
30-
const jobDetails = await stagehand.extract({
31-
instruction:
32-
"Extract the following details from the job posting: application deadline, minimum qualifications (degree and years of experience), and preferred qualifications (degree and years of experience)",
33-
schema: z.object({
34-
applicationDeadline: z
35-
.string()
36-
.describe("The date until which the application window will be open"),
37-
minimumQualifications: z.object({
38-
degree: z.string().describe("The minimum required degree"),
39-
yearsOfExperience: z
40-
.number()
41-
.describe("The minimum required years of experience"),
12+
await stagehand.init();
13+
// TODO: fix this eval
14+
try {
15+
await Promise.race([
16+
stagehand.page.goto("https://www.costar.com/"),
17+
new Promise((_, reject) =>
18+
setTimeout(() => reject(new Error("Navigation timeout")), 30000),
19+
),
20+
]);
21+
await stagehand.waitForSettledDom();
22+
23+
await stagehand.act({ action: "click on the first article" });
24+
25+
await stagehand.act({ action: "find the footer of the page" });
26+
27+
await stagehand.waitForSettledDom();
28+
const articleTitle = await stagehand.extract({
29+
instruction: "extract the title of the article",
30+
schema: z.object({
31+
title: z.string().describe("the title of the article").nullable(),
4232
}),
43-
preferredQualifications: z.object({
44-
degree: z.string().describe("The preferred degree"),
45-
yearsOfExperience: z
46-
.number()
47-
.describe("The preferred years of experience"),
48-
}),
49-
}),
50-
modelName: "gpt-4o-2024-08-06",
51-
});
52-
53-
console.log("Job Details:", jobDetails);
33+
modelName: "gpt-4o-2024-08-06",
34+
});
5435

55-
const isJobDetailsValid =
56-
jobDetails &&
57-
Object.values(jobDetails).every(
58-
(value) =>
59-
value !== null &&
60-
value !== undefined &&
61-
value !== "" &&
62-
(typeof value !== "object" ||
63-
Object.values(value).every(
64-
(v) =>
65-
v !== null &&
66-
v !== undefined &&
67-
v !== "" &&
68-
(typeof v === "number" || typeof v === "string"),
69-
)),
70-
);
36+
console.log("articleTitle", articleTitle);
7137

72-
await stagehand.context.close();
38+
// Check if the title is more than 5 characters
39+
const isTitleValid =
40+
articleTitle.title !== null && articleTitle.title.length > 5;
7341

74-
console.log("Job Details valid:", isJobDetailsValid);
42+
await stagehand.context.close();
7543

76-
return isJobDetailsValid;
44+
return isTitleValid;
45+
} catch (error) {
46+
console.error(`Error in costar function: ${error.message}`);
47+
return { title: null };
48+
} finally {
49+
await stagehand.context.close();
50+
}
7751
};
7852

7953
async function main() {
80-
const [googleJobsResult] = await Promise.all([google_jobs()]);
54+
const [costarResult] = await Promise.all([costar()]);
8155

82-
console.log("Google Jobs result:", googleJobsResult);
56+
console.log("Costar result:", costarResult);
8357
}
8458

8559
main().catch(console.error);

lib/dom/process.ts

Lines changed: 25 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -69,7 +69,31 @@ async function processElements(chunk: number) {
6969
if (isTextNode(element)) {
7070
outputString += `${index}:${element.textContent}\n`;
7171
} else if (isElementNode(element)) {
72-
outputString += `${index}:${element.outerHTML.trim()}\n`;
72+
const tagName = element.tagName.toLowerCase();
73+
74+
// Collect essential attributes
75+
const attributes: string[] = [];
76+
if (element.id) {
77+
attributes.push(`id="${element.id}"`);
78+
}
79+
if (element.className) {
80+
attributes.push(`class="${element.className}"`);
81+
}
82+
if (element.getAttribute('href')) {
83+
attributes.push(`href="${element.getAttribute('href')}"`);
84+
}
85+
if (element.getAttribute('src')) {
86+
attributes.push(`src="${element.getAttribute('src')}"`);
87+
}
88+
89+
// Build the simplified element string
90+
const openingTag = `<${tagName}${
91+
attributes.length > 0 ? ' ' + attributes.join(' ') : ''
92+
}>`;
93+
const closingTag = `</${tagName}>`;
94+
const textContent = element.textContent.trim();
95+
96+
outputString += `${index}:${openingTag}${textContent}${closingTag}\n`;
7397
}
7498

7599
selectorMap[index] = xpath;

0 commit comments

Comments
 (0)