From 825a20abd80e901218b00dd7579487e4afaaa3c4 Mon Sep 17 00:00:00 2001 From: Edward Sun Date: Mon, 15 Sep 2025 17:33:05 -0700 Subject: [PATCH 1/3] Fix computer use tool call (screenshot) problem and add smarter waiting --- packages/ai-provider-vercel/package.json | 10 +--- .../package.json | 10 +--- packages/computer-provider-core/package.json | 10 +--- packages/computer-provider-e2b/package.json | 10 +--- .../computer-provider-scrapybara/package.json | 10 +--- packages/core/package.json | 14 +++--- packages/core/src/tools/computer.ts | 50 ++++++++++++++++++- pnpm-lock.yaml | 3 ++ 8 files changed, 68 insertions(+), 49 deletions(-) diff --git a/packages/ai-provider-vercel/package.json b/packages/ai-provider-vercel/package.json index b3b619a..52a177e 100644 --- a/packages/ai-provider-vercel/package.json +++ b/packages/ai-provider-vercel/package.json @@ -6,9 +6,7 @@ "access": "public" }, "description": "TODO: Add description", - "keywords": [ - "trymeka" - ], + "keywords": ["trymeka"], "repository": { "type": "git", "url": "git+https://github.com/trymeka/agent.git", @@ -16,11 +14,7 @@ }, "license": "MIT", "homepage": "https://github.com/trymeka/agent/tree/main/packages/ai-provider-vercel#readme", - "files": [ - "dist", - "!dist/**/*.map", - "README.md" - ], + "files": ["dist", "!dist/**/*.map", "README.md"], "exports": { ".": { "types": "./dist/index.d.ts", diff --git a/packages/computer-provider-anchor-browser/package.json b/packages/computer-provider-anchor-browser/package.json index 73d64f8..e78d958 100644 --- a/packages/computer-provider-anchor-browser/package.json +++ b/packages/computer-provider-anchor-browser/package.json @@ -6,9 +6,7 @@ "access": "public" }, "description": "TODO: Add description", - "keywords": [ - "trymeka" - ], + "keywords": ["trymeka"], "repository": { "type": "git", "url": "git+https://github.com/trymeka/agent.git", @@ -16,11 +14,7 @@ }, "license": "MIT", "homepage": "https://github.com/trymeka/agent/tree/main/packages/computer-provider-anchor-browser#readme", - "files": [ - "dist", - "!dist/**/*.map", - "README.md" - ], + "files": ["dist", "!dist/**/*.map", "README.md"], "exports": { ".": { "types": "./dist/index.d.ts", diff --git a/packages/computer-provider-core/package.json b/packages/computer-provider-core/package.json index 6b649df..64d06e0 100644 --- a/packages/computer-provider-core/package.json +++ b/packages/computer-provider-core/package.json @@ -6,9 +6,7 @@ "access": "public" }, "description": "TODO: Add description", - "keywords": [ - "trymeka" - ], + "keywords": ["trymeka"], "repository": { "type": "git", "url": "git+https://github.com/trymeka/agent.git", @@ -16,11 +14,7 @@ }, "license": "MIT", "homepage": "https://github.com/trymeka/agent/tree/main/packages/computer-provider-core#readme", - "files": [ - "dist", - "!dist/**/*.map", - "README.md" - ], + "files": ["dist", "!dist/**/*.map", "README.md"], "exports": { ".": { "types": "./dist/index.d.ts", diff --git a/packages/computer-provider-e2b/package.json b/packages/computer-provider-e2b/package.json index 4ed8e72..77bfe30 100644 --- a/packages/computer-provider-e2b/package.json +++ b/packages/computer-provider-e2b/package.json @@ -6,9 +6,7 @@ "access": "public" }, "description": "TODO: Add description", - "keywords": [ - "trymeka" - ], + "keywords": ["trymeka"], "repository": { "type": "git", "url": "git+https://github.com/trymeka/agent.git", @@ -16,11 +14,7 @@ }, "license": "MIT", "homepage": "https://github.com/trymeka/agent/tree/main/packages/computer-provider-e2b#readme", - "files": [ - "dist", - "!dist/**/*.map", - "README.md" - ], + "files": ["dist", "!dist/**/*.map", "README.md"], "exports": { ".": { "types": "./dist/index.d.ts", diff --git a/packages/computer-provider-scrapybara/package.json b/packages/computer-provider-scrapybara/package.json index 19161e3..941bf28 100644 --- a/packages/computer-provider-scrapybara/package.json +++ b/packages/computer-provider-scrapybara/package.json @@ -6,9 +6,7 @@ "access": "public" }, "description": "TODO: Add description", - "keywords": [ - "trymeka" - ], + "keywords": ["trymeka"], "repository": { "type": "git", "url": "git+https://github.com/trymeka/agent.git", @@ -16,11 +14,7 @@ }, "license": "MIT", "homepage": "https://github.com/trymeka/agent/tree/main/packages/computer-provider-scrapybara#readme", - "files": [ - "dist", - "!dist/**/*.map", - "README.md" - ], + "files": ["dist", "!dist/**/*.map", "README.md"], "exports": { ".": { "types": "./dist/index.d.ts", diff --git a/packages/core/package.json b/packages/core/package.json index 5a12562..013a0d2 100644 --- a/packages/core/package.json +++ b/packages/core/package.json @@ -6,9 +6,7 @@ "access": "public" }, "description": "TODO: Add description", - "keywords": [ - "trymeka" - ], + "keywords": ["trymeka"], "repository": { "type": "git", "url": "git+https://github.com/trymeka/agent.git", @@ -16,11 +14,7 @@ }, "license": "MIT", "homepage": "https://github.com/trymeka/agent/tree/main/packages/agent#readme", - "files": [ - "dist", - "!dist/**/*.map", - "README.md" - ], + "files": ["dist", "!dist/**/*.map", "README.md"], "exports": { ".": { "typescript": "./src/index.ts", @@ -54,8 +48,12 @@ "dependencies": { "zod": "^3.24.4" }, + "peerDependencies": { + "playwright-core": "^1.54.1" + }, "devDependencies": { "@trymeka/typescript": "workspace:*", + "playwright-core": "^1.54.1", "tsup": "^8.5.0", "typescript": "^5.8.3" } diff --git a/packages/core/src/tools/computer.ts b/packages/core/src/tools/computer.ts index d2d57ab..23b6a32 100644 --- a/packages/core/src/tools/computer.ts +++ b/packages/core/src/tools/computer.ts @@ -1,4 +1,5 @@ import z from "zod"; +import type { Page } from "playwright-core"; import type { Tool } from "."; import { createAgentLogUpdate } from "../utils/agent-log"; @@ -67,7 +68,26 @@ export const parseComputerToolArgs = (args: string | object) => { if (actionString.includes("wait")) { return { schema: waitActionSchema, args: parsedArgs }; } - return null; + + // Handle screenshot action specifically - screenshots should be automatic, not manual + if (actionString.includes("screenshot")) { + console.warn( + "[parseComputerToolArgs] LLM requested screenshot action - screenshots should be automatic", + ); + return { + schema: waitActionSchema, + args: { ...parsedArgs, action: { type: "wait", duration: 0 } }, + }; + } + + // Fallback for any other unrecognized actions - don't return null + console.warn( + `[parseComputerToolArgs] Unrecognized computer action: ${actionString}`, + ); + return { + schema: waitActionSchema, + args: { ...parsedArgs, action: { type: "wait", duration: 0 } }, + }; }; const clickActionSchema = z @@ -344,6 +364,34 @@ export function createComputerTool({ }, execute: async (args, context) => { const result = await computerProvider.performAction(args.action, context); + + // Smart delay with network idle support + if (args.action.type === "click" || args.action.type === "double_click") { + try { + const instance = await computerProvider.getInstance( + context.sessionId, + ); + const page = (instance as { page?: Page })?.page; + + if (page?.waitForLoadState) { + await page.waitForLoadState("networkidle", { timeout: 1500 }); + } else { + await new Promise((resolve) => setTimeout(resolve, 1500)); + } + } catch { + await new Promise((resolve) => setTimeout(resolve, 1500)); + } + } else { + // Other action types get fixed delays + const delay = + args.action.type === "type" + ? 300 + : args.action.type === "scroll" + ? 800 + : 500; + await new Promise((resolve) => setTimeout(resolve, delay)); + } + const screenshot = await computerProvider.takeScreenshot( context.sessionId, ); diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 3660bd2..d9f3d36 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -192,6 +192,9 @@ importers: '@trymeka/typescript': specifier: workspace:* version: link:../../tooling/typescript + playwright-core: + specifier: ^1.54.1 + version: 1.54.1 tsup: specifier: ^8.5.0 version: 8.5.0(postcss@8.5.6)(tsx@4.20.3)(typescript@5.8.3) From e59d9c3e3d36db27e5b04e7338547d3f13ef9da5 Mon Sep 17 00:00:00 2001 From: Edward Sun Date: Mon, 15 Sep 2025 17:34:24 -0700 Subject: [PATCH 2/3] Add changeset --- .changeset/heavy-buckets-occur.md | 10 ++++++++++ 1 file changed, 10 insertions(+) create mode 100644 .changeset/heavy-buckets-occur.md diff --git a/.changeset/heavy-buckets-occur.md b/.changeset/heavy-buckets-occur.md new file mode 100644 index 0000000..1fbf76b --- /dev/null +++ b/.changeset/heavy-buckets-occur.md @@ -0,0 +1,10 @@ +--- +"@trymeka/computer-provider-anchor-browser": patch +"@trymeka/computer-provider-scrapybara": patch +"@trymeka/computer-provider-core": patch +"@trymeka/computer-provider-e2b": patch +"@trymeka/ai-provider-vercel": patch +"@trymeka/core": patch +--- + +Fix computer use "screenshot" tool call problem and add smarter waiting between steps From c8a23b4b3af4b2badbf6b8e5855e0278c847ebcb Mon Sep 17 00:00:00 2001 From: Edward Sun Date: Mon, 15 Sep 2025 17:35:52 -0700 Subject: [PATCH 3/3] Import order --- packages/core/src/tools/computer.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/core/src/tools/computer.ts b/packages/core/src/tools/computer.ts index 23b6a32..c61d7bd 100644 --- a/packages/core/src/tools/computer.ts +++ b/packages/core/src/tools/computer.ts @@ -1,5 +1,5 @@ -import z from "zod"; import type { Page } from "playwright-core"; +import z from "zod"; import type { Tool } from "."; import { createAgentLogUpdate } from "../utils/agent-log";