diff --git a/.changeset/vast-hands-type.md b/.changeset/vast-hands-type.md new file mode 100644 index 0000000..6552f10 --- /dev/null +++ b/.changeset/vast-hands-type.md @@ -0,0 +1,10 @@ +--- +"@trymeka/computer-provider-anchor-browser": patch +"@trymeka/computer-provider-scrapybara": patch +"@trymeka/computer-provider-core": patch +"@trymeka/computer-provider-e2b": patch +"@trymeka/ai-provider-vercel": patch +"@trymeka/core": patch +--- + +Give the agent clarity on screenshot numbers diff --git a/packages/ai-provider-vercel/package.json b/packages/ai-provider-vercel/package.json index 09705b3..40470a9 100644 --- a/packages/ai-provider-vercel/package.json +++ b/packages/ai-provider-vercel/package.json @@ -6,9 +6,7 @@ "access": "public" }, "description": "TODO: Add description", - "keywords": [ - "trymeka" - ], + "keywords": ["trymeka"], "repository": { "type": "git", "url": "git+https://github.com/trymeka/agent.git", @@ -16,11 +14,7 @@ }, "license": "MIT", "homepage": "https://github.com/trymeka/agent/tree/main/packages/ai-provider-vercel#readme", - "files": [ - "dist", - "!dist/**/*.map", - "README.md" - ], + "files": ["dist", "!dist/**/*.map", "README.md"], "exports": { ".": { "types": "./dist/index.d.ts", diff --git a/packages/computer-provider-anchor-browser/package.json b/packages/computer-provider-anchor-browser/package.json index a6ac902..7a4addd 100644 --- a/packages/computer-provider-anchor-browser/package.json +++ b/packages/computer-provider-anchor-browser/package.json @@ -6,9 +6,7 @@ "access": "public" }, "description": "TODO: Add description", - "keywords": [ - "trymeka" - ], + "keywords": ["trymeka"], "repository": { "type": "git", "url": "git+https://github.com/trymeka/agent.git", @@ -16,11 +14,7 @@ }, "license": "MIT", "homepage": "https://github.com/trymeka/agent/tree/main/packages/computer-provider-anchor-browser#readme", - "files": [ - "dist", - "!dist/**/*.map", - "README.md" - ], + "files": ["dist", "!dist/**/*.map", "README.md"], "exports": { ".": { "types": "./dist/index.d.ts", diff --git a/packages/computer-provider-core/package.json b/packages/computer-provider-core/package.json index e32c843..1b312cd 100644 --- a/packages/computer-provider-core/package.json +++ b/packages/computer-provider-core/package.json @@ -6,9 +6,7 @@ "access": "public" }, "description": "TODO: Add description", - "keywords": [ - "trymeka" - ], + "keywords": ["trymeka"], "repository": { "type": "git", "url": "git+https://github.com/trymeka/agent.git", @@ -16,11 +14,7 @@ }, "license": "MIT", "homepage": "https://github.com/trymeka/agent/tree/main/packages/computer-provider-core#readme", - "files": [ - "dist", - "!dist/**/*.map", - "README.md" - ], + "files": ["dist", "!dist/**/*.map", "README.md"], "exports": { ".": { "types": "./dist/index.d.ts", diff --git a/packages/computer-provider-e2b/package.json b/packages/computer-provider-e2b/package.json index 7f7318c..6dee422 100644 --- a/packages/computer-provider-e2b/package.json +++ b/packages/computer-provider-e2b/package.json @@ -6,9 +6,7 @@ "access": "public" }, "description": "TODO: Add description", - "keywords": [ - "trymeka" - ], + "keywords": ["trymeka"], "repository": { "type": "git", "url": "git+https://github.com/trymeka/agent.git", @@ -16,11 +14,7 @@ }, "license": "MIT", "homepage": "https://github.com/trymeka/agent/tree/main/packages/computer-provider-e2b#readme", - "files": [ - "dist", - "!dist/**/*.map", - "README.md" - ], + "files": ["dist", "!dist/**/*.map", "README.md"], "exports": { ".": { "types": "./dist/index.d.ts", diff --git a/packages/computer-provider-scrapybara/package.json b/packages/computer-provider-scrapybara/package.json index 3d76bff..c649da0 100644 --- a/packages/computer-provider-scrapybara/package.json +++ b/packages/computer-provider-scrapybara/package.json @@ -6,9 +6,7 @@ "access": "public" }, "description": "TODO: Add description", - "keywords": [ - "trymeka" - ], + "keywords": ["trymeka"], "repository": { "type": "git", "url": "git+https://github.com/trymeka/agent.git", @@ -16,11 +14,7 @@ }, "license": "MIT", "homepage": "https://github.com/trymeka/agent/tree/main/packages/computer-provider-scrapybara#readme", - "files": [ - "dist", - "!dist/**/*.map", - "README.md" - ], + "files": ["dist", "!dist/**/*.map", "README.md"], "exports": { ".": { "types": "./dist/index.d.ts", diff --git a/packages/core/package.json b/packages/core/package.json index a857343..f068736 100644 --- a/packages/core/package.json +++ b/packages/core/package.json @@ -6,9 +6,7 @@ "access": "public" }, "description": "TODO: Add description", - "keywords": [ - "trymeka" - ], + "keywords": ["trymeka"], "repository": { "type": "git", "url": "git+https://github.com/trymeka/agent.git", @@ -16,11 +14,7 @@ }, "license": "MIT", "homepage": "https://github.com/trymeka/agent/tree/main/packages/agent#readme", - "files": [ - "dist", - "!dist/**/*.map", - "README.md" - ], + "files": ["dist", "!dist/**/*.map", "README.md"], "exports": { ".": { "typescript": "./src/index.ts", diff --git a/packages/core/src/ai/agent.ts b/packages/core/src/ai/agent.ts index 4046339..6e9f343 100644 --- a/packages/core/src/ai/agent.ts +++ b/packages/core/src/ai/agent.ts @@ -420,8 +420,8 @@ export function createAgent(options: { { type: "text", text: `${task.instructions} - - Here is the current state of the screen:`, + + Starting screenshot:`, }, { type: "image", diff --git a/packages/core/src/ai/prompts/system.ts b/packages/core/src/ai/prompts/system.ts index e9a1b11..14b919c 100644 --- a/packages/core/src/ai/prompts/system.ts +++ b/packages/core/src/ai/prompts/system.ts @@ -53,7 +53,7 @@ IMPORTANT: You can interact with the ENTIRE computer screen, not just the browse 5. **Handle Obstacles Efficiently**: If you encounter obstacles that prevent completing the user's instructions, address them quickly or inform the user rather than continuing unsuccessfully. -6. **Context Awareness**: You have access to the most recent 7 steps and conversation history. Screenshots are labeled with step numbers (e.g., "Screenshot at Step 3") so you can track progress and avoid repeating failed actions from earlier steps. +6. **Context Awareness**: You have access to the most recent 7 steps and conversation history. Screenshots are labeled with step numbers (e.g., "Screenshot at Step 3") so you can track progress and avoid repeating failed actions from earlier steps. Always reference the highest step number screenshot to understand the current page state. 7. **Be exhaustive in your analysis and execution**: Think carefully about your approach, and remember that pages may require scrolling to see all elements. Something important that you are looking for may be hidden out of view and you should scroll to find it. diff --git a/packages/core/src/tools/computer.ts b/packages/core/src/tools/computer.ts index c61d7bd..233fb27 100644 --- a/packages/core/src/tools/computer.ts +++ b/packages/core/src/tools/computer.ts @@ -365,7 +365,7 @@ export function createComputerTool({ execute: async (args, context) => { const result = await computerProvider.performAction(args.action, context); - // Smart delay with network idle support + // Smart delay with network idle support for navigation actions only if (args.action.type === "click" || args.action.type === "double_click") { try { const instance = await computerProvider.getInstance( @@ -374,23 +374,14 @@ export function createComputerTool({ const page = (instance as { page?: Page })?.page; if (page?.waitForLoadState) { - await page.waitForLoadState("networkidle", { timeout: 1500 }); - } else { - await new Promise((resolve) => setTimeout(resolve, 1500)); + await page.waitForLoadState("networkidle", { timeout: 1000 }); } + // No fallback delay - if network idle doesn't work, proceed immediately } catch { - await new Promise((resolve) => setTimeout(resolve, 1500)); + // No fallback delay on error - proceed immediately } - } else { - // Other action types get fixed delays - const delay = - args.action.type === "type" - ? 300 - : args.action.type === "scroll" - ? 800 - : 500; - await new Promise((resolve) => setTimeout(resolve, delay)); } + // No delays for other action types - proceed immediately const screenshot = await computerProvider.takeScreenshot( context.sessionId, @@ -415,7 +406,7 @@ export function createComputerTool({ }, { type: "text" as const, - text: `Computer action on ${result.timestamp}, result: ${result.actionPerformed}. Reasoning: ${result.reasoning} Screenshot as attached.`, + text: `Computer action completed: ${result.actionPerformed}. Step ${context.step} screenshot attached.`, }, { type: "image" as const, diff --git a/packages/core/src/tools/wait.ts b/packages/core/src/tools/wait.ts index 86fc3af..c6ba07e 100644 --- a/packages/core/src/tools/wait.ts +++ b/packages/core/src/tools/wait.ts @@ -40,7 +40,7 @@ export function createWaitTool({ content: [ { type: "text" as const, - text: `Waited for ${args.duration} seconds. Reason: ${args.reasoning}. Screenshot as attached.`, + text: `Waited for ${args.duration} seconds. Reason: ${args.reasoning}. Screenshot at Step ${context.step} as attached.`, }, { type: "image" as const,