diff --git a/bin/agent-browser b/bin/agent-browser index e9239fe0..25b75376 100755 Binary files a/bin/agent-browser and b/bin/agent-browser differ diff --git a/cli/src/flags.rs b/cli/src/flags.rs index 713559ac..3caa57d2 100644 --- a/cli/src/flags.rs +++ b/cli/src/flags.rs @@ -9,6 +9,8 @@ pub struct Flags { pub headers: Option, pub executable_path: Option, pub cdp: Option, + pub ws: Option, + pub browser: Option, pub extensions: Vec, pub profile: Option, pub proxy: Option, @@ -33,6 +35,8 @@ pub fn parse_flags(args: &[String]) -> Flags { headers: None, executable_path: env::var("AGENT_BROWSER_EXECUTABLE_PATH").ok(), cdp: None, + ws: env::var("AGENT_BROWSER_WS").ok(), + browser: env::var("AGENT_BROWSER_BROWSER").ok(), extensions: extensions_env, profile: env::var("AGENT_BROWSER_PROFILE").ok(), proxy: env::var("AGENT_BROWSER_PROXY").ok(), @@ -79,6 +83,18 @@ pub fn parse_flags(args: &[String]) -> Flags { i += 1; } } + "--ws" => { + if let Some(s) = args.get(i + 1) { + flags.ws = Some(s.clone()); + i += 1; + } + } + "-b" | "--browser" => { + if let Some(s) = args.get(i + 1) { + flags.browser = Some(s.clone()); + i += 1; + } + } "--profile" => { if let Some(s) = args.get(i + 1) { flags.profile = Some(s.clone()); @@ -134,6 +150,9 @@ pub fn clean_args(args: &[String]) -> Vec { "--headers", "--executable-path", "--cdp", + "--ws", + "-b", + "--browser", "--extension", "--profile", "--proxy", diff --git a/cli/src/main.rs b/cli/src/main.rs index 84d5d5d4..5bc37aa8 100644 --- a/cli/src/main.rs +++ b/cli/src/main.rs @@ -248,6 +248,26 @@ fn main() { exit(1); } + if flags.ws.is_some() && flags.cdp.is_some() { + let msg = "Cannot use --ws and --cdp together"; + if flags.json { + println!(r#"{{"success":false,"error":"{}"}}"#, msg); + } else { + eprintln!("\x1b[31m✗\x1b[0m {}", msg); + } + exit(1); + } + + if flags.ws.is_some() && flags.provider.is_some() { + let msg = "Cannot use --ws and -p/--provider together"; + if flags.json { + println!(r#"{{"success":false,"error":"{}"}}"#, msg); + } else { + eprintln!("\x1b[31m✗\x1b[0m {}", msg); + } + exit(1); + } + if flags.provider.is_some() && !flags.extensions.is_empty() { let msg = "Cannot use --extension with -p/--provider (extensions require local browser)"; if flags.json { @@ -336,6 +356,56 @@ fn main() { } } + // Connect via Playwright WebSocket if --ws flag is set + // Used to connect to browsers like Camoufox (Firefox) that don't support CDP + if let Some(ref ws_value) = flags.ws { + if !ws_value.starts_with("ws://") && !ws_value.starts_with("wss://") { + let msg = "Invalid WebSocket URL: must start with ws:// or wss://"; + if flags.json { + println!(r#"{{"success":false,"error":"{}"}}"#, msg); + } else { + eprintln!("{} {}", color::error_indicator(), msg); + } + exit(1); + } + + let browser_type = flags.browser.as_deref().unwrap_or("chromium"); + if browser_type != "chromium" && browser_type != "firefox" && browser_type != "webkit" { + let msg = format!("Invalid browser type: '{}'. Must be chromium, firefox, or webkit", browser_type); + if flags.json { + println!(r#"{{"success":false,"error":"{}"}}"#, msg); + } else { + eprintln!("{} {}", color::error_indicator(), msg); + } + exit(1); + } + + let launch_cmd = json!({ + "id": gen_id(), + "action": "launch", + "wsEndpoint": ws_value, + "browser": browser_type + }); + + let err = match send_command(launch_cmd, &flags.session) { + Ok(resp) if resp.success => None, + Ok(resp) => Some( + resp.error + .unwrap_or_else(|| "WebSocket connection failed".to_string()), + ), + Err(e) => Some(e.to_string()), + }; + + if let Some(msg) = err { + if flags.json { + println!(r#"{{"success":false,"error":"{}"}}"#, msg); + } else { + eprintln!("{} {}", color::error_indicator(), msg); + } + exit(1); + } + } + // Launch with cloud provider if -p flag is set if let Some(ref provider) = flags.provider { let launch_cmd = json!({ @@ -360,8 +430,8 @@ fn main() { } } - // Launch headed browser or configure browser options (without CDP or provider) - if (flags.headed || flags.profile.is_some() || flags.proxy.is_some() || flags.args.is_some() || flags.user_agent.is_some()) && flags.cdp.is_none() && flags.provider.is_none() { + // Launch headed browser or configure browser options (without CDP, WS, or provider) + if (flags.headed || flags.profile.is_some() || flags.proxy.is_some() || flags.args.is_some() || flags.user_agent.is_some()) && flags.cdp.is_none() && flags.ws.is_none() && flags.provider.is_none() { let mut launch_cmd = json!({ "id": gen_id(), "action": "launch", diff --git a/cli/src/output.rs b/cli/src/output.rs index 12c157d6..3a402e32 100644 --- a/cli/src/output.rs +++ b/cli/src/output.rs @@ -1539,6 +1539,10 @@ Options: --full, -f Full page screenshot --headed Show browser window (not headless) --cdp Connect via CDP (Chrome DevTools Protocol) + --ws Connect via Playwright WebSocket (for Firefox/Camoufox) + e.g., --ws "ws://localhost:9222/browser" -b firefox + -b, --browser Browser type: chromium, firefox, webkit (default: chromium) + Used with --ws to specify browser type for WebSocket connection --debug Debug output --version, -V Show version @@ -1546,6 +1550,8 @@ Environment: AGENT_BROWSER_SESSION Session name (default: "default") AGENT_BROWSER_EXECUTABLE_PATH Custom browser executable path AGENT_BROWSER_STREAM_PORT Enable WebSocket streaming on port (e.g., 9223) + AGENT_BROWSER_WS Default WebSocket endpoint for connection + AGENT_BROWSER_BROWSER Default browser type (chromium, firefox, webkit) Examples: agent-browser open example.com @@ -1556,6 +1562,7 @@ Examples: agent-browser get text @e1 agent-browser screenshot --full agent-browser --cdp 9222 snapshot # Connect via CDP port + agent-browser --ws ws://localhost:9222/browser -b firefox snapshot # Connect to Camoufox agent-browser --profile ~/.myapp open example.com # Persistent profile "# ); diff --git a/package.json b/package.json index 911bf850..63afa2f2 100644 --- a/package.json +++ b/package.json @@ -52,7 +52,7 @@ }, "homepage": "https://github.com/vercel-labs/agent-browser#readme", "dependencies": { - "playwright-core": "^1.57.0", + "playwright-core": "^1.58.0", "ws": "^8.19.0", "zod": "^3.22.4" }, @@ -61,7 +61,7 @@ "@types/ws": "^8.18.1", "husky": "^9.1.7", "lint-staged": "^15.2.11", - "playwright": "^1.57.0", + "playwright": "^1.58.0", "prettier": "^3.7.4", "tsx": "^4.6.0", "typescript": "^5.3.0", diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 77dc0725..9af106a3 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -9,7 +9,7 @@ importers: .: dependencies: playwright-core: - specifier: ^1.57.0 + specifier: 1.57.0 version: 1.57.0 ws: specifier: ^8.19.0 @@ -31,8 +31,8 @@ importers: specifier: ^15.2.11 version: 15.5.2 playwright: - specifier: ^1.57.0 - version: 1.57.0 + specifier: ^1.58.0 + version: 1.58.0 prettier: specifier: ^3.7.4 version: 3.7.4 @@ -616,8 +616,13 @@ packages: engines: {node: '>=18'} hasBin: true - playwright@1.57.0: - resolution: {integrity: sha512-ilYQj1s8sr2ppEJ2YVadYBN0Mb3mdo9J0wQ+UuDhzYqURwSoW4n1Xs5vs7ORwgDGmyEh33tRMeS8KhdkMoLXQw==} + playwright-core@1.58.0: + resolution: {integrity: sha512-aaoB1RWrdNi3//rOeKuMiS65UCcgOVljU46At6eFcOFPFHWtd2weHRRow6z/n+Lec0Lvu0k9ZPKJSjPugikirw==} + engines: {node: '>=18'} + hasBin: true + + playwright@1.58.0: + resolution: {integrity: sha512-2SVA0sbPktiIY/MCOPX8e86ehA/e+tDNq+e5Y8qjKYti2Z/JG7xnronT/TXTIkKbYGWlCbuucZ6dziEgkoEjQQ==} engines: {node: '>=18'} hasBin: true @@ -1266,9 +1271,11 @@ snapshots: playwright-core@1.57.0: {} - playwright@1.57.0: + playwright-core@1.58.0: {} + + playwright@1.58.0: dependencies: - playwright-core: 1.57.0 + playwright-core: 1.58.0 optionalDependencies: fsevents: 2.3.2 diff --git a/skills/agent-browser/SKILL.md b/skills/agent-browser/SKILL.md index 3028a834..309464e9 100644 --- a/skills/agent-browser/SKILL.md +++ b/skills/agent-browser/SKILL.md @@ -69,6 +69,7 @@ agent-browser scroll down 500 # Scroll page (default: down 300px) agent-browser scrollintoview @e1 # Scroll element into view (alias: scrollinto) agent-browser drag @e1 @e2 # Drag and drop agent-browser upload @e1 file.pdf # Upload files +agent-browser download @e1 ./out # Download file by clicking element ``` ### Get information @@ -221,12 +222,18 @@ agent-browser eval "document.title" # Run JavaScript ```bash agent-browser --session ... # Isolated browser session +agent-browser --profile ... # Persistent browser profile directory agent-browser --json ... # JSON output for parsing agent-browser --headed ... # Show browser window (not headless) agent-browser --full ... # Full page screenshot (-f) agent-browser --cdp ... # Connect via Chrome DevTools Protocol +agent-browser --ws ... # Connect via Playwright WebSocket (for Firefox/Camoufox) +agent-browser -b ... # Browser type: chromium, firefox, webkit (default: chromium) agent-browser --proxy ... # Use proxy server +agent-browser --proxy-bypass # Bypass proxy for these hosts agent-browser --headers ... # HTTP headers scoped to URL's origin +agent-browser --user-agent ... # Custom User-Agent string +agent-browser --args ... # Browser launch args (comma separated) agent-browser --executable-path

# Custom browser executable agent-browser --extension ... # Load browser extension (repeatable) agent-browser --help # Show help (-h) @@ -246,10 +253,16 @@ agent-browser --proxy socks5://proxy.com:1080 open example.com ```bash AGENT_BROWSER_SESSION="mysession" # Default session name +AGENT_BROWSER_PROFILE="/path/to/profile" # Persistent browser profile AGENT_BROWSER_EXECUTABLE_PATH="/path/chrome" # Custom browser path AGENT_BROWSER_EXTENSIONS="/ext1,/ext2" # Comma-separated extension paths AGENT_BROWSER_STREAM_PORT="9223" # WebSocket streaming port -AGENT_BROWSER_HOME="/path/to/agent-browser" # Custom install location (for daemon.js) +AGENT_BROWSER_WS="ws://localhost:9222" # Default WebSocket endpoint +AGENT_BROWSER_BROWSER="chromium" # Default browser type (chromium, firefox, webkit) +AGENT_BROWSER_USER_AGENT="..." # Custom User-Agent string +AGENT_BROWSER_ARGS="--no-sandbox,..." # Browser launch arguments +AGENT_BROWSER_PROXY="http://proxy:8080" # Proxy server URL +AGENT_BROWSER_PROXY_BYPASS="localhost,*.local" # Bypass proxy for hosts ``` ## Example: Form submission @@ -317,6 +330,19 @@ agent-browser record start ./debug.webm # Record video from current page agent-browser record stop # Save recording ``` +## Connect to Camoufox (Firefox) + +```bash +# Connect via WebSocket to a running Camoufox instance +agent-browser --ws ws://localhost:9222/browser -b firefox open example.com +agent-browser --ws ws://localhost:9222/browser -b firefox snapshot -i + +# Or set environment variables +export AGENT_BROWSER_WS="ws://localhost:9222/browser" +export AGENT_BROWSER_BROWSER="firefox" +agent-browser open example.com +``` + ## Deep-dive documentation For detailed patterns and best practices, see: diff --git a/src/browser.ts b/src/browser.ts index 04cfa66a..cd6046e1 100644 --- a/src/browser.ts +++ b/src/browser.ts @@ -817,6 +817,7 @@ export class BrowserManager { async launch(options: LaunchCommand): Promise { // Determine CDP endpoint: prefer cdpUrl over cdpPort for flexibility const cdpEndpoint = options.cdpUrl ?? (options.cdpPort ? String(options.cdpPort) : undefined); + const wsEndpoint = options.wsEndpoint; const hasExtensions = !!options.extensions?.length; const hasProfile = !!options.profile; @@ -828,10 +829,19 @@ export class BrowserManager { throw new Error('Profile cannot be used with CDP connection'); } + if (hasExtensions && wsEndpoint) { + throw new Error('Extensions cannot be used with WebSocket connection'); + } + + if (hasProfile && wsEndpoint) { + throw new Error('Profile cannot be used with WebSocket connection'); + } + if (this.isLaunched()) { const needsRelaunch = - (!cdpEndpoint && this.cdpEndpoint !== null) || - (!!cdpEndpoint && this.needsCdpReconnect(cdpEndpoint)); + (!cdpEndpoint && !wsEndpoint && this.cdpEndpoint !== null) || + (!!cdpEndpoint && this.needsCdpReconnect(cdpEndpoint)) || + (!!wsEndpoint && this.cdpEndpoint !== wsEndpoint); if (needsRelaunch) { await this.close(); } else { @@ -839,6 +849,12 @@ export class BrowserManager { } } + // Connect via WebSocket endpoint (for Firefox-based browsers like Camoufox) + if (wsEndpoint) { + await this.connectViaWebSocket(wsEndpoint, options.browser ?? 'firefox'); + return; + } + if (cdpEndpoint) { await this.connectViaCDP(cdpEndpoint); return; @@ -997,6 +1013,67 @@ export class BrowserManager { } } + /** + * Connect to a running browser via Playwright WebSocket endpoint + * Used for Firefox-based browsers like Camoufox that don't support CDP + * @param wsEndpoint The WebSocket endpoint URL (ws:// or wss://) + * @param browserType The browser type to use for connection + */ + private async connectViaWebSocket( + wsEndpoint: string, + browserType: 'chromium' | 'firefox' | 'webkit' = 'firefox' + ): Promise { + if (!wsEndpoint) { + throw new Error('WebSocket endpoint is required for WebSocket connection'); + } + + if (!wsEndpoint.startsWith('ws://') && !wsEndpoint.startsWith('wss://')) { + throw new Error('WebSocket endpoint must start with ws:// or wss://'); + } + + const launcher = + browserType === 'firefox' ? firefox : browserType === 'webkit' ? webkit : chromium; + + const browser = await launcher.connect(wsEndpoint).catch(() => { + throw new Error( + `Failed to connect via WebSocket to ${wsEndpoint}. ` + + 'Make sure the browser server is running and accessible.' + ); + }); + + // Validate and set up state, cleaning up browser connection if anything fails + try { + const contexts = browser.contexts(); + let context: BrowserContext; + let page: Page; + + if (contexts.length === 0) { + // Create a new context if none exists + context = await browser.newContext(); + page = await context.newPage(); + } else { + context = contexts[0]; + const pages = context.pages(); + page = pages[0] ?? (await context.newPage()); + } + + // All validation passed - commit state + this.browser = browser; + this.cdpEndpoint = wsEndpoint; // Store WebSocket endpoint in cdpEndpoint for reconnect detection + + context.setDefaultTimeout(60000); + this.contexts.push(context); + this.pages.push(page); + this.activePageIndex = 0; + this.setupPageTracking(page); + this.setupContextTracking(context); + } catch (error) { + // Clean up browser connection if validation or setup failed + await browser.close().catch(() => {}); + throw error; + } + } + /** * Set up console, error, and close tracking for a page */ diff --git a/src/protocol.ts b/src/protocol.ts index 4e13e2c8..b16245ce 100644 --- a/src/protocol.ts +++ b/src/protocol.ts @@ -31,6 +31,13 @@ const launchSchema = baseCommandSchema.extend({ { message: 'CDP URL must start with ws://, wss://, http://, or https://' } ) .optional(), + wsEndpoint: z + .string() + .url() + .refine((url) => url.startsWith('ws://') || url.startsWith('wss://'), { + message: 'WebSocket endpoint must start with ws:// or wss://', + }) + .optional(), executablePath: z.string().optional(), extensions: z.array(z.string()).optional(), headers: z.record(z.string()).optional(), diff --git a/src/types.ts b/src/types.ts index 6fdcbb9c..f922b899 100644 --- a/src/types.ts +++ b/src/types.ts @@ -16,6 +16,7 @@ export interface LaunchCommand extends BaseCommand { executablePath?: string; cdpPort?: number; cdpUrl?: string; + wsEndpoint?: string; // WebSocket endpoint for connecting to browsers like Camoufox (Firefox) extensions?: string[]; profile?: string; // Path to persistent browser profile directory proxy?: {