Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file modified bin/agent-browser
Binary file not shown.
19 changes: 19 additions & 0 deletions cli/src/flags.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ pub struct Flags {
pub headers: Option<String>,
pub executable_path: Option<String>,
pub cdp: Option<String>,
pub ws: Option<String>,
pub browser: Option<String>,
pub extensions: Vec<String>,
pub profile: Option<String>,
pub proxy: Option<String>,
Expand All @@ -33,6 +35,8 @@ pub fn parse_flags(args: &[String]) -> Flags {
headers: None,
executable_path: env::var("AGENT_BROWSER_EXECUTABLE_PATH").ok(),
cdp: None,
ws: env::var("AGENT_BROWSER_WS").ok(),
browser: env::var("AGENT_BROWSER_BROWSER").ok(),
extensions: extensions_env,
profile: env::var("AGENT_BROWSER_PROFILE").ok(),
proxy: env::var("AGENT_BROWSER_PROXY").ok(),
Expand Down Expand Up @@ -79,6 +83,18 @@ pub fn parse_flags(args: &[String]) -> Flags {
i += 1;
}
}
"--ws" => {
if let Some(s) = args.get(i + 1) {
flags.ws = Some(s.clone());
i += 1;
}
}
"-b" | "--browser" => {
if let Some(s) = args.get(i + 1) {
flags.browser = Some(s.clone());
i += 1;
}
}
"--profile" => {
if let Some(s) = args.get(i + 1) {
flags.profile = Some(s.clone());
Expand Down Expand Up @@ -134,6 +150,9 @@ pub fn clean_args(args: &[String]) -> Vec<String> {
"--headers",
"--executable-path",
"--cdp",
"--ws",
"-b",
"--browser",
"--extension",
"--profile",
"--proxy",
Expand Down
74 changes: 72 additions & 2 deletions cli/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -248,6 +248,26 @@ fn main() {
exit(1);
}

if flags.ws.is_some() && flags.cdp.is_some() {
let msg = "Cannot use --ws and --cdp together";
if flags.json {
println!(r#"{{"success":false,"error":"{}"}}"#, msg);
} else {
eprintln!("\x1b[31m✗\x1b[0m {}", msg);
}
exit(1);
}

if flags.ws.is_some() && flags.provider.is_some() {
let msg = "Cannot use --ws and -p/--provider together";
if flags.json {
println!(r#"{{"success":false,"error":"{}"}}"#, msg);
} else {
eprintln!("\x1b[31m✗\x1b[0m {}", msg);
}
exit(1);
}

if flags.provider.is_some() && !flags.extensions.is_empty() {
let msg = "Cannot use --extension with -p/--provider (extensions require local browser)";
if flags.json {
Expand Down Expand Up @@ -336,6 +356,56 @@ fn main() {
}
}

// Connect via Playwright WebSocket if --ws (or the AGENT_BROWSER_WS env var) is set.
// Used to connect to browsers like Camoufox (Firefox) that don't support CDP.
if let Some(ref ws_value) = flags.ws {
if !ws_value.starts_with("ws://") && !ws_value.starts_with("wss://") {
let msg = "Invalid WebSocket URL: must start with ws:// or wss://";
if flags.json {
println!(r#"{{"success":false,"error":"{}"}}"#, msg);
} else {
eprintln!("{} {}", color::error_indicator(), msg);
}
exit(1);
}

let browser_type = flags.browser.as_deref().unwrap_or("chromium");
if browser_type != "chromium" && browser_type != "firefox" && browser_type != "webkit" {
let msg = format!("Invalid browser type: '{}'. Must be chromium, firefox, or webkit", browser_type);
if flags.json {
println!(r#"{{"success":false,"error":"{}"}}"#, msg);
} else {
eprintln!("{} {}", color::error_indicator(), msg);
}
exit(1);
}

let launch_cmd = json!({
"id": gen_id(),
"action": "launch",
"wsEndpoint": ws_value,
"browser": browser_type
});

let err = match send_command(launch_cmd, &flags.session) {
Ok(resp) if resp.success => None,
Ok(resp) => Some(
resp.error
.unwrap_or_else(|| "WebSocket connection failed".to_string()),
),
Err(e) => Some(e.to_string()),
};

if let Some(msg) = err {
if flags.json {
println!(r#"{{"success":false,"error":"{}"}}"#, msg);
} else {
eprintln!("{} {}", color::error_indicator(), msg);
}
exit(1);
}
}

// Launch with cloud provider if -p flag is set
if let Some(ref provider) = flags.provider {
let launch_cmd = json!({
Expand All @@ -360,8 +430,8 @@ fn main() {
}
}

// Launch headed browser or configure browser options (without CDP or provider)
if (flags.headed || flags.profile.is_some() || flags.proxy.is_some() || flags.args.is_some() || flags.user_agent.is_some()) && flags.cdp.is_none() && flags.provider.is_none() {
// Launch headed browser or configure browser options (without CDP, WS, or provider)
if (flags.headed || flags.profile.is_some() || flags.proxy.is_some() || flags.args.is_some() || flags.user_agent.is_some()) && flags.cdp.is_none() && flags.ws.is_none() && flags.provider.is_none() {
let mut launch_cmd = json!({
"id": gen_id(),
"action": "launch",
Expand Down
7 changes: 7 additions & 0 deletions cli/src/output.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1539,13 +1539,19 @@ Options:
--full, -f Full page screenshot
--headed Show browser window (not headless)
--cdp <port> Connect via CDP (Chrome DevTools Protocol)
--ws <url> Connect via Playwright WebSocket (for Firefox/Camoufox)
e.g., --ws "ws://localhost:9222/browser" -b firefox
-b, --browser <type> Browser type: chromium, firefox, webkit (default: chromium)
Used with --ws to specify browser type for WebSocket connection
--debug Debug output
--version, -V Show version

Environment:
AGENT_BROWSER_SESSION Session name (default: "default")
AGENT_BROWSER_EXECUTABLE_PATH Custom browser executable path
AGENT_BROWSER_STREAM_PORT Enable WebSocket streaming on port (e.g., 9223)
AGENT_BROWSER_WS Default WebSocket endpoint for connection
AGENT_BROWSER_BROWSER Default browser type (chromium, firefox, webkit)

Examples:
agent-browser open example.com
Expand All @@ -1556,6 +1562,7 @@ Examples:
agent-browser get text @e1
agent-browser screenshot --full
agent-browser --cdp 9222 snapshot # Connect via CDP port
agent-browser --ws ws://localhost:9222/browser -b firefox snapshot # Connect to Camoufox
agent-browser --profile ~/.myapp open example.com # Persistent profile
"#
);
Expand Down
4 changes: 2 additions & 2 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@
},
"homepage": "https://github.com/vercel-labs/agent-browser#readme",
"dependencies": {
"playwright-core": "^1.57.0",
"playwright-core": "^1.58.0",
"ws": "^8.19.0",
"zod": "^3.22.4"
},
Expand All @@ -61,7 +61,7 @@
"@types/ws": "^8.18.1",
"husky": "^9.1.7",
"lint-staged": "^15.2.11",
"playwright": "^1.57.0",
"playwright": "^1.58.0",
"prettier": "^3.7.4",
"tsx": "^4.6.0",
"typescript": "^5.3.0",
Expand Down
21 changes: 14 additions & 7 deletions pnpm-lock.yaml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

28 changes: 27 additions & 1 deletion skills/agent-browser/SKILL.md
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ agent-browser scroll down 500 # Scroll page (default: down 300px)
agent-browser scrollintoview @e1 # Scroll element into view (alias: scrollinto)
agent-browser drag @e1 @e2 # Drag and drop
agent-browser upload @e1 file.pdf # Upload files
agent-browser download @e1 ./out # Download file by clicking element
```

### Get information
Expand Down Expand Up @@ -221,12 +222,18 @@ agent-browser eval "document.title" # Run JavaScript

```bash
agent-browser --session <name> ... # Isolated browser session
agent-browser --profile <path> ... # Persistent browser profile directory
agent-browser --json ... # JSON output for parsing
agent-browser --headed ... # Show browser window (not headless)
agent-browser --full ... # Full page screenshot (-f)
agent-browser --cdp <port> ... # Connect via Chrome DevTools Protocol
agent-browser --ws <url> ... # Connect via Playwright WebSocket (for Firefox/Camoufox)
agent-browser -b <type> ... # Browser type used with --ws: chromium, firefox, webkit (default: chromium; long form: --browser)
agent-browser --proxy <url> ... # Use proxy server
agent-browser --proxy-bypass <hosts> # Bypass proxy for these hosts
agent-browser --headers <json> ... # HTTP headers scoped to URL's origin
agent-browser --user-agent <ua> ... # Custom User-Agent string
agent-browser --args <args> ... # Browser launch args (comma separated)
agent-browser --executable-path <p> # Custom browser executable
agent-browser --extension <path> ... # Load browser extension (repeatable)
agent-browser --help # Show help (-h)
Expand All @@ -246,10 +253,16 @@ agent-browser --proxy socks5://proxy.com:1080 open example.com

```bash
AGENT_BROWSER_SESSION="mysession" # Default session name
AGENT_BROWSER_PROFILE="/path/to/profile" # Persistent browser profile
AGENT_BROWSER_EXECUTABLE_PATH="/path/chrome" # Custom browser path
AGENT_BROWSER_EXTENSIONS="/ext1,/ext2" # Comma-separated extension paths
AGENT_BROWSER_STREAM_PORT="9223" # WebSocket streaming port
AGENT_BROWSER_HOME="/path/to/agent-browser" # Custom install location (for daemon.js)
AGENT_BROWSER_WS="ws://localhost:9222/browser" # Default Playwright WebSocket endpoint (same as --ws)
AGENT_BROWSER_BROWSER="chromium" # Default browser type (chromium, firefox, webkit)
AGENT_BROWSER_USER_AGENT="..." # Custom User-Agent string
AGENT_BROWSER_ARGS="--no-sandbox,..." # Browser launch arguments
AGENT_BROWSER_PROXY="http://proxy:8080" # Proxy server URL
AGENT_BROWSER_PROXY_BYPASS="localhost,*.local" # Bypass proxy for hosts
```

## Example: Form submission
Expand Down Expand Up @@ -317,6 +330,19 @@ agent-browser record start ./debug.webm # Record video from current page
agent-browser record stop # Save recording
```

## Connect to Camoufox (Firefox)

```bash
# Connect via WebSocket to a running Camoufox instance
agent-browser --ws ws://localhost:9222/browser -b firefox open example.com
agent-browser --ws ws://localhost:9222/browser -b firefox snapshot -i

# Or set environment variables (equivalent to passing --ws and -b on every command)
export AGENT_BROWSER_WS="ws://localhost:9222/browser"
export AGENT_BROWSER_BROWSER="firefox"
agent-browser open example.com
```

## Deep-dive documentation

For detailed patterns and best practices, see:
Expand Down
Loading