From c68cf7ea1504eb9c4acaa9948050614cad619ecc Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Fri, 6 Feb 2026 03:45:34 -0600 Subject: [PATCH] chore: use npx to invoke agent-browser in skill docs and templates --- skills/agent-browser/SKILL.md | 320 +++++++++--------- .../references/authentication.md | 110 +++--- .../agent-browser/references/proxy-support.md | 38 +-- .../references/session-management.md | 80 ++--- .../agent-browser/references/snapshot-refs.md | 56 +-- .../references/video-recording.md | 84 ++--- .../templates/authenticated-session.sh | 42 +-- .../templates/capture-workflow.sh | 34 +- .../templates/form-automation.sh | 40 +-- 9 files changed, 402 insertions(+), 402 deletions(-) diff --git a/skills/agent-browser/SKILL.md b/skills/agent-browser/SKILL.md index ab3ea3c6..d65861d8 100644 --- a/skills/agent-browser/SKILL.md +++ b/skills/agent-browser/SKILL.md @@ -1,7 +1,7 @@ --- name: agent-browser description: Automates browser interactions for web testing, form filling, screenshots, and data extraction. Use when the user needs to navigate websites, interact with web pages, fill forms, take screenshots, test web applications, or extract information from web pages. -allowed-tools: Bash(agent-browser:*) +allowed-tools: Bash(npx agent-browser:*) --- # Browser Automation with agent-browser @@ -9,17 +9,17 @@ allowed-tools: Bash(agent-browser:*) ## Quick start ```bash -agent-browser open # Navigate to page -agent-browser snapshot -i # Get interactive elements with refs -agent-browser click @e1 # Click element by ref -agent-browser fill @e2 "text" # Fill input by ref -agent-browser close # Close browser +npx agent-browser open # Navigate to page +npx agent-browser snapshot -i # Get interactive elements with refs +npx agent-browser click @e1 # Click element by ref +npx agent-browser fill @e2 "text" # Fill input by ref +npx agent-browser close # Close browser ``` ## Core workflow -1. Navigate: `agent-browser open ` -2. 
Snapshot: `agent-browser snapshot -i` (returns elements with refs like `@e1`, `@e2`) +1. Navigate: `npx agent-browser open ` +2. Snapshot: `npx agent-browser snapshot -i` (returns elements with refs like `@e1`, `@e2`) 3. Interact using refs from the snapshot 4. Re-snapshot after navigation or significant DOM changes @@ -28,87 +28,87 @@ agent-browser close # Close browser ### Navigation ```bash -agent-browser open # Navigate to URL (aliases: goto, navigate) +npx agent-browser open # Navigate to URL (aliases: goto, navigate) # Supports: https://, http://, file://, about:, data:// # Auto-prepends https:// if no protocol given -agent-browser back # Go back -agent-browser forward # Go forward -agent-browser reload # Reload page -agent-browser close # Close browser (aliases: quit, exit) -agent-browser connect 9222 # Connect to browser via CDP port +npx agent-browser back # Go back +npx agent-browser forward # Go forward +npx agent-browser reload # Reload page +npx agent-browser close # Close browser (aliases: quit, exit) +npx agent-browser connect 9222 # Connect to browser via CDP port ``` ### Snapshot (page analysis) ```bash -agent-browser snapshot # Full accessibility tree -agent-browser snapshot -i # Interactive elements only (recommended) -agent-browser snapshot -c # Compact output -agent-browser snapshot -d 3 # Limit depth to 3 -agent-browser snapshot -s "#main" # Scope to CSS selector +npx agent-browser snapshot # Full accessibility tree +npx agent-browser snapshot -i # Interactive elements only (recommended) +npx agent-browser snapshot -c # Compact output +npx agent-browser snapshot -d 3 # Limit depth to 3 +npx agent-browser snapshot -s "#main" # Scope to CSS selector ``` ### Interactions (use @refs from snapshot) ```bash -agent-browser click @e1 # Click -agent-browser dblclick @e1 # Double-click -agent-browser focus @e1 # Focus element -agent-browser fill @e2 "text" # Clear and type -agent-browser type @e2 "text" # Type without clearing -agent-browser press Enter 
# Press key (alias: key) -agent-browser press Control+a # Key combination -agent-browser keydown Shift # Hold key down -agent-browser keyup Shift # Release key -agent-browser hover @e1 # Hover -agent-browser check @e1 # Check checkbox -agent-browser uncheck @e1 # Uncheck checkbox -agent-browser select @e1 "value" # Select dropdown option -agent-browser select @e1 "a" "b" # Select multiple options -agent-browser scroll down 500 # Scroll page (default: down 300px) -agent-browser scrollintoview @e1 # Scroll element into view (alias: scrollinto) -agent-browser drag @e1 @e2 # Drag and drop -agent-browser upload @e1 file.pdf # Upload files +npx agent-browser click @e1 # Click +npx agent-browser dblclick @e1 # Double-click +npx agent-browser focus @e1 # Focus element +npx agent-browser fill @e2 "text" # Clear and type +npx agent-browser type @e2 "text" # Type without clearing +npx agent-browser press Enter # Press key (alias: key) +npx agent-browser press Control+a # Key combination +npx agent-browser keydown Shift # Hold key down +npx agent-browser keyup Shift # Release key +npx agent-browser hover @e1 # Hover +npx agent-browser check @e1 # Check checkbox +npx agent-browser uncheck @e1 # Uncheck checkbox +npx agent-browser select @e1 "value" # Select dropdown option +npx agent-browser select @e1 "a" "b" # Select multiple options +npx agent-browser scroll down 500 # Scroll page (default: down 300px) +npx agent-browser scrollintoview @e1 # Scroll element into view (alias: scrollinto) +npx agent-browser drag @e1 @e2 # Drag and drop +npx agent-browser upload @e1 file.pdf # Upload files ``` ### Get information ```bash -agent-browser get text @e1 # Get element text -agent-browser get html @e1 # Get innerHTML -agent-browser get value @e1 # Get input value -agent-browser get attr @e1 href # Get attribute -agent-browser get title # Get page title -agent-browser get url # Get current URL -agent-browser get count ".item" # Count matching elements -agent-browser get box @e1 # Get 
bounding box -agent-browser get styles @e1 # Get computed styles (font, color, bg, etc.) +npx agent-browser get text @e1 # Get element text +npx agent-browser get html @e1 # Get innerHTML +npx agent-browser get value @e1 # Get input value +npx agent-browser get attr @e1 href # Get attribute +npx agent-browser get title # Get page title +npx agent-browser get url # Get current URL +npx agent-browser get count ".item" # Count matching elements +npx agent-browser get box @e1 # Get bounding box +npx agent-browser get styles @e1 # Get computed styles (font, color, bg, etc.) ``` ### Check state ```bash -agent-browser is visible @e1 # Check if visible -agent-browser is enabled @e1 # Check if enabled -agent-browser is checked @e1 # Check if checked +npx agent-browser is visible @e1 # Check if visible +npx agent-browser is enabled @e1 # Check if enabled +npx agent-browser is checked @e1 # Check if checked ``` ### Screenshots & PDF ```bash -agent-browser screenshot # Save to a temporary directory -agent-browser screenshot path.png # Save to a specific path -agent-browser screenshot --full # Full page -agent-browser pdf output.pdf # Save as PDF +npx agent-browser screenshot # Save to a temporary directory +npx agent-browser screenshot path.png # Save to a specific path +npx agent-browser screenshot --full # Full page +npx agent-browser pdf output.pdf # Save as PDF ``` ### Video recording ```bash -agent-browser record start ./demo.webm # Start recording (uses current URL + state) -agent-browser click @e1 # Perform actions -agent-browser record stop # Stop and save video -agent-browser record restart ./take2.webm # Stop current + start new recording +npx agent-browser record start ./demo.webm # Start recording (uses current URL + state) +npx agent-browser click @e1 # Perform actions +npx agent-browser record stop # Stop and save video +npx agent-browser record restart ./take2.webm # Stop current + start new recording ``` Recording creates a fresh context but preserves 
cookies/storage from your session. If no URL is provided, it @@ -117,130 +117,130 @@ automatically returns to your current page. For smooth demos, explore first, the ### Wait ```bash -agent-browser wait @e1 # Wait for element -agent-browser wait 2000 # Wait milliseconds -agent-browser wait --text "Success" # Wait for text (or -t) -agent-browser wait --url "**/dashboard" # Wait for URL pattern (or -u) -agent-browser wait --load networkidle # Wait for network idle (or -l) -agent-browser wait --fn "window.ready" # Wait for JS condition (or -f) +npx agent-browser wait @e1 # Wait for element +npx agent-browser wait 2000 # Wait milliseconds +npx agent-browser wait --text "Success" # Wait for text (or -t) +npx agent-browser wait --url "**/dashboard" # Wait for URL pattern (or -u) +npx agent-browser wait --load networkidle # Wait for network idle (or -l) +npx agent-browser wait --fn "window.ready" # Wait for JS condition (or -f) ``` ### Mouse control ```bash -agent-browser mouse move 100 200 # Move mouse -agent-browser mouse down left # Press button -agent-browser mouse up left # Release button -agent-browser mouse wheel 100 # Scroll wheel +npx agent-browser mouse move 100 200 # Move mouse +npx agent-browser mouse down left # Press button +npx agent-browser mouse up left # Release button +npx agent-browser mouse wheel 100 # Scroll wheel ``` ### Semantic locators (alternative to refs) ```bash -agent-browser find role button click --name "Submit" -agent-browser find text "Sign In" click -agent-browser find text "Sign In" click --exact # Exact match only -agent-browser find label "Email" fill "user@test.com" -agent-browser find placeholder "Search" type "query" -agent-browser find alt "Logo" click -agent-browser find title "Close" click -agent-browser find testid "submit-btn" click -agent-browser find first ".item" click -agent-browser find last ".item" click -agent-browser find nth 2 "a" hover +npx agent-browser find role button click --name "Submit" +npx agent-browser find 
text "Sign In" click +npx agent-browser find text "Sign In" click --exact # Exact match only +npx agent-browser find label "Email" fill "user@test.com" +npx agent-browser find placeholder "Search" type "query" +npx agent-browser find alt "Logo" click +npx agent-browser find title "Close" click +npx agent-browser find testid "submit-btn" click +npx agent-browser find first ".item" click +npx agent-browser find last ".item" click +npx agent-browser find nth 2 "a" hover ``` ### Browser settings ```bash -agent-browser set viewport 1920 1080 # Set viewport size -agent-browser set device "iPhone 14" # Emulate device -agent-browser set geo 37.7749 -122.4194 # Set geolocation (alias: geolocation) -agent-browser set offline on # Toggle offline mode -agent-browser set headers '{"X-Key":"v"}' # Extra HTTP headers -agent-browser set credentials user pass # HTTP basic auth (alias: auth) -agent-browser set media dark # Emulate color scheme -agent-browser set media light reduced-motion # Light mode + reduced motion +npx agent-browser set viewport 1920 1080 # Set viewport size +npx agent-browser set device "iPhone 14" # Emulate device +npx agent-browser set geo 37.7749 -122.4194 # Set geolocation (alias: geolocation) +npx agent-browser set offline on # Toggle offline mode +npx agent-browser set headers '{"X-Key":"v"}' # Extra HTTP headers +npx agent-browser set credentials user pass # HTTP basic auth (alias: auth) +npx agent-browser set media dark # Emulate color scheme +npx agent-browser set media light reduced-motion # Light mode + reduced motion ``` ### Cookies & Storage ```bash -agent-browser cookies # Get all cookies -agent-browser cookies set name value # Set cookie -agent-browser cookies clear # Clear cookies -agent-browser storage local # Get all localStorage -agent-browser storage local key # Get specific key -agent-browser storage local set k v # Set value -agent-browser storage local clear # Clear all +npx agent-browser cookies # Get all cookies +npx agent-browser 
cookies set name value # Set cookie +npx agent-browser cookies clear # Clear cookies +npx agent-browser storage local # Get all localStorage +npx agent-browser storage local key # Get specific key +npx agent-browser storage local set k v # Set value +npx agent-browser storage local clear # Clear all ``` ### Network ```bash -agent-browser network route # Intercept requests -agent-browser network route --abort # Block requests -agent-browser network route --body '{}' # Mock response -agent-browser network unroute [url] # Remove routes -agent-browser network requests # View tracked requests -agent-browser network requests --filter api # Filter requests +npx agent-browser network route # Intercept requests +npx agent-browser network route --abort # Block requests +npx agent-browser network route --body '{}' # Mock response +npx agent-browser network unroute [url] # Remove routes +npx agent-browser network requests # View tracked requests +npx agent-browser network requests --filter api # Filter requests ``` ### Tabs & Windows ```bash -agent-browser tab # List tabs -agent-browser tab new [url] # New tab -agent-browser tab 2 # Switch to tab by index -agent-browser tab close # Close current tab -agent-browser tab close 2 # Close tab by index -agent-browser window new # New window +npx agent-browser tab # List tabs +npx agent-browser tab new [url] # New tab +npx agent-browser tab 2 # Switch to tab by index +npx agent-browser tab close # Close current tab +npx agent-browser tab close 2 # Close tab by index +npx agent-browser window new # New window ``` ### Frames ```bash -agent-browser frame "#iframe" # Switch to iframe -agent-browser frame main # Back to main frame +npx agent-browser frame "#iframe" # Switch to iframe +npx agent-browser frame main # Back to main frame ``` ### Dialogs ```bash -agent-browser dialog accept [text] # Accept dialog -agent-browser dialog dismiss # Dismiss dialog +npx agent-browser dialog accept [text] # Accept dialog +npx agent-browser dialog 
dismiss # Dismiss dialog ``` ### JavaScript ```bash -agent-browser eval "document.title" # Run JavaScript +npx agent-browser eval "document.title" # Run JavaScript ``` ## Global options ```bash -agent-browser --session ... # Isolated browser session -agent-browser --json ... # JSON output for parsing -agent-browser --headed ... # Show browser window (not headless) -agent-browser --full ... # Full page screenshot (-f) -agent-browser --cdp ... # Connect via Chrome DevTools Protocol -agent-browser -p ... # Cloud browser provider (--provider) -agent-browser --proxy ... # Use proxy server -agent-browser --headers ... # HTTP headers scoped to URL's origin -agent-browser --executable-path <p>
# Custom browser executable -agent-browser --extension ... # Load browser extension (repeatable) -agent-browser --help # Show help (-h) -agent-browser --version # Show version (-V) -agent-browser --help # Show detailed help for a command +npx agent-browser --session ... # Isolated browser session +npx agent-browser --json ... # JSON output for parsing +npx agent-browser --headed ... # Show browser window (not headless) +npx agent-browser --full ... # Full page screenshot (-f) +npx agent-browser --cdp ... # Connect via Chrome DevTools Protocol +npx agent-browser -p ... # Cloud browser provider (--provider) +npx agent-browser --proxy ... # Use proxy server +npx agent-browser --headers ... # HTTP headers scoped to URL's origin +npx agent-browser --executable-path <p>
# Custom browser executable +npx agent-browser --extension ... # Load browser extension (repeatable) +npx agent-browser --help # Show help (-h) +npx agent-browser --version # Show version (-V) +npx agent-browser --help # Show detailed help for a command ``` ### Proxy support ```bash -agent-browser --proxy http://proxy.com:8080 open example.com -agent-browser --proxy http://user:pass@proxy.com:8080 open example.com -agent-browser --proxy socks5://proxy.com:1080 open example.com +npx agent-browser --proxy http://proxy.com:8080 open example.com +npx agent-browser --proxy http://user:pass@proxy.com:8080 open example.com +npx agent-browser --proxy socks5://proxy.com:1080 open example.com ``` ## Environment variables @@ -257,40 +257,40 @@ AGENT_BROWSER_HOME="/path/to/agent-browser" # Custom install location (for daem ## Example: Form submission ```bash -agent-browser open https://example.com/form -agent-browser snapshot -i +npx agent-browser open https://example.com/form +npx agent-browser snapshot -i # Output shows: textbox "Email" [ref=e1], textbox "Password" [ref=e2], button "Submit" [ref=e3] -agent-browser fill @e1 "user@example.com" -agent-browser fill @e2 "password123" -agent-browser click @e3 -agent-browser wait --load networkidle -agent-browser snapshot -i # Check result +npx agent-browser fill @e1 "user@example.com" +npx agent-browser fill @e2 "password123" +npx agent-browser click @e3 +npx agent-browser wait --load networkidle +npx agent-browser snapshot -i # Check result ``` ## Example: Authentication with saved state ```bash # Login once -agent-browser open https://app.example.com/login -agent-browser snapshot -i -agent-browser fill @e1 "username" -agent-browser fill @e2 "password" -agent-browser click @e3 -agent-browser wait --url "**/dashboard" -agent-browser state save auth.json +npx agent-browser open https://app.example.com/login +npx agent-browser snapshot -i +npx agent-browser fill @e1 "username" +npx agent-browser fill @e2 "password" +npx 
agent-browser click @e3 +npx agent-browser wait --url "**/dashboard" +npx agent-browser state save auth.json # Later sessions: load saved state -agent-browser state load auth.json -agent-browser open https://app.example.com/dashboard +npx agent-browser state load auth.json +npx agent-browser open https://app.example.com/dashboard ``` ## Sessions (parallel browsers) ```bash -agent-browser --session test1 open site-a.com -agent-browser --session test2 open site-b.com -agent-browser session list +npx agent-browser --session test1 open site-a.com +npx agent-browser --session test2 open site-b.com +npx agent-browser session list ``` ## JSON output (for parsing) @@ -298,25 +298,25 @@ agent-browser session list Add `--json` for machine-readable output: ```bash -agent-browser snapshot -i --json -agent-browser get text @e1 --json +npx agent-browser snapshot -i --json +npx agent-browser get text @e1 --json ``` ## Debugging ```bash -agent-browser --headed open example.com # Show browser window -agent-browser --cdp 9222 snapshot # Connect via CDP port -agent-browser connect 9222 # Alternative: connect command -agent-browser console # View console messages -agent-browser console --clear # Clear console -agent-browser errors # View page errors -agent-browser errors --clear # Clear errors -agent-browser highlight @e1 # Highlight element -agent-browser trace start # Start recording trace -agent-browser trace stop trace.zip # Stop and save trace -agent-browser record start ./debug.webm # Record video from current page -agent-browser record stop # Save recording +npx agent-browser --headed open example.com # Show browser window +npx agent-browser --cdp 9222 snapshot # Connect via CDP port +npx agent-browser connect 9222 # Alternative: connect command +npx agent-browser console # View console messages +npx agent-browser console --clear # Clear console +npx agent-browser errors # View page errors +npx agent-browser errors --clear # Clear errors +npx agent-browser highlight @e1 # 
Highlight element +npx agent-browser trace start # Start recording trace +npx agent-browser trace stop trace.zip # Stop and save trace +npx agent-browser record start ./debug.webm # Record video from current page +npx agent-browser record stop # Save recording ``` ## Deep-dive documentation @@ -352,5 +352,5 @@ Usage: For sites with self-signed or invalid certificates: ```bash -agent-browser open https://localhost:8443 --ignore-https-errors +npx agent-browser open https://localhost:8443 --ignore-https-errors ``` diff --git a/skills/agent-browser/references/authentication.md b/skills/agent-browser/references/authentication.md index 5d801f6a..1505f4ee 100644 --- a/skills/agent-browser/references/authentication.md +++ b/skills/agent-browser/references/authentication.md @@ -6,23 +6,23 @@ Patterns for handling login flows, session persistence, and authenticated browsi ```bash # Navigate to login page -agent-browser open https://app.example.com/login -agent-browser wait --load networkidle +npx agent-browser open https://app.example.com/login +npx agent-browser wait --load networkidle # Get form elements -agent-browser snapshot -i +npx agent-browser snapshot -i # Output: @e1 [input type="email"], @e2 [input type="password"], @e3 [button] "Sign In" # Fill credentials -agent-browser fill @e1 "user@example.com" -agent-browser fill @e2 "password123" +npx agent-browser fill @e1 "user@example.com" +npx agent-browser fill @e2 "password123" # Submit -agent-browser click @e3 -agent-browser wait --load networkidle +npx agent-browser click @e3 +npx agent-browser wait --load networkidle # Verify login succeeded -agent-browser get url # Should be dashboard, not login +npx agent-browser get url # Should be dashboard, not login ``` ## Saving Authentication State @@ -31,15 +31,15 @@ After logging in, save state for reuse: ```bash # Login first (see above) -agent-browser open https://app.example.com/login -agent-browser snapshot -i -agent-browser fill @e1 "user@example.com" -agent-browser 
fill @e2 "password123" -agent-browser click @e3 -agent-browser wait --url "**/dashboard" +npx agent-browser open https://app.example.com/login +npx agent-browser snapshot -i +npx agent-browser fill @e1 "user@example.com" +npx agent-browser fill @e2 "password123" +npx agent-browser click @e3 +npx agent-browser wait --url "**/dashboard" # Save authenticated state -agent-browser state save ./auth-state.json +npx agent-browser state save ./auth-state.json ``` ## Restoring Authentication @@ -48,13 +48,13 @@ Skip login by loading saved state: ```bash # Load saved auth state -agent-browser state load ./auth-state.json +npx agent-browser state load ./auth-state.json # Navigate directly to protected page -agent-browser open https://app.example.com/dashboard +npx agent-browser open https://app.example.com/dashboard # Verify authenticated -agent-browser snapshot -i +npx agent-browser snapshot -i ``` ## OAuth / SSO Flows @@ -63,23 +63,23 @@ For OAuth redirects: ```bash # Start OAuth flow -agent-browser open https://app.example.com/auth/google +npx agent-browser open https://app.example.com/auth/google # Handle redirects automatically -agent-browser wait --url "**/accounts.google.com**" -agent-browser snapshot -i +npx agent-browser wait --url "**/accounts.google.com**" +npx agent-browser snapshot -i # Fill Google credentials -agent-browser fill @e1 "user@gmail.com" -agent-browser click @e2 # Next button -agent-browser wait 2000 -agent-browser snapshot -i -agent-browser fill @e3 "password" -agent-browser click @e4 # Sign in +npx agent-browser fill @e1 "user@gmail.com" +npx agent-browser click @e2 # Next button +npx agent-browser wait 2000 +npx agent-browser snapshot -i +npx agent-browser fill @e3 "password" +npx agent-browser click @e4 # Sign in # Wait for redirect back -agent-browser wait --url "**/app.example.com**" -agent-browser state save ./oauth-state.json +npx agent-browser wait --url "**/app.example.com**" +npx agent-browser state save ./oauth-state.json ``` ## 
Two-Factor Authentication @@ -88,18 +88,18 @@ Handle 2FA with manual intervention: ```bash # Login with credentials -agent-browser open https://app.example.com/login --headed # Show browser -agent-browser snapshot -i -agent-browser fill @e1 "user@example.com" -agent-browser fill @e2 "password123" -agent-browser click @e3 +npx agent-browser open https://app.example.com/login --headed # Show browser +npx agent-browser snapshot -i +npx agent-browser fill @e1 "user@example.com" +npx agent-browser fill @e2 "password123" +npx agent-browser click @e3 # Wait for user to complete 2FA manually echo "Complete 2FA in the browser window..." -agent-browser wait --url "**/dashboard" --timeout 120000 +npx agent-browser wait --url "**/dashboard" --timeout 120000 # Save state after 2FA -agent-browser state save ./2fa-state.json +npx agent-browser state save ./2fa-state.json ``` ## HTTP Basic Auth @@ -108,10 +108,10 @@ For sites using HTTP Basic Authentication: ```bash # Set credentials before navigation -agent-browser set credentials username password +npx agent-browser set credentials username password # Navigate to protected resource -agent-browser open https://protected.example.com/api +npx agent-browser open https://protected.example.com/api ``` ## Cookie-Based Auth @@ -120,10 +120,10 @@ Manually set authentication cookies: ```bash # Set auth cookie -agent-browser cookies set session_token "abc123xyz" +npx agent-browser cookies set session_token "abc123xyz" # Navigate to protected page -agent-browser open https://app.example.com/dashboard +npx agent-browser open https://app.example.com/dashboard ``` ## Token Refresh Handling @@ -138,24 +138,24 @@ STATE_FILE="./auth-state.json" # Try loading existing state if [[ -f "$STATE_FILE" ]]; then - agent-browser state load "$STATE_FILE" - agent-browser open https://app.example.com/dashboard + npx agent-browser state load "$STATE_FILE" + npx agent-browser open https://app.example.com/dashboard # Check if session is still valid - 
URL=$(agent-browser get url) + URL=$(npx agent-browser get url) if [[ "$URL" == *"/login"* ]]; then echo "Session expired, re-authenticating..." # Perform fresh login - agent-browser snapshot -i - agent-browser fill @e1 "$USERNAME" - agent-browser fill @e2 "$PASSWORD" - agent-browser click @e3 - agent-browser wait --url "**/dashboard" - agent-browser state save "$STATE_FILE" + npx agent-browser snapshot -i + npx agent-browser fill @e1 "$USERNAME" + npx agent-browser fill @e2 "$PASSWORD" + npx agent-browser click @e3 + npx agent-browser wait --url "**/dashboard" + npx agent-browser state save "$STATE_FILE" fi else # First-time login - agent-browser open https://app.example.com/login + npx agent-browser open https://app.example.com/login # ... login flow ... fi ``` @@ -169,20 +169,20 @@ fi 2. **Use environment variables for credentials** ```bash - agent-browser fill @e1 "$APP_USERNAME" - agent-browser fill @e2 "$APP_PASSWORD" + npx agent-browser fill @e1 "$APP_USERNAME" + npx agent-browser fill @e2 "$APP_PASSWORD" ``` 3. **Clean up after automation** ```bash - agent-browser cookies clear + npx agent-browser cookies clear rm -f ./auth-state.json ``` 4. **Use short-lived sessions for CI/CD** ```bash # Don't persist state in CI - agent-browser open https://app.example.com/login + npx agent-browser open https://app.example.com/login # ... login and perform actions ... 
- agent-browser close # Session ends, nothing persisted + npx agent-browser close # Session ends, nothing persisted ``` diff --git a/skills/agent-browser/references/proxy-support.md b/skills/agent-browser/references/proxy-support.md index 05fcec26..0ebb10e0 100644 --- a/skills/agent-browser/references/proxy-support.md +++ b/skills/agent-browser/references/proxy-support.md @@ -9,16 +9,16 @@ Set proxy via environment variable before starting: ```bash # HTTP proxy export HTTP_PROXY="http://proxy.example.com:8080" -agent-browser open https://example.com +npx agent-browser open https://example.com # HTTPS proxy export HTTPS_PROXY="https://proxy.example.com:8080" -agent-browser open https://example.com +npx agent-browser open https://example.com # Both export HTTP_PROXY="http://proxy.example.com:8080" export HTTPS_PROXY="http://proxy.example.com:8080" -agent-browser open https://example.com +npx agent-browser open https://example.com ``` ## Authenticated Proxy @@ -28,7 +28,7 @@ For proxies requiring authentication: ```bash # Include credentials in URL export HTTP_PROXY="http://username:password@proxy.example.com:8080" -agent-browser open https://example.com +npx agent-browser open https://example.com ``` ## SOCKS Proxy @@ -36,11 +36,11 @@ agent-browser open https://example.com ```bash # SOCKS5 proxy export ALL_PROXY="socks5://proxy.example.com:1080" -agent-browser open https://example.com +npx agent-browser open https://example.com # SOCKS5 with auth export ALL_PROXY="socks5://user:pass@proxy.example.com:1080" -agent-browser open https://example.com +npx agent-browser open https://example.com ``` ## Proxy Bypass @@ -50,8 +50,8 @@ Skip proxy for specific domains: ```bash # Bypass proxy for local addresses export NO_PROXY="localhost,127.0.0.1,.internal.company.com" -agent-browser open https://internal.company.com # Direct connection -agent-browser open https://external.com # Via proxy +npx agent-browser open https://internal.company.com # Direct connection +npx 
agent-browser open https://external.com # Via proxy ``` ## Common Use Cases @@ -75,9 +75,9 @@ for proxy in "${PROXIES[@]}"; do region=$(echo "$proxy" | grep -oP '^\w+-\w+') echo "Testing from: $region" - agent-browser --session "$region" open https://example.com - agent-browser --session "$region" screenshot "./screenshots/$region.png" - agent-browser --session "$region" close + npx agent-browser --session "$region" open https://example.com + npx agent-browser --session "$region" screenshot "./screenshots/$region.png" + npx agent-browser --session "$region" close done ``` @@ -104,9 +104,9 @@ for i in "${!URLS[@]}"; do export HTTP_PROXY="${PROXY_LIST[$proxy_index]}" export HTTPS_PROXY="${PROXY_LIST[$proxy_index]}" - agent-browser open "${URLS[$i]}" - agent-browser get text body > "output-$i.txt" - agent-browser close + npx agent-browser open "${URLS[$i]}" + npx agent-browser get text body > "output-$i.txt" + npx agent-browser close sleep 1 # Polite delay done @@ -123,18 +123,18 @@ export HTTPS_PROXY="http://corpproxy.company.com:8080" export NO_PROXY="localhost,127.0.0.1,.company.com" # External sites go through proxy -agent-browser open https://external-vendor.com +npx agent-browser open https://external-vendor.com # Internal sites bypass proxy -agent-browser open https://intranet.company.com +npx agent-browser open https://intranet.company.com ``` ## Verifying Proxy Connection ```bash # Check your apparent IP -agent-browser open https://httpbin.org/ip -agent-browser get text body +npx agent-browser open https://httpbin.org/ip +npx agent-browser get text body # Should show proxy's IP, not your real IP ``` @@ -156,7 +156,7 @@ Some proxies perform SSL inspection. 
If you encounter certificate errors: ```bash # For testing only - not recommended for production -agent-browser open https://example.com --ignore-https-errors +npx agent-browser open https://example.com --ignore-https-errors ``` ### Slow Performance diff --git a/skills/agent-browser/references/session-management.md b/skills/agent-browser/references/session-management.md index cfc33624..6ba633bc 100644 --- a/skills/agent-browser/references/session-management.md +++ b/skills/agent-browser/references/session-management.md @@ -8,14 +8,14 @@ Use `--session` flag to isolate browser contexts: ```bash # Session 1: Authentication flow -agent-browser --session auth open https://app.example.com/login +npx agent-browser --session auth open https://app.example.com/login # Session 2: Public browsing (separate cookies, storage) -agent-browser --session public open https://example.com +npx agent-browser --session public open https://example.com # Commands are isolated by session -agent-browser --session auth fill @e1 "user@example.com" -agent-browser --session public get text body +npx agent-browser --session auth fill @e1 "user@example.com" +npx agent-browser --session public get text body ``` ## Session Isolation Properties @@ -34,17 +34,17 @@ Each session has independent: ```bash # Save cookies, storage, and auth state -agent-browser state save /path/to/auth-state.json +npx agent-browser state save /path/to/auth-state.json ``` ### Load Session State ```bash # Restore saved state -agent-browser state load /path/to/auth-state.json +npx agent-browser state load /path/to/auth-state.json # Continue with authenticated session -agent-browser open https://app.example.com/dashboard +npx agent-browser open https://app.example.com/dashboard ``` ### State File Contents @@ -70,19 +70,19 @@ STATE_FILE="/tmp/auth-state.json" # Check if we have saved state if [[ -f "$STATE_FILE" ]]; then - agent-browser state load "$STATE_FILE" - agent-browser open https://app.example.com/dashboard + npx 
agent-browser state load "$STATE_FILE" + npx agent-browser open https://app.example.com/dashboard else # Perform login - agent-browser open https://app.example.com/login - agent-browser snapshot -i - agent-browser fill @e1 "$USERNAME" - agent-browser fill @e2 "$PASSWORD" - agent-browser click @e3 - agent-browser wait --load networkidle + npx agent-browser open https://app.example.com/login + npx agent-browser snapshot -i + npx agent-browser fill @e1 "$USERNAME" + npx agent-browser fill @e2 "$PASSWORD" + npx agent-browser click @e3 + npx agent-browser wait --load networkidle # Save for future use - agent-browser state save "$STATE_FILE" + npx agent-browser state save "$STATE_FILE" fi ``` @@ -93,32 +93,32 @@ fi # Scrape multiple sites concurrently # Start all sessions -agent-browser --session site1 open https://site1.com & -agent-browser --session site2 open https://site2.com & -agent-browser --session site3 open https://site3.com & +npx agent-browser --session site1 open https://site1.com & +npx agent-browser --session site2 open https://site2.com & +npx agent-browser --session site3 open https://site3.com & wait # Extract from each -agent-browser --session site1 get text body > site1.txt -agent-browser --session site2 get text body > site2.txt -agent-browser --session site3 get text body > site3.txt +npx agent-browser --session site1 get text body > site1.txt +npx agent-browser --session site2 get text body > site2.txt +npx agent-browser --session site3 get text body > site3.txt # Cleanup -agent-browser --session site1 close -agent-browser --session site2 close -agent-browser --session site3 close +npx agent-browser --session site1 close +npx agent-browser --session site2 close +npx agent-browser --session site3 close ``` ### A/B Testing Sessions ```bash # Test different user experiences -agent-browser --session variant-a open "https://app.com?variant=a" -agent-browser --session variant-b open "https://app.com?variant=b" +npx agent-browser --session variant-a open 
"https://app.com?variant=a" +npx agent-browser --session variant-b open "https://app.com?variant=b" # Compare -agent-browser --session variant-a screenshot /tmp/variant-a.png -agent-browser --session variant-b screenshot /tmp/variant-b.png +npx agent-browser --session variant-a screenshot /tmp/variant-a.png +npx agent-browser --session variant-b screenshot /tmp/variant-b.png ``` ## Default Session @@ -127,19 +127,19 @@ When `--session` is omitted, commands use the default session: ```bash # These use the same default session -agent-browser open https://example.com -agent-browser snapshot -i -agent-browser close # Closes default session +npx agent-browser open https://example.com +npx agent-browser snapshot -i +npx agent-browser close # Closes default session ``` ## Session Cleanup ```bash # Close specific session -agent-browser --session auth close +npx agent-browser --session auth close # List active sessions -agent-browser session list +npx agent-browser session list ``` ## Best Practices @@ -148,19 +148,19 @@ agent-browser session list ```bash # GOOD: Clear purpose -agent-browser --session github-auth open https://github.com -agent-browser --session docs-scrape open https://docs.example.com +npx agent-browser --session github-auth open https://github.com +npx agent-browser --session docs-scrape open https://docs.example.com # AVOID: Generic names -agent-browser --session s1 open https://github.com +npx agent-browser --session s1 open https://github.com ``` ### 2. Always Clean Up ```bash # Close sessions when done -agent-browser --session auth close -agent-browser --session scrape close +npx agent-browser --session auth close +npx agent-browser --session scrape close ``` ### 3. 
Handle State Files Securely @@ -177,5 +177,5 @@ rm /tmp/auth-state.json ```bash # Set timeout for automated scripts -timeout 60 agent-browser --session long-task get text body +timeout 60 npx agent-browser --session long-task get text body ``` diff --git a/skills/agent-browser/references/snapshot-refs.md b/skills/agent-browser/references/snapshot-refs.md index 0b17a4d4..a67d49fc 100644 --- a/skills/agent-browser/references/snapshot-refs.md +++ b/skills/agent-browser/references/snapshot-refs.md @@ -22,10 +22,10 @@ Compact snapshot → @refs assigned → Direct ref interaction ```bash # Basic snapshot (shows page structure) -agent-browser snapshot +npx agent-browser snapshot # Interactive snapshot (-i flag) - RECOMMENDED -agent-browser snapshot -i +npx agent-browser snapshot -i ``` ### Snapshot Output Format @@ -58,16 +58,16 @@ Once you have refs, interact directly: ```bash # Click the "Sign In" button -agent-browser click @e6 +npx agent-browser click @e6 # Fill email input -agent-browser fill @e10 "user@example.com" +npx agent-browser fill @e10 "user@example.com" # Fill password -agent-browser fill @e11 "password123" +npx agent-browser fill @e11 "password123" # Submit the form -agent-browser click @e12 +npx agent-browser click @e12 ``` ## Ref Lifecycle @@ -76,14 +76,14 @@ agent-browser click @e12 ```bash # Get initial snapshot -agent-browser snapshot -i +npx agent-browser snapshot -i # @e1 [button] "Next" # Click triggers page change -agent-browser click @e1 +npx agent-browser click @e1 # MUST re-snapshot to get new refs! -agent-browser snapshot -i +npx agent-browser snapshot -i # @e1 [h1] "Page 2" ← Different element now! 
``` @@ -93,29 +93,29 @@ agent-browser snapshot -i ```bash # CORRECT -agent-browser open https://example.com -agent-browser snapshot -i # Get refs first -agent-browser click @e1 # Use ref +npx agent-browser open https://example.com +npx agent-browser snapshot -i # Get refs first +npx agent-browser click @e1 # Use ref # WRONG -agent-browser open https://example.com -agent-browser click @e1 # Ref doesn't exist yet! +npx agent-browser open https://example.com +npx agent-browser click @e1 # Ref doesn't exist yet! ``` ### 2. Re-Snapshot After Navigation ```bash -agent-browser click @e5 # Navigates to new page -agent-browser snapshot -i # Get new refs -agent-browser click @e1 # Use new refs +npx agent-browser click @e5 # Navigates to new page +npx agent-browser snapshot -i # Get new refs +npx agent-browser click @e1 # Use new refs ``` ### 3. Re-Snapshot After Dynamic Changes ```bash -agent-browser click @e1 # Opens dropdown -agent-browser snapshot -i # See dropdown items -agent-browser click @e7 # Select item +npx agent-browser click @e1 # Opens dropdown +npx agent-browser snapshot -i # See dropdown items +npx agent-browser click @e7 # Select item ``` ### 4. 
Snapshot Specific Regions @@ -124,7 +124,7 @@ For complex pages, snapshot specific areas: ```bash # Snapshot just the form -agent-browser snapshot @e9 +npx agent-browser snapshot @e9 ``` ## Ref Notation Details @@ -160,27 +160,27 @@ agent-browser snapshot @e9 ```bash # Ref may have changed - re-snapshot -agent-browser snapshot -i +npx agent-browser snapshot -i ``` ### Element Not Visible in Snapshot ```bash # Scroll to reveal element -agent-browser scroll --bottom -agent-browser snapshot -i +npx agent-browser scroll --bottom +npx agent-browser snapshot -i # Or wait for dynamic content -agent-browser wait 1000 -agent-browser snapshot -i +npx agent-browser wait 1000 +npx agent-browser snapshot -i ``` ### Too Many Elements ```bash # Snapshot specific container -agent-browser snapshot @e5 +npx agent-browser snapshot @e5 # Or use get text for content-only extraction -agent-browser get text @e5 +npx agent-browser get text @e5 ``` diff --git a/skills/agent-browser/references/video-recording.md b/skills/agent-browser/references/video-recording.md index 98e6b0a1..9fd868e6 100644 --- a/skills/agent-browser/references/video-recording.md +++ b/skills/agent-browser/references/video-recording.md @@ -6,29 +6,29 @@ Capture browser automation sessions as video for debugging, documentation, or ve ```bash # Start recording -agent-browser record start ./demo.webm +npx agent-browser record start ./demo.webm # Perform actions -agent-browser open https://example.com -agent-browser snapshot -i -agent-browser click @e1 -agent-browser fill @e2 "test input" +npx agent-browser open https://example.com +npx agent-browser snapshot -i +npx agent-browser click @e1 +npx agent-browser fill @e2 "test input" # Stop and save -agent-browser record stop +npx agent-browser record stop ``` ## Recording Commands ```bash # Start recording to file -agent-browser record start ./output.webm +npx agent-browser record start ./output.webm # Stop current recording -agent-browser record stop +npx agent-browser 
record stop # Restart with new file (stops current + starts new) -agent-browser record restart ./take2.webm +npx agent-browser record restart ./take2.webm ``` ## Use Cases @@ -39,18 +39,18 @@ agent-browser record restart ./take2.webm #!/bin/bash # Record automation for debugging -agent-browser record start ./debug-$(date +%Y%m%d-%H%M%S).webm +npx agent-browser record start ./debug-$(date +%Y%m%d-%H%M%S).webm # Run your automation -agent-browser open https://app.example.com -agent-browser snapshot -i -agent-browser click @e1 || { +npx agent-browser open https://app.example.com +npx agent-browser snapshot -i +npx agent-browser click @e1 || { echo "Click failed - check recording" - agent-browser record stop + npx agent-browser record stop exit 1 } -agent-browser record stop +npx agent-browser record stop ``` ### Documentation Generation @@ -59,23 +59,23 @@ agent-browser record stop #!/bin/bash # Record workflow for documentation -agent-browser record start ./docs/how-to-login.webm +npx agent-browser record start ./docs/how-to-login.webm -agent-browser open https://app.example.com/login -agent-browser wait 1000 # Pause for visibility +npx agent-browser open https://app.example.com/login +npx agent-browser wait 1000 # Pause for visibility -agent-browser snapshot -i -agent-browser fill @e1 "demo@example.com" -agent-browser wait 500 +npx agent-browser snapshot -i +npx agent-browser fill @e1 "demo@example.com" +npx agent-browser wait 500 -agent-browser fill @e2 "password" -agent-browser wait 500 +npx agent-browser fill @e2 "password" +npx agent-browser wait 500 -agent-browser click @e3 -agent-browser wait --load networkidle -agent-browser wait 1000 # Show result +npx agent-browser click @e3 +npx agent-browser wait --load networkidle +npx agent-browser wait 1000 # Show result -agent-browser record stop +npx agent-browser record stop ``` ### CI/CD Test Evidence @@ -88,7 +88,7 @@ TEST_NAME="${1:-e2e-test}" RECORDING_DIR="./test-recordings" mkdir -p "$RECORDING_DIR" 
-agent-browser record start "$RECORDING_DIR/$TEST_NAME-$(date +%s).webm" +npx agent-browser record start "$RECORDING_DIR/$TEST_NAME-$(date +%s).webm" # Run test if run_e2e_test; then @@ -97,7 +97,7 @@ else echo "Test failed - recording saved" fi -agent-browser record stop +npx agent-browser record stop ``` ## Best Practices @@ -106,16 +106,16 @@ agent-browser record stop ```bash # Slow down for human viewing -agent-browser click @e1 -agent-browser wait 500 # Let viewer see result +npx agent-browser click @e1 +npx agent-browser wait 500 # Let viewer see result ``` ### 2. Use Descriptive Filenames ```bash # Include context in filename -agent-browser record start ./recordings/login-flow-2024-01-15.webm -agent-browser record start ./recordings/checkout-test-run-42.webm +npx agent-browser record start ./recordings/login-flow-2024-01-15.webm +npx agent-browser record start ./recordings/checkout-test-run-42.webm ``` ### 3. Handle Recording in Error Cases @@ -125,12 +125,12 @@ agent-browser record start ./recordings/checkout-test-run-42.webm set -e cleanup() { - agent-browser record stop 2>/dev/null || true - agent-browser close 2>/dev/null || true + npx agent-browser record stop 2>/dev/null || true + npx agent-browser close 2>/dev/null || true } trap cleanup EXIT -agent-browser record start ./automation.webm +npx agent-browser record start ./automation.webm # ... automation steps ... 
``` @@ -138,15 +138,15 @@ agent-browser record start ./automation.webm ```bash # Record video AND capture key frames -agent-browser record start ./flow.webm +npx agent-browser record start ./flow.webm -agent-browser open https://example.com -agent-browser screenshot ./screenshots/step1-homepage.png +npx agent-browser open https://example.com +npx agent-browser screenshot ./screenshots/step1-homepage.png -agent-browser click @e1 -agent-browser screenshot ./screenshots/step2-after-click.png +npx agent-browser click @e1 +npx agent-browser screenshot ./screenshots/step2-after-click.png -agent-browser record stop +npx agent-browser record stop ``` ## Output Format diff --git a/skills/agent-browser/templates/authenticated-session.sh b/skills/agent-browser/templates/authenticated-session.sh index e44aaad5..3bc2558b 100755 --- a/skills/agent-browser/templates/authenticated-session.sh +++ b/skills/agent-browser/templates/authenticated-session.sh @@ -22,14 +22,14 @@ echo "Authentication workflow for: $LOGIN_URL" # ══════════════════════════════════════════════════════════════ if [[ -f "$STATE_FILE" ]]; then echo "Loading saved authentication state..." - agent-browser state load "$STATE_FILE" - agent-browser open "$LOGIN_URL" - agent-browser wait --load networkidle + npx agent-browser state load "$STATE_FILE" + npx agent-browser open "$LOGIN_URL" + npx agent-browser wait --load networkidle - CURRENT_URL=$(agent-browser get url) + CURRENT_URL=$(npx agent-browser get url) if [[ "$CURRENT_URL" != *"login"* ]] && [[ "$CURRENT_URL" != *"signin"* ]]; then echo "Session restored successfully!" - agent-browser snapshot -i + npx agent-browser snapshot -i exit 0 fi echo "Session expired, performing fresh login..." @@ -40,14 +40,14 @@ fi # DISCOVERY MODE: Show form structure (remove after setup) # ══════════════════════════════════════════════════════════════ echo "Opening login page..." 
-agent-browser open "$LOGIN_URL" -agent-browser wait --load networkidle +npx agent-browser open "$LOGIN_URL" +npx agent-browser wait --load networkidle echo "" echo "┌─────────────────────────────────────────────────────────┐" echo "│ LOGIN FORM STRUCTURE │" echo "├─────────────────────────────────────────────────────────┤" -agent-browser snapshot -i +npx agent-browser snapshot -i echo "└─────────────────────────────────────────────────────────┘" echo "" echo "Next steps:" @@ -56,7 +56,7 @@ echo " 2. Uncomment LOGIN FLOW section below" echo " 3. Replace @e1, @e2, @e3 with your refs" echo " 4. Delete this DISCOVERY MODE section" echo "" -agent-browser close +npx agent-browser close exit 0 # ══════════════════════════════════════════════════════════════ @@ -65,27 +65,27 @@ exit 0 # : "${APP_USERNAME:?Set APP_USERNAME environment variable}" # : "${APP_PASSWORD:?Set APP_PASSWORD environment variable}" # -# agent-browser open "$LOGIN_URL" -# agent-browser wait --load networkidle -# agent-browser snapshot -i +# npx agent-browser open "$LOGIN_URL" +# npx agent-browser wait --load networkidle +# npx agent-browser snapshot -i # # # Fill credentials (update refs to match your form) -# agent-browser fill @e1 "$APP_USERNAME" -# agent-browser fill @e2 "$APP_PASSWORD" -# agent-browser click @e3 -# agent-browser wait --load networkidle +# npx agent-browser fill @e1 "$APP_USERNAME" +# npx agent-browser fill @e2 "$APP_PASSWORD" +# npx agent-browser click @e3 +# npx agent-browser wait --load networkidle # # # Verify login succeeded -# FINAL_URL=$(agent-browser get url) +# FINAL_URL=$(npx agent-browser get url) # if [[ "$FINAL_URL" == *"login"* ]] || [[ "$FINAL_URL" == *"signin"* ]]; then # echo "ERROR: Login failed - still on login page" -# agent-browser screenshot /tmp/login-failed.png -# agent-browser close +# npx agent-browser screenshot /tmp/login-failed.png +# npx agent-browser close # exit 1 # fi # # # Save state for future runs # echo "Saving authentication state to: 
$STATE_FILE" -# agent-browser state save "$STATE_FILE" +# npx agent-browser state save "$STATE_FILE" # echo "Login successful!" -# agent-browser snapshot -i +# npx agent-browser snapshot -i diff --git a/skills/agent-browser/templates/capture-workflow.sh b/skills/agent-browser/templates/capture-workflow.sh index a4eae751..4494964f 100755 --- a/skills/agent-browser/templates/capture-workflow.sh +++ b/skills/agent-browser/templates/capture-workflow.sh @@ -12,56 +12,56 @@ mkdir -p "$OUTPUT_DIR" # Optional: Load authentication state if needed # if [[ -f "./auth-state.json" ]]; then -# agent-browser state load "./auth-state.json" +# npx agent-browser state load "./auth-state.json" # fi # Navigate to target page -agent-browser open "$TARGET_URL" -agent-browser wait --load networkidle +npx agent-browser open "$TARGET_URL" +npx agent-browser wait --load networkidle # Get page metadata -echo "Page title: $(agent-browser get title)" -echo "Page URL: $(agent-browser get url)" +echo "Page title: $(npx agent-browser get title)" +echo "Page URL: $(npx agent-browser get url)" # Capture full page screenshot -agent-browser screenshot --full "$OUTPUT_DIR/page-full.png" +npx agent-browser screenshot --full "$OUTPUT_DIR/page-full.png" echo "Screenshot saved: $OUTPUT_DIR/page-full.png" # Get page structure -agent-browser snapshot -i > "$OUTPUT_DIR/page-structure.txt" +npx agent-browser snapshot -i > "$OUTPUT_DIR/page-structure.txt" echo "Structure saved: $OUTPUT_DIR/page-structure.txt" # Extract main content # Adjust selector based on target site structure -# agent-browser get text @e1 > "$OUTPUT_DIR/main-content.txt" +# npx agent-browser get text @e1 > "$OUTPUT_DIR/main-content.txt" # Extract specific elements (uncomment as needed) -# agent-browser get text "article" > "$OUTPUT_DIR/article.txt" -# agent-browser get text "main" > "$OUTPUT_DIR/main.txt" -# agent-browser get text ".content" > "$OUTPUT_DIR/content.txt" +# npx agent-browser get text "article" > "$OUTPUT_DIR/article.txt" +# 
npx agent-browser get text "main" > "$OUTPUT_DIR/main.txt" +# npx agent-browser get text ".content" > "$OUTPUT_DIR/content.txt" # Get full page text -agent-browser get text body > "$OUTPUT_DIR/page-text.txt" +npx agent-browser get text body > "$OUTPUT_DIR/page-text.txt" echo "Text content saved: $OUTPUT_DIR/page-text.txt" # Optional: Save as PDF -agent-browser pdf "$OUTPUT_DIR/page.pdf" +npx agent-browser pdf "$OUTPUT_DIR/page.pdf" echo "PDF saved: $OUTPUT_DIR/page.pdf" # Optional: Capture with scrolling for infinite scroll pages # scroll_and_capture() { # local count=0 # while [[ $count -lt 5 ]]; do -# agent-browser scroll down 1000 -# agent-browser wait 1000 +# npx agent-browser scroll down 1000 +# npx agent-browser wait 1000 # ((count++)) # done -# agent-browser screenshot --full "$OUTPUT_DIR/page-scrolled.png" +# npx agent-browser screenshot --full "$OUTPUT_DIR/page-scrolled.png" # } # scroll_and_capture # Cleanup -agent-browser close +npx agent-browser close echo "" echo "Capture complete! Files saved to: $OUTPUT_DIR" diff --git a/skills/agent-browser/templates/form-automation.sh b/skills/agent-browser/templates/form-automation.sh index 02a7c811..74e37eb5 100755 --- a/skills/agent-browser/templates/form-automation.sh +++ b/skills/agent-browser/templates/form-automation.sh @@ -9,56 +9,56 @@ FORM_URL="${1:?Usage: $0 }" echo "Automating form at: $FORM_URL" # Navigate to form page -agent-browser open "$FORM_URL" -agent-browser wait --load networkidle +npx agent-browser open "$FORM_URL" +npx agent-browser wait --load networkidle # Get interactive snapshot to identify form fields echo "Analyzing form structure..." 
-agent-browser snapshot -i +npx agent-browser snapshot -i # Example: Fill common form fields # Uncomment and modify refs based on snapshot output # Text inputs -# agent-browser fill @e1 "John Doe" # Name field -# agent-browser fill @e2 "user@example.com" # Email field -# agent-browser fill @e3 "+1-555-123-4567" # Phone field +# npx agent-browser fill @e1 "John Doe" # Name field +# npx agent-browser fill @e2 "user@example.com" # Email field +# npx agent-browser fill @e3 "+1-555-123-4567" # Phone field # Password fields -# agent-browser fill @e4 "SecureP@ssw0rd!" +# npx agent-browser fill @e4 "SecureP@ssw0rd!" # Dropdowns -# agent-browser select @e5 "Option Value" +# npx agent-browser select @e5 "Option Value" # Checkboxes -# agent-browser check @e6 # Check -# agent-browser uncheck @e7 # Uncheck +# npx agent-browser check @e6 # Check +# npx agent-browser uncheck @e7 # Uncheck # Radio buttons -# agent-browser click @e8 # Select radio option +# npx agent-browser click @e8 # Select radio option # Text areas -# agent-browser fill @e9 "Multi-line text content here" +# npx agent-browser fill @e9 "Multi-line text content here" # File uploads -# agent-browser upload @e10 /path/to/file.pdf +# npx agent-browser upload @e10 /path/to/file.pdf # Submit form -# agent-browser click @e11 # Submit button +# npx agent-browser click @e11 # Submit button # Wait for response -# agent-browser wait --load networkidle -# agent-browser wait --url "**/success" # Or wait for redirect +# npx agent-browser wait --load networkidle +# npx agent-browser wait --url "**/success" # Or wait for redirect # Verify submission echo "Form submission result:" -agent-browser get url -agent-browser snapshot -i +npx agent-browser get url +npx agent-browser snapshot -i # Take screenshot of result -agent-browser screenshot /tmp/form-result.png +npx agent-browser screenshot /tmp/form-result.png # Cleanup -agent-browser close +npx agent-browser close echo "Form automation complete"