Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@
- `--max-field-length <N>` flag for `extract` macro to configure maximum characters per extracted field (default: 500, max: 2000)
- `--wait-loaded` flag for goto action - waits for async-rendered content to finish loading before taking the snapshot. Combines network idle, DOM stability, and loading indicator absence detection (spinners, skeletons, progress bars, aria-busy). Use `--timeout <ms>` to set wait timeout (default: 15000ms)
- Automatic content blocking detection in goto action - detects when sites serve pages but block content from headless browsers (e.g., X.com empty timelines). Uses provider-specific heuristics (content selectors, blocked indicators) and generic checks (empty content, persistent spinners). Response includes `contentBlocked: true`, `warning: 'content_blocked'`, and recovery suggestions. Disable with `--no-content-block-detect` flag
- Deep stealth hardening for headless browsers - CDP artifact removal, screen/viewport dimension spoofing, navigator.connection, WebRTC IP leak prevention. Reduces detection by aggressive anti-bot sites
- Auto headed fallback when content is blocked in headless mode - automatically relaunches the session in a headed browser to retrieve content when the headless browser is detected and blocked (only when a headed browser can be launched). Response includes `headedFallback: true`. Disable with `--no-auto-recover` flag

### Fixed
- Smart default snapshot scoping now includes complementary ARIA landmarks (`<aside>`, `[role="complementary"]`) alongside `<main>`, capturing sidebar content like repository stats (#26)
Expand Down
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ web-ctl session end github

| Action | Usage | Returns |
|--------|-------|---------|
| `goto` | `run <s> goto <url> [--no-auth-wall-detect] [--no-content-block-detect] [--ensure-auth] [--wait-loaded]` | `{ url, status, authWallDetected, checkpointCompleted, ensureAuthCompleted, waitLoaded, contentBlocked, warning, snapshot }` |
| `goto` | `run <s> goto <url> [--no-auth-wall-detect] [--no-content-block-detect] [--no-auto-recover] [--ensure-auth] [--wait-loaded]` | `{ url, status, authWallDetected, checkpointCompleted, ensureAuthCompleted, waitLoaded, contentBlocked, headedFallback, warning, snapshot }` |
| `snapshot` | `run <s> snapshot` | `{ url, snapshot }` |
| `click` | `run <s> click <sel> [--wait-stable]` | `{ url, clicked, snapshot }` |
| `click-wait` | `run <s> click-wait <sel> [--timeout]` | `{ url, clicked, settled, snapshot }` |
Expand Down Expand Up @@ -174,6 +174,7 @@ This eliminates the common click-snapshot-check loop that wastes agent turns on
| `--allow-evaluate` | `evaluate` | Required safety flag for JS execution |
| `--no-auth-wall-detect` | `goto` | Disable automatic auth wall detection and checkpoint opening |
| `--no-content-block-detect` | `goto` | Disable automatic content blocking detection (e.g., sites serving empty pages to headless browsers) |
| `--no-auto-recover` | `goto` | Disable automatic headed fallback when content is blocked in headless mode |
| `--ensure-auth` | `goto` | Poll for auth completion instead of timed checkpoint; overrides `--no-auth-wall-detect` |
| `--wait-loaded` | `goto` | Wait for async content to finish rendering (network idle + loading indicator absence + DOM quiet) |
| `--snapshot-depth <N>` | Any action with snapshot | Limit ARIA tree depth (e.g. 3 for top 3 levels) |
Expand Down
31 changes: 30 additions & 1 deletion scripts/browser-launcher.js
Original file line number Diff line number Diff line change
Expand Up @@ -75,10 +75,12 @@ async function launchBrowser(sessionName, options = {}) {

const launchOptions = {
headless,
viewport: { width: 1920, height: 1080 },
args: [
'--disable-blink-features=AutomationControlled',
'--no-first-run',
'--no-default-browser-check'
'--no-default-browser-check',
'--window-size=1920,1080'
]
};

Expand Down Expand Up @@ -146,6 +148,33 @@ async function launchBrowser(sessionName, options = {}) {
}
return origQuery(params);
};

// Remove known CDP detection artifacts (targeted list, avoids Object.keys(window) scan)
['cdc_adoQpoasnfa76pfcZLmcfl_Array', 'cdc_adoQpoasnfa76pfcZLmcfl_Promise',
'cdc_adoQpoasnfa76pfcZLmcfl_Symbol'].forEach(k => { try { delete window[k]; } catch {} });

// Screen dimensions (headless reports 0 for outerWidth/outerHeight)
Object.defineProperty(window, 'outerWidth', { get: () => window.innerWidth });
Object.defineProperty(window, 'outerHeight', { get: () => window.innerHeight + 85 });
Object.defineProperty(screen, 'availWidth', { get: () => screen.width });
Object.defineProperty(screen, 'availHeight', { get: () => screen.height - 40 });

// navigator.connection (missing in some headless environments)
if (!navigator.connection) {
Object.defineProperty(navigator, 'connection', {
get: () => ({ effectiveType: '4g', rtt: 50, downlink: 10, saveData: false })
});
}

// Prevent WebRTC local IP leak (fingerprinting signal)
if (window.RTCPeerConnection) {
const OrigRTC = window.RTCPeerConnection;
window.RTCPeerConnection = function(config, constraints) {
const safeConfig = config ? { ...config, iceServers: [] } : config;
return new OrigRTC(safeConfig, constraints);
};
window.RTCPeerConnection.prototype = OrigRTC.prototype;
}
});

// Get or create the first page
Expand Down
83 changes: 81 additions & 2 deletions scripts/web-ctl.js
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ const BOOLEAN_FLAGS = new Set([
'--allow-evaluate', '--no-snapshot', '--wait-stable', '--vnc',
'--exact', '--accept', '--submit', '--dismiss', '--auto',
'--snapshot-collapse', '--snapshot-text-only', '--snapshot-compact',
'--snapshot-full', '--no-auth-wall-detect', '--no-content-block-detect', '--ensure-auth', '--wait-loaded',
'--snapshot-full', '--no-auth-wall-detect', '--no-content-block-detect', '--no-auto-recover', '--ensure-auth', '--wait-loaded',
]);

function validateSessionName(name) {
Expand Down Expand Up @@ -88,6 +88,22 @@ function matchProviderByDomain(url) {
}
}

/**
 * Memoized front for canLaunchHeaded().
 *
 * The most recent answer is reused for HEADED_CACHE_TTL milliseconds
 * (60 seconds) because display availability rarely changes mid-session.
 * A cached value of `null` is treated as "nothing cached yet".
 *
 * @returns {Promise<*>} whatever canLaunchHeaded() resolved to, either
 *   freshly fetched or served from the cache.
 */
const HEADED_CACHE_TTL = 60000;
let _headedCache = null;
let _headedCacheTime = 0;

async function cachedCanLaunchHeaded() {
  // Only consult the clock when there is actually something cached.
  const isFresh = _headedCache !== null
    && Date.now() - _headedCacheTime < HEADED_CACHE_TTL;

  if (!isFresh) {
    _headedCache = await canLaunchHeaded();
    // Stamp after the probe completes so the TTL counts from the answer.
    _headedCacheTime = Date.now();
  }

  return _headedCache;
}

/**
* Convert selector string to Playwright locator.
*/
Expand Down Expand Up @@ -1096,16 +1112,79 @@ async function runAction(sessionName, action, actionArgs, opts) {
contentBlockedIndicators: provider?.contentBlockedIndicators
});
}
// Auto headed fallback when content is blocked
if (contentBlockResult?.detected && !opts.noAutoRecover) {
const headed = await cachedCanLaunchHeaded();
if (headed) {
console.warn('[WARN] Content blocked in headless - falling back to headed browser');
// Save headless snapshot before closing (fallback may fail)
const headlessSnapshot = await getSnapshot(page, opts);
const headlessUrl = page.url();
const headlessStatus = response ? response.status() : null;
await closeBrowser(sessionName, context);
await new Promise(resolve => setTimeout(resolve, 500));
try {
const headedBrowser = await launchBrowser(sessionName, { headless: false });
context = headedBrowser.context;
page = headedBrowser.page;
const headedResponse = await page.goto(url, { waitUntil: 'domcontentloaded', timeout: 30000 });
if (opts.waitLoaded) {
await waitForLoaded(page, { timeout: loadedTimeout });
}
// Re-detect content blocking in headed mode
const headedProvider = matchProviderByDomain(url);
const headedBlockResult = await detectContentBlocked(page, {
contentSelectors: headedProvider?.contentSelectors,
contentBlockedIndicators: headedProvider?.contentBlockedIndicators
});
const headedSnapshot = await getSnapshot(page, opts);
result = {
url: page.url(),
status: headedResponse ? headedResponse.status() : null,
contentBlocked: true,
headedFallback: true,
...(headedBlockResult?.detected && { headedAlsoBlocked: true }),
warning: headedBlockResult?.detected ? 'content_blocked_headed_also' : 'content_blocked_headed_fallback',
suggestion: headedBlockResult?.detected
? 'Content blocked in both headless and headed modes.'
: 'Content was blocked in headless mode. Retrieved via headed browser.',
...(opts.waitLoaded && { waitLoaded: true }),
...(headedSnapshot != null && { snapshot: headedSnapshot })
};
break;
} catch (fallbackErr) {
console.warn('[WARN] Headed fallback failed: ' + fallbackErr.message);
// Return headless result captured before close
context = null;
page = null;
result = {
url: headlessUrl,
status: headlessStatus,
contentBlocked: true,
headedFallback: false,
warning: 'content_blocked',
contentBlockedReason: contentBlockResult.reason,
suggestion: 'Headed fallback failed: ' + fallbackErr.message,
...(opts.waitLoaded && { waitLoaded: true }),
...(headlessSnapshot != null && { snapshot: headlessSnapshot })
};
break;
}
}
}
const snapshot = await getSnapshot(page, opts);
result = {
url: page.url(),
status: response ? response.status() : null,
...(opts.waitLoaded && { waitLoaded: true }),
...(contentBlockResult?.detected && {
contentBlocked: true,
headedFallback: false,
warning: 'content_blocked',
contentBlockedReason: contentBlockResult.reason,
suggestion: "Site may be blocking headless browsers. Try: (1) authenticate with 'session auth <name> --provider <provider>', (2) use --ensure-auth for headed mode"
suggestion: opts.noAutoRecover
? "Site may be blocking headless browsers. Try: (1) authenticate with 'session auth <name> --provider <provider>', (2) use --ensure-auth for headed mode"
: 'Content blocked and no display for headed fallback. Try: ssh -X or set DISPLAY.'
}),
...(snapshot != null && { snapshot })
};
Expand Down
8 changes: 5 additions & 3 deletions skills/web-browse/SKILL.md
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ Safe practice: always double-quote URL arguments.
### goto - Navigate to URL

```bash
node ${PLUGIN_ROOT}/scripts/web-ctl.js run <session> goto <url> [--no-auth-wall-detect] [--no-content-block-detect] [--ensure-auth] [--wait-loaded]
node ${PLUGIN_ROOT}/scripts/web-ctl.js run <session> goto <url> [--no-auth-wall-detect] [--no-content-block-detect] [--no-auto-recover] [--ensure-auth] [--wait-loaded]
```

Navigates to a URL and automatically detects authentication walls using a three-heuristic detection system:
Expand All @@ -62,9 +62,11 @@ Use `--ensure-auth` to actively poll for authentication completion instead of a

Use `--wait-loaded` to wait for async-rendered content to finish loading before taking the snapshot. This combines network idle, DOM stability, loading indicator absence detection (spinners, skeletons, progress bars, aria-busy), and a final DOM quiet period. Use `--timeout <ms>` to set the wait timeout (default: 15000ms). Ideal for SPAs and pages that render content after the initial page load.

Use `--no-content-block-detect` to disable automatic detection of content blocking (e.g., sites serving empty pages to headless browsers). When content blocking is detected, the response includes `contentBlocked: true`, `warning: 'content_blocked'`, and a suggestion to authenticate or use headed mode.
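Use `--no-content-block-detect` to disable automatic detection of content blocking (e.g., sites serving empty pages to headless browsers). When content blocking is detected and a headed browser can be launched, the goto action automatically falls back to a headed browser to retrieve the content; the response then includes `contentBlocked: true`, `headedFallback: true`, and the snapshot from the headed session. If a headed browser cannot be launched or the fallback itself fails, the response instead includes `contentBlocked: true`, `headedFallback: false`, `warning: 'content_blocked'`, and the snapshot from the headless session.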

Returns: `{ url, status, authWallDetected, checkpointCompleted, ensureAuthCompleted, waitLoaded, contentBlocked, warning, contentBlockedReason, suggestion, snapshot }`
Use `--no-auto-recover` to disable the automatic headed fallback. When set, content blocking detection still runs but only returns a warning without attempting recovery.

Returns: `{ url, status, authWallDetected, checkpointCompleted, ensureAuthCompleted, waitLoaded, contentBlocked, headedFallback, warning, contentBlockedReason, suggestion, snapshot }`

### snapshot - Get Accessibility Tree

Expand Down
144 changes: 144 additions & 0 deletions tests/browser-stealth.test.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
'use strict';

const { describe, it } = require('node:test');
const assert = require('node:assert/strict');
const fs = require('fs');
const path = require('path');

// Raw text of scripts/browser-launcher.js, read once at load time.
// Every test in this file is a substring check against this text; nothing
// here launches a browser or executes the launcher module.
const launcherSource = fs.readFileSync(
path.join(__dirname, '..', 'scripts', 'browser-launcher.js'),
'utf8'
);

describe('browser stealth init script', () => {
  // Each entry becomes one test: `title` is the test name, and every
  // [needle, message] pair asserts that the launcher source contains `needle`.
  const stealthCases = [
    {
      title: 'hides navigator.webdriver',
      expectations: [
        ["navigator, 'webdriver'", 'should spoof navigator.webdriver'],
      ],
    },
    {
      title: 'spoofs window.chrome object',
      expectations: [
        ['window.chrome', 'should define window.chrome'],
      ],
    },
    {
      title: 'spoofs navigator.plugins',
      expectations: [
        ["navigator, 'plugins'", 'should spoof navigator.plugins'],
        ['Chrome PDF Plugin', 'should include a realistic plugin'],
      ],
    },
    {
      title: 'spoofs navigator.languages',
      expectations: [
        ["navigator, 'languages'", 'should spoof navigator.languages'],
      ],
    },
    {
      title: 'overrides WebGL renderer',
      expectations: [
        ['UNMASKED_VENDOR_WEBGL', 'should override WebGL vendor'],
        ['UNMASKED_RENDERER_WEBGL', 'should override WebGL renderer'],
      ],
    },
    {
      title: 'overrides permissions.query',
      expectations: [
        ['permissions.query', 'should override permissions.query'],
      ],
    },
    {
      title: 'removes known CDP detection artifacts',
      expectations: [
        ['cdc_adoQpoasnfa76pfcZLmcfl_Array', 'should target known CDP artifact names'],
      ],
    },
    {
      title: 'spoofs screen dimensions',
      expectations: [
        ["'outerWidth'", 'should spoof window.outerWidth'],
        ["'outerHeight'", 'should spoof window.outerHeight'],
        ["'availWidth'", 'should spoof screen.availWidth'],
        ["'availHeight'", 'should spoof screen.availHeight'],
      ],
    },
    {
      title: 'spoofs navigator.connection',
      expectations: [
        ['navigator.connection', 'should spoof navigator.connection'],
        ["effectiveType: '4g'", 'should report 4g connection'],
      ],
    },
    {
      title: 'prevents WebRTC IP leak',
      expectations: [
        ['RTCPeerConnection', 'should override RTCPeerConnection'],
        ['iceServers', 'should clear iceServers'],
      ],
    },
  ];

  for (const { title, expectations } of stealthCases) {
    it(title, () => {
      // Checked in order; the first missing needle fails the test.
      for (const [needle, message] of expectations) {
        assert.ok(launcherSource.includes(needle), message);
      }
    });
  }
});

describe('browser launch options', () => {
  // Each entry becomes one test: `title` is the test name, and every
  // [needle, message] pair asserts that the launcher source contains `needle`.
  const launchOptionCases = [
    {
      title: 'sets realistic viewport size',
      expectations: [
        ['viewport:', 'should set viewport'],
        ['1920', 'should use 1920 width'],
        ['1080', 'should use 1080 height'],
      ],
    },
    {
      title: 'sets window-size arg',
      expectations: [
        ['--window-size=1920,1080', 'should set --window-size arg'],
      ],
    },
    {
      title: 'disables automation controlled features',
      expectations: [
        ['--disable-blink-features=AutomationControlled', 'should disable AutomationControlled'],
      ],
    },
  ];

  for (const { title, expectations } of launchOptionCases) {
    it(title, () => {
      // Checked in order; the first missing needle fails the test.
      for (const [needle, message] of expectations) {
        assert.ok(launcherSource.includes(needle), message);
      }
    });
  }
});
Loading
Loading