From 65c9256506aa07a1950547ef8179dc2628c405c4 Mon Sep 17 00:00:00 2001 From: Avi Fenesh Date: Wed, 25 Feb 2026 00:10:28 +0200 Subject: [PATCH 1/7] feat: add auth wall detection module Three-heuristic AND-logic detection: 1. Domain cookies exist for target URL 2. Current page URL matches auth patterns 3. DOM contains login elements or text Short-circuits on first failing heuristic for efficiency. --- scripts/auth-wall-detect.js | 151 ++++++++++++++++++++++++++++++++++++ 1 file changed, 151 insertions(+) create mode 100644 scripts/auth-wall-detect.js diff --git a/scripts/auth-wall-detect.js b/scripts/auth-wall-detect.js new file mode 100644 index 0000000..f2cb26a --- /dev/null +++ b/scripts/auth-wall-detect.js @@ -0,0 +1,151 @@ +'use strict'; + +/** + * Auth wall detection module. + * + * Detects whether a page is showing an authentication wall after navigation. + * Uses three heuristics (ALL must pass - AND logic): + * 1. Domain cookies exist for the target URL + * 2. Current page URL matches a known auth URL pattern + * 3. Page DOM contains login-related elements or text + * + * Short-circuits: if cookie check fails, skips URL and DOM checks. + */ + +const AUTH_URL_PATTERNS = [ + 'login', + 'signin', + 'sign_in', + 'sign-in', + 'oauth', + 'accounts', + 'auth/realms' +]; + +const AUTH_DOM_SELECTORS = [ + 'input[type="password"]', + 'form[action*="login"]', + 'form[action*="signin"]', + 'form[action*="authenticate"]', + 'input[name="username"]', + 'input[name="email"][type="email"]' +]; + +const AUTH_TEXT_PATTERNS = [ + 'sign in', + 'log in', + 'enter your password', + 'choose an account', + 'pick an account', + 'select an account' +]; + +/** + * Extract the domain from a URL string. + * Returns null if the URL is invalid. + */ +function extractDomain(url) { + try { + return new URL(url).hostname; + } catch { + return null; + } +} + +/** + * Check whether a cookie domain matches the target domain. 
+ * Supports parent domain matching (e.g., cookie for `.github.com` matches `github.com`). + */ +function cookieDomainMatches(cookieDomain, targetDomain) { + const bare = cookieDomain.replace(/^\./, ''); + if (bare === targetDomain) return true; + if (targetDomain.endsWith('.' + bare)) return true; + return false; +} + +/** + * Detect whether the current page is an auth wall. + * + * @param {import('playwright').Page} page + * @param {import('playwright').BrowserContext} context + * @param {string} targetUrl - The URL we navigated to + * @returns {Promise<{ detected: boolean, reason: string, details?: object }>} + */ +async function detectAuthWall(page, context, targetUrl) { + const targetDomain = extractDomain(targetUrl); + if (!targetDomain) { + return { detected: false, reason: 'invalid_target_url' }; + } + + // Heuristic 1: Domain cookies exist + let cookies; + try { + cookies = await context.cookies(); + } catch { + return { detected: false, reason: 'cookie_read_error' }; + } + + const hasDomainCookies = cookies.some(c => cookieDomainMatches(c.domain, targetDomain)); + if (!hasDomainCookies) { + return { detected: false, reason: 'no_domain_cookies' }; + } + + // Heuristic 2: URL matches auth pattern + const currentUrl = page.url().toLowerCase(); + const authUrlPattern = AUTH_URL_PATTERNS.find(pattern => currentUrl.includes(pattern)); + if (!authUrlPattern) { + return { detected: false, reason: 'url_not_auth_pattern' }; + } + + // Heuristic 3: DOM contains auth elements + // 3a: Check selectors + let matchedSelector = null; + for (const selector of AUTH_DOM_SELECTORS) { + try { + const el = await page.$(selector); + if (el) { + matchedSelector = selector; + break; + } + } catch { + // Selector query failed - continue to next + } + } + + if (matchedSelector) { + return { + detected: true, + reason: 'auth_wall', + details: { + hasDomainCookies: true, + authUrlPattern, + domElement: matchedSelector + } + }; + } + + // 3b: Check text patterns + let matchedText = 
null; + try { + const bodyText = (await page.textContent('body') || '').toLowerCase(); + matchedText = AUTH_TEXT_PATTERNS.find(pattern => bodyText.includes(pattern)); + } catch { + // textContent failed - no text match + } + + if (matchedText) { + return { + detected: true, + reason: 'auth_wall', + details: { + hasDomainCookies: true, + authUrlPattern, + domElement: matchedText + } + }; + } + + return { detected: false, reason: 'no_auth_elements' }; +} + +module.exports = { detectAuthWall, AUTH_URL_PATTERNS, AUTH_DOM_SELECTORS, AUTH_TEXT_PATTERNS }; From 20cf1c20c32a02e3d7b5d1dbc7b28f1226a7f0a3 Mon Sep 17 00:00:00 2001 From: Avi Fenesh Date: Wed, 25 Feb 2026 00:11:13 +0200 Subject: [PATCH 2/7] feat: integrate auth wall detection into goto action When navigating via goto, automatically detect auth walls using the three-heuristic module. On detection: close headless browser, relaunch headed for interactive checkpoint, return post-auth snapshot. Opt out with --no-auth-wall-detect flag. --- scripts/web-ctl.js | 32 ++++++++++++++++++++++++++++++-- 1 file changed, 30 insertions(+), 2 deletions(-) diff --git a/scripts/web-ctl.js b/scripts/web-ctl.js index 9867a50..d96638b 100755 --- a/scripts/web-ctl.js +++ b/scripts/web-ctl.js @@ -2,7 +2,8 @@ 'use strict'; const sessionStore = require('./session-store'); -const { launchBrowser, closeBrowser, randomDelay, waitForStable } = require('./browser-launcher'); +const { launchBrowser, closeBrowser, randomDelay, waitForStable, canLaunchHeaded } = require('./browser-launcher'); +const { detectAuthWall } = require('./auth-wall-detect'); const { runAuthFlow } = require('./auth-flow'); const { checkAuthSuccess } = require('./auth-check'); const { sanitizeWebContent, wrapOutput } = require('./redact'); @@ -20,7 +21,7 @@ const BOOLEAN_FLAGS = new Set([ '--allow-evaluate', '--no-snapshot', '--wait-stable', '--vnc', '--exact', '--accept', '--submit', '--dismiss', '--auto', '--snapshot-collapse', '--snapshot-text-only', 
'--snapshot-compact', - '--snapshot-full', + '--snapshot-full', '--no-auth-wall-detect', ]); function validateSessionName(name) { @@ -919,6 +920,33 @@ async function runAction(sessionName, action, actionArgs, opts) { if (!url) throw new Error('URL required: run goto '); validateUrl(url); const response = await page.goto(url, { waitUntil: 'domcontentloaded', timeout: 30000 }); + if (!opts.noAuthWallDetect) { + const detection = await detectAuthWall(page, context, url); + if (detection.detected) { + console.warn('[WARN] Auth wall detected for ' + new URL(url).hostname); + await closeBrowser(sessionName, context); + const headed = await canLaunchHeaded(); + if (headed) { + const headedBrowser = await launchBrowser(sessionName, { headless: false }); + context = headedBrowser.context; + page = headedBrowser.page; + await page.goto(url, { waitUntil: 'domcontentloaded', timeout: 30000 }); + const ckTimeout = (opts.timeout ? parseInt(opts.timeout, 10) : 120) * 1000; + console.warn('[WARN] Checkpoint open for ' + (ckTimeout / 1000) + 's'); + await new Promise(resolve => setTimeout(resolve, ckTimeout)); + const snapshot = await getSnapshot(page, opts); + result = { url: page.url(), authWallDetected: true, checkpointCompleted: true, + ...(snapshot != null && { snapshot }) }; + break; + } else { + const snapshot = await getSnapshot(page, opts); + result = { url: page.url(), authWallDetected: true, checkpointCompleted: false, + message: 'Auth wall detected but no display for headed checkpoint.', + ...(snapshot != null && { snapshot }) }; + break; + } + } + } const snapshot = await getSnapshot(page, opts); result = { url: page.url(), status: response ? 
response.status() : null, ...(snapshot != null && { snapshot }) }; break; From 49511882dcefce1021bf9e5363ca020738c77ec8 Mon Sep 17 00:00:00 2001 From: Avi Fenesh Date: Wed, 25 Feb 2026 00:12:18 +0200 Subject: [PATCH 3/7] test: add auth wall detection tests Unit tests for the detection module with 14 test cases covering: - Three-heuristic AND logic (each can fail independently) - Cookie domain matching (parent + exact) - Real-world providers (Google, Microsoft) - Error handling for page.$ and textContent failures - Exported constants validation Integration tests verifying web-ctl.js source-level integration. --- tests/auth-wall-detect.test.js | 189 +++++++++++++++++++++++++++++++++ tests/web-ctl-actions.test.js | 60 +++++++++++ 2 files changed, 249 insertions(+) create mode 100644 tests/auth-wall-detect.test.js diff --git a/tests/auth-wall-detect.test.js b/tests/auth-wall-detect.test.js new file mode 100644 index 0000000..0aebc48 --- /dev/null +++ b/tests/auth-wall-detect.test.js @@ -0,0 +1,189 @@ +'use strict'; + +const { describe, it } = require('node:test'); +const assert = require('node:assert/strict'); +const { + detectAuthWall, + AUTH_URL_PATTERNS, + AUTH_DOM_SELECTORS, + AUTH_TEXT_PATTERNS +} = require('../scripts/auth-wall-detect'); + +// --- Mock helpers --- + +function mockPage({ url, selectors, bodyText } = {}) { + return { + url: () => url || 'about:blank', + $: async (sel) => (selectors && selectors.includes(sel)) ? {} : null, + textContent: async (sel) => sel === 'body' ? 
(bodyText || '') : '' + }; +} + +function mockContext({ cookies } = {}) { + return { + cookies: async () => cookies || [] + }; +} + +// --- Tests --- + +describe('detectAuthWall', () => { + + it('returns detected: false when no cookies for target domain', async () => { + const page = mockPage({ url: 'https://github.com/login' }); + const context = mockContext({ cookies: [] }); + const result = await detectAuthWall(page, context, 'https://github.com/dashboard'); + assert.equal(result.detected, false); + assert.equal(result.reason, 'no_domain_cookies'); + }); + + it('returns detected: false when cookies exist but URL not auth pattern', async () => { + const page = mockPage({ url: 'https://github.com/settings/profile' }); + const context = mockContext({ cookies: [{ domain: '.github.com', name: 'session', value: 'abc' }] }); + const result = await detectAuthWall(page, context, 'https://github.com/settings/profile'); + assert.equal(result.detected, false); + assert.equal(result.reason, 'url_not_auth_pattern'); + }); + + it('returns detected: false when cookies + URL match but no auth DOM elements', async () => { + const page = mockPage({ + url: 'https://github.com/login', + selectors: [], + bodyText: 'Welcome to the dashboard' + }); + const context = mockContext({ cookies: [{ domain: '.github.com', name: 'session', value: 'abc' }] }); + const result = await detectAuthWall(page, context, 'https://github.com/dashboard'); + assert.equal(result.detected, false); + assert.equal(result.reason, 'no_auth_elements'); + }); + + it('returns detected: true when all three heuristics match (selector)', async () => { + const page = mockPage({ + url: 'https://github.com/login', + selectors: ['input[type="password"]'] + }); + const context = mockContext({ cookies: [{ domain: '.github.com', name: 'session', value: 'abc' }] }); + const result = await detectAuthWall(page, context, 'https://github.com/dashboard'); + assert.equal(result.detected, true); + assert.equal(result.reason, 
'auth_wall'); + assert.equal(result.details.hasDomainCookies, true); + assert.equal(result.details.authUrlPattern, 'login'); + assert.equal(result.details.domElement, 'input[type="password"]'); + }); + + it('returns detected: true when cookies + URL + text pattern match', async () => { + const page = mockPage({ + url: 'https://accounts.google.com/signin', + selectors: [], + bodyText: 'Sign in to continue to Google Drive' + }); + const context = mockContext({ cookies: [{ domain: '.google.com', name: 'NID', value: 'xyz' }] }); + const result = await detectAuthWall(page, context, 'https://drive.google.com'); + assert.equal(result.detected, true); + assert.equal(result.reason, 'auth_wall'); + assert.equal(result.details.domElement, 'sign in'); + }); + + it('matches each AUTH_URL_PATTERNS entry', async () => { + for (const pattern of AUTH_URL_PATTERNS) { + const page = mockPage({ + url: `https://example.com/${pattern}/page`, + selectors: ['input[type="password"]'] + }); + const context = mockContext({ cookies: [{ domain: '.example.com', name: 's', value: 'v' }] }); + const result = await detectAuthWall(page, context, 'https://example.com/app'); + assert.equal(result.detected, true, `Should detect auth wall for URL pattern: ${pattern}`); + assert.equal(result.details.authUrlPattern, pattern); + } + }); + + it('cookie domain matching: parent domain (.github.com matches github.com)', async () => { + const page = mockPage({ + url: 'https://github.com/login', + selectors: ['input[type="password"]'] + }); + const context = mockContext({ cookies: [{ domain: '.github.com', name: 's', value: 'v' }] }); + const result = await detectAuthWall(page, context, 'https://github.com/dashboard'); + assert.equal(result.detected, true); + }); + + it('cookie domain matching: exact match', async () => { + const page = mockPage({ + url: 'https://github.com/login', + selectors: ['input[type="password"]'] + }); + const context = mockContext({ cookies: [{ domain: 'github.com', name: 's', value: 
'v' }] }); + const result = await detectAuthWall(page, context, 'https://github.com/dashboard'); + assert.equal(result.detected, true); + }); + + it('detects Google accounts.google.com', async () => { + const page = mockPage({ + url: 'https://accounts.google.com/v3/chooser', + selectors: ['input[name="email"][type="email"]'], + bodyText: 'Choose an account to continue' + }); + const context = mockContext({ cookies: [{ domain: '.google.com', name: 'NID', value: 'abc' }] }); + const result = await detectAuthWall(page, context, 'https://mail.google.com'); + assert.equal(result.detected, true); + assert.equal(result.details.authUrlPattern, 'accounts'); + }); + + it('detects Microsoft login.microsoftonline.com', async () => { + const page = mockPage({ + url: 'https://login.microsoftonline.com/common/oauth2/authorize', + selectors: ['input[name="username"]'], + bodyText: 'Pick an account' + }); + const context = mockContext({ cookies: [{ domain: '.microsoftonline.com', name: 'buid', value: 'abc' }] }); + const result = await detectAuthWall(page, context, 'https://portal.microsoftonline.com/dashboard'); + assert.equal(result.detected, true); + assert.equal(result.details.authUrlPattern, 'login'); + }); + + it('does NOT detect on settings/password-change pages (no auth URL pattern)', async () => { + const page = mockPage({ + url: 'https://github.com/settings/security', + selectors: ['input[type="password"]'], + bodyText: 'Enter your password to confirm changes' + }); + const context = mockContext({ cookies: [{ domain: '.github.com', name: 's', value: 'v' }] }); + const result = await detectAuthWall(page, context, 'https://github.com/settings/security'); + assert.equal(result.detected, false); + assert.equal(result.reason, 'url_not_auth_pattern'); + }); + + it('handles page.$() error gracefully', async () => { + const page = { + url: () => 'https://example.com/login', + $: async () => { throw new Error('Frame detached'); }, + textContent: async () => 'Sign in to your 
account' + }; + const context = mockContext({ cookies: [{ domain: '.example.com', name: 's', value: 'v' }] }); + const result = await detectAuthWall(page, context, 'https://example.com/app'); + // Should still detect via text pattern after selector errors + assert.equal(result.detected, true); + assert.equal(result.details.domElement, 'sign in'); + }); + + it('handles page.textContent() error gracefully', async () => { + const page = { + url: () => 'https://example.com/login', + $: async () => null, + textContent: async () => { throw new Error('Frame detached'); } + }; + const context = mockContext({ cookies: [{ domain: '.example.com', name: 's', value: 'v' }] }); + const result = await detectAuthWall(page, context, 'https://example.com/app'); + assert.equal(result.detected, false); + assert.equal(result.reason, 'no_auth_elements'); + }); + + it('exported constants are arrays', () => { + assert.ok(Array.isArray(AUTH_URL_PATTERNS), 'AUTH_URL_PATTERNS should be an array'); + assert.ok(Array.isArray(AUTH_DOM_SELECTORS), 'AUTH_DOM_SELECTORS should be an array'); + assert.ok(Array.isArray(AUTH_TEXT_PATTERNS), 'AUTH_TEXT_PATTERNS should be an array'); + assert.ok(AUTH_URL_PATTERNS.length > 0, 'AUTH_URL_PATTERNS should not be empty'); + assert.ok(AUTH_DOM_SELECTORS.length > 0, 'AUTH_DOM_SELECTORS should not be empty'); + assert.ok(AUTH_TEXT_PATTERNS.length > 0, 'AUTH_TEXT_PATTERNS should not be empty'); + }); +}); diff --git a/tests/web-ctl-actions.test.js b/tests/web-ctl-actions.test.js index 21b32aa..08e2daa 100644 --- a/tests/web-ctl-actions.test.js +++ b/tests/web-ctl-actions.test.js @@ -695,3 +695,63 @@ describe('auto-create session CLI integration', () => { 'response should not include autoCreated when session already exists'); }); }); + +describe('auth wall detection in goto', () => { + const fs = require('fs'); + const path = require('path'); + const webCtlSource = fs.readFileSync( + path.join(__dirname, '..', 'scripts', 'web-ctl.js'), + 'utf8' + ); + + 
it('web-ctl.js imports auth-wall-detect module', () => { + assert.ok( + webCtlSource.includes("require('./auth-wall-detect')"), + 'web-ctl.js should require auth-wall-detect' + ); + }); + + it('BOOLEAN_FLAGS includes --no-auth-wall-detect', () => { + assert.ok( + webCtlSource.includes("'--no-auth-wall-detect'"), + '--no-auth-wall-detect should be in BOOLEAN_FLAGS' + ); + }); + + it('auth-wall-detect.js exports detectAuthWall as function', () => { + const { detectAuthWall } = require('../scripts/auth-wall-detect'); + assert.equal(typeof detectAuthWall, 'function'); + }); + + it('goto case calls detectAuthWall', () => { + assert.ok( + webCtlSource.includes('detectAuthWall(page, context, url)'), + 'goto case should call detectAuthWall' + ); + }); + + it('goto case checks noAuthWallDetect opt-out', () => { + assert.ok( + webCtlSource.includes('opts.noAuthWallDetect'), + 'goto case should check noAuthWallDetect flag' + ); + }); + + it('goto case relaunches headed browser on detection', () => { + assert.ok( + webCtlSource.includes('canLaunchHeaded()'), + 'goto case should call canLaunchHeaded on auth wall detection' + ); + assert.ok( + webCtlSource.includes("launchBrowser(sessionName, { headless: false })"), + 'goto case should relaunch browser headed' + ); + }); + + it('goto case includes authWallDetected in result', () => { + assert.ok( + webCtlSource.includes('authWallDetected: true'), + 'result should include authWallDetected flag' + ); + }); +}); From 495c85971236d9916155605f4e5ddedeb480dbfe Mon Sep 17 00:00:00 2001 From: Avi Fenesh Date: Wed, 25 Feb 2026 00:16:16 +0200 Subject: [PATCH 4/7] fix: clean up AI slop --- .claude/web-ctl/workflow-status.json | 10 ++++++++++ scripts/auth-wall-detect.js | 2 -- tests/auth-wall-detect.test.js | 1 - 3 files changed, 10 insertions(+), 3 deletions(-) create mode 100644 .claude/web-ctl/workflow-status.json diff --git a/.claude/web-ctl/workflow-status.json b/.claude/web-ctl/workflow-status.json new file mode 100644 index 
0000000..42855a0 --- /dev/null +++ b/.claude/web-ctl/workflow-status.json @@ -0,0 +1,10 @@ +{ + "taskId": "34", + "title": "Auto-detect auth walls after successful authentication", + "phase": "implementation", + "currentStep": 5, + "totalSteps": 5, + "implementationComplete": true, + "filesModified": ["scripts/auth-wall-detect.js", "scripts/web-ctl.js", "tests/auth-wall-detect.test.js", "tests/web-ctl-actions.test.js"], + "lastActivityAt": "2026-02-25T00:03:00Z" +} diff --git a/scripts/auth-wall-detect.js b/scripts/auth-wall-detect.js index f2cb26a..25efb16 100644 --- a/scripts/auth-wall-detect.js +++ b/scripts/auth-wall-detect.js @@ -108,7 +108,6 @@ async function detectAuthWall(page, context, targetUrl) { break; } } catch { - // Selector query failed - continue to next } } @@ -130,7 +129,6 @@ async function detectAuthWall(page, context, targetUrl) { const bodyText = (await page.textContent('body') || '').toLowerCase(); matchedText = AUTH_TEXT_PATTERNS.find(pattern => bodyText.includes(pattern)); } catch { - // textContent failed - no text match } if (matchedText) { diff --git a/tests/auth-wall-detect.test.js b/tests/auth-wall-detect.test.js index 0aebc48..4a516e2 100644 --- a/tests/auth-wall-detect.test.js +++ b/tests/auth-wall-detect.test.js @@ -161,7 +161,6 @@ describe('detectAuthWall', () => { }; const context = mockContext({ cookies: [{ domain: '.example.com', name: 's', value: 'v' }] }); const result = await detectAuthWall(page, context, 'https://example.com/app'); - // Should still detect via text pattern after selector errors assert.equal(result.detected, true); assert.equal(result.details.domElement, 'sign in'); }); From bcd4debfe51ae7b6685bee7e601d9a0f6cadd6a8 Mon Sep 17 00:00:00 2001 From: Avi Fenesh Date: Wed, 25 Feb 2026 00:20:50 +0200 Subject: [PATCH 5/7] fix: address review findings - parallelize DOM queries, cap timeout, add tests - Parallelize DOM selector checks with Promise.allSettled (perf) - Limit textContent to first 5000 chars (perf/memory) 
- Cap checkpoint timeout at 3600s max (security) - Add tests for invalid URL and cookie read error paths - Add case-insensitive URL matching test --- scripts/auth-wall-detect.js | 18 ++++++++++-------- scripts/web-ctl.js | 2 +- tests/auth-wall-detect.test.js | 27 +++++++++++++++++++++++++++ 3 files changed, 38 insertions(+), 9 deletions(-) diff --git a/scripts/auth-wall-detect.js b/scripts/auth-wall-detect.js index 25efb16..7c3c1a3 100644 --- a/scripts/auth-wall-detect.js +++ b/scripts/auth-wall-detect.js @@ -98,17 +98,19 @@ async function detectAuthWall(page, context, targetUrl) { } // Heuristic 3: DOM contains auth elements - // 3a: Check selectors + // 3a: Check selectors (parallel for performance) let matchedSelector = null; - for (const selector of AUTH_DOM_SELECTORS) { - try { - const el = await page.$(selector); - if (el) { - matchedSelector = selector; + try { + const results = await Promise.allSettled( + AUTH_DOM_SELECTORS.map(async (sel) => ({ sel, el: await page.$(sel) })) + ); + for (const r of results) { + if (r.status === 'fulfilled' && r.value.el) { + matchedSelector = r.value.sel; break; } - } catch { } + } catch { } if (matchedSelector) { @@ -126,7 +128,7 @@ async function detectAuthWall(page, context, targetUrl) { // 3b: Check text patterns let matchedText = null; try { - const bodyText = (await page.textContent('body') || '').toLowerCase(); + const bodyText = (await page.textContent('body') || '').slice(0, 5000).toLowerCase(); matchedText = AUTH_TEXT_PATTERNS.find(pattern => bodyText.includes(pattern)); } catch { } diff --git a/scripts/web-ctl.js b/scripts/web-ctl.js index d96638b..3dfb912 100755 --- a/scripts/web-ctl.js +++ b/scripts/web-ctl.js @@ -931,7 +931,7 @@ async function runAction(sessionName, action, actionArgs, opts) { context = headedBrowser.context; page = headedBrowser.page; await page.goto(url, { waitUntil: 'domcontentloaded', timeout: 30000 }); - const ckTimeout = (opts.timeout ? 
parseInt(opts.timeout, 10) : 120) * 1000; + const ckTimeout = Math.min(opts.timeout ? parseInt(opts.timeout, 10) : 120, 3600) * 1000; console.warn('[WARN] Checkpoint open for ' + (ckTimeout / 1000) + 's'); await new Promise(resolve => setTimeout(resolve, ckTimeout)); const snapshot = await getSnapshot(page, opts); diff --git a/tests/auth-wall-detect.test.js b/tests/auth-wall-detect.test.js index 4a516e2..99fd0a9 100644 --- a/tests/auth-wall-detect.test.js +++ b/tests/auth-wall-detect.test.js @@ -177,6 +177,33 @@ describe('detectAuthWall', () => { assert.equal(result.reason, 'no_auth_elements'); }); + it('returns detected: false when target URL is invalid', async () => { + const page = mockPage({ url: 'https://github.com/login' }); + const context = mockContext({ cookies: [{ domain: '.github.com', name: 's', value: 'v' }] }); + const result = await detectAuthWall(page, context, 'not-a-valid-url'); + assert.equal(result.detected, false); + assert.equal(result.reason, 'invalid_target_url'); + }); + + it('handles context.cookies() error gracefully', async () => { + const page = mockPage({ url: 'https://github.com/login' }); + const context = { cookies: async () => { throw new Error('Context destroyed'); } }; + const result = await detectAuthWall(page, context, 'https://github.com/dashboard'); + assert.equal(result.detected, false); + assert.equal(result.reason, 'cookie_read_error'); + }); + + it('detects auth wall with uppercase URL (case insensitive)', async () => { + const page = mockPage({ + url: 'https://example.com/LOGIN/page', + selectors: ['input[type="password"]'] + }); + const context = mockContext({ cookies: [{ domain: '.example.com', name: 's', value: 'v' }] }); + const result = await detectAuthWall(page, context, 'https://example.com/app'); + assert.equal(result.detected, true); + assert.equal(result.details.authUrlPattern, 'login'); + }); + it('exported constants are arrays', () => { assert.ok(Array.isArray(AUTH_URL_PATTERNS), 'AUTH_URL_PATTERNS should be 
an array'); assert.ok(Array.isArray(AUTH_DOM_SELECTORS), 'AUTH_DOM_SELECTORS should be an array'); From 792c1f178bd16ccfdbfff1f6f5ebba77ce69a3df Mon Sep 17 00:00:00 2001 From: Avi Fenesh Date: Wed, 25 Feb 2026 00:22:51 +0200 Subject: [PATCH 6/7] test: add subdomain cookie matching and body truncation edge case tests --- tests/auth-wall-detect.test.js | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/tests/auth-wall-detect.test.js b/tests/auth-wall-detect.test.js index 99fd0a9..e952425 100644 --- a/tests/auth-wall-detect.test.js +++ b/tests/auth-wall-detect.test.js @@ -117,6 +117,29 @@ describe('detectAuthWall', () => { assert.equal(result.detected, true); }); + it('cookie domain matching: subdomain (.github.com matches api.github.com)', async () => { + const page = mockPage({ + url: 'https://api.github.com/login', + selectors: ['input[type="password"]'] + }); + const context = mockContext({ cookies: [{ domain: '.github.com', name: 's', value: 'v' }] }); + const result = await detectAuthWall(page, context, 'https://api.github.com/dashboard'); + assert.equal(result.detected, true); + }); + + it('detects auth text within 5000 char body limit', async () => { + const longBody = 'x'.repeat(4980) + ' sign in to continue'; + const page = mockPage({ + url: 'https://example.com/login', + selectors: [], + bodyText: longBody + }); + const context = mockContext({ cookies: [{ domain: '.example.com', name: 's', value: 'v' }] }); + const result = await detectAuthWall(page, context, 'https://example.com/app'); + assert.equal(result.detected, true); + assert.equal(result.details.domElement, 'sign in'); + }); + it('detects Google accounts.google.com', async () => { const page = mockPage({ url: 'https://accounts.google.com/v3/chooser', From 66586edb8626c92ce477c0ba75c5a629c3fe377c Mon Sep 17 00:00:00 2001 From: Avi Fenesh Date: Wed, 25 Feb 2026 00:26:56 +0200 Subject: [PATCH 7/7] docs: sync documentation with code changes --- CHANGELOG.md | 1 + README.md | 3 
++- skills/web-browse/SKILL.md | 13 +++++++++++-- 3 files changed, 14 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 72b7a7f..02ac84d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,7 @@ ## [Unreleased] ### Added +- Auto-detect authentication walls after goto navigation - uses three-heuristic detection (domain cookies, URL auth patterns, DOM login elements) and automatically opens a headed checkpoint. Disable with `--no-auth-wall-detect` flag - Smart default snapshot scoping - snapshots automatically scope to `
` element (then `[role="main"]`, fallback to ``), reducing output size by excluding navigation, headers, and footers. Use `--snapshot-full` to capture full page body when needed - `--snapshot-compact` flag for token-efficient LLM consumption - applies four transforms: link collapsing (merges link + /url child into `link "Title" -> /path`), heading inlining (merges heading with single link child), decorative image removal (strips img nodes with empty or single-char alt text), and duplicate URL dedup (removes second occurrence at same depth scope). Applied after `--snapshot-depth` and before `--snapshot-collapse` in the pipeline - `--snapshot-max-lines ` flag to truncate snapshot output to a maximum number of lines, with a `... (K more lines)` marker when lines are omitted diff --git a/README.md b/README.md index 0cf161a..2e9005b 100644 --- a/README.md +++ b/README.md @@ -94,7 +94,7 @@ web-ctl session end github | Action | Usage | Returns | |--------|-------|---------| -| `goto` | `run goto ` | `{ url, status, snapshot }` | +| `goto` | `run goto [--no-auth-wall-detect]` | `{ url, status, authWallDetected, checkpointCompleted, snapshot }` | | `snapshot` | `run snapshot` | `{ url, snapshot }` | | `click` | `run click [--wait-stable]` | `{ url, clicked, snapshot }` | | `click-wait` | `run click-wait [--timeout]` | `{ url, clicked, settled, snapshot }` | @@ -172,6 +172,7 @@ This eliminates the common click-snapshot-check loop that wastes agent turns on | `--filter ` | `network` | Filter captured requests by URL pattern | | `--path ` | `screenshot` | Custom screenshot path (within session dir) | | `--allow-evaluate` | `evaluate` | Required safety flag for JS execution | +| `--no-auth-wall-detect` | `goto` | Disable automatic auth wall detection and checkpoint opening | | `--snapshot-depth ` | Any action with snapshot | Limit ARIA tree depth (e.g. 
3 for top 3 levels) | | `--snapshot-selector ` | Any action with snapshot | Scope snapshot to a DOM subtree | | `--snapshot-max-lines ` | Any action with snapshot | Truncate snapshot to N lines | diff --git a/skills/web-browse/SKILL.md b/skills/web-browse/SKILL.md index f35f8d7..4890faa 100644 --- a/skills/web-browse/SKILL.md +++ b/skills/web-browse/SKILL.md @@ -46,10 +46,19 @@ Safe practice: always double-quote URL arguments. ### goto - Navigate to URL ```bash -node ${PLUGIN_ROOT}/scripts/web-ctl.js run goto +node ${PLUGIN_ROOT}/scripts/web-ctl.js run goto [--no-auth-wall-detect] ``` -Returns: `{ url, status, snapshot }` +Navigates to a URL and automatically detects authentication walls using a three-heuristic detection system (all three must match): +1. Domain cookies (checks that the browser context holds at least one cookie for the target domain or one of its parent domains) +2. URL auth patterns (checks whether the current page URL contains a known auth substring such as `login`, `signin`, `sign-in`, `oauth`, `accounts`, or `auth/realms`) +3. DOM login elements (scans the page for login form elements such as password inputs, or for sign-in text in the page body) + +When an authentication wall is detected and a display is available, the tool relaunches the browser headed and opens a checkpoint so the user can complete authentication. The checkpoint stays open for 120 seconds by default (capped at 3600 seconds). If no display is available, no checkpoint is opened and the result reports `checkpointCompleted: false`. + +Use `--no-auth-wall-detect` to disable this automatic detection and skip the checkpoint, navigating headlessly without waiting for user interaction. + +Returns: `{ url, status, snapshot }`, or `{ url, authWallDetected, checkpointCompleted, snapshot }` when an auth wall is detected ### snapshot - Get Accessibility Tree