diff --git a/CHANGELOG.md b/CHANGELOG.md index 8adafe4..72b7a7f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,7 @@ ## [Unreleased] ### Added +- Smart default snapshot scoping - snapshots automatically scope to `
<main>` element (then `[role="main"]`, fallback to `<body>`), reducing output size by excluding navigation, headers, and footers. Use `--snapshot-full` to capture full page body when needed - `--snapshot-compact` flag for token-efficient LLM consumption - applies four transforms: link collapsing (merges link + /url child into `link "Title" -> /path`), heading inlining (merges heading with single link child), decorative image removal (strips img nodes with empty or single-char alt text), and duplicate URL dedup (removes second occurrence at same depth scope). Applied after `--snapshot-depth` and before `--snapshot-collapse` in the pipeline - `--snapshot-max-lines ` flag to truncate snapshot output to a maximum number of lines, with a `... (K more lines)` marker when lines are omitted - `--snapshot-collapse` flag to collapse repeated consecutive siblings of the same ARIA type - keeps first 2 with subtrees, replaces the rest with `... (K more )` markers. Works recursively on nested structures diff --git a/README.md b/README.md index a2c1fa7..0cf161a 100644 --- a/README.md +++ b/README.md @@ -179,6 +179,7 @@ This eliminates the common click-snapshot-check loop that wastes agent turns on | `--snapshot-collapse` | Any action with snapshot | Collapse repeated siblings (keep first 2, summarize rest) | | `--snapshot-text-only` | Any action with snapshot | Strip structural nodes, keep content only | | `--max-field-length ` | `extract` | Max characters per field (default: 500, max: 2000) | +| `--snapshot-full` | Any action with snapshot | Use full page body (default: auto-scope to `
<main>` content area) | | `--no-snapshot` | Any action with snapshot | Omit snapshot from output entirely | ## Error Handling diff --git a/commands/web-ctl.md b/commands/web-ctl.md index fb88d9d..9b7891a 100644 --- a/commands/web-ctl.md +++ b/commands/web-ctl.md @@ -61,6 +61,7 @@ node ${PLUGIN_ROOT}/scripts/web-ctl.js run click --no-snapshot node ${PLUGIN_ROOT}/scripts/web-ctl.js run snapshot --snapshot-collapse node ${PLUGIN_ROOT}/scripts/web-ctl.js run snapshot --snapshot-compact node ${PLUGIN_ROOT}/scripts/web-ctl.js run snapshot --snapshot-text-only --snapshot-max-lines 50 +node ${PLUGIN_ROOT}/scripts/web-ctl.js run goto --snapshot-full node ${PLUGIN_ROOT}/scripts/web-ctl.js run goto --snapshot-collapse --snapshot-depth 4 # Macros diff --git a/scripts/web-ctl.js b/scripts/web-ctl.js index df494d7..9867a50 100755 --- a/scripts/web-ctl.js +++ b/scripts/web-ctl.js @@ -20,6 +20,7 @@ const BOOLEAN_FLAGS = new Set([ '--allow-evaluate', '--no-snapshot', '--wait-stable', '--vnc', '--exact', '--accept', '--submit', '--dismiss', '--auto', '--snapshot-collapse', '--snapshot-text-only', '--snapshot-compact', + '--snapshot-full', ]); function validateSessionName(name) { @@ -85,6 +86,26 @@ function resolveSelector(page, selector) { return page.locator(selector); } +/** + * Detect the main content area of the page. + * Tries
<main>, then [role="main"], then falls back to <body>. + * + * @param {object} page - Playwright page object + * @returns {object} Playwright locator for the main content area + */ +async function detectMainContent(page) { + try { + const mainTag = page.locator('main').first(); + const mainRole = page.locator('[role="main"]').first(); + const [mainCount, roleCount] = await Promise.all([mainTag.count(), mainRole.count()]); + if (mainCount > 0) return mainTag; + if (roleCount > 0) return mainRole; + } catch { + // fall through to body + } + return page.locator('body'); +} + /** * Get accessibility tree snapshot formatted as text. * Uses Playwright's ariaSnapshot API (page.accessibility was removed in v1.50+). @@ -93,6 +114,7 @@ function resolveSelector(page, selector) { * @param {object} [opts={}] - Snapshot options * @param {boolean} [opts.noSnapshot] - Return null to omit snapshot entirely * @param {string} [opts.snapshotSelector] - Scope snapshot to a DOM subtree + * @param {boolean} [opts.snapshotFull] - Use full page body (skip
<main> auto-detection) * @param {number} [opts.snapshotDepth] - Limit ARIA tree depth * @param {boolean} [opts.snapshotCompact] - Compact format for token efficiency * @param {boolean} [opts.snapshotCollapse] - Collapse repeated siblings @@ -104,7 +126,9 @@ async function getSnapshot(page, opts = {}) { try { const root = opts.snapshotSelector ? resolveSelector(page, opts.snapshotSelector) - : page.locator('body'); + : opts.snapshotFull + ? page.locator('body') + : await detectMainContent(page); const raw = await root.ariaSnapshot(); let result = raw; if (opts.snapshotDepth) result = trimByDepth(result, opts.snapshotDepth); @@ -1170,6 +1194,7 @@ Snapshot options (apply to any action that returns a snapshot): remove decorative images, dedup URLs --snapshot-collapse Collapse repeated siblings (show first 2) --snapshot-text-only Strip structural nodes, keep content only + --snapshot-full Use full page body (skip
<main> auto-detection) Selector syntax: role=button[name='Submit'] ARIA role selector diff --git a/skills/web-browse/SKILL.md b/skills/web-browse/SKILL.md index 3b40563..f35f8d7 100644 --- a/skills/web-browse/SKILL.md +++ b/skills/web-browse/SKILL.md @@ -346,7 +346,9 @@ Auto-detect mode also returns the detected CSS selector, which can be reused wit ## Snapshot Control -All actions that return a snapshot support these flags to control output size: +All actions that return a snapshot support these flags to control output size. + +By default, snapshots are auto-scoped to the main content area of the page. The tool looks for a `
<main>` element, then `[role="main"]`, and falls back to `<body>` if neither exists. This automatically excludes navigation, headers, and footers from snapshots, reducing noise and token usage. Use `--snapshot-full` to capture the full page body when needed, or `--snapshot-selector` to scope to a specific element. ### --snapshot-depth N - Limit Tree Depth @@ -366,6 +368,15 @@ node ${PLUGIN_ROOT}/scripts/web-ctl.js run click "#btn" --snapshot-sel Takes the snapshot from a specific DOM subtree instead of the full body. Accepts the same selector syntax as other actions. +### --snapshot-full - Full Page Snapshot + +```bash +node ${PLUGIN_ROOT}/scripts/web-ctl.js run goto --snapshot-full +node ${PLUGIN_ROOT}/scripts/web-ctl.js run snapshot --snapshot-full +``` + +Bypasses the default auto-scoping to `
<main>` and captures the full page body instead. Use this when you need to see navigation, headers, footers, or other content outside the main content area. + ### --no-snapshot - Omit Snapshot ```bash diff --git a/tests/get-snapshot.test.js b/tests/get-snapshot.test.js index eb8b20f..1f44619 100644 --- a/tests/get-snapshot.test.js +++ b/tests/get-snapshot.test.js @@ -23,12 +23,28 @@ function resolveSelector(page, selector) { return page.locator(selector); } +// Keep this in sync with scripts/web-ctl.js. +async function detectMainContent(page) { + try { + const mainTag = page.locator('main').first(); + const mainRole = page.locator('[role="main"]').first(); + const [mainCount, roleCount] = await Promise.all([mainTag.count(), mainRole.count()]); + if (mainCount > 0) return mainTag; + if (roleCount > 0) return mainRole; + } catch { + // fall through to body + } + return page.locator('body'); +} + async function getSnapshot(page, opts = {}) { if (opts.noSnapshot) return null; try { const root = opts.snapshotSelector ? resolveSelector(page, opts.snapshotSelector) - : page.locator('body'); + : opts.snapshotFull + ? 
page.locator('body') + : await detectMainContent(page); const raw = await root.ariaSnapshot(); let result = raw; if (opts.snapshotDepth) result = trimByDepth(result, opts.snapshotDepth); @@ -734,13 +750,22 @@ describe('trimByDepth', () => { // ============ getSnapshot tests ============ describe('getSnapshot', () => { - it('returns aria snapshot from body locator', async () => { + it('returns aria snapshot from body locator when no main element', async () => { const mockPage = { locator(selector) { - assert.equal(selector, 'body'); - return { - ariaSnapshot: async () => '- heading "Example" [level=1]\n- link "More"' - }; + if (selector === 'main' || selector === '[role="main"]') { + return { + first() { + return { count: async () => 0 }; + } + }; + } + if (selector === 'body') { + return { + ariaSnapshot: async () => '- heading "Example" [level=1]\n- link "More"' + }; + } + return { ariaSnapshot: async () => '' }; } }; const result = await getSnapshot(mockPage); @@ -837,10 +862,19 @@ describe('getSnapshot with opts', () => { it('trims output when snapshotDepth is set', async () => { const mockPage = { locator(selector) { - assert.equal(selector, 'body'); - return { - ariaSnapshot: async () => '- navigation\n - link "Home"\n - link "About"' - }; + if (selector === 'main' || selector === '[role="main"]') { + return { + first() { + return { count: async () => 0 }; + } + }; + } + if (selector === 'body') { + return { + ariaSnapshot: async () => '- navigation\n - link "Home"\n - link "About"' + }; + } + return { ariaSnapshot: async () => '' }; } }; const result = await getSnapshot(mockPage, { snapshotDepth: 1 }); @@ -869,13 +903,22 @@ describe('getSnapshot with opts', () => { assert.ok(!result.includes('link "Item"')); }); - it('preserves default behavior with empty opts', async () => { + it('falls back to body with empty opts when no main element', async () => { const mockPage = { locator(selector) { - assert.equal(selector, 'body'); - return { - ariaSnapshot: async () 
=> '- heading "Title"' - }; + if (selector === 'main' || selector === '[role="main"]') { + return { + first() { + return { count: async () => 0 }; + } + }; + } + if (selector === 'body') { + return { + ariaSnapshot: async () => '- heading "Title"' + }; + } + return { ariaSnapshot: async () => '' }; } }; const result = await getSnapshot(mockPage, {}); @@ -1199,6 +1242,13 @@ describe('getSnapshot pipeline', () => { function makeMockPage(snapshot) { return { locator(selector) { + if (selector === 'main' || selector === '[role="main"]') { + return { + first() { + return { count: async () => 0 }; + } + }; + } return { ariaSnapshot: async () => snapshot }; } }; @@ -1297,3 +1347,270 @@ describe('getSnapshot pipeline', () => { assert.ok(!result.includes('- list\n'), 'structural list should be stripped by text-only'); }); }); + +// ============ detectMainContent tests ============ + +describe('detectMainContent', () => { + it('returns main locator when
<main> element exists', async () => { + const mainLocator = { ariaSnapshot: async () => 'main content' }; + const mockPage = { + locator(selector) { + if (selector === 'main') { + return { + first() { + return { + count: async () => 1, + ariaSnapshot: mainLocator.ariaSnapshot + }; + } + }; + } + if (selector === '[role="main"]') { + return { + first() { + return { count: async () => 0 }; + } + }; + } + return { ariaSnapshot: async () => 'body content' }; + } + }; + const result = await detectMainContent(mockPage); + const snapshot = await result.ariaSnapshot(); + assert.equal(snapshot, 'main content'); + }); + + it('returns role=main locator when no
<main> but role exists', async () => { + const roleLocator = { ariaSnapshot: async () => 'role content' }; + const mockPage = { + locator(selector) { + if (selector === 'main') { + return { + first() { + return { count: async () => 0 }; + } + }; + } + if (selector === '[role="main"]') { + return { + first() { + return { + count: async () => 1, + ariaSnapshot: roleLocator.ariaSnapshot + }; + } + }; + } + return { ariaSnapshot: async () => 'body content' }; + } + }; + const result = await detectMainContent(mockPage); + const snapshot = await result.ariaSnapshot(); + assert.equal(snapshot, 'role content'); + }); + + it('falls back to body when neither main nor role exists', async () => { + const mockPage = { + locator(selector) { + if (selector === 'main' || selector === '[role="main"]') { + return { + first() { + return { count: async () => 0 }; + } + }; + } + if (selector === 'body') { + return { ariaSnapshot: async () => 'body content' }; + } + return { ariaSnapshot: async () => '' }; + } + }; + const result = await detectMainContent(mockPage); + const snapshot = await result.ariaSnapshot(); + assert.equal(snapshot, 'body content'); + }); + + it('prioritizes
<main> over [role="main"] when both exist', async () => { + const mockPage = { + locator(selector) { + if (selector === 'main') { + return { + first() { + return { + count: async () => 1, + ariaSnapshot: async () => 'main tag content' + }; + } + }; + } + if (selector === '[role="main"]') { + return { + first() { + return { + count: async () => 1, + ariaSnapshot: async () => 'role content' + }; + } + }; + } + return { ariaSnapshot: async () => 'body content' }; + } + }; + const result = await detectMainContent(mockPage); + const snapshot = await result.ariaSnapshot(); + assert.equal(snapshot, 'main tag content', '
<main> should take priority over [role="main"]'); + }); + + it('falls back to body when locator.count() throws', async () => { + const mockPage = { + locator(selector) { + if (selector === 'main' || selector === '[role="main"]') { + return { + first() { + return { count: async () => { throw new Error('detached'); } }; + } + }; + } + if (selector === 'body') { + return { ariaSnapshot: async () => 'body fallback' }; + } + return { ariaSnapshot: async () => '' }; + } + }; + const result = await detectMainContent(mockPage); + const snapshot = await result.ariaSnapshot(); + assert.equal(snapshot, 'body fallback'); + }); +}); + +// ============ getSnapshot auto-scoping tests ============ + +describe('getSnapshot auto-scoping', () => { + it('auto-scopes to main when present', async () => { + let usedLocator = null; + const mockPage = { + locator(selector) { + if (selector === 'main') { + return { + first() { + const loc = { + count: async () => 1, + ariaSnapshot: async () => '- heading "Main Content"' + }; + usedLocator = 'main'; + return loc; + } + }; + } + if (selector === '[role="main"]') { + return { + first() { + return { count: async () => 0 }; + } + }; + } + return { ariaSnapshot: async () => '- heading "Full Body"' }; + } + }; + const result = await getSnapshot(mockPage); + assert.equal(usedLocator, 'main'); + assert.equal(result, '- heading "Main Content"'); + }); + + it('respects snapshotFull flag - uses body even when main exists', async () => { + const mockPage = { + locator(selector) { + if (selector === 'main') { + return { + first() { + return { + count: async () => 1, + ariaSnapshot: async () => '- heading "Main Only"' + }; + } + }; + } + if (selector === 'body') { + return { + ariaSnapshot: async () => '- heading "Full Body"' + }; + } + return { + first() { return { count: async () => 0 }; }, + ariaSnapshot: async () => '' + }; + } + }; + const result = await getSnapshot(mockPage, { snapshotFull: true }); + assert.equal(result, '- heading "Full Body"'); + }); + 
+ it('snapshotSelector takes priority over auto-detection', async () => { + let usedSelector = null; + const mockPage = { + locator(selector) { + usedSelector = selector; + if (selector === 'main') { + return { + first() { + return { + count: async () => 1, + ariaSnapshot: async () => '- heading "Main"' + }; + } + }; + } + return { + ariaSnapshot: async () => '- heading "Custom Scope"' + }; + } + }; + const result = await getSnapshot(mockPage, { snapshotSelector: 'css=nav' }); + assert.equal(usedSelector, 'nav', 'Should use custom selector, not main detection'); + assert.equal(result, '- heading "Custom Scope"'); + }); + + it('snapshotSelector takes priority over snapshotFull', async () => { + let usedSelector = null; + const mockPage = { + locator(selector) { + usedSelector = selector; + return { + ariaSnapshot: async () => '- heading "Selector Wins"' + }; + } + }; + const result = await getSnapshot(mockPage, { snapshotSelector: '#sidebar', snapshotFull: true }); + assert.equal(usedSelector, '#sidebar', 'snapshotSelector should override snapshotFull'); + assert.equal(result, '- heading "Selector Wins"'); + }); + + it('snapshotFull works with snapshot transform flags', async () => { + const mockPage = { + locator(selector) { + if (selector === 'main') { + return { + first() { + return { count: async () => 1, ariaSnapshot: async () => 'main' }; + } + }; + } + if (selector === '[role="main"]') { + return { + first() { return { count: async () => 0 }; } + }; + } + if (selector === 'body') { + return { + ariaSnapshot: async () => '- navigation\n - link "Home"\n - link "About"' + }; + } + return { ariaSnapshot: async () => '' }; + } + }; + const result = await getSnapshot(mockPage, { snapshotFull: true, snapshotDepth: 1 }); + assert.ok(result.includes('- navigation'), 'Should include top-level node'); + assert.ok(result.includes('- ...'), 'Should truncate deeper nodes'); + assert.ok(!result.includes('link "Home"'), 'Should not include depth-2 nodes'); + }); +}); diff 
--git a/tests/web-ctl-actions.test.js b/tests/web-ctl-actions.test.js index 91ebb0b..21b32aa 100644 --- a/tests/web-ctl-actions.test.js +++ b/tests/web-ctl-actions.test.js @@ -201,6 +201,7 @@ describe('snapshot option flag parsing', () => { '--allow-evaluate', '--no-snapshot', '--wait-stable', '--vnc', '--exact', '--accept', '--submit', '--dismiss', '--snapshot-collapse', '--snapshot-text-only', '--snapshot-compact', + '--snapshot-full', ]); // Replicate parseOptions for unit testing @@ -308,6 +309,17 @@ describe('snapshot option flag parsing', () => { assert.equal(opts['css=nav'], undefined); }); + it('parses --snapshot-full as snapshotFull boolean', () => { + const opts = parseOptions(['--snapshot-full']); + assert.equal(opts.snapshotFull, true); + }); + + it('--snapshot-full does not consume next positional arg', () => { + const opts = parseOptions(['--snapshot-full', 'css=nav']); + assert.equal(opts.snapshotFull, true); + assert.equal(opts['css=nav'], undefined); + }); + it('combines all new snapshot flags', () => { const opts = parseOptions([ '--snapshot-depth', '3', @@ -499,6 +511,18 @@ describe('snapshot options in web-ctl source', () => { assert.ok(textOnlyIdx > collapseIdx, 'textOnly should follow collapseRepeated'); assert.ok(maxLinesIdx > textOnlyIdx, 'trimByLines should follow textOnly'); }); + + it('BOOLEAN_FLAGS includes --snapshot-full', () => { + assert.ok(webCtlSource.includes("'--snapshot-full'"), '--snapshot-full should be in BOOLEAN_FLAGS'); + }); + + it('help text contains --snapshot-full flag', () => { + assert.ok(webCtlSource.includes('--snapshot-full'), 'help should document --snapshot-full'); + }); + + it('detectMainContent function exists', () => { + assert.ok(webCtlSource.includes('async function detectMainContent(page)'), 'detectMainContent should be defined'); + }); }); describe('web-ctl navigation state persistence', () => {