From 19d0737d81577b7e75af56da0dd8b681649dedf4 Mon Sep 17 00:00:00 2001 From: Avi Fenesh Date: Tue, 24 Feb 2026 18:58:46 +0200 Subject: [PATCH 1/6] feat(snapshot): add compactFormat function and --snapshot-compact flag Register --snapshot-compact as a boolean flag and implement the compactFormat() transform with four sub-passes: link collapsing, heading inlining, decorative image removal, and duplicate URL dedup. Insert in the getSnapshot pipeline between trimByDepth and collapseRepeated. --- scripts/web-ctl.js | 223 ++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 222 insertions(+), 1 deletion(-) diff --git a/scripts/web-ctl.js b/scripts/web-ctl.js index 5d9487e..8271ca8 100755 --- a/scripts/web-ctl.js +++ b/scripts/web-ctl.js @@ -19,7 +19,7 @@ const ALLOWED_SCHEMES = /^https?:\/\//i; const BOOLEAN_FLAGS = new Set([ '--allow-evaluate', '--no-snapshot', '--wait-stable', '--vnc', '--exact', '--accept', '--submit', '--dismiss', '--auto', - '--snapshot-collapse', '--snapshot-text-only', + '--snapshot-collapse', '--snapshot-text-only', '--snapshot-compact', ]); function validateSessionName(name) { @@ -94,6 +94,7 @@ function resolveSelector(page, selector) { * @param {boolean} [opts.noSnapshot] - Return null to omit snapshot entirely * @param {string} [opts.snapshotSelector] - Scope snapshot to a DOM subtree * @param {number} [opts.snapshotDepth] - Limit ARIA tree depth + * @param {boolean} [opts.snapshotCompact] - Compact format for token efficiency * @param {boolean} [opts.snapshotCollapse] - Collapse repeated siblings * @param {boolean} [opts.snapshotTextOnly] - Strip structural nodes, keep content * @param {number} [opts.snapshotMaxLines] - Truncate to N lines @@ -107,6 +108,7 @@ async function getSnapshot(page, opts = {}) { const raw = await root.ariaSnapshot(); let result = raw; if (opts.snapshotDepth) result = trimByDepth(result, opts.snapshotDepth); + if (opts.snapshotCompact) result = compactFormat(result); if (opts.snapshotCollapse) result = 
/**
 * Compact an ARIA snapshot for token-efficient LLM consumption.
 *
 * Four passes run in sequence over the snapshot lines; two spaces of leading
 * indentation count as one depth level.
 *   1. Link collapsing: `- link "T":` with a direct `- /url: U` child becomes
 *      `- link "T" -> U`. When the link has other children they are kept in
 *      place and the merged line keeps its trailing ":".
 *   2. Heading inlining: `- heading "T" [level=N]:` whose ONLY descendant is a
 *      childless link becomes `- heading [hN] "T" -> U` (no arrow when the link
 *      carries no URL). Headings whose link still has children are left alone
 *      so that nested content is never dropped.
 *   3. Decorative image removal: `img` nodes with empty or single-character
 *      alt text are removed together with their subtrees.
 *   4. Duplicate URL dedup: within one depth scope, the second link/heading
 *      line pointing at an already-seen URL is removed with its subtree.
 *      Tracking for deeper levels is reset whenever depth decreases.
 *
 * @param {string} snapshot - ARIA snapshot text
 * @returns {string} Compacted snapshot. Non-string input (null, undefined, ...)
 *   and fallback messages starting with "(" are returned unchanged.
 */
function compactFormat(snapshot) {
  // Guard: only real snapshot text is transformed; anything else passes through.
  if (typeof snapshot !== 'string' || snapshot.startsWith('(')) return snapshot;

  // Number of leading space characters on a line.
  const indentOf = (line) => {
    let count = 0;
    while (count < line.length && line[count] === ' ') count++;
    return count;
  };

  // Tree depth of a line (2 spaces per level, odd indents round down).
  const depthOf = (line) => Math.floor(indentOf(line) / 2);

  // Index just past the subtree rooted at lines[start] (the root itself included).
  const subtreeEnd = (lines, start) => {
    const rootDepth = depthOf(lines[start]);
    let end = start + 1;
    while (end < lines.length && depthOf(lines[end]) > rootDepth) end++;
    return end;
  };

  // Pass 1: merge `- link "T":` with its direct `- /url: U` child.
  const collapseLinks = (lines) => {
    const out = [];
    let i = 0;
    while (i < lines.length) {
      const line = lines[i];
      const indent = indentOf(line);
      const linkMatch = line.slice(indent).match(/^- link "(.+)":/);
      if (linkMatch) {
        const end = subtreeEnd(lines, i);
        const childDepth = depthOf(line) + 1;
        let urlIdx = -1;
        let url = '';
        // Only a DIRECT child may supply the URL; deeper /url lines belong to nested links.
        for (let k = i + 1; k < end; k++) {
          if (depthOf(lines[k]) !== childDepth) continue;
          const urlMatch = lines[k].trim().match(/^- \/url: (.+)/);
          if (urlMatch) {
            urlIdx = k;
            url = urlMatch[1];
            break;
          }
        }
        if (urlIdx !== -1) {
          const hasOtherChildren = end - i - 1 > 1;
          out.push(`${' '.repeat(indent)}- link "${linkMatch[1]}" -> ${url}${hasOtherChildren ? ':' : ''}`);
          for (let k = i + 1; k < end; k++) {
            if (k !== urlIdx) out.push(lines[k]);
          }
          i = end;
          continue;
        }
      }
      out.push(line);
      i++;
    }
    return out;
  };

  // Pass 2: fold a heading and its sole, childless link into one line.
  const inlineHeadings = (lines) => {
    const out = [];
    let i = 0;
    while (i < lines.length) {
      const line = lines[i];
      const indent = indentOf(line);
      const headingMatch = line.slice(indent).match(/^- heading "(.+)" \[level=(\d+)\]:/);
      if (headingMatch) {
        const end = subtreeEnd(lines, i);
        // Exactly one descendant, and it is a direct child: inlining then loses nothing.
        // A link that kept children after pass 1 makes the subtree longer and is skipped here.
        if (end - i === 2 && depthOf(lines[i + 1]) === depthOf(line) + 1) {
          const child = lines[i + 1].trim();
          const merged = `${' '.repeat(indent)}- heading [h${headingMatch[2]}] "${headingMatch[1]}"`;
          const linkArrowMatch = child.match(/^- link "(.+)" -> (.+)$/);
          if (linkArrowMatch) {
            out.push(`${merged} -> ${linkArrowMatch[2]}`);
            i = end;
            continue;
          }
          if (/^- link "(.+)"$/.test(child)) {
            out.push(merged);
            i = end;
            continue;
          }
        }
      }
      out.push(line);
      i++;
    }
    return out;
  };

  // Pass 3: drop img nodes with empty or one-character alt text, subtree included.
  const dropDecorativeImages = (lines) => {
    const out = [];
    let i = 0;
    while (i < lines.length) {
      const line = lines[i];
      const imgMatch = line.slice(indentOf(line)).match(/^- img(?:\s+"(.*)")?/);
      if (imgMatch && (imgMatch[1] || '').length <= 1) {
        i = subtreeEnd(lines, i);
        continue;
      }
      out.push(line);
      i++;
    }
    return out;
  };

  // Matches only the arrow lines produced by passes 1 and 2, so free text that merely
  // contains " -> /path" is never mistaken for a link. The optional trailing ":" (children
  // marker) is excluded from the captured URL so "-> /x" and "-> /x:" share one key.
  const arrowUrlPattern = /^- (?:link|heading \[h\d+\]) ".*" -> (\/\S+?|https?:\/\/\S+?):?$/;

  // Pass 4: remove repeated URLs within the same depth scope.
  const dedupeUrls = (lines) => {
    const out = [];
    const seenByDepth = new Map(); // depth -> Set of URLs seen in the current scope
    let prevDepth = -1;
    let i = 0;
    while (i < lines.length) {
      const line = lines[i];
      const depth = depthOf(line);

      // Leaving a subtree: forget URLs recorded at deeper levels.
      if (depth < prevDepth) {
        for (const trackedDepth of [...seenByDepth.keys()]) {
          if (trackedDepth > depth) seenByDepth.delete(trackedDepth);
        }
      }
      prevDepth = depth;

      const arrowMatch = line.trim().match(arrowUrlPattern);
      if (arrowMatch) {
        const url = arrowMatch[1];
        let seen = seenByDepth.get(depth);
        if (!seen) {
          seen = new Set();
          seenByDepth.set(depth, seen);
        }
        if (seen.has(url)) {
          // Duplicate: skip this line and everything nested under it.
          i = subtreeEnd(lines, i);
          continue;
        }
        seen.add(url);
      }

      out.push(line);
      i++;
    }
    return out;
  };

  let lines = snapshot.split('\n');
  lines = collapseLinks(lines);
  lines = inlineHeadings(lines);
  lines = dropDecorativeImages(lines);
  lines = dedupeUrls(lines);
  return lines.join('\n');
}
end github`); } From 38caf4c232ee40adf8e4eec2422f22775382d873 Mon Sep 17 00:00:00 2001 From: Avi Fenesh Date: Tue, 24 Feb 2026 19:00:41 +0200 Subject: [PATCH 3/6] test(snapshot): add compactFormat unit and pipeline tests Add compactFormat() function replica to test file and comprehensive tests covering guard clauses, link collapsing, heading inlining, decorative image removal, duplicate URL dedup, combination transforms, and getSnapshot pipeline integration with snapshotCompact option. --- tests/get-snapshot.test.js | 422 ++++++++++++++++++++++++++++++++++++- 1 file changed, 421 insertions(+), 1 deletion(-) diff --git a/tests/get-snapshot.test.js b/tests/get-snapshot.test.js index d6ec505..564d403 100644 --- a/tests/get-snapshot.test.js +++ b/tests/get-snapshot.test.js @@ -32,6 +32,7 @@ async function getSnapshot(page, opts = {}) { const raw = await root.ariaSnapshot(); let result = raw; if (opts.snapshotDepth) result = trimByDepth(result, opts.snapshotDepth); + if (opts.snapshotCompact) result = compactFormat(result); if (opts.snapshotCollapse) result = collapseRepeated(result); if (opts.snapshotTextOnly) result = textOnly(result); if (opts.snapshotMaxLines) result = trimByLines(result, opts.snapshotMaxLines); @@ -202,6 +203,404 @@ function textOnly(snapshot) { return result.join('\n'); } +// Keep this in sync with scripts/web-ctl.js. 
+function compactFormat(snapshot) { + if (snapshot == null) return snapshot; + if (typeof snapshot === 'string' && snapshot.startsWith('(')) return snapshot; + + let lines = snapshot.split('\n'); + + // --- Pass 1: Link collapsing --- + const linkCollapsed = []; + let i = 0; + while (i < lines.length) { + const line = lines[i]; + let spaces = 0; + while (spaces < line.length && line[spaces] === ' ') spaces++; + const content = line.slice(spaces); + + const linkMatch = content.match(/^- link "(.+)":/); + if (linkMatch) { + const parentDepth = Math.floor(spaces / 2); + + const children = []; + let j = i + 1; + while (j < lines.length) { + let cs = 0; + while (cs < lines[j].length && lines[j][cs] === ' ') cs++; + if (Math.floor(cs / 2) > parentDepth) { + children.push({ index: j, line: lines[j], depth: Math.floor(cs / 2) }); + j++; + } else { + break; + } + } + + const urlChildIdx = children.findIndex(c => + c.depth === parentDepth + 1 && c.line.trim().match(/^- \/url: (.+)/) + ); + + if (urlChildIdx !== -1) { + const urlMatch = children[urlChildIdx].line.trim().match(/^- \/url: (.+)/); + const url = urlMatch[1]; + const otherChildren = children.filter((_, idx) => idx !== urlChildIdx); + + if (otherChildren.length === 0) { + linkCollapsed.push(`${' '.repeat(spaces)}- link "${linkMatch[1]}" -> ${url}`); + } else { + linkCollapsed.push(`${' '.repeat(spaces)}- link "${linkMatch[1]}" -> ${url}:`); + for (const child of otherChildren) { + linkCollapsed.push(child.line); + } + } + i = j; + continue; + } + } + + linkCollapsed.push(line); + i++; + } + lines = linkCollapsed; + + // --- Pass 2: Heading inlining --- + const headingInlined = []; + i = 0; + while (i < lines.length) { + const line = lines[i]; + let spaces = 0; + while (spaces < line.length && line[spaces] === ' ') spaces++; + const content = line.slice(spaces); + + const headingMatch = content.match(/^- heading "(.+)" \[level=(\d+)\]:/); + if (headingMatch) { + const parentDepth = Math.floor(spaces / 2); + + const 
children = []; + let j = i + 1; + while (j < lines.length) { + let cs = 0; + while (cs < lines[j].length && lines[j][cs] === ' ') cs++; + if (Math.floor(cs / 2) > parentDepth) { + children.push({ index: j, line: lines[j], depth: Math.floor(cs / 2) }); + j++; + } else { + break; + } + } + + const directChildren = children.filter(c => c.depth === parentDepth + 1); + if (directChildren.length === 1) { + const childContent = directChildren[0].line.trim(); + const linkArrowMatch = childContent.match(/^- link "(.+)" -> (.+)$/); + if (linkArrowMatch) { + headingInlined.push(`${' '.repeat(spaces)}- heading [h${headingMatch[2]}] "${headingMatch[1]}" -> ${linkArrowMatch[2]}`); + i = j; + continue; + } + const linkPlainMatch = childContent.match(/^- link "(.+)"$/); + if (linkPlainMatch) { + headingInlined.push(`${' '.repeat(spaces)}- heading [h${headingMatch[2]}] "${headingMatch[1]}"`); + i = j; + continue; + } + } + } + + headingInlined.push(line); + i++; + } + lines = headingInlined; + + // --- Pass 3: Decorative image removal --- + const imagesFiltered = []; + i = 0; + while (i < lines.length) { + const line = lines[i]; + let spaces = 0; + while (spaces < line.length && line[spaces] === ' ') spaces++; + const content = line.slice(spaces); + + const imgMatch = content.match(/^- img(?:\s+"(.*)")?/); + if (imgMatch) { + const altText = imgMatch[1] || ''; + if (altText.length <= 1) { + const parentDepth = Math.floor(spaces / 2); + let j = i + 1; + while (j < lines.length) { + let cs = 0; + while (cs < lines[j].length && lines[j][cs] === ' ') cs++; + if (Math.floor(cs / 2) > parentDepth) { + j++; + } else { + break; + } + } + i = j; + continue; + } + } + + imagesFiltered.push(line); + i++; + } + lines = imagesFiltered; + + // --- Pass 4: Duplicate URL dedup --- + const deduped = []; + const seenUrls = new Map(); + let prevDepth = -1; + + for (i = 0; i < lines.length; i++) { + const line = lines[i]; + let spaces = 0; + while (spaces < line.length && line[spaces] === ' ') 
spaces++; + const depth = Math.floor(spaces / 2); + + if (depth < prevDepth) { + for (const [d] of seenUrls) { + if (d > depth) seenUrls.delete(d); + } + } + prevDepth = depth; + + const urlArrowMatch = line.match(/ -> (\/\S+|https?:\/\/\S+)/); + if (urlArrowMatch) { + const url = urlArrowMatch[1]; + if (!seenUrls.has(depth)) seenUrls.set(depth, new Set()); + const depthSet = seenUrls.get(depth); + if (depthSet.has(url)) { + let j = i + 1; + while (j < lines.length) { + let cs = 0; + while (cs < lines[j].length && lines[j][cs] === ' ') cs++; + if (Math.floor(cs / 2) > depth) { + j++; + } else { + break; + } + } + i = j - 1; + continue; + } + depthSet.add(url); + } + + deduped.push(line); + } + + return deduped.join('\n'); +} + +// ============ compactFormat tests ============ + +describe('compactFormat', () => { + it('passes through null', () => { + assert.equal(compactFormat(null), null); + }); + + it('passes through undefined', () => { + assert.equal(compactFormat(undefined), undefined); + }); + + it('passes through fallback strings starting with (', () => { + const fallback = '(accessibility tree unavailable - crashed)'; + assert.equal(compactFormat(fallback), fallback); + }); + + it('passes through empty string', () => { + assert.equal(compactFormat(''), ''); + }); + + // --- Link collapsing --- + + it('collapses link with /url child into single line', () => { + const input = [ + '- link "Home":', + ' - /url: /home' + ].join('\n'); + assert.equal(compactFormat(input), '- link "Home" -> /home'); + }); + + it('preserves link without /url child', () => { + const input = '- link "Home"'; + assert.equal(compactFormat(input), input); + }); + + it('keeps extra children when link has /url plus others', () => { + const input = [ + '- link "Dashboard":', + ' - /url: /dash', + ' - img "icon"' + ].join('\n'); + const result = compactFormat(input); + assert.ok(result.includes('- link "Dashboard" -> /dash:')); + assert.ok(result.includes(' - img "icon"')); + 
assert.ok(!result.includes('/url:')); + }); + + it('collapses nested link inside a list', () => { + const input = [ + '- list:', + ' - listitem:', + ' - link "About":', + ' - /url: /about' + ].join('\n'); + const result = compactFormat(input); + assert.ok(result.includes(' - link "About" -> /about')); + assert.ok(!result.includes('/url:')); + }); + + // --- Heading inlining --- + + it('inlines heading with single link child', () => { + const input = [ + '- heading "Getting Started" [level=2]:', + ' - link "Getting Started" -> /docs/start' + ].join('\n'); + const result = compactFormat(input); + assert.equal(result, '- heading [h2] "Getting Started" -> /docs/start'); + }); + + it('preserves heading with multiple children', () => { + const input = [ + '- heading "Title" [level=1]:', + ' - link "Link 1"', + ' - link "Link 2"' + ].join('\n'); + const result = compactFormat(input); + assert.ok(result.includes('- heading "Title" [level=1]:')); + assert.ok(result.includes(' - link "Link 1"')); + assert.ok(result.includes(' - link "Link 2"')); + }); + + it('preserves heading without level attribute', () => { + const input = [ + '- heading "Title":', + ' - link "Click"' + ].join('\n'); + // No [level=N] means the regex won't match, so heading stays as-is + assert.equal(compactFormat(input), input); + }); + + it('inlines heading with plain link child (no URL)', () => { + const input = [ + '- heading "Section" [level=3]:', + ' - link "Section"' + ].join('\n'); + const result = compactFormat(input); + assert.equal(result, '- heading [h3] "Section"'); + }); + + // --- Decorative image removal --- + + it('removes img with empty name', () => { + const input = [ + '- heading "Title"', + '- img', + '- link "More"' + ].join('\n'); + const result = compactFormat(input); + assert.ok(!result.includes('- img')); + assert.ok(result.includes('- heading "Title"')); + assert.ok(result.includes('- link "More"')); + }); + + it('removes img with single-char alt text', () => { + const input = [ 
+ '- img "x"', + '- paragraph "Content"' + ].join('\n'); + const result = compactFormat(input); + assert.ok(!result.includes('img')); + assert.ok(result.includes('paragraph "Content"')); + }); + + it('preserves img with meaningful alt text', () => { + const input = '- img "Product screenshot"'; + assert.equal(compactFormat(input), input); + }); + + it('removes decorative img and its children', () => { + const input = [ + '- img "":', + ' - text "caption"', + '- link "Next"' + ].join('\n'); + const result = compactFormat(input); + assert.ok(!result.includes('img')); + assert.ok(!result.includes('caption')); + assert.ok(result.includes('- link "Next"')); + }); + + // --- Duplicate URL dedup --- + + it('removes second occurrence of same URL at same depth', () => { + const input = [ + '- link "Home" -> /home', + '- link "Home Again" -> /home' + ].join('\n'); + const result = compactFormat(input); + assert.ok(result.includes('- link "Home" -> /home')); + assert.ok(!result.includes('Home Again')); + }); + + it('keeps same URL at different depths', () => { + const input = [ + '- link "Home" -> /home', + '- list:', + ' - link "Home" -> /home' + ].join('\n'); + const result = compactFormat(input); + const homeCount = (result.match(/-> \/home/g) || []).length; + assert.equal(homeCount, 2); + }); + + it('resets dedup tracking when depth decreases', () => { + const input = [ + '- navigation:', + ' - link "About" -> /about', + '- main:', + ' - link "About" -> /about' + ].join('\n'); + const result = compactFormat(input); + const aboutCount = (result.match(/-> \/about/g) || []).length; + assert.equal(aboutCount, 2, 'URL should appear twice since depth scope reset between nav and main'); + }); + + // --- Combination test --- + + it('applies all transforms on realistic page snippet', () => { + const input = [ + '- navigation "Main":', + ' - link "Home":', + ' - /url: /home', + ' - link "About":', + ' - /url: /about', + ' - img ""', + '- main:', + ' - heading "Welcome" [level=1]:', 
+ ' - link "Welcome" -> /home', + ' - img "x"', + ' - link "About" -> /about', + ' - img "Team photo"', + ' - paragraph "Hello world"' + ].join('\n'); + const result = compactFormat(input); + // Links collapsed + assert.ok(result.includes(' - link "Home" -> /home')); + assert.ok(result.includes(' - link "About" -> /about')); + assert.ok(!result.includes('/url:')); + // Heading is inlined in pass 2. Its /home URL also appears at depth 1 under navigation, + // but dedup tracking for depth 1 is cleared when depth drops to 0 at "- main:", + // so pass 4 keeps both the inlined heading and the second "About" link. + assert.ok(!result.includes('img ""'), 'empty alt img removed'); + assert.ok(!result.includes('img "x"'), 'single char alt img removed'); + assert.ok(result.includes('img "Team photo"'), 'meaningful alt preserved'); + assert.ok(result.includes('paragraph "Hello world"')); + }); +}); + // ============ trimByDepth tests ============ describe('trimByDepth', () => { @@ -808,8 +1207,29 @@ describe('getSnapshot pipeline', () => { assert.equal(lines[2], '... 
(3 more lines)'); }); + it('applies snapshotCompact when set', async () => { + const snapshot = [ + '- link "Home":', + ' - /url: /home', + '- img ""', + '- heading "News" [level=2]:', + ' - link "News" -> /news', + '- paragraph "Content"' + ].join('\n'); + const result = await getSnapshot(makeMockPage(snapshot), { snapshotCompact: true }); + // Links collapsed + assert.ok(result.includes('- link "Home" -> /home')); + assert.ok(!result.includes('/url:')); + // Decorative img removed + assert.ok(!result.includes('img ""')); + // Heading inlined + assert.ok(result.includes('- heading [h2] "News" -> /news')); + // Content preserved + assert.ok(result.includes('paragraph "Content"')); + }); + it('chains all options together', async () => { - // depth -> collapse -> text-only -> max-lines + // depth -> compact -> collapse -> text-only -> max-lines const snapshot = [ '- main', ' - list', From 072a0a08de381be13881823fd10a782baf48d5d8 Mon Sep 17 00:00:00 2001 From: Avi Fenesh Date: Tue, 24 Feb 2026 19:01:27 +0200 Subject: [PATCH 4/6] test(actions): add --snapshot-compact flag parsing and source validation tests Add --snapshot-compact to BOOLEAN_FLAGS replica, flag parsing tests, source validation for compactFormat function existence, and update pipeline order test from four to five transforms. 
--- tests/web-ctl-actions.test.js | 35 +++++++++++++++++++++++++++++++---- 1 file changed, 31 insertions(+), 4 deletions(-) diff --git a/tests/web-ctl-actions.test.js b/tests/web-ctl-actions.test.js index d6c39fd..91ebb0b 100644 --- a/tests/web-ctl-actions.test.js +++ b/tests/web-ctl-actions.test.js @@ -200,7 +200,7 @@ describe('snapshot option flag parsing', () => { const BOOLEAN_FLAGS = new Set([ '--allow-evaluate', '--no-snapshot', '--wait-stable', '--vnc', '--exact', '--accept', '--submit', '--dismiss', - '--snapshot-collapse', '--snapshot-text-only', + '--snapshot-collapse', '--snapshot-text-only', '--snapshot-compact', ]); // Replicate parseOptions for unit testing @@ -297,14 +297,27 @@ describe('snapshot option flag parsing', () => { assert.equal(opts.snapshotTextOnly, true); }); + it('parses --snapshot-compact as snapshotCompact boolean', () => { + const opts = parseOptions(['--snapshot-compact']); + assert.equal(opts.snapshotCompact, true); + }); + + it('--snapshot-compact does not consume next positional arg', () => { + const opts = parseOptions(['--snapshot-compact', 'css=nav']); + assert.equal(opts.snapshotCompact, true); + assert.equal(opts['css=nav'], undefined); + }); + it('combines all new snapshot flags', () => { const opts = parseOptions([ '--snapshot-depth', '3', + '--snapshot-compact', '--snapshot-collapse', '--snapshot-text-only', '--snapshot-max-lines', '100' ]); assert.equal(opts.snapshotDepth, '3'); + assert.equal(opts.snapshotCompact, true); assert.equal(opts.snapshotCollapse, true); assert.equal(opts.snapshotTextOnly, true); assert.equal(opts.snapshotMaxLines, '100'); @@ -461,14 +474,28 @@ describe('snapshot options in web-ctl source', () => { assert.ok(webCtlSource.includes("'--snapshot-text-only'"), '--snapshot-text-only should be in BOOLEAN_FLAGS'); }); - it('getSnapshot pipeline applies all four transforms in order', () => { - // Verify the pipeline: depth -> collapse -> text-only -> max-lines + it('BOOLEAN_FLAGS includes 
--snapshot-compact', () => { + assert.ok(webCtlSource.includes("'--snapshot-compact'"), '--snapshot-compact should be in BOOLEAN_FLAGS'); + }); + + it('compactFormat function exists', () => { + assert.ok(webCtlSource.includes('function compactFormat(snapshot)'), 'compactFormat should be defined'); + }); + + it('help text contains --snapshot-compact flag', () => { + assert.ok(webCtlSource.includes('--snapshot-compact'), 'help should document --snapshot-compact'); + }); + + it('getSnapshot pipeline applies all five transforms in order', () => { + // Verify the pipeline: depth -> compact -> collapse -> text-only -> max-lines const depthIdx = webCtlSource.indexOf('opts.snapshotDepth) result = trimByDepth'); + const compactIdx = webCtlSource.indexOf('opts.snapshotCompact) result = compactFormat'); const collapseIdx = webCtlSource.indexOf('opts.snapshotCollapse) result = collapseRepeated'); const textOnlyIdx = webCtlSource.indexOf('opts.snapshotTextOnly) result = textOnly'); const maxLinesIdx = webCtlSource.indexOf('opts.snapshotMaxLines) result = trimByLines'); assert.ok(depthIdx > 0, 'trimByDepth should be in pipeline'); - assert.ok(collapseIdx > depthIdx, 'collapseRepeated should follow trimByDepth'); + assert.ok(compactIdx > depthIdx, 'compactFormat should follow trimByDepth'); + assert.ok(collapseIdx > compactIdx, 'collapseRepeated should follow compactFormat'); assert.ok(textOnlyIdx > collapseIdx, 'textOnly should follow collapseRepeated'); assert.ok(maxLinesIdx > textOnlyIdx, 'trimByLines should follow textOnly'); }); From a6931d10dbab3fa8aa8666592ab5fdb410c8e55e Mon Sep 17 00:00:00 2001 From: Avi Fenesh Date: Tue, 24 Feb 2026 19:02:02 +0200 Subject: [PATCH 5/6] docs: add --snapshot-compact to all documentation Update SKILL.md with full subsection, commands/web-ctl.md with example, README.md Common Flags table, and CHANGELOG.md with detailed entry. 
--- CHANGELOG.md | 1 + README.md | 1 + commands/web-ctl.md | 1 + skills/web-browse/SKILL.md | 16 ++++++++++++++++ 4 files changed, 19 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index c9d21bb..8adafe4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,7 @@ ## [Unreleased] ### Added +- `--snapshot-compact` flag for token-efficient LLM consumption - applies four transforms: link collapsing (merges link + /url child into `link "Title" -> /path`), heading inlining (merges heading with single link child), decorative image removal (strips img nodes with empty or single-char alt text), and duplicate URL dedup (removes second occurrence at same depth scope). Applied after `--snapshot-depth` and before `--snapshot-collapse` in the pipeline - `--snapshot-max-lines ` flag to truncate snapshot output to a maximum number of lines, with a `... (K more lines)` marker when lines are omitted - `--snapshot-collapse` flag to collapse repeated consecutive siblings of the same ARIA type - keeps first 2 with subtrees, replaces the rest with `... (K more )` markers. Works recursively on nested structures - `--snapshot-text-only` flag to strip structural container nodes (list, listitem, group, region, main, form, table, row, grid, generic, etc.) and keep only content-bearing nodes. Labeled structural nodes are preserved. Indentation is re-compressed diff --git a/README.md b/README.md index 0b15f18..a2c1fa7 100644 --- a/README.md +++ b/README.md @@ -175,6 +175,7 @@ This eliminates the common click-snapshot-check loop that wastes agent turns on | `--snapshot-depth ` | Any action with snapshot | Limit ARIA tree depth (e.g. 
3 for top 3 levels) | | `--snapshot-selector ` | Any action with snapshot | Scope snapshot to a DOM subtree | | `--snapshot-max-lines ` | Any action with snapshot | Truncate snapshot to N lines | +| `--snapshot-compact` | Any action with snapshot | Compact format: collapse links, inline headings, remove decorative images, dedup URLs | | `--snapshot-collapse` | Any action with snapshot | Collapse repeated siblings (keep first 2, summarize rest) | | `--snapshot-text-only` | Any action with snapshot | Strip structural nodes, keep content only | | `--max-field-length ` | `extract` | Max characters per field (default: 500, max: 2000) | diff --git a/commands/web-ctl.md b/commands/web-ctl.md index 18fe89f..fb88d9d 100644 --- a/commands/web-ctl.md +++ b/commands/web-ctl.md @@ -59,6 +59,7 @@ node ${PLUGIN_ROOT}/scripts/web-ctl.js run snapshot --snapshot-depth 3 node ${PLUGIN_ROOT}/scripts/web-ctl.js run goto --snapshot-selector "css=nav" node ${PLUGIN_ROOT}/scripts/web-ctl.js run click --no-snapshot node ${PLUGIN_ROOT}/scripts/web-ctl.js run snapshot --snapshot-collapse +node ${PLUGIN_ROOT}/scripts/web-ctl.js run snapshot --snapshot-compact node ${PLUGIN_ROOT}/scripts/web-ctl.js run snapshot --snapshot-text-only --snapshot-max-lines 50 node ${PLUGIN_ROOT}/scripts/web-ctl.js run goto --snapshot-collapse --snapshot-depth 4 diff --git a/skills/web-browse/SKILL.md b/skills/web-browse/SKILL.md index 794a461..3b40563 100644 --- a/skills/web-browse/SKILL.md +++ b/skills/web-browse/SKILL.md @@ -384,6 +384,22 @@ node ${PLUGIN_ROOT}/scripts/web-ctl.js run goto --snapshot-max-l Hard-caps the snapshot output to N lines. A marker like `... (42 more lines)` is appended when lines are omitted. Applied after all other snapshot transforms, so it acts as a final safety net. Max value: 10000. 
+### --snapshot-compact - Token-Efficient Compact Format + +```bash +node ${PLUGIN_ROOT}/scripts/web-ctl.js run snapshot --snapshot-compact +node ${PLUGIN_ROOT}/scripts/web-ctl.js run goto --snapshot-compact +``` + +Applies four token-saving transforms in sequence: + +1. **Link collapsing** - Merges `link "Title":` with its `/url: /path` child into `link "Title" -> /path` +2. **Heading inlining** - Merges `heading "Title" [level=N]:` with a single link child into `heading [hN] "Title" -> /path` +3. **Decorative image removal** - Strips `img` nodes with empty or single-character alt text (decorative icons, spacers) +4. **Duplicate URL dedup** - Removes the second occurrence of the same URL within the same depth scope + +Combines well with `--snapshot-collapse` and `--snapshot-text-only` for maximum reduction. Applied after `--snapshot-depth` and before `--snapshot-collapse` in the pipeline. + ### --snapshot-collapse - Collapse Repeated Siblings ```bash From 0ccb9530b8c0ba964867206c2d6f077bbbb24906 Mon Sep 17 00:00:00 2001 From: Avi Fenesh Date: Tue, 24 Feb 2026 19:11:43 +0200 Subject: [PATCH 6/6] fix: harden compactFormat regex patterns and add edge case tests Replace greedy .+ with bounded [^"]+ in quoted string regexes and \S+ in URL regexes to prevent ReDoS. Remove unused childIndent variable. Add tests for blank lines, transform interaction, and dedup after collapse. 
--- scripts/web-ctl.js | 15 +++++------ tests/get-snapshot.test.js | 52 +++++++++++++++++++++++++++++++++----- 2 files changed, 52 insertions(+), 15 deletions(-) diff --git a/scripts/web-ctl.js b/scripts/web-ctl.js index b11c7d7..df494d7 100755 --- a/scripts/web-ctl.js +++ b/scripts/web-ctl.js @@ -186,10 +186,9 @@ function compactFormat(snapshot) { const content = line.slice(spaces); // Check if this is a link line with a colon suffix (has children) - const linkMatch = content.match(/^- link "(.+)":/); + const linkMatch = content.match(/^- link "([^"]+)":/); if (linkMatch) { const parentDepth = Math.floor(spaces / 2); - const childIndent = (parentDepth + 1) * 2; // Collect children const children = []; @@ -207,11 +206,11 @@ function compactFormat(snapshot) { // Find /url: child among direct children (depth === parentDepth + 1) const urlChildIdx = children.findIndex(c => - c.depth === parentDepth + 1 && c.line.trim().match(/^- \/url: (.+)/) + c.depth === parentDepth + 1 && c.line.trim().match(/^- \/url: (\S+)/) ); if (urlChildIdx !== -1) { - const urlMatch = children[urlChildIdx].line.trim().match(/^- \/url: (.+)/); + const urlMatch = children[urlChildIdx].line.trim().match(/^- \/url: (\S+)/); const url = urlMatch[1]; const otherChildren = children.filter((_, idx) => idx !== urlChildIdx); @@ -245,7 +244,7 @@ function compactFormat(snapshot) { while (spaces < line.length && line[spaces] === ' ') spaces++; const content = line.slice(spaces); - const headingMatch = content.match(/^- heading "(.+)" \[level=(\d+)\]:/); + const headingMatch = content.match(/^- heading "([^"]+)" \[level=(\d+)\]:/); if (headingMatch) { const parentDepth = Math.floor(spaces / 2); @@ -267,14 +266,14 @@ function compactFormat(snapshot) { const directChildren = children.filter(c => c.depth === parentDepth + 1); if (directChildren.length === 1) { const childContent = directChildren[0].line.trim(); - const linkArrowMatch = childContent.match(/^- link "(.+)" -> (.+)$/); + const linkArrowMatch = 
childContent.match(/^- link "([^"]+)" -> (\S+)$/); if (linkArrowMatch) { // heading + link -> url: merge into one line headingInlined.push(`${' '.repeat(spaces)}- heading [h${headingMatch[2]}] "${headingMatch[1]}" -> ${linkArrowMatch[2]}`); i = j; continue; } - const linkPlainMatch = childContent.match(/^- link "(.+)"$/); + const linkPlainMatch = childContent.match(/^- link "([^"]+)"$/); if (linkPlainMatch) { // heading + plain link (no url): inline headingInlined.push(`${' '.repeat(spaces)}- heading [h${headingMatch[2]}] "${headingMatch[1]}"`); @@ -299,7 +298,7 @@ function compactFormat(snapshot) { while (spaces < line.length && line[spaces] === ' ') spaces++; const content = line.slice(spaces); - const imgMatch = content.match(/^- img(?:\s+"(.*)")?/); + const imgMatch = content.match(/^- img(?:\s+"([^"]*)")?/); if (imgMatch) { const altText = imgMatch[1] || ''; if (altText.length <= 1) { diff --git a/tests/get-snapshot.test.js b/tests/get-snapshot.test.js index 564d403..eb8b20f 100644 --- a/tests/get-snapshot.test.js +++ b/tests/get-snapshot.test.js @@ -219,7 +219,7 @@ function compactFormat(snapshot) { while (spaces < line.length && line[spaces] === ' ') spaces++; const content = line.slice(spaces); - const linkMatch = content.match(/^- link "(.+)":/); + const linkMatch = content.match(/^- link "([^"]+)":/); if (linkMatch) { const parentDepth = Math.floor(spaces / 2); @@ -237,11 +237,11 @@ function compactFormat(snapshot) { } const urlChildIdx = children.findIndex(c => - c.depth === parentDepth + 1 && c.line.trim().match(/^- \/url: (.+)/) + c.depth === parentDepth + 1 && c.line.trim().match(/^- \/url: (\S+)/) ); if (urlChildIdx !== -1) { - const urlMatch = children[urlChildIdx].line.trim().match(/^- \/url: (.+)/); + const urlMatch = children[urlChildIdx].line.trim().match(/^- \/url: (\S+)/); const url = urlMatch[1]; const otherChildren = children.filter((_, idx) => idx !== urlChildIdx); @@ -272,7 +272,7 @@ function compactFormat(snapshot) { while (spaces < 
line.length && line[spaces] === ' ') spaces++; const content = line.slice(spaces); - const headingMatch = content.match(/^- heading "(.+)" \[level=(\d+)\]:/); + const headingMatch = content.match(/^- heading "([^"]+)" \[level=(\d+)\]:/); if (headingMatch) { const parentDepth = Math.floor(spaces / 2); @@ -292,13 +292,13 @@ function compactFormat(snapshot) { const directChildren = children.filter(c => c.depth === parentDepth + 1); if (directChildren.length === 1) { const childContent = directChildren[0].line.trim(); - const linkArrowMatch = childContent.match(/^- link "(.+)" -> (.+)$/); + const linkArrowMatch = childContent.match(/^- link "([^"]+)" -> (\S+)$/); if (linkArrowMatch) { headingInlined.push(`${' '.repeat(spaces)}- heading [h${headingMatch[2]}] "${headingMatch[1]}" -> ${linkArrowMatch[2]}`); i = j; continue; } - const linkPlainMatch = childContent.match(/^- link "(.+)"$/); + const linkPlainMatch = childContent.match(/^- link "([^"]+)"$/); if (linkPlainMatch) { headingInlined.push(`${' '.repeat(spaces)}- heading [h${headingMatch[2]}] "${headingMatch[1]}"`); i = j; @@ -321,7 +321,7 @@ function compactFormat(snapshot) { while (spaces < line.length && line[spaces] === ' ') spaces++; const content = line.slice(spaces); - const imgMatch = content.match(/^- img(?:\s+"(.*)")?/); + const imgMatch = content.match(/^- img(?:\s+"([^"]*)")?/); if (imgMatch) { const altText = imgMatch[1] || ''; if (altText.length <= 1) { @@ -599,6 +599,44 @@ describe('compactFormat', () => { assert.ok(result.includes('img "Team photo"'), 'meaningful alt preserved'); assert.ok(result.includes('paragraph "Hello world"')); }); + + // --- Edge cases --- + + it('handles blank lines in input', () => { + const input = [ + '- link "Home":', + ' - /url: /home', + '', + '- link "About":', + ' - /url: /about' + ].join('\n'); + const result = compactFormat(input); + assert.ok(result.includes('- link "Home" -> /home')); + assert.ok(result.includes('- link "About" -> /about')); + }); + + it('link 
collapse feeds into heading inline', () => { + // Pass 1 collapses link, Pass 2 inlines heading with the collapsed link + const input = [ + '- heading "Docs" [level=2]:', + ' - link "Docs":', + ' - /url: /docs' + ].join('\n'); + const result = compactFormat(input); + assert.equal(result, '- heading [h2] "Docs" -> /docs'); + }); + + it('deduplicates URLs produced by link collapsing', () => { + const input = [ + '- link "Home":', + ' - /url: /home', + '- link "Home link":', + ' - /url: /home' + ].join('\n'); + const result = compactFormat(input); + assert.ok(result.includes('- link "Home" -> /home')); + assert.ok(!result.includes('Home link'), 'duplicate URL removed after collapse'); + }); }); // ============ trimByDepth tests ============