diff --git a/CHANGELOG.md b/CHANGELOG.md index c9d21bb..8adafe4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,7 @@ ## [Unreleased] ### Added +- `--snapshot-compact` flag for token-efficient LLM consumption - applies four transforms: link collapsing (merges link + /url child into `link "Title" -> /path`), heading inlining (merges heading with single link child), decorative image removal (strips img nodes with empty or single-char alt text), and duplicate URL dedup (removes second occurrence at same depth scope). Applied after `--snapshot-depth` and before `--snapshot-collapse` in the pipeline - `--snapshot-max-lines ` flag to truncate snapshot output to a maximum number of lines, with a `... (K more lines)` marker when lines are omitted - `--snapshot-collapse` flag to collapse repeated consecutive siblings of the same ARIA type - keeps first 2 with subtrees, replaces the rest with `... (K more )` markers. Works recursively on nested structures - `--snapshot-text-only` flag to strip structural container nodes (list, listitem, group, region, main, form, table, row, grid, generic, etc.) and keep only content-bearing nodes. Labeled structural nodes are preserved. Indentation is re-compressed diff --git a/README.md b/README.md index 0b15f18..a2c1fa7 100644 --- a/README.md +++ b/README.md @@ -175,6 +175,7 @@ This eliminates the common click-snapshot-check loop that wastes agent turns on | `--snapshot-depth ` | Any action with snapshot | Limit ARIA tree depth (e.g. 
3 for top 3 levels) | | `--snapshot-selector ` | Any action with snapshot | Scope snapshot to a DOM subtree | | `--snapshot-max-lines ` | Any action with snapshot | Truncate snapshot to N lines | +| `--snapshot-compact` | Any action with snapshot | Compact format: collapse links, inline headings, remove decorative images, dedup URLs | | `--snapshot-collapse` | Any action with snapshot | Collapse repeated siblings (keep first 2, summarize rest) | | `--snapshot-text-only` | Any action with snapshot | Strip structural nodes, keep content only | | `--max-field-length ` | `extract` | Max characters per field (default: 500, max: 2000) | diff --git a/commands/web-ctl.md b/commands/web-ctl.md index 18fe89f..fb88d9d 100644 --- a/commands/web-ctl.md +++ b/commands/web-ctl.md @@ -59,6 +59,7 @@ node ${PLUGIN_ROOT}/scripts/web-ctl.js run snapshot --snapshot-depth 3 node ${PLUGIN_ROOT}/scripts/web-ctl.js run goto --snapshot-selector "css=nav" node ${PLUGIN_ROOT}/scripts/web-ctl.js run click --no-snapshot node ${PLUGIN_ROOT}/scripts/web-ctl.js run snapshot --snapshot-collapse +node ${PLUGIN_ROOT}/scripts/web-ctl.js run snapshot --snapshot-compact node ${PLUGIN_ROOT}/scripts/web-ctl.js run snapshot --snapshot-text-only --snapshot-max-lines 50 node ${PLUGIN_ROOT}/scripts/web-ctl.js run goto --snapshot-collapse --snapshot-depth 4 diff --git a/scripts/web-ctl.js b/scripts/web-ctl.js index 5d9487e..df494d7 100755 --- a/scripts/web-ctl.js +++ b/scripts/web-ctl.js @@ -19,7 +19,7 @@ const ALLOWED_SCHEMES = /^https?:\/\//i; const BOOLEAN_FLAGS = new Set([ '--allow-evaluate', '--no-snapshot', '--wait-stable', '--vnc', '--exact', '--accept', '--submit', '--dismiss', '--auto', - '--snapshot-collapse', '--snapshot-text-only', + '--snapshot-collapse', '--snapshot-text-only', '--snapshot-compact', ]); function validateSessionName(name) { @@ -94,6 +94,7 @@ function resolveSelector(page, selector) { * @param {boolean} [opts.noSnapshot] - Return null to omit snapshot entirely * @param {string} 
[opts.snapshotSelector] - Scope snapshot to a DOM subtree * @param {number} [opts.snapshotDepth] - Limit ARIA tree depth + * @param {boolean} [opts.snapshotCompact] - Compact format for token efficiency * @param {boolean} [opts.snapshotCollapse] - Collapse repeated siblings * @param {boolean} [opts.snapshotTextOnly] - Strip structural nodes, keep content * @param {number} [opts.snapshotMaxLines] - Truncate to N lines @@ -107,6 +108,7 @@ async function getSnapshot(page, opts = {}) { const raw = await root.ariaSnapshot(); let result = raw; if (opts.snapshotDepth) result = trimByDepth(result, opts.snapshotDepth); + if (opts.snapshotCompact) result = compactFormat(result); if (opts.snapshotCollapse) result = collapseRepeated(result); if (opts.snapshotTextOnly) result = textOnly(result); if (opts.snapshotMaxLines) result = trimByLines(result, opts.snapshotMaxLines); @@ -155,6 +157,224 @@ function trimByDepth(snapshot, maxDepth) { return result.join('\n'); } +/** + * Compact snapshot for token-efficient LLM consumption. + * Applies four transforms in sequence: + * 1. Link collapsing: merges link + child /url into a single line + * 2. Heading inlining: merges heading with single link child + * 3. Decorative image removal: strips img nodes with empty or single-char alt text + * 4. 
/**
 * Compact an ARIA snapshot for token-efficient LLM consumption.
 *
 * Four passes run in sequence, each over the output of the previous one:
 *   1. Link collapsing: `link "T":` + child `/url: /p` becomes `link "T" -> /p`
 *      (a trailing `:` is kept when the link still has other children).
 *   2. Heading inlining: `heading "T" [level=N]:` whose entire subtree is one
 *      link line becomes `heading [hN] "T"` (plus ` -> url` when known).
 *   3. Decorative image removal: `img` nodes whose alt text is empty or a
 *      single character are dropped together with their subtree.
 *   4. Duplicate URL dedup: a second link/heading pointing at the same URL
 *      among siblings of the same parent is dropped with its subtree.
 *
 * @param {string} snapshot - ARIA snapshot text (2-space indentation per level)
 * @returns {string} Compacted snapshot. `null`, `undefined`, non-string values
 *   and strings starting with "(" are returned unchanged.
 */
function compactFormat(snapshot) {
  // Nothing to compact: pass null/undefined/non-strings straight through
  // instead of throwing on `.split`.
  if (snapshot == null || typeof snapshot !== 'string') return snapshot;
  // Parenthesised strings are placeholder messages, not trees.
  if (snapshot.startsWith('(')) return snapshot;

  let lines = snapshot.split('\n');
  lines = compactCollapseLinks(lines);
  lines = compactInlineHeadings(lines);
  lines = compactDropDecorativeImages(lines);
  lines = compactDedupUrls(lines);
  return lines.join('\n');
}

/** Number of leading spaces on a snapshot line. */
function compactIndentWidth(line) {
  let width = 0;
  while (width < line.length && line[width] === ' ') width++;
  return width;
}

/** Tree depth of a snapshot line (two spaces per level). */
function compactDepthOf(line) {
  return Math.floor(compactIndentWidth(line) / 2);
}

/** Index of the first line after `start` that is not deeper than `depth`. */
function compactSubtreeEnd(lines, start, depth) {
  let end = start + 1;
  while (end < lines.length && compactDepthOf(lines[end]) > depth) end++;
  return end;
}

/** Pass 1: merge each link with its direct `/url:` child. */
function compactCollapseLinks(lines) {
  const out = [];
  let i = 0;
  while (i < lines.length) {
    const line = lines[i];
    const indent = compactIndentWidth(line);
    const linkMatch = line.slice(indent).match(/^- link "([^"]+)":/);
    if (linkMatch) {
      const depth = Math.floor(indent / 2);
      const end = compactSubtreeEnd(lines, i, depth);
      const subtree = lines.slice(i + 1, end);
      // Only a *direct* child may supply the URL.
      const urlPos = subtree.findIndex(
        (child) => compactDepthOf(child) === depth + 1 && /^- \/url: \S+/.test(child.trim())
      );
      if (urlPos !== -1) {
        const url = subtree[urlPos].trim().match(/^- \/url: (\S+)/)[1];
        const rest = subtree.filter((_, idx) => idx !== urlPos);
        // Keep the trailing colon only when children remain under the link.
        const suffix = rest.length > 0 ? ':' : '';
        out.push(`${' '.repeat(indent)}- link "${linkMatch[1]}" -> ${url}${suffix}`);
        out.push(...rest);
        i = end;
        continue;
      }
    }
    out.push(line);
    i++;
  }
  return out;
}

/** Pass 2: fold a heading and its single link line into one line. */
function compactInlineHeadings(lines) {
  const out = [];
  let i = 0;
  while (i < lines.length) {
    const line = lines[i];
    const indent = compactIndentWidth(line);
    const headingMatch = line.slice(indent).match(/^- heading "([^"]+)" \[level=(\d+)\]:/);
    if (headingMatch) {
      const depth = Math.floor(indent / 2);
      const end = compactSubtreeEnd(lines, i, depth);
      // Inline only when the whole subtree is exactly one direct child line;
      // otherwise the link's own children would be silently discarded.
      if (end - i === 2 && compactDepthOf(lines[i + 1]) === depth + 1) {
        const child = lines[i + 1].trim();
        const prefix = `${' '.repeat(indent)}- heading [h${headingMatch[2]}] "${headingMatch[1]}"`;
        const arrowMatch = child.match(/^- link "([^"]+)" -> (\S+)$/);
        if (arrowMatch) {
          out.push(`${prefix} -> ${arrowMatch[2]}`);
          i = end;
          continue;
        }
        if (/^- link "([^"]+)"$/.test(child)) {
          out.push(prefix);
          i = end;
          continue;
        }
      }
    }
    out.push(line);
    i++;
  }
  return out;
}

/** Pass 3: drop img nodes with empty or single-character alt text. */
function compactDropDecorativeImages(lines) {
  const kept = [];
  let i = 0;
  while (i < lines.length) {
    const line = lines[i];
    const indent = compactIndentWidth(line);
    const imgMatch = line.slice(indent).match(/^- img(?:\s+"([^"]*)")?/);
    // Count code points, not UTF-16 units, so a lone emoji is one character.
    if (imgMatch && [...(imgMatch[1] || '')].length <= 1) {
      i = compactSubtreeEnd(lines, i, Math.floor(indent / 2));
      continue;
    }
    kept.push(line);
    i++;
  }
  return kept;
}

/** Pass 4: drop repeated URLs among siblings of the same parent. */
function compactDedupUrls(lines) {
  const out = [];
  const seenByDepth = new Map(); // depth -> Set of URLs seen in the current scope
  let prevDepth = -1;
  let i = 0;
  while (i < lines.length) {
    const line = lines[i];
    const indent = compactIndentWidth(line);
    const depth = Math.floor(indent / 2);

    // Leaving a subtree: forget URLs recorded at deeper levels.
    if (depth < prevDepth) {
      for (const d of [...seenByDepth.keys()]) {
        if (d > depth) seenByDepth.delete(d);
      }
    }
    prevDepth = depth;

    // Match only the compact link/heading shapes so an arrow inside ordinary
    // quoted text is never mistaken for a URL.
    const arrowMatch = line
      .slice(indent)
      .match(/^- (?:link|heading \[h\d+\]) "[^"]+" -> (\/\S+|https?:\/\/\S+)$/);
    if (arrowMatch) {
      // A trailing ":" only marks "has children"; it is not part of the URL key.
      const url = arrowMatch[1].endsWith(':') ? arrowMatch[1].slice(0, -1) : arrowMatch[1];
      if (!seenByDepth.has(depth)) seenByDepth.set(depth, new Set());
      const seen = seenByDepth.get(depth);
      if (seen.has(url)) {
        i = compactSubtreeEnd(lines, i, depth);
        continue;
      }
      seen.add(url);
    }

    out.push(line);
    i++;
  }
  return out;
}
**Decorative image removal** - Strips `img` nodes with empty or single-character alt text (decorative icons, spacers) +4. **Duplicate URL dedup** - Removes the second occurrence of the same URL within the same depth scope + +Combines well with `--snapshot-collapse` and `--snapshot-text-only` for maximum reduction. Applied after `--snapshot-depth` and before `--snapshot-collapse` in the pipeline. + ### --snapshot-collapse - Collapse Repeated Siblings ```bash diff --git a/tests/get-snapshot.test.js b/tests/get-snapshot.test.js index d6ec505..eb8b20f 100644 --- a/tests/get-snapshot.test.js +++ b/tests/get-snapshot.test.js @@ -32,6 +32,7 @@ async function getSnapshot(page, opts = {}) { const raw = await root.ariaSnapshot(); let result = raw; if (opts.snapshotDepth) result = trimByDepth(result, opts.snapshotDepth); + if (opts.snapshotCompact) result = compactFormat(result); if (opts.snapshotCollapse) result = collapseRepeated(result); if (opts.snapshotTextOnly) result = textOnly(result); if (opts.snapshotMaxLines) result = trimByLines(result, opts.snapshotMaxLines); @@ -202,6 +203,442 @@ function textOnly(snapshot) { return result.join('\n'); } +// Keep this in sync with scripts/web-ctl.js. 
/**
 * Test-local mirror of the snapshot compaction transform.
 *
 * Runs four passes in order: link collapsing, heading inlining, decorative
 * image removal, and duplicate-URL dedup among siblings of the same parent.
 *
 * @param {string} snapshot - ARIA snapshot text (2-space indentation per level)
 * @returns {string} Compacted snapshot. `null`, `undefined`, non-string values
 *   and strings starting with "(" are returned unchanged.
 */
function compactFormat(snapshot) {
  // Pass non-strings through instead of throwing on `.split`.
  if (snapshot == null || typeof snapshot !== 'string') return snapshot;
  // Parenthesised strings are placeholder messages, not trees.
  if (snapshot.startsWith('(')) return snapshot;

  const passes = [
    compactCollapseLinks,
    compactInlineHeadings,
    compactDropDecorativeImages,
    compactDedupUrls,
  ];
  return passes.reduce((lines, pass) => pass(lines), snapshot.split('\n')).join('\n');
}

/** Number of leading spaces on a snapshot line. */
function compactIndentWidth(line) {
  let width = 0;
  while (width < line.length && line[width] === ' ') width++;
  return width;
}

/** Tree depth of a snapshot line (two spaces per level). */
function compactDepthOf(line) {
  return Math.floor(compactIndentWidth(line) / 2);
}

/** Index of the first line after `start` that is not deeper than `depth`. */
function compactSubtreeEnd(lines, start, depth) {
  let end = start + 1;
  while (end < lines.length && compactDepthOf(lines[end]) > depth) end++;
  return end;
}

/** Pass 1: merge each link with its direct `/url:` child. */
function compactCollapseLinks(lines) {
  const out = [];
  let i = 0;
  while (i < lines.length) {
    const line = lines[i];
    const indent = compactIndentWidth(line);
    const linkMatch = line.slice(indent).match(/^- link "([^"]+)":/);
    if (linkMatch) {
      const depth = Math.floor(indent / 2);
      const end = compactSubtreeEnd(lines, i, depth);
      const subtree = lines.slice(i + 1, end);
      // Only a *direct* child may supply the URL.
      const urlPos = subtree.findIndex(
        (child) => compactDepthOf(child) === depth + 1 && /^- \/url: \S+/.test(child.trim())
      );
      if (urlPos !== -1) {
        const url = subtree[urlPos].trim().match(/^- \/url: (\S+)/)[1];
        const rest = subtree.filter((_, idx) => idx !== urlPos);
        // Keep the trailing colon only when children remain under the link.
        const suffix = rest.length > 0 ? ':' : '';
        out.push(`${' '.repeat(indent)}- link "${linkMatch[1]}" -> ${url}${suffix}`);
        out.push(...rest);
        i = end;
        continue;
      }
    }
    out.push(line);
    i++;
  }
  return out;
}

/** Pass 2: fold a heading and its single link line into one line. */
function compactInlineHeadings(lines) {
  const out = [];
  let i = 0;
  while (i < lines.length) {
    const line = lines[i];
    const indent = compactIndentWidth(line);
    const headingMatch = line.slice(indent).match(/^- heading "([^"]+)" \[level=(\d+)\]:/);
    if (headingMatch) {
      const depth = Math.floor(indent / 2);
      const end = compactSubtreeEnd(lines, i, depth);
      // Inline only when the whole subtree is exactly one direct child line;
      // otherwise the link's own children would be silently discarded.
      if (end - i === 2 && compactDepthOf(lines[i + 1]) === depth + 1) {
        const child = lines[i + 1].trim();
        const prefix = `${' '.repeat(indent)}- heading [h${headingMatch[2]}] "${headingMatch[1]}"`;
        const arrowMatch = child.match(/^- link "([^"]+)" -> (\S+)$/);
        if (arrowMatch) {
          out.push(`${prefix} -> ${arrowMatch[2]}`);
          i = end;
          continue;
        }
        if (/^- link "([^"]+)"$/.test(child)) {
          out.push(prefix);
          i = end;
          continue;
        }
      }
    }
    out.push(line);
    i++;
  }
  return out;
}

/** Pass 3: drop img nodes with empty or single-character alt text. */
function compactDropDecorativeImages(lines) {
  const kept = [];
  let i = 0;
  while (i < lines.length) {
    const line = lines[i];
    const indent = compactIndentWidth(line);
    const imgMatch = line.slice(indent).match(/^- img(?:\s+"([^"]*)")?/);
    // Count code points, not UTF-16 units, so a lone emoji is one character.
    if (imgMatch && [...(imgMatch[1] || '')].length <= 1) {
      i = compactSubtreeEnd(lines, i, Math.floor(indent / 2));
      continue;
    }
    kept.push(line);
    i++;
  }
  return kept;
}

/** Pass 4: drop repeated URLs among siblings of the same parent. */
function compactDedupUrls(lines) {
  const out = [];
  const seenByDepth = new Map(); // depth -> Set of URLs seen in the current scope
  let prevDepth = -1;
  let i = 0;
  while (i < lines.length) {
    const line = lines[i];
    const indent = compactIndentWidth(line);
    const depth = Math.floor(indent / 2);

    // Leaving a subtree: forget URLs recorded at deeper levels.
    if (depth < prevDepth) {
      for (const d of [...seenByDepth.keys()]) {
        if (d > depth) seenByDepth.delete(d);
      }
    }
    prevDepth = depth;

    // Match only the compact link/heading shapes so an arrow inside ordinary
    // quoted text is never mistaken for a URL.
    const arrowMatch = line
      .slice(indent)
      .match(/^- (?:link|heading \[h\d+\]) "[^"]+" -> (\/\S+|https?:\/\/\S+)$/);
    if (arrowMatch) {
      // A trailing ":" only marks "has children"; it is not part of the URL key.
      const url = arrowMatch[1].endsWith(':') ? arrowMatch[1].slice(0, -1) : arrowMatch[1];
      if (!seenByDepth.has(depth)) seenByDepth.set(depth, new Set());
      const seen = seenByDepth.get(depth);
      if (seen.has(url)) {
        i = compactSubtreeEnd(lines, i, depth);
        continue;
      }
      seen.add(url);
    }

    out.push(line);
    i++;
  }
  return out;
}
"icon"')); + assert.ok(!result.includes('/url:')); + }); + + it('collapses nested link inside a list', () => { + const input = [ + '- list:', + ' - listitem:', + ' - link "About":', + ' - /url: /about' + ].join('\n'); + const result = compactFormat(input); + assert.ok(result.includes(' - link "About" -> /about')); + assert.ok(!result.includes('/url:')); + }); + + // --- Heading inlining --- + + it('inlines heading with single link child', () => { + const input = [ + '- heading "Getting Started" [level=2]:', + ' - link "Getting Started" -> /docs/start' + ].join('\n'); + const result = compactFormat(input); + assert.equal(result, '- heading [h2] "Getting Started" -> /docs/start'); + }); + + it('preserves heading with multiple children', () => { + const input = [ + '- heading "Title" [level=1]:', + ' - link "Link 1"', + ' - link "Link 2"' + ].join('\n'); + const result = compactFormat(input); + assert.ok(result.includes('- heading "Title" [level=1]:')); + assert.ok(result.includes(' - link "Link 1"')); + assert.ok(result.includes(' - link "Link 2"')); + }); + + it('preserves heading without level attribute', () => { + const input = [ + '- heading "Title":', + ' - link "Click"' + ].join('\n'); + // No [level=N] means the regex won't match, so heading stays as-is + assert.equal(compactFormat(input), input); + }); + + it('inlines heading with plain link child (no URL)', () => { + const input = [ + '- heading "Section" [level=3]:', + ' - link "Section"' + ].join('\n'); + const result = compactFormat(input); + assert.equal(result, '- heading [h3] "Section"'); + }); + + // --- Decorative image removal --- + + it('removes img with empty name', () => { + const input = [ + '- heading "Title"', + '- img', + '- link "More"' + ].join('\n'); + const result = compactFormat(input); + assert.ok(!result.includes('- img')); + assert.ok(result.includes('- heading "Title"')); + assert.ok(result.includes('- link "More"')); + }); + + it('removes img with single-char alt text', () => { + 
const input = [ + '- img "x"', + '- paragraph "Content"' + ].join('\n'); + const result = compactFormat(input); + assert.ok(!result.includes('img')); + assert.ok(result.includes('paragraph "Content"')); + }); + + it('preserves img with meaningful alt text', () => { + const input = '- img "Product screenshot"'; + assert.equal(compactFormat(input), input); + }); + + it('removes decorative img and its children', () => { + const input = [ + '- img "":', + ' - text "caption"', + '- link "Next"' + ].join('\n'); + const result = compactFormat(input); + assert.ok(!result.includes('img')); + assert.ok(!result.includes('caption')); + assert.ok(result.includes('- link "Next"')); + }); + + // --- Duplicate URL dedup --- + + it('removes second occurrence of same URL at same depth', () => { + const input = [ + '- link "Home" -> /home', + '- link "Home Again" -> /home' + ].join('\n'); + const result = compactFormat(input); + assert.ok(result.includes('- link "Home" -> /home')); + assert.ok(!result.includes('Home Again')); + }); + + it('keeps same URL at different depths', () => { + const input = [ + '- link "Home" -> /home', + '- list:', + ' - link "Home" -> /home' + ].join('\n'); + const result = compactFormat(input); + const homeCount = (result.match(/-> \/home/g) || []).length; + assert.equal(homeCount, 2); + }); + + it('resets dedup tracking when depth decreases', () => { + const input = [ + '- navigation:', + ' - link "About" -> /about', + '- main:', + ' - link "About" -> /about' + ].join('\n'); + const result = compactFormat(input); + const aboutCount = (result.match(/-> \/about/g) || []).length; + assert.equal(aboutCount, 2, 'URL should appear twice since depth scope reset between nav and main'); + }); + + // --- Combination test --- + + it('applies all transforms on realistic page snippet', () => { + const input = [ + '- navigation "Main":', + ' - link "Home":', + ' - /url: /home', + ' - link "About":', + ' - /url: /about', + ' - img ""', + '- main:', + ' - heading 
"Welcome" [level=1]:', + ' - link "Welcome" -> /home', + ' - img "x"', + ' - link "About" -> /about', + ' - img "Team photo"', + ' - paragraph "Hello world"' + ].join('\n'); + const result = compactFormat(input); + // Links collapsed + assert.ok(result.includes(' - link "Home" -> /home')); + assert.ok(result.includes(' - link "About" -> /about')); + assert.ok(!result.includes('/url:')); + // Heading inlined (but /home is duplicate at depth 1 from nav, so heading gets deduped) + // Actually heading is at depth 1, nav links are at depth 1 too, so /home is a dup at depth 1 + // The heading inline fires first (pass 2), then dedup (pass 4) removes the dup + assert.ok(!result.includes('img ""'), 'empty alt img removed'); + assert.ok(!result.includes('img "x"'), 'single char alt img removed'); + assert.ok(result.includes('img "Team photo"'), 'meaningful alt preserved'); + assert.ok(result.includes('paragraph "Hello world"')); + }); + + // --- Edge cases --- + + it('handles blank lines in input', () => { + const input = [ + '- link "Home":', + ' - /url: /home', + '', + '- link "About":', + ' - /url: /about' + ].join('\n'); + const result = compactFormat(input); + assert.ok(result.includes('- link "Home" -> /home')); + assert.ok(result.includes('- link "About" -> /about')); + }); + + it('link collapse feeds into heading inline', () => { + // Pass 1 collapses link, Pass 2 inlines heading with the collapsed link + const input = [ + '- heading "Docs" [level=2]:', + ' - link "Docs":', + ' - /url: /docs' + ].join('\n'); + const result = compactFormat(input); + assert.equal(result, '- heading [h2] "Docs" -> /docs'); + }); + + it('deduplicates URLs produced by link collapsing', () => { + const input = [ + '- link "Home":', + ' - /url: /home', + '- link "Home link":', + ' - /url: /home' + ].join('\n'); + const result = compactFormat(input); + assert.ok(result.includes('- link "Home" -> /home')); + assert.ok(!result.includes('Home link'), 'duplicate URL removed after collapse'); + 
}); +}); + // ============ trimByDepth tests ============ describe('trimByDepth', () => { @@ -808,8 +1245,29 @@ describe('getSnapshot pipeline', () => { assert.equal(lines[2], '... (3 more lines)'); }); + it('applies snapshotCompact when set', async () => { + const snapshot = [ + '- link "Home":', + ' - /url: /home', + '- img ""', + '- heading "News" [level=2]:', + ' - link "News" -> /news', + '- paragraph "Content"' + ].join('\n'); + const result = await getSnapshot(makeMockPage(snapshot), { snapshotCompact: true }); + // Links collapsed + assert.ok(result.includes('- link "Home" -> /home')); + assert.ok(!result.includes('/url:')); + // Decorative img removed + assert.ok(!result.includes('img ""')); + // Heading inlined + assert.ok(result.includes('- heading [h2] "News" -> /news')); + // Content preserved + assert.ok(result.includes('paragraph "Content"')); + }); + it('chains all options together', async () => { - // depth -> collapse -> text-only -> max-lines + // depth -> compact -> collapse -> text-only -> max-lines const snapshot = [ '- main', ' - list', diff --git a/tests/web-ctl-actions.test.js b/tests/web-ctl-actions.test.js index d6c39fd..91ebb0b 100644 --- a/tests/web-ctl-actions.test.js +++ b/tests/web-ctl-actions.test.js @@ -200,7 +200,7 @@ describe('snapshot option flag parsing', () => { const BOOLEAN_FLAGS = new Set([ '--allow-evaluate', '--no-snapshot', '--wait-stable', '--vnc', '--exact', '--accept', '--submit', '--dismiss', - '--snapshot-collapse', '--snapshot-text-only', + '--snapshot-collapse', '--snapshot-text-only', '--snapshot-compact', ]); // Replicate parseOptions for unit testing @@ -297,14 +297,27 @@ describe('snapshot option flag parsing', () => { assert.equal(opts.snapshotTextOnly, true); }); + it('parses --snapshot-compact as snapshotCompact boolean', () => { + const opts = parseOptions(['--snapshot-compact']); + assert.equal(opts.snapshotCompact, true); + }); + + it('--snapshot-compact does not consume next positional arg', () => { + 
const opts = parseOptions(['--snapshot-compact', 'css=nav']); + assert.equal(opts.snapshotCompact, true); + assert.equal(opts['css=nav'], undefined); + }); + it('combines all new snapshot flags', () => { const opts = parseOptions([ '--snapshot-depth', '3', + '--snapshot-compact', '--snapshot-collapse', '--snapshot-text-only', '--snapshot-max-lines', '100' ]); assert.equal(opts.snapshotDepth, '3'); + assert.equal(opts.snapshotCompact, true); assert.equal(opts.snapshotCollapse, true); assert.equal(opts.snapshotTextOnly, true); assert.equal(opts.snapshotMaxLines, '100'); @@ -461,14 +474,28 @@ describe('snapshot options in web-ctl source', () => { assert.ok(webCtlSource.includes("'--snapshot-text-only'"), '--snapshot-text-only should be in BOOLEAN_FLAGS'); }); - it('getSnapshot pipeline applies all four transforms in order', () => { - // Verify the pipeline: depth -> collapse -> text-only -> max-lines + it('BOOLEAN_FLAGS includes --snapshot-compact', () => { + assert.ok(webCtlSource.includes("'--snapshot-compact'"), '--snapshot-compact should be in BOOLEAN_FLAGS'); + }); + + it('compactFormat function exists', () => { + assert.ok(webCtlSource.includes('function compactFormat(snapshot)'), 'compactFormat should be defined'); + }); + + it('help text contains --snapshot-compact flag', () => { + assert.ok(webCtlSource.includes('--snapshot-compact'), 'help should document --snapshot-compact'); + }); + + it('getSnapshot pipeline applies all five transforms in order', () => { + // Verify the pipeline: depth -> compact -> collapse -> text-only -> max-lines const depthIdx = webCtlSource.indexOf('opts.snapshotDepth) result = trimByDepth'); + const compactIdx = webCtlSource.indexOf('opts.snapshotCompact) result = compactFormat'); const collapseIdx = webCtlSource.indexOf('opts.snapshotCollapse) result = collapseRepeated'); const textOnlyIdx = webCtlSource.indexOf('opts.snapshotTextOnly) result = textOnly'); const maxLinesIdx = webCtlSource.indexOf('opts.snapshotMaxLines) result = 
trimByLines'); assert.ok(depthIdx > 0, 'trimByDepth should be in pipeline'); - assert.ok(collapseIdx > depthIdx, 'collapseRepeated should follow trimByDepth'); + assert.ok(compactIdx > depthIdx, 'compactFormat should follow trimByDepth'); + assert.ok(collapseIdx > compactIdx, 'collapseRepeated should follow compactFormat'); assert.ok(textOnlyIdx > collapseIdx, 'textOnly should follow collapseRepeated'); assert.ok(maxLinesIdx > textOnlyIdx, 'trimByLines should follow textOnly'); });