Skip to content

Commit 9c787ec

Browse files
authored
fix copy markup widget (#443)
1 parent 2e32f0d commit 9c787ec

File tree

1 file changed

+194
-41
lines changed

1 file changed

+194
-41
lines changed

src/components/CopyPageDropdown.tsx

Lines changed: 194 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -40,22 +40,10 @@ export function CopyPageDropdown({ pageUrl, pageTitle }: CopyPageDropdownProps)
4040
const extractCleanMarkdown = (element: Element): string => {
4141
const lines: string[] = [];
4242

43-
const processNode = (node: Node, depth: number = 0): void => {
44-
if (node.nodeType === Node.TEXT_NODE) {
45-
const text = node.textContent?.trim();
46-
if (text) {
47-
lines.push(text);
48-
}
49-
return;
50-
}
51-
52-
if (node.nodeType !== Node.ELEMENT_NODE) return;
53-
54-
const el = node as Element;
43+
// Helper to check if element should be skipped
44+
const shouldSkip = (el: Element): boolean => {
5545
const tagName = el.tagName.toLowerCase();
56-
57-
// Skip unwanted elements
58-
if (
46+
return (
5947
el.classList.contains('sl-banner') ||
6048
el.classList.contains('copy-page-dropdown') ||
6149
el.classList.contains('pagination-links') ||
@@ -76,18 +64,147 @@ export function CopyPageDropdown({ pageUrl, pageTitle }: CopyPageDropdownProps)
7664
tagName === 'textarea' ||
7765
tagName === 'svg' ||
7866
el.getAttribute('aria-hidden') === 'true' ||
79-
// Skip "Section titled" links and "Edit page" links
8067
(tagName === 'a' && el.textContent?.includes('Section titled')) ||
8168
(tagName === 'a' && el.textContent?.includes('Edit page'))
82-
) {
69+
);
70+
};
71+
72+
// Helper to extract inline content with formatting preserved
73+
// Origin used to turn site-relative links and image sources into absolute URLs.
const DOCS_ORIGIN = 'https://docs.localstack.cloud';

// Analytics-only query parameters that are removed from copied links.
const TRACKING_PARAMS = [
  '__hstc',
  '__hssc',
  '__hsfp',
  'utm_source',
  'utm_medium',
  'utm_campaign',
  'utm_term',
  'utm_content',
];

/**
 * Resolve `rawUrl` against the docs origin and optionally strip tracking parameters.
 *
 * Using the URL parser (instead of a `startsWith('http')` check) keeps already-absolute
 * URLs of any scheme (`mailto:`, `data:`, ...) intact, handles protocol-relative
 * `//host/path` URLs, and inserts the missing `/` for relative paths such as `foo/bar`.
 *
 * @param rawUrl - The attribute value as found in the DOM.
 * @param stripTracking - When true, the parameters in `TRACKING_PARAMS` are removed.
 * @returns The absolute URL; if parsing fails, the plain origin-prefixed fallback.
 */
const resolveDocsUrl = (rawUrl: string, stripTracking: boolean): string => {
  try {
    const url = new URL(rawUrl, DOCS_ORIGIN);
    if (stripTracking) {
      TRACKING_PARAMS.forEach(param => url.searchParams.delete(param));
    }
    return url.toString();
  } catch {
    // Best effort: keep the previous string-concatenation behaviour for unparseable input.
    return rawUrl.startsWith('http') ? rawUrl : `${DOCS_ORIGIN}${rawUrl}`;
  }
};

/**
 * Wrap `inner` in an emphasis marker (`*` or `**`).
 *
 * Surrounding whitespace is moved outside the markers because `** text **` is not
 * rendered as emphasis, and empty content is returned as-is so no bare `****` is emitted.
 */
const wrapInlineMarker = (inner: string, marker: string): string => {
  const core = inner.trim();
  if (!core) return inner;
  const leading = inner.slice(0, inner.length - inner.trimStart().length);
  const trailing = inner.slice(inner.trimEnd().length);
  return `${leading}${marker}${core}${marker}${trailing}`;
};

/**
 * Convert a DOM node and its descendants to inline Markdown.
 *
 * Text nodes are returned verbatim. Elements rejected by `shouldSkip` yield an empty
 * string. Images, inline code, bold, italic, links and line breaks are mapped to their
 * Markdown equivalents; any other element contributes the concatenation of its children.
 *
 * @param node - The node to convert.
 * @returns The Markdown text for `node` (may contain newlines for images and `<br>`).
 */
const extractInlineContent = (node: Node): string => {
  if (node.nodeType === Node.TEXT_NODE) {
    return node.textContent ?? '';
  }

  if (node.nodeType !== Node.ELEMENT_NODE) return '';

  const el = node as Element;
  const tagName = el.tagName.toLowerCase();

  if (shouldSkip(el)) return '';

  const renderChildren = (): string =>
    Array.from(el.childNodes)
      .map(child => extractInlineContent(child))
      .join('');

  // Handle images (including those in custom elements like starlight-image-zoom)
  if (tagName === 'img') {
    const alt = el.getAttribute('alt') ?? '';
    const src = el.getAttribute('src') ?? '';
    if (!src) return '';
    return `\n\n![${alt}](${resolveDocsUrl(src, false)})\n\n`;
  }

  // Handle inline code
  if (tagName === 'code' && !el.closest('pre')) {
    const code = el.textContent ?? '';
    if (!code) return '';
    // The fence must be longer than any backtick run inside the code, otherwise
    // the span would terminate early.
    const runs = code.match(/`+/g) ?? [];
    const longestRun = runs.reduce((max, run) => Math.max(max, run.length), 0);
    const fence = '`'.repeat(longestRun + 1);
    // Padding keeps a leading/trailing backtick from merging with the fence.
    const pad = longestRun > 0 ? ' ' : '';
    return `${fence}${pad}${code}${pad}${fence}`;
  }

  // Handle bold/strong
  if (tagName === 'strong' || tagName === 'b') {
    return wrapInlineMarker(renderChildren(), '**');
  }

  // Handle italic/emphasis
  if (tagName === 'em' || tagName === 'i') {
    return wrapInlineMarker(renderChildren(), '*');
  }

  // Handle links; in-page anchors ("#...") are reduced to their text
  if (tagName === 'a' && !el.textContent?.includes('Section titled')) {
    const href = el.getAttribute('href');
    const label = renderChildren();
    if (label && href && !href.startsWith('#')) {
      return `[${label}](${resolveDocsUrl(href, true)})`;
    }
    return label;
  }

  // Handle line breaks
  if (tagName === 'br') {
    return '\n';
  }

  // For other inline elements, just extract children
  return renderChildren();
};
143+
144+
// Helper to process list items recursively
145+
/**
 * Render one `<li>` (and any lists nested inside it) as Markdown list lines.
 *
 * Direct `<ul>`/`<ol>` children are treated as nested lists and rendered recursively
 * one level deeper; every other child is converted with `extractInlineContent` and
 * collapsed into a single line of text.
 *
 * @param li - The list item element to render.
 * @param prefix - The marker for this item, e.g. `-` or `1.`.
 * @param indentLevel - Nesting depth of this item; 0 for a top-level list.
 * @returns The Markdown lines for the item followed by those of its nested items.
 */
const processListItem = (li: Element, prefix: string, indentLevel: number): string[] => {
  // Four spaces per level: a nested marker must be indented at least to the parent's
  // content column ("- " = 2, "1. " = 3, "10. " = 4), so a narrower indent would make
  // the nested items siblings of the parent instead of children.
  const indent = '    '.repeat(indentLevel);
  const nestedLists: Element[] = [];
  let mainContent = '';

  // Separate main content from nested lists
  li.childNodes.forEach(child => {
    if (child.nodeType === Node.ELEMENT_NODE) {
      const el = child as Element;
      const tagName = el.tagName.toLowerCase();
      if (tagName === 'ul' || tagName === 'ol') {
        nestedLists.push(el);
        return;
      }
    }
    mainContent += extractInlineContent(child);
  });

  // Collapse all whitespace (including newlines) so the item stays on one line
  mainContent = mainContent.trim().replace(/\s+/g, ' ');

  // Render nested lists first so we know whether this item has any visible children
  const nestedLines: string[] = [];
  nestedLists.forEach(nestedList => {
    const isOrdered = nestedList.tagName.toLowerCase() === 'ol';
    nestedList.querySelectorAll(':scope > li').forEach((nestedLi, idx) => {
      const nestedPrefix = isOrdered ? `${idx + 1}.` : '-';
      nestedLines.push(...processListItem(nestedLi, nestedPrefix, indentLevel + 1));
    });
  });

  const result: string[] = [];
  if (mainContent) {
    result.push(`${indent}${prefix} ${mainContent}`);
  } else if (nestedLines.length > 0) {
    // Keep a bare marker so the nested items still have a parent to attach to;
    // without it the indented children would be parsed as an indented code block.
    result.push(`${indent}${prefix}`);
  }
  result.push(...nestedLines);

  return result;
};
185+
186+
const processNode = (node: Node, depth: number = 0): void => {
187+
if (node.nodeType === Node.TEXT_NODE) {
188+
// Only add standalone text if it has meaningful content
189+
const text = node.textContent?.trim();
190+
if (text && node.parentElement?.tagName.toLowerCase() === 'main') {
191+
lines.push(text);
192+
}
83193
return;
84194
}
85195

196+
if (node.nodeType !== Node.ELEMENT_NODE) return;
197+
198+
const el = node as Element;
199+
const tagName = el.tagName.toLowerCase();
200+
201+
if (shouldSkip(el)) return;
202+
86203
// Handle headings
87204
if (/^h[1-6]$/.test(tagName)) {
88205
const level = parseInt(tagName[1]);
89206
const prefix = '#'.repeat(level);
90-
const text = el.textContent?.replace(/Section titled "[^"]*"/g, '').trim();
207+
const text = extractInlineContent(el).replace(/Section titled "[^"]*"/g, '').trim();
91208
if (text) {
92209
lines.push('');
93210
lines.push(`${prefix} ${text}`);
@@ -96,12 +213,41 @@ export function CopyPageDropdown({ pageUrl, pageTitle }: CopyPageDropdownProps)
96213
return;
97214
}
98215

216+
// Handle images
217+
if (tagName === 'img') {
218+
const alt = el.getAttribute('alt') || '';
219+
const src = el.getAttribute('src') || '';
220+
if (src) {
221+
// Make relative URLs absolute
222+
const fullUrl = src.startsWith('http') ? src : `https://docs.localstack.cloud${src}`;
223+
lines.push('');
224+
lines.push(`![${alt}](${fullUrl})`);
225+
lines.push('');
226+
}
227+
return;
228+
}
229+
230+
// Handle figure elements (which often wrap images)
231+
if (tagName === 'figure') {
232+
const img = el.querySelector('img');
233+
if (img) {
234+
const alt = img.getAttribute('alt') || '';
235+
const src = img.getAttribute('src') || '';
236+
if (src) {
237+
const fullUrl = src.startsWith('http') ? src : `https://docs.localstack.cloud${src}`;
238+
lines.push('');
239+
lines.push(`![${alt}](${fullUrl})`);
240+
lines.push('');
241+
}
242+
}
243+
return;
244+
}
245+
99246
// Handle code blocks
100247
if (tagName === 'pre' || el.classList.contains('expressive-code')) {
101248
const codeEl = el.querySelector('code');
102249
if (codeEl) {
103250
const code = codeEl.textContent?.trim() || '';
104-
// Try to detect language from class
105251
const langClass = Array.from(codeEl.classList).find(c => c.startsWith('language-'));
106252
const lang = langClass ? langClass.replace('language-', '') : '';
107253
lines.push('');
@@ -113,45 +259,44 @@ export function CopyPageDropdown({ pageUrl, pageTitle }: CopyPageDropdownProps)
113259
return;
114260
}
115261

116-
// Handle inline code
262+
// Handle inline code (standalone)
117263
if (tagName === 'code' && !el.closest('pre')) {
118264
lines.push(`\`${el.textContent}\``);
119265
return;
120266
}
121267

122-
// Handle links
123-
if (tagName === 'a' && !el.textContent?.includes('Section titled')) {
124-
const href = el.getAttribute('href');
125-
const text = el.textContent?.trim();
126-
if (text && href && !href.startsWith('#')) {
127-
// Make relative URLs absolute
128-
const fullUrl = href.startsWith('http') ? href : `https://docs.localstack.cloud${href}`;
129-
lines.push(`[${text}](${fullUrl})`);
130-
return;
131-
}
132-
}
133-
134268
// Handle lists
135269
if (tagName === 'ul' || tagName === 'ol') {
136270
lines.push('');
137271
const items = el.querySelectorAll(':scope > li');
138272
items.forEach((li, idx) => {
139273
const prefix = tagName === 'ol' ? `${idx + 1}.` : '-';
140-
const text = li.textContent?.trim();
141-
if (text) {
142-
lines.push(`${prefix} ${text}`);
143-
}
274+
const listLines = processListItem(li, prefix, 0);
275+
lines.push(...listLines);
144276
});
145277
lines.push('');
146278
return;
147279
}
148280

149281
// Handle paragraphs
150282
if (tagName === 'p') {
151-
const text = el.textContent?.trim();
152-
if (text) {
283+
const content = extractInlineContent(el).trim();
284+
if (content) {
285+
lines.push('');
286+
lines.push(content);
287+
}
288+
return;
289+
}
290+
291+
// Handle blockquotes
292+
if (tagName === 'blockquote') {
293+
const content = extractInlineContent(el).trim();
294+
if (content) {
295+
lines.push('');
296+
content.split('\n').forEach(line => {
297+
lines.push(`> ${line}`);
298+
});
153299
lines.push('');
154-
lines.push(text);
155300
}
156301
return;
157302
}
@@ -162,7 +307,7 @@ export function CopyPageDropdown({ pageUrl, pageTitle }: CopyPageDropdownProps)
162307
const rows = el.querySelectorAll('tr');
163308
rows.forEach((row, rowIdx) => {
164309
const cells = row.querySelectorAll('th, td');
165-
const cellTexts = Array.from(cells).map(cell => cell.textContent?.trim() || '');
310+
const cellTexts = Array.from(cells).map(cell => extractInlineContent(cell).trim() || '');
166311
lines.push('| ' + cellTexts.join(' | ') + ' |');
167312
if (rowIdx === 0) {
168313
lines.push('| ' + cellTexts.map(() => '---').join(' | ') + ' |');
@@ -172,7 +317,15 @@ export function CopyPageDropdown({ pageUrl, pageTitle }: CopyPageDropdownProps)
172317
return;
173318
}
174319

175-
// Recursively process children for other elements
320+
// Handle horizontal rules
321+
if (tagName === 'hr') {
322+
lines.push('');
323+
lines.push('---');
324+
lines.push('');
325+
return;
326+
}
327+
328+
// Recursively process children for other elements (div, section, article, etc.)
176329
el.childNodes.forEach(child => processNode(child, depth + 1));
177330
};
178331

0 commit comments

Comments
 (0)