@@ -40,22 +40,10 @@ export function CopyPageDropdown({ pageUrl, pageTitle }: CopyPageDropdownProps)
4040 const extractCleanMarkdown = ( element : Element ) : string => {
4141 const lines : string [ ] = [ ] ;
4242
43- const processNode = ( node : Node , depth : number = 0 ) : void => {
44- if ( node . nodeType === Node . TEXT_NODE ) {
45- const text = node . textContent ?. trim ( ) ;
46- if ( text ) {
47- lines . push ( text ) ;
48- }
49- return ;
50- }
51-
52- if ( node . nodeType !== Node . ELEMENT_NODE ) return ;
53-
54- const el = node as Element ;
43+ // Helper to check if element should be skipped
44+ const shouldSkip = ( el : Element ) : boolean => {
5545 const tagName = el . tagName . toLowerCase ( ) ;
56-
57- // Skip unwanted elements
58- if (
46+ return (
5947 el . classList . contains ( 'sl-banner' ) ||
6048 el . classList . contains ( 'copy-page-dropdown' ) ||
6149 el . classList . contains ( 'pagination-links' ) ||
@@ -76,18 +64,147 @@ export function CopyPageDropdown({ pageUrl, pageTitle }: CopyPageDropdownProps)
7664 tagName === 'textarea' ||
7765 tagName === 'svg' ||
7866 el . getAttribute ( 'aria-hidden' ) === 'true' ||
79- // Skip "Section titled" links and "Edit page" links
8067 ( tagName === 'a' && el . textContent ?. includes ( 'Section titled' ) ) ||
8168 ( tagName === 'a' && el . textContent ?. includes ( 'Edit page' ) )
82- ) {
69+ ) ;
70+ } ;
71+
// Origin used to turn site-relative `src`/`href` values into absolute URLs.
const INLINE_DOCS_ORIGIN = 'https://docs.localstack.cloud';

// Query parameters that are removed from copied links.
const INLINE_TRACKING_PARAMS = [
  '__hstc',
  '__hssc',
  '__hsfp',
  'utm_source',
  'utm_medium',
  'utm_campaign',
  'utm_term',
  'utm_content',
];

// Resolves a raw `src`/`href` against the docs origin. Values that already
// start with `http` are returned untouched; everything else goes through the
// URL resolver so that `mailto:`, `data:`, protocol-relative and slash-less
// relative values are not glued onto the origin string.
const toAbsoluteDocsUrl = (raw: string): string => {
  if (raw.startsWith('http')) return raw;
  try {
    return new URL(raw, INLINE_DOCS_ORIGIN).toString();
  } catch {
    // Fall back to plain concatenation if the value cannot be parsed.
    return `${INLINE_DOCS_ORIGIN}${raw}`;
  }
};

/**
 * Serialises a DOM node and its descendants into one inline Markdown string.
 *
 * - text nodes are returned verbatim (whitespace is not normalised here);
 * - `img` becomes a Markdown image surrounded by blank lines;
 * - `code` outside of `pre` becomes a backtick span;
 * - `strong`/`b` and `em`/`i` become `**…**` and `*…*`;
 * - `a` becomes `[label](url)` with tracking parameters removed, or just the
 *   label when there is no usable target (missing href or `#fragment`);
 * - `br` becomes a newline;
 * - any other element contributes the concatenation of its children.
 *
 * @param node - DOM node to serialise.
 * @returns The Markdown text; an empty string for nodes that are neither text
 *          nor element, and for elements rejected by `shouldSkip`.
 */
const extractInlineContent = (node: Node): string => {
  if (node.nodeType === Node.TEXT_NODE) {
    return node.textContent ?? '';
  }

  if (node.nodeType !== Node.ELEMENT_NODE) return '';

  const el = node as Element;
  if (shouldSkip(el)) return '';

  const tagName = el.tagName.toLowerCase();
  const renderChildren = (): string =>
    Array.from(el.childNodes)
      .map(child => extractInlineContent(child))
      .join('');

  // Images (including those nested in custom elements, which reach this
  // branch through the generic child recursion at the bottom).
  if (tagName === 'img') {
    const src = el.getAttribute('src') ?? '';
    if (!src) return '';
    const alt = el.getAttribute('alt') ?? '';
    // Emit the image itself; previously only the surrounding newlines were
    // returned and the computed alt text / URL were discarded.
    return `\n\n![${alt}](${toAbsoluteDocsUrl(src)})\n\n`;
  }

  // Inline code; code inside <pre> is left to the generic recursion below.
  if (tagName === 'code' && !el.closest('pre')) {
    return `\`${el.textContent ?? ''}\``;
  }

  // Bold. Blank content is passed through unwrapped so no bare `****` is emitted.
  if (tagName === 'strong' || tagName === 'b') {
    const inner = renderChildren();
    return inner.trim() ? `**${inner}**` : inner;
  }

  // Italic. Same blank-content rule as bold.
  if (tagName === 'em' || tagName === 'i') {
    const inner = renderChildren();
    return inner.trim() ? `*${inner}*` : inner;
  }

  // Links. Anchors whose text contains "Section titled" fall through to the
  // generic recursion instead of being rendered as links.
  if (tagName === 'a' && !el.textContent?.includes('Section titled')) {
    const href = el.getAttribute('href');
    const label = renderChildren();
    if (!label || !href || href.startsWith('#')) return label;

    let cleanUrl = toAbsoluteDocsUrl(href);
    try {
      const url = new URL(cleanUrl);
      for (const param of INLINE_TRACKING_PARAMS) {
        url.searchParams.delete(param);
      }
      cleanUrl = url.toString();
    } catch {
      // Unparseable URL: keep the un-cleaned value rather than dropping the link.
    }
    return `[${label}](${cleanUrl})`;
  }

  if (tagName === 'br') {
    return '\n';
  }

  // Any other element is transparent: only its children contribute.
  return renderChildren();
};
143+
/**
 * Renders one `<li>` element, and any lists nested directly inside it, as
 * Markdown list lines.
 *
 * Direct `<ul>`/`<ol>` children are set aside and rendered after the item's
 * own text, one indentation level deeper. Every other child is passed to
 * `extractInlineContent`, and the combined text is collapsed onto one line.
 *
 * @param li - The list item element to render.
 * @param prefix - Marker placed before the item text (`-` or `1.`, `2.`, …).
 * @param indentLevel - Nesting depth of this item; `0` for a top-level item.
 * @returns The rendered lines in document order. An item with no text of its
 *          own contributes no line, but its nested lists are still rendered.
 */
const processListItem = (li: Element, prefix: string, indentLevel: number): string[] => {
  // Four spaces per level places a child inside its parent's content column
  // for both `- ` and `1. ` markers, so Markdown parsers treat it as nested.
  const indent = '    '.repeat(indentLevel);
  const nestedLists: Element[] = [];
  let mainContent = '';

  // Separate the item's own inline content from directly nested lists.
  li.childNodes.forEach(child => {
    if (child.nodeType === Node.ELEMENT_NODE) {
      const el = child as Element;
      const tagName = el.tagName.toLowerCase();
      if (tagName === 'ul' || tagName === 'ol') {
        nestedLists.push(el);
        return;
      }
    }
    mainContent += extractInlineContent(child);
  });

  // A list item is a single line: collapse all whitespace runs, newlines included.
  mainContent = mainContent.replace(/\s+/g, ' ').trim();

  const result: string[] = [];
  if (mainContent) {
    result.push(`${indent}${prefix} ${mainContent}`);
  }

  for (const nestedList of nestedLists) {
    const ordered = nestedList.tagName.toLowerCase() === 'ol';
    // `:scope > li` restricts the match to direct children, so deeper items
    // are rendered once by the recursive call instead of twice.
    nestedList.querySelectorAll(':scope > li').forEach((nestedLi, idx) => {
      const nestedPrefix = ordered ? `${idx + 1}.` : '-';
      result.push(...processListItem(nestedLi, nestedPrefix, indentLevel + 1));
    });
  }

  return result;
};
185+
186+ const processNode = ( node : Node , depth : number = 0 ) : void => {
187+ if ( node . nodeType === Node . TEXT_NODE ) {
188+ // Only add standalone text if it has meaningful content
189+ const text = node . textContent ?. trim ( ) ;
190+ if ( text && node . parentElement ?. tagName . toLowerCase ( ) === 'main' ) {
191+ lines . push ( text ) ;
192+ }
83193 return ;
84194 }
85195
196+ if ( node . nodeType !== Node . ELEMENT_NODE ) return ;
197+
198+ const el = node as Element ;
199+ const tagName = el . tagName . toLowerCase ( ) ;
200+
201+ if ( shouldSkip ( el ) ) return ;
202+
86203 // Handle headings
87204 if ( / ^ h [ 1 - 6 ] $ / . test ( tagName ) ) {
88205 const level = parseInt ( tagName [ 1 ] ) ;
89206 const prefix = '#' . repeat ( level ) ;
90- const text = el . textContent ? .replace ( / S e c t i o n t i t l e d " [ ^ " ] * " / g, '' ) . trim ( ) ;
207+ const text = extractInlineContent ( el ) . replace ( / S e c t i o n t i t l e d " [ ^ " ] * " / g, '' ) . trim ( ) ;
91208 if ( text ) {
92209 lines . push ( '' ) ;
93210 lines . push ( `${ prefix } ${ text } ` ) ;
@@ -96,12 +213,41 @@ export function CopyPageDropdown({ pageUrl, pageTitle }: CopyPageDropdownProps)
96213 return ;
97214 }
98215
216+ // Handle images
217+ if ( tagName === 'img' ) {
218+ const alt = el . getAttribute ( 'alt' ) || '' ;
219+ const src = el . getAttribute ( 'src' ) || '' ;
220+ if ( src ) {
221+ // Make relative URLs absolute
222+ const fullUrl = src . startsWith ( 'http' ) ? src : `https://docs.localstack.cloud${ src } ` ;
223+ lines . push ( '' ) ;
224+ lines . push ( `` ) ;
225+ lines . push ( '' ) ;
226+ }
227+ return ;
228+ }
229+
230+ // Handle figure elements (which often wrap images)
231+ if ( tagName === 'figure' ) {
232+ const img = el . querySelector ( 'img' ) ;
233+ if ( img ) {
234+ const alt = img . getAttribute ( 'alt' ) || '' ;
235+ const src = img . getAttribute ( 'src' ) || '' ;
236+ if ( src ) {
237+ const fullUrl = src . startsWith ( 'http' ) ? src : `https://docs.localstack.cloud${ src } ` ;
238+ lines . push ( '' ) ;
239+ lines . push ( `` ) ;
240+ lines . push ( '' ) ;
241+ }
242+ }
243+ return ;
244+ }
245+
99246 // Handle code blocks
100247 if ( tagName === 'pre' || el . classList . contains ( 'expressive-code' ) ) {
101248 const codeEl = el . querySelector ( 'code' ) ;
102249 if ( codeEl ) {
103250 const code = codeEl . textContent ?. trim ( ) || '' ;
104- // Try to detect language from class
105251 const langClass = Array . from ( codeEl . classList ) . find ( c => c . startsWith ( 'language-' ) ) ;
106252 const lang = langClass ? langClass . replace ( 'language-' , '' ) : '' ;
107253 lines . push ( '' ) ;
@@ -113,45 +259,44 @@ export function CopyPageDropdown({ pageUrl, pageTitle }: CopyPageDropdownProps)
113259 return ;
114260 }
115261
116- // Handle inline code
262+ // Handle inline code (standalone)
117263 if ( tagName === 'code' && ! el . closest ( 'pre' ) ) {
118264 lines . push ( `\`${ el . textContent } \`` ) ;
119265 return ;
120266 }
121267
122- // Handle links
123- if ( tagName === 'a' && ! el . textContent ?. includes ( 'Section titled' ) ) {
124- const href = el . getAttribute ( 'href' ) ;
125- const text = el . textContent ?. trim ( ) ;
126- if ( text && href && ! href . startsWith ( '#' ) ) {
127- // Make relative URLs absolute
128- const fullUrl = href . startsWith ( 'http' ) ? href : `https://docs.localstack.cloud${ href } ` ;
129- lines . push ( `[${ text } ](${ fullUrl } )` ) ;
130- return ;
131- }
132- }
133-
134268 // Handle lists
135269 if ( tagName === 'ul' || tagName === 'ol' ) {
136270 lines . push ( '' ) ;
137271 const items = el . querySelectorAll ( ':scope > li' ) ;
138272 items . forEach ( ( li , idx ) => {
139273 const prefix = tagName === 'ol' ? `${ idx + 1 } .` : '-' ;
140- const text = li . textContent ?. trim ( ) ;
141- if ( text ) {
142- lines . push ( `${ prefix } ${ text } ` ) ;
143- }
274+ const listLines = processListItem ( li , prefix , 0 ) ;
275+ lines . push ( ...listLines ) ;
144276 } ) ;
145277 lines . push ( '' ) ;
146278 return ;
147279 }
148280
149281 // Handle paragraphs
150282 if ( tagName === 'p' ) {
151- const text = el . textContent ?. trim ( ) ;
152- if ( text ) {
283+ const content = extractInlineContent ( el ) . trim ( ) ;
284+ if ( content ) {
285+ lines . push ( '' ) ;
286+ lines . push ( content ) ;
287+ }
288+ return ;
289+ }
290+
291+ // Handle blockquotes
292+ if ( tagName === 'blockquote' ) {
293+ const content = extractInlineContent ( el ) . trim ( ) ;
294+ if ( content ) {
295+ lines . push ( '' ) ;
296+ content . split ( '\n' ) . forEach ( line => {
297+ lines . push ( `> ${ line } ` ) ;
298+ } ) ;
153299 lines . push ( '' ) ;
154- lines . push ( text ) ;
155300 }
156301 return ;
157302 }
@@ -162,7 +307,7 @@ export function CopyPageDropdown({ pageUrl, pageTitle }: CopyPageDropdownProps)
162307 const rows = el . querySelectorAll ( 'tr' ) ;
163308 rows . forEach ( ( row , rowIdx ) => {
164309 const cells = row . querySelectorAll ( 'th, td' ) ;
165- const cellTexts = Array . from ( cells ) . map ( cell => cell . textContent ? .trim ( ) || '' ) ;
310+ const cellTexts = Array . from ( cells ) . map ( cell => extractInlineContent ( cell ) . trim ( ) || '' ) ;
166311 lines . push ( '| ' + cellTexts . join ( ' | ' ) + ' |' ) ;
167312 if ( rowIdx === 0 ) {
168313 lines . push ( '| ' + cellTexts . map ( ( ) => '---' ) . join ( ' | ' ) + ' |' ) ;
@@ -172,7 +317,15 @@ export function CopyPageDropdown({ pageUrl, pageTitle }: CopyPageDropdownProps)
172317 return ;
173318 }
174319
175- // Recursively process children for other elements
320+ // Handle horizontal rules
321+ if ( tagName === 'hr' ) {
322+ lines . push ( '' ) ;
323+ lines . push ( '---' ) ;
324+ lines . push ( '' ) ;
325+ return ;
326+ }
327+
328+ // Recursively process children for other elements (div, section, article, etc.)
176329 el . childNodes . forEach ( child => processNode ( child , depth + 1 ) ) ;
177330 } ;
178331
0 commit comments