/**
 * Reduces an HTML string to its visible text: `<script>` and `<style>`
 * blocks are dropped together with their contents, all remaining markup is
 * discarded, and runs of whitespace are collapsed to single spaces.
 *
 * Uses `window.DOMParser` when one is available and falls back to a
 * regex-based pass otherwise. The fallback is best-effort text extraction,
 * not an HTML sanitizer.
 *
 * @param html - Raw HTML document or fragment.
 * @returns The extracted text, trimmed; an empty string when nothing remains.
 */
private extractTextContent(html: string): string {
  const collapseWhitespace = (text: string): string => text.replace(/\s+/g, ' ').trim();

  if (typeof window !== 'undefined' && typeof window.DOMParser !== 'undefined') {
    const doc = new window.DOMParser().parseFromString(html, 'text/html');
    // textContent also returns the source text of <script> and <style>
    // elements, so they have to be removed before reading it.
    doc.querySelectorAll('script, style').forEach((element) => element.remove());
    // NOTE: textContent puts no separator between adjacent elements, so
    // "<p>a</p><p>b</p>" yields "ab" here but "a b" in the fallback below.
    return collapseWhitespace(doc.body.textContent ?? '');
  }

  // Each pattern is anchored on the opening tag so that only the block itself
  // is removed, never the text in front of it. The lookahead requires the tag
  // name to end (whitespace, "/" or ">"), which keeps look-alikes such as
  // <scripted> from matching; end tags may carry extra characters before ">".
  const scriptBlock = /<script(?=[\s/>])[^>]*>[\s\S]*?<\/script(?=[\s/>])[^>]*>/gi;
  const styleBlock = /<style(?=[\s/>])[^>]*>[\s\S]*?<\/style(?=[\s/>])[^>]*>/gi;

  // Removing one block can splice a new one together from the surrounding
  // pieces (e.g. "<scr<script></script>ipt>…</script>"), so repeat until the
  // string stops changing.
  let stripped = html;
  let previous: string;
  do {
    previous = stripped;
    stripped = stripped.replace(scriptBlock, '').replace(styleBlock, '');
  } while (stripped !== previous);

  // Replace every remaining tag with a space so neighbouring words stay apart.
  return collapseWhitespace(stripped.replace(/<[^>]*>/g, ' '));
}