Jackson57279 · Jackson57279 · Aug 12, 2025 · Aug 12, 2025 · Aug 12, 2025 · coderabbitai
diff --git a/src/lib/search-service.ts b/src/lib/search-service.ts
@@ -316,11 +316,43 @@ export class BraveSearchService {
   }
 
+  /**
+   * Reduces an HTML string to its text content with runs of whitespace
+   * collapsed to single spaces and the ends trimmed.
+   *
+   * Uses `DOMParser` when the runtime exposes one; otherwise falls back to
+   * regex-based stripping of script/style blocks and remaining tags.
+   *
+   * @param html - Raw HTML markup.
+   * @returns The extracted text, or an empty string when there is none.
+   */
   private extractTextContent(html: string): string {
-    return html
-      .replace(/<script[^>]*>.*?<\/script>/gi, '')
-      .replace(/<style[^>]*>.*?<\/style>/gi, '')
-      .replace(/<[^>]*>/g, ' ')
-      .replace(/\s+/g, ' ')
-      .trim();
+    if (typeof DOMParser !== 'undefined') {
+      const parser = new DOMParser();
+      const doc = parser.parseFromString(html, 'text/html');
+      if (doc.body) {
+        // Drop elements whose contents are not page text before reading textContent.
+        doc.body.querySelectorAll('script,style,noscript').forEach((el) => el.remove());
+      }
+      return doc.body?.textContent?.replace(/\s+/g, ' ').trim() ?? '';
+    } else {
+      // Fallback: regex-based approach; each pass repeats until the string stops changing,
+      // because removing one block can splice the text around it into a new match.
+      let sanitized = html;
+      let previous: string;
+      // Remove <script>...</script> blocks repeatedly
+      do {
+        previous = sanitized;
+        sanitized = sanitized.replace(/<script[\s\S]*?>[\s\S]*?<\/script[\s\S]*?>/gi, '');
+      } while (sanitized !== previous);
+      // Remove <style>...</style> blocks repeatedly
+      do {
+        previous = sanitized;
+        sanitized = sanitized.replace(/<style[\s\S]*?>[\s\S]*?<\/style[\s\S]*?>/gi, '');
+      } while (sanitized !== previous);
+      return sanitized
+        .replace(/<[^>]*>/g, ' ')
+        .replace(/\s+/g, ' ')
+        .trim();
+    }
   }
 }