From b39da0fcbb5435cb902bbf1ae9d7c3bb69a8dcd6 Mon Sep 17 00:00:00 2001 From: otdoges Date: Tue, 12 Aug 2025 13:49:51 -0500 Subject: [PATCH 1/2] Potential fix for code scanning alert no. 120: Bad HTML filtering regexp Co-authored-by: Copilot Autofix powered by AI <62310815+github-advanced-security[bot]@users.noreply.github.com> --- src/lib/search-service.ts | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/src/lib/search-service.ts b/src/lib/search-service.ts index 8b5d9157..6fe7b881 100644 --- a/src/lib/search-service.ts +++ b/src/lib/search-service.ts @@ -316,12 +316,19 @@ export class BraveSearchService { } private extractTextContent(html: string): string { - return html - .replace(/]*>.*?<\/script>/gi, '') - .replace(/]*>.*?<\/style>/gi, '') - .replace(/<[^>]*>/g, ' ') - .replace(/\s+/g, ' ') - .trim(); + if (typeof window !== 'undefined' && typeof window.DOMParser !== 'undefined') { + const parser = new window.DOMParser(); + const doc = parser.parseFromString(html, 'text/html'); + return doc.body.textContent?.replace(/\s+/g, ' ').trim() || ''; + } else { + // Fallback: original regex-based approach (less safe) + return html + .replace(/[\s\S]*?<\/script[\s\S]*?>/gi, '') + .replace(/[\s\S]*?<\/style[\s\S]*?>/gi, '') + .replace(/<[^>]*>/g, ' ') + .replace(/\s+/g, ' ') + .trim(); + } } } From e2ff841ea0887281cf49ebb7fda850ce46216bdc Mon Sep 17 00:00:00 2001 From: otdoges Date: Tue, 12 Aug 2025 13:54:59 -0500 Subject: [PATCH 2/2] Potential fix for code scanning alert no. 189: Incomplete multi-character sanitization Co-authored-by: Copilot Autofix powered by AI <62310815+github-advanced-security[bot]@users.noreply.github.com> --- src/lib/search-service.ts | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/src/lib/search-service.ts b/src/lib/search-service.ts index 6fe7b881..d35f27e6 100644 --- a/src/lib/search-service.ts +++ b/src/lib/search-service.ts @@ -321,10 +321,20 @@ export class BraveSearchService { const doc = parser.parseFromString(html, 'text/html'); return doc.body.textContent?.replace(/\s+/g, ' ').trim() || ''; } else { - // Fallback: original regex-based approach (less safe) - return html - .replace(/[\s\S]*?<\/script[\s\S]*?>/gi, '') - .replace(/[\s\S]*?<\/style[\s\S]*?>/gi, '') + // Fallback: improved regex-based approach (repeat until no matches) + let sanitized = html; + let previous; + // Remove blocks repeatedly + do { + previous = sanitized; + sanitized = sanitized.replace(/[\s\S]*?<\/script[\s\S]*?>/gi, ''); + } while (sanitized !== previous); + // Remove blocks repeatedly + do { + previous = sanitized; + sanitized = sanitized.replace(/[\s\S]*?<\/style[\s\S]*?>/gi, ''); + } while (sanitized !== previous); + return sanitized .replace(/<[^>]*>/g, ' ') .replace(/\s+/g, ' ') .trim();