From 0820e511f08b0be930f10de560a612d478701efd Mon Sep 17 00:00:00 2001
From: Ubuntu <ubuntu@ip-172-31-7-215.ap-southeast-2.compute.internal>
Date: Tue, 10 Feb 2026 01:03:23 +0000
Subject: [PATCH] Fix Typesense query limit violations and filter syntax issues

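- dashboard-content.html: Fix fragment lookup to filter on page_url directly
  instead of searching by keywords from the URL path, so that the correct
  fragments are displayed for each page; retry with the trailing slash
  toggled when the exact match returns no hits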
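- dashboard-platform.html: Filter fragments server-side by site_hierarchy
  (array filter syntax is site_hierarchy:X, not site_hierarchy:=X) and
  component_type instead of matching URLs client-side, keeping per_page at
  250 (Typesense max); drop boilerplate titles; change deduplication from
  title-based to page_url-based and add a "Hide duplicates" toggle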
- dashboard-topic.html: Reduce per_page from 500 to 250
- fragments-visual.html: Reduce per_page from 500 to 250
- dashboard-unified.html: Fix filter syntax escaping for life_events filter

The per_page reductions resolve 422 "Unprocessable Entity" errors caused by
requesting more than Typesense's maximum of 250 results per page.

Co-Authored-By: Claude (global.anthropic.claude-opus-4-5-20251101-v1:0) <noreply@anthropic.com>
---
 dashboard-content.html  | 59 +++++++++++-----------------
 dashboard-platform.html | 86 ++++++++++++++++++++++++++++-------------
 dashboard-topic.html    |  4 +-
 dashboard-unified.html  |  2 +-
 fragments-visual.html   |  2 +-
 5 files changed, 87 insertions(+), 66 deletions(-)

diff --git a/dashboard-content.html b/dashboard-content.html
index 1d460e5..1698704 100644
--- a/dashboard-content.html
+++ b/dashboard-content.html
@@ -525,55 +525,42 @@
             async function loadPageData() {
                 setLoading(true);
                 try {
-                    // Search for fragments related to this page - include reading_level and content_hash
-                    // If we have a URL param, filter by page_url for exact match
+                    // Get the base page URL (without hash or query params) to find fragments on this page
+                    let baseUrl = (urlParamPage || selectedPage.url || '').split('#')[0].split('?')[0];
+
+                    // Also try stripping query params more thoroughly
+                    try {
+                        const u = new URL(urlParamPage || selectedPage.url || '');
+                        baseUrl = u.origin + u.pathname;
+                    } catch (e) {}
+
+                    console.log('Loading fragments for page_url:', baseUrl);
+
+                    // Search for fragments by filtering on page_url directly
                     const fragQuery = {
                         q: '*',
-                        query_by: 'title,content_text',
+                        query_by: 'title',
                         include_fields: 'id,url,title,content_text,reading_level,content_hash,last_seen_at,page_url,page_hierarchy,hierarchy_lvl0,hierarchy_lvl1,hierarchy_lvl2,hierarchy_lvl3,component_type',
                         per_page: 100
                     };
 
-                    // Get the base page URL (without hash) to find fragments on this page
-                    const baseUrl = (urlParamPage || selectedPage.url || '').split('#')[0];
-
-                    // If we have a base URL, search title for keywords from URL path
+                    // Filter by page_url if we have a base URL
                     if (baseUrl) {
-                        try {
-                            const urlPath = new URL(baseUrl).pathname;
-                            const keywords = urlPath.split('/').filter(p => p && p.length > 3).slice(-2).join(' ');
-                            if (keywords) {
-                                fragQuery.q = keywords;
-                            }
-                        } catch (e) {}
-                    } else if (selectedPage.title) {
-                        fragQuery.q = selectedPage.title;
+                        fragQuery.filter_by = `page_url:=\`${baseUrl}\``;
                     }
 
-                    const fragData = await tsSearch('content_fragments', fragQuery);
+                    let fragData = await tsSearch('content_fragments', fragQuery);
+                    console.log('Fragments found with page_url filter:', fragData.found);
 
-                    // Helper to get URL path without query params or hash
-                    function getUrlPath(url) {
-                        try {
-                            const u = new URL(url);
-                            return u.origin + u.pathname;
-                        } catch {
-                            return url.split('?')[0].split('#')[0];
-                        }
+                    // If no results with exact match, try without trailing slash or with it
+                    if ((!fragData.hits || fragData.hits.length === 0) && baseUrl) {
+                        const altUrl = baseUrl.endsWith('/') ? baseUrl.slice(0, -1) : baseUrl + '/';
+                        fragQuery.filter_by = `page_url:=\`${altUrl}\``;
+                        fragData = await tsSearch('content_fragments', fragQuery);
+                        console.log('Fragments found with alt URL:', fragData.found);
                     }
 
-                    // Filter client-side to match fragments from this specific page URL
-                    // Must strip both hash AND query params for comparison
-                    const baseUrlPath = getUrlPath(baseUrl);
                     const fragmentList = (fragData.hits || [])
-                        .filter(hit => {
-                            if (!baseUrl) return true;
-                            const fragUrlPath = getUrlPath(hit.document.url || '');
-                            // Match if URL paths match or one contains the other
-                            return fragUrlPath === baseUrlPath ||
-                                   fragUrlPath.startsWith(baseUrlPath) ||
-                                   baseUrlPath.startsWith(fragUrlPath);
-                        })
                         .slice(0, 100)
                         .map((hit, i) => ({
                             id: hit.document.id || i,
diff --git a/dashboard-platform.html b/dashboard-platform.html
index a10dc95..51cc4cf 100644
--- a/dashboard-platform.html
+++ b/dashboard-platform.html
@@ -409,6 +409,8 @@
             });
             const [alerts, setAlerts] = useState([]);
             const [searchQuery, setSearchQuery] = useState('');
+            const [hideDuplicates, setHideDuplicates] = useState(true);
+            const [allFragments, setAllFragments] = useState([]); // Store all fragments before dedup
             const chartRef = useRef(null);
             const chartInstance = useRef(null);
 
@@ -494,56 +496,77 @@
                     // Get page URL paths for matching
                     const pageUrlPaths = pageUrls.map(u => getUrlPath(u));
 
-                    // Fetch fragments without sorting (to get mix of all platforms), then filter and sort client-side
+                    // Fetch fragments - search for actual content, not boilerplate alerts
                     console.log('Loading fragments for platform:', selectedPlatform);
 
+                    // Boilerplate titles to filter out
+                    const boilerplateTitles = [
+                        'important information', 'we have multiple system updates',
+                        'skip to main content', 'navigation', 'menu', 'footer',
+                        'search', 'sign in', 'log in', 'breadcrumb'
+                    ];
+                    const isBoilerplate = (title) => {
+                        const t = (title || '').toLowerCase().trim();
+                        return boilerplateTitles.some(bp => t.includes(bp)) || t.length < 3;
+                    };
+
+                    // Use site_hierarchy filter to efficiently get fragments from this host
+                    // Combined with component_type filter to exclude alerts
+                    console.log('Fetching fragments for platform:', selectedPlatform);
+
                     const fragData = await tsSearch('content_fragments', {
                         q: '*',
                         query_by: 'title,content_text',
-                        include_fields: 'id,title,url,reading_level,content_text',
+                        include_fields: 'id,title,url,reading_level,content_text,component_type,page_url',
+                        filter_by: `site_hierarchy:${selectedPlatform} && component_type:[content,form,table,card]`,
                         per_page: 250
                     });
-                    console.log('Fetched fragments:', fragData.found, 'total,', fragData.hits?.length, 'returned');
+                    console.log('Fetched fragments:', fragData.found, 'total for', selectedPlatform, ',', fragData.hits?.length, 'returned');
 
-                    // Filter to fragments matching this platform URL
+                    // Filter out boilerplate titles
                     let platformFragments = (fragData.hits || [])
-                        .filter(hit => (hit.document.url || '').includes(selectedPlatform));
-                    console.log('After platform filter:', platformFragments.length, 'matches for', selectedPlatform);
+                        .filter(hit => !isBoilerplate(hit.document.title));
+                    console.log('After boilerplate filter:', platformFragments.length, 'matches');
 
-                    // If no results in default order, try with last_seen_at:desc (gets different fragments)
+                    // If no results with component filter, try without it
                     if (platformFragments.length === 0) {
-                        console.log('No matches, trying with last_seen_at sort...');
+                        console.log('No matches, trying without component_type filter...');
                         const fragData2 = await tsSearch('content_fragments', {
                             q: '*',
                             query_by: 'title,content_text',
-                            include_fields: 'id,title,url,reading_level,content_text',
-                            sort_by: 'last_seen_at:desc',
+                            include_fields: 'id,title,url,reading_level,content_text,component_type,page_url',
+                            filter_by: `site_hierarchy:${selectedPlatform}`,
                             per_page: 250
                         });
                         platformFragments = (fragData2.hits || [])
-                            .filter(hit => (hit.document.url || '').includes(selectedPlatform));
+                            .filter(hit => !isBoilerplate(hit.document.title));
                         console.log('After fallback search:', platformFragments.length, 'matches');
                     }
 
-                    // Map to objects, deduplicate by title, and sort by reading level
-                    const seenTitles = new Set();
-                    platformFragments = platformFragments
+                    // Map to objects and sort by reading level
+                    const mappedFragments = platformFragments
                         .map(hit => ({
                             id: hit.document.id,
                             title: hit.document.title || 'Untitled',
                             url: hit.document.url,
+                            page_url: hit.document.page_url || hit.document.url?.split('#')[0],
                             reading_level: hit.document.reading_level || 8,
                             preview: (hit.document.content_text || '').slice(0, 100)
                         }))
-                        .filter(f => {
-                            // Deduplicate by title (keep first occurrence)
-                            const titleKey = f.title.toLowerCase().trim();
-                            if (seenTitles.has(titleKey)) return false;
-                            seenTitles.add(titleKey);
-                            return true;
-                        })
                         .sort((a, b) => b.reading_level - a.reading_level);
 
+                    // Store all fragments for toggle
+                    setAllFragments(mappedFragments);
+
+                    // Deduplicate by page_url to get one representative fragment per page
+                    const seenPages = new Set();
+                    platformFragments = mappedFragments.filter(f => {
+                        const pageKey = f.page_url || f.url;
+                        if (seenPages.has(pageKey)) return false;
+                        seenPages.add(pageKey);
+                        return true;
+                    });
+
                     // Calculate reading level metrics from actual fragments
                     const readingLevels = platformFragments.map(f => f.reading_level).filter(r => r > 0);
                     const totalFragments = platformFragments.length;
@@ -816,18 +839,29 @@ <h1>Platform Dashboard</h1>
                                         <div className="card" style={{gridColumn: 'span 2'}}>
                                             <div className="card-header">
                                                 <span className="card-title">Content by Readability (Hardest First)</span>
-                                                <span style={{fontSize: '0.75rem', color: '#71767b'}}>
-                                                    {metrics.fragments} fragments total • Click to view page
-                                                </span>
+                                                <div style={{display: 'flex', alignItems: 'center', gap: '1rem'}}>
+                                                    <label style={{fontSize: '0.75rem', color: '#71767b', display: 'flex', alignItems: 'center', gap: '0.5rem', cursor: 'pointer'}}>
+                                                        <input
+                                                            type="checkbox"
+                                                            checked={hideDuplicates}
+                                                            onChange={(e) => setHideDuplicates(e.target.checked)}
+                                                            style={{cursor: 'pointer'}}
+                                                        />
+                                                        Hide duplicates
+                                                    </label>
+                                                    <span style={{fontSize: '0.75rem', color: '#71767b'}}>
+                                                        {hideDuplicates ? metrics.fragments : allFragments.length} fragments {hideDuplicates ? '(unique)' : '(all)'}
+                                                    </span>
+                                                </div>
                                             </div>
                                             <div className="card-body">
-                                                {unhealthyContent.length === 0 ? (
+                                                {(hideDuplicates ? unhealthyContent : allFragments.slice(0, 10)).length === 0 ? (
                                                     <div style={{textAlign: 'center', padding: '2rem', color: '#71767b'}}>
                                                         No content found for this platform
                                                     </div>
                                                 ) : (
                                                     <ul className="topic-list">
-                                                        {unhealthyContent.map(content => (
+                                                        {(hideDuplicates ? unhealthyContent : allFragments.slice(0, 10)).map(content => (
                                                             <li key={content.id} className="topic-item"
                                                                 onClick={() => window.location.href = `dashboard-content.html?page_url=${encodeURIComponent((content.url || '').split('#')[0])}`}
                                                                 style={{cursor: 'pointer'}}>
diff --git a/dashboard-topic.html b/dashboard-topic.html
index 0b0e10c..a3c0952 100644
--- a/dashboard-topic.html
+++ b/dashboard-topic.html
@@ -637,12 +637,12 @@
                     if (pageUrls.length > 0 && fragmentList.length < 10) {
                         const pageUrlPaths = pageUrls.map(u => getUrlPath(u));
 
-                        // Fetch more fragments for URL matching
+                        // Fetch more fragments for URL matching (Typesense max is 250)
                         const moreFragData = await tsSearch('content_fragments', {
                             q: '*',
                             query_by: 'title,content_text',
                             include_fields: 'id,url,title,content_text,reading_level,component_type',
-                            per_page: 500
+                            per_page: 250
                         });
 
                         const urlMatchedFragments = (moreFragData.hits || [])
diff --git a/dashboard-unified.html b/dashboard-unified.html
index b647e3d..db7e908 100644
--- a/dashboard-unified.html
+++ b/dashboard-unified.html
@@ -455,7 +455,7 @@
                         const topicPages = await tsSearch('content_pages', {
                             q: '*',
                             query_by: 'title',
-                            filter_by: `life_events:=${topicFilter}`,
+                            filter_by: `life_events:=\`${topicFilter}\``,
                             include_fields: 'url',
                             per_page: 250
                         });
diff --git a/fragments-visual.html b/fragments-visual.html
index 1a714a0..87df484 100644
--- a/fragments-visual.html
+++ b/fragments-visual.html
@@ -366,7 +366,7 @@ <h3>Facets</h3>
       
       try {
         // Fetch pages to get life event associations
-        const pageResponse = await fetch(`${API}/api/pages/search?life_event=${focusEvent || ''}&per_page=500`);
+        const pageResponse = await fetch(`${API}/api/pages/search?life_event=${focusEvent || ''}&per_page=250`);
         if (pageResponse.ok) {
           const pageData = await pageResponse.json();
           pageData.results?.forEach(page => {