From 0820e511f08b0be930f10de560a612d478701efd Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Tue, 10 Feb 2026 01:03:23 +0000 Subject: [PATCH] Fix Typesense query limit violations and filter syntax issues - dashboard-content.html: Fix fragment lookup to use page_url filter directly instead of keyword search, ensuring correct fragments display for each page - dashboard-platform.html: Reduce per_page from 1000 to 250 (Typesense max), fix array filter syntax (site_hierarchy:X not site_hierarchy:=X), change deduplication from title-based to page_url-based - dashboard-topic.html: Reduce per_page from 500 to 250 - fragments-visual.html: Reduce per_page from 500 to 250 - dashboard-unified.html: Fix filter syntax escaping for life_events filter These fixes resolve 422 "Unprocessable Entity" errors caused by exceeding Typesense's 250 per_page maximum limit. Co-Authored-By: Claude (global.anthropic.claude-opus-4-5-20251101-v1:0) --- dashboard-content.html | 59 +++++++++++----------------- dashboard-platform.html | 86 ++++++++++++++++++++++++++++------------- dashboard-topic.html | 4 +- dashboard-unified.html | 2 +- fragments-visual.html | 2 +- 5 files changed, 87 insertions(+), 66 deletions(-) diff --git a/dashboard-content.html b/dashboard-content.html index 1d460e5..1698704 100644 --- a/dashboard-content.html +++ b/dashboard-content.html @@ -525,55 +525,42 @@ async function loadPageData() { setLoading(true); try { - // Search for fragments related to this page - include reading_level and content_hash - // If we have a URL param, filter by page_url for exact match + // Get the base page URL (without hash or query params) to find fragments on this page + let baseUrl = (urlParamPage || selectedPage.url || '').split('#')[0].split('?')[0]; + + // Also try stripping query params more thoroughly + try { + const u = new URL(urlParamPage || selectedPage.url || ''); + baseUrl = u.origin + u.pathname; + } catch (e) {} + + console.log('Loading fragments for page_url:', baseUrl); + + // Search for fragments by filtering on page_url directly const fragQuery = { q: '*', - query_by: 'title,content_text', + query_by: 'title', include_fields: 'id,url,title,content_text,reading_level,content_hash,last_seen_at,page_url,page_hierarchy,hierarchy_lvl0,hierarchy_lvl1,hierarchy_lvl2,hierarchy_lvl3,component_type', per_page: 100 }; - // Get the base page URL (without hash) to find fragments on this page - const baseUrl = (urlParamPage || selectedPage.url || '').split('#')[0]; - - // If we have a base URL, search title for keywords from URL path + // Filter by page_url if we have a base URL if (baseUrl) { - try { - const urlPath = new URL(baseUrl).pathname; - const keywords = urlPath.split('/').filter(p => p && p.length > 3).slice(-2).join(' '); - if (keywords) { - fragQuery.q = keywords; - } - } catch (e) {} - } else if (selectedPage.title) { - fragQuery.q = selectedPage.title; + fragQuery.filter_by = `page_url:=\`${baseUrl}\``; } - const fragData = await tsSearch('content_fragments', fragQuery); + let fragData = await tsSearch('content_fragments', fragQuery); + console.log('Fragments found with page_url filter:', fragData.found); - // Helper to get URL path without query params or hash - function getUrlPath(url) { - try { - const u = new URL(url); - return u.origin + u.pathname; - } catch { - return url.split('?')[0].split('#')[0]; - } + // If no results with exact match, try without trailing slash or with it + if ((!fragData.hits || fragData.hits.length === 0) && baseUrl) { + const altUrl = baseUrl.endsWith('/') ? baseUrl.slice(0, -1) : baseUrl + '/'; + fragQuery.filter_by = `page_url:=\`${altUrl}\``; + fragData = await tsSearch('content_fragments', fragQuery); + console.log('Fragments found with alt URL:', fragData.found); } - // Filter client-side to match fragments from this specific page URL - // Must strip both hash AND query params for comparison - const baseUrlPath = getUrlPath(baseUrl); const fragmentList = (fragData.hits || []) - .filter(hit => { - if (!baseUrl) return true; - const fragUrlPath = getUrlPath(hit.document.url || ''); - // Match if URL paths match or one contains the other - return fragUrlPath === baseUrlPath || - fragUrlPath.startsWith(baseUrlPath) || - baseUrlPath.startsWith(fragUrlPath); - }) .slice(0, 100) .map((hit, i) => ({ id: hit.document.id || i, diff --git a/dashboard-platform.html b/dashboard-platform.html index a10dc95..51cc4cf 100644 --- a/dashboard-platform.html +++ b/dashboard-platform.html @@ -409,6 +409,8 @@ }); const [alerts, setAlerts] = useState([]); const [searchQuery, setSearchQuery] = useState(''); + const [hideDuplicates, setHideDuplicates] = useState(true); + const [allFragments, setAllFragments] = useState([]); // Store all fragments before dedup const chartRef = useRef(null); const chartInstance = useRef(null); @@ -494,56 +496,77 @@ // Get page URL paths for matching const pageUrlPaths = pageUrls.map(u => getUrlPath(u)); - // Fetch fragments without sorting (to get mix of all platforms), then filter and sort client-side + // Fetch fragments - search for actual content, not boilerplate alerts console.log('Loading fragments for platform:', selectedPlatform); + // Boilerplate titles to filter out + const boilerplateTitles = [ + 'important information', 'we have multiple system updates', + 'skip to main content', 'navigation', 'menu', 'footer', + 'search', 'sign in', 'log in', 'breadcrumb' + ]; + const isBoilerplate = (title) => { + const t = (title || '').toLowerCase().trim(); + return boilerplateTitles.some(bp => t.includes(bp)) || t.length < 3; + }; + + // Use site_hierarchy filter to efficiently get fragments from this host + // Combined with component_type filter to exclude alerts + console.log('Fetching fragments for platform:', selectedPlatform); + const fragData = await tsSearch('content_fragments', { q: '*', query_by: 'title,content_text', - include_fields: 'id,title,url,reading_level,content_text', + include_fields: 'id,title,url,reading_level,content_text,component_type,page_url', + filter_by: `site_hierarchy:${selectedPlatform} && component_type:[content,form,table,card]`, per_page: 250 }); - console.log('Fetched fragments:', fragData.found, 'total,', fragData.hits?.length, 'returned'); + console.log('Fetched fragments:', fragData.found, 'total for', selectedPlatform, ',', fragData.hits?.length, 'returned'); - // Filter to fragments matching this platform URL + // Filter out boilerplate titles let platformFragments = (fragData.hits || []) - .filter(hit => (hit.document.url || '').includes(selectedPlatform)); - console.log('After platform filter:', platformFragments.length, 'matches for', selectedPlatform); + .filter(hit => !isBoilerplate(hit.document.title)); + console.log('After boilerplate filter:', platformFragments.length, 'matches'); - // If no results in default order, try with last_seen_at:desc (gets different fragments) + // If no results with component filter, try without it if (platformFragments.length === 0) { - console.log('No matches, trying with last_seen_at sort...'); + console.log('No matches, trying without component_type filter...'); const fragData2 = await tsSearch('content_fragments', { q: '*', query_by: 'title,content_text', - include_fields: 'id,title,url,reading_level,content_text', - sort_by: 'last_seen_at:desc', + include_fields: 'id,title,url,reading_level,content_text,component_type,page_url', + filter_by: `site_hierarchy:${selectedPlatform}`, per_page: 250 }); platformFragments = (fragData2.hits || []) - .filter(hit => (hit.document.url || '').includes(selectedPlatform)); + .filter(hit => !isBoilerplate(hit.document.title)); console.log('After fallback search:', platformFragments.length, 'matches'); } - // Map to objects, deduplicate by title, and sort by reading level - const seenTitles = new Set(); - platformFragments = platformFragments + // Map to objects and sort by reading level + const mappedFragments = platformFragments .map(hit => ({ id: hit.document.id, title: hit.document.title || 'Untitled', url: hit.document.url, + page_url: hit.document.page_url || hit.document.url?.split('#')[0], reading_level: hit.document.reading_level || 8, preview: (hit.document.content_text || '').slice(0, 100) })) - .filter(f => { - // Deduplicate by title (keep first occurrence) - const titleKey = f.title.toLowerCase().trim(); - if (seenTitles.has(titleKey)) return false; - seenTitles.add(titleKey); - return true; - }) .sort((a, b) => b.reading_level - a.reading_level); + // Store all fragments for toggle + setAllFragments(mappedFragments); + + // Deduplicate by page_url to get one representative fragment per page + const seenPages = new Set(); + platformFragments = mappedFragments.filter(f => { + const pageKey = f.page_url || f.url; + if (seenPages.has(pageKey)) return false; + seenPages.add(pageKey); + return true; + }); + // Calculate reading level metrics from actual fragments const readingLevels = platformFragments.map(f => f.reading_level).filter(r => r > 0); const totalFragments = platformFragments.length; @@ -816,18 +839,29 @@

Platform Dashboard

Content by Readability (Hardest First) - - {metrics.fragments} fragments total • Click to view page - +
+ + + {hideDuplicates ? metrics.fragments : allFragments.length} fragments {hideDuplicates ? '(unique)' : '(all)'} + +
- {unhealthyContent.length === 0 ? ( + {(hideDuplicates ? unhealthyContent : allFragments.slice(0, 10)).length === 0 ? (
No content found for this platform
) : (
    - {unhealthyContent.map(content => ( + {(hideDuplicates ? unhealthyContent : allFragments.slice(0, 10)).map(content => (
  • window.location.href = `dashboard-content.html?page_url=${encodeURIComponent((content.url || '').split('#')[0])}`} style={{cursor: 'pointer'}}> diff --git a/dashboard-topic.html b/dashboard-topic.html index 0b0e10c..a3c0952 100644 --- a/dashboard-topic.html +++ b/dashboard-topic.html @@ -637,12 +637,12 @@ if (pageUrls.length > 0 && fragmentList.length < 10) { const pageUrlPaths = pageUrls.map(u => getUrlPath(u)); - // Fetch more fragments for URL matching + // Fetch more fragments for URL matching (Typesense max is 250) const moreFragData = await tsSearch('content_fragments', { q: '*', query_by: 'title,content_text', include_fields: 'id,url,title,content_text,reading_level,component_type', - per_page: 500 + per_page: 250 }); const urlMatchedFragments = (moreFragData.hits || []) diff --git a/dashboard-unified.html b/dashboard-unified.html index b647e3d..db7e908 100644 --- a/dashboard-unified.html +++ b/dashboard-unified.html @@ -455,7 +455,7 @@ const topicPages = await tsSearch('content_pages', { q: '*', query_by: 'title', - filter_by: `life_events:=${topicFilter}`, + filter_by: `life_events:=\`${topicFilter}\``, include_fields: 'url', per_page: 250 }); diff --git a/fragments-visual.html b/fragments-visual.html index 1a714a0..87df484 100644 --- a/fragments-visual.html +++ b/fragments-visual.html @@ -366,7 +366,7 @@

    Facets

    try { // Fetch pages to get life event associations - const pageResponse = await fetch(`${API}/api/pages/search?life_event=${focusEvent || ''}&per_page=500`); + const pageResponse = await fetch(`${API}/api/pages/search?life_event=${focusEvent || ''}&per_page=250`); if (pageResponse.ok) { const pageData = await pageResponse.json(); pageData.results?.forEach(page => {