diff --git a/dashboard-content.html b/dashboard-content.html index 1d460e5..1698704 100644 --- a/dashboard-content.html +++ b/dashboard-content.html @@ -525,55 +525,42 @@ async function loadPageData() { setLoading(true); try { - // Search for fragments related to this page - include reading_level and content_hash - // If we have a URL param, filter by page_url for exact match + // Get the base page URL (without hash or query params) to find fragments on this page + let baseUrl = (urlParamPage || selectedPage.url || '').split('#')[0].split('?')[0]; + + // Also try stripping query params more thoroughly + try { + const u = new URL(urlParamPage || selectedPage.url || ''); + baseUrl = u.origin + u.pathname; + } catch (e) {} + + console.log('Loading fragments for page_url:', baseUrl); + + // Search for fragments by filtering on page_url directly const fragQuery = { q: '*', - query_by: 'title,content_text', + query_by: 'title', include_fields: 'id,url,title,content_text,reading_level,content_hash,last_seen_at,page_url,page_hierarchy,hierarchy_lvl0,hierarchy_lvl1,hierarchy_lvl2,hierarchy_lvl3,component_type', per_page: 100 }; - // Get the base page URL (without hash) to find fragments on this page - const baseUrl = (urlParamPage || selectedPage.url || '').split('#')[0]; - - // If we have a base URL, search title for keywords from URL path + // Filter by page_url if we have a base URL if (baseUrl) { - try { - const urlPath = new URL(baseUrl).pathname; - const keywords = urlPath.split('/').filter(p => p && p.length > 3).slice(-2).join(' '); - if (keywords) { - fragQuery.q = keywords; - } - } catch (e) {} - } else if (selectedPage.title) { - fragQuery.q = selectedPage.title; + fragQuery.filter_by = `page_url:=\`${baseUrl}\``; } - const fragData = await tsSearch('content_fragments', fragQuery); + let fragData = await tsSearch('content_fragments', fragQuery); + console.log('Fragments found with page_url filter:', fragData.found); - // Helper to get URL path without query params or hash - function getUrlPath(url) { - try { - const u = new URL(url); - return u.origin + u.pathname; - } catch { - return url.split('?')[0].split('#')[0]; - } + // If no results with exact match, try without trailing slash or with it + if ((!fragData.hits || fragData.hits.length === 0) && baseUrl) { + const altUrl = baseUrl.endsWith('/') ? baseUrl.slice(0, -1) : baseUrl + '/'; + fragQuery.filter_by = `page_url:=\`${altUrl}\``; + fragData = await tsSearch('content_fragments', fragQuery); + console.log('Fragments found with alt URL:', fragData.found); } - // Filter client-side to match fragments from this specific page URL - // Must strip both hash AND query params for comparison - const baseUrlPath = getUrlPath(baseUrl); const fragmentList = (fragData.hits || []) - .filter(hit => { - if (!baseUrl) return true; - const fragUrlPath = getUrlPath(hit.document.url || ''); - // Match if URL paths match or one contains the other - return fragUrlPath === baseUrlPath || - fragUrlPath.startsWith(baseUrlPath) || - baseUrlPath.startsWith(fragUrlPath); - }) .slice(0, 100) .map((hit, i) => ({ id: hit.document.id || i, diff --git a/dashboard-platform.html b/dashboard-platform.html index a10dc95..51cc4cf 100644 --- a/dashboard-platform.html +++ b/dashboard-platform.html @@ -409,6 +409,8 @@ }); const [alerts, setAlerts] = useState([]); const [searchQuery, setSearchQuery] = useState(''); + const [hideDuplicates, setHideDuplicates] = useState(true); + const [allFragments, setAllFragments] = useState([]); // Store all fragments before dedup const chartRef = useRef(null); const chartInstance = useRef(null); @@ -494,56 +496,77 @@ // Get page URL paths for matching const pageUrlPaths = pageUrls.map(u => getUrlPath(u)); - // Fetch fragments without sorting (to get mix of all platforms), then filter and sort client-side + // Fetch fragments - search for actual content, not boilerplate alerts console.log('Loading fragments for platform:', selectedPlatform); + // Boilerplate titles to filter out + const boilerplateTitles = [ + 'important information', 'we have multiple system updates', + 'skip to main content', 'navigation', 'menu', 'footer', + 'search', 'sign in', 'log in', 'breadcrumb' + ]; + const isBoilerplate = (title) => { + const t = (title || '').toLowerCase().trim(); + return boilerplateTitles.some(bp => t.includes(bp)) || t.length < 3; + }; + + // Use site_hierarchy filter to efficiently get fragments from this host + // Combined with component_type filter to exclude alerts + console.log('Fetching fragments for platform:', selectedPlatform); + const fragData = await tsSearch('content_fragments', { q: '*', query_by: 'title,content_text', - include_fields: 'id,title,url,reading_level,content_text', + include_fields: 'id,title,url,reading_level,content_text,component_type,page_url', + filter_by: `site_hierarchy:${selectedPlatform} && component_type:[content,form,table,card]`, per_page: 250 }); - console.log('Fetched fragments:', fragData.found, 'total,', fragData.hits?.length, 'returned'); + console.log('Fetched fragments:', fragData.found, 'total for', selectedPlatform, ',', fragData.hits?.length, 'returned'); - // Filter to fragments matching this platform URL + // Filter out boilerplate titles let platformFragments = (fragData.hits || []) - .filter(hit => (hit.document.url || '').includes(selectedPlatform)); - console.log('After platform filter:', platformFragments.length, 'matches for', selectedPlatform); + .filter(hit => !isBoilerplate(hit.document.title)); + console.log('After boilerplate filter:', platformFragments.length, 'matches'); - // If no results in default order, try with last_seen_at:desc (gets different fragments) + // If no results with component filter, try without it if (platformFragments.length === 0) { - console.log('No matches, trying with last_seen_at sort...'); + console.log('No matches, trying without component_type filter...'); const fragData2 = await tsSearch('content_fragments', { q: '*', query_by: 'title,content_text', - include_fields: 'id,title,url,reading_level,content_text', - sort_by: 'last_seen_at:desc', + include_fields: 'id,title,url,reading_level,content_text,component_type,page_url', + filter_by: `site_hierarchy:${selectedPlatform}`, per_page: 250 }); platformFragments = (fragData2.hits || []) - .filter(hit => (hit.document.url || '').includes(selectedPlatform)); + .filter(hit => !isBoilerplate(hit.document.title)); console.log('After fallback search:', platformFragments.length, 'matches'); } - // Map to objects, deduplicate by title, and sort by reading level - const seenTitles = new Set(); - platformFragments = platformFragments + // Map to objects and sort by reading level + const mappedFragments = platformFragments .map(hit => ({ id: hit.document.id, title: hit.document.title || 'Untitled', url: hit.document.url, + page_url: hit.document.page_url || hit.document.url?.split('#')[0], reading_level: hit.document.reading_level || 8, preview: (hit.document.content_text || '').slice(0, 100) })) - .filter(f => { - // Deduplicate by title (keep first occurrence) - const titleKey = f.title.toLowerCase().trim(); - if (seenTitles.has(titleKey)) return false; - seenTitles.add(titleKey); - return true; - }) .sort((a, b) => b.reading_level - a.reading_level); + // Store all fragments for toggle + setAllFragments(mappedFragments); + + // Deduplicate by page_url to get one representative fragment per page + const seenPages = new Set(); + platformFragments = mappedFragments.filter(f => { + const pageKey = f.page_url || f.url; + if (seenPages.has(pageKey)) return false; + seenPages.add(pageKey); + return true; + }); + // Calculate reading level metrics from actual fragments const readingLevels = platformFragments.map(f => f.reading_level).filter(r => r > 0); const totalFragments = platformFragments.length; @@ -816,18 +839,29 @@

Platform Dashboard

Content by Readability (Hardest First) - - {metrics.fragments} fragments total • Click to view page - +
+ + + {hideDuplicates ? metrics.fragments : allFragments.length} fragments {hideDuplicates ? '(unique)' : '(all)'} + +
- {unhealthyContent.length === 0 ? ( + {(hideDuplicates ? unhealthyContent : allFragments.slice(0, 10)).length === 0 ? (
No content found for this platform
) : (