koala73 · koala73 · Feb 28, 2026 · Feb 28, 2026 · Feb 28, 2026 · Feb 28, 2026
diff --git a/package-lock.json b/package-lock.json
diff --git a/package.json b/package.json
@@ -29,7 +29,7 @@
     "test:e2e:finance": "cross-env VITE_VARIANT=finance playwright test",
     "test:e2e:runtime": "cross-env VITE_VARIANT=full playwright test e2e/runtime-fetch.spec.ts",
     "test:e2e": "npm run test:e2e:runtime && npm run test:e2e:full && npm run test:e2e:tech && npm run test:e2e:finance",
-    "test:data": "node --test tests/*.test.mjs",
+    "test:data": "tsx --test tests/*.test.mjs tests/*.test.mts",
     "test:feeds": "node scripts/validate-rss-feeds.mjs",
     "test:sidecar": "node --test src-tauri/sidecar/local-api-server.test.mjs api/_cors.test.mjs api/youtube/embed.test.mjs api/cyber-threats.test.mjs api/usni-fleet.test.mjs scripts/ais-relay-rss.test.cjs api/loaders-xml-wms-regression.test.mjs",
     "test:e2e:visual:full": "cross-env VITE_VARIANT=full playwright test -g \"matches golden screenshots per layer and zoom\"",
@@ -64,6 +64,7 @@
     "cross-env": "^10.1.0",
     "esbuild": "^0.27.3",
     "markdownlint-cli2": "^0.20.0",
+    "tsx": "^4.21.0",
     "typescript": "^5.7.2",
     "vite": "^6.0.7",
     "vite-plugin-pwa": "^1.2.0",

diff --git a/src/app/data-loader.ts b/src/app/data-loader.ts
@@ -12,6 +12,7 @@ import {
   LAYER_TO_SOURCE,
 } from '@/config';
 import { INTEL_HOTSPOTS, CONFLICT_ZONES } from '@/config/geo';
+import { tokenizeForMatch, matchKeyword } from '@/utils/keyword-match';
 import {
   fetchCategoryFeeds,
   getFeedFailures,
@@ -315,15 +316,15 @@ export class DataLoaderManager implements AppModule {
   }
 
   private findFlashLocation(title: string): { lat: number; lon: number } | null {
-    const titleLower = title.toLowerCase();
+    const tokens = tokenizeForMatch(title);
     let bestMatch: { lat: number; lon: number; matches: number } | null = null;
 
     const countKeywordMatches = (keywords: string[] | undefined): number => {
       if (!keywords) return 0;
       let matches = 0;
       for (const keyword of keywords) {
         const cleaned = keyword.trim().toLowerCase();
-        if (cleaned.length >= 3 && titleLower.includes(cleaned)) {
+        if (cleaned.length >= 3 && matchKeyword(tokens, cleaned)) {
           matches++;
         }
       }

diff --git a/src/components/DeckGLMap.ts b/src/components/DeckGLMap.ts
@@ -44,6 +44,7 @@ import { ArcLayer } from '@deck.gl/layers';
 import { HeatmapLayer } from '@deck.gl/aggregation-layers';
 import type { WeatherAlert } from '@/services/weather';
 import { escapeHtml } from '@/utils/sanitize';
+import { tokenizeForMatch, matchKeyword, matchesAnyKeyword, findMatchingKeywords } from '@/utils/keyword-match';
 import { t } from '@/services/i18n';
 import { debounce, rafSchedule, getCurrentTheme } from '@/utils/index';
 import {
@@ -3796,10 +3797,9 @@ export class DeckGLMap {
     const matchCounts = new Map<string, number>();
 
     recentNews.forEach(item => {
+      const tokens = tokenizeForMatch(item.title);
       this.hotspots.forEach(hotspot => {
-        if (hotspot.keywords.some(kw =>
-          item.title.toLowerCase().includes(kw.toLowerCase())
-        )) {
+        if (matchesAnyKeyword(tokens, hotspot.keywords)) {
           breakingKeywords.add(hotspot.id);
           matchCounts.set(hotspot.id, (matchCounts.get(hotspot.id) || 0) + 1);
         }
@@ -3820,32 +3820,27 @@ export class DeckGLMap {
 
   /** Get news items related to a hotspot by keyword matching */
   private getRelatedNews(hotspot: Hotspot): NewsItem[] {
-    // High-priority conflict keywords that indicate the news is really about another topic
-    const conflictTopics = ['gaza', 'ukraine', 'russia', 'israel', 'iran', 'china', 'taiwan', 'korea', 'syria'];
+    const conflictTopics = ['gaza', 'ukraine', 'ukrainian', 'russia', 'russian', 'israel', 'israeli', 'iran', 'iranian', 'china', 'chinese', 'taiwan', 'taiwanese', 'korea', 'korean', 'syria', 'syrian'];
 
     return this.news
       .map((item) => {
-        const titleLower = item.title.toLowerCase();
-        const matchedKeywords = hotspot.keywords.filter((kw) => titleLower.includes(kw.toLowerCase()));
+        const tokens = tokenizeForMatch(item.title);
+        const matchedKeywords = findMatchingKeywords(tokens, hotspot.keywords);
 
         if (matchedKeywords.length === 0) return null;
 
-        // Check if this news mentions other hotspot conflict topics
         const conflictMatches = conflictTopics.filter(t =>
-          titleLower.includes(t) && !hotspot.keywords.some(k => k.toLowerCase().includes(t))
+          matchKeyword(tokens, t) && !hotspot.keywords.some(k => k.toLowerCase().includes(t))
         );
 
-        // If article mentions a major conflict topic that isn't this hotspot, deprioritize heavily
         if (conflictMatches.length > 0) {
-          // Only include if it ALSO has a strong local keyword (city name, agency)
           const strongLocalMatch = matchedKeywords.some(kw =>
             kw.toLowerCase() === hotspot.name.toLowerCase() ||
-            hotspot.agencies?.some(a => titleLower.includes(a.toLowerCase()))
+            hotspot.agencies?.some(a => matchKeyword(tokens, a))
           );
           if (!strongLocalMatch) return null;
         }
 
-        // Score: more keyword matches = more relevant
         const score = matchedKeywords.length;
         return { item, score };
       })

diff --git a/src/components/Map.ts b/src/components/Map.ts
@@ -42,6 +42,7 @@ import {
   CENTRAL_BANKS,
   COMMODITY_HUBS,
 } from '@/config';
+import { tokenizeForMatch, matchKeyword, findMatchingKeywords } from '@/utils/keyword-match';
 import { MapPopup } from './MapPopup';
 import {
   updateHotspotEscalation,
@@ -2740,32 +2741,27 @@ export class MapComponent {
   }
 
   private getRelatedNews(hotspot: Hotspot): NewsItem[] {
-    // High-priority conflict keywords that indicate the news is really about another topic
-    const conflictTopics = ['gaza', 'ukraine', 'russia', 'israel', 'iran', 'china', 'taiwan', 'korea', 'syria'];
+    const conflictTopics = ['gaza', 'ukraine', 'ukrainian', 'russia', 'russian', 'israel', 'israeli', 'iran', 'iranian', 'china', 'chinese', 'taiwan', 'taiwanese', 'korea', 'korean', 'syria', 'syrian'];
 
     return this.news
       .map((item) => {
-        const titleLower = item.title.toLowerCase();
-        const matchedKeywords = hotspot.keywords.filter((kw) => titleLower.includes(kw.toLowerCase()));
+        const tokens = tokenizeForMatch(item.title);
+        const matchedKeywords = findMatchingKeywords(tokens, hotspot.keywords);
 
         if (matchedKeywords.length === 0) return null;
 
-        // Check if this news mentions other hotspot conflict topics
         const conflictMatches = conflictTopics.filter(t =>
-          titleLower.includes(t) && !hotspot.keywords.some(k => k.toLowerCase().includes(t))
+          matchKeyword(tokens, t) && !hotspot.keywords.some(k => k.toLowerCase().includes(t))
         );
 
-        // If article mentions a major conflict topic that isn't this hotspot, deprioritize heavily
         if (conflictMatches.length > 0) {
-          // Only include if it ALSO has a strong local keyword (city name, agency)
           const strongLocalMatch = matchedKeywords.some(kw =>
             kw.toLowerCase() === hotspot.name.toLowerCase() ||
-            hotspot.agencies?.some(a => titleLower.includes(a.toLowerCase()))
+            hotspot.agencies?.some(a => matchKeyword(tokens, a))
           );
           if (!strongLocalMatch) return null;
         }
 
-        // Score: more keyword matches = more relevant
         const score = matchedKeywords.length;
         return { item, score };
       })
@@ -2784,8 +2780,8 @@ export class MapComponent {
       let matchedCount = 0;
 
       news.forEach((item) => {
-        const titleLower = item.title.toLowerCase();
-        const matches = spot.keywords.filter((kw) => titleLower.includes(kw.toLowerCase()));
+        const tokens = tokenizeForMatch(item.title);
+        const matches = spot.keywords.filter((kw) => matchKeyword(tokens, kw));
 
         if (matches.length > 0) {
           matchedCount++;

diff --git a/src/config/geo.ts b/src/config/geo.ts
@@ -81,7 +81,7 @@ export const INTEL_HOTSPOTS: Hotspot[] = [
     lat: 38.9,
     lon: -77.0,
     location: 'Washington D.C., USA',
-    keywords: ['pentagon', 'white house', 'congress', 'cia', 'nsa', 'washington', 'biden', 'trump', 'house', 'senate', 'supreme court', 'vance', 'elon', 'us '],
+    keywords: ['pentagon', 'white house', 'congress', 'cia', 'nsa', 'washington', 'biden', 'trump', 'senate', 'supreme court', 'vance', 'elon'],
     agencies: ['Pentagon', 'CIA', 'NSA', 'State Dept'],
     description: 'US government and military headquarters. Intelligence community center.',
     status: 'Monitoring',

diff --git a/src/services/country-instability.ts b/src/services/country-instability.ts
@@ -1,4 +1,5 @@
 import type { SocialUnrestEvent, MilitaryFlight, MilitaryVessel, ClusteredEvent, InternetOutage } from '@/types';
+import { tokenizeForMatch, matchKeyword } from '@/utils/keyword-match';
 import { INTEL_HOTSPOTS, CONFLICT_ZONES, STRATEGIC_WATERWAYS } from '@/config/geo';
 import { CURATED_COUNTRIES, DEFAULT_BASELINE_RISK, DEFAULT_EVENT_MULTIPLIER, getHotspotCountries } from '@/config/countries';
 import { focalPointDetector } from './focal-point-detector';
@@ -136,11 +137,11 @@ export function getPreviousScores(): Map<string, number> {
 export type { CountryData };
 
 function normalizeCountryName(name: string): string | null {
-  const lower = name.toLowerCase();
+  const tokens = tokenizeForMatch(name); // tokenize once; the same tokens are checked against every country's keywords
   for (const [code, cfg] of Object.entries(CURATED_COUNTRIES)) {
-    if (cfg.scoringKeywords.some(kw => lower.includes(kw))) return code;
+    if (cfg.scoringKeywords.some(kw => matchKeyword(tokens, kw))) return code; // first curated country with a matching keyword wins
   }
-  return nameToCountryCode(lower);
+  return nameToCountryCode(name.toLowerCase()); // fallback still receives the lowercased raw name, as the removed line did
 }
 
 export function ingestProtestsForCII(events: SocialUnrestEvent[]): void {
@@ -347,16 +348,16 @@ export function ingestMilitaryForCII(flights: MilitaryFlight[], vessels: Militar
 
 export function ingestNewsForCII(events: ClusteredEvent[]): void {
   for (const e of events) {
-    const title = e.primaryTitle.toLowerCase();
+    const tokens = tokenizeForMatch(e.primaryTitle);
     const matched = new Set<string>();
 
     for (const [code, cfg] of Object.entries(CURATED_COUNTRIES)) {
-      if (cfg.scoringKeywords.some(kw => title.includes(kw))) {
+      if (cfg.scoringKeywords.some(kw => matchKeyword(tokens, kw))) {
         matched.add(code);
       }
     }
 
-    for (const code of matchCountryNamesInText(title)) {
+    for (const code of matchCountryNamesInText(e.primaryTitle.toLowerCase())) {
       matched.add(code);
     }
 

diff --git a/src/services/geo-hub-index.ts b/src/services/geo-hub-index.ts
@@ -1,3 +1,4 @@
+import { tokenizeForMatch, matchKeyword } from '@/utils/keyword-match';
 // Geopolitical Hub Index - aggregates news by strategic locations
 
 export interface GeoHubLocation {
@@ -109,22 +110,13 @@ export interface GeoHubMatch {
 export function inferGeoHubsFromTitle(title: string): GeoHubMatch[] {
   const index = buildGeoHubIndex();
   const matches: GeoHubMatch[] = [];
-  const titleLower = title.toLowerCase();
+  const tokens = tokenizeForMatch(title);
   const seenHubs = new Set<string>();
 
   for (const [keyword, hubIds] of index.byKeyword) {
     if (keyword.length < 2) continue;
 
-    // Word boundary check for short keywords to avoid false positives
-    const regex = keyword.length < 5
-      ? new RegExp(`\\b${keyword}\\b`, 'i')
-      : null;
-
-    const found = regex
-      ? regex.test(titleLower)
-      : titleLower.includes(keyword);
-
-    if (found) {
+    if (matchKeyword(tokens, keyword)) {
       for (const hubId of hubIds) {
         if (seenHubs.has(hubId)) continue;
         seenHubs.add(hubId);

diff --git a/src/services/related-assets.ts b/src/services/related-assets.ts
@@ -1,4 +1,5 @@
 import type { ClusteredEvent, RelatedAsset, AssetType, RelatedAssetContext } from '@/types';
+import { tokenizeForMatch, matchKeyword } from '@/utils/keyword-match';
 import { t } from '@/services/i18n';
 import {
   INTEL_HOTSPOTS,
@@ -27,24 +28,20 @@ interface AssetOrigin {
   label: string;
 }
 
-function toTitleLower(titles: string[]): string[] {
-  return titles.map(title => title.toLowerCase());
-}
-
 function detectAssetTypes(titles: string[]): AssetType[] {
-  const normalized = toTitleLower(titles);
+  const tokenized = titles.map(title => tokenizeForMatch(title)); // param is `title`, not `t`, so the i18n `t` import is not shadowed
   const types = Object.entries(ASSET_KEYWORDS)
     .filter(([, keywords]) =>
-      normalized.some(title => keywords.some(keyword => title.includes(keyword)))
+      tokenized.some(tokens => keywords.some(keyword => matchKeyword(tokens, keyword))) // keep a type when any title matches any of its keywords
     )
     .map(([type]) => type as AssetType);
   return types;
 }
 
 function countKeywordMatches(titles: string[], keywords: string[]): number {
-  const normalized = toTitleLower(titles);
+  const tokenized = titles.map(title => tokenizeForMatch(title)); // param is `title`, not `t`, so the i18n `t` import is not shadowed
   return keywords.reduce((count, keyword) => {
-    return count + normalized.filter(title => title.includes(keyword)).length;
+    return count + tokenized.filter(tokens => matchKeyword(tokens, keyword)).length; // adds one per title that matches this keyword
   }, 0);
 }
 

diff --git a/src/services/story-data.ts b/src/services/story-data.ts
@@ -2,6 +2,7 @@ import { calculateCII, type CountryScore } from './country-instability';
 import type { ClusteredEvent } from '@/types';
 import type { ThreatLevel } from './threat-classifier';
 import { CURATED_COUNTRIES } from '@/config/countries';
+import { tokenizeForMatch, matchKeyword } from '@/utils/keyword-match';
 
 export interface StoryData {
   countryCode: string;
@@ -65,8 +66,8 @@ export function collectStoryData(
 
   const keywords = CURATED_COUNTRIES[countryCode]?.scoringKeywords || [countryName.toLowerCase()];
   const countryNews = allNews.filter(e => {
-    const lower = e.primaryTitle.toLowerCase();
-    return keywords.some(kw => lower.includes(kw));
+    const tokens = tokenizeForMatch(e.primaryTitle);
+    return keywords.some(kw => matchKeyword(tokens, kw));
   });
 
   const sortedNews = [...countryNews].sort((a, b) => {
@@ -82,8 +83,8 @@ export function collectStoryData(
   ) || null;
 
   const countryMarkets = predictionMarkets.filter(m => {
-    const lower = m.title.toLowerCase();
-    return keywords.some(kw => lower.includes(kw));
+    const mTokens = tokenizeForMatch(m.title);
+    return keywords.some(kw => matchKeyword(mTokens, kw));
   });
 
   const threatCounts = { critical: 0, high: 0, medium: 0, categories: new Set<string>() };