Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 44 additions & 0 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
"test:e2e:finance": "cross-env VITE_VARIANT=finance playwright test",
"test:e2e:runtime": "cross-env VITE_VARIANT=full playwright test e2e/runtime-fetch.spec.ts",
"test:e2e": "npm run test:e2e:runtime && npm run test:e2e:full && npm run test:e2e:tech && npm run test:e2e:finance",
"test:data": "node --test tests/*.test.mjs",
"test:data": "tsx --test tests/*.test.mjs tests/*.test.mts",
"test:feeds": "node scripts/validate-rss-feeds.mjs",
"test:sidecar": "node --test src-tauri/sidecar/local-api-server.test.mjs api/_cors.test.mjs api/youtube/embed.test.mjs api/cyber-threats.test.mjs api/usni-fleet.test.mjs scripts/ais-relay-rss.test.cjs api/loaders-xml-wms-regression.test.mjs",
"test:e2e:visual:full": "cross-env VITE_VARIANT=full playwright test -g \"matches golden screenshots per layer and zoom\"",
Expand Down Expand Up @@ -64,6 +64,7 @@
"cross-env": "^10.1.0",
"esbuild": "^0.27.3",
"markdownlint-cli2": "^0.20.0",
"tsx": "^4.21.0",
"typescript": "^5.7.2",
"vite": "^6.0.7",
"vite-plugin-pwa": "^1.2.0",
Expand Down
5 changes: 3 additions & 2 deletions src/app/data-loader.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ import {
LAYER_TO_SOURCE,
} from '@/config';
import { INTEL_HOTSPOTS, CONFLICT_ZONES } from '@/config/geo';
import { tokenizeForMatch, matchKeyword } from '@/utils/keyword-match';
import {
fetchCategoryFeeds,
getFeedFailures,
Expand Down Expand Up @@ -315,15 +316,15 @@ export class DataLoaderManager implements AppModule {
}

private findFlashLocation(title: string): { lat: number; lon: number } | null {
const titleLower = title.toLowerCase();
const tokens = tokenizeForMatch(title);
let bestMatch: { lat: number; lon: number; matches: number } | null = null;

const countKeywordMatches = (keywords: string[] | undefined): number => {
if (!keywords) return 0;
let matches = 0;
for (const keyword of keywords) {
const cleaned = keyword.trim().toLowerCase();
if (cleaned.length >= 3 && titleLower.includes(cleaned)) {
if (cleaned.length >= 3 && matchKeyword(tokens, cleaned)) {
matches++;
}
}
Expand Down
21 changes: 8 additions & 13 deletions src/components/DeckGLMap.ts
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ import { ArcLayer } from '@deck.gl/layers';
import { HeatmapLayer } from '@deck.gl/aggregation-layers';
import type { WeatherAlert } from '@/services/weather';
import { escapeHtml } from '@/utils/sanitize';
import { tokenizeForMatch, matchKeyword, matchesAnyKeyword, findMatchingKeywords } from '@/utils/keyword-match';
import { t } from '@/services/i18n';
import { debounce, rafSchedule, getCurrentTheme } from '@/utils/index';
import {
Expand Down Expand Up @@ -3796,10 +3797,9 @@ export class DeckGLMap {
const matchCounts = new Map<string, number>();

recentNews.forEach(item => {
const tokens = tokenizeForMatch(item.title);
this.hotspots.forEach(hotspot => {
if (hotspot.keywords.some(kw =>
item.title.toLowerCase().includes(kw.toLowerCase())
)) {
if (matchesAnyKeyword(tokens, hotspot.keywords)) {
breakingKeywords.add(hotspot.id);
matchCounts.set(hotspot.id, (matchCounts.get(hotspot.id) || 0) + 1);
}
Expand All @@ -3820,32 +3820,27 @@ export class DeckGLMap {

/** Get news items related to a hotspot by keyword matching */
private getRelatedNews(hotspot: Hotspot): NewsItem[] {
// High-priority conflict keywords that indicate the news is really about another topic
const conflictTopics = ['gaza', 'ukraine', 'russia', 'israel', 'iran', 'china', 'taiwan', 'korea', 'syria'];
const conflictTopics = ['gaza', 'ukraine', 'ukrainian', 'russia', 'russian', 'israel', 'israeli', 'iran', 'iranian', 'china', 'chinese', 'taiwan', 'taiwanese', 'korea', 'korean', 'syria', 'syrian'];

return this.news
.map((item) => {
const titleLower = item.title.toLowerCase();
const matchedKeywords = hotspot.keywords.filter((kw) => titleLower.includes(kw.toLowerCase()));
const tokens = tokenizeForMatch(item.title);
const matchedKeywords = findMatchingKeywords(tokens, hotspot.keywords);

if (matchedKeywords.length === 0) return null;

// Check if this news mentions other hotspot conflict topics
const conflictMatches = conflictTopics.filter(t =>
titleLower.includes(t) && !hotspot.keywords.some(k => k.toLowerCase().includes(t))
matchKeyword(tokens, t) && !hotspot.keywords.some(k => k.toLowerCase().includes(t))
);

// If article mentions a major conflict topic that isn't this hotspot, deprioritize heavily
if (conflictMatches.length > 0) {
// Only include if it ALSO has a strong local keyword (city name, agency)
const strongLocalMatch = matchedKeywords.some(kw =>
kw.toLowerCase() === hotspot.name.toLowerCase() ||
hotspot.agencies?.some(a => titleLower.includes(a.toLowerCase()))
hotspot.agencies?.some(a => matchKeyword(tokens, a))
);
if (!strongLocalMatch) return null;
}

// Score: more keyword matches = more relevant
const score = matchedKeywords.length;
return { item, score };
})
Expand Down
20 changes: 8 additions & 12 deletions src/components/Map.ts
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ import {
CENTRAL_BANKS,
COMMODITY_HUBS,
} from '@/config';
import { tokenizeForMatch, matchKeyword, findMatchingKeywords } from '@/utils/keyword-match';
import { MapPopup } from './MapPopup';
import {
updateHotspotEscalation,
Expand Down Expand Up @@ -2740,32 +2741,27 @@ export class MapComponent {
}

private getRelatedNews(hotspot: Hotspot): NewsItem[] {
// High-priority conflict keywords that indicate the news is really about another topic
const conflictTopics = ['gaza', 'ukraine', 'russia', 'israel', 'iran', 'china', 'taiwan', 'korea', 'syria'];
const conflictTopics = ['gaza', 'ukraine', 'ukrainian', 'russia', 'russian', 'israel', 'israeli', 'iran', 'iranian', 'china', 'chinese', 'taiwan', 'taiwanese', 'korea', 'korean', 'syria', 'syrian'];

return this.news
.map((item) => {
const titleLower = item.title.toLowerCase();
const matchedKeywords = hotspot.keywords.filter((kw) => titleLower.includes(kw.toLowerCase()));
const tokens = tokenizeForMatch(item.title);
const matchedKeywords = findMatchingKeywords(tokens, hotspot.keywords);

if (matchedKeywords.length === 0) return null;

// Check if this news mentions other hotspot conflict topics
const conflictMatches = conflictTopics.filter(t =>
titleLower.includes(t) && !hotspot.keywords.some(k => k.toLowerCase().includes(t))
matchKeyword(tokens, t) && !hotspot.keywords.some(k => k.toLowerCase().includes(t))
);

// If article mentions a major conflict topic that isn't this hotspot, deprioritize heavily
if (conflictMatches.length > 0) {
// Only include if it ALSO has a strong local keyword (city name, agency)
const strongLocalMatch = matchedKeywords.some(kw =>
kw.toLowerCase() === hotspot.name.toLowerCase() ||
hotspot.agencies?.some(a => titleLower.includes(a.toLowerCase()))
hotspot.agencies?.some(a => matchKeyword(tokens, a))
);
if (!strongLocalMatch) return null;
}

// Score: more keyword matches = more relevant
const score = matchedKeywords.length;
return { item, score };
})
Expand All @@ -2784,8 +2780,8 @@ export class MapComponent {
let matchedCount = 0;

news.forEach((item) => {
const titleLower = item.title.toLowerCase();
const matches = spot.keywords.filter((kw) => titleLower.includes(kw.toLowerCase()));
const tokens = tokenizeForMatch(item.title);
const matches = spot.keywords.filter((kw) => matchKeyword(tokens, kw));

if (matches.length > 0) {
matchedCount++;
Expand Down
2 changes: 1 addition & 1 deletion src/config/geo.ts
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ export const INTEL_HOTSPOTS: Hotspot[] = [
lat: 38.9,
lon: -77.0,
location: 'Washington D.C., USA',
keywords: ['pentagon', 'white house', 'congress', 'cia', 'nsa', 'washington', 'biden', 'trump', 'house', 'senate', 'supreme court', 'vance', 'elon', 'us '],
keywords: ['pentagon', 'white house', 'congress', 'cia', 'nsa', 'washington', 'biden', 'trump', 'senate', 'supreme court', 'vance', 'elon'],
agencies: ['Pentagon', 'CIA', 'NSA', 'State Dept'],
description: 'US government and military headquarters. Intelligence community center.',
status: 'Monitoring',
Expand Down
13 changes: 7 additions & 6 deletions src/services/country-instability.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import type { SocialUnrestEvent, MilitaryFlight, MilitaryVessel, ClusteredEvent, InternetOutage } from '@/types';
import { tokenizeForMatch, matchKeyword } from '@/utils/keyword-match';
import { INTEL_HOTSPOTS, CONFLICT_ZONES, STRATEGIC_WATERWAYS } from '@/config/geo';
import { CURATED_COUNTRIES, DEFAULT_BASELINE_RISK, DEFAULT_EVENT_MULTIPLIER, getHotspotCountries } from '@/config/countries';
import { focalPointDetector } from './focal-point-detector';
Expand Down Expand Up @@ -136,11 +137,11 @@ export function getPreviousScores(): Map<string, number> {
export type { CountryData };

function normalizeCountryName(name: string): string | null {
const lower = name.toLowerCase();
const tokens = tokenizeForMatch(name);
for (const [code, cfg] of Object.entries(CURATED_COUNTRIES)) {
if (cfg.scoringKeywords.some(kw => lower.includes(kw))) return code;
if (cfg.scoringKeywords.some(kw => matchKeyword(tokens, kw))) return code;
}
return nameToCountryCode(lower);
return nameToCountryCode(name.toLowerCase());
}

export function ingestProtestsForCII(events: SocialUnrestEvent[]): void {
Expand Down Expand Up @@ -347,16 +348,16 @@ export function ingestMilitaryForCII(flights: MilitaryFlight[], vessels: Militar

export function ingestNewsForCII(events: ClusteredEvent[]): void {
for (const e of events) {
const title = e.primaryTitle.toLowerCase();
const tokens = tokenizeForMatch(e.primaryTitle);
const matched = new Set<string>();

for (const [code, cfg] of Object.entries(CURATED_COUNTRIES)) {
if (cfg.scoringKeywords.some(kw => title.includes(kw))) {
if (cfg.scoringKeywords.some(kw => matchKeyword(tokens, kw))) {
matched.add(code);
}
}

for (const code of matchCountryNamesInText(title)) {
for (const code of matchCountryNamesInText(e.primaryTitle.toLowerCase())) {
matched.add(code);
}

Expand Down
14 changes: 3 additions & 11 deletions src/services/geo-hub-index.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import { tokenizeForMatch, matchKeyword } from '@/utils/keyword-match';
// Geopolitical Hub Index - aggregates news by strategic locations

export interface GeoHubLocation {
Expand Down Expand Up @@ -109,22 +110,13 @@ export interface GeoHubMatch {
export function inferGeoHubsFromTitle(title: string): GeoHubMatch[] {
const index = buildGeoHubIndex();
const matches: GeoHubMatch[] = [];
const titleLower = title.toLowerCase();
const tokens = tokenizeForMatch(title);
const seenHubs = new Set<string>();

for (const [keyword, hubIds] of index.byKeyword) {
if (keyword.length < 2) continue;

// Word boundary check for short keywords to avoid false positives
const regex = keyword.length < 5
? new RegExp(`\\b${keyword}\\b`, 'i')
: null;

const found = regex
? regex.test(titleLower)
: titleLower.includes(keyword);

if (found) {
if (matchKeyword(tokens, keyword)) {
for (const hubId of hubIds) {
if (seenHubs.has(hubId)) continue;
seenHubs.add(hubId);
Expand Down
13 changes: 5 additions & 8 deletions src/services/related-assets.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import type { ClusteredEvent, RelatedAsset, AssetType, RelatedAssetContext } from '@/types';
import { tokenizeForMatch, matchKeyword } from '@/utils/keyword-match';
import { t } from '@/services/i18n';
import {
INTEL_HOTSPOTS,
Expand Down Expand Up @@ -27,24 +28,20 @@ interface AssetOrigin {
label: string;
}

function toTitleLower(titles: string[]): string[] {
return titles.map(title => title.toLowerCase());
}

function detectAssetTypes(titles: string[]): AssetType[] {
const normalized = toTitleLower(titles);
const tokenized = titles.map(t => tokenizeForMatch(t));
const types = Object.entries(ASSET_KEYWORDS)
.filter(([, keywords]) =>
normalized.some(title => keywords.some(keyword => title.includes(keyword)))
tokenized.some(tokens => keywords.some(keyword => matchKeyword(tokens, keyword)))
)
.map(([type]) => type as AssetType);
return types;
}

function countKeywordMatches(titles: string[], keywords: string[]): number {
const normalized = toTitleLower(titles);
const tokenized = titles.map(t => tokenizeForMatch(t));
return keywords.reduce((count, keyword) => {
return count + normalized.filter(title => title.includes(keyword)).length;
return count + tokenized.filter(tokens => matchKeyword(tokens, keyword)).length;
}, 0);
}

Expand Down
9 changes: 5 additions & 4 deletions src/services/story-data.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ import { calculateCII, type CountryScore } from './country-instability';
import type { ClusteredEvent } from '@/types';
import type { ThreatLevel } from './threat-classifier';
import { CURATED_COUNTRIES } from '@/config/countries';
import { tokenizeForMatch, matchKeyword } from '@/utils/keyword-match';

export interface StoryData {
countryCode: string;
Expand Down Expand Up @@ -65,8 +66,8 @@ export function collectStoryData(

const keywords = CURATED_COUNTRIES[countryCode]?.scoringKeywords || [countryName.toLowerCase()];
const countryNews = allNews.filter(e => {
const lower = e.primaryTitle.toLowerCase();
return keywords.some(kw => lower.includes(kw));
const tokens = tokenizeForMatch(e.primaryTitle);
return keywords.some(kw => matchKeyword(tokens, kw));
});

const sortedNews = [...countryNews].sort((a, b) => {
Expand All @@ -82,8 +83,8 @@ export function collectStoryData(
) || null;

const countryMarkets = predictionMarkets.filter(m => {
const lower = m.title.toLowerCase();
return keywords.some(kw => lower.includes(kw));
const mTokens = tokenizeForMatch(m.title);
return keywords.some(kw => matchKeyword(mTokens, kw));
});

const threatCounts = { critical: 0, high: 0, medium: 0, categories: new Set<string>() };
Expand Down
Loading