/**
 * Real-time fraud detection: feature extraction, lightweight anomaly models,
 * rule-based pattern recognition, risk aggregation and the public
 * `FraudDetector` facade.
 *
 * The material below was originally a multi-file patch; each section header
 * names the file the section came from. The sections are ordered so that
 * every definition appears before it is first used at module load time.
 */

// ---------------------------------------------------------------------------
// packages/core/src/ai/fraud/types.ts
// Core types for real-time fraud detection
// ---------------------------------------------------------------------------

export type RiskCategory = 'low' | 'medium' | 'high';

export interface Transaction {
  id: string;
  accountId: string;
  amount: number;
  currency?: string;
  timestamp: number; // epoch ms
  merchantId?: string;
  merchantCategory?: string;
  country?: string;
  city?: string;
  lat?: number;
  lon?: number;
  deviceId?: string;
  channel?: 'pos' | 'online' | 'atm' | 'transfer';
  ipAddress?: string;
  previousBalance?: number;
}

export interface FeatureVector {
  id: string;
  accountId: string;
  features: number[];
  featureNames: string[];
}

export interface FeatureExtractorOptions {
  velocityWindowsMinutes?: number[]; // e.g., [1, 5, 60]
  maxHistoryPerAccount?: number; // rolling history size
}

export interface AnomalyScore {
  score: number; // 0..1
  model: 'isolation_forest' | 'one_class_svm';
  details?: Record<string, unknown>;
}

export interface PatternFinding {
  name: string;
  score: number; // 0..1
  reason: string;
}

export interface RiskResult {
  transactionId: string;
  riskScore: number; // 0..100
  category: RiskCategory;
  reasons: string[];
  components: {
    anomalyScores: AnomalyScore[];
    patternFindings: PatternFinding[];
  };
  latencyMs: number;
  timestamp: number;
}

export interface FeedbackEvent {
  transactionId: string;
  isFraud: boolean;
  category?: string;
  notes?: string;
  timestamp: number;
}

export interface MonitorStats {
  totalScored: number;
  avgLatencyMs: number;
  p99LatencyMs: number;
  falsePositiveRate?: number;
  truePositiveRate?: number;
  lastUpdated: number;
}

export interface AnomalyModel {
  fit(samples: FeatureVector[]): void;
  score(sample: FeatureVector): AnomalyScore;
  fitPartial?(samples: FeatureVector[]): void;
}

// ---------------------------------------------------------------------------
// Shared numeric helpers (used by the anomaly models and the feature extractor)
// ---------------------------------------------------------------------------

/** Arithmetic mean; returns 0 for an empty array. */
function mean(arr: number[]): number {
  if (!arr.length) return 0;
  return arr.reduce((a, b) => a + b, 0) / arr.length;
}

/** Sample standard deviation (n - 1 denominator); returns 0 for fewer than two values. */
function std(arr: number[]): number {
  if (arr.length < 2) return 0;
  const m = mean(arr);
  const v = arr.reduce((a, b) => a + (b - m) * (b - m), 0) / (arr.length - 1);
  return Math.sqrt(v);
}

/** Restricts `v` to the closed interval [min, max]. */
function clamp(min: number, max: number, v: number): number {
  return Math.min(max, Math.max(min, v));
}

/**
 * Lower empirical quantile: the element at index floor(q * (n - 1)) of the
 * sorted copy of `arr` (no interpolation). Returns 0 for an empty array.
 */
function quantile(arr: number[], q: number): number {
  if (arr.length === 0) return 0;
  const a = arr.slice().sort((x, y) => x - y);
  const idx = Math.floor(q * (a.length - 1));
  return a[idx];
}

/**
 * Logit of (1 - nu), i.e. ln(1 / nu - 1).
 * A smaller `nu` gives a HIGHER threshold; the value is <= 0 once nu >= 0.5.
 */
function invLogitTarget(nu: number): number {
  return Math.log(1 / nu - 1);
}

/** Transposes the first `featureCount` features of every sample into per-feature columns. */
function featureColumns(samples: FeatureVector[], featureCount: number): number[][] {
  const cols: number[][] = Array.from({ length: featureCount }, () => []);
  for (const s of samples) {
    for (let i = 0; i < featureCount; i++) cols[i].push(s.features[i]);
  }
  return cols;
}

// ---------------------------------------------------------------------------
// packages/core/src/ai/fraud/anomalyDetector.ts
// Lightweight, real-time friendly anomaly detectors
// ---------------------------------------------------------------------------

/**
 * Range/IQR based outlier scorer reported under the model name
 * 'isolation_forest'. For each feature it measures how far a value lies
 * outside the fitted 25%-75% band, scaled by the distance from that band to
 * the fitted min/max, and averages the per-feature deviations.
 */
class IsolationForestApprox implements AnomalyModel {
  private featureMins: number[] = [];
  private featureMaxs: number[] = [];
  private iqrLow: number[] = [];
  private iqrHigh: number[] = [];
  private featureCount = 0;

  /** Learns per-feature min, max and quartiles. An empty sample list leaves the model unchanged. */
  fit(samples: FeatureVector[]): void {
    if (samples.length === 0) return;
    const F = samples[0].features.length;
    this.featureCount = F;
    const cols = featureColumns(samples, F);
    // Loop instead of Math.min(...c): spreading a very large array as call
    // arguments can exceed the engine's argument limit and throw RangeError.
    this.featureMins = cols.map((c) => c.reduce((lo, v) => Math.min(lo, v), Infinity));
    this.featureMaxs = cols.map((c) => c.reduce((hi, v) => Math.max(hi, v), -Infinity));
    // Compute approximate IQR bounds
    this.iqrLow = cols.map((c) => quantile(c, 0.25));
    this.iqrHigh = cols.map((c) => quantile(c, 0.75));
  }

  /** Returns the mean per-feature deviation in [0, 1]; 0 when the model has not been fitted. */
  score(sample: FeatureVector): AnomalyScore {
    const outliers: number[] = [];
    for (let i = 0; i < this.featureCount; i++) {
      const v = sample.features[i];
      const low = this.iqrLow[i];
      const high = this.iqrHigh[i];
      // Distance outside the IQR band, scaled by the remaining fitted range.
      // The 1e-6 floor avoids division by zero when the band touches min/max.
      let deviation = 0;
      if (v < low) deviation = (low - v) / Math.max(1e-6, low - this.featureMins[i]);
      else if (v > high) deviation = (v - high) / Math.max(1e-6, this.featureMaxs[i] - high);
      outliers.push(Math.min(1, Math.max(0, deviation)));
    }
    const score = clamp(0, 1, outliers.reduce((a, b) => a + b, 0) / Math.max(1, outliers.length));
    return {
      score,
      model: 'isolation_forest',
      details: { outlierByFeature: outliers },
    };
  }
}

/**
 * Z-score based scorer reported under the model name 'one_class_svm'.
 * The mean squared z-score of a sample is passed through a logistic curve
 * whose midpoint is derived from `nu`.
 */
class OneClassSVMApprox implements AnomalyModel {
  private means: number[] = [];
  private stds: number[] = [];
  private featureCount = 0;
  private nu = 0.1; // approximate fraction of outliers

  /**
   * @param nu Optional outlier fraction, strictly between 0 and 1.
   * @throws RangeError when `nu` is outside (0, 1); such values would make the
   *   logistic threshold infinite or NaN.
   */
  constructor(nu?: number) {
    if (nu !== undefined) {
      if (!(nu > 0 && nu < 1)) throw new RangeError(`nu must be in (0, 1), got ${nu}`);
      this.nu = nu;
    }
  }

  /** Learns per-feature mean and standard deviation. An empty sample list leaves the model unchanged. */
  fit(samples: FeatureVector[]): void {
    if (samples.length === 0) return;
    const F = samples[0].features.length;
    this.featureCount = F;
    const cols = featureColumns(samples, F);
    this.means = cols.map((c) => mean(c));
    // Constant features get a tiny std so the division in score() stays finite.
    this.stds = cols.map((c) => std(c) || 1e-6);
  }

  /**
   * Scores a sample in [0, 1].
   * `details.distance` is the MEAN squared z-score, so it does not grow with
   * the number of features: a sample one standard deviation away on every
   * feature has distance ~1 regardless of dimensionality. (Summing instead
   * pushes ordinary samples far past the threshold once there are more than a
   * handful of features.)
   */
  score(sample: FeatureVector): AnomalyScore {
    const z: number[] = [];
    let sumSquares = 0;
    for (let i = 0; i < this.featureCount; i++) {
      const raw = Math.abs((sample.features[i] - this.means[i]) / this.stds[i]);
      // A missing or non-finite feature contributes no deviation rather than
      // turning the whole score into NaN.
      const zi = Number.isFinite(raw) ? raw : 0;
      z.push(zi);
      sumSquares += zi * zi;
    }
    const distance = this.featureCount > 0 ? sumSquares / this.featureCount : 0;
    // Logistic midpoint derived from nu; floored so it stays positive for nu >= 0.5.
    const threshold = Math.max(1e-3, invLogitTarget(this.nu));
    const raw = 1 / (1 + Math.exp(-(distance - threshold)));
    const score = clamp(0, 1, raw);
    return {
      score,
      model: 'one_class_svm',
      details: { zScores: z, distance },
    };
  }
}

/** Runs both approximate anomaly models and returns their scores in a fixed order. */
export class AnomalyDetector {
  private iso = new IsolationForestApprox();
  private ocsvm = new OneClassSVMApprox(0.1);

  /** Fits both models on the same samples. */
  fit(samples: FeatureVector[]): void {
    // Keep fitting lightweight to meet real-time constraints
    this.iso.fit(samples);
    this.ocsvm.fit(samples);
  }

  /** @returns `[isolation_forest score, one_class_svm score]`. */
  score(sample: FeatureVector): AnomalyScore[] {
    return [this.iso.score(sample), this.ocsvm.score(sample)];
  }
}

// ---------------------------------------------------------------------------
// packages/core/src/ai/fraud/featureExtractor.ts
// ---------------------------------------------------------------------------

type AccountHistory = {
  lastTx?: Transaction;
  txTimestamps: number[]; // recent timestamps
  amounts: number[]; // recent amounts
  merchantCounts: Map<string, number>;
  hourHistogram: number[]; // 24 buckets (fractional, exponentially decayed)
};

const EARTH_RADIUS_KM = 6371;

/**
 * Great-circle distance in kilometres between two latitude/longitude points
 * given in degrees. Returns `undefined` when any coordinate is missing.
 */
function haversine(lat1?: number, lon1?: number, lat2?: number, lon2?: number): number | undefined {
  if (lat1 === undefined || lon1 === undefined || lat2 === undefined || lon2 === undefined) {
    return undefined;
  }
  const toRad = (d: number) => (d * Math.PI) / 180;
  const dLat = toRad(lat2 - lat1);
  const dLon = toRad(lon2 - lon1);
  const a =
    Math.sin(dLat / 2) * Math.sin(dLat / 2) +
    Math.cos(toRad(lat1)) * Math.cos(toRad(lat2)) * Math.sin(dLon / 2) * Math.sin(dLon / 2);
  const c = 2 * Math.atan2(Math.sqrt(a), Math.sqrt(1 - a));
  return EARTH_RADIUS_KM * c;
}

const HIGH_RISK_COUNTRIES = new Set([
  // Example list, extend as necessary
  'NG', 'UA', 'RU', 'IR', 'IQ', 'AF', 'SY', 'YE',
]);

/**
 * Turns a transaction into a numeric feature vector using a rolling,
 * in-memory history per account.
 *
 * `extract` is stateful: it reads the account's history, then records the
 * transaction into it, so extracting the same transaction twice counts it twice.
 */
export class FeatureExtractor {
  private opts: Required<FeatureExtractorOptions>;
  private history: Map<string, AccountHistory> = new Map();

  /**
   * @param opts Velocity windows in minutes (default `[1, 5, 60]`) and the
   *   per-account history cap (default 500).
   */
  constructor(opts?: FeatureExtractorOptions) {
    this.opts = {
      velocityWindowsMinutes: opts?.velocityWindowsMinutes ?? [1, 5, 60],
      maxHistoryPerAccount: opts?.maxHistoryPerAccount ?? 500,
    };
  }

  /** Returns the history record for an account, creating an empty one on first use. */
  private getHistory(accountId: string): AccountHistory {
    let h = this.history.get(accountId);
    if (!h) {
      h = {
        txTimestamps: [],
        amounts: [],
        merchantCounts: new Map(),
        hourHistogram: new Array(24).fill(0),
      };
      this.history.set(accountId, h);
    }
    return h;
  }

  /** Records a transaction into its account history and trims/decays that history. */
  private updateHistory(tx: Transaction): void {
    const h = this.getHistory(tx.accountId);
    h.lastTx = tx;
    h.txTimestamps.push(tx.timestamp);
    h.amounts.push(tx.amount);
    if (tx.merchantId) h.merchantCounts.set(tx.merchantId, (h.merchantCounts.get(tx.merchantId) || 0) + 1);
    // UTC keeps the bucket independent of the host's time zone.
    const hour = new Date(tx.timestamp).getUTCHours();
    h.hourHistogram[hour]++;

    // Trim history
    const max = this.opts.maxHistoryPerAccount;
    if (h.txTimestamps.length > max) h.txTimestamps.splice(0, h.txTimestamps.length - max);
    if (h.amounts.length > max) h.amounts.splice(0, h.amounts.length - max);
    if (h.merchantCounts.size > max) {
      // Simple decay: shrink every count and drop merchants that reach zero so
      // the map can actually fall back under the cap.
      for (const [merchantId, count] of h.merchantCounts) {
        const decayed = Math.max(0, Math.floor(count * 0.9));
        if (decayed === 0) h.merchantCounts.delete(merchantId);
        else h.merchantCounts.set(merchantId, decayed);
      }
    }

    // Hour histogram decay. The buckets stay fractional: flooring here would
    // turn any count below 200 into count - 1 on every transaction, which
    // erases the increment made just above and leaves the histogram empty.
    for (let i = 0; i < 24; i++) h.hourHistogram[i] *= 0.995;
  }

  /** Counts stored timestamps with `now - t <= window` for each configured window. */
  private velocityCounts(h: AccountHistory, now: number): number[] {
    const counts: number[] = [];
    for (const w of this.opts.velocityWindowsMinutes) {
      const windowMs = w * 60 * 1000;
      const c = h.txTimestamps.filter((t) => now - t <= windowMs).length;
      counts.push(c);
    }
    return counts;
  }

  /**
   * Builds the feature vector for `tx` from the account's history as it was
   * BEFORE this transaction, then records `tx` into that history.
   *
   * Feature order: `amount_z`, one `velocity_<w>m` per configured window,
   * `seconds_since_last_tx`, `merchant_novelty`, `hour_deviation`,
   * `geo_speed_kmph`, `device_changed`, `channel_risk`, `country_risk`,
   * `balance_impact_ratio`.
   */
  extract(tx: Transaction): FeatureVector {
    const start = Date.now();
    const h = this.getHistory(tx.accountId);
    const names: string[] = [];
    const values: number[] = [];

    // Amount z-score vs account history
    const m = mean(h.amounts);
    const s = std(h.amounts) || 1e-6;
    names.push('amount_z');
    values.push((tx.amount - m) / s);

    // Velocity features
    const counts = this.velocityCounts(h, tx.timestamp);
    for (let i = 0; i < counts.length; i++) {
      names.push(`velocity_${this.opts.velocityWindowsMinutes[i]}m`);
      values.push(counts[i]);
    }

    // Time since last transaction (1e6 seconds when the account has no history)
    const dt = h.lastTx ? (tx.timestamp - h.lastTx.timestamp) / 1000 : 1e6;
    names.push('seconds_since_last_tx');
    values.push(dt);

    // Merchant novelty: 1 for an unseen (or absent) merchant, shrinking with frequency
    const merchantFreq = tx.merchantId ? h.merchantCounts.get(tx.merchantId) || 0 : 0;
    names.push('merchant_novelty');
    values.push(merchantFreq === 0 ? 1 : 1 / Math.sqrt(merchantFreq + 1));

    // Hour-of-day deviation: 1 minus the share of past activity in this UTC hour
    const hour = new Date(tx.timestamp).getUTCHours();
    const totalHours = h.hourHistogram.reduce((a, b) => a + b, 0) || 1;
    const hourProb = h.hourHistogram[hour] / totalHours;
    names.push('hour_deviation');
    values.push(1 - hourProb);

    // Geo-velocity km per hour (0 when either position is unknown)
    const distKm = haversine(h.lastTx?.lat, h.lastTx?.lon, tx.lat, tx.lon);
    const hours = h.lastTx ? Math.max(1e-6, (tx.timestamp - h.lastTx.timestamp) / (3600 * 1000)) : 0;
    const kmph = distKm !== undefined ? distKm / hours : 0;
    names.push('geo_speed_kmph');
    values.push(kmph);

    // Device change indicator (only when both device ids are known)
    const previousDevice = h.lastTx?.deviceId;
    const deviceChanged = previousDevice && tx.deviceId && previousDevice !== tx.deviceId ? 1 : 0;
    names.push('device_changed');
    values.push(deviceChanged);

    // Channel risk weight
    const channelRisk =
      tx.channel === 'online' ? 1 : tx.channel === 'atm' ? 0.7 : tx.channel === 'transfer' ? 0.8 : 0.5;
    names.push('channel_risk');
    values.push(channelRisk);

    // Country risk indicator
    const countryRisk = tx.country && HIGH_RISK_COUNTRIES.has(tx.country) ? 1 : 0;
    names.push('country_risk');
    values.push(countryRisk);

    // Balance impact ratio (0 when the balance is missing or zero)
    const balanceImpact = tx.previousBalance ? tx.amount / Math.max(1, tx.previousBalance) : 0;
    names.push('balance_impact_ratio');
    values.push(balanceImpact);

    // Update history after extracting features
    this.updateHistory(tx);

    const fv: FeatureVector = {
      id: tx.id,
      accountId: tx.accountId,
      features: values,
      featureNames: names,
    };

    // Ensure fast execution
    const elapsed = Date.now() - start;
    if (elapsed > 50) {
      // If extraction slows, halve the retained history to keep it light.
      // NOTE(review): this makes history size depend on wall-clock timing.
      const hist = this.getHistory(tx.accountId);
      const keep = Math.floor(this.opts.maxHistoryPerAccount / 2);
      hist.txTimestamps = hist.txTimestamps.slice(-keep);
      hist.amounts = hist.amounts.slice(-keep);
    }

    return fv;
  }
}

// ---------------------------------------------------------------------------
// packages/core/src/ai/fraud/patternRecognizer.ts
// ---------------------------------------------------------------------------

/** Velocity score in [0, 1]: the larger of the 1-minute and 5-minute counts, divided by 10. */
function burstScore(vel1m: number, vel5m: number): number {
  const base = vel1m + Math.max(0, vel5m - vel1m);
  return Math.min(1, base / 10);
}

/** Maps travel speed to [0, 1]: 0 up to 200 km/h, 1 from 1000 km/h, linear in between. */
function geoVelocityScore(kmph: number): number {
  if (kmph <= 200) return 0; // safe
  if (kmph >= 1000) return 1;
  return (kmph - 200) / (1000 - 200);
}

/**
 * Scores a transfer that is small relative to the account balance
 * (below 0.5% of it) yet above 50 in absolute terms.
 *
 * Returns 0 when the balance is unknown or not positive: without a balance
 * there is nothing to be "small relative to", and treating the ratio as 0
 * would flag every transaction above 50.
 */
function structuringScore(amount: number, prevBalance?: number): number {
  if (prevBalance === undefined || !(prevBalance > 0)) return 0;
  const ratio = amount / Math.max(1, prevBalance);
  if (ratio < 0.005 && amount > 50) return Math.min(1, amount / 500);
  return 0;
}

/** Rule-based detector that turns extracted features into named findings. */
export class PatternRecognizer {
  /**
   * @param featureNames Names parallel to `features`.
   * @param features Feature values; a name that is not present reads as 0.
   * @param tx The transaction the features were extracted from.
   * @returns One finding per rule that fired, in a fixed rule order.
   */
  recognize(featureNames: string[], features: number[], tx: Transaction): PatternFinding[] {
    const out: PatternFinding[] = [];

    const get = (name: string): number => {
      const idx = featureNames.indexOf(name);
      return idx >= 0 ? features[idx] : 0;
    };

    // Burst activity
    const vel1 = get('velocity_1m');
    const vel5 = get('velocity_5m');
    const burst = burstScore(vel1, vel5);
    if (burst > 0) {
      out.push({ name: 'burst_activity', score: burst, reason: `High transaction velocity: ${vel1}/1m, ${vel5}/5m` });
    }

    // Geo velocity
    const kmph = get('geo_speed_kmph');
    const geo = geoVelocityScore(kmph);
    if (geo > 0) out.push({ name: 'impossible_travel', score: geo, reason: `Geo velocity ${kmph.toFixed(1)} km/h` });

    // Device change
    const devChanged = get('device_changed');
    if (devChanged > 0) out.push({ name: 'device_change', score: 0.6, reason: 'Transaction from new device' });

    // Merchant novelty
    const novelty = get('merchant_novelty');
    if (novelty > 0.7) out.push({ name: 'merchant_novelty', score: novelty, reason: 'Unseen merchant for account' });

    // High-risk country
    const countryRisk = get('country_risk');
    if (countryRisk > 0) {
      out.push({ name: 'high_risk_country', score: 0.8, reason: `Transaction in high-risk country: ${tx.country}` });
    }

    // Time-of-day anomaly
    const hourDev = get('hour_deviation');
    if (hourDev > 0.8) out.push({ name: 'odd_hour', score: hourDev, reason: 'Unusual transaction hour for user' });

    // Amount structuring
    const struct = structuringScore(tx.amount, tx.previousBalance);
    if (struct > 0) {
      out.push({ name: 'amount_structuring', score: struct, reason: 'Multiple small transfers indicative of structuring' });
    }

    // Channel risk
    const chRisk = get('channel_risk');
    if (chRisk > 0.9) out.push({ name: 'high_risk_channel', score: 0.5, reason: `Channel ${tx.channel} has elevated risk` });

    return out;
  }
}

// ---------------------------------------------------------------------------
// packages/core/src/ai/fraud/riskScorer.ts
// ---------------------------------------------------------------------------

/** Risk scores below this are 'low'. */
const MEDIUM_RISK_THRESHOLD = 30;
/** Risk scores at or above this are 'high'. */
const HIGH_RISK_THRESHOLD = 70;

/** Maps a 0..100 risk score onto its category. */
function categorize(score: number): RiskCategory {
  if (score < MEDIUM_RISK_THRESHOLD) return 'low';
  if (score < HIGH_RISK_THRESHOLD) return 'medium';
  return 'high';
}

/** Element at index floor(p * (n - 1)) of the sorted copy of `arr`; 0 for an empty array. */
function percentile(arr: number[], p: number): number {
  if (arr.length === 0) return 0;
  const a = arr.slice().sort((x, y) => x - y);
  const idx = Math.floor(p * (a.length - 1));
  return a[idx];
}

/**
 * Scores transactions by combining the anomaly models (weight 0.6) with the
 * rule-based pattern findings (weight 0.4), and tracks latency statistics.
 *
 * Scoring is stateful: every call to `scoreTransaction` (and every
 * transaction passed to `fitBaseline`) is recorded in the per-account history
 * of the underlying feature extractor.
 */
export class RealTimeFraudScorer {
  private extractor = new FeatureExtractor();
  private detector = new AnomalyDetector();
  private patterns = new PatternRecognizer();
  private baselineSamples: FeatureVector[] = [];
  private latencies: number[] = [];
  private feedback: FeedbackEvent[] = [];
  private stats: MonitorStats = {
    totalScored: 0,
    avgLatencyMs: 0,
    p99LatencyMs: 0,
    lastUpdated: Date.now(),
  };

  /** Extracts features for the given transactions and fits the anomaly models on the last 2000 of them. */
  fitBaseline(transactions: Transaction[]): void {
    const samples = transactions.map((t) => this.extractor.extract(t));
    this.baselineSamples = samples.slice(-2000); // cap baseline for performance
    this.detector.fit(this.baselineSamples);
  }

  /** Scores one transaction and records it in the account history. */
  scoreTransaction(tx: Transaction): RiskResult {
    const t0 = Date.now();
    const fv = this.extractor.extract(tx);
    const anomalyScores = this.detector.score(fv);
    const patternFindings = this.patterns.recognize(fv.featureNames, fv.features, tx);

    const riskScore = this.aggregate(anomalyScores, patternFindings);
    const category = categorize(riskScore);
    const reasons = this.reasons(anomalyScores, patternFindings);
    const latencyMs = Date.now() - t0;
    this.updateStats(latencyMs);

    return {
      transactionId: tx.id,
      riskScore,
      category,
      reasons,
      components: { anomalyScores, patternFindings },
      latencyMs,
      timestamp: Date.now(),
    };
  }

  /**
   * Stores a feedback event (keeping the latest 10000) and, when more than 20%
   * of the latest 1000 events are non-fraud, refits the anomaly models on the
   * last 1000 baseline samples.
   *
   * NOTE(review): the non-fraud share is used as a stand-in for a false
   * positive rate, and the refit reuses the existing baseline rather than
   * changing any model parameter; confirm this is the intended adjustment.
   */
  recordFeedback(event: FeedbackEvent): void {
    this.feedback.push(event);
    if (this.feedback.length > 10000) this.feedback.shift();
    const recent = this.feedback.slice(-1000);
    const fraudCount = recent.filter((f) => f.isFraud).length;
    const nonFraudShare = (recent.length - fraudCount) / Math.max(1, recent.length);
    if (nonFraudShare > 0.2 && this.baselineSamples.length) {
      this.detector.fit(this.baselineSamples.slice(-1000));
    }
  }

  /** Returns a snapshot of the monitoring counters; p99 covers the latest 5000 latencies. */
  getStats(): MonitorStats {
    // p99 is computed here rather than after every scored transaction, so the
    // scoring path never sorts the latency window.
    return { ...this.stats, p99LatencyMs: percentile(this.latencies, 0.99) };
  }

  /** Weighted blend of the average anomaly score and average pattern score, scaled to 0..100. */
  private aggregate(anoms: AnomalyScore[], patterns: PatternFinding[]): number {
    // Weighted aggregation prioritizing precision
    const anomalyWeight = 0.6;
    const patternWeight = 0.4;
    const anomalyAvg = anoms.reduce((a, s) => a + s.score, 0) / Math.max(1, anoms.length);
    // NOTE(review): averaging means an extra low-score finding lowers the result.
    const patternAvg = patterns.reduce((a, p) => a + p.score, 0) / Math.max(1, patterns.length);
    const score01 = anomalyWeight * anomalyAvg + patternWeight * patternAvg;
    return Math.round(score01 * 100);
  }

  /** Human-readable reasons: anomaly scores above 0.6 and pattern findings above 0.5. */
  private reasons(anoms: AnomalyScore[], patterns: PatternFinding[]): string[] {
    const reasons: string[] = [];
    for (const a of anoms) {
      if (a.score > 0.6) reasons.push(`Anomaly (${a.model}) score=${a.score.toFixed(2)}`);
    }
    for (const p of patterns) {
      if (p.score > 0.5) reasons.push(`${p.name}: ${p.reason}`);
    }
    if (reasons.length === 0) reasons.push('Normal pattern within expected ranges');
    return reasons;
  }

  /** Records one latency sample and updates the running count and mean. */
  private updateStats(latency: number): void {
    this.latencies.push(latency);
    if (this.latencies.length > 5000) this.latencies.shift();
    const total = this.stats.totalScored + 1;
    const avg = (this.stats.avgLatencyMs * this.stats.totalScored + latency) / total;
    this.stats = {
      ...this.stats,
      totalScored: total,
      avgLatencyMs: avg,
      lastUpdated: Date.now(),
    };
  }
}

// ---------------------------------------------------------------------------
// packages/core/src/ai/fraud-detector.ts
// ---------------------------------------------------------------------------

/**
 * Public facade over `RealTimeFraudScorer`.
 *
 * Scoring a transaction records it in the account history, so this class
 * scores each transaction id once and serves later calls for the same id from
 * a bounded cache. `detectAnomalies`, `getRiskFactors` and `score` therefore
 * always agree for a given transaction and do not inflate its account's
 * velocity counts.
 */
export class FraudDetector {
  /** Maximum number of recent results retained for repeat lookups. */
  private static readonly MAX_CACHED_RESULTS = 1000;

  private scorer = new RealTimeFraudScorer();
  private recentResults = new Map<string, RiskResult>();

  /** Fits the anomaly baseline and discards results scored against the previous baseline. */
  async initializeBaseline(transactions: Transaction[]): Promise<void> {
    this.scorer.fitBaseline(transactions);
    this.recentResults.clear();
  }

  /** @returns `true` when the transaction's risk score reaches the high-risk threshold (70). */
  async detectAnomalies(transactionData: Transaction): Promise<boolean> {
    return this.scoreOnce(transactionData).riskScore >= HIGH_RISK_THRESHOLD;
  }

  /** @returns The human-readable reasons behind the transaction's risk score. */
  async getRiskFactors(transactionData: Transaction): Promise<string[]> {
    return this.scoreOnce(transactionData).reasons;
  }

  /** @returns The full risk result for the transaction. */
  async score(transactionData: Transaction): Promise<RiskResult> {
    return this.scoreOnce(transactionData);
  }

  /** Scores a transaction the first time its id is seen; afterwards returns the stored result. */
  private scoreOnce(tx: Transaction): RiskResult {
    const cached = this.recentResults.get(tx.id);
    if (cached) return cached;
    const result = this.scorer.scoreTransaction(tx);
    this.recentResults.set(tx.id, result);
    if (this.recentResults.size > FraudDetector.MAX_CACHED_RESULTS) {
      // A Map iterates in insertion order, so the first key is the oldest entry.
      const oldest = this.recentResults.keys().next().value;
      if (oldest !== undefined) this.recentResults.delete(oldest);
    }
    return result;
  }
}