From 7279ef400edf5b97d4fe921354cab3527c2d6fec Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 19 Feb 2026 21:38:52 +0000 Subject: [PATCH 1/5] Initial plan From c98fcba500e3b602471ba8fc6537e01541ab91d3 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 19 Feb 2026 22:05:08 +0000 Subject: [PATCH 2/5] Add 11 neural network simulation modules for multi-agent AI/ML platform - neural_networks/transformers/bert_model.py: BERTModel with encode, classify, batch_encode, fill_mask - neural_networks/transformers/gpt_model.py: GPTModel with generate, complete, batch_generate (n-gram + causal attention) - neural_networks/transformers/t5_model.py: T5Model with translate, summarize, answer_question, classify, paraphrase - neural_networks/gnn/infrastructure_gnn.py: InfrastructureGNN with message-passing, bottleneck detection, failure risk - neural_networks/gnn/dependency_analyzer.py: DependencyAnalyzer with BFS/DFS impact analysis, cycle detection, refactoring suggestions - neural_networks/rnn/lstm_predictor.py: LSTMPredictor with simulated LSTM cells, fit, predict, rolling_forecast, anomaly_score - neural_networks/rnn/anomaly_detector.py: AnomalyDetector with windowed stats + isolation forest ensemble - neural_networks/cnn/log_analyzer.py: LogAnalyzer with feature extraction, 1D-conv simulation, clustering - neural_networks/cnn/metric_classifier.py: MetricClassifier with time-series features, regime change detection - neural_networks/rl/policy_optimizer.py: PolicyOptimizer with tabular Q-learning, ReplayBuffer, save/load - neural_networks/rl/resource_allocator.py: ResourceAllocator with Q-learning, epsilon-greedy, efficiency metrics All implementations use Python standard library only (math, statistics, logging, dataclasses, typing, abc, collections, json). 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- neural_networks/cnn/log_analyzer.py | 164 +++++++++++++++++++++ neural_networks/cnn/metric_classifier.py | 162 ++++++++++++++++++++ neural_networks/gnn/dependency_analyzer.py | 144 ++++++++++++++++++ neural_networks/gnn/infrastructure_gnn.py | 140 ++++++++++++++++++ neural_networks/rl/policy_optimizer.py | 151 +++++++++++++++++++ neural_networks/rl/resource_allocator.py | 136 +++++++++++++++++ neural_networks/rnn/anomaly_detector.py | 163 ++++++++++++++++++++ neural_networks/rnn/lstm_predictor.py | 140 ++++++++++++++++++ neural_networks/transformers/bert_model.py | 142 ++++++++++++++++++ neural_networks/transformers/gpt_model.py | 135 +++++++++++++++++ neural_networks/transformers/t5_model.py | 156 ++++++++++++++++++++ 11 files changed, 1633 insertions(+) create mode 100644 neural_networks/cnn/log_analyzer.py create mode 100644 neural_networks/cnn/metric_classifier.py create mode 100644 neural_networks/gnn/dependency_analyzer.py create mode 100644 neural_networks/gnn/infrastructure_gnn.py create mode 100644 neural_networks/rl/policy_optimizer.py create mode 100644 neural_networks/rl/resource_allocator.py create mode 100644 neural_networks/rnn/anomaly_detector.py create mode 100644 neural_networks/rnn/lstm_predictor.py create mode 100644 neural_networks/transformers/bert_model.py create mode 100644 neural_networks/transformers/gpt_model.py create mode 100644 neural_networks/transformers/t5_model.py diff --git a/neural_networks/cnn/log_analyzer.py b/neural_networks/cnn/log_analyzer.py new file mode 100644 index 0000000..7c8253d --- /dev/null +++ b/neural_networks/cnn/log_analyzer.py @@ -0,0 +1,164 @@ +"""Log pattern recognition using CNN-simulated sliding-window feature extraction.""" +from __future__ import annotations +import re +import math +import logging +import statistics +from dataclasses import dataclass, field +from typing import List, Dict, Tuple, Optional +from collections import 
defaultdict + +logger = logging.getLogger(__name__) + +_SEVERITY_MAP = {"debug": 0.1, "info": 0.3, "warn": 0.5, "warning": 0.5, + "error": 0.8, "critical": 1.0, "fatal": 1.0} + +_KEYWORDS = ["timeout", "exception", "fail", "error", "retry", "connect", + "disconnect", "success", "start", "stop", "deploy", "crash"] + + +@dataclass +class LogPattern: + pattern_id: str + regex: str + label: str + severity: float = 0.3 + examples: List[str] = field(default_factory=list) + + +@dataclass +class LogSample: + text: str + label: str + features: List[float] = field(default_factory=list) + + +class LogFeatureExtractor: + def extract(self, log_line: str) -> List[float]: + lower = log_line.lower() + severity = 0.3 + for kw, val in _SEVERITY_MAP.items(): + if kw in lower: + severity = val + break + length_feat = min(1.0, len(log_line) / 500.0) + keyword_counts = [1.0 if kw in lower else 0.0 for kw in _KEYWORDS] + digit_ratio = sum(1 for c in log_line if c.isdigit()) / (len(log_line) + 1e-9) + upper_ratio = sum(1 for c in log_line if c.isupper()) / (len(log_line) + 1e-9) + has_ip = 1.0 if re.search(r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}', log_line) else 0.0 + has_ts = 1.0 if re.search(r'\d{4}-\d{2}-\d{2}', log_line) else 0.0 + word_count = min(1.0, len(log_line.split()) / 50.0) + return [severity, length_feat, digit_ratio, upper_ratio, + has_ip, has_ts, word_count] + keyword_counts + + +class LogCNNClassifier: + """1-D convolution simulated with sliding-window feature averaging.""" + + def __init__(self, kernel_size: int = 3, num_filters: int = 8) -> None: + self.kernel_size = kernel_size + self.num_filters = num_filters + self._class_centroids: Dict[str, List[float]] = {} + self._labels: List[str] = [] + + def _convolve(self, features: List[float]) -> List[float]: + if len(features) < self.kernel_size: + return features + pooled = [] + for i in range(len(features) - self.kernel_size + 1): + window = features[i: i + self.kernel_size] + pooled.append(sum(window) / self.kernel_size) + 
return pooled + + def fit(self, samples: List[LogSample]) -> None: + logger.info("Training LogCNNClassifier on %d samples", len(samples)) + class_feats: Dict[str, List[List[float]]] = defaultdict(list) + for s in samples: + conv = self._convolve(s.features) + class_feats[s.label].append(conv) + self._labels = list(class_feats.keys()) + for label, feat_list in class_feats.items(): + min_len = min(len(f) for f in feat_list) + centroid = [statistics.mean(f[d] for f in feat_list if d < len(f)) + for d in range(min_len)] + self._class_centroids[label] = centroid + logger.info("Trained on labels: %s", self._labels) + + def classify(self, features: List[float]) -> Tuple[str, float]: + conv = self._convolve(features) + if not self._class_centroids: + return "unknown", 0.0 + best_label, best_score = "unknown", -float("inf") + for label, centroid in self._class_centroids.items(): + min_len = min(len(conv), len(centroid)) + score = sum(conv[d] * centroid[d] for d in range(min_len)) + if score > best_score: + best_score, best_label = score, label + norm = (best_score + 10) / 20.0 + return best_label, round(max(0.0, min(1.0, norm)), 4) + + +class LogAnalyzer: + def __init__(self) -> None: + self.extractor = LogFeatureExtractor() + self.classifier = LogCNNClassifier() + self._patterns: List[LogPattern] = [] + self._trained = False + logger.info("LogAnalyzer initialised") + + def train(self, log_examples: List[Tuple[str, str]]) -> None: + logger.info("Training on %d log examples", len(log_examples)) + samples = [] + for text, label in log_examples: + feats = self.extractor.extract(text) + samples.append(LogSample(text=text, label=label, features=feats)) + self.classifier.fit(samples) + self._trained = True + + def classify(self, log_line: str) -> Tuple[str, float]: + if not self._trained: + feats = self.extractor.extract(log_line) + sev = feats[0] + if sev >= 0.8: + return "error", sev + elif sev >= 0.5: + return "warning", sev + return "info", 1.0 - sev + feats = 
self.extractor.extract(log_line) + label, conf = self.classifier.classify(feats) + logger.debug("Classified log line as '%s' (conf=%.4f)", label, conf) + return label, conf + + def batch_classify(self, log_lines: List[str]) -> List[Tuple[str, float]]: + logger.info("Batch classifying %d log lines", len(log_lines)) + return [self.classify(line) for line in log_lines] + + def extract_patterns(self, log_lines: List[str]) -> List[LogPattern]: + logger.info("Extracting patterns from %d lines", len(log_lines)) + pattern_map: Dict[str, List[str]] = defaultdict(list) + for line in log_lines: + template = re.sub(r'\d+', '', line) + template = re.sub(r'\b[0-9a-f-]{8,}\b', '', template) + pattern_map[template[:80]].append(line) + patterns = [] + for i, (tmpl, examples) in enumerate(pattern_map.items()): + feats = self.extractor.extract(examples[0]) + patterns.append(LogPattern( + pattern_id=f"P{i:04d}", + regex=re.escape(tmpl).replace(r'\', r'\d+'), + label=self.classify(examples[0])[0], + severity=feats[0], + examples=examples[:3], + )) + self._patterns = patterns + return patterns + + def cluster_similar_logs(self, log_lines: List[str], + n_clusters: int = 5) -> Dict[int, List[str]]: + logger.info("Clustering %d logs into %d clusters", len(log_lines), n_clusters) + feats = [self.extractor.extract(line) for line in log_lines] + clusters: Dict[int, List[str]] = defaultdict(list) + for line, feat in zip(log_lines, feats): + key = int(sum(feat) * 10) % n_clusters + clusters[key].append(line) + return dict(clusters) diff --git a/neural_networks/cnn/metric_classifier.py b/neural_networks/cnn/metric_classifier.py new file mode 100644 index 0000000..d9f5fa1 --- /dev/null +++ b/neural_networks/cnn/metric_classifier.py @@ -0,0 +1,162 @@ +"""Metric time-series classification using CNN-simulated feature-based approach.""" +from __future__ import annotations +import math +import logging +import statistics +from dataclasses import dataclass, field +from typing import List, Dict, Tuple, 
Optional +from collections import defaultdict + +logger = logging.getLogger(__name__) + + +@dataclass +class MetricSample: + name: str + values: List[float] + label: str = "unknown" + + +@dataclass +class TimeSeriesFeatures: + mean: float + std: float + trend: float + min_val: float + max_val: float + range_val: float + autocorr: float + seasonality: float + + def as_list(self) -> List[float]: + return [self.mean, self.std, self.trend, self.min_val, + self.max_val, self.range_val, self.autocorr, self.seasonality] + + +def extract_features(values: List[float]) -> TimeSeriesFeatures: + if not values: + return TimeSeriesFeatures(0, 0, 0, 0, 0, 0, 0, 0) + n = len(values) + mean = statistics.mean(values) + std = statistics.stdev(values) if n > 1 else 0.0 + min_val, max_val = min(values), max(values) + range_val = max_val - min_val + xs = list(range(n)) + x_mean = statistics.mean(xs) + num = sum((x - x_mean) * (y - mean) for x, y in zip(xs, values)) + denom = sum((x - x_mean) ** 2 for x in xs) + 1e-9 + trend = num / denom + if n > 1: + lag1 = values[1:] + lag0 = values[:-1] + ac_num = sum((a - mean) * (b - mean) for a, b in zip(lag0, lag1)) + ac_denom = sum((v - mean) ** 2 for v in values) + 1e-9 + autocorr = ac_num / ac_denom + else: + autocorr = 0.0 + period = max(2, n // 4) + seasonality = 0.0 + if n >= period * 2: + seasonal_pairs = [(values[i], values[i + period]) for i in range(n - period)] + diffs = [abs(a - b) for a, b in seasonal_pairs] + seasonality = 1.0 / (statistics.mean(diffs) + 1e-9) if diffs else 0.0 + seasonality = min(1.0, seasonality / (range_val + 1e-9)) + return TimeSeriesFeatures(mean=mean, std=std, trend=trend, + min_val=min_val, max_val=max_val, + range_val=range_val, autocorr=autocorr, + seasonality=seasonality) + + +class CNNClassifier: + """Feature-based classifier with 1D conv simulation.""" + + def __init__(self, kernel_size: int = 3) -> None: + self.kernel_size = kernel_size + self._centroids: Dict[str, List[float]] = {} + + @staticmethod + 
def _cosine_sim(a: List[float], b: List[float]) -> float: + dot = sum(x * y for x, y in zip(a, b)) + na = math.sqrt(sum(x * x for x in a)) + 1e-9 + nb = math.sqrt(sum(x * x for x in b)) + 1e-9 + return dot / (na * nb) + + def _sliding_pool(self, values: List[float]) -> List[float]: + if len(values) < self.kernel_size: + return values + return [sum(values[i: i + self.kernel_size]) / self.kernel_size + for i in range(len(values) - self.kernel_size + 1)] + + def fit(self, samples: List[MetricSample]) -> None: + logger.info("CNNClassifier fitting on %d samples", len(samples)) + by_label: Dict[str, List[List[float]]] = defaultdict(list) + for s in samples: + pooled = self._sliding_pool(s.values) + feats = extract_features(pooled).as_list() + by_label[s.label].append(feats) + for label, feat_list in by_label.items(): + min_len = min(len(f) for f in feat_list) + centroid = [statistics.mean(f[d] for f in feat_list) + for d in range(min_len)] + self._centroids[label] = centroid + logger.info("CNNClassifier trained on labels: %s", list(self._centroids.keys())) + + def predict(self, values: List[float]) -> Tuple[str, float]: + if not self._centroids: + return "unknown", 0.0 + pooled = self._sliding_pool(values) + feats = extract_features(pooled).as_list() + best_label, best_sim = "unknown", -2.0 + for label, centroid in self._centroids.items(): + sim = self._cosine_sim(feats, centroid) + if sim > best_sim: + best_sim, best_label = sim, label + conf = (best_sim + 1) / 2.0 + return best_label, round(conf, 4) + + +class MetricClassifier: + def __init__(self) -> None: + self.cnn = CNNClassifier() + self._trained = False + logger.info("MetricClassifier initialised") + + def fit(self, samples: List[MetricSample]) -> None: + logger.info("MetricClassifier fitting on %d samples", len(samples)) + self.cnn.fit(samples) + self._trained = True + + def classify(self, sample: MetricSample) -> Tuple[str, float]: + label, conf = self.cnn.predict(sample.values) + 
logger.debug("MetricClassifier: '%s' -> '%s' (%.4f)", sample.name, label, conf) + return label, conf + + def detect_regime_change(self, values: List[float], + window: int = 20) -> List[int]: + logger.info("Detecting regime changes in series of length %d", len(values)) + change_points = [] + for i in range(window, len(values) - window): + before = values[i - window: i] + after = values[i: i + window] + mean_diff = abs(statistics.mean(after) - statistics.mean(before)) + std_threshold = (statistics.stdev(before) if len(before) > 1 else 1.0) + if mean_diff > 2 * std_threshold: + change_points.append(i) + logger.info("Found %d regime changes", len(change_points)) + return change_points + + def compare_metrics(self, a: MetricSample, + b: MetricSample) -> Dict[str, float]: + fa = extract_features(a.values) + fb = extract_features(b.values) + la, lb = fa.as_list(), fb.as_list() + sim = CNNClassifier._cosine_sim(la, lb) + mean_diff = abs(fa.mean - fb.mean) + std_diff = abs(fa.std - fb.std) + trend_diff = abs(fa.trend - fb.trend) + return { + "similarity": round(sim, 4), + "mean_diff": round(mean_diff, 4), + "std_diff": round(std_diff, 4), + "trend_diff": round(trend_diff, 4), + } diff --git a/neural_networks/gnn/dependency_analyzer.py b/neural_networks/gnn/dependency_analyzer.py new file mode 100644 index 0000000..9142d4e --- /dev/null +++ b/neural_networks/gnn/dependency_analyzer.py @@ -0,0 +1,144 @@ +"""Service dependency analysis using GNN-inspired graph algorithms.""" +from __future__ import annotations +import logging +from dataclasses import dataclass, field +from typing import List, Dict, Set, Optional, Tuple +from collections import deque + +logger = logging.getLogger(__name__) + + +@dataclass +class DependencyEdge: + src: str + dst: str + edge_type: str = "http" # http, grpc, db, queue + weight: float = 1.0 + call_rate: float = 0.0 # calls per second + + +@dataclass +class DependencyGraph: + services: Dict[str, Dict] = field(default_factory=dict) + edges: 
List[DependencyEdge] = field(default_factory=list) + adj_out: Dict[str, List[str]] = field(default_factory=dict) + adj_in: Dict[str, List[str]] = field(default_factory=dict) + + +class DependencyAnalyzer: + def __init__(self) -> None: + self.graph = DependencyGraph() + logger.info("DependencyAnalyzer initialised") + + def add_service(self, name: str, service_type: str = "microservice", + criticality: float = 0.5, replicas: int = 1) -> None: + self.graph.services[name] = { + "type": service_type, + "criticality": criticality, + "replicas": replicas, + } + self.graph.adj_out.setdefault(name, []) + self.graph.adj_in.setdefault(name, []) + logger.debug("Added service '%s'", name) + + def add_dependency(self, src: str, dst: str, edge_type: str = "http", + weight: float = 1.0, call_rate: float = 0.0) -> None: + for svc in (src, dst): + if svc not in self.graph.services: + self.add_service(svc) + edge = DependencyEdge(src=src, dst=dst, edge_type=edge_type, + weight=weight, call_rate=call_rate) + self.graph.edges.append(edge) + self.graph.adj_out[src].append(dst) + self.graph.adj_in[dst].append(src) + logger.debug("Added dependency %s -> %s (%s)", src, dst, edge_type) + + def analyze_impact(self, service: str) -> Dict[str, List[str]]: + """BFS downstream and upstream impact of a service failure.""" + logger.info("Analyzing impact of '%s'", service) + if service not in self.graph.services: + return {"downstream": [], "upstream": []} + + def bfs(adj: Dict[str, List[str]], start: str) -> List[str]: + visited: Set[str] = set() + queue = deque([start]) + result = [] + while queue: + node = queue.popleft() + for nb in adj.get(node, []): + if nb not in visited: + visited.add(nb) + result.append(nb) + queue.append(nb) + return result + + return { + "downstream": bfs(self.graph.adj_out, service), + "upstream": bfs(self.graph.adj_in, service), + } + + def find_circular_deps(self) -> List[List[str]]: + """DFS-based cycle detection (Tarjan-inspired).""" + logger.info("Finding circular 
dependencies") + visited: Set[str] = set() + stack: List[str] = [] + in_stack: Set[str] = set() + cycles: List[List[str]] = [] + + def dfs(node: str) -> None: + visited.add(node) + stack.append(node) + in_stack.add(node) + for nb in self.graph.adj_out.get(node, []): + if nb not in visited: + dfs(nb) + elif nb in in_stack: + cycle_start = stack.index(nb) + cycle = stack[cycle_start:] + [nb] + if cycle not in cycles: + cycles.append(cycle) + logger.warning("Cycle detected: %s", " -> ".join(cycle)) + stack.pop() + in_stack.discard(node) + + for svc in list(self.graph.services.keys()): + if svc not in visited: + dfs(svc) + return cycles + + def compute_criticality_scores(self) -> Dict[str, float]: + logger.info("Computing criticality scores") + scores: Dict[str, float] = {} + n = max(len(self.graph.services), 1) + for svc in self.graph.services: + in_degree = len(self.graph.adj_in.get(svc, [])) + out_degree = len(self.graph.adj_out.get(svc, [])) + impact = len(self.analyze_impact(svc)["downstream"]) + base = self.graph.services[svc].get("criticality", 0.5) + score = base * 0.4 + (in_degree / n) * 0.2 + (impact / n) * 0.4 + scores[svc] = round(min(1.0, score), 4) + return dict(sorted(scores.items(), key=lambda x: -x[1])) + + def suggest_refactoring(self) -> List[str]: + logger.info("Generating refactoring suggestions") + suggestions: List[str] = [] + cycles = self.find_circular_deps() + for cycle in cycles: + suggestions.append( + f"Break circular dependency: {' -> '.join(cycle)}. " + "Consider introducing an event/message queue.") + scores = self.compute_criticality_scores() + for svc, score in scores.items(): + in_deg = len(self.graph.adj_in.get(svc, [])) + out_deg = len(self.graph.adj_out.get(svc, [])) + if score > 0.7: + suggestions.append( + f"'{svc}' is highly critical (score={score}). " + "Consider adding redundancy or circuit breakers.") + if in_deg + out_deg > 6: + suggestions.append( + f"'{svc}' has high coupling (in={in_deg}, out={out_deg}). 
" + "Consider splitting into smaller services.") + if not suggestions: + suggestions.append("No major refactoring needed. Dependency graph looks healthy.") + return suggestions diff --git a/neural_networks/gnn/infrastructure_gnn.py b/neural_networks/gnn/infrastructure_gnn.py new file mode 100644 index 0000000..d5ff78a --- /dev/null +++ b/neural_networks/gnn/infrastructure_gnn.py @@ -0,0 +1,140 @@ +"""Graph Neural Network for infrastructure topology analysis.""" +from __future__ import annotations +import math +import logging +from dataclasses import dataclass, field +from typing import List, Dict, Set, Optional, Tuple +from collections import deque + +logger = logging.getLogger(__name__) + + +@dataclass +class InfraNode: + node_id: str + node_type: str # server, pod, service, database, gateway + features: List[float] = field(default_factory=lambda: [0.0] * 8) + cpu_util: float = 0.0 + memory_util: float = 0.0 + error_rate: float = 0.0 + latency_ms: float = 0.0 + + +@dataclass +class InfraEdge: + src: str + dst: str + weight: float = 1.0 + bandwidth_mbps: float = 1000.0 + latency_ms: float = 1.0 + + +class GNNLayer: + """Single message-passing layer: averages neighbour features.""" + + def __init__(self, feature_dim: int = 8) -> None: + self.feature_dim = feature_dim + + def forward(self, nodes: Dict[str, InfraNode], + adj: Dict[str, List[str]]) -> Dict[str, List[float]]: + updated: Dict[str, List[float]] = {} + for nid, node in nodes.items(): + neighbours = adj.get(nid, []) + if neighbours: + agg = [0.0] * self.feature_dim + for nb_id in neighbours: + nb = nodes[nb_id] + for i, f in enumerate(nb.features[:self.feature_dim]): + agg[i] += f + agg = [a / len(neighbours) for a in agg] + combined = [math.tanh(node.features[i] + agg[i]) + for i in range(self.feature_dim)] + else: + combined = [math.tanh(f) for f in node.features[:self.feature_dim]] + updated[nid] = combined + return updated + + +class InfrastructureGNN: + def __init__(self, feature_dim: int = 8, 
num_layers: int = 2) -> None: + self.feature_dim = feature_dim + self.nodes: Dict[str, InfraNode] = {} + self.edges: List[InfraEdge] = [] + self.adj: Dict[str, List[str]] = {} + self.layers = [GNNLayer(feature_dim) for _ in range(num_layers)] + logger.info("InfrastructureGNN initialised with %d layers", num_layers) + + def add_node(self, node: InfraNode) -> None: + if len(node.features) < self.feature_dim: + node.features += [0.0] * (self.feature_dim - len(node.features)) + node.features[0] = node.cpu_util + node.features[1] = node.memory_util + node.features[2] = node.error_rate + node.features[3] = node.latency_ms / 1000.0 + self.nodes[node.node_id] = node + self.adj.setdefault(node.node_id, []) + logger.debug("Added node '%s' (type=%s)", node.node_id, node.node_type) + + def add_edge(self, edge: InfraEdge) -> None: + self.edges.append(edge) + self.adj.setdefault(edge.src, []).append(edge.dst) + self.adj.setdefault(edge.dst, []).append(edge.src) + logger.debug("Added edge %s -> %s", edge.src, edge.dst) + + def forward_pass(self) -> Dict[str, List[float]]: + logger.info("Running GNN forward pass over %d nodes", len(self.nodes)) + node_features = {nid: list(n.features[:self.feature_dim]) + for nid, n in self.nodes.items()} + for layer in self.layers: + node_features = layer.forward(self.nodes, self.adj) + for nid, feats in node_features.items(): + self.nodes[nid].features = feats + return node_features + + def detect_bottlenecks(self) -> List[Tuple[str, float]]: + logger.info("Detecting bottlenecks") + self.forward_pass() + scores = [] + for nid, node in self.nodes.items(): + degree = len(self.adj.get(nid, [])) + score = (node.cpu_util * 0.4 + node.memory_util * 0.3 + + node.error_rate * 0.2 + (degree / (len(self.nodes) + 1e-9)) * 0.1) + scores.append((nid, round(score, 4))) + scores.sort(key=lambda x: -x[1]) + logger.info("Top bottleneck: %s", scores[0] if scores else "none") + return scores + + def find_critical_paths(self, src: str, dst: str) -> List[List[str]]: 
+ logger.info("Finding critical paths from '%s' to '%s'", src, dst) + if src not in self.nodes or dst not in self.nodes: + return [] + paths: List[List[str]] = [] + queue: deque = deque([[src]]) + visited_paths: Set[str] = set() + while queue: + path = queue.popleft() + current = path[-1] + if current == dst: + paths.append(path) + continue + if len(path) > len(self.nodes): + continue + for neighbour in self.adj.get(current, []): + if neighbour not in path: + new_path = path + [neighbour] + key = "->".join(new_path) + if key not in visited_paths: + visited_paths.add(key) + queue.append(new_path) + paths.sort(key=len) + return paths[:3] + + def predict_failure_risk(self) -> Dict[str, float]: + logger.info("Predicting failure risk for all nodes") + self.forward_pass() + risks = {} + for nid, node in self.nodes.items(): + risk = min(1.0, node.cpu_util * 0.35 + node.memory_util * 0.35 + + node.error_rate * 0.2 + node.latency_ms / 5000.0 * 0.1) + risks[nid] = round(risk, 4) + return risks diff --git a/neural_networks/rl/policy_optimizer.py b/neural_networks/rl/policy_optimizer.py new file mode 100644 index 0000000..3020002 --- /dev/null +++ b/neural_networks/rl/policy_optimizer.py @@ -0,0 +1,151 @@ +"""Reinforcement learning policy optimisation using tabular Q-learning.""" +from __future__ import annotations +import math +import json +import random +import logging +from dataclasses import dataclass, field +from typing import List, Dict, Tuple, Optional, Any +from collections import deque + +logger = logging.getLogger(__name__) + + +@dataclass +class State: + features: Tuple + def __hash__(self) -> int: + return hash(self.features) + def __eq__(self, other: object) -> bool: + return isinstance(other, State) and self.features == other.features + + +@dataclass +class Action: + action_id: int + params: Dict[str, Any] = field(default_factory=dict) + def __hash__(self) -> int: + return hash(self.action_id) + def __eq__(self, other: object) -> bool: + return isinstance(other, 
Action) and self.action_id == other.action_id + + +@dataclass +class Transition: + state: State + action: Action + reward: float + next_state: State + done: bool = False + + +class ReplayBuffer: + def __init__(self, capacity: int = 10000) -> None: + self._buffer: deque = deque(maxlen=capacity) + + def push(self, transition: Transition) -> None: + self._buffer.append(transition) + + def sample(self, batch_size: int) -> List[Transition]: + return random.sample(list(self._buffer), min(batch_size, len(self._buffer))) + + def __len__(self) -> int: + return len(self._buffer) + + +class PolicyNetwork: + """Tabular Q-function stored as a nested dict.""" + + def __init__(self, n_actions: int, learning_rate: float = 0.1, + gamma: float = 0.99) -> None: + self.n_actions = n_actions + self.lr = learning_rate + self.gamma = gamma + self._q: Dict[Any, List[float]] = {} + + def _get_q(self, state: State) -> List[float]: + key = state.features + if key not in self._q: + self._q[key] = [0.0] * self.n_actions + return self._q[key] + + def update(self, transition: Transition) -> float: + q_vals = self._get_q(transition.state) + next_q = self._get_q(transition.next_state) + target = transition.reward + (0.0 if transition.done + else self.gamma * max(next_q)) + td_error = target - q_vals[transition.action.action_id] + q_vals[transition.action.action_id] += self.lr * td_error + return abs(td_error) + + def best_action(self, state: State) -> int: + return int(max(range(self.n_actions), key=lambda a: self._get_q(state)[a])) + + def serialize(self) -> Dict: + return {str(k): v for k, v in self._q.items()} + + def deserialize(self, data: Dict) -> None: + import ast + self._q = {ast.literal_eval(k): v for k, v in data.items()} + + +class PolicyOptimizer: + def __init__(self, n_actions: int = 4, learning_rate: float = 0.1, + gamma: float = 0.99, batch_size: int = 32) -> None: + self.n_actions = n_actions + self.batch_size = batch_size + self.policy = PolicyNetwork(n_actions, learning_rate, 
gamma) + self.buffer = ReplayBuffer() + self._step_count = 0 + self._total_reward = 0.0 + logger.info("PolicyOptimizer initialised (n_actions=%d)", n_actions) + + def observe(self, state: State, action: Action, reward: float, + next_state: State, done: bool = False) -> None: + self._total_reward += reward + self._step_count += 1 + self.buffer.push(Transition(state, action, reward, next_state, done)) + logger.debug("Step %d: reward=%.4f", self._step_count, reward) + + def optimize_step(self) -> Optional[float]: + if len(self.buffer) < self.batch_size: + return None + batch = self.buffer.sample(self.batch_size) + td_errors = [self.policy.update(t) for t in batch] + avg_td = sum(td_errors) / len(td_errors) + logger.debug("Optimize step: avg_td_error=%.4f", avg_td) + return avg_td + + def get_action(self, state: State, epsilon: float = 0.1) -> Action: + if random.random() < epsilon: + action_id = random.randrange(self.n_actions) + logger.debug("Exploring: random action %d", action_id) + else: + action_id = self.policy.best_action(state) + logger.debug("Exploiting: best action %d", action_id) + return Action(action_id=action_id) + + def evaluate_policy(self, eval_states: List[State]) -> Dict[str, float]: + logger.info("Evaluating policy on %d states", len(eval_states)) + q_means = [] + for s in eval_states: + q_vals = self.policy._get_q(s) + q_means.append(max(q_vals)) + return { + "mean_q": round(sum(q_means) / (len(q_means) + 1e-9), 4), + "max_q": round(max(q_means) if q_means else 0.0, 4), + "steps": self._step_count, + "total_reward": round(self._total_reward, 4), + } + + def save_policy(self, path: str) -> None: + data = {"q_table": self.policy.serialize(), "n_actions": self.n_actions} + with open(path, "w") as f: + json.dump(data, f) + logger.info("Policy saved to '%s'", path) + + def load_policy(self, path: str) -> None: + with open(path, "r") as f: + data = json.load(f) + self.policy.deserialize(data["q_table"]) + logger.info("Policy loaded from '%s'", path) 
diff --git a/neural_networks/rl/resource_allocator.py b/neural_networks/rl/resource_allocator.py new file mode 100644 index 0000000..d33e153 --- /dev/null +++ b/neural_networks/rl/resource_allocator.py @@ -0,0 +1,136 @@ +"""Resource allocation using Q-learning reinforcement learning.""" +from __future__ import annotations +import math +import random +import logging +import statistics +from dataclasses import dataclass, field +from typing import List, Dict, Tuple, Optional + +logger = logging.getLogger(__name__) + + +@dataclass +class ResourceState: + cpu_available: float # 0.0 – 1.0 + memory_available: float # 0.0 – 1.0 + queue_depth: int # pending jobs + active_tasks: int = 0 + + def discretize(self) -> Tuple[int, int, int]: + cpu_bin = min(9, int(self.cpu_available * 10)) + mem_bin = min(9, int(self.memory_available * 10)) + q_bin = min(9, min(self.queue_depth, 9)) + return (cpu_bin, mem_bin, q_bin) + + +@dataclass +class ResourceAction: + cpu_alloc: float # fraction to allocate (0.0 – 1.0) + memory_alloc: float # fraction to allocate (0.0 – 1.0) + action_id: int = 0 + + +_ACTION_SPACE: List[Tuple[float, float]] = [ + (0.1, 0.1), (0.1, 0.3), (0.3, 0.1), (0.3, 0.3), + (0.5, 0.3), (0.3, 0.5), (0.5, 0.5), (0.7, 0.5), + (0.5, 0.7), (0.7, 0.7), +] + + +class QTable: + def __init__(self, n_actions: int, lr: float, gamma: float) -> None: + self.n_actions = n_actions + self.lr = lr + self.gamma = gamma + self._q: Dict[Tuple, List[float]] = {} + + def _get(self, key: Tuple) -> List[float]: + if key not in self._q: + self._q[key] = [0.0] * self.n_actions + return self._q[key] + + def update(self, state_key: Tuple, action_id: int, reward: float, + next_key: Tuple, done: bool = False) -> None: + q = self._get(state_key) + next_q = self._get(next_key) + target = reward + (0.0 if done else self.gamma * max(next_q)) + q[action_id] += self.lr * (target - q[action_id]) + + def best_action(self, key: Tuple) -> int: + q = self._get(key) + return int(max(range(self.n_actions), 
class ResourceAllocator:
    """Epsilon-greedy Q-learning allocator over the discrete _ACTION_SPACE.

    Learns from (state, action, reward, next_state) transitions and picks
    allocations that balance unused capacity, queue drain and over-allocation.
    """

    def __init__(self, learning_rate: float = 0.1, gamma: float = 0.95,
                 epsilon: float = 0.2, epsilon_decay: float = 0.995,
                 min_epsilon: float = 0.01) -> None:
        self.n_actions = len(_ACTION_SPACE)
        self.q_table = QTable(self.n_actions, learning_rate, gamma)
        self.epsilon = epsilon
        self.epsilon_decay = epsilon_decay
        self.min_epsilon = min_epsilon
        self._reward_history: List[float] = []
        self._step = 0
        logger.info("ResourceAllocator initialised (n_actions=%d, lr=%.3f)",
                    self.n_actions, learning_rate)

    def compute_reward(self, state: ResourceState, action: ResourceAction,
                       next_state: ResourceState) -> float:
        """Reward = unused-capacity bonus + queue penalty + over-allocation
        penalty + (capped) queue-drain throughput bonus."""
        utilisation = (1.0 - action.cpu_alloc) * 0.4 + (1.0 - action.memory_alloc) * 0.4
        queue_penalty = next_state.queue_depth * -0.05
        # -0.5 for each resource requested beyond what the state offers
        overalloc_penalty = sum(
            -0.5 for exceeded in (action.cpu_alloc > state.cpu_available,
                                  action.memory_alloc > state.memory_available)
            if exceeded)
        throughput = min(1.0, (state.queue_depth - next_state.queue_depth) * 0.1)
        reward = utilisation + queue_penalty + overalloc_penalty + throughput
        logger.debug("Reward: %.4f (util=%.4f queue=%.4f overalloc=%.4f tp=%.4f)",
                     reward, utilisation, queue_penalty, overalloc_penalty, throughput)
        return reward

    def observe_and_learn(self, state: ResourceState, action: ResourceAction,
                          reward: float, next_state: ResourceState,
                          done: bool = False) -> None:
        """Apply one Q-learning update and decay the exploration rate."""
        self.q_table.update(state.discretize(), action.action_id, reward,
                            next_state.discretize(), done)
        self._reward_history.append(reward)
        self.epsilon = max(self.min_epsilon, self.epsilon * self.epsilon_decay)
        self._step += 1
        logger.debug("Step %d: epsilon=%.4f", self._step, self.epsilon)

    def allocate(self, state: ResourceState) -> ResourceAction:
        """Pick an action epsilon-greedily, clipped to the available capacity."""
        key = state.discretize()
        if random.random() < self.epsilon:
            action_id = random.randrange(self.n_actions)
            logger.debug("Exploring: random action %d", action_id)
        else:
            action_id = self.q_table.best_action(key)
            logger.debug("Exploiting: action %d", action_id)
        raw_cpu, raw_mem = _ACTION_SPACE[action_id]
        return ResourceAction(cpu_alloc=min(raw_cpu, state.cpu_available),
                              memory_alloc=min(raw_mem, state.memory_available),
                              action_id=action_id)

    def get_efficiency_metrics(self) -> Dict[str, float]:
        """Summary statistics over the most recent (up to 100) rewards."""
        if not self._reward_history:
            return {"avg_reward": 0.0, "steps": 0, "epsilon": self.epsilon}
        recent = self._reward_history[-100:]
        spread = statistics.stdev(recent) if len(recent) > 1 else 0.0
        slope = ((recent[-1] - recent[0]) / (len(recent) + 1e-9)
                 if len(recent) > 1 else 0.0)
        return {
            "avg_reward": round(statistics.mean(recent), 4),
            "reward_std": round(spread, 4),
            "reward_trend": round(slope, 6),
            "epsilon": round(self.epsilon, 4),
            "steps": self._step,
            "q_states": len(self.q_table._q),
        }
class IsolationTree:
    """Single isolation tree over scalar data, built from random splits.

    Extreme values fall below an early split and therefore get short path
    lengths, which the forest turns into high anomaly scores.
    """

    def __init__(self, max_depth: int, rng: random.Random) -> None:
        self.max_depth = max_depth
        self.rng = rng
        self._splits: List[float] = []

    def fit(self, data: List[float]) -> None:
        """Draw up to ``max_depth`` random split points, shrinking the search
        interval toward the data median after each draw.

        FIX: the median of ``data`` is loop-invariant (``data`` never changes),
        so it is now computed once instead of once per split iteration
        (was O(depth * n log n), now O(n log n) plus O(depth) draws).

        Raises:
            ValueError: if ``data`` is empty (from ``min``/``max``), as before.
        """
        self._splits = []
        lo, hi = min(data), max(data)
        mid = statistics.median(data)  # hoisted loop-invariant
        for _ in range(self.max_depth):
            if lo >= hi:
                break  # interval collapsed (e.g. constant data): stop early
            split = self.rng.uniform(lo, hi)
            self._splits.append(split)
            if split < mid:
                lo = split
            else:
                hi = split

    def path_length(self, value: float) -> int:
        """Number of splits examined until ``value`` falls at/below one.

        Returns ``len(self._splits)`` if no split isolates the value
        (and 0 for an unfitted/degenerate tree).
        """
        for depth, split in enumerate(self._splits, start=1):
            if value <= split:
                return depth
        return len(self._splits)
class AnomalyDetector:
    """Ensemble detector averaging windowed z-scores and isolation-forest scores.

    The decision threshold is calibrated in :meth:`fit` from the score
    distribution of known-normal data at the configured contamination rate.
    """

    def __init__(self, config: Optional[AnomalyConfig] = None) -> None:
        self.config = config or AnomalyConfig()
        self.windowed = WindowedDetector(self.config)
        self.isolation = IsolationForestDetector(self.config)
        self._threshold: float = 0.6  # fallback until fit() calibrates it
        logger.info("AnomalyDetector initialised: %s", self.config)

    def fit(self, normal_data: List[float]) -> None:
        """Calibrate both sub-detectors and the ensemble threshold.

        Raises:
            ValueError: if ``normal_data`` is empty (previously this crashed
                with a StatisticsError deep inside WindowedDetector).
        """
        if not normal_data:
            raise ValueError("normal_data must not be empty")
        logger.info("Fitting AnomalyDetector on %d samples", len(normal_data))
        self.windowed.fit(normal_data)
        self.isolation.fit(normal_data)
        iso_scores = self.isolation.score(normal_data)
        win_scores = self.windowed.score(normal_data)
        combined = [(i + w) / 2 for i, w in zip(iso_scores, win_scores)]
        # BUG FIX: the quantile index could equal len(combined) when
        # contamination is 0 (or tiny), raising IndexError; clamp to last slot.
        idx = min(len(combined) - 1,
                  int(len(combined) * (1 - self.config.contamination)))
        self._threshold = sorted(combined)[idx]
        logger.info("Threshold set to %.4f", self._threshold)

    def detect(self, sequence: List[float]) -> List[AnomalyScore]:
        """Score every point; flag those whose ensemble score exceeds the
        calibrated threshold."""
        logger.info("Detecting anomalies in sequence of length %d", len(sequence))
        iso_scores = self.isolation.score(sequence)
        win_scores = self.windowed.score(sequence)
        results = []
        for idx, (val, i_s, w_s) in enumerate(zip(sequence, iso_scores, win_scores)):
            combined = (i_s + w_s) / 2
            results.append(AnomalyScore(
                index=idx,
                value=val,
                score=round(combined, 4),
                is_anomaly=combined > self._threshold,
                method="ensemble",
            ))
        return results

    def batch_detect(self, sequences: List[List[float]]) -> List[List[AnomalyScore]]:
        """Run :meth:`detect` over each sequence independently."""
        logger.info("Batch detecting over %d sequences", len(sequences))
        return [self.detect(seq) for seq in sequences]

    def get_threshold(self) -> float:
        """Return the current ensemble decision threshold."""
        return self._threshold
class LSTMPredictor:
    """Time-series forecaster built on the stacked simulated :class:`LSTMCell`s."""

    def __init__(self, config: Optional[LSTMConfig] = None) -> None:
        self.config = config or LSTMConfig()
        self.cells = [LSTMCell(1, self.config.hidden_size)
                      for _ in range(self.config.num_layers)]
        self._trained = False
        self._history: List[float] = []
        # Normalisation statistics, refreshed by _normalize().
        self._mean: float = 0.0
        self._std: float = 1.0
        logger.info("LSTMPredictor initialised: %s", self.config)

    def _normalize(self, series: List[float]) -> List[float]:
        # NOTE: mutates self._mean/_std as a side effect so that
        # _denormalize() matches the most recently normalised series.
        self._mean = statistics.mean(series)
        self._std = statistics.stdev(series) if len(series) > 1 else 1.0
        return [(x - self._mean) / (self._std + 1e-9) for x in series]

    def _denormalize(self, val: float) -> float:
        return val * self._std + self._mean

    def _run_cells(self, seq: List[float]) -> float:
        """Feed ``seq`` through the cell stack; return the final top-layer
        hidden unit (0.0 for an empty sequence)."""
        h = [[0.0] * self.config.hidden_size for _ in range(self.config.num_layers)]
        c = [[0.0] * self.config.hidden_size for _ in range(self.config.num_layers)]
        out = 0.0
        for x in seq:
            inp = x
            for layer_idx, cell in enumerate(self.cells):
                h[layer_idx], c[layer_idx] = cell.forward(inp, h[layer_idx], c[layer_idx])
                inp = h[layer_idx][0]
            out = h[-1][0]
        return out

    def fit(self, time_series: List[float]) -> None:
        """Record history and 'train' by running the cells over sliding windows.

        The simulated cells have fixed weights, so this is a warm-up pass only.
        """
        logger.info("Fitting LSTMPredictor on %d samples", len(time_series))
        self._history = list(time_series)
        norm = self._normalize(time_series)
        for _ in range(self.config.epochs):
            for i in range(len(norm) - self.config.seq_len - 1):
                _ = self._run_cells(norm[i: i + self.config.seq_len])
        self._trained = True
        logger.info("Training complete. mean=%.4f std=%.4f", self._mean, self._std)

    def predict(self, steps_ahead: int = 1) -> List[float]:
        """Autoregressively forecast ``steps_ahead`` values past the history.

        Raises:
            RuntimeError: if :meth:`fit` has not been called.
        """
        if not self._trained:
            raise RuntimeError("Call fit() before predict()")
        logger.info("Predicting %d steps ahead", steps_ahead)
        norm = self._normalize(self._history)  # refresh stats to current history
        window = list(norm[-self.config.seq_len:])
        predictions = []
        for _ in range(steps_ahead):
            out = self._run_cells(window)
            predictions.append(self._denormalize(out))
            window = window[1:] + [out]
        return predictions

    def rolling_forecast(self, horizon: int = 5) -> List[float]:
        """Forecast one step at a time, feeding each forecast back in.

        NOTE(review): this permanently appends forecasts to the stored
        history, so later calls build on predicted (not observed) values.
        """
        logger.info("Rolling forecast for horizon=%d", horizon)
        results = []
        for _ in range(horizon):
            nxt = self.predict(steps_ahead=1)[0]
            results.append(nxt)
            self._history.append(nxt)
        return results

    def anomaly_score(self, value: float) -> float:
        """Z-score of ``value`` against the last (up to) 50 history points."""
        if len(self._history) < 2:
            return 0.0
        tail = self._history[-50:]  # hoisted: slice was built three times before
        mean = statistics.mean(tail)
        std = statistics.stdev(tail) if len(tail) > 1 else 1.0
        score = abs(value - mean) / (std + 1e-9)
        logger.debug("Anomaly score for %.4f: %.4f", value, score)
        return round(score, 4)

    def evaluate(self, test_data: List[float]) -> dict:
        """Forecast len(test_data) steps and report MAE/RMSE against it.

        BUG FIX: empty ``test_data`` previously crashed in
        ``statistics.mean([])``; it now returns zeroed metrics.
        """
        logger.info("Evaluating on %d test points", len(test_data))
        if not test_data:
            return {"mae": 0.0, "rmse": 0.0, "n": 0, "predictions": []}
        preds = self.predict(steps_ahead=len(test_data))
        errors = [abs(p - a) for p, a in zip(preds, test_data)]
        mae = statistics.mean(errors)
        rmse = math.sqrt(statistics.mean([e ** 2 for e in errors]))
        return {"mae": round(mae, 4), "rmse": round(rmse, 4),
                "n": len(test_data), "predictions": preds}
@dataclass
class BERTConfig:
    """Hyper-parameters for the simulated BERT stack."""
    vocab_size: int = 30522
    hidden_size: int = 128
    num_heads: int = 4
    num_layers: int = 2
    max_position: int = 512
    dropout: float = 0.1
    mask_token: str = "[MASK]"
    cls_token: str = "[CLS]"
    sep_token: str = "[SEP]"


class BERTEmbedding:
    """Token + sinusoidal position embeddings (hash-based vocab simulation)."""

    def __init__(self, config: BERTConfig) -> None:
        self.config = config

    def tokenize(self, text: str) -> List[int]:
        """Map [CLS] + lowercase words + [SEP] to pseudo token ids via MD5."""
        cfg = self.config
        wrapped = [cfg.cls_token, *text.lower().split(), cfg.sep_token]
        return [int(hashlib.md5(tok.encode()).hexdigest(), 16) % cfg.vocab_size
                for tok in wrapped]

    def positional_encoding(self, length: int) -> List[List[float]]:
        """Standard sin/cos positional-encoding table, shape (length, hidden)."""
        hidden = self.config.hidden_size

        def cell(pos: int, dim: int) -> float:
            angle = pos / (10000 ** (2 * (dim // 2) / hidden))
            return math.sin(angle) if dim % 2 == 0 else math.cos(angle)

        return [[cell(pos, dim) for dim in range(hidden)] for pos in range(length)]

    def embed(self, token_ids: List[int]) -> List[List[float]]:
        """Average a hash-derived token signal with the positional encoding."""
        pe = self.positional_encoding(len(token_ids))
        hidden = self.config.hidden_size
        return [[(math.sin(tid * (dim + 1) * 0.01) + pe[pos][dim]) / 2.0
                 for dim in range(hidden)]
                for pos, tid in enumerate(token_ids)]
class BERTEncoder:
    """Stack of ``num_layers`` self-attention passes."""

    def __init__(self, config: BERTConfig) -> None:
        self.layers = [BERTAttention(config) for _ in range(config.num_layers)]

    def encode(self, embeddings: List[List[float]]) -> List[List[float]]:
        """Run the embeddings through every attention layer in order."""
        h = embeddings
        for layer in self.layers:
            h = layer.attend(h)
        return h


class BERTModel:
    """BERT-style encoder simulation: sentence vectors, zero-shot
    classification via label-vector similarity, and a toy fill-mask."""

    def __init__(self, config: Optional[BERTConfig] = None) -> None:
        self.config = config or BERTConfig()
        self.embedding = BERTEmbedding(self.config)
        self.encoder = BERTEncoder(self.config)
        logger.info("BERTModel initialised with config: %s", self.config)

    def _forward(self, text: str) -> List[List[float]]:
        """Tokenize -> embed -> encode; returns per-token hidden states."""
        token_ids = self.embedding.tokenize(text)
        embeds = self.embedding.embed(token_ids)
        return self.encoder.encode(embeds)

    def encode(self, text: str) -> List[float]:
        """Return the L2-normalised [CLS] (first-position) vector."""
        logger.debug("Encoding text of length %d", len(text))
        hidden = self._forward(text)
        cls_vec = hidden[0]
        norm = math.sqrt(sum(x * x for x in cls_vec)) + 1e-9
        return [x / norm for x in cls_vec]

    def classify(self, text: str, labels: List[str]) -> Tuple[str, float]:
        """Pick the label whose encoding has the highest dot product with the
        text encoding; confidence maps cosine [-1, 1] onto [0, 1].

        Raises:
            ValueError: if ``labels`` is empty (BUG FIX: previously raised a
                bare IndexError from ``labels[0]``).
        """
        if not labels:
            raise ValueError("labels must not be empty")
        logger.debug("Classifying text into %d labels", len(labels))
        text_vec = self.encode(text)
        best_label, best_score = labels[0], -1.0
        for label in labels:
            label_vec = self.encode(label)
            score = sum(a * b for a, b in zip(text_vec, label_vec))
            if score > best_score:
                best_score, best_label = score, label
        confidence = (best_score + 1) / 2.0
        logger.info("Classified as '%s' with confidence %.4f", best_label, confidence)
        return best_label, confidence

    def batch_encode(self, texts: List[str]) -> List[List[float]]:
        """Encode each text independently."""
        logger.debug("Batch encoding %d texts", len(texts))
        return [self.encode(t) for t in texts]

    def fill_mask(self, text_with_mask: str) -> Dict[str, float]:
        """Score in-sentence candidate words for the [MASK] slot.

        Returns candidate -> normalised score (empty dict if no mask found).
        """
        logger.debug("fill_mask called")
        words = text_with_mask.split()
        mask_idx = next((i for i, w in enumerate(words)
                         if w == self.config.mask_token), None)
        if mask_idx is None:
            logger.warning("No [MASK] token found")
            return {}
        # candidates come from the sentence itself (first five non-mask words)
        candidates = [w for w in words if w != self.config.mask_token][:5] or ["unknown"]
        scores: Dict[str, float] = {}
        for cand in candidates:
            test = words.copy()
            test[mask_idx] = cand
            vec = self.encode(" ".join(test))
            scores[cand] = round((sum(vec) + len(vec)) / (2 * len(vec)), 4)
        total = sum(scores.values()) + 1e-9
        return {k: round(v / total, 4) for k, v in scores.items()}
build_ngrams(self, text: str, n: int = 2) -> Dict[Tuple, List[str]]: + tokens = text.lower().split() + ngrams: Dict[Tuple, List[str]] = defaultdict(list) + for i in range(len(tokens) - n): + key = tuple(tokens[i:i + n]) + ngrams[key].append(tokens[i + n]) + return ngrams + + +class GPTAttention: + """Causal (autoregressive) attention simulation.""" + + def __init__(self, config: GPTConfig) -> None: + self.config = config + + @staticmethod + def _dot(a: List[float], b: List[float]) -> float: + return sum(x * y for x, y in zip(a, b)) + + def causal_attend(self, embeddings: List[List[float]]) -> List[List[float]]: + n = len(embeddings) + d = len(embeddings[0]) + scale = math.sqrt(d) + 1e-9 + output = [] + for i in range(n): + scores = [self._dot(embeddings[i], embeddings[j]) / scale + if j <= i else -1e9 for j in range(n)] + max_s = max(scores) + exp_s = [math.exp(s - max_s) for s in scores] + total = sum(exp_s) + 1e-9 + weights = [e / total for e in exp_s] + attended = [sum(weights[j] * embeddings[j][k] for j in range(n)) + for k in range(d)] + output.append(attended) + return output + + +class GPTModel: + def __init__(self, config: Optional[GPTConfig] = None) -> None: + self.config = config or GPTConfig() + self.tokenizer = GPTTokenizer(self.config) + self.attention_layers = [GPTAttention(self.config) + for _ in range(self.config.num_layers)] + self._ngram_cache: Dict[Tuple, List[str]] = {} + logger.info("GPTModel initialised with config: %s", self.config) + + def _embed(self, token_ids: List[int]) -> List[List[float]]: + return [[math.sin(tid * (i + 1) * 0.01) + for i in range(self.config.hidden_size)] + for tid in token_ids] + + def _sample(self, candidates: List[str], temperature: float) -> str: + if not candidates: + return "the" + if temperature <= 0: + return candidates[0] + scores = [math.exp(-(i * temperature)) for i in range(len(candidates))] + total = sum(scores) + r = random.random() * total + cumulative = 0.0 + for cand, score in zip(candidates, scores): 
+ cumulative += score + if r <= cumulative: + return cand + return candidates[-1] + + def generate(self, prompt: str, max_tokens: int = 50, + temperature: float = 1.0) -> str: + logger.info("Generating up to %d tokens from prompt", max_tokens) + ngrams = self.tokenizer.build_ngrams(prompt, n=2) + self._ngram_cache.update(ngrams) + words = prompt.lower().split() + for _ in range(max_tokens): + key = tuple(words[-2:]) if len(words) >= 2 else tuple(words[-1:]) + candidates = self._ngram_cache.get(key, []) + if not candidates: + fallback = ["the", "a", "in", "of", "and", "is", "for"] + candidates = fallback + next_word = self._sample(candidates[:self.config.top_k], temperature) + words.append(next_word) + if next_word in {".", "!", "?"}: + break + generated = " ".join(words[len(prompt.split()):]) + logger.debug("Generated %d tokens", len(words) - len(prompt.split())) + return generated + + def complete(self, text: str) -> str: + logger.debug("Completing text snippet") + return text + " " + self.generate(text, max_tokens=20, + temperature=self.config.temperature) + + def batch_generate(self, prompts: List[str], max_tokens: int = 30) -> List[str]: + logger.info("Batch generating for %d prompts", len(prompts)) + return [self.generate(p, max_tokens) for p in prompts] diff --git a/neural_networks/transformers/t5_model.py b/neural_networks/transformers/t5_model.py new file mode 100644 index 0000000..c95a180 --- /dev/null +++ b/neural_networks/transformers/t5_model.py @@ -0,0 +1,156 @@ +"""T5 model simulation for sequence-to-sequence tasks.""" +from __future__ import annotations +import math +import logging +import hashlib +from dataclasses import dataclass +from typing import List, Dict, Optional + +logger = logging.getLogger(__name__) + +_LANG_PREFIXES: Dict[str, str] = { + "fr": "En français: ", + "es": "En español: ", + "de": "Auf Deutsch: ", + "it": "In italiano: ", +} + +_STOP_WORDS = {"the", "a", "an", "is", "are", "was", "were", "be", + "been", "being", "have", "has", 
@dataclass
class T5Config:
    """Hyper-parameters for the simulated T5 encoder/decoder."""
    hidden_size: int = 128
    num_heads: int = 4
    num_encoder_layers: int = 2
    num_decoder_layers: int = 2
    max_length: int = 512
    vocab_size: int = 32128


class T5Encoder:
    """Hash-embedding encoder with local neighbour-averaging layers."""

    def __init__(self, config: T5Config) -> None:
        self.config = config

    def _token_id(self, word: str) -> int:
        """Deterministic pseudo token id from the word's MD5 digest."""
        digest = hashlib.md5(word.encode()).hexdigest()
        return int(digest, 16) % self.config.vocab_size

    def _embed(self, text: str) -> List[List[float]]:
        """Sinusoidal embedding of each lowercase word, phase-shifted by position."""
        hidden = self.config.hidden_size
        vectors: List[List[float]] = []
        for pos, word in enumerate(text.lower().split()):
            tid = self._token_id(word)
            vectors.append([math.sin(tid * (dim + 1) * 0.01 + pos * 0.001)
                            for dim in range(hidden)])
        return vectors

    def encode(self, text: str) -> List[List[float]]:
        """Embed the text, then apply ``num_encoder_layers`` rounds of
        tanh-squashed neighbour smoothing (window of self +/- 1)."""
        logger.debug("T5Encoder encoding %d chars", len(text))
        state = self._embed(text)
        if not state:
            return []
        hidden = self.config.hidden_size
        for _ in range(self.config.num_encoder_layers):
            smoothed = []
            for pos, vec in enumerate(state):
                neighbours = state[max(0, pos - 1): pos + 2]
                avg = [sum(nb[dim] for nb in neighbours) / len(neighbours)
                       for dim in range(hidden)]
                smoothed.append([math.tanh(v + a) for v, a in zip(vec, avg)])
            state = smoothed
        return state

    def pool(self, encoded: List[List[float]]) -> List[float]:
        """Mean-pool across positions; all-zero vector for empty input."""
        hidden = self.config.hidden_size
        if not encoded:
            return [0.0] * hidden
        count = len(encoded)
        return [sum(row[dim] for row in encoded) / count for dim in range(hidden)]
class T5Model:
    """Seq2seq simulation: translate, summarize, QA, classify, paraphrase."""

    def __init__(self, config: Optional[T5Config] = None) -> None:
        self.config = config or T5Config()
        self.encoder = T5Encoder(self.config)
        self.decoder = T5Decoder(self.config)
        logger.info("T5Model initialised with config: %s", self.config)

    def translate(self, text: str, target_lang: str = "fr") -> str:
        """Prefix-tagged pseudo-translation of ``text``."""
        logger.info("Translating to '%s'", target_lang)
        pooled = self.encoder.pool(self.encoder.encode(text))
        prefix = _LANG_PREFIXES.get(target_lang, f"[{target_lang}]: ")
        return prefix + self.decoder.decode(pooled, text)

    def summarize(self, text: str, max_length: int = 50) -> str:
        """Extractive summary: top-2 sentences by content-word density plus a
        small encoder-similarity bonus, truncated to ``max_length`` chars."""
        logger.info("Summarizing text of length %d", len(text))
        encoded = self.encoder.encode(text)
        sentences = [s.strip()
                     for s in text.replace("!", ".").replace("?", ".").split(".")
                     if s.strip()]
        if not sentences:
            return text[:max_length]
        pooled = self.encoder.pool(encoded)
        ranked = []
        for sent in sentences:
            words = sent.lower().split()
            content_words = [w for w in words if w not in _STOP_WORDS]
            density = len(content_words) / (len(words) + 1e-9)
            sim = sum(pooled[i % self.config.hidden_size] for i in range(len(words)))
            ranked.append((sent, density + abs(sim) * 0.01))
        ranked.sort(key=lambda pair: pair[1], reverse=True)
        summary = " ".join(sent for sent, _ in ranked[:2])
        return summary[:max_length] if len(summary) > max_length else summary

    def answer_question(self, context: str, question: str) -> str:
        """Return the context sentence whose encoding best matches the question."""
        logger.info("Answering question from context")
        q_vec = self.encoder.pool(self.encoder.encode(question))
        sentences = [s.strip()
                     for s in context.replace("!", ".").replace("?", ".").split(".")
                     if s.strip()]
        if not sentences:
            return "No answer found."
        best_sent, best_score = sentences[0], -float("inf")
        for sent in sentences:
            s_vec = self.encoder.pool(self.encoder.encode(sent))
            similarity = sum(a * b for a, b in zip(q_vec, s_vec))
            if similarity > best_score:
                best_score, best_sent = similarity, sent
        return best_sent

    def classify(self, text: str, task: str = "sentiment") -> str:
        """Sign/bucket-based label derived from the pooled encoding."""
        logger.info("Classifying text for task '%s'", task)
        pooled = self.encoder.pool(self.encoder.encode(text))
        score = sum(pooled[:self.config.hidden_size // 2])
        if task == "sentiment":
            return "positive" if score > 0 else "negative"
        if task == "topic":
            topics = ["technology", "science", "sports", "politics", "health"]
            return topics[int(abs(score) * 10) % len(topics)]
        return "unknown"

    def paraphrase(self, text: str) -> str:
        """Re-case each word by the sign of its pooled-encoding component."""
        logger.info("Paraphrasing text")
        pooled = self.encoder.pool(self.encoder.encode(text))
        hidden = self.config.hidden_size
        reworded = []
        for pos, word in enumerate(text.split()):
            shift = pooled[pos % hidden]
            reworded.append(word if shift > 0 else word.lower())
        return " ".join(reworded)
(dataclasses, typing, logging, re, collections, statistics, uuid, datetime, enum, abc). Full smoke-test suite passes. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- agents/__init__.py | 0 agents/agi_agent/__init__.py | 0 agents/agi_agent/decision_optimizer.py | 275 ++++++++++++++++ agents/agi_agent/knowledge_graph.py | 269 ++++++++++++++++ agents/agi_agent/meta_learner.py | 237 ++++++++++++++ agents/agi_agent/reasoning_engine.py | 299 ++++++++++++++++++ agents/agi_agent/transfer_learning.py | 254 +++++++++++++++ agents/nlp_agent/__init__.py | 0 agents/nlp_agent/doc_parser.py | 251 +++++++++++++++ agents/nlp_agent/qa_system.py | 223 +++++++++++++ agents/nlp_agent/summarizer.py | 195 ++++++++++++ agents/nlp_agent/text_generator.py | 224 +++++++++++++ agents/nlp_agent/translator.py | 222 +++++++++++++ agents/nlu_agent/__init__.py | 0 agents/nlu_agent/context_manager.py | 195 ++++++++++++ agents/nlu_agent/dialogue_manager.py | 228 +++++++++++++ agents/nlu_agent/entity_extractor.py | 194 ++++++++++++ agents/nlu_agent/intent_classifier.py | 209 ++++++++++++ agents/nlu_agent/sentiment_analyzer.py | 210 ++++++++++++ dataops/__init__.py | 0 dataops/data_catalog/__init__.py | 0 dataops/data_catalog/metadata_store.py | 175 ++++++++++ dataops/data_catalog/search.py | 236 ++++++++++++++ dataops/data_ingestion/__init__.py | 0 dataops/data_ingestion/batch_processor.py | 189 +++++++++++ dataops/data_ingestion/connectors.py | 203 ++++++++++++ dataops/data_ingestion/streaming.py | 226 +++++++++++++ dataops/data_lineage/__init__.py | 0 dataops/data_lineage/tracker.py | 170 ++++++++++ dataops/data_lineage/visualizer.py | 170 ++++++++++ dataops/data_quality/__init__.py | 0 dataops/data_quality/cleansing.py | 236 ++++++++++++++ dataops/data_quality/profiler.py | 232 ++++++++++++++ dataops/data_quality/validators.py | 206 ++++++++++++ devops/__init__.py | 0 devops/cicd/__init__.py | 0 devops/cicd/deployment.py | 186 +++++++++++ devops/cicd/pipeline_generator.py 
| 161 ++++++++++ devops/cicd/test_runner.py | 155 +++++++++ devops/iac/__init__.py | 0 devops/iac/ansible_adapter.py | 126 ++++++++ devops/iac/cloudformation.py | 161 ++++++++++ devops/iac/terraform_adapter.py | 123 +++++++ devops/observability/__init__.py | 0 devops/observability/logging.py | 175 ++++++++++ devops/observability/metrics.py | 208 ++++++++++++ devops/observability/tracing.py | 186 +++++++++++ devops/secrets/__init__.py | 0 devops/secrets/key_manager.py | 172 ++++++++++ devops/secrets/vault_integration.py | 151 +++++++++ human_connection/__init__.py | 0 .../emotional_intelligence/__init__.py | 0 .../compassion_engine.py | 132 ++++++++ .../empathy_detector.py | 108 +++++++ .../emotional_intelligence/stress_monitor.py | 135 ++++++++ .../emotional_intelligence/tone_adjuster.py | 105 ++++++ human_connection/personalization/__init__.py | 0 .../personalization/context_memory.py | 99 ++++++ .../personalization/style_adapter.py | 115 +++++++ .../personalization/user_profiler.py | 102 ++++++ human_connection/purpose_driven/__init__.py | 0 .../purpose_driven/accessibility.py | 138 ++++++++ .../purpose_driven/empowerment_coach.py | 117 +++++++ .../purpose_driven/ethics_checker.py | 133 ++++++++ .../purpose_driven/impact_tracker.py | 110 +++++++ human_connection/wellbeing/__init__.py | 0 human_connection/wellbeing/break_suggester.py | 119 +++++++ .../wellbeing/celebration_engine.py | 114 +++++++ .../wellbeing/workload_analyzer.py | 108 +++++++ integration/__init__.py | 0 integration/agi_orchestrator.py | 128 ++++++++ integration/data_flow_manager.py | 128 ++++++++ integration/devops_bridge.py | 118 +++++++ integration/human_centric_wrapper.py | 142 +++++++++ integration/ml_pipeline_connector.py | 110 +++++++ integration/neural_net_inference.py | 152 +++++++++ integration/nlu_nlp_router.py | 135 ++++++++ mlops/__init__.py | 0 mlops/ab_testing.py | 252 +++++++++++++++ mlops/auto_retrain.py | 214 +++++++++++++ mlops/drift_detector.py | 230 ++++++++++++++ 
mlops/feature_store.py | 233 ++++++++++++++ mlops/model_monitor.py | 228 +++++++++++++ mlops/model_registry.py | 205 ++++++++++++ mlops/training_pipeline.py | 217 +++++++++++++ neural_networks/__init__.py | 0 .../__pycache__/__init__.cpython-312.pyc | Bin 0 -> 152 bytes neural_networks/cnn/__init__.py | 0 .../cnn/__pycache__/__init__.cpython-312.pyc | Bin 0 -> 156 bytes .../__pycache__/log_analyzer.cpython-312.pyc | Bin 0 -> 12070 bytes .../metric_classifier.cpython-312.pyc | Bin 0 -> 12368 bytes neural_networks/gnn/__init__.py | 0 .../gnn/__pycache__/__init__.cpython-312.pyc | Bin 0 -> 156 bytes .../dependency_analyzer.cpython-312.pyc | Bin 0 -> 9790 bytes .../infrastructure_gnn.cpython-312.pyc | Bin 0 -> 10315 bytes neural_networks/rl/__init__.py | 0 .../rl/__pycache__/__init__.cpython-312.pyc | Bin 0 -> 155 bytes .../policy_optimizer.cpython-312.pyc | Bin 0 -> 11408 bytes .../resource_allocator.cpython-312.pyc | Bin 0 -> 9057 bytes neural_networks/rnn/__init__.py | 0 .../rnn/__pycache__/__init__.cpython-312.pyc | Bin 0 -> 156 bytes .../anomaly_detector.cpython-312.pyc | Bin 0 -> 11099 bytes .../lstm_predictor.cpython-312.pyc | Bin 0 -> 11481 bytes neural_networks/transformers/__init__.py | 0 .../__pycache__/__init__.cpython-312.pyc | Bin 0 -> 165 bytes .../__pycache__/bert_model.cpython-312.pyc | Bin 0 -> 12250 bytes .../__pycache__/gpt_model.cpython-312.pyc | Bin 0 -> 10118 bytes .../__pycache__/t5_model.cpython-312.pyc | Bin 0 -> 13240 bytes 108 files changed, 11459 insertions(+) create mode 100644 agents/__init__.py create mode 100644 agents/agi_agent/__init__.py create mode 100644 agents/agi_agent/decision_optimizer.py create mode 100644 agents/agi_agent/knowledge_graph.py create mode 100644 agents/agi_agent/meta_learner.py create mode 100644 agents/agi_agent/reasoning_engine.py create mode 100644 agents/agi_agent/transfer_learning.py create mode 100644 agents/nlp_agent/__init__.py create mode 100644 agents/nlp_agent/doc_parser.py create mode 100644 
agents/nlp_agent/qa_system.py create mode 100644 agents/nlp_agent/summarizer.py create mode 100644 agents/nlp_agent/text_generator.py create mode 100644 agents/nlp_agent/translator.py create mode 100644 agents/nlu_agent/__init__.py create mode 100644 agents/nlu_agent/context_manager.py create mode 100644 agents/nlu_agent/dialogue_manager.py create mode 100644 agents/nlu_agent/entity_extractor.py create mode 100644 agents/nlu_agent/intent_classifier.py create mode 100644 agents/nlu_agent/sentiment_analyzer.py create mode 100644 dataops/__init__.py create mode 100644 dataops/data_catalog/__init__.py create mode 100644 dataops/data_catalog/metadata_store.py create mode 100644 dataops/data_catalog/search.py create mode 100644 dataops/data_ingestion/__init__.py create mode 100644 dataops/data_ingestion/batch_processor.py create mode 100644 dataops/data_ingestion/connectors.py create mode 100644 dataops/data_ingestion/streaming.py create mode 100644 dataops/data_lineage/__init__.py create mode 100644 dataops/data_lineage/tracker.py create mode 100644 dataops/data_lineage/visualizer.py create mode 100644 dataops/data_quality/__init__.py create mode 100644 dataops/data_quality/cleansing.py create mode 100644 dataops/data_quality/profiler.py create mode 100644 dataops/data_quality/validators.py create mode 100644 devops/__init__.py create mode 100644 devops/cicd/__init__.py create mode 100644 devops/cicd/deployment.py create mode 100644 devops/cicd/pipeline_generator.py create mode 100644 devops/cicd/test_runner.py create mode 100644 devops/iac/__init__.py create mode 100644 devops/iac/ansible_adapter.py create mode 100644 devops/iac/cloudformation.py create mode 100644 devops/iac/terraform_adapter.py create mode 100644 devops/observability/__init__.py create mode 100644 devops/observability/logging.py create mode 100644 devops/observability/metrics.py create mode 100644 devops/observability/tracing.py create mode 100644 devops/secrets/__init__.py create mode 100644 
devops/secrets/key_manager.py create mode 100644 devops/secrets/vault_integration.py create mode 100644 human_connection/__init__.py create mode 100644 human_connection/emotional_intelligence/__init__.py create mode 100644 human_connection/emotional_intelligence/compassion_engine.py create mode 100644 human_connection/emotional_intelligence/empathy_detector.py create mode 100644 human_connection/emotional_intelligence/stress_monitor.py create mode 100644 human_connection/emotional_intelligence/tone_adjuster.py create mode 100644 human_connection/personalization/__init__.py create mode 100644 human_connection/personalization/context_memory.py create mode 100644 human_connection/personalization/style_adapter.py create mode 100644 human_connection/personalization/user_profiler.py create mode 100644 human_connection/purpose_driven/__init__.py create mode 100644 human_connection/purpose_driven/accessibility.py create mode 100644 human_connection/purpose_driven/empowerment_coach.py create mode 100644 human_connection/purpose_driven/ethics_checker.py create mode 100644 human_connection/purpose_driven/impact_tracker.py create mode 100644 human_connection/wellbeing/__init__.py create mode 100644 human_connection/wellbeing/break_suggester.py create mode 100644 human_connection/wellbeing/celebration_engine.py create mode 100644 human_connection/wellbeing/workload_analyzer.py create mode 100644 integration/__init__.py create mode 100644 integration/agi_orchestrator.py create mode 100644 integration/data_flow_manager.py create mode 100644 integration/devops_bridge.py create mode 100644 integration/human_centric_wrapper.py create mode 100644 integration/ml_pipeline_connector.py create mode 100644 integration/neural_net_inference.py create mode 100644 integration/nlu_nlp_router.py create mode 100644 mlops/__init__.py create mode 100644 mlops/ab_testing.py create mode 100644 mlops/auto_retrain.py create mode 100644 mlops/drift_detector.py create mode 100644 mlops/feature_store.py 
create mode 100644 mlops/model_monitor.py create mode 100644 mlops/model_registry.py create mode 100644 mlops/training_pipeline.py create mode 100644 neural_networks/__init__.py create mode 100644 neural_networks/__pycache__/__init__.cpython-312.pyc create mode 100644 neural_networks/cnn/__init__.py create mode 100644 neural_networks/cnn/__pycache__/__init__.cpython-312.pyc create mode 100644 neural_networks/cnn/__pycache__/log_analyzer.cpython-312.pyc create mode 100644 neural_networks/cnn/__pycache__/metric_classifier.cpython-312.pyc create mode 100644 neural_networks/gnn/__init__.py create mode 100644 neural_networks/gnn/__pycache__/__init__.cpython-312.pyc create mode 100644 neural_networks/gnn/__pycache__/dependency_analyzer.cpython-312.pyc create mode 100644 neural_networks/gnn/__pycache__/infrastructure_gnn.cpython-312.pyc create mode 100644 neural_networks/rl/__init__.py create mode 100644 neural_networks/rl/__pycache__/__init__.cpython-312.pyc create mode 100644 neural_networks/rl/__pycache__/policy_optimizer.cpython-312.pyc create mode 100644 neural_networks/rl/__pycache__/resource_allocator.cpython-312.pyc create mode 100644 neural_networks/rnn/__init__.py create mode 100644 neural_networks/rnn/__pycache__/__init__.cpython-312.pyc create mode 100644 neural_networks/rnn/__pycache__/anomaly_detector.cpython-312.pyc create mode 100644 neural_networks/rnn/__pycache__/lstm_predictor.cpython-312.pyc create mode 100644 neural_networks/transformers/__init__.py create mode 100644 neural_networks/transformers/__pycache__/__init__.cpython-312.pyc create mode 100644 neural_networks/transformers/__pycache__/bert_model.cpython-312.pyc create mode 100644 neural_networks/transformers/__pycache__/gpt_model.cpython-312.pyc create mode 100644 neural_networks/transformers/__pycache__/t5_model.cpython-312.pyc diff --git a/agents/__init__.py b/agents/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/agents/agi_agent/__init__.py 
b/agents/agi_agent/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/agents/agi_agent/decision_optimizer.py b/agents/agi_agent/decision_optimizer.py new file mode 100644 index 0000000..501a065 --- /dev/null +++ b/agents/agi_agent/decision_optimizer.py @@ -0,0 +1,275 @@ +"""Multi-objective optimization for decision making.""" +from __future__ import annotations + +import logging +import math +import random +import uuid +from dataclasses import dataclass, field +from datetime import datetime +from typing import Any, Callable, Dict, List, Optional, Tuple + +logger = logging.getLogger(__name__) + + +@dataclass +class Objective: + """A single optimization objective.""" + name: str + weight: float = 1.0 + minimize: bool = True # True = minimize, False = maximize + constraint_min: Optional[float] = None + constraint_max: Optional[float] = None + + +@dataclass +class Decision: + """A candidate decision with evaluated objectives.""" + decision_id: str = field(default_factory=lambda: str(uuid.uuid4())) + variables: Dict[str, float] = field(default_factory=dict) + objective_values: Dict[str, float] = field(default_factory=dict) + aggregate_score: float = 0.0 + rank: int = 0 + feasible: bool = True + metadata: Dict[str, Any] = field(default_factory=dict) + + +@dataclass +class OptimizationProblem: + """Definition of a multi-objective optimization problem.""" + problem_id: str = field(default_factory=lambda: str(uuid.uuid4())) + objectives: List[Objective] = field(default_factory=list) + variable_bounds: Dict[str, Tuple[float, float]] = field(default_factory=dict) + description: str = "" + + +@dataclass +class OptimizationResult: + """Result of an optimization run.""" + result_id: str = field(default_factory=lambda: str(uuid.uuid4())) + problem_id: str = "" + pareto_front: List[Decision] = field(default_factory=list) + best_decision: Optional[Decision] = None + iterations: int = 0 + converged: bool = False + metadata: Dict[str, Any] = 
field(default_factory=dict) + created_at: datetime = field(default_factory=datetime.utcnow) + + +def _weighted_sum(decision: Decision, objectives: List[Objective]) -> float: + total_weight = sum(o.weight for o in objectives) or 1.0 + score = 0.0 + for obj in objectives: + val = decision.objective_values.get(obj.name, 0.0) + normalized = val / (abs(val) + 1e-9) + contribution = obj.weight / total_weight * (normalized if obj.minimize else -normalized) + score += contribution + return score + + +def _dominates(a: Decision, b: Decision, objectives: List[Objective]) -> bool: + """Return True if decision a Pareto-dominates b.""" + at_least_one_better = False + for obj in objectives: + av = a.objective_values.get(obj.name, float("inf")) + bv = b.objective_values.get(obj.name, float("inf")) + if obj.minimize: + if av > bv: + return False + if av < bv: + at_least_one_better = True + else: + if av < bv: + return False + if av > bv: + at_least_one_better = True + return at_least_one_better + + +def _pareto_front(decisions: List[Decision], objectives: List[Objective]) -> List[Decision]: + """Extract non-dominated (Pareto-optimal) decisions.""" + front: List[Decision] = [] + for candidate in decisions: + dominated = False + for other in decisions: + if other.decision_id != candidate.decision_id and _dominates(other, candidate, objectives): + dominated = True + break + if not dominated: + front.append(candidate) + return front + + +class RandomSearchOptimizer: + """Baseline optimizer: random search with constraint checking.""" + + def __init__(self, n_samples: int = 200, seed: Optional[int] = None) -> None: + self.n_samples = n_samples + self._rng = random.Random(seed) + + def optimize(self, problem: OptimizationProblem, + evaluator: Callable[[Dict[str, float]], Dict[str, float]]) -> List[Decision]: + decisions: List[Decision] = [] + for _ in range(self.n_samples): + variables = { + name: self._rng.uniform(lo, hi) + for name, (lo, hi) in problem.variable_bounds.items() + } + 
obj_values = evaluator(variables) + feasible = self._check_feasibility(obj_values, problem.objectives) + d = Decision(variables=variables, objective_values=obj_values, feasible=feasible) + d.aggregate_score = _weighted_sum(d, problem.objectives) + decisions.append(d) + return decisions + + def _check_feasibility(self, obj_values: Dict[str, float], objectives: List[Objective]) -> bool: + for obj in objectives: + val = obj_values.get(obj.name, 0.0) + if obj.constraint_min is not None and val < obj.constraint_min: + return False + if obj.constraint_max is not None and val > obj.constraint_max: + return False + return True + + +class GeneticOptimizer: + """Simple genetic algorithm for multi-objective optimization.""" + + def __init__(self, population_size: int = 50, generations: int = 20, + mutation_rate: float = 0.1, seed: Optional[int] = None) -> None: + self.population_size = population_size + self.generations = generations + self.mutation_rate = mutation_rate + self._rng = random.Random(seed) + + def optimize(self, problem: OptimizationProblem, + evaluator: Callable[[Dict[str, float]], Dict[str, float]]) -> Tuple[List[Decision], int]: + population = self._init_population(problem, evaluator) + converged = False + prev_best = float("inf") + + for gen in range(self.generations): + population = sorted(population, key=lambda d: d.aggregate_score) + best = population[0].aggregate_score + if abs(best - prev_best) < 1e-5: + converged = True + break + prev_best = best + + elites = population[: self.population_size // 4] + offspring = self._crossover_mutate(elites, problem, evaluator) + population = elites + offspring + logger.debug("GA gen %d, best score: %.4f", gen, best) + + return population, self.generations if not converged else gen + 1 + + def _init_population(self, problem: OptimizationProblem, + evaluator: Callable) -> List[Decision]: + pop = [] + for _ in range(self.population_size): + variables = { + name: self._rng.uniform(lo, hi) + for name, (lo, hi) in 
problem.variable_bounds.items() + } + obj_values = evaluator(variables) + d = Decision(variables=variables, objective_values=obj_values) + d.aggregate_score = _weighted_sum(d, problem.objectives) + pop.append(d) + return pop + + def _crossover_mutate(self, elites: List[Decision], problem: OptimizationProblem, + evaluator: Callable) -> List[Decision]: + offspring: List[Decision] = [] + n = self.population_size - len(elites) + for _ in range(n): + p1, p2 = self._rng.sample(elites, min(2, len(elites))) + child_vars: Dict[str, float] = {} + for name, (lo, hi) in problem.variable_bounds.items(): + gene = p1.variables.get(name, lo) if self._rng.random() > 0.5 else p2.variables.get(name, lo) + if self._rng.random() < self.mutation_rate: + gene = self._rng.uniform(lo, hi) + child_vars[name] = gene + obj_values = evaluator(child_vars) + d = Decision(variables=child_vars, objective_values=obj_values) + d.aggregate_score = _weighted_sum(d, problem.objectives) + offspring.append(d) + return offspring + + +class DecisionOptimizer: + """ + High-level multi-objective decision optimizer combining random search, + genetic algorithms, and Pareto-front analysis. 
+ """ + + def __init__(self) -> None: + self._random_optimizer = RandomSearchOptimizer(n_samples=200) + self._ga_optimizer = GeneticOptimizer(population_size=50, generations=20) + self._history: List[OptimizationResult] = [] + logger.info("DecisionOptimizer initialized") + + def optimize(self, problem: OptimizationProblem, + evaluator: Callable[[Dict[str, float]], Dict[str, float]], + method: str = "genetic") -> OptimizationResult: + """Run optimization and return Pareto front + best decision.""" + logger.info("Optimizing problem '%s' using method '%s'", problem.problem_id, method) + + if method == "random": + decisions = self._random_optimizer.optimize(problem, evaluator) + iterations = self._random_optimizer.n_samples + converged = True + else: + decisions, iterations = self._ga_optimizer.optimize(problem, evaluator) + converged = iterations < self._ga_optimizer.generations + + feasible = [d for d in decisions if d.feasible] + if not feasible: + feasible = decisions # relax feasibility if nothing found + + pareto = _pareto_front(feasible, problem.objectives) + for rank, d in enumerate(sorted(pareto, key=lambda d: d.aggregate_score)): + d.rank = rank + 1 + + best = min(pareto, key=lambda d: d.aggregate_score) if pareto else None + + result = OptimizationResult( + problem_id=problem.problem_id, + pareto_front=pareto, + best_decision=best, + iterations=iterations, + converged=converged, + metadata={"total_evaluated": len(decisions), "pareto_size": len(pareto), "method": method}, + ) + self._history.append(result) + logger.info("Optimization done: pareto_size=%d, converged=%s", len(pareto), converged) + return result + + def recommend(self, problem: OptimizationProblem, + evaluator: Callable[[Dict[str, float]], Dict[str, float]], + n: int = 3) -> List[Decision]: + """Return top-n recommended decisions.""" + result = self.optimize(problem, evaluator) + return sorted(result.pareto_front, key=lambda d: d.aggregate_score)[:n] + + def sensitivity_analysis(self, problem: 
OptimizationProblem, + evaluator: Callable[[Dict[str, float]], Dict[str, float]], + n_perturbations: int = 50) -> Dict[str, float]: + """Estimate sensitivity of each variable by random perturbation.""" + base_vars = {name: (lo + hi) / 2 for name, (lo, hi) in problem.variable_bounds.items()} + base_obj = evaluator(base_vars) + base_score = sum(base_obj.values()) + + sensitivities: Dict[str, List[float]] = {name: [] for name in problem.variable_bounds} + rng = random.Random(42) + for _ in range(n_perturbations): + for name, (lo, hi) in problem.variable_bounds.items(): + perturbed = dict(base_vars) + perturbed[name] = rng.uniform(lo, hi) + obj = evaluator(perturbed) + delta = abs(sum(obj.values()) - base_score) + sensitivities[name].append(delta) + + return { + name: sum(deltas) / len(deltas) if deltas else 0.0 + for name, deltas in sensitivities.items() + } diff --git a/agents/agi_agent/knowledge_graph.py b/agents/agi_agent/knowledge_graph.py new file mode 100644 index 0000000..a80ec19 --- /dev/null +++ b/agents/agi_agent/knowledge_graph.py @@ -0,0 +1,269 @@ +"""Knowledge graph for context and relationship mapping.""" +from __future__ import annotations + +import json +import logging +import uuid +from collections import defaultdict, deque +from dataclasses import dataclass, field +from datetime import datetime +from typing import Any, Dict, Iterator, List, Optional, Set, Tuple + +logger = logging.getLogger(__name__) + + +@dataclass +class Node: + """A node in the knowledge graph.""" + node_id: str = field(default_factory=lambda: str(uuid.uuid4())) + label: str = "" + node_type: str = "concept" + properties: Dict[str, Any] = field(default_factory=dict) + created_at: datetime = field(default_factory=datetime.utcnow) + updated_at: datetime = field(default_factory=datetime.utcnow) + + +@dataclass +class Edge: + """A directed edge between two nodes.""" + edge_id: str = field(default_factory=lambda: str(uuid.uuid4())) + source_id: str = "" + target_id: str = "" + 
relation: str = "related_to" + weight: float = 1.0 + properties: Dict[str, Any] = field(default_factory=dict) + created_at: datetime = field(default_factory=datetime.utcnow) + + +@dataclass +class GraphQuery: + """Query specification for knowledge graph traversal.""" + start_node: Optional[str] = None + relation_filter: Optional[str] = None + node_type_filter: Optional[str] = None + max_depth: int = 3 + max_results: int = 50 + + +@dataclass +class GraphQueryResult: + """Result of a knowledge graph query.""" + nodes: List[Node] = field(default_factory=list) + edges: List[Edge] = field(default_factory=list) + paths: List[List[str]] = field(default_factory=list) + metadata: Dict[str, Any] = field(default_factory=dict) + + +class KnowledgeGraph: + """ + In-memory knowledge graph supporting nodes, directed edges, + BFS/DFS traversal, shortest path, and semantic search. + """ + + def __init__(self) -> None: + self._nodes: Dict[str, Node] = {} + self._edges: Dict[str, Edge] = {} + self._adj: Dict[str, List[str]] = defaultdict(list) # node_id -> [edge_ids] + self._rev_adj: Dict[str, List[str]] = defaultdict(list) # node_id -> [incoming edge_ids] + self._label_index: Dict[str, Set[str]] = defaultdict(set) # label -> node_ids + self._type_index: Dict[str, Set[str]] = defaultdict(set) # type -> node_ids + logger.info("KnowledgeGraph initialized") + + # ------------------------------------------------------------------ nodes + + def add_node(self, label: str, node_type: str = "concept", properties: Optional[Dict[str, Any]] = None) -> Node: + node = Node(label=label, node_type=node_type, properties=properties or {}) + self._nodes[node.node_id] = node + self._label_index[label.lower()].add(node.node_id) + self._type_index[node_type].add(node.node_id) + logger.debug("Added node '%s' (%s)", label, node.node_id) + return node + + def get_node(self, node_id: str) -> Optional[Node]: + return self._nodes.get(node_id) + + def find_by_label(self, label: str) -> List[Node]: + ids = 
self._label_index.get(label.lower(), set()) + return [self._nodes[nid] for nid in ids if nid in self._nodes] + + def find_by_type(self, node_type: str) -> List[Node]: + ids = self._type_index.get(node_type, set()) + return [self._nodes[nid] for nid in ids if nid in self._nodes] + + def update_node(self, node_id: str, properties: Dict[str, Any]) -> bool: + node = self._nodes.get(node_id) + if not node: + return False + node.properties.update(properties) + node.updated_at = datetime.utcnow() + return True + + def remove_node(self, node_id: str) -> bool: + if node_id not in self._nodes: + return False + # Remove incident edges + for eid in list(self._adj[node_id]): + self._remove_edge_by_id(eid) + for eid in list(self._rev_adj[node_id]): + self._remove_edge_by_id(eid) + node = self._nodes.pop(node_id) + self._label_index[node.label.lower()].discard(node_id) + self._type_index[node.node_type].discard(node_id) + return True + + # ------------------------------------------------------------------ edges + + def add_edge(self, source_id: str, target_id: str, relation: str = "related_to", + weight: float = 1.0, properties: Optional[Dict[str, Any]] = None) -> Optional[Edge]: + if source_id not in self._nodes or target_id not in self._nodes: + logger.warning("Cannot add edge: node(s) not found") + return None + edge = Edge(source_id=source_id, target_id=target_id, relation=relation, + weight=weight, properties=properties or {}) + self._edges[edge.edge_id] = edge + self._adj[source_id].append(edge.edge_id) + self._rev_adj[target_id].append(edge.edge_id) + return edge + + def _remove_edge_by_id(self, edge_id: str) -> None: + edge = self._edges.pop(edge_id, None) + if edge: + self._adj[edge.source_id] = [e for e in self._adj[edge.source_id] if e != edge_id] + self._rev_adj[edge.target_id] = [e for e in self._rev_adj[edge.target_id] if e != edge_id] + + def get_neighbors(self, node_id: str, relation: Optional[str] = None) -> List[Node]: + neighbors: List[Node] = [] + for eid in 
self._adj.get(node_id, []): + edge = self._edges.get(eid) + if edge and (relation is None or edge.relation == relation): + target = self._nodes.get(edge.target_id) + if target: + neighbors.append(target) + return neighbors + + # ---------------------------------------------------------------- traversal + + def bfs(self, start_id: str, max_depth: int = 3, relation: Optional[str] = None) -> GraphQueryResult: + visited: Set[str] = set() + queue: deque = deque([(start_id, 0, [start_id])]) + result_nodes: List[Node] = [] + result_edges: List[Edge] = [] + paths: List[List[str]] = [] + + while queue: + current_id, depth, path = queue.popleft() + if current_id in visited or depth > max_depth: + continue + visited.add(current_id) + node = self._nodes.get(current_id) + if node: + result_nodes.append(node) + paths.append(path) + + if depth < max_depth: + for eid in self._adj.get(current_id, []): + edge = self._edges.get(eid) + if edge and (relation is None or edge.relation == relation): + result_edges.append(edge) + if edge.target_id not in visited: + queue.append((edge.target_id, depth + 1, path + [edge.target_id])) + + return GraphQueryResult(nodes=result_nodes, edges=result_edges, paths=paths, + metadata={"visited": len(visited), "start": start_id}) + + def shortest_path(self, source_id: str, target_id: str) -> List[str]: + """BFS-based shortest path between two nodes.""" + if source_id == target_id: + return [source_id] + visited: Set[str] = {source_id} + queue: deque = deque([(source_id, [source_id])]) + while queue: + current, path = queue.popleft() + for eid in self._adj.get(current, []): + edge = self._edges.get(eid) + if not edge: + continue + nxt = edge.target_id + if nxt == target_id: + return path + [nxt] + if nxt not in visited: + visited.add(nxt) + queue.append((nxt, path + [nxt])) + return [] + + def query(self, gq: GraphQuery) -> GraphQueryResult: + if gq.start_node: + result = self.bfs(gq.start_node, max_depth=gq.max_depth, relation=gq.relation_filter) + 
else: + nodes = list(self._nodes.values()) + if gq.node_type_filter: + nodes = [n for n in nodes if n.node_type == gq.node_type_filter] + result = GraphQueryResult(nodes=nodes[:gq.max_results], edges=[], + metadata={"total_nodes": len(self._nodes)}) + return result + + def semantic_search(self, query: str, top_k: int = 10) -> List[Tuple[Node, float]]: + """Simple token-overlap similarity search.""" + query_tokens = set(query.lower().split()) + scored: List[Tuple[Node, float]] = [] + for node in self._nodes.values(): + node_tokens = set((node.label + " " + node.node_type).lower().split()) + node_tokens |= {str(v).lower() for v in node.properties.values()} + overlap = len(query_tokens & node_tokens) + if overlap: + score = overlap / len(query_tokens | node_tokens) + scored.append((node, score)) + scored.sort(key=lambda x: x[1], reverse=True) + return scored[:top_k] + + def add_context(self, subject: str, predicate: str, obj: str) -> Tuple[Node, Edge, Node]: + """Convenience triple-store style insertion.""" + s_nodes = self.find_by_label(subject) + s_node = s_nodes[0] if s_nodes else self.add_node(subject) + o_nodes = self.find_by_label(obj) + o_node = o_nodes[0] if o_nodes else self.add_node(obj) + edge = self.add_edge(s_node.node_id, o_node.node_id, relation=predicate) + return s_node, edge, o_node # type: ignore[return-value] + + # ------------------------------------------------------------------ stats + + @property + def stats(self) -> Dict[str, Any]: + return { + "nodes": len(self._nodes), + "edges": len(self._edges), + "node_types": {t: len(ids) for t, ids in self._type_index.items()}, + } + + def to_dict(self) -> Dict[str, Any]: + return { + "nodes": [ + {"id": n.node_id, "label": n.label, "type": n.node_type, "properties": n.properties} + for n in self._nodes.values() + ], + "edges": [ + {"id": e.edge_id, "source": e.source_id, "target": e.target_id, + "relation": e.relation, "weight": e.weight} + for e in self._edges.values() + ], + } + + def to_json(self) 
-> str: + return json.dumps(self.to_dict(), indent=2, default=str) + + @classmethod + def from_dict(cls, data: Dict[str, Any]) -> "KnowledgeGraph": + kg = cls() + for nd in data.get("nodes", []): + node = Node(node_id=nd["id"], label=nd["label"], node_type=nd.get("type", "concept"), + properties=nd.get("properties", {})) + kg._nodes[node.node_id] = node + kg._label_index[node.label.lower()].add(node.node_id) + kg._type_index[node.node_type].add(node.node_id) + for ed in data.get("edges", []): + edge = Edge(edge_id=ed["id"], source_id=ed["source"], target_id=ed["target"], + relation=ed["relation"], weight=ed.get("weight", 1.0)) + kg._edges[edge.edge_id] = edge + kg._adj[edge.source_id].append(edge.edge_id) + kg._rev_adj[edge.target_id].append(edge.edge_id) + return kg diff --git a/agents/agi_agent/meta_learner.py b/agents/agi_agent/meta_learner.py new file mode 100644 index 0000000..77259fa --- /dev/null +++ b/agents/agi_agent/meta_learner.py @@ -0,0 +1,237 @@ +"""Adaptive meta-learning system that learns from limited examples.""" +from __future__ import annotations + +import logging +import math +import random +import statistics +import uuid +from collections import defaultdict +from dataclasses import dataclass, field +from datetime import datetime +from typing import Any, Dict, List, Optional, Tuple + +logger = logging.getLogger(__name__) + + +@dataclass +class Example: + """A labeled example for meta-learning.""" + example_id: str = field(default_factory=lambda: str(uuid.uuid4())) + features: Dict[str, float] = field(default_factory=dict) + label: str = "" + domain: str = "general" + confidence: float = 1.0 + timestamp: datetime = field(default_factory=datetime.utcnow) + + +@dataclass +class Task: + """A few-shot learning task.""" + task_id: str = field(default_factory=lambda: str(uuid.uuid4())) + support_set: List[Example] = field(default_factory=list) # labeled examples + query_set: List[Example] = field(default_factory=list) # to predict + domain: str = 
"general" + n_way: int = 2 + k_shot: int = 5 + + +@dataclass +class MetaLearningResult: + """Outcome of a meta-learning inference.""" + task_id: str = "" + predictions: List[Dict[str, Any]] = field(default_factory=list) + accuracy: float = 0.0 + adaptation_steps: int = 0 + domain: str = "general" + metadata: Dict[str, Any] = field(default_factory=dict) + + +def _cosine_similarity(a: Dict[str, float], b: Dict[str, float]) -> float: + keys = set(a) | set(b) + dot = sum(a.get(k, 0.0) * b.get(k, 0.0) for k in keys) + mag_a = math.sqrt(sum(v ** 2 for v in a.values())) + mag_b = math.sqrt(sum(v ** 2 for v in b.values())) + if mag_a == 0 or mag_b == 0: + return 0.0 + return dot / (mag_a * mag_b) + + +def _euclidean_distance(a: Dict[str, float], b: Dict[str, float]) -> float: + keys = set(a) | set(b) + return math.sqrt(sum((a.get(k, 0.0) - b.get(k, 0.0)) ** 2 for k in keys)) + + +class PrototypicalNetwork: + """ + Simulates prototypical networks: computes class prototypes from + support examples and classifies queries by nearest prototype. 
+ """ + + def __init__(self) -> None: + self.prototypes: Dict[str, Dict[str, float]] = {} + + def fit(self, support_set: List[Example]) -> None: + class_features: Dict[str, List[Dict[str, float]]] = defaultdict(list) + for ex in support_set: + class_features[ex.label].append(ex.features) + for label, feats_list in class_features.items(): + all_keys = set(k for f in feats_list for k in f) + proto = {k: statistics.mean(f.get(k, 0.0) for f in feats_list) for k in all_keys} + self.prototypes[label] = proto + logger.debug("Fitted %d prototypes", len(self.prototypes)) + + def predict(self, query_features: Dict[str, float]) -> Tuple[str, float]: + if not self.prototypes: + return "unknown", 0.0 + distances = { + label: _euclidean_distance(query_features, proto) + for label, proto in self.prototypes.items() + } + best_label = min(distances, key=lambda l: distances[l]) + max_dist = max(distances.values()) if len(distances) > 1 else 1.0 + confidence = 1.0 - (distances[best_label] / (max_dist + 1e-9)) + return best_label, confidence + + +class MAMLLearner: + """ + Simulates MAML (Model-Agnostic Meta-Learning) with gradient-like + parameter adaptation using iterative prototype refinement. + """ + + def __init__(self, inner_lr: float = 0.1, inner_steps: int = 5) -> None: + self.inner_lr = inner_lr + self.inner_steps = inner_steps + self.meta_params: Dict[str, Dict[str, float]] = {} # domain -> prototype offsets + + def meta_update(self, domain: str, support: List[Example]) -> int: + """Simulate inner-loop adaptation. 
Returns adaptation steps used.""" + class_sums: Dict[str, Dict[str, float]] = defaultdict(lambda: defaultdict(float)) + class_counts: Dict[str, int] = defaultdict(int) + for ex in support: + for k, v in ex.features.items(): + class_sums[ex.label][k] += v + class_counts[ex.label] += 1 + + offsets: Dict[str, float] = {} + for label, sums in class_sums.items(): + for k, total in sums.items(): + offsets[f"{label}:{k}"] = total / class_counts[label] * self.inner_lr + + existing = self.meta_params.get(domain, {}) + for key, val in offsets.items(): + existing[key] = existing.get(key, 0.0) * (1 - self.inner_lr) + val * self.inner_lr + self.meta_params[domain] = existing + return self.inner_steps + + def adapt(self, domain: str, query_features: Dict[str, float]) -> Dict[str, float]: + """Apply meta-parameters to shift query features for adaptation.""" + offsets = self.meta_params.get(domain, {}) + adapted = dict(query_features) + for key, offset in offsets.items(): + if ":" in key: + _, feat = key.split(":", 1) + if feat in adapted: + adapted[feat] += offset + return adapted + + +class MetaLearner: + """ + High-level adaptive meta-learner combining prototypical networks + and MAML-style adaptation for few-shot learning. 
+ """ + + def __init__(self, inner_lr: float = 0.1, inner_steps: int = 5) -> None: + self.proto_net = PrototypicalNetwork() + self.maml = MAMLLearner(inner_lr=inner_lr, inner_steps=inner_steps) + self.task_history: List[Dict[str, Any]] = [] + self.domain_accuracy: Dict[str, List[float]] = defaultdict(list) + logger.info("MetaLearner initialized") + + def fit_task(self, task: Task) -> MetaLearningResult: + """Adapt to a task using support examples, then predict queries.""" + # Fit prototypical network from support set + self.proto_net.fit(task.support_set) + # MAML inner-loop adaptation + steps = self.maml.meta_update(task.domain, task.support_set) + + predictions: List[Dict[str, Any]] = [] + correct = 0 + for ex in task.query_set: + adapted_features = self.maml.adapt(task.domain, ex.features) + pred_label, conf = self.proto_net.predict(adapted_features) + predictions.append({ + "example_id": ex.example_id, + "predicted": pred_label, + "true": ex.label, + "confidence": conf, + }) + if pred_label == ex.label: + correct += 1 + + accuracy = correct / len(task.query_set) if task.query_set else 0.0 + self.domain_accuracy[task.domain].append(accuracy) + + result = MetaLearningResult( + task_id=task.task_id, + predictions=predictions, + accuracy=accuracy, + adaptation_steps=steps, + domain=task.domain, + metadata={"n_way": task.n_way, "k_shot": task.k_shot, "support_size": len(task.support_set)}, + ) + self.task_history.append({"task_id": task.task_id, "domain": task.domain, "accuracy": accuracy}) + logger.info("Task %s completed: accuracy=%.2f, domain=%s", task.task_id, accuracy, task.domain) + return result + + def few_shot_classify(self, support: List[Example], queries: List[Dict[str, float]], + domain: str = "general") -> List[Tuple[str, float]]: + """Convenience method for quick few-shot classification.""" + self.proto_net.fit(support) + self.maml.meta_update(domain, support) + results: List[Tuple[str, float]] = [] + for qf in queries: + adapted = 
self.maml.adapt(domain, qf) + label, conf = self.proto_net.predict(adapted) + results.append((label, conf)) + return results + + def online_update(self, new_example: Example) -> None: + """Incrementally update prototypes with a single new example.""" + existing = self.proto_net.prototypes.get(new_example.label, {}) + lr = 0.05 + for k, v in new_example.features.items(): + existing[k] = existing.get(k, v) * (1 - lr) + v * lr + self.proto_net.prototypes[new_example.label] = existing + + def get_domain_performance(self) -> Dict[str, Dict[str, float]]: + perf: Dict[str, Dict[str, float]] = {} + for domain, accs in self.domain_accuracy.items(): + perf[domain] = { + "mean_accuracy": statistics.mean(accs), + "min_accuracy": min(accs), + "max_accuracy": max(accs), + "task_count": len(accs), + } + return perf + + def recommend_k_shot(self, domain: str) -> int: + """Estimate optimal k-shot count based on domain history.""" + accs = self.domain_accuracy.get(domain, []) + if not accs or statistics.mean(accs) > 0.85: + return 3 + if statistics.mean(accs) > 0.70: + return 5 + return 10 + + @staticmethod + def text_to_features(text: str) -> Dict[str, float]: + """Convert text to a simple bag-of-words feature vector.""" + words = text.lower().split() + features: Dict[str, float] = defaultdict(float) + for word in words: + features[word] += 1.0 + total = sum(features.values()) or 1.0 + return {k: v / total for k, v in features.items()} diff --git a/agents/agi_agent/reasoning_engine.py b/agents/agi_agent/reasoning_engine.py new file mode 100644 index 0000000..22deb44 --- /dev/null +++ b/agents/agi_agent/reasoning_engine.py @@ -0,0 +1,299 @@ +"""Core AGI reasoning engine with contextual multi-domain problem solving.""" +from __future__ import annotations + +import logging +import math +import re +import time +import uuid +from abc import ABC, abstractmethod +from collections import defaultdict, deque +from dataclasses import dataclass, field +from datetime import datetime +from 
typing import Any, Dict, List, Optional, Tuple + +logger = logging.getLogger(__name__) + + +@dataclass +class ReasoningContext: + """Holds context for a reasoning session.""" + session_id: str = field(default_factory=lambda: str(uuid.uuid4())) + domain: str = "general" + history: List[Dict[str, Any]] = field(default_factory=list) + constraints: Dict[str, Any] = field(default_factory=dict) + confidence_threshold: float = 0.7 + created_at: datetime = field(default_factory=datetime.utcnow) + + +@dataclass +class ReasoningStep: + """Single step in a reasoning chain.""" + step_id: str = field(default_factory=lambda: str(uuid.uuid4())) + description: str = "" + inputs: Dict[str, Any] = field(default_factory=dict) + outputs: Dict[str, Any] = field(default_factory=dict) + confidence: float = 1.0 + domain: str = "general" + timestamp: datetime = field(default_factory=datetime.utcnow) + + +@dataclass +class ReasoningResult: + """Result of a reasoning process.""" + result_id: str = field(default_factory=lambda: str(uuid.uuid4())) + conclusion: str = "" + steps: List[ReasoningStep] = field(default_factory=list) + confidence: float = 0.0 + domain: str = "general" + alternatives: List[str] = field(default_factory=list) + metadata: Dict[str, Any] = field(default_factory=dict) + + +class ReasoningStrategy(ABC): + """Abstract base class for reasoning strategies.""" + + @abstractmethod + def reason(self, problem: str, context: ReasoningContext) -> ReasoningResult: + pass + + @abstractmethod + def can_handle(self, domain: str) -> bool: + pass + + +class DeductiveReasoner(ReasoningStrategy): + """Applies deductive reasoning: general rules -> specific conclusions.""" + + RULES: Dict[str, List[str]] = { + "infrastructure": ["high_cpu => scale_out", "low_memory => add_ram", "network_latency => check_routing"], + "security": ["open_port => vulnerability_check", "failed_auth => alert", "data_leak => quarantine"], + "performance": ["slow_query => optimize_index", "high_io => cache_layer", 
"memory_leak => restart_service"], + } + + def can_handle(self, domain: str) -> bool: + return domain in self.RULES or domain == "general" + + def reason(self, problem: str, context: ReasoningContext) -> ReasoningResult: + steps: List[ReasoningStep] = [] + rules = self.RULES.get(context.domain, [r for rules in self.RULES.values() for r in rules]) + + matched: List[str] = [] + for rule in rules: + antecedent, consequent = rule.split(" => ") + if antecedent.replace("_", " ") in problem.lower() or antecedent in problem.lower(): + matched.append(consequent) + steps.append(ReasoningStep( + description=f"Applied rule: {rule}", + inputs={"rule": rule, "problem": problem}, + outputs={"conclusion": consequent}, + confidence=0.85, + domain=context.domain, + )) + + conclusion = "; ".join(matched) if matched else "No direct rule matches found; applying heuristics." + confidence = min(0.95, 0.5 + 0.15 * len(matched)) + return ReasoningResult( + conclusion=conclusion, + steps=steps, + confidence=confidence, + domain=context.domain, + alternatives=[f"Consider: {r}" for r in rules if r not in matched][:3], + ) + + +class InductiveReasoner(ReasoningStrategy): + """Infers general patterns from specific observations.""" + + def can_handle(self, domain: str) -> bool: + return True + + def reason(self, problem: str, context: ReasoningContext) -> ReasoningResult: + steps: List[ReasoningStep] = [] + history_patterns = self._extract_patterns(context.history) + observations = self._tokenize(problem) + + pattern_matches = sum(1 for p in history_patterns if p in observations) + pattern_ratio = pattern_matches / max(len(history_patterns), 1) + + steps.append(ReasoningStep( + description="Extracted observations from problem statement", + inputs={"problem": problem}, + outputs={"observations": observations}, + confidence=0.9, + )) + steps.append(ReasoningStep( + description=f"Matched {pattern_matches}/{len(history_patterns)} historical patterns", + inputs={"patterns": history_patterns, 
"observations": observations}, + outputs={"match_ratio": pattern_ratio}, + confidence=0.75 + 0.2 * pattern_ratio, + )) + + conclusion = ( + f"Based on {len(context.history)} historical cases, pattern similarity is " + f"{pattern_ratio:.1%}. Recommend action consistent with similar past outcomes." + ) + return ReasoningResult( + conclusion=conclusion, + steps=steps, + confidence=0.6 + 0.3 * pattern_ratio, + domain=context.domain, + ) + + def _extract_patterns(self, history: List[Dict[str, Any]]) -> List[str]: + tokens: List[str] = [] + for entry in history[-10:]: + text = str(entry.get("problem", "")) + " " + str(entry.get("conclusion", "")) + tokens.extend(self._tokenize(text)) + return list(set(tokens)) + + def _tokenize(self, text: str) -> List[str]: + return [w.lower() for w in re.findall(r"\b\w{4,}\b", text)] + + +class AbductiveReasoner(ReasoningStrategy): + """Generates best-explanation hypotheses for observations.""" + + HYPOTHESES_TEMPLATES: List[str] = [ + "The most likely cause is {cause} given the observed {symptom}.", + "Hypothesis: {symptom} results from {cause} under current conditions.", + "Best explanation: {cause} accounts for the majority of observed {symptom}.", + ] + + def can_handle(self, domain: str) -> bool: + return True + + def reason(self, problem: str, context: ReasoningContext) -> ReasoningResult: + keywords = re.findall(r"\b\w{4,}\b", problem.lower()) + symptom = keywords[0] if keywords else "unknown_symptom" + cause = keywords[-1] if len(keywords) > 1 else "undetermined_cause" + + hypotheses = [ + t.format(cause=cause, symptom=symptom) for t in self.HYPOTHESES_TEMPLATES + ] + steps = [ + ReasoningStep( + description="Identified symptoms and generated causal hypotheses", + inputs={"problem": problem}, + outputs={"symptom": symptom, "cause": cause, "hypotheses": hypotheses}, + confidence=0.72, + ) + ] + return ReasoningResult( + conclusion=hypotheses[0], + steps=steps, + confidence=0.72, + domain=context.domain, + 
alternatives=hypotheses[1:], + ) + + +class ReasoningEngine: + """ + Core AGI reasoning engine combining multiple reasoning strategies + with adaptive selection, meta-learning integration, and multi-domain support. + """ + + DOMAIN_KEYWORDS: Dict[str, List[str]] = { + "infrastructure": ["server", "cpu", "memory", "network", "latency", "scale"], + "security": ["auth", "token", "vulnerability", "breach", "encrypt", "threat"], + "performance": ["slow", "latency", "throughput", "optimize", "bottleneck"], + "data": ["database", "query", "schema", "pipeline", "etl", "dataset"], + } + + def __init__(self) -> None: + self.strategies: List[ReasoningStrategy] = [ + DeductiveReasoner(), + InductiveReasoner(), + AbductiveReasoner(), + ] + self.reasoning_history: deque = deque(maxlen=100) + self.domain_performance: Dict[str, List[float]] = defaultdict(list) + logger.info("ReasoningEngine initialized with %d strategies", len(self.strategies)) + + def infer_domain(self, problem: str) -> str: + problem_lower = problem.lower() + scores: Dict[str, int] = {} + for domain, keywords in self.DOMAIN_KEYWORDS.items(): + scores[domain] = sum(1 for kw in keywords if kw in problem_lower) + best = max(scores, key=lambda d: scores[d]) + return best if scores[best] > 0 else "general" + + def solve(self, problem: str, context: Optional[ReasoningContext] = None) -> ReasoningResult: + if context is None: + context = ReasoningContext() + if context.domain == "general": + context.domain = self.infer_domain(problem) + + logger.debug("Solving problem in domain '%s': %.80s...", context.domain, problem) + start = time.perf_counter() + + applicable = [s for s in self.strategies if s.can_handle(context.domain)] + results: List[ReasoningResult] = [] + for strategy in applicable: + try: + result = strategy.reason(problem, context) + results.append(result) + except Exception as exc: + logger.warning("Strategy %s failed: %s", type(strategy).__name__, exc) + + best = max(results, key=lambda r: r.confidence) if 
results else ReasoningResult( + conclusion="Unable to reason about the problem.", + confidence=0.0, + ) + best.metadata["elapsed_ms"] = round((time.perf_counter() - start) * 1000, 2) + best.metadata["strategies_used"] = [type(s).__name__ for s in applicable] + + self.reasoning_history.append({"problem": problem, "domain": context.domain, "confidence": best.confidence}) + self.domain_performance[context.domain].append(best.confidence) + context.history.append({"problem": problem, "conclusion": best.conclusion, "confidence": best.confidence}) + + logger.info("Reasoning complete. Domain: %s, Confidence: %.2f", best.domain, best.confidence) + return best + + def multi_step_reason(self, problem: str, steps: int = 3, context: Optional[ReasoningContext] = None) -> List[ReasoningResult]: + """Iterative deepening reasoning over multiple passes.""" + if context is None: + context = ReasoningContext() + results: List[ReasoningResult] = [] + current_problem = problem + for i in range(steps): + result = self.solve(current_problem, context) + results.append(result) + current_problem = f"{result.conclusion} — further analysis required." 
+ logger.debug("Multi-step iteration %d confidence: %.2f", i + 1, result.confidence) + if result.confidence >= context.confidence_threshold: + break + return results + + def domain_accuracy(self) -> Dict[str, float]: + return { + domain: sum(scores) / len(scores) + for domain, scores in self.domain_performance.items() + if scores + } + + def adaptive_select_strategy(self, domain: str) -> ReasoningStrategy: + """Select best-performing strategy for domain based on history.""" + history = [h for h in self.reasoning_history if h["domain"] == domain] + if not history: + return self.strategies[0] + avg_confidence = sum(h["confidence"] for h in history) / len(history) + idx = min(int(avg_confidence * len(self.strategies)), len(self.strategies) - 1) + return self.strategies[idx] + + def explain(self, result: ReasoningResult) -> str: + """Return human-readable explanation of a reasoning result.""" + lines = [ + f"Conclusion: {result.conclusion}", + f"Confidence: {result.confidence:.1%}", + f"Domain: {result.domain}", + f"Reasoning steps ({len(result.steps)}):", + ] + for i, step in enumerate(result.steps, 1): + lines.append(f" {i}. 
{step.description} (confidence: {step.confidence:.1%})") + if result.alternatives: + lines.append("Alternatives considered:") + for alt in result.alternatives: + lines.append(f" - {alt}") + return "\n".join(lines) diff --git a/agents/agi_agent/transfer_learning.py b/agents/agi_agent/transfer_learning.py new file mode 100644 index 0000000..1b3c593 --- /dev/null +++ b/agents/agi_agent/transfer_learning.py @@ -0,0 +1,254 @@ +"""Cross-domain knowledge transfer module.""" +from __future__ import annotations + +import logging +import math +import statistics +import uuid +from collections import defaultdict +from dataclasses import dataclass, field +from datetime import datetime +from typing import Any, Dict, List, Optional, Tuple + +logger = logging.getLogger(__name__) + + +@dataclass +class DomainKnowledge: + """Encapsulates knowledge learned in a specific domain.""" + domain_id: str = field(default_factory=lambda: str(uuid.uuid4())) + domain_name: str = "" + features: Dict[str, float] = field(default_factory=dict) # feature weights + rules: List[str] = field(default_factory=list) + examples: List[Dict[str, Any]] = field(default_factory=list) + performance: float = 0.0 + created_at: datetime = field(default_factory=datetime.utcnow) + updated_at: datetime = field(default_factory=datetime.utcnow) + + +@dataclass +class TransferConfig: + """Configuration for a transfer learning operation.""" + source_domain: str = "" + target_domain: str = "" + transfer_ratio: float = 0.5 # how much source knowledge to transfer + freeze_layers: bool = False + fine_tune_steps: int = 10 + similarity_threshold: float = 0.3 # minimum similarity to transfer + + +@dataclass +class TransferResult: + """Outcome of a transfer learning operation.""" + transfer_id: str = field(default_factory=lambda: str(uuid.uuid4())) + source_domain: str = "" + target_domain: str = "" + transferred_features: Dict[str, float] = field(default_factory=dict) + similarity_score: float = 0.0 + improvement: float = 0.0 + 
steps_taken: int = 0 + metadata: Dict[str, Any] = field(default_factory=dict) + + +def _feature_similarity(a: Dict[str, float], b: Dict[str, float]) -> float: + """Jaccard-weighted similarity between two feature dictionaries.""" + all_keys = set(a) | set(b) + if not all_keys: + return 0.0 + common_keys = set(a) & set(b) + if not common_keys: + return 0.0 + weight_sim = sum( + 1.0 - abs(a[k] - b[k]) / (max(abs(a[k]), abs(b[k])) + 1e-9) + for k in common_keys + ) / len(all_keys) + jaccard = len(common_keys) / len(all_keys) + return (jaccard + weight_sim) / 2.0 + + +class FeatureAligner: + """Aligns feature spaces between source and target domains.""" + + def align(self, source_features: Dict[str, float], + target_features: Dict[str, float], + transfer_ratio: float) -> Dict[str, float]: + """ + Produce aligned features by blending source into target. + Features existing in target are kept; new features from source are added at reduced weight. + """ + aligned = dict(target_features) + for key, value in source_features.items(): + if key in aligned: + # Blend existing feature + aligned[key] = aligned[key] * (1 - transfer_ratio) + value * transfer_ratio + else: + # Import new feature at reduced weight + aligned[key] = value * transfer_ratio * 0.5 + return aligned + + +class DomainAdapter: + """Adapts source domain knowledge to a target domain via fine-tuning simulation.""" + + def __init__(self, fine_tune_steps: int = 10, lr: float = 0.1) -> None: + self.fine_tune_steps = fine_tune_steps + self.lr = lr + + def fine_tune(self, transferred: Dict[str, float], + target_examples: List[Dict[str, Any]]) -> Tuple[Dict[str, float], int]: + """ + Simulate fine-tuning by nudging feature weights towards + the centroid of target domain examples. 
+ """ + if not target_examples: + return transferred, 0 + + # Compute target centroid + all_keys = set(k for ex in target_examples for k in ex.get("features", {})) + centroid: Dict[str, float] = { + k: statistics.mean(ex.get("features", {}).get(k, 0.0) for ex in target_examples) + for k in all_keys + } + adapted = dict(transferred) + for step in range(self.fine_tune_steps): + grad_norm = 0.0 + for k in all_keys: + target_v = centroid.get(k, 0.0) + current_v = adapted.get(k, 0.0) + delta = self.lr * (target_v - current_v) + adapted[k] = current_v + delta + grad_norm += delta ** 2 + if math.sqrt(grad_norm) < 1e-4: + return adapted, step + 1 + return adapted, self.fine_tune_steps + + +class TransferLearning: + """ + Cross-domain knowledge transfer system. + Supports domain registration, similarity computation, + feature alignment, and fine-tuning-based adaptation. + """ + + def __init__(self) -> None: + self._domains: Dict[str, DomainKnowledge] = {} + self._transfer_history: List[TransferResult] = [] + self.aligner = FeatureAligner() + self.adapter = DomainAdapter() + logger.info("TransferLearning system initialized") + + def register_domain(self, domain_name: str, features: Optional[Dict[str, float]] = None, + rules: Optional[List[str]] = None) -> DomainKnowledge: + dk = DomainKnowledge(domain_name=domain_name, features=features or {}, + rules=rules or []) + self._domains[domain_name] = dk + logger.debug("Registered domain '%s'", domain_name) + return dk + + def update_domain(self, domain_name: str, new_features: Dict[str, float], + performance: float = 0.0) -> bool: + dk = self._domains.get(domain_name) + if not dk: + return False + lr = 0.1 + for k, v in new_features.items(): + dk.features[k] = dk.features.get(k, v) * (1 - lr) + v * lr + dk.performance = performance + dk.updated_at = datetime.utcnow() + return True + + def domain_similarity(self, source: str, target: str) -> float: + """Compute feature similarity between two registered domains.""" + src = 
self._domains.get(source) + tgt = self._domains.get(target) + if not src or not tgt: + return 0.0 + return _feature_similarity(src.features, tgt.features) + + def transfer(self, config: TransferConfig, + target_examples: Optional[List[Dict[str, Any]]] = None) -> TransferResult: + """ + Transfer knowledge from source domain to target domain. + """ + source_dk = self._domains.get(config.source_domain) + target_dk = self._domains.get(config.target_domain) + + if not source_dk: + logger.warning("Source domain '%s' not found", config.source_domain) + return TransferResult(source_domain=config.source_domain, + target_domain=config.target_domain, similarity_score=0.0) + + sim = self.domain_similarity(config.source_domain, config.target_domain) if target_dk else 0.0 + if sim < config.similarity_threshold and target_dk: + logger.info("Similarity %.2f below threshold %.2f; skipping transfer", + sim, config.similarity_threshold) + return TransferResult(source_domain=config.source_domain, + target_domain=config.target_domain, + similarity_score=sim, + metadata={"skipped": True, "reason": "low_similarity"}) + + target_features = target_dk.features if target_dk else {} + aligned = self.aligner.align(source_dk.features, target_features, config.transfer_ratio) + + steps = 0 + if target_examples and not config.freeze_layers: + aligned, steps = self.adapter.fine_tune(aligned, target_examples) + + if not target_dk: + target_dk = self.register_domain(config.target_domain, aligned) + else: + target_dk.features = aligned + target_dk.updated_at = datetime.utcnow() + + # Simulate improvement + improvement = max(0.0, sim * config.transfer_ratio * (1 + steps * 0.01)) + + result = TransferResult( + source_domain=config.source_domain, + target_domain=config.target_domain, + transferred_features=aligned, + similarity_score=sim, + improvement=improvement, + steps_taken=steps, + metadata={"transfer_ratio": config.transfer_ratio, "fine_tune_steps": steps}, + ) + 
self._transfer_history.append(result) + logger.info("Transferred from '%s' to '%s': sim=%.2f, improvement=%.2f", + config.source_domain, config.target_domain, sim, improvement) + return result + + def auto_transfer(self, target_domain: str, + target_examples: Optional[List[Dict[str, Any]]] = None) -> List[TransferResult]: + """Automatically find the most similar source domain and transfer.""" + scores = { + name: _feature_similarity(dk.features, self._domains.get(target_domain, DomainKnowledge()).features) + for name, dk in self._domains.items() + if name != target_domain + } + if not scores: + return [] + best_source = max(scores, key=lambda n: scores[n]) + config = TransferConfig( + source_domain=best_source, + target_domain=target_domain, + transfer_ratio=0.4, + similarity_threshold=0.1, + ) + return [self.transfer(config, target_examples)] + + def get_transfer_history(self, domain: Optional[str] = None) -> List[TransferResult]: + if domain: + return [r for r in self._transfer_history + if r.source_domain == domain or r.target_domain == domain] + return list(self._transfer_history) + + def list_domains(self) -> List[str]: + return list(self._domains.keys()) + + @property + def stats(self) -> Dict[str, Any]: + return { + "registered_domains": len(self._domains), + "total_transfers": len(self._transfer_history), + "domain_names": self.list_domains(), + } diff --git a/agents/nlp_agent/__init__.py b/agents/nlp_agent/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/agents/nlp_agent/doc_parser.py b/agents/nlp_agent/doc_parser.py new file mode 100644 index 0000000..bec945e --- /dev/null +++ b/agents/nlp_agent/doc_parser.py @@ -0,0 +1,251 @@ +"""Technical document parser for NLP.""" +from __future__ import annotations + +import logging +import re +import uuid +from dataclasses import dataclass, field +from datetime import datetime +from typing import Any, Dict, List, Optional, Tuple + +logger = logging.getLogger(__name__) + + +@dataclass +class 
DocSection: + """A parsed section of a document.""" + section_id: str = field(default_factory=lambda: str(uuid.uuid4())) + heading: str = "" + level: int = 1 + content: str = "" + subsections: List["DocSection"] = field(default_factory=list) + metadata: Dict[str, Any] = field(default_factory=dict) + + +@dataclass +class CodeBlock: + """A code block extracted from a document.""" + language: str = "" + code: str = "" + line_start: int = 0 + line_end: int = 0 + + +@dataclass +class ParsedDocument: + """Fully parsed document structure.""" + doc_id: str = field(default_factory=lambda: str(uuid.uuid4())) + title: str = "" + doc_type: str = "unknown" # "markdown" | "rst" | "text" | "yaml" | "json" | "log" + sections: List[DocSection] = field(default_factory=list) + code_blocks: List[CodeBlock] = field(default_factory=list) + tables: List[List[List[str]]] = field(default_factory=list) + links: List[str] = field(default_factory=list) + metadata: Dict[str, Any] = field(default_factory=dict) + word_count: int = 0 + parsed_at: datetime = field(default_factory=datetime.utcnow) + + +def _count_words(text: str) -> int: + return len(re.findall(r"\b\w+\b", text)) + + +def _extract_links(text: str) -> List[str]: + return re.findall(r"https?://[^\s\)\"'>]+", text) + + +def _extract_code_blocks_markdown(text: str) -> Tuple[List[CodeBlock], str]: + blocks: List[CodeBlock] = [] + lines = text.split("\n") + cleaned_lines: List[str] = [] + in_block = False + language = "" + block_lines: List[str] = [] + start_line = 0 + + for i, line in enumerate(lines): + if not in_block and re.match(r"^```(\w*)", line): + in_block = True + language = re.match(r"^```(\w*)", line).group(1) or "text" + block_lines = [] + start_line = i + elif in_block and line.strip() == "```": + blocks.append(CodeBlock(language=language, code="\n".join(block_lines), + line_start=start_line, line_end=i)) + in_block = False + block_lines = [] + elif in_block: + block_lines.append(line) + else: + cleaned_lines.append(line) + 
+ return blocks, "\n".join(cleaned_lines) + + +def _parse_markdown_sections(text: str) -> Tuple[str, List[DocSection]]: + lines = text.split("\n") + title = "" + sections: List[DocSection] = [] + current_section: Optional[DocSection] = None + current_content: List[str] = [] + + for line in lines: + heading_match = re.match(r"^(#{1,6})\s+(.+)", line) + if heading_match: + if current_section: + current_section.content = "\n".join(current_content).strip() + sections.append(current_section) + level = len(heading_match.group(1)) + heading = heading_match.group(2).strip() + if level == 1 and not title: + title = heading + current_section = DocSection(heading=heading, level=level) + current_content = [] + else: + current_content.append(line) + + if current_section: + current_section.content = "\n".join(current_content).strip() + sections.append(current_section) + + return title, sections + + +def _parse_markdown_tables(text: str) -> List[List[List[str]]]: + tables: List[List[List[str]]] = [] + lines = text.split("\n") + table_lines: List[str] = [] + in_table = False + + for line in lines: + if re.match(r"^\|.+\|", line): + in_table = True + if not re.match(r"^\|[-| ]+\|", line): + table_lines.append(line) + else: + if in_table and table_lines: + table = [[cell.strip() for cell in row.strip("|").split("|")] + for row in table_lines] + tables.append(table) + table_lines = [] + in_table = False + + if in_table and table_lines: + tables.append([[cell.strip() for cell in row.strip("|").split("|")] + for row in table_lines]) + return tables + + +def _parse_log_document(text: str) -> List[DocSection]: + patterns = { + "ERROR": re.compile(r"\b(ERROR|CRITICAL|FATAL)\b"), + "WARNING": re.compile(r"\bWARN(?:ING)?\b"), + "INFO": re.compile(r"\bINFO\b"), + } + sections: List[DocSection] = [] + for level, pattern in patterns.items(): + matching_lines = [line for line in text.split("\n") if pattern.search(line)] + if matching_lines: + sections.append(DocSection( + heading=f"{level} Log 
Entries", + level=2, + content="\n".join(matching_lines[:50]), + metadata={"entry_count": len(matching_lines), "level": level}, + )) + return sections + + +def _detect_doc_type(text: str) -> str: + text_strip = text.strip() + if text_strip.startswith("#") or re.search(r"^#{1,6}\s", text_strip, re.MULTILINE): + return "markdown" + if re.match(r"^[-=]+\n", text_strip) or re.search(r"\n[-=]+\n", text_strip): + return "rst" + if re.search(r"^\s*[\{\[]", text_strip): + return "json" + if re.match(r"^\w+:", text_strip) or re.search(r"\n\w+:", text_strip): + return "yaml" + if re.search(r"\b(ERROR|WARN|INFO|DEBUG)\b", text_strip): + return "log" + return "text" + + +class DocParser: + """ + Technical document parser supporting Markdown, RST, logs, + YAML, JSON, and plain text with section extraction, code blocks, + tables, and link detection. + """ + + def __init__(self) -> None: + self._custom_extractors: List[Any] = [] + logger.info("DocParser initialized") + + def parse(self, text: str, doc_type: Optional[str] = None) -> ParsedDocument: + if not text.strip(): + return ParsedDocument(doc_type="empty", word_count=0) + + detected_type = doc_type or _detect_doc_type(text) + code_blocks: List[CodeBlock] = [] + sections: List[DocSection] = [] + tables: List[List[List[str]]] = [] + title = "" + + if detected_type == "markdown": + code_blocks, clean_text = _extract_code_blocks_markdown(text) + tables = _parse_markdown_tables(clean_text) + title, sections = _parse_markdown_sections(clean_text) + elif detected_type == "log": + sections = _parse_log_document(text) + clean_text = text + elif detected_type in ("yaml", "json"): + sections = [DocSection(heading="Content", level=1, content=text)] + clean_text = text + else: + paragraphs = [p.strip() for p in re.split(r"\n\n+", text) if p.strip()] + sections = [DocSection(heading=f"Section {i+1}", level=1, content=p) + for i, p in enumerate(paragraphs)] + clean_text = text + + links = _extract_links(text) + word_count = 
_count_words(text) + + doc = ParsedDocument( + title=title or "Untitled", + doc_type=detected_type, + sections=sections, + code_blocks=code_blocks, + tables=tables, + links=links, + word_count=word_count, + metadata={ + "char_count": len(text), + "section_count": len(sections), + "code_block_count": len(code_blocks), + "table_count": len(tables), + "link_count": len(links), + }, + ) + logger.debug("Parsed %s document: %d sections, %d code blocks", + detected_type, len(sections), len(code_blocks)) + return doc + + def extract_metadata(self, text: str) -> Dict[str, Any]: + """Extract key-value metadata from YAML frontmatter or header comments.""" + metadata: Dict[str, Any] = {} + frontmatter_match = re.match(r"^---\n(.+?)\n---", text, re.DOTALL) + if frontmatter_match: + for line in frontmatter_match.group(1).split("\n"): + kv = re.match(r"(\w+):\s*(.+)", line) + if kv: + metadata[kv.group(1)] = kv.group(2).strip() + return metadata + + def batch_parse(self, texts: List[str]) -> List[ParsedDocument]: + return [self.parse(t) for t in texts] + + def to_plain_text(self, doc: ParsedDocument) -> str: + parts = [doc.title] if doc.title and doc.title != "Untitled" else [] + for section in doc.sections: + parts.append(f"\n{section.heading}\n{section.content}") + return "\n".join(parts) diff --git a/agents/nlp_agent/qa_system.py b/agents/nlp_agent/qa_system.py new file mode 100644 index 0000000..cee41b7 --- /dev/null +++ b/agents/nlp_agent/qa_system.py @@ -0,0 +1,223 @@ +"""Question answering system for NLP.""" +from __future__ import annotations + +import logging +import re +import uuid +from collections import Counter +from dataclasses import dataclass, field +from datetime import datetime +from typing import Any, Dict, List, Optional, Tuple + +logger = logging.getLogger(__name__) + + +@dataclass +class Document: + """A document in the knowledge base.""" + doc_id: str = field(default_factory=lambda: str(uuid.uuid4())) + title: str = "" + content: str = "" + source: str = "" 
+ tags: List[str] = field(default_factory=list) + metadata: Dict[str, Any] = field(default_factory=dict) + + +@dataclass +class Answer: + """An answer to a question.""" + answer_id: str = field(default_factory=lambda: str(uuid.uuid4())) + question: str = "" + answer_text: str = "" + source_doc_id: str = "" + source_passage: str = "" + confidence: float = 0.0 + answer_type: str = "extractive" # "extractive" | "generative" | "no_answer" + metadata: Dict[str, Any] = field(default_factory=dict) + timestamp: datetime = field(default_factory=datetime.utcnow) + + +_STOPWORDS = { + "a", "an", "the", "is", "are", "was", "were", "be", "been", + "what", "who", "where", "when", "why", "how", "which", + "does", "do", "did", "will", "would", "could", "should", + "can", "may", "might", "have", "has", "had", "it", "its", + "i", "you", "we", "they", "he", "she", "in", "on", "at", + "to", "for", "of", "with", "by", "from", "and", "or", "but", +} + +_QUESTION_TYPES: Dict[str, List[str]] = { + "factual": ["what", "who", "where", "when", "which"], + "reasoning": ["why", "how"], + "boolean": ["is", "are", "does", "do", "can", "will", "has"], + "quantitative": ["how many", "how much", "how often", "what percentage"], +} + + +def _tokenize(text: str) -> List[str]: + return [w.lower() for w in re.findall(r"\b\w+\b", text)] + + +def _clean_tokens(tokens: List[str]) -> List[str]: + return [t for t in tokens if t not in _STOPWORDS and len(t) > 2] + + +def _tfidf_similarity(query_tokens: List[str], doc_tokens: List[str]) -> float: + if not query_tokens or not doc_tokens: + return 0.0 + query_set = Counter(query_tokens) + doc_counter = Counter(doc_tokens) + doc_len = len(doc_tokens) + score = 0.0 + for token, qcount in query_set.items(): + tf = doc_counter.get(token, 0) / doc_len + score += tf * qcount + return score / (len(query_set) + 1e-9) + + +def _split_sentences(text: str) -> List[str]: + return [s.strip() for s in re.split(r"(?<=[.!?])\s+", text) if len(s.strip()) > 10] + + +def 
_classify_question(question: str) -> str: + q_lower = question.lower().strip() + for qtype, starters in _QUESTION_TYPES.items(): + if any(q_lower.startswith(s) for s in starters): + return qtype + return "factual" + + +class TFIDFRetriever: + """Retrieves relevant passages using TF-IDF similarity.""" + + def retrieve(self, query: str, documents: List[Document], top_k: int = 3) -> List[Tuple[Document, float, str]]: + query_tokens = _clean_tokens(_tokenize(query)) + scored: List[Tuple[float, Document, str]] = [] + + for doc in documents: + sentences = _split_sentences(doc.content) + for sent in sentences: + sent_tokens = _clean_tokens(_tokenize(sent)) + score = _tfidf_similarity(query_tokens, sent_tokens) + if score > 0: + scored.append((score, doc, sent)) + + scored.sort(key=lambda x: x[0], reverse=True) + return [(doc, score, passage) for score, doc, passage in scored[:top_k]] + + +class ExtractiveReader: + """Extracts the answer span from a passage.""" + + def read(self, question: str, passage: str) -> Tuple[str, float]: + q_tokens = _clean_tokens(_tokenize(question)) + sentences = _split_sentences(passage) + if not sentences: + return passage[:200], 0.3 + + # Score each sentence by overlap with question keywords + scored: List[Tuple[float, str]] = [] + for sent in sentences: + sent_tokens = _clean_tokens(_tokenize(sent)) + overlap = len(set(q_tokens) & set(sent_tokens)) + score = overlap / max(len(q_tokens), 1) + scored.append((score, sent)) + + scored.sort(reverse=True) + best_score, best_sent = scored[0] + return best_sent, min(0.95, 0.4 + best_score * 0.6) + + +class BooleanAnswerer: + """Handles yes/no questions.""" + + _POSITIVE_SIGNALS = {"yes", "true", "correct", "always", "does", "is", "can", "will", "has"} + _NEGATIVE_SIGNALS = {"no", "not", "false", "never", "cannot", "won't", "doesn't", "isn't"} + + def answer(self, question: str, passage: str) -> Tuple[str, float]: + passage_lower = passage.lower() + pos = sum(1 for w in self._POSITIVE_SIGNALS if w in 
passage_lower) + neg = sum(1 for w in self._NEGATIVE_SIGNALS if w in passage_lower) + if pos > neg: + return "Yes", 0.6 + min(0.3, pos * 0.05) + if neg > pos: + return "No", 0.6 + min(0.3, neg * 0.05) + return "It depends on the context.", 0.4 + + +class QASystem: + """ + Question answering system combining TF-IDF retrieval and + extractive reading with support for multiple QA modes. + """ + + def __init__(self) -> None: + self._documents: List[Document] = [] + self._retriever = TFIDFRetriever() + self._reader = ExtractiveReader() + self._boolean = BooleanAnswerer() + logger.info("QASystem initialized") + + def add_document(self, title: str, content: str, source: str = "", + tags: Optional[List[str]] = None) -> Document: + doc = Document(title=title, content=content, source=source, tags=tags or []) + self._documents.append(doc) + logger.debug("Added document '%s' (%d chars)", title, len(content)) + return doc + + def add_documents(self, docs: List[Dict[str, Any]]) -> List[Document]: + return [self.add_document(**d) for d in docs] + + def answer(self, question: str, top_k: int = 3) -> Answer: + if not self._documents: + return Answer(question=question, answer_text="No knowledge base loaded.", + confidence=0.0, answer_type="no_answer") + + question_type = _classify_question(question) + retrieved = self._retriever.retrieve(question, self._documents, top_k=top_k) + + if not retrieved: + return Answer(question=question, answer_text="I couldn't find relevant information.", + confidence=0.1, answer_type="no_answer") + + best_doc, retrieval_score, best_passage = retrieved[0] + + if question_type == "boolean": + answer_text, reader_confidence = self._boolean.answer(question, best_passage) + else: + answer_text, reader_confidence = self._reader.read(question, best_passage) + + confidence = (retrieval_score * 0.5 + reader_confidence * 0.5) + return Answer( + question=question, + answer_text=answer_text, + source_doc_id=best_doc.doc_id, + source_passage=best_passage[:300], + 
confidence=confidence, + answer_type="extractive", + metadata={ + "question_type": question_type, + "retrieval_score": retrieval_score, + "source_title": best_doc.title, + "candidates": len(retrieved), + }, + ) + + def batch_answer(self, questions: List[str]) -> List[Answer]: + return [self.answer(q) for q in questions] + + def search_documents(self, query: str, top_k: int = 5) -> List[Tuple[Document, float]]: + retrieved = self._retriever.retrieve(query, self._documents, top_k=top_k) + seen: Dict[str, float] = {} + for doc, score, _ in retrieved: + if doc.doc_id not in seen or seen[doc.doc_id] < score: + seen[doc.doc_id] = score + return [(next(d for d in self._documents if d.doc_id == did), score) + for did, score in sorted(seen.items(), key=lambda x: -x[1])] + + def clear(self) -> None: + self._documents.clear() + + @property + def document_count(self) -> int: + return len(self._documents) diff --git a/agents/nlp_agent/summarizer.py b/agents/nlp_agent/summarizer.py new file mode 100644 index 0000000..c7e17b0 --- /dev/null +++ b/agents/nlp_agent/summarizer.py @@ -0,0 +1,195 @@ +"""Document summarization module.""" +from __future__ import annotations + +import logging +import re +import uuid +from collections import Counter +from dataclasses import dataclass, field +from datetime import datetime +from typing import Any, Dict, List, Optional, Tuple + +logger = logging.getLogger(__name__) + + +@dataclass +class SummaryConfig: + """Configuration for summarization.""" + max_sentences: int = 5 + max_words: int = 150 + method: str = "extractive" # "extractive" | "abstractive" | "hybrid" + focus_keywords: List[str] = field(default_factory=list) + include_title: bool = True + + +@dataclass +class SummaryResult: + """Output of document summarization.""" + result_id: str = field(default_factory=lambda: str(uuid.uuid4())) + source_text: str = "" + summary: str = "" + compression_ratio: float = 0.0 + sentence_count: int = 0 + word_count: int = 0 + key_phrases: List[str] = 
field(default_factory=list) + method: str = "extractive" + metadata: Dict[str, Any] = field(default_factory=dict) + timestamp: datetime = field(default_factory=datetime.utcnow) + + +_STOPWORDS = { + "a", "an", "the", "and", "or", "but", "in", "on", "at", "to", "for", + "of", "with", "by", "from", "is", "are", "was", "were", "be", "been", + "being", "have", "has", "had", "do", "does", "did", "will", "would", + "could", "should", "may", "might", "this", "that", "these", "those", + "it", "its", "i", "you", "he", "she", "we", "they", "my", "your", + "their", "our", "not", "no", "so", "if", "as", "up", "out", "about", +} + + +def _tokenize_sentences(text: str) -> List[str]: + sentences = re.split(r"(?<=[.!?])\s+", text.strip()) + return [s.strip() for s in sentences if len(s.strip()) > 20] + + +def _tokenize_words(text: str) -> List[str]: + return [w.lower() for w in re.findall(r"\b[a-zA-Z]\w+\b", text)] + + +def _word_frequency(words: List[str]) -> Dict[str, float]: + filtered = [w for w in words if w not in _STOPWORDS and len(w) > 2] + counts = Counter(filtered) + max_count = max(counts.values()) if counts else 1 + return {word: count / max_count for word, count in counts.items()} + + +def _sentence_score(sentence: str, word_freq: Dict[str, float], + focus_keywords: List[str]) -> float: + words = _tokenize_words(sentence) + base_score = sum(word_freq.get(w, 0.0) for w in words) / max(len(words), 1) + keyword_bonus = sum(2.0 for kw in focus_keywords if kw.lower() in sentence.lower()) + position_factor = 1.0 # adjusted by caller + return base_score + keyword_bonus * 0.2 * position_factor + + +class ExtractiveSummarizer: + """TextRank-inspired extractive summarizer.""" + + def summarize(self, text: str, config: SummaryConfig) -> str: + sentences = _tokenize_sentences(text) + if not sentences: + return text[:config.max_words * 6] + + words = _tokenize_words(text) + freq = _word_frequency(words) + n = len(sentences) + + scored: List[Tuple[float, int, str]] = [] + for idx, 
sent in enumerate(sentences): + # Boost first and last sentences (often contain key info) + position_bonus = 1.3 if idx == 0 else (1.1 if idx == n - 1 else 1.0) + score = _sentence_score(sent, freq, config.focus_keywords) * position_bonus + scored.append((score, idx, sent)) + + scored.sort(key=lambda x: x[0], reverse=True) + top_sentences = sorted(scored[:config.max_sentences], key=lambda x: x[1]) + + summary = " ".join(sent for _, _, sent in top_sentences) + # Truncate to word limit + words_out = summary.split() + if len(words_out) > config.max_words: + summary = " ".join(words_out[:config.max_words]) + "..." + return summary + + +class AbstractiveSummarizer: + """Template-based abstractive summarizer (simulated).""" + + def summarize(self, text: str, config: SummaryConfig) -> str: + words = _tokenize_words(text) + freq = _word_frequency(words) + top_words = sorted(freq, key=lambda w: freq[w], reverse=True)[:10] + + sentences = _tokenize_sentences(text) + n_sent = len(sentences) + + if n_sent == 0: + return "No content to summarize." + + # Pick the highest-scoring sentence as "thesis" + scored = [(sum(freq.get(w, 0) for w in _tokenize_words(s)) / max(len(_tokenize_words(s)), 1), s) + for s in sentences] + thesis = max(scored, key=lambda x: x[0])[1] if scored else sentences[0] + + key_topics = ", ".join(top_words[:5]) + summary = f"{thesis} The key topics discussed include: {key_topics}." + words_out = summary.split() + if len(words_out) > config.max_words: + summary = " ".join(words_out[:config.max_words]) + "..." 
+ return summary + + +class KeyPhraseExtractor: + """Extracts key phrases using n-gram frequency analysis.""" + + def extract(self, text: str, n: int = 10) -> List[str]: + words = _tokenize_words(text) + freq = _word_frequency(words) + # Bigrams + bigrams = [f"{words[i]} {words[i+1]}" for i in range(len(words) - 1) + if words[i] not in _STOPWORDS and words[i+1] not in _STOPWORDS] + bigram_counts = Counter(bigrams) + top_bigrams = [bg for bg, _ in bigram_counts.most_common(n // 2)] + top_words = [w for w, _ in sorted(freq.items(), key=lambda x: x[1], reverse=True) + if w not in _STOPWORDS][:n // 2] + return list(dict.fromkeys(top_bigrams + top_words))[:n] + + +class Summarizer: + """ + Document summarization pipeline supporting extractive, abstractive, + and hybrid modes with key phrase extraction. + """ + + def __init__(self) -> None: + self._extractive = ExtractiveSummarizer() + self._abstractive = AbstractiveSummarizer() + self._key_phrase = KeyPhraseExtractor() + logger.info("Summarizer initialized") + + def summarize(self, text: str, config: Optional[SummaryConfig] = None) -> SummaryResult: + if config is None: + config = SummaryConfig() + if not text.strip(): + return SummaryResult(source_text=text, summary="Empty document.") + + if config.method == "extractive": + summary = self._extractive.summarize(text, config) + elif config.method == "abstractive": + summary = self._abstractive.summarize(text, config) + else: # hybrid + ext = self._extractive.summarize(text, SummaryConfig(max_sentences=3, max_words=100)) + abst = self._abstractive.summarize(text, SummaryConfig(max_sentences=2, max_words=60)) + summary = ext + " " + abst + + key_phrases = self._key_phrase.extract(text) + src_words = len(text.split()) + sum_words = len(summary.split()) + + return SummaryResult( + source_text=text[:200] + "..." 
if len(text) > 200 else text, + summary=summary, + compression_ratio=1 - sum_words / max(src_words, 1), + sentence_count=len(_tokenize_sentences(summary)), + word_count=sum_words, + key_phrases=key_phrases, + method=config.method, + metadata={"source_words": src_words, "summary_words": sum_words}, + ) + + def batch_summarize(self, texts: List[str], config: Optional[SummaryConfig] = None) -> List[SummaryResult]: + return [self.summarize(t, config) for t in texts] + + def summarize_sections(self, sections: Dict[str, str]) -> Dict[str, str]: + """Summarize each section of a structured document.""" + return {title: self.summarize(content).summary for title, content in sections.items()} diff --git a/agents/nlp_agent/text_generator.py b/agents/nlp_agent/text_generator.py new file mode 100644 index 0000000..fff5952 --- /dev/null +++ b/agents/nlp_agent/text_generator.py @@ -0,0 +1,224 @@ +"""GPT-based text generation (simulated with template/Markov approach).""" +from __future__ import annotations + +import logging +import random +import re +import uuid +from collections import defaultdict +from dataclasses import dataclass, field +from datetime import datetime +from typing import Any, Dict, List, Optional, Tuple + +logger = logging.getLogger(__name__) + + +@dataclass +class GenerationConfig: + """Configuration for text generation.""" + max_tokens: int = 200 + temperature: float = 0.7 # 0=deterministic, 1=creative + top_k: int = 50 + top_p: float = 0.9 + repetition_penalty: float = 1.2 + stop_sequences: List[str] = field(default_factory=list) + style: str = "neutral" # "formal" | "casual" | "technical" | "neutral" + + +@dataclass +class GenerationResult: + """Result of a text generation request.""" + result_id: str = field(default_factory=lambda: str(uuid.uuid4())) + prompt: str = "" + generated_text: str = "" + tokens_generated: int = 0 + finish_reason: str = "length" # "length" | "stop_sequence" | "complete" + model: str = "gpt-simulated" + metadata: Dict[str, Any] = 
field(default_factory=dict) + timestamp: datetime = field(default_factory=datetime.utcnow) + + +# Template-based generation for different styles and domains +_TEMPLATES: Dict[str, List[str]] = { + "technical": [ + "The {subject} system operates by {verb}ing the {object} through a {adjective} pipeline. " + "Key considerations include {consideration1} and {consideration2}.", + "To {verb} the {subject}, you should first configure {object} with {adjective} settings. " + "This ensures {consideration1}.", + "The {adjective} {subject} leverages {object} to achieve {consideration1}. " + "Performance can be improved by {consideration2}.", + ], + "formal": [ + "It is recommended that the {subject} be {verb}ed in accordance with {object} protocols. " + "Furthermore, {consideration1} must be taken into account.", + "The implementation of {adjective} {subject} requires careful attention to {object}. " + "Stakeholders should note {consideration1}.", + ], + "casual": [ + "So basically, {subject} is about {verb}ing your {object} in a {adjective} way. " + "Just remember to check {consideration1}!", + "Hey, when you're working with {subject}, you want to {verb} the {object} properly. " + "The {adjective} part is {consideration1}.", + ], + "neutral": [ + "{subject} {verb}s the {object} using {adjective} methods. {consideration1}.", + "When {verb}ing {subject}, the {object} plays a {adjective} role. 
{consideration1}.", + ], +} + +_DOMAIN_VOCAB: Dict[str, Dict[str, List[str]]] = { + "infrastructure": { + "subject": ["cluster", "node", "service mesh", "load balancer", "container", "microservice"], + "verb": ["deploy", "scale", "monitor", "configure", "provision", "orchestrate"], + "object": ["infrastructure", "workloads", "traffic", "resources", "configurations"], + "adjective": ["distributed", "highly available", "fault-tolerant", "scalable", "elastic"], + "consideration1": ["ensure high availability", "minimize latency", "optimize resource usage"], + "consideration2": ["implement circuit breakers", "use health checks", "set resource limits"], + }, + "ml": { + "subject": ["model", "training pipeline", "feature store", "inference engine"], + "verb": ["train", "evaluate", "deploy", "fine-tune", "validate", "optimize"], + "object": ["dataset", "hyperparameters", "model weights", "predictions"], + "adjective": ["pre-trained", "deep learning", "gradient-boosted", "transformer-based"], + "consideration1": ["prevent overfitting", "ensure data quality", "monitor drift"], + "consideration2": ["use cross-validation", "implement early stopping", "log metrics"], + }, + "general": { + "subject": ["system", "application", "platform", "service", "component"], + "verb": ["process", "handle", "manage", "coordinate", "execute", "implement"], + "object": ["requests", "data", "workflows", "resources", "operations"], + "adjective": ["robust", "efficient", "reliable", "scalable", "maintainable"], + "consideration1": ["error handling", "performance optimization", "security best practices"], + "consideration2": ["logging and monitoring", "testing coverage", "documentation"], + }, +} + + +class MarkovChainGenerator: + """Simple bigram Markov chain text generator trained on input corpus.""" + + def __init__(self, seed: Optional[int] = None) -> None: + self._chain: Dict[Tuple[str, ...], List[str]] = defaultdict(list) + self._start_tokens: List[str] = [] + self._rng = random.Random(seed) 
+ + def train(self, texts: List[str], n: int = 2) -> None: + for text in texts: + tokens = text.split() + if len(tokens) < n + 1: + continue + self._start_tokens.append(tokens[0]) + for i in range(len(tokens) - n): + key = tuple(tokens[i:i + n]) + self._chain[key].append(tokens[i + n]) + logger.debug("MarkovChain trained: %d states", len(self._chain)) + + def generate(self, max_tokens: int = 50, seed_token: Optional[str] = None) -> str: + if not self._chain: + return "" + keys = list(self._chain.keys()) + if seed_token: + matching = [k for k in keys if seed_token.lower() in " ".join(k).lower()] + start = self._rng.choice(matching) if matching else self._rng.choice(keys) + else: + start = self._rng.choice(keys) + + tokens = list(start) + for _ in range(max_tokens - len(start)): + key = tuple(tokens[-len(start):]) + candidates = self._chain.get(key) + if not candidates: + break + tokens.append(self._rng.choice(candidates)) + return " ".join(tokens) + + +class TemplateGenerator: + """Template-based generation for structured content.""" + + def __init__(self, seed: Optional[int] = None) -> None: + self._rng = random.Random(seed) + + def generate(self, style: str = "neutral", domain: str = "general", + context: Optional[Dict[str, str]] = None) -> str: + templates = _TEMPLATES.get(style, _TEMPLATES["neutral"]) + template = self._rng.choice(templates) + vocab = _DOMAIN_VOCAB.get(domain, _DOMAIN_VOCAB["general"]) + + substitutions = context or {} + for key, options in vocab.items(): + if key not in substitutions: + substitutions[key] = self._rng.choice(options) + return _fill_safe(template, substitutions) + + +def _fill_safe(template: str, subs: Dict[str, str]) -> str: + try: + return template.format(**subs) + except KeyError: + return template + + +class TextGenerator: + """ + Simulated GPT-style text generator combining template and + Markov chain approaches with configurable style and domain. 
+ """ + + def __init__(self) -> None: + self._template_gen = TemplateGenerator() + self._markov_gen = MarkovChainGenerator() + self._is_trained = False + self._corpus: List[str] = [] + logger.info("TextGenerator initialized") + + def load_corpus(self, texts: List[str]) -> None: + """Pre-train Markov chain on a corpus.""" + self._corpus.extend(texts) + self._markov_gen.train(self._corpus) + self._is_trained = True + logger.info("Corpus loaded: %d documents", len(self._corpus)) + + def generate(self, prompt: str, config: Optional[GenerationConfig] = None) -> GenerationResult: + if config is None: + config = GenerationConfig() + + domain = self._infer_domain(prompt) + + # Use Markov if trained and temperature > 0.5, else templates + if self._is_trained and config.temperature > 0.5: + seed_word = prompt.split()[0] if prompt.split() else None + text = self._markov_gen.generate(max_tokens=config.max_tokens // 5, seed_token=seed_word) + if not text: + text = self._template_gen.generate(config.style, domain) + else: + text = self._template_gen.generate(config.style, domain) + + # Apply stop sequences + finish_reason = "length" + for stop_seq in config.stop_sequences: + if stop_seq in text: + text = text[:text.index(stop_seq)] + finish_reason = "stop_sequence" + break + + tokens = len(text.split()) + return GenerationResult( + prompt=prompt, + generated_text=text, + tokens_generated=tokens, + finish_reason=finish_reason, + metadata={"domain": domain, "style": config.style, "temperature": config.temperature}, + ) + + def complete(self, text: str, n_completions: int = 3) -> List[str]: + """Generate multiple completions for the same prompt.""" + return [self.generate(text).generated_text for _ in range(n_completions)] + + def _infer_domain(self, text: str) -> str: + text_lower = text.lower() + if any(w in text_lower for w in ["cluster", "kubernetes", "docker", "server", "deploy"]): + return "infrastructure" + if any(w in text_lower for w in ["model", "train", "dataset", "ml", 
"accuracy"]): + return "ml" + return "general" diff --git a/agents/nlp_agent/translator.py b/agents/nlp_agent/translator.py new file mode 100644 index 0000000..fd8f53b --- /dev/null +++ b/agents/nlp_agent/translator.py @@ -0,0 +1,222 @@ +"""Multi-language translation module (simulated).""" +from __future__ import annotations + +import logging +import re +import uuid +from dataclasses import dataclass, field +from datetime import datetime +from typing import Any, Dict, List, Optional, Tuple + +logger = logging.getLogger(__name__) + + +# Language detection heuristics (character n-gram patterns) +_LANG_PATTERNS: Dict[str, List[str]] = { + "es": ["el", "la", "los", "las", "de", "en", "que", "por", "con", "una"], + "fr": ["le", "la", "les", "de", "du", "des", "en", "que", "est", "une"], + "de": ["der", "die", "das", "und", "ist", "nicht", "ein", "mit", "ich", "auf"], + "it": ["il", "la", "di", "che", "in", "un", "per", "una", "con", "sono"], + "pt": ["de", "da", "do", "que", "em", "para", "uma", "com", "por", "não"], + "zh": [], # detected by Unicode range + "ja": [], # detected by Unicode range + "ko": [], # detected by Unicode range + "ar": [], # detected by Unicode range + "en": ["the", "is", "are", "was", "and", "for", "this", "that", "with", "have"], +} + +# Simple phrase-level translation glossary (simulated) +_GLOSSARY: Dict[str, Dict[str, str]] = { + "es": { + "hello": "hola", "goodbye": "adiós", "thank you": "gracias", + "yes": "sí", "no": "no", "please": "por favor", + "error": "error", "server": "servidor", "database": "base de datos", + "deploy": "desplegar", "service": "servicio", "configuration": "configuración", + }, + "fr": { + "hello": "bonjour", "goodbye": "au revoir", "thank you": "merci", + "yes": "oui", "no": "non", "please": "s'il vous plaît", + "error": "erreur", "server": "serveur", "database": "base de données", + "deploy": "déployer", "service": "service", "configuration": "configuration", + }, + "de": { + "hello": "hallo", "goodbye": "auf 
wiedersehen", "thank you": "danke", + "yes": "ja", "no": "nein", "please": "bitte", + "error": "fehler", "server": "server", "database": "datenbank", + "deploy": "bereitstellen", "service": "dienst", "configuration": "konfiguration", + }, + "it": { + "hello": "ciao", "goodbye": "arrivederci", "thank you": "grazie", + "yes": "sì", "no": "no", "please": "per favore", + "error": "errore", "server": "server", "database": "database", + "deploy": "distribuire", "service": "servizio", "configuration": "configurazione", + }, + "pt": { + "hello": "olá", "goodbye": "tchau", "thank you": "obrigado", + "yes": "sim", "no": "não", "please": "por favor", + "error": "erro", "server": "servidor", "database": "banco de dados", + "deploy": "implantar", "service": "serviço", "configuration": "configuração", + }, +} + +_SUPPORTED_LANGUAGES = {"en", "es", "fr", "de", "it", "pt", "zh", "ja", "ko", "ar"} + +_LANGUAGE_NAMES: Dict[str, str] = { + "en": "English", "es": "Spanish", "fr": "French", "de": "German", + "it": "Italian", "pt": "Portuguese", "zh": "Chinese", "ja": "Japanese", + "ko": "Korean", "ar": "Arabic", +} + + +@dataclass +class TranslationResult: + """Result of a translation operation.""" + result_id: str = field(default_factory=lambda: str(uuid.uuid4())) + source_text: str = "" + translated_text: str = "" + source_language: str = "" + target_language: str = "" + confidence: float = 0.0 + word_count: int = 0 + glossary_hits: int = 0 + metadata: Dict[str, Any] = field(default_factory=dict) + timestamp: datetime = field(default_factory=datetime.utcnow) + + +def _detect_unicode_script(text: str) -> Optional[str]: + for char in text: + code = ord(char) + if 0x4E00 <= code <= 0x9FFF: + return "zh" + if 0x3040 <= code <= 0x30FF: + return "ja" + if 0xAC00 <= code <= 0xD7AF: + return "ko" + if 0x0600 <= code <= 0x06FF: + return "ar" + return None + + +class LanguageDetector: + """Heuristic language detector using word frequency patterns.""" + + def detect(self, text: str) -> 
Tuple[str, float]: + unicode_lang = _detect_unicode_script(text) + if unicode_lang: + return unicode_lang, 0.95 + + words = set(re.findall(r"\b\w+\b", text.lower())) + scores: Dict[str, int] = {} + for lang, markers in _LANG_PATTERNS.items(): + scores[lang] = sum(1 for m in markers if m in words) + + if not any(scores.values()): + return "en", 0.4 # default + + best_lang = max(scores, key=lambda l: scores[l]) + total_markers = sum(len(markers) for markers in _LANG_PATTERNS.values()) + confidence = min(0.95, 0.4 + scores[best_lang] * 0.06) + return best_lang, confidence + + +class GlossaryTranslator: + """Word/phrase substitution translator using glossary lookup.""" + + def translate(self, text: str, source_lang: str, target_lang: str) -> Tuple[str, int]: + """Returns (translated_text, glossary_hits).""" + if source_lang != "en" or target_lang not in _GLOSSARY: + if target_lang == "en" and source_lang in _GLOSSARY: + # Reverse translation + reverse = {v: k for k, v in _GLOSSARY[source_lang].items()} + return self._apply_glossary(text, reverse) + return text, 0 + + glossary = _GLOSSARY.get(target_lang, {}) + return self._apply_glossary(text, glossary) + + def _apply_glossary(self, text: str, glossary: Dict[str, str]) -> Tuple[str, int]: + result = text + hits = 0 + # Sort by length descending to match longer phrases first + for source, target in sorted(glossary.items(), key=lambda x: -len(x[0])): + pattern = re.compile(r"\b" + re.escape(source) + r"\b", re.IGNORECASE) + new_result, n = pattern.subn(target, result) + if n > 0: + result = new_result + hits += n + return result, hits + + +class Translator: + """ + Multi-language translator with automatic language detection, + glossary-based translation, and batch processing support. 
+ """ + + def __init__(self) -> None: + self._detector = LanguageDetector() + self._glossary_translator = GlossaryTranslator() + self._translation_cache: Dict[str, TranslationResult] = {} + logger.info("Translator initialized, supported languages: %s", sorted(_SUPPORTED_LANGUAGES)) + + def detect_language(self, text: str) -> Tuple[str, float]: + return self._detector.detect(text) + + def translate(self, text: str, target_language: str, + source_language: Optional[str] = None) -> TranslationResult: + if target_language not in _SUPPORTED_LANGUAGES: + raise ValueError(f"Unsupported target language: {target_language}") + + cache_key = f"{text[:100]}:{source_language}:{target_language}" + if cache_key in self._translation_cache: + logger.debug("Cache hit for translation") + return self._translation_cache[cache_key] + + if source_language is None: + source_language, detect_confidence = self._detector.detect(text) + else: + detect_confidence = 1.0 + + if source_language == target_language: + result = TranslationResult( + source_text=text, translated_text=text, + source_language=source_language, target_language=target_language, + confidence=1.0, word_count=len(text.split()), + metadata={"note": "source and target language are the same"}, + ) + return result + + translated, hits = self._glossary_translator.translate(text, source_language, target_language) + + # Add language marker for unsupported pairs (simulated) + if hits == 0 and target_language not in ("zh", "ja", "ko", "ar"): + translated = f"[{_LANGUAGE_NAMES.get(target_language, target_language)}] {text}" + elif hits == 0: + translated = f"[{target_language.upper()} translation of: {text[:80]}]" + + confidence = min(0.92, 0.5 + hits * 0.05 + detect_confidence * 0.3) + result = TranslationResult( + source_text=text, + translated_text=translated, + source_language=source_language, + target_language=target_language, + confidence=confidence, + word_count=len(translated.split()), + glossary_hits=hits, + 
metadata={"detection_confidence": detect_confidence}, + ) + self._translation_cache[cache_key] = result + logger.debug("Translated: %s -> %s (%d hits)", source_language, target_language, hits) + return result + + def batch_translate(self, texts: List[str], target_language: str, + source_language: Optional[str] = None) -> List[TranslationResult]: + return [self.translate(t, target_language, source_language) for t in texts] + + def translate_fields(self, data: Dict[str, str], target_language: str) -> Dict[str, str]: + """Translate all string values in a dictionary.""" + return {k: self.translate(v, target_language).translated_text for k, v in data.items()} + + @property + def supported_languages(self) -> List[str]: + return sorted(_SUPPORTED_LANGUAGES) diff --git a/agents/nlu_agent/__init__.py b/agents/nlu_agent/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/agents/nlu_agent/context_manager.py b/agents/nlu_agent/context_manager.py new file mode 100644 index 0000000..1d8c76f --- /dev/null +++ b/agents/nlu_agent/context_manager.py @@ -0,0 +1,195 @@ +"""Conversation context manager for multi-turn NLU.""" +from __future__ import annotations + +import logging +import uuid +from collections import deque +from dataclasses import dataclass, field +from datetime import datetime, timedelta +from typing import Any, Dict, List, Optional + +logger = logging.getLogger(__name__) + + +@dataclass +class Turn: + """A single conversational turn.""" + turn_id: str = field(default_factory=lambda: str(uuid.uuid4())) + role: str = "user" # "user" | "assistant" | "system" + text: str = "" + intent: Optional[str] = None + entities: Dict[str, Any] = field(default_factory=dict) + sentiment: Optional[str] = None + timestamp: datetime = field(default_factory=datetime.utcnow) + metadata: Dict[str, Any] = field(default_factory=dict) + + +@dataclass +class ConversationState: + """Tracks state across a conversation session.""" + session_id: str = field(default_factory=lambda: 
class ConversationContext:
    """
    Manages conversation history, slot state, and context resolution
    for a single user session.

    History is a bounded deque (most recent 100 turns); the "active"
    context exposed to callers is the last ``window`` turns. A session is
    considered expired after ``ttl_minutes`` of inactivity.
    """

    def __init__(self, session_id: Optional[str] = None, window: int = 10,
                 ttl_minutes: int = 30) -> None:
        self.state = ConversationState(session_id=session_id or str(uuid.uuid4()))
        self._history: deque[Turn] = deque(maxlen=100)  # oldest turns drop off automatically
        self._window = window  # number of turns to keep in active context
        self._ttl = timedelta(minutes=ttl_minutes)
        logger.debug("ConversationContext created: session=%s", self.state.session_id)

    @property
    def session_id(self) -> str:
        return self.state.session_id

    def is_expired(self) -> bool:
        """Return True when the session has been idle longer than its TTL."""
        return datetime.utcnow() - self.state.last_active > self._ttl

    def add_turn(self, role: str, text: str, intent: Optional[str] = None,
                 entities: Optional[Dict[str, Any]] = None,
                 sentiment: Optional[str] = None) -> Turn:
        """Append a turn, refresh the activity timestamp, and fold any
        intent/entities into session state (entities become slots)."""
        turn = Turn(role=role, text=text, intent=intent,
                    entities=entities or {}, sentiment=sentiment)
        self._history.append(turn)
        self.state.last_active = datetime.utcnow()

        if intent:
            self.state.active_intent = intent
        if entities:
            self.state.slots.update(entities)

        logger.debug("Turn added [%s]: %.60s", role, text)
        return turn

    def get_context_window(self) -> List[Turn]:
        """Return the last N turns as context."""
        history = list(self._history)
        return history[-self._window:]

    def get_history(self) -> List[Turn]:
        return list(self._history)

    def set_slot(self, key: str, value: Any) -> None:
        self.state.slots[key] = value

    def get_slot(self, key: str, default: Any = None) -> Any:
        return self.state.slots.get(key, default)

    def clear_slots(self) -> None:
        self.state.slots.clear()

    def set_var(self, key: str, value: Any) -> None:
        self.state.context_vars[key] = value

    def get_var(self, key: str, default: Any = None) -> Any:
        return self.state.context_vars.get(key, default)

    def resolve_coreference(self, text: str) -> str:
        """Resolve pronouns to the most recent entity in context.

        Walks history newest-first and binds each pronoun ("it", "they",
        "this", "that") to the first unbound string entity encountered.
        FIX: ``import re`` previously executed inside the per-pronoun
        replacement loop; it is now imported once at the top of the method.
        """
        import re  # local import: re may not be imported at module level

        replacements: Dict[str, Optional[str]] = {
            "it": None, "they": None, "this": None, "that": None,
        }
        # Find the most recently mentioned entity values.
        for turn in reversed(list(self._history)):
            for val in turn.entities.values():
                if isinstance(val, str) and len(val) > 2:
                    for pronoun in list(replacements.keys()):
                        if replacements[pronoun] is None:
                            replacements[pronoun] = val
                            break
            if all(v is not None for v in replacements.values()):
                break

        resolved = text
        for pronoun, replacement in replacements.items():
            if replacement:
                resolved = re.sub(r"\b" + pronoun + r"\b", replacement, resolved,
                                  flags=re.IGNORECASE)
        return resolved

    def build_prompt_context(self) -> str:
        """Build a text summary of recent context for prompt augmentation."""
        lines: List[str] = []
        if self.state.active_intent:
            lines.append(f"Active intent: {self.state.active_intent}")
        if self.state.slots:
            lines.append("Known slots: " + ", ".join(
                f"{k}={v}" for k, v in list(self.state.slots.items())[:5]))
        for turn in self.get_context_window()[-5:]:
            lines.append(f"[{turn.role}] {turn.text[:100]}")
        return "\n".join(lines)

    def summarize(self) -> Dict[str, Any]:
        """Machine-readable snapshot of the session for logging/inspection."""
        return {
            "session_id": self.state.session_id,
            "domain": self.state.domain,
            "turn_count": len(self._history),
            "active_intent": self.state.active_intent,
            "slots": self.state.slots,
            "last_active": self.state.last_active.isoformat(),
            "expired": self.is_expired(),
        }


class ContextManager:
    """
    Multi-session conversation context manager.
    Creates, retrieves, and expires conversation contexts.
    """

    def __init__(self, window: int = 10, ttl_minutes: int = 30) -> None:
        self._sessions: Dict[str, ConversationContext] = {}
        self._window = window
        self._ttl_minutes = ttl_minutes
        logger.info("ContextManager initialized")

    def get_or_create(self, session_id: Optional[str] = None,
                      user_id: Optional[str] = None) -> ConversationContext:
        """Return the live context for ``session_id``, or create a fresh one.

        An expired context is replaced (same id) rather than resumed.
        """
        if session_id and session_id in self._sessions:
            ctx = self._sessions[session_id]
            if not ctx.is_expired():
                return ctx
            logger.info("Session %s expired; creating new session", session_id)

        ctx = ConversationContext(session_id=session_id, window=self._window,
                                  ttl_minutes=self._ttl_minutes)
        if user_id:
            ctx.state.user_id = user_id
        self._sessions[ctx.session_id] = ctx
        logger.info("Created new session: %s", ctx.session_id)
        return ctx

    def get(self, session_id: str) -> Optional[ConversationContext]:
        return self._sessions.get(session_id)

    def delete(self, session_id: str) -> bool:
        """Remove a session; returns True if it existed."""
        return bool(self._sessions.pop(session_id, None))

    def purge_expired(self) -> int:
        """Drop every expired session; returns how many were removed."""
        expired = [sid for sid, ctx in self._sessions.items() if ctx.is_expired()]
        for sid in expired:
            del self._sessions[sid]
        if expired:
            logger.info("Purged %d expired sessions", len(expired))
        return len(expired)

    def active_session_count(self) -> int:
        return sum(1 for ctx in self._sessions.values() if not ctx.is_expired())

    def stats(self) -> Dict[str, Any]:
        return {
            "total_sessions": len(self._sessions),
            "active_sessions": self.active_session_count(),
            "expired_sessions": len(self._sessions) - self.active_session_count(),
        }
+import logging +import uuid +from dataclasses import dataclass, field +from datetime import datetime +from typing import Any, Callable, Dict, List, Optional + +logger = logging.getLogger(__name__) + + +@dataclass +class DialogueAction: + """An action the dialogue manager can take.""" + action_type: str # "inform", "clarify", "confirm", "request_slot", "fulfill", "goodbye" + intent: Optional[str] = None + slot: Optional[str] = None + message: str = "" + payload: Dict[str, Any] = field(default_factory=dict) + + +@dataclass +class DialogueTurn: + """One exchange in a multi-turn dialogue.""" + turn_id: str = field(default_factory=lambda: str(uuid.uuid4())) + user_input: str = "" + intent: Optional[str] = None + entities: Dict[str, Any] = field(default_factory=dict) + action: Optional[DialogueAction] = None + system_response: str = "" + timestamp: datetime = field(default_factory=datetime.utcnow) + + +@dataclass +class DialogueState: + """Tracks state across a multi-turn dialogue.""" + dialogue_id: str = field(default_factory=lambda: str(uuid.uuid4())) + current_intent: Optional[str] = None + filled_slots: Dict[str, Any] = field(default_factory=dict) + required_slots: List[str] = field(default_factory=list) + stage: str = "open" # "open" | "slot_filling" | "confirming" | "fulfilled" | "ended" + turns: List[DialogueTurn] = field(default_factory=list) + context: Dict[str, Any] = field(default_factory=dict) + created_at: datetime = field(default_factory=datetime.utcnow) + updated_at: datetime = field(default_factory=datetime.utcnow) + + +# Intent-to-required-slots mapping +_INTENT_SLOTS: Dict[str, List[str]] = { + "deploy": ["service_name", "environment", "version"], + "scale": ["service_name", "replicas"], + "monitor": ["service_name", "metric"], + "create": ["resource_type", "resource_name"], + "delete": ["resource_type", "resource_name"], + "update": ["resource_type", "resource_name", "parameter"], + "train": ["model_name", "dataset"], + "predict": ["model_name", 
"input_data"], + "report": ["report_type", "time_range"], +} + +_RESPONSE_TEMPLATES: Dict[str, str] = { + "request_slot": "Could you please provide the {slot}?", + "confirm": "You want to {intent} with {slots}. Shall I proceed?", + "fulfill": "Done! I have successfully executed {intent} with {slots}.", + "clarify": "I'm not sure I understood. Could you clarify what you mean?", + "inform": "{message}", + "goodbye": "Goodbye! Let me know if you need anything else.", + "greeting": "Hello! How can I help you today?", + "unknown": "I didn't quite understand that. Could you rephrase?", +} + + +def _fill_template(template: str, **kwargs: Any) -> str: + try: + return template.format(**kwargs) + except KeyError: + return template + + +class SlotFiller: + """Manages slot collection for intent fulfillment.""" + + def get_missing_slots(self, state: DialogueState) -> List[str]: + return [s for s in state.required_slots if s not in state.filled_slots] + + def fill_from_entities(self, state: DialogueState, entities: Dict[str, Any]) -> List[str]: + """Try to fill slots from extracted entities. 
Returns newly filled slots.""" + filled: List[str] = [] + for slot in state.required_slots: + if slot not in state.filled_slots and slot in entities: + state.filled_slots[slot] = entities[slot] + filled.append(slot) + return filled + + +class DialoguePolicy: + """Determines the system action given the current dialogue state.""" + + def __init__(self, fulfillment_handlers: Optional[Dict[str, Callable]] = None) -> None: + self._handlers = fulfillment_handlers or {} + self._slot_filler = SlotFiller() + + def select_action(self, state: DialogueState, intent: Optional[str], + entities: Dict[str, Any]) -> DialogueAction: + # Update intent & initialize required slots + if intent and intent != state.current_intent: + state.current_intent = intent + state.required_slots = _INTENT_SLOTS.get(intent, []) + state.filled_slots = {} + state.stage = "slot_filling" if state.required_slots else "confirming" + + # Fill slots from entities + if state.current_intent: + self._slot_filler.fill_from_entities(state, entities) + + missing = self._slot_filler.get_missing_slots(state) + + if state.stage == "slot_filling" and missing: + return DialogueAction( + action_type="request_slot", + slot=missing[0], + message=_fill_template(_RESPONSE_TEMPLATES["request_slot"], slot=missing[0]), + ) + + if state.stage in ("slot_filling", "confirming") and not missing: + state.stage = "confirming" + slots_str = ", ".join(f"{k}={v}" for k, v in state.filled_slots.items()) + return DialogueAction( + action_type="confirm", + intent=state.current_intent, + message=_fill_template(_RESPONSE_TEMPLATES["confirm"], + intent=state.current_intent, slots=slots_str), + ) + + if intent == "confirm" and state.stage == "confirming": + return self._fulfill(state) + + if intent == "deny": + state.stage = "open" + return DialogueAction(action_type="inform", message="Okay, action cancelled.") + + if intent in ("greeting",): + return DialogueAction(action_type="inform", + message=_RESPONSE_TEMPLATES["greeting"]) + + if 
intent in ("farewell",): + state.stage = "ended" + return DialogueAction(action_type="goodbye", + message=_RESPONSE_TEMPLATES["goodbye"]) + + return DialogueAction(action_type="clarify", + message=_RESPONSE_TEMPLATES["clarify"]) + + def _fulfill(self, state: DialogueState) -> DialogueAction: + state.stage = "fulfilled" + handler = self._handlers.get(state.current_intent or "") + result = handler(state.filled_slots) if handler else "Action executed successfully." + slots_str = ", ".join(f"{k}={v}" for k, v in state.filled_slots.items()) + return DialogueAction( + action_type="fulfill", + intent=state.current_intent, + payload={"result": result, "slots": state.filled_slots}, + message=_fill_template(_RESPONSE_TEMPLATES["fulfill"], + intent=state.current_intent, slots=slots_str), + ) + + +class DialogueManager: + """ + Multi-turn dialogue manager coordinating intent understanding, + slot filling, confirmation, and fulfillment across sessions. + """ + + def __init__(self, fulfillment_handlers: Optional[Dict[str, Callable]] = None) -> None: + self._policy = DialoguePolicy(fulfillment_handlers) + self._states: Dict[str, DialogueState] = {} + logger.info("DialogueManager initialized") + + def get_or_create_state(self, dialogue_id: Optional[str] = None) -> DialogueState: + if dialogue_id and dialogue_id in self._states: + return self._states[dialogue_id] + state = DialogueState(dialogue_id=dialogue_id or str(uuid.uuid4())) + self._states[state.dialogue_id] = state + return state + + def process_turn(self, dialogue_id: str, user_input: str, + intent: Optional[str] = None, + entities: Optional[Dict[str, Any]] = None) -> DialogueTurn: + state = self.get_or_create_state(dialogue_id) + entities = entities or {} + + action = self._policy.select_action(state, intent, entities) + state.updated_at = datetime.utcnow() + + turn = DialogueTurn( + user_input=user_input, + intent=intent, + entities=entities, + action=action, + system_response=action.message, + ) + 
state.turns.append(turn) + logger.debug("[%s] User: %.50s | Action: %s", dialogue_id, user_input, action.action_type) + return turn + + def reset(self, dialogue_id: str) -> bool: + state = self._states.get(dialogue_id) + if state: + state.stage = "open" + state.current_intent = None + state.filled_slots.clear() + state.required_slots.clear() + return True + return False + + def get_state_summary(self, dialogue_id: str) -> Dict[str, Any]: + state = self._states.get(dialogue_id) + if not state: + return {} + return { + "dialogue_id": dialogue_id, + "stage": state.stage, + "current_intent": state.current_intent, + "filled_slots": state.filled_slots, + "missing_slots": [s for s in state.required_slots if s not in state.filled_slots], + "turn_count": len(state.turns), + } diff --git a/agents/nlu_agent/entity_extractor.py b/agents/nlu_agent/entity_extractor.py new file mode 100644 index 0000000..8fce036 --- /dev/null +++ b/agents/nlu_agent/entity_extractor.py @@ -0,0 +1,194 @@ +"""Named entity recognition for NLU.""" +from __future__ import annotations + +import logging +import re +import uuid +from dataclasses import dataclass, field +from datetime import datetime +from typing import Any, Dict, List, Optional, Pattern, Tuple + +logger = logging.getLogger(__name__) + + +@dataclass +class Entity: + """Represents an extracted named entity.""" + entity_id: str = field(default_factory=lambda: str(uuid.uuid4())) + text: str = "" + entity_type: str = "" + start: int = 0 + end: int = 0 + confidence: float = 1.0 + normalized_value: Optional[Any] = None + metadata: Dict[str, Any] = field(default_factory=dict) + + +@dataclass +class ExtractionResult: + """Result of named entity extraction.""" + result_id: str = field(default_factory=lambda: str(uuid.uuid4())) + text: str = "" + entities: List[Entity] = field(default_factory=list) + timestamp: datetime = field(default_factory=datetime.utcnow) + + def get_by_type(self, entity_type: str) -> List[Entity]: + return [e for e in 
self.entities if e.entity_type == entity_type] + + +# --- Regex-based extraction rules --- + +_PATTERNS: List[Tuple[str, re.Pattern, float]] = [ + ("EMAIL", re.compile(r"\b[A-Za-z0-9._%+\-]+@[A-Za-z0-9.\-]+\.[A-Z|a-z]{2,}\b"), 0.99), + ("URL", re.compile(r"https?://[^\s]+"), 0.99), + ("IP_ADDRESS",re.compile(r"\b(?:\d{1,3}\.){3}\d{1,3}\b"), 0.95), + ("DATE", re.compile(r"\b\d{4}-\d{2}-\d{2}\b|\b\d{1,2}/\d{1,2}/\d{2,4}\b"), 0.92), + ("TIME", re.compile(r"\b\d{1,2}:\d{2}(?::\d{2})?(?:\s?[AP]M)?\b"), 0.90), + ("DURATION", re.compile(r"\b\d+\s*(?:second|minute|hour|day|week|month|year)s?\b", re.IGNORECASE), 0.88), + ("NUMBER", re.compile(r"\b\d+(?:\.\d+)?\b"), 0.85), + ("PERCENTAGE",re.compile(r"\b\d+(?:\.\d+)?\s*%"), 0.90), + ("VERSION", re.compile(r"\bv?\d+\.\d+(?:\.\d+)*\b"), 0.88), + ("PORT", re.compile(r"\bport\s+(\d{1,5})\b", re.IGNORECASE), 0.87), + ("FILE_PATH", re.compile(r"(?:/[\w.\-]+)+|[A-Za-z]:\\(?:[\w.\-\\]+)+"), 0.85), + ("ENV_VAR", re.compile(r"\b[A-Z_]{2,}(?:_[A-Z0-9]+)+\b"), 0.80), + ("UUID", re.compile(r"\b[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}\b", re.IGNORECASE), 0.99), + ("CLOUD_REGION", re.compile(r"\b(?:us|eu|ap|sa|ca|me|af)-(?:east|west|north|south|central)-\d\b", re.IGNORECASE), 0.93), + ("KUBERNETES_RESOURCE", re.compile(r"\b(?:pod|deployment|service|ingress|namespace|node|configmap|secret)\b", re.IGNORECASE), 0.82), +] + +# Named entity word lists (gazetteer-based) +_GAZETTEERS: Dict[str, List[str]] = { + "CLOUD_PROVIDER": ["AWS", "Azure", "GCP", "Google Cloud", "Amazon", "Microsoft Azure", "DigitalOcean", "Heroku"], + "PROGRAMMING_LANGUAGE": ["Python", "Java", "Go", "Rust", "JavaScript", "TypeScript", "Ruby", "C++", "Scala"], + "DATABASE": ["PostgreSQL", "MySQL", "MongoDB", "Redis", "Cassandra", "DynamoDB", "SQLite", "Elasticsearch"], + "FRAMEWORK": ["Django", "Flask", "FastAPI", "Spring", "Rails", "Express", "React", "Vue", "Angular"], + "PROTOCOL": ["HTTP", "HTTPS", "TCP", "UDP", "gRPC", "WebSocket", "AMQP", "MQTT", 
"REST", "GraphQL"], + "SEVERITY": ["CRITICAL", "ERROR", "WARNING", "INFO", "DEBUG", "FATAL", "NOTICE"], +} + + +def _extract_regex_entities(text: str) -> List[Entity]: + entities: List[Entity] = [] + for entity_type, pattern, confidence in _PATTERNS: + for match in pattern.finditer(text): + entities.append(Entity( + text=match.group(), + entity_type=entity_type, + start=match.start(), + end=match.end(), + confidence=confidence, + )) + return entities + + +def _extract_gazetteer_entities(text: str) -> List[Entity]: + entities: List[Entity] = [] + for entity_type, terms in _GAZETTEERS.items(): + for term in terms: + pattern = re.compile(r"\b" + re.escape(term) + r"\b", re.IGNORECASE) + for match in pattern.finditer(text): + entities.append(Entity( + text=match.group(), + entity_type=entity_type, + start=match.start(), + end=match.end(), + confidence=0.90, + normalized_value=term, + )) + return entities + + +def _resolve_overlaps(entities: List[Entity]) -> List[Entity]: + """Remove overlapping entities, preferring longer and higher-confidence spans.""" + if not entities: + return [] + sorted_entities = sorted(entities, key=lambda e: (-(e.end - e.start), -e.confidence)) + result: List[Entity] = [] + used_positions: set = set() + for entity in sorted_entities: + span = set(range(entity.start, entity.end)) + if not span & used_positions: + result.append(entity) + used_positions.update(span) + return sorted(result, key=lambda e: e.start) + + +class CustomRule: + """User-defined extraction rule.""" + + def __init__(self, entity_type: str, pattern: str, confidence: float = 0.85) -> None: + self.entity_type = entity_type + self.pattern = re.compile(pattern, re.IGNORECASE) + self.confidence = confidence + + def extract(self, text: str) -> List[Entity]: + return [ + Entity(text=m.group(), entity_type=self.entity_type, + start=m.start(), end=m.end(), confidence=self.confidence) + for m in self.pattern.finditer(text) + ] + + +class EntityExtractor: + """ + Multi-strategy named 
entity extractor combining regex rules, + gazetteer lookups, and user-defined custom rules. + """ + + def __init__(self) -> None: + self._custom_rules: List[CustomRule] = [] + self._entity_normalizers: Dict[str, Any] = { + "NUMBER": lambda v: float(v.replace(",", "")), + "PERCENTAGE": lambda v: float(v.rstrip("%")) / 100.0, + "VERSION": lambda v: v.lstrip("v"), + } + logger.info("EntityExtractor initialized") + + def add_rule(self, entity_type: str, pattern: str, confidence: float = 0.85) -> None: + self._custom_rules.append(CustomRule(entity_type, pattern, confidence)) + + def add_gazetteer(self, entity_type: str, terms: List[str]) -> None: + existing = _GAZETTEERS.get(entity_type, []) + _GAZETTEERS[entity_type] = list(set(existing + terms)) + + def extract(self, text: str) -> ExtractionResult: + entities = _extract_regex_entities(text) + entities += _extract_gazetteer_entities(text) + for rule in self._custom_rules: + entities += rule.extract(text) + + entities = _resolve_overlaps(entities) + self._normalize(entities) + + result = ExtractionResult(text=text, entities=entities) + logger.debug("Extracted %d entities from text (len=%d)", len(entities), len(text)) + return result + + def _normalize(self, entities: List[Entity]) -> None: + for entity in entities: + normalizer = self._entity_normalizers.get(entity.entity_type) + if normalizer: + try: + entity.normalized_value = normalizer(entity.text) + except (ValueError, TypeError): + pass + + def extract_typed(self, text: str, entity_types: List[str]) -> ExtractionResult: + result = self.extract(text) + result.entities = [e for e in result.entities if e.entity_type in entity_types] + return result + + def batch_extract(self, texts: List[str]) -> List[ExtractionResult]: + return [self.extract(t) for t in texts] + + def entity_summary(self, result: ExtractionResult) -> Dict[str, List[str]]: + summary: Dict[str, List[str]] = {} + for entity in result.entities: + summary.setdefault(entity.entity_type, 
[]).append(entity.text) + return summary + + def get_supported_types(self) -> List[str]: + regex_types = [et for et, _, _ in _PATTERNS] + gazetteer_types = list(_GAZETTEERS.keys()) + custom_types = [r.entity_type for r in self._custom_rules] + return sorted(set(regex_types + gazetteer_types + custom_types)) diff --git a/agents/nlu_agent/intent_classifier.py b/agents/nlu_agent/intent_classifier.py new file mode 100644 index 0000000..4ee8a99 --- /dev/null +++ b/agents/nlu_agent/intent_classifier.py @@ -0,0 +1,209 @@ +"""Deep intent classification for NLU.""" +from __future__ import annotations + +import logging +import math +import re +import uuid +from collections import Counter, defaultdict +from dataclasses import dataclass, field +from datetime import datetime +from typing import Any, Dict, List, Optional, Tuple + +logger = logging.getLogger(__name__) + + +@dataclass +class Intent: + """Represents a classified intent.""" + name: str + confidence: float = 0.0 + slots: Dict[str, Any] = field(default_factory=dict) + metadata: Dict[str, Any] = field(default_factory=dict) + + +@dataclass +class ClassificationResult: + """Result of intent classification.""" + result_id: str = field(default_factory=lambda: str(uuid.uuid4())) + text: str = "" + top_intent: Optional[Intent] = None + all_intents: List[Intent] = field(default_factory=list) + is_ambiguous: bool = False + timestamp: datetime = field(default_factory=datetime.utcnow) + + +# Predefined intent patterns and keywords +INTENT_PATTERNS: Dict[str, List[str]] = { + "greeting": ["hello", "hi", "hey", "good morning", "good afternoon", "howdy", "greetings"], + "farewell": ["bye", "goodbye", "see you", "take care", "later", "exit", "quit"], + "help": ["help", "assist", "support", "how do i", "what is", "explain", "guide", "tutorial"], + "create": ["create", "make", "build", "generate", "new", "add", "setup", "initialize"], + "delete": ["delete", "remove", "destroy", "drop", "purge", "erase", "clean"], + "update": 
["update", "change", "modify", "edit", "set", "configure", "adjust"], + "query": ["show", "list", "get", "find", "search", "display", "fetch", "retrieve"], + "deploy": ["deploy", "release", "publish", "launch", "rollout", "ship"], + "monitor": ["monitor", "watch", "track", "observe", "check", "status", "health"], + "scale": ["scale", "resize", "increase", "decrease", "expand", "shrink", "autoscale"], + "debug": ["debug", "troubleshoot", "fix", "diagnose", "investigate", "error", "issue"], + "report": ["report", "analyze", "statistics", "metrics", "summary", "overview", "audit"], + "train": ["train", "learn", "fit", "optimize", "improve", "retrain"], + "predict": ["predict", "forecast", "infer", "estimate", "classify", "detect"], + "confirm": ["yes", "ok", "sure", "confirm", "agree", "proceed", "go ahead"], + "deny": ["no", "cancel", "stop", "abort", "refuse", "disagree", "never"], +} + + +def _tokenize(text: str) -> List[str]: + return re.findall(r"\b\w+\b", text.lower()) + + +def _tfidf_score(text_tokens: List[str], pattern_tokens: List[str]) -> float: + text_counts = Counter(text_tokens) + pattern_counts = Counter(pattern_tokens) + score = 0.0 + for token, freq in pattern_counts.items(): + if token in text_counts: + tf = text_counts[token] / len(text_tokens) + idf = math.log(1 + 1.0 / freq) + score += tf * idf + return score + + +class NaiveBayesClassifier: + """Multinomial Naive Bayes intent classifier.""" + + def __init__(self) -> None: + self.class_priors: Dict[str, float] = {} + self.feature_likelihoods: Dict[str, Dict[str, float]] = defaultdict(lambda: defaultdict(float)) + self.vocab: set = set() + + def fit(self, examples: List[Tuple[str, str]]) -> None: + """Train on (text, intent_label) pairs.""" + class_counts: Dict[str, int] = Counter() + class_features: Dict[str, Counter] = defaultdict(Counter) + for text, label in examples: + tokens = _tokenize(text) + class_counts[label] += 1 + class_features[label].update(tokens) + self.vocab.update(tokens) + + 
total = sum(class_counts.values()) or 1 + self.class_priors = {c: count / total for c, count in class_counts.items()} + + vocab_size = len(self.vocab) + for label, feat_counts in class_features.items(): + total_tokens = sum(feat_counts.values()) + for token in self.vocab: + # Laplace smoothing + self.feature_likelihoods[label][token] = ( + feat_counts.get(token, 0) + 1 + ) / (total_tokens + vocab_size) + logger.debug("NaiveBayes fitted on %d examples, %d classes", len(examples), len(class_counts)) + + def predict_proba(self, text: str) -> Dict[str, float]: + tokens = _tokenize(text) + log_probs: Dict[str, float] = {} + for label, prior in self.class_priors.items(): + log_p = math.log(prior + 1e-9) + for token in tokens: + if token in self.vocab: + log_p += math.log(self.feature_likelihoods[label].get(token, 1e-9)) + log_probs[label] = log_p + + # Softmax normalization + max_log = max(log_probs.values()) + exp_probs = {l: math.exp(v - max_log) for l, v in log_probs.items()} + total = sum(exp_probs.values()) or 1.0 + return {l: v / total for l, v in exp_probs.items()} + + +class PatternMatcher: + """Rule-based intent classifier using keyword patterns.""" + + def __init__(self, patterns: Optional[Dict[str, List[str]]] = None) -> None: + self.patterns = patterns or INTENT_PATTERNS + + def score(self, text: str) -> Dict[str, float]: + tokens = _tokenize(text) + scores: Dict[str, float] = {} + for intent, keywords in self.patterns.items(): + pattern_tokens = _tokenize(" ".join(keywords)) + scores[intent] = _tfidf_score(tokens, Counter(pattern_tokens)) + total = sum(scores.values()) or 1.0 + return {k: v / total for k, v in scores.items()} + + +class IntentClassifier: + """ + Hybrid intent classifier combining Naive Bayes (learned) and pattern matching (rule-based). + Falls back to pattern matching when insufficient training data is available. 
+ """ + + def __init__(self, confidence_threshold: float = 0.3, ambiguity_margin: float = 0.1) -> None: + self.confidence_threshold = confidence_threshold + self.ambiguity_margin = ambiguity_margin + self._nb = NaiveBayesClassifier() + self._pattern = PatternMatcher() + self._trained = False + self._training_examples: List[Tuple[str, str]] = [] + logger.info("IntentClassifier initialized") + + def train(self, examples: List[Tuple[str, str]]) -> None: + """Train on (text, intent_label) pairs.""" + self._training_examples.extend(examples) + self._nb.fit(self._training_examples) + self._trained = True + logger.info("Trained on %d examples", len(self._training_examples)) + + def classify(self, text: str) -> ClassificationResult: + pattern_scores = self._pattern.score(text) + + if self._trained: + nb_scores = self._nb.predict_proba(text) + combined = { + intent: 0.6 * nb_scores.get(intent, 0.0) + 0.4 * pattern_scores.get(intent, 0.0) + for intent in set(nb_scores) | set(pattern_scores) + } + else: + combined = pattern_scores + + sorted_intents = sorted(combined.items(), key=lambda x: x[1], reverse=True) + all_intents = [Intent(name=name, confidence=conf) for name, conf in sorted_intents] + + top_intent = all_intents[0] if all_intents else None + is_ambiguous = ( + len(all_intents) >= 2 + and all_intents[1].confidence > (all_intents[0].confidence - self.ambiguity_margin) + ) + + result = ClassificationResult( + text=text, + top_intent=top_intent, + all_intents=all_intents[:5], + is_ambiguous=is_ambiguous, + ) + logger.debug("Classified '%s' -> %s (%.2f)", text[:50], top_intent.name if top_intent else "none", + top_intent.confidence if top_intent else 0.0) + return result + + def batch_classify(self, texts: List[str]) -> List[ClassificationResult]: + return [self.classify(text) for text in texts] + + def add_intent_pattern(self, intent: str, keywords: List[str]) -> None: + existing = self._pattern.patterns.get(intent, []) + self._pattern.patterns[intent] = 
list(set(existing + keywords)) + + def get_supported_intents(self) -> List[str]: + return list(self._pattern.patterns.keys()) + + def explain(self, result: ClassificationResult) -> str: + lines = [f"Text: {result.text}", f"Top intent: {result.top_intent.name if result.top_intent else 'none'}"] + if result.top_intent: + lines.append(f"Confidence: {result.top_intent.confidence:.1%}") + if result.is_ambiguous: + lines.append("⚠ Ambiguous classification") + lines.append("All intents:") + for intent in result.all_intents[:5]: + lines.append(f" {intent.name}: {intent.confidence:.1%}") + return "\n".join(lines) diff --git a/agents/nlu_agent/sentiment_analyzer.py b/agents/nlu_agent/sentiment_analyzer.py new file mode 100644 index 0000000..1d65c35 --- /dev/null +++ b/agents/nlu_agent/sentiment_analyzer.py @@ -0,0 +1,210 @@ +"""Sentiment analysis and emotional tone detection.""" +from __future__ import annotations + +import logging +import re +import statistics +import uuid +from dataclasses import dataclass, field +from datetime import datetime +from typing import Any, Dict, List, Optional, Tuple + +logger = logging.getLogger(__name__) + + +# Sentiment lexicons (valence scores: -1 strongly negative, +1 strongly positive) +_POSITIVE_WORDS: Dict[str, float] = { + "great": 0.8, "excellent": 0.9, "good": 0.6, "wonderful": 0.9, "amazing": 0.85, + "fantastic": 0.88, "love": 0.8, "happy": 0.75, "success": 0.7, "perfect": 0.9, + "outstanding": 0.88, "brilliant": 0.85, "helpful": 0.65, "easy": 0.5, "fast": 0.55, + "reliable": 0.65, "stable": 0.6, "efficient": 0.65, "improved": 0.6, "resolved": 0.7, + "fixed": 0.65, "works": 0.55, "working": 0.55, "thanks": 0.5, "thank": 0.5, +} + +_NEGATIVE_WORDS: Dict[str, float] = { + "bad": -0.7, "terrible": -0.9, "awful": -0.88, "horrible": -0.9, "hate": -0.85, + "failure": -0.8, "error": -0.65, "broken": -0.75, "slow": -0.5, "unreliable": -0.7, + "crash": -0.8, "problem": -0.6, "issue": -0.5, "bug": -0.6, "fail": -0.7, + "failed": -0.75, 
"wrong": -0.65, "difficult": -0.45, "confusing": -0.55, "frustrated": -0.7, + "annoying": -0.65, "useless": -0.8, "waste": -0.7, "stuck": -0.55, "broken": -0.75, +} + +_NEGATORS = {"not", "no", "never", "neither", "nor", "without", "don't", "doesn't", + "didn't", "won't", "wouldn't", "can't", "cannot"} + +_INTENSIFIERS: Dict[str, float] = { + "very": 1.3, "extremely": 1.5, "incredibly": 1.4, "absolutely": 1.4, + "quite": 1.1, "rather": 1.1, "somewhat": 0.8, "slightly": 0.7, "barely": 0.6, + "really": 1.25, "super": 1.3, "highly": 1.25, +} + +# Emotion categories mapped to seed words +_EMOTION_SEEDS: Dict[str, List[str]] = { + "joy": ["happy", "joy", "delighted", "pleased", "excited", "great", "wonderful"], + "anger": ["angry", "furious", "outraged", "mad", "hate", "annoying", "rage"], + "fear": ["afraid", "scared", "worried", "nervous", "anxious", "panic", "dread"], + "sadness": ["sad", "unhappy", "disappointed", "miserable", "terrible", "awful"], + "surprise": ["surprised", "shocked", "unexpected", "amazing", "wow", "suddenly"], + "disgust": ["disgusting", "horrible", "awful", "nasty", "repulsive", "terrible"], + "trust": ["trust", "reliable", "secure", "confident", "honest", "consistent"], + "anticipation": ["hope", "expect", "forward", "plan", "soon", "future", "upcoming"], +} + + +@dataclass +class SentimentScore: + """Numerical sentiment metrics.""" + polarity: float = 0.0 # -1 to +1 + subjectivity: float = 0.0 # 0 (objective) to 1 (subjective) + intensity: float = 0.0 # 0 to 1 + + +@dataclass +class EmotionScores: + """Per-emotion confidence scores.""" + joy: float = 0.0 + anger: float = 0.0 + fear: float = 0.0 + sadness: float = 0.0 + surprise: float = 0.0 + disgust: float = 0.0 + trust: float = 0.0 + anticipation: float = 0.0 + + def dominant(self) -> Tuple[str, float]: + scores = vars(self) + best = max(scores, key=lambda k: scores[k]) + return best, scores[best] + + +@dataclass +class SentimentResult: + """Full result of sentiment analysis.""" + 
def _tokenize(text: str) -> List[str]:
    """Lower-case *text* and split into word tokens; contractions stay whole."""
    return re.findall(r"\b\w+'\w+|\b\w+\b", text.lower())


class LexiconSentimentAnalyzer:
    """Lexicon + rule-based sentiment scorer (negators and intensifiers)."""

    def analyze(self, text: str) -> Tuple[float, float, List[str]]:
        """Return ``(polarity, subjectivity, keyword_hits)`` for *text*."""
        words = _tokenize(text)
        hits: List[str] = []
        accumulated = 0.0
        matched = 0
        # Carries negation/intensity forward onto the next sentiment-bearing word.
        modifier = 1.0

        for word in words:
            if word in _NEGATORS:
                # A negator flips (and slightly dampens) the next sentiment word.
                modifier = -0.9
                continue
            if word in _INTENSIFIERS:
                modifier *= _INTENSIFIERS[word]
                continue

            weight: Optional[float] = None
            if word in _POSITIVE_WORDS:
                weight = _POSITIVE_WORDS[word]
                hits.append(f"+{word}")
            elif word in _NEGATIVE_WORDS:
                weight = _NEGATIVE_WORDS[word]
                hits.append(f"-{word}")

            if weight is None:
                continue
            accumulated += weight * modifier
            matched += 1
            modifier = 1.0  # a modifier only reaches the first following sentiment word

        polarity = max(-1.0, min(1.0, accumulated / max(matched, 1)))
        subjectivity = min(1.0, matched / max(len(words), 1) * 3)
        return polarity, subjectivity, hits


class EmotionDetector:
    """Scores fine-grained emotions by seed-word overlap with the text."""

    def detect(self, text: str) -> EmotionScores:
        present = set(_tokenize(text))
        ratios = {
            emotion: min(1.0, sum(1 for seed in seeds if seed in present) / max(len(seeds), 1) * 4)
            for emotion, seeds in _EMOTION_SEEDS.items()
        }
        return EmotionScores(**ratios)


class SentimentAnalyzer:
    """
    Full sentiment analysis pipeline combining lexicon scoring,
    negation/intensifier handling, and emotion detection.
    """

    def __init__(self, neutral_threshold: float = 0.15) -> None:
        # Polarity magnitudes below this threshold are labeled "neutral".
        self.neutral_threshold = neutral_threshold
        self._lexicon = LexiconSentimentAnalyzer()
        self._emotion_detector = EmotionDetector()
        logger.info("SentimentAnalyzer initialized")

    def analyze(self, text: str) -> SentimentResult:
        """Run the full pipeline on *text* and return a populated SentimentResult."""
        polarity, subjectivity, hits = self._lexicon.analyze(text)
        emotion_scores = self._emotion_detector.detect(text)
        top_emotion, _ = emotion_scores.dominant()

        outcome = SentimentResult(
            text=text,
            label=self._polarity_label(polarity, subjectivity),
            scores=SentimentScore(
                polarity=polarity,
                subjectivity=subjectivity,
                intensity=min(1.0, abs(polarity) + subjectivity * 0.3),
            ),
            emotions=emotion_scores,
            dominant_emotion=top_emotion,
            keyword_hits=hits,
        )
        logger.debug("Sentiment: '%s' -> %s (%.2f)", text[:50], outcome.label, polarity)
        return outcome

    def _polarity_label(self, polarity: float, subjectivity: float) -> str:
        """Map a (polarity, subjectivity) pair to a discrete label."""
        if abs(polarity) < self.neutral_threshold:
            return "neutral"
        if polarity > 0 and subjectivity < 0.2:
            return "mixed"  # high polarity but objective
        return "positive" if polarity > 0 else "negative"

    def batch_analyze(self, texts: List[str]) -> List[SentimentResult]:
        """Analyze each text independently, preserving order."""
        return [self.analyze(t) for t in texts]

    def aggregate(self, results: List[SentimentResult]) -> Dict[str, Any]:
        """Summarize polarity statistics and label distribution over *results*."""
        if not results:
            return {}
        from collections import Counter
        polarities = [item.scores.polarity for item in results]
        tally = Counter(item.label for item in results)
        return {
            "mean_polarity": statistics.mean(polarities),
            "std_polarity": statistics.stdev(polarities) if len(polarities) > 1 else 0.0,
            "label_distribution": dict(tally),
            "dominant_label": tally.most_common(1)[0][0],
            "sample_count": len(results),
        }

    def explain(self, result: SentimentResult) -> str:
        """Human-readable multi-line summary of *result*."""
        return "\n".join([
            f"Text: {result.text[:80]}",
            f"Sentiment: {result.label} (polarity={result.scores.polarity:+.2f})",
            f"Subjectivity: {result.scores.subjectivity:.1%}",
            f"Dominant emotion: {result.dominant_emotion}",
            f"Key indicators: {', '.join(result.keyword_hits[:5])}",
        ])
logger = logging.getLogger(__name__)


@dataclass
class DataAsset:
    """Catalog entry describing a single governed data asset."""
    asset_id: str = field(default_factory=lambda: str(uuid.uuid4()))
    name: str = ""
    asset_type: str = "dataset"  # dataset | table | model | pipeline | report | api
    description: str = ""
    owner: str = ""
    location: str = ""
    format: str = ""
    schema: Dict[str, Any] = field(default_factory=dict)
    tags: List[str] = field(default_factory=list)
    properties: Dict[str, Any] = field(default_factory=dict)
    quality_score: Optional[float] = None
    row_count: Optional[int] = None
    size_bytes: Optional[int] = None
    created_at: datetime = field(default_factory=datetime.utcnow)
    updated_at: datetime = field(default_factory=datetime.utcnow)
    last_accessed: Optional[datetime] = None


@dataclass
class ColumnMetadata:
    """Per-column schema and profiling metadata for an asset."""
    name: str = ""
    dtype: str = ""
    description: str = ""
    nullable: bool = True
    primary_key: bool = False
    foreign_key: Optional[str] = None
    sample_values: List[Any] = field(default_factory=list)
    statistics: Dict[str, float] = field(default_factory=dict)


@dataclass
class SearchResult:
    """Paged result of a catalog search."""
    total: int = 0
    assets: List[DataAsset] = field(default_factory=list)
    query: Dict[str, Any] = field(default_factory=dict)


class MetadataStore:
    """
    Central data catalog for registering, discovering, and managing metadata
    about data assets across the platform.

    Fixes over the previous revision:
    - ``update`` keeps the name/tag/type indexes in sync when those attributes
      change (they used to go stale, leaving lookups pointing at old values).
    - ``delete`` no longer raises ``ValueError`` when an index entry is
      missing, and also drops the asset's column metadata.
    - re-``register``-ing an existing asset no longer duplicates index entries.
    """

    def __init__(self) -> None:
        self._assets: Dict[str, DataAsset] = {}
        self._name_index: Dict[str, str] = {}  # lowercased name -> asset_id
        self._tag_index: Dict[str, List[str]] = {}  # tag -> [asset_ids]
        self._type_index: Dict[str, List[str]] = {}  # type -> [asset_ids]
        self._column_metadata: Dict[str, List[ColumnMetadata]] = {}  # asset_id -> columns
        logger.info("MetadataStore initialized")

    # -- index maintenance -------------------------------------------------

    def _index_asset(self, asset: DataAsset) -> None:
        """Add *asset* to the name/tag/type lookup indexes."""
        self._name_index[asset.name.lower()] = asset.asset_id
        for tag in asset.tags:
            self._tag_index.setdefault(tag, []).append(asset.asset_id)
        self._type_index.setdefault(asset.asset_type, []).append(asset.asset_id)

    def _deindex_asset(self, asset: DataAsset) -> None:
        """Remove *asset* from the lookup indexes; tolerant of missing entries."""
        # Only drop the name mapping if it still points at this asset.
        if self._name_index.get(asset.name.lower()) == asset.asset_id:
            del self._name_index[asset.name.lower()]
        for tag in asset.tags:
            ids = self._tag_index.get(tag, [])
            if asset.asset_id in ids:
                ids.remove(asset.asset_id)
        ids = self._type_index.get(asset.asset_type, [])
        if asset.asset_id in ids:
            ids.remove(asset.asset_id)

    # -- CRUD --------------------------------------------------------------

    def register(self, asset: DataAsset) -> DataAsset:
        """Insert or replace *asset* and (re)build its index entries."""
        existing = self._assets.get(asset.asset_id)
        if existing is not None:
            self._deindex_asset(existing)  # avoid duplicate index entries on re-register
        self._assets[asset.asset_id] = asset
        self._index_asset(asset)
        logger.debug("Registered asset '%s' (%s)", asset.name, asset.asset_type)
        return asset

    def create(self, name: str, asset_type: str = "dataset",
               description: str = "", owner: str = "",
               location: str = "", tags: Optional[List[str]] = None,
               **kwargs: Any) -> DataAsset:
        """Convenience constructor: build a DataAsset and register it."""
        asset = DataAsset(name=name, asset_type=asset_type, description=description,
                          owner=owner, location=location, tags=tags or [], **kwargs)
        return self.register(asset)

    def get(self, asset_id: str) -> Optional[DataAsset]:
        """Fetch an asset by id, or None."""
        return self._assets.get(asset_id)

    def get_by_name(self, name: str) -> Optional[DataAsset]:
        """Case-insensitive lookup by asset name."""
        aid = self._name_index.get(name.lower())
        return self._assets.get(aid) if aid else None

    def update(self, asset_id: str, **kwargs: Any) -> bool:
        """
        Update attributes of an asset; unknown attribute names are ignored.
        Returns False for unknown ids. Keeps the lookup indexes consistent
        even when name, tags, or asset_type change.
        """
        asset = self._assets.get(asset_id)
        if not asset:
            return False
        self._deindex_asset(asset)  # name/tags/type may be about to change
        for key, value in kwargs.items():
            if hasattr(asset, key):
                setattr(asset, key, value)
        asset.updated_at = datetime.utcnow()
        self._index_asset(asset)
        return True

    def delete(self, asset_id: str) -> bool:
        """Remove an asset, its index entries, and its column metadata."""
        asset = self._assets.pop(asset_id, None)
        if not asset:
            return False
        self._deindex_asset(asset)
        self._column_metadata.pop(asset_id, None)  # don't leak orphaned columns
        return True

    # -- columns -----------------------------------------------------------

    def add_columns(self, asset_id: str, columns: List[ColumnMetadata]) -> bool:
        """Attach column metadata to a registered asset; False if unknown id."""
        if asset_id not in self._assets:
            return False
        self._column_metadata[asset_id] = columns
        return True

    def get_columns(self, asset_id: str) -> List[ColumnMetadata]:
        """Column metadata for an asset ([] if none recorded)."""
        return self._column_metadata.get(asset_id, [])

    # -- discovery ---------------------------------------------------------

    def search(self, query: Optional[str] = None,
               asset_type: Optional[str] = None,
               tags: Optional[List[str]] = None,
               owner: Optional[str] = None,
               limit: int = 50) -> SearchResult:
        """Filter assets by type/owner/tags and free-text substring match."""
        results = list(self._assets.values())

        if asset_type:
            results = [a for a in results if a.asset_type == asset_type]
        if owner:
            results = [a for a in results if a.owner == owner]
        if tags:
            results = [a for a in results if any(t in a.tags for t in tags)]
        if query:
            q = query.lower()
            results = [a for a in results
                       if q in a.name.lower() or q in a.description.lower()
                       or any(q in str(v).lower() for v in a.properties.values())]

        return SearchResult(total=len(results), assets=results[:limit],
                            query={"query": query, "type": asset_type, "tags": tags})

    def list_by_type(self, asset_type: str) -> List[DataAsset]:
        """All assets of the given type, in registration order."""
        ids = self._type_index.get(asset_type, [])
        return [self._assets[aid] for aid in ids if aid in self._assets]

    def list_by_tag(self, tag: str) -> List[DataAsset]:
        """All assets carrying the given tag, in registration order."""
        ids = self._tag_index.get(tag, [])
        return [self._assets[aid] for aid in ids if aid in self._assets]

    def record_access(self, asset_id: str) -> None:
        """Stamp the asset's last_accessed time; no-op for unknown ids."""
        asset = self._assets.get(asset_id)
        if asset:
            asset.last_accessed = datetime.utcnow()

    def export_catalog(self) -> Dict[str, Any]:
        """JSON-friendly snapshot of the catalog's key fields."""
        return {
            "assets": [
                {
                    "id": a.asset_id, "name": a.name, "type": a.asset_type,
                    "description": a.description, "owner": a.owner,
                    "tags": a.tags, "quality_score": a.quality_score,
                }
                for a in self._assets.values()
            ],
            "total": len(self._assets),
            "exported_at": datetime.utcnow().isoformat(),
        }

    @property
    def stats(self) -> Dict[str, Any]:
        """Aggregate counts over the catalog."""
        return {
            "total_assets": len(self._assets),
            "by_type": {t: len(ids) for t, ids in self._type_index.items()},
            "total_tags": len(self._tag_index),
        }
logger = logging.getLogger(__name__)


@dataclass
class SearchResult:
    """One scored hit from a catalog search."""
    asset_id: str = ""
    name: str = ""
    asset_type: str = ""
    score: float = 0.0
    snippet: str = ""
    matched_fields: List[str] = field(default_factory=list)


@dataclass
class SearchResponse:
    """Full response for one search call, including timing and suggestions."""
    results: List[SearchResult] = field(default_factory=list)
    total_hits: int = 0
    query: str = ""
    elapsed_ms: float = 0.0
    suggestions: List[str] = field(default_factory=list)


def _tokenize(text: str) -> List[str]:
    """Lower-case word tokens (length >= 2) with common stopwords removed."""
    tokens = re.findall(r"\b\w{2,}\b", text.lower())
    stopwords = {"the", "and", "or", "in", "of", "to", "a", "an", "is", "for", "with"}
    return [t for t in tokens if t not in stopwords]


def _generate_snippet(text: str, query_terms: List[str], max_len: int = 150) -> str:
    """Extract a text snippet centered near the first matched query term."""
    text_lower = text.lower()
    best_pos = 0
    for term in query_terms:
        pos = text_lower.find(term)
        if pos >= 0:
            best_pos = max(0, pos - 30)  # keep a little leading context
            break
    snippet = text[best_pos: best_pos + max_len]
    if best_pos > 0:
        snippet = "..." + snippet
    if best_pos + max_len < len(text):
        snippet += "..."
    return snippet


class InvertedIndex:
    """
    TF-IDF inverted index for asset full-text search.

    A single asset may be indexed several times (name, description, tags,
    properties). Fixes over the previous revision:
    - contributions for the same (term, asset) now *accumulate* instead of
      the last ``add_document`` call silently overwriting earlier field
      weights;
    - ``_doc_count`` and ``_df`` count each asset exactly once, and
      ``remove_document`` decrements ``_df`` (it previously never did,
      leaving idf permanently skewed after removals).
    """

    def __init__(self) -> None:
        self._index: Dict[str, Dict[str, float]] = defaultdict(dict)  # term -> {asset_id: weighted tf}
        self._doc_lengths: Dict[str, int] = {}  # asset_id -> total token count
        self._doc_count: int = 0  # number of distinct documents
        self._df: Counter = Counter()  # term -> number of distinct documents containing it
        self._doc_terms: Dict[str, set] = defaultdict(set)  # asset_id -> terms indexed for it

    def add_document(self, asset_id: str, text: str, weight: float = 1.0) -> None:
        """Index *text* for *asset_id*; repeated calls accumulate weighted tf."""
        tokens = _tokenize(text)
        if not tokens:
            return
        counts = Counter(tokens)
        if asset_id not in self._doc_terms:
            self._doc_count += 1  # first time we see this asset
        self._doc_lengths[asset_id] = self._doc_lengths.get(asset_id, 0) + len(tokens)
        for term, count in counts.items():
            tf = count / len(tokens)
            # Accumulate so a term present in several fields keeps every field's weight.
            self._index[term][asset_id] = self._index[term].get(asset_id, 0.0) + tf * weight
            if term not in self._doc_terms[asset_id]:
                self._doc_terms[asset_id].add(term)
                self._df[term] += 1

    def remove_document(self, asset_id: str) -> None:
        """Fully undo every ``add_document`` made for *asset_id*."""
        for term in self._doc_terms.pop(asset_id, set()):
            postings = self._index.get(term)
            if postings is None:
                continue
            postings.pop(asset_id, None)
            self._df[term] -= 1  # keep idf consistent after removal
            if self._df[term] <= 0:
                del self._df[term]
            if not postings:
                del self._index[term]
        if asset_id in self._doc_lengths:
            del self._doc_lengths[asset_id]
            self._doc_count -= 1

    def search(self, query: str, top_k: int = 20) -> List[Tuple[str, float]]:
        """Return (asset_id, score) sorted by relevance."""
        query_tokens = _tokenize(query)
        if not query_tokens:
            return []

        scores: Dict[str, float] = defaultdict(float)
        for term in query_tokens:
            if term not in self._index:
                # Fall back to prefix matching with a small score penalty.
                matching = [t for t in self._index if t.startswith(term)]
                for mt in matching:
                    idf = math.log((self._doc_count + 1) / (self._df.get(mt, 0) + 1))
                    for asset_id, tf in self._index[mt].items():
                        scores[asset_id] += tf * idf * 0.7  # partial match penalty
            else:
                idf = math.log((self._doc_count + 1) / (self._df.get(term, 0) + 1))
                for asset_id, tf in self._index[term].items():
                    scores[asset_id] += tf * idf

        sorted_results = sorted(scores.items(), key=lambda x: x[1], reverse=True)
        return sorted_results[:top_k]

    def suggest(self, partial: str, max_suggestions: int = 5) -> List[str]:
        """Suggest query completions based on indexed terms, most frequent first."""
        prefix = partial.lower()
        matching = sorted(
            [t for t in self._index if t.startswith(prefix)],
            key=lambda t: -self._df[t]
        )
        return matching[:max_suggestions]


class FacetedSearch:
    """Faceted search supporting filter aggregations."""

    def __init__(self) -> None:
        self._facets: Dict[str, Dict[str, List[str]]] = {}  # facet_field -> {value -> [asset_ids]}

    def index(self, asset_id: str, facet_field: str, value: str) -> None:
        """Record that *asset_id* has *value* for *facet_field*."""
        self._facets.setdefault(facet_field, {}).setdefault(value, []).append(asset_id)

    def filter(self, facet_field: str, value: str) -> List[str]:
        """Asset ids matching one facet value ([] if none)."""
        return self._facets.get(facet_field, {}).get(value, [])

    def aggregations(self, facet_field: str) -> Dict[str, int]:
        """Value -> count map for one facet field."""
        return {val: len(ids) for val, ids in self._facets.get(facet_field, {}).items()}


class DataCatalogSearch:
    """
    Full-featured data catalog search combining TF-IDF full-text search,
    faceted filtering, and query suggestion.
    """

    def __init__(self) -> None:
        self._index = InvertedIndex()
        self._facets = FacetedSearch()
        self._assets: Dict[str, Dict[str, Any]] = {}  # asset_id -> asset dict
        logger.info("DataCatalogSearch initialized")

    def index_asset(self, asset_id: str, name: str, description: str,
                    asset_type: str, owner: str = "",
                    tags: Optional[List[str]] = None,
                    properties: Optional[Dict[str, Any]] = None) -> None:
        """Index one asset: weighted full-text fields plus facet entries."""
        self._assets[asset_id] = {
            "name": name, "description": description, "type": asset_type,
            "owner": owner, "tags": tags or [], "properties": properties or {},
        }
        # Field weights: name matches rank highest, then tags, description, properties.
        self._index.add_document(asset_id, name, weight=3.0)
        self._index.add_document(asset_id, description, weight=1.0)
        for tag in (tags or []):
            self._index.add_document(asset_id, tag, weight=2.0)
        prop_text = " ".join(str(v) for v in (properties or {}).values())
        if prop_text:
            self._index.add_document(asset_id, prop_text, weight=0.5)

        self._facets.index(asset_id, "type", asset_type)
        if owner:
            self._facets.index(asset_id, "owner", owner)
        for tag in (tags or []):
            self._facets.index(asset_id, "tag", tag)

    def remove_asset(self, asset_id: str) -> None:
        """Drop an asset from the text index and the asset map."""
        # NOTE(review): facet entries are not removed here (FacetedSearch has no
        # removal API); stale facet ids are filtered out only implicitly.
        self._index.remove_document(asset_id)
        self._assets.pop(asset_id, None)

    def search(self, query: str, asset_type: Optional[str] = None,
               owner: Optional[str] = None, tags: Optional[List[str]] = None,
               top_k: int = 10) -> SearchResponse:
        """Ranked full-text search with optional facet filters."""
        import time
        start = time.perf_counter()
        # Over-fetch so facet filtering can still fill top_k results.
        raw_results = self._index.search(query, top_k=top_k * 3)

        filter_sets: List[set] = []
        if asset_type:
            filter_sets.append(set(self._facets.filter("type", asset_type)))
        if owner:
            filter_sets.append(set(self._facets.filter("owner", owner)))
        for tag in (tags or []):
            filter_sets.append(set(self._facets.filter("tag", tag)))

        results: List[SearchResult] = []
        query_terms = _tokenize(query)
        for asset_id, score in raw_results:
            if filter_sets and not all(asset_id in fs for fs in filter_sets):
                continue
            asset = self._assets.get(asset_id, {})
            text = asset.get("description", "")
            snippet = _generate_snippet(text, query_terms) if text else ""
            matched = [f for f in ["name", "description", "tags"]
                       if any(t in str(asset.get(f, "")).lower() for t in query_terms)]
            results.append(SearchResult(
                asset_id=asset_id,
                name=asset.get("name", ""),
                asset_type=asset.get("type", ""),
                score=score,
                snippet=snippet,
                matched_fields=matched,
            ))
            if len(results) >= top_k:
                break

        suggestions = self._index.suggest(query.split()[-1] if query.split() else "")
        elapsed_ms = (time.perf_counter() - start) * 1000
        return SearchResponse(
            results=results,
            total_hits=len(results),
            query=query,
            elapsed_ms=elapsed_ms,
            suggestions=suggestions,
        )

    def facet_counts(self, facet_field: str) -> Dict[str, int]:
        """Value -> count aggregation for one facet field."""
        return self._facets.aggregations(facet_field)

    def suggest(self, partial: str) -> List[str]:
        """Query completions for a partial term."""
        return self._index.suggest(partial)
logger = logging.getLogger(__name__)
T = TypeVar("T")


@dataclass
class BatchConfig:
    """Tuning knobs for a batch run."""
    batch_size: int = 1000
    max_workers: int = 4
    checkpoint_enabled: bool = True
    retry_count: int = 2  # total attempts per batch
    fail_fast: bool = False


@dataclass
class BatchStats:
    """Progress and outcome counters for one batch job."""
    job_id: str = ""
    total_records: int = 0
    processed_records: int = 0
    failed_records: int = 0
    batches_total: int = 0
    batches_completed: int = 0
    elapsed_seconds: float = 0.0
    records_per_second: float = 0.0
    errors: List[str] = field(default_factory=list)
    started_at: Optional[datetime] = None
    finished_at: Optional[datetime] = None


@dataclass
class Checkpoint:
    """Resume marker: everything before last_processed_index is durably done."""
    job_id: str = ""
    last_processed_index: int = 0
    processed_ids: List[str] = field(default_factory=list)
    saved_at: datetime = field(default_factory=datetime.utcnow)


def _chunked(iterable: Iterable[T], size: int) -> Iterable[List[T]]:
    """Yield consecutive lists of up to *size* items from *iterable*."""
    batch: List[T] = []
    for item in iterable:
        batch.append(item)
        if len(batch) >= size:
            yield batch
            batch = []
    if batch:
        yield batch


class CheckpointStore:
    """Simple in-memory checkpoint store."""

    def __init__(self) -> None:
        self._checkpoints: Dict[str, Checkpoint] = {}

    def save(self, checkpoint: Checkpoint) -> None:
        self._checkpoints[checkpoint.job_id] = checkpoint

    def load(self, job_id: str) -> Optional[Checkpoint]:
        return self._checkpoints.get(job_id)

    def delete(self, job_id: str) -> bool:
        return bool(self._checkpoints.pop(job_id, None))


class BatchTransformer:
    """Applies a chain of transformations to each record in a batch."""

    def __init__(self) -> None:
        self._transforms: List[Callable[[Dict[str, Any]], Dict[str, Any]]] = []

    def add_transform(self, func: Callable[[Dict[str, Any]], Dict[str, Any]]) -> "BatchTransformer":
        """Append a transform; returns self for chaining."""
        self._transforms.append(func)
        return self

    def apply(self, record: Dict[str, Any]) -> Dict[str, Any]:
        """Run all transforms over a shallow copy of *record*."""
        result = dict(record)
        for transform in self._transforms:
            result = transform(result)
        return result

    def apply_batch(self, batch: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        return [self.apply(r) for r in batch]


class BatchProcessor:
    """
    High-throughput batch processor with configurable transforms,
    retry logic, checkpointing, and progress tracking.

    Fix over the previous revision: checkpoints used to be written after
    every batch — including failed ones — and always advanced by
    ``batch_size`` even for a short final batch, so a resume would silently
    skip failed records. Checkpoints now advance only over the contiguous
    prefix of successful batches, by each batch's actual length.
    """

    def __init__(self, config: Optional[BatchConfig] = None) -> None:
        self._config = config or BatchConfig()
        self._transformer = BatchTransformer()
        self._checkpoint_store = CheckpointStore()
        self._active_jobs: Dict[str, BatchStats] = {}
        logger.info("BatchProcessor initialized (batch_size=%d)", self._config.batch_size)

    def add_transform(self, func: Callable) -> "BatchProcessor":
        """Register a per-record transform; returns self for chaining."""
        self._transformer.add_transform(func)
        return self

    def _run_batch(self, batch: List[Dict[str, Any]],
                   output_handler: Optional[Callable[[List[Dict[str, Any]]], None]],
                   stats: BatchStats, batch_idx: int) -> bool:
        """Transform and emit one batch with retries; True on success."""
        for attempt in range(self._config.retry_count):
            try:
                transformed = self._transformer.apply_batch(batch)
                if output_handler:
                    output_handler(transformed)
                stats.processed_records += len(batch)
                stats.batches_completed += 1
                return True
            except Exception as exc:
                logger.warning("Batch %d attempt %d failed: %s", batch_idx, attempt + 1, exc)
                if attempt == self._config.retry_count - 1:
                    stats.failed_records += len(batch)
                    stats.errors.append(f"Batch {batch_idx}: {exc}")
        return False

    def process(self, records: List[Dict[str, Any]],
                output_handler: Optional[Callable[[List[Dict[str, Any]]], None]] = None,
                job_id: Optional[str] = None) -> BatchStats:
        """
        Process *records* in batches; resumes from a saved checkpoint for the
        same job_id. Returns the job's BatchStats.
        """
        job_id = job_id or str(uuid.uuid4())
        stats = BatchStats(job_id=job_id, total_records=len(records),
                           started_at=datetime.utcnow())
        self._active_jobs[job_id] = stats
        start = time.perf_counter()

        checkpoint = self._checkpoint_store.load(job_id)
        start_idx = checkpoint.last_processed_index if checkpoint else 0
        batches = list(_chunked(records[start_idx:], self._config.batch_size))
        stats.batches_total = len(batches)

        resume_index = start_idx   # first record NOT yet durably processed
        contiguous = True          # checkpoints only cover an unbroken successful prefix

        for batch_idx, batch in enumerate(batches):
            ok = self._run_batch(batch, output_handler, stats, batch_idx)
            if ok and contiguous:
                # Advance by the actual batch length (final batch may be short)
                # and only after success, so a resume re-runs failed records.
                resume_index += len(batch)
                if self._config.checkpoint_enabled:
                    self._checkpoint_store.save(Checkpoint(
                        job_id=job_id,
                        last_processed_index=resume_index,
                    ))
            elif not ok:
                contiguous = False
                if self._config.fail_fast:
                    break

        elapsed = time.perf_counter() - start
        stats.elapsed_seconds = elapsed
        stats.finished_at = datetime.utcnow()
        stats.records_per_second = stats.processed_records / max(elapsed, 0.001)

        if stats.failed_records == 0:
            # Job fully succeeded — checkpoint no longer needed.
            self._checkpoint_store.delete(job_id)

        logger.info("Batch job %s: %d/%d records, %.1f r/s",
                    job_id[:8], stats.processed_records, stats.total_records,
                    stats.records_per_second)
        return stats

    def process_stream(self, record_iter: Iterable[Dict[str, Any]],
                       output_handler: Optional[Callable] = None) -> BatchStats:
        """Process a streaming iterable in batches (no checkpointing/retries)."""
        stats = BatchStats(job_id=str(uuid.uuid4()), started_at=datetime.utcnow())
        start = time.perf_counter()
        for batch in _chunked(record_iter, self._config.batch_size):
            try:
                transformed = self._transformer.apply_batch(batch)
                if output_handler:
                    output_handler(transformed)
                stats.processed_records += len(batch)
                stats.batches_completed += 1
            except Exception as exc:
                stats.failed_records += len(batch)
                stats.errors.append(str(exc))
        stats.elapsed_seconds = time.perf_counter() - start
        stats.finished_at = datetime.utcnow()
        stats.records_per_second = stats.processed_records / max(stats.elapsed_seconds, 0.001)
        return stats

    def get_job_stats(self, job_id: str) -> Optional[BatchStats]:
        """Stats for a job started via :meth:`process`, or None."""
        return self._active_jobs.get(job_id)
logger = logging.getLogger(__name__)


@dataclass
class ConnectorConfig:
    """Declarative configuration for one data source connector."""
    name: str
    connector_type: str
    connection_params: Dict[str, Any] = field(default_factory=dict)
    batch_size: int = 1000
    timeout_seconds: int = 30
    retry_count: int = 3


@dataclass
class DataRecord:
    """One ingested record plus provenance metadata."""
    record_id: str = field(default_factory=lambda: str(uuid.uuid4()))
    source: str = ""
    data: Dict[str, Any] = field(default_factory=dict)
    schema: Dict[str, str] = field(default_factory=dict)
    ingested_at: datetime = field(default_factory=datetime.utcnow)
    metadata: Dict[str, Any] = field(default_factory=dict)


@dataclass
class IngestionResult:
    """Outcome counters for ingesting one source."""
    records_read: int = 0
    records_failed: int = 0
    source: str = ""
    elapsed_seconds: float = 0.0
    errors: List[str] = field(default_factory=list)
    started_at: datetime = field(default_factory=datetime.utcnow)


class BaseConnector(ABC):
    """Abstract connector contract: connect, stream DataRecords, disconnect."""

    def __init__(self, config: ConnectorConfig) -> None:
        self.config = config
        self._connected = False

    @abstractmethod
    def connect(self) -> bool: ...

    @abstractmethod
    def read(self) -> Generator[DataRecord, None, None]: ...

    @abstractmethod
    def disconnect(self) -> None: ...

    def __enter__(self) -> "BaseConnector":
        self.connect()
        return self

    def __exit__(self, *_: Any) -> None:
        self.disconnect()


class CSVConnector(BaseConnector):
    """Reads records from CSV text or file path."""

    def connect(self) -> bool:
        self._connected = True
        return True

    def disconnect(self) -> None:
        self._connected = False

    def read(self) -> Generator[DataRecord, None, None]:
        """Yield one DataRecord per CSV row; 'path' takes precedence over 'content'."""
        content = self.config.connection_params.get("content", "")
        path = self.config.connection_params.get("path", "")
        if path:
            try:
                with open(path, "r", encoding="utf-8") as f:
                    content = f.read()
            except OSError as e:
                logger.error("CSV read error: %s", e)
                return
        reader = csv.DictReader(io.StringIO(content))
        for row in reader:
            # csv gives strings only, so the schema is uniformly "str".
            yield DataRecord(source=self.config.name, data=dict(row),
                             schema={k: "str" for k in row})


class JSONConnector(BaseConnector):
    """Reads records from JSON array or newline-delimited JSON."""

    def connect(self) -> bool:
        self._connected = True
        return True

    def disconnect(self) -> None:
        self._connected = False

    def read(self) -> Generator[DataRecord, None, None]:
        """
        Yield DataRecords from 'content'. Accepts a JSON array (one record per
        element), a single JSON object, or — fix over the previous revision,
        which only claimed it — newline-delimited JSON (one object per line).
        """
        content = self.config.connection_params.get("content", "[]")
        try:
            data = json.loads(content)
        except json.JSONDecodeError:
            # Not a single JSON document: try newline-delimited JSON.
            for line in content.splitlines():
                line = line.strip()
                if not line:
                    continue
                try:
                    item = json.loads(line)
                except json.JSONDecodeError as e:
                    logger.error("JSON parse error: %s", e)
                    return
                yield DataRecord(source=self.config.name,
                                 data=item if isinstance(item, dict) else {"value": item})
            return
        if isinstance(data, list):
            for item in data:
                yield DataRecord(source=self.config.name,
                                 data=item if isinstance(item, dict) else {"value": item})
        elif isinstance(data, dict):
            yield DataRecord(source=self.config.name, data=data)


class InMemoryConnector(BaseConnector):
    """Reads records from an in-memory list."""

    def connect(self) -> bool:
        self._connected = True
        return True

    def disconnect(self) -> None:
        self._connected = False

    def read(self) -> Generator[DataRecord, None, None]:
        records = self.config.connection_params.get("records", [])
        for item in records:
            yield DataRecord(source=self.config.name,
                             data=item if isinstance(item, dict) else {"value": item})


class APIConnector(BaseConnector):
    """Simulated REST API connector (returns configured mock data)."""

    def connect(self) -> bool:
        base_url = self.config.connection_params.get("base_url", "")
        logger.info("Simulating connection to API: %s", base_url)
        self._connected = True
        return True

    def disconnect(self) -> None:
        self._connected = False

    def read(self) -> Generator[DataRecord, None, None]:
        endpoint = self.config.connection_params.get("endpoint", "/data")
        mock_data = self.config.connection_params.get("mock_data", [{"id": 1, "value": "sample"}])
        logger.info("Fetching from API endpoint %s", endpoint)
        for item in mock_data:
            yield DataRecord(source=self.config.name, data=item,
                             metadata={"endpoint": endpoint})


# Registry mapping connector_type -> connector class; extensible via
# ConnectorFactory.register().
_CONNECTOR_REGISTRY: Dict[str, type] = {
    "csv": CSVConnector,
    "json": JSONConnector,
    "memory": InMemoryConnector,
    "api": APIConnector,
}


class ConnectorFactory:
    """Creates connectors from configs via the type registry."""

    @staticmethod
    def create(config: ConnectorConfig) -> BaseConnector:
        """Instantiate the connector for config.connector_type; raises ValueError if unknown."""
        cls = _CONNECTOR_REGISTRY.get(config.connector_type)
        if not cls:
            raise ValueError(f"Unknown connector type: {config.connector_type}")
        return cls(config)

    @staticmethod
    def register(connector_type: str, cls: type) -> None:
        """Register (or replace) a connector class for a type name."""
        _CONNECTOR_REGISTRY[connector_type] = cls


class DataIngestionManager:
    """Runs every configured source and collects per-source results."""

    def __init__(self) -> None:
        self._connectors: List[ConnectorConfig] = []
        logger.info("DataIngestionManager initialized")

    def add_source(self, config: ConnectorConfig) -> None:
        """Queue a source config for the next ingest_all() run."""
        self._connectors.append(config)

    def ingest_all(self) -> Dict[str, IngestionResult]:
        """Ingest every source; one IngestionResult per source name."""
        import time
        results: Dict[str, IngestionResult] = {}
        for cfg in self._connectors:
            connector = ConnectorFactory.create(cfg)
            result = IngestionResult(source=cfg.name)
            start = time.perf_counter()
            try:
                with connector:
                    for _ in connector.read():
                        result.records_read += 1
            except Exception as exc:
                # records_failed counts failed *sources* here, not records.
                result.errors.append(str(exc))
                result.records_failed += 1
                logger.error("Ingestion failed for '%s': %s", cfg.name, exc)
            result.elapsed_seconds = time.perf_counter() - start
            results[cfg.name] = result
            logger.info("Ingested %d records from '%s'", result.records_read, cfg.name)
        return results
class TumblingWindow:
    """Count-based tumbling window: emits a WindowResult every `window_size` events."""

    def __init__(self, window_size: int = 100, topic: str = "") -> None:
        self.window_size = window_size
        self.topic = topic
        self._buffer: List[StreamEvent] = []
        self._completed_windows: List[WindowResult] = []
        self._lock = threading.Lock()

    def add_event(self, event: StreamEvent) -> Optional[WindowResult]:
        """Buffer *event*; return a completed window once the buffer fills up."""
        with self._lock:
            self._buffer.append(event)
            if len(self._buffer) < self.window_size:
                return None
            return self._flush()

    def _flush(self) -> WindowResult:
        # Caller must already hold the lock.
        drained = self._buffer[:]
        self._buffer.clear()
        first_ts = drained[0].timestamp if drained else datetime.utcnow()
        last_ts = drained[-1].timestamp if drained else datetime.utcnow()
        window = WindowResult(
            topic=self.topic,
            events=drained,
            start_time=first_ts,
            end_time=last_ts,
            aggregates=self._aggregate(drained),
        )
        self._completed_windows.append(window)
        return window

    def _aggregate(self, events: List[StreamEvent]) -> Dict[str, Any]:
        """Event count plus per-numeric-field mean and sum."""
        import statistics
        series: Dict[str, List[float]] = {}
        for ev in events:
            for key, value in ev.payload.items():
                if isinstance(value, (int, float)):
                    series.setdefault(key, []).append(float(value))
        summary: Dict[str, Any] = {"count": len(events)}
        for key, values in series.items():
            summary[f"{key}_mean"] = statistics.mean(values)
            summary[f"{key}_sum"] = sum(values)
        return summary

    def force_flush(self) -> Optional[WindowResult]:
        """Emit whatever is buffered, if anything, regardless of size."""
        with self._lock:
            if not self._buffer:
                return None
            return self._flush()


class StreamProducer:
    """Generates and publishes events to the bus, tracking per-topic offsets."""

    def __init__(self, bus: EventBus) -> None:
        self._bus = bus
        self._offset_counter: Dict[str, int] = {}

    def publish(self, topic: str, payload: Dict[str, Any],
                partition: int = 0) -> StreamEvent:
        """Build an event with the next offset for *topic* and publish it."""
        next_offset = self._offset_counter.get(topic, 0)
        event = StreamEvent(topic=topic, payload=payload,
                            partition=partition, offset=next_offset)
        self._offset_counter[topic] = next_offset + 1
        self._bus.publish(event)
        return event

    def publish_batch(self, topic: str,
                      payloads: List[Dict[str, Any]]) -> List[StreamEvent]:
        """Publish each payload in order; returns the created events."""
        return [self.publish(topic, payload) for payload in payloads]


class StreamConsumer:
    """Consumes events from one or more topics through a bounded queue."""

    def __init__(self, bus: EventBus, topics: List[str],
                 max_queue: int = 10000) -> None:
        self._bus = bus
        self._topics = topics
        self._queue: queue.Queue = queue.Queue(maxsize=max_queue)
        self._processed: int = 0
        for topic in topics:
            bus.subscribe(topic, self._enqueue)

    def _enqueue(self, event: StreamEvent) -> None:
        # Drop (with a warning) rather than block the publisher when full.
        try:
            self._queue.put_nowait(event)
        except queue.Full:
            logger.warning("Consumer queue full; dropping event on '%s'", event.topic)

    def poll(self, timeout: float = 0.1) -> Optional[StreamEvent]:
        """Next event within *timeout* seconds, or None."""
        try:
            event = self._queue.get(timeout=timeout)
        except queue.Empty:
            return None
        self._processed += 1
        return event

    def poll_batch(self, max_events: int = 100,
                   timeout: float = 0.1) -> List[StreamEvent]:
        """Collect up to *max_events* events within an overall *timeout* budget."""
        collected: List[StreamEvent] = []
        deadline = time.monotonic() + timeout
        while len(collected) < max_events and time.monotonic() < deadline:
            event = self.poll(timeout=max(0, deadline - time.monotonic()))
            if event:
                collected.append(event)
        return collected

    @property
    def queue_depth(self) -> int:
        """Events currently buffered and not yet polled."""
        return self._queue.qsize()

    @property
    def processed_count(self) -> int:
        """Total events successfully handed out by poll()."""
        return self._processed


class StreamingPipeline:
    """
    End-to-end streaming pipeline with producer, consumer,
    windowing, and real-time aggregation.
    """

    def __init__(self, name: str) -> None:
        self.name = name
        self._bus = EventBus()
        self._producer = StreamProducer(self._bus)
        self._windows: Dict[str, TumblingWindow] = {}
        self._processors: List[Callable[[StreamEvent], None]] = []
        logger.info("StreamingPipeline '%s' initialized", name)

    def add_window(self, topic: str, window_size: int = 100) -> TumblingWindow:
        """Attach a tumbling window to *topic* and subscribe it to the bus."""
        window = TumblingWindow(window_size=window_size, topic=topic)
        self._windows[topic] = window
        self._bus.subscribe(topic, lambda e: window.add_event(e))
        return window

    def add_processor(self, topic: str,
                      func: Callable[[StreamEvent], None]) -> None:
        """Subscribe an arbitrary per-event callback to *topic*."""
        self._bus.subscribe(topic, func)

    def produce(self, topic: str, payload: Dict[str, Any]) -> StreamEvent:
        """Publish one payload to *topic*."""
        return self._producer.publish(topic, payload)

    def produce_batch(self, topic: str,
                      payloads: List[Dict[str, Any]]) -> List[StreamEvent]:
        """Publish several payloads to *topic* in order."""
        return self._producer.publish_batch(topic, payloads)

    def create_consumer(self, topics: List[str]) -> StreamConsumer:
        """New consumer subscribed to *topics* on this pipeline's bus."""
        return StreamConsumer(self._bus, topics)

    def flush_all_windows(self) -> List[WindowResult]:
        """Force-flush every window; returns the non-empty results."""
        flushed: List[WindowResult] = []
        for window in self._windows.values():
            result = window.force_flush()
            if result:
                flushed.append(result)
        return flushed
"dataset" # dataset | transformation | model | report | source + location: str = "" + schema: Dict[str, str] = field(default_factory=dict) + properties: Dict[str, Any] = field(default_factory=dict) + created_at: datetime = field(default_factory=datetime.utcnow) + + +@dataclass +class LineageEdge: + edge_id: str = field(default_factory=lambda: str(uuid.uuid4())) + source_id: str = "" + target_id: str = "" + operation: str = "derived_from" # derived_from | transformed_by | used_by | written_to + transformation_logic: str = "" + properties: Dict[str, Any] = field(default_factory=dict) + timestamp: datetime = field(default_factory=datetime.utcnow) + + +@dataclass +class LineagePath: + path_id: str = field(default_factory=lambda: str(uuid.uuid4())) + nodes: List[LineageNode] = field(default_factory=list) + edges: List[LineageEdge] = field(default_factory=list) + depth: int = 0 + + +@dataclass +class ImpactAnalysis: + source_node: str = "" + affected_nodes: List[str] = field(default_factory=list) + affected_count: int = 0 + paths: List[LineagePath] = field(default_factory=list) + + +class LineageTracker: + """ + Tracks data lineage across transformations, pipelines, and models. + Supports upstream/downstream impact analysis and lineage queries. 
+ """ + + def __init__(self) -> None: + self._nodes: Dict[str, LineageNode] = {} + self._edges: Dict[str, LineageEdge] = {} + self._adj: Dict[str, List[str]] = {} # source -> [edge_ids] + self._rev_adj: Dict[str, List[str]] = {} # target -> [edge_ids] + logger.info("LineageTracker initialized") + + def register_node(self, name: str, node_type: str = "dataset", + location: str = "", schema: Optional[Dict[str, str]] = None, + properties: Optional[Dict[str, Any]] = None) -> LineageNode: + # Check for existing node with same name + existing = next((n for n in self._nodes.values() if n.name == name), None) + if existing: + return existing + node = LineageNode(name=name, node_type=node_type, location=location, + schema=schema or {}, properties=properties or {}) + self._nodes[node.node_id] = node + self._adj[node.node_id] = [] + self._rev_adj[node.node_id] = [] + logger.debug("Registered lineage node: %s (%s)", name, node_type) + return node + + def add_lineage(self, source_name: str, target_name: str, + operation: str = "derived_from", + transformation_logic: str = "", + properties: Optional[Dict[str, Any]] = None) -> LineageEdge: + source = self.get_or_create_node(source_name) + target = self.get_or_create_node(target_name) + edge = LineageEdge(source_id=source.node_id, target_id=target.node_id, + operation=operation, transformation_logic=transformation_logic, + properties=properties or {}) + self._edges[edge.edge_id] = edge + self._adj[source.node_id].append(edge.edge_id) + self._rev_adj[target.node_id].append(edge.edge_id) + logger.debug("Lineage: %s -[%s]-> %s", source_name, operation, target_name) + return edge + + def get_or_create_node(self, name: str) -> LineageNode: + existing = next((n for n in self._nodes.values() if n.name == name), None) + return existing if existing else self.register_node(name) + + def get_upstream(self, node_name: str, depth: int = 10) -> LineagePath: + node = next((n for n in self._nodes.values() if n.name == node_name), None) + if not 
node: + return LineagePath() + return self._traverse(node.node_id, direction="upstream", max_depth=depth) + + def get_downstream(self, node_name: str, depth: int = 10) -> LineagePath: + node = next((n for n in self._nodes.values() if n.name == node_name), None) + if not node: + return LineagePath() + return self._traverse(node.node_id, direction="downstream", max_depth=depth) + + def _traverse(self, start_id: str, direction: str, max_depth: int) -> LineagePath: + from collections import deque + visited: Set[str] = set() + queue = deque([(start_id, 0)]) + result_nodes: List[LineageNode] = [] + result_edges: List[LineageEdge] = [] + max_d = 0 + + while queue: + node_id, depth = queue.popleft() + if node_id in visited or depth > max_depth: + continue + visited.add(node_id) + node = self._nodes.get(node_id) + if node: + result_nodes.append(node) + max_d = max(max_d, depth) + + if direction == "upstream": + edge_ids = self._rev_adj.get(node_id, []) + next_fn = lambda e: e.source_id + else: + edge_ids = self._adj.get(node_id, []) + next_fn = lambda e: e.target_id + + for eid in edge_ids: + edge = self._edges.get(eid) + if edge: + result_edges.append(edge) + next_node = next_fn(edge) + if next_node not in visited: + queue.append((next_node, depth + 1)) + + return LineagePath(nodes=result_nodes, edges=result_edges, depth=max_d) + + def impact_analysis(self, node_name: str) -> ImpactAnalysis: + downstream = self.get_downstream(node_name) + affected = [n.name for n in downstream.nodes if n.name != node_name] + return ImpactAnalysis( + source_node=node_name, + affected_nodes=affected, + affected_count=len(affected), + paths=[downstream], + ) + + def list_nodes(self, node_type: Optional[str] = None) -> List[LineageNode]: + if node_type: + return [n for n in self._nodes.values() if n.node_type == node_type] + return list(self._nodes.values()) + + def to_dict(self) -> Dict[str, Any]: + return { + "nodes": [{"id": n.node_id, "name": n.name, "type": n.node_type} for n in 
@dataclass
class VisualizationOutput:
    """Rendered lineage plus format tag and node/edge counts."""

    format: str = "ascii"
    content: str = ""
    metadata: Dict[str, Any] = field(default_factory=dict)


def _indent(level: int) -> str:
    """Two spaces per tree level."""
    return "  " * level


class ASCIIRenderer:
    """Renders lineage as an ASCII tree."""

    def render_tree(self, root_name: str,
                    adjacency: Dict[str, List[str]],
                    max_depth: int = 5) -> str:
        """Depth-first ASCII tree rooted at *root_name*; cycles are cut via a visited set."""
        lines: List[str] = []
        seen: set = set()

        def walk(node: str, level: int) -> None:
            if level > max_depth or node in seen:
                return
            seen.add(node)
            marker = "└── " if level > 0 else ""
            lines.append(f"{_indent(level)}{marker}{node}")
            for child in adjacency.get(node, []):
                walk(child, level + 1)

        walk(root_name, 0)
        return "\n".join(lines)

    def render_dag(self, nodes: List[Dict[str, Any]],
                   edges: List[Dict[str, Any]]) -> str:
        """Flat edge listing followed by per-root ASCII trees."""
        id_to_name = {n["id"]: n.get("name", n["id"][:8]) for n in nodes}
        lines = ["=== Data Lineage DAG ===", ""]

        adjacency: Dict[str, List[str]] = {}
        for edge in edges:
            src = id_to_name.get(edge.get("source", ""), "?")
            tgt = id_to_name.get(edge.get("target", ""), "?")
            op = edge.get("operation", "->")
            lines.append(f" {src} --[{op}]--> {tgt}")
            adjacency.setdefault(src, []).append(tgt)

        lines.append("")

        # A root is any node that never appears as an edge target.
        def _is_root(name: str) -> bool:
            return not any(id_to_name.get(e.get("target")) == name for e in edges)

        roots = {name for name in (id_to_name[n["id"]] for n in nodes)
                 if _is_root(name)}
        if roots:
            lines.append("=== Tree View (from roots) ===")
            for root in sorted(roots):
                lines.append(self.render_tree(root, adjacency))
        return "\n".join(lines)


class MermaidRenderer:
    """Renders lineage as Mermaid flowchart syntax."""

    def render(self, nodes: List[Dict[str, Any]],
               edges: List[Dict[str, Any]]) -> str:
        """Emit a left-to-right Mermaid graph; node shape encodes node type."""
        safe_name = {n["id"]: n.get("name", n["id"][:8]).replace(" ", "_") for n in nodes}
        out = ["graph LR"]

        for node in nodes:
            nid = node["id"]
            label = safe_name[nid]
            ntype = node.get("type", "dataset")
            if ntype in ("transformation", "model"):
                out.append(f" {nid}[/{label}/]")
            elif ntype == "source":
                out.append(f" {nid}[('{label}')]")
            else:
                out.append(f" {nid}[{label}]")

        for edge in edges:
            src = edge.get("source", "")
            tgt = edge.get("target", "")
            op = edge.get("operation", "-->")
            out.append(f" {src} -->|{op}| {tgt}")

        return "\n".join(out)


class DOTRenderer:
    """Renders lineage as GraphViz DOT format."""

    # Node type -> DOT shape; unknown types fall back to "box".
    _SHAPES = {"transformation": "ellipse", "model": "diamond",
               "source": "cylinder", "dataset": "box"}

    def render(self, nodes: List[Dict[str, Any]],
               edges: List[Dict[str, Any]]) -> str:
        """Emit a digraph with per-type node shapes and operation-labelled edges."""
        labels = {n["id"]: n.get("name", n["id"][:8]) for n in nodes}
        out = ["digraph lineage {", ' rankdir=LR;', ' node [shape=box];']
        for node in nodes:
            nid = node["id"]
            shape = self._SHAPES.get(node.get("type", "dataset"), "box")
            out.append(f' "{nid}" [label="{labels[nid]}", shape={shape}];')
        for edge in edges:
            src = edge.get("source", "")
            tgt = edge.get("target", "")
            op = edge.get("operation", "")
            out.append(f' "{src}" -> "{tgt}" [label="{op}"];')
        out.append("}")
        return "\n".join(out)
lineage visualizer supporting ASCII, Mermaid, DOT, and JSON. + """ + + def __init__(self) -> None: + self._ascii = ASCIIRenderer() + self._mermaid = MermaidRenderer() + self._dot = DOTRenderer() + logger.info("LineageVisualizer initialized") + + def visualize(self, lineage_dict: Dict[str, Any], + fmt: str = "ascii") -> VisualizationOutput: + nodes = lineage_dict.get("nodes", []) + edges = lineage_dict.get("edges", []) + + if fmt == "ascii": + content = self._ascii.render_dag(nodes, edges) + elif fmt == "mermaid": + content = self._mermaid.render(nodes, edges) + elif fmt == "dot": + content = self._dot.render(nodes, edges) + elif fmt == "json": + content = json.dumps(lineage_dict, indent=2, default=str) + else: + content = f"Unsupported format: {fmt}" + + return VisualizationOutput( + format=fmt, + content=content, + metadata={"node_count": len(nodes), "edge_count": len(edges)}, + ) + + def upstream_tree(self, node_name: str, + lineage_dict: Dict[str, Any]) -> str: + nodes = lineage_dict.get("nodes", []) + edges = lineage_dict.get("edges", []) + id_to_name = {n["id"]: n.get("name") for n in nodes} + name_to_id = {n.get("name"): n["id"] for n in nodes} + + target_id = name_to_id.get(node_name) + if not target_id: + return f"Node '{node_name}' not found" + + adjacency: Dict[str, List[str]] = {} + for edge in edges: + tgt = id_to_name.get(edge.get("target", ""), "?") + src = id_to_name.get(edge.get("source", ""), "?") + adjacency.setdefault(tgt, []).append(src) + + return self._ascii.render_tree(node_name, adjacency) diff --git a/dataops/data_quality/__init__.py b/dataops/data_quality/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/dataops/data_quality/cleansing.py b/dataops/data_quality/cleansing.py new file mode 100644 index 0000000..73321ad --- /dev/null +++ b/dataops/data_quality/cleansing.py @@ -0,0 +1,236 @@ +"""Data cleaning and normalization.""" +from __future__ import annotations + +import logging +import re +import statistics +import uuid 
from dataclasses import dataclass, field
from datetime import datetime
from typing import Any, Callable, Dict, List, Optional

logger = logging.getLogger(__name__)


@dataclass
class CleansingResult:
    """Outcome summary of one cleansing run."""

    job_id: str = field(default_factory=lambda: str(uuid.uuid4()))
    records_in: int = 0
    records_out: int = 0
    records_dropped: int = 0
    fields_modified: Dict[str, int] = field(default_factory=dict)
    operations_applied: List[str] = field(default_factory=list)
    timestamp: datetime = field(default_factory=datetime.utcnow)


def _strip_whitespace(value: Any) -> Any:
    """Trim surrounding whitespace from strings; pass anything else through."""
    if isinstance(value, str):
        return value.strip()
    return value


def _to_lowercase(value: Any) -> Any:
    """Lowercase strings; pass anything else through."""
    if isinstance(value, str):
        return value.lower()
    return value


def _remove_special_chars(value: Any, keep: str = r"a-zA-Z0-9 ._-") -> Any:
    """Drop every character outside the *keep* class from strings."""
    if isinstance(value, str):
        return re.sub(f"[^{keep}]", "", value)
    return value


def _normalize_whitespace(value: Any) -> Any:
    """Collapse internal whitespace runs to single spaces and trim the ends."""
    if isinstance(value, str):
        return re.sub(r"\s+", " ", value).strip()
    return value


def _coerce_numeric(value: Any) -> Any:
    """Parse numeric strings (thousands commas allowed) to float; leave other values alone."""
    if not isinstance(value, str):
        return value
    try:
        return float(value.replace(",", ""))
    except ValueError:
        return value


def _truncate(value: Any, max_len: int = 255) -> Any:
    """Cap string length at *max_len*; pass anything else through."""
    if isinstance(value, str) and len(value) > max_len:
        return value[:max_len]
    return value
"median": + self._computed_fills[field_name] = statistics.median(values) + elif self.strategy == "mode": + from collections import Counter + self._computed_fills[field_name] = Counter(values).most_common(1)[0][0] + elif self.strategy == "constant": + self._computed_fills[field_name] = self.fill_value + + def impute(self, record: Dict[str, Any]) -> Dict[str, Any]: + result = dict(record) + for field_name, fill in self._computed_fills.items(): + if result.get(field_name) is None: + result[field_name] = fill + return result + + +class OutlierHandler: + """Detects and handles numeric outliers using IQR method.""" + + def __init__(self, method: str = "clip", iqr_multiplier: float = 1.5) -> None: + self.method = method # "clip" | "remove" | "impute" + self.iqr_multiplier = iqr_multiplier + self._bounds: Dict[str, tuple] = {} + + def fit(self, records: List[Dict[str, Any]], fields: List[str]) -> None: + for field_name in fields: + values = sorted(r[field_name] for r in records + if isinstance(r.get(field_name), (int, float))) + if len(values) < 4: + continue + q1 = values[len(values) // 4] + q3 = values[3 * len(values) // 4] + iqr = q3 - q1 + self._bounds[field_name] = (q1 - self.iqr_multiplier * iqr, + q3 + self.iqr_multiplier * iqr) + + def handle(self, record: Dict[str, Any]) -> Optional[Dict[str, Any]]: + result = dict(record) + for field_name, (low, high) in self._bounds.items(): + val = result.get(field_name) + if not isinstance(val, (int, float)): + continue + if val < low or val > high: + if self.method == "clip": + result[field_name] = max(low, min(high, val)) + elif self.method == "remove": + return None + elif self.method == "impute": + result[field_name] = (low + high) / 2 + return result + + +class DataCleanser: + """ + Comprehensive data cleaning pipeline supporting field-level transforms, + imputation, outlier handling, deduplication, and custom rules. 
+ """ + + def __init__(self) -> None: + self._field_transforms: Dict[str, List[Callable]] = {} + self._drop_conditions: List[Callable[[Dict[str, Any]], bool]] = [] + self._imputer: Optional[ImputationStrategy] = None + self._outlier_handler: Optional[OutlierHandler] = None + self._dedup_key: Optional[str] = None + logger.info("DataCleanser initialized") + + def add_transform(self, field: str, func: Callable) -> "DataCleanser": + self._field_transforms.setdefault(field, []).append(func) + return self + + def strip_whitespace(self, *fields: str) -> "DataCleanser": + for f in fields: + self.add_transform(f, _strip_whitespace) + return self + + def lowercase(self, *fields: str) -> "DataCleanser": + for f in fields: + self.add_transform(f, _to_lowercase) + return self + + def coerce_numeric(self, *fields: str) -> "DataCleanser": + for f in fields: + self.add_transform(f, _coerce_numeric) + return self + + def truncate(self, field: str, max_len: int = 255) -> "DataCleanser": + return self.add_transform(field, lambda v: _truncate(v, max_len)) + + def drop_if(self, condition: Callable[[Dict[str, Any]], bool]) -> "DataCleanser": + self._drop_conditions.append(condition) + return self + + def drop_nulls(self, *fields: str) -> "DataCleanser": + for f in fields: + self._drop_conditions.append(lambda r, field=f: r.get(field) is None) + return self + + def enable_imputation(self, strategy: str = "mean", + fill_value: Any = None) -> "DataCleanser": + self._imputer = ImputationStrategy(strategy, fill_value) + return self + + def enable_outlier_handling(self, method: str = "clip") -> "DataCleanser": + self._outlier_handler = OutlierHandler(method) + return self + + def deduplicate(self, key_field: str) -> "DataCleanser": + self._dedup_key = key_field + return self + + def fit(self, records: List[Dict[str, Any]]) -> "DataCleanser": + numeric_fields = [f for f in (records[0].keys() if records else []) + if any(isinstance(r.get(f), (int, float)) for r in records[:10])] + if 
self._imputer: + self._imputer.fit(records, numeric_fields) + if self._outlier_handler: + self._outlier_handler.fit(records, numeric_fields) + return self + + def clean_record(self, record: Dict[str, Any]) -> Optional[Dict[str, Any]]: + result = dict(record) + for condition in self._drop_conditions: + if condition(result): + return None + for field_name, transforms in self._field_transforms.items(): + if field_name in result: + for func in transforms: + result[field_name] = func(result[field_name]) + if self._imputer: + result = self._imputer.impute(result) + if self._outlier_handler: + result = self._outlier_handler.handle(result) + if result is None: + return None + return result + + def clean(self, records: List[Dict[str, Any]]) -> tuple: + self.fit(records) + cleaned: List[Dict[str, Any]] = [] + seen_keys: set = set() + dropped = 0 + for record in records: + result = self.clean_record(record) + if result is None: + dropped += 1 + continue + if self._dedup_key: + key = result.get(self._dedup_key) + if key in seen_keys: + dropped += 1 + continue + if key is not None: + seen_keys.add(key) + cleaned.append(result) + result_obj = CleansingResult( + records_in=len(records), + records_out=len(cleaned), + records_dropped=dropped, + operations_applied=list(self._field_transforms.keys()), + ) + logger.info("Cleansed: %d in, %d out, %d dropped", len(records), len(cleaned), dropped) + return cleaned, result_obj diff --git a/dataops/data_quality/profiler.py b/dataops/data_quality/profiler.py new file mode 100644 index 0000000..aeba314 --- /dev/null +++ b/dataops/data_quality/profiler.py @@ -0,0 +1,232 @@ +"""Data profiling - statistical summaries of datasets.""" +from __future__ import annotations + +import logging +import math +import re +import statistics +import uuid +from collections import Counter +from dataclasses import dataclass, field +from datetime import datetime +from typing import Any, Dict, List, Optional, Tuple + +logger = logging.getLogger(__name__) + + 
@dataclass
class FieldProfile:
    """Per-field statistics computed by the profiler."""

    field_name: str = ""
    dtype: str = "unknown"
    count: int = 0
    null_count: int = 0
    null_rate: float = 0.0
    unique_count: int = 0
    unique_rate: float = 0.0
    # Numeric stats
    mean: Optional[float] = None
    std: Optional[float] = None
    min_val: Optional[float] = None
    max_val: Optional[float] = None
    median: Optional[float] = None
    p25: Optional[float] = None
    p75: Optional[float] = None
    skewness: Optional[float] = None
    # String stats
    min_length: Optional[int] = None
    max_length: Optional[int] = None
    avg_length: Optional[float] = None
    patterns: List[str] = field(default_factory=list)
    # Top values
    top_values: List[Tuple[Any, int]] = field(default_factory=list)


@dataclass
class DatasetProfile:
    """Whole-dataset profile: per-field profiles plus global quality metrics."""

    profile_id: str = field(default_factory=lambda: str(uuid.uuid4()))
    dataset_name: str = ""
    record_count: int = 0
    field_count: int = 0
    field_profiles: Dict[str, FieldProfile] = field(default_factory=dict)
    overall_null_rate: float = 0.0
    duplicate_rate: float = 0.0
    quality_score: float = 0.0
    generated_at: datetime = field(default_factory=datetime.utcnow)


def _percentile(sorted_values: List[float], p: float) -> float:
    """Nearest-rank percentile over a pre-sorted list (0.0 on empty input)."""
    if not sorted_values:
        return 0.0
    rank = int(len(sorted_values) * p) - 1
    return sorted_values[max(0, rank)]


def _skewness(values: List[float]) -> float:
    """Adjusted Fisher-Pearson sample skewness; 0.0 for n < 3 or zero spread."""
    n = len(values)
    if n < 3:
        return 0.0
    mu = statistics.mean(values)
    sigma = statistics.stdev(values)
    if sigma == 0:
        return 0.0
    cubed = sum(((v - mu) / sigma) ** 3 for v in values)
    return cubed * n / ((n - 1) * (n - 2))
return "numeric" + if all(isinstance(v, str) for v in sample): + # Check if numeric strings + try: + [float(v) for v in sample[:5]] + return "numeric_string" + except (ValueError, TypeError): + pass + return "string" + return "mixed" + + +def _detect_patterns(values: List[str]) -> List[str]: + patterns: List[str] = [] + sample = [v for v in values if isinstance(v, str)][:100] + if not sample: + return patterns + + email_like = sum(1 for v in sample if re.match(r".+@.+\..+", v)) + if email_like / len(sample) > 0.5: + patterns.append("email") + + url_like = sum(1 for v in sample if v.startswith(("http://", "https://"))) + if url_like / len(sample) > 0.3: + patterns.append("url") + + numeric_like = sum(1 for v in sample if re.match(r"^-?\d+\.?\d*$", v.strip())) + if numeric_like / len(sample) > 0.7: + patterns.append("numeric_string") + + date_like = sum(1 for v in sample if re.match(r"\d{4}-\d{2}-\d{2}", v)) + if date_like / len(sample) > 0.5: + patterns.append("date_iso") + + return patterns + + +class FieldProfiler: + """Profiles a single field across all records.""" + + def profile(self, field_name: str, values: List[Any]) -> FieldProfile: + profile = FieldProfile(field_name=field_name) + profile.count = len(values) + null_values = [v for v in values if v is None or v == ""] + profile.null_count = len(null_values) + profile.null_rate = profile.null_count / max(profile.count, 1) + + non_null = [v for v in values if v is not None and v != ""] + profile.unique_count = len(set(str(v) for v in non_null)) + profile.unique_rate = profile.unique_count / max(len(non_null), 1) + profile.dtype = _infer_dtype(values) + + # Numeric stats + numeric_values: List[float] = [] + for v in non_null: + try: + numeric_values.append(float(v)) + except (TypeError, ValueError): + pass + + if numeric_values: + sorted_nums = sorted(numeric_values) + profile.mean = statistics.mean(numeric_values) + profile.std = statistics.stdev(numeric_values) if len(numeric_values) > 1 else 0.0 + 
class DataProfiler:
    """
    Generates comprehensive statistical profiles of datasets
    including field-level statistics, quality scoring, and anomaly hints.
    """

    def __init__(self) -> None:
        # One reusable FieldProfiler handles all per-field statistics.
        self._field_profiler = FieldProfiler()
        logger.info("DataProfiler initialized")
@dataclass
class ValidationError:
    """A single rule violation for one field of one record."""

    field: str
    rule: str
    message: str
    value: Any = None


@dataclass
class ValidationResult:
    """Per-record validation outcome: errors fail the record, warnings do not."""

    result_id: str = field(default_factory=lambda: str(uuid.uuid4()))
    record: Dict[str, Any] = field(default_factory=dict)
    is_valid: bool = True
    errors: List[ValidationError] = field(default_factory=list)
    warnings: List[ValidationError] = field(default_factory=list)
    timestamp: datetime = field(default_factory=datetime.utcnow)
class Rule(ABC):
    """Base validation rule bound to a single field.

    Rules flagged with ``is_warning`` produce warnings instead of errors.
    """

    def __init__(self, name: str, field: str, is_warning: bool = False) -> None:
        self.name = name
        self.field = field
        self.is_warning = is_warning

    @abstractmethod
    def validate(self, record: Dict[str, Any]) -> Optional[ValidationError]: ...


class NotNullRule(Rule):
    """Fails when the field is missing, None, or the empty string."""

    def validate(self, record: Dict[str, Any]) -> Optional[ValidationError]:
        val = record.get(self.field)
        if val is None or val == "":
            return ValidationError(self.field, self.name, f"'{self.field}' must not be null/empty", val)
        return None


class TypeRule(Rule):
    """Fails when the value is neither of, nor coercible to, the expected type."""

    def __init__(self, name: str, field: str, expected_type: type, **kwargs: Any) -> None:
        super().__init__(name, field, **kwargs)
        self.expected_type = expected_type

    def validate(self, record: Dict[str, Any]) -> Optional[ValidationError]:
        val = record.get(self.field)
        if val is None or isinstance(val, self.expected_type):
            return None
        try:
            # Coercible values (e.g. "3" for int) are accepted.
            self.expected_type(val)
        except (ValueError, TypeError):
            return ValidationError(self.field, self.name,
                f"'{self.field}' expected {self.expected_type.__name__}, got {type(val).__name__}", val)
        return None


class RangeRule(Rule):
    """Fails when a numeric value falls outside [min_val, max_val]; non-numeric passes."""

    def __init__(self, name: str, field: str, min_val: Optional[float] = None,
                 max_val: Optional[float] = None, **kwargs: Any) -> None:
        super().__init__(name, field, **kwargs)
        self.min_val = min_val
        self.max_val = max_val

    def validate(self, record: Dict[str, Any]) -> Optional[ValidationError]:
        val = record.get(self.field)
        if val is None:
            return None
        try:
            num = float(val)
        except (TypeError, ValueError):
            return None  # non-numeric values are out of scope for a range rule
        if self.min_val is not None and num < self.min_val:
            return ValidationError(self.field, self.name,
                f"'{self.field}' value {num} < min {self.min_val}", val)
        if self.max_val is not None and num > self.max_val:
            return ValidationError(self.field, self.name,
                f"'{self.field}' value {num} > max {self.max_val}", val)
        return None
class RegexRule(Rule):
    """Fails when str(value) does not match the compiled pattern (None passes)."""

    def __init__(self, name: str, field: str, pattern: str, **kwargs: Any) -> None:
        super().__init__(name, field, **kwargs)
        self._pattern = re.compile(pattern)

    def validate(self, record: Dict[str, Any]) -> Optional[ValidationError]:
        val = record.get(self.field)
        if val is None:
            return None
        if self._pattern.match(str(val)):
            return None
        return ValidationError(self.field, self.name,
            f"'{self.field}' does not match pattern {self._pattern.pattern}", val)


class UniqueRule(Rule):
    """Fails on any value already seen by this rule instance.

    State persists across calls; call reset() between independent batches.
    """

    def __init__(self, name: str, field: str, **kwargs: Any) -> None:
        super().__init__(name, field, **kwargs)
        self._seen: set = set()

    def validate(self, record: Dict[str, Any]) -> Optional[ValidationError]:
        val = record.get(self.field)
        if val is None:
            return None
        if val in self._seen:
            return ValidationError(self.field, self.name,
                f"Duplicate value for '{self.field}': {val}", val)
        self._seen.add(val)
        return None

    def reset(self) -> None:
        """Forget all previously seen values."""
        self._seen.clear()


class CustomRule(Rule):
    """Wraps a user predicate; a falsy result or a raised exception fails the rule."""

    def __init__(self, name: str, field: str, func: Callable[[Any], bool],
                 message: str = "", **kwargs: Any) -> None:
        super().__init__(name, field, **kwargs)
        self._func = func
        self._message = message or f"Custom rule '{name}' failed"

    def validate(self, record: Dict[str, Any]) -> Optional[ValidationError]:
        val = record.get(self.field)
        try:
            passed = self._func(val)
        except Exception as exc:
            # A crashing predicate is reported as a rule error, not raised.
            return ValidationError(self.field, self.name, f"Rule error: {exc}", val)
        if passed:
            return None
        return ValidationError(self.field, self.name, self._message, val)
field)) + + def type_check(self, field: str, expected_type: type) -> "DataValidator": + return self.add_rule(TypeRule(f"{field}_type", field, expected_type)) + + def range_check(self, field: str, min_val: Optional[float] = None, + max_val: Optional[float] = None) -> "DataValidator": + return self.add_rule(RangeRule(f"{field}_range", field, min_val, max_val)) + + def regex_check(self, field: str, pattern: str) -> "DataValidator": + return self.add_rule(RegexRule(f"{field}_pattern", field, pattern)) + + def unique(self, field: str) -> "DataValidator": + return self.add_rule(UniqueRule(f"{field}_unique", field)) + + def validate(self, record: Dict[str, Any]) -> ValidationResult: + result = ValidationResult(record=record) + for rule in self._rules: + error = rule.validate(record) + if error: + if rule.is_warning: + result.warnings.append(error) + else: + result.errors.append(error) + result.is_valid = len(result.errors) == 0 + return result + + def validate_batch(self, records: List[Dict[str, Any]]) -> ValidationReport: + report = ValidationReport(total_records=len(records)) + sample_errors: List[ValidationError] = [] + for record in records: + result = self.validate(record) + if result.is_valid: + report.valid_records += 1 + else: + report.invalid_records += 1 + for err in result.errors: + report.error_counts[err.rule] = report.error_counts.get(err.rule, 0) + 1 + if len(sample_errors) < 10: + sample_errors.append(err) + report.sample_errors = sample_errors + report.pass_rate = report.valid_records / max(report.total_records, 1) + logger.info("Validation: %d/%d records passed (%.1f%%)", + report.valid_records, report.total_records, report.pass_rate * 100) + return report diff --git a/devops/__init__.py b/devops/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/devops/cicd/__init__.py b/devops/cicd/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/devops/cicd/deployment.py b/devops/cicd/deployment.py new file mode 100644 index 
@dataclass
class DeploymentTarget:
    """Where a service lands: environment, region, cluster/namespace and sizing."""
    name: str = ""
    environment: str = "development"  # development | staging | production
    region: str = "us-east-1"
    cluster: str = ""
    namespace: str = "default"
    replicas: int = 1
    resource_limits: Dict[str, str] = field(default_factory=dict)
    annotations: Dict[str, str] = field(default_factory=dict)


@dataclass
class DeploymentSpec:
    """Everything needed to run one deployment of a service image."""
    spec_id: str = field(default_factory=lambda: str(uuid.uuid4()))
    service_name: str = ""
    image: str = ""
    tag: str = "latest"
    target: DeploymentTarget = field(default_factory=DeploymentTarget)
    strategy: str = "rolling"  # rolling | blue_green | canary
    health_check_path: str = "/health"
    rollback_on_failure: bool = True
    config: Dict[str, Any] = field(default_factory=dict)


@dataclass
class DeploymentResult:
    """Outcome record for a single deployment attempt."""
    deployment_id: str = field(default_factory=lambda: str(uuid.uuid4()))
    service_name: str = ""
    environment: str = ""
    status: str = "pending"  # pending | running | success | failed | rolled_back
    strategy: str = ""
    started_at: Optional[datetime] = None
    finished_at: Optional[datetime] = None
    elapsed_seconds: float = 0.0
    events: List[str] = field(default_factory=list)
    metadata: Dict[str, Any] = field(default_factory=dict)


class RollingDeployment:
    """Simulates a rolling update deployment strategy."""

    def execute(self, spec: DeploymentSpec) -> List[str]:
        """Update replicas one by one; two events per replica plus a summary line."""
        log: List[str] = []
        total = spec.target.replicas
        for idx in range(1, total + 1):
            log.append(f"Updating replica {idx}/{total} with {spec.image}:{spec.tag}")
            time.sleep(0.01)  # simulate per-replica rollout time
            log.append(f"Replica {idx} health check passed")
        log.append("Rolling update complete")
        return log


class BlueGreenDeployment:
    """Simulates a blue/green deployment strategy."""

    def execute(self, spec: DeploymentSpec) -> List[str]:
        """Fixed six-step event log: build green, smoke test, cut over."""
        svc = spec.service_name
        return [
            f"Creating green environment for {svc}",
            f"Deploying {spec.image}:{spec.tag} to green",
            "Running smoke tests on green environment",
            "Smoke tests passed - switching traffic to green",
            "Blue environment standing by for rollback",
            "Blue/green deployment complete",
        ]


class CanaryDeployment:
    """Simulates canary deployment with traffic shifting."""

    # Traffic percentages walked through during the simulated promotion.
    _TRAFFIC_STEPS = (5, 25, 50, 100)

    def execute(self, spec: DeploymentSpec) -> List[str]:
        """Two events per traffic step plus a final promotion line."""
        log: List[str] = []
        for share in self._TRAFFIC_STEPS:
            log.append(f"Shifting {share}% traffic to canary")
            log.append(f"Monitoring error rate at {share}% (0.1% - healthy)")
            time.sleep(0.01)
        log.append("Canary promotion complete - 100% traffic on new version")
        return log


class HealthChecker:
    """Simulates health check verification post-deployment."""

    def check(self, spec: DeploymentSpec) -> tuple:
        """Returns (healthy: bool, message: str)"""
        endpoint = spec.health_check_path
        svc = spec.service_name
        logger.debug("Health checking %s at %s", svc, endpoint)
        # Simulated probe: this stub always reports healthy.
        return True, f"Service {svc} at {endpoint} returned 200 OK"
+ """ + + def __init__(self) -> None: + self._strategies = { + "rolling": RollingDeployment(), + "blue_green": BlueGreenDeployment(), + "canary": CanaryDeployment(), + } + self._health_checker = HealthChecker() + self._history: List[DeploymentResult] = [] + self._rollback_handlers: Dict[str, Callable] = {} + logger.info("Deployer initialized") + + def register_rollback(self, service_name: str, handler: Callable) -> None: + self._rollback_handlers[service_name] = handler + + def deploy(self, spec: DeploymentSpec) -> DeploymentResult: + result = DeploymentResult( + service_name=spec.service_name, + environment=spec.target.environment, + strategy=spec.strategy, + started_at=datetime.utcnow(), + ) + result.status = "running" + start = time.perf_counter() + logger.info("Deploying %s:%s to %s [%s]", + spec.service_name, spec.tag, spec.target.environment, spec.strategy) + + strategy = self._strategies.get(spec.strategy, self._strategies["rolling"]) + try: + events = strategy.execute(spec) + result.events.extend(events) + + healthy, msg = self._health_checker.check(spec) + result.events.append(f"Health check: {msg}") + + if healthy: + result.status = "success" + logger.info("Deployment of %s succeeded", spec.service_name) + else: + if spec.rollback_on_failure: + result.events.append("Health check failed - initiating rollback") + self._rollback(spec, result) + else: + result.status = "failed" + except Exception as exc: + result.status = "failed" + result.events.append(f"Deployment error: {exc}") + logger.error("Deployment failed: %s", exc) + + result.elapsed_seconds = time.perf_counter() - start + result.finished_at = datetime.utcnow() + self._history.append(result) + return result + + def _rollback(self, spec: DeploymentSpec, result: DeploymentResult) -> None: + handler = self._rollback_handlers.get(spec.service_name) + if handler: + try: + handler(spec) + result.status = "rolled_back" + result.events.append("Rollback completed successfully") + except Exception as exc: + 
result.status = "failed" + result.events.append(f"Rollback failed: {exc}") + else: + result.status = "rolled_back" + result.events.append("Simulated rollback to previous version") + + def get_history(self, service_name: Optional[str] = None) -> List[DeploymentResult]: + if service_name: + return [r for r in self._history if r.service_name == service_name] + return list(self._history) + + def last_deployment(self, service_name: str) -> Optional[DeploymentResult]: + history = self.get_history(service_name) + return history[-1] if history else None diff --git a/devops/cicd/pipeline_generator.py b/devops/cicd/pipeline_generator.py new file mode 100644 index 0000000..4f1eccf --- /dev/null +++ b/devops/cicd/pipeline_generator.py @@ -0,0 +1,161 @@ +"""Dynamic CI/CD pipeline generation.""" +from __future__ import annotations + +import logging +import uuid +from dataclasses import dataclass, field +from datetime import datetime +from typing import Any, Dict, List, Optional + +logger = logging.getLogger(__name__) + + +@dataclass +class PipelineStage: + name: str + steps: List[Dict[str, Any]] = field(default_factory=list) + depends_on: List[str] = field(default_factory=list) + env: Dict[str, str] = field(default_factory=dict) + allow_failure: bool = False + + +@dataclass +class CICDPipeline: + pipeline_id: str = field(default_factory=lambda: str(uuid.uuid4())) + name: str = "" + trigger: str = "push" # push | pr | schedule | manual + stages: List[PipelineStage] = field(default_factory=list) + global_env: Dict[str, str] = field(default_factory=dict) + artifacts: List[str] = field(default_factory=list) + format: str = "yaml" + created_at: datetime = field(default_factory=datetime.utcnow) + + +class PipelineTemplates: + """Pre-built pipeline templates for common scenarios.""" + + @staticmethod + def python_service() -> List[PipelineStage]: + return [ + PipelineStage("install", [{"run": "pip install -r requirements.txt"}]), + PipelineStage("lint", [{"run": "flake8 . 
--max-line-length=120"}], depends_on=["install"]), + PipelineStage("test", [{"run": "pytest tests/ -v --cov"}], depends_on=["install"]), + PipelineStage("build", [{"run": "docker build -t $IMAGE_NAME:$TAG ."}], depends_on=["test"]), + PipelineStage("push", [{"run": "docker push $IMAGE_NAME:$TAG"}], depends_on=["build"]), + PipelineStage("deploy", [{"run": "kubectl apply -f k8s/"}], depends_on=["push"]), + ] + + @staticmethod + def ml_training() -> List[PipelineStage]: + return [ + PipelineStage("data_validation", [{"run": "python scripts/validate_data.py"}]), + PipelineStage("feature_engineering", [{"run": "python scripts/feature_eng.py"}], depends_on=["data_validation"]), + PipelineStage("train", [{"run": "python scripts/train.py"}], depends_on=["feature_engineering"]), + PipelineStage("evaluate", [{"run": "python scripts/evaluate.py"}], depends_on=["train"]), + PipelineStage("register", [{"run": "python scripts/register_model.py"}], depends_on=["evaluate"]), + PipelineStage("deploy_model", [{"run": "python scripts/deploy_model.py"}], depends_on=["register"]), + ] + + @staticmethod + def data_pipeline() -> List[PipelineStage]: + return [ + PipelineStage("ingest", [{"run": "python -m dataops.ingest"}]), + PipelineStage("validate", [{"run": "python -m dataops.validate"}], depends_on=["ingest"]), + PipelineStage("transform", [{"run": "python -m dataops.transform"}], depends_on=["validate"]), + PipelineStage("load", [{"run": "python -m dataops.load"}], depends_on=["transform"]), + ] + + +def _render_yaml_stage(stage: PipelineStage) -> str: + lines = [f" {stage.name}:"] + if stage.depends_on: + lines.append(f" needs: [{', '.join(stage.depends_on)}]") + if stage.env: + lines.append(" env:") + for k, v in stage.env.items(): + lines.append(f" {k}: {v}") + lines.append(" steps:") + for step in stage.steps: + if "run" in step: + lines.append(f" - run: {step['run']}") + elif "uses" in step: + lines.append(f" - uses: {step['uses']}") + return "\n".join(lines) + + +def 
_render_github_actions(pipeline: CICDPipeline) -> str: + lines = [f"name: {pipeline.name}", "on:", f" {pipeline.trigger}:", " branches: ['*']", ""] + if pipeline.global_env: + lines.append("env:") + for k, v in pipeline.global_env.items(): + lines.append(f" {k}: {v}") + lines.append("") + lines.append("jobs:") + for stage in pipeline.stages: + lines.append(_render_yaml_stage(stage)) + lines.append("") + return "\n".join(lines) + + +def _render_gitlab_ci(pipeline: CICDPipeline) -> str: + stage_names = [s.name for s in pipeline.stages] + lines = [f"stages:", *[f" - {n}" for n in stage_names], ""] + for stage in pipeline.stages: + lines.append(f"{stage.name}:") + lines.append(f" stage: {stage.name}") + if stage.depends_on: + lines.append(f" needs: {stage.depends_on}") + lines.append(" script:") + for step in stage.steps: + if "run" in step: + lines.append(f" - {step['run']}") + lines.append("") + return "\n".join(lines) + + +class PipelineGenerator: + """ + Dynamic CI/CD pipeline generator supporting GitHub Actions, GitLab CI, + and Jenkins output formats with template-based and custom pipeline construction. 
+ """ + + def __init__(self) -> None: + self._templates = PipelineTemplates() + logger.info("PipelineGenerator initialized") + + def create(self, name: str, template: str = "python_service", + trigger: str = "push", + global_env: Optional[Dict[str, str]] = None) -> CICDPipeline: + template_map = { + "python_service": self._templates.python_service, + "ml_training": self._templates.ml_training, + "data_pipeline": self._templates.data_pipeline, + } + stages = template_map.get(template, self._templates.python_service)() + pipeline = CICDPipeline(name=name, trigger=trigger, stages=stages, + global_env=global_env or {}) + logger.info("Created pipeline '%s' (%d stages)", name, len(stages)) + return pipeline + + def add_stage(self, pipeline: CICDPipeline, stage: PipelineStage) -> None: + pipeline.stages.append(stage) + + def render(self, pipeline: CICDPipeline, format: str = "github_actions") -> str: + if format == "github_actions": + return _render_github_actions(pipeline) + elif format == "gitlab_ci": + return _render_gitlab_ci(pipeline) + else: + return _render_github_actions(pipeline) + + def validate(self, pipeline: CICDPipeline) -> List[str]: + """Validate pipeline for dependency cycles and missing stages.""" + errors: List[str] = [] + stage_names = {s.name for s in pipeline.stages} + for stage in pipeline.stages: + for dep in stage.depends_on: + if dep not in stage_names: + errors.append(f"Stage '{stage.name}' depends on unknown stage '{dep}'") + if not pipeline.stages: + errors.append("Pipeline has no stages") + return errors diff --git a/devops/cicd/test_runner.py b/devops/cicd/test_runner.py new file mode 100644 index 0000000..edf10bd --- /dev/null +++ b/devops/cicd/test_runner.py @@ -0,0 +1,155 @@ +"""Automated testing framework.""" +from __future__ import annotations + +import logging +import time +import uuid +from dataclasses import dataclass, field +from datetime import datetime +from typing import Any, Callable, Dict, List, Optional + +logger = 
logging.getLogger(__name__) + + +@dataclass +class TestCase: + test_id: str = field(default_factory=lambda: str(uuid.uuid4())) + name: str = "" + test_type: str = "unit" # unit | integration | e2e | smoke | performance + func: Optional[Callable] = None + timeout_seconds: int = 60 + tags: List[str] = field(default_factory=list) + metadata: Dict[str, Any] = field(default_factory=dict) + + +@dataclass +class TestResult: + test_id: str = "" + name: str = "" + status: str = "pending" # passed | failed | error | skipped | timeout + elapsed_ms: float = 0.0 + error_message: str = "" + stdout: str = "" + started_at: Optional[datetime] = None + finished_at: Optional[datetime] = None + + +@dataclass +class TestSuiteResult: + suite_id: str = field(default_factory=lambda: str(uuid.uuid4())) + suite_name: str = "" + total: int = 0 + passed: int = 0 + failed: int = 0 + errors: int = 0 + skipped: int = 0 + results: List[TestResult] = field(default_factory=list) + elapsed_seconds: float = 0.0 + started_at: Optional[datetime] = None + finished_at: Optional[datetime] = None + + @property + def pass_rate(self) -> float: + return self.passed / max(self.total, 1) + + @property + def success(self) -> bool: + return self.failed == 0 and self.errors == 0 + + +class TestRunner: + """ + Automated test runner with test registration, execution, + parallel grouping, and rich reporting. 
+ """ + + def __init__(self) -> None: + self._suites: Dict[str, List[TestCase]] = {} + self._global_fixtures: Dict[str, Any] = {} + logger.info("TestRunner initialized") + + def register(self, suite: str, test_case: TestCase) -> None: + self._suites.setdefault(suite, []).append(test_case) + + def test(self, suite: str, name: str, test_type: str = "unit", + tags: Optional[List[str]] = None) -> Callable: + def decorator(func: Callable) -> Callable: + tc = TestCase(name=name, test_type=test_type, func=func, tags=tags or []) + self.register(suite, tc) + return func + return decorator + + def add_fixture(self, name: str, value: Any) -> None: + self._global_fixtures[name] = value + + def _run_test(self, tc: TestCase) -> TestResult: + result = TestResult(test_id=tc.test_id, name=tc.name, started_at=datetime.utcnow()) + start = time.perf_counter() + if tc.func is None: + result.status = "skipped" + else: + try: + import signal + tc.func(**{k: v for k, v in self._global_fixtures.items() + if k in (tc.func.__code__.co_varnames if tc.func else [])}) + result.status = "passed" + except AssertionError as e: + result.status = "failed" + result.error_message = str(e) + except Exception as e: + result.status = "error" + result.error_message = f"{type(e).__name__}: {e}" + result.elapsed_ms = (time.perf_counter() - start) * 1000 + result.finished_at = datetime.utcnow() + return result + + def run_suite(self, suite_name: str, + tags: Optional[List[str]] = None, + test_type: Optional[str] = None) -> TestSuiteResult: + tests = self._suites.get(suite_name, []) + if tags: + tests = [t for t in tests if any(tag in t.tags for tag in tags)] + if test_type: + tests = [t for t in tests if t.test_type == test_type] + + suite_result = TestSuiteResult(suite_name=suite_name, + total=len(tests), + started_at=datetime.utcnow()) + start = time.perf_counter() + for tc in tests: + result = self._run_test(tc) + suite_result.results.append(result) + if result.status == "passed": + suite_result.passed += 
1 + elif result.status == "failed": + suite_result.failed += 1 + elif result.status == "error": + suite_result.errors += 1 + elif result.status == "skipped": + suite_result.skipped += 1 + logger.debug("[%s] %s: %s (%.1fms)", suite_name, tc.name, result.status, result.elapsed_ms) + + suite_result.elapsed_seconds = time.perf_counter() - start + suite_result.finished_at = datetime.utcnow() + logger.info("Suite '%s': %d/%d passed (%.1fs)", + suite_name, suite_result.passed, suite_result.total, suite_result.elapsed_seconds) + return suite_result + + def run_all(self) -> Dict[str, TestSuiteResult]: + return {suite: self.run_suite(suite) for suite in self._suites} + + def report(self, result: TestSuiteResult) -> str: + lines = [ + f"Test Suite: {result.suite_name}", + f"Total: {result.total} | Passed: {result.passed} | Failed: {result.failed} | " + f"Errors: {result.errors} | Skipped: {result.skipped}", + f"Pass rate: {result.pass_rate:.1%} | Duration: {result.elapsed_seconds:.2f}s", + "", + ] + for r in result.results: + status_icon = {"passed": "✓", "failed": "✗", "error": "!", "skipped": "-"}.get(r.status, "?") + line = f" {status_icon} {r.name} ({r.elapsed_ms:.1f}ms)" + if r.error_message: + line += f"\n {r.error_message}" + lines.append(line) + return "\n".join(lines) diff --git a/devops/iac/__init__.py b/devops/iac/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/devops/iac/ansible_adapter.py b/devops/iac/ansible_adapter.py new file mode 100644 index 0000000..11e5e9c --- /dev/null +++ b/devops/iac/ansible_adapter.py @@ -0,0 +1,126 @@ +"""Ansible playbook generation and execution adapter.""" +from __future__ import annotations +import logging +import uuid +import yaml as _yaml_module +from dataclasses import dataclass, field +from datetime import datetime +from typing import Any, Dict, List, Optional + +logger = logging.getLogger(__name__) + +@dataclass +class AnsibleTask: + name: str = "" + module: str = "" + args: Dict[str, Any] = 
field(default_factory=dict) + when: Optional[str] = None + register: Optional[str] = None + notify: Optional[str] = None + ignore_errors: bool = False + +@dataclass +class AnsiblePlay: + name: str = "" + hosts: str = "all" + become: bool = False + vars: Dict[str, Any] = field(default_factory=dict) + tasks: List[AnsibleTask] = field(default_factory=list) + handlers: List[Dict[str, Any]] = field(default_factory=list) + +@dataclass +class PlaybookResult: + playbook_id: str = field(default_factory=lambda: str(uuid.uuid4())) + status: str = "success" + hosts_ok: int = 0 + hosts_changed: int = 0 + hosts_failed: int = 0 + tasks_executed: int = 0 + output: str = "" + executed_at: datetime = field(default_factory=datetime.utcnow) + +def _task_to_dict(task: AnsibleTask) -> Dict[str, Any]: + d: Dict[str, Any] = {"name": task.name, task.module: task.args} + if task.when: + d["when"] = task.when + if task.register: + d["register"] = task.register + if task.notify: + d["notify"] = task.notify + if task.ignore_errors: + d["ignore_errors"] = True + return d + +class AnsibleAdapter: + def __init__(self) -> None: + self._plays: List[AnsiblePlay] = [] + self._inventory: Dict[str, List[str]] = {"all": ["localhost"]} + logger.info("AnsibleAdapter initialized") + + def add_play(self, play: AnsiblePlay) -> "AnsibleAdapter": + self._plays.append(play) + return self + + def add_host(self, group: str, host: str) -> None: + self._inventory.setdefault(group, []).append(host) + + def install_packages(self, hosts: str, packages: List[str], + state: str = "present") -> "AnsibleAdapter": + play = AnsiblePlay(name=f"Install packages on {hosts}", hosts=hosts, become=True) + play.tasks.append(AnsibleTask("Install required packages", "apt", + {"name": packages, "state": state, "update_cache": True})) + return self.add_play(play) + + def copy_file(self, hosts: str, src: str, dest: str, mode: str = "0644") -> "AnsibleAdapter": + play = AnsiblePlay(name=f"Copy {src} to {hosts}", hosts=hosts, 
become=True) + play.tasks.append(AnsibleTask(f"Copy {src}", "copy", {"src": src, "dest": dest, "mode": mode})) + return self.add_play(play) + + def run_command(self, hosts: str, command: str) -> "AnsibleAdapter": + play = AnsiblePlay(name=f"Run command on {hosts}", hosts=hosts) + play.tasks.append(AnsibleTask(f"Execute: {command[:40]}", "command", {"cmd": command})) + return self.add_play(play) + + def deploy_service(self, hosts: str, service_name: str, image: str) -> "AnsibleAdapter": + play = AnsiblePlay(name=f"Deploy {service_name}", hosts=hosts, become=True, + vars={"service_name": service_name, "image": image}) + play.tasks.extend([ + AnsibleTask("Pull Docker image", "community.docker.docker_image", + {"name": "{{image}}", "source": "pull"}), + AnsibleTask("Run container", "community.docker.docker_container", + {"name": "{{service_name}}", "image": "{{image}}", "state": "started"}), + ]) + return self.add_play(play) + + def generate_playbook(self) -> str: + playbook = [] + for play in self._plays: + play_dict: Dict[str, Any] = { + "name": play.name, "hosts": play.hosts, "become": play.become, + } + if play.vars: + play_dict["vars"] = play.vars + if play.tasks: + play_dict["tasks"] = [_task_to_dict(t) for t in play.tasks] + if play.handlers: + play_dict["handlers"] = play.handlers + playbook.append(play_dict) + try: + import yaml + return yaml.dump(playbook, default_flow_style=False, sort_keys=False) + except ImportError: + import json + return json.dumps(playbook, indent=2, default=str) + + def simulate_run(self, hosts: Optional[List[str]] = None) -> PlaybookResult: + hosts = hosts or self._inventory.get("all", ["localhost"]) + tasks_count = sum(len(p.tasks) for p in self._plays) + result = PlaybookResult( + hosts_ok=len(hosts), + hosts_changed=min(len(hosts), tasks_count), + tasks_executed=tasks_count * len(hosts), + output=f"Playbook executed: {len(self._plays)} plays, {tasks_count} tasks on {len(hosts)} hosts", + ) + logger.info("Ansible simulation: %d 
plays, %d tasks, %d hosts", + len(self._plays), tasks_count, len(hosts)) + return result diff --git a/devops/iac/cloudformation.py b/devops/iac/cloudformation.py new file mode 100644 index 0000000..8d69955 --- /dev/null +++ b/devops/iac/cloudformation.py @@ -0,0 +1,161 @@ +"""AWS CloudFormation infrastructure adapter.""" +from __future__ import annotations +import json +import logging +import uuid +from dataclasses import dataclass, field +from datetime import datetime +from typing import Any, Dict, List, Optional + +logger = logging.getLogger(__name__) + +@dataclass +class CFResource: + logical_id: str = "" + resource_type: str = "" + properties: Dict[str, Any] = field(default_factory=dict) + depends_on: List[str] = field(default_factory=list) + deletion_policy: str = "Delete" + metadata: Dict[str, Any] = field(default_factory=dict) + +@dataclass +class CFTemplate: + template_id: str = field(default_factory=lambda: str(uuid.uuid4())) + stack_name: str = "" + description: str = "" + parameters: Dict[str, Dict[str, Any]] = field(default_factory=dict) + resources: Dict[str, CFResource] = field(default_factory=dict) + outputs: Dict[str, Dict[str, Any]] = field(default_factory=dict) + conditions: Dict[str, Any] = field(default_factory=dict) + +@dataclass +class StackEvent: + event_id: str = field(default_factory=lambda: str(uuid.uuid4())) + logical_id: str = "" + status: str = "" + reason: str = "" + timestamp: datetime = field(default_factory=datetime.utcnow) + +@dataclass +class StackResult: + stack_id: str = field(default_factory=lambda: str(uuid.uuid4())) + stack_name: str = "" + status: str = "CREATE_COMPLETE" + events: List[StackEvent] = field(default_factory=list) + outputs: Dict[str, str] = field(default_factory=dict) + errors: List[str] = field(default_factory=list) + created_at: datetime = field(default_factory=datetime.utcnow) + +class CloudFormation: + """CloudFormation template builder and deployment simulator.""" + + def __init__(self) -> None: + 
class CloudFormation:
    """CloudFormation template builder and deployment simulator."""

    def __init__(self) -> None:
        # stack_name -> most recent deploy result
        self._stacks: Dict[str, StackResult] = {}
        logger.info("CloudFormation adapter initialized")

    def template(self, stack_name: str, description: str = "") -> CFTemplate:
        """Start a fresh, empty template for a stack."""
        return CFTemplate(stack_name=stack_name, description=description)

    def add_parameter(self, template: CFTemplate, name: str, param_type: str = "String",
                      default: Any = None, description: str = "") -> None:
        """Declare a template parameter; Default/Description only when provided."""
        param: Dict[str, Any] = {"Type": param_type}
        if default is not None:
            param["Default"] = default
        if description:
            param["Description"] = description
        template.parameters[name] = param

    def add_resource(self, template: CFTemplate, logical_id: str,
                     resource_type: str, properties: Dict[str, Any],
                     depends_on: Optional[List[str]] = None) -> CFResource:
        """Register a resource under its logical ID and return it."""
        resource = CFResource(logical_id=logical_id, resource_type=resource_type,
                              properties=properties, depends_on=depends_on or [])
        template.resources[logical_id] = resource
        return resource

    def add_output(self, template: CFTemplate, name: str, value: Any,
                   description: str = "", export_name: Optional[str] = None) -> None:
        """Declare a stack output, optionally exported for cross-stack use."""
        entry: Dict[str, Any] = {"Value": value}
        if description:
            entry["Description"] = description
        if export_name:
            entry["Export"] = {"Name": export_name}
        template.outputs[name] = entry

    # --- High-level resource helpers --- 

    def ec2_instance(self, template: CFTemplate, logical_id: str,
                     instance_type: str = "t3.medium",
                     image_id: str = "ami-0abcdef1234567890") -> CFResource:
        """Convenience: an EC2 instance tagged with its own logical ID."""
        props = {
            "InstanceType": instance_type, "ImageId": image_id,
            "Tags": [{"Key": "Name", "Value": logical_id}],
        }
        return self.add_resource(template, logical_id, "AWS::EC2::Instance", props)

    def s3_bucket(self, template: CFTemplate, logical_id: str,
                  versioning: bool = True) -> CFResource:
        """Convenience: an S3 bucket, versioned by default."""
        props: Dict[str, Any] = {}
        if versioning:
            props["VersioningConfiguration"] = {"Status": "Enabled"}
        return self.add_resource(template, logical_id, "AWS::S3::Bucket", props)

    def rds_instance(self, template: CFTemplate, logical_id: str,
                     engine: str = "postgres",
                     db_class: str = "db.t3.micro") -> CFResource:
        """Convenience: an RDS instance.

        NOTE(review): the password Refs a 'DBPassword' parameter that this
        helper does not declare — callers must add it, verify.
        """
        props = {
            "DBInstanceClass": db_class, "Engine": engine,
            "AllocatedStorage": "20", "MasterUsername": "admin",
            "MasterUserPassword": {"Ref": "DBPassword"},
        }
        return self.add_resource(template, logical_id, "AWS::RDS::DBInstance", props)

    def lambda_function(self, template: CFTemplate, logical_id: str,
                        handler: str, runtime: str = "python3.11",
                        memory_mb: int = 256, timeout: int = 30) -> CFResource:
        """Convenience: an inline-code Lambda wired to a 'LambdaRole' resource."""
        props = {
            "Handler": handler, "Runtime": runtime,
            "MemorySize": memory_mb, "Timeout": timeout,
            "Role": {"Fn::GetAtt": ["LambdaRole", "Arn"]},
            "Code": {"ZipFile": "def handler(event, context): return {}"},
        }
        return self.add_resource(template, logical_id, "AWS::Lambda::Function", props)

    def render_json(self, template: CFTemplate) -> str:
        """Serialize the template to CloudFormation JSON (canonical key order)."""
        doc: Dict[str, Any] = {
            "AWSTemplateFormatVersion": "2010-09-09",
            "Description": template.description,
        }
        if template.parameters:
            doc["Parameters"] = template.parameters
        if template.conditions:
            doc["Conditions"] = template.conditions
        resources: Dict[str, Any] = {}
        for lid, res in template.resources.items():
            entry: Dict[str, Any] = {"Type": res.resource_type, "Properties": res.properties}
            if res.depends_on:
                entry["DependsOn"] = res.depends_on
            if res.deletion_policy != "Delete":  # "Delete" is the implicit default
                entry["DeletionPolicy"] = res.deletion_policy
            resources[lid] = entry
        doc["Resources"] = resources
        if template.outputs:
            doc["Outputs"] = template.outputs
        return json.dumps(doc, indent=2, default=str)

    def deploy(self, template: CFTemplate) -> StackResult:
        """Simulate stack creation: one event per resource, bracketed by stack events."""
        events = [
            StackEvent(logical_id=template.stack_name, status="CREATE_IN_PROGRESS"),
            *[StackEvent(logical_id=lid, status="CREATE_COMPLETE")
              for lid in template.resources],
            StackEvent(logical_id=template.stack_name, status="CREATE_COMPLETE"),
        ]
        outputs = {
            name: str(out.get("Value", "")) for name, out in template.outputs.items()
        }
        result = StackResult(stack_name=template.stack_name,
                             events=events, outputs=outputs)
        self._stacks[template.stack_name] = result
        logger.info("Stack '%s' deployed: %d resources", template.stack_name, len(template.resources))
        return result

    def describe_stack(self, stack_name: str) -> Optional[StackResult]:
        """Most recent deploy result for the stack, or None."""
        return self._stacks.get(stack_name)

    def delete_stack(self, stack_name: str) -> bool:
        """Drop the stack from the registry; True if it existed."""
        return bool(self._stacks.pop(stack_name, None))
return str(v) + if isinstance(v, list): + return "[" + ", ".join(f'"{x}"' if isinstance(x, str) else str(x) for x in v) + "]" + if isinstance(v, dict): + inner = "\n ".join(f'{k} = {_hcl_value(val)}' for k, val in v.items()) + return "{\n " + inner + "\n }" + return f'"{v}"' + +class TerraformAdapter: + def __init__(self) -> None: + self._resources: List[TerraformResource] = [] + self._state: Dict[str, Any] = {} + self._outputs: Dict[str, str] = {} + logger.info("TerraformAdapter initialized") + + def add_resource(self, resource: TerraformResource) -> "TerraformAdapter": + self._resources.append(resource) + return self + + def aws_instance(self, name: str, instance_type: str = "t3.medium", + ami: str = "ami-0abcdef1234567890", **kwargs: Any) -> "TerraformAdapter": + return self.add_resource(TerraformResource("aws_instance", name, "aws", + {"instance_type": instance_type, "ami": ami, **kwargs})) + + def aws_s3_bucket(self, name: str, bucket_name: str, **kwargs: Any) -> "TerraformAdapter": + return self.add_resource(TerraformResource("aws_s3_bucket", name, "aws", + {"bucket": bucket_name, "acl": "private", **kwargs})) + + def aws_rds_instance(self, name: str, engine: str = "postgres", + instance_class: str = "db.t3.micro", **kwargs: Any) -> "TerraformAdapter": + return self.add_resource(TerraformResource("aws_db_instance", name, "aws", + {"engine": engine, "instance_class": instance_class, "allocated_storage": 20, **kwargs})) + + def generate_hcl(self) -> str: + lines = ['terraform {\n required_providers {\n aws = { source = "hashicorp/aws" }\n }\n}\n'] + for res in self._resources: + lines.append(f'resource "{res.resource_type}" "{res.resource_name}" {{') + for k, v in res.config.items(): + lines.append(f" {k} = {_hcl_value(v)}") + if res.depends_on: + lines.append(f" depends_on = [{', '.join(res.depends_on)}]") + lines.append("}\n") + for name, expr in self._outputs.items(): + lines.append(f'output "{name}" {{\n value = {expr}\n}}\n') + return "\n".join(lines) + + 
def add_output(self, name: str, value_expr: str) -> "TerraformAdapter": + self._outputs[name] = value_expr + return self + + def plan(self) -> TerraformPlan: + to_add = [r for r in self._resources if r.resource_name not in self._state] + to_change = [r for r in self._resources if r.resource_name in self._state] + plan = TerraformPlan(resources_to_add=to_add, resources_to_change=to_change) + logger.info("Terraform plan: +%d ~%d", len(to_add), len(to_change)) + return plan + + def apply(self, plan: Optional[TerraformPlan] = None) -> TerraformApplyResult: + if plan is None: + plan = self.plan() + for res in plan.resources_to_add: + self._state[res.resource_name] = {"type": res.resource_type, "config": res.config} + result = TerraformApplyResult( + resources_added=len(plan.resources_to_add), + resources_changed=len(plan.resources_to_change), + resources_destroyed=len(plan.resources_to_destroy), + outputs={name: f"[computed:{expr}]" for name, expr in self._outputs.items()}, + ) + logger.info("Terraform apply: +%d ~%d", result.resources_added, result.resources_changed) + return result + + def destroy(self, resource_name: str) -> bool: + if resource_name in self._state: + del self._state[resource_name] + return True + return False + + @property + def state_resources(self) -> List[str]: + return list(self._state.keys()) diff --git a/devops/observability/__init__.py b/devops/observability/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/devops/observability/logging.py b/devops/observability/logging.py new file mode 100644 index 0000000..5931613 --- /dev/null +++ b/devops/observability/logging.py @@ -0,0 +1,175 @@ +"""Centralized structured logging system.""" +from __future__ import annotations +import json +import logging +import sys +import uuid +from dataclasses import dataclass, field +from datetime import datetime +from typing import Any, Callable, Dict, List, Optional + +_BASE_LOGGER = logging.getLogger(__name__) + +@dataclass +class LogRecord: + 
class JSONFormatter(logging.Formatter):
    """Render each stdlib log record as a single JSON object.

    Static *extra_fields* are merged into every record; per-record
    ``trace_id`` / ``fields`` attributes are included when present.
    """

    def __init__(self, service: str = "platform", extra_fields: Optional[Dict[str, Any]] = None) -> None:
        super().__init__()
        self.service = service
        self.extra_fields = extra_fields or {}

    def format(self, record: logging.LogRecord) -> str:
        payload: Dict[str, Any] = {
            "timestamp": datetime.utcnow().isoformat() + "Z",
            "level": record.levelname,
            "service": self.service,
            "message": record.getMessage(),
            "logger": record.name,
            "module": record.module,
            "function": record.funcName,
            "line": record.lineno,
        }
        payload.update(self.extra_fields)
        # Optional attributes injected by StructuredLogger via `extra=`.
        if hasattr(record, "trace_id"):
            payload["trace_id"] = record.trace_id
        if hasattr(record, "fields"):
            payload.update(record.fields)
        if record.exc_info:
            payload["exception"] = self.formatException(record.exc_info)
        return json.dumps(payload)
class InMemoryLogSink:
    """Bounded in-memory store of LogRecords for later querying.

    Fix: the original ``__init__`` executed ``from collections import deque``
    and then ignored that name, re-importing the module with
    ``__import__("collections").deque`` — now the direct import is used.
    """

    def __init__(self, max_records: int = 10000) -> None:
        # Local import keeps this module's top-level dependencies unchanged.
        from collections import deque
        # Oldest records are evicted once max_records is reached.
        self._records: "deque[LogRecord]" = deque(maxlen=max_records)

    def write(self, record: LogRecord) -> None:
        """Append *record*, silently evicting the oldest entry when full."""
        self._records.append(record)

    def query(self, level: Optional[str] = None, service: Optional[str] = None,
              trace_id: Optional[str] = None, limit: int = 100) -> List[LogRecord]:
        """Filter stored records by level/service/trace_id.

        Each filter is applied only when provided; at most the newest
        *limit* matches are returned, oldest first.
        """
        records = list(self._records)
        if level:
            records = [r for r in records if r.level == level.upper()]
        if service:
            records = [r for r in records if r.service == service]
        if trace_id:
            records = [r for r in records if r.trace_id == trace_id]
        return records[-limit:]

    def search(self, query: str, limit: int = 50) -> List[LogRecord]:
        """Case-insensitive substring search over record messages."""
        q = query.lower()
        return [r for r in self._records if q in r.message.lower()][-limit:]

    @property
    def count(self) -> int:
        # Number of records currently retained (≤ max_records).
        return len(self._records)
class CentralizedLogManager:
    """Registry of per-service StructuredLoggers with cross-service querying."""

    def __init__(self) -> None:
        self._loggers: Dict[str, StructuredLogger] = {}
        _BASE_LOGGER.info("CentralizedLogManager initialized")

    def get_logger(self, service: str, level: str = "INFO") -> StructuredLogger:
        """Return the logger for *service*, creating it on first use."""
        try:
            return self._loggers[service]
        except KeyError:
            created = StructuredLogger(service, level)
            self._loggers[service] = created
            return created

    def aggregate_query(self, level: Optional[str] = None, limit: int = 100) -> List[LogRecord]:
        """Merge recent records from every service, sorted by timestamp (newest *limit*)."""
        merged: List[LogRecord] = []
        for svc_logger in self._loggers.values():
            merged.extend(svc_logger.query(level=level, limit=limit))
        merged.sort(key=lambda r: r.timestamp)
        return merged[-limit:]

    def list_services(self) -> List[str]:
        """Names of all services a logger has been created for."""
        return list(self._loggers)
class Counter:
    """Monotonically increasing counter with an average per-second rate."""

    def __init__(self, name: str, labels: Optional[Dict[str, str]] = None) -> None:
        self.name = name
        self.labels = labels or {}
        self._value: float = 0.0
        # Creation instant anchors the rate computation.
        self._created_at: float = time.monotonic()

    def inc(self, amount: float = 1.0) -> None:
        """Add *amount* (default 1) to the counter."""
        self._value += amount

    @property
    def value(self) -> float:
        """Current accumulated total."""
        return self._value

    @property
    def rate(self) -> float:
        """Average increments per second since creation (floor of 1ms elapsed)."""
        elapsed = time.monotonic() - self._created_at
        return self._value / max(elapsed, 0.001)
class Timer:
    """Context manager that records elapsed seconds into a histogram."""

    def __init__(self, histogram: Histogram) -> None:
        self._histogram = histogram
        self._start: Optional[float] = None

    def __enter__(self) -> "Timer":
        self._start = time.perf_counter()
        return self

    def __exit__(self, *_: Any) -> None:
        # Only record when __enter__ actually ran.
        if self._start is None:
            return
        self._histogram.observe(time.perf_counter() - self._start)
+ """ + + def __init__(self, namespace: str = "platform") -> None: + self.namespace = namespace + self._counters: Dict[str, Counter] = {} + self._gauges: Dict[str, Gauge] = {} + self._histograms: Dict[str, Histogram] = {} + self._time_series: Dict[str, deque] = defaultdict(lambda: deque(maxlen=1000)) + logger.info("MetricsRegistry initialized (namespace=%s)", namespace) + + def _full_name(self, name: str) -> str: + return f"{self.namespace}_{name}" if self.namespace else name + + def counter(self, name: str, labels: Optional[Dict[str, str]] = None) -> Counter: + full = self._full_name(name) + if full not in self._counters: + self._counters[full] = Counter(full, labels) + return self._counters[full] + + def gauge(self, name: str, labels: Optional[Dict[str, str]] = None) -> Gauge: + full = self._full_name(name) + if full not in self._gauges: + self._gauges[full] = Gauge(full, labels) + return self._gauges[full] + + def histogram(self, name: str, buckets: Optional[List[float]] = None, + labels: Optional[Dict[str, str]] = None) -> Histogram: + full = self._full_name(name) + if full not in self._histograms: + self._histograms[full] = Histogram(full, buckets, labels) + return self._histograms[full] + + def timer(self, name: str) -> Timer: + h = self.histogram(name) + return Timer(h) + + def record(self, name: str, value: float) -> None: + self._time_series[self._full_name(name)].append( + MetricPoint(name=name, value=value)) + + def snapshot(self) -> Dict[str, Any]: + snap: Dict[str, Any] = {} + for name, c in self._counters.items(): + snap[name] = {"type": "counter", "value": c.value, "rate": c.rate} + for name, g in self._gauges.items(): + snap[name] = {"type": "gauge", "value": g.value} + for name, h in self._histograms.items(): + s = h.summary() + snap[name] = {"type": "histogram", "count": s.count, "mean": s.mean, + "p95": s.p95, "p99": s.p99} + return snap + + def expose_prometheus(self) -> str: + lines: List[str] = [] + for name, c in self._counters.items(): + 
@dataclass
class Span:
    """A single timed operation inside a distributed trace."""
    span_id: str = field(default_factory=lambda: str(uuid.uuid4())[:16])
    trace_id: str = ""
    parent_span_id: Optional[str] = None
    operation_name: str = ""
    service: str = ""
    start_time: float = field(default_factory=time.perf_counter)
    end_time: Optional[float] = None
    tags: Dict[str, Any] = field(default_factory=dict)
    logs: List[Dict[str, Any]] = field(default_factory=list)
    status: str = "ok"  # "ok" | "error"
    error_message: Optional[str] = None

    @property
    def duration_ms(self) -> float:
        """Elapsed milliseconds; uses the current clock while the span is open."""
        stop = self.end_time if self.end_time is not None else time.perf_counter()
        return (stop - self.start_time) * 1000

    def finish(self) -> None:
        """Close the span at the current instant."""
        self.end_time = time.perf_counter()

    def set_tag(self, key: str, value: Any) -> "Span":
        """Attach a tag; returns self for chaining."""
        self.tags[key] = value
        return self

    def log(self, event: str, **fields: Any) -> "Span":
        """Record a timestamped structured event on the span."""
        entry: Dict[str, Any] = {"event": event, "timestamp": time.perf_counter()}
        entry.update(fields)
        self.logs.append(entry)
        return self

    def set_error(self, message: str) -> "Span":
        """Mark the span failed with *message*; also sets the 'error' tag."""
        self.status = "error"
        self.error_message = message
        self.tags["error"] = True
        return self
class SpanContext:
    """Process-wide holder for the currently active span.

    NOTE(review): the original docstring said "thread-local", but this is a
    plain class attribute shared across all threads — confirm intent before
    relying on it under concurrency.
    """
    _current: Optional[Span] = None

    @classmethod
    def current(cls) -> Optional[Span]:
        """Return the active span, or None when none is set."""
        return cls._current

    @classmethod
    def set(cls, span: Optional[Span]) -> None:
        """Install *span* as the active span (None clears it)."""
        cls._current = span
tid = trace_id + pid = parent_span_id or (parent.span_id if parent else None) + span = Span(trace_id=tid, parent_span_id=pid, + operation_name=operation, service=self.service) + if tid in self._traces: + self._traces[tid].spans.append(span) + SpanContext.set(span) + return span + + def finish_span(self, span: Span) -> None: + span.finish() + if span == SpanContext.current(): + SpanContext.set(None) + + @contextmanager + def span(self, operation: str, **tags: Any) -> Generator[Span, None, None]: + s = self.start_span(operation) + for k, v in tags.items(): + s.set_tag(k, v) + try: + yield s + except Exception as exc: + s.set_error(str(exc)) + raise + finally: + self.finish_span(s) + + def finish_trace(self, trace: Trace) -> None: + for span in trace.spans: + if span.end_time is None: + span.finish() + self._traces.pop(trace.trace_id, None) + self._completed.append(trace) + logger.debug("Trace %s finished: %d spans, %.1fms", + trace.trace_id[:8], len(trace.spans), trace.total_duration_ms) + + def get_trace(self, trace_id: str) -> Optional[Trace]: + return self._traces.get(trace_id) or next( + (t for t in self._completed if t.trace_id == trace_id), None) + + def recent_traces(self, n: int = 10) -> List[Trace]: + return self._completed[-n:] + + def error_rate(self, window: int = 100) -> float: + recent = self._completed[-window:] + if not recent: + return 0.0 + return sum(1 for t in recent if t.has_errors) / len(recent) + + def p95_latency(self, window: int = 100) -> float: + recent = self._completed[-window:] + if not recent: + return 0.0 + durations = sorted(t.total_duration_ms for t in recent) + idx = max(0, int(len(durations) * 0.95) - 1) + return durations[idx] + + +class TracingMiddleware: + """Wraps callable with automatic tracing.""" + + def __init__(self, tracer: Tracer) -> None: + self._tracer = tracer + + def trace(self, operation: str) -> Any: + def decorator(func: Any) -> Any: + def wrapper(*args: Any, **kwargs: Any) -> Any: + with 
@dataclass
class CryptoKey:
    """Metadata and material for one managed cryptographic key."""
    key_id: str = field(default_factory=lambda: str(uuid.uuid4()))
    key_type: str = "symmetric"  # symmetric | rsa | ecdsa | hmac
    algorithm: str = "AES-256"
    purpose: str = "encryption"  # encryption | signing | authentication
    key_material: Optional[bytes] = None
    status: str = "active"  # active | inactive | pending_deletion
    rotation_policy_days: int = 90
    created_at: datetime = field(default_factory=datetime.utcnow)
    expires_at: Optional[datetime] = None
    last_rotated: Optional[datetime] = None
    metadata: Dict[str, Any] = field(default_factory=dict)

    @property
    def is_expired(self) -> bool:
        """True when an expiry is set and is now in the past."""
        if self.expires_at is None:
            return False
        return datetime.utcnow() > self.expires_at

    @property
    def rotation_due(self) -> bool:
        """True when the key's age since last rotation (or creation) meets the policy."""
        anchor = self.created_at if self.last_rotated is None else self.last_rotated
        age_days = (datetime.utcnow() - anchor).days
        return age_days >= self.rotation_policy_days
iterations, dklen=32) + return dk, salt + + +def _hmac_sign(key_material: bytes, data: bytes) -> bytes: + import hmac + return hmac.new(key_material, data, hashlib.sha256).digest() + + +def _simple_encrypt(key_material: bytes, plaintext: str) -> str: + """XOR-based encryption for simulation (not production-safe).""" + data = plaintext.encode() + key_stream = (key_material * (len(data) // len(key_material) + 1))[:len(data)] + encrypted = bytes(d ^ k for d, k in zip(data, key_stream)) + return base64.b64encode(encrypted).decode() + + +def _simple_decrypt(key_material: bytes, ciphertext: str) -> str: + data = base64.b64decode(ciphertext) + key_stream = (key_material * (len(data) // len(key_material) + 1))[:len(data)] + decrypted = bytes(d ^ k for d, k in zip(data, key_stream)) + return decrypted.decode() + + +class KeyManager: + """ + Cryptographic key lifecycle management with rotation, + encryption/decryption, signing, and key derivation. + """ + + def __init__(self) -> None: + self._keys: Dict[str, CryptoKey] = {} + self._key_aliases: Dict[str, str] = {} # alias -> key_id + self._rotation_history: List[Dict[str, Any]] = [] + logger.info("KeyManager initialized") + + def create_key(self, alias: str, key_type: str = "symmetric", + algorithm: str = "AES-256", purpose: str = "encryption", + rotation_policy_days: int = 90, + ttl_days: Optional[int] = None) -> CryptoKey: + key_material = _generate_symmetric_key() + expires_at = datetime.utcnow() + timedelta(days=ttl_days) if ttl_days else None + key = CryptoKey(key_type=key_type, algorithm=algorithm, purpose=purpose, + key_material=key_material, rotation_policy_days=rotation_policy_days, + expires_at=expires_at) + self._keys[key.key_id] = key + self._key_aliases[alias] = key.key_id + logger.info("Created key '%s' (type=%s, algo=%s)", alias, key_type, algorithm) + return key + + def get_key(self, key_id_or_alias: str) -> Optional[CryptoKey]: + key_id = self._key_aliases.get(key_id_or_alias, key_id_or_alias) + return 
self._keys.get(key_id) + + def rotate_key(self, alias: str) -> CryptoKey: + old_key = self.get_key(alias) + new_material = _generate_symmetric_key() + new_key = CryptoKey( + key_type=old_key.key_type if old_key else "symmetric", + algorithm=old_key.algorithm if old_key else "AES-256", + purpose=old_key.purpose if old_key else "encryption", + key_material=new_material, + rotation_policy_days=old_key.rotation_policy_days if old_key else 90, + last_rotated=datetime.utcnow(), + ) + if old_key: + old_key.status = "inactive" + self._rotation_history.append({ + "alias": alias, "old_key_id": old_key.key_id, + "new_key_id": new_key.key_id, "rotated_at": datetime.utcnow().isoformat(), + }) + self._keys[new_key.key_id] = new_key + self._key_aliases[alias] = new_key.key_id + logger.info("Rotated key '%s'", alias) + return new_key + + def encrypt(self, alias: str, plaintext: str) -> Optional[str]: + key = self.get_key(alias) + if not key or not key.key_material or key.is_expired: + return None + return _simple_encrypt(key.key_material, plaintext) + + def decrypt(self, alias: str, ciphertext: str) -> Optional[str]: + key = self.get_key(alias) + if not key or not key.key_material: + return None + try: + return _simple_decrypt(key.key_material, ciphertext) + except Exception as exc: + logger.error("Decryption failed: %s", exc) + return None + + def sign(self, alias: str, data: str) -> Optional[str]: + key = self.get_key(alias) + if not key or not key.key_material: + return None + sig = _hmac_sign(key.key_material, data.encode()) + return base64.b64encode(sig).decode() + + def verify(self, alias: str, data: str, signature: str) -> bool: + expected = self.sign(alias, data) + return expected == signature if expected else False + + def derive_key(self, password: str, salt: Optional[str] = None) -> tuple: + salt_bytes = base64.b64decode(salt) if salt else None + dk, salt_bytes = _derive_key(password, salt_bytes) + return base64.b64encode(dk).decode(), 
@dataclass
class VaultToken:
    """Short-lived auth token carrying the names of its ACL policies."""
    token_id: str = field(default_factory=lambda: secrets.token_hex(16))
    policies: List[str] = field(default_factory=list)
    renewable: bool = True
    ttl_seconds: int = 3600
    created_at: datetime = field(default_factory=datetime.utcnow)

    @property
    def is_expired(self) -> bool:
        """True once the creation time plus TTL has passed."""
        deadline = self.created_at + timedelta(seconds=self.ttl_seconds)
        return datetime.utcnow() > deadline
Dict[str, List[str]]] = { + "admin": {"*": ["read", "write", "delete", "list"]}, + "readonly": {"*": ["read", "list"]}, + "appuser": {"secret/app/*": ["read"], "secret/db/*": ["read"]}, + } + + def check_permission(self, token: VaultToken, path: str, operation: str) -> bool: + for policy in token.policies: + rules = self._policies.get(policy, {}) + for pattern, ops in rules.items(): + if pattern == "*" or path.startswith(pattern.rstrip("*")): + if operation in ops: + return True + return False + + def add_policy(self, name: str, rules: Dict[str, List[str]]) -> None: + self._policies[name] = rules + +class VaultIntegration: + """Simulated HashiCorp Vault for secrets management.""" + + def __init__(self, address: str = "http://localhost:8200") -> None: + self.address = address + self._secrets: Dict[str, List[Secret]] = {} # path -> versions + self._tokens: Dict[str, VaultToken] = {} + self._acl = VaultACL() + self._audit_log: List[Dict[str, Any]] = [] + self._root_token = self._bootstrap_root_token() + logger.info("VaultIntegration initialized (addr=%s)", address) + + def _bootstrap_root_token(self) -> VaultToken: + token = VaultToken(policies=["admin"], ttl_seconds=86400 * 365) + self._tokens[token.token_id] = token + return token + + @property + def root_token(self) -> str: + return self._root_token.token_id + + def create_token(self, policies: List[str], ttl_seconds: int = 3600) -> VaultToken: + token = VaultToken(policies=policies, ttl_seconds=ttl_seconds) + self._tokens[token.token_id] = token + logger.debug("Created token with policies: %s", policies) + return token + + def revoke_token(self, token_id: str) -> bool: + return bool(self._tokens.pop(token_id, None)) + + def _get_token(self, token_id: str) -> Optional[VaultToken]: + token = self._tokens.get(token_id) + if token and token.is_expired: + del self._tokens[token_id] + return None + return token + + def _audit(self, operation: str, path: str, token_id: str) -> None: + self._audit_log.append({ + 
"operation": operation, "path": path, + "token": token_id[:8] + "...", "timestamp": datetime.utcnow().isoformat(), + }) + + def write_secret(self, path: str, data: Dict[str, str], + token_id: Optional[str] = None) -> Optional[Secret]: + tok_id = token_id or self._root_token.token_id + token = self._get_token(tok_id) + if not token or not self._acl.check_permission(token, path, "write"): + logger.warning("Write denied for path '%s'", path) + return None + versions = self._secrets.setdefault(path, []) + version = len(versions) + 1 + secret = Secret(path=path, data=dict(data), version=version, + expires_at=datetime.utcnow() + timedelta(hours=24)) + versions.append(secret) + self._audit("write", path, tok_id) + logger.debug("Written secret at '%s' (v%d)", path, version) + return secret + + def read_secret(self, path: str, version: Optional[int] = None, + token_id: Optional[str] = None) -> Optional[Secret]: + tok_id = token_id or self._root_token.token_id + token = self._get_token(tok_id) + if not token or not self._acl.check_permission(token, path, "read"): + logger.warning("Read denied for path '%s'", path) + return None + versions = self._secrets.get(path, []) + if not versions: + return None + secret = versions[version - 1] if version and version <= len(versions) else versions[-1] + self._audit("read", path, tok_id) + return secret + + def delete_secret(self, path: str, token_id: Optional[str] = None) -> bool: + tok_id = token_id or self._root_token.token_id + token = self._get_token(tok_id) + if not token or not self._acl.check_permission(token, path, "delete"): + return False + existed = bool(self._secrets.pop(path, None)) + if existed: + self._audit("delete", path, tok_id) + return existed + + def list_secrets(self, prefix: str, token_id: Optional[str] = None) -> List[str]: + tok_id = token_id or self._root_token.token_id + token = self._get_token(tok_id) + if not token: + return [] + return [p for p in self._secrets if p.startswith(prefix)] + + def 
# NOTE(review): the next two defs are methods of the secrets-vault class that
# precedes this hunk in the patch.
def rotate_secret(self, path: str, new_data: Dict[str, str],
                  token_id: Optional[str] = None) -> Optional[Secret]:
    """Rotate the secret at *path* by writing *new_data* as a fresh version."""
    return self.write_secret(path, new_data, token_id)

def get_audit_log(self, limit: int = 50) -> List[Dict[str, Any]]:
    """Return the newest *limit* audit entries, oldest first."""
    return self._audit_log[-limit:]


# diff --git: new empty packages human_connection/__init__.py and
# human_connection/emotional_intelligence/__init__.py, then new file
# human_connection/emotional_intelligence/compassion_engine.py:

"""Generate empathetic, compassionate responses to user messages."""

import logging
import random
from dataclasses import dataclass, field
from typing import Dict, List, Optional

logger = logging.getLogger(__name__)

ACKNOWLEDGEMENT_TEMPLATES = [
    "It sounds like you're going through a really tough time with {topic}.",
    "I can hear how {emotion} you're feeling right now.",
    "That must be genuinely difficult — {topic} is not easy to deal with.",
    "Your feelings about {topic} make complete sense.",
]

SUPPORT_TEMPLATES: Dict[str, List[str]] = {
    "sadness": [
        "I'm here with you, and you don't have to face this alone.",
        "It's okay to feel sad — take the time you need.",
        "Would it help to talk through what's weighing on you?",
    ],
    "anger": [
        "Your frustration is completely valid. Let's see what we can do.",
        "I understand why this situation would make anyone angry.",
        "Let's work through this together and find a way forward.",
    ],
    "fear": [
        "It's natural to feel scared. Let's break this down into smaller steps.",
        "You're not alone in this — we'll tackle it one piece at a time.",
        "Take a breath. We can figure this out together.",
    ],
    "frustration": [
        "I know this is really annoying. Let's try a different approach.",
        "Technical problems are so frustrating — let's get this sorted.",
        "You've been patient. Let's get to the bottom of this right now.",
    ],
    "joy": [
        "That's absolutely wonderful — you deserve this!",
        "Your happiness is contagious. Keep going!",
        "What a great moment — enjoy every bit of it!",
    ],
    "excitement": [
        "Yes! This is exciting — let's channel that energy!",
        "Your enthusiasm is inspiring. Let's make the most of it!",
        "Love the excitement — let's dive in!",
    ],
    "neutral": [
        "I'm here to help. Just let me know what you need.",
        "Happy to assist — what would be most useful right now?",
    ],
}

VALIDATION_PHRASES = [
    "What you're feeling is completely valid.",
    "Anyone in your position would feel the same way.",
    "Your reaction makes total sense given the circumstances.",
    "It's understandable to feel that way.",
]

ENCOURAGEMENT_PHRASES = [
    "You've got this — one step at a time.",
    "Every challenge you overcome makes you stronger.",
    "I believe in your ability to work through this.",
    "Progress isn't always linear, but you're moving forward.",
    "Small steps still count. Keep going.",
]


@dataclass
class CompassionResponse:
    """A structured empathetic response."""
    acknowledgement: str
    validation: str
    support: str
    encouragement: str
    full_response: str
    detected_emotion: str


class CompassionEngine:
    """Compose warm, empathetic responses grounded in the user's emotional state."""

    def __init__(self) -> None:
        logger.info("CompassionEngine initialised.")

    @staticmethod
    def _extract_topic(text: str) -> str:
        """Heuristically pull a short topic phrase from the user's message."""
        words = text.split()
        if len(words) <= 5:
            return text.strip().rstrip(".")
        # First meaningful noun-phrase chunk: words 3-8 of the message
        # (slicing clamps, so the explicit min() of the original is redundant).
        return " ".join(words[2:7]).rstrip(".,!?")

    def acknowledge_struggle(self, text: str) -> str:
        """Return an acknowledgement sentence tailored to the message content."""
        chosen = random.choice(ACKNOWLEDGEMENT_TEMPLATES)
        # NOTE(review): templates that use {emotion} expect an adjective, so
        # "difficult things" reads awkwardly there — confirm intended wording.
        return chosen.format(topic=self._extract_topic(text), emotion="difficult things")

    def offer_support(self, emotion: str) -> str:
        """Return a supportive statement appropriate for *emotion*."""
        choices = SUPPORT_TEMPLATES.get(emotion, SUPPORT_TEMPLATES["neutral"])
        return random.choice(choices)

    def validate_feelings(self, text: str) -> str:  # noqa: ARG002 (text reserved for future NLU)
        """Return a validation statement."""
        return random.choice(VALIDATION_PHRASES)

    def provide_encouragement(self) -> str:
        """Return an encouraging closing statement."""
        return random.choice(ENCOURAGEMENT_PHRASES)

    def generate(self, user_text: str, detected_emotion: str,
                 context: Optional[Dict] = None) -> CompassionResponse:
        """Compose a full CompassionResponse for *user_text*."""
        context = context or {}
        parts = [
            self.acknowledge_struggle(user_text),
            self.validate_feelings(user_text),
            self.offer_support(detected_emotion),
            self.provide_encouragement(),
        ]
        logger.debug("CompassionResponse generated for emotion=%s", detected_emotion)
        return CompassionResponse(
            acknowledgement=parts[0],
            validation=parts[1],
            support=parts[2],
            encouragement=parts[3],
            full_response=" ".join(parts),
            detected_emotion=detected_emotion,
        )
".join([acknowledgement, validation, support, encouragement]) + logger.debug("CompassionResponse generated for emotion=%s", detected_emotion) + return CompassionResponse( + acknowledgement=acknowledgement, + validation=validation, + support=support, + encouragement=encouragement, + full_response=full, + detected_emotion=detected_emotion, + ) diff --git a/human_connection/emotional_intelligence/empathy_detector.py b/human_connection/emotional_intelligence/empathy_detector.py new file mode 100644 index 0000000..62ea35b --- /dev/null +++ b/human_connection/emotional_intelligence/empathy_detector.py @@ -0,0 +1,108 @@ +"""Detect emotional states from text using keyword lexicons.""" + +import logging +import re +from dataclasses import dataclass, field +from typing import List, Dict +from statistics import mean + +logger = logging.getLogger(__name__) + +EMOTION_LEXICONS: Dict[str, List[str]] = { + "joy": ["happy", "joy", "delighted", "glad", "pleased", "wonderful", "fantastic", + "great", "love", "excited", "cheerful", "thrilled", "elated", "bliss"], + "sadness": ["sad", "unhappy", "depressed", "miserable", "heartbroken", "grief", + "sorrow", "cry", "tears", "lonely", "hopeless", "disappointed", "down"], + "anger": ["angry", "furious", "rage", "mad", "outraged", "livid", "hate", + "annoyed", "hostile", "bitter", "resentful", "frustrated", "infuriated"], + "fear": ["afraid", "scared", "terrified", "anxious", "worried", "panic", + "nervous", "dread", "frightened", "horrified", "phobia", "uneasy"], + "frustration": ["frustrated", "stuck", "confused", "lost", "cant", "impossible", + "useless", "broken", "failed", "wrong", "error", "problem", "issue"], + "excitement": ["excited", "amazing", "awesome", "incredible", "wow", "yes", + "finally", "cant wait", "thrilled", "pumped", "stoked", "brilliant"], +} + +VALENCE_MAP = {"joy": 0.9, "sadness": -0.8, "anger": -0.7, + "fear": -0.6, "frustration": -0.5, "excitement": 0.8} +AROUSAL_MAP = {"joy": 0.6, "sadness": 0.2, "anger": 0.9, 
+ "fear": 0.8, "frustration": 0.7, "excitement": 0.9} +DISTRESS_EMOTIONS = {"sadness", "fear", "anger", "frustration"} + + +@dataclass +class EmotionalState: + """Represents a detected emotional state.""" + valence: float # -1 (negative) to 1 (positive) + arousal: float # 0 (calm) to 1 (activated) + emotion: str + intensity: float # 0 to 1 + + +@dataclass +class EmotionTrigger: + """Represents a trigger phrase that evoked an emotion.""" + phrase: str + emotion: str + position: int + + +class EmpathyDetector: + """Detect emotional states and triggers from text using lexicon matching.""" + + def __init__(self) -> None: + self._compiled = { + emotion: re.compile(r"\b(" + "|".join(re.escape(w) for w in words) + r")\b", re.IGNORECASE) + for emotion, words in EMOTION_LEXICONS.items() + } + logger.info("EmpathyDetector initialised with %d emotion categories.", len(EMOTION_LEXICONS)) + + def _score_emotions(self, text: str) -> Dict[str, float]: + """Return a normalised hit-count score per emotion.""" + word_count = max(len(text.split()), 1) + scores: Dict[str, float] = {} + for emotion, pattern in self._compiled.items(): + hits = pattern.findall(text) + scores[emotion] = len(hits) / word_count + return scores + + def detect(self, text: str) -> EmotionalState: + """Detect the dominant emotional state in *text*.""" + scores = self._score_emotions(text) + dominant = max(scores, key=lambda e: scores[e]) + intensity = min(scores[dominant] * 10, 1.0) + + if all(v == 0 for v in scores.values()): + return EmotionalState(valence=0.0, arousal=0.3, emotion="neutral", intensity=0.1) + + valence = VALENCE_MAP.get(dominant, 0.0) * intensity + arousal = AROUSAL_MAP.get(dominant, 0.5) * intensity + logger.debug("Detected emotion=%s intensity=%.2f", dominant, intensity) + return EmotionalState(valence=round(valence, 3), arousal=round(arousal, 3), + emotion=dominant, intensity=round(intensity, 3)) + + def detect_triggers(self, text: str) -> List[EmotionTrigger]: + """Return individual 
trigger phrases found in *text*.""" + triggers: List[EmotionTrigger] = [] + for emotion, pattern in self._compiled.items(): + for match in pattern.finditer(text): + triggers.append(EmotionTrigger(phrase=match.group(), + emotion=emotion, + position=match.start())) + triggers.sort(key=lambda t: t.position) + return triggers + + def is_in_distress(self, text: str) -> bool: + """Return True when the text signals significant emotional distress.""" + state = self.detect(text) + if state.emotion in DISTRESS_EMOTIONS and state.intensity >= 0.3: + return True + caps_ratio = sum(1 for c in text if c.isupper()) / max(len(text), 1) + exclamations = text.count("!") + return caps_ratio > 0.4 or exclamations >= 3 + + def emotional_trajectory(self, texts: List[str]) -> List[EmotionalState]: + """Compute an emotional state for each text in chronological order.""" + trajectory = [self.detect(t) for t in texts] + logger.info("Computed trajectory over %d texts.", len(trajectory)) + return trajectory diff --git a/human_connection/emotional_intelligence/stress_monitor.py b/human_connection/emotional_intelligence/stress_monitor.py new file mode 100644 index 0000000..8d7b1fc --- /dev/null +++ b/human_connection/emotional_intelligence/stress_monitor.py @@ -0,0 +1,135 @@ +"""Monitor user stress signals from interaction patterns.""" + +import logging +from collections import deque +from dataclasses import dataclass, field +from datetime import datetime +from enum import Enum +from statistics import mean, stdev +from typing import Deque, Dict, List, Tuple + +logger = logging.getLogger(__name__) + +MAX_HISTORY = 50 # interactions kept per session + + +class StressLevel(Enum): + LOW = "low" + MEDIUM = "medium" + HIGH = "high" + CRITICAL = "critical" + + +@dataclass +class StressIndicator: + """A single observable signal of stress.""" + signal_type: str # e.g. 
"capitalization", "urgency_word", "fast_response" + value: float # normalised 0-1 + description: str + timestamp: datetime = field(default_factory=datetime.utcnow) + + +URGENCY_WORDS = {"urgent", "asap", "immediately", "now", "emergency", "critical", + "broken", "down", "help", "please", "hurry", "fast", "quick"} + +FRUSTRATION_PHRASES = {"this doesn't work", "nothing works", "i give up", + "what the hell", "why is this", "stupid", "useless"} + + +class StressMonitor: + """Track stress signals across user interactions within a session.""" + + def __init__(self) -> None: + # user_id -> deque of (text, response_time_ms, timestamp) + self._history: Dict[str, Deque[Tuple[str, float, datetime]]] = {} + self._indicators: Dict[str, List[StressIndicator]] = {} + logger.info("StressMonitor initialised.") + + def _ensure_user(self, user_id: str) -> None: + if user_id not in self._history: + self._history[user_id] = deque(maxlen=MAX_HISTORY) + self._indicators[user_id] = [] + + def record_interaction(self, text: str, response_time_ms: float, user_id: str = "default") -> None: + """Record a single interaction and extract stress signals.""" + self._ensure_user(user_id) + now = datetime.utcnow() + self._history[user_id].append((text, response_time_ms, now)) + self._extract_signals(user_id, text, response_time_ms, now) + + def _extract_signals(self, user_id: str, text: str, rt_ms: float, ts: datetime) -> None: + indicators = self._indicators[user_id] + + # Capitalisation ratio + if len(text) > 0: + caps_ratio = sum(1 for c in text if c.isupper()) / len(text) + if caps_ratio > 0.25: + indicators.append(StressIndicator("capitalization", min(caps_ratio, 1.0), + f"High caps ratio ({caps_ratio:.0%})", ts)) + + # Exclamation marks + excl = text.count("!") + if excl >= 2: + indicators.append(StressIndicator("exclamation_marks", min(excl / 5, 1.0), + f"{excl} exclamation marks", ts)) + + # Urgency vocabulary + words = set(text.lower().split()) + urgency_hits = words & URGENCY_WORDS + if 
urgency_hits: + indicators.append(StressIndicator("urgency_word", min(len(urgency_hits) / 3, 1.0), + f"Urgency words: {', '.join(urgency_hits)}", ts)) + + # Frustration phrases + low_text = text.lower() + for phrase in FRUSTRATION_PHRASES: + if phrase in low_text: + indicators.append(StressIndicator("frustration_phrase", 0.8, + f"Phrase detected: '{phrase}'", ts)) + break + + # Fast response time (< 3 s) signals possible panic / agitation + if 0 < rt_ms < 3000: + indicators.append(StressIndicator("fast_response", min((3000 - rt_ms) / 3000, 1.0), + f"Very fast response: {rt_ms:.0f} ms", ts)) + + def compute_stress_level(self, user_id: str = "default") -> StressLevel: + """Aggregate recent signals into a StressLevel.""" + self._ensure_user(user_id) + recent = [i for i in self._indicators[user_id][-20:]] + if not recent: + return StressLevel.LOW + avg_score = mean(i.value for i in recent) + if avg_score >= 0.75: + return StressLevel.CRITICAL + if avg_score >= 0.5: + return StressLevel.HIGH + if avg_score >= 0.25: + return StressLevel.MEDIUM + return StressLevel.LOW + + def get_stress_signals(self, user_id: str = "default") -> List[StressIndicator]: + """Return all stress indicators collected for *user_id*.""" + self._ensure_user(user_id) + return list(self._indicators[user_id]) + + def trend(self, user_id: str = "default") -> str: + """Return 'rising', 'falling', or 'stable' based on recent signal values.""" + self._ensure_user(user_id) + scores = [i.value for i in self._indicators[user_id]] + if len(scores) < 4: + return "stable" + first_half = mean(scores[:len(scores) // 2]) + second_half = mean(scores[len(scores) // 2:]) + delta = second_half - first_half + if delta > 0.1: + return "rising" + if delta < -0.1: + return "falling" + return "stable" + + def reset_session(self, user_id: str = "default") -> None: + """Clear all interaction history and indicators for *user_id*.""" + self._history[user_id] = deque(maxlen=MAX_HISTORY) + self._indicators[user_id] = [] + 
logger.info("Session reset for user_id=%s", user_id) diff --git a/human_connection/emotional_intelligence/tone_adjuster.py b/human_connection/emotional_intelligence/tone_adjuster.py new file mode 100644 index 0000000..4ca002d --- /dev/null +++ b/human_connection/emotional_intelligence/tone_adjuster.py @@ -0,0 +1,105 @@ +"""Adapt communication tone for different contexts and emotional states.""" + +import logging +import re +from dataclasses import dataclass + +logger = logging.getLogger(__name__) + +INFORMAL_TO_FORMAL = { + r"\bdon't\b": "do not", r"\bcan't\b": "cannot", r"\bwon't\b": "will not", + r"\bisn't\b": "is not", r"\baren't\b": "are not", r"\bwasn't\b": "was not", + r"\bweren't\b": "were not", r"\bhadn't\b": "had not", r"\bhasn't\b": "has not", + r"\bhaven't\b": "have not", r"\bdidn't\b": "did not", r"\bdoesn't\b": "does not", + r"\bI'm\b": "I am", r"\byou're\b": "you are", r"\bthey're\b": "they are", + r"\bwe're\b": "we are", r"\bhe's\b": "he is", r"\bshe's\b": "she is", + r"\bit's\b": "it is", r"\bthat's\b": "that is", + r"\bgonna\b": "going to", r"\bwanna\b": "want to", r"\bgotta\b": "have to", + r"\bkinda\b": "somewhat", r"\bsorta\b": "somewhat", +} + +COLD_TO_WARM = { + r"^(The result is)": "Great news — the result is", + r"^(Error:)": "Something went wrong:", + r"^(Note:)": "Just so you know:", + r"\buser\b": "you", r"\bthe user\b": "you", + r"\bOne must\b": "You can", +} + +HARSH_TO_GENTLE = { + r"\bfailed\b": "did not succeed yet", + r"\bwrong\b": "not quite right", + r"\bstupid\b": "not the best approach", + r"\bimpossible\b": "quite challenging", + r"\bmust\b": "might want to", + r"\byou need to\b": "it could help to", + r"\byou have to\b": "you might consider", +} + +EMOTION_TONE_MAP = { + "sadness": {"opener": "I'm really sorry to hear that. ", "closer": " I'm here if you need more support."}, + "fear": {"opener": "That sounds really stressful. 
", "closer": " We'll work through this together."}, + "anger": {"opener": "I completely understand your frustration. ", "closer": " Let's fix this right away."}, + "frustration": {"opener": "That's genuinely annoying — let's sort it out. ", "closer": " You're almost there."}, + "joy": {"opener": "That's wonderful! ", "closer": " Keep up the great work!"}, + "excitement": {"opener": "Love the energy! ", "closer": " Let's make it happen!"}, + "neutral": {"opener": "", "closer": ""}, +} + + +@dataclass +class ToneProfile: + """Describes the desired tone of a response.""" + formality: float = 0.5 # 0 = casual, 1 = very formal + warmth: float = 0.5 # 0 = cold, 1 = very warm + directness: float = 0.5 # 0 = indirect, 1 = very direct + empathy_level: float = 0.5 # 0 = neutral, 1 = highly empathetic + + +class ToneAdjuster: + """Adjust the tone of text to match a target ToneProfile or emotional context.""" + + def __init__(self) -> None: + logger.info("ToneAdjuster initialised.") + + @staticmethod + def _apply_replacements(text: str, mapping: dict) -> str: + for pattern, replacement in mapping.items(): + text = re.sub(pattern, replacement, text, flags=re.IGNORECASE) + return text + + def adjust(self, text: str, target_profile: ToneProfile) -> str: + """Apply tone transformations according to *target_profile*.""" + result = text + if target_profile.formality > 0.6: + result = self._apply_replacements(result, INFORMAL_TO_FORMAL) + if target_profile.warmth > 0.6: + result = self._apply_replacements(result, COLD_TO_WARM) + if target_profile.empathy_level > 0.6: + result = self.make_gentler(result) + if target_profile.directness < 0.3 and not result.startswith("Perhaps"): + result = "Perhaps " + result[0].lower() + result[1:] + logger.debug("Tone adjusted: formality=%.1f warmth=%.1f", target_profile.formality, target_profile.warmth) + return result + + def make_warmer(self, text: str) -> str: + """Add warmth cues to *text*.""" + result = self._apply_replacements(text, 
# NOTE(review): the next three defs are methods of the ToneAdjuster class that
# precedes this hunk in the patch.
def make_more_formal(self, text: str) -> str:
    """Convert contractions and colloquialisms to formal equivalents."""
    return self._apply_replacements(text, INFORMAL_TO_FORMAL)

def make_gentler(self, text: str) -> str:
    """Soften potentially harsh or blunt language."""
    return self._apply_replacements(text, HARSH_TO_GENTLE)

def calibrate_to_emotion(self, text: str, emotion: str) -> str:
    """Wrap *text* with an emotion-appropriate opener and closer."""
    tone = EMOTION_TONE_MAP.get(emotion, EMOTION_TONE_MAP["neutral"])
    logger.debug("Calibrated tone for emotion=%s", emotion)
    return tone["opener"] + text.strip() + tone["closer"]


# diff --git: new empty package human_connection/personalization/__init__.py,
# then new file human_connection/personalization/context_memory.py

"""Remember and utilise user context across sessions."""

import logging
from collections import defaultdict
from dataclasses import dataclass, field
from datetime import datetime, timedelta
from typing import Dict, List, Optional
from uuid import uuid4

logger = logging.getLogger(__name__)

RELEVANCE_THRESHOLD = 0.2  # minimum score to include in recall results
DEFAULT_IMPORTANCE = 0.5


@dataclass
class MemoryEntry:
    """A single remembered fact about a user."""
    user_id: str
    key: str
    value: str
    importance: float  # 0-1; higher = kept longer, surfaced first
    entry_id: str = field(default_factory=lambda: str(uuid4()))
    created_at: datetime = field(default_factory=datetime.utcnow)
    accessed_at: datetime = field(default_factory=datetime.utcnow)
    access_count: int = 0


class ContextMemory:
    """Long-term, importance-weighted context memory for each user."""

    def __init__(self) -> None:
        self._memory: Dict[str, List[MemoryEntry]] = defaultdict(list)
        logger.info("ContextMemory initialised.")

    def remember(self, user_id: str, key: str, value: str,
                 importance: float = DEFAULT_IMPORTANCE) -> MemoryEntry:
        """Store a key-value memory for *user_id*, overwriting if key already exists."""
        importance = max(0.0, min(1.0, importance))  # clamp to [0, 1]
        entries = self._memory[user_id]
        existing = next((e for e in entries if e.key == key), None)
        if existing is not None:
            existing.value = value
            existing.importance = importance
            existing.accessed_at = datetime.utcnow()
            logger.debug("Updated memory key=%s user=%s", key, user_id)
            return existing
        fresh = MemoryEntry(user_id=user_id, key=key, value=value, importance=importance)
        entries.append(fresh)
        logger.debug("Stored new memory key=%s user=%s", key, user_id)
        return fresh

    def recall(self, user_id: str, query: str) -> List[MemoryEntry]:
        """Return memories relevant to *query*, sorted by relevance score.

        Side effect: every returned entry has its accessed_at / access_count
        bumped, mirroring a "touch on read" cache policy.
        """
        query_words = set(query.lower().split())
        scored: List[tuple] = []
        for entry in self._memory.get(user_id, []):
            key_words = set(entry.key.lower().split("_"))
            val_words = set(entry.value.lower().split())
            overlap = len(query_words & (key_words | val_words))
            # 70 % lexical overlap, 30 % stored importance.
            score = (overlap / max(len(query_words), 1)) * 0.7 + entry.importance * 0.3
            if score >= RELEVANCE_THRESHOLD:
                entry.accessed_at = datetime.utcnow()
                entry.access_count += 1
                scored.append((score, entry))
        scored.sort(key=lambda pair: pair[0], reverse=True)
        return [entry for _, entry in scored]

    def forget_old_memories(self, user_id: str, days: int = 30) -> int:
        """Remove memories older than *days* days with low importance. Returns count removed."""
        cutoff = datetime.utcnow() - timedelta(days=days)
        before = len(self._memory.get(user_id, []))
        kept = [e for e in self._memory.get(user_id, [])
                if e.created_at > cutoff or e.importance >= 0.7]
        self._memory[user_id] = kept
        removed = before - len(kept)
        logger.info("Forgot %d old memories for user=%s", removed, user_id)
        return removed

    def get_context_summary(self, user_id: str) -> str:
        """Return a human-readable summary of stored context for *user_id*."""
        entries = self._memory.get(user_id, [])
        if not entries:
            return "No context stored yet."
        top = sorted(entries, key=lambda e: e.importance, reverse=True)[:5]
        lines = [f"• {e.key}: {e.value}" for e in top]
        return f"Context summary for {user_id} ({len(entries)} memories):\n" + "\n".join(lines)

    def find_relevant(self, user_id: str, text: str) -> List[MemoryEntry]:
        """Alias for recall with a natural-language text query."""
        return self.recall(user_id, text)

    def all_memories(self, user_id: str) -> List[MemoryEntry]:
        """Return all memories for *user_id*, newest first."""
        return sorted(self._memory.get(user_id, []),
                      key=lambda e: e.created_at, reverse=True)


# diff --git: new file human_connection/personalization/style_adapter.py

"""Adapt AI responses to individual user communication styles."""

import logging
import re
from collections import defaultdict, Counter
from dataclasses import dataclass, field
from typing import Dict, List

logger = logging.getLogger(__name__)

FILLER_WORDS = {"very", "really", "quite", "rather", "just", "actually", "basically",
                "literally", "definitely", "certainly", "absolutely", "simply"}
"expert": "execute"}, + "implement": {"novice": "build", "expert": "implement"}, + "instantiate": {"novice": "create", "expert": "instantiate"}, + "iterate": {"novice": "loop through", "expert": "iterate"}, + "concatenate": {"novice": "join together", "expert": "concatenate"}, + "deprecated": {"novice": "no longer recommended", "expert": "deprecated"}, + "parameter": {"novice": "input value", "expert": "parameter"}, + "asynchronous": {"novice": "non-blocking", "expert": "asynchronous"}, + "polymorphism": {"novice": "flexible behaviour", "expert": "polymorphism"}, +} + + +@dataclass +class CommunicationStyle: + """Describes a user's preferred communication style.""" + verbosity: str = "medium" # "brief", "medium", "verbose" + technicality: str = "medium" # "simple", "medium", "technical" + formality: str = "neutral" # "casual", "neutral", "formal" + avg_message_length: float = 50.0 + common_words: List[str] = field(default_factory=list) + + +class StyleAdapter: + """Learn from user interactions and adapt responses accordingly.""" + + def __init__(self) -> None: + self._styles: Dict[str, CommunicationStyle] = {} + self._word_counters: Dict[str, Counter] = defaultdict(Counter) + logger.info("StyleAdapter initialised.") + + def learn_style(self, user_id: str, text: str) -> None: + """Update style model for *user_id* based on *text* sample.""" + words = text.lower().split() + self._word_counters[user_id].update(words) + + style = self._styles.setdefault(user_id, CommunicationStyle()) + word_count = len(words) + + # Adjust avg message length with exponential moving average + style.avg_message_length = 0.8 * style.avg_message_length + 0.2 * word_count + + # Verbosity + if style.avg_message_length > 80: + style.verbosity = "verbose" + elif style.avg_message_length < 25: + style.verbosity = "brief" + else: + style.verbosity = "medium" + + # Technicality: count technical terms + tech_count = sum(1 for w in words if w in TECHNICAL_SYNONYMS) + ratio = tech_count / 
max(word_count, 1) + style.technicality = "technical" if ratio > 0.05 else ("simple" if ratio < 0.01 else "medium") + + # Formality: contractions suggest casual + contractions = len(re.findall(r"\b\w+n't\b|\bI'm\b|\byou're\b", text, re.IGNORECASE)) + style.formality = "casual" if contractions >= 2 else "neutral" + + # Top common words (excluding stop words) + stop = {"the", "a", "an", "is", "it", "in", "on", "at", "of", "and", "to", "i"} + style.common_words = [w for w, _ in self._word_counters[user_id].most_common(10) if w not in stop] + logger.debug("Style updated for user=%s verbosity=%s technicality=%s", + user_id, style.verbosity, style.technicality) + + def adapt_response(self, response: str, user_id: str) -> str: + """Apply all style adaptations to *response* for *user_id*.""" + style = self._styles.get(user_id, CommunicationStyle()) + result = self.adjust_verbosity(response, style.verbosity) + result = self.adjust_technicality(result, style.technicality) + result = self.match_vocabulary(result, user_id) + return result + + def adjust_verbosity(self, text: str, level: str) -> str: + """Expand or condense text based on desired verbosity *level*.""" + if level == "brief": + # Remove filler words and shorten + words = [w for w in text.split() if w.lower() not in FILLER_WORDS] + sentences = re.split(r"(?<=[.!?])\s+", " ".join(words)) + return " ".join(sentences[:3]) # keep first 3 sentences max + if level == "verbose": + # Add elaboration markers + text = text.replace(". ", ". Additionally, ") + return text.replace(". Additionally, ", ". ", text.count(". 
") - 1) + return text + + def adjust_technicality(self, text: str, level: str) -> str: + """Swap technical terms based on desired *level*.""" + for term, mapping in TECHNICAL_SYNONYMS.items(): + target = mapping.get("novice" if level == "simple" else "expert", term) + text = re.sub(r"\b" + re.escape(term) + r"\b", target, text, flags=re.IGNORECASE) + return text + + def match_vocabulary(self, text: str, user_id: str) -> str: + """Minor vocabulary alignment using the user's frequent terms (placeholder).""" + # This is intentionally lightweight — deep rephrasing would need an LLM. + return text + + def get_style(self, user_id: str) -> CommunicationStyle: + """Return the current CommunicationStyle for *user_id*.""" + return self._styles.get(user_id, CommunicationStyle()) diff --git a/human_connection/personalization/user_profiler.py b/human_connection/personalization/user_profiler.py new file mode 100644 index 0000000..a317412 --- /dev/null +++ b/human_connection/personalization/user_profiler.py @@ -0,0 +1,102 @@ +"""Learn and store user preferences for personalised AI experiences.""" + +import logging +from collections import defaultdict, Counter +from dataclasses import dataclass, field +from datetime import datetime +from statistics import mean +from typing import Any, Dict, List, Optional + +logger = logging.getLogger(__name__) + +EXPERTISE_THRESHOLDS = {"novice": 0.3, "intermediate": 0.6, "expert": 1.0} +TECHNICAL_TERMS = { + "algorithm", "parameter", "function", "class", "object", "module", + "api", "endpoint", "database", "query", "schema", "latency", "throughput", + "gradient", "tensor", "embedding", "inference", "pipeline", +} + + +@dataclass +class UserProfile: + """Stored profile for a single user.""" + user_id: str + preferences: Dict[str, Any] = field(default_factory=dict) + communication_style: str = "balanced" # "concise", "detailed", "balanced" + expertise_level: str = "intermediate" # "novice", "intermediate", "expert" + interaction_count: int = 0 + 
@dataclass
class UserProfile:
    """Stored profile for a single user."""
    user_id: str
    preferences: Dict[str, Any] = field(default_factory=dict)
    communication_style: str = "balanced"    # "concise", "detailed", "balanced"
    expertise_level: str = "intermediate"    # "novice", "intermediate", "expert"
    interaction_count: int = 0
    last_seen: datetime = field(default_factory=datetime.utcnow)
    vocabulary: Counter = field(default_factory=Counter)


class UserProfiler:
    """Build and maintain user profiles through interaction history."""

    def __init__(self) -> None:
        self._profiles: Dict[str, UserProfile] = {}
        logger.info("UserProfiler initialised.")

    def _ensure_profile(self, user_id: str) -> UserProfile:
        """Return the profile for *user_id*, creating a blank one on first sight."""
        if user_id not in self._profiles:
            self._profiles[user_id] = UserProfile(user_id=user_id)
        return self._profiles[user_id]

    def update(self, user_id: str, interaction: Dict[str, Any]) -> None:
        """Incorporate a new interaction into the user's profile."""
        profile = self._ensure_profile(user_id)
        profile.interaction_count += 1
        profile.last_seen = datetime.utcnow()

        text: str = interaction.get("text", "")
        if text:
            profile.vocabulary.update(text.lower().split())

        # Explicit preference keys overwrite previous values.
        for key in ("topic", "format", "language", "domain"):
            if key in interaction:
                profile.preferences[key] = interaction[key]

        # Message length hints at the preferred answer style.
        word_count = len(text.split()) if text else 0
        if word_count > 80:
            profile.communication_style = "detailed"
        elif word_count < 20:
            profile.communication_style = "concise"

        profile.expertise_level = self.infer_expertise(user_id)
        logger.debug("Profile updated: user=%s interactions=%d", user_id, profile.interaction_count)

    def get_profile(self, user_id: str) -> UserProfile:
        """Return the UserProfile for *user_id*, creating one if needed."""
        return self._ensure_profile(user_id)

    def infer_expertise(self, user_id: str) -> str:
        """Estimate expertise level from technical vocabulary usage."""
        profile = self._profiles.get(user_id)
        if profile is None or not profile.vocabulary:
            return "intermediate"
        total = sum(profile.vocabulary.values())
        technical = sum(profile.vocabulary[w] for w in TECHNICAL_TERMS if w in profile.vocabulary)
        ratio = technical / max(total, 1)
        if ratio >= 0.08:
            return "expert"
        if ratio >= 0.03:
            return "intermediate"
        return "novice"

    def get_preferences(self, user_id: str) -> Dict[str, Any]:
        """Return a copy of the stored preference dictionary for *user_id*."""
        return self._ensure_profile(user_id).preferences.copy()

    def similar_users(self, user_id: str) -> List[str]:
        """Return user IDs with similar expertise and communication style."""
        target = self._profiles.get(user_id)
        if target is None:
            return []
        matches: List[str] = []
        for uid, profile in self._profiles.items():
            if uid == user_id:
                continue
            if (profile.expertise_level == target.expertise_level
                    and profile.communication_style == target.communication_style):
                matches.append(uid)
        return matches


# diff --git: new empty package human_connection/purpose_driven/__init__.py,
# then new file human_connection/purpose_driven/accessibility.py

"""Universal design and accessibility utilities for AI-generated text."""

import logging
import re
from dataclasses import dataclass, field
from typing import Dict, List

logger = logging.getLogger(__name__)

COMPLEX_WORDS: Dict[str, str] = {
    "utilise": "use", "utilization": "use", "commence": "start",
    "terminate": "end", "ascertain": "find out", "endeavour": "try",
    "facilitate": "help", "implement": "carry out", "leverage": "use",
    "methodology": "method", "paradigm": "model", "synergy": "cooperation",
    "bandwidth": "capacity", "iterate": "repeat", "optimise": "improve",
    "mitigate": "reduce", "henceforth": "from now on", "aforementioned": "the above",
    "notwithstanding": "despite", "herein": "here", "pursuant": "following",
}

NON_INCLUSIVE_TERMS: Dict[str, str] = {
    r"\bmanpower\b": "workforce", r"\bblacklist\b": "blocklist",
    r"\bwhitelist\b": "allowlist", r"\bmaster\b": "primary",
    r"\bslave\b": "secondary", r"\bhe or she\b": "they",
    r"\bhis or her\b": "their", r"\bguy\b": "person",
    r"\bguys\b": "everyone", r"\bcrazy\b": "unexpected",
    r"\binsane\b": "unusual", r"\blame\b": "issue",
}
READING_LEVELS = {
    "simple": 6,     # ~grade 6, Flesch ≥ 70
    "standard": 10,  # ~grade 10, Flesch ≥ 50
    "technical": 14, # ~grade 14, Flesch ≥ 30
}


@dataclass
class AccessibilityIssue:
    """A detected accessibility concern."""
    issue_type: str   # e.g. "long_sentence", "complex_word", "non_inclusive"
    description: str
    suggestion: str
    severity: str = "warning"  # "info", "warning", "error"


class AccessibilityChecker:
    """Evaluate and improve text accessibility.

    Covers sentence length, complex vocabulary, inclusive wording,
    alt-text reminders and an approximate Flesch readability score.
    """

    def __init__(self) -> None:
        # Pre-compile the non-inclusive patterns once; values are replacements.
        self._inclusive_patterns = {}
        for raw, substitute in NON_INCLUSIVE_TERMS.items():
            self._inclusive_patterns[re.compile(raw, re.IGNORECASE)] = substitute
        logger.info("AccessibilityChecker initialised.")

    def check(self, text: str) -> List[AccessibilityIssue]:
        """Return all accessibility issues found in *text*."""
        issues: List[AccessibilityIssue] = []

        # Overly long sentences.
        for sentence in re.split(r"[.!?]+", text):
            word_total = len(sentence.split())
            if word_total > 35:
                issues.append(AccessibilityIssue(
                    "long_sentence",
                    f"Sentence has {word_total} words.",
                    "Break it into shorter sentences (≤ 20 words each).",
                    "warning"))

        # Jargon / complex vocabulary.
        for word, replacement in COMPLEX_WORDS.items():
            if re.search(r"\b" + re.escape(word) + r"\b", text, re.IGNORECASE):
                issues.append(AccessibilityIssue(
                    "complex_word",
                    f"Complex word '{word}' found.",
                    f"Consider replacing with '{replacement}'.",
                    "info"))

        # Non-inclusive terminology.
        for pattern, replacement in self._inclusive_patterns.items():
            if pattern.search(text):
                issues.append(AccessibilityIssue(
                    "non_inclusive",
                    f"Non-inclusive term found matching pattern '{pattern.pattern}'.",
                    f"Consider using '{replacement}' instead.",
                    "warning"))

        return issues

    def simplify_language(self, text: str, reading_level: str = "standard") -> str:
        """Replace complex words with simpler equivalents."""
        result = text
        for word, replacement in COMPLEX_WORDS.items():
            result = re.sub(r"\b" + re.escape(word) + r"\b", replacement, result, flags=re.IGNORECASE)
        if reading_level != "simple":
            return result
        # "simple" additionally halves any sentence longer than 20 words.
        pieces: List[str] = []
        for sent in re.split(r"(?<=[.!?])\s+", result):
            tokens = sent.split()
            if len(tokens) > 20:
                half = len(tokens) // 2
                pieces.append(" ".join(tokens[:half]) + ".")
                pieces.append(" ".join(tokens[half:]))
            else:
                pieces.append(sent)
        return " ".join(pieces)

    def add_alt_text_suggestions(self, text: str) -> str:
        """Append alt-text reminders after image/figure references."""
        return re.sub(
            r"\b(image|figure|diagram|chart|graph|screenshot)\b",
            r"\1 [add descriptive alt text here]",
            text, flags=re.IGNORECASE)

    def check_readability(self, text: str) -> Dict:
        """Approximate Flesch Reading Ease and grade level."""
        sentence_list = [s for s in re.split(r"[.!?]+", text) if s.strip()]
        word_list = text.split()
        if not sentence_list or not word_list:
            return {"flesch_score": 0, "grade_level": 0, "avg_sentence_length": 0,
                    "avg_syllables_per_word": 0}

        def syllables_in(token: str) -> int:
            # Vowel-group heuristic; minimum one syllable per word.
            token = token.lower().rstrip("es ")
            return max(1, len(re.findall(r"[aeiou]+", token)))

        sentence_len = len(word_list) / len(sentence_list)
        syllable_avg = sum(syllables_in(w) for w in word_list) / len(word_list)
        raw_flesch = 206.835 - 1.015 * sentence_len - 84.6 * syllable_avg
        flesch = round(min(100.0, max(0.0, raw_flesch)), 1)
        grade = round(0.39 * sentence_len + 11.8 * syllable_avg - 15.59, 1)
        return {"flesch_score": flesch, "grade_level": max(1.0, grade),
                "avg_sentence_length": round(sentence_len, 1),
                "avg_syllables_per_word": round(syllable_avg, 2)}

    def make_inclusive(self, text: str) -> str:
        """Replace non-inclusive terms with inclusive alternatives."""
        cleaned = text
        for pattern, replacement in self._inclusive_patterns.items():
            cleaned = pattern.sub(replacement, cleaned)
        return cleaned
class EmpowermentCoach:
    """Guide users toward skill growth through assessment, paths, and celebration."""

    def __init__(self) -> None:
        # user_id -> skill -> list[score float]
        self._growth: Dict[str, Dict[str, List[float]]] = defaultdict(lambda: defaultdict(list))
        logger.info("EmpowermentCoach initialised.")

    def assess_skills(self, user_responses: List[str]) -> List[SkillGap]:
        """Infer skill levels from a list of user-written responses."""
        corpus = " ".join(user_responses).lower()
        gaps: List[SkillGap] = []
        for skill, keywords in SKILL_KEYWORDS.items():
            matched = len([kw for kw in keywords if kw in corpus])
            level = min(matched / len(keywords), 1.0)
            if level >= 0.8:
                # Skill looks solid — no gap to report.
                continue
            if level < 0.3:
                priority = "high"
            elif level < 0.6:
                priority = "medium"
            else:
                priority = "low"
            gaps.append(SkillGap(skill=skill, current_score=round(level, 2),
                                 priority=priority))
        # Weakest skills first.
        gaps.sort(key=lambda gap: gap.current_score)
        logger.debug("Assessed %d skill gaps.", len(gaps))
        return gaps

    def create_learning_path(self, skill_gaps: List[SkillGap]) -> LearningPath:
        """Build an ordered LearningPath from assessed SkillGaps."""
        # Take up to two resources per gap, in gap order.
        steps = [
            f"[{gap.skill.upper()} step {i + 1}] {resource}"
            for gap in skill_gaps
            for i, resource in enumerate(SKILL_RESOURCES.get(gap.skill, [])[:2])
        ]
        return LearningPath(skill_gaps=skill_gaps, steps=steps,
                            estimated_hours=round(len(skill_gaps) * 2.5, 1))

    def suggest_next_step(self, user_id: str) -> str:
        """Suggest the single most impactful next learning action for a user."""
        growth = self._growth.get(user_id, {})
        if not growth:
            return "Start by completing a skill self-assessment to reveal your strongest opportunities."
        averages: Dict[str, float] = {}
        for skill, scores in growth.items():
            if scores:
                averages[skill] = sum(scores) / len(scores)
        if not averages:
            return "Keep practising — every interaction teaches you something new."
        weakest = min(averages, key=averages.get)
        resources = SKILL_RESOURCES.get(weakest, ["Explore a related tutorial or documentation."])
        return f"Focus on {weakest.replace('_', ' ')}: {resources[0]}"

    def celebrate_progress(self, achievement: str) -> str:
        """Return a celebration message for an achievement."""
        return random.choice(CELEBRATION_MESSAGES).format(achievement=achievement)

    def track_growth(self, user_id: str, skill: str, score: float) -> None:
        """Record a skill score observation for a user (clamped to [0, 1])."""
        self._growth[user_id][skill].append(min(1.0, max(0.0, score)))
        logger.debug("Growth tracked: user=%s skill=%s score=%.2f", user_id, skill, score)
+class EthicsCategory(Enum): + HARMFUL_CONTENT = "harmful_content" + BIAS = "bias" + PRIVACY = "privacy" + ACCESSIBILITY = "accessibility" + MISINFORMATION = "misinformation" + + +@dataclass +class EthicsViolation: + """A detected ethics violation.""" + category: EthicsCategory + description: str + severity: float # 0-1 + snippet: str = "" + timestamp: datetime = field(default_factory=datetime.utcnow) + + +# --------------------------------------------------------------------------- +# Lexicons +# --------------------------------------------------------------------------- +HARMFUL_KEYWORDS = { + "kill", "harm", "hurt", "attack", "destroy", "suicide", "bomb", "weapon", + "poison", "rape", "murder", "violence", "hate", "abuse", +} + +BIAS_KEYWORDS = { + "always", "never", "all", "none", "every", "typical", "obviously", + "naturally", "of course", "just like", "these people", "those people", +} + +GENDERED_ASSUMPTIONS = { + r"\bhe is a (doctor|engineer|ceo|manager|developer|programmer)\b", + r"\bshe is a (nurse|secretary|cleaner|receptionist)\b", +} + +# Common PII patterns +PII_PATTERNS = { + "email": re.compile(r"[a-zA-Z0-9._%+\-]+@[a-zA-Z0-9.\-]+\.[a-zA-Z]{2,}"), + "phone": re.compile(r"\b(\+?\d[\d\s\-().]{7,}\d)\b"), + "ssn": re.compile(r"\b\d{3}-\d{2}-\d{4}\b"), + "credit_card": re.compile(r"\b(?:\d[ \-]?){13,16}\b"), + "ip_address": re.compile(r"\b(?:\d{1,3}\.){3}\d{1,3}\b"), +} + +LONG_WORD_THRESHOLD = 4 # avg words per sentence above this → complexity issue +LONG_SENTENCE_WORDS = 40 # a sentence with more words than this is flagged + + +class EthicsChecker: + """Validate text against ethical AI guidelines.""" + + def __init__(self) -> None: + self._gendered_patterns = [ + re.compile(p, re.IGNORECASE) for p in GENDERED_ASSUMPTIONS + ] + logger.info("EthicsChecker initialised.") + + def check(self, text: str, context: Dict | None = None) -> List[EthicsViolation]: + """Run all ethical checks and return a list of violations.""" + violations: List[EthicsViolation] 
= [] + if self.is_harmful(text): + words = set(text.lower().split()) & HARMFUL_KEYWORDS + violations.append(EthicsViolation( + category=EthicsCategory.HARMFUL_CONTENT, + description=f"Potentially harmful language detected: {', '.join(words)}", + severity=0.9, snippet=text[:120])) + if self.is_biased(text): + violations.append(EthicsViolation( + category=EthicsCategory.BIAS, + description="Absolute / stereotyping language found.", + severity=0.6, snippet=text[:120])) + if not self.is_privacy_safe(text): + violations.append(EthicsViolation( + category=EthicsCategory.PRIVACY, + description="Potential PII or sensitive data exposure.", + severity=0.85, snippet="[redacted]")) + if not self.is_accessible(text): + violations.append(EthicsViolation( + category=EthicsCategory.ACCESSIBILITY, + description="Text may be too complex or contain long sentences.", + severity=0.4, snippet=text[:80])) + return violations + + def is_harmful(self, text: str) -> bool: + """Return True if harmful keywords are present.""" + words = set(text.lower().split()) + return bool(words & HARMFUL_KEYWORDS) + + def is_biased(self, text: str) -> bool: + """Return True if absolute language or gendered assumptions are found.""" + words = set(text.lower().split()) + if words & BIAS_KEYWORDS: + return True + for pattern in self._gendered_patterns: + if pattern.search(text): + return True + return False + + def is_privacy_safe(self, text: str) -> bool: + """Return True when no PII patterns are detected.""" + for _label, pattern in PII_PATTERNS.items(): + if pattern.search(text): + return False + return True + + def is_accessible(self, text: str) -> bool: + """Return True when the text does not contain excessively long sentences.""" + sentences = re.split(r"[.!?]+", text) + for sentence in sentences: + if len(sentence.split()) > LONG_SENTENCE_WORDS: + return False + return True + + def get_ethics_score(self, text: str) -> float: + """Return a 0-1 ethics score (1 = fully compliant, 0 = many violations).""" 
+ violations = self.check(text) + if not violations: + return 1.0 + total_severity = sum(v.severity for v in violations) + return round(max(0.0, 1.0 - total_severity / (len(violations) + 1)), 3) diff --git a/human_connection/purpose_driven/impact_tracker.py b/human_connection/purpose_driven/impact_tracker.py new file mode 100644 index 0000000..7a1207a --- /dev/null +++ b/human_connection/purpose_driven/impact_tracker.py @@ -0,0 +1,110 @@ +"""Track the human impact of AI interactions.""" + +import logging +from collections import defaultdict +from dataclasses import dataclass, field +from datetime import datetime +from statistics import mean +from typing import Dict, List, Optional +from uuid import uuid4 + +logger = logging.getLogger(__name__) + +WELLBEING_CATEGORIES = {"learning", "productivity", "emotional_support", + "problem_solving", "creativity", "health", "social"} + +WIN_THRESHOLD = 0.7 # value_score >= this is highlighted as a win + + +@dataclass +class ImpactEvent: + """A single recorded impact event.""" + category: str + description: str + value_score: float # 0-1, higher = more positive impact + user_id: str + event_id: str = field(default_factory=lambda: str(uuid4())) + timestamp: datetime = field(default_factory=datetime.utcnow) + + +@dataclass +class ImpactSummary: + """Aggregated impact summary for one user.""" + user_id: str + total_events: int + average_value_score: float + top_category: str + win_count: int + wellbeing_score: float + generated_at: datetime = field(default_factory=datetime.utcnow) + + +class ImpactTracker: + """Record and analyse the positive impact AI interactions have on users.""" + + def __init__(self) -> None: + self._events: Dict[str, List[ImpactEvent]] = defaultdict(list) + logger.info("ImpactTracker initialised.") + + def record(self, event: ImpactEvent) -> None: + """Persist a new impact event.""" + event.value_score = max(0.0, min(1.0, event.value_score)) + self._events[event.user_id].append(event) + 
class ImpactTracker:
    """Record and analyse the positive impact AI interactions have on users."""

    def __init__(self) -> None:
        # user_id -> list of ImpactEvent, in arrival order.
        self._events: Dict[str, List[ImpactEvent]] = defaultdict(list)
        logger.info("ImpactTracker initialised.")

    def record(self, event: ImpactEvent) -> None:
        """Persist a new impact event (score clamped into [0, 1])."""
        event.value_score = min(1.0, max(0.0, event.value_score))
        self._events[event.user_id].append(event)
        logger.debug("Recorded impact event category=%s user=%s score=%.2f",
                     event.category, event.user_id, event.value_score)

    def get_summary(self, user_id: str) -> ImpactSummary:
        """Return an aggregated ImpactSummary for *user_id*."""
        events = self._events.get(user_id, [])
        if not events:
            return ImpactSummary(user_id=user_id, total_events=0, average_value_score=0.0,
                                 top_category="none", win_count=0, wellbeing_score=0.0)

        values = [event.value_score for event in events]
        tallies: Dict[str, int] = defaultdict(int)
        for event in events:
            tallies[event.category] += 1
        return ImpactSummary(
            user_id=user_id,
            total_events=len(events),
            average_value_score=round(mean(values), 3),
            top_category=max(tallies, key=tallies.get),
            win_count=len([v for v in values if v >= WIN_THRESHOLD]),
            wellbeing_score=self.compute_wellbeing_score(user_id),
        )

    def compute_wellbeing_score(self, user_id: str) -> float:
        """Compute a 0-1 wellbeing score from category diversity and value scores."""
        events = self._events.get(user_id, [])
        if not events:
            return 0.0
        base = mean(event.value_score for event in events)
        # Up to +0.2 bonus for covering many wellbeing categories.
        distinct = {event.category for event in events}
        bonus = 0.2 * min(len(distinct) / len(WELLBEING_CATEGORIES), 1.0)
        score = round(min(1.0, base + bonus), 3)
        logger.debug("Wellbeing score for user=%s: %.3f", user_id, score)
        return score

    def highlight_wins(self, user_id: str) -> List[str]:
        """Return descriptions of high-value impact events for *user_id*."""
        wins: List[str] = []
        for event in self._events.get(user_id, []):
            if event.value_score >= WIN_THRESHOLD:
                wins.append(f"[{event.category.upper()}] {event.description}")
        return wins

    def generate_impact_report(self) -> Dict:
        """Produce a platform-wide impact report across all users."""
        all_events: List[ImpactEvent] = []
        for user_events in self._events.values():
            all_events.extend(user_events)
        if not all_events:
            return {"total_users": 0, "total_events": 0,
                    "platform_wellbeing": 0.0, "categories": {}}

        per_category: Dict[str, List[float]] = defaultdict(list)
        for event in all_events:
            per_category[event.category].append(event.value_score)

        categories = {}
        for cat, values in per_category.items():
            categories[cat] = {"count": len(values), "avg_score": round(mean(values), 3)}
        return {
            "total_users": len(self._events),
            "total_events": len(all_events),
            "platform_wellbeing": round(mean(e.value_score for e in all_events), 3),
            "categories": categories,
        }
@dataclass
class BreakSuggestion:
    """A recommended break for a user."""
    break_type: str  # "micro", "short", "long", "recovery"
    duration_minutes: int
    reason: str
    activity_suggestion: str
    created_at: datetime = field(default_factory=datetime.utcnow)


# Suggested break length (minutes) per break type.
DURATION_MAP = {"micro": 2, "short": 5, "long": 15, "recovery": 30}


class BreakSuggester:
    """Track work sessions and suggest appropriate breaks."""

    def __init__(self) -> None:
        # user_id -> session start time
        self._sessions: Dict[str, datetime] = {}
        self._last_break: Dict[str, Optional[datetime]] = defaultdict(lambda: None)
        self._break_log: Dict[str, List[datetime]] = defaultdict(list)
        self._interaction_count: Dict[str, int] = defaultdict(int)
        logger.info("BreakSuggester initialised.")

    def _work_duration(self, user_id: str) -> timedelta:
        started = self._sessions.get(user_id)
        if started is None:
            # First sighting of this user starts a fresh session.
            self._sessions[user_id] = datetime.utcnow()
            return timedelta(0)
        return datetime.utcnow() - started

    def _classify_break(self, duration: timedelta) -> str:
        # Check the largest thresholds first so the longest due break wins.
        for candidate in ("recovery", "long", "short", "micro"):
            if duration >= WORK_DURATION_THRESHOLDS[candidate]:
                return candidate
        return "micro"

    def should_suggest_break(self, user_id: str) -> bool:
        """Return True if a break is due for *user_id*."""
        self._interaction_count[user_id] += 1
        worked = self._work_duration(user_id)
        if worked >= WORK_DURATION_THRESHOLDS["micro"]:
            return True
        # Also nudge on every 20th interaction.
        return self._interaction_count[user_id] % 20 == 0

    def suggest(self, user_id: str) -> BreakSuggestion:
        """Build a BreakSuggestion tailored to the user's current work duration."""
        worked = self._work_duration(user_id)
        kind = self._classify_break(worked)
        minutes_worked = int(worked.total_seconds() / 60)
        suggestion = BreakSuggestion(
            break_type=kind,
            duration_minutes=DURATION_MAP[kind],
            reason=(f"You've been working for {minutes_worked} minutes without a break. "
                    f"A {kind} break will help you stay focused."),
            activity_suggestion=random.choice(BREAK_ACTIVITIES[kind]),
        )
        logger.debug("Break suggestion (%s) for user=%s", kind, user_id)
        return suggestion

    def record_break_taken(self, user_id: str) -> None:
        """Mark that the user has taken a break — reset the work-session clock."""
        moment = datetime.utcnow()
        self._sessions[user_id] = moment
        self._last_break[user_id] = moment
        self._break_log[user_id].append(moment)
        self._interaction_count[user_id] = 0
        logger.info("Break recorded for user=%s at %s", user_id, moment.isoformat())

    def get_break_stats(self, user_id: str) -> Dict:
        """Return a summary of break-taking behaviour for *user_id*."""
        history = self._break_log.get(user_id, [])
        if not history:
            return {"total_breaks": 0, "last_break": None, "avg_break_gap_minutes": None}

        gaps = [(later - earlier).total_seconds() / 60
                for earlier, later in zip(history, history[1:])]
        return {
            "total_breaks": len(history),
            "last_break": history[-1].isoformat(),
            "avg_break_gap_minutes": round(sum(gaps) / len(gaps), 1) if gaps else None,
            "continuous_work_minutes": int(self._work_duration(user_id).total_seconds() / 60),
        }
@dataclass
class Achievement:
    """A recorded user achievement."""
    user_id: str
    description: str
    category: str
    achievement_id: str = field(default_factory=lambda: str(uuid4()))
    timestamp: datetime = field(default_factory=datetime.utcnow)


@dataclass
class CelebrationMessage:
    """A celebration message for an achievement."""
    message: str
    emoji: str
    achievement: Achievement
    milestone_message: Optional[str] = None


class CelebrationEngine:
    """Track achievements and generate enthusiastic celebration messages."""

    def __init__(self) -> None:
        self._achievements: Dict[str, List[Achievement]] = defaultdict(list)
        self._streaks: Dict[str, List[datetime]] = defaultdict(list)
        logger.info("CelebrationEngine initialised.")

    def record_achievement(self, user_id: str, achievement: str, category: str = "general") -> Achievement:
        """Store a new achievement for *user_id* and return it."""
        entry = Achievement(user_id=user_id, description=achievement, category=category)
        self._achievements[user_id].append(entry)
        self._streaks[user_id].append(entry.timestamp)
        logger.info("Achievement recorded: user=%s category=%s", user_id, category)
        return entry

    def generate_celebration(self, achievement: Achievement) -> CelebrationMessage:
        """Create a CelebrationMessage for *achievement*."""
        pool = CELEBRATION_TEMPLATES.get(achievement.category, CELEBRATION_TEMPLATES["general"])
        text = random.choice(pool).format(achievement=achievement.description)
        # Templates lead with an emoji; fall back to the party popper otherwise.
        leading = text[0] if text and not text[0].isalpha() else "🎉"

        earned = len(self._achievements.get(achievement.user_id, []))
        milestone = self.get_milestone_message(earned) if earned in MILESTONE_MESSAGES else None
        return CelebrationMessage(message=text, emoji=leading,
                                  achievement=achievement, milestone_message=milestone)

    def get_milestone_message(self, count: int) -> str:
        """Return a milestone message for a given achievement *count*."""
        # Use the highest milestone at or below *count*.
        reached = [threshold for threshold in MILESTONE_MESSAGES if threshold <= count]
        if reached:
            return MILESTONE_MESSAGES[max(reached)]
        return "🎯 Keep going — your first milestone is within reach!"

    def weekly_summary(self, user_id: str) -> str:
        """Summarise achievements from the past 7 days for *user_id*."""
        window_start = datetime.utcnow() - timedelta(days=7)
        recent = [entry for entry in self._achievements.get(user_id, [])
                  if entry.timestamp >= window_start]
        if not recent:
            return f"No achievements this week yet, {user_id} — there's still time to create some!"
        counts: Dict[str, int] = defaultdict(int)
        for entry in recent:
            counts[entry.category] += 1
        top_cat = max(counts, key=counts.get)
        lines = [f" • {entry.description}" for entry in recent[-5:]]
        return (f"🗓️ Weekly summary for {user_id}: {len(recent)} achievement(s) this week!\n"
                f"Top category: {top_cat}\nRecent highlights:\n" + "\n".join(lines))

    def get_streak(self, user_id: str) -> int:
        """Return the current consecutive-day achievement streak for *user_id*."""
        days = sorted({stamp.date() for stamp in self._streaks.get(user_id, [])}, reverse=True)
        if not days:
            return 0
        streak = 1
        for previous, current in zip(days, days[1:]):
            if (previous - current).days != 1:
                break
            streak += 1
        return streak
(task_type, complexity_str, timestamp) + self._tasks: Dict[str, List[Tuple[str, str, datetime]]] = defaultdict(list) + logger.info("WorkloadAnalyzer initialised.") + + def record_task(self, user_id: str, task_type: str, complexity: str = "medium") -> None: + """Record a new task for *user_id*.""" + complexity = complexity.lower() if complexity.lower() in COMPLEXITY_SCORES else "medium" + self._tasks[user_id].append((task_type, complexity, datetime.utcnow())) + logger.debug("Task recorded: user=%s type=%s complexity=%s", user_id, task_type, complexity) + + def _recent_tasks(self, user_id: str) -> List[Tuple[str, str, datetime]]: + cutoff = datetime.utcnow() - timedelta(hours=TASK_WINDOW_HOURS) + return [(t, c, ts) for t, c, ts in self._tasks.get(user_id, []) if ts >= cutoff] + + def compute_workload(self, user_id: str) -> WorkloadMetrics: + """Compute workload metrics over the last *TASK_WINDOW_HOURS* hours.""" + recent = self._recent_tasks(user_id) + if not recent: + return WorkloadMetrics(user_id=user_id, task_count=0, total_complexity=0.0, + avg_complexity=0.0, peak_complexity=0.0, tasks_per_hour=0.0) + scores = [COMPLEXITY_SCORES[c] for _, c, _ in recent] + total = sum(scores) + avg = mean(scores) + peak = max(scores) + tasks_per_hour = len(recent) / TASK_WINDOW_HOURS + return WorkloadMetrics(user_id=user_id, task_count=len(recent), + total_complexity=total, avg_complexity=round(avg, 2), + peak_complexity=float(peak), tasks_per_hour=round(tasks_per_hour, 2)) + + def get_workload_level(self, user_id: str) -> WorkloadLevel: + """Classify the user's current workload into a WorkloadLevel.""" + metrics = self.compute_workload(user_id) + tph = metrics.tasks_per_hour + avg = metrics.avg_complexity + if tph >= 4 or avg >= 6: + return WorkloadLevel.OVERLOADED + if tph >= 2.5 or avg >= 4: + return WorkloadLevel.HEAVY + if tph >= 1 or avg >= 2: + return WorkloadLevel.MODERATE + return WorkloadLevel.LIGHT + + def predict_burnout_risk(self, user_id: str) -> float: + 
"""Return a 0-1 burnout risk score based on task load trend.""" + all_tasks = self._tasks.get(user_id, []) + if len(all_tasks) < 5: + return 0.0 + scores = [COMPLEXITY_SCORES[c] for _, c, _ in all_tasks[-20:]] + avg = mean(scores) + variability = stdev(scores) if len(scores) > 1 else 0.0 + # High average + low variability = sustained pressure = burnout risk + risk = min((avg / 8) * 0.7 + (1 - min(variability / 4, 1)) * 0.3, 1.0) + return round(risk, 3) + + def get_recommendations(self, user_id: str) -> List[str]: + """Provide actionable workload-management recommendations.""" + level = self.get_workload_level(user_id) + risk = self.predict_burnout_risk(user_id) + recs: List[str] = [] + if level in (WorkloadLevel.HEAVY, WorkloadLevel.OVERLOADED): + recs.append("Consider delegating or deferring lower-priority tasks.") + recs.append("Take a 10-minute break before starting the next high-complexity item.") + if level == WorkloadLevel.OVERLOADED: + recs.append("Your current load is unsustainable — raise this with your team.") + if risk >= 0.7: + recs.append("Burnout risk is elevated. Protect time for recovery and deep rest.") + if not recs: + recs.append("Workload looks healthy. 
"""Coordinate multiple AGI agents for complex multi-step reasoning."""

import logging
import time
from dataclasses import dataclass, field
from datetime import datetime
from typing import Any, Callable, Dict, List, Optional
from uuid import uuid4

logger = logging.getLogger(__name__)


@dataclass
class AgentTask:
    """A unit of work dispatched to one or more agents."""
    text: str
    context: Dict[str, Any] = field(default_factory=dict)
    task_id: str = field(default_factory=lambda: str(uuid4()))
    created_at: datetime = field(default_factory=datetime.utcnow)
    priority: int = 5  # 1 (highest) to 10 (lowest)


@dataclass
class AgentResult:
    """The output produced by an agent or aggregated from multiple agents."""
    task_id: str
    agent_name: str
    output: str
    confidence: float  # 0-1
    latency_ms: float
    success: bool = True
    error: Optional[str] = None
    metadata: Dict[str, Any] = field(default_factory=dict)


class AGIOrchestrator:
    """Register agents and dispatch tasks for single-agent or multi-agent reasoning."""

    def __init__(self) -> None:
        # name -> callable(task: AgentTask) -> AgentResult
        self._agents: Dict[str, Callable[[AgentTask], AgentResult]] = {}
        self._agent_stats: Dict[str, Dict[str, Any]] = {}
        logger.info("AGIOrchestrator initialised.")

    def register_agent(self, name: str, agent: Callable[[AgentTask], AgentResult]) -> None:
        """Register an agent callable under *name*."""
        self._agents[name] = agent
        self._agent_stats[name] = {"calls": 0, "errors": 0, "total_latency_ms": 0.0}
        logger.info("Agent registered: %s", name)

    def dispatch(self, task_text: str, context: Optional[Dict] = None) -> AgentResult:
        """Dispatch a task to the best available agent."""
        task = AgentTask(text=task_text, context=context or {})

        if not self._agents:
            return AgentResult(task_id=task.task_id, agent_name="none",
                               output="No agents registered.", confidence=0.0,
                               latency_ms=0.0, success=False, error="No agents available.")

        # Load-balance by picking the least-called agent so far.
        least_used = min(self._agents, key=lambda agent: self._agent_stats[agent]["calls"])
        return self._call_agent(least_used, task)

    def _call_agent(self, name: str, task: AgentTask) -> AgentResult:
        """Invoke one agent, tracking call counts, errors, and latency."""
        started = time.monotonic()
        stats = self._agent_stats[name]
        stats["calls"] += 1
        try:
            result = self._agents[name](task)
        except Exception as exc:  # noqa: BLE001
            stats["errors"] += 1
            elapsed = (time.monotonic() - started) * 1000
            logger.exception("Agent %s raised an error.", name)
            return AgentResult(task_id=task.task_id, agent_name=name, output="",
                               confidence=0.0, latency_ms=round(elapsed, 2),
                               success=False, error=str(exc))
        elapsed = (time.monotonic() - started) * 1000
        stats["total_latency_ms"] += elapsed
        result.latency_ms = round(elapsed, 2)
        logger.debug("Agent %s completed in %.1f ms", name, elapsed)
        return result

    def multi_agent_reasoning(self, problem: str, agents: List[str]) -> AgentResult:
        """Run *problem* through each listed agent and synthesise the results."""
        task = AgentTask(text=problem)
        results: List[AgentResult] = []
        for name in agents:
            if name not in self._agents:
                logger.warning("Agent %s not registered — skipping.", name)
                continue
            results.append(self._call_agent(name, task))

        if not results:
            return AgentResult(task_id=task.task_id, agent_name="orchestrator",
                               output="No valid agents provided.", confidence=0.0,
                               latency_ms=0.0, success=False)

        combined = self.synthesize_results(results)
        mean_confidence = sum(r.confidence for r in results) / len(results)
        return AgentResult(task_id=task.task_id, agent_name="orchestrator",
                           output=combined,
                           confidence=round(mean_confidence, 3),
                           latency_ms=round(sum(r.latency_ms for r in results), 2),
                           metadata={"contributing_agents": agents,
                                     "result_count": len(results)})

    def synthesize_results(self, results: List[AgentResult]) -> str:
        """Merge multiple agent outputs into a single coherent response."""
        usable = [r for r in results if r.success and r.output]
        if not usable:
            return "No successful results to synthesise."
        if len(usable) == 1:
            return usable[0].output
        lines = [f"[{r.agent_name}]: {r.output}" for r in usable]
        return "Combined reasoning:\n" + "\n".join(lines)

    def get_agent_status(self) -> Dict:
        """Return live stats for all registered agents."""
        status = {}
        for name, stats in self._agent_stats.items():
            calls = stats["calls"]
            avg_lat = (stats["total_latency_ms"] / calls) if calls else 0.0
            status[name] = {
                "calls": calls, "errors": stats["errors"],
                "avg_latency_ms": round(avg_lat, 2),
                "error_rate": round(stats["errors"] / max(calls, 1), 3),
            }
        return status
"""Register, execute, monitor, and trace DataOps flows.""" + + def __init__(self) -> None: + self._flows: Dict[str, DataFlow] = {} + # name -> list of execution records + self._exec_history: Dict[str, List[Dict[str, Any]]] = defaultdict(list) + # name -> lineage graph {step -> [upstream_steps]} + self._lineage: Dict[str, Dict[str, List[str]]] = {} + logger.info("DataFlowManager initialised.") + + def register_flow(self, name: str, sources: List[str], + transformations: List[Callable], sinks: List[str]) -> DataFlow: + """Register a new data flow pipeline.""" + flow = DataFlow(name=name, sources=sources, + transformations=transformations, sinks=sinks) + self._flows[name] = flow + # Build static lineage graph + lineage: Dict[str, List[str]] = {} + prev = list(sources) + for i, transform in enumerate(transformations): + step = getattr(transform, "__name__", f"transform_{i}") + lineage[step] = list(prev) + prev = [step] + for sink in sinks: + lineage[sink] = list(prev) + self._lineage[name] = lineage + logger.info("Flow registered: %s (%d transforms, %d sinks)", name, len(transformations), len(sinks)) + return flow + + def execute_flow(self, name: str) -> Dict[str, Any]: + """Run a registered flow and return an execution report.""" + flow = self._flows.get(name) + if not flow: + return {"success": False, "error": f"Flow '{name}' not found."} + if not flow.enabled: + return {"success": False, "error": f"Flow '{name}' is paused."} + + t0 = time.monotonic() + rows_processed = 0 + errors: List[str] = [] + data: Any = None + + # Simulate ingestion + for source in flow.sources: + logger.debug("Ingesting from source: %s", source) + data = {"source": source, "rows": 100} # stub + rows_processed += 100 + + # Apply transformations + for transform in flow.transformations: + try: + data = transform(data) if data is not None else data + except Exception as exc: # noqa: BLE001 + errors.append(f"{getattr(transform, '__name__', 'transform')}: {exc}") + logger.warning("Transform error: 
%s", exc) + + # Simulate sink writing + for sink in flow.sinks: + logger.debug("Writing to sink: %s", sink) + + latency_ms = round((time.monotonic() - t0) * 1000, 2) + record = {"run_id": str(uuid4()), "flow": name, "success": not errors, + "rows_processed": rows_processed, "errors": errors, + "latency_ms": latency_ms, "executed_at": datetime.utcnow().isoformat()} + self._exec_history[name].append(record) + logger.info("Flow '%s' executed in %.1f ms, rows=%d, errors=%d", + name, latency_ms, rows_processed, len(errors)) + return record + + def monitor_flow(self, name: str) -> Dict[str, Any]: + """Return execution statistics for *name*.""" + history = self._exec_history.get(name, []) + if not history: + return {"flow": name, "executions": 0, "status": "never_run"} + latencies = [h["latency_ms"] for h in history] + success_count = sum(1 for h in history if h["success"]) + return { + "flow": name, + "executions": len(history), + "success_rate": round(success_count / len(history), 3), + "avg_latency_ms": round(sum(latencies) / len(latencies), 2), + "last_run": history[-1]["executed_at"], + "enabled": self._flows[name].enabled if name in self._flows else None, + } + + def get_lineage(self, name: str) -> Dict[str, Any]: + """Return the lineage graph for flow *name*.""" + if name not in self._lineage: + return {"error": f"No lineage recorded for flow '{name}'."} + return {"flow": name, "lineage": self._lineage[name]} + + def pause_flow(self, name: str) -> None: + """Disable a flow so it cannot be executed.""" + if name in self._flows: + self._flows[name].enabled = False + logger.info("Flow paused: %s", name) + + def resume_flow(self, name: str) -> None: + """Re-enable a previously paused flow.""" + if name in self._flows: + self._flows[name].enabled = True + logger.info("Flow resumed: %s", name) diff --git a/integration/devops_bridge.py b/integration/devops_bridge.py new file mode 100644 index 0000000..9b14b84 --- /dev/null +++ b/integration/devops_bridge.py @@ -0,0 +1,118 
@@ +"""Bridge the AI platform with DevOps tooling (deployments, tests, metrics, scaling).""" + +import logging +import random +import time +from collections import defaultdict +from dataclasses import dataclass, field +from datetime import datetime +from typing import Any, Dict, List, Optional + +logger = logging.getLogger(__name__) + +HEALTHY_THRESHOLD = 0.9 # health score >= this → "healthy" +DEGRADED_THRESHOLD = 0.6 # health score >= this → "degraded" + + +class DevOpsBridge: + """Simulate DevOps operations: deployments, test runs, metric collection, scaling.""" + + def __init__(self) -> None: + # service -> {version, environment, deployed_at, replicas} + self._deployments: Dict[str, Dict[str, Any]] = {} + # service -> list of metric snapshots + self._metrics: Dict[str, List[Dict[str, Any]]] = defaultdict(list) + # suite -> list of test results + self._test_results: Dict[str, List[Dict[str, Any]]] = defaultdict(list) + logger.info("DevOpsBridge initialised.") + + def trigger_deployment(self, service: str, version: str, environment: str) -> Dict[str, Any]: + """Simulate deploying *service* at *version* to *environment*.""" + t0 = time.monotonic() + # Simulate deployment latency + success = random.random() > 0.05 # 95 % success rate + latency_ms = round((time.monotonic() - t0) * 1000 + random.uniform(200, 800), 2) + record = { + "service": service, "version": version, "environment": environment, + "success": success, "latency_ms": latency_ms, + "deployed_at": datetime.utcnow().isoformat(), + } + if success: + self._deployments[service] = record + logger.info("Deployed %s@%s to %s", service, version, environment) + else: + logger.error("Deployment failed: %s@%s → %s", service, version, environment) + return record + + def run_tests(self, suite_name: str) -> Dict[str, Any]: + """Execute a named test suite (simulated) and return results.""" + t0 = time.monotonic() + total = random.randint(20, 100) + failed = random.randint(0, max(1, total // 20)) + skipped = 
random.randint(0, max(1, total // 10)) + passed = total - failed - skipped + latency_ms = round((time.monotonic() - t0) * 1000 + random.uniform(500, 3000), 2) + result = { + "suite": suite_name, "total": total, "passed": passed, + "failed": failed, "skipped": skipped, + "success": failed == 0, "latency_ms": latency_ms, + "run_at": datetime.utcnow().isoformat(), + } + self._test_results[suite_name].append(result) + logger.info("Test suite '%s': %d/%d passed", suite_name, passed, total) + return result + + def collect_metrics(self, service: str) -> Dict[str, Any]: + """Simulate collecting runtime metrics for *service*.""" + snapshot = { + "service": service, + "cpu_pct": round(random.uniform(5, 85), 1), + "memory_pct": round(random.uniform(20, 90), 1), + "request_rate_rps": round(random.uniform(1, 500), 1), + "error_rate_pct": round(random.uniform(0, 5), 2), + "p99_latency_ms": round(random.uniform(50, 2000), 1), + "collected_at": datetime.utcnow().isoformat(), + } + self._metrics[service].append(snapshot) + logger.debug("Metrics collected for %s: cpu=%.1f%%", service, snapshot["cpu_pct"]) + return snapshot + + def check_health(self, service: str) -> Dict[str, Any]: + """Derive a health status from recent metrics for *service*.""" + recent = self._metrics.get(service, []) + if not recent: + # No data — collect now + self.collect_metrics(service) + recent = self._metrics[service] + + latest = recent[-1] + score = 1.0 + score -= max(0, (latest["cpu_pct"] - 70) / 100) + score -= max(0, (latest["memory_pct"] - 75) / 100) + score -= latest["error_rate_pct"] / 10 + score = round(max(0.0, min(1.0, score)), 3) + + if score >= HEALTHY_THRESHOLD: + status = "healthy" + elif score >= DEGRADED_THRESHOLD: + status = "degraded" + else: + status = "unhealthy" + + deployment = self._deployments.get(service, {}) + return { + "service": service, "status": status, "health_score": score, + "version": deployment.get("version", "unknown"), + "environment": deployment.get("environment", 
"unknown"), + "metrics": latest, + } + + def auto_scale(self, service: str, target_replicas: int) -> bool: + """Simulate a scaling operation for *service*.""" + target_replicas = max(1, min(target_replicas, 50)) + if service in self._deployments: + self._deployments[service]["replicas"] = target_replicas + logger.info("Auto-scaled %s to %d replicas.", service, target_replicas) + return True + logger.warning("Cannot scale unknown service: %s", service) + return False diff --git a/integration/human_centric_wrapper.py b/integration/human_centric_wrapper.py new file mode 100644 index 0000000..2f24f33 --- /dev/null +++ b/integration/human_centric_wrapper.py @@ -0,0 +1,142 @@ +"""Wrap AI responses with empathy, clarity, and personalisation layers.""" + +import logging +import re +from dataclasses import dataclass, field +from datetime import datetime +from typing import Any, Dict, Optional + +logger = logging.getLogger(__name__) + +CLARITY_REPLACEMENTS = { + r"\bin order to\b": "to", + r"\bdue to the fact that\b": "because", + r"\bat this point in time\b": "now", + r"\bin the event that\b": "if", + r"\bfor the purpose of\b": "to", + r"\bit is important to note that\b": "note that", + r"\bplease be advised that\b": "", + r"\bkindly note\b": "note", +} + +EMPATHY_OPENERS: Dict[str, str] = { + "sadness": "I'm really sorry you're going through this. ", + "anger": "I completely understand your frustration. ", + "fear": "I can see this feels overwhelming — let's take it step by step. ", + "frustration": "I hear you — that's genuinely frustrating. ", + "joy": "That's wonderful to hear! ", + "excitement": "Love the enthusiasm! ", + "neutral": "Thanks for reaching out. 
", +} + +QUALITY_CHECKS = { + "length_ok": lambda t: 20 <= len(t.split()) <= 500, + "no_jargon_overload": lambda t: len(re.findall(r"\b\w{15,}\b", t)) < 5, + "ends_with_punctuation": lambda t: bool(re.search(r"[.!?]$", t.strip())), + "no_repeated_sentences": lambda t: len(set(re.split(r"[.!?]+", t))) > 1, +} + + +@dataclass +class HumanCentricRequest: + """An inbound request to the human-centric wrapper.""" + raw_response: str + user_id: str + context: Dict[str, Any] = field(default_factory=dict) + request_id: str = field(default_factory=lambda: __import__("uuid").uuid4().hex) + created_at: datetime = field(default_factory=datetime.utcnow) + + +@dataclass +class HumanCentricResponse: + """An enriched, empathy-layered response ready for delivery.""" + original: str + enhanced: str + user_id: str + quality_scores: Dict[str, bool] = field(default_factory=dict) + empathy_added: bool = False + clarity_improved: bool = False + personalised: bool = False + produced_at: datetime = field(default_factory=datetime.utcnow) + + +class HumanCentricWrapper: + """Add empathy, clarity, and personalisation to raw AI-generated responses.""" + + def __init__(self) -> None: + self._user_names: Dict[str, str] = {} # user_id -> display name + self._clarity_patterns = [ + (re.compile(p, re.IGNORECASE), r) + for p, r in CLARITY_REPLACEMENTS.items() + ] + logger.info("HumanCentricWrapper initialised.") + + # ------------------------------------------------------------------ + # Public API + # ------------------------------------------------------------------ + + def wrap_response(self, raw_response: str, user_id: str, + context: Optional[Dict[str, Any]] = None) -> HumanCentricResponse: + """Apply the full enhancement pipeline to *raw_response*.""" + context = context or {} + text = raw_response + + clarity_before = text + text = self.ensure_clarity(text) + clarity_improved = text != clarity_before + + emotion = context.get("detected_emotion", "neutral") + empathy_before = text + text = 
self.add_empathy_layer(text, emotion) + empathy_added = text != empathy_before + + personalised_before = text + text = self.personalize(text, user_id) + personalised = text != personalised_before + + quality = self.assess_response_quality(text) + logger.debug("Wrapped response for user=%s clarity=%s empathy=%s", + user_id, clarity_improved, empathy_added) + return HumanCentricResponse( + original=raw_response, enhanced=text, user_id=user_id, + quality_scores=quality, empathy_added=empathy_added, + clarity_improved=clarity_improved, personalised=personalised, + ) + + def assess_response_quality(self, response: str) -> Dict[str, bool]: + """Run all quality checks and return a dict of {check_name: passed}.""" + return {name: check(response) for name, check in QUALITY_CHECKS.items()} + + def add_empathy_layer(self, text: str, emotion: str) -> str: + """Prepend an emotion-appropriate opener to *text*.""" + opener = EMPATHY_OPENERS.get(emotion, EMPATHY_OPENERS["neutral"]) + if not opener: + return text + # Avoid double empathy openers + if any(text.startswith(o.strip()) for o in EMPATHY_OPENERS.values() if o.strip()): + return text + return opener + text + + def ensure_clarity(self, text: str) -> str: + """Remove filler phrases and simplify wordy constructions.""" + result = text + for pattern, replacement in self._clarity_patterns: + result = pattern.sub(replacement, result) + # Collapse multiple spaces left by empty replacements + result = re.sub(r" {2,}", " ", result).strip() + # Capitalise first letter + if result and result[0].islower(): + result = result[0].upper() + result[1:] + return result + + def personalize(self, text: str, user_id: str) -> str: + """Insert the user's display name if known.""" + name = self._user_names.get(user_id) + if name and f"{name}," not in text and text and not text.startswith(name): + return f"{name}, {text[0].lower()}{text[1:]}" if text else text + return text + + def register_user_name(self, user_id: str, name: str) -> None: + 
"""Store a display name for personalised greetings.""" + self._user_names[user_id] = name + logger.debug("Registered display name for user=%s", user_id) diff --git a/integration/ml_pipeline_connector.py b/integration/ml_pipeline_connector.py new file mode 100644 index 0000000..8aa9d12 --- /dev/null +++ b/integration/ml_pipeline_connector.py @@ -0,0 +1,110 @@ +"""Connect MLOps components into a unified inference and training pipeline.""" + +import logging +import time +from datetime import datetime +from typing import Any, Dict, List, Optional + +logger = logging.getLogger(__name__) + + +class PipelineConnector: + """Bridge between ML registries, feature stores, monitors, and runtime pipelines.""" + + def __init__(self) -> None: + self._registry: Optional[Any] = None + self._feature_store: Optional[Any] = None + self._monitor: Optional[Any] = None + self._training_history: List[Dict[str, Any]] = [] + self._inference_cache: Dict[str, Any] = {} + logger.info("PipelineConnector initialised.") + + # ------------------------------------------------------------------ + # Connection helpers + # ------------------------------------------------------------------ + + def connect_registry(self, registry: Any) -> None: + """Attach a model registry (must expose .get_model(name) and .list_models()).""" + self._registry = registry + logger.info("Model registry connected: %s", type(registry).__name__) + + def connect_feature_store(self, store: Any) -> None: + """Attach a feature store (must expose .get_features(keys)).""" + self._feature_store = store + logger.info("Feature store connected: %s", type(store).__name__) + + def connect_monitor(self, monitor: Any) -> None: + """Attach a model monitor (must expose .record(model_name, prediction, latency)).""" + self._monitor = monitor + logger.info("Monitor connected: %s", type(monitor).__name__) + + # ------------------------------------------------------------------ + # Core pipeline operations + # 
------------------------------------------------------------------ + + def run_inference_pipeline(self, input_data: Dict[str, Any], model_name: str) -> Dict[str, Any]: + """ + Execute a full inference pipeline: + 1. Optionally enrich input from the feature store. + 2. Fetch the model from the registry. + 3. Run prediction (or return a stub if no real model). + 4. Record results with the monitor. + """ + t0 = time.monotonic() + cache_key = f"{model_name}:{hash(str(input_data))}" + if cache_key in self._inference_cache: + logger.debug("Cache hit for model=%s", model_name) + return {**self._inference_cache[cache_key], "cached": True} + + # Feature enrichment + features = dict(input_data) + if self._feature_store is not None: + try: + extra = self._feature_store.get_features(list(input_data.keys())) + features.update(extra or {}) + except Exception as exc: # noqa: BLE001 + logger.warning("Feature store enrichment failed: %s", exc) + + # Model invocation (stub if no registry) + prediction: Any + if self._registry is not None: + try: + model = self._registry.get_model(model_name) + prediction = model.predict(features) if hasattr(model, "predict") else str(features) + except Exception as exc: # noqa: BLE001 + logger.error("Model inference failed: %s", exc) + return {"success": False, "error": str(exc), "model": model_name} + else: + prediction = {"stub_prediction": sum(v for v in features.values() if isinstance(v, (int, float)))} + + latency_ms = round((time.monotonic() - t0) * 1000, 2) + + if self._monitor is not None: + try: + self._monitor.record(model_name, prediction, latency_ms) + except Exception as exc: # noqa: BLE001 + logger.warning("Monitor recording failed: %s", exc) + + result = {"model": model_name, "prediction": prediction, + "latency_ms": latency_ms, "success": True, "cached": False} + self._inference_cache[cache_key] = result + return result + + def trigger_training(self, trigger_reason: str) -> Dict[str, Any]: + """Log a training trigger event 
"""Serve neural network model predictions with caching, batching, and health checks."""

import logging
import time
from collections import OrderedDict
from dataclasses import dataclass, field
from datetime import datetime
from typing import Any, Callable, Dict, List, Optional
from uuid import uuid4

logger = logging.getLogger(__name__)

CACHE_MAX_SIZE = 256


@dataclass
class InferenceRequest:
    """A single prediction request."""
    model_name: str
    inputs: Dict[str, Any]
    request_id: str = field(default_factory=lambda: str(uuid4()))
    created_at: datetime = field(default_factory=datetime.utcnow)
    metadata: Dict[str, Any] = field(default_factory=dict)


@dataclass
class InferenceResult:
    """The prediction output from a model."""
    request_id: str
    model_name: str
    prediction: Any
    confidence: float
    latency_ms: float
    cached: bool = False
    error: Optional[str] = None
    produced_at: datetime = field(default_factory=datetime.utcnow)


class NeuralNetInference:
    """Register callable model wrappers and serve inference with LRU caching."""

    def __init__(self, cache_size: int = CACHE_MAX_SIZE) -> None:
        # name -> callable(inputs: dict) -> (prediction, confidence float)
        self._models: Dict[str, Callable] = {}
        self._model_meta: Dict[str, Dict[str, Any]] = {}
        self._cache: OrderedDict = OrderedDict()
        self._cache_size = cache_size
        self._call_stats: Dict[str, Dict[str, Any]] = {}
        logger.info("NeuralNetInference initialised (cache_size=%d).", cache_size)

    def register_model(self, name: str, model: Callable,
                       version: str = "1.0", description: str = "") -> None:
        """Register a model callable under *name*."""
        self._models[name] = model
        self._model_meta[name] = {
            "name": name, "version": version, "description": description,
            "registered_at": datetime.utcnow().isoformat(),
        }
        self._call_stats[name] = {"calls": 0, "errors": 0, "total_latency_ms": 0.0}
        logger.info("Model registered: %s v%s", name, version)

    def _cache_key(self, request: InferenceRequest) -> str:
        """Order-insensitive key: sorted input items keep equal dicts on one entry."""
        return f"{request.model_name}:{hash(str(sorted(request.inputs.items())))}"

    def cache_result(self, key: str, result: InferenceResult) -> None:
        """Store a result in the LRU cache, evicting oldest if full."""
        if len(self._cache) >= self._cache_size:
            self._cache.popitem(last=False)
        self._cache[key] = result

    def infer(self, request: InferenceRequest) -> InferenceResult:
        """Run inference for a single request, using cache when possible."""
        key = self._cache_key(request)
        hit = self._cache.get(key)
        if hit is not None:
            self._cache.move_to_end(key)  # refresh LRU position
            logger.debug("Cache hit: model=%s", request.model_name)
            return InferenceResult(request_id=request.request_id,
                                   model_name=request.model_name,
                                   prediction=hit.prediction,
                                   confidence=hit.confidence,
                                   latency_ms=0.0, cached=True)

        model = self._models.get(request.model_name)
        stats = self._call_stats.get(request.model_name,
                                     {"calls": 0, "errors": 0, "total_latency_ms": 0.0})
        stats["calls"] += 1
        started = time.monotonic()

        if model is None:
            stats["errors"] += 1
            elapsed = round((time.monotonic() - started) * 1000, 2)
            return InferenceResult(request_id=request.request_id,
                                   model_name=request.model_name,
                                   prediction=None, confidence=0.0,
                                   latency_ms=elapsed,
                                   error=f"Model '{request.model_name}' not registered.")

        try:
            output = model(request.inputs)
            # Models may return (prediction, confidence) or a bare prediction.
            if isinstance(output, tuple) and len(output) == 2:
                prediction, confidence = output
            else:
                prediction, confidence = output, 0.9
        except Exception as exc:  # noqa: BLE001
            stats["errors"] += 1
            elapsed = round((time.monotonic() - started) * 1000, 2)
            logger.exception("Inference error for model=%s", request.model_name)
            return InferenceResult(request_id=request.request_id,
                                   model_name=request.model_name,
                                   prediction=None, confidence=0.0,
                                   latency_ms=elapsed, error=str(exc))

        elapsed = round((time.monotonic() - started) * 1000, 2)
        stats["total_latency_ms"] += elapsed
        result = InferenceResult(request_id=request.request_id,
                                 model_name=request.model_name,
                                 prediction=prediction, confidence=float(confidence),
                                 latency_ms=elapsed)
        self.cache_result(key, result)
        return result

    def batch_infer(self, requests: List[InferenceRequest]) -> List[InferenceResult]:
        """Run inference for a batch of requests and return results in order."""
        return [self.infer(request) for request in requests]

    def get_model_info(self, name: str) -> Dict[str, Any]:
        """Return metadata and runtime stats for a registered model."""
        if name not in self._model_meta:
            return {"error": f"Model '{name}' not registered."}
        stats = self._call_stats.get(name, {})
        calls = stats.get("calls", 0)
        avg_latency = stats["total_latency_ms"] / calls if calls else 0.0
        return {**self._model_meta[name],
                "calls": calls, "errors": stats.get("errors", 0),
                "avg_latency_ms": round(avg_latency, 2)}

    def health_check(self) -> Dict[str, Any]:
        """Return overall system health and per-model error rates."""
        model_health = {}
        for name in self._models:
            info = self.get_model_info(name)
            calls = info.get("calls", 0)
            errors = info.get("errors", 0)
            degraded = calls > 0 and errors / calls > 0.1  # >10 % errors → degraded
            model_health[name] = {
                "status": "degraded" if degraded else "healthy",
                "error_rate": round(errors / max(calls, 1), 3),
            }
        return {
            "status": "ok", "registered_models": len(self._models),
            "cache_entries": len(self._cache),
            "models": model_health,
            "checked_at": datetime.utcnow().isoformat(),
        }
"""Route NLP/NLU requests and expose lightweight intent, entity, and text pipelines."""

import logging
import re
from dataclasses import dataclass, field
from datetime import datetime
from typing import Any, Dict, List, Optional

logger = logging.getLogger(__name__)

# ------------------------------------------------------------------
# Intent patterns (regex-based lightweight NLU)
# ------------------------------------------------------------------
INTENT_PATTERNS: Dict[str, List[str]] = {
    "question": [r"\?$", r"^(what|who|where|when|why|how|which|is|are|do|does|can|could|would)\b"],
    "command": [r"^(please\s+)?(show|tell|give|find|list|create|delete|update|run|start|stop|get)\b"],
    "sentiment_feedback": [r"\b(love|hate|like|dislike|great|terrible|good|bad|awful|amazing)\b"],
    "help_request": [r"\b(help|assist|support|guide|explain|how to|what is)\b"],
    "greeting": [r"^(hi|hello|hey|good morning|good afternoon|good evening|greetings)\b"],
    "farewell": [r"^(bye|goodbye|see you|take care|thanks|thank you|cheers)\b"],
    "complaint": [r"\b(broken|not working|error|bug|issue|problem|fail|crash|wrong)\b"],
    "confirmation": [r"^(yes|no|ok|okay|sure|of course|absolutely|correct|right|wrong)\b"],
}

ENTITY_PATTERNS: Dict[str, re.Pattern] = {
    "email": re.compile(r"[a-zA-Z0-9._%+\-]+@[a-zA-Z0-9.\-]+\.[a-zA-Z]{2,}"),
    "url": re.compile(r"https?://[^\s]+"),
    "number": re.compile(r"\b\d+(?:\.\d+)?\b"),
    "date": re.compile(r"\b(\d{1,2}[/-]\d{1,2}[/-]\d{2,4}|\w+ \d{1,2},? \d{4})\b"),
    "mention": re.compile(r"@\w+"),
    "hashtag": re.compile(r"#\w+"),
}

NLU_TASKS = {"intent", "entity", "sentiment"}
NLP_TASKS = {"generate", "summarize", "translate", "qa"}


@dataclass
class RoutingDecision:
    """The result of routing a text request."""
    pipeline: str  # "nlu" or "nlp"
    task: str
    confidence: float
    reasoning: str
    timestamp: datetime = field(default_factory=datetime.utcnow)


class NLUNLPRouter:
    """Classify and route text to the NLU or NLP pipeline."""

    def __init__(self) -> None:
        # Compile the intent regexes once; classification runs per request.
        self._intent_compiled = [
            (intent, [re.compile(p, re.IGNORECASE) for p in patterns])
            for intent, patterns in INTENT_PATTERNS.items()
        ]
        logger.info("NLUNLPRouter initialised.")

    def classify_request_type(self, text: str) -> str:
        """Return the primary request type string from INTENT_PATTERNS keys."""
        hit_counts: Dict[str, int] = {}
        for intent, patterns in self._intent_compiled:
            matched = sum(1 for pattern in patterns if pattern.search(text))
            if matched:
                hit_counts[intent] = matched
        if not hit_counts:
            return "statement"
        # Ties resolve to the earliest entry in INTENT_PATTERNS order.
        return max(hit_counts, key=hit_counts.get)

    def route(self, text: str, context: Optional[Dict] = None) -> RoutingDecision:
        """Decide whether the text should go to NLU or NLP and specify the task."""
        context = context or {}
        request_type = self.classify_request_type(text)
        word_count = len(text.split())

        # Long text → NLP (summarization / generation).
        if word_count > 80:
            return RoutingDecision(pipeline="nlp", task="summarize",
                                   confidence=0.82, reasoning="Long text → summarization pipeline.")

        # Explicit task hint from context.
        if context.get("task") in NLP_TASKS:
            return RoutingDecision(pipeline="nlp", task=context["task"],
                                   confidence=0.95, reasoning="Explicit task in context.")

        if request_type in ("question", "help_request", "command"):
            return RoutingDecision(pipeline="nlu", task="intent",
                                   confidence=0.88, reasoning=f"Request type '{request_type}' → NLU.")

        if request_type == "sentiment_feedback":
            return RoutingDecision(pipeline="nlu", task="sentiment",
                                   confidence=0.85, reasoning="Sentiment feedback → NLU.")

        return RoutingDecision(pipeline="nlu", task="entity",
                               confidence=0.65, reasoning="Default → NLU entity extraction.")

    def nlu_pipeline(self, text: str) -> Dict[str, Any]:
        """Run intent detection, entity extraction, and sentiment analysis."""
        intent = self.classify_request_type(text)
        all_matches = {label: pattern.findall(text)
                       for label, pattern in ENTITY_PATTERNS.items()}
        entities = {label: found for label, found in all_matches.items() if found}

        # Naive sentiment: compare counts of positive and negative keywords.
        positive = len(re.findall(r"\b(good|great|love|excellent|amazing|happy|yes|thanks)\b", text, re.I))
        negative = len(re.findall(r"\b(bad|terrible|hate|error|broken|fail|wrong|no)\b", text, re.I))
        if positive > negative:
            sentiment, sentiment_score = "positive", round(min(positive / 5, 1.0), 2)
        elif negative > positive:
            sentiment, sentiment_score = "negative", round(-min(negative / 5, 1.0), 2)
        else:
            sentiment, sentiment_score = "neutral", 0.0

        return {"intent": intent, "entities": entities,
                "sentiment": sentiment, "sentiment_score": sentiment_score}

    def nlp_pipeline(self, text: str, task: str = "summarize") -> Dict[str, Any]:
        """Run a basic NLP task: summarize, generate, translate stub, or QA stub."""
        if task == "summarize":
            # Lead-based summary: keep the first two sentences.
            sentences = re.split(r"(?<=[.!?])\s+", text.strip())
            summary = " ".join(sentences[:2]) if len(sentences) > 2 else text
            return {"task": "summarize", "output": summary, "input_length": len(text.split())}

        if task == "generate":
            continuation = text.strip().rstrip(".") + " — and that opens exciting new possibilities."
            return {"task": "generate", "output": continuation}

        if task == "translate":
            return {"task": "translate", "output": f"[Translation placeholder for: {text[:60]}...]",
                    "note": "Integrate an external translation API for real translation."}

        if task == "qa":
            return {"task": "qa", "output": f"Based on the context, the answer relates to: {text[:80]}",
                    "confidence": 0.5}

        return {"task": task, "output": text, "note": "Unknown NLP task — returning input unchanged."}
1.0 for success, 0.0 for failure + metadata: Dict[str, Any] = field(default_factory=dict) + timestamp: datetime = field(default_factory=datetime.utcnow) + + +@dataclass +class VariantStats: + variant_name: str + sample_count: int = 0 + mean_outcome: float = 0.0 + std_outcome: float = 0.0 + conversion_rate: float = 0.0 + confidence_interval: Tuple[float, float] = (0.0, 0.0) + + +@dataclass +class ExperimentResult: + experiment_id: str = "" + status: str = "" + variant_stats: List[VariantStats] = field(default_factory=list) + winner: Optional[str] = None + p_value: float = 1.0 + is_significant: bool = False + recommendation: str = "" + computed_at: datetime = field(default_factory=datetime.utcnow) + + +def _welch_t_statistic(a: List[float], b: List[float]) -> Tuple[float, float]: + """Compute approximate Welch t-test p-value (2-sided).""" + import math + if len(a) < 2 or len(b) < 2: + return 0.0, 1.0 + mean_a, mean_b = statistics.mean(a), statistics.mean(b) + var_a = statistics.variance(a) + var_b = statistics.variance(b) + se = math.sqrt(var_a / len(a) + var_b / len(b)) + if se == 0: + return 0.0, 1.0 + t = (mean_a - mean_b) / se + # Approximate p-value using normal distribution (z-test for large samples) + from math import erfc, sqrt + p_value = erfc(abs(t) / sqrt(2)) + return t, p_value + + +def _confidence_interval_95(values: List[float]) -> Tuple[float, float]: + """95% confidence interval using t-distribution approximation.""" + import math + if len(values) < 2: + m = values[0] if values else 0.0 + return m, m + mean = statistics.mean(values) + se = statistics.stdev(values) / math.sqrt(len(values)) + margin = 1.96 * se + return mean - margin, mean + margin + + +class TrafficSplitter: + """Deterministic traffic routing by entity ID hashing.""" + + def assign_variant(self, entity_id: str, variants: List[Variant]) -> Variant: + """Deterministically route entity to a variant based on hash.""" + h = int(hashlib.md5(entity_id.encode()).hexdigest(), 16) % 1000 / 
1000.0 + cumulative = 0.0 + total_weight = sum(v.traffic_weight for v in variants) + for variant in variants: + cumulative += variant.traffic_weight / total_weight + if h < cumulative: + return variant + return variants[-1] + + +class ABTesting: + """ + A/B testing framework for comparing ML models with statistical + significance testing, traffic splitting, and experiment lifecycle management. + """ + + def __init__(self) -> None: + self._experiments: Dict[str, Experiment] = {} + self._observations: Dict[str, List[Observation]] = {} # experiment_id -> observations + self._splitter = TrafficSplitter() + logger.info("ABTesting framework initialized") + + def create_experiment(self, name: str, control: Variant, treatment: Variant, + description: str = "", + success_metric: str = "accuracy", + minimum_samples: int = 100) -> Experiment: + exp = Experiment( + name=name, + description=description, + variants=[control, treatment], + success_metric=success_metric, + minimum_samples=minimum_samples, + ) + # Normalize weights + total = sum(v.traffic_weight for v in exp.variants) + for v in exp.variants: + v.traffic_weight = v.traffic_weight / total + self._experiments[exp.experiment_id] = exp + self._observations[exp.experiment_id] = [] + logger.info("Created experiment '%s' (%s)", name, exp.experiment_id) + return exp + + def start(self, experiment_id: str) -> bool: + exp = self._experiments.get(experiment_id) + if not exp or exp.status not in ("draft", "paused"): + return False + exp.status = "running" + exp.start_time = datetime.utcnow() + logger.info("Started experiment '%s'", exp.name) + return True + + def pause(self, experiment_id: str) -> bool: + exp = self._experiments.get(experiment_id) + if not exp or exp.status != "running": + return False + exp.status = "paused" + return True + + def route(self, experiment_id: str, entity_id: str) -> Optional[Variant]: + """Route an entity to a variant for an active experiment.""" + exp = self._experiments.get(experiment_id) + if 
not exp or exp.status != "running": + return None + return self._splitter.assign_variant(entity_id, exp.variants) + + def record_outcome(self, experiment_id: str, entity_id: str, + variant_name: str, outcome: float, + metadata: Optional[Dict[str, Any]] = None) -> None: + obs = Observation(experiment_id=experiment_id, variant_name=variant_name, + entity_id=entity_id, outcome=outcome, metadata=metadata or {}) + self._observations.setdefault(experiment_id, []).append(obs) + + def analyze(self, experiment_id: str) -> ExperimentResult: + exp = self._experiments.get(experiment_id) + if not exp: + return ExperimentResult(experiment_id=experiment_id, status="not_found") + + observations = self._observations.get(experiment_id, []) + variant_outcomes: Dict[str, List[float]] = {v.name: [] for v in exp.variants} + for obs in observations: + if obs.variant_name in variant_outcomes: + variant_outcomes[obs.variant_name].append(obs.outcome) + + variant_stats: List[VariantStats] = [] + for variant_name, outcomes in variant_outcomes.items(): + if not outcomes: + variant_stats.append(VariantStats(variant_name=variant_name)) + continue + mean = statistics.mean(outcomes) + std = statistics.stdev(outcomes) if len(outcomes) > 1 else 0.0 + ci = _confidence_interval_95(outcomes) + variant_stats.append(VariantStats( + variant_name=variant_name, + sample_count=len(outcomes), + mean_outcome=mean, + std_outcome=std, + conversion_rate=sum(1 for o in outcomes if o > 0.5) / len(outcomes), + confidence_interval=ci, + )) + + # Statistical significance test + p_value = 1.0 + winner = None + is_significant = False + + if len(exp.variants) >= 2: + v1_outcomes = variant_outcomes.get(exp.variants[0].name, []) + v2_outcomes = variant_outcomes.get(exp.variants[1].name, []) + min_samples = exp.minimum_samples + + if len(v1_outcomes) >= min_samples and len(v2_outcomes) >= min_samples: + _, p_value = _welch_t_statistic(v1_outcomes, v2_outcomes) + is_significant = p_value < exp.significance_level + if 
is_significant: + means = {v.variant_name: v.mean_outcome for v in variant_stats} + winner = max(means, key=lambda n: means[n]) + + n_total = sum(len(o) for o in variant_outcomes.values()) + if n_total < exp.minimum_samples: + recommendation = f"Collect more data (have {n_total}, need {exp.minimum_samples})." + elif is_significant and winner: + recommendation = f"Promote '{winner}' — statistically significant improvement (p={p_value:.4f})." + else: + recommendation = f"No significant difference detected (p={p_value:.4f}). Continue collecting." + + return ExperimentResult( + experiment_id=experiment_id, + status=exp.status, + variant_stats=variant_stats, + winner=winner, + p_value=p_value, + is_significant=is_significant, + recommendation=recommendation, + ) + + def list_experiments(self, status: Optional[str] = None) -> List[Experiment]: + if status: + return [e for e in self._experiments.values() if e.status == status] + return list(self._experiments.values()) + + def complete(self, experiment_id: str) -> bool: + exp = self._experiments.get(experiment_id) + if not exp: + return False + exp.status = "completed" + exp.end_time = datetime.utcnow() + return True diff --git a/mlops/auto_retrain.py b/mlops/auto_retrain.py new file mode 100644 index 0000000..4e06f89 --- /dev/null +++ b/mlops/auto_retrain.py @@ -0,0 +1,214 @@ +"""Automated model retraining triggers.""" +from __future__ import annotations + +import logging +import uuid +from dataclasses import dataclass, field +from datetime import datetime, timedelta +from typing import Any, Callable, Dict, List, Optional + +logger = logging.getLogger(__name__) + + +@dataclass +class RetrainTrigger: + trigger_id: str = field(default_factory=lambda: str(uuid.uuid4())) + model_name: str = "" + trigger_type: str = "" # "drift" | "performance" | "schedule" | "data_volume" + condition: str = "" + threshold: float = 0.0 + enabled: bool = True + last_triggered: Optional[datetime] = None + cooldown_hours: int = 24 + + +@dataclass 
class TriggerEvaluator:
    """Pure predicate logic deciding whether a retrain trigger should fire."""

    def evaluate_drift(self, trigger: RetrainTrigger, drift_score: float) -> bool:
        """Fire when the observed drift score has reached the trigger threshold."""
        if not trigger.enabled:
            return False
        return drift_score >= trigger.threshold

    def evaluate_performance(self, trigger: RetrainTrigger, metric_value: float) -> bool:
        """Fire when the performance metric has fallen to or below the threshold."""
        if not trigger.enabled:
            return False
        return metric_value <= trigger.threshold

    def evaluate_schedule(self, trigger: RetrainTrigger) -> bool:
        """Fire when the configured interval has elapsed (or it never ran)."""
        if not trigger.enabled:
            return False
        last = trigger.last_triggered
        if last is None:
            return True
        return datetime.utcnow() - last >= timedelta(hours=trigger.cooldown_hours)

    def evaluate_data_volume(self, trigger: RetrainTrigger, new_sample_count: int) -> bool:
        """Fire once enough new samples have accumulated."""
        if not trigger.enabled:
            return False
        return new_sample_count >= trigger.threshold

    def is_on_cooldown(self, trigger: RetrainTrigger) -> bool:
        """True while the trigger's cooldown window since its last firing is open."""
        last = trigger.last_triggered
        if last is None:
            return False
        return datetime.utcnow() - last < timedelta(hours=trigger.cooldown_hours)
signals + and triggers retraining pipelines based on configurable rules. + """ + + def __init__(self, config: Optional[AutoRetrainConfig] = None) -> None: + self._config = config or AutoRetrainConfig() + self._triggers: Dict[str, List[RetrainTrigger]] = {} # model_name -> triggers + self._jobs: List[RetrainJob] = [] + self._evaluator = TriggerEvaluator() + self._retraining_handlers: Dict[str, Callable] = {} + logger.info("AutoRetrain initialized") + + def register_model(self, model_name: str) -> None: + if model_name not in self._triggers: + self._triggers[model_name] = [] + self._create_default_triggers(model_name) + logger.info("Registered model '%s' for auto-retrain", model_name) + + def _create_default_triggers(self, model_name: str) -> None: + self._triggers[model_name].extend([ + RetrainTrigger(model_name=model_name, trigger_type="drift", + condition="drift_score >= threshold", + threshold=self._config.drift_threshold), + RetrainTrigger(model_name=model_name, trigger_type="performance", + condition="accuracy <= threshold", + threshold=self._config.performance_threshold), + RetrainTrigger(model_name=model_name, trigger_type="schedule", + condition="elapsed_hours >= cooldown_hours", + threshold=0.0, cooldown_hours=self._config.schedule_hours), + RetrainTrigger(model_name=model_name, trigger_type="data_volume", + condition="new_samples >= threshold", + threshold=float(self._config.min_new_samples)), + ]) + + def add_trigger(self, trigger: RetrainTrigger) -> None: + self._triggers.setdefault(trigger.model_name, []).append(trigger) + + def register_handler(self, model_name: str, handler: Callable) -> None: + """Register a callable that performs the actual retraining.""" + self._retraining_handlers[model_name] = handler + + def check_and_trigger(self, model_name: str, + drift_score: Optional[float] = None, + performance_metric: Optional[float] = None, + new_sample_count: Optional[int] = None) -> Optional[RetrainJob]: + triggers = self._triggers.get(model_name, []) 
+ reason = "" + triggered_trigger: Optional[RetrainTrigger] = None + + for trigger in triggers: + if self._evaluator.is_on_cooldown(trigger): + continue + fired = False + if trigger.trigger_type == "drift" and drift_score is not None: + fired = self._evaluator.evaluate_drift(trigger, drift_score) + if fired: + reason = f"Drift score {drift_score:.3f} >= {trigger.threshold:.3f}" + elif trigger.trigger_type == "performance" and performance_metric is not None: + fired = self._evaluator.evaluate_performance(trigger, performance_metric) + if fired: + reason = f"Performance {performance_metric:.3f} <= {trigger.threshold:.3f}" + elif trigger.trigger_type == "schedule": + fired = self._evaluator.evaluate_schedule(trigger) + if fired: + reason = "Scheduled retraining interval reached" + elif trigger.trigger_type == "data_volume" and new_sample_count is not None: + fired = self._evaluator.evaluate_data_volume(trigger, new_sample_count) + if fired: + reason = f"New samples {new_sample_count} >= {int(trigger.threshold)}" + + if fired: + triggered_trigger = trigger + break + + if not triggered_trigger: + return None + + # Check concurrent job limit + running = [j for j in self._jobs if j.status == "running"] + if len(running) >= self._config.max_concurrent_jobs: + logger.warning("Max concurrent jobs (%d) reached for '%s'", + self._config.max_concurrent_jobs, model_name) + return None + + job = RetrainJob( + model_name=model_name, + trigger_id=triggered_trigger.trigger_id, + trigger_type=triggered_trigger.trigger_type, + reason=reason, + ) + self._jobs.append(job) + triggered_trigger.last_triggered = datetime.utcnow() + logger.info("Retrain triggered for '%s': %s", model_name, reason) + self._execute_job(job) + return job + + def _execute_job(self, job: RetrainJob) -> None: + job.status = "running" + job.started_at = datetime.utcnow() + handler = self._retraining_handlers.get(job.model_name) + try: + if handler: + result = handler(job.model_name) + job.new_version = 
result.get("new_version") if isinstance(result, dict) else None + job.metrics_after = result.get("metrics", {}) if isinstance(result, dict) else {} + else: + logger.info("[Simulated] Retraining '%s'", job.model_name) + job.metrics_after = {"accuracy": 0.92, "f1": 0.91} + job.new_version = "auto" + job.status = "completed" + except Exception as exc: + job.status = "failed" + job.metadata["error"] = str(exc) + logger.error("Retrain job failed for '%s': %s", job.model_name, exc) + finally: + job.finished_at = datetime.utcnow() + + def get_jobs(self, model_name: Optional[str] = None, + status: Optional[str] = None) -> List[RetrainJob]: + jobs = self._jobs + if model_name: + jobs = [j for j in jobs if j.model_name == model_name] + if status: + jobs = [j for j in jobs if j.status == status] + return jobs + + def stats(self) -> Dict[str, Any]: + from collections import Counter + status_counts = Counter(j.status for j in self._jobs) + return { + "total_jobs": len(self._jobs), + "by_status": dict(status_counts), + "registered_models": list(self._triggers.keys()), + } diff --git a/mlops/drift_detector.py b/mlops/drift_detector.py new file mode 100644 index 0000000..e0101bf --- /dev/null +++ b/mlops/drift_detector.py @@ -0,0 +1,230 @@ +"""Data and model drift detection.""" +from __future__ import annotations + +import logging +import math +import statistics +from collections import Counter +from dataclasses import dataclass, field +from datetime import datetime +from typing import Any, Dict, List, Optional, Tuple + +logger = logging.getLogger(__name__) + + +@dataclass +class DriftReport: + feature_name: str = "" + drift_score: float = 0.0 + drift_type: str = "" # "covariate" | "label" | "concept" | "none" + is_drifted: bool = False + threshold: float = 0.1 + test_statistic: float = 0.0 + test_name: str = "" + reference_stats: Dict[str, float] = field(default_factory=dict) + current_stats: Dict[str, float] = field(default_factory=dict) + created_at: datetime = 
field(default_factory=datetime.utcnow) + + +@dataclass +class DriftSummary: + drifted_features: List[str] = field(default_factory=list) + stable_features: List[str] = field(default_factory=list) + overall_drift_score: float = 0.0 + reports: List[DriftReport] = field(default_factory=list) + recommendation: str = "" + created_at: datetime = field(default_factory=datetime.utcnow) + + +def _descriptive_stats(values: List[float]) -> Dict[str, float]: + if not values: + return {} + return { + "mean": statistics.mean(values), + "std": statistics.stdev(values) if len(values) > 1 else 0.0, + "min": min(values), + "max": max(values), + "median": statistics.median(values), + "count": float(len(values)), + } + + +def _ks_statistic(reference: List[float], current: List[float]) -> float: + """Kolmogorov-Smirnov statistic (max absolute CDF difference).""" + if not reference or not current: + return 0.0 + all_values = sorted(set(reference + current)) + ref_sorted = sorted(reference) + cur_sorted = sorted(current) + n_ref, n_cur = len(ref_sorted), len(cur_sorted) + max_diff = 0.0 + for val in all_values: + cdf_ref = sum(1 for v in ref_sorted if v <= val) / n_ref + cdf_cur = sum(1 for v in cur_sorted if v <= val) / n_cur + max_diff = max(max_diff, abs(cdf_ref - cdf_cur)) + return max_diff + + +def _psi_score(reference: List[float], current: List[float], n_bins: int = 10) -> float: + """Population Stability Index (PSI).""" + if not reference or not current: + return 0.0 + min_v = min(min(reference), min(current)) + max_v = max(max(reference), max(current)) + if max_v == min_v: + return 0.0 + bin_width = (max_v - min_v) / n_bins + boundaries = [min_v + i * bin_width for i in range(n_bins + 1)] + + def bin_counts(values: List[float]) -> List[float]: + counts = [0] * n_bins + for v in values: + idx = min(int((v - min_v) / bin_width), n_bins - 1) + counts[idx] += 1 + total = len(values) + return [(c + 0.0001) / total for c in counts] + + ref_pct = bin_counts(reference) + cur_pct = 
bin_counts(current) + psi = sum((c - r) * math.log(c / r) for r, c in zip(ref_pct, cur_pct)) + return psi + + +def _chi_square_drift(ref_labels: List[Any], cur_labels: List[Any]) -> float: + """Chi-square statistic normalized as drift score for categorical data.""" + ref_counts = Counter(ref_labels) + cur_counts = Counter(cur_labels) + all_cats = set(ref_counts) | set(cur_counts) + n_ref, n_cur = len(ref_labels), len(cur_labels) + if n_ref == 0 or n_cur == 0: + return 0.0 + chi2 = 0.0 + for cat in all_cats: + expected = ref_counts.get(cat, 0) / n_ref * n_cur + observed = cur_counts.get(cat, 0) + if expected > 0: + chi2 += (observed - expected) ** 2 / expected + # Normalize to 0-1 range + return min(1.0, chi2 / (len(all_cats) * n_cur + 1e-9)) + + +class CovariateShiftDetector: + """Detects input feature distribution drift.""" + + def __init__(self, ks_threshold: float = 0.1, psi_threshold: float = 0.2) -> None: + self.ks_threshold = ks_threshold + self.psi_threshold = psi_threshold + + def detect(self, feature_name: str, reference: List[float], + current: List[float]) -> DriftReport: + ks = _ks_statistic(reference, current) + psi = _psi_score(reference, current) + drift_score = max(ks, psi / 5) # normalize PSI + is_drifted = ks > self.ks_threshold or psi > self.psi_threshold + + return DriftReport( + feature_name=feature_name, + drift_score=drift_score, + drift_type="covariate" if is_drifted else "none", + is_drifted=is_drifted, + threshold=self.ks_threshold, + test_statistic=ks, + test_name="KS+PSI", + reference_stats=_descriptive_stats(reference), + current_stats=_descriptive_stats(current), + ) + + +class LabelShiftDetector: + """Detects output label distribution changes.""" + + def __init__(self, threshold: float = 0.15) -> None: + self.threshold = threshold + + def detect(self, feature_name: str, ref_labels: List[Any], + cur_labels: List[Any]) -> DriftReport: + score = _chi_square_drift(ref_labels, cur_labels) + is_drifted = score > self.threshold + return 
DriftReport( + feature_name=feature_name, + drift_score=score, + drift_type="label" if is_drifted else "none", + is_drifted=is_drifted, + threshold=self.threshold, + test_statistic=score, + test_name="Chi-Square", + ) + + +class DriftDetector: + """ + Comprehensive drift detection combining covariate shift, + label shift, and concept drift detection. + """ + + def __init__(self, ks_threshold: float = 0.1, psi_threshold: float = 0.2, + label_threshold: float = 0.15) -> None: + self._covariate = CovariateShiftDetector(ks_threshold, psi_threshold) + self._label = LabelShiftDetector(label_threshold) + self._reference_data: Dict[str, List[float]] = {} + self._reference_labels: Dict[str, List[Any]] = {} + logger.info("DriftDetector initialized") + + def set_reference(self, feature_data: Dict[str, List[float]], + labels: Optional[List[Any]] = None) -> None: + self._reference_data = {k: list(v) for k, v in feature_data.items()} + if labels: + self._reference_labels["output"] = list(labels) + logger.info("Reference dataset set: %d features, %d samples", + len(feature_data), len(next(iter(feature_data.values()), []))) + + def detect_feature_drift(self, feature_name: str, + current_values: List[float]) -> DriftReport: + reference = self._reference_data.get(feature_name, []) + if not reference: + return DriftReport(feature_name=feature_name, drift_type="no_reference") + return self._covariate.detect(feature_name, reference, current_values) + + def detect_label_drift(self, current_labels: List[Any]) -> DriftReport: + reference = self._reference_labels.get("output", []) + if not reference: + return DriftReport(feature_name="output", drift_type="no_reference") + return self._label.detect("output", reference, current_labels) + + def detect_all(self, current_data: Dict[str, List[float]], + current_labels: Optional[List[Any]] = None) -> DriftSummary: + reports: List[DriftReport] = [] + for feature_name, values in current_data.items(): + report = 
self.detect_feature_drift(feature_name, values) + reports.append(report) + + if current_labels: + label_report = self.detect_label_drift(current_labels) + reports.append(label_report) + + drifted = [r.feature_name for r in reports if r.is_drifted] + stable = [r.feature_name for r in reports if not r.is_drifted] + overall_score = sum(r.drift_score for r in reports) / max(len(reports), 1) + + if len(drifted) == 0: + recommendation = "No drift detected. Model is stable." + elif len(drifted) <= len(reports) * 0.3: + recommendation = f"Minor drift in {len(drifted)} features. Monitor closely." + else: + recommendation = f"Significant drift in {len(drifted)}/{len(reports)} features. Consider retraining." + + return DriftSummary( + drifted_features=drifted, + stable_features=stable, + overall_drift_score=overall_score, + reports=reports, + recommendation=recommendation, + ) + + def update_reference(self, new_data: Dict[str, List[float]], + blend_ratio: float = 0.2) -> None: + """Incrementally update reference distribution.""" + for feature_name, values in new_data.items(): + existing = self._reference_data.get(feature_name, []) + keep = int(len(existing) * (1 - blend_ratio)) + self._reference_data[feature_name] = existing[-keep:] + values diff --git a/mlops/feature_store.py b/mlops/feature_store.py new file mode 100644 index 0000000..a74cd6d --- /dev/null +++ b/mlops/feature_store.py @@ -0,0 +1,233 @@ +"""Centralized feature store for ML feature engineering and storage.""" +from __future__ import annotations + +import hashlib +import json +import logging +import statistics +import uuid +from dataclasses import dataclass, field +from datetime import datetime, timedelta +from typing import Any, Callable, Dict, List, Optional, Tuple + +logger = logging.getLogger(__name__) + + +@dataclass +class FeatureDefinition: + feature_id: str = field(default_factory=lambda: str(uuid.uuid4())) + name: str = "" + entity_type: str = "" # e.g. 
class FeatureTransformer:
    """Common feature transformations (pure, stateless static methods)."""

    @staticmethod
    def normalize(values: List[float]) -> List[float]:
        """Min-max scale *values* into [0, 1].

        Fix: returns an empty list for empty input instead of raising
        ValueError from ``min()``/``max()``. All-identical values (zero
        span) map to all zeros, as before.
        """
        if not values:
            return []
        min_v, max_v = min(values), max(values)
        span = max_v - min_v
        if span == 0:
            return [0.0] * len(values)
        return [(v - min_v) / span for v in values]

    @staticmethod
    def standardize(values: List[float]) -> List[float]:
        """Z-score *values*; degenerate inputs (n < 2 or zero stdev) map to zeros."""
        if len(values) < 2:
            return [0.0] * len(values)
        mean = statistics.mean(values)
        std = statistics.stdev(values)
        if std == 0:
            return [0.0] * len(values)
        return [(v - mean) / std for v in values]

    @staticmethod
    def log_transform(values: List[float], base: float = 10.0) -> List[float]:
        """Log-scale *values*; non-positive entries are clamped to 1e-9 first."""
        import math
        return [math.log(max(v, 1e-9), base) for v in values]

    @staticmethod
    def one_hot(value: str, categories: List[str]) -> Dict[str, int]:
        """One-hot encode *value* against the closed *categories* list."""
        return {f"is_{cat}": int(value == cat) for cat in categories}

    @staticmethod
    def bucket(value: float, boundaries: List[float]) -> int:
        """Return the index of the first boundary exceeding *value*.

        Values at or beyond the last boundary fall into the final bucket,
        index ``len(boundaries)``.
        """
        for i, boundary in enumerate(boundaries):
            if value < boundary:
                return i
        return len(boundaries)
int: + now = datetime.utcnow() + expired = [k for k, v in self._cache.items() + if v.ttl_seconds and (now - v.timestamp).total_seconds() > v.ttl_seconds] + for k in expired: + del self._cache[k] + return len(expired) + + +class FeatureStore: + """ + Centralized feature store with offline (batch) and online (serving) + storage, feature registration, transformation pipelines, and point-in-time joins. + """ + + def __init__(self) -> None: + self._definitions: Dict[str, FeatureDefinition] = {} + self._offline = OfflineStore() + self._online = OnlineStore() + self._transformers: Dict[str, Callable] = {} + self.transformer = FeatureTransformer() + logger.info("FeatureStore initialized") + + def register_feature(self, name: str, entity_type: str, dtype: str = "float", + description: str = "", tags: Optional[List[str]] = None) -> FeatureDefinition: + fd = FeatureDefinition(name=name, entity_type=entity_type, dtype=dtype, + description=description, tags=tags or []) + self._definitions[name] = fd + logger.debug("Registered feature '%s' (%s)", name, entity_type) + return fd + + def register_transformer(self, feature_name: str, func: Callable) -> None: + self._transformers[feature_name] = func + + def ingest(self, feature_name: str, entity_id: str, value: Any, + ttl_seconds: Optional[int] = None) -> None: + """Write a feature value to both online and offline stores.""" + transformer = self._transformers.get(feature_name) + if transformer and isinstance(value, (int, float)): + value = transformer(value) + fv = FeatureValue(feature_name=feature_name, entity_id=entity_id, + value=value, ttl_seconds=ttl_seconds) + self._offline.write(fv) + self._online.write(fv) + + def get_online_features(self, entity_id: str, feature_names: List[str]) -> FeatureVector: + features = {} + for name in feature_names: + val = self._online.read(name, entity_id) + if val is not None: + features[name] = val + return FeatureVector(entity_id=entity_id, features=features) + + def get_training_dataset(self, 
entity_ids: List[str], + feature_names: List[str], + start_time: Optional[datetime] = None) -> List[FeatureVector]: + dataset: List[FeatureVector] = [] + for entity_id in entity_ids: + features: Dict[str, Any] = {} + for name in feature_names: + fv = self._offline.get_latest(name, entity_id) + if fv: + features[name] = fv.value + dataset.append(FeatureVector(entity_id=entity_id, features=features)) + return dataset + + def compute_stats(self, feature_name: str) -> Optional[FeatureStats]: + return self._offline.compute_stats(feature_name) + + def list_features(self, entity_type: Optional[str] = None) -> List[FeatureDefinition]: + if entity_type: + return [fd for fd in self._definitions.values() if fd.entity_type == entity_type] + return list(self._definitions.values()) + + @property + def stats(self) -> Dict[str, Any]: + return { + "registered_features": len(self._definitions), + "online_cache_size": len(self._online._cache), + "offline_features": list(self._offline._store.keys()), + } diff --git a/mlops/model_monitor.py b/mlops/model_monitor.py new file mode 100644 index 0000000..8dcf7cd --- /dev/null +++ b/mlops/model_monitor.py @@ -0,0 +1,228 @@ +"""Model performance monitoring and drift detection.""" +from __future__ import annotations + +import logging +import statistics +import uuid +from collections import deque +from dataclasses import dataclass, field +from datetime import datetime +from typing import Any, Dict, List, Optional, Tuple + +logger = logging.getLogger(__name__) + + +@dataclass +class PredictionRecord: + record_id: str = field(default_factory=lambda: str(uuid.uuid4())) + model_name: str = "" + model_version: str = "" + input_features: Dict[str, Any] = field(default_factory=dict) + prediction: Any = None + ground_truth: Optional[Any] = None + latency_ms: float = 0.0 + timestamp: datetime = field(default_factory=datetime.utcnow) + metadata: Dict[str, Any] = field(default_factory=dict) + + +@dataclass +class PerformanceMetrics: + model_name: str = 
"" + model_version: str = "" + window_size: int = 0 + accuracy: float = 0.0 + error_rate: float = 0.0 + mean_latency_ms: float = 0.0 + p95_latency_ms: float = 0.0 + p99_latency_ms: float = 0.0 + throughput_rps: float = 0.0 + computed_at: datetime = field(default_factory=datetime.utcnow) + + +@dataclass +class Alert: + alert_id: str = field(default_factory=lambda: str(uuid.uuid4())) + model_name: str = "" + alert_type: str = "" # "performance_degradation" | "drift" | "latency" | "error_rate" + severity: str = "warning" # "info" | "warning" | "critical" + message: str = "" + metric_name: str = "" + current_value: float = 0.0 + threshold: float = 0.0 + created_at: datetime = field(default_factory=datetime.utcnow) + resolved: bool = False + + +@dataclass +class MonitoringConfig: + accuracy_threshold: float = 0.80 + error_rate_threshold: float = 0.05 + latency_p95_threshold_ms: float = 500.0 + drift_threshold: float = 0.1 + window_size: int = 100 + alert_cooldown_seconds: int = 300 + + +def _percentile(sorted_values: List[float], p: float) -> float: + if not sorted_values: + return 0.0 + idx = int(len(sorted_values) * p / 100) + return sorted_values[min(idx, len(sorted_values) - 1)] + + +class MetricsCalculator: + """Computes performance metrics from prediction records.""" + + def compute(self, records: List[PredictionRecord]) -> PerformanceMetrics: + if not records: + return PerformanceMetrics() + + model_name = records[0].model_name + model_version = records[0].model_version + + labeled = [r for r in records if r.ground_truth is not None] + correct = sum(1 for r in labeled if r.prediction == r.ground_truth) + accuracy = correct / len(labeled) if labeled else 0.0 + error_rate = 1 - accuracy if labeled else 0.0 + + latencies = sorted(r.latency_ms for r in records) + mean_lat = statistics.mean(latencies) if latencies else 0.0 + p95 = _percentile(latencies, 95) + p99 = _percentile(latencies, 99) + + # Throughput: records per second over the window + if len(records) >= 2: 
+ duration = (records[-1].timestamp - records[0].timestamp).total_seconds() + throughput = len(records) / max(duration, 0.001) + else: + throughput = 0.0 + + return PerformanceMetrics( + model_name=model_name, + model_version=model_version, + window_size=len(records), + accuracy=accuracy, + error_rate=error_rate, + mean_latency_ms=mean_lat, + p95_latency_ms=p95, + p99_latency_ms=p99, + throughput_rps=throughput, + ) + + +class ModelMonitor: + """ + Real-time model performance monitoring with sliding window + metrics, threshold alerting, and performance history tracking. + """ + + def __init__(self, config: Optional[MonitoringConfig] = None) -> None: + self._config = config or MonitoringConfig() + self._records: Dict[str, deque] = {} # model_name -> deque[PredictionRecord] + self._alerts: List[Alert] = [] + self._metrics_history: Dict[str, List[PerformanceMetrics]] = {} + self._calculator = MetricsCalculator() + self._last_alert_time: Dict[str, datetime] = {} + logger.info("ModelMonitor initialized") + + def log_prediction(self, model_name: str, model_version: str, + input_features: Dict[str, Any], + prediction: Any, + ground_truth: Optional[Any] = None, + latency_ms: float = 0.0) -> PredictionRecord: + record = PredictionRecord( + model_name=model_name, + model_version=model_version, + input_features=input_features, + prediction=prediction, + ground_truth=ground_truth, + latency_ms=latency_ms, + ) + if model_name not in self._records: + self._records[model_name] = deque(maxlen=self._config.window_size * 10) + self._records[model_name].append(record) + return record + + def compute_metrics(self, model_name: str) -> Optional[PerformanceMetrics]: + records = list(self._records.get(model_name, [])) + if not records: + return None + window = records[-self._config.window_size:] + metrics = self._calculator.compute(window) + self._metrics_history.setdefault(model_name, []).append(metrics) + self._check_alerts(metrics) + return metrics + + def _check_alerts(self, metrics: 
PerformanceMetrics) -> None: + cfg = self._config + checks = [ + ("accuracy", metrics.accuracy < cfg.accuracy_threshold, + f"Accuracy {metrics.accuracy:.1%} below threshold {cfg.accuracy_threshold:.1%}", + metrics.accuracy, cfg.accuracy_threshold, "warning"), + ("error_rate", metrics.error_rate > cfg.error_rate_threshold, + f"Error rate {metrics.error_rate:.1%} above threshold {cfg.error_rate_threshold:.1%}", + metrics.error_rate, cfg.error_rate_threshold, "warning"), + ("latency_p95", metrics.p95_latency_ms > cfg.latency_p95_threshold_ms, + f"P95 latency {metrics.p95_latency_ms:.0f}ms above threshold {cfg.latency_p95_threshold_ms:.0f}ms", + metrics.p95_latency_ms, cfg.latency_p95_threshold_ms, "critical"), + ] + for metric_name, triggered, message, current, threshold, severity in checks: + if triggered: + self._maybe_raise_alert( + metrics.model_name, metric_name, severity, message, current, threshold + ) + + def _maybe_raise_alert(self, model_name: str, alert_type: str, severity: str, + message: str, current: float, threshold: float) -> None: + import datetime as dt + key = f"{model_name}:{alert_type}" + last = self._last_alert_time.get(key) + if last: + elapsed = (datetime.utcnow() - last).total_seconds() + if elapsed < self._config.alert_cooldown_seconds: + return + alert = Alert(model_name=model_name, alert_type=alert_type, severity=severity, + message=message, metric_name=alert_type, + current_value=current, threshold=threshold) + self._alerts.append(alert) + self._last_alert_time[key] = datetime.utcnow() + logger.warning("[ALERT] %s - %s: %s", severity.upper(), model_name, message) + + def get_alerts(self, model_name: Optional[str] = None, + severity: Optional[str] = None, + unresolved_only: bool = True) -> List[Alert]: + alerts = self._alerts + if model_name: + alerts = [a for a in alerts if a.model_name == model_name] + if severity: + alerts = [a for a in alerts if a.severity == severity] + if unresolved_only: + alerts = [a for a in alerts if not 
a.resolved] + return alerts + + def resolve_alert(self, alert_id: str) -> bool: + for alert in self._alerts: + if alert.alert_id == alert_id: + alert.resolved = True + return True + return False + + def metrics_history(self, model_name: str, last_n: int = 10) -> List[PerformanceMetrics]: + history = self._metrics_history.get(model_name, []) + return history[-last_n:] + + def dashboard(self) -> Dict[str, Any]: + result: Dict[str, Any] = {} + for model_name in self._records: + metrics = self.compute_metrics(model_name) + result[model_name] = { + "metrics": { + "accuracy": metrics.accuracy if metrics else None, + "error_rate": metrics.error_rate if metrics else None, + "p95_latency_ms": metrics.p95_latency_ms if metrics else None, + "throughput_rps": metrics.throughput_rps if metrics else None, + }, + "prediction_count": len(self._records[model_name]), + "active_alerts": len(self.get_alerts(model_name)), + } + return result diff --git a/mlops/model_registry.py b/mlops/model_registry.py new file mode 100644 index 0000000..8ad25d3 --- /dev/null +++ b/mlops/model_registry.py @@ -0,0 +1,205 @@ +"""Model versioning and registry.""" +from __future__ import annotations + +import hashlib +import json +import logging +import uuid +from dataclasses import dataclass, field +from datetime import datetime +from typing import Any, Dict, List, Optional + +logger = logging.getLogger(__name__) + + +@dataclass +class ModelVersion: + version_id: str = field(default_factory=lambda: str(uuid.uuid4())) + version: str = "1.0.0" + model_name: str = "" + description: str = "" + metrics: Dict[str, float] = field(default_factory=dict) + parameters: Dict[str, Any] = field(default_factory=dict) + tags: Dict[str, str] = field(default_factory=dict) + stage: str = "development" # development | staging | production | archived + artifact_path: Optional[str] = None + checksum: Optional[str] = None + created_by: str = "system" + created_at: datetime = field(default_factory=datetime.utcnow) + updated_at: 
datetime = field(default_factory=datetime.utcnow) + + +@dataclass +class RegisteredModel: + model_id: str = field(default_factory=lambda: str(uuid.uuid4())) + name: str = "" + description: str = "" + versions: List[ModelVersion] = field(default_factory=list) + latest_version: Optional[str] = None + production_version: Optional[str] = None + created_at: datetime = field(default_factory=datetime.utcnow) + + +@dataclass +class ModelSearchResult: + models: List[RegisteredModel] = field(default_factory=list) + total: int = 0 + query: Dict[str, Any] = field(default_factory=dict) + + +def _compute_checksum(data: Dict[str, Any]) -> str: + serialized = json.dumps(data, sort_keys=True, default=str) + return hashlib.sha256(serialized.encode()).hexdigest()[:16] + + +def _bump_version(current: str, part: str = "patch") -> str: + parts = [int(x) for x in current.split(".")] + while len(parts) < 3: + parts.append(0) + if part == "major": + return f"{parts[0]+1}.0.0" + if part == "minor": + return f"{parts[0]}.{parts[1]+1}.0" + return f"{parts[0]}.{parts[1]}.{parts[2]+1}" + + +class ModelRegistry: + """ + Central model registry for versioning, staging, and lifecycle management. 
+ """ + + def __init__(self) -> None: + self._models: Dict[str, RegisteredModel] = {} # name -> model + self._version_index: Dict[str, ModelVersion] = {} # version_id -> version + logger.info("ModelRegistry initialized") + + def register_model(self, name: str, description: str = "") -> RegisteredModel: + if name in self._models: + logger.debug("Model '%s' already registered", name) + return self._models[name] + model = RegisteredModel(name=name, description=description) + self._models[name] = model + logger.info("Registered model: %s", name) + return model + + def log_model(self, model_name: str, metrics: Dict[str, float], + parameters: Optional[Dict[str, Any]] = None, + tags: Optional[Dict[str, str]] = None, + description: str = "", + created_by: str = "system") -> ModelVersion: + if model_name not in self._models: + self.register_model(model_name) + + registered = self._models[model_name] + existing_versions = registered.versions + + # Auto-increment version + if existing_versions: + latest_v = existing_versions[-1].version + new_v = _bump_version(latest_v) + else: + new_v = "1.0.0" + + params = parameters or {} + version = ModelVersion( + version=new_v, + model_name=model_name, + description=description, + metrics=metrics, + parameters=params, + tags=tags or {}, + created_by=created_by, + checksum=_compute_checksum({"metrics": metrics, "parameters": params}), + ) + registered.versions.append(version) + registered.latest_version = new_v + self._version_index[version.version_id] = version + + logger.info("Logged model %s v%s (checksum=%s)", model_name, new_v, version.checksum) + return version + + def transition_stage(self, model_name: str, version: str, new_stage: str) -> bool: + registered = self._models.get(model_name) + if not registered: + return False + mv = next((v for v in registered.versions if v.version == version), None) + if not mv: + return False + old_stage = mv.stage + mv.stage = new_stage + mv.updated_at = datetime.utcnow() + if new_stage == 
"production": + registered.production_version = version + logger.info("Model %s v%s: %s -> %s", model_name, version, old_stage, new_stage) + return True + + def get_model(self, model_name: str) -> Optional[RegisteredModel]: + return self._models.get(model_name) + + def get_version(self, model_name: str, version: str) -> Optional[ModelVersion]: + registered = self._models.get(model_name) + if not registered: + return None + return next((v for v in registered.versions if v.version == version), None) + + def get_production_version(self, model_name: str) -> Optional[ModelVersion]: + registered = self._models.get(model_name) + if not registered or not registered.production_version: + return None + return self.get_version(model_name, registered.production_version) + + def list_models(self) -> List[str]: + return list(self._models.keys()) + + def search(self, tags: Optional[Dict[str, str]] = None, + min_metric: Optional[Dict[str, float]] = None, + stage: Optional[str] = None) -> ModelSearchResult: + results: List[RegisteredModel] = [] + for model in self._models.values(): + for mv in model.versions: + if stage and mv.stage != stage: + continue + if tags and not all(mv.tags.get(k) == v for k, v in tags.items()): + continue + if min_metric and not all(mv.metrics.get(k, 0) >= v for k, v in min_metric.items()): + continue + results.append(model) + break + return ModelSearchResult(models=results, total=len(results), query={ + "tags": tags, "min_metric": min_metric, "stage": stage}) + + def compare_versions(self, model_name: str, v1: str, v2: str) -> Dict[str, Any]: + mv1 = self.get_version(model_name, v1) + mv2 = self.get_version(model_name, v2) + if not mv1 or not mv2: + return {"error": "version not found"} + metric_diff = { + k: mv2.metrics.get(k, 0) - mv1.metrics.get(k, 0) + for k in set(mv1.metrics) | set(mv2.metrics) + } + return {"v1": v1, "v2": v2, "metric_diff": metric_diff, + "param_changes": {k: v for k, v in mv2.parameters.items() + if mv1.parameters.get(k) != v}} 
+ + def delete_version(self, model_name: str, version: str) -> bool: + registered = self._models.get(model_name) + if not registered: + return False + before = len(registered.versions) + registered.versions = [v for v in registered.versions if v.version != version] + return len(registered.versions) < before + + @property + def stats(self) -> Dict[str, Any]: + return { + "total_models": len(self._models), + "total_versions": len(self._version_index), + "models": { + name: { + "versions": len(m.versions), + "production": m.production_version, + "latest": m.latest_version, + } + for name, m in self._models.items() + }, + } diff --git a/mlops/training_pipeline.py b/mlops/training_pipeline.py new file mode 100644 index 0000000..2ed2457 --- /dev/null +++ b/mlops/training_pipeline.py @@ -0,0 +1,217 @@ +"""Automated ML training pipeline orchestration.""" +from __future__ import annotations + +import logging +import time +import uuid +from collections import defaultdict +from dataclasses import dataclass, field +from datetime import datetime +from typing import Any, Callable, Dict, List, Optional + +logger = logging.getLogger(__name__) + + +@dataclass +class PipelineStep: + name: str + func: Callable + inputs: List[str] = field(default_factory=list) + outputs: List[str] = field(default_factory=list) + retry_count: int = 1 + timeout_seconds: int = 300 + metadata: Dict[str, Any] = field(default_factory=dict) + + +@dataclass +class StepResult: + step_name: str + status: str = "pending" # pending | running | success | failed | skipped + outputs: Dict[str, Any] = field(default_factory=dict) + error: Optional[str] = None + elapsed_seconds: float = 0.0 + started_at: Optional[datetime] = None + finished_at: Optional[datetime] = None + + +@dataclass +class PipelineRun: + run_id: str = field(default_factory=lambda: str(uuid.uuid4())) + pipeline_name: str = "" + status: str = "pending" + step_results: List[StepResult] = field(default_factory=list) + parameters: Dict[str, Any] = 
field(default_factory=dict) + artifacts: Dict[str, Any] = field(default_factory=dict) + metrics: Dict[str, float] = field(default_factory=dict) + started_at: Optional[datetime] = None + finished_at: Optional[datetime] = None + + +class StepExecutor: + """Executes a single pipeline step with retry logic.""" + + def execute(self, step: PipelineStep, context: Dict[str, Any]) -> StepResult: + result = StepResult(step_name=step.name, started_at=datetime.utcnow()) + result.status = "running" + start = time.perf_counter() + + for attempt in range(step.retry_count): + try: + inputs = {k: context[k] for k in step.inputs if k in context} + output = step.func(**inputs) + if output is None: + output = {} + if not isinstance(output, dict): + output = {"result": output} + result.outputs = output + result.status = "success" + break + except Exception as exc: + logger.warning("Step '%s' attempt %d failed: %s", step.name, attempt + 1, exc) + result.error = str(exc) + result.status = "failed" + if attempt < step.retry_count - 1: + time.sleep(0.1 * (attempt + 1)) + + result.elapsed_seconds = time.perf_counter() - start + result.finished_at = datetime.utcnow() + return result + + +class TrainingPipeline: + """ + Automated ML training pipeline with step registration, + dependency resolution, execution, and run tracking. 
+ """ + + def __init__(self, name: str) -> None: + self.name = name + self._steps: List[PipelineStep] = [] + self._runs: List[PipelineRun] = [] + self._executor = StepExecutor() + logger.info("TrainingPipeline '%s' initialized", name) + + def add_step(self, name: str, func: Callable, + inputs: Optional[List[str]] = None, + outputs: Optional[List[str]] = None, + retry_count: int = 1) -> "TrainingPipeline": + step = PipelineStep(name=name, func=func, + inputs=inputs or [], outputs=outputs or [], + retry_count=retry_count) + self._steps.append(step) + return self + + def _topological_sort(self) -> List[PipelineStep]: + """Order steps by data dependency (outputs -> inputs).""" + available: set = set() + ordered: List[PipelineStep] = [] + remaining = list(self._steps) + + max_iterations = len(remaining) ** 2 + 1 + iteration = 0 + while remaining: + iteration += 1 + if iteration > max_iterations: + # Break cycle: just append rest + ordered.extend(remaining) + break + for step in list(remaining): + if all(inp in available for inp in step.inputs): + ordered.append(step) + available.update(step.outputs) + remaining.remove(step) + return ordered + + def run(self, parameters: Optional[Dict[str, Any]] = None) -> PipelineRun: + run = PipelineRun(pipeline_name=self.name, + parameters=parameters or {}, + started_at=datetime.utcnow()) + run.status = "running" + context: Dict[str, Any] = dict(parameters or {}) + + ordered_steps = self._topological_sort() + logger.info("Starting pipeline '%s' (%d steps)", self.name, len(ordered_steps)) + + for step in ordered_steps: + logger.debug("Executing step '%s'", step.name) + step_result = self._executor.execute(step, context) + run.step_results.append(step_result) + + if step_result.status == "success": + context.update(step_result.outputs) + # Collect metrics from step outputs + for k, v in step_result.outputs.items(): + if isinstance(v, (int, float)) and k.startswith("metric_"): + run.metrics[k[7:]] = v + else: + run.status = "failed" + 
logger.error("Pipeline '%s' failed at step '%s': %s", + self.name, step.name, step_result.error) + break + else: + run.status = "success" + + run.finished_at = datetime.utcnow() + run.artifacts = {k: v for k, v in context.items() + if k not in (parameters or {})} + self._runs.append(run) + + elapsed = (run.finished_at - run.started_at).total_seconds() + logger.info("Pipeline '%s' %s in %.2fs", self.name, run.status, elapsed) + return run + + def get_run(self, run_id: str) -> Optional[PipelineRun]: + return next((r for r in self._runs if r.run_id == run_id), None) + + def list_runs(self, status: Optional[str] = None) -> List[PipelineRun]: + if status: + return [r for r in self._runs if r.status == status] + return list(self._runs) + + def last_run(self) -> Optional[PipelineRun]: + return self._runs[-1] if self._runs else None + + @property + def step_names(self) -> List[str]: + return [s.name for s in self._steps] + + @property + def run_stats(self) -> Dict[str, Any]: + if not self._runs: + return {"total": 0} + statuses = defaultdict(int) + for r in self._runs: + statuses[r.status] += 1 + return {"total": len(self._runs), "by_status": dict(statuses)} + + +# ----- Convenience factory functions ----- + +def make_data_pipeline(name: str = "data_prep") -> TrainingPipeline: + """Build a standard data preparation pipeline.""" + import statistics + + def load_data(data_path: str = "data.csv") -> Dict[str, Any]: + logger.info("Loading data from %s", data_path) + return {"raw_data": list(range(100)), "data_path": data_path} + + def validate_data(raw_data: list) -> Dict[str, Any]: + valid = [x for x in raw_data if x is not None] + return {"validated_data": valid, "metric_validation_rate": len(valid) / len(raw_data)} + + def feature_engineer(validated_data: list) -> Dict[str, Any]: + features = [float(x) / max(validated_data) for x in validated_data] + return {"features": features, "metric_feature_count": len(features)} + + def split_data(features: list) -> Dict[str, Any]: + 
n = len(features) + split = int(n * 0.8) + return {"train_features": features[:split], "test_features": features[split:]} + + pipeline = TrainingPipeline(name) + pipeline.add_step("load_data", load_data, outputs=["raw_data"]) + pipeline.add_step("validate_data", validate_data, inputs=["raw_data"], outputs=["validated_data"]) + pipeline.add_step("feature_engineer", feature_engineer, inputs=["validated_data"], outputs=["features"]) + pipeline.add_step("split_data", split_data, inputs=["features"], + outputs=["train_features", "test_features"]) + return pipeline diff --git a/neural_networks/__init__.py b/neural_networks/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/neural_networks/__pycache__/__init__.cpython-312.pyc b/neural_networks/__pycache__/__init__.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..de4434f497a9e56f9d598a02cfb1b26c3b42ffe9 GIT binary patch literal 152 zcmX@j%ge<81i8)AGePuY5P=Rpvj9b=GgLBYGWxA#C}INgK7-W!GS|<@&rQ`YD$UDF zEz&Q~FUr<0N=!G05P7MkMTt4_d8s8JiDLcu_{_Y_lK6PNg34bUHo5sJr8%i~MXW$` T7=gGL#Q4a}$jDg43}gWS18^j~ literal 0 HcmV?d00001 diff --git a/neural_networks/cnn/__init__.py b/neural_networks/cnn/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/neural_networks/cnn/__pycache__/__init__.cpython-312.pyc b/neural_networks/cnn/__pycache__/__init__.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..df59ea5235917be5b101156e59cbc329e96c5bb5 GIT binary patch literal 156 zcmX@j%ge<81i8)AGePuY5P=Rpvj9b=GgLBYGWxA#C}INgK7-W!vewVY&rQ`YD$UDF zEz&Q~FUr<0N=!G05P7MkMTt4_d8s8JiDLcaygdE*_{_Y_lK6PNg34bUHo5sJr8%i~ XMXW%x7=gGL#Q4a}$jDg43}gWSv3n*i literal 0 HcmV?d00001 diff --git a/neural_networks/cnn/__pycache__/log_analyzer.cpython-312.pyc b/neural_networks/cnn/__pycache__/log_analyzer.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..84c3e3c5bacba9de62346e5fe996dd2ff1dbca7b GIT binary patch literal 12070 
zcmcIqYj7Lab>0OQF9Ia^e(C{=lqg6fC0dmAut-T3^{`}7wk10WEfs;VO9~VSFuR~6 z!hoaNY7IK-h>YVBiq#r!l7<>PQ+DFEYU3a3IGxE%{}2Xc!fR#XnPw)@KSosR*uVOn zyI26E;Mh&3m&Cbu?`!Yl+;hHj?*3wtB{Heh4*O@WdT`+$AKG2j>} z9Vi_s8z^J>=74jge4w1?44h;dUZ)QrA4`>QV+<4qszi%u6>az{5$*VM+^`H(4{>7Y z-|+)AlGVT+=ESlqoalVpptX2gi@1T>Y-u@4D;_GXV=XIDR;8B>)U&*5U(@UMG+vyFxH z-rr~o^vT}eXhrw*oW;Fvi)s?3vtvW5;13RlRNU(wSRWz6T3 zXm(22D`QqlI5eu7MQJn;8dps|*{huMX+zeiQH%Jc=a87@ByNBQuNs~Ohe}4#um`y; zWz3}nmNJ27&8T6aC*nNG{EjupXBN?hvJxsAw_`>n*xjeJ%~g#`SXN82TkY|SG@l{q zl4=Tg&q@K=jy70|K*$?bt%`I(!t#vYe)+9Ho(x#cYvL-P48vUg~QMuO6q>~i=8L37>*#zbO$ z+j>#_W!r}B5hT+)LT-D^q6yn$X4_-IlG(65C)oMG3XkR})oYH}eY`?3wuisLN! zf&2NTO~{?*qWo}HO0v={(dA#vAo4L|IM;rdCb=T$b^Ew8r5tw}bz?%z6cwU|3!K7V zwMR`cBhLkSfeX{SSr83jo1TvH`UZ){2#$d+A!Ft+(=$}BkGm%L3`3mHa0YoXLF-*M zh=w3E#+2czL*QbTm=MMYv>=+UU~YY!K05s!Mc-?Vpb@=0W-aK!@<;Vx3p@2O443Pv zd@Ncq-|Two{Huf)C&0&(3E5gcE5KES=v?FSanxodJEm{(_M;$|!sD)PK zB-We9FD6{%-1dGZqvSFa46*L_eS2S44S-JznF#E~f%EyL$B{V3@x*4_nK~>l|A*NP z>{2aqf6Y(xJa@H0;Kuo%7%%cS4fm+`ekdi2UlIL7{_stMYUtdm@;-%@(&durjD2p~ zGb9D2OQUjcq_fB|=*%k0&OLz;6pzx|nX6And8r=cx7_b9b00MHB>CBje`)x~hJR|h z(VS}N`D}@R%}X_F`GjRHKlJ*C8-8o=tmys!oVC0cZ`7tdHaZHWNsC!HUHKp~{;v~d z;_46@%7>}-y3^tdUG3Xqr#rte5vf2?cdP^Sku24=pL=4Ty9DYhbWxJkQqReQ&mDZ` ziBsS39N+(xYVwDr5k)nQ_=Bo3AO%&M=h(q-eC@S-RX6H)X171R{>pH0<1hqhl*T;KSn?YeDl-Qi!<&(}XQ zRWBzr>br%$gHVPRNe3;H~zTutzsY%Lcd#(pT(1= zbj^TuV{%a5i5wKH(5xP+w?dUZMtMY9@lz}ySGWfTr>RF%n^dw)ZIU=b;FqYM0=3D* z4k5e1aD`BvO*BG@nqVOah|RDPOo%PxX5hGTumGU=0Rqr|;Ac6qudAca1@w6#6c{7O z?8>9`i~jJrFM{e`;5zRRU~s1|J1RCb5@LAfmk64WPL6UkaVa#FJ zPr07Pxtlx|SGAvqH6aB&ia#Q$_TbovXV4#jg`ueB9%cl1d{Qthhy0?VT09JYDsH2E z3SG()kwFlJy2Y{3P6N=TAC`&76KPnv7M*(0ZfeyGg2T|XAuh~Y>vQqW-SoUVv#vER z{FQ@k)G|~;+`(1=3I{6?&doQE4x-#}Zr-S!4d;<38>0M4D5aPooWmzyy@v=8g~*e> z(=h^&445cp#6B4DSMVlDOtAoz7a@U|FXP;S_jZ>6WACX$U; z&%lb%v$&#rE}(sNu1YSE-}P}F)mnjSP5qI!Q?eJb4c?ZON5TcUz8-ED&B$H`_5^gy zATH^Y9K;3kkMYGMPZrWv0Mc>4$`9;_J z7vA4J!_PGTL`b#uf2Pn0ez~fdpDtAf_Upqs%9X94B&~N=o2q3d3jO_bI9{B6)){!D4p_Q=_US 
z%QUK8Q@Dgz6_rO-lCtats*N&%bm4`w04RF^ZRHEpwKb=nX>d$_gM2fqij81_NOdvB zcmlPR{{evwbJtpS%|2nDtL}MExxM3O(Ru5Wcf0rgTu65xjN3C6^(3J?A)%}4rhBJ) zlg&Tg^wy?yRY$xps(-81g_2H@H5_~BnSZ%AJJ!J1Ev ze0^7D)#?XEuA${k=XGZqo9dr$>%UXq56D>G6+gnFPd0Ch_boV@Q;z23`VSl%7j5YJ zK?!GbYBSqBbLt;X-#mTCy8E;H7BpiJ^L=4`-)i$RY(b02w8l%&v#(fRv5`4i%8U}&HYMbdwu}YO2SXQwau0fuuYjmVxQmyG zD3V)DM0F$ax*;c`n3$aaJR>1A0Yy!51Olz7m}TYzEDI)(=zPh4PUJ-pCK+9>%Y}le z{h^b^POyJ)yolWWGwOtNT~<2P9hz-U2#Ke!Tap`Ry58D+r@kXqcSw`KFW1!b_lOs> z=5zNxZSDjS?$J~sJD3Qfa=DF2UgR(oNQh4pJ|-C!uL%3vdnqe%nBY=N&PobI3VD>X zX}HK`NfT(7(HzD$WvD_hB##sX~!) z%vm*y5$t#m@`1ZYD#Q(Y6cI6FnAt({ckw8Hlk$ZAP*4IVF&#nnD$CX_*B6ixaWa=N zSV;3LpV4Z(hN6eseO|#XUC5Q+?$VshZtDvB7v`Bq4Ef-x_tWbbd%Q%cYeWcpXf`h$ z5c#Z5b|9h9nYhAzBpiJp2&VN*4vVRFsnTJ3?7=F#saI=74Se|J0<8i9zeJdzz*4qq z4rbWM2H3;`Y-G(=wr6c+hh{6=aHD49=ua#Y&9JJi;}*A5wPQKHCg5 z*3Lg*IpOk@>}&L@g`ybxOYA1Hw3&4T+0Fd@Mp!%Cuvzaj9}nmBSw1hC)0wy>Fch<_ z@|q0Rw-7bGV|Y8e(fO^qx)<7sD`sC6wXF&^S%gLJ$&CwJF7!v5ayEbl`2)C>)i2k8 zW$8d^GL(+~DTv(Mj%pzCI#po4DngCgWzY91#SZHYq###LAyOUa+M{-McG~e{(9mjD3eEmF3aa-v% z=Y(^?+LW?3B@WM9TQl{m;KV6np3{kA=ee6P8MX%*D}%D@3qVrtyyjcv0T~z$S5YL> z;FiG!?fKJKI00K}BuIfAA)8~;azwLei5khZ!^(@pvM+n%S7+%Qd+FC-`Kzm8@-MFl z07+nLovC@4{!tTyMz((0fVwiZVYl8g$u7B)NI4M#3z4=1x+79_5vP)~+~rkVZR?db z*G3o^gFWjzy9eF4s3g{t^W4dJlIae{z$6Z{jH!8K-$Vu+^itMBA!B85B|9?Rvs_~7 zXn@d_^0y!w7>KZ>K-I}CvGCEh@lBuXp7{A+? 
zsdFV;CSRN@U!SRJNNh`1UEeiV)jH>B&6HP@kA0zhL#lj3@>05dD}3#>jnkp2&_Zoz zs0Mfp>MhZP_aT1 z(dK4-xvGt14KWAIP8N|Ag<#R7O4(WJT5|{k-7=40P6sV7yMlHKp^XAL)BUdRmiwnc z5;I$OedydhBTNd@rBkH|@lJWm2hQDdFv}1zi2cL*y|}(bdp!_Wv-N%kxqs?<=PyFP z(2aMPv}wjkn)J6{{lK0$<;)X0&HDuy%iY7g$cCj@d= zoASmK6k*Ps(iaKN#B5SmdvM&nl2y58CTT2im=$$*6?6YZt)hiSr}q}KMNP#`iN4-@%;=)gtNc@{acj32v7DwRr+ei|1m|sCiJXKfB}dSw=}pK!X?dT_rUp7y_&8= zyW@R#>l&snOkGF}CWmIi>ALRtk!;S~nnz|2-8z!4I~YGg{wQm*D!DFEnsRJdaCD>` z9W&w`$2Ph-?OwEU)pXa|(mwP2LPu|^qc`2LFWs{LK4-N#Q;xOq{VC_#Nn!Hb3Pm)# z|K?-q>K%lAs;`}xIC0}>X7!rHw~|$fp~Pc%YwM@|Q~spz8q`WhsG%<^@v$;qM4)`uF;n%A1{-m z+i@LGrbx2 z{!z^>z=Q)k!2*3J2?LQpzRXTzNf5e~y;fm|ksUPncC&f-w2{7HFp`6r91j}c*Uw&c zwi@nmNBjY=?13s*B6LHMQ_;n05ZqG|AeXsc>ki;u?`^v^{!yO~%JnlPsmezHpxS821-=eXR7lKXN%YyCYm0}7%Qmp2pn zCI~Ee`be^%AI-mV=HZ&{XMBXE^!g(`Ic|;`qlg}o2I2z2Ge6p z)=E?T;wG-^kwv4`v?jBuLyI3h`hcD_#}4)5ZjxU|L|=mrj?s6*a3YbIkeTWcM!ey3 zss&s{Un9ulRMVdONLW&2fvVC%sOIqaDAgfJC0mFPyUJui<8~ty2;gH}`p#IU1ESi7 zq%gRY?pQjg+C?JAh@2oo>P$0_$&;eFtk~3-FURNBJ=!O%z4Cvc9z-Lj(01VS5}yBv zJM~L$-N&5mW6u6DSN1Vi@hfiQuei=M*ZE7XH&OONuGZBQWw`=7foB7_~aUh?b=J)bz?sFh)5B?Wil~C3I literal 0 HcmV?d00001 diff --git a/neural_networks/cnn/__pycache__/metric_classifier.cpython-312.pyc b/neural_networks/cnn/__pycache__/metric_classifier.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..196b1c62e5de7c3b3cab22efe7759c3f29aad134 GIT binary patch literal 12368 zcmbt4Yj7LKd3T2caCi{_3BE}Y6eUrRNl=uimo4i_y)28WB3X`+C~x~%TW6$UVD+^xm%18%K0&g3{T@m3_4~Y@ zkS`dpk4nD4fPHsgU#H|79`$=d0&njZJfTrh=sf9>;LS5KA_hI)LFWS;s)Lpdo;DEl??+Emp$kZ&Hf@sn2@527|*wmpB>-2x8aSpm?fF z^bBlLKtLE3J$`pU2w{cPuC}j)Z zl{}Pc%9i4nN)ogsVyCzmN|A6S2}&zUir{sCJ4)T8`)+b#GrWn*5VRn$BWOji96=j` z6##D1GAjxGehC-BF4hB-wI0dshm9R>Si17gOdTSXb^#ct(iYoQ-IOlgbA9vl=ESjg zTsK^|Z9i%HQ4>}er;Ks_dSE&*!~byj=J1_kbI%`2K7VLIMOzNj3lyz8Oq0!qEUys- zkPrdU3}_u{u?ild4M8=68U$+);Ko&UC=OR_%mQpHtpk9&+I-<)T32!5g|xwP;l;G6 z@P3II zGE_ZEg`kI$B2-pbN9j;Tc~DQ-71@?=)I>m~oa%+?RTuOryJxa}0-$)JOvteGyCLLv 
zk_yJcMpY3uq6)(tqv{Z@IY}XFHW#6@n6uBQreG}PcC{wk@{QUMl8>ZVO{}(`;n|B^ z2nk72L<1wq@K7(6Rae>GQEfr*+J2f>U*sa1rR{i47_ddTh&IB7Vct<)Zq@cgD4yY| z1}dtD9H>Xg849r009LMOM21LCK`#AL)L7D22P2}Ew#$sj6fs6jWJDDK2B=bzrvR^q zF&e@!_w4+mW@urF&=E7Q55xXA^>v{7*+M!*%t)o|>hw}qtX^h-@-ioYYEf&1idc~m z@Chv;hj0DN6m!|iQqf9iQ3-vjuGqXP(hDf_b@+a?DryTMFOd|fir5Oj0==uDcQv+( zREv$g@hP;g&W=x61DrkCk(T_9OEi#bTqM7Ey*6-!ov zjxrH?g2nkqmk>FC*lWmI^$d`)8}M*jEGuUbIy&* zNJ0j;+wJ4yM3#Q0+d3cwgmWX}ws4zpE+l%qA$P_SlAK%pL9fRzZFA-tIia_-9Ki2> zL;Y@?`oPqCvv+3a%<0(a$&C}?xF@kC#7M+_Z zXpkrkb7XLiJQW5>l9^ov%#Oy{-DDo%GWIDGREcOTiQNbQAtfPZm=~Fr*aI(LXhL1s zwE;k8LS|>eGijYXKGl$*?^{}vW@lz1mDTjG-}=R>-x!?MZ4dqzQqi=E37DT&F#!qT zz90cvGC!I2$n;42|q5SOBmWVQo8vfZ}c-Z|-sub3K4bSFYLHs7!ANZJlO z7B@iKqIrD@a*3M&Ss|@t`iwG>-;Yu_HVGw1hEg?~X1e2aymy*Q9C_<}vS!m`aSMKN zGk^!URvA`X*eyb|0a-7hB`<}1UP<8&HJE6^8CeC^834~YnMDIbRukI{ z47kx@Srzt;$STiC$x$a-0HI6=6#K;vFL~2NX3j~nDgYhSybuTu6Y~bpc?P-(Tq|ol zUN_cr@Kl<#Av(YvRZZJ<~no!7Xz8bjrNt9=BzlGsN~@ zGK}w>H&kEQKCwN%F=bdj-kV<4KGXUa!sRt{`i7*wA--`|zx;O9`0n{y$iMmO_jb={ zzI!nC^7y{Ar8eH4w6u;NnAe#u3zIu94PII{zH8o8d&NEBj(;U(vXAeb*PCWdy+7W4 zhq<%&Zw^B?rSAn?Msv#SoLM((cBYKZ+qR@}!}y-G(LA|w;vi(ST&(-G(X_dG(ldE7 z7LKu)^-hYhS7YqF)t0gyc(3ZtvAaF*w#?cNq^t*Gn)#}lRL!Q_n%SC7sj5wPRwS#o z#kh1;eSCkiYQ+TiaaB!>n>SfN2JDPmr}kd@#`xZJW%c+${M6pbBNH#Yc7A^M{`al# zAOF{e)b3-ysDJ5>C%!S!I=v;)KXc@E%}rNw)#eWxw%n_KDOLd>y?W!EWz!8dK_|Mu zQ#E-6kiPU*!#&HUm^x|M^x^j2N$)k+b@#M;=A~5g`rCZ6`PmO@o_oJ7W|*(3kGI8N zo$+XCt{mjNkT9tA00;N)Ke*92JH5{iO ze`MJtp;`WOt!*F8{(O1eK8?l;@419r2#H?o1(#C+37_IpG}yDQPPWc$be-_M<{o;j`D#$;m+B)V-(SSV5A#P31UF-yCq*( z0B_bS1VUoa$4d@IbU> zri_ADsHQ0RjuL`-gAmsdfb_#lUx&;^q)ESLU@Nt5yNOfiN^sH zv9)Y+OMV{@!CCi6Fz63Ame9Ab@)&kL2Vk6m45$9H7u+hd!xa zc1=obeEZdzV{@I)C*i;4`MY$oWoN2>*Z)9(rG@DeyLbIaEy;eXk> zd$#o{OV91l+@^iWP5V-oeGe&Yj{%`x`mypNVT9plbl1-HY>^JYAPCU`gFwyKbwGb8 zP|^^TmnOa-=#r;!Ry)vCo@Sw3cqL0;bPIc_6R;EzETM*gCZ#E&Di@r~^=I>el?aOj?bfM-cm9k7=kaTc3YsS0+2R{LUE5a-9yi zPTd!7E8-S=zb_QRV-7g)SjpQZayTFr=>`_QLY#5buQ}a@K^wZ9)X6yH5VR>!)()Dm 
zIha>#0f|iaFI5*SipJ%Hag|ZeD66Hseyi_E9J$sO_gr6zLK!5`rU!XszNKlA~P~v_B0R%*JqWh*qn~@d4 zryLo<;{~-ZBn(U9UaTcDw--d$p+yPtaXI_i0IxX*^qU#mS_QNYTbGdE1#Kk?jCTkj&2tr3 z3=@WXwk_|izjOB8t+U+z`R=Xn9ZPlZ0>8{!_mEQQI$}H1)%A1L?aAu)8TyCZO)gd4 z9osu^Tah@PvUSYadXl!Dlx<@SJVV>$k*hCHy__^Q&ly)FjVlr_-8Vk7pamog9M$T0 zr{YFM%D(m?#p;|%V^eI;X6t)WH9fI?pR}$@oc->yPgQhXce<$=6hwX7 zb@Q}21(fy8w(q!Kw*&M*U3Y9hd3?OGGuC_M@WkQxo)3)67j%I5k)G0-v$N{C%l?b` zXXd#b$C5jarFI-oJ@@i`?yHXMVwSFg_d_IGzzv zYl#?E%;X0$-@QxBk?h+JaVIv>5`It^D$h%25rrQBM1F#!0p6i6uvQ{i8O2KdNkIa3 zz|&LU9n6EvK{!Pm$9fgK$}IS{{SxwSjx91=p&bzgJOd6_7js6Yco4cs*8qUOX|B3* z{*CkLs@Al%dd}LGw6-PormW9E7{g*u)TAt{=PYZJmbEF%x|llM;!JW)G5zHJbW7Xy z^V8?&T6&T#J*k$B$V2NU4#Q^OoUW{yt6ZL}T%Oo4vpQ9|KDHZ;*{@#u!*u(anLY6} zQylWjI^Y#+eY|_hm{29nZ5URmzqTP^OPqdt+dZ@69_Pp$iqQ|yq5)Iy1UzJw51j31 zFvs=<1A;@N*z}-Kl*ko1IU^0kCZ!vZWuzXvk@}mgI0pG}B{{n;JlQ^>biGur>t|MS z-u^W>`|hwS2i`!uDnyD(g&fhzTDO}Idg16A!w9mG1OdHp5A3<9sUS&F$8F* z6ddG%VM2)rP`pZA04`D=vd`fO-3C}NP31#|RktjvAh$qrMpoTRFB*;NSLjDg2K5>R z7h*uAkxRIQkmyCkTM&uSLePLE(HI7vEW~6W65~?q;HrdL*~m`?I8Mc%LM2$soa9)g*Du*0gud` z^Zdy2g;$kMwY*_>47e@7XuIKq;+qJ_ZvGDBKq@-00GX@EM#C0iTj^KOOhbt$VF;%{ zO>ceAd$;{v|15Vf%^9z76WkT!gmLoaS*|%<*91W^)8{Pgp988VX|wWrtlW)-tQI3$ ziixV8vLE48sqC!w<*unE1f<44p?ro zzC=(EF@~#;LI0DZ17Zc_K`P>c%NoVZQuM>?P?9~#$p#=ZanZ3dyES8P z5E8t^0ThJ+xS?eC4&p^4352gbb1N>`ZwE_$07CIAd154qyI@Y_1T9><7Y!iv9E-a< zbfsd3_!%!^$6f@)+1P`*GXS7DkvhmBvgWKWzz5IDCS@oP;{k&~B4Hp|b5a0*N08N? 
z{t&o7Iy|g)<3X2fg0neyXiyZS!Jwa)bbu*mQ7IvO8&BryRz~VpurK9fXSrr5?4^GP z0D;1+E3)UGr1v-er!Ci9bIqN}=FXY^+2#%Jdt-;v<~l$YO*VF<%pGY<^PFWx(y}6P zQ;bC%ncHaFelniL^imd*cLVe`y}2Srbpi$rEO7O-=^ zCG)v0;j+$9j(K1~Gbv|Ku%A~O)xag6oW?AxC)L2(86yPPG{=hNBU}--oNy>&b*>0U zIfiD-v3U^=T@eo44)%r|4Etg!_$9KL_Q-Q6~BkMzXTv54RfrV4e*=2!Ql~)D8O|Uyv8eq?WOwI zS<(Wn8|kM2fQ3!X5HlK_9!#0qF^XUKmbO-1ZJ26!vneiK4^M~Rj>L9BlZwVUxUW*t zKEurHN>y~h!GNtc&Ro|{YZJ@gX}{5)taGGnYhpS$H=Y!)o|!t6VBXQ(&`h0ARkg!O zadln1@_OBLU1HZe`)}+|)_0_;J7fA!8d?&n#F4i<=j)fn$7Whn^&Rtd&GEq*I#su3 z9xC-SJ5vqL-|EyB{Q{-d=?TB06Z;HIL%f9GG5|0k;+QugAXVSSQu=`n9~vbTMBJyh zvBewNW?>z1ou$U0#J#Wrk^Ll(B+P30n&i7RoLh_$WQkki=r27x`^)!O%-Z=x@q--3ZI?NKf)(j z5`q#|f$YV+pV$Z=fom9l0uQ{g+&_vpx52MNfhVG;FKaQ32GLKVli=oc_OiAhi7Ylk zK_zQKV-8LBAgrXfPm&V6RZJZa@gVNP=_9BB8yfEG7;|;Td#g z$e2sk=QD1ra;h%$UcReVHQe?_hS73EA(&RpIi*`Mu36AfjNwy8MK{86Kivr4CSzD2uSQZ~r19P=!b}y(E^DDT Zf7(lT(rbQ80r>QrbO)_}NC6J*RnD@1!*REFZfsW6!o9@VkX|m(A!@^<{l+cW0XWoOqh-^ zV+@UXHq4H(@Z`c=gdgK0mN5${=fl>BFeXH7W44HW%pP%!IU>$6C#kc9%Od4t9TAx+*KTf&p2S{I-jJnFIPg3lNAM`WK* zxBGmNm^2l}ywm4e_=;~KkR5+R=ktbN*OXn;(&j3ACO;BU)!9& zypScNu4(pSQcZJB+1iHLk>t5F*N|;+L)p{@=T=(G&ZRf#FQK}4gYa0(-0$f8A?77U zwn${7NS(E)PLMdLBir^wKbvHMIxBA56Loe{M>crc=CSJ)g)KdgrxgxWg(Q#rr^1?V z+#k?l%5=N}&*1B@*S#X<-8wuiv&Pgzq(uVI%-HE60(}P!c)qhLYkVUJj`f(&y$0T!Dvtm`e9#W$-|Mw*Li|@x+NT& zn2;5n3r5FdaHcp_4v(uixTxSxp{!$XpD*6@BxA2xi94&}xtOIk>pIePT}ke)eN!lZ z*D>c~g-C;)B$QJQ#ia=vW#=jF z^XpdVQECMf^aN$95Bh1mgaQvU_iSDoQM}kbgvhijNlbz+j0=0mj(S*~Lp;_U0nkH1 zz}%oVtqY1g8HN`%UVU1UKwIU6TI}vryFFH=7Z@AjhvKH&RawgcgCTl-SPBa0`qYGn zSNiddLy*>;IYvYWC@kn*~*MEcS>+)gwB-Exh$=m*%11(g43vJObLw{ zp(7=9EWf&Pa6|Cop>obS*XukhaI6j($I1dDnhAx39IH=^V?`&Japo*_4VG!flCV7G zu#`Alw`i5V+YIfC4{dIggvA7Zn!4-dC@n9V2^bF#a)JjXkn1^Peafh%juk4fT`6HL zjP?{x&Rt#LH9@05hDz~cp2@Gg1V84f{E9$Gt2BFtYRp{WiH9E7=d+w}63~K!&np$s zNWs3l42Ad}B2kV0E-!ev9miUkCZH4(8tLgpT&0X0$xN_QuTF2*_7ekxQ7nTtg@$Kn>P z@DtPoB{3f|S1E}D4b06j3~GwfJW7aDVeIeem&fVC+zb7T(LH?p=$VnLq7-{83f7J6 zkAOoViBprgmtbTPj4&}aF8cHMJnj#Mrxe*6cNtayx*dgf1icyEUf`$;Cc8lKYU65w 
zDAC*h2N_sxRL&%s*1`;<<<}jwSzZ`t*a>QeOR&oI1X~Kk^8I;O&pd#c+yOvf^-T>0 z0D3G3pbP;{c@Vi(LIpsK%gNp+Bkj|@`U(s~yt)LpzWJ%}22b+?0iz2IFHiH(7 zNwUsFudDd+ISDs(9(#RUjeAQ;wPDZsjD7T;2**H6tEars{uD4#-v$CUkl6k)pFF== z+qm%JLievnljpK^4at!Q)!=nB-8;;KCJXR-~A z%MD*O^kiLommG_Z$cMO)1gGw!DE44G9{0vCU2!g4hin*02?LPHe~ zS`iHHSdG#!xTDEYC)n*OmknVxo_pIwCl*XJX>ksqF9^Vz^=T+ zz`(fG@DcPj1*NyC+stkDHg}u9ZFx416_loyHwjD`x?`E3?(ikbIVfi&HpwK}B$uR; ze9|&*0UyJ8hbLEL9ve z2bk*N;=N}_uJ&E`gJ&QHrUPLNnP_qV;}>Gjt9~WqkLJT99&fy5Bp8(t#{xkmFctPI zg$PSfR(Ci#xX=W)zJg)jhvl8Kg^D)9b1m1Kb_uoCNF=B0ovzvMH_8~~<7xxp%%pyTk}Gi-vLLw*Dq;y->OwGzwSk)kXd&`L{vz~MQHl`U zXGtRXuDk(d@q;DseH>=zGY>bKcxnOTVVpllhW`TOe`cwyv*z8)bC(ym4QET%S@rIf zxho5`8_qphXKf*0@4gp$FO;e8N!9mc>U&f5y(_P-9!%FCen<&rKc?rIY)#$5pQUO# z9)MNIyni0_#KO_Pek0q~{dfC+yMN{S$Nlq{v(2r``{V-c8UG<%{L(-N)B{`cJQa{`!}7>60(tcfGPzg(GcMQ)SifUYxsl_cBZ{ z<7`ejn-^Za?`$))24^fb>I&s$AUZc1i^||#7Iefg*7YFCYYUybDNn1=svnEd7gw$! z!R1waNPdolYy;xK&&453hK{X%a;@W?&$T7504w!PApcvWMg}&7Lz~TeKRB1PW}8}) z{AP>z!D~qYv`EtO9anB^e@Fp&BvQ4m1#Zc{XwSIzr(F9pt^+C8fwb#jx@G`P@AVv&%{9)NLGA9sE43~Q9|ko_zPMd zU^NVflOzd={1u2~GdY9DL?X*~vk+#$k043IP%B9ZI)u@;UC9J<$^H)YJBP{3{C?2< z&e3@Da4a%8r4e;rh&PF9AO;5VH>f$3AtVDp{sjM!z8;+Tn?+0y%*;OzD!B09pqH{k z|Dy{-^gm(>g!kY!1847`U~pzsks(i_!Vq+%-!6(cJ{C@d{gLaE|5SXi1R4a$oMm58 zgv@8mxB%2912o1}JifEkTI<^B&pX!YW{oH`49Ws@c`6F{rn3?M+d3PTqXm330%)kV zj2OaCAgA+6Y$_^2^I%lz#sZ5PQ;2&3PDVua*bEh9F=nOHQ38y*4K$P_Pbjji+aaoM z<}HRNp>t@1fyY7?9~~=HStJ=~>3;f$$NPTJIz@oF>W1wFKgG1cA+k1*ZSPE4Hk}o} zVdu}@yY$|rOl^0nwmV(fv(lP&_9p3UO~buo?;Xq3bfs##9#R(Dq2%!9PU}0<{6ebv zh1K>|CEa{1?K+;kxY-C=W@&73EYs4TYU$6k45V5HR)^QB(=8{`jVF_rHtSpqwVB4= zRAcYT)pX+vtB@N^*9|2{c8s^IrfT+QYWh<({i|hbYPx1Pc{U43KJeb9WzUNLw|%Pv zsn)}(+QVyIU%c@7zAxF-OP5o%my_o<-FrV_mxovCKCW1;O1lS=Bl81yE^n2=^dFT| zm9@VaSvb6OV)4WWr#}6~M(tp_VrX3$dJ^W*h#+pm8$S@{!3D>V2SCB@WQr(&HUnVX zMQlR4lp`*I15wgi&D(b$KzQV%{2N$#J>WK%jQ!)q+OxeE!ne!F=VMA!vTiB`z2Bv}$X zte-Vu0V;gNoAZLp0Bb@(xRz|#AME@)-^;N8_itf0XtnAoy~~P5 zqyZhvN8@ehWw;N8duMR96u9NX`#sF7Q*c|OlI!Sr(-{au-7MjWO&=4L*I?RluU8y~ 
zi|e2yD{uv*DR5I8Kra5MDBqH!T7N`VRsV!6lDLR>Vw|Ku4nC&4^yc$uQI3>PI{ zv4D%YEB8gE6OSZ1*Ap}Q;Xb4L=8#2y4DR5e<`&P#P;pV3|*E-YQq4mDg zTNHhU9t2u7_}?s3Hbif+*kwzgs_QdV2U1lB(p3k6ms?!wUF`k9`;fBRPSbOjlEd?L z1k;z?i|$P0zEtDBOrtl|=v`4)&!iiVr0b3*M>gvl&7gO0%H5lBA4s_mtk$evPrF}A z*B?us&sNmVt9Sk)TiKMUY)e(PW$POj*!ijT=A&zMYgU-m5IsV}qi%$L#4=UqX=0x* zSW^{yms`^n?N4?*wEEN43u_}^9Qncn{jLB5>^?-p$5oxCx2kaV-_}!=P5C8R-+OJt z`9`+Fy)GC(5r7|n5Z_4g!&<{#oih>)C^0iUHjev^Rt-3=M=(!(siXNET2TsKcauvt zokf8}PbZshp$Mm zTt&HhhuUTs{xF=XV;AV`slhGQ#=nql?J}N6j~mak7q+o{tDYLBFVfk;Gh3{kzd~pC zcH}ZgPUJG@FK?5oZC5>S-#*a9*KZ$l@TciV?s~q-mS`qhiy^$Dk2iHE<4u>tYG7ZHK&w2&;*TfH3VdfrD^&BHS&9^?cb^L zKTth?pa$+!1HY$QzP7oO4H=s|WpghaP1`zVEg%z&<2#0@o3zrQe$gg7BzghKf!fnrLakd}sa*|a&I@k|^Cf21>$LVWga zq;it!a>7w}mE_t}Qtjy)t}7LET0ymymabOnp6;Kx35}U$wMbWKkN;9qT}A)7d!BbZ zwqu5C?{qKZnfLpB=bh){_dKuv&Ec?8kb;TXz)&Sc{W})4WK=Ws##fM;rFd$X;%VLx zq(g>b1C4nm$P6>&$quvdWP`?#Y1kAp51UE5G0251!Yh$MRQM~^dnj=4VMWf19hC@&6g>j`^ccyew67tT%KvQK&$nM)(SGx z25ok&ZMc%uIiRj6TURy3xQe6ukBi=kG3TH#DSCtOkS|8W3(nDq=nRBMMXw}_lRkM; z6r6HoA`*=Fr<~rfH#j8)q^^g!9~Z6Iyy0*}_R4`sSQ>O06btW_y}qDVk`&`;KnU_K zqr#pFNV3AZ13p<{h6GvRUYfuG-k@SUJ2?>)U>+kcyg4cOGH*#9j9Q>KEh_!a`g8xBG($uXG2bLH8~)US-$b2RcM=w~MVxM34-fs!>-GLw=mQ{qU;o+(*K z$$_P*B3H3uIe~{U7~utlfg@4OVaR#{yka34d1_+wzVVB}-sz1Z$N(qNe&C}h@YfLK zVg~;Ll_Wjn;BlQ2E8ww#HQycU9*cy8ZgDak7Q}A+7Tu!PzgHz;@{v4Y^&?5${&2W^ z^H=gfRoBGScih3CH*}u&9*UM_ciyGdmB2!h9mq6wyYF_(ovoj^rYRAG0Y7FjFN}I8 zgR*DT>ysnmRMduF?)A}N#4C>|+^FD1Zji(>=nkJ3s};^SG3l9<13|?W5<)OYEm#Fn zjEEl5D+`J>2sG@Q@`QjztP1DxguNlb<58?0Pbk7q1~G5Vc!=?NjOEvbP8CHEd1oI=b56o*qbC zSYh4is#?eiD{O7Lwqg3kWaSFmkglnpKAAMGu=SbTDqF8EfOG4!=_zRWJ~ri1br^vW zO@!IZ!$wl#NQng|#Wo9JJ*&%>VZCF#Ul5U*fd53F!tj!u;iC0^u@37^7lnX-EK_^< z+F_;Wyf@5W4Dj-pCv<*7BBwOyGK*WFx9CK&70EUvZAjXY=$&OVHXx~nKMA|RSz7FG z52OjHsGfcyacqUHPB(0u9!x@RTj7b}VEq%r*>Wew)1Y2KXnDhB<1lNX)PKC0XD+kD z#wpgtDcteF!BgHTL5%Jn3WWVZ!5I=H$?F$7CqUysjWdX?`<>pm1P~oTa)$9>&POIi zr^dZqzKlwR#Zhl$gcW&umWt6a%7gVJO@UAhqLRyu_3(iZ=r~pqf}>&=bdhkl(@D4v 
z`8p5|$R1C$RNH2kR<#{Fqw$%h()Fzg_AXaL*vSWqBAYCbgEGAlhG}Q102OCq^mxWB zWP3qIGX`!dNskm$)Em%0&c=-~!+8FDnISROPicxjZi<=2Td0y^)Hs_zrZeYW>Rfcm^(>UtLf)q3Ay>rY;b`NiY&nWz9v%`LN!hRfaYj~gj*r%Tt z?97(g8OU7R_B}hZ<*ivu*0jYOF($^M7D>{0E-_{Z&X9(`xc#D*vu1O)isrXIBKOqkQPO$+t&_3t%qP!3D;l@p1h$qLZ) z3)S=0i{*>wS882}llO{i)75oJVUAyG>Po@?s;(rHZrt&RVoIBn$6$)N$@zxl^Y@*u zrEMQ~T<=)kbNq(9w)c2y@A2jKffeV8M-*MzJkQS2bG@Jd=Z?H*THHEsd*Adm1NG}h zm}Z@$o_Y4ms%MwpOuqSk-(2*){*~(X<*H|s43Mu{+Lv~(bsR`_99U^N_@4~ja47BE z`VB)hZe2Jre`1mUF!Vv_#*tL(p4;WO&wNsqYVE(H6)7P zCQ$%<5LVd1NLX;0#C^~qT7j_db-+zKomn(8BJM9}A~*v&jf{u`(DYqZZd4uw8G%VR zTQJ4!@$eBJ0E8YGCcc2A01WKJIwVf`lW_QD>K^OU5=Roxx|9`x|_! zb{44S$C zH3JHlC|6LKaVAf-(pn!-W5q104F=}My^tiMOp-KSs^xM|o`y9t`=F^QP2Dg=$wYTP z@xI_|1mu7>7?1?sc`+c5Ia_$uj7u&H;V!~RictUDD&#aCN|gyT9ka?UO5ej(=G-aHI6+mlkbzt9O1;H2_Vv`&`jA+ZEe##q)Pg-sN6; z{D`5dPtXzq+J7!PQqOLJBWPsF_eL{0OUc=<3qT{w|2ghc+EZ@stt7--9-UQw8ZMBZw)4HhV#L zDDp;x4^J6KKsw6Yc$VXHGr$-fpnygf`lHq?NaN^a6(s#Ra&V2NWOQgq8b^##ka)BH z3{Z=HK|%URQ(+*LE>1Q0B68J3WV45u6J40Ye$m<^JP#6yHru(QMcUzPLr?d)7Rd#c z{u%6x8&U;S&nH9eFCwTSBCA*>LC(0B;u)w#kN^CnU&J$X8B}?F4)U8JwU_5TxC1>I z)!L5Ys8t}-)O}k~LcSKi5?`@3F0yORo|Loa3tLaxUUKd9JE!M-bFVJ)Ypr|0ihf#t z`|NK=J{eir_hPE`#Z=>|l>OA-ole(pS$J;#xdeO7amA7Jra;>3x2)B7r0PM+^XnG+ z=lh?mS$Zv1-qI4C zzhYmm?77i=Yv;|Kci7L1J}Uxsp?V4VqK}G$sEDiZ)xcw+#-XAx>G}a#wNB$GRD=a- zJd5#j4Zun|qM2h+{gw2DQ&U#|%hh$n{Cii|*>P@lIjaW7vzR~kzF@v|-xz9jcCtyN z41>{fRcO#uIa?%W=OLgjPJ9XGS4`fC2_ej@7VO7RBeo!EMS>C`4g<-Eg2EDOhu=Nx zULwC^?bBod+Xi8P^fy4jUID8C>#cFMDXw;|d6nBz$b19y0|0+H*BaN8;(BgW+^V@* zv&!xNf!(q!+rH%fc;NcL4gS{H&9S>&zsj;%%LSXkgCi{ad&06Z6xF21JUlHUi<1Ow zs$)sCPnkm*`u)bUs7ISG-*vu5Uo-fa0N|66wv1a!#|<$$4+j}^Ed~mYjy=dZy6+mU zwu2+a#Bn}K&q{zRd=@Yl8)L^2MHeLfw}DzjwgpK$13+lZ|NOoqpstDYO+%o-_>?th z)af~-ceNf|@z?SkNG5KG(<+i{eFfr_7;*v3iZNq!#+(?2aYi8cf*_7Kf-alN+^@jI zs@WihVHXq?7C@hzXGRccsTdyp0|5I3#i2=6=&<6z`8-+}M==si2VqEvN=g#43Bo!} zTI`t}d2mP3rY8|zemi^%O8OiKSP%`Z9sJaHr}^{t&)R*;F?CAI~`ayJU$KKSA zy(>HReaq0b2ktd)pBq}-y3SB_jqme|y&vxXVE=Mg|B}4c-Jk02U*3LrrRhkj_6T@s 
zwFeRo@X~4zJgBUleSPM2@GC97iDT)8tqZ5;PcJop+15YQ^#6UZ>p(x zrKt}bOGodFHOVAjO?UJp*;(t1b*^uw_-;uX_?C`d@GUL9;9Hh7E^`f!Hyp4CIgQWE zoktr?zI-`EwMgW!859ekODL*IUk)`nBMLh!liHY^EYXyoiX%$Wk=5_v$zNo65fH+PZ)lv+cUy z)f%HkTa4yyxTLlcd}Dej@772yyBvejO1B^jJ{cwbd_*_muyvIl%G7+){}8dpCQd}d2AWU1BZB-w6TzN!%-k@|Sd-vgov6;J%#oom;AM9RinO3h2uG0#0 zL70k`osCT7z2$ze-UJ|H=n6s_MR}@h6t|dFWvXUFttIv$DFDPHPzNEi$YdCzU{XYs zGrCPNNxlf)sGyD$5&kMBUhs>8pcvJbLcLb@tC6-+KY29l@aI##)zVC_|i zu*%h^OUo0cbZz58-F)3ztt(aQTB+@LM6s4#YSiq0WzAg0Ld|^5Lc@H+qHk%}N^RFl zWq0C*G-$PvnUTezRArkM#=7lGbsR}m9!Xc%&;E4gr%UCj>Ybplsyfp>dp_NMoB!?j zC*!~A{)0QU|K(KAndMi{rMk~y`;Y0wu?P0DYp1WAp6mNZrPdpz*IldJ$#ijL!U2leO`oK-;*$aQER=r!2kArb z1PwXBNBlx$?1JdWxH zI^M7@x-sW;HsGLRfP=Ug7c;@R(O2OaYhY)HaLmnpkY~fn_t7Jp*ku^xwzM_*t~fe9-;_!sJTpo(Y6-?VIFm*PlTuz z=pTSY@|fft6(b?ljg}+i`S8CWMGc*Y#1Jeao&(}?Me?czGc@57GE9irb+&4FUIa^XC@cn15qw$BkVpje8zZCP#PDk}hvp zbbnFamfo^`acc2GYD;f&AYIqA;F@ZdneYK`5 zUDLQ=pSLfbyIa$>?trNtl~TnO>rIrU2>r94O9?Bg-r|bnD@ku+YFWD(%m;4Su;3un z{(k~DxMTdk0k^-_18%$l1SjiV!`ICu6<$G`j@z^edg-W;Jm{ql-ADj02eC9j18OuI zaEq}yNQFuqt?kwG2&X`3xxr}5%c8);H3iD;s5cPApS*|xN&P8|HyBhs8F2>;^N*7B zWxqIvCGvuae>p4$ASYrZW|P0O86+r7C=f1$2IS6t0tXt2)B;R^mGcn+ODfP-5y4Z% zu3|#nGW=A7?AL>xqODKLonF7oFqiaCK=K6dz$$le6BZ~bOBkQT0*s|Cg9XZ~k`hF) z(0y^I8hUQ*Uh6xW>N|RePxYNlHJrquTjsYc^`&aNz^pfRr+c5dwdLlPJ9|>SC;o6W z)%z0GzD#dWbj2Au;rZkCAYtGa@`Cop;uFxuoJ@#KQ1CSPU4U7tW0X$ z@i^qgAtWR>fVmUc&Jr-C{tzU;7EuuSH3PY3Qu`Xz{7D?A=A+f*(uw>wNxiyKe*%N| zPeiFAAFF{IlEW3E#bgX2K)Kfxp5Tox~sZz%3ehhm^P%Yt49}0s^7fvcKp8oXTQ5vZ7*wXfQO3Caf_d;gDd+ zMZ|cIV)I3UL3AA8mti6?WEQwmK1Hr+)B_+vF9!~2MuInPm5j)3u8RsDc(q0u)dE zBZZxXk)qC`h^Ny->g?g-h_};AQw$~9dm7D2WUf-_ZI}n9&N7K-sN ztzW3la--G>wFN6`D~wtf)Vf#HR+4cGp{~fR>#QPW9w;le%2tswFO-#7Wz~Z=e`&n+ zc_|d_ipfDKB1KhSSQ6xDDBA7ojfF$OL0_y_4MjqVpoU^mU%!Gis&KwPEXcmoTg=8= zzrc6#)0#_&Mq?^zthDZg4f~v-K^!J7(jei!N z5{n99&DIhfgmE}g>g$(+#tM;(O~0X+HbQ2ClBiA^c9?kq_EoZp%mFAB?V{}p?yN&( z;mQqieqQ6wzM{0;v@?Xct7oVg z8sCwWD$ zsaTi4XR5_zT3$s{@aotrCk@g0N-x5WoT21mXzaIX1tCxp 
zMHQk!NiKvUePtNA6!IDuQ!hxef)DliWH`W-W)B3UK6pR3`F?nZjg_?^@aBxW z4j_sJRp*GRJ4HLFxI-)u9dP9bdB01uw*>JR$*W;V%_)$ZKuCo1B z55|k|_`Rg4@@5~LxU-tQSCECs|IxY7Re_OFJ!L-%TpImU=nIF-8TaVZwNx6HcbQ7J zUu#c!8s_*0vWEX}DlJ#xDibfRcP@r+sSP^zj-h}s-Ldxs$7Egh` zv}B$rD&dbz0d&$+@_PHHb@g|f=j-;4980vP*uAKtji{o}8Y+r?eyECyHc&{GC?p3t zJINs)J4lWPEOx?O0p1O|{BEtFLl&Y+h^Q%^35^4&0C>iSYj#~_!Bm7}f{G`4K#)bP zASzu}1Ek$C89@QoAr_U|{aoIa+5;_e7iBw^p}_9uqPU7*Z%eyjWzuf%>!;Gi2i)7EF&rq_1#5FL;q5M#xEw5eFv}jgcLUkxO$wFZG6n z!6W@$U6Q;^p%D+@56BA{H_Hgo@6#p%3{e3zEhG(6ex@Ccwml+T4ogu#BfFqna|F(l z!Qq}DE*}(ng%+^@Mz)&=#0_>J&&j^m9UW_WHAgBTUu5Os#JHNU)!M^EUc@C{< z?}Nq@Xg1)Qn|iQSP8zaailzn)Z?6;;%{M4eEtOgJ4s36~av|(d>#4LAu0-fkEkOWs8Ksef51QKYvEk9-n;lb)?`^)dIaRmqgVMX5|5*BQ$8TT$ z zsARl(;=r{7DUWZC_mN4$5eXG<09a*Z1R-J@4L;BgAOgitVKD&+1a0{w2q^s#=FyOn z#_?h#tuJN3>ns6mzatlc4q}fM6u98>2^2X1y%)<+G{K*OD1C)`1Yo>-$xd?{QM^Du z0=bsVpR^1@BP$t)iC$`hqAUcWJ8e$NNr_IBa4i5F;sAhM1`fG(91;sfjxdKJ!XS9j zLl}e;7(}sF_#9#V?K&%sZ|bn9epi2#@Cly+{6P|Zg5nc=QAtrDk5*}GkhNw=flp){ zDqM4b1sagcsvcspacsy2ci5h!5FD(4@rGbB3}9e2XEdN22SsDTvc`1_k%*u%fj-zO z)O-0^5L%(>kOX8jDyN}L!D;y*>DCeVCtk7aEiKSP@KJEfN2mv@>J#kMqIAg`xH!_~ z)o>|DH*E$GbgYCBoG<**OFD!AONN5nG|b?8=L3~Xv|K0B-LOXa*mWErCx5>o23Pmn z>X4;?i^8;jZqCGSBV1g59ty~MK>=AigaFNUQ5sb6P0Y0dX5KBSfxdWI&T^5`BhXU8 z)dZ9+DjkcCMiaKQt7u%D=)TsSbk(J6*G%r7YJTs~twYJ09f{VlMx>*a@SFm(By&}f*pM`2ad+IsIFt>uLoDb4-Gjz8C-oHP6u>R+VjP|^ z%>9sFmzm29_JsK%^Ug^_9GMNwGH44D9a51^k~GAb>jCEH1t{{nWb_(k)Dd|%3Itr4 zkSsi3nhp4eyc5d^(|?YX3Fj5RU01HBu)y9IxFm!X%}%IXpJrDD8PrJQRWTsRa!gim zoI$?iMx@z*+)V_s= z=43-KDnWOFB3|L&XH3?R**?Fm zeMOi9j=LV__ysdTjner}$tYtEF+{Y_q#-oX190$ZA%hw^V2q5AJ^O|~5EdfmMd46< z({ji-4|c-9@2v;Iu^_0zA;{>B5snm8!2cSdW?Mfx`Qgdmaj66Czw1jLI6b0!Hhx3-+6kz^!b$M%p8A4-_UGiu{LxFdjAmmdmGwgYNf{LF(zmWLB!^)m!e(< zTpOl`nT!f#i1H9~ow-`d!l@l*hUqiVgEsO5X7q(%yT*l7DWYgLAc+RVYxG5pzO)?P zor)xfgfI}rc;)h?u_`)Xh6XoGuWx#<`&RdRkz0{p#TGn!Zith@l=m&$c*}MBBz?`9 z^6Z)8_k0Z(VsU?2vDd!Lzp+*-0b@dN7J_$S3Xq!4=Nx3Mj%|!JRtK08Ssfc-DAQvR 
zaa=_`Wzcqrg_T-GR*D@)eqkN0L37ohd;mQt84Zb_)ocQ+)&Qi?2gF`K8U*kg?$8AYh)#+FNmL2x-50A;YA@3`lSH6UrX51B-p}gCOa3u5j5p=Hp$|h12tW$B5F0Iie{h}X_ZR3Y zjCP)EHpC~1V?w+k&8~+yG#3Pgq~1U<)*n^1!hj0DJcI*A(8wN+b%S}(Sp0FrIKcX# z5S#zm-Z1mD23SkW-`%=GZAl&WZjd)Hz`{q-nj z(lxaScAl?F*ZJ^Ty~r1hxkg=cWlw$Vzt6YnCdFE97}<(P{5#yBNx7WYofxvk;n)V5 zEVl|<=Ap$RZM3uu7tlf$2G2=7)-Go%GMWN92igSWmAh?dib)e_N(F+dryiJ7ZbA*p zH?NFt5?!&ie}NN>mz{w<^{o^4`ShJUxNd867dYClLZyrpS{_8vf?_)gG`;wP(fR&v zlXDV;BAk?vRYF^lv94g5j@WrX>r0jew2t3aqoNyoA>V)v=OK_(eUGo6>`L;D_xWmw zE-mm?_xP$wpni4tVUHo)1bYjGNxtf4_0-eTd+zR;-S@GYTGckspTez&Xw+Q!rs&mUXKDo`EBaX$eol1&lwFQmB<`M32VsDw-pev6-&Z`1RSV?Je=9-@J@ zFhQmpISar(@wa%Gb581U0YI#mQ-j(A6 zc(EA>8e$EW#0)dU+Q)t74nrJ6>fPrPE_DY?A4JzQr+gfS)s>9kPb=PDM)3*=U{(DQ zqI2>H+?v#YBVjjMr&xewnuoGRXx;L^=c{=RJAbl-T##7oy+ zS}5O;EZ;Cq-!E^PE8CZFf>`vHjlXz(^}7vIpms zJP|a(meGXh4$x`~rZTPa0ak`_4}ae?(7+my3oz(0#~rd|eziM;W-wm!^yS`gOopG& z0oO=0*Qdv`0O{h@q!NrF)}qUjsRPdk-D==;N8zG5v2Vul$>^c3pk2_0{FDM54DB?wdiE%c5Z96k8P;mM!eb2TnjtzM|w zn5^13U3O>n%<5Ft-o!JD?vk;#(YCo&duE;Y-A5i&)xP7p;aaHLmaN)#mrGUc$!fCu z?%=$;1p?1g&UsJM9N+XEPV;45!(bMA%I7(VbT$DpbtSV$-Nff;rOrM9SOT6wzKBmg zZGwGI$LNV5CYhG#U>XU4ieyA35W$-X--WEoR3>v&RJWaIpEKAGaC!egR zTD_w1Axow`yknjiP}V3^PZ@c!Wg^pGL{lgjv-T*c36 zFoJ{J0s-1@(zoUhg0q`adydWX$I~Tc2}jyn`xV7FHzr!r<*TysHdWq~IF@#ojFxdZ-5VZq(DV^cRo31-I^}q~?fs*79k<(No0FRlCu^VjcrCEV#q#QzC!oW? 
z%>%Qx+0&`=!!Ynd`MPBJy2aYHQ;iGt&B^-ayF#jV@0U)lxZokhISW>v9*ax-2h9I} z6QRh@21NQM*i_Y`j6?9Dm3hi`!^z4J$9$+u91{ySMfg-pq|Dxcqi&cgNj(;#nk9G= zmOB{@)DHNxU;ttvFH$u8yXw1adyyKT0d?Wy9MA>$Tq&%_i8Ok^j3@dO;S#tVYRtD` zow5Pom8cx?I>|2yYz%&a)Yu*+22$*g^k!7lz*ddz71RsF)z#>(99VVX@TPjzOz z<(u$g2ER<#ta)eujr|KX+mkihQ#Ct3F8l53PgW;A&n4K!LhpFvr0tEsLyC5;nW7in z#beKmJ~J*(w#>U%Kej>PznF^IG~Ifi-|?7(GSd1R1;Ofr%9bYECrw<-F2}O4jHMoF zXta!K(xTK>w%qOfpM&gc@EsS1tuwIsJ)Xt)slp*k6*O9wZ@|!I zsLKam7>J+%U2-g(@ zH$QejOWby!zU^;TwD@eF__!9oqaEh{bDXh^LILrG4}|GzlfWu*jdx-FX%r+!I54RN zs&B-N=wp6&Rv3tQS{t#JxLc$f@ww0&)PE{ii@D4IdblX=K{k%x{k>#_oM4xrA-Hin zus!Mi2Ik0+q&q$a-sCN~#Pw(u`EKm29=|HieAZ+IvB+M9aFopRbAwTO4C<%YD1qWC ziU|}`DBeY}9z_L;G8C6gK}^gAxVu87k35~;yYHcm=T6Zi3`H_KcN-;#k1mm_hZWS0 zJs6`pPN!?v8X5lw`>5k%^r~ZytV+8SI5>8R0+=-0_%0 zS}h}M)?C3@I1HcC!EPz~hbB&7hixRrq6hZK86i7{Tco?@dogzk1zD}-D+oUq=wII* zlK&c-AiYxF1M$#C)AXm*i9b;FpHVLIzwk4v=#SLKKT^ByQ@j2^)je=lCn^`5)k$ad z3`e)oU;O-u)E?g6zV71MXbb;Q#;t literal 0 HcmV?d00001 diff --git a/neural_networks/rl/__pycache__/resource_allocator.cpython-312.pyc b/neural_networks/rl/__pycache__/resource_allocator.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c983a608c5a004c9d028b4045b0033cd148bbfdd GIT binary patch literal 9057 zcmahvTWlLwb~BtAa`=>JJxD!GTb3invSdfLY%8|qhn&>0Bge6mI&`uVnlnYy>u9|bL%paqI8`lAWGC3m7?U3Af{fuhx}Qe?3| z={a{eLt3IY*W$U4bME_|=lEZ|UKa&vBfk)zXrZY8z>1kTqd;%`777d0Ym`VuD3KOz z2|8(u*k~*>2`0jjoQ<#~=OP@*?GZcVY{HS`BYe^saT0ni;YzwA?xZK;NqQsRq%Y#5 zDH|nmQ{CpY)m&=4PaCy}U*c`linI0c0TN}?h(+>H%xh?W@9hC3uB zhLl)n<4DO4cZ4+uskub{78~JaoME@_HTQT4)+oV}66vU_dt=iXA$lVkPeiZ5Vj7cD zGA+;4v@r3Ms=Kdeq>Lnp(zN=Hj8~(3qA@joLlV@e@~#rA+#y-B10bxP-gpRw1xmGW zBCS?-MNv6wiY3%f0MV9(7iqo-^Nwcer;cHNqC2TPWenRBV_0i~b?|k@pv}3>e3r@4 zIa`jIU@lQ%X8f@eIz2|d3UfM>jHl!VD9c_1J_L=3g6meK#Dwm^Grks2=?*wuQsAy* zrr;5TApLG^@SSv08k94slq3({Ov~>M%F)R~21rR6Ihqhsl8OjrP)-cWmFFr{o@-hj zn4ZyHVqA&Ik{ZuS+29kmJy69o!<@?70OqK*Z7p}AA2$^)m%0YEuE7U8wXOrjwu8lO 
z!+A#y?m4aNxd*RmU5AQohl|^e0Isp=&dHCI!hur99<5`~(#2w6u-LdS&y^bkg}`F( zYD3>D-?ssCP{;>%SW-q#Wt+)+zY6cEKV*Wp!gie(gj6&s34-ntgk)OGB(UregzK4T zqJn72*`;F%~v0I>50 zvfFKeA{!{Tw9cK)D-YRLqZ5*bU18g={`4-U&qn1wY_XB-S3@KfN1+~uW)nxs2%sN8 zNRLJdfl2o)G(j0N0)TFUr2ibP)D7GogAOwpLADL$;#K2;5W&0aUF>C0y? zj0=}uIeGd_{VRG>NQw@T2C>J8yl8`*b%M%pQszI=B{H{Ag}Fr*uy&y0Aa~5z!yes! z@iNN67z>Ls6UsYy4D<$G?E(dYfD%j`1ww%ms^Wx!8q$*#2Qep9o)J6>YMCAfncz$b z#tkdFEg|dNWHgzK>Neqen3Y?hpS&Nza|nh2SVDrlaAcNvDy|Abw!v8HfZ2eDuHbW- zqsnbN^Xvz{^0sEk9OagF$Xwh-j!w)oHZ0Oj)4^> zB{l0dj{qvjr3!Y0?Yn(<#D&QZCpC9hxpRBr;NpS5JpPm8TF3tU*zFhRU%YdA{(QNi z>GsX}n|GA?+$!H;97?S=v9#Df!H{q$)Rc9}&`?P^+SFCE_NP#=7KJ0nX)|7KfKkF2$G6 zYdwdx;1P{K@_}dT6Sr>j`!MgH*uMKde_*THD%E>YaU7E12I|-i=B?#z6Euh*{u7&N zKbRO|Jv?D9;X`Ob>4J8{kLU$z9yrY43K`!t5%i!%>8N}Pss4$IUK69o zv%4xsd`(i+ig+0~ik56bISvJb>NxgCJ@?x=YUSig^s})!LxqHCc?6n4-H^9IY?))w zpmTCGH7OBR3^RswLPBFDC^tf(6UJS;Yi7=F+qxQjf(nXyn7drL_SpO5d3vp3+ebqm zWeS(?eedq~ih=&W8F<)qvelX>w9TRHJq$Ohxp(+yzyg9S2?k zh9gIY&xJH+-9*TQ{RA?%UyVq`+_43@~KqgW!Np0Kbdp`^3d(&+>lNgjv#mhnk< zSRkr`8p{FM&NzlMSUQK`Wdu7BbRuX+Fkk}I^9ml>Eo#HYa)TQlhU;ASPy<8j49AU> z+q#W>_p?TR==lw7-|*SE)AZLo&2<~2;(#&uSqU!@61^b;tQuB^eij|3p03n+U?eWW zn60AI(&;X);onJL!<7m3ShZ(!G@exR9~vjP3@PNicB@f}v%EcfbY& z=TqkdMU|#?dm=pv!bfM(>kiuu4dpf6Zb1gsVsPB+IQt(WxB@`IqpVRwGQRw5`(%)8rnSNKsU*{p3Sol`Sx2;NIl7*0@$ML$E|;^5 zTo&FM>04(F(ARpa=DH|qb6*uz5J|8Xy51U}vT{5FJFef5`>i08QpI!K`@(K5X4DmCf}LP zs4}XVdbXP{mQDg*s_jLeC*GpY{HTVncebA(}9#~;kCQ8RI zYVddX;_7Qxv_n_4)+>2;IoP%6DusqM_zMo_ydCLD?Q***ELh8EC#HOn`2A`M^h?BPe_9PKC@iSp%vfkrUb3qHZ?)DmR)OAS&n*- zp0X5JwVy~6r^O}!vl~f8rbPSqUP$1Q-6({RNrH~u58OC&^Do7aU@jHNCuC3 zNq9+q4M^lO2#9U=29$sYr-7G58Es^ubL6TDieMYEuZ%$gV02u*{5FCp0%F&pwvkT) z&^d*;RrVCg6%?O_?lL`HfBKpt$q;FcrbK}#ziiJ|Ufe>2b18oYV2)b1Q~uUcV@PWZ z6&rU#l+NE#^6%FCyNmulbf!G`>`Q!y#&;CXEDo1?hP0lc2d}TZ{>9~o{7Zl@@tqpq zS>k&%zGv};hy0#3U~n_|clpydm)~3&D;*uz;BW7EF>rw>NEp`j1J|ACe8UfkIwY*s zza)M-wLG=5>+}B4`hUj1Xs}eRCc&pmSnBWLZZH_}ds(K&ORMKmo2&N1eYDzl7gJ<}@7)#ug%=uv44iiN*D)R|r 
zvKVMnc0Aj0=Dq1eT86JDBO&l5#dI=c=r7>6Yzskxk%?HV863@C7}U^>$a7&I;Y0{V zkO2s<=}xRvz0)ek84P<4M<7-yQKxt}Mqqvy4rtht22+k8im*1$syi`6Pc-&sJ5uK> z!FH*hF=^rwFo^Og0B{_AO(kEq=Ibu{LV0G*?Z16&{@DAY;OV}3_suWe;k96QDcGk4 z`<9wN?OyIK1`p@Yu6efIK0kkcwQYFC`7_U{N5Rf}p1Yn>aGw_3_n@g5JXk}r??L#X z=jht@{-rmx?T3Iv^Uga*)|vwa=ip+cUmn+w4Niy<`HOV=vs1q;Vg%r z1MSv$V4b3!2MkWEyg68Yjb-{vgA?lp_moEX0*IbQEf16K@cNmL;cE%tD)rh-WJRi- zf-gJB?jE#I|gy8O7p9b6l_utx-}Cl>vTw`bh0LTC^kIKu#iU zXjRvxCzfUw6e$^#tx=gakvI9HrWJzGbz0f#x6B&a`lo62uP;SvR#{g)*Hg}>j1gTQ z+ks!*<3RHm3g1rt=wEmLmwUi}{4t8y z$7mh_f2ok^u8Lo$sG{zvlvNoOCsCIfq=66&v0;_4OGe#laJLKFz#ELm6rD{<(Ug1{ ziD4C@bOWMN=?n})G%Zoj2LK7dE0k)w{bn4*%uStv)$0sS&5=13d}&0r8Ud?T2wDo# z#6&z6mr}7AAt|XcOqcE3s$8E!3F9fN07!_a)X?)~Lr-~U?*n>8{5Ny87 z&62-Y^Y<=_i_)sUAIwZ&@OFAWU6?5PdO<2YyLV~g{+<>3uLeJuGNeV}#zWuk6$Nmo zU-;t2KR&;D@zsAAz0+E-eb`=@SiE>QuKAz&| zS-kk;y=zvXbH`%ulK;mS)&iY|&{^7F4D2npv@JwFjI0Ga3S;-q-#vft!rcqSVE?bZ zjz-ry<#4))D8O$|=;cfr-*5iMbc6nRbh7jp)5lz;{~ewDqvpSU@!zwzMj?Xr7LXdz zD*R?)l*lEcCuaEOq=XK7H9}?tuy{idqfu&@NGPM_YY0di#?p*F7oY@_y=|eQSR&-r za9OqENyNT_fDADXC5V>2jSba-u{V6OLW*o7ewV+hsIvL>=_z5fNgN`HKt^y2!A=C# zFdDi;GJ-MqDHwLXMU_vzP(FUT3;{FF%?*{?x34pp4?j=xN8LT7{n6mi2IAM-yT94LowT=L02YyXI_%j|t9Qb9OL+ zZSHSuE;_tU0oY)mx=yO?1i{dObqYWQ0j1w~*nZob7d~ZgP%Lfx#uuc!e@y}SW_XOI LUB96alKcLD1P)Z< literal 0 HcmV?d00001 diff --git a/neural_networks/rnn/__init__.py b/neural_networks/rnn/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/neural_networks/rnn/__pycache__/__init__.cpython-312.pyc b/neural_networks/rnn/__pycache__/__init__.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2c739e317d7a06601297bc39d0026992dc082f18 GIT binary patch literal 156 zcmX@j%ge<81i8)AGePuY5P=Rpvj9b=GgLBYGWxA#C}INgK7-W!vewVY&rQ`YD$UDF zEz&Q~FUr<0N=!G05P7MkMTt4_d8s8JiDLbtygdE*_{_Y_lK6PNg34bUHo5sJr8%i~ XMXW%x7=gGL#Q4a}$jDg43}gWSv|1)T literal 0 HcmV?d00001 diff --git a/neural_networks/rnn/__pycache__/anomaly_detector.cpython-312.pyc 
b/neural_networks/rnn/__pycache__/anomaly_detector.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d147ab03093700b69c8782c80934c9d080ed2d75 GIT binary patch literal 11099 zcmbtaYj6`+mhP6i<<`TNY)i6b0~Wys3ye*8X2Q$F4w#pTG0bKRL?hH~TSk^hcROGq zCDerM$QgFAD>G}eVK>f9s$z<4h3x(qwl+VU-K|Qh_Q#^gL`uh7liJy;8U8t#txQt; zW6!y*Zaomm&g?De^zHlXzV|%7b8i1zWu=9J^vmRQsJ)J&ev3C+V$}@2%+M4yP4UzK z#nZeYOph7{3^eAAVdH?2T$ureT-gDZTulQexH932QEq@6H4m6cdp2wtwGLQEZ38x% zGEf3L(x6S2zY0~iU_ML(c7Zcc2Pocjh2kr28Pw8S>V+C`H%ItcXlax82tXeN~jT?P6v19$hi(^70D0l;r=x883?&Sqp2+E;o z#5*R1B12wD4#*)%4g~{YXv2F$QZx)D(a6TZs3=JC#&GDI;2jm@;V3V)-^Z8n(TX(? ziA1qyG$Qr-42qc#$bn!uAV~^47!txflrcwOT!lM!9$N*%!Q48Ki&4Fym(N0Gni8l1 z8urU@5_So8$RIGhaTnC^4D1gJcP4UI0lS2C9JwoM;b=fs?D8-y!f-Uq zL;cWb;QnT)R%{VJtW0P$AV)>%J_XyaSVsdF{Ze4`d{~f%{`xwmn|qbYU^F5JMne&@ zQuo(mXR8Ry93Aybg24CsEDGoMM*^dQ->+Ew{?RBu7RJ2I?|*SD5LRnM8+Nq&{ROM; z_ls3fDmswVAgM*-0&3BABZ7<- zQim9cbmVx%ukuJ#Y(GCPnsHPlmGGDL0C|o2vd(k)@MLY8@nlzeFCUvcnP$A%`i9F# zCfB5yhHPDfTHBCqY`T1Oa$A~d%BnpU7?0Sf4OpHbI-$ClAp~FuvZ+SMF?xxEKC5EOZFheVxLCA8vBJVW3l~Lv4~2ax5FQg0R>Eq9IU9|J6-!8h1twCW z7$sR$Oe)v+`WVp-okUPLlvt0%g9LSf`Zg;tw@M?}Ou`~qC~MWLhq6}3t4CDgaHbh& z)@HwQSnY^?ik2a#Gw9`3sL3w_kqY4bE2y_Bcq3d{p5qy~n#Nh5S#kUsVPb*rCgL3x zW4``yhz9}RsIqrn&ZUiK1Cqe!6aj2=6$Ge=e z1K~8lo!)nx@)>(`@l_eiX?N@nr&%KG7DT|ws~EWxyLn- z1uuG46Er8U`Wa-VDH*7mMxeAb@wA+?FBB|_rtvjxOH*=%)?2ua6mNv_3+{bPZ)|PpqIm~HvW%)9Y(NX|%_*(>xUZrQh>ufD z;potiASx^`oE;ldjA0?7SaeGvDGY9)xB(i22+6$glGuhF4I*-lVvR&aur+?P2}L_; zgqdT`(&uT1Qe=1O7?8`>L(F}sedLv|fZHSGEjl`zOUOXt_M zn>zCqMV*E=31fnZ)3VMJAOx_=BMgjFl17lDN>U8)ipC{zP-HQvZ=e!voE2R%s#TIk z2&&QU)N7p8xGZ9Y`O0uX!X(?ZKKa%;4k{cSZA3}Rwk|lKiYQJbuhXA^Q}+cXn3pX zdefYH!Lw$CvW>=GHEFp3JU#BC~2!YSpGC zBUS1CFGuZU*Ub8qvpMZ(ne*H}k@Ec@<#;-|Z_!mhQ#&JPnl`84=h>WgZMiLHb{tIY zIQU6-YR7XaS6^~})>d=v=)}<*Yi0to?l;@#y5B#1^Kho^K&tJ)C(Wt0-c-Y}d$#A6 z?D$Sg4$9$9RuY}xdy`SO4W&&niGn;PMiizu8WEu8DJcBbib(YoL40S#Lr@|f1_HI6 zm_Leylw!_TQ33{(3csI^1_6(CV8b6E!QCju2kWp5i5LD7iq;kCb7spjV`N*GEk^b< 
z{m^1z-6}A^c0u^-3Aq!}9}NsD4{CV+geVBH&HbSG!$KbAdu1rjxdJLM^$rRFSR%nI zoezg(sXeG`KA0aLgJ{ok@F0$WAg3WH(5%Jq;3nM*V8DzxnHSE>!-`Ri48h{5)A)=; zKEx9+fp`*#gwq0(tL%+S8*>-Ei*EaOV{23xz}}uG!1h;VYu#|+vdwFg%wO0Zwfty- z$y>C+)1nQ4Cxs$473dk=&X=>)U>6K|D-C+6jNR8X4OkPH0PKh%ZajAy-VprJahSE7 z-XB4LT9UAzW<|huP$MPvcsruof_!CO%L78G*2>H2-@N5ZnBr{QbPje}G{)J4;pz)O zjaLHwdO2Oa_5o!!HzxOG?bVaVU%&Xd!4jil&xu&U9hcPtXYw%S(B<+Guxf6@g=*mcITw@`lYO^ zVao6&lCNrEU~a0Q(Q^0A-v54@07_R?);p zE+;F^kFAPCt&xWperUP6q5>!{*r>-m5@><1_}BoD_t#fw=`^&IxZ z1&2j-R43W2%QJm?>huH3Xm;b`R8RIz9DPrY znrg}klpit*NkkRu4hr%2bWjT(o}pLu5Gsg81!GEfQZeOr6pb2D-U^FmMN-WEz)M4Z zWG-%S1;C*G6&Z>!!(T>th#6 zTj5+wd_PTmt+q+}_3dB{vv$YNyHVB6^vzaZKl#S7*>K9aIc48+`^7u^=IMWY_Jd3F zeILA%vLCq19e8x{(6SLdb{yX1`#ojQaXLdm&s-|Yg1O}e>!kZaGKx7#d7Jc@5zF{Q zx|KBKeWUNTdW7Wz5yaVS#Emjz!{mP@5#*!D+%mGG0hF}osGav;~kbU!8Ki>J#&P>;- zRM)8m?)0MD11`|Hbi;bh1L{h(b*0_iN$aAs?gsOg`MUX?=68K>`{oYcS$*?Z`iWiX z`rT>go}@YJszV*ALZVAkmojx7sk)AI^`-|DV{Qgu1eX5!spm7UmXxby_T24LcU>)M z*S_Tb#ai#I2d3+}*^{o_mh47^)HBgD`NIX<3d|pxIFzxiOxae>@b_$Mmdr5Pl7(tm zmGQKvJneIr(w?2)qm>unrGLt*;g~qB!sXW=19ZjK@3wYHb`Z4k6%LpXMlDU<)U8~R z#*7g-BuW@Tmm96fxMc)1r}2T2VxS5U#*gUgDnEXl(MALz0Juy73v6#t_lTe=>P6sI zdbtxazn~9OG#Vr73|P(t4Q9HqEwsQip-atQ(bF^xU57~g?~E5{6^+v(53RtpGPG|} zXj$xnyPWfMnE=0U&AWz#h;Z?|xF@#haok4&1?{^?yg}O2uJ=H&A)y}m!|$m-T&6y4 zIeMpWHt_D~+oQL;GHahowH*D1z|2=0SJA}SiKEb1+zG_TszSC63$`OcIU+6rU_*jf zJF#FN5Tydm7={@uwL=KaEzy?|c;rnn);5@X#dak*#e+2#e^w z?_{R$)Te!?em*iCorGHyr# znyrB21j7{;4&TfK%QApr@pysdtpv-OcpDsRBBrGwEN0K0u4u}uw9>1hO(pB%#e<7bARK}Nm#4iglFzK3rx9DV4kl7KHR>nu zWM36?NDJ4%X`p`gNUWT~gd*T4l*iHaB@^rTIRTSMhky|L{HeR+oyuR?!3tOx>s!@O zU%Gx9+*dAoHr;mqy77a?v}ZTmS7lc>!^Mu#c!>OHv+-5+5GlxFnU0+vrc2Fy`@S zVKk?ky2e~%gMcD~hBGiPIdNbh#>2!}bTc3x0A^>(aMjLG2~z@U@_03mx?~+q6|Aj} z$?#NY#98vVa9a-Dq5Fc)j3zn=XF=q2q%G^ilszS5V5Ty$_x$dT!+X+H(3oHo%ms9l zVlDe~$5ACLTzovGnw2ME_L%F0b|&wY1lcP`y)Erq2Yr<)7W^qRQNiC8%n@Ds8s^Yw zD8^XmJixT5D1*eo!scKjE}!`4K#EWv90*7K#0*J_1>`iB7oWkA=nD!LjEEN8n`IZ>J6JSomIvtfZ-izQVPRT-`! 
z#Wl?AT;SFrOnYJKg^bIWa{1;qq+Q#S7A!k6btdC#O}Sd<8q=!zBt)8Q$2NZCh*4LyR|(@E>+vJ*x;S52Qc~GmiMIFPu)t))AQXQAO7fY zru$3^ew}Ah9WSIe_)`r(F46`Y=1#otzv;in?fzz&rJM(82_@p68*929rXpK_YfMxF zYX9$d9Shi37o9xw{3)t|i0dD%8qlpvFtV-)3WOJ2E(pfvY>*}Oan`-y?fjY@9u`REJ=7+%roz?w{rGPdmey^2LXXH z5Q#AL#@bXnjc2|A@_|E0kiay7_~bqgiVk|!xFC31MAGgvFpG4bT{hnTxm^3-zD?>2NU5WZ*rwd)h^UqJSb z7t2*E972`KRejpE8Omy$$%@71jqe3-x86DQi6!0MpKdW*EK`XK#UORLLxI2FmOqV>#hhG zBo_m!JOPKNM4>|t)VZNIw)&ZX930j@GsCY8UJ^v|ok5P-B*4eSYtL)yX_zN>0Hl~T zri8tQY|$~0Vmph2=e|#httj2)!rH%uF(sU*V6S(4SbOJ3|Fmj>>&dol`^>(5wtteD zwoX}Ry6!o=pW3(I<+gtV7vj*oQ@@ufa@`QAk`+g92%WYOxR$2=+z|<_@-|J}90|MT zg7B3?f}96-v6ZE-P*nX(=qI7+1@LCCz4pq)D|fk7YM8noN?{?_!zEUyePVYiH(D3@ z;HKaxQ$*(jns>q_f1VkuA?KdCPW|{P)EQ`W9mlLDRfMSLqUva|`h0m&zcUcYJcOg8 z%6!zr$;an$uf;c!{0$OZO!bXcVD2Xxi9?2bM0JIFXkgiHdbx^Y_bfZB*t%t}f!*EJDnO!Ir2&6_Y%E z9^0UJi6{Yz8Fu6d`5H#`+6e2S_pg5NwGMNANCuIR=P!PzZ@FFSSABcLUqKV(1?hDl zOGcWeKc`Oq2ld1kl=Ta$@(Zf!3#$4zRO@f3)nA$$l64t#L(1GR^Hkd0eA%>Qq72rr z4JNu_i2|~0gxe)@TSF>X+Ob3d$yGq^YeyaJ{)z(f_0|=%>njS#*L%EJ2ITAg^g6of K0R@CCc3ob-UPIU~!P(0Z_a|f)XhcI!M`+WKlOoNwzGSjzr3dA*%?4U6P=2P`jWh z!a%L2N&}|Kh^*rfYCILyc*fMot=T%A(w)x4Y10{JI@1DzEFx>Rb=u4%`qN=Vx2gQ6 zeeYZB0;J$3>hvS{zWaXnw|`$&W}zTVCuaklH5B!4n9%~KCg|0_g2XH(P##L41w)Vy z89W9W(@c=@FeGL@EQvV}2QeEohD;t)i1+ZM9Tzl*EFMe9>am7w9ve*=D3Kkl*Cr~& zV)<=a&3Wvi$v{0r3C1gwV7hHkOK<0!dMZdA4|Qg(&QnRsEKp|E%c>?Bw=H(8Z(#8G z9T$AED7d15khnt@rGO~Ah9Z*a_sP*fc-Yl02SVdPUlf{51fnCZkS{vYDMx+L0Mz?s zcgKC4+f6H0UpO4W@<>?jcN-M5;EVeFL7yxu+)zLa3T{SW`=FP?4UUfmMTLK13>*7` z{`~4m#Hbm1br2G>lt_7KIAOzCI9E7RgUAZZ0jLpJI8n@TBxm#(NzO!ayvHP%q0T~b zRu50=Y?zxYbC)X?+;?vzJQNtd?}9>w3527HbtE7NV%RGOV&Z)~Sj7?^4|#*WNl}vT zBR(i5S-j{Cis9kc9Dn-EPh(dO!`6ZkUsNdziasd}=jD~)sO~?7jg8`1#6JROVODsr zH|z_EUaw;DdP5OmJcwzV*L!i?7gTGcGVEjbdW(+H>y_+KDpew=M&JZ+i&_OpecsbK z5($Z&(s(#5N}UrC>6K2&H~h2;!s58(3wpz16f5LTDID$$%F&Q_OcI5FKN^ub#wI1) z6ah9!$h!c1o%(b`-Q`o$)mgSKSF_>rbJI+g-H@wxUVbJykY$~@hUUx9Pw&pM&ABSa zBUd=(`8{8}vr{k0tYez5F_?{5X**S7;1Yy?xDmR_TLApmWoo%|*TU)Bm+n>Xoo|`$ 
zojo~oGUc1A{@BrWr+RPNw)Y`KGTzCxx^1kRm0BSwA&n?TNsNw5;m@te$<7htKkw!h zOCUTp9wn?pC1fMc90@7LGiuCy**l~F2q1_c@Vku)D~rJ)2`_^5lD1*$2?V%K8E@5v z;fT6#U?b`VW0jBWxfVMF0l@BZ^{q+vsy)}RCCT39Yd_(gxpK#~&{QaOG+W-BWd6fI zo4bEn-8^TRKbLXs%~p3qbN(lfTktKOQ2rVmvcd`o30;5$2uE6=3s^Lo7gtcFhp3L) zf+4D-slWini}Ffg#7koe6Y^aG z5D4F*75b8lxb4!;HP0Q2&oy6!S1;qeT&6y?S55ohI(FmP>(9RJpWpKR%?sk<>GyYJ zw;V~g9mzKKWNLac_M>V3=%dGCh5^X2s8a1WJ`%dMBa@q}(NP7SP z#y$;^!XoFBy0Lg4g8cxB_sw~+L9D!hOqg%SR!smdQ$Mp;Uz?noe5*S3;@boB&IS5= z=d*QPS^KUuzf0X@G412+kxd?gL^1ae=qprIZ;9+r+rTmEHnhc{F+*giupLAH8nnXB zUC5fYL*f)g!|-wHJW%AzfFubzVIVveQ0Y7LEKO5a%UNoY`V(e?zD4&-s89hnF?8%y z=m`n6fIK@Pt(&q zZ*m!X_d~)rzEY$FZFZRG?agz;^Ov%l_TII3XKmd{I%ls)<|&ee##+!(?_D@56lueG@snWV z)TqwkwdNEYY$=WorNyJ>n+sF{LF24;hxH(&v>VpS65M0X#urwkr5`z*tpUx)5t;4zk*um(>+OCgwO7XX55M zXcz-hi5ufQ#>h=UMpv;^!jdrmAI63OVeBtD1B*JNwkDhrW@~`itkBCEw+tC{y}&%7 zX$X*#dSPL1i*yhosSg3tp2C3sEp=eF6TuM#I}vO`fVbeb5n>~ht{6v9<$YB#`bjK3 z2MrW@NTCA?9Z^hvZ!{1TM1_7u;ryU3Pbl07Ml$dg3TaXcDs7s;G@9^9Laah(6!{Wd zg4_(`1aiXRobF21%p6GaIa|fG=ck@ewcglzee2xdJ72l^mHF_y^1|NRiFDh+#Y@@x zXS24GK)e6n?W$^Kx6W)$4c_ov_sp~J@wfPeCJX~_IX5(Dt+?Q`QWGlC)EBcchfaR{ei;ku4lbP<5 z*{U;&x~h|oAce|vrc!m(w(k2h0Rap?2c(M8Bg;K4=UU{-@MWdl=; z1Jk+;4KZ+6lnY>ffpKjoW|vW@Am)}ys1&LM6SU-oYQZ7!kTMHS!3=FJLXBX7*g8qO zHz;LD8DG+@%zl4fiz6%1HH1DO6qH4mXxTy+*%_KPP;6)wG(_Nb#iQjnH>w*y`fhb? z0yv-{&jEowBn(lMh9n4J+Lauwge1^p8St%EV#%N(sLv{34tGYvFvw3YujNpW^%mv? 
ziR;zJk0N4`Fp4aBT@X-Sme{Sf78(?syftM2Ys%PvA4T~IRriIIgw}j)^ z`JN5T6YI~KTY-U|yLoP@ZGWb1f42F+e=>BicQ(&@XS{PSW}VydPM3FfEu47Yy6F3J`?sH4ak&Y@9Atq(-EDoML|zyo=`#y}2*9F^Ok8VAg);IqIC$9B@6=E7$r}j6tB)}giW9kGQgpcjgjyLBd}V1A^v|~2a9_Eiv`H) z`~d987&j(N1=&%WfC{TP51J`4OnK07%_S`Z6KLW*-Wa&^NPIglp|Ky)Ky80vwZ3G5$`7-Y6St#R*3ARW&0O@`e+NyrP|Ayc;smz`*Xu|Ocvp0+2@fOjA zEnUF&sK9^*L`+u(eBXqZEA)s$`xPcK9#vIr@Scw;X5uIJ282rrOGE~;7)^QB5ZdAe zZ?%_HKSJSp4eDz03IL#RR8N0!>Vs2D2hL;;oXPGPxMv$&s7(5kX93C2&750uZqGQk zXUn#yZG%Y~z~lGGl~*R2T&3dy#hDwDJ-MptWbg9k?el~0d2V^KoA*AT3}stp?9=Ad zuH23%-+TGi%Zr;AFMe3}LHwiBA5H%Fmpm&fxk6Jllrk~F=~g5gG{h{Z4y9xe$P=Bk0KsuB>q0^P0GT4bkN|DVwR9mK ztth0ml(r5aD!{G;Qy8OYwYuN@3&t&B?xIW+3SZX9#YCT0< z6{w!C3vo^xKVd`?yehCDHR0U!vx@3HO5O^t1Z{|Q0ZQxF6+u>G2&m|R&px*KL?B9D z?%@*^Npo=A3+$|0=yhUitryTYR2QaJJkCY{0d zuOeszAfvLJXAP=Z%?t&is_#`d96F%r95((t0Kf?}MXmd2NYv=CyztLTV1K>cf04J z@4R;NwWTNaW#HGaZ}D`d;ZW9j7>V6IP3LOrQ%?dh=QYaRnN7PhO}o=Id#0J3qjvU% znHT1IvyS!|`j5#*uX+L@h%IrO2e$1s%tWX&kl5QZ!V8%B}Ds zUawPPf*A%njKZ^$8VWoFxB$quFQhd_UxzWyj-vU!F2IUV{;dmfqt;K0VfP>^aZPdK zFpVl$g1LGKge6O!4`L0yK6ohvY-@MBE{OQ_?aIHAw!33>d3;1#LNberW3tOPBKibE zYG6PiDJZrx<6%VpO8+gT1^DtIcY+%=9h+VmxPw2NvM95piKF8G3Od?(UC7ks+HdZfcj$fwZu0Q%T-P3 z<^$QPgK67A9fLa;8t(E3HPVXSNL){3?K^e!7_*H0}qc4ito(+%+3nXY*< zTlrMdn5(HxT9+$preDieZbrguN3mKn+d0!Yw~UlD*8 z5^!bWjV45uZGbAkVf$wK=1))eWgVT#<2hUPwf?F8)b6>;>wE9oTJyfrC7UZ_bIm#D zF5ayD*tQk%WEppavg1~ zUvJv1Z7;CIC$Oo2qNAmJ0wveKb|ml?aG-7JS_*k9E7F#LKN^uQqU{g78ojS1jM}wS zAr?%~n=R-jMb8DSz-tl*kk#BTp}-(^N8lg8_?u9c#P@^ z_@w~eXQ7GPOw{bq!up8*5mL#cL|#FspnW0HCn7JQ=-~m`Y@=8|Uad)|Om#&9c3Y=pTeh z&JWBFrkkJ2+Pi>PKD}>o*AEVUaPXt*j~wa!{n@8qNS_(F^Yp;g(WUaHOgXSX(mMf~KkEf2!^kpkr<_6|>-2@wBXOctCY@4#Bom~rO7mwfNk1m6Y{cNh| z_g_!*O^;$d%6!5cf23kPWYtibITkRjAf>e%B|1b2c37&qCi`{cz$#67 z=P~R&VTzl;M+P(qYcUi5m`S5;El%)J-EN2*1^p4UsFW`%Md3w4;k@GpfqM%y6Jor> zUK5P|BxWLR-dN*t#BKO|38a9k{6|GO>Kco{H*B(W6~=fb$SeDr8-N$pEr{!#z*_m&vEu@OB6;!D_j?!3@Au#3dvg_zS=)^5 z-Rk+5ZZ}QavK4!QGThte&n}$Ix(_W?9=_>-r>mZCZ=Uw0T4uyIx27u(CyklP!^`Eh 
z*CJDq)KIp(4QMR?aYOx$=IhOiTkg4g7EaH%%wN2{<&L{2TX!^T?*;PR(3l#SYk4E_ zuDEb)vE_Dn*;$*ad*injj{lOOEB3EYw0Zx-RTEWpgnsz1?jG{G^+QW-?*`-FG}?MM z+SEr(mZ*L)hxWzeIb<~I2Z-m@7p<3Iv+&LphN$Qr%dM7P#8Na@i(bE2)tPXzB3tN!6C7}O|-Bs?I+7@Zu$HbyK#PAH)`R?Ndt!SOlnmBi893UxLIHrcL4$P#v%jinr5k@q0r*uv{Uw^-|A0bB H4SQvx~B literal 0 HcmV?d00001 diff --git a/neural_networks/transformers/__init__.py b/neural_networks/transformers/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/neural_networks/transformers/__pycache__/__init__.cpython-312.pyc b/neural_networks/transformers/__pycache__/__init__.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..01888f1338a2a41c6f6a79271a810f63903e0012 GIT binary patch literal 165 zcmX@j%ge<81i8)AGePuY5P=Rpvj9b=GgLBYGWxA#C}INgK7-W!a?#Jo&rQ`YD$UDF zEz&Q~FUr<0N=!G05P7MkMTt4_d8s8JiDLbdqQt!7wEUvn)S_bj`1s7c%#!$cy@JYL g95%W6DWy57c15f}Lm7d%7{vI<%*e=C#0+Es0CK1*9smFU literal 0 HcmV?d00001 diff --git a/neural_networks/transformers/__pycache__/bert_model.cpython-312.pyc b/neural_networks/transformers/__pycache__/bert_model.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..36de65d74edce3a90b375e55811c35599da935fe GIT binary patch literal 12250 zcmb_CYj7LKd3U%w;P4;;kRbS!3{npYqD)y5sV60wA}Luf%8Dd6rlP_S?nr_H0qPDk zMF?!zMj3;NQjwJ%Laj4~YE4Z=X~Q(>RGy@@oupIGQ%9cZ zOuugr2Y?V{$8C3j?{2?+-tPC_{aHzgiGs8}HtpYBK~evP87*<@3%&I3@G?yaR1YQ4 zf-XQ0>UwlEzB2))hape4hb2$0hl3{@&<`4V41;_RPwH_2G-&QI4_bOGgC#vB zG^L|NcA!ohEcXHsC^FHwTwmQKaq%2e&KlQJI4j9OVw8NrzVXU^iv z3C;qzk}S?aaHW8=W^omxjN2C3vajn4CtZUfK@7NL|KM=I8}^5SuHKO33X2!QuHm2{ zN^;m66#T(HSJ*3`lbb)pDYGy>d{@4(X;SKmQpGF>xnxU62!^<=!Qav3j4fXCOJg$3SvMk~8)2Bxfc$OOH_~fp(=NXYDZwHptmYuB^vQa^)oF=&_Jo z1?EPb?n=dki_{Sc_WJw$J4Yx=Vf?|cVtP5`^Pcs{{)qVDI8;^4{eD3ZgDQqaj4?Po z=;;@|g8U(Fr((i{>IaIT6w9FZf@dfs`*B73{`4BAxA(&G1w!7iVi2UzP-r+D z(Vsrre&X;Mg^|OOVj6^9@PtF>#9)Lw-Eri^8O7)e$XU#ZuIB+Gi$f}=+g+mY9#7Ca zD0)1K$>V{|9uDBU#p5|Y>ha`ls>dT)0V~-Nlp}B;a3ZKeP>rAlz)fli 
zAoa=qjs2lPaicUG42sgmk&tw5qvY+|rh=e2EO`T-pcuvid1F}e24$eML0FEBXGJND zQxgNtL!*)zJ4RrEzx*WtU!^{9){Y;VWKwKxy0T{c@ML+4tw~on#}CHj6zfboE7j7< zbXD#6k;zq9ucB)FKtgIT%;YndO$Cd|RQf>%!BnULKoq8d{K;nu`{*b=kdc`A^)+cVV93X+?^;;| zvQHR^f4o&6jrGTLQC%l>h4Jb7D4*`kNx(2+qz{t9NV_BrlxlCeXOtO00xn3}Y&7XR zfb?CEv^oNb$@7s+HN!1Rl-CH7YL1r zlETSD0e_gVl!TC+zZzO{F*=k%-0s|ThJPaRIs zZ}HdpH%%!=)4ZcK>1a(kw%smE+1hT0-?{kq#reIbl6z03Y^Ux(E57#;JY=Mu_sZL= z_ibdCQ7hsJwF-n<`+zFK8QlpKMQc0RNu7BE=>y8yymG1-9S`6N6{Ng=`k@~#iaK8x z1+^0aPKW^20~HkEiDG?4fJP%g)%YCKN7-V0L3##i5twu9Knzuckt<0WAl8bOQ=d`2 zHou@FuO<>pLCV$CTF}&Gm}FE}bQWx0t?O9p1khEt-v*LLLP;gzC%X*_2U5FFR7n@L zgQE8i4T(WPVF$h8eo#^Vpu+ecxuy?6R}gv!0UD`aF64?-L8&NT2LQzDYKZ9;%B$ketGgdioUu06kuGz@IutNjGm-HXT*nR>@(ikO*eaQ*4<{`F~4n|-+n9!KlicZ zQ_m->j!$xFTlutU$^^CQZ;G?$zdi7J%NyITZBKN)zV}DMJN<9>zdQWH&^=rCB8R;$ z>Zuazm4g!pCw<@OUNplr7A=&mEN1)^IlP@vHqcj=uHl0+DGao)rW~zndo~7g5hP+( zBIjxVH-J$85^UZWJH~~xav8=bB5jLNoHho?UwuaYffMqM#>Hb|c3C?*P<# zit3^)nhsGtLaZY&$RQvvn^|hi0Gv`(W=9QL4?xKQpwXOM)q`ARyFbKb>$~Xqs{{WU zxwQA!kiqn<+bDIyQ(=G<6{847m?(L}qO=2$IYO3pVHS-(X(s?TuhOrPNJo!fkfptV zR!pL%)q#}#!Ucs1`vqwq7BMf2J{eb)P;-``Inp9hzMO;!vI#27e+~eMw!}VPvL;!w z=7w*^{bn##(i+pHD=K5Ag{u05J5{ye0mT?sO*|Ltm@NNQU43HpTTR!S<{Ne-8+N4X zcK#Pbm$#)Ws~$2`MfLP^Q_sauB-&@_>s{B*%$!WF*pzZ?p8djY=6k<)`}p^Il8(La zZb_BzU!tb;s0usG)}E0UHKao;^l!=eGkTI4BP%^Y9-=z#(1$snSr_pF=tRqDHM zvg)$Io&o<$Vt7~zDr|QsD1s_#0i;xk;5dL!jOYQJ27~zwl4OaKTKbtb_MlUr3D$o1_mk-T9@E*YTxWwMdGynt8Kny|sDC_1` z1F$_D7K3QoFVkZvNTKs58GzCnD3{hE0DZ8`>LZ`6YVT~KGuC`fs|>Mntfe7py(C*XEyn=h|Eqdyk< zqd&7h7UT*OWl_?gGy(m^b^G!KDemYNTzHUQ)M1JyChr*_MV5meu#r)dx| zr7Sf{{UtL(t6PHf`&wJPmFDSVoXpxE3N z0EaOk?`h7~M2RD#C-ocZx8u|ud-JuQakMCjgA?5X49Jo z@7kMFrOh#Bp~QaW#fcYRm0`(d>hIQUNL6k~l{`JmCQDkte)-hWw6$#VY<%-n-^7>a z_?3@yya~o7GNu9e`4##V%PXb%d_R%3mNS+Ek}^qAoyMhLIV_;nkS8DV%C2x3Tc8H& za9~YjWrNleMNQDEo`)xgLRgb6py%sA&+C`jBU!t4%n;Rst-gi*|yQ9weDnWN^YDPxBiiQi`aT4~7*3j9I`sDlZrkve6Q4 zTo7IVvdlW-Ml}DE&%T+nZx*nfm~P1jeJo>Bls!aTqB-;$lNB`J4UEwfY?Q`5onSIg z9p>^xkK=LC2p9rhay_ehM0&Me#8GTLp2epB4d9b; 
zYQbWiG=2S8j85CjWBLWVbMpMvss|KrTpep)ICO08`QtxZcfaDqyT>Pm>A+MVv2(_I zefR8ounC{O=X`dq;zVpefCWcoTz~a&tSh~3_xp4zuj&Zcp->omPJMo0DZ6rX;%I!`J!SxcY(?z5?>yD9TmN3&Q=K~fkFAzYgZ?Mf z+D?nUTY4Ur$E}z8ah1$@=BDHajPzv$Xsb$aCP0C0iN#z!ZG}aR^%BFS`I_Oi}O=^<@uqEcetB18G2<__?Gg3`%1aK;F|!NU`}^0b+WY0kQ+aizZb- zn&~pK41!Mi)?m)t1Q}%|l2ZpkO&CK>qnh;5jZnmt0xKW2qZ(7jby4(cRNT#|42Eo{ z(4J)$mTLSG9>{!E*7U{wm9Y)5z}8*2_s_Q-O|~6PZSB7AJT_~8bu971y~+)DoX0Q< z`Usvx2*k^_ERQdtyd&$<4KEm&1%t3?L=-e5$loyr8aBXDB9=(MzmE-Tc#}W<5#T{?TZZHKCs$iJfSF+6{K-!3&OaVPlaO^lJ8d#hXCCy z86%D=`94oS)U%vjrBy8V!9SZA^^IOar!S9Ic?gv2E^$DuCgLx#f|&%)Il&@u7(f?F z;0TO`vz$`F08f6Dc3TxAF2hkgALw32%-K!5Lq>_x7iif;=eanV8U=#P`T{ve1~&Lp zNIsv4b0)}Vw#k}8VRUs#8bIM*kOYYQ7ad*Wa_~MNSR7fC{d5WIcZxHFwA{Auw6oX1V5-?k+4?=hf>kq>5OK~TYkR4g?ESC|F zp#ypbvUAqmac;O_deahPFPj&tGjcvvy#?~63zZFtmbZ3Y-<7J|2zko_UwXwnVV-mB zx_$DU({G=?#~&m#vuq=gT;NYH-GG-To<(4-f|Qd!PvX~NwrL)hGO!Ir{Mi9;s0ve* z(WIfq)134TsHBD>kUgY7Mu1$Tu{eo`90CM{?{P!q!mzvtHetjo2`Z~+);xnlM@8NX z0M^`5HQhbcJ@05rI+|uWQ;tnBW7=LZy>e>hydAv7H3@&p{tSfRt>ssu6VZ8VL(fHUa~Vu~;YTjle9x8{r@d0L%c|NVVg+?D-2=!VndR)DR6qjslTW@0Hl>5FEP#H5E<}&kpygM(tHVsPW7`d_z+f>>r|WkQ^1gdQ`0#@ZcDtmI@n$ zb67PdfP{fMXqgtW70$O8YGu4zWN*uaWv*h=54OyPzrW)yzZ>at#ng&;IAvcA6j@dk zw@Dn@z^jn!#nT7MHB- zcqIKkj@KlJl0ZIPT}z|mhhJ8EF9FrwGUWOMSUmkAliJ0GLi?W$*BbEr8-xa#bFeI9Zv7E+d;bs3clpkAdFAx3sa?RB<*Q?Sy1o(42&!i~ClAF; z>5AG16l>fH7Gzc38|G`~L|3Y6ee58(d9Z@tSvAvt+n8Fv?`~yB%FzLptd7Y@%37ax zH_vkMLsOLC2%)Eg+AK7o%{bjHQ z3U*FCJcWRy?$fT2fS@*jcqaGLU>-nfg-Os4U}&KrMR{$7wI_&jJf|WljPW#*a-qvG zdTa$aq6iE{OTZ671tIX7nv3eA-7f$9=!kNf|HbhC=}O7Jutb~=&Vj^$kR!>2?ouo!c@!+BeyXi%9%MzS-mWt>DH-^ z79QS!5Wo@DGhljz=mO2gAI}SCuFcs{+3zT#%l;7hlJU$@jb?&ZU%lg6IDS4+4{@`E zT21XoSR)hIT-QWLD!M~1^}?WwoL$3hPPoSTA$p~XVK{g$ z7#azFLc7#k9X6b~4}ja2C(1TrT^5eB!qP#~4E&QI2)^7$E37XB7xhpZD(pZAE`4wk z4x@%mQBx}Vm%V{ujD`Bc;-KsG59&nfiFBqH%rPDyI#Gsf&Ht7q@3h|MA~P$##6=?F(} zSpnO4mtU8*t(>>5PTE$dY-?lOLV4v9F7S=f*@{%-wukth-nnOqp&ifCG5!N{`JB0S z0fLp?6Ww#Qo9|k-q{}N}2h;Uy?^o=Px6jnigui?7=EeDqUCE7Isr6ld;=NO`|6S>Q 
zdamNoor)LVAASEE=-yRpz)W_mLraILESM^L8t$~zx^ApW_}*OiofEU=x4xKcXuVzkBj3BLe%Sx^hGhML_wA|L!?FF7 zEnho=`uPa`&_vlCll!NSP942^EM;4r2q$e#F)qDj$KSSnukHJL=B@i~w#;mr@lMjO zw#M!8Q`f2!?Wyv$N!!|#b>AGnkHlXO(2tgEl&yo7%U~7XYgpM;%Kn&U0si!vnyzxa z7UgS%(uhO5@Ov5=po3g&JfntIzX0XHKIoI=t_-M0{*PfXObbp-uI}rA$XqV~qELH` z*vVDn(}39EQtuQ%|9^q~|2+Z{<~@l$vC`il0_82iMr?RHd09ha(Foa9Y(cC*j8sT( zA^1B41wr4hBMt!y2N{GAM9m&TfcFr+WMR0bB@+jr)Xc42s?l*9AM!S?gC-^sY201Y1y;Q#;t literal 0 HcmV?d00001 diff --git a/neural_networks/transformers/__pycache__/gpt_model.cpython-312.pyc b/neural_networks/transformers/__pycache__/gpt_model.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..daa1ce7b1b6a80fb3ed80706501a0fcff5dd827a GIT binary patch literal 10118 zcmbtaYj7Lab>0PbfyIL)zz0b1Aw#Ap3wn@}Z9QvAmP}c;9+VYHPD)!rAnb|+i3ha{ z>H&i>CsATBb}F(`N~rCODce(1BPUfRnOd1-CU%k^nRYTm3X%zLl!=>q+UQS*5uG~H zkG9{riv>ZNNjlRj@!Y%jwRi71_nhyXi+`!8aB@g{v*XcQH5~VE^k62@DDZRtj>0%6 zbKRWG%a#})w{%;0DqCaLZY#@$Zh_@ux5#o^w+*=vv&W@wDemZYu>NAq8FzKN;_hyD zyrR1zUfEsA>TR*AxTo90a~4h!`yMtIFrO>c*LdTpyGD^L++I$$pXX%hn#HKSR_xkc z%i0`hbDC}4b*#>XI(Mng$LcCjS6Qm7KW7bAr91XMcQg=B%1SJtMdO39a4MQi1WqQ^ zKuS5A3iK!mMK$VDVXa?lze}$U^13UWNF-CNMe7P$bcY;Hg(I=Brs?9zs1lQdf-W40 zYAIdlj7Czrcyw?ers&e40eV3=MpMYj$?#w-C8Ir3{34cFjR$^i0EKZ*;ktQn!*UEf z!2VklLAE}D7FodV)04=ahzIm!XHQbMojp0&le1f5PcHW4?smu(=vm30s=A%5%|lP; zyuoVSfjxODnK&8ki5~tL$LZE+BBeV|CnMn#AuXC#?hd1;?&^)ovXU@r1gdc)2IHY# zB`j-qiBY54wQSar=vF+S31edKR>z43D6-!;$p z*G&7@D? 
zk^1pVKX_?o=dtOX$MT+I_c#ol<#d9T8Wm8r-dv^J)CTLgE!|ebDn#x zQ{ZH4AIa>Zl(Cq-j&MPtEBae}Lr~HMG8wwa^oMTK&S}tqx?NM$)6s~c+f+q^anNna z!PLMY49MwlY*5kd18Op&Xxall5ERiVoRE`oI)jR=+rk3_N&pGG zFufa!$}mvSWXP(s*IO>PfVr!lEvgu zW_!kxhG{aNAbIN`Z(^!+{xjB$h<}zeR@qqUH)P8Q8JizP;nTzpq(*YeH>+-Pv|zo=<2pa zj~b3^Iv>@o{mMCwwv{p7WWD>uU^FI&jBaWF0v%A^v>F4oQy>rnZ{2v~SmVX!8E@;f zw>9r=%i8V)AOHT*d~h2|Uf(4xx9Q?b6NkrM&U+uZ$5|Zh*vOi;sR{4I;VX5y zs(ke-$d9{v^s$lsGwx;6?q#{t6XD6_SCq>`x83VzMGTs?aUNf`g2_hLRl!hZWNzuG z05v3Zp(~kCf`akYR_HtR7@8kA2!_1?409Z9dr+f(fe5v!p9cx@Y7J#JHjq`RUDQZy zW!=&~!f-Dz2)8!Cro-&Vk7Jvgs82HyQjRL^3)7$KGgLZ4AEO*CK)kpReo7nZcZHnfbF>1iMyE>+waXm}t6 z)9QAXFo1copo1-4k&=nPOf>K@`i`|I?B{q=Fx+v7){FSUA)bs3jx?0y-|*u+4rZmm zo#Vc3J;NJcNc;_2bxZqtojIhksIA_i0)({{9% zdi@1v(+HyemixmnceA?V^3mMk34V0L=&5gQoUZP;uL)@UN_8y{ufwVh#&v5tI)E}C z*7+0Vo2&~Vydv%W%v-n8D@gi=xsR)=G3Q%HCcTrp-u%m3)gAe&jwz|*vs+I(hw-`_ zd-4kZO64n_at4f)3?jf)X>Uh(nJ-srP`)K5_0eZ6O7n_VW(sjgfQc99A=q+tRQ@OuHqRtnsZ zG-NL`3X&;MLyn9Dfaxs06K@)FmVGCws6{E`%s9#o3FdFc{4R_u8HkL_oWHmi?u-j> zsVKe$M`5TUQ;~7g8Zq9%z+%NWJY_plGFuo|`ATJRB$UuMD(Bym<+GJp&q*s3gj6=R z1X(_Nfbh0fthQ>XG9&hR7kwHYGgaowaaL}@h>0Z#Yd>RxS!bzKQ7Tme;EG);ZGX?V zK3Mr%SG#vd56!Rq;ur5PII$!NlYOV07bt|ct6I8bwB(pbmJf2 zpY1f$8mNi~!`vNr)u{8;Ls^~`avR1f^6q7L^3;wy#vHlj7u~tkw5Jtl(_6zJ#ptPv z4fi;QV?}n?$B%8f;r-qgcptv)h4zk|Hy8d}->j9ZZyJw|MJJq-8?IDLrf)32I+XWq ze^0#W+f!%@{?5wRuZF|n+n#-zWgpi9k6hfJ-BWmU%LlbvCl6mbI{t;RFXX}#ZI`3B z>sC$GZq4oixr2V<#eEaC)3t5ryt?B9-};Heqo;BkFP^*UTc6$g@$!`ur`~M%)XF!k zDKxjh8S*#ZZaVO@jX(GPbjz)#1GgIv;Drrqvd^&WBS7VYBL{PbBkgjc@(^dln30cH#Z%V5i^q6Zb=%%WOZ{>F#W`{j_;y=PFy53fGoU zjEwAoi=h-2S~;#Ns8L@c!az?w^%GPQkOyAYudMTNqfdtVNQ>Z&ZV$nvL=aaYoTIxn z3V=o8N~$+0GkH|gs9y|tx~Yx`POKsS=eZAs&2s`U4?vx*8s8?apR=MgOQlb2RVMnO zQ4#!=Qqg?yb95%k(2s=Zh6S+=D`FfJ?GR)aLp-8`b082%6nTX#Qv5@%lx>8DU{2Qp&_PZDUdzut31KDI<1DW^`7-cvt++abNzlVc^G*hAPSKv2 zZIqiD5@U*=1h=zfo0cBaM>cp-GUtJYHv=5tE^P3;OY$e*4_)pz!VFjm!7G-cz(1i< zI3xwFYNS4jpDjus=3Hj~*EB$%@qj;|T zcy-5h;a%5vT{G*So?icSe)V4HrRC4^g|?N@1P`}Q%KzAZwg2|=ZSaIqofU3LEgwC; 
ziE{rPsq%tr#5GmB?MCO1_y1u3ZRwf&_w3Y-g+t!2YTn(nFdRb2f=T8-qd<~LQgFc< zv9drHj6^vWTbfHtp|CQUvKBEz0CQRA(ZSN8G8TZrO32azvw)g2LCPLxQbtJXAWGID zYgu5+T0{V>78t5HYR2mW8I7$G&h;9gmvD-sUdx?;B zV-T4PlVOx>A!gFkRr8Ex732nLVUS@CdZ@YWQ_Sk)Am5Z-oOam@raPX{*V z8_B+Sw;^(iwrbya!&pPE)o@-fubW&k-PnHH+wlp9E^{2_n1z$Ol0sSDZTDFPbMwBw za@Rw)ciMI}y5IMTD8AoFG*UYMSO}d!#?iG!U8C{m;CsjxhF`@D?qdkc94XU2snU2j zJw^Rv=8y-IgYZ+!Lmz#m&EoT3@`1Pm-` z)a?w2&a1jJGHBcrokoO>x)Zn}7)9!tp&>>L(X)rNa2!#Yv}a!SjGE)%7ynESH|#ly7?U9%pql zkL=HOjWYd>YM)BHETzQS&8#B<{lpT@|4ta|DKntevW7G z_pQKguMiQiXfu(KU;_Iauo4cTSr^p%WIX)0p4!iAy=8CwSIPX*-w3!Y~u9WnJbw$znpK~G}XF!s%1+ao~VPcqI<+WRk!+`r>=|F_Wh{l z#`8aZ@dq#d@QeAa2d38^xFsFLtW#2h8o&lF?apM;^5-?SdHNv#(86hqf#!fYU!2ft zMavSfVkuqyj&O~{9EoPcxe~k5^-tm0#FP{QFUD^#v_us5St)h|?h@J$TIMhCfeD%T zUqQV}X4H^m3Y?-_kSZPrSr8AEBE}n(GC?z|hN&}2KcW%l=CEQIHgNQSM;1;*DJ|ej(;819soQ z!=K;vGWT&F)Xi%m0{SMf!^Yt5lUvem0??V78^GIVK=4v9j$o6eEB(;!a4OPU`lSnY zpML!UJ!71E4G?=?(J&B2+g$|S>^>6OPGg+W`GG~yyYd809x{(}y1oR17B>0Nu7mF+drtU=OmgyC4?wurWfA6%2JDGH|$$|~)5T>GE<(wR(AMPMBL{e|S>?f(oPQEvew7Q<2 zT%})00bdK$Q>1EJ>f8YJA?=|O08|OzspNC$u1GQ#QzGQuYsSqd3`*(%^XQE&VbV#a z#kgAQrqVExg?sQodLqt#I$fX=Gkztvt_v$Zh-;_wX+{*Dtw>UvicB|7&j5 zuetU-*ZwQ6^&>}PwtmLZIPGZ6ZOJ=U4BOx@T3nx6?0oGk2ZG?0#WhQhpVrp%zTa^m VpFZxV63C}pw(vXeaUg8#{uklHTNVHS literal 0 HcmV?d00001 diff --git a/neural_networks/transformers/__pycache__/t5_model.cpython-312.pyc b/neural_networks/transformers/__pycache__/t5_model.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d14ef7a3465979545ce184759522645ab1e9f194 GIT binary patch literal 13240 zcmdU0eQ+Dcb-x1+-yi`HASr^B1b&K;WXiNieNmRokdmS=+6pBrqQbx+?nnXx0s8J} zi4@SXu{(lNwIb`dq!YWL>`X*fZbH|dDLYLYJ8q|*N&g@W%0Mnk<4!WuXr_P2qNk2D z`J?ac;SK-=+3uwMqdVZcy}jMNz1{cmdvBLNEh#Zmkopoc!LfA|^((wll1|RhtN#g^ z8R`_pQhgLlvzjnHqUqDnnAe82eOhv5`WSN6_37Zsg!LnaJ_D(v3mZpFeWnp}pLxX6 zXQ3$##W5jx+uFpH+@R&MzEW1t8dxK1V$H8GeKyv@*;(secsNY4C9l(c4$h#V4pD6B z3lwX+p^;nN$adIQMxNQ>nL~Nj=OkriQ07$2T%^neW#wvF`MB0wF|qw*yJsZIa$%1U 
z92pDy#b7kz8I1BCfqQn0iv+kw#ONbR-Xr>jb3)5)e7%>JEdEF&Dv~Bb58RtrzvvHy z{es}tO3bmKAWBSkFd$0CCr7bge|W4ES{{HG@czhq*Zo0Zk7ukzF5-ky|9fvn!+Sgv zC7ok~o^EbT6avF|dpP0=ivDoWABpbqco|7E$V(bdkTfhOX@cTJDZb>aAo5#1K>DRp z-?7e~L%yfJa^T?MuO8^#=Ycm!v|rNrBk(6mngM7xFb?gb&>qTy&{uR2{va<#C9R+5 zBrO|cB`wT^q#gD{ZrCqK+7bV_q!owZ>3O(i1~@JvF>DmZ!VL1!5s4Z0KgUVTc@8>Y z&IiL`q~fro8{mSGAxRgAj)hrCHxe8g7A2i9?1vh<^Q2HO4C6HbV^xz@&e5v_keQ)4 zs*eWN)BuChz>FG>VYSFAECZ~FcRF&X@6(ezL-x)JGH_=Y(@E8Iqol*;fPm<9NV z_l5oA952XaESFyv1c9I=^N9a~FU&=T#Nps%91EU{2K)o0`yQ`ZGWvWG{|M*vNoJo9 zSaU3ld8^O&?3h0+KjB;Ptu~)8j{$u?emj)%?MQYY*@U`vmO-wzjD zsbC~1`g{{*>T6n*vU;eoY63DzWh!bC%%57wXBG@Y50A+(z7LrhO3W%niV~qzP9bEt zH7Q1i3~P$uR#A$IY7ODb*CeG5ODk$HMumuBD6AN=7T)9C1dO(q@@jf++u_b@;2(vH zWB|P|91aeEZeZIby(|QhX_&jf28TFMM>uPQ3%wft5Vl}|zu1C#H2K8nIS{ZQJK@Zm zNTsj|S_#O}lhiF+*|&DibWC^5ir>BX^2P7OQ_e@y&Yel;&XjZa(qk#xzIR;zQvGgq zy7P1rezw!g#?uc@J5CIl_NQUOarHy$#KV(pQl=`TE`{0=;36Hqz<<7PETW1vX!ils`g)0!*I3>7!V4KZ!Vxuz4ijTx1iG z;)e5-m+85UllmVQT|YSV?-MWVANt+G#~&OYcxpdrI9)gj76SDDXgDZJCJwYG$NNQ& zKLITy9Vnh5PUah;=a>;cm=LW1mP+p*9pxgd*OX-?k-GqPgfNt3=0*lMmIXTlhH5k_ z@K0fHS`o|`9h9`saRC7**`p*`hV?$uY{H($c%;M&b%pCdKz{A68S}LHMQhq#m$cWV z>*Pg2K-lZ5*{pG`nt|?chtYW5Rx@FFlDr-t~{ko=huHpKYYg^KF zyOVXhQ#E`3OG`WVW-8ZxqNQBxW_C^Qn(dlvoqP6L`^(4YyOTAKE^c2cf3qW5v+t&J z{~ZeICc4PwBWp$4x;|-LKO4Adt-E7_7v3>bcK5Qe>fT)g)Du8FsdpQ;e5pxapkq*y z3A1+tGhQ-+mVp8}6=Q9d&SPgw3>DDEEpf9lCJ^HrAjV8wbLGcy)u`fZ zJTD72sSTp7@0qAprNu)uJ_wzsk@WsHjIfKuqyX2`rmAEjv2 zzf?agOPr3;mo@m!e?iaCFc=F%jZ=TEJx}vpP{=^YAi|NYtWMzBGvEwdfFx4hHGZpI+hiV@ z+d%1+9H*!CQ}opKjJx4`z4MOw&P96R!0Y{solEqa2bSG?rVPLKG|sWt&s{s0ZhAb~ z^mxj%?^7*Z`2?M*t^pOet~%vzov&PRx27svVN`~2&Fj zdf#!r6Zrf7^v0uao?JY!M1PODZn|cgubuZVG~V<)n%a2uru`TwMi>-mMtAMyBX?{t z>N|EtOPa9Pd%xOvl+55y>Na&(8sBTI>#iv+P?%_`fw$<@e}l{=4dMgA&C)}F5kpY2 zHieMg*Ce#1pa6l&>5I2igb~DCb4in@Y&4>ZAQBo-HX2YibP;8fQ*f$E&M6zP%{j5w zfGBHwhLkb>@5(7Z2>nW0K6;)oaDlM$L3lP%n-kN~Xf#|zNKv@*h1A(HD@=9G(k~rM zI$Q4XxENkt4kySG8HG}Q1RrT-j9`!jfz2PsGqUYx@Wm6Qg#-B#zV#U(ATd9)Rb*V1 
zQ|+^D-|cw0BVGMi5`NCdmaUKdx@vPsU{jkqH#0p8jQkzwR!MrE6UTUPtb3vn&dX$R z3_xifyhVz@ZYU)RwWgY~LPM`&`#j$cLQCMgX<6{+;mSaWMvp#Rx9mq3$SHO~fPDBq;an45Y~uYu!f=$U9xm&}9a9()D_ z9vFG@3`3-5k7rY(u&F?ip(O|Bgur!BBrr*!S(u?xg(P@Z$aPM zbKLlS+EXxG%Z!(Ggi`8o)LES?EaCy7YOT$CqPgDm)-M7od^C@(&DK!B3~K zsSAjVj4BVk%+C7f9Iw#H%0n_T`i9Lx%kyOA&p|~#j0D^9Pa{Dnl&6*n3SxvP57E$2 zG#KF{_?)O{qpYG!Wfjf)qGRId7zErzKR?8YK3r42A8JcRKwZJWFvrRWiFkH8ArR#` z!3fXcD@n?_bn>?$7%KeQ<4`s0(y3UeW9jUQ@o>goownB|?X@X;ecJ9#+Px|J7I5+a z4H<7$dgjVgm7DHT8q=1`hZ3C$G2^NOr%k+m@!G}orhQ5H)$dEX_9qTxtgfq1UVd`+ zL#nkyM)?A~%pT{yJ zsOSVllv(}#325#w)3URI#T(ucOGq+vwE~$eUvH1zIrEdBHJ7 zJ3`D3g_EQLtd-Tg$}_wwVh(MxUjjk4oL;4hp}4#@s{R($z*sTO$s$H9_IA}4jAH-| zT@>=+9tK={nag=1f)W#qK&-a7 zp#wP}Z&S_O8R$*u0zyKQA5?7qp7C{ag1KTrR(D;mzE+*8d<5>T58Q(XVYzHscI{qj z`*FwLb==JIXo02FL&s$O?65ljLrX~&3wIW?kTDhA#lE$iIqZDBBQ)J%stM;Uh(Dh9Q1N zpr52ifXlIP<+;J&1%+o=0ey)AooOQBei#!s;ZWE`E%HHaVGM}uLlO?~&UdUBcVe__ z>-5%iS!1#c{Jd1zmV_bWD4(gBu1P!Ul8(B$ohiqbg#MOeW7<)lbkxrsU2I)EyLkK^ z=W^?Tl;dDRU)aUqJan=hIjBmST4Rc%HANc= z^cB}<*MNXiBLtju(34Kk#b7Hfrf~yLKIH3*;Juvue6tinbHLXS=l%{sl7?c)PZ?XW z?^K>a_{#`iYRY~o$YR_SGl_^mNP3QjFSEu>FnSw|-W)gOt)&{g$}3aj(yR_;4!vIp zkEwVwQJSCzHDHa5xd2Lfs&qy^}S;wBYr+OLG%>FqCC+-kGydS!M^)+?R~oCKD~eFr4O5a zV`*{hD?qZSkqMGTNcfUQ#j+k)4T$qWtWS#${MuB^dg8_tLiH^2tt*HN!mbj=2}slp zJl;Xf`;okk1eK$JLxDh{94VB858z11CK1APSD+#~#yKYwOPL`j#M_%)Mlb7ZZ`4`Lu+5smb8Z;gl@048+bUFA-P$VO?%aF_S zHuyh*dlf0P6dP#P(?HEkLRym)pwbi9+U6Q3vkv_(GHW z91c3?X1Lp~x#k>mov*Ye-EH^yX&iSO5U(T89*$tCY7`{{&y9xt0giYbu$v_E5NRcr z5`7WL;_E&6)-|k&({AxWCYPW({!j)*o6Itc$%%^{M84>E?sU=7aAD z?{}q|pGr4Boos%3`E*~Z`Rs?T;9Tpn@0ql3FzFk-=^LIpF>`wQ^h;-!UBQG2$j@x+ zGS13b%dC6Z*|2PFC}!9*4kws!*ZRAZ&eW0U&cL45k?A8Z9!tBLlCGwds~LQv=0`rP zIXKt4==jl=4_Xg=`^0k1!NhSOw_KI8B`H_a8};+tjm?W(GFUf2jixa{72)4J!51%`0P zN||hkQD3$tEGx$5Oj-F%^K>&X)qKUGX3;V4UU9dj%Gwf!Tbnk|?|ePB82Vt-6N&Rv z7iZ1Ow)$n`@pp!R+`C&%m51nicT1`DhiG9PEW*2wmLIHTexfe}`d(wn!6yBC+Zmt* 
z*cY8qkS==lAjtgZqmrDCe+Y&}oVF%Cq^9c2b#|>CJA*$u<{#D2W~@5_Hhp>i-4~@!bB;B)kf{Q49{%`P+X#mFQLtQVKvY?ikDaJ|$SjOw7rz6Nr{3FFj63kmfpr>A9AH!m&Jb+9wl!VX zo~&zM98T5kPuZV<=v4EzAKDgdOWLL0RC8y#`EauN@cZ@e3#sPbbn}^H^O@zo{#5hV zKXipb&kqFB1Ht4#@aDj|oSqM(o(~f}-89Purrk>osiyrY*At+%U6rsxsLK~mr8al0RPIfoGSAptQxhp$ zEm(nharW@E8ILZs;n9VvW#hVgcb!!E2u;w-yN{Q5Z(x4nECX7gRMmjOGhhZkU#S*^ z-v2LDfa(IODnKhD6Oa_s0<@-zh22Al{?;ToeglTWt%j$-H3RqYTr;qz2}0%!f$JJp z*GsXW6yTYGJli@%vBuZvxGtv#b4n-|P-ji(cqpUHQ%vf?BZSs+cmkDbFEwuVT6!in zbm1A+;NZ9l{P936o_!^;3IfbzU=$odD1`V{GQyD?2xh_YPh*4|g5U<^bYgTg81U*O zv&aR8BT?8(9|u$v2qK^YJT(eIVF6phIiO$w5Eh)N42#3JF*G9?#vqZuHTWY-?1pB>i4D`k3$^9?wmTfT)k_y`@2V9KANuCo~+rv z_-wLf7lhPO_B|le7TZ+E2bK+)+NOEOYgJQwmaTgh%|P(M&TsCQ%6k$A0U&)VFx&WI zXzsD;D4a6_F!c0|>Lj3=u8gy4wtl)}*}5TPTPH_OaCMXf>>%Tl#1|4Jg>}fWA9SPE zq|b@WX&z#MKNG7w45KLaGtNNdK_{ZUAt}f15Q_lCPD~eAQn0>ycz+S0D|RYQ@i754 z=!!)$VsO}07RHz%Vv89<<;weWS7n5`S-WzWfe2!;!#%mPe{xLF@sywUj}G&GIOPUG zK|J_@>qdMxI5Kh$^TlF<58H5&w_}KNBcGL%_lt>NLp`vTynYE;B4a>#` zd9R}jMh&3@R3j5*%1QmR%0cjcdDs09uoBH9z=ypUz?az_7}Nx7_hRVVkFBk?6RSOn zB(5}oZV-aVjsH>2<(||0KSOE3+m?$|TYD5h-VZVr-4FUcwqHW>9+DK2G?K@W>_bBK z_`i)gBXRBXkaZ?qqud-Dc`GD?&&Ngs z@)`EQF*wcuXV`i0j41gilmSmw0jKlFph&W)|2M%2JgON!l*N|_oa^}tP*>WsNc{(WrH)oCV6 zr%G2T9j&=rs^mNS~3dpJ!Ztvjjr)3TF?vE)TpZ3sJ`pfjk6w*&O ObklU>T?#3g&;JIXALYRS literal 0 HcmV?d00001 From f3fd8edf24c651175bd2cb1ec61ef0ba85ddf440 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 19 Feb 2026 22:18:43 +0000 Subject: [PATCH 4/5] feat: add multi-agent AI/ML platform structure - Add config/ with YAML/JSON model configs for blast_radius, cost_optimizer, intent_classifier models; BERT/GPT neural network configs; empathy patterns - Add training_data/ README files for infrastructure_logs, user_intents, policy_examples, emotional_contexts - Add .gitignore to exclude __pycache__ and bytecode files Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .gitignore | 11 ++ 
.../empathy_patterns/patterns.json | 101 ++++++++++++++++++ .../ml_models/blast_radius_model/config.yaml | 52 +++++++++ .../cost_optimizer_model/config.yaml | 54 ++++++++++ .../intent_classifier_model/config.yaml | 58 ++++++++++ .../model_configs/bert_config.json | 50 +++++++++ .../model_configs/gpt_config.json | 49 +++++++++ .../pretrained_weights/README.md | 70 ++++++++++++ training_data/emotional_contexts/README.md | 67 ++++++++++++ training_data/infrastructure_logs/README.md | 63 +++++++++++ training_data/policy_examples/README.md | 56 ++++++++++ training_data/user_intents/README.md | 43 ++++++++ 12 files changed, 674 insertions(+) create mode 100644 .gitignore create mode 100644 config/emotional_ai/empathy_patterns/patterns.json create mode 100644 config/ml_models/blast_radius_model/config.yaml create mode 100644 config/ml_models/cost_optimizer_model/config.yaml create mode 100644 config/ml_models/intent_classifier_model/config.yaml create mode 100644 config/neural_networks/model_configs/bert_config.json create mode 100644 config/neural_networks/model_configs/gpt_config.json create mode 100644 config/neural_networks/pretrained_weights/README.md create mode 100644 training_data/emotional_contexts/README.md create mode 100644 training_data/infrastructure_logs/README.md create mode 100644 training_data/policy_examples/README.md create mode 100644 training_data/user_intents/README.md diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..7ef96c7 --- /dev/null +++ b/.gitignore @@ -0,0 +1,11 @@ +__pycache__/ +*.py[cod] +*.pyo +*.pyd +.Python +*.egg-info/ +dist/ +build/ +.env +.venv +*.log diff --git a/config/emotional_ai/empathy_patterns/patterns.json b/config/emotional_ai/empathy_patterns/patterns.json new file mode 100644 index 0000000..9c39f73 --- /dev/null +++ b/config/emotional_ai/empathy_patterns/patterns.json @@ -0,0 +1,101 @@ +{ + "version": "1.0.0", + "description": "Empathy detection patterns for emotional intelligence", + "emotions": { + 
"joy": { + "keywords": ["happy", "excited", "great", "wonderful", "amazing", "love", "fantastic", "thrilled", "delighted", "overjoyed"], + "phrases": ["this is awesome", "i'm so happy", "feeling great", "loving this", "so excited about"], + "valence": 0.8, + "arousal": 0.7 + }, + "sadness": { + "keywords": ["sad", "unhappy", "disappointed", "miserable", "depressed", "down", "blue", "heartbroken", "sorrowful"], + "phrases": ["feeling down", "not doing well", "hard time", "going through a lot", "struggling with"], + "valence": -0.7, + "arousal": 0.2 + }, + "anger": { + "keywords": ["angry", "frustrated", "furious", "annoyed", "irritated", "mad", "outraged", "livid"], + "phrases": ["this is ridiculous", "so frustrated with", "can't believe", "fed up with", "sick of this"], + "valence": -0.8, + "arousal": 0.9 + }, + "fear": { + "keywords": ["scared", "afraid", "worried", "anxious", "nervous", "terrified", "panicked", "concerned"], + "phrases": ["what if", "i'm worried about", "afraid that", "not sure if", "concerned about"], + "valence": -0.6, + "arousal": 0.8 + }, + "frustration": { + "keywords": ["stuck", "blocked", "confused", "lost", "overwhelmed", "helpless", "struggling"], + "phrases": ["can't figure out", "doesn't work", "keeps failing", "not working", "hitting a wall"], + "valence": -0.5, + "arousal": 0.6 + }, + "excitement": { + "keywords": ["excited", "pumped", "energized", "motivated", "inspired", "eager", "enthusiastic"], + "phrases": ["can't wait", "so pumped", "really excited", "looking forward", "ready to"], + "valence": 0.7, + "arousal": 0.9 + } + }, + "distress_signals": { + "high_urgency_words": ["urgent", "emergency", "critical", "asap", "immediately", "help", "stuck"], + "overwhelm_indicators": ["too much", "can't handle", "overwhelmed", "breaking point", "falling apart"], + "isolation_signals": ["alone", "nobody", "no one", "on my own", "by myself", "unsupported"], + "burnout_signals": ["exhausted", "burned out", "can't anymore", "done", "giving 
up", "quitting"] + }, + "empathy_responses": { + "acknowledgment": [ + "I understand this is difficult.", + "I can see you're going through a challenging time.", + "That sounds really tough.", + "I hear you, and I appreciate you sharing that.", + "It makes complete sense that you feel this way." + ], + "validation": [ + "Your feelings are completely valid.", + "It's understandable to feel this way given the circumstances.", + "Anyone in your situation would feel the same.", + "What you're experiencing is real and it matters." + ], + "support": [ + "I'm here to help you through this.", + "Let's work through this together.", + "You don't have to figure this out alone.", + "I'll do my best to support you." + ], + "encouragement": [ + "You've handled challenges before and I believe in your ability to navigate this.", + "Progress takes time, and you're making it.", + "Every step forward counts, no matter how small.", + "You're doing better than you think." + ] + }, + "tone_guidelines": { + "stressed_user": { + "formality": 0.3, + "warmth": 0.9, + "directness": 0.7, + "empathy_level": 1.0 + }, + "excited_user": { + "formality": 0.3, + "warmth": 0.8, + "directness": 0.8, + "empathy_level": 0.7 + }, + "frustrated_user": { + "formality": 0.4, + "warmth": 0.9, + "directness": 0.8, + "empathy_level": 0.95 + }, + "neutral_user": { + "formality": 0.6, + "warmth": 0.6, + "directness": 0.7, + "empathy_level": 0.5 + } + } +} diff --git a/config/ml_models/blast_radius_model/config.yaml b/config/ml_models/blast_radius_model/config.yaml new file mode 100644 index 0000000..fc3b64b --- /dev/null +++ b/config/ml_models/blast_radius_model/config.yaml @@ -0,0 +1,52 @@ +model: + name: blast_radius_model + version: "1.0.0" + type: classification + description: "Predicts the blast radius (affected services) of infrastructure changes" + +training: + algorithm: gradient_boosting + max_depth: 6 + n_estimators: 200 + learning_rate: 0.05 + min_samples_leaf: 10 + subsample: 0.8 + feature_fraction: 
0.7 + +features: + - name: service_dependency_count + type: integer + description: "Number of downstream dependencies" + - name: change_type + type: categorical + values: [config, code, infrastructure, database, network] + - name: deployment_frequency + type: float + description: "Average deployments per week" + - name: error_rate_baseline + type: float + description: "Baseline error rate before change" + - name: traffic_volume + type: float + description: "Requests per second" + - name: team_size + type: integer + - name: test_coverage + type: float + min: 0.0 + max: 1.0 + +thresholds: + low_blast_radius: 2 + medium_blast_radius: 5 + high_blast_radius: 10 + +monitoring: + drift_detection: true + retraining_trigger_accuracy: 0.82 + evaluation_metric: f1_weighted + +serving: + batch_size: 128 + max_latency_ms: 50 + cache_ttl_seconds: 300 diff --git a/config/ml_models/cost_optimizer_model/config.yaml b/config/ml_models/cost_optimizer_model/config.yaml new file mode 100644 index 0000000..5d36203 --- /dev/null +++ b/config/ml_models/cost_optimizer_model/config.yaml @@ -0,0 +1,54 @@ +model: + name: cost_optimizer_model + version: "2.1.0" + type: regression + description: "Predicts and optimizes cloud infrastructure costs" + +training: + algorithm: random_forest_regression + n_estimators: 300 + max_depth: 10 + min_samples_split: 5 + n_jobs: -1 + random_state: 42 + +features: + - name: cpu_utilization_avg + type: float + unit: percent + - name: memory_utilization_avg + type: float + unit: percent + - name: storage_gb + type: float + - name: network_egress_gb + type: float + - name: instance_type + type: categorical + - name: region + type: categorical + - name: reserved_capacity_ratio + type: float + - name: spot_eligible + type: boolean + - name: idle_hours_per_day + type: float + +optimization_targets: + primary: minimize_cost + constraints: + - min_availability: 0.999 + - max_latency_p99_ms: 200 + - min_throughput_rps: 1000 + +savings_strategies: + - right_sizing + - 
reserved_instances + - spot_instances + - scheduled_scaling + - storage_tiering + +monitoring: + alert_threshold_overspend_percent: 15 + forecast_horizon_days: 30 + retraining_schedule: weekly diff --git a/config/ml_models/intent_classifier_model/config.yaml b/config/ml_models/intent_classifier_model/config.yaml new file mode 100644 index 0000000..9c367c8 --- /dev/null +++ b/config/ml_models/intent_classifier_model/config.yaml @@ -0,0 +1,58 @@ +model: + name: intent_classifier_model + version: "3.0.0" + type: multiclass_classification + description: "Classifies user intent from natural language queries" + +training: + algorithm: hybrid_nb_transformer + naive_bayes_weight: 0.4 + pattern_matching_weight: 0.6 + confidence_threshold: 0.65 + ambiguity_margin: 0.1 + +intents: + - greeting + - farewell + - help + - create + - delete + - update + - query + - deploy + - monitor + - scale + - debug + - report + - train + - predict + - confirm + - deny + +preprocessing: + tokenization: word + lowercase: true + remove_stopwords: true + stemming: false + min_token_length: 2 + +augmentation: + synonym_replacement: true + back_translation: false + paraphrase_generation: false + augmentation_factor: 3 + +evaluation: + primary_metric: accuracy + secondary_metrics: + - precision_weighted + - recall_weighted + - f1_weighted + cross_validation_folds: 5 + test_split: 0.2 + +serving: + max_latency_ms: 20 + cache_enabled: true + cache_ttl_seconds: 600 + fallback_intent: help diff --git a/config/neural_networks/model_configs/bert_config.json b/config/neural_networks/model_configs/bert_config.json new file mode 100644 index 0000000..260b0cd --- /dev/null +++ b/config/neural_networks/model_configs/bert_config.json @@ -0,0 +1,50 @@ +{ + "model_type": "bert", + "architectures": ["BERTModel"], + "vocab_size": 30522, + "hidden_size": 768, + "num_hidden_layers": 12, + "num_attention_heads": 12, + "intermediate_size": 3072, + "hidden_act": "gelu", + "hidden_dropout_prob": 0.1, + 
"attention_probs_dropout_prob": 0.1, + "max_position_embeddings": 512, + "type_vocab_size": 2, + "initializer_range": 0.02, + "layer_norm_eps": 1e-12, + "pad_token_id": 0, + "position_embedding_type": "absolute", + "use_cache": true, + "classifier_dropout": null, + "simulated": true, + "simulation_config": { + "embedding_dim": 64, + "n_layers": 3, + "n_heads": 4, + "max_seq_length": 128, + "feature_output_dim": 64 + }, + "fine_tuning": { + "learning_rate": 2e-5, + "batch_size": 32, + "max_epochs": 10, + "warmup_steps": 500, + "weight_decay": 0.01, + "adam_epsilon": 1e-8 + }, + "tasks": { + "classification": { + "output_dim": "num_labels", + "pooling": "cls_token" + }, + "token_classification": { + "output_dim": "num_labels", + "pooling": "per_token" + }, + "question_answering": { + "output_dim": 2, + "pooling": "per_token" + } + } +} diff --git a/config/neural_networks/model_configs/gpt_config.json b/config/neural_networks/model_configs/gpt_config.json new file mode 100644 index 0000000..e24053d --- /dev/null +++ b/config/neural_networks/model_configs/gpt_config.json @@ -0,0 +1,49 @@ +{ + "model_type": "gpt", + "architectures": ["GPTModel"], + "vocab_size": 50257, + "n_positions": 1024, + "n_embd": 768, + "n_layer": 12, + "n_head": 12, + "n_inner": 3072, + "activation_function": "gelu_new", + "resid_pdrop": 0.1, + "embd_pdrop": 0.1, + "attn_pdrop": 0.1, + "layer_norm_epsilon": 1e-5, + "initializer_range": 0.02, + "summary_type": "cls_index", + "summary_use_proj": true, + "summary_activation": null, + "summary_proj_to_labels": true, + "summary_first_dropout": 0.1, + "bos_token_id": 50256, + "eos_token_id": 50256, + "simulated": true, + "simulation_config": { + "embedding_dim": 64, + "n_layers": 3, + "n_heads": 4, + "context_length": 256, + "ngram_order": 3, + "temperature_default": 0.7 + }, + "generation": { + "max_new_tokens": 200, + "temperature": 0.7, + "top_k": 50, + "top_p": 0.9, + "repetition_penalty": 1.2, + "do_sample": true, + "num_beams": 1, + 
"early_stopping": false + }, + "fine_tuning": { + "learning_rate": 5e-5, + "batch_size": 8, + "gradient_accumulation_steps": 4, + "max_grad_norm": 1.0, + "num_train_epochs": 3 + } +} diff --git a/config/neural_networks/pretrained_weights/README.md b/config/neural_networks/pretrained_weights/README.md new file mode 100644 index 0000000..7579f2a --- /dev/null +++ b/config/neural_networks/pretrained_weights/README.md @@ -0,0 +1,70 @@ +# Pretrained Model Weights + +This directory stores pretrained neural network weights for the platform's simulated models. + +## Directory Structure + +``` +pretrained_weights/ +├── bert/ +│ ├── bert-base-uncased.bin # BERT base weights (~440MB) +│ ├── bert-large-uncased.bin # BERT large weights (~1.3GB) +│ └── bert-base-multilingual.bin # Multilingual BERT (~680MB) +├── gpt/ +│ ├── gpt2-small.bin # GPT-2 small weights (~548MB) +│ ├── gpt2-medium.bin # GPT-2 medium weights (~1.5GB) +│ └── gpt3-simulated.bin # Simulated GPT-3 weights +├── t5/ +│ ├── t5-small.bin # T5 small weights (~242MB) +│ └── t5-base.bin # T5 base weights (~892MB) +└── domain_specific/ + ├── infra-bert.bin # Infrastructure-tuned BERT + ├── ops-gpt.bin # DevOps-tuned GPT + └── ml-t5.bin # ML-domain-tuned T5 +``` + +## Downloading Weights + +Since this is a simulated platform, the neural network models use heuristic +implementations that do not require actual weight files. 
+ +For production deployments with real neural networks, download weights from: + +### Hugging Face Hub + +```bash +# Install huggingface-hub +pip install huggingface-hub + +# Download BERT base +python -c "from huggingface_hub import hf_hub_download; hf_hub_download('bert-base-uncased', 'pytorch_model.bin', local_dir='bert/')" + +# Download GPT-2 +python -c "from huggingface_hub import hf_hub_download; hf_hub_download('gpt2', 'pytorch_model.bin', local_dir='gpt/')" + +# Download T5 small +python -c "from huggingface_hub import hf_hub_download; hf_hub_download('t5-small', 'pytorch_model.bin', local_dir='t5/')" +``` + +### Manual Download Links + +| Model | Source | Size | +|-------|--------|------| +| BERT Base Uncased | https://huggingface.co/bert-base-uncased | 440MB | +| GPT-2 Small | https://huggingface.co/gpt2 | 548MB | +| T5 Small | https://huggingface.co/t5-small | 242MB | + +## Weight Format + +All weights should be saved in PyTorch `.bin` format or ONNX `.onnx` format +for cross-framework compatibility. + +## Versioning + +Weight files follow semantic versioning in the format: +`{model_name}-v{major}.{minor}.{patch}.bin` + +## License + +Pretrained weights are subject to their respective model licenses. +Check individual model cards on Hugging Face for license details. diff --git a/training_data/emotional_contexts/README.md b/training_data/emotional_contexts/README.md new file mode 100644 index 0000000..dceeaaa --- /dev/null +++ b/training_data/emotional_contexts/README.md @@ -0,0 +1,67 @@ +# Emotional Contexts Training Data + +Labeled emotional context datasets for training empathy detection and +emotional intelligence models. 
+ +## Emotion Categories + +| Category | Description | Valence | Arousal | +|----------|-------------|---------|---------| +| joy | Positive excitement and happiness | +0.8 | +0.7 | +| sadness | Low energy negative state | -0.7 | -0.5 | +| anger | High arousal negative state | -0.8 | +0.9 | +| fear | Threat-response state | -0.6 | +0.8 | +| frustration | Goal-blocked negative state | -0.5 | +0.6 | +| excitement | High energy positive state | +0.7 | +0.9 | +| neutral | Baseline state | 0.0 | 0.3 | +| overwhelm | Cognitive overload state | -0.6 | +0.7 | + +## Contextual Scenarios + +### Technical Frustration +- User encountering repeated errors +- Complex debugging sessions +- Unclear documentation + +### Achievement and Success +- Successful deployments +- Performance improvements +- Learning breakthroughs + +### Stress Under Pressure +- Incident response scenarios +- Deadline pressure +- Production outages + +### Collaborative Joy +- Team accomplishments +- Helping others succeed +- Knowledge sharing wins + +## Data Format + +```json +{ + "text": "I've been trying to fix this for 3 hours and nothing works!", + "emotion": "frustration", + "valence": -0.6, + "arousal": 0.7, + "context_type": "technical_frustration", + "stress_level": "high", + "recommended_response_tone": { + "warmth": 0.95, + "formality": 0.3, + "directness": 0.7, + "empathy_level": 1.0 + }, + "sample_empathy_response": "That sounds really frustrating. Let's tackle this together - can you share the error message?" +} +``` + +## Annotation Guidelines + +1. Annotate primary and secondary emotions +2. Rate valence (-1 to +1) and arousal (0 to 1) +3. Include contextual signals (exclamation marks, caps, urgency words) +4. Annotate recommended AI response tone +5. 
Minimum 300 examples per emotion category diff --git a/training_data/infrastructure_logs/README.md b/training_data/infrastructure_logs/README.md new file mode 100644 index 0000000..af4c944 --- /dev/null +++ b/training_data/infrastructure_logs/README.md @@ -0,0 +1,63 @@ +# Infrastructure Logs Training Data + +This directory contains curated infrastructure log samples for training +log analysis and anomaly detection models. + +## Data Categories + +### System Logs +- `/var/log/syslog` samples with labeled anomalies +- Kernel logs with hardware failure indicators +- Boot sequence logs for baseline patterns + +### Application Logs +- Web server access logs (Apache/Nginx format) +- Application error logs with stack traces +- Performance degradation log sequences + +### Container & Kubernetes Logs +- Pod lifecycle events (OOMKilled, CrashLoopBackOff) +- Node pressure and eviction events +- Network policy violation logs + +### Database Logs +- Slow query logs with execution plans +- Connection pool exhaustion events +- Replication lag indicators + +## Data Format + +Each log entry should follow structured format: +```json +{ + "timestamp": "2024-01-15T10:23:45.123Z", + "level": "ERROR", + "service": "api-gateway", + "message": "Connection timeout after 30s", + "labels": { + "anomaly_type": "network_latency", + "severity": "high", + "affected_services": ["user-service", "auth-service"] + } +} +``` + +## Collection Guidelines + +1. Anonymize all PII before adding to this directory +2. Include balanced samples of normal and anomalous logs +3. Label each sample with anomaly type and severity +4. Minimum 1000 samples per log category +5. 
Maintain 80/20 normal/anomalous ratio + +## Usage + +```python +from dataops.data_ingestion.connectors import JSONConnector, ConnectorConfig + +config = ConnectorConfig( + name="infra_logs", + connector_type="json", + connection_params={"path": "training_data/infrastructure_logs/samples.json"} +) +``` diff --git a/training_data/policy_examples/README.md b/training_data/policy_examples/README.md new file mode 100644 index 0000000..eca57a3 --- /dev/null +++ b/training_data/policy_examples/README.md @@ -0,0 +1,56 @@ +# Policy Examples Training Data + +Example policies and outcomes for reinforcement learning policy optimization. + +## Policy Types + +### Infrastructure Policies +- Auto-scaling policies with CPU/memory thresholds +- Load balancing strategies (round-robin, least-connections, weighted) +- Resource allocation decisions under capacity constraints + +### Deployment Policies +- Canary deployment traffic shifting strategies +- Rollback trigger conditions and thresholds +- Blue-green promotion criteria + +### Cost Optimization Policies +- Spot instance bidding strategies +- Reserved capacity allocation rules +- Storage tiering decision trees + +### Security Policies +- Access control rule templates +- Network policy configurations +- Secret rotation schedules + +## Data Format + +```json +{ + "policy_id": "scale-policy-001", + "policy_type": "auto_scaling", + "state": { + "cpu_utilization": 0.85, + "memory_utilization": 0.72, + "request_rate": 1500, + "current_replicas": 3 + }, + "action": { + "scale_to_replicas": 5, + "reasoning": "CPU above 80% threshold with growing request rate" + }, + "outcome": { + "reward": 0.92, + "latency_improvement_ms": 45, + "cost_impact_usd": 2.50 + } +} +``` + +## Quality Criteria + +- Include state-action-reward triples for RL training +- Cover edge cases (scale-down during traffic spikes, etc.) 
+- Label sub-optimal decisions for contrastive learning +- Minimum 500 examples per policy type diff --git a/training_data/user_intents/README.md b/training_data/user_intents/README.md new file mode 100644 index 0000000..29bf8b8 --- /dev/null +++ b/training_data/user_intents/README.md @@ -0,0 +1,43 @@ +# User Intents Training Data + +Labeled intent classification datasets for training the NLU intent classifier. + +## Intent Categories + +| Intent | Description | Example Utterances | +|--------|-------------|-------------------| +| greeting | User is greeting the system | "Hello", "Hi there", "Good morning" | +| help | Requesting assistance | "How do I deploy?", "What is Kubernetes?" | +| create | Creating a resource | "Create a new service", "Setup a database" | +| deploy | Deploying software | "Deploy version 2.1 to production" | +| monitor | Checking system status | "Show me the metrics", "What's the error rate?" | +| scale | Scaling infrastructure | "Scale the API to 10 replicas" | +| debug | Troubleshooting | "Why is the service failing?" | +| report | Requesting reports | "Generate a cost report for last month" | +| train | Training ML models | "Train the sentiment model on new data" | +| predict | Running inference | "Predict the blast radius of this change" | + +## Data Format + +CSV format with columns: `text`, `intent`, `confidence`, `entities` + +```csv +text,intent,confidence,entities +"Deploy my-service to production",deploy,1.0,"{""service"": ""my-service"", ""environment"": ""production""}" +"Show me the CPU usage for the past hour",monitor,0.95,"{""metric"": ""cpu_usage"", ""duration"": ""1h""}" +``` + +## Data Collection Guidelines + +1. Minimum 200 examples per intent +2. Include variations in phrasing and terminology +3. Include multi-intent utterances for edge case testing +4. Annotate slot/entity spans alongside intent labels +5. 
Include negative examples (out-of-scope queries) + +## Augmentation Strategies + +- Synonym substitution for technical terms +- Back-translation for linguistic diversity +- Template-based generation for structured intents +- Crowdsourced paraphrasing for natural diversity From 7713edffcb81424369f4729adb2985d22da5af52 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 19 Feb 2026 22:22:07 +0000 Subject: [PATCH 5/5] Remove pycache files from git tracking Co-authored-by: Stacey77 <54900383+Stacey77@users.noreply.github.com> --- .../__pycache__/__init__.cpython-312.pyc | Bin 152 -> 0 bytes .../cnn/__pycache__/__init__.cpython-312.pyc | Bin 156 -> 0 bytes .../__pycache__/log_analyzer.cpython-312.pyc | Bin 12070 -> 0 bytes .../metric_classifier.cpython-312.pyc | Bin 12368 -> 0 bytes .../gnn/__pycache__/__init__.cpython-312.pyc | Bin 156 -> 0 bytes .../dependency_analyzer.cpython-312.pyc | Bin 9790 -> 0 bytes .../infrastructure_gnn.cpython-312.pyc | Bin 10315 -> 0 bytes .../rl/__pycache__/__init__.cpython-312.pyc | Bin 155 -> 0 bytes .../policy_optimizer.cpython-312.pyc | Bin 11408 -> 0 bytes .../resource_allocator.cpython-312.pyc | Bin 9057 -> 0 bytes .../rnn/__pycache__/__init__.cpython-312.pyc | Bin 156 -> 0 bytes .../anomaly_detector.cpython-312.pyc | Bin 11099 -> 0 bytes .../__pycache__/lstm_predictor.cpython-312.pyc | Bin 11481 -> 0 bytes .../__pycache__/__init__.cpython-312.pyc | Bin 165 -> 0 bytes .../__pycache__/bert_model.cpython-312.pyc | Bin 12250 -> 0 bytes .../__pycache__/gpt_model.cpython-312.pyc | Bin 10118 -> 0 bytes .../__pycache__/t5_model.cpython-312.pyc | Bin 13240 -> 0 bytes 17 files changed, 0 insertions(+), 0 deletions(-) delete mode 100644 neural_networks/__pycache__/__init__.cpython-312.pyc delete mode 100644 neural_networks/cnn/__pycache__/__init__.cpython-312.pyc delete mode 100644 neural_networks/cnn/__pycache__/log_analyzer.cpython-312.pyc delete mode 100644 
neural_networks/cnn/__pycache__/metric_classifier.cpython-312.pyc delete mode 100644 neural_networks/gnn/__pycache__/__init__.cpython-312.pyc delete mode 100644 neural_networks/gnn/__pycache__/dependency_analyzer.cpython-312.pyc delete mode 100644 neural_networks/gnn/__pycache__/infrastructure_gnn.cpython-312.pyc delete mode 100644 neural_networks/rl/__pycache__/__init__.cpython-312.pyc delete mode 100644 neural_networks/rl/__pycache__/policy_optimizer.cpython-312.pyc delete mode 100644 neural_networks/rl/__pycache__/resource_allocator.cpython-312.pyc delete mode 100644 neural_networks/rnn/__pycache__/__init__.cpython-312.pyc delete mode 100644 neural_networks/rnn/__pycache__/anomaly_detector.cpython-312.pyc delete mode 100644 neural_networks/rnn/__pycache__/lstm_predictor.cpython-312.pyc delete mode 100644 neural_networks/transformers/__pycache__/__init__.cpython-312.pyc delete mode 100644 neural_networks/transformers/__pycache__/bert_model.cpython-312.pyc delete mode 100644 neural_networks/transformers/__pycache__/gpt_model.cpython-312.pyc delete mode 100644 neural_networks/transformers/__pycache__/t5_model.cpython-312.pyc diff --git a/neural_networks/__pycache__/__init__.cpython-312.pyc b/neural_networks/__pycache__/__init__.cpython-312.pyc deleted file mode 100644 index de4434f497a9e56f9d598a02cfb1b26c3b42ffe9..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 152 zcmX@j%ge<81i8)AGePuY5P=Rpvj9b=GgLBYGWxA#C}INgK7-W!GS|<@&rQ`YD$UDF zEz&Q~FUr<0N=!G05P7MkMTt4_d8s8JiDLcu_{_Y_lK6PNg34bUHo5sJr8%i~MXW$` T7=gGL#Q4a}$jDg43}gWS18^j~ diff --git a/neural_networks/cnn/__pycache__/__init__.cpython-312.pyc b/neural_networks/cnn/__pycache__/__init__.cpython-312.pyc deleted file mode 100644 index df59ea5235917be5b101156e59cbc329e96c5bb5..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 156 zcmX@j%ge<81i8)AGePuY5P=Rpvj9b=GgLBYGWxA#C}INgK7-W!vewVY&rQ`YD$UDF 
zEz&Q~FUr<0N=!G05P7MkMTt4_d8s8JiDLcaygdE*_{_Y_lK6PNg34bUHo5sJr8%i~ XMXW%x7=gGL#Q4a}$jDg43}gWSv3n*i diff --git a/neural_networks/cnn/__pycache__/log_analyzer.cpython-312.pyc b/neural_networks/cnn/__pycache__/log_analyzer.cpython-312.pyc deleted file mode 100644 index 84c3e3c5bacba9de62346e5fe996dd2ff1dbca7b..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12070 zcmcIqYj7Lab>0OQF9Ia^e(C{=lqg6fC0dmAut-T3^{`}7wk10WEfs;VO9~VSFuR~6 z!hoaNY7IK-h>YVBiq#r!l7<>PQ+DFEYU3a3IGxE%{}2Xc!fR#XnPw)@KSosR*uVOn zyI26E;Mh&3m&Cbu?`!Yl+;hHj?*3wtB{Heh4*O@WdT`+$AKG2j>} z9Vi_s8z^J>=74jge4w1?44h;dUZ)QrA4`>QV+<4qszi%u6>az{5$*VM+^`H(4{>7Y z-|+)AlGVT+=ESlqoalVpptX2gi@1T>Y-u@4D;_GXV=XIDR;8B>)U&*5U(@UMG+vyFxH z-rr~o^vT}eXhrw*oW;Fvi)s?3vtvW5;13RlRNU(wSRWz6T3 zXm(22D`QqlI5eu7MQJn;8dps|*{huMX+zeiQH%Jc=a87@ByNBQuNs~Ohe}4#um`y; zWz3}nmNJ27&8T6aC*nNG{EjupXBN?hvJxsAw_`>n*xjeJ%~g#`SXN82TkY|SG@l{q zl4=Tg&q@K=jy70|K*$?bt%`I(!t#vYe)+9Ho(x#cYvL-P48vUg~QMuO6q>~i=8L37>*#zbO$ z+j>#_W!r}B5hT+)LT-D^q6yn$X4_-IlG(65C)oMG3XkR})oYH}eY`?3wuisLN! 
zf&2NTO~{?*qWo}HO0v={(dA#vAo4L|IM;rdCb=T$b^Ew8r5tw}bz?%z6cwU|3!K7V zwMR`cBhLkSfeX{SSr83jo1TvH`UZ){2#$d+A!Ft+(=$}BkGm%L3`3mHa0YoXLF-*M zh=w3E#+2czL*QbTm=MMYv>=+UU~YY!K05s!Mc-?Vpb@=0W-aK!@<;Vx3p@2O443Pv zd@Ncq-|Two{Huf)C&0&(3E5gcE5KES=v?FSanxodJEm{(_M;$|!sD)PK zB-We9FD6{%-1dGZqvSFa46*L_eS2S44S-JznF#E~f%EyL$B{V3@x*4_nK~>l|A*NP z>{2aqf6Y(xJa@H0;Kuo%7%%cS4fm+`ekdi2UlIL7{_stMYUtdm@;-%@(&durjD2p~ zGb9D2OQUjcq_fB|=*%k0&OLz;6pzx|nX6And8r=cx7_b9b00MHB>CBje`)x~hJR|h z(VS}N`D}@R%}X_F`GjRHKlJ*C8-8o=tmys!oVC0cZ`7tdHaZHWNsC!HUHKp~{;v~d z;_46@%7>}-y3^tdUG3Xqr#rte5vf2?cdP^Sku24=pL=4Ty9DYhbWxJkQqReQ&mDZ` ziBsS39N+(xYVwDr5k)nQ_=Bo3AO%&M=h(q-eC@S-RX6H)X171R{>pH0<1hqhl*T;KSn?YeDl-Qi!<&(}XQ zRWBzr>br%$gHVPRNe3;H~zTutzsY%Lcd#(pT(1= zbj^TuV{%a5i5wKH(5xP+w?dUZMtMY9@lz}ySGWfTr>RF%n^dw)ZIU=b;FqYM0=3D* z4k5e1aD`BvO*BG@nqVOah|RDPOo%PxX5hGTumGU=0Rqr|;Ac6qudAca1@w6#6c{7O z?8>9`i~jJrFM{e`;5zRRU~s1|J1RCb5@LAfmk64WPL6UkaVa#FJ zPr07Pxtlx|SGAvqH6aB&ia#Q$_TbovXV4#jg`ueB9%cl1d{Qthhy0?VT09JYDsH2E z3SG()kwFlJy2Y{3P6N=TAC`&76KPnv7M*(0ZfeyGg2T|XAuh~Y>vQqW-SoUVv#vER z{FQ@k)G|~;+`(1=3I{6?&doQE4x-#}Zr-S!4d;<38>0M4D5aPooWmzyy@v=8g~*e> z(=h^&445cp#6B4DSMVlDOtAoz7a@U|FXP;S_jZ>6WACX$U; z&%lb%v$&#rE}(sNu1YSE-}P}F)mnjSP5qI!Q?eJb4c?ZON5TcUz8-ED&B$H`_5^gy zATH^Y9K;3kkMYGMPZrWv0Mc>4$`9;_J z7vA4J!_PGTL`b#uf2Pn0ez~fdpDtAf_Upqs%9X94B&~N=o2q3d3jO_bI9{B6)){!D4p_Q=_US z%QUK8Q@Dgz6_rO-lCtats*N&%bm4`w04RF^ZRHEpwKb=nX>d$_gM2fqij81_NOdvB zcmlPR{{evwbJtpS%|2nDtL}MExxM3O(Ru5Wcf0rgTu65xjN3C6^(3J?A)%}4rhBJ) zlg&Tg^wy?yRY$xps(-81g_2H@H5_~BnSZ%AJJ!J1Ev ze0^7D)#?XEuA${k=XGZqo9dr$>%UXq56D>G6+gnFPd0Ch_boV@Q;z23`VSl%7j5YJ zK?!GbYBSqBbLt;X-#mTCy8E;H7BpiJ^L=4`-)i$RY(b02w8l%&v#(fRv5`4i%8U}&HYMbdwu}YO2SXQwau0fuuYjmVxQmyG zD3V)DM0F$ax*;c`n3$aaJR>1A0Yy!51Olz7m}TYzEDI)(=zPh4PUJ-pCK+9>%Y}le z{h^b^POyJ)yolWWGwOtNT~<2P9hz-U2#Ke!Tap`Ry58D+r@kXqcSw`KFW1!b_lOs> z=5zNxZSDjS?$J~sJD3Qfa=DF2UgR(oNQh4pJ|-C!uL%3vdnqe%nBY=N&PobI3VD>X zX}HK`NfT(7(HzD$WvD_hB##sX~!) 
z%vm*y5$t#m@`1ZYD#Q(Y6cI6FnAt({ckw8Hlk$ZAP*4IVF&#nnD$CX_*B6ixaWa=N zSV;3LpV4Z(hN6eseO|#XUC5Q+?$VshZtDvB7v`Bq4Ef-x_tWbbd%Q%cYeWcpXf`h$ z5c#Z5b|9h9nYhAzBpiJp2&VN*4vVRFsnTJ3?7=F#saI=74Se|J0<8i9zeJdzz*4qq z4rbWM2H3;`Y-G(=wr6c+hh{6=aHD49=ua#Y&9JJi;}*A5wPQKHCg5 z*3Lg*IpOk@>}&L@g`ybxOYA1Hw3&4T+0Fd@Mp!%Cuvzaj9}nmBSw1hC)0wy>Fch<_ z@|q0Rw-7bGV|Y8e(fO^qx)<7sD`sC6wXF&^S%gLJ$&CwJF7!v5ayEbl`2)C>)i2k8 zW$8d^GL(+~DTv(Mj%pzCI#po4DngCgWzY91#SZHYq###LAyOUa+M{-McG~e{(9mjD3eEmF3aa-v% z=Y(^?+LW?3B@WM9TQl{m;KV6np3{kA=ee6P8MX%*D}%D@3qVrtyyjcv0T~z$S5YL> z;FiG!?fKJKI00K}BuIfAA)8~;azwLei5khZ!^(@pvM+n%S7+%Qd+FC-`Kzm8@-MFl z07+nLovC@4{!tTyMz((0fVwiZVYl8g$u7B)NI4M#3z4=1x+79_5vP)~+~rkVZR?db z*G3o^gFWjzy9eF4s3g{t^W4dJlIae{z$6Z{jH!8K-$Vu+^itMBA!B85B|9?Rvs_~7 zXn@d_^0y!w7>KZ>K-I}CvGCEh@lBuXp7{A+? zsdFV;CSRN@U!SRJNNh`1UEeiV)jH>B&6HP@kA0zhL#lj3@>05dD}3#>jnkp2&_Zoz zs0Mfp>MhZP_aT1 z(dK4-xvGt14KWAIP8N|Ag<#R7O4(WJT5|{k-7=40P6sV7yMlHKp^XAL)BUdRmiwnc z5;I$OedydhBTNd@rBkH|@lJWm2hQDdFv}1zi2cL*y|}(bdp!_Wv-N%kxqs?<=PyFP z(2aMPv}wjkn)J6{{lK0$<;)X0&HDuy%iY7g$cCj@d= zoASmK6k*Ps(iaKN#B5SmdvM&nl2y58CTT2im=$$*6?6YZt)hiSr}q}KMNP#`iN4-@%;=)gtNc@{acj32v7DwRr+ei|1m|sCiJXKfB}dSw=}pK!X?dT_rUp7y_&8= zyW@R#>l&snOkGF}CWmIi>ALRtk!;S~nnz|2-8z!4I~YGg{wQm*D!DFEnsRJdaCD>` z9W&w`$2Ph-?OwEU)pXa|(mwP2LPu|^qc`2LFWs{LK4-N#Q;xOq{VC_#Nn!Hb3Pm)# z|K?-q>K%lAs;`}xIC0}>X7!rHw~|$fp~Pc%YwM@|Q~spz8q`WhsG%<^@v$;qM4)`uF;n%A1{-m z+i@LGrbx2 z{!z^>z=Q)k!2*3J2?LQpzRXTzNf5e~y;fm|ksUPncC&f-w2{7HFp`6r91j}c*Uw&c zwi@nmNBjY=?13s*B6LHMQ_;n05ZqG|AeXsc>ki;u?`^v^{!yO~%JnlPsmezHpxS821-=eXR7lKXN%YyCYm0}7%Qmp2pn zCI~Ee`be^%AI-mV=HZ&{XMBXE^!g(`Ic|;`qlg}o2I2z2Ge6p z)=E?T;wG-^kwv4`v?jBuLyI3h`hcD_#}4)5ZjxU|L|=mrj?s6*a3YbIkeTWcM!ey3 zss&s{Un9ulRMVdONLW&2fvVC%sOIqaDAgfJC0mFPyUJui<8~ty2;gH}`p#IU1ESi7 zq%gRY?pQjg+C?JAh@2oo>P$0_$&;eFtk~3-FURNBJ=!O%z4Cvc9z-Lj(01VS5}yBv zJM~L$-N&5mW6u6DSN1Vi@hfiQuei=M*ZE7XH&OONuGZBQWw`=7foB7_~aUh?b=J)bz?sFh)5B?Wil~C3I diff --git a/neural_networks/cnn/__pycache__/metric_classifier.cpython-312.pyc 
b/neural_networks/cnn/__pycache__/metric_classifier.cpython-312.pyc deleted file mode 100644 index 196b1c62e5de7c3b3cab22efe7759c3f29aad134..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12368 zcmbt4Yj7LKd3T2caCi{_3BE}Y6eUrRNl=uimo4i_y)28WB3X`+C~x~%TW6$UVD+^xm%18%K0&g3{T@m3_4~Y@ zkS`dpk4nD4fPHsgU#H|79`$=d0&njZJfTrh=sf9>;LS5KA_hI)LFWS;s)Lpdo;DEl??+Emp$kZ&Hf@sn2@527|*wmpB>-2x8aSpm?fF z^bBlLKtLE3J$`pU2w{cPuC}j)Z zl{}Pc%9i4nN)ogsVyCzmN|A6S2}&zUir{sCJ4)T8`)+b#GrWn*5VRn$BWOji96=j` z6##D1GAjxGehC-BF4hB-wI0dshm9R>Si17gOdTSXb^#ct(iYoQ-IOlgbA9vl=ESjg zTsK^|Z9i%HQ4>}er;Ks_dSE&*!~byj=J1_kbI%`2K7VLIMOzNj3lyz8Oq0!qEUys- zkPrdU3}_u{u?ild4M8=68U$+);Ko&UC=OR_%mQpHtpk9&+I-<)T32!5g|xwP;l;G6 z@P3II zGE_ZEg`kI$B2-pbN9j;Tc~DQ-71@?=)I>m~oa%+?RTuOryJxa}0-$)JOvteGyCLLv zk_yJcMpY3uq6)(tqv{Z@IY}XFHW#6@n6uBQreG}PcC{wk@{QUMl8>ZVO{}(`;n|B^ z2nk72L<1wq@K7(6Rae>GQEfr*+J2f>U*sa1rR{i47_ddTh&IB7Vct<)Zq@cgD4yY| z1}dtD9H>Xg849r009LMOM21LCK`#AL)L7D22P2}Ew#$sj6fs6jWJDDK2B=bzrvR^q zF&e@!_w4+mW@urF&=E7Q55xXA^>v{7*+M!*%t)o|>hw}qtX^h-@-ioYYEf&1idc~m z@Chv;hj0DN6m!|iQqf9iQ3-vjuGqXP(hDf_b@+a?DryTMFOd|fir5Oj0==uDcQv+( zREv$g@hP;g&W=x61DrkCk(T_9OEi#bTqM7Ey*6-!ov zjxrH?g2nkqmk>FC*lWmI^$d`)8}M*jEGuUbIy&* zNJ0j;+wJ4yM3#Q0+d3cwgmWX}ws4zpE+l%qA$P_SlAK%pL9fRzZFA-tIia_-9Ki2> zL;Y@?`oPqCvv+3a%<0(a$&C}?xF@kC#7M+_Z zXpkrkb7XLiJQW5>l9^ov%#Oy{-DDo%GWIDGREcOTiQNbQAtfPZm=~Fr*aI(LXhL1s zwE;k8LS|>eGijYXKGl$*?^{}vW@lz1mDTjG-}=R>-x!?MZ4dqzQqi=E37DT&F#!qT zz90cvGC!I2$n;42|q5SOBmWVQo8vfZ}c-Z|-sub3K4bSFYLHs7!ANZJlO z7B@iKqIrD@a*3M&Ss|@t`iwG>-;Yu_HVGw1hEg?~X1e2aymy*Q9C_<}vS!m`aSMKN zGk^!URvA`X*eyb|0a-7hB`<}1UP<8&HJE6^8CeC^834~YnMDIbRukI{ z47kx@Srzt;$STiC$x$a-0HI6=6#K;vFL~2NX3j~nDgYhSybuTu6Y~bpc?P-(Tq|ol zUN_cr@Kl<#Av(YvRZZJ<~no!7Xz8bjrNt9=BzlGsN~@ zGK}w>H&kEQKCwN%F=bdj-kV<4KGXUa!sRt{`i7*wA--`|zx;O9`0n{y$iMmO_jb={ zzI!nC^7y{Ar8eH4w6u;NnAe#u3zIu94PII{zH8o8d&NEBj(;U(vXAeb*PCWdy+7W4 zhq<%&Zw^B?rSAn?Msv#SoLM((cBYKZ+qR@}!}y-G(LA|w;vi(ST&(-G(X_dG(ldE7 
z7LKu)^-hYhS7YqF)t0gyc(3ZtvAaF*w#?cNq^t*Gn)#}lRL!Q_n%SC7sj5wPRwS#o z#kh1;eSCkiYQ+TiaaB!>n>SfN2JDPmr}kd@#`xZJW%c+${M6pbBNH#Yc7A^M{`al# zAOF{e)b3-ysDJ5>C%!S!I=v;)KXc@E%}rNw)#eWxw%n_KDOLd>y?W!EWz!8dK_|Mu zQ#E-6kiPU*!#&HUm^x|M^x^j2N$)k+b@#M;=A~5g`rCZ6`PmO@o_oJ7W|*(3kGI8N zo$+XCt{mjNkT9tA00;N)Ke*92JH5{iO ze`MJtp;`WOt!*F8{(O1eK8?l;@419r2#H?o1(#C+37_IpG}yDQPPWc$be-_M<{o;j`D#$;m+B)V-(SSV5A#P31UF-yCq*( z0B_bS1VUoa$4d@IbU> zri_ADsHQ0RjuL`-gAmsdfb_#lUx&;^q)ESLU@Nt5yNOfiN^sH zv9)Y+OMV{@!CCi6Fz63Ame9Ab@)&kL2Vk6m45$9H7u+hd!xa zc1=obeEZdzV{@I)C*i;4`MY$oWoN2>*Z)9(rG@DeyLbIaEy;eXk> zd$#o{OV91l+@^iWP5V-oeGe&Yj{%`x`mypNVT9plbl1-HY>^JYAPCU`gFwyKbwGb8 zP|^^TmnOa-=#r;!Ry)vCo@Sw3cqL0;bPIc_6R;EzETM*gCZ#E&Di@r~^=I>el?aOj?bfM-cm9k7=kaTc3YsS0+2R{LUE5a-9yi zPTd!7E8-S=zb_QRV-7g)SjpQZayTFr=>`_QLY#5buQ}a@K^wZ9)X6yH5VR>!)()Dm zIha>#0f|iaFI5*SipJ%Hag|ZeD66Hseyi_E9J$sO_gr6zLK!5`rU!XszNKlA~P~v_B0R%*JqWh*qn~@d4 zryLo<;{~-ZBn(U9UaTcDw--d$p+yPtaXI_i0IxX*^qU#mS_QNYTbGdE1#Kk?jCTkj&2tr3 z3=@WXwk_|izjOB8t+U+z`R=Xn9ZPlZ0>8{!_mEQQI$}H1)%A1L?aAu)8TyCZO)gd4 z9osu^Tah@PvUSYadXl!Dlx<@SJVV>$k*hCHy__^Q&ly)FjVlr_-8Vk7pamog9M$T0 zr{YFM%D(m?#p;|%V^eI;X6t)WH9fI?pR}$@oc->yPgQhXce<$=6hwX7 zb@Q}21(fy8w(q!Kw*&M*U3Y9hd3?OGGuC_M@WkQxo)3)67j%I5k)G0-v$N{C%l?b` zXXd#b$C5jarFI-oJ@@i`?yHXMVwSFg_d_IGzzv zYl#?E%;X0$-@QxBk?h+JaVIv>5`It^D$h%25rrQBM1F#!0p6i6uvQ{i8O2KdNkIa3 zz|&LU9n6EvK{!Pm$9fgK$}IS{{SxwSjx91=p&bzgJOd6_7js6Yco4cs*8qUOX|B3* z{*CkLs@Al%dd}LGw6-PormW9E7{g*u)TAt{=PYZJmbEF%x|llM;!JW)G5zHJbW7Xy z^V8?&T6&T#J*k$B$V2NU4#Q^OoUW{yt6ZL}T%Oo4vpQ9|KDHZ;*{@#u!*u(anLY6} zQylWjI^Y#+eY|_hm{29nZ5URmzqTP^OPqdt+dZ@69_Pp$iqQ|yq5)Iy1UzJw51j31 zFvs=<1A;@N*z}-Kl*ko1IU^0kCZ!vZWuzXvk@}mgI0pG}B{{n;JlQ^>biGur>t|MS z-u^W>`|hwS2i`!uDnyD(g&fhzTDO}Idg16A!w9mG1OdHp5A3<9sUS&F$8F* z6ddG%VM2)rP`pZA04`D=vd`fO-3C}NP31#|RktjvAh$qrMpoTRFB*;NSLjDg2K5>R z7h*uAkxRIQkmyCkTM&uSLePLE(HI7vEW~6W65~?q;HrdL*~m`?I8Mc%LM2$soa9)g*Du*0gud` z^Zdy2g;$kMwY*_>47e@7XuIKq;+qJ_ZvGDBKq@-00GX@EM#C0iTj^KOOhbt$VF;%{ zO>ceAd$;{v|15Vf%^9z76WkT!gmLoaS*|%<*91W^)8{Pgp988VX|wWrtlW)-tQI3$ 
ziixV8vLE48sqC!w<*unE1f<44p?ro zzC=(EF@~#;LI0DZ17Zc_K`P>c%NoVZQuM>?P?9~#$p#=ZanZ3dyES8P z5E8t^0ThJ+xS?eC4&p^4352gbb1N>`ZwE_$07CIAd154qyI@Y_1T9><7Y!iv9E-a< zbfsd3_!%!^$6f@)+1P`*GXS7DkvhmBvgWKWzz5IDCS@oP;{k&~B4Hp|b5a0*N08N? z{t&o7Iy|g)<3X2fg0neyXiyZS!Jwa)bbu*mQ7IvO8&BryRz~VpurK9fXSrr5?4^GP z0D;1+E3)UGr1v-er!Ci9bIqN}=FXY^+2#%Jdt-;v<~l$YO*VF<%pGY<^PFWx(y}6P zQ;bC%ncHaFelniL^imd*cLVe`y}2Srbpi$rEO7O-=^ zCG)v0;j+$9j(K1~Gbv|Ku%A~O)xag6oW?AxC)L2(86yPPG{=hNBU}--oNy>&b*>0U zIfiD-v3U^=T@eo44)%r|4Etg!_$9KL_Q-Q6~BkMzXTv54RfrV4e*=2!Ql~)D8O|Uyv8eq?WOwI zS<(Wn8|kM2fQ3!X5HlK_9!#0qF^XUKmbO-1ZJ26!vneiK4^M~Rj>L9BlZwVUxUW*t zKEurHN>y~h!GNtc&Ro|{YZJ@gX}{5)taGGnYhpS$H=Y!)o|!t6VBXQ(&`h0ARkg!O zadln1@_OBLU1HZe`)}+|)_0_;J7fA!8d?&n#F4i<=j)fn$7Whn^&Rtd&GEq*I#su3 z9xC-SJ5vqL-|EyB{Q{-d=?TB06Z;HIL%f9GG5|0k;+QugAXVSSQu=`n9~vbTMBJyh zvBewNW?>z1ou$U0#J#Wrk^Ll(B+P30n&i7RoLh_$WQkki=r27x`^)!O%-Z=x@q--3ZI?NKf)(j z5`q#|f$YV+pV$Z=fom9l0uQ{g+&_vpx52MNfhVG;FKaQ32GLKVli=oc_OiAhi7Ylk zK_zQKV-8LBAgrXfPm&V6RZJZa@gVNP=_9BB8yfEG7;|;Td#g z$e2sk=QD1ra;h%$UcReVHQe?_hS73EA(&RpIi*`Mu36AfjNwy8MK{86Kivr4CSzD2uSQZ~r19P=!b}y(E^DDT Zf7(lT(rbQ80r>QrbO)_}NC6J*RnD@1!*REFZfsW6!o9@VkX|m(A!@^<{l+cW0XWoOqh-^ zV+@UXHq4H(@Z`c=gdgK0mN5${=fl>BFeXH7W44HW%pP%!IU>$6C#kc9%Od4t9TAx+*KTf&p2S{I-jJnFIPg3lNAM`WK* zxBGmNm^2l}ywm4e_=;~KkR5+R=ktbN*OXn;(&j3ACO;BU)!9& zypScNu4(pSQcZJB+1iHLk>t5F*N|;+L)p{@=T=(G&ZRf#FQK}4gYa0(-0$f8A?77U zwn${7NS(E)PLMdLBir^wKbvHMIxBA56Loe{M>crc=CSJ)g)KdgrxgxWg(Q#rr^1?V z+#k?l%5=N}&*1B@*S#X<-8wuiv&Pgzq(uVI%-HE60(}P!c)qhLYkVUJj`f(&y$0T!Dvtm`e9#W$-|Mw*Li|@x+NT& zn2;5n3r5FdaHcp_4v(uixTxSxp{!$XpD*6@BxA2xi94&}xtOIk>pIePT}ke)eN!lZ z*D>c~g-C;)B$QJQ#ia=vW#=jF z^XpdVQECMf^aN$95Bh1mgaQvU_iSDoQM}kbgvhijNlbz+j0=0mj(S*~Lp;_U0nkH1 zz}%oVtqY1g8HN`%UVU1UKwIU6TI}vryFFH=7Z@AjhvKH&RawgcgCTl-SPBa0`qYGn zSNiddLy*>;IYvYWC@kn*~*MEcS>+)gwB-Exh$=m*%11(g43vJObLw{ zp(7=9EWf&Pa6|Cop>obS*XukhaI6j($I1dDnhAx39IH=^V?`&Japo*_4VG!flCV7G zu#`Alw`i5V+YIfC4{dIggvA7Zn!4-dC@n9V2^bF#a)JjXkn1^Peafh%juk4fT`6HL 
zjP?{x&Rt#LH9@05hDz~cp2@Gg1V84f{E9$Gt2BFtYRp{WiH9E7=d+w}63~K!&np$s zNWs3l42Ad}B2kV0E-!ev9miUkCZH4(8tLgpT&0X0$xN_QuTF2*_7ekxQ7nTtg@$Kn>P z@DtPoB{3f|S1E}D4b06j3~GwfJW7aDVeIeem&fVC+zb7T(LH?p=$VnLq7-{83f7J6 zkAOoViBprgmtbTPj4&}aF8cHMJnj#Mrxe*6cNtayx*dgf1icyEUf`$;Cc8lKYU65w zDAC*h2N_sxRL&%s*1`;<<<}jwSzZ`t*a>QeOR&oI1X~Kk^8I;O&pd#c+yOvf^-T>0 z0D3G3pbP;{c@Vi(LIpsK%gNp+Bkj|@`U(s~yt)LpzWJ%}22b+?0iz2IFHiH(7 zNwUsFudDd+ISDs(9(#RUjeAQ;wPDZsjD7T;2**H6tEars{uD4#-v$CUkl6k)pFF== z+qm%JLievnljpK^4at!Q)!=nB-8;;KCJXR-~A z%MD*O^kiLommG_Z$cMO)1gGw!DE44G9{0vCU2!g4hin*02?LPHe~ zS`iHHSdG#!xTDEYC)n*OmknVxo_pIwCl*XJX>ksqF9^Vz^=T+ zz`(fG@DcPj1*NyC+stkDHg}u9ZFx416_loyHwjD`x?`E3?(ikbIVfi&HpwK}B$uR; ze9|&*0UyJ8hbLEL9ve z2bk*N;=N}_uJ&E`gJ&QHrUPLNnP_qV;}>Gjt9~WqkLJT99&fy5Bp8(t#{xkmFctPI zg$PSfR(Ci#xX=W)zJg)jhvl8Kg^D)9b1m1Kb_uoCNF=B0ovzvMH_8~~<7xxp%%pyTk}Gi-vLLw*Dq;y->OwGzwSk)kXd&`L{vz~MQHl`U zXGtRXuDk(d@q;DseH>=zGY>bKcxnOTVVpllhW`TOe`cwyv*z8)bC(ym4QET%S@rIf zxho5`8_qphXKf*0@4gp$FO;e8N!9mc>U&f5y(_P-9!%FCen<&rKc?rIY)#$5pQUO# z9)MNIyni0_#KO_Pek0q~{dfC+yMN{S$Nlq{v(2r``{V-c8UG<%{L(-N)B{`cJQa{`!}7>60(tcfGPzg(GcMQ)SifUYxsl_cBZ{ z<7`ejn-^Za?`$))24^fb>I&s$AUZc1i^||#7Iefg*7YFCYYUybDNn1=svnEd7gw$! 
z!R1waNPdolYy;xK&&453hK{X%a;@W?&$T7504w!PApcvWMg}&7Lz~TeKRB1PW}8}) z{AP>z!D~qYv`EtO9anB^e@Fp&BvQ4m1#Zc{XwSIzr(F9pt^+C8fwb#jx@G`P@AVv&%{9)NLGA9sE43~Q9|ko_zPMd zU^NVflOzd={1u2~GdY9DL?X*~vk+#$k043IP%B9ZI)u@;UC9J<$^H)YJBP{3{C?2< z&e3@Da4a%8r4e;rh&PF9AO;5VH>f$3AtVDp{sjM!z8;+Tn?+0y%*;OzD!B09pqH{k z|Dy{-^gm(>g!kY!1847`U~pzsks(i_!Vq+%-!6(cJ{C@d{gLaE|5SXi1R4a$oMm58 zgv@8mxB%2912o1}JifEkTI<^B&pX!YW{oH`49Ws@c`6F{rn3?M+d3PTqXm330%)kV zj2OaCAgA+6Y$_^2^I%lz#sZ5PQ;2&3PDVua*bEh9F=nOHQ38y*4K$P_Pbjji+aaoM z<}HRNp>t@1fyY7?9~~=HStJ=~>3;f$$NPTJIz@oF>W1wFKgG1cA+k1*ZSPE4Hk}o} zVdu}@yY$|rOl^0nwmV(fv(lP&_9p3UO~buo?;Xq3bfs##9#R(Dq2%!9PU}0<{6ebv zh1K>|CEa{1?K+;kxY-C=W@&73EYs4TYU$6k45V5HR)^QB(=8{`jVF_rHtSpqwVB4= zRAcYT)pX+vtB@N^*9|2{c8s^IrfT+QYWh<({i|hbYPx1Pc{U43KJeb9WzUNLw|%Pv zsn)}(+QVyIU%c@7zAxF-OP5o%my_o<-FrV_mxovCKCW1;O1lS=Bl81yE^n2=^dFT| zm9@VaSvb6OV)4WWr#}6~M(tp_VrX3$dJ^W*h#+pm8$S@{!3D>V2SCB@WQr(&HUnVX zMQlR4lp`*I15wgi&D(b$KzQV%{2N$#J>WK%jQ!)q+OxeE!ne!F=VMA!vTiB`z2Bv}$X zte-Vu0V;gNoAZLp0Bb@(xRz|#AME@)-^;N8_itf0XtnAoy~~P5 zqyZhvN8@ehWw;N8duMR96u9NX`#sF7Q*c|OlI!Sr(-{au-7MjWO&=4L*I?RluU8y~ zi|e2yD{uv*DR5I8Kra5MDBqH!T7N`VRsV!6lDLR>Vw|Ku4nC&4^yc$uQI3>PI{ zv4D%YEB8gE6OSZ1*Ap}Q;Xb4L=8#2y4DR5e<`&P#P;pV3|*E-YQq4mDg zTNHhU9t2u7_}?s3Hbif+*kwzgs_QdV2U1lB(p3k6ms?!wUF`k9`;fBRPSbOjlEd?L z1k;z?i|$P0zEtDBOrtl|=v`4)&!iiVr0b3*M>gvl&7gO0%H5lBA4s_mtk$evPrF}A z*B?us&sNmVt9Sk)TiKMUY)e(PW$POj*!ijT=A&zMYgU-m5IsV}qi%$L#4=UqX=0x* zSW^{yms`^n?N4?*wEEN43u_}^9Qncn{jLB5>^?-p$5oxCx2kaV-_}!=P5C8R-+OJt z`9`+Fy)GC(5r7|n5Z_4g!&<{#oih>)C^0iUHjev^Rt-3=M=(!(siXNET2TsKcauvt zokf8}PbZshp$Mm zTt&HhhuUTs{xF=XV;AV`slhGQ#=nql?J}N6j~mak7q+o{tDYLBFVfk;Gh3{kzd~pC zcH}ZgPUJG@FK?5oZC5>S-#*a9*KZ$l@TciV?s~q-mS`qhiy^$Dk2iHE<4u>tYG7ZHK&w2&;*TfH3VdfrD^&BHS&9^?cb^L zKTth?pa$+!1HY$QzP7oO4H=s|WpghaP1`zVEg%z&<2#0@o3zrQe$gg7BzghKf!fnrLakd}sa*|a&I@k|^Cf21>$LVWga zq;it!a>7w}mE_t}Qtjy)t}7LET0ymymabOnp6;Kx35}U$wMbWKkN;9qT}A)7d!BbZ zwqu5C?{qKZnfLpB=bh){_dKuv&Ec?8kb;TXz)&Sc{W})4WK=Ws##fM;rFd$X;%VLx 
zq(g>b1C4nm$P6>&$quvdWP`?#Y1kAp51UE5G0251!Yh$MRQM~^dnj=4VMWf19hC@&6g>j`^ccyew67tT%KvQK&$nM)(SGx z25ok&ZMc%uIiRj6TURy3xQe6ukBi=kG3TH#DSCtOkS|8W3(nDq=nRBMMXw}_lRkM; z6r6HoA`*=Fr<~rfH#j8)q^^g!9~Z6Iyy0*}_R4`sSQ>O06btW_y}qDVk`&`;KnU_K zqr#pFNV3AZ13p<{h6GvRUYfuG-k@SUJ2?>)U>+kcyg4cOGH*#9j9Q>KEh_!a`g8xBG($uXG2bLH8~)US-$b2RcM=w~MVxM34-fs!>-GLw=mQ{qU;o+(*K z$$_P*B3H3uIe~{U7~utlfg@4OVaR#{yka34d1_+wzVVB}-sz1Z$N(qNe&C}h@YfLK zVg~;Ll_Wjn;BlQ2E8ww#HQycU9*cy8ZgDak7Q}A+7Tu!PzgHz;@{v4Y^&?5${&2W^ z^H=gfRoBGScih3CH*}u&9*UM_ciyGdmB2!h9mq6wyYF_(ovoj^rYRAG0Y7FjFN}I8 zgR*DT>ysnmRMduF?)A}N#4C>|+^FD1Zji(>=nkJ3s};^SG3l9<13|?W5<)OYEm#Fn zjEEl5D+`J>2sG@Q@`QjztP1DxguNlb<58?0Pbk7q1~G5Vc!=?NjOEvbP8CHEd1oI=b56o*qbC zSYh4is#?eiD{O7Lwqg3kWaSFmkglnpKAAMGu=SbTDqF8EfOG4!=_zRWJ~ri1br^vW zO@!IZ!$wl#NQng|#Wo9JJ*&%>VZCF#Ul5U*fd53F!tj!u;iC0^u@37^7lnX-EK_^< z+F_;Wyf@5W4Dj-pCv<*7BBwOyGK*WFx9CK&70EUvZAjXY=$&OVHXx~nKMA|RSz7FG z52OjHsGfcyacqUHPB(0u9!x@RTj7b}VEq%r*>Wew)1Y2KXnDhB<1lNX)PKC0XD+kD z#wpgtDcteF!BgHTL5%Jn3WWVZ!5I=H$?F$7CqUysjWdX?`<>pm1P~oTa)$9>&POIi zr^dZqzKlwR#Zhl$gcW&umWt6a%7gVJO@UAhqLRyu_3(iZ=r~pqf}>&=bdhkl(@D4v z`8p5|$R1C$RNH2kR<#{Fqw$%h()Fzg_AXaL*vSWqBAYCbgEGAlhG}Q102OCq^mxWB zWP3qIGX`!dNskm$)Em%0&c=-~!+8FDnISROPicxjZi<=2Td0y^)Hs_zrZeYW>Rfcm^(>UtLf)q3Ay>rY;b`NiY&nWz9v%`LN!hRfaYj~gj*r%Tt z?97(g8OU7R_B}hZ<*ivu*0jYOF($^M7D>{0E-_{Z&X9(`xc#D*vu1O)isrXIBKOqkQPO$+t&_3t%qP!3D;l@p1h$qLZ) z3)S=0i{*>wS882}llO{i)75oJVUAyG>Po@?s;(rHZrt&RVoIBn$6$)N$@zxl^Y@*u zrEMQ~T<=)kbNq(9w)c2y@A2jKffeV8M-*MzJkQS2bG@Jd=Z?H*THHEsd*Adm1NG}h zm}Z@$o_Y4ms%MwpOuqSk-(2*){*~(X<*H|s43Mu{+Lv~(bsR`_99U^N_@4~ja47BE z`VB)hZe2Jre`1mUF!Vv_#*tL(p4;WO&wNsqYVE(H6)7P zCQ$%<5LVd1NLX;0#C^~qT7j_db-+zKomn(8BJM9}A~*v&jf{u`(DYqZZd4uw8G%VR zTQJ4!@$eBJ0E8YGCcc2A01WKJIwVf`lW_QD>K^OU5=Roxx|9`x|_! 
zb{44S$C zH3JHlC|6LKaVAf-(pn!-W5q104F=}My^tiMOp-KSs^xM|o`y9t`=F^QP2Dg=$wYTP z@xI_|1mu7>7?1?sc`+c5Ia_$uj7u&H;V!~RictUDD&#aCN|gyT9ka?UO5ej(=G-aHI6+mlkbzt9O1;H2_Vv`&`jA+ZEe##q)Pg-sN6; z{D`5dPtXzq+J7!PQqOLJBWPsF_eL{0OUc=<3qT{w|2ghc+EZ@stt7--9-UQw8ZMBZw)4HhV#L zDDp;x4^J6KKsw6Yc$VXHGr$-fpnygf`lHq?NaN^a6(s#Ra&V2NWOQgq8b^##ka)BH z3{Z=HK|%URQ(+*LE>1Q0B68J3WV45u6J40Ye$m<^JP#6yHru(QMcUzPLr?d)7Rd#c z{u%6x8&U;S&nH9eFCwTSBCA*>LC(0B;u)w#kN^CnU&J$X8B}?F4)U8JwU_5TxC1>I z)!L5Ys8t}-)O}k~LcSKi5?`@3F0yORo|Loa3tLaxUUKd9JE!M-bFVJ)Ypr|0ihf#t z`|NK=J{eir_hPE`#Z=>|l>OA-ole(pS$J;#xdeO7amA7Jra;>3x2)B7r0PM+^XnG+ z=lh?mS$Zv1-qI4C zzhYmm?77i=Yv;|Kci7L1J}Uxsp?V4VqK}G$sEDiZ)xcw+#-XAx>G}a#wNB$GRD=a- zJd5#j4Zun|qM2h+{gw2DQ&U#|%hh$n{Cii|*>P@lIjaW7vzR~kzF@v|-xz9jcCtyN z41>{fRcO#uIa?%W=OLgjPJ9XGS4`fC2_ej@7VO7RBeo!EMS>C`4g<-Eg2EDOhu=Nx zULwC^?bBod+Xi8P^fy4jUID8C>#cFMDXw;|d6nBz$b19y0|0+H*BaN8;(BgW+^V@* zv&!xNf!(q!+rH%fc;NcL4gS{H&9S>&zsj;%%LSXkgCi{ad&06Z6xF21JUlHUi<1Ow zs$)sCPnkm*`u)bUs7ISG-*vu5Uo-fa0N|66wv1a!#|<$$4+j}^Ed~mYjy=dZy6+mU zwu2+a#Bn}K&q{zRd=@Yl8)L^2MHeLfw}DzjwgpK$13+lZ|NOoqpstDYO+%o-_>?th z)af~-ceNf|@z?SkNG5KG(<+i{eFfr_7;*v3iZNq!#+(?2aYi8cf*_7Kf-alN+^@jI zs@WihVHXq?7C@hzXGRccsTdyp0|5I3#i2=6=&<6z`8-+}M==si2VqEvN=g#43Bo!} zTI`t}d2mP3rY8|zemi^%O8OiKSP%`Z9sJaHr}^{t&)R*;F?CAI~`ayJU$KKSA zy(>HReaq0b2ktd)pBq}-y3SB_jqme|y&vxXVE=Mg|B}4c-Jk02U*3LrrRhkj_6T@s zwFeRo@X~4zJgBUleSPM2@GC97iDT)8tqZ5;PcJop+15YQ^#6UZ>p(x zrKt}bOGodFHOVAjO?UJp*;(t1b*^uw_-;uX_?C`d@GUL9;9Hh7E^`f!Hyp4CIgQWE zoktr?zI-`EwMgW!859ekODL*IUk)`nBMLh!liHY^EYXyoiX%$Wk=5_v$zNo65fH+PZ)lv+cUy z)f%HkTa4yyxTLlcd}Dej@772yyBvejO1B^jJ{cwbd_*_muyvIl%G7+){}8dpCQd}d2AWU1BZB-w6TzN!%-k@|Sd-vgov6;J%#oom;AM9RinO3h2uG0#0 zL70k`osCT7z2$ze-UJ|H=n6s_MR}@h6t|dFWvXUFttIv$DFDPHPzNEi$YdCzU{XYs zGrCPNNxlf)sGyD$5&kMBUhs>8pcvJbLcLb@tC6-+KY29l@aI##)zVC_|i zu*%h^OUo0cbZz58-F)3ztt(aQTB+@LM6s4#YSiq0WzAg0Ld|^5Lc@H+qHk%}N^RFl zWq0C*G-$PvnUTezRArkM#=7lGbsR}m9!Xc%&;E4gr%UCj>Ybplsyfp>dp_NMoB!?j 
zC*!~A{)0QU|K(KAndMi{rMk~y`;Y0wu?P0DYp1WAp6mNZrPdpz*IldJ$#ijL!U2leO`oK-;*$aQER=r!2kArb z1PwXBNBlx$?1JdWxH zI^M7@x-sW;HsGLRfP=Ug7c;@R(O2OaYhY)HaLmnpkY~fn_t7Jp*ku^xwzM_*t~fe9-;_!sJTpo(Y6-?VIFm*PlTuz z=pTSY@|fft6(b?ljg}+i`S8CWMGc*Y#1Jeao&(}?Me?czGc@57GE9irb+&4FUIa^XC@cn15qw$BkVpje8zZCP#PDk}hvp zbbnFamfo^`acc2GYD;f&AYIqA;F@ZdneYK`5 zUDLQ=pSLfbyIa$>?trNtl~TnO>rIrU2>r94O9?Bg-r|bnD@ku+YFWD(%m;4Su;3un z{(k~DxMTdk0k^-_18%$l1SjiV!`ICu6<$G`j@z^edg-W;Jm{ql-ADj02eC9j18OuI zaEq}yNQFuqt?kwG2&X`3xxr}5%c8);H3iD;s5cPApS*|xN&P8|HyBhs8F2>;^N*7B zWxqIvCGvuae>p4$ASYrZW|P0O86+r7C=f1$2IS6t0tXt2)B;R^mGcn+ODfP-5y4Z% zu3|#nGW=A7?AL>xqODKLonF7oFqiaCK=K6dz$$le6BZ~bOBkQT0*s|Cg9XZ~k`hF) z(0y^I8hUQ*Uh6xW>N|RePxYNlHJrquTjsYc^`&aNz^pfRr+c5dwdLlPJ9|>SC;o6W z)%z0GzD#dWbj2Au;rZkCAYtGa@`Cop;uFxuoJ@#KQ1CSPU4U7tW0X$ z@i^qgAtWR>fVmUc&Jr-C{tzU;7EuuSH3PY3Qu`Xz{7D?A=A+f*(uw>wNxiyKe*%N| zPeiFAAFF{IlEW3E#bgX2K)Kfxp5Tox~sZz%3ehhm^P%Yt49}0s^7fvcKp8oXTQ5vZ7*wXfQO3Caf_d;gDd+ zMZ|cIV)I3UL3AA8mti6?WEQwmK1Hr+)B_+vF9!~2MuInPm5j)3u8RsDc(q0u)dE zBZZxXk)qC`h^Ny->g?g-h_};AQw$~9dm7D2WUf-_ZI}n9&N7K-sN ztzW3la--G>wFN6`D~wtf)Vf#HR+4cGp{~fR>#QPW9w;le%2tswFO-#7Wz~Z=e`&n+ zc_|d_ipfDKB1KhSSQ6xDDBA7ojfF$OL0_y_4MjqVpoU^mU%!Gis&KwPEXcmoTg=8= zzrc6#)0#_&Mq?^zthDZg4f~v-K^!J7(jei!N z5{n99&DIhfgmE}g>g$(+#tM;(O~0X+HbQ2ClBiA^c9?kq_EoZp%mFAB?V{}p?yN&( z;mQqieqQ6wzM{0;v@?Xct7oVg z8sCwWD$ zsaTi4XR5_zT3$s{@aotrCk@g0N-x5WoT21mXzaIX1tCxp zMHQk!NiKvUePtNA6!IDuQ!hxef)DliWH`W-W)B3UK6pR3`F?nZjg_?^@aBxW z4j_sJRp*GRJ4HLFxI-)u9dP9bdB01uw*>JR$*W;V%_)$ZKuCo1B z55|k|_`Rg4@@5~LxU-tQSCECs|IxY7Re_OFJ!L-%TpImU=nIF-8TaVZwNx6HcbQ7J zUu#c!8s_*0vWEX}DlJ#xDibfRcP@r+sSP^zj-h}s-Ldxs$7Egh` zv}B$rD&dbz0d&$+@_PHHb@g|f=j-;4980vP*uAKtji{o}8Y+r?eyECyHc&{GC?p3t zJINs)J4lWPEOx?O0p1O|{BEtFLl&Y+h^Q%^35^4&0C>iSYj#~_!Bm7}f{G`4K#)bP zASzu}1Ek$C89@QoAr_U|{aoIa+5;_e7iBw^p}_9uqPU7*Z%eyjWzuf%>!;Gi2i)7EF&rq_1#5FL;q5M#xEw5eFv}jgcLUkxO$wFZG6n z!6W@$U6Q;^p%D+@56BA{H_Hgo@6#p%3{e3zEhG(6ex@Ccwml+T4ogu#BfFqna|F(l z!Qq}DE*}(ng%+^@Mz)&=#0_>J&&j^m9UW_WHAgBTUu5Os#JHNU)!M^EUc@C{< 
z?}Nq@Xg1)Qn|iQSP8zaailzn)Z?6;;%{M4eEtOgJ4s36~av|(d>#4LAu0-fkEkOWs8Ksef51QKYvEk9-n;lb)?`^)dIaRmqgVMX5|5*BQ$8TT$ z zsARl(;=r{7DUWZC_mN4$5eXG<09a*Z1R-J@4L;BgAOgitVKD&+1a0{w2q^s#=FyOn z#_?h#tuJN3>ns6mzatlc4q}fM6u98>2^2X1y%)<+G{K*OD1C)`1Yo>-$xd?{QM^Du z0=bsVpR^1@BP$t)iC$`hqAUcWJ8e$NNr_IBa4i5F;sAhM1`fG(91;sfjxdKJ!XS9j zLl}e;7(}sF_#9#V?K&%sZ|bn9epi2#@Cly+{6P|Zg5nc=QAtrDk5*}GkhNw=flp){ zDqM4b1sagcsvcspacsy2ci5h!5FD(4@rGbB3}9e2XEdN22SsDTvc`1_k%*u%fj-zO z)O-0^5L%(>kOX8jDyN}L!D;y*>DCeVCtk7aEiKSP@KJEfN2mv@>J#kMqIAg`xH!_~ z)o>|DH*E$GbgYCBoG<**OFD!AONN5nG|b?8=L3~Xv|K0B-LOXa*mWErCx5>o23Pmn z>X4;?i^8;jZqCGSBV1g59ty~MK>=AigaFNUQ5sb6P0Y0dX5KBSfxdWI&T^5`BhXU8 z)dZ9+DjkcCMiaKQt7u%D=)TsSbk(J6*G%r7YJTs~twYJ09f{VlMx>*a@SFm(By&}f*pM`2ad+IsIFt>uLoDb4-Gjz8C-oHP6u>R+VjP|^ z%>9sFmzm29_JsK%^Ug^_9GMNwGH44D9a51^k~GAb>jCEH1t{{nWb_(k)Dd|%3Itr4 zkSsi3nhp4eyc5d^(|?YX3Fj5RU01HBu)y9IxFm!X%}%IXpJrDD8PrJQRWTsRa!gim zoI$?iMx@z*+)V_s= z=43-KDnWOFB3|L&XH3?R**?Fm zeMOi9j=LV__ysdTjner}$tYtEF+{Y_q#-oX190$ZA%hw^V2q5AJ^O|~5EdfmMd46< z({ji-4|c-9@2v;Iu^_0zA;{>B5snm8!2cSdW?Mfx`Qgdmaj66Czw1jLI6b0!Hhx3-+6kz^!b$M%p8A4-_UGiu{LxFdjAmmdmGwgYNf{LF(zmWLB!^)m!e(< zTpOl`nT!f#i1H9~ow-`d!l@l*hUqiVgEsO5X7q(%yT*l7DWYgLAc+RVYxG5pzO)?P zor)xfgfI}rc;)h?u_`)Xh6XoGuWx#<`&RdRkz0{p#TGn!Zith@l=m&$c*}MBBz?`9 z^6Z)8_k0Z(VsU?2vDd!Lzp+*-0b@dN7J_$S3Xq!4=Nx3Mj%|!JRtK08Ssfc-DAQvR zaa=_`Wzcqrg_T-GR*D@)eqkN0L37ohd;mQt84Zb_)ocQ+)&Qi?2gF`K8U*kg?$8AYh)#+FNmL2x-50A;YA@3`lSH6UrX51B-p}gCOa3u5j5p=Hp$|h12tW$B5F0Iie{h}X_ZR3Y zjCP)EHpC~1V?w+k&8~+yG#3Pgq~1U<)*n^1!hj0DJcI*A(8wN+b%S}(Sp0FrIKcX# z5S#zm-Z1mD23SkW-`%=GZAl&WZjd)Hz`{q-nj z(lxaScAl?F*ZJ^Ty~r1hxkg=cWlw$Vzt6YnCdFE97}<(P{5#yBNx7WYofxvk;n)V5 zEVl|<=Ap$RZM3uu7tlf$2G2=7)-Go%GMWN92igSWmAh?dib)e_N(F+dryiJ7ZbA*p zH?NFt5?!&ie}NN>mz{w<^{o^4`ShJUxNd867dYClLZyrpS{_8vf?_)gG`;wP(fR&v zlXDV;BAk?vRYF^lv94g5j@WrX>r0jew2t3aqoNyoA>V)v=OK_(eUGo6>`L;D_xWmw zE-mm?_xP$wpni4tVUHo)1bYjGNxtf4_0-eTd+zR;-S@GYTGckspTez&Xw+Q!rs&mUXKDo`EBaX$eol1&lwFQmB<`M32VsDw-pev6-&Z`1RSV?Je=9-@J@ zFhQmpISar(@wa%Gb581U0YI#mQ-j(A6 
zc(EA>8e$EW#0)dU+Q)t74nrJ6>fPrPE_DY?A4JzQr+gfS)s>9kPb=PDM)3*=U{(DQ zqI2>H+?v#YBVjjMr&xewnuoGRXx;L^=c{=RJAbl-T##7oy+ zS}5O;EZ;Cq-!E^PE8CZFf>`vHjlXz(^}7vIpms zJP|a(meGXh4$x`~rZTPa0ak`_4}ae?(7+my3oz(0#~rd|eziM;W-wm!^yS`gOopG& z0oO=0*Qdv`0O{h@q!NrF)}qUjsRPdk-D==;N8zG5v2Vul$>^c3pk2_0{FDM54DB?wdiE%c5Z96k8P;mM!eb2TnjtzM|w zn5^13U3O>n%<5Ft-o!JD?vk;#(YCo&duE;Y-A5i&)xP7p;aaHLmaN)#mrGUc$!fCu z?%=$;1p?1g&UsJM9N+XEPV;45!(bMA%I7(VbT$DpbtSV$-Nff;rOrM9SOT6wzKBmg zZGwGI$LNV5CYhG#U>XU4ieyA35W$-X--WEoR3>v&RJWaIpEKAGaC!egR zTD_w1Axow`yknjiP}V3^PZ@c!Wg^pGL{lgjv-T*c36 zFoJ{J0s-1@(zoUhg0q`adydWX$I~Tc2}jyn`xV7FHzr!r<*TysHdWq~IF@#ojFxdZ-5VZq(DV^cRo31-I^}q~?fs*79k<(No0FRlCu^VjcrCEV#q#QzC!oW? z%>%Qx+0&`=!!Ynd`MPBJy2aYHQ;iGt&B^-ayF#jV@0U)lxZokhISW>v9*ax-2h9I} z6QRh@21NQM*i_Y`j6?9Dm3hi`!^z4J$9$+u91{ySMfg-pq|Dxcqi&cgNj(;#nk9G= zmOB{@)DHNxU;ttvFH$u8yXw1adyyKT0d?Wy9MA>$Tq&%_i8Ok^j3@dO;S#tVYRtD` zow5Pom8cx?I>|2yYz%&a)Yu*+22$*g^k!7lz*ddz71RsF)z#>(99VVX@TPjzOz z<(u$g2ER<#ta)eujr|KX+mkihQ#Ct3F8l53PgW;A&n4K!LhpFvr0tEsLyC5;nW7in z#beKmJ~J*(w#>U%Kej>PznF^IG~Ifi-|?7(GSd1R1;Ofr%9bYECrw<-F2}O4jHMoF zXta!K(xTK>w%qOfpM&gc@EsS1tuwIsJ)Xt)slp*k6*O9wZ@|!I zsLKam7>J+%U2-g(@ zH$QejOWby!zU^;TwD@eF__!9oqaEh{bDXh^LILrG4}|GzlfWu*jdx-FX%r+!I54RN zs&B-N=wp6&Rv3tQS{t#JxLc$f@ww0&)PE{ii@D4IdblX=K{k%x{k>#_oM4xrA-Hin zus!Mi2Ik0+q&q$a-sCN~#Pw(u`EKm29=|HieAZ+IvB+M9aFopRbAwTO4C<%YD1qWC ziU|}`DBeY}9z_L;G8C6gK}^gAxVu87k35~;yYHcm=T6Zi3`H_KcN-;#k1mm_hZWS0 zJs6`pPN!?v8X5lw`>5k%^r~ZytV+8SI5>8R0+=-0_%0 zS}h}M)?C3@I1HcC!EPz~hbB&7hixRrq6hZK86i7{Tco?@dogzk1zD}-D+oUq=wII* zlK&c-AiYxF1M$#C)AXm*i9b;FpHVLIzwk4v=#SLKKT^ByQ@j2^)je=lCn^`5)k$ad z3`e)oU;O-u)E?g6zV71MXbb;Q#;t diff --git a/neural_networks/rl/__pycache__/resource_allocator.cpython-312.pyc b/neural_networks/rl/__pycache__/resource_allocator.cpython-312.pyc deleted file mode 100644 index c983a608c5a004c9d028b4045b0033cd148bbfdd..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 9057 
zcmahvTWlLwb~BtAa`=>JJxD!GTb3invSdfLY%8|qhn&>0Bge6mI&`uVnlnYy>u9|bL%paqI8`lAWGC3m7?U3Af{fuhx}Qe?3| z={a{eLt3IY*W$U4bME_|=lEZ|UKa&vBfk)zXrZY8z>1kTqd;%`777d0Ym`VuD3KOz z2|8(u*k~*>2`0jjoQ<#~=OP@*?GZcVY{HS`BYe^saT0ni;YzwA?xZK;NqQsRq%Y#5 zDH|nmQ{CpY)m&=4PaCy}U*c`linI0c0TN}?h(+>H%xh?W@9hC3uB zhLl)n<4DO4cZ4+uskub{78~JaoME@_HTQT4)+oV}66vU_dt=iXA$lVkPeiZ5Vj7cD zGA+;4v@r3Ms=Kdeq>Lnp(zN=Hj8~(3qA@joLlV@e@~#rA+#y-B10bxP-gpRw1xmGW zBCS?-MNv6wiY3%f0MV9(7iqo-^Nwcer;cHNqC2TPWenRBV_0i~b?|k@pv}3>e3r@4 zIa`jIU@lQ%X8f@eIz2|d3UfM>jHl!VD9c_1J_L=3g6meK#Dwm^Grks2=?*wuQsAy* zrr;5TApLG^@SSv08k94slq3({Ov~>M%F)R~21rR6Ihqhsl8OjrP)-cWmFFr{o@-hj zn4ZyHVqA&Ik{ZuS+29kmJy69o!<@?70OqK*Z7p}AA2$^)m%0YEuE7U8wXOrjwu8lO z!+A#y?m4aNxd*RmU5AQohl|^e0Isp=&dHCI!hur99<5`~(#2w6u-LdS&y^bkg}`F( zYD3>D-?ssCP{;>%SW-q#Wt+)+zY6cEKV*Wp!gie(gj6&s34-ntgk)OGB(UregzK4T zqJn72*`;F%~v0I>50 zvfFKeA{!{Tw9cK)D-YRLqZ5*bU18g={`4-U&qn1wY_XB-S3@KfN1+~uW)nxs2%sN8 zNRLJdfl2o)G(j0N0)TFUr2ibP)D7GogAOwpLADL$;#K2;5W&0aUF>C0y? zj0=}uIeGd_{VRG>NQw@T2C>J8yl8`*b%M%pQszI=B{H{Ag}Fr*uy&y0Aa~5z!yes! 
z@iNN67z>Ls6UsYy4D<$G?E(dYfD%j`1ww%ms^Wx!8q$*#2Qep9o)J6>YMCAfncz$b z#tkdFEg|dNWHgzK>Neqen3Y?hpS&Nza|nh2SVDrlaAcNvDy|Abw!v8HfZ2eDuHbW- zqsnbN^Xvz{^0sEk9OagF$Xwh-j!w)oHZ0Oj)4^> zB{l0dj{qvjr3!Y0?Yn(<#D&QZCpC9hxpRBr;NpS5JpPm8TF3tU*zFhRU%YdA{(QNi z>GsX}n|GA?+$!H;97?S=v9#Df!H{q$)Rc9}&`?P^+SFCE_NP#=7KJ0nX)|7KfKkF2$G6 zYdwdx;1P{K@_}dT6Sr>j`!MgH*uMKde_*THD%E>YaU7E12I|-i=B?#z6Euh*{u7&N zKbRO|Jv?D9;X`Ob>4J8{kLU$z9yrY43K`!t5%i!%>8N}Pss4$IUK69o zv%4xsd`(i+ig+0~ik56bISvJb>NxgCJ@?x=YUSig^s})!LxqHCc?6n4-H^9IY?))w zpmTCGH7OBR3^RswLPBFDC^tf(6UJS;Yi7=F+qxQjf(nXyn7drL_SpO5d3vp3+ebqm zWeS(?eedq~ih=&W8F<)qvelX>w9TRHJq$Ohxp(+yzyg9S2?k zh9gIY&xJH+-9*TQ{RA?%UyVq`+_43@~KqgW!Np0Kbdp`^3d(&+>lNgjv#mhnk< zSRkr`8p{FM&NzlMSUQK`Wdu7BbRuX+Fkk}I^9ml>Eo#HYa)TQlhU;ASPy<8j49AU> z+q#W>_p?TR==lw7-|*SE)AZLo&2<~2;(#&uSqU!@61^b;tQuB^eij|3p03n+U?eWW zn60AI(&;X);onJL!<7m3ShZ(!G@exR9~vjP3@PNicB@f}v%EcfbY& z=TqkdMU|#?dm=pv!bfM(>kiuu4dpf6Zb1gsVsPB+IQt(WxB@`IqpVRwGQRw5`(%)8rnSNKsU*{p3Sol`Sx2;NIl7*0@$ML$E|;^5 zTo&FM>04(F(ARpa=DH|qb6*uz5J|8Xy51U}vT{5FJFef5`>i08QpI!K`@(K5X4DmCf}LP zs4}XVdbXP{mQDg*s_jLeC*GpY{HTVncebA(}9#~;kCQ8RI zYVddX;_7Qxv_n_4)+>2;IoP%6DusqM_zMo_ydCLD?Q***ELh8EC#HOn`2A`M^h?BPe_9PKC@iSp%vfkrUb3qHZ?)DmR)OAS&n*- zp0X5JwVy~6r^O}!vl~f8rbPSqUP$1Q-6({RNrH~u58OC&^Do7aU@jHNCuC3 zNq9+q4M^lO2#9U=29$sYr-7G58Es^ubL6TDieMYEuZ%$gV02u*{5FCp0%F&pwvkT) z&^d*;RrVCg6%?O_?lL`HfBKpt$q;FcrbK}#ziiJ|Ufe>2b18oYV2)b1Q~uUcV@PWZ z6&rU#l+NE#^6%FCyNmulbf!G`>`Q!y#&;CXEDo1?hP0lc2d}TZ{>9~o{7Zl@@tqpq zS>k&%zGv};hy0#3U~n_|clpydm)~3&D;*uz;BW7EF>rw>NEp`j1J|ACe8UfkIwY*s zza)M-wLG=5>+}B4`hUj1Xs}eRCc&pmSnBWLZZH_}ds(K&ORMKmo2&N1eYDzl7gJ<}@7)#ug%=uv44iiN*D)R|r zvKVMnc0Aj0=Dq1eT86JDBO&l5#dI=c=r7>6Yzskxk%?HV863@C7}U^>$a7&I;Y0{V zkO2s<=}xRvz0)ek84P<4M<7-yQKxt}Mqqvy4rtht22+k8im*1$syi`6Pc-&sJ5uK> z!FH*hF=^rwFo^Og0B{_AO(kEq=Ibu{LV0G*?Z16&{@DAY;OV}3_suWe;k96QDcGk4 z`<9wN?OyIK1`p@Yu6efIK0kkcwQYFC`7_U{N5Rf}p1Yn>aGw_3_n@g5JXk}r??L#X z=jht@{-rmx?T3Iv^Uga*)|vwa=ip+cUmn+w4Niy<`HOV=vs1q;Vg%r 
z1MSv$V4b3!2MkWEyg68Yjb-{vgA?lp_moEX0*IbQEf16K@cNmL;cE%tD)rh-WJRi- zf-gJB?jE#I|gy8O7p9b6l_utx-}Cl>vTw`bh0LTC^kIKu#iU zXjRvxCzfUw6e$^#tx=gakvI9HrWJzGbz0f#x6B&a`lo62uP;SvR#{g)*Hg}>j1gTQ z+ks!*<3RHm3g1rt=wEmLmwUi}{4t8y z$7mh_f2ok^u8Lo$sG{zvlvNoOCsCIfq=66&v0;_4OGe#laJLKFz#ELm6rD{<(Ug1{ ziD4C@bOWMN=?n})G%Zoj2LK7dE0k)w{bn4*%uStv)$0sS&5=13d}&0r8Ud?T2wDo# z#6&z6mr}7AAt|XcOqcE3s$8E!3F9fN07!_a)X?)~Lr-~U?*n>8{5Ny87 z&62-Y^Y<=_i_)sUAIwZ&@OFAWU6?5PdO<2YyLV~g{+<>3uLeJuGNeV}#zWuk6$Nmo zU-;t2KR&;D@zsAAz0+E-eb`=@SiE>QuKAz&| zS-kk;y=zvXbH`%ulK;mS)&iY|&{^7F4D2npv@JwFjI0Ga3S;-q-#vft!rcqSVE?bZ zjz-ry<#4))D8O$|=;cfr-*5iMbc6nRbh7jp)5lz;{~ewDqvpSU@!zwzMj?Xr7LXdz zD*R?)l*lEcCuaEOq=XK7H9}?tuy{idqfu&@NGPM_YY0di#?p*F7oY@_y=|eQSR&-r za9OqENyNT_fDADXC5V>2jSba-u{V6OLW*o7ewV+hsIvL>=_z5fNgN`HKt^y2!A=C# zFdDi;GJ-MqDHwLXMU_vzP(FUT3;{FF%?*{?x34pp4?j=xN8LT7{n6mi2IAM-yT94LowT=L02YyXI_%j|t9Qb9OL+ zZSHSuE;_tU0oY)mx=yO?1i{dObqYWQ0j1w~*nZob7d~ZgP%Lfx#uuc!e@y}SW_XOI LUB96alKcLD1P)Z< diff --git a/neural_networks/rnn/__pycache__/__init__.cpython-312.pyc b/neural_networks/rnn/__pycache__/__init__.cpython-312.pyc deleted file mode 100644 index 2c739e317d7a06601297bc39d0026992dc082f18..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 156 zcmX@j%ge<81i8)AGePuY5P=Rpvj9b=GgLBYGWxA#C}INgK7-W!vewVY&rQ`YD$UDF zEz&Q~FUr<0N=!G05P7MkMTt4_d8s8JiDLbtygdE*_{_Y_lK6PNg34bUHo5sJr8%i~ XMXW%x7=gGL#Q4a}$jDg43}gWSv|1)T diff --git a/neural_networks/rnn/__pycache__/anomaly_detector.cpython-312.pyc b/neural_networks/rnn/__pycache__/anomaly_detector.cpython-312.pyc deleted file mode 100644 index d147ab03093700b69c8782c80934c9d080ed2d75..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 11099 zcmbtaYj6`+mhP6i<<`TNY)i6b0~Wys3ye*8X2Q$F4w#pTG0bKRL?hH~TSk^hcROGq zCDerM$QgFAD>G}eVK>f9s$z<4h3x(qwl+VU-K|Qh_Q#^gL`uh7liJy;8U8t#txQt; zW6!y*Zaomm&g?De^zHlXzV|%7b8i1zWu=9J^vmRQsJ)J&ev3C+V$}@2%+M4yP4UzK z#nZeYOph7{3^eAAVdH?2T$ureT-gDZTulQexH932QEq@6H4m6cdp2wtwGLQEZ38x% 
zGEf3L(x6S2zY0~iU_ML(c7Zcc2Pocjh2kr28Pw8S>V+C`H%ItcXlax82tXeN~jT?P6v19$hi(^70D0l;r=x883?&Sqp2+E;o z#5*R1B12wD4#*)%4g~{YXv2F$QZx)D(a6TZs3=JC#&GDI;2jm@;V3V)-^Z8n(TX(? ziA1qyG$Qr-42qc#$bn!uAV~^47!txflrcwOT!lM!9$N*%!Q48Ki&4Fym(N0Gni8l1 z8urU@5_So8$RIGhaTnC^4D1gJcP4UI0lS2C9JwoM;b=fs?D8-y!f-Uq zL;cWb;QnT)R%{VJtW0P$AV)>%J_XyaSVsdF{Ze4`d{~f%{`xwmn|qbYU^F5JMne&@ zQuo(mXR8Ry93Aybg24CsEDGoMM*^dQ->+Ew{?RBu7RJ2I?|*SD5LRnM8+Nq&{ROM; z_ls3fDmswVAgM*-0&3BABZ7<- zQim9cbmVx%ukuJ#Y(GCPnsHPlmGGDL0C|o2vd(k)@MLY8@nlzeFCUvcnP$A%`i9F# zCfB5yhHPDfTHBCqY`T1Oa$A~d%BnpU7?0Sf4OpHbI-$ClAp~FuvZ+SMF?xxEKC5EOZFheVxLCA8vBJVW3l~Lv4~2ax5FQg0R>Eq9IU9|J6-!8h1twCW z7$sR$Oe)v+`WVp-okUPLlvt0%g9LSf`Zg;tw@M?}Ou`~qC~MWLhq6}3t4CDgaHbh& z)@HwQSnY^?ik2a#Gw9`3sL3w_kqY4bE2y_Bcq3d{p5qy~n#Nh5S#kUsVPb*rCgL3x zW4``yhz9}RsIqrn&ZUiK1Cqe!6aj2=6$Ge=e z1K~8lo!)nx@)>(`@l_eiX?N@nr&%KG7DT|ws~EWxyLn- z1uuG46Er8U`Wa-VDH*7mMxeAb@wA+?FBB|_rtvjxOH*=%)?2ua6mNv_3+{bPZ)|PpqIm~HvW%)9Y(NX|%_*(>xUZrQh>ufD z;potiASx^`oE;ldjA0?7SaeGvDGY9)xB(i22+6$glGuhF4I*-lVvR&aur+?P2}L_; zgqdT`(&uT1Qe=1O7?8`>L(F}sedLv|fZHSGEjl`zOUOXt_M zn>zCqMV*E=31fnZ)3VMJAOx_=BMgjFl17lDN>U8)ipC{zP-HQvZ=e!voE2R%s#TIk z2&&QU)N7p8xGZ9Y`O0uX!X(?ZKKa%;4k{cSZA3}Rwk|lKiYQJbuhXA^Q}+cXn3pX zdefYH!Lw$CvW>=GHEFp3JU#BC~2!YSpGC zBUS1CFGuZU*Ub8qvpMZ(ne*H}k@Ec@<#;-|Z_!mhQ#&JPnl`84=h>WgZMiLHb{tIY zIQU6-YR7XaS6^~})>d=v=)}<*Yi0to?l;@#y5B#1^Kho^K&tJ)C(Wt0-c-Y}d$#A6 z?D$Sg4$9$9RuY}xdy`SO4W&&niGn;PMiizu8WEu8DJcBbib(YoL40S#Lr@|f1_HI6 zm_Leylw!_TQ33{(3csI^1_6(CV8b6E!QCju2kWp5i5LD7iq;kCb7spjV`N*GEk^b< z{m^1z-6}A^c0u^-3Aq!}9}NsD4{CV+geVBH&HbSG!$KbAdu1rjxdJLM^$rRFSR%nI zoezg(sXeG`KA0aLgJ{ok@F0$WAg3WH(5%Jq;3nM*V8DzxnHSE>!-`Ri48h{5)A)=; zKEx9+fp`*#gwq0(tL%+S8*>-Ei*EaOV{23xz}}uG!1h;VYu#|+vdwFg%wO0Zwfty- z$y>C+)1nQ4Cxs$473dk=&X=>)U>6K|D-C+6jNR8X4OkPH0PKh%ZajAy-VprJahSE7 z-XB4LT9UAzW<|huP$MPvcsruof_!CO%L78G*2>H2-@N5ZnBr{QbPje}G{)J4;pz)O zjaLHwdO2Oa_5o!!HzxOG?bVaVU%&Xd!4jil&xu&U9hcPtXYw%S(B<+Guxf6@g=*mcITw@`lYO^ zVao6&lCNrEU~a0Q(Q^0A-v54@07_R?);p 
zE+;F^kFAPCt&xWperUP6q5>!{*r>-m5@><1_}BoD_t#fw=`^&IxZ z1&2j-R43W2%QJm?>huH3Xm;b`R8RIz9DPrY znrg}klpit*NkkRu4hr%2bWjT(o}pLu5Gsg81!GEfQZeOr6pb2D-U^FmMN-WEz)M4Z zWG-%S1;C*G6&Z>!!(T>th#6 zTj5+wd_PTmt+q+}_3dB{vv$YNyHVB6^vzaZKl#S7*>K9aIc48+`^7u^=IMWY_Jd3F zeILA%vLCq19e8x{(6SLdb{yX1`#ojQaXLdm&s-|Yg1O}e>!kZaGKx7#d7Jc@5zF{Q zx|KBKeWUNTdW7Wz5yaVS#Emjz!{mP@5#*!D+%mGG0hF}osGav;~kbU!8Ki>J#&P>;- zRM)8m?)0MD11`|Hbi;bh1L{h(b*0_iN$aAs?gsOg`MUX?=68K>`{oYcS$*?Z`iWiX z`rT>go}@YJszV*ALZVAkmojx7sk)AI^`-|DV{Qgu1eX5!spm7UmXxby_T24LcU>)M z*S_Tb#ai#I2d3+}*^{o_mh47^)HBgD`NIX<3d|pxIFzxiOxae>@b_$Mmdr5Pl7(tm zmGQKvJneIr(w?2)qm>unrGLt*;g~qB!sXW=19ZjK@3wYHb`Z4k6%LpXMlDU<)U8~R z#*7g-BuW@Tmm96fxMc)1r}2T2VxS5U#*gUgDnEXl(MALz0Juy73v6#t_lTe=>P6sI zdbtxazn~9OG#Vr73|P(t4Q9HqEwsQip-atQ(bF^xU57~g?~E5{6^+v(53RtpGPG|} zXj$xnyPWfMnE=0U&AWz#h;Z?|xF@#haok4&1?{^?yg}O2uJ=H&A)y}m!|$m-T&6y4 zIeMpWHt_D~+oQL;GHahowH*D1z|2=0SJA}SiKEb1+zG_TszSC63$`OcIU+6rU_*jf zJF#FN5Tydm7={@uwL=KaEzy?|c;rnn);5@X#dak*#e+2#e^w z?_{R$)Te!?em*iCorGHyr# znyrB21j7{;4&TfK%QApr@pysdtpv-OcpDsRBBrGwEN0K0u4u}uw9>1hO(pB%#e<7bARK}Nm#4iglFzK3rx9DV4kl7KHR>nu zWM36?NDJ4%X`p`gNUWT~gd*T4l*iHaB@^rTIRTSMhky|L{HeR+oyuR?!3tOx>s!@O zU%Gx9+*dAoHr;mqy77a?v}ZTmS7lc>!^Mu#c!>OHv+-5+5GlxFnU0+vrc2Fy`@S zVKk?ky2e~%gMcD~hBGiPIdNbh#>2!}bTc3x0A^>(aMjLG2~z@U@_03mx?~+q6|Aj} z$?#NY#98vVa9a-Dq5Fc)j3zn=XF=q2q%G^ilszS5V5Ty$_x$dT!+X+H(3oHo%ms9l zVlDe~$5ACLTzovGnw2ME_L%F0b|&wY1lcP`y)Erq2Yr<)7W^qRQNiC8%n@Ds8s^Yw zD8^XmJixT5D1*eo!scKjE}!`4K#EWv90*7K#0*J_1>`iB7oWkA=nD!LjEEN8n`IZ>J6JSomIvtfZ-izQVPRT-`! 
z#Wl?AT;SFrOnYJKg^bIWa{1;qq+Q#S7A!k6btdC#O}Sd<8q=!zBt)8Q$2NZCh*4LyR|(@E>+vJ*x;S52Qc~GmiMIFPu)t))AQXQAO7fY zru$3^ew}Ah9WSIe_)`r(F46`Y=1#otzv;in?fzz&rJM(82_@p68*929rXpK_YfMxF zYX9$d9Shi37o9xw{3)t|i0dD%8qlpvFtV-)3WOJ2E(pfvY>*}Oan`-y?fjY@9u`REJ=7+%roz?w{rGPdmey^2LXXH z5Q#AL#@bXnjc2|A@_|E0kiay7_~bqgiVk|!xFC31MAGgvFpG4bT{hnTxm^3-zD?>2NU5WZ*rwd)h^UqJSb z7t2*E972`KRejpE8Omy$$%@71jqe3-x86DQi6!0MpKdW*EK`XK#UORLLxI2FmOqV>#hhG zBo_m!JOPKNM4>|t)VZNIw)&ZX930j@GsCY8UJ^v|ok5P-B*4eSYtL)yX_zN>0Hl~T zri8tQY|$~0Vmph2=e|#httj2)!rH%uF(sU*V6S(4SbOJ3|Fmj>>&dol`^>(5wtteD zwoX}Ry6!o=pW3(I<+gtV7vj*oQ@@ufa@`QAk`+g92%WYOxR$2=+z|<_@-|J}90|MT zg7B3?f}96-v6ZE-P*nX(=qI7+1@LCCz4pq)D|fk7YM8noN?{?_!zEUyePVYiH(D3@ z;HKaxQ$*(jns>q_f1VkuA?KdCPW|{P)EQ`W9mlLDRfMSLqUva|`h0m&zcUcYJcOg8 z%6!zr$;an$uf;c!{0$OZO!bXcVD2Xxi9?2bM0JIFXkgiHdbx^Y_bfZB*t%t}f!*EJDnO!Ir2&6_Y%E z9^0UJi6{Yz8Fu6d`5H#`+6e2S_pg5NwGMNANCuIR=P!PzZ@FFSSABcLUqKV(1?hDl zOGcWeKc`Oq2ld1kl=Ta$@(Zf!3#$4zRO@f3)nA$$l64t#L(1GR^Hkd0eA%>Qq72rr z4JNu_i2|~0gxe)@TSF>X+Ob3d$yGq^YeyaJ{)z(f_0|=%>njS#*L%EJ2ITAg^g6of K0R@CCc3ob-UPIU~!P(0Z_a|f)XhcI!M`+WKlOoNwzGSjzr3dA*%?4U6P=2P`jWh z!a%L2N&}|Kh^*rfYCILyc*fMot=T%A(w)x4Y10{JI@1DzEFx>Rb=u4%`qN=Vx2gQ6 zeeYZB0;J$3>hvS{zWaXnw|`$&W}zTVCuaklH5B!4n9%~KCg|0_g2XH(P##L41w)Vy z89W9W(@c=@FeGL@EQvV}2QeEohD;t)i1+ZM9Tzl*EFMe9>am7w9ve*=D3Kkl*Cr~& zV)<=a&3Wvi$v{0r3C1gwV7hHkOK<0!dMZdA4|Qg(&QnRsEKp|E%c>?Bw=H(8Z(#8G z9T$AED7d15khnt@rGO~Ah9Z*a_sP*fc-Yl02SVdPUlf{51fnCZkS{vYDMx+L0Mz?s zcgKC4+f6H0UpO4W@<>?jcN-M5;EVeFL7yxu+)zLa3T{SW`=FP?4UUfmMTLK13>*7` z{`~4m#Hbm1br2G>lt_7KIAOzCI9E7RgUAZZ0jLpJI8n@TBxm#(NzO!ayvHP%q0T~b zRu50=Y?zxYbC)X?+;?vzJQNtd?}9>w3527HbtE7NV%RGOV&Z)~Sj7?^4|#*WNl}vT zBR(i5S-j{Cis9kc9Dn-EPh(dO!`6ZkUsNdziasd}=jD~)sO~?7jg8`1#6JROVODsr zH|z_EUaw;DdP5OmJcwzV*L!i?7gTGcGVEjbdW(+H>y_+KDpew=M&JZ+i&_OpecsbK z5($Z&(s(#5N}UrC>6K2&H~h2;!s58(3wpz16f5LTDID$$%F&Q_OcI5FKN^ub#wI1) z6ah9!$h!c1o%(b`-Q`o$)mgSKSF_>rbJI+g-H@wxUVbJykY$~@hUUx9Pw&pM&ABSa zBUd=(`8{8}vr{k0tYez5F_?{5X**S7;1Yy?xDmR_TLApmWoo%|*TU)Bm+n>Xoo|`$ 
zojo~oGUc1A{@BrWr+RPNw)Y`KGTzCxx^1kRm0BSwA&n?TNsNw5;m@te$<7htKkw!h zOCUTp9wn?pC1fMc90@7LGiuCy**l~F2q1_c@Vku)D~rJ)2`_^5lD1*$2?V%K8E@5v z;fT6#U?b`VW0jBWxfVMF0l@BZ^{q+vsy)}RCCT39Yd_(gxpK#~&{QaOG+W-BWd6fI zo4bEn-8^TRKbLXs%~p3qbN(lfTktKOQ2rVmvcd`o30;5$2uE6=3s^Lo7gtcFhp3L) zf+4D-slWini}Ffg#7koe6Y^aG z5D4F*75b8lxb4!;HP0Q2&oy6!S1;qeT&6y?S55ohI(FmP>(9RJpWpKR%?sk<>GyYJ zw;V~g9mzKKWNLac_M>V3=%dGCh5^X2s8a1WJ`%dMBa@q}(NP7SP z#y$;^!XoFBy0Lg4g8cxB_sw~+L9D!hOqg%SR!smdQ$Mp;Uz?noe5*S3;@boB&IS5= z=d*QPS^KUuzf0X@G412+kxd?gL^1ae=qprIZ;9+r+rTmEHnhc{F+*giupLAH8nnXB zUC5fYL*f)g!|-wHJW%AzfFubzVIVveQ0Y7LEKO5a%UNoY`V(e?zD4&-s89hnF?8%y z=m`n6fIK@Pt(&q zZ*m!X_d~)rzEY$FZFZRG?agz;^Ov%l_TII3XKmd{I%ls)<|&ee##+!(?_D@56lueG@snWV z)TqwkwdNEYY$=WorNyJ>n+sF{LF24;hxH(&v>VpS65M0X#urwkr5`z*tpUx)5t;4zk*um(>+OCgwO7XX55M zXcz-hi5ufQ#>h=UMpv;^!jdrmAI63OVeBtD1B*JNwkDhrW@~`itkBCEw+tC{y}&%7 zX$X*#dSPL1i*yhosSg3tp2C3sEp=eF6TuM#I}vO`fVbeb5n>~ht{6v9<$YB#`bjK3 z2MrW@NTCA?9Z^hvZ!{1TM1_7u;ryU3Pbl07Ml$dg3TaXcDs7s;G@9^9Laah(6!{Wd zg4_(`1aiXRobF21%p6GaIa|fG=ck@ewcglzee2xdJ72l^mHF_y^1|NRiFDh+#Y@@x zXS24GK)e6n?W$^Kx6W)$4c_ov_sp~J@wfPeCJX~_IX5(Dt+?Q`QWGlC)EBcchfaR{ei;ku4lbP<5 z*{U;&x~h|oAce|vrc!m(w(k2h0Rap?2c(M8Bg;K4=UU{-@MWdl=; z1Jk+;4KZ+6lnY>ffpKjoW|vW@Am)}ys1&LM6SU-oYQZ7!kTMHS!3=FJLXBX7*g8qO zHz;LD8DG+@%zl4fiz6%1HH1DO6qH4mXxTy+*%_KPP;6)wG(_Nb#iQjnH>w*y`fhb? z0yv-{&jEowBn(lMh9n4J+Lauwge1^p8St%EV#%N(sLv{34tGYvFvw3YujNpW^%mv? 
ziR;zJk0N4`Fp4aBT@X-Sme{Sf78(?syftM2Ys%PvA4T~IRriIIgw}j)^ z`JN5T6YI~KTY-U|yLoP@ZGWb1f42F+e=>BicQ(&@XS{PSW}VydPM3FfEu47Yy6F3J`?sH4ak&Y@9Atq(-EDoML|zyo=`#y}2*9F^Ok8VAg);IqIC$9B@6=E7$r}j6tB)}giW9kGQgpcjgjyLBd}V1A^v|~2a9_Eiv`H) z`~d987&j(N1=&%WfC{TP51J`4OnK07%_S`Z6KLW*-Wa&^NPIglp|Ky)Ky80vwZ3G5$`7-Y6St#R*3ARW&0O@`e+NyrP|Ayc;smz`*Xu|Ocvp0+2@fOjA zEnUF&sK9^*L`+u(eBXqZEA)s$`xPcK9#vIr@Scw;X5uIJ282rrOGE~;7)^QB5ZdAe zZ?%_HKSJSp4eDz03IL#RR8N0!>Vs2D2hL;;oXPGPxMv$&s7(5kX93C2&750uZqGQk zXUn#yZG%Y~z~lGGl~*R2T&3dy#hDwDJ-MptWbg9k?el~0d2V^KoA*AT3}stp?9=Ad zuH23%-+TGi%Zr;AFMe3}LHwiBA5H%Fmpm&fxk6Jllrk~F=~g5gG{h{Z4y9xe$P=Bk0KsuB>q0^P0GT4bkN|DVwR9mK ztth0ml(r5aD!{G;Qy8OYwYuN@3&t&B?xIW+3SZX9#YCT0< z6{w!C3vo^xKVd`?yehCDHR0U!vx@3HO5O^t1Z{|Q0ZQxF6+u>G2&m|R&px*KL?B9D z?%@*^Npo=A3+$|0=yhUitryTYR2QaJJkCY{0d zuOeszAfvLJXAP=Z%?t&is_#`d96F%r95((t0Kf?}MXmd2NYv=CyztLTV1K>cf04J z@4R;NwWTNaW#HGaZ}D`d;ZW9j7>V6IP3LOrQ%?dh=QYaRnN7PhO}o=Id#0J3qjvU% znHT1IvyS!|`j5#*uX+L@h%IrO2e$1s%tWX&kl5QZ!V8%B}Ds zUawPPf*A%njKZ^$8VWoFxB$quFQhd_UxzWyj-vU!F2IUV{;dmfqt;K0VfP>^aZPdK zFpVl$g1LGKge6O!4`L0yK6ohvY-@MBE{OQ_?aIHAw!33>d3;1#LNberW3tOPBKibE zYG6PiDJZrx<6%VpO8+gT1^DtIcY+%=9h+VmxPw2NvM95piKF8G3Od?(UC7ks+HdZfcj$fwZu0Q%T-P3 z<^$QPgK67A9fLa;8t(E3HPVXSNL){3?K^e!7_*H0}qc4ito(+%+3nXY*< zTlrMdn5(HxT9+$preDieZbrguN3mKn+d0!Yw~UlD*8 z5^!bWjV45uZGbAkVf$wK=1))eWgVT#<2hUPwf?F8)b6>;>wE9oTJyfrC7UZ_bIm#D zF5ayD*tQk%WEppavg1~ zUvJv1Z7;CIC$Oo2qNAmJ0wveKb|ml?aG-7JS_*k9E7F#LKN^uQqU{g78ojS1jM}wS zAr?%~n=R-jMb8DSz-tl*kk#BTp}-(^N8lg8_?u9c#P@^ z_@w~eXQ7GPOw{bq!up8*5mL#cL|#FspnW0HCn7JQ=-~m`Y@=8|Uad)|Om#&9c3Y=pTeh z&JWBFrkkJ2+Pi>PKD}>o*AEVUaPXt*j~wa!{n@8qNS_(F^Yp;g(WUaHOgXSX(mMf~KkEf2!^kpkr<_6|>-2@wBXOctCY@4#Bom~rO7mwfNk1m6Y{cNh| z_g_!*O^;$d%6!5cf23kPWYtibITkRjAf>e%B|1b2c37&qCi`{cz$#67 z=P~R&VTzl;M+P(qYcUi5m`S5;El%)J-EN2*1^p4UsFW`%Md3w4;k@GpfqM%y6Jor> zUK5P|BxWLR-dN*t#BKO|38a9k{6|GO>Kco{H*B(W6~=fb$SeDr8-N$pEr{!#z*_m&vEu@OB6;!D_j?!3@Au#3dvg_zS=)^5 z-Rk+5ZZ}QavK4!QGThte&n}$Ix(_W?9=_>-r>mZCZ=Uw0T4uyIx27u(CyklP!^`Eh 
z*CJDq)KIp(4QMR?aYOx$=IhOiTkg4g7EaH%%wN2{<&L{2TX!^T?*;PR(3l#SYk4E_ zuDEb)vE_Dn*;$*ad*injj{lOOEB3EYw0Zx-RTEWpgnsz1?jG{G^+QW-?*`-FG}?MM z+SEr(mZ*L)hxWzeIb<~I2Z-m@7p<3Iv+&LphN$Qr%dM7P#8Na@i(bE2)tPXzB3tN!6C7}O|-Bs?I+7@Zu$HbyK#PAH)`R?Ndt!SOlnmBi893UxLIHrcL4$P#v%jinr5k@q0r*uv{Uw^-|A0bB H4SQvx~B diff --git a/neural_networks/transformers/__pycache__/__init__.cpython-312.pyc b/neural_networks/transformers/__pycache__/__init__.cpython-312.pyc deleted file mode 100644 index 01888f1338a2a41c6f6a79271a810f63903e0012..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 165 zcmX@j%ge<81i8)AGePuY5P=Rpvj9b=GgLBYGWxA#C}INgK7-W!a?#Jo&rQ`YD$UDF zEz&Q~FUr<0N=!G05P7MkMTt4_d8s8JiDLbdqQt!7wEUvn)S_bj`1s7c%#!$cy@JYL g95%W6DWy57c15f}Lm7d%7{vI<%*e=C#0+Es0CK1*9smFU diff --git a/neural_networks/transformers/__pycache__/bert_model.cpython-312.pyc b/neural_networks/transformers/__pycache__/bert_model.cpython-312.pyc deleted file mode 100644 index 36de65d74edce3a90b375e55811c35599da935fe..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12250 zcmb_CYj7LKd3U%w;P4;;kRbS!3{npYqD)y5sV60wA}Luf%8Dd6rlP_S?nr_H0qPDk zMF?!zMj3;NQjwJ%Laj4~YE4Z=X~Q(>RGy@@oupIGQ%9cZ zOuugr2Y?V{$8C3j?{2?+-tPC_{aHzgiGs8}HtpYBK~evP87*<@3%&I3@G?yaR1YQ4 zf-XQ0>UwlEzB2))hape4hb2$0hl3{@&<`4V41;_RPwH_2G-&QI4_bOGgC#vB zG^L|NcA!ohEcXHsC^FHwTwmQKaq%2e&KlQJI4j9OVw8NrzVXU^iv z3C;qzk}S?aaHW8=W^omxjN2C3vajn4CtZUfK@7NL|KM=I8}^5SuHKO33X2!QuHm2{ zN^;m66#T(HSJ*3`lbb)pDYGy>d{@4(X;SKmQpGF>xnxU62!^<=!Qav3j4fXCOJg$3SvMk~8)2Bxfc$OOH_~fp(=NXYDZwHptmYuB^vQa^)oF=&_Jo z1?EPb?n=dki_{Sc_WJw$J4Yx=Vf?|cVtP5`^Pcs{{)qVDI8;^4{eD3ZgDQqaj4?Po z=;;@|g8U(Fr((i{>IaIT6w9FZf@dfs`*B73{`4BAxA(&G1w!7iVi2UzP-r+D z(Vsrre&X;Mg^|OOVj6^9@PtF>#9)Lw-Eri^8O7)e$XU#ZuIB+Gi$f}=+g+mY9#7Ca zD0)1K$>V{|9uDBU#p5|Y>ha`ls>dT)0V~-Nlp}B;a3ZKeP>rAlz)fli zAoa=qjs2lPaicUG42sgmk&tw5qvY+|rh=e2EO`T-pcuvid1F}e24$eML0FEBXGJND zQxgNtL!*)zJ4RrEzx*WtU!^{9){Y;VWKwKxy0T{c@ML+4tw~on#}CHj6zfboE7j7< 
zbXD#6k;zq9ucB)FKtgIT%;YndO$Cd|RQf>%!BnULKoq8d{K;nu`{*b=kdc`A^)+cVV93X+?^;;| zvQHR^f4o&6jrGTLQC%l>h4Jb7D4*`kNx(2+qz{t9NV_BrlxlCeXOtO00xn3}Y&7XR zfb?CEv^oNb$@7s+HN!1Rl-CH7YL1r zlETSD0e_gVl!TC+zZzO{F*=k%-0s|ThJPaRIs zZ}HdpH%%!=)4ZcK>1a(kw%smE+1hT0-?{kq#reIbl6z03Y^Ux(E57#;JY=Mu_sZL= z_ibdCQ7hsJwF-n<`+zFK8QlpKMQc0RNu7BE=>y8yymG1-9S`6N6{Ng=`k@~#iaK8x z1+^0aPKW^20~HkEiDG?4fJP%g)%YCKN7-V0L3##i5twu9Knzuckt<0WAl8bOQ=d`2 zHou@FuO<>pLCV$CTF}&Gm}FE}bQWx0t?O9p1khEt-v*LLLP;gzC%X*_2U5FFR7n@L zgQE8i4T(WPVF$h8eo#^Vpu+ecxuy?6R}gv!0UD`aF64?-L8&NT2LQzDYKZ9;%B$ketGgdioUu06kuGz@IutNjGm-HXT*nR>@(ikO*eaQ*4<{`F~4n|-+n9!KlicZ zQ_m->j!$xFTlutU$^^CQZ;G?$zdi7J%NyITZBKN)zV}DMJN<9>zdQWH&^=rCB8R;$ z>Zuazm4g!pCw<@OUNplr7A=&mEN1)^IlP@vHqcj=uHl0+DGao)rW~zndo~7g5hP+( zBIjxVH-J$85^UZWJH~~xav8=bB5jLNoHho?UwuaYffMqM#>Hb|c3C?*P<# zit3^)nhsGtLaZY&$RQvvn^|hi0Gv`(W=9QL4?xKQpwXOM)q`ARyFbKb>$~Xqs{{WU zxwQA!kiqn<+bDIyQ(=G<6{847m?(L}qO=2$IYO3pVHS-(X(s?TuhOrPNJo!fkfptV zR!pL%)q#}#!Ucs1`vqwq7BMf2J{eb)P;-``Inp9hzMO;!vI#27e+~eMw!}VPvL;!w z=7w*^{bn##(i+pHD=K5Ag{u05J5{ye0mT?sO*|Ltm@NNQU43HpTTR!S<{Ne-8+N4X zcK#Pbm$#)Ws~$2`MfLP^Q_sauB-&@_>s{B*%$!WF*pzZ?p8djY=6k<)`}p^Il8(La zZb_BzU!tb;s0usG)}E0UHKao;^l!=eGkTI4BP%^Y9-=z#(1$snSr_pF=tRqDHM zvg)$Io&o<$Vt7~zDr|QsD1s_#0i;xk;5dL!jOYQJ27~zwl4OaKTKbtb_MlUr3D$o1_mk-T9@E*YTxWwMdGynt8Kny|sDC_1` z1F$_D7K3QoFVkZvNTKs58GzCnD3{hE0DZ8`>LZ`6YVT~KGuC`fs|>Mntfe7py(C*XEyn=h|Eqdyk< zqd&7h7UT*OWl_?gGy(m^b^G!KDemYNTzHUQ)M1JyChr*_MV5meu#r)dx| zr7Sf{{UtL(t6PHf`&wJPmFDSVoXpxE3N z0EaOk?`h7~M2RD#C-ocZx8u|ud-JuQakMCjgA?5X49Jo z@7kMFrOh#Bp~QaW#fcYRm0`(d>hIQUNL6k~l{`JmCQDkte)-hWw6$#VY<%-n-^7>a z_?3@yya~o7GNu9e`4##V%PXb%d_R%3mNS+Ek}^qAoyMhLIV_;nkS8DV%C2x3Tc8H& za9~YjWrNleMNQDEo`)xgLRgb6py%sA&+C`jBU!t4%n;Rst-gi*|yQ9weDnWN^YDPxBiiQi`aT4~7*3j9I`sDlZrkve6Q4 zTo7IVvdlW-Ml}DE&%T+nZx*nfm~P1jeJo>Bls!aTqB-;$lNB`J4UEwfY?Q`5onSIg z9p>^xkK=LC2p9rhay_ehM0&Me#8GTLp2epB4d9b; zYQbWiG=2S8j85CjWBLWVbMpMvss|KrTpep)ICO08`QtxZcfaDqyT>Pm>A+MVv2(_I 
zefR8ounC{O=X`dq;zVpefCWcoTz~a&tSh~3_xp4zuj&Zcp->omPJMo0DZ6rX;%I!`J!SxcY(?z5?>yD9TmN3&Q=K~fkFAzYgZ?Mf z+D?nUTY4Ur$E}z8ah1$@=BDHajPzv$Xsb$aCP0C0iN#z!ZG}aR^%BFS`I_Oi}O=^<@uqEcetB18G2<__?Gg3`%1aK;F|!NU`}^0b+WY0kQ+aizZb- zn&~pK41!Mi)?m)t1Q}%|l2ZpkO&CK>qnh;5jZnmt0xKW2qZ(7jby4(cRNT#|42Eo{ z(4J)$mTLSG9>{!E*7U{wm9Y)5z}8*2_s_Q-O|~6PZSB7AJT_~8bu971y~+)DoX0Q< z`Usvx2*k^_ERQdtyd&$<4KEm&1%t3?L=-e5$loyr8aBXDB9=(MzmE-Tc#}W<5#T{?TZZHKCs$iJfSF+6{K-!3&OaVPlaO^lJ8d#hXCCy z86%D=`94oS)U%vjrBy8V!9SZA^^IOar!S9Ic?gv2E^$DuCgLx#f|&%)Il&@u7(f?F z;0TO`vz$`F08f6Dc3TxAF2hkgALw32%-K!5Lq>_x7iif;=eanV8U=#P`T{ve1~&Lp zNIsv4b0)}Vw#k}8VRUs#8bIM*kOYYQ7ad*Wa_~MNSR7fC{d5WIcZxHFwA{Auw6oX1V5-?k+4?=hf>kq>5OK~TYkR4g?ESC|F zp#ypbvUAqmac;O_deahPFPj&tGjcvvy#?~63zZFtmbZ3Y-<7J|2zko_UwXwnVV-mB zx_$DU({G=?#~&m#vuq=gT;NYH-GG-To<(4-f|Qd!PvX~NwrL)hGO!Ir{Mi9;s0ve* z(WIfq)134TsHBD>kUgY7Mu1$Tu{eo`90CM{?{P!q!mzvtHetjo2`Z~+);xnlM@8NX z0M^`5HQhbcJ@05rI+|uWQ;tnBW7=LZy>e>hydAv7H3@&p{tSfRt>ssu6VZ8VL(fHUa~Vu~;YTjle9x8{r@d0L%c|NVVg+?D-2=!VndR)DR6qjslTW@0Hl>5FEP#H5E<}&kpygM(tHVsPW7`d_z+f>>r|WkQ^1gdQ`0#@ZcDtmI@n$ zb67PdfP{fMXqgtW70$O8YGu4zWN*uaWv*h=54OyPzrW)yzZ>at#ng&;IAvcA6j@dk zw@Dn@z^jn!#nT7MHB- zcqIKkj@KlJl0ZIPT}z|mhhJ8EF9FrwGUWOMSUmkAliJ0GLi?W$*BbEr8-xa#bFeI9Zv7E+d;bs3clpkAdFAx3sa?RB<*Q?Sy1o(42&!i~ClAF; z>5AG16l>fH7Gzc38|G`~L|3Y6ee58(d9Z@tSvAvt+n8Fv?`~yB%FzLptd7Y@%37ax zH_vkMLsOLC2%)Eg+AK7o%{bjHQ z3U*FCJcWRy?$fT2fS@*jcqaGLU>-nfg-Os4U}&KrMR{$7wI_&jJf|WljPW#*a-qvG zdTa$aq6iE{OTZ671tIX7nv3eA-7f$9=!kNf|HbhC=}O7Jutb~=&Vj^$kR!>2?ouo!c@!+BeyXi%9%MzS-mWt>DH-^ z79QS!5Wo@DGhljz=mO2gAI}SCuFcs{+3zT#%l;7hlJU$@jb?&ZU%lg6IDS4+4{@`E zT21XoSR)hIT-QWLD!M~1^}?WwoL$3hPPoSTA$p~XVK{g$ z7#azFLc7#k9X6b~4}ja2C(1TrT^5eB!qP#~4E&QI2)^7$E37XB7xhpZD(pZAE`4wk z4x@%mQBx}Vm%V{ujD`Bc;-KsG59&nfiFBqH%rPDyI#Gsf&Ht7q@3h|MA~P$##6=?F(} zSpnO4mtU8*t(>>5PTE$dY-?lOLV4v9F7S=f*@{%-wukth-nnOqp&ifCG5!N{`JB0S z0fLp?6Ww#Qo9|k-q{}N}2h;Uy?^o=Px6jnigui?7=EeDqUCE7Isr6ld;=NO`|6S>Q zdamNoor)LVAASEE=-yRpz)W_mLraILESM^L8t$~zx^ApW_}*OiofEU=x4xKcXuVzkBj3BLe%Sx^hGhML_wA|L!?FF7 
zEnho=`uPa`&_vlCll!NSP942^EM;4r2q$e#F)qDj$KSSnukHJL=B@i~w#;mr@lMjO zw#M!8Q`f2!?Wyv$N!!|#b>AGnkHlXO(2tgEl&yo7%U~7XYgpM;%Kn&U0si!vnyzxa z7UgS%(uhO5@Ov5=po3g&JfntIzX0XHKIoI=t_-M0{*PfXObbp-uI}rA$XqV~qELH` z*vVDn(}39EQtuQ%|9^q~|2+Z{<~@l$vC`il0_82iMr?RHd09ha(Foa9Y(cC*j8sT( zA^1B41wr4hBMt!y2N{GAM9m&TfcFr+WMR0bB@+jr)Xc42s?l*9AM!S?gC-^sY201Y1y;Q#;t diff --git a/neural_networks/transformers/__pycache__/gpt_model.cpython-312.pyc b/neural_networks/transformers/__pycache__/gpt_model.cpython-312.pyc deleted file mode 100644 index daa1ce7b1b6a80fb3ed80706501a0fcff5dd827a..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 10118 zcmbtaYj7Lab>0PbfyIL)zz0b1Aw#Ap3wn@}Z9QvAmP}c;9+VYHPD)!rAnb|+i3ha{ z>H&i>CsATBb}F(`N~rCODce(1BPUfRnOd1-CU%k^nRYTm3X%zLl!=>q+UQS*5uG~H zkG9{riv>ZNNjlRj@!Y%jwRi71_nhyXi+`!8aB@g{v*XcQH5~VE^k62@DDZRtj>0%6 zbKRWG%a#})w{%;0DqCaLZY#@$Zh_@ux5#o^w+*=vv&W@wDemZYu>NAq8FzKN;_hyD zyrR1zUfEsA>TR*AxTo90a~4h!`yMtIFrO>c*LdTpyGD^L++I$$pXX%hn#HKSR_xkc z%i0`hbDC}4b*#>XI(Mng$LcCjS6Qm7KW7bAr91XMcQg=B%1SJtMdO39a4MQi1WqQ^ zKuS5A3iK!mMK$VDVXa?lze}$U^13UWNF-CNMe7P$bcY;Hg(I=Brs?9zs1lQdf-W40 zYAIdlj7Czrcyw?ers&e40eV3=MpMYj$?#w-C8Ir3{34cFjR$^i0EKZ*;ktQn!*UEf z!2VklLAE}D7FodV)04=ahzIm!XHQbMojp0&le1f5PcHW4?smu(=vm30s=A%5%|lP; zyuoVSfjxODnK&8ki5~tL$LZE+BBeV|CnMn#AuXC#?hd1;?&^)ovXU@r1gdc)2IHY# zB`j-qiBY54wQSar=vF+S31edKR>z43D6-!;$p z*G&7@D? 
zk^1pVKX_?o=dtOX$MT+I_c#ol<#d9T8Wm8r-dv^J)CTLgE!|ebDn#x zQ{ZH4AIa>Zl(Cq-j&MPtEBae}Lr~HMG8wwa^oMTK&S}tqx?NM$)6s~c+f+q^anNna z!PLMY49MwlY*5kd18Op&Xxall5ERiVoRE`oI)jR=+rk3_N&pGG zFufa!$}mvSWXP(s*IO>PfVr!lEvgu zW_!kxhG{aNAbIN`Z(^!+{xjB$h<}zeR@qqUH)P8Q8JizP;nTzpq(*YeH>+-Pv|zo=<2pa zj~b3^Iv>@o{mMCwwv{p7WWD>uU^FI&jBaWF0v%A^v>F4oQy>rnZ{2v~SmVX!8E@;f zw>9r=%i8V)AOHT*d~h2|Uf(4xx9Q?b6NkrM&U+uZ$5|Zh*vOi;sR{4I;VX5y zs(ke-$d9{v^s$lsGwx;6?q#{t6XD6_SCq>`x83VzMGTs?aUNf`g2_hLRl!hZWNzuG z05v3Zp(~kCf`akYR_HtR7@8kA2!_1?409Z9dr+f(fe5v!p9cx@Y7J#JHjq`RUDQZy zW!=&~!f-Dz2)8!Cro-&Vk7Jvgs82HyQjRL^3)7$KGgLZ4AEO*CK)kpReo7nZcZHnfbF>1iMyE>+waXm}t6 z)9QAXFo1copo1-4k&=nPOf>K@`i`|I?B{q=Fx+v7){FSUA)bs3jx?0y-|*u+4rZmm zo#Vc3J;NJcNc;_2bxZqtojIhksIA_i0)({{9% zdi@1v(+HyemixmnceA?V^3mMk34V0L=&5gQoUZP;uL)@UN_8y{ufwVh#&v5tI)E}C z*7+0Vo2&~Vydv%W%v-n8D@gi=xsR)=G3Q%HCcTrp-u%m3)gAe&jwz|*vs+I(hw-`_ zd-4kZO64n_at4f)3?jf)X>Uh(nJ-srP`)K5_0eZ6O7n_VW(sjgfQc99A=q+tRQ@OuHqRtnsZ zG-NL`3X&;MLyn9Dfaxs06K@)FmVGCws6{E`%s9#o3FdFc{4R_u8HkL_oWHmi?u-j> zsVKe$M`5TUQ;~7g8Zq9%z+%NWJY_plGFuo|`ATJRB$UuMD(Bym<+GJp&q*s3gj6=R z1X(_Nfbh0fthQ>XG9&hR7kwHYGgaowaaL}@h>0Z#Yd>RxS!bzKQ7Tme;EG);ZGX?V zK3Mr%SG#vd56!Rq;ur5PII$!NlYOV07bt|ct6I8bwB(pbmJf2 zpY1f$8mNi~!`vNr)u{8;Ls^~`avR1f^6q7L^3;wy#vHlj7u~tkw5Jtl(_6zJ#ptPv z4fi;QV?}n?$B%8f;r-qgcptv)h4zk|Hy8d}->j9ZZyJw|MJJq-8?IDLrf)32I+XWq ze^0#W+f!%@{?5wRuZF|n+n#-zWgpi9k6hfJ-BWmU%LlbvCl6mbI{t;RFXX}#ZI`3B z>sC$GZq4oixr2V<#eEaC)3t5ryt?B9-};Heqo;BkFP^*UTc6$g@$!`ur`~M%)XF!k zDKxjh8S*#ZZaVO@jX(GPbjz)#1GgIv;Drrqvd^&WBS7VYBL{PbBkgjc@(^dln30cH#Z%V5i^q6Zb=%%WOZ{>F#W`{j_;y=PFy53fGoU zjEwAoi=h-2S~;#Ns8L@c!az?w^%GPQkOyAYudMTNqfdtVNQ>Z&ZV$nvL=aaYoTIxn z3V=o8N~$+0GkH|gs9y|tx~Yx`POKsS=eZAs&2s`U4?vx*8s8?apR=MgOQlb2RVMnO zQ4#!=Qqg?yb95%k(2s=Zh6S+=D`FfJ?GR)aLp-8`b082%6nTX#Qv5@%lx>8DU{2Qp&_PZDUdzut31KDI<1DW^`7-cvt++abNzlVc^G*hAPSKv2 zZIqiD5@U*=1h=zfo0cBaM>cp-GUtJYHv=5tE^P3;OY$e*4_)pz!VFjm!7G-cz(1i< zI3xwFYNS4jpDjus=3Hj~*EB$%@qj;|T zcy-5h;a%5vT{G*So?icSe)V4HrRC4^g|?N@1P`}Q%KzAZwg2|=ZSaIqofU3LEgwC; 
ziE{rPsq%tr#5GmB?MCO1_y1u3ZRwf&_w3Y-g+t!2YTn(nFdRb2f=T8-qd<~LQgFc< zv9drHj6^vWTbfHtp|CQUvKBEz0CQRA(ZSN8G8TZrO32azvw)g2LCPLxQbtJXAWGID zYgu5+T0{V>78t5HYR2mW8I7$G&h;9gmvD-sUdx?;B zV-T4PlVOx>A!gFkRr8Ex732nLVUS@CdZ@YWQ_Sk)Am5Z-oOam@raPX{*V z8_B+Sw;^(iwrbya!&pPE)o@-fubW&k-PnHH+wlp9E^{2_n1z$Ol0sSDZTDFPbMwBw za@Rw)ciMI}y5IMTD8AoFG*UYMSO}d!#?iG!U8C{m;CsjxhF`@D?qdkc94XU2snU2j zJw^Rv=8y-IgYZ+!Lmz#m&EoT3@`1Pm-` z)a?w2&a1jJGHBcrokoO>x)Zn}7)9!tp&>>L(X)rNa2!#Yv}a!SjGE)%7ynESH|#ly7?U9%pql zkL=HOjWYd>YM)BHETzQS&8#B<{lpT@|4ta|DKntevW7G z_pQKguMiQiXfu(KU;_Iauo4cTSr^p%WIX)0p4!iAy=8CwSIPX*-w3!Y~u9WnJbw$znpK~G}XF!s%1+ao~VPcqI<+WRk!+`r>=|F_Wh{l z#`8aZ@dq#d@QeAa2d38^xFsFLtW#2h8o&lF?apM;^5-?SdHNv#(86hqf#!fYU!2ft zMavSfVkuqyj&O~{9EoPcxe~k5^-tm0#FP{QFUD^#v_us5St)h|?h@J$TIMhCfeD%T zUqQV}X4H^m3Y?-_kSZPrSr8AEBE}n(GC?z|hN&}2KcW%l=CEQIHgNQSM;1;*DJ|ej(;819soQ z!=K;vGWT&F)Xi%m0{SMf!^Yt5lUvem0??V78^GIVK=4v9j$o6eEB(;!a4OPU`lSnY zpML!UJ!71E4G?=?(J&B2+g$|S>^>6OPGg+W`GG~yyYd809x{(}y1oR17B>0Nu7mF+drtU=OmgyC4?wurWfA6%2JDGH|$$|~)5T>GE<(wR(AMPMBL{e|S>?f(oPQEvew7Q<2 zT%})00bdK$Q>1EJ>f8YJA?=|O08|OzspNC$u1GQ#QzGQuYsSqd3`*(%^XQE&VbV#a z#kgAQrqVExg?sQodLqt#I$fX=Gkztvt_v$Zh-;_wX+{*Dtw>UvicB|7&j5 zuetU-*ZwQ6^&>}PwtmLZIPGZ6ZOJ=U4BOx@T3nx6?0oGk2ZG?0#WhQhpVrp%zTa^m VpFZxV63C}pw(vXeaUg8#{uklHTNVHS diff --git a/neural_networks/transformers/__pycache__/t5_model.cpython-312.pyc b/neural_networks/transformers/__pycache__/t5_model.cpython-312.pyc deleted file mode 100644 index d14ef7a3465979545ce184759522645ab1e9f194..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 13240 zcmdU0eQ+Dcb-x1+-yi`HASr^B1b&K;WXiNieNmRokdmS=+6pBrqQbx+?nnXx0s8J} zi4@SXu{(lNwIb`dq!YWL>`X*fZbH|dDLYLYJ8q|*N&g@W%0Mnk<4!WuXr_P2qNk2D z`J?ac;SK-=+3uwMqdVZcy}jMNz1{cmdvBLNEh#Zmkopoc!LfA|^((wll1|RhtN#g^ z8R`_pQhgLlvzjnHqUqDnnAe82eOhv5`WSN6_37Zsg!LnaJ_D(v3mZpFeWnp}pLxX6 zXQ3$##W5jx+uFpH+@R&MzEW1t8dxK1V$H8GeKyv@*;(secsNY4C9l(c4$h#V4pD6B z3lwX+p^;nN$adIQMxNQ>nL~Nj=OkriQ07$2T%^neW#wvF`MB0wF|qw*yJsZIa$%1U 
z92pDy#b7kz8I1BCfqQn0iv+kw#ONbR-Xr>jb3)5)e7%>JEdEF&Dv~Bb58RtrzvvHy z{es}tO3bmKAWBSkFd$0CCr7bge|W4ES{{HG@czhq*Zo0Zk7ukzF5-ky|9fvn!+Sgv zC7ok~o^EbT6avF|dpP0=ivDoWABpbqco|7E$V(bdkTfhOX@cTJDZb>aAo5#1K>DRp z-?7e~L%yfJa^T?MuO8^#=Ycm!v|rNrBk(6mngM7xFb?gb&>qTy&{uR2{va<#C9R+5 zBrO|cB`wT^q#gD{ZrCqK+7bV_q!owZ>3O(i1~@JvF>DmZ!VL1!5s4Z0KgUVTc@8>Y z&IiL`q~fro8{mSGAxRgAj)hrCHxe8g7A2i9?1vh<^Q2HO4C6HbV^xz@&e5v_keQ)4 zs*eWN)BuChz>FG>VYSFAECZ~FcRF&X@6(ezL-x)JGH_=Y(@E8Iqol*;fPm<9NV z_l5oA952XaESFyv1c9I=^N9a~FU&=T#Nps%91EU{2K)o0`yQ`ZGWvWG{|M*vNoJo9 zSaU3ld8^O&?3h0+KjB;Ptu~)8j{$u?emj)%?MQYY*@U`vmO-wzjD zsbC~1`g{{*>T6n*vU;eoY63DzWh!bC%%57wXBG@Y50A+(z7LrhO3W%niV~qzP9bEt zH7Q1i3~P$uR#A$IY7ODb*CeG5ODk$HMumuBD6AN=7T)9C1dO(q@@jf++u_b@;2(vH zWB|P|91aeEZeZIby(|QhX_&jf28TFMM>uPQ3%wft5Vl}|zu1C#H2K8nIS{ZQJK@Zm zNTsj|S_#O}lhiF+*|&DibWC^5ir>BX^2P7OQ_e@y&Yel;&XjZa(qk#xzIR;zQvGgq zy7P1rezw!g#?uc@J5CIl_NQUOarHy$#KV(pQl=`TE`{0=;36Hqz<<7PETW1vX!ils`g)0!*I3>7!V4KZ!Vxuz4ijTx1iG z;)e5-m+85UllmVQT|YSV?-MWVANt+G#~&OYcxpdrI9)gj76SDDXgDZJCJwYG$NNQ& zKLITy9Vnh5PUah;=a>;cm=LW1mP+p*9pxgd*OX-?k-GqPgfNt3=0*lMmIXTlhH5k_ z@K0fHS`o|`9h9`saRC7**`p*`hV?$uY{H($c%;M&b%pCdKz{A68S}LHMQhq#m$cWV z>*Pg2K-lZ5*{pG`nt|?chtYW5Rx@FFlDr-t~{ko=huHpKYYg^KF zyOVXhQ#E`3OG`WVW-8ZxqNQBxW_C^Qn(dlvoqP6L`^(4YyOTAKE^c2cf3qW5v+t&J z{~ZeICc4PwBWp$4x;|-LKO4Adt-E7_7v3>bcK5Qe>fT)g)Du8FsdpQ;e5pxapkq*y z3A1+tGhQ-+mVp8}6=Q9d&SPgw3>DDEEpf9lCJ^HrAjV8wbLGcy)u`fZ zJTD72sSTp7@0qAprNu)uJ_wzsk@WsHjIfKuqyX2`rmAEjv2 zzf?agOPr3;mo@m!e?iaCFc=F%jZ=TEJx}vpP{=^YAi|NYtWMzBGvEwdfFx4hHGZpI+hiV@ z+d%1+9H*!CQ}opKjJx4`z4MOw&P96R!0Y{solEqa2bSG?rVPLKG|sWt&s{s0ZhAb~ z^mxj%?^7*Z`2?M*t^pOet~%vzov&PRx27svVN`~2&Fj zdf#!r6Zrf7^v0uao?JY!M1PODZn|cgubuZVG~V<)n%a2uru`TwMi>-mMtAMyBX?{t z>N|EtOPa9Pd%xOvl+55y>Na&(8sBTI>#iv+P?%_`fw$<@e}l{=4dMgA&C)}F5kpY2 zHieMg*Ce#1pa6l&>5I2igb~DCb4in@Y&4>ZAQBo-HX2YibP;8fQ*f$E&M6zP%{j5w zfGBHwhLkb>@5(7Z2>nW0K6;)oaDlM$L3lP%n-kN~Xf#|zNKv@*h1A(HD@=9G(k~rM zI$Q4XxENkt4kySG8HG}Q1RrT-j9`!jfz2PsGqUYx@Wm6Qg#-B#zV#U(ATd9)Rb*V1 
zQ|+^D-|cw0BVGMi5`NCdmaUKdx@vPsU{jkqH#0p8jQkzwR!MrE6UTUPtb3vn&dX$R z3_xifyhVz@ZYU)RwWgY~LPM`&`#j$cLQCMgX<6{+;mSaWMvp#Rx9mq3$SHO~fPDBq;an45Y~uYu!f=$U9xm&}9a9()D_ z9vFG@3`3-5k7rY(u&F?ip(O|Bgur!BBrr*!S(u?xg(P@Z$aPM zbKLlS+EXxG%Z!(Ggi`8o)LES?EaCy7YOT$CqPgDm)-M7od^C@(&DK!B3~K zsSAjVj4BVk%+C7f9Iw#H%0n_T`i9Lx%kyOA&p|~#j0D^9Pa{Dnl&6*n3SxvP57E$2 zG#KF{_?)O{qpYG!Wfjf)qGRId7zErzKR?8YK3r42A8JcRKwZJWFvrRWiFkH8ArR#` z!3fXcD@n?_bn>?$7%KeQ<4`s0(y3UeW9jUQ@o>goownB|?X@X;ecJ9#+Px|J7I5+a z4H<7$dgjVgm7DHT8q=1`hZ3C$G2^NOr%k+m@!G}orhQ5H)$dEX_9qTxtgfq1UVd`+ zL#nkyM)?A~%pT{yJ zsOSVllv(}#325#w)3URI#T(ucOGq+vwE~$eUvH1zIrEdBHJ7 zJ3`D3g_EQLtd-Tg$}_wwVh(MxUjjk4oL;4hp}4#@s{R($z*sTO$s$H9_IA}4jAH-| zT@>=+9tK={nag=1f)W#qK&-a7 zp#wP}Z&S_O8R$*u0zyKQA5?7qp7C{ag1KTrR(D;mzE+*8d<5>T58Q(XVYzHscI{qj z`*FwLb==JIXo02FL&s$O?65ljLrX~&3wIW?kTDhA#lE$iIqZDBBQ)J%stM;Uh(Dh9Q1N zpr52ifXlIP<+;J&1%+o=0ey)AooOQBei#!s;ZWE`E%HHaVGM}uLlO?~&UdUBcVe__ z>-5%iS!1#c{Jd1zmV_bWD4(gBu1P!Ul8(B$ohiqbg#MOeW7<)lbkxrsU2I)EyLkK^ z=W^?Tl;dDRU)aUqJan=hIjBmST4Rc%HANc= z^cB}<*MNXiBLtju(34Kk#b7Hfrf~yLKIH3*;Juvue6tinbHLXS=l%{sl7?c)PZ?XW z?^K>a_{#`iYRY~o$YR_SGl_^mNP3QjFSEu>FnSw|-W)gOt)&{g$}3aj(yR_;4!vIp zkEwVwQJSCzHDHa5xd2Lfs&qy^}S;wBYr+OLG%>FqCC+-kGydS!M^)+?R~oCKD~eFr4O5a zV`*{hD?qZSkqMGTNcfUQ#j+k)4T$qWtWS#${MuB^dg8_tLiH^2tt*HN!mbj=2}slp zJl;Xf`;okk1eK$JLxDh{94VB858z11CK1APSD+#~#yKYwOPL`j#M_%)Mlb7ZZ`4`Lu+5smb8Z;gl@048+bUFA-P$VO?%aF_S zHuyh*dlf0P6dP#P(?HEkLRym)pwbi9+U6Q3vkv_(GHW z91c3?X1Lp~x#k>mov*Ye-EH^yX&iSO5U(T89*$tCY7`{{&y9xt0giYbu$v_E5NRcr z5`7WL;_E&6)-|k&({AxWCYPW({!j)*o6Itc$%%^{M84>E?sU=7aAD z?{}q|pGr4Boos%3`E*~Z`Rs?T;9Tpn@0ql3FzFk-=^LIpF>`wQ^h;-!UBQG2$j@x+ zGS13b%dC6Z*|2PFC}!9*4kws!*ZRAZ&eW0U&cL45k?A8Z9!tBLlCGwds~LQv=0`rP zIXKt4==jl=4_Xg=`^0k1!NhSOw_KI8B`H_a8};+tjm?W(GFUf2jixa{72)4J!51%`0P zN||hkQD3$tEGx$5Oj-F%^K>&X)qKUGX3;V4UU9dj%Gwf!Tbnk|?|ePB82Vt-6N&Rv z7iZ1Ow)$n`@pp!R+`C&%m51nicT1`DhiG9PEW*2wmLIHTexfe}`d(wn!6yBC+Zmt* 
z*cY8qkS==lAjtgZqmrDCe+Y&}oVF%Cq^9c2b#|>CJA*$u<{#D2W~@5_Hhp>i-4~@!bB;B)kf{Q49{%`P+X#mFQLtQVKvY?ikDaJ|$SjOw7rz6Nr{3FFj63kmfpr>A9AH!m&Jb+9wl!VX zo~&zM98T5kPuZV<=v4EzAKDgdOWLL0RC8y#`EauN@cZ@e3#sPbbn}^H^O@zo{#5hV zKXipb&kqFB1Ht4#@aDj|oSqM(o(~f}-89Purrk>osiyrY*At+%U6rsxsLK~mr8al0RPIfoGSAptQxhp$ zEm(nharW@E8ILZs;n9VvW#hVgcb!!E2u;w-yN{Q5Z(x4nECX7gRMmjOGhhZkU#S*^ z-v2LDfa(IODnKhD6Oa_s0<@-zh22Al{?;ToeglTWt%j$-H3RqYTr;qz2}0%!f$JJp z*GsXW6yTYGJli@%vBuZvxGtv#b4n-|P-ji(cqpUHQ%vf?BZSs+cmkDbFEwuVT6!in zbm1A+;NZ9l{P936o_!^;3IfbzU=$odD1`V{GQyD?2xh_YPh*4|g5U<^bYgTg81U*O zv&aR8BT?8(9|u$v2qK^YJT(eIVF6phIiO$w5Eh)N42#3JF*G9?#vqZuHTWY-?1pB>i4D`k3$^9?wmTfT)k_y`@2V9KANuCo~+rv z_-wLf7lhPO_B|le7TZ+E2bK+)+NOEOYgJQwmaTgh%|P(M&TsCQ%6k$A0U&)VFx&WI zXzsD;D4a6_F!c0|>Lj3=u8gy4wtl)}*}5TPTPH_OaCMXf>>%Tl#1|4Jg>}fWA9SPE zq|b@WX&z#MKNG7w45KLaGtNNdK_{ZUAt}f15Q_lCPD~eAQn0>ycz+S0D|RYQ@i754 z=!!)$VsO}07RHz%Vv89<<;weWS7n5`S-WzWfe2!;!#%mPe{xLF@sywUj}G&GIOPUG zK|J_@>qdMxI5Kh$^TlF<58H5&w_}KNBcGL%_lt>NLp`vTynYE;B4a>#` zd9R}jMh&3@R3j5*%1QmR%0cjcdDs09uoBH9z=ypUz?az_7}Nx7_hRVVkFBk?6RSOn zB(5}oZV-aVjsH>2<(||0KSOE3+m?$|TYD5h-VZVr-4FUcwqHW>9+DK2G?K@W>_bBK z_`i)gBXRBXkaZ?qqud-Dc`GD?&&Ngs z@)`EQF*wcuXV`i0j41gilmSmw0jKlFph&W)|2M%2JgON!l*N|_oa^}tP*>WsNc{(WrH)oCV6 zr%G2T9j&=rs^mNS~3dpJ!Ztvjjr)3TF?vE)TpZ3sJ`pfjk6w*&O ObklU>T?#3g&;JIXALYRS