From d1ef6ee670492905d4852c6aff9979232c3dcba8 Mon Sep 17 00:00:00 2001 From: Kumar Vel <11884941+Kumarvels@users.noreply.github.com> Date: Fri, 25 Jul 2025 05:02:43 +0530 Subject: [PATCH 01/28] Create dependabot.yml Signed-off-by: Kumar Vel <11884941+Kumarvels@users.noreply.github.com> --- .github/dependabot.yml | 12 ++++++++++++ 1 file changed, 12 insertions(+) create mode 100644 .github/dependabot.yml diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 0000000..c77d007 --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,12 @@ +# To get started with Dependabot version updates, you'll need to specify which +# package ecosystems to update and where the package manifests are located. +# Please see the documentation for all configuration options: +# https://docs.github.com/code-security/dependabot/dependabot-version-updates/configuration-options-for-the-dependabot.yml-file + +version: 2 +updates: + - package-ecosystem: "" # See documentation for possible values + directory: "/" # Location of package manifests + schedule: + interval: "weekly" + From fd22d941d0d0b7df556015365abad609dc7aa150 Mon Sep 17 00:00:00 2001 From: Kumar Vel <11884941+Kumarvels@users.noreply.github.com> Date: Fri, 25 Jul 2025 18:59:16 +0530 Subject: [PATCH 02/28] Update README.md Signed-off-by: Kumar Vel <11884941+Kumarvels@users.noreply.github.com> --- README.md | 284 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 284 insertions(+) diff --git a/README.md b/README.md index c65da3d..1517ac1 100644 --- a/README.md +++ b/README.md @@ -112,5 +112,289 @@ python superfast_production_server.py --- +## Why Trust-Based Systems Are Better Than Label Error Detection Systems + +Let me break down the fundamental differences and explain why a trust-based approach is more comprehensive and valuable. + +## Core Philosophical Differences + +### **Label Error Detection System** + +Focus: Data Quality → Model Performance +Approach: Find and fix problems in training data +Scope: Limited to labeled dataset issues +Outcome: Better training data + + +### **Trust-Based System** + +Focus: Holistic System Reliability → Real-World Performance +Approach: Evaluate comprehensive trustworthiness +Scope: End-to-end system behavior including deployment +Outcome: Confidence in system behavior + + +## Detailed Comparison + +### 1. **Scope and Coverage** + +**Label Error Detection Limitations:** +python +# CleanLab approach - focused on training data +def cleanlab_approach(training_data, labels): + # Only addresses: + # 1. Mislabeling in training data + # 2. Data quality issues + # 3. Confidence in training predictions + + label_issues = find_label_errors(labels, pred_probs) + cleaned_data = remove_label_issues(training_data, label_issues) + return cleaned_data # Better training data, but... + +# What about deployment behavior? Real-world performance? 
+# These are NOT addressed by label error detection alone + + +**Trust-Based Approach:** +python +# OpenTrustEval approach - comprehensive trust evaluation +def trust_based_approach(model, training_data, test_data, production_data): + trust_assessment = { + # Training Data Quality (includes label error detection) + 'data_quality': evaluate_data_quality(training_data, labels), + + # Model Reliability + 'reliability': evaluate_reliability(model, test_data), + + # Consistency Across Inputs + 'consistency': evaluate_consistency(model, various_inputs), + + # Fairness and Bias + 'fairness': evaluate_fairness(model, diverse_test_cases), + + # Robustness to Adversarial Attacks + 'robustness': evaluate_robustness(model, adversarial_examples), + + # Explainability and Transparency + 'explainability': evaluate_explainability(model, inputs), + + # Production Behavior + 'deployment_trust': evaluate_production_behavior(model, production_data) + } + + return comprehensive_trust_score(trust_assessment) + + +### 2. **Real-World Performance vs. Training Performance** + +**The Fundamental Problem:** +python +# Scenario: Perfect training data, poor real-world trust +class ExampleScenario: + def demonstrate_limitation(self): + # Training data is perfect (no label errors) + training_data_quality = 0.99 # CleanLab would be happy + + # But model has issues: + reliability_score = 0.6 # Unreliable predictions + consistency_score = 0.5 # Inconsistent responses + fairness_score = 0.4 # Biased decisions + robustness_score = 0.3 # Fragile to input changes + + # Label error detection says: "Data is clean!" + # Trust system says: "Don't deploy this - it's not trustworthy!" + + return { + 'cleanlab_assessment': 'Data quality excellent', + 'trust_assessment': 'System not ready for deployment' + } + + +### 3. **Temporal and Contextual Trust** + +**Label Error Detection Cannot Address:** +python +# Issues that arise over time and context +def temporal_trust_challenges(): + return { + # Time-based issues (CleanLab can't detect): + 'concept_drift': 'Model performance degrades as world changes', + 'data_drift': 'Input distribution shifts in production', + 'model_degradation': 'Performance naturally degrades over time', + + # Context-based issues: + 'domain_adaptation': 'Works in training domain but fails in deployment domain', + 'edge_cases': 'Handles common cases but fails on edge cases', + 'user_trust': 'Users lose confidence due to inconsistent behavior' + } + + +## Why Trust-Based Systems Are Superior + +### 1. **Comprehensive Risk Assessment** + +**Trust systems evaluate:** +python +def comprehensive_risk_assessment(): + return { + # Pre-deployment risks (partially covered by CleanLab) + 'training_data_risks': ['label_errors', 'bias', 'completeness'], + + # Model behavior risks (NOT covered by CleanLab) + 'behavioral_risks': [ + 'overconfidence', # Model too confident in wrong answers + 'inconsistency', # Different responses to similar inputs + 'adversarial_vulnerability', # Security risks + 'bias_amplification' # Fairness issues in deployment + ], + + # Deployment risks (NOT covered by CleanLab) + 'deployment_risks': [ + 'production_drift', # Performance degradation over time + 'user_acceptance', # Human trust and adoption + 'regulatory_compliance', # Legal and ethical requirements + 'business_impact' # Real-world consequences of failures + ] + } + + +### 2. **Decision-Making Support** + +**Beyond Data Quality:** +python +def decision_making_support(): + # CleanLab helps answer: "Is my training data good?" 
+ cleanlab_question = "Should I retrain with cleaned data?" + +# Trust systems help answer broader questions: + trust_questions = [ + "Should I deploy this model to production?", + "Can I trust this model's decisions in critical situations?", + "How will this model perform with real users?", + "What are the risks of deploying this system?", + "How can I improve overall system trustworthiness?" + ] + + return { + 'cleanlab_scope': cleanlab_question, + 'trust_scope': trust_questions + } + + +### 3. **Continuous Monitoring and Improvement** + +**Evolution Over Time:** +python +def evolution_comparison(): + return { + 'label_error_detection': { + 'phase': 'Training/pre-deployment', + 'frequency': 'One-time or periodic retraining', + 'scope': 'Static training dataset', + 'outcome': 'Better training data' + }, + + 'trust_based_system': { + 'phase': 'End-to-end lifecycle (training → deployment → monitoring)', + 'frequency': 'Continuous monitoring', + 'scope': 'Dynamic system behavior in real-world conditions', + 'outcome': 'Confidence in system reliability and safety' + } + } + + +## Concrete Examples Where Trust Systems Excel + +### Example 1: **Medical Diagnosis System** + +python +# CleanLab approach: +medical_model_cleanlab = { + 'training_data_quality': 0.98, # Very clean data + 'recommendation': 'Ready for deployment' +} + +# Trust-based approach: +medical_model_trust = { + 'training_data_quality': 0.98, # Same clean data + 'reliability_score': 0.7, # Sometimes confident when wrong + 'consistency_score': 0.6, # Different diagnoses for similar symptoms + 'robustness_score': 0.5, # Fragile to slight input variations + 'fairness_score': 0.8, # Good but not perfect + 'explainability_score': 0.4, # Poor explanations for decisions + 'overall_trust': 0.6, # NOT ready for deployment! + 'recommendation': 'Needs significant improvement before deployment' +} + + +### Example 2: **Autonomous Vehicle Perception** + +python +# CleanLab approach: +av_perception_cleanlab = { + 'training_data_quality': 0.95, # Good object detection labels + 'recommendation': 'Good data quality' +} + +# Trust-based approach: +av_perception_trust = { + 'training_data_quality': 0.95, # Same good data + 'reliability_in_rain': 0.3, # Terrible in rain conditions + 'consistency_at_night': 0.4, # Inconsistent night performance + 'robustness_to_adversarial': 0.2, # Vulnerable to simple attacks + 'edge_case_handling': 0.3, # Fails on unusual scenarios + 'safety_trust': 0.3, # DANGEROUS for deployment! + 'recommendation': 'Absolutely not ready - safety risks too high' +} + + +## The Trust Advantage: Beyond Binary Decisions + +### **CleanLab's Binary Thinking:** + +Data Quality: Good/Bad → Retrain/Don't Retrain + + +### **Trust-Based Thinking:** + +Trust Dimensions: +├── Reliability: 0.7 (Moderate confidence) +├── Consistency: 0.6 (Some variability acceptable) +├── Fairness: 0.9 (Excellent) +├── Robustness: 0.4 (Needs improvement) +├── Explainability: 0.8 (Good) +└── Overall Trust: 0.6 (Improvement needed) + +Decision Matrix: +├── Critical Applications: DON'T DEPLOY +├── Low-Stakes Applications: DEPLOY with monitoring +└── Research Applications: DEPLOY with caveats + + +## Fundamental Truth + +**Perfect training data ≠ Trustworthy system** + +A trust-based system recognizes that: +1. **Data quality is necessary but not sufficient** for trustworthy AI +2. **Model behavior in deployment matters more** than training data quality +3. **Human trust and acceptance** are crucial for real-world success +4. 
**Continuous monitoring and improvement** are essential for long-term success + +## Conclusion + +Trust-based systems are superior because they: + +1. **Provide comprehensive assessment** beyond just data quality +2. **Support better decision-making** for real-world deployment +3. **Consider end-to-end system behavior** rather than isolated components +4. **Enable continuous improvement** throughout the AI lifecycle +5. **Address human factors** like user trust and acceptance +6. **Prepare for real-world complexity** rather than controlled environments + +While label error detection is valuable (and should be part of any comprehensive approach), it's only one piece of the much larger trust puzzle. +A trust-based system provides the holistic view needed to build truly reliable, safe, and successful AI systems. + + ## 🏆 Credits OpenTrustEval is developed and maintained by Kumarvels and contributors. For issues, feature requests, or contributions, please open an issue or pull request on GitHub. From 7d3d7a8475a31ba1bb82e04bcc6c81fe9479a557 Mon Sep 17 00:00:00 2001 From: Kumar Vel <11884941+Kumarvels@users.noreply.github.com> Date: Fri, 25 Jul 2025 19:02:29 +0530 Subject: [PATCH 03/28] Update README.md Signed-off-by: Kumar Vel <11884941+Kumarvels@users.noreply.github.com> --- README.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 1517ac1..c2a729e 100644 --- a/README.md +++ b/README.md @@ -20,7 +20,7 @@ OpenTrustEval is a comprehensive, high-performance, and modular platform for AI --- ## 🛠️ Installation -```bash +bash git clone https://github.com/Kumarvels/OpenTrustEval.git cd OpenTrustEval python -m venv .venv @@ -30,12 +30,12 @@ source .venv/bin/activate # On Linux/Mac pip install -r requirements.txt ``` ---- + ## 🌐 Launch the Unified WebUI The WebUI provides a single interface for LLM, Data, Security, and Research management. -```bash +bash streamlit run launch_workflow_webui.py ``` - Open [http://localhost:8501](http://localhost:8501) in your browser. @@ -112,7 +112,7 @@ python superfast_production_server.py --- -## Why Trust-Based Systems Are Better Than Label Error Detection Systems +## Why Trust-Based Systems Are Better Than Other Solutions (example: Label Error Detection Systems) Let me break down the fundamental differences and explain why a trust-based approach is more comprehensive and valuable. From 6c281ab0e876021c337b130a110aefa2c6c4ae4f Mon Sep 17 00:00:00 2001 From: Kumar Vel <11884941+Kumarvels@users.noreply.github.com> Date: Fri, 25 Jul 2025 19:18:58 +0530 Subject: [PATCH 04/28] Update README.md Add readme file update about why Trust based systems are better Signed-off-by: Kumar Vel <11884941+Kumarvels@users.noreply.github.com> --- README.md | 132 +++++++++++++++++++++++++++--------------------------- 1 file changed, 66 insertions(+), 66 deletions(-) diff --git a/README.md b/README.md index c2a729e..3ae4e5a 100644 --- a/README.md +++ b/README.md @@ -112,108 +112,108 @@ python superfast_production_server.py --- -## Why Trust-Based Systems Are Better Than Other Solutions (example: Label Error Detection Systems) +## Why Trust-Based Systems Are Better (than Label Error Detection Systems) ? Let me break down the fundamental differences and explain why a trust-based approach is more comprehensive and valuable. 
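The examples in the sections below repeatedly roll individual trust dimensions up into one overall score (for instance the `comprehensive_trust_score(trust_assessment)` call in the trust-based example). As a minimal sketch of what such an aggregation could look like — assuming each dimension evaluates to a numeric score in [0, 1]; the dimension names and weights here are illustrative assumptions, not the exact OpenTrustEval implementation:

```python
# Hypothetical roll-up of per-dimension trust scores into one overall score.
# Dimension names and weights are illustrative assumptions only; each value in
# trust_assessment is assumed to be a numeric score in [0, 1].
def comprehensive_trust_score(trust_assessment: dict) -> float:
    weights = {
        'data_quality': 0.15,      # includes label error detection
        'reliability': 0.20,
        'consistency': 0.15,
        'fairness': 0.15,
        'robustness': 0.15,
        'explainability': 0.10,
        'deployment_trust': 0.10,
    }
    # Weighted average over whichever dimensions were actually evaluated
    total = sum(weights.get(dim, 0.0) * float(score)
                for dim, score in trust_assessment.items())
    norm = sum(weights.get(dim, 0.0) for dim in trust_assessment) or 1.0
    return total / norm
```

With scores like those in the medical-diagnosis example further down (reliability 0.7, consistency 0.6, robustness 0.5, and so on), a weighted roll-up of this kind yields an overall value in the same 0–1 range used throughout this section.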
## Core Philosophical Differences ### **Label Error Detection System** - +``` Focus: Data Quality → Model Performance Approach: Find and fix problems in training data Scope: Limited to labeled dataset issues Outcome: Better training data - +``` ### **Trust-Based System** - +``` Focus: Holistic System Reliability → Real-World Performance Approach: Evaluate comprehensive trustworthiness Scope: End-to-end system behavior including deployment Outcome: Confidence in system behavior - +``` ## Detailed Comparison ### 1. **Scope and Coverage** **Label Error Detection Limitations:** -python +```python # CleanLab approach - focused on training data def cleanlab_approach(training_data, labels): # Only addresses: # 1. Mislabeling in training data # 2. Data quality issues # 3. Confidence in training predictions - - label_issues = find_label_errors(labels, pred_probs) - cleaned_data = remove_label_issues(training_data, label_issues) - return cleaned_data # Better training data, but... + + label_issues = find_label_errors(labels, pred_probs) + cleaned_data = remove_label_issues(training_data, label_issues) + return cleaned_data # Better training data, but... # What about deployment behavior? Real-world performance? # These are NOT addressed by label error detection alone - +``` **Trust-Based Approach:** -python +```python # OpenTrustEval approach - comprehensive trust evaluation def trust_based_approach(model, training_data, test_data, production_data): trust_assessment = { # Training Data Quality (includes label error detection) 'data_quality': evaluate_data_quality(training_data, labels), - # Model Reliability - 'reliability': evaluate_reliability(model, test_data), + # Model Reliability + 'reliability': evaluate_reliability(model, test_data), - # Consistency Across Inputs - 'consistency': evaluate_consistency(model, various_inputs), + # Consistency Across Inputs + 'consistency': evaluate_consistency(model, various_inputs), - # Fairness and Bias - 'fairness': evaluate_fairness(model, diverse_test_cases), + # Fairness and Bias + 'fairness': evaluate_fairness(model, diverse_test_cases), - # Robustness to Adversarial Attacks - 'robustness': evaluate_robustness(model, adversarial_examples), + # Robustness to Adversarial Attacks + 'robustness': evaluate_robustness(model, adversarial_examples), - # Explainability and Transparency - 'explainability': evaluate_explainability(model, inputs), + # Explainability and Transparency + 'explainability': evaluate_explainability(model, inputs), - # Production Behavior - 'deployment_trust': evaluate_production_behavior(model, production_data) + # Production Behavior + 'deployment_trust': evaluate_production_behavior(model, production_data) } - return comprehensive_trust_score(trust_assessment) + return comprehensive_trust_score(trust_assessment) +``` - ### 2. **Real-World Performance vs. 
Training Performance** **The Fundamental Problem:** -python +```python # Scenario: Perfect training data, poor real-world trust class ExampleScenario: def demonstrate_limitation(self): - # Training data is perfect (no label errors) - training_data_quality = 0.99 # CleanLab would be happy + # Training data is perfect (no label errors) + training_data_quality = 0.99 # CleanLab would be happy - # But model has issues: - reliability_score = 0.6 # Unreliable predictions - consistency_score = 0.5 # Inconsistent responses - fairness_score = 0.4 # Biased decisions - robustness_score = 0.3 # Fragile to input changes + # But model has issues: + reliability_score = 0.6 # Unreliable predictions + consistency_score = 0.5 # Inconsistent responses + fairness_score = 0.4 # Biased decisions + robustness_score = 0.3 # Fragile to input changes - # Label error detection says: "Data is clean!" - # Trust system says: "Don't deploy this - it's not trustworthy!" + # Label error detection says: "Data is clean!" + # Trust system says: "Don't deploy this - it's not trustworthy!" - return { + return { 'cleanlab_assessment': 'Data quality excellent', 'trust_assessment': 'System not ready for deployment' } - +``` ### 3. **Temporal and Contextual Trust** **Label Error Detection Cannot Address:** -python +```python # Issues that arise over time and context def temporal_trust_challenges(): return { @@ -222,52 +222,52 @@ def temporal_trust_challenges(): 'data_drift': 'Input distribution shifts in production', 'model_degradation': 'Performance naturally degrades over time', - # Context-based issues: - 'domain_adaptation': 'Works in training domain but fails in deployment domain', - 'edge_cases': 'Handles common cases but fails on edge cases', - 'user_trust': 'Users lose confidence due to inconsistent behavior' + # Context-based issues: + 'domain_adaptation': 'Works in training domain but fails in deployment domain', + 'edge_cases': 'Handles common cases but fails on edge cases', + 'user_trust': 'Users lose confidence due to inconsistent behavior' } - +``` ## Why Trust-Based Systems Are Superior ### 1. **Comprehensive Risk Assessment** **Trust systems evaluate:** -python +```python def comprehensive_risk_assessment(): return { # Pre-deployment risks (partially covered by CleanLab) 'training_data_risks': ['label_errors', 'bias', 'completeness'], - # Model behavior risks (NOT covered by CleanLab) - 'behavioral_risks': [ + # Model behavior risks (NOT covered by CleanLab) + 'behavioral_risks': [ 'overconfidence', # Model too confident in wrong answers 'inconsistency', # Different responses to similar inputs 'adversarial_vulnerability', # Security risks 'bias_amplification' # Fairness issues in deployment ], - # Deployment risks (NOT covered by CleanLab) - 'deployment_risks': [ + # Deployment risks (NOT covered by CleanLab) + 'deployment_risks': [ 'production_drift', # Performance degradation over time 'user_acceptance', # Human trust and adoption 'regulatory_compliance', # Legal and ethical requirements 'business_impact' # Real-world consequences of failures ] } - +``` ### 2. **Decision-Making Support** **Beyond Data Quality:** -python +```python def decision_making_support(): # CleanLab helps answer: "Is my training data good?" cleanlab_question = "Should I retrain with cleaned data?" 
-# Trust systems help answer broader questions: - trust_questions = [ + # Trust systems help answer broader questions: + trust_questions = [ "Should I deploy this model to production?", "Can I trust this model's decisions in critical situations?", "How will this model perform with real users?", @@ -275,16 +275,16 @@ def decision_making_support(): "How can I improve overall system trustworthiness?" ] - return { + return { 'cleanlab_scope': cleanlab_question, 'trust_scope': trust_questions } - +``` ### 3. **Continuous Monitoring and Improvement** **Evolution Over Time:** -python +```python def evolution_comparison(): return { 'label_error_detection': { @@ -294,20 +294,20 @@ def evolution_comparison(): 'outcome': 'Better training data' }, - 'trust_based_system': { + 'trust_based_system': { 'phase': 'End-to-end lifecycle (training → deployment → monitoring)', 'frequency': 'Continuous monitoring', 'scope': 'Dynamic system behavior in real-world conditions', 'outcome': 'Confidence in system reliability and safety' } } - +``` ## Concrete Examples Where Trust Systems Excel ### Example 1: **Medical Diagnosis System** -python +```python # CleanLab approach: medical_model_cleanlab = { 'training_data_quality': 0.98, # Very clean data @@ -325,11 +325,11 @@ medical_model_trust = { 'overall_trust': 0.6, # NOT ready for deployment! 'recommendation': 'Needs significant improvement before deployment' } - +``` ### Example 2: **Autonomous Vehicle Perception** -python +```python # CleanLab approach: av_perception_cleanlab = { 'training_data_quality': 0.95, # Good object detection labels @@ -346,17 +346,17 @@ av_perception_trust = { 'safety_trust': 0.3, # DANGEROUS for deployment! 'recommendation': 'Absolutely not ready - safety risks too high' } - +``` ## The Trust Advantage: Beyond Binary Decisions ### **CleanLab's Binary Thinking:** - +``` Data Quality: Good/Bad → Retrain/Don't Retrain - +``` ### **Trust-Based Thinking:** - +``` Trust Dimensions: ├── Reliability: 0.7 (Moderate confidence) ├── Consistency: 0.6 (Some variability acceptable) @@ -369,7 +369,7 @@ Decision Matrix: ├── Critical Applications: DON'T DEPLOY ├── Low-Stakes Applications: DEPLOY with monitoring └── Research Applications: DEPLOY with caveats - +``` ## Fundamental Truth From af37c2fe940ad4a7f766c5eccf932037c0fb5181 Mon Sep 17 00:00:00 2001 From: Kumar Vel <11884941+Kumarvels@users.noreply.github.com> Date: Fri, 25 Jul 2025 20:35:29 +0530 Subject: [PATCH 05/28] Create unified_plugin_manager.py Enhanced Plugin System (src/core/unified_plugin_manager.py) Signed-off-by: Kumar Vel <11884941+Kumarvels@users.noreply.github.com> --- .../core/unified_plugin_manager.py | 188 ++++++++++++++++++ 1 file changed, 188 insertions(+) create mode 100644 high_performance_system/core/unified_plugin_manager.py diff --git a/high_performance_system/core/unified_plugin_manager.py b/high_performance_system/core/unified_plugin_manager.py new file mode 100644 index 0000000..ee49156 --- /dev/null +++ b/high_performance_system/core/unified_plugin_manager.py @@ -0,0 +1,188 @@ +"""Unified plugin system for LLM trust evaluation""" + +import sys +import os +from typing import Dict, Any, List, Optional, Callable +from abc import ABC, abstractmethod +import logging + +logger = logging.getLogger(__name__) + +class UnifiedTrustPlugin(ABC): + """Base plugin interface for unified trust evaluation""" + + @property + @abstractmethod + def name(self) -> str: + """Plugin name""" + pass + + @property + @abstractmethod + def category(self) -> str: + """Plugin category (reliability, 
safety, fairness, etc.)""" + pass + + @property + @abstractmethod + def model_types(self) -> List[str]: + """Supported model types""" + pass + + @abstractmethod + def evaluate(self, model, data: Dict[str, Any], **kwargs) -> Dict[str, Any]: + """Execute evaluation""" + pass + + @abstractmethod + def is_available(self) -> bool: + """Check if plugin dependencies are available""" + pass + +class UnifiedPluginManager: + """Manages plugins across OpenTrustEval and TrustLLM""" + + def __init__(self): + self.plugins: Dict[str, UnifiedTrustPlugin] = {} + self.adapters: Dict[str, Any] = {} + self._initialize_adapters() + + def _initialize_adapters(self): + """Initialize external tool adapters""" + # TrustLLM adapter + try: + from src.integration.trustllm_adapter import TrustLLMAdapter + self.adapters['trustllm'] = TrustLLMAdapter() + logger.info("✓ TrustLLM adapter initialized") + except Exception as e: + logger.warning(f"⚠ TrustLLM adapter not available: {e}") + + # CleanLab adapter + try: + from src.integration.cleanlab_adapter import CleanLabAdapter + self.adapters['cleanlab'] = CleanLabAdapter() + logger.info("✓ CleanLab adapter initialized") + except Exception as e: + logger.warning(f"⚠ CleanLab adapter not available: {e}") + + # DeepChecks adapter + try: + from src.integration.deepchecks_adapter import DeepChecksAdapter + self.adapters['deepchecks'] = DeepChecksAdapter() + logger.info("✓ DeepChecks adapter initialized") + except Exception as e: + logger.warning(f"⚠ DeepChecks adapter not available: {e}") + + def register_plugin(self, plugin: UnifiedTrustPlugin): + """Register a trust evaluation plugin""" + if plugin.is_available(): + self.plugins[plugin.name] = plugin + logger.info(f"✓ Registered plugin: {plugin.name}") + else: + logger.warning(f"⚠ Plugin not available: {plugin.name}") + + def get_compatible_plugins(self, model_type: str) -> List[UnifiedTrustPlugin]: + """Get plugins compatible with specific model type""" + compatible = [] + for plugin in self.plugins.values(): + if model_type in plugin.model_types or 'all' in plugin.model_types: + compatible.append(plugin) + return compatible + + def execute_evaluation(self, model, data: Dict[str, Any], + model_type: str = 'llm', + categories: List[str] = None) -> Dict[str, Any]: + """Execute comprehensive evaluation using compatible plugins""" + compatible_plugins = self.get_compatible_plugins(model_type) + + if categories: + compatible_plugins = [p for p in compatible_plugins + if p.category in categories] + + results = {} + for plugin in compatible_plugins: + try: + plugin_result = plugin.evaluate(model, data) + results[plugin.name] = { + 'success': True, + 'result': plugin_result, + 'plugin_info': { + 'name': plugin.name, + 'category': plugin.category, + 'model_types': plugin.model_types + } + } + except Exception as e: + results[plugin.name] = { + 'success': False, + 'error': str(e), + 'plugin_info': { + 'name': plugin.name, + 'category': plugin.category + } + } + + return self._aggregate_results(results) + + def _aggregate_results(self, plugin_results: Dict[str, Any]) -> Dict[str, Any]: + """Aggregate results from multiple plugins""" + aggregated = { + 'dimension_scores': {}, + 'category_scores': {}, + 'plugin_performance': {}, + 'conflicts': [], + 'overall_score': 0.0 + } + + # Collect dimension scores + for plugin_name, result in plugin_results.items(): + if result.get('success', False): + plugin_data = result.get('result', {}) + if 'score' in plugin_data: + # Single score plugin + aggregated['plugin_performance'][plugin_name] = 
plugin_data['score'] + elif 'dimension_scores' in plugin_data: + # Multi-dimension plugin + for dim_name, dim_score in plugin_data['dimension_scores'].items(): + if isinstance(dim_score, dict) and 'score' in dim_score: + aggregated['dimension_scores'][dim_name] = dim_score['score'] + else: + aggregated['dimension_scores'][dim_name] = dim_score + + # Calculate category scores + category_scores = {} + for dim_name, score in aggregated['dimension_scores'].items(): + category = self._infer_category(dim_name) + if category not in category_scores: + category_scores[category] = [] + category_scores[category].append(score) + + for category, scores in category_scores.items(): + aggregated['category_scores'][category] = sum(scores) / len(scores) + + # Calculate overall score + if aggregated['dimension_scores']: + aggregated['overall_score'] = sum(aggregated['dimension_scores'].values()) / len(aggregated['dimension_scores']) + elif aggregated['plugin_performance']: + aggregated['overall_score'] = sum(aggregated['plugin_performance'].values()) / len(aggregated['plugin_performance']) + + return aggregated + + def _infer_category(self, dimension_name: str) -> str: + """Infer category from dimension name""" + dimension_name = dimension_name.lower() + if any(word in dimension_name for word in ['truth', 'fact', 'accur']): + return 'reliability' + elif any(word in dimension_name for word in ['safe', 'harm', 'toxic']): + return 'safety' + elif any(word in dimension_name for word in ['fair', 'bias', 'discrim']): + return 'fairness' + elif any(word in dimension_name for word in ['consist', 'stable']): + return 'consistency' + elif any(word in dimension_name for word in ['explain', 'interpret']): + return 'explainability' + else: + return 'general' + +# Global plugin manager instance +plugin_manager = UnifiedPluginManager() From bde6b8766352844869788355098e9a3da6a4c83e Mon Sep 17 00:00:00 2001 From: Kumar Vel <11884941+Kumarvels@users.noreply.github.com> Date: Fri, 25 Jul 2025 20:37:54 +0530 Subject: [PATCH 06/28] Create trustllm_plugin.py TrustLLM Comprehensive Plugin (src/plugins/trustllm_plugin.py) Signed-off-by: Kumar Vel <11884941+Kumarvels@users.noreply.github.com> --- plugins/trustllm_plugin.py | 141 +++++++++++++++++++++++++++++++++++++ 1 file changed, 141 insertions(+) create mode 100644 plugins/trustllm_plugin.py diff --git a/plugins/trustllm_plugin.py b/plugins/trustllm_plugin.py new file mode 100644 index 0000000..3b82eb6 --- /dev/null +++ b/plugins/trustllm_plugin.py @@ -0,0 +1,141 @@ +"""TrustLLM comprehensive trust evaluation plugin""" + +from src.core.unified_plugin_manager import UnifiedTrustPlugin +from typing import Dict, Any, List +import logging + +logger = logging.getLogger(__name__) + +class TrustLLMComprehensivePlugin(UnifiedTrustPlugin): + """Comprehensive TrustLLM evaluation plugin""" + + def __init__(self): + self.name = "trustllm_comprehensive" + self.category = "overall" + self.model_types = ["llm", "language_model", "transformer"] + self.adapter = None + self._initialize_adapter() + + def _initialize_adapter(self): + """Initialize TrustLLM adapter""" + try: + from src.integration.trustllm_adapter import TrustLLMAdapter + self.adapter = TrustLLMAdapter() + except Exception as e: + logger.error(f"Failed to initialize TrustLLM adapter: {e}") + self.adapter = None + + def is_available(self) -> bool: + """Check if plugin is available""" + return self.adapter is not None and self.adapter.is_available() + + def evaluate(self, model, data: Dict[str, Any], **kwargs) -> Dict[str, Any]: + 
"""Execute comprehensive TrustLLM evaluation""" + if not self.is_available(): + return self._fallback_evaluation() + + try: + # Prepare data for TrustLLM + trustllm_data = self._prepare_trustllm_data(data) + + # Execute TrustLLM evaluations + results = {} + + # Truthfulness evaluation + truthfulness_result = self.adapter.evaluate_truthfulness(model, trustllm_data) + results['truthfulness'] = truthfulness_result + + # Safety evaluation + safety_result = self.adapter.evaluate_safety(model, trustllm_data) + results['safety'] = safety_result + + # Hallucination evaluation + hallucination_result = self.adapter.evaluate_hallucination(model, trustllm_data) + results['hallucination'] = hallucination_result + + # Privacy evaluation + privacy_result = self.adapter.evaluate_privacy(model, trustllm_data) + results['privacy'] = privacy_result + + # Toxicity evaluation + toxicity_result = self.adapter.evaluate_toxicity(model, trustllm_data) + results['toxicity'] = toxicity_result + + # Calculate comprehensive score + dimension_scores = {} + for dim_name, dim_result in results.items(): + if isinstance(dim_result, dict) and 'score' in dim_result: + dimension_scores[dim_name] = dim_result['score'] + + comprehensive_score = sum(dimension_scores.values()) / len(dimension_scores) if dimension_scores else 0.5 + + return { + 'score': float(comprehensive_score), + 'dimension_scores': dimension_scores, + 'detailed_results': results, + 'metadata': { + 'evaluator': 'trustllm', + 'timestamp': self._get_timestamp(), + 'dimensions_evaluated': list(dimension_scores.keys()) + } + } + + except Exception as e: + logger.error(f"TrustLLM evaluation failed: {e}") + return self._fallback_evaluation() + + def _prepare_trustllm_data(self, data: Dict[str, Any]) -> Dict[str, Any]: + """Prepare data in TrustLLM format""" + # Convert OpenTrustEval data format to TrustLLM format + trustllm_data = {} + + # Map common fields + if 'prompts' in data: + trustllm_data['prompts'] = data['prompts'] + if 'responses' in data: + trustllm_data['responses'] = data['responses'] + if 'ground_truth' in data: + trustllm_data['ground_truth'] = data['ground_truth'] + if 'contexts' in data: + trustllm_data['contexts'] = data['contexts'] + + return trustllm_data + + def _fallback_evaluation(self) -> Dict[str, Any]: + """Fallback evaluation when TrustLLM unavailable""" + return { + 'score': 0.5, + 'dimension_scores': { + 'truthfulness': 0.5, + 'safety': 0.7, # Conservative safety default + 'hallucination': 0.5, + 'privacy': 0.6, + 'toxicity': 0.7 + }, + 'detailed_results': {}, + 'metadata': { + 'evaluator': 'fallback', + 'warning': 'TrustLLM not available, using default scores' + } + } + + def _get_timestamp(self) -> str: + """Get current timestamp""" + from datetime import datetime + return datetime.now().isoformat() + +# Register the plugin +def register_trustllm_plugin(plugin_manager): + """Register TrustLLM plugin with plugin manager""" + plugin = TrustLLMComprehensivePlugin() + if plugin.is_available(): + plugin_manager.register_plugin(plugin) + logger.info("✓ TrustLLM comprehensive plugin registered") + else: + logger.warning("⚠ TrustLLM comprehensive plugin not available") + +# Auto-registration +try: + register_trustllm_plugin(plugin_manager) +except Exception as e: + logger.error(f"Failed to register TrustLLM plugin: {e}") From 5f5317649fdc192ef4c4e8a4ba17d94b9349b781 Mon Sep 17 00:00:00 2001 From: Kumar Vel <11884941+Kumarvels@users.noreply.github.com> Date: Fri, 25 Jul 2025 20:39:40 +0530 Subject: [PATCH 07/28] Create llm_reliability_plugin.py 
LLM-Specific Reliability Plugin (src/plugins/llm_reliability_plugin.py) Signed-off-by: Kumar Vel <11884941+Kumarvels@users.noreply.github.com> --- plugins/llm_reliability_plugin.py | 86 +++++++++++++++++++++++++++++++ 1 file changed, 86 insertions(+) create mode 100644 plugins/llm_reliability_plugin.py diff --git a/plugins/llm_reliability_plugin.py b/plugins/llm_reliability_plugin.py new file mode 100644 index 0000000..33ffa58 --- /dev/null +++ b/plugins/llm_reliability_plugin.py @@ -0,0 +1,86 @@ +"""LLM-specific reliability evaluation plugin""" + +from src.core.unified_plugin_manager import UnifiedTrustPlugin +from typing import Dict, Any, List +import numpy as np +import logging + +logger = logging.getLogger(__name__) + +class LLMReliabilityPlugin(UnifiedTrustPlugin): + """LLM-specific reliability evaluation plugin""" + + def __init__(self): + self.name = "llm_reliability" + self.category = "reliability" + self.model_types = ["llm", "language_model", "transformer", "all"] + self.adapter = None + self._initialize_adapter() + + def _initialize_adapter(self): + """Initialize adapter for external tools""" + try: + from src.integration.cleanlab_adapter import CleanLabAdapter + self.adapter = CleanLabAdapter() + except Exception as e: + logger.debug(f"CleanLab adapter not available: {e}") + self.adapter = None + + def is_available(self) -> bool: + """Plugin is always available (fallback methods)""" + return True + + def evaluate(self, model, data: Dict[str, Any], **kwargs) -> Dict[str, Any]: + """Execute LLM reliability evaluation""" + try: + # Extract prompts and generate responses + prompts = data.get('prompts', []) + if not prompts: + return self._basic_reliability_evaluation() + + # Generate responses + responses = [] + for prompt in prompts: + response = self._generate_response(model, prompt) + responses.append(response) + + # Evaluate different aspects of reliability + factual_accuracy = self._evaluate_factual_accuracy(data, responses) + consistency_score = self._evaluate_consistency(model, prompts, responses) + confidence_calibration = self._evaluate_confidence_calibration(model, prompts, responses) + + # Calculate overall reliability score + reliability_components = [ + factual_accuracy, + consistency_score, + confidence_calibration + ] + overall_reliability = sum(reliability_components) / len(reliability_components) + + return { + 'score': float(overall_reliability), + 'dimension_scores': { + 'factual_accuracy': factual_accuracy, + 'consistency': consistency_score, + 'confidence_calibration': confidence_calibration + }, + 'details': { + 'evaluated_prompts': len(prompts), + 'response_consistency': self._analyze_response_patterns(responses) + }, + 'metadata': { + 'evaluator': 'llm_reliability_plugin', + 'timestamp': self._get_timestamp() + } + } + + except Exception as e: + logger.error(f"LLM reliability evaluation failed: {e}") + return self._basic_reliability_evaluation() + + def _generate_response(self, model, prompt: str) -> str: + """Generate response from model""" + try: + if hasattr(model, 'generate'): + return model.generate(prompt) + elif From 42fa52caf7d120b04de9ab1b9cba43bd927a59e8 Mon Sep 17 00:00:00 2001 From: Kumar Vel <11884941+Kumarvels@users.noreply.github.com> Date: Fri, 25 Jul 2025 21:33:59 +0530 Subject: [PATCH 08/28] Create trust_timeline.py - Trust Evolution Timeline System Trust isn't static - it evolves over time and contexts Signed-off-by: Kumar Vel <11884941+Kumarvels@users.noreply.github.com> --- .../core/trust_timeline.py | 165 ++++++++++++++++++ 1 file 
changed, 165 insertions(+) create mode 100644 high_performance_system/core/trust_timeline.py diff --git a/high_performance_system/core/trust_timeline.py b/high_performance_system/core/trust_timeline.py new file mode 100644 index 0000000..53814a3 --- /dev/null +++ b/high_performance_system/core/trust_timeline.py @@ -0,0 +1,165 @@ +# src/evolution/trust_timeline.py +""" +Trust Evolution System - Track how trust changes over time, contexts, and model updates +""" + +import pandas as pd +import numpy as np +from datetime import datetime, timedelta +from typing import Dict, Any, List, Optional +import plotly.graph_objects as go +import plotly.express as px + +class TrustEvolutionTracker: + """Tracks trust evolution across multiple dimensions and time periods""" + + def __init__(self): + self.trust_history = pd.DataFrame() + self.evolution_patterns = {} + self.anomaly_detectors = {} + + def track_evaluation(self, model_id: str, evaluation_results: Dict[str, Any], + context: str = "general", timestamp: datetime = None): + """Track a trust evaluation in the timeline""" + if timestamp is None: + timestamp = datetime.now() + + # Extract trust metrics + record = { + 'model_id': model_id, + 'timestamp': timestamp, + 'context': context, + 'overall_trust': evaluation_results.get('overall_trust_score', 0.0), + 'dimensions': evaluation_results.get('dimension_scores', {}), + 'categories': evaluation_results.get('category_scores', {}), + 'metadata': evaluation_results.get('metadata', {}) + } + + # Add to history + self.trust_history = pd.concat([self.trust_history, pd.DataFrame([record])], + ignore_index=True) + + # Update evolution patterns + self._update_evolution_patterns(model_id, record) + + def detect_trust_anomalies(self, model_id: str) -> List[Dict[str, Any]]: + """Detect anomalies in trust evolution""" + model_history = self.trust_history[self.trust_history['model_id'] == model_id] + + anomalies = [] + for dimension in ['overall_trust'] + list(model_history['dimensions'].iloc[0].keys()): + scores = model_history[dimension].values if dimension == 'overall_trust' else \ + [d.get(dimension, 0) for d in model_history['dimensions']] + + # Statistical anomaly detection + mean_score = np.mean(scores) + std_score = np.std(scores) + + # Check for recent significant drops + if len(scores) > 3: + recent_drop = scores[-1] < (mean_score - 2 * std_score) + if recent_drop: + anomalies.append({ + 'type': 'significant_drop', + 'dimension': dimension, + 'current_score': scores[-1], + 'historical_mean': mean_score, + 'severity': 'high' if scores[-1] < (mean_score - 3 * std_score) else 'medium', + 'timestamp': model_history.iloc[-1]['timestamp'] + }) + + return anomalies + + def predict_trust_trajectory(self, model_id: str, days_ahead: int = 30) -> Dict[str, Any]: + """Predict future trust trajectory using time series analysis""" + model_history = self.trust_history[self.trust_history['model_id'] == model_id] + + if len(model_history) < 5: + return {'status': 'insufficient_data', 'prediction': None} + + # Simple linear regression for prediction + timestamps = [(t - model_history.iloc[0]['timestamp']).days + for t in model_history['timestamp']] + overall_scores = model_history['overall_trust'].values + + # Linear regression + if len(set(timestamps)) > 1: # Avoid division by zero + slope = np.polyfit(timestamps, overall_scores, 1)[0] + predicted_score = overall_scores[-1] + (slope * days_ahead) + + return { + 'status': 'success', + 'current_score': overall_scores[-1], + 'predicted_score': max(0, min(1, 
predicted_score)), # Clamp between 0-1 + 'trend': 'improving' if slope > 0 else 'declining' if slope < 0 else 'stable', + 'confidence': min(1.0, len(timestamps) / 20.0) # Confidence increases with more data + } + + return {'status': 'insufficient_variance', 'prediction': None} + + def generate_evolution_report(self, model_id: str) -> Dict[str, Any]: + """Generate comprehensive trust evolution report""" + model_history = self.trust_history[self.trust_history['model_id'] == model_id] + + if len(model_history) == 0: + return {'status': 'no_data', 'report': None} + + # Calculate evolution metrics + first_score = model_history.iloc[0]['overall_trust'] + last_score = model_history.iloc[-1]['overall_trust'] + score_change = last_score - first_score + + # Dimension evolution + dimension_evolution = {} + if len(model_history) > 1: + first_dims = model_history.iloc[0]['dimensions'] + last_dims = model_history.iloc[-1]['dimensions'] + + for dim in set(first_dims.keys()) | set(last_dims.keys()): + first_val = first_dims.get(dim, 0) + last_val = last_dims.get(dim, 0) + dimension_evolution[dim] = { + 'change': last_val - first_val, + 'percentage_change': ((last_val - first_val) / first_val * 100) if first_val > 0 else 0 + } + + return { + 'status': 'success', + 'model_id': model_id, + 'evaluation_count': len(model_history), + 'time_span': (model_history.iloc[-1]['timestamp'] - model_history.iloc[0]['timestamp']).days, + 'overall_evolution': { + 'initial_score': first_score, + 'current_score': last_score, + 'absolute_change': score_change, + 'percentage_change': (score_change / first_score * 100) if first_score > 0 else 0 + }, + 'dimension_evolution': dimension_evolution, + 'anomalies_detected': self.detect_trust_anomalies(model_id), + 'trajectory_prediction': self.predict_trust_trajectory(model_id) + } + +# Integration with main evaluator +class EvolutionAwareEvaluator: + """Evaluator that tracks trust evolution""" + + def __init__(self): + self.trust_tracker = TrustEvolutionTracker() + # ... 
other initialization + + def evaluate_with_evolution_tracking(self, model, data, model_id: str, + context: str = "general") -> Dict[str, Any]: + """Execute evaluation with evolution tracking""" + results = self.evaluate_comprehensive_trust(model, data) + + # Track in evolution system + self.trust_tracker.track_evaluation(model_id, results, context) + + # Add evolution insights + results['evolution_insights'] = { + 'anomalies': self.trust_tracker.detect_trust_anomalies(model_id), + 'trajectory': self.trust_tracker.predict_trust_trajectory(model_id), + 'report': self.trust_tracker.generate_evolution_report(model_id) + } + + return results From e29afcb7ad37a303b462b783cad9414c60c39287 Mon Sep 17 00:00:00 2001 From: Kumar Vel <11884941+Kumarvels@users.noreply.github.com> Date: Fri, 25 Jul 2025 21:36:12 +0530 Subject: [PATCH 09/28] Create cross_model_correlation.py - Cross-Model Trust Correlation Engine AI systems don't operate in isolation - they interact and influence each other Signed-off-by: Kumar Vel <11884941+Kumarvels@users.noreply.github.com> --- .../core/cross_model_correlation.py | 258 ++++++++++++++++++ 1 file changed, 258 insertions(+) create mode 100644 high_performance_system/core/cross_model_correlation.py diff --git a/high_performance_system/core/cross_model_correlation.py b/high_performance_system/core/cross_model_correlation.py new file mode 100644 index 0000000..9b92a93 --- /dev/null +++ b/high_performance_system/core/cross_model_correlation.py @@ -0,0 +1,258 @@ +# src/correlation/cross_model_correlation.py +""" +Cross-Model Trust Correlation Engine - Understand how trust in one model affects others +""" + +import numpy as np +from scipy.stats import pearsonr +from typing import Dict, Any, List, Tuple +import networkx as nx +import plotly.graph_objects as go + +class CrossModelCorrelationEngine: + """Analyzes trust correlations between different AI models""" + + def __init__(self): + self.model_network = nx.DiGraph() # Directed graph for causality + self.trust_correlations = {} + self.interference_patterns = {} + + def register_model_relationship(self, source_model: str, target_model: str, + relationship_type: str, strength: float = 1.0): + """Register a relationship between models""" + self.model_network.add_edge(source_model, target_model, + relationship=relationship_type, + strength=strength) + + def analyze_trust_interference(self, model_evaluations: Dict[str, Dict]) -> Dict[str, Any]: + """Analyze how trust in one model affects trust in others""" + interference_analysis = {} + + # Calculate correlation matrix + model_names = list(model_evaluations.keys()) + trust_scores = {model: eval_data.get('overall_trust_score', 0.5) + for model, eval_data in model_evaluations.items()} + + # Pairwise correlation analysis + correlations = {} + for i, model1 in enumerate(model_names): + for j, model2 in enumerate(model_names): + if i != j: + score1 = trust_scores[model1] + score2 = trust_scores[model2] + + # Simple correlation (in real implementation, use more sophisticated methods) + correlation = self._calculate_trust_correlation( + model_evaluations[model1], + model_evaluations[model2] + ) + + correlations[f"{model1}__{model2}"] = { + 'correlation': correlation, + 'trust_impact': self._calculate_trust_impact(score1, score2), + 'risk_amplification': self._calculate_risk_amplification( + model_evaluations[model1], + model_evaluations[model2] + ) + } + + # Identify risk clusters + risk_clusters = self._identify_risk_clusters(model_evaluations) + + # Calculate system-level trust + 
system_trust = self._calculate_system_level_trust(model_evaluations) + + return { + 'pairwise_correlations': correlations, + 'risk_clusters': risk_clusters, + 'system_level_trust': system_trust, + 'vulnerability_analysis': self._analyze_vulnerability_propagation(model_evaluations), + 'recommendations': self._generate_correlation_recommendations( + correlations, risk_clusters, system_trust + ) + } + + def _calculate_trust_correlation(self, eval1: Dict, eval2: Dict) -> float: + """Calculate trust correlation between two model evaluations""" + # Extract dimension scores + dims1 = eval1.get('dimension_scores', {}) + dims2 = eval2.get('dimension_scores', {}) + + # Get common dimensions + common_dims = set(dims1.keys()) & set(dims2.keys()) + + if len(common_dims) < 2: + return 0.0 + + scores1 = [dims1[dim] for dim in common_dims] + scores2 = [dims2[dim] for dim in common_dims] + + # Calculate Pearson correlation + try: + correlation, _ = pearsonr(scores1, scores2) + return float(correlation) + except: + return 0.0 + + def _calculate_trust_impact(self, source_trust: float, target_trust: float) -> Dict[str, float]: + """Calculate how source model trust impacts target model trust""" + # Simple impact model - in practice, this would be more sophisticated + impact_magnitude = abs(source_trust - 0.5) * abs(target_trust - 0.5) * 2 + impact_direction = 1 if (source_trust > 0.5 and target_trust > 0.5) or \ + (source_trust < 0.5 and target_trust < 0.5) else -1 + + return { + 'magnitude': impact_magnitude, + 'direction': impact_direction, # 1 = positive correlation, -1 = negative + 'risk_factor': impact_magnitude * abs(impact_direction) + } + + def _calculate_risk_amplification(self, eval1: Dict, eval2: Dict) -> float: + """Calculate how risks in one model amplify risks in another""" + # Extract risk scores + risks1 = eval1.get('risk_assessment', {}).get('high_risks', []) + risks2 = eval2.get('risk_assessment', {}).get('high_risks', []) + + # Simple risk amplification model + base_risk = len(risks1) + len(risks2) + amplified_risk = base_risk * (1 + len(risks1) * len(risks2) * 0.1) + + return min(10.0, amplified_risk) # Cap at reasonable level + + def _identify_risk_clusters(self, model_evaluations: Dict[str, Dict]) -> List[Dict]: + """Identify clusters of models with correlated risks""" + # Simple clustering based on correlation thresholds + clusters = [] + processed_models = set() + + for model_name, eval_data in model_evaluations.items(): + if model_name in processed_models: + continue + + # Find correlated models + correlated_models = [] + for other_model, other_eval in model_evaluations.items(): + if other_model != model_name and other_model not in processed_models: + correlation = self._calculate_trust_correlation(eval_data, other_eval) + if abs(correlation) > 0.7: # High correlation threshold + correlated_models.append(other_model) + processed_models.add(other_model) + + if correlated_models: + correlated_models.append(model_name) + clusters.append({ + 'models': correlated_models, + 'cluster_risk': self._calculate_cluster_risk( + {m: model_evaluations[m] for m in correlated_models} + ) + }) + processed_models.add(model_name) + + return clusters + + def _calculate_system_level_trust(self, model_evaluations: Dict[str, Dict]) -> float: + """Calculate overall system trust considering correlations""" + if not model_evaluations: + return 0.5 + + individual_trusts = [eval_data.get('overall_trust_score', 0.5) + for eval_data in model_evaluations.values()] + + # Simple average - in practice, weight by model 
importance and correlations + return float(np.mean(individual_trusts)) + + def _analyze_vulnerability_propagation(self, model_evaluations: Dict[str, Dict]) -> Dict[str, Any]: + """Analyze how vulnerabilities might propagate through the system""" + propagation_analysis = {} + + for model_name, eval_data in model_evaluations.items(): + vulnerabilities = eval_data.get('risk_assessment', {}).get('critical_risks', []) + + if vulnerabilities: + propagation_analysis[model_name] = { + 'vulnerabilities': vulnerabilities, + 'propagation_risk': len(vulnerabilities) * 0.2, + 'affected_downstream': self._find_affected_models(model_name), + 'mitigation_priority': self._calculate_mitigation_priority(eval_data) + } + + return propagation_analysis + + def _find_affected_models(self, source_model: str) -> List[str]: + """Find models that might be affected by issues in source model""" + if source_model in self.model_network: + return list(self.model_network.successors(source_model)) + return [] + + def _calculate_mitigation_priority(self, eval_data: Dict) -> str: + """Calculate priority for mitigation based on risk severity""" + critical_risks = len(eval_data.get('risk_assessment', {}).get('critical_risks', [])) + high_risks = len(eval_data.get('risk_assessment', {}).get('high_risks', [])) + + risk_score = critical_risks * 3 + high_risks + + if risk_score >= 6: + return 'critical' + elif risk_score >= 3: + return 'high' + elif risk_score >= 1: + return 'medium' + else: + return 'low' + + def _generate_correlation_recommendations(self, correlations: Dict, + clusters: List[Dict], + system_trust: float) -> List[str]: + """Generate recommendations based on correlation analysis""" + recommendations = [] + + # System-level recommendations + if system_trust < 0.6: + recommendations.append("System-level trust is low. Consider comprehensive system review.") + + # Cluster recommendations + high_risk_clusters = [c for c in clusters if c['cluster_risk'] > 0.7] + if high_risk_clusters: + recommendations.append(f"Identified {len(high_risk_clusters)} high-risk model clusters. Review interdependencies.") + + # Correlation recommendations + strong_correlations = [corr for corr, data in correlations.items() + if abs(data['correlation']) > 0.8] + if strong_correlations: + recommendations.append(f"Found {len(strong_correlations)} strong model correlations. Monitor jointly.") + + return recommendations + +# Integration example +class CorrelationAwareEvaluator: + """Evaluator that considers cross-model correlations""" + + def __init__(self): + self.correlation_engine = CrossModelCorrelationEngine() + # ... 
other initialization + + def evaluate_multi_model_system(self, models: Dict[str, Any], + shared_data: Dict[str, Any]) -> Dict[str, Any]: + """Evaluate a system with multiple interacting models""" + + # Evaluate each model individually + individual_evaluations = {} + for model_name, model in models.items(): + individual_evaluations[model_name] = self.evaluate_comprehensive_trust( + model, shared_data.get(model_name, shared_data) + ) + + # Analyze cross-model correlations + correlation_analysis = self.correlation_engine.analyze_trust_interference( + individual_evaluations + ) + + return { + 'individual_evaluations': individual_evaluations, + 'correlation_analysis': correlation_analysis, + 'system_overview': { + 'total_models': len(models), + 'system_trust': correlation_analysis['system_level_trust'], + 'risk_clusters': len(correlation_analysis['risk_clusters']) + } + } From 3629c66f2a446679f814515a1f670022a1301fc4 Mon Sep 17 00:00:00 2001 From: Kumar Vel <11884941+Kumarvels@users.noreply.github.com> Date: Fri, 25 Jul 2025 21:38:05 +0530 Subject: [PATCH 10/28] Create trust_decision_matrix.py - Trust Decision Matrix System The Problem: Different stakeholders need different trust criteria for the same system Signed-off-by: Kumar Vel <11884941+Kumarvels@users.noreply.github.com> --- .../core/trust_decision_matrix.py | 285 ++++++++++++++++++ 1 file changed, 285 insertions(+) create mode 100644 high_performance_system/core/trust_decision_matrix.py diff --git a/high_performance_system/core/trust_decision_matrix.py b/high_performance_system/core/trust_decision_matrix.py new file mode 100644 index 0000000..30bf20f --- /dev/null +++ b/high_performance_system/core/trust_decision_matrix.py @@ -0,0 +1,285 @@ +# src/decision/trust_decision_matrix.py +""" +Trust Decision Matrix - Customizable trust criteria for different stakeholders and contexts +""" + +from typing import Dict, Any, List, Callable +import numpy as np +from dataclasses import dataclass +from enum import Enum + +class StakeholderType(Enum): + """Different types of stakeholders with varying trust requirements""" + EXECUTIVE = "executive" + TECHNICAL = "technical" + REGULATORY = "regulatory" + END_USER = "end_user" + BUSINESS = "business" + +class DeploymentContext(Enum): + """Different deployment contexts with varying risk tolerances""" + DEVELOPMENT = "development" + TESTING = "testing" + STAGING = "staging" + PRODUCTION = "production" + CRITICAL = "critical" + +@dataclass +class TrustCriterion: + """A specific trust criterion with threshold and weight""" + dimension: str + threshold: float + weight: float + critical: bool = False + rationale: str = "" + +class TrustDecisionMatrix: + """Matrix-based trust decision system""" + + def __init__(self): + self.decision_profiles = {} + self.default_profiles = self._create_default_profiles() + + def _create_default_profiles(self) -> Dict[str, List[TrustCriterion]]: + """Create default decision profiles for common scenarios""" + return { + 'executive_high_level': [ + TrustCriterion('overall_trust', 0.8, 1.0, rationale="Executive overview"), + TrustCriterion('safety', 0.9, 0.8, critical=True, rationale="Safety is paramount"), + TrustCriterion('reliability', 0.85, 0.7, rationale="Business reliability") + ], + 'technical_detailed': [ + TrustCriterion('reliability', 0.8, 0.9, rationale="Technical accuracy"), + TrustCriterion('consistency', 0.85, 0.8, rationale="Stable performance"), + TrustCriterion('robustness', 0.8, 0.7, rationale="Resilience to attacks"), + TrustCriterion('explainability', 0.7, 0.6, 
rationale="Debugging capability") + ], + 'regulatory_compliance': [ + TrustCriterion('fairness', 0.9, 1.0, critical=True, rationale="Non-discrimination"), + TrustCriterion('privacy', 0.95, 0.9, critical=True, rationale="Data protection"), + TrustCriterion('safety', 0.9, 0.8, critical=True, rationale="User safety"), + TrustCriterion('transparency', 0.8, 0.7, rationale="Audit requirements") + ], + 'production_critical': [ + TrustCriterion('safety', 0.95, 1.0, critical=True, rationale="Life-critical systems"), + TrustCriterion('reliability', 0.9, 0.9, critical=True, rationale="Mission-critical"), + TrustCriterion('robustness', 0.85, 0.8, rationale="Security resilience") + ] + } + + def create_custom_profile(self, profile_name: str, criteria: List[TrustCriterion]): + """Create a custom decision profile""" + self.decision_profiles[profile_name] = criteria + + def evaluate_against_profile(self, evaluation_results: Dict[str, Any], + profile_name: str) -> Dict[str, Any]: + """Evaluate trust results against a specific decision profile""" + # Get profile criteria + if profile_name in self.decision_profiles: + criteria = self.decision_profiles[profile_name] + elif profile_name in self.default_profiles: + criteria = self.default_profiles[profile_name] + else: + return {'error': f'Profile {profile_name} not found'} + + # Extract dimension scores + dimension_scores = evaluation_results.get('dimension_scores', {}) + category_scores = evaluation_results.get('category_scores', {}) + + # Evaluate each criterion + criterion_results = [] + critical_failures = [] + weighted_scores = [] + + for criterion in criteria: + # Get score for dimension + score = dimension_scores.get(criterion.dimension) + if score is None: + # Try category score + score = category_scores.get(criterion.dimension, 0.5) + + # Check threshold + meets_threshold = score >= criterion.threshold + if criterion.critical and not meets_threshold: + critical_failures.append(criterion.dimension) + + # Calculate weighted contribution + weighted_score = score * criterion.weight + weighted_scores.append(weighted_score) + + criterion_results.append({ + 'dimension': criterion.dimension, + 'score': score, + 'threshold': criterion.threshold, + 'meets_threshold': meets_threshold, + 'weight': criterion.weight, + 'weighted_contribution': weighted_score, + 'critical': criterion.critical + }) + + # Calculate overall profile score + overall_profile_score = sum(weighted_scores) / sum(criterion.weight for criterion in criteria) if criteria else 0.5 + + # Make decision + decision = "APPROVED" if len(critical_failures) == 0 and overall_profile_score >= 0.7 else "REJECTED" + if len(critical_failures) > 0: + decision = "REJECTED_CRITICAL_FAILURES" + + return { + 'profile_name': profile_name, + 'overall_score': overall_profile_score, + 'decision': decision, + 'criterion_results': criterion_results, + 'critical_failures': critical_failures, + 'met_thresholds': len([c for c in criterion_results if c['meets_threshold']]), + 'total_criteria': len(criteria), + 'recommendations': self._generate_profile_recommendations(criterion_results) + } + + def multi_profile_evaluation(self, evaluation_results: Dict[str, Any], + profile_names: List[str]) -> Dict[str, Any]: + """Evaluate against multiple profiles simultaneously""" + profile_results = {} + for profile_name in profile_names: + profile_results[profile_name] = self.evaluate_against_profile( + evaluation_results, profile_name + ) + + # Aggregate decisions + final_decision = self._aggregate_decisions(profile_results) + + return 
{ + 'individual_profile_results': profile_results, + 'final_decision': final_decision, + 'consensus_score': self._calculate_consensus_score(profile_results), + 'conflicting_decisions': self._find_conflicting_decisions(profile_results) + } + + def _generate_profile_recommendations(self, criterion_results: List[Dict]) -> List[str]: + """Generate recommendations based on profile evaluation""" + recommendations = [] + + for criterion in criterion_results: + if not criterion['meets_threshold']: + if criterion['critical']: + recommendations.append(f"CRITICAL: Improve {criterion['dimension']} (current: {criterion['score']:.3f}, required: {criterion['threshold']})") + else: + recommendations.append(f"Improve {criterion['dimension']} (current: {criterion['score']:.3f}, required: {criterion['threshold']})") + + return recommendations + + def _aggregate_decisions(self, profile_results: Dict[str, Dict]) -> str: + """Aggregate decisions from multiple profiles""" + decisions = [result['decision'] for result in profile_results.values()] + + if 'REJECTED_CRITICAL_FAILURES' in decisions: + return 'REJECTED_CRITICAL_FAILURES' + elif 'REJECTED' in decisions: + return 'REJECTED' + else: + return 'APPROVED' + + def _calculate_consensus_score(self, profile_results: Dict[str, Dict]) -> float: + """Calculate consensus score across profiles""" + scores = [result['overall_score'] for result in profile_results.values()] + return float(np.mean(scores)) if scores else 0.5 + + def _find_conflicting_decisions(self, profile_results: Dict[str, Dict]) -> List[str]: + """Find profiles with conflicting decisions""" + approved_profiles = [name for name, result in profile_results.items() + if result['decision'] == 'APPROVED'] + rejected_profiles = [name for name, result in profile_results.items() + if result['decision'] in ['REJECTED', 'REJECTED_CRITICAL_FAILURES']] + + if approved_profiles and rejected_profiles: + return [f"Approved: {approved_profiles}, Rejected: {rejected_profiles}"] + return [] + +# Integration with main system +class DecisionMatrixEvaluator: + """Evaluator with decision matrix capabilities""" + + def __init__(self): + self.decision_matrix = TrustDecisionMatrix() + # ... 
other initialization + + def evaluate_with_decision_matrix(self, model, data, + stakeholder_type: StakeholderType = None, + deployment_context: DeploymentContext = None, + custom_profiles: List[str] = None) -> Dict[str, Any]: + """Execute evaluation with decision matrix analysis""" + + # Standard evaluation + evaluation_results = self.evaluate_comprehensive_trust(model, data) + + # Determine profiles to evaluate against + profiles_to_evaluate = [] + + if custom_profiles: + profiles_to_evaluate.extend(custom_profiles) + elif stakeholder_type: + profile_mapping = { + StakeholderType.EXECUTIVE: ['executive_high_level'], + StakeholderType.TECHNICAL: ['technical_detailed'], + StakeholderType.REGULATORY: ['regulatory_compliance'], + StakeholderType.END_USER: ['executive_high_level'], + StakeholderType.BUSINESS: ['executive_high_level'] + } + profiles_to_evaluate.extend(profile_mapping.get(stakeholder_type, [])) + + if deployment_context == DeploymentContext.CRITICAL: + profiles_to_evaluate.append('production_critical') + elif deployment_context == DeploymentContext.PRODUCTION: + profiles_to_evaluate.append('technical_detailed') + + # If no specific profiles, use default comprehensive evaluation + if not profiles_to_evaluate: + profiles_to_evaluate = ['executive_high_level', 'technical_detailed'] + + # Multi-profile evaluation + decision_results = self.decision_matrix.multi_profile_evaluation( + evaluation_results, profiles_to_evaluate + ) + + # Combine results + final_results = evaluation_results.copy() + final_results['decision_matrix_analysis'] = decision_results + + return final_results + +# Usage example +def advanced_trust_decision_example(): + """Example of advanced trust decision making""" + + # Create evaluator + evaluator = DecisionMatrixEvaluator() + + # Define custom profile for healthcare application + healthcare_criteria = [ + TrustCriterion('safety', 0.95, 1.0, critical=True, rationale="Patient safety"), + TrustCriterion('privacy', 0.95, 0.9, critical=True, rationale="HIPAA compliance"), + TrustCriterion('reliability', 0.9, 0.8, rationale="Medical accuracy"), + TrustCriterion('fairness', 0.9, 0.7, rationale="Non-discrimination") + ] + + evaluator.decision_matrix.create_custom_profile('healthcare_medical_ai', healthcare_criteria) + + # Evaluate model + results = evaluator.evaluate_with_decision_matrix( + model=my_medical_ai_model, + data=medical_test_data, + custom_profiles=['healthcare_medical_ai', 'regulatory_compliance'] + ) + + print(f"Overall Trust Score: {results['overall_trust_score']:.3f}") + print(f"Decision: {results['decision_matrix_analysis']['final_decision']}") + + # Show detailed analysis + for profile_name, profile_result in results['decision_matrix_analysis']['individual_profile_results'].items(): + print(f"\n{profile_name.upper()} Profile:") + print(f" Score: {profile_result['overall_score']:.3f}") + print(f" Decision: {profile_result['decision']}") + if profile_result['recommendations']: + print(" Recommendations:") + for rec in profile_result['recommendations']: + print(f" - {rec}") From 585fba1f4fe1cb2b7b2b6964c6870750c7e5c03c Mon Sep 17 00:00:00 2001 From: Kumar Vel <11884941+Kumarvels@users.noreply.github.com> Date: Fri, 25 Jul 2025 21:42:04 +0530 Subject: [PATCH 11/28] Create trust_simulation.py - Trust Simulation and Stress Testing System The Problem: Need to test trust under extreme conditions before deployment Signed-off-by: Kumar Vel <11884941+Kumarvels@users.noreply.github.com> --- .../core/trust_simulation.py | 442 ++++++++++++++++++ 1 file changed, 442 
insertions(+) create mode 100644 high_performance_system/core/trust_simulation.py diff --git a/high_performance_system/core/trust_simulation.py b/high_performance_system/core/trust_simulation.py new file mode 100644 index 0000000..ab40de7 --- /dev/null +++ b/high_performance_system/core/trust_simulation.py @@ -0,0 +1,442 @@ +# src/simulation/trust_simulation.py +""" +Trust Simulation and Stress Testing - Test trust under extreme conditions +""" + +import numpy as np +from typing import Dict, Any, List, Callable +import random +from dataclasses import dataclass +from abc import ABC, abstractmethod + +@dataclass +class SimulationScenario: + """A specific simulation scenario with parameters""" + name: str + description: str + stress_factors: Dict[str, float] # Factor name -> intensity (0-1) + duration: int # Simulation steps + critical_threshold: float = 0.6 + +class StressTestScenario(ABC): + """Base class for stress test scenarios""" + + def __init__(self, name: str, description: str): + self.name = name + self.description = description + + @abstractmethod + def apply_stress(self, model, data: Dict[str, Any], intensity: float) -> Dict[str, Any]: + """Apply stress to model and data""" + pass + + @abstractmethod + def measure_impact(self, original_results: Dict[str, Any], + stressed_results: Dict[str, Any]) -> Dict[str, Any]: + """Measure impact of stress on trust metrics""" + pass + +class AdversarialAttackScenario(StressTestScenario): + """Simulate adversarial attacks on the model""" + + def apply_stress(self, model, data: Dict[str, Any], intensity: float) -> Dict[str, Any]: + """Apply adversarial perturbations to data""" + stressed_data = data.copy() + + # Add adversarial noise to prompts (simplified) + if 'prompts' in stressed_data: + prompts = stressed_data['prompts'] + stressed_prompts = [] + + for prompt in prompts: + if random.random() < intensity: # Apply stress with probability + # Simple adversarial perturbation + perturbed_prompt = self._perturb_prompt(prompt, intensity) + stressed_prompts.append(perturbed_prompt) + else: + stressed_prompts.append(prompt) + + stressed_data['prompts'] = stressed_prompts + + return stressed_data + + def _perturb_prompt(self, prompt: str, intensity: float) -> str: + """Apply adversarial perturbation to prompt""" + words = prompt.split() + perturbation_count = max(1, int(len(words) * intensity * 0.3)) + + for _ in range(perturbation_count): + if words: + # Randomly swap, delete, or add words + action = random.choice(['swap', 'delete', 'add']) + if action == 'swap' and len(words) > 1: + i, j = random.sample(range(len(words)), 2) + words[i], words[j] = words[j], words[i] + elif action == 'delete' and len(words) > 1: + words.pop(random.randint(0, len(words) - 1)) + elif action == 'add': + words.insert(random.randint(0, len(words)), '[ATTACK]') + + return ' '.join(words) + + def measure_impact(self, original_results: Dict[str, Any], + stressed_results: Dict[str, Any]) -> Dict[str, Any]: + """Measure impact of adversarial attack""" + original_score = original_results.get('overall_trust_score', 0.5) + stressed_score = stressed_results.get('overall_trust_score', 0.5) + + score_degradation = original_score - stressed_score + + # Analyze dimension-specific impacts + original_dims = original_results.get('dimension_scores', {}) + stressed_dims = stressed_results.get('dimension_scores', {}) + + dimension_impacts = {} + for dim in set(original_dims.keys()) | set(stressed_dims.keys()): + orig_val = original_dims.get(dim, 0.5) + stress_val = stressed_dims.get(dim, 
0.5) + dimension_impacts[dim] = { + 'degradation': orig_val - stress_val, + 'percentage_drop': ((orig_val - stress_val) / orig_val * 100) if orig_val > 0 else 0 + } + + return { + 'score_degradation': score_degradation, + 'percentage_degradation': (score_degradation / original_score * 100) if original_score > 0 else 0, + 'dimension_impacts': dimension_impacts, + 'vulnerability_score': max(0, min(1, score_degradation * 2)), # Scaled vulnerability + 'recommendation': self._generate_recommendation(score_degradation, dimension_impacts) + } + + def _generate_recommendation(self, score_degradation: float, dimension_impacts: Dict) -> str: + """Generate recommendation based on impact analysis""" + if score_degradation > 0.3: + return "High vulnerability to adversarial attacks. Implement robust adversarial training." + elif score_degradation > 0.1: + return "Moderate vulnerability detected. Consider adversarial defense mechanisms." + else: + return "Low vulnerability to tested adversarial scenarios." + +class DataDriftScenario(StressTestScenario): + """Simulate data drift scenarios""" + + def apply_stress(self, model, data: Dict[str, Any], intensity: float) -> Dict[str, Any]: + """Apply data drift simulation""" + stressed_data = data.copy() + + # Simulate concept drift by modifying data characteristics + if 'contexts' in stressed_data: + contexts = stressed_data['contexts'] + drifted_contexts = [] + + for context in contexts: + if random.random() < intensity: + # Modify context to simulate drift + drifted_context = self._drift_context(context, intensity) + drifted_contexts.append(drifted_context) + else: + drifted_contexts.append(context) + + stressed_data['contexts'] = drifted_contexts + + return stressed_data + + def _drift_context(self, context: str, intensity: float) -> str: + """Apply context drift""" + # Simplified context drift simulation + drift_indicators = ['[FUTURE]', '[PAST]', '[DIFFERENT_DOMAIN]', '[EVOLVED]'] + drift_count = int(intensity * 3) + + for _ in range(drift_count): + indicator = random.choice(drift_indicators) + context = f"{indicator} {context}" + + return context + + def measure_impact(self, original_results: Dict[str, Any], + stressed_results: Dict[str, Any]) -> Dict[str, Any]: + """Measure impact of data drift""" + original_score = original_results.get('overall_trust_score', 0.5) + stressed_score = stressed_results.get('overall_trust_score', 0.5) + + drift_impact = original_score - stressed_score + + return { + 'drift_impact': drift_impact, + 'adaptability_score': max(0, min(1, 1 - drift_impact)), # Higher is better + 'recommendation': self._generate_recommendation(drift_impact) + } + + def _generate_recommendation(self, drift_impact: float) -> str: + """Generate recommendation based on drift impact""" + if drift_impact > 0.2: + return "Significant performance degradation under data drift. Implement continuous learning and monitoring." + elif drift_impact > 0.1: + return "Moderate drift sensitivity. Consider drift detection mechanisms." + else: + return "Good robustness to data drift scenarios." 
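# --- Illustrative sketch (not part of the original patch): one more scenario following the same pattern ---
# The abstract StressTestScenario base class above is meant to be subclassed. As a minimal,
# self-contained example, the hypothetical scenario below injects character-level "typo noise"
# into prompts; the class name, noise rate, and returned keys are assumptions for illustration only.
class TypoNoiseScenario(StressTestScenario):
    """Hypothetical scenario: inject character-level typos into prompts"""

    def apply_stress(self, model, data: Dict[str, Any], intensity: float) -> Dict[str, Any]:
        """Corrupt a fraction of prompt characters proportional to intensity"""
        stressed_data = data.copy()
        if 'prompts' in stressed_data:
            noisy_prompts = []
            for prompt in stressed_data['prompts']:
                chars = list(prompt)
                flips = int(len(chars) * intensity * 0.05)
                for _ in range(flips):
                    if chars:
                        idx = random.randint(0, len(chars) - 1)
                        chars[idx] = random.choice('abcdefghijklmnopqrstuvwxyz ')
                noisy_prompts.append(''.join(chars))
            stressed_data['prompts'] = noisy_prompts
        return stressed_data

    def measure_impact(self, original_results: Dict[str, Any],
                       stressed_results: Dict[str, Any]) -> Dict[str, Any]:
        """Report degradation of the overall trust score under typo noise"""
        original_score = original_results.get('overall_trust_score', 0.5)
        stressed_score = stressed_results.get('overall_trust_score', 0.5)
        degradation = original_score - stressed_score
        return {
            'score_degradation': degradation,
            'robustness_to_noise': max(0.0, min(1.0, 1 - degradation))
        }

# Such a scenario could be registered like the built-in ones, e.g.
# engine.register_scenario('typo_noise', TypoNoiseScenario('typo_noise', 'Character-level noise'))
# using the TrustSimulationEngine defined below.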
+ +class TrustSimulationEngine: + """Main simulation engine for trust stress testing""" + + def __init__(self): + self.scenarios = { + 'adversarial_attack': AdversarialAttackScenario( + 'adversarial_attack', + 'Test model robustness against adversarial inputs' + ), + 'data_drift': DataDriftScenario( + 'data_drift', + 'Test model performance under data distribution shifts' + ) + } + self.default_scenarios = [ + 'adversarial_attack', + 'data_drift' + ] + + def register_scenario(self, name: str, scenario: StressTestScenario): + """Register a custom stress test scenario""" + self.scenarios[name] = scenario + + def run_simulation(self, model, baseline_data: Dict[str, Any], + scenario_name: str, intensity: float = 0.5, + evaluator = None) -> Dict[str, Any]: + """Run a single simulation scenario""" + if scenario_name not in self.scenarios: + return {'error': f'Scenario {scenario_name} not found'} + + scenario = self.scenarios[scenario_name] + + # Get baseline evaluation + if evaluator is None: + from src.evaluators.composite_evaluator import CompositeTrustEvaluator + evaluator = CompositeTrustEvaluator() + + baseline_results = evaluator.evaluate_comprehensive_trust(model, baseline_data) + + # Apply stress + stressed_data = scenario.apply_stress(model, baseline_data, intensity) + + # Evaluate stressed performance + stressed_results = evaluator.evaluate_comprehensive_trust(model, stressed_data) + + # Measure impact + impact_analysis = scenario.measure_impact(baseline_results, stressed_results) + + return { + 'scenario': scenario_name, + 'intensity': intensity, + 'baseline_results': baseline_results, + 'stressed_results': stressed_results, + 'impact_analysis': impact_analysis, + 'stress_applied': stressed_data != baseline_data + } + + def run_comprehensive_simulation(self, model, baseline_data: Dict[str, Any], + scenarios: List[str] = None, + intensities: List[float] = None, + evaluator = None) -> Dict[str, Any]: + """Run comprehensive simulation across multiple scenarios""" + if scenarios is None: + scenarios = self.default_scenarios + + if intensities is None: + intensities = [0.3, 0.5, 0.7, 0.9] + + simulation_results = {} + scenario_summaries = {} + + for scenario_name in scenarios: + scenario_results = [] + for intensity in intensities: + result = self.run_simulation(model, baseline_data, scenario_name, + intensity, evaluator) + scenario_results.append(result) + + simulation_results[scenario_name] = scenario_results + + # Summarize scenario results + scenario_summaries[scenario_name] = self._summarize_scenario_results(scenario_results) + + # Overall simulation summary + overall_summary = self._generate_overall_summary(scenario_summaries) + + return { + 'detailed_results': simulation_results, + 'scenario_summaries': scenario_summaries, + 'overall_summary': overall_summary, + 'robustness_score': overall_summary.get('overall_robustness', 0.5), + 'recommendations': self._generate_simulation_recommendations(scenario_summaries) + } + + def _summarize_scenario_results(self, scenario_results: List[Dict]) -> Dict[str, Any]: + """Summarize results for a single scenario across intensities""" + if not scenario_results: + return {} + + # Extract key metrics across intensities + intensities = [r['intensity'] for r in scenario_results] + impacts = [r['impact_analysis'].get('score_degradation', 0) for r in scenario_results] + + # Find maximum impact + max_impact = max(impacts) if impacts else 0 + + # Calculate robustness (inverse of impact) + robustness_scores = [1 - impact for impact in impacts] + 
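        # Robustness is treated as the complement of the observed degradation; averaging it across
        # the tested intensities gives a single per-scenario resilience figure. Note that impacts are
        # read from the 'score_degradation' key, so a scenario that reports a differently named metric
        # (e.g. DataDriftScenario's 'drift_impact') falls back to 0 in this summary.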
avg_robustness = sum(robustness_scores) / len(robustness_scores) if robustness_scores else 0.5 + + return { + 'max_impact': max_impact, + 'average_robustness': avg_robustness, + 'intensity_impact_curve': list(zip(intensities, impacts)), + 'worst_case_intensity': intensities[impacts.index(max_impact)] if impacts else 0.5 + } + + def _generate_overall_summary(self, scenario_summaries: Dict[str, Dict]) -> Dict[str, Any]: + """Generate overall simulation summary""" + if not scenario_summaries: + return {} + + avg_robustness = np.mean([summary.get('average_robustness', 0.5) + for summary in scenario_summaries.values()]) + + max_impacts = [summary.get('max_impact', 0) + for summary in scenario_summaries.values()] + worst_case_impact = max(max_impacts) if max_impacts else 0 + + return { + 'overall_robustness': float(avg_robustness), + 'worst_case_impact': worst_case_impact, + 'scenarios_tested': list(scenario_summaries.keys()), + 'deployment_readiness': 'HIGH' if avg_robustness > 0.8 else + 'MEDIUM' if avg_robustness > 0.6 else 'LOW' + } + + def _generate_simulation_recommendations(self, scenario_summaries: Dict[str, Dict]) -> List[str]: + """Generate recommendations based on simulation results""" + recommendations = [] + + for scenario_name, summary in scenario_summaries.items(): + avg_robustness = summary.get('average_robustness', 0.5) + max_impact = summary.get('max_impact', 0) + + if avg_robustness < 0.6: + recommendations.append(f"Low robustness in {scenario_name} scenarios. Requires improvement.") + elif max_impact > 0.3: + recommendations.append(f"Significant vulnerability detected in {scenario_name}. Monitor closely.") + + return recommendations + +# Integration with main evaluator +class SimulationEnhancedEvaluator: + """Evaluator with simulation and stress testing capabilities""" + + def __init__(self): + self.simulation_engine = TrustSimulationEngine() + # ... 
other initialization + + def evaluate_with_simulation(self, model, data, + simulation_config: Dict[str, Any] = None) -> Dict[str, Any]: + """Execute evaluation with comprehensive simulation testing""" + + # Standard evaluation + standard_results = self.evaluate_comprehensive_trust(model, data) + + # Run simulations if configured + if simulation_config is not None: + simulation_results = self.simulation_engine.run_comprehensive_simulation( + model, data, + scenarios=simulation_config.get('scenarios'), + intensities=simulation_config.get('intensities'), + evaluator=self # Pass self as evaluator + ) + + # Combine results + final_results = standard_results.copy() + final_results['simulation_analysis'] = simulation_results + + # Add simulation-based trust score + final_results['simulation_adjusted_trust'] = self._calculate_simulation_adjusted_trust( + standard_results, simulation_results + ) + + return final_results + else: + return standard_results + + def _calculate_simulation_adjusted_trust(self, standard_results: Dict[str, Any], + simulation_results: Dict[str, Any]) -> float: + """Calculate trust score adjusted for simulation results""" + base_trust = standard_results.get('overall_trust_score', 0.5) + simulation_robustness = simulation_results.get('robustness_score', 0.5) + + # Adjust trust score based on robustness + adjusted_trust = base_trust * simulation_robustness + + return max(0, min(1, adjusted_trust)) + +# Usage example +def simulation_testing_example(): + """Example of trust simulation and stress testing""" + + # Create simulation-enhanced evaluator + evaluator = SimulationEnhancedEvaluator() + + # Define simulation configuration + simulation_config = { + 'scenarios': ['adversarial_attack', 'data_drift'], + 'intensities': [0.3, 0.5, 0.7, 0.9] + } + + # Run evaluation with simulation + results = evaluator.evaluate_with_simulation( + model=my_llm_model, + data=test_data, + simulation_config=simulation_config + ) + + print("=== Trust Simulation Results ===") + print(f"Base Trust Score: {results['overall_trust_score']:.3f}") + print(f"Simulation-Adjusted Trust: {results['simulation_adjusted_trust']:.3f}") + print(f"Overall Robustness: {results['simulation_analysis']['overall_summary']['overall_robustness']:.3f}") + print(f"Deployment Readiness: {results['simulation_analysis']['overall_summary']['deployment_readiness']}") + + # Show scenario summaries + print("\nScenario Summaries:") + for scenario_name, summary in results['simulation_analysis']['scenario_summaries'].items(): + print(f" {scenario_name}:") + print(f" Average Robustness: {summary['average_robustness']:.3f}") + print(f" Max Impact: {summary['max_impact']:.3f}") + + # Show recommendations + if results['simulation_analysis']['recommendations']: + print("\nRecommendations:") + for rec in results['simulation_analysis']['recommendations']: + print(f" - {rec}") + +# Advanced usage with custom scenarios +def custom_scenario_example(): + """Example with custom stress test scenarios""" + + class CustomStressScenario(StressTestScenario): + def apply_stress(self, model, data: Dict[str, Any], intensity: float) -> Dict[str, Any]: + # Custom stress logic + stressed_data = data.copy() + # ... 
implementation + return stressed_data + + def measure_impact(self, original_results: Dict[str, Any], + stressed_results: Dict[str, Any]) -> Dict[str, Any]: + # Custom impact measurement + return {'custom_impact': 0.5} + + # Register custom scenario + simulation_engine = TrustSimulationEngine() + simulation_engine.register_scenario('custom_stress', CustomStressScenario( + 'custom_stress', 'Custom stress test scenario' + )) + + +# Use in evaluation +# ... implementation From 91ee58a9e138c2e6f351f4833f3d1f3982a8efbc Mon Sep 17 00:00:00 2001 From: Kumar Vel <11884941+Kumarvels@users.noreply.github.com> Date: Fri, 25 Jul 2025 21:43:55 +0530 Subject: [PATCH 12/28] Create trust_orchestrator.py - Trust Orchestration and Deployment Pipeline The Problem: Need seamless integration from development to production with trust monitoring Signed-off-by: Kumar Vel <11884941+Kumarvels@users.noreply.github.com> --- .../core/trust_orchestrator.py | 444 ++++++++++++++++++ 1 file changed, 444 insertions(+) create mode 100644 high_performance_system/core/trust_orchestrator.py diff --git a/high_performance_system/core/trust_orchestrator.py b/high_performance_system/core/trust_orchestrator.py new file mode 100644 index 0000000..cc0eee4 --- /dev/null +++ b/high_performance_system/core/trust_orchestrator.py @@ -0,0 +1,444 @@ +# src/orchestration/trust_orchestrator.py +""" +Trust Orchestration System - End-to-end trust management from development to production +""" + +import asyncio +import json +from datetime import datetime, timedelta +from typing import Dict, Any, List, Optional, Callable +from dataclasses import dataclass, asdict +import yaml +import logging + +logger = logging.getLogger(__name__) + +@dataclass +class TrustPipelineStage: + """A stage in the trust evaluation pipeline""" + name: str + description: str + required_trust_score: float + evaluation_config: Dict[str, Any] + timeout_seconds: int = 300 + parallel_execution: bool = False + +@dataclass +class DeploymentEnvironment: + """Configuration for a deployment environment""" + name: str + trust_requirements: Dict[str, Any] + monitoring_config: Dict[str, Any] + rollback_conditions: Dict[str, Any] + +class TrustOrchestrator: + """Orchestrates trust evaluation across the entire AI lifecycle""" + + def __init__(self): + self.pipeline_stages = [] + self.environments = {} + self.deployment_history = [] + self.monitoring_systems = {} + self.alerting_systems = {} + + def define_pipeline_stage(self, stage: TrustPipelineStage): + """Define a stage in the trust evaluation pipeline""" + self.pipeline_stages.append(stage) + logger.info(f"Added pipeline stage: {stage.name}") + + def define_environment(self, env_name: str, environment: DeploymentEnvironment): + """Define a deployment environment""" + self.environments[env_name] = environment + logger.info(f"Defined environment: {env_name}") + + async def execute_trust_pipeline(self, model, data: Dict[str, Any], + pipeline_stages: List[str] = None, + evaluator = None) -> Dict[str, Any]: + """Execute the trust evaluation pipeline""" + if evaluator is None: + from src.evaluators.composite_evaluator import CompositeTrustEvaluator + evaluator = CompositeTrustEvaluator() + + if pipeline_stages is None: + pipeline_stages = [stage.name for stage in self.pipeline_stages] + + results = {} + pipeline_success = True + failed_stages = [] + + for stage in self.pipeline_stages: + if stage.name not in pipeline_stages: + continue + + logger.info(f"Executing pipeline stage: {stage.name}") + + try: + # Execute stage with timeout + 
stage_result = await asyncio.wait_for( + self._execute_stage(stage, model, data, evaluator), + timeout=stage.timeout_seconds + ) + + results[stage.name] = stage_result + + # Check if stage meets requirements + if stage_result.get('overall_trust_score', 0) < stage.required_trust_score: + logger.warning(f"Stage {stage.name} failed trust requirement") + pipeline_success = False + failed_stages.append(stage.name) + break # Stop pipeline on failure + + except asyncio.TimeoutError: + logger.error(f"Stage {stage.name} timed out") + pipeline_success = False + failed_stages.append(stage.name) + break + except Exception as e: + logger.error(f"Stage {stage.name} failed: {e}") + pipeline_success = False + failed_stages.append(stage.name) + break + + return { + 'pipeline_success': pipeline_success, + 'stage_results': results, + 'failed_stages': failed_stages, + 'overall_trust_score': self._calculate_pipeline_trust_score(results), + 'recommendations': self._generate_pipeline_recommendations(results, failed_stages) + } + + async def _execute_stage(self, stage: TrustPipelineStage, model, + data: Dict[str, Any], evaluator) -> Dict[str, Any]: + """Execute a single pipeline stage""" + if stage.parallel_execution: + # Execute in parallel if configured + return await self._execute_parallel_stage(stage, model, data, evaluator) + else: + # Execute sequentially + return evaluator.evaluate_comprehensive_trust(model, data, **stage.evaluation_config) + + async def _execute_parallel_stage(self, stage: TrustPipelineStage, model, + data: Dict[str, Any], evaluator) -> Dict[str, Any]: + """Execute stage with parallel processing""" + # This would implement parallel evaluation of different dimensions + # For simplicity, we'll just return standard evaluation + return evaluator.evaluate_comprehensive_trust(model, data, **stage.evaluation_config) + + def _calculate_pipeline_trust_score(self, stage_results: Dict[str, Any]) -> float: + """Calculate overall trust score from pipeline results""" + if not stage_results: + return 0.5 + + scores = [result.get('overall_trust_score', 0.5) + for result in stage_results.values()] + return float(sum(scores) / len(scores)) if scores else 0.5 + + def _generate_pipeline_recommendations(self, stage_results: Dict[str, Any], + failed_stages: List[str]) -> List[str]: + """Generate recommendations based on pipeline results""" + recommendations = [] + + if failed_stages: + recommendations.append(f"Pipeline failed at stages: {', '.join(failed_stages)}") + for stage_name in failed_stages: + stage_result = stage_results.get(stage_name, {}) + score = stage_result.get('overall_trust_score', 0) + required = next((s.required_trust_score for s in self.pipeline_stages + if s.name == stage_name), 0) + recommendations.append(f" {stage_name}: Score {score:.3f} < Required {required}") + + # General recommendations from stage results + for stage_name, result in stage_results.items(): + if 'recommendations' in result: + recommendations.extend([f"{stage_name}: {rec}" for rec in result['recommendations']]) + + return recommendations + + async def deploy_with_trust_monitoring(self, model, data: Dict[str, Any], + environment_name: str, + evaluator = None) -> Dict[str, Any]: + """Deploy model with continuous trust monitoring""" + if environment_name not in self.environments: + return {'error': f'Environment {environment_name} not defined'} + + environment = self.environments[environment_name] + + # Execute trust pipeline first + pipeline_results = await self.execute_trust_pipeline(model, data, evaluator=evaluator) + 
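        # Gate deployment on the pipeline outcome: any stage that misses its required trust score,
        # times out, or raises an exception marks the pipeline as failed and blocks promotion
        # before the environment-specific checks below are applied.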
+ if not pipeline_results['pipeline_success']: + return { + 'deployment_status': 'FAILED', + 'reason': 'Trust pipeline failed', + 'pipeline_results': pipeline_results + } + + # Check environment-specific requirements + env_requirements_met = self._check_environment_requirements( + pipeline_results, environment + ) + + if not env_requirements_met: + return { + 'deployment_status': 'FAILED', + 'reason': 'Environment trust requirements not met', + 'pipeline_results': pipeline_results + } + + # Deploy model (simulated) + deployment_id = self._generate_deployment_id() + deployment_info = { + 'deployment_id': deployment_id, + 'environment': environment_name, + 'timestamp': datetime.now().isoformat(), + 'model_trust_score': pipeline_results['overall_trust_score'], + 'status': 'DEPLOYED' + } + + # Start monitoring + monitoring_task = asyncio.create_task( + self._start_continuous_monitoring(deployment_id, model, data, environment) + ) + + # Record deployment + self.deployment_history.append(deployment_info) + + return { + 'deployment_status': 'SUCCESS', + 'deployment_info': deployment_info, + 'pipeline_results': pipeline_results, + 'monitoring_started': True + } + + def _check_environment_requirements(self, pipeline_results: Dict[str, Any], + environment: DeploymentEnvironment) -> bool: + """Check if pipeline results meet environment requirements""" + overall_score = pipeline_results.get('overall_trust_score', 0) + env_min_score = environment.trust_requirements.get('minimum_trust_score', 0.7) + + return overall_score >= env_min_score + + def _generate_deployment_id(self) -> str: + """Generate unique deployment ID""" + import uuid + return str(uuid.uuid4())[:8] + + async def _start_continuous_monitoring(self, deployment_id: str, model, + data: Dict[str, Any], + environment: DeploymentEnvironment): + """Start continuous monitoring for deployed model""" + monitoring_config = environment.monitoring_config + interval_seconds = monitoring_config.get('check_interval_seconds', 300) + + logger.info(f"Starting monitoring for deployment {deployment_id}") + + while True: + try: + # Execute monitoring evaluation + monitoring_results = await self._execute_monitoring_check( + model, data, monitoring_config + ) + + # Check for alerts + alerts = self._check_monitoring_alerts( + monitoring_results, environment + ) + + if alerts: + await self._trigger_alerts(deployment_id, alerts) + + # Check rollback conditions + if self._check_rollback_conditions(monitoring_results, environment): + await self._initiate_rollback(deployment_id) + break + + await asyncio.sleep(interval_seconds) + + except Exception as e: + logger.error(f"Monitoring error for deployment {deployment_id}: {e}") + await asyncio.sleep(interval_seconds) + + async def _execute_monitoring_check(self, model, data: Dict[str, Any], + config: Dict[str, Any]): + """Execute monitoring check""" + # This would implement actual monitoring logic + # For now, simulate with basic evaluation + from src.evaluators.composite_evaluator import CompositeTrustEvaluator + evaluator = CompositeTrustEvaluator() + + return evaluator.evaluate_comprehensive_trust(model, data) + + def _check_monitoring_alerts(self, monitoring_results: Dict[str, Any], + environment: DeploymentEnvironment) -> List[Dict[str, Any]]: + """Check for monitoring alerts""" + alerts = [] + + current_score = monitoring_results.get('overall_trust_score', 0.5) + alert_threshold = environment.monitoring_config.get('alert_threshold', 0.6) + + if current_score < alert_threshold: + alerts.append({ + 'type': 
'trust_score_drop', + 'severity': 'HIGH' if current_score < alert_threshold * 0.8 else 'MEDIUM', + 'current_score': current_score, + 'threshold': alert_threshold, + 'timestamp': datetime.now().isoformat() + }) + + return alerts + + async def _trigger_alerts(self, deployment_id: str, alerts: List[Dict[str, Any]]): + """Trigger alerts for monitoring issues""" + logger.warning(f"Alerts triggered for deployment {deployment_id}: {alerts}") + # In real implementation, this would send notifications via email, Slack, etc. + + def _check_rollback_conditions(self, monitoring_results: Dict[str, Any], + environment: DeploymentEnvironment) -> bool: + """Check if rollback conditions are met""" + rollback_conditions = environment.rollback_conditions + + current_score = monitoring_results.get('overall_trust_score', 0.5) + rollback_threshold = rollback_conditions.get('critical_threshold', 0.4) + + return current_score < rollback_threshold + + async def _initiate_rollback(self, deployment_id: str): + """Initiate rollback for problematic deployment""" + logger.critical(f"Initiating rollback for deployment {deployment_id}") + # In real implementation, this would trigger actual rollback procedures + +# Configuration system +class TrustOrchestrationConfig: + """Configuration for trust orchestration""" + + def __init__(self, config_file: str = None): + self.config = self._load_config(config_file) + self.orchestrator = TrustOrchestrator() + self._setup_from_config() + + def _load_config(self, config_file: str = None) -> Dict[str, Any]: + """Load configuration from file""" + if config_file and config_file.endswith('.yaml'): + with open(config_file, 'r') as f: + return yaml.safe_load(f) + elif config_file and config_file.endswith('.json'): + with open(config_file, 'r') as f: + return json.load(f) + else: + return self._get_default_config() + + def _get_default_config(self) -> Dict[str, Any]: + """Get default configuration""" + return { + 'pipeline_stages': [ + { + 'name': 'initial_validation', + 'description': 'Initial trust validation', + 'required_trust_score': 0.6, + 'evaluation_config': {'categories': ['reliability', 'safety']}, + 'timeout_seconds': 300 + }, + { + 'name': 'comprehensive_evaluation', + 'description': 'Full trust evaluation', + 'required_trust_score': 0.7, + 'evaluation_config': {}, + 'timeout_seconds': 600 + }, + { + 'name': 'simulation_testing', + 'description': 'Stress testing and simulation', + 'required_trust_score': 0.75, + 'evaluation_config': {'simulation_enabled': True}, + 'timeout_seconds': 900 + } + ], + 'environments': { + 'development': { + 'trust_requirements': {'minimum_trust_score': 0.5}, + 'monitoring_config': { + 'check_interval_seconds': 3600, + 'alert_threshold': 0.4 + }, + 'rollback_conditions': {'critical_threshold': 0.2} + }, + 'production': { + 'trust_requirements': {'minimum_trust_score': 0.8}, + 'monitoring_config': { + 'check_interval_seconds': 300, + 'alert_threshold': 0.7 + }, + 'rollback_conditions': {'critical_threshold': 0.5} + } + } + } + + def _setup_from_config(self): + """Set up orchestrator from configuration""" + # Setup pipeline stages + for stage_config in self.config.get('pipeline_stages', []): + stage = TrustPipelineStage(**stage_config) + self.orchestrator.define_pipeline_stage(stage) + + # Setup environments + for env_name, env_config in self.config.get('environments', {}).items(): + environment = DeploymentEnvironment( + name=env_name, + trust_requirements=env_config.get('trust_requirements', {}), + monitoring_config=env_config.get('monitoring_config', 
{}), + rollback_conditions=env_config.get('rollback_conditions', {}) + ) + self.orchestrator.define_environment(env_name, environment) + +# Usage example +async def orchestration_example(): + """Example of trust orchestration in action""" + + # Load configuration + config = TrustOrchestrationConfig('trust_orchestration_config.yaml') + + # Prepare model and data + model = my_llm_model + data = test_data + + # Execute trust pipeline + print("Executing trust evaluation pipeline...") + pipeline_results = await config.orchestrator.execute_trust_pipeline(model, data) + + print(f"Pipeline Success: {pipeline_results['pipeline_success']}") + print(f"Overall Trust Score: {pipeline_results['overall_trust_score']:.3f}") + + if pipeline_results['failed_stages']: + print(f"Failed Stages: {pipeline_results['failed_stages']}") + + # Deploy to production environment + print("\nDeploying to production environment...") + deployment_results = await config.orchestrator.deploy_with_trust_monitoring( + model, data, 'production' + ) + + print(f"Deployment Status: {deployment_results['deployment_status']}") + if 'deployment_info' in deployment_results: + print(f"Deployment ID: {deployment_results['deployment_info']['deployment_id']}") + + return deployment_results + +# CLI interface +def main(): + """Main CLI interface""" + import argparse + import asyncio + + parser = argparse.ArgumentParser(description='Trust Orchestration System') + parser.add_argument('--config', help='Configuration file path') + parser.add_argument('--deploy-env', help='Environment to deploy to') + parser.add_argument('--model-path', help='Path to model') + parser.add_argument('--data-path', help='Path to evaluation data') + + args = parser.parse_args() + + # Run orchestration + asyncio.run(orchestration_example()) + +if __name__ == "__main__": + main() From 3f3d2a5143026b72e1486e09a2e7f07ca78628cb Mon Sep 17 00:00:00 2001 From: Kumar Vel <11884941+Kumarvels@users.noreply.github.com> Date: Fri, 25 Jul 2025 21:46:38 +0530 Subject: [PATCH 13/28] Create trust_api.py - Trust API and Microservices Architecture The Problem: Need to scale trust evaluation across distributed systems Signed-off-by: Kumar Vel <11884941+Kumarvels@users.noreply.github.com> --- cloudscale_apis/endpoints/trust_api.py | 338 +++++++++++++++++++++++++ 1 file changed, 338 insertions(+) create mode 100644 cloudscale_apis/endpoints/trust_api.py diff --git a/cloudscale_apis/endpoints/trust_api.py b/cloudscale_apis/endpoints/trust_api.py new file mode 100644 index 0000000..b1c18a8 --- /dev/null +++ b/cloudscale_apis/endpoints/trust_api.py @@ -0,0 +1,338 @@ +# src/api/trust_api.py +""" +Trust API and Microservices - Scalable trust evaluation services +""" + +from fastapi import FastAPI, HTTPException, BackgroundTasks +from pydantic import BaseModel, Field +from typing import Dict, Any, List, Optional +import asyncio +import uuid +from datetime import datetime +import redis +import json +import logging + +logger = logging.getLogger(__name__) + +# FastAPI app +app = FastAPI( + title="OpenTrustEval API", + description="Scalable trust evaluation API with microservices architecture", + version="1.0.0" +) + +# Redis for caching and job queue +redis_client = redis.Redis(host='localhost', port=6379, db=0) + +# Pydantic models +class TrustEvaluationRequest(BaseModel): + model_id: str = Field(..., description="Unique identifier for the model") + model_type: str = Field(default="llm", description="Type of model (llm, ml, cv, etc.)") + data: Dict[str, Any] = Field(..., description="Evaluation 
data") + evaluation_config: Optional[Dict[str, Any]] = Field(default={}, description="Evaluation configuration") + callback_url: Optional[str] = Field(default=None, description="URL for callback notifications") + +class TrustEvaluationResponse(BaseModel): + evaluation_id: str + status: str + results: Optional[Dict[str, Any]] = None + error: Optional[str] = None + created_at: datetime + completed_at: Optional[datetime] = None + +class TrustEvaluationStatus(BaseModel): + evaluation_id: str + status: str + progress: Optional[float] = None + estimated_completion: Optional[datetime] = None + +# In-memory storage for demonstration (use database in production) +evaluation_storage = {} + +# Trust evaluation service +class TrustEvaluationService: + """Service for trust evaluation operations""" + + @staticmethod + async def execute_evaluation(request: TrustEvaluationRequest) -> Dict[str, Any]: + """Execute trust evaluation""" + try: + # Import evaluator (lazy import for performance) + from src.evaluators.composite_evaluator import CompositeTrustEvaluator + + # Create evaluator + evaluator = CompositeTrustEvaluator() + + # Execute evaluation + results = evaluator.evaluate_comprehensive_trust( + model=None, # In real implementation, load model by model_id + data=request.data, + model_type=request.model_type, + **request.evaluation_config + ) + + return { + 'status': 'completed', + 'results': results, + 'completed_at': datetime.now() + } + + except Exception as e: + logger.error(f"Evaluation failed: {e}") + return { + 'status': 'failed', + 'error': str(e), + 'completed_at': datetime.now() + } + + @staticmethod + async def queue_evaluation(request: TrustEvaluationRequest) -> str: + """Queue evaluation for background processing""" + evaluation_id = str(uuid.uuid4()) + + # Store initial request + evaluation_storage[evaluation_id] = { + 'request': request.dict(), + 'status': 'queued', + 'created_at': datetime.now() + } + + # Add to Redis queue + job_data = { + 'evaluation_id': evaluation_id, + 'request': request.dict() + } + redis_client.lpush('trust_evaluation_queue', json.dumps(job_data)) + + return evaluation_id + +# API endpoints +@app.post("/evaluate", response_model=TrustEvaluationResponse) +async def evaluate_trust(request: TrustEvaluationRequest, background_tasks: BackgroundTasks): + """Submit trust evaluation request""" + try: + # Queue for background processing + evaluation_id = await TrustEvaluationService.queue_evaluation(request) + + return TrustEvaluationResponse( + evaluation_id=evaluation_id, + status="queued", + created_at=evaluation_storage[evaluation_id]['created_at'] + ) + + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) + +@app.get("/evaluation/{evaluation_id}", response_model=TrustEvaluationResponse) +async def get_evaluation_status(evaluation_id: str): + """Get evaluation status and results""" + if evaluation_id not in evaluation_storage: + raise HTTPException(status_code=404, detail="Evaluation not found") + + stored_data = evaluation_storage[evaluation_id] + + return TrustEvaluationResponse( + evaluation_id=evaluation_id, + status=stored_data['status'], + results=stored_data.get('results'), + error=stored_data.get('error'), + created_at=stored_data['created_at'], + completed_at=stored_data.get('completed_at') + ) + +@app.get("/evaluation/{evaluation_id}/status", response_model=TrustEvaluationStatus) +async def get_evaluation_progress(evaluation_id: str): + """Get evaluation progress""" + if evaluation_id not in evaluation_storage: + raise 
HTTPException(status_code=404, detail="Evaluation not found") + + stored_data = evaluation_storage[evaluation_id] + + return TrustEvaluationStatus( + evaluation_id=evaluation_id, + status=stored_data['status'], + progress=stored_data.get('progress', 0.0) if stored_data['status'] == 'processing' else + (1.0 if stored_data['status'] == 'completed' else 0.0) + ) + +# Background worker +async def trust_evaluation_worker(): + """Background worker for processing trust evaluations""" + logger.info("Starting trust evaluation worker") + + while True: + try: + # Get job from Redis queue + job_data_json = redis_client.brpop('trust_evaluation_queue', timeout=1) + + if job_data_json: + _, job_data_bytes = job_data_json + job_data = json.loads(job_data_bytes.decode('utf-8')) + + evaluation_id = job_data['evaluation_id'] + request_data = job_data['request'] + + logger.info(f"Processing evaluation {evaluation_id}") + + # Update status + evaluation_storage[evaluation_id]['status'] = 'processing' + evaluation_storage[evaluation_id]['progress'] = 0.1 + + # Execute evaluation + request = TrustEvaluationRequest(**request_data) + results = await TrustEvaluationService.execute_evaluation(request) + + # Update storage + evaluation_storage[evaluation_id].update(results) + evaluation_storage[evaluation_id]['status'] = results['status'] + + # Trigger callback if provided + if request.callback_url: + await trigger_callback(request.callback_url, evaluation_id, results) + + logger.info(f"Completed evaluation {evaluation_id}") + + await asyncio.sleep(0.1) # Prevent busy waiting + + except Exception as e: + logger.error(f"Worker error: {e}") + await asyncio.sleep(1) # Slow down on errors + +async def trigger_callback(callback_url: str, evaluation_id: str, results: Dict[str, Any]): + """Trigger callback notification""" + try: + import httpx + async with httpx.AsyncClient() as client: + callback_data = { + 'evaluation_id': evaluation_id, + 'results': results + } + await client.post(callback_url, json=callback_data) + except Exception as e: + logger.error(f"Callback failed: {e}") + +# Batch evaluation endpoints +class BatchEvaluationRequest(BaseModel): + evaluations: List[TrustEvaluationRequest] + batch_config: Optional[Dict[str, Any]] = Field(default={}, description="Batch processing configuration") + +class BatchEvaluationResponse(BaseModel): + batch_id: str + status: str + completed_evaluations: int = 0 + total_evaluations: int + results: Optional[List[Dict[str, Any]]] = None + +@app.post("/batch-evaluate", response_model=BatchEvaluationResponse) +async def batch_evaluate(request: BatchEvaluationRequest, background_tasks: BackgroundTasks): + """Submit batch trust evaluation request""" + batch_id = str(uuid.uuid4()) + + # Queue individual evaluations + evaluation_ids = [] + for eval_request in request.evaluations: + eval_id = await TrustEvaluationService.queue_evaluation(eval_request) + evaluation_ids.append(eval_id) + + # Store batch information + batch_storage[batch_id] = { + 'evaluation_ids': evaluation_ids, + 'status': 'processing', + 'total_evaluations': len(evaluation_ids), + 'completed_evaluations': 0, + 'created_at': datetime.now() + } + + # Start batch monitoring in background + background_tasks.add_task(monitor_batch_progress, batch_id, evaluation_ids) + + return BatchEvaluationResponse( + batch_id=batch_id, + status="processing", + total_evaluations=len(evaluation_ids) + ) + +# Batch storage +batch_storage = {} + +async def monitor_batch_progress(batch_id: str, evaluation_ids: List[str]): + """Monitor batch 
evaluation progress""" + while batch_storage[batch_id]['completed_evaluations'] < batch_storage[batch_id]['total_evaluations']: + completed_count = 0 + for eval_id in evaluation_ids: + if eval_id in evaluation_storage: + if evaluation_storage[eval_id]['status'] in ['completed', 'failed']: + completed_count += 1 + + batch_storage[batch_id]['completed_evaluations'] = completed_count + + if completed_count == len(evaluation_ids): + batch_storage[batch_id]['status'] = 'completed' + # Collect results + results = [] + for eval_id in evaluation_ids: + if eval_id in evaluation_storage: + results.append(evaluation_storage[eval_id]) + batch_storage[batch_id]['results'] = results + break + + await asyncio.sleep(5) # Check every 5 seconds + +# Health check endpoint +@app.get("/health") +async def health_check(): + """Health check endpoint""" + return { + "status": "healthy", + "timestamp": datetime.now().isoformat(), + "services": { + "api": "running", + "worker": "running" if is_worker_alive() else "stopped", + "redis": "connected" if is_redis_connected() else "disconnected" + } + } + +def is_worker_alive() -> bool: + """Check if worker is alive""" + # Implementation would check worker status + return True + +def is_redis_connected() -> bool: + """Check Redis connection""" + try: + redis_client.ping() + return True + except: + return False + +# Startup and shutdown events +@app.on_event("startup") +async def startup_event(): + """Startup event handler""" + logger.info("Starting Trust API service") + + # Start background worker + asyncio.create_task(trust_evaluation_worker()) + + logger.info("Trust API service started") + +@app.on_event("shutdown") +async def shutdown_event(): + """Shutdown event handler""" + logger.info("Shutting down Trust API service") + +# CLI for API management +def start_api_server(): + """Start API server""" + import uvicorn + uvicorn.run( + "src.api.trust_api:app", + host="0.0.0.0", + port=8000, + reload=True, + log_level="info" + ) + +if __name__ == "__main__": + start_api_server() From f408cdfc9d72ccee7fd303ed955d6bc2c2919c5f Mon Sep 17 00:00:00 2001 From: Kumar Vel <11884941+Kumarvels@users.noreply.github.com> Date: Fri, 25 Jul 2025 21:48:45 +0530 Subject: [PATCH 14/28] Create trust_dashboard.py - Trust Visualization and Dashboard System The Problem: Need intuitive visualization of complex trust metrics Signed-off-by: Kumar Vel <11884941+Kumarvels@users.noreply.github.com> --- .../core/trust_dashboard.py | 429 ++++++++++++++++++ 1 file changed, 429 insertions(+) create mode 100644 high_performance_system/core/trust_dashboard.py diff --git a/high_performance_system/core/trust_dashboard.py b/high_performance_system/core/trust_dashboard.py new file mode 100644 index 0000000..33ed634 --- /dev/null +++ b/high_performance_system/core/trust_dashboard.py @@ -0,0 +1,429 @@ +# src/visualization/trust_dashboard.py +""" +Trust Visualization and Dashboard System - Interactive trust monitoring and analysis +""" + +import dash +from dash import dcc, html, Input, Output, callback +import plotly.graph_objects as go +import plotly.express as px +from plotly.subplots import make_subplots +import pandas as pd +import numpy as np +from datetime import datetime, timedelta +import json +import logging + +logger = logging.getLogger(__name__) + +# Initialize Dash app +app = dash.Dash(__name__, title="Trust Evaluation Dashboard") + +class TrustVisualizationEngine: + """Engine for creating trust visualizations""" + + @staticmethod + def create_trust_radar_chart(dimension_scores: Dict[str, float], + 
title: str = "Trust Dimensions") -> go.Figure: + """Create radar chart for trust dimensions""" + categories = list(dimension_scores.keys()) + values = list(dimension_scores.values()) + + # Close the radar chart + categories.append(categories[0]) + values.append(values[0]) + + fig = go.Figure() + fig.add_trace(go.Scatterpolar( + r=values, + theta=categories, + fill='toself', + name=title + )) + + fig.update_layout( + polar=dict( + radialaxis=dict( + visible=True, + range=[0, 1] + ) + ), + showlegend=False, + title=title + ) + + return fig + + @staticmethod + def create_trust_timeline(history_data: pd.DataFrame) -> go.Figure: + """Create timeline of trust scores""" + fig = go.Figure() + + # Overall trust score timeline + fig.add_trace(go.Scatter( + x=history_data['timestamp'], + y=history_data['overall_trust'], + mode='lines+markers', + name='Overall Trust', + line=dict(color='blue') + )) + + # Add dimension scores if available + if 'dimensions' in history_data.columns: + sample_dims = history_data.iloc[0]['dimensions'] + for dim_name in sample_dims.keys(): + dim_values = [dims.get(dim_name, 0.5) for dims in history_data['dimensions']] + fig.add_trace(go.Scatter( + x=history_data['timestamp'], + y=dim_values, + mode='lines', + name=f'{dim_name}', + line=dict(dash='dot') + )) + + fig.update_layout( + title='Trust Evolution Over Time', + xaxis_title='Time', + yaxis_title='Trust Score', + yaxis=dict(range=[0, 1]) + ) + + return fig + + @staticmethod + def create_trust_heatmap(correlation_data: Dict[str, Dict[str, float]]) -> go.Figure: + """Create heatmap of trust correlations""" + # Convert to matrix format + dimensions = list(correlation_data.keys()) + correlation_matrix = [] + + for dim1 in dimensions: + row = [] + for dim2 in dimensions: + row.append(correlation_data[dim1].get(dim2, 0)) + correlation_matrix.append(row) + + fig = go.Figure(data=go.Heatmap( + z=correlation_matrix, + x=dimensions, + y=dimensions, + colorscale='RdBu', + zmid=0 + )) + + fig.update_layout( + title='Trust Dimension Correlations', + xaxis_title='Dimensions', + yaxis_title='Dimensions' + ) + + return fig + + @staticmethod + def create_risk_matrix(risk_data: Dict[str, Any]) -> go.Figure: + """Create risk matrix visualization""" + risks = risk_data.get('all_risks', []) + + if not risks: + # Create sample data for demonstration + risks = [ + {'dimension': 'safety', 'probability': 0.8, 'impact': 0.9, 'category': 'critical'}, + {'dimension': 'reliability', 'probability': 0.6, 'impact': 0.7, 'category': 'high'}, + {'dimension': 'fairness', 'probability': 0.4, 'impact': 0.5, 'category': 'medium'}, + {'dimension': 'privacy', 'probability': 0.3, 'impact': 0.8, 'category': 'high'} + ] + + # Create DataFrame + df = pd.DataFrame(risks) + + # Categorize risk levels + def categorize_risk(row): + if row['probability'] * row['impact'] > 0.7: + return 'Critical' + elif row['probability'] * row['impact'] > 0.4: + return 'High' + elif row['probability'] * row['impact'] > 0.2: + return 'Medium' + else: + return 'Low' + + df['risk_level'] = df.apply(categorize_risk, axis=1) + + # Create scatter plot + fig = px.scatter(df, x='probability', y='impact', + color='risk_level', text='dimension', + size_max=60, + title='Risk Matrix') + + # Add quadrant lines + fig.add_shape(type='line', x0=0.5, y0=0, x1=0.5, y1=1, + line=dict(color='gray', width=1, dash='dot')) + fig.add_shape(type='line', x0=0, y0=0.5, x1=1, y1=0.5, + line=dict(color='gray', width=1, dash='dot')) + + fig.update_layout( + xaxis_title='Probability', + yaxis_title='Impact', + 
xaxis=dict(range=[0, 1]), + yaxis=dict(range=[0, 1]) + ) + + return fig + +# Dashboard layout +app.layout = html.Div([ + html.H1("AI Trust Evaluation Dashboard", + style={'textAlign': 'center', 'marginBottom': 30}), + + # Control panel + html.Div([ + html.Div([ + html.Label("Model Selection:"), + dcc.Dropdown( + id='model-selector', + options=[ + {'label': 'GPT-4', 'value': 'gpt4'}, + {'label': 'LLaMA-2', 'value': 'llama2'}, + {'label': 'Claude', 'value': 'claude'}, + {'label': 'Custom Model', 'value': 'custom'} + ], + value='gpt4' + ) + ], style={'width': '30%', 'display': 'inline-block'}), + + html.Div([ + html.Label("Time Range:"), + dcc.Dropdown( + id='time-range', + options=[ + {'label': 'Last Hour', 'value': 'hour'}, + {'label': 'Last Day', 'value': 'day'}, + {'label': 'Last Week', 'value': 'week'}, + {'label': 'Last Month', 'value': 'month'} + ], + value='week' + ) + ], style={'width': '30%', 'display': 'inline-block', 'marginLeft': '5%'}), + + html.Div([ + html.Label("View Type:"), + dcc.RadioItems( + id='view-type', + options=[ + {'label': 'Current', 'value': 'current'}, + {'label': 'Historical', 'value': 'historical'}, + {'label': 'Comparison', 'value': 'comparison'} + ], + value='current', + inline=True + ) + ], style={'width': '30%', 'display': 'inline-block', 'marginLeft': '5%'}) + ], style={'marginBottom': 30}), + + # Main dashboard + html.Div([ + # Trust radar chart + html.Div([ + dcc.Graph(id='trust-radar-chart') + ], style={'width': '50%', 'display': 'inline-block'}), + + # Trust timeline + html.Div([ + dcc.Graph(id='trust-timeline') + ], style={'width': '50%', 'display': 'inline-block'}) + ]), + + # Risk matrix and heatmap + html.Div([ + html.Div([ + dcc.Graph(id='risk-matrix') + ], style={'width': '50%', 'display': 'inline-block'}), + + html.Div([ + dcc.Graph(id='trust-heatmap') + ], style={'width': '50%', 'display': 'inline-block'}) + ]), + + # Recommendations and alerts + html.Div([ + html.H3("Key Insights and Recommendations"), + html.Div(id='recommendations', + style={'padding': '20px', 'backgroundColor': '#f0f0f0', 'borderRadius': '5px'}) + ], style={'marginTop': '30px'}) +]) + +# Callbacks for interactive dashboard +@callback( + [Output('trust-radar-chart', 'figure'), + Output('trust-timeline', 'figure'), + Output('risk-matrix', 'figure'), + Output('trust-heatmap', 'figure'), + Output('recommendations', 'children')], + [Input('model-selector', 'value'), + Input('time-range', 'value'), + Input('view-type', 'value')] +) +def update_dashboard(model_id, time_range, view_type): + """Update dashboard based on selections""" + + # Generate sample data (in real implementation, fetch from database/API) + sample_dimension_scores = { + 'reliability': 0.85, + 'safety': 0.92, + 'fairness': 0.78, + 'consistency': 0.88, + 'robustness': 0.81, + 'explainability': 0.75 + } + + # Create visualizations + radar_fig = TrustVisualizationEngine.create_trust_radar_chart( + sample_dimension_scores, "Current Trust Profile" + ) + + # Generate timeline data + timeline_data = generate_sample_timeline_data(time_range) + timeline_fig = TrustVisualizationEngine.create_trust_timeline(timeline_data) + + # Generate risk data + risk_data = generate_sample_risk_data() + risk_fig = TrustVisualizationEngine.create_risk_matrix(risk_data) + + # Generate correlation data + correlation_data = generate_sample_correlation_data() + heatmap_fig = TrustVisualizationEngine.create_trust_heatmap(correlation_data) + + # Generate recommendations + recommendations = 
generate_sample_recommendations(sample_dimension_scores) + + return radar_fig, timeline_fig, risk_fig, heatmap_fig, recommendations + +def generate_sample_timeline_data(time_range: str) -> pd.DataFrame: + """Generate sample timeline data""" + end_time = datetime.now() + + if time_range == 'hour': + start_time = end_time - timedelta(hours=1) + freq = '5min' + elif time_range == 'day': + start_time = end_time - timedelta(days=1) + freq = '1h' + elif time_range == 'week': + start_time = end_time - timedelta(weeks=1) + freq = '1d' + else: # month + start_time = end_time - timedelta(days=30) + freq = '1d' + + timestamps = pd.date_range(start=start_time, end=end_time, freq=freq) + + # Generate sample trust scores with some variation + np.random.seed(42) + base_score = 0.8 + scores = base_score + np.random.normal(0, 0.05, len(timestamps)) + scores = np.clip(scores, 0, 1) # Keep between 0 and 1 + + # Generate dimension scores + dimensions = [] + for _ in timestamps: + dim_scores = { + 'reliability': np.clip(base_score + np.random.normal(0, 0.03), 0, 1), + 'safety': np.clip(base_score + 0.05 + np.random.normal(0, 0.02), 0, 1), + 'fairness': np.clip(base_score - 0.02 + np.random.normal(0, 0.04), 0, 1) + } + dimensions.append(dim_scores) + + return pd.DataFrame({ + 'timestamp': timestamps, + 'overall_trust': scores, + 'dimensions': dimensions + }) + +def generate_sample_risk_data() -> Dict[str, Any]: + """Generate sample risk data""" + return { + 'all_risks': [ + {'dimension': 'Safety', 'probability': 0.1, 'impact': 0.9, 'category': 'critical'}, + {'dimension': 'Reliability', 'probability': 0.2, 'impact': 0.7, 'category': 'high'}, + {'dimension': 'Fairness', 'probability': 0.3, 'impact': 0.6, 'category': 'medium'}, + {'dimension': 'Privacy', 'probability': 0.15, 'impact': 0.8, 'category': 'high'}, + {'dimension': 'Robustness', 'probability': 0.25, 'impact': 0.5, 'category': 'medium'} + ] + } + +def generate_sample_correlation_data() -> Dict[str, Dict[str, float]]: + """Generate sample correlation data""" + dimensions = ['reliability', 'safety', 'fairness', 'consistency', 'robustness'] + correlation_data = {} + + np.random.seed(42) + for i, dim1 in enumerate(dimensions): + correlation_data[dim1] = {} + for j, dim2 in enumerate(dimensions): + if i == j: + correlation_data[dim1][dim2] = 1.0 + else: + # Generate realistic correlations + correlation = np.random.normal(0, 0.3) + correlation_data[dim1][dim2] = np.clip(correlation, -1, 1) + + return correlation_data + +def generate_sample_recommendations(dimension_scores: Dict[str, float]) -> str: + """Generate sample recommendations""" + recommendations = [] + + for dimension, score in dimension_scores.items(): + if score < 0.7: + recommendations.append(f"⚠️ Low {dimension} score ({score:.2f}). 
Consider improvement actions.") + elif score < 0.8: + recommendations.append(f"ℹ️ {dimension} score could be improved ({score:.2f}).") + + if not recommendations: + recommendations.append("✅ All trust dimensions are performing well!") + recommendations.append("💡 Consider running stress tests to validate robustness.") + + return html.Ul([html.Li(rec) for rec in recommendations]) + +# Real-time monitoring component +class RealTimeTrustMonitor: + """Real-time trust monitoring with WebSocket updates""" + + def __init__(self): + self.clients = set() + self.monitoring_data = {} + + async def register_client(self, websocket): + """Register WebSocket client""" + self.clients.add(websocket) + + async def unregister_client(self, websocket): + """Unregister WebSocket client""" + self.clients.discard(websocket) + + async def broadcast_update(self, update_data: Dict[str, Any]): + """Broadcast trust updates to all clients""" + if self.clients: + message = json.dumps(update_data) + # In real implementation, send to all WebSocket clients + for client in self.clients.copy(): + try: + await client.send(message) + except: + await self.unregister_client(client) + +# API endpoint for real-time data +@app.server.route('/api/realtime') +async def realtime_endpoint(): + """WebSocket endpoint for real-time trust updates""" + # Implementation would handle WebSocket connections + pass + +# CLI for starting dashboard +def start_dashboard(): + """Start the trust dashboard""" + logger.info("Starting Trust Dashboard") + app.run_server(debug=True, host='0.0.0.0', port=8050) + +if __name__ == '__main__': + start_dashboard() From 971d43f64f7f2e1db0251f491dfa9b4c4dc1a496 Mon Sep 17 00:00:00 2001 From: Kumar Vel <11884941+Kumarvels@users.noreply.github.com> Date: Fri, 25 Jul 2025 21:53:35 +0530 Subject: [PATCH 15/28] Update README.md OpenTrustEval from a simple evaluation tool into a comprehensive trust management platform Signed-off-by: Kumar Vel <11884941+Kumarvels@users.noreply.github.com> --- high_performance_system/core/README.md | 50 ++++++++++++++++++++++++-- 1 file changed, 48 insertions(+), 2 deletions(-) diff --git a/high_performance_system/core/README.md b/high_performance_system/core/README.md index ee9a473..83054ca 100644 --- a/high_performance_system/core/README.md +++ b/high_performance_system/core/README.md @@ -1,4 +1,4 @@ -# 🧠 high_performance_system/core – Functional, Technical & System Design +# 🧠 High_performance_system/core – Functional, Technical & System Design --- @@ -119,6 +119,52 @@ flowchart TD - **Secure:** PII and compliance are first-class concerns. - **Extensible:** New modules can be added with minimal changes. +--- +--- +--- +--- +=================================== + Summary of Revolutionary Features +=================================== + +1. Trust Evolution Timeline +Track trust changes over time +Predict future trust trajectories +Detect anomalies in trust patterns + +2. Cross-Model Correlation Engine +Understand how trust in one model affects others +Identify risk clusters and propagation paths +System-level trust assessment + +3. Trust Decision Matrix +Customizable trust criteria for different stakeholders +Multi-profile evaluation for various contexts +Automated deployment decisions based on trust + +4. Trust Simulation and Stress Testing +Adversarial attack simulation +Data drift testing +Robustness scoring and recommendations + +5. 
Trust Orchestration Pipeline +End-to-end trust evaluation pipeline +Environment-specific deployment with monitoring +Automated rollback on trust degradation + +6. Trust API and Microservices +Scalable REST API for trust evaluation +Background job processing +Batch evaluation capabilities + +7. Trust Visualization Dashboard +Interactive radar charts and timelines +Risk matrices and correlation heatmaps +Real-time monitoring with WebSocket updates + +The seven features above transform OpenTrustEval from a simple evaluation tool into a comprehensive trust management platform that addresses the +real-world challenges of deploying trustworthy AI systems at scale. + --- -For more details, see the code and docstrings in each file, and refer to the main system README for integration and usage examples. \ No newline at end of file +For more details, see the code and docstrings in each file, and refer to the main system README for integration and usage examples. From 4cf8d18db3b64706167f0a78b7cb3320b63c37ab Mon Sep 17 00:00:00 2001 From: Kumar Vel <11884941+Kumarvels@users.noreply.github.com> Date: Fri, 25 Jul 2025 21:54:58 +0530 Subject: [PATCH 16/28] Update README.md Updates: OpenTrustEval from a simple evaluation tool into a comprehensive trust management platform Signed-off-by: Kumar Vel <11884941+Kumarvels@users.noreply.github.com> --- high_performance_system/core/README.md | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/high_performance_system/core/README.md b/high_performance_system/core/README.md index 83054ca..9f9f24d 100644 --- a/high_performance_system/core/README.md +++ b/high_performance_system/core/README.md @@ -123,10 +123,9 @@ flowchart TD --- --- --- -=================================== - Summary of Revolutionary Features -=================================== + Summary of Revolutionary Features +--- 1. 
Trust Evolution Timeline Track trust changes over time Predict future trust trajectories From 6abd328249d0aa20702b95853c69d9962a5810bc Mon Sep 17 00:00:00 2001 From: "google-labs-jules[bot]" <161369871+google-labs-jules[bot]@users.noreply.github.com> Date: Sat, 26 Jul 2025 02:10:22 +0000 Subject: [PATCH 17/28] Retest web UI and validate libraries --- =0.3.0 | 43 ++++ =0.4.27 | 1 + =1.1.0 | 12 + =1.26.0 | 19 ++ =1.3.1 | 45 ++++ =2.0.1 | 1 + =2.8.0 | 45 ++++ =3.0.10 | 9 + =3.7.0 | 24 ++ =4.9.0 | 6 + data_engineering/requirements_dashboard.txt | 2 +- launch_workflow_webui.py | 16 ++ ...low_diagnostic_report_20250726_020600.json | 242 ++++++++++++++++++ 13 files changed, 464 insertions(+), 1 deletion(-) create mode 100644 =0.3.0 create mode 100644 =0.4.27 create mode 100644 =1.1.0 create mode 100644 =1.26.0 create mode 100644 =1.3.1 create mode 100644 =2.0.1 create mode 100644 =2.8.0 create mode 100644 =3.0.10 create mode 100644 =3.7.0 create mode 100644 =4.9.0 create mode 100644 launch_workflow_webui.py create mode 100644 workflow_diagnostic_report_20250726_020600.json diff --git a/=0.3.0 b/=0.3.0 new file mode 100644 index 0000000..9aedf66 --- /dev/null +++ b/=0.3.0 @@ -0,0 +1,43 @@ +Collecting streamlit-option-menu + Downloading streamlit_option_menu-0.4.0-py3-none-any.whl.metadata (2.5 kB) +Requirement already satisfied: streamlit>=1.36 in /home/jules/.pyenv/versions/3.12.11/lib/python3.12/site-packages (from streamlit-option-menu) (1.47.1) +Requirement already satisfied: altair<6,>=4.0 in /home/jules/.pyenv/versions/3.12.11/lib/python3.12/site-packages (from streamlit>=1.36->streamlit-option-menu) (5.5.0) +Requirement already satisfied: blinker<2,>=1.5.0 in /home/jules/.pyenv/versions/3.12.11/lib/python3.12/site-packages (from streamlit>=1.36->streamlit-option-menu) (1.9.0) +Requirement already satisfied: cachetools<7,>=4.0 in /home/jules/.pyenv/versions/3.12.11/lib/python3.12/site-packages (from streamlit>=1.36->streamlit-option-menu) (5.5.2) +Requirement already satisfied: click<9,>=7.0 in /home/jules/.pyenv/versions/3.12.11/lib/python3.12/site-packages (from streamlit>=1.36->streamlit-option-menu) (8.2.1) +Requirement already satisfied: numpy<3,>=1.23 in /home/jules/.pyenv/versions/3.12.11/lib/python3.12/site-packages (from streamlit>=1.36->streamlit-option-menu) (1.26.4) +Requirement already satisfied: packaging<26,>=20 in /home/jules/.pyenv/versions/3.12.11/lib/python3.12/site-packages (from streamlit>=1.36->streamlit-option-menu) (25.0) +Requirement already satisfied: pandas<3,>=1.4.0 in /home/jules/.pyenv/versions/3.12.11/lib/python3.12/site-packages (from streamlit>=1.36->streamlit-option-menu) (2.2.2) +Requirement already satisfied: pillow<12,>=7.1.0 in /home/jules/.pyenv/versions/3.12.11/lib/python3.12/site-packages (from streamlit>=1.36->streamlit-option-menu) (11.3.0) +Requirement already satisfied: protobuf<7,>=3.20 in /home/jules/.pyenv/versions/3.12.11/lib/python3.12/site-packages (from streamlit>=1.36->streamlit-option-menu) (4.25.8) +Requirement already satisfied: pyarrow>=7.0 in /home/jules/.pyenv/versions/3.12.11/lib/python3.12/site-packages (from streamlit>=1.36->streamlit-option-menu) (21.0.0) +Requirement already satisfied: requests<3,>=2.27 in /home/jules/.pyenv/versions/3.12.11/lib/python3.12/site-packages (from streamlit>=1.36->streamlit-option-menu) (2.31.0) +Requirement already satisfied: tenacity<10,>=8.1.0 in /home/jules/.pyenv/versions/3.12.11/lib/python3.12/site-packages (from streamlit>=1.36->streamlit-option-menu) (9.1.2) +Requirement already satisfied: 
toml<2,>=0.10.1 in /home/jules/.pyenv/versions/3.12.11/lib/python3.12/site-packages (from streamlit>=1.36->streamlit-option-menu) (0.10.2) +Requirement already satisfied: typing-extensions<5,>=4.4.0 in /home/jules/.pyenv/versions/3.12.11/lib/python3.12/site-packages (from streamlit>=1.36->streamlit-option-menu) (4.14.1) +Requirement already satisfied: watchdog<7,>=2.1.5 in /home/jules/.pyenv/versions/3.12.11/lib/python3.12/site-packages (from streamlit>=1.36->streamlit-option-menu) (6.0.0) +Requirement already satisfied: gitpython!=3.1.19,<4,>=3.0.7 in /home/jules/.pyenv/versions/3.12.11/lib/python3.12/site-packages (from streamlit>=1.36->streamlit-option-menu) (3.1.45) +Requirement already satisfied: pydeck<1,>=0.8.0b4 in /home/jules/.pyenv/versions/3.12.11/lib/python3.12/site-packages (from streamlit>=1.36->streamlit-option-menu) (0.9.1) +Requirement already satisfied: tornado!=6.5.0,<7,>=6.0.3 in /home/jules/.pyenv/versions/3.12.11/lib/python3.12/site-packages (from streamlit>=1.36->streamlit-option-menu) (6.5.1) +Requirement already satisfied: jinja2 in /home/jules/.pyenv/versions/3.12.11/lib/python3.12/site-packages (from altair<6,>=4.0->streamlit>=1.36->streamlit-option-menu) (3.1.6) +Requirement already satisfied: jsonschema>=3.0 in /home/jules/.pyenv/versions/3.12.11/lib/python3.12/site-packages (from altair<6,>=4.0->streamlit>=1.36->streamlit-option-menu) (4.25.0) +Requirement already satisfied: narwhals>=1.14.2 in /home/jules/.pyenv/versions/3.12.11/lib/python3.12/site-packages (from altair<6,>=4.0->streamlit>=1.36->streamlit-option-menu) (1.48.1) +Requirement already satisfied: gitdb<5,>=4.0.1 in /home/jules/.pyenv/versions/3.12.11/lib/python3.12/site-packages (from gitpython!=3.1.19,<4,>=3.0.7->streamlit>=1.36->streamlit-option-menu) (4.0.12) +Requirement already satisfied: smmap<6,>=3.0.1 in /home/jules/.pyenv/versions/3.12.11/lib/python3.12/site-packages (from gitdb<5,>=4.0.1->gitpython!=3.1.19,<4,>=3.0.7->streamlit>=1.36->streamlit-option-menu) (5.0.2) +Requirement already satisfied: python-dateutil>=2.8.2 in /home/jules/.pyenv/versions/3.12.11/lib/python3.12/site-packages (from pandas<3,>=1.4.0->streamlit>=1.36->streamlit-option-menu) (2.9.0.post0) +Requirement already satisfied: pytz>=2020.1 in /home/jules/.pyenv/versions/3.12.11/lib/python3.12/site-packages (from pandas<3,>=1.4.0->streamlit>=1.36->streamlit-option-menu) (2025.2) +Requirement already satisfied: tzdata>=2022.7 in /home/jules/.pyenv/versions/3.12.11/lib/python3.12/site-packages (from pandas<3,>=1.4.0->streamlit>=1.36->streamlit-option-menu) (2025.2) +Requirement already satisfied: charset-normalizer<4,>=2 in /home/jules/.pyenv/versions/3.12.11/lib/python3.12/site-packages (from requests<3,>=2.27->streamlit>=1.36->streamlit-option-menu) (3.4.2) +Requirement already satisfied: idna<4,>=2.5 in /home/jules/.pyenv/versions/3.12.11/lib/python3.12/site-packages (from requests<3,>=2.27->streamlit>=1.36->streamlit-option-menu) (3.10) +Requirement already satisfied: urllib3<3,>=1.21.1 in /home/jules/.pyenv/versions/3.12.11/lib/python3.12/site-packages (from requests<3,>=2.27->streamlit>=1.36->streamlit-option-menu) (2.5.0) +Requirement already satisfied: certifi>=2017.4.17 in /home/jules/.pyenv/versions/3.12.11/lib/python3.12/site-packages (from requests<3,>=2.27->streamlit>=1.36->streamlit-option-menu) (2025.7.14) +Requirement already satisfied: MarkupSafe>=2.0 in /home/jules/.pyenv/versions/3.12.11/lib/python3.12/site-packages (from jinja2->altair<6,>=4.0->streamlit>=1.36->streamlit-option-menu) (3.0.2) +Requirement 
already satisfied: attrs>=22.2.0 in /home/jules/.pyenv/versions/3.12.11/lib/python3.12/site-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit>=1.36->streamlit-option-menu) (25.3.0) +Requirement already satisfied: jsonschema-specifications>=2023.03.6 in /home/jules/.pyenv/versions/3.12.11/lib/python3.12/site-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit>=1.36->streamlit-option-menu) (2025.4.1) +Requirement already satisfied: referencing>=0.28.4 in /home/jules/.pyenv/versions/3.12.11/lib/python3.12/site-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit>=1.36->streamlit-option-menu) (0.36.2) +Requirement already satisfied: rpds-py>=0.7.1 in /home/jules/.pyenv/versions/3.12.11/lib/python3.12/site-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit>=1.36->streamlit-option-menu) (0.26.0) +Requirement already satisfied: six>=1.5 in /home/jules/.pyenv/versions/3.12.11/lib/python3.12/site-packages (from python-dateutil>=2.8.2->pandas<3,>=1.4.0->streamlit>=1.36->streamlit-option-menu) (1.17.0) +Downloading streamlit_option_menu-0.4.0-py3-none-any.whl (829 kB) + ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 829.3/829.3 kB 22.1 MB/s eta 0:00:00 +Installing collected packages: streamlit-option-menu +Successfully installed streamlit-option-menu-0.4.0 diff --git a/=0.4.27 b/=0.4.27 new file mode 100644 index 0000000..1102861 --- /dev/null +++ b/=0.4.27 @@ -0,0 +1 @@ +Requirement already satisfied: python-magic in /home/jules/.pyenv/versions/3.12.11/lib/python3.12/site-packages (0.4.27) diff --git a/=1.1.0 b/=1.1.0 new file mode 100644 index 0000000..55de4d4 --- /dev/null +++ b/=1.1.0 @@ -0,0 +1,12 @@ +Collecting pyreadstat + Downloading pyreadstat-1.3.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (1.3 kB) +Requirement already satisfied: pandas>=1.2.0 in /home/jules/.pyenv/versions/3.12.11/lib/python3.12/site-packages (from pyreadstat) (2.2.2) +Requirement already satisfied: numpy>=1.26.0 in /home/jules/.pyenv/versions/3.12.11/lib/python3.12/site-packages (from pandas>=1.2.0->pyreadstat) (1.26.4) +Requirement already satisfied: python-dateutil>=2.8.2 in /home/jules/.pyenv/versions/3.12.11/lib/python3.12/site-packages (from pandas>=1.2.0->pyreadstat) (2.9.0.post0) +Requirement already satisfied: pytz>=2020.1 in /home/jules/.pyenv/versions/3.12.11/lib/python3.12/site-packages (from pandas>=1.2.0->pyreadstat) (2025.2) +Requirement already satisfied: tzdata>=2022.7 in /home/jules/.pyenv/versions/3.12.11/lib/python3.12/site-packages (from pandas>=1.2.0->pyreadstat) (2025.2) +Requirement already satisfied: six>=1.5 in /home/jules/.pyenv/versions/3.12.11/lib/python3.12/site-packages (from python-dateutil>=2.8.2->pandas>=1.2.0->pyreadstat) (1.17.0) +Downloading pyreadstat-1.3.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (661 kB) + ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 661.2/661.2 kB 18.6 MB/s eta 0:00:00 +Installing collected packages: pyreadstat +Successfully installed pyreadstat-1.3.0 diff --git a/=1.26.0 b/=1.26.0 new file mode 100644 index 0000000..0670781 --- /dev/null +++ b/=1.26.0 @@ -0,0 +1,19 @@ +Collecting boto3 + Downloading boto3-1.39.13-py3-none-any.whl.metadata (6.7 kB) +Collecting botocore<1.40.0,>=1.39.13 (from boto3) + Downloading botocore-1.39.13-py3-none-any.whl.metadata (5.7 kB) +Collecting jmespath<2.0.0,>=0.7.1 (from boto3) + Downloading jmespath-1.0.1-py3-none-any.whl.metadata (7.6 kB) +Collecting s3transfer<0.14.0,>=0.13.0 (from boto3) + Downloading s3transfer-0.13.1-py3-none-any.whl.metadata (1.7 kB) +Requirement 
already satisfied: python-dateutil<3.0.0,>=2.1 in /home/jules/.pyenv/versions/3.12.11/lib/python3.12/site-packages (from botocore<1.40.0,>=1.39.13->boto3) (2.9.0.post0) +Requirement already satisfied: urllib3!=2.2.0,<3,>=1.25.4 in /home/jules/.pyenv/versions/3.12.11/lib/python3.12/site-packages (from botocore<1.40.0,>=1.39.13->boto3) (2.5.0) +Requirement already satisfied: six>=1.5 in /home/jules/.pyenv/versions/3.12.11/lib/python3.12/site-packages (from python-dateutil<3.0.0,>=2.1->botocore<1.40.0,>=1.39.13->boto3) (1.17.0) +Downloading boto3-1.39.13-py3-none-any.whl (139 kB) +Downloading botocore-1.39.13-py3-none-any.whl (13.9 MB) + ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 13.9/13.9 MB 43.6 MB/s eta 0:00:00 +Downloading jmespath-1.0.1-py3-none-any.whl (20 kB) +Downloading s3transfer-0.13.1-py3-none-any.whl (85 kB) +Installing collected packages: jmespath, botocore, s3transfer, boto3 + +Successfully installed boto3-1.39.13 botocore-1.39.13 jmespath-1.0.1 s3transfer-0.13.1 diff --git a/=1.3.1 b/=1.3.1 new file mode 100644 index 0000000..47f8c67 --- /dev/null +++ b/=1.3.1 @@ -0,0 +1,45 @@ +Collecting pydrive + Downloading PyDrive-1.3.1.tar.gz (987 kB) + ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 987.4/987.4 kB 26.0 MB/s eta 0:00:00 + Preparing metadata (setup.py): started + Preparing metadata (setup.py): finished with status 'done' +Collecting google-api-python-client>=1.2 (from pydrive) + Downloading google_api_python_client-2.177.0-py3-none-any.whl.metadata (7.0 kB) +Collecting oauth2client>=4.0.0 (from pydrive) + Downloading oauth2client-4.1.3-py2.py3-none-any.whl.metadata (1.2 kB) +Requirement already satisfied: PyYAML>=3.0 in /home/jules/.pyenv/versions/3.12.11/lib/python3.12/site-packages (from pydrive) (6.0.1) +Collecting httplib2<1.0.0,>=0.19.0 (from google-api-python-client>=1.2->pydrive) + Downloading httplib2-0.22.0-py3-none-any.whl.metadata (2.6 kB) +Requirement already satisfied: google-auth!=2.24.0,!=2.25.0,<3.0.0,>=1.32.0 in /home/jules/.pyenv/versions/3.12.11/lib/python3.12/site-packages (from google-api-python-client>=1.2->pydrive) (2.40.3) +Collecting google-auth-httplib2<1.0.0,>=0.2.0 (from google-api-python-client>=1.2->pydrive) + Downloading google_auth_httplib2-0.2.0-py2.py3-none-any.whl.metadata (2.2 kB) +Requirement already satisfied: google-api-core!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.0,<3.0.0,>=1.31.5 in /home/jules/.pyenv/versions/3.12.11/lib/python3.12/site-packages (from google-api-python-client>=1.2->pydrive) (2.25.1) +Requirement already satisfied: uritemplate<5,>=3.0.1 in /home/jules/.pyenv/versions/3.12.11/lib/python3.12/site-packages (from google-api-python-client>=1.2->pydrive) (4.2.0) +Requirement already satisfied: googleapis-common-protos<2.0.0,>=1.56.2 in /home/jules/.pyenv/versions/3.12.11/lib/python3.12/site-packages (from google-api-core!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.0,<3.0.0,>=1.31.5->google-api-python-client>=1.2->pydrive) (1.70.0) +Requirement already satisfied: protobuf!=3.20.0,!=3.20.1,!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5,<7.0.0,>=3.19.5 in /home/jules/.pyenv/versions/3.12.11/lib/python3.12/site-packages (from google-api-core!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.0,<3.0.0,>=1.31.5->google-api-python-client>=1.2->pydrive) (4.25.8) +Requirement already satisfied: proto-plus<2.0.0,>=1.22.3 in /home/jules/.pyenv/versions/3.12.11/lib/python3.12/site-packages (from google-api-core!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.0,<3.0.0,>=1.31.5->google-api-python-client>=1.2->pydrive) (1.26.1) +Requirement already satisfied: requests<3.0.0,>=2.18.0 in 
/home/jules/.pyenv/versions/3.12.11/lib/python3.12/site-packages (from google-api-core!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.0,<3.0.0,>=1.31.5->google-api-python-client>=1.2->pydrive) (2.31.0) +Requirement already satisfied: cachetools<6.0,>=2.0.0 in /home/jules/.pyenv/versions/3.12.11/lib/python3.12/site-packages (from google-auth!=2.24.0,!=2.25.0,<3.0.0,>=1.32.0->google-api-python-client>=1.2->pydrive) (5.5.2) +Requirement already satisfied: pyasn1-modules>=0.2.1 in /home/jules/.pyenv/versions/3.12.11/lib/python3.12/site-packages (from google-auth!=2.24.0,!=2.25.0,<3.0.0,>=1.32.0->google-api-python-client>=1.2->pydrive) (0.4.2) +Requirement already satisfied: rsa<5,>=3.1.4 in /home/jules/.pyenv/versions/3.12.11/lib/python3.12/site-packages (from google-auth!=2.24.0,!=2.25.0,<3.0.0,>=1.32.0->google-api-python-client>=1.2->pydrive) (4.9.1) +Requirement already satisfied: pyparsing!=3.0.0,!=3.0.1,!=3.0.2,!=3.0.3,<4,>=2.4.2 in /home/jules/.pyenv/versions/3.12.11/lib/python3.12/site-packages (from httplib2<1.0.0,>=0.19.0->google-api-python-client>=1.2->pydrive) (3.2.3) +Requirement already satisfied: charset-normalizer<4,>=2 in /home/jules/.pyenv/versions/3.12.11/lib/python3.12/site-packages (from requests<3.0.0,>=2.18.0->google-api-core!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.0,<3.0.0,>=1.31.5->google-api-python-client>=1.2->pydrive) (3.4.2) +Requirement already satisfied: idna<4,>=2.5 in /home/jules/.pyenv/versions/3.12.11/lib/python3.12/site-packages (from requests<3.0.0,>=2.18.0->google-api-core!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.0,<3.0.0,>=1.31.5->google-api-python-client>=1.2->pydrive) (3.10) +Requirement already satisfied: urllib3<3,>=1.21.1 in /home/jules/.pyenv/versions/3.12.11/lib/python3.12/site-packages (from requests<3.0.0,>=2.18.0->google-api-core!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.0,<3.0.0,>=1.31.5->google-api-python-client>=1.2->pydrive) (2.5.0) +Requirement already satisfied: certifi>=2017.4.17 in /home/jules/.pyenv/versions/3.12.11/lib/python3.12/site-packages (from requests<3.0.0,>=2.18.0->google-api-core!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.0,<3.0.0,>=1.31.5->google-api-python-client>=1.2->pydrive) (2025.7.14) +Requirement already satisfied: pyasn1>=0.1.3 in /home/jules/.pyenv/versions/3.12.11/lib/python3.12/site-packages (from rsa<5,>=3.1.4->google-auth!=2.24.0,!=2.25.0,<3.0.0,>=1.32.0->google-api-python-client>=1.2->pydrive) (0.6.1) +Requirement already satisfied: six>=1.6.1 in /home/jules/.pyenv/versions/3.12.11/lib/python3.12/site-packages (from oauth2client>=4.0.0->pydrive) (1.17.0) +Downloading google_api_python_client-2.177.0-py3-none-any.whl (13.7 MB) + ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 13.7/13.7 MB 92.1 MB/s eta 0:00:00 +Downloading google_auth_httplib2-0.2.0-py2.py3-none-any.whl (9.3 kB) +Downloading httplib2-0.22.0-py3-none-any.whl (96 kB) +Downloading oauth2client-4.1.3-py2.py3-none-any.whl (98 kB) +Building wheels for collected packages: pydrive + Building wheel for pydrive (setup.py): started + Building wheel for pydrive (setup.py): finished with status 'done' + Created wheel for pydrive: filename=pydrive-1.3.1-py3-none-any.whl size=27539 sha256=0837f0594e858cf75dd820443de7c32c35f56eb6d5136425711b8aed656701ea + Stored in directory: /home/jules/.cache/pip/wheels/6c/10/da/a5b513f5b3916fc391c20ee7b4633e5cf3396d570cdd74970f +Successfully built pydrive +Installing collected packages: httplib2, oauth2client, google-auth-httplib2, google-api-python-client, pydrive + +Successfully installed google-api-python-client-2.177.0 google-auth-httplib2-0.2.0 httplib2-0.22.0 oauth2client-4.1.3 pydrive-1.3.1 
diff --git a/=2.0.1 b/=2.0.1 new file mode 100644 index 0000000..478e234 --- /dev/null +++ b/=2.0.1 @@ -0,0 +1 @@ +Requirement already satisfied: xlrd in /home/jules/.pyenv/versions/3.12.11/lib/python3.12/site-packages (2.0.2) diff --git a/=2.8.0 b/=2.8.0 new file mode 100644 index 0000000..23c089b --- /dev/null +++ b/=2.8.0 @@ -0,0 +1,45 @@ +Collecting google-cloud-storage + Downloading google_cloud_storage-3.2.0-py3-none-any.whl.metadata (13 kB) +Collecting google-auth<3.0.0,>=2.26.1 (from google-cloud-storage) + Downloading google_auth-2.40.3-py2.py3-none-any.whl.metadata (6.2 kB) +Collecting google-api-core<3.0.0,>=2.15.0 (from google-cloud-storage) + Downloading google_api_core-2.25.1-py3-none-any.whl.metadata (3.0 kB) +Collecting google-cloud-core<3.0.0,>=2.4.2 (from google-cloud-storage) + Downloading google_cloud_core-2.4.3-py2.py3-none-any.whl.metadata (2.7 kB) +Collecting google-resumable-media<3.0.0,>=2.7.2 (from google-cloud-storage) + Downloading google_resumable_media-2.7.2-py2.py3-none-any.whl.metadata (2.2 kB) +Requirement already satisfied: requests<3.0.0,>=2.22.0 in /home/jules/.pyenv/versions/3.12.11/lib/python3.12/site-packages (from google-cloud-storage) (2.31.0) +Collecting google-crc32c<2.0.0,>=1.1.3 (from google-cloud-storage) + Downloading google_crc32c-1.7.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (2.3 kB) +Collecting googleapis-common-protos<2.0.0,>=1.56.2 (from google-api-core<3.0.0,>=2.15.0->google-cloud-storage) + Downloading googleapis_common_protos-1.70.0-py3-none-any.whl.metadata (9.3 kB) +Requirement already satisfied: protobuf!=3.20.0,!=3.20.1,!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5,<7.0.0,>=3.19.5 in /home/jules/.pyenv/versions/3.12.11/lib/python3.12/site-packages (from google-api-core<3.0.0,>=2.15.0->google-cloud-storage) (4.25.8) +Requirement already satisfied: proto-plus<2.0.0,>=1.22.3 in /home/jules/.pyenv/versions/3.12.11/lib/python3.12/site-packages (from google-api-core<3.0.0,>=2.15.0->google-cloud-storage) (1.26.1) +Collecting cachetools<6.0,>=2.0.0 (from google-auth<3.0.0,>=2.26.1->google-cloud-storage) + Downloading cachetools-5.5.2-py3-none-any.whl.metadata (5.4 kB) +Collecting pyasn1-modules>=0.2.1 (from google-auth<3.0.0,>=2.26.1->google-cloud-storage) + Downloading pyasn1_modules-0.4.2-py3-none-any.whl.metadata (3.5 kB) +Collecting rsa<5,>=3.1.4 (from google-auth<3.0.0,>=2.26.1->google-cloud-storage) + Downloading rsa-4.9.1-py3-none-any.whl.metadata (5.6 kB) +Requirement already satisfied: charset-normalizer<4,>=2 in /home/jules/.pyenv/versions/3.12.11/lib/python3.12/site-packages (from requests<3.0.0,>=2.22.0->google-cloud-storage) (3.4.2) +Requirement already satisfied: idna<4,>=2.5 in /home/jules/.pyenv/versions/3.12.11/lib/python3.12/site-packages (from requests<3.0.0,>=2.22.0->google-cloud-storage) (3.10) +Requirement already satisfied: urllib3<3,>=1.21.1 in /home/jules/.pyenv/versions/3.12.11/lib/python3.12/site-packages (from requests<3.0.0,>=2.22.0->google-cloud-storage) (2.5.0) +Requirement already satisfied: certifi>=2017.4.17 in /home/jules/.pyenv/versions/3.12.11/lib/python3.12/site-packages (from requests<3.0.0,>=2.22.0->google-cloud-storage) (2025.7.14) +Requirement already satisfied: pyasn1>=0.1.3 in /home/jules/.pyenv/versions/3.12.11/lib/python3.12/site-packages (from rsa<5,>=3.1.4->google-auth<3.0.0,>=2.26.1->google-cloud-storage) (0.6.1) +Downloading google_cloud_storage-3.2.0-py3-none-any.whl (176 kB) +Downloading google_api_core-2.25.1-py3-none-any.whl (160 kB) +Downloading 
google_auth-2.40.3-py2.py3-none-any.whl (216 kB) +Downloading cachetools-5.5.2-py3-none-any.whl (10 kB) +Downloading google_cloud_core-2.4.3-py2.py3-none-any.whl (29 kB) +Downloading google_crc32c-1.7.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (32 kB) +Downloading google_resumable_media-2.7.2-py2.py3-none-any.whl (81 kB) +Downloading googleapis_common_protos-1.70.0-py3-none-any.whl (294 kB) +Downloading rsa-4.9.1-py3-none-any.whl (34 kB) +Downloading pyasn1_modules-0.4.2-py3-none-any.whl (181 kB) +Installing collected packages: rsa, pyasn1-modules, googleapis-common-protos, google-crc32c, cachetools, google-resumable-media, google-auth, google-api-core, google-cloud-core, google-cloud-storage + Attempting uninstall: cachetools + Found existing installation: cachetools 6.1.0 + Uninstalling cachetools-6.1.0: + Successfully uninstalled cachetools-6.1.0 + +Successfully installed cachetools-5.5.2 google-api-core-2.25.1 google-auth-2.40.3 google-cloud-core-2.4.3 google-cloud-storage-3.2.0 google-crc32c-1.7.1 google-resumable-media-2.7.2 googleapis-common-protos-1.70.0 pyasn1-modules-0.4.2 rsa-4.9.1 diff --git a/=3.0.10 b/=3.0.10 new file mode 100644 index 0000000..1b5b713 --- /dev/null +++ b/=3.0.10 @@ -0,0 +1,9 @@ +Collecting openpyxl + Downloading openpyxl-3.1.5-py2.py3-none-any.whl.metadata (2.5 kB) +Collecting et-xmlfile (from openpyxl) + Downloading et_xmlfile-2.0.0-py3-none-any.whl.metadata (2.7 kB) +Downloading openpyxl-3.1.5-py2.py3-none-any.whl (250 kB) +Downloading et_xmlfile-2.0.0-py3-none-any.whl (18 kB) +Installing collected packages: et-xmlfile, openpyxl + +Successfully installed et-xmlfile-2.0.0 openpyxl-3.1.5 diff --git a/=3.7.0 b/=3.7.0 new file mode 100644 index 0000000..96f4768 --- /dev/null +++ b/=3.7.0 @@ -0,0 +1,24 @@ +Collecting tables + Downloading tables-3.10.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (2.0 kB) +Requirement already satisfied: numpy>=1.20.0 in /home/jules/.pyenv/versions/3.12.11/lib/python3.12/site-packages (from tables) (1.26.4) +Requirement already satisfied: numexpr>=2.6.2 in /home/jules/.pyenv/versions/3.12.11/lib/python3.12/site-packages (from tables) (2.11.0) +Requirement already satisfied: packaging in /home/jules/.pyenv/versions/3.12.11/lib/python3.12/site-packages (from tables) (25.0) +Requirement already satisfied: py-cpuinfo in /home/jules/.pyenv/versions/3.12.11/lib/python3.12/site-packages (from tables) (9.0.0) +Collecting blosc2>=2.3.0 (from tables) + Downloading blosc2-3.6.1-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (7.0 kB) +Requirement already satisfied: typing-extensions>=4.4.0 in /home/jules/.pyenv/versions/3.12.11/lib/python3.12/site-packages (from tables) (4.14.1) +Requirement already satisfied: ndindex in /home/jules/.pyenv/versions/3.12.11/lib/python3.12/site-packages (from blosc2>=2.3.0->tables) (1.10.0) +Requirement already satisfied: msgpack in /home/jules/.pyenv/versions/3.12.11/lib/python3.12/site-packages (from blosc2>=2.3.0->tables) (1.1.1) +Requirement already satisfied: platformdirs in /home/jules/.pyenv/versions/3.12.11/lib/python3.12/site-packages (from blosc2>=2.3.0->tables) (4.3.8) +Requirement already satisfied: requests in /home/jules/.pyenv/versions/3.12.11/lib/python3.12/site-packages (from blosc2>=2.3.0->tables) (2.31.0) +Requirement already satisfied: charset-normalizer<4,>=2 in /home/jules/.pyenv/versions/3.12.11/lib/python3.12/site-packages (from requests->blosc2>=2.3.0->tables) (3.4.2) +Requirement already satisfied: idna<4,>=2.5 in 
/home/jules/.pyenv/versions/3.12.11/lib/python3.12/site-packages (from requests->blosc2>=2.3.0->tables) (3.10) +Requirement already satisfied: urllib3<3,>=1.21.1 in /home/jules/.pyenv/versions/3.12.11/lib/python3.12/site-packages (from requests->blosc2>=2.3.0->tables) (2.5.0) +Requirement already satisfied: certifi>=2017.4.17 in /home/jules/.pyenv/versions/3.12.11/lib/python3.12/site-packages (from requests->blosc2>=2.3.0->tables) (2025.7.14) +Downloading tables-3.10.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.5 MB) + ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 7.5/7.5 MB 88.3 MB/s eta 0:00:00 +Downloading blosc2-3.6.1-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (4.4 MB) + ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 4.4/4.4 MB 95.1 MB/s eta 0:00:00 +Installing collected packages: blosc2, tables + +Successfully installed blosc2-3.6.1 tables-3.10.2 diff --git a/=4.9.0 b/=4.9.0 new file mode 100644 index 0000000..6e0c295 --- /dev/null +++ b/=4.9.0 @@ -0,0 +1,6 @@ +Collecting lxml + Downloading lxml-6.0.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (6.6 kB) +Downloading lxml-6.0.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl (5.3 MB) + ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 5.3/5.3 MB 29.9 MB/s eta 0:00:00 +Installing collected packages: lxml +Successfully installed lxml-6.0.0 diff --git a/data_engineering/requirements_dashboard.txt b/data_engineering/requirements_dashboard.txt index c0f7a55..f7e9d7b 100644 --- a/data_engineering/requirements_dashboard.txt +++ b/data_engineering/requirements_dashboard.txt @@ -9,7 +9,7 @@ streamlit>=1.28.0 plotly>=5.15.0 # Database -sqlite3 # Usually included with Python +# sqlite3 # Usually included with Python # Optional: For enhanced features # scikit-learn>=1.1.0 # For advanced trust scoring diff --git a/launch_workflow_webui.py b/launch_workflow_webui.py new file mode 100644 index 0000000..03efd58 --- /dev/null +++ b/launch_workflow_webui.py @@ -0,0 +1,16 @@ +import subprocess +import sys + +def main(): + """ + Launches the Unified Workflow Web UI. + """ + try: + subprocess.run([sys.executable, "-m", "streamlit", "run", "workflow_webui.py"]) + except FileNotFoundError: + print("Error: 'streamlit' command not found. 
Please make sure Streamlit is installed.") + except Exception as e: + print(f"An error occurred: {e}") + +if __name__ == "__main__": + main() diff --git a/workflow_diagnostic_report_20250726_020600.json b/workflow_diagnostic_report_20250726_020600.json new file mode 100644 index 0000000..4c91acf --- /dev/null +++ b/workflow_diagnostic_report_20250726_020600.json @@ -0,0 +1,242 @@ +{ + "timestamp": "2025-07-26T02:06:00.806086", + "summary": { + "total_checks": 13, + "passed": 10, + "failed": 0, + "warnings": 3, + "skipped": 0 + }, + "results": [ + { + "component": "System Environment", + "status": "PASS", + "message": "System environment is ready", + "duration": 1.7772915363311768, + "timestamp": "2025-07-26T02:05:59.214582", + "details": { + "python_version": "3.12.11", + "fastapi_available": true, + "uvicorn_available": true, + "streamlit_available": true, + "pandas_available": true, + "numpy_available": true, + "plotly_available": true, + "requests_available": true, + "asyncio_available": true, + "logging_available": true, + "json_available": true, + "memory_available": "7.16 GB", + "cpu_count": 4, + "disk_free": "2.58 GB", + "working_directory": "/app", + "project_root": "/app" + } + }, + { + "component": "Data Uploads", + "status": "PASS", + "message": "Data uploads system is functional", + "duration": 0.014145374298095703, + "timestamp": "2025-07-26T02:05:59.229098", + "details": { + "uploads_dir_exists": true, + "write_permissions": true, + "dataset_connector_available": false, + "dataset_connector_error": "No module named 'data_engineering'" + } + }, + { + "component": "Data Engineering", + "status": "PASS", + "message": "Data engineering components are functional", + "duration": 0.12368917465209961, + "timestamp": "2025-07-26T02:05:59.353004", + "details": { + "data_engineering.advanced_trust_scoring_available": false, + "data_engineering.advanced_trust_scoring_error": "No module named 'data_engineering'", + "data_engineering.cleanlab_integration_available": false, + "data_engineering.cleanlab_integration_error": "No module named 'data_engineering'", + "data_engineering.trust_scoring_dashboard_available": false, + "data_engineering.trust_scoring_dashboard_error": "No module named 'data_engineering'", + "data_engineering.data_lifecycle_available": false, + "data_engineering.data_lifecycle_error": "No module named 'data_engineering'", + "trust_scoring_test": "FAIL", + "trust_scoring_error": "No module named 'data_engineering'", + "database_connectivity": true + } + }, + { + "component": "LLM Engineering", + "status": "PASS", + "message": "LLM engineering components are functional", + "duration": 0.0006618499755859375, + "timestamp": "2025-07-26T02:05:59.353965", + "details": { + "llm_engineering.llm_lifecycle_available": false, + "llm_engineering.llm_lifecycle_error": "No module named 'llm_engineering'", + "llm_engineering.providers.base_provider_available": false, + "llm_engineering.providers.base_provider_error": "No module named 'llm_engineering'", + "llm_engineering.providers.llama_factory_provider_available": false, + "llm_engineering.providers.llama_factory_provider_error": "No module named 'llm_engineering'", + "llm_lifecycle_manager": "FAIL", + "llm_lifecycle_error": "No module named 'llm_engineering'", + "llm_config_exists": true + } + }, + { + "component": "High Performance System", + "status": "PASS", + "message": "High performance system components are functional", + "duration": 0.0008711814880371094, + "timestamp": "2025-07-26T02:05:59.355021", + "details": { + 
"high_performance_system.core.ultimate_moe_system_available": false, + "high_performance_system.core.ultimate_moe_system_error": "No module named 'high_performance_system'", + "high_performance_system.core.advanced_expert_ensemble_available": false, + "high_performance_system.core.advanced_expert_ensemble_error": "No module named 'high_performance_system'", + "high_performance_system.core.intelligent_domain_router_available": false, + "high_performance_system.core.intelligent_domain_router_error": "No module named 'high_performance_system'", + "high_performance_system.core.enhanced_dataset_profiler_available": false, + "high_performance_system.core.enhanced_dataset_profiler_error": "No module named 'high_performance_system'", + "high_performance_system.core.comprehensive_pii_detector_available": false, + "high_performance_system.core.comprehensive_pii_detector_error": "No module named 'high_performance_system'", + "high_performance_system.core.advanced_trust_scorer_available": false, + "high_performance_system.core.advanced_trust_scorer_error": "No module named 'high_performance_system'", + "moe_system_test": "FAIL", + "moe_system_error": "No module named 'high_performance_system'", + "expert_ensemble": "FAIL", + "expert_ensemble_error": "No module named 'high_performance_system'" + } + }, + { + "component": "Security System", + "status": "PASS", + "message": "Security system components are functional", + "duration": 0.00035881996154785156, + "timestamp": "2025-07-26T02:05:59.355529", + "details": { + "security.auth_manager_available": false, + "security.auth_manager_error": "No module named 'security'", + "security.secrets_manager_available": false, + "security.secrets_manager_error": "No module named 'security'", + "security.security_monitor_available": false, + "security.security_monitor_error": "No module named 'security'", + "security_webui": "FAIL", + "security_webui_error": "No module named 'security'" + } + }, + { + "component": "MCP Server", + "status": "WARNING", + "message": "MCP server check completed", + "duration": 0.007213115692138672, + "timestamp": "2025-07-26T02:05:59.362933", + "details": { + "mcp_server/server.py_exists": false, + "mcp_server/client.py_exists": true, + "mcp_server/config.py_exists": true, + "mcp_server_running": false, + "mcp_server_error": "Server not responding" + } + }, + { + "component": "Cloud APIs", + "status": "PASS", + "message": "Cloud APIs check completed", + "duration": 0.23957180976867676, + "timestamp": "2025-07-26T02:05:59.602769", + "details": { + "cloudscale_apis/docs/cloud_provider_integration.md_exists": true, + "cloudscale_apis/endpoints/_exists": true, + "cloudscale_apis/webhooks/_exists": true, + "aws_sdk_available": true, + "azure_sdk_available": false, + "gcp_sdk_available": true + } + }, + { + "component": "Third-party Integrations", + "status": "PASS", + "message": "Third-party integrations check completed", + "duration": 0.0004413127899169922, + "timestamp": "2025-07-26T02:05:59.603562", + "details": { + "thirdparty_integrations.endpoints.verify_realtime_available": false, + "thirdparty_integrations.endpoints.verify_realtime_error": "No module named 'thirdparty_integrations'", + "thirdparty_integrations.endpoints.verify_batch_available": false, + "thirdparty_integrations.endpoints.verify_batch_error": "No module named 'thirdparty_integrations'", + "thirdparty_integrations.webhooks.verify_webhook_available": false, + "thirdparty_integrations.webhooks.verify_webhook_error": "No module named 'thirdparty_integrations'" + } + }, + { 
+ "component": "Tests", + "status": "WARNING", + "message": "Tests check completed", + "duration": 1.1941814422607422, + "timestamp": "2025-07-26T02:06:00.797914", + "details": { + "test_high_performance_system.py_exists": true, + "simple_unit_test.py_exists": false, + "tests/test_advanced_pipeline.py_exists": true, + "simple_test_run": "FAIL", + "test_error": "ERROR: file or directory not found: simple_unit_test.py\n\n" + } + }, + { + "component": "Plugins", + "status": "PASS", + "message": "Plugins check completed", + "duration": 0.0003821849822998047, + "timestamp": "2025-07-26T02:06:00.798560", + "details": { + "plugin_loader": "FAIL", + "plugin_loader_error": "No module named 'plugins'", + "plugins/example_plugin.py_exists": true, + "plugins/hallucination_detector.py_exists": true, + "plugins/eu_gdpr_embed.py_exists": true + } + }, + { + "component": "Analytics & Dashboards", + "status": "PASS", + "message": "Analytics and dashboards check completed", + "duration": 0.0002613067626953125, + "timestamp": "2025-07-26T02:06:00.798988", + "details": { + "high_performance_system/analytics/ultimate_analytics_dashboard.py_exists": true, + "high_performance_system/analytics/sme_dashboard.py_exists": true, + "data_engineering/trust_scoring_dashboard.py_exists": true, + "operation_sindoor_dashboard.py_exists": false, + "ultimate_dashboard": "FAIL", + "ultimate_dashboard_error": "No module named 'high_performance_system'" + } + }, + { + "component": "Production Server", + "status": "WARNING", + "message": "Production server check completed", + "duration": 0.006836891174316406, + "timestamp": "2025-07-26T02:06:00.805957", + "details": { + "superfast_production_server.py_exists": true, + "ote_api.py_exists": false, + "production_server_running": false, + "production_server_error": "Server not responding", + "performance_endpoint": "FAIL" + } + } + ], + "recommendations": [ + { + "priority": "MEDIUM", + "action": "Address warnings", + "components": [ + "MCP Server", + "Tests", + "Production Server" + ] + } + ] +} \ No newline at end of file From df27c76df796f9a701f27554b2a64bd3638bd462 Mon Sep 17 00:00:00 2001 From: "google-labs-jules[bot]" <161369871+google-labs-jules[bot]@users.noreply.github.com> Date: Sat, 26 Jul 2025 12:09:11 +0000 Subject: [PATCH 18/28] Retest web UI and validate libraries From 1c2765493318eed01e2d8af21714d8a00521dee8 Mon Sep 17 00:00:00 2001 From: "google-labs-jules[bot]" <161369871+google-labs-jules[bot]@users.noreply.github.com> Date: Sat, 26 Jul 2025 12:10:18 +0000 Subject: [PATCH 19/28] Retest web UI and validate libraries From 6f7f4720efb447a841f5a184be462f95c256ded8 Mon Sep 17 00:00:00 2001 From: "google-labs-jules[bot]" <161369871+google-labs-jules[bot]@users.noreply.github.com> Date: Sat, 26 Jul 2025 12:10:28 +0000 Subject: [PATCH 20/28] Retest web UI and validate libraries From 482f7269122ff1303f22ca73df55e35973039e5c Mon Sep 17 00:00:00 2001 From: Kumar Vel <11884941+Kumarvels@users.noreply.github.com> Date: Sat, 26 Jul 2025 18:11:19 +0530 Subject: [PATCH 21/28] testing scripts move into tests folder --- =0.3.0 => tests/=0.3.0 | 0 =0.4.27 => tests/=0.4.27 | 0 =1.1.0 => tests/=1.1.0 | 0 =1.26.0 => tests/=1.26.0 | 0 =1.3.1 => tests/=1.3.1 | 0 =2.0.1 => tests/=2.0.1 | 0 =2.8.0 => tests/=2.8.0 | 0 =3.0.10 => tests/=3.0.10 | 0 =3.7.0 => tests/=3.7.0 | 0 =4.9.0 => tests/=4.9.0 | 0 .../Custom_Cleanlab_Pipeline_Demo.ipynb | 0 .../cleanlab_plugin_test_output.txt | Bin .../custom_cleanlab_pipeline.py | 0 .../custom_cleanlab_webui.py | 0 
..._world_trust_scoring_report_20250723_224607.json | 0 ..._world_trust_scoring_report_20250723_225126.json | 0 ..._world_trust_scoring_report_20250723_225309.json | 0 ..._world_trust_scoring_report_20250723_225418.json | 0 ..._world_trust_scoring_report_20250723_225548.json | 0 ..._world_trust_scoring_report_20250723_225707.json | 0 ..._world_trust_scoring_report_20250723_225817.json | 0 ..._world_trust_scoring_report_20250723_225831.json | 0 ..._world_trust_scoring_report_20250723_225943.json | 0 .../test_high_performance_system.py | 0 .../test_moe_integration.py | 0 .../test_operation_sindoor_realtime.py | 0 26 files changed, 0 insertions(+), 0 deletions(-) rename =0.3.0 => tests/=0.3.0 (100%) rename =0.4.27 => tests/=0.4.27 (100%) rename =1.1.0 => tests/=1.1.0 (100%) rename =1.26.0 => tests/=1.26.0 (100%) rename =1.3.1 => tests/=1.3.1 (100%) rename =2.0.1 => tests/=2.0.1 (100%) rename =2.8.0 => tests/=2.8.0 (100%) rename =3.0.10 => tests/=3.0.10 (100%) rename =3.7.0 => tests/=3.7.0 (100%) rename =4.9.0 => tests/=4.9.0 (100%) rename Custom_Cleanlab_Pipeline_Demo.ipynb => tests/Custom_Cleanlab_Pipeline_Demo.ipynb (100%) rename cleanlab_plugin_test_output.txt => tests/cleanlab_plugin_test_output.txt (100%) rename custom_cleanlab_pipeline.py => tests/custom_cleanlab_pipeline.py (100%) rename custom_cleanlab_webui.py => tests/custom_cleanlab_webui.py (100%) rename real_world_trust_scoring_report_20250723_224607.json => tests/real_world_trust_scoring_report_20250723_224607.json (100%) rename real_world_trust_scoring_report_20250723_225126.json => tests/real_world_trust_scoring_report_20250723_225126.json (100%) rename real_world_trust_scoring_report_20250723_225309.json => tests/real_world_trust_scoring_report_20250723_225309.json (100%) rename real_world_trust_scoring_report_20250723_225418.json => tests/real_world_trust_scoring_report_20250723_225418.json (100%) rename real_world_trust_scoring_report_20250723_225548.json => tests/real_world_trust_scoring_report_20250723_225548.json (100%) rename real_world_trust_scoring_report_20250723_225707.json => tests/real_world_trust_scoring_report_20250723_225707.json (100%) rename real_world_trust_scoring_report_20250723_225817.json => tests/real_world_trust_scoring_report_20250723_225817.json (100%) rename real_world_trust_scoring_report_20250723_225831.json => tests/real_world_trust_scoring_report_20250723_225831.json (100%) rename real_world_trust_scoring_report_20250723_225943.json => tests/real_world_trust_scoring_report_20250723_225943.json (100%) rename test_high_performance_system.py => tests/test_high_performance_system.py (100%) rename test_moe_integration.py => tests/test_moe_integration.py (100%) rename test_operation_sindoor_realtime.py => tests/test_operation_sindoor_realtime.py (100%) diff --git a/=0.3.0 b/tests/=0.3.0 similarity index 100% rename from =0.3.0 rename to tests/=0.3.0 diff --git a/=0.4.27 b/tests/=0.4.27 similarity index 100% rename from =0.4.27 rename to tests/=0.4.27 diff --git a/=1.1.0 b/tests/=1.1.0 similarity index 100% rename from =1.1.0 rename to tests/=1.1.0 diff --git a/=1.26.0 b/tests/=1.26.0 similarity index 100% rename from =1.26.0 rename to tests/=1.26.0 diff --git a/=1.3.1 b/tests/=1.3.1 similarity index 100% rename from =1.3.1 rename to tests/=1.3.1 diff --git a/=2.0.1 b/tests/=2.0.1 similarity index 100% rename from =2.0.1 rename to tests/=2.0.1 diff --git a/=2.8.0 b/tests/=2.8.0 similarity index 100% rename from =2.8.0 rename to tests/=2.8.0 diff --git a/=3.0.10 b/tests/=3.0.10 similarity index 100% 
rename from =3.0.10 rename to tests/=3.0.10 diff --git a/=3.7.0 b/tests/=3.7.0 similarity index 100% rename from =3.7.0 rename to tests/=3.7.0 diff --git a/=4.9.0 b/tests/=4.9.0 similarity index 100% rename from =4.9.0 rename to tests/=4.9.0 diff --git a/Custom_Cleanlab_Pipeline_Demo.ipynb b/tests/Custom_Cleanlab_Pipeline_Demo.ipynb similarity index 100% rename from Custom_Cleanlab_Pipeline_Demo.ipynb rename to tests/Custom_Cleanlab_Pipeline_Demo.ipynb diff --git a/cleanlab_plugin_test_output.txt b/tests/cleanlab_plugin_test_output.txt similarity index 100% rename from cleanlab_plugin_test_output.txt rename to tests/cleanlab_plugin_test_output.txt diff --git a/custom_cleanlab_pipeline.py b/tests/custom_cleanlab_pipeline.py similarity index 100% rename from custom_cleanlab_pipeline.py rename to tests/custom_cleanlab_pipeline.py diff --git a/custom_cleanlab_webui.py b/tests/custom_cleanlab_webui.py similarity index 100% rename from custom_cleanlab_webui.py rename to tests/custom_cleanlab_webui.py diff --git a/real_world_trust_scoring_report_20250723_224607.json b/tests/real_world_trust_scoring_report_20250723_224607.json similarity index 100% rename from real_world_trust_scoring_report_20250723_224607.json rename to tests/real_world_trust_scoring_report_20250723_224607.json diff --git a/real_world_trust_scoring_report_20250723_225126.json b/tests/real_world_trust_scoring_report_20250723_225126.json similarity index 100% rename from real_world_trust_scoring_report_20250723_225126.json rename to tests/real_world_trust_scoring_report_20250723_225126.json diff --git a/real_world_trust_scoring_report_20250723_225309.json b/tests/real_world_trust_scoring_report_20250723_225309.json similarity index 100% rename from real_world_trust_scoring_report_20250723_225309.json rename to tests/real_world_trust_scoring_report_20250723_225309.json diff --git a/real_world_trust_scoring_report_20250723_225418.json b/tests/real_world_trust_scoring_report_20250723_225418.json similarity index 100% rename from real_world_trust_scoring_report_20250723_225418.json rename to tests/real_world_trust_scoring_report_20250723_225418.json diff --git a/real_world_trust_scoring_report_20250723_225548.json b/tests/real_world_trust_scoring_report_20250723_225548.json similarity index 100% rename from real_world_trust_scoring_report_20250723_225548.json rename to tests/real_world_trust_scoring_report_20250723_225548.json diff --git a/real_world_trust_scoring_report_20250723_225707.json b/tests/real_world_trust_scoring_report_20250723_225707.json similarity index 100% rename from real_world_trust_scoring_report_20250723_225707.json rename to tests/real_world_trust_scoring_report_20250723_225707.json diff --git a/real_world_trust_scoring_report_20250723_225817.json b/tests/real_world_trust_scoring_report_20250723_225817.json similarity index 100% rename from real_world_trust_scoring_report_20250723_225817.json rename to tests/real_world_trust_scoring_report_20250723_225817.json diff --git a/real_world_trust_scoring_report_20250723_225831.json b/tests/real_world_trust_scoring_report_20250723_225831.json similarity index 100% rename from real_world_trust_scoring_report_20250723_225831.json rename to tests/real_world_trust_scoring_report_20250723_225831.json diff --git a/real_world_trust_scoring_report_20250723_225943.json b/tests/real_world_trust_scoring_report_20250723_225943.json similarity index 100% rename from real_world_trust_scoring_report_20250723_225943.json rename to 
tests/real_world_trust_scoring_report_20250723_225943.json diff --git a/test_high_performance_system.py b/tests/test_high_performance_system.py similarity index 100% rename from test_high_performance_system.py rename to tests/test_high_performance_system.py diff --git a/test_moe_integration.py b/tests/test_moe_integration.py similarity index 100% rename from test_moe_integration.py rename to tests/test_moe_integration.py diff --git a/test_operation_sindoor_realtime.py b/tests/test_operation_sindoor_realtime.py similarity index 100% rename from test_operation_sindoor_realtime.py rename to tests/test_operation_sindoor_realtime.py From 0239ecbe1d84e4ccfbc2df9cb8b056cf8edb6032 Mon Sep 17 00:00:00 2001 From: "google-labs-jules[bot]" <161369871+google-labs-jules[bot]@users.noreply.github.com> Date: Sat, 26 Jul 2025 12:43:17 +0000 Subject: [PATCH 22/28] I am moving the test files. --- .../real_world_trust_scoring_report_20250723_224607.json | 0 .../real_world_trust_scoring_report_20250723_225126.json | 0 .../real_world_trust_scoring_report_20250723_225309.json | 0 .../real_world_trust_scoring_report_20250723_225418.json | 0 .../real_world_trust_scoring_report_20250723_225548.json | 0 .../real_world_trust_scoring_report_20250723_225707.json | 0 .../real_world_trust_scoring_report_20250723_225817.json | 0 .../real_world_trust_scoring_report_20250723_225831.json | 0 .../real_world_trust_scoring_report_20250723_225943.json | 0 9 files changed, 0 insertions(+), 0 deletions(-) rename real_world_trust_scoring_report_20250723_224607.json => tests/real_world_trust_scoring_report_20250723_224607.json (100%) rename real_world_trust_scoring_report_20250723_225126.json => tests/real_world_trust_scoring_report_20250723_225126.json (100%) rename real_world_trust_scoring_report_20250723_225309.json => tests/real_world_trust_scoring_report_20250723_225309.json (100%) rename real_world_trust_scoring_report_20250723_225418.json => tests/real_world_trust_scoring_report_20250723_225418.json (100%) rename real_world_trust_scoring_report_20250723_225548.json => tests/real_world_trust_scoring_report_20250723_225548.json (100%) rename real_world_trust_scoring_report_20250723_225707.json => tests/real_world_trust_scoring_report_20250723_225707.json (100%) rename real_world_trust_scoring_report_20250723_225817.json => tests/real_world_trust_scoring_report_20250723_225817.json (100%) rename real_world_trust_scoring_report_20250723_225831.json => tests/real_world_trust_scoring_report_20250723_225831.json (100%) rename real_world_trust_scoring_report_20250723_225943.json => tests/real_world_trust_scoring_report_20250723_225943.json (100%) diff --git a/real_world_trust_scoring_report_20250723_224607.json b/tests/real_world_trust_scoring_report_20250723_224607.json similarity index 100% rename from real_world_trust_scoring_report_20250723_224607.json rename to tests/real_world_trust_scoring_report_20250723_224607.json diff --git a/real_world_trust_scoring_report_20250723_225126.json b/tests/real_world_trust_scoring_report_20250723_225126.json similarity index 100% rename from real_world_trust_scoring_report_20250723_225126.json rename to tests/real_world_trust_scoring_report_20250723_225126.json diff --git a/real_world_trust_scoring_report_20250723_225309.json b/tests/real_world_trust_scoring_report_20250723_225309.json similarity index 100% rename from real_world_trust_scoring_report_20250723_225309.json rename to tests/real_world_trust_scoring_report_20250723_225309.json diff --git 
a/real_world_trust_scoring_report_20250723_225418.json b/tests/real_world_trust_scoring_report_20250723_225418.json similarity index 100% rename from real_world_trust_scoring_report_20250723_225418.json rename to tests/real_world_trust_scoring_report_20250723_225418.json diff --git a/real_world_trust_scoring_report_20250723_225548.json b/tests/real_world_trust_scoring_report_20250723_225548.json similarity index 100% rename from real_world_trust_scoring_report_20250723_225548.json rename to tests/real_world_trust_scoring_report_20250723_225548.json diff --git a/real_world_trust_scoring_report_20250723_225707.json b/tests/real_world_trust_scoring_report_20250723_225707.json similarity index 100% rename from real_world_trust_scoring_report_20250723_225707.json rename to tests/real_world_trust_scoring_report_20250723_225707.json diff --git a/real_world_trust_scoring_report_20250723_225817.json b/tests/real_world_trust_scoring_report_20250723_225817.json similarity index 100% rename from real_world_trust_scoring_report_20250723_225817.json rename to tests/real_world_trust_scoring_report_20250723_225817.json diff --git a/real_world_trust_scoring_report_20250723_225831.json b/tests/real_world_trust_scoring_report_20250723_225831.json similarity index 100% rename from real_world_trust_scoring_report_20250723_225831.json rename to tests/real_world_trust_scoring_report_20250723_225831.json diff --git a/real_world_trust_scoring_report_20250723_225943.json b/tests/real_world_trust_scoring_report_20250723_225943.json similarity index 100% rename from real_world_trust_scoring_report_20250723_225943.json rename to tests/real_world_trust_scoring_report_20250723_225943.json From b13b0f5a33799287e9e450df69e3c362bc997f96 Mon Sep 17 00:00:00 2001 From: "google-labs-jules[bot]" <161369871+google-labs-jules[bot]@users.noreply.github.com> Date: Sat, 26 Jul 2025 16:13:36 +0000 Subject: [PATCH 23/28] I am re-testing the web UI and moving the test files. 
--- ...low_diagnostic_report_20250726_154015.json | 265 ++++++++++++++++++ 1 file changed, 265 insertions(+) create mode 100644 tests/workflow_diagnostic_report_20250726_154015.json diff --git a/tests/workflow_diagnostic_report_20250726_154015.json b/tests/workflow_diagnostic_report_20250726_154015.json new file mode 100644 index 0000000..e13e579 --- /dev/null +++ b/tests/workflow_diagnostic_report_20250726_154015.json @@ -0,0 +1,265 @@ +{ + "timestamp": "2025-07-26T15:40:15.501428", + "summary": { + "total_checks": 13, + "passed": 11, + "failed": 0, + "warnings": 2, + "skipped": 0 + }, + "results": [ + { + "component": "System Environment", + "status": "PASS", + "message": "System environment is ready", + "duration": 2.6867048740386963, + "timestamp": "2025-07-26T15:40:14.096871", + "details": { + "python_version": "3.12.11", + "fastapi_available": true, + "uvicorn_available": true, + "streamlit_available": true, + "pandas_available": true, + "numpy_available": true, + "plotly_available": true, + "requests_available": true, + "asyncio_available": true, + "logging_available": true, + "json_available": true, + "memory_available": "6.99 GB", + "cpu_count": 4, + "disk_free": "2.57 GB", + "working_directory": "/app", + "project_root": "/app" + } + }, + { + "component": "Data Uploads", + "status": "PASS", + "message": "Data uploads system is functional", + "duration": 0.0013608932495117188, + "timestamp": "2025-07-26T15:40:14.098495", + "details": { + "uploads_dir_exists": true, + "write_permissions": true, + "dataset_connector_available": false, + "dataset_connector_error": "No module named 'data_engineering'" + } + }, + { + "component": "Data Engineering", + "status": "PASS", + "message": "Data engineering components are functional", + "duration": 0.012610912322998047, + "timestamp": "2025-07-26T15:40:14.111292", + "details": { + "data_engineering.advanced_trust_scoring_available": false, + "data_engineering.advanced_trust_scoring_error": "No module named 'data_engineering'", + "data_engineering.cleanlab_integration_available": false, + "data_engineering.cleanlab_integration_error": "No module named 'data_engineering'", + "data_engineering.trust_scoring_dashboard_available": false, + "data_engineering.trust_scoring_dashboard_error": "No module named 'data_engineering'", + "data_engineering.data_lifecycle_available": false, + "data_engineering.data_lifecycle_error": "No module named 'data_engineering'", + "trust_scoring_test": "FAIL", + "trust_scoring_error": "No module named 'data_engineering'", + "database_connectivity": true + } + }, + { + "component": "LLM Engineering", + "status": "PASS", + "message": "LLM engineering components are functional", + "duration": 0.000698089599609375, + "timestamp": "2025-07-26T15:40:14.112314", + "details": { + "llm_engineering.llm_lifecycle_available": false, + "llm_engineering.llm_lifecycle_error": "No module named 'llm_engineering'", + "llm_engineering.providers.base_provider_available": false, + "llm_engineering.providers.base_provider_error": "No module named 'llm_engineering'", + "llm_engineering.providers.llama_factory_provider_available": false, + "llm_engineering.providers.llama_factory_provider_error": "No module named 'llm_engineering'", + "llm_lifecycle_manager": "FAIL", + "llm_lifecycle_error": "No module named 'llm_engineering'", + "llm_config_exists": true + } + }, + { + "component": "High Performance System", + "status": "PASS", + "message": "High performance system components are functional", + "duration": 0.0009016990661621094, + 
"timestamp": "2025-07-26T15:40:14.113425", + "details": { + "high_performance_system.core.ultimate_moe_system_available": false, + "high_performance_system.core.ultimate_moe_system_error": "No module named 'high_performance_system'", + "high_performance_system.core.advanced_expert_ensemble_available": false, + "high_performance_system.core.advanced_expert_ensemble_error": "No module named 'high_performance_system'", + "high_performance_system.core.intelligent_domain_router_available": false, + "high_performance_system.core.intelligent_domain_router_error": "No module named 'high_performance_system'", + "high_performance_system.core.enhanced_dataset_profiler_available": false, + "high_performance_system.core.enhanced_dataset_profiler_error": "No module named 'high_performance_system'", + "high_performance_system.core.comprehensive_pii_detector_available": false, + "high_performance_system.core.comprehensive_pii_detector_error": "No module named 'high_performance_system'", + "high_performance_system.core.advanced_trust_scorer_available": false, + "high_performance_system.core.advanced_trust_scorer_error": "No module named 'high_performance_system'", + "moe_system_test": "FAIL", + "moe_system_error": "No module named 'high_performance_system'", + "expert_ensemble": "FAIL", + "expert_ensemble_error": "No module named 'high_performance_system'" + } + }, + { + "component": "Security System", + "status": "PASS", + "message": "Security system components are functional", + "duration": 0.0003681182861328125, + "timestamp": "2025-07-26T15:40:14.113942", + "details": { + "security.auth_manager_available": false, + "security.auth_manager_error": "No module named 'security'", + "security.secrets_manager_available": false, + "security.secrets_manager_error": "No module named 'security'", + "security.security_monitor_available": false, + "security.security_monitor_error": "No module named 'security'", + "security_webui": "FAIL", + "security_webui_error": "No module named 'security'" + } + }, + { + "component": "MCP Server", + "status": "WARNING", + "message": "MCP server check completed", + "duration": 0.006127595901489258, + "timestamp": "2025-07-26T15:40:14.120204", + "details": { + "mcp_server/server.py_exists": false, + "mcp_server/client.py_exists": true, + "mcp_server/config.py_exists": true, + "mcp_server_running": false, + "mcp_server_error": "Server not responding" + } + }, + { + "component": "Cloud APIs", + "status": "PASS", + "message": "Cloud APIs check completed", + "duration": 0.202164888381958, + "timestamp": "2025-07-26T15:40:14.322602", + "details": { + "cloudscale_apis/docs/cloud_provider_integration.md_exists": true, + "cloudscale_apis/endpoints/_exists": true, + "cloudscale_apis/webhooks/_exists": true, + "aws_sdk_available": true, + "azure_sdk_available": false, + "gcp_sdk_available": true + } + }, + { + "component": "Third-party Integrations", + "status": "PASS", + "message": "Third-party integrations check completed", + "duration": 0.0006895065307617188, + "timestamp": "2025-07-26T15:40:14.323546", + "details": { + "thirdparty_integrations.endpoints.verify_realtime_available": false, + "thirdparty_integrations.endpoints.verify_realtime_error": "No module named 'thirdparty_integrations'", + "thirdparty_integrations.endpoints.verify_batch_available": false, + "thirdparty_integrations.endpoints.verify_batch_error": "No module named 'thirdparty_integrations'", + "thirdparty_integrations.webhooks.verify_webhook_available": false, + "thirdparty_integrations.webhooks.verify_webhook_error": 
"No module named 'thirdparty_integrations'" + } + }, + { + "component": "Tests", + "status": "WARNING", + "message": "Tests check completed", + "duration": 1.0560717582702637, + "timestamp": "2025-07-26T15:40:15.379817", + "details": { + "test_high_performance_system.py_exists": true, + "simple_unit_test.py_exists": false, + "tests/test_advanced_pipeline.py_exists": true, + "simple_test_run": "FAIL", + "test_error": "ERROR: file or directory not found: simple_unit_test.py\n\n" + } + }, + { + "component": "Plugins", + "status": "PASS", + "message": "Plugins check completed", + "duration": 0.0003604888916015625, + "timestamp": "2025-07-26T15:40:15.380444", + "details": { + "plugin_loader": "FAIL", + "plugin_loader_error": "No module named 'plugins'", + "plugins/example_plugin.py_exists": true, + "plugins/hallucination_detector.py_exists": true, + "plugins/eu_gdpr_embed.py_exists": true + } + }, + { + "component": "Analytics & Dashboards", + "status": "PASS", + "message": "Analytics and dashboards check completed", + "duration": 0.0002505779266357422, + "timestamp": "2025-07-26T15:40:15.380855", + "details": { + "high_performance_system/analytics/ultimate_analytics_dashboard.py_exists": true, + "high_performance_system/analytics/sme_dashboard.py_exists": true, + "data_engineering/trust_scoring_dashboard.py_exists": true, + "operation_sindoor_dashboard.py_exists": false, + "ultimate_dashboard": "FAIL", + "ultimate_dashboard_error": "No module named 'high_performance_system'" + } + }, + { + "component": "Production Server", + "status": "PASS", + "message": "Production server check completed", + "duration": 0.11996245384216309, + "timestamp": "2025-07-26T15:40:15.501130", + "details": { + "superfast_production_server.py_exists": true, + "ote_api.py_exists": false, + "production_server_running": true, + "production_server_health": { + "status": "healthy", + "timestamp": "2025-07-26T15:40:15.458071", + "uptime": 48819.97430515289, + "cache_stats": { + "hits": 0, + "misses": 0 + }, + "system_ready": true + }, + "performance_endpoint": "PASS", + "performance_data": { + "performance_stats": { + "total_requests": 0, + "total_latency": 0.0, + "avg_latency": 0.0, + "cache_hit_rate": 0.0, + "error_rate": 0.0 + }, + "cache_stats": { + "hits": 0, + "misses": 0 + }, + "cache_size": 0, + "timestamp": "2025-07-26T15:40:15.497127" + } + } + } + ], + "recommendations": [ + { + "priority": "MEDIUM", + "action": "Address warnings", + "components": [ + "MCP Server", + "Tests" + ] + } + ] +} \ No newline at end of file From 943a0a312b268918cac7aaa7e59e803dd058bedc Mon Sep 17 00:00:00 2001 From: Kumar Vel <11884941+Kumarvels@users.noreply.github.com> Date: Thu, 31 Jul 2025 11:34:10 +0530 Subject: [PATCH 24/28] Create SECURITY.md Security policy addition Signed-off-by: Kumar Vel <11884941+Kumarvels@users.noreply.github.com> --- SECURITY.md | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) create mode 100644 SECURITY.md diff --git a/SECURITY.md b/SECURITY.md new file mode 100644 index 0000000..034e848 --- /dev/null +++ b/SECURITY.md @@ -0,0 +1,21 @@ +# Security Policy + +## Supported Versions + +Use this section to tell people about which versions of your project are +currently being supported with security updates. + +| Version | Supported | +| ------- | ------------------ | +| 5.1.x | :white_check_mark: | +| 5.0.x | :x: | +| 4.0.x | :white_check_mark: | +| < 4.0 | :x: | + +## Reporting a Vulnerability + +Use this section to tell people how to report a vulnerability. 
+ +Tell them where to go, how often they can expect to get an update on a +reported vulnerability, what to expect if the vulnerability is accepted or +declined, etc. From 43680cc24e21ca617161168ba46d4b850e89df63 Mon Sep 17 00:00:00 2001 From: Kumar Vel <11884941+Kumarvels@users.noreply.github.com> Date: Thu, 31 Jul 2025 11:35:32 +0530 Subject: [PATCH 25/28] Update issue templates Bug report and features request templates --- .github/ISSUE_TEMPLATE/bug_report.md | 38 +++++++++++++++++++++++ .github/ISSUE_TEMPLATE/custom.md | 10 ++++++ .github/ISSUE_TEMPLATE/feature_request.md | 20 ++++++++++++ 3 files changed, 68 insertions(+) create mode 100644 .github/ISSUE_TEMPLATE/bug_report.md create mode 100644 .github/ISSUE_TEMPLATE/custom.md create mode 100644 .github/ISSUE_TEMPLATE/feature_request.md diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md new file mode 100644 index 0000000..dd84ea7 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -0,0 +1,38 @@ +--- +name: Bug report +about: Create a report to help us improve +title: '' +labels: '' +assignees: '' + +--- + +**Describe the bug** +A clear and concise description of what the bug is. + +**To Reproduce** +Steps to reproduce the behavior: +1. Go to '...' +2. Click on '....' +3. Scroll down to '....' +4. See error + +**Expected behavior** +A clear and concise description of what you expected to happen. + +**Screenshots** +If applicable, add screenshots to help explain your problem. + +**Desktop (please complete the following information):** + - OS: [e.g. iOS] + - Browser [e.g. chrome, safari] + - Version [e.g. 22] + +**Smartphone (please complete the following information):** + - Device: [e.g. iPhone6] + - OS: [e.g. iOS8.1] + - Browser [e.g. stock browser, safari] + - Version [e.g. 22] + +**Additional context** +Add any other context about the problem here. diff --git a/.github/ISSUE_TEMPLATE/custom.md b/.github/ISSUE_TEMPLATE/custom.md new file mode 100644 index 0000000..48d5f81 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/custom.md @@ -0,0 +1,10 @@ +--- +name: Custom issue template +about: Describe this issue template's purpose here. +title: '' +labels: '' +assignees: '' + +--- + + diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md new file mode 100644 index 0000000..bbcbbe7 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/feature_request.md @@ -0,0 +1,20 @@ +--- +name: Feature request +about: Suggest an idea for this project +title: '' +labels: '' +assignees: '' + +--- + +**Is your feature request related to a problem? Please describe.** +A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] + +**Describe the solution you'd like** +A clear and concise description of what you want to happen. + +**Describe alternatives you've considered** +A clear and concise description of any alternative solutions or features you've considered. + +**Additional context** +Add any other context or screenshots about the feature request here. 
From 2726c2e435d442bb30cea649bc998bf00d0784e1 Mon Sep 17 00:00:00 2001 From: Kumar Vel <11884941+Kumarvels@users.noreply.github.com> Date: Thu, 31 Jul 2025 20:31:40 +0530 Subject: [PATCH 26/28] Create codeql.yml codeql configuration Signed-off-by: Kumar Vel <11884941+Kumarvels@users.noreply.github.com> --- .github/workflows/codeql.yml | 100 +++++++++++++++++++++++++++++++++++ 1 file changed, 100 insertions(+) create mode 100644 .github/workflows/codeql.yml diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml new file mode 100644 index 0000000..6305650 --- /dev/null +++ b/.github/workflows/codeql.yml @@ -0,0 +1,100 @@ +# For most projects, this workflow file will not need changing; you simply need +# to commit it to your repository. +# +# You may wish to alter this file to override the set of languages analyzed, +# or to provide custom queries or build logic. +# +# ******** NOTE ******** +# We have attempted to detect the languages in your repository. Please check +# the `language` matrix defined below to confirm you have the correct set of +# supported CodeQL languages. +# +name: "CodeQL Advanced" + +on: + push: + branches: [ "main" ] + pull_request: + branches: [ "main" ] + schedule: + - cron: '16 4 * * 4' + +jobs: + analyze: + name: Analyze (${{ matrix.language }}) + # Runner size impacts CodeQL analysis time. To learn more, please see: + # - https://gh.io/recommended-hardware-resources-for-running-codeql + # - https://gh.io/supported-runners-and-hardware-resources + # - https://gh.io/using-larger-runners (GitHub.com only) + # Consider using larger runners or machines with greater resources for possible analysis time improvements. + runs-on: ${{ (matrix.language == 'swift' && 'macos-latest') || 'ubuntu-latest' }} + permissions: + # required for all workflows + security-events: write + + # required to fetch internal or private CodeQL packs + packages: read + + # only required for workflows in private repositories + actions: read + contents: read + + strategy: + fail-fast: false + matrix: + include: + - language: actions + build-mode: none + - language: python + build-mode: none + # CodeQL supports the following values keywords for 'language': 'actions', 'c-cpp', 'csharp', 'go', 'java-kotlin', 'javascript-typescript', 'python', 'ruby', 'rust', 'swift' + # Use `c-cpp` to analyze code written in C, C++ or both + # Use 'java-kotlin' to analyze code written in Java, Kotlin or both + # Use 'javascript-typescript' to analyze code written in JavaScript, TypeScript or both + # To learn more about changing the languages that are analyzed or customizing the build mode for your analysis, + # see https://docs.github.com/en/code-security/code-scanning/creating-an-advanced-setup-for-code-scanning/customizing-your-advanced-setup-for-code-scanning. + # If you are analyzing a compiled language, you can modify the 'build-mode' for that language to customize how + # your codebase is analyzed, see https://docs.github.com/en/code-security/code-scanning/creating-an-advanced-setup-for-code-scanning/codeql-code-scanning-for-compiled-languages + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + # Add any setup steps before running the `github/codeql-action/init` action. + # This includes steps like installing compilers or runtimes (`actions/setup-node` + # or others). This is typically only required for manual builds. + # - name: Setup runtime (example) + # uses: actions/setup-example@v1 + + # Initializes the CodeQL tools for scanning. 
+ - name: Initialize CodeQL + uses: github/codeql-action/init@v3 + with: + languages: ${{ matrix.language }} + build-mode: ${{ matrix.build-mode }} + # If you wish to specify custom queries, you can do so here or in a config file. + # By default, queries listed here will override any specified in a config file. + # Prefix the list here with "+" to use these queries and those in the config file. + + # For more details on CodeQL's query packs, refer to: https://docs.github.com/en/code-security/code-scanning/automatically-scanning-your-code-for-vulnerabilities-and-errors/configuring-code-scanning#using-queries-in-ql-packs + # queries: security-extended,security-and-quality + + # If the analyze step fails for one of the languages you are analyzing with + # "We were unable to automatically build your code", modify the matrix above + # to set the build mode to "manual" for that language. Then modify this step + # to build your code. + # ℹ️ Command-line programs to run using the OS shell. + # 📚 See https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idstepsrun + - if: matrix.build-mode == 'manual' + shell: bash + run: | + echo 'If you are using a "manual" build mode for one or more of the' \ + 'languages you are analyzing, replace this with the commands to build' \ + 'your code, for example:' + echo ' make bootstrap' + echo ' make release' + exit 1 + + - name: Perform CodeQL Analysis + uses: github/codeql-action/analyze@v3 + with: + category: "/language:${{matrix.language}}" From 603e3712802601f06021d5547a4e697eaa0e87f2 Mon Sep 17 00:00:00 2001 From: Kumar Vel <11884941+Kumarvels@users.noreply.github.com> Date: Thu, 31 Jul 2025 20:42:12 +0530 Subject: [PATCH 27/28] Potential fix for code scanning alert no. 27: Code injection security code fix Co-authored-by: Copilot Autofix powered by AI <62310815+github-advanced-security[bot]@users.noreply.github.com> Signed-off-by: Kumar Vel <11884941+Kumarvels@users.noreply.github.com> --- data_engineering/dataset_integration.py | 28 ++++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/data_engineering/dataset_integration.py b/data_engineering/dataset_integration.py index ec78c52..cb12c77 100644 --- a/data_engineering/dataset_integration.py +++ b/data_engineering/dataset_integration.py @@ -15,6 +15,7 @@ from datetime import datetime import hashlib +import re # Optional imports for advanced features try: import plotly.express as px @@ -298,6 +299,28 @@ def create_quality_filtered_dataset(self, dataset_id: str, min_trust_score: floa self.logger.error(f"Error creating quality-filtered dataset: {e}") raise + def is_safe_query(self, query_str: str, allowed_columns) -> bool: + """ + Check if the query string is safe: only allowed column names, numbers, and safe operators. + """ + # Only allow column names, numbers, whitespace, and safe operators + # Disallow parentheses, function calls, __import__, etc. 
+ # Allowed operators: ==, !=, <, >, <=, >=, and, or, not + # Build regex for allowed columns + col_pattern = r'|'.join([re.escape(col) for col in allowed_columns]) + # Full pattern: allowed columns, numbers, operators, whitespace + safe_pattern = rf'^([\s\d\.\'"]*({col_pattern})[\s\d\.\'"]*(==|!=|<=|>=|<|>|and|or|not|&|\||\s)*[\s\d\.\'"]*)+$' + # Disallow suspicious keywords + forbidden = ['__import__', 'os.', 'sys.', 'eval', 'exec', 'open(', '(', ')', '[', ']', '{', '}', ';'] + lowered = query_str.lower() + for word in forbidden: + if word in lowered: + return False + # Check regex + if re.match(safe_pattern, query_str): + return True + return False + def process_dataset(self, dataset_id: str, transformations: List[Dict]) -> str: """ Apply transformations to a dataset @@ -322,7 +345,10 @@ def process_dataset(self, dataset_id: str, transformations: List[Dict]) -> str: elif operation == 'rename_columns': df = df.rename(columns=params['mapping']) elif operation == 'filter': - df = df.query(params['condition']) + condition = params['condition'] + if not self.is_safe_query(condition, df.columns): + raise ValueError("Unsafe filter condition detected. Only simple column comparisons are allowed.") + df = df.query(condition) elif operation == 'sort': df = df.sort_values(by=params['columns'], ascending=params.get('ascending', True)) elif operation == 'groupby': From 1acfc14daaa983835b702363d26c35a49ff1c330 Mon Sep 17 00:00:00 2001 From: Kumar Vel <11884941+Kumarvels@users.noreply.github.com> Date: Thu, 31 Jul 2025 20:45:01 +0530 Subject: [PATCH 28/28] Potential fix for code scanning alert no. 33: Code injection code quality check Co-authored-by: Copilot Autofix powered by AI <62310815+github-advanced-security[bot]@users.noreply.github.com> Signed-off-by: Kumar Vel <11884941+Kumarvels@users.noreply.github.com> --- data_engineering/dataset_integration.py | 26 ++++++++++++++++--- .../scripts/easy_dataset_webui.py | 2 +- 2 files changed, 23 insertions(+), 5 deletions(-) diff --git a/data_engineering/dataset_integration.py b/data_engineering/dataset_integration.py index cb12c77..34e0059 100644 --- a/data_engineering/dataset_integration.py +++ b/data_engineering/dataset_integration.py @@ -345,10 +345,28 @@ def process_dataset(self, dataset_id: str, transformations: List[Dict]) -> str: elif operation == 'rename_columns': df = df.rename(columns=params['mapping']) elif operation == 'filter': - condition = params['condition'] - if not self.is_safe_query(condition, df.columns): - raise ValueError("Unsafe filter condition detected. 
Only simple column comparisons are allowed.") - df = df.query(condition) + # Expect params: {'column': ..., 'operator': ..., 'value': ...} + column = params.get('column') + operator = params.get('operator') + value = params.get('value') + allowed_operators = ['==', '!=', '<', '>', '<=', '>='] + if column not in df.columns: + raise ValueError(f"Column '{column}' not found in dataset.") + if operator not in allowed_operators: + raise ValueError(f"Operator '{operator}' is not allowed.") + # Apply filter using boolean indexing + if operator == '==': + df = df[df[column] == value] + elif operator == '!=': + df = df[df[column] != value] + elif operator == '<': + df = df[df[column] < value] + elif operator == '>': + df = df[df[column] > value] + elif operator == '<=': + df = df[df[column] <= value] + elif operator == '>=': + df = df[df[column] >= value] elif operation == 'sort': df = df.sort_values(by=params['columns'], ascending=params.get('ascending', True)) elif operation == 'groupby': diff --git a/data_engineering/scripts/easy_dataset_webui.py b/data_engineering/scripts/easy_dataset_webui.py index eedfa29..3828e04 100644 --- a/data_engineering/scripts/easy_dataset_webui.py +++ b/data_engineering/scripts/easy_dataset_webui.py @@ -248,7 +248,7 @@ def run_cleanlab_on_dataset(dataset_id, label_column, save_output): process_id = gr.Textbox(label="Dataset ID", placeholder="Enter dataset ID") transformations = gr.Textbox( label="Transformations (JSON)", - placeholder='[{"operation": "filter", "params": {"condition": "age > 30"}}]', + placeholder='[{"operation": "filter", "params": {"column": "age", "operator": ">", "value": 30}}]', lines=5 ) process_btn = gr.Button("Process Dataset")