diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md new file mode 100644 index 0000000..dd84ea7 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -0,0 +1,38 @@ +--- +name: Bug report +about: Create a report to help us improve +title: '' +labels: '' +assignees: '' + +--- + +**Describe the bug** +A clear and concise description of what the bug is. + +**To Reproduce** +Steps to reproduce the behavior: +1. Go to '...' +2. Click on '....' +3. Scroll down to '....' +4. See error + +**Expected behavior** +A clear and concise description of what you expected to happen. + +**Screenshots** +If applicable, add screenshots to help explain your problem. + +**Desktop (please complete the following information):** + - OS: [e.g. iOS] + - Browser [e.g. chrome, safari] + - Version [e.g. 22] + +**Smartphone (please complete the following information):** + - Device: [e.g. iPhone6] + - OS: [e.g. iOS8.1] + - Browser [e.g. stock browser, safari] + - Version [e.g. 22] + +**Additional context** +Add any other context about the problem here. diff --git a/.github/ISSUE_TEMPLATE/custom.md b/.github/ISSUE_TEMPLATE/custom.md new file mode 100644 index 0000000..48d5f81 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/custom.md @@ -0,0 +1,10 @@ +--- +name: Custom issue template +about: Describe this issue template's purpose here. +title: '' +labels: '' +assignees: '' + +--- + + diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md new file mode 100644 index 0000000..bbcbbe7 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/feature_request.md @@ -0,0 +1,20 @@ +--- +name: Feature request +about: Suggest an idea for this project +title: '' +labels: '' +assignees: '' + +--- + +**Is your feature request related to a problem? Please describe.** +A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] + +**Describe the solution you'd like** +A clear and concise description of what you want to happen. + +**Describe alternatives you've considered** +A clear and concise description of any alternative solutions or features you've considered. + +**Additional context** +Add any other context or screenshots about the feature request here. diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 0000000..c77d007 --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,12 @@ +# To get started with Dependabot version updates, you'll need to specify which +# package ecosystems to update and where the package manifests are located. +# Please see the documentation for all configuration options: +# https://docs.github.com/code-security/dependabot/dependabot-version-updates/configuration-options-for-the-dependabot.yml-file + +version: 2 +updates: + - package-ecosystem: "" # See documentation for possible values + directory: "/" # Location of package manifests + schedule: + interval: "weekly" + diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml new file mode 100644 index 0000000..6305650 --- /dev/null +++ b/.github/workflows/codeql.yml @@ -0,0 +1,100 @@ +# For most projects, this workflow file will not need changing; you simply need +# to commit it to your repository. +# +# You may wish to alter this file to override the set of languages analyzed, +# or to provide custom queries or build logic. +# +# ******** NOTE ******** +# We have attempted to detect the languages in your repository. 
Please check +# the `language` matrix defined below to confirm you have the correct set of +# supported CodeQL languages. +# +name: "CodeQL Advanced" + +on: + push: + branches: [ "main" ] + pull_request: + branches: [ "main" ] + schedule: + - cron: '16 4 * * 4' + +jobs: + analyze: + name: Analyze (${{ matrix.language }}) + # Runner size impacts CodeQL analysis time. To learn more, please see: + # - https://gh.io/recommended-hardware-resources-for-running-codeql + # - https://gh.io/supported-runners-and-hardware-resources + # - https://gh.io/using-larger-runners (GitHub.com only) + # Consider using larger runners or machines with greater resources for possible analysis time improvements. + runs-on: ${{ (matrix.language == 'swift' && 'macos-latest') || 'ubuntu-latest' }} + permissions: + # required for all workflows + security-events: write + + # required to fetch internal or private CodeQL packs + packages: read + + # only required for workflows in private repositories + actions: read + contents: read + + strategy: + fail-fast: false + matrix: + include: + - language: actions + build-mode: none + - language: python + build-mode: none + # CodeQL supports the following values keywords for 'language': 'actions', 'c-cpp', 'csharp', 'go', 'java-kotlin', 'javascript-typescript', 'python', 'ruby', 'rust', 'swift' + # Use `c-cpp` to analyze code written in C, C++ or both + # Use 'java-kotlin' to analyze code written in Java, Kotlin or both + # Use 'javascript-typescript' to analyze code written in JavaScript, TypeScript or both + # To learn more about changing the languages that are analyzed or customizing the build mode for your analysis, + # see https://docs.github.com/en/code-security/code-scanning/creating-an-advanced-setup-for-code-scanning/customizing-your-advanced-setup-for-code-scanning. + # If you are analyzing a compiled language, you can modify the 'build-mode' for that language to customize how + # your codebase is analyzed, see https://docs.github.com/en/code-security/code-scanning/creating-an-advanced-setup-for-code-scanning/codeql-code-scanning-for-compiled-languages + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + # Add any setup steps before running the `github/codeql-action/init` action. + # This includes steps like installing compilers or runtimes (`actions/setup-node` + # or others). This is typically only required for manual builds. + # - name: Setup runtime (example) + # uses: actions/setup-example@v1 + + # Initializes the CodeQL tools for scanning. + - name: Initialize CodeQL + uses: github/codeql-action/init@v3 + with: + languages: ${{ matrix.language }} + build-mode: ${{ matrix.build-mode }} + # If you wish to specify custom queries, you can do so here or in a config file. + # By default, queries listed here will override any specified in a config file. + # Prefix the list here with "+" to use these queries and those in the config file. + + # For more details on CodeQL's query packs, refer to: https://docs.github.com/en/code-security/code-scanning/automatically-scanning-your-code-for-vulnerabilities-and-errors/configuring-code-scanning#using-queries-in-ql-packs + # queries: security-extended,security-and-quality + + # If the analyze step fails for one of the languages you are analyzing with + # "We were unable to automatically build your code", modify the matrix above + # to set the build mode to "manual" for that language. Then modify this step + # to build your code. + # â„šī¸ Command-line programs to run using the OS shell. 
+ # 📚 See https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idstepsrun + - if: matrix.build-mode == 'manual' + shell: bash + run: | + echo 'If you are using a "manual" build mode for one or more of the' \ + 'languages you are analyzing, replace this with the commands to build' \ + 'your code, for example:' + echo ' make bootstrap' + echo ' make release' + exit 1 + + - name: Perform CodeQL Analysis + uses: github/codeql-action/analyze@v3 + with: + category: "/language:${{matrix.language}}" diff --git a/README.md b/README.md index c65da3d..3ae4e5a 100644 --- a/README.md +++ b/README.md @@ -20,7 +20,7 @@ OpenTrustEval is a comprehensive, high-performance, and modular platform for AI --- ## đŸ› ī¸ Installation -```bash +bash git clone https://github.com/Kumarvels/OpenTrustEval.git cd OpenTrustEval python -m venv .venv @@ -30,12 +30,12 @@ source .venv/bin/activate # On Linux/Mac pip install -r requirements.txt ``` ---- + ## 🌐 Launch the Unified WebUI The WebUI provides a single interface for LLM, Data, Security, and Research management. -```bash +bash streamlit run launch_workflow_webui.py ``` - Open [http://localhost:8501](http://localhost:8501) in your browser. @@ -112,5 +112,289 @@ python superfast_production_server.py --- +## Why Trust-Based Systems Are Better (than Label Error Detection Systems) ? + +Let me break down the fundamental differences and explain why a trust-based approach is more comprehensive and valuable. + +## Core Philosophical Differences + +### **Label Error Detection System** +``` +Focus: Data Quality → Model Performance +Approach: Find and fix problems in training data +Scope: Limited to labeled dataset issues +Outcome: Better training data +``` + +### **Trust-Based System** +``` +Focus: Holistic System Reliability → Real-World Performance +Approach: Evaluate comprehensive trustworthiness +Scope: End-to-end system behavior including deployment +Outcome: Confidence in system behavior +``` + +## Detailed Comparison + +### 1. **Scope and Coverage** + +**Label Error Detection Limitations:** +```python +# CleanLab approach - focused on training data +def cleanlab_approach(training_data, labels): + # Only addresses: + # 1. Mislabeling in training data + # 2. Data quality issues + # 3. Confidence in training predictions + + label_issues = find_label_errors(labels, pred_probs) + cleaned_data = remove_label_issues(training_data, label_issues) + return cleaned_data # Better training data, but... + +# What about deployment behavior? Real-world performance? 
+# These are NOT addressed by label error detection alone +``` + +**Trust-Based Approach:** +```python +# OpenTrustEval approach - comprehensive trust evaluation +def trust_based_approach(model, training_data, test_data, production_data): + trust_assessment = { + # Training Data Quality (includes label error detection) + 'data_quality': evaluate_data_quality(training_data, labels), + + # Model Reliability + 'reliability': evaluate_reliability(model, test_data), + + # Consistency Across Inputs + 'consistency': evaluate_consistency(model, various_inputs), + + # Fairness and Bias + 'fairness': evaluate_fairness(model, diverse_test_cases), + + # Robustness to Adversarial Attacks + 'robustness': evaluate_robustness(model, adversarial_examples), + + # Explainability and Transparency + 'explainability': evaluate_explainability(model, inputs), + + # Production Behavior + 'deployment_trust': evaluate_production_behavior(model, production_data) + } + + return comprehensive_trust_score(trust_assessment) +``` + +### 2. **Real-World Performance vs. Training Performance** + +**The Fundamental Problem:** +```python +# Scenario: Perfect training data, poor real-world trust +class ExampleScenario: + def demonstrate_limitation(self): + # Training data is perfect (no label errors) + training_data_quality = 0.99 # CleanLab would be happy + + # But model has issues: + reliability_score = 0.6 # Unreliable predictions + consistency_score = 0.5 # Inconsistent responses + fairness_score = 0.4 # Biased decisions + robustness_score = 0.3 # Fragile to input changes + + # Label error detection says: "Data is clean!" + # Trust system says: "Don't deploy this - it's not trustworthy!" + + return { + 'cleanlab_assessment': 'Data quality excellent', + 'trust_assessment': 'System not ready for deployment' + } +``` + +### 3. **Temporal and Contextual Trust** + +**Label Error Detection Cannot Address:** +```python +# Issues that arise over time and context +def temporal_trust_challenges(): + return { + # Time-based issues (CleanLab can't detect): + 'concept_drift': 'Model performance degrades as world changes', + 'data_drift': 'Input distribution shifts in production', + 'model_degradation': 'Performance naturally degrades over time', + + # Context-based issues: + 'domain_adaptation': 'Works in training domain but fails in deployment domain', + 'edge_cases': 'Handles common cases but fails on edge cases', + 'user_trust': 'Users lose confidence due to inconsistent behavior' + } +``` + +## Why Trust-Based Systems Are Superior + +### 1. **Comprehensive Risk Assessment** + +**Trust systems evaluate:** +```python +def comprehensive_risk_assessment(): + return { + # Pre-deployment risks (partially covered by CleanLab) + 'training_data_risks': ['label_errors', 'bias', 'completeness'], + + # Model behavior risks (NOT covered by CleanLab) + 'behavioral_risks': [ + 'overconfidence', # Model too confident in wrong answers + 'inconsistency', # Different responses to similar inputs + 'adversarial_vulnerability', # Security risks + 'bias_amplification' # Fairness issues in deployment + ], + + # Deployment risks (NOT covered by CleanLab) + 'deployment_risks': [ + 'production_drift', # Performance degradation over time + 'user_acceptance', # Human trust and adoption + 'regulatory_compliance', # Legal and ethical requirements + 'business_impact' # Real-world consequences of failures + ] + } +``` + +### 2. 
**Decision-Making Support** + +**Beyond Data Quality:** +```python +def decision_making_support(): + # CleanLab helps answer: "Is my training data good?" + cleanlab_question = "Should I retrain with cleaned data?" + + # Trust systems help answer broader questions: + trust_questions = [ + "Should I deploy this model to production?", + "Can I trust this model's decisions in critical situations?", + "How will this model perform with real users?", + "What are the risks of deploying this system?", + "How can I improve overall system trustworthiness?" + ] + + return { + 'cleanlab_scope': cleanlab_question, + 'trust_scope': trust_questions + } +``` + +### 3. **Continuous Monitoring and Improvement** + +**Evolution Over Time:** +```python +def evolution_comparison(): + return { + 'label_error_detection': { + 'phase': 'Training/pre-deployment', + 'frequency': 'One-time or periodic retraining', + 'scope': 'Static training dataset', + 'outcome': 'Better training data' + }, + + 'trust_based_system': { + 'phase': 'End-to-end lifecycle (training → deployment → monitoring)', + 'frequency': 'Continuous monitoring', + 'scope': 'Dynamic system behavior in real-world conditions', + 'outcome': 'Confidence in system reliability and safety' + } + } +``` + +## Concrete Examples Where Trust Systems Excel + +### Example 1: **Medical Diagnosis System** + +```python +# CleanLab approach: +medical_model_cleanlab = { + 'training_data_quality': 0.98, # Very clean data + 'recommendation': 'Ready for deployment' +} + +# Trust-based approach: +medical_model_trust = { + 'training_data_quality': 0.98, # Same clean data + 'reliability_score': 0.7, # Sometimes confident when wrong + 'consistency_score': 0.6, # Different diagnoses for similar symptoms + 'robustness_score': 0.5, # Fragile to slight input variations + 'fairness_score': 0.8, # Good but not perfect + 'explainability_score': 0.4, # Poor explanations for decisions + 'overall_trust': 0.6, # NOT ready for deployment! + 'recommendation': 'Needs significant improvement before deployment' +} +``` + +### Example 2: **Autonomous Vehicle Perception** + +```python +# CleanLab approach: +av_perception_cleanlab = { + 'training_data_quality': 0.95, # Good object detection labels + 'recommendation': 'Good data quality' +} + +# Trust-based approach: +av_perception_trust = { + 'training_data_quality': 0.95, # Same good data + 'reliability_in_rain': 0.3, # Terrible in rain conditions + 'consistency_at_night': 0.4, # Inconsistent night performance + 'robustness_to_adversarial': 0.2, # Vulnerable to simple attacks + 'edge_case_handling': 0.3, # Fails on unusual scenarios + 'safety_trust': 0.3, # DANGEROUS for deployment! + 'recommendation': 'Absolutely not ready - safety risks too high' +} +``` + +## The Trust Advantage: Beyond Binary Decisions + +### **CleanLab's Binary Thinking:** +``` +Data Quality: Good/Bad → Retrain/Don't Retrain +``` + +### **Trust-Based Thinking:** +``` +Trust Dimensions: +├── Reliability: 0.7 (Moderate confidence) +├── Consistency: 0.6 (Some variability acceptable) +├── Fairness: 0.9 (Excellent) +├── Robustness: 0.4 (Needs improvement) +├── Explainability: 0.8 (Good) +└── Overall Trust: 0.6 (Improvement needed) + +Decision Matrix: +├── Critical Applications: DON'T DEPLOY +├── Low-Stakes Applications: DEPLOY with monitoring +└── Research Applications: DEPLOY with caveats +``` + +## Fundamental Truth + +**Perfect training data ≠ Trustworthy system** + +A trust-based system recognizes that: +1. 
**Data quality is necessary but not sufficient** for trustworthy AI +2. **Model behavior in deployment matters more** than training data quality +3. **Human trust and acceptance** are crucial for real-world success +4. **Continuous monitoring and improvement** are essential for long-term success + +## Conclusion + +Trust-based systems are superior because they: + +1. **Provide comprehensive assessment** beyond just data quality +2. **Support better decision-making** for real-world deployment +3. **Consider end-to-end system behavior** rather than isolated components +4. **Enable continuous improvement** throughout the AI lifecycle +5. **Address human factors** like user trust and acceptance +6. **Prepare for real-world complexity** rather than controlled environments + +While label error detection is valuable (and should be part of any comprehensive approach), it's only one piece of the much larger trust puzzle. +A trust-based system provides the holistic view needed to build truly reliable, safe, and successful AI systems. + + ## 🏆 Credits OpenTrustEval is developed and maintained by Kumarvels and contributors. For issues, feature requests, or contributions, please open an issue or pull request on GitHub. diff --git a/SECURITY.md b/SECURITY.md new file mode 100644 index 0000000..034e848 --- /dev/null +++ b/SECURITY.md @@ -0,0 +1,21 @@ +# Security Policy + +## Supported Versions + +Use this section to tell people about which versions of your project are +currently being supported with security updates. + +| Version | Supported | +| ------- | ------------------ | +| 5.1.x | :white_check_mark: | +| 5.0.x | :x: | +| 4.0.x | :white_check_mark: | +| < 4.0 | :x: | + +## Reporting a Vulnerability + +Use this section to tell people how to report a vulnerability. + +Tell them where to go, how often they can expect to get an update on a +reported vulnerability, what to expect if the vulnerability is accepted or +declined, etc. 
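
As a concrete illustration of the decision-matrix view sketched in the README above, the following minimal Python snippet aggregates per-dimension trust scores into a weighted overall score and maps it to a deployment decision. The dimension names, weights, and thresholds are illustrative assumptions for this sketch, not part of the project's actual API.

```python
def deployment_decision(trust_scores: dict, weights: dict,
                        critical_threshold: float = 0.8,
                        low_stakes_threshold: float = 0.6) -> str:
    """Weighted aggregation of trust dimensions -> deployment decision (illustrative)."""
    total_weight = sum(weights.get(dim, 1.0) for dim in trust_scores)
    overall = sum(score * weights.get(dim, 1.0)
                  for dim, score in trust_scores.items()) / total_weight
    if overall >= critical_threshold:
        return "DEPLOY (including critical applications)"
    if overall >= low_stakes_threshold:
        return "DEPLOY with monitoring (low-stakes applications only)"
    return "DON'T DEPLOY"

scores = {'reliability': 0.7, 'consistency': 0.6, 'fairness': 0.9,
          'robustness': 0.4, 'explainability': 0.8}
weights = {'reliability': 1.0, 'consistency': 0.8, 'fairness': 1.0,
           'robustness': 0.9, 'explainability': 0.6}
print(deployment_decision(scores, weights))
# -> "DEPLOY with monitoring (low-stakes applications only)"
```
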
diff --git a/cloudscale_apis/endpoints/trust_api.py b/cloudscale_apis/endpoints/trust_api.py new file mode 100644 index 0000000..b1c18a8 --- /dev/null +++ b/cloudscale_apis/endpoints/trust_api.py @@ -0,0 +1,338 @@ +# src/api/trust_api.py +""" +Trust API and Microservices - Scalable trust evaluation services +""" + +from fastapi import FastAPI, HTTPException, BackgroundTasks +from pydantic import BaseModel, Field +from typing import Dict, Any, List, Optional +import asyncio +import uuid +from datetime import datetime +import redis +import json +import logging + +logger = logging.getLogger(__name__) + +# FastAPI app +app = FastAPI( + title="OpenTrustEval API", + description="Scalable trust evaluation API with microservices architecture", + version="1.0.0" +) + +# Redis for caching and job queue +redis_client = redis.Redis(host='localhost', port=6379, db=0) + +# Pydantic models +class TrustEvaluationRequest(BaseModel): + model_id: str = Field(..., description="Unique identifier for the model") + model_type: str = Field(default="llm", description="Type of model (llm, ml, cv, etc.)") + data: Dict[str, Any] = Field(..., description="Evaluation data") + evaluation_config: Optional[Dict[str, Any]] = Field(default={}, description="Evaluation configuration") + callback_url: Optional[str] = Field(default=None, description="URL for callback notifications") + +class TrustEvaluationResponse(BaseModel): + evaluation_id: str + status: str + results: Optional[Dict[str, Any]] = None + error: Optional[str] = None + created_at: datetime + completed_at: Optional[datetime] = None + +class TrustEvaluationStatus(BaseModel): + evaluation_id: str + status: str + progress: Optional[float] = None + estimated_completion: Optional[datetime] = None + +# In-memory storage for demonstration (use database in production) +evaluation_storage = {} + +# Trust evaluation service +class TrustEvaluationService: + """Service for trust evaluation operations""" + + @staticmethod + async def execute_evaluation(request: TrustEvaluationRequest) -> Dict[str, Any]: + """Execute trust evaluation""" + try: + # Import evaluator (lazy import for performance) + from src.evaluators.composite_evaluator import CompositeTrustEvaluator + + # Create evaluator + evaluator = CompositeTrustEvaluator() + + # Execute evaluation + results = evaluator.evaluate_comprehensive_trust( + model=None, # In real implementation, load model by model_id + data=request.data, + model_type=request.model_type, + **request.evaluation_config + ) + + return { + 'status': 'completed', + 'results': results, + 'completed_at': datetime.now() + } + + except Exception as e: + logger.error(f"Evaluation failed: {e}") + return { + 'status': 'failed', + 'error': str(e), + 'completed_at': datetime.now() + } + + @staticmethod + async def queue_evaluation(request: TrustEvaluationRequest) -> str: + """Queue evaluation for background processing""" + evaluation_id = str(uuid.uuid4()) + + # Store initial request + evaluation_storage[evaluation_id] = { + 'request': request.dict(), + 'status': 'queued', + 'created_at': datetime.now() + } + + # Add to Redis queue + job_data = { + 'evaluation_id': evaluation_id, + 'request': request.dict() + } + redis_client.lpush('trust_evaluation_queue', json.dumps(job_data)) + + return evaluation_id + +# API endpoints +@app.post("/evaluate", response_model=TrustEvaluationResponse) +async def evaluate_trust(request: TrustEvaluationRequest, background_tasks: BackgroundTasks): + """Submit trust evaluation request""" + try: + # Queue for background processing + 
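+        # queue_evaluation() stores the request in evaluation_storage and
+        # LPUSHes a JSON job onto the 'trust_evaluation_queue' Redis list;
+        # trust_evaluation_worker() later BRPOPs the job and runs the evaluation.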
evaluation_id = await TrustEvaluationService.queue_evaluation(request) + + return TrustEvaluationResponse( + evaluation_id=evaluation_id, + status="queued", + created_at=evaluation_storage[evaluation_id]['created_at'] + ) + + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) + +@app.get("/evaluation/{evaluation_id}", response_model=TrustEvaluationResponse) +async def get_evaluation_status(evaluation_id: str): + """Get evaluation status and results""" + if evaluation_id not in evaluation_storage: + raise HTTPException(status_code=404, detail="Evaluation not found") + + stored_data = evaluation_storage[evaluation_id] + + return TrustEvaluationResponse( + evaluation_id=evaluation_id, + status=stored_data['status'], + results=stored_data.get('results'), + error=stored_data.get('error'), + created_at=stored_data['created_at'], + completed_at=stored_data.get('completed_at') + ) + +@app.get("/evaluation/{evaluation_id}/status", response_model=TrustEvaluationStatus) +async def get_evaluation_progress(evaluation_id: str): + """Get evaluation progress""" + if evaluation_id not in evaluation_storage: + raise HTTPException(status_code=404, detail="Evaluation not found") + + stored_data = evaluation_storage[evaluation_id] + + return TrustEvaluationStatus( + evaluation_id=evaluation_id, + status=stored_data['status'], + progress=stored_data.get('progress', 0.0) if stored_data['status'] == 'processing' else + (1.0 if stored_data['status'] == 'completed' else 0.0) + ) + +# Background worker +async def trust_evaluation_worker(): + """Background worker for processing trust evaluations""" + logger.info("Starting trust evaluation worker") + + while True: + try: + # Get job from Redis queue + job_data_json = redis_client.brpop('trust_evaluation_queue', timeout=1) + + if job_data_json: + _, job_data_bytes = job_data_json + job_data = json.loads(job_data_bytes.decode('utf-8')) + + evaluation_id = job_data['evaluation_id'] + request_data = job_data['request'] + + logger.info(f"Processing evaluation {evaluation_id}") + + # Update status + evaluation_storage[evaluation_id]['status'] = 'processing' + evaluation_storage[evaluation_id]['progress'] = 0.1 + + # Execute evaluation + request = TrustEvaluationRequest(**request_data) + results = await TrustEvaluationService.execute_evaluation(request) + + # Update storage + evaluation_storage[evaluation_id].update(results) + evaluation_storage[evaluation_id]['status'] = results['status'] + + # Trigger callback if provided + if request.callback_url: + await trigger_callback(request.callback_url, evaluation_id, results) + + logger.info(f"Completed evaluation {evaluation_id}") + + await asyncio.sleep(0.1) # Prevent busy waiting + + except Exception as e: + logger.error(f"Worker error: {e}") + await asyncio.sleep(1) # Slow down on errors + +async def trigger_callback(callback_url: str, evaluation_id: str, results: Dict[str, Any]): + """Trigger callback notification""" + try: + import httpx + async with httpx.AsyncClient() as client: + callback_data = { + 'evaluation_id': evaluation_id, + 'results': results + } + await client.post(callback_url, json=callback_data) + except Exception as e: + logger.error(f"Callback failed: {e}") + +# Batch evaluation endpoints +class BatchEvaluationRequest(BaseModel): + evaluations: List[TrustEvaluationRequest] + batch_config: Optional[Dict[str, Any]] = Field(default={}, description="Batch processing configuration") + +class BatchEvaluationResponse(BaseModel): + batch_id: str + status: str + completed_evaluations: int = 
0 + total_evaluations: int + results: Optional[List[Dict[str, Any]]] = None + +@app.post("/batch-evaluate", response_model=BatchEvaluationResponse) +async def batch_evaluate(request: BatchEvaluationRequest, background_tasks: BackgroundTasks): + """Submit batch trust evaluation request""" + batch_id = str(uuid.uuid4()) + + # Queue individual evaluations + evaluation_ids = [] + for eval_request in request.evaluations: + eval_id = await TrustEvaluationService.queue_evaluation(eval_request) + evaluation_ids.append(eval_id) + + # Store batch information + batch_storage[batch_id] = { + 'evaluation_ids': evaluation_ids, + 'status': 'processing', + 'total_evaluations': len(evaluation_ids), + 'completed_evaluations': 0, + 'created_at': datetime.now() + } + + # Start batch monitoring in background + background_tasks.add_task(monitor_batch_progress, batch_id, evaluation_ids) + + return BatchEvaluationResponse( + batch_id=batch_id, + status="processing", + total_evaluations=len(evaluation_ids) + ) + +# Batch storage +batch_storage = {} + +async def monitor_batch_progress(batch_id: str, evaluation_ids: List[str]): + """Monitor batch evaluation progress""" + while batch_storage[batch_id]['completed_evaluations'] < batch_storage[batch_id]['total_evaluations']: + completed_count = 0 + for eval_id in evaluation_ids: + if eval_id in evaluation_storage: + if evaluation_storage[eval_id]['status'] in ['completed', 'failed']: + completed_count += 1 + + batch_storage[batch_id]['completed_evaluations'] = completed_count + + if completed_count == len(evaluation_ids): + batch_storage[batch_id]['status'] = 'completed' + # Collect results + results = [] + for eval_id in evaluation_ids: + if eval_id in evaluation_storage: + results.append(evaluation_storage[eval_id]) + batch_storage[batch_id]['results'] = results + break + + await asyncio.sleep(5) # Check every 5 seconds + +# Health check endpoint +@app.get("/health") +async def health_check(): + """Health check endpoint""" + return { + "status": "healthy", + "timestamp": datetime.now().isoformat(), + "services": { + "api": "running", + "worker": "running" if is_worker_alive() else "stopped", + "redis": "connected" if is_redis_connected() else "disconnected" + } + } + +def is_worker_alive() -> bool: + """Check if worker is alive""" + # Implementation would check worker status + return True + +def is_redis_connected() -> bool: + """Check Redis connection""" + try: + redis_client.ping() + return True + except: + return False + +# Startup and shutdown events +@app.on_event("startup") +async def startup_event(): + """Startup event handler""" + logger.info("Starting Trust API service") + + # Start background worker + asyncio.create_task(trust_evaluation_worker()) + + logger.info("Trust API service started") + +@app.on_event("shutdown") +async def shutdown_event(): + """Shutdown event handler""" + logger.info("Shutting down Trust API service") + +# CLI for API management +def start_api_server(): + """Start API server""" + import uvicorn + uvicorn.run( + "src.api.trust_api:app", + host="0.0.0.0", + port=8000, + reload=True, + log_level="info" + ) + +if __name__ == "__main__": + start_api_server() diff --git a/data_engineering/dataset_integration.py b/data_engineering/dataset_integration.py index ec78c52..34e0059 100644 --- a/data_engineering/dataset_integration.py +++ b/data_engineering/dataset_integration.py @@ -15,6 +15,7 @@ from datetime import datetime import hashlib +import re # Optional imports for advanced features try: import plotly.express as px @@ -298,6 +299,28 @@ 
def create_quality_filtered_dataset(self, dataset_id: str, min_trust_score: floa self.logger.error(f"Error creating quality-filtered dataset: {e}") raise + def is_safe_query(self, query_str: str, allowed_columns) -> bool: + """ + Check if the query string is safe: only allowed column names, numbers, and safe operators. + """ + # Only allow column names, numbers, whitespace, and safe operators + # Disallow parentheses, function calls, __import__, etc. + # Allowed operators: ==, !=, <, >, <=, >=, and, or, not + # Build regex for allowed columns + col_pattern = r'|'.join([re.escape(col) for col in allowed_columns]) + # Full pattern: allowed columns, numbers, operators, whitespace + safe_pattern = rf'^([\s\d\.\'"]*({col_pattern})[\s\d\.\'"]*(==|!=|<=|>=|<|>|and|or|not|&|\||\s)*[\s\d\.\'"]*)+$' + # Disallow suspicious keywords + forbidden = ['__import__', 'os.', 'sys.', 'eval', 'exec', 'open(', '(', ')', '[', ']', '{', '}', ';'] + lowered = query_str.lower() + for word in forbidden: + if word in lowered: + return False + # Check regex + if re.match(safe_pattern, query_str): + return True + return False + def process_dataset(self, dataset_id: str, transformations: List[Dict]) -> str: """ Apply transformations to a dataset @@ -322,7 +345,28 @@ def process_dataset(self, dataset_id: str, transformations: List[Dict]) -> str: elif operation == 'rename_columns': df = df.rename(columns=params['mapping']) elif operation == 'filter': - df = df.query(params['condition']) + # Expect params: {'column': ..., 'operator': ..., 'value': ...} + column = params.get('column') + operator = params.get('operator') + value = params.get('value') + allowed_operators = ['==', '!=', '<', '>', '<=', '>='] + if column not in df.columns: + raise ValueError(f"Column '{column}' not found in dataset.") + if operator not in allowed_operators: + raise ValueError(f"Operator '{operator}' is not allowed.") + # Apply filter using boolean indexing + if operator == '==': + df = df[df[column] == value] + elif operator == '!=': + df = df[df[column] != value] + elif operator == '<': + df = df[df[column] < value] + elif operator == '>': + df = df[df[column] > value] + elif operator == '<=': + df = df[df[column] <= value] + elif operator == '>=': + df = df[df[column] >= value] elif operation == 'sort': df = df.sort_values(by=params['columns'], ascending=params.get('ascending', True)) elif operation == 'groupby': diff --git a/data_engineering/requirements_dashboard.txt b/data_engineering/requirements_dashboard.txt index c0f7a55..f7e9d7b 100644 --- a/data_engineering/requirements_dashboard.txt +++ b/data_engineering/requirements_dashboard.txt @@ -9,7 +9,7 @@ streamlit>=1.28.0 plotly>=5.15.0 # Database -sqlite3 # Usually included with Python +# sqlite3 # Usually included with Python # Optional: For enhanced features # scikit-learn>=1.1.0 # For advanced trust scoring diff --git a/data_engineering/scripts/easy_dataset_webui.py b/data_engineering/scripts/easy_dataset_webui.py index eedfa29..3828e04 100644 --- a/data_engineering/scripts/easy_dataset_webui.py +++ b/data_engineering/scripts/easy_dataset_webui.py @@ -248,7 +248,7 @@ def run_cleanlab_on_dataset(dataset_id, label_column, save_output): process_id = gr.Textbox(label="Dataset ID", placeholder="Enter dataset ID") transformations = gr.Textbox( label="Transformations (JSON)", - placeholder='[{"operation": "filter", "params": {"condition": "age > 30"}}]', + placeholder='[{"operation": "filter", "params": {"column": "age", "operator": ">", "value": 30}}]', lines=5 ) process_btn = 
gr.Button("Process Dataset") diff --git a/high_performance_system/core/README.md b/high_performance_system/core/README.md index ee9a473..9f9f24d 100644 --- a/high_performance_system/core/README.md +++ b/high_performance_system/core/README.md @@ -1,4 +1,4 @@ -# 🧠 high_performance_system/core – Functional, Technical & System Design +# 🧠 High_performance_system/core – Functional, Technical & System Design --- @@ -119,6 +119,51 @@ flowchart TD - **Secure:** PII and compliance are first-class concerns. - **Extensible:** New modules can be added with minimal changes. +--- +--- +--- +--- + + Summary of Revolutionary Features +--- +1. Trust Evolution Timeline +Track trust changes over time +Predict future trust trajectories +Detect anomalies in trust patterns + +2. Cross-Model Correlation Engine +Understand how trust in one model affects others +Identify risk clusters and propagation paths +System-level trust assessment + +3. Trust Decision Matrix +Customizable trust criteria for different stakeholders +Multi-profile evaluation for various contexts +Automated deployment decisions based on trust + +4. Trust Simulation and Stress Testing +Adversarial attack simulation +Data drift testing +Robustness scoring and recommendations + +5. Trust Orchestration Pipeline +End-to-end trust evaluation pipeline +Environment-specific deployment with monitoring +Automated rollback on trust degradation + +6. Trust API and Microservices +Scalable REST API for trust evaluation +Background job processing +Batch evaluation capabilities + +7. Trust Visualization Dashboard +Interactive radar charts and timelines +Risk matrices and correlation heatmaps +Real-time monitoring with WebSocket updates + +Above 7 features transform OpenTrustEval from a simple evaluation tool into a comprehensive trust management platform that addresses the +real-world challenges of deploying trustworthy AI systems at scale. + --- -For more details, see the code and docstrings in each file, and refer to the main system README for integration and usage examples. \ No newline at end of file +For more details, see the code and docstrings in each file, and refer to the main system README for integration and usage examples. 
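
For reference, here is a self-contained sketch of the whitelisted-operator filtering that replaces `df.query()` in `data_engineering/dataset_integration.py` above; the helper name and sample data are assumptions for illustration only.

```python
import pandas as pd

# Only these comparison operators are accepted, mirroring the new
# {"column": ..., "operator": ..., "value": ...} filter params format.
ALLOWED_OPERATORS = {
    '==': lambda s, v: s == v,
    '!=': lambda s, v: s != v,
    '<':  lambda s, v: s < v,
    '>':  lambda s, v: s > v,
    '<=': lambda s, v: s <= v,
    '>=': lambda s, v: s >= v,
}

def apply_filter(df: pd.DataFrame, column: str, operator: str, value):
    """Apply a single whitelisted filter via boolean indexing (no df.query / eval)."""
    if column not in df.columns:
        raise ValueError(f"Column '{column}' not found in dataset.")
    if operator not in ALLOWED_OPERATORS:
        raise ValueError(f"Operator '{operator}' is not allowed.")
    return df[ALLOWED_OPERATORS[operator](df[column], value)]

df = pd.DataFrame({"age": [25, 31, 42], "name": ["a", "b", "c"]})
print(apply_filter(df, "age", ">", 30))  # keeps the rows with age 31 and 42
```
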
diff --git a/high_performance_system/core/cross_model_correlation.py b/high_performance_system/core/cross_model_correlation.py new file mode 100644 index 0000000..9b92a93 --- /dev/null +++ b/high_performance_system/core/cross_model_correlation.py @@ -0,0 +1,258 @@ +# src/correlation/cross_model_correlation.py +""" +Cross-Model Trust Correlation Engine - Understand how trust in one model affects others +""" + +import numpy as np +from scipy.stats import pearsonr +from typing import Dict, Any, List, Tuple +import networkx as nx +import plotly.graph_objects as go + +class CrossModelCorrelationEngine: + """Analyzes trust correlations between different AI models""" + + def __init__(self): + self.model_network = nx.DiGraph() # Directed graph for causality + self.trust_correlations = {} + self.interference_patterns = {} + + def register_model_relationship(self, source_model: str, target_model: str, + relationship_type: str, strength: float = 1.0): + """Register a relationship between models""" + self.model_network.add_edge(source_model, target_model, + relationship=relationship_type, + strength=strength) + + def analyze_trust_interference(self, model_evaluations: Dict[str, Dict]) -> Dict[str, Any]: + """Analyze how trust in one model affects trust in others""" + interference_analysis = {} + + # Calculate correlation matrix + model_names = list(model_evaluations.keys()) + trust_scores = {model: eval_data.get('overall_trust_score', 0.5) + for model, eval_data in model_evaluations.items()} + + # Pairwise correlation analysis + correlations = {} + for i, model1 in enumerate(model_names): + for j, model2 in enumerate(model_names): + if i != j: + score1 = trust_scores[model1] + score2 = trust_scores[model2] + + # Simple correlation (in real implementation, use more sophisticated methods) + correlation = self._calculate_trust_correlation( + model_evaluations[model1], + model_evaluations[model2] + ) + + correlations[f"{model1}__{model2}"] = { + 'correlation': correlation, + 'trust_impact': self._calculate_trust_impact(score1, score2), + 'risk_amplification': self._calculate_risk_amplification( + model_evaluations[model1], + model_evaluations[model2] + ) + } + + # Identify risk clusters + risk_clusters = self._identify_risk_clusters(model_evaluations) + + # Calculate system-level trust + system_trust = self._calculate_system_level_trust(model_evaluations) + + return { + 'pairwise_correlations': correlations, + 'risk_clusters': risk_clusters, + 'system_level_trust': system_trust, + 'vulnerability_analysis': self._analyze_vulnerability_propagation(model_evaluations), + 'recommendations': self._generate_correlation_recommendations( + correlations, risk_clusters, system_trust + ) + } + + def _calculate_trust_correlation(self, eval1: Dict, eval2: Dict) -> float: + """Calculate trust correlation between two model evaluations""" + # Extract dimension scores + dims1 = eval1.get('dimension_scores', {}) + dims2 = eval2.get('dimension_scores', {}) + + # Get common dimensions + common_dims = set(dims1.keys()) & set(dims2.keys()) + + if len(common_dims) < 2: + return 0.0 + + scores1 = [dims1[dim] for dim in common_dims] + scores2 = [dims2[dim] for dim in common_dims] + + # Calculate Pearson correlation + try: + correlation, _ = pearsonr(scores1, scores2) + return float(correlation) + except: + return 0.0 + + def _calculate_trust_impact(self, source_trust: float, target_trust: float) -> Dict[str, float]: + """Calculate how source model trust impacts target model trust""" + # Simple impact model - in practice, this 
would be more sophisticated + impact_magnitude = abs(source_trust - 0.5) * abs(target_trust - 0.5) * 2 + impact_direction = 1 if (source_trust > 0.5 and target_trust > 0.5) or \ + (source_trust < 0.5 and target_trust < 0.5) else -1 + + return { + 'magnitude': impact_magnitude, + 'direction': impact_direction, # 1 = positive correlation, -1 = negative + 'risk_factor': impact_magnitude * abs(impact_direction) + } + + def _calculate_risk_amplification(self, eval1: Dict, eval2: Dict) -> float: + """Calculate how risks in one model amplify risks in another""" + # Extract risk scores + risks1 = eval1.get('risk_assessment', {}).get('high_risks', []) + risks2 = eval2.get('risk_assessment', {}).get('high_risks', []) + + # Simple risk amplification model + base_risk = len(risks1) + len(risks2) + amplified_risk = base_risk * (1 + len(risks1) * len(risks2) * 0.1) + + return min(10.0, amplified_risk) # Cap at reasonable level + + def _identify_risk_clusters(self, model_evaluations: Dict[str, Dict]) -> List[Dict]: + """Identify clusters of models with correlated risks""" + # Simple clustering based on correlation thresholds + clusters = [] + processed_models = set() + + for model_name, eval_data in model_evaluations.items(): + if model_name in processed_models: + continue + + # Find correlated models + correlated_models = [] + for other_model, other_eval in model_evaluations.items(): + if other_model != model_name and other_model not in processed_models: + correlation = self._calculate_trust_correlation(eval_data, other_eval) + if abs(correlation) > 0.7: # High correlation threshold + correlated_models.append(other_model) + processed_models.add(other_model) + + if correlated_models: + correlated_models.append(model_name) + clusters.append({ + 'models': correlated_models, + 'cluster_risk': self._calculate_cluster_risk( + {m: model_evaluations[m] for m in correlated_models} + ) + }) + processed_models.add(model_name) + + return clusters + + def _calculate_system_level_trust(self, model_evaluations: Dict[str, Dict]) -> float: + """Calculate overall system trust considering correlations""" + if not model_evaluations: + return 0.5 + + individual_trusts = [eval_data.get('overall_trust_score', 0.5) + for eval_data in model_evaluations.values()] + + # Simple average - in practice, weight by model importance and correlations + return float(np.mean(individual_trusts)) + + def _analyze_vulnerability_propagation(self, model_evaluations: Dict[str, Dict]) -> Dict[str, Any]: + """Analyze how vulnerabilities might propagate through the system""" + propagation_analysis = {} + + for model_name, eval_data in model_evaluations.items(): + vulnerabilities = eval_data.get('risk_assessment', {}).get('critical_risks', []) + + if vulnerabilities: + propagation_analysis[model_name] = { + 'vulnerabilities': vulnerabilities, + 'propagation_risk': len(vulnerabilities) * 0.2, + 'affected_downstream': self._find_affected_models(model_name), + 'mitigation_priority': self._calculate_mitigation_priority(eval_data) + } + + return propagation_analysis + + def _find_affected_models(self, source_model: str) -> List[str]: + """Find models that might be affected by issues in source model""" + if source_model in self.model_network: + return list(self.model_network.successors(source_model)) + return [] + + def _calculate_mitigation_priority(self, eval_data: Dict) -> str: + """Calculate priority for mitigation based on risk severity""" + critical_risks = len(eval_data.get('risk_assessment', {}).get('critical_risks', [])) + high_risks = 
len(eval_data.get('risk_assessment', {}).get('high_risks', [])) + + risk_score = critical_risks * 3 + high_risks + + if risk_score >= 6: + return 'critical' + elif risk_score >= 3: + return 'high' + elif risk_score >= 1: + return 'medium' + else: + return 'low' + + def _generate_correlation_recommendations(self, correlations: Dict, + clusters: List[Dict], + system_trust: float) -> List[str]: + """Generate recommendations based on correlation analysis""" + recommendations = [] + + # System-level recommendations + if system_trust < 0.6: + recommendations.append("System-level trust is low. Consider comprehensive system review.") + + # Cluster recommendations + high_risk_clusters = [c for c in clusters if c['cluster_risk'] > 0.7] + if high_risk_clusters: + recommendations.append(f"Identified {len(high_risk_clusters)} high-risk model clusters. Review interdependencies.") + + # Correlation recommendations + strong_correlations = [corr for corr, data in correlations.items() + if abs(data['correlation']) > 0.8] + if strong_correlations: + recommendations.append(f"Found {len(strong_correlations)} strong model correlations. Monitor jointly.") + + return recommendations + +# Integration example +class CorrelationAwareEvaluator: + """Evaluator that considers cross-model correlations""" + + def __init__(self): + self.correlation_engine = CrossModelCorrelationEngine() + # ... other initialization + + def evaluate_multi_model_system(self, models: Dict[str, Any], + shared_data: Dict[str, Any]) -> Dict[str, Any]: + """Evaluate a system with multiple interacting models""" + + # Evaluate each model individually + individual_evaluations = {} + for model_name, model in models.items(): + individual_evaluations[model_name] = self.evaluate_comprehensive_trust( + model, shared_data.get(model_name, shared_data) + ) + + # Analyze cross-model correlations + correlation_analysis = self.correlation_engine.analyze_trust_interference( + individual_evaluations + ) + + return { + 'individual_evaluations': individual_evaluations, + 'correlation_analysis': correlation_analysis, + 'system_overview': { + 'total_models': len(models), + 'system_trust': correlation_analysis['system_level_trust'], + 'risk_clusters': len(correlation_analysis['risk_clusters']) + } + } diff --git a/high_performance_system/core/trust_dashboard.py b/high_performance_system/core/trust_dashboard.py new file mode 100644 index 0000000..33ed634 --- /dev/null +++ b/high_performance_system/core/trust_dashboard.py @@ -0,0 +1,429 @@ +# src/visualization/trust_dashboard.py +""" +Trust Visualization and Dashboard System - Interactive trust monitoring and analysis +""" + +import dash +from dash import dcc, html, Input, Output, callback +import plotly.graph_objects as go +import plotly.express as px +from plotly.subplots import make_subplots +import pandas as pd +import numpy as np +from datetime import datetime, timedelta +import json +import logging + +logger = logging.getLogger(__name__) + +# Initialize Dash app +app = dash.Dash(__name__, title="Trust Evaluation Dashboard") + +class TrustVisualizationEngine: + """Engine for creating trust visualizations""" + + @staticmethod + def create_trust_radar_chart(dimension_scores: Dict[str, float], + title: str = "Trust Dimensions") -> go.Figure: + """Create radar chart for trust dimensions""" + categories = list(dimension_scores.keys()) + values = list(dimension_scores.values()) + + # Close the radar chart + categories.append(categories[0]) + values.append(values[0]) + + fig = go.Figure() + 
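+        # Scatterpolar with fill='toself' draws a closed polygon over the trust
+        # dimensions; the radial axis is fixed to [0, 1] below to match the
+        # trust-score range.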
fig.add_trace(go.Scatterpolar( + r=values, + theta=categories, + fill='toself', + name=title + )) + + fig.update_layout( + polar=dict( + radialaxis=dict( + visible=True, + range=[0, 1] + ) + ), + showlegend=False, + title=title + ) + + return fig + + @staticmethod + def create_trust_timeline(history_data: pd.DataFrame) -> go.Figure: + """Create timeline of trust scores""" + fig = go.Figure() + + # Overall trust score timeline + fig.add_trace(go.Scatter( + x=history_data['timestamp'], + y=history_data['overall_trust'], + mode='lines+markers', + name='Overall Trust', + line=dict(color='blue') + )) + + # Add dimension scores if available + if 'dimensions' in history_data.columns: + sample_dims = history_data.iloc[0]['dimensions'] + for dim_name in sample_dims.keys(): + dim_values = [dims.get(dim_name, 0.5) for dims in history_data['dimensions']] + fig.add_trace(go.Scatter( + x=history_data['timestamp'], + y=dim_values, + mode='lines', + name=f'{dim_name}', + line=dict(dash='dot') + )) + + fig.update_layout( + title='Trust Evolution Over Time', + xaxis_title='Time', + yaxis_title='Trust Score', + yaxis=dict(range=[0, 1]) + ) + + return fig + + @staticmethod + def create_trust_heatmap(correlation_data: Dict[str, Dict[str, float]]) -> go.Figure: + """Create heatmap of trust correlations""" + # Convert to matrix format + dimensions = list(correlation_data.keys()) + correlation_matrix = [] + + for dim1 in dimensions: + row = [] + for dim2 in dimensions: + row.append(correlation_data[dim1].get(dim2, 0)) + correlation_matrix.append(row) + + fig = go.Figure(data=go.Heatmap( + z=correlation_matrix, + x=dimensions, + y=dimensions, + colorscale='RdBu', + zmid=0 + )) + + fig.update_layout( + title='Trust Dimension Correlations', + xaxis_title='Dimensions', + yaxis_title='Dimensions' + ) + + return fig + + @staticmethod + def create_risk_matrix(risk_data: Dict[str, Any]) -> go.Figure: + """Create risk matrix visualization""" + risks = risk_data.get('all_risks', []) + + if not risks: + # Create sample data for demonstration + risks = [ + {'dimension': 'safety', 'probability': 0.8, 'impact': 0.9, 'category': 'critical'}, + {'dimension': 'reliability', 'probability': 0.6, 'impact': 0.7, 'category': 'high'}, + {'dimension': 'fairness', 'probability': 0.4, 'impact': 0.5, 'category': 'medium'}, + {'dimension': 'privacy', 'probability': 0.3, 'impact': 0.8, 'category': 'high'} + ] + + # Create DataFrame + df = pd.DataFrame(risks) + + # Categorize risk levels + def categorize_risk(row): + if row['probability'] * row['impact'] > 0.7: + return 'Critical' + elif row['probability'] * row['impact'] > 0.4: + return 'High' + elif row['probability'] * row['impact'] > 0.2: + return 'Medium' + else: + return 'Low' + + df['risk_level'] = df.apply(categorize_risk, axis=1) + + # Create scatter plot + fig = px.scatter(df, x='probability', y='impact', + color='risk_level', text='dimension', + size_max=60, + title='Risk Matrix') + + # Add quadrant lines + fig.add_shape(type='line', x0=0.5, y0=0, x1=0.5, y1=1, + line=dict(color='gray', width=1, dash='dot')) + fig.add_shape(type='line', x0=0, y0=0.5, x1=1, y1=0.5, + line=dict(color='gray', width=1, dash='dot')) + + fig.update_layout( + xaxis_title='Probability', + yaxis_title='Impact', + xaxis=dict(range=[0, 1]), + yaxis=dict(range=[0, 1]) + ) + + return fig + +# Dashboard layout +app.layout = html.Div([ + html.H1("AI Trust Evaluation Dashboard", + style={'textAlign': 'center', 'marginBottom': 30}), + + # Control panel + html.Div([ + html.Div([ + html.Label("Model Selection:"), + 
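+                # Sample model list for the demo dashboard; a real deployment
+                # would populate these options from the evaluation store/API.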
dcc.Dropdown( + id='model-selector', + options=[ + {'label': 'GPT-4', 'value': 'gpt4'}, + {'label': 'LLaMA-2', 'value': 'llama2'}, + {'label': 'Claude', 'value': 'claude'}, + {'label': 'Custom Model', 'value': 'custom'} + ], + value='gpt4' + ) + ], style={'width': '30%', 'display': 'inline-block'}), + + html.Div([ + html.Label("Time Range:"), + dcc.Dropdown( + id='time-range', + options=[ + {'label': 'Last Hour', 'value': 'hour'}, + {'label': 'Last Day', 'value': 'day'}, + {'label': 'Last Week', 'value': 'week'}, + {'label': 'Last Month', 'value': 'month'} + ], + value='week' + ) + ], style={'width': '30%', 'display': 'inline-block', 'marginLeft': '5%'}), + + html.Div([ + html.Label("View Type:"), + dcc.RadioItems( + id='view-type', + options=[ + {'label': 'Current', 'value': 'current'}, + {'label': 'Historical', 'value': 'historical'}, + {'label': 'Comparison', 'value': 'comparison'} + ], + value='current', + inline=True + ) + ], style={'width': '30%', 'display': 'inline-block', 'marginLeft': '5%'}) + ], style={'marginBottom': 30}), + + # Main dashboard + html.Div([ + # Trust radar chart + html.Div([ + dcc.Graph(id='trust-radar-chart') + ], style={'width': '50%', 'display': 'inline-block'}), + + # Trust timeline + html.Div([ + dcc.Graph(id='trust-timeline') + ], style={'width': '50%', 'display': 'inline-block'}) + ]), + + # Risk matrix and heatmap + html.Div([ + html.Div([ + dcc.Graph(id='risk-matrix') + ], style={'width': '50%', 'display': 'inline-block'}), + + html.Div([ + dcc.Graph(id='trust-heatmap') + ], style={'width': '50%', 'display': 'inline-block'}) + ]), + + # Recommendations and alerts + html.Div([ + html.H3("Key Insights and Recommendations"), + html.Div(id='recommendations', + style={'padding': '20px', 'backgroundColor': '#f0f0f0', 'borderRadius': '5px'}) + ], style={'marginTop': '30px'}) +]) + +# Callbacks for interactive dashboard +@callback( + [Output('trust-radar-chart', 'figure'), + Output('trust-timeline', 'figure'), + Output('risk-matrix', 'figure'), + Output('trust-heatmap', 'figure'), + Output('recommendations', 'children')], + [Input('model-selector', 'value'), + Input('time-range', 'value'), + Input('view-type', 'value')] +) +def update_dashboard(model_id, time_range, view_type): + """Update dashboard based on selections""" + + # Generate sample data (in real implementation, fetch from database/API) + sample_dimension_scores = { + 'reliability': 0.85, + 'safety': 0.92, + 'fairness': 0.78, + 'consistency': 0.88, + 'robustness': 0.81, + 'explainability': 0.75 + } + + # Create visualizations + radar_fig = TrustVisualizationEngine.create_trust_radar_chart( + sample_dimension_scores, "Current Trust Profile" + ) + + # Generate timeline data + timeline_data = generate_sample_timeline_data(time_range) + timeline_fig = TrustVisualizationEngine.create_trust_timeline(timeline_data) + + # Generate risk data + risk_data = generate_sample_risk_data() + risk_fig = TrustVisualizationEngine.create_risk_matrix(risk_data) + + # Generate correlation data + correlation_data = generate_sample_correlation_data() + heatmap_fig = TrustVisualizationEngine.create_trust_heatmap(correlation_data) + + # Generate recommendations + recommendations = generate_sample_recommendations(sample_dimension_scores) + + return radar_fig, timeline_fig, risk_fig, heatmap_fig, recommendations + +def generate_sample_timeline_data(time_range: str) -> pd.DataFrame: + """Generate sample timeline data""" + end_time = datetime.now() + + if time_range == 'hour': + start_time = end_time - timedelta(hours=1) + freq = 
'5min' + elif time_range == 'day': + start_time = end_time - timedelta(days=1) + freq = '1h' + elif time_range == 'week': + start_time = end_time - timedelta(weeks=1) + freq = '1d' + else: # month + start_time = end_time - timedelta(days=30) + freq = '1d' + + timestamps = pd.date_range(start=start_time, end=end_time, freq=freq) + + # Generate sample trust scores with some variation + np.random.seed(42) + base_score = 0.8 + scores = base_score + np.random.normal(0, 0.05, len(timestamps)) + scores = np.clip(scores, 0, 1) # Keep between 0 and 1 + + # Generate dimension scores + dimensions = [] + for _ in timestamps: + dim_scores = { + 'reliability': np.clip(base_score + np.random.normal(0, 0.03), 0, 1), + 'safety': np.clip(base_score + 0.05 + np.random.normal(0, 0.02), 0, 1), + 'fairness': np.clip(base_score - 0.02 + np.random.normal(0, 0.04), 0, 1) + } + dimensions.append(dim_scores) + + return pd.DataFrame({ + 'timestamp': timestamps, + 'overall_trust': scores, + 'dimensions': dimensions + }) + +def generate_sample_risk_data() -> Dict[str, Any]: + """Generate sample risk data""" + return { + 'all_risks': [ + {'dimension': 'Safety', 'probability': 0.1, 'impact': 0.9, 'category': 'critical'}, + {'dimension': 'Reliability', 'probability': 0.2, 'impact': 0.7, 'category': 'high'}, + {'dimension': 'Fairness', 'probability': 0.3, 'impact': 0.6, 'category': 'medium'}, + {'dimension': 'Privacy', 'probability': 0.15, 'impact': 0.8, 'category': 'high'}, + {'dimension': 'Robustness', 'probability': 0.25, 'impact': 0.5, 'category': 'medium'} + ] + } + +def generate_sample_correlation_data() -> Dict[str, Dict[str, float]]: + """Generate sample correlation data""" + dimensions = ['reliability', 'safety', 'fairness', 'consistency', 'robustness'] + correlation_data = {} + + np.random.seed(42) + for i, dim1 in enumerate(dimensions): + correlation_data[dim1] = {} + for j, dim2 in enumerate(dimensions): + if i == j: + correlation_data[dim1][dim2] = 1.0 + else: + # Generate realistic correlations + correlation = np.random.normal(0, 0.3) + correlation_data[dim1][dim2] = np.clip(correlation, -1, 1) + + return correlation_data + +def generate_sample_recommendations(dimension_scores: Dict[str, float]) -> str: + """Generate sample recommendations""" + recommendations = [] + + for dimension, score in dimension_scores.items(): + if score < 0.7: + recommendations.append(f"âš ī¸ Low {dimension} score ({score:.2f}). 
Consider improvement actions.") + elif score < 0.8: + recommendations.append(f"â„šī¸ {dimension} score could be improved ({score:.2f}).") + + if not recommendations: + recommendations.append("✅ All trust dimensions are performing well!") + recommendations.append("💡 Consider running stress tests to validate robustness.") + + return html.Ul([html.Li(rec) for rec in recommendations]) + +# Real-time monitoring component +class RealTimeTrustMonitor: + """Real-time trust monitoring with WebSocket updates""" + + def __init__(self): + self.clients = set() + self.monitoring_data = {} + + async def register_client(self, websocket): + """Register WebSocket client""" + self.clients.add(websocket) + + async def unregister_client(self, websocket): + """Unregister WebSocket client""" + self.clients.discard(websocket) + + async def broadcast_update(self, update_data: Dict[str, Any]): + """Broadcast trust updates to all clients""" + if self.clients: + message = json.dumps(update_data) + # In real implementation, send to all WebSocket clients + for client in self.clients.copy(): + try: + await client.send(message) + except: + await self.unregister_client(client) + +# API endpoint for real-time data +@app.server.route('/api/realtime') +async def realtime_endpoint(): + """WebSocket endpoint for real-time trust updates""" + # Implementation would handle WebSocket connections + pass + +# CLI for starting dashboard +def start_dashboard(): + """Start the trust dashboard""" + logger.info("Starting Trust Dashboard") + app.run_server(debug=True, host='0.0.0.0', port=8050) + +if __name__ == '__main__': + start_dashboard() diff --git a/high_performance_system/core/trust_decision_matrix.py b/high_performance_system/core/trust_decision_matrix.py new file mode 100644 index 0000000..30bf20f --- /dev/null +++ b/high_performance_system/core/trust_decision_matrix.py @@ -0,0 +1,285 @@ +# src/decision/trust_decision_matrix.py +""" +Trust Decision Matrix - Customizable trust criteria for different stakeholders and contexts +""" + +from typing import Dict, Any, List, Callable +import numpy as np +from dataclasses import dataclass +from enum import Enum + +class StakeholderType(Enum): + """Different types of stakeholders with varying trust requirements""" + EXECUTIVE = "executive" + TECHNICAL = "technical" + REGULATORY = "regulatory" + END_USER = "end_user" + BUSINESS = "business" + +class DeploymentContext(Enum): + """Different deployment contexts with varying risk tolerances""" + DEVELOPMENT = "development" + TESTING = "testing" + STAGING = "staging" + PRODUCTION = "production" + CRITICAL = "critical" + +@dataclass +class TrustCriterion: + """A specific trust criterion with threshold and weight""" + dimension: str + threshold: float + weight: float + critical: bool = False + rationale: str = "" + +class TrustDecisionMatrix: + """Matrix-based trust decision system""" + + def __init__(self): + self.decision_profiles = {} + self.default_profiles = self._create_default_profiles() + + def _create_default_profiles(self) -> Dict[str, List[TrustCriterion]]: + """Create default decision profiles for common scenarios""" + return { + 'executive_high_level': [ + TrustCriterion('overall_trust', 0.8, 1.0, rationale="Executive overview"), + TrustCriterion('safety', 0.9, 0.8, critical=True, rationale="Safety is paramount"), + TrustCriterion('reliability', 0.85, 0.7, rationale="Business reliability") + ], + 'technical_detailed': [ + TrustCriterion('reliability', 0.8, 0.9, rationale="Technical accuracy"), + TrustCriterion('consistency', 0.85, 
0.8, rationale="Stable performance"), + TrustCriterion('robustness', 0.8, 0.7, rationale="Resilience to attacks"), + TrustCriterion('explainability', 0.7, 0.6, rationale="Debugging capability") + ], + 'regulatory_compliance': [ + TrustCriterion('fairness', 0.9, 1.0, critical=True, rationale="Non-discrimination"), + TrustCriterion('privacy', 0.95, 0.9, critical=True, rationale="Data protection"), + TrustCriterion('safety', 0.9, 0.8, critical=True, rationale="User safety"), + TrustCriterion('transparency', 0.8, 0.7, rationale="Audit requirements") + ], + 'production_critical': [ + TrustCriterion('safety', 0.95, 1.0, critical=True, rationale="Life-critical systems"), + TrustCriterion('reliability', 0.9, 0.9, critical=True, rationale="Mission-critical"), + TrustCriterion('robustness', 0.85, 0.8, rationale="Security resilience") + ] + } + + def create_custom_profile(self, profile_name: str, criteria: List[TrustCriterion]): + """Create a custom decision profile""" + self.decision_profiles[profile_name] = criteria + + def evaluate_against_profile(self, evaluation_results: Dict[str, Any], + profile_name: str) -> Dict[str, Any]: + """Evaluate trust results against a specific decision profile""" + # Get profile criteria + if profile_name in self.decision_profiles: + criteria = self.decision_profiles[profile_name] + elif profile_name in self.default_profiles: + criteria = self.default_profiles[profile_name] + else: + return {'error': f'Profile {profile_name} not found'} + + # Extract dimension scores + dimension_scores = evaluation_results.get('dimension_scores', {}) + category_scores = evaluation_results.get('category_scores', {}) + + # Evaluate each criterion + criterion_results = [] + critical_failures = [] + weighted_scores = [] + + for criterion in criteria: + # Get score for dimension + score = dimension_scores.get(criterion.dimension) + if score is None: + # Try category score + score = category_scores.get(criterion.dimension, 0.5) + + # Check threshold + meets_threshold = score >= criterion.threshold + if criterion.critical and not meets_threshold: + critical_failures.append(criterion.dimension) + + # Calculate weighted contribution + weighted_score = score * criterion.weight + weighted_scores.append(weighted_score) + + criterion_results.append({ + 'dimension': criterion.dimension, + 'score': score, + 'threshold': criterion.threshold, + 'meets_threshold': meets_threshold, + 'weight': criterion.weight, + 'weighted_contribution': weighted_score, + 'critical': criterion.critical + }) + + # Calculate overall profile score + overall_profile_score = sum(weighted_scores) / sum(criterion.weight for criterion in criteria) if criteria else 0.5 + + # Make decision + decision = "APPROVED" if len(critical_failures) == 0 and overall_profile_score >= 0.7 else "REJECTED" + if len(critical_failures) > 0: + decision = "REJECTED_CRITICAL_FAILURES" + + return { + 'profile_name': profile_name, + 'overall_score': overall_profile_score, + 'decision': decision, + 'criterion_results': criterion_results, + 'critical_failures': critical_failures, + 'met_thresholds': len([c for c in criterion_results if c['meets_threshold']]), + 'total_criteria': len(criteria), + 'recommendations': self._generate_profile_recommendations(criterion_results) + } + + def multi_profile_evaluation(self, evaluation_results: Dict[str, Any], + profile_names: List[str]) -> Dict[str, Any]: + """Evaluate against multiple profiles simultaneously""" + profile_results = {} + for profile_name in profile_names: + profile_results[profile_name] = 
self.evaluate_against_profile( + evaluation_results, profile_name + ) + + # Aggregate decisions + final_decision = self._aggregate_decisions(profile_results) + + return { + 'individual_profile_results': profile_results, + 'final_decision': final_decision, + 'consensus_score': self._calculate_consensus_score(profile_results), + 'conflicting_decisions': self._find_conflicting_decisions(profile_results) + } + + def _generate_profile_recommendations(self, criterion_results: List[Dict]) -> List[str]: + """Generate recommendations based on profile evaluation""" + recommendations = [] + + for criterion in criterion_results: + if not criterion['meets_threshold']: + if criterion['critical']: + recommendations.append(f"CRITICAL: Improve {criterion['dimension']} (current: {criterion['score']:.3f}, required: {criterion['threshold']})") + else: + recommendations.append(f"Improve {criterion['dimension']} (current: {criterion['score']:.3f}, required: {criterion['threshold']})") + + return recommendations + + def _aggregate_decisions(self, profile_results: Dict[str, Dict]) -> str: + """Aggregate decisions from multiple profiles""" + decisions = [result['decision'] for result in profile_results.values()] + + if 'REJECTED_CRITICAL_FAILURES' in decisions: + return 'REJECTED_CRITICAL_FAILURES' + elif 'REJECTED' in decisions: + return 'REJECTED' + else: + return 'APPROVED' + + def _calculate_consensus_score(self, profile_results: Dict[str, Dict]) -> float: + """Calculate consensus score across profiles""" + scores = [result['overall_score'] for result in profile_results.values()] + return float(np.mean(scores)) if scores else 0.5 + + def _find_conflicting_decisions(self, profile_results: Dict[str, Dict]) -> List[str]: + """Find profiles with conflicting decisions""" + approved_profiles = [name for name, result in profile_results.items() + if result['decision'] == 'APPROVED'] + rejected_profiles = [name for name, result in profile_results.items() + if result['decision'] in ['REJECTED', 'REJECTED_CRITICAL_FAILURES']] + + if approved_profiles and rejected_profiles: + return [f"Approved: {approved_profiles}, Rejected: {rejected_profiles}"] + return [] + +# Integration with main system +class DecisionMatrixEvaluator: + """Evaluator with decision matrix capabilities""" + + def __init__(self): + self.decision_matrix = TrustDecisionMatrix() + # ... 
other initialization + + def evaluate_with_decision_matrix(self, model, data, + stakeholder_type: StakeholderType = None, + deployment_context: DeploymentContext = None, + custom_profiles: List[str] = None) -> Dict[str, Any]: + """Execute evaluation with decision matrix analysis""" + + # Standard evaluation + evaluation_results = self.evaluate_comprehensive_trust(model, data) + + # Determine profiles to evaluate against + profiles_to_evaluate = [] + + if custom_profiles: + profiles_to_evaluate.extend(custom_profiles) + elif stakeholder_type: + profile_mapping = { + StakeholderType.EXECUTIVE: ['executive_high_level'], + StakeholderType.TECHNICAL: ['technical_detailed'], + StakeholderType.REGULATORY: ['regulatory_compliance'], + StakeholderType.END_USER: ['executive_high_level'], + StakeholderType.BUSINESS: ['executive_high_level'] + } + profiles_to_evaluate.extend(profile_mapping.get(stakeholder_type, [])) + + if deployment_context == DeploymentContext.CRITICAL: + profiles_to_evaluate.append('production_critical') + elif deployment_context == DeploymentContext.PRODUCTION: + profiles_to_evaluate.append('technical_detailed') + + # If no specific profiles, use default comprehensive evaluation + if not profiles_to_evaluate: + profiles_to_evaluate = ['executive_high_level', 'technical_detailed'] + + # Multi-profile evaluation + decision_results = self.decision_matrix.multi_profile_evaluation( + evaluation_results, profiles_to_evaluate + ) + + # Combine results + final_results = evaluation_results.copy() + final_results['decision_matrix_analysis'] = decision_results + + return final_results + +# Usage example +def advanced_trust_decision_example(): + """Example of advanced trust decision making""" + + # Create evaluator + evaluator = DecisionMatrixEvaluator() + + # Define custom profile for healthcare application + healthcare_criteria = [ + TrustCriterion('safety', 0.95, 1.0, critical=True, rationale="Patient safety"), + TrustCriterion('privacy', 0.95, 0.9, critical=True, rationale="HIPAA compliance"), + TrustCriterion('reliability', 0.9, 0.8, rationale="Medical accuracy"), + TrustCriterion('fairness', 0.9, 0.7, rationale="Non-discrimination") + ] + + evaluator.decision_matrix.create_custom_profile('healthcare_medical_ai', healthcare_criteria) + + # Evaluate model + results = evaluator.evaluate_with_decision_matrix( + model=my_medical_ai_model, + data=medical_test_data, + custom_profiles=['healthcare_medical_ai', 'regulatory_compliance'] + ) + + print(f"Overall Trust Score: {results['overall_trust_score']:.3f}") + print(f"Decision: {results['decision_matrix_analysis']['final_decision']}") + + # Show detailed analysis + for profile_name, profile_result in results['decision_matrix_analysis']['individual_profile_results'].items(): + print(f"\n{profile_name.upper()} Profile:") + print(f" Score: {profile_result['overall_score']:.3f}") + print(f" Decision: {profile_result['decision']}") + if profile_result['recommendations']: + print(" Recommendations:") + for rec in profile_result['recommendations']: + print(f" - {rec}") diff --git a/high_performance_system/core/trust_orchestrator.py b/high_performance_system/core/trust_orchestrator.py new file mode 100644 index 0000000..cc0eee4 --- /dev/null +++ b/high_performance_system/core/trust_orchestrator.py @@ -0,0 +1,444 @@ +# src/orchestration/trust_orchestrator.py +""" +Trust Orchestration System - End-to-end trust management from development to production +""" + +import asyncio +import json +from datetime import datetime, timedelta +from typing import 
Dict, Any, List, Optional, Callable +from dataclasses import dataclass, asdict +import yaml +import logging + +logger = logging.getLogger(__name__) + +@dataclass +class TrustPipelineStage: + """A stage in the trust evaluation pipeline""" + name: str + description: str + required_trust_score: float + evaluation_config: Dict[str, Any] + timeout_seconds: int = 300 + parallel_execution: bool = False + +@dataclass +class DeploymentEnvironment: + """Configuration for a deployment environment""" + name: str + trust_requirements: Dict[str, Any] + monitoring_config: Dict[str, Any] + rollback_conditions: Dict[str, Any] + +class TrustOrchestrator: + """Orchestrates trust evaluation across the entire AI lifecycle""" + + def __init__(self): + self.pipeline_stages = [] + self.environments = {} + self.deployment_history = [] + self.monitoring_systems = {} + self.alerting_systems = {} + + def define_pipeline_stage(self, stage: TrustPipelineStage): + """Define a stage in the trust evaluation pipeline""" + self.pipeline_stages.append(stage) + logger.info(f"Added pipeline stage: {stage.name}") + + def define_environment(self, env_name: str, environment: DeploymentEnvironment): + """Define a deployment environment""" + self.environments[env_name] = environment + logger.info(f"Defined environment: {env_name}") + + async def execute_trust_pipeline(self, model, data: Dict[str, Any], + pipeline_stages: List[str] = None, + evaluator = None) -> Dict[str, Any]: + """Execute the trust evaluation pipeline""" + if evaluator is None: + from src.evaluators.composite_evaluator import CompositeTrustEvaluator + evaluator = CompositeTrustEvaluator() + + if pipeline_stages is None: + pipeline_stages = [stage.name for stage in self.pipeline_stages] + + results = {} + pipeline_success = True + failed_stages = [] + + for stage in self.pipeline_stages: + if stage.name not in pipeline_stages: + continue + + logger.info(f"Executing pipeline stage: {stage.name}") + + try: + # Execute stage with timeout + stage_result = await asyncio.wait_for( + self._execute_stage(stage, model, data, evaluator), + timeout=stage.timeout_seconds + ) + + results[stage.name] = stage_result + + # Check if stage meets requirements + if stage_result.get('overall_trust_score', 0) < stage.required_trust_score: + logger.warning(f"Stage {stage.name} failed trust requirement") + pipeline_success = False + failed_stages.append(stage.name) + break # Stop pipeline on failure + + except asyncio.TimeoutError: + logger.error(f"Stage {stage.name} timed out") + pipeline_success = False + failed_stages.append(stage.name) + break + except Exception as e: + logger.error(f"Stage {stage.name} failed: {e}") + pipeline_success = False + failed_stages.append(stage.name) + break + + return { + 'pipeline_success': pipeline_success, + 'stage_results': results, + 'failed_stages': failed_stages, + 'overall_trust_score': self._calculate_pipeline_trust_score(results), + 'recommendations': self._generate_pipeline_recommendations(results, failed_stages) + } + + async def _execute_stage(self, stage: TrustPipelineStage, model, + data: Dict[str, Any], evaluator) -> Dict[str, Any]: + """Execute a single pipeline stage""" + if stage.parallel_execution: + # Execute in parallel if configured + return await self._execute_parallel_stage(stage, model, data, evaluator) + else: + # Execute sequentially + return evaluator.evaluate_comprehensive_trust(model, data, **stage.evaluation_config) + + async def _execute_parallel_stage(self, stage: TrustPipelineStage, model, + data: Dict[str, Any], 
evaluator) -> Dict[str, Any]: + """Execute stage with parallel processing""" + # This would implement parallel evaluation of different dimensions + # For simplicity, we'll just return standard evaluation + return evaluator.evaluate_comprehensive_trust(model, data, **stage.evaluation_config) + + def _calculate_pipeline_trust_score(self, stage_results: Dict[str, Any]) -> float: + """Calculate overall trust score from pipeline results""" + if not stage_results: + return 0.5 + + scores = [result.get('overall_trust_score', 0.5) + for result in stage_results.values()] + return float(sum(scores) / len(scores)) if scores else 0.5 + + def _generate_pipeline_recommendations(self, stage_results: Dict[str, Any], + failed_stages: List[str]) -> List[str]: + """Generate recommendations based on pipeline results""" + recommendations = [] + + if failed_stages: + recommendations.append(f"Pipeline failed at stages: {', '.join(failed_stages)}") + for stage_name in failed_stages: + stage_result = stage_results.get(stage_name, {}) + score = stage_result.get('overall_trust_score', 0) + required = next((s.required_trust_score for s in self.pipeline_stages + if s.name == stage_name), 0) + recommendations.append(f" {stage_name}: Score {score:.3f} < Required {required}") + + # General recommendations from stage results + for stage_name, result in stage_results.items(): + if 'recommendations' in result: + recommendations.extend([f"{stage_name}: {rec}" for rec in result['recommendations']]) + + return recommendations + + async def deploy_with_trust_monitoring(self, model, data: Dict[str, Any], + environment_name: str, + evaluator = None) -> Dict[str, Any]: + """Deploy model with continuous trust monitoring""" + if environment_name not in self.environments: + return {'error': f'Environment {environment_name} not defined'} + + environment = self.environments[environment_name] + + # Execute trust pipeline first + pipeline_results = await self.execute_trust_pipeline(model, data, evaluator=evaluator) + + if not pipeline_results['pipeline_success']: + return { + 'deployment_status': 'FAILED', + 'reason': 'Trust pipeline failed', + 'pipeline_results': pipeline_results + } + + # Check environment-specific requirements + env_requirements_met = self._check_environment_requirements( + pipeline_results, environment + ) + + if not env_requirements_met: + return { + 'deployment_status': 'FAILED', + 'reason': 'Environment trust requirements not met', + 'pipeline_results': pipeline_results + } + + # Deploy model (simulated) + deployment_id = self._generate_deployment_id() + deployment_info = { + 'deployment_id': deployment_id, + 'environment': environment_name, + 'timestamp': datetime.now().isoformat(), + 'model_trust_score': pipeline_results['overall_trust_score'], + 'status': 'DEPLOYED' + } + + # Start monitoring + monitoring_task = asyncio.create_task( + self._start_continuous_monitoring(deployment_id, model, data, environment) + ) + + # Record deployment + self.deployment_history.append(deployment_info) + + return { + 'deployment_status': 'SUCCESS', + 'deployment_info': deployment_info, + 'pipeline_results': pipeline_results, + 'monitoring_started': True + } + + def _check_environment_requirements(self, pipeline_results: Dict[str, Any], + environment: DeploymentEnvironment) -> bool: + """Check if pipeline results meet environment requirements""" + overall_score = pipeline_results.get('overall_trust_score', 0) + env_min_score = environment.trust_requirements.get('minimum_trust_score', 0.7) + + return overall_score >= env_min_score 
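A minimal sketch of wiring the pieces above together, to make the pipeline flow concrete. The import path `high_performance_system.core.trust_orchestrator` and the `StubEvaluator` are assumptions for illustration; in practice a `CompositeTrustEvaluator` (or any object exposing `evaluate_comprehensive_trust`) would be passed instead.

```python
# Illustrative only: assumed import path and a stand-in evaluator, not part of this patch.
import asyncio

from high_performance_system.core.trust_orchestrator import (
    DeploymentEnvironment,
    TrustOrchestrator,
    TrustPipelineStage,
)


class StubEvaluator:
    """Stand-in returning a fixed score; swap in CompositeTrustEvaluator in practice."""

    def evaluate_comprehensive_trust(self, model, data, **kwargs):
        return {'overall_trust_score': 0.82, 'recommendations': []}


orchestrator = TrustOrchestrator()
orchestrator.define_pipeline_stage(TrustPipelineStage(
    name='initial_validation',
    description='Quick reliability/safety gate',
    required_trust_score=0.6,
    evaluation_config={'categories': ['reliability', 'safety']},
))
orchestrator.define_pipeline_stage(TrustPipelineStage(
    name='comprehensive_evaluation',
    description='Full trust evaluation',
    required_trust_score=0.7,
    evaluation_config={},
))
orchestrator.define_environment('production', DeploymentEnvironment(
    name='production',
    trust_requirements={'minimum_trust_score': 0.8},
    monitoring_config={'check_interval_seconds': 300, 'alert_threshold': 0.7},
    rollback_conditions={'critical_threshold': 0.5},
))

results = asyncio.run(orchestrator.execute_trust_pipeline(
    model=None, data={'prompts': []}, evaluator=StubEvaluator(),
))
print(results['pipeline_success'], round(results['overall_trust_score'], 3))
```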
+ + def _generate_deployment_id(self) -> str: + """Generate unique deployment ID""" + import uuid + return str(uuid.uuid4())[:8] + + async def _start_continuous_monitoring(self, deployment_id: str, model, + data: Dict[str, Any], + environment: DeploymentEnvironment): + """Start continuous monitoring for deployed model""" + monitoring_config = environment.monitoring_config + interval_seconds = monitoring_config.get('check_interval_seconds', 300) + + logger.info(f"Starting monitoring for deployment {deployment_id}") + + while True: + try: + # Execute monitoring evaluation + monitoring_results = await self._execute_monitoring_check( + model, data, monitoring_config + ) + + # Check for alerts + alerts = self._check_monitoring_alerts( + monitoring_results, environment + ) + + if alerts: + await self._trigger_alerts(deployment_id, alerts) + + # Check rollback conditions + if self._check_rollback_conditions(monitoring_results, environment): + await self._initiate_rollback(deployment_id) + break + + await asyncio.sleep(interval_seconds) + + except Exception as e: + logger.error(f"Monitoring error for deployment {deployment_id}: {e}") + await asyncio.sleep(interval_seconds) + + async def _execute_monitoring_check(self, model, data: Dict[str, Any], + config: Dict[str, Any]): + """Execute monitoring check""" + # This would implement actual monitoring logic + # For now, simulate with basic evaluation + from src.evaluators.composite_evaluator import CompositeTrustEvaluator + evaluator = CompositeTrustEvaluator() + + return evaluator.evaluate_comprehensive_trust(model, data) + + def _check_monitoring_alerts(self, monitoring_results: Dict[str, Any], + environment: DeploymentEnvironment) -> List[Dict[str, Any]]: + """Check for monitoring alerts""" + alerts = [] + + current_score = monitoring_results.get('overall_trust_score', 0.5) + alert_threshold = environment.monitoring_config.get('alert_threshold', 0.6) + + if current_score < alert_threshold: + alerts.append({ + 'type': 'trust_score_drop', + 'severity': 'HIGH' if current_score < alert_threshold * 0.8 else 'MEDIUM', + 'current_score': current_score, + 'threshold': alert_threshold, + 'timestamp': datetime.now().isoformat() + }) + + return alerts + + async def _trigger_alerts(self, deployment_id: str, alerts: List[Dict[str, Any]]): + """Trigger alerts for monitoring issues""" + logger.warning(f"Alerts triggered for deployment {deployment_id}: {alerts}") + # In real implementation, this would send notifications via email, Slack, etc. 
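To make the alert thresholds above concrete: with an `alert_threshold` of 0.7, any score below 0.56 (80% of the threshold) is classified HIGH, and anything between 0.56 and 0.7 is MEDIUM. A toy check, assuming the same import path as the sketch above and calling the private helper purely for illustration:

```python
# Toy illustration of _check_monitoring_alerts; assumed import path, not part of this patch.
from high_performance_system.core.trust_orchestrator import (
    DeploymentEnvironment,
    TrustOrchestrator,
)

env = DeploymentEnvironment(
    name='production',
    trust_requirements={'minimum_trust_score': 0.8},
    monitoring_config={'check_interval_seconds': 300, 'alert_threshold': 0.7},
    rollback_conditions={'critical_threshold': 0.5},
)

alerts = TrustOrchestrator()._check_monitoring_alerts(
    {'overall_trust_score': 0.52}, env
)
print(alerts[0]['type'], alerts[0]['severity'])  # trust_score_drop HIGH
```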
+ + def _check_rollback_conditions(self, monitoring_results: Dict[str, Any], + environment: DeploymentEnvironment) -> bool: + """Check if rollback conditions are met""" + rollback_conditions = environment.rollback_conditions + + current_score = monitoring_results.get('overall_trust_score', 0.5) + rollback_threshold = rollback_conditions.get('critical_threshold', 0.4) + + return current_score < rollback_threshold + + async def _initiate_rollback(self, deployment_id: str): + """Initiate rollback for problematic deployment""" + logger.critical(f"Initiating rollback for deployment {deployment_id}") + # In real implementation, this would trigger actual rollback procedures + +# Configuration system +class TrustOrchestrationConfig: + """Configuration for trust orchestration""" + + def __init__(self, config_file: str = None): + self.config = self._load_config(config_file) + self.orchestrator = TrustOrchestrator() + self._setup_from_config() + + def _load_config(self, config_file: str = None) -> Dict[str, Any]: + """Load configuration from file""" + if config_file and config_file.endswith('.yaml'): + with open(config_file, 'r') as f: + return yaml.safe_load(f) + elif config_file and config_file.endswith('.json'): + with open(config_file, 'r') as f: + return json.load(f) + else: + return self._get_default_config() + + def _get_default_config(self) -> Dict[str, Any]: + """Get default configuration""" + return { + 'pipeline_stages': [ + { + 'name': 'initial_validation', + 'description': 'Initial trust validation', + 'required_trust_score': 0.6, + 'evaluation_config': {'categories': ['reliability', 'safety']}, + 'timeout_seconds': 300 + }, + { + 'name': 'comprehensive_evaluation', + 'description': 'Full trust evaluation', + 'required_trust_score': 0.7, + 'evaluation_config': {}, + 'timeout_seconds': 600 + }, + { + 'name': 'simulation_testing', + 'description': 'Stress testing and simulation', + 'required_trust_score': 0.75, + 'evaluation_config': {'simulation_enabled': True}, + 'timeout_seconds': 900 + } + ], + 'environments': { + 'development': { + 'trust_requirements': {'minimum_trust_score': 0.5}, + 'monitoring_config': { + 'check_interval_seconds': 3600, + 'alert_threshold': 0.4 + }, + 'rollback_conditions': {'critical_threshold': 0.2} + }, + 'production': { + 'trust_requirements': {'minimum_trust_score': 0.8}, + 'monitoring_config': { + 'check_interval_seconds': 300, + 'alert_threshold': 0.7 + }, + 'rollback_conditions': {'critical_threshold': 0.5} + } + } + } + + def _setup_from_config(self): + """Set up orchestrator from configuration""" + # Setup pipeline stages + for stage_config in self.config.get('pipeline_stages', []): + stage = TrustPipelineStage(**stage_config) + self.orchestrator.define_pipeline_stage(stage) + + # Setup environments + for env_name, env_config in self.config.get('environments', {}).items(): + environment = DeploymentEnvironment( + name=env_name, + trust_requirements=env_config.get('trust_requirements', {}), + monitoring_config=env_config.get('monitoring_config', {}), + rollback_conditions=env_config.get('rollback_conditions', {}) + ) + self.orchestrator.define_environment(env_name, environment) + +# Usage example +async def orchestration_example(): + """Example of trust orchestration in action""" + + # Load configuration + config = TrustOrchestrationConfig('trust_orchestration_config.yaml') + + # Prepare model and data + model = my_llm_model + data = test_data + + # Execute trust pipeline + print("Executing trust evaluation pipeline...") + pipeline_results = await 
config.orchestrator.execute_trust_pipeline(model, data) + + print(f"Pipeline Success: {pipeline_results['pipeline_success']}") + print(f"Overall Trust Score: {pipeline_results['overall_trust_score']:.3f}") + + if pipeline_results['failed_stages']: + print(f"Failed Stages: {pipeline_results['failed_stages']}") + + # Deploy to production environment + print("\nDeploying to production environment...") + deployment_results = await config.orchestrator.deploy_with_trust_monitoring( + model, data, 'production' + ) + + print(f"Deployment Status: {deployment_results['deployment_status']}") + if 'deployment_info' in deployment_results: + print(f"Deployment ID: {deployment_results['deployment_info']['deployment_id']}") + + return deployment_results + +# CLI interface +def main(): + """Main CLI interface""" + import argparse + import asyncio + + parser = argparse.ArgumentParser(description='Trust Orchestration System') + parser.add_argument('--config', help='Configuration file path') + parser.add_argument('--deploy-env', help='Environment to deploy to') + parser.add_argument('--model-path', help='Path to model') + parser.add_argument('--data-path', help='Path to evaluation data') + + args = parser.parse_args() + + # Run orchestration + asyncio.run(orchestration_example()) + +if __name__ == "__main__": + main() diff --git a/high_performance_system/core/trust_simulation.py b/high_performance_system/core/trust_simulation.py new file mode 100644 index 0000000..ab40de7 --- /dev/null +++ b/high_performance_system/core/trust_simulation.py @@ -0,0 +1,442 @@ +# src/simulation/trust_simulation.py +""" +Trust Simulation and Stress Testing - Test trust under extreme conditions +""" + +import numpy as np +from typing import Dict, Any, List, Callable +import random +from dataclasses import dataclass +from abc import ABC, abstractmethod + +@dataclass +class SimulationScenario: + """A specific simulation scenario with parameters""" + name: str + description: str + stress_factors: Dict[str, float] # Factor name -> intensity (0-1) + duration: int # Simulation steps + critical_threshold: float = 0.6 + +class StressTestScenario(ABC): + """Base class for stress test scenarios""" + + def __init__(self, name: str, description: str): + self.name = name + self.description = description + + @abstractmethod + def apply_stress(self, model, data: Dict[str, Any], intensity: float) -> Dict[str, Any]: + """Apply stress to model and data""" + pass + + @abstractmethod + def measure_impact(self, original_results: Dict[str, Any], + stressed_results: Dict[str, Any]) -> Dict[str, Any]: + """Measure impact of stress on trust metrics""" + pass + +class AdversarialAttackScenario(StressTestScenario): + """Simulate adversarial attacks on the model""" + + def apply_stress(self, model, data: Dict[str, Any], intensity: float) -> Dict[str, Any]: + """Apply adversarial perturbations to data""" + stressed_data = data.copy() + + # Add adversarial noise to prompts (simplified) + if 'prompts' in stressed_data: + prompts = stressed_data['prompts'] + stressed_prompts = [] + + for prompt in prompts: + if random.random() < intensity: # Apply stress with probability + # Simple adversarial perturbation + perturbed_prompt = self._perturb_prompt(prompt, intensity) + stressed_prompts.append(perturbed_prompt) + else: + stressed_prompts.append(prompt) + + stressed_data['prompts'] = stressed_prompts + + return stressed_data + + def _perturb_prompt(self, prompt: str, intensity: float) -> str: + """Apply adversarial perturbation to prompt""" + words = prompt.split() 
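+ # Edit budget: roughly 30% of the intensity becomes word-level edits, e.g. a 20-word
+ # prompt at intensity 0.5 allows max(1, int(20 * 0.5 * 0.3)) = 3 perturbations.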
+ perturbation_count = max(1, int(len(words) * intensity * 0.3)) + + for _ in range(perturbation_count): + if words: + # Randomly swap, delete, or add words + action = random.choice(['swap', 'delete', 'add']) + if action == 'swap' and len(words) > 1: + i, j = random.sample(range(len(words)), 2) + words[i], words[j] = words[j], words[i] + elif action == 'delete' and len(words) > 1: + words.pop(random.randint(0, len(words) - 1)) + elif action == 'add': + words.insert(random.randint(0, len(words)), '[ATTACK]') + + return ' '.join(words) + + def measure_impact(self, original_results: Dict[str, Any], + stressed_results: Dict[str, Any]) -> Dict[str, Any]: + """Measure impact of adversarial attack""" + original_score = original_results.get('overall_trust_score', 0.5) + stressed_score = stressed_results.get('overall_trust_score', 0.5) + + score_degradation = original_score - stressed_score + + # Analyze dimension-specific impacts + original_dims = original_results.get('dimension_scores', {}) + stressed_dims = stressed_results.get('dimension_scores', {}) + + dimension_impacts = {} + for dim in set(original_dims.keys()) | set(stressed_dims.keys()): + orig_val = original_dims.get(dim, 0.5) + stress_val = stressed_dims.get(dim, 0.5) + dimension_impacts[dim] = { + 'degradation': orig_val - stress_val, + 'percentage_drop': ((orig_val - stress_val) / orig_val * 100) if orig_val > 0 else 0 + } + + return { + 'score_degradation': score_degradation, + 'percentage_degradation': (score_degradation / original_score * 100) if original_score > 0 else 0, + 'dimension_impacts': dimension_impacts, + 'vulnerability_score': max(0, min(1, score_degradation * 2)), # Scaled vulnerability + 'recommendation': self._generate_recommendation(score_degradation, dimension_impacts) + } + + def _generate_recommendation(self, score_degradation: float, dimension_impacts: Dict) -> str: + """Generate recommendation based on impact analysis""" + if score_degradation > 0.3: + return "High vulnerability to adversarial attacks. Implement robust adversarial training." + elif score_degradation > 0.1: + return "Moderate vulnerability detected. Consider adversarial defense mechanisms." + else: + return "Low vulnerability to tested adversarial scenarios." 
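A short illustrative run of the adversarial scenario above on toy data and hand-picked scores (the import path is an assumption, and the seeded `random` call is only there to make the perturbation repeatable):

```python
# Illustrative only; assumed import path, toy scores.
import random

from high_performance_system.core.trust_simulation import AdversarialAttackScenario

random.seed(0)
scenario = AdversarialAttackScenario('adversarial_attack', 'demo')

stressed = scenario.apply_stress(
    model=None,
    data={'prompts': ['summarise the quarterly report in three bullet points']},
    intensity=0.9,
)
print(stressed['prompts'][0])  # words swapped/dropped or '[ATTACK]' tokens inserted

impact = scenario.measure_impact(
    {'overall_trust_score': 0.85, 'dimension_scores': {'safety': 0.90}},
    {'overall_trust_score': 0.70, 'dimension_scores': {'safety': 0.75}},
)
print(round(impact['score_degradation'], 2), impact['recommendation'])
# 0.15 -> "Moderate vulnerability detected. Consider adversarial defense mechanisms."
```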
+ +class DataDriftScenario(StressTestScenario): + """Simulate data drift scenarios""" + + def apply_stress(self, model, data: Dict[str, Any], intensity: float) -> Dict[str, Any]: + """Apply data drift simulation""" + stressed_data = data.copy() + + # Simulate concept drift by modifying data characteristics + if 'contexts' in stressed_data: + contexts = stressed_data['contexts'] + drifted_contexts = [] + + for context in contexts: + if random.random() < intensity: + # Modify context to simulate drift + drifted_context = self._drift_context(context, intensity) + drifted_contexts.append(drifted_context) + else: + drifted_contexts.append(context) + + stressed_data['contexts'] = drifted_contexts + + return stressed_data + + def _drift_context(self, context: str, intensity: float) -> str: + """Apply context drift""" + # Simplified context drift simulation + drift_indicators = ['[FUTURE]', '[PAST]', '[DIFFERENT_DOMAIN]', '[EVOLVED]'] + drift_count = int(intensity * 3) + + for _ in range(drift_count): + indicator = random.choice(drift_indicators) + context = f"{indicator} {context}" + + return context + + def measure_impact(self, original_results: Dict[str, Any], + stressed_results: Dict[str, Any]) -> Dict[str, Any]: + """Measure impact of data drift""" + original_score = original_results.get('overall_trust_score', 0.5) + stressed_score = stressed_results.get('overall_trust_score', 0.5) + + drift_impact = original_score - stressed_score + + return { + 'drift_impact': drift_impact, + 'adaptability_score': max(0, min(1, 1 - drift_impact)), # Higher is better + 'recommendation': self._generate_recommendation(drift_impact) + } + + def _generate_recommendation(self, drift_impact: float) -> str: + """Generate recommendation based on drift impact""" + if drift_impact > 0.2: + return "Significant performance degradation under data drift. Implement continuous learning and monitoring." + elif drift_impact > 0.1: + return "Moderate drift sensitivity. Consider drift detection mechanisms." + else: + return "Good robustness to data drift scenarios." 
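The drift scenario above can be exercised the same way; the stressed context simply gains drift-marker prefixes, and the impact analysis turns the score drop into an adaptability score (import path assumed, scores are toy values):

```python
# Illustrative only; assumed import path, toy scores.
import random

from high_performance_system.core.trust_simulation import DataDriftScenario

random.seed(1)
drift = DataDriftScenario('data_drift', 'demo')

out = drift.apply_stress(model=None, data={'contexts': ['2023 retail sales FAQ']}, intensity=0.7)
print(out['contexts'][0])  # e.g. "[PAST] [EVOLVED] 2023 retail sales FAQ"

impact = drift.measure_impact({'overall_trust_score': 0.85}, {'overall_trust_score': 0.74})
print(round(impact['adaptability_score'], 2), impact['recommendation'])
# 0.89 -> "Moderate drift sensitivity. Consider drift detection mechanisms."
```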
+ +class TrustSimulationEngine: + """Main simulation engine for trust stress testing""" + + def __init__(self): + self.scenarios = { + 'adversarial_attack': AdversarialAttackScenario( + 'adversarial_attack', + 'Test model robustness against adversarial inputs' + ), + 'data_drift': DataDriftScenario( + 'data_drift', + 'Test model performance under data distribution shifts' + ) + } + self.default_scenarios = [ + 'adversarial_attack', + 'data_drift' + ] + + def register_scenario(self, name: str, scenario: StressTestScenario): + """Register a custom stress test scenario""" + self.scenarios[name] = scenario + + def run_simulation(self, model, baseline_data: Dict[str, Any], + scenario_name: str, intensity: float = 0.5, + evaluator = None) -> Dict[str, Any]: + """Run a single simulation scenario""" + if scenario_name not in self.scenarios: + return {'error': f'Scenario {scenario_name} not found'} + + scenario = self.scenarios[scenario_name] + + # Get baseline evaluation + if evaluator is None: + from src.evaluators.composite_evaluator import CompositeTrustEvaluator + evaluator = CompositeTrustEvaluator() + + baseline_results = evaluator.evaluate_comprehensive_trust(model, baseline_data) + + # Apply stress + stressed_data = scenario.apply_stress(model, baseline_data, intensity) + + # Evaluate stressed performance + stressed_results = evaluator.evaluate_comprehensive_trust(model, stressed_data) + + # Measure impact + impact_analysis = scenario.measure_impact(baseline_results, stressed_results) + + return { + 'scenario': scenario_name, + 'intensity': intensity, + 'baseline_results': baseline_results, + 'stressed_results': stressed_results, + 'impact_analysis': impact_analysis, + 'stress_applied': stressed_data != baseline_data + } + + def run_comprehensive_simulation(self, model, baseline_data: Dict[str, Any], + scenarios: List[str] = None, + intensities: List[float] = None, + evaluator = None) -> Dict[str, Any]: + """Run comprehensive simulation across multiple scenarios""" + if scenarios is None: + scenarios = self.default_scenarios + + if intensities is None: + intensities = [0.3, 0.5, 0.7, 0.9] + + simulation_results = {} + scenario_summaries = {} + + for scenario_name in scenarios: + scenario_results = [] + for intensity in intensities: + result = self.run_simulation(model, baseline_data, scenario_name, + intensity, evaluator) + scenario_results.append(result) + + simulation_results[scenario_name] = scenario_results + + # Summarize scenario results + scenario_summaries[scenario_name] = self._summarize_scenario_results(scenario_results) + + # Overall simulation summary + overall_summary = self._generate_overall_summary(scenario_summaries) + + return { + 'detailed_results': simulation_results, + 'scenario_summaries': scenario_summaries, + 'overall_summary': overall_summary, + 'robustness_score': overall_summary.get('overall_robustness', 0.5), + 'recommendations': self._generate_simulation_recommendations(scenario_summaries) + } + + def _summarize_scenario_results(self, scenario_results: List[Dict]) -> Dict[str, Any]: + """Summarize results for a single scenario across intensities""" + if not scenario_results: + return {} + + # Extract key metrics across intensities + intensities = [r['intensity'] for r in scenario_results] + impacts = [r['impact_analysis'].get('score_degradation', 0) for r in scenario_results] + + # Find maximum impact + max_impact = max(impacts) if impacts else 0 + + # Calculate robustness (inverse of impact) + robustness_scores = [1 - impact for impact in impacts] + 
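+ # Robustness is the complement of degradation: impacts of [0.1, 0.3] yield robustness
+ # scores [0.9, 0.7] and an average robustness of 0.8 below.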
avg_robustness = sum(robustness_scores) / len(robustness_scores) if robustness_scores else 0.5 + + return { + 'max_impact': max_impact, + 'average_robustness': avg_robustness, + 'intensity_impact_curve': list(zip(intensities, impacts)), + 'worst_case_intensity': intensities[impacts.index(max_impact)] if impacts else 0.5 + } + + def _generate_overall_summary(self, scenario_summaries: Dict[str, Dict]) -> Dict[str, Any]: + """Generate overall simulation summary""" + if not scenario_summaries: + return {} + + avg_robustness = np.mean([summary.get('average_robustness', 0.5) + for summary in scenario_summaries.values()]) + + max_impacts = [summary.get('max_impact', 0) + for summary in scenario_summaries.values()] + worst_case_impact = max(max_impacts) if max_impacts else 0 + + return { + 'overall_robustness': float(avg_robustness), + 'worst_case_impact': worst_case_impact, + 'scenarios_tested': list(scenario_summaries.keys()), + 'deployment_readiness': 'HIGH' if avg_robustness > 0.8 else + 'MEDIUM' if avg_robustness > 0.6 else 'LOW' + } + + def _generate_simulation_recommendations(self, scenario_summaries: Dict[str, Dict]) -> List[str]: + """Generate recommendations based on simulation results""" + recommendations = [] + + for scenario_name, summary in scenario_summaries.items(): + avg_robustness = summary.get('average_robustness', 0.5) + max_impact = summary.get('max_impact', 0) + + if avg_robustness < 0.6: + recommendations.append(f"Low robustness in {scenario_name} scenarios. Requires improvement.") + elif max_impact > 0.3: + recommendations.append(f"Significant vulnerability detected in {scenario_name}. Monitor closely.") + + return recommendations + +# Integration with main evaluator +class SimulationEnhancedEvaluator: + """Evaluator with simulation and stress testing capabilities""" + + def __init__(self): + self.simulation_engine = TrustSimulationEngine() + # ... 
other initialization + + def evaluate_with_simulation(self, model, data, + simulation_config: Dict[str, Any] = None) -> Dict[str, Any]: + """Execute evaluation with comprehensive simulation testing""" + + # Standard evaluation + standard_results = self.evaluate_comprehensive_trust(model, data) + + # Run simulations if configured + if simulation_config is not None: + simulation_results = self.simulation_engine.run_comprehensive_simulation( + model, data, + scenarios=simulation_config.get('scenarios'), + intensities=simulation_config.get('intensities'), + evaluator=self # Pass self as evaluator + ) + + # Combine results + final_results = standard_results.copy() + final_results['simulation_analysis'] = simulation_results + + # Add simulation-based trust score + final_results['simulation_adjusted_trust'] = self._calculate_simulation_adjusted_trust( + standard_results, simulation_results + ) + + return final_results + else: + return standard_results + + def _calculate_simulation_adjusted_trust(self, standard_results: Dict[str, Any], + simulation_results: Dict[str, Any]) -> float: + """Calculate trust score adjusted for simulation results""" + base_trust = standard_results.get('overall_trust_score', 0.5) + simulation_robustness = simulation_results.get('robustness_score', 0.5) + + # Adjust trust score based on robustness + adjusted_trust = base_trust * simulation_robustness + + return max(0, min(1, adjusted_trust)) + +# Usage example +def simulation_testing_example(): + """Example of trust simulation and stress testing""" + + # Create simulation-enhanced evaluator + evaluator = SimulationEnhancedEvaluator() + + # Define simulation configuration + simulation_config = { + 'scenarios': ['adversarial_attack', 'data_drift'], + 'intensities': [0.3, 0.5, 0.7, 0.9] + } + + # Run evaluation with simulation + results = evaluator.evaluate_with_simulation( + model=my_llm_model, + data=test_data, + simulation_config=simulation_config + ) + + print("=== Trust Simulation Results ===") + print(f"Base Trust Score: {results['overall_trust_score']:.3f}") + print(f"Simulation-Adjusted Trust: {results['simulation_adjusted_trust']:.3f}") + print(f"Overall Robustness: {results['simulation_analysis']['overall_summary']['overall_robustness']:.3f}") + print(f"Deployment Readiness: {results['simulation_analysis']['overall_summary']['deployment_readiness']}") + + # Show scenario summaries + print("\nScenario Summaries:") + for scenario_name, summary in results['simulation_analysis']['scenario_summaries'].items(): + print(f" {scenario_name}:") + print(f" Average Robustness: {summary['average_robustness']:.3f}") + print(f" Max Impact: {summary['max_impact']:.3f}") + + # Show recommendations + if results['simulation_analysis']['recommendations']: + print("\nRecommendations:") + for rec in results['simulation_analysis']['recommendations']: + print(f" - {rec}") + +# Advanced usage with custom scenarios +def custom_scenario_example(): + """Example with custom stress test scenarios""" + + class CustomStressScenario(StressTestScenario): + def apply_stress(self, model, data: Dict[str, Any], intensity: float) -> Dict[str, Any]: + # Custom stress logic + stressed_data = data.copy() + # ... 
implementation + return stressed_data + + def measure_impact(self, original_results: Dict[str, Any], + stressed_results: Dict[str, Any]) -> Dict[str, Any]: + # Custom impact measurement + return {'custom_impact': 0.5} + + # Register custom scenario + simulation_engine = TrustSimulationEngine() + simulation_engine.register_scenario('custom_stress', CustomStressScenario( + 'custom_stress', 'Custom stress test scenario' + )) + + +# Use in evaluation +# ... implementation diff --git a/high_performance_system/core/trust_timeline.py b/high_performance_system/core/trust_timeline.py new file mode 100644 index 0000000..53814a3 --- /dev/null +++ b/high_performance_system/core/trust_timeline.py @@ -0,0 +1,165 @@ +# src/evolution/trust_timeline.py +""" +Trust Evolution System - Track how trust changes over time, contexts, and model updates +""" + +import pandas as pd +import numpy as np +from datetime import datetime, timedelta +from typing import Dict, Any, List, Optional +import plotly.graph_objects as go +import plotly.express as px + +class TrustEvolutionTracker: + """Tracks trust evolution across multiple dimensions and time periods""" + + def __init__(self): + self.trust_history = pd.DataFrame() + self.evolution_patterns = {} + self.anomaly_detectors = {} + + def track_evaluation(self, model_id: str, evaluation_results: Dict[str, Any], + context: str = "general", timestamp: datetime = None): + """Track a trust evaluation in the timeline""" + if timestamp is None: + timestamp = datetime.now() + + # Extract trust metrics + record = { + 'model_id': model_id, + 'timestamp': timestamp, + 'context': context, + 'overall_trust': evaluation_results.get('overall_trust_score', 0.0), + 'dimensions': evaluation_results.get('dimension_scores', {}), + 'categories': evaluation_results.get('category_scores', {}), + 'metadata': evaluation_results.get('metadata', {}) + } + + # Add to history + self.trust_history = pd.concat([self.trust_history, pd.DataFrame([record])], + ignore_index=True) + + # Update evolution patterns + self._update_evolution_patterns(model_id, record) + + def detect_trust_anomalies(self, model_id: str) -> List[Dict[str, Any]]: + """Detect anomalies in trust evolution""" + model_history = self.trust_history[self.trust_history['model_id'] == model_id] + + anomalies = [] + for dimension in ['overall_trust'] + list(model_history['dimensions'].iloc[0].keys()): + scores = model_history[dimension].values if dimension == 'overall_trust' else \ + [d.get(dimension, 0) for d in model_history['dimensions']] + + # Statistical anomaly detection + mean_score = np.mean(scores) + std_score = np.std(scores) + + # Check for recent significant drops + if len(scores) > 3: + recent_drop = scores[-1] < (mean_score - 2 * std_score) + if recent_drop: + anomalies.append({ + 'type': 'significant_drop', + 'dimension': dimension, + 'current_score': scores[-1], + 'historical_mean': mean_score, + 'severity': 'high' if scores[-1] < (mean_score - 3 * std_score) else 'medium', + 'timestamp': model_history.iloc[-1]['timestamp'] + }) + + return anomalies + + def predict_trust_trajectory(self, model_id: str, days_ahead: int = 30) -> Dict[str, Any]: + """Predict future trust trajectory using time series analysis""" + model_history = self.trust_history[self.trust_history['model_id'] == model_id] + + if len(model_history) < 5: + return {'status': 'insufficient_data', 'prediction': None} + + # Simple linear regression for prediction + timestamps = [(t - model_history.iloc[0]['timestamp']).days + for t in model_history['timestamp']] + 
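+ # Timestamps are reduced to day offsets from the first evaluation so the np.polyfit
+ # call below can fit a simple linear trend to the overall trust scores.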
overall_scores = model_history['overall_trust'].values + + # Linear regression + if len(set(timestamps)) > 1: # Avoid division by zero + slope = np.polyfit(timestamps, overall_scores, 1)[0] + predicted_score = overall_scores[-1] + (slope * days_ahead) + + return { + 'status': 'success', + 'current_score': overall_scores[-1], + 'predicted_score': max(0, min(1, predicted_score)), # Clamp between 0-1 + 'trend': 'improving' if slope > 0 else 'declining' if slope < 0 else 'stable', + 'confidence': min(1.0, len(timestamps) / 20.0) # Confidence increases with more data + } + + return {'status': 'insufficient_variance', 'prediction': None} + + def generate_evolution_report(self, model_id: str) -> Dict[str, Any]: + """Generate comprehensive trust evolution report""" + model_history = self.trust_history[self.trust_history['model_id'] == model_id] + + if len(model_history) == 0: + return {'status': 'no_data', 'report': None} + + # Calculate evolution metrics + first_score = model_history.iloc[0]['overall_trust'] + last_score = model_history.iloc[-1]['overall_trust'] + score_change = last_score - first_score + + # Dimension evolution + dimension_evolution = {} + if len(model_history) > 1: + first_dims = model_history.iloc[0]['dimensions'] + last_dims = model_history.iloc[-1]['dimensions'] + + for dim in set(first_dims.keys()) | set(last_dims.keys()): + first_val = first_dims.get(dim, 0) + last_val = last_dims.get(dim, 0) + dimension_evolution[dim] = { + 'change': last_val - first_val, + 'percentage_change': ((last_val - first_val) / first_val * 100) if first_val > 0 else 0 + } + + return { + 'status': 'success', + 'model_id': model_id, + 'evaluation_count': len(model_history), + 'time_span': (model_history.iloc[-1]['timestamp'] - model_history.iloc[0]['timestamp']).days, + 'overall_evolution': { + 'initial_score': first_score, + 'current_score': last_score, + 'absolute_change': score_change, + 'percentage_change': (score_change / first_score * 100) if first_score > 0 else 0 + }, + 'dimension_evolution': dimension_evolution, + 'anomalies_detected': self.detect_trust_anomalies(model_id), + 'trajectory_prediction': self.predict_trust_trajectory(model_id) + } + +# Integration with main evaluator +class EvolutionAwareEvaluator: + """Evaluator that tracks trust evolution""" + + def __init__(self): + self.trust_tracker = TrustEvolutionTracker() + # ... 
other initialization + + def evaluate_with_evolution_tracking(self, model, data, model_id: str, + context: str = "general") -> Dict[str, Any]: + """Execute evaluation with evolution tracking""" + results = self.evaluate_comprehensive_trust(model, data) + + # Track in evolution system + self.trust_tracker.track_evaluation(model_id, results, context) + + # Add evolution insights + results['evolution_insights'] = { + 'anomalies': self.trust_tracker.detect_trust_anomalies(model_id), + 'trajectory': self.trust_tracker.predict_trust_trajectory(model_id), + 'report': self.trust_tracker.generate_evolution_report(model_id) + } + + return results diff --git a/high_performance_system/core/unified_plugin_manager.py b/high_performance_system/core/unified_plugin_manager.py new file mode 100644 index 0000000..ee49156 --- /dev/null +++ b/high_performance_system/core/unified_plugin_manager.py @@ -0,0 +1,188 @@ +"""Unified plugin system for LLM trust evaluation""" + +import sys +import os +from typing import Dict, Any, List, Optional, Callable +from abc import ABC, abstractmethod +import logging + +logger = logging.getLogger(__name__) + +class UnifiedTrustPlugin(ABC): + """Base plugin interface for unified trust evaluation""" + + @property + @abstractmethod + def name(self) -> str: + """Plugin name""" + pass + + @property + @abstractmethod + def category(self) -> str: + """Plugin category (reliability, safety, fairness, etc.)""" + pass + + @property + @abstractmethod + def model_types(self) -> List[str]: + """Supported model types""" + pass + + @abstractmethod + def evaluate(self, model, data: Dict[str, Any], **kwargs) -> Dict[str, Any]: + """Execute evaluation""" + pass + + @abstractmethod + def is_available(self) -> bool: + """Check if plugin dependencies are available""" + pass + +class UnifiedPluginManager: + """Manages plugins across OpenTrustEval and TrustLLM""" + + def __init__(self): + self.plugins: Dict[str, UnifiedTrustPlugin] = {} + self.adapters: Dict[str, Any] = {} + self._initialize_adapters() + + def _initialize_adapters(self): + """Initialize external tool adapters""" + # TrustLLM adapter + try: + from src.integration.trustllm_adapter import TrustLLMAdapter + self.adapters['trustllm'] = TrustLLMAdapter() + logger.info("✓ TrustLLM adapter initialized") + except Exception as e: + logger.warning(f"⚠ TrustLLM adapter not available: {e}") + + # CleanLab adapter + try: + from src.integration.cleanlab_adapter import CleanLabAdapter + self.adapters['cleanlab'] = CleanLabAdapter() + logger.info("✓ CleanLab adapter initialized") + except Exception as e: + logger.warning(f"⚠ CleanLab adapter not available: {e}") + + # DeepChecks adapter + try: + from src.integration.deepchecks_adapter import DeepChecksAdapter + self.adapters['deepchecks'] = DeepChecksAdapter() + logger.info("✓ DeepChecks adapter initialized") + except Exception as e: + logger.warning(f"⚠ DeepChecks adapter not available: {e}") + + def register_plugin(self, plugin: UnifiedTrustPlugin): + """Register a trust evaluation plugin""" + if plugin.is_available(): + self.plugins[plugin.name] = plugin + logger.info(f"✓ Registered plugin: {plugin.name}") + else: + logger.warning(f"⚠ Plugin not available: {plugin.name}") + + def get_compatible_plugins(self, model_type: str) -> List[UnifiedTrustPlugin]: + """Get plugins compatible with specific model type""" + compatible = [] + for plugin in self.plugins.values(): + if model_type in plugin.model_types or 'all' in plugin.model_types: + compatible.append(plugin) + return compatible + + def 
execute_evaluation(self, model, data: Dict[str, Any], + model_type: str = 'llm', + categories: List[str] = None) -> Dict[str, Any]: + """Execute comprehensive evaluation using compatible plugins""" + compatible_plugins = self.get_compatible_plugins(model_type) + + if categories: + compatible_plugins = [p for p in compatible_plugins + if p.category in categories] + + results = {} + for plugin in compatible_plugins: + try: + plugin_result = plugin.evaluate(model, data) + results[plugin.name] = { + 'success': True, + 'result': plugin_result, + 'plugin_info': { + 'name': plugin.name, + 'category': plugin.category, + 'model_types': plugin.model_types + } + } + except Exception as e: + results[plugin.name] = { + 'success': False, + 'error': str(e), + 'plugin_info': { + 'name': plugin.name, + 'category': plugin.category + } + } + + return self._aggregate_results(results) + + def _aggregate_results(self, plugin_results: Dict[str, Any]) -> Dict[str, Any]: + """Aggregate results from multiple plugins""" + aggregated = { + 'dimension_scores': {}, + 'category_scores': {}, + 'plugin_performance': {}, + 'conflicts': [], + 'overall_score': 0.0 + } + + # Collect dimension scores + for plugin_name, result in plugin_results.items(): + if result.get('success', False): + plugin_data = result.get('result', {}) + if 'score' in plugin_data: + # Single score plugin + aggregated['plugin_performance'][plugin_name] = plugin_data['score'] + elif 'dimension_scores' in plugin_data: + # Multi-dimension plugin + for dim_name, dim_score in plugin_data['dimension_scores'].items(): + if isinstance(dim_score, dict) and 'score' in dim_score: + aggregated['dimension_scores'][dim_name] = dim_score['score'] + else: + aggregated['dimension_scores'][dim_name] = dim_score + + # Calculate category scores + category_scores = {} + for dim_name, score in aggregated['dimension_scores'].items(): + category = self._infer_category(dim_name) + if category not in category_scores: + category_scores[category] = [] + category_scores[category].append(score) + + for category, scores in category_scores.items(): + aggregated['category_scores'][category] = sum(scores) / len(scores) + + # Calculate overall score + if aggregated['dimension_scores']: + aggregated['overall_score'] = sum(aggregated['dimension_scores'].values()) / len(aggregated['dimension_scores']) + elif aggregated['plugin_performance']: + aggregated['overall_score'] = sum(aggregated['plugin_performance'].values()) / len(aggregated['plugin_performance']) + + return aggregated + + def _infer_category(self, dimension_name: str) -> str: + """Infer category from dimension name""" + dimension_name = dimension_name.lower() + if any(word in dimension_name for word in ['truth', 'fact', 'accur']): + return 'reliability' + elif any(word in dimension_name for word in ['safe', 'harm', 'toxic']): + return 'safety' + elif any(word in dimension_name for word in ['fair', 'bias', 'discrim']): + return 'fairness' + elif any(word in dimension_name for word in ['consist', 'stable']): + return 'consistency' + elif any(word in dimension_name for word in ['explain', 'interpret']): + return 'explainability' + else: + return 'general' + +# Global plugin manager instance +plugin_manager = UnifiedPluginManager() diff --git a/launch_workflow_webui.py b/launch_workflow_webui.py new file mode 100644 index 0000000..03efd58 --- /dev/null +++ b/launch_workflow_webui.py @@ -0,0 +1,16 @@ +import subprocess +import sys + +def main(): + """ + Launches the Unified Workflow Web UI. 
+ """ + try: + subprocess.run([sys.executable, "-m", "streamlit", "run", "workflow_webui.py"]) + except FileNotFoundError: + print("Error: 'streamlit' command not found. Please make sure Streamlit is installed.") + except Exception as e: + print(f"An error occurred: {e}") + +if __name__ == "__main__": + main() diff --git a/plugins/llm_reliability_plugin.py b/plugins/llm_reliability_plugin.py new file mode 100644 index 0000000..33ffa58 --- /dev/null +++ b/plugins/llm_reliability_plugin.py @@ -0,0 +1,86 @@ +"""LLM-specific reliability evaluation plugin""" + +from src.core.unified_plugin_manager import UnifiedTrustPlugin +from typing import Dict, Any, List +import numpy as np +import logging + +logger = logging.getLogger(__name__) + +class LLMReliabilityPlugin(UnifiedTrustPlugin): + """LLM-specific reliability evaluation plugin""" + + def __init__(self): + self.name = "llm_reliability" + self.category = "reliability" + self.model_types = ["llm", "language_model", "transformer", "all"] + self.adapter = None + self._initialize_adapter() + + def _initialize_adapter(self): + """Initialize adapter for external tools""" + try: + from src.integration.cleanlab_adapter import CleanLabAdapter + self.adapter = CleanLabAdapter() + except Exception as e: + logger.debug(f"CleanLab adapter not available: {e}") + self.adapter = None + + def is_available(self) -> bool: + """Plugin is always available (fallback methods)""" + return True + + def evaluate(self, model, data: Dict[str, Any], **kwargs) -> Dict[str, Any]: + """Execute LLM reliability evaluation""" + try: + # Extract prompts and generate responses + prompts = data.get('prompts', []) + if not prompts: + return self._basic_reliability_evaluation() + + # Generate responses + responses = [] + for prompt in prompts: + response = self._generate_response(model, prompt) + responses.append(response) + + # Evaluate different aspects of reliability + factual_accuracy = self._evaluate_factual_accuracy(data, responses) + consistency_score = self._evaluate_consistency(model, prompts, responses) + confidence_calibration = self._evaluate_confidence_calibration(model, prompts, responses) + + # Calculate overall reliability score + reliability_components = [ + factual_accuracy, + consistency_score, + confidence_calibration + ] + overall_reliability = sum(reliability_components) / len(reliability_components) + + return { + 'score': float(overall_reliability), + 'dimension_scores': { + 'factual_accuracy': factual_accuracy, + 'consistency': consistency_score, + 'confidence_calibration': confidence_calibration + }, + 'details': { + 'evaluated_prompts': len(prompts), + 'response_consistency': self._analyze_response_patterns(responses) + }, + 'metadata': { + 'evaluator': 'llm_reliability_plugin', + 'timestamp': self._get_timestamp() + } + } + + except Exception as e: + logger.error(f"LLM reliability evaluation failed: {e}") + return self._basic_reliability_evaluation() + + def _generate_response(self, model, prompt: str) -> str: + """Generate response from model""" + try: + if hasattr(model, 'generate'): + return model.generate(prompt) + elif diff --git a/plugins/trustllm_plugin.py b/plugins/trustllm_plugin.py new file mode 100644 index 0000000..3b82eb6 --- /dev/null +++ b/plugins/trustllm_plugin.py @@ -0,0 +1,141 @@ +"""TrustLLM comprehensive trust evaluation plugin""" + +from src.core.unified_plugin_manager import UnifiedTrustPlugin +from typing import Dict, Any, List +import logging + +logger = logging.getLogger(__name__) + +class 
TrustLLMComprehensivePlugin(UnifiedTrustPlugin): + """Comprehensive TrustLLM evaluation plugin""" + + def __init__(self): + self.name = "trustllm_comprehensive" + self.category = "overall" + self.model_types = ["llm", "language_model", "transformer"] + self.adapter = None + self._initialize_adapter() + + def _initialize_adapter(self): + """Initialize TrustLLM adapter""" + try: + from src.integration.trustllm_adapter import TrustLLMAdapter + self.adapter = TrustLLMAdapter() + except Exception as e: + logger.error(f"Failed to initialize TrustLLM adapter: {e}") + self.adapter = None + + def is_available(self) -> bool: + """Check if plugin is available""" + return self.adapter is not None and self.adapter.is_available() + + def evaluate(self, model, data: Dict[str, Any], **kwargs) -> Dict[str, Any]: + """Execute comprehensive TrustLLM evaluation""" + if not self.is_available(): + return self._fallback_evaluation() + + try: + # Prepare data for TrustLLM + trustllm_data = self._prepare_trustllm_data(data) + + # Execute TrustLLM evaluations + results = {} + + # Truthfulness evaluation + truthfulness_result = self.adapter.evaluate_truthfulness(model, trustllm_data) + results['truthfulness'] = truthfulness_result + + # Safety evaluation + safety_result = self.adapter.evaluate_safety(model, trustllm_data) + results['safety'] = safety_result + + # Hallucination evaluation + hallucination_result = self.adapter.evaluate_hallucination(model, trustllm_data) + results['hallucination'] = hallucination_result + + # Privacy evaluation + privacy_result = self.adapter.evaluate_privacy(model, trustllm_data) + results['privacy'] = privacy_result + + # Toxicity evaluation + toxicity_result = self.adapter.evaluate_toxicity(model, trustllm_data) + results['toxicity'] = toxicity_result + + # Calculate comprehensive score + dimension_scores = {} + for dim_name, dim_result in results.items(): + if isinstance(dim_result, dict) and 'score' in dim_result: + dimension_scores[dim_name] = dim_result['score'] + + comprehensive_score = sum(dimension_scores.values()) / len(dimension_scores) if dimension_scores else 0.5 + + return { + 'score': float(comprehensive_score), + 'dimension_scores': dimension_scores, + 'detailed_results': results, + 'metadata': { + 'evaluator': 'trustllm', + 'timestamp': self._get_timestamp(), + 'dimensions_evaluated': list(dimension_scores.keys()) + } + } + + except Exception as e: + logger.error(f"TrustLLM evaluation failed: {e}") + return self._fallback_evaluation() + + def _prepare_trustllm_data(self, data: Dict[str, Any]) -> Dict[str, Any]: + """Prepare data in TrustLLM format""" + # Convert OpenTrustEval data format to TrustLLM format + trustllm_data = {} + + # Map common fields + if 'prompts' in data: + trustllm_data['prompts'] = data['prompts'] + if 'responses' in data: + trustllm_data['responses'] = data['responses'] + if 'ground_truth' in data: + trustllm_data['ground_truth'] = data['ground_truth'] + if 'contexts' in data: + trustllm_data['contexts'] = data['contexts'] + + return trustllm_data + + def _fallback_evaluation(self) -> Dict[str, Any]: + """Fallback evaluation when TrustLLM unavailable""" + return { + 'score': 0.5, + 'dimension_scores': { + 'truthfulness': 0.5, + 'safety': 0.7, # Conservative safety default + 'hallucination': 0.5, + 'privacy': 0.6, + 'toxicity': 0.7 + }, + 'detailed_results': {}, + 'metadata': { + 'evaluator': 'fallback', + 'warning': 'TrustLLM not available, using default scores' + } + } + + def _get_timestamp(self) -> str: + """Get current timestamp""" + from 
datetime import datetime + return datetime.now().isoformat() + +# Register the plugin +def register_trustllm_plugin(plugin_manager): + """Register TrustLLM plugin with plugin manager""" + plugin = TrustLLMComprehensivePlugin() + if plugin.is_available(): + plugin_manager.register_plugin(plugin) + logger.info("✓ TrustLLM comprehensive plugin registered") + else: + logger.warning("⚠ TrustLLM comprehensive plugin not available") + +# Auto-registration: the importing scope is expected to supply plugin_manager; +# this module does not define one, so a missing manager is handled explicitly +# rather than surfacing as a spurious registration failure. +try: + register_trustllm_plugin(plugin_manager) # noqa: F821 - provided by the host, if at all +except NameError: + logger.debug("No plugin_manager in scope; call register_trustllm_plugin() explicitly") +except Exception as e: + logger.error(f"Failed to register TrustLLM plugin: {e}") diff --git a/tests/=0.3.0 b/tests/=0.3.0 new file mode 100644 index 0000000..9aedf66 --- /dev/null +++ b/tests/=0.3.0 @@ -0,0 +1,43 @@ +Collecting streamlit-option-menu + Downloading streamlit_option_menu-0.4.0-py3-none-any.whl.metadata (2.5 kB) +Requirement already satisfied: streamlit>=1.36 in /home/jules/.pyenv/versions/3.12.11/lib/python3.12/site-packages (from streamlit-option-menu) (1.47.1) +Requirement already satisfied: altair<6,>=4.0 in /home/jules/.pyenv/versions/3.12.11/lib/python3.12/site-packages (from streamlit>=1.36->streamlit-option-menu) (5.5.0) +Requirement already satisfied: blinker<2,>=1.5.0 in /home/jules/.pyenv/versions/3.12.11/lib/python3.12/site-packages (from streamlit>=1.36->streamlit-option-menu) (1.9.0) +Requirement already satisfied: cachetools<7,>=4.0 in /home/jules/.pyenv/versions/3.12.11/lib/python3.12/site-packages (from streamlit>=1.36->streamlit-option-menu) (5.5.2) +Requirement already satisfied: click<9,>=7.0 in /home/jules/.pyenv/versions/3.12.11/lib/python3.12/site-packages (from streamlit>=1.36->streamlit-option-menu) (8.2.1) +Requirement already satisfied: numpy<3,>=1.23 in /home/jules/.pyenv/versions/3.12.11/lib/python3.12/site-packages (from streamlit>=1.36->streamlit-option-menu) (1.26.4) +Requirement already satisfied: packaging<26,>=20 in /home/jules/.pyenv/versions/3.12.11/lib/python3.12/site-packages (from streamlit>=1.36->streamlit-option-menu) (25.0) +Requirement already satisfied: pandas<3,>=1.4.0 in /home/jules/.pyenv/versions/3.12.11/lib/python3.12/site-packages (from streamlit>=1.36->streamlit-option-menu) (2.2.2) +Requirement already satisfied: pillow<12,>=7.1.0 in /home/jules/.pyenv/versions/3.12.11/lib/python3.12/site-packages (from streamlit>=1.36->streamlit-option-menu) (11.3.0) +Requirement already satisfied: protobuf<7,>=3.20 in /home/jules/.pyenv/versions/3.12.11/lib/python3.12/site-packages (from streamlit>=1.36->streamlit-option-menu) (4.25.8) +Requirement already satisfied: pyarrow>=7.0 in /home/jules/.pyenv/versions/3.12.11/lib/python3.12/site-packages (from streamlit>=1.36->streamlit-option-menu) (21.0.0) +Requirement already satisfied: requests<3,>=2.27 in /home/jules/.pyenv/versions/3.12.11/lib/python3.12/site-packages (from streamlit>=1.36->streamlit-option-menu) (2.31.0) +Requirement already satisfied: tenacity<10,>=8.1.0 in /home/jules/.pyenv/versions/3.12.11/lib/python3.12/site-packages (from streamlit>=1.36->streamlit-option-menu) (9.1.2) +Requirement already satisfied: toml<2,>=0.10.1 in /home/jules/.pyenv/versions/3.12.11/lib/python3.12/site-packages (from streamlit>=1.36->streamlit-option-menu) (0.10.2) +Requirement already satisfied: typing-extensions<5,>=4.4.0 in /home/jules/.pyenv/versions/3.12.11/lib/python3.12/site-packages (from streamlit>=1.36->streamlit-option-menu) (4.14.1) +Requirement already satisfied: watchdog<7,>=2.1.5 in /home/jules/.pyenv/versions/3.12.11/lib/python3.12/site-packages (from 
streamlit>=1.36->streamlit-option-menu) (6.0.0) +Requirement already satisfied: gitpython!=3.1.19,<4,>=3.0.7 in /home/jules/.pyenv/versions/3.12.11/lib/python3.12/site-packages (from streamlit>=1.36->streamlit-option-menu) (3.1.45) +Requirement already satisfied: pydeck<1,>=0.8.0b4 in /home/jules/.pyenv/versions/3.12.11/lib/python3.12/site-packages (from streamlit>=1.36->streamlit-option-menu) (0.9.1) +Requirement already satisfied: tornado!=6.5.0,<7,>=6.0.3 in /home/jules/.pyenv/versions/3.12.11/lib/python3.12/site-packages (from streamlit>=1.36->streamlit-option-menu) (6.5.1) +Requirement already satisfied: jinja2 in /home/jules/.pyenv/versions/3.12.11/lib/python3.12/site-packages (from altair<6,>=4.0->streamlit>=1.36->streamlit-option-menu) (3.1.6) +Requirement already satisfied: jsonschema>=3.0 in /home/jules/.pyenv/versions/3.12.11/lib/python3.12/site-packages (from altair<6,>=4.0->streamlit>=1.36->streamlit-option-menu) (4.25.0) +Requirement already satisfied: narwhals>=1.14.2 in /home/jules/.pyenv/versions/3.12.11/lib/python3.12/site-packages (from altair<6,>=4.0->streamlit>=1.36->streamlit-option-menu) (1.48.1) +Requirement already satisfied: gitdb<5,>=4.0.1 in /home/jules/.pyenv/versions/3.12.11/lib/python3.12/site-packages (from gitpython!=3.1.19,<4,>=3.0.7->streamlit>=1.36->streamlit-option-menu) (4.0.12) +Requirement already satisfied: smmap<6,>=3.0.1 in /home/jules/.pyenv/versions/3.12.11/lib/python3.12/site-packages (from gitdb<5,>=4.0.1->gitpython!=3.1.19,<4,>=3.0.7->streamlit>=1.36->streamlit-option-menu) (5.0.2) +Requirement already satisfied: python-dateutil>=2.8.2 in /home/jules/.pyenv/versions/3.12.11/lib/python3.12/site-packages (from pandas<3,>=1.4.0->streamlit>=1.36->streamlit-option-menu) (2.9.0.post0) +Requirement already satisfied: pytz>=2020.1 in /home/jules/.pyenv/versions/3.12.11/lib/python3.12/site-packages (from pandas<3,>=1.4.0->streamlit>=1.36->streamlit-option-menu) (2025.2) +Requirement already satisfied: tzdata>=2022.7 in /home/jules/.pyenv/versions/3.12.11/lib/python3.12/site-packages (from pandas<3,>=1.4.0->streamlit>=1.36->streamlit-option-menu) (2025.2) +Requirement already satisfied: charset-normalizer<4,>=2 in /home/jules/.pyenv/versions/3.12.11/lib/python3.12/site-packages (from requests<3,>=2.27->streamlit>=1.36->streamlit-option-menu) (3.4.2) +Requirement already satisfied: idna<4,>=2.5 in /home/jules/.pyenv/versions/3.12.11/lib/python3.12/site-packages (from requests<3,>=2.27->streamlit>=1.36->streamlit-option-menu) (3.10) +Requirement already satisfied: urllib3<3,>=1.21.1 in /home/jules/.pyenv/versions/3.12.11/lib/python3.12/site-packages (from requests<3,>=2.27->streamlit>=1.36->streamlit-option-menu) (2.5.0) +Requirement already satisfied: certifi>=2017.4.17 in /home/jules/.pyenv/versions/3.12.11/lib/python3.12/site-packages (from requests<3,>=2.27->streamlit>=1.36->streamlit-option-menu) (2025.7.14) +Requirement already satisfied: MarkupSafe>=2.0 in /home/jules/.pyenv/versions/3.12.11/lib/python3.12/site-packages (from jinja2->altair<6,>=4.0->streamlit>=1.36->streamlit-option-menu) (3.0.2) +Requirement already satisfied: attrs>=22.2.0 in /home/jules/.pyenv/versions/3.12.11/lib/python3.12/site-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit>=1.36->streamlit-option-menu) (25.3.0) +Requirement already satisfied: jsonschema-specifications>=2023.03.6 in /home/jules/.pyenv/versions/3.12.11/lib/python3.12/site-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit>=1.36->streamlit-option-menu) (2025.4.1) +Requirement already 
satisfied: referencing>=0.28.4 in /home/jules/.pyenv/versions/3.12.11/lib/python3.12/site-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit>=1.36->streamlit-option-menu) (0.36.2) +Requirement already satisfied: rpds-py>=0.7.1 in /home/jules/.pyenv/versions/3.12.11/lib/python3.12/site-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit>=1.36->streamlit-option-menu) (0.26.0) +Requirement already satisfied: six>=1.5 in /home/jules/.pyenv/versions/3.12.11/lib/python3.12/site-packages (from python-dateutil>=2.8.2->pandas<3,>=1.4.0->streamlit>=1.36->streamlit-option-menu) (1.17.0) +Downloading streamlit_option_menu-0.4.0-py3-none-any.whl (829 kB) + ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 829.3/829.3 kB 22.1 MB/s eta 0:00:00 +Installing collected packages: streamlit-option-menu +Successfully installed streamlit-option-menu-0.4.0 diff --git a/tests/=0.4.27 b/tests/=0.4.27 new file mode 100644 index 0000000..1102861 --- /dev/null +++ b/tests/=0.4.27 @@ -0,0 +1 @@ +Requirement already satisfied: python-magic in /home/jules/.pyenv/versions/3.12.11/lib/python3.12/site-packages (0.4.27) diff --git a/tests/=1.1.0 b/tests/=1.1.0 new file mode 100644 index 0000000..55de4d4 --- /dev/null +++ b/tests/=1.1.0 @@ -0,0 +1,12 @@ +Collecting pyreadstat + Downloading pyreadstat-1.3.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (1.3 kB) +Requirement already satisfied: pandas>=1.2.0 in /home/jules/.pyenv/versions/3.12.11/lib/python3.12/site-packages (from pyreadstat) (2.2.2) +Requirement already satisfied: numpy>=1.26.0 in /home/jules/.pyenv/versions/3.12.11/lib/python3.12/site-packages (from pandas>=1.2.0->pyreadstat) (1.26.4) +Requirement already satisfied: python-dateutil>=2.8.2 in /home/jules/.pyenv/versions/3.12.11/lib/python3.12/site-packages (from pandas>=1.2.0->pyreadstat) (2.9.0.post0) +Requirement already satisfied: pytz>=2020.1 in /home/jules/.pyenv/versions/3.12.11/lib/python3.12/site-packages (from pandas>=1.2.0->pyreadstat) (2025.2) +Requirement already satisfied: tzdata>=2022.7 in /home/jules/.pyenv/versions/3.12.11/lib/python3.12/site-packages (from pandas>=1.2.0->pyreadstat) (2025.2) +Requirement already satisfied: six>=1.5 in /home/jules/.pyenv/versions/3.12.11/lib/python3.12/site-packages (from python-dateutil>=2.8.2->pandas>=1.2.0->pyreadstat) (1.17.0) +Downloading pyreadstat-1.3.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (661 kB) + ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 661.2/661.2 kB 18.6 MB/s eta 0:00:00 +Installing collected packages: pyreadstat +Successfully installed pyreadstat-1.3.0 diff --git a/tests/=1.26.0 b/tests/=1.26.0 new file mode 100644 index 0000000..0670781 --- /dev/null +++ b/tests/=1.26.0 @@ -0,0 +1,19 @@ +Collecting boto3 + Downloading boto3-1.39.13-py3-none-any.whl.metadata (6.7 kB) +Collecting botocore<1.40.0,>=1.39.13 (from boto3) + Downloading botocore-1.39.13-py3-none-any.whl.metadata (5.7 kB) +Collecting jmespath<2.0.0,>=0.7.1 (from boto3) + Downloading jmespath-1.0.1-py3-none-any.whl.metadata (7.6 kB) +Collecting s3transfer<0.14.0,>=0.13.0 (from boto3) + Downloading s3transfer-0.13.1-py3-none-any.whl.metadata (1.7 kB) +Requirement already satisfied: python-dateutil<3.0.0,>=2.1 in /home/jules/.pyenv/versions/3.12.11/lib/python3.12/site-packages (from botocore<1.40.0,>=1.39.13->boto3) (2.9.0.post0) +Requirement already satisfied: urllib3!=2.2.0,<3,>=1.25.4 in /home/jules/.pyenv/versions/3.12.11/lib/python3.12/site-packages (from botocore<1.40.0,>=1.39.13->boto3) (2.5.0) +Requirement already satisfied: six>=1.5 in 
/home/jules/.pyenv/versions/3.12.11/lib/python3.12/site-packages (from python-dateutil<3.0.0,>=2.1->botocore<1.40.0,>=1.39.13->boto3) (1.17.0) +Downloading boto3-1.39.13-py3-none-any.whl (139 kB) +Downloading botocore-1.39.13-py3-none-any.whl (13.9 MB) + ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 13.9/13.9 MB 43.6 MB/s eta 0:00:00 +Downloading jmespath-1.0.1-py3-none-any.whl (20 kB) +Downloading s3transfer-0.13.1-py3-none-any.whl (85 kB) +Installing collected packages: jmespath, botocore, s3transfer, boto3 + +Successfully installed boto3-1.39.13 botocore-1.39.13 jmespath-1.0.1 s3transfer-0.13.1 diff --git a/tests/=1.3.1 b/tests/=1.3.1 new file mode 100644 index 0000000..47f8c67 --- /dev/null +++ b/tests/=1.3.1 @@ -0,0 +1,45 @@ +Collecting pydrive + Downloading PyDrive-1.3.1.tar.gz (987 kB) + ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 987.4/987.4 kB 26.0 MB/s eta 0:00:00 + Preparing metadata (setup.py): started + Preparing metadata (setup.py): finished with status 'done' +Collecting google-api-python-client>=1.2 (from pydrive) + Downloading google_api_python_client-2.177.0-py3-none-any.whl.metadata (7.0 kB) +Collecting oauth2client>=4.0.0 (from pydrive) + Downloading oauth2client-4.1.3-py2.py3-none-any.whl.metadata (1.2 kB) +Requirement already satisfied: PyYAML>=3.0 in /home/jules/.pyenv/versions/3.12.11/lib/python3.12/site-packages (from pydrive) (6.0.1) +Collecting httplib2<1.0.0,>=0.19.0 (from google-api-python-client>=1.2->pydrive) + Downloading httplib2-0.22.0-py3-none-any.whl.metadata (2.6 kB) +Requirement already satisfied: google-auth!=2.24.0,!=2.25.0,<3.0.0,>=1.32.0 in /home/jules/.pyenv/versions/3.12.11/lib/python3.12/site-packages (from google-api-python-client>=1.2->pydrive) (2.40.3) +Collecting google-auth-httplib2<1.0.0,>=0.2.0 (from google-api-python-client>=1.2->pydrive) + Downloading google_auth_httplib2-0.2.0-py2.py3-none-any.whl.metadata (2.2 kB) +Requirement already satisfied: google-api-core!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.0,<3.0.0,>=1.31.5 in /home/jules/.pyenv/versions/3.12.11/lib/python3.12/site-packages (from google-api-python-client>=1.2->pydrive) (2.25.1) +Requirement already satisfied: uritemplate<5,>=3.0.1 in /home/jules/.pyenv/versions/3.12.11/lib/python3.12/site-packages (from google-api-python-client>=1.2->pydrive) (4.2.0) +Requirement already satisfied: googleapis-common-protos<2.0.0,>=1.56.2 in /home/jules/.pyenv/versions/3.12.11/lib/python3.12/site-packages (from google-api-core!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.0,<3.0.0,>=1.31.5->google-api-python-client>=1.2->pydrive) (1.70.0) +Requirement already satisfied: protobuf!=3.20.0,!=3.20.1,!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5,<7.0.0,>=3.19.5 in /home/jules/.pyenv/versions/3.12.11/lib/python3.12/site-packages (from google-api-core!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.0,<3.0.0,>=1.31.5->google-api-python-client>=1.2->pydrive) (4.25.8) +Requirement already satisfied: proto-plus<2.0.0,>=1.22.3 in /home/jules/.pyenv/versions/3.12.11/lib/python3.12/site-packages (from google-api-core!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.0,<3.0.0,>=1.31.5->google-api-python-client>=1.2->pydrive) (1.26.1) +Requirement already satisfied: requests<3.0.0,>=2.18.0 in /home/jules/.pyenv/versions/3.12.11/lib/python3.12/site-packages (from google-api-core!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.0,<3.0.0,>=1.31.5->google-api-python-client>=1.2->pydrive) (2.31.0) +Requirement already satisfied: cachetools<6.0,>=2.0.0 in /home/jules/.pyenv/versions/3.12.11/lib/python3.12/site-packages (from 
google-auth!=2.24.0,!=2.25.0,<3.0.0,>=1.32.0->google-api-python-client>=1.2->pydrive) (5.5.2) +Requirement already satisfied: pyasn1-modules>=0.2.1 in /home/jules/.pyenv/versions/3.12.11/lib/python3.12/site-packages (from google-auth!=2.24.0,!=2.25.0,<3.0.0,>=1.32.0->google-api-python-client>=1.2->pydrive) (0.4.2) +Requirement already satisfied: rsa<5,>=3.1.4 in /home/jules/.pyenv/versions/3.12.11/lib/python3.12/site-packages (from google-auth!=2.24.0,!=2.25.0,<3.0.0,>=1.32.0->google-api-python-client>=1.2->pydrive) (4.9.1) +Requirement already satisfied: pyparsing!=3.0.0,!=3.0.1,!=3.0.2,!=3.0.3,<4,>=2.4.2 in /home/jules/.pyenv/versions/3.12.11/lib/python3.12/site-packages (from httplib2<1.0.0,>=0.19.0->google-api-python-client>=1.2->pydrive) (3.2.3) +Requirement already satisfied: charset-normalizer<4,>=2 in /home/jules/.pyenv/versions/3.12.11/lib/python3.12/site-packages (from requests<3.0.0,>=2.18.0->google-api-core!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.0,<3.0.0,>=1.31.5->google-api-python-client>=1.2->pydrive) (3.4.2) +Requirement already satisfied: idna<4,>=2.5 in /home/jules/.pyenv/versions/3.12.11/lib/python3.12/site-packages (from requests<3.0.0,>=2.18.0->google-api-core!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.0,<3.0.0,>=1.31.5->google-api-python-client>=1.2->pydrive) (3.10) +Requirement already satisfied: urllib3<3,>=1.21.1 in /home/jules/.pyenv/versions/3.12.11/lib/python3.12/site-packages (from requests<3.0.0,>=2.18.0->google-api-core!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.0,<3.0.0,>=1.31.5->google-api-python-client>=1.2->pydrive) (2.5.0) +Requirement already satisfied: certifi>=2017.4.17 in /home/jules/.pyenv/versions/3.12.11/lib/python3.12/site-packages (from requests<3.0.0,>=2.18.0->google-api-core!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.0,<3.0.0,>=1.31.5->google-api-python-client>=1.2->pydrive) (2025.7.14) +Requirement already satisfied: pyasn1>=0.1.3 in /home/jules/.pyenv/versions/3.12.11/lib/python3.12/site-packages (from rsa<5,>=3.1.4->google-auth!=2.24.0,!=2.25.0,<3.0.0,>=1.32.0->google-api-python-client>=1.2->pydrive) (0.6.1) +Requirement already satisfied: six>=1.6.1 in /home/jules/.pyenv/versions/3.12.11/lib/python3.12/site-packages (from oauth2client>=4.0.0->pydrive) (1.17.0) +Downloading google_api_python_client-2.177.0-py3-none-any.whl (13.7 MB) + ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 13.7/13.7 MB 92.1 MB/s eta 0:00:00 +Downloading google_auth_httplib2-0.2.0-py2.py3-none-any.whl (9.3 kB) +Downloading httplib2-0.22.0-py3-none-any.whl (96 kB) +Downloading oauth2client-4.1.3-py2.py3-none-any.whl (98 kB) +Building wheels for collected packages: pydrive + Building wheel for pydrive (setup.py): started + Building wheel for pydrive (setup.py): finished with status 'done' + Created wheel for pydrive: filename=pydrive-1.3.1-py3-none-any.whl size=27539 sha256=0837f0594e858cf75dd820443de7c32c35f56eb6d5136425711b8aed656701ea + Stored in directory: /home/jules/.cache/pip/wheels/6c/10/da/a5b513f5b3916fc391c20ee7b4633e5cf3396d570cdd74970f +Successfully built pydrive +Installing collected packages: httplib2, oauth2client, google-auth-httplib2, google-api-python-client, pydrive + +Successfully installed google-api-python-client-2.177.0 google-auth-httplib2-0.2.0 httplib2-0.22.0 oauth2client-4.1.3 pydrive-1.3.1 diff --git a/tests/=2.0.1 b/tests/=2.0.1 new file mode 100644 index 0000000..478e234 --- /dev/null +++ b/tests/=2.0.1 @@ -0,0 +1 @@ +Requirement already satisfied: xlrd in /home/jules/.pyenv/versions/3.12.11/lib/python3.12/site-packages (2.0.2) diff --git a/tests/=2.8.0 b/tests/=2.8.0 new file mode 100644 index 
0000000..23c089b --- /dev/null +++ b/tests/=2.8.0 @@ -0,0 +1,45 @@ +Collecting google-cloud-storage + Downloading google_cloud_storage-3.2.0-py3-none-any.whl.metadata (13 kB) +Collecting google-auth<3.0.0,>=2.26.1 (from google-cloud-storage) + Downloading google_auth-2.40.3-py2.py3-none-any.whl.metadata (6.2 kB) +Collecting google-api-core<3.0.0,>=2.15.0 (from google-cloud-storage) + Downloading google_api_core-2.25.1-py3-none-any.whl.metadata (3.0 kB) +Collecting google-cloud-core<3.0.0,>=2.4.2 (from google-cloud-storage) + Downloading google_cloud_core-2.4.3-py2.py3-none-any.whl.metadata (2.7 kB) +Collecting google-resumable-media<3.0.0,>=2.7.2 (from google-cloud-storage) + Downloading google_resumable_media-2.7.2-py2.py3-none-any.whl.metadata (2.2 kB) +Requirement already satisfied: requests<3.0.0,>=2.22.0 in /home/jules/.pyenv/versions/3.12.11/lib/python3.12/site-packages (from google-cloud-storage) (2.31.0) +Collecting google-crc32c<2.0.0,>=1.1.3 (from google-cloud-storage) + Downloading google_crc32c-1.7.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (2.3 kB) +Collecting googleapis-common-protos<2.0.0,>=1.56.2 (from google-api-core<3.0.0,>=2.15.0->google-cloud-storage) + Downloading googleapis_common_protos-1.70.0-py3-none-any.whl.metadata (9.3 kB) +Requirement already satisfied: protobuf!=3.20.0,!=3.20.1,!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5,<7.0.0,>=3.19.5 in /home/jules/.pyenv/versions/3.12.11/lib/python3.12/site-packages (from google-api-core<3.0.0,>=2.15.0->google-cloud-storage) (4.25.8) +Requirement already satisfied: proto-plus<2.0.0,>=1.22.3 in /home/jules/.pyenv/versions/3.12.11/lib/python3.12/site-packages (from google-api-core<3.0.0,>=2.15.0->google-cloud-storage) (1.26.1) +Collecting cachetools<6.0,>=2.0.0 (from google-auth<3.0.0,>=2.26.1->google-cloud-storage) + Downloading cachetools-5.5.2-py3-none-any.whl.metadata (5.4 kB) +Collecting pyasn1-modules>=0.2.1 (from google-auth<3.0.0,>=2.26.1->google-cloud-storage) + Downloading pyasn1_modules-0.4.2-py3-none-any.whl.metadata (3.5 kB) +Collecting rsa<5,>=3.1.4 (from google-auth<3.0.0,>=2.26.1->google-cloud-storage) + Downloading rsa-4.9.1-py3-none-any.whl.metadata (5.6 kB) +Requirement already satisfied: charset-normalizer<4,>=2 in /home/jules/.pyenv/versions/3.12.11/lib/python3.12/site-packages (from requests<3.0.0,>=2.22.0->google-cloud-storage) (3.4.2) +Requirement already satisfied: idna<4,>=2.5 in /home/jules/.pyenv/versions/3.12.11/lib/python3.12/site-packages (from requests<3.0.0,>=2.22.0->google-cloud-storage) (3.10) +Requirement already satisfied: urllib3<3,>=1.21.1 in /home/jules/.pyenv/versions/3.12.11/lib/python3.12/site-packages (from requests<3.0.0,>=2.22.0->google-cloud-storage) (2.5.0) +Requirement already satisfied: certifi>=2017.4.17 in /home/jules/.pyenv/versions/3.12.11/lib/python3.12/site-packages (from requests<3.0.0,>=2.22.0->google-cloud-storage) (2025.7.14) +Requirement already satisfied: pyasn1>=0.1.3 in /home/jules/.pyenv/versions/3.12.11/lib/python3.12/site-packages (from rsa<5,>=3.1.4->google-auth<3.0.0,>=2.26.1->google-cloud-storage) (0.6.1) +Downloading google_cloud_storage-3.2.0-py3-none-any.whl (176 kB) +Downloading google_api_core-2.25.1-py3-none-any.whl (160 kB) +Downloading google_auth-2.40.3-py2.py3-none-any.whl (216 kB) +Downloading cachetools-5.5.2-py3-none-any.whl (10 kB) +Downloading google_cloud_core-2.4.3-py2.py3-none-any.whl (29 kB) +Downloading google_crc32c-1.7.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (32 kB) 
+Downloading google_resumable_media-2.7.2-py2.py3-none-any.whl (81 kB) +Downloading googleapis_common_protos-1.70.0-py3-none-any.whl (294 kB) +Downloading rsa-4.9.1-py3-none-any.whl (34 kB) +Downloading pyasn1_modules-0.4.2-py3-none-any.whl (181 kB) +Installing collected packages: rsa, pyasn1-modules, googleapis-common-protos, google-crc32c, cachetools, google-resumable-media, google-auth, google-api-core, google-cloud-core, google-cloud-storage + Attempting uninstall: cachetools + Found existing installation: cachetools 6.1.0 + Uninstalling cachetools-6.1.0: + Successfully uninstalled cachetools-6.1.0 + +Successfully installed cachetools-5.5.2 google-api-core-2.25.1 google-auth-2.40.3 google-cloud-core-2.4.3 google-cloud-storage-3.2.0 google-crc32c-1.7.1 google-resumable-media-2.7.2 googleapis-common-protos-1.70.0 pyasn1-modules-0.4.2 rsa-4.9.1 diff --git a/tests/=3.0.10 b/tests/=3.0.10 new file mode 100644 index 0000000..1b5b713 --- /dev/null +++ b/tests/=3.0.10 @@ -0,0 +1,9 @@ +Collecting openpyxl + Downloading openpyxl-3.1.5-py2.py3-none-any.whl.metadata (2.5 kB) +Collecting et-xmlfile (from openpyxl) + Downloading et_xmlfile-2.0.0-py3-none-any.whl.metadata (2.7 kB) +Downloading openpyxl-3.1.5-py2.py3-none-any.whl (250 kB) +Downloading et_xmlfile-2.0.0-py3-none-any.whl (18 kB) +Installing collected packages: et-xmlfile, openpyxl + +Successfully installed et-xmlfile-2.0.0 openpyxl-3.1.5 diff --git a/tests/=3.7.0 b/tests/=3.7.0 new file mode 100644 index 0000000..96f4768 --- /dev/null +++ b/tests/=3.7.0 @@ -0,0 +1,24 @@ +Collecting tables + Downloading tables-3.10.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (2.0 kB) +Requirement already satisfied: numpy>=1.20.0 in /home/jules/.pyenv/versions/3.12.11/lib/python3.12/site-packages (from tables) (1.26.4) +Requirement already satisfied: numexpr>=2.6.2 in /home/jules/.pyenv/versions/3.12.11/lib/python3.12/site-packages (from tables) (2.11.0) +Requirement already satisfied: packaging in /home/jules/.pyenv/versions/3.12.11/lib/python3.12/site-packages (from tables) (25.0) +Requirement already satisfied: py-cpuinfo in /home/jules/.pyenv/versions/3.12.11/lib/python3.12/site-packages (from tables) (9.0.0) +Collecting blosc2>=2.3.0 (from tables) + Downloading blosc2-3.6.1-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (7.0 kB) +Requirement already satisfied: typing-extensions>=4.4.0 in /home/jules/.pyenv/versions/3.12.11/lib/python3.12/site-packages (from tables) (4.14.1) +Requirement already satisfied: ndindex in /home/jules/.pyenv/versions/3.12.11/lib/python3.12/site-packages (from blosc2>=2.3.0->tables) (1.10.0) +Requirement already satisfied: msgpack in /home/jules/.pyenv/versions/3.12.11/lib/python3.12/site-packages (from blosc2>=2.3.0->tables) (1.1.1) +Requirement already satisfied: platformdirs in /home/jules/.pyenv/versions/3.12.11/lib/python3.12/site-packages (from blosc2>=2.3.0->tables) (4.3.8) +Requirement already satisfied: requests in /home/jules/.pyenv/versions/3.12.11/lib/python3.12/site-packages (from blosc2>=2.3.0->tables) (2.31.0) +Requirement already satisfied: charset-normalizer<4,>=2 in /home/jules/.pyenv/versions/3.12.11/lib/python3.12/site-packages (from requests->blosc2>=2.3.0->tables) (3.4.2) +Requirement already satisfied: idna<4,>=2.5 in /home/jules/.pyenv/versions/3.12.11/lib/python3.12/site-packages (from requests->blosc2>=2.3.0->tables) (3.10) +Requirement already satisfied: urllib3<3,>=1.21.1 in /home/jules/.pyenv/versions/3.12.11/lib/python3.12/site-packages (from 
requests->blosc2>=2.3.0->tables) (2.5.0) +Requirement already satisfied: certifi>=2017.4.17 in /home/jules/.pyenv/versions/3.12.11/lib/python3.12/site-packages (from requests->blosc2>=2.3.0->tables) (2025.7.14) +Downloading tables-3.10.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.5 MB) + ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 7.5/7.5 MB 88.3 MB/s eta 0:00:00 +Downloading blosc2-3.6.1-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (4.4 MB) + ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 4.4/4.4 MB 95.1 MB/s eta 0:00:00 +Installing collected packages: blosc2, tables + +Successfully installed blosc2-3.6.1 tables-3.10.2 diff --git a/tests/=4.9.0 b/tests/=4.9.0 new file mode 100644 index 0000000..6e0c295 --- /dev/null +++ b/tests/=4.9.0 @@ -0,0 +1,6 @@ +Collecting lxml + Downloading lxml-6.0.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (6.6 kB) +Downloading lxml-6.0.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl (5.3 MB) + ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 5.3/5.3 MB 29.9 MB/s eta 0:00:00 +Installing collected packages: lxml +Successfully installed lxml-6.0.0 diff --git a/Custom_Cleanlab_Pipeline_Demo.ipynb b/tests/Custom_Cleanlab_Pipeline_Demo.ipynb similarity index 100% rename from Custom_Cleanlab_Pipeline_Demo.ipynb rename to tests/Custom_Cleanlab_Pipeline_Demo.ipynb diff --git a/cleanlab_plugin_test_output.txt b/tests/cleanlab_plugin_test_output.txt similarity index 100% rename from cleanlab_plugin_test_output.txt rename to tests/cleanlab_plugin_test_output.txt diff --git a/custom_cleanlab_pipeline.py b/tests/custom_cleanlab_pipeline.py similarity index 100% rename from custom_cleanlab_pipeline.py rename to tests/custom_cleanlab_pipeline.py diff --git a/custom_cleanlab_webui.py b/tests/custom_cleanlab_webui.py similarity index 100% rename from custom_cleanlab_webui.py rename to tests/custom_cleanlab_webui.py diff --git a/real_world_trust_scoring_report_20250723_224607.json b/tests/real_world_trust_scoring_report_20250723_224607.json similarity index 100% rename from real_world_trust_scoring_report_20250723_224607.json rename to tests/real_world_trust_scoring_report_20250723_224607.json diff --git a/real_world_trust_scoring_report_20250723_225126.json b/tests/real_world_trust_scoring_report_20250723_225126.json similarity index 100% rename from real_world_trust_scoring_report_20250723_225126.json rename to tests/real_world_trust_scoring_report_20250723_225126.json diff --git a/real_world_trust_scoring_report_20250723_225309.json b/tests/real_world_trust_scoring_report_20250723_225309.json similarity index 100% rename from real_world_trust_scoring_report_20250723_225309.json rename to tests/real_world_trust_scoring_report_20250723_225309.json diff --git a/real_world_trust_scoring_report_20250723_225418.json b/tests/real_world_trust_scoring_report_20250723_225418.json similarity index 100% rename from real_world_trust_scoring_report_20250723_225418.json rename to tests/real_world_trust_scoring_report_20250723_225418.json diff --git a/real_world_trust_scoring_report_20250723_225548.json b/tests/real_world_trust_scoring_report_20250723_225548.json similarity index 100% rename from real_world_trust_scoring_report_20250723_225548.json rename to tests/real_world_trust_scoring_report_20250723_225548.json diff --git a/real_world_trust_scoring_report_20250723_225707.json b/tests/real_world_trust_scoring_report_20250723_225707.json similarity index 100% rename from real_world_trust_scoring_report_20250723_225707.json rename to 
tests/real_world_trust_scoring_report_20250723_225707.json diff --git a/real_world_trust_scoring_report_20250723_225817.json b/tests/real_world_trust_scoring_report_20250723_225817.json similarity index 100% rename from real_world_trust_scoring_report_20250723_225817.json rename to tests/real_world_trust_scoring_report_20250723_225817.json diff --git a/real_world_trust_scoring_report_20250723_225831.json b/tests/real_world_trust_scoring_report_20250723_225831.json similarity index 100% rename from real_world_trust_scoring_report_20250723_225831.json rename to tests/real_world_trust_scoring_report_20250723_225831.json diff --git a/real_world_trust_scoring_report_20250723_225943.json b/tests/real_world_trust_scoring_report_20250723_225943.json similarity index 100% rename from real_world_trust_scoring_report_20250723_225943.json rename to tests/real_world_trust_scoring_report_20250723_225943.json diff --git a/test_high_performance_system.py b/tests/test_high_performance_system.py similarity index 100% rename from test_high_performance_system.py rename to tests/test_high_performance_system.py diff --git a/test_moe_integration.py b/tests/test_moe_integration.py similarity index 100% rename from test_moe_integration.py rename to tests/test_moe_integration.py diff --git a/test_operation_sindoor_realtime.py b/tests/test_operation_sindoor_realtime.py similarity index 100% rename from test_operation_sindoor_realtime.py rename to tests/test_operation_sindoor_realtime.py diff --git a/tests/workflow_diagnostic_report_20250726_154015.json b/tests/workflow_diagnostic_report_20250726_154015.json new file mode 100644 index 0000000..e13e579 --- /dev/null +++ b/tests/workflow_diagnostic_report_20250726_154015.json @@ -0,0 +1,265 @@ +{ + "timestamp": "2025-07-26T15:40:15.501428", + "summary": { + "total_checks": 13, + "passed": 11, + "failed": 0, + "warnings": 2, + "skipped": 0 + }, + "results": [ + { + "component": "System Environment", + "status": "PASS", + "message": "System environment is ready", + "duration": 2.6867048740386963, + "timestamp": "2025-07-26T15:40:14.096871", + "details": { + "python_version": "3.12.11", + "fastapi_available": true, + "uvicorn_available": true, + "streamlit_available": true, + "pandas_available": true, + "numpy_available": true, + "plotly_available": true, + "requests_available": true, + "asyncio_available": true, + "logging_available": true, + "json_available": true, + "memory_available": "6.99 GB", + "cpu_count": 4, + "disk_free": "2.57 GB", + "working_directory": "/app", + "project_root": "/app" + } + }, + { + "component": "Data Uploads", + "status": "PASS", + "message": "Data uploads system is functional", + "duration": 0.0013608932495117188, + "timestamp": "2025-07-26T15:40:14.098495", + "details": { + "uploads_dir_exists": true, + "write_permissions": true, + "dataset_connector_available": false, + "dataset_connector_error": "No module named 'data_engineering'" + } + }, + { + "component": "Data Engineering", + "status": "PASS", + "message": "Data engineering components are functional", + "duration": 0.012610912322998047, + "timestamp": "2025-07-26T15:40:14.111292", + "details": { + "data_engineering.advanced_trust_scoring_available": false, + "data_engineering.advanced_trust_scoring_error": "No module named 'data_engineering'", + "data_engineering.cleanlab_integration_available": false, + "data_engineering.cleanlab_integration_error": "No module named 'data_engineering'", + "data_engineering.trust_scoring_dashboard_available": false, + 
"data_engineering.trust_scoring_dashboard_error": "No module named 'data_engineering'", + "data_engineering.data_lifecycle_available": false, + "data_engineering.data_lifecycle_error": "No module named 'data_engineering'", + "trust_scoring_test": "FAIL", + "trust_scoring_error": "No module named 'data_engineering'", + "database_connectivity": true + } + }, + { + "component": "LLM Engineering", + "status": "PASS", + "message": "LLM engineering components are functional", + "duration": 0.000698089599609375, + "timestamp": "2025-07-26T15:40:14.112314", + "details": { + "llm_engineering.llm_lifecycle_available": false, + "llm_engineering.llm_lifecycle_error": "No module named 'llm_engineering'", + "llm_engineering.providers.base_provider_available": false, + "llm_engineering.providers.base_provider_error": "No module named 'llm_engineering'", + "llm_engineering.providers.llama_factory_provider_available": false, + "llm_engineering.providers.llama_factory_provider_error": "No module named 'llm_engineering'", + "llm_lifecycle_manager": "FAIL", + "llm_lifecycle_error": "No module named 'llm_engineering'", + "llm_config_exists": true + } + }, + { + "component": "High Performance System", + "status": "PASS", + "message": "High performance system components are functional", + "duration": 0.0009016990661621094, + "timestamp": "2025-07-26T15:40:14.113425", + "details": { + "high_performance_system.core.ultimate_moe_system_available": false, + "high_performance_system.core.ultimate_moe_system_error": "No module named 'high_performance_system'", + "high_performance_system.core.advanced_expert_ensemble_available": false, + "high_performance_system.core.advanced_expert_ensemble_error": "No module named 'high_performance_system'", + "high_performance_system.core.intelligent_domain_router_available": false, + "high_performance_system.core.intelligent_domain_router_error": "No module named 'high_performance_system'", + "high_performance_system.core.enhanced_dataset_profiler_available": false, + "high_performance_system.core.enhanced_dataset_profiler_error": "No module named 'high_performance_system'", + "high_performance_system.core.comprehensive_pii_detector_available": false, + "high_performance_system.core.comprehensive_pii_detector_error": "No module named 'high_performance_system'", + "high_performance_system.core.advanced_trust_scorer_available": false, + "high_performance_system.core.advanced_trust_scorer_error": "No module named 'high_performance_system'", + "moe_system_test": "FAIL", + "moe_system_error": "No module named 'high_performance_system'", + "expert_ensemble": "FAIL", + "expert_ensemble_error": "No module named 'high_performance_system'" + } + }, + { + "component": "Security System", + "status": "PASS", + "message": "Security system components are functional", + "duration": 0.0003681182861328125, + "timestamp": "2025-07-26T15:40:14.113942", + "details": { + "security.auth_manager_available": false, + "security.auth_manager_error": "No module named 'security'", + "security.secrets_manager_available": false, + "security.secrets_manager_error": "No module named 'security'", + "security.security_monitor_available": false, + "security.security_monitor_error": "No module named 'security'", + "security_webui": "FAIL", + "security_webui_error": "No module named 'security'" + } + }, + { + "component": "MCP Server", + "status": "WARNING", + "message": "MCP server check completed", + "duration": 0.006127595901489258, + "timestamp": "2025-07-26T15:40:14.120204", + "details": { + 
"mcp_server/server.py_exists": false, + "mcp_server/client.py_exists": true, + "mcp_server/config.py_exists": true, + "mcp_server_running": false, + "mcp_server_error": "Server not responding" + } + }, + { + "component": "Cloud APIs", + "status": "PASS", + "message": "Cloud APIs check completed", + "duration": 0.202164888381958, + "timestamp": "2025-07-26T15:40:14.322602", + "details": { + "cloudscale_apis/docs/cloud_provider_integration.md_exists": true, + "cloudscale_apis/endpoints/_exists": true, + "cloudscale_apis/webhooks/_exists": true, + "aws_sdk_available": true, + "azure_sdk_available": false, + "gcp_sdk_available": true + } + }, + { + "component": "Third-party Integrations", + "status": "PASS", + "message": "Third-party integrations check completed", + "duration": 0.0006895065307617188, + "timestamp": "2025-07-26T15:40:14.323546", + "details": { + "thirdparty_integrations.endpoints.verify_realtime_available": false, + "thirdparty_integrations.endpoints.verify_realtime_error": "No module named 'thirdparty_integrations'", + "thirdparty_integrations.endpoints.verify_batch_available": false, + "thirdparty_integrations.endpoints.verify_batch_error": "No module named 'thirdparty_integrations'", + "thirdparty_integrations.webhooks.verify_webhook_available": false, + "thirdparty_integrations.webhooks.verify_webhook_error": "No module named 'thirdparty_integrations'" + } + }, + { + "component": "Tests", + "status": "WARNING", + "message": "Tests check completed", + "duration": 1.0560717582702637, + "timestamp": "2025-07-26T15:40:15.379817", + "details": { + "test_high_performance_system.py_exists": true, + "simple_unit_test.py_exists": false, + "tests/test_advanced_pipeline.py_exists": true, + "simple_test_run": "FAIL", + "test_error": "ERROR: file or directory not found: simple_unit_test.py\n\n" + } + }, + { + "component": "Plugins", + "status": "PASS", + "message": "Plugins check completed", + "duration": 0.0003604888916015625, + "timestamp": "2025-07-26T15:40:15.380444", + "details": { + "plugin_loader": "FAIL", + "plugin_loader_error": "No module named 'plugins'", + "plugins/example_plugin.py_exists": true, + "plugins/hallucination_detector.py_exists": true, + "plugins/eu_gdpr_embed.py_exists": true + } + }, + { + "component": "Analytics & Dashboards", + "status": "PASS", + "message": "Analytics and dashboards check completed", + "duration": 0.0002505779266357422, + "timestamp": "2025-07-26T15:40:15.380855", + "details": { + "high_performance_system/analytics/ultimate_analytics_dashboard.py_exists": true, + "high_performance_system/analytics/sme_dashboard.py_exists": true, + "data_engineering/trust_scoring_dashboard.py_exists": true, + "operation_sindoor_dashboard.py_exists": false, + "ultimate_dashboard": "FAIL", + "ultimate_dashboard_error": "No module named 'high_performance_system'" + } + }, + { + "component": "Production Server", + "status": "PASS", + "message": "Production server check completed", + "duration": 0.11996245384216309, + "timestamp": "2025-07-26T15:40:15.501130", + "details": { + "superfast_production_server.py_exists": true, + "ote_api.py_exists": false, + "production_server_running": true, + "production_server_health": { + "status": "healthy", + "timestamp": "2025-07-26T15:40:15.458071", + "uptime": 48819.97430515289, + "cache_stats": { + "hits": 0, + "misses": 0 + }, + "system_ready": true + }, + "performance_endpoint": "PASS", + "performance_data": { + "performance_stats": { + "total_requests": 0, + "total_latency": 0.0, + "avg_latency": 0.0, + "cache_hit_rate": 
0.0, + "error_rate": 0.0 + }, + "cache_stats": { + "hits": 0, + "misses": 0 + }, + "cache_size": 0, + "timestamp": "2025-07-26T15:40:15.497127" + } + } + } + ], + "recommendations": [ + { + "priority": "MEDIUM", + "action": "Address warnings", + "components": [ + "MCP Server", + "Tests" + ] + } + ] +} \ No newline at end of file diff --git a/workflow_diagnostic_report_20250726_020600.json b/workflow_diagnostic_report_20250726_020600.json new file mode 100644 index 0000000..4c91acf --- /dev/null +++ b/workflow_diagnostic_report_20250726_020600.json @@ -0,0 +1,242 @@ +{ + "timestamp": "2025-07-26T02:06:00.806086", + "summary": { + "total_checks": 13, + "passed": 10, + "failed": 0, + "warnings": 3, + "skipped": 0 + }, + "results": [ + { + "component": "System Environment", + "status": "PASS", + "message": "System environment is ready", + "duration": 1.7772915363311768, + "timestamp": "2025-07-26T02:05:59.214582", + "details": { + "python_version": "3.12.11", + "fastapi_available": true, + "uvicorn_available": true, + "streamlit_available": true, + "pandas_available": true, + "numpy_available": true, + "plotly_available": true, + "requests_available": true, + "asyncio_available": true, + "logging_available": true, + "json_available": true, + "memory_available": "7.16 GB", + "cpu_count": 4, + "disk_free": "2.58 GB", + "working_directory": "/app", + "project_root": "/app" + } + }, + { + "component": "Data Uploads", + "status": "PASS", + "message": "Data uploads system is functional", + "duration": 0.014145374298095703, + "timestamp": "2025-07-26T02:05:59.229098", + "details": { + "uploads_dir_exists": true, + "write_permissions": true, + "dataset_connector_available": false, + "dataset_connector_error": "No module named 'data_engineering'" + } + }, + { + "component": "Data Engineering", + "status": "PASS", + "message": "Data engineering components are functional", + "duration": 0.12368917465209961, + "timestamp": "2025-07-26T02:05:59.353004", + "details": { + "data_engineering.advanced_trust_scoring_available": false, + "data_engineering.advanced_trust_scoring_error": "No module named 'data_engineering'", + "data_engineering.cleanlab_integration_available": false, + "data_engineering.cleanlab_integration_error": "No module named 'data_engineering'", + "data_engineering.trust_scoring_dashboard_available": false, + "data_engineering.trust_scoring_dashboard_error": "No module named 'data_engineering'", + "data_engineering.data_lifecycle_available": false, + "data_engineering.data_lifecycle_error": "No module named 'data_engineering'", + "trust_scoring_test": "FAIL", + "trust_scoring_error": "No module named 'data_engineering'", + "database_connectivity": true + } + }, + { + "component": "LLM Engineering", + "status": "PASS", + "message": "LLM engineering components are functional", + "duration": 0.0006618499755859375, + "timestamp": "2025-07-26T02:05:59.353965", + "details": { + "llm_engineering.llm_lifecycle_available": false, + "llm_engineering.llm_lifecycle_error": "No module named 'llm_engineering'", + "llm_engineering.providers.base_provider_available": false, + "llm_engineering.providers.base_provider_error": "No module named 'llm_engineering'", + "llm_engineering.providers.llama_factory_provider_available": false, + "llm_engineering.providers.llama_factory_provider_error": "No module named 'llm_engineering'", + "llm_lifecycle_manager": "FAIL", + "llm_lifecycle_error": "No module named 'llm_engineering'", + "llm_config_exists": true + } + }, + { + "component": "High Performance System", + 
"status": "PASS", + "message": "High performance system components are functional", + "duration": 0.0008711814880371094, + "timestamp": "2025-07-26T02:05:59.355021", + "details": { + "high_performance_system.core.ultimate_moe_system_available": false, + "high_performance_system.core.ultimate_moe_system_error": "No module named 'high_performance_system'", + "high_performance_system.core.advanced_expert_ensemble_available": false, + "high_performance_system.core.advanced_expert_ensemble_error": "No module named 'high_performance_system'", + "high_performance_system.core.intelligent_domain_router_available": false, + "high_performance_system.core.intelligent_domain_router_error": "No module named 'high_performance_system'", + "high_performance_system.core.enhanced_dataset_profiler_available": false, + "high_performance_system.core.enhanced_dataset_profiler_error": "No module named 'high_performance_system'", + "high_performance_system.core.comprehensive_pii_detector_available": false, + "high_performance_system.core.comprehensive_pii_detector_error": "No module named 'high_performance_system'", + "high_performance_system.core.advanced_trust_scorer_available": false, + "high_performance_system.core.advanced_trust_scorer_error": "No module named 'high_performance_system'", + "moe_system_test": "FAIL", + "moe_system_error": "No module named 'high_performance_system'", + "expert_ensemble": "FAIL", + "expert_ensemble_error": "No module named 'high_performance_system'" + } + }, + { + "component": "Security System", + "status": "PASS", + "message": "Security system components are functional", + "duration": 0.00035881996154785156, + "timestamp": "2025-07-26T02:05:59.355529", + "details": { + "security.auth_manager_available": false, + "security.auth_manager_error": "No module named 'security'", + "security.secrets_manager_available": false, + "security.secrets_manager_error": "No module named 'security'", + "security.security_monitor_available": false, + "security.security_monitor_error": "No module named 'security'", + "security_webui": "FAIL", + "security_webui_error": "No module named 'security'" + } + }, + { + "component": "MCP Server", + "status": "WARNING", + "message": "MCP server check completed", + "duration": 0.007213115692138672, + "timestamp": "2025-07-26T02:05:59.362933", + "details": { + "mcp_server/server.py_exists": false, + "mcp_server/client.py_exists": true, + "mcp_server/config.py_exists": true, + "mcp_server_running": false, + "mcp_server_error": "Server not responding" + } + }, + { + "component": "Cloud APIs", + "status": "PASS", + "message": "Cloud APIs check completed", + "duration": 0.23957180976867676, + "timestamp": "2025-07-26T02:05:59.602769", + "details": { + "cloudscale_apis/docs/cloud_provider_integration.md_exists": true, + "cloudscale_apis/endpoints/_exists": true, + "cloudscale_apis/webhooks/_exists": true, + "aws_sdk_available": true, + "azure_sdk_available": false, + "gcp_sdk_available": true + } + }, + { + "component": "Third-party Integrations", + "status": "PASS", + "message": "Third-party integrations check completed", + "duration": 0.0004413127899169922, + "timestamp": "2025-07-26T02:05:59.603562", + "details": { + "thirdparty_integrations.endpoints.verify_realtime_available": false, + "thirdparty_integrations.endpoints.verify_realtime_error": "No module named 'thirdparty_integrations'", + "thirdparty_integrations.endpoints.verify_batch_available": false, + "thirdparty_integrations.endpoints.verify_batch_error": "No module named 'thirdparty_integrations'", + 
"thirdparty_integrations.webhooks.verify_webhook_available": false, + "thirdparty_integrations.webhooks.verify_webhook_error": "No module named 'thirdparty_integrations'" + } + }, + { + "component": "Tests", + "status": "WARNING", + "message": "Tests check completed", + "duration": 1.1941814422607422, + "timestamp": "2025-07-26T02:06:00.797914", + "details": { + "test_high_performance_system.py_exists": true, + "simple_unit_test.py_exists": false, + "tests/test_advanced_pipeline.py_exists": true, + "simple_test_run": "FAIL", + "test_error": "ERROR: file or directory not found: simple_unit_test.py\n\n" + } + }, + { + "component": "Plugins", + "status": "PASS", + "message": "Plugins check completed", + "duration": 0.0003821849822998047, + "timestamp": "2025-07-26T02:06:00.798560", + "details": { + "plugin_loader": "FAIL", + "plugin_loader_error": "No module named 'plugins'", + "plugins/example_plugin.py_exists": true, + "plugins/hallucination_detector.py_exists": true, + "plugins/eu_gdpr_embed.py_exists": true + } + }, + { + "component": "Analytics & Dashboards", + "status": "PASS", + "message": "Analytics and dashboards check completed", + "duration": 0.0002613067626953125, + "timestamp": "2025-07-26T02:06:00.798988", + "details": { + "high_performance_system/analytics/ultimate_analytics_dashboard.py_exists": true, + "high_performance_system/analytics/sme_dashboard.py_exists": true, + "data_engineering/trust_scoring_dashboard.py_exists": true, + "operation_sindoor_dashboard.py_exists": false, + "ultimate_dashboard": "FAIL", + "ultimate_dashboard_error": "No module named 'high_performance_system'" + } + }, + { + "component": "Production Server", + "status": "WARNING", + "message": "Production server check completed", + "duration": 0.006836891174316406, + "timestamp": "2025-07-26T02:06:00.805957", + "details": { + "superfast_production_server.py_exists": true, + "ote_api.py_exists": false, + "production_server_running": false, + "production_server_error": "Server not responding", + "performance_endpoint": "FAIL" + } + } + ], + "recommendations": [ + { + "priority": "MEDIUM", + "action": "Address warnings", + "components": [ + "MCP Server", + "Tests", + "Production Server" + ] + } + ] +} \ No newline at end of file