fixed med3pa and added comparaison
lyna1404 committed Jul 16, 2024
1 parent d84fb04 commit b328161
Showing 47 changed files with 951 additions and 249,195 deletions.
8 changes: 8 additions & 0 deletions MED3pa/detectron/experiment.py
@@ -121,6 +121,14 @@ def save(self, file_path: str, file_name: str = 'detectron_results', save_config
        with open(file_name_path, 'w') as file:
            json.dump(self.test_results, file, indent=4)

        counts_dict = {}
        counts_dict['reference'] = self.cal_record.rejected_counts().tolist()
        counts_dict['test'] = self.test_record.rejected_counts().tolist()

        file_name_path_counts = os.path.join(file_path, 'rejection_counts.json')
        with open(file_name_path_counts, 'w') as file:
            json.dump(counts_dict, file, indent=4)

        if save_config:
            config_file_path = os.path.join(file_path, 'experiment_config.json')
            with open(config_file_path, 'w') as file:
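For reference, the block above writes the per-run rejection counts to a rejection_counts.json file next to the other experiment outputs. A minimal sketch of reading it back (the directory name is hypothetical; the file name and keys come from the code above):

import json
import os

# Hypothetical directory that was passed as file_path to save().
results_dir = './detectron_results'

with open(os.path.join(results_dir, 'rejection_counts.json'), 'r') as file:
    counts = json.load(file)

# Two lists of per-run rejection counts, as written by save().
reference_counts = counts['reference']
test_counts = counts['test']
print(len(reference_counts), len(test_counts))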
46 changes: 29 additions & 17 deletions MED3pa/detectron/strategies.py
@@ -103,26 +103,38 @@ def execute(calibration_records: DetectronRecordsManager, test_records:Detectron

        # Perform the Mann-Whitney U test
        u_statistic, p_value = stats.mannwhitneyu(cal_counts, test_counts, alternative='less')
        z_score = (test_mean - cal_mean) / cal_std

        # Calculate the z-scores for the test data
        z_scores = (test_counts[:, None] - cal_counts) / np.std(cal_counts)

        # Define thresholds for categorizing
        def categorize_z_score(z):
            if z <= 0:
                return 'no significant shift'
            elif abs(z) < 1:
                return 'small'
            elif abs(z) < 2:
                return 'moderate'
            else:
                return 'large'

        # Categorize each test count based on its z-score
        categories = np.array([categorize_z_score(z) for z in z_scores.flatten()])
        # Calculate the percentage of each category
        category_counts = pd.Series(categories).value_counts(normalize=True) * 100

        # Describe the significance of the shift based on the z-score
        significance_description = ""
        if z_score <= 0 :
            significance_description = "no significant shift"
        elif abs(z_score) < 1.0:
            significance_description = "Small"
        elif abs(z_score) < 2.0:
            significance_description = "Moderate"
        elif abs(z_score) < 3.0:
            significance_description = "Large"
        else:
            significance_description = "Very large"
        # Results dictionary including rank statistics
        significance_description = {
            'no shift': category_counts.get('no significant shift', 0),
            'small': category_counts.get('small', 0),
            'moderate': category_counts.get('moderate', 0),
            'large': category_counts.get('large', 0)
        }

        results = {
            'p_value': p_value,
            'u_statistic': u_statistic,
            'z-score':z_score,
            'shift significance' : significance_description
            'significance_description' : significance_description
        }

        return results
@@ -239,7 +251,7 @@ def trim_dataset(data, proportion_to_cut):
        # Define thresholds for categorizing
        def categorize_z_score(z):
            if z <= 0:
                return 'no shift'
                return 'no significant shift'
            elif abs(z) < 1:
                return 'small'
            elif abs(z) < 2:
@@ -257,7 +269,7 @@ def categorize_z_score(z):

        # Describe the significance of the shift based on the z-score
        significance_description = {
            'no shift': category_counts.get('no shift', 0),
            'no shift': category_counts.get('no significant shift', 0),
            'small': category_counts.get('small', 0),
            'moderate': category_counts.get('moderate', 0),
            'large': category_counts.get('large', 0)
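To illustrate the per-count categorization introduced in this file, here is a self-contained sketch using made-up rejection counts (the real code operates on counts taken from the Detectron records): each test count is compared against every calibration count, every pairwise z-score is bucketed, and the buckets are reported as percentages.

import numpy as np
import pandas as pd

# Made-up counts standing in for cal_counts / test_counts.
cal_counts = np.array([5, 6, 7, 6, 5])
test_counts = np.array([9, 10, 8])

# Pairwise z-scores: each test count against every calibration count.
z_scores = (test_counts[:, None] - cal_counts) / np.std(cal_counts)

def categorize_z_score(z):
    if z <= 0:
        return 'no significant shift'
    elif abs(z) < 1:
        return 'small'
    elif abs(z) < 2:
        return 'moderate'
    else:
        return 'large'

categories = np.array([categorize_z_score(z) for z in z_scores.flatten()])
# Percentage of pairs falling into each category.
category_counts = pd.Series(categories).value_counts(normalize=True) * 100
print(category_counts.to_dict())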
198 changes: 198 additions & 0 deletions MED3pa/med3pa/comparaison.py
@@ -0,0 +1,198 @@
import json
import os
from typing import Any, Dict, List, Tuple, Type, Union

import numpy as np
from sklearn.model_selection import train_test_split

from MED3pa.med3pa.models import *
from MED3pa.med3pa.uncertainty import *
from MED3pa.models.base import BaseModelManager
from MED3pa.med3pa.experiment import Med3paResults

class Med3paComparison:
"""
Class to compare the results of two Med3paExperiment instances.
"""
def __init__(self, results1_path: str, results2_path: str) -> None:
self.results1_path = os.path.abspath(results1_path)
self.results2_path = os.path.abspath(results2_path)
self.profiles_metrics_comparaison = {}
self.profiles_detectron_comparaison = {}
self.global_metrics_comparaison = {}
self.compare_profiles = False
self.compare_detectron = False
self._check_experiment_name()

def _check_experiment_name(self) -> None:
"""
Checks if the experiment_name in the config_file of both results paths is the same.
If not, raises a ValueError. Also sets the flag for Detectron comparison if applicable.
"""
config_file_1 = os.path.join(self.results1_path, 'experiment_config.json')
config_file_2 = os.path.join(self.results2_path, 'experiment_config.json')

with open(config_file_1, 'r') as f1, open(config_file_2, 'r') as f2:
config1 = json.load(f1)
config2 = json.load(f2)

if config1['experiment_name'] not in ["Med3paDetectronExperiment", "Med3paExperiment"]:
raise ValueError("Only Med3paDetectronExperiments & Med3paExperiments can be compared")

if config1['experiment_name'] != config2['experiment_name']:
raise ValueError("The two results are not from the same experiment.")

if config1['experiment_name'] == 'Med3paDetectronExperiment':
self.compare_detectron = True

def _check_experiment_tree(self) -> None:
"""
Checks if the experiment trees in the results paths are the same.
If they are, sets the flag for profile comparison.
"""
tree_file_1 = os.path.join(self.results1_path, 'test', 'tree.json')
tree_file_2 = os.path.join(self.results2_path, 'test', 'tree.json')

with open(tree_file_1, 'r') as f1, open(tree_file_2, 'r') as f2:
tree1 = json.load(f1)
tree2 = json.load(f2)

if tree1 == tree2:
self.compare_profiles = True

def compare_profiles_metrics(self):
"""
Compares profile metrics between two sets of results and stores them in a dictionary.
"""
combined = {}
profiles_file_1 = os.path.join(self.results1_path, 'test', 'profiles.json')
profiles_file_2 = os.path.join(self.results2_path, 'test', 'profiles.json')

with open(profiles_file_1, 'r') as f1, open(profiles_file_2, 'r') as f2:
profiles1 = json.load(f1)
profiles2 = json.load(f2)

for samples_ratio, dr_dict in profiles1.items():
if samples_ratio not in combined:
combined[samples_ratio] = {}
for dr, profiles in dr_dict.items():
for profile in profiles:
profile_path = " / ".join(profile["path"])
if profile_path not in combined[samples_ratio]:
combined[samples_ratio][profile_path] = {}
if dr not in combined[samples_ratio][profile_path]:
combined[samples_ratio][profile_path][dr] = {}
combined[samples_ratio][profile_path][dr]['metrics_1'] = profile["metrics"]

for samples_ratio, dr_dict in profiles2.items():
if samples_ratio not in combined:
combined[samples_ratio] = {}
for dr, profiles in dr_dict.items():
for profile in profiles:
profile_path = " / ".join(profile["path"])
if profile_path not in combined[samples_ratio]:
combined[samples_ratio][profile_path] = {}
if dr not in combined[samples_ratio][profile_path]:
combined[samples_ratio][profile_path][dr] = {}
combined[samples_ratio][profile_path][dr]['metrics_2'] = profile["metrics"]

self.profiles_metrics_comparaison = combined

def compare_profiles_detectron_results(self):
"""
Compares Detectron results between two sets of profiles and stores them in a dictionary.
"""
combined = {}
profiles_file_1 = os.path.join(self.results1_path, 'test', 'profiles.json')
profiles_file_2 = os.path.join(self.results2_path, 'test', 'profiles.json')

with open(profiles_file_1, 'r') as f1, open(profiles_file_2, 'r') as f2:
profiles1 = json.load(f1)
profiles2 = json.load(f2)

        # Determine the smallest positive samples_ratio (keys are strings, so compare them numerically)
        smallest_samples_ratio = min((k for k in profiles1.keys() if float(k) > 0), key=float)

        for profiles in [profiles1, profiles2]:
            if smallest_samples_ratio not in profiles:
                continue

            dr_dict = profiles[smallest_samples_ratio]

            if smallest_samples_ratio not in combined:
                combined[smallest_samples_ratio] = {}

            if "100" not in dr_dict:
                continue

            for profile in dr_dict["100"]:
                profile_path = " / ".join(profile["path"])
                if profile_path not in combined[smallest_samples_ratio]:
                    combined[smallest_samples_ratio][profile_path] = {}
                if "100" not in combined[smallest_samples_ratio][profile_path]:
                    combined[smallest_samples_ratio][profile_path]["100"] = {}

                if profiles is profiles1:
                    combined[smallest_samples_ratio][profile_path]["100"]['detectron_results_1'] = profile["detectron_results"]
                else:
                    combined[smallest_samples_ratio][profile_path]["100"]['detectron_results_2'] = profile["detectron_results"]

        self.profiles_detectron_comparaison = combined

    def compare_global_metrics(self):
        """
        Compares global metrics between two sets of results and stores them in a dictionary.
        """
        combined = {}
        file_1 = os.path.join(self.results1_path, 'test', 'metrics_dr.json')
        file_2 = os.path.join(self.results2_path, 'test', 'metrics_dr.json')

        with open(file_1, 'r') as f1, open(file_2, 'r') as f2:
            dr1 = json.load(f1)
            dr2 = json.load(f2)

        for dr in range(100, -1, -1):  # Iterating from 100 to 0
            dr_str = str(dr)
            combined[dr_str] = {}

            if dr_str in dr1:
                combined[dr_str]['metrics_dr_1'] = dr1[dr_str]
            if dr_str in dr2:
                combined[dr_str]['metrics_dr_2'] = dr2[dr_str]

        self.global_metrics_comparaison = combined

    def compare_experiments(self):
        """
        Compares the experiments by global metrics, profiles, and Detectron results if applicable.
        """
        self.compare_global_metrics()
        self._check_experiment_tree()
        if self.compare_profiles:
            self.compare_profiles_metrics()
        if self.compare_detectron:
            self.compare_profiles_detectron_results()

    def save(self, directory_path: str) -> None:
        """
        Saves the comparison results to a specified directory.
        Args:
            directory_path (str): The directory where the comparison results will be saved.
        """
        # Ensure the main directory exists
        os.makedirs(directory_path, exist_ok=True)

        global_comparaison_path = os.path.join(directory_path, 'global_metrics_comparaison.json')
        with open(global_comparaison_path, 'w') as f:
            json.dump(self.global_metrics_comparaison, f, indent=4)

        if self.profiles_detectron_comparaison and self.compare_detectron:
            profiles_detectron_path = os.path.join(directory_path, 'profiles_detectron_comparaison.json')
            with open(profiles_detectron_path, 'w') as f:
                json.dump(self.profiles_detectron_comparaison, f, indent=4)

        if self.profiles_metrics_comparaison and self.compare_profiles:
            profiles_metrics_path = os.path.join(directory_path, 'profiles_metrics_comparaison.json')
            with open(profiles_metrics_path, 'w') as f:
                json.dump(self.profiles_metrics_comparaison, f, indent=4)
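A minimal usage sketch of the new comparison class, assuming two result folders produced by the same kind of Med3pa experiment (the paths are hypothetical; each folder is expected to contain the experiment_config.json and test/ outputs read by the methods above):

from MED3pa.med3pa.comparaison import Med3paComparison

# Hypothetical result directories from two runs of the same experiment type.
comparison = Med3paComparison('./med3pa_results_run1', './med3pa_results_run2')
comparison.compare_experiments()

# Writes global_metrics_comparaison.json and, when applicable, the profile-level comparison files.
comparison.save('./med3pa_comparaison')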