Commit 95adbe9
fixed detectron by profiles
lyna1404 committed Jul 25, 2024
1 parent a847a96 commit 95adbe9
Showing 77 changed files with 3,359 additions and 343,485 deletions.
38 changes: 15 additions & 23 deletions MED3pa/med3pa/comparaison.py
@@ -116,31 +116,24 @@ def compare_profiles_detectron_results(self):
            profiles2 = json.load(f2)

        # Determine the smallest positive samples_ratio
-       smallest_samples_ratio = min(filter(lambda x: float(x) > 0, profiles1.keys()))
+       smallest_samples_ratio = min([int(k) for k in profiles1.keys() if int(k) >= 0])
+       smallest_samples_ratio = str(smallest_samples_ratio)

-       for profiles in [profiles1, profiles2]:
+       for profiles, key in zip([profiles1, profiles2], ['detectron_results_1', 'detectron_results_2']):
            if smallest_samples_ratio not in profiles:
                continue

            dr_dict = profiles[smallest_samples_ratio]

-           if smallest_samples_ratio not in combined:
-               combined[smallest_samples_ratio] = {}
-
            if "100" not in dr_dict:
                continue

            for profile in dr_dict["100"]:
                profile_path = " / ".join(profile["path"])
-               if profile_path not in combined[smallest_samples_ratio]:
-                   combined[smallest_samples_ratio][profile_path] = {}
-               if "100" not in combined[smallest_samples_ratio][profile_path]:
-                   combined[smallest_samples_ratio][profile_path]["100"] = {}
+               if profile_path not in combined:
+                   combined[profile_path] = {}

-               if profiles is profiles1:
-                   combined[smallest_samples_ratio][profile_path]["100"]['detectron_results_1'] = profile["detectron_results"]
-               else:
-                   combined[smallest_samples_ratio][profile_path]["100"]['detectron_results_2'] = profile["detectron_results"]
+               combined[profile_path][key] = profile["detectron_results"]

        self.profiles_detectron_comparaison = combined
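
The rework fixes two issues at once: the smallest samples ratio is now compared numerically (the old `min` ran over string keys and compared lexicographically, so "10" sorted before "5"), and the combined structure is flattened so each profile path maps directly to both sets of Detectron results. A minimal sketch of the new merge logic, using hypothetical profile contents:

```python
# Minimal sketch of the reworked merge; profile contents are hypothetical.
profiles1 = {"0": {"100": [{"path": ["age > 50"], "detectron_results": {"p_value": 0.03}}]}}
profiles2 = {"0": {"100": [{"path": ["age > 50"], "detectron_results": {"p_value": 0.40}}]}}

combined = {}
smallest_samples_ratio = str(min(int(k) for k in profiles1 if int(k) >= 0))  # "0"

for profiles, key in zip([profiles1, profiles2], ['detectron_results_1', 'detectron_results_2']):
    for profile in profiles.get(smallest_samples_ratio, {}).get("100", []):
        profile_path = " / ".join(profile["path"])
        combined.setdefault(profile_path, {})[key] = profile["detectron_results"]

# combined == {"age > 50": {"detectron_results_1": {"p_value": 0.03},
#                           "detectron_results_2": {"p_value": 0.40}}}
```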

@@ -208,14 +201,13 @@ def compare_config(self):
        combined['base_model1'] = config1["base_model"]
        combined['base_model2'] = config2["base_model"]

-       combined['apc_model1'] = config1["apc_model"]
-       combined['apc_model2'] = config2["apc_model"]
-
-       combined['ipc_model1'] = config1["ipc_model"]
-       combined['ipc_model2'] = config2["ipc_model"]
+       if not self.compare_detectron:
+           combined['experiment_params1'] = config1["med3pa_params"]
+           combined['experiment_params2'] = config2["med3pa_params"]
+       else:
+           combined['experiment_params1'] = config1["med3pa_detectron_params"]
+           combined['experiment_params2'] = config2["med3pa_detectron_params"]

-       combined['experiment_params1'] = config1["experiment_params"]
-       combined['experiment_params2'] = config2["experiment_params"]
-
        self.config_file = combined
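
`compare_config` now pulls the experiment parameters from the section matching the experiment type, mirroring the renamed config keys written by `experiment.py` (`med3pa_params` for a plain MED3pa run, `med3pa_detectron_params` for the combined run). A compact sketch with hypothetical config contents:

```python
# Hypothetical config dicts, shaped like the ones experiment.py now saves.
config1 = {"base_model": {"name": "model_A"}, "med3pa_params": {"mode": "mpc"}}
config2 = {"base_model": {"name": "model_B"}, "med3pa_params": {"mode": "apc"}}
compare_detectron = False  # plain MED3pa comparison

params_key = "med3pa_params" if not compare_detectron else "med3pa_detectron_params"
combined = {
    'base_model1': config1["base_model"],
    'base_model2': config2["base_model"],
    'experiment_params1': config1[params_key],
    'experiment_params2': config2[params_key],
}
```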

92 changes: 59 additions & 33 deletions MED3pa/med3pa/experiment.py
@@ -53,6 +53,9 @@ def __init__(self) -> None:
        self.datasets: Dict[int, MaskedDataset] = {}
        self.experiment_config = {}
        self.tree = {}
+       self.ipc_scores = None
+       self.apc_scores = None
+       self.mpc_scores = None

    def set_metrics_by_dr(self, metrics_by_dr: Dict) -> None:
        """
@@ -140,6 +143,21 @@ def get_profiles_manager(self) -> ProfilesManager:
        """
        return self.profiles_manager

+   def set_confidence_scores(self, scores: np.ndarray, mode: str) -> None:
+       """Stores the confidence scores for the given mode: 'ipc', 'apc' or 'mpc'."""
+       if mode == 'ipc':
+           self.ipc_scores = scores
+       elif mode == 'apc':
+           self.apc_scores = scores
+       elif mode == 'mpc':
+           self.mpc_scores = scores
+
+   def get_confidence_scores(self, mode: str) -> np.ndarray:
+       """Returns the confidence scores stored for the given mode: 'ipc', 'apc' or 'mpc'."""
+       if mode == 'ipc':
+           return self.ipc_scores
+       elif mode == 'apc':
+           return self.apc_scores
+       elif mode == 'mpc':
+           return self.mpc_scores
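
The new accessors let callers retrieve whichever confidence scores a record holds without reaching into the stored datasets. A short usage sketch (the record contents and score values are illustrative):

```python
import numpy as np

record = Med3paRecord()
record.set_confidence_scores(np.array([0.91, 0.73, 0.88]), "apc")

# Fall back from MPC to APC scores, the way Med3paDetectronExperiment.run does below.
scores = record.get_confidence_scores("mpc")       # None here, no MPC scores stored
if scores is None:
    scores = record.get_confidence_scores("apc")   # falls back to the APC scores
```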

class Med3paResults:
"""
Expand Down Expand Up @@ -229,6 +247,7 @@ def save_models(self, file_path: str, mode:str ='all') -> None:
if tree_structure:
tree_structure.save_tree(tree_structure_path)


class Med3paExperiment:
"""
Class to run the MED3PA method experiment.
@@ -250,11 +269,11 @@ def run(datasets_manager: DatasetsManager,
            samples_ratio_min: int = 0,
            samples_ratio_max: int = 50,
            samples_ratio_step: int = 5,
-           med3pa_metrics: List[str] = [],
+           med3pa_metrics: List[str] = ['Accuracy', 'BalancedAccuracy', 'Precision', 'Recall', 'F1Score', 'Specificity', 'Sensitivity', 'Auc', 'LogLoss', 'Auprc', 'NPV', 'PPV', 'MCC'],
            evaluate_models: bool = False,
            use_ref_models: bool = False,
            mode: str = 'mpc',
-           models_metrics: List[str] = ['MSE', 'RMSE']) -> Med3paResults:
+           models_metrics: List[str] = ['MSE', 'RMSE', 'MAE']) -> Med3paResults:

        """Runs the MED3PA experiment on both reference and testing sets.
        Args:
@@ -313,16 +332,15 @@ def run(datasets_manager: DatasetsManager,
            'med3pa_metrics': med3pa_metrics,
            'evaluate_models': evaluate_models,
            'models_evaluation_metrics': models_metrics,
-           'mode': mode
-
+           'mode': mode,
+           'ipc_model': ipc_config.get_info(),
+           'apc_model': apc_config.get_info(),
        }
        experiment_config = {
            'experiment_name': "Med3paExperiment",
            'datasets': datasets_manager.get_info(),
            'base_model': base_model_manager.get_instance().get_info(),
-           'ipc_model': ipc_config.get_info(),
-           'apc_model': apc_config.get_info(),
-           'experiment_params': med3pa_params
+           'med3pa_params': med3pa_params
        }
        results.set_experiment_config(experiment_config)
        results.set_models(ipc_config, apc_config)
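
After this reshuffle the IPC and APC model descriptions live inside `med3pa_params`, and the top-level key is `med3pa_params` instead of `experiment_params`, matching what the updated `compare_config` in `comparaison.py` reads. Roughly the shape of the saved config (values illustrative):

```python
# Approximate shape of the saved configuration after this change; values illustrative.
experiment_config = {
    'experiment_name': "Med3paExperiment",
    'datasets': {"testing": "..."},      # datasets_manager.get_info()
    'base_model': {"name": "..."},       # base_model_manager.get_instance().get_info()
    'med3pa_params': {
        'mode': 'mpc',
        'ipc_model': {"name": "..."},    # ipc_config.get_info()
        'apc_model': {"name": "..."},    # apc_config.get_info()
        # ...plus the sampling ratios, metrics lists and evaluation flags
    },
}
```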
@@ -348,10 +366,10 @@ def _run_by_set(datasets_manager: DatasetsManager,
                    samples_ratio_min: int = 0,
                    samples_ratio_max: int = 50,
                    samples_ratio_step: int = 5,
-                   med3pa_metrics: List[str] = [],
+                   med3pa_metrics: List[str] = ['Accuracy', 'BalancedAccuracy', 'Precision', 'Recall', 'F1Score', 'Specificity', 'Sensitivity', 'Auc', 'LogLoss', 'Auprc', 'NPV', 'PPV', 'MCC'],
                    evaluate_models: bool = False,
                    mode: str = 'mpc',
-                   models_metrics: List[str] = ['MSE', 'RMSE']) -> Tuple[Med3paRecord, dict, dict]:
+                   models_metrics: List[str] = ['MSE', 'RMSE', 'MAE']) -> Tuple[Med3paRecord, dict, dict]:

        """
        Orchestrates the MED3PA experiment on one specific set of the dataset.
@@ -421,8 +439,6 @@ def _run_by_set(datasets_manager: DatasetsManager,
            x_train = x
            uncertainty_train = uncertainty_values

-
-
        results = Med3paRecord()

# Step 5: Create and train IPCModel
Expand All @@ -445,7 +461,11 @@ def _run_by_set(datasets_manager: DatasetsManager,
# Predict IPC values
IPC_values = IPC_model.predict(x)
print("Individualized confidence scores calculated.")

# Save the calculated confidence scores by the APCmodel
dataset.set_confidence_scores(IPC_values)
cloned_dataset = dataset.clone()
results.set_dataset(mode="ipc", dataset=cloned_dataset)
results.set_confidence_scores(IPC_values, "ipc")
if mode in ['mpc', 'apc']:

# Step 6: Create and train APCModel
@@ -475,6 +495,7 @@ def _run_by_set(datasets_manager: DatasetsManager,
            dataset.set_confidence_scores(APC_values)
            cloned_dataset = dataset.clone()
            results.set_dataset(mode="apc", dataset=cloned_dataset)
+           results.set_confidence_scores(APC_values, "apc")

            # Step 7: Create and train MPCModel
            if mode == 'mpc':
@@ -485,12 +506,17 @@ def _run_by_set(datasets_manager: DatasetsManager,
                dataset.set_confidence_scores(MPC_values)
                cloned_dataset = dataset.clone()
                results.set_dataset(mode="mpc", dataset=cloned_dataset)
+               results.set_confidence_scores(MPC_values, "mpc")
            else:
                MPC_model = MPCModel(APC_values=APC_values)
                MPC_values = MPC_model.predict()

            print("Mixed confidence scores calculated.")

+           # Calculate metrics by declaration rate
+           metrics_by_dr = MDRCalculator.calc_metrics_by_dr(datasets_manager=datasets_manager, confidence_scores=MPC_values, metrics_list=med3pa_metrics, set=set)
+           results.set_metrics_by_dr(metrics_by_dr)
+
            # Step 8: Calculate the profiles for the different samples_ratio and drs
            profiles_manager = ProfilesManager(features)
            for samples_ratio in range(samples_ratio_min, samples_ratio_max + 1, samples_ratio_step):
@@ -501,18 +527,6 @@ def _run_by_set(datasets_manager: DatasetsManager,
            results.set_profiles_manager(profiles_manager)
            print("Results extracted for minimum_samples_ratio = ", samples_ratio)

-
-       # Calculate metrics by declaration rate
-       # Create and predict MPC values using only the IPC values
-       MPC_model = MPCModel(IPC_values=IPC_values)
-       MPC_values = MPC_model.predict()
-       # Save the confidence scores predicted by the IPCModel
-       dataset.set_confidence_scores(MPC_values)
-       cloned_dataset = dataset.clone()
-       results.set_dataset(mode="ipc", dataset=cloned_dataset)
-       metrics_by_dr = MDRCalculator.calc_metrics_by_dr(datasets_manager=datasets_manager, confidence_scores=MPC_values, metrics_list=med3pa_metrics, set=set)
-       results.set_metrics_by_dr(metrics_by_dr)
-
        if mode in ['mpc', 'apc']:
            ipc_config = IPC_model
            apc_config = APC_model
@@ -557,12 +571,12 @@ def run(datasets: DatasetsManager,
            samples_ratio_min: int = 0,
            samples_ratio_max: int = 50,
            samples_ratio_step: int = 5,
-           med3pa_metrics: List[str] = [],
+           med3pa_metrics: List[str] = ['Accuracy', 'BalancedAccuracy', 'Precision', 'Recall', 'F1Score', 'Specificity', 'Sensitivity', 'Auc', 'LogLoss', 'Auprc', 'NPV', 'PPV', 'MCC'],
            evaluate_models: bool = False,
            use_ref_models: bool = False,
-           models_metrics: List[str] = ['MSE', 'RMSE'],
+           models_metrics: List[str] = ['MSE', 'RMSE', 'MAE'],
            mode: str = 'mpc',
-           all_dr: bool = False) -> Tuple[Med3paResults, Med3paResults, DetectronResult]:
+           all_dr: bool = False) -> Med3paResults:
        """Runs the MED3PA and Detectron experiment.
        Args:
            datasets (DatasetsManager): The datasets manager instance.
@@ -595,7 +609,7 @@ def run(datasets: DatasetsManager,
            models_metrics (list of str, optional): List of metrics for model evaluation, by default ['MSE', 'RMSE'].
            all_dr (bool, optional): Whether to run for all declaration rates, by default False.
        Returns:
-           Tuple[Med3paResults, DetectronResult]: Results of MED3pa on reference and testing sets, plus Detectron Results.
+           Med3paResults: Results of MED3pa on the reference and testing sets, plus the Detectron results.
        """

        valid_modes = ['mpc', 'apc']
@@ -617,14 +631,21 @@ def run(datasets: DatasetsManager,
                            patience=patience, allow_margin=allow_margin, margin=margin)
        detectron_results.analyze_results(test_strategies)

+       if med3pa_results.test_record.get_confidence_scores("mpc") is not None:
+           confidence_scores = med3pa_results.test_record.get_confidence_scores("mpc")
+       elif med3pa_results.test_record.get_confidence_scores("apc") is not None:
+           confidence_scores = med3pa_results.test_record.get_confidence_scores("apc")
+       else:
+           raise ValueError("The confidence scores were not calculated!")
+
        print("Running Profiled Detectron Experiment:")
        detectron_profiles_res = MDRCalculator.detectron_by_profiles(datasets=datasets, profiles_manager=med3pa_results.test_record.get_profiles_manager(), training_params=training_params,
-                                                base_model_manager=base_model_manager,
+                                                base_model_manager=base_model_manager, confidence_scores=confidence_scores,
                                                 samples_size=samples_size_profiles, num_calibration_runs=num_calibration_runs, ensemble_size=ensemble_size,
                                                 patience=patience, strategies=test_strategies,
                                                 allow_margin=allow_margin, margin=margin, all_dr=all_dr)

-       med3pa_detectron_params = {
+       med3pa_params = {
            'uncertainty_metric': uncertainty_metric,
            'samples_ratio_min': samples_ratio_min,
            'samples_ratio_max': samples_ratio_max,
@@ -633,6 +654,10 @@ def run(datasets: DatasetsManager,
            'evaluate_models': evaluate_models,
            'models_evaluation_metrics': models_metrics,
            'mode': mode,
-
+       }
+
+       detectron_params = {
            'samples_size': samples_size,
            'profiles_samples_size': samples_size_profiles,
            'cdcs_ensemble_size': ensemble_size,
@@ -641,14 +666,15 @@ def run(datasets: DatasetsManager,
            'allow_margin': allow_margin,
            'margin': margin,
            'additional_training_params': training_params,
-
        }

        experiment_config = {
            'experiment_name': "Med3paDetectronExperiment",
-           'experiment_params': med3pa_detectron_params,
+           'med3pa_detectron_params': {},
        }

+       experiment_config['med3pa_detectron_params']['detectron_params'] = detectron_params
+       experiment_config['med3pa_detectron_params']['med3pa_params'] = med3pa_params
+
        med3pa_results.set_detectron_results(detectron_results)
        med3pa_results.set_experiment_config(experiment_config)
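
For the combined experiment the parameters are now nested one level deeper under `med3pa_detectron_params`, which is exactly the layout the updated `compare_config` in `comparaison.py` expects. A sketch of the resulting shape (values illustrative):

```python
# Approximate shape of the combined experiment's saved configuration; values illustrative.
experiment_config = {
    'experiment_name': "Med3paDetectronExperiment",
    'med3pa_detectron_params': {
        'detectron_params': {'samples_size': 20},   # sample sizes, calibration runs, margins, ...
        'med3pa_params': {'mode': 'mpc'},           # uncertainty metric, ratios, metrics, ...
    },
}
```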

2 changes: 1 addition & 1 deletion MED3pa/med3pa/mdr.py
@@ -425,6 +425,7 @@ def calc_metrics_by_profiles(profiles_manager, datasets_manager: DatasetsManager
    @staticmethod
    def detectron_by_profiles(datasets: DatasetsManager,
                              profiles_manager: ProfilesManager,
+                             confidence_scores: np.ndarray,
                              training_params: Dict,
                              base_model_manager: BaseModelManager,
                              strategies: Union[Type[DetectronStrategy], List[Type[DetectronStrategy]]],
@@ -460,7 +461,6 @@ def detectron_by_profiles(datasets: DatasetsManager,
        min_positive_ratio = min([k for k in profiles_manager.profiles_records.keys() if k >= 0])
        profiles_by_dr = profiles_manager.get_profiles(min_samples_ratio=min_positive_ratio)
        last_min_confidence_level = -1
-       confidence_scores = datasets.get_dataset_by_type(dataset_type="testing", return_instance=True).get_confidence_scores()
        for dr, profiles in profiles_by_dr.items():
            if not all_dr and dr != 100:
                continue  # Skip all dr values except the first one if all_dr is False
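
`detectron_by_profiles` no longer pulls the confidence scores off the testing dataset itself; the caller passes them in, which is what lets `experiment.py` fall back from MPC to APC scores. A sketch of the new call site, assuming the managers, strategies and training params are set up as in `experiment.py` (the remaining sampling and calibration keyword arguments are omitted here):

```python
# Sketch of the updated call; the objects are assumed to be set up as in experiment.py.
detectron_profiles_res = MDRCalculator.detectron_by_profiles(
    datasets=datasets,
    profiles_manager=med3pa_results.test_record.get_profiles_manager(),
    confidence_scores=confidence_scores,  # now supplied by the caller
    training_params=training_params,
    base_model_manager=base_model_manager,
    strategies=test_strategies,
    # ...plus the samples_size / num_calibration_runs / patience / margin kwargs
)
```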
2 changes: 1 addition & 1 deletion setup.py
@@ -9,7 +9,7 @@

setup(
    name="MED3pa",
-   version="0.1.15",
+   version="0.1.16",
    author="MEDomics consortium",
    author_email="medomics.info@gmail.com",
    description="Python Open-source package for ensuring robust and reliable ML models deployments",
