diff --git a/MED3pa/detectron/experiment.py b/MED3pa/detectron/experiment.py index da871a4..1f2b0ec 100644 --- a/MED3pa/detectron/experiment.py +++ b/MED3pa/detectron/experiment.py @@ -24,7 +24,7 @@ class DetectronResult: strategy_mapping = { 'original_disagreement_strategy': OriginalDisagreementStrategy, 'mannwhitney_strategy': MannWhitneyStrategy, - 'enhanced_disagreement_strategy': EnhancedDisagreementStrategy + 'enhanced_disagreement_strategy': EnhancedDisagreementStrategy, } def __init__(self, cal_record: DetectronRecordsManager, test_record: DetectronRecordsManager): diff --git a/MED3pa/detectron/strategies.py b/MED3pa/detectron/strategies.py index ee04a11..3f30584 100644 --- a/MED3pa/detectron/strategies.py +++ b/MED3pa/detectron/strategies.py @@ -145,19 +145,34 @@ def remove_outliers_based_on_iqr(arr1, arr2): # Calculate the z-scores for the test data z_scores = (test_counts - baseline_mean) / baseline_std - # Define thresholds for categorizing - def categorize_z_score(z): - if z <= 0: - return 'no significant shift' - elif 0 < z <= 1: - return 'small' - elif 1 < z <= 2: - return 'moderate' + def categorize_z_score(z, std): + # if the std is 0 + if std == 0: + if z == 0: + return 'no significant shift' + elif 0 < abs(z) <= baseline_mean * 0.1: + return 'small' + elif baseline_mean * 0.1 < abs(z) <= baseline_mean * 0.2: + return 'moderate' + else: + return 'large' else: - return 'large' + if z <= 0: + return 'no significant shift' + elif 0 < z <= 1: + return 'small' + elif 1 < z <= 2: + return 'moderate' + else: + return 'large' + + if baseline_std == 0: + z_scores = test_counts - baseline_mean + else: + z_scores = (test_counts - baseline_mean) / baseline_std + + categories = np.array([categorize_z_score(z, baseline_std) for z in z_scores]) - # Categorize each test count based on its z-score - categories = np.array([categorize_z_score(z) for z in z_scores.flatten()]) # Calculate the percentage of each category category_counts = pd.Series(categories).value_counts(normalize=True) * 100 @@ -177,62 +192,6 @@ def categorize_z_score(z): return results - -class KolmogorovSmirnovStrategy(DetectronStrategy): - """ - Implements a strategy to detect disagreement based on the Kolmogorov-Smirnov test, assessing the dissimilarity of results - from calibration runs and test runs. - """ - def execute(calibration_records: DetectronRecordsManager, test_records:DetectronRecordsManager): - """ - Executes the disagreement detection strategy using the Kolmogorov-Smirnov test. - - Args: - calibration_records (DetectronRecordsManager): Manager storing calibration phase records. - test_records (DetectronRecordsManager): Manager storing test phase records. - - Returns: - dict: A dictionary containing the calculated p-value, KS statistic, and a shift indicator which is True - if a shift is detected at the given significance level. - """ - # Retrieve count data from both calibration and test records - cal_counts = calibration_records.rejected_counts() - test_counts = test_records.rejected_counts() - - # Perform the Kolmogorov-Smirnov test - ks_statistic, p_value = stats.ks_2samp(cal_counts, test_counts) - - # Calculate statistics for interpretation - cal_mean = cal_counts.mean() - cal_std = cal_counts.std() - test_mean = test_counts.mean() - test_std = test_counts.std() - - z_score = (test_mean - cal_mean) / cal_std - # Describe the significance of the shift based on the z-score - significance_description = "" - if z_score <= 0: - significance_description = "no significant shift" - elif abs(z_score) < 1.0: - significance_description = "Small" - elif abs(z_score) < 2.0: - significance_description = "Moderate" - elif abs(z_score) < 3.0: - significance_description = "Large" - else: - significance_description = "Very Large" - # Results dictionary including rank statistics - # Results dictionary including KS test results and distribution statistics - results = { - 'p_value': p_value, - 'ks_statistic': ks_statistic, - 'z-score':z_score, - 'shift significance' : significance_description - } - - return results - - class EnhancedDisagreementStrategy(DetectronStrategy): """ Implements a strategy to detect disagreement based on the z-score mean difference between calibration and test datasets. @@ -300,27 +259,47 @@ def remove_outliers_based_on_iqr(arr1, arr2): # Calculate the test statistic (mean of test data) test_statistic = np.mean(test_counts) - # Calculate the z-scores for the test data - z_scores = (test_counts - baseline_mean) / baseline_std - - # Define thresholds for categorizing - def categorize_z_score(z): - if z <= 0: - return 'no significant shift' - elif 0 < z <= 1: - return 'small' - elif 1 < z <= 2: - return 'moderate' + def categorize_z_score(z, std): + # if the std is 0 + if std == 0: + if z == 0: + return 'no significant shift' + elif 0 < abs(z) <= baseline_mean * 0.1: + return 'small' + elif baseline_mean * 0.1 < abs(z) <= baseline_mean * 0.2: + return 'moderate' + else: + return 'large' else: - return 'large' + if z <= 0: + return 'no significant shift' + elif 0 < z <= 1: + return 'small' + elif 1 < z <= 2: + return 'moderate' + else: + return 'large' + + if baseline_std == 0: + z_scores = test_counts - baseline_mean + else: + z_scores = (test_counts - baseline_mean) / baseline_std + + categories = np.array([categorize_z_score(z, baseline_std) for z in z_scores]) - # Categorize each test count based on its z-score - categories = np.array([categorize_z_score(z) for z in z_scores]) # Calculate the percentage of each category + category_counts = pd.Series(categories).value_counts(normalize=True) * 100 # Calculate the one-tailed p-value (test_statistic > baseline_mean) p_value = np.mean(baseline_mean < test_counts) + + # Pairwise comparison of each element in test_counts with each element in cal_counts + greater_counts = np.sum(test_counts[:, None] > cal_counts) + # Total number of comparisons + total_comparisons = len(test_counts) * len(cal_counts) + # Probability of elements in test_counts being greater than elements in cal_counts + probability = greater_counts / total_comparisons # Describe the significance of the shift based on the z-score significance_description = { @@ -331,11 +310,10 @@ def categorize_z_score(z): } results = { - 'shift_probability': p_value, + 'shift_probability': probability, 'test_statistic': test_statistic, 'baseline_mean': baseline_mean, 'baseline_std': baseline_std, 'significance_description': significance_description, } return results - diff --git a/MED3pa/med3pa/comparaison.py b/MED3pa/med3pa/comparaison.py index 78dfe35..26be77c 100644 --- a/MED3pa/med3pa/comparaison.py +++ b/MED3pa/med3pa/comparaison.py @@ -202,7 +202,7 @@ def compare_profiles_detectron_results(self): profiles2 = json.load(f2) for samples_ratio, profiles_dict in self.shared_profiles.items(): - combined[samples_ratio] = {} + combined = {} for profile_path_list in profiles_dict.values(): profile_path = " / ".join(profile_path_list) # Convert the list to a string @@ -210,11 +210,11 @@ def compare_profiles_detectron_results(self): matching_profile_1 = next((p for p in profiles1[samples_ratio]["100"] if " / ".join(p["path"]) == profile_path), None) matching_profile_2 = next((p for p in profiles2[samples_ratio]["100"] if " / ".join(p["path"]) == profile_path), None) - if profile_path not in combined[samples_ratio]: - combined[samples_ratio][profile_path] = {} + if profile_path not in combined: + combined[profile_path] = {} - combined[samples_ratio][profile_path]['detectron_results_1'] = matching_profile_1["detectron_results"] if matching_profile_1 else None - combined[samples_ratio][profile_path]['detectron_results_2'] = matching_profile_2["detectron_results"] if matching_profile_2 else None + combined[profile_path]['detectron_results_1'] = matching_profile_1["detectron_results"] if matching_profile_1 else None + combined[profile_path]['detectron_results_2'] = matching_profile_2["detectron_results"] if matching_profile_2 else None self.profiles_detectron_comparaison = combined diff --git a/MED3pa/med3pa/mdr.py b/MED3pa/med3pa/mdr.py index 714338d..ea998e7 100644 --- a/MED3pa/med3pa/mdr.py +++ b/MED3pa/med3pa/mdr.py @@ -146,9 +146,9 @@ def _filter_by_profile(dataset : MaskedDataset, path : List, features: list, min # Filter the data according to the path mask filtered_x = x[mask] filtered_y_true = y_true[mask] - filtered_prob = predicted_prob[mask] - filtered_y_pred = y_pred[mask] - filtered_confidence_scores = confidence_scores[mask] + filtered_prob = predicted_prob[mask] if predicted_prob is not None else None + filtered_y_pred = y_pred[mask] if y_pred is not None else None + filtered_confidence_scores = confidence_scores[mask] if confidence_scores is not None else None # filter once again according to the min_confidence_level if specified if min_confidence_level is not None: @@ -401,7 +401,7 @@ def calc_metrics_by_profiles(profiles_manager, dataset : MaskedDataset, features profile.update_node_information(info_dict) @staticmethod - def detectron_by_profiles(datasets: DatasetsManager, + def detectron_by_profiles_deprecated(datasets: DatasetsManager, profiles_manager: ProfilesManager, confidence_scores: np.ndarray, training_params: Dict, @@ -512,3 +512,126 @@ def detectron_by_profiles(datasets: DatasetsManager, return profiles_by_dr + def _filter_with_fallback(dataset, profile, features, min_confidence_level): + # Initial attempt to filter with the full profile path + q_x, q_y_true, _, _, _ = MDRCalculator._filter_by_profile(dataset, path=profile.path, features=features, min_confidence_level=min_confidence_level) + + # If the result is empty, start reducing conditions + current_path = profile.path.copy() + while len(q_y_true) == 0 and len(current_path) >= 1: + # Remove the last condition + current_path.pop() + + # Attempt to filter with the reduced path + q_x, q_y_true, _, _, _ = MDRCalculator._filter_by_profile(dataset, path=current_path, features=features, min_confidence_level=min_confidence_level) + + return q_x, q_y_true + + def detectron_by_profiles(datasets: DatasetsManager, + profiles_manager: ProfilesManager, + confidence_scores: np.ndarray, + training_params: Dict, + base_model_manager: BaseModelManager, + strategies: Union[Type[DetectronStrategy], List[Type[DetectronStrategy]]], + samples_size: int = 20, + ensemble_size: int = 10, + num_calibration_runs: int = 100, + patience: int = 3, + allow_margin: bool = False, + margin: float = 0.05, + all_dr: bool = True) -> Dict: + + """Runs the Detectron method on the different testing set profiles. + + Args: + datasets (DatasetsManager): The datasets manager instance. + profiles_manager (ProfilesManager): the manager containing the profiles of the testing set. + training_params (dict): Parameters for training the models. + base_model_manager (BaseModelManager): The base model manager instance. + testing_mpc_values (np.ndarray): MPC values for the testing data. + reference_mpc_values (np.ndarray): MPC values for the reference data. + samples_size (int, optional): Sample size for the Detectron experiment, by default 20. + ensemble_size (int, optional): Number of models in the ensemble, by default 10. + num_calibration_runs (int, optional): Number of calibration runs, by default 100. + patience (int, optional): Patience for early stopping, by default 3. + strategies (Union[Type[DetectronStrategy], List[Type[DetectronStrategy]]]): The strategies for testing disagreement. + allow_margin (bool, optional): Whether to allow a margin in the test, by default False. + margin (float, optional): Margin value for the test, by default 0.05. + all_dr (bool, optional): Whether to run for all declaration rates, by default False. + + Returns: + Dict: Dictionary of med3pa profiles with detectron results. + """ + min_positive_ratio = min([k for k in profiles_manager.profiles_records.keys() if k >= 0]) + test_dataset = datasets.get_dataset_by_type('testing', True) + reference_dataset = datasets.get_dataset_by_type('reference', True) + test_dataset.set_confidence_scores(confidence_scores=confidence_scores) + profiles_by_dr = profiles_manager.get_profiles(min_samples_ratio=min_positive_ratio) + last_min_confidence_level = -1 + features = datasets.get_column_labels() + for dr, profiles in profiles_by_dr.items(): + if not all_dr and dr != 100: + continue # Skip all dr values except the first one if all_dr is False + + experiment_det = None + min_confidence_level = MDRCalculator._get_min_confidence_score(dr, confidence_scores) + if last_min_confidence_level != min_confidence_level: + for profile in profiles: + detectron_results_dict = {} + + q_x, q_y_true, _, _, _ = MDRCalculator._filter_by_profile(test_dataset, path=profile.path, features=features, min_confidence_level=min_confidence_level) + p_x_profile, p_y_true_profile = MDRCalculator._filter_with_fallback(reference_dataset, profile=profile, features=features, min_confidence_level=None) + if len(p_y_true_profile)==0: + p_x, p_y_true = datasets.get_dataset_by_type("reference") + else: + p_x = p_x_profile + p_y_true = p_y_true_profile + + if len(q_y_true) != 0: + if len(q_y_true) < samples_size or len(p_y_true) < samples_size: + detectron_results_dict['Executed'] = "Not enough samples" + detectron_results_dict['Tested Profile size'] = len(q_y_true) + detectron_results_dict['Tests Results'] = None + + else: + profile_set = DatasetsManager() + profile_set.set_column_labels(datasets.get_column_labels()) + profile_set.set_from_data(dataset_type="testing", observations=q_x, true_labels=q_y_true) + profile_set.set_from_data(dataset_type="reference", observations=p_x, true_labels=p_y_true) + profile_set.set_from_data(dataset_type="training", + observations=datasets.get_dataset_by_type(dataset_type="training", return_instance=True).get_observations(), + true_labels=datasets.get_dataset_by_type(dataset_type="training", return_instance=True).get_true_labels()) + profile_set.set_from_data(dataset_type="validation", + observations=datasets.get_dataset_by_type(dataset_type="validation", return_instance=True).get_observations(), + true_labels=datasets.get_dataset_by_type(dataset_type="validation", return_instance=True).get_true_labels()) + + path_description = "*, " + " & ".join(profile.path[1:]) + print("Running Detectron on Profile:", path_description) + + experiment_det= DetectronExperiment.run( + datasets=profile_set, training_params=training_params, base_model_manager=base_model_manager, + samples_size=samples_size, num_calibration_runs=num_calibration_runs, ensemble_size=ensemble_size, + patience=patience, allow_margin=allow_margin, margin=margin) + + + detectron_results = experiment_det.analyze_results(strategies=strategies) + detectron_results_dict['Executed'] = "Yes" + detectron_results_dict['Tested Profile size'] = len(q_y_true) + detectron_results_dict['Tests Results'] = detectron_results + + else: + detectron_results_dict['Executed'] = "Empty profile in test data" + detectron_results_dict['Tested Profile size'] = len(q_y_true) + detectron_results_dict['Tests Results'] = None + + + profile.update_detectron_results(detectron_results_dict) + + last_profiles = profiles + last_min_confidence_level = min_confidence_level + else: + profiles = last_profiles + + return profiles_by_dr + + \ No newline at end of file diff --git a/setup.py b/setup.py index f9b8abb..d2c49fb 100644 --- a/setup.py +++ b/setup.py @@ -9,7 +9,7 @@ setup( name="MED3pa", - version="0.1.31", + version="0.1.32", author="MEDomics consortium", author_email="medomics.info@gmail.com", description="Python Open-source package for ensuring robust and reliable ML models deployments", diff --git a/tutorials/med3pa_tutorials.ipynb b/tutorials/med3pa_tutorials.ipynb index a179bdf..a602235 100644 --- a/tutorials/med3pa_tutorials.ipynb +++ b/tutorials/med3pa_tutorials.ipynb @@ -362,7 +362,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ @@ -380,9 +380,35 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Running MED3pa Experiment on the reference set:\n", + "IPC Model training complete.\n", + "Individualized confidence scores calculated.\n" + ] + }, + { + "ename": "FileNotFoundError", + "evalue": "[Errno 2] No such file or directory: './med3pa_experiment_results_models/apc_model.pkl'", + "output_type": "error", + "traceback": [ + "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[1;31mFileNotFoundError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[1;32mIn[4], line 10\u001b[0m\n\u001b[0;32m 7\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mMED3pa\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mdetectron\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mstrategies\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m EnhancedDisagreementStrategy\n\u001b[0;32m 9\u001b[0m \u001b[38;5;66;03m# Execute the integrated MED3PA and Detectron experiment\u001b[39;00m\n\u001b[1;32m---> 10\u001b[0m med3pa_detectron_results \u001b[38;5;241m=\u001b[39m Med3paDetectronExperiment\u001b[38;5;241m.\u001b[39mrun(\n\u001b[0;32m 11\u001b[0m datasets\u001b[38;5;241m=\u001b[39mdatasets,\n\u001b[0;32m 12\u001b[0m base_model_manager\u001b[38;5;241m=\u001b[39mbase_model_manager,\n\u001b[0;32m 13\u001b[0m uncertainty_metric\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mabsolute_error\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[0;32m 14\u001b[0m samples_size\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m20\u001b[39m,\n\u001b[0;32m 15\u001b[0m ensemble_size\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m10\u001b[39m,\n\u001b[0;32m 16\u001b[0m num_calibration_runs\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m100\u001b[39m,\n\u001b[0;32m 17\u001b[0m patience\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m3\u001b[39m,\n\u001b[0;32m 18\u001b[0m test_strategies\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124menhanced_disagreement_strategy\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[0;32m 19\u001b[0m pretrained_apc\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m./med3pa_experiment_results_models/apc_model.pkl\u001b[39m\u001b[38;5;124m'\u001b[39m,\n\u001b[0;32m 20\u001b[0m allow_margin\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m,\n\u001b[0;32m 21\u001b[0m margin\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m0.05\u001b[39m,\n\u001b[0;32m 22\u001b[0m samples_ratio_min\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m0\u001b[39m,\n\u001b[0;32m 23\u001b[0m samples_ratio_max\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m10\u001b[39m,\n\u001b[0;32m 24\u001b[0m samples_ratio_step\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m5\u001b[39m,\n\u001b[0;32m 25\u001b[0m evaluate_models\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m,\n\u001b[0;32m 26\u001b[0m )\n\u001b[0;32m 28\u001b[0m med3pa_detectron_results2 \u001b[38;5;241m=\u001b[39m Med3paDetectronExperiment\u001b[38;5;241m.\u001b[39mrun(\n\u001b[0;32m 29\u001b[0m datasets\u001b[38;5;241m=\u001b[39mdatasets2,\n\u001b[0;32m 30\u001b[0m base_model_manager\u001b[38;5;241m=\u001b[39mbase_model_manager,\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 44\u001b[0m evaluate_models\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m,\n\u001b[0;32m 45\u001b[0m )\n", + "File \u001b[1;32md:\\MED3pa-package\\MED3pa\\MED3pa\\med3pa\\experiment.py:623\u001b[0m, in \u001b[0;36mMed3paDetectronExperiment.run\u001b[1;34m(datasets, base_model_manager, uncertainty_metric, training_params, samples_size, samples_size_profiles, ensemble_size, num_calibration_runs, patience, test_strategies, allow_margin, margin, ipc_type, ipc_params, ipc_grid_params, ipc_cv, pretrained_ipc, apc_params, apc_grid_params, apc_cv, pretrained_apc, samples_ratio_min, samples_ratio_max, samples_ratio_step, med3pa_metrics, evaluate_models, use_ref_models, models_metrics, mode, all_dr)\u001b[0m\n\u001b[0;32m 620\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m mode \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m valid_modes:\n\u001b[0;32m 621\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mInvalid mode \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mmode\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m. The mode must be one of \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mvalid_modes\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m.\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m--> 623\u001b[0m med3pa_results \u001b[38;5;241m=\u001b[39m Med3paExperiment\u001b[38;5;241m.\u001b[39mrun(datasets_manager\u001b[38;5;241m=\u001b[39mdatasets, \n\u001b[0;32m 624\u001b[0m base_model_manager\u001b[38;5;241m=\u001b[39mbase_model_manager, uncertainty_metric\u001b[38;5;241m=\u001b[39muncertainty_metric,\n\u001b[0;32m 625\u001b[0m ipc_params\u001b[38;5;241m=\u001b[39mipc_params, ipc_grid_params\u001b[38;5;241m=\u001b[39mipc_grid_params, ipc_cv\u001b[38;5;241m=\u001b[39mipc_cv, ipc_type\u001b[38;5;241m=\u001b[39mipc_type, pretrained_ipc\u001b[38;5;241m=\u001b[39mpretrained_ipc,\n\u001b[0;32m 626\u001b[0m apc_params\u001b[38;5;241m=\u001b[39mapc_params, apc_grid_params\u001b[38;5;241m=\u001b[39mapc_grid_params, apc_cv\u001b[38;5;241m=\u001b[39mapc_cv, pretrained_apc\u001b[38;5;241m=\u001b[39mpretrained_apc,\n\u001b[0;32m 627\u001b[0m evaluate_models\u001b[38;5;241m=\u001b[39mevaluate_models, models_metrics\u001b[38;5;241m=\u001b[39mmodels_metrics,\n\u001b[0;32m 628\u001b[0m samples_ratio_min\u001b[38;5;241m=\u001b[39msamples_ratio_min, samples_ratio_max\u001b[38;5;241m=\u001b[39msamples_ratio_max, samples_ratio_step\u001b[38;5;241m=\u001b[39msamples_ratio_step,\n\u001b[0;32m 629\u001b[0m med3pa_metrics\u001b[38;5;241m=\u001b[39mmed3pa_metrics, mode\u001b[38;5;241m=\u001b[39mmode, use_ref_models\u001b[38;5;241m=\u001b[39muse_ref_models)\n\u001b[0;32m 631\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mRunning Global Detectron Experiment:\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m 632\u001b[0m detectron_results \u001b[38;5;241m=\u001b[39m DetectronExperiment\u001b[38;5;241m.\u001b[39mrun(datasets\u001b[38;5;241m=\u001b[39mdatasets, training_params\u001b[38;5;241m=\u001b[39mtraining_params, base_model_manager\u001b[38;5;241m=\u001b[39mbase_model_manager,\n\u001b[0;32m 633\u001b[0m samples_size\u001b[38;5;241m=\u001b[39msamples_size, num_calibration_runs\u001b[38;5;241m=\u001b[39mnum_calibration_runs, ensemble_size\u001b[38;5;241m=\u001b[39mensemble_size,\n\u001b[0;32m 634\u001b[0m patience\u001b[38;5;241m=\u001b[39mpatience, allow_margin\u001b[38;5;241m=\u001b[39mallow_margin, margin\u001b[38;5;241m=\u001b[39mmargin)\n", + "File \u001b[1;32md:\\MED3pa-package\\MED3pa\\MED3pa\\med3pa\\experiment.py:310\u001b[0m, in \u001b[0;36mMed3paExperiment.run\u001b[1;34m(datasets_manager, base_model_manager, uncertainty_metric, ipc_type, ipc_params, ipc_grid_params, ipc_cv, pretrained_ipc, apc_params, apc_grid_params, apc_cv, pretrained_apc, samples_ratio_min, samples_ratio_max, samples_ratio_step, med3pa_metrics, evaluate_models, use_ref_models, mode, models_metrics)\u001b[0m\n\u001b[0;32m 285\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"Runs the MED3PA experiment on both reference and testing sets.\u001b[39;00m\n\u001b[0;32m 286\u001b[0m \u001b[38;5;124;03mArgs:\u001b[39;00m\n\u001b[0;32m 287\u001b[0m \u001b[38;5;124;03m datasets_manager (DatasetsManager): the datasets manager containing the dataset to use in the experiment.\u001b[39;00m\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 307\u001b[0m \u001b[38;5;124;03m Med3paResults: the results of the MED3PA experiment on the reference set and testing set.\u001b[39;00m\n\u001b[0;32m 308\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[0;32m 309\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mRunning MED3pa Experiment on the reference set:\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m--> 310\u001b[0m results_reference, ipc_config, apc_config \u001b[38;5;241m=\u001b[39m Med3paExperiment\u001b[38;5;241m.\u001b[39m_run_by_set(datasets_manager\u001b[38;5;241m=\u001b[39mdatasets_manager,\u001b[38;5;28mset\u001b[39m\u001b[38;5;241m=\u001b[39m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mreference\u001b[39m\u001b[38;5;124m'\u001b[39m,base_model_manager\u001b[38;5;241m=\u001b[39m base_model_manager, \n\u001b[0;32m 311\u001b[0m uncertainty_metric\u001b[38;5;241m=\u001b[39muncertainty_metric,\n\u001b[0;32m 312\u001b[0m ipc_type\u001b[38;5;241m=\u001b[39mipc_type, ipc_params\u001b[38;5;241m=\u001b[39mipc_params, ipc_grid_params\u001b[38;5;241m=\u001b[39mipc_grid_params, ipc_cv\u001b[38;5;241m=\u001b[39mipc_cv, pretrained_ipc\u001b[38;5;241m=\u001b[39mpretrained_ipc,\n\u001b[0;32m 313\u001b[0m apc_params\u001b[38;5;241m=\u001b[39mapc_params,apc_grid_params\u001b[38;5;241m=\u001b[39mapc_grid_params, apc_cv\u001b[38;5;241m=\u001b[39mapc_cv, pretrained_apc\u001b[38;5;241m=\u001b[39mpretrained_apc,\n\u001b[0;32m 314\u001b[0m samples_ratio_min\u001b[38;5;241m=\u001b[39msamples_ratio_min, samples_ratio_max\u001b[38;5;241m=\u001b[39msamples_ratio_max, samples_ratio_step\u001b[38;5;241m=\u001b[39msamples_ratio_step, \n\u001b[0;32m 315\u001b[0m med3pa_metrics\u001b[38;5;241m=\u001b[39mmed3pa_metrics, evaluate_models\u001b[38;5;241m=\u001b[39mevaluate_models, models_metrics\u001b[38;5;241m=\u001b[39mmodels_metrics, mode\u001b[38;5;241m=\u001b[39mmode)\n\u001b[0;32m 316\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mRunning MED3pa Experiment on the test set:\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m 317\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m use_ref_models:\n", + "File \u001b[1;32md:\\MED3pa-package\\MED3pa\\MED3pa\\med3pa\\experiment.py:488\u001b[0m, in \u001b[0;36mMed3paExperiment._run_by_set\u001b[1;34m(datasets_manager, set, base_model_manager, uncertainty_metric, ipc_type, ipc_params, ipc_grid_params, ipc_cv, pretrained_ipc, ipc_instance, apc_params, apc_grid_params, apc_cv, apc_instance, pretrained_apc, samples_ratio_min, samples_ratio_max, samples_ratio_step, med3pa_metrics, evaluate_models, mode, models_metrics)\u001b[0m\n\u001b[0;32m 486\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mAPC Model optimization complete.\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m 487\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m pretrained_apc \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m--> 488\u001b[0m APC_model \u001b[38;5;241m=\u001b[39m APCModel(features\u001b[38;5;241m=\u001b[39mfeatures, params\u001b[38;5;241m=\u001b[39mapc_params, pretrained_model\u001b[38;5;241m=\u001b[39mpretrained_apc)\n\u001b[0;32m 489\u001b[0m APC_model\u001b[38;5;241m.\u001b[39mtrain(x, IPC_values)\n\u001b[0;32m 490\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mLoaded a pretrained APC model.\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n", + "File \u001b[1;32md:\\MED3pa-package\\MED3pa\\MED3pa\\med3pa\\models.py:237\u001b[0m, in \u001b[0;36mAPCModel.__init__\u001b[1;34m(self, features, params, tree_file_path, pretrained_model)\u001b[0m\n\u001b[0;32m 234\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mload_tree(tree_file_path)\n\u001b[0;32m 236\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m pretrained_model:\n\u001b[1;32m--> 237\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mload_model(pretrained_model)\n", + "File \u001b[1;32md:\\MED3pa-package\\MED3pa\\MED3pa\\med3pa\\models.py:382\u001b[0m, in \u001b[0;36mAPCModel.load_model\u001b[1;34m(self, file_path)\u001b[0m\n\u001b[0;32m 375\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mload_model\u001b[39m(\u001b[38;5;28mself\u001b[39m, file_path: \u001b[38;5;28mstr\u001b[39m) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m 376\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[0;32m 377\u001b[0m \u001b[38;5;124;03m Loads a pre-trained model from a pickle file.\u001b[39;00m\n\u001b[0;32m 378\u001b[0m \n\u001b[0;32m 379\u001b[0m \u001b[38;5;124;03m Args:\u001b[39;00m\n\u001b[0;32m 380\u001b[0m \u001b[38;5;124;03m file_path (str): The path to the pickle file.\u001b[39;00m\n\u001b[0;32m 381\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[1;32m--> 382\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m \u001b[38;5;28mopen\u001b[39m(file_path, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mrb\u001b[39m\u001b[38;5;124m'\u001b[39m) \u001b[38;5;28;01mas\u001b[39;00m file:\n\u001b[0;32m 383\u001b[0m loaded_model \u001b[38;5;241m=\u001b[39m pickle\u001b[38;5;241m.\u001b[39mload(file)\n\u001b[0;32m 385\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(loaded_model, DecisionTreeRegressor):\n", + "\u001b[1;31mFileNotFoundError\u001b[0m: [Errno 2] No such file or directory: './med3pa_experiment_results_models/apc_model.pkl'" + ] + } + ], "source": [ "import sys\n", "import os\n", @@ -402,6 +428,7 @@ " num_calibration_runs=100,\n", " patience=3,\n", " test_strategies=\"enhanced_disagreement_strategy\",\n", + " pretrained_apc='./med3pa_experiment_results_models/apc_model.pkl',\n", " allow_margin=False,\n", " margin=0.05,\n", " samples_ratio_min=0,\n", @@ -420,6 +447,7 @@ " num_calibration_runs=100,\n", " patience=3,\n", " test_strategies=\"enhanced_disagreement_strategy\",\n", + " pretrained_apc='./med3pa_experiment_results_models/apc_model.pkl',\n", " allow_margin=False,\n", " margin=0.05,\n", " samples_ratio_min=0,\n",