updated disagreements test + detectron profile
lyna1404 committed Aug 18, 2024
1 parent fa6368c commit 3fe5faa
Showing 6 changed files with 226 additions and 97 deletions.
2 changes: 1 addition & 1 deletion MED3pa/detectron/experiment.py
@@ -24,7 +24,7 @@ class DetectronResult:
strategy_mapping = {
'original_disagreement_strategy': OriginalDisagreementStrategy,
'mannwhitney_strategy': MannWhitneyStrategy,
'enhanced_disagreement_strategy': EnhancedDisagreementStrategy
'enhanced_disagreement_strategy': EnhancedDisagreementStrategy,
}

def __init__(self, cal_record: DetectronRecordsManager, test_record: DetectronRecordsManager):
144 changes: 61 additions & 83 deletions MED3pa/detectron/strategies.py
@@ -145,19 +145,34 @@ def remove_outliers_based_on_iqr(arr1, arr2):
# Calculate the z-scores for the test data
z_scores = (test_counts - baseline_mean) / baseline_std

# Define thresholds for categorizing
def categorize_z_score(z):
if z <= 0:
return 'no significant shift'
elif 0 < z <= 1:
return 'small'
elif 1 < z <= 2:
return 'moderate'
def categorize_z_score(z, std):
# if the std is 0
if std == 0:
if z == 0:
return 'no significant shift'
elif 0 < abs(z) <= baseline_mean * 0.1:
return 'small'
elif baseline_mean * 0.1 < abs(z) <= baseline_mean * 0.2:
return 'moderate'
else:
return 'large'
else:
return 'large'
if z <= 0:
return 'no significant shift'
elif 0 < z <= 1:
return 'small'
elif 1 < z <= 2:
return 'moderate'
else:
return 'large'

if baseline_std == 0:
z_scores = test_counts - baseline_mean
else:
z_scores = (test_counts - baseline_mean) / baseline_std

categories = np.array([categorize_z_score(z, baseline_std) for z in z_scores])

# Categorize each test count based on its z-score
categories = np.array([categorize_z_score(z) for z in z_scores.flatten()])
# Calculate the percentage of each category
category_counts = pd.Series(categories).value_counts(normalize=True) * 100
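The new categorize_z_score takes the baseline standard deviation as a second argument so that a degenerate calibration distribution (baseline_std == 0) no longer divides by zero; in that case the raw difference from the baseline mean is bucketed against fractions of the mean instead of a z-score (the same change is repeated below in EnhancedDisagreementStrategy). A minimal, self-contained sketch of that behaviour; the standalone names are illustrative, not the package API:

import numpy as np

def categorize_shift(test_counts, baseline_mean, baseline_std):
    """Bucket each test rejection count relative to the calibration baseline."""
    def categorize(z, std):
        if std == 0:
            # Degenerate baseline: z is a raw difference, compared to fractions of the mean.
            if z == 0:
                return 'no significant shift'
            elif abs(z) <= baseline_mean * 0.1:
                return 'small'
            elif abs(z) <= baseline_mean * 0.2:
                return 'moderate'
            return 'large'
        # Regular baseline: z is a standard z-score.
        if z <= 0:
            return 'no significant shift'
        elif z <= 1:
            return 'small'
        elif z <= 2:
            return 'moderate'
        return 'large'

    if baseline_std == 0:
        z_scores = test_counts - baseline_mean      # fall back to raw differences
    else:
        z_scores = (test_counts - baseline_mean) / baseline_std
    return np.array([categorize(z, baseline_std) for z in z_scores])

# Calibration runs that all rejected exactly 5 samples give baseline_std == 0.
print(categorize_shift(np.array([5.0, 5.3, 7.0]), baseline_mean=5.0, baseline_std=0.0))
# ['no significant shift' 'small' 'large']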

@@ -177,62 +192,6 @@ def categorize_z_score(z):

return results


class KolmogorovSmirnovStrategy(DetectronStrategy):
"""
Implements a strategy to detect disagreement based on the Kolmogorov-Smirnov test, assessing the dissimilarity of results
from calibration runs and test runs.
"""
def execute(calibration_records: DetectronRecordsManager, test_records:DetectronRecordsManager):
"""
Executes the disagreement detection strategy using the Kolmogorov-Smirnov test.
Args:
calibration_records (DetectronRecordsManager): Manager storing calibration phase records.
test_records (DetectronRecordsManager): Manager storing test phase records.
Returns:
dict: A dictionary containing the calculated p-value, KS statistic, and a shift indicator which is True
if a shift is detected at the given significance level.
"""
# Retrieve count data from both calibration and test records
cal_counts = calibration_records.rejected_counts()
test_counts = test_records.rejected_counts()

# Perform the Kolmogorov-Smirnov test
ks_statistic, p_value = stats.ks_2samp(cal_counts, test_counts)

# Calculate statistics for interpretation
cal_mean = cal_counts.mean()
cal_std = cal_counts.std()
test_mean = test_counts.mean()
test_std = test_counts.std()

z_score = (test_mean - cal_mean) / cal_std
# Describe the significance of the shift based on the z-score
significance_description = ""
if z_score <= 0:
significance_description = "no significant shift"
elif abs(z_score) < 1.0:
significance_description = "Small"
elif abs(z_score) < 2.0:
significance_description = "Moderate"
elif abs(z_score) < 3.0:
significance_description = "Large"
else:
significance_description = "Very Large"
# Results dictionary including rank statistics
# Results dictionary including KS test results and distribution statistics
results = {
'p_value': p_value,
'ks_statistic': ks_statistic,
'z-score':z_score,
'shift significance' : significance_description
}

return results


class EnhancedDisagreementStrategy(DetectronStrategy):
"""
Implements a strategy to detect disagreement based on the z-score mean difference between calibration and test datasets.
@@ -300,27 +259,47 @@ def remove_outliers_based_on_iqr(arr1, arr2):
# Calculate the test statistic (mean of test data)
test_statistic = np.mean(test_counts)

# Calculate the z-scores for the test data
z_scores = (test_counts - baseline_mean) / baseline_std

# Define thresholds for categorizing
def categorize_z_score(z):
if z <= 0:
return 'no significant shift'
elif 0 < z <= 1:
return 'small'
elif 1 < z <= 2:
return 'moderate'
def categorize_z_score(z, std):
# if the std is 0
if std == 0:
if z == 0:
return 'no significant shift'
elif 0 < abs(z) <= baseline_mean * 0.1:
return 'small'
elif baseline_mean * 0.1 < abs(z) <= baseline_mean * 0.2:
return 'moderate'
else:
return 'large'
else:
return 'large'
if z <= 0:
return 'no significant shift'
elif 0 < z <= 1:
return 'small'
elif 1 < z <= 2:
return 'moderate'
else:
return 'large'

if baseline_std == 0:
z_scores = test_counts - baseline_mean
else:
z_scores = (test_counts - baseline_mean) / baseline_std

categories = np.array([categorize_z_score(z, baseline_std) for z in z_scores])

# Categorize each test count based on its z-score
categories = np.array([categorize_z_score(z) for z in z_scores])
# Calculate the percentage of each category

category_counts = pd.Series(categories).value_counts(normalize=True) * 100

# Calculate the one-tailed p-value (test_statistic > baseline_mean)
p_value = np.mean(baseline_mean < test_counts)

# Pairwise comparison of each element in test_counts with each element in cal_counts
greater_counts = np.sum(test_counts[:, None] > cal_counts)
# Total number of comparisons
total_comparisons = len(test_counts) * len(cal_counts)
# Probability of elements in test_counts being greater than elements in cal_counts
probability = greater_counts / total_comparisons

# Describe the significance of the shift based on the z-score
significance_description = {
@@ -331,11 +310,10 @@ def categorize_z_score(z):
}

results = {
'shift_probability': p_value,
'shift_probability': probability,
'test_statistic': test_statistic,
'baseline_mean': baseline_mean,
'baseline_std': baseline_std,
'significance_description': significance_description,
}
return results
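In the same file, EnhancedDisagreementStrategy now reports 'shift_probability' as a pairwise comparison: the fraction of (test, calibration) pairs in which the test rejection count exceeds the calibration one, rather than the fraction of test counts above the calibration mean. A short sketch of that computation with NumPy broadcasting; the example arrays are made up:

import numpy as np

cal_counts = np.array([2, 3, 3, 4])   # rejection counts from the calibration runs
test_counts = np.array([3, 5, 6])     # rejection counts from the test runs

# One boolean per (test, calibration) pair, via broadcasting.
greater_counts = np.sum(test_counts[:, None] > cal_counts)
total_comparisons = len(test_counts) * len(cal_counts)

probability = greater_counts / total_comparisons
print(probability)   # 9 of 12 pairs -> 0.75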

10 changes: 5 additions & 5 deletions MED3pa/med3pa/comparaison.py
@@ -202,19 +202,19 @@ def compare_profiles_detectron_results(self):
profiles2 = json.load(f2)

for samples_ratio, profiles_dict in self.shared_profiles.items():
combined[samples_ratio] = {}
combined = {}
for profile_path_list in profiles_dict.values():
profile_path = " / ".join(profile_path_list) # Convert the list to a string

# Attempt to find matching profiles in both profiles1 and profiles2
matching_profile_1 = next((p for p in profiles1[samples_ratio]["100"] if " / ".join(p["path"]) == profile_path), None)
matching_profile_2 = next((p for p in profiles2[samples_ratio]["100"] if " / ".join(p["path"]) == profile_path), None)

if profile_path not in combined[samples_ratio]:
combined[samples_ratio][profile_path] = {}
if profile_path not in combined:
combined[profile_path] = {}

combined[samples_ratio][profile_path]['detectron_results_1'] = matching_profile_1["detectron_results"] if matching_profile_1 else None
combined[samples_ratio][profile_path]['detectron_results_2'] = matching_profile_2["detectron_results"] if matching_profile_2 else None
combined[profile_path]['detectron_results_1'] = matching_profile_1["detectron_results"] if matching_profile_1 else None
combined[profile_path]['detectron_results_2'] = matching_profile_2["detectron_results"] if matching_profile_2 else None

self.profiles_detectron_comparaison = combined
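With this change the comparison dictionary is keyed directly by the stringified profile path instead of being nested under the samples ratio. A toy illustration of the resulting shape; the path string and values are hypothetical placeholders:

combined = {
    "* / feature_1 <= 0.5": {
        "detectron_results_1": {"shift_probability": 0.61},  # results from the first experiment
        "detectron_results_2": None,                          # profile missing from the second experiment
    },
}
print(list(combined.keys()))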

131 changes: 127 additions & 4 deletions MED3pa/med3pa/mdr.py
@@ -146,9 +146,9 @@ def _filter_by_profile(dataset : MaskedDataset, path : List, features: list, min
# Filter the data according to the path mask
filtered_x = x[mask]
filtered_y_true = y_true[mask]
filtered_prob = predicted_prob[mask]
filtered_y_pred = y_pred[mask]
filtered_confidence_scores = confidence_scores[mask]
filtered_prob = predicted_prob[mask] if predicted_prob is not None else None
filtered_y_pred = y_pred[mask] if y_pred is not None else None
filtered_confidence_scores = confidence_scores[mask] if confidence_scores is not None else None

# filter once again according to the min_confidence_level if specified
if min_confidence_level is not None:
@@ -401,7 +401,7 @@ def calc_metrics_by_profiles(profiles_manager, dataset : MaskedDataset, features
profile.update_node_information(info_dict)

@staticmethod
def detectron_by_profiles(datasets: DatasetsManager,
def detectron_by_profiles_deprecated(datasets: DatasetsManager,
profiles_manager: ProfilesManager,
confidence_scores: np.ndarray,
training_params: Dict,
@@ -512,3 +512,126 @@ def detectron_by_profiles(datasets: DatasetsManager,

return profiles_by_dr

def _filter_with_fallback(dataset, profile, features, min_confidence_level):
# Initial attempt to filter with the full profile path
q_x, q_y_true, _, _, _ = MDRCalculator._filter_by_profile(dataset, path=profile.path, features=features, min_confidence_level=min_confidence_level)

# If the result is empty, start reducing conditions
current_path = profile.path.copy()
while len(q_y_true) == 0 and len(current_path) >= 1:
# Remove the last condition
current_path.pop()

# Attempt to filter with the reduced path
q_x, q_y_true, _, _, _ = MDRCalculator._filter_by_profile(dataset, path=current_path, features=features, min_confidence_level=min_confidence_level)

return q_x, q_y_true

def detectron_by_profiles(datasets: DatasetsManager,
profiles_manager: ProfilesManager,
confidence_scores: np.ndarray,
training_params: Dict,
base_model_manager: BaseModelManager,
strategies: Union[Type[DetectronStrategy], List[Type[DetectronStrategy]]],
samples_size: int = 20,
ensemble_size: int = 10,
num_calibration_runs: int = 100,
patience: int = 3,
allow_margin: bool = False,
margin: float = 0.05,
all_dr: bool = True) -> Dict:

"""Runs the Detectron method on the different testing set profiles.
Args:
datasets (DatasetsManager): The datasets manager instance.
profiles_manager (ProfilesManager): the manager containing the profiles of the testing set.
training_params (dict): Parameters for training the models.
base_model_manager (BaseModelManager): The base model manager instance.
confidence_scores (np.ndarray): Confidence scores of the testing set, used to derive the minimum confidence threshold for each declaration rate.
samples_size (int, optional): Sample size for the Detectron experiment, by default 20.
ensemble_size (int, optional): Number of models in the ensemble, by default 10.
num_calibration_runs (int, optional): Number of calibration runs, by default 100.
patience (int, optional): Patience for early stopping, by default 3.
strategies (Union[Type[DetectronStrategy], List[Type[DetectronStrategy]]]): The strategies for testing disagreement.
allow_margin (bool, optional): Whether to allow a margin in the test, by default False.
margin (float, optional): Margin value for the test, by default 0.05.
all_dr (bool, optional): Whether to run for all declaration rates, by default True.
Returns:
Dict: Dictionary of med3pa profiles with detectron results.
"""
min_positive_ratio = min([k for k in profiles_manager.profiles_records.keys() if k >= 0])
test_dataset = datasets.get_dataset_by_type('testing', True)
reference_dataset = datasets.get_dataset_by_type('reference', True)
test_dataset.set_confidence_scores(confidence_scores=confidence_scores)
profiles_by_dr = profiles_manager.get_profiles(min_samples_ratio=min_positive_ratio)
last_min_confidence_level = -1
features = datasets.get_column_labels()
for dr, profiles in profiles_by_dr.items():
if not all_dr and dr != 100:
continue # Skip all dr values except dr == 100 if all_dr is False

experiment_det = None
min_confidence_level = MDRCalculator._get_min_confidence_score(dr, confidence_scores)
if last_min_confidence_level != min_confidence_level:
for profile in profiles:
detectron_results_dict = {}

q_x, q_y_true, _, _, _ = MDRCalculator._filter_by_profile(test_dataset, path=profile.path, features=features, min_confidence_level=min_confidence_level)
p_x_profile, p_y_true_profile = MDRCalculator._filter_with_fallback(reference_dataset, profile=profile, features=features, min_confidence_level=None)
if len(p_y_true_profile)==0:
p_x, p_y_true = datasets.get_dataset_by_type("reference")
else:
p_x = p_x_profile
p_y_true = p_y_true_profile

if len(q_y_true) != 0:
if len(q_y_true) < samples_size or len(p_y_true) < samples_size:
detectron_results_dict['Executed'] = "Not enough samples"
detectron_results_dict['Tested Profile size'] = len(q_y_true)
detectron_results_dict['Tests Results'] = None

else:
profile_set = DatasetsManager()
profile_set.set_column_labels(datasets.get_column_labels())
profile_set.set_from_data(dataset_type="testing", observations=q_x, true_labels=q_y_true)
profile_set.set_from_data(dataset_type="reference", observations=p_x, true_labels=p_y_true)
profile_set.set_from_data(dataset_type="training",
observations=datasets.get_dataset_by_type(dataset_type="training", return_instance=True).get_observations(),
true_labels=datasets.get_dataset_by_type(dataset_type="training", return_instance=True).get_true_labels())
profile_set.set_from_data(dataset_type="validation",
observations=datasets.get_dataset_by_type(dataset_type="validation", return_instance=True).get_observations(),
true_labels=datasets.get_dataset_by_type(dataset_type="validation", return_instance=True).get_true_labels())

path_description = "*, " + " & ".join(profile.path[1:])
print("Running Detectron on Profile:", path_description)

experiment_det= DetectronExperiment.run(
datasets=profile_set, training_params=training_params, base_model_manager=base_model_manager,
samples_size=samples_size, num_calibration_runs=num_calibration_runs, ensemble_size=ensemble_size,
patience=patience, allow_margin=allow_margin, margin=margin)


detectron_results = experiment_det.analyze_results(strategies=strategies)
detectron_results_dict['Executed'] = "Yes"
detectron_results_dict['Tested Profile size'] = len(q_y_true)
detectron_results_dict['Tests Results'] = detectron_results

else:
detectron_results_dict['Executed'] = "Empty profile in test data"
detectron_results_dict['Tested Profile size'] = len(q_y_true)
detectron_results_dict['Tests Results'] = None


profile.update_detectron_results(detectron_results_dict)

last_profiles = profiles
last_min_confidence_level = min_confidence_level
else:
profiles = last_profiles

return profiles_by_dr
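The _filter_with_fallback helper added above keeps the reference data from coming back empty: when the full profile path matches no samples, the last condition is dropped and the filter is retried. A generic sketch of that relaxation loop over plain lists, purely illustrative and not the MED3pa API:

def filter_with_fallback(rows, conditions):
    """Drop trailing conditions until the filter returns at least one row."""
    conditions = list(conditions)
    selected = [r for r in rows if all(cond(r) for cond in conditions)]
    while not selected and conditions:
        conditions.pop()   # relax the most specific condition first
        selected = [r for r in rows if all(cond(r) for cond in conditions)]
    return selected

rows = [{"age": 30}, {"age": 70}]
conditions = [lambda r: r["age"] > 50, lambda r: r["age"] > 80]  # second condition matches nothing
print(filter_with_fallback(rows, conditions))  # falls back to age > 50 -> [{'age': 70}]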

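As a usage sketch, the rewritten detectron_by_profiles is still driven by the manager objects used elsewhere in MED3pa. The call below is hypothetical: it assumes the method remains a static method of MDRCalculator like the deprecated version, that datasets, profiles_manager, base_model_manager and confidence_scores have already been prepared by the surrounding med3pa pipeline, and that training_params is an ordinary XGBoost-style parameter dict.

from MED3pa.med3pa.mdr import MDRCalculator
from MED3pa.detectron.strategies import EnhancedDisagreementStrategy

# datasets, profiles_manager, base_model_manager and confidence_scores are assumed to
# come from the usual med3pa workflow; they are not constructed in this sketch.
profiles_by_dr = MDRCalculator.detectron_by_profiles(
    datasets=datasets,
    profiles_manager=profiles_manager,
    confidence_scores=confidence_scores,
    training_params={"eval_metric": "auc"},   # hypothetical parameters
    base_model_manager=base_model_manager,
    strategies=[EnhancedDisagreementStrategy],
    samples_size=20,
    ensemble_size=10,
    num_calibration_runs=100,
    patience=3,
    allow_margin=False,
    margin=0.05,
    all_dr=True,
)
# profiles_by_dr maps each declaration rate to its list of profiles, updated with Detectron results.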

2 changes: 1 addition & 1 deletion setup.py
@@ -9,7 +9,7 @@

setup(
name="MED3pa",
version="0.1.31",
version="0.1.32",
author="MEDomics consortium",
author_email="medomics.info@gmail.com",
description="Python Open-source package for ensuring robust and reliable ML models deployments",