updated disagreements test + detectron profile
lyna1404 committed Aug 18, 2024
1 parent fa6368c commit 3fe5faa
Showing 6 changed files with 226 additions and 97 deletions.
2 changes: 1 addition & 1 deletion MED3pa/detectron/experiment.py
@@ -24,7 +24,7 @@ class DetectronResult:
strategy_mapping = {
'original_disagreement_strategy': OriginalDisagreementStrategy,
'mannwhitney_strategy': MannWhitneyStrategy,
'enhanced_disagreement_strategy': EnhancedDisagreementStrategy
'enhanced_disagreement_strategy': EnhancedDisagreementStrategy,
}

def __init__(self, cal_record: DetectronRecordsManager, test_record: DetectronRecordsManager):
144 changes: 61 additions & 83 deletions MED3pa/detectron/strategies.py
@@ -145,19 +145,34 @@ def remove_outliers_based_on_iqr(arr1, arr2):
# Calculate the z-scores for the test data
z_scores = (test_counts - baseline_mean) / baseline_std

# Define thresholds for categorizing
def categorize_z_score(z):
if z <= 0:
return 'no significant shift'
elif 0 < z <= 1:
return 'small'
elif 1 < z <= 2:
return 'moderate'
def categorize_z_score(z, std):
# if the std is 0
if std == 0:
if z == 0:
return 'no significant shift'
elif 0 < abs(z) <= baseline_mean * 0.1:
return 'small'
elif baseline_mean * 0.1 < abs(z) <= baseline_mean * 0.2:
return 'moderate'
else:
return 'large'
else:
return 'large'
if z <= 0:
return 'no significant shift'
elif 0 < z <= 1:
return 'small'
elif 1 < z <= 2:
return 'moderate'
else:
return 'large'

if baseline_std == 0:
z_scores = test_counts - baseline_mean
else:
z_scores = (test_counts - baseline_mean) / baseline_std

categories = np.array([categorize_z_score(z, baseline_std) for z in z_scores])

# Categorize each test count based on its z-score
categories = np.array([categorize_z_score(z) for z in z_scores.flatten()])
# Calculate the percentage of each category
category_counts = pd.Series(categories).value_counts(normalize=True) * 100
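The new categorize_z_score takes the baseline standard deviation as a second argument so that a degenerate calibration distribution (baseline_std == 0) no longer divides by zero; in that case the raw difference from the baseline mean is bucketed against fractions of the mean instead of a z-score (the same change is repeated below in EnhancedDisagreementStrategy). A minimal, self-contained sketch of that behaviour; the standalone names are illustrative, not the package API:

import numpy as np

def categorize_shift(test_counts, baseline_mean, baseline_std):
    """Bucket each test rejection count relative to the calibration baseline."""
    def categorize(z, std):
        if std == 0:
            # Degenerate baseline: z is a raw difference, compared to fractions of the mean.
            if z == 0:
                return 'no significant shift'
            elif abs(z) <= baseline_mean * 0.1:
                return 'small'
            elif abs(z) <= baseline_mean * 0.2:
                return 'moderate'
            return 'large'
        # Regular baseline: z is a standard z-score.
        if z <= 0:
            return 'no significant shift'
        elif z <= 1:
            return 'small'
        elif z <= 2:
            return 'moderate'
        return 'large'

    if baseline_std == 0:
        z_scores = test_counts - baseline_mean      # fall back to raw differences
    else:
        z_scores = (test_counts - baseline_mean) / baseline_std
    return np.array([categorize(z, baseline_std) for z in z_scores])

# Calibration runs that all rejected exactly 5 samples give baseline_std == 0.
print(categorize_shift(np.array([5.0, 5.3, 7.0]), baseline_mean=5.0, baseline_std=0.0))
# ['no significant shift' 'small' 'large']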

@@ -177,62 +192,6 @@ def categorize_z_score(z):

return results


class KolmogorovSmirnovStrategy(DetectronStrategy):
"""
Implements a strategy to detect disagreement based on the Kolmogorov-Smirnov test, assessing the dissimilarity of results
from calibration runs and test runs.
"""
def execute(calibration_records: DetectronRecordsManager, test_records:DetectronRecordsManager):
"""
Executes the disagreement detection strategy using the Kolmogorov-Smirnov test.
Args:
calibration_records (DetectronRecordsManager): Manager storing calibration phase records.
test_records (DetectronRecordsManager): Manager storing test phase records.
Returns:
dict: A dictionary containing the calculated p-value, KS statistic, and a shift indicator which is True
if a shift is detected at the given significance level.
"""
# Retrieve count data from both calibration and test records
cal_counts = calibration_records.rejected_counts()
test_counts = test_records.rejected_counts()

# Perform the Kolmogorov-Smirnov test
ks_statistic, p_value = stats.ks_2samp(cal_counts, test_counts)

# Calculate statistics for interpretation
cal_mean = cal_counts.mean()
cal_std = cal_counts.std()
test_mean = test_counts.mean()
test_std = test_counts.std()

z_score = (test_mean - cal_mean) / cal_std
# Describe the significance of the shift based on the z-score
significance_description = ""
if z_score <= 0:
significance_description = "no significant shift"
elif abs(z_score) < 1.0:
significance_description = "Small"
elif abs(z_score) < 2.0:
significance_description = "Moderate"
elif abs(z_score) < 3.0:
significance_description = "Large"
else:
significance_description = "Very Large"
# Results dictionary including rank statistics
# Results dictionary including KS test results and distribution statistics
results = {
'p_value': p_value,
'ks_statistic': ks_statistic,
'z-score':z_score,
'shift significance' : significance_description
}

return results


class EnhancedDisagreementStrategy(DetectronStrategy):
"""
Implements a strategy to detect disagreement based on the z-score mean difference between calibration and test datasets.
@@ -300,27 +259,47 @@ def remove_outliers_based_on_iqr(arr1, arr2):
# Calculate the test statistic (mean of test data)
test_statistic = np.mean(test_counts)

# Calculate the z-scores for the test data
z_scores = (test_counts - baseline_mean) / baseline_std

# Define thresholds for categorizing
def categorize_z_score(z):
if z <= 0:
return 'no significant shift'
elif 0 < z <= 1:
return 'small'
elif 1 < z <= 2:
return 'moderate'
def categorize_z_score(z, std):
# if the std is 0
if std == 0:
if z == 0:
return 'no significant shift'
elif 0 < abs(z) <= baseline_mean * 0.1:
return 'small'
elif baseline_mean * 0.1 < abs(z) <= baseline_mean * 0.2:
return 'moderate'
else:
return 'large'
else:
return 'large'
if z <= 0:
return 'no significant shift'
elif 0 < z <= 1:
return 'small'
elif 1 < z <= 2:
return 'moderate'
else:
return 'large'

if baseline_std == 0:
z_scores = test_counts - baseline_mean
else:
z_scores = (test_counts - baseline_mean) / baseline_std

categories = np.array([categorize_z_score(z, baseline_std) for z in z_scores])

# Categorize each test count based on its z-score
categories = np.array([categorize_z_score(z) for z in z_scores])
# Calculate the percentage of each category

category_counts = pd.Series(categories).value_counts(normalize=True) * 100

# Calculate the one-tailed p-value (test_statistic > baseline_mean)
p_value = np.mean(baseline_mean < test_counts)

# Pairwise comparison of each element in test_counts with each element in cal_counts
greater_counts = np.sum(test_counts[:, None] > cal_counts)
# Total number of comparisons
total_comparisons = len(test_counts) * len(cal_counts)
# Probability of elements in test_counts being greater than elements in cal_counts
probability = greater_counts / total_comparisons

# Describe the significance of the shift based on the z-score
significance_description = {
@@ -331,11 +310,10 @@ def categorize_z_score(z):
}

results = {
'shift_probability': p_value,
'shift_probability': probability,
'test_statistic': test_statistic,
'baseline_mean': baseline_mean,
'baseline_std': baseline_std,
'significance_description': significance_description,
}
return results
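In the same file, EnhancedDisagreementStrategy now reports 'shift_probability' as a pairwise comparison: the fraction of (test, calibration) pairs in which the test rejection count exceeds the calibration one, rather than the fraction of test counts above the calibration mean. A short sketch of that computation with NumPy broadcasting; the example arrays are made up:

import numpy as np

cal_counts = np.array([2, 3, 3, 4])   # rejection counts from the calibration runs
test_counts = np.array([3, 5, 6])     # rejection counts from the test runs

# One boolean per (test, calibration) pair, via broadcasting.
greater_counts = np.sum(test_counts[:, None] > cal_counts)
total_comparisons = len(test_counts) * len(cal_counts)

probability = greater_counts / total_comparisons
print(probability)   # 9 of 12 pairs -> 0.75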

10 changes: 5 additions & 5 deletions MED3pa/med3pa/comparaison.py
@@ -202,19 +202,19 @@ def compare_profiles_detectron_results(self):
profiles2 = json.load(f2)

for samples_ratio, profiles_dict in self.shared_profiles.items():
combined[samples_ratio] = {}
combined = {}
for profile_path_list in profiles_dict.values():
profile_path = " / ".join(profile_path_list) # Convert the list to a string

# Attempt to find matching profiles in both profiles1 and profiles2
matching_profile_1 = next((p for p in profiles1[samples_ratio]["100"] if " / ".join(p["path"]) == profile_path), None)
matching_profile_2 = next((p for p in profiles2[samples_ratio]["100"] if " / ".join(p["path"]) == profile_path), None)

if profile_path not in combined[samples_ratio]:
combined[samples_ratio][profile_path] = {}
if profile_path not in combined:
combined[profile_path] = {}

combined[samples_ratio][profile_path]['detectron_results_1'] = matching_profile_1["detectron_results"] if matching_profile_1 else None
combined[samples_ratio][profile_path]['detectron_results_2'] = matching_profile_2["detectron_results"] if matching_profile_2 else None
combined[profile_path]['detectron_results_1'] = matching_profile_1["detectron_results"] if matching_profile_1 else None
combined[profile_path]['detectron_results_2'] = matching_profile_2["detectron_results"] if matching_profile_2 else None

self.profiles_detectron_comparaison = combined
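With this change the comparison dictionary is keyed directly by the stringified profile path instead of being nested under the samples ratio. A toy illustration of the resulting shape; the path string and values are hypothetical placeholders:

combined = {
    "* / feature_1 <= 0.5": {
        "detectron_results_1": {"shift_probability": 0.61},  # results from the first experiment
        "detectron_results_2": None,                          # profile missing from the second experiment
    },
}
print(list(combined.keys()))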

131 changes: 127 additions & 4 deletions MED3pa/med3pa/mdr.py
@@ -146,9 +146,9 @@ def _filter_by_profile(dataset : MaskedDataset, path : List, features: list, min
# Filter the data according to the path mask
filtered_x = x[mask]
filtered_y_true = y_true[mask]
filtered_prob = predicted_prob[mask]
filtered_y_pred = y_pred[mask]
filtered_confidence_scores = confidence_scores[mask]
filtered_prob = predicted_prob[mask] if predicted_prob is not None else None
filtered_y_pred = y_pred[mask] if y_pred is not None else None
filtered_confidence_scores = confidence_scores[mask] if confidence_scores is not None else None

# filter once again according to the min_confidence_level if specified
if min_confidence_level is not None:
@@ -401,7 +401,7 @@ def calc_metrics_by_profiles(profiles_manager, dataset : MaskedDataset, features
profile.update_node_information(info_dict)

@staticmethod
def detectron_by_profiles(datasets: DatasetsManager,
def detectron_by_profiles_deprecated(datasets: DatasetsManager,
profiles_manager: ProfilesManager,
confidence_scores: np.ndarray,
training_params: Dict,
@@ -512,3 +512,126 @@ def detectron_by_profiles(datasets: DatasetsManager,

return profiles_by_dr

def _filter_with_fallback(dataset, profile, features, min_confidence_level):
# Initial attempt to filter with the full profile path
q_x, q_y_true, _, _, _ = MDRCalculator._filter_by_profile(dataset, path=profile.path, features=features, min_confidence_level=min_confidence_level)

# If the result is empty, start reducing conditions
current_path = profile.path.copy()
while len(q_y_true) == 0 and len(current_path) >= 1:
# Remove the last condition
current_path.pop()

# Attempt to filter with the reduced path
q_x, q_y_true, _, _, _ = MDRCalculator._filter_by_profile(dataset, path=current_path, features=features, min_confidence_level=min_confidence_level)

return q_x, q_y_true

def detectron_by_profiles(datasets: DatasetsManager,
profiles_manager: ProfilesManager,
confidence_scores: np.ndarray,
training_params: Dict,
base_model_manager: BaseModelManager,
strategies: Union[Type[DetectronStrategy], List[Type[DetectronStrategy]]],
samples_size: int = 20,
ensemble_size: int = 10,
num_calibration_runs: int = 100,
patience: int = 3,
allow_margin: bool = False,
margin: float = 0.05,
all_dr: bool = True) -> Dict:

"""Runs the Detectron method on the different testing set profiles.
Args:
datasets (DatasetsManager): The datasets manager instance.
profiles_manager (ProfilesManager): the manager containing the profiles of the testing set.
training_params (dict): Parameters for training the models.
base_model_manager (BaseModelManager): The base model manager instance.
confidence_scores (np.ndarray): Confidence scores of the testing set, used to derive the minimum confidence threshold for each declaration rate.
samples_size (int, optional): Sample size for the Detectron experiment, by default 20.
ensemble_size (int, optional): Number of models in the ensemble, by default 10.
num_calibration_runs (int, optional): Number of calibration runs, by default 100.
patience (int, optional): Patience for early stopping, by default 3.
strategies (Union[Type[DetectronStrategy], List[Type[DetectronStrategy]]]): The strategies for testing disagreement.
allow_margin (bool, optional): Whether to allow a margin in the test, by default False.
margin (float, optional): Margin value for the test, by default 0.05.
all_dr (bool, optional): Whether to run for all declaration rates, by default True.
Returns:
Dict: Dictionary of med3pa profiles with detectron results.
"""
min_positive_ratio = min([k for k in profiles_manager.profiles_records.keys() if k >= 0])
test_dataset = datasets.get_dataset_by_type('testing', True)
reference_dataset = datasets.get_dataset_by_type('reference', True)
test_dataset.set_confidence_scores(confidence_scores=confidence_scores)
profiles_by_dr = profiles_manager.get_profiles(min_samples_ratio=min_positive_ratio)
last_min_confidence_level = -1
features = datasets.get_column_labels()
for dr, profiles in profiles_by_dr.items():
if not all_dr and dr != 100:
continue # Skip all dr values except dr == 100 if all_dr is False

experiment_det = None
min_confidence_level = MDRCalculator._get_min_confidence_score(dr, confidence_scores)
if last_min_confidence_level != min_confidence_level:
for profile in profiles:
detectron_results_dict = {}

q_x, q_y_true, _, _, _ = MDRCalculator._filter_by_profile(test_dataset, path=profile.path, features=features, min_confidence_level=min_confidence_level)
p_x_profile, p_y_true_profile = MDRCalculator._filter_with_fallback(reference_dataset, profile=profile, features=features, min_confidence_level=None)
if len(p_y_true_profile)==0:
p_x, p_y_true = datasets.get_dataset_by_type("reference")
else:
p_x = p_x_profile
p_y_true = p_y_true_profile

if len(q_y_true) != 0:
if len(q_y_true) < samples_size or len(p_y_true) < samples_size:
detectron_results_dict['Executed'] = "Not enough samples"
detectron_results_dict['Tested Profile size'] = len(q_y_true)
detectron_results_dict['Tests Results'] = None

else:
profile_set = DatasetsManager()
profile_set.set_column_labels(datasets.get_column_labels())
profile_set.set_from_data(dataset_type="testing", observations=q_x, true_labels=q_y_true)
profile_set.set_from_data(dataset_type="reference", observations=p_x, true_labels=p_y_true)
profile_set.set_from_data(dataset_type="training",
observations=datasets.get_dataset_by_type(dataset_type="training", return_instance=True).get_observations(),
true_labels=datasets.get_dataset_by_type(dataset_type="training", return_instance=True).get_true_labels())
profile_set.set_from_data(dataset_type="validation",
observations=datasets.get_dataset_by_type(dataset_type="validation", return_instance=True).get_observations(),
true_labels=datasets.get_dataset_by_type(dataset_type="validation", return_instance=True).get_true_labels())

path_description = "*, " + " & ".join(profile.path[1:])
print("Running Detectron on Profile:", path_description)

experiment_det= DetectronExperiment.run(
datasets=profile_set, training_params=training_params, base_model_manager=base_model_manager,
samples_size=samples_size, num_calibration_runs=num_calibration_runs, ensemble_size=ensemble_size,
patience=patience, allow_margin=allow_margin, margin=margin)


detectron_results = experiment_det.analyze_results(strategies=strategies)
detectron_results_dict['Executed'] = "Yes"
detectron_results_dict['Tested Profile size'] = len(q_y_true)
detectron_results_dict['Tests Results'] = detectron_results

else:
detectron_results_dict['Executed'] = "Empty profile in test data"
detectron_results_dict['Tested Profile size'] = len(q_y_true)
detectron_results_dict['Tests Results'] = None


profile.update_detectron_results(detectron_results_dict)

last_profiles = profiles
last_min_confidence_level = min_confidence_level
else:
profiles = last_profiles

return profiles_by_dr
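The _filter_with_fallback helper added above keeps the reference data from coming back empty: when the full profile path matches no samples, the last condition is dropped and the filter is retried. A generic sketch of that relaxation loop over plain lists, purely illustrative and not the MED3pa API:

def filter_with_fallback(rows, conditions):
    """Drop trailing conditions until the filter returns at least one row."""
    conditions = list(conditions)
    selected = [r for r in rows if all(cond(r) for cond in conditions)]
    while not selected and conditions:
        conditions.pop()   # relax the most specific condition first
        selected = [r for r in rows if all(cond(r) for cond in conditions)]
    return selected

rows = [{"age": 30}, {"age": 70}]
conditions = [lambda r: r["age"] > 50, lambda r: r["age"] > 80]  # second condition matches nothing
print(filter_with_fallback(rows, conditions))  # falls back to age > 50 -> [{'age': 70}]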

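As a usage sketch, the rewritten detectron_by_profiles is still driven by the manager objects used elsewhere in MED3pa. The call below is hypothetical: it assumes the method remains a static method of MDRCalculator like the deprecated version, that datasets, profiles_manager, base_model_manager and confidence_scores have already been prepared by the surrounding med3pa pipeline, and that training_params is an ordinary XGBoost-style parameter dict.

from MED3pa.med3pa.mdr import MDRCalculator
from MED3pa.detectron.strategies import EnhancedDisagreementStrategy

# datasets, profiles_manager, base_model_manager and confidence_scores are assumed to
# come from the usual med3pa workflow; they are not constructed in this sketch.
profiles_by_dr = MDRCalculator.detectron_by_profiles(
    datasets=datasets,
    profiles_manager=profiles_manager,
    confidence_scores=confidence_scores,
    training_params={"eval_metric": "auc"},   # hypothetical parameters
    base_model_manager=base_model_manager,
    strategies=[EnhancedDisagreementStrategy],
    samples_size=20,
    ensemble_size=10,
    num_calibration_runs=100,
    patience=3,
    allow_margin=False,
    margin=0.05,
    all_dr=True,
)
# profiles_by_dr maps each declaration rate to its list of profiles, updated with Detectron results.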

2 changes: 1 addition & 1 deletion setup.py
@@ -9,7 +9,7 @@

setup(
name="MED3pa",
version="0.1.31",
version="0.1.32",
author="MEDomics consortium",
author_email="medomics.info@gmail.com",
description="Python Open-source package for ensuring robust and reliable ML models deployments",