diff --git a/MED3pa/med3pa/comparaison.py b/MED3pa/med3pa/comparaison.py index a5d5748..faa1ef0 100644 --- a/MED3pa/med3pa/comparaison.py +++ b/MED3pa/med3pa/comparaison.py @@ -24,12 +24,42 @@ def __init__(self, results1_path: str, results2_path: str) -> None: self.profiles_detectron_comparaison = {} self.global_metrics_comparaison = {} self.models_evaluation_comparaison = {} + self.shared_profiles = {} # New variable to store shared profiles self.config_file = {} - self.compare_profiles = False + self.compare_profiles = True self.compare_detectron = False self.mode = "" self._check_experiment_name() + def identify_shared_profiles(self): + """ + Identifies the shared profiles between the two experiments and stores them in shared_profiles. + """ + profiles_file_1 = os.path.join(self.results1_path, 'test', 'profiles.json') + profiles_file_2 = os.path.join(self.results2_path, 'test', 'profiles.json') + + with open(profiles_file_1, 'r') as f1, open(profiles_file_2, 'r') as f2: + profiles1 = json.load(f1) + profiles2 = json.load(f2) + + shared = {} + + for samples_ratio, dr_dict in profiles1.items(): + if samples_ratio in profiles2: # Only proceed if samples_ratio is in both profiles + if samples_ratio not in shared: + shared[samples_ratio] = {} + for dr, profiles in dr_dict.items(): + if dr in profiles2[samples_ratio]: # Only proceed if dr is in both profiles + for profile in profiles: + profile_path = " / ".join(profile["path"]) + # Check if the profile_path exists in both profiles1 and profiles2 + matching_profile = next((p for p in profiles2[samples_ratio][dr] if p["path"] == profile["path"]), None) + if matching_profile: + if profile_path not in shared[samples_ratio]: + shared[samples_ratio][profile_path] = profile["path"] + + self.shared_profiles = shared # Store shared profiles + def _check_experiment_name(self) -> None: """ Checks if the experiment_name in the config_file of both results paths is the same. @@ -69,10 +99,25 @@ def is_comparable(self) -> bool: base_model_different = self.config_file['base_model']['different'] if self.compare_detectron: - params_different = self.config_file['med3pa_detectron_params']['different'] + # Extract med3pa_detectron_params for comparison, excluding apc_model and ipc_model + params1 = self.config_file['med3pa_detectron_params']['med3pa_detectron_params1'].copy() + params2 = self.config_file['med3pa_detectron_params']['med3pa_detectron_params2'].copy() + # Remove apc_model and ipc_model from comparison + params1['med3pa_params'].pop('apc_model', None) + params1['med3pa_params'].pop('ipc_model', None) + params2['med3pa_params'].pop('apc_model', None) + params2['med3pa_params'].pop('ipc_model', None) else: - params_different = self.config_file['med3pa_params']['different'] + # Extract med3pa_params for comparison, excluding apc_model and ipc_model + params1 = self.config_file['med3pa_params']['med3pa_params1'].copy() + params2 = self.config_file['med3pa_params']['med3pa_params2'].copy() + params1.pop('apc_model', None) + params1.pop('ipc_model', None) + params2.pop('apc_model', None) + params2.pop('ipc_model', None) + + params_different = (params1 != params2) # Check the conditions for comparability can_compare = False if datasets_different and not base_model_different and not params_different: @@ -105,7 +150,8 @@ def _check_experiment_tree(self) -> None: def compare_profiles_metrics(self): """ - Compares profile metrics between two sets of results and stores them in a dictionary. + Compares profile metrics between two sets of results and stores them in a dictionary, + using only the shared profiles. """ combined = {} profiles_file_1 = os.path.join(self.results1_path, 'test', 'profiles.json') @@ -115,37 +161,36 @@ def compare_profiles_metrics(self): profiles1 = json.load(f1) profiles2 = json.load(f2) - for samples_ratio, dr_dict in profiles1.items(): - if samples_ratio not in combined: - combined[samples_ratio] = {} - for dr, profiles in dr_dict.items(): - for profile in profiles: - profile_path = " / ".join(profile["path"]) - if profile_path not in combined[samples_ratio]: - combined[samples_ratio][profile_path] = {} - if dr not in combined[samples_ratio][profile_path]: - combined[samples_ratio][profile_path][dr] = {} - combined[samples_ratio][profile_path][dr]['metrics_1'] = profile["metrics"] - - for samples_ratio, dr_dict in profiles2.items(): - if samples_ratio not in combined: - combined[samples_ratio] = {} - for dr, profiles in dr_dict.items(): - for profile in profiles: - profile_path = " / ".join(profile["path"]) + for samples_ratio, profiles_dict in self.shared_profiles.items(): + combined[samples_ratio] = {} + for profile_path_list in profiles_dict.values(): + profile_path = " / ".join(profile_path_list) # Convert the list to a string + + # Extract possible drs (decision rules) where profiles match in profiles1 + drs = [dr for dr, profiles in profiles1[samples_ratio].items()] + for dr in drs: + # Attempt to find matching profiles in both profiles1 and profiles2 + matching_profile_1 = next((p for p in profiles1[samples_ratio][dr] if " / ".join(p["path"]) == profile_path), None) + matching_profile_2 = next((p for p in profiles2[samples_ratio][dr] if " / ".join(p["path"]) == profile_path), None) + if profile_path not in combined[samples_ratio]: combined[samples_ratio][profile_path] = {} - if dr not in combined[samples_ratio][profile_path]: - combined[samples_ratio][profile_path][dr] = {} - combined[samples_ratio][profile_path][dr]['metrics_2'] = profile["metrics"] + + combined[samples_ratio][profile_path][dr] = { + 'metrics_1': matching_profile_1["metrics"] if matching_profile_1 else None, + 'metrics_2': matching_profile_2["metrics"] if matching_profile_2 else None + } self.profiles_metrics_comparaison = combined - + + def compare_profiles_detectron_results(self): """ - Compares Detectron results between two sets of profiles and stores them in a dictionary. + Compares Detectron results between two sets of profiles and stores them in a dictionary, + using only the shared profiles. """ combined = {} + profiles_file_1 = os.path.join(self.results1_path, 'test', 'profiles.json') profiles_file_2 = os.path.join(self.results2_path, 'test', 'profiles.json') @@ -153,28 +198,24 @@ def compare_profiles_detectron_results(self): profiles1 = json.load(f1) profiles2 = json.load(f2) - # Determine the smallest positive samples_ratio - smallest_samples_ratio = min([int(k) for k in profiles1.keys() if int(k) >= 0]) - smallest_samples_ratio = str(smallest_samples_ratio) + for samples_ratio, profiles_dict in self.shared_profiles.items(): + combined[samples_ratio] = {} + for profile_path_list in profiles_dict.values(): + profile_path = " / ".join(profile_path_list) # Convert the list to a string - for profiles, key in zip([profiles1, profiles2], ['detectron_results_1', 'detectron_results_2']): - if smallest_samples_ratio not in profiles: - continue + # Attempt to find matching profiles in both profiles1 and profiles2 + matching_profile_1 = next((p for p in profiles1[samples_ratio]["100"] if " / ".join(p["path"]) == profile_path), None) + matching_profile_2 = next((p for p in profiles2[samples_ratio]["100"] if " / ".join(p["path"]) == profile_path), None) - dr_dict = profiles[smallest_samples_ratio] + if profile_path not in combined[samples_ratio]: + combined[samples_ratio][profile_path] = {} - if "100" not in dr_dict: - continue - - for profile in dr_dict["100"]: - profile_path = " / ".join(profile["path"]) - if profile_path not in combined: - combined[profile_path] = {} - - combined[profile_path][key] = profile["detectron_results"] + combined[samples_ratio][profile_path]['detectron_results_1'] = matching_profile_1["detectron_results"] if matching_profile_1 else None + combined[samples_ratio][profile_path]['detectron_results_2'] = matching_profile_2["detectron_results"] if matching_profile_2 else None self.profiles_detectron_comparaison = combined + def compare_global_metrics(self): """ Compares global metrics between two sets of results and stores them in a dictionary. @@ -288,9 +329,8 @@ def compare_experiments(self): raise ValueError("The two experiments cannot be compared based on the provided criteria.") self.compare_global_metrics() - + self.identify_shared_profiles() # Identify shared profiles before comparisons if self.mode in ['apc', 'mpc']: - self._check_experiment_tree() if self.compare_profiles: self.compare_profiles_metrics() if self.compare_detectron: diff --git a/MED3pa/med3pa/experiment.py b/MED3pa/med3pa/experiment.py index af91d68..23ab732 100644 --- a/MED3pa/med3pa/experiment.py +++ b/MED3pa/med3pa/experiment.py @@ -491,7 +491,7 @@ def _run_by_set(datasets_manager: DatasetsManager, else: APC_model = apc_instance print("Used a trainde IPC instance.") - + # Predict APC values APC_values = APC_model.predict(x) print("Aggregated confidence scores calculated.") @@ -527,7 +527,7 @@ def _run_by_set(datasets_manager: DatasetsManager, for samples_ratio in range(samples_ratio_min, samples_ratio_max + 1, samples_ratio_step): # Calculate profiles and their metrics by declaration rate - MDRCalculator.calc_profiles(profiles_manager, tree, mpc_dataset, features, MPC_values, samples_ratio) + MDRCalculator.calc_profiles(profiles_manager, tree, mpc_dataset, features, MPC_values, samples_ratio) MDRCalculator.calc_metrics_by_profiles(profiles_manager, mpc_dataset, features, MPC_values, samples_ratio, med3pa_metrics) results.set_profiles_manager(profiles_manager) print("Results extracted for minimum_samples_ratio = ", samples_ratio) diff --git a/MED3pa/med3pa/mdr.py b/MED3pa/med3pa/mdr.py index 0849216..5cfbe80 100644 --- a/MED3pa/med3pa/mdr.py +++ b/MED3pa/med3pa/mdr.py @@ -304,7 +304,7 @@ def calc_profiles(profiles_manager: ProfilesManager, tree: TreeRepresentation, d lost_profiles_all = [] # Saves lost profiles last_min_confidence_level = -1 # Last min confidence level min_confidence_levels_dict = {} # Saves the min_confidence_level thresholds - + precision = 14 # Go through all declaration rates for dr in range(100, -1, -1): @@ -329,10 +329,11 @@ def calc_profiles(profiles_manager: ProfilesManager, tree: TreeRepresentation, d # calculate the samples_ratio (pop%) and mean_confidence_level of this node, if the filtered data isnt empty if len(filtered_confidence_scores) > 0: samples_ratio = len(filtered_confidence_scores) / len(confidence_scores) * 100 - mean_cconfidence = np.mean(filtered_confidence_scores) if filtered_confidence_scores.size > 0 else 0 + mean_cconfidence = np.mean(filtered_confidence_scores) # if the calculated samples_ratio and mean_confidence meet the conditions, keep this node - if samples_ratio >= min_samples_ratio and mean_cconfidence >= min_confidence_level: + if samples_ratio >= min_samples_ratio and round(mean_cconfidence, precision) >= round(min_confidence_level, precision): profiles_current.append(node) + # If the last profiles are different from current profiles diff --git a/MED3pa/med3pa/models.py b/MED3pa/med3pa/models.py index 9782dc2..d29d927 100644 --- a/MED3pa/med3pa/models.py +++ b/MED3pa/med3pa/models.py @@ -282,7 +282,7 @@ def print_decision_tree_structure(tree_model, feature_names=None): """ tree_rules = export_text(tree_model, feature_names=feature_names) print(tree_rules) - + def optimize(self, param_grid: dict, cv: int, x: np.ndarray, error_prob: np.ndarray, sample_weight: np.ndarray = None) -> None: """ Optimizes the model parameters using GridSearchCV. @@ -303,7 +303,7 @@ def optimize(self, param_grid: dict, cv: int, x: np.ndarray, error_prob: np.ndar self.params.update(grid_search.best_params_) self.grid_search_params = param_grid df_X, df_y, df_w = self.dataPreparationStrategy.execute(column_labels=self.features, observations=x, labels=error_prob) - self.treeRepresentation.build_tree(self.model, df_X, error_prob, node_id=0) + self.treeRepresentation.head = self.treeRepresentation.build_tree(self.model, df_X, error_prob, node_id=0) self.optimized = True diff --git a/MED3pa/med3pa/tree.py b/MED3pa/med3pa/tree.py index e5c36a2..9633e21 100644 --- a/MED3pa/med3pa/tree.py +++ b/MED3pa/med3pa/tree.py @@ -84,7 +84,7 @@ def build_tree(self, dtr: DecisionTreeRegressorModel, X: DataFrame, y: Series, n node_thresh = dtr.model.tree_.threshold[node_id] node_feature_id = dtr.model.tree_.feature[node_id] node_feature = self.features[node_feature_id] - + # Check if the split would result in an empty set, if so, stop the recursion if y[X[node_feature] <= node_thresh].size == 0 or y[X[node_feature] > node_thresh].size == 0: print("split would results in an empty data section") diff --git a/setup.py b/setup.py deleted file mode 100644 index 459edb0..0000000 --- a/setup.py +++ /dev/null @@ -1,26 +0,0 @@ -import os -from setuptools import setup, find_packages - -with open("README.md", encoding='utf-8') as f: - long_description = f.read() - -with open('requirements.txt') as f: - requirements = f.readlines() - -setup( - name="MED3pa", - version="0.1.26", - author="MEDomics consortium", - author_email="medomics.info@gmail.com", - description="Python Open-source package for ensuring robust and reliable ML models deployments", - long_description=long_description, - long_description_content_type="text/markdown", - url="https://github.com/lyna1404/MED3pa", - project_urls={ - 'Documentation': 'https://med3pa.readthedocs.io/en/latest/', - 'Github': 'https://github.com/lyna1404/MED3pa' - }, - packages=find_packages(exclude=['docs', 'tests', 'experiments']), - python_requires='>=3.9', - install_requires=requirements, -) diff --git a/tutorials/detectron_experiment_comparaison/detectron_results_comparaison.json b/tutorials/detectron_experiment_comparaison/detectron_results_comparaison.json deleted file mode 100644 index b79f0f0..0000000 --- a/tutorials/detectron_experiment_comparaison/detectron_results_comparaison.json +++ /dev/null @@ -1,20 +0,0 @@ -{ - "enhanced_disagreement_strategy": { - "detectron_results1": 0.4421052631578947, - "detectron_results2": 0.8421052631578947, - "comparison_criteria": "shift_probability", - "best": "detectron_results1" - }, - "mannwhitney_strategy": { - "detectron_results1": 0.09145056471994106, - "detectron_results2": 1.95388272998725e-18, - "comparison_criteria": "p_value", - "best": "detectron_results1" - }, - "original_disagreement_strategy": { - "detectron_results1": 0.04, - "detectron_results2": 0.05, - "comparison_criteria": "p_value", - "best": "detectron_results2" - } -} \ No newline at end of file diff --git a/tutorials/detectron_experiment_comparaison/experiment_config_comparaison.json b/tutorials/detectron_experiment_comparaison/experiment_config_comparaison.json deleted file mode 100644 index 3264401..0000000 --- a/tutorials/detectron_experiment_comparaison/experiment_config_comparaison.json +++ /dev/null @@ -1,255 +0,0 @@ -{ - "datasets": { - "different": true, - "datasets1": { - "training_set": { - "file_path": "./data/train_data.csv", - "num_samples": 537, - "num_observations": 8, - "has_pseudo_labels": false, - "has_pseudo_probabilities": false, - "has_confidence_scores": false - }, - "validation_set": { - "file_path": "./data/val_data.csv", - "num_samples": 115, - "num_observations": 8, - "has_pseudo_labels": false, - "has_pseudo_probabilities": false, - "has_confidence_scores": false - }, - "reference_set": { - "file_path": "./data/test_data.csv", - "num_samples": 116, - "num_observations": 8, - "has_pseudo_labels": false, - "has_pseudo_probabilities": false, - "has_confidence_scores": false - }, - "testing_set": { - "file_path": "./data/test_data_shifted_0.1.csv", - "num_samples": 116, - "num_observations": 8, - "has_pseudo_labels": false, - "has_pseudo_probabilities": false, - "has_confidence_scores": false - }, - "column_labels": [ - "Pregnancies", - "Glucose", - "BloodPressure", - "SkinThickness", - "Insulin", - "BMI", - "DiabetesPedigreeFunction", - "Age" - ] - }, - "datasets2": { - "training_set": { - "file_path": "./data/train_data.csv", - "num_samples": 537, - "num_observations": 8, - "has_pseudo_labels": false, - "has_pseudo_probabilities": false, - "has_confidence_scores": false - }, - "validation_set": { - "file_path": "./data/val_data.csv", - "num_samples": 115, - "num_observations": 8, - "has_pseudo_labels": false, - "has_pseudo_probabilities": false, - "has_confidence_scores": false - }, - "reference_set": { - "file_path": "./data/test_data.csv", - "num_samples": 116, - "num_observations": 8, - "has_pseudo_labels": false, - "has_pseudo_probabilities": false, - "has_confidence_scores": false - }, - "testing_set": { - "file_path": "./data/test_data_shifted_1.6.csv", - "num_samples": 116, - "num_observations": 8, - "has_pseudo_labels": false, - "has_pseudo_probabilities": false, - "has_confidence_scores": false - }, - "column_labels": [ - "Pregnancies", - "Glucose", - "BloodPressure", - "SkinThickness", - "Insulin", - "BMI", - "DiabetesPedigreeFunction", - "Age" - ] - } - }, - "base_model": { - "different": false, - "base_model1": { - "model": "XGBoostModel", - "model_type": "Booster", - "params": { - "device": "cpu", - "fail_on_invalid_gpu_id": 0, - "n_jobs": 0, - "nthread": 0, - "random_state": 0, - "seed": 0, - "seed_per_iteration": 0, - "validate_parameters": 1, - "process_type": "default", - "tree_method": "auto", - "updater": "grow_quantile_histmaker", - "updater_seq": "grow_quantile_histmaker", - "alpha": 0, - "cache_opt": 1, - "colsample_bylevel": 1, - "colsample_bynode": 1, - "colsample_bytree": 0.824717641, - "eta": 0.0710294247, - "gamma": 0.302559406, - "grow_policy": "depthwise", - "interaction_constraints": "", - "lambda": 1, - "learning_rate": 0.0710294247, - "max_bin": 256, - "max_cat_threshold": 64, - "max_cat_to_onehot": 4, - "max_delta_step": 0, - "max_depth": 9, - "max_leaves": 0, - "min_child_weight": 1, - "min_split_loss": 0.302559406, - "monotone_constraints": "()", - "refresh_leaf": 1, - "reg_alpha": 0, - "reg_lambda": 1, - "sampling_method": "uniform", - "sketch_ratio": 2, - "sparse_threshold": 0.2, - "subsample": 0.817121327, - "debug_synchronize": 0, - "max_cached_hist_node": 65536, - "booster": "gbtree", - "disable_default_eval_metric": 0, - "multi_strategy": "one_output_per_tree", - "objective": "reg:squarederror", - "scale_pos_weight": 1, - "base_score": 0.3500931, - "boost_from_average": 1, - "num_class": 0, - "num_feature": 8, - "num_target": 1, - "eval_metric": [ - "auc" - ], - "num_boost_rounds": 30 - }, - "data_preparation_strategy": "ToDmatrixStrategy", - "pickled_model": true, - "file_path": "./models/diabetes_xgb_model.pkl" - }, - "base_model2": { - "model": "XGBoostModel", - "model_type": "Booster", - "params": { - "device": "cpu", - "fail_on_invalid_gpu_id": 0, - "n_jobs": 0, - "nthread": 0, - "random_state": 0, - "seed": 0, - "seed_per_iteration": 0, - "validate_parameters": 1, - "process_type": "default", - "tree_method": "auto", - "updater": "grow_quantile_histmaker", - "updater_seq": "grow_quantile_histmaker", - "alpha": 0, - "cache_opt": 1, - "colsample_bylevel": 1, - "colsample_bynode": 1, - "colsample_bytree": 0.824717641, - "eta": 0.0710294247, - "gamma": 0.302559406, - "grow_policy": "depthwise", - "interaction_constraints": "", - "lambda": 1, - "learning_rate": 0.0710294247, - "max_bin": 256, - "max_cat_threshold": 64, - "max_cat_to_onehot": 4, - "max_delta_step": 0, - "max_depth": 9, - "max_leaves": 0, - "min_child_weight": 1, - "min_split_loss": 0.302559406, - "monotone_constraints": "()", - "refresh_leaf": 1, - "reg_alpha": 0, - "reg_lambda": 1, - "sampling_method": "uniform", - "sketch_ratio": 2, - "sparse_threshold": 0.2, - "subsample": 0.817121327, - "debug_synchronize": 0, - "max_cached_hist_node": 65536, - "booster": "gbtree", - "disable_default_eval_metric": 0, - "multi_strategy": "one_output_per_tree", - "objective": "reg:squarederror", - "scale_pos_weight": 1, - "base_score": 0.3500931, - "boost_from_average": 1, - "num_class": 0, - "num_feature": 8, - "num_target": 1, - "eval_metric": [ - "auc" - ], - "num_boost_rounds": 30 - }, - "data_preparation_strategy": "ToDmatrixStrategy", - "pickled_model": true, - "file_path": "./models/diabetes_xgb_model.pkl" - } - }, - "detectron_params": { - "different": false, - "detectron_params1": { - "additional_training_params": null, - "samples_size": 20, - "cdcs_ensemble_size": 10, - "num_runs": 100, - "patience": 3, - "allow_margin": false, - "margin": 0.05, - "test_strategies": [ - "enhanced_disagreement_strategy", - "mannwhitney_strategy", - "original_disagreement_strategy" - ] - }, - "detectron_params2": { - "additional_training_params": null, - "samples_size": 20, - "cdcs_ensemble_size": 10, - "num_runs": 100, - "patience": 3, - "allow_margin": false, - "margin": 0.05, - "test_strategies": [ - "enhanced_disagreement_strategy", - "mannwhitney_strategy", - "original_disagreement_strategy" - ] - } - } -} \ No newline at end of file diff --git a/tutorials/detectron_experiment_comparaison/model_evaluation_comparaison.json b/tutorials/detectron_experiment_comparaison/model_evaluation_comparaison.json deleted file mode 100644 index ae8aae1..0000000 --- a/tutorials/detectron_experiment_comparaison/model_evaluation_comparaison.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "Auc": { - "detectron_results1": 0.8212806626314112, - "detectron_results2": 0.7817776361898694, - "best": "detectron_results1" - }, - "Accuracy": { - "detectron_results1": 0.7758620689655172, - "detectron_results2": 0.7068965517241379, - "best": "detectron_results1" - } -} \ No newline at end of file diff --git a/tutorials/detectron_experiment_comparaison/rejection_counts_comparaison.json b/tutorials/detectron_experiment_comparaison/rejection_counts_comparaison.json deleted file mode 100644 index b45d476..0000000 --- a/tutorials/detectron_experiment_comparaison/rejection_counts_comparaison.json +++ /dev/null @@ -1,414 +0,0 @@ -{ - "rejection_counts1": { - "reference": [ - 3, - 3, - 5, - 5, - 5, - 5, - 5, - 6, - 6, - 6, - 6, - 6, - 6, - 6, - 6, - 6, - 6, - 6, - 6, - 6, - 6, - 6, - 6, - 6, - 6, - 6, - 6, - 6, - 6, - 7, - 7, - 7, - 7, - 7, - 7, - 7, - 7, - 7, - 7, - 7, - 7, - 7, - 7, - 7, - 7, - 7, - 7, - 7, - 7, - 7, - 7, - 7, - 7, - 7, - 7, - 7, - 7, - 8, - 8, - 8, - 8, - 8, - 8, - 8, - 8, - 8, - 8, - 8, - 8, - 8, - 8, - 8, - 8, - 8, - 8, - 8, - 8, - 8, - 9, - 9, - 9, - 9, - 9, - 9, - 9, - 9, - 9, - 9, - 9, - 9, - 10, - 10, - 10, - 11, - 11, - 12, - 13, - 13, - 14, - 14 - ], - "test": [ - 4, - 4, - 5, - 5, - 5, - 5, - 5, - 5, - 5, - 5, - 5, - 5, - 6, - 6, - 6, - 6, - 6, - 6, - 6, - 6, - 6, - 6, - 6, - 6, - 6, - 7, - 7, - 7, - 7, - 7, - 7, - 7, - 7, - 7, - 7, - 7, - 7, - 7, - 7, - 7, - 7, - 7, - 7, - 7, - 7, - 7, - 7, - 7, - 7, - 7, - 7, - 7, - 7, - 7, - 7, - 8, - 8, - 8, - 8, - 8, - 8, - 8, - 8, - 8, - 8, - 8, - 8, - 9, - 9, - 9, - 9, - 9, - 9, - 9, - 9, - 9, - 9, - 9, - 9, - 9, - 9, - 9, - 9, - 9, - 10, - 10, - 10, - 10, - 10, - 10, - 10, - 10, - 10, - 11, - 11, - 11, - 11, - 13, - 13, - 14 - ] - }, - "rejection_counts2": { - "reference": [ - 3, - 3, - 5, - 5, - 5, - 5, - 5, - 6, - 6, - 6, - 6, - 6, - 6, - 6, - 6, - 6, - 6, - 6, - 6, - 6, - 6, - 6, - 6, - 6, - 6, - 6, - 6, - 6, - 6, - 7, - 7, - 7, - 7, - 7, - 7, - 7, - 7, - 7, - 7, - 7, - 7, - 7, - 7, - 7, - 7, - 7, - 7, - 7, - 7, - 7, - 7, - 7, - 7, - 7, - 7, - 7, - 7, - 8, - 8, - 8, - 8, - 8, - 8, - 8, - 8, - 8, - 8, - 8, - 8, - 8, - 8, - 8, - 8, - 8, - 8, - 8, - 8, - 8, - 9, - 9, - 9, - 9, - 9, - 9, - 9, - 9, - 9, - 9, - 9, - 9, - 10, - 10, - 10, - 11, - 11, - 12, - 13, - 13, - 14, - 14 - ], - "test": [ - 5, - 5, - 5, - 5, - 6, - 7, - 7, - 7, - 7, - 7, - 7, - 7, - 7, - 7, - 7, - 7, - 7, - 7, - 7, - 8, - 8, - 8, - 8, - 8, - 8, - 8, - 8, - 9, - 9, - 9, - 9, - 9, - 9, - 9, - 9, - 9, - 9, - 9, - 9, - 10, - 10, - 10, - 10, - 10, - 10, - 10, - 10, - 10, - 10, - 10, - 10, - 10, - 10, - 10, - 10, - 10, - 10, - 10, - 10, - 10, - 10, - 10, - 11, - 11, - 11, - 11, - 11, - 11, - 11, - 11, - 11, - 11, - 11, - 11, - 11, - 11, - 11, - 11, - 11, - 11, - 11, - 11, - 11, - 11, - 12, - 12, - 12, - 12, - 12, - 12, - 12, - 12, - 12, - 12, - 13, - 13, - 13, - 13, - 14, - 15 - ] - } -} \ No newline at end of file diff --git a/tutorials/detectron_experiment_results/detectron_results.json b/tutorials/detectron_experiment_results/detectron_results.json deleted file mode 100644 index f786c87..0000000 --- a/tutorials/detectron_experiment_results/detectron_results.json +++ /dev/null @@ -1,33 +0,0 @@ -[ - { - "shift_probability": 0.4421052631578947, - "test_statistic": 7.589473684210526, - "baseline_mean": 7.231578947368421, - "baseline_std": 1.4396829444740318, - "significance_description": { - "unsignificant shift": 55.78947368421052, - "small": 12.631578947368421, - "moderate": 27.368421052631582, - "large": 4.2105263157894735 - }, - "Strategy": "enhanced_disagreement_strategy" - }, - { - "p_value": 0.09145056471994106, - "u_statistic": 4018.5, - "significance_description": { - "unsignificant shift": 55.78947368421052, - "small": 12.631578947368421, - "moderate": 27.368421052631582, - "large": 4.2105263157894735 - }, - "Strategy": "mannwhitney_strategy" - }, - { - "p_value": 0.04, - "test_statistic": 7, - "baseline_mean": 12.47, - "baseline_std": 1.920702996301094, - "Strategy": "original_disagreement_strategy" - } -] \ No newline at end of file diff --git a/tutorials/detectron_experiment_results/experiment_config.json b/tutorials/detectron_experiment_results/experiment_config.json deleted file mode 100644 index ba9b847..0000000 --- a/tutorials/detectron_experiment_results/experiment_config.json +++ /dev/null @@ -1,125 +0,0 @@ -{ - "experiment_name": "DetectronExperiment", - "datasets": { - "training_set": { - "file_path": "./data/train_data.csv", - "num_samples": 537, - "num_observations": 8, - "has_pseudo_labels": false, - "has_pseudo_probabilities": false, - "has_confidence_scores": false - }, - "validation_set": { - "file_path": "./data/val_data.csv", - "num_samples": 115, - "num_observations": 8, - "has_pseudo_labels": false, - "has_pseudo_probabilities": false, - "has_confidence_scores": false - }, - "reference_set": { - "file_path": "./data/test_data.csv", - "num_samples": 116, - "num_observations": 8, - "has_pseudo_labels": false, - "has_pseudo_probabilities": false, - "has_confidence_scores": false - }, - "testing_set": { - "file_path": "./data/test_data_shifted_0.1.csv", - "num_samples": 116, - "num_observations": 8, - "has_pseudo_labels": false, - "has_pseudo_probabilities": false, - "has_confidence_scores": false - }, - "column_labels": [ - "Pregnancies", - "Glucose", - "BloodPressure", - "SkinThickness", - "Insulin", - "BMI", - "DiabetesPedigreeFunction", - "Age" - ] - }, - "base_model": { - "model": "XGBoostModel", - "model_type": "Booster", - "params": { - "device": "cpu", - "fail_on_invalid_gpu_id": 0, - "n_jobs": 0, - "nthread": 0, - "random_state": 0, - "seed": 0, - "seed_per_iteration": 0, - "validate_parameters": 1, - "process_type": "default", - "tree_method": "auto", - "updater": "grow_quantile_histmaker", - "updater_seq": "grow_quantile_histmaker", - "alpha": 0, - "cache_opt": 1, - "colsample_bylevel": 1, - "colsample_bynode": 1, - "colsample_bytree": 0.824717641, - "eta": 0.0710294247, - "gamma": 0.302559406, - "grow_policy": "depthwise", - "interaction_constraints": "", - "lambda": 1, - "learning_rate": 0.0710294247, - "max_bin": 256, - "max_cat_threshold": 64, - "max_cat_to_onehot": 4, - "max_delta_step": 0, - "max_depth": 9, - "max_leaves": 0, - "min_child_weight": 1, - "min_split_loss": 0.302559406, - "monotone_constraints": "()", - "refresh_leaf": 1, - "reg_alpha": 0, - "reg_lambda": 1, - "sampling_method": "uniform", - "sketch_ratio": 2, - "sparse_threshold": 0.2, - "subsample": 0.817121327, - "debug_synchronize": 0, - "max_cached_hist_node": 65536, - "booster": "gbtree", - "disable_default_eval_metric": 0, - "multi_strategy": "one_output_per_tree", - "objective": "reg:squarederror", - "scale_pos_weight": 1, - "base_score": 0.3500931, - "boost_from_average": 1, - "num_class": 0, - "num_feature": 8, - "num_target": 1, - "eval_metric": [ - "auc" - ], - "num_boost_rounds": 30 - }, - "data_preparation_strategy": "ToDmatrixStrategy", - "pickled_model": true, - "file_path": "./models/diabetes_xgb_model.pkl" - }, - "detectron_params": { - "additional_training_params": null, - "samples_size": 20, - "cdcs_ensemble_size": 10, - "num_runs": 100, - "patience": 3, - "allow_margin": false, - "margin": 0.05, - "test_strategies": [ - "enhanced_disagreement_strategy", - "mannwhitney_strategy", - "original_disagreement_strategy" - ] - } -} \ No newline at end of file diff --git a/tutorials/detectron_experiment_results/model_evaluation.json b/tutorials/detectron_experiment_results/model_evaluation.json deleted file mode 100644 index 5444413..0000000 --- a/tutorials/detectron_experiment_results/model_evaluation.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "reference": { - "Auc": 0.8214399490283529, - "Accuracy": 0.7844827586206896 - }, - "test": { - "Auc": 0.8212806626314112, - "Accuracy": 0.7758620689655172 - } -} \ No newline at end of file diff --git a/tutorials/detectron_experiment_results/rejection_counts.json b/tutorials/detectron_experiment_results/rejection_counts.json deleted file mode 100644 index b1b260c..0000000 --- a/tutorials/detectron_experiment_results/rejection_counts.json +++ /dev/null @@ -1,206 +0,0 @@ -{ - "reference": [ - 3, - 3, - 5, - 5, - 5, - 5, - 5, - 6, - 6, - 6, - 6, - 6, - 6, - 6, - 6, - 6, - 6, - 6, - 6, - 6, - 6, - 6, - 6, - 6, - 6, - 6, - 6, - 6, - 6, - 7, - 7, - 7, - 7, - 7, - 7, - 7, - 7, - 7, - 7, - 7, - 7, - 7, - 7, - 7, - 7, - 7, - 7, - 7, - 7, - 7, - 7, - 7, - 7, - 7, - 7, - 7, - 7, - 8, - 8, - 8, - 8, - 8, - 8, - 8, - 8, - 8, - 8, - 8, - 8, - 8, - 8, - 8, - 8, - 8, - 8, - 8, - 8, - 8, - 9, - 9, - 9, - 9, - 9, - 9, - 9, - 9, - 9, - 9, - 9, - 9, - 10, - 10, - 10, - 11, - 11, - 12, - 13, - 13, - 14, - 14 - ], - "test": [ - 4, - 4, - 5, - 5, - 5, - 5, - 5, - 5, - 5, - 5, - 5, - 5, - 6, - 6, - 6, - 6, - 6, - 6, - 6, - 6, - 6, - 6, - 6, - 6, - 6, - 7, - 7, - 7, - 7, - 7, - 7, - 7, - 7, - 7, - 7, - 7, - 7, - 7, - 7, - 7, - 7, - 7, - 7, - 7, - 7, - 7, - 7, - 7, - 7, - 7, - 7, - 7, - 7, - 7, - 7, - 8, - 8, - 8, - 8, - 8, - 8, - 8, - 8, - 8, - 8, - 8, - 8, - 9, - 9, - 9, - 9, - 9, - 9, - 9, - 9, - 9, - 9, - 9, - 9, - 9, - 9, - 9, - 9, - 9, - 10, - 10, - 10, - 10, - 10, - 10, - 10, - 10, - 10, - 11, - 11, - 11, - 11, - 13, - 13, - 14 - ] -} \ No newline at end of file diff --git a/tutorials/detectron_experiment_results2/detectron_results.json b/tutorials/detectron_experiment_results2/detectron_results.json deleted file mode 100644 index bf76eda..0000000 --- a/tutorials/detectron_experiment_results2/detectron_results.json +++ /dev/null @@ -1,33 +0,0 @@ -[ - { - "shift_probability": 0.8421052631578947, - "test_statistic": 9.83157894736842, - "baseline_mean": 7.231578947368421, - "baseline_std": 1.4396829444740318, - "significance_description": { - "unsignificant shift": 15.789473684210526, - "small": 8.421052631578947, - "moderate": 36.84210526315789, - "large": 38.94736842105263 - }, - "Strategy": "enhanced_disagreement_strategy" - }, - { - "p_value": 1.95388272998725e-18, - "u_statistic": 1259.5, - "significance_description": { - "unsignificant shift": 15.789473684210526, - "small": 8.421052631578947, - "moderate": 36.84210526315789, - "large": 38.94736842105263 - }, - "Strategy": "mannwhitney_strategy" - }, - { - "p_value": 0.05, - "test_statistic": 8, - "baseline_mean": 12.47, - "baseline_std": 1.920702996301094, - "Strategy": "original_disagreement_strategy" - } -] \ No newline at end of file diff --git a/tutorials/detectron_experiment_results2/experiment_config.json b/tutorials/detectron_experiment_results2/experiment_config.json deleted file mode 100644 index c37e00f..0000000 --- a/tutorials/detectron_experiment_results2/experiment_config.json +++ /dev/null @@ -1,125 +0,0 @@ -{ - "experiment_name": "DetectronExperiment", - "datasets": { - "training_set": { - "file_path": "./data/train_data.csv", - "num_samples": 537, - "num_observations": 8, - "has_pseudo_labels": false, - "has_pseudo_probabilities": false, - "has_confidence_scores": false - }, - "validation_set": { - "file_path": "./data/val_data.csv", - "num_samples": 115, - "num_observations": 8, - "has_pseudo_labels": false, - "has_pseudo_probabilities": false, - "has_confidence_scores": false - }, - "reference_set": { - "file_path": "./data/test_data.csv", - "num_samples": 116, - "num_observations": 8, - "has_pseudo_labels": false, - "has_pseudo_probabilities": false, - "has_confidence_scores": false - }, - "testing_set": { - "file_path": "./data/test_data_shifted_1.6.csv", - "num_samples": 116, - "num_observations": 8, - "has_pseudo_labels": false, - "has_pseudo_probabilities": false, - "has_confidence_scores": false - }, - "column_labels": [ - "Pregnancies", - "Glucose", - "BloodPressure", - "SkinThickness", - "Insulin", - "BMI", - "DiabetesPedigreeFunction", - "Age" - ] - }, - "base_model": { - "model": "XGBoostModel", - "model_type": "Booster", - "params": { - "device": "cpu", - "fail_on_invalid_gpu_id": 0, - "n_jobs": 0, - "nthread": 0, - "random_state": 0, - "seed": 0, - "seed_per_iteration": 0, - "validate_parameters": 1, - "process_type": "default", - "tree_method": "auto", - "updater": "grow_quantile_histmaker", - "updater_seq": "grow_quantile_histmaker", - "alpha": 0, - "cache_opt": 1, - "colsample_bylevel": 1, - "colsample_bynode": 1, - "colsample_bytree": 0.824717641, - "eta": 0.0710294247, - "gamma": 0.302559406, - "grow_policy": "depthwise", - "interaction_constraints": "", - "lambda": 1, - "learning_rate": 0.0710294247, - "max_bin": 256, - "max_cat_threshold": 64, - "max_cat_to_onehot": 4, - "max_delta_step": 0, - "max_depth": 9, - "max_leaves": 0, - "min_child_weight": 1, - "min_split_loss": 0.302559406, - "monotone_constraints": "()", - "refresh_leaf": 1, - "reg_alpha": 0, - "reg_lambda": 1, - "sampling_method": "uniform", - "sketch_ratio": 2, - "sparse_threshold": 0.2, - "subsample": 0.817121327, - "debug_synchronize": 0, - "max_cached_hist_node": 65536, - "booster": "gbtree", - "disable_default_eval_metric": 0, - "multi_strategy": "one_output_per_tree", - "objective": "reg:squarederror", - "scale_pos_weight": 1, - "base_score": 0.3500931, - "boost_from_average": 1, - "num_class": 0, - "num_feature": 8, - "num_target": 1, - "eval_metric": [ - "auc" - ], - "num_boost_rounds": 30 - }, - "data_preparation_strategy": "ToDmatrixStrategy", - "pickled_model": true, - "file_path": "./models/diabetes_xgb_model.pkl" - }, - "detectron_params": { - "additional_training_params": null, - "samples_size": 20, - "cdcs_ensemble_size": 10, - "num_runs": 100, - "patience": 3, - "allow_margin": false, - "margin": 0.05, - "test_strategies": [ - "enhanced_disagreement_strategy", - "mannwhitney_strategy", - "original_disagreement_strategy" - ] - } -} \ No newline at end of file diff --git a/tutorials/detectron_experiment_results2/model_evaluation.json b/tutorials/detectron_experiment_results2/model_evaluation.json deleted file mode 100644 index 5fce5dc..0000000 --- a/tutorials/detectron_experiment_results2/model_evaluation.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "reference": { - "Auc": 0.8214399490283529, - "Accuracy": 0.7844827586206896 - }, - "test": { - "Auc": 0.7817776361898694, - "Accuracy": 0.7068965517241379 - } -} \ No newline at end of file diff --git a/tutorials/detectron_experiment_results2/rejection_counts.json b/tutorials/detectron_experiment_results2/rejection_counts.json deleted file mode 100644 index 57afa5d..0000000 --- a/tutorials/detectron_experiment_results2/rejection_counts.json +++ /dev/null @@ -1,206 +0,0 @@ -{ - "reference": [ - 3, - 3, - 5, - 5, - 5, - 5, - 5, - 6, - 6, - 6, - 6, - 6, - 6, - 6, - 6, - 6, - 6, - 6, - 6, - 6, - 6, - 6, - 6, - 6, - 6, - 6, - 6, - 6, - 6, - 7, - 7, - 7, - 7, - 7, - 7, - 7, - 7, - 7, - 7, - 7, - 7, - 7, - 7, - 7, - 7, - 7, - 7, - 7, - 7, - 7, - 7, - 7, - 7, - 7, - 7, - 7, - 7, - 8, - 8, - 8, - 8, - 8, - 8, - 8, - 8, - 8, - 8, - 8, - 8, - 8, - 8, - 8, - 8, - 8, - 8, - 8, - 8, - 8, - 9, - 9, - 9, - 9, - 9, - 9, - 9, - 9, - 9, - 9, - 9, - 9, - 10, - 10, - 10, - 11, - 11, - 12, - 13, - 13, - 14, - 14 - ], - "test": [ - 5, - 5, - 5, - 5, - 6, - 7, - 7, - 7, - 7, - 7, - 7, - 7, - 7, - 7, - 7, - 7, - 7, - 7, - 7, - 8, - 8, - 8, - 8, - 8, - 8, - 8, - 8, - 9, - 9, - 9, - 9, - 9, - 9, - 9, - 9, - 9, - 9, - 9, - 9, - 10, - 10, - 10, - 10, - 10, - 10, - 10, - 10, - 10, - 10, - 10, - 10, - 10, - 10, - 10, - 10, - 10, - 10, - 10, - 10, - 10, - 10, - 10, - 11, - 11, - 11, - 11, - 11, - 11, - 11, - 11, - 11, - 11, - 11, - 11, - 11, - 11, - 11, - 11, - 11, - 11, - 11, - 11, - 11, - 11, - 12, - 12, - 12, - 12, - 12, - 12, - 12, - 12, - 12, - 12, - 13, - 13, - 13, - 13, - 14, - 15 - ] -} \ No newline at end of file