Commit cc25eac
fixed some issues
lyna1404 committed Aug 15, 2024
1 parent bf1da2f
Showing 18 changed files with 94 additions and 1,528 deletions.
130 changes: 85 additions & 45 deletions MED3pa/med3pa/comparaison.py
@@ -24,12 +24,42 @@ def __init__(self, results1_path: str, results2_path: str) -> None:
self.profiles_detectron_comparaison = {}
self.global_metrics_comparaison = {}
self.models_evaluation_comparaison = {}
self.shared_profiles = {} # New variable to store shared profiles
self.config_file = {}
self.compare_profiles = False
self.compare_profiles = True
self.compare_detectron = False
self.mode = ""
self._check_experiment_name()

def identify_shared_profiles(self):
"""
Identifies the shared profiles between the two experiments and stores them in shared_profiles.
"""
profiles_file_1 = os.path.join(self.results1_path, 'test', 'profiles.json')
profiles_file_2 = os.path.join(self.results2_path, 'test', 'profiles.json')

with open(profiles_file_1, 'r') as f1, open(profiles_file_2, 'r') as f2:
profiles1 = json.load(f1)
profiles2 = json.load(f2)

shared = {}

for samples_ratio, dr_dict in profiles1.items():
if samples_ratio in profiles2: # Only proceed if samples_ratio is in both profiles
if samples_ratio not in shared:
shared[samples_ratio] = {}
for dr, profiles in dr_dict.items():
if dr in profiles2[samples_ratio]: # Only proceed if dr is in both profiles
for profile in profiles:
profile_path = " / ".join(profile["path"])
# Check if the profile_path exists in both profiles1 and profiles2
matching_profile = next((p for p in profiles2[samples_ratio][dr] if p["path"] == profile["path"]), None)
if matching_profile:
if profile_path not in shared[samples_ratio]:
shared[samples_ratio][profile_path] = profile["path"]

self.shared_profiles = shared # Store shared profiles
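
For illustration, a minimal, self-contained sketch of this intersection logic; the data below is hypothetical but follows the samples_ratio → dr → profile nesting that profiles.json uses:

profiles1 = {"0": {"100": [{"path": ["age > 50", "bmi <= 30"], "metrics": {"auc": 0.81}}]}}
profiles2 = {"0": {"100": [{"path": ["age > 50", "bmi <= 30"], "metrics": {"auc": 0.74}}],
                   "90": [{"path": ["age <= 50"], "metrics": {"auc": 0.70}}]}}

shared = {}
for samples_ratio, dr_dict in profiles1.items():
    if samples_ratio not in profiles2:
        continue
    shared.setdefault(samples_ratio, {})
    for dr, profiles in dr_dict.items():
        if dr not in profiles2[samples_ratio]:
            continue
        for profile in profiles:
            # A profile is shared when the same path appears under the same samples_ratio and dr
            if any(p["path"] == profile["path"] for p in profiles2[samples_ratio][dr]):
                shared[samples_ratio][" / ".join(profile["path"])] = profile["path"]

print(shared)  # {'0': {'age > 50 / bmi <= 30': ['age > 50', 'bmi <= 30']}}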

def _check_experiment_name(self) -> None:
"""
Checks if the experiment_name in the config_file of both results paths is the same.
@@ -69,10 +99,25 @@ def is_comparable(self) -> bool:
base_model_different = self.config_file['base_model']['different']

if self.compare_detectron:
params_different = self.config_file['med3pa_detectron_params']['different']
# Extract med3pa_detectron_params for comparison, excluding apc_model and ipc_model
params1 = self.config_file['med3pa_detectron_params']['med3pa_detectron_params1'].copy()
params2 = self.config_file['med3pa_detectron_params']['med3pa_detectron_params2'].copy()
# Remove apc_model and ipc_model from comparison
params1['med3pa_params'].pop('apc_model', None)
params1['med3pa_params'].pop('ipc_model', None)
params2['med3pa_params'].pop('apc_model', None)
params2['med3pa_params'].pop('ipc_model', None)
else:
params_different = self.config_file['med3pa_params']['different']
# Extract med3pa_params for comparison, excluding apc_model and ipc_model
params1 = self.config_file['med3pa_params']['med3pa_params1'].copy()
params2 = self.config_file['med3pa_params']['med3pa_params2'].copy()
params1.pop('apc_model', None)
params1.pop('ipc_model', None)
params2.pop('apc_model', None)
params2.pop('ipc_model', None)


params_different = (params1 != params2)
# Check the conditions for comparability
can_compare = False
if datasets_different and not base_model_different and not params_different:
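
One caveat worth noting: dict.copy() is shallow, so the pop calls on the nested med3pa_params dictionary above also mutate the original entries in config_file. A sketch of a side-effect-free equality check (the helper name is hypothetical, not part of the commit):

from copy import deepcopy

def params_equal_ignoring_models(params1: dict, params2: dict) -> bool:
    """Compare two parameter dicts while ignoring the apc_model and ipc_model entries."""
    p1, p2 = deepcopy(params1), deepcopy(params2)
    for params in (p1, p2):
        # Pop from the nested med3pa_params dict if present, else from the top level
        target = params.get('med3pa_params', params)
        target.pop('apc_model', None)
        target.pop('ipc_model', None)
    return p1 == p2

# params_equal_ignoring_models({'med3pa_params': {'ipc_model': 'xgb', 'metric': 'mse'}},
#                              {'med3pa_params': {'ipc_model': 'rf',  'metric': 'mse'}})  -> True
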
Expand Down Expand Up @@ -105,7 +150,8 @@ def _check_experiment_tree(self) -> None:

def compare_profiles_metrics(self):
"""
Compares profile metrics between two sets of results and stores them in a dictionary.
Compares profile metrics between two sets of results and stores them in a dictionary,
using only the shared profiles.
"""
combined = {}
profiles_file_1 = os.path.join(self.results1_path, 'test', 'profiles.json')
@@ -115,66 +161,61 @@ def compare_profiles_metrics(self):
profiles1 = json.load(f1)
profiles2 = json.load(f2)

for samples_ratio, dr_dict in profiles1.items():
if samples_ratio not in combined:
combined[samples_ratio] = {}
for dr, profiles in dr_dict.items():
for profile in profiles:
profile_path = " / ".join(profile["path"])
if profile_path not in combined[samples_ratio]:
combined[samples_ratio][profile_path] = {}
if dr not in combined[samples_ratio][profile_path]:
combined[samples_ratio][profile_path][dr] = {}
combined[samples_ratio][profile_path][dr]['metrics_1'] = profile["metrics"]

for samples_ratio, dr_dict in profiles2.items():
if samples_ratio not in combined:
combined[samples_ratio] = {}
for dr, profiles in dr_dict.items():
for profile in profiles:
profile_path = " / ".join(profile["path"])
for samples_ratio, profiles_dict in self.shared_profiles.items():
combined[samples_ratio] = {}
for profile_path_list in profiles_dict.values():
profile_path = " / ".join(profile_path_list) # Convert the list to a string

# Extract the declaration rates (drs) available for this samples_ratio in profiles1
drs = [dr for dr, profiles in profiles1[samples_ratio].items()]
for dr in drs:
# Attempt to find matching profiles in both profiles1 and profiles2
matching_profile_1 = next((p for p in profiles1[samples_ratio][dr] if " / ".join(p["path"]) == profile_path), None)
matching_profile_2 = next((p for p in profiles2[samples_ratio][dr] if " / ".join(p["path"]) == profile_path), None)

if profile_path not in combined[samples_ratio]:
combined[samples_ratio][profile_path] = {}
if dr not in combined[samples_ratio][profile_path]:
combined[samples_ratio][profile_path][dr] = {}
combined[samples_ratio][profile_path][dr]['metrics_2'] = profile["metrics"]

combined[samples_ratio][profile_path][dr] = {
'metrics_1': matching_profile_1["metrics"] if matching_profile_1 else None,
'metrics_2': matching_profile_2["metrics"] if matching_profile_2 else None
}

self.profiles_metrics_comparaison = combined
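
After the loop, profiles_metrics_comparaison is keyed by samples_ratio, then by the joined profile path, then by declaration rate; a hypothetical instance of the resulting shape:

profiles_metrics_comparaison = {
    "0": {                                  # samples_ratio
        "age > 50 / bmi <= 30": {           # profile path, joined with " / "
            "100": {                        # declaration rate (dr)
                "metrics_1": {"auc": 0.81, "accuracy": 0.77},
                "metrics_2": {"auc": 0.74, "accuracy": 0.71},
            },
        },
    },
}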



def compare_profiles_detectron_results(self):
"""
Compares Detectron results between two sets of profiles and stores them in a dictionary.
Compares Detectron results between two sets of profiles and stores them in a dictionary,
using only the shared profiles.
"""
combined = {}

profiles_file_1 = os.path.join(self.results1_path, 'test', 'profiles.json')
profiles_file_2 = os.path.join(self.results2_path, 'test', 'profiles.json')

with open(profiles_file_1, 'r') as f1, open(profiles_file_2, 'r') as f2:
profiles1 = json.load(f1)
profiles2 = json.load(f2)

# Determine the smallest positive samples_ratio
smallest_samples_ratio = min([int(k) for k in profiles1.keys() if int(k) >= 0])
smallest_samples_ratio = str(smallest_samples_ratio)
for samples_ratio, profiles_dict in self.shared_profiles.items():
combined[samples_ratio] = {}
for profile_path_list in profiles_dict.values():
profile_path = " / ".join(profile_path_list) # Convert the list to a string

for profiles, key in zip([profiles1, profiles2], ['detectron_results_1', 'detectron_results_2']):
if smallest_samples_ratio not in profiles:
continue
# Attempt to find matching profiles in both profiles1 and profiles2
matching_profile_1 = next((p for p in profiles1[samples_ratio]["100"] if " / ".join(p["path"]) == profile_path), None)
matching_profile_2 = next((p for p in profiles2[samples_ratio]["100"] if " / ".join(p["path"]) == profile_path), None)

dr_dict = profiles[smallest_samples_ratio]
if profile_path not in combined[samples_ratio]:
combined[samples_ratio][profile_path] = {}

if "100" not in dr_dict:
continue

for profile in dr_dict["100"]:
profile_path = " / ".join(profile["path"])
if profile_path not in combined:
combined[profile_path] = {}

combined[profile_path][key] = profile["detectron_results"]
combined[samples_ratio][profile_path]['detectron_results_1'] = matching_profile_1["detectron_results"] if matching_profile_1 else None
combined[samples_ratio][profile_path]['detectron_results_2'] = matching_profile_2["detectron_results"] if matching_profile_2 else None

self.profiles_detectron_comparaison = combined
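
Note that this lookup indexes profiles1[samples_ratio]["100"] directly, so a shared samples_ratio without a "100" declaration-rate bucket would raise a KeyError. A defensive variant (a sketch under that assumption, not the commit's code) could use .get:

def find_profile(profiles: dict, samples_ratio: str, profile_path: str):
    """Return the profile whose joined path matches profile_path, or None if absent."""
    candidates = profiles.get(samples_ratio, {}).get("100", [])
    return next((p for p in candidates if " / ".join(p["path"]) == profile_path), None)

profiles1 = {"0": {"100": [{"path": ["age > 50"], "detectron_results": {"p_value": 0.03}}]}}
print(find_profile(profiles1, "0", "age > 50"))  # the matching profile dict
print(find_profile(profiles1, "5", "age > 50"))  # None rather than a KeyError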


def compare_global_metrics(self):
"""
Compares global metrics between two sets of results and stores them in a dictionary.
@@ -288,9 +329,8 @@ def compare_experiments(self):
raise ValueError("The two experiments cannot be compared based on the provided criteria.")

self.compare_global_metrics()

self.identify_shared_profiles() # Identify shared profiles before comparisons
if self.mode in ['apc', 'mpc']:
self._check_experiment_tree()
if self.compare_profiles:
self.compare_profiles_metrics()
if self.compare_detectron:
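
Taken together, a hypothetical end-to-end use of this comparison class (the class and import names below are assumed for illustration; the diff shows only the methods, not the class declaration):

from MED3pa.med3pa.comparaison import Med3paComparison  # hypothetical class/import name

comparison = Med3paComparison("experiments/run_a", "experiments/run_b")  # hypothetical result paths
comparison.compare_experiments()  # raises ValueError when the two runs are not comparable
print(comparison.global_metrics_comparaison)
print(comparison.profiles_metrics_comparaison)
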
4 changes: 2 additions & 2 deletions MED3pa/med3pa/experiment.py
@@ -491,7 +491,7 @@ def _run_by_set(datasets_manager: DatasetsManager,
else:
APC_model = apc_instance
print("Used a trainde IPC instance.")

# Predict APC values
APC_values = APC_model.predict(x)
print("Aggregated confidence scores calculated.")
@@ -527,7 +527,7 @@ def _run_by_set(datasets_manager: DatasetsManager,
for samples_ratio in range(samples_ratio_min, samples_ratio_max + 1, samples_ratio_step):

# Calculate profiles and their metrics by declaration rate
MDRCalculator.calc_profiles(profiles_manager, tree, mpc_dataset, features, MPC_values, samples_ratio)
MDRCalculator.calc_profiles(profiles_manager, tree, mpc_dataset, features, MPC_values, samples_ratio)
MDRCalculator.calc_metrics_by_profiles(profiles_manager, mpc_dataset, features, MPC_values, samples_ratio, med3pa_metrics)
results.set_profiles_manager(profiles_manager)
print("Results extracted for minimum_samples_ratio = ", samples_ratio)
7 changes: 4 additions & 3 deletions MED3pa/med3pa/mdr.py
@@ -304,7 +304,7 @@ def calc_profiles(profiles_manager: ProfilesManager, tree: TreeRepresentation, d
lost_profiles_all = [] # Saves lost profiles
last_min_confidence_level = -1 # Last min confidence level
min_confidence_levels_dict = {} # Saves the min_confidence_level thresholds

precision = 14
# Go through all declaration rates
for dr in range(100, -1, -1):

@@ -329,10 +329,11 @@
# calculate the samples_ratio (pop%) and mean_confidence_level of this node, if the filtered data isn't empty
if len(filtered_confidence_scores) > 0:
samples_ratio = len(filtered_confidence_scores) / len(confidence_scores) * 100
mean_cconfidence = np.mean(filtered_confidence_scores) if filtered_confidence_scores.size > 0 else 0
mean_cconfidence = np.mean(filtered_confidence_scores)
# if the calculated samples_ratio and mean_confidence meet the conditions, keep this node
if samples_ratio >= min_samples_ratio and mean_cconfidence >= min_confidence_level:
if samples_ratio >= min_samples_ratio and round(mean_cconfidence, precision) >= round(min_confidence_level, precision):
profiles_current.append(node)



# If the last profiles are different from current profiles
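
The new precision-based comparison guards against ordinary floating-point noise: a mean confidence that is mathematically equal to min_confidence_level can otherwise compare as strictly smaller. A minimal sketch, using precision = 14 as in the diff (sample values hypothetical):

import numpy as np

filtered_confidence_scores = np.array([0.1, 0.7])
min_confidence_level = 0.4
precision = 14

mean_confidence = np.mean(filtered_confidence_scores)  # 0.39999999999999997, not 0.4
print(mean_confidence >= min_confidence_level)                                       # False
print(round(mean_confidence, precision) >= round(min_confidence_level, precision))  # True
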
4 changes: 2 additions & 2 deletions MED3pa/med3pa/models.py
@@ -282,7 +282,7 @@ def print_decision_tree_structure(tree_model, feature_names=None):
"""
tree_rules = export_text(tree_model, feature_names=feature_names)
print(tree_rules)

def optimize(self, param_grid: dict, cv: int, x: np.ndarray, error_prob: np.ndarray, sample_weight: np.ndarray = None) -> None:
"""
Optimizes the model parameters using GridSearchCV.
@@ -303,7 +303,7 @@ def optimize(self, param_grid: dict, cv: int, x: np.ndarray, error_prob: np.ndar
self.params.update(grid_search.best_params_)
self.grid_search_params = param_grid
df_X, df_y, df_w = self.dataPreparationStrategy.execute(column_labels=self.features, observations=x, labels=error_prob)
self.treeRepresentation.build_tree(self.model, df_X, error_prob, node_id=0)
self.treeRepresentation.head = self.treeRepresentation.build_tree(self.model, df_X, error_prob, node_id=0)
self.optimized = True


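
This one-line fix matters because build_tree appears to return the constructed root rather than assigning it internally (which is what the change implies), so the previous call built the tree and then discarded it, leaving treeRepresentation.head unset. In miniature, with hypothetical stand-in classes:

class Node:
    def __init__(self, value):
        self.value = value

class TreeRepresentation:
    def __init__(self):
        self.head = None

    def build_tree(self, value):
        # Returns the new root; deliberately does not set self.head itself.
        return Node(value)

tree = TreeRepresentation()
tree.build_tree(42)              # before the fix: return value discarded
assert tree.head is None         # ... so the head is never populated
tree.head = tree.build_tree(42)  # after the fix: the root is stored explicitly
assert tree.head.value == 42
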
2 changes: 1 addition & 1 deletion MED3pa/med3pa/tree.py
@@ -84,7 +84,7 @@ def build_tree(self, dtr: DecisionTreeRegressorModel, X: DataFrame, y: Series, n
node_thresh = dtr.model.tree_.threshold[node_id]
node_feature_id = dtr.model.tree_.feature[node_id]
node_feature = self.features[node_feature_id]

# Check if the split would result in an empty set, if so, stop the recursion
if y[X[node_feature] <= node_thresh].size == 0 or y[X[node_feature] > node_thresh].size == 0:
print("split would results in an empty data section")
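
For context, the guard in build_tree stops recursing when a learned threshold would route every sample to one side; a sketch with hypothetical data:

import pandas as pd

X = pd.DataFrame({"age": [30, 35, 40]})
y = pd.Series([0.12, 0.08, 0.20])
node_thresh = 25.0  # hypothetical threshold below every observed value

left, right = y[X["age"] <= node_thresh], y[X["age"] > node_thresh]
degenerate = left.size == 0 or right.size == 0
print(degenerate)  # True: the split produces an empty side, so recursion stops
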
26 changes: 0 additions & 26 deletions setup.py

This file was deleted.

