diff --git a/figure_33_joint_plot.py b/figure_33_joint_plot.py index 00ae6a80..ec3db4cd 100644 --- a/figure_33_joint_plot.py +++ b/figure_33_joint_plot.py @@ -131,6 +131,7 @@ def plot_boxplot(df, x_label, y_label="Balanced accuracy", order_ = None): plt.xlabel(x_label) plt.ylabel(y_label) + plt.xticks(rotation=90) plt.tight_layout() #plt.show(block=True) @@ -200,6 +201,8 @@ def plot_per_train_time_relation(df, label): for norm_window in [0, 5, 10, 20, 30, 60, 120, 180, 300, 480, 720, 960, 1200, 1440]: OUT_FILE = f"d_out_patient_across_{label_name}_class_{class_}_{norm_window}.pkl" PATH_READ = os.path.join(PATH_PER, OUT_FILE) + if not os.path.exists(PATH_READ): + continue df = read_per_out(PATH_READ) #df = df.query("loc == 'ecog_stn'") diff --git a/post_hoc_normalize.py b/post_hoc_normalize.py index d3d4186d..7ff15828 100644 --- a/post_hoc_normalize.py +++ b/post_hoc_normalize.py @@ -12,8 +12,10 @@ import time PATH_IN = "/Users/Timon/Documents/UCSF_Analysis/out/merged_std" -PATH_OUT_BASE = "/Users/Timon/Documents/UCSF_Analysis/out/merged_normalized" +PATH_IN = '/Users/Timon/Library/CloudStorage/OneDrive-Charité-UniversitätsmedizinBerlin/Shared Documents - ICN Data World/General/Data/UCSF_OLARU/features/merged_std' +PATH_OUT_BASE = "/Users/Timon/Documents/UCSF_Analysis/out/merged_normalized" +PATH_OUT_BASE = '/Users/Timon/Library/CloudStorage/OneDrive-Charité-UniversitätsmedizinBerlin/Shared Documents - ICN Data World/General/Data/UCSF_OLARU/features/merged_normalized' if __name__ == "__main__": df_all = pd.read_csv(os.path.join(PATH_IN, "all_merged_preprocessed.csv"), index_col=0) @@ -36,15 +38,21 @@ def process_sub(sub): else: time_before = df_sub.loc[idx, "pkg_dt"] - pd.Timedelta(minutes=normalization_window) time_now = df_sub.loc[idx, "pkg_dt"] - df_range = df_sub.query("pkg_dt >= @time_before and pkg_dt <@time_now") - if df_range.shape[0] < 2: - continue + + if normalization_window == 0: + df_range = df_sub.query("pkg_dt == @time_now") + else: + df_range = df_sub.query("pkg_dt >= @time_before and pkg_dt <@time_now") + if df_range.shape[0] < 2: + continue cols_use = [f for f in df_range.columns if "pkg_dt" not in f and f != "sub"] mean_ = df_range[cols_use].mean() - std_ = df_range[cols_use].std() - - row_add = (df_sub.loc[idx, cols_use] - mean_) / std_ + if normalization_window != 0: + std_ = df_range[cols_use].std() + row_add = (df_sub.loc[idx, cols_use] - mean_) / std_ + else: + row_add = mean_ time_pkg_before = df_sub.loc[idx, "pkg_dt"] - pd.Timedelta(minutes=5) time_pkg_after = df_sub.loc[idx, "pkg_dt"] + pd.Timedelta(minutes=5) @@ -74,7 +82,7 @@ def process_sub(sub): #process_sub(subs[0]) # parallelize - for normalization_window in [5, 10, 20, 30, 60, 120][::-1]: + for normalization_window in [0]: # [5, 10, 20, 30, 60, 120][::-1] PATH_OUT = os.path.join(PATH_OUT_BASE, str(normalization_window)) if not os.path.exists(PATH_OUT): os.makedirs(PATH_OUT) diff --git a/run_decoding_ucsf_across_patients_diff_norm_windows.py b/run_decoding_ucsf_across_patients_diff_norm_windows.py index f9634c47..b2a15614 100644 --- a/run_decoding_ucsf_across_patients_diff_norm_windows.py +++ b/run_decoding_ucsf_across_patients_diff_norm_windows.py @@ -29,12 +29,12 @@ if os.path.exists(os.path.join(PATH_PER, f"d_out_patient_across_{label_name}_class_{CLASSIFICATION}_{str(norm_window)}.pkl")): continue - if norm_window == 0: - PATH_OUT = "/Users/Timon/Library/CloudStorage/OneDrive-Charité-UniversitätsmedizinBerlin/Shared Documents - ICN Data World/General/Data/UCSF_OLARU/features/merged_std" - df_all = pd.read_csv(os.path.join(PATH_OUT, "all_merged_preprocessed.csv"), index_col=0) - else: - PATH_OUT = "/Users/Timon/Library/CloudStorage/OneDrive-Charité-UniversitätsmedizinBerlin/Shared Documents - ICN Data World/General/Data/UCSF_OLARU/features/merged_normalized" - df_all = pd.read_csv(os.path.join(PATH_OUT, str(norm_window), "all_merged_normed.csv"), index_col=0) + # if norm_window == 0: + # PATH_OUT = "/Users/Timon/Library/CloudStorage/OneDrive-Charité-UniversitätsmedizinBerlin/Shared Documents - ICN Data World/General/Data/UCSF_OLARU/features/merged_std" + # df_all = pd.read_csv(os.path.join(PATH_OUT, "all_merged_preprocessed.csv"), index_col=0) + # else: + PATH_OUT = "/Users/Timon/Library/CloudStorage/OneDrive-Charité-UniversitätsmedizinBerlin/Shared Documents - ICN Data World/General/Data/UCSF_OLARU/features/merged_normalized" + df_all = pd.read_csv(os.path.join(PATH_OUT, str(norm_window), "all_merged_normed.csv"), index_col=0) #df_all = df_all.drop(columns=["Unnamed: 0"]) subs = df_all["sub"].unique()