Skip to content

Commit

Permalink
adapt CV norm and feature analysis
Browse files Browse the repository at this point in the history
  • Loading branch information
timonmerk committed Oct 23, 2024
1 parent fb5a74c commit 2f8a235
Show file tree
Hide file tree
Showing 3 changed files with 256 additions and 239 deletions.
61 changes: 38 additions & 23 deletions figure_25_correlated_pkg_all_scores.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import os
import yaml
import seaborn as sb
from py_neuromodulation import nm_stats
from scipy import stats

df_s = pd.read_csv("ClinicalScoresTable.csv")
Expand Down Expand Up @@ -44,6 +45,9 @@

UE = df_s[df_s["Study id"] == sub[:-1]][UE].iloc[0]
LE = df_s[df_s["Study id"] == sub[:-1]][LE].iloc[0]

# clip pkg_bk to 80
df_pkg["pkg_bk"] = np.clip(df_pkg["pkg_bk"], 0, 300)

d_out.append({
"sub": sub[:-1],
Expand All @@ -60,6 +64,7 @@
"pkg_dk_max": df_pkg["pkg_dk"].max(),
"pkg_dk_75": np.quantile(df_pkg["pkg_dk"].dropna(), 0.75),
"pkg_bk_mean": df_pkg["pkg_bk"].mean(),
"pkg_bk_median": df_pkg["pkg_bk"].median(),
"pkg_bk_max": df_pkg["pkg_bk"].max(),
"pkg_bk_75": np.quantile(df_pkg["pkg_bk"].dropna(), 0.75),
"UPDRS (Off)" : df_s[df_s["Study id"] == sub[:-1]]["UPDRS (Off)"].iloc[0],
Expand All @@ -70,35 +75,42 @@

df_out = pd.DataFrame(d_out)

# Correlate each UPDRS tremor sub-score with summary statistics of the PKG
# tremor signal: one regression panel per (score, statistic) pair, annotated
# with the Spearman correlation computed on the non-missing rows.
updrs_tremor_scores = ["UE", "LE", "postural", "kinetic", "updrs_tremor", "tremor_constancy"]
pkg_tremor_stats = ["pkg_tremor_mean", "pkg_tremor_max", "pkg_tremor_75"]

plt.figure(figsize=(8, 20))
for row_idx, col_plt in enumerate(updrs_tremor_scores):
    for stat_idx, col_pkg in enumerate(pkg_tremor_stats):
        plt.subplot(6, 3, row_idx * 3 + stat_idx + 1)
        sb.regplot(data=df_out, x=col_pkg, y=col_plt)
        # Spearman's rho is computed only on rows where the PKG statistic exists.
        valid = df_out[col_pkg].notna()
        rho, p = stats.spearmanr(df_out.loc[valid, col_pkg], df_out.loc[valid, col_plt])
        plt.title(f"rho={rho:.2f}, p={p:.2f}")
plt.suptitle("Tremor PKG - UPDRS correlations")
plt.tight_layout()
plt.savefig(os.path.join(PATH_FIGURES, "pkg_tremor_correlation.pdf"))
plt.show(block=True)

plt.figure(figsize=(8, 5))
for idx_plt_col, col_plt in enumerate(["UPDRS (Off)", "UPDRS (Off-On)"]):
for idx_pkg, col_pkg in enumerate(["pkg_bk_mean", "pkg_bk_max", "pkg_bk_75"]):
plt.subplot(2, 3, 1+idx_pkg + idx_plt_col*3)
for idx_pkg, col_pkg in enumerate(["pkg_bk_mean", "pkg_bk_median", "pkg_bk_max", "pkg_bk_75"]):
plt.subplot(2, 4, 1+idx_pkg + idx_plt_col*4)
# remove inf values
#idx_not_none = ~df_out[col_pkg].isnull()
idx_not_inf = np.isfinite(df_out[col_pkg])

sb.regplot(data=df_out[idx_not_inf], x=col_pkg, y=col_plt)
data_plt = df_out[idx_not_inf].groupby(["sub"])[[col_plt, col_pkg]].sum().reset_index()
sb.regplot(data=data_plt, x=col_pkg, y=col_plt)

#sb.regplot(data=df_out, x=col_pkg, y=col_plt)
rho, p = stats.spearmanr(df_out[col_pkg], df_out[col_plt])
rho, p = stats.spearmanr(data_plt[col_pkg], data_plt[col_plt])
_, p = nm_stats.permutationTestSpearmansRho(
data_plt[col_pkg], data_plt[col_plt], False, None, 5000
)
plt.title(f"rho={rho:.2f}, p={p:.3f}")
plt.suptitle("Bradykinesia PKG - UPDRS correlations")
plt.tight_layout()
plt.savefig(os.path.join(PATH_FIGURES, "pkg_bradykinesia_correlation.pdf"))
plt.savefig(os.path.join(PATH_FIGURES, "pkg_bradykinesia_correlation_sum_subj.pdf"))
plt.show(block=True)

# Subject-level tremor correlations: average each PKG tremor statistic per
# subject, then regress every UPDRS tremor sub-score against the per-subject
# means.  rho comes from scipy's Spearman; the reported p-value is replaced by
# a permutation-test p (5000 shuffles) from nm_stats.
plt.figure(figsize=(8, 20))
score_columns = ["UE", "LE", "postural", "kinetic", "updrs_tremor", "tremor_constancy"]
stat_columns = ["pkg_tremor_mean", "pkg_tremor_max", "pkg_tremor_75"]
for score_idx, col_plt in enumerate(score_columns):
    for stat_idx, col_pkg in enumerate(stat_columns):
        plt.subplot(6, 3, score_idx * 3 + stat_idx + 1)
        # Drop rows with a missing PKG statistic before averaging per subject.
        valid = df_out[col_pkg].notna()
        data_plt = df_out[valid].groupby(["sub"])[[col_plt, col_pkg]].mean().reset_index()
        sb.regplot(data=data_plt, x=col_pkg, y=col_plt)
        rho, _ = stats.spearmanr(data_plt[col_pkg], data_plt[col_plt])
        _, p = nm_stats.permutationTestSpearmansRho(
            data_plt[col_pkg], data_plt[col_plt], False, None, 5000
        )
        plt.title(f"rho={rho:.2f}, p={p:.2f}")
plt.suptitle("Tremor PKG - UPDRS correlations")
plt.tight_layout()
plt.savefig(os.path.join(PATH_FIGURES, "pkg_tremor_correlation_mean_sub.pdf"))
plt.show(block=True)

plt.figure(figsize=(8, 5))
Expand All @@ -108,15 +120,18 @@
# remove inf values
#idx_not_none = ~df_out[col_pkg].isnull()
idx_not_inf = np.isfinite(df_out[col_pkg])

sb.regplot(data=df_out[idx_not_inf], x=col_pkg, y=col_plt)
data_plt = df_out[idx_not_none].groupby(["sub"])[[col_plt, col_pkg]].mean().reset_index()
sb.regplot(data=data_plt[idx_not_inf], x=col_pkg, y=col_plt)

#sb.regplot(data=df_out, x=col_pkg, y=col_plt)
rho, p = stats.spearmanr(df_out[col_pkg], df_out[col_plt])
rho, p = stats.spearmanr(data_plt[col_pkg], data_plt[col_plt])
_, p = nm_stats.permutationTestSpearmansRho(
data_plt[col_pkg], data_plt[col_plt], False, None, 5000
)
plt.title(f"rho={rho:.2f}, p={p:.2f}")
plt.suptitle("Dyskinesia PKG - UPDRS correlations")
plt.tight_layout()
plt.savefig(os.path.join(PATH_FIGURES, "pkg_dyskinesia_correlation.pdf"))
plt.savefig(os.path.join(PATH_FIGURES, "pkg_dyskinesia_correlation_mean_sub.pdf"))
plt.show(block=True)


217 changes: 109 additions & 108 deletions run_decoding_ucsf_across_patients_diff_features.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,123 +11,124 @@
from matplotlib.backends.backend_pdf import PdfPages

PATH_OUT = "/Users/Timon/Documents/UCSF_Analysis/out/merged_normalized"
PATH_OUT = '/Users/Timon/Library/CloudStorage/OneDrive-Charité-UniversitätsmedizinBerlin/Shared Documents - ICN Data World/General/Data/UCSF_OLARU/features/merged_normalized_10s_window_length/480'
PATH_PER = '/Users/Timon/Library/CloudStorage/OneDrive-Charité-UniversitätsmedizinBerlin/Shared Documents - ICN Data World/General/Data/UCSF_OLARU/out_per'

CLASSIFICATION = True
PLT_PDF = False

if __name__ == "__main__":

for feature_mod in ["fft", "welch", "bursts", "Hjorth", "Sharpwave", "fooof", "LineLength", "_raw_"]:

df_all = pd.read_csv(os.path.join(PATH_OUT, str(480), "all_merged_normed.csv"), index_col=0)
#df_all = df_all.drop(columns=["Unnamed: 0"])

df_all = df_all[[c for c in df_all.columns if "pkg_" in c or c == "sub" or feature_mod in c]]

subs = df_all["sub"].unique()
d_out = {}

label_names_class = ["pkg_dk_class", "pkg_bk_class", "pkg_tremor_class"]
label_names_reg = ["pkg_dk_normed", "pkg_bk_normed", "pkg_tremor_normed"]

#for label_idx, label_name in enumerate(label_names_class):
label_idx = 0
label_name = label_names_class[label_idx]
print(label_name)
d_out[label_name] = {}

df_all = df_all.drop(columns=df_all.columns[df_all.isnull().all()])
df_all["pkg_dt"] = pd.to_datetime(df_all["pkg_dt"])
mask = ~df_all[label_name].isnull()
df_all = df_all[mask]

#for loc_ in ["ecog_stn", "ecog", "stn",]:
loc_ = "ecog_stn"
d_out[label_name][loc_] = {}
if PLT_PDF:
pdf_pages = PdfPages(os.path.join("figures_ucsf", f"decoding_across_patients_class_{label_name}_{loc_}.pdf"))
if loc_ == "ecog_stn":
df_use = df_all.copy()
elif loc_ == "ecog":
df_use = df_all[[c for c in df_all.columns if c.startswith("ch_cortex") or c.startswith("pkg") or c.startswith("sub")]].copy()
elif loc_ == "stn":
df_use = df_all[[c for c in df_all.columns if c.startswith("ch_subcortex") or c.startswith("pkg") or c.startswith("sub")]].copy()

for sub_test in tqdm(subs):
df_test = df_use[df_use["sub"] == sub_test]

df_test = df_test.drop(columns=["sub"])
y_test = np.array(df_test[label_name])
df_train = df_use[df_use["sub"] != sub_test]
df_train = df_train.drop(columns=["sub"])
y_train = np.array(df_train[label_name])

X_train = df_train[[c for c in df_train.columns if "pkg" not in c]]
X_train["hour"] = df_train["pkg_dt"].dt.hour

X_test = df_test[[c for c in df_test.columns if "pkg" not in c]]
X_test["hour"] = df_test["pkg_dt"].dt.hour
for label_name in ["pkg_bk", "pkg_dk", "pkg_tremor"]:
if label_name == "pkg_bk":
CLASSIFICATION = False
else:
CLASSIFICATION = True
for feature_mod in ["fft", "welch", "bursts", "Hjorth", "Sharpwave", "fooof", "LineLength", "_raw_"]:

df_all = pd.read_csv(os.path.join(PATH_OUT, "all_merged_normed.csv"), index_col=0)
#df_all = df_all.drop(columns=["Unnamed: 0"])

df_all = df_all[[c for c in df_all.columns if "pkg_" in c or c == "sub" or feature_mod in c]]

subs = df_all["sub"].unique()
d_out = {}

#X_ = X.dropna(axis=1) # drop all columns that have NaN values
if CLASSIFICATION:
classes = np.unique(y_train)
weights = compute_class_weight(class_weight='balanced', classes=classes, y=y_train)
class_weights = dict(zip(classes, weights))
model = CatBoostClassifier(silent=True, class_weights=class_weights)
#model = linear_model.LogisticRegression()
else:
model = CatBoostRegressor(silent=True) # task_type="GPU"

model.fit(X_train, y_train)

pr = model.predict(X_test)

feature_importances = model.get_feature_importance(Pool(X_test, y_test), type="PredictionValuesChange")

d_out[label_name][loc_][sub_test] = {}
if CLASSIFICATION:
y_test = y_test.astype(int)
pr = pr.astype(int)
d_out[label_name][loc_][sub_test]["accuracy"] = metrics.accuracy_score(y_test, pr)
d_out[label_name][loc_][sub_test]["ba"] = metrics.balanced_accuracy_score(y_test, pr)
d_out[label_name][loc_][sub_test]["f1"] = metrics.f1_score(y_test, pr)
d_out[label_name][loc_][sub_test]["pr_proba"] = model.predict_proba(X_test)
d_out[label_name][loc_][sub_test]["true_reg_normed"] = df_use[df_use["sub"] == sub_test][label_names_reg[label_idx]]

else:
corr_coeff = np.corrcoef(pr, np.array(y_test))[0, 1]
d_out[label_name][loc_][sub_test]["corr_coeff"] = corr_coeff
d_out[label_name][loc_][sub_test]["r2"] = metrics.r2_score(y_test, pr)
d_out[label_name][loc_][sub_test]["mse"] = metrics.mean_squared_error(y_test, pr)
d_out[label_name][loc_][sub_test]["mae"] = metrics.mean_absolute_error(y_test, pr)
d_out[label_name][loc_][sub_test]["pr"] = pr
d_out[label_name][loc_][sub_test]["y_"] = y_test
d_out[label_name][loc_][sub_test]["time"] = df_test["pkg_dt"].values
d_out[label_name][loc_][sub_test]["feature_importances"] = feature_importances
d_out[label_name] = {}

df_all = df_all.drop(columns=df_all.columns[df_all.isnull().all()])
df_all["pkg_dt"] = pd.to_datetime(df_all["pkg_dt"])
mask = ~df_all[label_name].isnull()
df_all = df_all[mask]

#for loc_ in ["ecog_stn", "ecog", "stn",]:
loc_ = "ecog_stn"
d_out[label_name][loc_] = {}
if PLT_PDF:
plt.figure(figsize=(10, 4), dpi=200)
#plt.plot(y_test, label="true")
#plt.plot(pr, label="pr")
plt.plot(d_out[label_name][loc_][sub_test]["pr_proba"][:, 1], label="pr_proba")
plt.plot(d_out[label_name][loc_][sub_test]["true_reg_normed"].values, label="true")

plt.legend()
plt.ylabel(f"PKG score {label_name}")
plt.xlabel("Time [30s]")
pdf_pages = PdfPages(os.path.join("figures_ucsf", f"decoding_across_patients_class_{label_name}_{loc_}.pdf"))
if loc_ == "ecog_stn":
df_use = df_all.copy()
elif loc_ == "ecog":
df_use = df_all[[c for c in df_all.columns if c.startswith("ch_cortex") or c.startswith("pkg") or c.startswith("sub")]].copy()
elif loc_ == "stn":
df_use = df_all[[c for c in df_all.columns if c.startswith("ch_subcortex") or c.startswith("pkg") or c.startswith("sub")]].copy()

if "_dk" in label_name:
df_use[label_name] = (df_use[label_name].copy() / df_use[label_name].max()) > 0.02
elif "_tremor" in label_name:
df_use[label_name] = df_use[label_name].copy() > 1

for sub_test in tqdm(subs):
df_test = df_use[df_use["sub"] == sub_test]

df_test = df_test.drop(columns=["sub"])
y_test = np.array(df_test[label_name])
df_train = df_use[df_use["sub"] != sub_test]
df_train = df_train.drop(columns=["sub"])
y_train = np.array(df_train[label_name])

X_train = df_train[[c for c in df_train.columns if "pkg" not in c]]
X_train["hour"] = df_train["pkg_dt"].dt.hour

X_test = df_test[[c for c in df_test.columns if "pkg" not in c]]
X_test["hour"] = df_test["pkg_dt"].dt.hour

#X_ = X.dropna(axis=1) # drop all columns that have NaN values
if CLASSIFICATION:
plt.title(f"ba: {np.round(d_out[label_name][loc_][sub_test]['ba'], 2)} sub: {sub_test}")
classes = np.unique(y_train)
weights = compute_class_weight(class_weight='balanced', classes=classes, y=y_train)
class_weights = dict(zip(classes, weights))
model = CatBoostClassifier(silent=True, class_weights=class_weights)
#model = linear_model.LogisticRegression()
else:
plt.title(f"corr_coeff: {np.round(corr_coeff, 2)} sub: {sub_test}")
pdf_pages.savefig(plt.gcf())
plt.close()
if PLT_PDF:
pdf_pages.close()
model = CatBoostRegressor(silent=True) # task_type="GPU"

# save d_out to a pickle file
if CLASSIFICATION:
SAVE_NAME = f"d_out_patient_across_class_feature_mod_{str(feature_mod)}_480_min.pkl"
else:
SAVE_NAME = "d_out_patient_across_reg.pkl"
with open(os.path.join("out_per", SAVE_NAME), "wb") as f:
pickle.dump(d_out, f)
model.fit(X_train, y_train)

pr = model.predict(X_test)

feature_importances = model.get_feature_importance(Pool(X_test, y_test), type="PredictionValuesChange")

d_out[label_name][loc_][sub_test] = {}
if CLASSIFICATION:
y_test = y_test.astype(int)
pr = pr.astype(int)
d_out[label_name][loc_][sub_test]["accuracy"] = metrics.accuracy_score(y_test, pr)
d_out[label_name][loc_][sub_test]["ba"] = metrics.balanced_accuracy_score(y_test, pr)
d_out[label_name][loc_][sub_test]["f1"] = metrics.f1_score(y_test, pr)
d_out[label_name][loc_][sub_test]["pr_proba"] = model.predict_proba(X_test)
else:
corr_coeff = np.corrcoef(pr, np.array(y_test))[0, 1]
d_out[label_name][loc_][sub_test]["corr_coeff"] = corr_coeff
d_out[label_name][loc_][sub_test]["r2"] = metrics.r2_score(y_test, pr)
d_out[label_name][loc_][sub_test]["mse"] = metrics.mean_squared_error(y_test, pr)
d_out[label_name][loc_][sub_test]["mae"] = metrics.mean_absolute_error(y_test, pr)
d_out[label_name][loc_][sub_test]["pr"] = pr
d_out[label_name][loc_][sub_test]["y_"] = y_test
d_out[label_name][loc_][sub_test]["time"] = df_test["pkg_dt"].values
d_out[label_name][loc_][sub_test]["feature_importances"] = feature_importances

if PLT_PDF:
plt.figure(figsize=(10, 4), dpi=200)
#plt.plot(y_test, label="true")
#plt.plot(pr, label="pr")
plt.plot(d_out[label_name][loc_][sub_test]["pr_proba"][:, 1], label="pr_proba")
plt.plot(d_out[label_name][loc_][sub_test]["true_reg_normed"].values, label="true")

plt.legend()
plt.ylabel(f"PKG score {label_name}")
plt.xlabel("Time [30s]")
if CLASSIFICATION:
plt.title(f"ba: {np.round(d_out[label_name][loc_][sub_test]['ba'], 2)} sub: {sub_test}")
else:
plt.title(f"corr_coeff: {np.round(corr_coeff, 2)} sub: {sub_test}")
pdf_pages.savefig(plt.gcf())
plt.close()
if PLT_PDF:
pdf_pages.close()

# save d_out to a pickle file
SAVE_NAME = f"d_out_patient_across_{label_name}_feature_mod_{str(feature_mod)}_480_min.pkl"

with open(os.path.join(PATH_PER, SAVE_NAME), "wb") as f:
pickle.dump(d_out, f)
Loading

0 comments on commit 2f8a235

Please sign in to comment.