Showing 18 changed files with 5,484 additions and 132 deletions.
@@ -0,0 +1,74 @@
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import os


if __name__ == "__main__":
    PATH_FIGURES = "/Users/Timon/Library/CloudStorage/OneDrive-Charité-UniversitätsmedizinBerlin/Shared Documents - ICN Data World/General/Data/UCSF_OLARU/figures_ucsf"
    PATH_READ = "/Users/Timon/Library/CloudStorage/OneDrive-Charité-UniversitätsmedizinBerlin/Shared Documents - ICN Data World/General/Data/UCSF_OLARU/features/merged_normalized_10s_window_length/480"
    df_all = pd.read_csv(os.path.join(PATH_READ, "all_merged_normed.csv"), index_col=0)
    df_all["pkg_dt"] = pd.to_datetime(df_all["pkg_dt"])
    subs = df_all["sub"].unique()

    def running_counter(values):
        counter = 0
        result = []
        for value in values:
            if value == 0:
                counter = 0  # Reset counter on zero
            else:
                counter += 1
            result.append(counter)
        return result
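    # Example (illustrative values): running_counter([0, 1, 1, 0, 1]) -> [0, 1, 2, 0, 1],
    # i.e. a count of consecutive non-zero entries that resets at every zero.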

    df_all["time_diff"] = df_all["pkg_dt"].diff().dt.total_seconds() / 60
    df_all["counter"] = 0
    df_all.loc[df_all["time_diff"] == 2, "counter"] = 1

    df_all["cum_sum"] = running_counter(df_all["counter"].values)
    # filter from df_all only the columns that start with pkg or cum_sum, that contain fft, or that equal sub
    df_all_filter = df_all[[c for c in df_all.columns if c.startswith("pkg") or c.startswith("cum_sum") or "fft" in c or "sub" == c]]

    for consec_duration in [2, 5, 10, 20, 50, 100, 200, 500]:
        idx_larger_ = np.where(df_all["cum_sum"] >= consec_duration)
        # for each such index, extract the preceding consec_duration rows and append them to a list
        l_df = []
        for idx in idx_larger_[0]:
            df_ = df_all_filter.iloc[idx-consec_duration:idx]
            # add each of these rows as columns with a "lag_" prefix
            # (note: the rename is applied repeatedly, so prefixes accumulate, e.g. "lag_2_lag_1_...")
            l_row = []
            for i in range(consec_duration):
                if i > 0:
                    df_ = df_.rename(columns={c: f"lag_{i}_{c}" for c in df_.columns})
                    l_row.append(df_.iloc[i])
                else:
                    l_row.append(df_.iloc[i])
            l_df.append(pd.concat(l_row, axis=0))
        df_duration = pd.DataFrame(l_df)
        print(f"Duration: {consec_duration}")
        print(df_duration.shape)
        df_duration.to_csv(os.path.join(PATH_READ, f"all_merged_normed_merge_{consec_duration}_consec.csv"))
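        # each saved CSV holds one row per valid end index, i.e. the features of the
        # preceding consec_duration samples flattened into lagged columns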


    PLT_ = False
    if PLT_:
        # histogram of contiguous recording lengths (cum_sum counts 2-min samples)
        plt.hist(df_all["cum_sum"]*2, bins=50, density=False)
        plt.xlabel("Minutes")
        plt.title("Histogram of time series lengths")
        plt.savefig(os.path.join(PATH_FIGURES, "histogram_time_series_lengths.pdf"))
        plt.show(block=True)

        # plot the same distribution as a CDF
        # TODO: flip the y axis, show it as a percentage, use a log x axis, and label each tick
        plt.hist(df_all["cum_sum"]*2, bins=50, density=True, cumulative=True, histtype='step')
        plt.xlabel("Minutes")
        plt.xlim(0, 610)
        plt.title("CDF of time series lengths")
        #plt.savefig(os.path.join(PATH_FIGURES, "cdf_time_series_lengths.pdf"))
        plt.show(block=True)
@@ -0,0 +1,105 @@
import pickle
import pandas as pd
import numpy as np
from py_neuromodulation import nm_RMAP, nm_stats
from matplotlib import pyplot as plt
import os
import nibabel as nib
import seaborn as sns

PATH_CONNECTIVITY = r"/Users/Timon/Library/CloudStorage/OneDrive-Charité-UniversitätsmedizinBerlin/Shared Documents - ICN Data World/General/Data/UCSF_OLARU/out_per/out_connectivity/out_conn"
PATH_PER = r'/Users/Timon/Library/CloudStorage/OneDrive-Charité-UniversitätsmedizinBerlin/Shared Documents - ICN Data World/General/Data/UCSF_OLARU/out_per/out_dir/df_per_ind_all_coords.csv'

PLT_ = False

df_per = pd.read_csv(PATH_PER, index_col=0)
l_ch_sel = []

for class_idx, CLASSIFICATION in enumerate([True, False]):
    plt.figure(figsize=(10, 10))
    plt_idx = 0
    for label_idx, label_name in enumerate(["pkg_dk", "pkg_bk", "pkg_tremor"]):
        for loc_idx, loc in enumerate(["ECOG", "STN", "GP"]):
            df_disease = df_per.query(f"classification == {CLASSIFICATION} and label == '{label_name}' and loc == '{loc}'")

            subs = np.unique([sub[:-1] for sub in df_disease["sub"].unique()])
            df_disease["sub_ind"] = df_disease["sub"].apply(lambda x: x[:-1])
            df_disease["hemisphere"] = df_disease["sub"].apply(lambda x: x[-1])
            per_test_ = []
            pred_corr_test = []

            for sub_test in df_disease["sub_ind"].unique():
                l_fp_train = []
                per_train = []
                l_fp_test = []
                per_test = []
                chs_test = []
                hemispheres_test = []

                affine = None

                for idx, row in df_disease.iterrows():
                    f_name = f'{row["sub"]}_ROI_{row["ch_orig"]}_conn-PPMI74P15CxPatients_desc-AvgRFz_funcmap.nii'
                    f_path = os.path.join(PATH_CONNECTIVITY, f_name)
                    rmapsel = nm_RMAP.RMAPCross_Val_ChannelSelector()
                    fp = rmapsel.load_fingerprint(f_path)
                    if affine is None:
                        affine = rmapsel.affine

                    if row["sub_ind"] == sub_test:
                        l_fp_test.append(fp)
                        per_test.append(row["per"])
                        chs_test.append(row["ch_orig"])
                        hemispheres_test.append(row["hemisphere"])
                    else:
                        l_fp_train.append(fp)
                        per_train.append(row["per"])

                rmap = nm_RMAP.RMAPCross_Val_ChannelSelector().get_RMAP(np.array(l_fp_train).T, np.array(per_train))
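                # leave-one-subject-out: the R-map is estimated from the training fingerprints
                # and their decoding performances only (presumably a voxel-wise association map
                # between connectivity and performance), then used to rank the held-out channels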

                #nm_RMAP.RMAPCross_Val_ChannelSelector().save_Nii(rmap, affine, os.path.join(PATH_CONNECTIVITY, f"rmap_func_{label_name}_class_{CLASSIFICATION}_loc_{loc}.nii"))
                corr_test = [np.corrcoef(
                    np.nan_to_num(rmap.flatten()),
                    np.nan_to_num(fp.flatten())
                )[0][1] for fp in l_fp_test]
                per_test_.append(per_test)
                pred_corr_test.append(corr_test)

                df_test_pr = pd.DataFrame({"per": per_test, "corr": corr_test, "hemisphere": hemispheres_test, "ch": chs_test})

                for hem in df_test_pr["hemisphere"].unique():
                    df_test_sub = df_test_pr.query(f"hemisphere == '{hem}'")
                    ch_sel = df_test_sub["ch"].values[np.argmax(df_test_sub["corr"].values)]
                    l_ch_sel.append({
                        "label": label_name,
                        "CLASSIFICATION": CLASSIFICATION,
                        "loc": loc,
                        "sub": sub_test,
                        "ch": ch_sel,
                        "hemisphere": hem,
                        "corr": np.max(df_test_sub["corr"].values),
                        "per": df_test_sub.query(f"ch == '{ch_sel}'")["per"].values[0]
                    })
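                # per hemisphere, keep the channel whose fingerprint correlates best with the
                # training R-map as the selected channel for this held-out subject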
                #chs_test[np.argmax(corr_test)]

            plt_idx += 1
            if PLT_:
                plt.subplot(3, 3, plt_idx)
                df_plt = pd.DataFrame({"per": np.concatenate(per_test_), "corr": np.concatenate(pred_corr_test)})
                sns.regplot(data=df_plt, x="corr", y="per")
                corr___ = np.corrcoef(np.concatenate(per_test_), np.concatenate(pred_corr_test))[0, 1]
                print(corr___)
                _, p = nm_stats.permutationTestSpearmansRho(np.concatenate(per_test_), np.concatenate(pred_corr_test), False, None, 1000)
                plt.title(f"{label_name} {loc}\n" +
                          f"cor = {np.round(corr___, 2)} p={np.round(p, 3)}")
    if PLT_:
        plt.suptitle(f"CLASS: {CLASSIFICATION}")
        plt.tight_layout()
        plt.show(block=True)

print("ha")
df_ch_sel = pd.DataFrame(l_ch_sel)
df_ch_sel.to_csv(os.path.join(PATH_CONNECTIVITY, "df_ch_sel_RMAP.csv"))

# df_ch_sel.query("CLASSIFICATION == True and loc == 'STN' and label == 'pkg_dk'").sort_values("sub")
@@ -0,0 +1,122 @@
import pandas as pd
import numpy as np
import os

PATH_RMAP_CH = '/Users/Timon/Library/CloudStorage/OneDrive-Charité-UniversitätsmedizinBerlin/Shared Documents - ICN Data World/General/Data/UCSF_OLARU/out_per/out_connectivity/out_conn/df_ch_sel_RMAP.csv'
PATH_DATA = '/Users/Timon/Library/CloudStorage/OneDrive-Charité-UniversitätsmedizinBerlin/Shared Documents - ICN Data World/General/Data/UCSF_OLARU/features/merged_std_10s_window_length_all_ch'
PATH_OUT = '/Users/Timon/Library/CloudStorage/OneDrive-Charité-UniversitätsmedizinBerlin/Shared Documents - ICN Data World/General/Data/UCSF_OLARU/features/merged_rmap'

cortex_ch_names = ['8-9', '8-10', '10-11', '9-11', '8-11', '9-10']
subcortex_ch_names = ['0-2', '0-1', '1-2', '1-3', '2-3', '0-3']

def get_most_recorded_chs(df_all):
    # get the two most recorded cortex channels
    chs_available = [f[:f.find("_raw_")] for f in df_all.columns if "raw_" in f and "mean" in f]
    size_ = []
    for ch in cortex_ch_names:
        if ch in chs_available:
            dat_ = df_all[f"{ch}_raw_mean"]
            # count the number of non-NaN values
            size_.append(dat_.dropna().shape[0])
        else:
            size_.append(0)
    # get the names of the two most recorded channels
    ch_cortex_sel = [cortex_ch_names[i] for i in np.argsort(size_)[::-1][:2]]
    ch_cortex_sel = sorted(ch_cortex_sel, key=lambda x: int(x.split('-')[0]))

    size_ = []
    for ch in subcortex_ch_names:
        if ch in chs_available:
            dat_ = df_all[f"{ch}_raw_mean"]
            # count the number of non-NaN values
            size_.append(dat_.dropna().shape[0])
        else:
            size_.append(0)
    # get the names of the two most recorded channels
    ch_subcortex_sel = [subcortex_ch_names[i] for i in np.argsort(size_)[::-1][:2]]
    ch_subcortex_sel = sorted(ch_subcortex_sel, key=lambda x: int(x.split('-')[0]))
    return ch_cortex_sel, ch_subcortex_sel
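# get_most_recorded_chs returns the two cortical and the two subcortical bipolar channels
# with the most non-NaN samples, each pair sorted by its first contact number,
# e.g. (hypothetical counts) (['8-9', '9-11'], ['0-2', '1-3'])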

df_rmap_ch = pd.read_csv(PATH_RMAP_CH)

# compute the performance: 1. rename the columns

SELECT_ALL = False
subs = df_rmap_ch["sub"].unique()

if SELECT_ALL:

    dfs_save = []
    cols = None
    for sub in subs:
        print(sub)
        for hem in ["l", "r"]:
            if not os.path.exists(os.path.join(PATH_DATA, f"{sub}{hem}_merged.csv")):
                continue
            df_sub = pd.read_csv(os.path.join(PATH_DATA, f"{sub}{hem}_merged.csv"))
            ch_cortex, ch_subcortex = get_most_recorded_chs(df_sub)
            ch_used = ch_subcortex + ch_cortex
            # select all columns of df_sub that contain one of the ch_used channels
            col_used = [c for c in df_sub.columns if any([ch in c for ch in ch_used]) and "coh" not in c] + list(df_sub.columns[-4:])
            df_sub_used = df_sub[col_used]
            df_sub_orig = df_sub_used.copy()
            ch_names_replace = ["ch_subcortex_1", "ch_subcortex_2", "ch_cortex_1", "ch_cortex_2"]

            # for each column, replace the ch_used name with the generic ch_names_replace name
            for ch, ch_name in zip(ch_used, ch_names_replace):
                df_sub_used.columns = [c.replace(ch+"_", ch_name+"_") for c in df_sub_used.columns]
            if cols is None:
                cols = df_sub_used.columns
            else:
                if not np.all(cols == df_sub_used.columns):
                    df_sub_used = df_sub_used[cols]

            df_sub_used["sub"] = sub+hem
            dfs_save.append(df_sub_used)
            print(df_sub_used.columns.shape)
            if df_sub_used.columns.shape[0] != 2725:
                print("break here")
            # df_sub_used can be saved

    df_all = pd.concat(dfs_save)
    df_all.to_csv(os.path.join(PATH_OUT, "all_ch_renamed_no_rmap.csv"))
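    # the SELECT_ALL export keeps the four most-recorded channels per recording and renames
    # them to generic ch_subcortex_1/2 and ch_cortex_1/2 so that feature columns line up
    # across subjects and hemispheres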


for CLASSIFICATION in [True, False]:
    print(CLASSIFICATION)
    for label in ["pkg_dk", "pkg_bk", "pkg_tremor"]:
        print(label)
        df_l = []
        cols = None
        for sub in subs:
            print(sub)
            for hem in ["l", "r"]:
                if not os.path.exists(os.path.join(PATH_DATA, f"{sub}{hem}_merged.csv")):
                    continue
                df_sub = pd.read_csv(os.path.join(PATH_DATA, f"{sub}{hem}_merged.csv"))

                ch_sel = list(df_rmap_ch.query(f"sub == '{sub}' and hemisphere == '{hem}' and label == '{label}' and CLASSIFICATION == {CLASSIFICATION}")["ch"])
                # sort the channels by their first contact number
                ch_sel = sorted(ch_sel, key=lambda x: int(x.split('-')[0]))
                # select only columns that belong to one of the channels in ch_sel
                col_used = [c for c in df_sub.columns if any([ch in c for ch in ch_sel]) and "coh" not in c] + list(df_sub.columns[-4:])
                df_sub_sel = df_sub[col_used]
                ch_replace = ["ch_subcortex_1", "ch_cortex_1"]
                for ch, ch_name in zip(ch_sel, ch_replace):
                    df_sub_sel.columns = [c.replace(ch+"_", ch_name+"_") for c in df_sub_sel.columns]
                if cols is None:
                    cols = df_sub_sel.columns
                else:
                    if len(cols) != len(df_sub_sel.columns):
                        continue
                    if not np.all(cols == df_sub_sel.columns):
                        try:
                            df_sub_sel = df_sub_sel[cols]
                        except Exception:
                            # skip recordings whose columns cannot be aligned
                            continue
                df_sub_sel["sub"] = sub+hem
                df_l.append(df_sub_sel)
        df_rmap_cond = pd.concat(df_l)
        # remove entries where the label is NaN
        df_rmap_cond = df_rmap_cond[~df_rmap_cond[label].isna()]
        df_rmap_cond.to_csv(os.path.join(PATH_OUT, f"rmap_ch_{label}_class_{CLASSIFICATION}.csv"))
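        # one CSV per (label, CLASSIFICATION) combination, restricted to the RMAP-selected
        # subcortical/cortical channels and renamed to the generic channel names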