Commit
add new scripts
timonmerk committed Oct 15, 2024
1 parent 670a7dd commit aa78bc9
Showing 18 changed files with 5,484 additions and 132 deletions.
74 changes: 74 additions & 0 deletions combine_data_Catboost_multiple_timepoints.py
@@ -0,0 +1,74 @@
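# Descriptive note: this script builds lagged feature matrices (file name suggests
# they feed a CatBoost model): for each run of consecutive 2-minute PKG samples,
# the preceding `consec_duration` rows are flattened into one wide row with
# "lag_{i}_" prefixed columns, and one CSV is written per window length.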
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import os


if __name__ == "__main__":
    PATH_FIGURES = "/Users/Timon/Library/CloudStorage/OneDrive-Charité-UniversitätsmedizinBerlin/Shared Documents - ICN Data World/General/Data/UCSF_OLARU/figures_ucsf"
    PATH_READ = "/Users/Timon/Library/CloudStorage/OneDrive-Charité-UniversitätsmedizinBerlin/Shared Documents - ICN Data World/General/Data/UCSF_OLARU/features/merged_normalized_10s_window_length/480"
    df_all = pd.read_csv(os.path.join(PATH_READ, "all_merged_normed.csv"), index_col=0)
    df_all["pkg_dt"] = pd.to_datetime(df_all["pkg_dt"])
    subs = df_all["sub"].unique()

    def running_counter(values):
        # count consecutive non-zero entries, resetting at every zero
        counter = 0
        result = []
        for value in values:
            if value == 0:
                counter = 0  # Reset counter on zero
            else:
                counter += 1
            result.append(counter)
        return result

    # time difference between consecutive PKG timestamps in minutes
    df_all["time_diff"] = df_all["pkg_dt"].diff().dt.total_seconds() / 60
    df_all["counter"] = 0
    df_all.loc[df_all["time_diff"] == 2, "counter"] = 1

    df_all["cum_sum"] = running_counter(df_all["counter"].values)
    # keep only the pkg and cum_sum columns, the fft features, and the sub column
    df_all_filter = df_all[[c for c in df_all.columns if c.startswith("pkg") or c.startswith("cum_sum") or "fft" in c or "sub" == c]]

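    # For every index with at least `consec_duration` consecutive 2-minute samples,
    # flatten the preceding `consec_duration` rows into one wide row of lagged features.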
    for consec_duration in [2, 5, 10, 20, 50, 100, 200, 500]:
        idx_larger_ = np.where(df_all["cum_sum"] >= consec_duration)
        # for each such index, extract the previous consec_duration rows and append them to a list
        l_df = []
        for idx in idx_larger_[0]:
            df_ = df_all_filter.iloc[idx - consec_duration:idx]
            # prefix each row's columns with "lag_{i}_" (i = 0 keeps the original names)
            l_row = []
            for i in range(consec_duration):
                row = df_.iloc[i]
                if i > 0:
                    row = row.rename({c: f"lag_{i}_{c}" for c in row.index})
                l_row.append(row)
            l_df.append(pd.concat(l_row, axis=0))
        df_duration = pd.DataFrame(l_df)
        print(f"Duration: {consec_duration}")
        print(df_duration.shape)
        df_duration.to_csv(os.path.join(PATH_READ, f"all_merged_normed_merge_{consec_duration}_consec.csv"))



    PLT_ = False
    if PLT_:
        # histogram of consecutive-recording lengths (cum_sum counts 2-minute steps)
        plt.hist(df_all["cum_sum"] * 2, bins=50, density=False)
        plt.xlabel("Minutes")
        plt.title("Histogram of time series lengths")
        plt.savefig(os.path.join(PATH_FIGURES, "histogram_time_series_lengths.pdf"))
        plt.show(block=True)

        # same distribution plotted as a CDF
        plt.hist(df_all["cum_sum"] * 2, bins=50, density=True, cumulative=True, histtype='step')
        plt.xlabel("Minutes")
        plt.xlim(0, 610)
        plt.title("CDF of time series lengths")
        # plt.savefig(os.path.join(PATH_FIGURES, "cdf_time_series_lengths.pdf"))
        plt.show(block=True)
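
The file name suggests the lagged CSVs written above are meant to be consumed by a CatBoost model, but no training code is part of this diff. Below is a minimal, hypothetical sketch of how one of the written files might be used, assuming `pkg_dk` is the regression target and `sub` identifies the recording for grouped cross-validation; the chosen file name, target, and split strategy are illustrative assumptions, not part of the commit.

import os
import pandas as pd
from catboost import CatBoostRegressor  # assumed dependency, not part of this commit
from sklearn.model_selection import GroupKFold, cross_val_score

PATH_READ = "/path/to/merged_normalized_10s_window_length/480"  # placeholder path
df = pd.read_csv(os.path.join(PATH_READ, "all_merged_normed_merge_10_consec.csv"), index_col=0)
df = df.dropna(subset=["pkg_dk"])  # assuming pkg_dk is the regression target

X = df[[c for c in df.columns if "fft" in c]]  # lagged spectral features
y = df["pkg_dk"]
groups = df["sub"]  # keep each subject's data within a single fold

model = CatBoostRegressor(verbose=False)
scores = cross_val_score(model, X, y, groups=groups, cv=GroupKFold(n_splits=3))
print(scores)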
105 changes: 105 additions & 0 deletions connectome_lead_mapper_compute.py
@@ -0,0 +1,105 @@
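# Descriptive note: leave-one-subject-out R-map analysis. For every (classification
# flag, PKG label, electrode location) combination, an R-map is fit on the training
# subjects' connectivity fingerprints and decoding performances; the held-out
# subject's channels are ranked by spatial correlation with that R-map, and the
# best channel per hemisphere is stored in df_ch_sel_RMAP.csv.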
import pickle
import pandas as pd
import numpy as np
from py_neuromodulation import nm_RMAP, nm_stats
from matplotlib import pyplot as plt
import os
import nibabel as nib
import seaborn as sns

PATH_CONNECTIVITY = r"/Users/Timon/Library/CloudStorage/OneDrive-Charité-UniversitätsmedizinBerlin/Shared Documents - ICN Data World/General/Data/UCSF_OLARU/out_per/out_connectivity/out_conn"
PATH_PER = r'/Users/Timon/Library/CloudStorage/OneDrive-Charité-UniversitätsmedizinBerlin/Shared Documents - ICN Data World/General/Data/UCSF_OLARU/out_per/out_dir/df_per_ind_all_coords.csv'

PLT_ = False

df_per = pd.read_csv(PATH_PER, index_col=0)
l_ch_sel = []

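# df_per holds one decoding performance ("per") per subject-hemisphere and channel;
# l_ch_sel collects the R-map-selected channel for each condition.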
for class_idx, CLASSIFICATION in enumerate([True, False]):
    plt.figure(figsize=(10, 10))
    plt_idx = 0
    for label_idx, label_name in enumerate(["pkg_dk", "pkg_bk", "pkg_tremor"]):
        for loc_idx, loc in enumerate(["ECOG", "STN", "GP"]):
            df_disease = df_per.query(f"classification == {CLASSIFICATION} and label == '{label_name}' and loc == '{loc}'")

            subs = np.unique([sub[:-1] for sub in df_disease["sub"].unique()])
            df_disease["sub_ind"] = df_disease["sub"].apply(lambda x: x[:-1])
            df_disease["hemisphere"] = df_disease["sub"].apply(lambda x: x[-1])
            per_test_ = []
            pred_corr_test = []

            for sub_test in df_disease["sub_ind"].unique():
                l_fp_train = []
                per_train = []
                l_fp_test = []
                per_test = []
                chs_test = []
                hemispheres_test = []

                affine = None

                for idx, row in df_disease.iterrows():
                    f_name = f'{row["sub"]}_ROI_{row["ch_orig"]}_conn-PPMI74P15CxPatients_desc-AvgRFz_funcmap.nii'
                    f_path = os.path.join(PATH_CONNECTIVITY, f_name)
                    rmapsel = nm_RMAP.RMAPCross_Val_ChannelSelector()
                    fp = rmapsel.load_fingerprint(f_path)
                    if affine is None:
                        affine = rmapsel.affine

                    if row["sub_ind"] == sub_test:
                        l_fp_test.append(fp)
                        per_test.append(row["per"])
                        chs_test.append(row["ch_orig"])
                        hemispheres_test.append(row["hemisphere"])
                    else:
                        l_fp_train.append(fp)
                        per_train.append(row["per"])

                rmap = nm_RMAP.RMAPCross_Val_ChannelSelector().get_RMAP(np.array(l_fp_train).T, np.array(per_train))

                # nm_RMAP.RMAPCross_Val_ChannelSelector().save_Nii(rmap, affine, os.path.join(PATH_CONNECTIVITY, f"rmap_func_{label_name}_class_{CLASSIFICATION}_loc_{loc}.nii"))
                corr_test = [np.corrcoef(
                    np.nan_to_num(rmap.flatten()),
                    np.nan_to_num(fp.flatten())
                )[0][1] for fp in l_fp_test]
                per_test_.append(per_test)
                pred_corr_test.append(corr_test)

                df_test_pr = pd.DataFrame({"per": per_test, "corr": corr_test, "hemisphere": hemispheres_test, "ch": chs_test})

                for hem in df_test_pr["hemisphere"].unique():
                    df_test_sub = df_test_pr.query(f"hemisphere == '{hem}'")
                    ch_sel = df_test_sub["ch"].values[np.argmax(df_test_sub["corr"].values)]
                    l_ch_sel.append({
                        "label": label_name,
                        "CLASSIFICATION": CLASSIFICATION,
                        "loc": loc,
                        "sub": sub_test,
                        "ch": ch_sel,
                        "hemisphere": hem,
                        "corr": np.max(df_test_sub["corr"].values),
                        "per": df_test_sub.query(f"ch == '{ch_sel}'")["per"].values[0]
                    })
                    # chs_test[np.argmax(corr_test)]

            plt_idx += 1
            if PLT_:
                plt.subplot(3, 3, plt_idx)
                df_plt = pd.DataFrame({"per": np.concatenate(per_test_), "corr": np.concatenate(pred_corr_test)})
                sns.regplot(data=df_plt, x="corr", y="per")
                corr___ = np.corrcoef(np.concatenate(per_test_), np.concatenate(pred_corr_test))[0, 1]
                print(corr___)
                _, p = nm_stats.permutationTestSpearmansRho(np.concatenate(per_test_), np.concatenate(pred_corr_test), False, None, 1000)
                plt.title(f"{label_name} {loc}\n" +
                          f"cor = {np.round(corr___, 2)} p={np.round(p, 3)}")
    if PLT_:
        plt.suptitle(f"CLASS: {CLASSIFICATION}")
        plt.tight_layout()
        plt.show(block=True)
    print("ha")

df_ch_sel = pd.DataFrame(l_ch_sel)
df_ch_sel.to_csv(os.path.join(PATH_CONNECTIVITY, "df_ch_sel_RMAP.csv"))

# df_ch_sel.query("CLASSIFICATION == True and loc == 'STN' and label == 'pkg_dk'").sort_values("sub")
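
For orientation, an R-map in this kind of analysis is conventionally the voxel-wise Pearson correlation between connectivity fingerprints and a per-channel performance measure. The NumPy sketch below illustrates that idea only; it is not the py_neuromodulation implementation called above, and rmap_sketch is a hypothetical helper.

import numpy as np

def rmap_sketch(fingerprints: np.ndarray, performances: np.ndarray) -> np.ndarray:
    # fingerprints: (n_channels, n_voxels) flattened connectivity maps
    # performances: (n_channels,) decoding performance per channel
    fp = np.nan_to_num(fingerprints)
    fp_centered = fp - fp.mean(axis=0)
    per_centered = performances - performances.mean()
    numerator = fp_centered.T @ per_centered
    denominator = np.sqrt((fp_centered ** 2).sum(axis=0)) * np.sqrt((per_centered ** 2).sum())
    return numerator / (denominator + 1e-12)  # (n_voxels,) Pearson r per voxel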
122 changes: 122 additions & 0 deletions connectome_lead_mapper_write_data.py
@@ -0,0 +1,122 @@
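# Descriptive note: this script assembles per-subject feature tables. Channels are
# either the most-recorded ones (SELECT_ALL branch) or the R-map-selected ones from
# df_ch_sel_RMAP.csv, and their columns are renamed to canonical names
# (ch_cortex_1, ch_subcortex_1, ...) so data from different subjects can be pooled.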
import pandas as pd
import numpy as np
import os

PATH_RMAP_CH = '/Users/Timon/Library/CloudStorage/OneDrive-Charité-UniversitätsmedizinBerlin/Shared Documents - ICN Data World/General/Data/UCSF_OLARU/out_per/out_connectivity/out_conn/df_ch_sel_RMAP.csv'
PATH_DATA = '/Users/Timon/Library/CloudStorage/OneDrive-Charité-UniversitätsmedizinBerlin/Shared Documents - ICN Data World/General/Data/UCSF_OLARU/features/merged_std_10s_window_length_all_ch'
PATH_OUT = '/Users/Timon/Library/CloudStorage/OneDrive-Charité-UniversitätsmedizinBerlin/Shared Documents - ICN Data World/General/Data/UCSF_OLARU/features/merged_rmap'

cortex_ch_names = ['8-9', '8-10', '10-11', '9-11', '8-11', '9-10']
subcortex_ch_names = ['0-2', '0-1', '1-2', '1-3', '2-3', '0-3']

def get_most_recorded_chs(df_all):
    # get the two most recorded cortex channels
    chs_available = [f[:f.find("_raw_")] for f in df_all.columns if "raw_" in f and "mean" in f]
    size_ = []
    for ch in cortex_ch_names:
        if ch in chs_available:
            dat_ = df_all[f"{ch}_raw_mean"]
            # count the number of non-NaN values
            size_.append(dat_.dropna().shape[0])
        else:
            size_.append(0)
    # get the names of the two most recorded cortex channels
    ch_cortex_sel = [cortex_ch_names[i] for i in np.argsort(size_)[::-1][:2]]
    ch_cortex_sel = sorted(ch_cortex_sel, key=lambda x: int(x.split('-')[0]))

    size_ = []
    for ch in subcortex_ch_names:
        if ch in chs_available:
            dat_ = df_all[f"{ch}_raw_mean"]
            # count the number of non-NaN values
            size_.append(dat_.dropna().shape[0])
        else:
            size_.append(0)
    # get the names of the two most recorded subcortex channels
    ch_subcortex_sel = [subcortex_ch_names[i] for i in np.argsort(size_)[::-1][:2]]
    ch_subcortex_sel = sorted(ch_subcortex_sel, key=lambda x: int(x.split('-')[0]))
    return ch_cortex_sel, ch_subcortex_sel

df_rmap_ch = pd.read_csv(PATH_RMAP_CH)

# Step 1 towards computing performance on pooled data: rename the channel columns to canonical names

SELECT_ALL = False
subs = df_rmap_ch["sub"].unique()

if SELECT_ALL:

    dfs_save = []
    cols = None
    for sub in subs:
        print(sub)
        for hem in ["l", "r"]:
            if not os.path.exists(os.path.join(PATH_DATA, f"{sub}{hem}_merged.csv")):
                continue
            df_sub = pd.read_csv(os.path.join(PATH_DATA, f"{sub}{hem}_merged.csv"))
            ch_cortex, ch_subcortex = get_most_recorded_chs(df_sub)
            ch_used = ch_subcortex + ch_cortex
            # select all columns of df_sub that contain the ch_used
            col_used = [c for c in df_sub.columns if any([ch in c for ch in ch_used]) and "coh" not in c] + list(df_sub.columns[-4:])
            df_sub_used = df_sub[col_used]
            df_sub_orig = df_sub_used.copy()
            ch_names_replace = ["ch_subcortex_1", "ch_subcortex_2", "ch_cortex_1", "ch_cortex_2"]

            # for each column replace the ch_used with the ch_names_replace
            for ch, ch_name in zip(ch_used, ch_names_replace):
                df_sub_used.columns = [c.replace(ch+"_", ch_name+"_") for c in df_sub_used.columns]
            if cols is None:
                cols = df_sub_used.columns
            else:
                if not np.all(cols == df_sub_used.columns):
                    df_sub_used = df_sub_used[cols]

            df_sub_used["sub"] = sub+hem
            dfs_save.append(df_sub_used)
            print(df_sub_used.columns.shape)
            if df_sub_used.columns.shape[0] != 2725:
                print("break here")
            # df_sub_used can be saved

    df_all = pd.concat(dfs_save)
    df_all.to_csv(os.path.join(PATH_OUT, "all_ch_renamed_no_rmap.csv"))


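# For each classification setting and PKG label, keep only the R-map-selected channel
# per subject and hemisphere, rename its columns to canonical names, and write one
# pooled CSV per (label, classification) combination.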
for CLASSIFICATION in [True, False]:
    print(CLASSIFICATION)
    for label in ["pkg_dk", "pkg_bk", "pkg_tremor"]:
        print(label)
        df_l = []
        cols = None
        for sub in subs:
            print(sub)
            for hem in ["l", "r"]:
                if not os.path.exists(os.path.join(PATH_DATA, f"{sub}{hem}_merged.csv")):
                    continue
                df_sub = pd.read_csv(os.path.join(PATH_DATA, f"{sub}{hem}_merged.csv"))

                ch_sel = list(df_rmap_ch.query(f"sub == '{sub}' and hemisphere == '{hem}' and label == '{label}' and CLASSIFICATION == {CLASSIFICATION}")["ch"])
                # sort the channels by contact number
                ch_sel = sorted(ch_sel, key=lambda x: int(x.split('-')[0]))
                # select only the columns that belong to a channel in ch_sel
                col_used = [c for c in df_sub.columns if any([ch in c for ch in ch_sel]) and "coh" not in c] + list(df_sub.columns[-4:])
                df_sub_sel = df_sub[col_used]
                ch_replace = ["ch_subcortex_1", "ch_cortex_1"]
                for ch, ch_name in zip(ch_sel, ch_replace):
                    df_sub_sel.columns = [c.replace(ch+"_", ch_name+"_") for c in df_sub_sel.columns]
                if cols is None:
                    cols = df_sub_sel.columns
                else:
                    if len(cols) != len(df_sub_sel.columns):
                        continue
                    if not np.all(cols == df_sub_sel.columns):
                        try:
                            df_sub_sel = df_sub_sel[cols]
                        except KeyError:
                            continue
                df_sub_sel["sub"] = sub+hem
                df_l.append(df_sub_sel)
        df_rmap_cond = pd.concat(df_l)
        # remove entries where the label is NaN
        df_rmap_cond = df_rmap_cond[~df_rmap_cond[label].isna()]
        df_rmap_cond.to_csv(os.path.join(PATH_OUT, f"rmap_ch_{label}_class_{CLASSIFICATION}.csv"))
15 changes: 9 additions & 6 deletions figure_27_get_ch_ind_per_all_ch.py
@@ -42,12 +42,15 @@ def run_channel(sub, ch, ch_idx):
         X = X[~idx_nan]
         y = y[np.array(~idx_nan)]
 
-        y_pr = model_selection.cross_val_predict(
-            model,
-            X,
-            y,
-            cv=model_selection.KFold(n_splits=3, shuffle=False),
-        )
+        try:
+            y_pr = model_selection.cross_val_predict(
+                model,
+                X,
+                y,
+                cv=model_selection.KFold(n_splits=3, shuffle=False),
+            )
+        except:
+            continue
 
 
         if CLASSIFICATION: