-
Notifications
You must be signed in to change notification settings - Fork 0
/
format_wilcox.py
75 lines (61 loc) · 2.64 KB
/
format_wilcox.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
import numpy as np
import pandas as pd
from scipy.stats import wilcoxon
from statsmodels.stats.multitest import multipletests
import os
import glob
# Directory where the data is located
data_dir = "HC_Analysis/Files_ig/Merge_Output/"

# Dataframe containing the metric results
df = pd.read_csv(os.path.join(data_dir, "All_metric.csv"))

# The img_type column is formatted wrong in the CSV: drop the last four
# characters of every value.
df["img_type"] = df["img_type"].str.slice(stop=-4)

# Remove the redundant metric columns, then keep only the first row for each
# (pers_id, moco, nod, RR, shake, still, img_type) combination.
df = (
    df.drop(columns=["aes_lap", "aes_pst"])
    .set_index(["pers_id", "moco", "nod", "RR", "shake", "still", "img_type"])
    .loc[lambda frame: ~frame.index.duplicated(keep="first")]
    .reset_index()
)

# Image sequences to calculate the Wilcoxon test for
img_sequences = df["img_type"].unique()

# Metrics to calculate it for
metrics = ["tg", "coent", "aes"]

# Motions to calculate it for.
# nod, shake and still are mutually disjoint: only one of these columns is
# nonzero at a time.
motions = ["nod", "still"]
def calc_wilcox_rank(df, df_to_merge, metric, motion, sequence, re_ac):
    """Append one paired Wilcoxon signed-rank p-value to ``df_to_merge``.

    The rows of ``df`` are restricted to one image sequence, one motion type
    and one ``RR`` value, then pivoted so that every ``pers_id`` has one
    value of ``metric`` per ``moco`` level. The ``moco == 0`` and
    ``moco == 1`` columns are compared with ``scipy.stats.wilcoxon``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``pers_id``, ``moco``, ``img_type``, ``RR``,
        the column named by ``motion`` and the column named by ``metric``.
    df_to_merge : pandas.DataFrame
        Accumulator of previously computed p-values (may be empty).
    metric : str
        Name of the metric column to test.
    motion : str
        Name of the motion indicator column; rows where it equals 1 are used.
    sequence
        Value of ``img_type`` to select.
    re_ac
        Value of ``RR`` to select.

    Returns
    -------
    pandas.DataFrame
        ``df_to_merge`` with one extra row holding ``metric``, ``img_type``,
        ``RR``, ``motion`` and ``pvalue``. ``df_to_merge`` is returned
        unchanged when the selection has no rows, lacks one of the two
        ``moco`` levels, or contains no subject measured at both levels.
    """
    # Boolean indexing already yields a new frame, so no explicit copy needed.
    selected = df.loc[
        (df["img_type"] == sequence) & (df[motion] == 1) & (df["RR"] == re_ac)
    ]
    if selected.empty:
        return df_to_merge

    by_moco = selected[["pers_id", "moco", metric]].pivot(
        index="pers_id", columns="moco", values=metric
    )

    # A paired test needs both moco levels; treat a missing level like the
    # "no data" case instead of failing with a KeyError.
    if 0 not in by_moco.columns or 1 not in by_moco.columns:
        return df_to_merge

    # Subjects measured at only one moco level show up as NaN after the pivot;
    # they cannot form a pair and would otherwise make the p-value NaN.
    paired = by_moco[[0, 1]].dropna()
    if paired.empty:
        return df_to_merge

    _, pval = wilcoxon(x=paired[0], y=paired[1])
    pval_df = pd.DataFrame(
        {
            "metric": [metric],
            "img_type": [sequence],
            "RR": [re_ac],
            "motion": [motion],
            "pvalue": [pval],
        }
    )
    # ignore_index keeps the accumulated row labels unique (0, 1, 2, ...)
    # rather than labelling every appended row 0.
    return pd.concat([df_to_merge, pval_df], ignore_index=True)
# Dataframe to contain all Wilcoxon p-values: one row per
# (RR, image sequence, metric, motion) combination that has data.
wilcox_df = pd.DataFrame()
for racq in [0, 1]:
    for sequence in img_sequences:
        for metric in metrics:
            for motion in motions:
                wilcox_df = calc_wilcox_rank(
                    df=df,
                    df_to_merge=wilcox_df,
                    metric=metric,
                    motion=motion,
                    sequence=sequence,
                    re_ac=racq,
                )

# Start from a float column: the corrected p-values are floats, and writing
# floats into an integer column is a deprecated dtype-changing assignment in
# recent pandas. Every row belongs to exactly one (metric, img_type) family
# below, so the placeholder value never survives into the output.
wilcox_df["pvalue_cor"] = np.nan

# Benjamini-Hochberg FDR correction, applied separately within each
# (metric, image sequence) family of tests.
for metric in wilcox_df["metric"].unique():
    for seq in wilcox_df["img_type"].unique():
        family = (wilcox_df["metric"] == metric) & (wilcox_df["img_type"] == seq)
        if not family.any():
            # This metric/sequence combination produced no test; nothing to correct.
            continue
        p_values = wilcox_df.loc[family, "pvalue"]
        rej, p_values_cor, _, __ = multipletests(
            p_values, alpha=0.05, method='fdr_bh', is_sorted=False, returnsorted=False
        )
        wilcox_df.loc[family, "pvalue_cor"] = p_values_cor
        print(rej)

print()
print()
print(wilcox_df)
print()
print("----------------")
print("saving dataframe")
print("----------------")
wilcox_df.to_csv("wilcox_values.csv", index=False)