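"""
07_evaluation_across_contexts.py

Evaluate personality trait predictions across contexts. For each comparison defined below
(e.g. first vs. second half of the recording, way vs. shop), per-participant majority-vote
predictions are collected from the corresponding classifier results and their average
correlation is written to figures/table1-5.csv.
"""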
import numpy as np
from config import conf
import os, sys
from config import names as gs
import pandas as pd
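# binned ground truth personality scores, one column per trait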
truth = np.genfromtxt(conf.binned_personality_file, skip_header=1, usecols=xrange(1, conf.n_traits+1), delimiter=',')
# all comparisons to perform. Each entry consists of
#   a name (the dictionary key),
#   two annotation values that determine whether classifiers trained on all data or only on specific subsets are examined,
#   and the names of the two subtasks to compare (see the example below the dictionary)
comparisons = dict({'split halves': [conf.annotation_all, conf.annotation_all, 'first half', 'second half'],
                    'two ways': [conf.annotation_ways, conf.annotation_ways, 'way there', 'way back'],
                    'way vs shop in general classifier': [conf.annotation_all, conf.annotation_all, 'both ways', 'shop'],
                    'way vs shop in specialised classifier': [conf.annotation_ways, conf.annotation_shop, 'both ways', 'shop'],
                    'way in specialised classifier vs way in general classifier': [conf.annotation_ways, conf.annotation_all, 'both ways', 'both ways'],
                    'shop in specialised classifier vs shop in general classifier': [conf.annotation_shop, conf.annotation_all, 'shop', 'shop']
                    })
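# Example: 'way vs shop in specialised classifier' compares predictions for the 'both ways' subset
# made by the classifier trained on way data (conf.annotation_ways) against predictions for the
# 'shop' subset made by the classifier trained on shop data (conf.annotation_shop).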
def get_majority_vote(predictions):
    """Return the most frequent label in predictions, or -1 if predictions is empty."""
    if len(predictions) == 0:
        return -1
    (values, counts) = np.unique(predictions, return_counts=True)
    ind = np.argmax(counts)
    return values[ind]
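# e.g. get_majority_vote(np.array([1, 2, 2, 3])) returns 2; if several labels are equally frequent,
# np.argmax picks the first occurrence, i.e. the smallest label wins the tie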
def get_average_correlation(predA, predB, m_iter):
    """
    Compute the average correlation between two sets of predictions over m_iter repetitions.

    :param predA: predictions for task A, n_participants x m_iter
    :param predB: predictions for task B, n_participants x m_iter (or a single column of length n_participants)
    :param m_iter: number of repetitions to average over
    :return: average Pearson correlation, averaged via the Fisher z-transformation
    """
    correlations = []
    for si in xrange(0, m_iter):
        if predB.ndim == 1:
            # only one set of predictions for task B; skip repetitions without predictions for task A
            if np.sum(predA[:, si]) > 0:
                A = predA[:, si]
                B = predB
                consider = (A > 0)
                A = A[consider]
                B = B[consider]
            else:
                continue
        else:
            # skip repetitions for which either task has no predictions
            if np.sum(predA[:, si]) > 0 and (np.sum(predB[:, si]) > 0):
                A = predA[:, si]
                B = predB[:, si]
                consider = (A > 0) & (B > 0)
                A = A[consider]
                B = B[consider]
            else:
                continue
        correlation = np.corrcoef(np.array([A, B]))[0][1]
        correlations.append(correlation)
    # average the per-repetition correlations in Fisher z-space (arctanh), then transform back
    avg = np.tanh(np.mean(np.arctanh(np.array(correlations))))
    return avg
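# Note on the averaging above: correlations are combined in Fisher z-space rather than averaged
# directly. For example (hypothetical values), the plain mean of r = 0.5 and r = 0.9 is 0.70,
# whereas np.tanh(np.mean(np.arctanh([0.5, 0.9]))) gives roughly 0.77.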
if __name__ == "__main__":
    # check if the output target folder already exists and create it if not
    if not os.path.exists(conf.figure_folder):
        os.mkdir(conf.figure_folder)

    # collect masks for each participant, annotation (all data, shop, way), window size
    # and subset in question (e.g. first half, or way to the shop)
    # each mask is True for samples of a particular participant and subset; False for all others
    window_masks = []
    for wsi in xrange(0, len(conf.all_window_sizes)):
        x_file, y_file, id_file = conf.get_merged_feature_files(conf.all_window_sizes[wsi])
        for annotation_value in conf.annotation_values:
            ids_ws = np.genfromtxt(id_file, delimiter=',', skip_header=1).astype(int)
            if annotation_value == conf.annotation_shop:
                ids_ws = ids_ws[ids_ws[:, 1] == conf.time_window_annotation_shop, :]
            elif annotation_value == conf.annotation_ways:
                ids_ws = ids_ws[(ids_ws[:, 1] == conf.time_window_annotation_wayI) | (ids_ws[:, 1] == conf.time_window_annotation_wayII), :]
            for p in xrange(0, conf.n_participants):
                ids_ws_p = ids_ws[(ids_ws[:, 0] == p), :]
                window_masks.append([annotation_value, p, wsi, 'first half', ids_ws_p[:, 2] == conf.time_window_annotation_halfI])
                window_masks.append([annotation_value, p, wsi, 'second half', ids_ws_p[:, 2] == conf.time_window_annotation_halfII])
                window_masks.append([annotation_value, p, wsi, 'way there', ids_ws_p[:, 1] == conf.time_window_annotation_wayI])
                window_masks.append([annotation_value, p, wsi, 'way back', ids_ws_p[:, 1] == conf.time_window_annotation_wayII])
                window_masks.append([annotation_value, p, wsi, 'shop', ids_ws_p[:, 1] == conf.time_window_annotation_shop])
                window_masks.append([annotation_value, p, wsi, 'both ways', np.logical_or(ids_ws_p[:, 1] == conf.time_window_annotation_wayI, ids_ws_p[:, 1] == conf.time_window_annotation_wayII)])
    window_masks_df = pd.DataFrame(window_masks, columns=['annotation', 'participant', 'window size index', 'subtask', 'mask'])
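    # window_masks_df has one row per (annotation, participant, window size index, subtask)
    # combination; its 'mask' column holds the boolean per-sample mask used for the lookups below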
    # collect predictions for each participant and each setting that is relevant for one of the comparisons
    # results are written directly into figures/table1-5.csv
    with open(conf.figure_folder + '/table1-5.csv', 'w') as f:
        f.write('comparison')
        for trait in xrange(0, conf.n_traits):
            f.write(',' + conf.medium_traitlabels[trait])
        f.write('\n')

        for comp_title, (annotation_value_I, annotation_value_II, subtaskI, subtaskII) in comparisons.items():
            f.write(comp_title)
            result_filename = conf.result_folder + '/predictions_' + comp_title.replace(' ', '_') + '.npz'
            if not os.path.exists(result_filename):
                print 'computing data for', comp_title
                print 'Note that this might take a while - if the script is run again, intermediate results will be available and speed up all computations.'
                predictions_I = np.zeros((conf.n_participants, conf.n_traits, conf.max_n_iter), dtype=int)
                predictions_II = np.zeros((conf.n_participants, conf.n_traits, conf.max_n_iter), dtype=int)
                for trait in xrange(0, conf.n_traits):
                    for si in xrange(0, conf.max_n_iter):
                        filenameI = conf.get_result_filename(annotation_value_I, trait, False, si, add_suffix=True)
                        filenameII = conf.get_result_filename(annotation_value_II, trait, False, si, add_suffix=True)
                        if os.path.exists(filenameI) and os.path.exists(filenameII):
                            dataI = np.load(filenameI)
                            detailed_predictions_I = dataI['detailed_predictions']
                            chosen_window_indices_I = dataI['chosen_window_indices']
                            dataII = np.load(filenameII)
                            detailed_predictions_II = dataII['detailed_predictions']
                            chosen_window_indices_II = dataII['chosen_window_indices']
                            for p, window_index_I, window_index_II, local_detailed_preds_I, local_detailed_preds_II in zip(xrange(0, conf.n_participants), chosen_window_indices_I, chosen_window_indices_II, detailed_predictions_I, detailed_predictions_II):
                                maskI = window_masks_df[(window_masks_df.annotation == annotation_value_I) &
                                                        (window_masks_df.participant == p) &
                                                        (window_masks_df['window size index'] == window_index_I) &
                                                        (window_masks_df.subtask == subtaskI)
                                                        ].as_matrix(columns=['mask'])[0][0]
                                maskII = window_masks_df[(window_masks_df.annotation == annotation_value_II) &
                                                         (window_masks_df.participant == p) &
                                                         (window_masks_df['window size index'] == window_index_II) &
                                                         (window_masks_df.subtask == subtaskII)
                                                         ].as_matrix(columns=['mask'])[0][0]
                                predictions_I[p, trait, si] = get_majority_vote(np.array(local_detailed_preds_I)[maskI])
                                predictions_II[p, trait, si] = get_majority_vote(np.array(local_detailed_preds_II)[maskII])
                        else:
                            print 'did not find', filenameI, 'or', filenameII
                            sys.exit(1)
                np.savez(result_filename, predictions_I=predictions_I, predictions_II=predictions_II)
            else:
                data = np.load(result_filename)
                predictions_I = data['predictions_I']
                predictions_II = data['predictions_II']

            # predictions_I holds the predictions from one context, predictions_II those from the other context
            # compute their average correlation per trait and write it to file
            for t in xrange(0, conf.n_traits):
                corrI = get_average_correlation(predictions_I[:, t, :], predictions_II[:, t, :], 100)
                f.write(',' + '%.2f' % corrI)
            f.write('\n')