-
Notifications
You must be signed in to change notification settings - Fork 0
/
gradCAMer.py
314 lines (254 loc) · 13.1 KB
/
gradCAMer.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
import io
import os
import pickle
import pandas
import numpy as np
import scipy.io as sio
import torch
from scipy.io import loadmat
from scipy.ndimage import zoom
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import accuracy_score, roc_auc_score, f1_score
from sklearn.preprocessing import MinMaxScaler
from torch.utils.data import TensorDataset, DataLoader
from torch.utils.data.sampler import WeightedRandomSampler
from pytorch_grad_cam import GradCAM, ScoreCAM, GradCAMPlusPlus, AblationCAM, XGradCAM, EigenCAM
"""""""""""""""""""""""""""""
SUMMARY
"""""""""""""""""""""""""""""
"""
THIS SCRIPT WILL TAKE THE RESULTS AUTOMATICALLY GENERATED BY VOLDNN.PY AND RETURN SOME PERFORMANCE METRICS,
TEST FOLD INDICES, AND GRADCAM++ MAPS. SAVES ALL OF THIS DATA TO A .MAT FILE.
1. MAKE SURE YOU HAVE grad-cam installed
2. EDIT USER DEFINED SETTINGS BELOW
3. LOOK AT OUTPUT:
* LOAD IN {INPUT_FILE}_COMBINED_gradCAM.mat
Data saved per repeat:
* all_fold: test fold indices; rows are subjects, cols are repeats
* all_probs: direct network model outputs (probabilities); rows are subjects, cols are repeats
* all_predictions: network outputs converted to labels; rows are subjects, cols are repeats
* all_true_labels: actual subject labels; rows are subjects, cols are repeats (each col should be identical)
* all_gradcam_maps: gradcam++ maps for the predicted label; stored as: (subjects,x,y,z,repeat)
* all_gradcam_maps_oppo: gradcam++ maps for the opposite of the predicted label; stored as: (subjects,x,y,z,repeat)
Metrics computed per repeat:
* all_f1: f1 scores
* all_acc: accuracy
* all_auc: area under the curve
Metrics saved per fold:
* folds_f1: f1 scores (rows are test folds, cols are repeats)
* folds_acc: accuracy (rows are test folds, cols are repeats)
* folds_auc: area under the curve (rows are test folds, cols are repeats)
WARNING: ASSUMING BINARY CLASSIFICATION
"""
"""""""""""""""""""""""""""""
USER DEFINED SETTINGS
"""""""""""""""""""""""""""""
# general settings
iF = 'FifthTry_CV_Rep_' # provide the base name of the file(s) that contain data to load in (give string prior to ?.pkl where ? is a number)
plot = False # should we plot gradcam++ maps?
priorityMetricTest = 'loss' # which metric was used for storing the model? loss or F1? (critical for retrieving correct model state)
targetl = torch.nn.Conv3d # which layer to target (last instance) for grad cam++
kfo = 6 # how many outer folds did you use?
# preprocess your data correctly (i.e., identical in fashion to what you did in volDNN.py)
inmat = loadmat('PyTorchInput_withFolds.mat') # this is my data, load yours however
yT = inmat['wabClassi'].flatten() # put your classes in a variable called yT. Here I flatten it, you may not have to. Check if your data has more than one dimension. It shouldn't.
yF = inmat['wabClass2i'].flatten() # for your data, make this identical to yT like the above line 130. This is the vector on which stratitfication will be based. I was experimenting with using more granular classes for stratification while still using yT for testing
x = inmat['data'] # this is a 4D predictor matrix. It needs to be shaped as such: (image dimension 1, image dimension 2, image dimension 3, subjects)
x = np.delete(x,[35,37,73,74,160],axis=3)
yF = np.delete(yF,[35,37,73,74,160],axis=0)
yT = np.delete(yT,[35,37,73,74,160],axis=0)
del (inmat)
# reshape input data and rescale if necessary
xr = np.reshape(x, (x.shape[0] * x.shape[1] * x.shape[2], x.shape[3]))
# get lesion size for strat
lsz = np.sum(xr == 4, axis=0)
dc = pandas.qcut(lsz,q=3,labels=False)
yC = yF * 10 + dc
un = np.unique(yC, return_counts=True)
id = yC==11
yC[id]=12
id = yC==30
yC[id]=31
id = yC==40
yC[id]=42
id = yC==41
yC[id]=42
yF = yC
sc = MinMaxScaler(feature_range=(-1,1))
sc.fit(xr)
xr = sc.transform(xr)
"""""""""""""""""""""""""""""
AUTOMATICALLY FIND AND LOAD IN FILES
"""""""""""""""""""""""""""""
path, filename = os.path.split(iF)
if not path:
path = os.getcwd()
files = [f for f in os.listdir(path) if f.endswith('.pkl')]
iF_files = [file for file in files if file.startswith(filename)]
arep = []
for file in iF_files:
# Extract the part of the filename between the first underscore and the first period
ns = file.rsplit('_', 1)[-1].split('.')[0]
# Convert the extracted string to a number
num = int(ns)
# Add the number to the list of numbers
arep.append(num)
# get number of reps you ran automatically
reps = np.max(arep)
# Custom unpickler in case of cpu/gpu mismatch between environment in which pytorch model was saved and current environment
class CustomUnpickler(pickle.Unpickler):
def find_class(self, module, name):
if module == "torch.storage" and name == "_load_from_bytes":
def _load_from_bytes(b):
return torch.load(io.BytesIO(b), map_location=torch.device('cpu'))
return _load_from_bytes
return super().find_class(module, name)
"""""""""""""""""""""""""""""
DATA EXTRACTOR
"""""""""""""""""""""""""""""
def torchDataExtractor(id, xr, y, x, mbs, nm, sampType='standard'):
"""
EXTRACT INDICES FROM RESHAPED BRAIN DATA (TO 2D MATRIX), CONVERT THE RESULT BACK TO 4D MATRIX AND PREPROCESS MATRIX FOR DNN IN PYTORCH
- I.E., DO SOME MATRIX RESHAPING AND SETUP TENSORDATASET, SAMPLER, AND DATALOADER
- ASSUMES IN_CHANNELS IS 1
id is a set of indices for xr. xr is a predictor matrix where rows are samples (for id), cols are subjects/images.
x is the *size* of the 4d matrix of original images that were reshaped into xr. Rows in xr map onto 1st, 2nd and 3rd dims of x that were collapsed.
yT are samples
mbs is the mini batch size
output: tensor dataset for pytorch
"""
tmp = xr[:, id]
tmp = np.reshape(tmp, (x[0], x[1], x[2], tmp.shape[1]))
tmp = np.transpose(tmp, (3, 0, 1, 2))
tmp = torch.from_numpy(np.expand_dims(tmp, axis=1))
tmp2 = torch.from_numpy(np.transpose(y[id])).to(torch.uint8)
tmp3 = tmp2.long()
if sampType == 'balanced':
# this implements a sampler with replacement to have balanced minibatches
cw = torch.tensor([1 / class_count for class_count in torch.bincount(tmp3)])
cw = cw.view(1, -1)
sw = cw[:, tmp3].squeeze()
samp = WeightedRandomSampler(weights=sw, num_samples=len(sw), replacement=True)
drp = False
elif sampType == 'standard':
samp = None
drp = False
o = DataLoader(TensorDataset(tmp.float(), tmp2.float()), batch_size=mbs, sampler=samp, shuffle=False, drop_last=drp,
num_workers=nm, pin_memory=True)
return o
"""""""""""""""""""""""""""""
CUSTOM LOSS FUNCTION FOR COMPATIBILITY WITH GRAD-CAM
"""""""""""""""""""""""""""""
class CustomLoss:
def __init__(self, target_class):
self.target_class = target_class
def __call__(self, output):
return output[0] if self.target_class == 1 else -output[0]
def __iter__(self):
yield self
"""""""""""""""""""""""""""""
VARIABLES THAT WE WILL TRACK
"""""""""""""""""""""""""""""
all_gradcam_maps = np.zeros((x.shape[3],x.shape[0],x.shape[1],x.shape[2],reps))
all_gradcam_maps_oppo = np.zeros((x.shape[3],x.shape[0],x.shape[1],x.shape[2],reps))
all_predictions = np.zeros((xr.shape[1],reps))
all_probs = np.zeros((xr.shape[1],reps))
all_true_labels = np.zeros((xr.shape[1],reps))
all_accuracies = np.zeros((reps))
all_f1 = np.zeros((reps))
all_acc = np.zeros((reps))
all_auc = np.zeros((reps))
all_fold = np.zeros((xr.shape[1],reps))
folds_f1 = np.zeros((kfo,reps))
folds_acc = np.zeros((kfo,reps))
folds_auc = np.zeros((kfo,reps))
"""""""""""""""""""""""""""""
START LOOP
"""""""""""""""""""""""""""""
for i in range(1, reps + 1): # LOOP THROUGH REPEATS
file_name = f"{path}{os.path.sep}{iF}{i}.pkl" # LOAD IN PKL FILE FOR THIS REPEAT AND PLACE ON CORRECT DEVICE
with io.open(file_name, 'rb') as file:
if torch.cuda.is_available():
data = pickle.load(file)
else:
data = CustomUnpickler(file).load()
sds = data['sds'][i - 1] # GET RANDOM SEEDS FOR THIS REPEAT TO REPLICATE FOLDS
xr = np.transpose(xr) # TRANSPOSE JUST FOR STRATIFIEDKFOLD
outer_cv = StratifiedKFold(n_splits=data['kfo'], shuffle=True, random_state=sds)
nested_indices = [] # GENERATE FOLDS
for outer_train_idx, outer_test_idx in outer_cv.split(xr, yF):
nested_indices.append((outer_test_idx))
xr = np.transpose(xr) # RETURN DATA TO ORIGINAL STATE
for o in range(kfo): # LOOP THROUGH OUTER FOLDS
tei = torchDataExtractor(nested_indices[o], xr, yT, x.shape, data['mbs'], 0, sampType='standard') # EXTRACT THE CORRECT DATA AND PLACE IN DATALOADER
if priorityMetricTest == 'F1': # GET CORRECT MODEL STATE TO LOAD
modelstate = data['modelStateTracker'][o]
elif priorityMetricTest == 'loss':
modelstate = data['modelStateTracker2'][o]
model = data['modelTracker'][o] # SETUP MODEL FOR TESTING
model.load_state_dict(modelstate)
model.eval()
if torch.cuda.is_available(): # ENSURE DEVICE COMPATIBILITY
model.to('cuda')
else:
model.to('cpu')
for ii, layer in enumerate(model.features): # IDENTIFY INDEX OF LAYER THAT WE WILL USE FOR FEATURE MAPS
if isinstance(layer, targetl):
last_conv_index = ii
cam = GradCAMPlusPlus(model, [model.features[last_conv_index]]) # SETUP FEATURES TO MAP
gradcam_maps = [] # STATS TO TRACK OVER MINI BATCHES
gradcam_maps_oppo = [] # STATS TO TRACK OVER MINI BATCHES
predictions = []
true_labels = []
probs = []
for inputs, labels in tei:# ACROSS MINI BATCHES
if torch.cuda.is_available(): # PLACE INPUT TENSOR ON CORRECT DEVICE
input_tensor = inputs.cuda()
else:
input_tensor = inputs.to('cpu')
output = model(input_tensor) # GET MODEL OUTPUTS
preds = torch.sigmoid(output).detach().cpu().numpy() # THIS IS BINARY CLASSIFICATION SO WE NEED SIGMOID
pred_labels = (preds > 0.5).astype(int) # CONVERT PROBABILITIES TO LABELS
predictions.extend(pred_labels.tolist()) # SAVE THE ABOVE TO LISTS
probs.extend(preds.tolist())
true_labels.extend(labels.tolist())
for ii in range(input_tensor.size(0)): # FOR EACH PREDICTION GET GRADCAM MAP
target_class = int(preds[ii] > 0.5)
custom_loss = CustomLoss(target_class)
mask = cam(input_tensor[ii].unsqueeze(0), custom_loss)
mask = np.squeeze(mask)
zoom_factors = (
input_tensor.size(2) / mask.shape[0],
input_tensor.size(3) / mask.shape[1],
input_tensor.size(4) / mask.shape[2]
)
mask = zoom(mask, zoom_factors, order=1)
if target_class == 1:
custom_loss2 = CustomLoss(0)
elif target_class == 0:
custom_loss2 = CustomLoss(1)
mask2 = cam(input_tensor[ii].unsqueeze(0), custom_loss2)
mask2 = np.squeeze(mask2)
zoom_factors2 = (
input_tensor.size(2) / mask2.shape[0],
input_tensor.size(3) / mask2.shape[1],
input_tensor.size(4) / mask2.shape[2]
)
mask2 = zoom(mask2, zoom_factors2, order=1)
gradcam_maps.append(mask)
gradcam_maps_oppo.append(mask2)
# save out everything
all_fold[nested_indices[o],i-1] = o+1
all_probs[nested_indices[o],i-1] = np.array(probs).flatten()
all_true_labels[nested_indices[o],i-1] = np.array(true_labels).flatten()
all_predictions[nested_indices[o],i-1] = np.array(predictions).flatten()
folds_f1[o,i-1] = f1_score(all_true_labels[nested_indices[o],i-1], all_predictions[nested_indices[o],i-1], pos_label=1)
folds_acc[o,i-1] = accuracy_score(all_true_labels[nested_indices[o],i-1], all_predictions[nested_indices[o],i-1])
folds_auc[o,i-1] = roc_auc_score(all_true_labels[nested_indices[o],i-1], all_predictions[nested_indices[o],i-1])
all_gradcam_maps[nested_indices[o], :, :, :, i-1] = np.array(gradcam_maps)
all_gradcam_maps_oppo[nested_indices[o], :, :, :, i-1] = np.array(gradcam_maps_oppo)
all_f1[i-1] = f1_score(all_true_labels[:,i-1], all_predictions[:,i-1], pos_label=1)
all_acc[i-1] = accuracy_score(all_true_labels[:,i-1], all_predictions[:,i-1])
all_auc[i-1] = roc_auc_score(all_true_labels[:,i-1], all_predictions[:,i-1])
print(f"F1 is: {all_f1[i-1]}")
sio.savemat(f"{iF} COMBINED_gradCAM.mat", {'all_f1': all_f1, 'all_acc': all_acc,'all_auc': all_auc,'all_fold': all_fold,'all_probs': all_probs ,'all_true_labels': all_true_labels ,'all_predictions': all_predictions, 'folds_f1':folds_f1,'folds_acc': folds_acc, 'folds_auc': folds_auc, 'all_gradcam_maps': all_gradcam_maps, 'all_gradcam_maps_oppo': all_gradcam_maps_oppo})