-
Notifications
You must be signed in to change notification settings - Fork 28
/
Copy pathutil_basket.py
217 lines (183 loc) · 7.62 KB
/
util_basket.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
# -*- coding: utf-8 -*-
"""
Created on Sun May 8 17:35:13 2016
@author: rob
"""
import matplotlib as mpl
from mpl_toolkits.mplot3d import Axes3D
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import axes3d
def plot_basket(data,labels, extra_title=' '):
    """Plot XYZ trajectories of the basketball data, coloured by label.

    Input:
    - data: takes any 3D array, the first three indices of the second
      dimension must correspond to X, Y and Z. When the second dimension is
      larger than the third, the two are swapped before plotting.
    - labels: the correct labels for the sequences. Same size as first
      dimension of data. 1 = miss (red), 0 = hit (blue), 2 = lc hit (cyan),
      3 = lc miss (magenta). Sequences with any other label are not drawn.
    - extra_title: string to append to the title
    Output
    - Plots =) At most 100 randomly chosen sequences are drawn.

    Credits for this code go to
    http://matplotlib.org/mpl_toolkits/mplot3d/tutorial.html
    """
    # NOTE: the tutorial's "mpl.rcParams['legend.fontsize'] = 10" used to sit
    # inside this docstring and therefore never ran. It is intentionally not
    # reinstated, so global Matplotlib settings stay untouched as before.

    # (label value, line format, legend text), checked in this order.
    styles = (
        (1, 'r', 'miss'),
        (0, 'b', 'hit'),
        (2, 'c', 'lc hit'),
        (3, 'm', 'lc miss'),
    )

    fig = plt.figure()
    # fig.gca(projection='3d') is rejected by recent Matplotlib releases;
    # add_subplot accepts the projection on old and new versions alike.
    ax = fig.add_subplot(111, projection='3d')

    N, crd, sl = data.shape
    if crd > sl:
        data = np.transpose(data, [0, 2, 1])
        print('We transpose dimension 1 and 2')

    P = 100  # How many lines you want?
    ind = np.random.permutation(N)
    for p in ind[:P]:
        for value, fmt, name in styles:
            if labels[p] == value:
                ax.plot(data[p, 0, :], data[p, 1, :], data[p, 2, :], fmt, label=name)
                break

    # Every plotted line registers its own legend entry; keep only the first
    # handle per legend text. Separate names are used so the `labels`
    # argument is not overwritten.
    handles, legend_texts = ax.get_legend_handles_labels()
    unique_texts, unique_handles = [], []
    for handle, text in zip(handles, legend_texts):
        if text not in unique_texts:
            unique_texts.append(text)
            unique_handles.append(handle)
    ax.legend(unique_handles, unique_texts)

    plt.title('Blue=hit, red=miss '+extra_title)
    plt.show()
def shuffle_basket(X,order,ind):
    """Shuffle one coordinate or one time step across samples.

    Used to calculate variable importance during a session: the values at
    the chosen index are permuted over the sample axis while everything
    else stays in place.

    The function takes in
    - X, the data. Expected a 3D Tensor of N x crd x sl
      - N = number of samples
      - crd = how many coordinates we have
      - sl = sequence length
    - order, the kind of variable we check
      - 'crd' we will random shuffle different coordinates
      - 'sl' we will random shuffle different sequence indices
    - ind specifies the exact index along order to shuffle

    Returns a shuffled copy; the X passed in is not modified.
    Raises ValueError when order is neither 'crd' nor 'sl'. Returning the
    data unshuffled in that case would silently report zero importance.
    """
    if order == 'crd':
        target = (slice(None), ind, slice(None))   # N x sl slice
    elif order == 'sl':
        target = (slice(None), slice(None), ind)   # N x crd slice
    else:
        raise ValueError("order must be 'crd' or 'sl', got %r" % (order,))

    X = X.copy()
    # np.random.shuffle permutes along the first axis only, i.e. it
    # reorders samples and keeps each sample's values together.
    extract = X[target].copy()
    np.random.shuffle(extract)
    X[target] = extract
    return X
def _plot_vi_row(axes_row, vi, suffix):
    """Draw the accuracy and cost bar charts for one variable-importance array."""
    D = vi.shape[1]
    # Colour the last bar differently: it is treated as the baseline.
    color = ['b'] * D
    if D:
        color[-1] = 'r'
    positions = np.arange(D)

    ## TODO For now, the axes are defined with numerics, in future we might add
    # a better heuristic here
    axes_row[0].bar(positions, vi[0], color=color)
    axes_row[0].set_title('Accuracy - ' + suffix)
    axes_row[0].axis([0, D, 0.6, 1.0])

    axes_row[1].bar(positions, vi[1], color=color)
    axes_row[1].set_title('Cost - ' + suffix)
    axes_row[1].axis([0, D, 0.3, 1.0])


def plot_vi(vi_crd, vi_sl):
    """Plot variable importance as a 2 x 2 grid of bar charts.

    Input:
    - vi_crd: 2D array; row 0 is plotted as 'Accuracy - crd' and row 1 as
      'Cost - crd'. The last column is drawn in red, all others in blue.
    - vi_sl: same layout for the sequence indices, drawn in the bottom row
      of the grid. Pass None to leave the bottom row empty.
    Output
    - Draws on a new figure. plt.show() is not called here.
    """
    # Four axes, returned as a 2-d array
    f, axarr = plt.subplots(2, 2)
    _plot_vi_row(axarr[0], vi_crd, 'crd')
    if vi_sl is not None:
        _plot_vi_row(axarr[1], vi_sl, 'sl')
def conf_ind(conf_correct,y_val,am,which_conf):
    """Computes indices per class for which we have highest or lowest confidence

    Input arguments
    - conf_correct: Confidence at the correct target, one value per sample.
      That is the probability that the Softmax classifier outputs at this sample
    - y_val: the true labels (0 or 1), one per sample
    - am: how many indices per class you want? Must not be negative
    - which_conf: string label for which confidence you want?
      -- 'hc' for high confidence: samples are ranked from the highest
         confidence downwards
      -- 'lc' for low confidence: samples are ranked from the lowest
         confidence upwards, skipping every sample whose confidence is
         below 0.5
    returns
    - conf_pos: indices of the first am ranked samples whose label is 0
    - conf_neg: indices of the first am ranked samples whose label is 1
    Both are integer arrays. They hold fewer than am entries when the class
    does not have that many eligible samples (earlier versions padded with
    index 0, which is indistinguishable from a real sample index).

    Raises ValueError when which_conf is not 'hc' or 'lc', or am is negative.
    Note that this function only works for binary classes
    """
    if which_conf not in ('hc', 'lc'):
        raise ValueError("which_conf must be 'hc' or 'lc', got %r" % (which_conf,))
    if am < 0:
        raise ValueError("am must be non-negative, got %r" % (am,))

    conf_correct = np.asarray(conf_correct)
    y_val = np.ravel(y_val)

    ranking = np.argsort(conf_correct)  # ascending confidence
    if which_conf == 'hc':
        # descending order, because we are interested in high confidence
        ranking = ranking[::-1]
    else:
        # We only want indices for which the confidence is at least 0.5.
        # In ascending order those below 0.5 form the leading run, so drop
        # exactly that many. This cannot run past the end of the array.
        ranking = ranking[np.count_nonzero(conf_correct < 0.5):]

    ranked_labels = y_val[ranking]
    conf_pos = ranking[ranked_labels == 0][:am]
    conf_neg = ranking[ranked_labels == 1][:am]
    return conf_pos.astype(int), conf_neg.astype(int)
def plot_grad(data,labels,grad):
    """Plots the gradient over the input trajectory wrt the cross-entropy cost

    Input:
    - data: the 3D Tensor containing the original trajectories. The first three indices
      of the second dimension must correspond to X Y and Z
    - labels: the corresponding target labels. Label 1 is drawn in red as
      'miss', every other label in blue as 'hit'
    - grad: The gradient over the trajectory. Due to Tensorflow's default for 4D Tensors,
      also this input can be four dimensional; the fourth axis is then
      squeezed away and must have length 1
    Output
    - Plots =) At most 100 randomly chosen trajectories are drawn, each
      with arrows showing the first three gradient components.
    """
    if len(grad.shape) == 4:  # remove the fourth dimension. Should be empty
        grad = np.squeeze(grad, axis=3)

    fig = plt.figure()
    # fig.gca(projection='3d') is rejected by recent Matplotlib releases;
    # add_subplot accepts the projection on old and new versions alike.
    ax = fig.add_subplot(111, projection='3d')

    N = data.shape[0]
    P = min(100, N)  # How many lines you want?
    ind = np.random.permutation(N)
    for p in ind[:P]:
        x, y, z = data[p, 0, :], data[p, 1, :], data[p, 2, :]
        if labels[p] == 1:
            ax.plot(x, y, z, 'r', label='miss')
        else:
            ax.plot(x, y, z, 'b', label='hit')
        # The arrows are drawn identically for both classes.
        ax.quiver(x, y, z, grad[p, 0, :], grad[p, 1, :], grad[p, 2, :],
                  length=1.0, pivot='middle')

    plt.title('Blue=hit, red=miss')
    print('Red lines are misses, blue lines are hits')
    plt.show()
def abs_to_off(data):
    """Turn absolute positions into step-to-step offsets.

    The first vector of every sequence becomes a zero vector; each later
    vector holds the difference with the vector one step before it.
    Input
    - data in size [N by coordinates by sequence_length]
    Output
    - offset data in size [N by coordinates by sequence_length], same
      dtype as data. The input array is left unmodified."""
    assert len(data.shape) == 3, 'abs_to_off() expects three dimensional matrices'
    current_steps = data[:, :, 1:]
    previous_steps = data[:, :, :-1]
    # Start from zeros so the first time step stays a zero vector.
    offsets = np.zeros_like(data)
    offsets[:, :, 1:] = current_steps - previous_steps
    return offsets