architecture.py
import copy

import numpy as np
import torch
from torch.autograd import Variable

from utils import *
from Layer import *
from Model import *

LINEAR_THRESHOLD = 50000
def applyActionsShrinkage(m, action, inp, lookup):
    # Scale each non-fixed hyperparameter of the layer by the factor the
    # action selects from the lookup table; m.fixed is a 5-element mask
    # over [layer, kernel_size, stride, out_channels, padding].
    _, k, s, o, p = Layer(m).getRepresentation()
    k = max(int(k * lookup[action[1]]), 1) if not m.fixed[1] else k
    s = max(int(s * lookup[action[2]]), 1) if not m.fixed[2] else s
    o = max(int(o * lookup[action[3]]), 10) if not m.fixed[3] else o
    p = int(p * lookup[action[4]]) if not m.fixed[4] else p
    in_channels = inp.size(1)
    cn = m.__class__.__name__
    if cn == 'Linear':
        in_channels = inp.view(inp.size(0), -1).size(1)
        if in_channels > LINEAR_THRESHOLD or in_channels < 10:
            print('Linear layer too large or too small')
            return None
    return resizeLayer(m, in_channels, o, kernel_size=k, stride=s, padding=p)
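
# Illustrative sketch (hypothetical values; the real lookup table and the
# Layer/resizeLayer behavior come from utils.py and Layer.py):
#
#     lookup = [1.0, 0.9, 0.8, 0.7, 0.6]   # assumed table of scale factors
#     action = [0, 0, 0, 2, 0]             # keep k/s/p, scale out_channels by 0.8
#     smaller = applyActionsShrinkage(conv, action, inp, lookup)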

class Architecture:
    def __init__(self, mode, model, datasetInputTensor, datasetName,
                 LINEAR_THRESHOLD=50000, baseline_acc=.5, lookup=None):
        self.mode = mode
        self.model = model
        self.datasetInputTensor = datasetInputTensor
        self.a = 0  # cursor into the flat action list while traversing
        self.inp = None
        self.LINEAR_THRESHOLD = LINEAR_THRESHOLD
        self.datasetName = datasetName
        self.parentSize = numParams(model)
        self.baseline_acc = baseline_acc
        self.lookup = lookup
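
    # Construction sketch (hypothetical tensor shape and lookup values; the
    # model itself comes from Model.py):
    #
    #     arch = Architecture('shrinkage', model,
    #                         torch.rand(1, 3, 32, 32),  # assumed CIFAR-shaped probe
    #                         'cifar10', lookup=[1.0, 0.9, 0.8, 0.7, 0.6])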
    def traverse_removal(self, parent, m, m_name, actions):
        # Walk the module tree: containers recurse, leaf layers are kept or
        # dropped according to the corresponding 0/1 entry in `actions`.
        classname = m.__class__.__name__
        if classname in ['Sequential', 'BasicBlock', 'Bottleneck', 'ResNet', 'VGG',
                         'LeNet', 'mnist_model', 'Model', 'ResNetModifiable',
                         'BasicBlockModifiable']:
            child = createParentContainer(m)
            for i in m._modules.keys():
                if i == 'shortcut':
                    continue
                res = self.traverse_removal(child, m._modules[i], i, actions)
                if res is None:
                    return None
            if classname not in ['ResNet', 'VGG', 'LeNet', 'mnist_model', 'Model',
                                 'ResNetModifiable']:
                parent.add_module(m_name, child)
            else:
                return child
        else:
            # Childless layers: these are the candidates for removal.
            if classname == 'Linear':
                self.inp = self.inp.view(self.inp.size(0), -1)
                if self.inp.size(1) > self.LINEAR_THRESHOLD:
                    return None
            # Keep the layer if it is fixed or its action bit says keep;
            # otherwise it is simply not added to the child container.
            if m.fixed or actions[self.a]:
                m = resizeToFit(Layer(m), self.inp).cuda()
                self.inp = m(self.inp)
                parent.add_module(m_name, m)
            self.a += 1
        return True
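
    # In removal mode `actions` holds one 0/1 decision per leaf layer, e.g.
    # (hypothetical) actions = [1, 0, 1, 1] keeps layers 0, 2 and 3 and drops
    # layer 1; fixed layers are kept regardless of their action bit.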
    def traverse_shrinkage(self, parent, m, m_name, actions):
        classname = m.__class__.__name__
        if classname in ['Sequential', 'BasicBlock', 'Bottleneck', 'ResNet', 'VGG',
                         'LeNet', 'Model', 'ResNetModifiable', 'BasicBlockModifiable']:
            # Save the current activation so the rebuilt container can be
            # sanity-checked with a forward pass below.
            oldInp = Variable(copy.deepcopy(self.inp.data))
            child = createParentContainer(m)
            if classname in ['BasicBlock', 'Bottleneck', 'BasicBlockModifiable']:
                # Residual blocks are shrunk as a unit so the shortcut
                # convolutions stay consistent with the main path.
                self.fixBlockLayers(m)
                m = self.processBlock(actions, m, self.lookup, self.inp.size(1)).cuda()
                self.inp = m.layers(self.inp.cuda())
                child = m
            else:
                for i in m._modules.keys():
                    res = self.traverse_shrinkage(child, m._modules[i], i, actions)
                    if res is None:
                        return None
            if classname not in ['ResNet', 'VGG', 'LeNet', 'Model', 'ResNetModifiable']:
                child(oldInp)  # forward pass to verify the rebuilt container
                parent.add_module(m_name, child)
                return True
            else:
                return child
        else:
            if classname == 'Linear':
                self.inp = self.inp.view(self.inp.size(0), -1)
                if self.inp.size(1) > self.LINEAR_THRESHOLD or self.inp.size(1) < 10:
                    print('Linear layer too large or too small')
                    return None
            action = actions[self.a][:]
            m = applyActionsShrinkage(m, action, self.inp, self.lookup)
            if m is None:
                return None
            try:
                self.inp = m.cuda()(self.inp)
            except Exception:
                print('Error in model, probably because of receptive field size')
                return None
            parent.add_module(m_name, m)
            self.a += 1
            return True
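
    # Shrinkage expects one 5-element action row per leaf layer; the flat
    # vector from the agent is reshaped to (-1, 5) in generateChildModel(),
    # so a model with N shrinkable layers needs 5 * N action entries.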
    def processBlock(self, actions, m, lookup, input_size):
        # Shrink a residual block as a unit. Only the out_channels of its
        # convolutions are scaled; the shortcut branch is resized to match so
        # that the residual addition remains valid.
        finalAction = actions[self.a + len(m.layers._modules) - 1][3]
        finalActionUsed = False
        secondFinalAction = actions[self.a + len(m.layers._modules) - 2][3]
        secondFinalActionUsed = False
        firstConv = '0' in m.layers._modules
        secondConv = '3' in m.layers._modules
        hasShortcut = hasattr(m, 'shortcut') and m.shortcut is not None
        o = input_size
        if firstConv:
            i = input_size
            k = m.layers._modules['0'].kernel_size
            s = m.layers._modules['0'].stride
            o = m.layers._modules['0'].out_channels
            if secondConv:
                o = max(int(o * self.lookup[finalAction]), 10)
                finalActionUsed = True
            elif hasShortcut:
                # Change the output channels of the shortcut conv to match.
                o = max(int(o * self.lookup[finalAction]), 10)
                si = i
                sk = m.shortcut._modules['0'].kernel_size
                ss = m.shortcut._modules['0'].stride
                sp = m.shortcut._modules['0'].padding
                m.shortcut._modules['0'] = resizeLayer(m.shortcut._modules['0'], si, o, sk, ss, sp).cuda()
                m.shortcut._modules['1'] = resizeLayer(m.shortcut._modules['1'], o, o).cuda()
                finalActionUsed = True
            else:
                # With no shortcut and no second conv, the block must preserve
                # its channel count, so output matches input.
                o = i
            p = m.layers._modules['0'].padding
            m.layers._modules['0'] = resizeLayer(m.layers._modules['0'], i, o, k, s, p).cuda()
            if '1' in m.layers._modules:
                m.layers._modules['1'] = resizeLayer(m.layers._modules['1'], o, o).cuda()
        if secondConv:
            i = o
            k = m.layers._modules['3'].kernel_size
            s = m.layers._modules['3'].stride
            o = m.layers._modules['3'].out_channels
            if hasShortcut:
                o = max(int(o * self.lookup[secondFinalAction]), 10)
                si = m.layers._modules['0'].in_channels if firstConv else i
                sk = m.shortcut._modules['0'].kernel_size
                ss = m.shortcut._modules['0'].stride
                sp = m.shortcut._modules['0'].padding
                m.shortcut._modules['0'] = resizeLayer(m.shortcut._modules['0'], si, o, sk, ss, sp).cuda()
                m.shortcut._modules['1'] = resizeLayer(m.shortcut._modules['1'], o, o).cuda()
                secondFinalActionUsed = True
            else:
                o = m.layers._modules['0'].in_channels if firstConv else i
            p = m.layers._modules['3'].padding
            m.layers._modules['3'] = resizeLayer(m.layers._modules['3'], i, o, k, s, p).cuda()
            if '4' in m.layers._modules:
                m.layers._modules['4'] = resizeLayer(m.layers._modules['4'], o, o).cuda()
        # Void actions: advance the cursor past every layer of this block,
        # since its actions were consumed (or ignored) above.
        for _ in range(len(m.layers._modules) - 2):
            self.a += 1
        # if not secondFinalActionUsed: actions[self.a].detach()
        self.a += 1
        # if not finalActionUsed: actions[self.a].detach()
        self.a += 1
        return m
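
    # Channel-flow sketch for a BasicBlock-style layout (hypothetical sizes):
    # layers = [Conv('0'), BN('1'), ReLU('2'), Conv('3'), BN('4')] plus a
    # shortcut [Conv, BN]. With input_size=64 and a 0.5 scale on the last
    # conv's out_channels, processBlock resizes Conv('3') and the shortcut
    # conv to max(int(64 * 0.5), 10) = 32 outputs so the addition still works.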
    def fixLayers(self, m):
        # TODO: generalize this to most models.
        # Ensure at least one fc layer survives and that strided
        # (downsampling) convolutions cannot be removed.
        layers = flattenModule(m)
        # Initialize every layer as removable, then pin the ones to keep.
        for l in layers:
            l.fixed = False
        layers[-1].fixed = True
        for l in layers:
            cn = l.__class__.__name__
            if cn == 'Conv2d' and hasattr(l, 'stride') and l.stride != (1, 1):
                l.fixed = True
            if cn == 'Linear' or cn == 'AvgPool2d':
                l.fixed = True
    def fixBlockLayers(self, m):
        # Inside a block, only the out_channels of the first conv may change.
        for mm in m.layers._modules.values():
            mm.fixed = [True] * 5
        list(m.layers._modules.values())[0].fixed = [True, True, True, False, True]
    def fixParams(self, m):
        layers = flattenModule(m)
        # Start with every hyperparameter of every layer modifiable.
        for l in layers:
            l.fixed = [False] * 5
        # Fix all shortcut/downsampling convolutions: their action is coupled
        # to the main-path conv when the block is rebuilt, so they must not be
        # shrunk independently. Also fix the final linear and avg-pool layers.
        for l in layers:
            cn = l.__class__.__name__
            if cn == 'Conv2d' and hasattr(l, 'stride') and l.stride != (1, 1):
                l.fixed = [True] * 5
            if cn == 'Linear' or cn == 'AvgPool2d':
                l.fixed = [True] * 5
    def generateChildModel(self, actions):
        m = copy.deepcopy(self.model)
        self.a = 0
        self.inp = Variable(self.datasetInputTensor.clone()).cuda()
        if self.mode == 'shrinkage':
            # Reshape the flat action vector to [layer, param] rows of 5.
            actions = np.reshape(actions, (-1, 5))
            self.fixParams(m)
            newModel = self.traverse_shrinkage(None, m, None, actions)
        else:
            actions[0] = 1  # always keep the first layer in removal mode
            self.fixLayers(m)
            newModel = self.traverse_removal(None, m, None, actions)
        # Reject failed builds and children no smaller than the parent.
        if newModel is None or numParams(newModel) >= self.parentSize:
            return None
        resetModel(newModel)
        return newModel
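
# Usage sketch (hypothetical names and shapes; Model/utils supply the real
# model, numParams, and resetModel, and `num_layers` is assumed known):
#
#     arch = Architecture('shrinkage', model, torch.rand(1, 3, 32, 32),
#                         'cifar10', lookup=[1.0, 0.9, 0.8, 0.7, 0.6])
#     actions = np.random.randint(0, 5, size=5 * num_layers)  # 5 per layer
#     child = arch.generateChildModel(actions)
#     if child is not None:
#         print(numParams(child), 'parameters vs parent', arch.parentSize)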