forked from dorajam/Convolutional-Network
-
Notifications
You must be signed in to change notification settings - Fork 0
/
backprop.py
119 lines (90 loc) · 4.48 KB
/
backprop.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
# Dora Jambor
# May 2016
# helper functions for backpropagation
import numpy as np
# backpropagation
##############################################################
def backprop_1d_to_1d(delta, prev_weights, prev_activations, z_vals, final=False):
    """Return the bias gradient, weight gradient and error term for one layer.

    When ``final`` is true, ``delta`` is taken as this layer's error as-is and
    ``prev_weights`` / ``z_vals`` are never read.  Otherwise the incoming
    ``delta`` is pulled back through ``prev_weights`` (transposed) and scaled
    element-wise by ``sigmoid_prime(z_vals)`` to give this layer's error.

    Returns a tuple ``(delta_b, delta_w, delta)``:
      * ``delta_b`` -- the layer error itself (same object as ``delta``),
      * ``delta_w`` -- ``np.dot(error, prev_activations.T)``,
      * ``delta``   -- the layer error, to be handed to the next call.

    NOTE(review): the dot products presumably expect column vectors of shape
    (n, 1) for the error and activations -- confirm against the caller.
    """
    if final:
        layer_error = delta
    else:
        slope = sigmoid_prime(z_vals)
        # error at this layer = (W^T . error of the following layer) * sigma'(z)
        layer_error = np.dot(prev_weights.T, delta) * slope

    weight_grad = np.dot(layer_error, prev_activations.T)
    return layer_error, weight_grad, layer_error
def backprop_1d_to_3d(delta, prev_weights, prev_activations, z_vals):
    """Return gradients for a layer whose input is a 3-D activation volume.

    The incoming ``delta`` is pulled back through ``prev_weights`` (transposed)
    and scaled element-wise by ``sigmoid_prime(z_vals)``.  The 3-D
    ``prev_activations`` (unpacked as depth x dim1 x dim2) is flattened to a
    single row so the weight gradient can be formed with one dot product, and
    that gradient is then folded back to ``(n_units, depth, dim1, dim2)``
    where ``n_units`` is the first dimension of the layer error.

    Returns a tuple ``(delta_b, delta_w, delta)``; ``delta_b`` and ``delta``
    are the same array (the layer error).

    NOTE(review): the layer error is presumably a column of shape
    (n_units, 1) -- confirm against the caller.
    """
    slope = sigmoid_prime(z_vals)
    # error at this layer = (W^T . error of the following layer) * sigma'(z)
    layer_error = np.dot(prev_weights.T, delta) * slope

    depth, rows, cols = prev_activations.shape
    flat_input = prev_activations.reshape((1, depth * rows * cols))
    weight_grad = np.dot(layer_error, flat_input).reshape(
        (layer_error.shape[0], depth, rows, cols))
    return layer_error, weight_grad, layer_error
def backprop_pool_to_conv(delta, prev_weights, input_from_conv, max_indices, poolsize, pool_output):
    """Push the error from the layer after pooling back onto the conv output.

    Steps, in order:
      1. ``prev_weights`` (4-D) is flattened to 2-D and ``pool_output`` (3-D)
         to a column, so the error on the pooling layer can be computed as
         ``W^T . delta * sigmoid_prime(pool_output)``.
      2. That error and the pooled values are reshaped to one row per
         depth slice.
      3. For every depth slice, the conv output is walked window by window
         (left to right, wrapping to the next row once the left edge reaches
         the input width) and ``max_prime`` decides which cells of each
         window receive the pooled cell's error.

    ``poolsize[0]`` is used as the edge of the (square) window and
    ``poolsize[1]`` as the step between windows.  ``max_indices`` is reshaped
    to ``(depth, windows, 2)`` but only its window count is read here.

    Returns an array shaped like ``input_from_conv`` holding the error on the
    conv layer; cells never covered by a window stay 0.
    """
    pool_depth, pool_rows, pool_cols = pool_output.shape
    n_out, w_depth, w_rows, w_cols = prev_weights.shape
    cells_per_map = pool_rows * pool_cols

    flat_weights = prev_weights.reshape((n_out, w_depth * w_rows * w_cols))
    pool_column = pool_output.reshape((pool_depth * cells_per_map, 1))
    max_indices = max_indices.reshape((pool_depth, cells_per_map, 2))

    # error on the pooling layer, first as a column, then one row per slice
    slope = sigmoid_prime(pool_column)
    pool_error = np.dot(flat_weights.T, delta) * slope
    pool_error = pool_error.reshape((pool_depth, cells_per_map))
    pooled = pool_column.reshape((pool_depth, cells_per_map))

    depth, height, width = input_from_conv.shape
    conv_error = np.zeros((depth, height, width))
    window, step = poolsize[0], poolsize[1]
    n_windows = max_indices.shape[1]

    for channel in range(depth):  # conv and pool layers share the same depth
        top = 0
        left = 0
        for k in range(n_windows):
            rows = slice(top, top + window)
            cols = slice(left, left + window)
            tile = input_from_conv[channel][rows, cols]
            # route the pooled cell's error to the cell(s) that produced the max
            conv_error[channel][rows, cols] = max_prime(
                pooled[channel][k], pool_error[channel][k], tile)
            left += step
            if left >= width:
                left = 0
                top += step
    return conv_error
def backprop_to_conv(delta, weight_filters, stride, input_to_conv, prev_z_vals):
    """Accumulate filter and bias gradients for a convolutional layer.

    Parameters
    ----------
    delta : 3-D array, indexed ``[filter, out_row, out_col]``
        Error on the conv layer's output; used as given (no activation
        derivative is applied here).
    weight_filters : 4-D array ``(num_filters, depth, filter_h, filter_w)``
        Only its shape is read; it fixes the shape of the returned weight
        gradient and the size of the input windows.
    stride : int
        Step, in input cells, between neighbouring windows.
    input_to_conv : 3-D array ``(depth, height, width)``
        The input the filters were slid over.
    prev_z_vals :
        Unused; kept so existing callers keep working.

    Returns
    -------
    (delta_b, delta_w)
        ``delta_b`` has shape ``(num_filters, 1)`` and holds, per filter, the
        sum of that filter's errors.  ``delta_w`` has the shape of
        ``weight_filters`` and holds, per filter, the sum over all output
        positions of (input window * error at that position).
    """
    num_filters, _depth, filter_h, filter_w = weight_filters.shape
    out_rows, out_cols = delta.shape[1], delta.shape[2]

    delta_b = np.zeros((num_filters, 1))
    delta_w = np.zeros(weight_filters.shape)

    # Output cell (r, c) was produced by the window whose top-left corner is
    # (r * stride, c * stride).  Deriving the position from delta's own layout
    # avoids the old "wrap when the window touches the right edge" test, which
    # never fired when (width - filter size) was not a multiple of the stride.
    for r in range(out_rows):
        top = r * stride
        for c in range(out_cols):
            left = c * stride
            # the window is the same for every filter, so slice it only once
            window = input_to_conv[:, top:top + filter_h, left:left + filter_w]
            for j in range(num_filters):
                err = delta[j, r, c]
                delta_w[j] += window * err
                delta_b[j] += err  # bias gradient: sum of the filter's errors
    return delta_b, delta_w
def max_prime(res, delta, tile_to_pool):
    """Spread one pooled cell's error back over the tile it was pooled from.

    Every entry of the 2-D ``tile_to_pool`` that is *not* strictly smaller
    than ``res`` receives ``delta``; every other entry receives 0.  When
    ``res`` is the tile's maximum this routes the error to the maximal
    cell(s) only (ties all receive ``delta``).

    Returns a new float array with the same 2-D shape as ``tile_to_pool``;
    the input tile is not modified.
    """
    dim1, dim2 = tile_to_pool.shape  # a non-2-D tile is rejected here
    new_delta = np.zeros((dim1, dim2))
    # "not (x < res)" rather than "x >= res" keeps the treatment of NaN
    # entries identical to an element-by-element `if x < res` test.
    new_delta[~(tile_to_pool < res)] = delta
    return new_delta
def sigmoid(z):
    """Return the logistic function ``1 / (1 + exp(-z))`` element-wise.

    Accepts a scalar or an array.  The value is computed from
    ``exp(-|z|)``, which never exceeds 1, so large-magnitude inputs saturate
    cleanly to 0 or 1 instead of overflowing inside ``exp``:

      * z >= 0:  1 / (1 + exp(-z))
      * z <  0:  exp(z) / (1 + exp(z))

    A scalar input yields a NumPy scalar; an array input yields an array of
    the same shape.
    """
    z = np.asarray(z)
    decay = np.exp(-np.abs(z))
    result = np.where(z >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))
    # indexing with () turns a 0-d result back into a scalar and leaves
    # arrays of higher rank unchanged
    return result[()]
def sigmoid_prime(z):
    """Return the derivative of the sigmoid at ``z``: ``s * (1 - s)``.

    ``s = sigmoid(z)`` is evaluated once and reused for both factors.
    """
    s = sigmoid(z)
    return s * (1 - s)