miniflow.py
"""
Modify Linear#forward so that it linearly transforms
input matrices, weights matrices and a bias vector to
an output.
"""
import operator
from functools import reduce

import numpy as np
class Node(object):
    def __init__(self, inbound_nodes=None):
        # Nodes from which this Node receives values. Default to None
        # rather than a mutable `[]` default argument.
        self.inbound_nodes = inbound_nodes if inbound_nodes is not None else []
# Nodes to which this Node passes values
self.outbound_nodes = []
# A calculated value
self.value = None
# New property! Keys are the inputs to this node and
# their values are the partials of this node with
# respect to that input.
self.gradients = {}
# Add this node as an outbound node on its inputs.
for n in self.inbound_nodes:
n.outbound_nodes.append(self)
# These will be implemented in a subclass.
def forward(self):
"""
Forward propagation.
Compute the output value based on `inbound_nodes` and
store the result in self.value.
"""
        raise NotImplementedError
def backward(self):
"""
Every node that uses this class as a base class will
need to define its own `backward` method.
"""
raise NotImplementedError
class Input(Node):
"""
While it may be strange to consider an input a node when
an input is only an individual node in a node, for the sake
of simpler code we'll still use Node as the base class.
Think of Input as collating many individual input nodes into
a Node.
"""
def __init__(self):
        # An Input node has no inbound nodes,
        # so there's no need to pass anything to the Node constructor.
Node.__init__(self)
def forward(self, value=None):
# Do nothing because nothing is calculated.
pass
def backward(self):
        # An Input node has no inputs, so its own gradient (derivative)
        # starts at zero.
        # The key, `self`, is a reference to this object.
self.gradients = {self: 0}
# Weights and bias may be inputs, so you need to sum
# the gradient from output gradients.
for n in self.outbound_nodes:
grad_cost = n.gradients[self]
self.gradients[self] += grad_cost * 1
class Add(Node):
    # Accepts any number of input nodes.
def __init__(self, *inputs):
Node.__init__(self, inputs)
def forward(self):
"""
        Set the value of this node (`self.value`) to the sum of its inbound nodes' values.
"""
self.value = sum(node.value for node in self.inbound_nodes)
class Mul(Node):
    # Accepts any number of input nodes.
    def __init__(self, *inputs):
        Node.__init__(self, inputs)
    def forward(self):
        """
        Set the value of this node (`self.value`) to the product of its inbound nodes' values.
        """
        # `reduce` and `operator` are imported at the top of the file.
        self.value = reduce(operator.mul, [node.value for node in self.inbound_nodes], 1)
class Linear(Node):
def __init__(self, X, W, b):
# Notice the ordering of the input nodes passed to the
# Node constructor.
Node.__init__(self, [X, W, b])
def forward(self):
"""
Set self.value to the value of the linear function output.
"""
X = self.inbound_nodes[0].value
W = self.inbound_nodes[1].value
b = self.inbound_nodes[2].value
self.value = np.dot(X, W) + b
def backward(self):
"""
Calculates the gradient based on the output values.
"""
# Initialize a partial for each of the inbound_nodes.
self.gradients = {n: np.zeros_like(n.value) for n in self.inbound_nodes}
# Cycle through the outputs. The gradient will change depending
# on each output, so the gradients are summed over all outputs.
for n in self.outbound_nodes:
# Get the partial of the cost with respect to this node.
grad_cost = n.gradients[self]
# Set the partial of the loss with respect to this node's inputs.
self.gradients[self.inbound_nodes[0]] += np.dot(grad_cost, self.inbound_nodes[1].value.T)
# Set the partial of the loss with respect to this node's weights.
self.gradients[self.inbound_nodes[1]] += np.dot(self.inbound_nodes[0].value.T, grad_cost)
# Set the partial of the loss with respect to this node's bias.
self.gradients[self.inbound_nodes[2]] += np.sum(grad_cost, axis=0, keepdims=False)
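        # Shape bookkeeping: with X of shape (m, k), W of shape (k, j)
        # and grad_cost of shape (m, j), the three partials above have
        # shapes (m, k), (k, j) and (j,), matching X, W and b.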
class Sigmoid(Node):
def __init__(self, node):
Node.__init__(self, [node])
def _sigmoid(self, x):
"""
This method is separate from `forward` because it
will be used later with `backward` as well.
`x`: A numpy array-like object.
Return the result of the sigmoid function.
"""
return 1. / (1. + np.exp(-x))
def forward(self):
"""
Set the value of this node to the result of the
sigmoid function, `_sigmoid`.
"""
self.value = self._sigmoid(self.inbound_nodes[0].value)
def backward(self):
"""
Calculates the gradient using the derivative of
the sigmoid function.
"""
# Initialize the gradients to 0.
self.gradients = {n: np.zeros_like(n.value) for n in self.inbound_nodes}
# Cycle through the outputs. The gradient will change depending
# on each output, so the gradients are summed over all outputs.
for n in self.outbound_nodes:
# Get the partial of the cost with respect to this node.
grad_cost = n.gradients[self]
x = self.inbound_nodes[0].value
sig = self._sigmoid(x)
self.gradients[self.inbound_nodes[0]] += grad_cost * sig * (1. - sig)
class MSE(Node):
def __init__(self, y, a):
"""
The mean squared error cost function.
Should be used as the last node for a network.
"""
# Call the base class' constructor.
Node.__init__(self, [y, a])
self.m = 0
self.diff = 0
def forward(self):
"""
Calculates the mean squared error.
"""
# NOTE: We reshape these to avoid possible matrix/vector broadcast
# errors.
#
# For example, if we subtract an array of shape (3,) from an array of shape
        # (3,1) we get an array of shape (3,3) as the result when we want
# an array of shape (3,1) instead.
#
        # Making both arrays (3,1) ensures the result is (3,1) and does
# an elementwise subtraction as expected.
y = self.inbound_nodes[0].value.reshape(-1, 1)
a = self.inbound_nodes[1].value.reshape(-1, 1)
self.diff = y - a
self.m = self.inbound_nodes[0].value.shape[0]
self.value = np.mean(np.square(self.diff))
def backward(self):
"""
Calculates the gradient of the cost.
This is the final node of the network so outbound nodes
are not a concern.
"""
self.gradients[self.inbound_nodes[0]] = (2 / self.m) * self.diff
self.gradients[self.inbound_nodes[1]] = (-2 / self.m) * self.diff
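        # Derivation: C = (1/m) * sum((y - a)^2), so
        # dC/dy = (2/m) * (y - a) and dC/da = -(2/m) * (y - a).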
def topological_sort(feed_dict):
"""
Sort the nodes in topological order using Kahn's Algorithm.
    `feed_dict`: A dictionary where the key is an `Input` node and the value is the value fed to that node.
Returns a list of sorted nodes.
"""
    input_nodes = list(feed_dict)
    G = {}
    nodes = list(input_nodes)
while len(nodes) > 0:
n = nodes.pop(0)
if n not in G:
G[n] = {'in': set(), 'out': set()}
for m in n.outbound_nodes:
if m not in G:
G[m] = {'in': set(), 'out': set()}
G[n]['out'].add(m)
G[m]['in'].add(n)
nodes.append(m)
L = []
S = set(input_nodes)
while len(S) > 0:
n = S.pop()
if isinstance(n, Input):
n.value = feed_dict[n]
L.append(n)
for m in n.outbound_nodes:
G[n]['out'].remove(m)
G[m]['in'].remove(n)
# if no other incoming edges add to S
if len(G[m]['in']) == 0:
S.add(m)
return L
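# For example (an illustrative sketch, not part of the original file):
#     x, y = Input(), Input()
#     s = Add(x, y)
#     graph = topological_sort({x: 1, y: 2})  # every node after its inputs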
def forward_pass(output_node, sorted_nodes):
"""
Performs a forward pass through a list of sorted Nodes.
Arguments:
    `output_node`: A Node in the graph; it should be the output node (i.e., have no outgoing edges).
`sorted_nodes`: a topologically sorted list of nodes.
Returns the output node's value
"""
for n in sorted_nodes:
n.forward()
return output_node.value
def forward_and_backward(graph):
"""
Performs a forward pass and a backward pass through a list of sorted nodes.
Arguments:
`graph`: The result of calling `topological_sort`.
"""
# Forward pass
for n in graph:
n.forward()
# Backward pass
# see: https://docs.python.org/2.3/whatsnew/section-slices.html
for n in graph[::-1]:
n.backward()
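# A sanity-check helper (an added sketch, not part of the original file):
# `gradient_check` compares the analytic gradient stored on a trainable
# Input against a central finite-difference estimate of the cost. The
# name and signature are assumptions made for illustration.
def gradient_check(graph, cost_node, trainable, index, eps=1e-6):
    """
    Returns (analytic, numeric) gradient estimates for the entry of
    `trainable.value` at `index`. `cost_node` should be the final node
    in `graph` (e.g. an MSE node).
    """
    # Analytic gradient from one forward/backward pass.
    forward_and_backward(graph)
    analytic = trainable.gradients[trainable][index]
    original = trainable.value[index]
    # Central difference: evaluate the cost at (w + eps) and (w - eps).
    trainable.value[index] = original + eps
    cost_plus = forward_pass(cost_node, graph)
    trainable.value[index] = original - eps
    cost_minus = forward_pass(cost_node, graph)
    trainable.value[index] = original  # restore
    numeric = (cost_plus - cost_minus) / (2 * eps)
    return analytic, numeric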
def sgd_update(trainables, learning_rate=1e-2):
"""
Updates the value of each trainable with SGD.
Arguments:
`trainables`: A list of `Input` Nodes representing weights/biases.
`learning_rate`: The learning rate.
"""
    for trainable in trainables:
        # Step against the gradient of the cost w.r.t. this trainable.
        trainable.value -= learning_rate * trainable.gradients[trainable]
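# --- Example usage ---
# A minimal sketch of the full pipeline. The shapes and values below are
# illustrative assumptions, not part of the original file: build
# y_hat = sigmoid(X @ W + b), score it with MSE, and take a few SGD steps.
if __name__ == "__main__":
    X_in, W_in, b_in, y_in = Input(), Input(), Input(), Input()
    f = Linear(X_in, W_in, b_in)
    a = Sigmoid(f)
    cost = MSE(y_in, a)
    feed_dict = {
        X_in: np.array([[0.5, -0.5], [1.0, 2.0]]),  # 2 examples, 2 features
        W_in: np.array([[0.1], [0.2]]),             # 2 features -> 1 output
        b_in: np.array([0.]),
        y_in: np.array([1., 0.]),                   # one target per example
    }
    graph = topological_sort(feed_dict)
    for epoch in range(100):
        forward_and_backward(graph)
        sgd_update([W_in, b_in], learning_rate=0.1)
    print("cost after 100 SGD steps:", cost.value)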