-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsoftmax.py
executable file
·70 lines (53 loc) · 2.35 KB
/
softmax.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
import numpy as np
class Softmax:
    """A standard fully-connected layer with softmax activation.

    Attributes:
        weights: array of shape (input_len, nodes).
        biases: array of shape (nodes,).
        last_input_shape, last_input, last_totals: values cached by
            ``forward`` for use in the following ``backprop`` call.
    """

    def __init__(self, input_len, nodes):
        """Create the layer with random weights and zero biases.

        - input_len is the number of values in a flattened input.
        - nodes is the number of output classes.
        """
        # We divide by input_len to reduce the variance of our initial values
        self.weights = np.random.randn(input_len, nodes) / input_len
        self.biases = np.zeros(nodes)

    @staticmethod
    def _softmax(totals):
        """Return softmax(totals) for a 1d array, computed stably."""
        # Softmax is unchanged by adding a constant to every entry, so we
        # shift by the max: every exponent is then <= 0 and np.exp cannot
        # overflow to inf (which would turn the result into nan).
        exp = np.exp(totals - np.max(totals))
        return exp / np.sum(exp)

    def forward(self, input):
        """Perform a forward pass of the softmax layer using the given input.

        Returns a 1d numpy array containing the respective probability values.
        - input can be any array with any dimensions; it is flattened, so its
          total size must match the first dimension of the weights.

        The input shape, the flattened input and the pre-activation totals
        are cached on the instance for backprop.
        """
        input = np.asarray(input)
        self.last_input_shape = input.shape

        flat = input.flatten()
        self.last_input = flat

        totals = np.dot(flat, self.weights) + self.biases
        self.last_totals = totals

        return self._softmax(totals)

    def backprop(self, d_L_d_out, learn_rate):
        """Perform a backward pass of the softmax layer.

        Updates the weights and biases in place with one gradient-descent
        step and returns the loss gradient for this layer's inputs, reshaped
        to the shape of the input given to the last forward() call.
        - d_L_d_out is the loss gradient for this layer's outputs (1d, one
          entry per node). It may have any number of nonzero entries; an
          all-zero gradient leaves the parameters unchanged and yields an
          all-zero input gradient.
        - learn_rate is a float.

        forward() must have been called first.
        """
        d_L_d_out = np.asarray(d_L_d_out, dtype=float)

        # Recompute the layer outputs from the cached totals (stably).
        out = self._softmax(self.last_totals)

        # Gradients of loss against totals. The softmax Jacobian is
        #   d out[i] / d t[j] = out[i] * ((i == j) - out[j]),
        # so the Jacobian-vector product collapses to the expression below.
        # With a single nonzero entry in d_L_d_out this matches the
        # per-index formula, but it is also correct for dense gradients.
        d_L_d_t = out * (d_L_d_out - np.dot(d_L_d_out, out))

        # Gradients of loss against weights/biases/input.
        # totals = input @ weights + biases, hence:
        #   d t / d w = input, d t / d b = 1, d t / d input = weights
        d_L_d_w = np.outer(self.last_input, d_L_d_t)
        d_L_d_b = d_L_d_t
        # Must be computed before the weights are updated below.
        d_L_d_inputs = self.weights @ d_L_d_t

        # Update weights / biases
        self.weights -= learn_rate * d_L_d_w
        self.biases -= learn_rate * d_L_d_b

        return d_L_d_inputs.reshape(self.last_input_shape)