model.py
import numpy as np
np.random.seed(786)

class Net:
    def __init__(self, input_dim, latent_dim, output_dim, a=1e-6, b=1e-2, alpha=0.602, gamma=0.101):
        self.input_dim = input_dim
        self.latent_dim = latent_dim
        self.output_dim = output_dim
        # SPSA gain-sequence hyperparameters: a_t = a / (t + 1 + A)^alpha, b_t = b / (t + 1)^gamma
        self.a = a
        self.b = b
        self.alpha = alpha
        self.gamma = gamma
        # initialize layer weights (biases are currently disabled)
        self.weights = []
        self.biases = []
        self.weights.append(np.random.randn(self.input_dim, self.latent_dim))
        self.weights.append(np.random.randn(self.latent_dim, self.latent_dim))
        self.weights.append(np.random.randn(self.latent_dim, self.latent_dim))
        self.weights.append(np.random.randn(self.latent_dim, self.output_dim))
        # self.biases.append(np.zeros([1, self.latent_dim]))
        # self.biases.append(np.zeros([1, self.latent_dim]))
        # self.biases.append(np.zeros([1, self.latent_dim]))
        # self.biases.append(np.zeros([1, self.output_dim]))
    def train(self, inputs, targets, t_max=100):
        """Train one layer at a time with SPSA (simultaneous perturbation stochastic approximation)."""
        for l in range(len(self.weights)):
            # work on copies so that only layer l is perturbed
            w_p = [w.copy() for w in self.weights]
            w_m = [w.copy() for w in self.weights]
            # b_p = [b.copy() for b in self.biases]
            # b_m = [b.copy() for b in self.biases]
            for t in range(1, t_max):
                saved_weights = np.copy(self.weights[l])
                preds = self.forward(inputs, self.weights)
                loss_old = self.loss(preds, targets)
                # decaying gain sequences (50000 is the stability constant A)
                a_t = self.a / (1 + t + 50000) ** self.alpha
                b_t = self.b / (1 + t) ** self.gamma
                # Rademacher (+/-1) perturbation for layer l
                delta = np.random.binomial(1, p=0.5, size=self.weights[l].shape) * 2. - 1
                # delta2 = np.random.binomial(1, p=0.5, size=self.weights[l].shape) * 2. - 1
                # perturb layer l in the plus direction and evaluate the loss
                w_p[l] = self.weights[l] + b_t * delta
                # b_p[l] = self.biases[l] + b_t * delta2
                preds = self.forward(inputs, w_p)
                loss_p = self.loss(preds, targets)
                # perturb layer l in the minus direction and evaluate the loss
                w_m[l] = self.weights[l] - b_t * delta
                # b_m[l] = self.biases[l] - b_t * delta2
                preds = self.forward(inputs, w_m)
                loss_m = self.loss(preds, targets)
                # SPSA gradient estimate: g_hat = (L(w + b_t*delta) - L(w - b_t*delta)) / (2 * b_t * delta)
                g_hat = (loss_p - loss_m) / (2 * b_t * delta)
                # g_hat2 = (loss_p - loss_m) / (2 * b_t * delta2)
                # Update the weights, halving the step size element-wise until the
                # candidate weights stay inside the clipping box [-5, 5]
                clip_max = np.ones(self.weights[l].shape) * 5
                clip_min = np.ones(self.weights[l].shape) * (-5)
                this_ak = np.ones(self.weights[l].shape) * a_t
                while True:
                    W_new = self.weights[l] - this_ak * g_hat
                    out_of_bounds = np.logical_or(W_new > clip_max, W_new < clip_min)
                    if not out_of_bounds.any():
                        self.weights[l] = W_new
                        break
                    this_ak[out_of_bounds] = this_ak[out_of_bounds] / 2.
                # reject the update if it changes the loss too abruptly (blocking step)
                preds = self.forward(inputs, self.weights)
                loss_new = self.loss(preds, targets)
                if np.abs(loss_new - loss_old) > 5:
                    self.weights[l] = saved_weights
        return self.forward(inputs, self.weights)
    @staticmethod
    def forward(inputs, w):
        # three tanh hidden layers followed by a linear output layer
        out = np.tanh(inputs.dot(w[0]))
        out = np.tanh(out.dot(w[1]))
        out = np.tanh(out.dot(w[2]))
        return out.dot(w[3])

    @staticmethod
    def loss(preds, targets):
        # sum of squared errors
        return np.sum((preds - targets) ** 2)
    def getPreds(self, inputs):
        # convenience wrapper: forward pass with the current weights
        return self.forward(inputs, self.weights)
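
# --- Illustrative usage sketch (not part of the original file) ---
# A minimal example of how this class might be driven, assuming a toy
# regression problem; the data, dimensions, and t_max below are made up
# for demonstration only.
if __name__ == "__main__":
    X = np.random.uniform(-1, 1, size=(64, 2))            # toy inputs
    y = np.sin(X[:, :1]) + 0.1 * np.random.randn(64, 1)   # toy targets
    net = Net(input_dim=2, latent_dim=8, output_dim=1)
    preds = net.train(X, y, t_max=200)                    # SPSA training loop
    print("final SSE:", Net.loss(net.getPreds(X), y))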