# hand_writing_digits_pj_4layer_finetuning.py
# Fine-tuning script for a 4-layer neural network on 16x16 handwritten digits.
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.preprocessing import StandardScaler
from scipy.io import loadmat
import cv2
import pickle
# Load the digits dataset (MATLAB .mat file) with keys X/y, Xvalid/yvalid,
# Xtest/ytest — each image is a flattened 16x16 grayscale array.
# NOTE(review): the path is machine-specific; consider making it configurable.
dic = loadmat("D:/desktop/NN&DL/project_1_release/codes/digits.mat")
# Quick visual sanity check of one validation sample (disabled):
# idx = 125
# img = dic["Xvalid"][idx, :].reshape(16,16)
# print(dic["yvalid"][idx])
# plt.imshow(img,cmap="gray")
# plt.show()
def sigmoid_func(x):
    """Numerically stable logistic sigmoid.

    Uses the identity sigmoid(x) = 0.5 * (1 + tanh(x / 2)), which avoids
    the overflow that the naive 1 / (1 + exp(-x)) form hits for large
    negative inputs (this stability noticeably improved accuracy).
    """
    half_x = 0.5 * x
    return 0.5 * (1.0 + np.tanh(half_x))
def softmax(x):
    """Column-wise softmax over axis 0.

    Subtracts the per-column maximum before exponentiating so large logits
    cannot overflow; the shift cancels out in the normalisation.
    """
    shifted = x - np.max(x, axis=0, keepdims=True)
    weights = np.exp(shifted)
    total = np.sum(weights, axis=0, keepdims=True)
    return weights / total
def negative_log_likelihood(self, y_true, y_pred):
    """Mean negative log-likelihood of the true classes.

    y_pred: (n_samples, n_classes) array of predicted probabilities.
    y_true: integer class labels, one per sample.

    NOTE(review): `self` is unused — this looks like a method pasted to
    module level; the parameter is kept so existing call sites still work.
    """
    row_idx = np.arange(len(y_true))
    true_class_probs = y_pred[row_idx, y_true]
    return -np.mean(np.log(true_class_probs))
def mse_loss(y_true, y_pred):
    """Mean squared error between target and predicted arrays."""
    residual = y_true - y_pred
    return (residual ** 2).mean()
def cross_entropy_loss(prediction, label):
    """Cross-entropy of a single sample against its one-hot target.

    prediction: (num_classes, 1) column of predicted probabilities.
    label: integer class index.
    Returns the scalar -sum(one_hot * log(prediction)).
    """
    one_hot = np.zeros((prediction.shape[0], 1))
    one_hot[label] = 1
    return -np.sum(one_hot * np.log(prediction))
def compute_cost(AL, Y):
    """Cross-entropy cost between predictions AL and targets Y.

    Arguments:
    AL -- predicted probability array, same shape as Y
    Y -- target array; in this script a 1-D soft one-hot vector of length
         `outnodes` (0.01 everywhere, 0.99 at the true class)

    Returns:
    cost -- scalar: -sum(Y * log(AL)) / Y.shape[0]

    NOTE(review): the original docstring described normalisation by the
    number of examples, but m = Y.shape[0] is the FIRST axis — for the 1-D
    per-sample targets used here that is the number of classes, not
    examples. Behavior kept as-is; confirm intent before changing.
    """
    m = Y.shape[0]
    cost = -(np.sum(Y * np.log(AL))) / float(m)
    # cost = np.squeeze(cost)
    # Sanity check: the result is a 0-d (scalar) value.
    assert (cost.shape == ())
    return cost
# Load the pre-trained parameters [w1, w2, b1, b2] pickled by the earlier
# training run; NeuralNetwork.__init__ unpacks them in that order.
# Using a context manager guarantees the file handle is closed even if
# pickle.load raises (the original open/close pair leaked on error).
# NOTE(review): pickle.load is unsafe on untrusted files — acceptable here
# only because the checkpoint is produced locally.
with open('trained_wei4.pkl', 'rb') as pkl_file:
    trained_weight = pickle.load(pkl_file)
class NeuralNetwork:
    """Four-layer network (input, two hidden, output) for digit recognition.

    Fine-tuning configuration: the first two layers (w1, w2, b1, b2) are
    loaded from a checkpoint and are NOT updated by train(); only the
    freshly initialised output layer (w3, b3) is trained, using momentum
    SGD with optional weight decay, plus dropout on the hidden activations.

    NOTE(review): `momentum`, `lamda`, and the dropout keep-probability `p`
    are read from module-level globals defined later in the script, not
    passed as constructor arguments — they must exist before instantiation.
    """
    # Initialise the neural network's parameters.
    def __init__(self, innodes, hid1nodes, hid2nodes, outnodes, learningrate, trained_weight):
        # Number of nodes in the input, hidden, and output layers.
        self.inodes = innodes    # input layer size (flattened 16x16 -> 256)
        self.h1nodes = hid1nodes # first hidden layer size
        self.h2nodes = hid2nodes # second hidden layer size
        self.onodes = outnodes   # output layer size (number of classes)
        # Pre-trained parameters for the first two layers (kept frozen).
        self.w1 = trained_weight[0]
        self.w2 = trained_weight[1]
        self.b1 = trained_weight[2]
        self.b2 = trained_weight[3]
        # Random initialisation of the frozen layers, disabled for fine-tuning:
        # (normal distribution, mean 0, std = hidden_nodes ** -0.5)
        # self.w1 = np.random.normal(0.0, pow(self.h1nodes, -0.5), (self.h1nodes, self.inodes))
        # self.w2 = np.random.normal(0.0, pow(self.h1nodes, -0.5), (self.h2nodes, self.h1nodes))
        # New output layer, randomly initialised: shape (onodes, h2nodes).
        self.w3 = np.random.normal(0.0, pow(self.h2nodes, -0.5), (self.onodes, self.h2nodes))
        # self.b1 = np.zeros((self.h1nodes, ))
        # self.b2 = np.zeros((self.h2nodes,))
        self.b3 = np.zeros((self.onodes, ))
        # Momentum accumulators; only the output-layer ones are active.
        # self.V_dW1 = np.zeros_like(self.w1)
        # self.V_db1 = np.zeros_like(self.b1)
        # self.V_dW2 = np.zeros_like(self.w2)
        # self.V_db2 = np.zeros_like(self.b2)
        self.V_dW3 = np.zeros_like(self.w3)
        self.V_db3 = np.zeros_like(self.b3)
        # Unused velocity placeholders (never read elsewhere in this file).
        self.v1 = 0
        self.v2 = 0
        self.v3 = 0
        # Learning rate alpha.
        self.lr = learningrate
        # NOTE(review): read from module-level globals, see class docstring.
        self.momentum = momentum
        self.lamda = lamda
        # Wrap the sigmoid as the activation function.
        self.activation_function = lambda x: sigmoid_func(x)
        pass

    def train(self, inputs, targets):
        """One SGD step on a single sample; updates only w3/b3."""
        # w1->(100*256), w2->(10*100)  (historical shape notes)
        # ---- Forward propagation ----
        hidden_inputs_1 = np.dot(self.w1, inputs) + self.b1
        # Activations of the first hidden layer.
        hidden_outputs_1 = (sigmoid_func(hidden_inputs_1))
        # Dropout: zero each unit with probability (1 - p). This is
        # NON-inverted dropout — pre() compensates by scaling outputs by p.
        drop_out1 = np.random.rand(*hidden_outputs_1.shape) < p
        hidden_outputs_1 *= drop_out1
        hidden_inputs_2 = np.dot(self.w2, hidden_outputs_1) + self.b2
        hidden_outputs_2 = (sigmoid_func(hidden_inputs_2))
        drop_out2 = np.random.rand(*hidden_outputs_2.shape) < p
        hidden_outputs_2 *= drop_out2
        final_inputs = np.dot(self.w3, hidden_outputs_2) + self.b3
        # Output-layer activations (sigmoid, not softmax).
        final_outputs = (sigmoid_func(final_inputs))
        # ---- Backward propagation ----
        # Output-layer error is (target - actual).
        output_errors = targets - final_outputs
        # delta = error * sigmoid'(z), with sigmoid' in the form x * (1 - x).
        output_delta = output_errors * final_outputs * (1.0 - final_outputs)
        # Error propagated back through w3.
        hidden_error_2 = np.dot(self.w3.T, output_delta)
        hidden_delta_2 = hidden_error_2 * hidden_outputs_2 * (1.0 - hidden_outputs_2)
        hidden_error_1 = np.dot(self.w2.T, hidden_delta_2)
        # NOTE(review): hidden_delta_1 is computed but unused — the first
        # two layers are frozen during fine-tuning.
        hidden_delta_1 = hidden_error_1 * hidden_outputs_1 * (1.0 - hidden_outputs_1)
        # """softmax""" (disabled softmax-gradient variant):
        # grad_ho = np.dot(y_hat.reshape(-1,1), hidden_outputs.reshape(1,-1))
        # grad_ih = np.dot(hidden_delta.reshape(-1, 1), np.transpose(inputs.reshape(-1, 1)))
        # grad_bias_o = np.mean(y_hat, axis=0, keepdims=True)
        # grad_bias_h = np.mean(hidden_delta, axis=0, keepdims=True)
        # self.V_dW1 = self.momentum * self.V_dW1 + (1 - self.momentum) * grad_ih
        # self.V_db1 = self.momentum * self.V_db1 + (1 - self.momentum) * grad_bias_h
        # self.V_dW2 = self.momentum * self.V_dW2 + (1 - self.momentum) * grad_ho
        # self.V_db2 = self.momentum * self.V_db2 + (1 - self.momentum) * grad_bias_o
        # Outer product: output delta x hidden activations -> w3 gradient.
        dw3 = np.dot(output_delta.reshape(-1, 1), np.transpose(hidden_outputs_2.reshape(-1, 1)))
        # dw2 = np.dot(hidden_delta_2.reshape(-1, 1), np.transpose(hidden_outputs_1.reshape(-1, 1)))
        # # dw2 = np.dot(output_delta.reshape(-1, 1), np.transpose(hidden_outputs.reshape(-1, 1)))
        # dw1 = np.dot(hidden_delta_1.reshape(-1, 1), np.transpose(inputs.reshape(-1, 1)))
        # NOTE(review): output_delta is 1-D here, so this sum collapses the
        # per-class bias gradient to a single shared scalar (shape (1,)),
        # which then broadcasts over all of b3. A per-class bias update
        # would use output_delta directly — confirm whether this is intended.
        db3 = np.sum(output_delta, axis=0, keepdims=True)
        # db2 = np.sum(hidden_delta_2, axis=0)
        # db1 = np.sum(hidden_delta_1, axis=0)
        # V is the momentum-smoothed step to apply to the parameters.
        # self.V_dW1 = self.momentum * self.V_dW1 + (1 - self.momentum) * dw1
        # self.V_db1 = self.momentum * self.V_db1 + (1 - self.momentum) * db1
        # self.V_dW2 = self.momentum * self.V_dW2 + (1 - self.momentum) * dw2
        # self.V_db2 = self.momentum * self.V_db2 + (1 - self.momentum) * db2
        self.V_dW3 = self.momentum * self.V_dW3 + (1 - self.momentum) * dw3
        self.V_db3 = self.momentum * self.V_db3 + (1 - self.momentum) * db3
        # Gradient ascent on (target - output) == descent on the loss;
        # lamda is the weight-decay coefficient.
        # self.w1 += self.lr * (self.V_dW1 - self.lamda * self.w1)
        # self.b1 += self.lr * self.V_db1
        # self.w2 += self.lr * (self.V_dW2 - self.lamda * self.w2)
        # self.b2 += self.lr * self.V_db2
        self.w3 += self.lr * (self.V_dW3 - self.lamda * self.w3)
        self.b3 += self.lr * self.V_db3
        # self.w1 = self.w1 * (1 - self.lr * lamda) + dw1 * self.lr
        # self.w2 = self.w2 * (1 - self.lr * lamda) + dw2 * self.lr
        pass

    def weights(self):
        """Return live references (not copies) to all parameters, in the
        order [w1, w2, w3, b1, b2, b3]."""
        return [self.w1, self.w2, self.w3, self.b1, self.b2, self.b3]

    def weights_optim(self, best_w1, best_w2, best_w3, best_b1, best_b2, best_b3):
        """Restore a previously saved best set of parameters."""
        self.w1 = best_w1
        self.w2 = best_w2
        self.w3 = best_w3
        self.b1 = best_b1
        self.b2 = best_b2
        self.b3 = best_b3

    def pre(self, inputs):
        """Forward pass for prediction (no dropout masks).

        Hidden activations are scaled by the global keep-probability `p`
        to match the expected activation magnitude of the dropout-trained
        network (non-inverted dropout convention).
        """
        hidden_inputs_1 = np.dot(self.w1, inputs) + self.b1
        hidden_outputs_1 = (sigmoid_func(hidden_inputs_1)) * p
        hidden_inputs_2 = np.dot(self.w2, hidden_outputs_1) + self.b2
        hidden_outputs_2 = (sigmoid_func(hidden_inputs_2)) * p
        final_inputs = np.dot(self.w3, hidden_outputs_2) + self.b3
        # Output probabilities-like scores, one per class.
        final_outputs = (sigmoid_func(final_inputs))
        return final_outputs
# ---- Hyper-parameters and run configuration ----
innodes = 256        # flattened 16x16 input images
hid1nodes = 180      # first hidden layer width (must match the checkpoint)
hid2nodes = 125      # second hidden layer width (must match the checkpoint)
outnodes = 10        # digit classes, labelled 1..10 in the dataset
learningrate = 0.3
momentum = 0.9       # momentum coefficient (read as a global by the class)
lamda = 0            # weight-decay coefficient (disabled)
patience = 5         # early-stopping patience, in epochs
p = 0.5              # dropout keep-probability (read as a global)
n = NeuralNetwork(innodes, hid1nodes, hid2nodes, outnodes, learningrate, trained_weight)
tr_data = dic["X"]
valid_data = dic["Xvalid"]
epochs = 50
score = 0            # running correct-count for the final test evaluation
best_loss = 1        # unused unless the loss-based early-stop branch is enabled
best_acu = 0         # best validation accuracy seen so far
# ---- Data augmentation: rotate the first 1000 training images ----
# Each index range gets a fixed rotation angle about the image centre (8, 8).
# NOTE(review): rotated images keep the ORIGINAL label dic["y"][idx] in the
# training loop below; for large rotations (90/180/270 degrees) of digits
# this may be a questionable label assignment (e.g. 6 vs 9) — confirm intent.
tr_data_rotation = []
for idx, data in enumerate(tr_data):
    img = data.reshape(16,16)
    if 0<= idx <= 200:
        # 90-degree rotation.
        M = cv2.getRotationMatrix2D((8, 8), 90, 1.0)
        rotation_img = cv2.warpAffine(img, M, (16, 16))
        tr_data_rotation.append(rotation_img.reshape(256,))
    elif 200< idx <= 500:
        # 180-degree rotation.
        M = cv2.getRotationMatrix2D((8, 8), 180, 1.0)
        rotation_img = cv2.warpAffine(img, M, (16, 16))
        tr_data_rotation.append(rotation_img.reshape(256,))
    elif 500< idx <= 700:
        # 45-degree rotation.
        M = cv2.getRotationMatrix2D((8, 8), 45, 1.0)
        rotation_img = cv2.warpAffine(img, M, (16, 16))
        tr_data_rotation.append(rotation_img.reshape(256,))
    elif 700< idx <= 1000:
        # 270-degree rotation.
        M = cv2.getRotationMatrix2D((8, 8), 270, 1.0)
        rotation_img = cv2.warpAffine(img, M, (16, 16))
        tr_data_rotation.append(rotation_img.reshape(256, ))
    else:
        # Only the first 1001 samples are augmented.
        break
## fine-tuning
# Train the output layer with early stopping on validation accuracy.
# NOTE(review): trained_weight is rebuilt with only [w1, w2, b1, b2] on
# early stop — the fine-tuned w3/b3 are NOT saved. Also, if the first epoch
# fails to beat best_acu, `stopping_rounds += 1` raises NameError (it is
# first assigned in the improvement branch), and the best weights are only
# restored when early stopping actually triggers.
trained_weight = []
final_input = []
for i in range(epochs):
    train_score = 0
    valid_score = 0
    tr_loss = 0
    valid_loss = 0
    # ---- Train on the original data ----
    for idx, data in enumerate(tr_data):
        # Scale pixels into (0.01, 1.0] to keep sigmoid gradients alive.
        inputs = (np.asfarray(data) / 255.0) * 0.99 + 0.01
        # Soft one-hot target: 0.01 everywhere, 0.99 at the true class.
        targets = np.zeros(outnodes) + 0.01
        targets[int(dic["y"][idx]-1)] = 0.99
        n.train(inputs, targets)
        outputs = n.pre(inputs)
        # argmax picks the predicted class index (labels are 1-based).
        label = np.argmax(outputs)
        if (label + 1 == dic["y"][idx]):
            train_score += 1
        else:
            train_score += 0
        tr_loss += compute_cost(outputs, targets)
    # ---- Train on the rotated augmentation set (same labels by index) ----
    for idx, data in enumerate(tr_data_rotation):
        inputs = (np.asfarray(data) / 255.0) * 0.99 + 0.01
        targets = np.zeros(outnodes) + 0.01
        targets[int(dic["y"][idx] - 1)] = 0.99
        n.train(inputs, targets)
        outputs = n.pre(inputs)
        label = np.argmax(outputs)
        if (label + 1 == dic["y"][idx]):
            train_score += 1
        else:
            train_score += 0
        tr_loss += compute_cost(outputs, targets)
    # tr_loss = tr_loss / (len(tr_data))
    tr_loss = tr_loss/(len(tr_data)+len(tr_data_rotation))
    # tr_accuracy = train_score / (len(tr_data))
    tr_accuracy = train_score/(len(tr_data)+len(tr_data_rotation))
    # ---- Validation pass (no weight updates) ----
    for idx, data in enumerate(valid_data):
        data = (np.asfarray(data) / 255.0) * 0.99 + 0.01
        targets = np.zeros(outnodes) + 0.01
        targets[int(dic["yvalid"][idx] - 1)] = 0.99
        outputs = n.pre(data)
        # argmax picks the index of the highest-scoring class.
        label = np.argmax(outputs)
        if (label + 1 == dic["yvalid"][idx]):
            valid_score += 1
        else:
            valid_score += 0
        valid_loss += compute_cost(outputs, targets)
    valid_loss = valid_loss / len(dic["yvalid"])
    val_accuracy = valid_score/len(dic["yvalid"])
    print(f"Train accuracy: {tr_accuracy:.4f}. Valid accuracy: {val_accuracy:.4f}. Train loss: {tr_loss:.4f}. Valid loss: {valid_loss:.4f}")
    # ---- Early-stopping bookkeeping on validation accuracy ----
    if val_accuracy > best_acu:
        # if valid_loss < best_loss:
        # best_loss = valid_loss
        best_acu = val_accuracy
        # Snapshot all parameters (copies, since weights() returns references).
        best_w1 = n.weights()[0].copy()
        best_w2 = n.weights()[1].copy()
        best_w3 = n.weights()[2].copy()
        best_b1 = n.weights()[3].copy()
        best_b2 = n.weights()[4].copy()
        best_b3 = n.weights()[5].copy()
        stopping_rounds = 0
    else:
        stopping_rounds += 1
        if stopping_rounds >= patience:
            print(f"Validation accuracy has not improved for {patience} rounds. Stopping training.")
            # Roll back to the best snapshot before evaluating on test data.
            n.weights_optim(best_w1, best_w2, best_w3, best_b1, best_b2, best_b3)
            trained_weight.append(best_w1)
            trained_weight.append(best_w2)
            trained_weight.append(best_b1)
            trained_weight.append(best_b2)
            break
# ---- Final evaluation on the held-out test set ----
te_data = dic["Xtest"]
te_lost = 0
for idx, data in enumerate(te_data):
    # Same input scaling and soft one-hot targets as in training.
    data = (np.asfarray(data) / 255.0) * 0.99 + 0.01
    targets = np.zeros(outnodes) + 0.01
    targets[int(dic["ytest"][idx] - 1)] = 0.99
    outputs = n.pre(data)
    # argmax picks the index of the highest-scoring class (labels 1-based).
    label = np.argmax(outputs)
    if (label+1 == dic["ytest"][idx]):
        score += 1
    else:
        # network's answer doesn't match correct answer, add 0 to scorecard
        score += 0
    te_lost += compute_cost(outputs, targets)
te_accuracy = score / len(dic["ytest"])
te_lost = te_lost / len(dic["ytest"])
# Report the fraction of correct answers and the mean test loss.
print(f"test accuracy : {te_accuracy: .4f} , test lost : {te_lost:.4f}")