train_mnist.py
#!/usr/bin/env python
from __future__ import print_function

try:
    import matplotlib
    matplotlib.use('Agg')
except ImportError:
    pass

import argparse

import chainer
import chainer.functions as F
import chainer.links as L
from chainer import training
from chainer.training import extensions
import numpy

import weight_normalization as WN
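
# NOTE: the exact behaviour of convert_with_weight_normalization is defined in
# the local weight_normalization module. The calls below suggest it takes a
# link class plus that class's constructor arguments and returns an equivalent
# link whose weight is reparameterized as W = g * v / ||v|| (weight
# normalization, Salimans & Kingma, 2016), so the scale g and direction v are
# optimized as separate parameters. Treat this description as an assumption
# about the module, not a guarantee from it.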


class WNMLP(chainer.Chain):

    """MLP whose Linear links are wrapped with weight normalization."""

    def __init__(self, n_units, n_out, n_layers=3):
        super(WNMLP, self).__init__(
            l1=WN.convert_with_weight_normalization(
                L.Linear, 784, n_units),
            lo=WN.convert_with_weight_normalization(
                L.Linear, n_units, n_out),
        )
        self.n_layers = n_layers
        # Hidden layers l2 .. l{n_layers - 2}, all weight-normalized.
        for i in range(2, self.n_layers - 1):
            self.add_link(
                'l{}'.format(i),
                WN.convert_with_weight_normalization(
                    L.Linear, n_units, n_units))

    def __call__(self, x):
        act = F.leaky_relu
        h = x
        for i in range(1, self.n_layers - 1):
            if i == 1:
                h = act(getattr(self, 'l{}'.format(i))(h))
            else:
                # Residual connection: each hidden layer's input is added
                # back to its activation.
                h = act(getattr(self, 'l{}'.format(i))(h)) + h
        return self.lo(h)


class MLP(chainer.Chain):

    """Baseline MLP with plain Linear links (no weight normalization)."""

    def __init__(self, n_units, n_out, n_layers=3):
        super(MLP, self).__init__(
            l1=L.Linear(784, n_units),
            lo=L.Linear(n_units, n_out),
        )
        self.n_layers = n_layers
        for i in range(2, self.n_layers - 1):
            self.add_link('l{}'.format(i), L.Linear(n_units, n_units))

    def __call__(self, x):
        act = F.leaky_relu
        h = x
        for i in range(1, self.n_layers - 1):
            if i == 1:
                h = act(getattr(self, 'l{}'.format(i))(h))
            else:
                h = act(getattr(self, 'l{}'.format(i))(h)) + h
        return self.lo(h)
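
# The two classes above share the same topology (784 -> n_units hidden layers
# with residual connections -> n_out); only the weight parameterization
# differs, so differences in the training curves between runs with and
# without --use-wn should be attributable to weight normalization alone.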


def main():
    parser = argparse.ArgumentParser(description='Chainer example: MNIST')
    parser.add_argument('--batchsize', '-b', type=int, default=100,
                        help='Number of images in each mini-batch')
    parser.add_argument('--epoch', '-e', type=int, default=1,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu', '-g', type=int, default=-1,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    parser.add_argument('--resume', '-r', default='',
                        help='Resume the training from snapshot')
    parser.add_argument('--unit', '-u', type=int, default=128,
                        help='Number of units')
    parser.add_argument('--use-wn', '-wn', action='store_const',
                        const=True, default=False,
                        help='Use the weight-normalized model (WNMLP)')
    args = parser.parse_args()

    print('GPU: {}'.format(args.gpu))
    print('# unit: {}'.format(args.unit))
    print('# Minibatch-size: {}'.format(args.batchsize))
    print('# epoch: {}'.format(args.epoch))
    print('')

    # Set up a neural network to train.
    # Classifier reports softmax cross entropy loss and accuracy at every
    # iteration, which will be used by the PrintReport extension below.
    numpy.random.seed(777)
    if args.use_wn:
        model = L.Classifier(WNMLP(args.unit, 10, n_layers=10))
    else:
        model = L.Classifier(MLP(args.unit, 10, n_layers=10))
    if args.gpu >= 0:
        # Make the specified GPU current
        chainer.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()  # Copy the model to the GPU

    # Set up an optimizer
    optimizer = chainer.optimizers.SGD(lr=0.1)
    optimizer.setup(model)

    # Load the MNIST dataset
    train, test = chainer.datasets.get_mnist()
    # Re-seed before building the iterators, presumably so that minibatch
    # shuffling is identical across runs with and without --use-wn.
    numpy.random.seed(777)
    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
    test_iter = chainer.iterators.SerialIterator(test, args.batchsize,
                                                 repeat=False, shuffle=False)

    # Set up a trainer
    updater = training.StandardUpdater(train_iter, optimizer, device=args.gpu)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)

    # Evaluate the model with the test dataset at the end of every epoch
    trainer.extend(extensions.Evaluator(test_iter, model, device=args.gpu))

    # Write a log of training statistics at every iteration
    trainer.extend(extensions.LogReport(trigger=(1, 'iteration')))

    # Print selected entries of the log to stdout.
    # Here "main" refers to the target link of the "main" optimizer, and
    # "validation" refers to the default name of the Evaluator extension.
    # Entries other than 'iteration' and 'elapsed_time' are reported by the
    # Classifier link, called by either the updater or the evaluator.
    trainer.extend(extensions.PrintReport(
        ['iteration', 'main/loss', 'validation/main/loss',
         'main/accuracy', 'validation/main/accuracy', 'elapsed_time']))

    # Print a progress bar to stdout
    trainer.extend(extensions.ProgressBar())

    # Run the training
    trainer.run()


if __name__ == '__main__':
    main()
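
# A minimal sketch of how this script is typically invoked; the flags come
# from the argparse definitions above, and the GPU id is illustrative:
#
#   python train_mnist.py --unit 128 --epoch 1          # baseline MLP on CPU
#   python train_mnist.py --use-wn --gpu 0 --epoch 5    # weight-normalized MLP on GPU 0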