vanilla_rnn.py
import theano
import theano.tensor as T
import numpy as np
import matplotlib.pyplot as plt
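

# A vanilla (Elman-style) recurrent neural network: every hidden layer keeps
# its own recurrent weights and initial state and is unrolled over the input
# sequence with theano.scan; the final layer is a plain softmax readout.
# Note: the construction assumes at least one hidden layer (len(sizes) >= 3).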
class RecurrentNeuralNetwork:
    def __init__(self, sizes, activation):
        # sizes[0] is the input dimension, sizes[-1] the output dimension,
        # and everything in between is a recurrent hidden layer.
        self.sizes = sizes
        self.activation = activation
        self.x = T.dmatrix('x')      # input sequence, one time step per row
        self.y = T.dmatrix('y')      # target sequence
        self.eta = T.dscalar('eta')  # learning rate
        self.createRNN()             # builds the graph and sets self.output
        self.cost = T.nnet.binary_crossentropy(self.output, self.y).mean()
        self.train = self.computeTraining()
    def createRNN(self):
        self.initNetworkParams()
        self.hiddenLayers = []
        for it in range(len(self.sizes) - 1):
            W = self.interLayerWeights[it]        # layer-to-layer weights
            W_prime = self.intraLayerWeights[it]  # recurrent (time) weights
            B = self.bias[it]
            H0 = self.initH[it]
            if it == 0:  # input layer: scan over the raw input sequence
                h, _ = theano.scan(self.recurrence, sequences=self.x,
                                   outputs_info=[H0], non_sequences=[W, W_prime, B])
                self.hiddenLayers.append(h)
            elif it == len(self.sizes) - 2:  # output layer: softmax, no recurrence
                self.output = T.nnet.softmax(self.hiddenLayers[-1].dot(W) + B)
            else:  # hidden layer: scan over the previous layer's outputs
                h, _ = theano.scan(self.recurrence, sequences=self.hiddenLayers[-1],
                                   outputs_info=[H0], non_sequences=[W, W_prime, B])
                self.hiddenLayers.append(h)
    def recurrence(self, x_t, h_tm1, W, W_prime, B):
        # One time step: h_t = activation(x_t . W + h_{t-1} . W' + B)
        return self.activation(x_t.dot(W) + h_tm1.dot(W_prime) + B)
    def initNetworkParams(self):
        self.interLayerWeights = []
        self.intraLayerWeights = []
        self.bias = []
        self.initH = []
        for it in range(len(self.sizes) - 1):
            n_in = self.sizes[it]
            n_out = self.sizes[it + 1]
            # Weights start uniform in [-0.5, 0.5); biases and initial hidden
            # states start at zero.
            init_W = np.random.random((n_in, n_out)) - 0.5
            init_W_prime = np.random.random((n_out, n_out)) - 0.5
            init_B = np.zeros((n_out,), dtype=theano.config.floatX)
            h0 = theano.shared(np.zeros((n_out,), dtype=theano.config.floatX), 'h0')
            self.interLayerWeights.append(theano.shared(init_W, 'W'))
            self.intraLayerWeights.append(theano.shared(init_W_prime, 'W_prime'))
            self.bias.append(theano.shared(init_B, 'B'))
            self.initH.append(h0)
    def computeTraining(self):
        updates = []
        # The output layer applies only W and B, so its unused recurrent
        # weights and initial state are left out of the update list.
        params = (self.interLayerWeights + self.intraLayerWeights[:-1]
                  + self.bias + self.initH[:-1])
        for w in params:
            updates.append((w, w - self.eta * T.grad(self.cost, w)))
        return theano.function([self.x, self.y, self.eta], self.cost, updates=updates)
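
    # A possible companion to computeTraining (not in the original file): a
    # hypothetical computePrediction helper that compiles the forward pass so
    # the trained network can be evaluated on new input sequences.
    def computePrediction(self):
        return theano.function([self.x], self.output)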


if __name__ == '__main__':
    # Smoke test: train on random data, 10000 time steps of 2-d input against
    # 10000 rows of 5-d targets, and plot the cost curve.
    rnn = RecurrentNeuralNetwork([2, 3, 4, 5], T.nnet.sigmoid)
    inp = (np.random.random((10000, 2)), np.random.random((10000, 5)))
    costs = []
    for i in range(100):
        costs.append(rnn.train(inp[0], inp[1], 0.3))
        print(costs[-1])
    plt.plot(range(len(costs)), costs)
    plt.show()
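
    # Hypothetical usage of the computePrediction sketch above:
    #   predict = rnn.computePrediction()
    #   outputs = predict(inp[0])  # one softmax row per time step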