-
Notifications
You must be signed in to change notification settings - Fork 19
/
nn_lstm.py
147 lines (111 loc) · 5.05 KB
/
nn_lstm.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
"""
This file defines the LSTM layer of the neural network.
"""
import theano
from theano import tensor
from util import numpy_floatX
def lstm_unmasked_layer(tparams, state_below, options, prefix='lstm', mult=1, go_backwards=False):
    """
    Build an LSTM recurrence over `state_below` with no sequence mask.

    :param tparams: dict of shared Theano parameters; reads
        ``<prefix>_W``, ``<prefix>_U`` and ``<prefix>_b``.
    :param state_below: input sequence, shape (nsteps, n_samples, dim) or
        (nsteps, dim) — presumably word/feature embeddings; verify at caller.
    :param options: model options dict; reads ``dim_proj``.
    :param prefix: name prefix used to look up this layer's parameters.
    :param mult: width multiplier applied to ``dim_proj``.
    :param go_backwards: if True, scan the sequence in reverse order.
    :return: symbolic tensor of hidden states, one per timestep.
    """
    num_steps = state_below.shape[0]
    num_samples = state_below.shape[1] if state_below.ndim == 3 else 1
    # Hidden width of this layer; the fused projections are 4x this wide.
    hidden_size = options['dim_proj'] * mult

    def _name(stem, suffix):
        # Parameter-name helper: '<stem>_<suffix>'.
        return '%s_%s' % (stem, suffix)

    def _gate(fused, idx, width):
        # Slice gate `idx` out of the fused 4-gate pre-activation.
        if fused.ndim == 3:
            return fused[:, :, idx * width:(idx + 1) * width]
        return fused[:, idx * width:(idx + 1) * width]

    def _recurrence(x_t, h_prev, c_prev):
        # One LSTM step: input/forget/output gates plus candidate cell.
        pre = tensor.dot(h_prev, tparams[_name(prefix, 'U')]) + x_t
        gate_in = tensor.nnet.sigmoid(_gate(pre, 0, hidden_size))
        gate_forget = tensor.nnet.sigmoid(_gate(pre, 1, hidden_size))
        gate_out = tensor.nnet.sigmoid(_gate(pre, 2, hidden_size))
        candidate = tensor.tanh(_gate(pre, 3, hidden_size))
        c_t = gate_forget * c_prev + gate_in * candidate
        h_t = gate_out * tensor.tanh(c_t)
        return h_t, c_t

    # Input-to-hidden projection applied to the whole sequence up front.
    state_below = (tensor.dot(state_below, tparams[_name(prefix, 'W')])
                   + tparams[_name(prefix, 'b')])

    def _zero_state():
        # Fresh zero-initialized (n_samples, hidden_size) state tensor.
        return tensor.cast(
            tensor.alloc(numpy_floatX(0.), num_samples, hidden_size),
            theano.config.floatX)

    rval, _updates = theano.scan(_recurrence,
                                 sequences=[state_below],
                                 outputs_info=[_zero_state(), _zero_state()],
                                 name=_name(prefix, '_layers'),
                                 n_steps=num_steps,
                                 go_backwards=go_backwards)
    # rval = (hidden sequence, cell sequence); callers only want hiddens.
    return rval[0]
def lstm_layer(tparams, state_below, options, prefix='lstm', mask=None, go_backwards=False, mult=1):
    """
    Build a masked LSTM recurrence over `state_below`.

    :param tparams: dict of shared Theano parameters; reads
        ``<prefix>_W``, ``<prefix>_U`` and ``<prefix>_b``.
    :param state_below: input sequence, shape (nsteps, n_samples, dim) or
        (nsteps, dim).
    :param options: model options dict; reads ``dim_proj``.
    :param prefix: name prefix used to look up this layer's parameters.
    :param mask: required (nsteps, n_samples) 0/1 mask; where 0, the
        previous hidden/cell state is carried through unchanged (padding).
    :param go_backwards: if True, scan the sequence in reverse order.
    :param mult: width multiplier applied to ``dim_proj``.
    :return: symbolic tensor of hidden states, one per timestep.
    """
    nsteps = state_below.shape[0]
    if state_below.ndim == 3:
        n_samples = state_below.shape[1]
    else:
        n_samples = 1
    assert mask is not None
    # Hidden width of this layer. BUGFIX: slice with dim_proj * mult so the
    # gate slices match the (n_samples, dim_proj * mult) states allocated in
    # outputs_info and the behaviour of lstm_unmasked_layer; the previous
    # code sliced with plain dim_proj, which is wrong whenever mult != 1.
    dim_proj = options['dim_proj'] * mult
    def _slice(_x, n, dim):
        # Slice gate `n` out of the fused 4-gate pre-activation.
        if _x.ndim == 3:
            return _x[:, :, n * dim:(n + 1) * dim]
        return _x[:, n * dim:(n + 1) * dim]
    def _p(pp, name):
        # Parameter-name helper: '<pp>_<name>'.
        return '%s_%s' % (pp, name)
    def _step(m_, x_, h_, c_):
        # One LSTM step with mask-aware state carry-through.
        preact = tensor.dot(h_, tparams[_p(prefix, 'U')])
        preact += x_
        i = tensor.nnet.sigmoid(_slice(preact, 0, dim_proj))
        f = tensor.nnet.sigmoid(_slice(preact, 1, dim_proj))
        o = tensor.nnet.sigmoid(_slice(preact, 2, dim_proj))
        c = tensor.tanh(_slice(preact, 3, dim_proj))
        c = f * c_ + i * c
        # Where mask is 0 (padding step), keep the previous cell state.
        c = m_[:, None] * c + (1. - m_)[:, None] * c_
        h = o * tensor.tanh(c)
        # Same carry-through for the hidden state.
        h = m_[:, None] * h + (1. - m_)[:, None] * h_
        return h, c
    # Input-to-hidden projection applied to the whole sequence up front.
    state_below = (tensor.dot(state_below, tparams[_p(prefix, 'W')]) +
                   tparams[_p(prefix, 'b')])
    rval, updates = theano.scan(_step,
                                sequences=[mask, state_below],
                                outputs_info=[tensor.alloc(numpy_floatX(0.),
                                                           n_samples,
                                                           dim_proj),
                                              tensor.alloc(numpy_floatX(0.),
                                                           n_samples,
                                                           dim_proj)],
                                name=_p(prefix, '_layers'),
                                n_steps=nsteps, go_backwards=go_backwards)
    # rval = (hidden sequence, cell sequence); callers only want hiddens.
    return rval[0]
def bidirectional_lstm_layer(tparams, state_below, options, prefix='lstm', mask=None, mult=1):
    """
    Run a forward and a backward LSTM pass and sum their outputs.

    Uses the masked `lstm_layer` when `mask` is given, otherwise
    `lstm_unmasked_layer`. Each direction has its own parameter set under
    ``<prefix>_forwards`` / ``<prefix>_backwards``.

    :param tparams: dict of shared Theano parameters for both directions.
    :param state_below: input sequence, shape (nsteps, n_samples, dim) or
        (nsteps, dim).
    :param options: model options dict; reads ``dim_proj``.
    :param prefix: base name prefix; per-direction suffixes are appended.
    :param mask: optional (nsteps, n_samples) 0/1 mask forwarded to the
        masked variant.
    :param mult: width multiplier forwarded to the per-direction layers.
    :return: elementwise sum of the forward and backward hidden sequences.

    NOTE(review): the backward pass is produced by scan(go_backwards=True)
    and is summed without re-reversing its time axis, so timestep t of the
    backward output corresponds to input timestep nsteps-1-t — confirm this
    alignment is what downstream consumers expect.
    """
    prefix_forwards = '%s_forwards' % (prefix,)
    prefix_backwards = '%s_backwards' % (prefix,)
    if mask is not None:
        forwards = lstm_layer(tparams, state_below, options, prefix=prefix_forwards, mask=mask, go_backwards=False, mult=mult)
        backwards = lstm_layer(tparams, state_below, options, prefix=prefix_backwards, mask=mask, go_backwards=True, mult=mult)
    else:
        forwards = lstm_unmasked_layer(tparams, state_below, options, prefix=prefix_forwards, mult=mult, go_backwards=False)
        backwards = lstm_unmasked_layer(tparams, state_below, options, prefix=prefix_backwards, mult=mult, go_backwards=True)
    return forwards + backwards