-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathdeep_components.py
160 lines (147 loc) · 7.31 KB
/
deep_components.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
import tensorflow as tf
from tensorflow.contrib import rnn
from tensorflow.contrib.seq2seq import BahdanauAttention,AttentionWrapper,AttentionMechanism
class bilstm_encoder(object):
    """
    bidirectional LSTM encoder over embedded index sequences.
    Holds one forward and one backward LSTMCell, each with ``encoder_size`` units.
    """

    def __init__(self, embedding, encoder_size):
        """
        init.
        :param embedding: embedding table handed to tf.nn.embedding_lookup
        :param encoder_size: number of units of each directional LSTM cell
        """
        self.embedding = embedding
        self.encoder_size = encoder_size
        self.fw_cell = rnn.LSTMCell(self.encoder_size)
        self.bw_cell = rnn.LSTMCell(self.encoder_size)

    def __call__(self, seq_index, seq_len, init_state=None):
        """
        encode a batch of sequences.
        :param seq_index: indices looked up in the embedding table
        :param seq_len: forwarded as the sequence_length argument of the RNN
        :param init_state: accepted for signature parity with gru_encoder;
                           it is NOT used here, both directions start from zero state
        :return: (combined_out, combined_state)
                 combined_out is the forward/backward outputs concatenated on axis 2,
                 combined_state is the forward/backward final states concatenated on axis 1
        """
        seq_embedding = tf.nn.embedding_lookup(self.embedding, seq_index)
        # No initial state is supplied, so the RNN must be told which dtype to use
        # for its zero state; omitting dtype makes dynamic_rnn raise ValueError.
        out, state = tf.nn.bidirectional_dynamic_rnn(
            self.fw_cell,
            self.bw_cell,
            seq_embedding,
            seq_len,
            dtype=tf.float32,
        )
        combined_out = tf.concat(out, axis=2)
        # NOTE(review): with LSTM cells each element of `state` is itself a (c, h)
        # state tuple, so this concat may not produce a (batch, 2*encoder_size)
        # tensor -- confirm the layout expected by callers before relying on it.
        combined_state = tf.concat(state, axis=1)
        return combined_out, combined_state
class gru_encoder(object):
    """
    single-direction GRU encoder over embedded index sequences.
    """

    def __init__(self, embedding, encoder_size):
        """
        init.
        :param embedding: embedding table handed to tf.nn.embedding_lookup
        :param encoder_size: number of units of the GRU cell
        """
        self.embedding = embedding
        self.encoder_size = encoder_size
        self.gru_cell = rnn.GRUCell(num_units=self.encoder_size)

    def __call__(self, seq_index, seq_len, init_state=None):
        """
        encode a batch of sequences.
        :param seq_index: indices looked up in the embedding table
        :param seq_len: forwarded as the sequence_length argument of the RNN
        :param init_state: optional initial state for the GRU (None -> zero state)
        :return: (outputs, final_state) exactly as produced by tf.nn.dynamic_rnn
        """
        embedded_tokens = tf.nn.embedding_lookup(self.embedding, seq_index)
        rnn_outputs, final_state = tf.nn.dynamic_rnn(
            cell=self.gru_cell,
            inputs=embedded_tokens,
            sequence_length=seq_len,
            initial_state=init_state,
            dtype=tf.float32,
        )
        return rnn_outputs, final_state
class decoder(object):
    """
    single-step GRU decoder followed by a dense projection to vocab_size.
    """

    def __init__(self, embedding, rnn_units, vocab_size):
        """
        init.
        :param embedding: embedding table handed to tf.nn.embedding_lookup
        :param rnn_units: number of units of the GRU cell
        :param vocab_size: width of the dense output layer
        """
        self.embedding = embedding
        self.rnn_units = rnn_units
        self.vocab_size = vocab_size
        self.gru_cell = rnn.GRUCell(rnn_units)
        # AUTO_REUSE lets repeated calls share the same 'output_layer' weights.
        self.out_layer = tf.layers.Dense(
            self.vocab_size,
            name='output_layer',
            _reuse=tf.AUTO_REUSE,
        )

    def __call__(self, inputs, state):
        """
        run one decoding step.
        :param inputs: indices looked up in the embedding table
                       (presumably one index per batch row -- confirm with caller)
        :param state: initial state for the GRU
        :return: (projected output of the dense layer, new GRU state)
        """
        step_embedding = tf.nn.embedding_lookup(self.embedding, inputs)
        # Insert a length-1 axis at position 1 so the step is fed to dynamic_rnn
        # as a one-element sequence.
        step_sequence = tf.expand_dims(step_embedding, axis=1)
        rnn_output, next_state = tf.nn.dynamic_rnn(
            self.gru_cell,
            step_sequence,
            initial_state=state,
            dtype=tf.float32,
        )
        projected = self.out_layer(rnn_output)
        return projected, next_state
class AttentionDecoder(object):
    """
    GRU-based seq2seq decoder that attends over the encoder outputs.
    """

    def __init__(self, embedding, hidden_size, vocab_size, max_length, num_layers=1, keep_prop=0.9):
        """
        build the decoder parameters.
        :param embedding: embedding, (vocab_size, hidden_size)
        :param hidden_size: hidden_size
        :param vocab_size: vocab_size for output
        :param max_length: max length of sentence (width of the attention weights)
        :param num_layers: how many times the relu + GRU step is applied per call
        :param keep_prop: keep rate of the dropout applied to the decoder input
        """
        self.embedding = embedding
        self.hidden_size = hidden_size
        self.vocab_size = vocab_size
        self.num_layers = num_layers
        self.keep_prop = keep_prop
        self.max_length = max_length

        # params -- created in a fixed order; the variables are unnamed, so their
        # auto-generated names depend on this order.
        pair_width = self.hidden_size * 2  # width of two hidden-size vectors side by side
        self.attn_W = tf.Variable(
            tf.random_normal(shape=(pair_width, self.max_length)) * 0.1
        )
        self.attn_b = tf.Variable(tf.zeros(shape=(self.max_length,)))
        self.attn_combine_W = tf.Variable(
            tf.random_normal(shape=(pair_width, self.hidden_size)) * 0.1
        )
        self.attn_combine_b = tf.Variable(tf.zeros(shape=(self.hidden_size,)))
        self.linear = tf.Variable(
            tf.random_normal(shape=(self.hidden_size, self.vocab_size)) * 0.1
        )

        # gru
        self.gru_cell = rnn.GRUCell(self.hidden_size)

    def linear_func(self, x, w, b):
        """
        affine map, (x * W + b)
        :param x: x input
        :param w: W param
        :param b: b bias
        :return: tf.matmul(x, w) with b added
        """
        projected = tf.matmul(x, w)
        return tf.add(projected, b)

    def __call__(self, inputs, encoder_outputs, state):
        """
        one attention decoding step using gru.
        :param inputs: word indices(batch, )
        :param encoder_outputs: encoder outputs(batch, max_length, hidden_size)
        :param state: decoder state fed to the attention and to the GRU
        :return: (output, state, attn_weights) --
                 b*1*vocab_size (unscaled), new GRU state, b*max_length
        """
        # batch*hidden_size, with dropout on the looked-up embedding
        token_vectors = tf.nn.embedding_lookup(self.embedding, inputs)
        embedded = tf.nn.dropout(token_vectors, keep_prob=self.keep_prop)

        # batch*(hidden_size*2) -> batch*max_length
        attn_input = tf.concat((embedded, state), 1)
        attn_scores = self.linear_func(attn_input, self.attn_W, self.attn_b)
        attn_weights = tf.nn.softmax(attn_scores)

        # batch*1*max_length x batch*max_length*hidden_size -> batch*1*hidden_size
        weights_as_row = tf.expand_dims(attn_weights, 1)
        attn_applied = tf.matmul(weights_as_row, encoder_outputs)

        # b*(hidden_size*2) -> b*1*hidden_size
        combined = tf.concat([embedded, attn_applied[:, 0, :]], 1)
        combined = self.linear_func(combined, self.attn_combine_W, self.attn_combine_b)
        output = tf.expand_dims(combined, 1)

        # The same GRU cell is applied on every pass of the loop.
        for _ in range(self.num_layers):
            output = tf.nn.relu(output)
            output, state = tf.nn.dynamic_rnn(
                self.gru_cell,
                output,
                initial_state=state,
                dtype=tf.float32,
            )

        # b*1*hidden_size x hidden_size*vocab_size -> b*1*vocab_size
        output = tf.tensordot(output, self.linear, axes=[[2], [0]])
        return output, state, attn_weights
class AttentionGRUCell(rnn.RNNCell):
    """
    GRU cell that attends over a fixed memory tensor at every step.
    The attention context is concatenated with the step input, projected to
    rnn_units, and fed to an inner GRUCell.
    """

    def __init__(self, memory, memory_size,
                 attention_size, embedding_dims, rnn_units,
                 num_layers=1, keep_prop=0.9, reuse=None, name='att_gru_cell'):
        """
        init.
        :param memory: tensor attended over, b*w*memory_size
        :param memory_size: size of the last axis of memory
        :param attention_size: size of the attention scoring space
        :param embedding_dims: stored on the instance; not used by the visible code
        :param rnn_units: state/output size of the inner GRU cell
        :param num_layers: stored on the instance; not used by the visible code
        :param keep_prop: stored on the instance; not used by the visible code
        :param reuse: forwarded to the base cell and to the projection layer
        :param name: name forwarded to the base cell
        """
        super(AttentionGRUCell, self).__init__(name=name, _reuse=reuse)
        self.embedding_dims = embedding_dims
        self.attention_size = attention_size
        self.num_layers = num_layers
        self.keep_prop = keep_prop
        self.memory = memory  # b*w*ms
        self.memory_size = memory_size
        self.rnn_units = rnn_units
        # params
        self.attn_W = tf.get_variable(name='attn_w', shape=(self.rnn_units, self.attention_size), dtype=tf.float32)
        self.attn_U = tf.get_variable(name='attn_u', shape=(self.memory_size, self.attention_size), dtype=tf.float32)
        self.attn_V = tf.get_variable(name='attn_v', shape=(self.attention_size,), dtype=tf.float32)
        # gru
        self.gru_cell = rnn.GRUCell(self.rnn_units)
        self.proj_layer = tf.layers.Dense(self.rnn_units, name='proj_layer', _reuse=reuse)

    @property
    def state_size(self):
        """size of the cell state (rnn_units)."""
        return self.rnn_units

    @property
    def output_size(self):
        """size of the cell output (rnn_units)."""
        return self.rnn_units

    def __call__(self, inputs, state, scope=None):
        """
        one step of the attention gru cell.
        :param inputs: batch*embedding_dims
        :param state: batch*rnn_units
        :param scope: accepted for RNNCell compatibility; not used here
        :return: whatever the inner GRU cell returns for (projected input, state)
        """
        w_s = tf.matmul(state, self.attn_W)  # b*att_size
        # Contract the memory feature axis with the memory_size axis (axis 0) of
        # attn_U. Contracting with axis 1 only type-checks when
        # memory_size == attention_size and then silently applies U transposed.
        u_m = tf.tensordot(self.memory, self.attn_U, axes=(2, 0))  # b*w*att_size
        w_s = tf.expand_dims(w_s, axis=1)  # b*1*att_size, broadcast over w
        ws_um_combined = tf.tanh(w_s + u_m)  # b*w*att_size
        # Score every memory slot with attn_V; the result has a trailing axis of size 1.
        attn_weights = tf.tensordot(ws_um_combined, tf.expand_dims(self.attn_V, axis=1), axes=(2, 0))
        attn_weights = tf.nn.softmax(attn_weights[:, :, -1])  # b*w
        # b*1*w x b*w*ms -> b*1*ms
        attn_applied = tf.matmul(tf.expand_dims(attn_weights, 1), self.memory)
        output = tf.concat([inputs, attn_applied[:, 0, :]], 1)  # b*(embedding_dims+memory_size)
        output = self.proj_layer(output)  # b*rnn_units
        return self.gru_cell(output, state)