forked from MaxLikesMath/DeepLearningImplementations
attention_mech.py
import tensorflow as tf
import numpy as np


def attention(inputs, input_size, attention_size, bi_rnn=False):
    # Based on the Hierarchical Attention Networks paper:
    # http://www.cs.cmu.edu/~./hovy/papers/16HLT-hierarchical-attention-networks.pdf
    if bi_rnn:
        # For a bidirectional RNN, inputs is the (forward, backward) output pair;
        # concatenate the two along the feature dimension.
        inputs = tf.concat(inputs, 2)
    # Our first step is to pass weights*inputs + bias through the tanh function.
    # We start by initializing our variables:
    W = tf.Variable(tf.random_normal([input_size, attention_size], stddev=0.2))
    b = tf.Variable(tf.random_normal([attention_size], stddev=0.2))
    u = tf.Variable(tf.random_normal([attention_size], stddev=0.2))  # This is our "context" vector.
    # Run the inputs through a one-layer feed-forward net: tanh(inputs*W + b).
    hid_rep = tf.tensordot(inputs, W, axes=1) + b
    hid_rep = tf.tanh(hid_rep)
    # Score each timestep by its similarity to the context vector.
    word_sim = tf.tensordot(hid_rep, u, axes=1)
    # Normalize the scores into attention weights over the time axis.
    alpha = tf.nn.softmax(word_sim)
    # The output is the attention-weighted sum of the inputs over time.
    output = tf.reduce_sum(inputs * tf.expand_dims(alpha, -1), 1)
    return output
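

# --- Hypothetical usage sketch (not part of the original file) ---
# A minimal example of how attention() above could be wired up, assuming
# TensorFlow 1.x graph mode; the shapes and placeholder names below are
# illustrative assumptions, not taken from the original repository.

max_time = 50        # assumed sequence length
hidden_size = 128    # assumed RNN hidden size
attn_size = 64       # assumed attention layer size

# Per-timestep RNN outputs, e.g. from tf.nn.dynamic_rnn: [batch, time, hidden].
rnn_outputs = tf.placeholder(tf.float32, [None, max_time, hidden_size])

# Pool the timesteps into one vector per sequence: [batch, hidden].
sentence_vector = attention(rnn_outputs, input_size=hidden_size,
                            attention_size=attn_size)

# For a bidirectional RNN, pass the (forward, backward) output pair with
# bi_rnn=True and set input_size to 2 * hidden_size after concatenation.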