Attention layer code is not available on given repository link #18

ruchi-sharma1 · 2022-01-30T20:05:38Z

The link you gave for the attention layer at the start of the repo is not available. Please check!

evyattar · 2022-02-11T04:07:15Z

import tensorflow as tf
import os
from tensorflow.python.keras.layers import Layer
from tensorflow.python.keras import backend as K

class AttentionLayer(Layer):
    """Bahdanau (additive) attention (https://arxiv.org/pdf/1409.0473.pdf).

    The layer owns three trainable weight matrices, W_a, U_a and V_a, and is
    called with a list ``[encoder_output_sequence, decoder_output_sequence]``
    of rank-3 tensors. For every decoder step it scores all encoder steps,
    normalises the scores with a softmax and returns both the resulting
    context vectors and the attention weights.
    """

    def __init__(self, **kwargs):
        """Forward all keyword arguments to the base ``Layer`` constructor."""
        super(AttentionLayer, self).__init__(**kwargs)

    def build(self, input_shape):
        """Create the attention weights.

        input_shape: list ``[encoder_shape, decoder_shape]``; index 2 of each
            shape is read as that sequence's hidden size.
        """
        assert isinstance(input_shape, list)
        en_hidden = input_shape[0][2]
        de_hidden = input_shape[1][2]

        # W_a projects encoder outputs, U_a projects a decoder output into the
        # same (encoder-sized) space, V_a reduces the combination to a score.
        self.W_a = self.add_weight(name='W_a',
                                   shape=tf.TensorShape((en_hidden, en_hidden)),
                                   initializer='uniform',
                                   trainable=True)
        self.U_a = self.add_weight(name='U_a',
                                   shape=tf.TensorShape((de_hidden, en_hidden)),
                                   initializer='uniform',
                                   trainable=True)
        self.V_a = self.add_weight(name='V_a',
                                   shape=tf.TensorShape((en_hidden, 1)),
                                   initializer='uniform',
                                   trainable=True)

        super(AttentionLayer, self).build(input_shape)  # Be sure to call this at the end

    def call(self, inputs, verbose=False):
        """Compute context vectors and attention weights.

        inputs: [encoder_output_sequence, decoder_output_sequence]
        verbose: when True, print the shapes of the intermediate tensors.

        Returns a tuple ``(c_outputs, e_outputs)``:
            c_outputs: context vectors, one per decoder step
                (batch_size, de_seq_len, en_hidden)
            e_outputs: attention weights over the encoder steps
                (batch_size, de_seq_len, en_seq_len)
        """
        # isinstance (rather than an exact type comparison) also accepts list
        # subclasses.
        assert isinstance(inputs, list)
        encoder_out_seq, decoder_out_seq = inputs
        if verbose:
            print('encoder_out_seq>', encoder_out_seq.shape)
            print('decoder_out_seq>', decoder_out_seq.shape)

        # S.Wa where S=[s0, s1, ..., si]. It does not depend on the decoder
        # step, so it is computed once here instead of inside energy_step.
        # <= batch_size, en_seq_len, en_hidden
        W_a_dot_s = K.dot(encoder_out_seq, self.W_a)

        def energy_step(inputs, states):
            """Step function computing the attention weights for one decoder step.

            inputs: decoder output at the current step (batch_size, de_hidden)
            states: required by K.rnn; only its type is checked, its values
                are never read
            """
            assert_msg = "States must be an iterable. Got {} of type {}".format(states, type(states))
            assert isinstance(states, list) or isinstance(states, tuple), assert_msg

            # hj.Ua
            U_a_dot_h = K.expand_dims(K.dot(inputs, self.U_a), 1)  # <= batch_size, 1, en_hidden
            if verbose:
                print('Ua.h>', U_a_dot_h.shape)

            # tanh(S.Wa + hj.Ua); the decoder term broadcasts over en_seq_len
            # <= batch_size, en_seq_len, en_hidden
            Ws_plus_Uh = K.tanh(W_a_dot_s + U_a_dot_h)
            if verbose:
                print('Ws+Uh>', Ws_plus_Uh.shape)

            # softmax(va.tanh(S.Wa + hj.Ua))
            # <= batch_size, en_seq_len
            e_i = K.squeeze(K.dot(Ws_plus_Uh, self.V_a), axis=-1)
            # <= batch_size, en_seq_len
            e_i = K.softmax(e_i)

            if verbose:
                print('ei>', e_i.shape)

            return e_i, [e_i]

        def context_step(inputs, states):
            """Step function computing the context vector ci from the weights ei.

            inputs: attention weights for one decoder step (batch_size, en_seq_len)
            states: required by K.rnn; only its type is checked, its values
                are never read
            """
            assert_msg = "States must be an iterable. Got {} of type {}".format(states, type(states))
            assert isinstance(states, list) or isinstance(states, tuple), assert_msg

            # Weighted sum of the encoder outputs.
            # <= batch_size, en_hidden
            c_i = K.sum(encoder_out_seq * K.expand_dims(inputs, -1), axis=1)
            if verbose:
                print('ci>', c_i.shape)
            return c_i, [c_i]

        # K.rnn needs initial states shaped like what each step function
        # returns as its new state; the values themselves are never used.
        fake_state_c = K.sum(encoder_out_seq, axis=1)  # <= batch_size, en_hidden
        fake_state_e = K.sum(encoder_out_seq, axis=2)  # <= batch_size, en_seq_len

        # Computing energy outputs
        # e_outputs => (batch_size, de_seq_len, en_seq_len)
        _, e_outputs, _ = K.rnn(
            energy_step, decoder_out_seq, [fake_state_e],
        )

        # Computing context vectors
        # c_outputs => (batch_size, de_seq_len, en_hidden)
        _, c_outputs, _ = K.rnn(
            context_step, e_outputs, [fake_state_c],
        )

        return c_outputs, e_outputs

    def compute_output_shape(self, input_shape):
        """Shapes of the two outputs produced by the layer.

        input_shape: list ``[encoder_shape, decoder_shape]``.

        Returns ``[context_shape, attention_shape]``. The context vectors are
        weighted sums of encoder outputs, so their last dimension is the
        encoder hidden size (input_shape[0][2]), not the decoder's.
        """
        return [
            tf.TensorShape((input_shape[1][0], input_shape[1][1], input_shape[0][2])),
            tf.TensorShape((input_shape[1][0], input_shape[1][1], input_shape[0][1]))
        ]

Awad-Safwat · 2022-02-26T19:25:00Z

Please explain how to use this attention code. Your code is very good, thanks a lot.

ratnesh-advance · 2023-03-26T07:27:13Z

System is unable to train this model.

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Attention layer code is not available on given repository link #18

Attention layer code is not available on given repository link #18

ruchi-sharma1 commented Jan 30, 2022

evyattar commented Feb 11, 2022

Awad-Safwat commented Feb 26, 2022

ratnesh-advance commented Mar 26, 2023

Attention layer code is not available on given repository link #18

Attention layer code is not available on given repository link #18

Comments

ruchi-sharma1 commented Jan 30, 2022

evyattar commented Feb 11, 2022

Awad-Safwat commented Feb 26, 2022

ratnesh-advance commented Mar 26, 2023