Defining seq2seq model

In this section, we outline the TensorFlow seq2seq model definition. We use an embedding layer to map the integer representation of the input to a vector representation. The seq2seq model has four major components: the embedding layer, encoders, decoders, and cost/optimizers.

The model is shown in graphical form in the image, and it is formally defined in the code block that immediately follows:

class Chatbot:
    """Encoder/decoder LSTM graph with a masked sequence loss and an Adam step.

    Instantiating the class builds the whole graph using the graph-mode
    TensorFlow API (``tf.placeholder``, ``tf.variable_scope``,
    ``tf.contrib.seq2seq``).  The module-level names ``tf`` and ``GO`` must
    already be defined by the surrounding file; neither is defined here.

    Attributes created on the instance:
        X, Y: rank-2 int32 placeholders (presumably ``[batch, time]`` token
            ids -- confirm against the feeding code).
        X_seq_len, Y_seq_len: rank-1 int32 placeholders.  Only ``Y_seq_len``
            is consumed by the graph (it builds the loss mask);
            ``X_seq_len`` is declared but not wired to any op.
        logits: dense projection of the decoder outputs onto
            ``to_dict_size`` classes.
        cost: ``tf.contrib.seq2seq.sequence_loss`` of ``logits`` against
            ``Y``, weighted by the ``Y_seq_len`` mask.
        optimizer: the Adam ``minimize`` op for ``cost``.
    """

    def __init__(self, size_layer, num_layers, embedded_size,
                 from_dict_size, to_dict_size, learning_rate, batch_size):
        """Build placeholders, embeddings, encoder, decoder, loss and optimizer.

        Args:
            size_layer: number of units of every LSTM cell.
            num_layers: number of LSTM cells stacked in the encoder and,
                separately, in the decoder.
            embedded_size: width of the embedding vectors.
            from_dict_size: number of rows of the encoder embedding table.
            to_dict_size: number of rows of the decoder embedding table and
                number of output classes of the logits layer.
            learning_rate: learning rate handed to ``tf.train.AdamOptimizer``.
            batch_size: kept for backward compatibility.  The batch dimension
                is now read from the fed ``X`` tensor at run time, so this
                value no longer constrains the graph.
        """

        def cells(reuse=False):
            # One orthogonally-initialised LSTM layer.
            return tf.nn.rnn_cell.LSTMCell(
                size_layer,
                initializer=tf.orthogonal_initializer(),
                reuse=reuse,
            )

        self.X = tf.placeholder(tf.int32, [None, None])
        self.Y = tf.placeholder(tf.int32, [None, None])
        self.X_seq_len = tf.placeholder(tf.int32, [None])
        self.Y_seq_len = tf.placeholder(tf.int32, [None])

        # Use the run-time batch dimension instead of the Python-int
        # `batch_size`.  With the constant, any fed batch of a different size
        # (short final batch, single-sentence inference) made the concat below
        # fail: the slice clamps to the real row count while tf.fill still
        # produced `batch_size` rows.
        dynamic_batch = tf.shape(self.X)[0]

        with tf.variable_scope("encoder_embeddings"):
            encoder_embeddings = tf.Variable(
                tf.random_uniform([from_dict_size, embedded_size], -1, 1))
            encoder_embedded = tf.nn.embedding_lookup(encoder_embeddings, self.X)
            # Every row of X with its last time step dropped.
            main = tf.strided_slice(self.X, [0, 0], [dynamic_batch, -1], [1, 1])

        with tf.variable_scope("decoder_embeddings"):
            # Prepend GO so the decoder input has the same time length as X.
            decoder_input = tf.concat(
                [tf.fill([dynamic_batch, 1], GO), main], 1)
            # NOTE(review): `decoder_embeddings` is created but never read;
            # the decoder input is derived from X and looked up in
            # `encoder_embeddings`.  Conventional teacher forcing would shift
            # Y and use `decoder_embeddings` -- left unchanged here because
            # callers that fetch `logits` while feeding only X would break,
            # and because X ids may not be valid rows of a `to_dict_size`
            # table.  Confirm the intended design before changing.
            decoder_embeddings = tf.Variable(
                tf.random_uniform([to_dict_size, embedded_size], -1, 1))
            decoder_embedded = tf.nn.embedding_lookup(
                encoder_embeddings, decoder_input)

        with tf.variable_scope("encoder"):
            rnn_cells = tf.nn.rnn_cell.MultiRNNCell(
                [cells() for _ in range(num_layers)])
            # NOTE(review): no sequence_length is passed, so the final state
            # is taken after any padding steps -- confirm this is intended.
            _, last_state = tf.nn.dynamic_rnn(rnn_cells, encoder_embedded,
                                              dtype=tf.float32)

        with tf.variable_scope("decoder"):
            rnn_cells_dec = tf.nn.rnn_cell.MultiRNNCell(
                [cells() for _ in range(num_layers)])
            # The decoder starts from the encoder's final state.
            outputs, _ = tf.nn.dynamic_rnn(rnn_cells_dec, decoder_embedded,
                                           initial_state=last_state,
                                           dtype=tf.float32)

        with tf.variable_scope("logits"):
            self.logits = tf.layers.dense(outputs, to_dict_size)
            print(self.logits)
            # 1.0 for positions below each row's Y_seq_len, 0.0 elsewhere; the
            # mask is as wide as the largest length in the batch.
            masks = tf.sequence_mask(self.Y_seq_len,
                                     tf.reduce_max(self.Y_seq_len),
                                     dtype=tf.float32)

        with tf.variable_scope("cost"):
            # logits follow X's time length, targets/weights follow Y's, so
            # the fed X and Y presumably have to be padded to the same
            # length -- verify against the batching code.
            self.cost = tf.contrib.seq2seq.sequence_loss(logits=self.logits,
                                                         targets=self.Y,
                                                         weights=masks)

        with tf.variable_scope("optimizer"):
            self.optimizer = tf.train.AdamOptimizer(
                learning_rate=learning_rate).minimize(self.cost)

Table of Contents for Defining seq2seq model

Create new playlist

Sign In

Sign Up

Table of Contents for
Defining seq2seq model