In this section, we will outline the TensorFlow seq2seq model definition. We employed an embedding layer to go from integer representation to vector representation of the input. This seq2seq model has 4 major components: the embedding layer, encoders, decoders and cost/optimizers.
You can see the model in graphical form in the image and formally defined in the code block that immediately follows:
class Chatbot:
def __init__(self, size_layer, num_layers, embedded_size,
from_dict_size, to_dict_size, learning_rate, batch_size):
def cells(reuse=False):
return tf.nn.rnn_cell.LSTMCell(size_layer,initializer=tf.orthogonal_initializer(),reuse=reuse)
self.X = tf.placeholder(tf.int32, [None, None])
self.Y = tf.placeholder(tf.int32, [None, None])
self.X_seq_len = tf.placeholder(tf.int32, [None])
self.Y_seq_len = tf.placeholder(tf.int32, [None])
with tf.variable_scope("encoder_embeddings"):
encoder_embeddings = tf.Variable(tf.random_uniform([from_dict_size, embedded_size], -1, 1))
encoder_embedded = tf.nn.embedding_lookup(encoder_embeddings, self.X)
main = tf.strided_slice(self.X, [0, 0], [batch_size, -1], [1, 1])
with tf.variable_scope("decoder_embeddings"):
decoder_input = tf.concat([tf.fill([batch_size, 1], GO), main], 1)
decoder_embeddings = tf.Variable(tf.random_uniform([to_dict_size, embedded_size], -1, 1))
decoder_embedded = tf.nn.embedding_lookup(encoder_embeddings, decoder_input)
with tf.variable_scope("encoder"):
rnn_cells = tf.nn.rnn_cell.MultiRNNCell([cells() for _ in range(num_layers)])
_, last_state = tf.nn.dynamic_rnn(rnn_cells, encoder_embedded,
dtype = tf.float32)
with tf.variable_scope("decoder"):
rnn_cells_dec = tf.nn.rnn_cell.MultiRNNCell([cells() for _ in range(num_layers)])
outputs, _ = tf.nn.dynamic_rnn(rnn_cells_dec, decoder_embedded,
initial_state = last_state,
dtype = tf.float32)
with tf.variable_scope("logits"):
self.logits = tf.layers.dense(outputs,to_dict_size)
print(self.logits)
masks = tf.sequence_mask(self.Y_seq_len, tf.reduce_max(self.Y_seq_len), dtype=tf.float32)
with tf.variable_scope("cost"):
self.cost = tf.contrib.seq2seq.sequence_loss(logits = self.logits,
targets = self.Y,
weights = masks)
with tf.variable_scope("optimizer"):
self.optimizer = tf.train.AdamOptimizer(learning_rate = learning_rate).minimize(self.cost)