Again most of the code will remain same only the major change will be to use tf.nn.rnn_cell.LSTMCell() instead of tf.nn.rnn_cell.BasicRNNCell(). While initializing the LSTM cell we are using an orthogonal initialized which will generate a random orthogonal matrix which is an effective way of combating exploding and vanishing gradients.
class Model:
def __init__(self, size_layer, num_layers, embedded_size,
dict_size, dimension_output, learning_rate):
def cells(reuse=False):
return tf.nn.rnn_cell.LSTMCell(size_layer,initializer=tf.orthogonal_initializer(),reuse=reuse)
self.X = tf.placeholder(tf.int32, [None, None])
self.Y = tf.placeholder(tf.float32, [None, dimension_output])
encoder_embeddings = tf.Variable(tf.random_uniform([dict_size, embedded_size], -1, 1))
encoder_embedded = tf.nn.embedding_lookup(encoder_embeddings, self.X)
rnn_cells = tf.nn.rnn_cell.MultiRNNCell([cells() for _ in range(num_layers)])
outputs, _ = tf.nn.dynamic_rnn(rnn_cells, encoder_embedded, dtype = tf.float32)
W = tf.get_variable('w',shape=(size_layer, dimension_output),initializer=tf.orthogonal_initializer())
b = tf.get_variable('b',shape=(dimension_output),initializer=tf.zeros_initializer())
self.logits = tf.matmul(outputs[:, -1], W) + b
self.cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits = self.logits, labels = self.Y))
self.optimizer = tf.train.AdamOptimizer(learning_rate = learning_rate).minimize(self.cost)
correct_pred = tf.equal(tf.argmax(self.logits, 1), tf.argmax(self.Y, 1))
self.accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
So this is how the architecture of the LSTM model looks like, which is the same as compared to the previous basic model except with the addition of the LSTM cells in the RNN Block.