Given the mandatory inputs (the dataset file path, the vocabulary file path, and the model name), we can start the training process. First, let's define all the hyperparameters for the model as command-line arguments.
import os
import argparse
from modules.Model import *
from modules.Batch import *
def main():
    """Parse the command-line options that configure a training run.

    Required options:
        --training_file     path to the training dataset file
        --vocabulary_file   path to the vocabulary file
        --model_name        name of the model

    Optional hyperparameters (defaults in parentheses):
        --epoch (200), --batch_size (50), --sequence_length (50),
        --log_frequency (100), --learning_rate (0.002),
        --units_number (128), --layers_number (2)

    argparse terminates the process with a usage message when a required
    option is missing or a value cannot be converted to its declared type.
    """
    parser = argparse.ArgumentParser()

    # Mandatory inputs.
    parser.add_argument('--training_file', type=str, required=True)
    parser.add_argument('--vocabulary_file', type=str, required=True)
    parser.add_argument('--model_name', type=str, required=True)

    # Optional hyperparameters.
    parser.add_argument('--epoch', type=int, default=200)
    parser.add_argument('--batch_size', type=int, default=50)
    parser.add_argument('--sequence_length', type=int, default=50)
    parser.add_argument('--log_frequency', type=int, default=100)
    # The learning rate is fractional, so it must be parsed as a float;
    # with type=int a value such as "0.01" is rejected by argparse.
    parser.add_argument('--learning_rate', type=float, default=0.002)
    parser.add_argument('--units_number', type=int, default=128)
    parser.add_argument('--layers_number', type=int, default=2)

    args = parser.parse_args()

    # The training code that follows refers to the options by bare name,
    # so bind each parsed value to a local of the same name.
    training_file = args.training_file
    vocabulary_file = args.vocabulary_file
    model_name = args.model_name
    epoch = args.epoch
    batch_size = args.batch_size
    sequence_length = args.sequence_length
    log_frequency = args.log_frequency
    learning_rate = args.learning_rate
    units_number = args.units_number
    layers_number = args.layers_number
Since we train the model in batches, we divide the dataset into batches of the defined batch_size using the Batch module.
# Batch provider built from the dataset file, the vocabulary file,
# the batch size and the sequence length.
batch = Batch(
    training_file,
    vocabulary_file,
    batch_size,
    sequence_length,
)
Each batch returns two arrays: the input vector for the input sequence, with shape [batch_size, sequence_len, vocab_size], and the label vector, with shape [batch_size, vocab_size].
Now we initialize our model and create the optimizer function. In this model, we used the Adam optimizer.
Then we train our model and perform the optimization over each batch:
# Building the model instance and its classifier op
model = Model(model_name)
# NOTE(review): input_number and classes_number are not defined in this
# excerpt; presumably both are the vocabulary size - confirm against Batch.
model.build(input_number, sequence_length, layers_number, units_number, classes_number)
classifier = model.get_classifier()

# Building the cost function: mean squared difference between the classifier
# output and the labels, minimised with the Adam optimizer
cost = tf.reduce_mean(tf.square(classifier - model.y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

# Computing the accuracy metric: fraction of samples whose arg-max prediction
# equals the arg-max of the label vector
expected_prediction = tf.equal(tf.argmax(classifier, 1), tf.argmax(model.y, 1))
accuracy = tf.reduce_mean(tf.cast(expected_prediction, tf.float32))

# Preparing logs for Tensorboard
# NOTE(review): the merged summary op and the summaries directory are only
# prepared here; nothing in this excerpt writes them to disk.
loss_summary = tf.summary.scalar("loss", cost)
acc_summary = tf.summary.scalar("accuracy", accuracy)
train_summary_op = tf.summary.merge_all()
out_dir = "{}/{}".format(model_name, model_name)
train_summary_dir = os.path.join(out_dir, "summaries")
##
# Initializing the session and executing the training
init = tf.global_variables_initializer()
with tf.Session() as sess:
    sess.run(init)
    iteration = 0
    # Keep training until the batch provider reports `epoch` full passes
    # over the dataset.
    while batch.dataset_full_passes < epoch:
        iteration += 1
        batch_x, batch_y = batch.get_next_batch()
        batch_x = batch_x.reshape((batch_size, sequence_length, input_number))
        # Build the feed once and reuse it for the update and the metrics.
        feed = {model.x: batch_x, model.y: batch_y}
        sess.run(optimizer, feed_dict=feed)
        if iteration % log_frequency == 0:
            # Fetch both metrics in a single run so the graph is evaluated
            # once instead of twice on the same batch.
            acc, loss = sess.run([accuracy, cost], feed_dict=feed)
            print("Iteration {}, batch loss: {:.6f}, training accuracy: {:.5f}".format(iteration * batch_size,
                                                                                        loss, acc))
# Called once, after the training loop has finished.
batch.clean()
Once the model completes its training, the checkpoints are stored so that we can use them later for inference. Here is a snapshot of the accuracy and the loss during the training process.