This section walks through the steps to set up a basic RNN model.
- Load the MNIST dataset:
# Load mnist dataset from tensorflow library
datasets <- tf$contrib$learn$datasets
mnist <- datasets$mnist$read_data_sets("MNIST-data", one_hot = TRUE)
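As a quick optional sanity check (assuming the standard MNIST split and that the images convert to plain R matrices, as they do when used with apply below), the loaded data should contain 55,000 training and 10,000 test rows of 784 pixels each:
# Illustrative shape check: expect 55000 x 784 (train) and 10000 x 784 (test)
dim(mnist$train$images)
dim(mnist$test$images)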
- Reset the graph and start an interactive session:
# Reset the graph and set up an interactive session
tf$reset_default_graph()
sess <- tf$InteractiveSession()
- Reduce image size to 16 x 16 pixels using the reduceImage function from Chapter 4, Data Representation using Autoencoders:
# Convert train and test data to 16 x 16 pixel images
trainData <- t(apply(mnist$train$images, 1, FUN = reduceImage))
validData <- t(apply(mnist$test$images, 1, FUN = reduceImage))
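If the Chapter 4 helper is not at hand, the following is a minimal base-R sketch of a reduceImage-style function; it is purely illustrative (the Chapter 4 implementation may differ) and assumes simple nearest-neighbour sampling:
# Hypothetical stand-in for reduceImage: downsample a 784-pixel row (28 x 28)
# to 16 x 16 by nearest-neighbour sampling and return a 256-element vector
reduceImage <- function(row_pixels, in_dim = 28, out_dim = 16){
  img <- matrix(row_pixels, nrow = in_dim, ncol = in_dim)
  idx <- round(seq(1, in_dim, length.out = out_dim))
  as.numeric(img[idx, idx])
}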
- Extract labels for the defined train and valid datasets:
labels <- mnist$train$labels
labels_valid <- mnist$test$labels
- Define model parameters such as the number of input pixels per step (n_input), the step size (step_size), the number of hidden units (n.hidden), and the number of outcome classes (n.class):
# Define model parameters
n_input <- 16
step_size <- 16
n.hidden <- 64
n.class <- 10
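With these settings, each 16 x 16 image is treated as a sequence of 16 time steps (step_size) with 16 pixel values per step (n_input). A quick illustrative check that the flattened images match this layout:
# Each 256-pixel row should split exactly into step_size steps of n_input values
stopifnot(ncol(trainData) == step_size * n_input)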
- Define training parameters such as learning rate (lr), number of inputs per batch run (batch), and number of iterations (iteration):
# Define training parameters
lr <- 0.01
batch <- 500
iteration <- 100
- Define a function rnn that takes a batch of input data (x), a weight matrix (weights), and a bias vector (bias), and returns the predicted output vector of a basic RNN:
# Set up a basic RNN
rnn <- function(x, weights, bias){
  # Unstack the input into a list of step_size tensors
  x <- tf$unstack(x, step_size, 1)
  # Define a basic RNN cell
  rnn_cell <- tf$contrib$rnn$BasicRNNCell(n.hidden)
  # Create the recurrent neural network
  cell_output <- tf$contrib$rnn$static_rnn(rnn_cell, x, dtype = tf$float32)
  # Linear activation on the output of the last time step
  last_vec <- tail(cell_output[[1]], n = 1)[[1]]
  return(tf$matmul(last_vec, weights) + bias)
}
- Define a function eval_acc to evaluate mean accuracy using actual (y) and predicted (yhat) labels:
# Function to evaluate mean accuracy
eval_acc <- function(yhat, y){
  # Count correct predictions
  correct_Count <- tf$equal(tf$argmax(yhat, 1L), tf$argmax(y, 1L))
  # Mean accuracy
  mean_accuracy <- tf$reduce_mean(tf$cast(correct_Count, tf$float32))
  return(mean_accuracy)
}
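The same wrapper also works with other cell types from tf$contrib$rnn. As an illustrative variant only (it is not used in the remaining steps), a GRU-based version could look like this:
# Variant sketch: swap the basic RNN cell for a GRU cell; the rest is unchanged
rnn_gru <- function(x, weights, bias){
  x <- tf$unstack(x, step_size, 1)
  gru_cell <- tf$contrib$rnn$GRUCell(n.hidden)
  cell_output <- tf$contrib$rnn$static_rnn(gru_cell, x, dtype = tf$float32)
  last_vec <- tail(cell_output[[1]], n = 1)[[1]]
  tf$matmul(last_vec, weights) + bias
}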
- Define placeholder variables (x and y) and initialize weight matrix and bias vector:
with(tf$name_scope('input'), {
  # Define placeholders for input data
  x <- tf$placeholder(tf$float32, shape = shape(NULL, step_size, n_input), name = 'x')
  y <- tf$placeholder(tf$float32, shape(NULL, n.class), name = 'y')
  # Define weights and bias
  weights <- tf$Variable(tf$random_normal(shape(n.hidden, n.class)))
  bias <- tf$Variable(tf$random_normal(shape(n.class)))
})
- Generate the predicted labels:
# Evaluate rnn cell output
yhat <- rnn(x, weights, bias)
- Define the loss function and optimizer:
cost <- tf$reduce_mean(tf$nn$softmax_cross_entropy_with_logits(logits = yhat, labels = y))
optimizer <- tf$train$AdamOptimizer(learning_rate = lr)$minimize(cost)
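Optionally, an accuracy node can be created from the eval_acc function defined earlier so that accuracy can be monitored as well (illustrative; the steps below only track the loss):
# Optional accuracy node for monitoring
accuracy <- eval_acc(yhat, y)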
- Run the optimization after initializing the session with the global variables initializer:
sess$run(tf$global_variables_initializer())
for(i in 1:iteration){
  spls <- sample(1:dim(trainData)[1], batch)
  sample_data <- trainData[spls,]
  sample_y <- labels[spls,]
  # Reshape the sample into 16 sequences of 16 elements each
  sample_data <- tf$reshape(sample_data, shape(batch, step_size, n_input))
  out <- optimizer$run(feed_dict = dict(x = sample_data$eval(), y = sample_y))
  if (i %% 1 == 0){
    cat("iteration - ", i, "Training Loss - ", cost$eval(feed_dict = dict(x = sample_data$eval(), y = sample_y)), "\n")
  }
}
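Note that tf$reshape adds a new graph op and sample_data$eval() re-evaluates it on every iteration. A lighter alternative (a sketch, assuming the reticulate package that the tensorflow R package depends on) is to reshape the batch in R with row-major ordering and feed the plain array directly, replacing the two reshape/feed lines inside the loop:
# Alternative: reshape the batch in R (C/row-major order, matching tf$reshape)
sample_data <- reticulate::array_reshape(trainData[spls, ], c(batch, step_size, n_input))
out <- optimizer$run(feed_dict = dict(x = sample_data, y = sample_y))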
- Get the mean accuracy on valid_data:
valid_data <- tf$reshape(validData, shape(-1, step_size, n_input))
eval_acc(yhat, y)$eval(feed_dict = dict(x = valid_data$eval(), y = labels_valid))
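Once evaluation is done, the interactive session can be closed as optional housekeeping:
# Close the interactive session
sess$close()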