How to do it...

This section covers the steps for setting up a feed-forward backpropagation neural network:

  1. Let's define the input parameters of the neural network as function parameters. The following table describes each parameter:

     Parameter      Description
     Xdata          Training dataset of input features
     Ydata          Training labels (one-hot encoded)
     Xtestdata      Test dataset of input features
     Ytestdata      Test labels (one-hot encoded)
     input_size     Number of input features (columns of Xdata)
     learning_rate  Learning rate for the momentum optimizer (default 0.1)
     momentum       Momentum term for the optimizer (default 0.1)
     epochs         Number of training passes over the data (default 10)
     batchsize      Number of observations per mini-batch (default 100)
     rbm_list       List of trained outputs from the stacked RBM
     dbn_sizes      Vector of hidden layer sizes of the stacked RBM, for example c(900, 500, 300)

The neural network function will have a structure as shown in the following script:

NN_train <- function(Xdata, Ydata, Xtestdata, Ytestdata, input_size,
                     learning_rate = 0.1, momentum = 0.1, epochs = 10,
                     batchsize = 100, rbm_list, dbn_sizes){
  library(tensorflow)
  library(stringi)
  ## insert all the code from steps 2 to 11 below
}
  2. Initialize a weight list and a bias list of length 4, with the first element being a tensor drawn from a random normal distribution (standard deviation 0.01) of dimensions 784 x 900, the second 900 x 500, the third 500 x 300, and the fourth 300 x 10 (a short sketch of the resulting shapes follows the code):
weight_list <- list()
bias_list <- list()
# Initialize variables
for(size in c(dbn_sizes, ncol(Ydata))){
  # Initialize weights through a random normal distribution
  weight_list <- c(weight_list, tf$random_normal(shape = shape(input_size, size), stddev = 0.01, dtype = tf$float32))
  # Initialize biases as zeroes
  bias_list <- c(bias_list, tf$zeros(shape = shape(size), dtype = tf$float32))
  # The current layer's size becomes the next layer's input size
  input_size <- size
}
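To see how the loop chains the layer sizes, here is a small standalone sketch; it assumes a 784-column input, dbn_sizes of c(900, 500, 300), and 10 output classes, so adjust the numbers if your data differs:

# Standalone illustration of the shapes produced above (assumed sizes)
sizes <- c(784, 900, 500, 300, 10)
for(k in 1:(length(sizes) - 1)){
  cat("weight", k, ":", sizes[k], "x", sizes[k + 1],
      "| bias", k, ": length", sizes[k + 1], "\n")
}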
  3. Check whether the outcome of the stacked RBM conforms to the hidden layer sizes specified in the dbn_sizes parameter (the structure assumed for each rbm_list element is sketched after the code):
# Check that the number of hidden layers matches the number of RBM outputs
if(length(dbn_sizes) != length(rbm_list)){
  stop("number of hidden dbn_sizes not equal to number of rbm outputs generated")
}
# Check that each expected hidden layer size matches the corresponding RBM output size
# (each RBM's weight_final matrix has one column per hidden unit)
for(i in 1:length(dbn_sizes)){
  if(dbn_sizes[i] != ncol(rbm_list[[i]]$weight_final))
    stop("Number of hidden dbn_sizes do not match")
}
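The check above assumes that each element of rbm_list carries the trained weight matrix (weight_final, visible x hidden) and bias vector (bias_final) produced by the stacked RBM; a hypothetical element for the first RBM would look roughly like this:

# Hypothetical structure of one rbm_list element (for illustration only)
str(list(weight_final = matrix(0, nrow = 784, ncol = 900),
         bias_final = rep(0, 900)))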
  4. Now, place the weights and biases in suitable positions within weight_list and bias_list:
for(i in 1:length(dbn_sizes)){
  weight_list[[i]] <- rbm_list[[i]]$weight_final
  bias_list[[i]] <- rbm_list[[i]]$bias_final
}
  5. Create placeholders for the input and output data:
input <- tf$placeholder(tf$float32, shape = shape(NULL,ncol(Xdata)))
output <- tf$placeholder(tf$float32, shape = shape(NULL,ncol(Ydata)))
  6. Now, use the weights and biases obtained from the stacked RBM to propagate the input data forward through the network, storing each layer's output in the list input_sub (a plain base-R sketch of the layer computation follows the code):
input_sub <- list()
weight <- list()
bias <- list()
for(i in 1:(length(dbn_sizes)+1)){
  weight[[i]] <- tf$cast(tf$Variable(weight_list[[i]]), tf$float32)
  bias[[i]] <- tf$cast(tf$Variable(bias_list[[i]]), tf$float32)
}
input_sub[[1]] <- tf$nn$sigmoid(tf$matmul(input, weight[[1]]) + bias[[1]])
for(i in 2:(length(dbn_sizes)+1)){
  input_sub[[i]] <- tf$nn$sigmoid(tf$matmul(input_sub[[i-1]], weight[[i]]) + bias[[i]])
}
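Each layer computes sigmoid(x %*% W + b); the following minimal base-R sketch mirrors what the first tf$nn$sigmoid(tf$matmul(...)) line does, using toy matrices whose sizes are illustrative assumptions:

# Minimal base-R sketch of one sigmoid layer (illustration only)
sigmoid <- function(z) 1 / (1 + exp(-z))
x <- matrix(runif(2 * 784), nrow = 2)               # two toy input rows
W <- matrix(rnorm(784 * 900, sd = 0.01), nrow = 784)
b <- rep(0, 900)
h1 <- sigmoid(sweep(x %*% W, 2, b, "+"))            # 2 x 900 layer output
dim(h1)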
  7. Define the cost function, that is, the mean squared error between the network's output and the actual one-hot labels (a tiny numeric example follows the code):
cost <- tf$reduce_mean(tf$square(input_sub[[length(input_sub)]] - output))
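For intuition, here is the same cost computed in base R on made-up toy values:

# Mean squared error on toy values (illustration only)
predicted <- c(0.9, 0.1, 0.2)
actual <- c(1, 0, 0)
mean((predicted - actual)^2)   # 0.02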
  8. Implement backpropagation to minimize the cost (the momentum update rule is sketched after the code):
train_op <- tf$train$MomentumOptimizer(learning_rate, momentum)$minimize(cost)
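MomentumOptimizer applies the classical momentum update, velocity <- momentum * velocity + gradient followed by weight <- weight - learning_rate * velocity; a one-parameter base-R sketch on a toy quadratic loss (assumed values) shows the idea:

# Classical momentum update on a toy quadratic loss (illustration only)
w <- 5; v <- 0
lr <- 0.1; mom <- 0.1
for(step in 1:100){
  grad <- 2 * w         # gradient of loss = w^2
  v <- mom * v + grad   # accumulate velocity
  w <- w - lr * v       # momentum step
}
w                       # close to 0 after 100 steps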
  9. Generate the prediction results (note the zero-based indexing illustrated after the code):
predict_op <- tf$argmax(input_sub[[length(input_sub)]], axis = 1L)
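Note that tf$argmax indexes from zero, unlike R's max.col; that is why 1 is added to the predictions in the training loop of the next step. A small illustration with a toy probability matrix (made-up values):

# Zero-based argmax versus one-based max.col (illustration only)
probs <- matrix(c(0.1, 0.7, 0.2,
                  0.6, 0.3, 0.1), nrow = 2, byrow = TRUE)
max.col(probs, ties.method = "first")   # 2 1 (one-based column index)
# tf$argmax(probs, 1L) evaluated in a session would return 1 0 (zero-based)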
  10. Open a TensorFlow session, initialize the variables, and perform the training iterations (the mini-batch indexing is sketched after the loop):
# Open a session and initialize all variables before training
sess <- tf$Session()
sess$run(tf$global_variables_initializer())
train_accuracy <- c()
test_accuracy <- c()
for(ep in 1:epochs){
  for(i in seq(0, (dim(Xdata)[1] - batchsize), batchsize)){
    batchX <- Xdata[(i+1):(i+batchsize),]
    batchY <- Ydata[(i+1):(i+batchsize),]
    # Run the training operation on the current mini-batch
    sess$run(train_op, feed_dict = dict(input = batchX,
                                        output = batchY))
  }
  for(j in 1:(length(dbn_sizes)+1)){
    # Retrieve the current weights and biases
    weight_list[[j]] <- sess$run(weight[[j]])
    bias_list[[j]] <- sess$run(bias[[j]])
  }
  train_result <- sess$run(predict_op, feed_dict = dict(input = Xdata, output = Ydata)) + 1
  train_actual <- as.numeric(stringi::stri_sub(colnames(as.data.frame(Ydata))[max.col(as.data.frame(Ydata), ties.method = "first")], 2))
  test_result <- sess$run(predict_op, feed_dict = dict(input = Xtestdata, output = Ytestdata)) + 1
  test_actual <- as.numeric(stringi::stri_sub(colnames(as.data.frame(Ytestdata))[max.col(as.data.frame(Ytestdata), ties.method = "first")], 2))
  train_accuracy <- c(train_accuracy, mean(train_actual == train_result))
  test_accuracy <- c(test_accuracy, mean(test_actual == test_result))
  cat("epoch:", ep, " Train Accuracy: ", train_accuracy[ep], " Test Accuracy: ", test_accuracy[ep], "\n")
}
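As a quick sanity check of the mini-batch indexing, assuming the MNIST training set of 60,000 rows and the default batchsize of 100 (adjust for your data), the inner loop covers every row exactly once per epoch:

# Mini-batch index arithmetic (assumed 60,000 rows, batchsize 100)
nrows <- 60000; batchsize <- 100
starts <- seq(0, nrows - batchsize, batchsize)
length(starts)                 # 600 mini-batches per epoch
head(starts + 1)               # first rows: 1 101 201 301 401 501
tail(starts + batchsize, 1)    # last row covered: 60000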
  11. Finally, return a list of four outcomes: the per-epoch train accuracy (train_accuracy), the per-epoch test accuracy (test_accuracy), the list of weight matrices after the final epoch (weight_list), and the list of bias vectors after the final epoch (bias_list):
return(list(train_accuracy=train_accuracy,
test_accuracy=test_accuracy,
weight_list=weight_list,
bias_list=bias_list))
  12. Run the training for the defined neural network, using the training and test data along with the stacked RBM outputs (rbm_list = RBM_output) and hidden layer sizes (dbn_sizes = RBM_hidden_sizes) generated earlier:
NN_results <- NN_train(Xdata=trainX,
Ydata=trainY,
Xtestdata=testX,
Ytestdata=testY,
input_size=ncol(trainX),
rbm_list=RBM_output,
dbn_sizes = RBM_hidden_sizes)
  13. Plot the train and test accuracy across epochs:
accuracy_df <- data.frame("accuracy"=c(NN_results$train_accuracy,NN_results$test_accuracy),
"epochs"=c(rep(1:10,times=2)),
"datatype"=c(rep(c(1,2),each=10)),
stringsAsFactors = FALSE)
plot(accuracy ~ epochs,
xlab = "# of epochs",
ylab = "Accuracy in %",
pch = c(16, 1)[datatype],
main = "Neural Network - Accuracy in %",
data = accuracy_df)
legend('bottomright',
c("train","test"),
pch = c( 16, 1))