Now we'll see how we can find the optimal parameters of our neural network with Reptile.
First, we initialize the necessary variables:
#number of epochs, i.e., training iterations
num_epochs = 100
#number of samples, i.e., number of shots
num_samples = 50
#number of tasks
num_tasks = 2
#number of times we want to perform optimization
num_iterations = 10
#mini-batch size
mini_batch = 10
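The upcoming snippets also assume that the computation graph (X, Y, w1, b1, w2, b2, loss_function, optimizer, and init) and the sample_points function were defined earlier. As a minimal, self-contained sketch of what those definitions might look like (the sine-wave task distribution, the hidden-layer size, and the learning rate here are assumptions of this sketch, not confirmed values):

import tensorflow as tf
import numpy as np

#hypothetical task sampler: each task is a randomly scaled and shifted sine wave
def sample_points(k):
    amplitude = np.random.uniform(low=0.1, high=5.0)
    phase = np.random.uniform(low=0, high=np.pi)
    x = np.random.uniform(low=-5.0, high=5.0, size=k)
    y = amplitude * np.sin(x + phase)
    return x, y

#a minimal two-layer regression network matching the variables used below
num_hidden = 64
X = tf.placeholder(tf.float32, shape=[None, 1])
Y = tf.placeholder(tf.float32, shape=[None, 1])

w1 = tf.Variable(tf.random_uniform([1, num_hidden]))
b1 = tf.Variable(tf.random_uniform([num_hidden]))
w2 = tf.Variable(tf.random_uniform([num_hidden, 1]))
b2 = tf.Variable(tf.random_uniform([1]))

#forward pass with a tanh hidden layer
z1 = tf.matmul(X, w1) + b1
a1 = tf.nn.tanh(z1)
Yhat = tf.matmul(a1, w2) + b2

#mean squared error loss and the optimizer used in the inner loop
loss_function = tf.reduce_mean(tf.square(Yhat - Y))
optimizer = tf.train.AdamOptimizer(1e-2).minimize(loss_function)
init = tf.global_variables_initializer()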
Then, we start the TensorFlow session:
with tf.Session() as sess:
    sess.run(init)
Then, for each epoch:
for e in range(num_epochs):

    #for each task in the batch of tasks
    for task in range(num_tasks):
We get the initial parameters of the model:
old_w1, old_b1, old_w2, old_b2 = sess.run([w1, b1, w2, b2])
Then, we sample x and y:
x_sample, y_sample = sample_points(num_samples)
For some number of iterations, k, we perform optimization on the task using mini-batches:
for k in range(num_iterations):

    #get the mini-batch x and y
    for i in range(0, num_samples, mini_batch):

        #sample a mini-batch of examples
        x_minibatch = x_sample[i:i+mini_batch]
        y_minibatch = y_sample[i:i+mini_batch]

        train = sess.run(optimizer, feed_dict={X: x_minibatch.reshape(mini_batch, 1),
                                               Y: y_minibatch.reshape(mini_batch, 1)})
We get the updated model parameters after several iterations of optimization:
new_w1, new_b1, new_w2, new_b2 = sess.run([w1, b1, w2, b2])
Now, we perform the meta update, that is, theta = theta + epsilon * (theta_star - theta), where theta_star denotes the task-adapted parameters and epsilon is the meta step size:
epsilon = 0.1
updated_w1 = old_w1 + epsilon * (new_w1 - old_w1)
updated_b1 = old_b1 + epsilon * (new_b1 - old_b1)
updated_w2 = old_w2 + epsilon * (new_w2 - old_w2)
updated_b2 = old_b2 + epsilon * (new_b2 - old_b2)
We update the model parameters with the new parameters:
w1.load(updated_w1, sess)
b1.load(updated_b1, sess)
w2.load(updated_w2, sess)
b2.load(updated_b2, sess)
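The same interpolation generalizes to any number of parameter tensors. As a sketch (the reptile_update helper is my own and not part of the original code), the per-variable bookkeeping can be folded into a single function:

def reptile_update(sess, variables, old_values, epsilon=0.1):
    #read the task-adapted parameters, theta_star, from the session
    new_values = sess.run(variables)
    for var, old, new in zip(variables, old_values, new_values):
        #theta = theta + epsilon * (theta_star - theta)
        var.load(old + epsilon * (new - old), sess)

With this helper, the four load calls above collapse into reptile_update(sess, [w1, b1, w2, b2], [old_w1, old_b1, old_w2, old_b2]).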
Then, we print the loss every 10 epochs:
if e % 10 == 0:
    loss = sess.run(loss_function, feed_dict={X: x_sample.reshape(num_samples, 1),
                                              Y: y_sample.reshape(num_samples, 1)})

    print('Epoch {}: Loss {}'.format(e, loss))
    print('Updated Model Parameter Theta')
    print('Sampling Next Batch of Tasks')
    print('---------------------------------')
The complete code is given as follows:
#start the TensorFlow session
with tf.Session() as sess:

    sess.run(init)

    for e in range(num_epochs):

        #for each task in the batch of tasks
        for task in range(num_tasks):

            #get the initial parameters of the model
            old_w1, old_b1, old_w2, old_b2 = sess.run([w1, b1, w2, b2])

            #sample x and y
            x_sample, y_sample = sample_points(num_samples)

            #for some number of iterations, perform optimization on the task
            for k in range(num_iterations):

                #get the mini-batch x and y
                for i in range(0, num_samples, mini_batch):

                    #sample a mini-batch of examples
                    x_minibatch = x_sample[i:i+mini_batch]
                    y_minibatch = y_sample[i:i+mini_batch]

                    train = sess.run(optimizer, feed_dict={X: x_minibatch.reshape(mini_batch, 1),
                                                           Y: y_minibatch.reshape(mini_batch, 1)})

            #get the updated model parameters after several iterations of optimization
            new_w1, new_b1, new_w2, new_b2 = sess.run([w1, b1, w2, b2])

            #now we perform the meta update
            #i.e., theta = theta + epsilon * (theta_star - theta)
            epsilon = 0.1

            updated_w1 = old_w1 + epsilon * (new_w1 - old_w1)
            updated_b1 = old_b1 + epsilon * (new_b1 - old_b1)
            updated_w2 = old_w2 + epsilon * (new_w2 - old_w2)
            updated_b2 = old_b2 + epsilon * (new_b2 - old_b2)

            #update the model parameters with the new parameters
            w1.load(updated_w1, sess)
            b1.load(updated_b1, sess)
            w2.load(updated_w2, sess)
            b2.load(updated_b2, sess)

        #print the loss every 10 epochs
        if e % 10 == 0:
            loss = sess.run(loss_function, feed_dict={X: x_sample.reshape(num_samples, 1),
                                                      Y: y_sample.reshape(num_samples, 1)})

            print('Epoch {}: Loss {}'.format(e, loss))
            print('Updated Model Parameter Theta')
            print('Sampling Next Batch of Tasks')
            print('---------------------------------')
You can see the output as follows:
Epoch 0: Loss 13.0675544739
Updated Model Parameter Theta
Sampling Next Batch of Tasks
---------------------------------
Epoch 10: Loss 7.3604927063
Updated Model Parameter Theta
Sampling Next Batch of Tasks
---------------------------------
Epoch 20: Loss 4.35141277313
Updated Model Parameter Theta
Sampling Next Batch of Tasks
---------------------------------
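As a final sanity check, we can verify that the meta-learned initialization adapts quickly to a brand-new task. The following snippet is a sketch of my own (it must run inside the same session, and the choice of five adaptation steps is arbitrary):

#sample a fresh task and fine-tune on it for a few gradient steps
x_new, y_new = sample_points(num_samples)

for step in range(5):
    sess.run(optimizer, feed_dict={X: x_new.reshape(num_samples, 1),
                                   Y: y_new.reshape(num_samples, 1)})

#if the initialization transfers well, the loss should drop quickly
new_task_loss = sess.run(loss_function, feed_dict={X: x_new.reshape(num_samples, 1),
                                                   Y: y_new.reshape(num_samples, 1)})
print('Loss on the new task after 5 adaptation steps: {}'.format(new_task_loss))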