import numpy as np
from matplotlib import pyplot
%matplotlib inline
from caffe2.python import core, workspace, model_helper, brew, optimizer
from caffe2.proto import caffe2_pb2

# Run every operator created under this device scope on the GPU.
device_option = caffe2_pb2.DeviceOption(device_type=caffe2_pb2.CUDA)
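If the machine might be running a CPU-only build of Caffe2, a guarded fallback keeps the rest of the notebook runnable. A minimal sketch, using the `workspace.has_gpu_support` flag:

```python
# Minimal sketch: fall back to the CPU when this Caffe2 build has no GPU
# support, so the rest of the notebook still runs.
if not workspace.has_gpu_support:
    device_option = caffe2_pb2.DeviceOption(device_type=caffe2_pb2.CPU)
```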
def AddLeNetModel(model):
    with core.DeviceScope(device_option):
        conv1 = brew.conv(model, 'data', 'conv1', 1, 20, 5)
        pool1 = brew.max_pool(model, conv1, 'pool1', kernel=2, stride=2)
        conv2 = brew.conv(model, pool1, 'conv2', 20, 50, 5)
        pool2 = brew.max_pool(model, conv2, 'pool2', kernel=2, stride=2)
        fc3 = brew.fc(model, pool2, 'fc3', 50 * 4 * 4, 500)
        fc3 = brew.relu(model, fc3, fc3)
        pred = brew.fc(model, fc3, 'pred', 500, 10)
        softmax = brew.softmax(model, pred, 'softmax')
    return softmax
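The `50 * 4 * 4` input size of `fc3` follows from the usual valid-convolution arithmetic on the 28x28 MNIST images. A quick check in plain Python (no Caffe2 calls needed):

```python
# Spatial size after each layer, via size -> (size - kernel) // stride + 1:
# 28 -> conv(5): 24 -> pool(2, s=2): 12 -> conv(5): 8 -> pool(2, s=2): 4
size = 28
for kernel, stride in [(5, 1), (2, 2), (5, 1), (2, 2)]:
    size = (size - kernel) // stride + 1
print(size)  # 4, so pool2 is 50 channels x 4 x 4 = 800 inputs to fc3
```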
def AddInput(model, batch_size, db, db_type):
    ### load the data from db - Method 1 using brew
    # data_uint8, label = brew.db_input(
    #     model,
    #     blobs_out=["data_uint8", "label"],
    #     batch_size=batch_size,
    #     db=db,
    #     db_type=db_type,
    # )
    ### load the data from db - Method 2 using TensorProtosDBInput
    data_uint8, label = model.TensorProtosDBInput(
        [], ["data_uint8", "label"], batch_size=batch_size,
        db=db, db_type=db_type)
    # cast the data from uint8 to float
    data = model.Cast(data_uint8, "data", to=core.DataType.FLOAT)
    # scale data from [0, 255] down to [0, 1]
    data = model.Scale(data, data, scale=float(1. / 256))
    # the input data does not need a gradient for the backward pass
    data = model.StopGradient(data, data)
    return data, label
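To sanity-check the input pipeline, one can fetch a batch and plot an image. The sketch below is left commented out because it only works after the nets have been created and run at least once (later in this notebook):

```python
# Sketch: inspect one batch after workspace.RunNet has executed at least once.
# data = workspace.FetchBlob('data')    # float32, shape (batch_size, 1, 28, 28)
# label = workspace.FetchBlob('label')  # integer labels, shape (batch_size,)
# pyplot.imshow(data[0, 0], cmap='gray')
# pyplot.title('label: {}'.format(label[0]))
```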
def AddAccuracy(model, softmax):
    accuracy = brew.accuracy(model, [softmax, 'label'], "accuracy")
    return accuracy
def AddTrainingOperators(model, softmax):
    # compute the cross-entropy loss between the softmax scores and the labels
    xent = model.LabelCrossEntropy([softmax, 'label'], 'xent')
    loss = model.AveragedLoss(xent, "loss")
    # track the accuracy of the model
    AddAccuracy(model, softmax)
    # use the average loss to add gradient operators for backpropagation
    model.AddGradientOperators([loss])
    # initialize the SGD solver with a stepwise learning-rate decay
    optimizer.build_sgd(model, base_learning_rate=0.1, policy="step",
                        stepsize=1, gamma=0.999)
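With `policy="step"`, `stepsize=1`, and `gamma=0.999`, the learning rate decays a little on every iteration; if I read Caffe2's step policy correctly, it is `base_lr * gamma ** (iter // stepsize)`. A few lines show what that schedule looks like over the 200 iterations used below:

```python
# Effective learning rate under the "step" policy (assumed semantics):
# lr(i) = base_lr * gamma ** (i // stepsize)
base_lr, gamma, stepsize = 0.1, 0.999, 1
for i in (0, 100, 200):
    print(i, base_lr * gamma ** (i // stepsize))  # 0.1, ~0.0905, ~0.0819
```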
def AddBookkeepingOperators(model):
    """This adds a few bookkeeping operators that we can inspect later.

    These operators do not affect the training procedure: they only collect
    statistics and print them to a file or to logs.
    """
    # Print basically prints out the content of the blob. to_file=1 routes the
    # printed output to a file. The file is stored under
    # root_folder/[blob name].
    model.Print('accuracy', [], to_file=1)
    model.Print('loss', [], to_file=1)
    # Summarize the parameters. Unlike Print, Summarize gives some
    # statistics of the parameter, such as mean, std, min and max.
    for param in model.params:
        model.Summarize(param, [], to_file=1)
        model.Summarize(model.param_to_grad[param], [], to_file=1)
    # If we really wanted to be verbose, we could summarize EVERY blob
    # the model produces; that is probably not a good idea, because
    # summarization does not come for free. For this demo, we only
    # summarize the parameters and their gradients.
batch_size = 32
workspace.ResetWorkspace()
train_data = '/home/lv009/caffe2_notebooks/tutorial_data/mnist/mnist-train-nchw-lmdb'
arg_scope = {"order": "NCHW"}
# Create the model helper for the training model
training_model = model_helper.ModelHelper(name="mnist_train", arg_scope=arg_scope)
# Specify that the input comes from the training lmdb
data, label = AddInput(
    training_model, batch_size=batch_size,
    db=train_data,
    db_type='lmdb')
# Pin both the training net and the parameter-initialization net to one GPU
gpu_no = 0
training_model.net.RunAllOnGPU(gpu_id=gpu_no, use_cudnn=True)
training_model.param_init_net.RunAllOnGPU(gpu_id=gpu_no, use_cudnn=True)
softmax = AddLeNetModel(training_model)
AddTrainingOperators(training_model, softmax)
# Run the parameter-initialization net once, then instantiate the training net
workspace.RunNetOnce(training_model.param_init_net)
workspace.CreateNet(training_model.net, overwrite=True,
                    input_blobs=['data', 'label'])
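As a quick check that `RunAllOnGPU` took effect, the generated NetDef can be dumped; each op's `device_option` should name the CUDA device:

```python
# Confirm the ops were pinned to the GPU: every op's device_option should
# show the CUDA device type (verbose; prints the whole NetDef).
print(training_model.net.Proto())
```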
total_iters = 200
accuracy = np.zeros(total_iters)
loss = np.zeros(total_iters)
# MAIN TRAINING LOOP!
# Now, we will manually run the network for 200 iterations.
for i in range(total_iters):
    workspace.RunNet(training_model.net)
    accuracy[i] = workspace.blobs['accuracy']
    loss[i] = workspace.blobs['loss']
    # Check the accuracy and loss every so often
    if i % 25 == 0:
        print("Iter: {}, Loss: {}, Accuracy: {}".format(i, loss[i], accuracy[i]))
# After the execution is done, plot the loss and accuracy curves.
pyplot.plot(loss, 'b')
pyplot.plot(accuracy, 'r')
pyplot.title("Summary of Training Run")
pyplot.xlabel("Iteration")
pyplot.legend(('Loss', 'Accuracy'), loc='upper right')
Iter: 0, Loss: 2.42218399048, Accuracy: 0.0625
Iter: 25, Loss: 0.447062402964, Accuracy: 0.84375
Iter: 50, Loss: 0.399103671312, Accuracy: 0.875
Iter: 75, Loss: 0.464915454388, Accuracy: 0.8125
Iter: 100, Loss: 0.362110167742, Accuracy: 0.875
Iter: 125, Loss: 0.208061009645, Accuracy: 0.875
Iter: 150, Loss: 0.183143928647, Accuracy: 0.9375
Iter: 175, Loss: 0.241408318281, Accuracy: 0.875
Reference:
MNIST using Caffe2 with a specific GPU