Caffe2 single GPU for MNIST

This post shows how to train on the MNIST handwritten-digit dataset with the Caffe2 deep learning framework: define a LeNet model, load the data from an LMDB database, train on a single GPU while monitoring loss and accuracy, and finish with a summary plot of the training run.


import numpy as np
from matplotlib import pyplot as plt
%matplotlib inline

from caffe2.python import core,workspace,model_helper,brew,optimizer
from caffe2.proto import caffe2_pb2

device_option = caffe2_pb2.DeviceOption(device_type=caffe2_pb2.CUDA)

def AddLeNetModel(model):
    # Standard LeNet: two conv/pool pairs, a 500-unit FC layer with ReLU,
    # and a 10-way softmax output; all ops are placed on the GPU via DeviceScope.
    with core.DeviceScope(device_option):
        conv1 = brew.conv(model,'data', 'conv1', 1, 20, 5)
        pool1 = brew.max_pool(model, conv1, 'pool1', kernel=2, stride=2)
        conv2 = brew.conv(model, pool1, 'conv2', 20, 50, 5)
        pool2 = brew.max_pool(model, conv2, 'pool2', kernel=2, stride=2)
        fc3 = brew.fc(model, pool2, 'fc3', 50 * 4 * 4, 500)
        fc3 = brew.relu(model, fc3, fc3)
        pred = brew.fc(model, fc3, 'pred', 500, 10)
        softmax = brew.softmax(model, pred, 'softmax')
    return softmax
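
The 50 * 4 * 4 input size of fc3 follows from how the two conv/pool pairs shrink a 28x28 MNIST image. A small sketch of the arithmetic (not part of the original code, just a sanity check):

def conv_out(size, kernel, stride=1, pad=0):
    return (size + 2 * pad - kernel) // stride + 1

s = 28                 # input: 1 x 28 x 28
s = conv_out(s, 5)     # conv1, 5x5 kernel -> 20 x 24 x 24
s = conv_out(s, 2, 2)  # pool1, 2x2 / stride 2 -> 20 x 12 x 12
s = conv_out(s, 5)     # conv2, 5x5 kernel -> 50 x 8 x 8
s = conv_out(s, 2, 2)  # pool2, 2x2 / stride 2 -> 50 x 4 x 4
print(s)               # 4, hence the fc3 input size of 50 * 4 * 4 = 800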

def AddInput(model, batch_size, db, db_type):
    ### load the data from db - Method 1 using brew
    #data_uint8, label = brew.db_input(
    #    model,
    #    blobs_out=["data_uint8", "label"],
    #    batch_size=batch_size,
    #    db=db,
    #    db_type=db_type,
    #)
    ### load the data from db - Method 2 using TensorProtosDB
    data_uint8, label = model.TensorProtosDBInput(
        [], ["data_uint8", "label"], batch_size=batch_size,
        db=db, db_type=db_type)
    # cast the data to float
    data = model.Cast(data_uint8, "data", to=core.DataType.FLOAT)
    # scale data from [0,255] down to [0,1]
    data = model.Scale(data, data, scale=float(1./256))
    # don't need the gradient for the backward pass
    data = model.StopGradient(data, data)
    return data, label
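
As a sanity check on the input pipeline, the 'data' and 'label' blobs created above can be fetched from the workspace once the training net (built further below) has run at least once. A minimal sketch, using only the blob names defined in AddInput:

# Minimal sketch: inspect the input blobs after workspace.RunNet(...) has
# executed at least once (see the training loop below).
data_blob = workspace.FetchBlob('data')    # (batch_size, 1, 28, 28), float32 in [0, 1)
label_blob = workspace.FetchBlob('label')  # (batch_size,), integer labels 0-9
print(data_blob.shape, data_blob.min(), data_blob.max())
print(label_blob[:10])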
    
def AddAccuracy(model, softmax):
    accuracy = brew.accuracy(model, [softmax, 'label'], "accuracy")
    return accuracy


def AddTrainingOperators(model, softmax):
    # Loss calculation
    xent = model.LabelCrossEntropy([softmax, 'label'], 'xent')
    loss = model.AveragedLoss(xent, "loss")
    # Track accuracy during training
    AddAccuracy(model, softmax)
    # Add gradient operators for backpropagation w.r.t. the loss
    model.AddGradientOperators([loss])
    # Initialize the SGD solver with a step learning-rate policy
    opt = optimizer.build_sgd(model, base_learning_rate=0.1, policy="step", stepsize=1, gamma=0.999)
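
The policy="step" setting with stepsize=1 and gamma=0.999 decays the learning rate every iteration. A short sketch of what that implies, assuming the usual step-policy formula lr = base_lr * gamma ** (iter // stepsize):

# Sketch of the effective learning rate under the "step" policy above.
base_lr, gamma, stepsize = 0.1, 0.999, 1
for it in (0, 50, 100, 199):
    print(it, base_lr * gamma ** (it // stepsize))
# roughly 0.1, 0.095, 0.090, 0.082 over the 200 iterations run below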


def AddBookkeepingOperators(model):
    """Adds a few bookkeeping operators that we can inspect later.

    These operators do not affect the training procedure: they only collect
    statistics and print them to a file or to logs.
    """
    # Print basically prints out the content of the blob. to_file=1 routes the
    # printed output to a file. The file is going to be stored under
    #     root_folder/[blob name]
    model.Print('accuracy', [], to_file=1)
    model.Print('loss', [], to_file=1)
    # Summarize the parameters. Unlike Print, Summarize gives some statistics
    # of the parameter, such as mean, std, min and max.
    for param in model.params:
        model.Summarize(param, [], to_file=1)
        model.Summarize(model.param_to_grad[param], [], to_file=1)
    # If we really wanted to be verbose, we could summarize EVERY blob the
    # model produces; that is probably not a good idea, because summarization
    # does not come for free. For this demo we only summarize the parameters
    # and their gradients.
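
With to_file=1, each Print/Summarize op writes one file per blob under the workspace root folder. Note that this script defines AddBookkeepingOperators but never calls it; if you do call it when building the model, a hedged sketch for locating the output files (assuming workspace.RootFolder() returns that folder in your Caffe2 build):

# Hedged sketch: list the bookkeeping files written by Print/Summarize
# (only produced if AddBookkeepingOperators was added to the model).
import os
root = workspace.RootFolder()
print(root)
print(sorted(os.listdir(root)))  # e.g. 'accuracy', 'loss', parameter summaries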

Batch_Size = 32
workspace.ResetWorkspace()
train_data = '/home/lv009/caffe2_notebooks/tutorial_data/mnist/mnist-train-nchw-lmdb'
arg_scope = {"order": "NCHW"}
# Create the model helper for the training model
training_model = model_helper.ModelHelper(name="mnist_train", arg_scope=arg_scope)
# Specify that the input comes from the training LMDB
data, label = AddInput(
    training_model, batch_size=Batch_Size,
    db=train_data,
    db_type='lmdb')

# Pin both the training net and its parameter-initialization net to a single GPU
gpu_no = 0
training_model.net.RunAllOnGPU(gpu_id=gpu_no, use_cudnn=True)
training_model.param_init_net.RunAllOnGPU(gpu_id=gpu_no, use_cudnn=True)
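
Since everything is pinned to a single GPU, it can help to confirm that the requested device exists before building the rest of the model. A minimal sketch, assuming workspace.NumCudaDevices() is available in your Caffe2 build:

# Hedged sketch: make sure the requested GPU id is actually available.
assert workspace.NumCudaDevices() > gpu_no, \
    "GPU {} not available ({} CUDA devices found)".format(gpu_no, workspace.NumCudaDevices())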

soft = AddLeNetModel(training_model)
AddTrainingOperators(training_model, soft)

# Run the parameter-initialization net once, then create the training net
workspace.RunNetOnce(training_model.param_init_net)
workspace.CreateNet(training_model.net, overwrite=True, input_blobs=['data', 'label'])

from matplotlib import pyplot
total_iters = 200
accuracy = np.zeros(total_iters)
loss = np.zeros(total_iters)

# MAIN TRAINING LOOP!
# Now, we will manually run the network for 200 iterations. 
for i in range(total_iters):
    workspace.RunNet(training_model.net)
    accuracy[i] = workspace.blobs['accuracy']
    loss[i] = workspace.blobs['loss']
    # Check the accuracy and loss every so often
    if i % 25 == 0:
        print("Iter: {}, Loss: {}, Accuracy: {}".format(i,loss[i],accuracy[i]))

# After the execution is done, let's plot the values.
pyplot.plot(loss, 'b')
pyplot.plot(accuracy, 'r')
pyplot.title("Summary of Training Run")
pyplot.xlabel("Iteration")
pyplot.legend(('Loss', 'Accuracy'), loc='upper right')



Iter: 0, Loss: 2.42218399048, Accuracy: 0.0625
Iter: 25, Loss: 0.447062402964, Accuracy: 0.84375
Iter: 50, Loss: 0.399103671312, Accuracy: 0.875
Iter: 75, Loss: 0.464915454388, Accuracy: 0.8125
Iter: 100, Loss: 0.362110167742, Accuracy: 0.875
Iter: 125, Loss: 0.208061009645, Accuracy: 0.875
Iter: 150, Loss: 0.183143928647, Accuracy: 0.9375
Iter: 175, Loss: 0.241408318281, Accuracy: 0.875
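
The per-iteration numbers are noisy at batch size 32. One way (not in the original post) to report a single final figure is to average the metrics over the last few iterations:

# Sketch: smooth the per-batch noise by averaging the last 25 iterations.
print("final loss: {:.4f}, final accuracy: {:.4f}".format(
    loss[-25:].mean(), accuracy[-25:].mean()))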

[Figure: training loss (blue) and accuracy (red) over the 200 iterations, titled "Summary of Training Run"]
Reference:
Mnist using caffe2 with Specific GPU use
