1. Introduction
When analyzing network performance, you often need to find the most time-consuming parts of the network, which means printing how much time each layer spends during forward propagation. Fortunately, caffe.cpp already contains code for exactly this; the code presented here is a modified version of it.
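For reference, the stock Caffe binary exposes the same benchmark through its time subcommand. If you do not need to fill the input blobs yourself, a command along the following lines (with your own model path) is usually enough:

caffe time --model=./models/test.prototxt --iterations=10 --gpu=0

The program below goes one step further and writes real data into the input blobs before timing, which matches how the network would be exercised in an actual deployment.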
2. Code Implementation
#include <cstdlib>
#include <cstring>
#include <iomanip>
#include <iostream>
#include <string>
#include <vector>
#include <opencv2/opencv.hpp>
#include <caffe/blob.hpp>
#include <caffe/common.hpp>
#include <caffe/layer.hpp>
#include <caffe/net.hpp>
#include <caffe/util/benchmark.hpp>
#include <caffe/proto/caffe.pb.h>
using std::vector;
using caffe::shared_ptr;
using caffe::Blob;
using caffe::Net;
using caffe::Layer;
using caffe::Timer;
/*
* Prints the time spent in each layer during forward propagation.
* deploy_proto: network definition file (deploy prototxt)
* model_file: trained weights (.caffemodel)
* FLAGS_iterations: number of timing iterations (default 10)
*/
void TimeConsumptionCalc(const std::string& deploy_proto, const std::string& model_file, const int FLAGS_iterations = 10)
{
// Instantiate the caffe net.
Net<float> caffe_net(deploy_proto, caffe::TEST);
caffe_net.CopyTrainedLayersFrom(model_file);
// Prepare the net input; adapt this part to your own use case.
// Image data blob
const vector<Blob<float> *> &input_blobs = caffe_net.input_blobs();
cv::Mat img_in(832, 608, CV_32FC3, cv::Scalar(123, 23, 223)); // dummy 832x608 (rows x cols) float image
input_blobs[0]->Reshape(1, img_in.channels(), img_in.rows, img_in.cols);
float *blob_data = input_blobs[0]->mutable_cpu_data();
const int cols = img_in.cols;
const int rows = img_in.rows;
// Copy from OpenCV's interleaved HWC layout into the blob's planar CHW layout.
for (int i = 0; i < rows; ++i)
{
const cv::Vec3f* data = img_in.ptr<cv::Vec3f>(i); // row pointer into the Mat; valid because img_in is CV_32FC3
for (int j = 0; j < cols; ++j)
{
blob_data[(0 * rows + i)*cols + j] = data[j][0];
blob_data[(1 * rows + i)*cols + j] = data[j][1];
blob_data[(2 * rows + i)*cols + j] = data[j][2];
}
}
//image info
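// The second input blob presumably holds (height, width, scale), as in Faster R-CNN style networks.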
input_blobs[1]->Reshape(1, 3, 1, 1);
blob_data = input_blobs[1]->mutable_cpu_data();
float img_info[] = {832, 608, 1.0};
std::memcpy(blob_data, &img_info[0], sizeof(float)*3);
// Do one warm-up forward pass first, so that memory allocations are done
// and the timed iterations below are more stable.
LOG(INFO) << "Performing Forward";
float initial_loss;
caffe_net.Forward(&initial_loss);
LOG(INFO) << "Initial loss: " << initial_loss;
const vector<shared_ptr<Layer<float> > >& layers = caffe_net.layers();
const vector<vector<Blob<float>*> >& bottom_vecs = caffe_net.bottom_vecs();
const vector<vector<Blob<float>*> >& top_vecs = caffe_net.top_vecs();
LOG(INFO) << "*** Benchmark begins ***";
LOG(INFO) << "Testing for " << FLAGS_iterations << " iterations.";
Timer total_timer;
total_timer.Start();
Timer forward_timer;
Timer timer;
std::vector<double> forward_time_per_layer(layers.size(), 0.0);
double forward_time = 0.0;
for (int j = 0; j < FLAGS_iterations; ++j) {
Timer iter_timer;
iter_timer.Start();
forward_timer.Start();
for (size_t i = 0; i < layers.size(); ++i) {
timer.Start();
layers[i]->Forward(bottom_vecs[i], top_vecs[i]);
forward_time_per_layer[i] += timer.MicroSeconds();
}
forward_time += forward_timer.MicroSeconds();
LOG(INFO) << "Iteration: " << j + 1 << " forward-backward time: "
<< iter_timer.MilliSeconds() << " ms.";
}
LOG(INFO) << "Average time per layer: ";
const std::vector<std::string>& layernames = caffe_net.layer_names();
LOG(INFO) << "layer_names size: " << layernames.size() << "; layers size: " << layers.size();
for (size_t i = 0; i < layers.size(); ++i) {
const std::string& layername = layernames[i];
LOG(INFO) << std::setfill(' ') << std::setw(10) << layername <<
"\tforward: " << forward_time_per_layer[i] / 1000 /
FLAGS_iterations << " ms.";
}
total_timer.Stop();
LOG(INFO) << "Average Forward pass: " << forward_time / 1000 /
FLAGS_iterations << " ms.";
LOG(INFO) << "Total Time: " << total_timer.MilliSeconds() << " ms.";
LOG(INFO) << "*** Benchmark ends ***";
}
int main(int argc, char* argv[])
{
if (argc != 3)
{
std::cout << "Usage: " << argv[0] << " time gpu_id  (a negative gpu_id selects CPU)" << std::endl;
return 0;
}
std::string run_type = std::string(argv[1]);
if (run_type == "time")
{
int device_id = atoi(argv[2]); // GPU id; a negative value selects CPU mode
if (device_id >= 0)
{
caffe::Caffe::set_mode(caffe::Caffe::GPU);
caffe::Caffe::SetDevice(device_id);
}
else
{
caffe::Caffe::set_mode(caffe::Caffe::CPU);
}
TimeConsumptionCalc("./models/test.prototxt",
"./models/caffe.caffemodel",
10);
}
return 0;
}
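To build the program, link against Caffe, OpenCV, glog and protobuf; the exact include and library paths depend on your installation, so the compile line below is only a sketch (the binary name layer_time is arbitrary):

g++ -std=c++11 main.cpp -o layer_time -I/path/to/caffe/include -L/path/to/caffe/build/lib -lcaffe -lglog -lprotobuf `pkg-config --cflags --libs opencv`

It can then be run as, for example, ./layer_time time 0 to benchmark on GPU 0, or ./layer_time time -1 to fall back to CPU.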
3. Results
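Each line below reports a layer's forward time averaged over the 10 timing iterations, in milliseconds: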
I0408 16:34:39.648761 66204 main.cpp:159] input forward: 0.007 ms.
I0408 16:34:39.648782 66204 main.cpp:159] conv1_1 forward: 727.992 ms.
I0408 16:34:39.648793 66204 main.cpp:159] relu1_1 forward: 44.2521 ms.
I0408 16:34:39.648805 66204 main.cpp:159] conv1_2 forward: 3841.53 ms.
I0408 16:34:39.648814 66204 main.cpp:159] relu1_2 forward: 51.561 ms.
I0408 16:34:39.648823 66204 main.cpp:159] pool1 forward: 141.595 ms.
I0408 16:34:39.648829 66204 main.cpp:159] conv2_1 forward: 1535.58 ms.
I0408 16:34:39.648836 66204 main.cpp:159] relu2_1 forward: 23.1318 ms.
I0408 16:34:39.648844 66204 main.cpp:159] conv2_2 forward: 3124.48 ms.
I0408 16:34:39.648850 66204 main.cpp:159] relu2_2 forward: 26.3654 ms.
I0408 16:34:39.648857 66204 main.cpp:159] pool2 forward: 80.442 ms.
I0408 16:34:39.648864 66204 main.cpp:159] conv3_1 forward: 1411.55 ms.
I0408 16:34:39.648872 66204 main.cpp:159] relu3_1 forward: 12.0473 ms.
I0408 16:34:39.648880 66204 main.cpp:159] conv3_2 forward: 2761.23 ms.
I0408 16:34:39.648887 66204 main.cpp:159] relu3_2 forward: 11.3475 ms.
I0408 16:34:39.648895 66204 main.cpp:159] conv3_3 forward: 2789.08 ms.
I0408 16:34:39.648900 66204 main.cpp:159] relu3_3 forward: 11.7217 ms.
I0408 16:34:39.648908 66204 main.cpp:159] pool3 forward: 34.3051 ms.
I0408 16:34:39.648916 66204 main.cpp:159] conv4_1 forward: 1337.64 ms.
I0408 16:34:39.648922 66204 main.cpp:159] relu4_1 forward: 6.4562 ms.
I0408 16:34:39.648928 66204 main.cpp:159] conv4_2 forward: 2633.94 ms.
I0408 16:34:39.648936 66204 main.cpp:159] relu4_2 forward: 6.0823 ms.
I0408 16:34:39.648943 66204 main.cpp:159] conv4_3 forward: 2576.46 ms.
I0408 16:34:39.648950 66204 main.cpp:159] relu4_3 forward: 5.2473 ms.
I0408 16:34:39.648957 66204 main.cpp:159] pool4 forward: 16.83 ms.
I0408 16:34:39.648964 66204 main.cpp:159] conv5_1 forward: 679.819 ms.
I0408 16:34:39.648972 66204 main.cpp:159] relu5_1 forward: 1.7372 ms.
I0408 16:34:39.648978 66204 main.cpp:159] conv5_2 forward: 691.741 ms.
I0408 16:34:39.648991 66204 main.cpp:159] relu5_2 forward: 1.3683 ms.
I0408 16:34:39.648998 66204 main.cpp:159] conv5_3 forward: 661.567 ms.
I0408 16:34:39.649006 66204 main.cpp:159] relu5_3 forward: 1.1911 ms.
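As the breakdown shows, the convolution layers account for the overwhelming majority of the forward time (conv1_2 alone averages about 3.8 s here), while the ReLU and pooling layers are comparatively cheap, so any optimization effort is best directed at the convolutions.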