Following on from Part 1, "Adding a PrecisionRecallLoss layer to Caffe", we continue here:
This post uses data_heatmap.cpp and data_heatmap.hpp from https://github.com/tpfister/caffe-heatmap as an example of how to write your own layer.
=================================================================================================================================
1. As usual, we start by adding the new parameter field and message to caffe.proto:
- message LayerParameter {
- optional string name = 1; // the layer name
- optional string type = 2; // the layer type
- repeated string bottom = 3; // the name of each bottom blob
- repeated string top = 4; // the name of each top blob
- // The train / test phase for computation.
- optional Phase phase = 10;
- // The amount of weight to assign each top blob in the objective.
- // Each layer assigns a default value, usually of either 0 or 1,
- // to each top blob.
- repeated float loss_weight = 5;
- // Specifies training parameters (multipliers on global learning constants,
- // and the name and other settings used for weight sharing).
- repeated ParamSpec param = 6;
- // The blobs containing the numeric parameters of the layer.
- repeated BlobProto blobs = 7;
- // Specifies on which bottoms the backpropagation should be skipped.
- // The size must be either 0 or equal to the number of bottoms.
- repeated bool propagate_down = 11;
- // Rules controlling whether and when a layer is included in the network,
- // based on the current NetState. You may specify a non-zero number of rules
- // to include OR exclude, but not both. If no include or exclude rules are
- // specified, the layer is always included. If the current NetState meets
- // ANY (i.e., one or more) of the specified rules, the layer is
- // included/excluded.
- repeated NetStateRule include = 8;
- repeated NetStateRule exclude = 9;
- // Parameters for data pre-processing.
- optional TransformationParameter transform_param = 100;
- // Parameters shared by loss layers.
- optional LossParameter loss_param = 101;
- // Options to allow visualisation; these two fields below are the new visualisation parameters
- optional bool visualise = 200 [ default = false ];
- optional uint32 visualise_channel = 201 [ default = 0 ];
- // Layer type-specific parameters.
- //
- // Note: certain layers may have more than one computational engine
- // for their implementation. These layers include an Engine type and
- // engine parameter for selecting the implementation.
- // The default for the engine is set by the ENGINE switch at compile-time.
- optional AccuracyParameter accuracy_param = 102;
- optional ArgMaxParameter argmax_param = 103;
- optional BatchNormParameter batch_norm_param = 139;
- optional BiasParameter bias_param = 141;
- optional ConcatParameter concat_param = 104;
- optional ContrastiveLossParameter contrastive_loss_param = 105;
- optional ConvolutionParameter convolution_param = 106;
- optional CropParameter crop_param = 144;
- optional DataParameter data_param = 107;
- optional DropoutParameter dropout_param = 108;
- optional DummyDataParameter dummy_data_param = 109;
- optional EltwiseParameter eltwise_param = 110;
- optional ELUParameter elu_param = 140;
- optional EmbedParameter embed_param = 137;
- optional ExpParameter exp_param = 111;
- optional FlattenParameter flatten_param = 135;
- optional HeatmapDataParameter heatmap_data_param = 145; // add the parameter for our own layer
- optional HDF5DataParameter hdf5_data_param = 112;
- optional HDF5OutputParameter hdf5_output_param = 113;
- optional HingeLossParameter hinge_loss_param = 114;
- optional ImageDataParameter image_data_param = 115;
- optional InfogainLossParameter infogain_loss_param = 116;
- optional InnerProductParameter inner_product_param = 117;
- optional InputParameter input_param = 143;
- optional LogParameter log_param = 134;
- optional LRNParameter lrn_param = 118;
- optional MemoryDataParameter memory_data_param = 119;
- optional MVNParameter mvn_param = 120;
- optional PoolingParameter pooling_param = 121;
- optional PowerParameter power_param = 122;
- optional PReLUParameter prelu_param = 131;
- optional PythonParameter python_param = 130;
- optional ReductionParameter reduction_param = 136;
- optional ReLUParameter relu_param = 123;
- optional ReshapeParameter reshape_param = 133;
- optional ScaleParameter scale_param = 142;
- optional SigmoidParameter sigmoid_param = 124;
- optional SoftmaxParameter softmax_param = 125;
- optional SPPParameter spp_param = 132;
- optional SliceParameter slice_param = 126;
- optional TanHParameter tanh_param = 127;
- optional ThresholdParameter threshold_param = 128;
- optional TileParameter tile_param = 138;
- optional WindowDataParameter window_data_param = 129;
- }
- // VGG heatmap params: the parameters for our own layer
- message HeatmapDataParameter {
- optional bool segmentation = 1000 [default = false];
- optional uint32 multfact = 1001 [default = 1];
- optional uint32 num_channels = 1002 [default = 3];
- optional uint32 batchsize = 1003;
- optional string root_img_dir = 1004;
- optional bool random_crop = 1005; // image augmentation type
- optional bool sample_per_cluster = 1006; // image sampling type
- optional string labelinds = 1007 [default = '']; // if specified, only use these regression variables
- optional string source = 1008;
- optional string meanfile = 1009;
- optional string crop_meanfile = 1010;
- optional uint32 cropsize = 1011 [default = 0];
- optional uint32 outsize = 1012 [default = 0];
- optional float scale = 1013 [ default = 1 ];
- optional uint32 label_width = 1014 [ default = 1 ];
- optional uint32 label_height = 1015 [ default = 1 ];
- optional bool dont_flip_first = 1016 [ default = true ];
- optional float angle_max = 1017 [ default = 0 ];
- optional bool flip_joint_labels = 1018 [ default = true ];
- }
- // NOTE
- // Update the next available ID when you add a new LayerParameter field.
- //
- // LayerParameter next available layer-specific ID: 146 (last added: heatmap_data_param)
- // (The deprecated V1LayerParameter below also needs the new layer type and parameter, as shown next.)
- // DEPRECATED: use LayerParameter.
- message V1LayerParameter {
- repeated string bottom = 2;
- repeated string top = 3;
- optional string name = 4;
- repeated NetStateRule include = 32;
- repeated NetStateRule exclude = 33;
- enum LayerType {
- NONE = 0;
- ABSVAL = 35;
- ACCURACY = 1;
- ARGMAX = 30;
- BNLL = 2;
- CONCAT = 3;
- CONTRASTIVE_LOSS = 37;
- CONVOLUTION = 4;
- DATA = 5;
- DATA_HEATMAP = 40; // added for our layer
- DECONVOLUTION = 39;
- DROPOUT = 6;
- DUMMY_DATA = 32;
- EUCLIDEAN_LOSS = 7;
- ELTWISE = 25;
- EXP = 38;
- FLATTEN = 8;
- HDF5_DATA = 9;
- HDF5_OUTPUT = 10;
- HINGE_LOSS = 28;
- IM2COL = 11;
- IMAGE_DATA = 12;
- INFOGAIN_LOSS = 13;
- INNER_PRODUCT = 14;
- LRN = 15;
- MEMORY_DATA = 29;
- MULTINOMIAL_LOGISTIC_LOSS = 16;
- MVN = 34;
- POOLING = 17;
- POWER = 26;
- RELU = 18;
- SIGMOID = 19;
- SIGMOID_CROSS_ENTROPY_LOSS = 27;
- SILENCE = 36;
- SOFTMAX = 20;
- SOFTMAX_LOSS = 21;
- SPLIT = 22;
- SLICE = 33;
- TANH = 23;
- WINDOW_DATA = 24;
- THRESHOLD = 31;
- }
- optional LayerType type = 5;
- repeated BlobProto blobs = 6;
- repeated string param = 1001;
- repeated DimCheckMode blob_share_mode = 1002;
- enum DimCheckMode {
- STRICT = 0;
- PERMISSIVE = 1;
- }
- repeated float blobs_lr = 7;
- repeated float weight_decay = 8;
- repeated float loss_weight = 35;
- optional AccuracyParameter accuracy_param = 27;
- optional ArgMaxParameter argmax_param = 23;
- optional ConcatParameter concat_param = 9;
- optional ContrastiveLossParameter contrastive_loss_param = 40;
- optional ConvolutionParameter convolution_param = 10;
- optional DataParameter data_param = 11;
- optional HeatmapDataParameter heatmap_data_param = 43; // add the parameter for our own layer
- optional DropoutParameter dropout_param = 12;
- optional DummyDataParameter dummy_data_param = 26;
- optional EltwiseParameter eltwise_param = 24;
- optional ExpParameter exp_param = 41;
- optional HDF5DataParameter hdf5_data_param = 13;
- optional HDF5OutputParameter hdf5_output_param = 14;
- optional HingeLossParameter hinge_loss_param = 29;
- optional ImageDataParameter image_data_param = 15;
- optional InfogainLossParameter infogain_loss_param = 16;
- optional InnerProductParameter inner_product_param = 17;
- optional LRNParameter lrn_param = 18;
- optional MemoryDataParameter memory_data_param = 22;
- optional MVNParameter mvn_param = 34;
- optional PoolingParameter pooling_param = 19;
- optional PowerParameter power_param = 21;
- optional ReLUParameter relu_param = 30;
- optional SigmoidParameter sigmoid_param = 38;
- optional SoftmaxParameter softmax_param = 39;
- optional SliceParameter slice_param = 31;
- optional TanHParameter tanh_param = 37;
- optional ThresholdParameter threshold_param = 25;
- optional WindowDataParameter window_data_param = 20;
- optional TransformationParameter transform_param = 36;
- optional LossParameter loss_param = 42;
- optional V0LayerParameter layer = 1;
- }
- transform_param {
- mirror: true
- crop_size: 227
- mean_file: "data/ilsvrc12/imagenet_mean.binaryproto"
- }
The snippet above is CaffeNet's data layer definition. It uses mirroring and crop_size, and also specifies a mean_file. Cropping with crop_size lets the network attend to both the central region and the corners, while mirror generates mirrored copies and helps make up for a small dataset. Someone asked about crop_size and mean_file in the GitHub issues: originally you could not define a crop and use a mean_file at the same time, but this was later fixed. Moreover, mean_file has little to do with crop_size; the mean_file only needs to be produced from your own training set. The data is first mean-subtracted using the mean_file and then cropped. If you use the Python interface to load python/caffe/ilsvrc_2012_mean.npy and print its shape, you get 3*256*256, which shows the mean file was built from the original dataset; it does not match the crop size of 227, yet training is unaffected. So you can build the mean_file from the original dataset first and then choose whatever crop size you want, without worrying about the size mismatch.
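As a quick sanity check, the shape of a mean file can also be inspected from C++ by loading the BlobProto directly. This is a minimal sketch, assuming the standard Caffe headers and the imagenet_mean.binaryproto path from the snippet above:
- #include <iostream>
- #include "caffe/blob.hpp"
- #include "caffe/proto/caffe.pb.h"
- #include "caffe/util/io.hpp"
- int main() {
-   caffe::BlobProto proto;
-   // path taken from the transform_param example above; replace with your own mean file
-   caffe::ReadProtoFromBinaryFileOrDie("data/ilsvrc12/imagenet_mean.binaryproto", &proto);
-   caffe::Blob<float> mean;
-   mean.FromProto(proto);
-   // prints e.g. 1x3x256x256 for the ILSVRC12 mean
-   std::cout << mean.num() << "x" << mean.channels() << "x"
-             << mean.height() << "x" << mean.width() << std::endl;
-   return 0;
- }
Next comes the header file, data_heatmap.hpp: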
- // Copyright 2014 Tomas Pfister
- #ifndef CAFFE_HEATMAP_HPP_
- #define CAFFE_HEATMAP_HPP_
- #include "caffe/layer.hpp"
- #include <vector>
- #include <boost/timer/timer.hpp>
- #include <opencv2/core/core.hpp>
- #include "caffe/common.hpp"
- #include "caffe/data_transformer.hpp"
- #include "caffe/filler.hpp"
- #include "caffe/internal_thread.hpp"
- #include "caffe/proto/caffe.pb.h"
- namespace caffe
- {
- // Inherits from BasePrefetchingDataLayer
- template<typename Dtype>
- class DataHeatmapLayer: public BasePrefetchingDataLayer<Dtype>
- {
- public:
- explicit DataHeatmapLayer(const LayerParameter& param)
- : BasePrefetchingDataLayer<Dtype>(param) {}
- virtual ~DataHeatmapLayer();
- virtual void DataLayerSetUp(const vector<Blob<Dtype>*>& bottom,
- const vector<Blob<Dtype>*>& top);
- virtual inline const char* type() const { return "DataHeatmap"; }
- virtual inline int ExactNumBottomBlobs() const { return 0; }
- virtual inline int ExactNumTopBlobs() const { return 2; }
- protected:
- // Virtual function that actually reads one batch of data into a Batch
- virtual void load_batch(Batch<Dtype>* batch);
- // The functions below are our own helpers; they are all called from load_batch
- // Filename of current image
- inline void GetCurImg(string& img_name, std::vector<float>& img_class, std::vector<float>& crop_info, int& cur_class);
- inline void AdvanceCurImg();
- // Visualise point annotations
- inline void VisualiseAnnotations(cv::Mat img_annotation_vis, int numChannels, std::vector<float>& cur_label, int width);
- // Random number generator
- inline float Uniform(const float min, const float max);
- // Rotate image for augmentation
- inline cv::Mat RotateImage(cv::Mat src, float rotation_angle);
- // Global vars
- shared_ptr<Caffe::RNG> rng_data_;
- shared_ptr<Caffe::RNG> prefetch_rng_;
- vector<std::pair<std::string, int> > lines_;
- int lines_id_;
- int datum_channels_;
- int datum_height_;
- int datum_width_;
- int datum_size_;
- int num_means_;
- int cur_class_;
- vector<int> labelinds_;
- // Vector of mean images, one per video
- vector<cv::Mat> mean_img_;
- // Whether the per-video mean should be subtracted
- bool sub_mean_; // true if the mean should be subtracted
- // Whether to sample uniformly within each class
- bool sample_per_cluster_; // sample separately per cluster?
- string root_img_dir_;
- // If sample_per_cluster_ is enabled, this vector holds, for each class, the index of the
- // image randomly sampled from that class. For example, if class 1 contains 10 images,
- // a random number in [0, 9] is drawn as the index of the image to process from class 1;
- // i.e. the array maps each class to a randomly chosen image index within that class.
- vector<float> cur_class_img_; // current class index
- // Index of the current image, used during processing
- int cur_img_; // current image index
- // Mapping from image index (numbered from 0) to class
- vector<int> img_idx_map_; // current image indices for each class
- // array of lists: one list of image names per class
- // This type looks intimidating, but it unpacks level by level:
- // the outermost level is a vector, indexed by class;
- // the second level is another vector, indexed by image;
- // the third level is a pair, accessed with .first / .second:
- //   .first is the image name (a string),
- //   .second is another pair whose .first is the label vector (indexable)
- //   and whose .second is a pair of the crop-info vector and an int class id.
- vector< vector< pair<string, pair<vector<float>, pair<vector<float>, int> > > > > img_list_;
- // vector of (image, label) pairs
- // The outer level is a vector, so it is indexed;
- // the second level is a pair (.first = image name, .second = labels and crop info);
- // the third level is again a pair (.first = label vector, .second = (crop info, class));
- // vectors at the inner levels are indexed, and the final class id is a plain int.
- vector< pair<string, pair<vector<float>, pair<vector<float>, int> > > > img_label_list_;
- };
- }
- #endif /* CAFFE_HEATMAP_HPP_ */
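To make the nested container concrete, here is a minimal sketch (not part of the original code) of how one entry of img_list_ would be unpacked; the typedef and function names are my own:
- #include <string>
- #include <utility>
- #include <vector>
- // Hypothetical aliases matching the nested type declared in the header above.
- typedef std::pair<std::vector<float>, int> CropAndClass;             // (crop info, cluster class)
- typedef std::pair<std::vector<float>, CropAndClass> LabelCropClass;  // (joint labels, (crop info, class))
- typedef std::pair<std::string, LabelCropClass> ImgEntry;             // (image name, everything else)
- void UnpackEntry(const std::vector<std::vector<ImgEntry> >& img_list, int cls, int idx)
- {
-   const ImgEntry& e = img_list[cls][idx];
-   const std::string& img_name = e.first;                         // image filename
-   const std::vector<float>& labels = e.second.first;             // joint coordinates
-   const std::vector<float>& crop_info = e.second.second.first;   // crop window
-   int cluster_class = e.second.second.second;                    // class index
-   (void)img_name; (void)labels; (void)crop_info; (void)cluster_class;
- }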
Next, the .cpp implementation:
- // Copyright 2015 Tomas Pfister
- #include <fstream> // NOLINT(readability/streams)
- #include <iostream> // NOLINT(readability/streams)
- #include <string>
- #include <utility>
- #include <vector>
- #include "caffe/layers/data_layer.hpp"
- #include "caffe/layer.hpp"
- #include "caffe/util/io.hpp"
- #include "caffe/util/math_functions.hpp"
- #include "caffe/util/rng.hpp"
- #include <stdint.h>
- #include <cmath>
- #include <opencv2/core/core.hpp>
- #include <opencv2/highgui/highgui.hpp>
- #include <opencv2/highgui/highgui_c.h>
- #include <opencv2/imgproc/imgproc.hpp>
- #include "caffe/layers/data_heatmap.hpp"
- #include "caffe/util/benchmark.hpp"
- #include <unistd.h>
- namespace caffe
- {
- template <typename Dtype>
- DataHeatmapLayer<Dtype>::~DataHeatmapLayer<Dtype>() {
- this->StopInternalThread();
- }
- // Read the settings from the layer parameters and initialise the layer
- template<typename Dtype>
- void DataHeatmapLayer<Dtype>::DataLayerSetUp(const vector<Blob<Dtype>*>& bottom,
- const vector<Blob<Dtype>*>& top) {
- HeatmapDataParameter heatmap_data_param = this->layer_param_.heatmap_data_param();
- // Shortcuts
- // Label-index string (i.e. which regression variables / joints to use)
- const std::string labelindsStr = heatmap_data_param.labelinds();
- // batchsize
- const int batchsize = heatmap_data_param.batchsize();
- // heatmap width
- const int label_width = heatmap_data_param.label_width();
- // heatmap height
- const int label_height = heatmap_data_param.label_height();
- // crop size
- const int size = heatmap_data_param.cropsize();
- // output size the crop is resized to
- const int outsize = heatmap_data_param.outsize();
- // batch size for the labels
- const int label_batchsize = batchsize;
- // whether to sample uniformly per cluster
- sample_per_cluster_ = heatmap_data_param.sample_per_cluster();
- // root directory of the image files
- root_img_dir_ = heatmap_data_param.root_img_dir();
- // initialise rng seed
- const unsigned int rng_seed = caffe_rng_rand();
- srand(rng_seed);
- // get label inds to be used for training
- // Load the label indices
- std::istringstream labelss(labelindsStr);
- LOG(INFO) << "using joint inds:";
- while (labelss)
- {
- std::string s;
- if (!std::getline(labelss, s, ',')) break;
- labelinds_.push_back(atof(s.c_str()));
- LOG(INFO) << atof(s.c_str());
- }
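- // (Illustrative: labelinds would be a comma-separated string such as "1,2,3,4";
- // leaving it empty keeps all regression variables.)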
- // load GT
- // shuffle file
- // Load the ground-truth file, i.e. the joint coordinate annotations
- std::string gt_path = heatmap_data_param.source();
- LOG(INFO) << "Loading annotation from " << gt_path;
- std::ifstream infile(gt_path.c_str());
- string img_name, labels, cropInfos, clusterClassStr;
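- // Each line of the ground-truth file holds four whitespace-separated fields
- // (illustrative example, inferred from the parsing below, not taken from the original post):
- //   images/1/frame0001.jpg  x1,y1,x2,y2,...  xleft,xright,ytop,ybottom  0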
- if (!sample_per_cluster_) // not sampling images randomly per class
- {
- // sequential sampling
- // file name, joint coordinates, crop info, cluster class
- while (infile >> img_name >> labels >> cropInfos >> clusterClassStr)
- {
- // read comma-separated list of regression labels
- // Read the joint coordinates
- std::vector <float> label;
- std::istringstream ss(labels);
- int labelCounter = 1;
- while (ss)
- {
- // Read one number
- std::string s;
- if (!std::getline(ss, s, ',')) break;
- // Keep this value only if it is one of the selected label indices
- // (i.e. labelinds is empty, or the current index is found in it)
- if (labelinds_.empty() || std::find(labelinds_.begin(), labelinds_.end(), labelCounter) != labelinds_.end())
- {
- label.push_back(atof(s.c_str()));
- }
- labelCounter++; // index of the next value
- }
- // read cropping info
- // Read the crop info
- std::vector <float> cropInfo;
- std::istringstream ss2(cropInfos);
- while (ss2)
- {
- std::string s;
- if (!std::getline(ss2, s, ',')) break;
- cropInfo.push_back(atof(s.c_str()));
- }
- int clusterClass = atoi(clusterClassStr.c_str());
- // (image path, (joint coordinates, (crop info, class)))
- img_label_list_.push_back(std::make_pair(img_name, std::make_pair(label, std::make_pair(cropInfo, clusterClass))));
- }
- // initialise image counter to 0
- cur_img_ = 0;
- }
- else
- {
- // uniform sampling w.r.t. classes
- // Sample uniformly with respect to classes:
- // the images are split into several classes, each containing several images,
- // and one image is drawn at random from a class
- while (infile >> img_name >> labels >> cropInfos >> clusterClassStr)
- {
- // Get the cluster class of this entry
- // (e.g. if it is 0, img_list_ needs at least one slot);
- int clusterClass = atoi(clusterClassStr.c_str());
- // if the class index is beyond the current size,
- if (clusterClass + 1 > img_list_.size())
- {
- // expand the array
- img_list_.resize(clusterClass + 1);
- }
- // read comma-separated list of regression labels
- // Read the joint coordinates into the label vector
- std::vector <float> label;
- std::istringstream ss(labels);
- int labelCounter = 1;
- while (ss)
- {
- std::string s;
- if (!std::getline(ss, s, ',')) break;
- if (labelinds_.empty() || std::find(labelinds_.begin(), labelinds_.end(), labelCounter) != labelinds_.end())
- {
- label.push_back(atof(s.c_str()));
- }
- labelCounter++;
- }
- // read cropping info
- // Read the crop info into the cropInfo vector
- std::vector <float> cropInfo;
- std::istringstream ss2(cropInfos);
- while (ss2)
- {
- std::string s;
- if (!std::getline(ss2, s, ',')) break;
- cropInfo.push_back(atof(s.c_str()));
- }
- // Each clusterClass slot is itself a vector holding that class's images
- img_list_[clusterClass].push_back(std::make_pair(img_name, std::make_pair(label, std::make_pair(cropInfo, clusterClass))));
- } // end of while
- // Number of image classes
- const int num_classes = img_list_.size();
- // init image sampling
- cur_class_ = 0;
- // cur_class_img_ stores, for each class, the index of the randomly drawn image
- cur_class_img_.resize(num_classes);
- // init image indices for each class
- for (int idx_class = 0; idx_class < num_classes; idx_class++)
- {
- // If sampling per cluster, draw a random image from this class
- if (sample_per_cluster_)
- {
- // img_list_[idx_class].size() is the number of images in class idx_class;
- // draw a random index in [0, size - 1]
- cur_class_img_[idx_class] = rand() % img_list_[idx_class].size();
- // log the class index and its number of images
- LOG(INFO) << idx_class << " size: " << img_list_[idx_class].size();
- }
- else
- {
- cur_class_img_[idx_class] = 0;
- }
- }
- }
- if (!heatmap_data_param.has_meanfile()) // is a mean file provided?
- {
- // if no mean, assume input images are RGB (3 channels)
- this->datum_channels_ = 3;
- sub_mean_ = false;
- } else {
- // Implementation of per-video mean removal
- // The following block reads the per-video means into OpenCV Mat structures
- sub_mean_ = true;
- // Get the mean-file path from the layer parameters
- string mean_path = heatmap_data_param.meanfile();
- LOG(INFO) << "Loading mean file from " << mean_path;
- BlobProto blob_proto, blob_proto2;
- Blob<Dtype> data_mean;
- // Read the proto into blob_proto, then convert it into data_mean
- ReadProtoFromBinaryFile(mean_path.c_str(), &blob_proto);
- data_mean.FromProto(blob_proto);
- LOG(INFO) << "mean file loaded";
- // read config
- this->datum_channels_ = data_mean.channels();
- // Number of means: one mean image per video
- num_means_ = data_mean.num();
- LOG(INFO) << "num_means: " << num_means_;
- // copy the per-video mean images to an array of OpenCV structures
- const Dtype* mean_buf = data_mean.cpu_data();
- // extract means from beginning of proto file
- // Height of the images stored in the mean file
- const int mean_height = data_mean.height();
- // Width of the images stored in the mean file
- const int mean_width = data_mean.width();
- // Per-video mean heights
- int mean_heights[num_means_];
- // Per-video mean widths
- int mean_widths[num_means_];
- // offset in memory to mean images
- const int meanOffset = 2 * (num_means_);
- for (int n = 0; n < num_means_; n++)
- {
- mean_heights[n] = mean_buf[2 * n];
- mean_widths[n] = mean_buf[2 * n + 1];
- }
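- // (Layout note, inferred from the indexing below: the first 2 * num_means_ values of
- // mean_buf hold (height, width) pairs, and the mean images themselves follow in
- // (video, channel, row, col) order, hence meanOffset = 2 * num_means_.)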
- // save means as OpenCV-compatible files
- // Copy the blob read from the binaryproto file into cv::Mat structures.
- // mean_img_ will hold one mean image per video.
- // First allocate the Mats:
- for (int n = 0; n < num_means_; n++)
- {
- cv::Mat mean_img_tmp_;
- mean_img_tmp_.create(mean_heights[n], mean_widths[n], CV_32FC3);
- mean_img_.push_back(mean_img_tmp_);
- LOG(INFO) << "per-video mean file array created: " << n << ": " << mean_heights[n] << "x" << mean_widths[n] << " (" << size << ")";
- }
- LOG(INFO) << "mean: " << mean_height << "x" << mean_width << " (" << size << ")";
- // Then copy the actual values:
- for (int n = 0; n < num_means_; n++)
- {
- for (int i = 0; i < mean_heights[n]; i++)
- {
- for (int j = 0; j < mean_widths[n]; j++)
- {
- for (int c = 0; c < this->datum_channels_; c++)
- {
- mean_img_[n].at<cv::Vec3f>(i, j)[c] = mean_buf[meanOffset + ((n * this->datum_channels_ + c) * mean_height + i) * mean_width + j]; //[c * mean_height * mean_width + i * mean_width + j];
- }
- }
- }
- }
- LOG(INFO) << "mean file converted to OpenCV structures";
- }
- // init data
- // Reshape the data blobs
- this->transformed_data_.Reshape(batchsize, this->datum_channels_, outsize, outsize);
- top[0]->Reshape(batchsize, this->datum_channels_, outsize, outsize);
- for (int i = 0; i < this->PREFETCH_COUNT; ++i)
- this->prefetch_[i].data_.Reshape(batchsize, this->datum_channels_, outsize, outsize);
- this->datum_size_ = this->datum_channels_ * outsize * outsize;
- // init label
- int label_num_channels;
- if (!sample_per_cluster_) // not sampling per class
- label_num_channels = img_label_list_[0].second.first.size(); // number of label values (twice the number of joints, since each joint has x and y)
- else // sampling uniformly per class
- label_num_channels = img_list_[0][0].second.first.size(); // number of label values of image 0 in class 0
- label_num_channels /= 2; // number of joints
- // Set the outputs to the corresponding sizes:
- // top[0] holds batchsize images,
- // top[1] holds batchsize heatmaps (one channel per joint),
- // shaped as (label batch size, number of joints, heatmap height, heatmap width)
- top[1]->Reshape(label_batchsize, label_num_channels, label_height, label_width);
- for (int i = 0; i < this->PREFETCH_COUNT; ++i)
- this->prefetch_[i].label_.Reshape(label_batchsize, label_num_channels, label_height, label_width);
- LOG(INFO) << "output data size: " << top[0]->num() << "," << top[0]->channels() << "," << top[0]->height() << "," << top[0]->width();
- LOG(INFO) << "output label size: " << top[1]->num() << "," << top[1]->channels() << "," << top[1]->height() << "," << top[1]->width();
- LOG(INFO) << "number of label channels: " << label_num_channels;
- LOG(INFO) << "datum channels: " << this->datum_channels_;
- }
- // Using the initialised state, read the actual image files and joint positions, and convert the joint positions into label heatmaps
- template<typename Dtype>
- void DataHeatmapLayer<Dtype>::load_batch(Batch<Dtype>* batch) {
- CPUTimer batch_timer;
- batch_timer.Start();
- CHECK(batch->data_.count());
- HeatmapDataParameter heatmap_data_param = this->layer_param_.heatmap_data_param();
- // Pointers to blobs' float data
- // Pointers to the data and label blobs
- Dtype* top_data = batch->data_.mutable_cpu_data();
- Dtype* top_label = batch->label_.mutable_cpu_data();
- cv::Mat img, img_res, img_annotation_vis, img_mean_vis, img_vis, img_res_vis, mean_img_this, seg, segTmp;
- // Shortcuts to params
- // Whether to display the loaded images (for debugging)
- const bool visualise = this->layer_param_.visualise();
- // Scale factor applied to the image values
- const Dtype scale = heatmap_data_param.scale();
- // Number of images per batch
- const int batchsize = heatmap_data_param.batchsize();
- // Heatmap height
- const int label_height = heatmap_data_param.label_height();
- // Heatmap width
- const int label_width = heatmap_data_param.label_width();
- // Maximum rotation angle for augmentation
- const float angle_max = heatmap_data_param.angle_max();
- // Whether to leave the first coordinate pair unswapped when flipping
- const bool dont_flip_first = heatmap_data_param.dont_flip_first();
- // Whether to swap left/right joint labels when mirroring
- const bool flip_joint_labels = heatmap_data_param.flip_joint_labels();
- // Joint coordinates are multiplied by multfact to get pixel coordinates
- const int multfact = heatmap_data_param.multfact();
- // Whether to apply a segmentation mask to the image
- const bool segmentation = heatmap_data_param.segmentation();
- // Size of the cropped patch
- const int size = heatmap_data_param.cropsize();
- // The cropped patch is resized to outsize
- const int outsize = heatmap_data_param.outsize();
- const int num_aug = 1;
- // Resize factor
- const float resizeFact = (float)outsize / (float)size;
- // Whether to crop at a random location
- const bool random_crop = heatmap_data_param.random_crop();
- // Shortcuts to global vars
- const bool sub_mean = this->sub_mean_;
- const int channels = this->datum_channels_;
- // What coordinates should we flip when mirroring images?
- // For pose estimation with joints assumes i=0,1 are for head, and i=2,3 left wrist, i=4,5 right wrist etc
- // in which case dont_flip_first should be set to true.
- int flip_start_ind;
- if (dont_flip_first) flip_start_ind = 2;
- else flip_start_ind = 0;
- if (visualise)
- {
- cv::namedWindow("original image", cv::WINDOW_AUTOSIZE);
- cv::namedWindow("cropped image", cv::WINDOW_AUTOSIZE);
- cv::namedWindow("interim resize image", cv::WINDOW_AUTOSIZE);
- cv::namedWindow("resulting image", cv::WINDOW_AUTOSIZE);
- }
- // collect "batchsize" images
- std::vector<float> cur_label, cur_cropinfo;
- std::string img_name;
- int cur_class;
- // loop over non-augmented images
- // Fetch batchsize images and preprocess them
- for (int idx_img = 0; idx_img < batchsize; idx_img++)
- {
- // get image name and class
- // Get the file name, labels, crop info and class
- this->GetCurImg(img_name, cur_label, cur_cropinfo, cur_class);
- // get number of channels for image label
- // Number of label values (twice the number of joints)
- int label_num_channels = cur_label.size();
- // Join the root path with the file name and read the image into img
- std::string img_path = this->root_img_dir_ + img_name;
- DLOG(INFO) << "img: " << img_path;
- img = cv::imread(img_path, CV_LOAD_IMAGE_COLOR);
- // show image
- // Display the loaded image
- if (visualise)
- {
- img_annotation_vis = img.clone();
- this->VisualiseAnnotations(img_annotation_vis, label_num_channels, cur_label, multfact);
- cv::imshow("original image", img_annotation_vis);
- }
- // use if seg exists
- // If segmentation is enabled, the masks live in the segs/ directory;
- // read the mask into seg
- if (segmentation)
- {
- std::string seg_path = this->root_img_dir_ + "segs/" + img_name;
- std::ifstream ifile(seg_path.c_str());
- // Skip this file if segmentation doesn't exist
- if (!ifile.good())
- {
- LOG(INFO) << "file " << seg_path << " does not exist!";
- idx_img--;
- this->AdvanceCurImg();
- continue;
- }
- ifile.close();
- seg = cv::imread(seg_path, CV_LOAD_IMAGE_GRAYSCALE);
- }
- int width = img.cols;
- int height = img.rows;
- // size is the crop size;
- // if the crop is larger than the image, x_border becomes negative and the image is padded below
- int x_border = width - size;
- int y_border = height - size;
- // Convert the loaded image from BGR to RGB
- cv::cvtColor(img, img, CV_BGR2RGB);
- // Convert the data type to float
- img.convertTo(img, CV_32FC3);
- if (segmentation)
- {
- segTmp = cv::Mat::zeros(img.rows, img.cols, CV_32FC3);
- int threshold = 40; // mask threshold
- // Binarise the segmentation mask
- seg = (seg > threshold);
- // Apply the mask to the image
- segTmp.copyTo(img, seg);
- }
- if (visualise)
- img_vis = img.clone();
- // subtract per-video mean if used
- // Subtract the per-video mean
- int meanInd = 0;
- if (sub_mean)
- {
- // The video id is encoded as a numeric directory name, e.g. 0, 1, 2, 3, 4.
- // For a path like images/1/xxx.jpg the second path component gives the mean index
- // (minus 1 for zero-based indexing), which is then used to look up the mean in mean_img_.
- std::string delimiter = "/";
- std::string img_name_subdirImg = img_name.substr(img_name.find(delimiter) + 1, img_name.length());
- std::string meanIndStr = img_name_subdirImg.substr(0, img_name_subdirImg.find(delimiter));
- meanInd = atoi(meanIndStr.c_str()) - 1;
- // subtract the cropped mean
- mean_img_this = this->mean_img_[meanInd].clone();
- DLOG(INFO) << "Image size: " << width << "x" << height;
- DLOG(INFO) << "Crop info: " << cur_cropinfo[0] << " " << cur_cropinfo[1] << " " << cur_cropinfo[2] << " " << cur_cropinfo[3] << " " << cur_cropinfo[4];
- DLOG(INFO) << "Crop info after: " << cur_cropinfo[0] << " " << cur_cropinfo[1] << " " << cur_cropinfo[2] << " " << cur_cropinfo[3] << " " << cur_cropinfo[4];
- DLOG(INFO) << "Mean image size: " << mean_img_this.cols << "x" << mean_img_this.rows;
- DLOG(INFO) << "Cropping: " << cur_cropinfo[0] - 1 << " " << cur_cropinfo[2] - 1 << " " << width << " " << height;
- // crop and resize mean image
- // Crop the mean image and resize it to the image size.
- // cur_cropinfo holds x_left, x_right, y_top, y_bottom,
- // while cv::Rect takes (x, y, w, h), so a conversion is needed
- cv::Rect crop(cur_cropinfo[0] - 1, cur_cropinfo[2] - 1, cur_cropinfo[1] - cur_cropinfo[0], cur_cropinfo[3] - cur_cropinfo[2]);
- mean_img_this = mean_img_this(crop); // this performs the crop
- cv::resize(mean_img_this, mean_img_this, img.size());
- DLOG(INFO) << "Cropped mean image.";
- // Subtract from the image the mean that was cropped and resized to the image size
- // (presumably because the stored images were already cropped with this window, so the
- // full-frame mean must be cropped and resized the same way before subtraction)
- img -= mean_img_this;
- DLOG(INFO) << "Subtracted mean image.";
- if (visualise)
- {
- img_vis -= mean_img_this;
- img_mean_vis = mean_img_this.clone() / 255;
- cv::cvtColor(img_mean_vis, img_mean_vis, CV_RGB2BGR);
- cv::imshow("mean image", img_mean_vis);
- }
- }
- // pad images that aren't wide enough
- // If the crop size exceeds the image width, pad the right side of the image
- if (x_border < 0)
- {
- DLOG(INFO) << "padding " << img_path << " -- not wide enough.";
- // The prototype of copyMakeBorder is:
- // void copyMakeBorder( const Mat& src, Mat& dst,
- // int top, int bottom, int left, int right,
- // int borderType, const Scalar& value=Scalar() );
- cv::copyMakeBorder(img, img, 0, 0, 0, -x_border, cv::BORDER_CONSTANT, cv::Scalar(0, 0, 0));
- width = img.cols;
- x_border = width - size;
- // add border offset to joints
- // Since the right side of the image was padded, the joint x coordinates must be shifted
- for (int i = 0; i < label_num_channels; i += 2) // note the i += 2: only the x coordinates
- cur_label[i] = cur_label[i] + x_border;
- DLOG(INFO) << "new width: " << width << " x_border: " << x_border;
- if (visualise) // show the padded image
- {
- img_vis = img.clone();
- cv::copyMakeBorder(img_vis, img_vis, 0, 0, 0, -x_border, cv::BORDER_CONSTANT, cv::Scalar(0, 0, 0));
- }
- }
- DLOG(INFO) << "Entering jitter loop.";
- // loop over the jittered versions
- // (the joint positions are converted to heatmaps at the end of this loop)
- for (int idx_aug = 0; idx_aug < num_aug; idx_aug++)
- {
- // augmented image index in the resulting batch
- const int idx_img_aug = idx_img * num_aug + idx_aug;
- // cur_label_aug starts out as the joint coordinates read from file;
- // since the image will be cropped and then resized, the coordinates are
- // cropped and scaled in the same way, and the adjusted joint positions
- // are kept in cur_label_aug
- std::vector<float> cur_label_aug = cur_label;
- // Random crop?
- if (random_crop)
- {
- // random sampling
- DLOG(INFO) << "random crop sampling";
- // horizontal flip
- // randomly decide whether to flip horizontally
- if (rand() % 2)
- {
- // cv::flip with flipCode = 1 flips around the vertical axis (horizontal mirror)
- cv::flip(img, img, 1);
- if (visualise)
- cv::flip(img_vis, img_vis, 1);
- // "flip" annotation coordinates
- // mirror the annotation coordinates as well
- for (int i = 0; i < label_num_channels; i += 2)
- // width is the image width in pixels; dividing by multfact converts it to joint-coordinate units, and subtracting the joint's x coordinate gives the mirrored x coordinate
- cur_label_aug[i] = (float)width / (float)multfact - cur_label_aug[i];
- // "flip" annotation joint numbers
- // assumes i=0,1 are for head, and i=2,3 left wrist, i=4,5 right wrist etc
- // where coordinates are (x,y)
- // also swap the left/right joint indices
- if (flip_joint_labels)
- {
- float tmp_x, tmp_y;
- for (int i = flip_start_ind; i < label_num_channels; i += 4)
- {
- CHECK_LT(i + 3, label_num_channels);
- tmp_x = cur_label_aug[i];
- tmp_y = cur_label_aug[i + 1];
- cur_label_aug[i] = cur_label_aug[i + 2];
- cur_label_aug[i + 1] = cur_label_aug[i + 3];
- cur_label_aug[i + 2] = tmp_x;
- cur_label_aug[i + 3] = tmp_y;
- }
- }
- }
- // left-top coordinates of the crop [0;x_border] x [0;y_border]
- // generate the top-left corner of the crop window
- int x0 = 0, y0 = 0;
- x0 = rand() % (x_border + 1);
- y0 = rand() % (y_border + 1);
- // do crop
- cv::Rect crop(x0, y0, size, size);
- // NOTE: no full copy performed, so the original image buffer is affected by the transformations below
- // img_crop shares memory with img, so changes made through img_crop also affect img
- cv::Mat img_crop(img, crop);
- // "crop" annotations
- // (open question from the author: what happens if a joint falls outside the crop window?)
- for (int i = 0; i < label_num_channels; i += 2)
- {
- cur_label_aug[i] -= (float)x0 / (float) multfact;
- cur_label_aug[i + 1] -= (float)y0 / (float) multfact;
- }
- // show image
- if (visualise)
- {
- DLOG(INFO) << "cropped image";
- cv::Mat img_vis_crop(img_vis, crop);
- cv::Mat img_res_vis = img_vis_crop / 255;
- cv::cvtColor(img_res_vis, img_res_vis, CV_RGB2BGR);
- this->VisualiseAnnotations(img_res_vis, label_num_channels, cur_label_aug, multfact);
- cv::imshow("cropped image", img_res_vis);
- }
- // rotations
- // rotate the image by an angle drawn uniformly from [-angle_max, angle_max]
- float angle = Uniform(-angle_max, angle_max);
- cv::Mat M = this->RotateImage(img_crop, angle);
- // also flip & rotate labels
- // iterate over all joint coordinates
- for (int i = 0; i < label_num_channels; i += 2)
- {
- // convert to image space
- // convert the joint coordinates to pixel coordinates
- float x = cur_label_aug[i] * (float) multfact;
- float y = cur_label_aug[i + 1] * (float) multfact;
- // rotate
- // apply the affine matrix M to the point (x, y, 1): M maps original pixel coordinates to their rotated positions, so the joints move with the image
- cur_label_aug[i] = M.at<double>(0, 0) * x + M.at<double>(0, 1) * y + M.at<double>(0, 2);
- cur_label_aug[i + 1] = M.at<double>(1, 0) * x + M.at<double>(1, 1) * y + M.at<double>(1, 2);
- // convert back to joint-coordinate space
- cur_label_aug[i] /= (float) multfact;
- cur_label_aug[i + 1] /= (float) multfact;
- }
- img_res = img_crop;
- } else { // centre crop (crop around the image centre)
- // deterministic sampling
- DLOG(INFO) << "deterministic crop sampling (centre)";
- // centre crop
- const int y0 = y_border / 2;
- const int x0 = x_border / 2;
- DLOG(INFO) << "cropping image from " << x0 << "x" << y0;
- // do crop
- cv::Rect crop(x0, y0, size, size);
- cv::Mat img_crop(img, crop);
- DLOG(INFO) << "cropping annotations.";
- // "crop" annotations
- // the joint annotations need to be "cropped" (shifted) as well
- for (int i = 0; i < label_num_channels; i += 2)
- {
- // divide the offset by multfact to convert it to joint-coordinate units, then subtract it
- // (open question from the author: what if the crop no longer contains the joints?)
- cur_label_aug[i] -= (float)x0 / (float) multfact;
- cur_label_aug[i + 1] -= (float)y0 / (float) multfact;
- }
- if (visualise)
- {
- cv::Mat img_vis_crop(img_vis, crop);
- cv::Mat img_res_vis = img_vis_crop.clone() / 255;
- cv::cvtColor(img_res_vis, img_res_vis, CV_RGB2BGR);
- this->VisualiseAnnotations(img_res_vis, label_num_channels, cur_label_aug, multfact);
- cv::imshow("cropped image", img_res_vis);
- }
- img_res = img_crop;
- }// end of else
- // show image
- if (visualise)
- {
- cv::Mat img_res_vis = img_res / 255;
- cv::cvtColor(img_res_vis, img_res_vis, CV_RGB2BGR);
- this->VisualiseAnnotations(img_res_vis, label_num_channels, cur_label_aug, multfact);
- cv::imshow("interim resize image", img_res_vis);
- }
- DLOG(INFO) << "Resizing output image.";
- // resize to output image size
- // resize the cropped image to the requested output size
- cv::Size s(outsize, outsize);
- cv::resize(img_res, img_res, s);
- // "resize" annotations
- // since the image was resized, the joint coordinates must be scaled by the same factor
- for (int i = 0; i < label_num_channels; i++)
- cur_label_aug[i] *= resizeFact;
- // show image
- if (visualise)
- {
- cv::Mat img_res_vis = img_res / 255;
- cv::cvtColor(img_res_vis, img_res_vis, CV_RGB2BGR);
- this->VisualiseAnnotations(img_res_vis, label_num_channels, cur_label_aug, multfact);
- cv::imshow("resulting image", img_res_vis);
- }
- // show image
- if (visualise && sub_mean)
- {
- cv::Mat img_res_meansub_vis = img_res / 255;
- cv::cvtColor(img_res_meansub_vis, img_res_meansub_vis, CV_RGB2BGR);
- cv::imshow("mean-removed image", img_res_meansub_vis);
- }
- // multiply by scale
- // after mean subtraction, cropping and resizing, multiply the pixel values by scale to get the final image
- if (scale != 1.0)
- img_res *= scale;
- // resulting image dims
- const int channel_size = outsize * outsize;
- const int img_size = channel_size * channels;
- // store image data
- // copy the processed image into top_data
- DLOG(INFO) << "storing image";
- for (int c = 0; c < channels; c++)
- {
- for (int i = 0; i < outsize; i++)
- {
- for (int j = 0; j < outsize; j++)
- {
- top_data[idx_img_aug * img_size + c * channel_size + i * outsize + j] = img_res.at<cv::Vec3f>(i, j)[c];
- }
- }
- }
- // store label as gaussian
- // convert each joint position into a Gaussian heatmap
- DLOG(INFO) << "storing labels";
- const int label_channel_size = label_height * label_width;
- const int label_img_size = label_channel_size * label_num_channels / 2;
- cv::Mat dataMatrix = cv::Mat::zeros(label_height, label_width, CV_32FC1);
- float label_resize_fact = (float) label_height / (float) outsize;
- float sigma = 1.5;
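- // The value written at pixel (j, i) for a joint at (x, y) is an isotropic 2D Gaussian:
- //   G(j, i) = 4 * (1 / (sigma * sqrt(2 * pi))) * exp(-((j - x)^2 + (i - y)^2) / (2 * sigma^2))
- // with sigma = 1.5; the factor of 4 brings the peak value close to 1.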
- for (int idx_ch = 0; idx_ch < label_num_channels / 2; idx_ch++)
- {
- // convert the joint coordinates to pixel space in the output image (multiply by multfact), then scale them to heatmap resolution (multiply by label_resize_fact = label_height / outsize)
- float x = label_resize_fact * cur_label_aug[2 * idx_ch] * multfact;
- float y = label_resize_fact * cur_label_aug[2 * idx_ch + 1] * multfact;
- for (int i = 0; i < label_height; i++)
- {
- for (int j = 0; j < label_width; j++)
- {
- // compute the flat index into the label blob
- int label_idx = idx_img_aug * label_img_size + idx_ch * label_channel_size + i * label_height + j;
- float gaussian = ( 1 / ( sigma * sqrt(2 * M_PI) ) ) * exp( -0.5 * ( pow(i - y, 2.0) + pow(j - x, 2.0) ) * pow(1 / sigma, 2.0) );
- gaussian = 4 * gaussian;
- // write into top_label
- top_label[label_idx] = gaussian;
- if (idx_ch == 0)
- dataMatrix.at<float>((int)j, (int)i) = gaussian;
- }
- }
- }
- } // jittered versions loop
- DLOG(INFO) << "next image";
- // move to the next image
- // AdvanceCurImg() advances the "current image" pointer
- this->AdvanceCurImg();
- if (visualise)
- cv::waitKey(0);
- } // original image loop
- batch_timer.Stop();
- DLOG(INFO) << "Prefetch batch: " << batch_timer.MilliSeconds() << " ms.";
- }
- // Get the current image's path, labels, crop info and class
- template<typename Dtype>
- void DataHeatmapLayer<Dtype>::GetCurImg(string& img_name, std::vector<float>& img_label, std::vector<float>& crop_info, int& img_class)
- {
- if (!sample_per_cluster_)
- {
- img_name = img_label_list_[cur_img_].first;
- img_label = img_label_list_[cur_img_].second.first;
- crop_info = img_label_list_[cur_img_].second.second.first;
- img_class = img_label_list_[cur_img_].second.second.second;
- }
- else
- {
- img_class = cur_class_;
- // Here cur_class_img_ is used: the random index generated in DataLayerSetUp, in the range [0, number of images in this class - 1], selects the image for this class.
- img_name = img_list_[img_class][cur_class_img_[img_class]].first;
- img_label = img_list_[img_class][cur_class_img_[img_class]].second.first;
- crop_info = img_list_[img_class][cur_class_img_[img_class]].second.second.first;
- }
- }
- // Simply advances the indices
- template<typename Dtype>
- void DataHeatmapLayer<Dtype>::AdvanceCurImg()
- {
- if (!sample_per_cluster_)
- {
- if (cur_img_ < img_label_list_.size() - 1)
- cur_img_++;
- else
- cur_img_ = 0;
- }
- else
- {
- const int num_classes = img_list_.size();
- if (cur_class_img_[cur_class_] < img_list_[cur_class_].size() - 1)
- cur_class_img_[cur_class_]++;
- else
- cur_class_img_[cur_class_] = 0;
- // move to the next class
- if (cur_class_ < num_classes - 1)
- cur_class_++;
- else
- cur_class_ = 0;
- }
- }
- // Visualise the joint annotations
- template<typename Dtype>
- void DataHeatmapLayer<Dtype>::VisualiseAnnotations(cv::Mat img_annotation_vis, int label_num_channels, std::vector<float>& img_class, int multfact)
- {
- // colors
- const static cv::Scalar colors[] = {
- CV_RGB(0, 0, 255),
- CV_RGB(0, 128, 255),
- CV_RGB(0, 255, 255),
- CV_RGB(0, 255, 0),
- CV_RGB(255, 128, 0),
- CV_RGB(255, 255, 0),
- CV_RGB(255, 0, 0),
- CV_RGB(255, 0, 255)
- };
- int numCoordinates = int(label_num_channels / 2);
- // points
- // put the joint points into the centers array
- cv::Point centers[numCoordinates];
- for (int i = 0; i < label_num_channels; i += 2)
- {
- int coordInd = int(i / 2);
- centers[coordInd] = cv::Point(img_class[i] * multfact, img_class[i + 1] * multfact);
- // draw a circle at each joint
- cv::circle(img_annotation_vis, centers[coordInd], 1, colors[coordInd], 3);
- }
- // connecting lines
- // joints 1, 3, 5 form one arm; joints 2, 4, 6 form the other
- cv::line(img_annotation_vis, centers[1], centers[3], CV_RGB(0, 255, 0), 1, CV_AA);
- cv::line(img_annotation_vis, centers[2], centers[4], CV_RGB(255, 255, 0), 1, CV_AA);
- cv::line(img_annotation_vis, centers[3], centers[5], CV_RGB(0, 0, 255), 1, CV_AA);
- cv::line(img_annotation_vis, centers[4], centers[6], CV_RGB(0, 255, 255), 1, CV_AA);
- }
- // Uniform random number in [min, max]
- template <typename Dtype>
- float DataHeatmapLayer<Dtype>::Uniform(const float min, const float max) {
- float random = ((float) rand()) / (float) RAND_MAX;
- float diff = max - min;
- float r = random * diff;
- return min + r;
- }
- // Rotate the image and return the rotation matrix
- template <typename Dtype>
- cv::Mat DataHeatmapLayer<Dtype>::RotateImage(cv::Mat src, float rotation_angle)
- {
- cv::Mat rot_mat(2, 3, CV_32FC1);
- cv::Point center = cv::Point(src.cols / 2, src.rows / 2);
- double scale = 1;
- // Get the rotation matrix with the specifications above
- rot_mat = cv::getRotationMatrix2D(center, rotation_angle, scale);
- // Rotate the warped image
- cv::warpAffine(src, src, rot_mat, src.size());
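- // The 2x3 affine matrix is returned so the caller can apply the same
- // transform to the joint coordinates (see the label rotation in load_batch).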
- return rot_mat;
- }
- INSTANTIATE_CLASS(DataHeatmapLayer);
- REGISTER_LAYER_CLASS(DataHeatmap);
- } // namespace caffe
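Once the layer is registered via INSTANTIATE_CLASS / REGISTER_LAYER_CLASS above, it can be created by name through Caffe's layer registry. A minimal sketch, assuming the layer has been compiled into libcaffe (the function name is my own):
- #include <boost/shared_ptr.hpp>
- #include "caffe/layer.hpp"
- #include "caffe/layer_factory.hpp"
- #include "caffe/proto/caffe.pb.h"
- // Minimal sketch: create the registered layer by its type name.
- boost::shared_ptr<caffe::Layer<float> > MakeDataHeatmapLayer() {
-   caffe::LayerParameter param;
-   param.set_name("data");
-   param.set_type("DataHeatmap");
-   return caffe::LayerRegistry<float>::CreateLayer(param);
- }
In the network prototxt the layer is then used like this: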
- layer {
- name: "data"
- type: "DataHeatmap" // 层的类型是DataHeatmap
- top: "data"
- top: "label"
- visualise: false # whether to visualise
- include: { phase: TRAIN }
- heatmap_data_param {
- source: "/data/tp/flic/train_shuffle.txt"
- root_img_dir: "/mnt/ramdisk/tp/flic/"
- batchsize: 14
- cropsize: 248
- outsize: 256
- sample_per_cluster: false
- random_crop: true
- label_width: 64
- label_height: 64
- segmentation: false
- flip_joint_labels: true
- dont_flip_first: true
- angle_max: 40
- multfact: 1 # set to 282 if using preprocessed data from website
- }
- }
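- # The complete training prototxt (HeatmapFusionNet) from caffe-heatmap then begins as follows: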
- name: "HeatmapFusionNet"
- layer {
- name: "data"
- type: "DataHeatmap"
- top: "data"
- top: "label"
- visualise: false
- include: { phase: TRAIN }
- heatmap_data_param {
- source: "/data/tp/flic/train_shuffle.txt"
- root_img_dir: "/mnt/ramdisk/tp/flic/"
- batchsize: 14
- cropsize: 248
- outsize: 256
- sample_per_cluster: false
- random_crop: true
- label_width: 64
- label_height: 64
- segmentation: false
- flip_joint_labels: true
- dont_flip_first: true
- angle_max: 40
- multfact: 1 # set to 282 if using preprocessed data from website
- }
- }
- layer {
- name: "data"
- type: "DataHeatmap"
- top: "data"
- top: "label"
- visualise: false
- include: { phase: TEST }
- heatmap_data_param {
- source: "/data/tp/flic/test_shuffle.txt"
- root_img_dir: "/mnt/ramdisk/tp/flic/"
- batchsize: 1
- cropsize: 248
- outsize: 256
- sample_per_cluster: false
- random_crop: false
- label_width: 64
- label_height: 64
- segmentation: false
- dont_flip_first: true
- angle_max: 0
- multfact: 1 # set to 282 if using preprocessed data from website
- }
- }
- #########################################################
- layer {
- name: "conv1"
- type: "Convolution"
- bottom: "data"
- top: "conv1"
- param {
- lr_mult: 1
- decay_mult: 1
- }
- param {
- lr_mult: 2
- decay_mult: 0
- }
- convolution_param {
- num_output: 128
- kernel_size: 5
- stride: 1
- pad: 2
- weight_filler {
- type: "gaussian"
- std: 0.01
- }
- bias_filler {
- type: "constant"
- value: 0
- }
- }
- }
- layer {
- name: "relu1"
- type: "ReLU"
- bottom: "conv1"
- top: "conv1"
- }
- layer {
- name: "pool1"
- type: "Pooling"
- bottom: "conv1"
- top: "pool1"
- pooling_param {
- pool: MAX
- kernel_size: 2
- stride: 2
- }
- }
- #########################################################
- layer {
- name: "conv2"
- type: "Convolution"
- bottom: "pool1"
- top: "conv2"
- param {
- lr_mult: 1
- decay_mult: 1
- }
- param {
- lr_mult: 2
- decay_mult: 0
- }
- convolution_param {
- num_output: 128
- kernel_size: 5
- pad: 2
- weight_filler {
- type: "gaussian"
- std: 0.01
- }
- bias_filler {
- type: "constant"
- }
- }
- }
- layer {
- name: "relu2"
- type: "ReLU"
- bottom: "conv2"
- top: "conv2"
- }
- layer {
- name: "pool2"
- type: "Pooling"
- bottom: "conv2"
- top: "pool2"
- pooling_param {
- pool: MAX
- kernel_size: 2
- stride: 2
- }
- }
- #########################################################
- layer {
- name: "conv3"
- type: "Convolution"
- bottom: "pool2"
- top: "conv3"
- param {
- lr_mult: 1
- decay_mult: 1
- }
- param {
- lr_mult: 2
- decay_mult: 0
- }
- convolution_param {
- num_output: 128
- kernel_size: 5
- pad: 2
- weight_filler {
- type: "gaussian"
- std: 0.01
- }
- bias_filler {
- type: "constant"
- value: 0
- }
- }
- }
- layer {
- name: "relu3"
- type: "ReLU"
- bottom: "conv3"
- top: "conv3"
- }
- #########################################################
- layer {
- name: "conv4"
- type: "Convolution"
- bottom: "conv3"
- top: "conv4"
- param {
- lr_mult: 1
- decay_mult: 1
- }
- param {
- lr_mult: 2
- decay_mult: 0
- }
- convolution_param {
- num_output: 256
- kernel_size: 9
- pad: 4
- weight_filler {
- type: "gaussian"
- std: 0.01
- }
- bias_filler {
- type: "constant"
- }
- }
- }
- layer {
- name: "relu4"
- type: "ReLU"
- bottom: "conv4"
- top: "conv4"
- }
- #########################################################
- layer {
- name: "conv5"
- type: "Convolution"
- bottom: "conv4"
- top: "conv5"
- param {
- lr_mult: 1
- decay_mult: 1
- }
- param {
- lr_mult: 2
- decay_mult: 0
- }
- convolution_param {
- num_output: 512
- kernel_size: 9
- pad: 4
- weight_filler {
- type: "gaussian"
- std: 0.01
- }
- bias_filler {
- type: "constant"
- }
- }
- }
- layer {
- name: "relu5"
- type: "ReLU"
- bottom: "conv5"
- top: "conv5"
- }
- #########################################################
- layer {
- name: "conv6"
- type: "Convolution"
- bottom: "conv5"
- top: "conv6"
- param {
- lr_mult: 1
- decay_mult: 1
- }
- param {
- lr_mult: 2
- decay_mult: 0
- }
- convolution_param {
- num_output: 256
- # pad: 2
- kernel_size: 1
- weight_filler {
- type: "gaussian"
- std: 0.01
- }
- bias_filler {
- type: "constant"
- }
- }
- }
- layer {
- name: "relu6"
- type: "ReLU"
- bottom: "conv6"
- top: "conv6"
- }
- #########################################################
- layer {
- name: "conv7"
- type: "Convolution"
- bottom: "conv6"
- top: "conv7"
- param {
- lr_mult: 1
- decay_mult: 1
- }
- param {
- lr_mult: 2
- decay_mult: 0
- }
- convolution_param {
- num_output: 256
- kernel_size: 1
- weight_filler {
- type: "gaussian"
- std: 0.01
- }
- bias_filler {
- type: "constant"
- }
- }
- }
- layer {
- name: "relu7"
- type: "ReLU"
- bottom: "conv7"
- top: "conv7"
- }
- #########################################################
- layer {
- name: "conv8"
- type: "Convolution"
- bottom: "conv7"
- top: "conv8"
- param {
- lr_mult: 1
- decay_mult: 1
- }
- param {
- lr_mult: 2
- decay_mult: 0
- }
- convolution_param {
- num_output: 7
- kernel_size: 1
- weight_filler {
- type: "gaussian"
- std: 0.01
- }
- bias_filler {
- type: "constant"
- }
- }
- }
- layer {
- name: "relu8"
- type: "ReLU"
- bottom: "conv8"
- top: "conv8"
- }
- #########################################################
- layer {
- name: "loss_heatmap"
- type: "EuclideanLossHeatmap"
- bottom: "conv8"
- bottom: "label"
- bottom: "data"
- top: "loss_heatmap"
- visualise: false
- loss_weight: 1
- }
- #########################################################
- layer {
- name: "concat_fusion"
- type: "Concat"
- bottom: "conv3"
- bottom: "conv7"
- top: "concat_fusion"
- concat_param {
- concat_dim: 1
- }
- }
- #########################################################
- layer {
- name: "conv1_fusion"
- type: "Convolution"
- bottom: "concat_fusion"
- top: "conv1_fusion"
- param {
- lr_mult: 1
- decay_mult: 1
- }
- param {
- lr_mult: 2
- decay_mult: 0
- }
- convolution_param {
- num_output: 64
- kernel_size: 7
- stride: 1
- pad: 3
- weight_filler {
- type: "gaussian"
- std: 0.01
- }
- bias_filler {
- type: "constant"
- }
- }
- }
- layer {
- name: "relu1_fusion"
- type: "ReLU"
- bottom: "conv1_fusion"
- top: "conv1_fusion"
- }
- #########################################################
- layer {
- name: "conv2_fusion"
- type: "Convolution"
- bottom: "conv1_fusion"
- top: "conv2_fusion"
- param {
- lr_mult: 1
- decay_mult: 1
- }
- param {
- lr_mult: 2
- decay_mult: 0
- }
- convolution_param {
- num_output: 64
- kernel_size: 13
- stride: 1
- pad: 6
- weight_filler {
- type: "gaussian"
- std: 0.01
- }
- bias_filler {
- type: "constant"
- }
- }
- }
- layer {
- name: "relu2_fusion"
- type: "ReLU"
- bottom: "conv2_fusion"
- top: "conv2_fusion"
- }
- #########################################################
- layer {
- name: "conv3_fusion"
- type: "Convolution"
- bottom: "conv2_fusion"
- top: "conv3_fusion"
- param {
- lr_mult: 1
- decay_mult: 1
- }
- param {
- lr_mult: 2
- decay_mult: 0
- }
- convolution_param {
- num_output: 128
- kernel_size: 13
- stride: 1
- pad: 6
- weight_filler {
- type: "gaussian"
- std: 0.01
- }
- bias_filler {
- type: "constant"
- }
- }
- }
- layer {
- name: "relu3_fusion"
- type: "ReLU"
- bottom: "conv3_fusion"
- top: "conv3_fusion"
- }
- #########################################################
- layer {
- name: "conv4_fusion"
- type: "Convolution"
- bottom: "conv3_fusion"
- top: "conv4_fusion"
- param {
- lr_mult: 1
- decay_mult: 1
- }
- param {
- lr_mult: 2
- decay_mult: 0
- }
- convolution_param {
- num_output: 256
- kernel_size: 1
- stride: 1
- pad: 0
- weight_filler {
- type: "gaussian"
- std: 0.01
- }
- bias_filler {
- type: "constant"
- }
- }
- }
- layer {
- name: "relu4_fusion"
- type: "ReLU"
- bottom: "conv4_fusion"
- top: "conv4_fusion"
- }
- #########################################################
- layer {
- name: "conv5_fusion"
- type: "Convolution"
- bottom: "conv4_fusion"
- top: "conv5_fusion"
- param {
- lr_mult: 1
- decay_mult: 1
- }
- param {
- lr_mult: 2
- decay_mult: 0
- }
- convolution_param {
- num_output: 7
- kernel_size: 1
- stride: 1
- pad: 0
- weight_filler {
- type: "gaussian"
- std: 0.01
- }
- bias_filler {
- type: "constant"
- }
- }
- }
- #########################################################
- layer {
- name: "loss_fusion"
- type: "EuclideanLossHeatmap"
- bottom: "conv5_fusion"
- bottom: "label"
- bottom: "data"
- top: "loss_fusion"
- visualise: false
- loss_weight: 3
- }
The paper involves other components as well, so I could not actually run this layer on its own; I only got it to compile. Finally, remember to register the layer after implementing it (see INSTANTIATE_CLASS and REGISTER_LAYER_CLASS above). The Caffe version used here is the latest; if an OpenCV-related error comes up during the build, it can be resolved by modifying the Makefile.