/*
 * Annotated excerpt of data_transformer.cpp from the CPM (Convolutional
 * Pose Machines) fork of Caffe. It implements the data pre-processing
 * pipeline:
 *   (1) CLAHE contrast enhancement
 *   (2) flip
 *   (3) rotate
 *   (4) joint mid-points
 *   (5) crop
 *   (6) color jittering (color, saturation, contrast)
 */
#ifdef USE_OPENCV
#include <opencv2/core/core.hpp>
//#include <opencv2/opencv.hpp>
#include <opencv2/contrib/contrib.hpp>
#include <opencv2/highgui/highgui.hpp>
#endif  // USE_OPENCV

#include <algorithm>
#include <cstdio>   // snprintf
#include <cstring>  // memcpy
#include <fstream>
#include <iostream>
#include <sstream>
#include <string>
#include <vector>

#include "caffe/data_transformer.hpp"
#include "caffe/util/io.hpp"
#include "caffe/util/math_functions.hpp"
#include "caffe/util/rng.hpp"

using namespace cv;
using namespace std;
namespace caffe {
/* Copy `len` values of type Dtype out of the raw byte string `data`,
 * starting at byte offset `idx`, into the caller-supplied buffer `pf`.
 * Bytes are reinterpreted in host byte order (as written by genLMDB.py).
 */
template<typename Dtype>
void DecodeFloats(const std::string& data, size_t idx, Dtype* pf, size_t len) {
  // memcpy takes a const void* source, so the old const_cast was unnecessary.
  memcpy(pf, &data[idx], len * sizeof(Dtype));
}
/* Extract the NUL-terminated string stored in `data` starting at byte
 * offset `idx` (used to decode the dataset name from the metadata channel).
 * Reads characters until the first 0 byte and returns them as a string.
 */
std::string DecodeString(const std::string& data, size_t idx) {
  std::string out;
  for (size_t k = idx; data[k] != '\0'; ++k) {
    out += data[k];
  }
  return out;
}
/* Read one MetaData record from the raw datum string (layout produced by
 * genLMDB.py).
 * Parameters:
 *   meta    - output struct filled in by this call
 *   data    - raw datum bytes; metadata starts at byte offset3
 *   offset3 - 3 * rows * cols, start of the metadata channel
 *   offset1 - data_width, size in bytes of one metadata "row"
 * Decodes: dataset name; image height/width; isValidation; numOtherPeople;
 * people_index; annolist_index; write_number; total_write_number; objpos;
 * scale_self; all joint coordinates (self and other people). Also maintains
 * the epoch counter across calls.
 * BUGFIX: the note after people_index was previously bare (un-commented)
 * text, which did not compile.
 */
template<typename Dtype>
void DataTransformer<Dtype>::ReadMetaData(MetaData& meta, const string& data, size_t offset3, size_t offset1) { //very specific to genLMDB.py
  // ------------------- Dataset name ----------------------
  meta.dataset = DecodeString(data, offset3);
  // ------------------- Image Dimension -------------------
  float height, width;
  DecodeFloats(data, offset3+offset1, &height, 1);
  DecodeFloats(data, offset3+offset1+4, &width, 1);
  meta.img_size = Size(width, height);
  // ----------- Validation, nop, counters -----------------
  meta.isValidation = (data[offset3+2*offset1]==0 ? false : true);
  meta.numOtherPeople = (int)data[offset3+2*offset1+1]; // number of other people in the image
  meta.people_index = (int)data[offset3+2*offset1+2]; // index of this person within the image
  float annolist_index;
  DecodeFloats(data, offset3+2*offset1+3, &annolist_index, 1);
  meta.annolist_index = (int)annolist_index;
  float write_number;
  DecodeFloats(data, offset3+2*offset1+7, &write_number, 1);
  meta.write_number = (int)write_number; // index of the current sample
  float total_write_number;
  DecodeFloats(data, offset3+2*offset1+11, &total_write_number, 1);
  meta.total_write_number = (int)total_write_number; // total number of samples
  // Count epochs according to the counters: write_number wraps back to 0
  // every time the dataset restarts, so each wrap is one more epoch.
  static int cur_epoch = -1;
  if(meta.write_number == 0){
    cur_epoch++;
  }
  meta.epoch = cur_epoch;
  if(meta.write_number % 1000 == 0){ // log progress every 1000 images
    LOG(INFO) << "dataset: " << meta.dataset <<"; img_size: " << meta.img_size
        << "; meta.annolist_index: " << meta.annolist_index << "; meta.write_number: " << meta.write_number
        << "; meta.total_write_number: " << meta.total_write_number << "; meta.epoch: " << meta.epoch;
  }
  // Lazily load the per-image augmentation tables when aug_way == "table".
  if(param_.aug_way() == "table" && !is_table_set){
    SetAugTable(meta.total_write_number);
    is_table_set = true;
  }
  // ------------------- objpos (person center) -----------------------
  DecodeFloats(data, offset3+3*offset1, &meta.objpos.x, 1);
  DecodeFloats(data, offset3+3*offset1+4, &meta.objpos.y, 1);
  meta.objpos -= Point2f(1,1); // matlab 1-index -> c++ 0-index
  // ------------ scale_self, joint_self --------------
  DecodeFloats(data, offset3+4*offset1, &meta.scale_self, 1);
  meta.joint_self.joints.resize(np_in_lmdb);
  meta.joint_self.isVisible.resize(np_in_lmdb);
  for(int i=0; i<np_in_lmdb; i++){
    DecodeFloats(data, offset3+5*offset1+4*i, &meta.joint_self.joints[i].x, 1);
    DecodeFloats(data, offset3+6*offset1+4*i, &meta.joint_self.joints[i].y, 1);
    meta.joint_self.joints[i] -= Point2f(1,1); //from matlab 1-index to c++ 0-index
    float isVisible;
    DecodeFloats(data, offset3+7*offset1+4*i, &isVisible, 1);
    meta.joint_self.isVisible[i] = (isVisible == 0) ? 0 : 1;
    if(meta.joint_self.joints[i].x < 0 || meta.joint_self.joints[i].y < 0 ||
       meta.joint_self.joints[i].x >= meta.img_size.width || meta.joint_self.joints[i].y >= meta.img_size.height){
      meta.joint_self.isVisible[i] = 2; // 2 = cropped (off image), 1 = occluded but on image, 0 = visible
    }
    //LOG(INFO) << meta.joint_self.joints[i].x << " " << meta.joint_self.joints[i].y << " " << meta.joint_self.isVisible[i];
  }
  //others (7 lines loaded)
  meta.objpos_other.resize(meta.numOtherPeople);
  meta.scale_other.resize(meta.numOtherPeople);
  meta.joint_others.resize(meta.numOtherPeople);
  for(int p=0; p<meta.numOtherPeople; p++){
    DecodeFloats(data, offset3+(8+p)*offset1, &meta.objpos_other[p].x, 1);
    DecodeFloats(data, offset3+(8+p)*offset1+4, &meta.objpos_other[p].y, 1);
    meta.objpos_other[p] -= Point2f(1,1); // 1-index -> 0-index
    DecodeFloats(data, offset3+(8+meta.numOtherPeople)*offset1+4*p, &meta.scale_other[p], 1);
  }
  //8 + numOtherPeople lines loaded
  for(int p=0; p<meta.numOtherPeople; p++){
    meta.joint_others[p].joints.resize(np_in_lmdb);
    meta.joint_others[p].isVisible.resize(np_in_lmdb);
    for(int i=0; i<np_in_lmdb; i++){
      DecodeFloats(data, offset3+(9+meta.numOtherPeople+3*p)*offset1+4*i, &meta.joint_others[p].joints[i].x, 1);
      DecodeFloats(data, offset3+(9+meta.numOtherPeople+3*p+1)*offset1+4*i, &meta.joint_others[p].joints[i].y, 1);
      meta.joint_others[p].joints[i] -= Point2f(1,1);
      float isVisible;
      DecodeFloats(data, offset3+(9+meta.numOtherPeople+3*p+2)*offset1+4*i, &isVisible, 1);
      meta.joint_others[p].isVisible[i] = (isVisible == 0) ? 0 : 1;
      if(meta.joint_others[p].joints[i].x < 0 || meta.joint_others[p].joints[i].y < 0 ||
         meta.joint_others[p].joints[i].x >= meta.img_size.width || meta.joint_others[p].joints[i].y >= meta.img_size.height){
        meta.joint_others[p].isVisible[i] = 2; // 2 = cropped, 1 = occluded but on image, 0 = visible
      }
    }
  }
}
/* Load the per-image augmentation lookup tables.
 * numData = meta.total_write_number (one table row per image). Rotation
 * degrees are read into aug_degs and flip decisions into aug_flips from
 * ../../rotate_<augs>_<numData>.txt and ../../flip_<augs>_<numData>.txt,
 * each laid out as numData rows x num_total_augs() columns.
 * Uses snprintf (bounded) instead of sprintf, and logs an error if either
 * table file cannot be opened (previously this failed silently, leaving
 * the tables zero-filled).
 */
template<typename Dtype>
void DataTransformer<Dtype>::SetAugTable(int numData){
  aug_degs.resize(numData);
  aug_flips.resize(numData);
  for(int i = 0; i < numData; i++){
    aug_degs[i].resize(param_.num_total_augs());
    aug_flips[i].resize(param_.num_total_augs());
  }
  //load table files
  char filename[100];
  snprintf(filename, sizeof(filename), "../../rotate_%d_%d.txt", param_.num_total_augs(), numData);
  ifstream rot_file(filename);   // rotation table
  char filename2[100];
  snprintf(filename2, sizeof(filename2), "../../flip_%d_%d.txt", param_.num_total_augs(), numData);
  ifstream flip_file(filename2); // flip table
  if(!rot_file.is_open() || !flip_file.is_open()){
    LOG(ERROR) << "augmentation table file missing: " << filename << " or " << filename2;
  }
  // numData rows, num_total_augs() columns.
  for(int i = 0; i < numData; i++){
    for(int j = 0; j < param_.num_total_augs(); j++){
      rot_file >> aug_degs[i][j];
      flip_file >> aug_flips[i][j];
    }
  }
}
/* Remap the main person's joints and then every other person's joints
 * from the np_in_lmdb ordering to the np ordering (both set in prototxt).
 */
template<typename Dtype>
void DataTransformer<Dtype>::TransformMetaJoints(MetaData& meta) {
  TransformJoints(meta.joint_self);
  for (size_t p = 0; p < meta.joint_others.size(); ++p) {
    TransformJoints(meta.joint_others[p]);
  }
}
/* Parameter: Joints (receives joint_self or joint_others[p]).
Purpose: re-order the joints from the np_in_lmdb (MPII) ordering to the
np-joint CPM ordering. For np == 28, joints 17..28 are synthesized as the
midpoints of two mapped source joints.
*/
template<typename Dtype>
void DataTransformer<Dtype>::TransformJoints(Joints& j) {
//transform joints in meta from np_in_lmdb (specified in prototxt) to np (specified in prototxt)
//MPII R leg: 0(ankle), 1(knee), 2(hip)
// L leg: 5(ankle), 4(knee), 3(hip)
// R arms: 10(wrist), 11(elbow), 12(shoulder)
// L arms: 15(wrist), 14(elbow), 13(shoulder)
// 6 - pelvis, 7 - thorax, 8 - upper neck, 9 - head top
//LOG(INFO) << "TransformJoints: here np == " << np << " np_lmdb = " << np_in_lmdb << " joints.size() = " << j.joints.size();
//assert(joints.size() == np_in_lmdb);
//assert(np == 14 || np == 28);
Joints jo = j;
if(np == 14){
int MPI_to_ours[14] = {9, 8, 12, 11, 10, 13, 14, 15, 2, 1, 0, 3, 4, 5};
jo.joints.resize(np);
jo.isVisible.resize(np);
for(int i=0;i<np;i++){
jo.joints[i] = j.joints[MPI_to_ours[i]];// target joint i comes from source joint MPI_to_ours[i]
jo.isVisible[i] = j.isVisible[MPI_to_ours[i]];
}
}
else if(np == 28){
int MPI_to_ours_1[28] = {9, 8,12,11,10,13,14,15, 2, 1, 0, 3, 4, 5, 7, 6, \
9, 8,12,11, 8,13,14, 2, 1, 3, 4, 6};
//17,18,19,20,21,22,23,24,25,26,27,28
int MPI_to_ours_2[28] = {9, 8,12,11,10,13,14,15, 2, 1, 0, 3, 4, 5, 7, 6, \
8,12,11,10,13,14,15, 1, 0, 4, 5, 7};
//17,18,19,20,21,22,23,24,25,26,27,28
jo.joints.resize(np);
jo.isVisible.resize(np);
for(int i=0;i<np;i++){
jo.joints[i] = (j.joints[MPI_to_ours_1[i]] + j.joints[MPI_to_ours_2[i]]) * 0.5;
// midpoint of the two mapped source joints (identical joint for i < 16)
if(j.isVisible[MPI_to_ours_1[i]]==2 || j.isVisible[MPI_to_ours_2[i]]==2){
jo.isVisible[i] = 2; // cropped if either endpoint is cropped
}
else {
jo.isVisible[i] = j.isVisible[MPI_to_ours_1[i]] && j.isVisible[MPI_to_ours_2[i]];
}
}
}
j = jo;
}
// Constructor: loads the mean blob from mean_file OR the per-channel
// mean_value list (mutually exclusive), then caches the joint counts
// (np_in_lmdb / np) and augmentation-table state from the parameters.
template<typename Dtype> DataTransformer<Dtype>::DataTransformer(const TransformationParameter& param, Phase phase) : param_(param), phase_(phase) {
// check if we want to use mean_file
if (param_.has_mean_file()) {
CHECK_EQ(param_.mean_value_size(), 0) <<
"Cannot specify mean_file and mean_value at the same time";
const string& mean_file = param.mean_file();
if (Caffe::root_solver()) {// only the root solver thread logs
LOG(INFO) << "Loading mean file from: " << mean_file;
}
BlobProto blob_proto;
// Deserialize the mean blob from the binary protobuf file (aborts on failure).
ReadProtoFromBinaryFileOrDie(mean_file.c_str(), &blob_proto);
data_mean_.FromProto(blob_proto);
}
// check if we want to use mean_value
if (param_.mean_value_size() > 0) {
CHECK(param_.has_mean_file() == false) <<
"Cannot specify mean_file and mean_value at the same time";
for (int c = 0; c < param_.mean_value_size(); ++c) {
mean_values_.push_back(param_.mean_value(c));// one mean per channel
}
}
LOG(INFO) << "DataTransformer constructor done.";
np_in_lmdb = param_.np_in_lmdb();// number of joints stored in the lmdb
np = param_.num_parts();// number of joints the network uses
is_table_set = false;
}
template<typename Dtype> void DataTransformer<Dtype>::Transform(const Datum& datum, Dtype* transformed_data) {
//LOG(INFO) << "Function 1 is used";
/* A Datum has three dimensions (channels, height, width) and two payload
fields: byte_data for integer pixels (typical for images) and float_data
for floats (typical for feature vectors); `label` is the integer class
label and `encoded` marks compressed (JPEG/PNG) payloads.
This routine optionally mirrors and crops the datum, subtracts the mean
(per-pixel mean file or per-channel mean values), scales each value, and
writes the result into transformed_data. */
const string& data = datum.data();
const int datum_channels = datum.channels();
const int datum_height = datum.height();
const int datum_width = datum.width();
const int crop_size = param_.crop_size();// side of the square crop (0 = no crop)
const Dtype scale = param_.scale();// multiplier applied to every output value
const bool do_mirror = param_.mirror() && Rand(2);// randomly mirror this sample
const bool has_mean_file = param_.has_mean_file();
const bool has_uint8 = data.size() > 0;// byte payload present, else float_data
const bool has_mean_values = mean_values_.size() > 0;// per-channel mean values
CHECK_GT(datum_channels, 0);
CHECK_GE(datum_height, crop_size);
CHECK_GE(datum_width, crop_size);
/* glog comparison macros used here and below: CHECK_EQ/NE/LE/LT/GE/GT for
ordered comparisons (types must match; static_cast if not), CHECK_NOTNULL
for pointers, CHECK_STREQ/STRNE/STRCASEEQ/STRCASENE for C strings, and
CHECK_DOUBLE_EQ/CHECK_NEAR for floats within a tolerance. */
Dtype* mean = NULL;
if (has_mean_file) {// the mean blob must match the datum shape exactly
CHECK_EQ(datum_channels, data_mean_.channels());
CHECK_EQ(datum_height, data_mean_.height());
CHECK_EQ(datum_width, data_mean_.width());
mean = data_mean_.mutable_cpu_data();
}
if (has_mean_values) {
CHECK(mean_values_.size() == 1 || mean_values_.size() == datum_channels) <<
"Specify either 1 mean_value or as many as channels: " << datum_channels;
if (datum_channels > 1 && mean_values_.size() == 1) {
// Replicate the mean_value for simplicity
for (int c = 1; c < datum_channels; ++c) {
mean_values_.push_back(mean_values_[0]);
}
}
}
int height = datum_height;
int width = datum_width;
int h_off = 0;// crop origin
int w_off = 0;
if (crop_size) {
height = crop_size;
width = crop_size;
// We only do random crop when we do training.
if (phase_ == TRAIN) {
h_off = Rand(datum_height - crop_size + 1);
w_off = Rand(datum_width - crop_size + 1);
} else {
h_off = (datum_height - crop_size) / 2;// center crop at test time
w_off = (datum_width - crop_size) / 2;
}
}
/* Output = (pixel - mean) * scale. Output is crop_size x crop_size per
channel when cropping, otherwise datum_height x datum_width. */
Dtype datum_element;
int top_index, data_index;
for (int c = 0; c < datum_channels; ++c) {
for (int h = 0; h < height; ++h) {
for (int w = 0; w < width; ++w) {
data_index = (c * datum_height + h_off + h) * datum_width + w_off + w;// planar (CHW) layout in the datum
if (do_mirror) {// mirrored output column: width-1-w
top_index = (c * height + h) * width + (width - 1 - w);
} else {
top_index = (c * height + h) * width + w;
}
if (has_uint8) {
datum_element =
static_cast<Dtype>(static_cast<uint8_t>(data[data_index]));
} else {
datum_element = datum.float_data(data_index);
}
if (has_mean_file) {// subtract the per-pixel mean, then scale
transformed_data[top_index] =
(datum_element - mean[data_index]) * scale;
} else {
if (has_mean_values) {
transformed_data[top_index] =
(datum_element - mean_values_[c]) * scale;// subtract this channel's mean, then scale
} else {
transformed_data[top_index] = datum_element * scale;
}
}
}
}
}
}
/* Validate that the output blob's shape is compatible with the datum
 * (equal to the crop size when cropping, otherwise equal to the datum),
 * then forward to the raw-pointer overload above. */
template<typename Dtype> void DataTransformer<Dtype>::Transform(const Datum& datum, Blob<Dtype>* transformed_blob) {
  const int src_channels = datum.channels();
  const int src_height = datum.height();
  const int src_width = datum.width();

  const int dst_channels = transformed_blob->channels();
  const int dst_height = transformed_blob->height();
  const int dst_width = transformed_blob->width();
  const int dst_num = transformed_blob->num();

  CHECK_EQ(dst_channels, src_channels);
  CHECK_LE(dst_height, src_height);
  CHECK_LE(dst_width, src_width);
  CHECK_GE(dst_num, 1);

  const int crop_size = param_.crop_size();
  if (crop_size) {
    // With cropping enabled the blob must be exactly crop_size x crop_size.
    CHECK_EQ(crop_size, dst_height);
    CHECK_EQ(crop_size, dst_width);
  } else {
    // Without cropping the blob must match the datum exactly.
    CHECK_EQ(src_height, dst_height);
    CHECK_EQ(src_width, dst_width);
  }

  Dtype* dst_data = transformed_blob->mutable_cpu_data();
  Transform(datum, dst_data);
}
/* Blob-level entry point for CPM transformation: checks channel/num
 * consistency between the datum and the output blobs, then delegates to
 * the raw-pointer Transform_CPM overload. */
template<typename Dtype> void DataTransformer<Dtype>::Transform_CPM(const Datum& datum, Blob<Dtype>* transformed_data, Blob<Dtype>* transformed_label, int cnt) {
  const int datum_channels = datum.channels();
  const int im_channels = transformed_data->channels();
  const int im_num = transformed_data->num();
  const int lb_num = transformed_label->num();

  CHECK_EQ(im_num, lb_num);
  CHECK_GE(im_num, 1);

  Dtype* data_ptr = transformed_data->mutable_cpu_data();
  Dtype* label_ptr = transformed_label->mutable_cpu_data();

  // The datum always carries 4 channels; the output image carries the
  // extra (4th) center-Gaussian channel only when put_gaussian is set.
  CHECK_EQ(datum_channels, 4);
  if (param_.put_gaussian())
    CHECK_EQ(im_channels, 4);
  else
    CHECK_EQ(im_channels, 3);

  Transform_CPM(datum, data_ptr, label_ptr, cnt);
}
/* param_.[xxxxxx],查询如下,from caffe.proto
message TransformationParameter {
optional float scale = 1 [default = 1];
optional bool mirror = 2 [default = false];
optional uint32 crop_size = 3 [default = 0];
optional string mean_file = 4;
repeated float mean_value = 5;
optional uint32 stride = 6 [default = 4];
optional float scale_cvg = 7 [default = 0.5];
optional uint32 max_cvg_len = 8 [default = 50];
optional uint32 min_cvg_len = 9 [default = 50];
optional bool opaque_coverage = 10 [default = true];
optional string coverage = 11 [default = "gridbox_max"];
optional float flip_prob = 12 [default = 0.5];//翻转操作的可能性
optional float max_rotate_degree = 13 [default = 5.0];
optional bool visualize = 14 [default = false];
optional uint32 crop_size_x = 15 [default = 368];
optional uint32 crop_size_y = 16 [default = 368];
optional float scale_prob = 17 [default = 0.5];
optional float scale_min = 18 [default = 0.9];
optional float scale_max = 19 [default = 1.1];
optional float bbox_norm_factor = 20 [default = 300];
optional string img_header = 21 [default = "."];
// Force the decoded image to have 3 color channels.
optional bool force_color = 22 [default = false];
// Force the decoded image to have 1 color channels.
optional bool force_gray = 23 [default = false];
optional float target_dist = 24 [default = 1.0];
optional float center_perterb_max = 25 [default = 10.0];
optional float sigma = 26 [default = 7.0];
optional float sigma_center = 27 [default = 21.0];
optional float clahe_tile_size = 28 [default = 8.0];
optional float clahe_clip_limit = 29 [default = 4.0];
optional bool do_clahe = 30 [default = false];
optional uint32 num_parts = 31 [default = 14];
optional uint32 num_total_augs = 32 [default = 82];
optional string aug_way = 33 [default = "rand"];
optional uint32 gray = 34 [default = 0];
optional uint32 np_in_lmdb = 35 [default = 16];
optional bool transform_body_joint = 38 [default = true];
optional bool put_gaussian = 39 [default = true];
optional bool visualize_label = 40 [default = false];
}
*/
/* Pointer-level core of Transform_CPM: decodes the image from the datum,
applies CLAHE / gray conversion, reads the metadata channel, runs the
augmentation chain (scale -> rotate -> crop/pad -> flip) during training,
writes the normalized image into transformed_data (with the 4th channel
holding a Gaussian at the person center), and generates the label maps.
See the TransformationParameter comment above for the param_ fields used. */
template<typename Dtype> void DataTransformer<Dtype>::Transform_CPM(const Datum& datum, Dtype* transformed_data, Dtype* transformed_label, int cnt) {
//TODO: some parameter should be set in prototxt
int clahe_tileSize = param_.clahe_tile_size();// CLAHE tile size and clip limit for histogram equalization
int clahe_clipLimit = param_.clahe_clip_limit();
//float targetDist = 41.0/35.0;
AugmentSelection as = {
false,
0.0,
Size(),
0,
};// record of which augmentations get applied to this sample
MetaData meta;
const string& data = datum.data();
const int datum_channels = datum.channels();
const int datum_height = datum.height();
const int datum_width = datum.width();
//const int crop_size = param_.crop_size();
//const Dtype scale = param_.scale();
//const bool do_mirror = param_.mirror() && Rand(2);
//const bool has_mean_file = param_.has_mean_file();
const bool has_uint8 = data.size() > 0;// byte payload present, else float_data
//const bool has_mean_values = mean_values_.size() > 0;
int crop_x = param_.crop_size_x();// output crop dimensions
int crop_y = param_.crop_size_y();
CHECK_GT(datum_channels, 0);
//CHECK_GE(datum_height, crop_size);
//CHECK_GE(datum_width, crop_size);
//before any transformation, get the image from datum
Mat img = Mat::zeros(datum_height, datum_width, CV_8UC3);// 8-bit, 3-channel, zero-filled
int offset = img.rows * img.cols;// pixels per channel plane
int dindex;
Dtype d_element;
for (int i = 0; i < img.rows; ++i) {
for (int j = 0; j < img.cols; ++j) {
Vec3b& rgb = img.at<Vec3b>(i, j);// destination pixel (all 3 channels)
for(int c = 0; c < 3; c++){// datum stores channels planar: c*offset + i*cols + j
dindex = c*offset + i*img.cols + j;
if (has_uint8)
d_element = static_cast<Dtype>(static_cast<uint8_t>(data[dindex]));
else
d_element = datum.float_data(dindex);
rgb[c] = d_element;
}
}
}
//color, contrast
if(param_.do_clahe())
clahe(img, clahe_tileSize, clahe_clipLimit);// histogram equalization to boost contrast
if(param_.gray() == 1){
cv::cvtColor(img, img, CV_BGR2GRAY);// collapse to grayscale, then replicate back to 3 channels
cv::cvtColor(img, img, CV_GRAY2BGR);
}
int offset3 = 3 * offset;// metadata channel starts after the 3 image planes
int offset1 = datum_width;// one metadata "row"
ReadMetaData(meta, data, offset3, offset1);
if(param_.transform_body_joint()) // we expect to transform body joints, and not to transform hand joints
TransformMetaJoints(meta);
//visualize original
if(param_.visualize())
visualize(img, meta, as);
//Start transforming
Mat img_aug = Mat::zeros(crop_y, crop_x, CV_8UC3);
Mat img_temp, img_temp2, img_temp3; //size determined by scale
// We only do random transform as augmentation when training.
if (phase_ == TRAIN) {
as.scale = augmentation_scale(img, img_temp, meta);// random rescale
as.degree = augmentation_rotate(img_temp, img_temp2, meta);// random rotation
if(param_.visualize())
visualize(img_temp2, meta, as);
as.crop = augmentation_croppad(img_temp2, img_temp3, meta);// crop/pad around jittered person center
if(param_.visualize())
visualize(img_temp3, meta, as);
as.flip = augmentation_flip(img_temp3, img_aug, meta);// random horizontal flip
if(param_.visualize())
visualize(img_aug, meta, as);
}
else {
img_aug = img.clone();// no augmentation outside training
as.scale = 1;
as.crop = Size();
as.flip = 0;
as.degree = 0;
}
//copy transformed img (img_aug) into transformed_data, do the mean-subtraction here
offset = img_aug.rows * img_aug.cols;
for (int i = 0; i < img_aug.rows; ++i) {
for (int j = 0; j < img_aug.cols; ++j) {
Vec3b& rgb = img_aug.at<Vec3b>(i, j);
transformed_data[0*offset + i*img_aug.cols + j] = (rgb[0] - 128)/256.0;// normalize each channel to roughly [-0.5, 0.5)
transformed_data[1*offset + i*img_aug.cols + j] = (rgb[1] - 128)/256.0;
transformed_data[2*offset + i*img_aug.cols + j] = (rgb[2] - 128)/256.0;
transformed_data[3*offset + i*img_aug.cols + j] = 0; //zero 4-th channel
}
}
putGaussianMaps(transformed_data + 3*offset, meta.objpos, 1, img_aug.cols, img_aug.rows, param_.sigma_center());// 4th channel: Gaussian at person center
generateLabelMap(transformed_label, img_aug, meta); // and visualize
}
/* Randomly rescale the image (and all annotation coordinates).
 * With probability (1 - scale_prob) the random multiplier is 1; otherwise
 * it is drawn uniformly from [scale_min, scale_max]. The actual resize
 * factor additionally normalizes the person via target_dist / scale_self.
 * Returns the random multiplier (not the absolute factor).
 */
template<typename Dtype>
float DataTransformer<Dtype>::augmentation_scale(Mat& img_src, Mat& img_temp, MetaData& meta) {
  float dice = static_cast <float> (rand()) / static_cast <float> (RAND_MAX); // uniform in [0,1]
  float scale_multiplier;
  if(dice > param_.scale_prob()) {
    // e.g. scale_prob = 0.8 -> no random rescaling 20% of the time.
    // NOTE: resize() below always overwrites img_temp, so the old
    // `img_temp = img_src.clone()` here was dead work and was removed.
    scale_multiplier = 1;
  }
  else {
    float dice2 = static_cast <float> (rand()) / static_cast <float> (RAND_MAX); //[0,1]
    scale_multiplier = (param_.scale_max() - param_.scale_min()) * dice2 + param_.scale_min(); //linear shear into [scale_min, scale_max]
  }
  float scale_abs = param_.target_dist()/meta.scale_self;
  float scale = scale_abs * scale_multiplier;// final resize factor
  resize(img_src, img_temp, Size(), scale, scale, INTER_CUBIC);
  // Scale every annotated coordinate by the same factor.
  meta.objpos *= scale;
  for(int i=0; i<np; i++){
    meta.joint_self.joints[i] *= scale;
  }
  for(int p=0; p<meta.numOtherPeople; p++){
    meta.objpos_other[p] *= scale;
    for(int i=0; i<np; i++){
      meta.joint_others[p].joints[i] *= scale;
    }
  }
  return scale_multiplier;
}
/* True iff point p lies inside the image rectangle [0,w) x [0,h). */
template<typename Dtype>
bool DataTransformer<Dtype>::onPlane(Point p, Size img_size) {
  return p.x >= 0 && p.y >= 0 &&
         p.x < img_size.width && p.y < img_size.height;
}
/* Crop (and gray-pad) a crop_size_x x crop_size_y window centered near
meta.objpos, jittered by up to center_perterb_max pixels per axis; all
annotation coordinates are shifted into the cropped frame.
Returns the random (x, y) jitter that was applied (not the crop size). */
template<typename Dtype>
Size DataTransformer<Dtype>::augmentation_croppad(Mat& img_src, Mat& img_dst, MetaData& meta) {
float dice_x = static_cast <float> (rand()) / static_cast <float> (RAND_MAX); // uniform [0,1]
float dice_y = static_cast <float> (rand()) / static_cast <float> (RAND_MAX); //[0,1]
int crop_x = param_.crop_size_x();// crop width
int crop_y = param_.crop_size_y();// crop height
float x_offset = int((dice_x - 0.5) * 2 * param_.center_perterb_max());// jitter in [-center_perterb_max, +center_perterb_max]
float y_offset = int((dice_y - 0.5) * 2 * param_.center_perterb_max());
Point2i center = meta.objpos + Point2f(x_offset, y_offset);// jittered crop center
int offset_left = -(center.x - (crop_x/2));// translation applied to annotations below
int offset_up = -(center.y - (crop_y/2));
img_dst = Mat::zeros(crop_y, crop_x, CV_8UC3) + Scalar(128,128,128);// gray padding where the crop falls outside the source
for(int i=0;i<crop_y;i++){
for(int j=0;j<crop_x;j++){ //i,j on cropped
int coord_x_on_img = center.x - crop_x/2 + j;// corresponding source pixel
int coord_y_on_img = center.y - crop_y/2 + i;
if(onPlane(Point(coord_x_on_img, coord_y_on_img), Size(img_src.cols, img_src.rows))){
img_dst.at<Vec3b>(i,j) = img_src.at<Vec3b>(coord_y_on_img, coord_x_on_img);// copy only in-bounds source pixels
}
}
}
//modify meta data: translate every coordinate into the cropped frame
Point2f offset(offset_left, offset_up);
meta.objpos += offset;
for(int i=0; i<np; i++){
meta.joint_self.joints[i] += offset;
}
for(int p=0; p<meta.numOtherPeople; p++){
meta.objpos_other[p] += offset;
for(int i=0; i<np; i++){
meta.joint_others[p].joints[i] += offset;
}
}
return Size(x_offset, y_offset);
}
/*
 * Swap left/right joint labels after a horizontal flip: the mirrored image
 * shows the person's right limbs on the left side, so the annotations must
 * be exchanged pairwise. The pair tables are 1-indexed (matlab convention)
 * and converted to 0-index before swapping; np selects which table applies.
 * For an unsupported np the joints are left untouched (same as before).
 * The three formerly copy-pasted swap loops are deduplicated here; values
 * in isVisible are always the integers 0/1/2 (set in ReadMetaData and
 * TransformJoints), so std::swap is exactly equivalent to the old
 * int-temporary swap.
 */
template<typename Dtype>
void DataTransformer<Dtype>::swapLeftRight(Joints& j) {
  //MPII R leg: 0(ankle), 1(knee), 2(hip)
  // L leg: 5(ankle), 4(knee), 3(hip)
  // R arms: 10(wrist), 11(elbow), 12(shoulder)
  // L arms: 15(wrist), 14(elbow), 13(shoulder)
  static const int right9[4]   = {1,2,3,7}; //1-index
  static const int left9[4]    = {4,5,6,8}; //1-index
  static const int right14[6]  = {3,4,5,9,10,11}; //1-index
  static const int left14[6]   = {6,7,8,12,13,14}; //1-index
  static const int right28[11] = {3,4,5,9,10,11,18,19,20,24,25}; //1-index
  static const int left28[11]  = {6,7,8,12,13,14,21,22,23,26,27}; //1-index
  const int* right = NULL;
  const int* left = NULL;
  int num_pairs = 0;
  if(np == 9){
    right = right9;  left = left9;  num_pairs = 4;
  }
  else if(np == 14){
    right = right14; left = left14; num_pairs = 6;
  }
  else if(np == 28){
    right = right28; left = left28; num_pairs = 11;
  }
  else {
    return; // unknown joint count: nothing to swap (matches old behavior)
  }
  for(int i=0; i<num_pairs; i++){
    const int ri = right[i] - 1; // matlab 1-index -> c++ 0-index
    const int li = left[i] - 1;
    std::swap(j.joints[ri], j.joints[li]);
    std::swap(j.isVisible[ri], j.isVisible[li]);
  }
}
/* Horizontally flip the image and all annotations, either at random
(aug_way == "rand", probability flip_prob) or per the precomputed table
(aug_way == "table"). Returns whether a flip happened. */
template<typename Dtype>
bool DataTransformer<Dtype>::augmentation_flip(Mat& img_src, Mat& img_aug, MetaData& meta) {
bool doflip;
if(param_.aug_way() == "rand"){
float dice = static_cast <float> (rand()) / static_cast <float> (RAND_MAX);// uniform [0,1]
doflip = (dice <= param_.flip_prob());
}
else if(param_.aug_way() == "table"){
doflip = (aug_flips[meta.write_number][meta.epoch % param_.num_total_augs()] == 1);// table row = image index, column = augmentation pass
}
else {
doflip = 0;
LOG(INFO) << "Unhandled exception!!!!!!";
}
if(doflip){
flip(img_src, img_aug, 1);// mirror pixels about the vertical axis
int w = img_src.cols;
meta.objpos.x = w - 1 - meta.objpos.x;// x -> w-1-x because valid columns are 0..w-1
for(int i=0; i<np; i++){
meta.joint_self.joints[i].x = w - 1 - meta.joint_self.joints[i].x;
}
if(param_.transform_body_joint())
swapLeftRight(meta.joint_self);// left/right joint labels swap under mirroring
for(int p=0; p<meta.numOtherPeople; p++){
meta.objpos_other[p].x = w - 1 - meta.objpos_other[p].x;
for(int i=0; i<np; i++){
meta.joint_others[p].joints[i].x = w - 1 - meta.joint_others[p].joints[i].x;
}
if(param_.transform_body_joint())
swapLeftRight(meta.joint_others[p]);
}
}
else {
img_aug = img_src.clone();// no flip: plain copy
}
return doflip;
}
/* Apply the 2x3 affine matrix R to p, in place, using homogeneous
 * coordinates (x, y, 1). */
template<typename Dtype>
void DataTransformer<Dtype>::RotatePoint(Point2f& p, Mat R){
  Mat hom(3, 1, CV_64FC1); // column vector (x, y, 1)
  hom.at<double>(0,0) = p.x;
  hom.at<double>(1,0) = p.y;
  hom.at<double>(2,0) = 1.0;
  Mat mapped = R * hom;    // transformed coordinates
  p.x = mapped.at<double>(0,0);
  p.y = mapped.at<double>(1,0);
}
/* Rotate the image by a random angle in [-max_rotate_degree, +max_rotate_degree]
(or by the table entry when aug_way == "table") about the image center.
The canvas is enlarged to the rotated bounding box so nothing is clipped
(uncovered pixels become gray 128), and all annotation coordinates get the
same affine transform. Returns the angle in degrees. */
template<typename Dtype>
float DataTransformer<Dtype>::augmentation_rotate(Mat& img_src, Mat& img_dst, MetaData& meta) {
float degree;
if(param_.aug_way() == "rand"){
float dice = static_cast <float> (rand()) / static_cast <float> (RAND_MAX);// uniform [0,1]
degree = (dice - 0.5) * 2 * param_.max_rotate_degree();// e.g. [-5,5] with the default parameter
}
else if(param_.aug_way() == "table"){
degree = aug_degs[meta.write_number][meta.epoch % param_.num_total_augs()];
}
else {
degree = 0;
LOG(INFO) << "Unhandled exception!!!!!!";
}
Point2f center(img_src.cols/2.0, img_src.rows/2.0);
Mat R = getRotationMatrix2D(center, degree, 1.0);// 2x3 affine: rotate `degree` about `center`, scale 1.0
Rect bbox = RotatedRect(center, img_src.size(), degree).boundingRect();// bounding box of the rotated image
// adjust transformation matrix so the rotated image lands centered in bbox
R.at<double>(0,2) += bbox.width/2.0 - center.x;
R.at<double>(1,2) += bbox.height/2.0 - center.y;
warpAffine(img_src, img_dst, R, bbox.size(), INTER_CUBIC, BORDER_CONSTANT, Scalar(128,128,128));
/* warpAffine(src, dst, M, dsize, flags, borderMode, borderValue):
applies the 2x3 matrix M (the adjusted R above); BORDER_CONSTANT with
Scalar(128,128,128) fills pixels that map from outside the source. */
//adjust meta data: apply the same affine transform to every coordinate
RotatePoint(meta.objpos, R);
for(int i=0; i<np; i++){
RotatePoint(meta.joint_self.joints[i], R);
}
for(int p=0; p<meta.numOtherPeople; p++){
RotatePoint(meta.objpos_other[p], R);
for(int i=0; i<np; i++){
RotatePoint(meta.joint_others[p].joints[i], R);
}
}
return degree;
}
/* Accumulate a 2-D Gaussian bump centered at `center` into `entry`, a
 * grid_x x grid_y row-major map. `stride` maps grid cells back to image
 * pixels and sigma controls the spread. Cells whose response would be
 * below ~1% (exponent > ln(100)) are skipped; values saturate at 1.
 * Called with (transformed_data + 3*offset, meta.objpos, 1, cols, rows,
 * sigma_center) to fill the 4th "center map" channel. */
template<typename Dtype>
void DataTransformer<Dtype>::putGaussianMaps(Dtype* entry, Point2f center, int stride, int grid_x, int grid_y, float sigma){
  float start = stride/2.0 - 0.5; // pixel center of grid cell 0 (0 if stride=1, 0.5 if 2, 1.5 if 4, ...)
  for (int gy = 0; gy < grid_y; gy++){
    for (int gx = 0; gx < grid_x; gx++){
      float x = start + gx * stride;
      float y = start + gy * stride;
      float d2 = (x-center.x)*(x-center.x) + (y-center.y)*(y-center.y); // squared distance to the center
      float exponent = d2 / 2.0 / sigma / sigma;
      if(exponent > 4.6052){ // ln(100): contribution would be under 1%
        continue;
      }
      const int idx = gy*grid_x + gx;
      entry[idx] += exp(-exponent); // accumulate the Gaussian response
      if(entry[idx] > 1)
        entry[idx] = 1; // clamp the probability map at 1
    }
  }
}