CPM中data_transformer.cpp代码片段学习

本文主要探讨了在caffe的CPM模块中,data_transformer.cpp文件中的数据预处理流程,包括图像对比度增强CLAHE、翻转、旋转、中间点定位、裁剪以及颜色Jittering等步骤,旨在提升模型训练的效果。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

CPM中data_transformer.cpp代码片段标注

程序功能

caffe,CPM程序中,进行数据预处理部分,预处理的方式:
(1)图像对比度增强clahe
(2)flip
(3)rotate
(4)中间点
(5)crop
(6)color Jittering:颜色增强(颜色增强、饱和度、对比度)

#ifdef USE_OPENCV
#include <opencv2/core/core.hpp>
//#include <opencv2/opencv.hpp>
#include <opencv2/contrib/contrib.hpp>
#include <opencv2/highgui/highgui.hpp>
#endif  // USE_OPENCV

#include <algorithm>
#include <cstdio>
#include <cstring>
#include <fstream>
#include <iostream>
using namespace cv;
using namespace std;

#include <string>
#include <sstream>
#include <vector>

#include "caffe/data_transformer.hpp"
#include "caffe/util/io.hpp"
#include "caffe/util/math_functions.hpp"
#include "caffe/util/rng.hpp"

namespace caffe {
/* Params:  data - raw LMDB record bytes
            idx  - byte offset into data where the values start
            pf   - destination buffer for the decoded values
            len  - number of Dtype values to copy
   Purpose: reinterpret len * sizeof(Dtype) raw bytes starting at data[idx]
            as packed Dtype values and copy them into pf.
   Fix: memcpy's source parameter is const void*, so the const_cast on
        &data[idx] was unnecessary and has been removed. */
template<typename Dtype>
void DecodeFloats(const std::string& data, size_t idx, Dtype* pf, size_t len) {
  std::memcpy(pf, &data[idx], len * sizeof(Dtype));
}
/* Params:  data - raw LMDB record bytes
            idx  - byte offset where a NUL-terminated string begins
   Purpose: collect the characters starting at data[idx] up to (but not
            including) the first 0 byte and return them (e.g. the dataset
            name stored in the metadata block). */
std::string DecodeString(const std::string& data, size_t idx) {
  std::string out;
  for (size_t i = idx; data[i] != 0; ++i) {
    out.push_back(data[i]);
  }
  return out;
}
/* Params:  meta    - output MetaData record
            data    - raw datum bytes (image pixels followed by metadata rows)
            offset3 - byte offset where the metadata block starts (= 3*rows*cols)
            offset1 - byte stride of one metadata row (= data_width)
   Purpose: decode, from the byte layout produced by genLMDB.py: dataset name,
            image size, validation flag, number of other people, person index,
            annotation-list index, write counters, the main person's center /
            scale / joints (with visibility), and every other person's center /
            scale / joints. Also tracks the current epoch via the write counter.
   Fix: the people_index line previously carried stray non-comment text
        (a missing "//"), which did not compile. */
template<typename Dtype>
void DataTransformer<Dtype>::ReadMetaData(MetaData& meta, const string& data, size_t offset3, size_t offset1) { //very specific to genLMDB.py
  // ------------------- Dataset name ----------------------
  meta.dataset = DecodeString(data, offset3);
  // ------------------- Image Dimension -------------------
  float height, width;
  DecodeFloats(data, offset3+offset1, &height, 1);    // fields are packed 4-byte floats
  DecodeFloats(data, offset3+offset1+4, &width, 1);   // "+4" skips one float
  meta.img_size = Size(width, height);
  // ----------- Validation, nop, counters -----------------
  meta.isValidation = (data[offset3+2*offset1]==0 ? false : true); // validation-set flag
  meta.numOtherPeople = (int)data[offset3+2*offset1+1];            // number of other people in the image
  meta.people_index = (int)data[offset3+2*offset1+2];              // index of this person (fixed: was a bare comment)
  float annolist_index;
  DecodeFloats(data, offset3+2*offset1+3, &annolist_index, 1);
  meta.annolist_index = (int)annolist_index; // index into the annotation list
  float write_number;
  DecodeFloats(data, offset3+2*offset1+7, &write_number, 1);
  meta.write_number = (int)write_number; // which image (by write order) this is
  float total_write_number;
  DecodeFloats(data, offset3+2*offset1+11, &total_write_number, 1);
  meta.total_write_number = (int)total_write_number; // total number of samples

  // count epochs according to counters: write_number wraps back to 0 at the
  // start of each pass over the dataset.
  // NOTE(review): static local assumes a single reader thread — confirm.
  static int cur_epoch = -1;
  if(meta.write_number == 0){
    cur_epoch++;
  }
  meta.epoch = cur_epoch;
  if(meta.write_number % 1000 == 0){ // progress log every 1000 images
    LOG(INFO) << "dataset: " << meta.dataset <<"; img_size: " << meta.img_size
        << "; meta.annolist_index: " << meta.annolist_index << "; meta.write_number: " << meta.write_number
        << "; meta.total_write_number: " << meta.total_write_number << "; meta.epoch: " << meta.epoch;
  }
  if(param_.aug_way() == "table" && !is_table_set){
    SetAugTable(meta.total_write_number); // load the per-image augmentation table once
    is_table_set = true;
  }

  // ------------------- objpos: person center point -----------------------
  DecodeFloats(data, offset3+3*offset1, &meta.objpos.x, 1);
  DecodeFloats(data, offset3+3*offset1+4, &meta.objpos.y, 1);
  meta.objpos -= Point2f(1,1); // stored 1-indexed (matlab); convert to 0-indexed
  // ------------ scale_self, joint_self --------------
  DecodeFloats(data, offset3+4*offset1, &meta.scale_self, 1);
  meta.joint_self.joints.resize(np_in_lmdb);    // one entry per joint stored in the LMDB
  meta.joint_self.isVisible.resize(np_in_lmdb);
  for(int i=0; i<np_in_lmdb; i++){
    DecodeFloats(data, offset3+5*offset1+4*i, &meta.joint_self.joints[i].x, 1);
    DecodeFloats(data, offset3+6*offset1+4*i, &meta.joint_self.joints[i].y, 1);
    meta.joint_self.joints[i] -= Point2f(1,1); //from matlab 1-index to c++ 0-index
    float isVisible;
    DecodeFloats(data, offset3+7*offset1+4*i, &isVisible, 1);
    meta.joint_self.isVisible[i] = (isVisible == 0) ? 0 : 1;
    if(meta.joint_self.joints[i].x < 0 || meta.joint_self.joints[i].y < 0 ||
       meta.joint_self.joints[i].x >= meta.img_size.width || meta.joint_self.joints[i].y >= meta.img_size.height){
      meta.joint_self.isVisible[i] = 2; // 2 means cropped (off image); 0 means occluded but still on image
    }
    //LOG(INFO) << meta.joint_self.joints[i].x << " " << meta.joint_self.joints[i].y << " " << meta.joint_self.isVisible[i];
  }

  //others (7 lines loaded)
  meta.objpos_other.resize(meta.numOtherPeople); // centers, scales and joints of the other people
  meta.scale_other.resize(meta.numOtherPeople);
  meta.joint_others.resize(meta.numOtherPeople);
  for(int p=0; p<meta.numOtherPeople; p++){
    DecodeFloats(data, offset3+(8+p)*offset1, &meta.objpos_other[p].x, 1);
    DecodeFloats(data, offset3+(8+p)*offset1+4, &meta.objpos_other[p].y, 1);
    meta.objpos_other[p] -= Point2f(1,1); // matlab -> c++ indexing
    DecodeFloats(data, offset3+(8+meta.numOtherPeople)*offset1+4*p, &meta.scale_other[p], 1);
  }
  //8 + numOtherPeople lines loaded
  for(int p=0; p<meta.numOtherPeople; p++){
    meta.joint_others[p].joints.resize(np_in_lmdb);
    meta.joint_others[p].isVisible.resize(np_in_lmdb);
    for(int i=0; i<np_in_lmdb; i++){
      DecodeFloats(data, offset3+(9+meta.numOtherPeople+3*p)*offset1+4*i, &meta.joint_others[p].joints[i].x, 1);
      DecodeFloats(data, offset3+(9+meta.numOtherPeople+3*p+1)*offset1+4*i, &meta.joint_others[p].joints[i].y, 1);
      meta.joint_others[p].joints[i] -= Point2f(1,1);
      float isVisible;
      DecodeFloats(data, offset3+(9+meta.numOtherPeople+3*p+2)*offset1+4*i, &isVisible, 1);
      meta.joint_others[p].isVisible[i] = (isVisible == 0) ? 0 : 1;
      if(meta.joint_others[p].joints[i].x < 0 || meta.joint_others[p].joints[i].y < 0 ||
         meta.joint_others[p].joints[i].x >= meta.img_size.width || meta.joint_others[p].joints[i].y >= meta.img_size.height){
        meta.joint_others[p].isVisible[i] = 2; // 2 means cropped (off image)
      }
    }
  }
}

/* Param:   numData - total number of images (meta.total_write_number)
   Purpose: load the pre-generated augmentation tables rotate_*.txt and
            flip_*.txt; afterwards aug_degs[i][j] / aug_flips[i][j] hold the
            rotation degree / flip flag for image i at augmentation pass j.
   Fixes: bounded snprintf instead of sprintf; missing table files now fail
          loudly (CHECK) instead of silently filling the tables with garbage. */
template<typename Dtype>
void DataTransformer<Dtype>::SetAugTable(int numData){
  aug_degs.resize(numData);
  aug_flips.resize(numData);
  for(int i = 0; i < numData; i++){
    aug_degs[i].resize(param_.num_total_augs());
    aug_flips[i].resize(param_.num_total_augs());
  }
  //load table files
  char filename[100];
  snprintf(filename, sizeof(filename), "../../rotate_%d_%d.txt", param_.num_total_augs(), numData);
  ifstream rot_file(filename);   // rotation degrees, one row per image
  char filename2[100];
  snprintf(filename2, sizeof(filename2), "../../flip_%d_%d.txt", param_.num_total_augs(), numData);
  ifstream flip_file(filename2); // flip flags, one row per image
  CHECK(rot_file.is_open()) << "Could not open augmentation table " << filename;
  CHECK(flip_file.is_open()) << "Could not open augmentation table " << filename2;

  for(int i = 0; i < numData; i++){                   // numData rows ...
    for(int j = 0; j < param_.num_total_augs(); j++){ // ... of num_total_augs columns
      rot_file >> aug_degs[i][j];
      flip_file >> aug_flips[i][j];
    }
  }
}

template<typename Dtype>
void DataTransformer<Dtype>::TransformMetaJoints(MetaData& meta) {
  // Remap the main person's joints and every other person's joints from the
  // np_in_lmdb ordering to the np ordering (both configured in the prototxt).
  TransformJoints(meta.joint_self);
  for (auto& other : meta.joint_others) {
    TransformJoints(other);
  }
}
/* Param:   j - joints of one person (receives joint_self or joint_others[p])
   Purpose: reorder/remap the joints from the LMDB ordering (np_in_lmdb,
            MPII layout below) to the network's ordering (np from prototxt).
            For np == 28, extra entries are midpoints of joint pairs. */
template<typename Dtype>
void DataTransformer<Dtype>::TransformJoints(Joints& j) {
  //transform joints in meta from np_in_lmdb (specified in prototxt) to np (specified in prototxt)
  //MPII R leg: 0(ankle), 1(knee), 2(hip)
  //     L leg: 5(ankle), 4(knee), 3(hip)
  //     R arms: 10(wrist), 11(elbow), 12(shoulder)
  //     L arms: 15(wrist), 14(elbow), 13(shoulder)
  //     6 - pelvis, 7 - thorax, 8 - upper neck, 9 - head top
  //LOG(INFO) << "TransformJoints: here np == " << np << " np_lmdb = " << np_in_lmdb << " joints.size() = " << j.joints.size();
  //assert(joints.size() == np_in_lmdb);
  //assert(np == 14 || np == 28);
  Joints jo = j;  // work on a copy; assigned back at the end
  if(np == 14){
    // target index i takes its data from MPII joint MPI_to_ours[i]
    int MPI_to_ours[14] = {9, 8, 12, 11, 10, 13, 14, 15, 2, 1, 0, 3, 4, 5};
    jo.joints.resize(np);
    jo.isVisible.resize(np);
    for(int i=0;i<np;i++){
      jo.joints[i] = j.joints[MPI_to_ours[i]];
      jo.isVisible[i] = j.isVisible[MPI_to_ours[i]];
    }
  }
  else if(np == 28){
    // each target joint i is the midpoint of MPII joints _1[i] and _2[i];
    // for i < 16 both tables agree, so the "midpoint" is the joint itself
    int MPI_to_ours_1[28] = {9, 8,12,11,10,13,14,15, 2, 1, 0, 3, 4, 5, 7, 6, \
                             9, 8,12,11, 8,13,14, 2, 1, 3, 4, 6};
                          //17,18,19,20,21,22,23,24,25,26,27,28
    int MPI_to_ours_2[28] = {9, 8,12,11,10,13,14,15, 2, 1, 0, 3, 4, 5, 7, 6, \
                             8,12,11,10,13,14,15, 1, 0, 4, 5, 7};
                          //17,18,19,20,21,22,23,24,25,26,27,28
    jo.joints.resize(np);
    jo.isVisible.resize(np);
    for(int i=0;i<np;i++){
      jo.joints[i] = (j.joints[MPI_to_ours_1[i]] + j.joints[MPI_to_ours_2[i]]) * 0.5;
      // cropped (2) wins; otherwise visible only if both endpoints are visible
      if(j.isVisible[MPI_to_ours_1[i]]==2 || j.isVisible[MPI_to_ours_2[i]]==2){
        jo.isVisible[i] = 2;
      }
      else {
        jo.isVisible[i] = j.isVisible[MPI_to_ours_1[i]] && j.isVisible[MPI_to_ours_2[i]];
      }
    }
  }
  j = jo;
}


/* Constructor: stores the transformation parameters and the phase
   (TRAIN/TEST), loads the mean blob from mean_file OR the per-channel
   mean values (the two are mutually exclusive), and caches the joint
   counts configured in the prototxt. */
template<typename Dtype> DataTransformer<Dtype>::DataTransformer(const TransformationParameter& param, Phase phase) : param_(param), phase_(phase) {
  // check if we want to use mean_file
  if (param_.has_mean_file()) {
    CHECK_EQ(param_.mean_value_size(), 0) <<
      "Cannot specify mean_file and mean_value at the same time";
    const string& mean_file = param.mean_file();
    if (Caffe::root_solver()) { // only the root solver thread logs the load
      LOG(INFO) << "Loading mean file from: " << mean_file;
    }
    BlobProto blob_proto;
    // deserialize the protobuf-encoded mean blob from disk (aborts on failure)
    ReadProtoFromBinaryFileOrDie(mean_file.c_str(), &blob_proto);
    data_mean_.FromProto(blob_proto);
  }
  // check if we want to use mean_value
  if (param_.mean_value_size() > 0) {
    CHECK(param_.has_mean_file() == false) <<
      "Cannot specify mean_file and mean_value at the same time";
    for (int c = 0; c < param_.mean_value_size(); ++c) {
      mean_values_.push_back(param_.mean_value(c)); // one mean per channel (or one shared value)
    }
  }
  LOG(INFO) << "DataTransformer constructor done.";
  np_in_lmdb = param_.np_in_lmdb(); // number of joints stored in the LMDB
  np = param_.num_parts();          // number of joints the network uses
  is_table_set = false;
}

/* Core single-datum transform: optionally crops (random position in TRAIN,
   center otherwise), optionally mirrors horizontally, subtracts the mean
   (mean file or per-channel mean values) and multiplies by scale, writing
   the result into transformed_data in CHW order. */
template<typename Dtype> void DataTransformer<Dtype>::Transform(const Datum& datum, Dtype* transformed_data) {
  //LOG(INFO) << "Function 1 is used";
  /* A Datum has three dimensions: channels, height and width.
     It carries two payload fields: byte_data (integers, typically image
     pixels) and float_data (floats, typically feature vectors).
     "label" is the integer class label; "encoded" says whether the bytes
     still need decoding (e.g. JPEG/PNG). */
  const string& data = datum.data();
  const int datum_channels = datum.channels();
  const int datum_height = datum.height();
  const int datum_width = datum.width();

  const int crop_size = param_.crop_size();            // crop side length (0 = no crop)
  const Dtype scale = param_.scale();                  // multiplicative scale applied last
  const bool do_mirror = param_.mirror() && Rand(2);   // random horizontal mirror
  const bool has_mean_file = param_.has_mean_file();   // mean image available?
  const bool has_uint8 = data.size() > 0;              // uint8 payload vs float_data
  const bool has_mean_values = mean_values_.size() > 0;// per-channel mean values available?

  CHECK_GT(datum_channels, 0);
  CHECK_GE(datum_height, crop_size);
  CHECK_GE(datum_width, crop_size);

/* glog check macros used throughout this file:
   CHECK_EQ/NE/LE/LT/GE/GT compare two values (types must match; use
   static_cast if they do not); CHECK_NOTNULL checks a pointer;
   CHECK_STREQ/STRNE/STRCASEEQ/STRCASENE compare C strings;
   CHECK_DOUBLE_EQ/CHECK_NEAR compare floating point within a tolerance.
   A failed check aborts the program with a message. */


  Dtype* mean = NULL;
  if (has_mean_file) { // the mean blob must match the datum's shape exactly
    CHECK_EQ(datum_channels, data_mean_.channels());
    CHECK_EQ(datum_height, data_mean_.height());
    CHECK_EQ(datum_width, data_mean_.width());
    mean = data_mean_.mutable_cpu_data();
  }
  if (has_mean_values) {
    CHECK(mean_values_.size() == 1 || mean_values_.size() == datum_channels) <<
     "Specify either 1 mean_value or as many as channels: " << datum_channels;
    if (datum_channels > 1 && mean_values_.size() == 1) {
      // Replicate the mean_value for simplicity
      for (int c = 1; c < datum_channels; ++c) {
        mean_values_.push_back(mean_values_[0]);
      }
    }
  }

  int height = datum_height;
  int width = datum_width;

  int h_off = 0; // crop window origin inside the source image
  int w_off = 0;
  if (crop_size) {
    height = crop_size;
    width = crop_size;
    // We only do random crop when we do training.
    if (phase_ == TRAIN) { // random crop position during training
      h_off = Rand(datum_height - crop_size + 1);
      w_off = Rand(datum_width - crop_size + 1);
    } else {
      h_off = (datum_height - crop_size) / 2; // center crop otherwise
      w_off = (datum_width - crop_size) / 2;
    }
  }

/* Per-pixel transform: subtract the mean, then multiply by scale.
   If cropping is enabled the output is crop_size x crop_size,
   otherwise datum_height x datum_width. */
  Dtype datum_element;
  int top_index, data_index;
  for (int c = 0; c < datum_channels; ++c) {
    for (int h = 0; h < height; ++h) {   // output rows
      for (int w = 0; w < width; ++w) {  // output columns
        data_index = (c * datum_height + h_off + h) * datum_width + w_off + w; // source index (CHW, crop offset applied)
        if (do_mirror) { // mirrored output column
          top_index = (c * height + h) * width + (width - 1 - w);
        } else {
          top_index = (c * height + h) * width + w;
        }
        if (has_uint8) {
          datum_element =
            static_cast<Dtype>(static_cast<uint8_t>(data[data_index]));
        } else {
          datum_element = datum.float_data(data_index);
        }
        if (has_mean_file) { // subtract the per-pixel mean, then scale
          transformed_data[top_index] =
            (datum_element - mean[data_index]) * scale;
        } else {
          if (has_mean_values) {
            transformed_data[top_index] =
              (datum_element - mean_values_[c]) * scale; // subtract this channel's mean, then scale
          } else {
            transformed_data[top_index] = datum_element * scale;
          }
        }
      }
    }
  }
}

/* Blob overload: validate that the destination blob is shape-compatible
   with the datum (exact match, or crop_size x crop_size when cropping),
   then delegate the per-pixel work to the raw-pointer overload. */
template<typename Dtype> void DataTransformer<Dtype>::Transform(const Datum& datum, Blob<Dtype>* transformed_blob) {
  const int in_channels = datum.channels();
  const int in_height = datum.height();
  const int in_width = datum.width();

  const int out_channels = transformed_blob->channels();
  const int out_height = transformed_blob->height();
  const int out_width = transformed_blob->width();
  const int out_num = transformed_blob->num();

  CHECK_EQ(out_channels, in_channels);
  CHECK_LE(out_height, in_height);
  CHECK_LE(out_width, in_width);
  CHECK_GE(out_num, 1);

  const int crop_size = param_.crop_size();
  if (crop_size) {
    // with cropping, the output must be exactly crop_size x crop_size
    CHECK_EQ(crop_size, out_height);
    CHECK_EQ(crop_size, out_width);
  } else {
    // without cropping, the output must match the input exactly
    CHECK_EQ(in_height, out_height);
    CHECK_EQ(in_width, out_width);
  }

  Transform(datum, transformed_blob->mutable_cpu_data());
}

/* Blob-level entry point for the CPM transform.
   Validates that the data and label blobs agree in num, that the datum has
   4 channels, and that the output image blob has 4 channels when a center
   Gaussian map is requested (put_gaussian) or 3 otherwise, then forwards to
   the raw-pointer Transform_CPM() that does the actual work. */
template<typename Dtype> void DataTransformer<Dtype>::Transform_CPM(const Datum& datum, Blob<Dtype>* transformed_data, Blob<Dtype>* transformed_label, int cnt) {
  //std::cout << "Function 2 is used"; std::cout.flush();
  const int datum_channels = datum.channels();
  //const int datum_height = datum.height();
  //const int datum_width = datum.width();

  const int im_channels = transformed_data->channels();
  //const int im_height = transformed_data->height();
  //const int im_width = transformed_data->width();
  const int im_num = transformed_data->num();

  //const int lb_channels = transformed_label->channels();
  //const int lb_height = transformed_label->height();
  //const int lb_width = transformed_label->width();
  const int lb_num = transformed_label->num();

  //LOG(INFO) << "image shape: " << transformed_data->num() << " " << transformed_data->channels() << " " 
  //                             << transformed_data->height() << " " << transformed_data->width();
  //LOG(INFO) << "label shape: " << transformed_label->num() << " " << transformed_label->channels() << " " 
  //                             << transformed_label->height() << " " << transformed_label->width();

  CHECK_EQ(im_num, lb_num); // data and label batches must line up
  CHECK_GE(im_num, 1);

  Dtype* transformed_data_pointer = transformed_data->mutable_cpu_data();
  Dtype* transformed_label_pointer = transformed_label->mutable_cpu_data();

  CHECK_EQ(datum_channels, 4); // datum carries an extra channel beyond RGB (metadata rows)
  if(param_.put_gaussian())
    CHECK_EQ(im_channels, 4);  // 4th output channel will hold the center Gaussian map
  else
    CHECK_EQ(im_channels, 3);
  // delegate to the pointer overload (void return, so "return f()" is legal)
  return Transform_CPM(datum, transformed_data_pointer, transformed_label_pointer, cnt); //call function 1
}

/*  param_.[xxxxxx],查询如下,from caffe.proto
message TransformationParameter {

  optional float scale = 1 [default = 1];
  optional bool mirror = 2 [default = false];
  optional uint32 crop_size = 3 [default = 0];
  optional string mean_file = 4;
  repeated float mean_value = 5;
  optional uint32 stride = 6 [default = 4];
  optional float scale_cvg = 7 [default = 0.5];
  optional uint32 max_cvg_len = 8 [default = 50];
  optional uint32 min_cvg_len = 9 [default = 50];
  optional bool opaque_coverage = 10 [default = true];
  optional string coverage = 11 [default = "gridbox_max"];
  optional float flip_prob = 12 [default = 0.5];//翻转操作的可能性
  optional float max_rotate_degree = 13 [default = 5.0];
  optional bool visualize = 14 [default = false];
  optional uint32 crop_size_x = 15 [default = 368];
  optional uint32 crop_size_y = 16 [default = 368];
  optional float scale_prob = 17 [default = 0.5];
  optional float scale_min = 18 [default = 0.9];
  optional float scale_max = 19 [default = 1.1];
  optional float bbox_norm_factor = 20 [default = 300];
  optional string img_header = 21 [default = "."];
  // Force the decoded image to have 3 color channels.
  optional bool force_color = 22 [default = false];
  // Force the decoded image to have 1 color channels.
  optional bool force_gray = 23 [default = false];
  optional float target_dist = 24 [default = 1.0];
  optional float center_perterb_max = 25 [default = 10.0];
  optional float sigma = 26 [default = 7.0];
  optional float sigma_center = 27 [default = 21.0];
  optional float clahe_tile_size = 28 [default = 8.0];
  optional float clahe_clip_limit = 29 [default = 4.0];
  optional bool do_clahe = 30 [default = false];
  optional uint32 num_parts = 31 [default = 14];
  optional uint32 num_total_augs = 32 [default = 82];
  optional string aug_way = 33 [default = "rand"];
  optional uint32 gray = 34 [default = 0];
  optional uint32 np_in_lmdb = 35 [default = 16];
  optional bool transform_body_joint = 38 [default = true];
  optional bool put_gaussian = 39 [default = true];
  optional bool visualize_label = 40 [default = false];
}
*/

/* Raw-pointer CPM transform. Pipeline:
   1. rebuild the BGR image from the datum bytes;
   2. optional CLAHE contrast enhancement and optional gray conversion;
   3. decode the metadata (ReadMetaData) and remap joints (TransformMetaJoints);
   4. in TRAIN phase, apply random scale -> rotate -> crop/pad -> flip
      (each step also updates the annotations in meta);
   5. write the normalized pixels ((v-128)/256) into transformed_data,
      put a center Gaussian into the 4th channel, and generate the label maps. */
template<typename Dtype> void DataTransformer<Dtype>::Transform_CPM(const Datum& datum, Dtype* transformed_data, Dtype* transformed_label, int cnt) {
  
  //TODO: some parameter should be set in prototxt
  int clahe_tileSize = param_.clahe_tile_size();   // CLAHE tile size and clip limit
  int clahe_clipLimit = param_.clahe_clip_limit();
  //float targetDist = 41.0/35.0;
  AugmentSelection as = {
    false,   // flip
    0.0,     // degree
    Size(),  // crop
    0,       // scale
  };
  MetaData meta;
  
  const string& data = datum.data();
  const int datum_channels = datum.channels();
  const int datum_height = datum.height();
  const int datum_width = datum.width();

  //const int crop_size = param_.crop_size();
  //const Dtype scale = param_.scale();
  //const bool do_mirror = param_.mirror() && Rand(2);
  //const bool has_mean_file = param_.has_mean_file();
  const bool has_uint8 = data.size() > 0; // uint8 payload vs float_data
  //const bool has_mean_values = mean_values_.size() > 0;
  int crop_x = param_.crop_size_x(); // output crop dimensions
  int crop_y = param_.crop_size_y();

  CHECK_GT(datum_channels, 0);
  //CHECK_GE(datum_height, crop_size);
  //CHECK_GE(datum_width, crop_size);

  //before any transformation, get the image from datum
  Mat img = Mat::zeros(datum_height, datum_width, CV_8UC3); // 8-bit 3-channel image
  int offset = img.rows * img.cols; // pixels per channel (channels stored planar in datum)
  int dindex;
  Dtype d_element;
  for (int i = 0; i < img.rows; ++i) {
    for (int j = 0; j < img.cols; ++j) {
      Vec3b& rgb = img.at<Vec3b>(i, j); // the 3-channel pixel at (i,j)
      for(int c = 0; c < 3; c++){       // copy each planar channel value into it
        dindex = c*offset + i*img.cols + j;
        if (has_uint8)
          d_element = static_cast<Dtype>(static_cast<uint8_t>(data[dindex]));
        else
          d_element = datum.float_data(dindex);
        rgb[c] = d_element;
      }
    }
  }

  //color, contrast
  if(param_.do_clahe())
    clahe(img, clahe_tileSize, clahe_clipLimit); // adaptive histogram equalization
  if(param_.gray() == 1){
    // drop color information but keep a 3-channel layout
    cv::cvtColor(img, img, CV_BGR2GRAY);
    cv::cvtColor(img, img, CV_GRAY2BGR);
  }

  int offset3 = 3 * offset;   // metadata starts after the 3 image planes
  int offset1 = datum_width;  // one metadata row is one image row wide
  ReadMetaData(meta, data, offset3, offset1);
  if(param_.transform_body_joint()) // we expect to transform body joints, and not to transform hand joints
    TransformMetaJoints(meta);

  //visualize original
  if(param_.visualize()) 
    visualize(img, meta, as);

  //Start transforming
  Mat img_aug = Mat::zeros(crop_y, crop_x, CV_8UC3);
  Mat img_temp, img_temp2, img_temp3; //size determined by scale
  // We only do random transform as augmentation when training.
  if (phase_ == TRAIN) {
    as.scale = augmentation_scale(img, img_temp, meta);        // random scale
    //LOG(INFO) << meta.joint_self.joints.size();
    //LOG(INFO) << meta.joint_self.joints[0];
    as.degree = augmentation_rotate(img_temp, img_temp2, meta); // random rotation
    //LOG(INFO) << meta.joint_self.joints.size();
    //LOG(INFO) << meta.joint_self.joints[0];
    if(param_.visualize()) 
      visualize(img_temp2, meta, as);
    as.crop = augmentation_croppad(img_temp2, img_temp3, meta); // crop/pad around a jittered center
    //LOG(INFO) << meta.joint_self.joints.size();
    //LOG(INFO) << meta.joint_self.joints[0];
    if(param_.visualize()) 
      visualize(img_temp3, meta, as);
    as.flip = augmentation_flip(img_temp3, img_aug, meta);      // random horizontal flip
    //LOG(INFO) << meta.joint_self.joints.size();
    //LOG(INFO) << meta.joint_self.joints[0];
    if(param_.visualize()) 
      visualize(img_aug, meta, as);
  }
  else {
    // no augmentation outside TRAIN: pass the image through unchanged
    img_aug = img.clone();
    as.scale = 1;
    as.crop = Size();
    as.flip = 0;
    as.degree = 0;
  }
  //LOG(INFO) << "scale: " << as.scale << "; crop:(" << as.crop.width << "," << as.crop.height 
  //          << "); flip:" << as.flip << "; degree: " << as.degree;

  //copy transformed img (img_aug) into transformed_data, doing the mean subtraction here
  offset = img_aug.rows * img_aug.cols;
  for (int i = 0; i < img_aug.rows; ++i) {
    for (int j = 0; j < img_aug.cols; ++j) {
      Vec3b& rgb = img_aug.at<Vec3b>(i, j);
      // normalize each channel to roughly [-0.5, 0.5)
      transformed_data[0*offset + i*img_aug.cols + j] = (rgb[0] - 128)/256.0;
      transformed_data[1*offset + i*img_aug.cols + j] = (rgb[1] - 128)/256.0;
      transformed_data[2*offset + i*img_aug.cols + j] = (rgb[2] - 128)/256.0;
      transformed_data[3*offset + i*img_aug.cols + j] = 0; //zero 4-th channel
    }
  }
  
  // 4th channel: Gaussian centered on the person's position
  putGaussianMaps(transformed_data + 3*offset, meta.objpos, 1, img_aug.cols, img_aug.rows, param_.sigma_center());
  //LOG(INFO) << "image transformation done!";
  generateLabelMap(transformed_label, img_aug, meta); // and visualize
}

/* Params:  img_src - input image; img_temp - output (resized) image;
            meta    - annotations, scaled in place to stay consistent
   Returns: the random scale multiplier that was applied (1 when the dice
            said "no random scaling").
   Fix: removed the img_src.clone() in the no-random branch — resize()
        unconditionally overwrites img_temp right after, so the clone was
        pure wasted work. */
template<typename Dtype>
float DataTransformer<Dtype>::augmentation_scale(Mat& img_src, Mat& img_temp, MetaData& meta) {
  float dice = static_cast <float> (rand()) / static_cast <float> (RAND_MAX); // uniform in [0,1]
  float scale_multiplier;
  if(dice > param_.scale_prob()) {
    // with probability (1 - scale_prob): keep only the deterministic scale
    scale_multiplier = 1;
  }
  else {
    float dice2 = static_cast <float> (rand()) / static_cast <float> (RAND_MAX); //[0,1]
    scale_multiplier = (param_.scale_max() - param_.scale_min()) * dice2 + param_.scale_min(); //linear shear into [scale_min, scale_max]
  }
  float scale_abs = param_.target_dist()/meta.scale_self; // deterministic part of the scale
  float scale = scale_abs * scale_multiplier;
  resize(img_src, img_temp, Size(), scale, scale, INTER_CUBIC);
  //modify meta data: keep all coordinates consistent with the resized image
  meta.objpos *= scale;
  for(int i=0; i<np; i++){
    meta.joint_self.joints[i] *= scale;
  }
  for(int p=0; p<meta.numOtherPeople; p++){
    meta.objpos_other[p] *= scale;
    for(int i=0; i<np; i++){
      meta.joint_others[p].joints[i] *= scale;
    }
  }
  return scale_multiplier;
}

/* Returns true iff point p lies inside the image rectangle
   [0, img_size.width) x [0, img_size.height). */
template<typename Dtype>
bool DataTransformer<Dtype>::onPlane(Point p, Size img_size) {
  return p.x >= 0 && p.y >= 0 &&
         p.x < img_size.width && p.y < img_size.height;
}

/* Params:  img_src - input image; img_dst - output crop_x x crop_y image;
            meta    - annotations, shifted in place by the crop offset
   Returns: the random (x_offset, y_offset) center jitter as a Size.
   Purpose: crop a crop_x x crop_y window around the person's center plus a
            random jitter; pixels falling outside img_src are padded with
            gray (128,128,128). */
template<typename Dtype>
Size DataTransformer<Dtype>::augmentation_croppad(Mat& img_src, Mat& img_dst, MetaData& meta) {
  float dice_x = static_cast <float> (rand()) / static_cast <float> (RAND_MAX); // uniform [0,1]
  float dice_y = static_cast <float> (rand()) / static_cast <float> (RAND_MAX); // uniform [0,1]
  int crop_x = param_.crop_size_x(); // crop window width
  int crop_y = param_.crop_size_y(); // crop window height
// random jitter of the crop center, in [-center_perterb_max, center_perterb_max]
  float x_offset = int((dice_x - 0.5) * 2 * param_.center_perterb_max());
  float y_offset = int((dice_y - 0.5) * 2 * param_.center_perterb_max());

  //LOG(INFO) << "Size of img_temp is " << img_temp.cols << " " << img_temp.rows;
  //LOG(INFO) << "ROI is " << x_offset << " " << y_offset << " " << min(800, img_temp.cols) << " " << min(256, img_temp.rows);
  Point2i center = meta.objpos + Point2f(x_offset, y_offset); // jittered crop center
  int offset_left = -(center.x - (crop_x/2)); // how far annotations must shift
  int offset_up = -(center.y - (crop_y/2));
  // int to_pad_right = max(center.x + (crop_x - crop_x/2) - img_src.cols, 0);
  // int to_pad_down = max(center.y + (crop_y - crop_y/2) - img_src.rows, 0);
  
  img_dst = Mat::zeros(crop_y, crop_x, CV_8UC3) + Scalar(128,128,128); // gray padding
  for(int i=0;i<crop_y;i++){
    for(int j=0;j<crop_x;j++){ //i,j on cropped
      // source coordinates of destination pixel (i,j)
      int coord_x_on_img = center.x - crop_x/2 + j;
      int coord_y_on_img = center.y - crop_y/2 + i;
      if(onPlane(Point(coord_x_on_img, coord_y_on_img), Size(img_src.cols, img_src.rows))){
        // copy only pixels that actually exist in the source image
        img_dst.at<Vec3b>(i,j) = img_src.at<Vec3b>(coord_y_on_img, coord_x_on_img);
      }
    }
  }

  //modify meta data: shift every annotated coordinate by the crop offset
  Point2f offset(offset_left, offset_up);
  meta.objpos += offset;
  for(int i=0; i<np; i++){
    meta.joint_self.joints[i] += offset;
  }
  for(int p=0; p<meta.numOtherPeople; p++){
    meta.objpos_other[p] += offset;
    for(int i=0; i<np; i++){
      meta.joint_others[p].joints[i] += offset;
    }
  }

  return Size(x_offset, y_offset);
}
/*
 Swap the left/right joint annotations of one person (used after a
 horizontal flip). Pair tables are 1-indexed (matlab convention).
*/
template<typename Dtype>
void DataTransformer<Dtype>::swapLeftRight(Joints& j) {
  //MPII R leg: 0(ankle), 1(knee), 2(hip)
  //     L leg: 5(ankle), 4(knee), 3(hip)
  //     R arms: 10(wrist), 11(elbow), 12(shoulder)
  //     L arms: 15(wrist), 14(elbow), 13(shoulder)
  static const int right9[4]   = {1,2,3,7};
  static const int left9[4]    = {4,5,6,8};
  static const int right14[6]  = {3,4,5,9,10,11};              //1-index
  static const int left14[6]   = {6,7,8,12,13,14};             //1-index
  static const int right28[11] = {3,4,5,9,10,11,18,19,20,24,25}; //1-index
  static const int left28[11]  = {6,7,8,12,13,14,21,22,23,26,27}; //1-index

  const int* right = 0;
  const int* left = 0;
  int num_pairs = 0;
  if(np == 9){
    right = right9;  left = left9;  num_pairs = 4;
  }
  else if(np == 14){
    right = right14; left = left14; num_pairs = 6;
  }
  else if(np == 28){
    right = right28; left = left28; num_pairs = 11;
  }
  else {
    return; // unknown joint layout: nothing to swap (same as original behavior)
  }

  for(int i = 0; i < num_pairs; i++){
    int ri = right[i] - 1; // 1-indexed table -> 0-indexed joints
    int li = left[i] - 1;
    std::swap(j.joints[ri], j.joints[li]);
    std::swap(j.isVisible[ri], j.isVisible[li]);
  }
}

/* Params:  img_src - input image; img_aug - output (flipped or cloned) image;
            meta    - annotations, mirrored in place when flipping
   Returns: whether a horizontal flip was applied.
   Purpose: flip with probability flip_prob ("rand" mode) or per the
            precomputed table ("table" mode); mirrors all x coordinates and
            swaps left/right joint labels accordingly. */
template<typename Dtype>
bool DataTransformer<Dtype>::augmentation_flip(Mat& img_src, Mat& img_aug, MetaData& meta) {
  bool doflip;
  if(param_.aug_way() == "rand"){
    float dice = static_cast <float> (rand()) / static_cast <float> (RAND_MAX); // uniform [0,1]
    doflip = (dice <= param_.flip_prob()); // flip with probability flip_prob
  }
  else if(param_.aug_way() == "table"){
    // table lookup: row = image index, column = augmentation pass
    doflip = (aug_flips[meta.write_number][meta.epoch % param_.num_total_augs()] == 1);
  }
  else {
    doflip = 0;
    LOG(INFO) << "Unhandled exception!!!!!!";
  }

  if(doflip){
    flip(img_src, img_aug, 1); // 1 = flip around the vertical axis
    int w = img_src.cols;

    // mirror x coordinates; "-1" because valid columns are 0..w-1
    meta.objpos.x = w - 1 - meta.objpos.x;
    for(int i=0; i<np; i++){
      meta.joint_self.joints[i].x = w - 1 - meta.joint_self.joints[i].x;
    }
    if(param_.transform_body_joint())
      swapLeftRight(meta.joint_self); // left joints become right joints and vice versa

    for(int p=0; p<meta.numOtherPeople; p++){
      meta.objpos_other[p].x = w - 1 - meta.objpos_other[p].x;
      for(int i=0; i<np; i++){
        meta.joint_others[p].joints[i].x = w - 1 - meta.joint_others[p].joints[i].x;
      }
      if(param_.transform_body_joint())
        swapLeftRight(meta.joint_others[p]);
    }
  }
  else {
    img_aug = img_src.clone(); // no flip: pass the image through unchanged
  }
  return doflip;
}

/* Apply the 2x3 affine matrix R to point p in place, using homogeneous
   coordinates ([x, y, 1]^T). */
template<typename Dtype>
void DataTransformer<Dtype>::RotatePoint(Point2f& p, Mat R){
  Mat homogeneous(3, 1, CV_64FC1); // column vector (row, col) = (3, 1)
  homogeneous.at<double>(0,0) = p.x;
  homogeneous.at<double>(1,0) = p.y;
  homogeneous.at<double>(2,0) = 1; // homogeneous component
  Mat transformed = R * homogeneous; // matrix-vector product does the mapping
  p.x = transformed.at<double>(0,0);
  p.y = transformed.at<double>(1,0);
}

template<typename Dtype>
float DataTransformer<Dtype>::augmentation_rotate(Mat& img_src, Mat& img_dst, MetaData& meta) {
  // Rotate the image by a random (or table-driven) angle, enlarging the
  // canvas so nothing is cropped, and transform all annotations accordingly.
  // Returns the rotation angle in degrees.
  float degree;
  if(param_.aug_way() == "rand"){
    float dice = static_cast <float> (rand()) / static_cast <float> (RAND_MAX); // uniform in [0,1]
    degree = (dice - 0.5) * 2 * param_.max_rotate_degree(); // uniform in [-max_rotate_degree, +max_rotate_degree]
  }
  else if(param_.aug_way() == "table"){
    // Precomputed angle: [image write_number][augmentation pass within the epoch].
    degree = aug_degs[meta.write_number][meta.epoch % param_.num_total_augs()];
  }
  else {
    degree = 0;
    LOG(INFO) << "Unhandled exception!!!!!!";
  }

  // Build a 2x3 affine matrix rotating about the image center (scale 1.0).
  Point2f center(img_src.cols/2.0, img_src.rows/2.0);
  Mat R = getRotationMatrix2D(center, degree, 1.0);

  // Axis-aligned bounding box of the rotated image: this becomes the new
  // canvas size so the rotated content is fully contained.
  Rect bbox = RotatedRect(center, img_src.size(), degree).boundingRect();

  // Shift the translation part of R so the rotated image sits centered
  // inside the enlarged canvas.
  R.at<double>(0,2) += bbox.width/2.0 - center.x;
  R.at<double>(1,2) += bbox.height/2.0 - center.y;

  // Warp the pixels; areas outside the source are filled with mid-gray.
  warpAffine(img_src, img_dst, R, bbox.size(), INTER_CUBIC, BORDER_CONSTANT, Scalar(128,128,128));

  // Apply the same affine transform to every annotated coordinate:
  // the main person's center and joints, then every other person's.
  RotatePoint(meta.objpos, R);
  for(int i=0; i<np; i++){
    RotatePoint(meta.joint_self.joints[i], R);
  }
  for(int p=0; p<meta.numOtherPeople; p++){
    RotatePoint(meta.objpos_other[p], R);
    for(int i=0; i<np; i++){
      RotatePoint(meta.joint_others[p].joints[i], R);
    }
  }
  return degree;
}

template<typename Dtype>
void DataTransformer<Dtype>::putGaussianMaps(Dtype* entry, Point2f center, int stride, int grid_x, int grid_y, float sigma){
  // Accumulate a Gaussian "heat" bump centered at `center` into the
  // grid_y x grid_x map `entry` (row-major). Each grid cell (gx, gy) maps
  // to input-image coordinates via the stride; values are clamped to 1 so
  // overlapping bumps never exceed full confidence.
  const float start = stride/2.0 - 0.5; // center of the first cell: 0 if stride=1, 0.5 if stride=2, 1.5 if stride=4, ...
  for (int gy = 0; gy < grid_y; gy++){
    const float y = start + gy * stride;
    for (int gx = 0; gx < grid_x; gx++){
      const float x = start + gx * stride;
      // Squared distance from this cell's image coordinate to the center.
      float d2 = (x-center.x)*(x-center.x) + (y-center.y)*(y-center.y);
      float exponent = d2 / 2.0 / sigma / sigma;
      // Skip contributions below 1%: 4.6052 = -ln(0.01).
      if(exponent > 4.6052){
        continue;
      }
      const int idx = gy*grid_x + gx;
      entry[idx] += exp(-exponent);
      if(entry[idx] > 1)
        entry[idx] = 1; // saturate: treat as a probability, capped at 1
    }
  }
}


下面是对这段代码的注释: ``` data = ["This two-wheeler is really good on slippery roads"] # 定义训练数据 sentce=["This is really good"] # 定义输入句子 from sklearn.feature_extraction.text import CountVectorizer # 导入文本特征提取库 from sklearn.feature_extraction.text import TfidfTransformer # 导入TF-IDF转换库 from sklearn.metrics.pairwise import cosine_similarity # 导入余弦相似度计算库 vectorizer = CountVectorizer() # 创建词袋模型 X_train_termcounts = vectorizer.fit_transform(data) # 对训练数据进行特征提取和向量化 tfidf_transformer = TfidfTransformer() # 创建TF-IDF转换器 X_train_tfidf = tfidf_transformer.fit_transform(X_train_termcounts) # 对训练数据进行TF-IDF转换 print ("\nTfidf of training data:", X_train_tfidf.toarray()) # 打印训练数据的TF-IDF矩阵 X_input_termcounts = vectorizer.transform(sentce) # 对输入句子进行特征提取和向量化 X_input_tfidf = tfidf_transformer.transform(X_input_termcounts) # 对输入句子进行TF-IDF转换 print ("\nTfidf of input data:", X_input_tfidf.toarray()) # 打印输入句子的TF-IDF矩阵 print("\nCosine of data:",cosine_similarity(X_train_tfidf,X_input_tfidf)) # 计算训练数据和输入句子的余弦相似度 ``` 这段代码通过使用CountVectorizer和TfidfTransformer库对文本数据进行特征提取和向量化,并使用cosine_similarity计算训练数据和输入句子的余弦相似度。输出结果是训练数据和输入句子的TF-IDF矩阵以及它们之间的余弦相似度。
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值