修改过Boundingbox的CelebA数据集正在训练中,因为之后还要扩充训练集,加入我所需要的场景图片,因此需要将自己的人脸图片集制作成YOLO训练所需的格式。
之前已经提到,YOLO训练所需的数据其实分为三部分:
第一:图片;
第二:所有图片的绝对路径,以txt形式存储;
第三:每张图片中所有boundingbox的类别位置大小信息,同样以txt形式存储,只不过是每张图片对应一个txt文档。
现在所需要制作的其实就是第二和第三部分。
第三部分的Boundingbox计算方式使用上一篇博文的计算方式,详见上一篇。
以下是代码:
gainFoldname.h
// Include guard for gainFoldname.h.
#ifndef GAINFOLDNAME
#define GAINFOLDNAME
// Folder-name size constant; NOTE(review): not referenced by any of the code shown alongside this header.
#define MAX_FOLDNAME 512
#include <unistd.h>
#include <string>
// Returns the current working directory (obtained with getcwd) as a std::string.
std::string gainFoldname();
#endif
gainFoldname.cpp
#include "gainFoldname.h"
/// Returns the absolute path of the current working directory.
///
/// The path is written by getcwd() straight into a std::string-owned buffer, so
/// nothing has to be freed by hand (the previous getcwd(NULL, 0) form returned a
/// malloc'ed buffer that was never released, and a NULL result was handed to
/// the std::string constructor).
///
/// @return the working directory, or "." if getcwd() keeps failing.
std::string gainFoldname()
{
    std::string buffer(512, '\0');
    while (getcwd(&buffer[0], buffer.size()) == nullptr)
    {
        // The usual reason is a buffer that is too small, so retry with a bigger
        // one. The cap keeps a persistent failure (for example a deleted working
        // directory) from looping forever.
        if (buffer.size() >= 65536)
            return ".";
        buffer.resize(buffer.size() * 2);
    }
    // Trim the buffer down to the NUL-terminated path getcwd() wrote.
    buffer.resize(std::char_traits<char>::length(buffer.c_str()));
    return buffer;
}
readFilesNames.h
// Include guard for readFilesNames.h.
#ifndef READFILESNAMES
#define READFILESNAMES
#include <dirent.h>
#include <opencv2/opencv.hpp>
#include <string>
#include <stdio.h>
#include <stdlib.h>
// NOTE(review): <dirent.h> is already included above; the repetition is harmless.
#include <dirent.h>
#include <unistd.h>
// Scans `foldname` and its sub-directories for .jpg files and records them in the
// global picnames / picaddress vectors. Returns false when `foldname` cannot be opened.
bool readFilesNames(const std::string& foldname);
#endif
readFilesNames.cpp
#include "readFilesNames.h"
#include <iostream>
using namespace std;
// Defined in main.cpp: bare file name of every .jpg found by readFilesNames().
extern vector<string> picnames;
// Defined in main.cpp: "<directory>/<file name>" for the entry at the same index in picnames.
extern vector<string> picaddress;
/// Recursively collects every .jpg file below `basePath`.
///
/// For each regular file whose name ends in ".jpg" (any letter case) the bare
/// file name is appended to the global `picnames`, "<basePath>/<name>" is
/// appended to the global `picaddress`, and the full path is echoed to stdout.
/// Sub-directories are walked recursively.
///
/// @param basePath directory to scan.
/// @return false when `basePath` itself cannot be opened, true otherwise. A
///         sub-directory that cannot be opened is reported through perror()
///         but does not change the result (best effort, as before).
bool readFilesNames(const std::string& basePath)
{
    DIR* dir = opendir(basePath.c_str());
    if (dir == nullptr)
    {
        perror("Open dir error...");
        return false;
    }

    struct dirent* ptr = nullptr;
    while ((ptr = readdir(dir)) != nullptr)
    {
        const std::string name(ptr->d_name);
        if (name == "." || name == "..") // current dir OR parent dir
            continue;

        const std::string fullPath = basePath + "/" + name;
        if (ptr->d_type == DT_REG)
        {
            // The length guard matters: without it a name shorter than four
            // characters would be indexed before the start of d_name.
            const std::size_t len = name.size();
            const bool isJpg = len >= 4
                && name[len - 4] == '.'
                && (name[len - 3] == 'j' || name[len - 3] == 'J')
                && (name[len - 2] == 'p' || name[len - 2] == 'P')
                && (name[len - 1] == 'g' || name[len - 1] == 'G');
            if (isJpg)
            {
                picnames.push_back(name);
                picaddress.push_back(fullPath);
                std::cout << fullPath << std::endl;
            }
        }
        else if (ptr->d_type == DT_DIR)
        {
            // NOTE(review): entries reported as DT_UNKNOWN (possible on some
            // file systems) are ignored here, exactly as in the old code.
            readFilesNames(fullPath);
        }
    }
    closedir(dir);
    return true;
}
onMouseParam.h
#include <opencv2/opencv.hpp>
/// State shared between main() and the onMouse() callback while one image is
/// being annotated. Entry `i` of each landmark vector belongs to face `i`.
class onMouseParam
{
public:
    /// Image currently shown in the window; click markers and boxes are drawn onto it.
    cv::Mat image;
    /// Clicked left-eye positions, one per face.
    std::vector<cv::Point> leye;
    /// Clicked right-eye positions, one per face.
    std::vector<cv::Point> reye;
    //std::vector<cv::Point> nose;
    /// Clicked left mouth-corner positions, one per face.
    std::vector<cv::Point> lmouth;
    /// Clicked right mouth-corner positions, one per face.
    std::vector<cv::Point> rmouth;
    /// Selects which landmark the next click is stored as (1 = left eye).
    /// Initialised to 1 so the callback never reads an indeterminate value;
    /// main() and onMouse() both reset it to 1 as well.
    int count = 1;
};
main.cpp
#include <iostream>
#include <fstream>
#include <opencv2/opencv.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <string>
#include <stdio.h>
#include <sys/stat.h>
#include "gainFoldname.h"
#include "readFilesNames.h"
#include "onMouseParam.h"
using namespace cv;
using namespace std;
// Name of the HighGUI window used for display and mouse input.
const string ImgProcessWin = "drawYOLOlandmarks";
// Key code read by main() after the computed boxes are shown.
int key_for_switch;
// Filled by readFilesNames(): bare .jpg file names.
vector<string> picnames;
// Filled by readFilesNames(): full path for the entry at the same index in picnames.
vector<string> picaddress;
// Mouse callback for the annotation window; defined after main().
static void onMouse(int,int,int,int,void*);
int main(int argc,char* argv[])
{
string foldname;
Mat copyImage;
double normalization_x;
double normalization_y;
double faceR = 0;
double eyesdis = 0;
double mouthdis = 0;
double leyelmouth = 0;
double reyermouth = 0;
double maxdis;
double mindis;
double factor;
double bboxx = 0;//label
double bboxy = 0;//label
double bboxw = 0;//label
double bboxh = 0;//label
Point facec;
onMouseParam dataImage;
bool notargetFile = false;
cout << "this program aims to do DataPreprocess for YOLO" << endl;
if(argc == 1)
foldname = gainFoldname();
if(argc == 2)
foldname = argv[1];
if(argc > 2)
{;cout << "argc = " << argc << "\nusage:./makeYOLOlandmarks [dir]" <<endl;return 0;}
if('/' == foldname.back())foldname.pop_back();
if (!readFilesNames(foldname)){cout << "there is no such dir." << endl;return 0;}
string txtname = foldname + "/" + "train.txt";
string labelname;
ofstream traintxt(txtname);
namedWindow(ImgProcessWin,WINDOW_AUTOSIZE);
setMouseCallback(ImgProcessWin, onMouse, (void*)&dataImage);
key_for_switch = 0;
for (auto iter = picnames.begin(),picdiriter = picaddress.begin(); iter != picnames.end(); ++iter, ++picdiriter)
{
dataImage.count = 1;
dataImage.image = imread(*picdiriter);
if(dataImage.image.rows > 1000)resize(dataImage.image, dataImage.image, Size(round(1000 * ((double)dataImage.image.cols / (double)dataImage.image.rows)), 1000), 0, 0);
if(dataImage.image.cols > 1500)resize(dataImage.image, dataImage.image, Size(1500, round(1500 * ((double)dataImage.image.rows / (double)dataImage.image.cols))), 0, 0);
copyImage = ((dataImage).image).clone();
normalization_x = 1.0 / (double)copyImage.cols;
normalization_y = 1.0 / (double)copyImage.rows;
//cal label.txt's name
labelname = (*iter);
labelname.pop_back();
labelname.pop_back();
labelname.pop_back();
labelname.pop_back();
if(0 == iter - picnames.begin())mkdir((foldname + "/label").c_str(),S_IRWXU | S_IRWXG | S_IROTH | S_IXOTH);
labelname = foldname + "/label/" + labelname + ".txt";
imshow(ImgProcessWin, (dataImage).image);
waitKey(0);
ofstream label(labelname);
label.precision(12);
for (int a = 0; a < ((dataImage).rmouth).size(); ++a)
{
bboxx = (double)(dataImage.leye[a].x + dataImage.reye[a].x + dataImage.lmouth[a].x + dataImage.rmouth[a].x) / 4.0 / (double)dataImage.image.cols;//label
bboxy = (double)(dataImage.leye[a].y + dataImage.reye[a].y + dataImage.lmouth[a].y + dataImage.rmouth[a].y) / 4.0 / (double)dataImage.image.rows;//label
//calculate distence
eyesdis = (double)((dataImage.reye[a].x - dataImage.leye[a].x) * (dataImage.reye[a].x-dataImage.leye[a].x) + (dataImage.reye[a].y-dataImage.leye[a].y) * (dataImage.reye[a].y-dataImage.leye[a].y));
eyesdis = sqrt(eyesdis);
mouthdis = (double)((dataImage.rmouth[a].x - dataImage.lmouth[a].x) * (dataImage.rmouth[a].x-dataImage.lmouth[a].x) + (dataImage.rmouth[a].y-dataImage.lmouth[a].y) * (dataImage.rmouth[a].y-dataImage.lmouth[a].y));
mouthdis = sqrt(mouthdis);
leyelmouth = (double)((dataImage.lmouth[a].x - dataImage.leye[a].x) * (dataImage.lmouth[a].x-dataImage.leye[a].x) + (dataImage.lmouth[a].y-dataImage.leye[a].y) * (dataImage.lmouth[a].y-dataImage.leye[a].y));
leyelmouth = sqrt(leyelmouth);
reyermouth = (double)((dataImage.rmouth[a].x - dataImage.reye[a].x) * (dataImage.rmouth[a].x-dataImage.reye[a].x) + (dataImage.rmouth[a].y-dataImage.reye[a].y) * (dataImage.rmouth[a].y-dataImage.reye[a].y));
reyermouth = sqrt(reyermouth);
maxdis = eyesdis;
maxdis > mouthdis ? : maxdis = mouthdis;
maxdis > leyelmouth ? : maxdis = leyelmouth;
maxdis > reyermouth ? : maxdis = reyermouth;
faceR = maxdis * 2.0;
faceR = faceR * 1.4;
bboxw = faceR / (double)dataImage.image.cols;//label
bboxh = faceR / (double)dataImage.image.rows;//label
facec.x = bboxx * dataImage.image.cols;
facec.y = bboxy * dataImage.image.rows;
rectangle(dataImage.image,Point(facec.x - faceR / 2,facec.y - faceR / 2),Point(facec.x + faceR / 2,facec.y + faceR / 2),Scalar(0,0,255),1);
label << 0 << " " << bboxx << " " << bboxy << " " << bboxw << " " << bboxh << endl;
}
label.close();
cout << "press r to redraw this pic\npress s for saving the rectangle points\npress esc fot exit." << endl;
imshow(ImgProcessWin,dataImage.image);
key_for_switch = waitKey(0);key_for_switch = key_for_switch - 1048576;
if (27 == key_for_switch){remove(labelname.c_str());break;}
bool redraw = false;
switch (key_for_switch)
{
case 's':
break;
case 'S':
break;
case 'l':
remove((*picdiriter).c_str());
remove(labelname.c_str());
redraw = true;
break;
case 'L':
remove((*picdiriter).c_str());
remove(labelname.c_str());
redraw = true;
break;
default:
remove(labelname.c_str());
(dataImage).image = copyImage.clone();
--iter; --picdiriter;
dataImage.count = 1;
redraw = true;
dataImage.leye.clear();
dataImage.reye.clear();
//dataImage.nose.clear();
dataImage.lmouth.clear();
dataImage.rmouth.clear();
break;
}
dataImage.leye.clear();
dataImage.reye.clear();
//dataImage.nose.clear();
dataImage.lmouth.clear();
dataImage.rmouth.clear();
if(redraw)continue;
traintxt << *picdiriter << endl;
}
traintxt.close();
cout << "all of your images are processed,congratulation!" << endl;
return 0;
}
static void onMouse(int event, int x, int y, int flag, void* param)
{
static bool lbuttondown = false;
static Point curpoint,firpoint;
static Mat temp;
switch (event)
{
case EVENT_LBUTTONDOWN:
if(!lbuttondown)
{
firpoint = Point(x, y);
switch ((*((onMouseParam*)param)).count)
{
case 1:
(*((onMouseParam*)param)).leye.push_back(firpoint);(*((onMouseParam*)param)).count++;rectangle((*((onMouseParam*)param)).image, Point(firpoint.x-1,firpoint.y-1), Point(firpoint.x+1,firpoint.y+1), Scalar(0, 0, 255), -1);break;
case 2:
(*((onMouseParam*)param)).reye.push_back(firpoint);(*((onMouseParam*)param)).count++;rectangle((*((onMouseParam*)param)).image, Point(firpoint.x-1,firpoint.y-1), Point(firpoint.x+1,firpoint.y+1), Scalar(0, 0, 255), -1);break;
case 3:
(*((onMouseParam*)param)).count++;//(*((onMouseParam*)param)).nose.push_back(firpoint);(*((onMouseParam*)param)).count++;rectangle((*((onMouseParam*)param)).image, Point(firpoint.x-1,firpoint.y-1), Point(firpoint.x+1,firpoint.y+1), Scalar(0, 0, 255), -1);break;//this is for drawing nose landmark
case 4:
(*((onMouseParam*)param)).lmouth.push_back(firpoint);(*((onMouseParam*)param)).count++;rectangle((*((onMouseParam*)param)).image, Point(firpoint.x-1,firpoint.y-1), Point(firpoint.x+1,firpoint.y+1), Scalar(0, 0, 255), -1);break;
case 5:
(*((onMouseParam*)param)).rmouth.push_back(firpoint);(*((onMouseParam*)param)).count = 1;rectangle((*((onMouseParam*)param)).image, Point(firpoint.x-1,firpoint.y-1), Point(firpoint.x+1,firpoint.y+1), Scalar(0, 0, 255), -1);break;
}
imshow(ImgProcessWin, (*((onMouseParam*)param)).image);
lbuttondown = true;
}
break;
case EVENT_LBUTTONUP:
lbuttondown = false;
imshow(ImgProcessWin, (*((onMouseParam*)param)).image);
break;
case EVENT_MOUSEMOVE:
imshow(ImgProcessWin, (*((onMouseParam*)param)).image);
break;
}
}
Makefile
# Builds the makeYOLOlandmarks annotation tool.
# Recipe lines must start with a TAB character.
objects=main.o gainFoldname.o readFilesNames.o
OBJDIR=./obj/
# The sources need C++11 and the OpenCV headers; the implicit .cpp -> .o rule picks these up.
CXXFLAGS+=-std=c++11 `pkg-config --cflags opencv`
makeYOLOlandmarks:$(objects)
	g++ $(objects) -o makeYOLOlandmarks `pkg-config --cflags --libs opencv`
main.o:onMouseParam.h gainFoldname.h readFilesNames.h
gainFoldname.o:gainFoldname.h
# Target name fixed (was readFileNames.o, which matched no object file).
readFilesNames.o:readFilesNames.h
.PHONY: clean
clean:
	-rm makeYOLOlandmarks $(objects)
以上是所有代码,需要使用g++支持c++11标准的版本编译。注释就不要在意啦……调试什么的时候用的……我代码写得烂。
使用方法么就是:
./makeYOLOlandmarks
这会搜索当前目录及其子目录中所有jpg图片来处理。label文件夹和train.txt生成在当前目录下。
./makeYOLOlandmarks dir
这会搜索dir及其子目录下的所有jpg图片来处理,label文件夹和train.txt生成在dir目录下。图片显示出来后,依次用鼠标点击人脸的左眼、右眼、左嘴角、右嘴角;一张图中有多个人脸时,按同样的顺序逐个人脸标完。当前图片标注完成后,按任意键显示计算出的boundingbox。之后,若发现标错,可按R键(实际上除S、L、Esc以外的任意键均可)重标当前图片;觉得图片不合适可以按L键删除当前图片;标注符合要求则按S键保存该图片的路径和boundingbox信息;按Esc键则放弃当前图片的标注并退出程序。
差不多就这样。
代码很烂,如果看不下去,可以重写……
[17.9.24updated]:
由于之前的疏忽,之前的算法将label的.txt文件中的boundingbox长宽输出为相等的值,忘记了yolo中boundingbox的标记数据都是归一化的,那么长宽相等的归一化值在图像复原之后,boundingbox就不再是正方形了,因为原图的长宽并不相等……可怜我还丢进去训练了那么久……怪不得我每次测试看到预测的boundingbox怎么都不是正方形……都是白玩儿了……现在更新了代码,将boundingbox的长宽归一化值分别计算出来,再写入label的.txt文件中。
还有一点就是关于侧脸人脸框的大小问题,按照原先的算法,取双眼双嘴角特征点求两两距离的最大值,然后求圆的内接正方形,这样的方式会造成同样距离的人脸正脸的boundingbox比侧脸的boundingbox普遍要大一些,因为正脸的对角线总是要长一些,而侧脸的对角线和边长是相等的。经过改正,如今只在四边形边长中求最大值,虽然这样也会有不平衡的地方,比如45度旋转的侧脸四个边长都会偏小……不过在我的实际应用中这样的情况毕竟极少……所以就先忽略吧~
嗯,又丢进去训练了……