YOLOv2之将自己的人脸图片集制作成YOLO格式 [by zhangzexuan][17.9.24updated]

最新推荐文章于 2025-06-24 13:47:25 发布

原创最新推荐文章于 2025-06-24 13:47:25 发布 · 915 阅读

0 ·

CC 4.0 BY-SA版权

文章标签：

#YOLOv2 #darknet #人脸检测 #linux #c++

deeplearning 专栏收录该内容

9 篇文章

订阅专栏

本文介绍了一种用于人脸检测的数据预处理方法，包括如何通过手动标注左眼、右眼及嘴角位置来确定人脸边界框，并生成YOLO所需的数据集格式。

摘要生成于 C知道，由 DeepSeek-R1 满血版支持，前往体验 >

修改过Boundingbox的CelebA数据集正在训练中，因为之后还要扩充训练集，加入我所需要的场景图片，因此需要将自己的人脸图片集制作成YOLO训练所需的格式。

之前已经提到，YOLO训练所需的数据其实分为三部分：

第一：图片；

第二：所有图片的绝对路径，以txt形式存储；

第三：每张图片中所有boundingbox的类别、位置、大小信息，同样以txt形式存储，只不过是每张图片对应一个txt文档。

现在所需要制作的其实就是第二和第三部分。

第三部分的Boundingbox计算方式使用上一篇博文的计算方式，详见上一篇。

以下是代码：

gainFoldname.h

// Include guard for the gainFoldname() declaration.
#ifndef GAINFOLDNAME
#define GAINFOLDNAME
// Path-length constant; not referenced anywhere in the code shown alongside this header.
#define MAX_FOLDNAME 512
#include <unistd.h>
#include <string>
// Returns the current working directory of the process, as reported by getcwd().
std::string gainFoldname();
#endif

gainFoldname.cpp

#include "gainFoldname.h"
#include <cstdlib>
// Returns the absolute path of the current working directory, or an empty
// string when getcwd() fails.
std::string gainFoldname()
{
	// With a NULL buffer and size 0, glibc's getcwd() allocates a buffer large
	// enough for the path with malloc(); the caller owns that buffer.
	char* cwd = getcwd(NULL, 0);
	if (cwd == NULL)
		return std::string();	// building a std::string from NULL is undefined behaviour
	const std::string result(cwd);
	std::free(cwd);			// release the malloc()ed buffer now that it has been copied
	return result;
}

readFilesNames.h

// Include guard for the readFilesNames() declaration.
#ifndef READFILESNAMES
#define READFILESNAMES

#include <dirent.h>
#include <opencv2/opencv.hpp>
#include <string>
#include <stdio.h>
#include <stdlib.h>
#include <dirent.h>
#include <unistd.h>

// Recursively scans the directory `foldname` for regular files whose name ends
// in ".jpg" (extension letters matched in either case) and appends each file
// name and its path to the globals picnames / picaddress.
// Returns false when the directory passed in cannot be opened, true otherwise.
bool readFilesNames(const std::string& foldname);
#endif

readFilesNames.cpp

#include "readFilesNames.h"
#include <iostream>
using namespace std;


// Result lists defined in main.cpp and filled by readFilesNames():
// picnames receives the bare file name of every image found, picaddress the
// matching path (directory + "/" + file name), at the same index.
extern vector<string> picnames;
extern vector<string> picaddress;


bool readFilesNames(const std::string& basePath)
{
    DIR *dir;
    struct dirent *ptr;
    std::string base;
    int namelen = 0;


    if ((dir = opendir(basePath.c_str())) == NULL)
    {
        perror("Open dir error...");
        return false;
        exit(1);
    }


    while ((ptr = readdir(dir)) != NULL)
    {
        if(strcmp(ptr->d_name,".")==0 || strcmp(ptr->d_name,"..")==0)    ///current dir OR parrent dir
            continue;
        else if(ptr->d_type == 8){namelen = strlen(ptr->d_name);if(ptr->d_name[namelen-4] == '.' && ( ptr->d_name[namelen-3] == 'j' || ptr->d_name[namelen-3] == 'J' ) && ( ptr->d_name[namelen-2] == 'p' || ptr->d_name[namelen-2] == 'P' ) && ( ptr->d_name[namelen-1] == 'g' || ptr->d_name[namelen-1] == 'G' ))    ///jpgfile
            {picnames.push_back(ptr->d_name);picaddress.push_back(basePath + "/" + ptr->d_name);cout << basePath + "/" + ptr->d_name<<endl;}}
        else if(ptr->d_type == 4)    //dir
        {
            base.clear();
            base = basePath;
            base = base + "/";
            base = base + ptr->d_name;
            readFilesNames(base);
        }
    }
    closedir(dir);
    return true;
}

onMouseParam.h

// Include guard, in the same style as gainFoldname.h and readFilesNames.h.
#ifndef ONMOUSEPARAM
#define ONMOUSEPARAM

#include <opencv2/opencv.hpp>

// Annotation state shared between main() and the onMouse() callback.
// Entry i of each landmark vector belongs to the i-th face marked on the image.
class onMouseParam
{
public:
	cv::Mat image;			// image being annotated; click markers are drawn onto it
	std::vector<cv::Point> leye;	// left-eye clicks
	std::vector<cv::Point> reye;	// right-eye clicks
	//std::vector<cv::Point> nose;
	std::vector<cv::Point> lmouth;	// left mouth-corner clicks
	std::vector<cv::Point> rmouth;	// right mouth-corner clicks
	// Which landmark the next click records: 1 = left eye, 2 = right eye,
	// 3 = unused nose slot, 4 = left mouth corner, 5 = right mouth corner.
	// Starts at 1 so a freshly constructed object never holds an indeterminate value.
	int count = 1;
};

#endif

main.cpp

#include <iostream>
#include <fstream>
#include <opencv2/opencv.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <string>
#include <stdio.h>
#include <sys/stat.h>
#include "gainFoldname.h"
#include "readFilesNames.h"
#include "onMouseParam.h"

using namespace cv;
using namespace std;

// Title of the single HighGUI window used both for display and for mouse input.
const string ImgProcessWin = "drawYOLOlandmarks";
// Key code read in main() after the bounding boxes are shown; selects save / delete / redraw / exit.
int key_for_switch;
// File names and paths of every image found by readFilesNames(); filled in the same order.
vector<string> picnames;
vector<string> picaddress;

// Mouse callback registered on ImgProcessWin; defined after main().
static void onMouse(int,int,int,int,void*);

int main(int argc,char* argv[])
{
	string foldname;
	Mat copyImage;
	double normalization_x;
	double normalization_y;
	double faceR = 0;
    	double eyesdis = 0;
    	double mouthdis = 0;
	double leyelmouth = 0;
    	double reyermouth = 0;
	double maxdis;
	double mindis;
	double factor;
    	double bboxx = 0;//label
    	double bboxy = 0;//label
    	double bboxw = 0;//label
    	double bboxh = 0;//label
	Point facec;
	onMouseParam dataImage;
	bool notargetFile = false;
	cout << "this program aims to do DataPreprocess for YOLO" << endl;
     if(argc == 1)
	foldname = gainFoldname();
     if(argc == 2)
     foldname = argv[1];
     if(argc > 2)
     {;cout << "argc = " << argc << "\nusage:./makeYOLOlandmarks [dir]" <<endl;return 0;}
	if('/' == foldname.back())foldname.pop_back();
	if (!readFilesNames(foldname)){cout << "there is no such dir." << endl;return 0;}
	string txtname = foldname + "/" + "train.txt";
	string labelname;
	ofstream traintxt(txtname);
	namedWindow(ImgProcessWin,WINDOW_AUTOSIZE);
	setMouseCallback(ImgProcessWin, onMouse, (void*)&dataImage);
	key_for_switch = 0;
	for (auto iter = picnames.begin(),picdiriter = picaddress.begin(); iter != picnames.end(); ++iter, ++picdiriter)
	{
		dataImage.count = 1;
		dataImage.image = imread(*picdiriter);
		if(dataImage.image.rows > 1000)resize(dataImage.image, dataImage.image, Size(round(1000 * ((double)dataImage.image.cols / (double)dataImage.image.rows)), 1000), 0, 0);
		if(dataImage.image.cols > 1500)resize(dataImage.image, dataImage.image, Size(1500, round(1500 * ((double)dataImage.image.rows / (double)dataImage.image.cols))), 0, 0);
		copyImage = ((dataImage).image).clone();
		normalization_x = 1.0 / (double)copyImage.cols;
		normalization_y = 1.0 / (double)copyImage.rows;
		//cal label.txt's name
		labelname = (*iter);
		labelname.pop_back();
		labelname.pop_back();
		labelname.pop_back();
		labelname.pop_back();
		if(0 == iter - picnames.begin())mkdir((foldname + "/label").c_str(),S_IRWXU | S_IRWXG | S_IROTH | S_IXOTH);
		labelname = foldname + "/label/" + labelname + ".txt";


		imshow(ImgProcessWin, (dataImage).image);
		waitKey(0);


		ofstream label(labelname);
		label.precision(12);
		for (int a = 0; a < ((dataImage).rmouth).size(); ++a)
		{
			bboxx = (double)(dataImage.leye[a].x + dataImage.reye[a].x + dataImage.lmouth[a].x + dataImage.rmouth[a].x) / 4.0 / (double)dataImage.image.cols;//label
            	bboxy = (double)(dataImage.leye[a].y + dataImage.reye[a].y + dataImage.lmouth[a].y + dataImage.rmouth[a].y) / 4.0 / (double)dataImage.image.rows;//label

            	//calculate distence
            	eyesdis = (double)((dataImage.reye[a].x - dataImage.leye[a].x) * (dataImage.reye[a].x-dataImage.leye[a].x) + (dataImage.reye[a].y-dataImage.leye[a].y) * (dataImage.reye[a].y-dataImage.leye[a].y));
            	eyesdis = sqrt(eyesdis);
            	mouthdis = (double)((dataImage.rmouth[a].x - dataImage.lmouth[a].x) * (dataImage.rmouth[a].x-dataImage.lmouth[a].x) + (dataImage.rmouth[a].y-dataImage.lmouth[a].y) * (dataImage.rmouth[a].y-dataImage.lmouth[a].y));
          		mouthdis = sqrt(mouthdis);
          		leyelmouth = (double)((dataImage.lmouth[a].x - dataImage.leye[a].x) * (dataImage.lmouth[a].x-dataImage.leye[a].x) + (dataImage.lmouth[a].y-dataImage.leye[a].y) * (dataImage.lmouth[a].y-dataImage.leye[a].y));
          		leyelmouth = sqrt(leyelmouth);
          		reyermouth = (double)((dataImage.rmouth[a].x - dataImage.reye[a].x) * (dataImage.rmouth[a].x-dataImage.reye[a].x) + (dataImage.rmouth[a].y-dataImage.reye[a].y) * (dataImage.rmouth[a].y-dataImage.reye[a].y));
          		reyermouth = sqrt(reyermouth);
			maxdis = eyesdis;
            	maxdis > mouthdis ? : maxdis = mouthdis;
            	maxdis > leyelmouth ? : maxdis = leyelmouth;
            	maxdis > reyermouth ? : maxdis = reyermouth;
            	faceR = maxdis * 2.0;
			faceR = faceR * 1.4;
            	bboxw = faceR / (double)dataImage.image.cols;//label
            	bboxh = faceR / (double)dataImage.image.rows;//label
			facec.x = bboxx * dataImage.image.cols;
			facec.y = bboxy * dataImage.image.rows;
			rectangle(dataImage.image,Point(facec.x - faceR / 2,facec.y - faceR / 2),Point(facec.x + faceR / 2,facec.y + faceR / 2),Scalar(0,0,255),1);
			label << 0 << " " << bboxx << " " << bboxy << " " << bboxw << " " << bboxh << endl;
		}
		label.close();
		cout << "press r to redraw this pic\npress s for saving the rectangle points\npress esc fot exit." << endl;
		imshow(ImgProcessWin,dataImage.image);
		key_for_switch = waitKey(0);key_for_switch = key_for_switch - 1048576;


		if (27 == key_for_switch){remove(labelname.c_str());break;}
		bool redraw = false;
		switch (key_for_switch)
		{
		case 's':
			break;
		case 'S':
			break;
		case 'l':
			remove((*picdiriter).c_str());
			remove(labelname.c_str());
			redraw = true;
			break;
		case 'L':
			remove((*picdiriter).c_str());
			remove(labelname.c_str());
			redraw = true;
			break;
		default:
			remove(labelname.c_str());
			(dataImage).image = copyImage.clone();
			--iter; --picdiriter;
                        dataImage.count = 1;
			redraw = true;
		        dataImage.leye.clear();
			dataImage.reye.clear();
			//dataImage.nose.clear();
			dataImage.lmouth.clear();
			dataImage.rmouth.clear();
			break;
		}


		dataImage.leye.clear();
		dataImage.reye.clear();
		//dataImage.nose.clear();
		dataImage.lmouth.clear();
		dataImage.rmouth.clear();
		if(redraw)continue;
		traintxt << *picdiriter << endl;
	}
	traintxt.close();
	cout << "all of your images are processed,congratulation!" << endl;
    return 0;
}


static void onMouse(int event, int x, int y, int flag, void* param)
{
	static bool lbuttondown = false;
	static Point curpoint,firpoint;
	static Mat temp;
	switch (event)
	{
	case EVENT_LBUTTONDOWN:
		if(!lbuttondown)
		{
			firpoint = Point(x, y);
                switch ((*((onMouseParam*)param)).count)
                    {
                    case 1:
                            (*((onMouseParam*)param)).leye.push_back(firpoint);(*((onMouseParam*)param)).count++;rectangle((*((onMouseParam*)param)).image, Point(firpoint.x-1,firpoint.y-1), Point(firpoint.x+1,firpoint.y+1), Scalar(0, 0, 255), -1);break;
                    case 2:
                            (*((onMouseParam*)param)).reye.push_back(firpoint);(*((onMouseParam*)param)).count++;rectangle((*((onMouseParam*)param)).image, Point(firpoint.x-1,firpoint.y-1), Point(firpoint.x+1,firpoint.y+1), Scalar(0, 0, 255), -1);break;
                    case 3:
                            (*((onMouseParam*)param)).count++;//(*((onMouseParam*)param)).nose.push_back(firpoint);(*((onMouseParam*)param)).count++;rectangle((*((onMouseParam*)param)).image, Point(firpoint.x-1,firpoint.y-1), Point(firpoint.x+1,firpoint.y+1), Scalar(0, 0, 255), -1);break;//this is for drawing nose landmark
                    case 4:
                            (*((onMouseParam*)param)).lmouth.push_back(firpoint);(*((onMouseParam*)param)).count++;rectangle((*((onMouseParam*)param)).image, Point(firpoint.x-1,firpoint.y-1), Point(firpoint.x+1,firpoint.y+1), Scalar(0, 0, 255), -1);break;
                    case 5:
                            (*((onMouseParam*)param)).rmouth.push_back(firpoint);(*((onMouseParam*)param)).count = 1;rectangle((*((onMouseParam*)param)).image, Point(firpoint.x-1,firpoint.y-1), Point(firpoint.x+1,firpoint.y+1), Scalar(0, 0, 255), -1);break;
                    }
			imshow(ImgProcessWin, (*((onMouseParam*)param)).image);
			lbuttondown = true;
		}
		break;
	case EVENT_LBUTTONUP:
		lbuttondown = false;
		imshow(ImgProcessWin, (*((onMouseParam*)param)).image);
		break;
	case EVENT_MOUSEMOVE:
		imshow(ImgProcessWin, (*((onMouseParam*)param)).image);
		break;
	}
}

Makefile

# Build rules for the makeYOLOlandmarks annotation tool.
objects=main.o gainFoldname.o readFilesNames.o

OBJDIR=./obj/

# The object files are built by make's implicit .cpp -> .o rule, which uses
# CXXFLAGS: the sources need C++11 and the OpenCV include paths.
CXXFLAGS += -std=c++11 `pkg-config --cflags opencv`

makeYOLOlandmarks:$(objects)
	g++ $(objects) -o makeYOLOlandmarks `pkg-config --cflags --libs opencv`
# Header dependencies, so that editing a header rebuilds the objects using it.
main.o:onMouseParam.h gainFoldname.h readFilesNames.h
gainFoldname.o:gainFoldname.h
readFilesNames.o:readFilesNames.h
.PHONY: clean
clean:
	-rm makeYOLOlandmarks $(objects)

以上是所有代码，需要使用g++支持c++11标准的版本编译。注释就不要在意啦……调试什么的时候用的……我代码写得烂。

使用方法么就是：

./makeYOLOlandmarks

这会搜索当前目录及其子目录中所有jpg图片来处理。label文件夹和train.txt生成在当前目录下。

./makeYOLOlandmarks dir

这会搜索dir及其子目录下的所有jpg图片来处理。label文件夹和train.txt生成在dir目录下。

图片显示出后，依序用鼠标点击人脸的左眼、右眼、左嘴角、右嘴角；一张图中有多个人脸时，按同样的顺序逐个人脸标完。当前图片标注完成后，按任意键显示计算出的boundingbox。之后，若发现标错，可按R重标当前图片（实际上除S、L、Esc以外的任意键都会触发重标）；若觉得图片不合适，可按L删除当前图片；若标注符合要求，则按S保存该图片的路径和boundingbox信息；按Esc则放弃当前图片的标注并退出程序。

差不多就这样。

代码很烂，如果看不下去，可以重写……

[17.9.24updated]：

由于之前的疏忽，之前的算法将label的.txt文件中的boundingbox长宽输出为相等的值，忘记了yolo中boundingbox的标记数据都是归一化的，那么长宽相等的归一化值在图像复原之后，boundingbox就不再是正方形了，因为原图的长宽并不相等……可怜我还丢进去训练了那么久……怪不得我每次测试看到预测的boundingbox怎么都不是正方形……都是白玩儿了……现在更新了代码，将boundingbox的长宽归一化值分别计算出来，再写入label的.txt文件中。

还有一点就是关于侧脸人脸框的大小问题，按照原先的算法，取双眼双嘴角特征点求两两距离的最大值，然后求圆的内接正方形，这样的方式会造成同样距离的人脸正脸的boundingbox比侧脸的boundingbox普遍要大一些，因为正脸的对角线总是要长一些，而侧脸的对角线和边长是相等的。经过改正，如今只在四边形边长中求最大值，虽然这样也会有不平衡的地方，比如45度旋转的侧脸四个边长都会偏小……不过在我的实际应用中这样的情况毕竟极少……所以就先忽略吧~

嗯，又丢进去训练了……