前言
opencv中HOG+SVM已经是一个相对成熟的环境,只是在满足兼容性的条件下会牺牲很大的时间开销,在训练部分未做改进,只是对detectMultiScale()中的predict进行改进。
其中HOG特征计算请参考:
http://blog.youkuaiyun.com/liulina603/article/details/8291093
SVM算法原理(写的相当详细):
http://blog.youkuaiyun.com/v_july_v/article/details/7624837
本文代码也放出来供大家参考:http://download.youkuaiyun.com/detail/bleakie/9684568
内容
背景知识已经浏览完备,现在可以参考detectMultiScale()的详细介绍,就可以了解在预测部分HOG+SVM到底做了些什么
http://blog.youkuaiyun.com/suifeng50/article/details/42294271
http://www.cnblogs.com/klitech/p/5747895.html (里面的参数怎么设置)
要讲的几点
1.两分类的时候标签定义为-1和1
2.多分类的时候标签定义从1开始,用到测试时是int index = (int)model->predict();(cv::ml::SVM没有detect()接口,预测用的是predict())
3.两分类的时候可生成txt或者xml文件,多分类只能用xml
一.原版的训练加测试
/*-------------------------------------------------
HOG+SVM的训练和预测函数(原始版本)
-------------------------------------------------*/
#include "opencv2/opencv.hpp"
#include "opencv2/ml.hpp"
#include <stdio.h>
#include <string.h>
#include <cctype>
#include<iostream>
#include <fstream>
using namespace cv;
using namespace cv::ml;
using namespace std;
CvSize RESIZE(45, 27);///图片resize的大小
CvSize winSize(3, 3);
CvSize blockSize(3, 3);
CvSize blockStride(5, 10);
CvSize cellSize(3, 3);
int nbins = 9;
// Reduce the dimensionality of a sample matrix with PCA.
// featureVectorOfSample: one sample per row (CV_32F).
// Returns the samples projected into the PCA subspace that retains 95% of
// the variance (one row per projected sample).
Mat PcaDim(const Mat& featureVectorOfSample)
{
    ///////------------PCA-------------------------///////
    // retainedVariance = 0.95 lets OpenCV pick the number of components.
    PCA pca(featureVectorOfSample, Mat(), PCA::DATA_AS_ROW, 0.95);
    // The original projected twice: once into a scratch Mat sized from the
    // eigenvalues (result discarded) and once for the return value. A single
    // projection is sufficient and avoids the wasted pass.
    return pca.project(featureVectorOfSample);
}
// Sharpen an image with the 3x3 kernel  out = 5*center - left - right - up - down,
// applied per channel; the one-pixel border of the result is set to zero.
// img:    input image (any channel count, 8-bit).
// result: output image, allocated to the same size/type.
void Enhance(const Mat& img, Mat& result)
{
    //------图像增强 (image enhancement / sharpening)
    result.create(img.size(), img.type());
    for (int row = 1; row < img.rows - 1; row++)
    {
        const uchar* previous = img.ptr<const uchar>(row - 1);
        const uchar* current = img.ptr<const uchar>(row);
        const uchar* next = img.ptr<const uchar>(row + 1);
        uchar* output = result.ptr<uchar>(row);
        int ch = img.channels();
        int starts = ch;
        int ends = (img.cols - 1) * ch;
        for (int col = starts; col < ends; col++)
        {
            // Write at the same byte offset as the source pixel. The original
            // used *output++ starting from byte 0 while reading from byte
            // `starts`, which shifted the whole sharpened row one pixel left.
            output[col] = saturate_cast<uchar>(5 * current[col] - current[col - ch] - current[col + ch] - previous[col] - next[col]);
        }
    }
    // Zero the untouched one-pixel border.
    result.row(0).setTo(Scalar::all(0));
    result.row(result.rows - 1).setTo(Scalar::all(0));
    result.col(0).setTo(Scalar::all(0));
    result.col(result.cols - 1).setTo(Scalar::all(0));
}
#define FILEPATH "E:/GeekBigData/CODE/RectDetect/model/"
///////////////////////////////////HOG+SVM识别方式
void HOG_Train()
{
//图像路径和类别
vector<string> imagePath;
vector<int> imageClass;
int numberOfLine = 0;
string buffer;
ifstream trainingData("E:/GeekBigData/image/trainimg/train7.txt");
unsigned long n;
bool bLoadTrainSamples = 0;
if (trainingData.is_open())
{
while (!trainingData.eof())
{
getline(trainingData, buffer);
if (!buffer.empty())
{
bLoadTrainSamples = 1;
++numberOfLine;
if (numberOfLine % 2 == 0)
{
//读取样本类别
imageClass.push_back(atoi(buffer.c_str()));
}
else
{
//读取图像路径
imagePath.push_back(buffer);
}
}
}
}
//关闭文件
trainingData.close();
//样本的类别
Mat trainImage;
HOGDescriptor hog(winSize, blockSize, blockStride, cellSize, nbins);
vector<float> descriptors, descriptors2;
Mat FeaSize0 = imread(imagePath[0], IMREAD_GRAYSCALE);//用来计算hog特征维度,之前在网上找的计算hog特征维度的方法不正确,所以就加了这个
Mat FeaSize;
resize(FeaSize0, FeaSize, RESIZE);
hog.compute(FeaSize, descriptors2);//计算descriptors
int nSampleDim = descriptors2.size();
//////////////////////////////获取样本的HOG特征///////////////////////////////////////////////////
//样本特征向量矩阵
int numberOfSample = numberOfLine / 2;
Mat featureVectorOfSample(numberOfSample, nSampleDim, CV_32FC1);//矩阵中每行为一个样本
Mat classOfSample(numberOfSample, 1, CV_32SC1);
// 计算HOG特征
for (vector<string>::size_type i = 0; i <= imagePath.size() - 1 && bLoadTrainSamples; ++i)
{
Mat src = imread(imagePath[i], IMREAD_GRAYSCALE);
if (NULL != src.data)
{
cout << "processing:" << imagePath[i] << endl;
// 归一化
resize(src, trainImage, RESIZE);
descriptors.clear();
hog.compute(trainImage, descriptors);//计算descriptors
cout << "HOG dimensions:" << descriptors.size() << endl << i << endl;
//保存到特征向量矩阵中
for (vector<float>::size_type j = 0; j <= descriptors.size() - 1; ++j)
{
featureVectorOfSample.at<float>(i, j) = descriptors[j];
}
//保存类别到类别矩阵 //!!注意类别类型一定要是int 类型的
classOfSample.at<int>(i, 0) = imageClass[i];
}
else
{
cout << "can not load the image:" << imagePath[i] << endl;
continue;
}
}
Ptr<SVM> svm;
if (bLoadTrainSamples)
{
svm = SVM::create();
svm->setType(SVM::C_SVC);
svm->setKernel(SVM::RBF);//想使用模型生成的txt文件必须使用线性SVM进行训练,而且类别只有两类,因为HogDescriptor检测函数只支持线性检测!!!,但个利用predict函数就调用xml文件,这个就没要求
svm->setC(1);
//-------------------参数优化-------------------------//
cv::Ptr<cv::ml::TrainData> TrainData = cv::ml::TrainData::create(featureVectorOfSample, cv::ml::ROW_SAMPLE, classOfSample);
svm->trainAuto(TrainData, 10, cv::ml::SVM::getDefaultGrid(cv::ml::SVM::C), cv::ml::SVM::getDefaultGrid(cv::ml::SVM::GAMMA), cv::ml::SVM::getDefaultGrid(cv::ml::SVM::P),
cv::ml::SVM::getDefaultGrid(cv::ml::SVM::NU), cv::ml::SVM::getDefaultGrid(cv::ml::SVM::COEF), cv::ml::SVM::getDefaultGrid(cv::ml::SVM::DEGREE), false);
//-------------------不使用参数优化-------------------------//
//svm->setTermCriteria(TermCriteria(CV_TERMCRIT_ITER, 1000, 1e-4));
//svm->train(featureVectorOfSample, ROW_SAMPLE, classOfSample);
svm->save(string(FILEPATH) + "Auto_model7_rbf.xml");
}
Mat supportVector = svm->getSupportVectors();//
nSampleDim = supportVector.cols;
Mat alpha;//每个支持向量对应的参数α(拉格朗日乘子),默认alpha是float64的
Mat svIndex;//支持向量所在的索引
float rho = svm->getDecisionFunction(0, alpha, svIndex);
Mat alpha2;
alpha.convertTo(alpha2, CV_32FC1);
//结果矩阵,两个矩阵相乘
Mat result(1, nSampleDim, CV_32FC1);
result = alpha2*supportVector;//在hog+svm只支持两分类,所以支持向量数肯定是1*vec
////注意因为svm.predict使用的是alpha*sv*another-rho,如果为负的话则认为是正样本,在HOG的检测函数中,使用rho+alpha*sv*another(another为-1)
for (int i = 0; i < nSampleDim; ++i)
result.at<float>(0, i) *= -1;
//这个才是真正的判别函数的参数(ω),HOG可以直接使用该参数进行识别
FILE *fp = fopen((string(FILEPATH) + "Auto_model7_linear.txt").c_str(), "wb");
for (int i = 0; i < nSampleDim; i++)
{
fprintf(fp, "%f \n", result.at<float>(0, i));//只取第一个得到的权重
}
fprintf(fp, "%f", rho);
fclose(fp);
}
// Entry point of the HOG+SVM pipeline: currently just runs training
// (which also exports the model files for the detection functions).
void HogSvm()
{
    HOG_Train();
}
训练完成后会生成一个.xml文件(后面又把它存到txt文件中),xml文件里包含两组常量(权重W和偏移量b),在predict()函数里对测试img的Test需要复杂的循环和变换,不利于实时性的表达,考虑到线性核函数的决策函数 f(x) = ∑_{i=1}^{m} a_i·y_i·(x_i·x) + b 满足此函数关系,又w、b已由train得出,则直接用到test中,对不同坐标下的img进行扫描,得到测试结果
二.预测部分的修改:
/*-------------------------------------------------
// 使用训练好的分类器识别,二分类时候,使用hog.detectMultiScale,这是opencv自带的
-------------------------------------------------*/
void HOG_Detect()
{
Mat fram, grayimg;
vector<float> detector;
ifstream fileIn(string(FILEPATH) + "Auto_model7_linear.txt", ios::in);
float val = 0.0f;
while (!fileIn.eof())
{
fileIn >> val;
detector.push_back(val);
}
fileIn.close();
HOGDescriptor hog(winSize, blockSize, blockStride, cellSize, nbins);
hog.setSVMDetector(detector);// 使用自己训练的分类器
char path[256];
int nFramNum = 0;
int nId = 0;
while (true)
{
// //读图片
char img_path[125];
sprintf(img_path, "E:/GeekBigData/image/TestImg/%d.jpg", nId);
fram = imread(img_path);
printf("******************************帧号:%d*********************************\n", nId);
if (NULL != fram.data)
{
cvtColor(fram, grayimg, CV_BGR2GRAY);
vector<Rect> found1, found_filtered1;
fflush(stdout);
double t = (double)getTickCount();
found1.clear();
found_filtered1.clear();
hog.detectMultiScale(grayimg, found1, 0, Size(16, 16), Size(8, 8), 1.05, 2); //多尺度检测
t = (double)getTickCount() - t;
printf("detection time = %gms\n", t*1000. / cv::getTickFrequency());
size_t i, j;
//去掉空间中具有内外包含关系的区域,保留大的
for (i = 0; i < found1.size(); i++)
{
Rect r1 = found1[i];
for (j = 0; j < found1.size(); j++)
if (j != i && (r1 & found1[j]) == r1)
break;
if (j == found1.size())
found_filtered1.push_back(r1);
}
// 适当缩小矩形
for (i = 0; i < found_filtered1.size(); i++)
{
Rect r1 = found_filtered1[i];
r1.x += cvRound(r1.width*0.1);
r1.width = cvRound(r1.width*0.8);
r1.y += cvRound(r1.height*0.1);
r1.height = cvRound(r1.height*0.8);
/*sprintf(path, "E:\\GeekBigData\\image\\trainimg\\模型得到的负样本\\%d.jpg", nFramNum);
imwrite(path, fram(r1));*/
nFramNum++;
rectangle(fram, r1.tl(), r1.br(), cv::Scalar(255, 0, 0), 1);
}
imshow("hogsvm", fram);
cvWaitKey(0);
}
else
{
continue;
}
nId++;
}
}
/*-------------------------------------------------
HOG+SVM的预测函数(predict函数的修改)
-------------------------------------------------*/
#include "opencv2/opencv.hpp"
#include "opencv2/ml.hpp"
#include <vector>
#include <stdio.h>
#include <string.h>
#include <cctype>
#include<iostream>
#include <fstream>
using namespace cv;
using namespace cv::ml;
using namespace std;
#define NORM_SAMPLE_W 64 //
#define NORM_SAMPLE_H 64 //行人:128
#define FILEPATH "F:/实验/实习/HogSvm/model/"
void OwnDetect()
{
//版本
Mat img1, img2, img3, img1_rgb, img2_rgb, img3_rgb;
Mat combine,combine1, combine2;
Mat grayimg;
Mat fram;
/*-------------------载入支持向量机-------------------------------*/
vector<float> detector;
vector<float>descriptors;
ifstream fileIn(string(FILEPATH) + "Own_DoubleSvm_LINEAR_1.txt", ios::in);
float val = 0.0f;
while (!fileIn.eof())
{
fileIn >> val;
detector.push_back(val);
}
fileIn.close();
//设置HOG
cv::HOGDescriptor hog(cv::Size(64,64), cv::Size(16,16), cv::Size(8,8), cv::Size(8,8), 9);
hog.setSVMDetector(detector);// 使用自己训练的分类器
/*-------------------载入图片or视频-------------------------------*/
int nId = 0; //读图片
// while (true)
// {
//#define READ_CONTINUOUS
//
//#ifdef READ_CONTINUOUS//读取连续帧
// char img_path[125];
// sprintf(img_path,"F:/实验/实习/HogSvm/813_103-105/103avm/%d.bmp",nId+1);
// fram = imread(img_path);
// flip(fram,fram,0);
// printf("******************************帧号:%d*********************************\n",nId+1);
//#else//读取不连续帧,即跳三帧测试
// int k=3*nId+1;
// char img_path[125];
// sprintf(img_path,"E:/company code/BSD_project/BSD_Video/822/1/avm/%d.bmp",k);
// fram = imread(img_path);
// flip(fram,fram,0);
// printf("******************************帧号:%d**************%d*******************\n",nId,3*nId+1);
//#endif
//
//读视频
VideoCapture cap("F:\\实验\\实习\\HogSvm\\40km.h\\400_SYNC.avi");
while(1)//读视频时候用到
{
cap>>fram;
if(!fram.data)
{
break;
}
cvtColor(fram,grayimg,CV_RGB2GRAY);//CV_BGR2GRAY
Rect DetArea1 = Rect(150,15,120,450);//设置左右检测区域
Rect DetArea2 = Rect(360,15,120,450);
Rect DetArea3 = Rect(270,15,90,450);
img1 = grayimg(DetArea1);
img2 = grayimg(DetArea2);
img3 = grayimg(DetArea3);
img1_rgb = fram(DetArea1);
img2_rgb = fram(DetArea2);
img3_rgb = fram(DetArea3);
if (!(img1.data || img2.data) )
continue;
fflush(stdout);//清空缓存
vector<Rect> found1, found2, found_filtered1, found_filtered2;
double t = (double)getTickCount();
float fscale = pow(1.2f,0);//对img进行resize尺度
Mat ResizeImg1,ResizeImg2;
resize(img1,ResizeImg1,Size(img1.cols/fscale,img1.rows/fscale));
resize(img2,ResizeImg2 ,Size(img2.cols/fscale,img2.rows/fscale));
/*-------------------win下进行step遍历img-------------------------------*/
for(int j1 =0, j2 = 0; j1 < ResizeImg1.rows -64, j2 < ResizeImg1.rows -64;j1+=64, j2 += 3)
{
for(int i1 =0, i2 = 0; i1 < ResizeImg1.cols-64, i2 < ResizeImg1.cols-64;i1+=34, i2 += 3)
{
float threshould1 = 1.2, threshould2 = 1.2;//设置左右检测的阈值
Rect RioRec1 = Rect(i1,j1,64,64);Rect RioRec2 = Rect(i2,j2,64,64);
//先计算left
descriptors.clear();
hog.compute(ResizeImg1(RioRec1), descriptors);//计算descriptors
/////
float sum1 = hog.svmDetector[descriptors.size()];
for(int i = 0; i < descriptors.size(); i++)
{
float fea1 = descriptors[i];
float svmW1 = hog.svmDetector[i];
sum1 += fea1*svmW1;//y = w*x + b
}
if( sum1 > threshould1)
{
RioRec1.x = RioRec1.x * fscale;
RioRec1.y = RioRec1.y * fscale;
RioRec1.width = RioRec1.width * fscale;
RioRec1.height = RioRec1.height * fscale;
found1.push_back(RioRec1);
cout << "左侧有车" << endl;
}
///计算right
descriptors.clear();
hog.compute(ResizeImg2(RioRec2), descriptors);
/////
float sum2 = hog.svmDetector[descriptors.size()];
for(int i = 0; i < descriptors.size(); i++)
{
float fea2 = descriptors[i];
float svmW2 = hog.svmDetector[i];
sum2 += fea2*svmW2;
}
if( sum2 > threshould2)
{
RioRec2.x = RioRec2.x * fscale;
RioRec2.y = RioRec2.y * fscale;
RioRec2.width = RioRec2.width * fscale;
RioRec2.height = RioRec2.height * fscale;
found2.push_back(RioRec2);
cout << "右侧有车" << endl;
}
}
}
//加入非极大值抑制来去除重叠
nonMaximumSuppression_own(found1, found_filtered1, 0.6, 4);%%%后面两个参数分别是面积重叠率和重叠的个数
nonMaximumSuppression_own(found2, found_filtered2, 0.6, 4);
size_t i, j;
////-------------------------left-------------------------////
for (i = 0; i < found1.size(); i++)
{
Rect r1 = found1[i];
for (j = 0; j < found1.size(); j++)
if (j != i && (r1 & found1[j]) == r1)
break;
if (j == found1.size())
found_filtered1.push_back(r1);
}
// 适当缩小矩形
for (i = 0; i < found_filtered1.size(); i++)
{
Rect r1 = found_filtered1[i];
r1.x += cvRound(r1.width*0.1);
r1.width = cvRound(r1.width*0.8);
r1.y += cvRound(r1.height*0.07);
r1.height = cvRound(r1.height*0.8);
rectangle(img1_rgb, found_filtered1[i], cv::Scalar(0, 0, 255), 3);
}
////-------------right----------------------------------////
for (i = 0; i < found2.size(); i++)
{
Rect r2 = found2[i];
for (j = 0; j < found2.size(); j++)
if (j != i && (r2 & found2[j]) == r2)
break;
if (j == found2.size())
found_filtered2.push_back(r2);
}
// 适当缩小矩形
for (i = 0; i < found_filtered2.size(); i++)
{
Rect r2 = found_filtered2[i];
r2.x += cvRound(r2.width*0.1);
r2.width = cvRound(r2.width*0.8);
r2.y += cvRound(r2.height*0.07);
r2.height = cvRound(r2.height*0.8);
rectangle(img2_rgb, r2.tl(), r2.br(), cv::Scalar(0, 0, 255), 3);
}
/*-------------合并img1和img2-------------*/
hconcat(img1_rgb,img3_rgb,combine1);
hconcat(combine1,img2_rgb,combine2);
vconcat(combine2,combine);
t = (double)getTickCount() - t;
printf("detection time = %gms\n", t*1000. / cv::getTickFrequency());
imshow("OwnDetect",combine);
waitKey(10);
nId++;
}
}
这里左右两侧的车设定了两个阈值,对于step大小自己再进行相应的修改,聚类部分对照detectMultiScale()进行修改,当时记得是改好了的,不知道保存到哪里了!!!
三.结果
opencv里面的结果:
改进predict的结果: