1. HOG特征,学习参考链接: 基于传统图像处理的目标检测与识别(HOG+SVM附代码)
2. SVM原理: 可以拜读一下july大神的文章,链接如下: 支持向量机通俗导论(理解SVM的三层境界)
3. 实现代码:
(1). 准备样本: 我先用手机拍摄了一段物品的视频,然后使用OpenCV读取这段视频,将其逐帧分解成图片,作为正样本或负样本;再使用Python把样本图片的路径列表写入csv文件(用Python实现更简单一点,哈哈),具体代码见文末附录
(2). 计算正负样本的HOG特征,并生成对应的标签
(3). 训练SVM并将结果保存为xml,下一次可以直接加载xml,方便快速
(4). 找一张图片测试,显示结果
4. 这个demo的训练样本和训练结果xml可以下载,只设置了1个积分的下载门槛,具体下载地址如下: 训练正负样本下载
希望大家也可以自己拍摄视频做出自己喜欢的样本,享受学习的乐趣^&^(感谢AMing友情赞助的龙井茶)
具体完整HOG计算和SVM训练检测代码如下:
#include "stdafx.h"
#include <opencv2/opencv.hpp>
using namespace std;
using namespace cv;
using namespace cv::ml;
// Reads a list file that holds one image path per line and returns the paths
// in file order.
//
// filePath: path of the text/CSV list file to read.
// Returns:  every non-empty line of the file, with one trailing '\r' removed.
//           When the file cannot be opened, a message is printed, the function
//           blocks on getchar(), and an empty vector is returned.
std::vector<std::string> getImagePathName(std::string filePath)
{
    std::vector<std::string> imgPathVec;
    std::ifstream file(filePath.c_str(), std::ifstream::in);
    if (!file)
    {
        std::cout << "could not load file correctlly ..." << std::endl;
        getchar();
        return imgPathVec;
    }

    std::string line;
    while (std::getline(file, line))
    {
        // A list written with CRLF line endings leaves '\r' at the end of the
        // line when it is read as LF-terminated text; strip it so the path
        // stays usable.
        if (!line.empty() && line[line.size() - 1] == '\r')
        {
            line.erase(line.size() - 1);
        }
        // Skip blank lines: an empty string is never a valid image path.
        if (line.empty())
        {
            continue;
        }
        imgPathVec.push_back(line);
    }
    return imgPathVec;
}
int main()
{
bool generateModel = false; // 是否用于训练,或者可以直接load已经训练好的xml
int featureVectorNum = 3780; // 图片64*128 -> ((64-16)/8+1) * ((128-16)/8+1)* ((16/8)*(16/8)) * 9 // block数 * cell个数 * cell中bin特征数
Ptr<SVM> svm = SVM::create();
svm->setType(SVM::C_SVC);
svm->setKernel(SVM::LINEAR);
svm->setTermCriteria(TermCriteria(CV_TERMCRIT_ITER, 100, FLT_EPSILON));
if (generateModel)
{
// 正反例分开文件放了,也可以生成一个文件直接读取,
string posFileName = string("AMingLongjingPositive02.csv");
string nagFileName = string("AMingLongjingNagtive01.csv");
vector<string> posImgPathVec = getImagePathName(posFileName);
vector<string> nagImgPathVec = getImagePathName(nagFileName);
Mat trainImg;
// hog特征初始化 win size, block size, block step, cell size, bin num;
HOGDescriptor hog(Size(64, 128), Size(16, 16), Size(8, 8), Size(8, 8), 9);
vector<float> descriptors;
//存放每一张正反例图片的hog特征 和 正反例的标签 -1 +1
Mat featureVectorOfSample(posImgPathVec.size() + nagImgPathVec.size(), featureVectorNum, CV_32FC1);
vector<int> labels; //这里的标签 正例 +1 负例 -1
// 算正例的 positive
for (int i = 0; i < posImgPathVec.size(); i++)
{
trainImg = imread(posImgPathVec[i]);
//imshow("trainImg", trainImg);
hog.compute(trainImg, descriptors);
cout << "positive hog feature vector " << i << " :" << descriptors.size() << endl;
for (int j = 0; j < descriptors.size(); j++)
{
featureVectorOfSample.at<float>(i, j) = descriptors[j];
}
labels.push_back(1);
}
// 算反例的 nagtive
for (int i = 0; i < nagImgPathVec.size(); i++)
{
trainImg = imread(nagImgPathVec[i]);
//imshow("trainImg", trainImg);
hog.compute(trainImg, descriptors);
cout << "nagtive hog feature vector " << i << " :" << descriptors.size() << endl;
for (int j = 0; j < descriptors.size(); j++)
{
featureVectorOfSample.at<float>(posImgPathVec.size() + i, j) = descriptors[j];
}
labels.push_back(-1);
}
// 用SVM训练
svm->train(featureVectorOfSample, ROW_SAMPLE, labels);
svm->save("LongJing02.xml");
}
else
{
//svm->read("LongJing01.xml"); //原来不是像人脸检测那样load了
svm = StatModel::load<SVM>("LongJing02.xml");
}
// 获取支持向量,其中w = alpha * y * x, ( y 和 x 表示支持向量的样本信息)
Mat supportVector = svm->getSupportVectors();
Mat alpha, svIdx;
float rho = svm->getDecisionFunction(0, alpha, svIdx);
// 转换成32的
Mat alpha2;
alpha.convertTo(alpha2, CV_32FC1);
//结果矩阵,两个矩阵相乘 1 x d * d x N
Mat result(1, featureVectorNum, CV_32FC1);
result = alpha2 * supportVector;
//乘以-1,这里为什么会乘以-1? //解释转自 hongbin_xu大佬的文章:
//注意因为svm.predict使用的是alpha*sv*another-rho,如果为负的话则认为是正样本,在HOG的检测函数中,使用rho+alpha*sv*another(another为-1)
for (int i = 0;i < featureVectorNum;i++)
{
result.at<float>(0, i) *= -1;
}
vector<float> detector;
for (int i = 0; i < featureVectorNum; i++)
{
detector.push_back(result.at<float>(0, i));
}
detector.push_back(rho);
HOGDescriptor myHog;
vector<Rect> detectedRect;
myHog.setSVMDetector(detector);
//test picture, 这里训练的图片要和测试的图片比例相差不大,不然不太好检测出来
Mat imageSrc = imread("longjingTest04-0.jpg");
myHog.detectMultiScale(imageSrc, detectedRect, 0, Size(16, 16), Size(64,64), 1.05, 2);
for (int i = 0; i < detectedRect.size(); i++)
{
rectangle(imageSrc, detectedRect[i],Scalar(0,255,0),2);
}
imshow("dst", imageSrc);
/*VideoCapture capture(0);
Size S = Size((int)capture.get(CV_CAP_PROP_FRAME_WIDTH), (int)capture.get(CV_CAP_PROP_FRAME_HEIGHT));
int fps = capture.get(CV_CAP_PROP_FPS);
Mat frame, detectImg;
while (1)
{
capture >> frame;
resize(frame, detectImg,Size(120,160), 0, 0);
myHog.detectMultiScale(detectImg, detectedRect, 0, Size(16, 16), Size(32, 32), 1.05, 10);
for (int i = 0; i < detectedRect.size(); i++)
{
rectangle(frame, detectedRect[i], Scalar(0, 255, 0), 2);
}
if (frame.empty()) break;
waitKey(300);
imshow("vedio", frame);
}
*/
waitKey(0);
return 0;
}
检测结果:
附录: 视频分解生成图片样本代码
#include <iostream>
#include <opencv2/opencv.hpp>
using namespace std;
using namespace cv;
int main()
{
VideoCapture capture("LongJing02.mp4");
Mat frame, saveImg;
int objHeight = 128, objWidth = 64; //训练集中图片的高度和大小
int count = 0;
String saveImgPath;
while (1)
{
capture >> frame;
if (frame.empty()) break;
imshow("vedio", frame);
//count = 0;
resize(frame, saveImg, Size(objWidth, objHeight), 0, 0);
saveImgPath = format("AminngLongjing02/LongJing01_%d.jpg", count);
imwrite(saveImgPath, saveImg);
count++;
char c = waitKey(30);
if (c == 27 || count > 2000) //不让生产太多的样本,免得电脑占用太多空间,可以更改
{
break;
}
}
waitKey(0);
return 0;
}
python生成目录下所有文件list的代码:
import os
def file_name(file_dir, out_path=r"AMingLongjingPositive02.csv"):
    """Write the path of every file found under ``file_dir`` to a list file.

    The directory tree is walked with ``os.walk``. For each file one line of
    the form ``<directory>/<file name>`` is written, with backslashes in the
    directory part replaced by forward slashes.

    Args:
        file_dir: Root directory to scan.
        out_path: List file to create or overwrite. Defaults to the file name
            the original script used.
    """
    # The with-block closes the file even if walking or writing raises.
    with open(out_path, "w") as f:
        for root, dirs, files in os.walk(file_dir):
            print('root_dir:', root)  # directory currently being visited
            print('files:', files)  # non-directory entries of that directory
            prefix = root.replace("\\", "/")
            for item in files:
                f.write(prefix + "/" + item + "\n")
if __name__ == "__main__":
    # Only build the list when run as a script, not when the module is imported.
    file_name(r'AMingLongJing\AminngLongjing02')