'''
title: Some notes on OpenCV's Haar-feature face detection pipeline
keywords: opencv2.x, face detection, Haar features, AdaBoost, integral image, image pyramid, window-enlargement detection (filter enlargement)
Some documents and articles state that OpenCV implements both detection schemes: enlarging the detection window and shrinking the image.
The Viola-Jones paper also says their detector scales the detector window rather than the image. However, reading the OpenCV
code (opencv-2.4.11) shows that only the image-pyramid scheme (shrinking the image) is implemented; window-enlargement
detection does not exist in this code path.
ref: https://blog.youkuaiyun.com/qq_26898461/article/details/48802845
ref: https://www.bbsmax.com/A/o75NbO0N5W/
'''
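To make the claim concrete, here is a minimal, self-contained sketch (illustrative only, not OpenCV code) of the image-pyramid scheme that detectMultiScale below actually implements: the detection window keeps its training size, only the image is shrunk level by level, and a hit found at (x, y) on a level with factor f is mapped back by multiplying position and window size by f.
#include <opencv2/imgproc/imgproc.hpp>
#include <cstdio>
// Enumerate the pyramid levels the way the real code does (illustrative helper).
void enumeratePyramid(const cv::Mat& gray, cv::Size trainWin, double scaleFactor)
{
    for (double factor = 1.0; ; factor *= scaleFactor)
    {
        cv::Size scaled(cvRound(gray.cols / factor), cvRound(gray.rows / factor));
        if (scaled.width <= trainWin.width || scaled.height <= trainWin.height)
            break;                        // the fixed-size window no longer fits
        cv::Mat level;
        cv::resize(gray, level, scaled);  // shrink the image; the window stays trainWin
        // A hit at (x, y) on this level would be reported in the original image as
        // Rect(cvRound(x*factor), cvRound(y*factor),
        //      cvRound(trainWin.width*factor), cvRound(trainWin.height*factor)).
        std::printf("level %.2f: image %dx%d, window %dx%d\n",
                    factor, scaled.width, scaled.height, trainWin.width, trainWin.height);
    }
}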
The key code is as follows:
void CascadeClassifier::detectMultiScale( const Mat& image, vector<Rect>& objects,
vector<int>& rejectLevels,
vector<double>& levelWeights,
double scaleFactor, int minNeighbors,
int flags, Size minObjectSize, Size maxObjectSize,
bool outputRejectLevels )
{
const double GROUP_EPS = 0.2; // controls how similar two rectangles must be to fall into the same group; a larger value merges more neighboring windows into one cluster
CV_Assert( scaleFactor > 1 && image.depth() == CV_8U );
if( empty() )
return;
if( isOldFormatCascade() )
{
MemStorage storage(cvCreateMemStorage(0));
CvMat _image = image;
CvSeq* _objects = cvHaarDetectObjectsForROC( &_image, oldCascade, storage, rejectLevels, levelWeights, scaleFactor,
minNeighbors, flags, minObjectSize, maxObjectSize, outputRejectLevels );
vector<CvAvgComp> vecAvgComp;
Seq<CvAvgComp>(_objects).copyTo(vecAvgComp);
objects.resize(vecAvgComp.size());
std::transform(vecAvgComp.begin(), vecAvgComp.end(), objects.begin(), getRect());
return;
}
objects.clear();
if (!maskGenerator.empty()) {
maskGenerator->initializeMask(image);
}
if( maxObjectSize.height == 0 || maxObjectSize.width == 0 )
maxObjectSize = image.size();
Mat grayImage = image;
if( grayImage.channels() > 1 )
{
Mat temp;
cvtColor(grayImage, temp, CV_BGR2GRAY);
grayImage = temp;
}
// buffer reused to hold the scaled image at every pyramid level
Mat imageBuffer(image.rows + 1, image.cols + 1, CV_8U);
vector<Rect> candidates;
for( double factor = 1; ; factor *= scaleFactor )
{
Size originalWindowSize = getOriginalWindowSize();
// windowSize's only use is to obtain the real size a detected object would have at the current scale, so it can be compared against maxObjectSize / minObjectSize:
// if it already exceeds maxObjectSize, detection can stop early; if it is still below minObjectSize, this level is skipped and we move on to the next scale (larger objects)
Size windowSize( cvRound(originalWindowSize.width*factor), cvRound(originalWindowSize.height*factor) );
// image size after scaling
Size scaledImageSize( cvRound( grayImage.cols/factor ), cvRound( grayImage.rows/factor ) );
// at this scale, the range of positions the window can slide over (described by a width and a height)
Size processingRectSize( scaledImageSize.width - originalWindowSize.width, scaledImageSize.height - originalWindowSize.height );
if( processingRectSize.width <= 0 || processingRectSize.height <= 0 )
break;
if( windowSize.width > maxObjectSize.width || windowSize.height > maxObjectSize.height )
break;
if( windowSize.width < minObjectSize.width || windowSize.height < minObjectSize.height )
continue;
// scale the image down
Mat scaledImage( scaledImageSize, CV_8U, imageBuffer.data );
resize( grayImage, scaledImage, scaledImageSize, 0, 0, CV_INTER_LINEAR );
// determine the sliding step: if the scale factor is greater than 2, slide finely (1 pixel); otherwise slide 2 pixels at a time
int yStep;
if( getFeatureType() == cv::FeatureEvaluator::HOG )
{
yStep = 4;
}
else
{
yStep = factor > 2. ? 1 : 2; // yStep is also reused later as the step in the x direction
}
int stripCount, stripSize;
// stripCount/stripSize only decide how the rows are split into strips for parallel processing (about PTS_PER_THREAD window positions per strip); they affect speed, not the detection result
const int PTS_PER_THREAD = 1000;
stripCount = ((processingRectSize.width/yStep)*(processingRectSize.height + yStep-1)/yStep + PTS_PER_THREAD/2)/PTS_PER_THREAD;
stripCount = std::min(std::max(stripCount, 1), 100);
stripSize = (((processingRectSize.height + stripCount - 1)/stripCount + yStep-1)/yStep)*yStep;
// run detection on this scaled image
if( !detectSingleScale( scaledImage, stripCount, processingRectSize, stripSize, yStep, factor, candidates,
rejectLevels, levelWeights, outputRejectLevels ) )
break;
}
objects.resize(candidates.size());
std::copy(candidates.begin(), candidates.end(), objects.begin());
// group the candidate rectangles collected during detection; minNeighbors and GROUP_EPS are the two key grouping parameters:
// minNeighbors discards clusters that contain fewer rectangles than this number
// GROUP_EPS is the threshold that decides whether two rectangles are similar enough to belong to the same cluster
if( outputRejectLevels )
{
groupRectangles( objects, rejectLevels, levelWeights, minNeighbors, GROUP_EPS );
}
else
{
groupRectangles( objects, minNeighbors, GROUP_EPS );
}
}
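The grouping at the end relies on a rectangle-similarity predicate parameterized by eps (GROUP_EPS here). As far as I recall from the 2.4.x sources, the test looks roughly like the sketch below (an approximation; the real functor is SimilarRects in the objdetect module): two rectangles are neighbors when all four corresponding edges differ by at most eps times half the sum of the smaller widths/heights. Rectangles are clustered with this predicate, clusters with too few members (minNeighbors) are dropped, and each surviving cluster is averaged into one output rectangle.
#include <opencv2/core/core.hpp>
#include <algorithm>
#include <cstdlib>
// Approximate form of the similarity test used by groupRectangles (sketch from memory).
struct SimilarRectsSketch
{
    explicit SimilarRectsSketch(double _eps) : eps(_eps) {}
    bool operator()(const cv::Rect& r1, const cv::Rect& r2) const
    {
        double delta = eps * (std::min(r1.width, r2.width) +
                              std::min(r1.height, r2.height)) * 0.5;
        return std::abs(r1.x - r2.x) <= delta &&
               std::abs(r1.y - r2.y) <= delta &&
               std::abs(r1.x + r1.width  - r2.x - r2.width)  <= delta &&
               std::abs(r1.y + r1.height - r2.y - r2.height) <= delta;
    }
    double eps;
};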
// the commonly used overload: it does not expose rejectLevels, it simply forwards to the full version above with dummy vectors and outputRejectLevels = false
void CascadeClassifier::detectMultiScale( const Mat& image, vector<Rect>& objects,
double scaleFactor, int minNeighbors,
int flags, Size minObjectSize, Size maxObjectSize)
{
vector<int> fakeLevels;
vector<double> fakeWeights;
detectMultiScale( image, objects, fakeLevels, fakeWeights, scaleFactor,
minNeighbors, flags, minObjectSize, maxObjectSize, false );
}
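For reference, a typical call into this public API looks like the following (the image and cascade file names are just examples; haarcascade_frontalface_alt.xml ships with OpenCV):
#include <opencv2/objdetect/objdetect.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <vector>
int main()
{
    cv::CascadeClassifier face_cascade;
    if (!face_cascade.load("haarcascade_frontalface_alt.xml"))  // example cascade file
        return -1;
    cv::Mat img = cv::imread("test.jpg");                       // example input image
    cv::Mat gray;
    cv::cvtColor(img, gray, CV_BGR2GRAY);
    cv::equalizeHist(gray, gray);
    std::vector<cv::Rect> faces;
    // scaleFactor = 1.1, minNeighbors = 3, no flags, smallest object 30x30
    face_cascade.detectMultiScale(gray, faces, 1.1, 3, 0, cv::Size(30, 30));
    return 0;
}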
// detect a single scaled image (in parallel)
bool CascadeClassifier::detectSingleScale( const Mat& image, int stripCount, Size processingRectSize,
int stripSize, int yStep, double factor, vector<Rect>& candidates,
vector<int>& levels, vector<double>& weights, bool outputRejectLevels )
{
// compute the integral images of this scaled image; the feature pointers are set up once for the original (fixed) window size
if( !featureEvaluator->setImage( image, data.origWinSize ) )
return false;
#if defined (LOG_CASCADE_STATISTIC)
logger.setImage(image);
#endif
Mat currentMask;
if (!maskGenerator.empty()) {
currentMask=maskGenerator->generateMask(image);
}
vector<Rect> candidatesVector;
vector<int> rejectLevels;
vector<double> levelWeights;
Mutex mtx;
if( outputRejectLevels ) // usually not used
{
parallel_for_(Range(0, stripCount), CascadeClassifierInvoker( *this, processingRectSize, stripSize, yStep, factor,
candidatesVector, rejectLevels, levelWeights, true, currentMask, &mtx));
levels.insert( levels.end(), rejectLevels.begin(), rejectLevels.end() );
weights.insert( weights.end(), levelWeights.begin(), levelWeights.end() );
}
else
{
// scan this scaled image in parallel
parallel_for_(Range(0, stripCount), CascadeClassifierInvoker( *this, processingRectSize, stripSize, yStep, factor,
candidatesVector, rejectLevels, levelWeights, false, currentMask, &mtx));
}
candidates.insert( candidates.end(), candidatesVector.begin(), candidatesVector.end() );
#if defined (LOG_CASCADE_STATISTIC)
logger.write();
#endif
return true;
}
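As a hand-computed example of the strip formulas above: with processingRectSize = 300x200 and yStep = 2, stripCount = (150*201/2 + 500)/1000 = 15 (clamped to [1, 100]) and stripSize = (((200+14)/15 + 1)/2)*2 = 14, so the 200 candidate rows are cut into 15 strips of 14 rows each, and worker i in CascadeClassifierInvoker scans rows [i*14, min((i+1)*14, 200)) with the 2-pixel step.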
// parallel worker that scans the scaled image
class CascadeClassifierInvoker : public ParallelLoopBody
{
public:
// the constructor just stores the parameters
CascadeClassifierInvoker( CascadeClassifier& _cc, Size _sz1, int _stripSize, int _yStep, double _factor,
vector<Rect>& _vec, vector<int>& _levels, vector<double>& _weights, bool outputLevels, const Mat& _mask, Mutex* _mtx)
{
classifier = &_cc;
processingRectSize = _sz1;
stripSize = _stripSize;
yStep = _yStep;
scalingFactor = _factor;
rectangles = &_vec;
rejectLevels = outputLevels ? &_levels : 0;
levelWeights = outputLevels ? &_weights : 0;
mask = _mask;
mtx = _mtx;
}
// this operator scans (part of) the scaled image; it is run in parallel
void operator()(const Range& range) const
{
Ptr<FeatureEvaluator> evaluator = classifier->featureEvaluator->clone();
// winSize is the original window size scaled up by scalingFactor; its only use is to report the size of the detected objects
// because detection runs on the downscaled image, the reported object size has to be scaled back up by the same factor
Size winSize(cvRound(classifier->data.origWinSize.width * scalingFactor), cvRound(classifier->data.origWinSize.height * scalingFactor));
int y1 = range.start * stripSize;
int y2 = min(range.end * stripSize, processingRectSize.height);
for( int y = y1; y < y2; y += yStep )
{
// processingRectSize is the scaled image size minus the original window size,
// i.e. the range of positions the window can slide over; the reported positions are later scaled back up by scalingFactor
// yStep is the sliding step, and the x direction reuses the same step: while the scaled image is still large (factor <= 2)
// a 2-pixel step is used, otherwise a 1-pixel step
for( int x = 0; x < processingRectSize.width; x += yStep )
{
if ( (!mask.empty()) && (mask.at<uchar>(Point(x,y))==0)) {
continue;
}
// run the cascade on the window anchored at this point and get the result
double gypWeight;
int result = classifier->runAt(evaluator, Point(x, y), gypWeight);
#if defined (LOG_CASCADE_STATISTIC)
logger.setPoint(Point(x, y), result);
#endif
// rejectLevels is usually not needed
if( rejectLevels )
{
if( result == 1 )
result = -(int)classifier->data.stages.size();
if( classifier->data.stages.size() + result < 4 )
{
mtx->lock();
rectangles->push_back(Rect(cvRound(x*scalingFactor), cvRound(y*scalingFactor), winSize.width, winSize.height));
rejectLevels->push_back(-result);
levelWeights->push_back(gypWeight);
mtx->unlock();
}
}
else if( result > 0 )
{
// if the window passed every stage (a face), store its x, y, w, h, all scaled back to coordinates in the original image
mtx->lock();
rectangles->push_back(Rect(cvRound(x*scalingFactor), cvRound(y*scalingFactor),
winSize.width, winSize.height));
mtx->unlock();
}
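// result == 0 means the window was already rejected at the very first stage,
// so the neighborhood is unlikely to contain a face and an extra step is skipped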
if( result == 0 )
x += yStep;
}
}
}
CascadeClassifier* classifier;
vector<Rect>* rectangles;
Size processingRectSize;
int stripSize, yStep;
double scalingFactor;
vector<int> *rejectLevels;
vector<double> *levelWeights;
Mat mask;
Mutex* mtx;
};
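A quick numeric check of the mapping: with origWinSize = 20x20 and scalingFactor = 2.5, a window accepted at (x, y) = (40, 30) on the downscaled image is reported as Rect(100, 75, 50, 50) in the original image, i.e. position and size are multiplied by the same factor.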
int CascadeClassifier::runAt( Ptr<FeatureEvaluator>& evaluator, Point pt, double& weight )
{
CV_Assert( oldCascade.empty() );
assert( data.featureType == FeatureEvaluator::HAAR ||
data.featureType == FeatureEvaluator::LBP ||
data.featureType == FeatureEvaluator::HOG );
if( !evaluator->setWindow(pt) )
return -1;
if( data.isStumpBased )
{
// for stump-based AdaBoost, a different predictor is used for each feature type
if( data.featureType == FeatureEvaluator::HAAR )
return predictOrderedStump<HaarEvaluator>( *this, evaluator, weight );
else if( data.featureType == FeatureEvaluator::LBP )
return predictCategoricalStump<LBPEvaluator>( *this, evaluator, weight );
else if( data.featureType == FeatureEvaluator::HOG )
return predictOrderedStump<HOGEvaluator>( *this, evaluator, weight );
else
return -2;
}
else
{
if( data.featureType == FeatureEvaluator::HAAR )
return predictOrdered<HaarEvaluator>( *this, evaluator, weight );
else if( data.featureType == FeatureEvaluator::LBP )
return predictCategorical<LBPEvaluator>( *this, evaluator, weight );
else if( data.featureType == FeatureEvaluator::HOG )
return predictOrdered<HOGEvaluator>( *this, evaluator, weight );
else
return -2;
}
}
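The return value of runAt encodes where the window failed: 1 means the window passed every stage; -1 is returned when setWindow fails (the window does not fit inside the image); -2 flags an unsupported feature type; and the predict functions return -si when the window is rejected at stage si, so 0 means rejection at the very first stage (and -1 can also mean rejection at stage 1). The checks on result in CascadeClassifierInvoker above rely on exactly these conventions.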
template<class FEval>
inline int predictOrderedStump( CascadeClassifier& cascade, Ptr<FeatureEvaluator> &_featureEvaluator, double& sum )
{
int nodeOfs = 0, leafOfs = 0;
FEval& featureEvaluator = (FEval&)*_featureEvaluator;
float* cascadeLeaves = &cascade.data.leaves[0];
CascadeClassifier::Data::DTreeNode* cascadeNodes = &cascade.data.nodes[0];
CascadeClassifier::Data::Stage* cascadeStages = &cascade.data.stages[0];
int nstages = (int)cascade.data.stages.size();
for( int stageIdx = 0; stageIdx < nstages; stageIdx++ )
{
CascadeClassifier::Data::Stage& stage = cascadeStages[stageIdx];
sum = 0.0;
int ntrees = stage.ntrees;
for( int i = 0; i < ntrees; i++, nodeOfs++, leafOfs+= 2 )
{
CascadeClassifier::Data::DTreeNode& node = cascadeNodes[nodeOfs];
// compute the weak classifier's feature value, compare it with the node threshold, and accumulate the corresponding left/right leaf value (a float)
// featureEvaluator is an object whose operator() is overloaded; calling the object like a function looks just like an ordinary function call, hence the name "function object" (functor)
double value = featureEvaluator(node.featureIdx);
sum += cascadeLeaves[ value < node.threshold ? leafOfs : leafOfs + 1 ];
}
// the accumulated leaf-value sum is finally compared with the stage (strong classifier) threshold; if it is below the threshold the window is not a face, and the negated index of the failing stage is returned
if( sum < stage.threshold )
return -stageIdx;
}
return 1;
}
// the Haar featureEvaluator is defined as follows
class HaarEvaluator : public FeatureEvaluator
{
public:
struct Feature
{
Feature();
float calc( int offset ) const;
void updatePtrs( const Mat& sum );
bool read( const FileNode& node );
bool tilted;
enum { RECT_NUM = 3 };
struct
{
Rect r;
float weight;
} rect[RECT_NUM];
const int* p[RECT_NUM][4];
};
HaarEvaluator();
virtual ~HaarEvaluator();
virtual bool read( const FileNode& node );
virtual Ptr<FeatureEvaluator> clone() const;
virtual int getFeatureType() const { return FeatureEvaluator::HAAR; }
virtual bool setImage(const Mat&, Size origWinSize);
virtual bool setWindow(Point pt);
// operator() is overloaded here, so an object of this class can be called like a function
// calc computes the feature value of the current window; offset is the window's index into the precomputed integral image
double operator()(int featureIdx) const
{ return featuresPtr[featureIdx].calc(offset) * varianceNormFactor; }
virtual double calcOrd(int featureIdx) const
{ return (*this)(featureIdx); }
protected:
Size origWinSize;
Ptr<vector<Feature> > features;
Feature* featuresPtr; // optimization
bool hasTiltedFeatures;
Mat sum0, sqsum0, tilted0;
Mat sum, sqsum, tilted;
Rect normrect;
const int *p[4];
const double *pq[4];
int offset;
double varianceNormFactor;
};
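The members offset and varianceNormFactor used by operator() above are filled in by setWindow. From memory of the 2.4.x sources it does roughly the following (a simplified, standalone sketch, not a verbatim copy; member access is replaced by explicit parameters): it never touches the feature rectangles, it only converts the window's top-left corner into an offset inside the precomputed integral images and derives the per-window variance normalization factor over normrect.
#include <opencv2/core/core.hpp>
#include <cmath>
// Simplified sketch of what HaarEvaluator::setWindow does (reconstructed from memory;
// the exact member names and arithmetic in 2.4.x may differ slightly).
static bool setWindowSketch(const cv::Mat& sum, const cv::Mat& sqsum,
                            cv::Size origWinSize, cv::Rect normrect, cv::Point pt,
                            int& offset, double& varianceNormFactor)
{
    if (pt.x < 0 || pt.y < 0 ||
        pt.x + origWinSize.width  >= sum.cols ||
        pt.y + origWinSize.height >= sum.rows)
        return false;                                        // window does not fit
    offset = pt.y * (int)(sum.step / sizeof(int)) + pt.x;    // index into the integral image
    // sum and squared sum of the pixels inside normrect, shifted to the window position,
    // read from the two integral images with the usual four-corner formula
    int rx = normrect.x + pt.x, ry = normrect.y + pt.y, rw = normrect.width, rh = normrect.height;
    double s  = (double)sum.at<int>(ry, rx) - sum.at<int>(ry, rx + rw)
              - sum.at<int>(ry + rh, rx) + sum.at<int>(ry + rh, rx + rw);
    double sq = sqsum.at<double>(ry, rx) - sqsum.at<double>(ry, rx + rw)
              - sqsum.at<double>(ry + rh, rx) + sqsum.at<double>(ry + rh, rx + rw);
    double nf = (double)normrect.area() * sq - s * s;        // unnormalized variance
    varianceNormFactor = nf > 0. ? 1. / std::sqrt(nf) : 1.;
    return true;
}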
inline HaarEvaluator::Feature :: Feature()
{
tilted = false;
rect[0].r = rect[1].r = rect[2].r = Rect();
rect[0].weight = rect[1].weight = rect[2].weight = 0;
p[0][0] = p[0][1] = p[0][2] = p[0][3] =
p[1][0] = p[1][1] = p[1][2] = p[1][3] =
p[2][0] = p[2][1] = p[2][2] = p[2][3] = 0;
}
inline float HaarEvaluator::Feature :: calc( int _offset ) const
{
// the feature value is the weighted sum of the rectangle sums (the whole region, black region 1, and for some features black region 2), each read from the integral image
// the integral image is computed once per scaled image at the start of detection, and the four corner pointers p[] of every rectangle are fixed for the original window size; _offset merely shifts them to the current window position
// in other words, there is no window-enlargement detection implemented at all
float ret = rect[0].weight * CALC_SUM(p[0], _offset) + rect[1].weight * CALC_SUM(p[1], _offset);
if( rect[2].weight != 0.0f )
ret += rect[2].weight * CALC_SUM(p[2], _offset);
return ret;
}
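CALC_SUM is the usual four-corner integral-image lookup. As far as I remember, the macros in cascadedetect.hpp expand roughly as below (an approximation from memory): each rectangle keeps four pointers into the integral image, precomputed by updatePtrs() for the fixed original window size, and _offset only shifts all four by the same amount.
// Approximate expansion of the CALC_SUM macros (sketch from memory):
#define CALC_SUM_(p0, p1, p2, p3, offset) \
    ((p0)[offset] - (p1)[offset] - (p2)[offset] + (p3)[offset])
#define CALC_SUM(rect, offset) \
    CALC_SUM_((rect)[0], (rect)[1], (rect)[2], (rect)[3], offset)
This is the crux of the argument in the header: the feature rectangles and their integral-image pointers are computed once per scaled image for the fixed origWinSize, so the detector never enlarges the window; it only shrinks the image.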