HOG算法

最新推荐文章于 2023-12-09 17:39:37 发布
weixin_33751566
最新推荐文章于 2023-12-09 17:39:37 发布
阅读量250
点赞数
CC 4.0 BY-SA版权
文章标签： python 人工智能
原文链接：https://my.oschina.net/u/1035026/blog/222840
本文深入探讨了HOG（Histogram of Oriented Gradients）行人检测算法的原理与实现细节，包括梯度计算、块归一化等核心步骤，并介绍了如何通过OpenCV库实现HOG描述符的设置与应用。
摘要生成于 C知道，由 DeepSeek-R1 满血版支持，前往体验 >
2019独角兽企业重金招聘Python工程师标准>>>
这一周研究了下HOG行人检测算法，一开始看源码还真不习惯，不过仔细研究了下，对于算法的具体实现有了更多的了解。
/*M///////////////////////////////////////////////////////////////////////////////////////
//
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
//  By downloading, copying, installing or using the software you agree to this license.
//  If you do not agree to this license, do not download, install,
//  copy or use the software.
//
//
//                           License Agreement
//                For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistribution's of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistribution's in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * The name of the copyright holders may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

#include <stdio.h>
#include "precomp.hpp"
#include <iterator>
#ifdef HAVE_IPP
#include "ipp.h"
#endif
/****************************************************************************************\
      The code below is implementation of HOG (Histogram-of-Oriented Gradients)
      descriptor and object detection, introduced by Navneet Dalal and Bill Triggs.

      The computed feature vectors are compatible with the
      INRIA Object Detection and Localization Toolkit
      (http://pascal.inrialpes.fr/soft/olt/)
\****************************************************************************************/
//HOG一般调用过程：初始化实例化一个HOGDescriptor（各种参数及svmDetector系数），读入单幅图像,
//调用detector方法，得到行人可能存在矩阵集合hits。

//高斯函数求权重、三线性插值求权重  缓存机制实现过程
//算法涉及概念
//getblock()表示的是给定block在滑动窗口的位置以及图片的hog缓存指针，来获得本次block中计算hog特征所需要的信息。
//normalizeBlockHistogram()指对block获取到的hog部分描述子进行归一化，其实该归一化有2层，具体看代码。
//windowsInImage()实现的功能是给定测试图片和滑动窗口移动的大小，来获得该层中水平和垂直方向上需要滑动多少个滑动窗口。
//getWindow()值获得一个滑动窗口矩形。
//compute()是实际上计算hog描述子的函数，在测试和训练阶段都能用到。
//detect()是检测目标是用到的函数，在detectMultiScale()函数内部被调用。

//为了便于分析，我们假定win窗口大小为128x64,block为16x16,cell为8x8,winStride为8，可知一个block中有4个
//cell。win中有(128-16+8)/8x(64-16+8)/8=15x7=105个block。而一个cell的直方图为9维，那么一个对于整个
//win而言，产生的直方图为105x4x9=3780维。

namespace cv
{
//获得一个win中获得3780维的直方图描述子。
size_t HOGDescriptor::getDescriptorSize() const
{
    CV_Assert(blockSize.width % cellSize.width == 0 &&
        blockSize.height % cellSize.height == 0);
    CV_Assert((winSize.width - blockSize.width) % blockStride.width == 0 &&
        (winSize.height - blockSize.height) % blockStride.height == 0 );
    return (size_t)nbins*
        (blockSize.width/cellSize.width)*
        (blockSize.height/cellSize.height)*
        ((winSize.width - blockSize.width)/blockStride.width + 1)*
        ((winSize.height - blockSize.height)/blockStride.height + 1);
}

double HOGDescriptor::getWinSigma() const
{
//win设定为计算高斯权重时的方差，默认值为4。
    return winSigma >= 0 ? winSigma : (blockSize.width + blockSize.height)/8.;
}

bool HOGDescriptor::checkDetectorSize() const
{
    size_t detectorSize = svmDetector.size(), descriptorSize = getDescriptorSize();
    return detectorSize == 0 ||
        detectorSize == descriptorSize ||
        detectorSize == descriptorSize + 1;//比描述子长度大1表示偏置
}

void HOGDescriptor::setSVMDetector(InputArray _svmDetector)
{
//convertTo改变图像Mat属性，如改变图像深度、通道数等等。
    _svmDetector.getMat().convertTo(svmDetector, CV_32F);
    CV_Assert( checkDetectorSize() );
}

#define CV_TYPE_NAME_HOG_DESCRIPTOR "opencv-object-detector-hog"

//把文件节点的内容读取到类的成员变量中。
bool HOGDescriptor::read(FileNode& obj)
{
    if( !obj.isMap() )
        return false;
    FileNodeIterator it = obj["winSize"].begin();
    //依次读入winSize、blockSize、blockStride、cellSize、nbins、derivAperture、winSigma、
    //histogramNormType、L2HysThreshold、gammaCorrection、nlevels。
    it >> winSize.width >> winSize.height;
    it = obj["blockSize"].begin();
    it >> blockSize.width >> blockSize.height;
    it = obj["blockStride"].begin();
    it >> blockStride.width >> blockStride.height;
    it = obj["cellSize"].begin();
    it >> cellSize.width >> cellSize.height;
    obj["nbins"] >> nbins;
    obj["derivAperture"] >> derivAperture;
    obj["winSigma"] >> winSigma;
    obj["histogramNormType"] >> histogramNormType;
    obj["L2HysThreshold"] >> L2HysThreshold;
    obj["gammaCorrection"] >> gammaCorrection;
    obj["nlevels"] >> nlevels;

    FileNode vecNode = obj["SVMDetector"];
    if( vecNode.isSeq() )
    {
        vecNode >> svmDetector;
        CV_Assert(checkDetectorSize());
    }
    return true;
}

//将hog描述子内的变量输入到文件fs中，且每次输入前都输入一个名字与其对应，因此这些借点是mapping类型。
void HOGDescriptor::write(FileStorage& fs, const String& objName) const
{
    if( !objName.empty() )
        fs << objName;

    fs << "{" CV_TYPE_NAME_HOG_DESCRIPTOR
    << "winSize" << winSize
    << "blockSize" << blockSize
    << "blockStride" << blockStride
    << "cellSize" << cellSize
    << "nbins" << nbins
    << "derivAperture" << derivAperture
    << "winSigma" << getWinSigma()
    << "histogramNormType" << histogramNormType
    << "L2HysThreshold" << L2HysThreshold
    << "gammaCorrection" << gammaCorrection
    << "nlevels" << nlevels;
    if( !svmDetector.empty() )
        fs << "SVMDetector" << svmDetector;
    fs << "}";
}

//从给定的文件中读取参数。
bool HOGDescriptor::load(const String& filename, const String& objname)
{
    FileStorage fs(filename, FileStorage::READ);
    FileNode obj = !objname.empty() ? fs[objname] : fs.getFirstTopLevelNode();
    return read(obj);
}

//将文件的参数以文件借点的形式写入文件中。
void HOGDescriptor::save(const String& filename, const String& objName) const
{
    FileStorage fs(filename, FileStorage::WRITE);
    write(fs, !objName.empty() ? objName : FileStorage::getDefaultObjectName(filename));
}

//复制HOG描述子到c中。
void HOGDescriptor::copyTo(HOGDescriptor& c) const
{
    c.winSize = winSize;
    c.blockSize = blockSize;
    c.blockStride = blockStride;
    c.cellSize = cellSize;
    c.nbins = nbins;
    c.derivAperture = derivAperture;
    c.winSigma = winSigma;
    c.histogramNormType = histogramNormType;
    c.L2HysThreshold = L2HysThreshold;
    c.gammaCorrection = gammaCorrection;
    c.svmDetector = svmDetector;
    c.nlevels = nlevels;
}

//完成梯度幅度图和梯度方向值的计算。利用mapBuffer计算拓展图像与原始图像映射之间的关系。
void HOGDescriptor::computeGradient(const Mat& img, Mat& grad, Mat& qangle,
                                    Size paddingTL, Size paddingBR) const
{
    CV_Assert( img.type() == CV_8U || img.type() == CV_8UC3 );

    Size gradsize(img.cols + paddingTL.width + paddingBR.width,
                  img.rows + paddingTL.height + paddingBR.height);
    grad.create(gradsize, CV_32FC2);  // <magnitude*(1-alpha), magnitude*alpha>
    qangle.create(gradsize, CV_8UC2); // [0..nbins-1] - quantized gradient orientation
    Size wholeSize;
    Point roiofs;
    img.locateROI(wholeSize, roiofs);

    int i, x, y;
    int cn = img.channels();

    Mat_<float> _lut(1, 256);
    const float* lut = &_lut(0,0);

    if( gammaCorrection )
        for( i = 0; i < 256; i++ )
            _lut(0,i) = std::sqrt((float)i);
    else
        for( i = 0; i < 256; i++ )
            _lut(0,i) = (float)i;

    AutoBuffer<int> mapbuf(gradsize.width + gradsize.height + 4);
    int* xmap = (int*)mapbuf + 1;
    int* ymap = xmap + gradsize.width + 2;

    const int borderType = (int)BORDER_REFLECT_101;
//利用borderInterpolate函数完成拓展图像和原始图像位置映射关系。
    for( x = -1; x < gradsize.width + 1; x++ )
        xmap[x] = borderInterpolate(x - paddingTL.width + roiofs.x,
                        wholeSize.width, borderType) - roiofs.x;
    for( y = -1; y < gradsize.height + 1; y++ )
        ymap[y] = borderInterpolate(y - paddingTL.height + roiofs.y,
                        wholeSize.height, borderType) - roiofs.y;

    // x- & y- derivatives for the whole row
    //创建连续内存空间_dbuf，完成对dx、dy、mag、angle的存放；利用angleScale系数将(0,2pi)划分为
    //18个单元，之后取整并利用hidx-=nbins操作，将其归拢到九个单元中。
    int width = gradsize.width;
    AutoBuffer<float> _dbuf(width*4);
    float* dbuf = _dbuf;
    Mat Dx(1, width, CV_32F, dbuf);
    Mat Dy(1, width, CV_32F, dbuf + width);
    Mat Mag(1, width, CV_32F, dbuf + width*2);
    Mat Angle(1, width, CV_32F, dbuf + width*3);

    int _nbins = nbins;
    float angleScale = (float)(_nbins/CV_PI);
#ifdef HAVE_IPP
    Mat lutimg(img.rows,img.cols,CV_MAKETYPE(CV_32F,cn));
    Mat hidxs(1, width, CV_32F);
    Ipp32f* pHidxs  = (Ipp32f*)hidxs.data;
    Ipp32f* pAngles = (Ipp32f*)Angle.data;

    IppiSize roiSize;
    roiSize.width = img.cols;
    roiSize.height = img.rows;

    for( y = 0; y < roiSize.height; y++ )
    {
       const uchar* imgPtr = img.data + y*img.step;
       float* imglutPtr = (float*)(lutimg.data + y*lutimg.step);

       for( x = 0; x < roiSize.width*cn; x++ )
       {
          imglutPtr[x] = lut[imgPtr[x]];
       }
    }

#endif
    for( y = 0; y < gradsize.height; y++ )
    {
#ifdef HAVE_IPP
        const float* imgPtr  = (float*)(lutimg.data + lutimg.step*ymap[y]);
        const float* prevPtr = (float*)(lutimg.data + lutimg.step*ymap[y-1]);
        const float* nextPtr = (float*)(lutimg.data + lutimg.step*ymap[y+1]);
#else
        const uchar* imgPtr  = img.data + img.step*ymap[y];
        const uchar* prevPtr = img.data + img.step*ymap[y-1];
        const uchar* nextPtr = img.data + img.step*ymap[y+1];
#endif
        float* gradPtr = (float*)grad.ptr(y);
        uchar* qanglePtr = (uchar*)qangle.ptr(y);

        if( cn == 1 )
        {
            for( x = 0; x < width; x++ )
            {
                int x1 = xmap[x];
#ifdef HAVE_IPP
                dbuf[x] = (float)(imgPtr[xmap[x+1]] - imgPtr[xmap[x-1]]);
                dbuf[width + x] = (float)(nextPtr[x1] - prevPtr[x1]);
#else
                dbuf[x] = (float)(lut[imgPtr[xmap[x+1]]] - lut[imgPtr[xmap[x-1]]]);
                dbuf[width + x] = (float)(lut[nextPtr[x1]] - lut[prevPtr[x1]]);
#endif
            }
        }
        else
        {
            for( x = 0; x < width; x++ )
            {
                int x1 = xmap[x]*3;
                float dx0, dy0, dx, dy, mag0, mag;
#ifdef HAVE_IPP
                const float* p2 = imgPtr + xmap[x+1]*3;
                const float* p0 = imgPtr + xmap[x-1]*3;

                dx0 = p2[2] - p0[2];
                dy0 = nextPtr[x1+2] - prevPtr[x1+2];
                mag0 = dx0*dx0 + dy0*dy0;

                dx = p2[1] - p0[1];
                dy = nextPtr[x1+1] - prevPtr[x1+1];
                mag = dx*dx + dy*dy;

                if( mag0 < mag )
                {
                    dx0 = dx;
                    dy0 = dy;
                    mag0 = mag;
                }

                dx = p2[0] - p0[0];
                dy = nextPtr[x1] - prevPtr[x1];
                mag = dx*dx + dy*dy;
#else
                const uchar* p2 = imgPtr + xmap[x+1]*3;
                const uchar* p0 = imgPtr + xmap[x-1]*3;

                dx0 = lut[p2[2]] - lut[p0[2]];
                dy0 = lut[nextPtr[x1+2]] - lut[prevPtr[x1+2]];
                mag0 = dx0*dx0 + dy0*dy0;

                dx = lut[p2[1]] - lut[p0[1]];
                dy = lut[nextPtr[x1+1]] - lut[prevPtr[x1+1]];
                mag = dx*dx + dy*dy;

                if( mag0 < mag )
                {
                    dx0 = dx;
                    dy0 = dy;
                    mag0 = mag;
                }

                dx = lut[p2[0]] - lut[p0[0]];
                dy = lut[nextPtr[x1]] - lut[prevPtr[x1]];
                mag = dx*dx + dy*dy;
 #endif
                if( mag0 < mag )
                {
                    dx0 = dx;
                    dy0 = dy;
                    mag0 = mag;
                }

                dbuf[x] = dx0;
                dbuf[x+width] = dy0;
            }
        }
#ifdef HAVE_IPP
        ippsCartToPolar_32f((const Ipp32f*)Dx.data, (const Ipp32f*)Dy.data, (Ipp32f*)Mag.data, pAngles, width);
        for( x = 0; x < width; x++ )
        {
           if(pAngles[x] < 0.f)
             pAngles[x] += (Ipp32f)(CV_PI*2.);
        }

        ippsNormalize_32f(pAngles, pAngles, width, 0.5f/angleScale, 1.f/angleScale);
        ippsFloor_32f(pAngles,(Ipp32f*)hidxs.data,width);
        ippsSub_32f_I((Ipp32f*)hidxs.data,pAngles,width);
        ippsMul_32f_I((Ipp32f*)Mag.data,pAngles,width);

        ippsSub_32f_I(pAngles,(Ipp32f*)Mag.data,width);
        ippsRealToCplx_32f((Ipp32f*)Mag.data,pAngles,(Ipp32fc*)gradPtr,width);
#else
//cartToPolar()函数是计算2个矩阵对应元素的幅度和角度，最后一个参数表示是否角度使用角度表示，这里false
//表示不用角度表示，即用弧度表示。如果只需计算2个矩阵对应元素的幅度图像，可以采用magnitute()函数。
//-pi/2<Angle<pi/2;
        cartToPolar( Dx, Dy, Mag, Angle, false );
#endif
        for( x = 0; x < width; x++ )
        {
#ifdef HAVE_IPP
            int hidx = (int)pHidxs[x];
#else
            float mag = dbuf[x+width*2], angle = dbuf[x+width*3]*angleScale - 0.5f;
            int hidx = cvFloor(angle);
            angle -= hidx;
            gradPtr[x*2] = mag*(1.f - angle);
            gradPtr[x*2+1] = mag*angle;
#endif
            if( hidx < 0 )
                hidx += _nbins;
            else if( hidx >= _nbins )
                hidx -= _nbins;
            assert( (unsigned)hidx < (unsigned)_nbins );

            qanglePtr[x*2] = (uchar)hidx;
            hidx++;
            hidx &= hidx < _nbins ? -1 : 0;
            qanglePtr[x*2+1] = (uchar)hidx;
        }
    }
}


struct HOGCache
{
/*
结构体BlockData中有2个变量。1个BlockData结构体是对应的一个block数据。
histOfs和imgOffset.其中histOfs表示为该block对整个滑动窗口内hog描述算子
的贡献那部分向量的起始位置；imgOffset为该block在滑动窗口图片中的坐标(当然是指左上角坐标)。
*/
    struct BlockData
    {
        BlockData() : histOfs(0), imgOffset() {}
        int histOfs;
        Point imgOffset;
    };

/*结构体PixData中有5个变量，1个PixData结构体是对应的block中1个像素点的数据。其中gradOfs表示
该点的梯度幅度在滑动窗口图片梯度幅度图中的位置坐标；qangleOfs表示该点的梯度角度在滑动窗口
图片梯度角度图中的位置坐标；histOfs[]表示该像素点对1个或2个或4个cell贡献的hog描述子向量的
起始位置坐标（比较抽象，需要看源码才懂）。histWeight[]表示该像素点对1个或2个或4个cell贡献
的权重。gradWeight表示该点本身由于处在block中位置的不同因而对梯度直方图贡献也不同，其权值
按照二维高斯分布(以block中心为二维高斯的中心)来决定。
*/
    struct PixData
    {
        size_t gradOfs, qangleOfs;
        int histOfs[4];
        float histWeights[4];
        float gradWeight;
    };

    HOGCache();
    HOGCache(const HOGDescriptor* descriptor,
        const Mat& img, Size paddingTL, Size paddingBR,
        bool useCache, Size cacheStride);
    virtual ~HOGCache() {};
    virtual void init(const HOGDescriptor* descriptor,
        const Mat& img, Size paddingTL, Size paddingBR,
        bool useCache, Size cacheStride);

    Size windowsInImage(Size imageSize, Size winStride) const;
    Rect getWindow(Size imageSize, Size winStride, int idx) const;

    const float* getBlock(Point pt, float* buf);
    virtual void normalizeBlockHistogram(float* histogram) const;

    vector<PixData> pixData;
    vector<BlockData> blockData;

    bool useCache;
    vector<int> ymaxCached;
    Size winSize, cacheStride;
    Size nblocks, ncells;
    int blockHistogramSize;
    int count1, count2, count4;
    Point imgoffset;
    Mat_<float> blockCache;
    Mat_<uchar> blockCacheFlags;

    Mat grad, qangle;
    const HOGDescriptor* descriptor;
};


HOGCache::HOGCache()
{
    useCache = false;
    blockHistogramSize = count1 = count2 = count4 = 0;
    descriptor = 0;
}

HOGCache::HOGCache(const HOGDescriptor* _descriptor,
        const Mat& _img, Size _paddingTL, Size _paddingBR,
        bool _useCache, Size _cacheStride)
{
    init(_descriptor, _img, _paddingTL, _paddingBR, _useCache, _cacheStride);
}

//完成对HOGCache的初始化工作，包括对梯度幅值、方向值的计算，对blockData及pixData的计算过程
void HOGCache::init(const HOGDescriptor* _descriptor,
        const Mat& _img, Size _paddingTL, Size _paddingBR,
        bool _useCache, Size _cacheStride)
{
    descriptor = _descriptor;
    cacheStride = _cacheStride;
    useCache = _useCache;

    descriptor->computeGradient(_img, grad, qangle, _paddingTL, _paddingBR);
    imgoffset = _paddingTL;

    winSize = descriptor->winSize;
    Size blockSize = descriptor->blockSize;
    Size blockStride = descriptor->blockStride;
    Size cellSize = descriptor->cellSize;
    int i, j, nbins = descriptor->nbins;
    int rawBlockSize = blockSize.width*blockSize.height;

    nblocks = Size((winSize.width - blockSize.width)/blockStride.width + 1,
                   (winSize.height - blockSize.height)/blockStride.height + 1);
    ncells = Size(blockSize.width/cellSize.width, blockSize.height/cellSize.height);
    blockHistogramSize = ncells.width*ncells.height*nbins;

    if( useCache )
    {
        Size cacheSize((grad.cols - blockSize.width)/cacheStride.width+1,
                       (winSize.height/cacheStride.height)+1);
        blockCache.create(cacheSize.height, cacheSize.width*blockHistogramSize);
        blockCacheFlags.create(cacheSize);
        size_t cacheRows = blockCache.rows;
        ymaxCached.resize(cacheRows);
        for(size_t ii = 0; ii < cacheRows; ii++ )
            ymaxCached[ii] = -1;
    }

    Mat_<float> weights(blockSize);
    float sigma = (float)descriptor->getWinSigma();
    float scale = 1.f/(sigma*sigma*2);

    for(i = 0; i < blockSize.height; i++)
        for(j = 0; j < blockSize.width; j++)
        {
            float di = i - blockSize.height*0.5f;
            float dj = j - blockSize.width*0.5f;
            weights(i,j) = std::exp(-(di*di + dj*dj)*scale);
        }

    blockData.resize(nblocks.width*nblocks.height);
    //这里*3不是表示三通道而是划分为三类像素区分存放。这里申请的内存空间远远超过实际所需，但为了计算方便
    //需先如此申请，后再进行压缩。
    pixData.resize(rawBlockSize*3);

    // Initialize 2 lookup tables, pixData & blockData.
    // Here is why:
    //
    // The detection algorithm runs in 4 nested loops (at each pyramid layer):
    //  loop over the windows within the input image
    //    loop over the blocks within each window
    //      loop over the cells within each block
    //        loop over the pixels in each cell
    //
    // As each of the loops runs over a 2-dimensional array,
    // we could get 8(!) nested loops in total, which is very-very slow.
    //
    // To speed the things up, we do the following:
    //   1. loop over windows is unrolled in the HOGDescriptor::{compute|detect} methods;
    //         inside we compute the current search window using getWindow() method.
    //         Yes, it involves some overhead (function call + couple of divisions),
    //         but it's tiny in fact.
    //   2. loop over the blocks is also unrolled. Inside we use pre-computed blockData[j]
    //         to set up gradient and histogram pointers.
    //   3. loops over cells and pixels in each cell are merged
    //       (since there is no overlap between cells, each pixel in the block is processed once)
    //      and also unrolled. Inside we use PixData[k] to access the gradient values and
    //      update the histogram
    //
    count1 = count2 = count4 = 0;
    for( j = 0; j < blockSize.width; j++ )
        for( i = 0; i < blockSize.height; i++ )
        {
            PixData* data = 0;
            float cellX = (j+0.5f)/cellSize.width - 0.5f;
            float cellY = (i+0.5f)/cellSize.height - 0.5f;
            int icellX0 = cvFloor(cellX);
            int icellY0 = cvFloor(cellY);
            int icellX1 = icellX0 + 1, icellY1 = icellY0 + 1;
            cellX -= icellX0;
            cellY -= icellY0;

            if( (unsigned)icellX0 < (unsigned)ncells.width &&
                (unsigned)icellX1 < (unsigned)ncells.width )
            {
                if( (unsigned)icellY0 < (unsigned)ncells.height &&
                    (unsigned)icellY1 < (unsigned)ncells.height )
                {

                    //即一个区域内进行直方图统计，最多包含四个cell的不同直方图，histOfs[i]表示每个区域中的
                    //第i个直方图在整个block直方图存储空间中的距离原始位置的偏置。
                    //当前像素对哪个直方图做出贡献
                    data = &pixData[rawBlockSize*2 + (count4++)];
                    data->histOfs[0] = (icellX0*ncells.height + icellY0)*nbins;
                    data->histWeights[0] = (1.f - cellX)*(1.f - cellY);
                    data->histOfs[1] = (icellX1*ncells.height + icellY0)*nbins;
                    data->histWeights[1] = cellX*(1.f - cellY);
                    data->histOfs[2] = (icellX0*ncells.height + icellY1)*nbins;
                    data->histWeights[2] = (1.f - cellX)*cellY;
                    data->histOfs[3] = (icellX1*ncells.height + icellY1)*nbins;
                    data->histWeights[3] = cellX*cellY;
                }
                else
                {
                    data = &pixData[rawBlockSize + (count2++)];
                    if( (unsigned)icellY0 < (unsigned)ncells.height )
                    {
                        icellY1 = icellY0;
                        cellY = 1.f - cellY;
                    }
                    data->histOfs[0] = (icellX0*ncells.height + icellY1)*nbins;
                    data->histWeights[0] = (1.f - cellX)*cellY;
                    data->histOfs[1] = (icellX1*ncells.height + icellY1)*nbins;
                    data->histWeights[1] = cellX*cellY;
                    data->histOfs[2] = data->histOfs[3] = 0;
                    data->histWeights[2] = data->histWeights[3] = 0;
                }
            }
            else
            {
                if( (unsigned)icellX0 < (unsigned)ncells.width )
                {
                    icellX1 = icellX0;
                    cellX = 1.f - cellX;
                }

                if( (unsigned)icellY0 < (unsigned)ncells.height &&
                    (unsigned)icellY1 < (unsigned)ncells.height )
                {
                    data = &pixData[rawBlockSize + (count2++)];
                    data->histOfs[0] = (icellX1*ncells.height + icellY0)*nbins;
                    data->histWeights[0] = cellX*(1.f - cellY);
                    data->histOfs[1] = (icellX1*ncells.height + icellY1)*nbins;
                    data->histWeights[1] = cellX*cellY;
                    data->histOfs[2] = data->histOfs[3] = 0;
                    data->histWeights[2] = data->histWeights[3] = 0;
                }
                else
                {
                    data = &pixData[count1++];
                    if( (unsigned)icellY0 < (unsigned)ncells.height )
                    {
                        icellY1 = icellY0;
                        cellY = 1.f - cellY;
                    }
                    data->histOfs[0] = (icellX1*ncells.height + icellY1)*nbins;
                    data->histWeights[0] = cellX*cellY;
                    data->histOfs[1] = data->histOfs[2] = data->histOfs[3] = 0;
                    data->histWeights[1] = data->histWeights[2] = data->histWeights[3] = 0;
                }
            }
            //grad记录每个像素所属bin对应的权重的矩阵，为幅值乘以权重，这个权重包括高斯权重、三次差值的权重。
            //qangle记录每个像素角度所属的bin序号的矩阵，均为2通道，为了线性
            data->gradOfs = (grad.cols*i + j)*2;
            data->qangleOfs = (qangle.cols*i + j)*2;
            data->gradWeight = weights(i,j);
        }

    assert( count1 + count2 + count4 == rawBlockSize );
    // defragment pixData
    //重新整理数据使其连贯存储，节省了2/3的内存空间
    for( j = 0; j < count2; j++ )
        pixData[j + count1] = pixData[j + rawBlockSize];
    for( j = 0; j < count4; j++ )
        pixData[j + count1 + count2] = pixData[j + rawBlockSize*2];
    count2 += count1;
    count4 += count2;

 //上面是初始化pixData,下面开始初始化blockData
    // initialize blockData
    for( j = 0; j < nblocks.width; j++ )
        for( i = 0; i < nblocks.height; i++ )
        {
  //histOfs表示该block对检测窗口贡献的hog描述变量起点在整个变量中的坐标
            BlockData& data = blockData[j*nblocks.height + i];
            data.histOfs = (j*nblocks.height + i)*blockHistogramSize;
  //imgOffset表示该block的左上角在检测窗口中的坐标
            data.imgOffset = Point(j*blockStride.width,i*blockStride.height);
        }
}

//pt为该block左上角在滑动窗口中的坐标，buf为指向检测窗口中blockData的指针
//函数返回一个指向block描述子的指针。
const float* HOGCache::getBlock(Point pt, float* buf)
{
    float* blockHist = buf;
    assert(descriptor != 0);

    Size blockSize = descriptor->blockSize;
    pt += imgoffset;

    CV_Assert( (unsigned)pt.x <= (unsigned)(grad.cols - blockSize.width) &&
               (unsigned)pt.y <= (unsigned)(grad.rows - blockSize.height) );

    if( useCache )
    {
        CV_Assert( pt.x % cacheStride.width == 0 &&
                   pt.y % cacheStride.height == 0 );
       //cacheIdx表示的是block个数的坐标
        Point cacheIdx(pt.x/cacheStride.width,
                      (pt.y/cacheStride.height) % blockCache.rows);
        //ymaxCached的长度为一个检测窗口垂直方向上容纳的block个数
        if( pt.y != ymaxCached[cacheIdx.y] )
        {
            Mat_<uchar> cacheRow = blockCacheFlags.row(cacheIdx.y);
            cacheRow = (uchar)0;
            ymaxCached[cacheIdx.y] = pt.y;
        }

        blockHist = &blockCache[cacheIdx.y][cacheIdx.x*blockHistogramSize];
        uchar& computedFlag = blockCacheFlags(cacheIdx.y, cacheIdx.x);
        if( computedFlag != 0 )
            return blockHist;
        computedFlag = (uchar)1; // set it at once, before actual computing
    }

    int k, C1 = count1, C2 = count2, C4 = count4;
    const float* gradPtr = (const float*)(grad.data + grad.step*pt.y) + pt.x*2;
    const uchar* qanglePtr = qangle.data + qangle.step*pt.y + pt.x*2;

    CV_Assert( blockHist != 0 );
#ifdef HAVE_IPP
    ippsZero_32f(blockHist,blockHistogramSize);
#else
    for( k = 0; k < blockHistogramSize; k++ )
        blockHist[k] = 0.f;
#endif

    const PixData* _pixData = &pixData[0];

    for( k = 0; k < C1; k++ )
    {
        const PixData& pk = _pixData[k];
        //一个像素包含：gradOfs,qangleOfs,grandWeight,histOfs[4],histWeight[4]这5个属性
        const float* a = gradPtr + pk.gradOfs;
        float w = pk.gradWeight*pk.histWeights[0];
        const uchar* h = qanglePtr + pk.qangleOfs;
        int h0 = h[0], h1 = h[1];

        float* hist = blockHist + pk.histOfs[0];
        float t0 = hist[h0] + a[0]*w;
        float t1 = hist[h1] + a[1]*w;
        hist[h0] = t0; hist[h1] = t1;
    }

    for( ; k < C2; k++ )
    {
        const PixData& pk = _pixData[k];
        const float* a = gradPtr + pk.gradOfs;
        float w, t0, t1, a0 = a[0], a1 = a[1];
        const uchar* h = qanglePtr + pk.qangleOfs;
        int h0 = h[0], h1 = h[1];

        float* hist = blockHist + pk.histOfs[0];
        w = pk.gradWeight*pk.histWeights[0];
        t0 = hist[h0] + a0*w;
        t1 = hist[h1] + a1*w;
        hist[h0] = t0; hist[h1] = t1;

        hist = blockHist + pk.histOfs[1];
        w = pk.gradWeight*pk.histWeights[1];
        t0 = hist[h0] + a0*w;
        t1 = hist[h1] + a1*w;
        hist[h0] = t0; hist[h1] = t1;
    }

    for( ; k < C4; k++ )
    {
        const PixData& pk = _pixData[k];
        const float* a = gradPtr + pk.gradOfs;
        float w, t0, t1, a0 = a[0], a1 = a[1];
        const uchar* h = qanglePtr + pk.qangleOfs;
        int h0 = h[0], h1 = h[1];

        float* hist = blockHist + pk.histOfs[0];
        w = pk.gradWeight*pk.histWeights[0];
        t0 = hist[h0] + a0*w;
        t1 = hist[h1] + a1*w;
        hist[h0] = t0; hist[h1] = t1;

        hist = blockHist + pk.histOfs[1];
        w = pk.gradWeight*pk.histWeights[1];
        t0 = hist[h0] + a0*w;
        t1 = hist[h1] + a1*w;
        hist[h0] = t0; hist[h1] = t1;

        hist = blockHist + pk.histOfs[2];
        w = pk.gradWeight*pk.histWeights[2];
        t0 = hist[h0] + a0*w;
        t1 = hist[h1] + a1*w;
        hist[h0] = t0; hist[h1] = t1;

        hist = blockHist + pk.histOfs[3];
        w = pk.gradWeight*pk.histWeights[3];
        t0 = hist[h0] + a0*w;
        t1 = hist[h1] + a1*w;
        hist[h0] = t0; hist[h1] = t1;
    }

    normalizeBlockHistogram(blockHist);

    return blockHist;
}


void HOGCache::normalizeBlockHistogram(float* _hist) const
{
    float* hist = &_hist[0];
#ifdef HAVE_IPP
    size_t sz = blockHistogramSize;
#else
    size_t i, sz = blockHistogramSize;
#endif

    float sum = 0;
#ifdef HAVE_IPP
    ippsDotProd_32f(hist,hist,sz,&sum);
#else
    for( i = 0; i < sz; i++ )
        sum += hist[i]*hist[i];
#endif

    float scale = 1.f/(std::sqrt(sum)+sz*0.1f), thresh = (float)descriptor->L2HysThreshold;
#ifdef HAVE_IPP
    ippsMulC_32f_I(scale,hist,sz);
    ippsThreshold_32f_I( hist, sz, thresh, ippCmpGreater );
    ippsDotProd_32f(hist,hist,sz,&sum);
#else
    for( i = 0, sum = 0; i < sz; i++ )
    {
        hist[i] = std::min(hist[i]*scale, thresh);
        sum += hist[i]*hist[i];
    }
#endif

    scale = 1.f/(std::sqrt(sum)+1e-3f);
#ifdef HAVE_IPP
    ippsMulC_32f_I(scale,hist,sz);
#else
    for( i = 0; i < sz; i++ )
        hist[i] *= scale;
#endif
}

//返回测试图片中水平方向和垂直方向共有多少个检测窗口
Size HOGCache::windowsInImage(Size imageSize, Size winStride) const
{
    return Size((imageSize.width - winSize.width)/winStride.width + 1,
                (imageSize.height - winSize.height)/winStride.height + 1);
}

//给定图片的大小，已经检测窗口滑动的大小和测试图片中的检测窗口的索引，得到该索引处检测窗口的
//尺寸，包括坐标信息。
Rect HOGCache::getWindow(Size imageSize, Size winStride, int idx) const
{
    int nwindowsX = (imageSize.width - winSize.width)/winStride.width + 1;
    int y = idx / nwindowsX;
    int x = idx - nwindowsX*y;
    return Rect( x*winStride.width, y*winStride.height, winSize.width, winSize.height );
}

//完成对整幅图像中所有扫描窗口，或指定扫描窗口的特征向量的计算。首先初始化HOGCache，
//之后遍历所有扫描窗口，在每个扫描窗口内遍历所有block并通过调用getBlock函数进行特征
//向量的快速计算。
void HOGDescriptor::compute(const Mat& img, vector<float>& descriptors,
                            Size winStride, Size padding,
                            const vector<Point>& locations) const
{
    if( winStride == Size() )
        winStride = cellSize;
    Size cacheStride(gcd(winStride.width, blockStride.width),
                     gcd(winStride.height, blockStride.height));
    size_t nwindows = locations.size();
    padding.width = (int)alignSize(std::max(padding.width, 0), cacheStride.width);
    padding.height = (int)alignSize(std::max(padding.height, 0), cacheStride.height);
    Size paddedImgSize(img.cols + padding.width*2, img.rows + padding.height*2);

    HOGCache cache(this, img, padding, padding, nwindows == 0, cacheStride);

    if( !nwindows )
        nwindows = cache.windowsInImage(paddedImgSize, winStride).area();

    const HOGCache::BlockData* blockData = &cache.blockData[0];

    int nblocks = cache.nblocks.area();
    int blockHistogramSize = cache.blockHistogramSize;
    size_t dsize = getDescriptorSize();
    descriptors.resize(dsize*nwindows);

    for( size_t i = 0; i < nwindows; i++ )
    {
        float* descriptor = &descriptors[i*dsize];

        Point pt0;
        if( !locations.empty() )
        {
            pt0 = locations[i];
            if( pt0.x < -padding.width || pt0.x > img.cols + padding.width - winSize.width ||
                pt0.y < -padding.height || pt0.y > img.rows + padding.height - winSize.height )
                continue;
        }
        else
        {
            pt0 = cache.getWindow(paddedImgSize, winStride, (int)i).tl() - Point(padding);
            CV_Assert(pt0.x % cacheStride.width == 0 && pt0.y % cacheStride.height == 0);
        }

        for( int j = 0; j < nblocks; j++ )
        {
            const HOGCache::BlockData& bj = blockData[j];
            Point pt = pt0 + bj.imgOffset;

            float* dst = descriptor + bj.histOfs;
            const float* src = cache.getBlock(pt, dst);
            if( src != dst )
#ifdef HAVE_IPP
               ippsCopy_32f(src,dst,blockHistogramSize);
#else
                for( int k = 0; k < blockHistogramSize; k++ )
                    dst[k] = src[k];
#endif
        }
    }
}

//过程与compute()基本类似，不同在于，在计算机扫描窗口特征向量过程中利用svmCec进行
//计算，并最终判定当前扫描窗口内是否有人。
void HOGDescriptor::detect(const Mat& img,
    vector<Point>& hits, vector<double>& weights, double hitThreshold,
    Size winStride, Size padding, const vector<Point>& locations) const
{
    hits.clear();
    if( svmDetector.empty() )
        return;

    if( winStride == Size() )
        winStride = cellSize;
    Size cacheStride(gcd(winStride.width, blockStride.width),
                     gcd(winStride.height, blockStride.height));
    size_t nwindows = locations.size();
    padding.width = (int)alignSize(std::max(padding.width, 0), cacheStride.width);
    padding.height = (int)alignSize(std::max(padding.height, 0), cacheStride.height);
    Size paddedImgSize(img.cols + padding.width*2, img.rows + padding.height*2);
//调用detect函数的内部：初始化实例化一个HOGCache（完成单幅图像的梯度幅度图及梯度方向图的计算，
//对blockData,pixData进行初始化工作）；对指定扫描窗口或遍历所有扫描窗口(取决于locations是否为空)
//计算扫描窗口对应特征向量；将计算得到的特征向量与svmVec做乘累加；利用阈值对结果进行判定，如果
//超过阈值，则说明当前窗口内含有行人，将该扫描窗口的相关信息进行保留；继续进行下一个窗口扫描，
//直到结束。
    HOGCache cache(this, img, padding, padding, nwindows == 0, cacheStride);

    if( !nwindows )
        nwindows = cache.windowsInImage(paddedImgSize, winStride).area();

    const HOGCache::BlockData* blockData = &cache.blockData[0];

    int nblocks = cache.nblocks.area();
    int blockHistogramSize = cache.blockHistogramSize;
    size_t dsize = getDescriptorSize();

    double rho = svmDetector.size() > dsize ? svmDetector[dsize] : 0;
    vector<float> blockHist(blockHistogramSize);

    for( size_t i = 0; i < nwindows; i++ )
    {
        Point pt0;
        if( !locations.empty() )
        {
            pt0 = locations[i];
            if( pt0.x < -padding.width || pt0.x > img.cols + padding.width - winSize.width ||
                pt0.y < -padding.height || pt0.y > img.rows + padding.height - winSize.height )
                continue;
        }
        else
        {
            pt0 = cache.getWindow(paddedImgSize, winStride, (int)i).tl() - Point(padding);
            CV_Assert(pt0.x % cacheStride.width == 0 && pt0.y % cacheStride.height == 0);
        }
        double s = rho;
        const float* svmVec = &svmDetector[0];
#ifdef HAVE_IPP
        int j;
#else
        int j, k;
#endif
        for( j = 0; j < nblocks; j++, svmVec += blockHistogramSize )
        {
            const HOGCache::BlockData& bj = blockData[j];
            Point pt = pt0 + bj.imgOffset;

            const float* vec = cache.getBlock(pt, &blockHist[0]);
#ifdef HAVE_IPP
            Ipp32f partSum;
            ippsDotProd_32f(vec,svmVec,blockHistogramSize,&partSum);
            s += (double)partSum;
#else
            for( k = 0; k <= blockHistogramSize - 4; k += 4 )
                s += vec[k]*svmVec[k] + vec[k+1]*svmVec[k+1] +
                    vec[k+2]*svmVec[k+2] + vec[k+3]*svmVec[k+3];
            for( ; k < blockHistogramSize; k++ )
                s += vec[k]*svmVec[k];
#endif
        }
        if( s >= hitThreshold )
        {
            hits.push_back(pt0);
            weights.push_back(s);
        }
    }
}

void HOGDescriptor::detect(const Mat& img, vector<Point>& hits, double hitThreshold,
                           Size winStride, Size padding, const vector<Point>& locations) const
{
    vector<double> weightsV;
    detect(img, hits, weightsV, hitThreshold, winStride, padding, locations);
}

class HOGInvoker : public ParallelLoopBody
{
public:
    HOGInvoker( const HOGDescriptor* _hog, const Mat& _img,
                double _hitThreshold, Size _winStride, Size _padding,
                const double* _levelScale, std::vector<Rect> * _vec, Mutex* _mtx,
                std::vector<double>* _weights=0, std::vector<double>* _scales=0 )
    {
        hog = _hog;
        img = _img;
        hitThreshold = _hitThreshold;
        winStride = _winStride;
        padding = _padding;
        levelScale = _levelScale;
        vec = _vec;
        weights = _weights;
        scales = _scales;
        mtx = _mtx;
    }

    void operator()( const Range& range ) const
    {
        int i, i1 = range.start, i2 = range.end;
        double minScale = i1 > 0 ? levelScale[i1] : i2 > 1 ? levelScale[i1+1] : std::max(img.cols, img.rows);
        Size maxSz(cvCeil(img.cols/minScale), cvCeil(img.rows/minScale));
        Mat smallerImgBuf(maxSz, img.type());
        vector<Point> locations;
        vector<double> hitsWeights;

        for( i = i1; i < i2; i++ )
        {
            double scale = levelScale[i];
            //缩小图像尺寸 ，方便detectMultiScale
            Size sz(cvRound(img.cols/scale), cvRound(img.rows/scale));
            Mat smallerImg(sz, img.type(), smallerImgBuf.data);
            if( sz == img.size() )
                smallerImg = Mat(sz, img.type(), img.data, img.step);
            else
                resize(img, smallerImg, sz);
            hog->detect(smallerImg, locations, hitsWeights, hitThreshold, winStride, padding);
            Size scaledWinSize = Size(cvRound(hog->winSize.width*scale), cvRound(hog->winSize.height*scale));

            mtx->lock();
            for( size_t j = 0; j < locations.size(); j++ )
            {
                vec->push_back(Rect(cvRound(locations[j].x*scale),
                                    cvRound(locations[j].y*scale),
                                    scaledWinSize.width, scaledWinSize.height));
                if (scales)
                {
                    scales->push_back(scale);
                }
            }
            mtx->unlock();

            if (weights && (!hitsWeights.empty()))
            {
                mtx->lock();
                for (size_t j = 0; j < locations.size(); j++)
                {
                    weights->push_back(hitsWeights[j]);
                }
                mtx->unlock();
            }
        }
    }

    const HOGDescriptor* hog;
    Mat img;
    double hitThreshold;
    Size winStride;
    Size padding;
    const double* levelScale;
    std::vector<Rect>* vec;
    std::vector<double>* weights;
    std::vector<double>* scales;
    Mutex* mtx;
};


void HOGDescriptor::detectMultiScale(
    const Mat& img, vector<Rect>& foundLocations, vector<double>& foundWeights,
    double hitThreshold, Size winStride, Size padding,
    double scale0, double finalThreshold, bool useMeanshiftGrouping) const
{
    double scale = 1.;
    int levels = 0;

    vector<double> levelScale;
    for( levels = 0; levels < nlevels; levels++ )
    {
        levelScale.push_back(scale);
        if( cvRound(img.cols/scale) < winSize.width ||
            cvRound(img.rows/scale) < winSize.height ||
            scale0 <= 1 )
            break;
        scale *= scale0;
    }
    levels = std::max(levels, 1);
    levelScale.resize(levels);

    std::vector<Rect> allCandidates;
    std::vector<double> tempScales;
    std::vector<double> tempWeights;
    std::vector<double> foundScales;
    Mutex mtx;

    parallel_for_(Range(0, (int)levelScale.size()),
                 HOGInvoker(this, img, hitThreshold, winStride, padding, &levelScale[0], &allCandidates, &mtx, &tempWeights, &tempScales));

    std::copy(tempScales.begin(), tempScales.end(), back_inserter(foundScales));
    foundLocations.clear();
    std::copy(allCandidates.begin(), allCandidates.end(), back_inserter(foundLocations));
    foundWeights.clear();
    std::copy(tempWeights.begin(), tempWeights.end(), back_inserter(foundWeights));

    if ( useMeanshiftGrouping )
    {
        groupRectangles_meanshift(foundLocations, foundWeights, foundScales, finalThreshold, winSize);
    }
    else
    {
        groupRectangles(foundLocations, foundWeights, (int)finalThreshold, 0.2);
    }
}

void HOGDescriptor::detectMultiScale(const Mat& img, vector<Rect>& foundLocations,
                                     double hitThreshold, Size winStride, Size padding,
                                     double scale0, double finalThreshold, bool useMeanshiftGrouping) const
{
    vector<double> foundWeights;
    detectMultiScale(img, foundLocations, foundWeights, hitThreshold, winStride,
                     padding, scale0, finalThreshold, useMeanshiftGrouping);
}

typedef RTTIImpl<HOGDescriptor> HOGRTTI;

CvType hog_type( CV_TYPE_NAME_HOG_DESCRIPTOR, HOGRTTI::isInstance,
                 HOGRTTI::release, HOGRTTI::read, HOGRTTI::write, HOGRTTI::clone);



class HOGConfInvoker : public ParallelLoopBody
{
public:
       HOGConfInvoker( const HOGDescriptor* _hog, const Mat& _img,
                               double _hitThreshold, Size _padding,
                               std::vector<DetectionROI>* locs,
                               std::vector<Rect>* _vec, Mutex* _mtx )
       {
               hog = _hog;
               img = _img;
               hitThreshold = _hitThreshold;
               padding = _padding;
               locations = locs;
               vec = _vec;
               mtx = _mtx;
       }

       void operator()( const Range& range ) const
       {
               int i, i1 = range.start, i2 = range.end;

               Size maxSz(cvCeil(img.cols/(*locations)[0].scale), cvCeil(img.rows/(*locations)[0].scale));
               Mat smallerImgBuf(maxSz, img.type());
               vector<Point> dets;

               for( i = i1; i < i2; i++ )
               {
                       double scale = (*locations)[i].scale;

                       Size sz(cvRound(img.cols / scale), cvRound(img.rows / scale));
                       Mat smallerImg(sz, img.type(), smallerImgBuf.data);

                       if( sz == img.size() )
                               smallerImg = Mat(sz, img.type(), img.data, img.step);
                       else
                               resize(img, smallerImg, sz);

                       hog->detectROI(smallerImg, (*locations)[i].locations, dets, (*locations)[i].confidences, hitThreshold, Size(), padding);
                       Size scaledWinSize = Size(cvRound(hog->winSize.width*scale), cvRound(hog->winSize.height*scale));
                       mtx->lock();
                       for( size_t j = 0; j < dets.size(); j++ )
                       {
                               vec->push_back(Rect(cvRound(dets[j].x*scale),
                                                                       cvRound(dets[j].y*scale),
                                                                       scaledWinSize.width, scaledWinSize.height));
                       }
                       mtx->unlock();
               }
       }

       const HOGDescriptor* hog;
       Mat img;
       double hitThreshold;
       std::vector<DetectionROI>* locations;
       Size padding;
       std::vector<Rect>* vec;
       Mutex* mtx;
};

void HOGDescriptor::detectROI(const cv::Mat& img, const vector<cv::Point> &locations,
                                       CV_OUT std::vector<cv::Point>& foundLocations, CV_OUT std::vector<double>& confidences,
                                       double hitThreshold, cv::Size winStride,
                                       cv::Size padding) const
{
   foundLocations.clear();

   confidences.clear();

   if( svmDetector.empty() )
       return;

   if( locations.empty() )
       return;

   if( winStride == Size() )
       winStride = cellSize;

   Size cacheStride(gcd(winStride.width, blockStride.width),
                                    gcd(winStride.height, blockStride.height));

   size_t nwindows = locations.size();
   padding.width = (int)alignSize(std::max(padding.width, 0), cacheStride.width);
   padding.height = (int)alignSize(std::max(padding.height, 0), cacheStride.height);
   Size paddedImgSize(img.cols + padding.width*2, img.rows + padding.height*2);

   // HOGCache cache(this, img, padding, padding, nwindows == 0, cacheStride);
   HOGCache cache(this, img, padding, padding, true, cacheStride);
   if( !nwindows )
           nwindows = cache.windowsInImage(paddedImgSize, winStride).area();

   const HOGCache::BlockData* blockData = &cache.blockData[0];

   int nblocks = cache.nblocks.area();
   int blockHistogramSize = cache.blockHistogramSize;
   size_t dsize = getDescriptorSize();

   double rho = svmDetector.size() > dsize ? svmDetector[dsize] : 0;
   vector<float> blockHist(blockHistogramSize);

   for( size_t i = 0; i < nwindows; i++ )
   {
           Point pt0;
           pt0 = locations[i];
           if( pt0.x < -padding.width || pt0.x > img.cols + padding.width - winSize.width ||
                   pt0.y < -padding.height || pt0.y > img.rows + padding.height - winSize.height )
           {
               // out of image
               confidences.push_back(-10.0);
               continue;
           }

           double s = rho;
           const float* svmVec = &svmDetector[0];
           int j, k;

           for( j = 0; j < nblocks; j++, svmVec += blockHistogramSize )
           {
                   const HOGCache::BlockData& bj = blockData[j];
                   Point pt = pt0 + bj.imgOffset;
                   // need to devide this into 4 parts!
                   const float* vec = cache.getBlock(pt, &blockHist[0]);
                   for( k = 0; k <= blockHistogramSize - 4; k += 4 )
                           s += vec[k]*svmVec[k] + vec[k+1]*svmVec[k+1] +
                                   vec[k+2]*svmVec[k+2] + vec[k+3]*svmVec[k+3];
                   for( ; k < blockHistogramSize; k++ )
                           s += vec[k]*svmVec[k];
           }
           // cv::waitKey();
           confidences.push_back(s);

           if( s >= hitThreshold )
                   foundLocations.push_back(pt0);
   }
 }

void HOGDescriptor::detectMultiScaleROI(const cv::Mat& img,
                                                           CV_OUT std::vector<cv::Rect>& foundLocations,
                                                           std::vector<DetectionROI>& locations,
                                                           double hitThreshold,
                                                           int groupThreshold) const
{
   std::vector<Rect> allCandidates;
   Mutex mtx;

   parallel_for_(Range(0, (int)locations.size()),
                        HOGConfInvoker(this, img, hitThreshold, Size(8, 8), &locations, &allCandidates, &mtx));

   foundLocations.resize(allCandidates.size());
   std::copy(allCandidates.begin(), allCandidates.end(), foundLocations.begin());
   cv::groupRectangles(foundLocations, groupThreshold, 0.2);
}

void HOGDescriptor::readALTModel(std::string modelfile)
{
   // read model from SVMlight format..
   FILE *modelfl;
   if ((modelfl = fopen(modelfile.c_str(), "rb")) == NULL)
   {
       std::string eerr("file not exist");
       std::string efile(__FILE__);
       std::string efunc(__FUNCTION__);
       throw Exception(CV_StsError, eerr, efile, efunc, __LINE__);
   }
   char version_buffer[10];
   if (!fread (&version_buffer,sizeof(char),10,modelfl))
   {
       std::string eerr("version?");
       std::string efile(__FILE__);
       std::string efunc(__FUNCTION__);
       throw Exception(CV_StsError, eerr, efile, efunc, __LINE__);
   }
   if(strcmp(version_buffer,"V6.01")) {
       std::string eerr("version doesnot match");
       std::string efile(__FILE__);
       std::string efunc(__FUNCTION__);
       throw Exception(CV_StsError, eerr, efile, efunc, __LINE__);
   }
   /* read version number */
   int version = 0;
   if (!fread (&version,sizeof(int),1,modelfl))
   { throw Exception(); }
   if (version < 200)
   {
       std::string eerr("version doesnot match");
       std::string efile(__FILE__);
       std::string efunc(__FUNCTION__);
       throw Exception();
   }
   int kernel_type;
   size_t nread;
   nread=fread(&(kernel_type),sizeof(int),1,modelfl);

   {// ignore these
       int poly_degree;
       nread=fread(&(poly_degree),sizeof(int),1,modelfl);

       double rbf_gamma;
       nread=fread(&(rbf_gamma),sizeof(double), 1, modelfl);
       double coef_lin;
       nread=fread(&(coef_lin),sizeof(double),1,modelfl);
       double coef_const;
       nread=fread(&(coef_const),sizeof(double),1,modelfl);
       int l;
       nread=fread(&l,sizeof(int),1,modelfl);
       char* custom = new char[l];
       nread=fread(custom,sizeof(char),l,modelfl);
       delete[] custom;
   }
   int totwords;
   nread=fread(&(totwords),sizeof(int),1,modelfl);
   {// ignore these
       int totdoc;
       nread=fread(&(totdoc),sizeof(int),1,modelfl);
       int sv_num;
       nread=fread(&(sv_num), sizeof(int),1,modelfl);
   }

   double linearbias;
   nread=fread(&linearbias, sizeof(double), 1, modelfl);

   std::vector<float> detector;
   detector.clear();
   if(kernel_type == 0) { /* linear kernel */
       /* save linear wts also */
       double *linearwt = new double[totwords+1];
       int length = totwords;
       nread = fread(linearwt, sizeof(double), totwords + 1, modelfl);
       if(nread != static_cast<size_t>(length) + 1) {
           delete [] linearwt;
           throw Exception();
       }

       for(int i = 0; i < length; i++)
           detector.push_back((float)linearwt[i]);

       detector.push_back((float)-linearbias);
       setSVMDetector(detector);
       delete [] linearwt;
   } else {
       throw Exception();
   }
   fclose(modelfl);
}

void HOGDescriptor::groupRectangles(vector<cv::Rect>& rectList, vector<double>& weights, int groupThreshold, double eps) const
{
    if( groupThreshold <= 0 || rectList.empty() )
    {
        return;
    }

    CV_Assert(rectList.size() == weights.size());

    vector<int> labels;
    int nclasses = partition(rectList, labels, SimilarRects(eps));

    vector<cv::Rect_<double> > rrects(nclasses);
    vector<int> numInClass(nclasses, 0);
    vector<double> foundWeights(nclasses, DBL_MIN);
    vector<double> totalFactorsPerClass(nclasses, 1);
    int i, j, nlabels = (int)labels.size();

    for( i = 0; i < nlabels; i++ )
    {
        int cls = labels[i];
        rrects[cls].x += rectList[i].x;
        rrects[cls].y += rectList[i].y;
        rrects[cls].width += rectList[i].width;
        rrects[cls].height += rectList[i].height;
        foundWeights[cls] = max(foundWeights[cls], weights[i]);
        numInClass[cls]++;
    }

    for( i = 0; i < nclasses; i++ )
    {
        // find the average of all ROI in the cluster
        cv::Rect_<double> r = rrects[i];
        double s = 1.0/numInClass[i];
        rrects[i] = cv::Rect_<double>(cv::saturate_cast<double>(r.x*s),
            cv::saturate_cast<double>(r.y*s),
            cv::saturate_cast<double>(r.width*s),
            cv::saturate_cast<double>(r.height*s));
    }

    rectList.clear();
    weights.clear();

    for( i = 0; i < nclasses; i++ )
    {
        cv::Rect r1 = rrects[i];
        int n1 = numInClass[i];
        double w1 = foundWeights[i];
        if( n1 <= groupThreshold )
            continue;
        // filter out small rectangles inside large rectangles
        for( j = 0; j < nclasses; j++ )
        {
            int n2 = numInClass[j];

            if( j == i || n2 <= groupThreshold )
                continue;

            cv::Rect r2 = rrects[j];

            int dx = cv::saturate_cast<int>( r2.width * eps );
            int dy = cv::saturate_cast<int>( r2.height * eps );

            if( r1.x >= r2.x - dx &&
                r1.y >= r2.y - dy &&
                r1.x + r1.width <= r2.x + r2.width + dx &&
                r1.y + r1.height <= r2.y + r2.height + dy &&
                (n2 > std::max(3, n1) || n1 < 3) )
                break;
        }

        if( j == nclasses )
        {
            rectList.push_back(r1);
            weights.push_back(w1);
        }
    }
}
}

//参考文献：
//http://blog.youkuaiyun.com/pp5576155/article/details/7029699#1536434-tsina-1-47472-66a1f5d8f89e9ad52626f6f40fdeadaa
//http://www.cnblogs.com/tornadomeet/archive/2012/08/15/2640754.html
//http://blog.youkuaiyun.com/antter0510/article/details/20565045
转载于:https://my.oschina.net/u/1035026/blog/222840