转自:http://blog.youkuaiyun.com/lanxuecc/article/details/52689008
接着上一个博客http://blog.youkuaiyun.com/lanxuecc/article/details/52688605在弱分类器训练的主体函数cvCreateCARTClassifier中我们看到主要是调用cvCreateMTStumpClassifier函数来训练得到弱分类器的结点,下面注释下这个函数
/*
 * cvCreateMTStumpClassifier
 *
 * Multithreaded stump classifier constructor
 * Includes huge train data support through callback function
 */
CV_BOOST_IMPL
CvClassifier* cvCreateMTStumpClassifier( CvMat* trainData, // matrix of precomputed Haar feature values of the training samples
int flags, // 1: samples stored as rows, 0: samples stored as columns
CvMat* trainClasses, // sample class labels {-1, 1}
CvMat* /*typeMask*/, // unused; kept so the callback signature stays uniform
CvMat* missedMeasurementsMask, // rarely used; must be NULL (asserted below)
CvMat* compIdx, // feature index sequence (must be NULL) (row vector)
CvMat* sampleIdx, // indices of the samples actually used for training (row vector)
CvMat* weights, // weights of the actually used training samples (row vector)
CvClassifierTrainParams* trainParams ) // training parameters: error criterion, total feature count, number of features each thread processes per portion, etc.
{
CvStumpClassifier* stump = NULL; // the weak classifier (stump) being built
int m = 0; // total number of samples
int n = 0; // total number of features
uchar* data = NULL; // pointer into trainData
size_t cstep = 0; // byte stride between consecutive features in trainData
size_t sstep = 0; // byte stride between consecutive samples in trainData
int datan = 0; // number of precomputed features
uchar* ydata = NULL; // pointer into trainClasses
size_t ystep = 0; // byte stride between trainClasses elements
uchar* idxdata = NULL; // pointer into sampleIdx
size_t idxstep = 0; // byte stride between sampleIdx elements
int l = 0; // number of samples actually used for training
uchar* wdata = NULL; // pointer into weights
size_t wstep = 0; // byte stride between weights elements
/* sortedIdx is the precomputed matrix holding, for every precomputed Haar
 * feature, the indices of all samples sorted by that feature's value */
uchar* sorteddata = NULL; // pointer into sortedIdx
int sortedtype = 0; // element type of sortedIdx
size_t sortedcstep = 0; // byte stride between sortedIdx rows
size_t sortedsstep = 0; // byte stride between sortedIdx elements
int sortedn = 0; // number of sortedIdx rows (precomputed feature count)
int sortedm = 0; // number of sortedIdx columns (actual training sample count)
char* filter = NULL; // per-sample presence flags (row vector): 1 if the sample is used, 0 otherwise
int i = 0;
int compidx = 0; // start index of the current feature portion
int stumperror; // threshold criterion: 1. misclass 2. gini 3. entropy 4. least sum of squares
int portion; // portion size: the n features are processed in groups of `portion` each
/* private variables */
CvMat mat; // holds feature values computed on demand for non-precomputed features
CvValArray va;
float lerror; // error left of the threshold
float rerror; // error right of the threshold
float left; // confidence of the left branch
float right; // confidence of the right branch
float threshold; // threshold
int optcompidx; // index of the optimal feature
float sumw;
float sumwy;
float sumwyy;
/* temporaries used inside the loops */
int t_compidx;
int t_n;
int ti;
int tj;
int tk;
uchar* t_data; // points into data
size_t t_cstep; // cstep
size_t t_sstep; // sstep
size_t matcstep; // byte stride between consecutive features in mat
size_t matsstep; // byte stride between consecutive samples in mat
int* t_idx; // sample index sequence
/* end private variables */
CV_Assert( trainParams != NULL );
CV_Assert( trainClasses != NULL );
CV_Assert( CV_MAT_TYPE( trainClasses->type ) == CV_32FC1 );
CV_Assert( missedMeasurementsMask == NULL );
CV_Assert( compIdx == NULL );
// threshold criterion: 1. misclass 2. gini 3. entropy 4. least sum of squares
stumperror = (int) ((CvMTStumpTrainParams*) trainParams)->error;
// sample class labels
ydata = trainClasses->data.ptr;
if( trainClasses->rows == 1 )
{
m = trainClasses->cols;
ystep = CV_ELEM_SIZE( trainClasses->type );
}
else
{
m = trainClasses->rows;
ystep = trainClasses->step;
}
// sample weights
wdata = weights->data.ptr;
if( weights->rows == 1 )
{
CV_Assert( weights->cols == m );
wstep = CV_ELEM_SIZE( weights->type );
}
else
{
CV_Assert( weights->rows == m );
wstep = weights->step;
}
// precomputed, pre-sorted order of all samples' feature values
// if sortedIdx is NULL, trainData is a row vector (1*m); if sortedIdx is not NULL, trainData is an (m*datan) matrix
if( ((CvMTStumpTrainParams*) trainParams)->sortedIdx != NULL )
{
sortedtype =
CV_MAT_TYPE( ((CvMTStumpTrainParams*) trainParams)->sortedIdx->type );
assert( sortedtype == CV_16SC1 || sortedtype == CV_32SC1
|| sortedtype == CV_32FC1 );
sorteddata = ((CvMTStumpTrainParams*) trainParams)->sortedIdx->data.ptr;
sortedsstep = CV_ELEM_SIZE( sortedtype );
sortedcstep = ((CvMTStumpTrainParams*) trainParams)->sortedIdx->step;
sortedn = ((CvMTStumpTrainParams*) trainParams)->sortedIdx->rows;
sortedm = ((CvMTStumpTrainParams*) trainParams)->sortedIdx->cols;
}
// precomputed feature values of all samples
if( trainData == NULL ) // NOTE(review): the blog author reports never hitting the NULL case in practice
{
assert( ((CvMTStumpTrainParams*) trainParams)->getTrainData != NULL );
n = ((CvMTStumpTrainParams*) trainParams)->numcomp;
assert( n > 0 );
}
else
{
assert( CV_MAT_TYPE( trainData->type ) == CV_32FC1 );
data = trainData->data.ptr;
if( CV_IS_ROW_SAMPLE( flags ) ) // trainData stores samples as rows
{
cstep = CV_ELEM_SIZE( trainData->type );
sstep = trainData->step;
assert( m == trainData->rows );
datan = n = trainData->cols;
}
else // trainData stores samples as columns
{
sstep = CV_ELEM_SIZE( trainData->type );
cstep = trainData->step;
assert( m == trainData->cols );
datan = n = trainData->rows;
}
// when a getTrainData callback exists, the total feature count may exceed the precomputed count
if( ((CvMTStumpTrainParams*) trainParams)->getTrainData != NULL )
{
n = ((CvMTStumpTrainParams*) trainParams)->numcomp; // total number of features
}
}
// the precomputed feature count must not exceed the total feature count
assert( datan <= n );
if( sampleIdx != NULL ) // small-weight samples were already pruned
{
assert( CV_MAT_TYPE( sampleIdx->type ) == CV_32FC1 );
idxdata = sampleIdx->data.ptr;
idxstep = ( sampleIdx->rows == 1 )
? CV_ELEM_SIZE( sampleIdx->type ) : sampleIdx->step;
l = ( sampleIdx->rows == 1 ) ? sampleIdx->cols : sampleIdx->rows;
// sorteddata covers all training samples; mark the ones actually used
if( sorteddata != NULL )
{
filter = (char*) cvAlloc( sizeof( char ) * m );
memset( (void*) filter, 0, sizeof( char ) * m );
for( i = 0; i < l; i++ )
{
filter[(int) *((float*) (idxdata + i * idxstep))] = (char) 1; // 1 if the sample is used, 0 otherwise
}
}
}
else // small-weight samples were not pruned
{
l = m;
}
// allocate and zero one stump node
stump = (CvStumpClassifier*) cvAlloc( sizeof( CvStumpClassifier) );
memset( (void*) stump, 0, sizeof( CvStumpClassifier ) );
// portion size; presumably the features are grouped so several threads can each process one group — see the OpenMP pragma below
portion = ((CvMTStumpTrainParams*)trainParams)->portion;
if( portion < 1 )
{
/* auto portion */
portion = n;
#ifdef _OPENMP
portion /= omp_get_max_threads();
#endif /* _OPENMP */
}
stump->eval = cvEvalStumpClassifier;
stump->tune = NULL;
stump->save = NULL;
stump->release = cvReleaseStumpClassifier;
stump->lerror = FLT_MAX;
stump->rerror = FLT_MAX;
stump->left = 0.0F;
stump->right = 0.0F;
compidx = 0;
// parallel processing (inactive unless compiled with OpenMP)
#ifdef _OPENMP
#pragma omp parallel private(mat, va, lerror, rerror, left, right, threshold, \
optcompidx, sumw, sumwy, sumwyy, t_compidx, t_n, \
ti, tj, tk, t_data, t_cstep, t_sstep, matcstep, \
matsstep, t_idx)
#endif /* _OPENMP */
{
lerror = FLT_MAX;
rerror = FLT_MAX;
left = 0.0F;
right = 0.0F;
threshold = 0.0F;
optcompidx = 0;
sumw = FLT_MAX;
sumwy = FLT_MAX;
sumwyy = FLT_MAX;
t_compidx = 0;
t_n = 0;
ti = 0;
tj = 0;
tk = 0;
t_data = NULL;
t_cstep = 0;
t_sstep = 0;
matcstep = 0;
matsstep = 0;
t_idx = NULL;
mat.data.ptr = NULL;
// if the precomputed feature count is below the total, the remaining features
// must be computed on demand; their values are stored in mat
if( datan < n )
{
if( CV_IS_ROW_SAMPLE( flags ) )
{
mat = cvMat( m, portion, CV_32FC1, 0 );
matcstep = CV_ELEM_SIZE( mat.type );
matsstep = mat.step;
}
else
{
mat = cvMat( portion, m, CV_32FC1, 0 );
matcstep = mat.step;
matsstep = CV_ELEM_SIZE( mat.type );
}
mat.data.ptr = (uchar*) cvAlloc( sizeof( float ) * mat.rows * mat.cols );
}
// store the indices of the actually used training samples into t_idx
if( filter != NULL || sortedn < n )
{
t_idx = (int*) cvAlloc( sizeof( int ) * m );
if( sortedn == 0 || filter == NULL )
{
if( idxdata != NULL )
{
for( ti = 0; ti < l; ti++ )
{
t_idx[ti] = (int) *((float*) (idxdata + ti * idxstep));
}
}
else
{
for( ti = 0; ti < l; ti++ )
{
t_idx[ti] = ti;
}
}
}
}
#ifdef _OPENMP
#pragma omp critical(c_compidx)
#endif /* _OPENMP */
// claim the initial feature range for this thread
{
t_compidx = compidx;
compidx += portion;
}
// search for the optimal weak classifier
while( t_compidx < n )
{
t_n = portion; // number of features in this portion
if( t_compidx < datan ) // features already precomputed
{
t_n = ( t_n < (datan - t_compidx) ) ? t_n : (datan - t_compidx);
t_data = data;
t_cstep = cstep;
t_sstep = sstep;
}
else // features not precomputed
{
t_n = ( t_n < (n - t_compidx) ) ? t_n : (n - t_compidx);
t_cstep = matcstep;
t_sstep = matsstep;
t_data = mat.data.ptr - t_compidx * ((size_t) t_cstep );
// compute the value of each new feature for every training sample
((CvMTStumpTrainParams*)trainParams)->getTrainData( &mat,
sampleIdx, compIdx, t_compidx, t_n,
((CvMTStumpTrainParams*)trainParams)->userdata );
}
/* precomputed-feature part: directly search for the optimal feature, i.e. the optimal weak classifier */
if( sorteddata != NULL )
{
if( filter != NULL ) // the actually used samples must be extracted
{
switch( sortedtype )
{
case CV_16SC1: // the three cases are nearly identical; only this branch is annotated
// search for the optimal feature within this portion of (datan precomputed) features
for( ti = t_compidx; ti < MIN( sortedn, t_compidx + t_n ); ti++ )
{
tk = 0;
// extract the actually used samples
for( tj = 0; tj < sortedm; tj++ )
{
int curidx = (int) ( *((short*) (sorteddata
+ ti * sortedcstep + tj * sortedsstep)) );
if( filter[curidx] != 0 )
{
t_idx[tk++] = curidx;
}
}
// if findStumpThreshold_32s returns 1, a better threshold was found: update the optimal feature
if( findStumpThreshold_32s[stumperror](
t_data + ti * t_cstep, t_sstep,
wdata, wstep, ydata, ystep,
(uchar*) t_idx, sizeof( int ), tk,
&lerror, &rerror,
&threshold, &left, &right,
&sumw, &sumwy, &sumwyy ) )
{
optcompidx = ti;
}
}
break;
case CV_32SC1:
for( ti = t_compidx; ti < MIN( sortedn, t_compidx + t_n ); ti++ )
{
tk = 0;
for( tj = 0; tj < sortedm; tj++ )
{
int curidx = (int) ( *((int*) (sorteddata
+ ti * sortedcstep + tj * sortedsstep)) );
if( filter[curidx] != 0 )
{
t_idx[tk++] = curidx;
}
}
if( findStumpThreshold_32s[stumperror](
t_data + ti * t_cstep, t_sstep,
wdata, wstep, ydata, ystep,
(uchar*) t_idx, sizeof( int ), tk,
&lerror, &rerror,
&threshold, &left, &right,
&sumw, &sumwy, &sumwyy ) )
{
optcompidx = ti;
}
}
break;
case CV_32FC1:
for( ti = t_compidx; ti < MIN( sortedn, t_compidx + t_n ); ti++ )
{
tk = 0;
for( tj = 0; tj < sortedm; tj++ )
{
int curidx = (int) ( *((float*) (sorteddata
+ ti * sortedcstep + tj * sortedsstep)) );
if( filter[curidx] != 0 )
{
t_idx[tk++] = curidx;
}
}
if( findStumpThreshold_32s[stumperror](
t_data + ti * t_cstep, t_sstep,
wdata, wstep, ydata, ystep,
(uchar*) t_idx, sizeof( int ), tk,
&lerror, &rerror,
&threshold, &left, &right,
&sumw, &sumwy, &sumwyy ) )
{
optcompidx = ti;
}
}
break;
default:
assert( 0 );
break;
}
}
else // all training samples take part in the computation
{
switch( sortedtype )
{
case CV_16SC1:/* iterate over the features looking for the one minimizing the combined left/right error */
for( ti = t_compidx; ti < MIN( sortedn, t_compidx + t_n ); ti++ )
{
/*
t_data + ti * t_cstep: values of the ti-th feature
t_sstep: stride of the feature value array
wdata: sample weights
wstep: stride of the sample weight array
ydata: sample class labels
ystep: stride of the class label array
sorteddata + ti * sortedcstep: sample indices sorted by the ti-th feature's value
sortedsstep: stride
sortedm: number of index columns, i.e. the actual sample count
lerror: error left of the threshold
rerror: error right of the threshold
threshold: threshold
left: left-branch confidence
right: right-branch confidence
optcompidx: optimal feature
*/
if( findStumpThreshold_16s[stumperror](
t_data + ti * t_cstep, t_sstep,
wdata, wstep, ydata, ystep,
sorteddata + ti * sortedcstep, sortedsstep, sortedm,
&lerror, &rerror,
&threshold, &left, &right,
&sumw, &sumwy, &sumwyy ) )
{
optcompidx = ti;
}
}
break;
case CV_32SC1:
for( ti = t_compidx; ti < MIN( sortedn, t_compidx + t_n ); ti++ )
{
if( findStumpThreshold_32s[stumperror](
t_data + ti * t_cstep, t_sstep,
wdata, wstep, ydata, ystep,
sorteddata + ti * sortedcstep, sortedsstep, sortedm,
&lerror, &rerror,
&threshold, &left, &right,
&sumw, &sumwy, &sumwyy ) )
{
optcompidx = ti;
}
}
break;
case CV_32FC1:
for( ti = t_compidx; ti < MIN( sortedn, t_compidx + t_n ); ti++ )
{
if( findStumpThreshold_32f[stumperror](
t_data + ti * t_cstep, t_sstep,
wdata, wstep, ydata, ystep,
sorteddata + ti * sortedcstep, sortedsstep, sortedm,
&lerror, &rerror,
&threshold, &left, &right,
&sumw, &sumwy, &sumwyy ) )
{
optcompidx = ti;
}
}
break;
default:
assert( 0 );
break;
}
}
}
/* non-precomputed part: sort the samples' feature values first, then continue searching for the optimal feature */
ti = MAX( t_compidx, MIN( sortedn, t_compidx + t_n ) );
for( ; ti < t_compidx + t_n; ti++ )
{
va.data = t_data + ti * t_cstep;
va.step = t_sstep;
// sort the sample indices by feature value
icvSortIndexedValArray_32s( t_idx, l, &va );
// continue searching for the optimal feature
if( findStumpThreshold_32s[stumperror](
t_data + ti * t_cstep, t_sstep,
wdata, wstep, ydata, ystep,
(uchar*)t_idx, sizeof( int ), l,
&lerror, &rerror,
&threshold, &left, &right,
&sumw, &sumwy, &sumwyy ) )
{
optcompidx = ti;
}
}
#ifdef _OPENMP
#pragma omp critical(c_compidx)
#endif /* _OPENMP */
// claim the next feature range
{
t_compidx = compidx;
compidx += portion;
}
}
#ifdef _OPENMP
#pragma omp critical(c_beststump)
#endif /* _OPENMP */
// keep the best weak classifier found by this thread
{
if( lerror + rerror < stump->lerror + stump->rerror )
{
stump->lerror = lerror;
stump->rerror = rerror;
stump->compidx = optcompidx;
stump->threshold = threshold;
stump->left = left;
stump->right = right;
}
}
/* free allocated memory */
if( mat.data.ptr != NULL )
{
cvFree( &(mat.data.ptr) );
}
if( t_idx != NULL )
{
cvFree( &t_idx );
}
} /* end of parallel region */
/* END */
/* free allocated memory */
if( filter != NULL )
{
cvFree( &filter );
}
// for a discrete-type classifier the confidences must be +1 or -1
if( ((CvMTStumpTrainParams*) trainParams)->type == CV_CLASSIFICATION_CLASS ) /* the conversion only happens for this classifier type */
{
stump->left = 2.0F * (stump->left >= 0.5F) - 1.0F; /* convert the computed left/right confidences to +1 or -1 */
stump->right = 2.0F * (stump->right >= 0.5F) - 1.0F;
}
return (CvClassifier*) stump;
}
从上面函数的代码中观察到在遍历特征时会调用findStumpThreshold_16s、findStumpThreshold_32s、findStumpThreshold_32f数组中定义了的总共12个函数指针,根据参数的不同调用不同的函数,例如findStumpThreshold_16s中的四个函数指针如下:
/* Dispatch table of function pointers, one entry per error criterion
 * (misclassification, gini, entropy, least sum of squares). */
CvFindThresholdFunc findStumpThreshold_16s[4] = {
icvFindStumpThreshold_misc_16s,
icvFindStumpThreshold_gini_16s,
icvFindStumpThreshold_entropy_16s,
icvFindStumpThreshold_sq_16s
};
例如函数指针 icvFindStumpThreshold_misc_16s,我并未在源码中直接找到这个函数的实现。
实际上,在这些数组声明的上方定义了一些宏,正是这些宏在预编译阶段生成了这些函数的定义。
下面举个例子:
宏1
/* misclassification error:
 * err = MIN( wpos, wneg );
 * Instantiates ICV_DEF_FIND_STUMP_THRESHOLD with the misclassification
 * error snippet; wposl/wposr are the positive-class weight sums on each side. */
#define ICV_DEF_FIND_STUMP_THRESHOLD_MISC( suffix, type ) \
ICV_DEF_FIND_STUMP_THRESHOLD( misc_##suffix, type, \
wposl = 0.5F * ( wl + wyl ); \
wposr = 0.5F * ( wr + wyr ); \
curleft = 0.5F * ( 1.0F + curleft ); \
curright = 0.5F * ( 1.0F + curright ); \
curlerror = MIN( wposl, wl - wposl ); \
currerror = MIN( wposr, wr - wposr ); \
)
宏2
#define ICV_DEF_FIND_STUMP_THRESHOLD( suffix, type, error ) \
CV_BOOST_IMPL int icvFindStumpThreshold_##suffix( \
uchar* data, size_t datastep, \
uchar* wdata, size_t wstep, \
uchar* ydata, size_t ystep, \
uchar* idxdata, size_t idxstep, int num, \
float* lerror, \
float* rerror, \
float* threshold, float* left, float* right, \
float* sumw, float* sumwy, float* sumwyy )
{
。。。。。。。。。。。。。。。。
}
根据宏在预编译阶段的解析原理,在预编译阶段声明:
ICV_DEF_FIND_STUMP_THRESHOLD_MISC( 16s, short )时会被上述第一个宏代替,变成
ICV_DEF_FIND_STUMP_THRESHOLD( misc_16s, short, \
wposl = 0.5F * ( wl + wyl ); \
wposr = 0.5F * ( wr + wyr ); \
curleft = 0.5F * ( 1.0F + curleft ); \
curright = 0.5F * ( 1.0F + curright ); \
curlerror = MIN( wposl, wl - wposl ); \
currerror = MIN( wposr, wr - wposr ); \
)
接着上述ICV_DEF_FIND_STUMP_THRESHOLD宏解析后被CV_BOOST_IMPL int icvFindStumpThreshold_##suffix替代变成:
CV_BOOST_IMPL int icvFindStumpThreshold_misc_16s( \
uchar* data, size_t datastep, \
uchar* wdata, size_t wstep, \
uchar* ydata, size_t ystep, \
uchar* idxdata, size_t idxstep, int num, \
float* lerror, \
float* rerror, \
float* threshold, float* left, float* right, \
float* sumw, float* sumwy, float* sumwyy )
{
。。。。。。。。。。
/*后面函数体中的所有"type"都被替换成"short"*/
/*函数体中的"error"被*/
wposl = 0.5F * ( wl + wyl ); \
wposr = 0.5F * ( wr + wyr ); \
curleft = 0.5F * ( 1.0F + curleft ); \
curright = 0.5F * ( 1.0F + curright ); \
curlerror = MIN( wposl, wl - wposl ); \
currerror = MIN( wposr, wr - wposr );
/*替换*/
}
这样就实现了icvFindStumpThreshold_misc_16s的函数定义,其他的声明也是同样的方法,这就很巧妙的将12个很相似的函数,用宏声明的方式给分别定义了,而不用重复写很多代码。
所以 findStumpThreshold_16s、findStumpThreshold_32s、findStumpThreshold_32f 数组中的函数指针所指向的函数都是由下述宏实现的,只是参数和 error 的计算方法各不相同:
/* Generates icvFindStumpThreshold_<suffix>: walks the samples of one feature
 * in ascending feature-value order (via idxdata, whose elements have the given
 * `type`) and finds the threshold minimizing the combined left/right error
 * computed by the caller-supplied `error` statement sequence.
 * Returns 1 iff a threshold better than the incoming (*lerror + *rerror) was
 * found; on success updates *lerror, *rerror, *threshold, *left, *right.
 * *sumw == FLT_MAX on entry requests (re)computation of the weight sums.
 * NOTE: the blog transcription dropped the line-continuation backslash after
 * the closing brace of the sum-computation block, which would terminate the
 * #define early; it is restored here. */
#define ICV_DEF_FIND_STUMP_THRESHOLD( suffix, type, error )                   \
CV_BOOST_IMPL int icvFindStumpThreshold_##suffix(                             \
uchar* data, size_t datastep,                                                 \
uchar* wdata, size_t wstep,                                                   \
uchar* ydata, size_t ystep,                                                   \
uchar* idxdata, size_t idxstep, int num,                                      \
float* lerror,                                                                \
float* rerror,                                                                \
float* threshold, float* left, float* right,                                  \
float* sumw, float* sumwy, float* sumwyy )                                    \
{                                                                             \
    int found = 0;                                                            \
    float wyl = 0.0F;   /* sum of w*y for samples left of the threshold */    \
    float wl = 0.0F;    /* sum of w for samples left of the threshold */      \
    float wyyl = 0.0F;  /* sum of w*y*y for samples left of the threshold */  \
    float wyr = 0.0F;                                                         \
    float wr = 0.0F;                                                          \
                                                                              \
    float curleft = 0.0F;                                                     \
    float curright = 0.0F;                                                    \
    float* prevval = NULL;                                                    \
    float* curval = NULL;                                                     \
    float curlerror = 0.0F;                                                   \
    float currerror = 0.0F;                                                   \
    float wposl;                                                              \
    float wposr;                                                              \
                                                                              \
    int i = 0;                                                                \
    int idx = 0;                                                              \
                                                                              \
    wposl = wposr = 0.0F;                                                     \
    if( *sumw == FLT_MAX )                                                    \
    {                                                                         \
        /* calculate sums */                                                  \
        float *y = NULL;                                                      \
        float *w = NULL;                                                      \
        float wy = 0.0F;                                                      \
                                                                              \
        *sumw = 0.0F;                                                         \
        *sumwy = 0.0F;                                                        \
        *sumwyy = 0.0F;                                                       \
        for( i = 0; i < num; i++ )                                            \
        {                                                                     \
            idx = (int) ( *((type*) (idxdata + i*idxstep)) );                 \
            w = (float*) (wdata + idx * wstep);                               \
            *sumw += *w;                                                      \
            y = (float*) (ydata + idx * ystep);                               \
            wy = (*w) * (*y);                                                 \
            *sumwy += wy;                                                     \
            *sumwyy += wy * (*y);                                             \
        }                                                                     \
    }                                                                         \
    /* walk the num samples in sorted order to find the curval minimizing */  \
    /* the combined left/right error */                                       \
    for( i = 0; i < num; i++ )                                                \
    {                                                                         \
        idx = (int) ( *((type*) (idxdata + i*idxstep)) );                     \
        curval = (float*) (data + idx * datastep);                            \
        /* for debug purpose */                                               \
        if( i > 0 ) assert( (*prevval) <= (*curval) );                        \
                                                                              \
        wyr = *sumwy - wyl;                                                   \
        wr = *sumw - wl;                                                      \
                                                                              \
        if( wl > 0.0 ) curleft = wyl / wl;                                    \
        else curleft = 0.0F;                                                  \
                                                                              \
        if( wr > 0.0 ) curright = wyr / wr;                                   \
        else curright = 0.0F;                                                 \
                                                                              \
        error                                                                 \
                                                                              \
        if( curlerror + currerror < (*lerror) + (*rerror) )                   \
        {                                                                     \
            (*lerror) = curlerror;                                            \
            (*rerror) = currerror;                                            \
            *threshold = *curval;                                             \
            if( i > 0 ) {                                                     \
                /* place the threshold midway between adjacent values */      \
                *threshold = 0.5F * (*threshold + *prevval);                  \
            }                                                                 \
            *left = curleft;                                                  \
            *right = curright;                                                \
            found = 1;                                                        \
        }                                                                     \
                                                                              \
        /* fold every sample sharing the current value into the left side */  \
        do                                                                    \
        {                                                                     \
            wl += *((float*) (wdata + idx * wstep));                          \
            wyl += (*((float*) (wdata + idx * wstep)))                        \
                * (*((float*) (ydata + idx * ystep)));                        \
            wyyl += *((float*) (wdata + idx * wstep))                         \
                * (*((float*) (ydata + idx * ystep)))                         \
                * (*((float*) (ydata + idx * ystep)));                        \
        }                                                                     \
        while( (++i) < num &&                                                 \
            ( *((float*) (data + (idx =                                       \
                (int) ( *((type*) (idxdata + i*idxstep))) ) * datastep))      \
              == *curval ) );                                                 \
        --i;                                                                  \
        prevval = curval;                                                     \
    } /* for each value */                                                    \
                                                                              \
    return found;                                                             \
}
这段代码的逻辑一句话概括就是:::遍历某特征的所有样本找到使分类的左右误差最小的阈值。