本系列作者:木凌
时间:2016年11月。
文章连接:http://blog.youkuaiyun.com/u014540717
一、主函数void forward_network(network net, network_state state)
void forward_network(network net, network_state state)
{
state.workspace = net.workspace;
int i;
for(i = 0; i < net.n; ++i){
state.index = i;
layer l = net.layers[i];
if(l.delta){
scal_cpu(l.outputs * l.batch, 0, l.delta, 1);
}
l.forward(l, state);
state.input = l.output;
}
}
- 1
- 2
- 3
- 4
- 5
- 6
- 7
- 8
- 9
- 10
- 11
- 12
- 13
- 14
- 15
- 16
- 17
- 18
- 19
- 20
- 21
- 22
- 23
- 1
- 2
- 3
- 4
- 5
- 6
- 7
- 8
- 9
- 10
- 11
- 12
- 13
- 14
- 15
- 16
- 17
- 18
- 19
- 20
- 21
- 22
- 23
1、前向传播-convolutional层
void forward_convolutional_layer(convolutional_layer l, network_state state)
{
int out_h = convolutional_out_height(l);
int out_w = convolutional_out_width(l);
int i;
fill_cpu(l.outputs*l.batch, 0, l.output, 1);
if(l.xnor){
binarize_weights(l.weights, l.n, l.c*l.size*l.size, l.binary_weights);
swap_binary(&l);
binarize_cpu(state.input, l.c*l.h*l.w*l.batch, l.binary_input);
state.input = l.binary_input;
}
int m = l.n;
int k = l.size*l.size*l.c;
int n = out_h*out_w;
if (l.xnor && l.c%32 == 0 && AI2) {
forward_xnor_layer(l, state);
printf("xnor\n");
} else {
float *a = l.weights;
float *b = state.workspace;
float *c = l.output;
for(i = 0; i < l.batch; ++i){
im2col_cpu(state.input, l.c, l.h, l.w,
l.size, l.stride, l.pad, b);
gemm(0,0,m,n,k,1,a,k,b,n,1,c,n);
c += n*m;
state.input += l.c*l.h*l.w;
}
}
if(l.batch_normalize){
forward_batchnorm_layer(l, state);
}
add_bias(l.output, l.biases, l.batch, l.n, out_h*out_w);
activate_array(l.output, m*n*l.batch, l.activation);
if(l.binary || l.xnor) swap_binary(&l);
}
- 1
- 2
- 3
- 4
- 5
- 6
- 7
- 8
- 9
- 10
- 11
- 12
- 13
- 14
- 15
- 16
- 17
- 18
- 19
- 20
- 21
- 22
- 23
- 24
- 25
- 26
- 27
- 28
- 29
- 30
- 31
- 32
- 33
- 34
- 35
- 36
- 37
- 38
- 39
- 40
- 41
- 42
- 43
- 44
- 45
- 46
- 47
- 48
- 49
- 50
- 51
- 52
- 53
- 54
- 55
- 56
- 57
- 58
- 59
- 60
- 61
- 62
- 63
- 64
- 65
- 66
- 67
- 68
- 69
- 70
- 71
- 72
- 73
- 74
- 75
- 76
- 77
- 78
- 79
- 80
- 81
- 82
- 83
- 84
- 85
- 86
- 87
- 88
- 1
- 2
- 3
- 4
- 5
- 6
- 7
- 8
- 9
- 10
- 11
- 12
- 13
- 14
- 15
- 16
- 17
- 18
- 19
- 20
- 21
- 22
- 23
- 24
- 25
- 26
- 27
- 28
- 29
- 30
- 31
- 32
- 33
- 34
- 35
- 36
- 37
- 38
- 39
- 40
- 41
- 42
- 43
- 44
- 45
- 46
- 47
- 48
- 49
- 50
- 51
- 52
- 53
- 54
- 55
- 56
- 57
- 58
- 59
- 60
- 61
- 62
- 63
- 64
- 65
- 66
- 67
- 68
- 69
- 70
- 71
- 72
- 73
- 74
- 75
- 76
- 77
- 78
- 79
- 80
- 81
- 82
- 83
- 84
- 85
- 86
- 87
- 88
函数剖析
a. im2col_cpu()
这个函数还是很重要的,我们来分析下。这个函数是从caffe中移植过来的
void im2col_cpu(float* data_im,
int channels, int height, int width,
int ksize, int stride, int pad, float* data_col)
{
int c,h,w;
int height_col = (height + 2*pad - ksize) / stride + 1;
int width_col = (width + 2*pad - ksize) / stride + 1;
int channels_col = channels * ksize * ksize;
for (c = 0; c < channels_col; ++c) {
int w_offset = c % ksize;
int h_offset = (c / ksize) % ksize;
int c_im = c / ksize / ksize;
for (h = 0; h < height_col; ++h) {
for (w = 0; w < width_col; ++w) {
int im_row = h_offset + h * stride;
int im_col = w_offset + w * stride;
int col_index = (c * height_col + h) * width_col + w;
data_col[col_index] = im2col_get_pixel(data_im, height, width, channels,
im_row, im_col, c_im, pad);
}
}
}
}
float im2col_get_pixel(float *im, int height, int width, int channels,
int row, int col, int channel, int pad)
{
row -= pad;
col -= pad;
if (row < 0 || col < 0 ||
row >= height || col >= width) return 0;
return im[col + width*(row + height*channel)];
}
- 1
- 2
- 3
- 4
- 5
- 6
- 7
- 8
- 9
- 10
- 11
- 12
- 13
- 14
- 15
- 16
- 17
- 18
- 19
- 20
- 21
- 22
- 23
- 24
- 25
- 26
- 27
- 28
- 29
- 30
- 31
- 32
- 33
- 34
- 35
- 36
- 37
- 38
- 39
- 40
- 1
- 2
- 3
- 4
- 5
- 6
- 7
- 8
- 9
- 10
- 11
- 12
- 13
- 14
- 15
- 16
- 17
- 18
- 19
- 20
- 21
- 22
- 23
- 24
- 25
- 26
- 27
- 28
- 29
- 30
- 31
- 32
- 33
- 34
- 35
- 36
- 37
- 38
- 39
- 40
我画了下面的图来帮助理解下im2col_cpu()这个函数,为了方便理解,这里假设图像尺寸是5*5, stride=2,kernel_size=3

float *b指向state.workspace这个工作空间,也就是把原始数据变成行向量放到工作空间里,然后进行卷积计算
b. gemm()
这个函数实现了卷积的运算
void gemm(int TA, int TB, int M, int N, int K, float ALPHA,
float *A, int lda,
float *B, int ldb,
float BETA,
float *C, int ldc)
{
gemm_cpu( TA, TB, M, N, K, ALPHA,A,lda, B, ldb,BETA,C,ldc);
}
void gemm_cpu(int TA, int TB, int M, int N, int K, float ALPHA,
float *A, int lda,
float *B, int ldb,
float BETA,
float *C, int ldc)
{
int i, j;
for(i = 0; i < M; ++i){
for(j = 0; j < N; ++j){
C[i*ldc + j] *= BETA;
}
}
if(!TA && !TB)
gemm_nn(M, N, K, ALPHA,A,lda, B, ldb,C,ldc);
else if(TA && !TB)
gemm_tn(M, N, K, ALPHA,A,lda, B, ldb,C,ldc);
else if(!TA && TB)
gemm_nt(M, N, K, ALPHA,A,lda, B, ldb,C,ldc);
else
gemm_tt(M, N, K, ALPHA,A,lda, B, ldb,C,ldc);
}
void gemm_nn(int M, int N, int K, float ALPHA,
float *A, int lda,
float *B, int ldb,
float *C, int ldc)
{
int i,j,k;
for(i = 0; i < M; ++i){
for(k = 0; k < K; ++k){
register float A_PART = ALPHA*A[i*lda+k];
for(j = 0; j < N; ++j){
C[i*ldc+j] += A_PART*B[k*ldb+j];
}
}
}
}
- 1
- 2
- 3
- 4
- 5
- 6
- 7
- 8
- 9
- 10
- 11
- 12
- 13
- 14
- 15
- 16
- 17
- 18
- 19
- 20
- 21
- 22
- 23
- 24
- 25
- 26
- 27
- 28
- 29
- 30
- 31
- 32
- 33
- 34
- 35
- 36
- 37
- 38
- 39
- 40
- 41
- 42
- 43
- 44
- 45
- 46
- 47
- 48
- 49
- 50
- 51
- 1
- 2
- 3
- 4
- 5
- 6
- 7
- 8
- 9
- 10
- 11
- 12
- 13
- 14
- 15
- 16
- 17
- 18
- 19
- 20
- 21
- 22
- 23
- 24
- 25
- 26
- 27
- 28
- 29
- 30
- 31
- 32
- 33
- 34
- 35
- 36
- 37
- 38
- 39
- 40
- 41
- 42
- 43
- 44
- 45
- 46
- 47
- 48
- 49
- 50
- 51
这个函数的将卷积后的值放入c所指向的内存中(最终生成number of kernel个2*2的feature map)

函数结束后,开始循环每一个batch,卷积计算结果依次向后放
c. forward_batchnorm_layer()
这个函数实现batch normalization,加速了训练的收敛过程,详细见这篇论文 Batch Normalization: Accelerating Deep Network Training by Reducing Internal Covariate Shift
void forward_batchnorm_layer(layer l, network_state state)
{
if(l.type == BATCHNORM) copy_cpu(l.outputs*l.batch, state.input, 1, l.output, 1);
if(l.type == CONNECTED){
l.out_c = l.outputs;
l.out_h = l.out_w = 1;
}
if(state.train){
mean_cpu(l.output, l.batch, l.out_c, l.out_h*l.out_w, l.mean);
variance_cpu(l.output, l.mean, l.batch, l.out_c, l.out_h*l.out_w, l.variance);
normalize_cpu(l.output, l.mean, l.variance, l.batch, l.out_c, l.out_h*l.out_w);
} else {
normalize_cpu(l.output, l.rolling_mean, l.rolling_variance, l.batch, l.out_c, l.out_h*l.out_w);
}
scale_bias(l.output, l.scales, l.batch, l.out_c, l.out_h*l.out_w);
}
- 1
- 2
- 3
- 4
- 5
- 6
- 7
- 8
- 9
- 10
- 11
- 12
- 13
- 14
- 15
- 16
- 17
- 1
- 2
- 3
- 4
- 5
- 6
- 7
- 8
- 9
- 10
- 11
- 12
- 13
- 14
- 15
- 16
- 17
void mean_cpu(float *x, int batch, int filters, int spatial, float *mean)
{
float scale = 1./(batch * spatial);
int i,j,k;
for(i = 0; i < filters; ++i){
mean[i] = 0;
for(j = 0; j < batch; ++j){
for(k = 0; k < spatial; ++k){
int index = j*filters*spatial + i*spatial + k;
mean[i] += x[index];
}
}
mean[i] *= scale;
}
}
void variance_cpu(float *x, float *mean, int batch, int filters, int spatial, float *variance)
{
float scale = 1./(batch * spatial - 1);
int i,j,k;
for(i = 0; i < filters; ++i){
variance[i] = 0;
for(j = 0; j < batch; ++j){
for(k = 0; k < spatial; ++k){
int index = j*filters*spatial + i*spatial + k;
variance[i] += pow((x[index] - mean[i]), 2);
}
}
variance[i] *= scale;
}
}
void normalize_cpu(float *x, float *mean, float *variance, int batch, int filters, int spatial)
{
int b, f, i;
for(b = 0; b < batch; ++b){
for(f = 0; f < filters; ++f){
for(i = 0; i < spatial; ++i){
int index = b*filters*spatial + f*spatial + i;
x[index] = (x[index] - mean[f])/(sqrt(variance[f]) + .000001f);
}
}
}
}
- 1
- 2
- 3
- 4
- 5
- 6
- 7
- 8
- 9
- 10
- 11
- 12
- 13
- 14
- 15
- 16
- 17
- 18
- 19
- 20
- 21
- 22
- 23
- 24
- 25
- 26
- 27
- 28
- 29
- 30
- 31
- 32
- 33
- 34
- 35
- 36
- 37
- 38
- 39
- 40
- 41
- 42
- 43
- 44
- 45
- 46
- 47
- 48
- 1
- 2
- 3
- 4
- 5
- 6
- 7
- 8
- 9
- 10
- 11
- 12
- 13
- 14
- 15
- 16
- 17
- 18
- 19
- 20
- 21
- 22
- 23
- 24
- 25
- 26
- 27
- 28
- 29
- 30
- 31
- 32
- 33
- 34
- 35
- 36
- 37
- 38
- 39
- 40
- 41
- 42
- 43
- 44
- 45
- 46
- 47
- 48
void scale_bias(float *output, float *scales, int batch, int n, int size)
{
int i,j,b;
for(b = 0; b < batch; ++b){
for(i = 0; i < n; ++i){
for(j = 0; j < size; ++j){
output[(b*n + i)*size + j] *= scales[i];
}
}
}
}
z这三个函数分别对应论文中的如下公式:

d. add_bias()
这个函数和scale_bias()一模一样,是什么意思?明白的给解释一下呗~
void scale_bias(float *output, float *scales, int batch, int n, int size)
{
int i,j,b;
for(b = 0; b < batch; ++b){
for(i = 0; i < n; ++i){
for(j = 0; j < size; ++j){
output[(b*n + i)*size + j] *= scales[i];
}
}
}
}
2、前向传播-maxpool layer
//池化层相对要简单很多,如果理解了卷基层,这一层就很好理解
void forward_maxpool_layer(const maxpool_layer l, network_state state)
{
int b,i,j,k,m,n;
int w_offset = -l.pad;
int h_offset = -l.pad;
int h = l.out_h;
int w = l.out_w;
int c = l.c;
for(b = 0; b < l.batch; ++b){
for(k = 0; k < c; ++k){
for(i = 0; i < h; ++i){
for(j = 0; j < w; ++j){
int out_index = j + w*(i + h*(k + c*b));
float max = -FLT_MAX;
int max_i = -1;
for(n = 0; n < l.size; ++n){
for(m = 0; m < l.size; ++m){
int cur_h = h_offset + i*l.stride + n;
int cur_w = w_offset + j*l.stride + m;
int index = cur_w + l.w*(cur_h + l.h*(k + b*l.c));
int valid = (cur_h >= 0 && cur_h < l.h &&
cur_w >= 0 && cur_w < l.w);
float val = (valid != 0) ? state.input[index] : -FLT_MAX;
max_i = (val > max) ? index : max_i;
max = (val > max) ? val : max;
}
}
l.output[out_index] = max;
l.indexes[out_index] = max_i;
}
}
}
}
}
- 1
- 2
- 3
- 4
- 5
- 6
- 7
- 8
- 9
- 10
- 11
- 12
- 13
- 14
- 15
- 16
- 17
- 18
- 19
- 20
- 21
- 22
- 23
- 24
- 25
- 26
- 27
- 28
- 29
- 30
- 31
- 32
- 33
- 34
- 35
- 36
- 37
- 38
- 1
- 2
- 3
- 4
- 5
- 6
- 7
- 8
- 9
- 10
- 11
- 12
- 13
- 14
- 15
- 16
- 17
- 18
- 19
- 20
- 21
- 22
- 23
- 24
- 25
- 26
- 27
- 28
- 29
- 30
- 31
- 32
- 33
- 34
- 35
- 36
- 37
- 38
3、前向传播-local layer
顾名思义,local层就是只看前一层的一部分,作者这里选择了左上角的区域
void forward_local_layer(const local_layer l, network_state state)
{
int out_h = local_out_height(l);
int out_w = local_out_width(l);
int i, j;
int locations = out_h * out_w;
for(i = 0; i < l.batch; ++i){
copy_cpu(l.outputs, l.biases, 1, l.output + i*l.outputs, 1);
}
for(i = 0; i < l.batch; ++i){
float *input = state.input + i*l.w*l.h*l.c;
im2col_cpu(input, l.c, l.h, l.w,
l.size, l.stride, l.pad, l.col_image);
float *output = l.output + i*l.outputs;
for(j = 0; j < locations; ++j){
float *a = l.weights + j*l.size*l.size*l.c*l.n;
float *b = l.col_image + j;
float *c = output + j;
int m = l.n;
int n = 1;
int k = l.size*l.size*l.c;
gemm(0,0,m,n,k,1,a,k,b,locations,1,c,locations);
}
}
activate_array(l.output, l.outputs*l.batch, l.activation);
}
- 1
- 2
- 3
- 4
- 5
- 6
- 7
- 8
- 9
- 10
- 11
- 12
- 13
- 14
- 15
- 16
- 17
- 18
- 19
- 20
- 21
- 22
- 23
- 24
- 25
- 26
- 27
- 28
- 29
- 30
- 31
- 1
- 2
- 3
- 4
- 5
- 6
- 7
- 8
- 9
- 10
- 11
- 12
- 13
- 14
- 15
- 16
- 17
- 18
- 19
- 20
- 21
- 22
- 23
- 24
- 25
- 26
- 27
- 28
- 29
- 30
- 31
4、前向传播-dropout layer
dropout层主要是为了防止过拟合,详细可以自己搜索,这里不做过多介绍
void forward_dropout_layer(dropout_layer l, network_state state)
{
int i;
if (!state.train) return;
for(i = 0; i < l.batch * l.inputs; ++i){
float r = rand_uniform(0, 1);
l.rand[i] = r;
if(r < l.probability) state.input[i] = 0;
else state.input[i] *= l.scale;
}
}
5、前向传播-connected layer
void forward_connected_layer(connected_layer l, network_state state)
{
int i;
fill_cpu(l.outputs*l.batch, 0, l.output, 1);
int m = l.batch;
int k = l.inputs;
int n = l.outputs;
float *a = state.input;
float *b = l.weights;
float *c = l.output;
gemm(0,1,m,n,k,1,a,k,b,k,1,c,n);
if(l.batch_normalize){
if(state.train){
mean_cpu(l.output, l.batch, l.outputs, 1, l.mean);
variance_cpu(l.output, l.mean, l.batch, l.outputs, 1, l.variance);
scal_cpu(l.outputs, .95, l.rolling_mean, 1);
axpy_cpu(l.outputs, .05, l.mean, 1, l.rolling_mean, 1);
scal_cpu(l.outputs, .95, l.rolling_variance, 1);
axpy_cpu(l.outputs, .05, l.variance, 1, l.rolling_variance, 1);
copy_cpu(l.outputs*l.batch, l.output, 1, l.x, 1);
normalize_cpu(l.output, l.mean, l.variance, l.batch, l.outputs, 1);
copy_cpu(l.outputs*l.batch, l.output, 1, l.x_norm, 1);
} else {
normalize_cpu(l.output, l.rolling_mean, l.rolling_variance, l.batch, l.outputs, 1);
}
scale_bias(l.output, l.scales, l.batch, l.outputs, 1);
}
for(i = 0; i < l.batch; ++i){
axpy_cpu(l.outputs, 1, l.biases, 1, l.output + i*l.outputs, 1);
}
activate_array(l.output, l.outputs*l.batch, l.activation);
}
- 1
- 2
- 3
- 4
- 5
- 6
- 7
- 8
- 9
- 10
- 11
- 12
- 13
- 14
- 15
- 16
- 17
- 18
- 19
- 20
- 21
- 22
- 23
- 24
- 25
- 26
- 27
- 28
- 29
- 30
- 31
- 32
- 33
- 34
- 35
- 36
- 37
- 38
- 39
- 40
- 41
- 42
- 1
- 2
- 3
- 4
- 5
- 6
- 7
- 8
- 9
- 10
- 11
- 12
- 13
- 14
- 15
- 16
- 17
- 18
- 19
- 20
- 21
- 22
- 23
- 24
- 25
- 26
- 27
- 28
- 29
- 30
- 31
- 32
- 33
- 34
- 35
- 36
- 37
- 38
- 39
- 40
- 41
- 42
void gemm_nt(int M, int N, int K, float ALPHA,
float *A, int lda,
float *B, int ldb,
float *C, int ldc)
{
int i,j,k;
for(i = 0; i < M; ++i){
for(j = 0; j < N; ++j){
register float sum = 0;
for(k = 0; k < K; ++k){
sum += ALPHA*A[i*lda+k]*B[j*ldb + k];
}
C[i*ldc+j] += sum;
}
}
}
- 1
- 2
- 3
- 4
- 5
- 6
- 7
- 8
- 9
- 10
- 11
- 12
- 13
- 14
- 15
- 16
- 17
- 18
- 19
- 20
- 1
- 2
- 3
- 4
- 5
- 6
- 7
- 8
- 9
- 10
- 11
- 12
- 13
- 14
- 15
- 16
- 17
- 18
- 19
- 20
6、前向传播-detectiondetection layer
这是最后一层了,这也是作者论文的精髓所在,希望大家能对比论文认真看一下。
前向传播终于要结束了,这篇博文有点小长~~
void forward_detection_layer(const detection_layer l, network_state state)
{
int locations = l.side*l.side;
int i,j;
memcpy(l.output, state.input, l.outputs*l.batch*sizeof(float));
int b;
if (l.softmax){
for(b = 0; b < l.batch; ++b){
int index = b*l.inputs;
for (i = 0; i < locations; ++i) {
int offset = i*l.classes;
softmax(l.output + index + offset, l.classes, 1,
l.output + index + offset);
}
}
}
if(state.train){
float avg_iou = 0;
float avg_cat = 0;
float avg_allcat = 0;
float avg_obj = 0;
float avg_anyobj = 0;
int count = 0;
*(l.cost) = 0;
int size = l.inputs * l.batch;
memset(l.delta, 0, size * sizeof(float));
for (b = 0; b < l.batch; ++b){
int index = b*l.inputs;
for (i = 0; i < locations; ++i) {
int truth_index = (b*locations + i)*(1+l.coords+l.classes);
int is_obj = state.truth[truth_index];
for (j = 0; j < l.n; ++j) {
int p_index = index + locations*l.classes + i*l.n + j;
l.delta[p_index] = l.noobject_scale*(0 - l.output[p_index]);
*(l.cost) += l.noobject_scale*pow(l.output[p_index], 2);
avg_anyobj += l.output[p_index];
}
int best_index = -1;
float best_iou = 0;
float best_rmse = 20;
if (!is_obj){
continue;
}
int class_index = index + i*l.classes;
for(j = 0; j < l.classes; ++j) {
l.delta[class_index+j] = l.class_scale * (state.truth[truth_index+1+j] - l.output[class_index+j]);
*(l.cost) += l.class_scale * pow(state.truth[truth_index+1+j] - l.output[class_index+j], 2);
if(state.truth[truth_index + 1 + j]) avg_cat += l.output[class_index+j];
avg_allcat += l.output[class_index+j];
}
box truth = float_to_box(state.truth + truth_index + 1 + l.classes);
truth.x /= l.side;
truth.y /= l.side;
for(j = 0; j < l.n; ++j){
int box_index = index + locations*(l.classes + l.n) + (i*l.n + j) * l.coords;
box out = float_to_box(l.output + box_index);
out.x /= l.side;
out.y /= l.side;
if (l.sqrt){
out.w = out.w*out.w;
out.h = out.h*out.h;
}
float iou = box_iou(out, truth);
float rmse = box_rmse(out, truth);
if(best_iou > 0 || iou > 0){
if(iou > best_iou){
best_iou = iou;
best_index = j;
}
}else{
if(rmse < best_rmse){
best_rmse = rmse;
best_index = j;
}
}
}
if(l.forced){
if(truth.w*truth.h < .1){
best_index = 1;
}else{
best_index = 0;
}
}
if(l.random && *(state.net.seen) < 64000){
best_index = rand()%l.n;
}
int box_index = index + locations*(l.classes + l.n) + (i*l.n + best_index) * l.coords;
int tbox_index = truth_index + 1 + l.classes;
box out = float_to_box(l.output + box_index);
out.x /= l.side;
out.y /= l.side;
if (l.sqrt) {
out.w = out.w*out.w;
out.h = out.h*out.h;
}
float iou = box_iou(out, truth);
int p_index = index + locations*l.classes + i*l.n + best_index;
*(l.cost) -= l.noobject_scale * pow(l.output[p_index], 2);
*(l.cost) += l.object_scale * pow(1-l.output[p_index], 2);
avg_obj += l.output[p_index];
l.delta[p_index] = l.object_scale * (1.-l.output[p_index]);
if(l.rescore){
l.delta[p_index] = l.object_scale * (iou - l.output[p_index]);
}
l.delta[box_index+0] = l.coord_scale*(state.truth[tbox_index + 0] - l.output[box_index + 0]);
l.delta[box_index+1] = l.coord_scale*(state.truth[tbox_index + 1] - l.output[box_index + 1]);
l.delta[box_index+2] = l.coord_scale*(state.truth[tbox_index + 2] - l.output[box_index + 2]);
l.delta[box_index+3] = l.coord_scale*(state.truth[tbox_index + 3] - l.output[box_index + 3]);
if(l.sqrt){
l.delta[box_index+2] = l.coord_scale*(sqrt(state.truth[tbox_index + 2]) - l.output[box_index + 2]);
l.delta[box_index+3] = l.coord_scale*(sqrt(state.truth[tbox_index + 3]) - l.output[box_index + 3]);
}
*(l.cost) += pow(1-iou, 2);
avg_iou += iou;
++count;
}
}
if(0){
float *costs = calloc(l.batch*locations*l.n, sizeof(float));
for (b = 0; b < l.batch; ++b) {
int index = b*l.inputs;
for (i = 0; i < locations; ++i) {
for (j = 0; j < l.n; ++j) {
int p_index = index + locations*l.classes + i*l.n + j;
costs[b*locations*l.n + i*l.n + j] = l.delta[p_index]*l.delta[p_index];
}
}
}
int indexes[100];
top_k(costs, l.batch*locations*l.n, 100, indexes);
float cutoff = costs[indexes[99]];
for (b = 0; b < l.batch; ++b) {
int index = b*l.inputs;
for (i = 0; i < locations; ++i) {
for (j = 0; j < l.n; ++j) {
int p_index = index + locations*l.classes + i*l.n + j;
if (l.delta[p_index]*l.delta[p_index] < cutoff) l.delta[p_index] = 0;
}
}
}
free(costs);
}
*(l.cost) = pow(mag_array(l.delta, l.outputs * l.batch), 2);
printf("Detection Avg IOU: %f, Pos Cat: %f, All Cat: %f, Pos Obj: %f, Any Obj: %f, count: %d\n", avg_iou/count, avg_cat/count, avg_allcat/(count*l.classes), avg_obj/count, avg_anyobj/(l.batch*locations*l.n), count);
}
}
- 1
- 2
- 3
- 4
- 5
- 6
- 7
- 8
- 9
- 10
- 11
- 12
- 13
- 14
- 15
- 16
- 17
- 18
- 19
- 20
- 21
- 22
- 23
- 24
- 25
- 26
- 27
- 28
- 29
- 30
- 31
- 32
- 33
- 34
- 35
- 36
- 37
- 38
- 39
- 40
- 41
- 42
- 43
- 44
- 45
- 46
- 47
- 48
- 49
- 50
- 51
- 52
- 53
- 54
- 55
- 56
- 57
- 58
- 59
- 60
- 61
- 62
- 63
- 64
- 65
- 66
- 67
- 68
- 69
- 70
- 71
- 72
- 73
- 74
- 75
- 76
- 77
- 78
- 79
- 80
- 81
- 82
- 83
- 84
- 85
- 86
- 87
- 88
- 89
- 90
- 91
- 92
- 93
- 94
- 95
- 96
- 97
- 98
- 99
- 100
- 101
- 102
- 103
- 104
- 105
- 106
- 107
- 108
- 109
- 110
- 111
- 112
- 113
- 114
- 115
- 116
- 117
- 118
- 119
- 120
- 121
- 122
- 123
- 124
- 125
- 126
- 127
- 128
- 129
- 130
- 131
- 132
- 133
- 134
- 135
- 136
- 137
- 138
- 139
- 140
- 141
- 142
- 143
- 144
- 145
- 146
- 147
- 148
- 149
- 150
- 151
- 152
- 153
- 154
- 155
- 156
- 157
- 158
- 159
- 160
- 161
- 162
- 163
- 164
- 165
- 166
- 167
- 168
- 169
- 170
- 171
- 172
- 173
- 174
- 175
- 176
- 177
- 178
- 179
- 180
- 181
- 182
- 183
- 184
- 185
- 186
- 187
- 188
- 189
- 190
- 191
- 192
- 1
- 2
- 3
- 4
- 5
- 6
- 7
- 8
- 9
- 10
- 11
- 12
- 13
- 14
- 15
- 16
- 17
- 18
- 19
- 20
- 21
- 22
- 23
- 24
- 25
- 26
- 27
- 28
- 29
- 30
- 31
- 32
- 33
- 34
- 35
- 36
- 37
- 38
- 39
- 40
- 41
- 42
- 43
- 44
- 45
- 46
- 47
- 48
- 49
- 50
- 51
- 52
- 53
- 54
- 55
- 56
- 57
- 58
- 59
- 60
- 61
- 62
- 63
- 64
- 65
- 66
- 67
- 68
- 69
- 70
- 71
- 72
- 73
- 74
- 75
- 76
- 77
- 78
- 79
- 80
- 81
- 82
- 83
- 84
- 85
- 86
- 87
- 88
- 89
- 90
- 91
- 92
- 93
- 94
- 95
- 96
- 97
- 98
- 99
- 100
- 101
- 102
- 103
- 104
- 105
- 106
- 107
- 108
- 109
- 110
- 111
- 112
- 113
- 114
- 115
- 116
- 117
- 118
- 119
- 120
- 121
- 122
- 123
- 124
- 125
- 126
- 127
- 128
- 129
- 130
- 131
- 132
- 133
- 134
- 135
- 136
- 137
- 138
- 139
- 140
- 141
- 142
- 143
- 144
- 145
- 146
- 147
- 148
- 149
- 150
- 151
- 152
- 153
- 154
- 155
- 156
- 157
- 158
- 159
- 160
- 161
- 162
- 163
- 164
- 165
- 166
- 167
- 168
- 169
- 170
- 171
- 172
- 173
- 174
- 175
- 176
- 177
- 178
- 179
- 180
- 181
- 182
- 183
- 184
- 185
- 186
- 187
- 188
- 189
- 190
- 191
- 192
二、总结
读到这里,我们已经完全掌握了YOLO代码的框架,我们大概总结下darknet的优缺点。
优点:
- 代码依赖项少,只有cuda,甚至连opencv都可以不需要,如果你在cpu平台,cuda都可以扔了(当然darknet的cup代码并没有做什么优化,跑起来就很慢)。这对于做工程的人来说是非常好的消息,因为我们可以很easy的将代码移植到其他平台
缺点:
-
在darknet中,所有层的lr都一样,这对微调造成了很大的困难,因为微调需要把前面几层的lr都设置的很小很小,然后主要训练最后一层的权重
-
总的来说就是darknet的接口确实很差,如果想把网络改成inception或者resnet的构架,需要改大量的代码,这对于验证模型可行性来说,非常浪费时间。你也应该能理解到为什么我们想要将代码移植到mxnet或者caffe上,然后在mxnet上做模型压缩了
下一篇是反向传播部分的代码,这部分是很重要的,但对与移植或者进一步改进网络来说,其实没必要理解反向传播部分的代码,但如果你想对CNN更深入的了解,可以继续看一下篇关于反向传播部分的内容。
(END)