yolo1、2、3和SSD网络结构对比
1、yolo1
1.1、yolo1网络结构

由图可见,其进行了二十多次卷积还有四次最大池化,其中3x3卷积用于提取特征,1x1卷积用于压缩特征,最后将图像压缩到7x7xfilter的大小,相当于将整个图像划分为7x7的网格,每个网格负责自己这一块区域的目标检测。
整个网络最后利用全连接层使其输出结果的size为(7x7x30),其中7x7代表的是7x7的网格;每个网格对应的30个值中,前20个代表20个类别的预测概率,后10个代表两个预测框,每个预测框包含4个位置参数和1个置信度(2x5)。
1.2、yolo1网络代码
网络部分代码如下:
# relu的改进版
def leak_relu(self, x, alpha=0.1):
    """Return the element-wise maximum of ``x`` and ``alpha * x``.

    For ``0 <= alpha <= 1`` this is the leaky ReLU activation: positive
    inputs pass through unchanged and negative inputs are scaled by
    ``alpha``.

    Args:
        x: Input tensor (anything ``tf.maximum`` accepts).
        alpha: Factor applied to ``x`` for the scaled operand; 0.1 by default.

    Returns:
        The tensor produced by ``tf.maximum(alpha * x, x)``.
    """
    scaled = alpha * x
    return tf.maximum(scaled, x)
# 建立网络部分
def _build_net(self):
    """Build the YOLOv1 graph and return its output together with the input.

    The network is a stack of 24 convolutions (interleaved with four
    max-pool layers), a flatten step and three fully connected layers.
    Every layer that owns variables is created inside its own
    ``tf.variable_scope`` nested under the outer ``'yolo'`` scope.

    Returns:
        A ``(net, x)`` tuple: ``net`` is the output of the last fully
        connected layer (``7 * 7 * 30`` units, no activation passed) and
        ``x`` is the float32 input placeholder of shape
        ``[None, 448, 448, 3]``.
    """
    # NOTE(review): the scraped source lost its indentation, so the scope
    # nesting below is reconstructed. Pool and flatten calls sit directly
    # under 'yolo'; they create no variables as far as this file shows, so
    # only op names could differ -- confirm against the original file.
    #
    # Shape notes assume _maxpool_layer(net, 2, 2) halves height and width,
    # as the original inline comments state.
    #
    # Each entry: (scope, num_filters, filter_size, stride, pool_after)
    conv_plan = (
        ('conv_2', 64, 7, 2, True),      # (448,448,3) -> (224,224,64), pool -> (112,112,64)
        ('conv_4', 192, 3, 1, True),     # -> (112,112,192), pool -> (56,56,192)
        ('conv_6', 128, 1, 1, False),    # -> (56,56,128)
        ('conv_7', 256, 3, 1, False),    # -> (56,56,256)
        ('conv_8', 256, 1, 1, False),    # -> (56,56,256)
        ('conv_9', 512, 3, 1, True),     # -> (56,56,512), pool -> (28,28,512)
        ('conv_11', 256, 1, 1, False),
        ('conv_12', 512, 3, 1, False),
        ('conv_13', 256, 1, 1, False),
        ('conv_14', 512, 3, 1, False),
        ('conv_15', 256, 1, 1, False),
        ('conv_16', 512, 3, 1, False),
        ('conv_17', 256, 1, 1, False),
        ('conv_18', 512, 3, 1, False),
        ('conv_19', 512, 1, 1, False),
        ('conv_20', 1024, 3, 1, True),   # -> (28,28,1024), pool -> (14,14,1024)
        ('conv_22', 512, 1, 1, False),
        ('conv_23', 1024, 3, 1, False),
        ('conv_24', 512, 1, 1, False),
        ('conv_25', 1024, 3, 1, False),
        ('conv_26', 1024, 3, 1, False),
        ('conv_28', 1024, 3, 2, False),  # stride 2: (14,14,1024) -> (7,7,1024)
        ('conv_29', 1024, 3, 1, False),
        ('conv_30', 1024, 3, 1, False),
    )

    x = tf.placeholder(tf.float32, [None, 448, 448, 3])
    with tf.variable_scope('yolo'):
        net = x
        for scope, num_filters, filter_size, stride, pool_after in conv_plan:
            with tf.variable_scope(scope):
                net = self._conv_layer(net, num_filters, filter_size, stride, scope)
            if pool_after:
                net = self._maxpool_layer(net, 2, 2)

        # Flatten the (7,7,1024) feature map before the dense head.
        net = self._flatten(net)

        # Two hidden dense layers (7*7*1024 -> 512 -> 4096) with leak_relu.
        for scope, num_out in (('fc_33', 512), ('fc_34', 4096)):
            with tf.variable_scope(scope):
                net = self._fc_layer(net, num_out, activation=self.leak_relu, scope=scope)

        # Output layer: 7*7*30 values; the activation argument is left at
        # its default.
        with tf.variable_scope('fc_36'):
            net = self._fc_layer(net, 7*7*30, scope='fc_36')

    # Callers get the prediction tensor first, then the input placeholder.
    return net, x
# 生成卷积层
def _conv_layer(self, x, num_filters, filter_size, stride, scope):
    """Apply an explicitly padded 2-D convolution, a bias and ``leak_relu``.

    The input is zero-padded by ``filter_size // 2`` on both sides of axes
    1 and 2, then convolved with ``padding="VALID"``, so the padding amount
    is fixed by the kernel size rather than chosen by TensorFlow.

    Args:
        x: Input tensor; the last axis is read as the channel count and
            axes 1 and 2 are the ones padded (i.e. a channels-last layout).
        num_filters: Number of output channels.
        filter_size: Side length of the square kernel.
        stride: Stride used along both padded axes.
        scope: Name given to the ``tf.nn.conv2d`` op.

    Returns:
        The activated tensor ``leak_relu(conv(x_padded) + bias)``.
    """
    # Kernel: truncated-normal init, shape [k, k, in_channels, out_channels].
    channels_in = x.get_shape().as_list()[-1]
    kernel_shape = [filter_size, filter_size, channels_in, num_filters]
    kernel = tf.Variable(tf.truncated_normal(kernel_shape, stddev=0.1), name='weights')

    # One zero-initialised bias per output channel.
    offsets = tf.Variable(tf.zeros([num_filters,]), name='biases')

    # Pad only the two middle axes; the first and last axes are left untouched.
    half = filter_size // 2
    no_pad, both_sides = [0, 0], [half, half]
    paddings = np.array([no_pad, both_sides, both_sides, no_pad])
    padded = tf.pad(x, paddings)

    # VALID convolution over the manually padded input.
    window_strides = [1, stride, stride, 1]
    conv = tf.nn.conv2d(padded, kernel, strides=window_strides, padding="VALID", name=scope)

    pre_activation = tf.nn.bias_add(conv, offsets)
    return self.leak_relu(pre_activation)
def _fc_layer(self, x, num_out, activation=None,

本文对比分析了YOLO系列(YOLOv1, YOLOv2, YOLOv3)与SSD在目标检测任务中的网络结构和性能。YOLO系列通过不断优化,如引入残差网络、反卷积和多特征层,提高了检测效果,而SSD利用VGG16进行特征提取,实现了较好的检测性能。"
558870,88054,网页跨分辨率适配策略,"['网页设计', 'HTML', 'CSS', 'JavaScript', '框架']
最低0.47元/天 解锁文章
3万+





