import numpy as np
import tensorflow as tf
import yolo.config as cfg
# Short alias for the TF-Slim module; used below for slim.arg_scope,
# slim.conv2d, slim.fully_connected, slim.l2_regularizer and slim.max_pool2d.
slim = tf.contrib.slim
class YOLONet(object):
def __init__(self, is_training=True):
    """Copy the YOLO settings from ``cfg`` and build the network graph.

    Creates the ``images`` placeholder and stores the result of
    ``build_network`` in ``self.logits``.

    Args:
        is_training: Forwarded to ``build_network``. When true, a labels
            placeholder is also created, ``loss_layer`` is invoked on the
            logits and labels, and the total loss is stored in
            ``self.total_loss`` and registered as a scalar summary.
    """
    # --- Detection settings taken from the config module ---------------
    self.classes = cfg.CLASSES                # class names
    self.num_class = len(self.classes)
    self.image_size = cfg.IMAGE_SIZE          # side length of the square input
    self.cell_size = cfg.CELL_SIZE            # grid cells per side
    self.boxes_per_cell = cfg.BOXES_PER_CELL  # boxes predicted per cell

    # Number of cells in the grid; shared by the derived sizes below.
    num_cells = self.cell_size * self.cell_size

    # Length of the flat network output: per cell, one score per class
    # plus 5 values per box (the original note gives 7*7*(20+2*5) --
    # the actual numbers depend on cfg).
    self.output_size = num_cells * (
        self.num_class + self.boxes_per_cell * 5)

    # Input pixels per grid cell (float).
    self.scale = 1.0 * self.image_size / self.cell_size

    # NOTE(review): presumably the split points of the flat output vector
    # (class scores | per-box values | remainder) -- confirm against the
    # code that slices the predictions.
    self.boundary1 = num_cells * self.num_class
    self.boundary2 = self.boundary1 + num_cells * self.boxes_per_cell

    # --- Loss weights and training hyper-parameters --------------------
    self.object_scale = cfg.OBJECT_SCALE      # weight: cells with an object
    self.noobject_scale = cfg.NOOBJECT_SCALE  # weight: cells without one
    self.class_scale = cfg.CLASS_SCALE        # weight: classification term
    self.coord_scale = cfg.COORD_SCALE        # weight: coordinate term
    self.learning_rate = cfg.LEARNING_RATE
    self.batch_size = cfg.BATCH_SIZE
    self.alpha = cfg.ALPHA                    # passed to build_network

    # Column-index grid: built as (boxes, cells, cells) and transposed to
    # (cells, cells, boxes), so that offset[i, j, b] == j.
    index_rows = [np.arange(self.cell_size)] * (
        self.cell_size * self.boxes_per_cell)
    stacked = np.reshape(
        np.array(index_rows),
        (self.boxes_per_cell, self.cell_size, self.cell_size))
    self.offset = np.transpose(stacked, (1, 2, 0))

    # --- Graph construction ---------------------------------------------
    image_shape = [None, self.image_size, self.image_size, 3]
    self.images = tf.placeholder(tf.float32, image_shape, name='images')
    self.logits = self.build_network(
        self.images,
        num_outputs=self.output_size,
        alpha=self.alpha,
        is_training=is_training)

    # Inference only needs the logits; the loss graph is training-only.
    if not is_training:
        return

    # Ground-truth labels: 5 + num_class values per grid cell.
    label_shape = [
        None, self.cell_size, self.cell_size, 5 + self.num_class]
    self.labels = tf.placeholder(tf.float32, label_shape)
    self.loss_layer(self.logits, self.labels)
    self.total_loss = tf.losses.get_total_loss()
    tf.summary.scalar('total_loss', self.total_loss)
# 构造网络图
def build_network(self,
images,
num_outputs,
alpha,
keep_prob=0.5,
is_training=True,
scope='yolo'):
with tf.variable_scope(scope):
with slim.arg_scope(
[slim.conv2d, slim.fully_connected],
activation_fn=leaky_relu(alpha),
weights_regularizer=slim.l2_regularizer(0.0005),
weights_initializer=tf.truncated_normal_initializer(0.0, 0.01)
):
net = tf.pad(
images, np.array([[0, 0], [3, 3], [3, 3], [0, 0]]), #对输入数据的宽高进行填充,batch_size和channel不做填充
name='pad_1')
net = slim.conv2d(
net, 64, 7, 2, padding='VALID', scope='conv_2')# conv:64个7x7的卷积核,以2为步伐进行卷积,out:224x224*64
net = slim.max_pool2d(net, 2, padding='SAME', scope='pool_3')# pool:最大池化kernel=2,stride = 2,out:112x112x64
net = slim.
YOLO-tensorflow代码解析二(yolo_net.py)
最新推荐文章于 2025-03-10 17:28:39 发布