如有不懂之处,欢迎留言与笔者讨论;其中部分内容笔者自己也尚存疑问。
yolo_net.py
import numpy as np
import tensorflow as tf
import yolo.config as cfg
# Short alias for TF-Slim; its helpers (arg_scope, conv2d, max_pool2d, ...)
# are what build_network uses to assemble the layers.
slim = tf.contrib.slim
class YOLONet(object):
def __init__(self, is_training=True):
    """Read the configuration, then build the input placeholder and network.

    Every hyper-parameter is copied from ``cfg`` onto the instance before
    any graph op is created, so ``build_network`` and ``loss_layer`` can
    rely on them being present.

    Args:
        is_training: Forwarded to ``build_network``. When truthy, a
            ``labels`` placeholder is also created, ``loss_layer`` is
            called on the logits and labels, and the total loss is stored
            in ``self.total_loss`` and exported as a scalar summary.
    """
    # --- dataset and grid geometry ---------------------------------------
    self.classes = cfg.CLASSES
    self.num_class = len(self.classes)
    self.image_size = cfg.IMAGE_SIZE
    self.cell_size = cfg.CELL_SIZE
    self.boxes_per_cell = cfg.BOXES_PER_CELL

    grid = self.cell_size
    boxes = self.boxes_per_cell
    cells = grid * grid

    # Length of the flat prediction vector: for each grid cell, one score
    # per class plus five numbers per predicted box.
    self.output_size = cells * (self.num_class + boxes * 5)
    # Size of one grid cell, in pixels of the input image.
    self.scale = 1.0 * self.image_size / grid
    # NOTE(review): presumably split points inside the flat prediction
    # vector (class scores | box confidences | box coordinates) -- confirm
    # against loss_layer. The original author's notes give 7*7*20 for the
    # first boundary and an extra 7*7*2 for the second.
    self.boundary1 = cells * self.num_class
    self.boundary2 = self.boundary1 + cells * boxes

    # --- loss weights and optimisation settings --------------------------
    self.object_scale = cfg.OBJECT_SCALE
    self.noobject_scale = cfg.NOOBJECT_SCALE
    self.class_scale = cfg.CLASS_SCALE
    self.coord_scale = cfg.COORD_SCALE
    self.learning_rate = cfg.LEARNING_RATE
    self.batch_size = cfg.BATCH_SIZE
    self.alpha = cfg.ALPHA

    # Lookup table of shape (cell_size, cell_size, boxes_per_cell) in which
    # every entry holds its own index along the second axis.
    index_rows = np.array([np.arange(grid)] * grid * boxes)
    self.offset = index_rows.reshape(boxes, grid, grid).transpose(1, 2, 0)

    # --- graph construction ----------------------------------------------
    # Batch of square, 3-channel input images; the batch size is left open.
    self.images = tf.placeholder(
        dtype=tf.float32,
        shape=[None, self.image_size, self.image_size, 3],
        name='images')
    self.logits = self.build_network(
        self.images,
        num_outputs=self.output_size,
        alpha=self.alpha,
        is_training=is_training)

    if not is_training:
        # Inference only: no labels, loss or summary are needed.
        return

    # One label vector of length 5 + num_class for every grid cell.
    label_shape = [None, grid, grid, 5 + self.num_class]
    self.labels = tf.placeholder(dtype=tf.float32, shape=label_shape)
    self.loss_layer(self.logits, self.labels)
    # Collect the total loss only after loss_layer has run.
    self.total_loss = tf.losses.get_total_loss()
    tf.summary.scalar('total_loss', self.total_loss)
def build_network(self,
images,
num_outputs,
alpha,
keep_prob=0.5,
is_training=True,
scope='yolo'):
#定义结构,输出7*7*30张量
with tf.variable_scope(scope):
with slim.arg_scope(
[slim.conv2d, slim.fully_connected],
activation_fn=leaky_relu(alpha),
weights_regularizer=slim.l2_regularizer(0.0005),
weights_initializer=tf.truncated_normal_initializer(0.0, 0.01)
):
net = tf.pad(
images, np.array([[0, 0], [3, 3], [3, 3], [0, 0]]),
name='pad_1')
net = slim.conv2d(
net, 64, 7, 2, padding='VALID', scope='conv_2')
net = slim.max_pool2d(net, 2, padding='SAME', scope='pool_3')
net = slim.conv2d(net, 192