VGGNet explored the relationship between the depth of a convolutional neural network and its performance. By repeatedly stacking small 3*3 convolution kernels and 2*2 max pooling layers, VGGNet successfully built convolutional networks 16 to 19 layers deep.
VGGNet uses only 3*3 convolution kernels and 2*2 pooling kernels throughout, improving performance by steadily deepening the network.
Some convolutional stages stack several convolutional layers. Two stacked 3*3 convolutional layers cover the same receptive field as a single 5*5 kernel, and three stacked 3*3 layers cover the same receptive field as a single 7*7 kernel, while using fewer parameters: roughly (3*3*3)/(7*7) ≈ 0.55 of those in a 7*7 convolutional layer. Most importantly, three 3*3 convolutional layers apply more non-linear transformations than one 7*7 layer (three ReLU activations instead of one), which strengthens the CNN's ability to learn features.
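To make the parameter comparison concrete, the short calculation below (an illustrative sketch, not part of the original code) counts the weights of three stacked 3*3 convolutions versus a single 7*7 convolution, assuming every layer maps C input channels to C output channels and ignoring biases.

# Weight count: three stacked 3x3 conv layers vs. one 7x7 conv layer.
# Assumes C input channels and C output channels per layer; biases ignored.
def conv_params(k, c_in, c_out):
    return k * k * c_in * c_out

C = 256                                  # example channel width; the ratio does not depend on C
stacked_3x3 = 3 * conv_params(3, C, C)   # 3 * 9 * C^2 = 27 * C^2
single_7x7 = conv_params(7, C, C)        # 49 * C^2
print(stacked_3x3 / single_7x7)          # 27 / 49 ≈ 0.55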
The network configurations of each VGGNet variant (from the 11-layer network to the 19-layer network) are shown below:
FC (fully connected) denotes a fully connected layer.
The parameter counts of each VGGNet variant are as follows (in millions):
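As a rough check on the scale of these numbers, the total for the 16-layer configuration (VGG16) can be recomputed from the layer shapes used in the code later in this post; the snippet below is an illustrative calculation only, not part of the original code.

# Recompute VGG16's parameter count (weights + biases) from its layer shapes.
# Conv layers are listed as (in_channels, out_channels); all kernels are 3x3.
conv_layers = [(3, 64), (64, 64),
               (64, 128), (128, 128),
               (128, 256), (256, 256), (256, 256),
               (256, 512), (512, 512), (512, 512),
               (512, 512), (512, 512), (512, 512)]
fc_layers = [(7 * 7 * 512, 4096), (4096, 4096), (4096, 1000)]

total = sum(3 * 3 * c_in * c_out + c_out for c_in, c_out in conv_layers)
total += sum(n_in * n_out + n_out for n_in, n_out in fc_layers)
print(total)  # 138357544, i.e. roughly 138 million parameters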
VGGNet has five convolutional stages, each containing two or three convolutional layers, and each stage ends with a max pooling layer that shrinks the spatial size of the feature map. All convolutional layers within a stage have the same number of kernels; across the five stages the counts are, in order, 64-128-256-512-512.
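Because each stage ends with a 2*2 max pooling of stride 2, a 224*224 input is halved five times, which is where the 7*7*512 feature map flattened before the fully connected layers comes from. The trace below is an illustrative sketch of this bookkeeping, not part of the original code.

# Trace how the feature map shrinks through VGG16's five conv stages.
# Each 3x3 conv uses 'SAME' padding and stride 1, so only pooling changes the size.
size = 224
channels_per_stage = [64, 128, 256, 512, 512]
for stage, channels in enumerate(channels_per_stage, start=1):
    size //= 2  # 2x2 max pooling with stride 2 at the end of each stage
    print('after stage %d: %dx%dx%d' % (stage, size, size, channels))
# after stage 5: 7x7x512, flattened to a 25088-dimensional vector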
When training the deeper configurations, one can first train a shallower network (such as configuration A) and use its weights to initialize the deeper networks (D and E), which speeds up convergence.
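One common way to do this in TensorFlow 1.x is to restore only the variables whose names and shapes also exist in the shallower network's checkpoint, leaving the deeper network's extra layers at their random initialization. The sketch below assumes a hypothetical checkpoint path ('vgg11.ckpt') and that the shared layers use identical variable names in both models; it is not part of the original code.

# Hedged sketch: initialize the deeper model's shared layers from a shallower
# network's checkpoint (e.g. configuration A -> D/E). Variable names and shapes
# of the shared layers are assumed to match; 'vgg11.ckpt' is a hypothetical path.
import tensorflow as tf

def restore_shared_weights(sess, checkpoint_path='vgg11.ckpt'):
    reader = tf.train.NewCheckpointReader(checkpoint_path)
    ckpt_shapes = reader.get_variable_to_shape_map()
    shared = [v for v in tf.global_variables()
              if v.op.name in ckpt_shapes
              and v.get_shape().as_list() == ckpt_shapes[v.op.name]]
    # Restore only the overlapping variables; everything else keeps the
    # random initialization from tf.global_variables_initializer().
    tf.train.Saver(var_list=shared).restore(sess, checkpoint_path)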
The complete code is as follows:
import tensorflow as tf
import math
import time
from datetime import datetime

## kh, kw are the kernel height and width; n_out is the number of kernels (output channels)
## dh, dw are the stride height and width; p is the parameter list
def conv_op(input_op, name, kh, kw, n_out, dh, dw, p):
    n_in = input_op.get_shape()[-1].value
    with tf.name_scope(name) as scope:
        kernel = tf.get_variable(scope + 'w',
                                 shape=[kh, kw, n_in, n_out],
                                 initializer=tf.contrib.layers.xavier_initializer_conv2d())
        conv = tf.nn.conv2d(input_op, kernel, (1, dh, dw, 1), padding='SAME')
        bias_init_val = tf.constant(0.0, shape=[n_out], dtype=tf.float32)
        biases = tf.Variable(bias_init_val, trainable=True, name='b')
        z = tf.nn.bias_add(conv, biases)
        activation = tf.nn.relu(z, name=scope)
        p += [kernel, biases]
        return activation

## max pooling layer
def pool(input_op, name, kh, kw, dh, dw):
    return tf.nn.max_pool(input_op,
                          ksize=[1, kh, kw, 1],
                          strides=[1, dh, dw, 1],
                          padding='SAME',
                          name=name)

## fully connected (FC) layer
def fc(input_op, name, n_out, p):
    n_in = input_op.get_shape()[-1].value
    with tf.name_scope(name) as scope:
        kernel = tf.get_variable(scope + 'w',
                                 shape=[n_in, n_out],
                                 initializer=tf.contrib.layers.xavier_initializer())
        biases = tf.Variable(tf.constant(0.1, shape=[n_out], dtype=tf.float32), name='b')
        activation = tf.nn.relu_layer(input_op, kernel, biases, name=scope)  ## ReLU activation
        p += [kernel, biases]
        return activation

def VGG16(input_op, keep_prob):
    p = []  ## initialize the parameter list
    ## first convolutional stage
    conv1_1 = conv_op(input_op, name='conv1_1', kh=3, kw=3, n_out=64, dh=1, dw=1, p=p)
    conv1_2 = conv_op(conv1_1, name='conv1_2', kh=3, kw=3, n_out=64, dh=1, dw=1, p=p)
    pool1 = pool(conv1_2, name='pool1', kh=2, kw=2, dh=2, dw=2)
    ## second convolutional stage
    conv2_1 = conv_op(pool1, name='conv2_1', kh=3, kw=3, n_out=128, dh=1, dw=1, p=p)
    conv2_2 = conv_op(conv2_1, name='conv2_2', kh=3, kw=3, n_out=128, dh=1, dw=1, p=p)
    pool2 = pool(conv2_2, name='pool2', kh=2, kw=2, dh=2, dw=2)
    ## third convolutional stage
    conv3_1 = conv_op(pool2, name='conv3_1', kh=3, kw=3, n_out=256, dh=1, dw=1, p=p)
    conv3_2 = conv_op(conv3_1, name='conv3_2', kh=3, kw=3, n_out=256, dh=1, dw=1, p=p)
    conv3_3 = conv_op(conv3_2, name='conv3_3', kh=3, kw=3, n_out=256, dh=1, dw=1, p=p)
    pool3 = pool(conv3_3, name='pool3', kh=2, kw=2, dh=2, dw=2)
    ## fourth convolutional stage
    conv4_1 = conv_op(pool3, name='conv4_1', kh=3, kw=3, n_out=512, dh=1, dw=1, p=p)
    conv4_2 = conv_op(conv4_1, name='conv4_2', kh=3, kw=3, n_out=512, dh=1, dw=1, p=p)
    conv4_3 = conv_op(conv4_2, name='conv4_3', kh=3, kw=3, n_out=512, dh=1, dw=1, p=p)
    pool4 = pool(conv4_3, name='pool4', kh=2, kw=2, dh=2, dw=2)
    ## fifth convolutional stage
    conv5_1 = conv_op(pool4, name='conv5_1', kh=3, kw=3, n_out=512, dh=1, dw=1, p=p)
    conv5_2 = conv_op(conv5_1, name='conv5_2', kh=3, kw=3, n_out=512, dh=1, dw=1, p=p)
    conv5_3 = conv_op(conv5_2, name='conv5_3', kh=3, kw=3, n_out=512, dh=1, dw=1, p=p)
    pool5 = pool(conv5_3, name='pool5', kh=2, kw=2, dh=2, dw=2)
    ## flatten the output of the fifth stage: each sample becomes a
    ## one-dimensional vector of length 7*7*512
    shp = pool5.get_shape()
    flattened_shape = shp[1].value * shp[2].value * shp[3].value
    reshape1 = tf.reshape(pool5, [-1, flattened_shape], name='reshape1')
    fc1 = fc(reshape1, name='fc1', n_out=4096, p=p)
    fc1_drop = tf.nn.dropout(fc1, keep_prob, name='fc1_drop')
    fc2 = fc(fc1_drop, name='fc2', n_out=4096, p=p)
    fc2_drop = tf.nn.dropout(fc2, keep_prob, name='fc2_drop')
    fc3 = fc(fc2_drop, name='fc3', n_out=1000, p=p)
    softmax = tf.nn.softmax(fc3)
    predictions = tf.argmax(softmax, 1)
    return predictions, softmax, fc3, p

def time_tensorflow_run(session, target, feed, info_string):
    # feed is passed as feed_dict so keep_prob can control the dropout keep ratio
    num_steps_burn_in = 10  ## 10 warm-up iterations, excluded from the statistics
    total_duration = 0.0  ## accumulated total time
    total_duration_squared = 0.0
    for i in range(num_batches + num_steps_burn_in):
        start_time = time.time()
        _ = session.run(target, feed_dict=feed)
        duration = time.time() - start_time
        if i >= num_steps_burn_in:
            if not i % 10:
                print('%s: step %d, duration = %.3f' %
                      (datetime.now(), i - num_steps_burn_in, duration))
            total_duration += duration
            total_duration_squared += duration * duration
    mn = total_duration / num_batches
    vr = total_duration_squared / num_batches - mn * mn
    sd = math.sqrt(vr)
    print('%s: %s across %d steps, %.3f +/- %.3f sec / batch' %
          (datetime.now(), info_string, num_batches, mn, sd))

def run_benchmark():
    with tf.Graph().as_default():  ## define a default Graph for later use
        image_size = 224  ## image size
        ## use random image data to time the forward and backward passes
        images = tf.Variable(tf.random_normal([batch_size, image_size, image_size, 3],
                                              dtype=tf.float32, stddev=1e-1))
        keep_prob = tf.placeholder(tf.float32)
        predictions, softmax, fc3, p = VGG16(images, keep_prob)
        ## create a Session and initialize global variables
        init = tf.global_variables_initializer()
        sess = tf.Session()
        sess.run(init)
        # time the forward pass directly with time_tensorflow_run
        time_tensorflow_run(sess, predictions, {keep_prob: 0.1}, "Forward")
        objective = tf.nn.l2_loss(fc3)  ## loss on fc3
        ## gradients of the loss w.r.t. all model parameters, simulating a training step
        grad = tf.gradients(objective, p)
        time_tensorflow_run(sess, grad, {keep_prob: 0.5}, "Forward-backward")

batch_size = 32
num_batches = 100  ## benchmark 100 batches in total
run_benchmark()