Pros and Cons of VGG

Advantages of VGG

VGGNet has a very clean structure: the entire network uses the same convolution kernel size (3x3) and the same max-pooling size (2x2).

A stack of several small-filter (3x3) conv layers beats a single large-filter (5x5 or 7x7) conv layer: for the same effective receptive field it inserts more non-linearities and uses fewer parameters, as the sketch below shows.

It demonstrated that performance can be improved by steadily deepening the network.
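The parameter saving is easy to check with back-of-the-envelope arithmetic. A minimal sketch (the channel count C is an arbitrary assumption for illustration; both options map C input channels to C output channels over a 7x7 receptive field):

# Weight count: three stacked 3x3 conv layers vs. one 7x7 conv layer
C = 256  # arbitrary channel count, for illustration only
stacked_3x3 = 3 * (3 * 3 * C * C)  # 27*C^2 weights
single_7x7 = 7 * 7 * C * C         # 49*C^2 weights
print(stacked_3x3, single_7x7)     # 1769472 3211264 -> roughly 45% fewer parameters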
Disadvantages of VGG

VGG consumes more compute and uses far more parameters (the 3x3 convolutions are not to blame), leading to a large memory footprint (~140M parameters). The vast majority of those parameters come from the first fully connected layer, and VGG has three fully connected layers!

PS: some papers report that these fully connected layers can even be removed with little impact on performance, which dramatically reduces the parameter count.

Note: many pretrained pipelines are built on VGG models (mainly VGG-16 and VGG-19). VGG's parameter space is much larger than that of other methods: the final model is over 500 MB, versus roughly 200 MB for AlexNet and even less for GoogLeNet, so training a VGG model usually takes much longer. Fortunately, publicly available pretrained models make it convenient to use.
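The fully-connected claim is easy to verify with weight-count arithmetic (biases omitted; the 25088 comes from the 7x7x512 feature map that is flattened before fc6, as in the code below):

fc6 = 25088 * 4096   # 102,760,448 weights -- by far the largest single layer
fc7 = 4096 * 4096    #  16,777,216
fc8 = 4096 * 1000    #   4,096,000
print(fc6 + fc7 + fc8)  # ~123.6M of VGG-16's ~138M parameters sit in the FC layers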
On receptive fields:

Suppose you stack three 3x3 conv layers on top of one another (with a non-linear activation between layers). In this arrangement, each neuron in the first conv layer sees a 3x3 patch of the input volume; a neuron in the second layer then sees a 5x5 patch of the input, and one in the third layer sees 7x7, the same receptive field as a single 7x7 conv layer.
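A quick way to check this is to accumulate the receptive field layer by layer. A minimal sketch (my own helper, assuming stride-1 convolutions):

def receptive_field(kernel_sizes):
    """Effective receptive field of a stack of stride-1 convolutions."""
    rf = 1
    for k in kernel_sizes:
        rf += k - 1  # each stride-1 layer grows the field by k-1
    return rf

print(receptive_field([3]))        # 3 -- first layer sees 3x3
print(receptive_field([3, 3]))     # 5 -- second layer sees 5x5 of the input
print(receptive_field([3, 3, 3]))  # 7 -- three stacked 3x3 = one 7x7 view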
import tensorflow as tf

# TensorFlow interactive session
sess = tf.InteractiveSession()
with tf.device("/cpu:0"):
    # Placeholders for the input data
    img = tf.placeholder(dtype=tf.float32, shape=[None, 224, 224, 3], name="input_images")
    # Label placeholder for the 1000 ImageNet classes (unused in this forward-pass demo)
    y_ = tf.placeholder(tf.float32, [None, 1000], "realLabel")
    # Build the network
    with tf.variable_scope("conv1_1"):
        # Note: stddev=1.0 is far too large for real training (small values such as
        # 0.01, or Xavier init, are standard); it is kept throughout because this
        # snippet only demonstrates the graph structure and tensor shapes.
        kernel1_1 = tf.Variable(tf.truncated_normal([3, 3, 3, 64], mean=0.0, stddev=1.0, dtype=tf.float32))
        conv1_1 = tf.nn.conv2d(img, kernel1_1, [1, 1, 1, 1], padding="SAME", name="CONV1_1")
        bias1_1 = tf.Variable(tf.constant(0.0, dtype=tf.float32, shape=[64], name="BIAS1_1"))
        conv1_1 = tf.nn.bias_add(conv1_1, bias1_1)
        conv1_1 = tf.nn.relu(conv1_1)
        print('conv1_1', conv1_1)
with tf.variable_scope("conv1_2"):
kernel1_2 = tf.Variable(tf.truncated_normal([3, 3, 64, 64], mean=0.0, stddev=1.0, dtype=tf.float32))
conv1_2 = tf.nn.conv2d(conv1_1, kernel1_2, [1,1,1,1], padding="SAME", name="CONV1_2")
bias1_2 = tf.Variable(tf.constant(0.0, dtype=tf.float32, shape=[64], name="BIAS1_2"))
conv1_2 = tf.nn.bias_add(conv1_2, bias1_2)
conv1_2 = tf.nn.relu(conv1_2)
print('conv1_2', conv1_2)
maxpool1 = tf.nn.max_pool(conv1_2, [1,2,2,1],[1,2,2,1],padding="SAME",name="maxpool1")
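    # spatial size: 224 -> 112 after pool1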
with tf.variable_scope("conv2_1"):
kernel2_1 = tf.Variable(tf.truncated_normal([3, 3, 64, 128], mean=0.0, stddev=1.0, dtype=tf.float32))
conv2_1 = tf.nn.conv2d(maxpool1, kernel2_1, [1,1,1,1], padding="SAME", name="CONV2_1")
bias2_1 = tf.Variable(tf.constant(0.0, dtype=tf.float32, shape=[128], name="BIAS2_1"))
conv2_1 = tf.nn.bias_add(conv2_1, bias2_1)
conv2_1 = tf.nn.relu(conv2_1)
with tf.variable_scope("conv2_2"):
kernel2_2 = tf.Variable(tf.truncated_normal([3, 3, 128, 128], mean=0.0, stddev=1.0, dtype=tf.float32))
conv2_2 = tf.nn.conv2d(conv2_1, kernel2_2, [1,1,1,1], padding="SAME", name="CONV2_2")
bias2_2 = tf.Variable(tf.constant(0.0, dtype=tf.float32, shape=[128], name="BIAS2_2"))
conv2_2 = tf.nn.bias_add(conv2_2, bias2_2)
conv2_2 = tf.nn.relu(conv2_2)
maxpool2 = tf.nn.max_pool(conv2_2, [1,2,2,1],[1,2,2,1],padding="SAME",name="maxpool2")
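    # spatial size: 112 -> 56 after pool2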
with tf.variable_scope("conv3_1"):
kernel3_1 = tf.Variable(tf.truncated_normal([3,3,128,256], 0.0, 1.0, dtype=tf.float32))
conv3_1 = tf.nn.conv2d(maxpool2, kernel3_1, [1,1,1,1], padding="SAME", name="CONV3_1")
bias3_1 = tf.Variable(tf.constant(0.0, dtype=tf.float32,shape=[256], name="BIAS3_1"))
conv3_1 = tf.nn.bias_add(conv3_1, bias3_1)
conv3_1 = tf.nn.relu(conv3_1)
with tf.variable_scope("conv3_2"):
kernel3_2 = tf.Variable(tf.truncated_normal([3,3,256,256],mean=0.0, stddev=1.0,dtype=tf.float32))
conv3_2 = tf.nn.conv2d(conv3_1, kernel3_2, [1,1,1,1], padding="SAME", name="CONV3_2")
bias3_2 = tf.Variable(tf.constant(0.0, dtype=tf.float32,shape=[256],name="BIAS3_2"))
conv3_2 = tf.nn.bias_add(conv3_2,bias3_2)
conv3_2 = tf.nn.relu(conv3_2)
with tf.variable_scope("conv3_3"):
kernel3_3 = tf.Variable(tf.truncated_normal([3, 3, 256, 256], mean=0.0, stddev=1.0, dtype=tf.float32))
conv3_3 = tf.nn.conv2d(conv3_2, kernel3_3, [1, 1, 1, 1], padding="SAME", name="CONV3_3")
bias3_3 = tf.Variable(tf.constant(0.0, dtype=tf.float32, shape=[256], name="BIAS3_3"))
conv3_3 = tf.nn.bias_add(conv3_3, bias3_3)
conv3_3 = tf.nn.relu(conv3_3)
maxpool3 = tf.nn.max_pool(conv3_3, [1,2,2,1],[1,2,2,1],padding="SAME",name="maxpool3")
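    # spatial size: 56 -> 28 after pool3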
with tf.variable_scope("conv4_1"):
kernel4_1 = tf.Variable(tf.truncated_normal([3,3,256,512], 0.0, 1.0, dtype=tf.float32))
conv4_1 = tf.nn.conv2d(maxpool3, kernel4_1, [1,1,1,1], padding="SAME", name="CONV4_1")
bias4_1 = tf.Variable(tf.constant(0.0, dtype=tf.float32,shape=[512], name="BIAS4_1"))
conv4_1 = tf.nn.bias_add(conv4_1, bias4_1)
conv4_1 = tf.nn.relu(conv4_1)
with tf.variable_scope("conv4_2"):
kernel4_2 = tf.Variable(tf.truncated_normal([3,3,512,512],mean=0.0, stddev=1.0,dtype=tf.float32))
conv4_2 = tf.nn.conv2d(conv4_1, kernel4_2, [1,1,1,1], padding="SAME", name="CONV4_2")
bias4_2 = tf.Variable(tf.constant(0.0, dtype=tf.float32,shape=[512],name="BIAS4_2"))
conv4_2 = tf.nn.bias_add(conv4_2,bias4_2)
conv4_2 = tf.nn.relu(conv4_2)
with tf.variable_scope("conv4_3"):
kernel4_3 = tf.Variable(tf.truncated_normal([3, 3, 512, 512], mean=0.0, stddev=1.0, dtype=tf.float32))
conv4_3 = tf.nn.conv2d(conv4_2, kernel4_3, [1, 1, 1, 1], padding="SAME", name="CONV4_3")
bias4_3 = tf.Variable(tf.constant(0.0, dtype=tf.float32, shape=[512], name="BIAS4_3"))
conv4_3 = tf.nn.bias_add(conv4_3, bias4_3)
conv4_3 = tf.nn.relu(conv4_3)
maxpool4 = tf.nn.max_pool(conv4_3, [1,2,2,1],[1,2,2,1],padding="SAME",name="maxpool4")
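    # spatial size: 28 -> 14 after pool4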
with tf.variable_scope("conv5_1"):
kernel5_1 = tf.Variable(tf.truncated_normal([3,3,512,512], 0.0, 1.0, dtype=tf.float32))
conv5_1 = tf.nn.conv2d(maxpool4, kernel5_1, [1,1,1,1], padding="SAME", name="CONV5_1")
bias5_1 = tf.Variable(tf.constant(0.0, dtype=tf.float32,shape=[512], name="BIAS5_1"))
conv5_1 = tf.nn.bias_add(conv5_1, bias5_1)
conv5_1 = tf.nn.relu(conv5_1)
with tf.variable_scope("conv5_2"):
kernel5_2 = tf.Variable(tf.truncated_normal([3,3,512,512],mean=0.0, stddev=1.0,dtype=tf.float32))
conv5_2 = tf.nn.conv2d(conv5_1, kernel5_2, [1,1,1,1], padding="SAME", name="CONV5_2")
bias5_2 = tf.Variable(tf.constant(0.0, dtype=tf.float32,shape=[512],name="BIAS5_2"))
conv5_2 = tf.nn.bias_add(conv5_2,bias5_2)
conv5_2 = tf.nn.relu(conv5_2)
with tf.variable_scope("conv5_3"):
kernel5_3 = tf.Variable(tf.truncated_normal([3, 3, 512, 512], mean=0.0, stddev=1.0, dtype=tf.float32))
conv5_3 = tf.nn.conv2d(conv5_2, kernel5_3, [1, 1, 1, 1], padding="SAME", name="CONV5_3")
bias5_3 = tf.Variable(tf.constant(0.0, dtype=tf.float32, shape=[512], name="BIAS5_3"))
conv5_3 = tf.nn.bias_add(conv5_3, bias5_3)
conv5_3 = tf.nn.relu(conv5_3)
maxpool5 = tf.nn.max_pool(conv5_3, [1,2,2,1],[1,2,2,1],padding="SAME",name="maxpool5")
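    # spatial size: 14 -> 7 after pool5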
    # Flatten the 7x7x512 feature map into a single vector per image
    shape = maxpool5.get_shape()
    length = shape[1].value * shape[2].value * shape[3].value  # 7*7*512 = 25088
    reshape = tf.reshape(maxpool5, [-1, length], name="reshape")
    with tf.variable_scope("fc6"):
        fc6_weight = tf.Variable(tf.truncated_normal([length, 4096], mean=0.0, stddev=1.0, dtype=tf.float32, name="fc6_Weight"))
        fc6_bias = tf.Variable(tf.constant(0.0, dtype=tf.float32, shape=[4096], name="fc6_bias"))
        fc6 = tf.matmul(reshape, fc6_weight)
        fc6 = tf.nn.bias_add(fc6, fc6_bias)
        fc6 = tf.nn.relu(fc6)
        # keep_prob=0.5 is a training-time setting; at inference it should be 1.0
        fc6_drop = tf.nn.dropout(fc6, 0.5, name="fc6_drop")
with tf.variable_scope("fc7"):
fc7_weight = tf.Variable(tf.truncated_normal([4096,4096],mean=0.0, stddev=1.0, dtype=tf.float32, name="fc7_Weight"))
fc7_bias = tf.Variable(tf.constant(0.0, dtype=tf.float32, shape=[4096],name="fc7_bias"))
fc7 = tf.matmul(fc6_drop, fc7_weight)
fc7 = tf.nn.bias_add(fc7, fc7_bias)
fc7 = tf.nn.relu(fc7)
fc7_drop = tf.nn.dropout(fc7, 0.5, name="fc7_drop")
with tf.variable_scope("fc8"):
fc8_weight = tf.Variable(tf.truncated_normal([4096,1000],mean=0.0, stddev=1.0, dtype=tf.float32, name="fc8_Weight"))
fc8_bias = tf.Variable(tf.constant(0.0, dtype=tf.float32, shape=[1000],name="fc8_bias"))
fc8 = tf.matmul(fc7_drop, fc8_weight)
fc8 = tf.nn.bias_add(fc8, fc8_bias)
fc8 = tf.nn.relu(fc8)
softmax = tf.nn.softmax(fc8)
predictions = tf.argmax(softmax, 1)
# Generate a random test image
pic = tf.Variable(tf.truncated_normal([1, 224, 224, 3], dtype=tf.float32))
# Initialize all variables
tf.global_variables_initializer().run()
# Feed the data and run the network (with random weights the predicted class is
# meaningless; this only checks that the graph wires up and the shapes match)
print(sess.run(softmax, feed_dict={img: pic.eval()}))