初涉LeNet5处理mnist (CNN卷积神经网络)_k210 lenet5 mnist cnn-优快云博客

本文链接：https://blog.youkuaiyun.com/u011707542/article/details/75533755
本文介绍了一种基于LeNet5卷积神经网络的手写数字识别系统实现过程，该系统通过六层神经网络（两层卷积层、两层池化层和两层全连接层）对MNIST数据集进行训练，并详细展示了每层网络的具体参数配置及计算流程。
摘要生成于 C知道，由 DeepSeek-R1 满血版支持，前往体验 >
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
import os
import numpy as np
#输入节点个数
INPUT_NODE = 784
#输出节点个数
OUTPUT_NODE = 10
#图片的尺寸
IMAGE_SIZE = 28
#通道数
NUM_CHANNELS = 1
#输出节点的个数 
NUM_LABELS = 10
#第一层卷积层的过滤器深度及其尺寸
CONV1_DEEP = 32
CONV1_SIZE = 5
#第二层卷积层的过滤器深度及其尺寸
CONV2_DEEP = 64
CONV2_SIZE = 5
#全连接层的节点个数
FC_SIZE = 512

def inference(input_tensor, train, regularizer):
    #第一层卷积层输入大小是28*28*1=784=INPUT_NODE  
    #卷积层参数个数计算： CONV1_SIZE*CONV1_SIZE*NUM_CHANNELS*CONV1_DEEP+bias =5*5*1*32+32  过滤器的长*宽*过滤器深度*当前层深度+biases(个数为过滤器深度)
    #过滤器尺寸5*5深度为32     从strides=[1, 1, 1, 1]可得  步长的长宽方向分别为1  第二维度跟第三维度表示分别为长宽方向步长
    #输出的深度为CONV1_DEEP=32  由于SAME是全0填充，因此输出的尺寸为当前输入矩阵的长宽分别除以对应的步长 28*28   bias与输出深度个数一致
    with tf.variable_scope('layer1-conv1'):
        #weight前两个维度过滤器的尺寸  第三个维度当前层的深度 第四个是过滤器的维度
        conv1_weights = tf.get_variable(
            "weight", [CONV1_SIZE, CONV1_SIZE, NUM_CHANNELS, CONV1_DEEP],
            initializer=tf.truncated_normal_initializer(stddev=0.1))
        conv1_biases = tf.get_variable("bias", [CONV1_DEEP], initializer=tf.constant_initializer(0.0))
        conv1 = tf.nn.conv2d(input_tensor, conv1_weights, strides=[1, 1, 1, 1], padding='SAME')
        relu1 = tf.nn.relu(tf.nn.bias_add(conv1, conv1_biases))#给convert卷积的这个结果加上偏置  然后利用激活函数ReLu


    #第二层 池化层   输入矩阵为第一层的输出  28*28*32    池化层输出与当前输入的深度一致32    池化层采用了2*2的过滤器尺寸  并且SAME方法全0填充
    #步长的长宽方向分别为2 所以输出尺寸为28/2=14  输出14*14*32的矩阵   池化层可以改变输入的尺寸但是不改变深度
    with tf.name_scope("layer2-pool1"):
        #其中relu1是激活函数  ksize是过滤器尺寸  strides是步长 SAME是全0填充  VALID是不适用全0   SAME方法得到的尺寸是输入的尺寸/步长
        #  VALID方法输出的尺寸是 ( 输入尺寸-过滤器尺寸+1)/2取得上限值
        pool1 = tf.nn.max_pool(relu1, ksize = [1,2,2,1],strides=[1,2,2,1],padding="SAME")


    #第三层 卷积层  输入矩阵为14*14*32     本层步长为1  所以输出尺寸为14/1=14    输出的矩阵为14*14*64    
    with tf.variable_scope("layer3-conv2"):
        #weight前两个维度过滤器的尺寸  第三个维度当前层的深度 第四个是过滤器的维度 :尺寸为5*5 深度为64的过滤器，  当前层深度为32  输出深度为64
        conv2_weights = tf.get_variable(
            "weight", [CONV2_SIZE, CONV2_SIZE, CONV1_DEEP, CONV2_DEEP],
            initializer=tf.truncated_normal_initializer(stddev=0.1))
        conv2_biases = tf.get_variable("bias", [CONV2_DEEP], initializer=tf.constant_initializer(0.0))
        conv2 = tf.nn.conv2d(pool1, conv2_weights, strides=[1, 1, 1, 1], padding='SAME')
        relu2 = tf.nn.relu(tf.nn.bias_add(conv2, conv2_biases))


    #第四层  池化层输入矩阵为上一层输出 14*14*64 过滤器尺寸为2*2 深度为64  池化层的输出深度同输入深度  步长分别为2
    #所以输出尺寸是14/2=7   pool2的输出矩阵7*7*64
    with tf.name_scope("layer4-pool2"):
        pool2 = tf.nn.max_pool(relu2, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
        pool_shape = pool2.get_shape().as_list()#pool2.get_shape()获得第四层输出矩阵的维度   
        #每一层神经网络的输入输出都为一个batch的矩阵 所以这里面的维度也包含了一个batch中数据的个数pool_shape[0]
        nodes = pool_shape[1] * pool_shape[2] * pool_shape[3]
        reshaped = tf.reshape(pool2, [pool_shape[0], nodes])#把第四层的输出变为一个batch的向量

    #  第五层全连接层  输入为一组向量  向量长度为7*7*64=3136=nodes   输出一组长度为FC_SIZE=512的向量
    with tf.variable_scope('layer5-fc1'):
        fc1_weights = tf.get_variable("weight", [nodes, FC_SIZE],
                                      initializer=tf.truncated_normal_initializer(stddev=0.1))
        #只有全连接的权重需要加入正则化
        if regularizer != None: tf.add_to_collection('losses', regularizer(fc1_weights))
        fc1_biases = tf.get_variable("bias", [FC_SIZE], initializer=tf.constant_initializer(0.1))

        fc1 = tf.nn.relu(tf.matmul(reshaped, fc1_weights) + fc1_biases)
        #dropout在训练时会随机将部分的节点的输出改为0 避免过拟合问题  一般只在全连接层使用
        if train: fc1 = tf.nn.dropout(fc1, 0.5)

    #第六层  全连接层  也是输出层 输入为一组长度为512的向量 输出为一组长度为10的向量  这一次输出后会通过softmax得到分类结果      
    with tf.variable_scope('layer6-fc2'):
        fc2_weights = tf.get_variable("weight", [FC_SIZE, NUM_LABELS],
                                      initializer=tf.truncated_normal_initializer(stddev=0.1))
        if regularizer != None: tf.add_to_collection('losses', regularizer(fc2_weights))
        fc2_biases = tf.get_variable("bias", [NUM_LABELS], initializer=tf.constant_initializer(0.1))
        logit = tf.matmul(fc1, fc2_weights) + fc2_biases

    return logit



BATCH_SIZE = 100
LEARNING_RATE_BASE = 0.01
LEARNING_RATE_DECAY = 0.99
REGULARIZATION_RATE = 0.0001
TRAINING_STEPS = 6000
MOVING_AVERAGE_DECAY = 0.99

#定义训练过程
def train(mnist):
    # 定义输出为4维矩阵的placeholder
    x = tf.placeholder(tf.float32, [
            BATCH_SIZE,
            IMAGE_SIZE,
            IMAGE_SIZE,
           NUM_CHANNELS],
        name='x-input')
    y_ = tf.placeholder(tf.float32, [None, LeNet5_infernece.OUTPUT_NODE], name='y-input')

    regularizer = tf.contrib.layers.l2_regularizer(REGULARIZATION_RATE)
    y = inference(x,False,regularizer)
    global_step = tf.Variable(0, trainable=False)

    # 定义损失函数、学习率、滑动平均操作以及训练过程。
    variable_averages = tf.train.ExponentialMovingAverage(MOVING_AVERAGE_DECAY, global_step)
    variables_averages_op = variable_averages.apply(tf.trainable_variables())
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=y, labels=tf.argmax(y_, 1))
    cross_entropy_mean = tf.reduce_mean(cross_entropy)
    loss = cross_entropy_mean + tf.add_n(tf.get_collection('losses'))
    learning_rate = tf.train.exponential_decay(
        LEARNING_RATE_BASE,
        global_step,
        mnist.train.num_examples / BATCH_SIZE, LEARNING_RATE_DECAY,
        staircase=True)

    train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss, global_step=global_step)
    with tf.control_dependencies([train_step, variables_averages_op]):
        train_op = tf.no_op(name='train')

    # 初始化TensorFlow持久化类。
    saver = tf.train.Saver()
    with tf.Session() as sess:
        tf.global_variables_initializer().run()
        for i in range(TRAINING_STEPS):
            xs, ys = mnist.train.next_batch(BATCH_SIZE)

            reshaped_xs = np.reshape(xs, (
                BATCH_SIZE,
                IMAGE_SIZE,
                IMAGE_SIZE,
                NUM_CHANNELS))
            _, loss_value, step = sess.run([train_op, loss, global_step], feed_dict={x: reshaped_xs, y_: ys})

            if i % 1000 == 0:
                print("After %d training step(s), loss on training batch is %g." % (step, loss_value))



def main(argv=None):
    mnist = input_data.read_data_sets("datasets/MNIST_data", one_hot=True)
    train(mnist)

if __name__ == '__main__':
    main()