TensorFlow Implementation of Handwritten Digit Recognition (CNN)

from __future__ import print_function
import tensorflow as tf
import numpy as np
from tensorflow.examples.tutorials.mnist import input_data

mnist = input_data.read_data_sets("/tmp/data/", one_hot=True)

learning_rate = 0.001
training_iters = 1000      # number of training steps
batch_size = 128
display_step = 10

n_input = 784              # MNIST images are 28*28 = 784 pixels, fed in flattened
n_class = 10               # 10 digit classes
dropout = 0.75             # keep probability used during training

x = tf.placeholder(tf.float32, [None, n_input])
y = tf.placeholder(tf.float32, [None, n_class])
keep_prob = tf.placeholder(tf.float32)

def conv2d(x, w, b, strides=1):
    # tf.nn.conv2d arguments:
    #   input:   the image to convolve, a 4-D tensor shaped
    #            [batch, in_height, in_width, in_channels], i.e. [batch size,
    #            image height, image width, channels], float32 or float64
    #   filter:  the convolution kernels, a tensor shaped
    #            [filter_height, filter_width, in_channels, out_channels]
    #   strides: the stride along each input dimension, a 1-D vector of length 4
    #   padding: either "SAME" or "VALID", which selects the padding scheme
    x = tf.nn.conv2d(x, w, strides=[1, strides, strides, 1], padding='SAME')
    x = tf.nn.bias_add(x, b)
    return tf.nn.relu(x)

def maxpool2d(x, k=2):
    # tf.nn.max_pool arguments:
    #   value:   the pooling input, usually the feature map of the preceding conv
    #            layer, still shaped [batch, in_height, in_width, in_channels]
    #   ksize:   the pooling window, a 4-D vector, usually [1, height, width, 1]
    #            because we do not pool over the batch or channel dimensions
    #   strides: the stride, also a 4-D vector
    # Example: sliding a 2x2 window over a 3x3 input with stride 2 leaves one
    # column over; 'VALID' drops it, while 'SAME' zero-pads an extra column.
    return tf.nn.max_pool(x, ksize=[1, k, k, 1], strides=[1, k, k, 1], padding='SAME')

def conv_net(x, weights, biases, dropout):
    # First reshape the flat input so it matches the shape the network expects
    x = tf.reshape(x, shape=[-1, 28, 28, 1])

    # First convolutional layer, followed by max pooling
    conv1 = conv2d(x, weights['wc1'], biases['bc1'])
    conv1 = maxpool2d(conv1, k=2)

    # Second convolutional layer, followed by max pooling
    conv2 = conv2d(conv1, weights['wc2'], biases['bc2'])
    conv2 = maxpool2d(conv2, k=2)

    # Fully connected layer: flatten the last pooling output to 2-D
    # so it can be fed into the dense layer
    fc1 = tf.reshape(conv2, [-1, weights['wd1'].get_shape().as_list()[0]])
    fc1 = tf.add(tf.matmul(fc1, weights['wd1']), biases['bd1'])
    fc1 = tf.nn.relu(fc1)
    fc1 = tf.nn.dropout(fc1, dropout)

    out = tf.add(tf.matmul(fc1, weights['out']), biases['out'])
    return out

weights = {
    'wc1': tf.Variable(tf.random_normal([5, 5, 1, 32])),
    'wc2': tf.Variable(tf.random_normal([5, 5, 32, 64])),
    # After the two conv + pool stages the feature map is 7*7*64
    'wd1': tf.Variable(tf.random_normal([7*7*64, 1024])),  # 1024 hidden units in the dense layer
    'out': tf.Variable(tf.random_normal([1024, n_class]))
}
biases = {
    'bc1': tf.Variable(tf.random_normal([32])),  # bias size matches the number of kernels
    'bc2': tf.Variable(tf.random_normal([64])),
    'bd1': tf.Variable(tf.random_normal([1024])),
    'out': tf.Variable(tf.random_normal([n_class]))
}

pred = conv_net(x, weights, biases, keep_prob)  # prediction (logits) op
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

correct_pred = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)
    step = 1
    while step < training_iters:
        batch_x, batch_y = mnist.train.next_batch(batch_size)
        # print(np.shape(batch_x))  # (128, 784)
        sess.run(optimizer, feed_dict={x: batch_x, y: batch_y, keep_prob: dropout})
        if step % display_step == 0:
            loss, acc = sess.run([cost, accuracy],
                                 feed_dict={x: batch_x, y: batch_y, keep_prob: 1.0})
            print("Iter " + str(step * batch_size) + ", Minibatch Loss= " +
                  "{:.6f}".format(loss) + ", Training Accuracy= " +
                  "{:.5f}".format(acc))
        step += 1
    print("Optimization Finished!")

    # Calculate accuracy for 256 MNIST test images
    print("Testing Accuracy:",
          sess.run(accuracy, feed_dict={x: mnist.test.images[:256],
                                        y: mnist.test.labels[:256],
                                        keep_prob: 1.}))
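Why the first fully connected weight matrix has 7*7*64 input columns: with 'SAME' padding the 5x5 convolutions keep the 28x28 spatial size, and each 2x2 max-pool with stride 2 halves it, so 28 -> 14 -> 7 while the second conv layer outputs 64 channels. A minimal sketch to confirm the static shapes (assuming the same TensorFlow 1.x graph mode as the listing above; the zero-filled filters here are stand-ins used only for the shape check):

import tensorflow as tf

probe = tf.placeholder(tf.float32, [None, 28, 28, 1])
# conv with 'SAME' padding and stride 1 keeps 28x28; each 2x2 max-pool halves it
p1 = tf.nn.max_pool(tf.nn.conv2d(probe, tf.zeros([5, 5, 1, 32]),
                                 strides=[1, 1, 1, 1], padding='SAME'),
                    ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
p2 = tf.nn.max_pool(tf.nn.conv2d(p1, tf.zeros([5, 5, 32, 64]),
                                 strides=[1, 1, 1, 1], padding='SAME'),
                    ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
print(p1.get_shape())  # (?, 14, 14, 32)
print(p2.get_shape())  # (?, 7, 7, 64) -> flattened width: 7 * 7 * 64 = 3136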
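The script only reports batch accuracy. To watch the trained network classify an individual digit, a few extra lines can be placed inside the same tf.Session block after the training loop; this is a sketch added here for illustration, not part of the original script:

    # Hypothetical addition inside the session, after "Optimization Finished!":
    img = mnist.test.images[0:1]                                 # shape (1, 784)
    logits = sess.run(pred, feed_dict={x: img, keep_prob: 1.0})
    print("Predicted digit:", np.argmax(logits, axis=1)[0],
          "true digit:", np.argmax(mnist.test.labels[0]))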