Testing TensorFlow on the MNIST Dataset
As an introductory TensorFlow exercise, this task builds two networks in turn and compares them against the previously used KNN and SVM classifiers on the MNIST dataset to evaluate how well the learning works.
1. Softmax implementation
An implementation of the multi-class Softmax algorithm.
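Before the full TensorFlow script, here is a quick NumPy sketch (illustrative only, not part of the script below) of what the model computes: softmax turns the 10 logits into a probability distribution, and the loss is the cross-entropy between that distribution and the one-hot label.
import numpy as np

def softmax(z):
    # subtract the max for numerical stability, then normalize to probabilities
    e = np.exp(z - z.max())
    return e / e.sum()

logits = np.array([2.0, 1.0, 0.1] + [0.0] * 7)   # illustrative logits for 10 classes
probs = softmax(logits)
label = np.eye(10)[0]                            # one-hot label for class 0
print(-np.sum(label * np.log(probs)))            # cross-entropy loss for this example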
# -*- coding: utf-8 -*-
# @Author: zhengbohang
# @Date: 2018-11-15 23:28:45
# @Last Modified by: zhengbohang
# @Last Modified time: 2018-11-16 13:28:46
# get data
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets('MNIST_data', one_hot=True, source_url='http://yann.lecun.com/exdb/mnist/')
import tensorflow as tf
# process
def main():
    sess = tf.InteractiveSession()
    # Initialize & define
    x = tf.placeholder(tf.float32, [None, 784])
    w = tf.Variable(tf.zeros([784, 10]))
    b = tf.Variable(tf.zeros([10]))
    y = tf.nn.softmax(tf.matmul(x, w) + b)
    y_ = tf.placeholder(tf.float32, [None, 10])
    # reduce along axis 1 to a 1-D vector, then take the mean; pairs with alpha = 0.5
    # cross_entropy = tf.reduce_mean(-tf.reduce_sum(y_ * tf.log(y), reduction_indices=[1]))
    # reduce to a 0-D scalar (sum over the whole batch); pairs with alpha = 0.001
    cross_entropy = -tf.reduce_sum(y_ * tf.log(y))
    # train
    train_step = tf.train.GradientDescentOptimizer(0.001).minimize(cross_entropy)
    tf.global_variables_initializer().run()
    for i in range(20000):
        batch = mnist.train.next_batch(100)
        train_step.run({x: batch[0], y_: batch[1]})
    # compute accuracy
    correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    print("test accuracy: %g" % float(accuracy.eval({x: mnist.test.images, y_: mnist.test.labels})))
if __name__ == '__main__':
    main()
Output:
test accuracy: 0.925
Summary: this is a basic softmax implementation with no hidden layers. Training is very fast and the test accuracy is 92.5%. Compared with SVM, training is much faster and only one hyperparameter (the learning rate) needs tuning, whereas an SVM with a Gaussian kernel has several to set; the accuracy is also slightly higher.
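The two cross-entropy variants left in the code (the commented reduce_mean line and the reduce_sum actually used) differ only by a constant factor: summing over the batch scales the loss, and hence its gradient, by the batch size, which is why the summed version pairs with a much smaller learning rate. A minimal sketch of that relationship, with illustrative names not taken from the original code:
batch_size = 100                 # mnist.train.next_batch(100)
lr_mean = 0.5                    # rate the commented reduce_mean variant pairs with
lr_sum = lr_mean / batch_size    # the summed loss has gradients batch_size times larger
print(lr_sum)                    # 0.005 -- the same order of magnitude as the 0.001 used above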
2. Convolutional neural network with hidden layers
# -*- coding:utf-8 -*-
from __future__ import print_function
# get data
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets('MNIST_data', one_hot=True, source_url='http://yann.lecun.com/exdb/mnist/')
import tensorflow as tf
import time
def main():
    # process
    sess = tf.InteractiveSession()
    # Initialize & define
    x = tf.placeholder(tf.float32, [None, 784])
    # w, b and y are leftovers from the softmax example and are not used below
    w = tf.Variable(tf.zeros([784, 10]))
    b = tf.Variable(tf.zeros([10]))
    y = tf.nn.softmax(tf.matmul(x, w) + b)
    y_ = tf.placeholder(tf.float32, [None, 10])
    # layer 1: 28x28, 1 channel in -> 32 feature maps (then 2x2 max pool -> 14x14)
    W_conv1 = weight_variable([5, 5, 1, 32])
    b_conv1 = bias_variable([32])
    x_image = tf.reshape(x, [-1, 28, 28, 1])
    h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)
    h_pool1 = max_pool_2x2(h_conv1)
    # layer 2: 14x14, 32 channels in -> 64 feature maps (then 2x2 max pool -> 7x7)
    W_conv2 = weight_variable([5, 5, 32, 64])
    b_conv2 = bias_variable([64])
    h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
    h_pool2 = max_pool_2x2(h_conv2)
    # ready to output: flatten 7*7*64 -> fully connected layer with 1024 units
    W_fc1 = weight_variable([7 * 7 * 64, 1024])
    b_fc1 = bias_variable([1024])
    h_pool2_flat = tf.reshape(h_pool2, [-1, 7 * 7 * 64])
    h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)
    # dropout
    keep_prob = tf.placeholder("float")
    h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)
    # softmax output layer: 1024 -> 10 classes
    W_fc2 = weight_variable([1024, 10])
    b_fc2 = bias_variable([10])
    y_conv = tf.nn.softmax(tf.matmul(h_fc1_drop, W_fc2) + b_fc2)
    cross_entropy = -tf.reduce_sum(y_ * tf.log(y_conv))
    train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)
    correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
    tf.global_variables_initializer().run()
    # train for 20000 steps, logging accuracy and elapsed time every 100 steps
    # start counting time
    f = open("/Users/zhengbohang/tensorflow/src/CNN_result", "w+")
    begin = time.time()
    for i in range(20000):
        batch = mnist.train.next_batch(50)
        if i % 100 == 0:
            train_accuracy = accuracy.eval(feed_dict={x: batch[0], y_: batch[1], keep_prob: 1.0})
            print("step %d, training accuracy %g" % (i, train_accuracy), file=f)
            print("time spent %.2f seconds" % float((time.time() - begin)), file=f)
        train_step.run(feed_dict={x: batch[0], y_: batch[1], keep_prob: 0.5})
    print("test accuracy %g" % accuracy.eval(feed_dict={
        x: mnist.test.images, y_: mnist.test.labels, keep_prob: 1.0}), file=f)
    print("time spent %.2f seconds" % float((time.time() - begin)), file=f)
def weight_variable(shape):
    # truncated normal init (stddev 0.1) to break symmetry between units
    initial = tf.truncated_normal(shape, stddev=0.1)
    return tf.Variable(initial)

def bias_variable(shape):
    # small positive bias so ReLU units start active
    initial = tf.constant(0.1, shape=shape)
    return tf.Variable(initial)

def conv2d(x, W):
    # stride-1 convolution with SAME padding (keeps the spatial size)
    return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')

def max_pool_2x2(x):
    # 2x2 max pooling with stride 2 (halves the spatial size)
    return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
if __name__ == '__main__':
    main()
The CNN reshapes each input vector back into a 28*28 matrix so that local spatial features can be captured; it uses SAME-padding (size-preserving) convolutions with ReLU as the activation function. The first layer extracts 32 feature maps, the second extracts 64 feature maps from those 32 channels, and each layer applies 2*2 max pooling to the convolution output, yielding 64 matrices of size 7*7. A fully connected layer with 1024 neurons prepares the output, dropout is applied to reduce overfitting, AdamOptimizer adapts the learning rate during training, and a softmax layer produces the final output.
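The shape and parameter bookkeeping behind those numbers can be checked with plain arithmetic. A minimal sketch with illustrative names (not part of the training script): SAME-padding convolutions keep the spatial size, each 2*2 stride-2 max pool halves it, so 28 -> 14 -> 7, and the flattened input to the 1024-unit fully connected layer has 7*7*64 = 3136 values.
def after_pool(size, pool=2):
    # SAME-padding conv keeps the size; 2x2 stride-2 max pooling halves it
    return size // pool

s = after_pool(after_pool(28))        # 28 -> 14 -> 7
flat = s * s * 64                     # 7*7*64 = 3136 inputs to the fully connected layer
params = (5 * 5 * 1 * 32 + 32         # conv1 weights + biases
          + 5 * 5 * 32 * 64 + 64      # conv2 weights + biases
          + flat * 1024 + 1024        # fc1 weights + biases
          + 1024 * 10 + 10)           # fc2 weights + biases
print(s, flat, params)                # 7, 3136 and roughly 3.27 million parameters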
Output:
step 0, training accuracy 0.16
time spent 0.15 seconds
step 100, training accuracy 0.86
time spent 8.95 seconds
step 200, training accuracy 0.9
time spent 17.95 seconds
step 300, training accuracy 0.96
time spent 26.70 seconds
step 400, training accuracy 0.9
time spent 34.54 seconds
step 500, training accuracy 0.98
time spent 42.43 seconds
………………
step 19500, training accuracy 0.98
time spent 5038.19 seconds
step 19600, training accuracy 1
time spent 5045.89 seconds
step 19700, training accuracy 1
time spent 5053.60 seconds
step 19800, training accuracy 1
time spent 5061.41 seconds
step 19900, training accuracy 1
time spent 5069.07 seconds
test accuracy 0.9923
time spent 5082.92 seconds
After 20,000 training steps (mini-batches of 50), the accuracy on the 10,000 test images is 99.23%, with a total runtime of about 85 minutes. Reshaping the handwritten-digit pixels back into a 28*28 matrix so the network can exploit local spatial features is evidently the key to the accuracy gain.