Single-Layer Perceptron
# -*- coding:utf-8 -*-
# Reference: TensorFlow实战 (黄文坚, 唐源): https://www.amazon.cn/dp/B06X8Z4BS9/ref=sr_1_1?ie=UTF8&qid=1550477736&sr=8-1&keywords=tensorflow%E5%AE%9E%E6%88%98
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)
print(mnist.train.images.shape, mnist.train.labels.shape)
print(mnist.test.images.shape, mnist.test.labels.shape)
print(mnist.validation.images.shape, mnist.validation.labels.shape)
import tensorflow as tf
sess = tf.InteractiveSession()  # interactive session
x = tf.placeholder(tf.float32, [None, 784])  # placeholder for the input features
W = tf.Variable(tf.zeros([784, 10]))  # TF Variable (model parameter); gradients can be taken w.r.t. it and it can be updated
b = tf.Variable(tf.zeros([10]))
y = tf.nn.softmax(tf.matmul(x, W) + b)
# matmul is matrix multiplication; the '+' adds a 1-D array to a 2-D array,
# which triggers broadcasting: the 1-D bias is automatically expanded so that
# it is added to every row of the 2-D result
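# For example (illustrative values only, not part of the model):
#   [[1., 2.], [3., 4.]] + [10., 20.]  ->  [[11., 22.], [13., 24.]]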
y_ = tf.placeholder(tf.float32, [None, 10])  # placeholder for the ground-truth labels
cross_entropy = tf.reduce_mean(-tf.reduce_sum(y_ * tf.log(y), reduction_indices=[1]))
# reduce_sum sums y_ * tf.log(y) over the second dimension (since y_ is a
# 10-dimensional one-hot vector, 9 of the 10 products are 0 * tf.log(y_i)),
# giving the cross-entropy of each sample; tf.reduce_mean then averages these
# per-sample values, so cross_entropy here is a batch average
train_step = tf.train.GradientDescentOptimizer(0.5).minimize(cross_entropy)  # op that updates the parameters
tf.global_variables_initializer().run()
# global variable (parameter) initialization; because the session is interactive,
# .run() can be called directly. With an ordinary session you would call
# session.run(...) instead.
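# A rough non-interactive equivalent would be (sketch, not executed here):
#   with tf.Session() as sess:
#       sess.run(tf.global_variables_initializer())
#       sess.run(train_step, feed_dict={x: batch_xs, y_: batch_ys})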
for i in range(3000):
    batch_xs, batch_ys = mnist.train.next_batch(100)
    train_step.run({x: batch_xs, y_: batch_ys})  # run the update op repeatedly to optimize the parameters
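# Note: 3000 iterations x 100 samples per batch = 300,000 samples, i.e. roughly
# 5.5 passes over the 55,000-image training split produced by read_data_sets above.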
correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
# whether the index of the maximum in the true label y_ matches the index of the maximum in the prediction y, True or False
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
# tf.cast converts True or False to 1 or 0 (correct or incorrect prediction), then the mean is taken
print(accuracy.eval({x: mnist.test.images, y_: mnist.test.labels}))  # evaluate accuracy on the test set
Note that this is a very simple network, so parameter initialization does not matter much and everything can be initialized to zero. For a complex network, however, initialization is crucial and the parameters must not all be set to zero (see [1], [2]); they are usually drawn from a normal or uniform distribution. If everything is initialized to zero, the predictions all collapse to the mean of the labels, the training MSE stops falling once it reaches the variance of the labels, and the network is useless.
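As a quick sanity check of that claim, here is a minimal, self-contained sketch (a toy regression built in its own graph; all names ending in _demo are illustrative and not part of the MNIST models in this post). With every weight and bias initialized to zero and a ReLU hidden layer, only the output bias can ever receive a non-zero gradient, so the prediction collapses to the mean of the targets and the MSE plateaus at their variance.
import numpy as np
import tensorflow as tf

demo_graph = tf.Graph()
with demo_graph.as_default():
    x_demo = tf.placeholder(tf.float32, [None, 1])
    t_demo = tf.placeholder(tf.float32, [None, 1])
    W1_demo = tf.Variable(tf.zeros([1, 8]))   # all-zero init: the bad case
    b1_demo = tf.Variable(tf.zeros([8]))
    W2_demo = tf.Variable(tf.zeros([8, 1]))
    b2_demo = tf.Variable(tf.zeros([1]))
    h_demo = tf.nn.relu(tf.matmul(x_demo, W1_demo) + b1_demo)   # always 0 under zero init
    y_demo = tf.matmul(h_demo, W2_demo) + b2_demo
    mse_demo = tf.reduce_mean(tf.square(y_demo - t_demo))
    step_demo = tf.train.GradientDescentOptimizer(0.1).minimize(mse_demo)
    init_demo = tf.global_variables_initializer()

xs = np.linspace(-1.0, 1.0, 64).reshape(-1, 1).astype(np.float32)
ts = xs ** 2   # any non-constant target works for the illustration

with tf.Session(graph=demo_graph) as demo_sess:
    demo_sess.run(init_demo)
    for _ in range(500):
        demo_sess.run(step_demo, {x_demo: xs, t_demo: ts})
    # The hidden layer outputs all zeros, so only b2_demo is ever updated:
    # the MSE plateaus near np.var(ts) and every prediction is roughly np.mean(ts).
    print(demo_sess.run(mse_demo, {x_demo: xs, t_demo: ts}), np.var(ts))
    print(demo_sess.run(y_demo, {x_demo: xs[:3]}), np.mean(ts))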
Multi-Layer Perceptron
#coding=utf8
from tensorflow.examples.tutorials.mnist import input_data
import tensorflow as tf
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)
sess = tf.InteractiveSession()
in_units = 784
h1_units = 300
W1 = tf.Variable(tf.truncated_normal([in_units, h1_units], stddev=0.1))
b1 = tf.Variable(tf.zeros([h1_units]))
W2 = tf.Variable(tf.zeros([h1_units, 10]))
b2 = tf.Variable(tf.zeros([10]))
x = tf.placeholder(tf.float32, [None, in_units])
keep_prob = tf.placeholder(tf.float32)
hidden1 = tf.nn.relu(tf.matmul(x, W1) + b1)
hidden1_drop = tf.nn.dropout(hidden1, keep_prob)
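# tf.nn.dropout keeps each activation with probability keep_prob and scales the
# kept values by 1/keep_prob, so no rescaling is needed at test time; we simply
# feed keep_prob = 1.0 when evaluating.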
y = tf.nn.softmax(tf.matmul(hidden1_drop, W2) + b2)
# Define loss and optimizer
y_ = tf.placeholder(tf.float32, [None, 10])
cross_entropy = tf.reduce_mean(-tf.reduce_sum(y_ * tf.log(y), reduction_indices=[1]))
train_step = tf.train.AdagradOptimizer(0.3).minimize(cross_entropy)
# learning rate 0.3
# Train
tf.global_variables_initializer().run()
for i in range(3000):
    batch_xs, batch_ys = mnist.train.next_batch(100)
    train_step.run({x: batch_xs, y_: batch_ys, keep_prob: 0.75})
# Test trained model
correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
print(accuracy.eval({x: mnist.test.images, y_: mnist.test.labels, keep_prob: 1.0}))
Convolutional Neural Network
# -*- coding:utf-8 -*-
from tensorflow.examples.tutorials.mnist import input_data
import tensorflow as tf
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)
sess = tf.InteractiveSession()
def weight_variable(shape):
    # weights drawn from a truncated normal distribution
    initial = tf.truncated_normal(shape, stddev=0.1)
    return tf.Variable(initial)

def bias_variable(shape):
    # small positive constant bias (helps keep ReLU units active)
    initial = tf.constant(0.1, shape=shape)
    return tf.Variable(initial)

def conv2d(x, W):
    # 2-D convolution with stride 1, zero-padded so output size equals input size
    return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')

def max_pool_2x2(x):
    # 2x2 max pooling with stride 2
    return tf.nn.max_pool(x, ksize=[1, 2, 2, 1],
                          strides=[1, 2, 2, 1], padding='SAME')
x = tf.placeholder(tf.float32, [None, 784])
y_ = tf.placeholder(tf.float32, [None, 10])
x_image = tf.reshape(x, [-1, 28, 28, 1])  # reshape the flat 784-vector back into a 28x28 image with 1 channel
W_conv1 = weight_variable([5, 5, 1, 32])
# [5, 5, 1, 32] is the filter's (height, width, input channels, output channels);
# the number of output channels is also the number of filters
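# Parameter count of this layer: 5*5*1*32 weights + 32 biases = 832.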
b_conv1 = bias_variable([32])
h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)
# conv2d performs the convolution; in strides=[1, 1, 1, 1] the first and last
# entries must be 1, and the middle two are the strides along height and width.
# With 'SAME' padding and stride 1: (28 + 2*2 - 5)/1 + 1 = 28, i.e. 2 zeros are
# padded on each side of each spatial dimension (4 in total).
# Input feature map: 28x28x1, output feature map: 28x28x32.
h_pool1 = max_pool_2x2(h_conv1)
# max_pool performs max pooling; in ksize/strides=[1, 2, 2, 1] the first and last
# entries must be 1, and the middle two 2s are the pooling window and stride along
# height and width; padding='SAME' also zero-pads where needed.
# Input feature map: 28x28x32, output feature map: 14x14x32.
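# In general, 'SAME' padding gives a spatial output size of ceil(input_size / stride),
# e.g. ceil(28 / 2) = 14 here; pooling leaves the channel count unchanged.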
W_conv2 = weight_variable([5, 5, 32, 64])
b_conv2 = bias_variable([64])
h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
# input feature map: 14x14x32, output feature map: 14x14x64
h_pool2 = max_pool_2x2(h_conv2)
# input feature map: 14x14x64, output feature map: 7x7x64
W_fc1 = weight_variable([7 * 7 * 64, 1024])
b_fc1 = bias_variable([1024])
h_pool2_flat = tf.reshape(h_pool2, [-1, 7*7*64])
h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)
keep_prob = tf.placeholder(tf.float32)
h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)
W_fc2 = weight_variable([1024, 10])
b_fc2 = bias_variable([10])
y_conv = tf.nn.softmax(tf.matmul(h_fc1_drop, W_fc2) + b_fc2)
cross_entropy = tf.reduce_mean(-tf.reduce_sum(y_ * tf.log(y_conv), reduction_indices=[1]))
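# Note: computing softmax and log separately can underflow when a predicted
# probability reaches 0; a numerically safer equivalent in TF 1.x is
# tf.nn.softmax_cross_entropy_with_logits applied to the pre-softmax logits.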
train_step = tf.train.AdamOptimizer(1e-3).minimize(cross_entropy)
correct_prediction = tf.equal(tf.argmax(y_conv,1), tf.argmax(y_,1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
tf.global_variables_initializer().run()
for i in range(3000):
    batch = mnist.train.next_batch(100)
    if i % 100 == 0:
        train_accuracy = accuracy.eval(feed_dict={
            x: batch[0], y_: batch[1], keep_prob: 1.0})
        print("step %d, training accuracy %g" % (i, train_accuracy))
    train_step.run(feed_dict={x: batch[0], y_: batch[1], keep_prob: 0.5})
print("test accuracy %g" % accuracy.eval(feed_dict={
    x: mnist.test.images, y_: mnist.test.labels, keep_prob: 1.0}))
Results:
Single-layer perceptron, five runs: 0.91820014, 0.92330015, 0.92270005, 0.9230001, 0.9194001.
Average: 0.92132
Multi-layer perceptron, five runs: 0.9785001, 0.97980016, 0.9768002, 0.97940016, 0.9785001.
Average: 0.97862
Convolutional neural network, five runs: 0.9881, 0.9911, 0.9919, 0.9899, 0.9913.
Average: 0.9904
As can be seen, on this problem the multi-layer perceptron outperforms the single-layer perceptron, and the convolutional neural network outperforms the multi-layer perceptron.
This post compared the performance of a single-layer perceptron, a multi-layer perceptron, and a convolutional neural network on handwritten digit recognition, and verified experimentally how model complexity affects recognition accuracy.