隐藏层共两层,神经元个数可以自定义,学习率采用指数衰减。已经尽了最大努力,但目前最高识别率还不到98%,有没有高手能指点一下?
# -*- coding: UTF-8 -*-
#主程序
import numpy as np
import tensorflow as tf
from mnistread import *
# ---- Hyper-parameters ------------------------------------------------------
hidden_layer1_cells = 400
hidden_layer2_cells = 100
batchsize = 100
epochs = 1000

tf.reset_default_graph()

# ---- Graph inputs ----------------------------------------------------------
X = tf.placeholder(tf.float32, [None, 784])   # 784 input features per sample
Z = tf.placeholder(tf.float32, [None, 10])    # one-hot targets, 10 classes
OH = tf.placeholder(tf.int32, [None])         # integer class labels
onehot = tf.one_hot(OH, depth=10, axis=1)     # graph-side label -> one-hot

# Step counter handed to minimize() below, which increments it once per
# parameter update.  It is bookkeeping, not a model parameter, so it is
# excluded from the trainable-variable collection.
current_iter = tf.Variable(0, trainable=False)


def _init_weight(fan_in, fan_out, name):
    """Create a [fan_in, fan_out] weight matrix with stddev 1/sqrt(fan_in).

    tf.random_normal defaults to stddev=1.0.  Summed over hundreds of inputs
    that gives pre-activations far outside tanh's linear range, so the units
    start saturated and gradients vanish.  Scaling by the fan-in keeps the
    pre-activation variance roughly constant from layer to layer.
    """
    stddev = 1.0 / float(np.sqrt(float(fan_in)))
    return tf.Variable(tf.random_normal([fan_in, fan_out], stddev=stddev), name=name)


W = {
    "wih": _init_weight(784, hidden_layer1_cells, "wih"),
    "whh": _init_weight(hidden_layer1_cells, hidden_layer2_cells, "whh"),
    "who": _init_weight(hidden_layer2_cells, 10, "who"),
}
# Biases start at zero so no unit begins with a built-in offset.
B = {
    "bih": tf.Variable(tf.zeros([hidden_layer1_cells], dtype=tf.float32), name="bih"),
    "bhh": tf.Variable(tf.zeros([hidden_layer2_cells], dtype=tf.float32), name="bhh"),
    "bho": tf.Variable(tf.zeros([10], dtype=tf.float32), name="bho"),
}

# ---- Forward pass: two tanh hidden layers, linear output --------------------
layer1 = tf.nn.tanh(tf.matmul(X, W["wih"]) + B["bih"])
layer2 = tf.nn.tanh(tf.matmul(layer1, W["whh"]) + B["bhh"])
Y = tf.matmul(layer2, W["who"]) + B["bho"]    # logits
out = tf.nn.softmax(Y)                        # class probabilities

# ---- Loss and optimiser ----------------------------------------------------
# softmax_cross_entropy_with_logits_v2 returns one loss value per example;
# reduce to a scalar so the objective does not scale with the batch size.
cost = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits_v2(labels=Z, logits=Y))
# Staircase decay: the rate is multiplied by 0.9 every 6000 optimiser steps
# (presumably 10 epochs, assuming 60000 training images at batchsize 100).
lr = tf.train.exponential_decay(0.01, current_iter, 6000, 0.9, staircase=True)
optimizer = tf.train.AdamOptimizer(lr).minimize(cost, global_step=current_iter)
def _as_network_input(raw_images):
    """Rescale pixel values and flatten every image to a 784-element row.

    The arithmetic is raw / 255 * 0.99 + 0.01; for byte-valued pixels
    (0-255) that yields values in [0.01, 1.0].
    """
    scaled = (raw_images / 255 * 0.99) + 0.01
    return np.reshape(scaled, [-1, 784])


# Training set, read from the loader's default file paths.
train_X = _as_network_input(load_train_images())
train_Y = load_train_labels()

# Test set, read through the same loader functions with explicit paths.
test_images = _as_network_input(load_train_images('./t10k-images.idx3-ubyte'))
test_labels = load_train_labels('./t10k-labels.idx1-ubyte')

# Placeholder value for the most recent training loss.
loss = 0.0
# Saver over the variables that exist in the graph at this point.
saver = tf.train.Saver()
# Train for `epochs` passes over the training set, evaluating on the test set
# after every pass, then save the final weights to ./mnistconfig.ckpt.
#
# Notes on what this loop deliberately does NOT do:
#   * It never assigns to `current_iter`: the optimiser op advances that
#     counter itself, and rebinding the Python name would only hide the
#     tf.Variable without affecting the graph.
#   * It does not call the graph's one-hot op per batch; labels are encoded
#     once in NumPy, saving one session round trip per training step.

# Integer labels and their one-hot encoding, computed once up front.
train_targets = train_Y.astype("int32")
train_onehot = np.eye(10, dtype=np.float32)[train_targets]
test_targets = test_labels.astype("int32")

num_train = len(train_X)
batchs = num_train // batchsize   # trailing partial batch is dropped

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for i in range(epochs):
        # Visit the samples in a fresh random order each epoch so the
        # optimiser does not see the same batch sequence every time.
        order = np.random.permutation(num_train)
        batch_losses = []
        for j in range(batchs):
            idx = order[j * batchsize:(j + 1) * batchsize]
            _, loss = sess.run(
                [optimizer, cost],
                feed_dict={X: train_X[idx], Z: train_onehot[idx]})
            # np.mean handles both a scalar and a per-example loss tensor.
            batch_losses.append(np.mean(loss))

        # Evaluate on the full test set.
        test_out = sess.run(out, feed_dict={X: test_images})
        result = np.argmax(test_out, 1)
        # Fraction of test samples classified correctly (0.0 - 1.0).
        accuracy = np.mean(result == test_targets)
        # Report the mean loss over the whole epoch, not just the last batch.
        print("Epoch:", i, "Loss:", np.mean(batch_losses), "Accuracy:", accuracy)
        print("----------------------------------------------------------------")
    saver.save(sess, "./mnistconfig.ckpt")
    print("Finised and saved")
MNIST训练数据和测试数据的读取函数,mnistread.py的内容如下:
# -*- coding: UTF-8 -*-
import numpy as np
import tensorflow as tf
import matplotlib.pylab as pl
import struct
# Default locations of the four data files, relative to the working directory.

# Training-set images
train_images_idx3_ubyte_file = "./train-images.idx3-ubyte"
# Training-set labels
train_labels_idx1_ubyte_file = "./train-labels.idx1-ubyte"
# Test-set images
test_images_idx3_ubyte_file = "./t10k-images.idx3-ubyte"
# Test-set labels
test_labels_idx1_ubyte_file = "./t10k-labels.idx1-ubyte"
def decode_idx3_ubyte(idx3_ubyte_file):
    """
    Parse an idx3-ubyte image file into a NumPy array.

    :param idx3_ubyte_file: path of the idx3 file
    :return: float64 array of shape (num_images, num_rows, num_cols) holding
        the pixel bytes as stored in the file
    :raises ValueError: if the magic number is not 2051, or the file contains
        fewer pixel bytes than its header announces
    :raises struct.error: if the file is too short to contain the header
    """
    # Read the whole file; the context manager closes the handle even when
    # reading fails.
    with open(idx3_ubyte_file, 'rb') as f:
        bin_data = f.read()

    # Header: magic number, image count, rows per image, columns per image,
    # each a big-endian 32-bit integer.
    fmt_header = '>iiii'
    magic_number, num_images, num_rows, num_cols = struct.unpack_from(fmt_header, bin_data, 0)
    if magic_number != 2051:
        raise ValueError(
            'not an idx3-ubyte file (magic number %d, expected 2051): %s'
            % (magic_number, idx3_ubyte_file))

    # The pixels follow the header as one contiguous run of unsigned bytes,
    # so they can be decoded in a single pass instead of image by image.
    offset = struct.calcsize(fmt_header)
    pixels = np.frombuffer(bin_data, dtype=np.uint8,
                           count=num_images * num_rows * num_cols,
                           offset=offset)
    # astype() returns a writable float64 copy, the dtype callers received
    # from the previous np.empty-based implementation.
    return pixels.reshape((num_images, num_rows, num_cols)).astype(np.float64)
def decode_idx1_ubyte(idx1_ubyte_file):
    """
    Parse an idx1-ubyte label file into a NumPy array.

    :param idx1_ubyte_file: path of the idx1 file
    :return: float64 array of shape (num_labels,) holding the label bytes as
        stored in the file
    :raises ValueError: if the magic number is not 2049, or the file contains
        fewer label bytes than its header announces
    :raises struct.error: if the file is too short to contain the header
    """
    # Read the whole file; the context manager closes the handle even when
    # reading fails.
    with open(idx1_ubyte_file, 'rb') as f:
        bin_data = f.read()

    # Header: magic number and label count, each a big-endian 32-bit integer.
    fmt_header = '>ii'
    magic_number, num_images = struct.unpack_from(fmt_header, bin_data, 0)
    if magic_number != 2049:
        raise ValueError(
            'not an idx1-ubyte file (magic number %d, expected 2049): %s'
            % (magic_number, idx1_ubyte_file))

    # The labels follow the header as one unsigned byte each; decode them in
    # a single pass instead of one struct call per label.
    offset = struct.calcsize(fmt_header)
    labels = np.frombuffer(bin_data, dtype=np.uint8, count=num_images, offset=offset)
    # astype() returns a writable float64 copy, the dtype callers received
    # from the previous np.empty-based implementation.
    return labels.astype(np.float64)
def load_train_images(idx_ubyte_file=train_images_idx3_ubyte_file):
    """
    Decode an idx3 image file and return its contents.

    :param idx_ubyte_file: path of the idx3 file; defaults to the
        training-set image path defined in this module
    :return: whatever decode_idx3_ubyte returns for that file
    """
    images = decode_idx3_ubyte(idx_ubyte_file)
    return images
def load_train_labels(idx_ubyte_file=train_labels_idx1_ubyte_file):
    """
    Decode an idx1 label file and return its contents.

    :param idx_ubyte_file: path of the idx1 file; defaults to the
        training-set label path defined in this module
    :return: whatever decode_idx1_ubyte returns for that file
    """
    labels = decode_idx1_ubyte(idx_ubyte_file)
    return labels