Handwritten Digit Recognition, Fully From Scratch -- No Framework
Background
The code consists of four main parts: preprocessing, training, prediction, and visualization. Each is introduced in turn below.
Preprocessing
Before training, we first read the contents of the MNIST dataset and reshape each image to 28 × 28. The model's weights and biases are given random initial values, where every element is sampled from a normal distribution with mean 0 and standard deviation 0.01. The code consists of four main steps, illustrated here by reading the training set.
1) Open and read the binary file.
binfile = open(filename, 'rb')
buf = binfile.read()
2) Parse the header: the magic number (a check that the file format is correct; for MNIST image files it should be 2051), the image count, and the numbers of rows and columns. index is the byte offset that tracks how far parsing has progressed.
magic, self.train_img_num, self.numRows, self.numColums = struct.unpack_from('>IIII', buf, index)
3) Parse the pixel values, since each image is to become 28 × 28 = 784 pixels.
im = struct.unpack_from('>784B', buf, index)
# After parsing 784 bytes, advance the offset so parsing continues with the next image
index += struct.calcsize('>784B')
4) Convert to a NumPy array and reshape.
im = np.array(im)
im = im.reshape(1, 28 * 28)
# Put the parsed image into the training-image list,
# which is actually a two-dimensional array
self.train_img_list[ i , : ] = im
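Putting the four steps together, here is a minimal self-contained reader sketch (read_idx_images is a hypothetical name; the full version the program actually uses is read_train_images in the complete code below):

import struct
import numpy as np

def read_idx_images(filename):
    # Step 1: open and read the binary file
    with open(filename, 'rb') as binfile:
        buf = binfile.read()
    # Step 2: header -- magic number, image count, rows, columns
    index = 0
    magic, num, rows, cols = struct.unpack_from('>IIII', buf, index)
    index += struct.calcsize('>IIII')
    img_list = np.zeros((num, rows * cols))
    for i in range(num):
        # Step 3: 784 pixel bytes per image, then advance the offset
        im = struct.unpack_from('>784B', buf, index)
        index += struct.calcsize('>784B')
        # Step 4: convert to a NumPy array and store it as one row
        img_list[i, :] = np.array(im).reshape(1, 28 * 28)
    return img_list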
Training
Here the network is trained for 1000 iterations; the count can be adjusted as you like. Training also uses a batch size: in each iteration, BATCHSIZE images are fed into the network together, one batch per iteration.
for i in range( 1000 ):
    # Randomly shuffle the data
    np.random.shuffle( self.train_data )
    # Take the first BATCHSIZE images
    img_list = self.train_data[:self.BATCHSIZE,:-1]
    label_list = self.train_data[:self.BATCHSIZE, -1:]
    print("Train Time: ",i)
    self.train_network(img_list, label_list )
Training proper uses two hidden layers, of 30 and 60 units respectively. The hidden layers use the ReLU activation function, the output layer uses the softmax activation function, and the loss is computed with cross-entropy.
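In formulas, matching the shapes used in the code (W1 is 784 × 30, W2 is 30 × 60, W3 is 60 × 10, and λ is reg_factor):

$$h_1 = \max(0,\, x W_1 + b_1), \qquad h_2 = \max(0,\, h_1 W_2 + b_2), \qquad z = h_2 W_3 + b_3$$
$$p_k = \frac{e^{z_k}}{\sum_j e^{z_j}}, \qquad L = -\log p_y + \frac{\lambda}{2}\left(\lVert W_1 \rVert^2 + \lVert W_2 \rVert^2 + \lVert W_3 \rVert^2\right)$$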
1) Forward propagation
hidden_layer1 = np.maximum(0, np.matmul( img_batch_list, self.W1 ) + self.b1 )
hidden_layer2 = np.maximum(0, np.matmul( hidden_layer1, self.W2 ) + self.b2 )
scores = np.matmul( hidden_layer2, self.W3 ) + self.b3
# Softmax activation. A library function could be called directly;
# here it is computed step by step. axis=1 sums along each row, and
# keepdims=True keeps the result two-dimensional for broadcasting.
scores_e = np.exp( scores )
scores_e_sum = np.sum( scores_e, axis = 1, keepdims= True )
probs = scores_e / scores_e_sum
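One caveat not handled in the original code: np.exp(scores) can overflow once the scores grow large. A common, mathematically equivalent stabilization is to subtract each row's maximum before exponentiating; a minimal sketch:

# Stabilized softmax: exp(z - max(z)) / sum(exp(z - max(z))) equals softmax(z)
scores_shifted = scores - np.max(scores, axis=1, keepdims=True)
scores_e = np.exp(scores_shifted)
probs = scores_e / np.sum(scores_e, axis=1, keepdims=True)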
2) Computing the loss
# Initialize the per-sample loss list
loss_list_tmp = np.zeros((train_example_num, 1))
# Compute the loss for every sample in the batch. Rather than pushing down the
# predicted probabilities of the wrong labels, we prefer to push up the
# predicted probability of the correct label.
# For example, suppose the correct label is 2, and the prediction assigns 1 a
# probability of 0.5 and 2 a probability of 0.6. Computing the loss here and
# backpropagating focuses on raising the predicted probability of 2, not on
# lowering the predicted probability of 1.
# So only the softmax probability of the correct label is computed here.
for i in range(train_example_num):
    loss_list_tmp[i] = scores_e[i][int(label_batch_list[i])] / scores_e_sum[ i ]
# Per-sample loss; again only the correct label is used, because the one-hot
# encoding of the wrong labels is 0, so their products are 0 and contribute
# nothing to the sum.
loss_list = -np.log(loss_list_tmp )
# Since the computation covers a whole batch, take the mean as this batch's loss.
# The 0.5 * reg_factor * (W*W) terms are the regularization penalty,
# which guards against overfitting.
loss = np.mean(loss_list, axis=0)[0] + \
       0.5 * self.reg_factor * np.sum( self.W1 * self.W1 ) + \
       0.5 * self.reg_factor * np.sum( self.W2 * self.W2 ) + \
       0.5 * self.reg_factor * np.sum( self.W3 * self.W3 )
# Append the batch loss to the overall loss list
self.loss_list.append( loss )
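This per-sample loop is one of the problems listed at the end of this post: it does not exploit the batch. One possible vectorized version, using NumPy integer indexing and assuming label_batch_list stores integer class labels in its single column:

# Pick each sample's correct-class probability in one shot
idx = label_batch_list[:, 0].astype(int)
correct_probs = probs[np.arange(train_example_num), idx]
data_loss = -np.log(correct_probs).mean()
reg_loss = 0.5 * self.reg_factor * (np.sum(self.W1 * self.W1)
           + np.sum(self.W2 * self.W2) + np.sum(self.W3 * self.W3))
loss = data_loss + reg_loss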
3) Backpropagation
dscore = np.zeros((train_example_num, self.K))
# Gradient of softmax + cross-entropy: prediction minus target
for i in range(train_example_num):
    dscore[i][:] = probs[i][:]
    dscore[i][int(label_batch_list[i])] -= 1
# Normalize the gradient by the number of training samples
dscore /= train_example_num
dW3 = np.dot( hidden_layer2.T, dscore )
# For a bias, summing each sample's gradient over the batch is enough
db3 = np.sum( dscore, axis = 0, keepdims= True )
dh2 = np.dot( dscore, self.W3.T )
# ReLU: wherever h2 was 0, its gradient is 0
dh2[ hidden_layer2 <= 0 ] = 0
dW2 = np.dot( hidden_layer1.T, dh2 )
db2 = np.sum( dh2, axis = 0, keepdims= True )
# The same pattern continues down to the first layer
dh1 = np.dot( dh2, self.W2.T )
dh1[ hidden_layer1 <= 0 ] = 0
dW1 = np.dot( img_batch_list.T, dh1 )
db1 = np.sum( dh1, axis = 0, keepdims= True )
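As with the loss, the dscore loop can be vectorized; a sketch (idx as in the loss sketch above):

dscore = probs.copy()
dscore[np.arange(train_example_num), idx] -= 1   # subtract the one-hot target
dscore /= train_example_num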
For the derivation of the softmax + cross-entropy gradient, see this video: 【交叉熵softmax求导简单解释】 https://www.bilibili.com/video/BV1NU4y1w7C9/?share_source=copy_web&vd_source=1adc4d4a0c37a44e9bda94a95f3f9032
The original post illustrates the weight gradients with an image; the formulas it depicts are given below.
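These are the standard softmax + cross-entropy results, matching the code above:

$$\frac{\partial L}{\partial z_k} = p_k - \mathbf{1}[k = y]$$
$$\frac{\partial L}{\partial W_3} = h_2^{\top} \frac{\partial L}{\partial z}, \qquad \frac{\partial L}{\partial b_3} = \sum_{\text{batch}} \frac{\partial L}{\partial z}, \qquad \frac{\partial L}{\partial h_2} = \frac{\partial L}{\partial z} W_3^{\top}$$

with $\partial L/\partial h_2$ zeroed wherever the ReLU was inactive; the same pattern repeats for W2, b2 and W1, b1.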
# Add the regularization term, to keep the weights from becoming too extreme
dW3 += self.reg_factor * self.W3
dW2 += self.reg_factor * self.W2
dW1 += self.reg_factor * self.W1
# Apply the learning rate, which controls the training speed and
# keeps each step from being too large
self.W3 += -self.stepsize * dW3
self.W2 += -self.stepsize * dW2
self.W1 += -self.stepsize * dW1
self.b3 += -self.stepsize * db3
self.b2 += -self.stepsize * db2
self.b1 += -self.stepsize * db1
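Why dW3 += self.reg_factor * self.W3: the penalty added to the loss was (λ/2)·ΣW², and its derivative with respect to each weight is λW. The update itself is plain gradient descent:

$$\frac{\partial}{\partial W}\left(\frac{\lambda}{2}\sum W^2\right) = \lambda W, \qquad W \leftarrow W - \eta\,\frac{\partial L}{\partial W}, \qquad b \leftarrow b - \eta\,\frac{\partial L}{\partial b}$$

where η is stepsize (1e-2 in the code) and λ is reg_factor (1e-3).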
Prediction
# Forward propagation
hidden_layer1 = np.maximum(0, np.matmul(self.test_img_list, self.W1) + self.b1)
hidden_layer2 = np.maximum(0, np.matmul(hidden_layer1, self.W2) + self.b2)
scores = np.matmul(hidden_layer2, self.W3) + self.b3
# For each image, take the class with the highest predicted score
prediction = np.argmax( scores, axis = 1 )
# Reshape prediction to match the shape of the test-label array
prediction = np.reshape( prediction, ( 10000,1 ) )
print(prediction.shape)
print(self.test_label_list.shape)
accuracy = np.mean( prediction == self.test_label_list )
print('The accuracy is: ',accuracy)
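The 10000 here is hard-coded to the MNIST test-set size; a shape-agnostic sketch of the same reshape:

prediction = prediction.reshape(-1, 1)   # one column, however many test images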
Visualization
# Visualize the first ten images of the test set
for i in range(10):
    # Call query(), which is again a simple forward pass
    outputs = data.query(data.test_img_list[i])
    label = np.argmax(outputs)
    print(label)
    # Plot the image
    image_array = data.test_img_list[i].reshape(28, 28)
    plt.imshow(image_array, cmap="Greys", interpolation='None')
    plt.pause(0.000001)
plt.show()
print('done')
Complete code
# -*- coding:utf-8
import numpy as np
import struct
import matplotlib.pyplot as plt
import random
import pickle
class Data:
    def __init__(self):
        self.K = 10
        self.N = 60000
        self.M = 10000
        self.BATCHSIZE = 2000
        self.reg_factor = 1e-3
        self.stepsize = 1e-2
        self.train_img_list = np.zeros((self.N, 28 * 28))
        self.train_label_list = np.zeros((self.N, 1))
        self.test_img_list = np.zeros((self.M, 28 * 28))
        self.test_label_list = np.zeros((self.M, 1))
        self.loss_list = []
        self.init_network()
        # Change these paths to wherever you stored the MNIST files
        self.read_train_images( 'D://software//PycharmProjects//pythonProject//train-images.idx3-ubyte')
        self.read_train_labels( 'D://software//PycharmProjects//pythonProject//train-labels.idx1-ubyte')
        self.train_data = np.append( self.train_img_list, self.train_label_list, axis = 1 )
        self.read_test_images('D://software//PycharmProjects//pythonProject//t10k-images.idx3-ubyte')
        self.read_test_labels('D://software//PycharmProjects//pythonProject//t10k-labels.idx1-ubyte')

    def predict(self):
        hidden_layer1 = np.maximum(0, np.matmul(self.test_img_list, self.W1) + self.b1)
        hidden_layer2 = np.maximum(0, np.matmul(hidden_layer1, self.W2) + self.b2)
        scores = np.matmul(hidden_layer2, self.W3) + self.b3
        prediction = np.argmax( scores, axis = 1 )
        prediction = np.reshape( prediction, ( 10000,1 ) )
        print(prediction.shape)
        print(self.test_label_list.shape)
        accuracy = np.mean( prediction == self.test_label_list )
        print('The accuracy is: ',accuracy)
        return

    def query(self,inputs_list):
        hidden_layer1 = np.maximum(0, np.matmul(inputs_list, self.W1) + self.b1)
        hidden_layer2 = np.maximum(0, np.matmul(hidden_layer1, self.W2) + self.b2)
        scores = np.matmul(hidden_layer2, self.W3) + self.b3
        return scores

    def train(self):
        for i in range( 1000 ):
            np.random.shuffle( self.train_data )
            img_list = self.train_data[:self.BATCHSIZE,:-1]
            label_list = self.train_data[:self.BATCHSIZE, -1:]
            print("Train Time: ",i)
            self.train_network(img_list, label_list )

    def train_network(self, img_batch_list, label_batch_list):
        train_example_num = img_batch_list.shape[0]
        hidden_layer1 = np.maximum(0, np.matmul( img_batch_list, self.W1 ) + self.b1 )
        hidden_layer2 = np.maximum(0, np.matmul( hidden_layer1, self.W2 ) + self.b2 )
        scores = np.matmul( hidden_layer2, self.W3 ) + self.b3
        scores_e = np.exp( scores )
        scores_e_sum = np.sum( scores_e, axis = 1, keepdims= True )
        probs = scores_e / scores_e_sum
        loss_list_tmp = np.zeros((train_example_num, 1))
        for i in range(train_example_num):
            loss_list_tmp[i] = scores_e[i][int(label_batch_list[i])] / scores_e_sum[ i ]
        loss_list = -np.log(loss_list_tmp )
        loss = np.mean(loss_list, axis=0)[0] + \
               0.5 * self.reg_factor * np.sum( self.W1 * self.W1 ) + \
               0.5 * self.reg_factor * np.sum( self.W2 * self.W2 ) + \
               0.5 * self.reg_factor * np.sum( self.W3 * self.W3 )
        self.loss_list.append( loss )
        print(loss, " ", len(self.loss_list))
        dscore = np.zeros((train_example_num, self.K))
        for i in range(train_example_num):
            dscore[i][:] = probs[i][:]
            dscore[i][int(label_batch_list[i])] -= 1
        dscore /= train_example_num
        dW3 = np.dot( hidden_layer2.T, dscore )
        db3 = np.sum( dscore, axis = 0, keepdims= True )
        dh2 = np.dot( dscore, self.W3.T )
        dh2[ hidden_layer2 <= 0 ] = 0
        dW2 = np.dot( hidden_layer1.T, dh2 )
        db2 = np.sum( dh2, axis = 0, keepdims= True )
        dh1 = np.dot( dh2, self.W2.T )
        dh1[ hidden_layer1 <= 0 ] = 0
        dW1 = np.dot( img_batch_list.T, dh1 )
        db1 = np.sum( dh1, axis = 0, keepdims= True )
        dW3 += self.reg_factor * self.W3
        dW2 += self.reg_factor * self.W2
        dW1 += self.reg_factor * self.W1
        self.W3 += -self.stepsize * dW3
        self.W2 += -self.stepsize * dW2
        self.W1 += -self.stepsize * dW1
        self.b3 += -self.stepsize * db3
        self.b2 += -self.stepsize * db2
        self.b1 += -self.stepsize * db1
        return

    def init_network(self):
        self.W1 = 0.01 * np.random.randn( 28 * 28, 30 )
        self.b1 = 0.01 * np.random.randn( 1, 30 )
        self.W2 = 0.01 * np.random.randn( 30, 60 )
        self.b2 = 0.01 * np.random.randn( 1, 60 )
        self.W3 = 0.01 * np.random.randn( 60, self.K )
        self.b3 = 0.01 * np.random.randn( 1, self.K )

    def read_train_images(self,filename):
        binfile = open(filename, 'rb')
        buf = binfile.read()
        index = 0
        magic, self.train_img_num, self.numRows, self.numColums = struct.unpack_from('>IIII', buf, index)
        print(magic, ' ', self.train_img_num, ' ', self.numRows, ' ', self.numColums)
        index += struct.calcsize('>IIII')
        for i in range(self.train_img_num):
            im = struct.unpack_from('>784B', buf, index)
            index += struct.calcsize('>784B')
            im = np.array(im)
            im = im.reshape(1, 28 * 28)
            self.train_img_list[ i , : ] = im
        print("train_img_list.shape:")
        print(self.train_img_list.shape)

    def read_train_labels(self,filename):
        binfile = open(filename, 'rb')
        index = 0
        buf = binfile.read()
        binfile.close()
        magic, self.train_label_num = struct.unpack_from('>II', buf, index)
        index += struct.calcsize('>II')
        for i in range(self.train_label_num):
            label_item = int(struct.unpack_from('>B', buf, index)[0])
            self.train_label_list[ i , : ] = label_item
            index += struct.calcsize('>B')

    def read_test_images(self, filename):
        binfile = open(filename, 'rb')
        buf = binfile.read()
        index = 0
        magic, self.test_img_num, self.numRows, self.numColums = struct.unpack_from('>IIII', buf, index)
        print(magic, ' ', self.test_img_num, ' ', self.numRows, ' ', self.numColums)
        index += struct.calcsize('>IIII')
        for i in range(self.test_img_num):
            im = struct.unpack_from('>784B', buf, index)
            index += struct.calcsize('>784B')
            im = np.array(im)
            im = im.reshape(1, 28 * 28)
            self.test_img_list[i, :] = im

    def read_test_labels(self,filename):
        binfile = open(filename, 'rb')
        index = 0
        buf = binfile.read()
        binfile.close()
        magic, self.test_label_num = struct.unpack_from('>II', buf, index)
        index += struct.calcsize('>II')
        for i in range(self.test_label_num):
            label_item = int(struct.unpack_from('>B', buf, index)[0])
            self.test_label_list[i, :] = label_item
            index += struct.calcsize('>B')

def main():
    data = Data()
    data.train()
    data.predict()
    for i in range(10):
        outputs = data.query(data.test_img_list[i])
        label = np.argmax(outputs)
        print(label)
        image_array = data.test_img_list[i].reshape(28, 28)
        plt.imshow(image_array, cmap="Greys", interpolation='None')
        plt.pause(0.000001)
    plt.show()
    print('done')

if __name__ == '__main__':
    main()
MNIST handwritten digit dataset
Known problems with the code
- Places where an existing function could be called still compute things by hand
- The loss and its backpropagation are still computed one sample at a time, without exploiting the batch (see the vectorized sketches above)
- The code structure is messy: the neural network should be separated from the Data class, and the training procedure should live in one place rather than a for loop that calls a training function. Some parameters need not be defined at all, such as train_example_num, which could simply be BATCHSIZE. A minimal refactor sketch follows this list.
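A minimal sketch of one possible reorganization addressing the last point (the names Network and train_step are hypothetical, not from the original post): the network owns its parameters and performs one vectorized, numerically stabilized training step, while data loading would stay in a separate loader class.

import numpy as np

class Network:
    def __init__(self, sizes=(784, 30, 60, 10), stepsize=1e-2, reg=1e-3):
        self.stepsize, self.reg = stepsize, reg
        self.W = [0.01 * np.random.randn(m, n) for m, n in zip(sizes, sizes[1:])]
        self.b = [0.01 * np.random.randn(1, n) for n in sizes[1:]]

    def forward(self, x):
        # Returns every layer activation; the last ReLU output feeds the scores
        h = [x]
        for W, b in zip(self.W[:-1], self.b[:-1]):
            h.append(np.maximum(0, h[-1] @ W + b))
        scores = h[-1] @ self.W[-1] + self.b[-1]
        return h, scores

    def train_step(self, x, y):
        n = x.shape[0]
        h, scores = self.forward(x)
        scores = scores - scores.max(axis=1, keepdims=True)   # numerical stability
        probs = np.exp(scores) / np.exp(scores).sum(axis=1, keepdims=True)
        grad = probs.copy()
        grad[np.arange(n), y.ravel().astype(int)] -= 1        # softmax + CE gradient
        grad /= n
        # Walk backwards through the layers, updating as we go
        for i in range(len(self.W) - 1, -1, -1):
            dW = h[i].T @ grad + self.reg * self.W[i]
            db = grad.sum(axis=0, keepdims=True)
            if i > 0:
                grad = grad @ self.W[i].T
                grad[h[i] <= 0] = 0                           # ReLU gate
            self.W[i] -= self.stepsize * dW
            self.b[i] -= self.stepsize * db

# Usage: net = Network(); net.train_step(img_batch, label_batch)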
References
https://blog.youkuaiyun.com/superCally/article/details/54312625?fromshare=blogdetail