This article implements the forward pass of a CNN for MNIST handwritten-digit recognition. The parameters (weights and biases) come from the training described in the companion article "CNN卷积神经网络实现MNIST手写数字识别(一)".
The difference from "CNN卷积神经网络实现MNIST手写数字识别(二)" is that here the convolution function, the pooling function, and the data transfer between layers in the forward pass are all written by hand, without calling any built-in (official) functions. Writing this code gave me a much deeper understanding of convolutional neural networks, and in particular a very clear picture of how data is stored in, and passed between, the different layers.
After repeated debugging, the code can run inference on the handwritten-digit test set, but it still has the following problems:
① The variable names were chosen rather casually and there are many intermediate variables, which hurts readability and increases memory usage.
② The code is slow: for a single 28*28 digit image, one forward pass takes about 15 minutes, whereas a forward pass in "CNN卷积神经网络实现MNIST手写数字识别(二)" takes less than one second. (A vectorized convolution, sketched right after this introduction, is one way to close most of that gap.)
If you have any suggestions for optimizing this code, please feel free to contact me; I look forward to improving together. My QQ: 1772758470
Partly based on: https://blog.youkuaiyun.com/weixin_43580130/article/details/107960585
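As one possible direction for that optimization (a minimal sketch under my own assumptions, not code from article (一) or (二)): the per-output-pixel triple loop in conv2d_33 below can be replaced by a NumPy sliding-window sum that loops only over the nine kernel positions. The function name conv2d_33_fast is hypothetical.
import numpy as np

def conv2d_33_fast(x_conv, w_conv):
    # Vectorized 3x3 "valid" cross-correlation; same output as conv2d_33 on a 2-D input.
    x_conv = np.asarray(x_conv, dtype=np.float32)
    w_conv = np.asarray(w_conv, dtype=np.float32)
    rows, cols = x_conv.shape
    out = np.zeros((rows - 2, cols - 2), dtype=np.float32)
    for dr in range(3):      # nine shifted, scaled additions instead of one Python loop per output pixel
        for dc in range(3):
            out += w_conv[dr, dc] * x_conv[dr:dr + rows - 2, dc:dc + cols - 2]
    return out
Swapping this in for the conv2d_33 calls in the main program should produce the same numbers while removing most of the runtime; I have not benchmarked it, so treat the speed-up as an estimate.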
The source code of the main program is as follows:
# -*- coding: utf-8 -*-
"""
Created on Thu Apr 1 16:35:26 2021
@author: ZZJin
"""
# Reference: https://blog.youkuaiyun.com/weixin_43580130/article/details/107960585
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import datasets
import os
import numpy as np
import sys
import math  # import the math module
import time
sys.path.append(r"D:\Anaconda_project\CNNInFPGA\tf2.0_CNN_keras")
from Function2 import conv2d_33
from Function2 import maxpol_22
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'  # suppress irrelevant TensorFlow console output
# x_train: [60k, 28, 28], y_train: [60k]
# x_test:  [10k, 28, 28], y_test:  [10k]
# (x, y), _ = datasets.mnist.load_data()  # load the dataset
(x_train, y_train), (x_test, y_test) = datasets.mnist.load_data()  # load the MNIST dataset
# dividing by 255. rescales x from [0, 255] to [0., 1.]
x = tf.convert_to_tensor(x_test, dtype=tf.float32) / 255.  # create tensors
y = tf.convert_to_tensor(y_test, dtype=tf.int32)
print(x.shape, y.shape, x.dtype, y.dtype)  # inspect the shapes and dtypes of x and y
print('\n')
print(tf.reduce_min(x), tf.reduce_max(x))  # inspect the min/max of x and y
print('\n')
print(tf.reduce_min(y), tf.reduce_max(y))
# train_db = tf.data.Dataset.from_tensor_slices((x,y)).batch(128)  # slice by sample, then batch in groups of 128
# train_iter = iter(train_db)  # iterator
# sample = next(train_iter)
# print('batch:', sample[0].shape, sample[1].shape)
# At this point we have the MNIST test set, with the images separated from the labels;
# the commented-out lines above would additionally batch the dataset in groups of 128.
# Next, allocate and initialize the weights and biases.
w1 = np.zeros((1, 32, 3, 3)) #w1 = np.zeros((in_channel,out_channel, row, col))
b1 = np.zeros(32)
w2 = np.zeros((32, 64, 3, 3))
b2 = np.zeros(64)
w3 = np.zeros((128, 1600))
b3 = np.zeros(128)
w4 = np.zeros((10, 128))
b4 = np.zeros(10)
out_layer1 = np.zeros((32, 26, 26))
out_layer1_bias = np.zeros((32, 26, 26))
out_layer1_relu = np.zeros((32, 26, 26))
out_maxpol_layer1 = np.zeros((32, 13, 13))
out_layer2 = np.zeros((32, 11, 11))
out_layer2sum = np.zeros((11, 11))  # accumulates the sum over the 32 input channels for one output channel
out_layer2sum64 = np.zeros((64, 11, 11))
out_layer2_bias = np.zeros((64, 11, 11))
out_layer2_relu = np.zeros((64, 11, 11))
out_maxpol_layer2 = np.zeros((64, 5, 5))
data_oneline = np.zeros((5,5,64))
out_layer3 = np.zeros(128)
out_layer4 = np.zeros(10)
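# Network architecture implied by the buffer shapes above (my own summary of the shapes,
# which should match the model trained in article (一)):
#   input 28x28x1 -> Conv 3x3 x32 -> 26x26x32 -> MaxPool 2x2 -> 13x13x32
#   -> Conv 3x3 x64 -> 11x11x64 -> MaxPool 2x2 -> 5x5x64
#   -> Flatten (1600) -> Dense 128 -> Dense 10 -> softmax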
# Load the trained network parameters (the .npy file stores an object array, so allow_pickle is needed)
loadData = np.load('CNN_weight.npy', allow_pickle=True)
# Keras stores conv kernels as [row, col, in_channel, out_channel]; rearrange them into
# w[in_channel][out_channel][row][col] as used below.
for in_channel1 in list(range(0, 1)):        # read conv layer 1 weights
    for out_channel1 in list(range(0, 32)):
        for row1 in list(range(0, 3)):
            for col1 in list(range(0, 3)):
                w1[in_channel1][out_channel1][row1][col1] = loadData[0][row1][col1][0][out_channel1]
b1 = loadData[1]                             # conv layer 1 bias
for in_channel2 in list(range(0, 32)):       # read conv layer 2 weights
    for out_channel2 in list(range(0, 64)):
        for row2 in list(range(0, 3)):
            for col2 in list(range(0, 3)):
                w2[in_channel2][out_channel2][row2][col2] = loadData[2][row2][col2][in_channel2][out_channel2]
b2 = loadData[3]                             # conv layer 2 bias
# Keras stores dense kernels as [in_features, out_features]; transpose them into w[out][in].
for out_channel3 in list(range(0, 128)):
    for in_channel3 in list(range(0, 1600)): # read dense layer 1 weights
        w3[out_channel3][in_channel3] = loadData[4][in_channel3][out_channel3]
b3 = loadData[5]                             # dense layer 1 bias
for out_channel4 in list(range(0, 10)):
    for in_channel4 in list(range(0, 128)):  # read dense layer 2 weights
        w4[out_channel4][in_channel4] = loadData[6][in_channel4][out_channel4]
b4 = loadData[7]                             # dense layer 2 bias
print("Finished loading weights\n")
# Forward-pass computation
# Convolution layer 1: 1 input channel -> 32 output channels, run on test image No. 20
inChannel_layer1 = 1
inChannel_lay1 = 0
outChannel_layer1 = 32
# for inChannel_lay1 in list(range(0, inChannel_layer1)):
for outChannel_lay1 in list(range(0, outChannel_layer1)):
    start = time.perf_counter()
    out_layer1[outChannel_lay1] = conv2d_33(x[20], w1[inChannel_lay1][outChannel_lay1], 28, 28)
    out_layer1_bias[outChannel_lay1] = out_layer1[outChannel_lay1] + b1[outChannel_lay1]  # add bias
    end = time.perf_counter()
    print('outChannel_lay1 ', outChannel_lay1)
    print('Running time: %s Seconds' % (end - start))
out_layer1_relu = tf.nn.relu(out_layer1_bias, name=None)  # ReLU activation
print("Finished convolution layer 1\n")
# Max-pooling layer 1: 26x26 -> 13x13 (the outer loop runs only once)
inChannel_layer1 = 1
outChannel_layer1 = 32
for inChannel_lay1 in list(range(0, inChannel_layer1)):
    for outChannel_lay1 in list(range(0, outChannel_layer1)):
        out_maxpol1 = maxpol_22(out_layer1_relu[outChannel_lay1], 26, 26)
        out_maxpol_layer1[outChannel_lay1] = out_maxpol1
# Convolution layer 2: 32 input channels -> 64 output channels
inChannel_layer2 = 32
outChannel_layer2 = 64
for outChannel_lay2 in list(range(0, outChannel_layer2)):
    start = time.perf_counter()
    for inChannel_lay2 in list(range(0, inChannel_layer2)):
        # convolve each of the 32 input feature maps with its own 3x3 kernel
        out_layer2[inChannel_lay2] = conv2d_33(out_maxpol_layer1[inChannel_lay2], w2[inChannel_lay2][outChannel_lay2], 13, 13)
        # accumulate the 32 convolution results element-wise
        out_layer2sum = out_layer2sum + out_layer2[inChannel_lay2]
    out_layer2sum64[outChannel_lay2] = out_layer2sum
    out_layer2sum = np.zeros((11, 11))  # reset the accumulator
    out_layer2_bias[outChannel_lay2] = out_layer2sum64[outChannel_lay2] + b2[outChannel_lay2]  # add bias
    end = time.perf_counter()
    print('outChannel_lay2 ', outChannel_lay2, 'inChannel_lay2 ', inChannel_lay2)
    print('Running time: %s Seconds' % (end - start))
out_layer2_relu = tf.nn.relu(out_layer2_bias, name=None)  # ReLU activation
# Max-pooling layer 2: 11x11 -> 5x5
inChannel_layer2 = 1  # note: this assignment is questionable, since it overwrites the conv-layer-2 value; the outer loop only needs to run once
outChannel_layer2 = 64
for inChannel_lay2 in list(range(0, inChannel_layer2)):
    for outChannel_lay2 in list(range(0, outChannel_layer2)):
        out_maxpol2 = maxpol_22(out_layer2_relu[outChannel_lay2], 11, 11)
        out_maxpol_layer2[outChannel_lay2] = out_maxpol2
# Flatten the data: (64, 5, 5) -> a single row of 64*5*5 = 1600 values
i_layer3 = 5
j_layer3 = 5
for i_lay3 in list(range(0, i_layer3)):
    for j_lay3 in list(range(0, j_layer3)):
        data_oneline[i_lay3][j_lay3] = out_maxpol_layer2[:, i_lay3, j_lay3]
data_oneline_out = tf.reshape(data_oneline, [-1, 64*5*5])  # flatten into a single row
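# Note (my reading of the code, not an original comment): data_oneline is filled in
# (row, col, channel) order, so the 1600-element vector is laid out H x W x C. This
# presumably matches the channels-last Flatten order of the Keras model trained in
# article (一), which is why w3 can be indexed directly with this flattened position.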
# Fully connected layer 3: 1600 -> 128
in_layer3 = np.array(data_oneline_out)
# w3 = np.array(tf.reshape(w3, [-1, 1600]))  # flatten into a single row
outChannel_layer3 = 128
for outChannel_lay3 in list(range(0, outChannel_layer3)):
    out_layer3[outChannel_lay3] = sum(in_layer3[0] * w3[outChannel_lay3]) + b3[outChannel_lay3]
out_layer3_relu = tf.nn.relu(out_layer3, name=None)  # ReLU activation
# Fully connected layer 4: 128 -> 10
in_layer4 = np.array(tf.reshape(out_layer3_relu, [-1, 128]))  # flatten into a single row
# in_layer4 = out_layer3
outChannel_layer4 = 10
for outChannel_lay4 in list(range(0, outChannel_layer4)):
    out_layer4[outChannel_lay4] = sum(in_layer4[0] * w4[outChannel_lay4]) + b4[outChannel_lay4]
out_layer4_softmax = tf.nn.softmax(out_layer4, name=None)  # softmax activation
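To read off the prediction at the end of the forward pass (my own addition, not part of the original listing), compare the argmax of the softmax output with the ground-truth label of the image that was fed in (test image No. 20):
predicted_digit = int(np.argmax(out_layer4_softmax))
print('predicted:', predicted_digit, ' ground truth:', int(y[20]))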
The convolution and pooling functions called by the main program (the Function2 module) are as follows:
# -*- coding: utf-8 -*-
"""
Created on Sat Apr 10 13:30:37 2021
@author: ZZJin
"""
# Convolution function
import tensorflow as tf
import math  # math module (floor is used in the pooling function)
import numpy as np
def conv2d_33(x_conv, w_conv, row_conv, col_conv):  # x_conv: input data, w_conv: 3x3 kernel,
    # row_conv / col_conv: number of rows / columns of x_conv
    # example test values:
    # x_conv = x[0]
    # w_conv = w1[0][0]
    # row_conv = 28
    # col_conv = 28
    x_conv32 = tf.cast(x_conv, dtype=tf.float32)  # convert to float32
    w_conv32 = tf.cast(w_conv, dtype=tf.float32)  # convert to float32
    image_test = tf.reshape(x_conv32, [-1, row_conv*col_conv])  # flatten the input to a single row
    filter_test = tf.reshape(w_conv32, [-1, 3*3])               # flatten the kernel to a single row
    out_conv = np.zeros((row_conv - 2, col_conv - 2))
    for j_conv in range(0, (row_conv - 2)):      # scan rows
        for i_conv in range(0, (col_conv - 2)):  # scan columns
            # 3x3 window starting at (j_conv, i_conv); in the row-major flattening, element
            # (r, c) sits at index r*col_conv + c (identical to the original r*row_conv + c
            # for the square inputs used in this article)
            buffer_conv = (image_test[0][i_conv + 0 + j_conv*col_conv]*filter_test[0][0] + image_test[0][i_conv + 1 + j_conv*col_conv]*filter_test[0][1] + image_test[0][i_conv + 2 + j_conv*col_conv]*filter_test[0][2]
                           + image_test[0][i_conv + 0 + (j_conv+1)*col_conv]*filter_test[0][3] + image_test[0][i_conv + 1 + (j_conv+1)*col_conv]*filter_test[0][4] + image_test[0][i_conv + 2 + (j_conv+1)*col_conv]*filter_test[0][5]
                           + image_test[0][i_conv + 0 + (j_conv+2)*col_conv]*filter_test[0][6] + image_test[0][i_conv + 1 + (j_conv+2)*col_conv]*filter_test[0][7] + image_test[0][i_conv + 2 + (j_conv+2)*col_conv]*filter_test[0][8]
                           )
            out_conv[j_conv][i_conv] = buffer_conv
            # print(j_conv, i_conv)
    return out_conv
# Max-pooling function
def maxpol_22(x_maxpol, row_maxpol, col_maxpol):  # x_maxpol: input data
    # row_maxpol / col_maxpol: number of rows / columns of x_maxpol
    row_maxpol = math.floor(row_maxpol/2)  # if the number of rows/columns is odd, the last row/column is dropped
    col_maxpol = math.floor(col_maxpol/2)
    out_maxpol = np.zeros((row_maxpol, col_maxpol))
    for j_maxpol in range(0, row_maxpol):      # scan rows
        for i_maxpol in range(0, col_maxpol):  # scan columns
            # maximum over the 2x2 window
            buffer_maxpol = max(x_maxpol[j_maxpol*2][i_maxpol*2], x_maxpol[j_maxpol*2][i_maxpol*2+1], x_maxpol[j_maxpol*2+1][i_maxpol*2], x_maxpol[j_maxpol*2+1][i_maxpol*2+1])
            out_maxpol[j_maxpol][i_maxpol] = buffer_maxpol
    return out_maxpol
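As a quick cross-check of the two hand-written functions above (a sketch I added, assuming the Function2 module is importable as in the main program), their outputs can be compared against TensorFlow's built-in ops:
import numpy as np
import tensorflow as tf
from Function2 import conv2d_33, maxpol_22

img = np.random.rand(28, 28).astype(np.float32)  # dummy input image
ker = np.random.rand(3, 3).astype(np.float32)    # dummy 3x3 kernel

# tf.nn.conv2d expects NHWC inputs and [h, w, in, out] kernels
ref_conv = tf.nn.conv2d(img.reshape(1, 28, 28, 1), ker.reshape(3, 3, 1, 1),
                        strides=1, padding='VALID')[0, :, :, 0].numpy()
print('conv max abs diff:', np.max(np.abs(conv2d_33(img, ker, 28, 28) - ref_conv)))

ref_pool = tf.nn.max_pool2d(ref_conv.reshape(1, 26, 26, 1), ksize=2, strides=2,
                            padding='VALID')[0, :, :, 0].numpy()
print('pool max abs diff:', np.max(np.abs(maxpol_22(ref_conv, 26, 26) - ref_pool)))
Both differences should be at the level of float32 rounding error.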
This code is admittedly a bit hard to follow, but if you take the time to read it carefully and step through it yourself, I believe it will help you a great deal.
Debugging this code really took me a long time. If you found it useful, please give it a like; your likes are the greatest encouragement for me. Thank you!