This article implements the forward pass of a CNN for MNIST handwritten-digit recognition. The parameters (weights and biases) come from the training described in the companion article "CNN卷积神经网络实现MNIST手写数字识别(一)".
The difference from "CNN卷积神经网络实现MNIST手写数字识别(二)" is that here the convolution function, the pooling function, and the data transfer between layers in the forward pass are all written by hand, without calling any built-in (official) functions. Writing this code gave me a much deeper understanding of convolutional neural networks, and in particular a very clear picture of how data is stored in, and passed between, the different layers.
After repeated debugging, the code can run inference on the handwritten-digit test set, but it still has the following problems:
① The variable names were chosen rather casually and there are many intermediate variables, which hurts readability and increases memory usage.
② The code is slow: for a single 28*28 digit image, one forward pass takes about 15 minutes, whereas a forward pass in "CNN卷积神经网络实现MNIST手写数字识别(二)" takes less than one second. (A vectorized convolution, sketched right after this introduction, is one way to close most of that gap.)
If you have any suggestions for optimizing this code, please feel free to contact me; I look forward to improving together. My QQ: 1772758470
Partly based on: https://blog.youkuaiyun.com/weixin_43580130/article/details/107960585
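As one possible direction for that optimization (a minimal sketch under my own assumptions, not code from article (一) or (二)): the per-output-pixel triple loop in conv2d_33 below can be replaced by a NumPy sliding-window sum that loops only over the nine kernel positions. The function name conv2d_33_fast is hypothetical.
import numpy as np

def conv2d_33_fast(x_conv, w_conv):
    # Vectorized 3x3 "valid" cross-correlation; same output as conv2d_33 on a 2-D input.
    x_conv = np.asarray(x_conv, dtype=np.float32)
    w_conv = np.asarray(w_conv, dtype=np.float32)
    rows, cols = x_conv.shape
    out = np.zeros((rows - 2, cols - 2), dtype=np.float32)
    for dr in range(3):      # nine shifted, scaled additions instead of one Python loop per output pixel
        for dc in range(3):
            out += w_conv[dr, dc] * x_conv[dr:dr + rows - 2, dc:dc + cols - 2]
    return out
Swapping this in for the conv2d_33 calls in the main program should produce the same numbers while removing most of the runtime; I have not benchmarked it, so treat the speed-up as an estimate.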
The source code of the main program is as follows:
# -*- coding: utf-8 -*-
"""
Created on Thu Apr 1 16:35:26 2021
@author: ZZJin
"""
# Reference: https://blog.youkuaiyun.com/weixin_43580130/article/details/107960585
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import datasets
import os
import numpy as np
import sys
import math  # import the math module
import time
sys.path.append(r"D:\Anaconda_project\CNNInFPGA\tf2.0_CNN_keras")
from Function2 import conv2d_33
from Function2 import maxpol_22
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'  # suppress irrelevant TensorFlow console output
# x_train: [60k, 28, 28], y_train: [60k]
# x_test:  [10k, 28, 28], y_test:  [10k]
# (x, y), _ = datasets.mnist.load_data()  # load the dataset
(x_train, y_train), (x_test, y_test) = datasets.mnist.load_data()  # load the MNIST dataset
# dividing by 255. rescales x from [0, 255] to [0., 1.]
x = tf.convert_to_tensor(x_test, dtype=tf.float32) / 255.  # create tensors
y = tf.convert_to_tensor(y_test, dtype=tf.int32)
print(x.shape, y.shape, x.dtype, y.dtype)  # inspect the shapes and dtypes of x and y
print('\n')
print(tf.reduce_min(x), tf.reduce_max(x))  # inspect the min/max of x and y
print('\n')
print(tf.reduce_min(y), tf.reduce_max(y))
# train_db = tf.data.Dataset.from_tensor_slices((x,y)).batch(128)  # slice by sample, then batch in groups of 128
# train_iter = iter(train_db)  # iterator
# sample = next(train_iter)
# print('batch:', sample[0].shape, sample[1].shape)
# At this point we have the MNIST test set, with the images separated from the labels;
# the commented-out lines above would additionally batch the dataset in groups of 128.
# Next, allocate and initialize the weights and biases.
w1 = np.zeros((1, 32, 3, 3)) #w1 = np.zeros((in_channel,out_channel, row, col))
b1 = np.zeros(32)
w2 = np.zeros((32, 64, 3, 3))
b2 = np.zeros(64)
w3 = np.zeros((128, 1600))
b3 = np.zeros(128)
w4 = np.zeros((10, 128))
b4 = np.zeros(10)
out_layer1 = np.zeros((32, 26, 26))
out_layer1_bias = np.zeros((32, 26, 26))
out_layer1_relu = np.zeros((32, 26, 26))
out_maxpol_layer1 = np.zeros((32, 13, 13))
out_layer2 = np.zeros((32, 11, 11))
out_layer2sum = np.zeros((11, 11))  # accumulates the sum over the 32 input channels for one output channel
out_layer2sum64 = np.zeros((64, 11, 11))
out_layer2_bias = np.zeros((64, 11, 11))
out_layer2_relu = np.zeros((64, 11, 11))
out_maxpol_layer2 = np.zeros((64, 5, 5))
data_oneline = np.zeros((5,5,64))
out_layer3 = np.zeros(128)
out_layer4 = np.zeros(10)
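# Network architecture implied by the buffer shapes above (my own summary of the shapes,
# which should match the model trained in article (一)):
#   input 28x28x1 -> Conv 3x3 x32 -> 26x26x32 -> MaxPool 2x2 -> 13x13x32
#   -> Conv 3x3 x64 -> 11x11x64 -> MaxPool 2x2 -> 5x5x64
#   -> Flatten (1600) -> Dense 128 -> Dense 10 -> softmax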
# Load the trained network parameters (the .npy file stores an object array, so allow_pickle is needed)
loadData = np.load('CNN_weight.npy', allow_pickle=True)
# Keras stores conv kernels as [row, col, in_channel, out_channel]; rearrange them into
# w[in_channel][out_channel][row][col] as used below.
for in_channel1 in list(range(0, 1)):        # read conv layer 1 weights
    for out_channel1 in list(range(0, 32)):
        for row1 in list(range(0, 3)):
            for col1 in list(range(0, 3)):
                w1[in_channel1][out_channel1][row1][col1] = loadData[0][row1][col1][0][out_channel1]
b1 = loadData[1]                             # conv layer 1 bias
for in_channel2 in list(range(0, 32)):       # read conv layer 2 weights
    for out_channel2 in list(range(0, 64)):
        for row2 in list(range(0, 3)):
            for col2 in list(range(0, 3)):
                w2[in_channel2][out_channel2][row2][col2] = loadData[2][row2][col2][in_channel2][out_channel2]
b2 = loadData[3]                             # conv layer 2 bias
# Keras stores dense kernels as [in_features, out_features]; transpose them into w[out][in].
for out_channel3 in list(range(0, 128)):
    for in_channel3 in list(range(0, 1600)): # read dense layer 1 weights
        w3[out_channel3][in_channel3] = loadData[4][in_channel3][out_channel3]
b3 = loadData[5]                             # dense layer 1 bias
for out_channel4 in list(range(0, 10)):
    for in_channel4 in list(range(0, 128)):  # read dense layer 2 weights
        w4[out_channel4][in_channel4] = loadData[6][in_channel4][out_channel4]
b4 = loadData[7]                             # dense layer 2 bias
print("Finished loading weights\n")
# Forward-pass computation
# Convolution layer 1: 1 input channel -> 32 output channels, run on test image No. 20
inChannel_layer1 = 1
inChannel_lay1 = 0
outChannel_layer1 = 32
# for inChannel_lay1 in list(range(0, inChannel_layer1)):
for outChannel_lay1 in list(range(0, outChannel_layer1)):
    start = time.perf_counter()
    out_layer1[outChannel_lay1] = conv2d_33(x[20], w1[inChannel_lay1][outChannel_lay1], 28, 28)
    out_layer1_bias[outChannel_lay1] = out_layer1[outChannel_lay1] + b1[outChannel_lay1]  # add bias
    end = time.perf_counter()
    print('outChannel_lay1 ', outChannel_lay1)
    print('Running time: %s Seconds' % (end - start))
out_layer1_relu = tf.nn.relu(out_layer1_bias, name=None)  # ReLU activation
print("Finished convolution layer 1\n")
# Max-pooling layer 1: 26x26 -> 13x13 (the outer loop runs only once)
inChannel_layer1 = 1
outChannel_layer1 = 32
for inChannel_lay1 in list(range(0, inChannel_layer1)):
    for outChannel_lay1 in list(range(0, outChannel_layer1)):
        out_maxpol1 = maxpol_22(out_layer1_relu[outChannel_lay1], 26, 26)
        out_maxpol_layer1[outChannel_lay1] = out_maxpol1
# Convolution layer 2: 32 input channels -> 64 output channels
inChannel_layer2 = 32
outChannel_layer2 = 64
for outChannel_lay2 in list(range(0, outChannel_layer2)):
    start = time.perf_counter()
    for inChannel_lay2 in list(range(0, inChannel_layer2)):
        # convolve each of the 32 input feature maps with its own 3x3 kernel
        out_layer2[inChannel_lay2] = conv2d_33(out_maxpol_layer1[inChannel_lay2], w2[inChannel_lay2][outChannel_lay2], 13, 13)
        # accumulate the 32 convolution results element-wise
        out_layer2sum = out_layer2sum + out_layer2[inChannel_lay2]
    out_layer2sum64[outChannel_lay2] = out_layer2sum
    out_layer2sum = np.zeros((11, 11))  # reset the accumulator
    out_layer2_bias[outChannel_lay2] = out_layer2sum64[outChannel_lay2] + b2[outChannel_lay2]  # add bias
    end = time.perf_counter()
    print('outChannel_lay2 ', outChannel_lay2, 'inChannel_lay2 ', inChannel_lay2)
    print('Running time: %s Seconds' % (end - start))
out_layer2_relu = tf.nn.relu(out_layer2_bias, name=None)  # ReLU activation
# Max-pooling layer 2: 11x11 -> 5x5
inChannel_layer2 = 1  # note: this assignment is questionable, since it overwrites the conv-layer-2 value; the outer loop only needs to run once
outChannel_layer2 = 64
for inChannel_lay2 in list(range(0, inChannel_layer2)):
    for outChannel_lay2 in list(range(0, outChannel_layer2)):
        out_maxpol2 = maxpol_22(out_layer2_relu[outChannel_lay2], 11, 11)
        out_maxpol_layer2[outChannel_lay2] = out_maxpol2
# Flatten the data: (64, 5, 5) -> a single row of 64*5*5 = 1600 values
i_layer3 = 5
j_layer3 = 5
for i_lay3 in list(range(0, i_layer3)):
    for j_lay3 in list(range(0, j_layer3)):
        data_oneline[i_lay3][j_lay3] = out_maxpol_layer2[:, i_lay3, j_lay3]
data_oneline_out = tf.reshape(data_oneline, [-1, 64*5*5])  # flatten into a single row
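# Note (my reading of the code, not an original comment): data_oneline is filled in
# (row, col, channel) order, so the 1600-element vector is laid out H x W x C. This
# presumably matches the channels-last Flatten order of the Keras model trained in
# article (一), which is why w3 can be indexed directly with this flattened position.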
# Fully connected layer 3: 1600 -> 128
in_layer3 = np.array(data_oneline_out)
# w3 = np.array(tf.reshape(w3, [-1, 1600]))  # flatten into a single row
outChannel_layer3 = 128
for outChannel_lay3 in list(range(0, outChannel_layer3)):
    out_layer3[outChannel_lay3] = sum(in_layer3[0] * w3[outChannel_lay3]) + b3[outChannel_lay3]
out_layer3_relu = tf.nn.relu(out_layer3, name=None)  # ReLU activation
# Fully connected layer 4: 128 -> 10
in_layer4 = np.array(tf.reshape(out_layer3_relu, [-1, 128]))  # flatten into a single row
# in_layer4 = out_layer3
outChannel_layer4 = 10
for outChannel_lay4 in list(range(0, outChannel_layer4)):
    out_layer4[outChannel_lay4] = sum(in_layer4[0] * w4[outChannel_lay4]) + b4[outChannel_lay4]
out_layer4_softmax = tf.nn.softmax(out_layer4, name=None)  # softmax activation
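To read off the prediction at the end of the forward pass (my own addition, not part of the original listing), compare the argmax of the softmax output with the ground-truth label of the image that was fed in (test image No. 20):
predicted_digit = int(np.argmax(out_layer4_softmax))
print('predicted:', predicted_digit, ' ground truth:', int(y[20]))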
The convolution and pooling functions called by the main program (the Function2 module) are as follows:
# -*- coding: utf-8 -*-
"""
Created on Sat Apr 10 13:30:37 2021
@author: ZZJin
"""
# Convolution function
import tensorflow as tf
import math  # math module (floor is used in the pooling function)
import numpy as np
def conv2d_33(x_conv, w_conv, row_conv, col_conv):  # x_conv: input data, w_conv: 3x3 kernel,
    # row_conv / col_conv: number of rows / columns of x_conv
    # example test values:
    # x_conv = x[0]
    # w_conv = w1[0][0]
    # row_conv = 28
    # col_conv = 28
    x_conv32 = tf.cast(x_conv, dtype=tf.float32)  # convert to float32
    w_conv32 = tf.cast(w_conv, dtype=tf.float32)  # convert to float32
    image_test = tf.reshape(x_conv32, [-1, row_conv*col_conv])  # flatten the input to a single row
    filter_test = tf.reshape(w_conv32, [-1, 3*3])               # flatten the kernel to a single row
    out_conv = np.zeros((row_conv - 2, col_conv - 2))
    for j_conv in range(0, (row_conv - 2)):      # scan rows
        for i_conv in range(0, (col_conv - 2)):  # scan columns
            # 3x3 window starting at (j_conv, i_conv); in the row-major flattening, element
            # (r, c) sits at index r*col_conv + c (identical to the original r*row_conv + c
            # for the square inputs used in this article)
            buffer_conv = (image_test[0][i_conv + 0 + j_conv*col_conv]*filter_test[0][0] + image_test[0][i_conv + 1 + j_conv*col_conv]*filter_test[0][1] + image_test[0][i_conv + 2 + j_conv*col_conv]*filter_test[0][2]
                           + image_test[0][i_conv + 0 + (j_conv+1)*col_conv]*filter_test[0][3] + image_test[0][i_conv + 1 + (j_conv+1)*col_conv]*filter_test[0][4] + image_test[0][i_conv + 2 + (j_conv+1)*col_conv]*filter_test[0][5]
                           + image_test[0][i_conv + 0 + (j_conv+2)*col_conv]*filter_test[0][6] + image_test[0][i_conv + 1 + (j_conv+2)*col_conv]*filter_test[0][7] + image_test[0][i_conv + 2 + (j_conv+2)*col_conv]*filter_test[0][8]
                           )
            out_conv[j_conv][i_conv] = buffer_conv
            # print(j_conv, i_conv)
    return out_conv
# Max-pooling function
def maxpol_22(x_maxpol, row_maxpol, col_maxpol):  # x_maxpol: input data
    # row_maxpol / col_maxpol: number of rows / columns of x_maxpol
    row_maxpol = math.floor(row_maxpol/2)  # if the number of rows/columns is odd, the last row/column is dropped
    col_maxpol = math.floor(col_maxpol/2)
    out_maxpol = np.zeros((row_maxpol, col_maxpol))
    for j_maxpol in range(0, row_maxpol):      # scan rows
        for i_maxpol in range(0, col_maxpol):  # scan columns
            # maximum over the 2x2 window
            buffer_maxpol = max(x_maxpol[j_maxpol*2][i_maxpol*2], x_maxpol[j_maxpol*2][i_maxpol*2+1], x_maxpol[j_maxpol*2+1][i_maxpol*2], x_maxpol[j_maxpol*2+1][i_maxpol*2+1])
            out_maxpol[j_maxpol][i_maxpol] = buffer_maxpol
    return out_maxpol
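As a quick cross-check of the two hand-written functions above (a sketch I added, assuming the Function2 module is importable as in the main program), their outputs can be compared against TensorFlow's built-in ops:
import numpy as np
import tensorflow as tf
from Function2 import conv2d_33, maxpol_22

img = np.random.rand(28, 28).astype(np.float32)  # dummy input image
ker = np.random.rand(3, 3).astype(np.float32)    # dummy 3x3 kernel

# tf.nn.conv2d expects NHWC inputs and [h, w, in, out] kernels
ref_conv = tf.nn.conv2d(img.reshape(1, 28, 28, 1), ker.reshape(3, 3, 1, 1),
                        strides=1, padding='VALID')[0, :, :, 0].numpy()
print('conv max abs diff:', np.max(np.abs(conv2d_33(img, ker, 28, 28) - ref_conv)))

ref_pool = tf.nn.max_pool2d(ref_conv.reshape(1, 26, 26, 1), ksize=2, strides=2,
                            padding='VALID')[0, :, :, 0].numpy()
print('pool max abs diff:', np.max(np.abs(maxpol_22(ref_conv, 26, 26) - ref_pool)))
Both differences should be at the level of float32 rounding error.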
This code is admittedly a bit hard to follow, but if you take the time to read it carefully and step through it yourself, I believe it will help you a great deal.
Debugging this code really took me a long time. If you found it useful, please give it a like; your likes are the greatest encouragement for me. Thank you!