自学神经网络系列——3 初识神经网络

最新推荐文章于 2025-04-11 18:10:34 发布

ML_python_get√

最新推荐文章于 2025-04-11 18:10:34 发布

阅读量252

点赞数

分类专栏：机器学习笔记　文章标签：神经网络 python 深度学习

本文链接：https://blog.youkuaiyun.com/weixin_51499396/article/details/118893039

版权

机器学习笔记专栏收录该内容

22 篇文章

订阅专栏

本文探讨了神经网络的基础构建，包括使用非线性函数（如ReLU、sigmoid和阶跃函数）的构建原理，多维数组在处理神经网络数据的重要性，以及通过实例演示如何实现简单的神经网络结构，并介绍了如何进行手写数字识别，从数据加载到模型预测的全过程。

摘要生成于 C知道，由 DeepSeek-R1 满血版支持，前往体验 >

3.1 非线性函数的构造

任意曲线都可以用激活函数来构造：
任意曲线都可以近似表示为若干个激活函数之和，
这些激活函数的权重和偏置各不相同。
两个ReLU函数叠加才能构造出一个近似阶跃函数或sigmoid函数的形状（hard sigmoid），
所以在同样情况下，使用ReLU作为激活函数时所需的神经元数量要增加一倍。

# 激活函数
# 阶跃函数的实现
import numpy as np

def step_func(x):
    """Return the unit step of a scalar: 1 when ``x > 0``, otherwise 0.

    Only scalars are supported. Comparing a list with ``0`` raises
    ``TypeError``, and a multi-element NumPy array raises ``ValueError``
    when its comparison result is used as a condition.

    Every input that can be compared now yields an int. The previous
    ``if``/``elif`` form matched neither branch for NaN and silently
    returned ``None``.
    """
    return 1 if x > 0 else 0
# Quick check with a scalar (the return value is discarded)
step_func(11)
x = np.array([11,11])
# The scalar implementation cannot take a sequence, e.g.:
# x >0
# step_func([11,11])
# TypeError: '>' not supported between instances of 'list' and 'int'

# 利用numpy数组改进
import numpy as np  
import matplotlib.pylab as plt

def step_function(x):
    """Vectorized unit step function.

    Args:
        x: NumPy array, or anything ``np.asarray`` accepts (scalar, list).

    Returns:
        Integer array shaped like ``x``: 1 where ``x > 0``, 0 elsewhere.
    """
    y = np.asarray(x) > 0  # boolean mask of the positive entries
    # ``np.int`` was only an alias of the builtin ``int``; it was deprecated
    # in NumPy 1.20 and removed in 1.24, so use the builtin directly.
    return y.astype(int)

# Sample the step function on [-5, 5) in steps of 0.1
x = np.arange(-5.0,5.0,0.1)
y = step_function(x)
# Visualize
plt.plot(x,y)
plt.ylim(-0.1,1.1)  # leave a small margin around the 0..1 output range
plt.show()

def sigmoid(x):
    """Logistic sigmoid ``1 / (1 + exp(-x))``, evaluated element-wise by NumPy."""
    decay = np.exp(-x)
    return 1 / (1 + decay)

# Plot the sigmoid on [-5, 5) in steps of 0.1
x = np.arange(-5,5,0.1)
y = sigmoid(x)
plt.plot(x,y)
plt.ylim(-0.1,1.1)  # leave a small margin around the 0..1 output range
plt.show()

# ReLU: rectified linear unit
def ReLU(x):
    """Return ``x`` where it is positive and 0 elsewhere (element-wise)."""
    floor = 0
    return np.maximum(floor, x)

# Quick check with a scalar (the return value is discarded), then plot on [-5, 5)
ReLU(2)
x = np.arange(-5,5,0.1)
y = ReLU(x)
plt.plot(x,y)
plt.show()

3.2 多维数组

import numpy as np

# 3-D array: 2 outer blocks, each holding 3 rows of 2 values
A= np.array([[[1,2],[3,4],[5,6]],[[2,2],[3,3],[4,4]]])
A.shape  # dimensions are read from the outermost level inward -> (2, 3, 2)

3.3 神经网络的实现

# Matrix multiplication
X = np.array([1,2])
W = np.array([[1,3,5],[2,4,6]])  # first row: weights applied to x1; second row: weights applied to x2
Y = np.dot(X,W)
print(Y)
# Hidden layer 1: 2 inputs -> 3 units
X = np.array([1.0,0.5])
W1 = np.array([[0.1,0.3,0.5],[0.2,0.4,0.6]])
B1 = np.array([0.1,0.2,0.3])
print(X.shape)
print(W1.shape)
print(B1.shape)
A1 = np.dot(X,W1)+B1  # weighted sum plus bias
Z1 = sigmoid(A1)      # activation of hidden layer 1
print(A1)
print(Z1)
# Hidden layer 2: 3 inputs -> 2 units
W2 = np.array([[0.1,0.4],[0.2,0.5],[0.3,0.6]])
B2 =  np.array([0.1,0.2])
print(Z1.shape)
print(W2.shape)
print(B2.shape)
A2 = np.dot(Z1,W2) + B2
Z2 = sigmoid(A2)
# Output layer: 2 inputs -> 2 outputs (the output activation is applied later)
W3 = np.array([[0.1,0.3],[0.2,0.4]])
B3 = np.array([0.1,0.2])
A3 = np.dot(Z2,W3)+B3
# 输出层激活函数
# 回归问题使用恒等函数，二分类问题使用sigmoid函数，多分类问题使用softmax函数
def identity_func(x):
    """Output-layer activation that passes its input through unchanged."""
    return x

def _softmax(x):
    """Naive softmax: ``exp(x)`` divided by the sum of all its entries.

    No max-shift is applied before exponentiating, so large inputs can
    overflow in ``np.exp`` and produce ``nan`` results.
    """
    numerators = np.exp(x)
    return numerators / numerators.sum()

# softmax需要解决指数运算中输入数值过大导致的溢出问题

def softmax(x):
    """Numerically stable softmax over the last axis.

    Args:
        x: Scores as a 1-D array (one sample) or an N-D array whose last
            axis holds the class scores, e.g. ``(batch, classes)``.

    Returns:
        Array shaped like ``x`` whose entries along the last axis are
        positive and sum to 1.

    The maximum is subtracted before exponentiating so that large scores do
    not overflow. Normalization is done per row: the earlier version used
    the global max and global sum, so a batch of samples was normalized as
    one distribution instead of one distribution per sample.
    """
    x = np.asarray(x)
    axis = -1 if x.ndim else None  # a 0-d input has no axis to reduce over
    shifted = x - np.max(x, axis=axis, keepdims=True)
    exp_x = np.exp(shifted)
    return exp_x / np.sum(exp_x, axis=axis, keepdims=True)

# Apply the identity output activation to the output-layer sums computed above
Y = identity_func(A3) 
print(Y)

# Compare the naive and the max-shifted softmax on large scores;
# the naive version is expected to overflow in np.exp and print nan values
x = np.array([1010,1000,990])
y1 = _softmax(x)
y2 = softmax(x)
print(y1)
print(y2)

面向对象形式

class network:
    """Three-layer fully connected network with fixed parameters (forward pass only)."""

    def __init__(self,W1,b1,W2,b2,W3,b3):
        """Store the weight matrix and bias vector of each layer.

        Args:
            W1, b1: Weights and bias of the first hidden layer.
            W2, b2: Weights and bias of the second hidden layer.
            W3, b3: Weights and bias of the output layer.
        """
        # The unused local dict ``network = {}`` was dropped; the parameters
        # live on the instance as attributes.
        self.W1 = W1
        self.W2 = W2
        self.W3 = W3
        self.b1 = b1
        self.b2 = b2
        self.b3 = b3

    def forward(self,x):
        """Propagate ``x`` through the network and return the output.

        Both hidden layers use ``sigmoid``; the output layer uses
        ``identity_func``.
        """
        a1 = np.dot(x,self.W1)+self.b1
        z1 = sigmoid(a1)
        a2 = np.dot(z1,self.W2) + self.b2
        z2 = sigmoid(a2)
        a3 = np.dot(z2,self.W3) +self.b3
        y = identity_func(a3)
        return y

# Same parameters as the step-by-step walkthrough, fed through the class
x = np.array([1.0,0.5])
W1 = np.array([[0.1,0.3,0.5],[0.2,0.4,0.6]])
b1 = np.array([0.1,0.2,0.3])
W2 = np.array([[0.1,0.4],[0.2,0.5],[0.3,0.6]])
b2 = np.array([0.1,0.2])
W3 = np.array([[0.1,0.3],[0.2,0.4]])
b3 = np.array([0.1,0.2])
# Keep the instance under its own name: rebinding ``network`` here would
# replace the class with the instance, so no second network could be built.
net = network(W1,b1,W2,b2,W3,b3)
output = net.forward(x)
print(output)

3.4 手写数字识别

-------------------------------补充资料-----------------------------------------------------

# mnist.py文件
try:
    import urllib.request
except ImportError:
    raise ImportError('You should use Python 3.x')
import os.path
import gzip
import pickle
import os
import numpy as np

# Base URL the four MNIST archives are downloaded from
# NOTE(review): plain-http host; confirm it is still reachable or switch to a mirror
url_base = 'http://yann.lecun.com/exdb/mnist/'
# Logical dataset name -> gzip archive file name
key_file = {
    'train_img': 'train-images-idx3-ubyte.gz',
    'train_label': 'train-labels-idx1-ubyte.gz',
    'test_img': 't10k-images-idx3-ubyte.gz',
    'test_label': 't10k-labels-idx1-ubyte.gz'
}

# Archives and the pickle cache are stored in the directory of this module
dataset_dir = os.path.dirname(os.path.abspath(__file__))
save_file = dataset_dir + "/mnist.pkl"

# NOTE(review): train_num, test_num and img_dim are not referenced by the visible code;
# presumably sample counts and the unflattened image shape - confirm before relying on them
train_num = 60000
test_num = 10000
img_dim = (1, 28, 28)
img_size = 784  # row width used by _load_img when reshaping the raw bytes


def _download(file_name):
    """Fetch ``file_name`` from ``url_base`` into ``dataset_dir`` unless it already exists there."""
    target = dataset_dir + "/" + file_name

    if not os.path.exists(target):
        print("Downloading " + file_name + " ... ")
        urllib.request.urlretrieve(url_base + file_name, target)
        print("Done")


def download_mnist():
    """Download every archive named in ``key_file`` via ``_download``."""
    for archive_name in key_file.values():
        _download(archive_name)


def _load_label(file_name):
    """Read a gzip label archive from ``dataset_dir`` into a 1-D uint8 array."""
    archive_path = dataset_dir + "/" + file_name

    print("Converting " + file_name + " to NumPy Array ...")
    with gzip.open(archive_path, 'rb') as archive:
        raw = archive.read()
    # The first 8 bytes are skipped; the remaining bytes are the labels
    labels = np.frombuffer(raw, np.uint8, offset=8)
    print("Done")

    return labels


def _load_img(file_name):
    """Read a gzip image archive from ``dataset_dir`` into a ``(N, img_size)`` uint8 array."""
    archive_path = dataset_dir + "/" + file_name

    print("Converting " + file_name + " to NumPy Array ...")
    with gzip.open(archive_path, 'rb') as archive:
        raw = archive.read()
    # The first 16 bytes are skipped; the rest is split into rows of img_size pixels
    pixels = np.frombuffer(raw, np.uint8, offset=16).reshape(-1, img_size)
    print("Done")

    return pixels


def _convert_numpy():
    """Load all four archives and return them in a dict keyed like ``key_file``."""
    # Entries are evaluated in the same order as before: images, then labels,
    # first for the training set and then for the test set.
    return {
        'train_img': _load_img(key_file['train_img']),
        'train_label': _load_label(key_file['train_label']),
        'test_img': _load_img(key_file['test_img']),
        'test_label': _load_label(key_file['test_label']),
    }


def init_mnist():
    """Download the archives, convert them to NumPy arrays and pickle them to ``save_file``."""
    download_mnist()
    arrays = _convert_numpy()
    print("Creating pickle file ...")
    with open(save_file, 'wb') as cache:
        # A negative protocol number selects pickle.HIGHEST_PROTOCOL
        pickle.dump(arrays, cache, pickle.HIGHEST_PROTOCOL)
    print("Done!")


def _change_one_hot_label(X, num_classes=10):
    """Convert integer class labels into one-hot rows.

    Args:
        X: 1-D NumPy integer array of labels, each in ``[0, num_classes)``.
        num_classes: Width of every one-hot row. Defaults to 10, the value
            that was previously hard-coded.

    Returns:
        Float array of shape ``(X.size, num_classes)`` holding a single 1
        per row, in the column selected by that row's label.
    """
    T = np.zeros((X.size, num_classes))
    # One vectorized assignment instead of a Python-level loop over every row
    T[np.arange(X.size), X] = 1

    return T


def load_mnist(normalize=True, flatten=True, one_hot_label=False):
    """Load the MNIST dataset from the pickle cache.

    The cache is built with ``init_mnist`` when ``save_file`` does not exist.

    Parameters
    ----------
    normalize : if True, convert pixel values to float32 and scale them to 0.0-1.0
    one_hot_label :
        if True, labels are returned as one-hot arrays,
        e.g. [0,0,1,0,0,0,0,0,0,0]
    flatten : if True, images stay one row per image;
        if False, images are reshaped to (N, 1, 28, 28)

    Returns
    -------
    (train images, train labels), (test images, test labels)
    """
    if not os.path.exists(save_file):
        init_mnist()

    with open(save_file, 'rb') as cache:
        dataset = pickle.load(cache)

    image_keys = ('train_img', 'test_img')

    if normalize:
        for key in image_keys:
            dataset[key] = dataset[key].astype(np.float32) / 255.0

    if one_hot_label:
        for key in ('train_label', 'test_label'):
            dataset[key] = _change_one_hot_label(dataset[key])

    if not flatten:
        for key in image_keys:
            dataset[key] = dataset[key].reshape(-1, 1, 28, 28)

    train_pair = (dataset['train_img'], dataset['train_label'])
    test_pair = (dataset['test_img'], dataset['test_label'])
    return train_pair, test_pair


# When executed as a script, download the archives and build the pickle cache
if __name__ == '__main__':
    init_mnist()

-----------------------------------------------正文--------------------------------------------

import sys,os
sys.path.append(os.pardir)
from res.mnist import load_mnist
from PIL import Image

# (x_train, t_train),(x_test,t_test) = load_mnist(flatten=True,normalize=False)
# print(x_train.shape)
# print(t_train.shape)
# print(x_test.shape)
# print(t_test.shape)

# 展示第一张照片
def img_show(img):
    """Cast ``img`` to uint8, wrap it in a PIL image and open it in the default viewer."""
    pixels = np.uint8(img)
    Image.fromarray(pixels).show()

# Load raw (unnormalized) pixel values with one flat row per image
(x_train, t_train),(x_test,t_test) = load_mnist(flatten=True,normalize=False)
# pickle can save runtime objects to a file so they can be reused directly next time
# load_mnist relies on pickle for its cache
img = x_train[0]
label = t_train[0]
print(label)
print(img.shape)
img = img.reshape(28,28)  # restore the 2-D layout before displaying
print(img.shape)
img_show(img)

# 神经网络预测
# 1*28*28的图片展开后对应784个输入神经元（28*28=784），每个像素的取值范围为0~255
# 输入784个神经元，输出0-9的10个神经元
# 假设隐藏层1有50个神经元，隐藏层2有100个神经元

'''1、获取输入数据
      2、定义神经网络
      3、利用神经网络预测 '''


def get_data():
    """Return the MNIST test images and labels (normalized, flattened, integer labels)."""
    _train, (images, labels) = load_mnist(
        normalize=True, flatten=True, one_hot_label=False)
    return images, labels
    
def _network():
    """Load and return the object pickled in ``sample_weight.pkl``.

    ``predict`` reads the keys 'W1'-'W3' and 'b1'-'b3' from the result.
    """
    import pickle
    # NOTE: pickle.load can execute arbitrary code; only load trusted weight files
    with open('sample_weight.pkl', 'rb') as weight_file:
        return pickle.load(weight_file)
    
def predict(network,x):
    """Run ``x`` through the three layers stored in ``network``.

    The two hidden layers use ``sigmoid`` and the output layer uses
    ``softmax``.
    """
    # Look up every parameter first, as the original did
    weights = [network['W1'], network['W2'], network['W3']]
    biases = [network['b1'], network['b2'], network['b3']]
    *hidden_layers, output_layer = zip(weights, biases)

    signal = x
    for W, b in hidden_layers:
        signal = sigmoid(np.dot(signal, W) + b)

    W_out, b_out = output_layer
    return softmax(np.dot(signal, W_out) + b_out)
# Evaluate accuracy one sample at a time
x,t = get_data()
network = _network()
accuracy_cnt = 0
for i in range(len(x)):
    y = predict(network,x[i])
    p = np.argmax(y)  # index of the highest-scoring class
    if p ==t[i]:
        accuracy_cnt +=1
print("Accuracy:" + str(float(accuracy_cnt)/len(x)))

# Shapes of the input data and of the loaded network's weight matrices
print(x.shape)
print(x[0].shape)
# Read the weights from the loaded network before printing them: printing W1
# ahead of this assignment showed a stale W1 left over from an earlier
# section (or raised NameError when no such variable existed).
W1,W2,W3  = network['W1'], network['W2'], network['W3']
print(W1.shape)
print(W2.shape)
print(W3.shape)

# The weight matrices only constrain the column count of the input, so a
# batch of 100 images can be fed through the network just like a single image
# Processing in batches improves computational performance
x,t = get_data()
network = _network()
batch_size = 100
accuracy_cnt = 0

for i in range(0,len(x),batch_size): # i = 0, 100, 200, ...: start index of each batch
    x_batch = x[i:i+batch_size]      # the batch_size samples starting at i
    y_batch = predict(network,x_batch)
    p = np.argmax(y_batch,axis=1)  # predicted class of each row in the batch
    accuracy_cnt += np.sum(p==t[i:i+batch_size])  # count correct predictions in this batch

print("Accuracy:" + str(float(accuracy_cnt/len(x))))