The main content of this post is a complete, from-scratch code implementation of a neural network.
1 The main stages of a neural network are:
1.1 The feedforward pass, also called forward propagation
1.2 Error backpropagation, also called the backward pass
1.3 Setting the number of iterations and training the model
1.4 Choosing evaluation metrics and computing them
2 Forward propagation diagram (for reference only) and code:
Note: forward propagation is easy to understand. Each layer simply computes weights * input + bias, applies the activation function, and passes the result on as the next layer's input; a small shape-check sketch follows the code below.

def __init__(self, layer_list=[], lr=0.1, epochs=100):
    self.lr = lr                  # learning rate
    self.layer_list = layer_list  # number of neurons (nodes) in each layer
    self.epochs = epochs          # number of training iterations

# parameter (weight and bias) initialization
def weight_bias_init(self):
    self.W = {}  # weight dict: key is the layer index, value is that layer's weight matrix
    self.b = {}  # bias dict: key is the layer index, value is that layer's bias vector
    self.layer_num = len(self.layer_list) - 1  # number of layers (= number of weight matrices; the input layer has none)
    # initialize W and b for every layer; each W has shape (neurons in previous layer, neurons in next layer)
    for idx in range(self.layer_num):
        self.W[idx] = np.random.randn(self.layer_list[idx],
                                      self.layer_list[idx+1]) * 0.01  # normal distribution
        self.b[idx] = np.random.randn(self.layer_list[idx+1])

def forward(self, X, y):
    self.X = X  # keep the input X as an attribute so other methods can use it
    self.y = np.array(y).reshape(-1, 1)  # reshape y to avoid broadcasting errors
    # record each layer's z and a; they are needed for backpropagation
    self.z = {}  # dict of pre-activation outputs per layer (z = W*X + b)
    self.a = {}  # dict of post-activation outputs per layer (a = sigmoid(z))
    input = self.X
    for idx in range(self.layer_num):  # propagate forward layer by layer
        self.z[idx] = np.dot(input, self.W[idx]) + self.b[idx]  # z = W*X + b
        self.a[idx] = self.sigmoid(self.z[idx])                 # a = sigmoid(z)
        input = self.a[idx]  # the activation becomes the next layer's input
    self.output = self.a[self.layer_num-1]  # output of the last layer
    self.loss = -np.mean(self.y * np.log(self.output) +
                         (1 - self.y) * np.log(1 - self.output))  # log loss
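To make the shape convention concrete, here is a minimal standalone sketch (my addition, not from the original article; the layer sizes and the 5-sample input are made up for illustration). Each W[idx] maps the width of layer idx to the width of layer idx+1, so with samples as rows every activation keeps shape (number of samples, layer width):

import numpy as np

def sigmoid(x):
    return 1.0 / (1 + np.exp(-x))

layer_list = [4, 3, 1]      # hypothetical structure: 4 inputs, 3 hidden units, 1 output
X = np.random.randn(5, 4)   # 5 made-up samples with 4 features each

# same convention as weight_bias_init: W[idx] has shape
# (neurons in layer idx, neurons in layer idx+1)
W = {idx: np.random.randn(layer_list[idx], layer_list[idx+1]) * 0.01
     for idx in range(len(layer_list) - 1)}
b = {idx: np.random.randn(layer_list[idx+1])
     for idx in range(len(layer_list) - 1)}

a = X
for idx in range(len(layer_list) - 1):
    z = np.dot(a, W[idx]) + b[idx]
    a = sigmoid(z)
    print(z.shape)          # (5, 3), then (5, 1)
print(a.shape)              # (5, 1): one probability per sample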
3 Backpropagation diagram and code
For backpropagation the diagram is only one reference among several; if anything is unclear, work through the reference material in section 6. Backpropagation is the core and the hardest part of this post; the formulas that the code implements are summarized after the code below.

# first derivative of sigmoid
def Dsigmoid(self, x):
    return self.sigmoid(x) * (1 - self.sigmoid(x))

# backpropagation
def backward(self):
    # stored the same way as the weights: dicts keyed by layer index
    self.dz = {}  # gradient with respect to each layer's z
    self.dW = {}  # gradient with respect to each layer's W
    self.db = {}  # gradient with respect to each layer's b
    idx = self.layer_num - 1  # differentiate from the last layer backwards
    while idx >= 0:
        # ********** dz ********** #
        if idx == self.layer_num - 1:  # the last layer is a special case: apply the last-layer formula (dz3)
            self.dz[idx] = (self.output - self.y) * self.Dsigmoid(self.z[idx])  # element-wise product
        else:  # earlier layers follow from the next layer's dz via the recursive formula (dzi)
            self.dz[idx] = np.dot(self.dz[idx+1], self.W[idx+1].T) \
                           * self.Dsigmoid(self.z[idx])
        # ********** dW ********** #
        if idx == 0:  # at the first layer the previous activation a[idx-1] is the input X
            self.dW[idx] = np.dot(self.X.T, self.dz[idx]) / len(self.X)  # gradients are averaged over the samples
        else:  # otherwise use the previous layer's activation
            self.dW[idx] = np.dot(self.a[idx-1].T, self.dz[idx]) / len(self.X)
        # ********** db ********** #
        self.db[idx] = np.sum(self.dz[idx], axis=0) / len(self.X)  # db = dz, averaged over the samples
        idx -= 1  # move to the previous layer
    # once all gradients are computed, update the parameters
    for idx in range(self.layer_num):
        self.W[idx] -= self.lr * self.dW[idx]
        self.b[idx] -= self.lr * self.db[idx]
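Written out as formulas, the backward pass above computes the following quantities (notation: samples are rows, $\odot$ is element-wise multiplication, $n$ is the number of samples, $\sigma$ is the sigmoid, and $L$ is the index of the last layer):

$$dz_L = (a_L - y)\odot \sigma'(z_L)$$
$$dz_l = \left(dz_{l+1}\, W_{l+1}^{\top}\right)\odot \sigma'(z_l) \quad (l < L)$$
$$dW_l = \frac{1}{n}\, a_{l-1}^{\top}\, dz_l \;\; (\text{with } a_{-1} = X), \qquad db_l = \frac{1}{n}\sum_{\text{samples}} dz_l$$
$$W_l \leftarrow W_l - \mathrm{lr}\cdot dW_l, \qquad b_l \leftarrow b_l - \mathrm{lr}\cdot db_l$$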
4 Complete code 1:
The snippets above, as well as the complete code 1 that follows, come from the reference article for code 1 (see section 6):
import numpy as np

class NN(object):
    def __init__(self, layer_list=[], lr=0.1, epochs=100):
        self.lr = lr                  # learning rate
        self.layer_list = layer_list  # number of neurons in each layer
        self.epochs = epochs          # number of training iterations

    # weight and bias initialization
    def weight_bias_init(self):
        self.W = {}  # weight dict: key is the layer index, value is the weight matrix
        self.b = {}  # bias dict: key is the layer index, value is the bias vector
        self.layer_num = len(self.layer_list) - 1  # number of layers
        # initialize W and b for every layer
        for idx in range(self.layer_num):
            self.W[idx] = np.random.randn(self.layer_list[idx],
                                          self.layer_list[idx+1]) * 0.01  # normal distribution
            self.b[idx] = np.random.randn(self.layer_list[idx+1])

    # sigmoid function
    def sigmoid(self, x):
        return 1.0 / (1 + np.exp(-x))

    # first derivative of sigmoid
    def Dsigmoid(self, x):
        return self.sigmoid(x) * (1 - self.sigmoid(x))

    # forward propagation
    def forward(self, X, y):
        self.X, self.y = X, np.array(y).reshape(-1, 1)
        self.z = {}  # pre-activation outputs per layer (z = W*X + b)
        self.a = {}  # post-activation outputs per layer (a = sigmoid(z))
        # propagate forward layer by layer
        input = self.X
        for idx in range(self.layer_num):
            self.z[idx] = np.dot(input, self.W[idx]) + self.b[idx]
            self.a[idx] = self.sigmoid(self.z[idx])
            input = self.a[idx]  # the activation becomes the next layer's input
        self.output = self.a[self.layer_num-1]  # output of the last layer
        self.loss = -np.mean(self.y * np.log(self.output) +
                             (1 - self.y) * np.log(1 - self.output))  # log loss

    # error backpropagation
    def backward(self):
        # stored the same way as the weights: dicts keyed by layer index
        self.dz = {}  # gradient with respect to each layer's z
        self.dW = {}  # gradient with respect to each layer's W
        self.db = {}  # gradient with respect to each layer's b
        idx = self.layer_num - 1  # differentiate from the last layer backwards
        while idx >= 0:
            # ********** dz ********** #
            if idx == self.layer_num - 1:  # the last layer is a special case: apply the last-layer formula (dz3)
                self.dz[idx] = (self.output - self.y) * self.Dsigmoid(self.z[idx])  # element-wise product
            else:  # earlier layers follow from the next layer's dz via the recursive formula (dzi)
                self.dz[idx] = np.dot(self.dz[idx+1], self.W[idx+1].T) \
                               * self.Dsigmoid(self.z[idx])
            # ********** dW ********** #
            if idx == 0:  # at the first layer the previous activation a[idx-1] is the input X
                self.dW[idx] = np.dot(self.X.T, self.dz[idx]) / len(self.X)  # averaged over the samples
            else:  # otherwise use the previous layer's activation
                self.dW[idx] = np.dot(self.a[idx-1].T, self.dz[idx]) / len(self.X)
            # ********** db ********** #
            self.db[idx] = np.sum(self.dz[idx], axis=0) / len(self.X)  # db = dz, averaged over the samples
            idx -= 1  # move to the previous layer
        # once all gradients are computed, update the parameters
        for idx in range(self.layer_num):
            self.W[idx] -= self.lr * self.dW[idx]
            self.b[idx] -= self.lr * self.db[idx]

    # iterative training
    def train(self, X, y):
        self.weight_bias_init()
        for i in range(self.epochs):
            self.forward(X, y)
            self.backward()
            # print the loss every 10 epochs
            if i % 10 == 0: print("Epoch {}: loss={}".format(i//10+1, self.loss))

    # predicted probability output
    def predict(self, X_test):
        input = X_test
        for idx in range(self.layer_num):
            z = np.dot(input, self.W[idx]) + self.b[idx]
            a = self.sigmoid(z)
            input = a
        return a

from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

X, y = load_iris(return_X_y=True)
X, y = X[:100], y[:100]  # keep the first two classes for binary classification
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4)
layer_list = [4, 1]  # user-defined network structure
model = NN(layer_list, lr=0.1, epochs=100)
model.train(X_train, y_train)
pre = model.predict(X_test)
pre = [1 if x >= 0.5 else 0 for x in pre]
print(accuracy_score(pre, y_test))
Run results:

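A small usage variation I added (not from the referenced article; the hidden-layer width of 5 is arbitrary): because the class is written in matrix form, the same code trains a deeper structure simply by extending layer_list.

# Hypothetical deeper structure: 4 inputs -> 5 hidden units -> 1 output (sizes chosen arbitrarily)
model2 = NN(layer_list=[4, 5, 1], lr=0.1, epochs=100)
model2.train(X_train, y_train)
pre2 = [1 if p >= 0.5 else 0 for p in model2.predict(X_test)]
print(accuracy_score(y_test, pre2))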
5 Complete code 2:
import numpy as np

def sigmoid(x):
    # Sigmoid activation function: f(x) = 1 / (1 + e^(-x))
    return 1 / (1 + np.exp(-x))

def deriv_sigmoid(x):
    # Derivative of sigmoid: f'(x) = f(x) * (1 - f(x))
    fx = sigmoid(x)
    return fx * (1 - fx)

def mse_loss(y_true, y_pred):
    # y_true and y_pred are numpy arrays of the same length.
    return ((y_true - y_pred) ** 2).mean()

class OurNeuralNetwork:
    '''
    A neural network with:
      - 2 inputs
      - a hidden layer with 2 neurons (h1, h2)
      - an output layer with 1 neuron (o1)
    *** DISCLAIMER ***:
    The code below is intended to be simple and educational, NOT optimal.
    Real neural net code looks nothing like this. DO NOT use this code.
    Instead, read/run it to understand how this specific network works.
    '''
    def __init__(self):
        # Weights
        self.w1 = np.random.normal()
        self.w2 = np.random.normal()
        self.w3 = np.random.normal()
        self.w4 = np.random.normal()
        self.w5 = np.random.normal()
        self.w6 = np.random.normal()
        # Biases
        self.b1 = np.random.normal()
        self.b2 = np.random.normal()
        self.b3 = np.random.normal()

    def feedforward(self, x):
        # x is a numpy array with 2 elements.
        h1 = sigmoid(self.w1 * x[0] + self.w2 * x[1] + self.b1)
        h2 = sigmoid(self.w3 * x[0] + self.w4 * x[1] + self.b2)
        o1 = sigmoid(self.w5 * h1 + self.w6 * h2 + self.b3)
        return o1

    def train(self, data, all_y_trues):
        '''
        - data is a (n x 2) numpy array, n = # of samples in the dataset.
        - all_y_trues is a numpy array with n elements.
          Elements in all_y_trues correspond to those in data.
        '''
        learn_rate = 0.1
        epochs = 1000  # number of times to loop through the entire dataset
        for epoch in range(epochs):
            for x, y_true in zip(data, all_y_trues):
                # --- Do a feedforward (we'll need these values later)
                sum_h1 = self.w1 * x[0] + self.w2 * x[1] + self.b1
                h1 = sigmoid(sum_h1)
                sum_h2 = self.w3 * x[0] + self.w4 * x[1] + self.b2
                h2 = sigmoid(sum_h2)
                sum_o1 = self.w5 * h1 + self.w6 * h2 + self.b3
                o1 = sigmoid(sum_o1)
                y_pred = o1
                # --- Calculate partial derivatives.
                # --- Naming: d_L_d_w1 represents "partial L / partial w1"
                d_L_d_ypred = -2 * (y_true - y_pred)
                # Neuron o1
                d_ypred_d_w5 = h1 * deriv_sigmoid(sum_o1)
                d_ypred_d_w6 = h2 * deriv_sigmoid(sum_o1)
                d_ypred_d_b3 = deriv_sigmoid(sum_o1)
                d_ypred_d_h1 = self.w5 * deriv_sigmoid(sum_o1)
                d_ypred_d_h2 = self.w6 * deriv_sigmoid(sum_o1)
                # Neuron h1
                d_h1_d_w1 = x[0] * deriv_sigmoid(sum_h1)
                d_h1_d_w2 = x[1] * deriv_sigmoid(sum_h1)
                d_h1_d_b1 = deriv_sigmoid(sum_h1)
                # Neuron h2
                d_h2_d_w3 = x[0] * deriv_sigmoid(sum_h2)
                d_h2_d_w4 = x[1] * deriv_sigmoid(sum_h2)
                d_h2_d_b2 = deriv_sigmoid(sum_h2)
                # --- Update weights and biases
                # Neuron h1
                self.w1 -= learn_rate * d_L_d_ypred * d_ypred_d_h1 * d_h1_d_w1
                self.w2 -= learn_rate * d_L_d_ypred * d_ypred_d_h1 * d_h1_d_w2
                self.b1 -= learn_rate * d_L_d_ypred * d_ypred_d_h1 * d_h1_d_b1
                # Neuron h2
                self.w3 -= learn_rate * d_L_d_ypred * d_ypred_d_h2 * d_h2_d_w3
                self.w4 -= learn_rate * d_L_d_ypred * d_ypred_d_h2 * d_h2_d_w4
                self.b2 -= learn_rate * d_L_d_ypred * d_ypred_d_h2 * d_h2_d_b2
                # Neuron o1
                self.w5 -= learn_rate * d_L_d_ypred * d_ypred_d_w5
                self.w6 -= learn_rate * d_L_d_ypred * d_ypred_d_w6
                self.b3 -= learn_rate * d_L_d_ypred * d_ypred_d_b3
            # --- Calculate total loss at the end of each epoch
            if epoch % 10 == 0:
                y_preds = np.apply_along_axis(self.feedforward, 1, data)
                loss = mse_loss(all_y_trues, y_preds)
                print("Epoch %d loss: %.3f" % (epoch, loss))

# Define dataset
data = np.array([
    [-2, -1],   # Alice
    [25, 6],    # Bob
    [17, 4],    # Charlie
    [-15, -6],  # Diana
])
all_y_trues = np.array([
    1,  # Alice
    0,  # Bob
    0,  # Charlie
    1,  # Diana
])

# Train our neural network!
network = OurNeuralNetwork()
network.train(data, all_y_trues)

# Make some predictions
emily = np.array([-7, -3])  # 128 pounds, 63 inches
frank = np.array([20, 2])   # 155 pounds, 68 inches
print("Emily: %.3f" % network.feedforward(emily))  # 0.951 - F
print("Frank: %.3f" % network.feedforward(frank))  # 0.039 - M
Note: code 1 is written in matrix form, while code 2 maps more directly from the formulas to the implementation and so reads more smoothly. Understand code 2 first and then come back to code 1, because most later deep learning work is expressed in matrix form; the sketch below shows how code 2's parameters correspond to code 1's matrices.
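To make that correspondence concrete, here is a small sketch I added (not from either referenced article): code 2's six scalar weights and three biases pack into two weight matrices and two bias vectors in code 1's convention, and the per-neuron products collapse into the same np.dot calls used in code 1's forward().

import numpy as np

def sigmoid(x):
    return 1 / (1 + np.exp(-x))

# Pack code 2's scalar parameters into code-1-style matrices.
# Column j of a weight matrix holds the weights feeding neuron j of the next layer.
net = OurNeuralNetwork()            # untrained instance, random parameters
W1 = np.array([[net.w1, net.w3],
               [net.w2, net.w4]])   # shape (2 inputs, 2 hidden neurons)
b1 = np.array([net.b1, net.b2])
W2 = np.array([[net.w5],
               [net.w6]])           # shape (2 hidden neurons, 1 output)
b2 = np.array([net.b3])

x = np.array([-2, -1])
h = sigmoid(np.dot(x, W1) + b1)     # equals [h1, h2] from feedforward()
o = sigmoid(np.dot(h, W2) + b2)     # equals o1 from feedforward()
print(o[0], net.feedforward(x))     # the two numbers should match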
6 References:
1 This post continues the previous two posts; all three summarize the article below. However, that article's code is written in Python 2 and I could not fully work through it myself, so the code in this post is taken from other articles:
Reference article 1
2 For code 1 and code 2, see the following two articles respectively; follow the original authors' reasoning, and go back to the reference material for anything that is still unclear:
Code 1
Code 2
3 The treatment of backpropagation is somewhat abstract in both code articles; the following formula walkthrough helps to understand it:
Formula walkthrough