斋藤康毅-深度学习入门学习笔记四

最新推荐文章于 2025-02-11 12:58:39 发布

原创 · 最新推荐文章于 2025-02-11 12:58:39 发布 · 700 阅读

3 ·

CC 4.0 BY-SA版权

文章标签：

#神经网络 #深度学习 #python #人工智能 #机器学习

深度学习入门学习笔记专栏收录该内容

5 篇文章

订阅专栏

这篇博客介绍了神经网络学习中使用的损失函数（均方误差和交叉熵误差），以及用于求解梯度的数值微分。作者通过实例展示了如何用中心差分法计算导数，并实现了一个简单的梯度下降算法。最后，通过一个两层神经网络的例子，演示了如何使用数值微分求权重参数的梯度，以及如何在 MNIST 数据集上进行训练。

部署运行你感兴趣的模型镜像

ch04 神经网络的学习

损失函数
1.1 均方误差

import numpy as np


def mean_squared_error(y, t):
    """Return the sum-of-squares error ``0.5 * sum((y - t) ** 2)``.

    Args:
        y: Network output; any array-like accepted by ``np.asarray``.
        t: Target values, broadcast-compatible with ``y``.

    Returns:
        The error summed over every element, as a NumPy scalar.
    """
    # Convert up front so plain Python lists work too; ``list - list``
    # would otherwise raise TypeError.
    y = np.asarray(y)
    t = np.asarray(t)
    return 0.5 * np.sum((y - t) ** 2)


if __name__ == '__main__':
    # Ten-element example output whose largest entry (0.6) is at index 2,
    # and a one-hot target that also marks index 2.
    prediction = np.array([0.1, 0.05, 0.6, 0.0, 0.05, 0.1, 0.0, 0.1, 0.0, 0.0])
    target = np.array([0, 0, 1, 0, 0, 0, 0, 0, 0, 0])
    print(mean_squared_error(prediction, target))

1.2 交叉熵误差

import numpy as np


def cross_entropy_error(y, t):
    """Return the cross-entropy error averaged over the batch.

    Args:
        y: Predicted probabilities, either a single sample (1-D) or a batch
            (one row per sample). Any array-like is accepted.
        t: Targets. Either one-hot / probability targets with the same
            number of elements as ``y``, or one class index per sample.

    Returns:
        ``-sum(t * log(y + 1e-7)) / batch_size`` for one-hot targets, or
        the equivalent value computed from the indexed probabilities when
        ``t`` holds class indices.
    """
    # Accept plain lists as well as arrays; ``list.ndim`` does not exist.
    y = np.asarray(y)
    t = np.asarray(t)

    # Treat a single sample as a batch of one.
    if y.ndim == 1:
        t = t.reshape(1, t.size)
        y = y.reshape(1, y.size)
    batch_size = y.shape[0]

    # Small offset so that log(0) never produces -inf.
    delta = 1e-7

    if t.size == y.size:
        # One-hot (or soft) targets: element-wise product with log(y).
        return -np.sum(t * np.log(y + delta)) / batch_size

    # Class-index targets: pick the predicted probability of the correct
    # class for every sample.
    labels = t.reshape(batch_size).astype(int)
    picked = y[np.arange(batch_size), labels]
    return -np.sum(np.log(picked + delta)) / batch_size


if __name__ == '__main__':
    # Same example as for the mean squared error: the prediction peaks at
    # index 2 and the one-hot target marks index 2.
    prediction = np.array([0.1, 0.05, 0.6, 0.0, 0.05, 0.1, 0.0, 0.1, 0.0, 0.0])
    target = np.array([0, 0, 1, 0, 0, 0, 0, 0, 0, 0])
    print(cross_entropy_error(prediction, target))

数值微分

# Central difference
def numerical_diff(f, x):
    """Approximate the derivative of ``f`` at ``x``.

    ``f`` is evaluated at ``x + h`` and ``x - h`` with ``h = 1e-5``; the
    difference of the two values is divided by ``2 * h``.
    """
    step = 1e-5
    ahead = f(x + step)
    behind = f(x - step)
    return (ahead - behind) / (2 * step)


def function(x):
    """Return ``0.01 * x**2 + 0.1 * x``."""
    quadratic_term = 0.01 * (x ** 2)
    linear_term = 0.1 * x
    return quadratic_term + linear_term


def function_2(x):
    """Return the sum of the squares of the first two entries of ``x``."""
    first, second = x[0], x[1]
    return first ** 2 + second ** 2


def function_tmp1(x0):
    """Return ``x0**2 + 4.0**2``: a two-variable sum of squares with the
    second variable held at 4.0, so only ``x0`` varies."""
    fixed_x1 = 4.0
    return x0 * x0 + fixed_x1 ** 2.0


def function_tmp2(x1):
    """Return ``3.0**2 + x1**2``: a two-variable sum of squares with the
    first variable held at 3.0, so only ``x1`` varies."""
    fixed_x0 = 3.0
    return fixed_x0 ** 2.0 + x1 * x1


if __name__ == '__main__':
    # Derivative of the single-variable example at x = 10.
    print(numerical_diff(function, 10))
    # Partial derivatives: each helper fixes one variable, so the ordinary
    # numerical derivative of the helper is evaluated at the other one.
    for partial, point in ((function_tmp1, 3.0), (function_tmp2, 4.0)):
        print(numerical_diff(partial, point))

梯度

import numpy as np


def numerical_diff(f, x):
    """Return the central-difference estimate of the derivative of ``f``
    at ``x``, using an offset of ``1e-5`` on either side."""
    delta = 1e-5
    width = 2 * delta
    return (f(x + delta) - f(x - delta)) / width


def function_2(x):
    """Return ``x[0]**2 + x[1]**2``."""
    squares = (x[0] ** 2, x[1] ** 2)
    return squares[0] + squares[1]

# Numerical gradient
def _numerical_gradient(f, x):
    """Estimate the gradient of ``f`` at ``x`` by central differences.

    Each element of ``x`` is perturbed in place by ``+h`` and ``-h``
    (``h = 1e-4``), ``f(x)`` is evaluated for both, and the element is then
    restored, so ``x`` holds its original values on return.

    Args:
        f: Callable evaluated as ``f(x)``. The difference of its two results
            is stored into a single gradient element, so it should return
            a scalar.
        x: NumPy array with any number of dimensions. The perturbed values
            are written into ``x`` itself, so use a floating-point dtype;
            an integer array would truncate ``tmp_val + h``.

    Returns:
        Array created with ``np.zeros_like(x)`` that holds one partial
        derivative per element of ``x``.
    """
    h = 1e-4
    grad = np.zeros_like(x)

    # np.ndindex yields one index tuple per element, so matrices are handled
    # element by element. A flat ``range(x.size)`` index only addresses
    # single elements when ``x`` is 1-D.
    for idx in np.ndindex(x.shape):
        tmp_val = x[idx]

        x[idx] = tmp_val + h
        fxh1 = f(x)

        x[idx] = tmp_val - h
        fxh2 = f(x)

        grad[idx] = (fxh1 - fxh2) / (2 * h)
        # Put the original value back before moving to the next element.
        x[idx] = tmp_val
    return grad


# Find an extremum (minimum) by gradient descent.
# Arguments: function, starting point, learning rate, number of iterations.
def gradient_descent(f, init_x, lr=0.1, step_num=100):
    """Run ``step_num`` gradient-descent updates on ``f`` from ``init_x``.

    Args:
        f: Function to minimise; it is passed the current point.
        init_x: Starting point (array-like). It is copied, so the caller's
            object is left untouched.
        lr: Learning rate that scales each gradient step.
        step_num: Number of update steps to perform.

    Returns:
        A new float array holding the point reached after the last step.
    """
    # Work on a float copy: ``x = init_x`` would alias the caller's array so
    # that ``x -= ...`` overwrites it, and the in-place update cannot be
    # applied to an integer array or a list at all.
    x = np.array(init_x, dtype=float)

    for _ in range(step_num):
        grad = _numerical_gradient(f, x)
        x -= lr * grad
    return x


if __name__ == '__main__':
    # Gradient of function_2 at the point (3, 4).
    point = np.array([3.0, 4.0])
    print(_numerical_gradient(function_2, point))
    # Use gradient descent to look for the extremum, starting at (3, -4).
    found = gradient_descent(function_2, init_x=np.array([3.0, -4.0]), lr=0.1, step_num=100)
    print(found)

学习算法的样例

two_layer_net.py

import numpy as np
import sys, os

sys.path.append(os.pardir)
from ch03.functions.all import *
from loss_function.cross_entropy_error import *
from gradient import numerical_gradient


class TwoLayerNet:
    """Fully connected network with a single hidden layer.

    All parameters live in ``self.params`` under the keys ``"W1"``,
    ``"b1"``, ``"W2"`` and ``"b2"``.
    """

    def __init__(self, input_size, hidden_size, output_size, weight_init_std=0.01):
        """Create the parameters.

        Weights are standard-normal samples scaled by ``weight_init_std``;
        biases start at zero.
        """
        # The dict literal is evaluated top to bottom, so W1 is drawn from
        # the random generator before W2.
        self.params = {
            "W1": weight_init_std * np.random.randn(input_size, hidden_size),
            "b1": np.zeros(hidden_size),
            "W2": weight_init_std * np.random.randn(hidden_size, output_size),
            "b2": np.zeros(output_size),
        }

    def predict(self, x):
        """Run the forward pass: affine, sigmoid, affine, softmax."""
        params = self.params
        hidden = sigmoid(np.dot(x, params["W1"]) + params["b1"])
        scores = np.dot(hidden, params["W2"]) + params["b2"]
        return softmax(scores)

    def loss(self, x, t):
        """Return the cross-entropy error of the prediction for ``x``
        against the targets ``t``."""
        return cross_entropy_error(self.predict(x), t)

    def accuracy(self, x, t):
        """Return the fraction of rows whose arg-max prediction matches the
        arg-max of the corresponding row of ``t``."""
        predicted = np.argmax(self.predict(x), axis=1)
        expected = np.argmax(t, axis=1)
        hits = np.sum(predicted == expected)
        return hits / float(x.shape[0])

    def numerical_gradient(self, x, t):
        """Return a dict with the numerical gradient of the loss for each
        of ``"W1"``, ``"b1"``, ``"W2"`` and ``"b2"``."""

        def loss_W(W):
            # The argument is ignored: the loss is always computed from
            # self.params. NOTE(review): this presumably relies on the
            # imported numerical_gradient perturbing the passed array in
            # place; confirm against the gradient module.
            return self.loss(x, t)

        return {
            key: numerical_gradient(loss_W, self.params[key])
            for key in ("W1", "b1", "W2", "b2")
        }

train_neuralnet.py

import numpy as np
import sys, os
import matplotlib.pyplot as plt

sys.path.append(os.pardir)
from ch03.functions.all import *
from loss_function.cross_entropy_error import *
from gradient import numerical_gradient
from dataset.mnist import load_mnist
from two_layer_net import TwoLayerNet

# Load MNIST with normalize=True and one_hot_label=True.
(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True, one_hot_label=True)

# Loss of every mini-batch, recorded after its parameter update.
train_loss_list = []

# Hyperparameters
iters_num = 10000
train_size = x_train.shape[0]
batch_size = 100
learning_rate = 0.1

network = TwoLayerNet(input_size=784, hidden_size=50, output_size=10)

for i in range(iters_num):
    print(i)
    # Draw the indices of one mini-batch (np.random.choice samples with
    # replacement by default).
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    t_batch = t_train[batch_mask]

    grad = network.numerical_gradient(x_batch, t_batch)
    # A tuple gives a fixed update order; iterating a set literal does not.
    for key in ("W1", "b1", "W2", "b2"):
        network.params[key] -= learning_rate * grad[key]

    # Record the loss on the same mini-batch after the update.
    loss = network.loss(x_batch, t_batch)
    train_loss_list.append(loss)

print(train_loss_list)
# One x position per recorded loss value.
x = list(range(len(train_loss_list)))
plt.plot(x, train_loss_list)
plt.show()