1. Theory
The theory is not covered here; see Machine Learning by Zhou Zhihua (the "watermelon book").
2. Implementing Univariate Linear Regression (Gradient Descent)
Target function: y = 4*x + 3
We fit the model h(x) = theta_0 + theta_1 * x by minimizing the mean squared error J(theta) = (1/N) * sum_i (y_i - h(x_i))^2 over the N samples; gradient descent repeatedly steps theta against the gradient of J until J stops changing.
1. Estimate Y from the current theta
The leftmost column of the incoming data_x is all ones, i.e. X_0 = 1:

def return_Y_estimate(theta_now, data_x):
    # Make sure theta_now is a column vector
    theta_now = theta_now.reshape(-1, 1)
    _Y_estimate = np.dot(data_x, theta_now)
    return _Y_estimate
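As a quick illustration (the numbers below are made up, and numpy is assumed to be imported as np), multiplying the design matrix by theta = (3, 4)^T reproduces y = 3 + 4*x at each sample:

import numpy as np

theta = np.array([3.0, 4.0])             # theta_0 = 3 (intercept), theta_1 = 4 (slope)
data_x = np.array([[1.0, 0.0],
                   [1.0, 1.0],
                   [1.0, 2.0]])          # leftmost column is all ones (X_0 = 1)
print(return_Y_estimate(theta, data_x))  # [[ 3.] [ 7.] [11.]], i.e. y = 3 + 4*x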
2. Compute the gradient at the current theta

def return_dJ(theta_now, data_x, y_true):
    y_estimate = return_Y_estimate(theta_now, data_x)
    # There are _N samples in total
    _N = data_x.shape[0]
    # Number of theta parameters to solve for
    _num_of_features = data_x.shape[1]
    # Build the gradient vector
    _dJ = np.zeros([_num_of_features, 1])
    for i in range(_num_of_features):
        _dJ[i, 0] = 2 * np.dot((y_estimate - y_true).T, data_x[:, i]) / _N
    return _dJ
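For J(theta) = (1/N) * ||X*theta - y||^2 the gradient is dJ/dtheta = (2/N) * X^T (X*theta - y), which is exactly what the loop above computes one component at a time. As a sketch, the whole loop can be replaced by a single vectorized expression (return_dJ_vectorized is a name introduced here, not part of the original code):

def return_dJ_vectorized(theta_now, data_x, y_true):
    # (2/N) * X^T (X*theta - y); same result as the component-wise loop above
    _N = data_x.shape[0]
    return 2 * np.dot(data_x.T, return_Y_estimate(theta_now, data_x) - y_true) / _N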
3. Compute the value of J

def return_J(theta_now, data_x, y_true):
    # There are N samples in total
    N = data_x.shape[0]
    temp = y_true - np.dot(data_x, theta_now)
    _J = np.dot(temp.T, temp) / N
    return _J
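This is just the mean squared error J(theta) = (1/N) * sum_i (y_i - x_i^T theta)^2, returned as a 1x1 array rather than a scalar. A quick equivalence check with made-up values, assuming numpy is imported as np:

theta = np.array([[3.0], [4.0]])
X = np.array([[1.0, 1.0],
              [1.0, 2.0]])
y = np.array([[7.5], [10.5]])
print(return_J(theta, X, y))                 # [[0.25]]
print(np.mean((y - np.dot(X, theta)) ** 2))  # 0.25, the same value as a plain scalar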
4. Solve linear regression by gradient descent
Each row of data_x is one sample; data_y is a column vector whose rows are the targets for the corresponding rows of data_x.
The learning rate defaults to 0.01, the convergence tolerance to 1e-8, and the maximum number of iterations to 1e4:

def gradient_descent(data_x, data_y, Learning_rate=0.01, ER=1e-8, MAX_LOOP=1e4):
    # Number of samples
    _num_of_samples = data_x.shape[0]
    # Prepend a column of ones to data_x
    X_0 = np.ones([_num_of_samples, 1])
    new_x = np.column_stack((X_0, data_x))
    # Make sure data_y is a column vector
    new_y = data_y.reshape(-1, 1)
    # Number of unknowns to solve for
    _num_of_features = new_x.shape[1]
    # Initialize the theta vector with zeros
    theta = np.zeros([_num_of_features, 1])
    flag = 0  # exit flag
    ct = 0  # iteration counter
    colors = ['m-', 'y-', 'r-', 'b-']
    while flag == 0 and ct < MAX_LOOP:
        last_theta = theta
        # Update theta along the negative gradient
        gradient = return_dJ(theta, new_x, new_y)
        theta = theta - Learning_rate * gradient
        er = abs(return_J(last_theta, new_x, new_y) - return_J(theta, new_x, new_y)).item()
        # Plot the current fitted line (assumes a single feature)
        xx = np.linspace(0, 10, 100)
        yy = theta[0][0] + theta[1][0] * xx
        color = random.choice(colors)
        plt.plot(xx, yy, color)
        plt.pause(0.01)
        # Set the exit flag once the change in J falls below the threshold
        if er < ER:
            flag = 1
        # Increment the iteration counter
        ct += 1
    plt.show()
    return theta
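A minimal call might look like the sketch below, assuming the helper functions above are defined and random, matplotlib.pyplot as plt, and numpy as np are imported (the sample data here are made up for illustration; the full script in the next section shows the complete setup):

x = np.arange(0, 10, 0.5).reshape(-1, 1)  # 20 samples, one feature per row
y = 3 + 4 * x                             # noiseless targets from y = 4*x + 3
theta = gradient_descent(x, y)
print(theta.ravel())                      # approaches [3. 4.] as iterations proceed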
5. Complete Code
# -*- coding: utf-8 -*-
# @Project: univariate linear regression
# @Author: little fly
# @Create time: 2020/10/13 12:00
import random
import matplotlib.pyplot as plt
import numpy as np
# Estimate Y from the current theta
# The leftmost column of the incoming data_x is all ones, i.e. X_0 = 1
def return_Y_estimate(theta_now, data_x):
    # Make sure theta_now is a column vector
    theta_now = theta_now.reshape(-1, 1)
    _Y_estimate = np.dot(data_x, theta_now)
    return _Y_estimate


# Compute the gradient at the current theta
# The leftmost column of the incoming data_x is all ones, i.e. X_0 = 1
def return_dJ(theta_now, data_x, y_true):
    y_estimate = return_Y_estimate(theta_now, data_x)
    # There are _N samples in total
    _N = data_x.shape[0]
    # Number of theta parameters to solve for
    _num_of_features = data_x.shape[1]
    # Build the gradient vector
    _dJ = np.zeros([_num_of_features, 1])
    for i in range(_num_of_features):
        _dJ[i, 0] = 2 * np.dot((y_estimate - y_true).T, data_x[:, i]) / _N
    return _dJ


# Compute the value of J (mean squared error)
# The leftmost column of the incoming data_x is all ones, i.e. X_0 = 1
def return_J(theta_now, data_x, y_true):
    # There are N samples in total
    N = data_x.shape[0]
    temp = y_true - np.dot(data_x, theta_now)
    _J = np.dot(temp.T, temp) / N
    return _J
# Solve linear regression by gradient descent
# Each row of data_x is one sample
# data_y is a column vector; each row is the target for the corresponding row of data_x
# The learning rate defaults to 0.01
# The convergence tolerance defaults to 1e-8
# The maximum number of iterations defaults to 1e4
def gradient_descent(data_x, data_y, Learning_rate=0.01, ER=1e-8, MAX_LOOP=1e4):
    # Number of samples
    _num_of_samples = data_x.shape[0]
    # Prepend a column of ones to data_x
    X_0 = np.ones([_num_of_samples, 1])
    new_x = np.column_stack((X_0, data_x))
    # Make sure data_y is a column vector
    new_y = data_y.reshape(-1, 1)
    # Number of unknowns to solve for
    _num_of_features = new_x.shape[1]
    # Initialize the theta vector with zeros
    theta = np.zeros([_num_of_features, 1])
    flag = 0  # exit flag
    ct = 0  # iteration counter
    colors = ['m-', 'y-', 'r-', 'b-']
    while flag == 0 and ct < MAX_LOOP:
        last_theta = theta
        # Update theta along the negative gradient
        gradient = return_dJ(theta, new_x, new_y)
        theta = theta - Learning_rate * gradient
        er = abs(return_J(last_theta, new_x, new_y) - return_J(theta, new_x, new_y)).item()
        # Plot the current fitted line (assumes a single feature)
        xx = np.linspace(0, 10, 100)
        yy = theta[0][0] + theta[1][0] * xx
        color = random.choice(colors)
        plt.plot(xx, yy, color)
        plt.pause(0.01)
        # Set the exit flag once the change in J falls below the threshold
        if er < ER:
            flag = 1
        # Increment the iteration counter
        ct += 1
    plt.show()
    return theta
def main():
    # =================== Sample data generation =======================
    # Univariate case: two theta parameters (intercept and slope) to estimate
    num_of_features = 1
    num_of_samples = 100
    # Noise scale
    rate = 1
    X = []
    for i in range(num_of_features):
        X.append(np.random.random([1, num_of_samples]) * 10)
    X = np.array(X).reshape(num_of_samples, num_of_features)
    print("Shape of X: ", X.shape)
    # Generate Y for each X from the target equation plus noise
    Y = []
    for i in range(num_of_samples):
        Y.append(3 + 4 * X[i][0] + np.random.rand() * rate)
    Y = np.array(Y).reshape(-1, 1)
    print("Shape of Y: ", Y.shape)
    plt.scatter(X, Y)
    # ==================================================================
    # Run gradient descent and print the estimated theta
    print(gradient_descent(X, Y))


if __name__ == '__main__':
    main()
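As a sanity check (not part of the original script), the gradient-descent estimate can be compared against the closed-form least-squares solution, sketched here with np.linalg.lstsq on the X and Y generated in main():

# Closed-form least squares on the same design matrix [1, x]
X_design = np.column_stack((np.ones([X.shape[0], 1]), X))
theta_ls, *_ = np.linalg.lstsq(X_design, Y, rcond=None)
print(theta_ls.ravel())  # should be close to the gradient-descent result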
6. Results
Target function: y = 4*x + 3
Fitted function: y = 4.01432622*x + 3.47695413
Note that the noise added in main() is np.random.rand() * rate, which is uniform on [0, 1) with mean 0.5 rather than zero-mean, so the fitted intercept lands near 3.5 instead of 3.
Reference: Machine Learning, Zhou Zhihua (the "watermelon book")