Python 线性回归梯度下降预测数值

实现

通过训练样本(train.csv)得出的m,b值,来计算测试集(test.csv)中的y的预测值,并保存结果(result.csv)

代码

import numpy as np
import pylab
import csv
from sklearn.metrics import *

# Mean squared error of the linear model y = m*x + b over a dataset
def compute_error(b, m, data):
    '''
    Loss function: mean squared error between the actual y values and
    the model's predictions.
    b: intercept (bias)
    m: slope (weight)
    data: array of shape (N, 2); column 0 is x, column 1 is y
    Returns the average of (y - (m*x + b))**2 over all N samples.
    '''
    x = data[:, 0]
    y = data[:, 1]
    # Vectorized: mean of the squared residuals (replaces the former
    # dead "totalError = 0" init and separate sum/divide steps).
    return np.mean((y - m * x - b) ** 2)

# Gradient-descent training loop
def optimizer(data, starting_b, starting_m, learning_rate, num_iter):
    '''
    Run num_iter gradient-descent steps from the given starting
    parameters and return the fitted intercept and slope.
    starting_b: initial intercept
    starting_m: initial slope
    learning_rate: step size for each update
    num_iter: number of iterations to run
    Returns [b, m] after the final iteration.
    '''
    b, m = starting_b, starting_m
    for step in range(num_iter):
        # Take one descent step toward lower error.
        b, m = compute_gradient(b, m, data, learning_rate)
        # Report training progress every 100 iterations.
        if step % 100 == 0:
            print('iter {0}:error={1}'.format(step, compute_error(b, m, data)))
    return [b, m]

# One gradient-descent update of the intercept b and slope m
def compute_gradient(b_current, m_current, data, learning_rate):
    '''
    Compute the gradient of the mean-squared-error loss at the current
    parameters and take one descent step.
    b_current: current intercept
    m_current: current slope
    data: array of shape (N, 2); column 0 is x, column 1 is y
    learning_rate: step size
    Returns [new_b, new_m] after the update.
    '''
    x = data[:, 0]
    y = data[:, 1]
    residual = y - m_current * x - b_current
    # Partial derivatives of mean((y - m*x - b)**2):
    #   dL/db = -2 * mean(residual);  dL/dm = -2 * mean(x * residual)
    # (replaces the former dead "b_gradient = 0" / "m_gradient = 0"
    # inits and separate elementwise/sum steps).
    b_gradient = -2.0 * np.mean(residual)
    m_gradient = -2.0 * np.mean(x * residual)
    # Step against the gradient.
    new_b = b_current - learning_rate * b_gradient
    new_m = m_current - learning_rate * m_gradient
    return [new_b, new_m]

# Visualize the training points and the fitted regression line
def plot_data(data, b, m):
    '''
    Scatter-plot the raw (x, y) samples, overlay the fitted line
    y = m*x + b, and display the figure.
    '''
    xs = data[:, 0]
    ys = data[:, 1]
    fitted = m * xs + b
    pylab.plot(xs, ys, 'o')       # raw samples
    pylab.plot(xs, fitted, 'k-')  # fitted regression line
    pylab.show()

def Linear_regression():
    '''
    Train a simple linear model on train.csv via gradient descent,
    plot the fit, predict y for the x values in test.csv, and save
    the predictions to result.csv.
    '''
    # Load training pairs (x, y) and test inputs (x only).
    train_set = np.loadtxt('train.csv', delimiter=',')
    test_set = np.loadtxt("test.csv", delimiter=',')

    # Hyperparameters: step size, starting parameters, iteration count.
    learning_rate = 0.001
    initial_b = 0.0
    initial_m = 0.0
    num_iter = 1000

    # Show the loss before any training.
    print('initial variables:\n initial_b = {0}\n intial_m = {1}\n error of begin = {2} \n'
          .format(initial_b, initial_m, compute_error(initial_b, initial_m, train_set)))

    # Fit b and m by gradient descent.
    b, m = optimizer(train_set, initial_b, initial_m, learning_rate, num_iter)

    # Show the fitted parameters and final loss.
    print('final formula parmaters:\n b = {1}\n m={2}\n error of end = {3} \n'
          .format(num_iter, b, m, compute_error(b, m, train_set)))

    # Visualize the fit on the training data.
    plot_data(train_set, b, m)

    # Predict y for each test x and persist the results.
    predictions = list(test_set[:] * m + b)
    save_to_csv(predictions, "result.csv")

# Write predicted values to a one-column CSV file
def save_to_csv(data, outpath):
    '''
    Save a sequence of prediction values to outpath as a CSV file:
    a single 'result' header row followed by one value per row.
    data: sequence of values to write
    outpath: destination file path
    '''
    # 'with' guarantees the file is closed even if writing fails
    # (the original leaked the handle on error); newline='' is the
    # csv-module requirement for correct line endings.
    with open(outpath, 'w', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(['result'])
        # One prediction per row.
        writer.writerows([value] for value in data)
    print("结果保存成功")

# Script entry point: run the end-to-end train/predict pipeline.
if __name__ == '__main__':
    Linear_regression()

模型评估

可能做得不对,只是个人尝试,代码接上面的

def Linear_regression():
    '''
    Train a linear model on train.csv via gradient descent, plot the
    fit, write test-set predictions to result.csv, and report a rough
    accuracy estimate on the training data via valid().
    '''
    # Load training pairs (x, y) and test inputs (x only).
    train_set = np.loadtxt('train.csv', delimiter=',')
    test_set = np.loadtxt("test.csv", delimiter=',')

    # Hyperparameters: step size, starting parameters, iteration count.
    learning_rate = 0.001
    initial_b = 0.0
    initial_m = 0.0
    num_iter = 1000

    # Show the loss before any training.
    print('initial variables:\n initial_b = {0}\n intial_m = {1}\n error of begin = {2} \n'
          .format(initial_b, initial_m, compute_error(initial_b, initial_m, train_set)))

    # Fit b and m by gradient descent.
    b, m = optimizer(train_set, initial_b, initial_m, learning_rate, num_iter)

    # Show the fitted parameters and final loss.
    print('final formula parmaters:\n b = {1}\n m={2}\n error of end = {3} \n'
          .format(num_iter, b, m, compute_error(b, m, train_set)))

    # Visualize the fit on the training data.
    plot_data(train_set, b, m)

    # Predictions on the training x values (used for the accuracy check).
    y_predict = list(train_set[:, 0] * m + b)
    # Predictions on the test x values, saved to disk.
    y_test_predict = list(test_set[:] * m + b)
    save_to_csv(y_test_predict, "result.csv")

    # Rough accuracy estimate: fraction of training targets that fall
    # within +/- final-MSE of their prediction.
    valid(y_predict, list(train_set[:, 1]), compute_error(b, m, train_set))

# Write predicted values to a one-column CSV file
def save_to_csv(data, outpath):
    '''
    Save a sequence of prediction values to outpath as a CSV file:
    a single 'result' header row followed by one value per row.
    data: sequence of values to write
    outpath: destination file path
    '''
    # 'with' guarantees the file is closed even if writing fails
    # (the original leaked the handle on error); newline='' is the
    # csv-module requirement for correct line endings.
    with open(outpath, 'w', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(['result'])
        # One prediction per row.
        writer.writerows([value] for value in data)
    print("结果保存成功")
# NOTE(review): dead code — this triple-quoted string is a no-op module-level
# expression kept as a commented-out alternative accuracy metric based on
# sklearn's mean_absolute_error / mean_squared_error. It is never executed;
# the active valid() below shadows it.
'''
#平均绝对误差和均方误差计算准确率
def valid(y, y_true):
    #accuracy = mean_squared_error(y_true, y)
    accuracy = mean_absolute_error(y_true, y) * 100
    print("模型正确率:{0}%".format(accuracy))
'''
#误差大小计算准确率
def valid(y,y_true, error):
    print(error)
    n = 0
    length = len(y)
    for i, j in zip(y, y_true):
        if i-error <= j <= i+error:
            n += 1
    accuracy = (n/length) * 100
    print("模型正确率:{0}%".format(accuracy))

# Script entry point: run the extended pipeline including the accuracy check.
if __name__ == '__main__':
    Linear_regression()

数据

自行保存在csv文件中
测试集(test.csv)
5.5
8.9
2.23
4.4
22
训练样本(train.csv)
1,3
1.2,3
1.2,4
1.5,4.5
1.6,4.3
6.5,12
3.6,7.1
2.5,9
5.7,14
6,11
9,17
8.9,17
7.1,15
7,14
2.5,4
0.8,2
0.5,2
3.4,7
3.6,9
5.6,12
6.7,15
6.9,15
7.1,14
7.5,17
7.8,16
8.1,15
8.3,15
8.5,15
8.7,16
8.7,17
8.8,18
8.8,20
8,16
9,19
9.2,18
10.1,20
1.1,3.2
1.6,4.2
4,9
12,25
9.5,20

参考

机器学习之线性回归及python实现
七、回归——回归预测的评价指标(附python代码)
python sklearn 线性回归模型 模型评估
Python 线性回归分析以及评价指标

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值