A Python Implementation of Gradient Descent

This post walks through generating a simulated dataset and fitting a curve to it. Gradient descent is used to tune the coefficients of a polynomial so that it best fits the simulated data, and min-max normalization of the inputs resolves the numerical problems caused by overly large values.


From a simulated dataset to curve fitting.
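
Before the code, a quick note on what the main loop implements. The model is f(x) = a*x^3 + b*x^2 + c*x + d, and each training sample contributes a squared error (f(x) - y)^2. Stochastic gradient descent moves every coefficient against its partial derivative of that error (the constant factor 2 from differentiating the square is absorbed into the learning rate alpha):

a <- a - alpha * (f(x) - y) * x^3
b <- b - alpha * (f(x) - y) * x^2
c <- c - alpha * (f(x) - y) * x
d <- d - alpha * (f(x) - y)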

# -*- coding: utf-8 -*-
"""
Created on Tue Sep  5 21:21:58 2017
@author: wjw
模拟产生数据集,然后再进行拟合
"""
def normalization(X):
    # Without normalization the values in gradient descent grow too large and overflow.
    maxX = max(X)
    minX = min(X)
    return [(x - minX) / (maxX - minX) for x in X]

def gradient_descent(X_train, Y_train, a, b, c, d):
    # Model to fit: a*x**3 + b*x**2 + c*x + d
    n = 0
    max_iter = 20000
    alpha = 0.02       # learning rate
    epsilon = 1e-8     # stop when the mean error no longer changes
    error1 = 0         # mean squared error of the previous pass
    error2 = 0         # accumulated squared error of the current pass

    while True:
        n += 1
        if n > max_iter:
            break

        for i in range(len(X_train)):  # one stochastic update per training sample
            x = X_train[i]
            y = Y_train[i]
            # Compute the residual once so all four updates use the same gradient
            # (the original updated a first and then reused it for b, c, d).
            residual = a*x**3 + b*x**2 + c*x + d - y
            a -= alpha * residual * x**3
            b -= alpha * residual * x**2
            c -= alpha * residual * x
            d -= alpha * residual
            error2 += (y - (a*x**3 + b*x**2 + c*x + d))**2  # accumulate first, average below

        avg_error = error2 / len(X_train)  # mean squared error of this pass

        if n % 1000 == 0:
            print('iterations: %d' % n)
            print('error: %f, train_accuracy: %f'
                  % (abs(avg_error - error1), calculate_accuracy(a, b, c, d, X_train, Y_train)))

        if abs(avg_error - error1) < epsilon:  # difference between consecutive mean errors
            print('congratulations!')
            print('n:', n)
            break

        error1 = avg_error
        error2 = 0

    return a, b, c, d

def calculate_accuracy(a, b, c, d, X, Y):
    # Despite the name, this returns the mean squared error over (X, Y).
    total = 0
    for i in range(len(X)):
        x = X[i]
        y = Y[i]
        total += (y - (a*x**3 + b*x**2 + c*x + d))**2
    return total / len(X)
  
if __name__ == "__main__":

    import numpy as np
    import matplotlib.pyplot as plt
    import math
    # sklearn.cross_validation was removed; use sklearn.model_selection instead
    from sklearn.model_selection import train_test_split

    x = np.arange(0, 100, 0.1)
    y = list(map(lambda x: math.log2(x + 1) + np.random.random() * 0.3, x))  # noisy log2 curve

    X_train, X_test, Y_train, Y_test = train_test_split(x, y, test_size=0.4, random_state=1)
    plt.plot(X_train, Y_train, 'ro')  # scatter the training points
    X_train = normalization(X_train)
    A, B, C, D = gradient_descent(X_train, Y_train, 0.1, 0.1, 0.2, 0.2)
    print(A, B, C, D)
    X = np.arange(0, 100, 0.01)
    normalized_X = normalization(X)
    Y = list(map(lambda x: A*x**3 + B*x**2 + C*x + D, normalized_X))
    plt.plot(X, Y, color='blue')  # fitted curve on the same axes
    plt.show()
    X_test = normalization(X_test)
    avg_accuracy = calculate_accuracy(A, B, C, D, X_test, Y_test)
    print('test_accuracy:', avg_accuracy)
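
One caveat, flagged here as my note rather than something from the original script: X_test above is rescaled with its own min and max. Strictly, min-max scaling should reuse the training set's statistics so train and test land on the same scale. A minimal sketch, assuming the raw training values were kept in a hypothetical copy before being overwritten:

train_min, train_max = min(X_train_raw), max(X_train_raw)  # X_train_raw: hypothetical copy kept before normalization
X_test = [(x - train_min) / (train_max - train_min) for x in X_test]

Here the two sets are random splits of the same evenly spaced range, so the statistics nearly coincide and the script's shortcut changes little.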

Pitfall: during gradient descent, the interpreter warned that values exceeded the representable range unless alpha was set very small. The cause: computing the gradient involves x^3, and accumulating the error on top of that can push values out of range. The fix: normalize x before plugging it into the computation, and the problem goes away.
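
To see the blow-up concretely, here is a minimal sketch with illustrative numbers (the sample point and magnitudes are my own, not from the original post):

alpha = 0.02
a, b, c, d = 0.1, 0.1, 0.2, 0.2           # the script's starting coefficients
x, y = 100.0, 6.66                        # hypothetical raw sample near log2(101)
residual = a*x**3 + b*x**2 + c*x + d - y  # ~1.0e5, dominated by a*x**3
step = alpha * residual * x**3            # ~2.0e9: a single update already explodes
print(residual, step)                     # after min-max scaling, x**3 stays in [0, 1]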

The result looks like this:

[Figure: red training points with the fitted blue curve]
