Gradient Descent Algorithm
Algorithm Implementation
import numpy as np
import matplotlib.pyplot as plt
import time
# If NaN appears (theta usually also oscillates back and forth while the error keeps growing), the step size alpha is too large and should be reduced
num_params=3
x1_data = np.loadtxt('x1.txt', delimiter=',')
m=len(x1_data)
x1 = np.array(x1_data[0:m]).reshape(m, 1)
x2_data = np.loadtxt('x2.txt', delimiter=',')
x2 = np.array(x2_data[0:m]).reshape(m, 1)
x3_data = np.loadtxt('x3.txt', delimiter=',')
x3 = np.array(x3_data[0:m]).reshape(m, 1)
# x0 is fixed to 1
x = np.ones((m, 1))
# stack the ones column with the three feature columns
x = np.concatenate((x, x1.reshape(m, 1), x2.reshape(m,1), x3.reshape(m,1)), axis=1)
y_data = np.loadtxt('data.txt', delimiter=',')
# extract the y values from the array
y = np.array(y_data[0:m]).reshape(m, 1)
# set the learning rate
alpha = 0.001
# initial iterate for the model parameters theta (usually all zeros)
theta = np.zeros((num_params+1, 1))
# lists that store the cost values and the iteration indices
costlist, num = [], []
# cost function (mean squared error)
def costfunction(theta, x, y):
    error = np.dot(x, theta) - y
    return (1 / (2 * m)) * (np.dot(error.T, error))
# gradient of the cost function (the partial-derivative term), derived here for the linear-regression case
def gradientdescent(theta, x, y):
    error = np.dot(x, theta) - y
    return (1 / m) * (np.dot(x.T, error))
def adam_update(theta, gd, m, v, t, beta1t, beta2t, lr=0.01, beta1=0.9, beta2=0.999, epsilon=1e-8):
    ...  # placeholder; full definition in the "Adam" section below
def nadam_update(theta, gd, m, v, t, beta1t, beta2t, lr=0.01, beta1=0.9, beta2=0.999, epsilon=1e-8):
    ...  # placeholder; full definition in the "Nadam" section below
# the gradient-descent iteration loop
def GDworkfunction(theta, alpha, x, y):
    gd = gradientdescent(theta, x, y)
    mt = np.zeros((num_params+1, 1)).reshape(-1)
    vt = np.zeros((num_params+1, 1)).reshape(-1)
    count = 0
    beta1t = 1
    beta2t = 1
    costlow = float("inf")
    besttheta = theta
    # stop after at most 200,000 iterations
    for i in range(200000):
        count += 1
        num.append(i)
        # theta = theta - alpha * gd  # vanilla gradient descent
        (theta, mt, vt, beta1t, beta2t) = adam_update(theta=theta, gd=gd, m=mt, v=vt, t=i+1, beta1t=beta1t, beta2t=beta2t)  # Adam
        # (theta, mt, vt, beta1t, beta2t) = nadam_update(theta=theta, gd=gd, m=mt, v=vt, t=i+1, beta1t=beta1t, beta2t=beta2t)  # Nadam
        cost1 = costfunction(theta, x, y)
        costlist.append(cost1)
        if cost1 < costlow:
            costlow = cost1[0][0]
            besttheta = theta
        if cost1 < 1e-27:
            break
        gd = gradientdescent(theta, x, y)
    return (besttheta, count, costlow)
start_time = time.time()
(consulttheta,count,costlow) = GDworkfunction(theta, alpha, x, y)
end_time = time.time()
print('Elapsed time: {}'.format(end_time - start_time))
print(f"Number of iterations: {count}")
print('Function at the minimum cost: y={}+{}*x1+{}*x2+{}*x3'.format(consulttheta[0][0], consulttheta[1][0], consulttheta[2][0], consulttheta[3][0]))
cost = costfunction(consulttheta, x, y)[0][0]  # cost at the best theta (equals costlow)
print('Minimum cost value: {}'.format(costlow))
costlist1 = []
# (optional) rescale the cost values for plotting; the scale is up to you, here they are copied unchanged
for j in costlist:
    costlist1.append(j)
print(np.squeeze(np.array(costlist1)))
n_iter = len(costlist1)  # number of recorded iterations (avoid shadowing the built-in len)
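The script above minimizes the mean-squared-error cost J(theta) = (1/(2m)) * (X*theta - y)^T (X*theta - y); the commented line inside GDworkfunction is the vanilla update theta <- theta - alpha * grad J(theta), while the active line uses Adam. As an optional sanity check that is not part of the original script, the learned parameters can be compared against NumPy's closed-form least-squares solution, assuming x, y and consulttheta from above are still in scope:
# Optional sanity check (not in the original script): compare the
# gradient-descent result with the closed-form least-squares solution
# for the same design matrix x and target y.
theta_ls = np.linalg.lstsq(x, y, rcond=None)[0]
print('closed-form theta:     ', theta_ls.ravel())
print('gradient-descent theta:', consulttheta.ravel())
print('max abs difference:    ', np.max(np.abs(theta_ls - consulttheta)))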
Adam
def adam_update(theta, gd, m, v, t, beta1t, beta2t, lr=0.01, beta1=0.9, beta2=0.999, epsilon=1e-8):
    # here m and v are the Adam moment vectors (not the sample count)
    m = m.reshape(-1)
    v = v.reshape(-1)
    theta = theta.reshape(-1)
    gd = gd.reshape(-1)
    beta1t *= beta1   # running product beta1^t
    beta2t *= beta2   # running product beta2^t
    m = beta1 * m + (1 - beta1) * gd
    v = beta2 * v + (1 - beta2) * (gd ** 2)
    m_corrected = m / (1 - beta1t)
    v_corrected = v / (1 - beta2t)
    # rate = lr * np.sqrt(1 - beta2 ** t) / (1 - beta1 ** t)
    rate = lr
    param_update = rate * m_corrected / (np.sqrt(v_corrected) + epsilon)
    theta = theta - param_update
    theta = theta.reshape(-1, 1)  # restore the original column shape
    return (theta, m, v, beta1t, beta2t)
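For reference, adam_update implements the standard Adam rule of Kingma and Ba, with beta1t and beta2t carrying the running powers beta1^t and beta2^t used for bias correction; in the usual notation the step is:
m_t = \beta_1 m_{t-1} + (1-\beta_1)\, g_t, \qquad v_t = \beta_2 v_{t-1} + (1-\beta_2)\, g_t^2
\hat{m}_t = \frac{m_t}{1-\beta_1^t}, \qquad \hat{v}_t = \frac{v_t}{1-\beta_2^t}, \qquad \theta_t = \theta_{t-1} - \frac{\alpha\, \hat{m}_t}{\sqrt{\hat{v}_t} + \epsilon}
Here \alpha corresponds to the lr argument and g_t to gd.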
Nadam
def nadam_update(theta, gd, m, v, t, beta1t, beta2t, lr=0.01, beta1=0.9, beta2=0.999, epsilon=1e-8):
    m = m.reshape(-1)
    v = v.reshape(-1)
    theta = theta.reshape(-1)
    gd = gd.reshape(-1)
    beta1t *= beta1
    beta2t *= beta2
    m = beta1 * m + (1 - beta1) * gd
    # v = max(beta2 * v + (1 - beta2) * (gd ** 2), v)
    v = beta2 * v + (1 - beta2) * (gd ** 2)
    m_corrected = m / (1 - beta1t)
    v_corrected = v / (1 - beta2t)
    # rate = lr * np.sqrt(1 - beta2 ** t) / (1 - beta1 ** t)
    rate = lr
    # Nesterov-style momentum: mix the corrected momentum with the current gradient
    temp = beta1 * m_corrected + (1 - beta1) * gd / (1 - beta1t)
    param_update = rate * temp / (np.sqrt(v_corrected) + epsilon)
    theta = theta - param_update
    theta = theta.reshape(-1, 1)  # restore the original column shape
    return (theta, m, v, beta1t, beta2t)
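The only change relative to adam_update is the Nesterov-style numerator temp, which mixes the bias-corrected momentum with the current bias-corrected gradient, so the step implemented above is:
\theta_t = \theta_{t-1} - \frac{\alpha}{\sqrt{\hat{v}_t} + \epsilon} \left( \beta_1 \hat{m}_t + \frac{(1-\beta_1)\, g_t}{1-\beta_1^t} \right)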
Data Visualization
plt.figure(num=3, figsize=(10, 6))
# the stored cost values are 2-D (1x1) arrays, so squeeze them to 1-D before plotting
plt.plot(num, np.squeeze(np.array(costlist1)))
plt.xlim((1, n_iter))
plt.ylim(0, 1e-20)
plt.xlabel('iterate_nums')
plt.ylabel('costvalue')
# set the range of the x-axis ticks
my_x_ticks = np.arange(1, n_iter, n_iter // 10)
plt.xticks(my_x_ticks)
plt.savefig('BGD.png')  # save the figure
plt.show()
Generating Simulated Data
import numpy as np
import csv
m=1000
x1=np.random.randint(-30,60,(m,))
x2=np.random.randint(-30,60,(m,))
x3=np.random.randint(-60,60,(m,))
y=8.051+3.151*x1+2.279*x2-0.788*x3
# print(x1)
# print(x2)
# print(x3)
# print(y)
print("-"*50+"x1")
for i in x1:
    print(i, end=",")
print("\n"+"-"*50+"x2")
for i in x2:
    print(i, end=",")
print("\n"+"-"*50+"x3")
for i in x3:
    print(i, end=",")
print("\n"+"-"*50+"y")
for i in y:
    print(i, end=",")
with open('x1.txt', 'w', encoding='utf-8') as file:
    content = ",".join(map(str, x1))
    file.write(content)
with open('x2.txt', 'w', encoding='utf-8') as file:
    content = ",".join(map(str, x2))
    file.write(content)
with open('x3.txt', 'w', encoding='utf-8') as file:
    content = ",".join(map(str, x3))
    file.write(content)
with open('data.txt', 'w', encoding='utf-8') as file:
    content = ",".join(map(str, y))
    file.write(content)
x0 = np.ones((m, 1))
data = np.concatenate((x0, x1.reshape(m, 1), x2.reshape(m, 1), x3.reshape(m, 1), y.reshape(m, 1)), axis=1)
# write a combined CSV file
with open('example.csv', 'w', newline='', encoding='utf-8') as file:
    writer = csv.writer(file)
    writer.writerow(["x0", "x1", "x2", "x3", "y"])
    writer.writerows(data)  # write multiple rows
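As an optional round-trip check that is not part of the original script, the generated text files can be read back with the same np.loadtxt call the training script uses, to confirm that the comma-separated format matches what it expects:
# Optional round-trip check: reload the generated files exactly as the
# training script does and confirm the expected shapes.
x1_check = np.loadtxt('x1.txt', delimiter=',')
y_check = np.loadtxt('data.txt', delimiter=',')
assert x1_check.shape == (m,) and y_check.shape == (m,)
print('first few reloaded samples:', x1_check[:3], y_check[:3])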
Summary
Nai