本文基于Andrew_Ng的ML课程作业
1-Regularized Linear Regression with scipy.optimize.minimize:对一个水库的流出水量以及水库水位进行正则化线性回归
导入库
import numpy as np
from scipy.io import loadmat
import scipy.optimize as opt
import matplotlib.pyplot as plt
函数:计算正则化的代价函数J(theta)
def computeRegCost(theta,X,y,lambada): #计算正则化的代价函数J(theta)
    """Compute the regularized linear-regression cost J(theta).

    Parameters
    ----------
    theta : array-like, shape (n,) — parameter vector; theta[0] is the bias.
    X : array-like, shape (m, n) — design matrix with a leading column of ones.
    y : array-like, shape (m,) or (m, 1) — target values.
    lambada : float — regularization strength (sic: 'lambda' misspelled; kept
        because callers pass it positionally).

    Returns
    -------
    float — mean squared error / 2 plus the L2 penalty on theta[1:].
    """
    # np.matrix is deprecated; work with plain ndarrays instead.
    theta = np.asarray(theta).ravel()
    X = np.asarray(X)
    y = np.asarray(y).ravel()
    m = X.shape[0]
    error = X @ theta - y                      # residuals, shape (m,)
    cost = np.sum(error ** 2) / (2 * m)
    # The intercept theta[0] is conventionally left unregularized.
    reg = (lambada / (2 * m)) * np.sum(theta[1:] ** 2)
    return cost + reg
函数:计算正则化的梯度grad
def computeRegGradient(theta,X,y,lambada): #计算正则化的梯度grad
    """Compute the gradient of the regularized linear-regression cost.

    Parameters
    ----------
    theta : array-like, shape (n,) — parameter vector; theta[0] is the bias.
    X : array-like, shape (m, n) — design matrix with a leading column of ones.
    y : array-like, shape (m,) or (m, 1) — target values.
    lambada : float — regularization strength.

    Returns
    -------
    ndarray, shape (n,) — gradient; the intercept component is not regularized.
    """
    # Vectorized ndarray version: the original np.matrix loop assigned a
    # (1,1) matrix into a float slot, which newer NumPy rejects.
    theta = np.asarray(theta).ravel()
    X = np.asarray(X)
    y = np.asarray(y).ravel()
    m = X.shape[0]
    error = X @ theta - y                      # residuals, shape (m,)
    grad = X.T @ error / m                     # unregularized gradient
    reg = (lambada / m) * theta                # new array; theta untouched
    reg[0] = 0.0                               # do not penalize the intercept
    return grad + reg
函数:拟合正则化线性回归的可视化函数
def visualizing(final_theta): #拟合正则化线性回归的可视化函数
    """Plot the training data and the fitted regression line.

    final_theta: sequence of two floats, (intercept, slope).
    NOTE(review): relies on the module-level globals ``X`` (with a leading
    bias column) and ``y`` being defined before this is called.
    """
    intercept = final_theta[0]
    slope = final_theta[1]
    fig, ax = plt.subplots(figsize=(12, 8))
    # Column 1 of X is the raw water level (column 0 is the bias term).
    plt.scatter(X[:, 1], y, c='r', label="Training Data")
    plt.plot(X[:, 1], X[:, 1] * slope + intercept, c='b', label="Prediction")
    ax.legend(loc='upper left')
    ax.set_xlabel("Water Level")
    ax.set_ylabel("Flow")
    plt.show()
主函数:
#Regularized Linear Regression with scipy.optimize.minimize:对一个水库的流出水量以及水库水位进行正则化线性回归
# Script entry: load the reservoir data, fit a regularized linear model of
# water outflow against water level, then visualize the fit.
data=loadmat("ex5data1.mat")
X,y,Xval,yval,Xtest,ytest=data['X'],data['y'],data['Xval'],data['yval'],data['Xtest'],data['ytest']
# Prepend a bias column of ones to every design matrix.
X,Xval,Xtest=[np.insert(x,0,values=1,axis=1) for x in (X,Xval,Xtest)] #X.shape=(12,2),Xval.shape=(21,2),Xtest.shape=(21,2)
theta=np.ones(X.shape[1])  # initial parameter guess for the optimizer
lambada=1                  # regularization strength
# TNC minimizes the regularized cost using the analytic gradient as jac.
final_theta=opt.minimize(fun=computeRegCost,x0=theta,args=(X,y,lambada),method='TNC',jac=computeRegGradient,options={'disp':True}).x
print(final_theta)
visualizing(final_theta)
拟合正则化线性回归可视化结果