Linear regression is mainly used for regression problems; only occasionally is it applied to classification.
Simple linear regression, y = a*x + b, models the case where there is exactly one independent variable and one dependent variable, and the relationship between them is linear.
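For reference, the slope a and intercept b are the standard least-squares estimates, which is exactly what the loops in the code below compute:

$$a = \frac{\sum_{i=1}^{n}(x_i-\bar{x})(y_i-\bar{y})}{\sum_{i=1}^{n}(x_i-\bar{x})^2}, \qquad b = \bar{y} - a\,\bar{x}$$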
A Python example:
import numpy as np
import matplotlib.pyplot as plt

if __name__ == '__main__':
    x = np.array([1, 2, 4, 6, 9])
    y = np.array([2, 5, 7, 8, 10])
    x_mean = np.mean(x)
    y_mean = np.mean(y)
    # Accumulate the least-squares numerator and denominator.
    numerator = 0.0
    denominator = 0.0
    for x_i, y_i in zip(x, y):
        numerator += (x_i - x_mean) * (y_i - y_mean)
        denominator += (x_i - x_mean) ** 2
    a = numerator / denominator  # slope
    b = y_mean - a * x_mean      # intercept
    y_predict = a * x + b
    plt.scatter(x, y, color='b')       # raw data points
    plt.plot(x, y_predict, color='r')  # fitted line
    plt.xlabel('X', fontsize=15)
    plt.ylabel('Y', fontsize=15)
    plt.show()
Result: the script shows a scatter plot of the sample points (blue) with the fitted regression line (red).
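As a quick cross-check (a minimal sketch; np.polyfit is NumPy's built-in polynomial fit and is not part of the original example), fitting a degree-1 polynomial should reproduce the same slope and intercept:

import numpy as np

x = np.array([1, 2, 4, 6, 9])
y = np.array([2, 5, 7, 8, 10])
a, b = np.polyfit(x, y, 1)  # degree-1 fit returns [slope, intercept]
print(a, b)                 # roughly 0.903 and 2.427, matching the manual loop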
Encapsulating the simple linear regression algorithm
Create a file named linearRegression.py:
import numpy as np

class linearRegressionSelf:
    def __init__(self):
        self.a_ = None  # slope
        self.b_ = None  # intercept

    def fit(self, x_train, y_train):
        # Simple linear regression handles exactly one feature.
        assert x_train.ndim == 1
        x_mean = np.mean(x_train)
        y_mean = np.mean(y_train)
        numerator = 0.0
        denominator = 0.0
        for x_i, y_i in zip(x_train, y_train):
            numerator += (x_i - x_mean) * (y_i - y_mean)
            denominator += (x_i - x_mean) ** 2
        self.a_ = numerator / denominator
        self.b_ = y_mean - self.a_ * x_mean
        return self

    def predict(self, x_test_group):
        # Predict every sample in the group.
        return np.array([self._predict(x_test) for x_test in x_test_group])

    def _predict(self, x_test):
        return self.a_ * x_test + self.b_

    def mean_squared_error(self, y_true, y_predict):
        return np.sum((y_true - y_predict) ** 2) / len(y_true)

    def r_square(self, y_true, y_predict):
        # R^2 = 1 - MSE / Var(y_true)
        return 1 - self.mean_squared_error(y_true, y_predict) / np.var(y_true)

if __name__ == '__main__':
    x = np.array([1, 2, 4, 6, 9])
    y = np.array([2, 5, 7, 8, 10])
    lr = linearRegressionSelf()
    lr.fit(x, y)
    print(lr.predict([7]))
    print(lr.r_square([8, 9], lr.predict([6, 8])))
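A note on r_square: 1 - MSE/Var(y_true) is algebraically the textbook R² = 1 - SS_res/SS_tot, since both sums are divided by the same n. A small sketch to verify this against scikit-learn's r2_score (assuming scikit-learn is installed; it is not used by the original code):

import numpy as np
from sklearn.metrics import r2_score

y_true = np.array([8.0, 9.0])
y_pred = np.array([7.9, 9.1])
print(r2_score(y_true, y_pred))  # library value
print(1 - np.sum((y_true - y_pred) ** 2) / len(y_true) / np.var(y_true))  # same value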
Test it from a separate script:
import numpy as np
from linearRegression import linearRegressionSelf

if __name__ == '__main__':
    x = np.array([1, 2, 4, 6, 9])
    y = np.array([2, 5, 7, 8, 10])
    lr = linearRegressionSelf()
    lr.fit(x, y)
    print(lr.predict([7]))                          # predicted y at x = 7
    print(lr.r_square([8, 9], lr.predict([6, 8])))  # R^2 on two held-out points
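For completeness, here is the same fit with scikit-learn's LinearRegression (a sketch under the assumption that scikit-learn is available; it expects a 2-D feature matrix, hence the reshape):

import numpy as np
from sklearn.linear_model import LinearRegression

x = np.array([1, 2, 4, 6, 9]).reshape(-1, 1)  # shape (5, 1): one feature per row
y = np.array([2, 5, 7, 8, 10])
reg = LinearRegression().fit(x, y)
print(reg.coef_[0], reg.intercept_)  # should match a_ and b_ from fit()
print(reg.predict([[7]]))            # should match lr.predict([7])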