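多元线性回归:假设我们有 m 个样本、n 个特征,对于每一个样本的结果,每个特征都需要占一定的权重,即:
$$y = \theta_0 + \theta_1 x_1 + \theta_2 x_2 + \cdots + \theta_n x_n$$
当样本集为:
$$X = \begin{pmatrix} x_1^{(1)} & x_2^{(1)} & \cdots & x_n^{(1)} \\ x_1^{(2)} & x_2^{(2)} & \cdots & x_n^{(2)} \\ \vdots & \vdots & \ddots & \vdots \\ x_1^{(m)} & x_2^{(m)} & \cdots & x_n^{(m)} \end{pmatrix}$$
输出结果为:
$$y = \left(y^{(1)}, y^{(2)}, \ldots, y^{(m)}\right)^T$$
基于多个特征得到的第 i 个样本的预测值为:
$$\hat{y}^{(i)} = \theta_0 + \theta_1 x_1^{(i)} + \theta_2 x_2^{(i)} + \cdots + \theta_n x_n^{(i)}$$
假设有矩阵(在 X 的最左侧补上一列全为 1 的列,对应截距项 $\theta_0$):
$$X_b = \begin{pmatrix} 1 & x_1^{(1)} & \cdots & x_n^{(1)} \\ 1 & x_1^{(2)} & \cdots & x_n^{(2)} \\ \vdots & \vdots & \ddots & \vdots \\ 1 & x_1^{(m)} & \cdots & x_n^{(m)} \end{pmatrix}, \qquad \theta = \left(\theta_0, \theta_1, \ldots, \theta_n\right)^T$$
则全部样本的预测值可以写成 $\hat{y} = X_b \theta$。
多元线性回归的正规方程:
$$\theta = \left(X_b^T X_b\right)^{-1} X_b^T y$$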
多元线性回归正规方程解的代码实现:
"""coding:utf-8"""
import numpy as np
from play_ML.metrics import r2_score
class LinearRegression(object):
    """Multiple linear regression fitted with the normal equation.

    The parameter vector is computed in closed form as
    ``theta = (X_b^T X_b)^-1 X_b^T y``, where ``X_b`` is the training
    matrix with a leading column of ones for the intercept term.

    Attributes:
        coef: Per-feature weights (``theta[1:]``); ``None`` until ``fit``.
        intercept: Intercept term (``theta[0]``); ``None`` until ``fit``.
    """

    def __init__(self):
        """Create an unfitted model; every parameter starts as ``None``."""
        self.coef = None
        self.intercept = None
        # Full parameter vector [intercept, coef...]; used by predict().
        self._theta = None

    def fit(self, x_train, y_train):
        """Train the model on ``x_train`` / ``y_train`` via the normal equation.

        Args:
            x_train: 2-D array of training samples, one row per sample.
            y_train: Targets; its first dimension must match ``x_train``.

        Returns:
            ``self``, so calls can be chained.

        Raises:
            AssertionError: If the sample counts of the inputs differ.
            numpy.linalg.LinAlgError: If ``X_b^T X_b`` is singular.
        """
        assert x_train.shape[0] == y_train.shape[0], \
            "the size of X_train must be equal to the size of y_train"

        # Prepend a column of ones so theta[0] acts as the intercept.
        x_b = np.hstack([np.ones((len(x_train), 1)), x_train])
        self._theta = np.linalg.inv(x_b.T.dot(x_b)).dot(x_b.T).dot(y_train)

        self.intercept = self._theta[0]
        self.coef = self._theta[1:]
        return self

    def predict(self, x_predict):
        """Return the predicted targets for the samples in ``x_predict``.

        Args:
            x_predict: 2-D array with the same number of columns as the
                training data.

        Returns:
            The product of ``[1 | x_predict]`` and the fitted parameters.

        Raises:
            AssertionError: If the model is not fitted yet, or if the
                feature count differs from the training data.
        """
        # Read the attributes that fit() actually sets (coef / intercept);
        # the trailing-underscore names are never assigned on this class.
        assert self.intercept is not None and self.coef is not None, \
            "must fit before predict!"
        assert x_predict.shape[1] == len(self.coef), \
            "the feature number of X_predict must be equal to X_train"

        x_b = np.hstack([np.ones((len(x_predict), 1)), x_predict])
        return x_b.dot(self._theta)

    def score(self, x_test, y_test):
        """Score the model on a test set.

        Predicts ``x_test`` and returns ``r2_score(y_test, y_predict)``.
        """
        y_predict = self.predict(x_test)
        return r2_score(y_test, y_predict)

    def __repr__(self):
        """Return the fixed display name of the model."""
        return "LinearRegression"