一、线性回归
# -*- coding: utf-8 -*-
from numpy import *
def loadData(fileName):
    """Load a tab-separated numeric data file into features and labels.

    Every line is split on tab characters. The last field of a line is its
    label; the leading fields are its features. The number of features is
    taken from the first line of the file (field count minus one) and is
    applied to every line.

    Args:
        fileName: path of the text file to read.

    Returns:
        A ``(dataMat, labelMat)`` tuple: ``dataMat`` is a list with one
        list of floats (the features) per line, ``labelMat`` is a flat
        list with one float (the label) per line. An empty file yields
        ``([], [])``.

    Raises:
        ValueError: if a field cannot be converted with ``float``.
        IndexError: if a line has fewer fields than the first line.
    """
    # Read everything through a context manager so the handle is always
    # closed; the file is opened once instead of twice.
    with open(fileName) as fr:
        lines = fr.readlines()

    # Feature count = number of tab-separated fields on the first line,
    # minus the trailing label column. An empty file gives zero features.
    numFeat = len(lines[0].split('\t')) - 1 if lines else 0

    dataMat = []
    labelMat = []
    for line in lines:
        curLine = line.strip().split('\t')
        # Index-based access (not slicing) so a short line raises instead
        # of being silently truncated.
        dataMat.append([float(curLine[i]) for i in range(numFeat)])
        labelMat.append(float(curLine[-1]))
    return dataMat, labelMat
def standRegress(xArr, yArr):
    """Fit ordinary least squares weights via the normal equations.

    Computes ``ws = (X^T X)^-1 X^T y``.

    Args:
        xArr: 2-D sequence of feature rows (one row per sample).
        yArr: flat sequence of target values, one per sample; it is
            converted to a matrix and transposed into a column vector.

    Returns:
        The weight column vector as a numpy matrix, or ``None`` (after
        printing a message) when ``X^T X`` has a determinant of exactly
        zero and therefore cannot be inverted.
    """
    # `asmatrix` is the function the old `mat` alias pointed to; the alias
    # was dropped from the NumPy 2.0 namespace, `asmatrix` exists in both.
    xMat = asmatrix(xArr)
    yMat = asmatrix(yArr).T
    xTx = xMat.T * xMat
    # NOTE: an exact comparison only catches exactly singular matrices;
    # nearly singular ones still pass and may give unstable weights.
    if linalg.det(xTx) == 0.0:
        # Parenthesised form prints the same text on Python 2 and 3.
        print("this matrix is singular , cannot do inverse ")
        return None
    # Closed-form least-squares solution.
    ws = xTx.I * (xMat.T * yMat)
    return ws
进行回归预测: