前向逐步回归算法可以得到与 lasso 相近的效果,但实现起来更加简单。它属于一种贪心算法,即每一步都尽可能地减小误差。
数据如下:
图片
from numpy import *
def rssError(yArr, yHatArr):
    """Return the residual sum of squares between targets and predictions.

    Args:
        yArr: Observed values; a NumPy array or any array-like such as a
            plain list of floats.
        yHatArr: Predicted values, broadcast-compatible with ``yArr``.

    Returns:
        The sum of the element-wise squared differences ``yArr - yHatArr``.
    """
    # Coerce to ndarrays so plain Python lists (which do not support `-`)
    # work too, and so that `**` is always element-wise: on an np.matrix
    # input `** 2` would otherwise be a matrix power.
    residuals = asarray(yArr) - asarray(yHatArr)
    return (residuals ** 2).sum()
def loadDataSet(fileName):
    """Load a tab-separated numeric file into feature rows and labels.

    Every non-blank line is one sample: the last column is the label and
    the leading columns are the features. The number of feature columns is
    taken from the first non-blank line.

    Args:
        fileName: Path of the tab-delimited text file to read.

    Returns:
        A tuple ``(dataMat, labelMat)`` where ``dataMat`` is a list of
        per-sample feature lists (floats) and ``labelMat`` is the list of
        float labels. Both are empty when the file has no data lines.

    Raises:
        ValueError: If a field cannot be parsed as a float.
        IndexError: If a row has fewer columns than the first row.
    """
    dataMat = []
    labelMat = []
    numFeat = None
    # One pass inside a context manager: the file is opened once and is
    # always closed, even if parsing a row raises.
    with open(fileName) as fr:
        for line in fr:
            curLine = line.strip().split('\t')
            if curLine == ['']:
                # Blank line (e.g. a trailing newline at end of file).
                continue
            if numFeat is None:
                # Everything except the last column is a feature.
                numFeat = len(line.split('\t')) - 1
            # Index explicitly (rather than slicing) so a short row fails
            # loudly instead of silently yielding a ragged sample.
            dataMat.append([float(curLine[i]) for i in range(numFeat)])
            labelMat.append(float(curLine[-1]))
    return dataMat, labelMat
def regularize(xMat):#regularize by columns
inMat = xMat.copy()
inMeans =