# Forward stagewise regression
from numpy import *
import numpy as np
import pylab as pl
def loadDataSet(fileName):
    """Load a tab-separated numeric file into feature rows and labels.

    Each non-blank line is split on tabs. The last field is the label and
    the fields before it are the features. The number of features is taken
    from the first non-blank line (field count minus one).

    Args:
        fileName: Path of the text file to read.

    Returns:
        A ``(dataMat, labelMat)`` tuple. ``dataMat`` is a list with one list
        of floats per row; ``labelMat`` is a list with one float per row.
        Both are empty when the file holds no data lines.

    Raises:
        ValueError: If a field cannot be converted to ``float``.
        IndexError: If a line has fewer fields than the first line.
    """
    # Read everything inside ``with`` so the handle is always closed.
    # Blank lines (e.g. a trailing newline at end of file) are dropped here
    # instead of failing later on ``float('')``.
    with open(fileName) as fr:
        rows = [line for line in fr if line.strip()]

    # Counted on the raw (unstripped) first line, as the original did.
    numFeat = len(rows[0].split('\t')) - 1 if rows else 0

    dataMat = []
    labelMat = []
    for line in rows:
        curLine = line.strip().split('\t')
        # Index explicitly (no slicing) so a short row still raises IndexError.
        dataMat.append([float(curLine[i]) for i in range(numFeat)])
        labelMat.append(float(curLine[-1]))
    return dataMat, labelMat
def rssError(yArr, yHatArr):
    """Return the residual sum of squares between targets and predictions.

    Args:
        yArr: Observed values (array-like).
        yHatArr: Predicted values, broadcast-compatible with ``yArr``.

    Returns:
        The sum over all elements of ``(yArr - yHatArr) ** 2``.
    """
    # Coerce to plain ndarrays first: on an np.matrix, ``** 2`` is a matrix
    # power (and fails for column vectors), and plain lists do not support
    # ``-``. ndarray inputs pass through unchanged.
    residual = np.asarray(yArr) - np.asarray(yHatArr)
    return (residual ** 2).sum()
def stagewise(xMat, yMat, numit = 1000, eps = 0.005):
    """Fit linear-regression weights by greedy forward stagewise steps.

    The targets are mean-centred and every feature column is mean-centred
    and scaled. Starting from all-zero weights, each iteration tries moving
    every single weight by ``+eps`` and ``-eps``, keeps the one move that
    gives the lowest residual sum of squares, and records the resulting
    weight vector.

    The iteration index and the current weights are printed on every
    iteration.

    Args:
        xMat: 2-D array-like of features, one row per sample.
        yMat: Target values; converted to a matrix and transposed, so a flat
            sequence of labels becomes a column vector.
        numit: Number of iterations to run.
        eps: Step size applied to a single weight per iteration.

    Returns:
        An ndarray of shape ``(numit, n)`` where row ``i`` holds the weights
        after iteration ``i`` and ``n`` is the number of feature columns.
    """
    # ``np.asmatrix`` is what the bare ``mat`` alias stood for; the alias
    # itself is no longer exported by NumPy 2.x.
    xMat = np.asmatrix(xMat)
    yMat = np.asmatrix(yMat).T
    yMat = yMat - np.mean(yMat, 0)

    # NOTE(review): columns are divided by their variance, not their standard
    # deviation. Kept as-is so existing results do not change -- confirm this
    # scaling is intended.
    xVar = np.var(xMat, 0)
    # A constant column has zero variance; dividing by it would give NaN and
    # make every error comparison below fail. Scale such columns by 1 instead
    # (they are all-zero after centring anyway).
    xVar = np.where(xVar == 0, 1.0, xVar)
    xMat = (xMat - np.mean(xMat, 0)) / xVar

    n = np.shape(xMat)[1]
    yArr = yMat.A  # loop-invariant ndarray view of the centred targets
    returnMat = np.zeros((numit, n))
    ws = np.zeros((n, 1))
    wsMax = ws.copy()
    for i in range(numit):
        # Single-argument parenthesised print emits the same text on
        # Python 2 and Python 3.
        print(i)
        print(ws.T)
        lowestError = np.inf
        for j in range(n):
            for sign in (-1, 1):
                wsTest = ws.copy()
                wsTest[j] += eps * sign
                yTest = xMat * wsTest
                rssE = rssError(yArr, yTest.A)
                # Strict ``<`` keeps the first candidate on ties.
                if rssE < lowestError:
                    lowestError = rssE
                    wsMax = wsTest
        ws = wsMax.copy()
        returnMat[i, :] = ws.T
    return returnMat
def show(ws):
    """Pass ``ws`` to ``pl.plot`` and then call ``pl.show``.

    Args:
        ws: Data to plot; forwarded to ``pl.plot`` unchanged.
    """
    pl.plot(ws)
    pl.show()
def main(fileName=r"C:\Users\l\Desktop\abalone.txt"):
    """Load a data set, run stagewise regression, then print and plot it.

    Args:
        fileName: Path of the tab-separated data file passed to
            ``loadDataSet``. Defaults to the path that was previously
            hard-coded here, so calling ``main()`` behaves as before.
    """
    xArr, yArr = loadDataSet(fileName)
    ws = stagewise(xArr, yArr)
    # Parenthesised single-argument print works on Python 2 and Python 3.
    print(ws)
    show(ws)
# Run the demo only when this file is executed as a script.
if __name__ == '__main__':
    main()
