dnn回归预测_机器学习(回归预测数值型数据)

这篇博客介绍了如何利用经典回归方法对数值型数据进行预测,包括数据加载、标准线性回归、局部加权线性回归、岭回归和前向逐步回归算法,并提供了实际应用示例和交叉验证方法。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

from numpy import *


def loadDataSet(fileName):
    """Parse a tab-delimited file of floats.

    Every column but the last is a feature; the last column is the target.

    Returns:
        (dataMat, labelMat): list of feature rows and list of target values.
    """
    dataMat = []
    labelMat = []
    # 'with' guarantees the handle is closed even if a line fails to parse.
    with open(fileName) as fr:
        for line in fr:
            curLine = line.strip().split('\t')
            dataMat.append([float(v) for v in curLine[:-1]])
            labelMat.append(float(curLine[-1]))
    return dataMat, labelMat


def standRegres(xArr, yArr):
    """Ordinary least squares via the normal equations.

    Returns the weight column vector ws, or None (after printing a message)
    when X^T X is singular and cannot be inverted.
    """
    xMat = mat(xArr)
    yMat = mat(yArr).T
    xTx = xMat.T * xMat
    if linalg.det(xTx) == 0.0:
        print("This matrix is singular, cannot do inverse")
        return
    ws = xTx.I * (xMat.T * yMat)
    return ws


def lwlr(testPoint, xArr, yArr, k=1.0):
    """Locally weighted linear regression for a single query point.

    Each training point is weighted with a Gaussian kernel of bandwidth k
    (smaller k fits more locally). Returns the prediction testPoint * ws
    as a 1x1 matrix, or None when the weighted X^T X is singular.
    """
    xMat = mat(xArr)
    yMat = mat(yArr).T
    m = shape(xMat)[0]
    weights = mat(eye(m))
    for j in range(m):  # build the diagonal kernel-weight matrix
        diffMat = testPoint - xMat[j, :]
        # This assignment had been fused into a comment by the broken
        # formatting; without it, weights stays the identity matrix.
        weights[j, j] = exp(diffMat * diffMat.T / (-2.0 * k ** 2))
    xTx = xMat.T * (weights * xMat)
    if linalg.det(xTx) == 0.0:
        print("This matrix is singular, cannot do inverse")
        return
    ws = xTx.I * (xMat.T * (weights * yMat))
    return testPoint * ws


def lwlrTest(testArr, xArr, yArr, k=1.0):
    """Apply lwlr() to every point in testArr; return the predictions."""
    m = shape(testArr)[0]
    yHat = zeros(m)
    for i in range(m):
        yHat[i] = lwlr(testArr[i], xArr, yArr, k)
    return yHat


def lwlrTestPlot(xArr, yArr, k=1.0):
    """Same as lwlrTest but evaluates on a column-sorted copy of X.

    Sorting makes the result directly plottable as a curve.
    Returns (yHat, xCopy).
    """
    yHat = zeros(shape(yArr))
    xCopy = mat(xArr)
    xCopy.sort(0)
    for i in range(shape(xArr)[0]):
        yHat[i] = lwlr(xCopy[i], xArr, yArr, k)
    return yHat, xCopy


def rssError(yArr, yHatArr):
    """Residual sum of squares; both arguments must be numpy arrays."""
    return ((yArr - yHatArr) ** 2).sum()


def ridgeRegres(xMat, yMat, lam=0.2):
    """Ridge regression: solve (X^T X + lam*I) ws = X^T y.

    Returns None (after printing a message) when the regularized matrix is
    still singular, which can only happen with lam == 0.
    """
    xTx = xMat.T * xMat
    denom = xTx + eye(shape(xMat)[1]) * lam
    if linalg.det(denom) == 0.0:
        print("This matrix is singular, cannot do inverse")
        return
    ws = denom.I * (xMat.T * yMat)
    return ws


def ridgeTest(xArr, yArr):
    """Run ridge regression over 30 lambda values, exp(-10) .. exp(19).

    Inputs are standardized first (y mean-centred; X mean-centred and
    divided by its per-column variance) so the coefficients are comparable
    across lambdas. Returns a (30, n) array: one weight vector per lambda.
    """
    xMat = mat(xArr)
    yMat = mat(yArr).T
    yMean = mean(yMat, 0)
    yMat = yMat - yMean  # centring y eliminates the intercept (X0) term
    # regularize X's: calc mean then subtract; calc variance then divide
    xMeans = mean(xMat, 0)
    xVar = var(xMat, 0)
    xMat = (xMat - xMeans) / xVar
    numTestPts = 30
    wMat = zeros((numTestPts, shape(xMat)[1]))
    for i in range(numTestPts):
        ws = ridgeRegres(xMat, yMat, exp(i - 10))  # lambda on a log scale
        wMat[i, :] = ws.T
    return wMat


def regularize(xMat):
    """Standardize xMat by columns: subtract the mean, divide by the variance."""
    inMat = xMat.copy()
    inMeans = mean(inMat, 0)
    inVar = var(inMat, 0)
    inMat = (inMat - inMeans) / inVar
    return inMat


def stageWise(xArr, yArr, eps=0.01, numIt=100):
    """Forward stagewise linear regression (a greedy lasso approximation).

    At each of numIt iterations, nudge the single coefficient (by +/- eps)
    that most reduces the RSS on the standardized data, printing the
    weight vector as it goes.

    Returns:
        (numIt, n) array with the weight vector after each iteration.
    """
    xMat = mat(xArr)
    yMat = mat(yArr).T
    yMean = mean(yMat, 0)
    yMat = yMat - yMean  # could also standardize y, but coefs get smaller
    xMat = regularize(xMat)
    m, n = shape(xMat)
    returnMat = zeros((numIt, n))  # history of ws, one row per iteration
    ws = zeros((n, 1))
    wsTest = ws.copy()
    wsMax = ws.copy()
    for i in range(numIt):
        print(ws.T)
        lowestError = inf
        for j in range(n):
            for sign in [-1, 1]:
                wsTest = ws.copy()
                wsTest[j] += eps * sign
                yTest = xMat * wsTest
                rssE = rssError(yMat.A, yTest.A)
                # The comparison was lost in the mangled source; keep the
                # best single-coefficient move seen this iteration.
                if rssE < lowestError:
                    lowestError = rssE
                    wsMax = wsTest
        ws = wsMax.copy()
        returnMat[i, :] = ws.T
    return returnMat

#def scrapePage(inFile,outFile,yr,numPce,origPrc):
#    from BeautifulSoup import BeautifulSoup
#    fr = open(inFile); fw=open(outFile,'a') #a is append mode writing
#    soup = BeautifulSoup(fr.read())
#    i=1
#    currentRow = soup.findAll('table', r="%d" % i)
#    while(len(currentRow)!=0):
#        title = currentRow[0].findAll('a')[1].text
#        lwrTitle = title.lower()
#        if (lwrTitle.find('new') > -1) or (lwrTitle.find('nisb') > -1):
#            newFlag = 1.0
#        else:
#            newFlag = 0.0
#        soldUnicde = currentRow[0].findAll('td')[3].findAll('span')
#        if len(soldUnicde)==0:
#            print "item #%d did not sell" % i
#        else:
#            soldPrice = currentRow[0].findAll('td')[4]
#            priceStr = soldPrice.text
#            priceStr = priceStr.replace('$','') #strips out $
#            priceStr = priceStr.replace(',','') #strips out ,
#            if len(soldPrice)>1:
#                priceStr = priceStr.replace('Free shipping', '') #strips out Free Shipping
#            print "%s\t%d\t%s" % (priceStr,newFlag,title)
#            fw.write("%d\t%d\t%d\t%f\t%s\n" % (yr,numPce,newFlag,origPrc,priceStr))
#        i += 1
#        currentRow = soup.findAll('table', r="%d" % i)
#    fw.close()

from time import sleep
import json
from urllib.request import urlopen


def searchForSet(retX, retY, setNum, yr, numPce, origPrc):
    """Query the Google Shopping API for sale prices of one Lego set.

    NOTE(review): this API has been retired by Google; the call is kept for
    reference but will fail at runtime.

    Appends a [yr, numPce, newFlag, origPrc] feature row to retX and the
    observed selling price to retY for each listing; listings priced below
    half the original price are skipped (likely incomplete sets).
    """
    sleep(10)  # throttle to stay under the API rate limit
    myAPIstr = 'AIzaSyD2cR2KFyx12hXu6PFU-wrWot3NXvko8vY'
    searchURL = 'https://www.googleapis.com/shopping/search/v1/public/products?key=%s&country=US&q=lego+%d&alt=json' % (myAPIstr, setNum)
    pg = urlopen(searchURL)
    retDict = json.loads(pg.read())
    for i in range(len(retDict['items'])):
        try:
            currItem = retDict['items'][i]
            if currItem['product']['condition'] == 'new':
                newFlag = 1
            else:
                newFlag = 0
            listOfInv = currItem['product']['inventories']
            for item in listOfInv:
                sellingPrice = item['price']
                if sellingPrice > origPrc * 0.5:
                    print("%d\t%d\t%d\t%f\t%f" % (yr, numPce, newFlag, origPrc, sellingPrice))
                    retX.append([yr, numPce, newFlag, origPrc])
                    retY.append(sellingPrice)
        except Exception:  # malformed listing: report and keep scraping
            print('problem with item %d' % i)


def setDataCollect(retX, retY):
    """Collect price data for six Lego sets (set #, year, pieces, MSRP)."""
    searchForSet(retX, retY, 8288, 2006, 800, 49.99)
    searchForSet(retX, retY, 10030, 2002, 3096, 269.99)
    searchForSet(retX, retY, 10179, 2007, 5195, 499.99)
    searchForSet(retX, retY, 10181, 2007, 3428, 199.99)
    searchForSet(retX, retY, 10189, 2008, 5922, 299.99)
    searchForSet(retX, retY, 10196, 2009, 3263, 249.99)


def crossValidation(xArr, yArr, numVal=10):
    """Cross-validate ridge regression with numVal random 90/10 splits.

    For each trial, fits ridgeTest on 90% of the shuffled data, scores all
    30 lambda settings on the held-out 10%, then prints the best weight
    vector mapped back ("unregularized") to the original data scale.
    """
    m = len(yArr)
    indexList = list(range(m))  # list() so shuffle can permute in place
    errorMat = zeros((numVal, 30))  # one row per trial, one col per lambda
    for i in range(numVal):
        trainX = []
        trainY = []
        testX = []
        testY = []
        random.shuffle(indexList)
        for j in range(m):  # first 90% of the shuffle trains, rest tests
            if j < m * 0.9:
                trainX.append(xArr[indexList[j]])
                trainY.append(yArr[indexList[j]])
            else:
                testX.append(xArr[indexList[j]])
                testY.append(yArr[indexList[j]])
        wMat = ridgeTest(trainX, trainY)  # 30 weight vectors from ridge
        for k in range(30):  # loop over all of the ridge estimates
            matTestX = mat(testX)
            matTrainX = mat(trainX)
            meanTrain = mean(matTrainX, 0)
            varTrain = var(matTrainX, 0)
            # standardize the test data with the *training* parameters
            matTestX = (matTestX - meanTrain) / varTrain
            yEst = matTestX * mat(wMat[k, :]).T + mean(trainY)
            errorMat[i, k] = rssError(yEst.T.A, array(testY))
    meanErrors = mean(errorMat, 0)  # average error per lambda setting
    minMean = float(min(meanErrors))
    bestWeights = wMat[nonzero(meanErrors == minMean)]
    # Undo the standardization: we fitted on Xreg = (x - meanX) / var(x),
    # so in raw-x terms the model is x*w/var(x) - meanX*w/var(x) + meanY.
    xMat = mat(xArr)
    yMat = mat(yArr).T
    meanX = mean(xMat, 0)
    varX = var(xMat, 0)
    unReg = bestWeights / varX
    print("the best model from Ridge Regression is:\n", unReg)
    print("with constant term:", -1 * sum(multiply(meanX, unReg)) + mean(yMat))

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值