from numpy import *
# Toy data set
def loadSimpData():
    """Return a small hard-coded data set together with its class labels.

    Returns:
        A ``(datMat, classLabels)`` pair: a 5x2 numpy matrix with one sample
        per row, and a list of five labels (each 1.0 or -1.0) in row order.
    """
    # Every entry pairs one sample's two feature values with its label.
    labelled_points = (
        ((1.0, 2.1), 1.0),
        ((2.0, 1.1), 1.0),
        ((1.3, 1.0), -1.0),
        ((1.0, 1.0), -1.0),
        ((2.0, 1.0), 1.0),
    )
    features = matrix([list(point) for point, _ in labelled_points])
    labels = [label for _, label in labelled_points]
    return features, labels
# Threshold-comparison classifier (decision stump)
def stumpclassify(datamatrix,dim,threshval,threshineq):
    """Label every sample by comparing one feature against a threshold.

    Args:
        datamatrix: 2-D sample data, one row per sample. A numpy matrix,
            a 2-D ndarray or a nested list are all accepted.
        dim: column index of the feature to test.
        threshval: value the selected feature is compared against.
        threshineq: ``'lt'`` (or the legacy spelling ``'It'``) assigns -1.0
            to samples whose feature value is ``<= threshval``; any other
            value assigns -1.0 to samples whose feature value is
            ``> threshval``.

    Returns:
        An ``(m, 1)`` float ndarray holding 1.0 or -1.0 for each of the
        m samples.
    """
    # asarray turns a matrix or nested list into a plain ndarray, so the
    # selected column is always a flat vector of length m.
    column = asarray(datamatrix)[:, dim]
    # 'It' (capital i) is the spelling the stump builder in this file emits
    # and stores; 'lt' is accepted as well so that the intended spelling no
    # longer falls through silently to the "greater than" rule.
    if threshineq in ('It', 'lt'):
        negative = column <= threshval
    else:
        negative = column > threshval
    retarry = ones((shape(datamatrix)[0], 1))  # start with every sample at +1
    retarry[negative] = -1.0  # flat boolean mask selects whole rows
    return retarry
# Decision-stump (single-level decision tree) builder
def buildstump(datamatrix,classlabel,D):
    """Search for the decision stump with the lowest weighted error.

    For every feature column, candidate thresholds are stepped from one step
    below the column minimum up to the column maximum (the range is split
    into 10 steps), and both inequality directions are tried at each one.

    Args:
        datamatrix: 2-D sample data, one row per sample.
        classlabel: sequence of class labels, one per sample.
        D: ``(m, 1)`` column of sample weights.

    Returns:
        A ``(buzyjg, minerror, bestclasest)`` tuple:
        ``buzyjg`` is a dict with keys ``'dim'``, ``'thresh'`` and ``'inseq'``
        describing the best stump (empty if the data has no columns);
        ``minerror`` is its weighted error as a 1x1 matrix (``inf`` if no
        stump was evaluated); ``bestclasest`` is the ``(m, 1)`` prediction
        of that stump.
    """
    # asmatrix replaces the `mat` alias, which NumPy 2.0 removed.
    datamat = asmatrix(datamatrix)
    label = asmatrix(classlabel).T  # column vector of labels
    weight_row = asmatrix(D).T  # 1 x m, computed once for all candidates
    m, n = shape(datamat)
    numites = 10.0  # number of steps across each feature's value range
    buzyjg = {}
    bestclasest = asmatrix(zeros((m, 1)))
    minerror = inf
    for i in range(n):  # every feature column
        rangmin = datamat[:, i].min()
        rangmax = datamat[:, i].max()
        rangzhi = (rangmax - rangmin) / numites  # step size
        for j in range(-1, int(numites) + 1):
            # The threshold does not depend on the inequality direction,
            # so it is computed once per step.
            thresh = rangmin + float(j) * rangzhi
            # 'It' is the spelling stumpclassify recognises for "less than".
            for inseq in ['It', 'gt']:
                predictval = stumpclassify(datamat, i, thresh, inseq)
                # 1.0 where the prediction is wrong, 0.0 where it is right.
                error = asmatrix(predictval != label, dtype=float)
                weighterror = weight_row * error  # 1x1 matrix
                if weighterror < minerror:
                    # Remember the best feature, threshold and direction.
                    minerror = weighterror
                    bestclasest = predictval.copy()
                    buzyjg['dim'] = i
                    buzyjg['thresh'] = thresh
                    buzyjg['inseq'] = inseq
    return buzyjg, minerror, bestclasest
# AdaBoost training
def adaboosttrain(dataarr,classlabel,numit=40):
    """Train a boosted ensemble of decision stumps.

    Each round fits the best stump for the current sample weights, derives
    its vote weight ``alpha`` from its weighted error, re-weights the samples
    and adds the stump's weighted prediction to the running class estimate.
    Training stops early once the combined estimate makes no training error.
    Progress is printed every round.

    Args:
        dataarr: 2-D sample data, one row per sample.
        classlabel: sequence of class labels, one per sample.
        numit: maximum number of boosting rounds.

    Returns:
        A list of stump dicts as produced by ``buildstump``, each extended
        with an ``'alpha'`` entry.
    """
    weakclassarr = []
    m = shape(dataarr)[0]
    # asmatrix replaces the `mat` alias, which NumPy 2.0 removed.
    labels = asmatrix(classlabel).T  # column vector, built once
    D = asmatrix(ones((m, 1)) / m)  # start with uniform sample weights
    aggclass = asmatrix(zeros((m, 1)))  # running weighted vote per sample
    for _ in range(numit):
        beststump, minerr, bestclass = buildstump(dataarr, classlabel, D)
        print('D:',D.T)
        # buildstump reports the error as a 1x1 matrix; pull out the scalar
        # explicitly instead of relying on the deprecated implicit
        # float() conversion of a non-0-d array.
        err = float(asarray(minerr).item())
        # The 1e-16 floor avoids a division by zero for a perfect stump.
        alpha = float(0.5 * log((1.0 - err) / max(err, 1e-16)))
        beststump['alpha'] = alpha
        weakclassarr.append(beststump)
        print('bestcalss:',bestclass.T)
        # label * prediction is +1 for a correct sample and -1 for a wrong
        # one (assuming +/-1 labels), so correct samples are scaled by
        # exp(-alpha) and wrong ones by exp(alpha).
        expon = multiply(-1 * alpha * labels, bestclass)
        D = multiply(D, exp(expon))
        D = D / D.sum()  # renormalise so the weights sum to 1
        aggclass += alpha * bestclass
        print('aggclass:',aggclass.T)
        aggerrors = multiply(sign(aggclass) != labels, ones((m, 1)))
        errorset = aggerrors.sum() / m  # training error rate of the ensemble
        print('errorset:',errorset)
        if errorset == 0.0:
            break
    return weakclassarr
# AdaBoost classifier
def adaclassify(datamat,classify):
    """Classify samples with a trained list of weighted decision stumps.

    Args:
        datamat: 2-D sample data, one row per sample.
        classify: iterable of stump dicts, each providing the keys
            ``'dim'``, ``'thresh'``, ``'inseq'`` and ``'alpha'``.

    Returns:
        An ``(m, 1)`` matrix with the sign of the accumulated weighted vote
        for each sample (0.0 where the vote is exactly zero, e.g. when
        ``classify`` is empty).
    """
    # asmatrix replaces the `mat` alias, which NumPy 2.0 removed.
    datamatrix = asmatrix(datamat)
    m = shape(datamatrix)[0]
    aggclass = asmatrix(zeros((m, 1)))  # running weighted vote per sample
    for stump in classify:
        classest = stumpclassify(datamatrix, stump['dim'], stump['thresh'],
                                 stump['inseq'])
        aggclass += stump['alpha'] * classest
        print('aggclass:',aggclass)  # trace the vote after each stump
    return sign(aggclass)