# Code:
# coding=utf-8
from numpy import exp, log
import numpy as np
from sklearn import tree, preprocessing
import pandas as pd
class Adaboost_Mixin(object):
    """Mixin holding the AdaBoost re-weighting step and the boosting loop.

    ``main`` reads ``Xtrain``, ``Ytrain``, ``Xtest``, ``Ytest``,
    ``max_number`` and ``error`` from ``self`` and calls
    ``self.DecisionTreeClassifier`` and
    ``self.DecisionTreeClassifier_Correctly_Instances``; the host class must
    provide all of them.
    """

    def change_weight(self, clt, x, y, D):
        """Re-weight the samples after one boosting round.

        :param clt: fitted learner exposing ``predict(samples)``
        :param x: training samples, one row per sample
        :param y: labels aligned with ``x`` (flat or single-column)
        :param D: current sample weights, one per sample
        :return: ``(new_D, error)`` -- the updated weights as a list that sums
            to 1, and the weighted error rate of ``clt`` under ``D``
        :raises ValueError: if the input is empty, the lengths disagree or the
            weights do not sum to a positive value
        """
        labels = np.asarray(y).ravel()
        weights = np.asarray(D, dtype=float).ravel()
        count = len(labels)
        if count == 0:
            raise ValueError("change_weight needs at least one sample")

        # One batch prediction instead of one predict() call per row.
        predictions = np.asarray(clt.predict(x)).ravel()
        if len(predictions) != count or len(weights) != count:
            raise ValueError("x, y and D must describe the same number of samples")

        total = weights.sum()
        if not total > 0:
            raise ValueError("sample weights must sum to a positive value")
        weights = weights / total  # new array: the caller's D is not mutated

        correct = np.asarray(predictions == labels, dtype=bool)

        # Weighted error: the share of the weight mass that was misclassified.
        error = float(weights[~correct].sum())

        # Clamp on both sides so neither a perfect nor an always-wrong learner
        # produces log(0) or a division by zero.
        clipped = min(max(error, 1e-10), 1.0 - 1e-10)
        alpha = 0.5 * np.log((1.0 - clipped) / clipped)

        # Shrink the weight of correctly classified samples, grow the others,
        # then renormalise so the weights form a distribution again.
        updated = weights * np.exp(np.where(correct, -alpha, alpha))
        updated = updated / updated.sum()
        return updated.tolist(), error

    def main(self):
        """Run the boosting rounds and score the last learner on the test split.

        Stops after ``self.max_number`` rounds, or earlier as soon as the
        error returned by ``change_weight`` drops below ``self.error``.

        :return: the value returned by
            ``DecisionTreeClassifier_Correctly_Instances`` for the last
            fitted learner
        :raises ValueError: if the training set is empty or
            ``self.max_number`` is smaller than 1
        """
        length = len(self.Xtrain)
        if length == 0:
            raise ValueError("training set is empty")
        if self.max_number < 1:
            raise ValueError("max_number must be at least 1")

        D = [1.0 / length] * length  # start from the uniform distribution
        clt = None
        for _ in range(self.max_number):
            clt = self.DecisionTreeClassifier(self.Xtrain, self.Ytrain, D)
            D, error = self.change_weight(clt, self.Xtrain, self.Ytrain, D=D)
            if error < self.error:
                break

        # NOTE(review): only the last learner is scored; the per-round
        # learners are not combined into a weighted vote.
        return self.DecisionTreeClassifier_Correctly_Instances(
            self.Xtest, self.Ytest, clt)
class Deal(object):
    """Load a CSV data set, split it with the bootstrap and fit tree learners."""

    def __init__(self, file_name, Class_Values, max_number=10, error=1e-5,
                 random_state=None):
        """Read the data and store the boosting settings.

        :param file_name: path of the CSV file handed to ``pandas.read_csv``
        :param Class_Values: name of the label column
        :param max_number: stored as ``self.max_number`` (iteration limit)
        :param error: stored as ``self.error`` (early-stop threshold)
        :param random_state: optional seed for the bootstrap split and the
            decision trees; ``None`` keeps the unseeded behaviour
        """
        self.file_name = file_name
        self.Class_Values = Class_Values
        self.max_number = max_number
        self.error = error
        self.random_state = random_state
        self.Xtrain, self.Ytrain, self.Xtest, self.Ytest = self.read(
            self.file_name, self.Class_Values, random_state)

    def read(self, file_name, Class_Values, random_state=None):
        """Load the CSV and return standardised train/test features and labels.

        :param file_name: path of the CSV file
        :param Class_Values: name of the label column
        :param random_state: optional seed forwarded to ``Bootstrap``
        :return: ``(Xtrain, Ytrain, Xtest, Ytest)``; features are z-scored
            arrays, labels are flat arrays
        :raises KeyError: if the label column is not in the file
        :raises ValueError: if the bootstrap leaves no out-of-bag rows
        """
        data = pd.read_csv(file_name)
        if Class_Values not in data.columns:
            raise KeyError("label column %r not found in %r"
                           % (Class_Values, file_name))

        train, test = self.Bootstrap(data, random_state=random_state)
        if test.empty:
            raise ValueError("bootstrap left no out-of-bag rows for the test split")

        train_features = train.drop(columns=[Class_Values])
        test_features = test.drop(columns=[Class_Values])

        # Z-score with statistics learned on the training split only, so both
        # splits end up on the same scale.
        scaler = preprocessing.StandardScaler().fit(train_features)
        Xtrain = scaler.transform(train_features)
        Xtest = scaler.transform(test_features)

        # Flat label arrays: the shape scikit-learn expects for a target.
        Ytrain = np.asarray(train[Class_Values]).ravel()
        Ytest = np.asarray(test[Class_Values]).ravel()
        return Xtrain, Ytrain, Xtest, Ytest

    def Bootstrap(self, data, random_state=None):
        """Split ``data`` with the bootstrap method.

        :param data: DataFrame to split
        :param random_state: optional seed passed to ``DataFrame.sample``
        :return: ``(train, test)`` -- ``train`` has ``len(data)`` rows drawn
            with replacement, ``test`` holds the rows whose index label was
            never drawn
        """
        train = data.sample(frac=1.0, replace=True, random_state=random_state)
        test = data.loc[data.index.difference(train.index)].copy()
        return train, test

    def DecisionTreeClassifier(self, X, Y, D):
        """Fit a decision tree on ``X``/``Y`` using ``D`` as sample weights.

        :param X: training features
        :param Y: training labels
        :param D: per-sample weights
        :return: the fitted classifier
        """
        # getattr keeps this usable on instances that never ran __init__.
        ctf = tree.DecisionTreeClassifier(
            random_state=getattr(self, "random_state", None))
        ctf.fit(X, Y, sample_weight=D)
        return ctf

    def DecisionTreeClassifier_Correctly_Instances(self, Xtest, Ytest, clt):
        """Return ``clt.score(Xtest, Ytest)``, the accuracy on the given data."""
        return clt.score(Xtest, Ytest)
class Adaboost(Deal, Adaboost_Mixin):
    """AdaBoost entry class built from its two parent classes.

    Inherits from ``Deal`` and ``Adaboost_Mixin`` and adds nothing of its own.
    """
if __name__ == '__main__':
    # The two string arguments are placeholders (their text reads "relative
    # path" and "label ID"); presumably they are to be replaced with a real
    # CSV path and label column name before running.
    booster = Adaboost('相对路径', '标签ID', max_number=10, error=1e-10)
    print('准确率:', booster.main())