Adaboost算法在机器学习中的应用-优快云博客

本文链接：https://blog.youkuaiyun.com/hexingwei/article/details/50724439

import numpy as np
class adaboost():
    def __init__(self,traindata,epo=100,numstep=100):
        self.x=traindata[:,:-1]
        self.y=traindata[:,-1]
        self.epo=epo
        self.numstep=numstep
        self.classfier=[]
        self.m,self.n=self.x.shape
        self.D=np.ones((self.m))/self.m
    def tree_classify(self,x,dimen_i,thresh_val,thresh_op):
        predict=np.ones((x.shape[0],1))
        if thresh_op=="lt":
            predict[x[:,dimen_i]<=thresh_val]=-1
        else:
            predict[x[:,dimen_i]>thresh_val]=-1
        return predict
    def build_tree(self):
        min_error=np.inf
        best_tree={}
        best_predict=None
        for i in range(self.n):
            min_val=self.x[:,i].min()
            max_val=self.x[:,i].max()
            step_val=(max_val-min_val)/self.numstep
            for j in range(-1,self.numstep+1):
                thresh_val=min_val+j*step_val
                for op in ["lt","gt"]:
                    predict_val=self.tree_classify(self.x,i,thresh_val,op)
                    temp=np.zeros(self.m)
                    temp[predict_val[:,0]!=self.y]=1
                    error=temp.dot(self.D)
                    if min_error>error:
                        min_error=error
                        best_tree["op"]=op
                        best_tree["dim"]=i
                        best_tree["thresh_val"]=thresh_val
                        best_predict=predict_val.copy()
        return min_error,best_tree,best_predict

    def fit(self):
        agg_predict_y=np.zeros((self.m,1))
        for i in range(self.epo):
            error,best_tree,predict_y=self.build_tree()
            alpha=0.5*np.log((1-error)/max(error,1e-16))
            best_tree["alpha"]=alpha
            self.classfier.append(best_tree)
            #update self.D
            self.D=self.D*np.exp(-alpha*self.y*predict_y[:,0])
            self.D/=self.D.sum()
            #calc training error of all classifiers, if this is 0 quit for loop early (use break)
            agg_predict_y+=alpha*predict_y
            total_error=(np.sign(agg_predict_y[:,0])!=self.y).sum()/float(self.m)
            print "total error: %f"%total_error
            if total_error==0:
                break
    def predict(self,x,y):
        m,n=x.shape
        agg_predict_y=np.zeros((m,1))
        for classifier in self.classfier:
            predict_y=self.tree_classify(x,classifier["dim"],classifier["thresh_val"],classifier["op"])
            agg_predict_y+=classifier["alpha"]*predict_y
        return np.sign(agg_predict_y)
    def accuracy(self,x,y):
        predict=self.predict(x,y)
        m=y.shape[0]
        accuracy=(predict[:,0]==y).sum()/float(m)
        print "test accuracy is %f"%accuracy

def loaddata(path):
    data=np.loadtxt(path)
    return data
train_path="D:\\SelfLearning\\Machine Learning\\MachineLearningInAction\\machinelearninginaction\\Ch07\\horseColicTraining2.txt"
test_path="D:\\SelfLearning\\Machine Learning\\MachineLearningInAction\\machinelearninginaction\\Ch07\\horseColicTest2.txt"
train_data=loaddata(train_path)
test_data=loaddata(test_path)
ada=adaboost(train_data)
ada.fit()
ada.accuracy(test_data[:,:-1],test_data[:,-1])

实验结果：

total error: 0.163880
test accuracy is 0.820896