目录
梯度提升分类树二分类原理
import numpy as np
from sklearn.ensemble import GradientBoostingClassifier
from sklearn import tree
import matplotlib.pyplot as plt
# Toy data set: ten 1-D samples x = 1..10 whose labels follow the pattern
# 0,0,0,1,1 repeated twice (4 positives, 6 negatives).
xi = np.arange(1, 11)
yi = np.tile([0, 0, 0, 1, 1], 2)

# Three boosting rounds of depth-1 trees, small enough that every step can be
# re-derived by hand further down.
gbdt = GradientBoostingClassifier(n_estimators=3, max_depth=1)
features = xi.reshape(-1, 1)  # single feature column: shape (10, 1)
gbdt.fit(features, yi)

# Shape of the array holding the fitted trees.
print(gbdt.estimators_.shape)

# Draw the tree fitted in the first boosting round.
first_tree = gbdt.estimators_[0, 0]
_ = tree.plot_tree(first_tree, filled=True)
plt.show()

# Variance of the raw labels, printed under the "mean squared error" caption.
print('均方误差:', np.var(yi))
# Building the first decision tree
# Initial raw score F0: log-odds of the positive class,
# log(number of positives / number of negatives) -- log(4/6) for this data.
n_pos = yi.sum()
n_neg = yi.size - n_pos
F0 = np.log(n_pos / n_neg)
print('F0=', F0)

# Residuals (the negative gradient): label minus the probability sigmoid(F0).
yi_1 = yi - 1 / (1 + np.exp(-F0))
print('yi_1=', yi_1)

# Weighted MSE of every candidate split "first i samples go left, rest go right".
n_samples = len(yi_1)
mse1 = []
for i in range(1, n_samples + 1):
    if i == n_samples:
        # No split at all: the right side would be empty (np.var of an empty
        # slice is nan), so use the variance of the whole residual vector.
        mse1.append(np.var(yi_1))
    else:
        left, right = yi_1[:i], yi_1[i:]
        mse1.append(
            (np.var(left) * i + np.var(right) * (n_samples - i)) / n_samples
        )
mse1 = np.asarray(mse1)
print(np.round(mse1, 4))
# Two branches. Leaf value (gamma) of the left branch, i.e. the first 8 samples:
#   gamma = sum(residual) / sum(p * (1 - p)),  with p = label - residual.
left_labels, left_residuals = yi[:8], yi_1[:8]
left_weights = (left_labels - left_residuals) * (1 - left_labels + left_residuals)
gamma1 = np.round(left_residuals.sum() / left_weights.sum(), 3)
print('gamma1=', gamma1)

# Leaf value (gamma) of the right branch, i.e. the last 2 samples.
right_labels, right_residuals = yi[8:], yi_1[8:]
right_weights = (right_labels - right_residuals) * (1 - right_labels + right_residuals)
gamma2 = np.round(right_residuals.sum() / right_weights.sum(), 3)
print('gamma2=', gamma2)
# Output of the first tree for every sample: gamma1 for the 8 samples in the
# left branch and gamma2 for the 2 samples in the right branch. The computed
# leaf values are reused instead of re-typing them as literals.
y_1 = np.asarray([gamma1] * 8 + [gamma2] * 2)

# Prediction of the fitted first tree, printed for comparison with y_1.
print(gbdt[0, 0].predict(xi.reshape(-1, 1)))

# Update F0 to F1: F1 = F0 + learning_rate * tree output.
# The rate is read from the fitted model (0.1 here) so both stay in sync.
learning_rate = gbdt.learning_rate
F1 = F0 + y_1 * learning_rate
F1 = F1.round(4)
print('F1=', F1)
# Fitting the second tree
# Residuals (the negative gradient): label minus the probability sigmoid(F1).
yi_2 = yi - 1 / (1 + np.exp(-F1))
print('yi_2=', yi_2)

# Weighted MSE of every candidate split "first i samples go left, rest go right".
n_samples = len(yi_2)
mse2 = []
for i in range(1, n_samples + 1):
    if i == n_samples:
        # No split at all: the right side would be empty (np.var of an empty
        # slice is nan), so use the variance of the whole residual vector.
        mse2.append(np.var(yi_2))
    else:
        left, right = yi_2[:i], yi_2[i:]
        mse2.append(
            (np.var(left) * i + np.var(right) * (n_samples - i)) / n_samples
        )
mse2 = np.asarray(mse2)
print(np.round(mse2, 4))
# Two branches. Leaf value (gamma) of the left branch, i.e. the first 8 samples:
#   gamma = sum(residual) / sum(p * (1 - p)),  with p = label - residual.
left_labels, left_residuals = yi[:8], yi_2[:8]
left_weights = (left_labels - left_residuals) * (1 - left_labels + left_residuals)
gamma1_1 = np.round(left_residuals.sum() / left_weights.sum(), 3)
print('gamma1_1=', gamma1_1)

# Leaf value (gamma) of the right branch, i.e. the last 2 samples.
right_labels, right_residuals = yi[8:], yi_2[8:]
right_weights = (right_labels - right_residuals) * (1 - right_labels + right_residuals)
gamma2_1 = np.round(right_residuals.sum() / right_weights.sum(), 3)
print('gamma2_1=', gamma2_1)

# Draw the tree fitted in the second boosting round.
second_tree = gbdt[1, 0]
_ = tree.plot_tree(second_tree, filled=True)
plt.show()
(3, 1)
均方误差: 0.24
F0= -0.40546510810816444
yi_1= [-0.4 -0.4 -0.4 0.6 0.6 -0.4 -0.4 -0.4 0.6 0.6]
[0.2222 0.2 0.1714 0.225 0.24 0.2333 0.2095 0.15 0.2 0.24 ]
gamma1= -0.625
gamma2= 2.5
[-0.625 -0.625 -0.625 -0.625 -0.625 -0.625 -0.625 -0.625 2.5 2.5 ]
F1= [-0.468 -0.468 -0.468 -0.468 -0.468 -0.468 -0.468 -0.468 -0.1555
-0.1555]
yi_2= [-0.38508973 -0.38508973 -0.38508973 0.61491027 0.61491027 -0.38508973
-0.38508973 -0.38508973 0.53879686 0.53879686]
[0.2062 0.1856 0.1592 0.2105 0.2224 0.2187 0.1998 0.15 0.1904 0.2227]
gamma1_1= -0.57
gamma2_1= 2.168
参考文章: https://blog.csdn.net/On_theway10/article/details/83576715?ops_request_misc=%257B%2522request%255Fid%2522%253A%2522158830485819725256722728%2522%252C%2522scm%2522%253A%252220140713.130102334.pc%255Fall.57662%2522%257D&request_id=158830485819725256722728&biz_id=0&utm_medium=distribute.pc_search_result.none-task-blog-2allfirst_rank_v2~rank_v25-1