import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy as sc
import seaborn as sns
import queue
from sklearn import datasets
from sklearn.metrics import accuracy_score,f1_score
from sklearn.tree import DecisionTreeClassifier
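# Decision Tree
# Convert the generated continuous features into categorical labels; building
# the decision tree from this discretised data is then much more convenient.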
# Generate a synthetic classification data set with scikit-learn's defaults.
x, y = datasets.make_classification()

# Discretise every feature column independently: draw five random quantile
# levels, compute the matching cut points of that column, and replace each
# value with the index of the bin it falls into (np.searchsorted).
# NOTE(review): the original comment mentioned "three quantiles", but the
# code draws five levels via np.random.rand(5) -- confirm which is intended.
x = np.apply_along_axis(
    lambda a: np.searchsorted(np.quantile(a, np.sort(np.random.rand(5))), a),
    0,
    x,
)

# Scatter the first two (binned) features, one series per class label.
for label in set(y):
    mask = y == label
    plt.plot(x[mask][:, 0], x[mask][:, 1], '*', label=label)
plt.legend()
plt.show()
# Minimum sample count: a child node is split further only when it holds
# strictly more than this many samples.
MinSample = 3

# Grow the tree iteratively: nodes waiting to be split sit in a FIFO queue.
myqueue = queue.Queue()
root = Node((x, y), value=0, iIndex=0)  # create the root node from the full data
currentDecision = root
myqueue.put(currentDecision)

while not myqueue.empty():
    # Take the next pending node and split it into child nodes.
    currentDecision = myqueue.get()
    childe_nodes = DecisionTree(currentDecision)
    # Debug aid:
    # print(myqueue.qsize(), childe_nodes[0].data[1].shape, len(childe_nodes))
    for child in childe_nodes:
        # Only children with more than MinSample samples are queued for
        # further splitting; smaller ones are left as they are.
        if len(child.data[1]) > MinSample:
            myqueue.put(child)
# Search the decision tree
# Predict a label for every sample by walking the tree from the root.
y_pred = []
for search in x:  # `search` is the feature row being looked up
    searchDecision = root
    # Descend while the current node still names a feature index to test.
    # NOTE(review): presumably iIndex is None on leaf nodes -- confirm
    # against the Node / DecisionTree definitions.
    while searchDecision.iIndex is not None:
        split_value = search[searchDecision.iIndex]
        # Follow the child whose value equals this sample's value for the
        # tested feature; raises IndexError if no child matches.
        searchDecision = [
            child for child in searchDecision.nodes if child.value == split_value
        ][0]
    # Predict True when the mean of the labels stored in the final node
    # exceeds 0.5.
    y_pred.append(np.mean(searchDecision.data[1]) > 0.5)