集成学习task12

最新推荐文章于 2025-03-21 13:57:20 发布

你扰到我学习了

最新推荐文章于 2025-03-21 13:57:20 发布

阅读量101

点赞数

文章标签：机器学习

本文链接：https://blog.youkuaiyun.com/jianmojiayou/article/details/116599720

版权

导入相关包

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

创建二分类数据集

from sklearn import datasets
from sklearn.datasets import make_blobs
from sklearn.model_selection import train_test_split

# Generate a two-cluster toy dataset for binary classification.
data, target = make_blobs(
    n_samples=10000,
    centers=2,
    random_state=1,
    cluster_std=1.0,
)

# Hold out 20% of the samples as the test set.
X_train1, X_test, y_train1, y_test = train_test_split(
    data, target, test_size=0.2, random_state=1
)

# Split the remaining samples into a training set (70%) and a validation set (30%).
X_train, X_val, y_train, y_val = train_test_split(
    X_train1, y_train1, test_size=0.3, random_state=1
)

# Report the shape of every split, in the order training, test, validation.
shape_report = (
    ("The shape of training X:", X_train),
    ("The shape of training y:", y_train),
    ("The shape of test X:", X_test),
    ("The shape of test y:", y_test),
    ("The shape of validation X:", X_val),
    ("The shape of validation y:", y_val),
)
for caption, split in shape_report:
    print(caption, split.shape)

设置分类器

from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier

# First-layer (base) classifiers. random_state is pinned to 1, the same seed the
# data split uses, so that repeated runs give the same models: SVC draws random
# numbers for its probability estimates and the random forest for its trees.
clfs = [
    SVC(probability=True, random_state=1),
    RandomForestClassifier(n_estimators=5, n_jobs=-1, criterion="gini", random_state=1),
    KNeighborsClassifier(),
]

# Second-layer (meta) classifier, fitted later on the first-layer outputs.
from sklearn.linear_model import LogisticRegression

lr = LogisticRegression()
# 输出第一层的验证集结果与测试集结果

这里初始化一个形状为 (X_val.shape[0], len(clfs)) 的数组，下面会填上每个模型预测的值

# One column per first-layer model; the loop below fills each column in.
val_features = np.zeros((X_val.shape[0], len(clfs)))  # first-layer outputs on the validation set
test_features = np.zeros((X_test.shape[0], len(clfs)))  # first-layer outputs on the test set

# Fit every first-layer model on the training set and store its predicted
# probability of class 1 (a probability, not a hard 0/1 label) as a meta-feature.
for i, clf in enumerate(clfs):
    clf.fit(X_train, y_train)
    val_feature = clf.predict_proba(X_val)[:, 1]  # column 1: probability of class 1
    print(val_feature)  # reuse the array rather than calling predict_proba a second time
    test_feature = clf.predict_proba(X_test)[:, 1]
    val_features[:, i] = val_feature
    test_features[:, i] = test_feature

试了一下也可以直接用类别，不用概率

# for i, clf in enumerate(clfs):
#     clf.fit(X_train, y_train)
#     val_feature = clf.predict(X_val)
#     print(clf.predict_proba(X_val))
#     test_feature = clf.predict(X_test)
#     val_features[:, i] = val_feature
#     test_features[:, i] = test_feature

把上面得出的预测结果当做第二层的输入，再次训练得出预测，并用5折交叉验证查看模型结果

# Train the second-layer classifier on the first-layer outputs for the
# validation set.
lr.fit(val_features, y_val)

# Predict the test labels from the first-layer outputs for the test set.
p = lr.predict(test_features)
# lr.coef_  # coefficients of the logistic-regression meta-model
# p[p !=  1] = 0
from sklearn.metrics import accuracy_score

# Print the score explicitly: a bare expression is silently discarded when
# this file runs as a script.
print("Stacking accuracy on the test set:", accuracy_score(y_test, p))

from sklearn.model_selection import cross_val_score

# NOTE(review): cross_val_score refits clones of `lr` on folds of the test-set
# meta-features, so these scores do not evaluate the `lr` fitted above on
# val_features - confirm this is the intended check.
# With no `scoring` argument the classifier's default metric, accuracy, is used.
print(
    "5-fold CV accuracy on the test meta-features:",
    cross_val_score(lr, test_features, y_test, cv=5),
)