针对 MNIST 数据集，对 sklearn 中的集成模型进行训练和测试。
部分脚本如下，完整脚本见笔者 GitHub。
import pandas as pd
import numpy as np
from sklearn.datasets import fetch_mldata
import warnings
warnings.filterwarnings(action='ignore')
def get_ministdata(data_home: str = r'..') -> pd.DataFrame:
    """Load the 'MNIST original' dataset and return it as one DataFrame.

    Args:
        data_home: Directory handed to ``fetch_mldata`` as ``data_home``
            (the download location of the ``mnist-original.mat`` file).
            Defaults to ``'..'``, the value that used to be hard-coded.

    Returns:
        A DataFrame built from ``mnist['data'] / 255`` with
        ``mnist['target']`` appended as the last column.
    """
    # NOTE(review): ``fetch_mldata`` was deprecated in scikit-learn 0.20 and
    # removed in 0.22 (``fetch_openml`` is the documented successor), so this
    # loader only runs on older releases -- confirm the pinned version.
    mnist = fetch_mldata('MNIST original', data_home=data_home)
    # Presumably the raw values are 0-255 pixel intensities, so dividing by
    # 255 rescales them to [0, 1] -- verify against the dataset.
    features = mnist['data'] / 255
    return pd.DataFrame(np.c_[features, mnist['target']])
def sklearn_clf(clf_model_func, tr: pd.DataFrame, te: pd.DataFrame) -> str:
    """Fit a classifier on ``tr`` and report its accuracy on ``te``.

    In both frames every column except the last is used as a feature and
    the last column is used as the label.

    Args:
        clf_model_func: Callable invoked with no arguments (for example an
            estimator class) that returns an object exposing ``fit(X, y)``
            and ``predict(X)``. Its ``__name__`` appears in the report.
        tr: Training data.
        te: Test data.

    Returns:
        A string of the form ``'model: <name>, acc: <pct>'`` where ``<pct>``
        is the share of correct predictions as a percentage, formatted with
        two decimals.
    """
    clf_model = clf_model_func()

    x_tr, y_tr = tr.iloc[:, :-1].values, tr.iloc[:, -1].values
    x_te, y_te = te.iloc[:, :-1].values, te.iloc[:, -1].values

    clf_model.fit(x_tr, y_tr)
    pred = clf_model.predict(x_te)

    # Vectorised mean of the boolean "prediction matches label" mask. The
    # builtin sum() would walk the NumPy array one element at a time in
    # Python, which is needlessly slow for large test sets.
    acc_ = np.mean(np.asarray(pred) == y_te) * 100
    return f'model: {clf_model_func.__name__}, acc: {acc_:.2f}'
import sklearn as skl
if __name__ == '__main__':
    # Import the submodule explicitly: a bare ``import sklearn`` does not by
    # itself guarantee that ``sklearn.ensemble`` has been loaded, so attribute
    # access on the package could otherwise fail.
    from sklearn import ensemble

    mnistdf = get_ministdata()

    # NOTE(review): 80% of the rows are sampled into the *test* split and the
    # remaining 20% are used for training -- confirm this ratio is intended.
    te_index = mnistdf.sample(frac=0.8).index.tolist()
    mnist_te = mnistdf.loc[te_index, :]
    mnist_tr = mnistdf.loc[~mnistdf.index.isin(te_index), :]

    # Train and predict with the ensemble models.
    # Names are selected purely by substring: anything containing
    # 'Classifier' except the 'Voting' one.
    # NOTE(review): sklearn_clf instantiates each class with no arguments, so
    # any listed classifier that requires constructor arguments would fail --
    # check the list printed below against the installed scikit-learn.
    ensemble_func_lst = [
        name for name in dir(ensemble)
        if 'Classifier' in name and 'Voting' not in name
    ]
    print(ensemble_func_lst)

    for clf_ in ensemble_func_lst:
        print(f'test clf_: {clf_}')
        # Plain attribute lookup; no need to eval() a generated expression.
        msg = sklearn_clf(getattr(ensemble, clf_), mnist_tr, mnist_te)
        print(msg)
"""
sklearn_clf, take_time:43.97123s >> model: AdaBoostClassifier, acc: 70.88
sklearn_clf, take_time:62.52457s >> model: BaggingClassifier, acc: 91.86
sklearn_clf, take_time:3.11310s >> model: ExtraTreesClassifier, acc: 92.34
sklearn_clf, take_time:1510.23123s >> model: GradientBoostingClassifier, acc: 93.48
sklearn_clf, take_time:3.57081s >> model: RandomForestClassifier, acc: 91.63
"""