from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import KFold, cross_val_score
from sklearn.datasets import load_digits
# Load the data (images of handwritten digits).
digits = load_digits()
features = digits.data
target = digits.target

# Build a pipeline made of two steps:
#   1. a scaler that transforms the input features to zero mean and unit variance
#   2. a logistic regression model
# Putting the scaler inside the pipeline means it is re-fitted on the training
# part of every fold rather than on the full data set.
pipeline = make_pipeline(StandardScaler(), LogisticRegression())

# Cross-validation with 10 folds; shuffling uses a fixed seed so the splits
# are the same on every run.
cv_res = cross_val_score(
    pipeline,
    features,
    target,
    cv=KFold(n_splits=10, shuffle=True, random_state=1),
    scoring='accuracy',
    n_jobs=-1,
)

# Report the mean accuracy over the folds.
print(cv_res.mean())
Baseline 模型
数值型 Baseline
# NOTE: load_boston was deprecated in scikit-learn 1.0 and removed in 1.2;
# this import only works on older scikit-learn releases.
from sklearn.datasets import load_boston
from sklearn.linear_model import LinearRegression
from sklearn.dummy import DummyRegressor
# train_test_split is provided by sklearn.model_selection; it is not exported
# by sklearn.preprocessing, so importing it from there raises ImportError.
from sklearn.model_selection import train_test_split
# 加载数据
boston = load_boston()
features, target = boston.<