python学习learning_curve和validation_curve

learning_curve用于展示在不同训练数据量下,算法的训练得分和测试得分

validation_curve用于展示某个参数取不同值时,算法的训练得分和验证得分

# -*- coding: utf-8 -*-
"""
Created on Wed Sep 06 09:55:13 2017

@author: 飘的心
"""


from sklearn.model_selection import learning_curve #加载学习曲线
from sklearn.model_selection import validation_curve #加载验证曲线
from sklearn.model_selection import ShuffleSplit #加载数据处理

from sklearn import datasets  #加载数据包
from sklearn.naive_bayes import GaussianNB#加载高斯贝叶斯
from sklearn.svm import LinearSVC #加载支持向量机

import numpy as np
import matplotlib.pyplot as plt

# Learning-curve demo: score a Gaussian naive Bayes classifier on the
# scikit-learn digits dataset while growing the training set.
data = datasets.load_digits()
x = data.data
y = data.target

# Four random shuffles, each holding out 25% of the samples for testing.
cv = ShuffleSplit(n_splits=4, test_size=0.25, random_state=0)
estimator = GaussianNB()
# Lists the estimator's parameter names. The result is discarded when the
# file runs as a script, so this is only informative in an interactive session.
estimator.get_params().keys()

# Fractions of the available training data to evaluate. Defined once and
# reused for both the learning_curve call and the plot's x axis.
train_sizes = [0.1, 0.2, 0.4, 0.6, 0.7, 0.8]
# train_size receives the first return value of learning_curve (per the
# scikit-learn docs, the absolute sample counts); it is not used below.
train_size, train_scores, test_scores = learning_curve(
    estimator, x, y, cv=cv, train_sizes=train_sizes)

# Per the scikit-learn docs the score arrays are laid out as
# (n_train_sizes, n_cv_splits), so every statistic is reduced over axis 1
# to obtain one value per training-set size. The standard deviations
# previously used no axis, which collapsed them to a single global scalar
# and drew a band of constant width.
new_train_scores = train_scores.mean(axis=1)
train_std = train_scores.std(axis=1)
test_std = test_scores.std(axis=1)
new_test_scores = test_scores.mean(axis=1)

# Plot the learning curve: mean score +/- one standard deviation per size.
plt.grid()
plt.fill_between(train_sizes, new_train_scores - train_std,
                 new_train_scores + train_std, color='r', alpha=0.1)
plt.fill_between(train_sizes, new_test_scores - test_std,
                 new_test_scores + test_std, color='g', alpha=0.1)

plt.plot(train_sizes, new_train_scores, '*-', c='r', label='train score')
plt.plot(train_sizes, new_test_scores, '*-', c='g', label='test score')
plt.legend(loc='best')
plt.show()




# Validation-curve demo: score a linear support vector classifier while
# sweeping its regularisation parameter C.
estimator2 = LinearSVC()
# Shows which parameters the estimator exposes (result is discarded when
# run as a script; useful only in an interactive session).
estimator2.get_params().keys()

# Twenty evenly spaced C values in [0.1, 1]; the same grid is used for the
# sweep and for the plot's x axis.
c_grid = np.linspace(0.1, 1, 20)
train_score2, validation_score2 = validation_curve(
    estimator2, x, y,
    param_name='C',
    param_range=c_grid,
    cv=cv,
)

x_axis = c_grid
# Reduce over axis 1 to get one mean / standard deviation per C value.
train_score2_mean = np.mean(train_score2, axis=1)
validation_score2_mean = np.mean(validation_score2, axis=1)
train_score2_std = np.std(train_score2, axis=1)
validation_score2_std = np.std(validation_score2, axis=1)

# (mean, std, colour, legend label) for each curve, in drawing order.
curves = (
    (train_score2_mean, train_score2_std, 'r', 'train score'),
    (validation_score2_mean, validation_score2_std, 'g', 'validation score'),
)

plt.grid()
# Shaded bands first (mean +/- one standard deviation) ...
for mean_, std_, colour, _ in curves:
    plt.fill_between(x_axis, mean_ - std_, mean_ + std_,
                     color=colour, alpha=0.1)
# ... then the mean lines on top of them.
for mean_, _, colour, label in curves:
    plt.plot(x_axis, mean_, 'o-', c=colour, label=label)
plt.legend(loc='best')
plt.show()




Python中,`sklearn.learning_curve`模块已经被弃用并从Scikit-learn库中移除。该模块自Scikit-learn 0.20版本起不再可用。如果你使用的是较新的Scikit-learn版本,你可以考虑使用`sklearn.model_selection`模块中的`learning_curve`函数来替代。

`learning_curve`函数可以用于绘制学习曲线,帮助我们分析模型的训练和测试误差随着训练样本数量的变化而变化的情况。它可以帮助我们判断模型是否存在欠拟合或过拟合的问题。

如果你想使用`learning_curve`函数,你需要先安装Scikit-learn库,并导入`sklearn.model_selection`模块。下面是一个示例代码:

```python
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_digits
from sklearn.model_selection import learning_curve
from sklearn.svm import SVC

# 加载数据集
digits = load_digits()
X, y = digits.data, digits.target

# 定义支持向量机分类器
clf = SVC()

# 绘制学习曲线
train_sizes, train_scores, test_scores = learning_curve(clf, X, y, cv=5)

# 计算平均值和标准差
train_mean = np.mean(train_scores, axis=1)
train_std = np.std(train_scores, axis=1)
test_mean = np.mean(test_scores, axis=1)
test_std = np.std(test_scores, axis=1)

# 绘制学习曲线图
plt.figure()
plt.plot(train_sizes, train_mean, 'o-', color="r", label="Training score")
plt.plot(train_sizes, test_mean, 'o-', color="g", label="Cross-validation score")
plt.fill_between(train_sizes, train_mean - train_std, train_mean + train_std, alpha=0.1, color="r")
plt.fill_between(train_sizes, test_mean - test_std, test_mean + test_std, alpha=0.1, color="g")
plt.xlabel("Training examples")
plt.ylabel("Score")
plt.legend(loc="best")
plt.show()
```

这是一个简单的示例,你可以根据自己的需求进行修改和扩展。希望对你有帮助!
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值