机器学习基础自学一(算法审查)

机器学习中导入的库比较多,需要一个一个确定是否导入成功

# 导入类库
from pandas import read_csv
from pandas.plotting import scatter_matrix
from matplotlib import pyplot
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.externals.joblib import dump
from sklearn.externals.joblib import load


# Load the data
# NOTE(review): hard-coded absolute Windows path; adjust to where the CSV lives.
filename = 'D://example//MachineLearning-master//iris.data.csv'
# Column names are supplied explicitly: four measurements plus the class label.
# ("sepal", not "separ": these labels show up in the printed tables and plot axes.)
names = ['sepal-length', 'sepal-width', 'petal-length', 'petal-width', 'class']
dataset = read_csv(filename, names=names)

# Text summaries of the data
print('数据维度: 行 %s,列 %s' % dataset.shape)  # dimensions: (rows, columns)
print(dataset.head(10))  # first 10 rows
print(dataset.describe())  # descriptive statistics
print(dataset.groupby('class').size())  # number of rows per class

# Box plot of each column, one subplot per column in a 2x2 grid
dataset.plot(kind='box', subplots=True, layout=(2, 2), sharex=False, sharey=False)
pyplot.show()

# Histograms
dataset.hist()
pyplot.show()

# Scatter-plot matrix
scatter_matrix(dataset)
pyplot.show()

# Split the dataset into features/labels and into training/validation parts
array = dataset.values
X = array[:, :4]  # columns 0-3 (the first four columns): input features
Y = array[:, 4]  # column 4 (the fifth column): class label
validation_size = 0.2  # fraction of rows held out for validation
seed = 7
# Hold out `validation_size` of the rows as the validation set; the rest is
# the training set. The fixed seed makes the split repeatable.
X_train, X_validation, Y_train, Y_validation = train_test_split(
    X, Y, test_size=validation_size, random_state=seed
)
# Spot-check algorithms: candidate classifiers, all with default settings
models = {
    'LR': LogisticRegression(),  # logistic regression
    'LDA': LinearDiscriminantAnalysis(),  # linear discriminant analysis
    'KNN': KNeighborsClassifier(),  # k-nearest neighbours
    'CART': DecisionTreeClassifier(),  # classification and regression tree
    'NB': GaussianNB(),  # Gaussian naive Bayes
    'SVM': SVC(),  # support vector machine
}

# Evaluate every candidate with 10-fold cross-validation on the training set
results = []
# random_state only has an effect when shuffle=True; newer scikit-learn
# releases raise ValueError if random_state is set while shuffle is False.
# The splitter does not depend on the model, so it is built once.
kfold = KFold(n_splits=10, shuffle=True, random_state=seed)
for key, model in models.items():
    cv_results = cross_val_score(model, X_train, Y_train, cv=kfold, scoring='accuracy')
    results.append(cv_results)
    # name: mean accuracy (standard deviation across folds)
    print('%s: %f (%f)' % (key, cv_results.mean(), cv_results.std()))



# Evaluate the chosen algorithm on the held-out validation set
svm = SVC()
svm.fit(X=X_train, y=Y_train)  # X_train: training inputs, Y_train: their known labels
# Predict the validation inputs so the output can be compared with Y_validation.
predictions = svm.predict(X_validation)
# Print, in order: overall accuracy, confusion matrix, per-class report.
for report in (accuracy_score, confusion_matrix, classification_report):
    print(report(Y_validation, predictions))


# Save the trained model to disk
model_file = 'finalized_model_joblib.sav'
with open(model_file, 'wb') as model_f:
    dump(svm, model_f)

# Load the model back from disk
# NOTE: joblib files are pickle-based; only load files from a trusted source.
with open(model_file, 'rb') as model_f:
    loaded_model = load(model_f)

# Check the reloaded model itself on the validation set. Previously the score
# was computed on the training data and discarded, and the printed accuracy
# came from the in-memory model's earlier predictions.
result = loaded_model.score(X=X_validation, y=Y_validation)
print(result)

 

下面的链接是数据

https://pan.baidu.com/s/1a7Q2QaB2pIEz6p9uGsUGcA

 

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值