本文直接给出 sklearn 中 KNN 算法的用法,具体实现过程如下:
# -*- coding: utf-8 -*-
import numpy as np
import pandas as pd
from sklearn import datasets
import operator
from sklearn import neighbors
import sklearn.model_selection as ms
import matplotlib.pyplot as plt
# Load the scikit-learn digits dataset; .data is used as the feature matrix
# and .target as the labels in the split below.
digits = datasets.load_digits()
totalNum = len(digits.data)
# Use 80% of the samples for training and the remaining 20% for testing.
# trainNum is the nominal training-set size; the split itself is driven by
# train_size, not by this value.
trainNum = int(0.8 * totalNum)
trainX, testX, trainY, testY = ms.train_test_split(
    digits.data, digits.target, train_size=0.8, random_state=1
)
# A bare np.shape(...) expression only echoes in an interactive session;
# print so the shapes are also visible when this runs as a script.
print("Shape of digits.data:", np.shape(digits.data))
print("Shape of digits.target:", np.shape(digits.target))
# Take a first look at the features by viewing a training sample as an image.
X_train = np.reshape(trainX, (len(trainX), 8, 8))
# Normalize by the global maximum. A new array is built on purpose: the
# reshaped array may share memory with trainX, which must stay unscaled.
X_train = np.divide(X_train, X_train.max())
print("After reshaping, the shape of the X_train is:", X_train.shape)
a = X_train[1]  # the training sample at index 1, as an 8x8 array
plt.imshow(a, cmap='Greys_r')  # draw the sample
# Train the model and record how the error rate (ER) changes with K.
k_values = range(1, 16)
ER = []
for n_neighbors in k_values:
    # Try a different K on each pass to see its effect on the final result.
    clf = neighbors.KNeighborsClassifier(n_neighbors, weights='uniform')
    clf.fit(trainX, trainY)  # fit the classifier on the training split
    Z = clf.predict(testX)  # predict labels for the test split
    x = 1 - np.mean(Z == testY)  # error rate = fraction of wrong predictions
    ER.append(x)  # store the error rate for this K
# Index the results by K itself: the default 0-based index would shift the
# x-axis by one relative to n_neighbors and make the plot easy to misread.
# NOTE(review): when run as a plain script (not a notebook), plt.show() is
# presumably still needed to display the figure.
pd.DataFrame(ER, index=pd.Index(k_values, name='n_neighbors')).plot(title='the plot of error rate')
由上图可知,n_neighbors 取 7、8 时较为合适,此时的错误率(error rate)约为 0.002778(约相当于 360 个测试样本中只有 1 个被误判)。
# -*- coding: utf-8 -*-
import numpy as np
from sklearn import neighbors, datasets
from sklearn.model_selection import train_test_split
from sklearn.utils.testing import assert_equal
rng = np.random.RandomState(0)
# load and shuffle digits
digits =