版本:Python 3.6
环境:Anaconda / JupyterLab 0.27.0
操作系统:Windows 10
KNN 分类笔记,会不断完善。
# K-nearest-neighbors (KNN) classification model, choosing 2 neighbors
from numpy import *
import operator
def get_dataset(train_x, train_y):
    """Return the raw ``.values`` of the training features and labels.

    Args:
        train_x: Object exposing a ``.values`` attribute holding the
            training samples (presumably a pandas DataFrame -- confirm
            against the caller).
        train_y: Object exposing a ``.values`` attribute holding the
            training labels (presumably a pandas Series -- confirm
            against the caller).

    Returns:
        A 2-tuple ``(train_x.values, train_y.values)``.
    """
    return train_x.values, train_y.values
def knn_classify(inX, dataSet, labels, k):
    """Classify one sample by majority vote among its k nearest training rows.

    The distance between ``inX`` and every row of ``dataSet`` is the
    Euclidean distance. The labels of the ``k`` closest rows are counted
    and the most frequent one is returned.

    Args:
        inX: Feature vector to classify; it must broadcast against the
            rows of ``dataSet``.
        dataSet: 2-D array-like of training samples, one row per sample.
        labels: Sequence of class labels indexable by the row position in
            ``dataSet``. Each label must be hashable.
        k: Number of nearest neighbors that vote. A value larger than the
            number of training rows is capped at that number, so every
            row votes.

    Returns:
        The label with the most votes. When several labels share the top
        count, the one encountered first while walking the neighbors from
        nearest to farthest wins.

    Raises:
        ValueError: If ``dataSet`` has no rows or ``k`` is smaller than 1.
    """
    # Local import keeps the block self-contained without touching the
    # file-level import section.
    from collections import Counter

    # Accept nested lists as well as ndarrays.
    dataSet = asarray(dataSet)
    sample_count = dataSet.shape[0]
    if sample_count == 0:
        raise ValueError("dataSet must contain at least one sample")
    if k < 1:
        raise ValueError("k must be at least 1, got %r" % (k,))

    # Cap k so we never index past the available neighbors. A conditional
    # is used instead of min() because `from numpy import *` shadows the
    # builtin min with numpy's reduction.
    neighbor_count = k if k < sample_count else sample_count

    # Broadcasting subtracts inX from every row; no explicit tile() copy
    # of the query vector is required.
    diff = asarray(inX) - dataSet
    distances = (diff ** 2).sum(axis=1) ** 0.5

    nearest = distances.argsort()[:neighbor_count]

    # Counter records labels in nearest-first order, and most_common()
    # keeps that order among equal counts, which gives the tie-break
    # described in the docstring.
    votes = Counter(labels[index] for index in nearest)
    return votes.most_common(1)[0][0]
def knn(trainset,train_label,test,k):
y_p = []
for i in range(test.shape[0]):