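KNN算法伪代码:
对未知类别的数据点依次执行以下操作:
(1) 计算已知类别数据集中每个点与当前点之间的距离;
(2) 按照距离递增次序排序;
(3) 选取与当前点距离最小的k个点;
(4) 统计这k个点所在类别的出现频率;
(5) 返回这k个点中出现频率最高的类别作为当前点的预测分类。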
实现KNN算法基础程序:
"""
Created on Tue Oct 11 19:05:33 2016
@author: Administrator
"""
from numpy import *
import operator
def creatDataSet():
    """Return a tiny hand-made training set for trying out the kNN classifier.

    Returns:
        tuple: ``(group, labels)`` where ``group`` is a 4x2 NumPy array with
        one sample point per row and ``labels`` is a list holding the class
        label ('A' or 'B') of the corresponding row of ``group``.
    """
    group = array([[1.0, 1.1], [1.0, 1.0], [0, 0], [0, 0.1]])
    labels = ['A', 'A', 'B', 'B']
    return group, labels
def classify0(inX, dataSet, labels, k):
    """Classify ``inX`` by majority vote among its ``k`` nearest neighbours.

    Distances are Euclidean, computed between ``inX`` and every row of
    ``dataSet``.

    Args:
        inX: The point to classify (sequence or NumPy array with one value
            per column of ``dataSet``).
        dataSet: NumPy array with one training sample per row.
        labels: Sequence of class labels; ``labels[i]`` is the class of
            ``dataSet[i]``.
        k: Number of nearest neighbours that take part in the vote.

    Returns:
        The label that received the most votes. When several labels tie,
        the one whose first vote came from the nearest neighbour wins
        (relies on insertion-ordered dicts, i.e. Python 3.7+).

    Raises:
        IndexError: If ``k`` is smaller than 1 or larger than the number of
            rows in ``dataSet``.
    """
    dataSetSize = dataSet.shape[0]

    # Repeat inX once per training row so the subtraction is element-wise.
    diffMat = tile(inX, (dataSetSize, 1)) - dataSet
    sqDistances = (diffMat ** 2).sum(axis=1)
    distances = sqDistances ** 0.5

    # Row indices ordered from the closest sample to the farthest one.
    sortedDistIndicies = distances.argsort()

    classCount = {}
    for i in range(k):
        voteIlabel = labels[sortedDistIndicies[i]]
        classCount[voteIlabel] = classCount.get(voteIlabel, 0) + 1

    # items() works on both Python 2 and 3; iteritems() was removed in
    # Python 3 and raised AttributeError there.
    sortedClassCount = sorted(classCount.items(),
                              key=operator.itemgetter(1),
                              reverse=True)
    return sortedClassCount[0][0]
欧氏距离计算:
# Excerpt: Euclidean distance from inX to every row of dataSet.
dataSetSize=dataSet.shape [0 ] # length of the first axis, i.e. the number of rows in the data set
diffMat=tile(inX,(dataSetSize,1 )) -dataSet # repeat inX once per row, then subtract the data set element-wise
sqDiffMat=diffMat**2 # square every difference
sqDistances=sqDiffMat.sum (axis=1 ) # sum along axis 1: one total per row (assumes dataSet is 2-D)
distances=sqDistances**0.5 # square root of each total gives the Euclidean distance
选择距离最小的K个点:
# Excerpt: majority vote over the first k entries of sortedDistIndicies
# (presumably row indices ordered by increasing distance -- it is not defined
# inside this excerpt).
classCount={}
for i in range (k): # walk over the first k entries
voteIlabel=labels[sortedDistIndicies[i]] # label belonging to the i-th entry
classCount[voteIlabel]=classCount.get (voteIlabel,0 )+1 # add one vote for that label
# NOTE(review): dict.iteritems() exists only in Python 2; Python 3 spells it items().
sortedClassCount=sorted(classCount.iteritems(),key=operator .itemgetter(1 ),
reverse=True) # order the (label, count) pairs by count, largest first
return sortedClassCount [0 ] [0 ] # label of the pair with the highest count