# kNN(k-近邻)算法:
# 优点:精度高,对异常值不敏感,无数据输入假定
# 缺点:计算复杂度高,空间复杂度高
from numpy import *
import operator
def createDataSet():
    """Build the small hard-coded sample set used to demonstrate kNN.

    Returns:
        A ``(group, labels)`` tuple. ``group`` is a 4x2 NumPy array holding
        one 2-D sample point per row, and ``labels`` is a list of four class
        labels where ``labels[i]`` is the class of row ``i`` of ``group``.
    """
    group = array([[1.0, 1.1], [1.0, 1.0], [0, 0], [0, 0.1]])
    labels = ['A', 'A', 'B', 'B']
    return group, labels
# Module-level sample data: one 2-D point per row of `group`, with its class
# label at the same index in `labels`.
group, labels = createDataSet()
def classify0(inX, dataSet, labels, k):
    """Classify ``inX`` by majority vote among its ``k`` nearest neighbours.

    Distances are Euclidean, measured between ``inX`` and every row of
    ``dataSet``.

    Args:
        inX: Feature vector to classify, with one value per column of
            ``dataSet``.
        dataSet: 2-D array (or nested list) of known samples, one per row.
        labels: Sequence of class labels; ``labels[i]`` is the class of
            row ``i`` of ``dataSet``.
        k: Number of nearest neighbours that take part in the vote.

    Returns:
        The label that received the most votes. When several labels tie,
        the one whose first vote came from the nearest neighbour wins
        (this relies on dicts keeping insertion order, Python 3.7+).

    Raises:
        IndexError: If ``k`` is smaller than 1 or larger than the number of
            rows in ``dataSet``.
    """
    # asarray lets plain nested lists work as well as ndarrays.
    samples = asarray(dataSet)
    # Broadcasting subtracts inX from every row at once (no explicit tile()).
    diffMat = samples - asarray(inX)
    # Square, sum across each row, then take the root: Euclidean distance.
    sqDistances = (diffMat ** 2).sum(axis=1)
    distances = sqDistances ** 0.5
    # Row indices ordered from the nearest sample to the farthest.
    sortedDistIndicies = distances.argsort()

    classCount = {}
    for i in range(k):
        voteIlabel = labels[sortedDistIndicies[i]]
        # Missing labels start at 0 votes before being incremented.
        classCount[voteIlabel] = classCount.get(voteIlabel, 0) + 1

    # dict.items() exists on both Python 2 and 3; iteritems() is Python 2 only.
    sortedClassCount = sorted(classCount.items(),
                              key=operator.itemgetter(1),
                              reverse=True)
    return sortedClassCount[0][0]
# Demo: classify the origin using its 3 nearest neighbours and show the label.
print(classify0([0, 0], group, labels, 3))
# 运算结果是 B