# I did not want to use sklearn; to understand how KNN works, I wrote it by hand.
#!/usr/bin/python3
# coding:utf-8
from numpy import *
def creatDataSet():
    """Build the small hard-coded training set used by the demo.

    Returns:
        A tuple ``(group, label)``: ``group`` is a 4x2 NumPy array holding
        one two-feature sample per row, and ``label`` is a list of four
        class names where ``label[i]`` is the class of ``group[i]``.
    """
    group = array([[1.0, 0.9], [1.0, 1.0], [0.1, 0.2], [0.0, 0.1]])
    label = ['A', 'A', 'B', 'B']
    return group, label
def KNNClassify(newInput, dataSet, label, k):
    """Classify a sample by majority vote among its k nearest neighbours.

    Distances are Euclidean, computed between ``newInput`` and every row of
    ``dataSet``.

    Args:
        newInput: Feature vector of the sample to classify; its length must
            match the number of columns of ``dataSet``.
        dataSet: 2-D array-like of training samples, one row per sample.
        label: Sequence of class labels; ``label[i]`` is the class of row
            ``i`` of ``dataSet``.
        k: Number of nearest neighbours that take part in the vote.

    Returns:
        The label with the most votes among the k nearest samples. On a tie
        the label that was encountered first (i.e. the one owning the
        closest neighbour among the tied labels) wins.

    Raises:
        ValueError: If ``k`` is smaller than 1 or larger than the number of
            training samples (which also covers an empty ``dataSet``).
    """
    samples = asarray(dataSet, dtype=float)
    numSample = samples.shape[0]
    if not 1 <= k <= numSample:
        raise ValueError(
            "k must be between 1 and the number of samples (%d), got %r"
            % (numSample, k)
        )

    # Distance from the new sample to every training sample. Broadcasting
    # subtracts the query vector from each row, so no explicit tile() is
    # needed.
    diff = samples - asarray(newInput, dtype=float)
    distance = sqrt((diff ** 2).sum(axis=1))

    # Indices of the k closest samples; a stable sort keeps equal distances
    # in their original row order so the result is deterministic.
    nearest = argsort(distance, kind="stable")[:k]

    # Count how many of the k neighbours belong to each class.
    classCount = {}
    for sampleIndex in nearest:
        labelVote = label[sampleIndex]
        classCount[labelVote] = classCount.get(labelVote, 0) + 1

    # Return the class with the highest count. The strict comparison keeps
    # the earliest-inserted label on ties. A plain loop is used because
    # ``from numpy import *`` may shadow the builtin ``max``.
    bestLabel = None
    bestCount = 0
    for candidate, votes in classCount.items():
        if votes > bestCount:
            bestCount = votes
            bestLabel = candidate
    return bestLabel
def main():
    """Run the demo: classify one hard-coded sample and print its label.

    Builds the data set with ``creatDataSet``, classifies the point
    ``[1.2, 1.5]`` with ``KNNClassify`` using ``k = 3``, and prints the
    returned label to standard output.
    """
    newInput = [1.2, 1.5]
    dataSet, label = creatDataSet()
    k = 3
    newInputLabel = KNNClassify(newInput, dataSet, label, k)
    print(newInputLabel)
# Run the demo only when the file is executed as a script, not when imported.
if __name__ == '__main__':
    main()