KNN(k 近邻算法)的原理可以自行查阅百度百科,这里不做介绍。它是机器学习中最基础的算法之一,下面给出一个简单的实现。
# -*- coding: utf-8 -*-
"""
Created on Sat Jun 9 18:27:11 2018
@author: Qunxing
"""
import numpy as np
DEBUG_FLAG = 0 # debug switch: set to 1 to make KNN print its intermediate distance arrays
def Normalize_Data(dataSet):
    """Min-max scale every column of ``dataSet`` into the range [0, 1].

    Args:
        dataSet: Array-like of numeric samples, one sample per row. Plain
            nested lists are accepted as well as NumPy arrays.

    Returns:
        A tuple ``(scaled, minValue, ranges)``:

        * ``scaled`` -- ``(dataSet - minValue) / ranges``.
        * ``minValue`` -- the per-column minimum.
        * ``ranges`` -- the per-column ``max - min``, with any zero entry
          replaced by 1.

        New samples can be scaled consistently with
        ``(sample - minValue) / ranges``.
    """
    dataSet = np.asarray(dataSet)
    minValue = dataSet.min(axis=0)
    ranges = dataSet.max(axis=0) - minValue
    # A constant column has a zero range; dividing by it would fill the
    # column with NaN (and poison any later distance computation). Using 1
    # instead maps such a column to all zeros, and keeps the returned
    # ``ranges`` safe for callers to divide by.
    ranges = np.where(ranges == 0, 1, ranges)
    return (dataSet - minValue) / ranges, minValue, ranges
def KNN(train_data, labels, test_data, kV):
    """Classify ``test_data`` by majority vote among its nearest training rows.

    The Euclidean distance from ``test_data`` to every row of ``train_data``
    is computed, the ``kV`` closest rows are selected, and the label that
    occurs most often among them is returned.

    Args:
        train_data: 2-D array-like of training samples, one sample per row.
        labels: Sequence indexed by row number giving the label of each
            training row. Labels are used as dict keys, so they must be
            hashable.
        test_data: The sample to classify; it is broadcast against the rows
            of ``train_data`` (expected to be one sample with the same
            number of features).
        kV: Number of nearest neighbours that vote. Must be at least 1; a
            value larger than the number of rows simply uses every row.

    Returns:
        The label with the most votes. When several labels have the same
        number of votes, the one encountered first while walking the
        neighbours from nearest to farthest wins.

    Raises:
        ValueError: If ``kV`` is less than 1 or ``train_data`` is empty.
    """
    if kV < 1:
        # kV == 0 leaves nobody to vote, and a negative kV would make the
        # slice below drop the farthest rows instead of keeping the nearest.
        raise ValueError("kV must be a positive integer, got %r" % (kV,))

    train_data = np.asarray(train_data)
    test_data = np.asarray(test_data)
    if train_data.size == 0:
        raise ValueError("train_data must contain at least one sample")

    distSquaArray = (train_data - test_data) ** 2
    # Summing along axis 1 collapses each row to one squared distance (1-D).
    distSquaCount = distSquaArray.sum(axis=1)
    if DEBUG_FLAG:
        print(distSquaCount)
    dist = np.sqrt(distSquaCount)
    if DEBUG_FLAG:
        print(dist)

    # argsort sorts ascending and returns indices; keep the kV smallest.
    indices = dist.argsort()[:kV]

    labelCount = {}
    for index in indices:
        label = labels[index]
        labelCount[label] = labelCount.get(label, 0) + 1

    # max() returns the first key with the highest count; dicts preserve
    # insertion order, so ties resolve in nearest-first order.
    return max(labelCount, key=labelCount.get)
if __name__ == "__main__":
    # Small demo: four labelled 2-feature samples and one query point.
    samples = np.array([[10, 3.5], [12, 7.8], [19, 4], [11, 8]])
    sample_labels = ['a', 'b', 'a', 'c']
    query = np.array([14, 6])

    # Scale the query with the same offset and range as the training data so
    # both live in the same normalized space.
    scaled_samples, col_min, col_span = Normalize_Data(samples)
    scaled_query = (query - col_min) / col_span

    # Vote among the 2 nearest neighbours and print the winning label.
    prediction = KNN(scaled_samples, sample_labels, scaled_query, 2)
    print(prediction)