#!/usr/bin/python
#-*-encoding:utf-8-*-
import numpy as np
import operator
def classify(vect, dataset, lables, k=5):
    """Classify ``vect`` by majority vote among its k nearest neighbours.

    Distances are Euclidean, computed between ``vect`` and every row of
    ``dataset``.

    Args:
        vect: Feature vector to classify; must broadcast against the rows
            of ``dataset`` (typically a 1-D sequence of ``dataset.shape[1]``
            values).
        dataset: 2-D array of training samples, one sample per row.
        lables: Sequence of labels, indexable by row number of ``dataset``.
        k: Number of nearest neighbours that vote. Values larger than the
            number of samples are clamped to the number of samples.

    Returns:
        The label with the most votes among the k nearest samples. On
        interpreters with insertion-ordered dicts, a tie goes to the label
        whose neighbour is closest.

    Raises:
        ValueError: If ``k`` is smaller than 1 or ``dataset`` has no rows.
    """
    dataset = np.asarray(dataset, dtype=float)
    rows = dataset.shape[0]
    # Never ask for more neighbours than there are samples.
    k = min(k, rows)
    if k < 1:
        raise ValueError("classify needs k >= 1 and a non-empty dataset")

    # Broadcasting subtracts vect from every row without building a tiled copy.
    diffmat = dataset - np.asarray(vect, dtype=float)
    # Summing over axis 1 collapses each row to one squared distance.
    distances = np.sqrt((diffmat ** 2).sum(axis=1))
    # argsort returns row indices ordered from nearest to farthest.
    sortedIndices = distances.argsort()

    classCount = {}
    for index in sortedIndices[:k]:
        votedLabel = lables[index]
        classCount[votedLabel] = classCount.get(votedLabel, 0) + 1

    # items() works on both Python 2 and 3 (iteritems() is Python 2 only).
    return max(classCount.items(), key=operator.itemgetter(1))[0]
def load(filename, cols, max_rows=101):
    """Load a tab-separated data file into a feature matrix and label lists.

    Each non-blank line holds ``cols`` numeric fields followed by one label
    field, all separated by tab characters.

    Args:
        filename: Path of the text file to read.
        cols: Number of numeric feature columns per line.
        max_rows: Maximum number of data rows to load; ``None`` loads every
            row. The default of 101 keeps the historical hard-coded limit.

    Returns:
        A tuple ``(mat, label, labelval)``:
            mat: float array of shape ``(rows_loaded, cols)``.
            label: list of the label strings, one per loaded row.
            labelval: list of floats, one per loaded row; each label is
                encoded as the 1-based row number of its first occurrence.

    Raises:
        ValueError: If a line does not contain exactly ``cols`` numeric
            fields before the label.
    """
    # The context manager guarantees the file handle is closed.
    with open(filename) as fr:
        lines = [raw.strip() for raw in fr]
    # Blank lines carry no sample; skip them instead of failing on them.
    lines = [line for line in lines if line]
    if max_rows is not None:
        lines = lines[:max_rows]

    # Size the matrix to the rows actually loaded so it stays aligned with
    # the label lists (no trailing all-zero rows when the file is truncated).
    mat = np.zeros((len(lines), cols))
    label = []
    labelval = []
    valdict = {}
    for row, line in enumerate(lines):
        listLine = line.split("\t")
        mat[row, :] = [float(field) for field in listLine[:-1]]
        name = listLine[-1]
        label.append(name)
        # "in" replaces dict.has_key(), which exists only on Python 2.
        if name not in valdict:
            valdict[name] = float(row + 1)
        labelval.append(valdict[name])
    return mat, label, labelval
# Data normalization
def normalize(dataset):
    """Min-max scale every column of ``dataset`` into the range [0, 1].

    Each value is mapped to ``(value - column_min) / (column_max - column_min)``.

    Args:
        dataset: 2-D array-like of numeric samples, one sample per row.

    Returns:
        A float array of the same shape as ``dataset``. A column whose
        values are all equal is mapped to zeros.
    """
    # Work in floats so integer input is not floor-divided on Python 2.
    dataset = np.asarray(dataset, dtype=float)
    # Per-column minimum and maximum (reduction along axis 0).
    minval = dataset.min(0)
    maxval = dataset.max(0)
    ranges = maxval - minval
    # A constant column has zero range; divide by 1 to avoid NaN/inf.
    ranges = np.where(ranges == 0, 1.0, ranges)
    # Broadcasting applies the per-column shift and scale to every row.
    return (dataset - minval) / ranges