from numpy import *
import operator
import matplotlib
import operator
from os import listdir
import matplotlib.pyplot as plt
def classify0(inX, dataSet, labels, k):
    """Classify ``inX`` by majority vote among its ``k`` nearest neighbours.

    Distances are Euclidean, measured between ``inX`` and every row of
    ``dataSet``.

    Args:
        inX: Feature vector to classify (array-like); it must broadcast
            against the rows of ``dataSet``.
        dataSet: 2-D array-like holding one training sample per row.
        labels: Sequence with one class label per row of ``dataSet``.
        k: Number of nearest neighbours that take part in the vote.

    Returns:
        The label that collects the most votes. When several labels tie,
        the one encountered first while walking from the nearest
        neighbour outwards is returned.

    Raises:
        ValueError: If ``k`` is smaller than 1 or larger than the number
            of rows in ``dataSet``.
    """
    dataSet = asarray(dataSet)
    dataSetSize = dataSet.shape[0]
    if not 1 <= k <= dataSetSize:
        raise ValueError(
            "k must be between 1 and the number of samples (%d), got %r"
            % (dataSetSize, k)
        )

    # Broadcasting subtracts every row from inX directly, so there is no
    # need to materialise a tiled copy of inX first.
    diffMat = asarray(inX) - dataSet
    distances = (diffMat ** 2).sum(axis=1) ** 0.5
    nearest = distances.argsort()[:k]

    classCount = {}
    for neighbour in nearest:
        voteLabel = labels[neighbour]
        classCount[voteLabel] = classCount.get(voteLabel, 0) + 1

    # sorted() is stable, so labels with equal counts keep insertion order.
    sortedClassCount = sorted(
        classCount.items(), key=operator.itemgetter(1), reverse=True
    )
    return sortedClassCount[0][0]
# Path of the data file that datingClassTest() hands to file2matrix().
filename = "datingTestSet2.txt"
def file2matrix(filename):
    """Load a tab-separated data file into a feature matrix and label list.

    Every non-blank line must contain three numeric feature columns
    followed by further columns, the last of which is an integer class
    label. Blank lines are skipped.

    Args:
        filename: Path of the text file to read.

    Returns:
        A tuple ``(returnMat, classLabelVector)`` where ``returnMat`` is an
        ``(n, 3)`` float array built from the first three columns of each
        line and ``classLabelVector`` is the list of ``n`` integer labels
        taken from the last column.
    """
    # The context manager guarantees the file is closed, even on error.
    with open(filename) as fr:
        rows = [line.strip().split('\t') for line in fr if line.strip()]

    returnMat = zeros((len(rows), 3))
    classLabelVector = []
    for index, listFromLine in enumerate(rows):
        # Convert explicitly so malformed numbers fail with a clear error.
        returnMat[index, :] = [float(value) for value in listFromLine[0:3]]
        classLabelVector.append(int(listFromLine[-1]))
    return returnMat, classLabelVector
def autoNorm(dataSet):
    """Min-max normalise every column of ``dataSet`` into the range [0, 1].

    Each value becomes ``(value - column_min) / (column_max - column_min)``.
    A column whose values are all equal has a zero range; its normalised
    values are set to 0 instead of producing NaN from a 0/0 division.

    The column minima, the column maxima and the final result are printed
    as diagnostics.

    Args:
        dataSet: 2-D array-like with one sample per row.

    Returns:
        A tuple ``(normDataSet, ranges, minVals)``: the normalised array,
        the per-column ``max - min`` (zero for constant columns) and the
        per-column minimum.
    """
    dataSet = asarray(dataSet)
    minVals = dataSet.min(0)
    print(minVals)
    maxVals = dataSet.max(0)
    print(maxVals)
    ranges = maxVals - minVals

    # Divide constant columns by 1 rather than 0; their numerator is
    # already 0, so they normalise cleanly to 0.
    safeRanges = where(ranges == 0, 1, ranges)
    # Broadcasting applies the per-column vectors to every row.
    normDataSet = (dataSet - minVals) / safeRanges

    print(normDataSet, ranges, minVals)
    return normDataSet, ranges, minVals
def datingClassTest():
    """Print the hold-out error rate of ``classify0`` on the dating data.

    Loads the file named by the module-level ``filename``, normalises the
    features with ``autoNorm``, treats the first 10% of the rows as test
    vectors and the remaining rows as training data, and classifies every
    test vector with ``k = 3``. The number of test vectors, each
    ``(predicted, actual)`` pair and finally the error rate are printed.
    """
    hoRatio = 0.10
    datingDataMat, datingLabels = file2matrix(filename)
    normMat, _ranges, _minVals = autoNorm(datingDataMat)
    m = normMat.shape[0]
    numTestVecs = int(m * hoRatio)
    print(numTestVecs)

    if numTestVecs == 0:
        # Too few rows to hold any out; bail out instead of dividing by
        # zero when computing the error rate below.
        print("not enough samples to form a test set")
        return

    # The training split is the same for every test vector, so slice once.
    trainMat = normMat[numTestVecs:m, :]
    trainLabels = datingLabels[numTestVecs:m]

    errorCount = 0.0
    for i in range(numTestVecs):
        classifierResult = classify0(normMat[i, :], trainMat, trainLabels, 3)
        print(classifierResult, datingLabels[i])
        if classifierResult != datingLabels[i]:
            errorCount += 1.0
    print(errorCount / float(numTestVecs))
if __name__ == "__main__":
    # Run the hold-out evaluation only when executed as a script.
    datingClassTest()