import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import pylab as pyl
import operator
def file2matrix(filename):
    """Load a tab-separated data file into a feature matrix and a label list.

    Every non-blank line is split on tabs; the first three fields are stored
    as floats and the last field is parsed as an integer class label.

    Args:
        filename: Path of the text file to read.

    Returns:
        A tuple ``(returnmat, classlablevector)``: ``returnmat`` is an
        ``(n, 3)`` float array holding the first three columns of the ``n``
        non-blank lines, and ``classlablevector`` is a list with the ``n``
        integer labels taken from the last column.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a feature field is not a float or the label field is
            not an integer.
    """
    # The context manager closes the handle even if parsing fails below.
    with open(filename) as fr:
        # Blank lines (typically a trailing newline at end of file) are
        # dropped so they neither reserve a matrix row nor break int().
        rows = [line.strip().split('\t') for line in fr if line.strip()]

    returnmat = np.zeros((len(rows), 3))
    classlablevector = []
    for index, fields in enumerate(rows):
        returnmat[index, :] = [float(value) for value in fields[0:3]]
        classlablevector.append(int(fields[-1]))
    return returnmat, classlablevector
def autoNorm(datingDatMat):
    """Rescale every column of a 2-D array to the [0, 1] interval.

    Each value is mapped to ``(value - column_min) / (column_max - column_min)``.

    Args:
        datingDatMat: 2-D NumPy array with one sample per row and one
            feature per column.

    Returns:
        A tuple ``(normDataset, ranges, DMN)``: the normalised array (same
        shape as the input), the per-column ``max - min`` values, and the
        per-column minima. ``ranges`` is returned as computed, so it holds
        0 for any constant column.
    """
    DMN = datingDatMat.min(0)
    ranges = datingDatMat.max(0) - DMN
    # A constant column has a zero range; dividing by it would turn the
    # whole column into NaN. Dividing by 1 instead leaves it at all zeros.
    safe_ranges = np.where(ranges == 0, 1, ranges)
    # Broadcasting applies the per-column vectors to every row.
    normDataset = (datingDatMat - DMN) / safe_ranges
    return normDataset, ranges, DMN
def Drawscatter(datingDataMat,DatingLables):
    """Show a scatter plot for each pair of the three feature columns.

    One figure is created per column pair (0, 1), (0, 2) and (1, 2). Both
    the marker size and the marker colour of a point are 15 times its label.

    Args:
        datingDataMat: 2-D array whose columns 0..2 are the plotted features.
        DatingLables: Sequence of numeric labels, one per row.
    """
    # SimHei is set as the sans-serif font, presumably so that the Chinese
    # axis labels below render correctly.
    pyl.mpl.rcParams['font.sans-serif'] = ['SimHei']
    # Axis titles per column: frequent-flyer miles per year, percentage of
    # time spent gaming, litres of ice cream per week.
    axis_titles = {
        0: "每年获取的飞行常客里程数",
        1: "玩游戏所耗时间百分比",
        2: "每周消费的冰淇淋公升数",
    }
    scaled_labels = 15.0 * np.array(DatingLables)
    column_pairs = [(x_col, y_col)
                    for x_col in range(0, 2)
                    for y_col in range(x_col + 1, 3)]
    for x_col, y_col in column_pairs:
        # x_col + y_col is 1, 2 or 3, so each pair gets its own figure.
        figure = plt.figure(x_col + y_col)
        axes = figure.add_subplot(111)
        axes.scatter(datingDataMat[:, x_col], datingDataMat[:, y_col],
                     scaled_labels, scaled_labels)
        axes.set_xlabel(axis_titles[x_col])
        axes.set_ylabel(axis_titles[y_col])
    plt.show()
def datingClassTest(hoRatio=0.10, filename='datingTestSet2.txt', k=3):
    """Print the k-NN error rate measured on a hold-out slice of the data.

    The file is loaded and normalised; the first ``int(m * hoRatio)`` rows
    are classified against the remaining rows, and each prediction plus the
    overall error rate is printed.

    Args:
        hoRatio: Fraction of the rows (taken from the top) used as test
            vectors.
        filename: Data file passed to ``file2matrix``.
        k: Number of neighbours passed to ``classify0``.

    Returns:
        None. Results are written to standard output.
    """
    datingDataMat, DatingLables = file2matrix(filename)
    normMat, _ranges, _minvals = autoNorm(datingDataMat)
    m = normMat.shape[0]
    numTestVecs = int(m * hoRatio)
    if numTestVecs <= 0:
        # Without this guard the final division raises ZeroDivisionError
        # whenever the data set is too small for the requested ratio.
        print("no test vectors available: %d samples with hold-out ratio %f"
              % (m, hoRatio))
        return

    # The rows after the hold-out slice form the training set for every query.
    trainingMat = normMat[numTestVecs:m, :]
    trainingLables = DatingLables[numTestVecs:m]
    errorCount = 0.0
    for i in range(numTestVecs):
        classfileresult = classify0(normMat[i, :], trainingMat,
                                    trainingLables, k)
        print("the classfier came back with: %d,the real answer is: %d"
              % (classfileresult, DatingLables[i]))
        if classfileresult != DatingLables[i]:
            errorCount += 1.0
    print("the total error rate is: %f" % (errorCount / float(numTestVecs)))
def classifyPerson():
    """Ask for three feature values on stdin and print the predicted class.

    The answers are normalised with the minima and ranges computed from
    'datingTestSet2.txt' and classified with ``classify0`` using k = 3.
    The predicted label (1, 2 or 3) selects the message that is printed.
    """
    result = ['not at all', 'in small doses', 'in large doses']
    # Prompts, in order: frequent-flyer miles per year, percentage of time
    # spent gaming, litres of ice cream per week.
    prompts = (
        "每年获取的飞行常客里程数:\n",
        "玩游戏所耗时间百分比?:\n",
        "每周消费的冰淇淋公升数:\n",
    )
    answers = [float(input(prompt)) for prompt in prompts]

    datingDataMat, DatingLables = file2matrix('datingTestSet2.txt')
    normMat, ranges, minvals = autoNorm(datingDataMat)

    # Scale the query exactly like the training data before classifying.
    scaledQuery = (np.array(answers) - minvals) / ranges
    classifyResult = classify0(scaledQuery, normMat, DatingLables, 3)
    # Labels start at 1, list indices at 0.
    verdict = result[classifyResult - 1]
    print("you will probably like this person:", verdict)
def classify0(inx,dataSet,lables,k):
    """Classify ``inx`` by majority vote among its k nearest neighbours.

    Distances are Euclidean. When several labels share the highest vote
    count, the one met first while walking the neighbours from nearest to
    farthest is returned.

    Args:
        inx: 1-D feature vector to classify.
        dataSet: 2-D NumPy array of training samples, one per row.
        lables: Sequence of labels, indexable by row number of ``dataSet``.
        k: Number of neighbours that vote. Values larger than the number of
            training samples are clamped to that number.

    Returns:
        The winning label, i.e. an element of ``lables``.

    Raises:
        ValueError: If ``k`` is smaller than 1 or ``dataSet`` has no rows.
    """
    dataSetSize = dataSet.shape[0]
    # Asking for more neighbours than there are samples used to fail with an
    # IndexError; voting with every available sample is the sensible result.
    neighbours = min(k, dataSetSize)
    if neighbours < 1:
        raise ValueError(
            "k and the number of training samples must both be at least 1")

    # Broadcasting subtracts the query from every training row.
    diffMat = dataSet - np.asarray(inx)
    distances = (diffMat ** 2).sum(axis=1) ** 0.5
    nearest = distances.argsort()[:neighbours]

    classCount = {}
    for sampleIndex in nearest:
        voteIlable = lables[sampleIndex]
        classCount[voteIlable] = classCount.get(voteIlable, 0) + 1
    # max() returns the first key with the highest count, and dict order is
    # insertion order, so ties go to the label of the nearer neighbour.
    return max(classCount, key=classCount.get)