import numpy as np
import matplotlib.pyplot as plt
import os
import operator
def img2vector(filename):
    """Load a 32x32 text image of digit characters into a 1x1024 row vector.

    The first 32 characters of each of the first 32 lines of the file are
    converted with ``int`` and stored row by row, so the character at
    line ``i``, column ``j`` ends up at index ``32 * i + j``.

    Args:
        filename: Path of the text file to read.

    Returns:
        A ``numpy`` array of shape ``(1, 1024)`` holding the parsed values.

    Raises:
        IndexError: If the file has fewer than 32 lines or a line has fewer
            than 32 characters.
        ValueError: If a character cannot be parsed by ``int``.
    """
    returnVect = np.zeros((1, 1024))
    # The context manager guarantees the handle is closed even when parsing
    # fails part-way through (the handle used to be left open).
    with open(filename, 'r') as f:
        for i in range(32):
            lineStr = f.readline()
            for j in range(32):
                returnVect[0, 32 * i + j] = int(lineStr[j])
    return returnVect
def classify0(inX, dataSet, labels, k):
    """Classify ``inX`` by majority vote among its ``k`` nearest rows.

    Distances are Euclidean: the per-row squared differences between
    ``inX`` and ``dataSet`` are summed along axis 1 and square-rooted.

    Args:
        inX: Query vector to classify.
        dataSet: 2-D array whose rows are the known samples.
        labels: Sequence of labels, indexed in step with ``dataSet`` rows.
        k: Number of nearest rows that take part in the vote.

    Returns:
        The label with the highest vote count among the ``k`` nearest rows.
    """
    rowCount = dataSet.shape[0]
    # Repeat the query once per known row so the subtraction is row-aligned.
    deltas = np.tile(inX, (rowCount, 1)) - dataSet
    dists = ((deltas ** 2).sum(axis=1)) ** 0.5
    order = dists.argsort()  # row indices, nearest first

    # Labels of the k closest rows, nearest first.
    nearestLabels = [labels[order[rank]] for rank in range(k)]

    votes = {}
    for label in nearestLabels:
        votes[label] = votes.get(label, 0) + 1

    # Pick the label with the most votes.
    ranked = sorted(votes.items(), key=lambda pair: pair[1], reverse=True)
    return ranked[0][0]
def handwritingClassTest(
        trainingDir='E:\\machine learning\\machine learning ex\\digits\\digits\\trainingDigits',
        testDir='E:\\machine learning\\machine learning ex\\digits\\digits\\testDigits',
        k=3):
    """Evaluate the kNN digit classifier and print its error rate.

    Every file in ``trainingDir`` is loaded with ``img2vector`` to form the
    training matrix. Each file in ``testDir`` is then classified with
    ``classify0`` and compared against its true label. The label of a file
    is the part of its name before the first underscore.

    Args:
        trainingDir: Directory holding the training digit files.
        testDir: Directory holding the test digit files.
        k: Number of neighbours passed to ``classify0``.

    Raises:
        ZeroDivisionError: If ``testDir`` contains no files.
    """
    trainingFileList = os.listdir(trainingDir)
    m = len(trainingFileList)
    hwLabels = []
    trainingMat = np.zeros((m, 1024))
    for i, fileNameStr in enumerate(trainingFileList):
        hwLabels.append(fileNameStr.split('_')[0])
        trainingMat[i, :] = img2vector(os.path.join(trainingDir, fileNameStr))

    testFileList = os.listdir(testDir)
    mTest = len(testFileList)
    errorCount = 0.0
    for fileNameStr in testFileList:
        classNumberStr = fileNameStr.split('_')[0]
        # Read from the test directory: the names come from testDir, so
        # loading them from the training directory (as before) was wrong.
        vectorUnderTest = img2vector(os.path.join(testDir, fileNameStr))
        classifierResult = classify0(vectorUnderTest, trainingMat, hwLabels, k)
        print('预测结果:', classifierResult, "真实结果:", classNumberStr)
        if classifierResult != classNumberStr:
            errorCount += 1
    print('错误率为:', errorCount / float(mTest))
# Location of one sample training digit file; nothing in the lines below
# reads this variable.
path = (
    'E:\\machine learning\\machine learning ex\\digits\\digits'
    '\\trainingDigits\\0_13.txt'
)
# Run the full train/test evaluation when this file is executed.
handwritingClassTest()