
import os.path
from sklearn.neighbors import KNeighborsClassifier as knn
import numpy as np
from os import listdir
"""
函数说明:将32*32的图像转化为(1,1024)的向量
Parameters:
filename - 文件名
returns:
returnVector - 返回的(1,1024)的向量
"""
def img2vector(filename):
returnVector = np.zeros(1024)
with open(filename) as fr:
for i in range(32):
line = fr.readline()
for j in range(32):
returnVector[32*i+j] = int(line[j])
return returnVector
"""
函数说明:返回数据矩阵与label列表
Parameters:
filename
returns:
returnMetrix,labels
"""
def datalabel(filename):
filelist = listdir(filename)
m = len(filelist)
returnMetrix = np.zeros((m,1024))
labels = []
for i in range(m):
filenamestr = filelist[i]
filepath = os.path.join(filename,filenamestr)
returnMetrix[i,:] = img2vector(filepath)
labels.append(filenamestr.split('_')[0])
return returnMetrix,labels
"""
函数说明:knn分类器
Parameters:
k - knn选择的最近数据个数
returns:
无
"""
def classfiy(k):
# 训练数据集和标签
traindata,trainlabels = datalabel('trainingDigits')
# 测试数据和真实标签
testdata,testlabels = datalabel('testDigits')
clf = knn(n_neighbors=k)
clf.fit(traindata,trainlabels)
errorcount = 0.0
testlen = len(testlabels)
for i in range(testlen):
label_predict = clf.predict(testdata[i].reshape(-1,1024))
if label_predict != (testlabels[i]):
errorcount += 1
print("预测:%d\t真实:%s"%(label_predict,testlabels[i]))
print('Error:%f%%'%(errorcount/len(testlabels)*100))

# NOTE(review): wildcard import from the module named 实现; nothing visible in
# this file shows which names it is expected to provide — confirm it is needed.
from 实现 import *
if __name__ == '__main__':
    # Run the evaluation with k = 5 only when executed as a script.
    classfiy(5)