# File: KNN.py
# from numpy import *
import operator
import numpy as np
def createDataSet():
    """Build the small hard-coded 2-D sample set used by the demo.

    Returns:
        tuple: ``(group, labels)`` where ``group`` is a 6x2 NumPy array of
        points and ``labels`` is a list with the class label ('A' or 'B')
        of each row, in the same order.
    """
    # Each entry is (x, y, label); keeping them together makes the pairing
    # between a point and its label explicit.
    samples = [
        (1.0, 1.1, 'A'),
        (1.0, 1.0, 'A'),
        (0, 0, 'B'),
        (0, 0.1, 'B'),
        (1.3, 0.8, 'B'),
        (0.3, 0.6, 'A'),
    ]
    group = np.array([[x, y] for x, y, _ in samples])
    labels = [tag for _, _, tag in samples]
    return group, labels
def calassfy0(intx, dataset, labels, k):
    """Classify ``intx`` by majority vote among its k nearest neighbours.

    Distances are Euclidean, computed between ``intx`` and every row of
    ``dataset``.

    Args:
        intx: Query point. Anything that broadcasts against one row of
            ``dataset`` works, e.g. a length-n sequence or a ``(1, n)`` array.
        dataset: 2-D array-like of training samples, one sample per row.
        labels: Sequence of class labels; ``labels[i]`` is the label of
            ``dataset[i]``.
        k: Number of nearest neighbours that take part in the vote. Values
            larger than the number of samples are clamped to that number.

    Returns:
        The label that received the most votes. On a tie, the label that was
        encountered first while walking the neighbours from nearest to
        farthest wins.

    Raises:
        ValueError: If ``k`` is smaller than 1 or ``dataset`` has no rows.
    """
    dataset = np.asarray(dataset)
    datasize = dataset.shape[0]
    if k < 1 or datasize == 0:
        raise ValueError(
            "k must be >= 1 and dataset must not be empty "
            "(k=%r, samples=%d)" % (k, datasize)
        )
    # Never ask for more neighbours than there are samples.
    neighbours = min(k, datasize)

    # Broadcasting subtracts the query from every row without building an
    # explicit tiled copy of it.
    diffMat = np.asarray(intx) - dataset
    distance = (diffMat ** 2).sum(axis=1) ** 0.5
    sortDistanceindex = distance.argsort()

    classCount = {}
    for idx in sortDistanceindex[:neighbours]:
        voteIlabe = labels[idx]
        classCount[voteIlabe] = classCount.get(voteIlabe, 0) + 1

    # max() returns the first key with the highest count, and the dict keeps
    # insertion order, so ties are resolved in favour of the nearer label.
    return max(classCount, key=classCount.get)
# Demo: build the toy data set and print the label predicted for the point
# [0.7, 0.5] with k=3. These are module-level statements, so they also run
# whenever this module is imported.
group,label=createDataSet()
print(calassfy0([0.7, 0.5], group, label, 3))
# print(group.shape,type(group.shape))
"""
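dict.get(key, default=None)
The dict.get() method returns the value stored under the given key; if the key
is not in the dictionary, it returns the default value instead.
Python 3.x
In Python 3.x the iteritems() and viewitems() methods have both been removed,
and items() returns the same kind of result that viewitems() returned in 2.x.
In 3.x, replace iteritems() with items(); the result can be iterated in a for loop.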
"""
# File: Pary.py
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import KNN
def file2matrix(filename):
    """Load a tab-separated data file into a feature matrix and a label list.

    Every non-blank line must contain at least four tab-separated fields:
    the first three are read as numeric features and the fourth as a numeric
    label. Blank lines (for example a trailing newline at the end of the
    file) are skipped.

    Args:
        filename: Path of the text file to read.

    Returns:
        tuple: ``(reMat, classLabe)`` where ``reMat`` is an ``(n, 3)`` float
        array of features and ``classLabe`` is a list of ``n`` float labels.

    Raises:
        ValueError: If a feature or label field cannot be parsed as a float.
        IndexError: If a non-blank line has fewer than four fields.
    """
    # The context manager guarantees the file is closed even if parsing fails.
    with open(filename) as f:
        rows = [line.strip().split('\t') for line in f if line.strip()]

    reMat = np.zeros((len(rows), 3))
    classLabe = []
    for index, data in enumerate(rows):
        reMat[index, :] = [float(value) for value in data[0:3]]
        classLabe.append(float(data[3]))
    return reMat, classLabe
def drawPic(data, labels):
    """Show a scatter plot of column 1 against column 2 of ``data``.

    Both the marker size and the marker colour are derived from the label of
    each row (15 * label and 150 * label respectively), so rows with
    different labels are drawn differently.

    Args:
        data: 2-D array with at least three columns.
        labels: Sequence of numeric labels, one per row of ``data``.
    """
    label_values = np.array(labels)
    marker_sizes = 15 * label_values
    marker_colours = 150 * label_values
    plt.scatter(data[:, 1], data[:, 2], s=marker_sizes, c=marker_colours)
    plt.show()
def autonorm(data):
    """Min-max normalise every column of ``data``.

    Each value is mapped to ``(value - column_min) / column_range``. A column
    whose values are all equal has a range of 0; such a column is normalised
    to 0 instead of producing NaN from a division by zero.

    Args:
        data: 2-D array-like of numeric values, one sample per row.

    Returns:
        tuple: ``(Nomaldata, rang, col_min)`` where ``Nomaldata`` is the
        normalised array, ``rang`` is the per-column ``max - min`` (left at 0
        for constant columns) and ``col_min`` is the per-column minimum.
    """
    data = np.asarray(data)
    # Local names avoid shadowing the builtins min() and max().
    col_min = data.min(0)
    col_max = data.max(0)
    rang = col_max - col_min
    # Divide constant columns by 1 so they come out as 0 rather than NaN.
    divisor = np.where(rang == 0, 1, rang)
    # Broadcasting applies the per-column vectors to every row.
    Nomaldata = (data - col_min) / divisor
    return Nomaldata, rang, col_min
def testKNN(onedata, b):
    """Hold-out check of the k-NN classifier on a labelled data set.

    The first 10% of the rows are used as queries and the remaining rows as
    the training set. Every prediction is printed next to the expected label,
    followed by the overall error rate as a percentage.

    Args:
        onedata: 2-D array of samples, one per row.
        b: Sequence of numeric labels, one per row of ``onedata``.

    Returns:
        None. Results are printed. If there are fewer than 10 rows no query
        is run and an error rate of 0 is printed.
    """
    m = onedata.shape[0]
    cnt = int(m * 0.1)
    err = 0
    for i in range(cnt):
        result = KNN.calassfy0(onedata[i, :], onedata[cnt:m, :], b[cnt:m], 3)
        print("result=%d lane=%d" % (result, b[i]))
        if result != b[i]:
            err += 1
    # The format string ends in a percent sign, so scale the fraction by 100;
    # guard against an empty query set to avoid dividing by zero.
    rate = 100.0 * err / cnt if cnt else 0.0
    print("err= %.2f %%" % rate)
# a, b = file2matrix('a.txt')
# print('------------------------------')
# print(a)
# #drawPic(a,b)
# print('--------------')
# print(autonorm(a))
# onedata,rangdata,mindata = autonorm(a)
#
# testKNN(onedata,b);
def image2vector(filename):
    """Read a 32x32 text image of digits into a flat 1x1024 vector.

    The file is expected to hold at least 32 lines, each starting with at
    least 32 single-digit characters. Character ``j`` of line ``i`` is stored
    at position ``32 * i + j`` of the result.

    Args:
        filename: Path of the text image to read.

    Returns:
        numpy.ndarray: Array of shape ``(1, 1024)`` holding the digit values.

    Raises:
        IndexError: If a line is missing or shorter than 32 characters.
        ValueError: If a character is not a decimal digit.
    """
    revector = np.zeros((1, 1024))
    # The context manager closes the file even when a malformed line raises.
    with open(filename) as fp:
        for row in range(32):
            line = fp.readline()
            start = 32 * row
            revector[0, start:start + 32] = [int(line[col]) for col in range(32)]
    return revector
# print(image2vector("/0_0.txt")[0,0:32])
import os,sys
def handWritingClassTest(trainingDir='./trainingDigits', testDir='./testDigits', k=3):
    """Evaluate the k-NN classifier on directories of text digit images.

    Every file in ``trainingDir`` is loaded with ``image2vector`` and becomes
    one training row; its label is the integer before the first underscore in
    the file name. Every file in ``testDir`` is then classified with
    ``KNN.calassfy0``. Each misclassified file is printed, followed by the
    total error count and the error rate.

    Args:
        trainingDir: Directory holding the training images.
        testDir: Directory holding the test images.
        k: Number of neighbours passed to the classifier.

    Returns:
        None. Results are printed. With an empty test directory the error
        rate is reported as 0.
    """
    hwLabel = []
    trainingFileList = os.listdir(trainingDir)
    trainMat = np.zeros((len(trainingFileList), 1024))
    for i, filename in enumerate(trainingFileList):
        hwLabel.append(_digit_label(filename))
        trainMat[i, :] = image2vector(os.path.join(trainingDir, filename))

    testFileList = os.listdir(testDir)
    cnt = len(testFileList)
    error = 0
    for filename in testFileList:
        label = _digit_label(filename)
        vecteruderTest = image2vector(os.path.join(testDir, filename))
        ClassResult = KNN.calassfy0(vecteruderTest, trainMat, hwLabel, k)
        if label != ClassResult:
            print('std class =%d test result =%d filename %s' % (label, ClassResult, filename))
            error += 1
    # Guard against an empty test directory to avoid dividing by zero.
    print("error = %d,错误率是 %f" % (error, error / cnt if cnt else 0.0))


def _digit_label(filename):
    """Return the integer that precedes the first underscore in a file name."""
    return int(filename.split('.')[0].split('_')[0])
# Run the handwritten-digit evaluation. This is a module-level call, so it
# executes whenever the module is run or imported.
handWritingClassTest()
# Resource download: https://download.youkuaiyun.com/download/u010261063/10319972