算法同之前的NN算法没什么太大的变化
直接上代码
程序输出不同k值下的准确率
#coding=utf-8
#author='HL'
'''
利用pickle模块存储对象
dump类似于write
load类似于read
完成对象的存储以及读取
'''
import cPickle as pickle
import numpy as np
import heapq
class NearestNeighbor(object):
    """k-nearest-neighbour classifier using the L1 (Manhattan) distance.

    ``train`` only stores the samples; the distance computation and the
    vote both happen in ``predict``.
    """

    def __init__(self):
        pass

    def train(self, x, y):
        """Store the training set.

        x -- 2-D array with one training sample per row.
        y -- sequence of labels, where y[i] is the label of row x[i].
        """
        self.x_Tr = x
        self.y_Tr = y

    def predict(self, x, k):
        """Predict a label for every row of x.

        For each row, the k training rows with the smallest L1 distance
        vote with their labels.  Votes are counted nearest-first and the
        prediction is the first label to reach the highest count, so ties
        are resolved in favour of the label that got there earlier.

        x -- 2-D array with one sample per row (same width as the
             training rows).
        k -- number of neighbours that vote; expected to be >= 1.

        Returns a 1-D array with one predicted label per row of x, using
        the dtype of the training labels.  The array is also printed.
        """
        # Unsigned (and narrow) integer inputs are widened first: a
        # subtraction such as uint8(0) - uint8(5) wraps around to 251, and
        # abs() cannot repair that, which corrupts every distance.
        train_x = self._widen(np.asarray(self.x_Tr))
        test_x = self._widen(np.asarray(x))
        labels = np.asarray(self.y_Tr)

        # Use the labels' own dtype instead of dtype=type(self.y_Tr), which
        # silently produced an object array.
        y_Pre = np.zeros(test_x.shape[0], dtype=labels.dtype)
        for i in range(test_x.shape[0]):
            distance = np.sum(np.abs(train_x - test_x[i]), axis=1)
            min_index = self.n_Labels_index_get(distance, k)
            votes = {}
            best = 0
            for index in min_index:
                label = labels[index]
                votes[label] = votes.get(label, 0) + 1
                # Strict '>' keeps the earlier leader on a tie.
                if votes[label] > best:
                    best = votes[label]
                    y_Pre[i] = label
        print(y_Pre)
        return y_Pre

    def n_Labels_index_get(self, distance, n):
        """Return the indices of the n smallest entries of distance.

        The indices come back as a list ordered from smallest to largest
        distance.
        """
        return heapq.nsmallest(n, range(len(distance)), distance.take)

    @staticmethod
    def _widen(a):
        """Promote bool/integer arrays to a signed type of at least 32 bits.

        Arrays of any other kind (e.g. floating point) are returned as is.
        """
        if a.dtype.kind in 'bui':
            return a.astype(np.promote_types(a.dtype, np.int32))
        return a
def un_P(path):
    """Load and return the object pickled in the file at path.

    path -- filesystem path of a pickle file; it is opened in binary mode.

    Returns whatever object the file contains.
    """
    # NOTE: unpickling can execute arbitrary code, so only use this on
    # files from a trusted source.
    # The with-block closes the file even if pickle.load raises.
    with open(path, 'rb') as f:
        return pickle.load(f)
# Only a subset of the data is used: every TRAIN_STRIDE-th sample of each
# training batch and the first TEST_SIZE samples of the test batch.
# Set TRAIN_STRIDE = 1 to train on every sample.
TRAIN_STRIDE = 10
TEST_SIZE = 1000

data_Train = []
labels_Train = []
path = "A:/python_test/cifar-10-batches-py/"
# Training data is spread over the files data_batch_1 .. data_batch_5.
for i in range(1, 6):
    now_Path = path + 'data_batch_' + str(i)
    dict1 = un_P(now_Path)
    data_Train.extend(dict1['data'][::TRAIN_STRIDE])
    labels_Train.extend(dict1['labels'][::TRAIN_STRIDE])

dict1 = un_P(path + 'test_batch')
data_Test = list(dict1['data'][:TEST_SIZE])
labels_Test = list(dict1['labels'][:TEST_SIZE])

data_Tr = np.asarray(data_Train)
data_Te = np.asarray(data_Test)
labels_Tr = np.asarray(labels_Train)
labels_Te = np.asarray(labels_Test)
print(data_Tr.shape)
print(data_Te.shape)
print(labels_Tr.shape)
print(labels_Te.shape)

zz = NearestNeighbor()
# Pass the label array built above rather than the raw Python list.
zz.train(data_Tr, labels_Tr)

# res collects [k, accuracy] pairs, where accuracy is the fraction of test
# samples whose predicted label equals the true label.
res = []
for k in [1, 3, 5, 7, 9, 15, 20, 40, 60, 100]:
    out = np.mean(zz.predict(data_Te, k) == labels_Te)
    res.append([k, out])
    # Running results, printed after each k as a progress indicator.
    print(res)
print(res)
k值 | 准确率 |
---|---|
1 | 0.203 |
3 | 0.22 |
5 | 0.22 |
7 | 0.216 |
9 | 0.21 |
15 | 0.205 |
20 | 0.204 |
40 | 0.192 |
60 | 0.193 |
100 | 0.175 |
- 不同k值下的准确率
虽然训练以及测试所用的样本不多,但是我们可以看出,并不是k值越大越好,k值在3~7之间较为好