K最近邻(k-Nearest Neighbor,KNN)
KNN分类器由两部分组成:(1)训练部分:只是把训练数据读进去,然后记下来;(2)测试部分:对每个测试样本,找到离它最近的k个训练样本,以这k个样本中出现次数最多的类别作为该测试样本的类别。
首先定义一个KNN的工具类:
import numpy as np
class KNearestNeighbor:
""" a kNN classifier with L2 distance """
def __init__(self):
    """Construct an empty classifier.

    Nothing is stored at construction time; the ``X_train`` and
    ``y_train`` attributes are assigned later by ``train``.
    """
def train(self, X, y):
    """Memorise the training set.

    No fitting is performed: both arguments are stored by reference
    (not copied) on the instance.

    Args:
        X: Training data, kept as ``self.X_train``.
        y: Training labels, kept as ``self.y_train``. Presumably one label
            per row of ``X``; this is not checked here.
    """
    self.X_train, self.y_train = X, y
def predict(self, X, k=1, num_loops=0):
    """Predict labels for test data using the stored training set.

    Args:
        X: Test data; handed unchanged to the selected
            ``compute_distances_*`` method.
        k: Forwarded as-is to ``predict_labels`` (the number of
            neighbours to consult).
        num_loops: Selects the distance implementation: 0 for
            ``compute_distances_no_loops``, 1 for
            ``compute_distances_one_loop``, 2 for
            ``compute_distances_two_loops``.

    Returns:
        Whatever ``self.predict_labels(dists, k=k)`` returns.

    Raises:
        ValueError: If ``num_loops`` is not 0, 1 or 2.
    """
    if num_loops == 0:
        dists = self.compute_distances_no_loops(X)
    elif num_loops == 1:
        dists = self.compute_distances_one_loop(X)
    elif num_loops == 2:
        dists = self.compute_distances_two_loops(X)
    else:
        # %s instead of %d: %d raises TypeError for non-numeric values
        # (e.g. '2' or None) and silently truncates floats, so the caller
        # would never see the intended ValueError / the real bad value.
        # The one-element tuple keeps tuple arguments from being unpacked.
        raise ValueError('Invalid value %s for num_loops' % (num_loops,))
    return self.predict_labels(dists, k=k)
def compute_distances_two_loops(self, X):
    """Compute squared L2 distances to every training point with two loops.

    Note that no square root is taken: each entry is the *squared*
    Euclidean distance. Since the square root is monotonic, the relative
    ordering of distances is the same as for the true L2 distance.

    Args:
        X: Test data, indexed as ``X[i, :]``; ``X.shape[0]`` is the number
            of test points.

    Returns:
        An array of shape ``(num_test, num_train)`` where ``dists[i, j]``
        is the sum of squared differences between ``X[i, :]`` and
        ``self.X_train[j, :]``.
    """
    num_test = X.shape[0]
    num_train = self.X_train.shape[0]
    dists = np.zeros((num_test, num_train))
    # range (not xrange) so the method runs on Python 3 as well as Python 2.
    for i in range(num_test):
        test_row = X[i, :]  # invariant across the inner loop
        for j in range(num_train):
            # Squared L2 distance between the i-th test and j-th training point.
            dists[i, j] = np.sum((self.X_train[j, :] - test_row) ** 2)
    return dists
def compute_distances_one_loop(self, X):
num_test = X.shape[0]
num_train = self.X_train.shape[0]
dists = np.zeros((num_test, num_train))
for i in xrange(num_test):
# dists[i, :] = np.sum((self.X_train - X[i, :])**2, axis=1 )
train_2 = np.sum( (self.X_train)**2, axis=1 ).T