KNN分类分析
数据生成
1、首先初始化knn分类器:传入一个训练数据集,以及与训练数据一一对应的类别标签,用来训练knn分类器。
2、通过计算找到测试数据最近的邻居
3、设置返回的最近邻居的数目。
返回值包括:测试数据的类别标志;k个最近邻居的类别标志;每个最近邻居到测试数据的距离。
代码
<span style="font-family:Arial;font-size:12px;"># -*- coding: utf-8 -*-
"""
Created on Fri Apr 17 11:45:19 2015
@author: carp
"""
import cv2
import numpy as np
from matplotlib import pyplot as plt
traindata = np.random.randint(0,100,(25,2)).astype(np.float32)
print 'traindata'
print traindata
responses = np.random.randint(0,2,(25,1)).astype(np.float32)
print 'response'
print responses
print 'responses.ravel()'
print responses.ravel()
print 'response.ravel() == 0'
print responses.ravel() == 0
print 'traindata[responses.ravel() == 0]'
red = traindata[responses.ravel() == 0]
print 'red\n',red
print red.size/2
plt.scatter(red[:,0],red[:,1],80,'r','*')
print 'response.ravel() == 1'
print responses.ravel() == 1
print 'traindata[responses.ravel() == 1]'
blue = traindata[responses.ravel() == 1]
print 'blue\n',blue
print blue.size/2
plt.scatter(blue[:,0],blue[:,1],80,'b','s')
newcomer = np.random.randint(0,100,(1,2)).astype(np.float32)
print newcomer
plt.scatter(newcomer[:,0],newcomer[:,1],80,'g','o')
knn = cv2.KNearest()
knn.train(traindata,responses)
ret,results,neighbours,dist = knn.find_nearest(newcomer,3)
print 'results:\t',results,'\n'
print "neighbours:\t:",neighbours,'\n'
print 'distances:\t',dist,'\n'
plt.show()</span>
生成结果
<span style="font-family:Arial;font-size:12px;"># -*- coding: utf-8 -*-
"""
Created on Fri Apr 17 11:45:19 2015
@author: carp
"""
import cv2
import numpy as np
from matplotlib import pyplot as plt
traindata = np.random.randint(0,100,(25,2)).astype(np.float32)
print 'traindata'
print traindata
responses = np.random.randint(0,2,(25,1)).astype(np.float32)
print 'response'
print responses
print 'responses.ravel()'
print responses.ravel()
print 'response.ravel() == 0'
print responses.ravel() == 0
print 'traindata[responses.ravel() == 0]'
red = traindata[responses.ravel() == 0]
print 'red\n',red
print red.size/2
plt.scatter(red[:,0],red[:,1],80,'r','*')
print 'response.ravel() == 1'
print responses.ravel() == 1
print 'traindata[responses.ravel() == 1]'
blue = traindata[responses.ravel() == 1]
print 'blue\n',blue
print blue.size/2
plt.scatter(blue[:,0],blue[:,1],80,'b','s')
newcomer = np.random.randint(0,100,(1,2)).astype(np.float32)
print newcomer
plt.scatter(newcomer[:,0],newcomer[:,1],80,'g','o')
knn = cv2.KNearest()
knn.train(traindata,responses)
ret,results,neighbours,dist = knn.find_nearest(newcomer,3)
print 'results:\t',results,'\n'
print "neighbours:\t:",neighbours,'\n'
print 'distances:\t',dist,'\n'
plt.show()</span>