OpenCV (2) ML Library -> K-Nearest Neighbour Classifier

This article examines the principles, applications, and implementation details of the KNN algorithm, including how classification is performed by computing distances, and the algorithm's strengths and weaknesses. A C++ code example demonstrates a concrete implementation.

    KNN is an abbreviation of the k-Nearest Neighbor algorithm, also known as the nearest-neighbor algorithm, and is a classification algorithm. The KNN method is especially effective at screening the feature variables of overlapping (inclusive) data. The nearest-neighbor algorithm classifies using a vector space model: cases of the same category are highly similar to one another, so the probable category of an unknown case can be estimated by computing its similarity to cases of known category.
Goal: classify a case of unknown category.
Input: the unknown-category item to be classified, and a set D of known-category cases containing j cases.
Output: the item's probable category.
Compute the similarity of Item to D1, D2, ..., Dj, obtaining Sim(Item, D1), Sim(Item, D2), ..., Sim(Item, Dj).
Sort Sim(Item, D1), Sim(Item, D2), ..., Sim(Item, Dj); every case whose similarity exceeds the threshold t is placed into the neighbor set NN. Take the top k cases from NN and decide Item's probable category by majority vote. For example, with k = 3 and neighbor labels {A, A, B}, Item would be classified as A.

    If the majority of the k most similar samples (i.e., the nearest neighbors in feature space) of a sample belong to one category, the sample is assigned to that category. In making the classification decision, the method relies only on the categories of the one or few samples nearest to the sample being classified.

    Although KNN also depends on limit theorems in principle, the classification decision involves only a small number of neighboring samples, so the method copes relatively well with imbalanced sample sets. Moreover, because KNN determines the category mainly from a limited number of nearby samples rather than by discriminating class regions, it is better suited than other methods to sample sets whose class domains intersect or overlap heavily.

    The main drawback is computational cost: for every sample to be classified, the distances to all known samples must be computed in order to find its K nearest neighbors. A common remedy is to edit the known sample set in advance, removing samples that contribute little to classification. Another approach, Reverse KNN, reduces the computational complexity of KNN and improves classification efficiency.

    The algorithm is best suited to automatic classification of class domains with large sample sizes; class domains with small sample sizes are prone to misclassification under this method.


Algorithm steps (a brute-force C++ sketch of these steps follows the list):

step.1---initialize the distance to the maximum value

step.2---compute the distance dist between the unknown sample and each training sample

step.3---find the largest distance maxdist among the current K nearest samples

step.4---if dist is smaller than maxdist, take that training sample as one of the K nearest neighbors

step.5---repeat steps 2, 3, and 4 until the distances between the unknown sample and all training samples have been computed

step.6---count how often each class label occurs among the K nearest neighbors

step.7---pick the most frequent class label as the label of the unknown sample

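Below is a minimal, self-contained C++ sketch of these steps (plain C++, no OpenCV; the Sample struct and classifyKNN function are illustrative names, not from the original post). For brevity it keeps the k smallest distances with std::partial_sort instead of maintaining a running maxdist as in steps 3-4; the result is the same.

#include <algorithm>
#include <cmath>
#include <map>
#include <utility>
#include <vector>

struct Sample { float x, y; int label; };

// Brute-force KNN over 2-D points, following steps 1-7 above.
// Assumes 0 < k <= train.size().
int classifyKNN( const std::vector<Sample>& train, float qx, float qy, int k )
{
    // step 2: distance from the query point to every training sample
    std::vector< std::pair<float, int> > dist; // (distance, label)
    dist.reserve( train.size() );
    for( size_t i = 0; i < train.size(); i++ )
    {
        float dx = train[i].x - qx, dy = train[i].y - qy;
        dist.push_back( std::make_pair( std::sqrt( dx*dx + dy*dy ), train[i].label ) );
    }

    // steps 3-5: keep only the k smallest distances
    std::partial_sort( dist.begin(), dist.begin() + k, dist.end() );

    // step 6: count the occurrences of each class label among the k neighbors
    std::map<int, int> votes;
    for( int i = 0; i < k; i++ )
        votes[ dist[i].second ]++;

    // step 7: the most frequent label wins
    int best = -1, bestCount = 0;
    for( std::map<int, int>::iterator it = votes.begin(); it != votes.end(); ++it )
        if( it->second > bestCount ) { bestCount = it->second; best = it->first; }
    return best;
}

OpenCV's legacy ML module wraps the same idea in the CvKNearest class, declared below.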

/****************************************************************************************\
*                          K-Nearest Neighbour Classifier                                *
\****************************************************************************************/

// k Nearest Neighbors
class CV_EXPORTS CvKNearest : public CvStatModel
{
public:

    CvKNearest();
    virtual ~CvKNearest();

    CvKNearest( const CvMat* _train_data, const CvMat* _responses,
                const CvMat* _sample_idx=0, bool _is_regression=false, int max_k=32 );

    virtual bool train( const CvMat* _train_data, const CvMat* _responses,
                        const CvMat* _sample_idx=0, bool is_regression=false,
                        int _max_k=32, bool _update_base=false );
    
    virtual float find_nearest( const CvMat* _samples, int k, CvMat* results=0,
        const float** neighbors=0, CvMat* neighbor_responses=0, CvMat* dist=0 ) const;
    
#ifndef SWIG
    CvKNearest( const cv::Mat& _train_data, const cv::Mat& _responses,
               const cv::Mat& _sample_idx=cv::Mat(), bool _is_regression=false, int max_k=32 );
    
    virtual bool train( const cv::Mat& _train_data, const cv::Mat& _responses,
                       const cv::Mat& _sample_idx=cv::Mat(), bool is_regression=false,
                       int _max_k=32, bool _update_base=false );    
    
    virtual float find_nearest( const cv::Mat& _samples, int k, cv::Mat* results=0,
                                const float** neighbors=0,
                                cv::Mat* neighbor_responses=0,
                                cv::Mat* dist=0 ) const;
#endif
    
    virtual void clear();
    int get_max_k() const;
    int get_var_count() const;
    int get_sample_count() const;
    bool is_regression() const;

protected:

    virtual float write_results( int k, int k1, int start, int end,
        const float* neighbor_responses, const float* dist, CvMat* _results,
        CvMat* _neighbor_responses, CvMat* _dist, Cv32suf* sort_buf ) const;

    virtual void find_neighbors_direct( const CvMat* _samples, int k, int start, int end,
        float* neighbor_responses, const float** neighbors, float* dist ) const;


    int max_k, var_count;
    int total;
    bool regression;
    CvVectors* samples;
};
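
Before the full demo below, here is a minimal usage sketch of the cv::Mat overloads declared above (OpenCV 2.x; the classifyPoint helper is an illustrative name, not part of OpenCV):

#include <opencv2/core/core.hpp>
#include <opencv2/ml/ml.hpp>

// Train a KNN classifier on N 2-D float samples with float class labels,
// then classify a single point (x, y) using k neighbors.
float classifyPoint( const cv::Mat& trainData,    // N x 2, CV_32FC1
                     const cv::Mat& trainLabels,  // N x 1, CV_32FC1
                     float x, float y, int k )
{
    CvKNearest knn( trainData, trainLabels, cv::Mat(), false, k );
    cv::Mat sample = ( cv::Mat_<float>( 1, 2 ) << x, y );
    return knn.find_nearest( sample, k );  // returns the majority label among the k neighbors
}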


// Source adapted from: http://www.mysjtu.com/page/M0/S914/914320.html
#include "stdafx.h"
#include <ml.h>   
#include <iostream>
#include <highgui.h>
#include <cv.h>
#include <cxcore.h> 
using namespace cv; 
using namespace std;
 
int main( int argc, char** argv ) 
{     
	const int K = 20;     
	int i, j, k, accuracy;     
	float response;     
	int train_sample_count = 100;     
	CvRNG rng_state = cvRNG(-1); // initialize the random number generator state
	CvMat* trainData = cvCreateMat( train_sample_count, 2, CV_32FC1 );     
	CvMat* trainClasses = cvCreateMat( train_sample_count, 1, CV_32FC1 );     
	IplImage* img = cvCreateImage( cvSize( 500, 500 ), 8, 3 );     
	float _sample[2];     
	CvMat sample = cvMat( 1, 2, CV_32FC1, _sample );     
	cvZero( img );  

	CvMat trainData1, trainData2, trainClasses1, trainClasses2;    

	// form the training samples     
	cvGetRows( trainData, &trainData1, 0, train_sample_count/2 ); // take a view of a row span of the matrix
	cvRandArr( &rng_state, &trainData1, CV_RAND_NORMAL, cvScalar(200,200), cvScalar(50,50) ); // fill with Gaussian random values (mean 200, stddev 50) and update the RNG state

	cvGetRows( trainData, &trainData2, train_sample_count/2, train_sample_count );     
	cvRandArr( &rng_state, &trainData2, CV_RAND_NORMAL, cvScalar(300,300), cvScalar(50,50) );  

	cvGetRows( trainClasses, &trainClasses1, 0, train_sample_count/2 );     
	cvSet( &trainClasses1, cvScalar(1) );     

	cvGetRows( trainClasses, &trainClasses2, train_sample_count/2, train_sample_count );     
	cvSet( &trainClasses2, cvScalar(2) );   




	// learn classifier     
	CvKNearest knn( trainData, trainClasses, 0, false, K );    
	CvMat* nearests = cvCreateMat( 1, K, CV_32FC1);  

	for( i = 0; i < img->height; i++ )     
	{         
		for( j = 0; j < img->width; j++ )         
		{             
			sample.data.fl[0] = (float)j;             
			sample.data.fl[1] = (float)i;   

			// estimates the response and get the neighbors' labels             
			response = knn.find_nearest(&sample,K,0,0,nearests,0);      

			// compute the number of neighbors representing the majority             
			for( k = 0, accuracy = 0; k < K; k++ )             
			{                 
				if( nearests->data.fl[k] == response)                     
					accuracy++;             
			}   

			// highlight the pixel depending on the accuracy (or confidence);
			// with 2 classes the winner always gets at least K/2 votes, so
			// compare against K/2 rather than a fixed constant
			cvSet2D( img, i, j, response == 1 ?                 
				(accuracy > K/2 ? CV_RGB(180,0,0) : CV_RGB(180,120,0)) :                 
				(accuracy > K/2 ? CV_RGB(0,180,0) : CV_RGB(120,120,0)) );         
		}     
	}       
	

	// display the original training samples     
	for( i = 0; i < train_sample_count/2; i++ )     
	{         
		CvPoint pt;         
		pt.x = cvRound(trainData1.data.fl[i*2]);         
		pt.y = cvRound(trainData1.data.fl[i*2+1]);         
		cvCircle( img, pt, 2, CV_RGB(255,0,0), CV_FILLED );  

		pt.x = cvRound(trainData2.data.fl[i*2]);         
		pt.y = cvRound(trainData2.data.fl[i*2+1]);         
		cvCircle( img, pt, 2, CV_RGB(0,255,0), CV_FILLED );     
	}      
	cvNamedWindow( "classifier result", 1 );     
	cvShowImage( "classifier result", img );     
	cvWaitKey(0);      
	cvReleaseMat( &trainClasses );     
	cvReleaseMat( &trainData );     
	cvReleaseMat( &nearests );   // release the neighbor-label buffer as well
	cvReleaseImage( &img );      // and the display image
	return 0; 
} 
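
The program sweeps every pixel of the 500x500 image, classifies it with K = 20 neighbors, and colors it by the predicted class: red shades for class 1 and green shades for class 2, with a yellowish tint where fewer neighbors agree with the prediction. The original training points are then drawn on top as filled red and green circles.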


