I spent the last couple of days looking at PCA dimensionality reduction and tested it with OpenCV, mainly following [1] and [2]. Below is the code as I understand it.
#include <opencv2/opencv.hpp>
#include <iostream>
using namespace cv;
using namespace std;

#define DIMENTIONS 7
#define SAMPLE_NUM 31
float Coordinates[DIMENTIONS*SAMPLE_NUM]={
101.5,100.4,97.0,98.7,100.8,114.2,104.2
,100.8,93.5,95.9,100.7,106.7,104.3,106.4
,100.8,97.4,98.2,98.2,99.5,103.6,102.4
,99.4,96.0,98.2,97.8,99.1,98.3,104.3
,101.8,97.7,99.0,98.1,98.4,102.0,103.7
,101.8,96.8,96.4,92.7,99.6,101.3,103.4
,101.3,98.2,99.4,103.7,98.7,101.4,105.3
,101.9,100.0,98.4,96.9,102.7,100.3,102.3
,100.3,98.9,97.2,97.4,98.1,102.1,102.3
,99.3,97.7,97.6,101.1,96.8,110.1,100.4
,98.7,98.4,97.0,99.6,95.6,107.2,99.8
,99.7,97.7,98.0,99.3,97.3,104.1,102.7
,97.6,96.5,97.6,102.5,97.2,100.6,99.9
,98.0,98.4,97.1,100.5,101.4,103.0,99.9
,101.1,98.6,98.7,102.4,96.9,108.2,101.7
,100.4,98.6,98.0,100.7,99.4,102.4,103.3
,99.3,96.9,94.0,98.1,99.7,109.7,99.2
,98.6,97.4,96.4,99.8,97.4,102.1,100.0
,98.2,98.2,99.4,99.3,99.7,101.5,99.9
,98.5,96.3,97.0,97.7,98.7,112.6,100.4
,98.4,99.2,98.1,100.2,98.0,98.2,97.8
,99.2,97.4,95.7,98.9,102.4,114.8,102.6
,101.3,97.9,99.2,98.8,105.4,111.9,99.9
,98.5,97.8,94.6,102.4,107.0,115.0,99.5
,98.3,96.3,98.5,106.2,92.5,98.6,101.6
,99.3,101.1,99.4,100.1,103.6,98.7,101.3
,99.2,97.3,96.2,99.7,98.2,112.6,100.5
,100.0,99.9,98.2,98.3,103.6,123.2,102.8
,102.2,99.4,96.2,98.6,102.4,115.3,101.2
,100.1,98.7,97.4,99.8,100.6,112.4,102.5
,104.3,98.7,100.2,116.1,105.2,101.6,102.6
};
//test sample (identical to the last training sample above)
float Coordinates_test[DIMENTIONS]={
104.3,98.7,100.2,116.1,105.2,101.6,102.6
};
#define PCA_MEAN "mean"
#define PCA_EIGEN_VECTOR "eigen_vector"
int main()
{
    //load samples into a SAMPLE_NUM x DIMENTIONS matrix, one sample per row
    Mat SampleSet(SAMPLE_NUM, DIMENTIONS, CV_32FC1);
    for (int i = 0; i < SAMPLE_NUM; ++i)
    {
        for (int j = 0; j < DIMENTIONS; ++j)
        {
            SampleSet.at<float>(i, j) = Coordinates[i * DIMENTIONS + j];
        }
    }
    //Training: build the PCA basis from the sample set (samples stored as rows)
    PCA pca(SampleSet, Mat(), PCA::DATA_AS_ROW);
    cout << "eigenvalues:" << endl << pca.eigenvalues << endl << endl;
    //cout << "eigenvectors" << endl << pca.eigenvectors << endl;
    //Test input
    Mat input(1, DIMENTIONS, CV_32FC1);
    for (int j = 0; j < DIMENTIONS; ++j)
    {
        input.at<float>(0, j) = Coordinates_test[j];
    }
    //choose the reduced dimension: keep enough leading eigenvalues
    //to retain more than 90% of the total variance
    int index = pca.eigenvalues.rows - 1;
    float sum = 0, sum0 = 0, ratio;
    for (int d = 0; d < pca.eigenvalues.rows; ++d)
    {
        sum += pca.eigenvalues.at<float>(d, 0);
    }
    for (int d = 0; d < pca.eigenvalues.rows; ++d)
    {
        sum0 += pca.eigenvalues.at<float>(d, 0);
        ratio = sum0 / sum;
        if (ratio > 0.9)
        {
            index = d;
            break;
        }
    }
    //keep only the first (index+1) eigenvectors as the reduced basis
    Mat eigenvectors_d(index + 1, DIMENTIONS, CV_32FC1);
    for (int i = 0; i < index + 1; ++i)
    {
        pca.eigenvectors.row(i).copyTo(eigenvectors_d.row(i));
    }
    cout << "eigenvectors" << endl << eigenvectors_d << endl;
    //write the mean and the reduced eigenvector matrix into an xml file
    FileStorage fs_w("config.xml", FileStorage::WRITE);
    fs_w << PCA_MEAN << pca.mean;
    fs_w << PCA_EIGEN_VECTOR << eigenvectors_d;
    fs_w.release();
    //Encoding: rebuild a PCA object from the stored mean/eigenvectors
    //and project the test input into the reduced space
    PCA pca_encoding;
    FileStorage fs_r("config.xml", FileStorage::READ);
    fs_r[PCA_MEAN] >> pca_encoding.mean;
    fs_r[PCA_EIGEN_VECTOR] >> pca_encoding.eigenvectors;
    fs_r.release();
    Mat output_encode(1, pca_encoding.eigenvectors.rows, CV_32FC1);
    pca_encoding.project(input, output_encode);
    cout << endl << "pca_encode:" << endl << output_encode << endl;
    //Decoding: back-project from the reduced space to the original space
    PCA pca_decoding;
    FileStorage fs_d("config.xml", FileStorage::READ);
    fs_d[PCA_MEAN] >> pca_decoding.mean;
    fs_d[PCA_EIGEN_VECTOR] >> pca_decoding.eigenvectors;
    fs_d.release();
    Mat output_decode(1, DIMENTIONS, CV_32FC1);
    pca_decoding.backProject(output_encode, output_decode);
    cout << endl << "pca_Decode:" << endl << output_decode << endl;
    return 0;
}
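For reference, with the data stored as rows, project computes y = (x - mean) * W^T and backProject computes x' = y * W + mean, where W is the (index+1) x DIMENTIONS matrix of retained eigenvectors. Since the rows of W are orthonormal, x' is the best approximation of x that the retained subspace can express.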
The output is:
eigenvalues:
[43.182041; 14.599923; 9.2121401; 4.0877957; 2.8236785; 0.88751495; 0.66496396]
eigenvectors
[0.01278889, 0.03393811, -0.099844977, -0.13044992, 0.20732452, 0.96349025, -0.020049129;
0.15659945, 0.037932698, 0.12129638, 0.89324093, 0.39454412, 0.046447847, 0.060190294;
0.21434425, 0.018043749, -0.0012475925, -0.40428901, 0.81335503, -0.22759444, 0.2773709;
0.43591988, -0.047541384, 0.19851086, -0.0035106051, -0.35545754, 0.10898948, 0.79376709]
pca_encode:
[-5.6273661, 17.138182, -0.078819014, 0.68144321]
pca_Decode:
[102.88557, 98.402702, 100.33086, 116.21081, 105.37261, 101.63729, 103.39891]
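A quick check of the 90% threshold against these eigenvalues: the total is about 75.46; the first three components give (43.18 + 14.60 + 9.21) / 75.46 ≈ 0.888, still below 0.9, and adding the fourth gives 71.08 / 75.46 ≈ 0.942. That is why the loop stops at index = 3 and 4 components are kept, matching the 4-dimensional pca_encode vector above.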
Comparing the final backProject result with the original input, the difference is fairly small:
float Coordinates_test[DIMENTIONS]={
104.3,98.7,100.2,116.1,105.2,101.6,102.6
};
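To quantify "fairly small", a few lines like the following could be appended at the end of main() (a minimal sketch; input and output_decode are the Mats from the program above, and cv::norm supplies the L2 distance):
double abs_err = norm(input, output_decode, NORM_L2); //L2 distance between original and decoded vectors
double rel_err = abs_err / norm(input, NORM_L2);      //error relative to the input magnitude
cout << endl << "reconstruction error: " << abs_err << " (relative: " << rel_err << ")" << endl;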
Principal component analysis, as its name suggests, finds the most significant information in the data and discards the minor parts, thereby reducing the amount of data.
The concrete steps are (a code sketch of these steps follows the list):
1. For each sample, extract the useful information into a vector;
2. Compute the mean of all the sample vectors;
3. Subtract the mean from each sample vector and stack the results into a matrix;
4. Multiplying that matrix by its transpose gives the covariance matrix; this covariance matrix is diagonalizable, its diagonal entries after diagonalization are the eigenvalues, and each eigenvalue corresponds to an eigenvector (the eigenvectors must be normalized);
5. Pick the N largest eigenvalues (N is the number of principal components (PCs); I feel this is the core of PCA: you choose it yourself, and the smaller it is, the more the data shrinks, but the worse the recognition performance gets) and assemble the corresponding eigenvectors into a new matrix;
6. Multiplying the (centered) sample vectors by the transpose of this new matrix yields the reduced-dimension data (this data is the relatively dominant part of the original, and its volume is usually far smaller than the original, though of course that depends on the number of PCs you pick).
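To make steps 2 through 6 concrete, here is a minimal sketch that redoes the decomposition by hand with OpenCV's calcCovarMatrix and eigen (the 4x3 toy data and the choice N = 2 are made up for the illustration):
#include <opencv2/opencv.hpp>
#include <iostream>
using namespace cv;
using namespace std;
int main()
{
    //toy data: 4 samples with 3 dimensions each, one sample per row (step 1)
    float data[] = { 2.5f, 2.4f, 0.5f,
                     0.5f, 0.7f, 1.1f,
                     2.2f, 2.9f, 0.4f,
                     1.9f, 2.2f, 0.9f };
    Mat samples(4, 3, CV_32FC1, data);
    //steps 2-4: mean, mean-centered matrix and covariance matrix in one call
    Mat covar, mean;
    calcCovarMatrix(samples, covar, mean,
                    COVAR_NORMAL | COVAR_ROWS | COVAR_SCALE, CV_32FC1);
    //step 4: diagonalize; eigen() returns the eigenvalues in descending order
    //and the corresponding normalized eigenvectors as rows
    Mat eigenvalues, eigenvectors;
    eigen(covar, eigenvalues, eigenvectors);
    //step 5: keep the eigenvectors of the N largest eigenvalues
    int N = 2;
    Mat W = eigenvectors.rowRange(0, N);
    //step 6: center the samples and project them onto the N eigenvectors
    Mat centered = samples - repeat(mean, samples.rows, 1);
    Mat projected = centered * W.t(); //4x3 times 3x2 gives 4x2
    cout << "projected:" << endl << projected << endl;
    return 0;
}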
[1] http://blog.youkuaiyun.com/yang_xian521/article/details/7445536
[2] http://blog.youkuaiyun.com/abcjennifer/article/details/8002329