% Purpose: visualize samples drawn from the CIFAR-10 database.
%% Load the CIFAR-10 data
close all; clear; clc
% NOTE: the string literals below must use plain ASCII single quotes;
% typographic quotes are not valid MATLAB string delimiters.
batchDir = 'D:\myprojects\datasets\old\cifar-10-batches-mat\';
batchFiles = {'data_batch_1.mat', 'data_batch_2.mat', 'data_batch_3.mat', ...
              'data_batch_4.mat', 'data_batch_5.mat', 'test_batch.mat'};
numBatches = numel(batchFiles);
dataParts = cell(numBatches, 1);
labelParts = cell(numBatches, 1);
for b = 1:numBatches
    % Load into a struct so that nothing is injected into the workspace implicitly.
    batch = load([batchDir batchFiles{b}], 'data', 'labels');
    dataParts{b} = batch.data;
    labelParts{b} = batch.labels;
end
% Labels of all batches, stacked in batch order (five training batches, then the test batch).
cifar10labels = vertcat(labelParts{:});
% Image data with the matching label appended as the last column of every row.
database = [vertcat(dataParts{:}) cifar10labels];
clear batchDir batchFiles numBatches dataParts labelParts batch b
%% Visualization scheme 1: sample images class by class
% Sort the rows by label so that the images of each class are contiguous.
[sortedLabels, order] = sort(cifar10labels); % ascending label order
dataInClass = database(order, :);            % rows reordered to follow the sorted labels
% clear database;   % kept disabled: database is still needed by scheme 2 below
numImg = length(cifar10labels);                      % total number of images
numPerClass = numImg/length(unique(cifar10labels));  % images per class (assumes all classes are the same size)
numShow = 100;  % total number of images to display
numInRow = 10;  % images drawn per class; also used as the number of classes visited
% I2 is a 4-D array: the first three dimensions hold one 32*32*3 RGB image,
% the fourth dimension is the image number.
I2 = zeros(32, 32, 3, numShow, 'uint8');
for i = 1:numInRow
    randID = randi(numPerClass, numInRow, 1);     % numInRow random positions inside one class
    rowsOfClass = randID + (i-1)*numPerClass;     % shift them into the row range of the i-th class
    for j = 1:length(rowsOfClass)
        % Columns 1:1024, 1025:2048 and 2049:3072 are the red, green and blue
        % channels; the final column is the label and is dropped here.
        pixels = dataInClass(rowsOfClass(j), 1:end-1);
        % reshape fills column by column, so every 32*32 plane is transposed
        % (permute swaps the first two dimensions of all three planes at once).
        img = permute(reshape(pixels, 32, 32, 3), [2 1 3]);
        % Store the image as unsigned 8-bit data in its slot of I2.
        I2(:, :, :, j+(i-1)*numInRow) = uint8(img);
    end
end
% Plot
figure('Color', [1 1 1]); hold on;
montage(I2);
xlabel('sampling by labels');
set(get(gca,'xlabel'),'FontName','Times New Roman');
%% Visualization scheme 2: sample images uniformly at random
numImg = length(cifar10labels);  % total number of images
numShow = 100;                   % total number of images to display
% I2 is a 4-D array: the first three dimensions hold one 32*32*3 RGB image,
% the fourth dimension is the image number.
I2 = zeros(32, 32, 3, numShow, 'uint8');
randID = randi(numImg, numShow, 1);  % numShow random row indices into database
% A single loop over the samples replaces the former row/column double loop,
% whose slot index was only valid when both loop bounds were equal.
for k = 1:numShow
    % Columns 1:1024, 1025:2048 and 2049:3072 are the red, green and blue
    % channels; the final column is the label and is dropped here.
    pixels = database(randID(k), 1:end-1);
    % reshape fills column by column, so every 32*32 plane is transposed.
    img = permute(reshape(pixels, 32, 32, 3), [2 1 3]);
    I2(:, :, :, k) = uint8(img);
end
% Plot
figure('Color', [1 1 1]); hold on;
montage(I2);
xlabel('sampling by random');
set(get(gca,'xlabel'),'FontName','Times New Roman');
% Purpose: visualize GIST descriptors of a few test images.
close all; clear; clc
addpath 'tools/gist'
%% Examples 1-3: one figure per test image
% NOTE: string literals must use plain ASCII single quotes; typographic
% quotes are not valid MATLAB string delimiters.
testImages = {'test1.jpg', 'test2.jpg', 'test3.jpg'};
for t = 1:numel(testImages)
    % Load the test image and resize it to 256*256.
    img = imresize(imread(testImages{t}), [256, 256]);
    % Build a fresh parameter struct for every image, because LMgist
    % returns (and may extend) the struct it is given.
    param = struct( ...
        'imageSize',            [256 256], ...  % image size: 256*256
        'orientationsPerScale', [8 8 8 8], ...  % 4 scales, 8 orientations each
        'numberBlocks',         4, ...          % number of image blocks
        'fc_prefilt',           4);
    % Compute the GIST descriptor.
    [gistVec, param] = LMgist(img, '', param);
    % Show the input image next to its descriptor.
    figure
    subplot(121)
    imshow(img)
    title('Input image')
    subplot(122)
    showGist(gistVec, param)
    title('Descriptor')
end
% Purpose: extract features for the whole database.
%% Load the CIFAR-10 data
close all; clear; clc
addpath 'tools/gist'
% NOTE: the string literals below must use plain ASCII single quotes;
% typographic quotes are not valid MATLAB string delimiters.
batchDir = 'D:\myprojects\datasets\old\cifar-10-batches-mat\';
batchFiles = {'data_batch_1.mat', 'data_batch_2.mat', 'data_batch_3.mat', ...
              'data_batch_4.mat', 'data_batch_5.mat', 'test_batch.mat'};
numBatches = numel(batchFiles);
dataParts = cell(numBatches, 1);
labelParts = cell(numBatches, 1);
for b = 1:numBatches
    % Load into a struct so that nothing is injected into the workspace implicitly.
    batch = load([batchDir batchFiles{b}], 'data', 'labels');
    dataParts{b} = batch.data;
    labelParts{b} = batch.labels;
end
% Labels of all batches, stacked in batch order (five training batches, then the test batch).
cifar10labels = vertcat(labelParts{:});
% Image data with the matching label appended as the last column of every row.
database = [vertcat(dataParts{:}) cifar10labels];
clear batchDir batchFiles numBatches dataParts labelParts batch b
%% Extract GIST features
numImg = size(database,1);  % total number of images
% One row per image, 512 feature dimensions.
CIFAR10 = zeros(numImg,512);
% Process every image in turn.
for index = 1:numImg
    disp (['正在处理第',num2str(index),'张图片...'])
    % Columns 1:1024, 1025:2048 and 2049:3072 are the red, green and blue
    % components; the final column is the label and is dropped here.
    pixels = database(index, 1:end-1);
    % reshape fills column by column, so every 32*32 plane is transposed
    % (permute swaps the first two dimensions of all three planes at once).
    img = uint8(permute(reshape(pixels, 32, 32, 3), [2 1 3]));
    % Flip the image horizontally (per the original author: to match the
    % pictures on the official website).
    imgFlipped = img(:, end:-1:1, :);
    % GIST parameters; rebuilt for every image because LMgist returns the
    % struct and it is assigned back to param below.
    param = struct( ...
        'orientationsPerScale', [8 8 8 8], ...
        'numberBlocks',         4, ...
        'fc_prefilt',           4);
    % Compute the GIST descriptor and store it as one row.
    [gist, param] = LMgist(imgFlipped, '', param);
    CIFAR10(index,:) = gist;
end
% Append the labels as the last column and save the feature matrix.
CIFAR10 = [CIFAR10 double(cifar10labels)];
save('D:\myprojects\datasets\new\CIFAR10_GIST_512.mat','CIFAR10','-v7.3');
disp('GIST特征提取完毕!');
% Purpose: feature preprocessing - train/test split, centering,
% normalization and ground-truth construction.
close all; clear; clc;
addpath 'tools\utils' 'datasets\new'
load('D:\myprojects\datasets\new\CIFAR10_GIST_512.mat');
num_sample = size(CIFAR10, 1);
num_dimension = size(CIFAR10, 2) - 1;   % the last column is the label, not a feature
Data = CIFAR10(:, 1:num_dimension);     % GIST features
Labels = CIFAR10(:, end);               % label information
%% Split into training and test sets
index = randsample(num_sample, 1000);   % draw 1000 samples as the test set
heldOut = false(num_sample, 1);
heldOut(index) = true;
testdata = Data(index, :);
testgnd = Labels(index, :);
traindata = Data(~heldOut, :);          % every remaining sample forms the training set
traingnd = Labels(~heldOut, :);
%% Remove the mean (computed on the training set only)
mm = mean(traindata, 1);
traindata = bsxfun(@minus, traindata, mm);
testdata = bsxfun(@minus, testdata, mm);
%% Normalize
traindata = normr(traindata);           % normalize the training set
testdata = normr(testdata);             % normalize the test set
db_data = vertcat(traindata, testdata);     % training features followed by test features
db_datagnd = vertcat(traingnd, testgnd);    % training labels followed by test labels
%% Method 1: define the ground truth from the label information
% Entry (r, c) is true when training sample r and test sample c share a label.
cateTrainTest = bsxfun(@eq, traingnd, testgnd');
exp_data = struct( ...
    'index',         index, ...
    'train_data',    traindata, ...
    'test_data',     testdata, ...
    'traingnd',      traingnd, ...
    'testgnd',       testgnd, ...
    'db_data',       db_data, ...
    'db_datagnd',    db_datagnd, ...
    'cateTrainTest', cateTrainTest);
%% Method 2: define the ground truth from distances / top-k neighbours
num_test = size(testdata, 1);       % number of test samples
num_train = size(traindata, 1);     % number of training samples
topK_Neighbors = 0.02*num_train;    % ground truth: the closest 2% of the training points for each query
% topK_Neighbors = 50;
% Distances between test and training samples (presumably Euclidean, per the
% original author's note - distMat is a project helper, confirm there).
distTestToTrain = distMat(testdata, traindata);
% Sort along each row: one row per test sample, nearest training points first.
[sortedDist, sortedIdx] = sort(distTestToTrain, 2);
keepCols = 1:topK_Neighbors;
exp_data.KNN = struct( ...
    'knn_index', sortedIdx(:, keepCols), ...    % indices of the nearest neighbours of each test point
    'knn_dis',   sortedDist(:, keepCols));      % matching distances
save('D:\myprojects\datasets\new\CIFAR10.mat','exp_data', '-v7.3');
disp('数据特征预处理完毕!');
% Purpose: export the database images to disk and split them into train/test folders.
%% Load the CIFAR-10 data
close all; clear; clc
batchDir = 'D:\myprojects\datasets\old\cifar-10-batches-mat\';
batchFiles = {'data_batch_1.mat', 'data_batch_2.mat', 'data_batch_3.mat', ...
              'data_batch_4.mat', 'data_batch_5.mat', 'test_batch.mat'};
dataParts = cell(numel(batchFiles), 1);
for b = 1:numel(batchFiles)
    % Only the image data is needed here; loading into a struct keeps the
    % other variables stored in the batch files out of the workspace.
    batch = load([batchDir batchFiles{b}], 'data');
    dataParts{b} = batch.data;
end
% Image data of all batches, stacked in batch order (five training batches, then the test batch).
database = vertcat(dataParts{:});
clear batchDir batchFiles dataParts batch b
numImg = size(database,1);  % total number of images
% Working folders: img holds every exported picture, img_train / img_test
% receive the training and test pictures afterwards.
imgDirs = {'D:\myprojects\datasets\old\img', ...
           'D:\myprojects\datasets\new\img_train', ...
           'D:\myprojects\datasets\new\img_test'};
for d = 1:numel(imgDirs)
    % Start from an empty folder. The 'dir' option restricts the check to
    % folders, so a file or variable of the same name cannot trigger rmdir.
    if exist(imgDirs{d}, 'dir') == 7
        rmdir(imgDirs{d}, 's');
    end
    mkdir(imgDirs{d});
end
% Process every image in turn.
for index = 1:numImg
    disp (['正在处理第',num2str(index),'张图片...'])
    % Columns 1:1024, 1025:2048 and 2049:end of a row are the red, green
    % and blue components. reshape fills column by column, so every 32*32
    % plane is transposed (permute swaps the first two dimensions).
    img = uint8(permute(reshape(database(index, :), 32, 32, 3), [2 1 3]));
    % Flip the image horizontally (per the original author: to match the
    % pictures on the official website).
    imgFlipped = img(:, end:-1:1, :);
    % Save the picture, named after its row index.
    img_name = ['D:\myprojects\datasets\old\img\',num2str(index),'.jpg'];
    imwrite(imgFlipped,img_name);
end
disp('图片保存完毕,下面进行训练数据与测试数据的分割......');
% Load only exp_data, and into a struct, so that no other workspace
% variable can be overwritten by the contents of the MAT-file.
saved = load('D:\myprojects\datasets\new\CIFAR10.mat', 'exp_data');
idx_test = saved.exp_data.index;    % indices of the test images
% Every image that is not a test image is a training image. The count
% comes from numImg (computed from database above) instead of a literal.
idx_train = (1:numImg)';
idx_train(idx_test,:) = [];         % indices of the training images
clear saved;
disp('正在移动测试数据集...')
for k = 1:length(idx_test)
    test_img = ['D:\myprojects\datasets\old\img\',num2str(idx_test(k)),'.jpg'];
    movefile(test_img,'D:\myprojects\datasets\new\img_test');
end
disp('正在移动训练数据集...')
for k = 1:length(idx_train)
    train_img = ['D:\myprojects\datasets\old\img\',num2str(idx_train(k)),'.jpg'];
    movefile(train_img,'D:\myprojects\datasets\new\img_train');
end
% Every picture has been moved out, so the staging folder is removed
% (no 's' flag: rmdir only succeeds if the folder really is empty).
rmdir('D:\myprojects\datasets\old\img');
disp('数据集分割完毕,存放在D:\myprojects\datasets\new文件夹下!')
% Classify the CIFAR-10 dataset using locality-sensitive hashing (LSH) and the k-nearest-neighbour (KNN) algorithm.