论文PCANet: A Simple Deep Learning Baseline for Image Classification?的matlab源码解读（三）

最新推荐文章于 2022-01-10 16:01:07 发布

原创 · 最新推荐文章于 2022-01-10 16:01:07 发布 · 1.4k 阅读

8 ·

CC 4.0 BY-SA版权

文章标签：

#matlab #深度学习

深度学习专栏收录该内容

10 篇文章

订阅专栏

本文介绍了一种基于PCANet的哈希编码方法，并详细解释了如何通过该方法进行二值化哈希编码及随后的局部直方图计算。通过不同层级的滤波器输出，结合权重分配与量化处理，最终实现图像特征的哈希表示。

摘要生成于 C知道，由 DeepSeek-R1 满血版支持，前往体验 >

function [f BlkIdx] = HashingHist(PCANet,ImgIdx,OutImg)
% Output layer of PCANet (hashing plus local histogram).
%
% For every image, the filter outputs are consumed in consecutive groups of
% PCANet.NumFilters(end). Each group is binarized, combined into one
% integer-valued map T, and T is summarized by block-wise histograms.
%
% ========= INPUT ============
% PCANet  PCANet parameters (struct)
%       .NumStages
%           the number of stages in PCANet; e.g., 2
%       .PatchSize
%           the patch size (filter size) for square patches; e.g., [5 3]
%           means patch size equal to 5 and 3 in the first stage and second
%           stage, respectively
%       .NumFilters
%           the number of filters in each stage; e.g., [16 8] means 16 and
%           8 filters in the first stage and second stage, respectively
%       .HistBlockSize
%           the size of each block for local histogram; e.g., [10 10].
%           When empty, the block size is derived from .ImgBlkRatio instead.
%       .ImgBlkRatio
%           read only when .HistBlockSize is empty; the block size is then
%           ceil(size(T)./ImgBlkRatio)
%       .BlkOverLapRatio
%           overlapped block region ratio; e.g., 0 means no overlap
%           between blocks, and 0.3 means 30% of blocksize is overlapped
%       .Pyramid
%           spatial pyramid matching; e.g., [1 2 4], and [] if no Pyramid
%           is applied
% ImgIdx  Image index for OutImg (column vector)
% OutImg  PCA filter output before the last stage (cell structure)
% ========= OUTPUT ===========
% f       PCANet features (each column corresponds to feature of each image)
% BlkIdx  index of local block from which the histogram is computed
% ============================
addpath('./Utils')


NumImg = max(ImgIdx); % largest image index, i.e. the number of images to encode
f = cell(NumImg,1);   % one feature vector per image
% Weights for the binary-to-decimal conversion of the hashing codes:
% 2^(L-1), ..., 2^1, 2^0 with L = PCANet.NumFilters(end).
map_weights = 2.^((PCANet.NumFilters(end)-1):-1:0);


for Idx = 1:NumImg % outer loop: one pass per image
    % Positions in ImgIdx (and therefore in OutImg) of all filter outputs
    % that belong to image Idx.
    Idx_span = find(ImgIdx == Idx);
    % Number of "O"s: the outputs of this image are split into groups of
    % PCANet.NumFilters(end) consecutive entries.
    NumOs = length(Idx_span)/PCANet.NumFilters(end);
    Bhist = cell(NumOs,1); % block-wise histograms, one cell per group
    
    for i = 1:NumOs % middle loop: one pass per group of last-stage outputs
        
        T = 0;
        % Size of the first output in group i; used below as the image size
        % for the spatial pyramid.
        ImgSize = size(OutImg{Idx_span(PCANet.NumFilters(end)*(i-1) + 1)});
        for j = 1:PCANet.NumFilters(end)
            % Inner loop: weighted combination of the binarized outputs,
            % i.e. hashing codes to decimal number conversion.
            % The local Heaviside defined in this file returns 1 for x > 0
            % and 0 for x <= 0 (it never returns 0.5).
            T = T + map_weights(j)*Heaviside(OutImg{Idx_span(PCANet.NumFilters(end)*(i-1)+j)}); 
            
            % Release the consumed output to free memory.
            OutImg{Idx_span(PCANet.NumFilters(end)*(i-1)+j)} = [];
        end
        
        
        if isempty(PCANet.HistBlockSize) % no explicit block size: derive it from ImgBlkRatio
            NumBlk = ceil((PCANet.ImgBlkRatio - 1)./PCANet.BlkOverLapRatio) + 1;
            HistBlockSize = ceil(size(T)./PCANet.ImgBlkRatio);
            OverLapinPixel = ceil((size(T) - HistBlockSize)./(NumBlk - 1));
            NImgSize = (NumBlk-1).*OverLapinPixel + HistBlockSize;
            % Zero-pad T (top-left aligned) up to NImgSize.
            Tmp = zeros(NImgSize);
            Tmp(1:size(T,1), 1:size(T,2)) = T;
            % Histogram of every block over the edges 0 .. 2^L-1, stored sparse.
            Bhist{i} = sparse(histc(im2col_general(Tmp,HistBlockSize,...
            OverLapinPixel),(0:2^PCANet.NumFilters(end)-1)')); 
        else % explicit block size
            
            % Step between neighbouring blocks, one entry per entry of
            % HistBlockSize (rounded to whole pixels).
            stride = round((1-PCANet.BlkOverLapRatio)*PCANet.HistBlockSize);
            % Calculate the histogram for each local block in "T".
            % histc counts the values of each column against the edges
            % 0 .. 2^L-1; sparse() only changes the storage format.
            blkwise_fea = sparse(histc(im2col_general(T,PCANet.HistBlockSize,...
              stride),(0:2^PCANet.NumFilters(end)-1)')); 
            
           if ~isempty(PCANet.Pyramid)
                x_start = ceil(PCANet.HistBlockSize(2)/2);
                y_start = ceil(PCANet.HistBlockSize(1)/2);
                x_end = floor(ImgSize(2) - PCANet.HistBlockSize(2)/2);
                y_end = floor(ImgSize(1) - PCANet.HistBlockSize(1)/2);
                
                % Colon operands must be scalars: passing the whole stride
                % vector would silently use stride(1) for both axes. Use
                % the column step for x and the row step for y instead
                % (identical to the previous behavior for square blocks).
                % NOTE(review): assumes im2col_general steps rows by
                % stride(1) and columns by stride(2) - confirm in ./Utils.
                x_centers = x_start:stride(2):x_end;
                y_centers = y_start:stride(1):y_end;
                
                % Row 1: x coordinate, row 2: y coordinate of each block,
                % with y varying fastest.
                sam_coordinate = [...
                    kron(x_centers,ones(1,length(y_centers))); 
                    kron(ones(1,length(x_centers)),y_centers)];               
                
                % spp() max-pools the block histograms inside every
                % spatial-pyramid bin.
                blkwise_fea = spp(blkwise_fea, sam_coordinate, ImgSize, PCANet.Pyramid)';
           else % no spatial pyramid
                % Rescale every block histogram (column) by 2^L divided by
                % the sum of that column.
                blkwise_fea = bsxfun(@times, blkwise_fea, ...
                    2^PCANet.NumFilters(end)./sum(blkwise_fea)); 
           end
           
           Bhist{i} = blkwise_fea;
        end
        
    end           
    % Concatenate the histograms of all groups into one column vector.
    f{Idx} = vec([Bhist{:}]');
    
    if ~isempty(PCANet.Pyramid)
        % With the pyramid enabled the feature is scaled to unit 2-norm.
        f{Idx} = sparse(f{Idx}/norm(f{Idx}));
    end
end
f = [f{:}];

% Block index of every feature entry, built from the histograms of the last
% processed image.
if ~isempty(PCANet.Pyramid)
    BlkIdx = kron((1:size(Bhist{1},1))',ones(length(Bhist)*size(Bhist{1},2),1));
else
    BlkIdx = kron(ones(NumOs,1),kron((1:size(Bhist{1},2))',ones(size(Bhist{1},1),1)));
end

%-------------------------------
function X = Heaviside(X) % binary quantization
% Replace every strictly positive entry by 1 and every zero or negative
% entry by 0, element-wise.
signum = sign(X);
notPositive = (signum <= 0);
signum(notPositive) = 0;
X = signum;

function x = vec(X) % vectorization
% Stack all elements of X, in column-major order, into one column.
x = reshape(X, numel(X), 1);


function beta = spp(blkwise_fea, sam_coordinate, ImgSize, pyramid)
% Spatial pyramid max-pooling of block-wise features.
%
% blkwise_fea     one feature column per local block
% sam_coordinate  2-row matrix; row 1 holds the x and row 2 the y
%                 coordinate of each block
% ImgSize         [height width] of the image the blocks were taken from
% pyramid         grid size per pyramid level; level k splits the image
%                 into pyramid(k)-by-pyramid(k) cells
%
% beta            one column per pyramid cell (all levels concatenated)
%                 holding the row-wise maximum over the blocks that fall
%                 into that cell; cells without any block stay zero

featDim = size(blkwise_fea, 1);

imgH = ImgSize(1);
imgW = ImgSize(2);

% spatial levels
numLevels = length(pyramid);
cellsPerLevel = pyramid.^2;

beta = zeros(featDim, sum(cellsPerLevel));
outCol = 0;

for lvl = 1:numLevels
    
    gridSide = pyramid(lvl);
    cellW = imgW / gridSide;
    cellH = imgH / gridSide;
    
    % find to which spatial cell each local block belongs
    colOfBlock = ceil(sam_coordinate(1,:) / cellW);
    rowOfBlock = ceil(sam_coordinate(2,:) / cellH);
    cellOfBlock = (rowOfBlock - 1)*gridSide + colOfBlock;
    
    for c = 1:cellsPerLevel(lvl)
        outCol = outCol + 1;
        inCell = (cellOfBlock == c);
        if any(inCell)
            beta(:, outCol) = max(blkwise_fea(:, inCell), [], 2);
        end
    end
end

此函数实现二值哈希编码，并对编码结果按局部块统计直方图（并非绘制直方图），作为PCANet的输出特征。