Partial Least Squares (PLS): MATLAB's Built-in Code

  1. Partial least squares (classic NIPALS implementation, unsimplified)
  2. Partial least squares: derivation of basic properties
  3. Partial least squares (SIMPLS, unsimplified)

 

The previous posts covered many properties of PLS; here we walk through the PLS code that ships with MATLAB, to round out the picture.

MATLAB's built-in plsregress function implements the SIMPLS algorithm, whose core idea is to repeatedly apply orthogonal deflation to the cross-covariance matrix of X0 and Y0.
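Before diving into the source, a minimal usage sketch (on made-up random data) shows the interface being dissected below; the prediction formula follows the plsregress documentation, where the first row of beta holds the intercept:

rng(0);                                  % made-up example data
X = randn(100, 10);                      % 100 observations, 10 predictors
Y = randn(100, 2);                       % 2 response variables
ncomp = 3;                               % number of PLS components
[XL, YL, XS, YS, beta, pctVar, mse, stats] = plsregress(X, Y, ncomp);
Yfit = [ones(size(X,1), 1), X] * beta;   % first row of beta is the intercept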

function [Xloadings,Yloadings,Xscores,Yscores, ...
                    beta,pctVar,mse,stats] = plsregress(X,Y,ncomp,varargin)
%PLSREGRESS Partial least squares regression.


....
% Center both predictors and response, and do PLS
% Center X and Y; centering removes the intercept term from the model
meanX = mean(X,1);
meanY = mean(Y,1);
X0 = bsxfun(@minus, X, meanX);
Y0 = bsxfun(@minus, Y, meanY);

% Depending on the requested outputs, return different sets of parameters.
% Below we focus on SIMPLS, proposed by de Jong in 1993; MATLAB uses this
% algorithm to implement PLS, which speaks to its significance.
if nargout <= 2
    [Xloadings,Yloadings] = simpls(X0,Y0,ncomp);
    
elseif nargout <= 4
    [Xloadings,Yloadings,Xscores,Yscores] = simpls(X0,Y0,ncomp);
....
end
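When all outputs are requested, the elided branches also retrieve Weights (simpls' fifth output), and the regression coefficients are then assembled from the pieces computed on centered data. A sketch of that step, mirroring the shipped source:

beta = Weights * Yloadings';             % coefficients for the centered model
beta = [meanY - meanX * beta; beta];     % prepend a row restoring the intercept

This is why centering can "remove" the intercept: it is simply re-derived from the means at the end.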


%------------------------------------------------------------------------------
%SIMPLS Basic SIMPLS.  Performs no error checking.
function [Xloadings,Yloadings,Xscores,Yscores,Weights] = simpls(X0,Y0,ncomp)

[n,dx] = size(X0);
dy = size(Y0,2);

% Preallocate the outputs (these sizes are what n, dx, dy are for)
Xloadings = zeros(dx,ncomp);
Yloadings = zeros(dy,ncomp);
if nargout > 2
    Xscores = zeros(n,ncomp);
    Yscores = zeros(n,ncomp);
    if nargout > 4
        Weights = zeros(dx,ncomp);
    end
end

% An orthonormal basis for the span of the X loadings, to make the successive
% deflation of X0'*Y0 simple - each new basis vector can be removed from Cov
% separately.
V = zeros(dx,ncomp);

% Compute the cross-covariance matrix

Cov = X0'*Y0;
for i = 1:ncomp
    % Find unit length ti=X0*ri and ui=Y0*ci whose covariance, ri'*X0'*Y0*ci, is
    % jointly maximized, subject to ti'*tj=0 for j=1:(i-1).
    % Compute the leading singular vectors (principal direction) of the cross-covariance
    [ri,si,ci] = svd(Cov,'econ');
    ri = ri(:,1); ci = ci(:,1); si = si(1);
    % Compute the scores
    ti = X0*ri;
    normti = norm(ti); ti = ti ./ normti; % ti'*ti == 1
    % Compute the loadings
    Xloadings(:,i) = X0'*ti;
    
    qi = si*ci/normti; % = Y0'*ti
    Yloadings(:,i) = qi;
    
    if nargout > 2
        Xscores(:,i) = ti;
        Yscores(:,i) = Y0*qi; % = Y0*(Y0'*ti), and proportional to Y0*ci
        if nargout > 4
            Weights(:,i) = ri ./ normti; % rescaled to make ri'*X0'*X0*ri == ti'*ti == 1
        end
    end

    % Update the orthonormal basis with modified Gram Schmidt (more stable),
    % repeated twice (ditto).
    vi = Xloadings(:,i);
    for repeat = 1:2
        for j = 1:i-1
            vj = V(:,j);
            vi = vi - (vj'*vi)*vj;
        end
    end
    vi = vi ./ norm(vi);
    V(:,i) = vi;

    % Deflate Cov, i.e. project onto the ortho-complement of the X loadings.
    % First remove projections along the current basis vector, then remove any
    % component along previous basis vectors that's crept in as noise from
    % previous deflations.
    % Update the cross-covariance matrix
    Cov = Cov - vi*(vi'*Cov);
    Vi = V(:,1:i);
    Cov = Cov - Vi*(Vi'*Cov);
end

if nargout > 2
    % By convention, orthogonalize the Y scores w.r.t. the preceding Xscores,
    % i.e. XSCORES'*YSCORES will be lower triangular.  This gives, in effect, only
    % the "new" contribution to the Y scores for each PLS component.  It is also
    % consistent with the PLS-1/PLS-2 algorithms, where the Y scores are computed
    % as linear combinations of a successively-deflated Y0.  Use modified
    % Gram-Schmidt, repeated twice.
    for i = 1:ncomp
        ui = Yscores(:,i);
        for repeat = 1:2
            for j = 1:i-1
                tj = Xscores(:,j);
                ui = ui - (tj'*ui)*tj;
            end
        end
        Yscores(:,i) = ui;
    end
end
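The orthogonality conventions stated in the comments above are easy to check numerically; a quick sketch on made-up data:

rng(0);                          % made-up data for a structural check
X = randn(50, 8);
Y = randn(50, 2);
[XL, YL, XS, YS] = plsregress(X, Y, 3);
disp(XS' * XS)                   % ~ eye(3): the X scores are orthonormal
disp(XS' * YS)                   % ~ lower triangular, per the convention above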


%------------------------------------------------------------------------------
%PLSCV Efficient cross-validation for X and Y mean squared error in PLS.
function mse = plscv(X,Y,ncomp,cvp,mcreps,ParOptions)

[n,dx] = size(X);

% Return error for as many components as asked for; some columns may be NaN
% if ncomp is too large for CV.
mse = NaN(2,ncomp+1);

% The CV training sets are smaller than the full data; may not be able to fit as
% many PLS components.  Do the best we can.
if isa(cvp,'cvpartition')
    cvpType = 'partition';
    maxncomp = min(min(cvp.TrainSize)-1,dx);
    nTest = sum(cvp.TestSize);
else
    cvpType = 'Kfold';
    maxncomp = min( floor((n*(cvp-1)/cvp)-1), dx);
    nTest = n;
end
if ncomp > maxncomp
    warning(message('stats:plsregress:MaxComponentsCV', maxncomp));
    ncomp = maxncomp;
end

% Cross-validate sum of squared errors for models with 1:ncomp components,
% simultaneously.  Sum the SSEs over CV sets, and compute the mean squared
% error
CVfun = @(Xtr,Ytr,Xtst,Ytst) sseCV(Xtr,Ytr,Xtst,Ytst,ncomp);
sumsqerr = crossval(CVfun,X,Y,cvpType,cvp,'mcreps',mcreps,'options',ParOptions);
mse(:,1:ncomp+1) = reshape(sum(sumsqerr,1)/(nTest*mcreps), [2,ncomp+1]);
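plscv itself is only reached through plsregress's name-value interface. A minimal sketch of requesting 5-fold cross-validated MSE ('CV' and 'MCReps' are documented plsregress options; row 1 of the mse output is for X, row 2 for Y, and column j gives the error for j-1 components):

rng(0);                                  % made-up data
X = randn(100, 10);
Y = randn(100, 1);
[~, ~, ~, ~, ~, ~, cvMSE] = plsregress(X, Y, 8, 'CV', 5, 'MCReps', 2);
plot(0:8, cvMSE(2,:), '-o')              % CV error for Y vs. number of components
xlabel('Number of components'); ylabel('CV MSE for Y')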


%------------------------------------------------------------------------------
%SSECV Sum of squared errors for cross-validation
function sumsqerr = sseCV(Xtrain,Ytrain,Xtest,Ytest,ncomp)

XmeanTrain = mean(Xtrain);
YmeanTrain = mean(Ytrain);
X0train = bsxfun(@minus, Xtrain, XmeanTrain);
Y0train = bsxfun(@minus, Ytrain, YmeanTrain);

% Get and center the test data
X0test = bsxfun(@minus, Xtest, XmeanTrain);
Y0test = bsxfun(@minus, Ytest, YmeanTrain);

% Fit the full model, models with 1:(ncomp-1) components are nested within
[Xloadings,Yloadings,~,~,Weights] = simpls(X0train,Y0train,ncomp);
XscoresTest = X0test * Weights;

% Return error for as many components as asked for.
outClass = superiorfloat(Xtrain,Ytrain);
sumsqerr = zeros(2,ncomp+1,outClass); % this will get reshaped to a row by CROSSVAL

% Sum of squared errors for the null model
sumsqerr(1,1) = sum(sum(abs(X0test).^2, 2));
sumsqerr(2,1) = sum(sum(abs(Y0test).^2, 2));

% Compute sum of squared errors for models with 1:ncomp components
for i = 1:ncomp
    X0reconstructed = XscoresTest(:,1:i) * Xloadings(:,1:i)';
    sumsqerr(1,i+1) = sum(sum(abs(X0test - X0reconstructed).^2, 2));

    Y0reconstructed = XscoresTest(:,1:i) * Yloadings(:,1:i)';
    sumsqerr(2,i+1) = sum(sum(abs(Y0test - Y0reconstructed).^2, 2));
end
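The Y reconstruction above, XscoresTest(:,1:i)*Yloadings(:,1:i)', gives the same predictions that the returned coefficients would give for an i-component model. A quick sketch of this identity for the full model, using stats.W, which plsregress fills with the SIMPLS weights:

rng(1);                                  % made-up data
X = randn(40, 6);  Y = randn(40, 1);  ncomp = 3;
[~, YL, ~, ~, beta, ~, ~, stats] = plsregress(X, Y, ncomp);
X0 = bsxfun(@minus, X, mean(X,1));       % center with the training means
Yhat0 = X0 * stats.W * YL';              % reconstruction as in sseCV
Yfit  = [ones(size(X,1), 1), X] * beta;  % prediction via the coefficients
disp(norm(Yhat0 + mean(Y,1) - Yfit))     % ~ 0: the two routes agree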

 
