def fit(self, X, Y):
"""Fit model to data.
Parameters
----------
X : array-like, shape = [n_samples, n_features]
Training vectors, where n_samples is the number of samples and
n_features is the number of predictors.
Y : array-like, shape = [n_samples, n_targets]
Target vectors, where n_samples is the number of samples and
n_targets is the number of response variables.
"""
# copy since this will contains the residuals (deflated) matrices残差矩阵
check_consistent_length(X, Y)
X = check_array(X, dtype=np.float64, copy=self.copy)
Y = check_array(Y, dtype=np.float64, copy=self.copy, ensure_2d=False)
if Y.ndim == 1:
Y = Y.reshape(-1, 1)
n = X.shape[0]
p = X.shape[1]
q = Y.shape[1]
if self.n_components < 1 or self.n_components > p:
raise ValueError('Invalid number of components: %d' %
self.n_components)
if self.algorithm not in ("svd", "nipals"):
raise ValueError("Got algorithm %s when only 'svd' "
"and 'nipals' are known" % self.algorithm)
if self.algorithm == "svd" and self.mode == "B":
raise ValueError('Incompatible configuration: mode B is not '
'implemented with svd algorithm')
if self.deflation_mode not in ["canonical", "regression"]:
raise ValueError('The deflation mode is unknown')
# Scale (in place)
X, Y, self.x_mean_, self.y_mean_, self.x_std_, self.y_std_ = (
_center_scale_xy(X, Y, self.scale))
# Residuals (deflated) matrices
Xk = X
Yk = Y
# Results matrices
self.x_scores_ = np.zeros((n, self.n_components))
self.y_scores_ = np.zeros((n, self.n_components))
self.x_weights_ = np.zeros((p, self.n_components))
self.y_weights_ = np.zeros((q, self.n_components))
self.x_loadings_ = np.zeros((p, self.n_components))
self.y_loadings_ = np.zeros((q, self.n_components))
self.n_iter_ = []
# NIPALS algo: outer loop, over components
for k in range(self.n_components):
if np.all(np.dot(Yk.T, Yk) < np.finfo(np.double).eps):
# Yk constant
warnings.warn('Y residual constant at iteration %s' % k)
break
# 1) weights estimation (inner loop)
# -----------------------------------
if self.algorithm == "nipals":
x_weights, y_weights, n_iter_ = \
_nipals_twoblocks_inner_loop(
X=Xk, Y=Yk, mode=self.mode, max_iter=self.max_iter,
tol=self.tol, norm_y_weights=self.norm_y_weights)
self.n_iter_.append(n_iter_)
elif self.algorithm == "svd":
x_weights, y_weights = _svd_cross_product(X=Xk, Y=Yk)
# Forces sign stability of x_weights and y_weights
# Sign undeterminacy issue from svd if algorithm == "svd"
# and from platform dependent computation if algorithm == 'nipals'
x_weights, y_weights = svd_flip(x_weights, y_weights.T)
y_weights = y_weights.T
# compute scores
x_scores = np.dot(Xk, x_weights)
if self.norm_y_weights:
y_ss = 1
else:
y_ss = np.dot(y_weights.T, y_weights)
y_scores = np.dot(Yk, y_weights) / y_ss
# test for null variance
if np.dot(x_scores.T, x_scores) < np.finfo(np.double).eps:
warnings.warn('X scores are null at iteration %s' % k)
break
# 2) Deflation (in place)
# ----------------------
# Possible memory footprint reduction may done here: in order to
# avoid the allocation of a data chunk for the rank-one
# approximations matrix which is then subtracted to Xk, we suggest
# to perform a column-wise deflation.
#
# - regress Xk's on x_score
x_loadings = np.dot(Xk.T, x_scores) / np.dot(x_scores.T, x_scores)
# - subtract rank-one approximations to obtain remainder matrix
Xk -= np.dot(x_scores, x_loadings.T)
if self.deflation_mode == "canonical":
# - regress Yk's on y_score, then subtract rank-one approx.
y_loadings = (np.dot(Yk.T, y_scores)
/ np.dot(y_scores.T, y_scores))
Yk -= np.dot(y_scores, y_loadings.T)
if self.deflation_mode == "regression":
# - regress Yk's on x_score, then subtract rank-one approx.
y_loadings = (np.dot(Yk.T, x_scores)
/ np.dot(x_scores.T, x_scores))
Yk -= np.dot(x_scores, y_loadings.T)
# 3) Store weights, scores and loadings # Notation:
self.x_scores_[:, k] = x_scores.ravel() # T
self.y_scores_[:, k] = y_scores.ravel() # U
self.x_weights_[:, k] = x_weights.ravel() # W
self.y_weights_[:, k] = y_weights.ravel() # C
self.x_loadings_[:, k] = x_loadings.ravel() # P
self.y_loadings_[:, k] = y_loadings.ravel() # Q
# Such that: X = TP' + Err and Y = UQ' + Err
# 4) rotations from input space to transformed space (scores)
# T = X W(P'W)^-1 = XW* (W* : p x k matrix)
# U = Y C(Q'C)^-1 = YC* (W* : q x k matrix)
self.x_rotations_ = np.dot(
self.x_weights_,
pinv2(np.dot(self.x_loadings_.T, self.x_weights_),
check_finite=False))
if Y.shape[1] > 1:
self.y_rotations_ = np.dot(
self.y_weights_,
pinv2(np.dot(self.y_loadings_.T, self.y_weights_),
check_finite=False))
else:
self.y_rotations_ = np.ones(1)
if True or self.deflation_mode == "regression":
# FIXME what's with the if?
# Estimate regression coefficient
# Regress Y on T
# Y = TQ' + Err,
# Then express in function of X
# Y = X W(P'W)^-1Q' + Err = XB + Err
# => B = W*Q' (p x q)
self.coef_ = np.dot(self.x_rotations_, self.y_loadings_.T)
self.coef_ = self.coef_ * self.y_std_
return self
pls
最新推荐文章于 2025-04-30 10:02:30 发布