pls

最新推荐文章于 2022-05-11 19:35:36 发布
齐天大圣徐
最新推荐文章于 2022-05-11 19:35:36 发布
阅读量2.9k
点赞数
文章标签： python sklearn
 def fit(self, X, Y):
    """Fit model to data.
    Parameters
    ----------
    X : array-like, shape = [n_samples, n_features]
        Training vectors, where n_samples is the number of samples and
        n_features is the number of predictors.
    Y : array-like, shape = [n_samples, n_targets]
        Target vectors, where n_samples is the number of samples and
        n_targets is the number of response variables.
    """

    # copy since this will contains the residuals (deflated) matrices残差矩阵
    check_consistent_length(X, Y)
    X = check_array(X, dtype=np.float64, copy=self.copy)
    Y = check_array(Y, dtype=np.float64, copy=self.copy, ensure_2d=False)
    if Y.ndim == 1:
        Y = Y.reshape(-1, 1)

    n = X.shape[0]
    p = X.shape[1]
    q = Y.shape[1]

    if self.n_components < 1 or self.n_components > p:
        raise ValueError('Invalid number of components: %d' %
                         self.n_components)
    if self.algorithm not in ("svd", "nipals"):
        raise ValueError("Got algorithm %s when only 'svd' "
                         "and 'nipals' are known" % self.algorithm)
    if self.algorithm == "svd" and self.mode == "B":
        raise ValueError('Incompatible configuration: mode B is not '
                         'implemented with svd algorithm')
    if self.deflation_mode not in ["canonical", "regression"]:
        raise ValueError('The deflation mode is unknown')
    # Scale (in place)
    X, Y, self.x_mean_, self.y_mean_, self.x_std_, self.y_std_ = (
        _center_scale_xy(X, Y, self.scale))
    # Residuals (deflated) matrices
    Xk = X
    Yk = Y
    # Results matrices
    self.x_scores_ = np.zeros((n, self.n_components))
    self.y_scores_ = np.zeros((n, self.n_components))
    self.x_weights_ = np.zeros((p, self.n_components))
    self.y_weights_ = np.zeros((q, self.n_components))
    self.x_loadings_ = np.zeros((p, self.n_components))
    self.y_loadings_ = np.zeros((q, self.n_components))
    self.n_iter_ = []

    # NIPALS algo: outer loop, over components
    for k in range(self.n_components):
        if np.all(np.dot(Yk.T, Yk) < np.finfo(np.double).eps):
            # Yk constant
            warnings.warn('Y residual constant at iteration %s' % k)
            break
        # 1) weights estimation (inner loop)
        # -----------------------------------
        if self.algorithm == "nipals":
            x_weights, y_weights, n_iter_ = \
                _nipals_twoblocks_inner_loop(
                    X=Xk, Y=Yk, mode=self.mode, max_iter=self.max_iter,
                    tol=self.tol, norm_y_weights=self.norm_y_weights)
            self.n_iter_.append(n_iter_)
        elif self.algorithm == "svd":
            x_weights, y_weights = _svd_cross_product(X=Xk, Y=Yk)
        # Forces sign stability of x_weights and y_weights
        # Sign undeterminacy issue from svd if algorithm == "svd"
        # and from platform dependent computation if algorithm == 'nipals'
        x_weights, y_weights = svd_flip(x_weights, y_weights.T)
        y_weights = y_weights.T
        # compute scores
        x_scores = np.dot(Xk, x_weights)
        if self.norm_y_weights:
            y_ss = 1
        else:
            y_ss = np.dot(y_weights.T, y_weights)
        y_scores = np.dot(Yk, y_weights) / y_ss
        # test for null variance
        if np.dot(x_scores.T, x_scores) < np.finfo(np.double).eps:
            warnings.warn('X scores are null at iteration %s' % k)
            break
        # 2) Deflation (in place)
        # ----------------------
        # Possible memory footprint reduction may done here: in order to
        # avoid the allocation of a data chunk for the rank-one
        # approximations matrix which is then subtracted to Xk, we suggest
        # to perform a column-wise deflation.
        #
        # - regress Xk's on x_score
        x_loadings = np.dot(Xk.T, x_scores) / np.dot(x_scores.T, x_scores)
        # - subtract rank-one approximations to obtain remainder matrix
        Xk -= np.dot(x_scores, x_loadings.T)
        if self.deflation_mode == "canonical":
            # - regress Yk's on y_score, then subtract rank-one approx.
            y_loadings = (np.dot(Yk.T, y_scores)
                          / np.dot(y_scores.T, y_scores))
            Yk -= np.dot(y_scores, y_loadings.T)
        if self.deflation_mode == "regression":
            # - regress Yk's on x_score, then subtract rank-one approx.
            y_loadings = (np.dot(Yk.T, x_scores)
                          / np.dot(x_scores.T, x_scores))
            Yk -= np.dot(x_scores, y_loadings.T)
        # 3) Store weights, scores and loadings # Notation:
        self.x_scores_[:, k] = x_scores.ravel()  # T
        self.y_scores_[:, k] = y_scores.ravel()  # U
        self.x_weights_[:, k] = x_weights.ravel()  # W
        self.y_weights_[:, k] = y_weights.ravel()  # C
        self.x_loadings_[:, k] = x_loadings.ravel()  # P
        self.y_loadings_[:, k] = y_loadings.ravel()  # Q
    # Such that: X = TP' + Err and Y = UQ' + Err

    # 4) rotations from input space to transformed space (scores)
    # T = X W(P'W)^-1 = XW* (W* : p x k matrix)
    # U = Y C(Q'C)^-1 = YC* (W* : q x k matrix)
    self.x_rotations_ = np.dot(
        self.x_weights_,
        pinv2(np.dot(self.x_loadings_.T, self.x_weights_),
              check_finite=False))
    if Y.shape[1] > 1:
        self.y_rotations_ = np.dot(
            self.y_weights_,
            pinv2(np.dot(self.y_loadings_.T, self.y_weights_),
                  check_finite=False))
    else:
        self.y_rotations_ = np.ones(1)

    if True or self.deflation_mode == "regression":
        # FIXME what's with the if?
        # Estimate regression coefficient
        # Regress Y on T
        # Y = TQ' + Err,
        # Then express in function of X
        # Y = X W(P'W)^-1Q' + Err = XB + Err
        # => B = W*Q' (p x q)
        self.coef_ = np.dot(self.x_rotations_, self.y_loadings_.T)
        self.coef_ = self.coef_ * self.y_std_
    return self