# -*- coding: utf-8 -*-
"""
Created on Tue Dec 17 17:08:33 2024
@author: cetire718
"""
import numpy as np
from sklearn.decomposition import PCA
# Demonstration: the 1-D PCA scores of a set of rows depend on which rows the
# PCA model was fitted on. Fitting on the full matrix and fitting on only its
# leading rows produce different scores for those same rows.

# Number of leading rows used for the partial fit. Defined once so that the
# slice of the raw data and the slice of the full-fit scores always cover the
# same rows (the comparison below is meaningless otherwise).
N_SUBSET_ROWS = 4

# 1. Generate random data of shape (10, 5); the seed is fixed so runs repeat.
np.random.seed(42)
data = np.random.rand(10, 5)

# 2. Fit PCA on the whole data set and reduce it to 1 dimension.
pca_full = PCA(n_components=1)
data_pca_full = pca_full.fit_transform(data)
# Output recorded by the original author:
# array([[ 0.56141715],
# [ 0.01600634],
# [ 0.88469781],
# [ 0.17512929],
# [-0.25376338],
# [-0.09108191],
# [-0.7391968 ],
# [-0.44240949],
# [-0.09652537],
# [-0.01427363]])

# 3. Fit a separate PCA on only the first N_SUBSET_ROWS (4) rows and reduce
#    them to 1 dimension.
data_4 = data[:N_SUBSET_ROWS]  # first 4 rows
pca_partial = PCA(n_components=1)
data_pca_partial = pca_partial.fit_transform(data_4)
# Output recorded by the original author:
# array([[ 0.39286218],
# [-0.64185432],
# [ 0.45828384],
# [-0.20929171]])

# 4. Compare the two results: take the scores of the same leading rows from
#    the full-data fit.
data_pca_full_4 = data_pca_full[:N_SUBSET_ROWS]

# 5. Mean squared error between the two sets of scores for those rows.
mse = np.mean((data_pca_full_4 - data_pca_partial) ** 2)
print(f"均方误差 (MSE): {mse:e}")
# Recorded output: 均方误差 (MSE): 1.976999e-01
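# Notes:
# This difference is a consequence of how PCA is computed: the data are
# centered and the principal axis is derived from the rows the model is
# fitted on.
# In fact, for one given matrix the result is the same no matter how many
# times the computation is repeated.
# However, if only a slice of the matrix is used for fitting, the result is
# completely different.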