Python:Generalized Discriminant Analysis (GDA) 手工代码实现 Kernel LDA_DeniuHe的博客-优快云博客
上次写的代码有问题。
讲得挺好,但是没讲M是怎么计算出来的。费了好多时间才搞明白。
国内网站还没有找到像样的KDA(GDA)代码。
只能自己写一个了。
'''
Deniu He
2023-04-04
'''
import numpy as np
from sklearn.datasets import load_iris
from sklearn import datasets
from sklearn.metrics.pairwise import rbf_kernel
from sklearn.metrics.pairwise import pairwise_kernels
import matplotlib.pyplot as plt
from numpy import linalg as LA
class KDA():
    """Kernel (Generalized) Discriminant Analysis — kernel LDA.

    Finds projection directions in RBF-kernel feature space that maximize
    between-class scatter relative to within-class scatter (Baudat & Anouar's
    GDA). Data are projected via the dual coefficients ``Theta`` applied to
    kernel evaluations against the training set.

    Parameters
    ----------
    X : ndarray of shape (n_samples, n_features)
        Training data.
    y : ndarray of shape (n_samples,)
        Class labels.
    gamma : float
        RBF kernel width: k(a, b) = exp(-gamma * ||a - b||^2).
    n_components : int or None
        Output dimensionality; when None, n_classes - 1 directions are kept.
    """

    def __init__(self, X, y, gamma, n_components):
        self.X = X
        self.y = y
        self.gamma = gamma
        self.n_components = n_components
        self.nSample, self.nDim = self.X.shape
        self.total_index = np.arange(self.nSample)
        self.labels = np.unique(self.y)
        self.nClass = len(self.labels)
        self.total_mean = np.mean(self.X, axis=0)
        self.class_size = []
        # NOTE: get_Mean() also fills self.class_size as a side effect,
        # which get_N()/get_H() rely on — keep this call first.
        self.class_mean = self.get_Mean()
        # Full n x n kernel Gram matrix of the training data.
        self.KM = self._rbf_kernel(self.X, self.X)
        self.N = self.get_N()   # within-class scatter in kernel space
        self.H = self.get_H()   # between-class scatter in kernel space
        # Ridge term keeps the inverse well-conditioned.
        epsilon = 1e-08
        # BUG FIX: inv(N) @ H is NOT symmetric in general, so np.linalg.eigh
        # (which assumes symmetry and reads only one triangle) returned wrong
        # eigenpairs. Use the general solver instead. Because N is symmetric
        # positive-definite and H is positive-semidefinite, the eigenvalues of
        # inv(N) @ H are real and >= 0, so discarding the (numerically tiny)
        # imaginary parts is safe.
        eig_val, eig_vec = LA.eig(
            LA.inv(self.N + epsilon * np.eye(self.nSample)) @ self.H)
        eig_val = np.real(eig_val)
        eig_vec = np.real(eig_vec)
        idx = eig_val.argsort()[::-1]  # descending: largest eigenvalue first
        eig_val = eig_val[idx]
        eig_vec = eig_vec[:, idx]
        if self.n_components is not None:
            Theta = eig_vec[:, :self.n_components]
        else:
            # At most n_classes - 1 discriminant directions are informative.
            Theta = eig_vec[:, :self.nClass - 1]
        self.Theta = Theta

    def _rbf_kernel(self, A, B):
        """RBF kernel matrix exp(-gamma * ||a - b||^2) in pure NumPy.

        Numerically equivalent to sklearn.metrics.pairwise.rbf_kernel(A, B,
        gamma=self.gamma); implemented locally so the class depends only on
        NumPy.
        """
        sq_dists = (np.sum(A ** 2, axis=1)[:, None]
                    + np.sum(B ** 2, axis=1)[None, :]
                    - 2.0 * A @ B.T)
        # Clip tiny negatives produced by floating-point cancellation.
        np.maximum(sq_dists, 0.0, out=sq_dists)
        return np.exp(-self.gamma * sq_dists)

    # --------------Transform
    def transform(self, X):
        """Project X onto the learned discriminant directions.

        Parameters
        ----------
        X : ndarray of shape (m, n_features)

        Returns
        -------
        ndarray of shape (m, n_components)
        """
        Kernel_Tran_input = self._rbf_kernel(self.X, X)
        X_transform = self.Theta.T @ Kernel_Tran_input
        return X_transform.T

    def get_Mean(self):
        """Per-class input-space means; also records each class size."""
        Mean = np.zeros((self.nClass, self.nDim))
        for i, lab in enumerate(self.labels):
            idx_list = np.where(self.y == lab)[0]
            Mean[i] = np.mean(self.X[idx_list], axis=0)
            self.class_size.append(len(idx_list))
        return Mean

    def get_H(self):
        """Between-class scatter: H = sum_c n_c (M_c - M*)(M_c - M*)^T.

        M* is the mean kernel column over all samples; M_c is the mean
        kernel column over the samples of class c.
        """
        H = np.zeros((self.nSample, self.nSample))
        M_star = self.KM.sum(axis=1).reshape((-1, 1)) / self.nSample
        for i, lab in enumerate(self.labels):
            idx_list = np.where(self.y == lab)[0]
            Ki = self.KM[:, idx_list]  # kernel columns of class c
            M_c = Ki.sum(axis=1).reshape((-1, 1)) / self.class_size[i]
            H += self.class_size[i] * (M_c - M_star) @ (M_c - M_star).T
        return H

    def get_N(self):
        """Within-class scatter: N = sum_c K_c (I - 1/n_c) K_c^T.

        (I - 1/n_c) is the centering matrix for class c. A small ridge is
        added so N stays invertible.
        """
        N = np.zeros((self.nSample, self.nSample))
        for i, lab in enumerate(self.labels):
            idx_list = np.where(self.y == lab)[0]
            Ki = self.KM[:, idx_list]
            n_c = self.class_size[i]
            N += Ki @ (np.eye(n_c) - np.ones((n_c, n_c)) / n_c) @ Ki.T
        N += np.eye(self.nSample) * 1e-8  # numerical-stability ridge
        return N
# --- Demo: KDA on concentric circles (not linearly separable in input space) ---
X, y = datasets.make_circles(n_samples=200, shuffle=True, noise=0.01, random_state=42)

# Show the raw data: two concentric rings.
plt.scatter(X[:, 0], X[:, 1], c=y)
plt.show()

# Fit KDA and project the samples into the learned discriminant subspace.
kda = KDA(X=X, y=y, gamma=0.001, n_components=2)
X_transform = kda.transform(X=X)
print(X_transform.shape)

# In the projected space the two classes become linearly separable.
plt.scatter(X_transform[:, 0], X_transform[:, 1], c=y)
plt.show()
转换后变成线性可分数据: