LDA Code
For the theory, see https://blog.youkuaiyun.com/jirong5206/article/details/105825057
Unlike PCA, LDA is supervised: the labels take part in the algorithm itself, where they are used to split the samples by class and compute each class's mean. In the previous PCA post the labels were only used to color the scatter points.
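For reference, the two scatter matrices built in the code below are the standard ones. With n = 178 samples split into classes of size n_c (59, 71, 48), class means \mu_c and overall mean \mu, the code computes

S_w = \frac{1}{n} \sum_{c=1}^{3} \sum_{x_i \in c} (x_i - \mu_c)(x_i - \mu_c)^{T}
S_b = \sum_{c=1}^{3} \frac{n_c}{n} (\mu_c - \mu)(\mu_c - \mu)^{T}, \qquad \mu = \sum_{c=1}^{3} \frac{n_c}{n} \mu_c

and the projection directions are the leading eigenvectors of S_w^{-1} S_b.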
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
class LDA:  # Python convention: class names start with a capital letter
    # attributes
    fileName = ""
    labels = []
    features = []
    featureMean = []
    Sw = np.zeros(shape=[13, 13])  # within-class scatter matrix (13 features)
    Sb = np.zeros(shape=[13, 13])  # between-class scatter matrix
    Coord = []
    feature_test = []
    def __init__(self, filename="wine.txt"):
        self.fileName = filename
        self.labels = []
        self.features = []
        self.featureMean = []
    '''***************************************************************
    * @Fun_Name   : def getSample(self):
    * @Function   : read the sample data
    * @Parameter  :
    * @Return     :
    * @Creed      : Talk is cheap , show me the code
    ***********************xieqinyu creates in 21:19 2020/5/23***'''
    def getSample(self):
        dataSet = pd.read_csv(self.fileName, header=None).values  # DataFrame -> ndarray; column 0 is the class label, columns 1-13 are the features
        self.features.append(dataSet[0:59, 1:14])     # class 1 (59 samples)
        self.features.append(dataSet[59:130, 1:14])   # class 2 (71 samples)
        self.features.append(dataSet[130:178, 1:14])  # class 3 (48 samples)
    '''***************************************************************
    * @Fun_Name   : def getSw(self):
    * @Function   : compute the within-class scatter matrix
    * @Parameter  :
    * @Return     :
    * @Creed      : Talk is cheap , show me the code
    ***********************xieqinyu creates in 21:20 2020/5/23***'''
    def getSw(self):
        featureDeal = []
        for i in range(len(self.features)):
            self.featureMean.append(np.mean(self.features[i], axis=0))
            featureDeal.append(np.mat(self.features[i] - self.featureMean[i]))  # center each class on its own mean
            self.Sw += np.mat(featureDeal[i]).T * np.mat(featureDeal[i])
        self.Sw /= 178  # normalize by the total number of samples
        print(type(self.Sw))
        # print(self.Sw)
    '''***************************************************************
    * @Fun_Name   : def getSb(self):
    * @Function   : compute the between-class scatter matrix
    * @Parameter  :
    * @Return     :
    * @Creed      : Talk is cheap , show me the code
    ***********************xieqinyu creates in 15:57 2020/5/24***'''
    def getSb(self):
        # overall mean = class means weighted by class proportions (59, 71, 48 samples out of 178)
        allFeatureMean = (59/178)*self.featureMean[0] + (71/178)*self.featureMean[1] + (48/178)*self.featureMean[2]
        self.Sb = (59/178) * (np.mat(self.featureMean[0] - allFeatureMean).T * np.mat(self.featureMean[0] - allFeatureMean)) \
                + (71/178) * (np.mat(self.featureMean[1] - allFeatureMean).T * np.mat(self.featureMean[1] - allFeatureMean)) \
                + (48/178) * (np.mat(self.featureMean[2] - allFeatureMean).T * np.mat(self.featureMean[2] - allFeatureMean))
        print(type(self.Sb))
    '''***************************************************************
    * @Fun_Name   : def getU(self):
    * @Function   : compute the projection matrix; here we reduce to two dimensions
    * @Parameter  : I printed the eigenvalues and the two largest sit in columns 1 and 2;
    *               only the directions of U are needed here, so the magnitudes are left untouched
    * @Return     :
    * @Creed      : Talk is cheap , show me the code
    ***********************xieqinyu creates in 15:58 2020/5/24***'''
    def getU(self):
        featureValues, featureVector = np.linalg.eig(np.mat(self.Sw).I * np.mat(self.Sb))  # eigen-decomposition of Sw^-1 * Sb
        print(featureValues)
        # print(featureVector)
        return featureVector[:, 1:3]  # for this data the two largest eigenvalues sit in columns 1 and 2
    def getCoord(self, Vector):
        # print(np.mat(self.features).shape, Vector.shape)
        for i in range(len(self.features)):
            self.Coord.append(np.mat(self.features[i]) * np.mat(Vector))  # project each class onto the 2-D subspace
lda = LDA()
lda.getSample()
lda.getSw()
lda.getSb()
Vector = lda.getU()   # getU takes no argument; it always returns two directions
lda.getCoord(Vector)
plt.scatter(lda.Coord[0][:, 0].tolist(), lda.Coord[0][:, 1].tolist(), color="b")
plt.scatter(lda.Coord[1][:, 0].tolist(), lda.Coord[1][:, 1].tolist(), color="r")
plt.scatter(lda.Coord[2][:, 0].tolist(), lda.Coord[2][:, 1].tolist(), color="g")
plt.show()
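getU works here only because, for this dataset, the two largest eigenvalues happen to sit in columns 1 and 2 of the eigenvector matrix. A minimal, more general sketch (a hypothetical helper, not part of the class above) sorts the eigenvalues explicitly and keeps the top n directions; the real part is used for ranking in case floating-point noise introduces tiny imaginary components:

import numpy as np

def get_top_directions(Sw, Sb, n=2):
    # eigen-decomposition of Sw^-1 * Sb, then keep the n eigenvectors
    # belonging to the largest eigenvalues instead of hardcoding columns
    values, vectors = np.linalg.eig(np.linalg.inv(Sw) @ Sb)
    order = np.argsort(-values.real)        # column indices, largest eigenvalue first
    return np.real(vectors[:, order[:n]])   # 13 x n projection matrix

# usage with the object built above (getSw and getSb must have been called):
# Vector = get_top_directions(np.asarray(lda.Sw), np.asarray(lda.Sb), n=2)
# lda.getCoord(Vector)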
Result: scatter plot of the three projected classes (blue / red / green).
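As a quick cross-check (not part of the original post), scikit-learn's LinearDiscriminantAnalysis should give a qualitatively similar two-dimensional separation of the three wine classes; the file name and column layout below follow the wine.txt assumed above:

import pandas as pd
import matplotlib.pyplot as plt
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

data = pd.read_csv("wine.txt", header=None).values
y, X = data[:, 0], data[:, 1:14]   # column 0 = class label, columns 1-13 = features
X2 = LinearDiscriminantAnalysis(n_components=2).fit_transform(X, y)
plt.scatter(X2[:, 0], X2[:, 1], c=y)   # one color per class
plt.show()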