"""
Author: Victoria
Created on: 2017.9.15 11:45
"""
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
def LDA(X0, X1):
    """
    Get the optimal params of LDA model given training data.

    Input:
        X0: array-like with shape [N1, d]. Samples of the first class.
        X1: array-like with shape [N2, d]. Samples of the second class.
    Return:
        omega: np.array with shape [d, 1]. Optimal params of LDA, i.e. the
            solution of Sw * omega = (mean0 - mean1)^T, where Sw is the
            within-class scatter matrix.
    Raises:
        np.linalg.LinAlgError: if the within-class scatter matrix is singular.
    """
    # Coerce to float arrays so that lists and object-dtype arrays are
    # accepted by the linear algebra routine below.
    X0 = np.asarray(X0, dtype=float)
    X1 = np.asarray(X1, dtype=float)

    # Per-class feature means, shape [1, d]
    mean0 = np.mean(X0, axis=0, keepdims=True)
    mean1 = np.mean(X1, axis=0, keepdims=True)

    # Within-class scatter matrix, shape [d, d]
    centered0 = X0 - mean0
    centered1 = X1 - mean1
    Sw = centered0.T.dot(centered0) + centered1.T.dot(centered1)

    # Solve the linear system directly instead of forming inv(Sw) explicitly;
    # the result keeps the column shape [d, 1].
    omega = np.linalg.solve(Sw, (mean0 - mean1).T)
    return omega
if __name__=="__main__":
    # Read data from the CSV file (no header row). The last column is used
    # as the class label (1.0 / 0.0); columns 1..-2 are used as features.
    work_book = pd.read_csv("../data/watermelon_3a.csv", header=None)
    samples = work_book.values
    labels = samples[:, -1]
    positive_data = samples[labels == 1.0, :]
    negative_data = samples[labels == 0.0, :]
    print(positive_data)

    # LDA: negative samples are passed as the first class, positive as the second
    omega = LDA(negative_data[:, 1:-1], positive_data[:, 1:-1])

    # Plot the samples: column 1 on the x axis, column 2 on the y axis
    plt.plot(positive_data[:, 1], positive_data[:, 2], "bo")
    plt.plot(negative_data[:, 1], negative_data[:, 2], "r+")

    # LDA returns a [d, 1] column, so omega[0] / omega[1] are 1-element arrays.
    # Flatten to plain floats so both line endpoints are scalars; otherwise
    # matplotlib receives a ragged [int, array] list for the y values.
    weights = np.ravel(omega)
    lda_left = 0
    lda_right = float(-(weights[0] * 0.9) / weights[1])
    # Line through the origin satisfying omega[0]*x + omega[1]*y = 0,
    # drawn from x = 0 to x = 0.9
    plt.plot([0, 0.9], [lda_left, lda_right], 'g-')

    plt.xlabel('density')
    plt.ylabel('sugar rate')
    plt.title("LDA")
    plt.show()

# View the complete code and dataset