文章内容:使用鸢尾花数据,将sklearn自带的iris从字典dict格式转化为dataframe格式,用平行坐标图进行可视化,由图认为有必要做PCA和LDA,利用PCA和LDA的原理自编函数实现降维分析,分别绘制图像
import warnings

import matplotlib.mlab as mlab
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from pandas.plotting import parallel_coordinates
from sklearn import datasets
from sklearn.model_selection import train_test_split

# Global matplotlib configuration for every figure drawn below:
#   - 'font.sans-serif': use the SimHei font so Chinese text can be rendered.
#   - 'axes.unicode_minus': False so the minus sign is displayed on the axes.
plt.rcParams.update({
    'font.sans-serif': ['SimHei'],
    'axes.unicode_minus': False,
})

# Do not show warnings in the notebook output.
warnings.filterwarnings("ignore")
1) read data
In [220]:
# Load the iris dataset bundled with scikit-learn; the result is dict-like and
# is read below through its 'data', 'target' and 'feature_names' keys.
iris_data = datasets.load_iris()
2) dict 转 dataframe
In [221]:
# Build one table from the dict-like dataset: the feature columns first,
# followed by the class code as a final 'target' column.
column_labels = iris_data['feature_names'] + ['target']
# Stack the 2-D feature matrix and the 1-D target vector side by side.
feature_and_target = np.column_stack((iris_data['data'], iris_data['target']))
iris0 = pd.DataFrame(data=feature_and_target, columns=column_labels)
3)target 中 0,1,2 转字符
In [222]:
# NOTE: `iris` is another name for the same DataFrame object as `iris0`
# (no copy is made), so the column assignment below is visible through both.
iris = iris0

# Numeric class code -> species name.
species_by_code = {0: 'setosa', 1: 'versicolor', 2: 'virginica'}

# Swap every class code in the 'target' column for its species name,
# then write the relabelled column back into the table.
species = pd.Series(iris['target']).replace(species_by_code)
iris['target'] = species
4) basic information
In [223]:
# Show six randomly sampled rows as a quick sanity check of the table.
iris.sample(6)
Out[223]:
| | sepal length (cm) | sepal width (cm) | petal length (cm) | petal width (cm) | target |
---|---|---|---|---|---|
64 | 5.6 | 2.9 | 3.6 | 1.3 | versicolor |
32 | 5.2 | 4.1 | 1.5 | 0.1 | setosa |
144 | 6.7 | 3.3 | 5.7 | 2.5 | virginica |
86 | 6.7 | 3.1 | 4.7 | 1.5 | versicolor |
69 | 5.6 | 2.5 | 3.9 | 1.1 | versicolor |
46 | 5.1 | 3.8 | 1.6 | 0.2 | setosa |
In [224]:
# Show the table dimensions as (number of rows, number of columns).
iris.shape
O