import pandas as pd
import numpy as np
from sklearn import datasets
import seaborn as sns
# Load the iris dataset bundled with scikit-learn.
iris = datasets.load_iris()
# List the fields the returned dataset object exposes (data, target, names, ...).
iris.keys()
dict_keys(['data', 'target', 'target_names', 'DESCR', 'feature_names', 'filename'])
# Wrap the raw measurement matrix in a DataFrame whose columns carry the
# dataset's feature names, then display it.
X_train = pd.DataFrame(data=iris.data, columns=iris.feature_names)
X_train
| | sepal length (cm) | sepal width (cm) | petal length (cm) | petal width (cm) |
---|---|---|---|---|
0 | 5.1 | 3.5 | 1.4 | 0.2 |
1 | 4.9 | 3.0 | 1.4 | 0.2 |
2 | 4.7 | 3.2 | 1.3 | 0.2 |
3 | 4.6 | 3.1 | 1.5 | 0.2 |
4 | 5.0 | 3.6 | 1.4 | 0.2 |
... | ... | ... | ... | ... |
145 | 6.7 | 3.0 | 5.2 | 2.3 |
146 | 6.3 | 2.5 | 5.0 | 1.9 |
147 | 6.5 | 3.0 | 5.2 | 2.0 |
148 | 6.2 | 3.4 | 5.4 | 2.3 |
149 | 5.9 | 3.0 | 5.1 | 1.8 |
150 rows × 4 columns
# Per-feature summary statistics: count, mean, std, min, quartiles and max.
X_train.describe()
| | sepal length (cm) | sepal width (cm) | petal length (cm) | petal width (cm) |
---|---|---|---|---|
count | 150.000000 | 150.000000 | 150.000000 | 150.000000 |
mean | 5.843333 | 3.057333 | 3.758000 | 1.199333 |
std | 0.828066 | 0.435866 | 1.765298 | 0.762238 |
min | 4.300000 | 2.000000 | 1.000000 | 0.100000 |
25% | 5.100000 | 2.800000 | 1.600000 | 0.300000 |
50% | 5.800000 | 3.000000 | 4.350000 | 1.300000 |
75% | 6.400000 | 3.300000 | 5.100000 | 1.800000 |
max | 7.900000 | 4.400000 | 6.900000 | 2.500000 |
# Column-wise mean of each of the four measurements.
X_train.mean()
sepal length (cm) 5.843333
sepal width (cm) 3.057333
petal length (cm) 3.758000
petal width (cm) 1.199333
dtype: float64
# Pairwise correlation coefficients between the four features.
X_train.corr()
| | sepal length (cm) | sepal width (cm) | petal length (cm) | petal width (cm) |
---|---|---|---|---|
sepal length (cm) | 1.000000 | -0.117570 | 0.871754 | 0.817941 |
sepal width (cm) | -0.117570 | 1.000000 | -0.428440 | -0.366126 |
petal length (cm) | 0.871754 | -0.428440 | 1.000000 | 0.962865 |
petal width (cm) | 0.817941 | -0.366126 | 0.962865 | 1.000000 |
# Plot the feature-to-feature correlation matrix as a heatmap, writing each
# coefficient into its cell and colouring with the diverging 'RdYlBu' palette.
sns.heatmap(data=X_train.corr(), annot=True, cmap="RdYlBu")
<matplotlib.axes._subplots.AxesSubplot at 0x1f90f0c96c8>
![Annotated heatmap of the pairwise correlations between the four iris features](output_7_1.png)
# One-hot encode the integer class labels: build one boolean column per class
# id (0, 1, 2), place them side by side, and name the columns after the
# species so each row has exactly one True.
Y_train0 = pd.DataFrame(iris.target).eq(0)
Y_train1 = pd.DataFrame(iris.target).eq(1)
Y_train2 = pd.DataFrame(iris.target).eq(2)
Y_train = pd.concat(
    [Y_train0, Y_train1, Y_train2],
    axis="columns",
)
Y_train.columns = iris.target_names
Y_train
| | setosa | versicolor | virginica |
---|---|---|---|
0 | True | False | False |
1 | True | False | False |
2 | True | False | False |
3 | True | False | False |
4 | True | False | False |
... | ... | ... | ... |
145 | False | False | True |
146 | False | False | True |
147 | False | False | True |
148 | False | False | True |
149 | False | False | True |
150 rows × 3 columns
# Put the one-hot species columns next to the measurements (aligned on the
# row index) so features and class indicators live in a single table.
Train = pd.concat([X_train, Y_train], axis="columns")
Train
| | sepal length (cm) | sepal width (cm) | petal length (cm) | petal width (cm) | setosa | versicolor | virginica |
---|---|---|---|---|---|---|---|
0 | 5.1 | 3.5 | 1.4 | 0.2 | True | False | False |
1 | 4.9 | 3.0 | 1.4 | 0.2 | True | False | False |
2 | 4.7 | 3.2 | 1.3 | 0.2 | True | False | False |
3 | 4.6 | 3.1 | 1.5 | 0.2 | True | False | False |
4 | 5.0 | 3.6 | 1.4 | 0.2 | True | False | False |
... | ... | ... | ... | ... | ... | ... | ... |
145 | 6.7 | 3.0 | 5.2 | 2.3 | False | False | True |
146 | 6.3 | 2.5 | 5.0 | 1.9 | False | False | True |
147 | 6.5 | 3.0 | 5.2 | 2.0 | False | False | True |
148 | 6.2 | 3.4 | 5.4 | 2.3 | False | False | True |
149 | 5.9 | 3.0 | 5.1 | 1.8 | False | False | True |
150 rows × 7 columns
# Heatmap of the correlations across the combined table, i.e. between the
# measurements and the boolean species indicators as well as among features.
sns.heatmap(data=Train.corr(), annot=True, cmap="RdYlBu")
<matplotlib.axes._subplots.AxesSubplot at 0x1f90f027a48>
![Annotated heatmap of the correlations between the iris features and the one-hot species indicators](output_10_1.png)