# 1. 特征相关性分析 (Feature correlation analysis)
# 导入必要模块
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
import seaborn as sns # 一个简单的画图函数
# Capture seaborn's current colour palette; presumably consumed by plotting
# code later in the file (not visible in this section).
color = sns.color_palette()
# IPython/Jupyter magic (not plain-Python syntax): render matplotlib figures
# inline in the notebook output.
%matplotlib inline
# Load the training data with pandas.
dpath = 'F:/Python_demo/XGBoost/data/'
train = pd.read_csv(dpath + "AllstateClaimsSeverity_train.csv")
print(train.head(5))  # show the first 5 rows
# DataFrame.info() writes its summary to stdout itself and returns None, so it
# is called directly; wrapping it in print() would emit a stray "None" line.
train.info()  # show summary information about the loaded frame

split = 117  # split position: positional index of the first column to keep
size = 15  # expected number of extracted columns (per the original note; not enforced here)
data = train.iloc[:, split:]  # keep every column from position `split` onward
cols = data.columns  # labels of the extracted columns