Day09 热力图和子图的绘制

最新推荐文章于 2025-12-05 17:02:52 发布

原创最新推荐文章于 2025-12-05 17:02:52 发布 · 211 阅读

0 ·

CC 4.0 BY-SA版权

文章标签：

#python #开发语言

数据的预处理

结合前几天学习的数据预处理相关知识，对data.csv数据进行完整的处理：

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
# Load the raw dataset from the current working directory.
data = pd.read_csv('data.csv')

# Show the DataFrame summary. DataFrame.info() writes its report to stdout
# itself, so it has to be *called*; print(data.info) would only print the
# repr of the bound method instead of the summary.
data.info()

# Preview the first five rows.
print(data.head(5))

# List the column names.
print(data.columns)

# Inspect the category frequencies of the two columns encoded below.
print(data['Years in current job'].value_counts())
print(data['Home Ownership'].value_counts())

# Label encoding: one {raw string -> integer code} table per column.
mapping = {
    'Years in current job': {
        '10+ years': 10,
        '2 years': 2,
        '3 years': 3,
        '< 1 year': 0,
        '5 years': 5,
        '1 year': 1,
        '4 years': 4,
        '6 years': 6,
        '7 years': 7,
        '8 years': 8,
        '9 years': 9,
    },
    'Home Ownership': {
        'Home Mortgage': 0,
        'Rent': 1,
        'Own Home': 2,
        'Have Mortgage': 3,
    },
}

# Replace each string column by its integer codes. Series.map turns any value
# that is missing from the table (including existing NaN) into NaN.
for column, codes in mapping.items():
    data[column] = data[column].map(codes)

# Check the encoded result.
print(data['Home Ownership'].head(5))
print(data['Years in current job'].head(5))

热力图的绘制

# Split the columns by dtype: non-numeric columns are treated as discrete
# features, numeric columns as continuous features.
# is_numeric_dtype is used instead of comparing the dtype with 'object' so
# that string / category / datetime columns are never classified as
# continuous and handed to corr() by mistake.
discrete_features = [
    column for column in data.columns
    if not pd.api.types.is_numeric_dtype(data[column])
]
print(discrete_features)

continuous_features = [
    column for column in data.columns if column not in discrete_features
]
print(continuous_features)

# Pairwise correlation matrix of the continuous features.
correlation_matrix = data[continuous_features].corr()

# Render figures at a higher resolution.
plt.rcParams['figure.dpi'] = 300

# Draw the heatmap; the colour scale is pinned to [-1, 1], the full range of
# a correlation coefficient.
plt.figure(figsize=(6, 4))
sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm', vmin=-1, vmax=1)
plt.title('Correlation Heatmap of Continuous Features')
plt.show()

子图的绘制

# Drawing subplots
# Columns to visualise, one boxplot per subplot.
features = [
    'Annual Income',
    'Years in current job',
    'Tax Liens',
    'Number of Open Accounts',
]

# Render figures at a higher resolution.
plt.rcParams['figure.dpi'] = 300

# Build a 2 x 2 grid of axes.
fig, axes = plt.subplots(2, 2, figsize=(6, 4))

for idx, feature in enumerate(features):
    # divmod(idx, 2) == (idx // 2, idx % 2): the quotient is the grid row and
    # the remainder is the grid column, e.g. 0 -> (0, 0), 1 -> (0, 1),
    # 2 -> (1, 0), 3 -> (1, 1).
    row, col = divmod(idx, 2)
    ax = axes[row, col]

    # Boxplot of the feature, with missing values dropped first.
    ax.boxplot(data[feature].dropna())
    ax.set_title(f'Boxplot of {feature}')
    ax.set_ylabel(feature)

# Adjust the spacing between the subplots.
plt.tight_layout()

# Display the figure.
plt.show()

@浙大疏锦行