# Import the required libraries
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
# Load the dataset from 'heart.csv' (relative path) into a pandas DataFrame.
data = pd.read_csv('heart.csv')
# Bare expression: evaluates the object's type. The value is only shown when
# run interactively (e.g. as the last line of a notebook cell).
type(data)
# Inspect the features
# Quick look at the loaded table. These are bare expressions, so their values
# are only shown when run interactively.
# Column labels.
data.columns
# (number of rows, number of columns).
data.shape
# First rows of the table (pandas shows 5 by default).
data.head()
# Descriptive statistics
# Summary statistics of the table. The call parentheses are required: a bare
# `data.describe` only references the bound method and computes nothing.
data.describe()
# Data type of each column.
data.dtypes
# Prints a concise summary (column names, non-null counts, dtypes).
data.info()
# Check for missing values
# Boolean mask with True wherever a cell is missing.
data.isnull()
# Number of missing cells per column.
data.isnull().sum()
# Author's note: no missing values were found.
# Identify continuous and discrete features separately
# Split the column names by dtype: float64/int64 columns are treated as
# continuous features, object (string) columns as discrete features.
continuous_features = list(
    data.select_dtypes(include=['float64', 'int64']).columns
)
discrete_features = list(
    data.select_dtypes(include=['object']).columns
)
print("连续特征:", continuous_features)
print("离散特征:", discrete_features)
# Visualize each continuous variable individually
# Plot a histogram with a KDE overlay for every continuous feature, one subplot
# per feature, then save the figure to 'histograms.png' and display it.
# The grid is sized from the number of features (4 columns, as many rows as
# needed), so more than 16 features no longer overflows a fixed 4x4 layout.
n_cols = 4
n_rows = max(1, -(-len(continuous_features) // n_cols))  # ceiling division
plt.figure(figsize=(16, 3 * n_rows))  # 16x12 when there are 4 rows
for i, feature in enumerate(continuous_features, 1):
    plt.subplot(n_rows, n_cols, i)
    # Alternatives for the same slot: sns.violinplot(data[feature]),
    # sns.boxplot(data[feature]) or sns.kdeplot(data[feature], fill=True).
    sns.histplot(data[feature], kde=True, bins=30)
    plt.title(f'Histogram of {feature}', fontsize=14)
    plt.xlabel(feature)
    plt.ylabel('Count')
    plt.xticks(rotation=45)  # rotate x tick labels to avoid overlap

plt.tight_layout()  # automatic subplot spacing
# Explicit margins and gaps; top < 1 leaves room for the figure title.
plt.subplots_adjust(
    left=0.1,     # left edge
    bottom=0.1,   # bottom edge
    right=0.9,    # right edge
    top=0.92,     # top edge
    wspace=0.3,   # horizontal gap between subplots
    hspace=0.5,   # vertical gap between subplots
)
plt.suptitle(
    'Histograms of Continuous Features', fontsize=20, y=0.98, weight='bold'
)  # figure-level title
plt.savefig('histograms.png', dpi=300, bbox_inches='tight')  # save to disk
plt.show()  # display the figure
# Visualize the relationship between features and the target label
# Draw one violin plot per selected feature, showing its distribution for each
# value of the 'target' column. Every plot gets its own 8x6 figure.
for feature_name in ('cp', 'restecg'):
    plt.figure(figsize=(8, 6))
    sns.violinplot(data=data, x='target', y=feature_name)
    plt.title(f'{feature_name} vs. target')
    plt.xlabel('target')
    plt.ylabel(feature_name)
    plt.show()