# 总体代码:
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib
# Avoid garbled Chinese text in figures: select the SimHei font and turn off
# the Unicode minus sign.
matplotlib.rcParams.update({
    'font.sans-serif': ['SimHei'],
    'axes.unicode_minus': False,
})
def fang_bar(x, y, ylabel, title):
    """Draw a labelled bar chart and display it.

    Args:
        x: Bar categories; must support ``len()``. The x-axis is always
            labelled "地区".
        y: Bar heights, one per entry in ``x``.
        ylabel: Label for the y-axis.
        title: Chart title.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    plt.figure(figsize=(8, 5))  # same canvas size for every chart
    colors = ['#FF1493', '#800080', '#191970', '#FF4500', '#00CED1']
    # Repeat the palette explicitly when there are more bars than colors.
    bar_colors = [colors[i % len(colors)] for i in range(len(x))]
    bars = plt.bar(x, y, color=bar_colors, alpha=0.8)
    plt.xlabel("地区")
    plt.ylabel(ylabel)
    plt.title(title)
    # Leave 20% headroom above the tallest bar for the value labels.
    # ``default=0`` keeps an empty ``y`` from raising ValueError; when the
    # scaled maximum is not positive the axis falls back to a fixed 0-100.
    ymax = max(y, default=0) * 1.2
    plt.ylim(0, ymax if ymax > 0 else 100)
    # Annotate every bar with its height, centred horizontally on the bar
    # and anchored at the bar top.
    for bar in bars:
        height = bar.get_height()
        plt.text(
            bar.get_x() + bar.get_width() / 2.,
            height,
            f'{height:.2f}',
            ha='center',
            va='bottom',
        )
    plt.show()
def clean_data():
""" 封装数据清洗逻辑 """
data = pd.read_csv("第一次清洗后的数据.csv")
# 处理单价列(保留原始需求)
data['单价'] = data['单价'].str.replace("[,元/平]", "", regex=True).astype(int)
# 新增:处理总价列(假设单位为万元)
data['总价'] = data['总价'].str.replace("万", "").astype(float)
# 筛选总价区间(100-120万)
data = data[(data['总价'] >= 100) & (data['总价'] <= 120)]
# 处理面积列(假设单位为平米)
data['面积'] = data['面积'].str.replace("平米", "")