import os.path
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib
import datetime
# 设置中文字体,这里以微软雅黑为例
matplotlib.rcParams['font.family'] = 'Microsoft YaHei'
matplotlib.rcParams['font.size'] = 10 # 字体大小
matplotlib.rcParams['axes.unicode_minus'] = False # 正常显示负号
date_now = (datetime.datetime.now() + datetime.timedelta(days=-1)).strftime('%Y%m%d')
def plot_double_y(x, y1, y2, title, save_png_nm):
# 创建图和第一个y轴
fig, ax1 = plt.subplots(figsize=(14, 6))
# Adding a grid for better readability
plt.grid(True, which="both", ls="--", linewidth=0.5)
plt.xticks(rotation=45) # 旋转x轴标签,以便更容易阅读
# plt.tight_layout() # 调整布局以适应标签和标题
# 设置第一个y轴的颜色
ax1.set_xlabel('日期')
ax1.set_ylabel('套数', color='tab:red')
ax1.plot(x, y1, color='tab:red', label='套数')
ax1.tick_params(axis='y', labelcolor='tab:red')
# 创建共享x轴的第二个y轴
ax2 = ax1.twinx()
ax2.set_ylabel('均价', color='tab:blue')
ax2.plot(x, y2, color='tab:blue', label='均价')
ax2.tick_params(axis='y', labelcolor='tab:blue')
plt.title(title)
# 合并图例
handles1, labels1 = ax1.get_legend_handles_labels()
handles2, labels2 = ax2.get_legend_handles_labels()
ax1.legend(handles1 + handles2, labels1 + labels2)
for app in ["办公楼", "住宅", "商业", "其他"]:
if app in save_png_nm:
prefix = app
if '可售套数' in save_png_nm:
path = r'D:\data\sz\hourse_%s\%s\available' % (date_now,prefix)
elif '成交套数' in save_png_nm:
path = r'D:\data\sz\hourse_%s\%s\saled' % (date_now,prefix)
if not os.path.exists(path): os.makedirs(path)
plt.savefig(os.path.join(path, save_png_nm), dpi=300)
# 显示图表
# plt.show()
def plot_mul_data(x, y_dic, kwargs):
"""
:param x: 横坐标数据
:param y_dic: 多组y坐标数据{'label1':[y1,y2...],'label2':[y,...]}
:return: 多组y数据共享一组x坐标,显示在一张图上面
"""
label = list(y_dic.keys())
y = list(y_dic.values())
# Plotting the line chart for each series
plt.figure(figsize=(14, 6)) # You can adjust the figure size as needed
markers = ['o', 's', '^', 'D', 'x', '*', 'p', 'h', 'H', 'd', '_', '1', 'v'][:len(label)]
for label, score, mark in zip(label, y, markers):
plt.plot(x, score, marker=mark, label=label)
# Adding title and labels
plt.title(kwargs['title'])
plt.xlabel(kwargs['xlabel'])
plt.ylabel(kwargs['ylabel'])
# # 将 x 轴的刻度设置为对数
# plt.xscale('log')
# Adding a grid for better readability
plt.grid(True, which="both", ls="--", linewidth=0.5)
# Adding a legend to distinguish the different series
plt.legend()
plt.xticks(rotation=45) # 旋转x轴标签,以便更容易阅读
# plt.show()
if kwargs['path']:
path = kwargs['path']
if not os.path.exists(path): os.makedirs(path)
plt.savefig(os.path.join(path, kwargs['save_png_nm']), dpi=300)
else:
plt.show()
def plot_pic(df, sum_type='M', zone=["龙华", "光明", "宝安", "龙岗", "坪山", "南山", "福田", "罗湖", "盐田"],
application='住宅'):
# 筛选出住宅类型的数据
df = df[df['用途'] == application]
if type(zone) == str:
residential_data = df[df['所属区'] == zone]
title = '%s——%s%s度成交均价折线图' % (zone, application, sum_type)
save_png_nm = f'{zone}——{application}均价走势-{sum_type}度.png'
title_available = f'{zone}——{application}{sum_type}度可售套数与成交均价'
save_png_nm_available = f'{zone}——{application}可售套数与成交均价-{sum_type}度.png'
title_saled = f'{zone}——{application}{sum_type}度成交套数与成交均价'
save_png_nm_saled = f'{zone}——{application}成交套数与成交均价-{sum_type}度.png'
residential_data['成交额'] = residential_data['成交面积'] * residential_data['成交均价']
elif type(zone) == list:
residential_data = df[df['所属区'].isin(zone)]
title = f'全市——{application}{sum_type}度成交均价折线图'
save_png_nm = f'全市——{application}均价走势-{sum_type}度.png'
title_available = f'全市——{application}{sum_type}度可售套数与成交均价'
save_png_nm_available = f'全市——{application}可售套数与成交均价-{sum_type}度.png'
title_saled = f'全市——{application}{sum_type}度成交套数与成交均价'
save_png_nm_saled = f'全市——{application}成交套数与成交均价-{sum_type}度.png'
residential_data['成交额'] = residential_data['成交面积'] * residential_data['成交均价']
grouped_sum_1 = residential_data.groupby(residential_data['日期'])['成交额'].sum().reset_index(name='成交额')
grouped_sum_2 = residential_data.groupby(residential_data['日期'])['成交面积'].sum().reset_index(
name='成交面积')
grouped_sum_3 = residential_data.groupby(residential_data['日期'])['成交套数'].sum().reset_index(
name='成交套数')
grouped_sum_4 = residential_data.groupby(residential_data['日期'])['可售套数'].sum().reset_index(
name='可售套数')
grouped_sum_5 = residential_data.groupby(residential_data['日期'])['可售面积'].sum().reset_index(
name='可售面积')
# 合并多个聚合后的DataFrame
residential_data = pd.concat(
[grouped_sum_1, grouped_sum_2['成交面积'], grouped_sum_3['成交套数'], grouped_sum_4['可售套数'],
grouped_sum_5['可售面积']], axis=1)
residential_data["日期"] = residential_data["日期"].astype(str)
# print(residential_data.head())
# print(residential_data.dtypes)
# 将日期列转换为pandas的日期时间格式
residential_data['日期'] = pd.to_datetime(residential_data['日期'])
# 按月份对数据进行分组,并计算每个月份的成交均价平均值
monthly_amount = residential_data.groupby(residential_data['日期'].dt.to_period(sum_type))['成交额'].sum()
monthly_area = residential_data.groupby(residential_data['日期'].dt.to_period(sum_type))['成交面积'].sum()
monthly_avg_prices = monthly_amount / monthly_area # 成交均价
# 按月份对数据进行分组,并计算每个月份的可售套数,可售面积,成交套数
available_units = residential_data.groupby(residential_data['日期'].dt.to_period(sum_type))['可售套数'].mean()
available_area = residential_data.groupby(residential_data['日期'].dt.to_period(sum_type))['可售面积'].mean()
saled_units = residential_data.groupby(residential_data['日期'].dt.to_period(sum_type))['成交套数'].sum()
if sum_type == "Q":
x = '%Y-Q%q'
elif sum_type == "M":
x = '%Y-%m'
elif sum_type == "Y":
x = '%Y'
plot_double_y(available_units.index.to_timestamp().strftime(x), available_units.values, monthly_avg_prices.values,
title=title_available, save_png_nm=save_png_nm_available)
plot_double_y(available_units.index.to_timestamp().strftime(x), saled_units.values, monthly_avg_prices.values,
title=title_saled, save_png_nm=save_png_nm_saled)
# exit(0)
# 绘制均价折线图
plt.figure(figsize=(14, 6))
plt.plot(monthly_avg_prices.index.to_timestamp().strftime(x), monthly_avg_prices.values, marker='o')
plt.title(title)
plt.xlabel('日期')
plt.ylabel('成交均价 (元/平方米)')
plt.grid(True)
plt.xticks(rotation=45) # 旋转x轴标签,以便更容易阅读
plt.tight_layout() # 调整布局以适应标签和标题
path = r'D:\data\sz\hourse_%s\avg_price'%date_now
if not os.path.exists(path): os.makedirs(path)
plt.savefig(os.path.join(path, save_png_nm), dpi=300)
def process_data(df, application, sum_type='Y'):
df = df[df['用途'] == application]
df["日期"] = df["日期"].astype(str)
df['日期'] = pd.to_datetime(df['日期'])
df['日期'] = df['日期'].dt.to_period(sum_type)
df['成交额'] = df['成交面积'] * df['成交均价']
sum_by_groups1 = df.groupby(['所属区', '日期'])['成交面积'].sum().reset_index()
sum_by_groups2 = df.groupby(['所属区', '日期'])['可售面积'].mean().reset_index()
sum_by_groups3 = df.groupby(['所属区', '日期'])['成交额'].sum().reset_index()
merged_1_3 = pd.merge(sum_by_groups1, sum_by_groups3, on=['所属区', '日期'], how='inner')
merged_1_3['成交均价'] = merged_1_3['成交额'] / merged_1_3['成交面积']
df_merge = pd.merge(merged_1_3, sum_by_groups2, on=['所属区', '日期'], how='inner')
if sum_type == "Q":
# t='%Y-Q%q'
t = '%Y-%m-%d'
elif sum_type == "M":
t = '%Y-%m'
elif sum_type == "Y":
t = '%Y'
y_dic_ava_area = {}
y_dic_saled_area = {}
y_dic_avg_price = {}
# 找到最长的x轴
max_len = 0
for zone in ["龙华", "光明", "宝安", "龙岗", "坪山", "南山", "福田", "罗湖", "盐田", "深汕", "大鹏"]:
df_tmp = df_merge[df_merge['所属区'] == zone]
tmp_x = list(df_tmp['日期'])
if len(tmp_x) > max_len:
max_len = len(tmp_x)
x = tmp_x
x = [i.to_timestamp().strftime(t) for i in x]
for zone in ["龙华", "光明", "宝安", "龙岗", "坪山", "南山", "福田", "罗湖", "盐田", "深汕", "大鹏"]:
df_tmp = df_merge[df_merge['所属区'] == zone]
x_tmp = list(df_tmp['日期'])
x_tmp = [i.to_timestamp().strftime(t) for i in x_tmp]
y_ava_tmp, y_saled_tmp, y_price_tmp = list(df_tmp['可售面积']), list(df_tmp['成交面积']), list(
df_tmp['成交均价'])
indexx = list(range(len(x)))
exit_idx = [x.index(x_t) for x_t in x_tmp]
noexit_idx = [x for x in indexx if x not in exit_idx]
for idx in noexit_idx:
y_ava_tmp.insert(idx, None)
y_saled_tmp.insert(idx, None)
y_price_tmp.insert(idx, None)
y_dic_ava_area[zone] = y_ava_tmp
y_dic_saled_area[zone] = y_saled_tmp
y_dic_avg_price[zone] = y_price_tmp
for prefix, y_dic in zip(['成交', '可售', '均价'], [y_dic_saled_area, y_dic_ava_area, y_dic_avg_price]):
path = r'D:\data\sz\hourse_%s\汇总\%s\%s' % (date_now,app, prefix)
ylabel = f'{prefix}面积' if prefix != '均价' else f'{prefix}/平方米'
arg_dic = {'path': path, 'title': f'{app}{prefix}面积区域分布-{sum_type}度', 'xlabel': '日期', 'ylabel': ylabel,
'save_png_nm': f'{app}{prefix}面积-{sum_type}度.png'}
plot_mul_data(x, y_dic, arg_dic)
if __name__ == '__main__':
'''
流程:
1、下载最新数据(https://opendata.sz.gov.cn/data/search/toSearchPost)
2、把最新数据复制到旧数据上
3、执行shen_zhun_hourse.py脚本
'''
# 假设您的数据存储在名为 'real_estate_data.xlsx' 的Excel文件中
file_name = r'D:\data/一手商品房成交信息.xlsx'
# 可办理公积金贷款的预售房源数据
# 读取Excel文件
df = pd.read_excel(file_name, sheet_name='数据集1')
#1、生成汇总信息 所有区的 住宅 年度可售面积 与 均价
application = ["办公楼", "住宅", "商业", "其他"]
sum_type_lis = ['Q', 'Y']
for app in application:
for sum_type in sum_type_lis:
process_data(df, sum_type=sum_type, application=app)
# 2、均价走势图 2、可售面积/套数走势图 3、成交套数
application = ["办公楼", "住宅", "商业", "其他"]
sum_type_lis = ['M', 'Q', 'Y']
zone_list = ["龙华", "光明", "宝安", "龙岗", "坪山", "南山", "福田", "罗湖", "盐田",
["龙华", "光明", "宝安", "龙岗", "坪山", "南山", "福田", "罗湖", "盐田"]]
for app in application:
for zone in zone_list:
for sum_type in sum_type_lis:
plot_pic(df, sum_type=sum_type, zone=zone, application=app)
深圳hourse数据统计
于 2024-04-22 11:30:38 首次发布