# -*- coding: utf-8 -*-
import numpy as np
import pandas as pd
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt
from collections import Counter
#
# Load the listings table; the CSV file is stored in the GB18030 encoding.
lj_data = pd.read_csv(
    "lj_gb.csv",
    encoding="gb18030",
)
# Quick sanity check: show the first rows and the column index.
preview_rows = lj_data.head()
print(preview_rows)
column_index = lj_data.columns
print(column_index)
# Column index as recorded by the original author from a previous run:
# Index(['line', 'station', 'property_name', 'bedrooms', 'livingrooms',
# 'building_area', 'direction', 'decoration', 'has_elevator', 'hml',
# 'building_height', 'building_year', 'building_style',
# 'building_location', 'price_sqm', 'price_ttl'],
# dtype='object')
# Question 1: frequency bar charts for the "hml" and "bedrooms" columns.
def _plot_value_counts(counts, label):
    """Draw a green bar chart of the category frequencies in *counts*.

    Keys that are missing values (NaN) are skipped explicitly.  The previous
    code sliced off the last Counter entry instead, which removes whichever
    category happened to be seen last in the file and therefore depends on
    row order.

    Args:
        counts: Mapping of category value -> number of occurrences.
        label: Column name; used as the x-axis label and as the title prefix.

    Returns:
        A ``(categories, totals)`` pair of lists, in the order they were
        plotted.
    """
    kept = {
        key: int(total)
        for key, total in counts.items()
        if not pd.isna(key)
    }
    categories = list(kept)
    totals = list(kept.values())
    print(categories)
    print(totals)
    plt.bar(categories, totals, 0.6, color="green")
    plt.xticks(rotation=90, fontsize=14)
    plt.xlabel(label, fontsize=14)
    plt.ylabel("sum", fontsize=14)
    plt.title(label + "统计")
    plt.show()
    return categories, totals


cout_hml = Counter(lj_data["hml"].values)
print(cout_hml)
# The chart titles contain Chinese text, so select the SimHei font; with
# axes.unicode_minus disabled, minus signs are drawn as ASCII hyphens.
plt.rcParams["font.sans-serif"] = ['Simhei']
plt.rcParams["axes.unicode_minus"] = False
# Bar chart of the "hml" categories.
X, Y = _plot_value_counts(cout_hml, "hml")

# Bar chart of the "bedrooms" values.
cout_bedrooms = Counter(lj_data["bedrooms"].values)
X, Y = _plot_value_counts(cout_bedrooms, "bedrooms")
# Multivariate analysis: heat map of the pairwise correlations.
data = lj_data[["bedrooms", "livingrooms", "has_elevator", "building_height", "price_ttl"]]
# Since pandas 2.0, DataFrame.corr() raises on non-numeric columns instead of
# silently dropping them.  Selecting numeric/bool columns first keeps the old
# behaviour on every pandas version.
# NOTE(review): whether any of the selected columns is actually non-numeric
# (e.g. has_elevator stored as text) is not visible here -- confirm.
numeric_data = data.select_dtypes(include=["number", "bool"])
# plt.subplots returns a (figure, axes) pair; unpack it and draw the heat map
# on that axes explicitly rather than relying on the implicit current axes.
fig, ax = plt.subplots(figsize=(20, 16))
ax = sns.heatmap(numeric_data.corr(), vmax=.8, square=True, annot=True, ax=ax)
plt.show()
# Two-dimensional look at how "bedrooms" and "livingrooms" relate.
plt.figure(figsize=(16, 8))
bedroom_values = data["bedrooms"].values
livingroom_values = data["livingrooms"].values
plt.scatter(bedroom_values, livingroom_values)
plt.xlabel('bedrooms', fontsize=8)
plt.ylabel('livingrooms', fontsize=8)
plt.title("bedrooms和livingrooms相关性图")
plt.show()
# Author's conclusion from the plot: in this housing data, bedrooms and
# livingrooms are positively correlated and show some degree of association.
# Data, code and report:
# https://download.youkuaiyun.com/download/qq_38735017/87351621