抓取空气质量指数(AQI)与 PM2.5 历史数据,并进行可视化展示:
示例 url='http://www.tianqihoubao.com/aqi/hangzhou-201810.html' 是杭州 2018 年 10 月的空气质量数据;也可以从网站主页出发,抓取全国各城市的 PM2.5 数据进行展示。
import time,requests,re
import pandas as pd
from lxml import etree
#https://blog.youkuaiyun.com/u013337691/article/details/51894453
# Scraper: download every monthly air-quality table linked from the Hangzhou
# index page and append the rows to pm2.5.csv.
from urllib.parse import urljoin

url = 'http://www.tianqihoubao.com/aqi/hangzhou.html'
headers = {'user-agent': "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.95 Safari/537.36", }

# Without a timeout requests can block forever on a stalled connection, and
# without a status check an HTTP error page would be parsed as if it were the
# index, yielding an empty link list and no output at all.
response = requests.get(url, headers=headers, timeout=30)
response.raise_for_status()
html = response.text

# Parse into a separate name so `response` keeps referring to the HTTP reply.
tree = etree.HTML(html)

# Every href found inside the <div class="box p"> element is treated as a
# link to one monthly table.
url_list = tree.xpath('//div[@class="box p"]//a/@href')

for href in url_list:
    # Resolve against the index page so root-relative, relative and absolute
    # hrefs all become valid URLs (plain concatenation only handled hrefs
    # that start with "/").
    month_url = urljoin(url, href)
    # Example result: 'http://www.tianqihoubao.com/aqi/hangzhou-201810.html'

    # pandas downloads the page itself; the first row of the first table is
    # used as the header and the page is decoded as GBK.
    data = pd.read_html(month_url, header=0, encoding='gbk')[0]
    print(data)

    # Append without a header row; the default row index is written as the
    # first CSV field.
    # NOTE: mode='a' means re-running the script appends the same rows again;
    # delete pm2.5.csv first if a fresh file is wanted.
    data.to_csv("pm2.5.csv", mode='a', header=False)

    # Pause between requests to avoid hammering the site.
    time.sleep(1)
数据分析:
import pandas as pd
from matplotlib import pyplot as plt
# Analysis: load the scraped CSV, average the readings per calendar month and
# plot the monthly PM2.5 mean as a line chart.
file_path = "pm2.5.csv"

# PM2.5 in Hangzhou over time (the original note gives 2013-10 to 2018-11).
# The file has no header row, so column names are supplied here.
# NOTE(review): the scraper in this file writes the DataFrame index as an
# extra leading field, so each row has one more field than `names`; pandas
# then uses that first field as the row index. Confirm if the CSV is produced
# differently.
df = pd.read_csv(
    file_path,
    names=['data_time', 'quality', 'AQI', 'ranking', 'PM2.5',
           'Pm10', 'So2', 'No2', 'Co', 'O3'],
)

# Parse the date strings and use them as the index so the frame can be
# resampled by time.
df["timeStamp"] = pd.to_datetime(df["data_time"])
df.set_index('timeStamp', inplace=True)

# Downsample to one mean value per month.
# Only numeric columns are averaged: 'data_time' is still a string column
# here, and calling mean() on non-numeric columns raises TypeError on
# pandas >= 2.0 (older versions dropped such columns silently).
# An explicit MonthEnd offset is passed instead of the 'M' alias, which newer
# pandas deprecates; the resulting month-end bins are the same.
df = df.select_dtypes(include='number').resample(pd.offsets.MonthEnd()).mean()

# Plot the monthly PM2.5 series.
plt.figure(figsize=(20, 8), dpi=80)
data = df['PM2.5']

# Month-end dates formatted as YYYYMMDD serve as tick labels; the points are
# plotted at evenly spaced integer positions.
labels = [stamp.strftime("%Y%m%d") for stamp in data.index]
positions = range(len(labels))

plt.plot(positions, data.values)
plt.xticks(positions, labels, rotation=45)
plt.show()