Python爬取2019-nCoV数据及基于Flask框架的数据可视化
项目概述
本博客是学习分享
1、基于腾讯提供的API爬取数据,部分涉及百度热搜爬取
2、项目基于Flask框架开发
3、运用百度的Echarts,进行可视化
4、数据存储到MySQL
获取腾讯疫情数据
初步认识腾讯数据的结构
首先了解一下腾讯疫情网站的相关数据结构:
腾讯疫情网站:https://news.qq.com/zt2020/page/feiyan.htm
API数据接口:
1、当前数据接口:https://view.inews.qq.com/g2/getOnsInfo?name=disease_h5
2、历史数据接口:https://view.inews.qq.com/g2/getOnsInfo?name=disease_other
3、外国数据接口:https://view.inews.qq.com/g2/getOnsInfo?name=disease_foreign
本项目只做国内数据,国外数据可视化同样道理
API数据结构化
先看历史数据结构
data_history->
data_history["chinaDayList"] #中国每日数据
->confirm 确诊人数
->suspect 疑似人数
->dead 死亡人数
->heal 治愈人数
->nowConfirm 现有确诊
->nowSevere 现有重症
->importedCase 境外输入
->deadRate 死亡率
->healRate 治愈率
->date 日期
data_history["chinaDayAddList"] #每日新增数据
->confirm 新增确诊人数
->suspect 新增疑似人数
->dead 新增死亡人数
->heal 新增治愈人数
->importedCase 新增境外输入人数
->deadRate 新增死亡率
->healRate 新增治愈率
->date 日期
当前数据结构
data_all["lastUpdateTime"] #数据时间
data_all["chinaTotal"] #中国总数据 ,字典
data_all["chinaAdd"] #对比上日,字典
data_all["areaTree"][0]["total"] #全国现有的数据 ,字典
data_all["areaTree"][0]["children"] #省级
由于字段太多,这里就不一一展开了
获取数据
基于工具库
import json
import time

import requests
代码
初始化类
class tencent_data():
def __init__(self):
    """Download the current and historical domestic data and cache the parts used later.

    Two HTTP GET requests are made (current data, then historical data).
    After construction the instance exposes:

    * ``update_time``     - value of ``lastUpdateTime`` in the current payload
    * ``data_Total``      - ``chinaTotal`` dict (national cumulative totals)
    * ``data_Add``        - ``chinaAdd`` dict (change compared with the previous day)
    * ``data_Now``        - ``areaTree[0]["total"]`` dict
    * ``data_province``   - ``areaTree[0]["children"]`` (province-level entries)
    * ``chinaDayList``    - daily cumulative history
    * ``chinaDayAddList`` - daily increment history

    Raises:
        requests.RequestException: on network failure or timeout.
        KeyError / ValueError: if a response does not have the expected shape.
    """
    url = 'https://view.inews.qq.com/g2/getOnsInfo?name=disease_h5'  # current domestic data
    url2 = 'https://view.inews.qq.com/g2/getOnsInfo?name=disease_other'  # domestic historical data
    headers = {
        "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36"
    }

    def fetch(endpoint):
        # `headers` must be passed by keyword: the second positional argument
        # of requests.get() is `params`, which would put the User-Agent into
        # the query string instead of the request headers.
        # A timeout keeps a stalled connection from blocking forever.
        response = requests.get(endpoint, headers=headers, timeout=10)
        # The response is a JSON envelope whose "data" field is itself a
        # JSON-encoded string, hence the two decoding passes.
        envelope = json.loads(response.text)
        return json.loads(envelope['data'])

    data_all = fetch(url)
    data_history = fetch(url2)

    # Current snapshot.
    self.update_time = data_all["lastUpdateTime"]
    self.data_Total = data_all["chinaTotal"]
    self.data_Add = data_all["chinaAdd"]
    country = data_all["areaTree"][0]
    self.data_Now = country["total"]
    self.data_province = country["children"]

    # Historical series.
    self.chinaDayList = data_history["chinaDayList"]
    self.chinaDayAddList = data_history["chinaDayAddList"]
历史数据:返回字典
def get_history(self):
    """Return the daily cumulative national figures keyed by ISO date.

    Reads ``self.chinaDayList`` (a list of per-day dicts) and returns a dict
    mapping ``"YYYY-MM-DD"`` to a dict holding ``confirm``, ``suspect``,
    ``dead``, ``heal``, ``nowConfirm``, ``nowSevere``, ``importedCase``,
    ``deadRate`` and ``healRate`` copied from that day's entry.

    The ``date`` field of an entry is ``"MM.DD"`` with no year.  If an entry
    carries a ``y`` field it is used as the year; otherwise 2020 is assumed,
    which is what this method always did before.

    Raises:
        KeyError: if an entry lacks ``date`` or one of the copied fields.
        ValueError: if the date cannot be parsed.
    """
    fields = (
        "confirm", "suspect", "dead", "heal", "nowConfirm",
        "nowSevere", "importedCase", "deadRate", "healRate",
    )
    history = {}
    for entry in self.chinaDayList:
        # Without a per-entry year, entries from different years that share
        # a month/day would overwrite each other.
        year = str(entry.get("y", "2020"))
        parsed = time.strptime(year + "." + entry["date"], "%Y.%m.%d")
        day = time.strftime("%Y-%m-%d", parsed)  # normalise to ISO format
        history[day] = {name: entry[name] for name in fields}
    return history
每日新增历史数据:返回字典
def get_historyAdd(self):
    """Return the daily *increment* figures keyed by ISO date.

    Reads ``self.chinaDayAddList`` (a list of per-day dicts) and returns a
    dict mapping ``"YYYY-MM-DD"`` to a dict holding ``confirm``, ``suspect``,
    ``dead``, ``heal``, ``importedCase``, ``deadRate`` and ``healRate``
    copied from that day's entry.

    The ``date`` field of an entry is ``"MM.DD"`` with no year.  If an entry
    carries a ``y`` field it is used as the year; otherwise 2020 is assumed,
    which is what this method always did before.

    Raises:
        KeyError: if an entry lacks ``date`` or one of the copied fields.
        ValueError: if the date cannot be parsed.
    """
    fields = (
        "confirm", "suspect", "dead", "heal",
        "importedCase", "deadRate", "healRate",
    )
    history_add = {}
    for entry in self.chinaDayAddList:
        # Without a per-entry year, entries from different years that share
        # a month/day would overwrite each other.
        year = str(entry.get("y", "2020"))
        parsed = time.strptime(year + "." + entry["date"], "%Y.%m.%d")
        day = time.strftime("%Y-%m-%d", parsed)  # normalise to ISO format
        history_add[day] = {name: entry[name] for name in fields}
    return history_add
各省及市数据整合(整合没啥用-下面有分开)
def get_details(self):
    """Return one flat row per city combining province-level and city-level figures.

    Iterates ``self.data_province``; for every city under every province a
    list is appended with this layout::

        [update_time,
         province, pro_nowConfirm, pro_confirm, pro_dead,
         pro_deadRate, pro_heal, pro_healRate,
         city, city_nowConfirm, city_confirm, city_dead,
         city_deadRate, city_healRate]

    Note that the city part carries no cumulative ``heal`` count; the row
    layout is kept as it was written.

    Raises:
        KeyError: if a province or city entry lacks an expected field.
    """
    details = []
    for pro_infos in self.data_province:
        pro_total = pro_infos["total"]  # province-level totals
        province_part = [
            self.update_time,
            pro_infos["name"],
            pro_total["nowConfirm"],
            pro_total["confirm"],
            pro_total["dead"],
            pro_total["deadRate"],
            pro_total["heal"],
            pro_total["healRate"],
        ]
        for city_infos in pro_infos["children"]:
            city_total = city_infos["total"]  # city-level totals
            details.append(province_part + [
                city_infos["name"],
                city_total["nowConfirm"],
                city_total["confirm"],
                city_total["dead"],
                city_total["deadRate"],
                city_total["healRate"],
            ])
    # The original returned the undefined name `detail` (NameError).
    return details
省和市分开:
def get_cityList(self):
    """Return one row per city with that city's figures.

    Iterates ``self.data_province``; for every city under every province a
    list is appended with this layout::

        [update_time, province, city, city_nowConfirm, city_confirm,
         city_dead, city_deadRate, city_heal, city_healRate]

    Raises:
        KeyError: if a province or city entry lacks an expected field.
    """
    # Order of the city-level totals copied into each row.
    total_fields = ("nowConfirm", "confirm", "dead", "deadRate", "heal", "healRate")
    details = []
    for pro_infos in self.data_province:
        province = pro_infos["name"]
        for city_infos in pro_infos["children"]:
            city = city_infos["name"]
            city_total = city_infos["total"]
            row = [self.update_time, province, city]
            row.extend(city_total[field] for field in total_fields)
            details.append(row)
    return details
def get_provinceList(self):
#各省数据
details = []
for pro_infos in self.data_province:
province = pro_infos["name"] #省名
pro_total = pro_infos["total"] #省级累计总数数据
pro_nowConfirm = pro_total["nowConfirm"]#现有确诊数
pro_confirm = pro_total["confirm"] #累计确诊数
pro_dead = pro_total["dead"] #累计死亡数
pro_deadRate = pro_total["deadRate"] #