import requests
import re
import json
import csv
import pandas as pd
from pyecharts.charts import Map
from pyecharts import options as opts
# ---- Section 2: data collection (web scraping) ----
# Target URL: Baidu's COVID-19 real-time data page (query string pins the
# "USA" city view, but the response still carries the full caseList).
url = 'https://voice.baidu.com/act/newpneumonia/newpneumonia/?from=osari_aladin_banner&city=%E7%BE%8E%E5%9B%BD-%E7%BE%8E%E5%9B%BD'
# Browser-like User-Agent so the server does not reject the request
# (anti-hotlinking / bot filtering).
headers = {'user-agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.131 Safari/537.36 SLBrowser/8.0.0.5261 SLBChan/25'}
# Write the CSV header row before any data rows are appended.
# NOTE(review): mode='a' means re-running the script appends a duplicate
# header to an existing file — confirm whether 'w' was intended.
with open('人数.csv', mode='a', encoding='utf-8', newline='') as f:
    csv_writer = csv.writer(f)
    csv_writer.writerow(['地点', '确诊人数', 'confirmed', 'curConfirmRelative', 'crued', 'died'])
# Fetch the page; the data we need is embedded as JSON in the HTML.
rest = requests.get(url=url, headers=headers)
data_html = rest.text
# Extract the embedded JSON payload from the HTML with a regular expression.
# Raw string avoids the invalid-escape-sequence warning on '\['.
json_str = re.findall(r'"component":\[(.*)\],', data_html)[0]
# Parse the JSON payload.
json_dict = json.loads(json_str)
caseList = json_dict['caseList']
# Append one CSV row per region. Open the file once, outside the loop,
# instead of re-opening it for every region.
with open('人数.csv', mode='a', encoding='utf-8', newline='') as f:
    csv_writer = csv.writer(f)
    for case in caseList:
        area = case['area']                              # province / region name
        curConfirm = case['curConfirm']                  # currently confirmed
        confirmed = case['confirmed']                    # cumulative confirmed
        curConfirmRelative = case['curConfirmRelative']  # newly confirmed (delta)
        crued = case['crued']                            # cured / recovered
        died = case['died']                              # deaths
        csv_writer.writerow([area, curConfirm, confirmed, curConfirmRelative, crued, died])
# ---- Section 3: data visualization ----
# Reload the CSV produced above and draw a choropleth map of China.
df = pd.read_csv('人数.csv')
china_map = (
    Map()
    # Series name '现确诊' (currently confirmed); data is [region, count] pairs.
    .add('现确诊', [list(i) for i in zip(df['地点'].values.tolist(), df['确诊人数'].values.tolist())], 'china')
    .set_global_opts(
        title_opts=opts.TitleOpts(title='各地区确诊人数'),
        # Color scale capped at 200 so low-count regions remain distinguishable.
        visualmap_opts=opts.VisualMapOpts(max_=200, is_inverse=True)
    )
)
# render_notebook() displays inline in Jupyter; use .render('map.html')
# when running as a plain script.
china_map.render_notebook()
# ---- Section 4: the rendered map appears below (notebook output) ----