Python爬取2011-2019年河南各个地区的天气_爬取河南省各城市的天气数据-CSDN博客

本文链接：https://blog.csdn.net/qq_43541919/article/details/106882688

天气网址： http://www.tianqihoubao.com/lishi/zhengzhou/month/201101.html

1）进入网页，单击右键选择查看源代码

2）分析源代码

3）在 PyCharm 中安装所需的软件包（requests、beautifulsoup4、pypinyin），然后编写代码

from bs4 import BeautifulSoup
import requests
import warnings
from pypinyin import lazy_pinyin
warnings.filterwarnings("ignore")
def _cell_parts(cell):
    """Return the "/"-separated pieces of a table cell's text, each stripped."""
    text = cell.text.strip().replace("\n", "")
    return [part.strip() for part in text.split("/")]


def get_temperature(url, citys, out_path="zhengzhou.txt"):
    """Scrape one monthly weather-history page and append its rows to a file.

    The page is fetched with a browser-like User-Agent, every
    ``div.wdetail`` element is searched for table rows, and the first row of
    each (presumably the header) is skipped.  For every remaining row the
    first four cells are read as date, weather, temperature and wind.  The
    weather and wind cells keep only the part before the first "/"; the
    temperature cell contributes its first and second "/"-separated parts
    (presumably high and low -- confirm against the page).

    Each record is written as one line of space-separated fields:
    ``citys date weather temperature temperature1 wind``.

    Args:
        url: Address of the monthly history page to download.
        citys: City label stored as the first field of every record.
        out_path: File the records are appended to.  Defaults to the
            original hard-coded ``"zhengzhou.txt"``.

    Returns:
        None.  The record count and the records themselves are printed.

    Raises:
        requests.exceptions.RequestException: If the download fails or
            exceeds the timeout.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML,  like Gecko) Chrome/63.0.3239.132 Safari/537.36'}
    # A timeout keeps one stalled request from blocking the whole crawl.
    response = requests.get(url, headers=headers, timeout=30).content
    soup = BeautifulSoup(response, 'html.parser')

    ulist = []
    for info in soup.select('div[class="wdetail"]'):
        for tr in info.select('tr')[1:]:
            td_list = tr.find_all('td')
            if len(td_list) < 4:
                # Spacer or malformed row: nothing usable to record.
                continue
            date = td_list[0].text.strip().replace("\n", "")
            weather = _cell_parts(td_list[1])[0]
            temps = _cell_parts(td_list[2])
            temperature = temps[0]
            # A cell without "/" has no second value; record it as empty
            # instead of raising IndexError.
            temperature1 = temps[1] if len(temps) > 1 else ""
            wind = _cell_parts(td_list[3])[0]
            ulist.append([citys, date, weather, temperature, temperature1, wind])

    print(len(ulist))
    print(ulist)

    # Explicit UTF-8 so the Chinese text and symbols are written the same way
    # on every platform; joining the fields directly avoids round-tripping
    # through the list repr, which stripped quotes/commas/brackets from data.
    with open(out_path, "a", encoding="utf-8") as f:
        f.writelines(" ".join(map(str, row)) + "\n" for row in ulist)
if __name__ == '__main__':
    # Crawl window: every month of the years 2011 through 2019.
    years = [str(year) for year in range(2011, 2020)]
    months = ['%02d' % month for month in range(1, 13)]

    # City to crawl, given by its Chinese name.  Assign another city name
    # here (for example "开封市" or "洛阳市") to crawl a different city.
    city_name = "郑州市"
    # The URL uses the pinyin of the name without its last character.
    city_slug = ''.join(lazy_pinyin(city_name[:-1]))
    print(city_slug)

    # One monthly page per (year, month) pair, in chronological order.
    urls = [
        'http://www.tianqihoubao.com/lishi/' + city_slug + '/month/' + year + month + '.html'
        for year in years
        for month in months
    ]
    print(urls)

    for url in urls:
        get_temperature(url, city_name)

结果图：（郑州为例）