天气网址: http://www.tianqihoubao.com/lishi/zhengzhou/month/201101.html
1)进入网页,单击右键选择查看源代码
2)分析源代码
3)在 PyCharm 中安装所需的软件包(requests、beautifulsoup4、pypinyin),然后编写代码
from bs4 import BeautifulSoup
import requests
import warnings
from pypinyin import lazy_pinyin
warnings.filterwarnings("ignore")
# Characters the historical output format never contains: the original
# serialisation went through str(list) and then removed these four.
_RECORD_STRIP_TABLE = str.maketrans("", "", "[]',")


def _cell_parts(cell):
    """Return the '/'-separated pieces of a table cell's text, each stripped."""
    text = cell.text.strip().replace("\n", "")
    return [part.strip() for part in text.split("/")]


def _format_record(record):
    """Render one record as a single space-separated output line."""
    fields = (str(field).translate(_RECORD_STRIP_TABLE) for field in record)
    return " ".join(fields) + "\n"


def get_temperature(url, citys, *, output_path="zhengzhou.txt", timeout=30):
    """Scrape one monthly weather page and append its rows to a text file.

    Every ``<tr>`` except the first inside each ``div.wdetail`` element is
    read as one record. The four leading ``<td>`` cells are interpreted as
    date, weather, temperature and wind. For weather and wind only the text
    before the first "/" is kept; the temperature cell contributes both its
    first and second "/"-separated values.

    Each record is written as one line of the form
    ``city date weather temperature temperature1 wind``.

    Args:
        url: Address of the monthly history page to download.
        citys: City label stored as the first field of every record.
        output_path: File the records are appended to. Defaults to the
            original hard-coded "zhengzhou.txt".
        timeout: Seconds to wait for the server before requests raises,
            instead of blocking indefinitely.

    Returns:
        None. The record count and the records themselves are printed, and
        the records are appended to ``output_path``.

    Raises:
        requests.RequestException: If the download fails or times out.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36'}
    response = requests.get(url, headers=headers, timeout=timeout).content
    soup = BeautifulSoup(response, 'html.parser')

    ulist = []
    for info in soup.select('div[class="wdetail"]'):
        # The first row is skipped -- presumably the table header.
        for tr in info.select('tr')[1:]:
            td_list = tr.find_all('td')
            if len(td_list) < 4:
                # Malformed or filler row: skip it rather than abort the run.
                continue
            date = td_list[0].text.strip().replace("\n", "")
            weather = _cell_parts(td_list[1])[0]
            temperatures = _cell_parts(td_list[2])
            temperature = temperatures[0]
            # A cell without "/" has no second value; record it as empty.
            temperature1 = temperatures[1] if len(temperatures) > 1 else ""
            wind = _cell_parts(td_list[3])[0]
            ulist.append([citys, date, weather, temperature, temperature1, wind])

    print(len(ulist))
    print(ulist)

    # Explicit UTF-8 keeps the Chinese text independent of the locale, and
    # the context manager closes the file even if a write fails.
    with open(output_path, "a", encoding="utf-8") as f:
        f.writelines(_format_record(record) for record in ulist)
if __name__ == '__main__':
    # Years 2011-2019 and months 01-12, as strings ready to splice into URLs.
    years = [str(year) for year in range(2011, 2020)]
    months = ['%02d' % month for month in range(1, 13)]

    # Other city names noted by the original author as candidates:
    # "郑州市", "开封市", "洛阳市", "平顶山市", "安阳市", "鹤壁市", "新乡市",
    # "焦作市", "濮阳市", "许昌市", "漯河市", "三门峡市", "南阳市", "商丘市",
    # "周口市", "驻马店市", "信阳市"
    city_name = "郑州市"

    # Drop the last character of the name and romanise the rest to get the
    # path segment used in the URL.
    city_slug = ''.join(lazy_pinyin(city_name[:-1]))
    print(city_slug)

    # One URL per (year, month), ordered year-major.
    page_urls = [
        f'http://www.tianqihoubao.com/lishi/{city_slug}/month/{year}{month}.html'
        for year in years
        for month in months
    ]
    print(page_urls)

    for page_url in page_urls:
        get_temperature(page_url, city_name)
结果图(以郑州为例):
参考链接:https://blog.youkuaiyun.com/diboe3923/article/details/101207747