爬取天气网(weather.com.cn)的数据很简单,只需要用到 requests 和 BeautifulSoup 这两个模块。
from bs4 import BeautifulSoup
import requests
# Fetch the weather.com.cn forecast page for one city and print a one-line
# summary (date, weather, temperature, wind direction, wind level) built from
# the first matching tags on the page.

# Browser-like User-Agent header sent with the request.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36 Edg/124.0.0.0'}
# 101030100 is the city code for Tianjin.
url = 'http://www.weather.com.cn/weather/101030100.shtml'
# Always pass a timeout: without one, requests can wait indefinitely on an
# unresponsive server.
response = requests.get(url=url, headers=headers, timeout=10)
# Fail here with an HTTPError on a 4xx/5xx status instead of parsing an error
# page and crashing later with an unrelated AttributeError.
response.raise_for_status()
# Force UTF-8 decoding; otherwise the fetched text comes out garbled.
response.encoding = 'utf-8'
soup = BeautifulSoup(response.text, 'html.parser')
# Today's date: text of the first <h1>, with the "(today)" marker removed.
today = soup.find('h1').string.replace('(今天)', '')
# Weather description: text of the first <p class="wea">.
weather = soup.find('p', class_='wea').string
# Temperature: text of the <i> inside the first <p class="tem">.
temperature = soup.find('p', class_='tem').find('i').string
# Look up the first <p class="win"> once; both wind fields are read from it.
win_tag = soup.find('p', class_='win')
# Wind direction: the title attribute of the <span> inside the wind tag.
win = win_tag.find('span').get('title')
# Wind level: text of the <i> inside the wind tag.
win_level = win_tag.find('i').string
print("今天是%s号,天气是%s,气温%s,风向是%s,风力为%s。" % (today, weather, temperature, win, win_level))