首先,附上代码
import json
import requests
import bs4
def get_html(url: str) -> str:
    """Download *url* and return the response body decoded as UTF-8.

    The request is sent with browser-like headers and a 30 second timeout.

    Args:
        url: Address of the page to fetch.

    Returns:
        The page text on success, or the literal string "请求失败!" when the
        request fails (connection error, timeout, or a non-2xx status).
        Callers that need to tell the two apart must compare against that
        sentinel string.
    """
    # The original dict listed 'Accept-Language' twice; the first entry (an
    # encoding list) was silently overwritten by the second, so only the
    # language list below was ever sent. The dead entry is removed and
    # Accept-Encoding is left to the HTTP library's own default.
    headers = {
        'User-Agent':
            'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 '
            '(KHTML, like Gecko) Chrome/51.0.2704.104 Safari/537.36',
        'Content-Type': 'text/html; charset=utf-8',
        'Accept-Language': 'zh-CN,zh;q=0.8',
        'Connection': 'keep-alive',
    }
    try:
        response = requests.get(url, headers=headers, timeout=30)
        # Turn 4xx/5xx responses into an exception so they take the
        # failure path below instead of returning an error page.
        response.raise_for_status()
    except requests.RequestException:
        # Only network/HTTP failures are treated as "request failed";
        # KeyboardInterrupt and programming errors now propagate.
        return "请求失败!"
    # Force UTF-8 rather than trusting the encoding guessed from headers.
    response.encoding = 'utf-8'
    return response.text
def get_content(url):
    """Fetch the forecast page at *url*, extract per-period weather, print it.

    The page is expected to contain ``<div class="t">`` holding a
    ``<ul class="clearfix">`` whose ``<li>`` items each carry an ``<h1>``
    (period label) and a ``<p class="tem">`` with ``<span>`` and ``<em>``
    children (temperature value and unit).

    Args:
        url: Address of the forecast page, passed through to ``get_html``.

    Returns:
        A list of ``{'day': ..., 'temperature': ...}`` dicts, one per list
        item that could be parsed. The same list is also printed. The list
        is empty when the expected container is not found (for example when
        the download failed).
    """
    weather_list = []
    html = get_html(url)
    soup = bs4.BeautifulSoup(html, 'lxml')

    # find() returns None when nothing matches, so walk the chain step by
    # step instead of crashing with AttributeError on a missing container.
    container = soup.find('div', class_='t')
    forecast_ul = None
    if container is not None:
        forecast_ul = container.find('ul', class_='clearfix')
    if forecast_ul is None:
        print('查询不到')
        print(weather_list)
        return weather_list

    for item in forecast_ul.find_all('li'):
        try:
            # Look the temperature paragraph up once and reuse it.
            tem = item.find('p', class_='tem')
            weather = {
                'day': item.find('h1').text,
                'temperature': tem.span.text + tem.em.text,
            }
        except AttributeError:
            # A missing <h1>, <p class="tem">, <span> or <em> yields None,
            # and attribute access on it lands here; skip that item.
            print('查询不到')
        else:
            weather_list.append(weather)

    print(weather_list)
    return weather_list
if __name__ == '__main__':
    # Script entry point: scrape and print the one-day forecast page.
    get_content('http://www.weather.com.cn/weather1d/101190401.shtml')
在运行的过程中出现如下问题:
原因主要是没有安装 lxml 包,只需要在电脑终端输入 pip install lxml 进行安装即可。安装后重新运行,结果如下所示:
[{'day': '16日夜间', 'temperature': '5°C'}, {'day': '17日白天', 'temperature': '12°C'}]