爬取上海临沂两地天气和污染指数
import urllib.request
from pyquery import PyQuery as pq
import re
# Display names of the cities to report on. Each entry pairs by position with
# the code at the same index in ``city_num``.
city = [
    "上海",
    "临沂",
]

# City codes that are inserted into the weather.com.cn forecast URL.
city_num = [
    "101020100",
    "101120901",
]
def find_weather(country, timeout=10):
    """Scrape the forecast entries for one city from weather.com.cn.

    Downloads ``http://www.weather.com.cn/weather/<country>.shtml``, prints the
    texts of the page's ``.li6 span`` elements (the values this script reports
    as the pollution index), and yields one record per forecast entry matched
    by the regular expression below.

    This is a generator: nothing is downloaded or printed until iteration
    starts.

    Args:
        country: City code string placed in the URL (the caller passes
            entries of ``city_num``, e.g. ``'101020100'``).
        timeout: Seconds to wait on the HTTP request before giving up.

    Yields:
        dict with the keys ``'日期'`` (text of the entry's ``<h1>``), ``'温度'``
        (text of the ``<span>`` and of the following ``<i>``, joined by
        ``'/'``) and ``'风级'`` (text of the ``<i>`` after ``<p class="win">``).

    Raises:
        urllib.error.URLError: If the page cannot be fetched.
        UnicodeDecodeError: If the response body is not valid UTF-8.
    """
    url = 'http://www.weather.com.cn/weather/' + country + '.shtml'
    # Use the response as a context manager so the connection is closed
    # deterministically, and bound the wait so a stalled server cannot hang
    # the script forever.
    with urllib.request.urlopen(url, timeout=timeout) as response:
        html = response.read().decode('utf-8')

    # re.S lets '.' match newlines, so a single match may span several lines
    # of the page markup.
    parse = re.compile(
        r'class="sky skyid.*?<h1>(.*?)</h1>.*?<span>(.*?)</span>.*?<i>(.*?)</i>'
        r'.*?<p class="win">.*?<i>(.*?)</i>',
        re.S,
    )

    doc = pq(html)
    # Collect one string per matched element. Wrapping the joined .text()
    # result in list() would instead split it into single characters.
    polute = [span.text() for span in doc('.li6 span').items()]
    print(polute)

    for day, first_temp, second_temp, wind in parse.findall(html):
        yield {
            '日期': day,
            '温度': first_temp + '/' + second_temp,
            '风级': wind,
        }
def main():
    """Print up to seven forecast records for every configured city.

    Walks ``city`` and ``city_num`` in lockstep. For each pair it prints the
    city name followed by a blank line, then at most the first seven records
    produced by ``find_weather`` for that city's code, then a blank separator.
    """
    # zip keeps each name paired with its code and follows the lists' actual
    # length instead of a hard-coded count.
    for name, code in zip(city, city_num):
        print(name + '\n')
        forecast = list(find_weather(code))
        # Slicing tolerates pages that yield fewer than seven records, where
        # indexing 0..6 would raise IndexError.
        for record in forecast[:7]:
            print(record)
        print('\n')
# Run the scraper only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
本篇博客介绍了一个Python脚本,该脚本使用urllib和pyquery库从中国天气网爬取上海和临沂两地的天气预报及空气污染指数。通过解析网页源代码,脚本能够获取包括日期、温度、风级在内的气象信息,并打印出前7天的数据。
1269

被折叠的 条评论
为什么被折叠?



