目的:从中国天气网爬取指定城市的天气预报,获取未来7天的天气信息,并存入Excel文件(.xlsx)中
技术:requests、XPath(lxml)的使用,使用openpyxl将数据写入Excel,使用PyInstaller生成exe,命令为:pyinstaller -F Weather_Spider.py
1. 分析网页,通过xpath获取相关数据,并对数据进行简单处理
area=xml.xpath('//div[@class="crumbs fl"]//a/text()') # 地区
concrete_area=xml.xpath('//div[@class="crumbs fl"]//span/text()')
week=xml.xpath('//div[@class="con today clearfix"]//li//h1/text()') # 7天
wea=xml.xpath('//div[@class="con today clearfix"]//li//p[@class="wea"]/text()') # 天气
tem=xml.xpath('//div[@class="con today clearfix"]//li//p[@class="tem"]//i/text()') # 温度
2. 使用openpyxl将数据写入excel中
3. 建立部分城市与代码的字典,提供查询
4. 测试并生成exe
5. 源码如下:
import time
import requests
import os
from lxml import etree
import openpyxl
from openpyxl.styles import Font,colors
import traceback
class Weather_Forecast(object):
    """Scrape a weather.com.cn forecast page and store the result in Excel.

    Usage: construct with the forecast page URL, call ``get_request()`` to
    populate ``week``, ``wea``, ``tem`` and ``str``, then call
    ``write_excel()`` to write them to the workbook.
    """

    # Workbook written by write_excel(), relative to the working directory.
    _EXCEL_PATH = "天气.xlsx"
    # Maximum number of forecast rows written per column.
    _FORECAST_DAYS = 7
    # Seconds to wait for the HTTP response before giving up.
    _REQUEST_TIMEOUT = 10

    def __init__(self, url):
        """Remember the forecast page URL to download."""
        self.__url = url

    @staticmethod
    def _format_area(area, concrete_area):
        """Join the breadcrumb texts into one ``a>b>c`` style label.

        ``area`` holds the link texts of the breadcrumb and ``concrete_area``
        the span texts, which include literal ``>`` separators.  The last
        span text that is not a separator is appended to the link texts.
        Any number of links is accepted, so a breadcrumb with fewer than
        three links no longer raises IndexError.
        """
        parts = list(area)
        specific = [text for text in concrete_area if text != '>']
        if specific:
            parts.append(specific[-1])
        return ">".join(parts)

    # Fetch the page and extract the forecast data
    def get_request(self):
        """Download the page and extract area, day, weather and temperature.

        Sets ``self.week``, ``self.wea`` and ``self.tem`` to the lists of
        texts matched by the XPath queries, and ``self.str`` to the area
        label.

        Raises:
            requests.RequestException: on a network failure, timeout or
                HTTP error status.
        """
        # A timeout keeps the program from hanging forever on a dead server.
        response = requests.get(self.__url, timeout=self._REQUEST_TIMEOUT)
        response.raise_for_status()
        page = etree.HTML(response.content)
        area = page.xpath('//div[@class="crumbs fl"]//a/text()')  # region links
        concrete_area = page.xpath('//div[@class="crumbs fl"]//span/text()')
        self.week = page.xpath('//div[@class="con today clearfix"]//li//h1/text()')  # day labels
        self.wea = page.xpath('//div[@class="con today clearfix"]//li//p[@class="wea"]/text()')  # weather
        self.tem = page.xpath('//div[@class="con today clearfix"]//li//p[@class="tem"]//i/text()')  # temperature
        self.str = self._format_area(area, concrete_area)

    # Write the collected data to Excel
    def write_excel(self):
        """Write the scraped data to ``天气.xlsx``.

        An existing workbook is reopened and its "Sheet" worksheet reused;
        otherwise a new workbook is created with a styled header row.  The
        area label and current time go to row 2, and up to seven forecast
        entries per column go to rows 2 onward in columns C-E.

        Any failure is reported with a traceback instead of being raised,
        so the caller continues either way.
        """
        try:
            if os.path.exists(self._EXCEL_PATH):
                wb = openpyxl.load_workbook(self._EXCEL_PATH)  # reopen the existing file
                ws = wb["Sheet"]  # select the worksheet by name
            else:
                wb = openpyxl.Workbook()
                ws = wb.active
                # SimSun, size 21, bold italic, red.  An explicit aRGB value
                # is used rather than the colors.RED constant, which newer
                # openpyxl releases do not provide.
                ws["A1"].font = Font(name='宋体', size=21, bold=True,
                                     italic=True, strike=False,
                                     color="00FF0000")
                for column, width in (("A", 25), ("B", 25), ("C", 20), ("D", 20)):
                    ws.column_dimensions[column].width = width
                headers = ["地区", "当前时间", "星期", "天气", "温度"]
                for column, title in enumerate(headers, start=1):
                    ws.cell(row=1, column=column, value=title)
            ws.cell(row=2, column=1, value=self.str)
            now = time.strftime("%Y-%m-%d %H:%M:%S")  # current local time
            ws.cell(row=2, column=2, value=now)
            # Write only the entries actually scraped (at most seven) so a
            # short result list does not raise IndexError.
            for column, values in ((3, self.week), (4, self.wea), (5, self.tem)):
                for offset, value in enumerate(values[:self._FORECAST_DAYS]):
                    # str() stores plain text; no byte encoding is needed.
                    ws.cell(row=offset + 2, column=column, value=str(value))
            ws.merge_cells('F1:G1')  # merge cells
            wb.save(self._EXCEL_PATH)
            print("已成功获取天气信息,并写入excel中!")
        except Exception:
            # print_exc() writes the traceback itself and returns None, so
            # its result must not be passed to print().
            traceback.print_exc()
# City name -> weather.com.cn station code for the cities offered in the prompt.
_CITY_CODES = {
    "苏州": "101190401",
    "西安": "101110101",
    "北京": "101010100",
    "上海": "101020100",
    "南京": "101190101",
    "厦门": "101230201",
    "深圳": "101280601",
    "珠海": "101280701",
    "佛山": "101280800",
}
# City used when the typed name is not in _CITY_CODES.
_DEFAULT_CITY = "苏州"
_WEATHER_URL_TEMPLATE = "http://www.weather.com.cn/weather/%s.shtml"


# Get the forecast URL for the chosen city
def get_url():
    """Ask the user for a city and return its forecast page URL.

    Prints the supported city names, reads one name from standard input
    (surrounding whitespace is ignored) and looks up its code.  An unknown
    name prints a notice and falls back to the default city, Suzhou.

    Returns:
        str: the forecast page URL for the chosen (or default) city.
    """
    print(" ".join(_CITY_CODES))
    city_name = input("Please enter the city name for the query :").strip()
    city_num = _CITY_CODES.get(city_name)
    if city_num is None:
        # A mistyped name is expected input, not an error worth a traceback.
        print("输入错误则默认为苏州!")
        city_num = _CITY_CODES[_DEFAULT_CITY]
    return _WEATHER_URL_TEMPLATE % city_num
if __name__ == "__main__":
    # Script entry point: ask for a city, scrape its forecast page, then
    # export the result to the spreadsheet.
    forecast_url = get_url()
    GetWeather = Weather_Forecast(forecast_url)
    GetWeather.get_request()
    GetWeather.write_excel()
    # Wait for Enter before exiting (presumably so the console window of
    # the packaged exe stays open long enough to read the output).
    input("Press <enter>")
6. 运行结果: