1 数据获取
import requests
from lxml import etree
import json
import time
class ChengduClimate:
def __init__(self):
self.headers = {
"User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.120 Safari/537.36"}
def parse_url(self, url, timeout=10):
    """Download *url* using the instance headers and return the raw body.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up; handed
            straight to ``requests.get``. Defaults to 10.

    Returns:
        The undecoded response body (``response.content``).

    Raises:
        requests.exceptions.Timeout: If the server does not answer within
            *timeout* seconds.
    """
    print(url)
    # requests applies no timeout unless one is given, so a stalled server
    # would otherwise block the whole scrape indefinitely.
    response = requests.get(url, headers=self.headers, timeout=timeout)
    return response.content
def get_content_list(self, html_str):
    """Extract one weather record per table row from the page markup.

    Args:
        html_str: HTML of the page, as accepted by ``etree.HTML``.

    Returns:
        A list of dicts with the keys ``date``, ``max_temperture``,
        ``min_temperture``, ``weather`` and ``wind_direction``. A field
        whose node is missing from the row is stored as ``None``.
    """

    def first_match(node, path):
        """Return the first result of *path* under *node*, or None if empty."""
        matches = node.xpath(path)
        return matches[0] if matches else None

    html = etree.HTML(html_str)
    if html is None:
        # NOTE(review): guards against the parser producing no tree for
        # empty markup - confirm against the lxml version in use.
        return []

    li_list = html.xpath("//ul[@class='lishitable_content clearfix']/li")
    content_list = []
    # The final <li> of the list is deliberately left out of the slice.
    for li in li_list[:-1]:
        # Evaluate the XPath once; the count decides where the date lives.
        div_texts = li.xpath("./div/text()")
        if len(div_texts) > 4:
            date = div_texts[0]
        else:
            # Otherwise read the date from a link inside a <div>. A missing
            # link now yields None, like the other fields, instead of
            # raising IndexError.
            date = first_match(li, "./div/a/text()")

        wind = first_match(li, "./div[5]/text()")
        item = {
            "date": date,
            "max_temperture": first_match(li, "./div[2]/text()"),
            "min_temperture": first_match(li, "./div[3]/text()"),
            "weather": first_match(li, "./div[4]/text()"),
            # Only the wind column has surrounding whitespace stripped.
            "wind_direction": wind.strip() if wind is not None else None,
        }
        content_list.append(item)
        print(item)
    return content_list
def save_content_list(self, content_list):
    """Append every record to ``chengdu tianqi.txt``, one JSON object per line.

    Args:
        content_list: Iterable of JSON-serialisable records.
    """
    # Lazy generator: each record is serialised only when it is written.
    # Non-ASCII text is kept readable rather than escaped.
    serialized = (
        json.dumps(record, ensure_ascii=False) + "\n"
        for record in content_list
    )
    with open("chengdu tianqi.txt", "a", encoding="utf-8") as out:
        out.writelines(serialized)
    print("保存成功")
def run(self):
for i in range