Downloading wallpapers from wallhaven
wallhaven.cc hosts a large collection of good-looking wallpaper images. The script below walks the search results for a keyword page by page, follows each thumbnail to its detail page, extracts the full-resolution image URL, and hands the resulting (URL, save path) list to a small threaded downloader.
import requests
from lxml import etree

from spider import Spider  # the author's threaded downloader (not shown here)

keyword = "street"

headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
                  "(KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36"
}

for page in range(1, 196):
    # The full search spans about 721 pages; only the first 195 are fetched here.
    print(f"Downloading page {page}")
    url = f'https://wallhaven.cc/search?q={keyword}&categories=110&purity=100&sorting=random&order=desc&ai_art_filter=1&seed=NYJ5pC&page={page}'
    print(url)
    resp = requests.get(url, headers=headers)
    html = etree.HTML(resp.text)

    # Each thumbnail links to a detail page; the adjacent span holds its resolution.
    href_list = html.xpath('//ul//a[@class="preview"]/@href')
    reso_list = html.xpath('//ul//div[@class="thumb-info"]/span[1]/text()')
    print(href_list, len(href_list))
    print(reso_list, len(reso_list))

    task_list = []
    for href, reso in zip(href_list, reso_list):
        print(href, reso)
        # The detail page exposes the full-size image as <img id="wallpaper">.
        detail = etree.HTML(requests.get(href, headers=headers).text)
        download_url = detail.xpath('//img[@id="wallpaper"]/@src')[0]
        save_name = download_url.split('/')[-1]
        save_path = f'./{keyword}/{save_name}'
        print('Download URL:', download_url)
        task_list.append((download_url, save_path))

    # Hand this page's (url, path) pairs to the threaded downloader.
    Spider(task_list=task_list, thread_num=3).run()
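
The spider module imported above is not included in the post. As a rough idea of what it would need to provide, here is a minimal sketch of a compatible Spider class: only the constructor arguments (task_list, thread_num) and the run() method are taken from the call above; the thread pool, the timeout, and the directory handling are assumptions.

# A minimal stand-in for the Spider class assumed above: a thread-pool downloader
# for (download_url, save_path) tasks. Implementation details are assumptions.
import os
from concurrent.futures import ThreadPoolExecutor

import requests


class Spider:
    def __init__(self, task_list, thread_num=3):
        self.task_list = task_list    # list of (download_url, save_path) tuples
        self.thread_num = thread_num  # number of concurrent download threads

    def _download(self, task):
        url, save_path = task
        # Make sure the target folder (e.g. ./street/) exists before writing.
        os.makedirs(os.path.dirname(save_path), exist_ok=True)
        resp = requests.get(url, timeout=30)
        resp.raise_for_status()
        with open(save_path, "wb") as f:
            f.write(resp.content)
        print(f"Saved {save_path}")

    def run(self):
        # Run all download tasks on a fixed-size thread pool.
        with ThreadPoolExecutor(max_workers=self.thread_num) as pool:
            list(pool.map(self._download, self.task_list))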