# A general-purpose web-scraping routine for downloading images.
import os
from urllib.parse import urljoin, urlsplit

import requests
from bs4 import BeautifulSoup
# Function: download a single image
def download_image(url, save_dir, headers, proxies, timeout=10):
    """Download one image from *url* and save it under *save_dir*.

    Args:
        url: Absolute URL of the image.
        save_dir: Directory the file is written into (created if missing).
        headers: HTTP request headers (e.g. User-Agent) passed to requests.
        proxies: Proxy mapping passed to requests.
        timeout: Seconds to wait for the server before giving up
            (new keyword with a default, so existing callers are unaffected).

    Raises:
        requests.HTTPError: if the server responds with a 4xx/5xx status.
        requests.RequestException: on connection failure or timeout.
    """
    # Send the GET request with headers and proxies; a timeout prevents
    # the crawler from hanging forever on a stalled connection.
    response = requests.get(url, headers=headers, proxies=proxies, timeout=timeout)
    # Fail loudly on HTTP errors instead of silently saving an error page as the "image".
    response.raise_for_status()
    # Take the filename from the URL *path* only, so query strings
    # (e.g. "?w=300") do not leak into the name; fall back for bare paths.
    filename = os.path.basename(urlsplit(url).path) or 'image'
    filepath = os.path.join(save_dir, filename)
    # Make sure the target directory exists before writing.
    os.makedirs(save_dir, exist_ok=True)
    # Save the raw bytes to disk.
    with open(filepath, 'wb') as f:
        f.write(response.content)
# Function: crawl all images on a page and save them locally
def crawl_images(url, save_dir, headers, proxies, timeout=10):
    """Fetch *url*, extract every ``<img src=...>`` link, and download each image.

    Args:
        url: URL of the HTML page to crawl.
        save_dir: Directory downloaded images are written into.
        headers: HTTP request headers passed to requests.
        proxies: Proxy mapping passed to requests.
        timeout: Per-request timeout in seconds (new keyword with a default,
            so existing callers are unaffected).

    Raises:
        requests.HTTPError: if the page itself returns a 4xx/5xx status.
        requests.RequestException: on connection failure or timeout.
    """
    # Send the GET request with headers and proxies.
    response = requests.get(url, headers=headers, proxies=proxies, timeout=timeout)
    response.raise_for_status()
    # Parse the HTML page.
    soup = BeautifulSoup(response.content, 'html.parser')
    # Collect image links: skip <img> tags without a src attribute
    # (the original link['src'] raised KeyError on them), and resolve
    # relative paths like "/img/a.png" against the page URL.
    img_links = [
        urljoin(url, img['src'])
        for img in soup.find_all('img')
        if img.get('src')
    ]
    # Download each image to the local directory.
    for link in img_links:
        download_image(link, save_dir, headers, proxies, timeout=timeout)
if __name__ == '__main__':