安装依赖库:
pip install requests beautifulsoup4 urllib3
Python源码:
import os
import requests
from urllib.parse import urljoin, urlparse
from bs4 import BeautifulSoup
import time
# Start URL of the crawl.
start_url = 'https://www.example.com/'  # replace with the URL of the target site

# Folder in which downloaded images are stored.
base_folder = 'downloaded_images'
# exist_ok=True makes the creation idempotent and avoids the race between
# a separate "does it exist?" check and the actual mkdir.
os.makedirs(base_folder, exist_ok=True)

# URLs of pages that have already been visited.
visited_urls = set()
def get_page_content(url, timeout=10):
    """Fetch a page and return its body as text.

    Args:
        url: Address of the page to request.
        timeout: Seconds to wait for the server before giving up; passed
            straight to ``requests.get``. Without a timeout a stalled
            server would block the caller indefinitely.

    Returns:
        The response body as text, or ``None`` when the request failed
        (any ``requests.RequestException``, including timeouts and
        error statuses raised by ``raise_for_status``).
    """
    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()  # turn HTTP error statuses into exceptions
        return response.text
    except requests.RequestException as e:
        # Best-effort fetch: report the failure and let the caller skip the page.
        print(f"请求失败: {url}, 错误: {e}")
        return None
# 下载图片并保存
def download_image(img_url, folder):
try:
img_data = requests.get(img_url).content
img_name = os.path.basename(img_url)
img_path = os.path.join(folder, img_name)
with open(img_path, 'wb') as f: