实在是喜爱civitai上面某些模型下的图片啊,让人想要学习(流口水)。好多模型虽然收藏了,但是过段时间可能会没了,导致美图也全都没了,让人心痛,所以想把某个模型下的所有图片都下载下来,以备后用(自己参照提示词慢慢玩)!找了几个chrome插件,号称都能自动下载页面中的所有图片,包括链接到详情页中的图片,但是实际使用下来总是有遗漏,如果你有好用的chrome插件,欢迎评论区赐教。(我测试了两个:“Fitkun图片批量下载”和“批量图片下载器-Imageye”。)下面的脚本主要分三步:
1. 通过"https://civitai.com/api/v1/models/{model_id}"获取模型所有版本信息
2. 通过"https://civitai.com/api/trpc/image.getImagesAsPostsInfinite"获取每个版本下所有图片信息
3. 通过"https://civitai.com/images/{img_id}"详情页解析出图片的名称和真实地址,再下载图片
import requests
import os
import re
import time
import json
from urllib.parse import urlparse
# Matches the JSON payload embedded in the image detail page.
_NEXT_DATA_RE = re.compile(
    r'<script id="__NEXT_DATA__" type="application/json">(.*?)</script>',
    re.DOTALL,
)

# Outcomes reported by _process_image.
_DOWNLOADED = "downloaded"
_EXISTS = "exists"
_SKIPPED = "skipped"


def _build_image_page_url(trpc_api, model_id, version_id, cursor):
    """Return the GET URL for one page of image posts of a model version."""
    request_params = {
        "json": {
            "period": "AllTime",
            "sort": "Newest",
            "modelVersionId": int(version_id),  # the endpoint expects integers
            "modelId": int(model_id),
            "hidden": False,
            "limit": 50,
            "browsingLevel": 31,
            "cursor": cursor,
            "authed": True,
        },
    }
    if cursor is None:
        # First page: the null cursor is flagged as "undefined" in the meta
        # section. Follow-up pages carry a real cursor and need no meta entry
        # (NOTE(review): confirm against the live endpoint).
        request_params["meta"] = {"values": {"cursor": ["undefined"]}}
    encoded_input = requests.utils.quote(json.dumps(request_params))
    return f"{trpc_api}?input={encoded_input}"


def _fetch_image_page(trpc_api, model_id, version_id, cursor, headers):
    """Fetch one listing page.

    Returns:
        ``(images, next_cursor)`` on success, or ``None`` when the response
        does not have the expected ``result.data.json.items`` layout.

    Raises:
        requests.exceptions.RequestException: on network or HTTP errors.
        ValueError: when the body is not valid JSON.
    """
    get_url = _build_image_page_url(trpc_api, model_id, version_id, cursor)
    response = requests.get(get_url, headers=headers, timeout=15)
    response.raise_for_status()
    data = response.json()

    if "result" not in data:
        print("响应中未找到结果数据,跳过此版本")
        return None
    result = data["result"]
    if "data" not in result or "json" not in result["data"]:
        print("结果数据格式不正确,跳过此版本")
        return None
    json_data = result["data"]["json"]
    if "items" not in json_data or not isinstance(json_data["items"], list):
        print("未找到items数组,跳过此版本")
        return None

    # Each item is a post; flatten the images of all posts on this page.
    images = []
    for item in json_data["items"]:
        if "images" in item and isinstance(item["images"], list):
            images.extend(item["images"])
    return images, json_data.get("nextCursor")


def _save_image(image_download_url, file_path, headers):
    """Stream an image to ``file_path``; return True when a file was written.

    The data is written to a ``.part`` file and renamed only after the
    transfer completed, so an interrupted download never leaves a truncated
    file that a later run would mistake for an already downloaded image.
    """
    partial_path = file_path + ".part"
    # The context manager releases the streamed connection on every path.
    with requests.get(image_download_url, headers=headers, stream=True,
                      timeout=20) as img_response:
        img_response.raise_for_status()
        content_type = img_response.headers.get('Content-Type', '')
        if not content_type.startswith('image/'):
            print(f"跳过非图片内容: {image_download_url}")
            return False
        try:
            with open(partial_path, 'wb') as f:
                for chunk in img_response.iter_content(chunk_size=8192):
                    f.write(chunk)
            os.replace(partial_path, file_path)
        finally:
            # Only still present when the transfer or the rename failed.
            if os.path.exists(partial_path):
                os.remove(partial_path)
    return True


def _process_image(img_id, version_dir, headers):
    """Resolve one image through its detail page and download it.

    Returns one of ``_DOWNLOADED``, ``_EXISTS`` or ``_SKIPPED``. Network and
    parsing errors propagate to the caller.
    """
    img_detail_url = f"https://civitai.com/images/{img_id}"
    img_page_response = requests.get(img_detail_url, headers=headers, timeout=15)
    img_page_response.raise_for_status()

    # NOTE: the messages below mention a "default download", but no fallback
    # exists; the image is simply skipped.
    next_data_match = _NEXT_DATA_RE.search(img_page_response.text)
    if not next_data_match:
        print("未找到__NEXT_DATA__脚本,使用默认方式下载")
        return _SKIPPED
    try:
        next_data = json.loads(next_data_match.group(1))
    except json.JSONDecodeError:
        print("解析__NEXT_DATA__失败,使用默认方式下载")
        return _SKIPPED

    queries = (
        next_data.get("props", {})
        .get("pageProps", {})
        .get("trpcState", {})
        .get("json", {})
        .get("queries", [])
    )
    if not queries:
        print("未找到queries数据,使用默认方式下载")
        return _SKIPPED

    # The first query's state data is read for the image name and URL key.
    image_info = queries[0]["state"]["data"]
    image_name = image_info.get("name")
    image_base_url = image_info.get("url")
    if not image_name or not image_base_url:
        print("无法获取图片名称或URL,使用默认方式下载")
        return _SKIPPED

    image_download_url = (
        "https://image.civitai.com/xG1nkqKTMzGDvpLrqFT7WA/"
        f"{image_base_url}/original=true/{image_name}"
    )
    safe_filename = sanitize_filename(image_name)
    file_path = os.path.join(version_dir, safe_filename)
    if os.path.exists(file_path):
        print(f"已存在: {safe_filename}")
        return _EXISTS

    print(f"正在下载: {safe_filename} 来自 {image_download_url}")
    if not _save_image(image_download_url, file_path, headers):
        return _SKIPPED
    print(f"已下载: {safe_filename}")
    return _DOWNLOADED


def _download_version_images(trpc_api, model_id, version_id, version_dir, headers):
    """Download every image of one model version, following pagination.

    Returns:
        ``(present_count, downloaded_count)``: images now on disk for this
        version (already present or newly saved), and newly saved images.
    """
    present_count = 0
    downloaded_count = 0
    cursor = None
    seen_cursors = set()  # guards against a server that repeats a cursor

    print(f"开始获取版本 {version_id} 的图片...")
    while True:
        try:
            page = _fetch_image_page(trpc_api, model_id, version_id, cursor, headers)
        except (requests.exceptions.RequestException, ValueError) as e:
            # Keep what was already downloaded and move on to the next version.
            print(f"获取图片时出错: {e}")
            return present_count, downloaded_count
        if page is None:
            return present_count, downloaded_count
        images, next_cursor = page

        if not images and next_cursor is None:
            print(f"版本 {version_id} 没有更多图片")

        for position, img in enumerate(images, start=1):
            img_id = img.get("id")
            if not img_id:
                continue
            # Progress is the position within the current page.
            print(f"正在处理图片: {img_id}-----------{position}/{len(images)}")
            try:
                status = _process_image(img_id, version_dir, headers)
            except Exception as e:
                # Best effort: one broken image must not stop the whole run.
                print(f"处理图片 {img_id} 失败: {e}")
                continue
            if status == _DOWNLOADED:
                present_count += 1
                downloaded_count += 1
                time.sleep(0.8)  # throttle after each real download
            elif status == _EXISTS:
                present_count += 1

        if next_cursor is None:
            break
        cursor_key = json.dumps(next_cursor, sort_keys=True, default=str)
        if cursor_key in seen_cursors:
            break
        seen_cursors.add(cursor_key)
        cursor = next_cursor
        time.sleep(1.5)  # throttle between listing pages

    print(f"版本 {version_id} 的图片已全部获取,共 {present_count} 张")
    time.sleep(1.5)  # throttle between versions
    return present_count, downloaded_count


def main(model_id="1076014"):
    """Download all images posted under every version of a Civitai model.

    Steps: read the model's versions from the REST API, list each version's
    image posts page by page through the tRPC endpoint, then resolve each
    image via its detail page and save it under
    ``<model_id>_<model_name>/<version_id>_<version_name>/``.

    Args:
        model_id: Civitai model ID (string or integer). Defaults to the model
            the script was written for.

    Returns:
        None. Progress and errors are printed; any unexpected error is
        reported and ends the run without raising.
    """
    api_base = "https://civitai.com/api/v1"
    trpc_api = "https://civitai.com/api/trpc/image.getImagesAsPostsInfinite"
    # Browser-like headers for the tRPC endpoint, detail pages and image host.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
        'Accept': '*/*',
        'Referer': f'https://civitai.com/models/{model_id}',
        'Origin': 'https://civitai.com'
    }
    try:
        # 1. Model info and version IDs.
        print(f"正在获取模型 {model_id} 的信息...")
        model_url = f"{api_base}/models/{model_id}"
        # Plain request (no browser-like headers); the timeout keeps a
        # stalled connection from hanging the script.
        model_response = requests.get(model_url, headers={}, timeout=15)
        model_response.raise_for_status()
        model_data = model_response.json()

        # `or` also covers a name that is present but null/empty.
        model_name = sanitize_filename(str(model_data.get('name') or f"model_{model_id}"))
        output_dir = f"{model_id}_{model_name}"
        print(f"模型名称: {model_name}")
        os.makedirs(output_dir, exist_ok=True)

        version_ids = []
        for version in model_data.get('modelVersions') or []:
            version_id = version.get('id')
            if version_id:
                version_name = str(version.get('name') or version_id)
                version_ids.append((version_id, version_name))
                print(f"找到版本: {version_name} (ID: {version_id})")
        if not version_ids:
            print("未找到有效的模型版本,退出")
            return

        # 2. Download the images of each version into its own directory.
        total_downloaded = 0
        for version_id, version_name in version_ids:
            print(f"\n开始处理版本: {version_name} (ID: {version_id})")
            version_dir = os.path.join(
                output_dir, f"{version_id}_{sanitize_filename(version_name)}"
            )
            os.makedirs(version_dir, exist_ok=True)
            _, downloaded = _download_version_images(
                trpc_api, model_id, version_id, version_dir, headers
            )
            total_downloaded += downloaded

        print(f"\n下载完成,共下载 {total_downloaded} 张图片")
        print(f"图片保存在: {os.path.abspath(output_dir)}")
    except Exception as e:
        # Top-level boundary of the script: report instead of a traceback.
        print(f"发生错误: {e}")
def sanitize_filename(filename):
    """Return ``filename`` with characters unsafe in file names replaced by ``_``.

    Replaced characters are the reserved set ``\\ / * ? : " < > |`` and the
    ASCII control characters (0x00-0x1F), which Windows rejects in file names
    and of which NUL makes ``open()`` fail on every platform. Each offending
    character becomes exactly one underscore, so the length is preserved.

    Args:
        filename: The candidate name (a ``str``), e.g. a model, version or
            image name returned by the API.

    Returns:
        The cleaned name; unchanged when nothing needed replacing.
    """
    return re.sub(r'[\\/*?:"<>|\x00-\x1f]', '_', filename)
# Run the downloader only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
226

被折叠的 条评论
为什么被折叠?



