批量下载civitai上图片

最新推荐文章于 2025-06-17 01:00:00 发布

原创最新推荐文章于 2025-06-17 01:00:00 发布 · 633 阅读

CC 4.0 BY-SA版权

文章标签：

#python #SD #comfyui #stable diffusion #爬虫

实在是喜爱 civitai 上某些模型下的图片，让人想要学习（流口水）。好多模型虽然收藏了，但过段时间可能就没了，美图也跟着全都没了，让人心痛，所以想把某个模型下的所有图片都下载下来，以备后用（自己参照提示词慢慢玩）！找了几个 chrome 插件，都号称能自动下载页面中的所有图片，包括链接到详情页中的图片，但实际使用下来总是有遗漏；如果你有好用的 chrome 插件，欢迎在评论区赐教。（我测试了两个：“Fitkun图片批量下载”和“批量图片下载器-Imageye”。）

下面的脚本主要分三步：

1. 通过"https://civitai.com/api/v1/models/{model_id}"获取模型所有版本信息

2. 通过"https://civitai.com/api/trpc/image.getImagesAsPostsInfinite"获取每个版本下所有图片信息

3. 通过"https://civitai.com/images/{img_id}"详情页解析出原图的名称和地址，再下载图片

import requests
import os
import re
import time
import json
from urllib.parse import urlparse

def main(model_id="1076014"):
    """Download the images posted under every version of a Civitai model.

    Steps:
      1. Read the model's metadata from the REST API to get its name and
         the list of version IDs.
      2. For each version, page through the ``image.getImagesAsPostsInfinite``
         tRPC endpoint and collect the image entries.
      3. For each image, load its detail page, read the original file name
         and URL token from the embedded ``__NEXT_DATA__`` JSON, and save
         the original image.

    Files are written to ``<model_id>_<model_name>/<version_id>_<version_name>/``
    under the current working directory. Images whose target file already
    exists are not downloaded again. Progress and errors are printed; any
    unexpected exception is reported and ends the run.

    Args:
        model_id: Civitai model ID (string or integer). Defaults to the
            model this script was originally written for.
    """
    api_base = "https://civitai.com/api/v1"
    trpc_api = "https://civitai.com/api/trpc/image.getImagesAsPostsInfinite"

    # Browser-like headers used for the tRPC, detail-page and image requests.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
        'Accept': '*/*',
        'Referer': f'https://civitai.com/models/{model_id}',
        'Origin': 'https://civitai.com'
    }

    try:
        # 1. Fetch model metadata and collect all version IDs.
        print(f"正在获取模型 {model_id} 的信息...")
        model_url = f"{api_base}/models/{model_id}"
        # A timeout keeps the script from hanging forever on a stalled connection.
        model_response = requests.get(model_url, headers={}, timeout=15)
        model_response.raise_for_status()
        model_data = model_response.json()

        # `or` also covers an explicit null name, which .get()'s default does not.
        model_name = sanitize_filename(str(model_data.get('name') or f"model_{model_id}"))
        output_dir = f"{model_id}_{model_name}"
        print(f"模型名称: {model_name}")

        os.makedirs(output_dir, exist_ok=True)

        version_ids = []
        for version in model_data.get('modelVersions') or []:
            version_id = version.get('id')
            if version_id:
                version_name = str(version.get('name') or version_id)
                version_ids.append((version_id, version_name))
                print(f"找到版本: {version_name} (ID: {version_id})")

        if not version_ids:
            print("未找到有效的模型版本，退出")
            return

        # 2. Download the images of every version into its own directory.
        total_downloaded = 0

        for version_id, version_name in version_ids:
            print(f"\n开始处理版本: {version_name} (ID: {version_id})")

            version_dir = os.path.join(output_dir, f"{version_id}_{sanitize_filename(version_name)}")
            os.makedirs(version_dir, exist_ok=True)

            print(f"开始获取版本 {version_id} 的图片...")
            total_downloaded += _download_version_images(
                trpc_api, headers, model_id, version_id, version_dir
            )

        print(f"\n下载完成，共下载 {total_downloaded} 张图片")
        print(f"图片保存在: {os.path.abspath(output_dir)}")

    except Exception as e:
        # Top-level boundary: report the problem instead of a raw traceback.
        print(f"发生错误: {e}")


def _download_version_images(trpc_api, headers, model_id, version_id, version_dir):
    """Page through one version's image listing and download each image.

    Returns the number of images newly downloaded for this version.
    """
    version_image_count = 0  # images present on disk (already existing + new)
    downloaded = 0
    cursor = None
    seen_cursors = []  # list, not set: the cursor's type is not known to be hashable

    while True:
        try:
            page = _fetch_image_page(trpc_api, headers, model_id, version_id, cursor)
        except (requests.exceptions.RequestException, ValueError) as e:
            # ValueError covers a non-JSON body on older requests versions.
            print(f"获取图片时出错: {e}")
            return downloaded

        if page is None:
            # Malformed response; the reason has already been printed.
            return downloaded

        images, next_cursor = page
        if not images:
            print(f"版本 {version_id} 没有更多图片")
            break

        for index, img in enumerate(images, start=1):
            img_id = img.get("id")
            if not img_id:
                continue

            # Progress is the position within the current page, so it keeps
            # advancing even when an image fails or is skipped.
            print(f"正在处理图片: {img_id}-----------{index}/{len(images)}")

            try:
                status = _download_image(img_id, version_dir, headers)
            except Exception as e:
                # Best effort: one bad image must not stop the whole run.
                print(f"处理图片 {img_id} 失败: {e}")
                status = "failed"

            if status in ("downloaded", "exists"):
                version_image_count += 1
            if status == "downloaded":
                downloaded += 1

            # Throttle after every attempt (each one hits the site at least
            # once), including failures, so errors do not speed requests up.
            time.sleep(0.8)

        # Stop when there is no further page, or when the server hands back a
        # cursor we have already used (guards against an endless loop).
        if next_cursor is None or next_cursor in seen_cursors:
            break
        seen_cursors.append(next_cursor)
        cursor = next_cursor
        time.sleep(1.5)

    print(f"版本 {version_id} 的图片已全部获取，共 {version_image_count} 张")
    time.sleep(1.5)  # pause between versions
    return downloaded


def _fetch_image_page(trpc_api, headers, model_id, version_id, cursor=None):
    """Request one page of image posts for a model version.

    Returns ``(images, next_cursor)``, or ``None`` when the response does not
    have the expected ``result.data.json.items`` structure. ``next_cursor`` is
    read from ``result.data.json.nextCursor`` and is ``None`` when that field
    is absent.
    NOTE(review): the ``nextCursor`` field name is assumed from the endpoint's
    "Infinite" query shape — confirm against a live response.
    """
    request_params = {
        "json": {
            "period": "AllTime",
            "sort": "Newest",
            "modelVersionId": int(version_id),  # the endpoint expects integers
            "modelId": int(model_id),
            "hidden": False,
            "limit": 50,
            "browsingLevel": 31,
            "cursor": cursor,
            "authed": True
        }
    }
    if cursor is None:
        # First page: mark the null cursor as "undefined", as the original
        # request did. Omitted once a real cursor value is being sent.
        request_params["meta"] = {"values": {"cursor": ["undefined"]}}

    encoded_input = requests.utils.quote(json.dumps(request_params))
    response = requests.get(f"{trpc_api}?input={encoded_input}", headers=headers, timeout=15)
    response.raise_for_status()

    data = response.json()
    if "result" not in data:
        print("响应中未找到结果数据，跳过此版本")
        return None

    result = data["result"]
    if "data" not in result or "json" not in result["data"]:
        print("结果数据格式不正确，跳过此版本")
        return None

    json_data = result["data"]["json"]
    if "items" not in json_data or not isinstance(json_data["items"], list):
        print("未找到items数组，跳过此版本")
        return None

    # Each item carries its own "images" list; flatten them.
    images = []
    for item in json_data["items"]:
        if "images" in item and isinstance(item["images"], list):
            images.extend(item["images"])

    return images, json_data.get("nextCursor")


def _download_image(img_id, version_dir, headers):
    """Resolve one image through its detail page and save the original file.

    Returns ``"downloaded"``, ``"exists"`` (target file already present) or
    ``"skipped"`` (page data missing or the response is not an image).
    Network and unexpected-structure errors propagate to the caller.
    """
    img_detail_url = f"https://civitai.com/images/{img_id}"
    img_page_response = requests.get(img_detail_url, headers=headers, timeout=15)
    img_page_response.raise_for_status()

    # The detail page embeds its data as JSON in the __NEXT_DATA__ script tag.
    next_data_match = re.search(
        r'<script id="__NEXT_DATA__" type="application/json">(.*?)</script>',
        img_page_response.text,
    )
    if not next_data_match:
        print("未找到__NEXT_DATA__脚本，跳过该图片")
        return "skipped"

    try:
        next_data = json.loads(next_data_match.group(1))
    except json.JSONDecodeError:
        print("解析__NEXT_DATA__失败，跳过该图片")
        return "skipped"

    queries = (
        next_data.get("props", {})
        .get("pageProps", {})
        .get("trpcState", {})
        .get("json", {})
        .get("queries", [])
    )
    if not queries:
        print("未找到queries数据，跳过该图片")
        return "skipped"

    # The first query's data holds the image record (name and URL token).
    image_info = queries[0]["state"]["data"]
    image_name = image_info.get("name")
    image_base_url = image_info.get("url")
    if not image_name or not image_base_url:
        print("无法获取图片名称或URL，跳过该图片")
        return "skipped"

    image_download_url = f"https://image.civitai.com/xG1nkqKTMzGDvpLrqFT7WA/{image_base_url}/original=true/{image_name}"

    safe_filename = sanitize_filename(image_name)
    file_path = os.path.join(version_dir, safe_filename)
    if os.path.exists(file_path):
        print(f"已存在: {safe_filename}")
        return "exists"

    print(f"正在下载: {safe_filename} 来自 {image_download_url}")

    # `with` releases the streamed connection on every path, including the
    # non-image skip below.
    with requests.get(image_download_url, headers=headers, stream=True, timeout=20) as img_response:
        img_response.raise_for_status()

        content_type = img_response.headers.get('Content-Type', '')
        if not content_type.startswith('image/'):
            print(f"跳过非图片内容: {image_download_url}")
            return "skipped"

        # Write to a temporary name and rename when complete, so an
        # interrupted download is never mistaken for a finished file.
        partial_path = file_path + ".part"
        try:
            with open(partial_path, 'wb') as f:
                for chunk in img_response.iter_content(chunk_size=8192):
                    f.write(chunk)
            os.replace(partial_path, file_path)
        finally:
            # No-op after a successful rename; removes the leftover otherwise.
            if os.path.exists(partial_path):
                os.remove(partial_path)

    print(f"已下载: {safe_filename}")
    return "downloaded"

def sanitize_filename(filename):
    """Return *filename* with every reserved path character replaced by ``_``.

    The reserved characters are the backslash, ``/``, ``*``, ``?``, ``:``,
    the double quote, ``<``, ``>`` and ``|``. All other characters are kept.
    """
    reserved = '\\/*?:"<>|'
    replacement_table = str.maketrans(reserved, '_' * len(reserved))
    return filename.translate(replacement_table)

# Run the downloader only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()