Training a diffusion model with the diffusers library

We will train an unconditional image generation model on a dataset of our own.

I use the anime-face dataset from the GAN homework of Prof. Hung-yi Lee's 2022 NTU course (Hugging Face download link).

After downloading, just unzip it.

The training procedure follows the official diffusers tutorial.

Load the dataset:

from dataclasses import dataclass
from datasets import load_dataset
import matplotlib.pyplot as plt
from torchvision import transforms
import torch
from diffusers import UNet2DModel, DDPMScheduler, DDPMPipeline
from diffusers.optimization import get_cosine_schedule_with_warmup
from diffusers.utils import make_image_grid
import torch.nn.functional as F
import os
from accelerate import Accelerator, notebook_launcher
from tqdm.auto import tqdm


# 设置训练配置
@dataclass
class TrainingConfig:
    image_size = 64  # the generated image resolution; the dataset is 96x96, but my GPU is weak and 96 would take forever, so I set it smaller
    train_batch_size = 32
    eval_batch_size = 16  # how many images to sample during evaluation
    num_epochs = 100
    gradient_accumulation_steps = 1
    learning_rate = 1e-4
    lr_warmup_steps = 500
    save_image_epochs = 10
    save_model_epochs = 30
    mixed_precision = "fp16"  # `no` for float32, `fp16` for automatic mixed precision
    output_dir = "ddpm-anime-faces-64"  # the model name locally and on the HF Hub

    push_to_hub = False  # whether to upload the saved model to the HF Hub
    # hub_model_id = "<your-username>/<my-awesome-model>"  # the name of the repository to create on the HF Hub
    # hub_private_repo = None
    overwrite_output_dir = True  # overwrite the old model when re-running the notebook
    seed = 0


config = TrainingConfig()

# 加载数据集
dataset = load_dataset(
    "imagefolder", data_dir=r"xxx/dataset/xxx") # 换成自己的数据集路径
 
# The dataset has 70k+ images; training on all of them takes too long, so select a subset
# Training was slower with shuffle than without; the cause is unconfirmed (possibly the indices
# mapping that shuffle creates; dataset['train'].flatten_indices() might help)
# dataset['train'] = dataset['train'].shuffle(seed=config.seed).select(range(0, 10000))
dataset['train'] = dataset['train'].select(range(0, 10000))
# fig, axs = plt.subplots(1, 4, figsize=(16, 8))
# for i, image in enumerate(dataset['train'][:4]['image']):
#     axs[i].imshow(image)
#     axs[i].set_axis_off()
# plt.show()

preprocess = transforms.Compose(
    [
        transforms.Resize((config.image_size, config.image_size)),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize([0.5], [0.5]),
    ]
)


def transform(example):
    images = [preprocess(image.convert("RGB")) for image in example["image"]]
    return {"images": images}


dataset.set_transform(transform)

train_dataloader = torch.utils.data.DataLoader(
    dataset['train'], batch_size=config.train_batch_size, shuffle=True)
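
As a quick sanity check (my addition, not in the original tutorial), you can pull one batch from the dataloader and confirm its shape matches the config:

# Optional: fetch one batch and verify its shape
batch = next(iter(train_dataloader))
print(batch["images"].shape)  # expected: torch.Size([32, 3, 64, 64])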

Define the model:

model = UNet2DModel(
    sample_size=config.image_size,
    in_channels=3,
    out_channels=3,
    layers_per_block=2,
    block_out_channels=(128, 256, 512, 512),
    down_block_types=(
        "DownBlock2D",
        "DownBlock2D",
        "AttnDownBlock2D",
        "DownBlock2D",        
    ),
    up_block_types=(
        "UpBlock2D",
        "AttnUpBlock2D",
        "UpBlock2D",
        "UpBlock2D",
    ),
)
sample_image = dataset["train"][0]["images"].unsqueeze(0)
print("Input shape:", sample_image.shape)
print("Output shape:", model(sample_image, timestep=0).sample.shape)

Define the noise scheduler (the β and α schedule from the DDPM formulation):

noise_scheduler = DDPMScheduler(
    num_train_timesteps=1000,
)
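
For reference, in standard DDPM notation with variance schedule $\beta_t$, $\alpha_t = 1 - \beta_t$ and $\bar{\alpha}_t = \prod_{s=1}^{t} \alpha_s$, the scheduler's add_noise method used in the training loop below implements the closed-form forward process:

$$x_t = \sqrt{\bar{\alpha}_t}\, x_0 + \sqrt{1 - \bar{\alpha}_t}\, \epsilon, \qquad \epsilon \sim \mathcal{N}(0, I)$$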

Define the optimizer and learning-rate schedule:

optimizer = torch.optim.AdamW(model.parameters(), lr=config.learning_rate)
lr_scheduler = get_cosine_schedule_with_warmup(
    optimizer=optimizer,
    num_warmup_steps=config.lr_warmup_steps,
    num_training_steps=config.num_epochs * len(train_dataloader),
)
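
If you want to see the warmup-plus-cosine curve before training, here is a small sketch (my addition; it uses a throwaway optimizer so the real optimizer's state is untouched):

# Optional: plot the cosine-with-warmup LR curve using a dummy optimizer
total_steps = config.num_epochs * len(train_dataloader)
tmp_opt = torch.optim.AdamW([torch.nn.Parameter(torch.zeros(1))], lr=config.learning_rate)
tmp_sched = get_cosine_schedule_with_warmup(tmp_opt, config.lr_warmup_steps, total_steps)
lrs = []
for _ in range(total_steps):
    tmp_opt.step()   # step the optimizer first to silence the scheduler-order warning
    tmp_sched.step()
    lrs.append(tmp_sched.get_last_lr()[0])
plt.plot(lrs)
plt.xlabel("step")
plt.ylabel("learning rate")
plt.show()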

Define the evaluate function:

def evaluate(config, epoch, pipeline):
    images = pipeline(
        batch_size=config.eval_batch_size,
        generator=torch.Generator(device="cpu").manual_seed(config.seed),
    ).images
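    # Arrange the samples in a grid; rows * cols must equal eval_batch_size (4 * 4 = 16 here)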
    grid = make_image_grid(images, rows=4, cols=4)
    test_dir = os.path.join(config.output_dir, "samples")
    os.makedirs(test_dir, exist_ok=True)
    grid.save(f"{test_dir}/{epoch:04d}.png")

Define the training loop:

def train_loop(config, model, noise_scheduler, optimizer, train_dataloader, lr_scheduler):
    accelerator = Accelerator(
        mixed_precision=config.mixed_precision,
        gradient_accumulation_steps=config.gradient_accumulation_steps,
        log_with="tensorboard",
        project_dir=os.path.join(config.output_dir, "logs"),
    )
    if accelerator.is_main_process:
        if config.output_dir is not None:
            os.makedirs(config.output_dir, exist_ok=True)
        accelerator.init_trackers("train_example")
    
    model, optimizer, train_dataloader, lr_scheduler = accelerator.prepare(
        model, optimizer, train_dataloader, lr_scheduler
    )
    
    global_step = 0
    
    for epoch in range(config.num_epochs):
        progress_bar = tqdm(total=len(train_dataloader), disable=not accelerator.is_local_main_process)
        progress_bar.set_description(f"Epoch {epoch}")
        
        for step, batch in enumerate(train_dataloader):
            clean_images = batch["images"]    
            noise = torch.randn(clean_images.shape, device=clean_images.device)
            bs = clean_images.shape[0]
            
            timesteps = torch.randint(
                0, noise_scheduler.config.num_train_timesteps, (bs,), device=clean_images.device,
                dtype=torch.int64
            )
            
            noisy_images = noise_scheduler.add_noise(clean_images, noise, timesteps)
            
            with accelerator.accumulate(model):
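                # Predict the noise that was added, then regress it against the true noise (the DDPM objective)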
                noise_pred = model(noisy_images, timesteps, return_dict=False)[0]
                loss = F.mse_loss(noise_pred, noise)
                accelerator.backward(loss)
                
                if accelerator.sync_gradients:
                    accelerator.clip_grad_norm_(model.parameters(), 1.0)
                optimizer.step()
                lr_scheduler.step()
                optimizer.zero_grad()
            progress_bar.update(1)
            logs = {"loss": loss.detach().item(), "lr": lr_scheduler.get_last_lr()[0], "step": global_step}
            progress_bar.set_postfix(**logs)
            accelerator.log(logs, step=global_step)
            global_step += 1
        
        if accelerator.is_main_process:
            pipeline = DDPMPipeline(unet=accelerator.unwrap_model(model), scheduler=noise_scheduler)
            
            if (epoch + 1) % config.save_image_epochs == 0 or epoch == config.num_epochs - 1:
                evaluate(config, epoch, pipeline)
            
            if (epoch + 1) % config.save_model_epochs == 0 or epoch == config.num_epochs - 1:
                pipeline.save_pretrained(config.output_dir)

Start training:

args = (config, model, noise_scheduler, optimizer, train_dataloader, lr_scheduler)
notebook_launcher(train_loop, args, num_processes=1)  
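
notebook_launcher is meant for launching Accelerate code from inside a notebook; if you run this as a plain .py script instead, you can call the loop directly with the same arguments:

# Equivalent call when running as a regular script rather than a notebook
# train_loop(config, model, noise_scheduler, optimizer, train_dataloader, lr_scheduler)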

Results:

The samples directory contains sample grids generated every 10 epochs (save_image_epochs = 10).

Sample grid at epoch 60:
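
After training, the saved pipeline can be reloaded from output_dir to generate new faces. A minimal sketch (my addition; assumes a CUDA GPU):

# Reload the trained pipeline and sample a few new faces
from diffusers import DDPMPipeline
import torch

pipeline = DDPMPipeline.from_pretrained("ddpm-anime-faces-64").to("cuda")
images = pipeline(
    batch_size=4,
    generator=torch.Generator(device="cpu").manual_seed(42),
).images
for i, img in enumerate(images):
    img.save(f"generated_{i}.png")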
