从零使用GAN(生成对抗网络)进行图像生成

本文档介绍了如何使用DCGAN(深度卷积生成对抗网络)模型在自建的人脸嘴巴区域微笑表情数据集上进行训练。首先,数据集被加载并预处理,然后定义了生成器和判别器模型,接着初始化模型并开始训练。在训练过程中,记录了损失函数的变化,并定期保存生成器的输出以展示训练进展。最后,展示了训练过程中的真实图像和生成的假图像,以及训练过程中生成器和判别器的损失曲线。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

前言

本项目使用 DCGAN 模型,在自建数据集上进行实验。

本项目使用的数据集是人脸嘴巴区域——微笑表情的数据集

数据集文件夹结构如下,图片共 4357 张

├─mouth
│  └─smile
     ├─1smile.jpg
     ├─2smile.jpg
     ├─3smile.jpg
     └─....  

同时,创建一个 out 文件夹来保存训练的中间结果,主要就是看 DCGAN 是如何从一张噪声照片生成我们期待的图片

import os
import shutil
import time  # not needed here any more; kept because the training cell uses `time`

# Start every run from an empty ./out directory, which receives the
# intermediate generator snapshots written during training.
# shutil/os calls are used instead of shelling out to `rm -r`, so the cleanup
# also works on platforms without `rm` and raises on failure instead of being
# silently ignored. They are synchronous, so no sleep is required afterwards.
if os.path.exists("out"):
    print("移除现有 out 文件夹!")
    if os.path.isdir("out") and not os.path.islink("out"):
        shutil.rmtree("./out")
    else:
        # A plain file or symlink named "out" is removed as well (rm -r did so).
        os.remove("./out")
print("创建 out 文件夹!")
os.mkdir("./out")

移除现有 out 文件夹!
创建 out 文件夹!
下方链接为该数据集压缩包,需要者自取:数据集
运行下面代码,对数据集进行解压。
由于图片数量多,解压需要一定时间

import zipfile

# Extract the dataset archive into ./mouth using the standard library, so the
# cell runs as plain Python (the `!unzip` shell escape only works inside
# IPython/Jupyter and needs an external `unzip` binary).
with zipfile.ZipFile("mouth.zip") as archive:
    archive.extractall("./mouth")
print("解压完毕!")

导入所需包

from __future__ import print_function
#%matplotlib inline
import argparse
import os
import random
import torch
import torch.nn as nn
import torch.nn.parallel
import torch.backends.cudnn as cudnn
import torch.optim as optim
import torch.utils.data
import torchvision.datasets as dset
import torchvision.transforms as transforms
import torchvision.utils as vutils
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.animation as animation
from IPython.display import HTML


# NOTE(review): presumably a workaround for the "duplicate OpenMP runtime"
# abort seen when several libraries each bundle their own OpenMP copy --
# confirm it is still required in the target environment.
os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'

基本参数配置

# ---------------------------------------------------------------------------
# Reproducibility: seed Python's and PyTorch's random number generators.
# ---------------------------------------------------------------------------
manualSeed = 999
print("Random Seed: ", manualSeed)
random.seed(manualSeed)
torch.manual_seed(manualSeed)

# ---------------------------------------------------------------------------
# Data settings
# ---------------------------------------------------------------------------
dataroot = "mouth/"    # root directory of the dataset
workers = 0            # number of worker processes used for data loading
batch_size = 64        # number of images per training batch
image_size = 64        # every image is resized/cropped to this spatial size
                       # by the preprocessing transform
nc = 3                 # number of image channels

# ---------------------------------------------------------------------------
# Model settings
# ---------------------------------------------------------------------------
nz = 100               # length of the latent vector z (generator input)
ngf = 64               # base feature-map width of the generator
ndf = 64               # base feature-map width of the discriminator

# ---------------------------------------------------------------------------
# Optimisation settings
# ---------------------------------------------------------------------------
num_epochs = 10        # number of passes over the dataset
lr = 0.0003            # learning rate shared by both Adam optimizers
beta1 = 0.5            # beta1 hyper-parameter of the Adam optimizers
ngpu = 1               # number of GPUs to use; 0 selects CPU mode

# Run on the first CUDA device when one is available and GPUs are requested,
# otherwise fall back to the CPU.
if torch.cuda.is_available() and ngpu > 0:
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

导入数据集


# Preprocessing applied to every image, in order: resize, centre-crop to a
# square of side `image_size`, convert to a tensor, then normalise each of the
# three channels with mean 0.5 and std 0.5.
_preprocess = transforms.Compose([
    transforms.Resize(image_size),
    transforms.CenterCrop(image_size),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])

# ImageFolder expects one sub-directory per class under `dataroot`.
dataset = dset.ImageFolder(root=dataroot, transform=_preprocess)

# Shuffled mini-batch iterator over the dataset.
dataloader = torch.utils.data.DataLoader(
    dataset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=workers,
)

简单看一下我们的原始数据集长啥样

# Preview: draw (up to) the first 64 images of one training batch as a grid.
real_batch = next(iter(dataloader))
_preview_grid = vutils.make_grid(
    real_batch[0].to(device)[:64], padding=2, normalize=True
).cpu()

plt.figure(figsize=(8, 8))
plt.axis("off")
plt.title("Training Images")
# make_grid returns (C, H, W); imshow wants (H, W, C).
plt.imshow(np.transpose(_preview_grid, (1, 2, 0)))

<matplotlib.image.AxesImage at 0x7f67b59d9cf8>
在这里插入图片描述

定义生成器与判别器

def weights_init(m):
    """Initialise one module's parameters in place (for use with ``Module.apply``).

    Modules are matched by class name:

    * a name containing ``'Conv'``: weight drawn from N(0, 0.02)
    * a name containing ``'BatchNorm'``: weight drawn from N(1, 0.02) and
      bias set to 0

    Modules without the relevant parameter are left untouched. This covers
    batch-norm layers built with ``affine=False`` and container modules whose
    class name merely contains ``'Conv'``, both of which would otherwise raise
    ``AttributeError`` when ``apply`` visits them.

    Args:
        m: the module visited by ``apply``.
    """
    classname = m.__class__.__name__
    weight = getattr(m, 'weight', None)
    bias = getattr(m, 'bias', None)

    if 'Conv' in classname:
        if weight is not None:
            nn.init.normal_(weight.data, 0.0, 0.02)
    elif 'BatchNorm' in classname:
        if weight is not None:
            nn.init.normal_(weight.data, 1.0, 0.02)
        if bias is not None:
            nn.init.constant_(bias.data, 0)

# Generator Code
class Generator(nn.Module):
    """DCGAN generator: turns a latent vector into a 64x64 image.

    A stack of five transposed convolutions upsamples a
    ``(latent_dim, 1, 1)`` input to ``(channels, 64, 64)``; the final ``Tanh``
    keeps every output value in ``[-1, 1]``.

    Args:
        ngpu: number of GPUs; stored on the instance but not used by this
            class itself.
        latent_dim: length of the latent vector. Defaults to the module-level
            ``nz`` when omitted.
        feature_maps: base feature-map width. Defaults to the module-level
            ``ngf`` when omitted.
        channels: number of output image channels. Defaults to the
            module-level ``nc`` when omitted.
    """

    def __init__(self, ngpu, latent_dim=None, feature_maps=None, channels=None):
        super(Generator, self).__init__()
        self.ngpu = ngpu

        # Fall back to the notebook-wide settings only when no explicit value
        # is given, so `Generator(ngpu)` behaves exactly as before.
        latent_dim = nz if latent_dim is None else latent_dim
        feature_maps = ngf if feature_maps is None else feature_maps
        channels = nc if channels is None else channels

        self.main = nn.Sequential(
            # input is Z, going into a convolution
            nn.ConvTranspose2d(latent_dim, feature_maps * 8, 4, 1, 0, bias=False),
            nn.BatchNorm2d(feature_maps * 8),
            nn.ReLU(True),
            # state size. (feature_maps*8) x 4 x 4
            nn.ConvTranspose2d(feature_maps * 8, feature_maps * 4, 4, 2, 1, bias=False),
            nn.BatchNorm2d(feature_maps * 4),
            nn.ReLU(True),
            # state size. (feature_maps*4) x 8 x 8
            nn.ConvTranspose2d(feature_maps * 4, feature_maps * 2, 4, 2, 1, bias=False),
            nn.BatchNorm2d(feature_maps * 2),
            nn.ReLU(True),
            # state size. (feature_maps*2) x 16 x 16
            nn.ConvTranspose2d(feature_maps * 2, feature_maps, 4, 2, 1, bias=False),
            nn.BatchNorm2d(feature_maps),
            nn.ReLU(True),
            # state size. (feature_maps) x 32 x 32
            nn.ConvTranspose2d(feature_maps, channels, 4, 2, 1, bias=False),
            nn.Tanh()
            # state size. (channels) x 64 x 64
        )

    def forward(self, input):
        """Run the latent batch ``input`` through the upsampling stack."""
        return self.main(input)


class Discriminator(nn.Module):
    """DCGAN discriminator: scores a 64x64 image as real or generated.

    Four stride-2 convolutions reduce a ``(channels, 64, 64)`` input to a
    4x4 map, and a final 4x4 convolution followed by ``Sigmoid`` yields one
    value in ``[0, 1]`` per image (output shape ``(N, 1, 1, 1)``).

    Args:
        ngpu: number of GPUs; stored on the instance but not used by this
            class itself.
        channels: number of input image channels. Defaults to the
            module-level ``nc`` when omitted.
        feature_maps: base feature-map width. Defaults to the module-level
            ``ndf`` when omitted.
    """

    def __init__(self, ngpu, channels=None, feature_maps=None):
        super(Discriminator, self).__init__()
        self.ngpu = ngpu

        # Fall back to the notebook-wide settings only when no explicit value
        # is given, so `Discriminator(ngpu)` behaves exactly as before.
        channels = nc if channels is None else channels
        feature_maps = ndf if feature_maps is None else feature_maps

        self.main = nn.Sequential(
            # input is (channels) x 64 x 64
            nn.Conv2d(channels, feature_maps, 4, 2, 1, bias=False),
            nn.LeakyReLU(0.2, inplace=True),
            # state size. (feature_maps) x 32 x 32
            nn.Conv2d(feature_maps, feature_maps * 2, 4, 2, 1, bias=False),
            nn.BatchNorm2d(feature_maps * 2),
            nn.LeakyReLU(0.2, inplace=True),
            # state size. (feature_maps*2) x 16 x 16
            nn.Conv2d(feature_maps * 2, feature_maps * 4, 4, 2, 1, bias=False),
            nn.BatchNorm2d(feature_maps * 4),
            nn.LeakyReLU(0.2, inplace=True),
            # state size. (feature_maps*4) x 8 x 8
            nn.Conv2d(feature_maps * 4, feature_maps * 8, 4, 2, 1, bias=False),
            nn.BatchNorm2d(feature_maps * 8),
            nn.LeakyReLU(0.2, inplace=True),
            # state size. (feature_maps*8) x 4 x 4
            nn.Conv2d(feature_maps * 8, 1, 4, 1, 0, bias=False),
            nn.Sigmoid()
        )

    def forward(self, input):
        """Return the per-image realness score for the batch ``input``."""
        return self.main(input)

初始化生成器和判别器

# Both networks are set up the same way: build on `device`, optionally wrap in
# DataParallel, initialise the weights, then print the architecture.
# DataParallel is only used on CUDA with more than one GPU requested.
_wrap_in_data_parallel = (device.type == 'cuda') and (ngpu > 1)

# --- Generator ---
netG = Generator(ngpu).to(device)
if _wrap_in_data_parallel:
    netG = nn.DataParallel(netG, list(range(ngpu)))
# weights_init draws conv weights from N(0, 0.02) and batch-norm weights
# from N(1, 0.02).
netG.apply(weights_init)
print(netG)

# --- Discriminator ---
netD = Discriminator(ngpu).to(device)
if _wrap_in_data_parallel:
    netD = nn.DataParallel(netD, list(range(ngpu)))
netD.apply(weights_init)
print(netD)


定义损失函数

# Binary cross-entropy loss, applied to the discriminator's Sigmoid output
# against the real/fake target labels during training.
criterion = nn.BCELoss()

开始训练

# Label convention used when building BCE targets: 1 = real, 0 = generated.
real_label = 1.0
fake_label = 0.0

# A fixed batch of 64 latent vectors, reused for every snapshot so that the
# generator's progress can be compared across iterations.
fixed_noise = torch.randn(64, nz, 1, 1, device=device)

# One Adam optimizer per network, both with the same hyper-parameters.
_adam_betas = (beta1, 0.999)
optimizerD = optim.Adam(netD.parameters(), lr=lr, betas=_adam_betas)
optimizerG = optim.Adam(netG.parameters(), lr=lr, betas=_adam_betas)

# Training Loop
import time  # imported once here instead of on every epoch

# Progress bookkeeping:
#   img_list - generator snapshots (image grids) rendered from fixed_noise
#   G_losses - generator loss per iteration
#   D_losses - discriminator loss per iteration
#   iters    - global iteration counter across all epochs
img_list = []
G_losses = []
D_losses = []
iters = 0

print("Starting Training Loop...")
for epoch in range(num_epochs):
    start = time.time()
    for i, data in enumerate(dataloader, 0):

        ############################
        # (1) Update D network: maximize log(D(x)) + log(1 - D(G(z)))
        ###########################
        ## Train with an all-real batch
        netD.zero_grad()
        real_cpu = data[0].to(device)
        b_size = real_cpu.size(0)
        # Explicit float dtype: BCELoss needs floating-point targets no matter
        # how real_label happens to be written.
        label = torch.full((b_size,), real_label,
                           dtype=torch.float, device=device)
        output = netD(real_cpu).view(-1)
        errD_real = criterion(output, label)
        errD_real.backward()
        D_x = output.mean().item()

        ## Train with an all-fake batch
        noise = torch.randn(b_size, nz, 1, 1, device=device)
        fake = netG(noise)
        label.fill_(fake_label)
        # detach(): this backward pass must not reach the generator.
        output = netD(fake.detach()).view(-1)
        errD_fake = criterion(output, label)
        # Gradients accumulate on top of those from the real batch.
        errD_fake.backward()
        D_G_z1 = output.mean().item()
        errD = errD_real + errD_fake
        optimizerD.step()

        ############################
        # (2) Update G network: maximize log(D(G(z)))
        ###########################
        netG.zero_grad()
        label.fill_(real_label)  # fake labels are real for generator cost
        # D was just updated, so score the same fake batch again.
        output = netD(fake).view(-1)
        errG = criterion(output, label)
        errG.backward()
        D_G_z2 = output.mean().item()
        optimizerG.step()

        # Output training stats
        if i % 50 == 0:
            print('[%d/%d][%d/%d]\tLoss_D: %.4f\tLoss_G: %.4f\tD(x): %.4f\tD(G(z)): %.4f / %.4f'
                  % (epoch, num_epochs, i, len(dataloader),
                     errD.item(), errG.item(), D_x, D_G_z1, D_G_z2))

        # Save losses for plotting later
        G_losses.append(errG.item())
        D_losses.append(errD.item())

        # Snapshot the generator on fixed_noise every 20 iterations and on the
        # very last batch of the final epoch.
        is_final_batch = (epoch == num_epochs - 1) and (i == len(dataloader) - 1)
        if (iters % 20 == 0) or is_final_batch:
            with torch.no_grad():
                samples = netG(fixed_noise).detach().cpu()

            # Build the grid once and use it for both the history list and the
            # saved figure. Dedicated names are used so that neither the batch
            # index `i` nor the training batch `fake` gets overwritten.
            grid = vutils.make_grid(samples, padding=2, normalize=True)
            img_list.append(grid)
            fig = plt.figure(figsize=(8, 8))
            plt.imshow(np.transpose(grid, (1, 2, 0)))
            plt.axis('off')  # hide the axes
            plt.savefig("out/%d_%d.png" % (epoch, iters))
            plt.close(fig)
        iters += 1
    print('time:', time.time() - start)

绘制损失曲线

# Plot the per-iteration generator and discriminator losses on one figure.
plt.figure(figsize=(10, 5))
plt.title("Generator and Discriminator Loss During Training")
for _curve, _legend_name in ((G_losses, "G"), (D_losses, "D")):
    plt.plot(_curve, label=_legend_name)
plt.xlabel("iterations")
plt.ylabel("Loss")
plt.legend()
plt.show()


在这里插入图片描述

真假对比

# Side-by-side comparison: the real batch fetched earlier (`real_batch`) on the
# left, the most recent generator snapshot on the right.
_real_grid = vutils.make_grid(
    real_batch[0].to(device)[:64], padding=5, normalize=True
).cpu()
_fake_grid = img_list[-1]

plt.figure(figsize=(15, 15))
_panels = (("Real Images", _real_grid), ("Fake Images", _fake_grid))
for _slot, (_caption, _grid) in enumerate(_panels, start=1):
    plt.subplot(1, 2, _slot)
    plt.axis("off")
    plt.title(_caption)
    # Grids are (C, H, W); imshow wants (H, W, C).
    plt.imshow(np.transpose(_grid, (1, 2, 0)))
plt.show()

在这里插入图片描述

### 缺陷检测中使用GAN生成图片的方法

在缺陷检测领域,利用生成对抗网络(GAN)来增强数据集并改进模型性能是一种有效方法。通过训练GAN生成类似于正常表面图像的伪图像,可以增加可用的数据量,从而改善后续分类器的表现[^1]。

#### 训练过程

为了实现这一目标,通常采用两阶段策略:

- **第一阶段**:构建一个能够区分真实与伪造样本的强大判别器。这一步骤对于确保生成器能创建逼真的合成图像是至关重要的。
- **第二阶段**:优化生成器参数直至其产生的输出难以被上述经过良好调校后的判别器识别为假象。此时,生成器已经学会了捕捉到输入分布的关键特性,并能够在潜在特征空间内再现这些模式。

一旦完成以上两个阶段的工作,则可获得一组高质量的人造正面样例集合。该集合不仅有助于缓解因实际生产环境中正负类别比例失衡而导致的问题——即存在大量正常的而异常情况相对较少的情况[^4]——而且还可以作为额外资源辅助其他计算机视觉任务如分类、定位等。

#### 应用实例

具体来说,在工业产品质量控制方面,研究者们提出了基于卷积神经网络(CNN)快速稳健的产品缺陷探测框架。此方案结合了前述提到过的GAN技术,用来扩充有限数量的真实瑕疵照片库。实验表明这种方法显著提高了最终系统的准确性以及鲁棒性[^3]。

```python
import torch
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
from models.gan import Generator, Discriminator  # 假设这是自定义模块路径

def train_gan(normal_images_path='path/to/normal/images'):
    transform = transforms.Compose([
        transforms.Resize((64, 64)),
        transforms.ToTensor(),
        transforms.Normalize([0.5], [0.5])
    ])

    dataset = datasets.ImageFolder(root=normal_images_path, transform=transform)
    dataloader = DataLoader(dataset, batch_size=64, shuffle=True)

    generator = Generator()
    discriminator = Discriminator()

    criterion = nn.BCELoss()  # Binary Cross Entropy Loss
    optimizer_G = optim.Adam(generator.parameters(), lr=0.0002)
    optimizer_D = optim.Adam(discriminator.parameters(), lr=0.0002)

    for epoch in range(num_epochs):
        for i, (imgs, _) in enumerate(dataloader):
            valid = Variable(Tensor(imgs.size(0), 1).fill_(1.0), requires_grad=False)
            fake = Variable(Tensor(imgs.size(0), 1).fill_(0.0), requires_grad=False)
            real_imgs = Variable(imgs.type(Tensor))

            # Train Generator
            optimizer_G.zero_grad()
            z = Variable(Tensor(np.random.normal(0, 1, (imgs.shape[0], latent_dim))))
            gen_imgs = generator(z)
            g_loss = criterion(discriminator(gen_imgs), valid)
            g_loss.backward()
            optimizer_G.step()

            # Train Discriminator
            optimizer_D.zero_grad()
            real_loss = criterion(discriminator(real_imgs), valid)
            fake_loss = criterion(discriminator(gen_imgs.detach()), fake)
            d_loss = (real_loss + fake_loss) / 2
            d_loss.backward()
            optimizer_D.step()

if __name__ == '__main__':
    train_gan('data/normal_surfaces')
```
评论 48
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

hyk今天写算法了吗

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值