A few notes on MultiscaleDiscriminator

You can pretty much guess from the name: it discriminates the input image at multiple scales. A traditional discriminator judges a single image, whereas MultiscaleDiscriminator stacks several traditional discriminators. The first D in the stack judges the input image itself, the second D judges the input after it has been downsampled, and so on.
I went straight to the code without reading the paper, so if my understanding is off, corrections are welcome.
Now for the code; a small usage sketch with the resulting output shapes follows the listing.

import torch.nn as nn
import numpy as np


class NLayerDiscriminator(nn.Module):
    def __init__(self, input_nc, ndf=64, n_layers=3, norm_layer=nn.BatchNorm2d, use_sigmoid=False, getIntermFeat=False):
        super(NLayerDiscriminator, self).__init__()
        self.getIntermFeat = getIntermFeat
        self.n_layers = n_layers

        kw = 4                               # 4x4 kernels throughout
        padw = int(np.ceil((kw-1.0)/2))      # padding of 1
        # first block: stride-2 conv + LeakyReLU, no normalization
        sequence = [[nn.Conv2d(input_nc, ndf, kernel_size=kw, stride=2, padding=padw), nn.LeakyReLU(0.2, True)]]

        nf = ndf
        # n_layers-1 further stride-2 blocks, doubling the channel count up to at most 512
        for n in range(1, n_layers):
            nf_prev = nf
            nf = min(nf * 2, 512)
            sequence += [[
                nn.Conv2d(nf_prev, nf, kernel_size=kw, stride=2, padding=padw),
                norm_layer(nf), nn.LeakyReLU(0.2, True)
            ]]

        # one more conv block at stride 1, then a 1-channel patch prediction map
        nf_prev = nf
        nf = min(nf * 2, 512)
        sequence += [[
            nn.Conv2d(nf_prev, nf, kernel_size=kw, stride=1, padding=padw),
            norm_layer(nf),
            nn.LeakyReLU(0.2, True)
        ]]

        sequence += [[nn.Conv2d(nf, 1, kernel_size=kw, stride=1, padding=padw)]]

        if use_sigmoid:
            sequence += [[nn.Sigmoid()]]

        if getIntermFeat:
            # register each block separately so intermediate feature maps can be returned
            for n in range(len(sequence)):
                setattr(self, 'model'+str(n), nn.Sequential(*sequence[n]))
        else:
            # otherwise collapse all blocks into a single nn.Sequential
            sequence_stream = []
            for n in range(len(sequence)):
                sequence_stream += sequence[n]
            self.model = nn.Sequential(*sequence_stream)

    def forward(self, input):
        if self.getIntermFeat:
            # run block by block and collect every intermediate output
            res = [input]
            for n in range(self.n_layers+2):  # n_layers+2 conv blocks (an optional Sigmoid block would not be traversed here)
                model = getattr(self, 'model'+str(n))
                res.append(model(res[-1]))
            return res[1:]
        else:
            return self.model(input)


class MultiscaleDiscriminator(nn.Module):
    def __init__(self, input_nc, ndf=64, n_layers=3, norm_layer=nn.BatchNorm2d,
                 use_sigmoid=False, num_D=3, getIntermFeat=False):
        super(MultiscaleDiscriminator, self).__init__()
        self.num_D = num_D
        self.n_layers = n_layers
        self.getIntermFeat = getIntermFeat

        # build num_D identical discriminators and register their (sub-)modules on self
        for i in range(num_D):
            netD = NLayerDiscriminator(input_nc, ndf, n_layers, norm_layer, use_sigmoid, getIntermFeat)
            if getIntermFeat:
                for j in range(n_layers + 2):
                    setattr(self, 'scale' + str(i) + '_layer' + str(j), getattr(netD, 'model' + str(j)))
            else:
                setattr(self, 'layer' + str(i), netD.model)

        # average pooling used to halve the resolution between scales
        self.downsample = nn.AvgPool2d(3, stride=2, padding=[1, 1], count_include_pad=False)

    def singleD_forward(self, model, input):
        if self.getIntermFeat:
            result = [input]
            for i in range(len(model)):
                result.append(model[i](result[-1]))
            return result[1:]
        else:
            return [model(input)]

    def forward(self, input):
        num_D = self.num_D
        result = []
        input_downsampled = input
        # discriminator num_D-1 sees the full-resolution input, discriminator 0 the most downsampled one
        for i in range(num_D):
            if self.getIntermFeat:
                model = [getattr(self, 'scale' + str(num_D - 1 - i) + '_layer' + str(j)) for j in
                         range(self.n_layers + 2)]
            else:
                model = getattr(self, 'layer' + str(num_D - 1 - i))
            result.append(self.singleD_forward(model, input_downsampled))
            if i != (num_D - 1):
                # halve the resolution before feeding the next discriminator
                input_downsampled = self.downsample(input_downsampled)
        return result
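
To make the output structure concrete, here is a minimal usage sketch of my own (the 256x256 input size, batch size, and the shape comments are assumptions chosen purely for illustration):

import torch

# dummy batch: 4 RGB images at 256x256
x = torch.randn(4, 3, 256, 256)

# a single "traditional" discriminator: one patch-level real/fake map per image
netD = NLayerDiscriminator(input_nc=3, ndf=64, n_layers=3)
print(netD(x).shape)        # torch.Size([4, 1, 30, 30])

# the multiscale version: num_D=3 copies, each applied to a further-downsampled input
netMD = MultiscaleDiscriminator(input_nc=3, ndf=64, n_layers=3, num_D=3)
outputs = netMD(x)          # a list with num_D entries, full resolution first
for out in outputs:
    # with getIntermFeat=False each entry is a one-element list holding the patch map
    print(out[0].shape)     # [4, 1, 30, 30], then [4, 1, 14, 14], then [4, 1, 6, 6]

# with getIntermFeat=True each per-scale entry becomes a list of n_layers+2
# intermediate feature maps (useful for a feature-matching style loss)
netMD_feat = MultiscaleDiscriminator(input_nc=3, ndf=64, n_layers=3, num_D=3, getIntermFeat=True)
feats = netMD_feat(x)
print(len(feats), len(feats[0]))    # 3 scales, 5 feature maps per scale

The shrinking output shapes also show the point of the downsampling: each coarser discriminator effectively looks at the image with a larger receptive field.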