KLD Loss( tf.nn.softmax, torch.nn.functional.softmax, log_softmax, kl_div) 计算技巧(一)

KLD Loss计算:TensorFlow与PyTorch实现解析
本文介绍了在深度学习中计算Kullback-Leibler散度(KLD)的技巧,涉及TensorFlow和PyTorch的softmax、log_softmax以及kl_div操作。通过二维数据输入示例,详细解释了数据前处理和KLD散度计算的过程,强调了正确使用softmax和log_softmax的重要性,以避免数值计算问题并提高效率。

最近在比较不同模型的性能,发现虽然文献中使用的是相同的指标(比如KLD),但是数据的处理方式却存在差异,这会导致最后的数据并不具有直接可比性。
这里记录下,其中的一些值得记住的细节。主要涉及的API包括tf.nn.softmax, torch.nn.functional.softmax, log_softmax, kl_div

二维数据输入

为直观地看出数据,我们以一个2×2的矩阵为例,并打印。

import cv2
import numpy as np
import torch
import torch.nn.functional as F
import tensorflow as tf


# Build a random (1, 2, 2) "prediction" array so the effect of each
# softmax variant below can be inspected by eye.
y_pred=np.random.randn(1,2,2)
print( '\t y_pred', y_pred)

y_pred [[[-1.23909949 0.77767204]
[ 0.08646117 -0.14608897]]]

数据前处理

这里开始就有技巧性,由于神经网络的预测输出数值一般为双极性数值。如何将数据进行合理的处理,使其映射到标准空间方便后续计算。
这里我觉得比较合适的操作方法是先进行一组softmax操作,不管输入数据的范围是多少先映射到[0,1]空间。这里的softmax操作就有技巧,我们先看看softmax的API定义

tf.nn.softmax

tf.nn.softmax(
    logits, axis=None, name=None)

其中的axis形参默认值为-1,即默认对最后一个维度进行softmax操作

The dimension softmax would be performed on. The default is -1 which indicates the last dimension.

参考:https://www.tensorflow.org/api_docs/python/tf/nn/softmax

因此如果我们直接使用softmax操作,得到的是对最后维度,即 [-1.23909949 0.77767204] 和[ 0.08646117 -0.14608897]分别进行softmax操作的结果

# tf.nn.softmax defaults to axis=-1, i.e. softmax over the last dimension only.
y_pred_soft=tf.nn.softmax(y_pred)
print('tf softmax y_pred:', y_pred_soft)

输出

tf softmax y_pred: tf.Tensor(
[[[0.11745323 0.88254677]
[0.55787694 0.44212306]]], shape=(1, 2, 2), dtype=float64)

torch.nn.functional.softmax

对于pytorch的softmax操作

torch.nn.functional.softmax(input, dim=None, _stacklevel=3, dtype=None)
  • input (Tensor) – input
  • dim (int) – A dimension along which softmax will be computed.
  • dtype (torch.dtype, optional) – the desired data type of returned tensor. If specified, the input tensor is casted to dtype before the operation is performed. This is useful for preventing data type overflows. Default: None
    参考:https://pytorch.org/docs/stable/nn.functional.html

因此对应tf的默认操作,这里pytorch应该写成

# PyTorch's F.softmax requires an explicit dim; dim=-1 reproduces
# TensorFlow's default behavior (softmax along the last dimension).
y_pred = F.softmax(torch.from_numpy(y_pred), dim=-1)
print('torch softmax y_pred:', y_pred)

结果

torch softmax y_pred: tensor([[[0.1175, 0.8825],
[0.5579, 0.4421]]], dtype=torch.float64)

但是但是但是,重点!!!
我们希望的softmax应该是对二维数据中所有元素同时进行的softmax,而不是特定在某个维度,因此这里我们需要的操作,是先将所有数据展开成一维后再进行softmax操作

# Flatten to shape (1, N) first so softmax normalizes over *all* elements
# at once rather than over a single trailing dimension.
y_pred = F.softmax(y_pred.view(1, -1), dim=1)

kld 散度计算

tensorflow

按照KL散度的计算公式 $D_{KL}(P \parallel Q) = \sum_i P(i)\,\log\frac{P(i)}{Q(i)}$ 进行计算:

# NOTE(review): this block was pasted as a few collapsed lines and is not valid
# Python as shown; it is reformatted here with the original logic intact.
# Conditional VAE on grayscale images: stratified batches during training,
# then per-class sample generation saved as grayscale PNGs.
import os
from collections import defaultdict

import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Sampler
from torchvision import datasets, transforms
from torchvision.utils import save_image
from tqdm import tqdm

# ----- Hyperparameters -----
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
batch_size = 128
image_size = 64
channels = 1            # grayscale images
latent_dim = 128
learning_rate = 1e-3
epochs = 130

# ----- Data loading and preprocessing (with grayscale conversion) -----
transform = transforms.Compose([
    transforms.Resize(image_size),
    transforms.CenterCrop(image_size),
    transforms.Grayscale(num_output_channels=1),   # force single channel
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,)),          # map [0, 1] -> [-1, 1]
])
data_dir = r'I:\codes\新'
dataset = datasets.ImageFolder(root=data_dir, transform=transform)


class StratifiedSampler(Sampler):
    """Yield indices so each batch holds an equal share of every class.

    NOTE(review): the remainder is silently dropped when batch_size is not
    divisible by the number of classes, and np.random.choice(replace=False)
    raises if a class has fewer than samples_per_class images — confirm the
    dataset satisfies both assumptions.
    """

    def __init__(self, labels, batch_size):
        self.labels = labels
        self.batch_size = batch_size
        self.class_indices = defaultdict(list)
        for i, label in enumerate(labels):
            self.class_indices[label].append(i)
        self.num_classes = len(self.class_indices)
        self.samples_per_class = batch_size // self.num_classes

    def __iter__(self):
        indices = []
        for _ in range(len(self.labels) // self.batch_size):
            for class_idx in range(self.num_classes):
                indices.extend(np.random.choice(
                    self.class_indices[class_idx],
                    self.samples_per_class,
                    replace=False))
        return iter(indices)

    def __len__(self):
        return len(self.labels)


sampler = StratifiedSampler(dataset.targets, batch_size)
dataloader = DataLoader(dataset, batch_size=batch_size, sampler=sampler)


class Encoder(nn.Module):
    """Conv encoder: image + class label -> (mu, logvar) of the latent."""

    def __init__(self, channels, latent_dim, num_classes):
        super(Encoder, self).__init__()
        # Four stride-2 convs: spatial size shrinks by 16x overall.
        self.conv1 = nn.Conv2d(channels, 32, 3, 2, 1)   # input channel = 1
        self.bn1 = nn.BatchNorm2d(32)
        self.conv2 = nn.Conv2d(32, 64, 3, 2, 1)
        self.bn2 = nn.BatchNorm2d(64)
        self.conv3 = nn.Conv2d(64, 128, 3, 2, 1)
        self.bn3 = nn.BatchNorm2d(128)
        self.conv4 = nn.Conv2d(128, 256, 3, 2, 1)
        self.bn4 = nn.BatchNorm2d(256)
        self.label_emb = nn.Embedding(num_classes, latent_dim)
        self.fc_mu = nn.Linear(
            256 * (image_size // 16) * (image_size // 16) + latent_dim,
            latent_dim)
        self.fc_logvar = nn.Linear(
            256 * (image_size // 16) * (image_size // 16) + latent_dim,
            latent_dim)
        self.relu = nn.ReLU()

    def forward(self, x, labels):
        x = self.relu(self.bn1(self.conv1(x)))
        x = self.relu(self.bn2(self.conv2(x)))
        x = self.relu(self.bn3(self.conv3(x)))
        x = self.relu(self.bn4(self.conv4(x)))
        x = x.view(x.size(0), -1)
        label_emb = self.label_emb(labels)
        x = torch.cat([x, label_emb], dim=1)  # condition on the class label
        mu = self.fc_mu(x)
        logvar = self.fc_logvar(x)
        return mu, logvar


class Decoder(nn.Module):
    """Transposed-conv decoder: latent + class label -> image in [-1, 1]."""

    def __init__(self, channels, latent_dim, num_classes):
        super(Decoder, self).__init__()
        self.fc = nn.Linear(latent_dim + latent_dim,
                            256 * (image_size // 16) * (image_size // 16))
        self.upconv1 = nn.ConvTranspose2d(256, 128, 2, 2)
        self.bn1 = nn.BatchNorm2d(128)
        self.upconv2 = nn.ConvTranspose2d(128, 64, 2, 2)
        self.bn2 = nn.BatchNorm2d(64)
        self.upconv3 = nn.ConvTranspose2d(64, 32, 2, 2)
        self.bn3 = nn.BatchNorm2d(32)
        self.upconv4 = nn.ConvTranspose2d(32, channels, 2, 2)  # output channel = 1
        self.relu = nn.ReLU()
        self.tanh = nn.Tanh()
        self.label_emb = nn.Embedding(num_classes, latent_dim)

    def forward(self, z, labels):
        label_emb = self.label_emb(labels)
        z = torch.cat([z, label_emb], dim=1)
        x = self.fc(z)
        x = x.view(x.size(0), 256, image_size // 16, image_size // 16)
        x = self.relu(self.bn1(self.upconv1(x)))
        x = self.relu(self.bn2(self.upconv2(x)))
        x = self.relu(self.bn3(self.upconv3(x)))
        x = self.tanh(self.upconv4(x))  # tanh matches Normalize((0.5,), (0.5,))
        return x


class VAE(nn.Module):
    """Conditional VAE wrapper around Encoder/Decoder."""

    def __init__(self, channels, latent_dim, num_classes):
        super(VAE, self).__init__()
        self.encoder = Encoder(channels, latent_dim, num_classes)
        self.decoder = Decoder(channels, latent_dim, num_classes)

    def reparameterize(self, mu, logvar):
        # z = mu + sigma * eps, with eps ~ N(0, I)
        std = torch.exp(0.5 * logvar)
        eps = torch.randn_like(std)
        return mu + eps * std

    def forward(self, x, labels):
        mu, logvar = self.encoder(x, labels)
        z = self.reparameterize(mu, logvar)
        recon_x = self.decoder(z, labels)
        return recon_x, mu, logvar


# ----- Model and optimizer -----
num_classes = len(dataset.classes)
model = VAE(channels, latent_dim, num_classes).to(device)
optimizer = optim.Adam(model.parameters(), lr=learning_rate)


def vae_loss(recon_x, x, mu, logvar):
    """Return (recon loss, KLD loss, their sum), each summed over the batch."""
    recon_loss = nn.functional.mse_loss(recon_x, x, reduction='sum')
    # Closed-form KL divergence between N(mu, sigma^2) and N(0, I).
    kld_loss = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp())
    return recon_loss, kld_loss, recon_loss + kld_loss


# ----- Training loop -----
with tqdm(total=epochs, desc="Total Training Progress") as pbar_total:
    for epoch in range(epochs):
        model.train()
        epoch_recon_loss = 0.0
        epoch_kld_loss = 0.0
        epoch_total_loss = 0.0
        batch_pbar = tqdm(dataloader, desc=f"Epoch {epoch+1}/{epochs}", leave=False)
        for batch_idx, (images, labels) in enumerate(batch_pbar):
            images = images.to(device)
            labels = labels.to(device)
            optimizer.zero_grad()
            recon_images, mu, logvar = model(images, labels)
            recon_loss, kld_loss, total_loss = vae_loss(recon_images, images, mu, logvar)
            total_loss.backward()
            optimizer.step()
            epoch_recon_loss += recon_loss.item()
            epoch_kld_loss += kld_loss.item()
            epoch_total_loss += total_loss.item()
            batch_pbar.set_postfix({
                "Recon Loss": f"{recon_loss.item():.2f}",
                "KLD Loss": f"{kld_loss.item():.2f}",
                "Total Loss": f"{total_loss.item():.2f}",
            })
        batch_pbar.close()
        avg_recon = epoch_recon_loss / len(dataloader.dataset)
        avg_kld = epoch_kld_loss / len(dataloader.dataset)
        avg_total = epoch_total_loss / len(dataloader.dataset)
        pbar_total.set_postfix({
            "Avg Recon Loss": f"{avg_recon:.2f}",
            "Avg KLD Loss": f"{avg_kld:.2f}",
            "Avg Total Loss": f"{avg_total:.2f}",
        })
        pbar_total.update(1)

# ----- Sample generation (saved directly as grayscale PNGs) -----
model.eval()
output_dir = r'I:\codes\vae(8.14)4'
num_samples_per_class = 700
classes = dataset.classes
num_classes = len(classes)
# Create one output folder per class.
for class_name in classes:
    class_dir = os.path.join(output_dir, class_name)
    os.makedirs(class_dir, exist_ok=True)
with torch.no_grad():
    for class_idx in tqdm(range(num_classes), desc="Generating samples for all classes"):
        class_name = classes[class_idx]
        class_dir = os.path.join(output_dir, class_name)
        generated_count = 0
        class_pbar = tqdm(total=num_samples_per_class,
                          desc=f"Generating samples for {class_name}", leave=False)
        while generated_count < num_samples_per_class:
            batch_size_gen = min(num_samples_per_class - generated_count, batch_size)
            z = torch.randn(batch_size_gen, latent_dim).to(device)
            labels = torch.full((batch_size_gen,), class_idx, dtype=torch.long).to(device)
            samples = model.decoder(z, labels)   # (batch_size_gen, 1, 64, 64)
            samples = samples * 0.5 + 0.5        # un-normalize: [-1, 1] -> [0, 1]
            for i in range(samples.size(0)):
                sample = samples[i]              # (1, 64, 64)
                save_path = os.path.join(class_dir, f'generated_{generated_count}.png')
                save_image(sample, save_path)    # saved as single-channel grayscale
                generated_count += 1
                class_pbar.update(1)
        class_pbar.close()
print("灰度样本生成并保存完成。")
# (Reader's request that followed the paste: "Please optimize this model so the
# generated samples are of higher quality and their details better match the
# training images; provide the complete code.")
08-15
评论
成就一亿技术人!
拼手气红包6.0元
还能输入1000个字符
 
红包 添加红包
表情包 插入表情
 条评论被折叠 查看
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值