How to understand softmax?
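Softmax turns a vector of real-valued scores (logits) into a probability distribution: every output lies in (0, 1) and the outputs sum to 1. For an input vector x, the i-th output is softmax(x)_i = exp(x_i) / sum_j exp(x_j). The loop-based implementation below applies this formula along a chosen dimension of a 4D tensor of shape (batch, channels, height, width) and is checked against torch.nn.Softmax at the end.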
import torch
import math

def softmax(x, dim):
    # Get the shape of the input tensor
    batch_size, num_channels, height, width = x.shape
    # Initialize the output tensor with the same shape as the input tensor
    softmax_output = torch.zeros_like(x)
    if dim == 2:  # softmax over height
        # Iterate over the batch dimension
        for b in range(batch_size):
            # Iterate over the channels dimension
            for c in range(num_channels):
                # Iterate over the width dimension
                for w in range(width):
                    # Extract the column of height values for softmax
                    height_values = x[b, c, :, w]
                    # Step 1: Compute the exponential of each value
                    exp_values = [math.exp(val.item()) for val in height_values]
                    # Step 2: Compute the sum of the exponentials
                    exp_sum = sum(exp_values)
                    # Step 3: Normalize by dividing each exponential by the sum
                    softmax_values = [exp_val / exp_sum for exp_val in exp_values]
                    # Step 4: Store the softmax values back in the output tensor
                    for h in range(height):
                        softmax_output[b, c, h, w] = softmax_values[h]
    elif dim == 3:  # softmax over width
        # Iterate over the batch dimension
        for b in range(batch_size):
            # Iterate over the channels dimension
            for c in range(num_channels):
                # Iterate over the height dimension
                for h in range(height):
                    # Extract the row of width values for softmax
                    width_values = x[b, c, h, :]
                    # Step 1: Compute the exponential of each value
                    exp_values = [math.exp(val.item()) for val in width_values]
                    # Step 2: Compute the sum of the exponentials
                    exp_sum = sum(exp_values)
                    # Step 3: Normalize by dividing each exponential by the sum
                    softmax_values = [exp_val / exp_sum for exp_val in exp_values]
                    # Step 4: Store the softmax values back in the output tensor
                    for w in range(width):
                        softmax_output[b, c, h, w] = softmax_values[w]
    elif dim == 1:  # softmax over channels
        # Iterate over the batch dimension
        for b in range(batch_size):
            # Iterate over the height dimension
            for h in range(height):
                # Iterate over the width dimension
                for w in range(width):
                    # Extract the column of channel values for softmax
                    channel_values = x[b, :, h, w]
                    # Step 1: Compute the exponential of each value
                    exp_values = [math.exp(val.item()) for val in channel_values]
                    # Step 2: Compute the sum of the exponentials
                    exp_sum = sum(exp_values)
                    # Step 3: Normalize by dividing each exponential by the sum
                    softmax_values = [exp_val / exp_sum for exp_val in exp_values]
                    # Step 4: Store the softmax values back in the output tensor
                    for c in range(num_channels):
                        softmax_output[b, c, h, w] = softmax_values[c]
    else:
        raise ValueError("Invalid dim value. Please use dim=1, dim=2, or dim=3.")
    return softmax_output
# Random tensor with shape (batch, channels, height, width)
batch, channels, height, width = 4, 3, 8, 8
x = torch.randint(-10, 10, (batch, channels, height, width), dtype=torch.float)

# Example: Apply softmax over the height dimension (dim=2)
softmax_x = softmax(x, dim=2)
softmax_layer = torch.nn.Softmax(dim=2)
softmax_outputs = softmax_layer(x)
print(torch.allclose(softmax_x, softmax_outputs, atol=1e-4))  # True
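The loop-based version exponentiates the raw values directly, which can overflow once the logits get large. The standard remedy is to subtract the maximum along the softmax dimension before exponentiating; softmax is invariant to this shift, so the result is unchanged. A minimal vectorized sketch (the helper name stable_softmax is my own):

import torch

def stable_softmax(x, dim):
    # Shift so the largest value along `dim` becomes 0, i.e. exp(0) = 1,
    # which prevents overflow; softmax(x) == softmax(x - c) for any constant c
    shifted = x - x.max(dim=dim, keepdim=True).values
    exp_values = torch.exp(shifted)
    return exp_values / exp_values.sum(dim=dim, keepdim=True)

x = 100 * torch.randn(4, 3, 8, 8)  # large-magnitude logits
print(torch.allclose(stable_softmax(x, dim=1), torch.nn.Softmax(dim=1)(x), atol=1e-6))  # expected: True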
1. Example
When torch.nn.CrossEntropyLoss() is used with a 2D output matrix and a 1D label tensor, each row of the output matrix holds the raw predictions (logits) for one sample, and each element of the label tensor holds the integer class label of the corresponding sample.
"""
Model outputs(outputs):
For the first sample: [2.0, -1.0, 0.5]
For the second sample : [-0.5, 1.0, 3.0 ]
target labels [1,2]
Softmax([2.0, -1.0, 0.5]) = [0.832, 0.017, 0.151]
Softmax([-0.5, 1.0, 3.0]) = [0.046, 0.118, 0.836]
Average Loss = (4.08 + 0.18) / 2 ≈ 2.13
"""
2. Simulating the cross-entropy computation with torch
Meaning of the outputs matrix: after softmax, each row is a probability distribution over the classes:
Sample 1: [P(class 1 | sample 1), P(class 2 | sample 1), P(class 3 | sample 1), P(class 4 | sample 1)]
Sample 2: [P(class 1 | sample 2), P(class 2 | sample 2), P(class 3 | sample 2), P(class 4 | sample 2)]
Sample 3: [P(class 1 | sample 3), P(class 2 | sample 3), P(class 3 | sample 3), P(class 4 | sample 3)]
softmax(dim=1) applies softmax along dimension 1 (dim=1 is the column dimension, dim=0 the row dimension), i.e. across the values in different columns of the same row.
After softmax, the values in each row sum to 1.
import torch
import torch.nn.functional as F

def test2():
    # Example outputs (logits) and labels
    outputs = torch.tensor([
        [ 1.2, -0.5, 0.3,  2.1],  # Raw predictions for sample 1
        [-0.8,  1.5, 2.3, -1.0],  # Raw predictions for sample 2
        [ 0.5, -1.0, 1.8,  0.2]   # Raw predictions for sample 3
    ])
    target = torch.tensor([2, 1, 3])  # Ground-truth labels
    # Step 1: Compute softmax probabilities
    softmax_layer = torch.nn.Softmax(dim=1)
    softmax_outputs = softmax_layer(outputs)
    # or: softmax_outputs = F.softmax(outputs, dim=1)
    # softmax_outputs has shape [3, 4]
    # Step 2: Extract the predicted probability of the target class for each sample
    predicted_probs = softmax_outputs[range(len(target)), target]
    # predicted_probs has shape [3]
    # Step 3: Compute the negative log probabilities of the predicted classes
    neg_log_probs = -torch.log(predicted_probs)
    # neg_log_probs has shape [3]
    # Step 4: Compute the mean of the negative log probabilities
    mean_loss = torch.mean(neg_log_probs)
    print(mean_loss.item())  # 1.851070761680603

test2()
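Steps 1–3 above (softmax, pick out the target-class probability, take the negative log) are exactly what log_softmax followed by the negative log-likelihood loss computes, and torch.nn.CrossEntropyLoss combines these two operations. A minimal cross-check on the same data:

import torch
import torch.nn.functional as F

outputs = torch.tensor([
    [ 1.2, -0.5, 0.3,  2.1],
    [-0.8,  1.5, 2.3, -1.0],
    [ 0.5, -1.0, 1.8,  0.2]
])
target = torch.tensor([2, 1, 3])
# log_softmax + NLL loss is the numerically stable form of
# softmax -> index target probability -> -log -> mean
log_probs = F.log_softmax(outputs, dim=1)
print(F.nll_loss(log_probs, target))     # tensor(1.8511)
print(F.cross_entropy(outputs, target))  # tensor(1.8511)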
3. Using torch's built-in function directly
import torch
import torch.nn.functional as F

def test1():
    # Example outputs (logits) and labels
    outputs = torch.tensor([
        [ 1.2, -0.5, 0.3,  2.1],  # Raw predictions for sample 1
        [-0.8,  1.5, 2.3, -1.0],  # Raw predictions for sample 2
        [ 0.5, -1.0, 1.8,  0.2]   # Raw predictions for sample 3
    ])
    target = torch.tensor([2, 1, 3])  # Ground-truth labels
    criterion = torch.nn.CrossEntropyLoss(reduction="mean")
    print(criterion(outputs, target))  # tensor(1.8511)

test1()
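One pitfall worth noting: torch.nn.CrossEntropyLoss applies log_softmax internally, so it expects raw logits. Feeding it probabilities that have already gone through softmax effectively applies softmax twice and silently yields a different loss. A small illustrative sketch of the mismatch:

import torch
import torch.nn.functional as F

outputs = torch.tensor([
    [ 1.2, -0.5, 0.3,  2.1],
    [-0.8,  1.5, 2.3, -1.0],
    [ 0.5, -1.0, 1.8,  0.2]
])
target = torch.tensor([2, 1, 3])
criterion = torch.nn.CrossEntropyLoss()
print(criterion(outputs, target))                    # tensor(1.8511): correct, logits go in
print(criterion(F.softmax(outputs, dim=1), target))  # different value: softmax applied twice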