```markdown
# 代码概述
在原有完整、优化的 CNN 实现基础上,将**记录指标的频率从每 100 步一次改为每 500 步一次**。保持使用增强型网络结构、L2 正则化、GPU 加速和最终可视化功能,仅调整监控频率,以减少评估带来的计算开销并聚焦长期趋势。
---
# 代码解析
```python
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
import torchvision
from torchvision import transforms
import matplotlib.pyplot as plt
# ====================== 1. Hyperparameters ======================
EPOCHS = 3             # number of passes over the training loader
BATCH_SIZE = 64        # mini-batch size of the training loader
LR = 0.001             # learning rate
DOWNLOAD_MNIST = True  # forwarded as `download=` to the MNIST training dataset
TEST_SIZE = 1000       # number of test samples used for evaluation

# Prefer the GPU when CUDA is available; otherwise fall back to the CPU.
DEVICE = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(f"Using device: {DEVICE}")
# ====================== 2. Data preprocessing ======================
# Convert each image to a tensor, then normalise it with mean 0.1307 / std 0.3081.
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,)),
    ]
)

# Load both splits from the same root directory with the same transform.
# Only the training split is asked to download the data.
train_data = torchvision.datasets.MNIST(
    root='./mnist/', train=True, transform=transform, download=DOWNLOAD_MNIST
)
test_data = torchvision.datasets.MNIST(root='./mnist/', train=False, transform=transform)

# Training batches are reshuffled every epoch; the test loader keeps dataset order.
train_loader = DataLoader(dataset=train_data, batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(dataset=test_data, batch_size=TEST_SIZE, shuffle=False)

# Pull a single batch of TEST_SIZE test samples once and keep it on DEVICE,
# so every later evaluation reuses exactly the same samples.
test_x, test_y = next(iter(test_loader))
test_x = test_x.to(DEVICE)
test_y = test_y.to(DEVICE)
# ====================== 3. 定义增强型 CNN 模型 ======================
class EnhancedCNN(nn.Module):
    """Three-convolution CNN that maps single-channel images to class logits.

    The feature extractor applies two conv -> ReLU -> 2x2 max-pool stages
    followed by a third conv -> ReLU -> Dropout2d stage. All convolutions
    preserve the spatial size, so the two pooling layers shrink each side by a
    factor of four; the classifier's ``64 * 7 * 7`` input width is therefore
    sized for 28x28 inputs.

    Args:
        num_classes: Number of output logits. Defaults to 10, which reproduces
            the original fixed-size output layer.
    """

    def __init__(self, num_classes: int = 10) -> None:
        super().__init__()
        # Layer order and attribute names (`conv`, `fc`) are kept unchanged so
        # that state_dict keys stay identical.
        self.conv = nn.Sequential(
            nn.Conv2d(1, 16, kernel_size=5, stride=1, padding=2),
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Conv2d(16, 32, kernel_size=5, stride=1, padding=2),
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.Dropout2d(0.25),
        )
        self.fc = nn.Sequential(
            nn.Linear(64 * 7 * 7, 128),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(128, num_classes),
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return raw (un-normalised) class scores for a batch of images.

        Args:
            x: Image batch laid out as (batch, 1, height, width).

        Returns:
            Tensor of shape (batch, num_classes).
        """
        features = self.conv(x)
        # flatten() also accepts non-contiguous feature maps, where view() would raise.
        return self.fc(features.flatten(start_dim=1))
# Build the model on the target device and create the optimizer (L2 regularisation enabled).
cnn = EnhancedCNN()
cnn.to(DEVICE)  # nn.Module.to() moves the parameters in place
optimizer = optim.Adam(
    params=cnn.parameters(),
    lr=LR,
    weight_decay=0.001,  # L2 penalty
)
loss_func = nn.CrossEntropyLoss()
# ====================== 4. Training: record metrics every 500 steps (no printing) ======================
def _test_accuracy(model, inputs, targets):
    """Return the fraction of `inputs` whose arg-max prediction equals `targets`.

    The forward pass runs in eval mode and without gradient tracking, so
    dropout layers are disabled while measuring. The model's previous
    train/eval mode is restored before returning.
    """
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            predictions = model(inputs).argmax(dim=1)
        return (predictions == targets).float().mean().item()
    finally:
        model.train(was_training)


steps = []
losses = []
accuracies = []

# Baseline evaluation before any parameter update.
steps.append(0)
losses.append(0.0)  # placeholder: no training loss exists yet at step 0
accuracies.append(_test_accuracy(cnn, test_x, test_y))

cnn.train()
for epoch in range(EPOCHS):
    for step, (b_x, b_y) in enumerate(train_loader):
        b_x, b_y = b_x.to(DEVICE), b_y.to(DEVICE)

        # Forward pass
        output = cnn(b_x)
        loss = loss_func(output, b_y)

        # Backward pass and parameter update
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Record metrics on every 500th batch of an epoch. `step` restarts at 0
        # each epoch, so an epoch's first batch is never recorded.
        if step % 500 == 0 and step > 0:
            global_step = epoch * len(train_loader) + step
            steps.append(global_step)
            losses.append(loss.item())  # loss of the current training batch
            accuracies.append(_test_accuracy(cnn, test_x, test_y))
# ====================== 5. Plot the accuracy and loss curves ======================
fig, (acc_ax, loss_ax) = plt.subplots(1, 2, figsize=(12, 5))

# Left panel: test accuracy. The y-axis is clamped to [0.8, 1.0], so any
# recorded accuracy below 0.8 falls outside the visible range.
acc_ax.plot(steps, accuracies, marker='o', color='blue')
acc_ax.set_title('Test Accuracy vs Training Steps')
acc_ax.set_xlabel('Training Step')
acc_ax.set_ylabel('Accuracy')
acc_ax.set_ylim(0.8, 1.0)
acc_ax.grid(True, linestyle='--', alpha=0.5)

# Right panel: training loss, skipping the first (initial) entry.
loss_ax.plot(steps[1:], losses[1:], marker='s', color='red')
loss_ax.set_title('Training Loss vs Steps')
loss_ax.set_xlabel('Training Step')
loss_ax.set_ylabel('Loss')
loss_ax.grid(True, linestyle='--', alpha=0.5)

fig.tight_layout()
plt.show()
# ====================== 6. Show predictions for 16 test images ======================
# Switch to eval mode first: in train mode the dropout layers would randomly
# zero activations and make the displayed predictions non-deterministic.
cnn.eval()
with torch.no_grad():
    pred_outputs = cnn(test_x[:16])
    predicted_labels = torch.max(pred_outputs, 1)[1].cpu().numpy()

plt.figure(figsize=(10, 10))
# Iterate over the predictions actually produced, so a test batch with fewer
# than 16 samples does not raise an IndexError.
for i, label in enumerate(predicted_labels):
    plt.subplot(4, 4, i + 1)
    plt.imshow(test_x[i].squeeze().cpu().numpy(), cmap='gray')
    plt.title(f'Predicted: {label}', fontsize=12, color='green')
    plt.axis('off')
plt.suptitle('CNN Predictions on MNIST Test Images', fontsize=16)
plt.tight_layout()
plt.show()
```
> ✅ **关键更新说明**:
> - 将 `if step % 100 == 0` 改为 `if step % 500 == 0 and step > 0`,大幅降低评估频率(`step` 是每个 epoch 内的批次序号,`step > 0` 用于跳过每个 epoch 的第 0 步);
> - 更适合长周期训练观察趋势,减少冗余计算;
> - 特别适用于大模型或资源受限环境。
---
# 知识点
- **Dropout 技术**:训练时随机屏蔽神经元,防止共适应,提升泛化能力;评估或预测时应调用 `model.eval()` 关闭 Dropout,否则结果会带有随机性。
- **L2 正则化**:通过惩罚大权重减少过拟合,使模型更平滑。
- **深度卷积网络设计**:堆叠多层卷积可提取抽象特征,提升分类性能。
```