"""
lenet_full_experiment.py
实验内容:基于LeNet实现手写数字识别(单文件完整版)
本文件包含所有模块(数据加载 → 自写卷积 → LeNet → 训练 → 测试 → 可视化)
此为【子任务 1:数据加载 + 预处理 + 划分 + 可视化】部分
"""
import os
import random
import numpy as np
import matplotlib.pyplot as plt
import torch
from torch.utils.data import DataLoader, random_split, Subset
from torchvision.datasets import MNIST
import torchvision.transforms as transforms
# ============================================================
# 工具函数:固定随机种子
# ============================================================
def set_seed(seed=2025):
    """Seed every random number generator used by this experiment.

    The same value is handed, in order, to Python's ``random`` module, NumPy's
    global generator, PyTorch's default generator and all CUDA generators.

    Args:
        seed: Integer seed shared by all generators.
    """
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for apply_seed in seeders:
        apply_seed(seed)
# ============================================================
# 构建训练集、验证集、测试集使用的 transform
# ============================================================
def build_transforms(augment=False):
    """Build the preprocessing pipelines for training and evaluation data.

    Every pipeline resizes to 32x32, converts to a tensor and applies
    ``Normalize((0.5,), (0.5,))``.

    Args:
        augment: When true, the training pipeline additionally applies a
            ``RandomAffine`` (up to 10 degrees rotation, up to 8% translation)
            right after the resize. When false, the training pipeline is the
            very same object as the evaluation pipeline.

    Returns:
        ``(train_transform, eval_transform)``.
    """

    def compose(*extra_steps):
        # Shared skeleton: resize -> optional extras -> tensor -> normalize.
        return transforms.Compose([
            transforms.Resize((32, 32)),
            *extra_steps,
            transforms.ToTensor(),
            transforms.Normalize((0.5,), (0.5,)),
        ])

    eval_transform = compose()
    if not augment:
        return eval_transform, eval_transform

    train_transform = compose(
        transforms.RandomAffine(degrees=10, translate=(0.08, 0.08)),
    )
    return train_transform, eval_transform
# ============================================================
# 保存 / 加载 数据划分索引(保证可复现)
# ============================================================
def save_splits(path, idx_train, idx_val, idx_test):
    """Persist the train/val/test index lists as a compressed NumPy archive.

    Args:
        path: Destination file. The archive is written to exactly this path.
        idx_train: Indices of the training samples.
        idx_val: Indices of the validation samples.
        idx_test: Indices of the test samples.
    """
    # Writing through an explicit file handle keeps the archive at exactly
    # `path`. Given a bare filename, NumPy appends ".npz" when it is missing,
    # which would put the file somewhere other than the path printed below
    # (and later looked up by load_splits).
    with open(path, "wb") as handle:
        np.savez_compressed(
            handle,
            train=np.array(idx_train),
            val=np.array(idx_val),
            test=np.array(idx_test),
        )
    print(f"[INFO] 已保存数据划分到 {path}")
def load_splits(path):
    """Load previously saved split indices.

    Args:
        path: Location of the archive holding ``train``, ``val`` and ``test``
            arrays.

    Returns:
        ``None`` when ``path`` does not exist; otherwise a dict with the keys
        ``"train"``, ``"val"`` and ``"test"``, each mapped to a plain Python
        list of indices.
    """
    if not os.path.exists(path):
        return None
    # np.load keeps the archive file open until it is closed; the context
    # manager releases the handle as soon as the arrays have been copied out.
    with np.load(path) as archive:
        print(f"[INFO] 已从 {path} 加载已有划分")
        return {
            "train": archive["train"].tolist(),
            "val": archive["val"].tolist(),
            "test": archive["test"].tolist(),
        }
# ============================================================
# 子任务 1 核心:加载 MNIST + 划分 + 可视化
# ============================================================
def load_dataset(batch_size=64, use_mini=False, augment=False, split_path="mnist_split.npz"):
    """Load MNIST, split it into train/val/test subsets and wrap them in loaders.

    The random generators are seeded first. A cached split found at
    ``split_path`` is reused only when its train/val/test sizes match the
    requested mode exactly; otherwise a new split is drawn and written back to
    ``split_path``. Sample images of each subset are displayed before returning.

    Args:
        batch_size: Batch size shared by all three loaders.
        use_mini: When true use the small split (6000 / 1500 / 2000);
            otherwise the full split (48000 / 12000 / whole MNIST test set).
        augment: Forwarded to ``build_transforms``; controls training-time
            augmentation.
        split_path: File used to cache the split indices.

    Returns:
        ``(train_loader, val_loader, test_loader)``. Only the training loader
        shuffles.
    """
    set_seed()

    # ---------- 1. Transforms ----------
    train_tf, eval_tf = build_transforms(augment)

    # ---------- 2. MNIST ----------
    # The training images are opened twice so that the validation subset can
    # use the evaluation transform even when the training transform augments.
    full_train = MNIST(root="./mnist_data", train=True, download=True, transform=train_tf)
    full_train_eval = MNIST(root="./mnist_data", train=True, transform=eval_tf)
    full_test = MNIST(root="./mnist_data", train=False, download=True, transform=eval_tf)

    # ---------- 3. Split indices ----------
    if use_mini:
        mode, n_train, n_val, n_test = "Mini", 6000, 1500, 2000
    else:
        mode, n_train, n_val, n_test = "Full", 48000, 12000, len(full_test)

    saved = load_splits(split_path)
    # Require an exact size match. A "large enough" check would let a cached
    # full split be silently reused for a mini run (48000 >= 6000).
    cache_matches = (
        saved is not None
        and len(saved["train"]) == n_train
        and len(saved["val"]) == n_val
        and len(saved["test"]) == n_test
    )
    if cache_matches:
        idx_train, idx_val, idx_test = saved["train"], saved["val"], saved["test"]
    else:
        print(f"[INFO] 正在构建 {mode} 数据集划分 ...")
        shuffled = list(range(len(full_train)))
        random.shuffle(shuffled)
        idx_train = shuffled[:n_train]
        idx_val = shuffled[n_train:n_train + n_val]
        # The test subset is the leading part of the official test set, unshuffled.
        idx_test = list(range(n_test))
        save_splits(split_path, idx_train, idx_val, idx_test)

    # ---------- 4. Subsets ----------
    train_subset = Subset(full_train, idx_train)
    val_subset = Subset(full_train_eval, idx_val)
    test_subset = Subset(full_test, idx_test)

    # ---------- 5. DataLoaders ----------
    train_loader = DataLoader(train_subset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_subset, batch_size=batch_size, shuffle=False)
    test_loader = DataLoader(test_subset, batch_size=batch_size, shuffle=False)

    print("==== 数据加载完成 ====")
    print(f"Train: {len(train_subset)}")
    print(f"Val: {len(val_subset)}")
    print(f"Test: {len(test_subset)}")

    # ---------- 6. Visual sanity check ----------
    visualize_samples(train_subset, title="训练集样例")
    visualize_samples(val_subset, title="验证集样例")
    visualize_samples(test_subset, title="测试集样例")
    return train_loader, val_loader, test_loader
# ============================================================
# 可视化:展示样例图片(6 张)
# ============================================================
def visualize_samples(dataset, num=6, title="Samples"):
    """Show randomly chosen samples of ``dataset`` in a single row.

    Args:
        dataset: Indexable dataset whose items are ``(image_tensor, label)``.
        num: Number of samples to draw; capped at ``len(dataset)``.
        title: Figure title.
    """
    # random.sample raises ValueError when asked for more items than exist,
    # so a small dataset must not be asked for more samples than it holds.
    num = min(num, len(dataset))
    plt.figure(figsize=(10, 3))
    chosen = random.sample(range(len(dataset)), num)
    for column, idx in enumerate(chosen, start=1):
        img, label = dataset[idx]
        # Undo the normalization before display. Assumes the samples were
        # normalized with mean 0.5 / std 0.5, as build_transforms does.
        img_show = (img * 0.5 + 0.5).numpy().squeeze()
        plt.subplot(1, num, column)
        plt.imshow(img_show, cmap="gray")
        plt.title(f"{label}")
        plt.axis("off")
    plt.suptitle(title)
    plt.show()
# ============================================================
# 子任务 2:手写 im2col 自定义卷积层(CustomConv2d)
# ============================================================
import torch
import torch.nn as nn
class CustomConv2d(nn.Module):
    """2-D convolution built from im2col plus a matrix multiplication.

    Only the forward pass is written by hand. Its purpose is to expose the
    mechanics of convolution: padding, local receptive fields, weight sharing
    and the unfolding of windows into a matrix.

    Args:
        in_channels: Number of input channels.
        out_channels: Number of output channels (filters).
        kernel_size: Kernel height/width as an int or an ``(h, w)`` pair.
        stride: Window step as an int or an ``(h, w)`` pair.
        padding: Zero padding per side as an int or an ``(h, w)`` pair.
        bias: Whether to learn a per-output-channel bias.
    """

    def __init__(self, in_channels, out_channels, kernel_size, stride=1, padding=0, bias=True):
        super().__init__()
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.kh, self.kw = self._pair(kernel_size)
        self.stride = stride
        self.padding = padding
        # Kernel tensor laid out as (out_channels, in_channels, kh, kw).
        self.weight = nn.Parameter(
            torch.randn(out_channels, in_channels, self.kh, self.kw) * 0.01
        )
        if bias:
            self.bias = nn.Parameter(torch.zeros(out_channels))
        else:
            self.bias = None

    @staticmethod
    def _pair(value):
        """Return ``value`` as a ``(height, width)`` pair."""
        if isinstance(value, int):
            return value, value
        first, second = value
        return first, second

    def _pad_input(self, x):
        """Zero-pad the two spatial dimensions of ``x`` (N, C, H, W)."""
        pad_h, pad_w = self._pair(self.padding)
        if pad_h == 0 and pad_w == 0:
            return x
        # F.pad lists the last dimension first: (left, right, top, bottom).
        return nn.functional.pad(x, (pad_w, pad_w, pad_h, pad_h))

    def _im2col(self, x):
        """Unfold every sliding window of ``x`` into a column.

        Args:
            x: Already padded input of shape (N, C, H, W).

        Returns:
            Tensor of shape (N, C*kh*kw, L), where L is the number of window
            positions. Rows are ordered (channel, kernel row, kernel column);
            columns are ordered row-major over window positions. The result
            has the dtype and device of ``x``.
        """
        batch, channels = x.shape[0], x.shape[1]
        stride_h, stride_w = self._pair(self.stride)
        # Two unfolds give (N, C, out_h, out_w, kh, kw): one kh x kw window
        # per output position, without a Python loop over positions.
        windows = x.unfold(2, self.kh, stride_h).unfold(3, self.kw, stride_w)
        out_h, out_w = windows.shape[2], windows.shape[3]
        # Move the window contents in front of the positions, then flatten both.
        return windows.permute(0, 1, 4, 5, 2, 3).reshape(
            batch, channels * self.kh * self.kw, out_h * out_w
        )

    def forward(self, x):
        """Convolve ``x`` (N, C, H, W) and return (N, out_channels, H_out, W_out)."""
        # Step 1: padding.
        x = self._pad_input(x)
        # Step 2: im2col -> (N, C*kh*kw, L).
        cols = self._im2col(x)
        # Step 3: flatten each filter into a row -> (out_c, C*kh*kw).
        flat_weight = self.weight.reshape(self.out_channels, -1)
        # Step 4: matmul broadcasts the 2-D weight over the batch dimension,
        # giving (N, out_c, L) in one call; no per-sample loop is needed.
        out = torch.matmul(flat_weight, cols)
        # Step 5: fold the L positions back into a 2-D feature map.
        stride_h, stride_w = self._pair(self.stride)
        out_h = (x.shape[2] - self.kh) // stride_h + 1
        out_w = (x.shape[3] - self.kw) // stride_w + 1
        out = out.reshape(x.shape[0], self.out_channels, out_h, out_w)
        # Step 6: bias, broadcast over batch and spatial dimensions.
        if self.bias is not None:
            out = out + self.bias.view(1, -1, 1, 1)
        return out
# ============================================================
# 自定义卷积层正确性验证(用于实验报告截图)
# ============================================================
def test_custom_conv():
    """Print the outputs of CustomConv2d and nn.Conv2d side by side.

    Both layers receive identical parameters and the same random 6x6 input;
    the mean absolute difference of their outputs is printed last.
    """
    print("========= 自定义卷积层功能测试 =========")
    sample = torch.randn(1, 1, 6, 6)
    ours = CustomConv2d(in_channels=1, out_channels=1, kernel_size=3, padding=1)
    reference = nn.Conv2d(1, 1, 3, padding=1)
    # Give the reference layer the hand-written layer's weight and bias.
    with torch.no_grad():
        reference.weight.copy_(ours.weight)
        reference.bias.copy_(ours.bias)
    ours_out = ours(sample)
    reference_out = reference(sample)
    print("自写输出:\n", ours_out)
    print("PyTorch输出:\n", reference_out)
    print("差值:\n", (ours_out - reference_out).abs().mean())
    print("========= 测试结束(可截图写入报告)=========")
# ============================================================
# 子任务 3(增强版):LeNet-5 原始结构实现 + 输出测试信息
# ============================================================
class LeNet5(nn.Module):
    """LeNet-5 style classifier for 1x32x32 inputs.

    Layer sequence:
        C1: Conv(1 -> 6, 5x5)  + tanh
        S2: AvgPool 2x2
        C3: Conv(6 -> 16, 5x5) + tanh
        S4: AvgPool 2x2
        FC: 400 -> 120 -> 84 -> 10 (tanh after the first two)

    Args:
        use_custom_conv: Use ``CustomConv2d`` for both convolutions instead of
            ``nn.Conv2d``.
    """

    def __init__(self, use_custom_conv=False):
        super().__init__()
        if use_custom_conv:
            conv_cls = CustomConv2d
        else:
            conv_cls = nn.Conv2d
        self.conv1 = conv_cls(1, 6, kernel_size=5, stride=1, padding=0)
        self.pool1 = nn.AvgPool2d(2, 2)
        self.conv2 = conv_cls(6, 16, kernel_size=5, stride=1, padding=0)
        self.pool2 = nn.AvgPool2d(2, 2)
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)
        self.tanh = nn.Tanh()

    def forward(self, x, verbose=False):
        """Return class logits for ``x``.

        Args:
            x: Input batch.
            verbose: When true, print the tensor shape after every stage.
        """

        def trace(stage, tensor):
            # One place for the optional per-stage shape report.
            if verbose:
                print(f"{stage}: {tensor.shape}")

        if verbose:
            print("\n==== LeNet-5 前向传播逐层输出形状 ====")
        trace("输入", x)

        features = self.tanh(self.conv1(x))
        trace("C1 卷积后", features)
        features = self.pool1(features)
        trace("S2 平均池化后", features)
        features = self.tanh(self.conv2(features))
        trace("C3 卷积后", features)
        features = self.pool2(features)
        trace("S4 平均池化后", features)

        # Collapse (channels, height, width) into one feature vector per sample.
        flat = features.view(features.size(0), -1)
        trace("展平后", flat)

        hidden = self.tanh(self.fc1(flat))
        trace("FC1 后", hidden)
        hidden = self.tanh(self.fc2(hidden))
        trace("FC2 后", hidden)
        logits = self.fc3(hidden)
        trace("输出 logits", logits)
        return logits
# ============================================================
# 打印模型信息、参数量、以及逐层输出(可截图)
# ============================================================
def count_parameters(model):
    """Return the total number of elements in the model's trainable parameters.

    Parameters whose ``requires_grad`` is false are not counted.
    """
    total = 0
    for param in model.parameters():
        if not param.requires_grad:
            continue
        total += param.numel()
    return total
def test_lenet_verbose():
    """Print the LeNet-5 structure, its parameter count and per-layer shapes.

    A random 1x1x32x32 tensor is pushed through the network in verbose mode.
    """
    print("\n===================== LeNet-5 模型结构测试 =====================")
    net = LeNet5(use_custom_conv=False)
    print(net)
    print("\n总参数量:", count_parameters(net))

    # Dummy input; only the shapes matter here.
    dummy = torch.randn(1, 1, 32, 32)
    print("\n输入张量形状:", dummy.shape)

    # Verbose mode prints the shape after every layer.
    logits = net(dummy, verbose=True)
    print("\n最终输出 logits:", logits)
    print("最终输出形状:", logits.shape)
    print("\n===================== 测试结束(可截图写入报告) =====================\n")
# ============================================================
# 子任务 4:LeNet-5 训练流程(训练 + 验证)
# ============================================================
def _run_epoch(model, loader, criterion, device, optimizer=None):
    """Run one pass over ``loader``; update weights only if ``optimizer`` is given.

    Returns:
        ``(mean_loss, accuracy)`` averaged over the samples actually seen, or
        ``(nan, nan)`` when the loader yields no samples.
    """
    training = optimizer is not None
    model.train(training)
    loss_sum = 0.0
    correct = 0
    seen = 0
    # Gradients are tracked only while training.
    with torch.set_grad_enabled(training):
        for imgs, labels in loader:
            imgs, labels = imgs.to(device), labels.to(device)
            if training:
                optimizer.zero_grad()
            outputs = model(imgs)
            loss = criterion(outputs, labels)
            if training:
                loss.backward()
                optimizer.step()
            # The criterion returns a batch mean; weight it by the batch size
            # so a smaller final batch is not over-represented.
            loss_sum += loss.item() * imgs.size(0)
            _, pred = outputs.max(1)
            seen += labels.size(0)
            correct += pred.eq(labels).sum().item()
    if seen == 0:
        return float("nan"), float("nan")
    # Divide by the samples seen rather than len(loader.dataset): the two
    # differ when the loader drops the last batch or uses a sampler.
    return loss_sum / seen, correct / seen


def train_lenet(
    model,
    train_loader,
    val_loader,
    device="cpu",
    epochs=12,
    lr=0.01
):
    """Train ``model`` and validate it after every epoch.

    Uses SGD with momentum 0.9 and cross-entropy loss, and prints one summary
    line per epoch.

    Args:
        model: Network to train; it is moved to ``device``.
        train_loader: Loader yielding ``(images, labels)`` training batches.
        val_loader: Loader yielding ``(images, labels)`` validation batches.
        device: Device on which to run.
        epochs: Number of passes over ``train_loader``.
        lr: Learning rate.

    Returns:
        ``(model, history)``, where ``history`` maps ``"train_loss"``,
        ``"val_loss"``, ``"train_acc"`` and ``"val_acc"`` to lists holding one
        value per epoch. Accuracies are fractions in [0, 1]; the values of an
        empty loader are nan.
    """
    print("\n===================== 开始训练 LeNet-5 =====================")
    model = model.to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(model.parameters(), lr=lr, momentum=0.9)

    # Per-epoch curves.
    history = {
        "train_loss": [],
        "val_loss": [],
        "train_acc": [],
        "val_acc": []
    }

    for epoch in range(1, epochs + 1):
        train_loss, train_acc = _run_epoch(
            model, train_loader, criterion, device, optimizer=optimizer
        )
        # Validation: no optimizer, hence no parameter updates.
        val_loss, val_acc = _run_epoch(model, val_loader, criterion, device)

        history["train_loss"].append(train_loss)
        history["val_loss"].append(val_loss)
        history["train_acc"].append(train_acc)
        history["val_acc"].append(val_acc)

        print(f"Epoch [{epoch}/{epochs}] "
              f"Train Loss: {train_loss:.4f} | Train Acc: {train_acc*100:.2f}% "
              f"|| Val Loss: {val_loss:.4f} | Val Acc: {val_acc*100:.2f}%")

    print("===================== 训练结束 =====================\n")
    return model, history
# ============================================================
# 子任务 4:便捷运行函数(可直接执行)
# ============================================================
def run_training_task4():
    """Train LeNet-5 on the mini split and print the accuracy history.

    Returns:
        ``(model, history, test_loader)``.
    """
    device = "cpu"
    if torch.cuda.is_available():
        device = "cuda"
    print(f"[INFO] 使用设备:{device}")

    # Data from sub-task 1: mini split for speed, no augmentation.
    loaders = load_dataset(batch_size=64, use_mini=True, augment=False)
    train_loader, val_loader, test_loader = loaders

    # Network from sub-task 3.
    net = LeNet5(use_custom_conv=False)

    # Training run.
    hyper_params = {"device": device, "epochs": 12, "lr": 0.01}
    net, history = train_lenet(net, train_loader, val_loader, **hyper_params)

    print("\n训练准确率记录:", history["train_acc"])
    print("验证准确率记录:", history["val_acc"])
    return net, history, test_loader
# ============================================================
# 子任务 5:模型测试(独立测试集)
# ============================================================
def evaluate_on_test(model, test_loader, device="cpu"):
    """Evaluate ``model`` on ``test_loader`` and print loss and accuracy.

    Args:
        model: Trained network; it is moved to ``device`` and put in eval mode.
        test_loader: Loader yielding ``(images, labels)`` batches.
        device: Device on which to run.

    Returns:
        ``(avg_loss, acc)`` averaged over the samples actually seen, with
        ``acc`` a fraction in [0, 1]. Both are nan when the loader yields no
        samples.
    """
    # The batches are moved to `device` below, so the model must live there
    # as well; train_lenet does the same before training.
    model = model.to(device)
    model.eval()
    criterion = nn.CrossEntropyLoss()
    total = 0
    correct = 0
    test_loss = 0.0
    with torch.no_grad():
        for imgs, labels in test_loader:
            imgs, labels = imgs.to(device), labels.to(device)
            outputs = model(imgs)
            loss = criterion(outputs, labels)
            # The criterion returns a batch mean; weight it by the batch size.
            test_loss += loss.item() * imgs.size(0)
            _, pred = outputs.max(1)
            total += labels.size(0)
            correct += pred.eq(labels).sum().item()
    if total == 0:
        avg_loss, acc = float("nan"), float("nan")
    else:
        # Divide by the samples seen rather than len(test_loader.dataset):
        # the two differ when the loader drops batches or uses a sampler.
        avg_loss = test_loss / total
        acc = correct / total
    print("\n===================== 测试集评估 =====================")
    print(f"测试集 Loss: {avg_loss:.4f}")
    print(f"测试集 Accuracy: {acc*100:.2f}%")
    print("=======================================================\n")
    return avg_loss, acc
# ============================================================
# 绘制训练曲线(loss + acc)
# ============================================================
def plot_training_curves(history):
    """Plot loss and accuracy curves for training and validation side by side.

    Args:
        history: Dict with the keys ``"train_loss"``, ``"val_loss"``,
            ``"train_acc"`` and ``"val_acc"``, each holding one value per epoch.
    """
    n_epochs = len(history["train_loss"])
    epoch_axis = range(1, n_epochs + 1)
    # (history key suffix, legend tag, y-axis label, panel title)
    panels = (
        ("loss", "Loss", "Loss", "Loss Curve"),
        ("acc", "Acc", "Accuracy", "Accuracy Curve"),
    )

    plt.figure(figsize=(12, 4))
    for position, (suffix, tag, y_label, panel_title) in enumerate(panels, start=1):
        plt.subplot(1, 2, position)
        plt.plot(epoch_axis, history[f"train_{suffix}"], label=f"Train {tag}")
        plt.plot(epoch_axis, history[f"val_{suffix}"], label=f"Val {tag}")
        plt.xlabel("Epoch")
        plt.ylabel(y_label)
        plt.title(panel_title)
        plt.legend()
    plt.suptitle("Training Curves")
    plt.show()
# ============================================================
# 测试集样例预测(10 张)
# ============================================================
def visualize_test_predictions(model, test_loader, device="cpu", num=10):
    """Show images from the first test batch with true and predicted labels.

    Args:
        model: Trained network; it is put in eval mode.
        test_loader: Loader yielding ``(images, labels)`` batches.
        device: Device on which the model runs.
        num: Number of images to show; capped at the size of the first batch.

    Raises:
        ValueError: If ``test_loader`` yields no batches.
    """
    model.eval()

    # Only the first batch is needed.
    first_batch = next(iter(test_loader), None)
    if first_batch is None:
        raise ValueError("test_loader yielded no batches")
    batch_imgs, batch_labels = first_batch
    # A batch smaller than `num` must not be indexed past its end.
    num = min(num, batch_imgs.size(0))
    images = batch_imgs[:num]
    labels = batch_labels[:num]

    with torch.no_grad():
        outputs = model(images.to(device))
        _, preds = outputs.max(1)

    # ---------- Plot ----------
    plt.figure(figsize=(14, 4))
    for i in range(num):
        # Undo the normalization before display, as visualize_samples does.
        # Assumes mean 0.5 / std 0.5 as set in build_transforms.
        img = (images[i] * 0.5 + 0.5).squeeze().numpy()
        plt.subplot(1, num, i + 1)
        plt.imshow(img, cmap="gray")
        plt.title(f"T:{labels[i].item()} P:{preds[i].item()}")
        plt.axis("off")
    plt.suptitle("Test Prediction Samples")
    plt.show()
# ============================================================
# 综合运行:训练 → 测试 → 可视化(最终版)
# ============================================================
def run_full_experiment():
    """Run the whole pipeline: load data, train, evaluate, plot, show predictions."""
    device = "cpu"
    if torch.cuda.is_available():
        device = "cuda"
    print(f"[INFO] 使用设备:{device}")

    # Sub-task 1: data (mini split, no augmentation).
    loaders = load_dataset(batch_size=64, use_mini=True, augment=False)
    train_loader, val_loader, test_loader = loaders

    # Sub-task 3: model.
    net = LeNet5(use_custom_conv=False)

    # Sub-task 4: training.
    hyper_params = {"device": device, "epochs": 12, "lr": 0.01}
    net, history = train_lenet(net, train_loader, val_loader, **hyper_params)

    # Sub-task 5: evaluation on the test set, then the figures.
    evaluate_on_test(net, test_loader, device)
    plot_training_curves(history)
    visualize_test_predictions(net, test_loader, device)
# 允许独立运行
# 独立测试
def _main():
    """Script entry point: run the sub-task 4 training, then the full experiment."""
    # NOTE(review): both calls load the data and train a model, so running the
    # script trains twice. Confirm that this is intended.
    run_training_task4()
    run_full_experiment()


if __name__ == "__main__":
    _main()