# 深度学习实验11代码 - CSDN博客

"""
lenet_full_experiment.py
实验内容：基于LeNet实现手写数字识别（单文件完整版）

本文件包含所有模块（数据加载 → 自写卷积 → LeNet → 训练 → 测试 → 可视化）
本节为【子任务 1：数据加载 + 预处理 + 划分 + 可视化】部分，其余子任务的代码依次排在后面
"""

import os
import random
import numpy as np
import matplotlib.pyplot as plt

import torch
from torch.utils.data import DataLoader, random_split, Subset
from torchvision.datasets import MNIST
import torchvision.transforms as transforms


# ============================================================
# 工具函数：固定随机种子
# ============================================================
def set_seed(seed=2025):
    """Seed every random number generator this file relies on.

    The same value is handed, in order, to Python's ``random`` module,
    NumPy's global generator, ``torch.manual_seed`` and
    ``torch.cuda.manual_seed_all``.

    Args:
        seed: Integer seed shared by all generators.
    """
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for apply_seed in seeders:
        apply_seed(seed)


# ============================================================
# 构建训练集、验证集、测试集使用的 transform
# ============================================================
def build_transforms(augment=False):
    """Build the preprocessing pipelines for training and evaluation.

    Both pipelines resize to 32x32, convert to a tensor and normalize with
    mean 0.5 / std 0.5. With ``augment=True`` the training pipeline also
    applies a small random affine transform (rotation up to 10 degrees,
    translation up to 8% per axis) right after the resize.

    Args:
        augment: Whether the training pipeline gets the random affine step.

    Returns:
        ``(train_transform, eval_transform)``. When ``augment`` is False both
        entries are the very same ``Compose`` object.
    """

    def make_pipeline(extra_steps=()):
        # Resize first, optional PIL-level augmentation next, tensor ops last.
        steps = [transforms.Resize((32, 32))]
        steps.extend(extra_steps)
        steps.append(transforms.ToTensor())
        steps.append(transforms.Normalize((0.5,), (0.5,)))
        return transforms.Compose(steps)

    eval_transform = make_pipeline()
    if not augment:
        return eval_transform, eval_transform

    jitter = transforms.RandomAffine(degrees=10, translate=(0.08, 0.08))
    return make_pipeline([jitter]), eval_transform


# ============================================================
# 保存 / 加载 数据划分索引（保证可复现）
# ============================================================
def save_splits(path, idx_train, idx_val, idx_test):
    """Persist the train/val/test index lists as a compressed ``.npz`` archive.

    The archive stores three arrays under the keys ``train``, ``val`` and
    ``test``.

    Args:
        path: Destination file. The archive is written to exactly this path,
            whether or not it ends in ``.npz``.
        idx_train: Indices of the training samples.
        idx_val: Indices of the validation samples.
        idx_test: Indices of the test samples.
    """
    # Passing an open file object instead of the path string stops NumPy from
    # appending ".npz" to names without that suffix. Otherwise the file would
    # be written somewhere other than the path reported below, and a later
    # lookup of `path` would not find it.
    with open(path, "wb") as handle:
        np.savez_compressed(
            handle,
            train=np.array(idx_train),
            val=np.array(idx_val),
            test=np.array(idx_test),
        )
    print(f"[INFO] 已保存数据划分到 {path}")


def load_splits(path):
    """Load previously saved train/val/test index lists.

    Args:
        path: Location of the ``.npz`` archive holding the arrays ``train``,
            ``val`` and ``test``.

    Returns:
        ``None`` when ``path`` does not exist; otherwise a dict with the keys
        ``"train"``, ``"val"`` and ``"test"``, each mapped to a plain Python
        list of indices.
    """
    if not os.path.exists(path):
        return None

    # np.load keeps the archive's file handle open until it is closed, so the
    # arrays are read inside a context manager.
    with np.load(path) as archive:
        print(f"[INFO] 已从 {path} 加载已有划分")
        return {
            "train": archive["train"].tolist(),
            "val": archive["val"].tolist(),
            "test": archive["test"].tolist(),
        }


# ============================================================
# 子任务 1 核心：加载 MNIST + 划分 + 可视化
# ============================================================
def load_dataset(batch_size=64, use_mini=False, augment=False, split_path="mnist_split.npz"):
    """Load MNIST, split it into train/val/test, and return the data loaders.

    Train and validation indices are drawn from a shuffled copy of the MNIST
    training set; test indices are the leading entries of the MNIST test set.
    The split is cached at ``split_path``. A cached split is reused only when
    its train/val/test sizes match the requested mode exactly; otherwise it
    is rebuilt and overwritten. This prevents a Full split left on disk from
    being silently used for a Mini run, or the other way round.

    Six random samples of each subset are plotted before returning.

    Args:
        batch_size: Batch size for all three loaders.
        use_mini: If True use 6000 / 1500 / 2000 samples (train/val/test);
            otherwise 48000 / 12000 / the whole MNIST test set.
        augment: Forwarded to ``build_transforms``; enables training-set
            augmentation.
        split_path: File used to cache the split indices.

    Returns:
        ``(train_loader, val_loader, test_loader)``. Only the training loader
        shuffles.
    """
    set_seed()

    # ---------- 1. Transforms ----------
    train_tf, eval_tf = build_transforms(augment)

    # ---------- 2. MNIST ----------
    # The training images are wrapped twice so that the validation subset can
    # use the evaluation transform while the training subset may be augmented.
    full_train = MNIST(root="./mnist_data", train=True, download=True, transform=train_tf)
    full_train_eval = MNIST(root="./mnist_data", train=True, transform=eval_tf)
    full_test = MNIST(root="./mnist_data", train=False, download=True, transform=eval_tf)

    # ---------- 3. Split sizes for the requested mode ----------
    if use_mini:
        mode_name, n_train, n_val, n_test = "Mini", 6000, 1500, 2000
    else:
        mode_name, n_train, n_val, n_test = "Full", 48000, 12000, len(full_test)

    # ---------- 4. Reuse the cached split only if it fits this mode ----------
    saved = load_splits(split_path)  # returns None when the file is missing
    reusable = (
        saved is not None
        and len(saved["train"]) == n_train
        and len(saved["val"]) == n_val
        and len(saved["test"]) == n_test
    )

    if reusable:
        idx_train, idx_val, idx_test = saved["train"], saved["val"], saved["test"]
    else:
        print(f"[INFO] 正在构建 {mode_name} 数据集划分 ...")
        shuffled = list(range(len(full_train)))
        random.shuffle(shuffled)

        idx_train = shuffled[:n_train]
        idx_val = shuffled[n_train:n_train + n_val]
        idx_test = list(range(n_test))

        save_splits(split_path, idx_train, idx_val, idx_test)

    # ---------- 5. Subsets ----------
    train_subset = Subset(full_train, idx_train)
    val_subset = Subset(full_train_eval, idx_val)
    test_subset = Subset(full_test, idx_test)

    # ---------- 6. DataLoaders ----------
    train_loader = DataLoader(train_subset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_subset, batch_size=batch_size, shuffle=False)
    test_loader = DataLoader(test_subset, batch_size=batch_size, shuffle=False)

    print("==== 数据加载完成 ====")
    print(f"Train: {len(train_subset)}")
    print(f"Val:   {len(val_subset)}")
    print(f"Test:  {len(test_subset)}")

    # ---------- 7. Visual sanity check ----------
    visualize_samples(train_subset, title="训练集样例")
    visualize_samples(val_subset, title="验证集样例")
    visualize_samples(test_subset, title="测试集样例")

    return train_loader, val_loader, test_loader


# ============================================================
# 可视化：展示样例图片（6 张）
# ============================================================
def visualize_samples(dataset, num=6, title="Samples"):
    """Plot randomly chosen samples of ``dataset`` in a single row.

    Args:
        dataset: Indexable dataset returning ``(image_tensor, label)`` pairs.
            Images are assumed to be normalized with mean 0.5 / std 0.5, as
            ``build_transforms`` does.
        num: Number of samples to show. If the dataset holds fewer items,
            all of them are shown instead of raising.
        title: Figure title.
    """
    # random.sample raises ValueError when asked for more items than exist,
    # so never request more than the dataset can provide.
    count = min(num, len(dataset))

    plt.figure(figsize=(10, 3))
    chosen = random.sample(range(len(dataset)), count)
    for slot, idx in enumerate(chosen, start=1):
        img, label = dataset[idx]
        # Map the normalized pixels back from [-1, 1] to [0, 1] for display.
        img_show = (img * 0.5 + 0.5).numpy().squeeze()

        plt.subplot(1, count, slot)
        plt.imshow(img_show, cmap="gray")
        plt.title(f"{label}")
        plt.axis("off")

    plt.suptitle(title)
    plt.show()
# ============================================================
# 子任务 2：手写 im2col 自定义卷积层（CustomConv2d）
# ============================================================

import torch
import torch.nn as nn


class CustomConv2d(nn.Module):
    """2-D convolution implemented by hand with im2col + matrix multiplication.

    Only the forward pass is written explicitly. It exists to make padding,
    local receptive fields, weight sharing and the patch-to-column unfolding
    visible.

    Args:
        in_channels: Number of input channels.
        out_channels: Number of output channels (filters).
        kernel_size: Kernel size, an int or a ``(kh, kw)`` tuple.
        stride: Integer step of the sliding window, shared by both axes.
        padding: Integer amount of zero padding added to all four borders.
        bias: Whether a learnable per-channel bias is added.
    """

    def __init__(self, in_channels, out_channels, kernel_size, stride=1, padding=0, bias=True):
        super().__init__()

        # Accept kernel_size as an int or as a (kh, kw) tuple.
        if isinstance(kernel_size, int):
            kernel_size = (kernel_size, kernel_size)

        self.in_channels = in_channels
        self.out_channels = out_channels
        self.kh, self.kw = kernel_size
        self.stride = stride
        self.padding = padding

        # Filter bank of shape (out_channels, in_channels, kh, kw).
        self.weight = nn.Parameter(
            torch.randn(out_channels, in_channels, self.kh, self.kw) * 0.01
        )

        # Optional bias, one value per output channel.
        if bias:
            self.bias = nn.Parameter(torch.zeros(out_channels))
        else:
            self.bias = None

    def _pad_input(self, x):
        """Zero-pad the last two dimensions of ``x`` by ``self.padding``."""
        if self.padding == 0:
            return x
        return nn.functional.pad(x, (self.padding, self.padding, self.padding, self.padding))

    def _output_size(self, height, width):
        """Return ``(out_h, out_w)`` for an already padded ``height x width`` input."""
        out_h = (height - self.kh) // self.stride + 1
        out_w = (width - self.kw) // self.stride + 1
        return out_h, out_w

    def _im2col(self, x):
        """Unfold every sliding window of ``x`` into one column.

        Args:
            x: Padded input of shape ``(N, C, H, W)``.

        Returns:
            Tensor of shape ``(N, C*kh*kw, L)`` where ``L = out_h * out_w`` is
            the number of window positions, visited row by row.
        """
        N, C, H, W = x.shape
        out_h, out_w = self._output_size(H, W)

        # Allocate with the input's dtype as well as its device; a default
        # float32 buffer would break the matmul for float64/float16 inputs.
        cols = torch.zeros(
            N, C * self.kh * self.kw, out_h * out_w,
            dtype=x.dtype, device=x.device,
        )

        col_idx = 0
        for i in range(0, H - self.kh + 1, self.stride):
            for j in range(0, W - self.kw + 1, self.stride):
                patch = x[:, :, i:i + self.kh, j:j + self.kw]   # (N, C, kh, kw)
                cols[:, :, col_idx] = patch.reshape(N, -1)
                col_idx += 1

        return cols  # (N, C*kh*kw, L)

    def forward(self, x):
        """Convolve ``x`` of shape ``(N, C, H, W)``.

        Returns:
            Tensor of shape ``(N, out_channels, out_h, out_w)``.
        """
        # Step 1: zero padding.
        x = self._pad_input(x)

        # Step 2: unfold the windows -> (N, C*kh*kw, L).
        cols = self._im2col(x)

        # Step 3: flatten every filter into one row -> (out_c, C*kh*kw).
        w = self.weight.reshape(self.out_channels, -1)

        # Step 4: one batched product. matmul broadcasts the 2-D weight over
        # the batch dimension of cols, so out[n] = w @ cols[n] for every
        # sample without a Python loop. Result: (N, out_c, L).
        out = torch.matmul(w, cols)

        # Step 5: fold the L window positions back into a feature map.
        out_h, out_w = self._output_size(x.shape[2], x.shape[3])
        out = out.reshape(x.shape[0], self.out_channels, out_h, out_w)

        # Step 6: add the per-channel bias.
        if self.bias is not None:
            out = out + self.bias.view(1, -1, 1, 1)

        return out


# ============================================================
# 自定义卷积层正确性验证（用于实验报告截图）
# ============================================================
def test_custom_conv():
    """Print the output of ``CustomConv2d`` next to ``nn.Conv2d`` for one input.

    Both layers receive identical weights and bias, so the printed mean
    absolute difference shows how closely the hand-written layer matches the
    built-in one.
    """
    print("========= 自定义卷积层功能测试 =========")

    sample = torch.randn(1, 1, 6, 6)
    ours = CustomConv2d(in_channels=1, out_channels=1, kernel_size=3, padding=1)
    reference = nn.Conv2d(1, 1, 3, padding=1)

    # Give the built-in layer the same parameters as the hand-written one.
    for name in ("weight", "bias"):
        getattr(reference, name).data = getattr(ours, name).data.clone()

    out_custom = ours(sample)
    out_torch = reference(sample)
    mean_abs_diff = (out_custom - out_torch).abs().mean()

    print("自写输出：\n", out_custom)
    print("PyTorch输出：\n", out_torch)
    print("差值：\n", mean_abs_diff)

    print("========= 测试结束（可截图写入报告）=========")
# ============================================================
# 子任务 3（增强版）：LeNet-5 原始结构实现 + 输出测试信息
# ============================================================

class LeNet5(nn.Module):
    """LeNet-5 style network for 1x32x32 inputs.

    Layer sequence:
        C1: Conv(1 -> 6, 5x5)  -> tanh
        S2: AvgPool 2x2
        C3: Conv(6 -> 16, 5x5) -> tanh
        S4: AvgPool 2x2
        FC: 400 -> 120 -> 84 -> 10 (tanh after the first two)

    Args:
        use_custom_conv: Use ``CustomConv2d`` for both convolutions instead
            of ``nn.Conv2d``.
    """

    def __init__(self, use_custom_conv=False):
        super().__init__()

        if use_custom_conv:
            conv_cls = CustomConv2d
        else:
            conv_cls = nn.Conv2d

        # Feature extractor.
        self.conv1 = conv_cls(1, 6, kernel_size=5, stride=1, padding=0)
        self.pool1 = nn.AvgPool2d(2, 2)
        self.conv2 = conv_cls(6, 16, kernel_size=5, stride=1, padding=0)
        self.pool2 = nn.AvgPool2d(2, 2)

        # Classifier head.
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

        self.tanh = nn.Tanh()

    def forward(self, x, verbose=False):
        """Return the class logits for ``x``.

        Args:
            x: Input batch; the first fully connected layer expects the
                flattened feature map to have 400 values per sample.
            verbose: Print the tensor shape after every stage.

        Returns:
            Logits of shape ``(batch, 10)``.
        """

        def report(stage, tensor):
            # Shape trace, emitted only in verbose mode.
            if verbose:
                print(f"{stage}: {tensor.shape}")

        if verbose:
            print("\n==== LeNet-5 前向传播逐层输出形状 ====")
        report("输入", x)

        x = self.conv1(x)
        x = self.tanh(x)
        report("C1 卷积后", x)
        x = self.pool1(x)
        report("S2 平均池化后", x)

        x = self.conv2(x)
        x = self.tanh(x)
        report("C3 卷积后", x)
        x = self.pool2(x)
        report("S4 平均池化后", x)

        batch = x.size(0)
        x = x.view(batch, -1)
        report("展平后", x)

        x = self.tanh(self.fc1(x))
        report("FC1 后", x)
        x = self.tanh(self.fc2(x))
        report("FC2 后", x)
        x = self.fc3(x)
        report("输出 logits", x)

        return x


# ============================================================
# 打印模型信息、参数量、以及逐层输出（可截图）
# ============================================================
def count_parameters(model):
    """Return the number of trainable scalar parameters in ``model``.

    Parameters with ``requires_grad`` set to False are not counted.
    """
    total = 0
    for param in model.parameters():
        if param.requires_grad:
            total += param.numel()
    return total


def test_lenet_verbose():
    """Print the LeNet5 structure, its parameter count and a shape trace.

    A single random 1x1x32x32 input is pushed through a freshly built model
    with ``verbose=True`` so that every intermediate shape is printed.
    """
    print("\n===================== LeNet-5 模型结构测试 =====================")

    net = LeNet5(use_custom_conv=False)
    n_params = count_parameters(net)
    print(net)
    print("\n总参数量：", n_params)

    # Random stand-in for one image.
    dummy = torch.randn(1, 1, 32, 32)
    print("\n输入张量形状:", dummy.shape)

    # verbose=True makes the model print the shape after every layer.
    logits = net(dummy, verbose=True)

    print("\n最终输出 logits:", logits)
    print("最终输出形状:", logits.shape)

    print("\n===================== 测试结束（可截图写入报告） =====================\n")

# ============================================================
# 子任务 4：LeNet-5 训练流程（训练 + 验证）
# ============================================================

def _run_epoch(model, loader, criterion, device, optimizer=None):
    """Run one pass over ``loader`` and return ``(mean_loss, accuracy)``.

    With an ``optimizer`` the model is put in training mode and updated after
    every batch; without one it is put in eval mode and gradients are
    disabled. Both metrics are averaged over the samples actually seen, and
    are NaN when the loader yields no samples.
    """
    training = optimizer is not None
    model.train(training)

    loss_sum = 0.0
    n_correct = 0
    n_seen = 0

    with torch.set_grad_enabled(training):
        for imgs, labels in loader:
            imgs, labels = imgs.to(device), labels.to(device)

            if training:
                optimizer.zero_grad()
            outputs = model(imgs)
            loss = criterion(outputs, labels)
            if training:
                loss.backward()
                optimizer.step()

            # criterion returns the batch mean, so weight it by the batch size.
            loss_sum += loss.item() * imgs.size(0)
            _, pred = outputs.max(1)
            n_seen += labels.size(0)
            n_correct += pred.eq(labels).sum().item()

    if n_seen == 0:
        return float("nan"), float("nan")
    return loss_sum / n_seen, n_correct / n_seen


def train_lenet(
    model,
    train_loader,
    val_loader,
    device="cpu",
    epochs=12,
    lr=0.01
):
    """Train ``model`` with SGD and validate it after every epoch.

    Setup: SGD with momentum 0.9 and ``CrossEntropyLoss``. After each epoch
    one summary line with training and validation loss/accuracy is printed.

    Loss and accuracy are averaged over the samples actually iterated, so
    they stay correct for loaders that drop the last batch or use a sampler.
    An empty loader yields NaN metrics instead of a ZeroDivisionError.

    Args:
        model: Network to train; it is moved to ``device``.
        train_loader: Loader yielding ``(images, labels)`` training batches.
        val_loader: Loader yielding ``(images, labels)`` validation batches.
        device: Device the model and batches are moved to.
        epochs: Number of passes over ``train_loader``.
        lr: Learning rate for SGD.

    Returns:
        ``(model, history)`` where ``history`` maps ``"train_loss"``,
        ``"val_loss"``, ``"train_acc"`` and ``"val_acc"`` to lists holding
        one float per epoch.
    """

    print("\n===================== 开始训练 LeNet-5 =====================")

    model = model.to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(model.parameters(), lr=lr, momentum=0.9)

    # Per-epoch curves, later used for plotting.
    history = {key: [] for key in ("train_loss", "val_loss", "train_acc", "val_acc")}

    for epoch in range(1, epochs + 1):
        # Training pass (updates the parameters).
        train_loss, train_acc = _run_epoch(
            model, train_loader, criterion, device, optimizer=optimizer
        )
        # Validation pass (no parameter updates; leaves the model in eval mode).
        val_loss, val_acc = _run_epoch(model, val_loader, criterion, device)

        history["train_loss"].append(train_loss)
        history["val_loss"].append(val_loss)
        history["train_acc"].append(train_acc)
        history["val_acc"].append(val_acc)

        print(f"Epoch [{epoch}/{epochs}] "
              f"Train Loss: {train_loss:.4f} | Train Acc: {train_acc*100:.2f}% "
              f"|| Val Loss: {val_loss:.4f} | Val Acc: {val_acc*100:.2f}%")

    print("===================== 训练结束 =====================\n")

    return model, history


# ============================================================
# 子任务 4：便捷运行函数（可直接执行）
# ============================================================
def run_training_task4():
    """Train LeNet5 on the Mini split and print the accuracy history.

    Returns:
        ``(model, history, test_loader)`` so a caller can continue with the
        evaluation on the test set.
    """
    if torch.cuda.is_available():
        device = "cuda"
    else:
        device = "cpu"
    print(f"[INFO] 使用设备：{device}")

    # Mini split for speed; no augmentation.
    loaders = load_dataset(batch_size=64, use_mini=True, augment=False)
    train_loader, val_loader, test_loader = loaders

    # Built-in convolution layers, then the standard training routine.
    trained, history = train_lenet(
        LeNet5(use_custom_conv=False),
        train_loader,
        val_loader,
        device=device,
        epochs=12,
        lr=0.01,
    )

    print("\n训练准确率记录：", history["train_acc"])
    print("验证准确率记录：", history["val_acc"])

    return trained, history, test_loader

# ============================================================
# 子任务 5：模型测试（独立测试集）
# ============================================================
def evaluate_on_test(model, test_loader, device="cpu"):
    """Evaluate ``model`` on ``test_loader`` and print loss and accuracy.

    Both metrics are averaged over the samples actually iterated, so they
    stay correct for loaders that drop the last batch or use a sampler. An
    empty loader yields NaN for both instead of a ZeroDivisionError.

    Args:
        model: Network to evaluate; it is moved to ``device`` (as
            ``train_lenet`` does) and switched to eval mode.
        test_loader: Loader yielding ``(images, labels)`` batches.
        device: Device the model and batches are moved to.

    Returns:
        ``(avg_loss, accuracy)`` as floats; accuracy is a fraction in [0, 1].
    """
    model = model.to(device)
    model.eval()
    criterion = nn.CrossEntropyLoss()

    seen = 0
    hits = 0
    loss_sum = 0.0

    with torch.no_grad():
        for imgs, labels in test_loader:
            imgs, labels = imgs.to(device), labels.to(device)

            outputs = model(imgs)
            loss = criterion(outputs, labels)

            # criterion returns the batch mean, so weight it by the batch size.
            loss_sum += loss.item() * imgs.size(0)
            _, pred = outputs.max(1)
            seen += labels.size(0)
            hits += pred.eq(labels).sum().item()

    if seen == 0:
        avg_loss = acc = float("nan")
    else:
        avg_loss = loss_sum / seen
        acc = hits / seen

    print("\n===================== 测试集评估 =====================")
    print(f"测试集 Loss: {avg_loss:.4f}")
    print(f"测试集 Accuracy: {acc*100:.2f}%")
    print("=======================================================\n")

    return avg_loss, acc
# ============================================================
# 绘制训练曲线（loss + acc）
# ============================================================
def plot_training_curves(history):
    """Plot loss and accuracy curves side by side.

    Args:
        history: Dict with the keys ``"train_loss"``, ``"val_loss"``,
            ``"train_acc"`` and ``"val_acc"``, each holding one value per
            epoch.
    """
    n_epochs = len(history["train_loss"])
    epoch_axis = range(1, n_epochs + 1)

    # (history key suffix, legend suffix, y-axis label, panel title)
    panels = (
        ("loss", "Loss", "Loss", "Loss Curve"),
        ("acc", "Acc", "Accuracy", "Accuracy Curve"),
    )

    plt.figure(figsize=(12, 4))
    for position, (key, short, y_label, heading) in enumerate(panels, start=1):
        plt.subplot(1, 2, position)
        plt.plot(epoch_axis, history[f"train_{key}"], label=f"Train {short}")
        plt.plot(epoch_axis, history[f"val_{key}"], label=f"Val {short}")
        plt.xlabel("Epoch")
        plt.ylabel(y_label)
        plt.title(heading)
        plt.legend()

    plt.suptitle("Training Curves")
    plt.show()
# ============================================================
# 测试集样例预测（10 张）
# ============================================================
def visualize_test_predictions(model, test_loader, device="cpu", num=10):
    """Show images from the first test batch with true and predicted labels.

    Args:
        model: Trained network; switched to eval mode. It is expected to
            already live on ``device``.
        test_loader: Loader yielding ``(images, labels)`` batches.
        device: Device the images are moved to for the forward pass.
        num: Maximum number of images to show. If the first batch holds
            fewer images, only those are shown.
    """
    model.eval()

    # Only the first batch is needed. An empty loader has none, so bail out
    # instead of failing later on a missing tensor.
    first_batch = next(iter(test_loader), None)
    if first_batch is None:
        print("[WARN] 测试集为空，无法可视化预测结果")
        return

    batch_imgs, batch_labels = first_batch
    limit = max(num, 0)
    images = batch_imgs[:limit]
    labels = batch_labels[:limit]
    # The batch may be smaller than `num`; plot only what is available.
    shown = images.size(0)

    with torch.no_grad():
        outputs = model(images.to(device))
        _, preds = outputs.max(1)

    # ---------- Plot ----------
    plt.figure(figsize=(14, 4))
    for i in range(shown):
        img = images[i].squeeze().numpy()
        plt.subplot(1, shown, i + 1)
        plt.imshow(img, cmap="gray")
        plt.title(f"T:{labels[i].item()}  P:{preds[i].item()}")
        plt.axis("off")

    plt.suptitle("Test Prediction Samples")
    plt.show()
# ============================================================
# 综合运行：训练 → 测试 → 可视化（最终版）
# ============================================================
def run_full_experiment():
    """Run the whole pipeline: load data, train, evaluate, plot, show predictions.

    Uses the Mini split without augmentation and a LeNet5 with built-in
    convolution layers, trained for 12 epochs at learning rate 0.01.
    """
    if torch.cuda.is_available():
        device = "cuda"
    else:
        device = "cpu"
    print(f"[INFO] 使用设备：{device}")

    # Data.
    loaders = load_dataset(batch_size=64, use_mini=True, augment=False)
    train_loader, val_loader, test_loader = loaders

    # Model + training.
    trained, history = train_lenet(
        LeNet5(use_custom_conv=False),
        train_loader,
        val_loader,
        device=device,
        epochs=12,
        lr=0.01,
    )

    # Evaluation on the test set, then the two visual reports.
    evaluate_on_test(trained, test_loader, device)
    plot_training_curves(history)
    visualize_test_predictions(trained, test_loader, device)


# Script entry point.
if __name__ == "__main__":
    # run_full_experiment() performs the same data loading and training as
    # run_training_task4() and then evaluates and plots. Calling both trained
    # the model twice and discarded the first result, so only the full
    # pipeline is run here.
    run_full_experiment()