第9篇：PyTorch入门——让电脑帮我们造AI - CSDN博客

🤔 从"手工造轮子"到"工厂流水线"

还记得我们前面是如何实现神经网络的吗？

# Hand-written implementation from Part 8: every single line had to be coded manually
class TwoLayerNetwork:
    """Excerpt of a two-layer network implemented directly with NumPy arrays.

    Only parameter initialisation and the first half of the forward pass
    are shown here; the rest of the class is elided in this excerpt.
    """
    def __init__(self):
        """Draw every weight and bias from ``np.random.randn`` and scale it by 0.5."""
        self.W1 = np.random.randn(2, 3) * 0.5  # first-layer weights, shape (2, 3)
        self.b1 = np.random.randn(3) * 0.5  # first-layer bias, shape (3,)
        self.W2 = np.random.randn(3, 1) * 0.5  # second-layer weights, shape (3, 1)
        self.b2 = np.random.randn(1) * 0.5  # second-layer bias, shape (1,)
    
    def forward(self, X):
        """Compute the first layer's pre-activation and activation, caching both on ``self``.

        NOTE(review): ``self.sigmoid`` is not defined in the visible excerpt and
        nothing is returned here — presumably both live in the elided part; confirm.
        """
        self.z1 = np.dot(X, self.W1) + self.b1
        self.a1 = self.sigmoid(self.z1)
        # ... backpropagation, gradient computation and parameter updates still have to be written ...

问题来了：

📝 代码冗长，容易出错
🐌 没有GPU加速，训练慢
🔧 调试困难，难以扩展
📦 无法复用，每次都要重写

这就像手工打造每一颗螺丝钉！

而PyTorch就像是现代化工厂：

🏭 标准化的生产流程
⚡ 自动化设备（GPU加速）
🔧 模块化设计，即插即用
📈 规模化生产，效率倍增

🚀 PyTorch是什么？

PyTorch = Python + Torch

Torch：一个基于Lua语言的深度学习框架（Facebook是其主要维护者之一）
PyTorch：2016年Facebook用Python重写的Torch，保留了核心优势

核心理念：

"像NumPy一样简单，像C++一样快速"

import torch
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt

# Report the installed PyTorch build and whether a CUDA device is visible.
print("=== PyTorch初体验 ===")
print(f"PyTorch版本: {torch.__version__}")
print(f"CUDA是否可用: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    print(f"GPU设备: {torch.cuda.get_device_name(0)}")

# PyTorch tensor vs. NumPy array
print("\n--- PyTorch张量 vs NumPy数组 ---")

# NumPy array
import numpy as np
np_array = np.array([[1, 2, 3], [4, 5, 6]])
print(f"NumPy数组:\n{np_array}")
print(f"类型: {type(np_array)}")

# PyTorch tensor built from the same nested list
torch_tensor = torch.tensor([[1, 2, 3], [4, 5, 6]])
print(f"\nPyTorch张量:\n{torch_tensor}")
print(f"类型: {type(torch_tensor)}")

# The two APIs look strikingly alike
print("\n相似之处:")
print(f"• 都支持相同的索引操作: torch_tensor[0, 1] = {torch_tensor[0, 1]}")
print(f"• 都支持相同的数学运算: torch_tensor + 1 =\n{torch_tensor + 1}")
print(f"• 都可以转换为Python列表: torch_tensor.tolist() = {torch_tensor.tolist()[:2]}...")

# What PyTorch adds on top of the NumPy-like interface
print("\nPyTorch的超能力:")
if torch.cuda.is_available():
    # The GPU lines only make sense when a CUDA device is actually present
    gpu_tensor = torch_tensor.cuda()
    print("• GPU加速: 张量可以在GPU上运行!")
    print(f"• 设备转移: {gpu_tensor.device}")
# Autograd does not depend on the hardware, so it is announced unconditionally
# rather than only on machines without a GPU.
print("• 自动微分: 这是最大的超能力！")

🔥 自动微分：PyTorch的核心魔法

🎯 什么是自动微分？

还记得我们手工计算梯度的痛苦吗？PyTorch的autograd系统帮我们自动完成！

def autograd_demo():
    """Differentiate f(x) = x² + 2x + 1 at x = 3 with autograd, printing each step.

    The analytic derivative is f'(x) = 2x + 2, so the expected gradient at
    x = 3 is 8; the function prints whether autograd reproduces that value.
    """
    # Banner, problem statement and the hand-derived answer
    intro_lines = (
        "=== PyTorch自动微分演示 ===",
        "问题：f(x) = x² + 2x + 1，求f'(3)",
        "手工解：f'(x) = 2x + 2，所以f'(3) = 2×3 + 2 = 8",
        "",
        "PyTorch自动微分:",
    )
    for line in intro_lines:
        print(line)

    # Step 1: a leaf tensor that autograd is asked to track
    point = torch.tensor(3.0, requires_grad=True)
    print(f"创建张量 x = {point.item()}, requires_grad={point.requires_grad}")

    # Step 2: build the function out of tensor operations so the graph is recorded
    value = point ** 2 + 2 * point + 1
    print(f"计算 y = x² + 2x + 1 = {value.item()}")

    # Step 3: backpropagate from the scalar result
    value.backward()
    print("调用 y.backward() 自动计算梯度")

    # Step 4: the derivative is stored on the leaf tensor
    slope = point.grad.item()
    print(f"梯度 x.grad = {slope}")
    print(f"✓ 结果与手工计算一致：{slope == 8.0}")

    print("\n神奇之处：我们没有手动推导导数公式！")
    print("PyTorch自动完成了所有微积分运算！")

autograd_demo()

🧮 复杂函数的自动微分

def complex_autograd():
    """Differentiate f(x) = sin(x²) + ln(x + 1) at x = 1 with autograd and print a report.

    By hand: f'(x) = cos(x²)·2x + 1/(x + 1), so f'(1) = 2·cos(1) + 1/2 ≈ 1.5806.
    The printed error is measured against that four-decimal reference value.
    """
    print("=== 复杂函数自动微分 ===")

    x0 = torch.tensor(1.0, requires_grad=True)

    # Compose the function from its two terms, then backpropagate
    sine_term = torch.sin(x0 ** 2)
    log_term = torch.log(x0 + 1)
    fx = sine_term + log_term
    fx.backward()

    slope = x0.grad.item()
    report = (
        "函数：f(x) = sin(x²) + ln(x + 1)",
        f"在 x = {x0.item()} 处：",
        f"函数值：f(1) = {fx.item():.4f}",
        f"导数值：f'(1) = {slope:.4f}",
        "手工验证：cos(1)×2 + 1/2 ≈ 1.5806",
        f"✓ 自动微分正确！误差：{abs(slope - 1.5806):.6f}",
    )
    print("\n".join(report))

complex_autograd()

🏗️ 用PyTorch构建神经网络

📦 nn.Module：神经网络的基础类

class PyTorchNeuralNetwork(nn.Module):
    """Fully connected network: Linear → ReLU → Linear → Sigmoid.

    Constructing an instance also prints a short summary of the layer sizes.
    """

    def __init__(self, input_size, hidden_size, output_size):
        """Create the two linear layers and their activations.

        Args:
            input_size: Number of input features.
            hidden_size: Number of units in the hidden layer.
            output_size: Number of output units.
        """
        super().__init__()

        # Modules assigned as attributes are registered with nn.Module, which is
        # why their weights show up in parameters() / named_parameters().
        self.layer1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.layer2 = nn.Linear(hidden_size, output_size)
        self.sigmoid = nn.Sigmoid()

        summary = (
            "网络结构:",
            f"  Input Layer: {input_size} neurons",
            f"  Hidden Layer: {hidden_size} neurons (Linear + ReLU)",
            f"  Output Layer: {output_size} neurons (Linear + Sigmoid)",
        )
        for line in summary:
            print(line)

    def forward(self, x):
        """Run the forward pass and return the sigmoid-activated output."""
        hidden = self.relu(self.layer1(x))
        return self.sigmoid(self.layer2(hidden))

# 演示网络创建
def create_pytorch_network():
    """Build a 2-4-1 ``PyTorchNeuralNetwork``, print its parameters and return it.

    Returns:
        The freshly constructed model.
    """
    print("=== 创建PyTorch神经网络 ===")

    # 2 inputs for the XOR problem, 4 hidden neurons, 1 output (0 or 1)
    layer_sizes = {"input_size": 2, "hidden_size": 4, "output_size": 1}
    net = PyTorchNeuralNetwork(**layer_sizes)

    # List every registered weight/bias tensor and keep a running total
    print("\n网络参数:")
    running_total = 0
    for label, tensor in net.named_parameters():
        count = tensor.numel()
        running_total += count
        print(f"  {label}: {tensor.shape} -> 共{count}个参数")

    print(f"  总参数数量: {running_total}")

    return net

model = create_pytorch_network()

🎯 训练神经网络：PyTorch方式

def train_pytorch_network(epochs=1000, lr=0.1):
    """Train ``PyTorchNeuralNetwork`` on the XOR truth table and plot the outcome.

    Prints the training data, the loss and accuracy after the first epoch and
    every 200th epoch, and the final per-sample predictions. It then shows a
    figure with the loss curve and the learned decision surface.

    Args:
        epochs: Number of full-batch optimisation steps to run.
        lr: Learning rate passed to ``optim.Adam``.
    """
    print("=== PyTorch训练神经网络 ===")

    # XOR truth table: four samples, two binary inputs, one binary target
    X = torch.tensor([[0, 0], [0, 1], [1, 0], [1, 1]], dtype=torch.float32)
    y = torch.tensor([[0], [1], [1], [0]], dtype=torch.float32)
    samples = X.tolist()
    targets = y.tolist()

    print("训练数据:")
    for (in1, in2), (target,) in zip(samples, targets):
        print(f"  [{in1}, {in2}] -> {target}")

    model = PyTorchNeuralNetwork(2, 4, 1)

    # Binary cross-entropy matches the sigmoid output of the model.
    criterion = nn.BCELoss()
    # Adam's documented default learning rate is 1e-3; 0.1 is already aggressive
    # and is only reasonable because the XOR problem is tiny.
    optimizer = optim.Adam(model.parameters(), lr=lr)

    print(f"\n损失函数: {criterion}")
    print(f"优化器: {optimizer}")

    losses = []

    print("\n开始训练（每200轮显示一次）:")
    print("轮次   Loss    预测结果")
    print("-" * 35)

    for epoch in range(epochs):
        # Forward pass on the whole (four-sample) batch
        predictions = model(X)
        loss = criterion(predictions, y)
        losses.append(loss.item())

        # Backward pass: clear stale gradients, backpropagate, update weights
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if (epoch + 1) % 200 == 0 or epoch == 0:
            # `predictions` was computed before this epoch's weight update
            with torch.no_grad():
                pred_classes = (predictions > 0.5).float()
                correct = (pred_classes == y).all(dim=1).sum().item()
                accuracy = correct / len(y)

                print(f"{epoch+1:4d}   {loss.item():.4f}   {accuracy*100:.0f}%正确")

    # Final evaluation on the training samples
    print("\n最终测试结果:")
    with torch.no_grad():
        final_predictions = model(X).squeeze(1).tolist()
    for (in1, in2), (true_val,), pred_val in zip(samples, targets, final_predictions):
        pred_class = 1 if pred_val > 0.5 else 0
        status = "✓" if pred_class == true_val else "✗"
        print(f"  [{in1}, {in2}] -> "
              f"预测{pred_class}({pred_val:.3f}) 真实{true_val} {status}")

    # Left panel: loss curve
    plt.figure(figsize=(10, 4))

    plt.subplot(1, 2, 1)
    plt.plot(losses)
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.title('PyTorch训练：损失下降曲线')
    plt.grid(True, alpha=0.3)

    # Right panel: model output evaluated on a 50×50 grid around the unit square
    plt.subplot(1, 2, 2)
    x_min, x_max = -0.5, 1.5
    y_min, y_max = -0.5, 1.5
    xx, yy = np.meshgrid(np.linspace(x_min, x_max, 50),
                         np.linspace(y_min, y_max, 50))

    grid = torch.tensor(np.c_[xx.ravel(), yy.ravel()], dtype=torch.float32)
    with torch.no_grad():
        Z = model(grid).numpy().reshape(xx.shape)

    plt.contourf(xx, yy, Z, alpha=0.6, cmap=plt.cm.RdYlBu)
    plt.contour(xx, yy, Z, colors='black', linewidths=0.8)

    # Overlay the four training points, coloured by their true label
    colors = ['red' if label == 0 else 'blue' for label in y.numpy().flatten()]
    plt.scatter(X[:, 0].numpy(), X[:, 1].numpy(), c=colors, s=100,
                edgecolors='black', linewidth=2)

    plt.xlabel('Input 1')
    plt.ylabel('Input 2')
    plt.title('神经网络决策边界')
    plt.xlim(x_min, x_max)
    plt.ylim(y_min, y_max)

    plt.tight_layout()
    plt.show()

train_pytorch_network()

🔧 PyTorch的核心组件详解

1️⃣ 张量(Tensor)：数据的基本单位

def tensor_operations():
    """Print a tour of basic tensor creation, attributes and arithmetic.

    Covers construction from a list and from a NumPy array, the zero / one /
    random factory functions, shape and dtype inspection, element-wise and
    matrix arithmetic, and (only when CUDA is available) a CPU→GPU transfer.
    """
    print("=== PyTorch张量操作大全 ===")

    # 1. Ways to create a tensor
    print("1. 创建张量:")

    # From a Python list
    tensor1 = torch.tensor([1, 2, 3, 4, 5])
    print(f"从列表: {tensor1}")

    # From a NumPy array
    numpy_array = np.array([[1, 2], [3, 4]])
    tensor2 = torch.from_numpy(numpy_array)
    print(f"从NumPy: \n{tensor2}")

    # Factory functions; every tensor created here is also displayed
    zeros = torch.zeros(2, 3)
    ones = torch.ones(2, 3)
    rand_tensor = torch.rand(2, 3)
    print(f"全零张量: \n{zeros}")
    print(f"全一张量: \n{ones}")
    print(f"随机张量: \n{rand_tensor}")

    # 2. Tensor attributes
    print("\n2. 张量属性:")
    print(f"形状: {tensor2.shape}")
    print(f"维度: {tensor2.dim()}")
    print(f"数据类型: {tensor2.dtype}")
    print(f"设备: {tensor2.device}")

    # 3. Arithmetic: `*` is element-wise, matmul is the matrix product
    print("\n3. 张量运算:")
    a = torch.tensor([[1, 2], [3, 4]], dtype=torch.float32)
    b = torch.tensor([[5, 6], [7, 8]], dtype=torch.float32)

    print(f"A = \n{a}")
    print(f"B = \n{b}")
    print(f"A + B = \n{a + b}")
    print(f"A × B (逐元素) = \n{a * b}")
    print(f"矩阵乘法 A @ B = \n{torch.matmul(a, b)}")

    # 4. GPU transfer, skipped entirely when no CUDA device is present
    if torch.cuda.is_available():
        print("\n4. GPU支持:")
        gpu_tensor = tensor2.cuda()
        print(f"CPU张量: {tensor2.device}")
        print(f"GPU张量: {gpu_tensor.device}")
        print("✓ 张量可以无缝在CPU和GPU间转移！")

tensor_operations()

2️⃣ 神经网络模块(nn.Module)

def neural_network_modules():
    """Instantiate common layers, activations, losses and optimizers and print each one."""
    print("=== PyTorch神经网络模块 ===")

    # 1. Layers: fully connected, 2-D convolution (3→16 channels, 3×3 kernel)
    #    and an LSTM with input size 10 and hidden size 20
    fc_layer = nn.Linear(10, 5)
    conv_layer = nn.Conv2d(3, 16, kernel_size=3)
    recurrent_layer = nn.LSTM(10, 20, batch_first=True)
    print("1. 常见网络层:")
    print(f"全连接层 Linear(10→5): {fc_layer}")
    print(f"卷积层 Conv2d(3→16, 3×3): {conv_layer}")
    print(f"LSTM层 LSTM(10→20): {recurrent_layer}")

    # 2. Activation functions
    relu_fn, sigmoid_fn, tanh_fn = nn.ReLU(), nn.Sigmoid(), nn.Tanh()
    print("\n2. 激活函数:")
    print(f"ReLU: {relu_fn}")
    print(f"Sigmoid: {sigmoid_fn}")
    print(f"Tanh: {tanh_fn}")

    # 3. Loss functions
    mse_loss, bce_loss, ce_loss = nn.MSELoss(), nn.BCELoss(), nn.CrossEntropyLoss()
    print("\n3. 损失函数:")
    print(f"均方误差: {mse_loss}")
    print(f"二分类交叉熵: {bce_loss}")
    print(f"交叉熵: {ce_loss}")

    # 4. Optimizers, both attached to the parameters of one small linear model
    toy_model = nn.Linear(10, 1)
    sgd_opt = optim.SGD(toy_model.parameters(), lr=0.01)
    adam_opt = optim.Adam(toy_model.parameters(), lr=0.001)
    print("\n4. 优化器:")
    print(f"SGD优化器: {sgd_opt}")
    print(f"Adam优化器: {adam_opt}")

neural_network_modules()

3️⃣ 数据加载：Dataset和DataLoader

from torch.utils.data import Dataset, DataLoader, TensorDataset

def data_loading_demo():
    """Demonstrate batching with ``TensorDataset`` and with a custom ``Dataset``.

    Builds 100 random samples with 3 features and a binary label, batches them
    in groups of 16 through a ``DataLoader`` for both dataset flavours, and
    prints the resulting batch counts and the shapes of the first three batches.
    """
    print("=== PyTorch数据加载 ===")

    # Synthetic data: 100 samples × 3 features, labels drawn from {0, 1}
    X = torch.randn(100, 3)
    y = torch.randint(0, 2, (100, 1)).float()

    print(f"原始数据: X.shape={X.shape}, y.shape={y.shape}")

    # Approach 1: wrap the tensors directly
    dataset1 = TensorDataset(X, y)
    dataloader1 = DataLoader(dataset1, batch_size=16, shuffle=True)

    print("\n1. TensorDataset + DataLoader:")
    print(f"批次数量: {len(dataloader1)}")

    # zip with range(3) stops after three batches without fetching a fourth
    for batch_idx, (batch_X, batch_y) in zip(range(3), dataloader1):
        print(f"  批次{batch_idx}: X.shape={batch_X.shape}, y.shape={batch_y.shape}")

    # Approach 2: a map-style Dataset only needs __len__ and __getitem__
    class CustomDataset(Dataset):
        """Minimal map-style dataset over parallel feature / label tensors."""

        def __init__(self, features, labels):
            self.features = features
            self.labels = labels

        def __len__(self):
            return len(self.features)

        def __getitem__(self, idx):
            return self.features[idx], self.labels[idx]

    dataset2 = CustomDataset(X, y)
    dataloader2 = DataLoader(dataset2, batch_size=16, shuffle=True)

    print("\n2. 自定义Dataset:")
    print(f"数据集长度: {len(dataset2)}")
    # Report the loader built on the custom dataset so it is actually exercised
    print(f"批次数量: {len(dataloader2)}")

    # Augmentation is only described here, not executed
    print("\n3. 数据增强概念:")
    print("对于图像数据，常用的增强包括:")
    print("  • 随机翻转 (RandomHorizontalFlip)")
    print("  • 随机裁剪 (RandomCrop)")
    print("  • 颜色抖动 (ColorJitter)")
    print("  • 归一化 (Normalize)")

data_loading_demo()

🎨 用PyTorch实现经典网络架构

🏆 手写数字识别：MNIST分类器

def mnist_classifier():
    """Define an MLP for 28×28 digit images and print a mock training log.

    No dataset is loaded: the loss and accuracy figures printed for the three
    epochs are synthetic numbers drawn with ``np.random.normal``.
    """
    print("=== MNIST手写数字分类器 ===")

    # Simulated data only; real use requires downloading the MNIST dataset
    print("注意：这里用模拟数据演示网络结构")
    print("实际使用时需要安装torchvision并下载MNIST数据集")

    class MNISTClassifier(nn.Module):
        """784 → 128 → 64 → 10 multilayer perceptron with dropout after the first layer."""

        def __init__(self):
            super().__init__()
            stack = [
                nn.Flatten(),             # flatten 28×28 into 784
                nn.Linear(28 * 28, 128),  # first fully connected layer
                nn.ReLU(),
                nn.Dropout(0.2),          # dropout against overfitting
                nn.Linear(128, 64),       # second fully connected layer
                nn.ReLU(),
                nn.Linear(64, 10),        # output layer: one unit per digit class
            ]
            self.network = nn.Sequential(*stack)

        def forward(self, x):
            return self.network(x)

    classifier = MNISTClassifier()
    print("网络结构:")
    print(classifier)

    # Count every trainable scalar in the model
    param_total = 0
    for weights in classifier.parameters():
        param_total += weights.numel()
    print(f"总参数数量: {param_total:,}")

    # Loss and optimizer are created as they would be for real training
    print("\n模拟训练过程:")
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(classifier.parameters(), lr=0.001)

    # Three mock epochs: geometric decay plus Gaussian noise
    for epoch in range(3):
        decay = 0.8 ** epoch
        loss = 2.3 * decay + 0.1 * np.random.normal()
        accuracy = 85 + 10 * (1 - decay) + np.random.normal(0, 2)

        print(f"Epoch {epoch+1}: Loss={loss:.3f}, Accuracy={accuracy:.1f}%")

    closing = (
        "\n✓ 网络结构设计完成！",
        "实际使用时只需要:",
        "1. 加载真实MNIST数据",
        "2. 实例化这个模型",
        "3. 运行训练循环",
    )
    for line in closing:
        print(line)

mnist_classifier()

⚡ GPU加速：释放PyTorch的真正威力

def gpu_acceleration_demo():
    """Time a 1000×1000 matrix product on the CPU and on the GPU and print the speedup.

    When no CUDA device is available the function prints setup hints and
    returns without running the benchmark.
    """
    import time

    print("=== PyTorch GPU加速演示 ===")

    # Pick the device once and use it for every later placement decision
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print(f"使用设备: {device}")

    if device.type != 'cuda':
        print("⚠️  未检测到GPU，将使用CPU演示")
        print("要启用GPU加速，请确保:")
        print("1. 安装了CUDA支持的PyTorch")
        print("2. 有NVIDIA GPU且驱动正确安装")
        return

    print("\n创建大型张量进行性能对比...")

    cpu_tensor = torch.randn(1000, 1000)
    gpu_tensor = torch.randn(1000, 1000, device=device)

    print(f"CPU张量设备: {cpu_tensor.device}")
    print(f"GPU张量设备: {gpu_tensor.device}")

    # Warm-up runs keep one-off start-up costs (e.g. first-call CUDA
    # initialisation) out of the measured interval.
    torch.matmul(cpu_tensor, cpu_tensor)
    torch.matmul(gpu_tensor, gpu_tensor)
    torch.cuda.synchronize()

    # perf_counter is the monotonic, high-resolution clock meant for timing
    start_time = time.perf_counter()
    torch.matmul(cpu_tensor, cpu_tensor)
    cpu_time = time.perf_counter() - start_time

    # CUDA kernels launch asynchronously, so synchronise on both sides of the
    # timed region to measure the actual compute time.
    torch.cuda.synchronize()
    start_time = time.perf_counter()
    gpu_result = torch.matmul(gpu_tensor, gpu_tensor)
    torch.cuda.synchronize()
    gpu_time = time.perf_counter() - start_time

    # Guard the ratio in case the GPU interval rounds to zero
    speedup = cpu_time / gpu_time if gpu_time > 0 else float('inf')

    print("\n性能对比 (1000×1000 矩阵乘法):")
    print(f"CPU时间: {cpu_time:.4f}秒")
    print(f"GPU时间: {gpu_time:.4f}秒")
    print(f"加速比: {speedup:.1f}x")

    # Drop the GPU references, then release the cached blocks
    del gpu_tensor, gpu_result
    torch.cuda.empty_cache()

gpu_acceleration_demo()

🚨 PyTorch最佳实践

✅ 推荐做法

def pytorch_best_practices():
    """Print a categorized checklist of recommended PyTorch practices."""
    # (category heading, tips) pairs, listed in the order they are printed
    guide = (
        ("模型定义", (
            "✓ 继承nn.Module，在__init__中定义层",
            "✓ 在forward中定义前向传播逻辑",
            "✓ 使用nn.Sequential组织简单网络",
        )),
        ("训练循环", (
            "✓ 使用optimizer.zero_grad()清零梯度",
            "✓ 使用with torch.no_grad()进行推理",
            "✓ 定期保存模型检查点",
        )),
        ("内存管理", (
            "✓ 及时删除不需要的张量",
            "✓ 使用torch.cuda.empty_cache()清理GPU内存",
            "✓ 合理设置batch_size避免内存溢出",
        )),
        ("调试技巧", (
            "✓ 使用print(tensor.shape)检查张量形状",
            "✓ 检查requires_grad设置是否正确",
            "✓ 使用torch.autograd.set_detect_anomaly(True)调试梯度问题",
        )),
    )

    print("=== PyTorch最佳实践 ===")

    for heading, items in guide:
        print(f"\n{heading}:")
        print("\n".join(f"  {item}" for item in items))

pytorch_best_practices()