"""实验任务:
1. 使用 make_moons 生成双月形数据集,并划分为训练集、验证集和测试集。
2. 可视化数据分布,了解样本特征和分类难度。
3. 构建两层前馈神经网络,初始化权重参数。
4. 设置损失函数为二分类交叉熵,并选用 Adam 优化器。
5. 分别使用 ReLU 和 Leaky ReLU 激活函数训练模型。
6. 每轮训练记录训练集和验证集的损失及准确率。
7. 保存训练过程中表现最好的模型参数。
8. 使用测试集评估模型性能,并绘制分类决策边界图。
9. 对比不同激活函数的训练曲线和测试结果,分析优化效果。
"""
import torch
import torch.nn as nn
import torch.nn.functional as F
import matplotlib.pyplot as plt
import numpy as np
from sklearn.datasets import make_moons
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
# Configure a CJK-capable font so the Chinese plot labels render correctly
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False
# ============================================
# 1. Generate the dataset and visualise it
# ============================================
# NOTE(review): the task description mentions a validation split, but only
# train/test sets are created here — confirm whether a val split is required.
x, y = make_moons(n_samples=300, noise=0.25, random_state=0)
x = StandardScaler().fit_transform(x)  # standardise features to zero mean / unit variance
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3, random_state=42)
plt.figure(figsize=(5, 4))
plt.scatter(x_train[:, 0], x_train[:, 1], c=y_train, cmap='viridis')
plt.title("数据分布示意图")
plt.xlabel("特征1")
plt.ylabel("特征2")
plt.show()
# Convert numpy arrays to torch tensors (long targets, as CrossEntropyLoss expects)
x_train = torch.tensor(x_train, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.long)
x_test = torch.tensor(x_test, dtype=torch.float32)
y_test = torch.tensor(y_test, dtype=torch.long)
# ============================================
# 二、实验一:参数初始化对比
# ============================================
class InitNet(nn.Module):
    """Two-hidden-layer MLP (2 -> 16 -> 16 -> 2) for the weight-initialisation experiment.

    init_type selects the weight scheme applied to every Linear layer:
    'zero' (all-zero weights), 'random' (N(0, 1)), or 'xavier' (Xavier uniform).
    Biases are always zeroed regardless of the scheme.
    """

    def __init__(self, init_type='xavier'):
        super().__init__()
        self.layer1 = nn.Linear(2, 16)
        self.layer2 = nn.Linear(16, 16)
        self.out = nn.Linear(16, 2)
        self.init_weights(init_type)

    def init_weights(self, init_type):
        """Apply the chosen initialisation scheme to all Linear sub-modules."""
        for module in self.modules():
            if not isinstance(module, nn.Linear):
                continue
            if init_type == 'zero':
                nn.init.constant_(module.weight, 0)
            elif init_type == 'random':
                nn.init.normal_(module.weight, 0, 1)
            elif init_type == 'xavier':
                nn.init.xavier_uniform_(module.weight)
            nn.init.constant_(module.bias, 0)

    def forward(self, x):
        """Return raw 2-class logits for input of shape (batch, 2)."""
        hidden = F.relu(self.layer1(x))
        hidden = F.relu(self.layer2(hidden))
        return self.out(hidden)
def train_model(model, optimizer, criterion, epochs=200, x=None, y=None):
    """Train *model* with full-batch gradient steps and return the loss history.

    Args:
        model: the nn.Module to train.
        optimizer: optimizer stepping over model.parameters().
        criterion: loss function taking (logits, targets).
        epochs: number of full-batch updates (default 200).
        x, y: training inputs/targets; when omitted they fall back to the
            module-level ``x_train`` / ``y_train`` tensors, preserving the
            original call sites.

    Returns:
        list[float]: loss recorded after each epoch.
    """
    # Backward-compatible defaults: the original version read these globals.
    if x is None:
        x = x_train
    if y is None:
        y = y_train
    losses = []
    for _ in range(epochs):
        optimizer.zero_grad()
        logits = model(x)
        loss = criterion(logits, y)
        loss.backward()
        optimizer.step()
        losses.append(loss.item())
    return losses
# Shared loss for all experiments (softmax cross-entropy over the 2 logits)
criterion = nn.CrossEntropyLoss()
# One network per initialisation scheme under comparison
nets = {
    '零初始化': InitNet('zero'),
    '随机初始化': InitNet('random'),
    'Xavier初始化': InitNet('xavier')
}
loss_dict = {}
# ========== Train each net and plot its loss curve individually ==========
for name, net in nets.items():
    optimizer = torch.optim.Adam(net.parameters(), lr=0.01)
    loss_dict[name] = train_model(net, optimizer, criterion)
    plt.figure(figsize=(6, 4))
    plt.plot(loss_dict[name], color='steelblue')
    plt.title(f"{name} 的损失变化曲线")
    plt.xlabel("Epoch")
    plt.ylabel("Loss")
    plt.grid(True)
    plt.show()
# Combined comparison plot for all three initialisation schemes
plt.figure(figsize=(6, 4))
for name, l in loss_dict.items():
    plt.plot(l, label=name)
plt.title("不同初始化方式的损失变化对比")
plt.xlabel("Epoch")
plt.ylabel("Loss")
plt.legend()
plt.show()
# ============================================
# 三、实验二:梯度消失问题对比
# ============================================
class DeepNet(nn.Module):
    """Deliberately deep MLP (five 16-unit hidden layers) used to expose the
    vanishing-gradient behaviour of saturating activations.

    act == 'sigmoid' inserts nn.Sigmoid after every hidden Linear layer;
    any other value inserts nn.ReLU.
    """

    def __init__(self, act):
        super().__init__()
        activation = nn.Sigmoid if act == 'sigmoid' else nn.ReLU
        dims = [2] + [16] * 5
        blocks = []
        # Five hidden Linear+activation pairs to amplify gradient decay.
        for d_in, d_out in zip(dims[:-1], dims[1:]):
            blocks.append(nn.Linear(d_in, d_out))
            blocks.append(activation())
        blocks.append(nn.Linear(16, 2))  # final 2-class logit head
        self.net = nn.Sequential(*blocks)

    def forward(self, x):
        """Return raw 2-class logits for input of shape (batch, 2)."""
        return self.net(x)
def train_and_eval(model, epochs=300, lr=0.01, x=None, y=None, loss_fn=None):
    """Train *model* with Adam full-batch updates and return the loss history.

    Args:
        model: the nn.Module to train.
        epochs: number of full-batch updates (default 300).
        lr: Adam learning rate (previously hard-coded to 0.01).
        x, y: training inputs/targets; default to the module-level
            ``x_train`` / ``y_train`` tensors for backward compatibility.
        loss_fn: loss taking (logits, targets); defaults to the module-level
            ``criterion``, matching the original behaviour.

    Returns:
        list[float]: loss recorded after each epoch.
    """
    # Backward-compatible defaults: the original version read these globals.
    if x is None:
        x = x_train
    if y is None:
        y = y_train
    if loss_fn is None:
        loss_fn = criterion
    opt = torch.optim.Adam(model.parameters(), lr=lr)
    losses = []
    for _ in range(epochs):
        opt.zero_grad()
        loss = loss_fn(model(x), y)
        loss.backward()
        opt.step()
        losses.append(loss.item())
    return losses
# Experiment 2: train the same deep architecture with Sigmoid vs ReLU
sigmoid_net = DeepNet('sigmoid')
relu_net = DeepNet('relu')
sigmoid_loss = train_and_eval(sigmoid_net)
relu_loss = train_and_eval(relu_net)
# Individual loss-curve plots
plt.figure(figsize=(6, 4))
plt.plot(sigmoid_loss, color='red')
plt.title("Sigmoid激活函数下的损失曲线(梯度消失)")
plt.xlabel("Epoch")
plt.ylabel("Loss")
plt.grid(True)
plt.show()
plt.figure(figsize=(6, 4))
plt.plot(relu_loss, color='green')
plt.title("ReLU激活函数下的损失曲线(缓解梯度消失)")
plt.xlabel("Epoch")
plt.ylabel("Loss")
plt.grid(True)
plt.show()
# Combined comparison plot: Sigmoid vs ReLU
plt.figure(figsize=(6, 4))
plt.plot(sigmoid_loss, label='Sigmoid 激活', color='red')
plt.plot(relu_loss, label='ReLU 激活', color='green')
plt.title("梯度消失问题对比")
plt.xlabel("Epoch")
plt.ylabel("Loss")
plt.legend()
plt.show()
# ============================================
# 四、实验三:死亡 ReLU 问题对比
# ============================================
class ReLUCompareNet(nn.Module):
    """Three-layer MLP (2 -> 16 -> 16 -> 2) for the dying-ReLU comparison.

    act == 'relu' uses plain ReLU on both hidden layers; any other value
    uses LeakyReLU with negative_slope=0.1.
    """

    def __init__(self, act='relu'):
        super().__init__()
        self.fc1 = nn.Linear(2, 16)
        self.fc2 = nn.Linear(16, 16)
        self.fc3 = nn.Linear(16, 2)
        self.act_type = act

    def forward(self, x):
        """Return raw 2-class logits for input of shape (batch, 2)."""
        # Pick the activation once, then apply it to both hidden layers.
        if self.act_type == 'relu':
            activate = F.relu
        else:
            def activate(t):
                return F.leaky_relu(t, negative_slope=0.1)
        h = activate(self.fc1(x))
        h = activate(self.fc2(h))
        return self.fc3(h)
# Experiment 3: ReLU vs LeakyReLU on the same architecture
relu_net = ReLUCompareNet('relu')
leaky_net = ReLUCompareNet('leaky')
relu_loss = train_and_eval(relu_net)
leaky_loss = train_and_eval(leaky_net)
# Individual training-loss curves for each activation
plt.figure(figsize=(6, 4))
plt.plot(relu_loss, color='orange')
plt.title("ReLU 激活下的训练损失曲线")
plt.xlabel("Epoch")
plt.ylabel("Loss")
plt.grid(True)
plt.show()
plt.figure(figsize=(6, 4))
plt.plot(leaky_loss, color='blue')
plt.title("LeakyReLU 激活下的训练损失曲线")
plt.xlabel("Epoch")
plt.ylabel("Loss")
plt.grid(True)
plt.show()
# Combined comparison plot: ReLU vs LeakyReLU
plt.figure(figsize=(6, 4))
plt.plot(relu_loss, label='ReLU', color='orange')
plt.plot(leaky_loss, label='LeakyReLU', color='blue')
plt.title("死亡ReLU问题对比曲线")
plt.xlabel("Epoch")
plt.ylabel("Loss")
plt.legend()
plt.show()
# ============================================
# 五、结果可视化:分类边界展示(额外)
# ============================================
def plot_boundary(model, title):
    """Draw the model's predicted class regions over a fixed grid, overlaying
    the training points.

    NOTE(review): grid bounds ([-2, 2] x [-1.5, 2]) are hard-coded — confirm
    they cover the standardised data range.
    """
    grid_x, grid_y = np.meshgrid(np.linspace(-2, 2, 100),
                                 np.linspace(-1.5, 2, 100))
    points = np.c_[grid_x.ravel(), grid_y.ravel()]
    inputs = torch.tensor(points, dtype=torch.float32)
    with torch.no_grad():  # inference only — no autograd graph needed
        labels = model(inputs).argmax(dim=1)
    z = labels.numpy().reshape(grid_x.shape)
    plt.figure(figsize=(6, 5))
    plt.contourf(grid_x, grid_y, z, cmap='coolwarm', alpha=0.6)
    plt.scatter(x_train[:, 0], x_train[:, 1], c=y_train, cmap='coolwarm', edgecolor='k', s=20)
    plt.title(title)
    plt.show()
# Decision-boundary plots for two representative trained models
plot_boundary(nets['Xavier初始化'], "Xavier初始化分类边界")
plot_boundary(leaky_net, "LeakyReLU激活分类边界")
print(" 实验完成:参数初始化、梯度消失、死亡ReLU对比可视化已完成。")

# (removed: webpage artifact text accidentally pasted into the source file)



