1、使用pytorch的预定义算子来重新实现二分类任务
import random
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from nndl.dataset import make_moons
import matplotlib.pyplot as plt
# Select GPU if available, otherwise fall back to CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Multi-layer perceptron with one sigmoid hidden layer.
class ModelMLP(nn.Module):
    """2-layer MLP that returns raw logits for binary classification.

    The output is NOT passed through sigmoid here; pair it with
    nn.BCEWithLogitsLoss (as this file does).
    """

    def __init__(self, input_size, hidden_size, output_size):
        super(ModelMLP, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, output_size)

    def forward(self, inputs):
        # Hidden layer with sigmoid activation.
        z1 = self.fc1(inputs)
        a1 = torch.sigmoid(z1)
        # Output layer: raw logits.
        z2 = self.fc2(a1)
        return z2
class Runner:
    """Train / evaluate / predict / checkpoint harness for a binary classifier."""

    def __init__(self, model, optimizer, metric, loss_fn):
        self.model = model
        self.optimizer = optimizer
        self.loss_fn = loss_fn
        self.metric = metric
        # Per-epoch history, consumed later by the plotting code.
        self.train_scores = []
        self.dev_scores = []
        self.train_loss = []
        self.dev_loss = []

    def train(self, train_set, dev_set, **kwargs):
        """Full-batch training loop.

        train_set / dev_set: (X, y) tensor pairs.
        Keyword args: num_epochs (default 0), log_epochs (default 100),
        save_path (default "best_model.pth").
        The model with the best dev accuracy is checkpointed to save_path.
        """
        num_epochs = kwargs.get("num_epochs", 0)
        log_epochs = kwargs.get("log_epochs", 100)
        save_path = kwargs.get("save_path", "best_model.pth")
        best_score = 0
        for epoch in range(num_epochs):
            # evaluate() switches the model to eval mode each epoch, so
            # re-enable train mode here (the original only did this once,
            # leaving every epoch after the first in eval mode).
            self.model.train()
            X, y = train_set
            # Forward pass
            logits = self.model(X)
            trn_loss = self.loss_fn(logits, y)
            self.train_loss.append(trn_loss.item())
            trn_score = self.metric(logits, y).item()
            self.train_scores.append(trn_score)
            # Backward pass and parameter update
            self.optimizer.zero_grad()
            trn_loss.backward()
            self.optimizer.step()
            # Dev-set evaluation; checkpoint on improvement.
            dev_score, dev_loss = self.evaluate(dev_set)
            if dev_score > best_score:
                self.save_model(save_path)
                best_score = dev_score
                print(f"[Evaluate] best accuracy performance updated: {best_score:.5f}")
            if log_epochs and epoch % log_epochs == 0:
                print(f"[Train] epoch: {epoch}/{num_epochs}, loss: {trn_loss.item()}")

    @torch.no_grad()
    def evaluate(self, data_set):
        """Return (metric score, loss) on (X, y); also appends to dev history."""
        self.model.eval()
        X, y = data_set
        logits = self.model(X)
        loss = self.loss_fn(logits, y).item()
        score = self.metric(logits, y).item()
        self.dev_loss.append(loss)
        self.dev_scores.append(score)
        return score, loss

    @torch.no_grad()
    def predict(self, X):
        """Return raw logits for X (eval mode, no gradient tracking)."""
        self.model.eval()
        return self.model(X)

    def save_model(self, saved_path):
        torch.save(self.model.state_dict(), saved_path)

    def load_model(self, model_path):
        self.model.load_state_dict(torch.load(model_path))
# Generate the two-moons sample data.
n_samples = 1000
X, y = make_moons(n_samples=n_samples, shuffle=True, noise=0.1)
# Visualize the dataset.
plt.figure(figsize=(5, 5))
plt.scatter(X[:, 0], X[:, 1], c=y, marker='*')
plt.xlim(-3, 4)
plt.ylim(-3, 4)
plt.savefig('linear-dataset-vis.pdf')
plt.show()
# Split into train / dev / test sets (640 / 160 / 200).
num_train, num_dev, num_test = 640, 160, 200
X_train, y_train = X[:num_train], y[:num_train]
X_dev, y_dev = X[num_train:num_train + num_dev], y[num_train:num_train + num_dev]
X_test, y_test = X[num_train + num_dev:], y[num_train + num_dev:]
# Convert to PyTorch tensors and move to the device; labels are reshaped
# to (N, 1) to match the model's single-logit output.
X_train = torch.tensor(X_train, dtype=torch.float32).to(device)
y_train = torch.tensor(y_train, dtype=torch.float32).reshape(-1, 1).to(device)
X_dev = torch.tensor(X_dev, dtype=torch.float32).to(device)
y_dev = torch.tensor(y_dev, dtype=torch.float32).reshape(-1, 1).to(device)
X_test = torch.tensor(X_test, dtype=torch.float32).to(device)
y_test = torch.tensor(y_test, dtype=torch.float32).reshape(-1, 1).to(device)
# Initialize the model and move it to the device.
input_size = 2
hidden_size = 5
output_size = 1
model = ModelMLP(input_size, hidden_size, output_size).to(device)
# Loss function and optimizer.
loss_fn = nn.BCEWithLogitsLoss()
optimizer = optim.SGD(model.parameters(), lr=0.2)
# Accuracy metric definition follows.
def accuracy(logits, labels):
    """Fraction of sigmoid-thresholded predictions matching the 0/1 labels."""
    preds = torch.sigmoid(logits).round()
    return (preds == labels).float().mean()
# Instantiate the Runner.
runner = Runner(model, optimizer, accuracy, loss_fn)
# Train the model; the best checkpoint goes to saved_path.
epoch_num = 1000
saved_path = 'best_model.pth'
runner.train([X_train, y_train], [X_dev, y_dev], num_epochs=epoch_num, log_epochs=50, save_path=saved_path)
# Visualize the training process.
def plot(runner, fig_name):
    """Plot train/dev loss (left) and accuracy (right) curves; save to fig_name."""
    plt.figure(figsize=(10, 5))
    epochs = list(range(len(runner.train_scores)))
    # Left subplot: loss curves.
    plt.subplot(1, 2, 1)
    plt.plot(epochs, runner.train_loss, label="Train loss", color='blue')
    plt.plot(epochs, runner.dev_loss, label="Dev loss", color='red', linestyle='--')
    plt.xlabel("Epoch")
    plt.ylabel("Loss")
    plt.legend()
    # Right subplot: accuracy curves.
    plt.subplot(1, 2, 2)
    plt.plot(epochs, runner.train_scores, label="Train accuracy", color='blue')
    plt.plot(epochs, runner.dev_scores, label="Dev accuracy", color='red', linestyle='--')
    plt.xlabel("Epoch")
    plt.ylabel("Accuracy")
    plt.legend()
    plt.savefig(fig_name)
    plt.show()
# Draw the loss and accuracy curves.
plot(runner, 'training_progress.pdf')
# Test-set evaluation with the best checkpoint restored.
runner.load_model(saved_path)
score, loss = runner.evaluate([X_test, y_test])
print(f"[Test] score/loss: {score:.4f}/{loss:.4f}")
2、增加一个3个神经元的隐藏层,再次实现二分类,并与1做对比。
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from nndl.dataset import make_moons
import matplotlib.pyplot as plt
from sy7 import ModelMLP
# Select GPU if available, otherwise fall back to CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# MLP with an additional 3-neuron hidden layer, for comparison with ModelMLP.
class ModelMLPWithExtraLayer(nn.Module):
    """3-layer MLP (two sigmoid hidden layers) returning raw logits.

    Pair the output with nn.BCEWithLogitsLoss; no sigmoid is applied here.
    """

    def __init__(self, input_size, hidden_size1, hidden_size2, output_size):
        super(ModelMLPWithExtraLayer, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size1)    # first hidden layer
        self.fc2 = nn.Linear(hidden_size1, hidden_size2)  # second hidden layer (3 neurons)
        self.fc3 = nn.Linear(hidden_size2, output_size)   # output layer

    def forward(self, inputs):
        z1 = self.fc1(inputs)
        a1 = torch.sigmoid(z1)  # sigmoid activation after first hidden layer
        z2 = self.fc2(a1)
        a2 = torch.sigmoid(z2)  # second hidden layer's activated output
        z3 = self.fc3(a2)       # raw logits
        return z3
class Runner:
    """Train / evaluate / predict / checkpoint harness for a binary classifier."""

    def __init__(self, model, optimizer, metric, loss_fn):
        self.model = model
        self.optimizer = optimizer
        self.loss_fn = loss_fn
        self.metric = metric
        # Per-epoch history, consumed later by the plotting code.
        self.train_scores = []
        self.dev_scores = []
        self.train_loss = []
        self.dev_loss = []

    def train(self, train_set, dev_set, **kwargs):
        """Full-batch training loop.

        train_set / dev_set: (X, y) tensor pairs.
        Keyword args: num_epochs (default 0), log_epochs (default 100),
        save_path (default "best_model.pth").
        The model with the best dev accuracy is checkpointed to save_path.
        """
        num_epochs = kwargs.get("num_epochs", 0)
        log_epochs = kwargs.get("log_epochs", 100)
        save_path = kwargs.get("save_path", "best_model.pth")
        best_score = 0
        for epoch in range(num_epochs):
            # evaluate() switches the model to eval mode each epoch, so
            # re-enable train mode here (the original only did this once,
            # leaving every epoch after the first in eval mode).
            self.model.train()
            X, y = train_set
            # Forward pass
            logits = self.model(X)
            trn_loss = self.loss_fn(logits, y)
            self.train_loss.append(trn_loss.item())
            trn_score = self.metric(logits, y).item()
            self.train_scores.append(trn_score)
            # Backward pass and parameter update
            self.optimizer.zero_grad()
            trn_loss.backward()
            self.optimizer.step()
            # Dev-set evaluation; checkpoint on improvement.
            dev_score, dev_loss = self.evaluate(dev_set)
            if dev_score > best_score:
                self.save_model(save_path)
                best_score = dev_score
                print(f"[Evaluate] best accuracy performance updated: {best_score:.5f}")
            if log_epochs and epoch % log_epochs == 0:
                print(f"[Train] epoch: {epoch}/{num_epochs}, loss: {trn_loss.item()}")

    @torch.no_grad()
    def evaluate(self, data_set):
        """Return (metric score, loss) on (X, y); also appends to dev history."""
        self.model.eval()
        X, y = data_set
        logits = self.model(X)
        loss = self.loss_fn(logits, y).item()
        score = self.metric(logits, y).item()
        self.dev_loss.append(loss)
        self.dev_scores.append(score)
        return score, loss

    @torch.no_grad()
    def predict(self, X):
        """Return raw logits for X (eval mode, no gradient tracking)."""
        self.model.eval()
        return self.model(X)

    def save_model(self, saved_path):
        torch.save(self.model.state_dict(), saved_path)

    def load_model(self, model_path):
        self.model.load_state_dict(torch.load(model_path))
# Generate the two-moons sample data.
n_samples = 1000
X, y = make_moons(n_samples=n_samples, shuffle=True, noise=0.1)
# Visualize the dataset.
plt.figure(figsize=(5, 5))
plt.scatter(X[:, 0], X[:, 1], c=y, marker='*')
plt.xlim(-3, 4)
plt.ylim(-3, 4)
plt.savefig('linear-dataset-vis.pdf')
plt.show()
# Split into train / dev / test sets (640 / 160 / 200).
num_train, num_dev, num_test = 640, 160, 200
X_train, y_train = X[:num_train], y[:num_train]
X_dev, y_dev = X[num_train:num_train + num_dev], y[num_train:num_train + num_dev]
X_test, y_test = X[num_train + num_dev:], y[num_train + num_dev:]
# Convert to PyTorch tensors and move to the device; labels are reshaped
# to (N, 1) to match the models' single-logit output.
X_train = torch.tensor(X_train, dtype=torch.float32).to(device)
y_train = torch.tensor(y_train, dtype=torch.float32).reshape(-1, 1).to(device)
X_dev = torch.tensor(X_dev, dtype=torch.float32).to(device)
y_dev = torch.tensor(y_dev, dtype=torch.float32).reshape(-1, 1).to(device)
X_test = torch.tensor(X_test, dtype=torch.float32).to(device)
y_test = torch.tensor(y_test, dtype=torch.float32).reshape(-1, 1).to(device)
# Initialize both models and move them to the device.
input_size = 2
hidden_size = 5
hidden_size1 = 5
hidden_size2 = 3
output_size = 1
model = ModelMLP(input_size, hidden_size, output_size).to(device)
model_extra_layer = ModelMLPWithExtraLayer(input_size, hidden_size1, hidden_size2, output_size).to(device)
# Loss function and one optimizer per model.
loss_fn = nn.BCEWithLogitsLoss()
optimizer = optim.SGD(model.parameters(), lr=0.2)
optimizer_extra_layer = optim.SGD(model_extra_layer.parameters(), lr=0.2)
# Accuracy metric definition follows.
def accuracy(logits, labels):
    """Fraction of sigmoid-thresholded predictions matching the 0/1 labels."""
    preds = torch.sigmoid(logits).round()
    return (preds == labels).float().mean()
# Instantiate one Runner per model.
runner = Runner(model, optimizer, accuracy, loss_fn)
runner_extra_layer = Runner(model_extra_layer, optimizer_extra_layer, accuracy, loss_fn)
# Train both models; each keeps its own best checkpoint.
epoch_num = 1000
saved_path = 'best_model.pth'
saved_path_extra = 'best_model_extra.pth'
print("Training original model...")
runner.train([X_train, y_train], [X_dev, y_dev], num_epochs=epoch_num, log_epochs=50, save_path=saved_path)
print("\nTraining model with extra hidden layer...")
runner_extra_layer.train([X_train, y_train], [X_dev, y_dev], num_epochs=epoch_num, log_epochs=50, save_path=saved_path_extra)
# Visualize and compare the two training runs.
def plot_comparison(runner1, runner2, fig_name):
    """Plot train-loss (left) and dev-accuracy (right) for both runners; save to fig_name."""
    plt.figure(figsize=(15, 5))
    epochs = list(range(len(runner1.train_scores)))
    # Left subplot: training-loss comparison.
    plt.subplot(1, 2, 1)
    plt.plot(epochs, runner1.train_loss, label="Original Train Loss", color='blue')
    plt.plot(epochs, runner2.train_loss, label="Extra Layer Train Loss", color='green', linestyle='--')
    plt.xlabel("Epoch")
    plt.ylabel("Loss")
    plt.legend()
    # Right subplot: dev-accuracy comparison.
    plt.subplot(1, 2, 2)
    plt.plot(epochs, runner1.dev_scores, label="Original Dev Accuracy", color='blue')
    plt.plot(epochs, runner2.dev_scores, label="Extra Layer Dev Accuracy", color='green', linestyle='--')
    plt.xlabel("Epoch")
    plt.ylabel("Accuracy")
    plt.legend()
    plt.savefig(fig_name)
    plt.show()
# Draw the training-process comparison plot.
plot_comparison(runner, runner_extra_layer, 'model_comparison.pdf')
# Test-set evaluation with each model's best checkpoint restored.
runner.load_model(saved_path)
runner_extra_layer.load_model(saved_path_extra)
score_orig, loss_orig = runner.evaluate([X_test, y_test])
score_extra, loss_extra = runner_extra_layer.evaluate([X_test, y_test])
print(f"[Test] Original Model - score/loss: {score_orig:.4f}/{loss_orig:.4f}")
print(f"[Test] Extra Layer Model - score/loss: {score_extra:.4f}/{loss_extra:.4f}")
3、自定义隐藏层层数和每个隐藏层中的神经元个数,尝试找到最优超参数完成二分类。可以适当修改数据集,便于探索超参数。
import torch
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt
from nndl.dataset import make_moons
# Select GPU if available, otherwise fall back to CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Flexible MLP whose hidden-layer widths are given as a list.
class FlexibleMLP(nn.Module):
    """MLP with configurable ReLU hidden layers, returning raw logits.

    hidden_sizes: list of hidden-layer widths, e.g. [10, 5]; an empty
    list yields a single linear layer.
    """

    def __init__(self, input_size, hidden_sizes, output_size):
        super(FlexibleMLP, self).__init__()
        layers = []
        in_features = input_size
        # Build the hidden layers dynamically from the configuration.
        for hidden_size in hidden_sizes:
            layers.append(nn.Linear(in_features, hidden_size))
            layers.append(nn.ReLU())
            in_features = hidden_size
        layers.append(nn.Linear(in_features, output_size))  # output layer
        self.model = nn.Sequential(*layers)

    def forward(self, x):
        return self.model(x)
# Runner class used to train and evaluate a model during the hyper-parameter search.
class Runner:
    """Training/evaluation harness; keeps per-epoch history for plotting."""

    def __init__(self, model, optimizer, metric, loss_fn):
        self.model = model
        self.optimizer = optimizer
        self.loss_fn = loss_fn
        self.metric = metric
        # Per-epoch history; dev lists are filled by train(), not evaluate().
        self.train_scores = []
        self.dev_scores = []
        self.train_loss = []
        self.dev_loss = []

    def train(self, train_set, dev_set, **kwargs):
        """Full-batch training; checkpoints the best dev-accuracy model.

        Keyword args: num_epochs (default 1000), log_epochs (default 100),
        save_path (default "best_model.pth").
        """
        num_epochs = kwargs.get("num_epochs", 1000)
        log_epochs = kwargs.get("log_epochs", 100)
        save_path = kwargs.get("save_path", "best_model.pth")
        best_score = 0
        for epoch in range(num_epochs):
            # evaluate() puts the model in eval mode, so restore train mode
            # each epoch (the original set it only once before the loop).
            self.model.train()
            X, y = train_set
            # Forward pass
            logits = self.model(X)
            trn_loss = self.loss_fn(logits, y)
            self.train_loss.append(trn_loss.item())
            trn_score = self.metric(logits, y).item()
            self.train_scores.append(trn_score)
            # Backward pass and parameter update
            self.optimizer.zero_grad()
            trn_loss.backward()
            self.optimizer.step()
            # Dev-set evaluation; record history and checkpoint on improvement.
            dev_score, dev_loss = self.evaluate(dev_set)
            self.dev_scores.append(dev_score)
            self.dev_loss.append(dev_loss)
            if dev_score > best_score:
                self.save_model(save_path)
                best_score = dev_score
            if log_epochs and epoch % log_epochs == 0:
                print(f"[Train] epoch: {epoch}/{num_epochs}, loss: {trn_loss.item()}")

    @torch.no_grad()
    def evaluate(self, data_set):
        """Return (metric score, loss) on (X, y) without touching history."""
        self.model.eval()
        X, y = data_set
        logits = self.model(X)
        loss = self.loss_fn(logits, y).item()
        score = self.metric(logits, y).item()
        return score, loss

    def save_model(self, saved_path):
        torch.save(self.model.state_dict(), saved_path)

    def load_model(self, model_path):
        self.model.load_state_dict(torch.load(model_path))
# Generate the two-moons sample data.
X, y = make_moons(n_samples=1000, shuffle=True, noise=0.1)
# Split into train / dev / test sets (640 / 160 / 200).
num_train, num_dev, num_test = 640, 160, 200
X_train, y_train = X[:num_train], y[:num_train]
X_dev, y_dev = X[num_train:num_train + num_dev], y[num_train:num_train + num_dev]
X_test, y_test = X[num_train + num_dev:], y[num_train + num_dev:]
# Convert to PyTorch tensors and move to the device; labels are reshaped
# to (N, 1) to match the models' single-logit output.
X_train = torch.tensor(X_train, dtype=torch.float32).to(device)
y_train = torch.tensor(y_train, dtype=torch.float32).reshape(-1, 1).to(device)
X_dev = torch.tensor(X_dev, dtype=torch.float32).to(device)
y_dev = torch.tensor(y_dev, dtype=torch.float32).reshape(-1, 1).to(device)
X_test = torch.tensor(X_test, dtype=torch.float32).to(device)
y_test = torch.tensor(y_test, dtype=torch.float32).reshape(-1, 1).to(device)
# Loss function; the accuracy metric is defined next.
loss_fn = nn.BCEWithLogitsLoss()
def accuracy(logits, labels):
    """Fraction of sigmoid-thresholded predictions matching the 0/1 labels."""
    preds = torch.sigmoid(logits).round()
    return (preds == labels).float().mean()
# Hyper-parameter search: candidate hidden-layer configurations.
hidden_layer_configs = [
[5], # single hidden layer, 5 neurons
[10, 5], # two hidden layers
[10, 10, 5], # three hidden layers
]
# Results per configuration, for plotting and test-set reporting.
results = {}
test_scores = {}
# Iterate over every hyper-parameter configuration and train a model for each.
for config in hidden_layer_configs:
    print(f"\nTraining with hidden layer sizes: {config}")
    # Fresh model and optimizer per configuration.
    model = FlexibleMLP(input_size=2, hidden_sizes=config, output_size=1).to(device)
    optimizer = optim.Adam(model.parameters(), lr=0.01)
    # Train via the Runner harness.
    runner = Runner(model, optimizer, accuracy, loss_fn)
    runner.train([X_train, y_train], [X_dev, y_dev], num_epochs=500, log_epochs=50)
    # Evaluate on the held-out test set.
    test_score, test_loss = runner.evaluate([X_test, y_test])
    print(f"Test accuracy: {test_score:.4f}, Test loss: {test_loss:.4f}")
    # Record histories keyed by the (hashable) config tuple.
    results[tuple(config)] = (runner.train_loss, runner.dev_scores)
    test_scores[tuple(config)] = (test_score, test_loss)
# Compare training loss and dev accuracy across all configurations.
def plot_results(results):
    """results maps config tuple -> (train_loss list, dev_scores list)."""
    plt.figure(figsize=(15, 5))
    # Left subplot: training loss per configuration.
    plt.subplot(1, 2, 1)
    for config, (train_loss, _) in results.items():
        plt.plot(train_loss, label=f"Config {config}")
    plt.xlabel("Epoch")
    plt.ylabel("Train Loss")
    plt.legend()
    # Right subplot: dev accuracy per configuration.
    plt.subplot(1, 2, 2)
    for config, (_, dev_scores) in results.items():
        plt.plot(dev_scores, label=f"Config {config}")
    plt.xlabel("Epoch")
    plt.ylabel("Dev Accuracy")
    plt.legend()
    plt.show()
# Draw the comparison plots.
plot_results(results)
# Report the test-set results for every configuration.
print("\nTest Set Results:")
for config, (test_score, test_loss) in test_scores.items():
    print(f"Config {config} - Test Accuracy: {test_score:.4f}, Test Loss: {test_loss:.4f}")
参考文献

被折叠的评论
为什么被折叠?



