Learning Rate Decay in PyTorch and How to Use It


`ReduceLROnPlateau` adjusts the learning rate when a monitored metric (loss or accuracy) has stopped changing over the last several epochs, i.e. it has not dropped or risen by more than a given threshold. Typical uses are reducing the learning rate when the validation loss stops decreasing, or when the validation accuracy stops rising. (A runnable sketch follows the parameter list below.)

```python
torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=10,
                                           verbose=False, threshold=0.0001, threshold_mode='rel',
                                           cooldown=0, min_lr=0, eps=1e-08)
```

Parameters:

  • mode (str): one of 'min' or 'max'. 'min' means the monitored metric should be decreasing (e.g. when monitoring loss); 'max' means it should be increasing (e.g. when monitoring accuracy).
  • factor (float): multiplicative factor for the learning rate, playing the same role as gamma in the schedulers described earlier; when the trigger condition is met, lr = lr × factor.
  • patience (int): how many epochs of no improvement to tolerate before the learning rate is reduced.
  • verbose (bool): whether to print a message on each adjustment, i.e. print('Epoch {:5d}: reducing learning rate of group {} to {:.4e}.'.format(epoch, i, new_lr)). Defaults to False, so nothing is printed.
  • threshold_mode (str): how to decide whether the metric has reached a new best, one of 'rel' or 'abs':
    with threshold_mode == 'rel' and mode == 'max', dynamic_threshold = best * (1 + threshold);
    with threshold_mode == 'rel' and mode == 'min', dynamic_threshold = best * (1 - threshold);
    with threshold_mode == 'abs' and mode == 'max', dynamic_threshold = best + threshold;
    with threshold_mode == 'abs' and mode == 'min', dynamic_threshold = best - threshold.

  The remaining arguments bound how the decay is applied: threshold (the improvement margin used in the formulas above), cooldown (number of epochs to wait after a reduction before monitoring resumes), min_lr (a lower bound on the learning rate), and eps (adjustments smaller than this are skipped).
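
To make the plateau behaviour concrete: with mode='min', threshold_mode='rel', and threshold=1e-4, a new value only counts as an improvement if it falls below best * (1 - 1e-4); after patience epochs without such an improvement, every parameter group's learning rate is multiplied by factor. Below is a minimal, runnable sketch of a validation-driven training loop. The tiny linear model, random tensors, and hyperparameter values are illustrative stand-ins, not part of the scheduler's API; the one API-relevant point is that `ReduceLROnPlateau.step()` takes the monitored metric as an argument, unlike epoch-driven schedulers such as `StepLR`.

```python
import torch
import torch.nn as nn

# Stand-in model and data: a tiny regression task so the sketch runs end to end.
model = nn.Linear(10, 1)
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# Multiply the lr by 0.1 if the validation loss has not improved for 10 epochs.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer, mode='min', factor=0.1, patience=10)

x_train, y_train = torch.randn(256, 10), torch.randn(256, 1)
x_val, y_val = torch.randn(64, 10), torch.randn(64, 1)

epochs = 50
train_loss_list = []
val_loss_list = []
for epoch in range(epochs):
    # Training step on the full (toy) batch.
    model.train()
    optimizer.zero_grad()
    loss = criterion(model(x_train), y_train)
    loss.backward()
    optimizer.step()
    train_loss_list.append(loss.item())

    # Validation pass; no gradients needed.
    model.eval()
    with torch.no_grad():
        val_loss = criterion(model(x_val), y_val).item()
    val_loss_list.append(val_loss)

    # The key difference from StepLR etc.: step() receives the monitored metric.
    scheduler.step(val_loss)
    print(f"epoch {epoch}: val_loss={val_loss:.4f}, "
          f"lr={optimizer.param_groups[0]['lr']:.2e}")
```

To monitor validation accuracy instead, construct the scheduler with mode='max' and pass the accuracy value to step().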