[Personal notes] torch.optim.lr_scheduler.ReduceLROnPlateau

Optimization strategy: understanding learning-rate adjustment in torch.optim.lr_scheduler.ReduceLROnPlateau
This note explains how to use the ReduceLROnPlateau scheduler in PyTorch, covering the adjustment mechanism in detail: when the learning rate changes, where the new learning rate is stored, and what `threshold` actually means. Using the source code and the official docstring, it should help you understand the role this optimization tool plays in deep-learning training.

I recently needed torch.optim.lr_scheduler.ReduceLROnPlateau, but I haven't read any paper behind it, and most online write-ups are vague about the `threshold` part.
I have two main questions about this API:

  1. When ReduceLROnPlateau kicks in, how is the learning rate actually changed (in other words, is the new learning rate stored in the optimizer or in the lr_scheduler)?
  2. What exactly does "significant" mean in the `threshold` description?

The source code of torch.optim.lr_scheduler.ReduceLROnPlateau is pasted below; if you just want the conclusions, scroll to the bottom of the post.

class ReduceLROnPlateau(object):
    """Reduce learning rate when a metric has stopped improving.
    Models often benefit from reducing the learning rate by a factor
    of 2-10 once learning stagnates. This scheduler reads a metrics
    quantity and if no improvement is seen for a 'patience' number
    of epochs, the learning rate is reduced.

    Args:
        optimizer (Optimizer): Wrapped optimizer.
        mode (str): One of `min`, `max`. In `min` mode, lr will
            be reduced when the quantity monitored has stopped
            decreasing; in `max` mode it will be reduced when the
            quantity monitored has stopped increasing. Default: 'min'.
        factor (float): Factor by which the learning rate will be
            reduced. new_lr = lr * factor. Default: 0.1.
        patience (int): Number of epochs with no improvement after
            which learning rate will be reduced. For example, if
            `patience = 2`, then we will ignore the first 2 epochs
            with no improvement, and will only decrease the LR after the
            3rd epoch if the loss still hasn't improved then.
            Default: 10.
        threshold (float): Threshold for measuring the new optimum,
            to only focus on significant changes. Default: 1e-4.
        threshold_mode (str): One of `rel`, `abs`. In `rel` mode,
            dynamic_threshold = best * ( 1 + threshold ) in 'max'
            mode or best * ( 1 - threshold ) in `min` mode.
            In `abs` mode, dynamic_threshold = best + threshold in
            `max` mode or best - threshold in `min` mode. Default: 'rel'.
        cooldown (int): Number of epochs to wait before resuming
            normal operation after lr has been reduced. Default: 0.
        min_lr (float or list): A scalar or a list of scalars. A
            lower bound on the learning rate of all param groups
            or each group respectively. Default: 0.
        eps (float): Minimal decay applied to lr. If the difference
            between new and old lr is smaller than eps, the update is
            ignored. Default: 1e-8.
        verbose (bool): If ``True``, prints a message to stdout for
            each update. Default: ``False``.

    Example:
        >>> optimizer = torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.9)
        >>> scheduler = ReduceLROnPlateau(optimizer, 'min')
        >>> for epoch in range(10):
        >>>     train(...)
        >>>     val_loss = validate(...)
        >>>     # Note that step should be called after validate()
        >>>     scheduler.step(val_loss)
    """

    def __init__(self, optimizer, mode='min', factor=0.1, patience=10,
                 threshold=1e-4, threshold_mode='rel', cooldown=0,
                 min_lr=0, eps=1e-8, verbose=False):

        if factor >= 1.0:
            raise ValueError('Factor should be < 1.0.')
        self.factor = factor

        # Attach optimizer
        if not isinstance(optimizer, Optimizer):
            raise TypeError('{} is not an Optimizer'.format(
                type(optimizer).__name__))
        self.optimizer = optimizer
        # ... (the rest of __init__ only validates and stores the
        # remaining arguments, so it is omitted here)
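The paste was cut off above; the methods below are the part of the same class that actually answers both questions. They are quoted, lightly abridged, from a PyTorch 1.x era release (newer versions reorganize this code a little, but the logic is the same):

    @property
    def in_cooldown(self):
        return self.cooldown_counter > 0

    def step(self, metrics, epoch=None):
        # `metrics` is the monitored quantity (e.g. the validation loss)
        current = float(metrics)
        if epoch is None:
            epoch = self.last_epoch + 1
        self.last_epoch = epoch

        if self.is_better(current, self.best):
            self.best = current
            self.num_bad_epochs = 0
        else:
            self.num_bad_epochs += 1

        if self.in_cooldown:
            self.cooldown_counter -= 1
            self.num_bad_epochs = 0  # ignore any bad epochs in cooldown

        if self.num_bad_epochs > self.patience:
            self._reduce_lr(epoch)
            self.cooldown_counter = self.cooldown
            self.num_bad_epochs = 0

    def _reduce_lr(self, epoch):
        # The new lr is written straight into the optimizer's param_groups;
        # the scheduler does not keep a separate "current lr" of its own.
        for i, param_group in enumerate(self.optimizer.param_groups):
            old_lr = float(param_group['lr'])
            new_lr = max(old_lr * self.factor, self.min_lrs[i])
            if old_lr - new_lr > self.eps:
                param_group['lr'] = new_lr

    def is_better(self, a, best):
        # This is where "significant" is defined, via threshold.
        if self.mode == 'min' and self.threshold_mode == 'rel':
            return a < best * (1. - self.threshold)
        elif self.mode == 'min' and self.threshold_mode == 'abs':
            return a < best - self.threshold
        elif self.mode == 'max' and self.threshold_mode == 'rel':
            return a > best * (self.threshold + 1.)
        else:  # mode == 'max' and threshold_mode == 'abs'
            return a > best + self.threshold

_reduce_lr answers question 1: the scheduler multiplies each param group's lr by factor and assigns the result back into optimizer.param_groups[i]['lr'], so the new learning rate lives in the optimizer; the scheduler only decides when to lower it. is_better answers question 2: with the defaults (mode='min', threshold_mode='rel', threshold=1e-4), an epoch counts as an improvement only if the metric falls below best * (1 - 1e-4). For example, if the best validation loss so far is 1.0, a new loss of 0.99995 is not "significant" (it is not below 0.9999), so num_bad_epochs still goes up.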
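To verify the conclusions end to end, here is a minimal runnable sketch (the tiny model and the constant loss value are made up purely for the demo): it feeds a flat validation loss to the scheduler and reads the learning rate back from the optimizer.

import torch

# A throwaway model, just to have parameters to optimize.
net = torch.nn.Linear(2, 1)
optimizer = torch.optim.SGD(net.parameters(), lr=0.1)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer, mode='min', factor=0.1, patience=2, threshold=1e-4)

# A constant "validation loss" never improves significantly, so once
# patience is exhausted (the 3rd bad epoch) the lr is multiplied by factor.
for epoch in range(5):
    scheduler.step(1.0)
    # The current lr is read from the optimizer, not from the scheduler.
    print(epoch, optimizer.param_groups[0]['lr'])

# Expected (up to float repr): 0.1 for epochs 0-2, then 0.01 from epoch 3 on.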