PyTorch lr_scheduler: Usage and Visualization of Each Scheduler

Reference blogs:
  1. https://www.pythonheidong.com/blog/article/511529/e49e08939f608d9736fe/
  2. https://blog.youkuaiyun.com/xiaotuzigaga/article/details/87879198
  3. https://blog.youkuaiyun.com/baoxin1100/article/details/107446538
Here are a few schedulers that I didn't know very well before and rarely used.
  • CyclicLR


I haven't used this scheduler before, but it probably works well; I'll try it sometime. The mode argument has three options: "triangular" keeps the same amplitude every cycle; "triangular2" halves the amplitude after each cycle (i.e. a scaling factor of 0.5); "exp_range" shrinks the amplitude by a factor of gamma**(cycle iterations), configurable through the "gamma" parameter.

import torch
import matplotlib.pyplot as plt
lr = 0.001
epochs = 10
iters = 4
cyc_epoch = 1
# triangular, triangular2, exp_range
# Triangular shape: 0.0001 is base_lr (the minimum), 0.001 is max_lr (the maximum),
# and cyc_epoch*iters is step_size_up, the number of iterations in the rising half of a cycle
scheduler_cyc0 = torch.optim.lr_scheduler.CyclicLR(torch.optim.SGD([torch.ones(1)], lr), 0.0001, 0.001, cyc_epoch*iters, mode='triangular')
scheduler_cyc1 = torch.optim.lr_scheduler.CyclicLR(torch.optim.SGD([torch.ones(1)], lr), 0.0001, 0.001, cyc_epoch*iters, mode='triangular2')
scheduler_cyc2 = torch.optim.lr_scheduler.CyclicLR(torch.optim.SGD([torch.ones(1)], lr), 0.0001, 0.001, cyc_epoch*iters, mode='exp_range', gamma=0.8)                         
triangular  = []
triangular2 = []
exp_range  = []
for epoch in range(epochs):
    for i in range(iters):
        # print (scheduler_cyc0.get_lr()) #[0.000325]
        triangular += (scheduler_cyc0.get_lr()) # for lists, += and append behave differently; see the note below
        triangular2 += scheduler_cyc1.get_lr()
        exp_range  += scheduler_cyc2.get_lr()
        scheduler_cyc0.step()
        scheduler_cyc1.step()
        scheduler_cyc2.step()
print (triangular)
print (triangular2)
print (exp_range)
x = list(range(len(triangular)))
plt.figure(figsize=(12,7))
plt.plot(x, triangular, "r")
plt.plot(x, triangular2, "g--")
plt.plot(x, exp_range, "b-.")
# plt.plot(triangular2)
plt.legend(['triangular','triangular2','exp_range'], fontsize=20)
plt.xlabel('iters', size=15)
plt.ylabel('lr', size=15)
# plt.savefig("CyclicLR.png")
plt.show()

A small detail worth noting here, which I had run into before: += with a list extends, while append nests.

a, b = [1], [2]
a.append(b) # [1, [2]]
a += b      # [1, 2]
  • CosineAnnealingWarmRestarts && OneCycleLR
lr = 0.001
epochs = 100
iters = 32
# T_0 is the length of the first cosine-annealing cycle; T_mult is the cycle growth factor.
# E.g. T_0=5, T_mult=5 gives cycles of 5, 25, 125 iterations, i.e. restarts at 5, 5+25, 5+25+125
scheduler_cosW = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(torch.optim.SGD([torch.ones(1)], lr), T_0=500, T_mult=5)
# The lr starts at initial_lr = max_lr/div_factor and rises to max_lr over the first
# pct_start*total_steps (= epochs*steps_per_epoch) steps with cos (or linear) annealing,
# then anneals down with cos (or linear) to min_lr = initial_lr/final_div_factor
scheduler_onecyc = torch.optim.lr_scheduler.OneCycleLR(torch.optim.SGD([torch.ones(1)], lr), max_lr=lr, steps_per_epoch=iters, epochs=epochs, anneal_strategy='cos')
scheduler_onecyc_linear = torch.optim.lr_scheduler.OneCycleLR(torch.optim.SGD([torch.ones(1)], lr), max_lr=lr, steps_per_epoch=iters, epochs=epochs, anneal_strategy='linear')
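
The constructors above only set the schedulers up; a minimal stepping-and-plotting loop in the same style as the CyclicLR example (the list names here are mine) produces the curves below:

lrs_cos, lrs_onecyc, lrs_onecyc_lin = [], [], []
for epoch in range(epochs):
    for i in range(iters):
        # record the current lr of each scheduler, then advance it one iteration
        # (newer PyTorch prefers get_last_lr(); get_lr() matches the style used above)
        lrs_cos += scheduler_cosW.get_lr()
        lrs_onecyc += scheduler_onecyc.get_lr()
        lrs_onecyc_lin += scheduler_onecyc_linear.get_lr()
        scheduler_cosW.step()
        scheduler_onecyc.step()
        scheduler_onecyc_linear.step()

x = list(range(len(lrs_cos)))
plt.figure(figsize=(12,7))
plt.plot(x, lrs_cos, "r", x, lrs_onecyc, "g--", x, lrs_onecyc_lin, "b-.")
plt.legend(['CosineAnnealingWarmRestarts','OneCycleLR(cos)','OneCycleLR(linear)'], fontsize=20)
plt.xlabel('iters', size=15)
plt.ylabel('lr', size=15)
plt.show()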

[Figure: CosineAnnealingWarmRestarts vs. OneCycleLR learning-rate curves]
As the figure shows, OneCycleLR is really what we usually mean by "warmup". Surprisingly, the anneal_strategy argument seemed to have no effect in my run: both curves annealed with cos. I think OneCycleLR can simply be used directly from now on. Of course, some other schedulers can also be given warmup with small modifications of your own, as in the classes below.

import math
from torch.optim.lr_scheduler import MultiStepLR, _LRScheduler

class WarmupMultiStepLR(MultiStepLR):
    r"""
    # max_iter = epochs * steps_per_epoch
    Args:
        optimizer (Optimizer): Wrapped optimizer.
        max_iter (int): The total number of steps.
        milestones (list): List of iter indices. Must be increasing.
        gamma (float): Multiplicative factor of learning rate decay. Default: 0.1.
        pct_start (float): The percentage of the cycle (in number of steps) spent
                    increasing the learning rate.
                    Default: 0.3
        warmup_factor (float): lr multiplier at the very start of warmup; the lr
                    ramps linearly from warmup_factor * base_lr up to base_lr. Default: 1/2.
        last_epoch (int): The index of last epoch. Default: -1.
    """
    def __init__(self, optimizer, max_iter, milestones, gamma=0.1, pct_start=0.3, warmup_factor=1.0 / 2,
                  last_epoch=-1):
        self.warmup_factor = warmup_factor
        self.warmup_iters = int(pct_start * max_iter)
        super().__init__(optimizer, milestones, gamma, last_epoch)

    def get_lr(self):
        if self.last_epoch <= self.warmup_iters:
            alpha = self.last_epoch / self.warmup_iters
            warmup_factor = self.warmup_factor * (1 - alpha) + alpha
            return [lr * warmup_factor for lr in self.base_lrs]
        else:
            return super().get_lr()

class WarmupCosineLR(_LRScheduler):
    def __init__(self, optimizer, max_iter, pct_start=0.3, warmup_factor=1.0 / 3, 
                 eta_min=0, last_epoch=-1):
        self.warmup_factor = warmup_factor
        self.warmup_iters = int(pct_start * max_iter)
        self.max_iter, self.eta_min = max_iter, eta_min
        super().__init__(optimizer, last_epoch)

    def get_lr(self):
        if self.last_epoch <= self.warmup_iters:
            alpha = self.last_epoch / self.warmup_iters
            warmup_factor = self.warmup_factor * (1 - alpha) + alpha
            return [lr * warmup_factor for lr in self.base_lrs]
        else:
            # print ("after warmup")
            return [self.eta_min + (base_lr - self.eta_min) *
                    (1 + math.cos(
                        math.pi * (self.last_epoch - self.warmup_iters) / (self.max_iter - self.warmup_iters))) / 2
                    for base_lr in self.base_lrs]

class WarmupPolyLR(_LRScheduler):
    def __init__(self, optimizer, T_max, pct_start=0.3, warmup_factor=1.0 / 4, 
                 eta_min=0, power=0.9):
        self.warmup_factor = warmup_factor
        self.warmup_iters = int(pct_start * T_max)
        self.power = power
        self.T_max, self.eta_min = T_max, eta_min
        super().__init__(optimizer)

    def get_lr(self):
        if self.last_epoch <= self.warmup_iters:
            alpha = self.last_epoch / self.warmup_iters
            warmup_factor = self.warmup_factor * (1 - alpha) + alpha
            return [lr * warmup_factor for lr in self.base_lrs]
        else:
            return [self.eta_min + (base_lr - self.eta_min) *
                    math.pow(1 - (self.last_epoch - self.warmup_iters) / (self.T_max - self.warmup_iters),
                             self.power) for base_lr in self.base_lrs]
if __name__ == '__main__':

    import matplotlib.pyplot as plt
    import torch
    import sys
    # sys.setrecursionlimit(12000)
    max_iter = 10000
    lr = 5e-4
    # Each scheduler gets its own optimizer; sharing a single optimizer would let the
    # schedulers overwrite each other's lr (MultiStepLR reads the current lr back
    # from the optimizer's param_groups).
    opt_wp = torch.optim.SGD([torch.ones(1)], lr)
    opt_ws = torch.optim.SGD([torch.ones(1)], lr)
    opt_wm = torch.optim.SGD([torch.ones(1)], lr)

    scheduler_WP = WarmupPolyLR(opt_wp, T_max=max_iter)
    scheduler_WS = WarmupCosineLR(opt_ws, max_iter)
    scheduler_WM = WarmupMultiStepLR(opt_wm, max_iter, [5000, 7000, 9000])

    lrs_wp = []
    lrs_ws = []
    lrs_wm = []

    for cur_iter in range(max_iter):

        # lr = opt_wp.param_groups[0]['lr']
        # lrs_wp.append(scheduler_WP.get_lr()[0])

        lrs_wp += scheduler_WP.get_lr()
        lrs_ws += scheduler_WS.get_lr()
        lrs_wm += scheduler_WM.get_lr()

        # (in real training, each optimizer's step() would be called here)
        scheduler_WP.step()
        scheduler_WS.step()
        scheduler_WM.step()

    x = list(range(len(lrs_wm)))
    plt.figure(figsize=(12,7))
    plt.plot(x, lrs_wp, "r",
            x, lrs_ws, "g--",
            x, lrs_wm, "b-.")
    # plt.plot(triangular2)
    plt.legend(['WarmupPolyLR','WarmupCosineLR','WarmupMultiStepLR'], fontsize=20)
    plt.xlabel('iters', size=15)
    plt.ylabel('lr', size=15)
    # plt.savefig("CyclicLR.png")
    plt.show()

[Figure: WarmupPolyLR vs. WarmupCosineLR vs. WarmupMultiStepLR learning-rate curves]

  • Other
class MyLRScheduler(object):
    '''
    Cyclic learning rate that decays linearly within each cycle and restarts at the
    maximum value; after ep_cycle epochs it switches to a single linear decay.
    '''
    def __init__(self, initial=0.1, cycle_len=5, ep_cycle=50, ep_max=100):
        super(MyLRScheduler, self).__init__()

        self.min_lr = initial  # minimum learning rate
        self.m = cycle_len
        self.ep_cycle = ep_cycle
        self.ep_max = ep_max
        self.poly_start = initial
        self.step = initial / self.ep_cycle
        print('Using Cyclic LR Scheduler with warm restarts and poly step '
              + str(self.step))

    def get_lr(self, epoch):
        if epoch == 0:
            current_lr = self.min_lr
        elif 0 < epoch <= self.ep_cycle:
            counter = (epoch - 1) % self.m
            current_lr = round((self.min_lr * self.m) - (counter * self.min_lr), 5)
        else:
            current_lr = round(self.poly_start - (epoch - self.ep_cycle) * self.step, 8)
            # polynomial variant:
            # current_lr = round(self.poly_start * (1 - (epoch-self.ep_cycle) / (self.ep_max-self.ep_cycle)) ** 0.9, 8)
        return current_lr


class WarmupPoly(object):
    '''
    Polynomial-decay learning rate with a polynomial warmup phase at the start.
    '''
    def __init__(self, init_lr, total_ep, warmup_ratio=0.05, poly_pow = 0.98):
        super(WarmupPoly, self).__init__()
        self.init_lr = init_lr
        self.total_ep = total_ep
        self.warmup_ep = int(warmup_ratio*total_ep)
        print("warup unitl " + str(self.warmup_ep))
        self.poly_pow = poly_pow

    def get_lr(self, epoch):
        if epoch < self.warmup_ep:
            curr_lr = self.init_lr * pow((epoch + 1) / self.warmup_ep, self.poly_pow)
        else:
            curr_lr = self.init_lr * pow(1 - (epoch - self.warmup_ep) / (self.total_ep - self.warmup_ep), self.poly_pow)
        return curr_lr

if __name__ == '__main__':
    import matplotlib.pyplot as plt
    max_epochs = 300
    lrSched = MyLRScheduler(initial=0.0001, cycle_len=10, ep_cycle=150, ep_max=300)
    lrSched1 = WarmupPoly(1e-3, max_epochs, poly_pow=0.95)

    x = []
    y = []
    y1 = []
    for i in range(max_epochs):
        x.append(i)
        y.append(lrSched.get_lr(i))
        y1.append(lrSched1.get_lr(i))
    print (y[0], y[-1]) # 0.0001 6.7e-07
    print (y1[0], y1[-1]) # 7.633317097623927e-05 4.654760957913004e-06
    plt.figure(figsize=(12,7))
    plt.plot(x, y, "r",
            x, y1, "b-.")
    plt.legend(['cyclic','WarmupPoly',], fontsize=20)
    plt.xlabel('iters', size=15)
    plt.ylabel('lr', size=15)
    plt.savefig("MyLRScheduler.png")
    plt.show()

[Figure: MyLRScheduler (cyclic) vs. WarmupPoly learning-rate curves]

  • I also noticed that SWA (Stochastic Weight Averaging) now appears at the end of the PyTorch optim docs; I'll try it on a real task later. A minimal sketch of the documented usage is shown below.
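
The following sketch is based on the torch.optim.swa_utils example in the PyTorch docs (requires PyTorch >= 1.6); the placeholder model, the loop body, and the 75-epoch switch point are illustrative choices of mine, not from this post:

from torch.optim.swa_utils import AveragedModel, SWALR

model = torch.nn.Linear(10, 2)  # placeholder model
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=100)
swa_model = AveragedModel(model)        # keeps a running average of the weights
swa_scheduler = SWALR(optimizer, swa_lr=0.05)
swa_start = 75                          # epoch at which averaging begins

for epoch in range(100):
    # ... forward / backward / optimizer.step() on the training batches ...
    if epoch > swa_start:
        swa_model.update_parameters(model)  # accumulate the weight average
        swa_scheduler.step()                # anneal lr to and hold it at swa_lr
    else:
        scheduler.step()

# Before evaluating, recompute BatchNorm statistics for the averaged model:
# torch.optim.swa_utils.update_bn(train_loader, swa_model)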