Implementing a cosine learning rate schedule in PyTorch

1. Required libraries

Import the libraries and set the base learning rate and the model:

import math
import matplotlib.pyplot as plt
import torch.optim as optim
from torchvision.models import resnet18

lr_rate = 0.1  # base (peak) learning rate
model = resnet18(num_classes=10)

2. Implementing the cosine schedule with LambdaLR

Define the lambda, the optimizer, and the scheduler. LambdaLR multiplies the base learning rate by the lambda's return value, and the argument it passes is the number of scheduler.step() calls made so far. Since step() is called once per batch below, the schedule runs per step, not per epoch: a linear warmup over the first 4,000 steps, then a cosine decay from step 4,000 to step 100,000 (100 epochs × 1,000 batches).

# Linear warmup for 4,000 steps, then cosine decay until step 100,000
lambda1 = lambda step: (step / 4000) if step < 4000 else 0.5 * (math.cos((step - 4000) / (100 * 1000 - 4000) * math.pi) + 1)
optimizer = optim.SGD(model.parameters(), lr=lr_rate, momentum=0.9, nesterov=True)
scheduler = optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=lambda1)
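
For comparison, recent PyTorch versions also ship built-in schedulers that produce the same warmup-plus-cosine shape without a hand-written lambda. A minimal sketch, assuming PyTorch >= 1.10 and the same optimizer as above (note that LinearLR's start_factor must be greater than 0, so this warmup starts near zero rather than exactly at zero):

# Built-in alternative: linear warmup for 4,000 steps, then cosine decay
warmup = optim.lr_scheduler.LinearLR(optimizer, start_factor=1e-3, total_iters=4000)
cosine = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=100 * 1000 - 4000, eta_min=0.0)
scheduler = optim.lr_scheduler.SequentialLR(optimizer, schedulers=[warmup, cosine], milestones=[4000])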

3. Stepping the schedule

Simulate 100 epochs of 1,000 batches each, recording the learning rate before every step:

index = 0
x = []  # global step index
y = []  # learning rate at that step
for epoch in range(100):
    for batch in range(1000):
        x.append(index)
        y.append(optimizer.param_groups[0]['lr'])  # current lr of the first param group
        index += 1
        scheduler.step()  # advance the schedule once per batch
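
The loop above only drives the scheduler in order to trace the curve. In a real training loop, the forward/backward pass and optimizer.step() must come before scheduler.step(); PyTorch emits a warning if the order is reversed. A hedged sketch, where criterion and train_loader are hypothetical stand-ins not defined in this post:

for epoch in range(100):
    for inputs, targets in train_loader:          # hypothetical DataLoader
        optimizer.zero_grad()
        loss = criterion(model(inputs), targets)  # hypothetical loss function
        loss.backward()
        optimizer.step()    # update the weights first...
        scheduler.step()    # ...then advance the learning rate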

4. Visualizing the learning rate

Plot the recorded curve with matplotlib and save it to result.png:

plt.figure(figsize=(10, 8), dpi=200)
plt.xlabel('batch step')
plt.ylabel('learning rate')
plt.plot(x, y, color='r', linewidth=2.0, label='learning rate')
plt.legend(loc='upper right')
plt.savefig('result.png')
plt.show()
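
As a quick sanity check on the recorded values (a small sketch, assuming x and y were filled by the loop in section 3): the peak should sit exactly at the end of the warmup, and the tail should approach zero.

peak_step = max(range(len(y)), key=lambda i: y[i])
print(peak_step, y[peak_step])  # expect step 4000 with lr = 0.1
print(y[-1])                    # expect a value close to 0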

5. Resulting learning rate curve

(result.png: the learning rate ramps up linearly over the first 4,000 steps, then decays along a cosine curve toward 0 at step 100,000.)

TensorBoard learning rate visualization

(The original post shows a TensorBoard screenshot of the same curve here.)
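
A minimal sketch of how to log the same curve to TensorBoard with torch.utils.tensorboard, assuming a freshly constructed optimizer/scheduler pair (view it with tensorboard --logdir runs):

from torch.utils.tensorboard import SummaryWriter

writer = SummaryWriter()  # writes event files to ./runs/ by default
for step in range(100 * 1000):
    writer.add_scalar('learning_rate', optimizer.param_groups[0]['lr'], step)
    scheduler.step()
writer.close()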

Below is code for training a model with an ArcFace head in PyTorch:

```python
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
import argparse
from tqdm import tqdm


# Model definition
class ArcFace(nn.Module):
    def __init__(self, num_classes=10, emb_size=2):
        super(ArcFace, self).__init__()
        self.num_classes = num_classes
        self.emb_size = emb_size
        self.features = nn.Sequential(
            nn.Conv2d(1, 16, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(16),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(16, 32, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(128, emb_size, bias=False)
        self.classifier = nn.Linear(emb_size, num_classes, bias=False)

    def forward(self, x, label=None):
        x = self.features(x)
        x = self.avgpool(x)
        x = x.view(x.size(0), -1)
        x = self.fc(x)
        if label is not None:
            # cos(theta) between the normalized embedding and each class weight
            cosine = F.linear(F.normalize(x), F.normalize(self.classifier.weight))
            # theta, clamped for numerical stability
            theta = torch.acos(torch.clamp(cosine, -1.0 + 1e-7, 1.0 - 1e-7))
            # one-hot encoding of the labels
            one_hot = torch.zeros_like(cosine)
            one_hot.scatter_(1, label.view(-1, 1).long(), 1)
            # cos(theta + m) with angular margin m = 0.1
            new_cosine = torch.cos(theta + 0.1)
            # apply the margin to the target class only
            output = one_hot * new_cosine + (1.0 - one_hot) * cosine
        else:
            output = self.classifier(x)
        return output


# Training loop
def train(model, device, train_loader, optimizer, epoch, log_interval):
    model.train()
    train_loss = 0
    correct = 0
    total = 0
    pbar = tqdm(train_loader)
    for batch_idx, (data, labels) in enumerate(pbar):
        data, labels = data.to(device), labels.to(device)
        optimizer.zero_grad()
        output = model(data, labels)
        loss = F.cross_entropy(output, labels)
        loss.backward()
        optimizer.step()
        train_loss += loss.item()
        _, predicted = output.max(1)
        total += labels.size(0)
        correct += predicted.eq(labels).sum().item()
        pbar.set_description('Train epoch {} Loss: {:.6f} Acc: {:.4f}'.format(
            epoch, train_loss / (batch_idx + 1), 100. * correct / total))
        # Periodic logging
        if batch_idx % log_interval == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f} Acc: {:.4f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item(), 100. * correct / total))


# Evaluation loop
def test(model, device, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, labels in test_loader:
            data, labels = data.to(device), labels.to(device)
            output = model(data)
            test_loss += F.cross_entropy(output, labels, reduction='sum').item()
            _, predicted = output.max(1)
            correct += predicted.eq(labels).sum().item()
    test_loss /= len(test_loader.dataset)
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))


def main():
    # Parse command-line arguments
    parser = argparse.ArgumentParser(description='PyTorch ArcFace Example')
    parser.add_argument('--batch-size', type=int, default=128, metavar='N',
                        help='input batch size for training (default: 128)')
    parser.add_argument('--test-batch-size', type=int, default=128, metavar='N',
                        help='input batch size for testing (default: 128)')
    parser.add_argument('--epochs', type=int, default=10, metavar='N',
                        help='number of epochs to train (default: 10)')
    parser.add_argument('--lr', type=float, default=0.1, metavar='LR',
                        help='learning rate (default: 0.1)')
    parser.add_argument('--momentum', type=float, default=0.9, metavar='M',
                        help='SGD momentum (default: 0.9)')
    parser.add_argument('--no-cuda', action='store_true', default=False,
                        help='disables CUDA training')
    parser.add_argument('--seed', type=int, default=1, metavar='S',
                        help='random seed (default: 1)')
    parser.add_argument('--log-interval', type=int, default=10, metavar='N',
                        help='how many batches to wait before logging training status')
    parser.add_argument('--save-model', action='store_true', default=False,
                        help='save the trained model')
    args = parser.parse_args()

    # Decide whether to use the GPU
    use_cuda = not args.no_cuda and torch.cuda.is_available()
    torch.manual_seed(args.seed)
    device = torch.device('cuda' if use_cuda else 'cpu')

    # Load the data
    kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,))
    ])
    train_dataset = datasets.MNIST('../data', train=True, download=True, transform=transform)
    test_dataset = datasets.MNIST('../data', train=False, transform=transform)
    train_loader = DataLoader(train_dataset, batch_size=args.batch_size, shuffle=True, **kwargs)
    test_loader = DataLoader(test_dataset, batch_size=args.test_batch_size, shuffle=True, **kwargs)

    # Initialize the model and optimizer
    model = ArcFace(num_classes=10, emb_size=2).to(device)
    optimizer = torch.optim.SGD(model.parameters(), lr=args.lr, momentum=args.momentum)

    # Train and evaluate
    for epoch in range(1, args.epochs + 1):
        train(model, device, train_loader, optimizer, epoch, args.log_interval)
        test(model, device, test_loader)

    # Save the model
    if args.save_model:
        torch.save(model.state_dict(), 'arcface_mnist.pt')


if __name__ == '__main__':
    main()
```

Note that this model is trained on the MNIST dataset; to use a different dataset, change the data-loading code accordingly. The code is only an example and will need adjustments for real use; in particular, full ArcFace also scales the cosine logits by a factor s (e.g. 64) before the softmax, which this example omits.
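
As a usage sketch (assuming the script above was run with --save-model, producing arcface_mnist.pt), the 2-D embeddings can be extracted by running the backbone up to the fc layer; the batch of random tensors here is only a stand-in for real MNIST images:

```python
import torch

model = ArcFace(num_classes=10, emb_size=2)
model.load_state_dict(torch.load('arcface_mnist.pt', map_location='cpu'))
model.eval()

with torch.no_grad():
    images = torch.randn(4, 1, 28, 28)    # stand-in for a batch of MNIST digits
    feats = model.avgpool(model.features(images)).flatten(1)
    embeddings = model.fc(feats)          # 2-D embeddings, e.g. for plotting
print(embeddings.shape)                   # torch.Size([4, 2])
```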