PyTorch classic network fine-tuning

This post walks through fine-tuning classic deep-learning models in PyTorch, covering both full fine-tuning and feature extraction.


Applies to PyTorch versions >= 1.0.0.

# -*- coding:utf-8 -*-
from __future__ import print_function
from __future__ import division
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
import matplotlib.pyplot as plt
import time
import os
import copy
print("PyTorch Version: ",torch.__version__)
print("Torchvision Version: ",torchvision.__version__)
# PyTorch Version:  1.0.0
# Torchvision Version:  0.4


# Step 2: Model training and evaluation functions
def train_model(model, dataloaders, criterion, optimizer, num_epochs=25, is_inception=False):
    since = time.time()

    val_acc_history = []

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Each epoch has a training and a validation phase.
        for phase in ['train', 'val']:
            if phase == 'train':
                model.train()  # Set model to training mode
            else:
                model.eval()   # Set model to evaluate mode

            running_loss = 0.0
            running_corrects = 0

            # Iterate over data.
            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(device)
                labels = labels.to(device)

                # zero the parameter gradients
                optimizer.zero_grad()

                # forward
                # track history if only in train
                with torch.set_grad_enabled(phase == 'train'):
                    # Compute the model outputs and the loss.
                    # The inception model is a special case: in training mode it also
                    # returns an auxiliary output with its own loss, and the total loss
                    # is final_loss + 0.4 * aux_loss. In eval mode only the final
                    # output's loss is used.

                    if is_inception and phase == 'train':
                        outputs, aux_outputs = model(inputs)
                        loss1 = criterion(outputs, labels)
                        loss2 = criterion(aux_outputs, labels)
                        loss = loss1 + 0.4*loss2
                    else:
                        outputs = model(inputs)
                        loss = criterion(outputs, labels)

                    _, preds = torch.max(outputs, 1)

                    # backward + optimize only if in training phase
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                # statistics
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)

            epoch_loss = running_loss / len(dataloaders[phase].dataset)
            epoch_acc = running_corrects.double() / len(dataloaders[phase].dataset)

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(phase, epoch_loss, epoch_acc))

            # deep copy the model
            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())
            if phase == 'val':
                val_acc_history.append(epoch_acc)

        print()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:.4f}'.format(best_acc))

    # load best model weights
    model.load_state_dict(best_model_wts)
    
    return model, val_acc_history
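

# train_model() expects `dataloaders` to be a dict with 'train' and 'val' keys. Below is
# a minimal sketch of building it with torchvision; the data directory, batch size, and
# input size are illustrative assumptions, not values from the original post. The
# directory must follow the ImageFolder layout:
# data_dir/train/<class>/*.jpg and data_dir/val/<class>/*.jpg.
data_dir = "./data"  # illustrative path
input_size = 224     # 299 for inception_v3 (see the note below)
norm_mean = [0.485, 0.456, 0.406]  # ImageNet statistics, since the models are ImageNet-pretrained
norm_std = [0.229, 0.224, 0.225]

data_transforms = {
    'train': transforms.Compose([
        transforms.RandomResizedCrop(input_size),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(norm_mean, norm_std),
    ]),
    'val': transforms.Compose([
        transforms.Resize(input_size),
        transforms.CenterCrop(input_size),
        transforms.ToTensor(),
        transforms.Normalize(norm_mean, norm_std),
    ]),
}
image_datasets = {x: datasets.ImageFolder(os.path.join(data_dir, x), data_transforms[x])
                  for x in ['train', 'val']}
dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], batch_size=8,
                                              shuffle=True, num_workers=4)
               for x in ['train', 'val']}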

# When the model is used for feature extraction, the frozen layers need .requires_grad=False.
def set_parameter_requires_grad(model, feature_extracting):
    if feature_extracting:
        for param in model.parameters():
            param.requires_grad = False

"""
finetuning 和 feature-extraction 的区别:
[1] - 特征提取时,只需更新最后一层网络层的参数;即,只更新修改的网络层的参数,而对于未修改的其它网络层不进行参数更新.
 故,效率起见,设置 .requires_grad=False.
[2] - 模型 finetuning 时,需要设置全部网络层的 .requires_grad=True(默认).

除了 inception_v3 的网络输入尺寸为 (299, 299),其它模型的网络输入均为 (224, 224).
"""
# Step 3: Network initialization and setup
def initialize_model(model_name, num_classes, feature_extract, use_pretrained=True):
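    # The original post is cut off at this point. What follows is a minimal sketch of
    # the body, reconstructed from the notes above (feature extraction freezes everything
    # except the replaced head; inception_v3 takes 299x299 inputs and has an auxiliary
    # classifier). ResNet-18 and Inception v3 are shown as examples; other torchvision
    # models follow the same replace-the-head pattern.
    model_ft = None
    input_size = 0

    if model_name == "resnet":
        model_ft = models.resnet18(pretrained=use_pretrained)
        set_parameter_requires_grad(model_ft, feature_extract)
        num_ftrs = model_ft.fc.in_features
        model_ft.fc = nn.Linear(num_ftrs, num_classes)  # new head, requires_grad=True by default
        input_size = 224
    elif model_name == "inception":
        model_ft = models.inception_v3(pretrained=use_pretrained)
        set_parameter_requires_grad(model_ft, feature_extract)
        # The auxiliary classifier used by the training-time aux loss must be replaced too.
        num_ftrs = model_ft.AuxLogits.fc.in_features
        model_ft.AuxLogits.fc = nn.Linear(num_ftrs, num_classes)
        num_ftrs = model_ft.fc.in_features
        model_ft.fc = nn.Linear(num_ftrs, num_classes)
        input_size = 299
    else:
        raise ValueError("Unsupported model_name: {}".format(model_name))

    return model_ft, input_size


# Illustrative end-to-end wiring; the model name, class count, learning rate, and epoch
# count are assumptions, not values from the original post. Only parameters with
# requires_grad=True are handed to the optimizer, so with feature_extract=True only the
# new head is trained.
model_ft, input_size = initialize_model("resnet", num_classes=2,
                                        feature_extract=True, use_pretrained=True)
model_ft = model_ft.to(device)

params_to_update = [p for p in model_ft.parameters() if p.requires_grad]
optimizer_ft = optim.SGD(params_to_update, lr=0.001, momentum=0.9)
criterion = nn.CrossEntropyLoss()

model_ft, hist = train_model(model_ft, dataloaders, criterion, optimizer_ft,
                             num_epochs=15, is_inception=False)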