PyTorch fine-tuning (finetune): hollow-out design or swapping out layers

This post walks through fine-tuning a model with PyTorch: loading a pretrained model, freezing some of its layers, replacing a chosen layer, and continuing training. Concrete code demonstrates each fine-tuning trick to help readers understand and practice model transfer.
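At a glance, the whole recipe is only a few lines. Below is a minimal sketch, using torchvision's resnet18 purely as an illustration (the sections that follow use a tiny toy MLP instead):

import torch
import torch.nn as nn
import torchvision.models as models

model = models.resnet18(pretrained=True)       # 1. load a pretrained model (newer torchvision prefers the weights= argument)
for param in model.parameters():               # 2. freeze the existing layers
    param.requires_grad = False
model.fc = nn.Linear(model.fc.in_features, 3)  # 3. swap in a fresh head; new modules are trainable by default
optimizer = torch.optim.Adam(                  # 4. optimize only the new head
    filter(lambda p: p.requires_grad, model.parameters()), lr=1e-4)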


Swapping out a layer (偷梁换柱 / 貂尾续狗)

# Imports
import glob
import os

import torch
import matplotlib.pyplot as plt
import random  # used by the data iterator to shuffle samples

# Generate a dataset: x1 is class 0, x2 is class 1
n_data = torch.ones(50, 2)  # base shape of the data
x1 = torch.normal(2 * n_data, 1)  # shape=(50, 2)
y1 = torch.zeros(50)  # class 0, shape=(50,)
x2 = torch.normal(-2 * n_data, 1)  # shape=(50, 2)
y2 = torch.ones(50)  # class 1, shape=(50,)
# Note: x and y must be assembled exactly as below (torch.cat concatenates tensors)
x = torch.cat((x1, x2), 0).type(torch.FloatTensor)
y = torch.cat((y1, y2), 0).type(torch.FloatTensor)

# Visualize the dataset
plt.scatter(x.data.numpy()[:, 0], x.data.numpy()[:, 1], c=y.data.numpy(), s=100, lw=0, cmap='RdYlGn')
plt.show()

# Reading the data:
def data_iter(batch_size, x, y):
    num_examples = len(x)
    indices = list(range(num_examples))
    random.shuffle(indices)  # samples are read in random order
    for i in range(0, num_examples, batch_size):
        j = torch.LongTensor(indices[i: min(i + batch_size, num_examples)])  # the last batch may be short
        yield x.index_select(0, j), y.index_select(0, j)

import torch.nn as nn
import torch.optim as optim



class net(nn.Module):
    def __init__(self, **kwargs):
        super(net, self).__init__(**kwargs)
        self.net = nn.Sequential(
            nn.Linear(2, 2),
            nn.Linear(2, 2),
            nn.Linear(2, 1),
            nn.ReLU())

    def forward(self, x):
        return self.net(x)

def loss(y_hat, y):
    return (y_hat - y.view(y_hat.size())) ** 2 / 2



def accuracy(y_hat, y):  #@save
    """Compute classification accuracy (predictions above 0.5 count as class 1)."""
    pred = (y_hat.type(y.dtype) > 0.5).type(y.dtype)
    # use abs() so that false positives and false negatives do not cancel out
    return 1 - float((pred - y).abs().sum()) / len(y)

lr = 1e-4  # learning rate
num_epochs = 3  # number of epochs
batch_size = 10  # mini-batch size
model = net()
params = list(model.parameters())
optimizer = torch.optim.Adam(params, lr=lr)



def loader(model_path):
    state_dict = torch.load(model_path)
    model_state_dict = state_dict["model_state_dict"]
    optimizer_state_dict = state_dict["optimizer_state_dict"]
    return model_state_dict, optimizer_state_dict

model_state_dict, optimizer_state_dict = loader("h1")
model.load_state_dict(model_state_dict)
optimizer.load_state_dict(optimizer_state_dict)
print('pretrained models loaded!')



# net(
#   (net): Sequential(
#     (0): Linear(in_features=2, out_features=2, bias=True)
#     (1): Linear(in_features=2, out_features=2, bias=True)
#     (2): Linear(in_features=2, out_features=1, bias=True)
#     (3): ReLU()
#   )
# )


for param in model.parameters():
    param.requires_grad = False  # freeze everything loaded from the checkpoint

print(model.net[2])
num_fc_in = model.net[2].in_features
print("input dimension of the fc layer:", num_fc_in)
model.net[2] = nn.Linear(num_fc_in, 3)  # swap the layer out (偷梁换柱 / 貂尾续狗); new modules are trainable by default
print(model)
aa = model.net[1]  # frozen:    Parameter containing: tensor([-0.0303, -0.9412])
aa = model.net[2]  # trainable: Parameter containing: tensor([0.4327, 0.1848, 0.3112], requires_grad=True)
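One pitfall worth flagging: the Adam optimizer constructed earlier still holds references to the old net[2] parameters. A minimal sketch (learning rate arbitrary) of rebuilding it over just the trainable parameters:

trainable = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.Adam(trainable, lr=1e-4)
print([p.shape for p in trainable])  # only the new Linear's weight and bias: [torch.Size([3, 2]), torch.Size([3])]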


Hollow-out design (镂空设计)

# Before:
# net(
#   (net): Sequential(
#     (0): Linear(in_features=2, out_features=2, bias=True)
#     (1): Linear(in_features=2, out_features=2, bias=True)
#     (2): Linear(in_features=2, out_features=1, bias=True)
#     (3): ReLU()
#   )
# )
#
# After replacing net[1] with Identity:
# net(
#   (net): Sequential(
#     (0): Linear(in_features=2, out_features=2, bias=True)
#     (1): Identity()
#     (2): Linear(in_features=2, out_features=1, bias=True)
#     (3): ReLU()
#   )
# )
# https://discuss.pytorch.org/t/how-to-delete-layer-in-pretrained-model/17648/16
class Identity(nn.Module):
    def __init__(self):
        super(Identity, self).__init__()
        
    def forward(self, x):
        return x
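Note that recent PyTorch releases already ship this module as nn.Identity, so the hand-rolled class is only needed on very old versions:

model.net[1] = nn.Identity()  # built-in equivalent of the Identity class above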
# Re-run the same setup as in the first section: imports, synthetic dataset,
# data_iter, the net/loss/accuracy definitions, model and optimizer
# construction, and loading the "h1" checkpoint ('pretrained models loaded!').

# for param in model.parameters():
#     param.requires_grad = False



model.net[1] = Identity()  # hollow out layer 1 using the Identity module defined above

for epoch in range(num_epochs):
    for X, y_train in data_iter(batch_size, x, y):
        optimizer.zero_grad()
        res = model(X)[:, 0]
        l = loss(res, y_train).sum()  # loss over the mini-batch X, y_train
        l.backward()  # a fresh graph is built each iteration, so retain_graph is not needed
        optimizer.step()
        print(l.item())
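The hollow-out trick only works because net[1] mapped 2 features to 2 features, so an Identity module is shape-compatible with its neighbours. A quick sanity check (a sketch):

with torch.no_grad():
    probe = torch.randn(4, 2)
    print(model(probe).shape)  # torch.Size([4, 1]) -- the hollowed pipeline still fits together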



A new head up front (头部担当)

# import some dependencies  https://boscoj2008.github.io/customCNN/
import glob
import os
import torchvision
import torch
import torchvision.transforms as transforms
import numpy as np
import matplotlib.pyplot as plt

import torch.optim as optim
import time
import torch.nn as nn
import torch.nn.functional as F
torch.set_printoptions(linewidth=120)

class Network(nn.Module):  # extend nn.Module class of nn
    def __init__(self):
        super().__init__()  # super class constructor
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=(5, 5))
        self.batchN1 = nn.BatchNorm2d(num_features=6)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=12, kernel_size=(5, 5))
        self.fc1 = nn.Linear(in_features=12 * 4 * 4, out_features=120)
        self.batchN2 = nn.BatchNorm1d(num_features=120)
        self.fc2 = nn.Linear(in_features=120, out_features=60)
        self.out = nn.Linear(in_features=60, out_features=10)

    def forward(self, t):  # implements the forward method (flow of tensors)

        t = self.addconv1(t)  # NOTE: comment this line out when saving the pretrained model; addconv1 is attached only after loading
        # hidden conv layer
        t = self.conv1(t)
        t = F.max_pool2d(input=t, kernel_size=2, stride=2)
        t = F.relu(t)
        t = self.batchN1(t)

        # hidden conv layer
        t = self.conv2(t)
        t = F.max_pool2d(input=t, kernel_size=2, stride=2)
        t = F.relu(t)

        # flatten
        t = t.reshape(-1, 12 * 4 * 4)
        t = self.fc1(t)
        t = F.relu(t)
        t = self.batchN2(t)
        t = self.fc2(t)
        t = F.relu(t)

        # output
        t = self.out(t)

        return t
cnn_model = Network() # init model
print(cnn_model)
mean = 0.2859; std = 0.3530  # MNIST statistics for standardization, computed from the dataset itself (computation skipped in this post)


def saver(model_state_dict, optimizer_state_dict, model_path, epoch, max_to_save=30):
    total_models = glob.glob(model_path + '*')
    if len(total_models) >= max_to_save:
        total_models.sort(key=os.path.getmtime)  # oldest first (a plain sort() is lexicographic: 'h10' < 'h2')
        os.remove(total_models[0])

    state_dict = {}
    state_dict["model_state_dict"] = model_state_dict
    state_dict["optimizer_state_dict"] = optimizer_state_dict

    torch.save(state_dict, model_path + 'h' + str(epoch))
    print('model {} saved successfully!'.format(model_path + 'h' + str(epoch)))


optimizer = optim.Adam(lr=0.01, params=cnn_model.parameters())

# The pretraining run that produced the "h1" checkpoint (run with the
# addconv1 line in forward() commented out), kept here for reference:
# for epoch in range(3):
#     start_time = time.time()
#     total_correct = 0
#     total_loss = 0
#     for batch in range(10):
#         imgs, lbls = torch.rand(10,1,28,28),torch.tensor([0, 5, 3, 4, 4, 4, 7, 6, 2, 5])
#         preds = cnn_model(imgs)  # get preds
#         loss = F.cross_entropy(preds, lbls)  # compute loss
#         optimizer.zero_grad()  # zero grads
#         loss.backward()  # calculates gradients
#         optimizer.step()  # update the weights
#         accuracy = total_correct / 10
#     end_time = time.time() - start_time
#     print("Epoch no.", epoch + 1, "|accuracy: ", round(accuracy, 3), "%", "|total_loss: ", total_loss,
#           "| epoch_duration: ", round(end_time, 2), "sec")
#     saver(cnn_model.state_dict(), optimizer.state_dict(), "./", epoch + 1, max_to_save=100)


# loader() is the same helper defined in the first section.

model_state_dict, optimizer_state_dict = loader("h1")
cnn_model.load_state_dict(model_state_dict)
optimizer.load_state_dict(optimizer_state_dict)
print('pretrained models loaded!')

cnn_model.addconv1 = nn.Conv2d(in_channels=1, out_channels=1, kernel_size=(1, 1))  # attach the new head after loading
optimizer.add_param_group({'params': cnn_model.addconv1.parameters()})  # otherwise the new head would never be updated
for epoch in range(3):
    start_time = time.time()
    total_correct = 0
    total_loss = 0
    for batch in range(10):
        imgs, lbls = torch.rand(10, 1, 28, 28), torch.tensor([0, 5, 3, 4, 4, 4, 7, 6, 2, 5])  # random stand-in data
        preds = cnn_model(imgs)  # get preds
        loss = F.cross_entropy(preds, lbls)  # compute loss
        optimizer.zero_grad()  # zero grads
        loss.backward()  # calculate gradients
        optimizer.step()  # update the weights
        total_correct += preds.argmax(dim=1).eq(lbls).sum().item()
        total_loss += loss.item()
    accuracy = total_correct / 100  # 10 batches of 10 samples each
    end_time = time.time() - start_time
    print("Epoch no.", epoch + 1, "|accuracy: ", round(accuracy, 3), "|total_loss: ", round(total_loss, 3),
          "| epoch_duration: ", round(end_time, 2), "sec")
    saver(cnn_model.state_dict(), optimizer.state_dict(), "./", epoch + 1, max_to_save=100)
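An alternative to attaching addconv1 after loading is to attach it first and then load the checkpoint with strict=False, which tolerates missing keys. A minimal sketch, assuming the same "h1" checkpoint:

fresh = Network()
fresh.addconv1 = nn.Conv2d(in_channels=1, out_channels=1, kernel_size=(1, 1))
state = torch.load("h1")["model_state_dict"]
missing, unexpected = fresh.load_state_dict(state, strict=False)
print(missing)     # ['addconv1.weight', 'addconv1.bias'] -- absent from the checkpoint, left at their fresh init
print(unexpected)  # []

Either way, remember to register the new head's parameters with the optimizer (e.g. via optimizer.add_param_group, as in the loop above).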

References and more

nn.Identity()