从零手写Lora微调案例
此节内容主要结合CNN的一些代码来加以理解,也是为了方便一些只喜欢捣鼓code、不喜欢看理论的开发者,代码如有问题欢迎指正。本代码可以在CPU上运行,因此不需要GPU;如需使用GPU训练,将torch.device改为“cuda”即可。
Lora的快速理解
关于Lora的理解,该博主写得非常详细:
从零开始实现LoRA和DoRA:改进权重分解的低秩适应(DoRA)_dora lora-CSDN博客
CNN数据集的准备
此处所使用到的是EMNIST数据集(我的宗旨就是能简单写的,多一个代码,一个单词都不行,所以就采用了简单的EMNIST数据集),目的就是为了先整个数字识别的CNN模型,再基于数字识别的CNN模型,使用Lora微调一个不仅可以识别数字,还可以识别字母的模型。
数据集格式所用为parquet(此处问AI即可)
CNN 模型设计
数字识别模型
import torch
from torchvision.models import resnet18, ResNet18_Weights
from torch import nn
class NumberNet(nn.Module):
    """ResNet18-based classifier for single-channel (grayscale) images.

    The stem conv is replaced with a 1-channel version (EMNIST images are
    grayscale) and the final fc layer is resized to ``num_classes``.
    """

    def __init__(self, num_classes=10, device=None):
        super().__init__()
        # Bug fix: the original assigned `self.device = device` unconditionally
        # after the None-check, clobbering the auto-detected device with None.
        if device is None:
            device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.device = device
        model = resnet18(weights=ResNet18_Weights.IMAGENET1K_V1)
        # Replace the 3-channel stem conv with a 1-channel one.
        model.conv1 = nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3, bias=False)
        model.fc = nn.Linear(model.fc.in_features, num_classes)
        self.model = model.to(self.device)
        # NOTE(review): forward() returns log-probabilities, but the training
        # scripts pair this model with nn.CrossEntropyLoss, which applies
        # log_softmax internally — consider NLLLoss or dropping this layer.
        self.softmax = nn.LogSoftmax(dim=-1)

    def forward(self, x):
        # Returns log-probabilities of shape (batch, num_classes).
        x = x.to(self.device)
        return self.softmax(self.model(x))
训练使用基类
import torch
def train_epoch(model, features, labels, criterion, optimizer, batch_num, is_train=True):
    """Run one batch: forward + loss, and (when training) backward + step.

    Bug fix: the original ran backward() and optimizer.step() even when
    is_train=False, so "validation" batches silently updated the weights.

    Returns the batch loss as a Python float.
    """
    if is_train:
        optimizer.zero_grad()
        predict_labels = model(features)
        loss = criterion(predict_labels, labels)
        loss.backward()
        optimizer.step()
    else:
        # Evaluation: no graph, no parameter updates.
        with torch.no_grad():
            predict_labels = model(features)
            loss = criterion(predict_labels, labels)
    assign = "train" if is_train else "valid"
    print(f"batch:{batch_num} -- {assign}_loss:{loss.item():.4f}")
    return loss.item()
def accuracy(model, features, labels, batch_num, is_train=True):
    """Compute and print top-1 accuracy of `model` on one batch.

    Returns the accuracy as a Python float in [0, 1] (the original returned a
    0-dim tensor via Python sum() over a bool tensor, which all callers here
    only format/compare — a float is drop-in compatible).
    """
    # Inference only — no gradients needed.
    with torch.no_grad():
        logits = model(features)
        predictions = torch.argmax(logits, dim=-1)
        # Tensor-native mean instead of Python sum() over a bool tensor.
        acc = (predictions == labels).float().mean().item()
    assign = "train" if is_train else "valid"
    print(f"batch:{batch_num} -- {assign}_acc:{acc * 100:.2f}%")
    return acc
def train(model, train_features, valid_features, criterion, optimizer, epochs=100):
    """Full training loop: one training pass + one validation pass per epoch.

    Saves ./save/latest.pt every 10 epochs and ./save/letters_best.pt whenever
    the epoch's validation accuracy improves.
    NOTE(review): the "letters_best.pt" filename is kept as-is — confirm it is
    intended when this loop is reused for the digits model.
    """
    import os  # local import keeps this block self-contained
    # Bug fix: torch.save raises if the target directory does not exist.
    os.makedirs("./save", exist_ok=True)
    device = model.device
    best_acc = 0
    for epoch in range(epochs):
        total_loss = []
        total_acc = []
        model.train()  # ensure batchnorm/dropout run in training mode
        for i, (features, labels) in enumerate(train_features):
            features = features.to(device)
            labels = labels.to(device)
            epoch_loss = train_epoch(model, features, labels, criterion, optimizer, i)
            total_loss.append(epoch_loss)
        # Bug fix: validate in eval mode and without building autograd graphs.
        model.eval()
        with torch.no_grad():
            for i, (features, labels) in enumerate(valid_features):
                features = features.to(device)
                labels = labels.to(device)
                epoch_acc = accuracy(model, features, labels, i, is_train=False)
                total_acc.append(epoch_acc)
        loss = sum(total_loss) / len(total_loss)
        acc = sum(total_acc) / len(total_acc)
        print(f"epoch: {epoch + 1}/{epochs} -- loss:{loss:.4f} -- acc:{acc * 100:.2f}%")
        if (epoch + 1) % 10 == 0:
            torch.save(model.state_dict(), "./save/latest.pt")
        if acc > best_acc:
            torch.save(model.state_dict(), "./save/letters_best.pt")
            best_acc = acc
训练数字识别
import io
from PIL import Image
import pandas as pd
import torch
from torch import nn
from torch.utils.data import DataLoader, Dataset
from torchvision.transforms import ToTensor
from train_utils import train
transform = ToTensor()


def generate_features(df):
    """Decode parquet-stored image bytes into a stacked tensor plus labels.

    Expects a DataFrame with an 'image' column of {'bytes': ...} records and a
    'label' column; returns {"features": Tensor, "labels": Tensor}.
    """
    images = [
        transform(Image.open(io.BytesIO(record['bytes'])))
        for record in df['image'].values
    ]
    return {
        "features": torch.stack(images, dim=0),
        "labels": torch.from_numpy(df['label'].to_numpy()),
    }
class CustomDataset(Dataset):
    """Thin Dataset wrapper around a {"features": ..., "labels": ...} dict."""

    def __init__(self, data):
        super().__init__()
        self.data = data
        self.features, self.labels = data['features'], data['labels']

    def __getitem__(self, index):
        # One (feature, label) pair per index.
        return self.features[index], self.labels[index]

    def __len__(self):
        return len(self.features)
if __name__ == '__main__':
    # Bug fix: NumberNet was used below without being imported anywhere in this
    # script (the LoRA script imports it from number_recognize_net).
    from number_recognize_net import NumberNet

    train_df = pd.read_parquet('./numbers/train-00000-of-00001.parquet')
    valid_df = pd.read_parquet('./numbers/test-00000-of-00001.parquet')
    # Whole datasets are decoded up front — fine for EMNIST-sized data.
    train_dataset = CustomDataset(generate_features(train_df))
    train_loader = DataLoader(train_dataset, batch_size=1000, shuffle=True)
    valid_dataset = CustomDataset(generate_features(valid_df))
    valid_loader = DataLoader(valid_dataset, batch_size=1000)
    device = torch.device("cpu")  # change to "cuda" for GPU training
    model = NumberNet(device=device)
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
    train(model, train_loader, valid_loader, criterion, optimizer, epochs=100)
使用Lora微调数字识别模型
import torch
from torch import nn
import io
from PIL import Image
import pandas as pd
from torch.utils.data import DataLoader, Dataset
from torchvision.transforms import ToTensor
from number_recognize_net import NumberNet
from Lora.train_utils import train
device = torch.device("cpu")
# Load the trained digit-model weights and widen the classifier head from
# 10 classes (digits) to 36 (digits + letters): rows 0-9 keep the trained
# digit weights, rows 10-35 start at zero for the new letter classes.
# NOTE(review): the training loop saved "latest.pt"/"letters_best.pt" —
# confirm that "./save/best.pt" actually exists.
number_model_weights = torch.load("./save/best.pt", weights_only=True)
letter_model_fc_weights = torch.zeros((36, 512))
letter_model_fc_bias = torch.zeros((36,))
letter_model_fc_weights[:10] = number_model_weights['model.fc.weight']
letter_model_fc_bias[:10] = number_model_weights['model.fc.bias']
number_model_weights['model.fc.weight'] = nn.Parameter(letter_model_fc_weights)
number_model_weights['model.fc.bias'] = nn.Parameter(letter_model_fc_bias)
model = NumberNet(num_classes=36, device=device)
model.load_state_dict(number_model_weights)
# Freeze the whole backbone so only the LoRA A/B matrices are trainable later.
for parameter in model.parameters():
    parameter.requires_grad = False
class LoRALayer(nn.Module):
    """Low-rank adapter computing ``alpha * x @ A @ B``.

    A is Gaussian-initialized and scaled by 1/sqrt(rank); B starts at zero,
    so the adapter is a no-op before any training step.
    """

    def __init__(self, in_dim, out_dim, rank, alpha):
        super().__init__()
        scale = torch.rsqrt(torch.tensor(float(rank)))
        self.A = nn.Parameter(scale * torch.randn(in_dim, rank))
        self.B = nn.Parameter(torch.zeros(rank, out_dim))
        self.alpha = alpha

    def forward(self, x):
        return self.alpha * (x @ self.A @ self.B)
class LinearWithLoRA(nn.Module):
    """Wrap an nn.Linear (or pointwise nn.Conv2d) with an additive LoRA path."""

    def __init__(self, linear, rank, alpha):
        super().__init__()
        self.linear = linear
        if isinstance(linear, nn.Conv2d):
            self.lora = LoRALayer(
                linear.in_channels, linear.out_channels, rank, alpha
            )
        elif isinstance(linear, nn.Linear):
            self.lora = LoRALayer(
                linear.in_features, linear.out_features, rank, alpha
            )
        else:
            # Bug fix: an unsupported module previously left self.lora unset
            # and failed later with an opaque AttributeError in forward().
            raise TypeError(
                f"LoRA supports nn.Linear/nn.Conv2d, got {type(linear).__name__}"
            )

    def forward(self, x):
        base = self.linear(x)
        if isinstance(self.linear, nn.Conv2d):
            # Bug fix: the low-rank matmul acts on the LAST dim, but conv input
            # is NCHW, so the original multiplied the width dim by an
            # (in_channels, rank) matrix and crashed. Move channels last, apply
            # the adapter per-pixel, move them back.  This is a pointwise (1x1)
            # update: the spatial dims must match the conv's output (kernel 1,
            # stride 1), otherwise the residual add still shape-errors.
            delta = self.lora(x.permute(0, 2, 3, 1)).permute(0, 3, 1, 2)
            return base + delta
        return base + self.lora(x)
def add_lora_to_model(model):
    """Replace every nn.Linear/nn.Conv2d inside model.model with a LoRA wrapper.

    Mutates the model in place and returns it.
    """
    # Bug fix: snapshot the module list before mutating — named_modules() is a
    # lazy generator over the live tree, so setattr() during iteration can
    # re-visit the freshly inserted wrappers (and re-wrap their inner layers).
    targets = [
        (name, module)
        for name, module in model.model.named_modules()
        if isinstance(module, (nn.Linear, nn.Conv2d))
    ]
    for name, module in targets:
        lora_layer = LinearWithLoRA(module, 4, 8)
        parent_module = model.model
        *path, leaf = name.split('.')
        for part in path:
            parent_module = getattr(parent_module, part)
        setattr(parent_module, leaf, lora_layer)
    return model
# Attach LoRA (rank=4, alpha=8) to the classifier head only.
# NOTE(review): add_lora_to_model above would wrap every linear/conv layer but
# is never called in this script — confirm which behavior is intended.
model.model.fc = LinearWithLoRA(model.model.fc, 4, 8)
transform = ToTensor()


def generate_features(df):
    """Decode the 'image' column's raw bytes into tensors and bundle labels.

    Returns {"features": stacked image Tensor, "labels": label Tensor}.
    """
    decoded = []
    for record in df['image'].values:
        pil_image = Image.open(io.BytesIO(record['bytes']))
        decoded.append(transform(pil_image))
    features = torch.stack(decoded, dim=0)
    labels = torch.from_numpy(df['label'].to_numpy())
    return {"features": features, "labels": labels}
class CustomDataset(Dataset):
    """Indexable dataset over pre-built feature/label tensors."""

    def __init__(self, data):
        super().__init__()
        self.data = data
        self.features = data['features']
        self.labels = data['labels']

    def __getitem__(self, idx):
        return self.features[idx], self.labels[idx]

    def __len__(self):
        # Dataset size == number of feature rows.
        return len(self.features)
if __name__ == '__main__':
    train_df = pd.read_parquet('./letters/train-00000-of-00001.parquet')
    valid_df = pd.read_parquet('./letters/test-00000-of-00001.parquet')
    train_dataset = CustomDataset(generate_features(train_df))
    train_loader = DataLoader(train_dataset, batch_size=1000, shuffle=True)
    valid_dataset = CustomDataset(generate_features(valid_df))
    valid_loader = DataLoader(valid_dataset, batch_size=1000)
    # NOTE(review): this local `device` is unused — the model was already built
    # on CPU at module level above.
    device = torch.device("cpu")
    criterion = nn.CrossEntropyLoss()
    # Bug fix: hand the optimizer only the trainable (LoRA) parameters; the
    # backbone was frozen above, so passing its frozen tensors to SGD is wasted
    # bookkeeping.
    trainable_params = [p for p in model.parameters() if p.requires_grad]
    optimizer = torch.optim.SGD(trainable_params, lr=0.01, momentum=0.9)
    train(model, train_loader, valid_loader, criterion, optimizer, epochs=100)
TODO
- 合并Lora微调代码,做知识蒸馏