LoRA Fine-Tuning in Practice
Project Background
In a question-answering system there is a class of business scenarios where the user's question is highly time-sensitive and the answer has to come from calling a specific interface that returns live data. Neither pure generation from a pretrained model nor a retrieve-then-generate (RAG) pipeline can satisfy this need.
For example, a user asks: "请告诉我齐齐哈尔今天的天气?" (Please tell me today's weather in Qiqihar). This has to trigger a call to the weather interface.
Solution Approach
To call the right interface for a user's question, we would first need to determine the intent, i.e. which interface to call; that step is skipped here. We go directly from the user's question to the corresponding interface call. The main problem is how to extract the parameters from the user's question and map them onto the interface parameters.
For example, the user asks: "南京2025年3月3日的天气怎么样?" (What is the weather in Nanjing on March 3, 2025?)
The model returns: http://192.168.100.165/weather?city=南京&date=20250303
The key parameters here are "city=南京" and "date=20250303"; the backend can then issue this request directly, as sketched below.
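Once such a URL comes back from the model, the backend can validate the extracted parameters and issue the request. A minimal sketch (requests here is an assumed HTTP client, and the address is just the example endpoint above):

from urllib.parse import urlparse, parse_qs

import requests  # assumed HTTP client

model_output = "http://192.168.100.165/weather?city=南京&date=20250303"
# Pull the query parameters out of the generated URL and check the expected keys are present
params = {k: v[0] for k, v in parse_qs(urlparse(model_output).query).items()}
assert set(params) == {"city", "date"}
response = requests.get("http://192.168.100.165/weather", params=params, timeout=5)
print(response.text)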
The rest of this post shows how to get this behavior through model fine-tuning.
Environment Setup
Data
The data is stored in JSON format; each item is a question-answer pair with a query field and an answer field.
[{ "query": "北京在2025年2月25日的天气情况是什么?", "answer": "http://192.168.100.165/weather?city=北京&date=20250225" }, { "query": "想知道2025年2月26日上海的天气如何?", "answer": "http://192.168.100.165/weather?city=上海&date=20250226" }, { "query": "2025年2月27日广州的天气怎么样?", "answer": "http://192.168.100.165/weather?city=广州&date=20250227" }, { "query": "深圳2025年2月28日的天气预报是什么?", "answer": "http://192.168.100.165/weather?city=深圳&date=20250228" }'
Model
Qwen2.5-1.5B-Instruct (about 2.9 GB)
Requirements
The necessary Python packages (accelerate is also needed for device_map="auto" in the training script):
peft transformers modelscope torch accelerate
Code Walkthrough
Data Preprocessing
from modelscope import AutoTokenizer
import json

model_dir = '/home/models/Qwen2.5-1.5B-Instruct'
tokenizer = AutoTokenizer.from_pretrained(model_dir, trust_remote_code=True)

url = "http://192.168.100.165/weather/city/date"
# Qwen2.5 chat template: a system prompt describing the parameter-extraction task, then the user question
template1 = "<|im_start|>system\n假如你是一个参数解析专家,请根据用户问题,提取参数并映射到对应url链接中,url为{url}<|im_end|>\n<|im_start|>user\n{query}<|im_end|>\n<|im_start|>assistant\n"

def process(example):
    max_length = 512
    params = {'url': url, 'query': example['query']}
    template = template1.format(**params)
    # Tokenize the prompt and the answer separately so the prompt can be masked out of the loss
    instruction = tokenizer(template, add_special_tokens=False)
    response = tokenizer(example['answer'], add_special_tokens=False)
    input_ids = instruction['input_ids'] + response['input_ids'] + [tokenizer.pad_token_id]
    attention_mask = instruction['attention_mask'] + response['attention_mask'] + [1]
    # Prompt tokens get label -100 (ignored); the loss is computed only on the answer and the trailing pad token
    labels = [-100] * len(instruction['input_ids']) + response['input_ids'] + [tokenizer.pad_token_id]
    if len(input_ids) > max_length:
        input_ids = input_ids[:max_length]
        attention_mask = attention_mask[:max_length]
        labels = labels[:max_length]
    sample = {'input_ids': input_ids, 'attention_mask': attention_mask, 'labels': labels}
    return sample

def read_data(file_path):
    with open(file_path, 'r', encoding='utf-8') as f:
        data = json.load(f)
    dataset = list(map(process, data))
    return dataset
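Before training, it is worth decoding one processed sample to confirm that the prompt tokens are masked out and only the answer contributes to the loss; a quick sanity check along these lines (assuming the data file path used later in training):

if __name__ == "__main__":
    dataset = read_data('/home/data/data.json')
    sample = dataset[0]
    # Full model input: system prompt + user question + answer + trailing pad token
    print(tokenizer.decode(sample['input_ids']))
    # Dropping the -100 entries should leave only the answer URL (plus the trailing pad token)
    print(tokenizer.decode([t for t in sample['labels'] if t != -100]))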
Model Fine-Tuning
from peft import LoraConfig, get_peft_model
from transformers import AutoModelForCausalLM
from modelscope import AutoTokenizer
from transformers import TrainingArguments, Trainer, DataCollatorForSeq2Seq
import torch
from data_load import read_data

# Pretrained model
model_dir = '/home/models/Qwen2.5-1.5B-Instruct'
tokenizer = AutoTokenizer.from_pretrained(model_dir, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(model_dir, device_map="auto", torch_dtype=torch.bfloat16)
# Needed so gradient checkpointing works with the frozen base model
model.enable_input_require_grads()

# Define LoRA parameters: adapt only the attention q/k/v projections
config = LoraConfig(
    task_type="CAUSAL_LM",
    target_modules=["q_proj", "k_proj", "v_proj"],
    inference_mode=False,
    r=64,
    lora_alpha=16,
    lora_dropout=0.05,
    bias="none",
)

# Wrap the base model with the LoRA adapters
peft_model = get_peft_model(model, config)

# Training arguments
args = TrainingArguments(
    output_dir="/home/output/",
    per_device_train_batch_size=2,
    gradient_accumulation_steps=4,
    logging_steps=10,
    num_train_epochs=20,
    save_steps=100,
    learning_rate=1e-4,
    save_on_each_node=True,
    gradient_checkpointing=True,
    report_to="none",
)

train_data = read_data('/home/data/data.json')
print(len(train_data))
print('*' * 100)

trainer = Trainer(
    model=peft_model,
    args=args,
    train_dataset=train_data,
    data_collator=DataCollatorForSeq2Seq(tokenizer=tokenizer, padding=True),
)

if __name__ == "__main__":
    trainer.train()
The training loss curve for the run above looks as follows:
The fine-tuned LoRA adapter checkpoints are saved under the directory:
output_dir="/home/output/"
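Besides these periodic checkpoints, the final adapter can be saved explicitly once training finishes, and print_trainable_parameters() shows how small the LoRA update is compared with the 1.5B base model; a small optional addition to the training script (the final-adapter path is just an example):

# Report trainable vs. total parameter counts for the LoRA-wrapped model
peft_model.print_trainable_parameters()

if __name__ == "__main__":
    trainer.train()
    # Explicitly save the final adapter in addition to the save_steps checkpoints
    trainer.save_model("/home/output/final")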
Merging the Models
Merge the pretrained base model with the fine-tuned LoRA adapter:
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer

# Load the base model and tokenizer
model_name = "/home/models/Qwen2.5-1.5B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

# Load the LoRA adapter from a training checkpoint and merge it into the base weights
lora_model_path = '/home/output/checkpoint-40'
lora_model = PeftModel.from_pretrained(model, lora_model_path)
model = lora_model.merge_and_unload()

# Save the merged model
merged_model_path = "/home/models/merged_model"
model.save_pretrained(merged_model_path)
tokenizer.save_pretrained(merged_model_path)
Model Inference
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

def load_merged_model(merged_model_path):
    # Load the merged model and tokenizer
    tokenizer = AutoTokenizer.from_pretrained(merged_model_path)
    model = AutoModelForCausalLM.from_pretrained(merged_model_path)
    # Move the model to the GPU
    device = torch.device("cuda")
    model.to(device)
    model.eval()
    return model, tokenizer

def generate_text(prompt, model, tokenizer, max_length=128):
    inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
    outputs = model.generate(**inputs, max_length=max_length)
    # Note: the decoded string still contains the prompt, since generate() returns prompt + new tokens
    generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return generated_text

if __name__ == "__main__":
    prompt = '请告诉我齐齐哈尔今天的天气情况?'
    url = "http://192.168.100.165/weather?city=城市&date=日期"
    template1 = "<|im_start|>system\n假如你是一个参数解析专家,请根据用户问题,提取参数并映射到对应url链接中,url为{url}<|im_end|>\n<|im_start|>user\n{query}<|im_end|>\n<|im_start|>assistant\n"
    params = {'url': url, 'query': prompt}
    template = template1.format(**params)
    # model, tokenizer = load_merged_model('/home/models/Qwen2.5-1.5B-Instruct')  # base model, for comparison
    model, tokenizer = load_merged_model('/home/models/merged_model')
    text = generate_text(template, model, tokenizer)
    print('*' * 100)
    print("output:", text)
Prediction Comparison
Predictions from the pretrained Qwen2.5-1.5B-Instruct model
First run:
Second run:
Third run:
Predictions from the LoRA fine-tuned model
First run:
Second run:
Third run:
Comparing the results, the LoRA fine-tuned model reliably outputs the desired interface call on every run, in the correct HTTP URL format. The relative date "今天" (today) still needs post-processing, as sketched below.
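One simple option for that post-processing, sketched here under the assumption that the model emits a literal 今天 as the date value, is to rewrite it to the current date before calling the interface:

from datetime import date

def fill_today(url: str) -> str:
    # Hypothetical post-processing: replace a literal 今天 date value with today's date in YYYYMMDD form
    return url.replace("date=今天", "date=" + date.today().strftime("%Y%m%d"))

print(fill_today("http://192.168.100.165/weather?city=齐齐哈尔&date=今天"))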
Corrections are welcome if you spot any mistakes!