网上看了很多资料,大多是关于 LoRA 微调后的模型直接由 PEFT 读取的,具体可以参考:《LoRA 模型合并与保存》,这里就不再赘述了。大概原理就是:
def merge_lora_to_LLM():
model_name_or_path = "your_LLM_model_path"
adapter_name_or_path = "your_lora_model_path"
save_path = "save_model_path"
tokenizer = AutoTokenizer.from_pretrained(
model_name_or_path,
trust_remote_code=True
)
model = AutoModelForCausalLM.from_pretrained(
model_name_or_path,
trust_remote_code=True,
low_cpu_mem_usage=True,
torch_dtype=torch.float16,
device_map="auto"
)
model = PeftModel.from_pretrained(model, adapter_name_or_path)
model = model.merge_and_unload()
tokenizer