Qwen QLoRA Fine-Tuning

The script below loads Qwen2-1.5B in 4-bit NF4 precision, attaches LoRA adapters, fine-tunes them on a small instruction dataset, and finally merges the adapter back into the base weights for standalone inference.
import warnings
import os

# pin the run to GPU 0, use only local model files, and disable Weights & Biases logging
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
os.environ["TRANSFORMERS_OFFLINE"] = "1"
os.environ["WANDB_DISABLED"] = "true"
warnings.filterwarnings("ignore")
import torch
from modelscope import AutoModelForCausalLM, BitsAndBytesConfig

# 4-bit NF4 quantization with double quantization; computation stays in fp32
_bnb_config = BitsAndBytesConfig(load_in_4bit=True,
                                 bnb_4bit_use_double_quant=True,
                                 bnb_4bit_quant_type="nf4",
                                 bnb_4bit_compute_dtype=torch.float32)
_model = AutoModelForCausalLM.from_pretrained("/mnt/LLM/model_weight/Qwen/Qwen2-1_5B",
                                              low_cpu_mem_usage=True,
                                              quantization_config=_bnb_config)
print(f"{sum(p.numel() for p in _model.parameters())}")
from modelscope import AutoTokenizer

_tokenizer = AutoTokenizer.from_pretrained("/mnt/LLM/model_weight/Qwen/Qwen2-1_5B")

# quick tokenization demo: encode a short string and inspect the id/token pairs
ids = _tokenizer.encode("你是谁?爱谁谁!", return_tensors="pt")
tokens = _tokenizer.convert_ids_to_tokens(ids[0])
for id, token in zip(ids[0], tokens):
    print(f"{id}--{token}")
from datasets import load_dataset
_dataset = load_dataset("json", data_files="/mnt/zhouqiang/LLM/Game_Qwen1.5/huan4.json", split="train")
def preprocess_dataset(example):
    MAX_LENGTH = 256
    # tokenize prompt and response separately so the prompt tokens can be masked out of the loss
    _instruction = _tokenizer(f"user: {example['instruction']}Assistant:", add_special_tokens=False)
    _response = _tokenizer(example["output"] + _tokenizer.eos_token, add_special_tokens=False)
    _input_ids = _instruction["input_ids"] + _response["input_ids"]
    _attention_mask = _instruction["attention_mask"] + _response["attention_mask"]
    # -100 marks positions the loss ignores; only the response tokens are learned
    _labels = [-100] * len(_instruction["input_ids"]) + _response["input_ids"]
    if len(_input_ids) > MAX_LENGTH:
        _input_ids = _input_ids[:MAX_LENGTH]
        _attention_mask = _attention_mask[:MAX_LENGTH]
        _labels = _labels[:MAX_LENGTH]
    return {
        "input_ids": _input_ids,
        "attention_mask": _attention_mask,
        "labels": _labels
    }
_dataset = _dataset.map(preprocess_dataset, remove_columns=_dataset.column_names)
_dataset = _dataset.shuffle()
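Before training it is worth decoding one preprocessed sample to check that prompt and response were concatenated as intended and that the prompt tokens are masked out of the labels; this is just a hedged sanity check against the dict returned by preprocess_dataset.
sample = _dataset[0]
print(_tokenizer.decode(sample["input_ids"]))
# positions set to -100 are ignored by the loss, so only the response text should be printed here
print(_tokenizer.decode([t for t in sample["labels"] if t != -100]))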
from peft import LoraConfig, get_peft_model, TaskType

# LoRA on every linear layer of the causal LM; rank 8 keeps the adapter small
config = LoraConfig(task_type=TaskType.CAUSAL_LM,
                    r=8,
                    target_modules="all-linear")
_model = get_peft_model(_model, config)
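peft's print_trainable_parameters() shows how small the adapter is relative to the frozen base; with r=8 over all linear layers only a fraction of a percent of the weights should be trainable.
_model.print_trainable_parameters()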
from transformers import TrainingArguments, Trainer, DataCollatorForSeq2Seq

_training_args = TrainingArguments(output_dir="/mnt/LLM/Game_Qwen1.5/checkpoints/qlora",
                                   run_name="qlora_study",
                                   per_device_train_batch_size=10,
                                   num_train_epochs=6,
                                   save_steps=6,
                                   logging_steps=6,
                                   report_to="none",
                                   optim="paged_adamw_32bit")  # paged optimizer is the usual choice for QLoRA
trainer = Trainer(model=_model,
                  args=_training_args,
                  train_dataset=_dataset,
                  # DataCollatorForSeq2Seq pads input_ids, attention_mask and labels to the batch maximum
                  data_collator=DataCollatorForSeq2Seq(tokenizer=_tokenizer, padding=True))
trainer.train()
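Besides the periodic checkpoints written every save_steps, the final adapter can be saved explicitly; the target directory below is only an illustrative choice next to output_dir.
# writes just the LoRA adapter files (adapter_config.json + adapter weights), not the 4-bit base model
trainer.save_model("/mnt/LLM/Game_Qwen1.5/checkpoints/qlora/final")  # hypothetical path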
from transformers import pipeline
from peft import PeftModel

# reload the quantized base model and attach the trained LoRA adapter for a quick test
_model = AutoModelForCausalLM.from_pretrained("/mnt/LLM/model_weight/Qwen/Qwen2-1_5B",
                                              quantization_config=_bnb_config,
                                              low_cpu_mem_usage=True)
peft_model = PeftModel.from_pretrained(model=_model, model_id="/mnt/LLM/Game_Qwen1.5/checkpoints/qlora/checkpoint-6")
pipe = pipeline("text-generation", model=peft_model, tokenizer=_tokenizer)
# for best results the prompt should mirror the template used in preprocess_dataset
response = pipe("User: 你是谁? Assistant: ")
print(response)
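The pipeline accepts the usual generation kwargs, which helps if the default output is truncated; these are standard transformers generation parameters, not anything specific to the fine-tuned model.
response = pipe("User: 你是谁? Assistant: ", max_new_tokens=64, do_sample=True, temperature=0.7)
print(response[0]["generated_text"])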
# merge the adapter into the full-precision base weights and save a standalone checkpoint
from peft import PeftModel

_model = AutoModelForCausalLM.from_pretrained("/mnt/LLM/model_weight/Qwen/Qwen2-1_5B")
peft_model = PeftModel.from_pretrained(model=_model,
                                       model_id="/mnt/LLM/Game_Qwen1.5/checkpoints/qlora/checkpoint-6")
merged_model = peft_model.merge_and_unload()
merged_model.save_pretrained("/mnt/LLM/Game_Qwen1.5/new_pth")
_tokenizer.save_pretrained("/mnt/LLM/Game_Qwen1.5/new_pth")
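Once merged, the directory behaves like an ordinary Qwen2 checkpoint and can be loaded without peft; a minimal sketch reusing the paths above.
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

# plain load of the merged weights; no BitsAndBytesConfig or adapter needed
merged = AutoModelForCausalLM.from_pretrained("/mnt/LLM/Game_Qwen1.5/new_pth")
merged_tokenizer = AutoTokenizer.from_pretrained("/mnt/LLM/Game_Qwen1.5/new_pth")
pipe = pipeline("text-generation", model=merged, tokenizer=merged_tokenizer)
print(pipe("User: 你是谁? Assistant: ", max_new_tokens=64))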