Qwen2模型量化时关于bitsandbytes安装的问题
问题描述:
from transformers import AutoTokenizer, AutoModelForCausalLM, GenerationConfig,BitsAndBytesConfig
# Target device string for inference; assumes a CUDA-capable GPU at index 0 — TODO confirm
CUDA_DEVICE = "cuda:0"
# Local filesystem path to the Qwen2-1.5B-Instruct model directory
# NOTE(review): presumably contains HF-format weights + tokenizer files; verify the path exists
model_name_or_path = '/qwen2-1.5b-instruct'
# Load the tokenizer; use_fast=False forces the slow (pure-Python) tokenizer implementation
Tokenizer = AutoTokenizer.from_pretrained(model_name_or_path,use_fast=False)
bnb_config = BitsAndBytesConfig(
load_in_4bit=True,
bnb_4bit_use_double_quant=True,
bnb_4bit_quant_type="nf4",
bnb_4bit_compute_dtype