Local environment: Linux
python 3.10.16
torch 2.4.0
cuda 12.2
transformers 4.43.2
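Before loading a 32B model, it is worth confirming that this PyTorch build actually sees your GPUs. A minimal sanity check, assuming the versions above are already installed:

import torch

# Print the installed version and the GPUs visible to PyTorch.
print("torch:", torch.__version__)
print("cuda available:", torch.cuda.is_available())
print("gpu count:", torch.cuda.device_count())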
Model page: the QwQ-32B collection from Qwen on ModelScope (魔搭社区)
Model download
modelscope download --model Qwen/QwQ-32B --local_dir ./QwQ-32B
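If you prefer to stay in Python rather than use the CLI, the modelscope package also ships a snapshot_download helper. A minimal sketch (note that the cache_dir layout differs slightly from --local_dir, so use the returned path):

from modelscope import snapshot_download

# Download QwQ-32B into a local cache and return the resolved weight directory.
model_dir = snapshot_download('Qwen/QwQ-32B', cache_dir='./QwQ-32B-cache')
print(model_dir)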
Model inference
from transformers import AutoModelForCausalLM, AutoTokenizer


def Qwen_QwQ():
    weight_path = 'QwQ-32B'
    # Load the weights in their native precision and spread them across all
    # GPUs, keeping GPU 0 lightly loaded so it has headroom for generation buffers.
    model = AutoModelForCausalLM.from_pretrained(
        weight_path,
        torch_dtype="auto",
        device_map="balanced_low_0"
    )
    tokenizer = AutoTokenizer.from_pretrained(weight_path)

    prompt = "what is the day today?"
    messages = [
        {"role": "user", "content": prompt}
    ]
    # Render the chat template and append the generation prompt so the model
    # answers as the assistant.
    text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True
    )
    model_inputs = tokenizer([text], return_tensors="pt").to(model.device)

    generated_ids = model.generate(
        **model_inputs,
        max_new_tokens=32768
    )
    # Strip the prompt tokens so only the newly generated text is decoded.
    generated_ids = [
        output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
    ]
    response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
    print(response)


if __name__ == '__main__':
    Qwen_QwQ()
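Because QwQ-32B emits a long chain of thought before its final answer, max_new_tokens=32768 can mean a long wait with no visible output. A variant that streams tokens to stdout as they are generated, using transformers' TextStreamer; qwen_qwq_stream is a hypothetical helper name, and the sampling values follow the model card's suggestion (temperature 0.6, top_p 0.95, avoid greedy decoding), so treat them as a starting point:

from transformers import AutoModelForCausalLM, AutoTokenizer, TextStreamer


def qwen_qwq_stream(prompt: str, weight_path: str = 'QwQ-32B'):
    model = AutoModelForCausalLM.from_pretrained(
        weight_path, torch_dtype="auto", device_map="balanced_low_0"
    )
    tokenizer = AutoTokenizer.from_pretrained(weight_path)
    text = tokenizer.apply_chat_template(
        [{"role": "user", "content": prompt}],
        tokenize=False, add_generation_prompt=True,
    )
    model_inputs = tokenizer([text], return_tensors="pt").to(model.device)
    # TextStreamer prints each token to stdout as soon as it is decoded.
    streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    model.generate(
        **model_inputs,
        max_new_tokens=32768,
        do_sample=True,   # the model card recommends sampling over greedy decoding
        temperature=0.6,  # suggested starting values; tune for your workload
        top_p=0.95,
        streamer=streamer,
    )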