from transformers import AutoModelForCausalLM, AutoTokenizer, TextStreamer
import torch
# Load the model and tokenizer
model_name = "D:\\Algorithm\\DeepSeek-R1-Distill-Qwen-1.5B\\DeepSeek-R1-Distill-Qwen-1.5B"
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
# Fix 1: let device_map place the model and add stability-related options
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    torch_dtype=torch.float16,
    trust_remote_code=True,
    # Stability-related options
    low_cpu_mem_usage=True,
    attn_implementation="eager"  # disable potentially unstable attention optimizations
)
# Note: no .to("cuda") here; device_map="auto" already places the model on the GPU,
# and calling .to() on an accelerate-dispatched model raises an error.
# Fix 2: force the model into evaluation mode
model.eval()
# Streaming generation
input_text = "<attn_implementation=eager>, what is this code used for?"
streamer = TextStreamer(tokenizer)
inputs = tokenizer(input_text, return_tensors="pt").to("cuda")
# Pass the streamer so tokens are printed as they are generated
outputs = model.generate(**inputs, streamer=streamer, max_new_tokens=500)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
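
TextStreamer prints directly to stdout, which is fine for a quick test but awkward if you want to forward the text elsewhere (for example to a web UI). Below is a minimal sketch of the alternative pattern using transformers' TextIteratorStreamer, which exposes the generated text as a Python iterator; it assumes the model, tokenizer, and inputs objects defined above, and the variable names (iter_streamer, generation_kwargs) are just illustrative.

from threading import Thread
from transformers import TextIteratorStreamer

# Stream tokens through an iterator instead of printing to stdout.
# Assumes `model`, `tokenizer`, and `inputs` from the script above.
iter_streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generation_kwargs = dict(**inputs, streamer=iter_streamer, max_new_tokens=500)

# generate() blocks until finished, so run it in a background thread
# and consume the streamed text in the main thread.
thread = Thread(target=model.generate, kwargs=generation_kwargs)
thread.start()
for new_text in iter_streamer:
    print(new_text, end="", flush=True)
thread.join()

With skip_prompt=True the iterator yields only newly generated text, so there is no need to decode and print the full output again afterwards.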