Using Llama-3.2-1B as an example
Install the library:
pip install modelscope
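The inference script further down also imports torch and transformers, so install those too if they are missing (these are the standard PyPI package names):

pip install torch transformers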
Run the following script to download the model:
# Download the model
from modelscope import snapshot_download
model_dir = snapshot_download('LLM-Research/Llama-3.2-1B')
The downloaded model ends up in C:\Users\Administrator\.cache\modelscope\hub\models\LLM-Research\Llama-3___2-1B (note that modelscope replaces the dots in the model name with underscores in the local path).
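If you would rather not use the default cache location, snapshot_download also accepts a cache_dir argument; a minimal sketch, where ./models is only an example path:

from modelscope import snapshot_download

# Download into ./models instead of the default cache (path is illustrative)
model_dir = snapshot_download('LLM-Research/Llama-3.2-1B', cache_dir='./models')
print(model_dir)  # the actual local directory of the downloaded files

With the model on disk, the following script loads it and runs an interactive generation loop.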
import sys
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

# Directory the model was downloaded to
model_path = r"C:\Users\Administrator\.cache\modelscope\hub\models\LLM-Research\Llama-3___2-1B"

# Load the tokenizer and model
try:
    tokenizer = AutoTokenizer.from_pretrained(model_path)
    # float16 halves memory use; on CPU, torch.float32 may be more reliable
    model = AutoModelForCausalLM.from_pretrained(
        model_path,
        torch_dtype=torch.float16,
        use_safetensors=True,
    )
except Exception as e:
    print(f"Error while loading the model: {e}")
    sys.exit(1)

# Move the model to the GPU if one is available
device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)

# Interactive loop; stop with Ctrl+C
while True:
    # Read a prompt from the user
    input_text = input("Enter some text: ")

    # Tokenize the input
    input_ids = tokenizer.encode(input_text, return_tensors="pt").to(device)

    # Generate a completion (max_length counts the prompt tokens too)
    try:
        output = model.generate(
            input_ids,
            max_length=150,
            num_beams=5,
            no_repeat_ngram_size=2,
            early_stopping=True,
        )
    except Exception as e:
        print(f"Error during generation: {e}")
        sys.exit(1)

    # Decode the generated token IDs back into text
    output_text = tokenizer.decode(output[0], skip_special_tokens=True)

    # Print the input and the output
    print("Input:", input_text)
    print("Output:", output_text)
Run it and you will see the results. With only 1B parameters the output quality is poor: we have successfully deployed a local "artificial unintelligence".
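Part of the blame lies with the decoding settings: beam search with a base (non-instruction-tuned) model tends to produce stiff, repetitive text, and sampling usually reads more naturally. A minimal sketch of a drop-in replacement for the generate() call above (the temperature and top_p values are illustrative, not tuned):

# Nucleus sampling instead of beam search
output = model.generate(
    input_ids,
    max_new_tokens=150,                   # counts only newly generated tokens
    do_sample=True,                       # sample instead of searching for the best beam
    temperature=0.8,                      # illustrative; lower = more deterministic
    top_p=0.9,                            # nucleus sampling cutoff (illustrative)
    pad_token_id=tokenizer.eos_token_id,  # Llama defines no pad token; this silences a warning
)

For conversational use, the Llama-3.2-1B-Instruct variant would also be a better starting point than the base model.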