from transformers import AutoModelForCausalLM, AutoTokenizer
# Local path to the vicuna-13B checkpoint
model_name = "/localdata/cn-customer-engineering/xudongz/langchain/langchain-ChatGLM/llama/vicuna-13B/"

tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
print("load tokenizer done!")
model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True)

# Cast to fp16 and move to the GPU (a 13B model needs roughly 26 GB of VRAM in fp16)
model = model.half().cuda()
# Simple interactive loop: read a prompt from stdin, generate, and print the completion
while True:
    prompt = input()
    inputs = tokenizer(prompt, return_tensors='pt').to('cuda')
    # Drop token_type_ids if present; LLaMA-family models do not accept this argument
    inputs.pop('token_type_ids', None)
    print("inputs", inputs)
    tokens = model.generate(
        **inputs,
        max_new_tokens=1024,
        do_sample=True,
        temperature=1.0,
        top_p=1.0,
    )
    print(tokenizer.decode(tokens[0], skip_special_tokens=True))
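
One hedged refinement: the loop above tokenizes the raw user input directly, but Vicuna is a chat-tuned model and usually responds better when the question is wrapped in the conversation template it was fine-tuned with. The sketch below assumes the Vicuna v1.1-style template; build_vicuna_prompt is a hypothetical helper, so verify the exact template for your checkpoint (for example against the FastChat conversation templates) before adopting it.

# Assumed Vicuna v1.1-style system prompt; confirm it matches your checkpoint.
SYSTEM_PROMPT = (
    "A chat between a curious user and an artificial intelligence assistant. "
    "The assistant gives helpful, detailed, and polite answers to the user's questions."
)

def build_vicuna_prompt(user_message: str) -> str:
    # Hypothetical helper: wrap the raw question in the USER/ASSISTANT chat format
    return f"{SYSTEM_PROMPT} USER: {user_message} ASSISTANT:"

# Inside the loop, tokenize the wrapped prompt instead of the raw input:
inputs = tokenizer(build_vicuna_prompt(prompt), return_tensors='pt').to('cuda')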