transformer加载模型进行推理代码_transformer的pipeline做推理,模型把input部分也输出了-CSDN博客

本文链接：https://blog.csdn.net/m0_37156901/article/details/144740070


# Free-form smoke-test questions (identity, arithmetic, general knowledge)
# fed to the model one at a time.
chioce_list2 = [
    # Identity / arithmetic / small talk
    "你是谁？",
    "1+2等于几？",
    "一个数字，乘以3后加上6，再除以2，最后减去2，结果等于这个数字本身，这个数字是什么？",
    "1*8+3/1=?",
    "讲个笑话",
    # General knowledge
    "太阳系中最大的行星是哪一个？",
    "地球的自转周期是多少小时？",
    "人类历史上最长的战争是哪一场？",
    "古代丝绸之路的起点和终点分别是哪里？",
    "月球上的第一个人类足迹是在哪个年份留下的？",
    "世界上最深的海沟叫什么名字？",
    "古埃及的法老图坦卡蒙的墓室是在何时被发现的？",
    "地球的大气层中最外层是什么？",
    "哪个国家拥有世界上最长的海岸线？",
    "人类历史上第一个环球航行的航海家是谁？",
]

# Instruction prepended to every question before tokenization.
# Split across lines via implicit string concatenation; the runtime value is
# one single line ending in a newline.
chioce_user_prompt = (
    "Please analyze the given multiple-choice question and return only the "
    "correct answer options. If it's a single-choice question, provide one "
    "option letter; if it's a multiple-choice question letter, list all the "
    "correct options letter.  For example, if the correct options are A, B, "
    "and C, your response should be 'ABC'.\n"
)

# Alternative instruction for code-generation prompts.
gen_code_prompt = (
    "Please write a python code to implement the following verilog module:\n"
)

# Local directory of the checkpoint to load.
model_path = "/data/model_hub/deepseek-coder-6.7b-instruct"

# Use a pipeline as a high-level helper
from transformers import pipeline
# Load model directly
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
from tqdm import tqdm


# Load the causal LM weights in bfloat16; device_map="auto" delegates
# device placement to the library.
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    device_map="auto",
    torch_dtype=torch.bfloat16,
)
# Tokenizer is loaded from the same checkpoint directory as the model.
tokenizer = AutoTokenizer.from_pretrained(model_path)


def inference(questions=None, max_new_tokens=512):
    """Generate and print the model's answer for each question.

    Every question is prefixed with ``chioce_user_prompt``, tokenized and
    passed to ``model.generate``. Only the tokens produced after the prompt
    are decoded, so the printed answer does not repeat the input text.

    Args:
        questions: Iterable of question strings. When ``None`` (the default),
            the module-level ``chioce_list2`` is used.
        max_new_tokens: Maximum number of tokens to generate per question.
    """
    if questions is None:
        # The original loop iterated `chioce_list`, a name that is not defined
        # in this file; `chioce_list2` is the only question list available.
        questions = chioce_list2

    for input_text in tqdm(questions):
        # Move inputs to the model's own device instead of hard-coding "cuda",
        # since placement is decided by device_map="auto" at load time.
        inputs = tokenizer(
            chioce_user_prompt + input_text, return_tensors="pt"
        ).to(model.device)

        output = model.generate(**inputs, max_new_tokens=max_new_tokens)

        # For a causal LM, generate() returns prompt tokens followed by the
        # continuation; drop the prompt so only the new text is printed.
        prompt_len = inputs["input_ids"].shape[-1]
        answer_ids = output[0][prompt_len:]

        print(f"=========================={input_text[:30]}==========================")
        print(tokenizer.decode(answer_ids, skip_special_tokens=True))
        print("==============================end======================")




# Script entry point: run the inference loop only when executed directly,
# not when this module is imported.
if __name__ == "__main__":
    inference()