Today, on a whim, I decided to put my GPU to work: I used gpt4all to run Llama-8B locally, feeding the model's output from each round back in as the input for the next round. That way, the user only has to type a single opening message and the conversation can keep running on its own indefinitely; I wanted to see where a large model's self-dialogue would end up.
To keep the model from noticing the repetition, I did not give it any chat-history context. The source code is below (it uses gpt4all's local API):
import requests
import json
import os

# URL of the local gpt4all API endpoint
url = "http://localhost:4891/v1/chat/completions"

# Path of the chat-history file
chat_history_file = "chat_history.json"

# Initialize the chat-history file if it does not exist yet
if not os.path.exists(chat_history_file):
    with open(chat_history_file, "w") as file:
        json.dump([], file)  # start with an empty list

def load_chat_history():
    """Load the chat history from disk."""
    with open(chat_history_file, "r") as file:
        return json.load(file)

def save_chat_history(history):
    """Write the chat history back to disk."""
    with open(chat_history_file, "w") as file:
        json.dump(history, file, indent=4)

def auto_chat_with_model():
    """Feed each assistant reply back in as the next user message, indefinitely."""
    # Load past history; fall back to an empty list if the file is missing or corrupt
    try:
        chat_history = load_chat_history()
    except (FileNotFoundError, json.JSONDecodeError):
        chat_history = []

    # The first (and only) human input seeds the loop
    user_message = input("You: ").strip()
    if not user_message:
        print("Please provide an initial input to start the chat.")
        return

    # Build the current message
    current_message = {"role": "user", "content": user_message}

    # Update and persist the chat history
    chat_history.append(current_message)
    save_chat_history(chat_history)

    while True:
        # Build the request payload; only the current message is sent,
        # so the model never sees the earlier turns of the conversation
        payload = {
            "model": "Llama-3-8B-Instruct",
            "messages": [current_message],
            "max_tokens": 50,
            "temperature": 0.28
        }
        # Request headers
        headers = {
            "Content-Type": "application/json"
        }
        # Send the POST request
        try:
            response = requests.post(url, headers=headers, data=json.dumps(payload))
            # Check whether the request succeeded
            if response.status_code == 200:
                assistant_response = response.json().get("choices", [{}])[0].get("message", {}).get("content", "")
                print(f"Assistant: {assistant_response}")
                # The assistant's reply becomes the next round's "user" message
                current_message = {"role": "user", "content": assistant_response}
                chat_history.append({"role": "assistant", "content": assistant_response})
                chat_history.append(current_message)
                # Persist the chat history
                save_chat_history(chat_history)
            else:
                print(f"Failed to fetch response. Status code: {response.status_code}")
                print("Error message:", response.text)
                break
        except Exception as e:
            print("An error occurred:", str(e))
            break

# Start the auto-dialogue
if __name__ == "__main__":
    auto_chat_with_model()
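Since the whole point is to see where the self-dialogue drifts, it helps to replay the saved transcript afterward. Below is a minimal sketch of my own (not part of the script above); it only assumes the chat_history.json format that the script writes:

import json

# Load the transcript produced by the auto-chat script
with open("chat_history.json", "r") as file:
    history = json.load(file)

# Print each turn with its index so repetition or drift is easy to spot
for i, message in enumerate(history):
    preview = message["content"].replace("\n", " ")[:120]
    print(f"[{i:>4}] {message['role']}: {preview}")

Truncating each message to 120 characters keeps long replies scannable; drop the slice if you want the full text of every turn.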