与ChatGPT完成语音交互

最新推荐文章于 2025-04-03 20:46:49 发布

寒冰屋

最新推荐文章于 2025-04-03 20:46:49 发布

阅读量66

点赞数

分类专栏：人工智能、前端　文章标签：chatgpt javascript

原文链接：https://www.codeproject.com/Tips/5387293/Complete-Voice-Interaction-with-ChatGPT

版权

人工智能同时被 2 个专栏收录

638 篇文章

订阅专栏

前端

241 篇文章

订阅专栏

介绍

使用代码

兴趣点

下载 ai-voice-chatbot.zip - 4.4 KB

介绍

本文旨在展示如何创建一个网页，允许用户通过使用语音识别和TTS模型以连续模式与ChatGPT聊天，询问文本模型并接收语音答案。

使用代码

该项目是用HTML/CSS和纯Vanilla JS实现的;前端由一个简单的结构组成，包括：

API密钥和提示的两个输入字段
用于启动语音识别的Icon
一个div，其中将出现书面答案
一个（隐形的）音频播放器

<header>
  <h1>AI Voice Chatbot</h1>
</header>
<div class="container">
  <form action="#" method="get" target="_blank" id="action-form">
    <!-- The API key is a secret: mask it on screen and keep the browser from
         storing it as an autocomplete suggestion. -->
    <input type="password" id="apikey" placeholder="Insert your API key here" autocomplete="off">
    <!-- Recognised speech is handled by the script; this field mainly hosts the microphone button. -->
    <input id="prompt" type="text" placeholder="Activate Microphone to chat..." autocomplete="off" autofocus>

  </form>
  <!-- Conversation transcript (user messages and answers are appended here). -->
  <div id="chathistory"> </div>
  <p class="info"></p>
  <!-- Hidden player used to play the generated speech. -->
  <audio controls id="audioPlayer" style="display: none;"></audio>
</div>

JavaScript文件主要由3个部分组成：

语音识别功能/事件
请求gpt-4o-mini模型获取答案
请求TTS生成音频文件

我们首先看一下语音识别部分。这是基于捕获一些识别事件（start、end、result）：

// Shared recognizer instance. It is only assigned inside the feature check
// below, so it stays undefined when the browser has no speech recognition API.
let recognition;

// Use the standard constructor when present, otherwise the webkit-prefixed one.
const SpeechRecognition =
  window.SpeechRecognition || window.webkitSpeechRecognition;

// NOTE(review): the closing brace of this `if` is not part of this excerpt.
if (SpeechRecognition) {
  console.log("Your Browser supports speech Recognition");

  recognition = new SpeechRecognition();
  // Keep the session open across several results instead of ending after the first one.
  recognition.continuous = true;

  // Handle of the timer that the result handler uses to stop recognition after a pause.
  let idleTimer;

  // NOTE(review): actionForm and actionFormInput are not defined in this
  // excerpt — presumably the #action-form element and its #prompt input; confirm.
  // Inject the microphone toggle button at the end of the form.
  actionForm.insertAdjacentHTML(
    "beforeend",
    '<button type="button"><i class="fas fa-microphone"></i></button>'
  );
  actionFormInput.style.paddingRight = "50px";

  // The injected button and its icon; the icon's class doubles as the on/off state.
  const micBtn = actionForm.querySelector("button");
  const micIcon = micBtn.firstElementChild;

  // micBtnClick is a function declaration further down, so it is already available here.
  micBtn.addEventListener("click", micBtnClick);
  /**
   * Click handler of the microphone button: starts recognition while the
   * plain microphone icon is shown, stops it otherwise.
   */
  function micBtnClick() {
    const isIdle = micIcon.classList.contains("fa-microphone");

    if (!isIdle) {
      recognition.stop();
      return;
    }

    recognition.start();
  }

  recognition.addEventListener("start", startSpeechRecognition);
  /**
   * "start" handler: switches the icon to the slashed microphone, focuses the
   * prompt input, logs the state change and cancels any pending idle timer.
   */
  function startSpeechRecognition() {
    const { classList: iconClasses } = micIcon;
    iconClasses.remove("fa-microphone");
    iconClasses.add("fa-microphone-slash");

    actionFormInput.focus();
    console.log("Voice activated, SPEAK");

    // A fresh session must not be cut short by a timer armed during the previous one.
    clearTimeout(idleTimer);
  }

  recognition.addEventListener("end", endSpeechRecognition);
  /**
   * "end" handler: restores the plain microphone icon, focuses the prompt
   * input and logs that the recognition service disconnected.
   */
  function endSpeechRecognition() {
    const { classList: iconClasses } = micIcon;
    iconClasses.remove("fa-microphone-slash");
    iconClasses.add("fa-microphone");

    actionFormInput.focus();
    console.log("Speech recognition service disconnected");
  }

  recognition.addEventListener("result", resultOfSpeechRecognition);
  /**
   * "result" handler: reads the newest transcript, decides when to stop
   * listening and forwards the transcript to sendMessage().
   *
   * Saying "go" stops recognition immediately; any other phrase (re)arms a
   * 2 second idle timer that stops recognition once the user pauses.
   *
   * @param {SpeechRecognitionEvent} event - Event carrying `resultIndex` and `results`.
   */
  function resultOfSpeechRecognition(event) {
    const current = event.resultIndex;
    const transcript = event.results[current][0].transcript;

    if (transcript.toLowerCase().trim() === "go") {
      recognition.stop();
    } else {
      // Restart the countdown on every new phrase so only a real pause stops the session.
      clearTimeout(idleTimer);
      idleTimer = setTimeout(() => {
        recognition.stop();
      }, 2000);
    }

    // NOTE(review): the "go" keyword is forwarded to the chat model like any
    // other phrase — confirm whether it should be filtered out instead.
    // sendMessage() is async; attach a handler so a failure is logged rather
    // than surfacing as an unhandled promise rejection.
    Promise.resolve(sendMessage(transcript)).catch((error) => {
      console.error("Failed to send the transcript: ", error);
    });
  }

然后，我们使用数组系统为聊天机器人创建上下文内存：

// Conversation memory sent along with every request; it starts with the
// system prompt that sets the bot's personality. Declared with `let` because
// it is reassigned after each exchange.
let chatMemory = createMemory([
  { role: "system", content: "You are a funny bot." }
]);
console.log(chatMemory);

/**
 * Builds a fresh memory array from a list of chat messages.
 *
 * Each entry is copied into a new object that keeps only `role` and
 * `content`, so the result shares no objects with the input.
 *
 * @param {Iterable<{role: string, content: string}>} messages - Messages to copy.
 * @returns {Array<{role: string, content: string}>} New array of role/content pairs.
 */
function createMemory(messages) {
  const toEntry = (entry) => ({ role: entry.role, content: entry.content });
  return Array.from(messages, toEntry);
}

接下来是两个函数：一个把消息发送给OpenAI的gpt-4o-mini模型，另一个显示返回的回答，以及令牌用量和费用估算。

/**
 * Shows the recognised phrase in the chat and asks the model for an answer.
 *
 * Does nothing for an empty transcript. Without an API key the user is
 * alerted once and no request is made. The shared `chatMemory` is only
 * replaced when the request helper actually returns an array, so a failed
 * request can never wipe the conversation history.
 *
 * @param {string} transcript - Text produced by speech recognition.
 * @returns {Promise<void>}
 */
async function sendMessage(transcript) {
  const userInput = typeof transcript === "string" ? transcript.trim() : "";
  if (userInput === "") {
    return;
  }

  // The key is a secret: read it, but never write it to the console.
  const apikey = document.getElementById("apikey").value;
  if (apikey === "") {
    alert("No OpenAI API Key found.");
    return;
  }

  console.log(userInput);
  showMessage("Guest", userInput, "");

  const updatedMemory = await getChatGPTResponse(userInput, chatMemory);
  if (Array.isArray(updatedMemory)) {
    chatMemory = updatedMemory;
  }
}

/**
 * Appends one chat entry to the #chathistory container and scrolls to it.
 *
 * `sender` and `message` come from speech recognition or from the model, so
 * they are written with `textContent` and can never be parsed as markup.
 * `tokens` and `downloadLink` are HTML fragments built by this application
 * and are therefore inserted as HTML.
 *
 * @param {string} sender - "Guest" for the user, any other name for the bot.
 * @param {string} message - Plain text of the message.
 * @param {string} tokens - HTML fragment with the token/cost summary (bot entries only).
 * @param {string} [downloadLink=""] - HTML fragment with the download anchor (bot entries only).
 */
function showMessage(sender, message, tokens, downloadLink = "") {
  // Look the container up here so the function does not depend on a global binding.
  const chatContainer = document.getElementById("chathistory");
  const messageElement = document.createElement("div");

  if (sender === "Guest") {
    messageElement.textContent = `${sender}: ${message}`;
    messageElement.classList.add("user-message");
  } else {
    const textElement = document.createElement("p");
    textElement.textContent = `${sender}: ${message} `;
    textElement.classList.add("chatgpt-message");
    messageElement.appendChild(textElement);

    const usageElement = document.createElement("p");
    usageElement.innerHTML = `${tokens}`;
    messageElement.classList.add("chatgpt-message");
    messageElement.appendChild(usageElement);

    const downloadElem = document.createElement("div");
    downloadElem.innerHTML = downloadLink;
    messageElement.appendChild(downloadElem);
  }

  chatContainer.appendChild(messageElement);
  // Keep the newest entry in view.
  chatContainer.scrollTop = chatContainer.scrollHeight;
}

最后，是对OpenAI的第一次请求（聊天补全接口）：

/**
 * Strips the Markdown decorations that should be neither displayed nor read
 * aloud: code-fence markers (with their language tag) and `**bold**` markers.
 * Only text attached to a fence is removed, so words such as "java" in
 * ordinary sentences are left untouched.
 */
function stripMarkdown(text) {
  return text
    .replace(/```[\w.+#-]*(?=\r?\n|$)/g, "")
    .replace(/```/g, "")
    .replace(/\*\*(.*?)\*\*/g, "$1")
    .trim();
}

/**
 * Builds the HTML fragment with the token usage and the estimated cost, or a
 * fallback sentence when the API response carries no usage figures.
 */
function buildUsageSummary(usage) {
  if (!usage || !usage.completion_tokens) {
    return "Unable to track the number of used tokens.";
  }

  const requestTokens = usage.prompt_tokens;
  const responseTokens = usage.completion_tokens;
  const totalTokens = usage.total_tokens;
  const pricePerPromptToken = 0.15 / 1000000; // gpt-4o-mini input price: 0.15 USD per million tokens
  const pricePerResponseToken = 0.6 / 1000000; // gpt-4o-mini output price: 0.60 USD per million tokens
  const priceperrequest = pricePerPromptToken * requestTokens;
  const priceperresponse = pricePerResponseToken * responseTokens;
  const totalExpense = priceperrequest + priceperresponse;

  return `<hr>Your request used ${requestTokens} tokens and costed ${priceperrequest.toFixed(
    6
  )}USD<br>This response used ${responseTokens} tokens and costed ${priceperresponse.toFixed(
    6
  )}USD<br>Total Tokens: ${totalTokens}. This interaction costed you: ${totalExpense.toFixed(
    6
  )}USD (audio not included).`;
}

/**
 * Sends the user's message together with the conversation memory to the
 * OpenAI chat completions endpoint, displays the answer, starts the
 * text-to-speech conversion and records the exchange in the memory.
 *
 * @param {string} userInput - Text of the user's message.
 * @param {Array<{role: string, content: string}>} [chatMemory=[]] - Previous
 *   messages. On success the new user/assistant pair is appended to this array.
 * @returns {Promise<Array<{role: string, content: string}>>} The memory array:
 *   extended on success, returned unchanged when the key is missing or the
 *   request fails (errors are logged and reported with an alert, not thrown).
 */
async function getChatGPTResponse(userInput, chatMemory = []) {
  // The key is a secret: read it, but never write it to the console.
  const apikey = document.getElementById("apikey").value;
  if (apikey === "") {
    alert("No OpenAI API Key found.");
    return chatMemory;
  }

  try {
    const response = await fetch("https://api.openai.com/v1/chat/completions", {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
        Authorization: `Bearer ${apikey}`
      },
      body: JSON.stringify({
        model: "gpt-4o-mini",
        messages: [...chatMemory, { role: "user", content: userInput }]
      })
    });
    if (!response.ok) {
      throw new Error("Error while requesting to the API");
    }

    const data = await response.json();
    const firstChoice = data.choices && data.choices[0];
    if (!firstChoice || !firstChoice.message || !firstChoice.message.content) {
      throw new Error("Invalid API response");
    }

    const cleanResponse = stripMarkdown(firstChoice.message.content);
    const usageSummary = buildUsageSummary(data.usage);

    // Offer the answer as a plain-text download (the link's file name is chat.txt).
    const blob = new Blob([cleanResponse], { type: "text/plain" });
    const url = URL.createObjectURL(blob);
    const downloadLink = `<a href="${url}" download="chat.txt">Click here to download the generated answer</a>`;

    showMessage("VivacityGPT", cleanResponse, usageSummary, downloadLink);

    convertiTestoInAudio(cleanResponse);

    chatMemory.push({ role: "user", content: userInput });
    chatMemory.push({ role: "assistant", content: cleanResponse });

    return chatMemory;
  } catch (error) {
    console.error(error);

    alert(
      "An error occurred during the request. Check your OpenAI account or retry later."
    );
    // Hand the untouched memory back so callers that store the result keep their history.
    return chatMemory;
  }
}

最后一个函数负责请求TTS（文本转语音）接口：

/**
 * Converts the given text to speech with the OpenAI TTS endpoint, plays it in
 * the #audioPlayer element and restarts speech recognition once playback ends.
 *
 * The "ended" handler is assigned through `onended`, which replaces the
 * previous handler. Registering a new listener on every call would make
 * `recognition.start()` run once per earlier answer, and a second start on a
 * running recognizer throws.
 *
 * @param {string} response - Text to be spoken.
 * @returns {Promise<void>} Settles when playback has started or the
 *   conversion failed; failures are logged, never thrown.
 */
async function convertiTestoInAudio(response) {
  if (!response) {
    alert("Please insert a text prompt before converting.");
    return;
  }

  // The key is a secret: read it, but never write it to the console.
  const apikey = document.getElementById("apikey").value;
  const selectedvoice = "nova";

  try {
    const ttsResponse = await fetch("https://api.openai.com/v1/audio/speech", {
      method: "POST",
      headers: {
        Authorization: `Bearer ${apikey}`,
        "Content-Type": "application/json"
      },
      body: JSON.stringify({
        model: "tts-1",
        input: response,
        voice: selectedvoice
      })
    });
    // An error body is JSON, not audio; do not hand it to the player.
    if (!ttsResponse.ok) {
      throw new Error(`TTS request failed with HTTP status ${ttsResponse.status}`);
    }

    const blob = await ttsResponse.blob();
    const audioUrl = URL.createObjectURL(blob);
    const audioPlayer = document.getElementById("audioPlayer");

    audioPlayer.onended = () => {
      // The clip has been played; release the memory held by its object URL.
      URL.revokeObjectURL(audioUrl);
      try {
        recognition.start();
      } catch (error) {
        // start() throws when recognition is already running (e.g. restarted by hand).
        console.error("Could not restart speech recognition: ", error);
      }
    };
    audioPlayer.src = audioUrl;
    // play() returns a promise that rejects when playback is blocked; await it
    // so that rejection is handled by the catch below.
    await audioPlayer.play();
  } catch (error) {
    console.error("Error while converting TTS: ", error);
  }
}

兴趣点

这段代码主要有两个兴趣点：

1、语音识别以循环方式运行：每次识别完成后停止，待音频播放器播放完TTS生成的音频后再自动重新启动。这样就能连续对话，而无需在每次互动时都单击麦克风图标。只需单击一次即可开始聊天，想结束时再单击一次即可。这是通过TTS调用中对播放器ended事件的处理实现的：

audioPlayer.addEventListener("ended", () => {
          recognition.start();

2、所有函数环环相扣，以创造流畅的体验：语音识别的result事件调用sendMessage()函数，后者以异步方式调用getChatGPTResponse()函数，后者再调用convertiTestoInAudio()函数，最后在音频播放结束时调用recognition.start()。

另外需要说明的是，浏览器只允许在本地（localhost）或安全连接（HTTPS，即带有SSL证书的站点）下使用语音识别；并且聊天机器人被设置为“有趣的聊天机器人”个性，以使聊天不那么无聊。修改系统提示即可自定义机器人的性格或情绪。

https://www.codeproject.com/Tips/5387293/Complete-Voice-Interaction-with-ChatGPT