目录
介绍
本文旨在展示如何创建一个网页,允许用户通过使用语音识别和TTS模型以连续模式与ChatGPT聊天,询问文本模型并接收语音答案。
使用代码
该项目使用HTML/CSS和原生JavaScript(Vanilla JS)实现;前端结构很简单,包括:
- API密钥和提示的两个输入字段
- 用于启动语音识别的Icon
- 一个div,其中将出现书面答案
- 一个(隐形的)音频播放器
<header>
  <h1>AI Voice Chatbot</h1>
</header>
<div class="container">
  <!-- Presumably the form the script appends the microphone button to (it looks the form up as "actionForm"). -->
  <form action="#" method="get" target="_blank" id="action-form">
    <!-- The API key is a secret: mask it on screen and keep it out of the browser's autofill history. -->
    <input type="password" id="apikey" placeholder="Insert your API key here" autocomplete="off" aria-label="OpenAI API key">
    <!-- Placeholders are not accessible names, so both inputs also carry an aria-label. -->
    <input id="prompt" type="text" placeholder="Activate Microphone to chat..." autocomplete="off" aria-label="Chat prompt" autofocus>
  </form>
  <!-- Chat transcript container (looked up by id "chathistory" in the script). -->
  <div id="chathistory"> </div>
  <p class="info"></p>
  <!-- Hidden player used to speak the generated TTS audio. -->
  <audio controls id="audioPlayer" style="display: none;"></audio>
</div>
JavaScript文件主要由3个部分组成:
- 语音识别功能/事件
- 向GPT-4o-mini模型发送请求以获取回答
- 请求TTS生成音频文件
我们首先看一下语音识别部分。这是基于捕获一些识别事件(start、end、result):
// Shared recognizer instance. It is only assigned inside the feature check
// below, so it stays undefined when the browser exposes no implementation.
let recognition;
// Use the standard constructor and fall back to the webkit-prefixed one.
const SpeechRecognition =
window.SpeechRecognition || window.webkitSpeechRecognition;
// NOTE(review): the closing brace of this "if" is not part of this excerpt.
if (SpeechRecognition) {
console.log("Your Browser supports speech Recognition");
recognition = new SpeechRecognition();
// Continuous mode (see the Web Speech API docs); stopping is driven by the
// click handler and by the idle timer set in resultOfSpeechRecognition.
recognition.continuous = true;
// Handle of the pending timeout that stops recognition after a result.
let idleTimer;
// NOTE(review): actionForm and actionFormInput are not defined in this
// excerpt - presumably the #action-form element and its #prompt input; confirm.
// Append the microphone toggle button to the form.
actionForm.insertAdjacentHTML(
"beforeend",
'<button type="button"><i class="fas fa-microphone"></i></button>'
);
// Presumably leaves room inside the input for the button; depends on CSS not shown here.
actionFormInput.style.paddingRight = "50px";
// First <button> inside the form; the button inserted above when the form has no other button.
const micBtn = actionForm.querySelector("button");
// The <i> icon inside the button; its classes are toggled by the start/end handlers.
const micIcon = micBtn.firstElementChild;
// micBtnClick is declared right below; function declarations are hoisted within their block.
micBtn.addEventListener("click", micBtnClick);
/**
 * Click handler for the microphone button: toggles speech recognition.
 *
 * The icon class is used as the state flag. While the icon carries the
 * "fa-microphone" class recognition is considered idle and gets started;
 * otherwise it is stopped.
 */
function micBtnClick() {
  const showsIdleIcon = micIcon.classList.contains("fa-microphone");
  if (!showsIdleIcon) {
    recognition.stop();
    return;
  }
  recognition.start();
}
// Run startSpeechRecognition (declared just below; function declarations are hoisted) on the recognizer's "start" event.
recognition.addEventListener("start", startSpeechRecognition);
/**
 * Handler for the recognizer's "start" event.
 *
 * Swaps the icon class from "fa-microphone" to "fa-microphone-slash",
 * moves focus to the prompt input and cancels any pending idle timeout.
 */
function startSpeechRecognition() {
  const { classList: iconClasses } = micIcon;
  iconClasses.remove("fa-microphone");
  iconClasses.add("fa-microphone-slash");

  actionFormInput.focus();
  console.log("Voice activated, SPEAK");

  // A stop scheduled by an earlier result must not end this new session.
  clearTimeout(idleTimer);
}
// Run endSpeechRecognition (declared just below; function declarations are hoisted) on the recognizer's "end" event.
recognition.addEventListener("end", endSpeechRecognition);
/**
 * Handler for the recognizer's "end" event.
 *
 * Swaps the icon class back from "fa-microphone-slash" to "fa-microphone"
 * (the state micBtnClick treats as idle) and returns focus to the prompt input.
 */
function endSpeechRecognition() {
  const { classList: iconClasses } = micIcon;
  iconClasses.remove("fa-microphone-slash");
  iconClasses.add("fa-microphone");

  actionFormInput.focus();
  console.log("Speech recognition service disconnected");
}
// Run resultOfSpeechRecognition (declared just below; function declarations are hoisted) on every "result" event.
recognition.addEventListener("result", resultOfSpeechRecognition);
/**
 * Handler for the recognizer's "result" event.
 *
 * Reads the transcript of the result at event.resultIndex, schedules the end
 * of the listening session and forwards the transcript to sendMessage().
 *
 * @param {object} event - Recognition result event; this handler reads
 *   event.resultIndex and event.results[resultIndex][0].transcript.
 */
function resultOfSpeechRecognition(event) {
  const { transcript } = event.results[event.resultIndex][0];

  if (transcript.toLowerCase().trim() === "go") {
    // The spoken word "go" stops listening immediately.
    // NOTE(review): "go" is still forwarded to the chat below - confirm this is intended.
    recognition.stop();
  } else {
    // Restart the 2 s idle countdown: stop listening once no newer result
    // has replaced this timer.
    clearTimeout(idleTimer);
    idleTimer = setTimeout(() => {
      recognition.stop();
    }, 2000);
  }

  // sendMessage is async; handle a rejection here instead of leaving the
  // promise floating (which would surface as an unhandled rejection).
  sendMessage(transcript).catch((error) => {
    console.error("Error while sending the recognized speech: ", error);
  });
}
然后,我们使用数组系统为聊天机器人创建上下文内存:
// Conversation history sent along with every chat request, seeded with the
// system prompt that sets the bot's personality. Declared with "let" because
// sendMessage() reassigns it after each exchange.
let chatMemory = createMemory([
  { role: "system", content: "You are a funny bot." }
]);
console.log(chatMemory);
/**
 * Builds a fresh chat-memory array from a list of messages.
 *
 * Each entry is copied into a new object that keeps only the "role" and
 * "content" properties, so the result shares no objects with the input.
 *
 * @param {Iterable<{role: string, content: string}>} messages - Messages to copy.
 * @returns {Array<{role: string, content: string}>} The copied messages, in order.
 */
function createMemory(messages) {
  return Array.from(messages, ({ role, content }) => ({ role, content }));
}
接下来的两个函数负责把消息发送给OpenAI的GPT-4o-mini模型,并显示返回的回答以及令牌(token)用量和费用估算。
/**
 * Shows the user's utterance in the chat and asks the chat model to answer it.
 *
 * Nothing is sent when the API key field is empty (the user is alerted
 * instead) or when the transcript is an empty string.
 *
 * @param {string} transcript - Text produced by speech recognition.
 * @returns {Promise<void>}
 */
async function sendMessage(transcript) {
  // The key is deliberately never logged: it is a secret.
  const apikey = document.getElementById("apikey").value;
  if (apikey === "") {
    alert("No OpenAI API Key found.");
    return;
  }

  const userInput = transcript;
  console.log(userInput);
  if (userInput === "") {
    return;
  }

  showMessage("Guest", userInput, "");
  const updatedMemory = await getChatGPTResponse(userInput, chatMemory);
  // Keep the existing history (including the system prompt) when the request
  // failed and no updated memory came back.
  if (Array.isArray(updatedMemory)) {
    chatMemory = updatedMemory;
  }
}
/**
 * Appends one chat entry to the chat container and scrolls it into view.
 *
 * Messages from "Guest" become a single "user-message" div. Any other
 * sender produces a "chatgpt-message" div holding the message text, the
 * token/cost summary and the download link.
 *
 * @param {string} sender - Display name; "Guest" selects the user layout.
 * @param {string} message - Message text. Inserted as plain text, never as HTML.
 * @param {string} [tokens=""] - HTML summary of token usage, built by the caller.
 * @param {string} [downloadLink=""] - HTML anchor for downloading the answer, built by the caller.
 */
function showMessage(sender, message, tokens = "", downloadLink = "") {
  const messageElement = document.createElement("div");

  if (sender === "Guest") {
    // The transcript is untrusted input: textContent keeps any markup in it inert.
    messageElement.textContent = `${sender}: ${message}`;
    messageElement.classList.add("user-message");
  } else {
    const textElement = document.createElement("p");
    // Model output is untrusted as well, so it is rendered as text too.
    textElement.textContent = `${sender}: ${message} `;
    textElement.classList.add("chatgpt-message");
    messageElement.appendChild(textElement);

    // tokens and downloadLink are HTML fragments assembled by the caller
    // (they contain <hr>, <br> and <a>), so they must stay innerHTML.
    const tokensElement = document.createElement("p");
    tokensElement.innerHTML = `${tokens}`;
    messageElement.classList.add("chatgpt-message");
    messageElement.appendChild(tokensElement);

    const downloadElem = document.createElement("div");
    downloadElem.innerHTML = downloadLink;
    messageElement.appendChild(downloadElem);
  }

  // NOTE(review): chatContainer is not defined in this excerpt - presumably
  // a global reference to the #chathistory element; confirm.
  chatContainer.appendChild(messageElement);
  chatContainer.scrollTop = chatContainer.scrollHeight;
}
接下来是第一个OpenAI请求(向聊天模型提问):
/**
 * Strips the Markdown decorations that should not be shown or spoken:
 * code-fence markers (with an optional known language tag) and bold markers.
 *
 * @param {string} text - Raw model answer.
 * @returns {string} The answer without fence and bold markers.
 */
function stripMarkdown(text) {
  return text
    // The language tag is only removed when it directly follows a fence, so
    // words such as "java" or "cpp" in ordinary prose are left alone. Longer
    // tags come first so "javascript" and "vb.net" are removed completely.
    .replace(/```(?:html|css|javascript|php|python|vb\.net|vb|cpp|java|csharp)?/g, "")
    .replace(/\*\*(.*?)\*\*/g, "$1");
}

/**
 * Builds the HTML summary of token usage and estimated cost.
 *
 * @param {{prompt_tokens: number, completion_tokens: number, total_tokens: number}} [usage]
 *   The "usage" object of a chat completion response, when present.
 * @returns {string} HTML fragment describing tokens and cost, or a fallback notice.
 */
function describeTokenUsage(usage) {
  if (!usage || !usage.completion_tokens) {
    return "Unable to track the number of used tokens.";
  }
  const requestTokens = usage.prompt_tokens;
  const responseTokens = usage.completion_tokens;
  const totalTokens = usage.total_tokens;
  const pricepertokenprompt = 0.15 / 1000000; // gpt-4o-mini input price: 0.15 USD per million tokens
  const pricepertokenresponse = 0.6 / 1000000; // gpt-4o-mini output price: 0.60 USD per million tokens
  const priceperrequest = pricepertokenprompt * requestTokens;
  const priceperresponse = pricepertokenresponse * responseTokens;
  const totalExpense = priceperrequest + priceperresponse;
  return `<hr>Your request used ${requestTokens} tokens and costed ${priceperrequest.toFixed(
    6
  )}USD<br>This response used ${responseTokens} tokens and costed ${priceperresponse.toFixed(
    6
  )}USD<br>Total Tokens: ${totalTokens}. This interaction costed you: ${totalExpense.toFixed(
    6
  )}USD (audio not included).`;
}

/**
 * Sends the user's message, together with the conversation so far, to the
 * OpenAI chat completions endpoint, then displays and speaks the answer.
 *
 * On success the user message and the cleaned answer are appended to
 * chatMemory. On any failure (missing key, HTTP error, malformed response)
 * the user is alerted and chatMemory is returned untouched, so the caller
 * never loses the conversation history.
 *
 * @param {string} userInput - The user's message.
 * @param {Array<{role: string, content: string}>} [chatMemory=[]] - Conversation so far; extended in place on success.
 * @returns {Promise<Array<{role: string, content: string}>>} The conversation memory.
 */
async function getChatGPTResponse(userInput, chatMemory = []) {
  // The key is deliberately never logged: it is a secret.
  const apikey = document.getElementById("apikey").value;
  if (apikey === "") {
    alert("No OpenAI API Key found.");
    // Without a key the request can only fail, so do not send it.
    return chatMemory;
  }

  try {
    const response = await fetch("https://api.openai.com/v1/chat/completions", {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
        Authorization: `Bearer ${apikey}`
      },
      body: JSON.stringify({
        model: "gpt-4o-mini",
        messages: [...chatMemory, { role: "user", content: userInput }]
      })
    });
    if (!response.ok) {
      throw new Error("Error while requesting to the API");
    }

    const data = await response.json();
    if (
      !data.choices ||
      !data.choices.length ||
      !data.choices[0].message ||
      !data.choices[0].message.content
    ) {
      throw new Error("Invalid API response");
    }

    const chatGPTResponse = data.choices[0].message.content.trim();
    console.log(chatGPTResponse);
    const cleanResponse = stripMarkdown(chatGPTResponse);
    const tokenSummary = describeTokenUsage(data.usage);

    // The download is offered as "chat.txt", so the blob is typed as plain
    // text; this also keeps untrusted model output from being opened as HTML.
    const blob = new Blob([cleanResponse], { type: "text/plain" });
    const url = URL.createObjectURL(blob);
    const downloadLink = `<a href="${url}" download="chat.txt">Click here to download the generated answer</a>`;

    showMessage("VivacityGPT", cleanResponse, tokenSummary, downloadLink);
    convertiTestoInAudio(cleanResponse);

    chatMemory.push({ role: "user", content: userInput });
    chatMemory.push({ role: "assistant", content: cleanResponse });
    return chatMemory;
  } catch (error) {
    console.error(error);
    alert(
      "An error occurred during the request. Check your OpenAI account or retry later."
    );
    return chatMemory;
  }
}
最后一个函数向TTS接口发送请求,把回答转换为语音:
/**
 * Converts the given text to speech through the OpenAI TTS endpoint, plays it
 * in the #audioPlayer element and restarts speech recognition when playback
 * has finished.
 *
 * Failures (HTTP error, network error, blocked playback) are logged and do
 * not reject the returned promise.
 *
 * @param {string} response - Text to be spoken.
 * @returns {Promise<void>} Settles once playback has started or the attempt failed.
 */
async function convertiTestoInAudio(response) {
  const prompt = response;
  if (!prompt) {
    alert("Please insert a text prompt before converting.");
    return;
  }

  // The key is deliberately never logged: it is a secret.
  const apikey = document.getElementById("apikey").value;
  const selectedvoice = "nova";

  try {
    const ttsResponse = await fetch("https://api.openai.com/v1/audio/speech", {
      method: "POST",
      headers: {
        Authorization: `Bearer ${apikey}`,
        "Content-Type": "application/json"
      },
      body: JSON.stringify({
        model: "tts-1",
        input: prompt,
        voice: selectedvoice
      })
    });
    // An error response carries a JSON body, not audio: do not try to play it.
    if (!ttsResponse.ok) {
      throw new Error(`TTS request failed with status ${ttsResponse.status}`);
    }

    const blob = await ttsResponse.blob();
    const audioUrl = URL.createObjectURL(blob);
    const audioPlayer = document.getElementById("audioPlayer");

    // Assigning the handler (instead of adding another listener on every call)
    // guarantees that exactly one restart runs per finished playback.
    audioPlayer.onended = () => {
      URL.revokeObjectURL(audioUrl); // the audio is no longer needed
      recognition.start();
    };
    audioPlayer.src = audioUrl;
    // play() returns a promise that rejects when playback cannot start.
    await audioPlayer.play();
  } catch (error) {
    console.error("Error while converting TTS: ", error);
  }
}
兴趣点
这段代码主要有两个兴趣点:
1、语音识别在循环中运行,这意味着它在识别完成后停止,并在音频播放器播放完TTS生成的音频后重新启动。这确保了连续聊天,而无需在每次互动时都单击麦克风图标。只需单击它即可开始聊天,然后在您想完成时单击它。这是通过TTS调用中的事件控件实现的:
audioPlayer.addEventListener("ended", () => {
recognition.start();
2、所有函数环环相扣,从而带来流畅的体验:语音识别的result事件调用sendMessage()函数,后者以异步方式调用getChatGPTResponse()函数,getChatGPTResponse()再调用convertiTestoInAudio()函数,最后在音频播放结束时调用recognition.start()。
另外需要指出的是,浏览器只允许在本地(localhost)或安全连接(HTTPS,即配有SSL证书)下使用语音识别;此外,聊天机器人的系统提示被设置为“有趣的机器人”个性,让聊天不那么无聊。修改系统提示即可切换成任意性格或情绪。
https://www.codeproject.com/Tips/5387293/Complete-Voice-Interaction-with-ChatGPT