一 . 简介
项目以讯飞星火 API 为核心能力,并搭载 Web Speech API 中的 webkitSpeechRecognition
对象实现语音转文字;TTS (Text-to-Speech) 部分通过调用百度 TTS API 将文本转换成语音,使机器人能够“说话”,从而实现语音输入、语音输出的对话形式。
实现方法:后端基于python的flask框架,前端使用了html+css并用AJAX通过XMLHttpRequest
对象实现异步请求。
二 . 代码
2.1 前端:
<!DOCTYPE html>
<html lang="zh-CN">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<link rel="stylesheet" href="static/css/happy.css">
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.0.0-beta3/css/all.min.css">
<!-- BUGFIX: the inline script's connectWebSocket() uses CryptoJS, but no script tag ever loaded it. -->
<script src="https://cdnjs.cloudflare.com/ajax/libs/crypto-js/4.1.1/crypto-js.min.js"></script>
<title>快乐加油站</title>
</head>
<body>
<div class="container">
<div class="header">
<h1>欢迎来到快乐加油站!</h1>
</div>
<div class="info">
<p>我是一个情绪大模型。请随意提问或闲聊。</p>
</div>
<!-- Chat transcript: message bubbles are appended here by displayMessage() -->
<div class="chat" id="chat">
<!-- Messages are displayed here -->
</div>
<!-- Input row: text box, microphone toggle, live recognition status, send button -->
<div class="input-container">
<input type="text" id="questionInput" placeholder="向我提问..." onkeydown="handleKeyPress(event)">
<button id="recordButton"><i class="fa fa-microphone"></i></button>
<span id="recordingStatus" style="display:none;">正在识别语音...</span>
<button id="askButton">发送</button>
</div>
<div class="happy-quote">
<div class="quote-divider"></div>
<p class="quote-text">我生来就是高山而非溪流,我欲与群峰之巅俯视平庸的沟壑,我生来就是人杰而非草芥,我站在伟人之肩藐视卑微的懦夫,请只管跑下去,天自己会亮</p>
</div>
<!-- Hidden dialog revealed by showConfirmationDialog() when a trigger word is detected -->
<div class="confirmation-dialog" id="confirmationDialog">
<p>你想播放音乐吗?</p>
<div class="dialog-buttons">
<button class="cancel">取消</button>
<button class="confirm">确定</button>
</div>
</div>
<!-- NOTE(review): this path uses "../static/..." while the CSS link above uses "static/..." — confirm which one resolves from the served page. -->
<img src="../static/images/happy01.jpg" alt="快乐机器人" class="depression-character">
</div>
<script>
// Shared state for the speech/chat UI.
let recognition;
let ws;
let wsConnected = false;
let isRecording = false;

document.getElementById('recordButton').addEventListener('click', toggleRecording);
// NOTE(review): #questionInput also carries an inline onkeydown attribute, so
// Enter runs handleKeyPress twice; this is harmless only because the first
// call clears the input before the second call reads it.
document.getElementById('questionInput').addEventListener('keydown', handleKeyPress);
document.getElementById('askButton').addEventListener('click', () => handleKeyPress({ key: 'Enter' }));

// Initialize speech recognition. webkitSpeechRecognition is Chrome-only;
// guard so other browsers keep the text chat instead of crashing the script.
if ('webkitSpeechRecognition' in window) {
  recognition = new webkitSpeechRecognition();
  recognition.lang = 'zh-CN';
  recognition.interimResults = false;
  recognition.continuous = false;
} else {
  console.warn('webkitSpeechRecognition not supported; voice input disabled.');
}
// Toggle microphone capture on/off when the record button is clicked.
function toggleRecording() {
  if (!isRecording) {
    startRecording();
    isRecording = true;
    return;
  }
  recognition.stop();
  isRecording = false;
}
// Begin a speech-recognition session on the shared recognizer.
// BUGFIX: the original lazily re-created the recognizer here, but a fresh
// instance would have none of the onstart/onresult/onend handlers that are
// attached at load time, silently breaking recording. The shared instance is
// created once during initialization, so we only guard against it missing
// (unsupported browser) instead of re-creating it.
function startRecording() {
  if (!recognition) {
    console.warn('Speech recognition unavailable; cannot start recording.');
    return;
  }
  recognition.start();
}
// Wire up lifecycle callbacks on the shared recognizer instance.
recognition.onstart = () => {
  console.log('语音识别已开始...');
  toggleRecordingStatus(true); // show the "recognizing speech" badge
  connectWebSocket();
};
recognition.onresult = (event) => {
  const transcript = event.results[0][0].transcript;
  document.getElementById('questionInput').value = transcript;
  console.log('识别结果:', transcript);
  // Auto-submit only when the user already pressed stop.
  if (!isRecording) {
    handleKeyPress({ key: 'Enter' });
  }
};
recognition.onerror = (event) => {
  console.error('Error occurred in speech recognition:', event.error);
};
recognition.onend = () => {
  console.log('语音识别已结束.');
  toggleRecordingStatus(false); // hide the badge once capture stops
  if (ws && wsConnected) {
    ws.close();
  }
  isRecording = false;
};
// Open the iFlytek IAT (speech dictation) WebSocket and send the opening frame.
//
// BUGFIX: the original computed `CryptoJS.SHA256(signStr, apiSecret)` — SHA256
// takes a single message argument, so the secret was silently ignored — and the
// resulting "sign" was never attached to the URL, so the connection was always
// unauthenticated. iFlytek's documented scheme signs "host/date/request-line"
// with HMAC-SHA256 and passes authorization/date/host as query parameters.
// Requires CryptoJS to be loaded on the page.
function connectWebSocket() {
  const host = 'ws-api.xfyun.cn';
  const path = '/v2/iat';
  const appid = 'xxxx';     // replace with your own credentials
  const apiKey = 'xxx';
  const apiSecret = 'xxxx';

  const date = new Date().toUTCString(); // RFC 1123 date (toGMTString is deprecated)
  const signatureOrigin = 'host: ' + host + '\ndate: ' + date + '\nGET ' + path + ' HTTP/1.1';
  const signature = CryptoJS.enc.Base64.stringify(CryptoJS.HmacSHA256(signatureOrigin, apiSecret));
  const authorizationOrigin = 'api_key="' + apiKey + '", algorithm="hmac-sha256", headers="host date request-line", signature="' + signature + '"';
  const authorization = btoa(authorizationOrigin);
  const wsUrl = 'wss://' + host + path +
    '?authorization=' + encodeURIComponent(authorization) +
    '&date=' + encodeURIComponent(date) +
    '&host=' + encodeURIComponent(host);

  ws = new WebSocket(wsUrl);

  ws.onopen = function() {
    console.log('WebSocket 连接已打开');
    wsConnected = true;
    // Opening frame: status 0 with session parameters and an empty audio field.
    const initPayload = {
      "common": { "app_id": appid },
      "business": { "domain": "iat", "language": "zh_cn", "accent": "mandarin" },
      "data": {
        "status": 0,
        "from": "pc",
        "format": "audio/L16;rate=16000",
        "encoding": "raw",
        "audio": ""
      }
    };
    ws.send(JSON.stringify(initPayload));
  };
  ws.onmessage = function(event) {
    const data = JSON.parse(event.data);
    if (data.code === 0) {
      textToSpeech(data.data.result);
    } else {
      console.error('WebSocket 错误响应:', data);
    }
  };
  ws.onerror = function(error) {
    console.error('WebSocket 错误:', error);
  };
  ws.onclose = function(event) {
    console.log('WebSocket 连接已关闭:', event);
    wsConnected = false;
  };
}
// Push a final (status 2) frame to the dictation socket, if it is connected.
// NOTE(review): the "audio" field is filled with raw text here, but the IAT
// service expects base64-encoded PCM audio — confirm this helper's intent.
function sendTextToServer(text) {
  if (!ws || !wsConnected) {
    return;
  }
  const frame = {
    "common": {
      // NOTE(review): hard-coded app_id duplicates the credential used in
      // connectWebSocket(); keep the two in sync.
      "app_id": "d614d1bf"
    },
    "business": {
      "domain": "iat",
      "language": "zh_cn",
      "accent": "mandarin"
    },
    "data": {
      "status": 2,
      "from": "pc",
      "format": "audio/L16;rate=16000",
      "encoding": "raw",
      "audio": text
    }
  };
  ws.send(JSON.stringify(frame));
}
// Convert text to speech via the Baidu TTS HTTP endpoint and play the result.
// BUGFIX: the original had no onerror handler, so network failures (DNS, CORS,
// mixed content) died silently with no console output.
// NOTE(review): the endpoint is plain http:// — if this page is served over
// https the request will be blocked as mixed content; confirm an https
// endpoint is available.
function textToSpeech(text) {
  const xhr = new XMLHttpRequest();
  const apiUrl = 'http://tts.baidu.com/text2audio'; // 示例使用百度TTS API,根据实际情况调整
  xhr.open('GET', apiUrl + '?tex=' + encodeURIComponent(text) + '&lan=zh&ctp=1&cuid=123456', true);
  xhr.responseType = 'blob';
  xhr.onload = function() {
    if (xhr.status === 200) {
      playAudio(URL.createObjectURL(xhr.response));
    } else {
      console.error('Failed to fetch audio:', xhr.statusText);
    }
  };
  // Surface network-level failures instead of failing silently.
  xhr.onerror = function() {
    console.error('Network error while fetching TTS audio');
  };
  xhr.send();
}
// Submit the current question when Enter is pressed.
// Also invoked with a synthetic `{ key: 'Enter' }` object by the send button
// and the speech-result handler — that object has no preventDefault, so the
// original `event.preventDefault()` threw a TypeError on every button click.
function handleKeyPress(event) {
  if (event.key === 'Enter') {
    if (typeof event.preventDefault === 'function') {
      event.preventDefault(); // stop default behavior such as form submission
    }
    var question = document.getElementById('questionInput').value;
    if (question) {
      displayMessage('user', question);
      displayLoading(); // show the "synthesizing" placeholder
      sendQuestion(question);
      document.getElementById('questionInput').value = ''; // clear the input
    }
  }
}
// POST the question to the /ask backend and render the answer.
// BUGFIX: the original only handled readyState 4 + status 200, so any server
// error left the "synthesizing" placeholder on screen forever with no feedback.
function sendQuestion(question) {
  var xhr = new XMLHttpRequest();
  xhr.open('POST', '/ask', true);
  xhr.setRequestHeader('Content-Type', 'application/json');
  xhr.onreadystatechange = function() {
    if (xhr.readyState !== 4) return;
    removeLoading(); // always clear the placeholder once the request settles
    if (xhr.status === 200) {
      var response = JSON.parse(xhr.responseText);
      displayMessage('robot', response.text_answer);
      playAudio(response.audio_url);
      // Offer a comfort song when the question contains a trigger word.
      playCustomAudio(question);
    } else {
      displayMessage('robot', '请求失败,请稍后再试。');
    }
  };
  xhr.send(JSON.stringify({ question: question }));
}
// Append a chat bubble (with avatar) for `sender` ('user' or 'robot') to the log.
function displayMessage(sender, text) {
  const chat = document.getElementById('chat');

  const message = document.createElement('div');
  message.classList.add('message', sender);

  const avatar = document.createElement('div');
  avatar.classList.add('avatar', sender);
  message.appendChild(avatar);

  const bubble = document.createElement('div');
  bubble.classList.add('bubble', sender);
  bubble.textContent = text;
  message.appendChild(bubble);

  chat.appendChild(message);
  chat.scrollTop = chat.scrollHeight; // keep the newest message in view
}
// Show a transient "synthesizing speech" placeholder at the bottom of the log.
function displayLoading() {
  const chat = document.getElementById('chat');
  const placeholder = document.createElement('div');
  placeholder.classList.add('loading');
  placeholder.textContent = '正在合成语音...';
  chat.appendChild(placeholder);
  chat.scrollTop = chat.scrollHeight; // keep the placeholder in view
}
// Remove every loading placeholder currently present in the chat log.
function removeLoading() {
  const chat = document.getElementById('chat');
  // getElementsByClassName returns a live collection: removing the first
  // element reshuffles it, so keep pulling index 0 until it is empty.
  const live = chat.getElementsByClassName('loading');
  for (let el = live[0]; el !== undefined; el = live[0]) {
    chat.removeChild(el);
  }
}
// Play an audio URL once the browser has buffered enough for smooth playback.
function playAudio(audioUrl) {
  const audio = new Audio(audioUrl);
  audio.addEventListener('canplaythrough', () => audio.play());
}
// Show or hide the "recognizing speech" status label next to the mic button.
function toggleRecordingStatus(show) {
  const status = document.getElementById('recordingStatus');
  status.style.display = show ? 'inline' : 'none';
}
// Confirmation dialog: "confirm" plays the pending track, "cancel" just closes.
document.getElementById('confirmationDialog').addEventListener('click', (event) => {
  const clicked = event.target;
  if (clicked.classList.contains('confirm')) {
    playAudio(customAudioUrl); // set by showConfirmationDialog()
    hideConfirmationDialog();
    return;
  }
  if (clicked.classList.contains('cancel')) {
    hideConfirmationDialog();
  }
});
// Remember which track to play on confirm, then reveal the dialog.
// BUGFIX: `customAudioUrl` was an implicit global (never declared); make the
// global assignment explicit — the dialog's click handler reads it.
function showConfirmationDialog(audioUrl) {
  globalThis.customAudioUrl = audioUrl;
  document.getElementById('confirmationDialog').style.display = 'block';
}
// Close the play-music confirmation dialog.
function hideConfirmationDialog() {
  const dialog = document.getElementById('confirmationDialog');
  dialog.style.display = 'none';
}
// Offer a matching comfort song when the user's message contains a trigger word.
// Only the first matching keyword (in order) opens the dialog.
function playCustomAudio(userInput) {
  const tracks = [
    ['不开心', '/static/video/顾森西 - 再次与你同行 (Live).ogg'],
    ['迷茫', '/static/video/诗林 - 每当我找不到存在的意义.ogg'],
  ];
  for (const [keyword, url] of tracks) {
    if (userInput.includes(keyword)) {
      showConfirmationDialog(url);
      return;
    }
  }
}
</script>
</body>
</html>
注意:
将appid apiKey apiSecret 替换为自己的密钥,申请的方式在上一篇博客。
2.2 后端:
获取大模型的回答并修改模型自定义风格 happyApi.py:
# coding: utf-8
import _thread as thread
import base64
import datetime
import hashlib
import hmac
import json
import ssl
from urllib.parse import urlparse, urlencode
from wsgiref.handlers import format_date_time
import websocket
import os
import pygame
import SparkApi
# LLM interface
def get_response_from_model(appid, api_key, api_secret, spark_url, domain, question):
    """Ask the Spark LLM `question` and return the assistant's text answer.

    Delegates the request to the SparkApi module and extracts the answer
    content from the accumulated response.
    """
    SparkApi.main(appid, api_key, api_secret, spark_url, domain, question)
    reply = SparkApi.getText("assistant", SparkApi.answer)
    return reply[0]["content"]
class Ws_Param(object):
    """Builds the authenticated WebSocket URL for the iFlytek TTS service."""

    def __init__(self, APPID, APIKey, APISecret, gpt_url):
        self.APPID = APPID
        self.APIKey = APIKey
        self.APISecret = APISecret
        parsed = urlparse(gpt_url)
        self.host = parsed.netloc
        self.path = parsed.path
        self.gpt_url = gpt_url

    def create_url(self):
        """Return gpt_url extended with `authorization`, `date`, `host` params.

        Signs "host / date / request-line" with HMAC-SHA256 using the API
        secret, per iFlytek's authentication scheme.
        """
        date = format_date_time(datetime.datetime.now().timestamp())
        request_line = f"GET {self.path} HTTP/1.1"
        signing_input = "\n".join([f"host: {self.host}", f"date: {date}", request_line])
        digest = hmac.new(
            self.APISecret.encode('utf-8'),
            signing_input.encode('utf-8'),
            digestmod=hashlib.sha256,
        ).digest()
        signature = base64.b64encode(digest).decode(encoding='utf-8')
        auth_origin = (
            f'api_key="{self.APIKey}", algorithm="hmac-sha256", '
            f'headers="host date request-line", signature="{signature}"'
        )
        authorization = base64.b64encode(auth_origin.encode('utf-8')).decode(encoding='utf-8')
        params = {
            "authorization": authorization,
            "date": date,
            "host": self.host,
        }
        return self.gpt_url + '?' + urlencode(params)
# Handle a websocket error by logging it.
def on_error(ws, error):
    """Print websocket errors to stdout."""
    print("### error:", error)
# Handle websocket closure.
def on_close(ws, close_status_code, close_msg):
    """Print a marker when the websocket connection closes."""
    print("### closed ###")
# Handle websocket connection establishment.
def on_open(ws):
    """Kick off `run` on a background thread once the socket is open."""
    thread.start_new_thread(run, (ws,))
# Handle an incoming websocket message: append audio chunks to the output
# file and, on the final frame (status 2), play the complete file.
#
# BUGFIX: the original played the file and closed the socket BEFORE writing
# the audio chunk carried by the final (status == 2) message, so the last
# chunk was missing from playback and was appended after ws.close(). The
# chunk is now written first, then playback runs on the complete file.
def on_message(ws, message):
    """Append decoded audio to ws.save_file_name; play it when synthesis ends.

    Expects `ws` to carry a `save_file_name` attribute (set by `main`).
    """
    message = json.loads(message)
    code = message['header']['code']
    if code != 0:
        print("### 请求出错: ", message)
    else:
        payload = message.get("payload")
        status = message['header']['status']
        # Persist this frame's audio chunk (if any) before anything else.
        if payload and payload != "null":
            audio = payload.get("audio")
            if audio:
                audio = audio["audio"]  # base64-encoded MP3 bytes
                with open(ws.save_file_name, 'ab') as f:
                    f.write(base64.b64decode(audio))
        if status == 2:
            print("### 合成完毕")
            # Play the fully-written audio file.
            pygame.mixer.init()
            pygame.mixer.music.load(ws.save_file_name)
            pygame.mixer.music.play()
            while pygame.mixer.music.get_busy():  # wait for playback to finish
                pygame.time.Clock().tick(10)
            ws.close()
def run(ws, *args):
    """Send the one-shot TTS request frame over the open websocket.

    The frame carries the app header, the oral/tts synthesis parameters, and
    the base64-encoded text stored on the connection object by `main`.
    """
    audio_cfg = {
        "encoding": "lame",
        "sample_rate": 16000,
        "channels": 1,
        "bit_depth": 16,
        "frame_size": 0,
    }
    pybuf_cfg = {
        "encoding": "utf8",
        "compress": "raw",
        "format": "plain",
    }
    tts_cfg = {
        "vcn": ws.vcn,
        "speed": 66,
        "volume": 50,
        "pitch": 50,
        "bgs": 0,
        "reg": 0,
        "rdn": 0,
        "rhy": 0,
        "scn": 5,
        "version": 0,
        "L5SilLen": 0,
        "ParagraphSilLen": 0,
        "audio": audio_cfg,
        "pybuf": pybuf_cfg,
    }
    text_payload = {
        "encoding": "utf8",
        "compress": "raw",
        "format": "json",
        "status": 0,
        "seq": 0,
        "text": base64.b64encode(ws.text.encode('utf-8')).decode("UTF8"),
    }
    frame = {
        "header": {"app_id": ws.appid, "status": 0},
        "parameter": {
            "oral": {"spark_assist": 1, "oral_level": "mid"},
            "tts": tts_cfg,
        },
        "payload": {"text": text_payload},
    }
    ws.send(json.dumps(frame))
def main(appid, api_secret, api_key, url, vcn, save_file_name):
    """Interactive loop: ask the Spark model, then synthesize and play each answer.

    Args:
        appid: iFlytek TTS application id.
        api_secret / api_key: iFlytek TTS credentials.
        url: TTS websocket endpoint.
        vcn: voice (speaker) name.
        save_file_name: path where synthesized audio chunks are written.
    """
    websocket.enableTrace(False)
    wsParam = Ws_Param(appid, api_key, api_secret, url)
    wsUrl = wsParam.create_url()
    ws = websocket.WebSocketApp(wsUrl, on_message=on_message, on_error=on_error,
                                on_close=on_close, on_open=on_open)
    # on_message/run read these attributes off the connection object.
    ws.appid = appid
    ws.vcn = vcn
    ws.save_file_name = save_file_name
    while True:
        user_input = input("请输入要询问的问题 (输入 'exit' 退出程序): ")
        if user_input.lower() == 'exit':
            break
        # BUGFIX: remove the previous audio file every round. The original
        # removed it only once before the loop, so each later answer was
        # appended to (and played after) all earlier audio.
        if os.path.exists(ws.save_file_name):
            os.remove(ws.save_file_name)
        # Fetch the model's answer for this question.
        config = SparkApi.config()
        model_response = get_response_from_model(
            config["appid"],
            config["api_key"],
            config["api_secret"],
            config["Spark_url"],
            config["domain"],
            user_input,
        )
        ws.text = model_response
        ws.run_forever(sslopt={"cert_reqs": ssl.CERT_NONE})
    pygame.mixer.quit()  # release pygame audio resources
if __name__ == "__main__":
    # Replace the placeholder credentials with your own iFlytek keys.
    main(
        appid="xxx",
        api_secret="xxx",
        api_key="xxx",
        url="wss://cbm01.cn-huabei-1.xf-yun.com/v1/private/medd90fec",
        # Voice (speaker) parameter
        vcn="x4_lingxiaoxuan_oral",
        save_file_name="2.mp3"
    )
完整代码可私信我获取,或下载下一篇博客中的安装包。