一 . 简介
项目以讯飞星火 API 为核心能力,并搭载 Web Speech API 中的 webkitSpeechRecognition
对象实现语音转文字;TTS (Text-to-Speech) 部分通过调用百度 TTS API 将文本转换成语音,使机器人能够“说话”,从而实现语音输入、语音输出的对话形式。
实现方法:后端基于python的flask框架,前端使用了html+css并用AJAX通过XMLHttpRequest
对象实现异步请求。
二 . 代码
2.1 前端:
<!DOCTYPE html>
<html lang="zh-CN">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<link rel="stylesheet" href="static/css/happy.css">
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.0.0-beta3/css/all.min.css">
<!-- BUGFIX: the inline script's connectWebSocket() uses CryptoJS, but no script tag ever loaded it. -->
<script src="https://cdnjs.cloudflare.com/ajax/libs/crypto-js/4.1.1/crypto-js.min.js"></script>
<title>快乐加油站</title>
</head>
<body>
<div class="container">
<div class="header">
<h1>欢迎来到快乐加油站!</h1>
</div>
<div class="info">
<p>我是一个情绪大模型。请随意提问或闲聊。</p>
</div>
<!-- Chat transcript: message bubbles are appended here by displayMessage() -->
<div class="chat" id="chat">
<!-- Messages are displayed here -->
</div>
<!-- Input row: text box, microphone toggle, live recognition status, send button -->
<div class="input-container">
<input type="text" id="questionInput" placeholder="向我提问..." onkeydown="handleKeyPress(event)">
<button id="recordButton"><i class="fa fa-microphone"></i></button>
<span id="recordingStatus" style="display:none;">正在识别语音...</span>
<button id="askButton">发送</button>
</div>
<div class="happy-quote">
<div class="quote-divider"></div>
<p class="quote-text">我生来就是高山而非溪流,我欲与群峰之巅俯视平庸的沟壑,我生来就是人杰而非草芥,我站在伟人之肩藐视卑微的懦夫,请只管跑下去,天自己会亮</p>
</div>
<!-- Hidden dialog revealed by showConfirmationDialog() when a trigger word is detected -->
<div class="confirmation-dialog" id="confirmationDialog">
<p>你想播放音乐吗?</p>
<div class="dialog-buttons">
<button class="cancel">取消</button>
<button class="confirm">确定</button>
</div>
</div>
<!-- NOTE(review): this path uses "../static/..." while the CSS link above uses "static/..." — confirm which one resolves from the served page. -->
<img src="../static/images/happy01.jpg" alt="快乐机器人" class="depression-character">
</div>
<script>
// Shared state for the speech/chat UI.
let recognition;
let ws;
let wsConnected = false;
let isRecording = false;

document.getElementById('recordButton').addEventListener('click', toggleRecording);
// NOTE(review): #questionInput also carries an inline onkeydown attribute, so
// Enter runs handleKeyPress twice; this is harmless only because the first
// call clears the input before the second call reads it.
document.getElementById('questionInput').addEventListener('keydown', handleKeyPress);
document.getElementById('askButton').addEventListener('click', () => handleKeyPress({ key: 'Enter' }));

// Initialize speech recognition. webkitSpeechRecognition is Chrome-only;
// guard so other browsers keep the text chat instead of crashing the script.
if ('webkitSpeechRecognition' in window) {
  recognition = new webkitSpeechRecognition();
  recognition.lang = 'zh-CN';
  recognition.interimResults = false;
  recognition.continuous = false;
} else {
  console.warn('webkitSpeechRecognition not supported; voice input disabled.');
}
// Toggle microphone capture on/off when the record button is clicked.
function toggleRecording() {
  if (!isRecording) {
    startRecording();
    isRecording = true;
    return;
  }
  recognition.stop();
  isRecording = false;
}
// Begin a speech-recognition session on the shared recognizer.
// BUGFIX: the original lazily re-created the recognizer here, but a fresh
// instance would have none of the onstart/onresult/onend handlers that are
// attached at load time, silently breaking recording. The shared instance is
// created once during initialization, so we only guard against it missing
// (unsupported browser) instead of re-creating it.
function startRecording() {
  if (!recognition) {
    console.warn('Speech recognition unavailable; cannot start recording.');
    return;
  }
  recognition.start();
}
// Wire up lifecycle callbacks on the shared recognizer instance.
recognition.onstart = () => {
  console.log('语音识别已开始...');
  toggleRecordingStatus(true); // show the "recognizing speech" badge
  connectWebSocket();
};
recognition.onresult = (event) => {
  const transcript = event.results[0][0].transcript;
  document.getElementById('questionInput').value = transcript;
  console.log('识别结果:', transcript);
  // Auto-submit only when the user already pressed stop.
  if (!isRecording) {
    handleKeyPress({ key: 'Enter' });
  }
};
recognition.onerror = (event) => {
  console.error('Error occurred in speech recognition:', event.error);
};
recognition.onend = () => {
  console.log('语音识别已结束.');
  toggleRecordingStatus(false); // hide the badge once capture stops
  if (ws && wsConnected) {
    ws.close();
  }
  isRecording = false;
};
// Open the iFlytek IAT (speech dictation) WebSocket and send the opening frame.
//
// BUGFIX: the original computed `CryptoJS.SHA256(signStr, apiSecret)` — SHA256
// takes a single message argument, so the secret was silently ignored — and the
// resulting "sign" was never attached to the URL, so the connection was always
// unauthenticated. iFlytek's documented scheme signs "host/date/request-line"
// with HMAC-SHA256 and passes authorization/date/host as query parameters.
// Requires CryptoJS to be loaded on the page.
function connectWebSocket() {
  const host = 'ws-api.xfyun.cn';
  const path = '/v2/iat';
  const appid = 'xxxx';     // replace with your own credentials
  const apiKey = 'xxx';
  const apiSecret = 'xxxx';

  const date = new Date().toUTCString(); // RFC 1123 date (toGMTString is deprecated)
  const signatureOrigin = 'host: ' + host + '\ndate: ' + date + '\nGET ' + path + ' HTTP/1.1';
  const signature = CryptoJS.enc.Base64.stringify(CryptoJS.HmacSHA256(signatureOrigin, apiSecret));
  const authorizationOrigin = 'api_key="' + apiKey + '", algorithm="hmac-sha256", headers="host date request-line", signature="' + signature + '"';
  const authorization = btoa(authorizationOrigin);
  const wsUrl = 'wss://' + host + path +
    '?authorization=' + encodeURIComponent(authorization) +
    '&date=' + encodeURIComponent(date) +
    '&host=' + encodeURIComponent(host);

  ws = new WebSocket(wsUrl);

  ws.onopen = function() {
    console.log('WebSocket 连接已打开');
    wsConnected = true;
    // Opening frame: status 0 with session parameters and an empty audio field.
    const initPayload = {
      "common": { "app_id": appid },
      "business": { "domain": "iat", "language": "zh_cn", "accent": "mandarin" },
      "data": {
        "status": 0,
        "from": "pc",
        "format": "audio/L16;rate=16000",
        "encoding": "raw",
        "audio": ""
      }
    };
    ws.send(JSON.stringify(initPayload));
  };
  ws.onmessage = function(event) {
    const data = JSON.parse(event.data);
    if (data.code === 0) {
      textToSpeech(data.data.result);
    } else {
      console.error('WebSocket 错误响应:', data);
    }
  };
  ws.onerror = function(error) {
    console.error('WebSocket 错误:', error);
  };
  ws.onclose = function(event) {
    console.log('WebSocket 连接已关闭:', event);
    wsConnected = false;
  };
}
// Push a final (status 2) frame to the dictation socket, if it is connected.
// NOTE(review): the "audio" field is filled with raw text here, but the IAT
// service expects base64-encoded PCM audio — confirm this helper's intent.
function sendTextToServer(text) {
  if (!ws || !wsConnected) {
    return;
  }
  const frame = {
    "common": {
      // NOTE(review): hard-coded app_id duplicates the credential used in
      // connectWebSocket(); keep the two in sync.
      "app_id": "d614d1bf"
    },
    "business": {
      "domain": "iat",
      "language": "zh_cn",
      "accent": "mandarin"
    },
    "data": {
      "status": 2,
      "from": "pc",
      "format": "audio/L16;rate=16000",
      "encoding": "raw",
      "audio": text
    }
  };
  ws.send(JSON.stringify(frame));
}
// Convert text to speech via the Baidu TTS HTTP endpoint and play the result.
// BUGFIX: the original had no onerror handler, so network failures (DNS, CORS,
// mixed content) died silently with no console output.
// NOTE(review): the endpoint is plain http:// — if this page is served over
// https the request will be blocked as mixed content; confirm an https
// endpoint is available.
function textToSpeech(text) {
  const xhr = new XMLHttpRequest();
  const apiUrl = 'http://tts.baidu.com/text2audio'; // 示例使用百度TTS API,根据实际情况调整
  xhr.open('GET', apiUrl + '?tex=' + encodeURIComponent(text) + '&lan=zh&ctp=1&cuid=123456', true);
  xhr.responseType = 'blob';
  xhr.onload = function() {
    if (xhr.status === 200) {
      playAudio(URL.createObjectURL(xhr.response));
    } else {
      console.error('Failed to fetch audio:', xhr.statusText);
    }
  };
  // Surface network-level failures instead of failing silently.
  xhr.onerror = function() {
    console.error('Network error while fetching TTS audio');
  };
  xhr.send();
}
// Submit the current question when Enter is pressed.
// Also invoked with a synthetic `{ key: 'Enter' }` object by the send button
// and the speech-result handler — that object has no preventDefault, so the
// original `event.preventDefault()` threw a TypeError on every button click.
function handleKeyPress(event) {
  if (event.key === 'Enter') {
    if (typeof event.preventDefault === 'function') {
      event.preventDefault(); // stop default behavior such as form submission
    }
    var question = document.getElementById('questionInput').value;
    if (question) {
      displayMessage('user', question);
      displayLoading(); // show the "synthesizing" placeholder
      sendQuestion(question);
      document.getElementById('questionInput').value = ''; // clear the input
    }
  }
}
// POST the question to the /ask backend and render the answer.
// BUGFIX: the original only handled readyState 4 + status 200, so any server
// error left the "synthesizing" placeholder on screen forever with no feedback.
function sendQuestion(question) {
  var xhr = new XMLHttpRequest();
  xhr.open('POST', '/ask', true);
  xhr.setRequestHeader('Content-Type', 'application/json');
  xhr.onreadystatechange = function() {
    if (xhr.readyState !== 4) return;
    removeLoading(); // always clear the placeholder once the request settles
    if (xhr.status === 200) {
      var response = JSON.parse(xhr.responseText);
      displayMessage('robot', response.text_answer);
      playAudio(response.audio_url);
      // Offer a comfort song when the question contains a trigger word.
      playCustomAudio(question);
    } else {
      displayMessage('robot', '请求失败,请稍后再试。');
    }
  };
  xhr.send(JSON.stringify({ question: question }));
}
// Append a chat bubble (with avatar) for `sender` ('user' or 'robot') to the log.
function displayMessage(sender, text) {
  const chat = document.getElementById('chat');

  const message = document.createElement('div');
  message.classList.add('message', sender);

  const avatar = document.createElement('div');
  avatar.classList.add('avatar', sender);
  message.appendChild(avatar);

  const bubble = document.createElement('div');
  bubble.classList.add('bubble', sender);
  bubble.textContent = text;
  message.appendChild(bubble);

  chat.appendChild(message);
  chat.scrollTop = chat.scrollHeight; // keep the newest message in view
}
// Show a transient "synthesizing speech" placeholder at the bottom of the log.
function displayLoading() {
  const chat = document.getElementById('chat');
  const placeholder = document.createElement('div');
  placeholder.classList.add('loading');
  placeholder.textContent = '正在合成语音...';
  chat.appendChild(placeholder);
  chat.scrollTop = chat.scrollHeight; // keep the placeholder in view
}
// Remove every loading placeholder currently present in the chat log.
function removeLoading() {
  const chat = document.getElementById('chat');
  // getElementsByClassName returns a live collection: removing the first
  // element reshuffles it, so keep pulling index 0 until it is empty.
  const live = chat.getElementsByClassName('loading');
  for (let el = live[0]; el !== undefined; el = live[0]) {
    chat.removeChild(el);
  }
}
// Play an audio URL once the browser has buffered enough for smooth playback.
function playAudio(audioUrl) {
  const audio = new Audio(audioUrl);
  audio.addEventListener('canplaythrough', () => audio.play());
}
// Show or hide the "recognizing speech" status label next to the mic button.
function toggleRecordingStatus(show) {
  const status = document.getElementById('recordingStatus');
  status.style.display = show ? 'inline' : 'none';
}
// Confirmation dialog: "confirm" plays the pending track, "cancel" just closes.
document.getElementById('confirmationDialog').addEventListener('click', (event) => {
  const clicked = event.target;
  if (clicked.classList.contains('confirm')) {
    playAudio(customAudioUrl); // set by showConfirmationDialog()
    hideConfirmationDialog();
    return;
  }
  if (clicked.classList.contains('cancel')) {
    hideConfirmationDialog();
  }
});
// Remember which track to play on confirm, then reveal the dialog.
// BUGFIX: `customAudioUrl` was an implicit global (never declared); make the
// global assignment explicit — the dialog's click handler reads it.
function showConfirmationDialog(audioUrl) {
  globalThis.customAudioUrl = audioUrl;
  document.getElementById('confirmationDialog').style.display = 'block';
}
// Close the play-music confirmation dialog.
function hideConfirmationDialog() {
  const dialog = document.getElementById('confirmationDialog');
  dialog.style.display = 'none';
}
// Offer a matching comfort song when the user's message contains a trigger word.
// Only the first matching keyword (in order) opens the dialog.
function playCustomAudio(userInput) {
  const tracks = [
    ['不开心', '/static/video/顾森西 - 再次与你同行 (Live).ogg'],
    ['迷茫', '/static/video/诗林 - 每当我找不到存在的意义.ogg'],
  ];
  for (const [keyword, url] of tracks) {
    if (userInput.includes(keyword)) {
      showConfirmationDialog(url);
      return;
    }
  }
}
</script>
</body>
</html>
注意:
将appid apiKey apiSecret 替换为自己的密钥,申请的方式在上一篇博客。
2.2 后端:
获取大模型的回答并修改模型自定义风格 happyApi.py:
# coding: utf-8
import _thread as thread
import base64
import datetime
import hashlib
import hmac
import json
import ssl
from urllib.parse import urlparse, urlencode
from wsgiref.handlers import format_date_time
import websocket
import os
import pygame
import SparkApi
# LLM interface
def get_response_from_model(appid, api_key, api_secret, spark_url, domain, question):
    """Ask the Spark LLM `question` and return the assistant's text answer.

    Delegates the request to the SparkApi module and extracts the answer
    content from the accumulated response.
    """
    SparkApi.main(appid, api_key, api_secret, spark_url, domain, question)
    reply = SparkApi.getText("assistant", SparkApi.answer)
    return reply[0]["content"]
class Ws_Param(object):
    """Builds the authenticated WebSocket URL for the iFlytek TTS service."""

    def __init__(self, APPID, APIKey, APISecret, gpt_url):
        self.APPID = APPID
        self.APIKey = APIKey
        self.APISecret = APISecret
        parsed = urlparse(gpt_url)
        self.host = parsed.netloc
        self.path = parsed.path
        self.gpt_url = gpt_url

    def create_url(self):
        """Return gpt_url extended with `authorization`, `date`, `host` params.

        Signs "host / date / request-line" with HMAC-SHA256 using the API
        secret, per iFlytek's authentication scheme.
        """
        date = format_date_time(datetime.datetime.now().timestamp())
        request_line = f"GET {self.path} HTTP/1.1"
        signing_input = "\n".join([f"host: {self.host}", f"date: {date}", request_line])
        digest = hmac.new(
            self.APISecret.encode('utf-8'),
            signing_input.encode('utf-8'),
            digestmod=hashlib.sha256,
        ).digest()
        signature = base64.b64encode(digest).decode(encoding='utf-8')
        auth_origin = (
            f'api_key="{self.APIKey}", algorithm="hmac-sha256", '
            f'headers="host date request-line", signature="{signature}"'
        )
        authorization = base64.b64encode(auth_origin.encode('utf-8')).decode(encoding='utf-8')
        params = {
            "authorization": authorization,
            "date": date,
            "host": self.host,
        }
        return self.gpt_url + '?' + urlencode(params)
# Handle a websocket error by logging it.
def on_error(ws, error):
    """Print websocket errors to stdout."""
    print("### error:", error)
# Handle websocket closure.
def on_close(ws, close_status_code, close_msg):
    """Print a marker when the websocket connection closes."""
    print("### closed ###")
# Handle websocket connection establishment.
def on_open(ws):
    """Kick off `run` on a background thread once the socket is open."""
    thread.start_new_thread(run, (ws,))
# Handle an incoming websocket message: append audio chunks to the output
# file and, on the final frame (status 2), play the complete file.
#
# BUGFIX: the original played the file and closed the socket BEFORE writing
# the audio chunk carried by the final (status == 2) message, so the last
# chunk was missing from playback and was appended after ws.close(). The
# chunk is now written first, then playback runs on the complete file.
def on_message(ws, message):
    """Append decoded audio to ws.save_file_name; play it when synthesis ends.

    Expects `ws` to carry a `save_file_name` attribute (set by `main`).
    """
    message = json.loads(message)
    code = message['header']['code']
    if code != 0:
        print("### 请求出错: ", message)
    else:
        payload = message.get("payload")
        status = message['header']['status']
        # Persist this frame's audio chunk (if any) before anything else.
        if payload and payload != "null":
            audio = payload.get("audio")
            if audio:
                audio = audio["audio"]  # base64-encoded MP3 bytes
                with open(ws.save_file_name, 'ab') as f:
                    f.write(base64.b64decode(audio))
        if status == 2:
            print("### 合成完毕")
            # Play the fully-written audio file.
            pygame.mixer.init()
            pygame.mixer.music.load(ws.save_file_name)
            pygame.mixer.music.play()
            while pygame.mixer.music.get_busy():  # wait for playback to finish
                pygame.time.Clock().tick(10)
            ws.close()
def run(ws, *args):
    """Send the one-shot TTS request frame over the open websocket.

    The frame carries the app header, the oral/tts synthesis parameters, and
    the base64-encoded text stored on the connection object by `main`.
    """
    audio_cfg = {
        "encoding": "lame",
        "sample_rate": 16000,
        "channels": 1,
        "bit_depth": 16,
        "frame_size": 0,
    }
    pybuf_cfg = {
        "encoding": "utf8",
        "compress": "raw",
        "format": "plain",
    }
    tts_cfg = {
        "vcn": ws.vcn,
        "speed": 66,
        "volume": 50,
        "pitch": 50,
        "bgs": 0,
        "reg": 0,
        "rdn": 0,
        "rhy": 0,
        "scn": 5,
        "version": 0,
        "L5SilLen": 0,
        "ParagraphSilLen": 0,
        "audio": audio_cfg,
        "pybuf": pybuf_cfg,
    }
    text_payload = {
        "encoding": "utf8",
        "compress": "raw",
        "format": "json",
        "status": 0,
        "seq": 0,
        "text": base64.b64encode(ws.text.encode('utf-8')).decode("UTF8"),
    }
    frame = {
        "header": {"app_id": ws.appid, "status": 0},
        "parameter": {
            "oral": {"spark_assist": 1, "oral_level": "mid"},
            "tts": tts_cfg,
        },
        "payload": {"text": text_payload},
    }
    ws.send(json.dumps(frame))
def main(appid, api_secret, api_key, url, vcn, save_file_name):
    """Interactive loop: ask the Spark model, then synthesize and play each answer.

    Args:
        appid: iFlytek TTS application id.
        api_secret / api_key: iFlytek TTS credentials.
        url: TTS websocket endpoint.
        vcn: voice (speaker) name.
        save_file_name: path where synthesized audio chunks are written.
    """
    websocket.enableTrace(False)
    wsParam = Ws_Param(appid, api_key, api_secret, url)
    wsUrl = wsParam.create_url()
    ws = websocket.WebSocketApp(wsUrl, on_message=on_message, on_error=on_error,
                                on_close=on_close, on_open=on_open)
    # on_message/run read these attributes off the connection object.
    ws.appid = appid
    ws.vcn = vcn
    ws.save_file_name = save_file_name
    while True:
        user_input = input("请输入要询问的问题 (输入 'exit' 退出程序): ")
        if user_input.lower() == 'exit':
            break
        # BUGFIX: remove the previous audio file every round. The original
        # removed it only once before the loop, so each later answer was
        # appended to (and played after) all earlier audio.
        if os.path.exists(ws.save_file_name):
            os.remove(ws.save_file_name)
        # Fetch the model's answer for this question.
        config = SparkApi.config()
        model_response = get_response_from_model(
            config["appid"],
            config["api_key"],
            config["api_secret"],
            config["Spark_url"],
            config["domain"],
            user_input,
        )
        ws.text = model_response
        ws.run_forever(sslopt={"cert_reqs": ssl.CERT_NONE})
    pygame.mixer.quit()  # release pygame audio resources
if __name__ == "__main__":
    # Replace the placeholder credentials with your own iFlytek keys.
    main(
        appid="xxx",
        api_secret="xxx",
        api_key="xxx",
        url="wss://cbm01.cn-huabei-1.xf-yun.com/v1/private/medd90fec",
        # Voice (speaker) parameter
        vcn="x4_lingxiaoxuan_oral",
        save_file_name="2.mp3"
    )
完整代码可私信我获取,或下载下一篇博客中的安装包。