语音识别flask接口开发

要开发一个flask语音识别接口,首先要解决语音文件在网络中的传输问题,然后选识别算法进行识别


1、以二进制文件流方式上传语音

python服务端代码,以flask.request.files接收前端的语音上传请求

from flask import Flask, request
import io
import wave
import os
import json

# Flask application instance; the /upload_audio route below is registered on it.
app = Flask(__name__)

@app.route('/upload_audio', methods=['POST'])
def upload_audio():
    """
    Receive an uploaded audio file and save it as ``output.wav``.

    The file is read from the multipart form field named ``file``.

    :return: JSON string with ``status`` (0 on success, 1 when no file was
        sent), ``msg`` (error description) and ``result`` (success text).
    """
    f_obj = request.files.get("file", None)
    if f_obj is None:
        return json.dumps({'status': 1, 'msg': 'No audio was received.', 'result': ''})

    audio_data = f_obj.read()
    # Open with 'wb' rather than 'ab': each request carries a complete file,
    # and appending a second upload after the first one would leave two
    # concatenated files in output.wav, which is not a valid WAV.
    with open('output.wav', 'wb') as f:
        f.write(audio_data)
    return json.dumps({'status': 0, 'msg': '', 'result': 'receive audio success.'})

if __name__ == '__main__':
    # This program only creates a plain Flask app (no SocketIO object exists
    # here), so start Flask's own development server on port 8200.
    app.run(port=8200, debug=True)

客户端请求代码示例如下(使用Python的requests库上传本地语音文件):

import requests
import time

def post_audio():
    """
    Upload ``./c1.wav`` to the ``/upload_audio`` endpoint and print the outcome.

    Prints the time the request took, then the server's ``msg`` when the
    returned ``status`` is non-zero, otherwise the ``result`` field.

    :return: None
    """
    url = "http://localhost:8200/upload_audio"
    # Use a context manager so the file handle is closed even when the
    # request raises; the original left the handle open.
    with open('./c1.wav', 'rb') as audio_file:
        t1 = time.time()
        r = requests.post(url, files={'file': audio_file})
        t2 = time.time()
    print("comsume time: %f s"%(t2-t1))
    # Decode the JSON body once instead of re-parsing it for every field.
    payload = r.json()
    if payload['status']:
        print(payload['msg'])
    else:
        print(payload['result'])

# Perform a single upload when this file is executed as a script.
if __name__ == '__main__':
    post_audio()

2、网页端长连接流式上传语音文件

python服务端代码,接收网页端发来的语音片段并保存为.wav格式的语音文件,方便后面的语音识别

from flask import Flask, request, render_template
from flask_socketio import SocketIO, emit
import json
import base64

# Flask application that serves the recording page and hosts the Socket.IO events.
app = Flask(__name__)
# NOTE(review): 'secret' is a placeholder key; replace it before deploying.
app.config['SECRET_KEY'] = 'secret'
# Socket.IO server wrapped around the Flask app, configured for the eventlet async mode.
socketio = SocketIO(app, async_mode='eventlet')

# Module-level list of the base64 chunks received since the last 'audio_end'.
# NOTE(review): the list is global, so it is shared by every connected client.
audio_chunks = []

@app.route('/')
def index():
    """Serve the recording page rendered from the ``index.html`` template."""
    page = render_template('index.html')
    return page

@socketio.on('audio_chunk')
def handle_audio_chunk(data):
    """
    Handle one ``audio_chunk`` event.

    The base64 payload is remembered in ``audio_chunks`` and its decoded
    bytes are also appended to ``audio_chunk.wav``.

    :param data: base64-encoded audio bytes sent by the client.
    """
    # Mutating the module-level list in place needs no ``global`` statement.
    audio_chunks.append(data)
    # The chunk is written to disk as well as being kept in memory.
    with open('audio_chunk.wav', 'ab') as chunk_file:
        raw_bytes = base64.b64decode(data)
        chunk_file.write(raw_bytes)
 
@socketio.on('audio_end')
def handle_audio_end():
    """
    Handle the ``audio_end`` event: persist the buffered recording.

    Every buffered base64 chunk is decoded and written, in arrival order, to
    ``uploaded_audio.wav``; the buffer is then cleared and ``audio_saved`` is
    emitted back to the client. Nothing happens when no chunk was buffered.
    """
    global audio_chunks
    if not audio_chunks:
        return

    print("开始保存语音文件")
    # 'wb' so each recording replaces the previous file instead of being
    # appended to it.
    with open('uploaded_audio.wav', 'wb') as f:
        # Write all chunks; the original wrote only audio_chunks[0] and
        # dropped the rest. Each chunk was base64-encoded on its own, so each
        # one must be decoded on its own.
        for chunk in audio_chunks:
            f.write(base64.b64decode(chunk))
    print("服务端保存语音文件完成")
    audio_chunks = []  # Clear the buffer for the next recording
    emit('audio_saved', {'message': 'Audio saved successfully!'})

       
@socketio.on('connect')
def connected_msg():
    """Print a notice to stdout when the Socket.IO ``connect`` event fires."""
    notice = '客户端连接成功,client connected!'
    print(notice)


@socketio.on('disconnect')
def disconnect_msg():
    """Print a notice to stdout when the Socket.IO ``disconnect`` event fires."""
    notice = '客户端断开连接,client disconnected!'
    print(notice)

# When run as a script, start the server through the SocketIO object on
# port 8200 with debug enabled.
if __name__ == '__main__':
    socketio.run(app, port=8200, debug=True)

前端html及JavaScript代码如下:

<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Audio Stream Upload</title>
    <script src="https://cdn.socket.io/4.0.0/socket.io.min.js"></script>
</head>
<body>
    <h1>Upload Audio Stream</h1>
    <button id="start-recording">Start Recording</button>
    <button id="stop-recording" disabled>Stop Recording</button>
    <p id="status"></p>

    <script>
        const socket = io.connect('http://localhost:8200');
        let mediaRecorder;
        let audioChunks = [];
        // Tail of a promise chain that encodes and emits chunks one at a time,
        // so chunks reach the server in recording order.
        let uploadChain = Promise.resolve();

        /**
         * Base64-encode an ArrayBuffer in fixed-size slices.
         *
         * Spreading a whole recording into String.fromCharCode(...) passes one
         * argument per byte and throws a RangeError once the buffer is large,
         * so the bytes are converted slice by slice instead.
         */
        function arrayBufferToBase64(arrayBuffer) {
            const bytes = new Uint8Array(arrayBuffer);
            const sliceSize = 0x8000;
            let binary = '';
            for (let offset = 0; offset < bytes.length; offset += sliceSize) {
                binary += String.fromCharCode.apply(null, bytes.subarray(offset, offset + sliceSize));
            }
            return btoa(binary);
        }

        document.getElementById('start-recording').addEventListener('click', async () => {
            const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
            mediaRecorder = new MediaRecorder(stream);
            // Forget the blobs of any previous recording.
            audioChunks = [];

            mediaRecorder.ondataavailable = event => {
                const blob = event.data;
                audioChunks.push(blob);
                // Reading the blob is asynchronous; queue it on the chain so the
                // stop handler can wait for it before announcing the end.
                uploadChain = uploadChain
                    .then(() => blob.arrayBuffer())
                    .then(arrayBuffer => {
                        socket.emit('audio_chunk', arrayBufferToBase64(arrayBuffer));
                    })
                    .catch(error => {
                        console.error('Failed to upload audio chunk:', error);
                    });
            };

            mediaRecorder.onstop = () => {
                // Release the microphone; otherwise the capture stays active.
                stream.getTracks().forEach(track => track.stop());
                // Inform the server that the audio stream has ended, but only
                // after every pending chunk has actually been emitted.
                uploadChain.then(() => socket.emit('audio_end'));
                document.getElementById('start-recording').disabled = false;
                document.getElementById('stop-recording').disabled = true;
                document.getElementById('status').textContent = 'Recording stopped. Waiting for server response...';
            };

            mediaRecorder.start();
            document.getElementById('start-recording').disabled = true;
            document.getElementById('stop-recording').disabled = false;
            document.getElementById('status').textContent = 'Recording...';
        });

        document.getElementById('stop-recording').addEventListener('click', () => {
            mediaRecorder.stop();
        });

        // Show the server's confirmation once the recording has been saved.
        socket.on('audio_saved', data => {
            document.getElementById('status').textContent = data.message;
        });
    </script>
</body>
</html>

启动python服务,浏览器访问http://localhost:8200/就可以看到如下网页:
(此处原为网页截图:页面标题为“Upload Audio Stream”,包含“Start Recording”和“Stop Recording”两个按钮。)


3、语音识别接口

语音识别算法这里选择openai的开源项目:whisper,项目地址:https://github.com/openai/whisper

  • 安装
    pip install -U openai-whisper
    还需要安装ffmpeg,Ubuntu/Debian系统可在终端执行:sudo apt update && sudo apt install ffmpeg

flask服务端代码如下:

import os
os.environ["CUDA_VISIBLE_DEVICES"] = '1'
from flask import Flask, request
import io
import wave
import json
import whisper

# Flask application instance; the /audio_rec route below is registered on it.
app = Flask(__name__)
# The Whisper model is loaded once at module level and reused by every request.
model = whisper.load_model("turbo")  # or your model

@app.route('/audio_rec', methods=['POST'])
def audio_recognize():
    """
    Receive an uploaded audio file and transcribe it with Whisper.

    The file is read from the multipart form field named ``file``, written to
    a temporary file, passed to ``model.transcribe`` and then deleted.

    :return: JSON string with ``status`` (0 on success, 1 when no file was
        sent), ``msg`` (error description) and ``result`` (recognized text).
    """
    # Imported locally so this block stays runnable without touching the
    # file's import section.
    import tempfile

    f_obj = request.files.get("file", None)
    if f_obj is None:
        return json.dumps({'status': 1, 'msg': 'No audio was received.', 'result': ''})

    # Use a unique temporary file per request. The original appended ('ab')
    # every upload to one shared temp.wav, so later requests were transcribed
    # from stale, concatenated data and concurrent requests collided.
    fd, save_path = tempfile.mkstemp(suffix=".wav")
    try:
        with os.fdopen(fd, 'wb') as f:
            f.write(f_obj.read())
        result = model.transcribe(save_path)
    finally:
        # Always remove the temporary file, even when transcription fails.
        os.remove(save_path)
    return json.dumps({'status': 0, 'msg': '', 'result': result["text"]})

if __name__ == '__main__':
    # This program only creates a plain Flask app (no SocketIO object exists
    # here), so start Flask's own development server on port 8200.
    app.run(port=8200, debug=True)
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值