# -*- coding: utf-8 -*-
"""
实时流式识别
需要安装websocket-client库
使用方式 python realtime_asr.py 16k-0.pcm
"""
import websocket
import threading
import time
import uuid
import json
import logging
import sys
# Module-level constants and configuration.
URI = "wss://vop.baidu.com/realtime_asr"

# PCM file to stream: the first command-line argument when given, otherwise
# the hard-coded fallback path below.
if len(sys.argv) < 2:
    pcm_file = "D:\\llqxz\\16k-0.pcm"
else:
    pcm_file = sys.argv[1]

logger = logging.getLogger()

# Logging configuration: every record goes both to realtime_asr.log and to
# the console. The log file is opened as UTF-8 explicitly because server
# responses are logged verbatim and may contain non-ASCII text; without an
# explicit encoding the handler falls back to the platform locale.
logging.basicConfig(
    format='[%(asctime)-15s] [%(funcName)s()][%(levelname)s] %(message)s',
    level=logging.DEBUG,
    handlers=[
        logging.FileHandler("realtime_asr.log", encoding="utf-8"),
        logging.StreamHandler(),
    ],
)

# Holds the final recognition result; written by on_message() and printed by
# on_close().
final_recognition_result = None

# Call flow of this script:
# 1. Connect                                   ws_app.run_forever()
# 2. Once connected, send the data             on_open()
#    2.1 send the START parameter frame        send_start_params()
#    2.2 send the audio data frames            send_audio()
#    2.3 the library receives results          on_message()
#    2.4 send the FINISH frame                 send_finish()
# 3. Connection closed                         on_close()
# Errors raised by the library                 on_error()
def send_start_params(ws):
    """
    Send the START parameter frame.

    Builds a JSON object of type "START" carrying the application
    credentials and the audio parameters, and sends it as a text frame.
    A failed send is logged and not re-raised.

    :param websocket.WebSocket ws: connection to send the frame on
    :return: None
    """
    req = {
        "type": "START",
        "data": {
            # The value was missing here in the original (a syntax error).
            # 0 is only a placeholder so the file parses.
            "appid": 0,  # TODO: replace with the appid shown on the web console
            "appkey": "",  # appkey that belongs to the appid above
            "dev_pid": 15372,  # recognition model
            # Free-form value that does not affect usage: a MAC address or any
            # other unique id; Baidu uses it to count unique visitors.
            "cuid": "yourself_defined_user_id",
            "sample": 16000,  # fixed parameter
            "format": "pcm",  # fixed parameter
        },
    }
    body = json.dumps(req)
    try:
        ws.send(body, websocket.ABNF.OPCODE_TEXT)
        # NOTE(review): body contains the appkey, so the credential ends up
        # in the log output; kept as-is to preserve the existing log line.
        logger.info("send START frame with params:" + body)
    except Exception as e:
        logger.error(f"Failed to send START frame: {e}")
def send_audio(ws, chunk_ms=160):
    """
    Send the PCM file as binary audio frames, pausing between frames.

    The whole file named by the module-level ``pcm_file`` is read into
    memory and sent in slices of ``chunk_ms`` milliseconds of audio. After
    each frame the function sleeps for ``chunk_ms`` so that frames are
    spaced apart in time. A read failure or a failed send is logged and
    ends the function; it is not re-raised.

    :param websocket.WebSocket ws: connection to send the frames on
    :param chunk_ms: milliseconds of audio per frame; defaults to 160, the
        value that used to be hard-coded
    :return: None
    :raises ValueError: if ``chunk_ms`` yields a frame shorter than one byte
    """
    # 16000 * 2 bytes per second of audio (presumably 16 kHz, 2 bytes per
    # sample -- confirm against the input format), scaled to chunk_ms.
    chunk_len = int(16000 * 2 / 1000 * chunk_ms)
    if chunk_len < 1:
        # A zero-length chunk would never advance through the file.
        raise ValueError("chunk_ms must yield at least one byte per frame")

    try:
        with open(pcm_file, 'rb') as f:
            pcm = f.read()
    except FileNotFoundError:
        logger.error(f"File {pcm_file} not found.")
        return
    except Exception as e:
        logger.error(f"Failed to read audio file: {e}")
        return

    total = len(pcm)
    logger.info("send_audio total={}".format(total))

    pause = chunk_ms / 1000.0
    for index in range(0, total, chunk_len):
        # The last frame is clipped to the end of the data.
        end = min(index + chunk_len, total)
        body = pcm[index:end]
        logger.debug("try to send audio length {}, from bytes [{},{})".format(len(body), index, end))
        try:
            ws.send(body, websocket.ABNF.OPCODE_BINARY)
        except Exception as e:
            logger.error(f"Failed to send audio data: {e}")
            break
        # ws.send itself takes some time; that is not compensated for here.
        time.sleep(pause)
def send_finish(ws):
    """
    Send the FINISH frame.

    Serialises a JSON object of type "FINISH" and sends it as a text frame.
    A failed send is logged and not re-raised.

    :param websocket.WebSocket ws: connection to send the frame on
    :return: None
    """
    payload = json.dumps({"type": "FINISH"})
    try:
        ws.send(payload, websocket.ABNF.OPCODE_TEXT)
        logger.info("send FINISH frame")
    except Exception as err:
        logger.error(f"Failed to send FINISH frame: {err}")
def send_cancel(ws):
    """
    Send the CANCEL frame.

    Serialises a JSON object of type "CANCEL" and sends it as a text frame.
    A failed send is logged and not re-raised.

    :param websocket.WebSocket ws: connection to send the frame on
    :return: None
    """
    payload = json.dumps({"type": "CANCEL"})
    try:
        ws.send(payload, websocket.ABNF.OPCODE_TEXT)
        logger.info("send Cancel frame")
    except Exception as err:
        logger.error(f"Failed to send CANCEL frame: {err}")
def on_open(ws):
    """
    Callback for an established connection: start sending the frames.

    The sending is done on a separate thread that is started here, so this
    callback returns without waiting for the audio to be sent.

    :param websocket.WebSocket ws: the connection that was just opened
    :return: None
    """

    # The inner function keeps the name "run": the log format prints
    # %(funcName)s, so the name is part of the log output.
    def run(*args):
        """
        Send the START frame, the audio frames and the FINISH frame in order.

        :param args: unused
        :return: None
        """
        for step in (send_start_params, send_audio, send_finish):
            step(ws)
        logger.debug("thread terminating")

    sender = threading.Thread(target=run)
    sender.start()
def on_message(ws, message):
    """
    Callback for a message received from the server.

    The message is parsed as JSON. When it is an object of type "FIN_TEXT"
    that has a "result" key, that result is stored in the module-level
    ``final_recognition_result``, replacing any earlier value. Every
    message is logged, whether or not it could be parsed.

    :param ws: the connection the message arrived on (unused)
    :param message: the raw message, expected to be JSON text
    :return: None
    """
    global final_recognition_result
    try:
        data = json.loads(message)
    except (TypeError, ValueError) as e:
        # json.JSONDecodeError is a ValueError; TypeError covers a payload
        # that is not str/bytes. Report it instead of dropping it silently.
        logger.debug("Response is not valid JSON: %s", e)
    else:
        # Valid JSON is not necessarily an object; only a dict has .get().
        if (isinstance(data, dict)
                and data.get("type") == "FIN_TEXT"
                and "result" in data):
            final_recognition_result = data['result']
    # %s formatting also works if the library hands over bytes, where
    # string concatenation would raise TypeError.
    logger.info("Response: %s", message)
def on_error(ws, error):
    """
    Callback for an error reported by the library, e.g. a connection timeout.

    The error is converted to text and logged at ERROR level.

    :param ws: the connection the error belongs to (unused)
    :param error: the error object passed in by the library
    :return: None
    """
    detail = str(error)
    logger.error("error: " + detail)
def on_close(ws, close_status_code, close_msg):
    """
    Callback for a closed websocket.

    Logs the close status and, when a final recognition result has been
    stored, prints it to standard output.

    :param websocket.WebSocket ws: the connection that was closed (unused)
    :param close_status_code: close status code
    :param close_msg: close message
    :return: None
    """
    logger.info(f"ws close ... status code: {close_status_code}, message: {close_msg}")
    # Nothing to print when no final result was ever stored (or it is empty).
    if not final_recognition_result:
        return
    print("\n最终识别结果:", final_recognition_result)
if __name__ == "__main__":
    # Script entry point: build the session URI, register the callbacks and
    # run the websocket client until the connection ends.
    logger.info("begin")
    # Uncomment to enable the websocket library's trace output:
    # websocket.enableTrace(True)
    # A fresh UUID is appended as the "sn" query parameter on every run.
    uri = URI + "?sn=" + str(uuid.uuid1())
    logger.info("uri is " + uri)
    ws_app = websocket.WebSocketApp(
        uri,
        on_open=on_open,  # called once the connection is established
        on_message=on_message,  # called for every received message
        on_error=on_error,  # called when the library hits an error
        on_close=on_close,  # called after the connection is closed
    )
    ws_app.run_forever()
