使用阿里funasr作为freeswitch的ASR语音识别服务

livepy

已于 2025-02-19 09:43:25 修改

阅读量5.1k

点赞数 24

CC 4.0 BY-SA版权

文章标签：语音识别人工智能

于 2024-09-06 15:56:53 首次发布

本文链接：https://blog.youkuaiyun.com/livepy/article/details/141894837

1、编译及运行unimrcp

要使用unimrcp 必须先编译一个正常的unimrcpserver ，再修改里面的asr语音识别模块

1.1 获取unimrcp代码

mkdir unimrcpserver
cd unimrcpserver
git clone https://github.com/unispeech/unimrcp.git

1.2 下载unimrcp的依赖库，并安装

进入UniMRCP Dependencies - UniMRCP，直接选择最新版本下载

解压后，进入unimrcp-deps-XXX 目录，执行安装命令：

./build-dep-libs.sh

1.3 安装 unimrcp

./bootstrap
./configure
sudo make
sudo make install

后续就可以在/usr/local/unimrcp 路径下，看到unimrcp相关的内容

1.4 ASR 测试

1、先启动服务端 ./unimrcpserver

2、启动客户端

./umc
run recog

到这个时候，标准的 unimrcpserver 已经安装好了

2、安装并且完成和funasr的对接

因为funasr使用wss作为通信协议，感谢华为，华为的ASR 刚好有这个能力，而且它的SDK C++类库写的非常标准，阿里funasr的C++代码就不敢恭维了，整了很多让人感觉多此一举的代码，可能是一个刚毕业的985学生写的，用了很多类库，忘记了代码的核心是简洁明了

2.1 下载华为ASR SDK并且进行修改

华为SDK所依赖的三方库包括

openssl
jsoncpp
websocketpp 只需要头文件，无需编译
glog
gflags
boost 只需要头文件，无需编译

这些依赖库均以源码形式存放在SDK根目录。该SDK默认是开启了所有依赖库的安装，如果你在系统中提前安装过openssl，则在构建的时候，可以选择执行 cmake .. -DOPENSSL=OFF，跳过SDK对openssl的安装

2.1.1 SDK 获取

wget --no-check-certificate https://sis-sdk-repository.obs.cn-north-1.myhuaweicloud.com/cpp/huaweicloud-cpp-sdk-sis-linux.1.3.3.tar.gz
tar -xzvf huaweicloud-cpp-sdk-sis-linux.1.3.3.tar.gz

2.1.2 一键安装

mkdir -p build
cd build && cmake .. && make -j

这个时候可以看到编译出来一个 libhuawei_rasr.so 文件

2.2 修改代码，支持funasr 标准

为了尽量减少工作量，只对几个核心的文件进行修改

2.2.1 修改RasrRequest.cpp 、RasrRequest.h

RasrRequest.h

/*
 * Copyright (c) Huawei Technologies Co., Ltd. 2020-2020. All rights reserved.
 */
#ifndef RASR_REQUEST_H_
#define RASR_REQUEST_H_


#include "Utils.h"

namespace speech {
namespace huawei_asr {
/**
 * Holds the session parameters that are serialised into the JSON "start"
 * message for a FunASR-style websocket recogniser.
 *
 * The in-class defaults below only matter for members the constructor does
 * not overwrite; the visible constructor overwrites all four.
 */
class RasrRequest {
public:
    /**
     * @param auidioFs       value emitted as "audio_fs" in the start message
     * @param chunkInterval  value emitted as "chunk_interval"
     * @param chunkSize      comma separated integers (e.g. "5,10,5") emitted
     *                       as the "chunk_size" array
     * @param hotwords       emitted as "hotwords" when non-empty
     */
    RasrRequest(long auidioFs, int chunkInterval, std::string chunkSize, std::string hotwords);

    /** @return the start message as JSON text. */
    std::string ConstructParams();

private:
    long laudioFs_{8000};
    int chunkInterval_{10};
    std::string chunkSize_{"5,10,5"};
    std::string hotwords_{};
};
}
}
#endif

RasrRequest.cpp


#include "RasrRequest.h"
#include "json/value.h"

namespace speech {
namespace huawei_asr {

/**
 * Stores the four session parameters verbatim; no validation is performed
 * here (chunkSize is only parsed later, in ConstructParams()).
 */
RasrRequest::RasrRequest(long auidioFs,int chunkInterval,std::string chunkSize, std::string hotwords)
    : laudioFs_(auidioFs),
      chunkInterval_(chunkInterval),
      chunkSize_(chunkSize),
      hotwords_(hotwords)
{
}

std::string RasrRequest::ConstructParams()
{
    Json::Value jsonbegin;
    Json::Value chunksize;
    chunksize = Json::Value(Json::arrayValue);
    std::istringstream ss(chunkSize_);
    std::string item;    
    while (std::getline(ss, item, ',')) {
        chunksize.append(std::stoi(item));
    }
    jsonbegin["mode"] = "2pass";
    jsonbegin["chunk_size"] = chunksize;
    jsonbegin["chunk_interval"] = chunkInterval_;
    jsonbegin["wav_name"] = "funcAsr";
    jsonbegin["wav_format"] = "pcm";
    jsonbegin["audio_fs"] = laudioFs_;
    jsonbegin["itn"] = true;
    jsonbegin["encoder_chunk_look_back"] = 4;
    jsonbegin["decoder_chunk_look_back"] = 0;
    jsonbegin["is_speaking"] = true;
    if(!hotwords_.empty()){
        jsonbegin["hotwords"] = hotwords_;
    }
    return jsonbegin.toStyledString();
}
}
}

2.2.2 RasrClient.cpp

/**
 * Sends the start message built from `request` and immediately reports the
 * session as started.
 *
 * When CheckStart() fails nothing is sent and the current status is logged.
 */
void RasrClient::SendStart(RasrRequest request) {
    if (!CheckStart()) {
        LOG(INFO) << "status " << WebsocketStatusToStr(websocketServicePtr->GetStatus()) << " can't send start";
        return;
    }

    websocketServicePtr->SetStatus(WB_BLOCKING);
    ws.SendTxt(request.ConstructParams());
    // The original SDK waited here (WaitStatus, read timeout) for one of
    // WB_START / WB_ERROR / WB_CLOSE. Ali FunASR returns nothing in reply to
    // the start message, so that wait is disabled and the start notification
    // is raised locally instead.
    websocketServicePtr->OnStart();
}

/**
 * End-of-session hook. In this port it only logs.
 *
 * The original SDK behaviour (set WB_BLOCKING, send the
 * {"command": "END", "cancel": "false"} text frame, then wait for
 * WB_END / WB_ERROR / WB_CLOSE) is disabled, so nothing is transmitted even
 * when CheckEnd() succeeds. When CheckEnd() fails the current status is logged.
 */
void RasrClient::SendEnd() {
    LOG(INFO) << " SendEnd";
    if (!CheckEnd()) {
        LOG(INFO) << "status " << WebsocketStatusToStr(websocketServicePtr->GetStatus()) << " can't send end";
    }
}

/**
 * Opens the secure websocket to the configured endpoint and blocks until the
 * connection reaches WB_CONNECT, WB_ERROR or WB_CLOSE, or the connect timeout
 * passed to WaitStatus() expires.
 *
 * @param api  ignored: it is cleared on entry, so the URL is always
 *             "wss://" + endpoint with no path appended.
 *
 * No request headers are sent; the SDK's SignHeaders() call is disabled.
 * The wait at the end runs even when CheckConnect() fails.
 */
void RasrClient::Connect(std::string api) {
    api.clear();
    LOG(INFO) << " RasrClient::Connect";

    if (!CheckConnect()) {
        LOG(INFO) << "status " << WebsocketStatusToStr(websocketServicePtr->GetStatus()) << " can't Connect";
    } else {
        std::string url = "wss://" + authInfo.GetEndpoint() + api;
        std::map<std::string, std::string> headers;  // intentionally left empty
        websocketServicePtr->SetStatus(WB_BLOCKING);
        ws.Connect(url, headers);
    }

    std::set<WebsocketStatus> targetStatuses{WB_CONNECT, WB_ERROR, WB_CLOSE};
    WaitStatus(targetStatuses, httpConfig.GetConnectTimeout());
}

2.2.3 WebsocketService.h

将原来的对象引用改为指针

/*
 * Copyright (c) Huawei Technologies Co., Ltd. 2020-2020. All rights reserved.
 */
#ifndef HUAWEICLOUD_CPP_SDK_SIS_WEBSOCKETSERVICE_H
#define HUAWEICLOUD_CPP_SDK_SIS_WEBSOCKETSERVICE_H

#include "RasrListener.h"

#include <websocketpp/config/asio_client.hpp>
#include <websocketpp/client.hpp>
#include <websocketpp/common/thread.hpp>
#include <websocketpp/common/memory.hpp>

namespace speech {
namespace huawei_asr {

using websocketpp::lib::bind;

// websocketpp client built on the TLS asio configuration.
using client = websocketpp::client<websocketpp::config::asio_tls_client>;
// Shared pointer to the asio SSL context.
using context_ptr = websocketpp::lib::shared_ptr<websocketpp::lib::asio::ssl::context>;
// Message pointer type taken from the (non-TLS) asio_client configuration.
using message_ptr = websocketpp::config::asio_client::message_type::ptr;

/**
 * Tracks the websocket session status and forwards connection events and
 * parsed server messages to a RasrListener.
 *
 * The listener is held as a raw pointer that starts out null; SetCallBack()
 * replaces it while holding mutex_.
 * NOTE(review): GetStatus()/SetStatus() access status_ without taking
 * mutex_ - confirm whether they are called from more than one thread.
 */
class WebsocketService {
public:
    using ptr = websocketpp::lib::shared_ptr<WebsocketService>;

    // Connection level notifications.
    void OnOpen(client *c, websocketpp::connection_hdl hdl);
    void OnStart();
    void OnFail(client *c, websocketpp::connection_hdl hdl);
    void OnClose(client *c, websocketpp::connection_hdl hdl);
    void OnMessage(websocketpp::connection_hdl, client::message_ptr msg);

    /** @return the current session status. */
    WebsocketStatus GetStatus() { return status_; }

    /** Overwrites the current session status. */
    void SetStatus(WebsocketStatus newStatus) { status_ = newStatus; }

    /** Installs the listener that receives the callbacks. */
    void SetCallBack(RasrListener *callBack) {
        std::lock_guard<std::mutex> guard(mutex_);
        rasrListener_ = callBack;
    }
    void ResetCallBack();

private:
    void ProcessMessage(std::string message);
    void ProcessConnect();
    void ProcessStart(std::string text);
    void ProcessEnd(std::string text);
    void ProcessError(std::string text);
    void ProcessClose();
    void ProcessResp(std::string text);
    void ProcessEvent(std::string text);

    WebsocketStatus status_ = WB_PRE_START;
    RasrListener *rasrListener_ = nullptr;
    std::mutex mutex_;
};

}
}

#endif //HUAWEICLOUD_CPP_SDK_SIS_WEBSOCKETSERVICE_H

2.2.4 WebsocketService.cpp

/*
 * Copyright (c) Huawei Technologies Co., Ltd. 2020-2020. All rights reserved.
 */
#include "WebsocketService.h"
#include "json/json.h"

namespace speech {
namespace huawei_asr {

/**
 * Connection-opened notification (presumably registered as the websocketpp
 * open handler - confirm at the registration site). Delegates to
 * ProcessConnect(); neither argument is used.
 */
void WebsocketService::OnOpen(client *c, websocketpp::connection_hdl hdl) {
    (void)c;
    (void)hdl;
    ProcessConnect();
}

void WebsocketService::OnStart(){
    SetStatus(WB_START);
    ProcessStart("");
}

/**
 * Connection-closed notification. Delegates to ProcessClose(); neither
 * argument is used.
 */
void WebsocketService::OnClose(client *c, websocketpp::connection_hdl hdl) {
    (void)c;
    (void)hdl;
    ProcessClose();
}

/**
 * Connection-failure notification.
 *
 * The status is switched to WB_ERROR before anything else (original note:
 * "avoid next step failed"), then the connection's error-code message is
 * forwarded to ProcessError().
 */
void WebsocketService::OnFail(client *c, websocketpp::connection_hdl hdl) {
    SetStatus(WB_ERROR);
    ProcessError(c->get_con_from_hdl(hdl)->get_ec().message());
}

/**
 * Incoming-frame notification.
 *
 * Text frames are passed to ProcessMessage(); a frame with any other opcode
 * is reported through ProcessError() with a fixed message.
 */
void WebsocketService::OnMessage(websocketpp::connection_hdl, client::message_ptr msg) {
    const bool isText = (msg->get_opcode() == websocketpp::frame::opcode::text);
    if (!isText) {
        ProcessError("receive binary data, which is wrong");
        return;
    }
    ProcessMessage(msg->get_payload());
}

/**
 * Parses one text frame from the server and dispatches it.
 *
 * Dispatch rules, checked in this order:
 *   - payload is not valid JSON   -> ProcessError("receive not json data, which is wrong " + payload)
 *   - JSON root is not an object  -> ProcessError(payload)
 *   - "is_final" is true          -> ProcessEnd(payload)
 *   - "mode" == "2pass-online"    -> ProcessEvent("VOICE_START")
 *   - "mode" == "2pass-offline"   -> ProcessResp(text), then ProcessEvent("VOICE_END")
 *   - any other mode              -> ProcessError(payload)
 *
 * @param message  raw payload of the received text frame.
 */
void WebsocketService::ProcessMessage(std::string message) {
    Json::CharReaderBuilder readerBuilder;
    Json::Value root;
    std::string errs;
    std::istringstream payload(message);
    if (!Json::parseFromStream(readerBuilder, payload, &root, &errs)) {
        ProcessError("receive not json data, which is wrong "+message);
        return;
    }

    // The parser accepts any JSON value as root (array, string, number...),
    // but member lookup with operator[] is only valid on an object (or null)
    // and fails inside JsonCpp otherwise. Report such frames through the same
    // path as an unrecognised message instead of letting the lookup blow up.
    if (!root.isObject()) {
        ProcessError(message);
        return;
    }

    if (root["is_final"].asBool()) {
        ProcessEnd(message);
        return;
    }

    const std::string mode = root["mode"].asString();
    const std::string txtMsg = root["text"].asString();
    if (mode == "2pass-online") {
        ProcessEvent("VOICE_START");
    } else if (mode == "2pass-offline") {
        ProcessResp(txtMsg);
        ProcessEvent("VOICE_END");
    } else {
        ProcessError(message);
    }
}

void WebsocketService::ProcessConnect() {
    LOG(INFO) << " WebsocketService::ProcessConnect " ;
    status_ = WB_CONNECT;
    rasrListener_->OnConnect();
}

void WebsocketService::ProcessStart(std::string text) {
   //LOG(DEBUG) << " WebsocketService::ProcessStart " ;    
    status_ = WB_START;
    rasrListener_->OnStart(text);
}

void WebsocketService::ProcessEnd(std::string text) {
    LOG(INFO) << " WebsocketService::ProcessEnd " ;    
    status_ = WB_END;
    rasrListener_->OnEnd(text);
}

void WebsocketService::ProcessResp(std::string text) {
    //LOG(INFO) << " WebsocketService::Proc