使用sherpa-onnx的热词功能

最新推荐文章于 2025-06-20 11:12:00 发布

地狱为王

最新推荐文章于 2025-06-20 11:12:00 发布

阅读量548

点赞数 9

CC 4.0 BY-SA版权

分类专栏： Unity&Sherpa-Onnx 文章标签： sherpa-onnx hotwords unity

本文链接：https://blog.youkuaiyun.com/AWNUXCVBN/article/details/148491806

Unity&Sherpa-Onnx 专栏收录该内容

7 篇文章

订阅专栏

官方文档（热词 Hotwords）：https://k2-fsa.github.io/sherpa/onnx/hotwords/index.html

Only transducer models support hotwords in sherpa-onnx. That is, only models from Offline transducer models and Online transducer models support hotwords.

All other models don’t support hotwords.

测试一下看看效果

using System;
using System.IO;
using SherpaOnnx;
using UnityEngine;

/// <summary>
/// Offline (non-streaming) speech recognition using a sherpa-onnx transducer
/// model with hotword biasing. Each utterance is denoised, decoded with
/// <c>modified_beam_search</c>, and then passed through a punctuation model.
/// </summary>
public class OfflineHotwords : SpeechRecognition
{
    OfflineRecognizer recognizer = null;
    string tokensPath = "tokens.txt";
    string encoder = "encoder-epoch-99-avg-1.onnx";
    string decoder = "decoder-epoch-99-avg-1.onnx";
    string joiner = "joiner-epoch-99-avg-1.onnx";
    // This class pairs the hotwords file with modified_beam_search decoding.
    string decodingMethod = "modified_beam_search";
    int numThreads = 1;

    string pathRoot;
    string modelPath;
    int sampleRate = 16000;

    OfflinePunctuation offlinePunctuation = null;
    OfflineSpeechDenoiser offlineSpeechDenoiser = null;

    /// <summary>
    /// Set to true at the end of <see cref="Init"/>, once all three models are loaded.
    /// NOTE(review): written from the Loom.RunAsync callback and read in
    /// RecognizeOffline; presumably these run on different threads - confirm.
    /// </summary>
    public bool initDone = false;

    // Start is called before the first frame update
    void Start()
    {
        // Resolve the model root here, then hand the model loading to Loom.RunAsync.
        pathRoot = Util.GetPath() + "/models";
        Loom.RunAsync(() =>
        {
            Init();
        });
    }

    /// <summary>
    /// Loads the recognizer, the punctuation model and the speech denoiser,
    /// then marks the component as ready.
    /// </summary>
    void Init()
    {
        modelPath = pathRoot + "/sherpa-onnx-conformer-zh-stateless2-2023-05-23";

        recognizer = CreateRecognizer();
        offlinePunctuation = CreatePunctuation();
        offlineSpeechDenoiser = CreateDenoiser();

        initDone = true;
        Loom.QueueOnMainThread(() =>
        {
            Debug.Log("语音转文字初始化完成");
        });
    }

    /// <summary>Builds the transducer recognizer with hotwords enabled.</summary>
    OfflineRecognizer CreateRecognizer()
    {
        OfflineModelConfig offlineModelConfig = new OfflineModelConfig();
        offlineModelConfig.Tokens = Path.Combine(modelPath, tokensPath);
        offlineModelConfig.Transducer.Encoder = Path.Combine(modelPath, encoder);
        offlineModelConfig.Transducer.Decoder = Path.Combine(modelPath, decoder);
        offlineModelConfig.Transducer.Joiner = Path.Combine(modelPath, joiner);
        offlineModelConfig.NumThreads = numThreads;
        offlineModelConfig.Provider = "cpu";
        offlineModelConfig.Debug = 0;
        // Must be set on the model config that is actually assigned below;
        // setting it on config.ModelConfig beforehand is lost when that
        // member is replaced.
        offlineModelConfig.ModelingUnit = "cjkchar";

        OfflineLMConfig offlineLMConfig = new OfflineLMConfig();
        offlineLMConfig.Scale = 0.5f;

        OfflineRecognizerConfig config = new OfflineRecognizerConfig();
        config.FeatConfig.SampleRate = sampleRate;
        config.FeatConfig.FeatureDim = 80;
        config.DecodingMethod = decodingMethod;
        config.HotwordsFile = Path.Combine(modelPath, "hotwords_cn.txt");
        config.HotwordsScore = 2.0f;
        config.ModelConfig = offlineModelConfig;
        config.LmConfig = offlineLMConfig;

        return new OfflineRecognizer(config);
    }

    /// <summary>Builds the CT-Transformer punctuation model.</summary>
    OfflinePunctuation CreatePunctuation()
    {
        OfflinePunctuationModelConfig opmc = new OfflinePunctuationModelConfig();
        opmc.CtTransformer = pathRoot + "/sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12/model.onnx";
        opmc.NumThreads = numThreads;
        opmc.Provider = "cpu";
        opmc.Debug = 0;

        OfflinePunctuationConfig opc = new OfflinePunctuationConfig();
        opc.Model = opmc;
        return new OfflinePunctuation(opc);
    }

    /// <summary>Builds the GTCRN speech denoiser.</summary>
    OfflineSpeechDenoiser CreateDenoiser()
    {
        OfflineSpeechDenoiserGtcrnModelConfig osdgmc = new OfflineSpeechDenoiserGtcrnModelConfig();
        osdgmc.Model = pathRoot + "/gtcrn_simple.onnx";

        OfflineSpeechDenoiserModelConfig osdmc = new OfflineSpeechDenoiserModelConfig();
        osdmc.NumThreads = numThreads;
        osdmc.Provider = "cpu";
        osdmc.Debug = 0;
        osdmc.Gtcrn = osdgmc;

        OfflineSpeechDenoiserConfig osdc = new OfflineSpeechDenoiserConfig();
        osdc.Model = osdmc;
        return new OfflineSpeechDenoiser(osdc);
    }

    /// <summary>
    /// Denoises <paramref name="input"/>, decodes it and adds punctuation.
    /// </summary>
    /// <param name="input">Audio samples; passed to the models with the class sample rate (16000).</param>
    /// <param name="onResult">
    /// Receives the punctuated text. Not invoked when the models are not
    /// loaded yet or when <paramref name="input"/> is null or empty.
    /// </param>
    public override void RecognizeOffline(float[] input, Action<string> onResult)
    {
        if (!initDone)
        {
            Debug.Log("Model is not ready yet.");
            return;
        }
        if (input == null || input.Length == 0)
        {
            // Nothing was recorded; do not hand an empty buffer to the models.
            Debug.LogWarning("No audio samples to recognize.");
            return;
        }

        // Speech enhancement
        DenoisedAudio denoised = offlineSpeechDenoiser.Run(input, sampleRate);
        float[] samples = denoised.Samples;

        string result;
        OfflineStream stream = recognizer.CreateStream();
        try
        {
            stream.AcceptWaveform(sampleRate, samples);
            recognizer.Decode(stream);
            result = offlinePunctuation.AddPunct(stream.Result.Text);
        }
        finally
        {
            // Release the stream even if decoding or punctuation throws.
            stream.Dispose();
        }

        if (onResult != null)
        {
            onResult(result);
        }
    }
}

using System.Collections.Generic;
using uMicrophoneWebGL;
using UnityEngine;
using UnityEngine.Events;
using UnityEngine.EventSystems;
using UnityEngine.UI;

/// <summary>
/// Push-to-talk sample: while <see cref="button"/> is held, microphone samples
/// are buffered; on release the buffer is sent to <see cref="speechRecognition"/>
/// and the recognized text is written into <see cref="inputField"/>.
/// </summary>
[RequireComponent(typeof(MicrophoneWebGL))]
public class OfflineSample : MonoBehaviour
{
    public Button button;
    public InputField inputField;
    MicrophoneWebGL microphone;
    public SpeechRecognition speechRecognition;
    List<float> buffer = new List<float>();

    // Start is called before the first frame update
    void Start()
    {
        // Use a single EventTrigger for both pointer events instead of adding
        // one component per event; reuse one if the button already has it.
        EventTrigger trigger = button.gameObject.GetComponent<EventTrigger>();
        if (trigger == null)
        {
            trigger = button.gameObject.AddComponent<EventTrigger>();
        }
        AddTrigger(trigger, EventTriggerType.PointerDown, PointerDown);
        AddTrigger(trigger, EventTriggerType.PointerUp, PointerUp);

        microphone = GetComponent<MicrophoneWebGL>();
        microphone.isAutoStart = false;
        microphone.dataEvent.AddListener(OnData);
    }

    void OnDestroy()
    {
        // Unsubscribe so the microphone does not keep calling a destroyed component.
        if (microphone != null)
        {
            microphone.dataEvent.RemoveListener(OnData);
        }
    }

    /// <summary>Registers <paramref name="handler"/> for <paramref name="type"/> on <paramref name="trigger"/>.</summary>
    static void AddTrigger(EventTrigger trigger, EventTriggerType type, UnityAction<BaseEventData> handler)
    {
        EventTrigger.Entry entry = new EventTrigger.Entry();
        entry.eventID = type;
        entry.callback.AddListener(handler);
        trigger.triggers.Add(entry);
    }

    /// <summary>Appends each chunk delivered by the microphone's data event to the buffer.</summary>
    void OnData(float[] input)
    {
        buffer.AddRange(input);
    }

    /// <summary>Button pressed: discard the previous recording and start capturing.</summary>
    void PointerDown(BaseEventData data)
    {
        Debug.LogWarning("按下");
        buffer.Clear();
        microphone.Begin();
    }

    /// <summary>Button released: stop capturing and recognize what was buffered.</summary>
    void PointerUp(BaseEventData data)
    {
        Debug.LogWarning("抬起");
        microphone.End();

        if (speechRecognition == null)
        {
            Debug.LogWarning("speechRecognition is not assigned.");
            return;
        }
        if (buffer.Count == 0)
        {
            // No samples arrived between press and release; nothing to recognize.
            Debug.LogWarning("No audio captured.");
            return;
        }

        speechRecognition.RecognizeOffline(buffer.ToArray(), OnResult);
    }

    /// <summary>Shows the recognized text in the input field.</summary>
    void OnResult(string result)
    {
        inputField.text = result;
    }
}

hotwords_cn.txt（热词文件，放在模型目录下，每行一个热词）

文森特卡索
周望君
朱丽楠
蒋有伯
鲤鱼跃龙门
高考加油

来点截图

在这里插入图片描述

模型文件地址

https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-conformer-zh-stateless2-2023-05-23.tar.bz2