https://k2-fsa.github.io/sherpa/onnx/hotwords/index.html
In sherpa-onnx, only transducer models support hotwords — that is, only the offline transducer models and the online transducer models.
No other model type supports hotwords.
测试一下看看效果
using System;
using System.IO;
using SherpaOnnx;
using UnityEngine;
/// <summary>
/// Offline speech recognizer built on a sherpa-onnx transducer model with
/// hotword biasing. Each utterance is passed through a speech denoiser,
/// decoded, and then run through a punctuation model.
/// </summary>
/// <remarks>
/// NOTE(review): the recognizer, punctuation model and denoiser are never
/// released by this class; consider disposing them when the component is
/// destroyed if their types support it.
/// </remarks>
public class OfflineHotwords : SpeechRecognition
{
    OfflineRecognizer recognizer = null;
    OfflineStream offlineStream = null;

    // File names are resolved relative to modelPath.
    string tokensPath = "tokens.txt";
    string encoder = "encoder-epoch-99-avg-1.onnx";
    string decoder = "decoder-epoch-99-avg-1.onnx";
    string joiner = "joiner-epoch-99-avg-1.onnx";

    string decodingMethod = "modified_beam_search";
    int numThreads = 1;
    string pathRoot;
    string modelPath;
    int sampleRate = 16000;

    OfflinePunctuation offlinePunctuation = null;
    OfflineSpeechDenoiser offlineSpeechDenoiser = null;
    DenoisedAudio denoisedAudio = null;

    /// <summary>Set to true once every model has been created by <see cref="Init"/>.</summary>
    public bool initDone = false;

    /// <summary>
    /// Resolves the model root directory and hands model loading to
    /// Loom.RunAsync (presumably a worker thread — confirm against Loom).
    /// </summary>
    void Start()
    {
        pathRoot = Util.GetPath() + "/models";
        Loom.RunAsync(() =>
        {
            try
            {
                Init();
            }
            catch (Exception e)
            {
                // Without this, a failed load would only ever show up as
                // "Model is not ready yet." on every recognition request.
                Loom.QueueOnMainThread(() =>
                {
                    Debug.LogException(e);
                });
            }
        });
    }

    /// <summary>
    /// Creates the recognizer, punctuation model and denoiser, then flags
    /// the component as ready and logs completion on the main thread.
    /// </summary>
    void Init()
    {
        recognizer = CreateRecognizer();
        offlinePunctuation = CreatePunctuation();
        offlineSpeechDenoiser = CreateDenoiser();

        initDone = true;
        Loom.QueueOnMainThread(() =>
        {
            // NOTE(review): this message reads "text-to-speech initialised",
            // but the class performs speech-to-text — confirm the wording.
            Debug.Log("文字转语音初始化完成");
        });
    }

    /// <summary>Builds the transducer recognizer with the hotwords file attached.</summary>
    OfflineRecognizer CreateRecognizer()
    {
        modelPath = pathRoot + "/sherpa-onnx-conformer-zh-stateless2-2023-05-23";

        OfflineModelConfig offlineModelConfig = new OfflineModelConfig();
        offlineModelConfig.Tokens = Path.Combine(modelPath, tokensPath);
        offlineModelConfig.Transducer.Encoder = Path.Combine(modelPath, encoder);
        offlineModelConfig.Transducer.Decoder = Path.Combine(modelPath, decoder);
        offlineModelConfig.Transducer.Joiner = Path.Combine(modelPath, joiner);
        offlineModelConfig.NumThreads = numThreads;
        offlineModelConfig.Provider = "cpu";
        offlineModelConfig.Debug = 0;
        // Must be set on the object that is assigned to config.ModelConfig
        // below; setting it on config.ModelConfig beforehand is overwritten
        // by that assignment.
        offlineModelConfig.ModelingUnit = "cjkchar";

        OfflineLMConfig offlineLMConfig = new OfflineLMConfig();
        offlineLMConfig.Scale = 0.5f;

        OfflineRecognizerConfig config = new OfflineRecognizerConfig();
        config.FeatConfig.SampleRate = sampleRate;
        config.FeatConfig.FeatureDim = 80;
        config.DecodingMethod = decodingMethod;
        config.HotwordsFile = Path.Combine(modelPath, "hotwords_cn.txt");
        config.HotwordsScore = 2.0f;
        config.ModelConfig = offlineModelConfig;
        config.LmConfig = offlineLMConfig;

        return new OfflineRecognizer(config);
    }

    /// <summary>Builds the punctuation model used to post-process recognized text.</summary>
    OfflinePunctuation CreatePunctuation()
    {
        OfflinePunctuationModelConfig opmc = new OfflinePunctuationModelConfig();
        opmc.CtTransformer = pathRoot + "/sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12/model.onnx";
        opmc.NumThreads = numThreads;
        opmc.Provider = "cpu";
        opmc.Debug = 0;

        OfflinePunctuationConfig opc = new OfflinePunctuationConfig();
        opc.Model = opmc;
        return new OfflinePunctuation(opc);
    }

    /// <summary>Builds the GTCRN speech denoiser applied before recognition.</summary>
    OfflineSpeechDenoiser CreateDenoiser()
    {
        OfflineSpeechDenoiserGtcrnModelConfig osdgmc = new OfflineSpeechDenoiserGtcrnModelConfig();
        osdgmc.Model = pathRoot + "/gtcrn_simple.onnx";

        OfflineSpeechDenoiserModelConfig osdmc = new OfflineSpeechDenoiserModelConfig();
        osdmc.NumThreads = numThreads;
        osdmc.Provider = "cpu";
        osdmc.Debug = 0;
        osdmc.Gtcrn = osdgmc;

        OfflineSpeechDenoiserConfig osdc = new OfflineSpeechDenoiserConfig();
        osdc.Model = osdmc;
        return new OfflineSpeechDenoiser(osdc);
    }

    /// <summary>
    /// Denoises, decodes and punctuates one utterance, then reports the text.
    /// </summary>
    /// <param name="input">Audio samples; passed to the models with the configured sample rate (16000).</param>
    /// <param name="onResult">
    /// Callback receiving the punctuated text. Not invoked when the models
    /// are not ready yet; invoked with an empty string for null/empty input.
    /// </param>
    public override void RecognizeOffline(float[] input, Action<string> onResult)
    {
        if (!initDone)
        {
            Debug.Log("Model is not ready yet.");
            return;
        }

        // Nothing to decode: avoid handing an empty buffer to the models.
        if (input == null || input.Length == 0)
        {
            if (onResult != null)
            {
                onResult(string.Empty);
            }
            return;
        }

        // Speech enhancement before recognition.
        denoisedAudio = offlineSpeechDenoiser.Run(input, sampleRate);
        input = denoisedAudio.Samples;

        string result;
        offlineStream = recognizer.CreateStream();
        try
        {
            offlineStream.AcceptWaveform(sampleRate, input);
            recognizer.Decode(offlineStream);
            result = offlineStream.Result.Text;
            result = offlinePunctuation.AddPunct(result);
        }
        finally
        {
            // Release the stream even when decoding or punctuation throws.
            offlineStream.Dispose();
            offlineStream = null;
        }

        if (onResult != null)
        {
            onResult(result);
        }
    }
}
using System.Collections.Generic;
using uMicrophoneWebGL;
using UnityEngine;
using UnityEngine.Events;
using UnityEngine.EventSystems;
using UnityEngine.UI;
/// <summary>
/// Push-to-talk sample: microphone data is buffered while the button is held
/// and sent to <see cref="speechRecognition"/> when it is released; the
/// recognized text is written to <see cref="inputField"/>.
/// </summary>
[RequireComponent(typeof(MicrophoneWebGL))]
public class OfflineSample : MonoBehaviour
{
    public Button button;
    public InputField inputField;
    MicrophoneWebGL microphone;
    public SpeechRecognition speechRecognition;

    // Samples received from the microphone since the last press.
    List<float> buffer = new List<float>();

    /// <summary>Wires the button press/release handlers and the microphone data listener.</summary>
    void Start()
    {
        // A single EventTrigger can hold both entries; adding one component
        // per event would leave two redundant triggers on the button.
        EventTrigger trigger = button.gameObject.GetComponent<EventTrigger>();
        if (trigger == null)
        {
            trigger = button.gameObject.AddComponent<EventTrigger>();
        }
        AddTrigger(trigger, EventTriggerType.PointerDown, PointerDown);
        AddTrigger(trigger, EventTriggerType.PointerUp, PointerUp);

        microphone = GetComponent<MicrophoneWebGL>();
        microphone.isAutoStart = false;
        microphone.dataEvent.AddListener(OnData);
    }

    /// <summary>Removes the microphone listener registered in <see cref="Start"/>.</summary>
    void OnDestroy()
    {
        if (microphone != null)
        {
            microphone.dataEvent.RemoveListener(OnData);
        }
    }

    /// <summary>Registers <paramref name="handler"/> on <paramref name="trigger"/> for the given event type.</summary>
    static void AddTrigger(EventTrigger trigger, EventTriggerType type, UnityAction<BaseEventData> handler)
    {
        EventTrigger.Entry entry = new EventTrigger.Entry();
        entry.eventID = type;
        entry.callback.AddListener(handler);
        trigger.triggers.Add(entry);
    }

    /// <summary>Appends a chunk of microphone samples to the buffer.</summary>
    void OnData(float[] input)
    {
        buffer.AddRange(input);
    }

    /// <summary>Button pressed: discard old samples and start the microphone.</summary>
    void PointerDown(BaseEventData data)
    {
        Debug.LogWarning("按下");
        buffer.Clear();
        microphone.Begin();
    }

    /// <summary>Button released: stop the microphone and recognize the buffered samples.</summary>
    void PointerUp(BaseEventData data)
    {
        Debug.LogWarning("抬起");
        microphone.End();

        if (speechRecognition == null)
        {
            Debug.LogWarning("No SpeechRecognition component assigned.");
            return;
        }
        if (buffer.Count == 0)
        {
            // No audio arrived between press and release; nothing to recognize.
            Debug.LogWarning("No audio captured.");
            return;
        }

        speechRecognition.RecognizeOffline(buffer.ToArray(), OnResult);
    }

    /// <summary>Shows the recognized text in the input field.</summary>
    void OnResult(string result)
    {
        inputField.text = result;
    }
}
hotwords_cn.txt
文森特卡索
周望君
朱丽楠
蒋有伯
鲤鱼跃龙门
高考加油
来点截图