使用C++实现语音识别
语音识别——还记得《星球大战》里那个总是陪在主人身边、听话的智能机器人吗?它拥有听和说的技能。在那个时候,这还只是人们的想象,但是今天我们就可以动手来实现它!
微软的语音识别,在这里我们简称它为 SR(Speech Recognition)。SR 分为两种监听模式。第一种模式是任意监听:用户可以随意输入语音,监听对象会将最为接近的字、词或者句子反馈出来。第二种模式是划定范围监听:预先指定一组备选项作为监听范围,用户的语音输入会被反馈成其中最为接近的一个选项。说得通俗一些:第一种是填空题,第二种是选择题。
今天我们就一起来学习如何用C++ 来完成一道语音识别的填空题:
代码如下
#include<windows.h>
#include<sapi.h>
#include<stdio.h>
#include<string.h>
#include<atlbase.h>
#include"sphelper.h"
// Copyright (c) Microsoft Corporation. All rights reserved.

inlineHRESULTBlockForResult(ISpRecoContext*pRecoCtxt,ISpRecoResult**ppResult)

...{
HRESULThr=S_OK;
CSpEventevent;

while(SUCCEEDED(hr)&&
SUCCEEDED(hr=event.GetFrom(pRecoCtxt))&&
hr==S_FALSE)

...{
hr=pRecoCtxt->WaitForNotifyEvent(INFINITE);
}

*ppResult=event.RecoResult();
if(*ppResult)

...{
(*ppResult)->AddRef();
}

returnhr;
}

constWCHAR*StopWord()

...{
constWCHAR*pchStop;

LANGIDLangId=::SpGetUserDefaultUILanguage();

switch(LangId)

...{
caseMAKELANGID(LANG_JAPANESE,SUBLANG_DEFAULT):
pchStop=L"}42N86\0b70e50fc0ea0e70fc/05708504608a087046";;
break;

default:
pchStop=L"Stop";
break;
}

returnpchStop;
}

intmain(intargc,char*argv[])

...{
HRESULThr=E_FAIL;
boolfUseTTS=true;//turnTTSplaybackonoroff
boolfReplay=true;//turnAudioreplayonoroff

//Processoptionalarguments
if(argc>1)

...{
inti;

for(i=1;i<argc;i++)

...{
if(_stricmp(argv[i],"-noTTS")==0)

...{
fUseTTS=false;
continue;
}
if(_stricmp(argv[i],"-noReplay")==0)

...{
fReplay=false;
continue;
}
printf("Usage:%s[-noTTS][-noReplay] ",argv[0]);
returnhr;
}
}

if(SUCCEEDED(hr=::CoInitialize(NULL)))

...{

...{
CComPtr<ISpRecoContext>cpRecoCtxt;
CComPtr<ISpRecoGrammar>cpGrammar;
CComPtr<ISpVoice>cpVoice;
hr=cpRecoCtxt.CoCreateInstance(CLSID_SpSharedRecoContext);
if(SUCCEEDED(hr))

...{
hr=cpRecoCtxt->GetVoice(&cpVoice);
}

if(cpRecoCtxt&&cpVoice&&
SUCCEEDED(hr=cpRecoCtxt->SetNotifyWin32Event())&&
SUCCEEDED(hr=cpRecoCtxt->SetInterest(SPFEI(SPEI_RECOGNITION),SPFEI(SPEI_RECOGNITION)))&&
SUCCEEDED(hr=cpRecoCtxt->SetAudioOptions(SPAO_RETAIN_AUDIO,NULL,NULL))&&
SUCCEEDED(hr=cpRecoCtxt->CreateGrammar(0,&cpGrammar))&&
SUCCEEDED(hr=cpGrammar->LoadDictation(NULL,SPLO_STATIC))&&
SUCCEEDED(hr=cpGrammar->SetDictationState(SPRS_ACTIVE)))

...{
USES_CONVERSION;

constWCHAR*constpchStop=StopWord();
CComPtr<ISpRecoResult>cpResult;

printf("Iwillrepeateverythingyousay. Say"%s"toexit. ",W2A(pchStop));

while(SUCCEEDED(hr=BlockForResult(cpRecoCtxt,&cpResult)))

...{
cpGrammar->SetDictationState(SPRS_INACTIVE);

CSpDynamicStringdstrText;

if(SUCCEEDED(cpResult->GetText(SP_GETWHOLEPHRASE,SP_GETWHOLEPHRASE,
TRUE,&dstrText,NULL)))

...{
printf("Iheard:%s ",W2A(dstrText));

if(fUseTTS)

...{
cpVoice->Speak(L"Iheard",SPF_ASYNC,NULL);
cpVoice->Speak(dstrText,SPF_ASYNC,NULL);
}

if(fReplay)

...{
if(fUseTTS)
cpVoice->Speak(L"whenyousaid",SPF_ASYNC,NULL);
else
printf(" whenyousaid... ");
cpResult->SpeakAudio(NULL,0,NULL,NULL);
}

cpResult.Release();
}
if(_wcsicmp(dstrText,pchStop)==0)

...{
break;
}

cpGrammar->SetDictationState(SPRS_ACTIVE);
}
}
}
::CoUninitialize();
}
returnhr;
}