Windows Speech 实现语音控制

通过微软SpeechSDK实现简单语音控制程序,响应特定语音指令执行相应操作,如打开应用程序或播放音乐。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

windows下利用微软的SpeechSDK实现语音控制                                                   
                               

    语音识别是当前研究的热点,我们可以利用微软的语音识别引擎来进行二次开发。首先需要安装微软的SpeechSDK,安装完成后配置项目的包含目录和库目录,这些都不是重点,不会的可以去百度。然后就可以写代码了,码农嘛,还是看代码有感觉。

    源文件<Demo.cpp>

    1. #include <Windows.h>  
    2. #include <atlstr.h>  
    3. #include <sphelper.h>  
    4. #include <sapi.h>  
    5. #include <comutil.h>  
    6. #include <string.h>  
    7.   
    8. #pragma comment(lib,"sapi.lib")  
    9. #ifdef _UNICODE  
    10. #pragma   comment(lib,   "comsuppw.lib")  //_com_util::ConvertBSTRToString  
    11. #else  
    12. #pragma   comment(lib,   "comsupp.lib")  //_com_util::ConvertBSTRToString  
    13. #endif  
    14.   
    15. #define GID_CMD_GR 333333  
    16. #define WM_RECOEVENT WM_USER+1  
    17.   
    18. LRESULT CALLBACK WndProc(HWND,UINT,WPARAM,LPARAM);  
    19.   
    20. TCHAR szAppName[] = TEXT("语音控制Demo");  
    21. BOOL b_initSR;  
    22. BOOL b_Cmd_Grammar;  
    23. CComPtr<ISpRecoContext> m_cpRecoCtxt;//语音识别程序接口  
    24. CComPtr<ISpRecoGrammar> m_cpCmdGramma;//识别语法  
    25. CComPtr<ISpRecognizer> m_cpRecoEngine; //语音识别引擎  
    26. int speak(wchar_t *str);  
    27.   
    28. int WINAPI WinMain(HINSTANCE hInstance,HINSTANCE hPrevInstance,PSTR szCmdLine,int iCmdShow)  
    29. {  
    30.     HWND hwnd;  
    31.     MSG msg;  
    32.     WNDCLASS wndclass;  
    33.   
    34.     //窗口类结构体初始化值  
    35.     wndclass.cbClsExtra = 0;  
    36.     wndclass.cbWndExtra = 0;  
    37.     wndclass.hbrBackground = (HBRUSH)GetStockObject(WHITE_BRUSH);  
    38.     wndclass.hCursor = LoadCursor(NULL,IDC_ARROW);  
    39.     wndclass.hIcon = LoadIcon(NULL,IDI_APPLICATION);  
    40.     wndclass.hInstance = hInstance;  
    41.     wndclass.lpfnWndProc = WndProc;  
    42.     wndclass.lpszClassName = szAppName;  
    43.     wndclass.lpszMenuName = NULL;  
    44.     wndclass.style = CS_HREDRAW|CS_VREDRAW;  
    45.   
    46.     //注册窗口类  
    47.     if(!RegisterClass(&wndclass))  
    48.     {  
    49.         //失败后提示并返回  
    50.         MessageBox(NULL,TEXT("This program requires Windows NT!"),szAppName,MB_ICONERROR);  
    51.         return 0;  
    52.     }  
    53.   
    54.     //创建窗口  
    55.     hwnd = CreateWindow(szAppName,  
    56.                         TEXT("语音识别"),  
    57.                         WS_OVERLAPPEDWINDOW,  
    58.                         CW_USEDEFAULT,  
    59.                         CW_USEDEFAULT,  
    60.                         CW_USEDEFAULT,  
    61.                         CW_USEDEFAULT,  
    62.                         NULL,  
    63.                         NULL,  
    64.                         hInstance,  
    65.                         NULL);  
    66.       
    67.     //显示窗口  
    68.     ShowWindow(hwnd,iCmdShow);  
    69.     UpdateWindow(hwnd);  
    70.   
    71.     //进入消息循环  
    72.     while(GetMessage(&msg,NULL,0,0))  
    73.     {  
    74.         TranslateMessage(&msg);//翻译消息  
    75.         DispatchMessage(&msg);//分发消息  
    76.     }  
    77.     return msg.wParam;  
    78. }  
    79.   
    80. /* 
    81. *消息回调函数,由操作系统调用 
    82. */  
    83. LRESULT CALLBACK WndProc(HWND hwnd, UINT message,WPARAM wParam,LPARAM lParam)  
    84. {  
    85.     HDC hdc;  
    86.     PAINTSTRUCT ps;  
    87.   
    88.     switch(message)  
    89.     {  
    90.     case WM_CREATE:  
    91.         {     
    92.             //初始化COM端口  
    93.             ::CoInitializeEx(NULL,COINIT_APARTMENTTHREADED);  
    94.             //创建识别上下文接口  
    95.             HRESULT hr = m_cpRecoEngine.CoCreateInstance(CLSID_SpSharedRecognizer);  
    96.             if(SUCCEEDED(hr))  
    97.             {  
    98.                 hr = m_cpRecoEngine->CreateRecoContext(&m_cpRecoCtxt);  
    99.             }  
    100.             else  
    101.             {  
    102.                 MessageBox(hwnd,TEXT("引擎实例化出错"),TEXT("提示"),S_OK);  
    103.             }  
    104.             //设置识别消息,使计算机时刻监听语音消息  
    105.             if(SUCCEEDED(hr))  
    106.             {  
    107.                 hr = m_cpRecoCtxt->SetNotifyWindowMessage(hwnd,WM_RECOEVENT,0,0);  
    108.             }  
    109.             else  
    110.             {  
    111.                 MessageBox(hwnd,TEXT("创建上下文接口出错"),TEXT("提示"),S_OK);  
    112.             }  
    113.             //设置我们感兴趣的事件  
    114.             if(SUCCEEDED(hr))  
    115.             {  
    116.                 ULONGLONG ullMyEvents = SPFEI(SPEI_SOUND_START)|SPFEI(SPEI_RECOGNITION)|SPFEI(SPEI_SOUND_END);  
    117.                 hr = m_cpRecoCtxt->SetInterest(ullMyEvents,ullMyEvents);  
    118.             }  
    119.             else  
    120.             {  
    121.                 MessageBox(hwnd,TEXT("设置识别消息出错"),TEXT("提示"),S_OK);  
    122.             }  
    123.             //创建语法规则  
    124.             b_Cmd_Grammar = TRUE;  
    125.             if(FAILED(hr))  
    126.             {  
    127.                 MessageBox(hwnd,TEXT("创建语法规则出错"),TEXT("提示"),S_OK);  
    128.             }  
    129.             hr = m_cpRecoCtxt->CreateGrammar(GID_CMD_GR,&m_cpCmdGramma);  
    130.             hr = m_cpCmdGramma->LoadCmdFromFile(L"cmd.xml",SPLO_DYNAMIC);  
    131.             if(FAILED(hr))  
    132.             {  
    133.                 MessageBox(hwnd,TEXT("配置文件打开出错"),TEXT("提示"),S_OK);  
    134.             }  
    135.             b_initSR = TRUE;  
    136.             //在开始识别时,激活语法进行识别  
    137.             hr = m_cpCmdGramma->SetRuleState(NULL,NULL,SPRS_ACTIVE);  
    138.             break;  
    139.         }  
    140.     case WM_RECOEVENT:  
    141.         {  
    142.             RECT rect;  
    143.             GetClientRect(hwnd,&rect);  
    144.             hdc = GetDC(hwnd);  
    145.             USES_CONVERSION;  
    146.             CSpEvent event;  
    147.             while(event.GetFrom(m_cpRecoCtxt)==S_OK)  
    148.             {  
    149.                 switch(event.eEventId)  
    150.                 {  
    151.                 case SPEI_RECOGNITION:  
    152.                     {  
    153.                         static const WCHAR wszUnrecognized[] = L"<Unrecognized>";  
    154.                         CSpDynamicString dstrText;  
    155.                         //取得消息结果  
    156.                         if(FAILED(event.RecoResult()->GetText(SP_GETWHOLEPHRASE, SP_GETWHOLEPHRASE, TRUE, &dstrText, NULL)))  
    157.                         {  
    158.                             dstrText = wszUnrecognized;  
    159.                         }  
    160.                         BSTR SRout;  
    161.                         dstrText.CopyToBSTR(&SRout);  
    162.                         char * lpszText2 = _com_util::ConvertBSTRToString(SRout);  
    163.                         if(b_Cmd_Grammar)  
    164.                         {  
    165.                             //MessageBoxA(0,lpszText2,"内容",0);  
    166.                             if(strcmp("腾讯QQ",lpszText2)==0)  
    167.                             {  
    168.                                 //MessageBox(0,TEXT("计算机"),TEXT("内容"),0);  
    169.                                 speak(L"好的");  
    170.                                 //打开QQ.exe  
    171.                                 ShellExecuteA(NULL,"open","D:\\QQ\\QQProtect\\Bin\\QQProtect.exe",0,0,1);  
    172.                             }  
    173.                             if(strcmp("确定",lpszText2)==0)  
    174.                             {  
    175.                                 //按下回车键  
    176.                                 keybd_event(VK_RETURN,0,0,0);  
    177.                                 keybd_event(VK_RETURN,0,KEYEVENTF_KEYUP,0);  
    178.                             }  
    179.                             if(strcmp("音乐",lpszText2)==0)  
    180.                             {         
    181.                                 speak(L"好的");  
    182.                                 //调用系统程序wmplayer.exe播放音乐  
    183.                                 ShellExecuteA(NULL,"open","\"C:\\Program Files (x86)\\Windows Media Player\\wmplayer.exe\"","C:\\Users\\KYT\\Desktop\\123.mp3",0,0);  
    184.                             }  
    185.                         }  
    186.                     }  
    187.                 }  
    188.             }  
    189.             break;  
    190.         }  
    191.     case WM_PAINT:  
    192.         hdc = BeginPaint(hwnd,&ps);  
    193.         EndPaint(hwnd,&ps);  
    194.         break;  
    195.     case WM_DESTROY:  
    196.         PostQuitMessage(0);  
    197.         break;  
    198.     }  
    199.     return DefWindowProc(hwnd,message,wParam,lParam);  
    200. }  
    201.   
    202. #pragma comment(lib,"ole32.lib")//CoInitialize CoCreateInstance 需要调用ole32.dll  
    203.   
    204. /* 
    205. *语音合成函数,朗读字符串str 
    206. */  
    207. int speak(wchar_t *str)  
    208. {  
    209.     ISpVoice * pVoice = NULL;  
    210.     ::CoInitialize(NULL);  
    211.     //获得ISpVoice接口  
    212.     long hr = CoCreateInstance(CLSID_SpVoice,NULL,CLSCTX_ALL,IID_ISpVoice,(void **)&pVoice);  
    213.     hr = pVoice->Speak(str,0,NULL);  
    214.     pVoice->Release();  
    215.     pVoice = NULL;  
    216.     //千万不要忘记  
    217.     ::CoUninitialize();  
    218.     return TRUE;  
    219. }  
    #include <Windows.h>
    #include <atlstr.h>
    #include <sphelper.h>
    #include <sapi.h>
    #include <comutil.h>
    #include <string.h>
    
    #pragma comment(lib,"sapi.lib")
    #ifdef _UNICODE
    #pragma   comment(lib,   "comsuppw.lib")  //_com_util::ConvertBSTRToString
    #else
    #pragma   comment(lib,   "comsupp.lib")  //_com_util::ConvertBSTRToString
    #endif
    
    // Grammar id handed to ISpRecoContext::CreateGrammar for the command grammar.
    #define GID_CMD_GR 333333
    // Private window message SAPI posts when recognition events are queued.
    // Parenthesised so the macro expands safely inside larger expressions.
    #define WM_RECOEVENT (WM_USER+1)

    LRESULT CALLBACK WndProc(HWND,UINT,WPARAM,LPARAM);

    // Window class name; also used as the caption of start-up error boxes.
    TCHAR szAppName[] = TEXT("语音控制Demo");
    // Set by the WM_CREATE handler once speech-recognition set-up has run.
    BOOL b_initSR = FALSE;
    // When TRUE, recognized phrases are matched against the command list.
    BOOL b_Cmd_Grammar = FALSE;
    CComPtr<ISpRecoContext> m_cpRecoCtxt;   // recognition context (event source)
    CComPtr<ISpRecoGrammar> m_cpCmdGramma;  // command grammar loaded from cmd.xml
    CComPtr<ISpRecognizer> m_cpRecoEngine;  // speech recognition engine
    int speak(wchar_t *str);
    
    /*
     * Program entry point.
     *
     * Registers the window class, creates and shows the main window, then pumps
     * messages until WM_QUIT arrives.
     *
     * hInstance     - instance handle of this module
     * hPrevInstance - unused
     * szCmdLine     - unused
     * iCmdShow      - initial show state forwarded to ShowWindow
     *
     * Returns the WM_QUIT exit code, or 0 when start-up or the message loop fails.
     */
    int WINAPI WinMain(HINSTANCE hInstance,HINSTANCE hPrevInstance,PSTR szCmdLine,int iCmdShow)
    {
        // Value-initialise so no field is ever left indeterminate.
        WNDCLASS wndclass = {};
        wndclass.style = CS_HREDRAW|CS_VREDRAW;
        wndclass.lpfnWndProc = WndProc;
        wndclass.cbClsExtra = 0;
        wndclass.cbWndExtra = 0;
        wndclass.hInstance = hInstance;
        wndclass.hIcon = LoadIcon(NULL,IDI_APPLICATION);
        wndclass.hCursor = LoadCursor(NULL,IDC_ARROW);
        wndclass.hbrBackground = (HBRUSH)GetStockObject(WHITE_BRUSH);
        wndclass.lpszMenuName = NULL;
        wndclass.lpszClassName = szAppName;

        // Register the window class; report and bail out on failure.
        if(!RegisterClass(&wndclass))
        {
            MessageBox(NULL,TEXT("This program requires Windows NT!"),szAppName,MB_ICONERROR);
            return 0;
        }

        // Create the main window.
        HWND hwnd = CreateWindow(szAppName,
                                 TEXT("语音识别"),
                                 WS_OVERLAPPEDWINDOW,
                                 CW_USEDEFAULT,
                                 CW_USEDEFAULT,
                                 CW_USEDEFAULT,
                                 CW_USEDEFAULT,
                                 NULL,
                                 NULL,
                                 hInstance,
                                 NULL);
        if(hwnd == NULL)
        {
            // Without a window there is nothing to show and no message target.
            MessageBox(NULL,TEXT("创建窗口失败"),szAppName,MB_ICONERROR);
            return 0;
        }

        // Show the window and force the first paint.
        ShowWindow(hwnd,iCmdShow);
        UpdateWindow(hwnd);

        // Message loop. GetMessage returns 0 on WM_QUIT and -1 on error, so the
        // result must be compared explicitly rather than used as a boolean.
        MSG msg = {};
        BOOL bRet;
        while((bRet = GetMessage(&msg,NULL,0,0)) != 0)
        {
            if(bRet == -1)
            {
                // msg is undefined here; dispatching it would be a bug.
                return 0;
            }
            TranslateMessage(&msg); // translate virtual-key messages
            DispatchMessage(&msg);  // hand the message to WndProc
        }
        return static_cast<int>(msg.wParam);
    }
    
    /*
    *消息回调函数,由操作系统调用
    */
    LRESULT CALLBACK WndProc(HWND hwnd, UINT message,WPARAM wParam,LPARAM lParam)
    {
    	HDC hdc;
    	PAINTSTRUCT ps;
    
    	switch(message)
    	{
    	case WM_CREATE:
    		{	
    			//初始化COM端口
    			::CoInitializeEx(NULL,COINIT_APARTMENTTHREADED);
    			//创建识别上下文接口
    			HRESULT hr = m_cpRecoEngine.CoCreateInstance(CLSID_SpSharedRecognizer);
    			if(SUCCEEDED(hr))
    			{
    				hr = m_cpRecoEngine->CreateRecoContext(&m_cpRecoCtxt);
    			}
    			else
    			{
    				MessageBox(hwnd,TEXT("引擎实例化出错"),TEXT("提示"),S_OK);
    			}
    			//设置识别消息,使计算机时刻监听语音消息
    			if(SUCCEEDED(hr))
    			{
    				hr = m_cpRecoCtxt->SetNotifyWindowMessage(hwnd,WM_RECOEVENT,0,0);
    			}
    			else
    			{
    				MessageBox(hwnd,TEXT("创建上下文接口出错"),TEXT("提示"),S_OK);
    			}
    			//设置我们感兴趣的事件
    			if(SUCCEEDED(hr))
    			{
    				ULONGLONG ullMyEvents = SPFEI(SPEI_SOUND_START)|SPFEI(SPEI_RECOGNITION)|SPFEI(SPEI_SOUND_END);
    				hr = m_cpRecoCtxt->SetInterest(ullMyEvents,ullMyEvents);
    			}
    			else
    			{
    				MessageBox(hwnd,TEXT("设置识别消息出错"),TEXT("提示"),S_OK);
    			}
    			//创建语法规则
    			b_Cmd_Grammar = TRUE;
    			if(FAILED(hr))
    			{
    				MessageBox(hwnd,TEXT("创建语法规则出错"),TEXT("提示"),S_OK);
    			}
    			hr = m_cpRecoCtxt->CreateGrammar(GID_CMD_GR,&m_cpCmdGramma);
    			hr = m_cpCmdGramma->LoadCmdFromFile(L"cmd.xml",SPLO_DYNAMIC);
    			if(FAILED(hr))
    			{
    				MessageBox(hwnd,TEXT("配置文件打开出错"),TEXT("提示"),S_OK);
    			}
    			b_initSR = TRUE;
    			//在开始识别时,激活语法进行识别
    			hr = m_cpCmdGramma->SetRuleState(NULL,NULL,SPRS_ACTIVE);
    			break;
    		}
    	case WM_RECOEVENT:
    		{
    			RECT rect;
    			GetClientRect(hwnd,&rect);
    			hdc = GetDC(hwnd);
    			USES_CONVERSION;
    			CSpEvent event;
    			while(event.GetFrom(m_cpRecoCtxt)==S_OK)
    			{
    				switch(event.eEventId)
    				{
    				case SPEI_RECOGNITION:
    					{
    						static const WCHAR wszUnrecognized[] = L"<Unrecognized>";
    						CSpDynamicString dstrText;
    						//取得消息结果
    						if(FAILED(event.RecoResult()->GetText(SP_GETWHOLEPHRASE, SP_GETWHOLEPHRASE, TRUE, &dstrText, NULL)))
    						{
    							dstrText = wszUnrecognized;
    						}
    						BSTR SRout;
    						dstrText.CopyToBSTR(&SRout);
    						char * lpszText2 = _com_util::ConvertBSTRToString(SRout);
    						if(b_Cmd_Grammar)
    						{
    							//MessageBoxA(0,lpszText2,"内容",0);
    							if(strcmp("腾讯QQ",lpszText2)==0)
    							{
    								//MessageBox(0,TEXT("计算机"),TEXT("内容"),0);
    								speak(L"好的");
    								//打开QQ.exe
    								ShellExecuteA(NULL,"open","D:\\QQ\\QQProtect\\Bin\\QQProtect.exe",0,0,1);
    							}
    							if(strcmp("确定",lpszText2)==0)
    							{
    								//按下回车键
    								keybd_event(VK_RETURN,0,0,0);
    								keybd_event(VK_RETURN,0,KEYEVENTF_KEYUP,0);
    							}
    							if(strcmp("音乐",lpszText2)==0)
    							{		
    								speak(L"好的");
    								//调用系统程序wmplayer.exe播放音乐
    								ShellExecuteA(NULL,"open","\"C:\\Program Files (x86)\\Windows Media Player\\wmplayer.exe\"","C:\\Users\\KYT\\Desktop\\123.mp3",0,0);
    							}
    						}
    					}
    				}
    			}
    			break;
    		}
    	case WM_PAINT:
    		hdc = BeginPaint(hwnd,&ps);
    		EndPaint(hwnd,&ps);
    		break;
    	case WM_DESTROY:
    		PostQuitMessage(0);
    		break;
    	}
    	return DefWindowProc(hwnd,message,wParam,lParam);
    }
    
    #pragma comment(lib,"ole32.lib")//CoInitialize CoCreateInstance 需要调用ole32.dll
    
    /*
    *语音合成函数,朗读字符串str
    */
    int speak(wchar_t *str)
    {
    	ISpVoice * pVoice = NULL;
    	::CoInitialize(NULL);
    	//获得ISpVoice接口
    	long hr = CoCreateInstance(CLSID_SpVoice,NULL,CLSCTX_ALL,IID_ISpVoice,(void **)&pVoice);
    	hr = pVoice->Speak(str,0,NULL);
    	pVoice->Release();
    	pVoice = NULL;
    	//千万不要忘记
    	::CoUninitialize();
    	return TRUE;
    }

    只有源文件还不行,程序中还用到一个XML格式的语音配置文件cmd.xml。如果是利用编译器调试程序,cmd.xml需要和源代码同目录;发布程序以后,cmd.xml就得放到程序可执行文件所在的目录下了。总之,程序必须访问到cmd.xml文件,方便起见,可以在两个目录下都放一个cmd.xml文件。

    配置文件<cmd.xml>

    1. <?xml version="1.0" encoding="utf-8"?>  
    2. <GRAMMAR LANGID="804">  
    3.  <DEFINE>  
    4.     <ID NAME="CMD" VAL="10"/>  
    5.     </DEFINE>  
    6.   <RULE NAME="COMMAND" ID="CMD" TOPLEVEL="ACTIVE">  
    7.    <L>  
    8.     <P>腾讯QQ</P>  
    9.     <P>确定</P>  
    10.     <P>音乐</P>  
    11.     </L>  
    12.    </RULE>  
    13. </GRAMMAR>  
    <?xml version="1.0" encoding="utf-8"?>
    <GRAMMAR LANGID="804">
     <DEFINE>
        <ID NAME="CMD" VAL="10"/>
        </DEFINE>
      <RULE NAME="COMMAND" ID="CMD" TOPLEVEL="ACTIVE">
       <L>
        <P>腾讯QQ</P>
        <P>确定</P>
        <P>音乐</P>
        </L>
       </RULE>
    </GRAMMAR>

    到这里,一个简单的语音控制程序Demo就算完成了。你对它说:“腾讯QQ”,它就会打开QQ;你说:“确定”,默认的QQ账号就自动登录了。你还可以说:“音乐”,于是一首曼妙的音乐响起(为什么看不见播放器呢?因为在代码
    1. ShellExecuteA(NULL,"open","\"C:\\Program Files (x86)\\Windows Media Player\\wmplayer.exe\"","C:\\Users\\KYT\\Desktop\\123.mp3",0,0);  
    ShellExecuteA(NULL,"open","\"C:\\Program Files (x86)\\Windows Media Player\\wmplayer.exe\"","C:\\Users\\KYT\\Desktop\\123.mp3",0,0);

     中把ShellExecuteA的最后一个参数设为0(即SW_HIDE),让wmplayer.exe隐藏执行;想要关闭它,嘻嘻,请打开任务管理器。另外,一定要把路径改成你电脑上的音乐文件,不然你懂的)。 
     

    语音识别是不是挺有意思的,发挥你的想象,去震精别人吧。

    评论 1
    添加红包

    请填写红包祝福语或标题

    红包个数最小为10个

    红包金额最低5元

    当前余额3.43前往充值 >
    需支付:10.00
    成就一亿技术人!
    领取后你会自动成为博主和红包主的粉丝 规则
    hope_wisdom
    发出的红包
    实付
    使用余额支付
    点击重新获取
    扫码支付
    钱包余额 0

    抵扣说明:

    1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
    2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

    余额充值