成功调用Vosk后,我们验证了模型在板端运行的可行性。考虑到最终需要将该功能集成到实际项目中,我们随即展开了与真实项目的对接测试。
1.前期准备
1.放置vosk文件夹(内含include和lib文件)至项目资源文件夹内;
2.放置模型文件(model)和创建语音数据文件夹;
3.在项目.pro文件中添加路径,让项目能找到这些文件
# Pull in Qt Multimedia in addition to the usual GUI modules; it is used for audio recording.
QT += core gui widgets multimedia
# Append the Vosk header directory to the include search path
# (`+=` keeps whatever entries the project already lists).
INCLUDEPATH += ../../vosk/include
# Append the Vosk library directory and link against libvosk.
LIBS += -L$$PWD/../../vosk/lib -lvosk
2.语音识别逻辑移植
创建wgt_vosk.h和wgt_vosk.cpp文件,在.h文件中留好函数声明,当作接口使用
// The Vosk API header is included inside an extern "C" block so its
// declarations get C linkage when compiled as C++.
extern "C" {
#include "vosk_api.h"
}
// Runs speech recognition on the audio file at path `fn` and returns the
// recognized text; the implementation returns "FAILED" when it cannot proceed.
QString audioRecognizer(QString fn);
在.cpp文件中写好识别逻辑
// Helper: extract the "text" field from a Vosk JSON result string.
// Returns an empty QString when the payload does not parse cleanly or its
// top-level value is not a JSON object.
static QString parseVoskResult(const std::string &jsonStr)
{
    QJsonParseError parseStatus;
    const QJsonDocument parsed =
        QJsonDocument::fromJson(QByteArray::fromStdString(jsonStr), &parseStatus);

    // Guard clause: anything other than a well-formed object carries no text.
    if (parseStatus.error != QJsonParseError::NoError || !parsed.isObject()) {
        return QString();
    }
    return parsed.object().value("text").toString();
}
// Recognizes the speech contained in a WAV file using Vosk.
//
// fn:      path of the WAV file. The first 44 bytes are skipped as the header
//          and the rest is fed to a recognizer created for a 16000 Hz rate.
// Returns: the recognized utterances, one per line, followed by the final
//          result; or "FAILED" when the model cannot be loaded, the recognizer
//          cannot be created, or the file cannot be opened.
//
// Every exit path releases the recognizer and the model. The model is loaded
// on each call, so callers should expect this function to be slow.
QString audioRecognizer(QString fn){
    const QString failMessage = "FAILED";

    // Model location; adjust to wherever the model is actually deployed.
    VoskModel *model = vosk_model_new("../../models/vosk-model-small-cn-0.22");
    if (!model) {
        return failMessage;
    }

    // Create the recognizer for a 16000 Hz sample rate.
    VoskRecognizer *rec = vosk_recognizer_new(model, 16000.0);
    if (!rec) {
        vosk_model_free(model);
        return failMessage;
    }

    // Open the WAV file.
    std::ifstream wav(fn.toStdString(), std::ios::binary);
    if (!wav.is_open()) {
        vosk_recognizer_free(rec);
        vosk_model_free(model);
        return failMessage;
    }

    // Skip the WAV header (44 bytes).
    wav.seekg(44);

    std::vector<char> buffer(4000);
    QString resultText;
    // The second condition lets the last, partially filled chunk be processed.
    while (wav.read(buffer.data(), buffer.size()) || wav.gcount() > 0) {
        const int len = static_cast<int>(wav.gcount());
        // Only a positive return marks a finished utterance; 0 means decoding
        // continues and a negative value signals an error, so neither may be
        // treated as a result.
        if (vosk_recognizer_accept_waveform(rec, buffer.data(), len) > 0) {
            const std::string json = vosk_recognizer_result(rec);
            resultText += parseVoskResult(json) + "\n";
        }
    }

    // Flush whatever is still buffered in the recognizer.
    const std::string finalJson = vosk_recognizer_final_result(rec);
    resultText += parseVoskResult(finalJson);

    // Release in reverse order of creation.
    vosk_recognizer_free(rec);
    vosk_model_free(model);
    return resultText;
}
3.语音识别助手控件:功能识别的载体设计
为了在系统中实现语音识别的主动触发功能,需要设计一个交互控件。当前为测试基本功能,采用点击触发方式,暂未实现全局悬浮或语音唤醒等高级功能。
// wgt_voiceassistant.h
#ifndef _WGT_VOICEASSISTANT_H
#define _WGT_VOICEASSISTANT_H
#include <QWidget>
#include <QPushButton>
#include <QTextEdit>
#include <QVBoxLayout>
#include <QHBoxLayout>
#include <QAudioInput>
#include <QFile>
#include <QAudioFormat>
#include <QTimer>
#include <QPropertyAnimation>
#include <QPointer>
#include <QStringList>
class HtVoiceAssistant : public QWidget{
Q_OBJECT
public:
explicit HtVoiceAssistant(QWidget *parent = nullptr);
~HtVoiceAssistant();
private:
void setupUI(); //设置控件显示UI
void setupAudio(); //音频组件初始化
void showInputBox();
void hideInputBox();
void startRecording();
void stopRecording();
// 组件
QPushButton *voiceButton;
QWidget *inputBox;
QTextEdit *textEdit;
QHBoxLayout *mainLayout;
// 音频组件
QAudioInput *audioInput;
QFile *outputFile;
QAudioFormat *audioFormat;
QTimer *recordingTimer;
QTimer *simulationTimer;
// 动画
QPropertyAnimation *showAnimation;
QPropertyAnimation *hideAnimation;
bool isRecording;
int recordingDuration;// 录音时长(秒)
private slots:
void onVoiceButtonClicked();
void onRecordingTimeout();
};
#endif // _WGT_VOICEASSISTANT_H
操作流程如下:
- 控件初始化阶段调用 setupUI() 和 setupAudio(),分别完成界面控件初始化与音频组件准备
- 用户点击"语音助手"按钮触发 onVoiceButtonClicked()
- 系统显示输入框(showInputBox())并开始录音(startRecording())
- 再次点击按钮停止录音(stopRecording())
- 语音内容识别接口接入,语音转文本
- 最终显示语音识别结果文本
// Builds the widget: UI first, then the audio components, then the voice
// command registrations.
//
// Every pointer member is set to nullptr up front (in declaration order) so
// that code which tests them before they are assigned, such as the
// `audioInput && outputFile` check in stopRecording(), never reads an
// indeterminate value.
HtVoiceAssistant::HtVoiceAssistant(QWidget *parent):
    QWidget(parent),
    voiceButton(nullptr),
    inputBox(nullptr),
    textEdit(nullptr),
    mainLayout(nullptr),
    audioInput(nullptr),
    outputFile(nullptr),
    audioFormat(nullptr),
    recordingTimer(nullptr),
    simulationTimer(nullptr),
    showAnimation(nullptr),
    hideAnimation(nullptr),
    isRecording(false),
    recordingDuration(0)
{
    setupUI();
    setupAudio();
    setupCommands();
}
// Builds the widget's UI: a frameless, always-on-top translucent strip that
// holds the (initially hidden) result box on the left and the voice button on
// the right, plus the two geometry animations for the result box.
void HtVoiceAssistant::setupUI(){
    // Window attributes
    setWindowFlags(Qt::FramelessWindowHint | Qt::WindowStaysOnTopHint);
    setAttribute(Qt::WA_TranslucentBackground);
    setFixedSize(800,100);

    mainLayout = new QHBoxLayout(this);
    mainLayout->setContentsMargins(20, 20, 20, 20);
    mainLayout->setSpacing(15);

    // Voice assistant button
    voiceButton = new QPushButton("语音助手", this);
    voiceButton->setFixedSize(120,40);
    voiceButton->setStyleSheet(
        "QPushButton {"
        " background-color: #4CAF50;"
        " color: white;"
        " border: none;"
        " border-radius: 20px;"
        " font-size: 14px;"
        " font-weight: bold;"
        "}"
        "QPushButton:hover {"
        " background-color: #45a049;"
        "}"
        "QPushButton:pressed {"
        " background-color: #3d8b40;"
        "}"
    );

    // Container for the text box; hidden until recording starts
    inputBox = new QWidget(this);
    inputBox->setFixedSize(600,60);
    inputBox->setStyleSheet(
        "background-color: white;"
        "border-radius: 15px;"
        "border: 2px solid #4CAF50;"
    );
    inputBox->setVisible(false);

    // Read-only text box. Its width is 580 so that, with the 10 px left
    // offset, it stays inside the 600 px wide container instead of being
    // clipped on the right.
    textEdit = new QTextEdit(inputBox);
    textEdit->setGeometry(10,10,580,40);
    textEdit->setStyleSheet(
        "QTextEdit {"
        " border: none;"
        " background: transparent;"
        " font-size: 14px;"
        " padding: 10px;"
        "}"
    );
    textEdit->setReadOnly(true);

    // Layout: result box first, button second
    mainLayout->addWidget(inputBox, 0, Qt::AlignVCenter);
    mainLayout->addWidget(voiceButton, 0, Qt::AlignVCenter);

    // Animations act on the container's geometry
    showAnimation = new QPropertyAnimation(inputBox, "geometry", this);
    hideAnimation = new QPropertyAnimation(inputBox, "geometry", this);

    // A click toggles recording via the slot
    connect(voiceButton, &QPushButton::clicked, this, &HtVoiceAssistant::onVoiceButtonClicked);
}
void HtVoiceAssistant::setupAudio(){
//音频格式
audioFormat = new QAudioFormat();
audioFormat->setSampleRate(16000); // 采样率
audioFormat->setChannelCount(1); // 单声道
audioFormat->setSampleSize(16); // 16-bit
audioFormat->setCodec("audio/pcm"); // PCM 编码
audioFormat->setByteOrder(QAudioFormat::LittleEndian);
audioFormat->setSampleType(QAudioFormat::SignedInt);
//录音计时器
recordingTimer = new QTimer(this);
recordingTimer->setInterval(1000);
connect(recordingTimer, &QTimer::timeout, this, &HtVoiceAssistant::onRecordingTimeout);
}
// Slot for the voice button: stops the recording when one is in progress,
// otherwise starts a new one.
void HtVoiceAssistant::onVoiceButtonClicked(){
    if (isRecording) {
        stopRecording();
        return;
    }
    startRecording();
}
点击触发开始录音功能(使用QT内置组件),再次点击结束录音并调用语音识别接口,随即启动音频处理流程
void HtVoiceAssistant::startRecording(){
isRecording = true;
recordingDuration = 0;
showInputBox();
// 更新按钮文本和样式
voiceButton->setText("停止录音");
voiceButton->setStyleSheet(
"QPushButton {"
" background-color: #f44336;"
" color: white;"
" border: none;"
" border-radius: 20px;"
" font-size: 14px;"
" font-weight: bold;"
"}"
"QPushButton:hover {"
" background-color: #da190b;"
"}"
);
textEdit->setText("您好,请大声说话!正在录音...");
//开始计时
recordingTimer->start();
}
void HtVoiceAssistant::stopRecording(){
isRecording = false;
recordingTimer->stop();
isRecord = 0;
// 更新按钮文本和样式
voiceButton->setText("语音助手");
voiceButton->setStyleSheet(
"QPushButton {"
" background-color: #4CAF50;"
" color: white;"
" border: none;"
" border-radius: 20px;"
" font-size: 14px;"
" font-weight: bold;"
"}"
"QPushButton:hover {"
" background-color: #45a049;"
"}"
);
textEdit->setText("录音结束,正在识别中...");
if (audioInput && outputFile) {
audioInput->stop();
outputFile->close();
// 更新 WAV 文件头
QFile file("./voskdemo/data/test.wav");
if (file.open(QIODevice::ReadWrite)) {
qint64 dataSize = file.size() - 44;
// RIFF 头
file.seek(0);
file.write("RIFF", 4);
quint32 fileSize = dataSize + 36;
file.write(reinterpret_cast<const char*>(&fileSize), 4);
file.write("WAVE", 4);
// fmt 块
file.write("fmt ", 4);
quint32 subchunk1Size = 16;
quint16 audioFormat = 1;
quint16 numChannels = 1;
quint32 sampleRate = 16000;
quint16 bitsPerSample = 16;
quint32 byteRate = sampleRate * numChannels * bitsPerSample / 8;
quint16 blockAlign = numChannels * bitsPerSample / 8;
file.write(reinterpret_cast<const char*>(&subchunk1Size), 4);
file.write(reinterpret_cast<const char*>(&audioFormat), 2);
file.write(reinterpret_cast<const char*>(&numChannels), 2);
file.write(reinterpret_cast<const char*>(&sampleRate), 4);
file.write(reinterpret_cast<const char*>(&byteRate), 4);
file.write(reinterpret_cast<const char*>(&blockAlign), 2);
file.write(reinterpret_cast<const char*>(&bitsPerSample), 2);
// data 块
file.write("data", 4);
file.write(reinterpret_cast<const char*>(&dataSize), 4);
file.close();
}
delete audioInput;
audioInput = nullptr;
delete outputFile;
outputFile = nullptr;
qDebug() << "录音结束,保存为 test.wav";
}
QString result = audioRecognizer("./voskdemo/data/test.wav");
textEdit->setText(result.remove(' '));
}
显示控件文本框的函数只需设置可见属性并播放展开动画即可(隐藏时同理):
void HtVoiceAssistant::showInputBox(){
inputBox->setVisible(true);
// 设置动画效果
QRect startRect = inputBox->geometry();
startRect.setHeight(0);
QRect endRect = inputBox->geometry();
showAnimation->setDuration(300);
showAnimation->setStartValue(startRect);
showAnimation->setEndValue(endRect);
showAnimation->start();
}
4.指令分析
识别内容后,系统将执行相应操作。以页面切换为例,当检测到"切换到…页面"的语音指令时,程序会提取关键词并完成跳转。为实现这一功能,需建立指令处理机制:首先需要设计标准化的指令数据结构,然后构建指令库并定义相应的操作规则。
// Callback invoked for a matched command; receives the regex capture groups.
using CommandHandler = std::function<void(const QStringList&)>;

// One registered voice command.
struct VoiceCommand{
    QRegularExpression pattern;  // regular expression the recognized text is matched against
    CommandHandler handler;      // function run when the pattern matches
    QString description;         // human-readable description of the command
};
我们提供API接口及注册规则:
// Registry of voice commands: patterns are registered together with a handler,
// and recognized text is dispatched to the first registered pattern it matches.
// NOTE(review): getInstance() hands out a function-local static of this
// QWidget-derived class; confirm its construction/destruction order relative
// to the application object is acceptable.
class VoiceCommandManager : public QWidget{
Q_OBJECT
public:
explicit VoiceCommandManager(QWidget *parent = nullptr);
~VoiceCommandManager();
static VoiceCommandManager& getInstance();// singleton accessor
// Registers (or replaces) the command stored under `pattern`.
void registerCommand(const QString& pattern, CommandHandler handler, const QString& description = "");
// Runs the handler of the first matching command; returns false when nothing matches.
bool executeCommand(const QString& text);
private:
// Commands keyed by their pattern string.
QMap<QString, VoiceCommand> m_commands;
};
// Returns the single shared manager, created on first use as a function-local static.
VoiceCommandManager& VoiceCommandManager::getInstance(){
    static VoiceCommandManager sharedManager;
    return sharedManager;
}
void VoiceCommandManager::registerCommand(const QString& pattern, CommandHandler handler, const QString& description){
VoiceCommand command;
command.pattern = QRegularExpression(pattern);
command.handler = handler;
command.description = description;
m_commands[pattern] = command;
}
bool VoiceCommandManager::executeCommand(const QString& text){
qDebug() << "进入指令处理逻辑";
for(auto it = m_commands.begin(); it != m_commands.end(); ++it){
QRegularExpressionMatch match = it.value().pattern.match(text);
if(match.hasMatch()){
QStringList captured;
for(int i = 1; i <= match.lastCapturedIndex(); ++i){
captured << match.captured(i);
}
qDebug() << "执行指令:" << it.key() << "参数:" << captured;
if (it.value().handler) {
it.value().handler(captured);
}
return true;
}
}
return false;
}
单例模式的接口设计确保了类对象实例始终指向同一资源库。语音助手初始化时会将所需规则注册入库。当识别到指令后,系统通过API查询资源库中的匹配规则,若找到对应规则则执行相应的指令处理函数。
void HtVoiceAssistant::setupCommands(){
auto& commandManager = VoiceCommandManager::getInstance();
commandManager.registerCommand(
"切换(?:到)?(.*?)页面",
[this](const QStringList& params) { handleSwitchPage(params);},
"切换页面 - 例如:'切换到首页'、'切换设置页面'"
);
}
// 应用时:
QString result = audioRecognizer("./voskdemo/data/test.wav");
textEdit->setText(result.remove(' '));
auto& commandManger = VoiceCommandManager::getInstance();
commandManger.executeCommand(result.remove(' '));
// 对应的指令处理函数:
void HtVoiceAssistant::handleSwitchPage(const QStringList& params){
if(params.isEmpty() || params[0].isEmpty()){
textEdit->setText("请指定要切换的页面名称");
return;
}
QString pageName = params[0];
textEdit->setText(QString("正在切换到【%1】页面").arg(pageName));
char_t pagePath[256];
// 从关键词-页面地址对应表中查询页面路径,此表需自制
strcpy(pagePath, path_databse_get2(pageName.toUtf8().constData()));
// 项目中的页面切换接口
gui_menu_id_change(pagePath);
}
通过哈希存储方式建立关键词与页面地址的映射关系,根据提取到的字段快速检索对应页面地址,从而实现高效页面切换功能。

被折叠的 条评论
为什么被折叠?



