基于AM67A的嵌入式语音识别开发(五)--QT项目集成和切换指令解析(精确查找)

成功调用Vosk后,我们验证了模型在板端运行的可行性。考虑到最终需要将该功能集成到实际项目中,我们随即展开了与真实项目的对接测试。

1.前期准备

1.放置vosk文件夹(内含include、lib两个子文件夹)至项目资源文件夹内;
2.放置模型文件(model)和创建语音数据文件夹;
3.在项目.pro文件中添加路径,让项目能找到这些文件

# Pull in Qt's multimedia module; it is used for audio recording.
QT       += core gui widgets multimedia

# Header and library locations of the bundled Vosk SDK.
# "..." stands for the entries the project already has. Every line except the
# last one of an assignment must end with a backslash, and the last one must
# not, otherwise the following assignment is swallowed into this one.
INCLUDEPATH += ... \
						$$PWD/../../vosk/include
LIBS += 	... \
			-L$$PWD/../../vosk/lib -lvosk

2.语音识别逻辑移植

创建wgt_vosk.h、wgt_vosk.cpp文件,在.h文件中留好函数声明,当作接口使用

// Include the Vosk API header with C linkage.
extern "C" {
#include "vosk_api.h"
}

/// Runs speech recognition on the audio file named by `fn`.
/// @param fn  Path of the audio file to recognize.
/// @return    The recognized text, or the string "FAILED" when the model or
///            the audio file cannot be opened.
QString audioRecognizer(QString fn);

.cpp文件中写好识别逻辑

// Helper: pull the "text" field out of a Vosk JSON result string.
// Returns an empty QString when the input is not valid JSON or when the JSON
// root is not an object.
static QString parseVoskResult(const std::string &jsonStr)
{
    QJsonParseError parseStatus;
    const QByteArray payload = QByteArray::fromStdString(jsonStr);
    const QJsonDocument parsed = QJsonDocument::fromJson(payload, &parseStatus);

    const bool usable = (parseStatus.error == QJsonParseError::NoError) && parsed.isObject();
    if (!usable) {
        return QString();
    }

    return parsed.object().value("text").toString();
}

/// Recognizes the speech contained in a WAV file with Vosk.
///
/// The file is read in 4000-byte chunks after skipping a fixed 44-byte header,
/// and every chunk is fed to a recognizer created for a 16000 Hz sample rate.
/// Intermediate results are joined with '\n', followed by the final result.
///
/// @param fn  Path of the WAV file to recognize.
/// @return    The concatenated recognized text, or "FAILED" when the model,
///            the recognizer or the file cannot be opened.
QString audioRecognizer(QString fn){
    const QString failMessage = "FAILED";

    // Model location; adjust to wherever the model is actually deployed.
    VoskModel *model = vosk_model_new("../../models/vosk-model-small-cn-0.22");
    if (!model) {
        return failMessage;
    }

    // Create the recognizer for a 16000 Hz sample rate.
    VoskRecognizer *rec = vosk_recognizer_new(model, 16000.0);
    if (!rec) {
        vosk_model_free(model);
        return failMessage;
    }

    // Open the WAV file.
    std::ifstream wav(fn.toStdString(), std::ios::binary);
    if (!wav.is_open()) {
        // Release both Vosk handles; the recognizer goes first because it
        // was created from the model.
        vosk_recognizer_free(rec);
        vosk_model_free(model);
        return failMessage;
    }

    // Skip the WAV header (44 bytes).
    wav.seekg(44);

    std::vector<char> buffer(4000);
    QString resultText;

    // The second condition lets the last, partially filled chunk through.
    while (wav.read(buffer.data(), buffer.size()) || wav.gcount() > 0) {
        const int len = static_cast<int>(wav.gcount());
        if (vosk_recognizer_accept_waveform(rec, buffer.data(), len)) {
            std::string json = vosk_recognizer_result(rec);
            resultText += parseVoskResult(json) + "\n";
        }
    }

    // Final result. Copy it before freeing the recognizer.
    std::string finalJson = vosk_recognizer_final_result(rec);
    resultText += parseVoskResult(finalJson);

    // Previously neither handle was released on this path, so every call
    // leaked a recognizer and a model.
    vosk_recognizer_free(rec);
    vosk_model_free(model);

    return resultText;
}

3.语音识别助手控件:功能识别的载体设计

为了在系统中实现语音识别的主动触发功能,需要设计一个交互控件。当前为测试基本功能,采用点击触发方式,暂未实现全局悬浮或语音唤醒等高级功能。

// wgt_voiceassistant.h

#ifndef _WGT_VOICEASSISTANT_H
#define _WGT_VOICEASSISTANT_H

#include <QAudioFormat>
#include <QAudioInput>
#include <QFile>
#include <QHBoxLayout>
#include <QPropertyAnimation>
#include <QPushButton>
#include <QStringList>
#include <QTextEdit>
#include <QTimer>
#include <QVBoxLayout>
#include <QWidget>

class HtVoiceAssistant : public QWidget{
    Q_OBJECT
public:
    explicit HtVoiceAssistant(QWidget *parent = nullptr);
    ~HtVoiceAssistant();
private:
    void setupUI(); //设置控件显示UI
    void setupAudio(); //音频组件初始化
    void showInputBox();
    void hideInputBox();
    void startRecording();
    void stopRecording();

	// 组件
    QPushButton *voiceButton;
    QWidget *inputBox;
    QTextEdit *textEdit;
    QHBoxLayout *mainLayout;
	// 音频组件
    QAudioInput *audioInput;
    QFile *outputFile;
    QAudioFormat *audioFormat;

    QTimer *recordingTimer;
    QTimer *simulationTimer;

    // 动画
    QPropertyAnimation *showAnimation;
    QPropertyAnimation *hideAnimation;

    bool isRecording;
    int recordingDuration;// 录音时长(秒)

private slots:
    void onVoiceButtonClicked();
    void onRecordingTimeout();

};
#endif // _WGT_VOICEASSISTANT_H

操作流程如下:

  1. 控件初始化阶段依次调用setupUI()和setupAudio(),分别完成界面控件初始化与音频组件准备
  2. 用户点击"语音助手"按钮触发onVoiceButtonClicked()
  3. 系统显示输入框(showInputBox())并开始录音(startRecording())
  4. 再次点击按钮停止录音(stopRecording())
  5. 语音内容识别接口接入,语音转文本
  6. 最终显示语音识别结果文本
// Constructs the assistant widget: builds the UI, prepares the audio
// components and registers the voice commands.
//
// All pointer members are null-initialized here, in declaration order.
// stopRecording() tests `audioInput && outputFile`, which would read
// indeterminate values if these members were left uninitialized.
HtVoiceAssistant::HtVoiceAssistant(QWidget *parent):
    QWidget(parent),
    voiceButton(nullptr),
    inputBox(nullptr),
    textEdit(nullptr),
    mainLayout(nullptr),
    audioInput(nullptr),
    outputFile(nullptr),
    audioFormat(nullptr),
    recordingTimer(nullptr),
    simulationTimer(nullptr),
    showAnimation(nullptr),
    hideAnimation(nullptr),
    isRecording(false),
    recordingDuration(0)
{
    setupUI();
    setupAudio();
    setupCommands();
}

// Builds the widget's UI: a frameless, always-on-top, translucent 800x100
// window holding a hidden text box on the left and the voice button on the
// right, plus the show/hide geometry animations for the text box.
void HtVoiceAssistant::setupUI(){
    // Window attributes
    setWindowFlags(Qt::FramelessWindowHint | Qt::WindowStaysOnTopHint);
    setAttribute(Qt::WA_TranslucentBackground);
    setFixedSize(800,100);

    mainLayout = new QHBoxLayout(this);
    mainLayout->setContentsMargins(20, 20, 20, 20);
    mainLayout->setSpacing(15);

    // Voice assistant button
    voiceButton = new QPushButton("语音助手", this);
    voiceButton->setFixedSize(120,40);
    voiceButton->setStyleSheet(
        "QPushButton {"
        "    background-color: #4CAF50;"
        "    color: white;"
        "    border: none;"
        "    border-radius: 20px;"
        "    font-size: 14px;"
        "    font-weight: bold;"
        "}"
        "QPushButton:hover {"
        "    background-color: #45a049;"
        "}"
        "QPushButton:pressed {"
        "    background-color: #3d8b40;"
        "}"
    );

    // Container for the text box; hidden until recording starts
    inputBox = new QWidget(this);
    inputBox->setFixedSize(600,60);
    inputBox->setStyleSheet(
        "background-color: white;"
        "border-radius: 15px;"
        "border: 2px solid #4CAF50;"
    );
    inputBox->setVisible(false);

    // Read-only text box. It sits 10 px inside the 600x60 container, so it
    // may be at most 580 px wide; the former width of 650 px extended past
    // the container's right edge.
    textEdit = new QTextEdit(inputBox);
    textEdit->setGeometry(10,10,580,40);
    textEdit->setStyleSheet(
        "QTextEdit {"
        "    border: none;"
        "    background: transparent;"
        "    font-size: 14px;"
        "    padding: 10px;"
        "}"
    );
    textEdit->setReadOnly(true);

    // Layout: text box first, button second
    mainLayout->addWidget(inputBox, 0, Qt::AlignVCenter);
    mainLayout->addWidget(voiceButton, 0, Qt::AlignVCenter);

    // Animations on the container's geometry
    showAnimation = new QPropertyAnimation(inputBox, "geometry", this);
    hideAnimation = new QPropertyAnimation(inputBox, "geometry", this);

    // A click on the button runs the slot that toggles recording
    connect(voiceButton, &QPushButton::clicked, this, &HtVoiceAssistant::onVoiceButtonClicked);
}

void HtVoiceAssistant::setupAudio(){
    //音频格式
    audioFormat = new QAudioFormat();
    audioFormat->setSampleRate(16000);        // 采样率
    audioFormat->setChannelCount(1);          // 单声道
    audioFormat->setSampleSize(16);           // 16-bit
    audioFormat->setCodec("audio/pcm");       // PCM 编码
    audioFormat->setByteOrder(QAudioFormat::LittleEndian);
    audioFormat->setSampleType(QAudioFormat::SignedInt);

    //录音计时器
    recordingTimer = new QTimer(this);
    recordingTimer->setInterval(1000);
    connect(recordingTimer, &QTimer::timeout, this, &HtVoiceAssistant::onRecordingTimeout);
}

// Slot for the voice button: toggles between starting and stopping a
// recording depending on the current state.
void HtVoiceAssistant::onVoiceButtonClicked(){
    if (isRecording) {
        stopRecording();
        return;
    }
    startRecording();
}

点击触发开始录音功能(使用QT内置组件),再次点击结束录音并调用语音识别接口,随即启动音频处理流程

void HtVoiceAssistant::startRecording(){
    isRecording = true;
    recordingDuration = 0;

    showInputBox();

    // 更新按钮文本和样式
    voiceButton->setText("停止录音");
    voiceButton->setStyleSheet(
        "QPushButton {"
        "    background-color: #f44336;"
        "    color: white;"
        "    border: none;"
        "    border-radius: 20px;"
        "    font-size: 14px;"
        "    font-weight: bold;"
        "}"
        "QPushButton:hover {"
        "    background-color: #da190b;"
        "}"
    );

    textEdit->setText("您好,请大声说话!正在录音...");

    //开始计时
    recordingTimer->start();
}

void HtVoiceAssistant::stopRecording(){
    isRecording = false;
    recordingTimer->stop();
    isRecord = 0;

    // 更新按钮文本和样式
    voiceButton->setText("语音助手");
    voiceButton->setStyleSheet(
        "QPushButton {"
        "    background-color: #4CAF50;"
        "    color: white;"
        "    border: none;"
        "    border-radius: 20px;"
        "    font-size: 14px;"
        "    font-weight: bold;"
        "}"
        "QPushButton:hover {"
        "    background-color: #45a049;"
        "}"
    );
    textEdit->setText("录音结束,正在识别中...");

    if (audioInput && outputFile) {
        audioInput->stop();
        outputFile->close();

        // 更新 WAV 文件头
        QFile file("./voskdemo/data/test.wav");
        if (file.open(QIODevice::ReadWrite)) {
            qint64 dataSize = file.size() - 44;

            // RIFF 头
            file.seek(0);
            file.write("RIFF", 4);
            quint32 fileSize = dataSize + 36;
            file.write(reinterpret_cast<const char*>(&fileSize), 4);
            file.write("WAVE", 4);

            // fmt 块
            file.write("fmt ", 4);
            quint32 subchunk1Size = 16;
            quint16 audioFormat = 1;
            quint16 numChannels = 1;
            quint32 sampleRate = 16000;
            quint16 bitsPerSample = 16;
            quint32 byteRate = sampleRate * numChannels * bitsPerSample / 8;
            quint16 blockAlign = numChannels * bitsPerSample / 8;
            file.write(reinterpret_cast<const char*>(&subchunk1Size), 4);
            file.write(reinterpret_cast<const char*>(&audioFormat), 2);
            file.write(reinterpret_cast<const char*>(&numChannels), 2);
            file.write(reinterpret_cast<const char*>(&sampleRate), 4);
            file.write(reinterpret_cast<const char*>(&byteRate), 4);
            file.write(reinterpret_cast<const char*>(&blockAlign), 2);
            file.write(reinterpret_cast<const char*>(&bitsPerSample), 2);

            // data 块
            file.write("data", 4);
            file.write(reinterpret_cast<const char*>(&dataSize), 4);

            file.close();
        }

        delete audioInput;
        audioInput = nullptr;
        delete outputFile;
        outputFile = nullptr;

        qDebug() << "录音结束,保存为 test.wav";
    }
    QString result = audioRecognizer("./voskdemo/data/test.wav");
    textEdit->setText(result.remove(' '));
}

显示文本框时附带一个展开动画,代码如下;隐藏文本框的函数只需设置相应属性即可:

void HtVoiceAssistant::showInputBox(){
    inputBox->setVisible(true);

    // 设置动画效果
    QRect startRect = inputBox->geometry();
    startRect.setHeight(0);
    QRect endRect = inputBox->geometry();

    showAnimation->setDuration(300);
    showAnimation->setStartValue(startRect);
    showAnimation->setEndValue(endRect);
    showAnimation->start();
}

4.指令分析

识别内容后,系统将执行相应操作。以页面切换为例,当检测到"切换到…页面"的语音指令时,程序会提取关键词并完成跳转。为实现这一功能,需建立指令处理机制:首先需要设计标准化的指令数据结构,然后构建指令库并定义相应的操作规则。

// Callback invoked with the texts captured by a command's regular expression.
using CommandHandler = std::function<void(const QStringList&)>;

// One registered voice command.
struct VoiceCommand{
    // Regular expression the recognized text is matched against.
    QRegularExpression pattern;
    // Function called when the pattern matches.
    CommandHandler handler;
    // Human-readable description of the command.
    QString description;
};

我们提供API接口及注册规则:

// Registry of voice commands: maps regular-expression patterns to handlers
// and dispatches recognized text to the first pattern that matches.
// NOTE(review): derives from QWidget although the visible code uses no widget
// functionality; confirm whether QObject (or no base class) would suffice.
class VoiceCommandManager : public QWidget{
    Q_OBJECT
public:
    explicit VoiceCommandManager(QWidget *parent = nullptr);
    ~VoiceCommandManager();

    // Returns the shared, process-wide instance.
    static VoiceCommandManager& getInstance();// singleton accessor
    // Registers `handler` under `pattern`; registering the same pattern
    // string again replaces the earlier entry.
    void registerCommand(const QString& pattern, CommandHandler handler, const QString& description = "");
    // Matches `text` against the registered patterns and runs the handler of
    // the first one that matches. Returns true if a pattern matched.
    bool executeCommand(const QString& text);

private:
    // Commands keyed by their pattern string. QMap iterates in key order, so
    // match priority follows the ordering of the pattern strings rather than
    // the order of registration.
    QMap<QString, VoiceCommand> m_commands;
};

// Returns the single shared manager. The instance is a function-local static,
// so it is constructed on the first call and the same object is returned on
// every later call.
// NOTE(review): the instance is a QWidget with static storage duration;
// confirm it is first requested after QApplication has been created.
VoiceCommandManager& VoiceCommandManager::getInstance(){
    static VoiceCommandManager instance;
    return instance;
}

void VoiceCommandManager::registerCommand(const QString& pattern, CommandHandler handler, const QString& description){
    VoiceCommand command;
    command.pattern = QRegularExpression(pattern);
    command.handler = handler;
    command.description = description;

    m_commands[pattern] = command;
}

bool VoiceCommandManager::executeCommand(const QString& text){
    qDebug() << "进入指令处理逻辑";
    for(auto it = m_commands.begin(); it != m_commands.end(); ++it){
        QRegularExpressionMatch match = it.value().pattern.match(text);
        if(match.hasMatch()){
            QStringList captured;
            for(int i = 1; i <= match.lastCapturedIndex(); ++i){
                captured << match.captured(i);
            }
            qDebug() << "执行指令:" << it.key() << "参数:" << captured;
            if (it.value().handler) {
               it.value().handler(captured);
             }
             return true;
        }
    }
    return false;
}

单例模式的接口设计确保了类对象实例始终指向同一资源库。语音助手初始化时会将所需规则注册入库。当识别到指令后,系统通过API查询资源库中的匹配规则,若找到对应规则则执行相应的指令处理函数。

// Registers this widget's voice commands with the shared command manager.
// Currently one command: "切换[到]<name>页面", whose captured <name> is passed
// to handleSwitchPage().
void HtVoiceAssistant::setupCommands(){
    auto& commandManager = VoiceCommandManager::getInstance();

    // NOTE(review): the handler captures `this` and is stored inside the
    // manager singleton. Nothing in the visible code unregisters it, so the
    // widget must outlive every executeCommand() call.
    //
    // The pattern requires the trailing "页面". The description's first
    // example used to be '切换到首页', which this pattern cannot match; it now
    // shows an utterance that does.
    commandManager.registerCommand(
            "切换(?:到)?(.*?)页面",
            [this](const QStringList& params) { handleSwitchPage(params);},
            "切换页面 - 例如:'切换到首页页面'、'切换设置页面'"
    );
}

// 应用时:
 QString result = audioRecognizer("./voskdemo/data/test.wav");
 textEdit->setText(result.remove(' '));

 auto& commandManger = VoiceCommandManager::getInstance();
 commandManger.executeCommand(result.remove(' '));

// 对应的指令处理函数:
void HtVoiceAssistant::handleSwitchPage(const QStringList& params){
    if(params.isEmpty() || params[0].isEmpty()){
        textEdit->setText("请指定要切换的页面名称");
        return;
    }

    QString pageName = params[0];
    textEdit->setText(QString("正在切换到【%1】页面").arg(pageName));
    char_t pagePath[256];
    // 从关键词-页面地址对应表中查询页面路径,此表需自制
    strcpy(pagePath, path_databse_get2(pageName.toUtf8().constData()));
    // 项目中的页面切换接口
    gui_menu_id_change(pagePath);
}

通过哈希存储方式建立关键词与页面地址的映射关系,根据提取到的字段快速检索对应页面地址,从而实现高效页面切换功能。

评论
成就一亿技术人!
拼手气红包6.0元
还能输入1000个字符
 
红包 添加红包
表情包 插入表情
 条评论被折叠 查看
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值