webassembly009 transformers.js 网页端侧推理 whisper-web transcriber & useTranscriber

本文链接：https://blog.youkuaiyun.com/ResumeProject/article/details/145602162

worker.js

模型的整体推理结构仍与《webassembly009 transformers.js 网页端侧推理 NLLB翻译模型》一文中的结构类似。

worker.js 模型工厂类

/* eslint-disable camelcase */
import {
    pipeline, env } from "@xenova/transformers"; // 导入必要的模块

// 禁用本地模型，确保仅使用在线资源
env.allowLocalModels = false;

/**
 * Base factory that lazily creates and caches one transformers.js pipeline
 * per class. Subclasses configure it through the static `task`, `model` and
 * `quantized` fields and obtain the pipeline with `getInstance()`.
 */
class PipelineFactory {
    /** Task identifier passed as the first argument to `pipeline()`. */
    static task = null;
    /** Model name passed as the second argument to `pipeline()`. */
    static model = null;
    /** Forwarded as the `quantized` option of `pipeline()`. */
    static quantized = null;
    /** Cached promise returned by `pipeline()`, or `null` when nothing is loaded. */
    static instance = null;

    constructor(tokenizer, model, quantized) {
        this.tokenizer = tokenizer;
        this.model = model;
        this.quantized = quantized;
    }

    /**
     * Returns the cached pipeline, starting the load on first use.
     *
     * The in-flight promise itself is cached so that concurrent callers share
     * a single load. If that load fails, the cache is cleared so the next call
     * retries instead of returning the same rejected promise forever.
     *
     * @param {Function|null} [progress_callback=null] - Forwarded to
     *     `pipeline()` as the `progress_callback` option.
     * @returns {Promise<*>} Resolves to whatever `pipeline()` resolves to;
     *     rejects if the load fails.
     */
    static async getInstance(progress_callback = null) {
        if (this.instance === null) {
            // By default `pipeline()` downloads the model from the Hugging Face
            // Hub and keeps it in the browser cache (see the transformers.js
            // "custom usage" documentation).
            const pending = pipeline(this.task, this.model, {
                quantized: this.quantized,
                progress_callback,

                // For medium models, load the `no_attentions` revision to
                // avoid running out of memory.
                revision: this.model.includes("/whisper-medium") ? "no_attentions" : "main",
            });
            this.instance = pending;

            // Drop a failed load from the cache. The identity check protects a
            // newer instance that may have replaced this one in the meantime.
            Promise.resolve(pending).catch(() => {
                if (this.instance === pending) {
                    this.instance = null;
                }
            });
        }
        return this.instance;
    }
}

// Pipeline factory for automatic speech recognition; extends PipelineFactory
// and only overrides the static configuration fields.
class AutomaticSpeechRecognitionPipelineFactory extends PipelineFactory {

    static task = "automatic-speech-recognition"; // task type: automatic speech recognition
    // Both start as null; `transcribe` in this file assigns them before it
    // requests the pipeline instance.
    static model = null;
    static quantized = null;
}

处理来自主线程的消息

// Entry point of the worker: each message from the main thread carries one
// transcription request.
self.addEventListener("message", async ({ data }) => {
    const { audio, model, multilingual, quantized, subtask, language } = data;

    // Run the transcription with the parameters supplied by the main thread.
    const transcript = await transcribe(
        audio,
        model,
        multilingual,
        quantized,
        subtask,
        language,
    );

    // A null transcript means there is nothing to report back.
    if (transcript !== null) {
        // Hand the finished transcript back to the main thread.
        self.postMessage({
            status: "complete",
            task: "automatic-speech-recognition",
            data: transcript,
        });
    }
});

调用worker

// 转录函数，根据提供的音频和其他参数进行转录
const transcribe = async (
    audio,
    model,
    multilingual,
    quantized,
    subtask,
    language,
) => {
   
    const isDistilWhisper = model.startsWith("distil-whisper/"); // 判断是否是特定模型

    let modelName = model;
    if (!isDistilWhisper && !multilingual) {
   
        modelName += ".en"; // 如果不是多语言模型，则附加.en后缀
    }

    const p = AutomaticSpeechRecognitionPipelineFactory;
    if (p.model !== modelName || p.quantized !== quantized) {
   
        // 若模型或量化设置改变，则重新初始化
        p.model = modelName;
        p.quantized = quantized;

        if (p.instance !== null) {
   
            (await p.getInstance()).dispose();
            p.instance = null;
        }
    }

    // 加载转录器模型
    let transcriber = await p.getInstance(