# 1 main.py
import os
import json
import logging
import tempfile
import numpy as np
from fastapi import FastAPI, File, UploadFile, HTTPException
from fastapi.responses import JSONResponse
from vosk import Model, KaldiRecognizer
from vosk_helper import AudioProcessor
# Initialize the FastAPI application
app = FastAPI(title="VOSK语音识别服务", version="1.1.0")
# Configure logging: INFO level; each record carries timestamp, logger name,
# level name and message
logging.basicConfig(
level=logging.INFO,
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
# Logger used by this module
logger = logging.getLogger("ASR-Service")
# Load the model once at startup and share it through app.state
@app.on_event("startup")
async def load_asr_model():
    """Load the VOSK model and the audio processor when the app starts.

    The model directory is read from the ``VOSK_MODEL_PATH`` environment
    variable and falls back to ``/app/models/vosk-model-cn``. On success the
    loaded ``Model`` is stored as ``app.state.model`` and a fresh
    ``AudioProcessor`` as ``app.state.audio_processor``.

    Raises:
        RuntimeError: If the model path does not exist or loading fails for
            any other reason. The original exception is attached as the
            cause so the real failure stays visible in the traceback.
    """
    try:
        model_path = os.getenv("VOSK_MODEL_PATH", "/app/models/vosk-model-cn")
        # Check the path up front so a missing model yields a clear message.
        if not os.path.exists(model_path):
            raise FileNotFoundError(f"模型路径不存在: {model_path}")
        app.state.model = Model(model_path)
        app.state.audio_processor = AudioProcessor()
    except Exception as e:
        # logger.exception records the traceback along with the message.
        logger.exception(f"模型加载失败: {e}")
        raise RuntimeError("服务初始化失败") from e

    # NOTE(review): the vosk ``Model`` class does not appear to expose a
    # ``version`` attribute -- confirm. Reading it with a default keeps a
    # successful load from being reported as a startup failure.
    model_version = getattr(app.state.model, "version", "unknown")
    logger.info(f"成功加载VOSK模型,版本:{model_version}")
@app.post("/recognize", summary="语音识别接口", response_description="识别结果")
async def recognize_speech(
file: UploadFile = File(..., description="上传的音频文件(支持wav/mp3/amr格式)")
):
"""
语音识别处理接口:
- 支持格式:WAV/MP3/AMR
- 最大文件尺寸:25MB
- 返回:识别文本和置信度
"""
# 文件校验
if not file.content_type.startswith('audio/'):
logger.warning(f"非法文件类型: {file.content_type}")
raise HTTPException(400, detail="仅支持音频文件上传")
# 创建临时文件
suffix = os.path.splitext(file.filename)[1]
with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as tmp:
try:
content = await file.read()
# 文件大小限制25MB
if len(content) > 25 * 1024 * 1024:
raise HTTPException(413, "文件大小超过25MB限制")
tmp.write(content)
tmp_path = tmp.name
except Exception as e