[Notes on a Python PaddleSpeech error] self._outputs["result"] KeyError: 'result' (with a fix)

Following the official demo provided by Paddle, I wrote a speech-to-text program:

import paddle
from paddlespeech.cli.asr import ASRExecutor

asr = ASRExecutor()
result = asr(audio_file="zh.wav")
print('ASR Result: \n{}'.format(result))

Running it immediately failed with:
self._outputs["result"] KeyError: 'result'
After two days of digging through documentation and stepping on quite a few pitfalls, here is the solution directly:

  • Don't use too new a Python version: I started on Python 3.13 and had to downgrade to Python 3.10 (a minimal environment-setup sketch follows after this list).
  • Pin the dependency versions directly in requirements.txt:
# Base dependencies
numpy==1.23.5
protobuf==3.20.0

# Core dependencies
paddlepaddle==2.4.2
paddlenlp==2.5.2
paddlespeech==1.4.1
paddleaudio==1.1.0
paddle-bfloat==0.1.7
paddle2onnx
paddlefsl==1.1.0
paddlesde==0.2.5
paddleslim==2.6.0
paddlespeech-feat==0.1.0
ppdiffusers
  • Explicitly specify the model name when calling the asr instance:
import paddle
from paddlespeech.cli.asr import ASRExecutor

asr = ASRExecutor()
result = asr(
    audio_file="zh.wav",
    model='conformer_wenetspeech'  # explicitly specify the model
)
print('ASR Result: \n{}'.format(result))
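
For the first point above, a minimal environment-setup sketch (assuming conda is available; a venv built on a separately installed Python 3.10 works just as well):

# create and activate an isolated Python 3.10 environment
conda create -n paddlespeech310 python=3.10 -y
conda activate paddlespeech310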

After adjusting requirements.txt, reinstall the dependencies:
pip install -r requirements.txt
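
After reinstalling, a quick sanity check that the pins actually took effect and that Paddle itself is functional (paddle.utils.run_check() is part of PaddlePaddle's public API; the expected versions match the pins above):

from importlib.metadata import version
import paddle

print(version("paddlepaddle"))  # expect 2.4.2
print(version("paddlespeech"))  # expect 1.4.1
paddle.utils.run_check()        # verifies the PaddlePaddle install end to end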

您可能感兴趣的与本文相关的镜像

Python3.8

Python3.8

Conda
Python

Python 是一种高级、解释型、通用的编程语言,以其简洁易读的语法而闻名,适用于广泛的应用,包括Web开发、数据分析、人工智能和自动化脚本

import os import sys import json import gc import time import concurrent.futures import traceback import numpy as np import librosa import torch import psutil import noisereduce as nr from typing import List, Dict, Tuple, Optional, Any from pydub import AudioSegment, effects from pydub.silence import split_on_silence from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks from transformers import AutoModelForSequenceClassification, AutoTokenizer from PyQt5.QtWidgets import (QApplication, QMainWindow, QWidget, QVBoxLayout, QHBoxLayout, QPushButton, QLabel, QLineEdit, QTextEdit, QFileDialog, QProgressBar, QGroupBox, QMessageBox, QListWidget, QSplitter, QTabWidget, QTableWidget, QTableWidgetItem, QHeaderView, QAction, QMenu, QToolBar, QComboBox, QSpinBox, QDialog, QDialogButtonBox) from PyQt5.QtCore import QThread, pyqtSignal, Qt from PyQt5.QtGui import QFont, QColor, QIcon from collections import deque import logging import shutil import subprocess import tempfile # 配置日志 logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s') logger = logging.getLogger("DialectQA") # ====================== 工具函数 ====================== def check_ffmpeg_available() -> Tuple[bool, str]: """检查ffmpeg是否可用并返回检查结果和说明""" if not shutil.which("ffmpeg"): return False, "系统中未找到ffmpeg,请安装并添加到PATH" try: result = subprocess.run( ["ffmpeg", "-version"], stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True, timeout=3 ) if "ffmpeg version" in result.stdout: return True, "FFmpeg已正确安装并可用" return False, "FFmpeg可执行但返回异常输出" except (subprocess.TimeoutExpired, FileNotFoundError): return False, "FFmpeg执行失败" except Exception as e: return False, f"FFmpeg检查出错: {str(e)}" def is_gpu_available() -> bool: """检查GPU是否可用""" return torch.cuda.is_available() and torch.cuda.device_count() > 0 # ====================== 增强型资源监控器 ====================== class EnhancedResourceMonitor: def __init__(self): self.gpu_available = is_gpu_available() self.history_size = 60 # 保留60秒历史数据 self.cpu_history = deque(maxlen=self.history_size) self.gpu_history = deque(maxlen=self.history_size) self.last_check_time = time.time() def __del__(self): """析构时释放资源""" if self.gpu_available: torch.cuda.empty_cache() def memory_percent(self) -> Dict[str, float]: """获取当前内存使用百分比""" try: result = {"cpu": psutil.virtual_memory().percent} if self.gpu_available: allocated = torch.cuda.memory_allocated() / (1024 ** 3) reserved = torch.cuda.memory_reserved() / (1024 ** 3) total = torch.cuda.get_device_properties(0).total_memory / (1024 ** 3) gpu_usage = (allocated + reserved) / total * 100 if total > 0 else 0 result["gpu"] = gpu_usage else: result["gpu"] = 0.0 current_time = time.time() if current_time - self.last_check_time >= 1.0: self.cpu_history.append(result["cpu"]) if self.gpu_available: self.gpu_history.append(result["gpu"]) self.last_check_time = current_time return result except Exception as e: logger.error(f"内存监控失败: {str(e)}") return {"cpu": 0, "gpu": 0} def get_usage_trend(self) -> Dict[str, float]: """获取内存使用趋势(移动平均值)""" if not self.cpu_history: return {"cpu": 0, "gpu": 0} cpu_avg = sum(self.cpu_history) / len(self.cpu_history) gpu_avg = sum(self.gpu_history) / len(self.gpu_history) if self.gpu_available and self.gpu_history else 0 return {"cpu": cpu_avg, "gpu": gpu_avg} def is_under_heavy_load(self, threshold: float = 85.0) -> bool: """检查系统是否处于高负载状态""" current = self.memory_percent() trend = self.get_usage_trend() return any([ current["cpu"] > threshold, current["gpu"] > threshold, trend["cpu"] > 
threshold, trend["gpu"] > threshold ]) # ====================== 方言处理器(增强版) ====================== class EnhancedDialectProcessor: KEYWORDS = { "opening": ("您好", "很高兴为您服务", "请问有什么可以帮您", "麻烦您喽", "请问搞哪样", "有咋个可以帮您", "多谢喽", "你好", "早上好", "下午好", "晚上好"), "closing": ("感谢来电", "祝您生活愉快", "再见", "搞归一喽", "麻烦您喽", "再见喽", "慢走喽", "谢谢", "拜拜"), "forbidden": ("不知道", "没办法", "你投诉吧", "随便你", "搞不成", "没得法", "随便你喽", "你投诉吧喽", "我不懂", "自己看"), "salutation": ("先生", "女士", "小姐", "老师", "师傅", "哥", "姐", "兄弟", "妹儿", "老板", "同志"), "reassurance": ("非常抱歉", "请不要着急", "我们会尽快处理", "理解您的心情", "实在对不住", "莫急哈", "马上帮您整", "理解您得很", "不好意思", "请您谅解", "我们会尽快解决") } # 扩展贵州方言到普通话的映射 _DIALECT_ITEMS = ( ("恼火得很", "非常生气"), ("鬼火戳", "很愤怒"), ("搞不成", "无法完成"), ("没得", "没有"), ("搞哪样嘛", "做什么呢"), ("归一喽", "完成了"), ("咋个", "怎么"), ("克哪点", "去哪里"), ("麻烦您喽", "麻烦您了"), ("多谢喽", "多谢了"), ("憨包", "傻瓜"), ("归一", "结束"), ("板扎", "很好"), ("鬼火冒", "非常生气"), ("背时", "倒霉"), ("吃豁皮", "占便宜"), ("扯拐", "出问题"), ("打脑壳", "头疼"), ("二天", "以后"), ("鬼火绿", "非常生气"), ("哈数", "规矩"), ("经事", "耐用"), ("抠脑壳", "思考"), ("拉稀摆带", "不靠谱"), ("马起脸", "板着脸"), ("哦豁", "哎呀"), ("皮坨", "拳头"), ("千翻", "顽皮"), ("日鼓鼓", "生气"), ("煞角", "结束"), ("舔肥", "巴结"), ("弯酸", "刁难"), ("歪得很", "凶"), ("悬掉掉", "危险"), ("妖艳儿", "炫耀"), ("渣渣", "垃圾") ) class TrieNode: __slots__ = ('children', 'is_end', 'value') def __init__(self): self.children = {} self.is_end = False self.value = "" # 类加载时直接构建Trie树 _trie_root = TrieNode() for dialect, standard in sorted(_DIALECT_ITEMS, key=lambda x: len(x[0]), reverse=True): node = _trie_root for char in dialect: if char not in node.children: node.children[char] = EnhancedDialectProcessor.TrieNode() node = node.children[char] node.is_end = True node.value = standard @classmethod def preprocess_text(cls, texts: List[str]) -> List[str]: """使用预构建的Trie树进行方言转换""" return [cls._process_single_text(text) for text in texts] @classmethod def _process_single_text(cls, text: str) -> str: """处理单个文本的核心逻辑""" result = [] i = 0 n = len(text) while i < n: node = cls._trie_root j = i last_match = None # 查找最长匹配 while j < n and text[j] in node.children: node = node.children[text[j]] j += 1 if node.is_end: last_match = (j, node.value) if last_match: end_index, replacement = last_match result.append(replacement) i = end_index else: result.append(text[i]) i += 1 return ''.join(result) # ====================== 系统配置管理器 ====================== class ConfigManager: __slots__ = ('config', 'dirty') _instance = None _DEFAULT_CONFIG = { "model_paths": { "asr": "D:/models/ASR-models/iic/speech_paraformer-large-vad-punc-spk_asr_nat-zh-cn", "sentiment": "D:/models/distilbert-base-multilingual-cased-sentiments-student" }, "sample_rate": 16000, "silence_thresh": -40, "min_silence_len": 1000, "max_concurrent": 1, "max_audio_duration": 3600, "enable_fp16": True, "enable_quantization": True, "max_sentiment_batch_size": 16 } def __new__(cls): if cls._instance is None: cls._instance = super().__new__(cls) cls._instance.dirty = False cls._instance.config = cls._DEFAULT_CONFIG.copy() cls._instance.load_config() return cls._instance def load_config(self): """加载配置文件""" try: if os.path.exists("config.json"): with open("config.json", "r", encoding="utf-8") as f: file_config = json.load(f) # 深度合并配置 for key, value in file_config.items(): if key in self.config and isinstance(self.config[key], dict) and isinstance(value, dict): self.config[key].update(value) else: self.config[key] = value except json.JSONDecodeError: logger.warning("配置文件格式错误,部分使用默认配置") except Exception as e: logger.error(f"加载配置失败: {str(e)},部分使用默认配置") def save_config(self, force=False): """延迟保存机制:仅当配置变化时保存""" if not 
force and not self.dirty: return try: with open("config.json", "w", encoding="utf-8") as f: json.dump(self.config, f, indent=2, ensure_ascii=False) self.dirty = False except Exception as e: logger.error(f"保存配置失败: {str(e)}") def get(self, key: str, default=None): return self.config.get(key, default) def set(self, key: str, value, immediate_save=False): self.config[key] = value self.dirty = True if immediate_save: self.save_config(force=True) def check_model_paths(self) -> Tuple[bool, List[str]]: errors = [] model_paths = self.get("model_paths", {}) for model_name, path in model_paths.items(): if not path: errors.append(f"{model_name}模型路径未设置") elif not os.path.exists(path): errors.append(f"{model_name}模型路径不存在: {path}") elif not os.path.isdir(path): errors.append(f"{model_name}模型路径不是有效的目录: {path}") return len(errors) == 0, errors def __del__(self): """析构时自动保存未持久化的更改""" if self.dirty: self.save_config(force=True) # ====================== 增强型音频处理器 ====================== class EnhancedAudioProcessor: SUPPORTED_FORMATS = ('.mp3', '.wav', '.amr', '.m4a') MAX_SEGMENT_DURATION = 5 * 60 * 1000 # 5分钟分段限制 ENHANCEMENT_CONFIG = { 'noise_sample_duration': 0.5, # 噪声采样时长(秒) 'telephone_filter_range': (300, 3400), # 电话频段范围(Hz) 'compression_threshold': -25.0, # 压缩阈值(dBFS) 'compression_ratio': 3.0 # 压缩比 } def __init__(self): self._noise_profile = None self._sample_rate = ConfigManager().get("sample_rate", 16000) @staticmethod def check_dependencies(): try: # 尝试导入所需库 import librosa import noisereduce return True, "依赖检查通过" except ImportError as e: return False, f"缺少依赖库: {str(e)}" def process_audio(self, input_path: str, temp_dir: str) -> Optional[List[str]]: """处理音频文件并返回分段文件路径列表""" if not self._validate_input(input_path, temp_dir): return None try: # 使用临时目录处理音频 with tempfile.TemporaryDirectory() as process_dir: audio = self._load_audio(input_path) if audio is None: return None # 基础预处理 audio = self._basic_preprocessing(audio) # 音频增强处理 audio = self._enhance_audio(audio) # 分段并保存 return self._segment_audio(audio, input_path, temp_dir or process_dir) except Exception as e: logger.error(f"音频处理失败: {str(e)}", exc_info=True) return None def _validate_input(self, input_path: str, temp_dir: str) -> bool: """验证输入参数有效性""" ffmpeg_available, ffmpeg_msg = check_ffmpeg_available() if not ffmpeg_available: logger.error(f"ffmpeg错误: {ffmpeg_msg}") return False deps_ok, deps_msg = self.check_dependencies() if not deps_ok: logger.error(f"依赖错误: {deps_msg}") return False os.makedirs(temp_dir, exist_ok=True) ext = os.path.splitext(input_path)[1].lower() if ext not in self.SUPPORTED_FORMATS: logger.error(f"不支持的音频格式: {ext}") return False if not os.path.exists(input_path): logger.error(f"文件不存在: {input_path}") return False return True def _load_audio(self, input_path: str) -> Optional[AudioSegment]: """加载音频文件""" try: return AudioSegment.from_file(input_path) except Exception as e: logger.error(f"无法加载音频文件: {str(e)}") return None def _basic_preprocessing(self, audio: AudioSegment) -> AudioSegment: """基础预处理:统一采样率和通道数""" # 确保音频为单声道 if audio.channels > 1: audio = audio.set_channels(1) # 统一采样率 if audio.frame_rate != self._sample_rate: audio = audio.set_frame_rate(self._sample_rate) return audio def _enhance_audio(self, audio: AudioSegment) -> AudioSegment: """执行音频增强处理流水线""" self._analyze_noise_profile(audio) audio = self._extract_main_voice(audio) audio = self._enhance_telephone_quality(audio) return self._normalize_audio(audio) def _analyze_noise_profile(self, audio: AudioSegment): """分析噪声样本以创建噪声剖面""" try: samples = 
np.array(audio.get_array_of_samples()) sr = audio.frame_rate noise_duration = int(sr * self.ENHANCEMENT_CONFIG['noise_sample_duration']) self._noise_profile = samples[:min(noise_duration, len(samples))].astype(np.float32) except Exception as e: logger.warning(f"噪声分析失败: {str(e)}") self._noise_profile = None def _extract_main_voice(self, audio: AudioSegment) -> AudioSegment: """从音频中提取主要人声""" if self._noise_profile is None: logger.warning("无噪声样本可用,跳过说话人提取") return audio try: samples = np.array(audio.get_array_of_samples()) sr = audio.frame_rate reduced_noise = nr.reduce_noise( y=samples.astype(np.float32), sr=sr, y_noise=self._noise_profile, prop_decrease=0.8 ) return AudioSegment( reduced_noise.astype(np.int16).tobytes(), frame_rate=sr, sample_width=2, channels=1 ) except Exception as e: logger.warning(f"降噪处理失败: {str(e)}") return audio def _enhance_telephone_quality(self, audio: AudioSegment) -> AudioSegment: """增强电话语音质量(带通滤波)""" try: low, high = self.ENHANCEMENT_CONFIG['telephone_filter_range'] return audio.low_pass_filter(high).high_pass_filter(low) except Exception as e: logger.warning(f"电话质量增强失败: {str(e)}") return audio def _normalize_audio(self, audio: AudioSegment) -> AudioSegment: """音频归一化处理""" try: # 动态范围压缩 audio = effects.compress_dynamic_range( audio, threshold=self.ENHANCEMENT_CONFIG['compression_threshold'], ratio=self.ENHANCEMENT_CONFIG['compression_ratio'] ) # 标准化音量 return effects.normalize(audio) except Exception as e: logger.warning(f"音频标准化失败: {str(e)}") return audio def _segment_audio(self, audio: AudioSegment, input_path: str, output_dir: str) -> List[str]: """根据静音分割音频""" min_silence_len = ConfigManager().get("min_silence_len", 1000) silence_thresh = ConfigManager().get("silence_thresh", -40) try: segments = split_on_silence( audio, min_silence_len=min_silence_len, silence_thresh=silence_thresh, keep_silence=500 ) # 确保分段不超过5分钟 merged_segments = [] current_segment = AudioSegment.silent(duration=0, frame_rate=self._sample_rate) for seg in segments: if len(current_segment) + len(seg) <= self.MAX_SEGMENT_DURATION: current_segment += seg else: merged_segments.append(current_segment) current_segment = seg if len(current_segment) > 0: merged_segments.append(current_segment) # 保存分段 output_files = [] base_name = os.path.splitext(os.path.basename(input_path))[0] for i, seg in enumerate(merged_segments): output_file = os.path.join(output_dir, f"{base_name}_segment_{i + 1}.wav") seg.export(output_file, format="wav") output_files.append(output_file) return output_files except Exception as e: logger.error(f"音频分割失败: {str(e)}") return [] # ====================== ASR处理器 ====================== class ASRProcessor: def __init__(self): self.config = ConfigManager() self._asr_pipeline = None self._gpu_available = is_gpu_available() self._initialize_pipeline() def _initialize_pipeline(self): """初始化ASR管道""" model_path = self.config.get("model_paths", {}).get("asr") if not model_path: logger.error("未配置ASR模型路径") return try: device = "gpu" if self._gpu_available else "cpu" self._asr_pipeline = pipeline( task=Tasks.auto_speech_recognition, model=model_path, device=device ) logger.info(f"ASR模型初始化完成,使用设备: {device}") except Exception as e: logger.error(f"ASR模型初始化失败: {str(e)}") self._asr_pipeline = None def transcribe(self, audio_path: str) -> Optional[str]: """转录单个音频文件""" if not self._asr_pipeline: logger.error("ASR管道未初始化") return None try: result = self._asr_pipeline(audio_path) return result.get('text', '') except Exception as e: logger.error(f"音频转录失败: {str(e)}") return None def batch_transcribe(self, 
audio_files: List[str]) -> List[Optional[str]]: """批量转录音频文件""" if not self._asr_pipeline: logger.error("ASR管道未初始化") return [None] * len(audio_files) results = [] for audio_file in audio_files: results.append(self.transcribe(audio_file)) # 转录后立即释放内存 torch.cuda.empty_cache() if self._gpu_available else gc.collect() return results # ====================== 情感分析器 ====================== class SentimentAnalyzer: def __init__(self): self.config = ConfigManager() self._tokenizer = None self._model = None self._gpu_available = is_gpu_available() self._initialize_model() def _initialize_model(self): """初始化情感分析模型""" model_path = self.config.get("model_paths", {}).get("sentiment") if not model_path: logger.error("未配置情感分析模型路径") return try: self._tokenizer = AutoTokenizer.from_pretrained(model_path) self._model = AutoModelForSequenceClassification.from_pretrained(model_path) if self._gpu_available: self._model = self._model.cuda() logger.info("情感分析模型初始化完成") except Exception as e: logger.error(f"情感分析模型初始化失败: {str(e)}") self._tokenizer = None self._model = None def analyze(self, texts: List[str]) -> List[Dict[str, float]]: """分析文本情感""" if not self._model or not self._tokenizer: logger.error("情感分析模型未初始化") return [{"positive": 0.0, "negative": 0.0, "neutral": 0.0}] * len(texts) try: # 分批处理 batch_size = self.config.get("max_sentiment_batch_size", 16) results = [] for i in range(0, len(texts), batch_size): batch = texts[i:i + batch_size] inputs = self._tokenizer( batch, padding=True, truncation=True, max_length=128, return_tensors="pt" ) if self._gpu_available: inputs = {k: v.cuda() for k, v in inputs.items()} with torch.no_grad(): outputs = self._model(**inputs) # 获取概率分布 probs = torch.nn.functional.softmax(outputs.logits, dim=-1).cpu().numpy() # 转换为字典格式 for j in range(probs.shape[0]): results.append({ "negative": float(probs[j][0]), "neutral": float(probs[j][1]), "positive": float(probs[j][2]) }) return results except Exception as e: logger.error(f"情感分析失败: {str(e)}") return [{"positive": 0.0, "negative": 0.0, "neutral": 0.0}] * len(texts) # ====================== 核心处理线程 ====================== class ProcessingThread(QThread): progress = pyqtSignal(int, str) finished = pyqtSignal(dict) error = pyqtSignal(str) def __init__(self, audio_path: str): super().__init__() self.audio_path = audio_path self.resource_monitor = EnhancedResourceMonitor() self._stop_requested = False def run(self): """处理流程主函数""" try: # 1. 初始化配置 config = ConfigManager() ok, errors = config.check_model_paths() if not ok: self.error.emit(f"模型路径配置错误: {'; '.join(errors)}") return # 2. 创建临时目录 temp_dir = tempfile.mkdtemp(prefix="dialectqa_") self.progress.emit(10, "创建临时目录完成") # 3. 预处理音频 audio_processor = EnhancedAudioProcessor() segments = audio_processor.process_audio(self.audio_path, temp_dir) if not segments: self.error.emit("音频预处理失败") return self.progress.emit(30, f"音频预处理完成,生成{len(segments)}个分段") # 4. ASR转录 asr = ASRProcessor() transcripts = asr.batch_transcribe(segments) if not any(transcripts): self.error.emit("ASR转录失败") return self.progress.emit(50, f"转录完成,总计{len(''.join(transcripts))}字") # 5. 方言预处理 transcripts = EnhancedDialectProcessor.preprocess_text(transcripts) self.progress.emit(60, "方言转换完成") # 6. 情感分析 sentiment = SentimentAnalyzer() sentiments = sentiment.analyze(transcripts) self.progress.emit(80, "情感分析完成") # 7. 关键字检测 keywords_stats = self._analyze_keywords(transcripts) self.progress.emit(90, "关键字检测完成") # 8. 
结果汇总 result = { "audio_path": self.audio_path, "segments": segments, "transcripts": transcripts, "sentiments": sentiments, "keywords": keywords_stats } # 9. 清理资源 gc.collect() if self._gpu_available: torch.cuda.empty_cache() self.finished.emit(result) self.progress.emit(100, "处理完成") except Exception as e: self.error.emit(f"处理失败: {str(e)}\n{traceback.format_exc()}") finally: # 延迟清理临时目录(实际应用中可能需要保留结果) pass def _analyze_keywords(self, transcripts: List[str]) -> Dict[str, int]: """分析关键字出现频率""" stats = {category: 0 for category in EnhancedDialectProcessor.KEYWORDS} full_text = "".join(transcripts) for category, keywords in EnhancedDialectProcessor.KEYWORDS.items(): for kw in keywords: stats[category] += full_text.count(kw) return stats def stop(self): """请求停止处理""" self._stop_requested = True self.terminate() # ====================== 主界面 ====================== class DialectQAAnalyzer(QMainWindow): def __init__(self): super().__init__() self.setWindowTitle("方言客服语音质量分析系统") self.setGeometry(100, 100, 1200, 800) self.setWindowIcon(QIcon("icon.png")) # 初始化状态 self.audio_path = "" self.processing_thread = None self.results = None self._init_ui() self.check_dependencies() self.show() def _init_ui(self): """初始化用户界面""" # 创建主布局 main_widget = QWidget(self) main_layout = QVBoxLayout(main_widget) # 创建选项卡 tab_widget = QTabWidget() main_layout.addWidget(tab_widget) # 创建输入选项卡 input_tab = QWidget() input_layout = QVBoxLayout(input_tab) tab_widget.addTab(input_tab, "输入") # 音频选择区域 audio_group = QGroupBox("音频文件") audio_layout = QHBoxLayout(audio_group) self.audio_path_edit = QLineEdit() self.audio_path_edit.setReadOnly(True) audio_layout.addWidget(self.audio_path_edit, 4) browse_btn = QPushButton("浏览...") browse_btn.clicked.connect(self.select_audio) audio_layout.addWidget(browse_btn, 1) input_layout.addWidget(audio_group) # 进度区域 progress_group = QGroupBox("处理进度") progress_layout = QVBoxLayout(progress_group) self.progress_bar = QProgressBar() self.progress_bar.setRange(0, 100) self.progress_text = QLabel("准备就绪") progress_layout.addWidget(self.progress_bar) progress_layout.addWidget(self.progress_text) input_layout.addWidget(progress_group) # 操作按钮 button_layout = QHBoxLayout() self.start_btn = QPushButton("开始分析") self.start_btn.clicked.connect(self.start_processing) self.start_btn.setEnabled(False) self.stop_btn = QPushButton("停止分析") self.stop_btn.clicked.connect(self.stop_processing) self.stop_btn.setEnabled(False) button_layout.addWidget(self.start_btn) button_layout.addWidget(self.stop_btn) input_layout.addLayout(button_layout) # 结果预览区域 preview_group = QGroupBox("预览") preview_layout = QVBoxLayout(preview_group) self.preview_text = QTextEdit() self.preview_text.setReadOnly(True) preview_layout.addWidget(self.preview_text) input_layout.addWidget(preview_group) # 结果选项卡 result_tab = QWidget() result_layout = QVBoxLayout(result_tab) tab_widget.addTab(result_tab, "详细结果") # 结果表格 result_group = QGroupBox("分析明细") result_layout = QVBoxLayout(result_group) self.results_table = QTableWidget() self.results_table.setColumnCount(5) self.results_table.setHorizontalHeaderLabels(["分段", "文本内容", "积极", "中性", "消极"]) self.results_table.horizontalHeader().setSectionResizeMode(QHeaderView.Stretch) result_layout.addWidget(self.results_table) result_layout.addWidget(result_group) # 关键字统计 keywords_group = QGroupBox("关键字统计") keywords_layout = QVBoxLayout(keywords_group) self.keywords_table = QTableWidget() self.keywords_table.setColumnCount(2) self.keywords_table.setHorizontalHeaderLabels(["类别", "出现次数"]) 
self.keywords_table.horizontalHeader().setSectionResizeMode(QHeaderView.Stretch) keywords_layout.addWidget(self.keywords_table) result_layout.addWidget(keywords_group) # 状态栏 self.statusBar().showMessage("就绪") # 设置中心控件 self.setCentralWidget(main_widget) def check_dependencies(self): """检查系统依赖""" # 检查GPU if not is_gpu_available(): self.statusBar().showMessage("警告: 未检测到GPU,将使用CPU模式运行", 10000) # 检查FFmpeg ffmpeg_ok, ffmpeg_msg = check_ffmpeg_available() if not ffmpeg_ok: QMessageBox.warning(self, "依赖缺失", ffmpeg_msg) # 检查模型路径 config = ConfigManager() ok, errors = config.check_model_paths() if not ok: QMessageBox.warning(self, "配置错误", "\n".join(errors)) def select_audio(self): """选择音频文件""" file_path, _ = QFileDialog.getOpenFileName( self, "选择音频文件", "", "音频文件 (*.mp3 *.wav *.amr *.m4a)" ) if file_path: self.audio_path = file_path self.audio_path_edit.setText(file_path) self.start_btn.setEnabled(True) self.preview_text.setText(f"已选择文件: {file_path}") def start_processing(self): """开始处理音频""" if not self.audio_path: QMessageBox.warning(self, "错误", "请先选择音频文件") return # 禁用UI按钮 self.start_btn.setEnabled(False) self.stop_btn.setEnabled(True) self.preview_text.clear() # 创建处理线程 self.processing_thread = ProcessingThread(self.audio_path) self.processing_thread.progress.connect(self.update_progress) self.processing_thread.finished.connect(self.on_processing_finished) self.processing_thread.error.connect(self.on_processing_error) self.processing_thread.start() self.statusBar().showMessage("处理中...") def stop_processing(self): """停止处理""" if self.processing_thread and self.processing_thread.isRunning(): self.processing_thread.stop() self.stop_btn.setEnabled(False) self.statusBar().showMessage("已停止处理") def update_progress(self, value: int, message: str): """更新进度""" self.progress_bar.setValue(value) self.progress_text.setText(message) self.preview_text.append(message) def on_processing_finished(self, result: dict): """处理完成事件""" self.results = result self.stop_btn.setEnabled(False) self.start_btn.setEnabled(True) self.statusBar().showMessage("处理完成") # 更新结果表格 self.update_results_table() # 显示成功消息 QMessageBox.information(self, "完成", f"分析完成!\n音频时长: {self.calculate_audio_duration()}秒\n总字数: {len(''.join(result['transcripts']))}字") def on_processing_error(self, error: str): """处理错误事件""" self.stop_btn.setEnabled(False) self.start_btn.setEnabled(True) self.statusBar().showMessage("处理失败") # 显示错误详情 error_dialog = QDialog(self) error_dialog.setWindowTitle("处理错误") layout = QVBoxLayout() text_edit = QTextEdit() text_edit.setPlainText(error) text_edit.setReadOnly(True) layout.addWidget(text_edit) buttons = QDialogButtonBox(QDialogButtonBox.Ok) buttons.accepted.connect(error_dialog.accept) layout.addWidget(buttons) error_dialog.setLayout(layout) error_dialog.exec() def update_results_table(self): """更新结果表格""" if not self.results: return # 更新分段结果表格 segments = self.results.get("segments", []) transcripts = self.results.get("transcripts", []) sentiments = self.results.get("sentiments", []) self.results_table.setRowCount(len(segments)) for i in range(len(segments)): # 分段编号 self.results_table.setItem(i, 0, QTableWidgetItem(f"分段 {i + 1}")) # 文本内容 self.results_table.setItem(i, 1, QTableWidgetItem(transcripts[i])) # 情感分析结果 if i < len(sentiments): sentiment = sentiments[i] self.results_table.setItem(i, 2, QTableWidgetItem(f"{sentiment['positive'] * 100:.1f}%")) self.results_table.setItem(i, 3, QTableWidgetItem(f"{sentiment['neutral'] * 100:.1f}%")) self.results_table.setItem(i, 4, QTableWidgetItem(f"{sentiment['negative'] * 100:.1f}%")) # 
更新关键字统计表格 keywords = self.results.get("keywords", {}) self.keywords_table.setRowCount(len(keywords)) for i, (category, count) in enumerate(keywords.items()): # 类别名称 self.keywords_table.setItem(i, 0, QTableWidgetItem(self._translate_category(category))) # 出现次数 self.keywords_table.setItem(i, 1, QTableWidgetItem(str(count))) # 根据次数设置颜色 if count > 0: for j in range(2): self.keywords_table.item(i, j).setBackground(QColor(255, 230, 230)) def _translate_category(self, category: str) -> str: """翻译关键字类别名称""" translations = { "opening": "开场白", "closing": "结束语", "forbidden": "禁用语", "salutation": "称呼语", "reassurance": "安抚语" } return translations.get(category, category) def calculate_audio_duration(self) -> float: """计算音频总时长(秒)""" if not self.audio_path or not os.path.exists(self.audio_path): return 0.0 try: audio = AudioSegment.from_file(self.audio_path) return len(audio) / 1000.0 # 转换为秒 except: return 0.0 # ====================== 主程序入口 ====================== @staticmethod def main(): # 启用高分屏支持 os.environ["QT_ENABLE_HIGHDPI_SCALING"] = "1" QApplication.setHighDpiScaleFactorRoundingPolicy(Qt.HighDpiScaleFactorRoundingPolicy.PassThrough) app = QApplication(sys.argv) app.setFont(QFont("Microsoft YaHei UI", 9)) # 设置默认字体 # 创建主窗口 window = DialectQAAnalyzer() window.show() # 检查资源 monitor = EnhancedResourceMonitor() if monitor.is_under_heavy_load(): QMessageBox.warning(window, "系统警告", "当前系统资源负载较高,性能可能受影响") # 运行应用 sys.exit(app.exec_()) if __name__ == "__main__": try: DialectQAAnalyzer.main() # 调用静态方法 except Exception as e: error_msg = f"致命错误: {str(e)}\n{traceback.format_exc()}" logger.critical(error_msg) # 创建临时错误报告 temp_file = os.path.join(os.getcwd(), "crash_report.txt") with open(temp_file, "w", encoding="utf-8") as f: f.write(error_msg) # 显示错误对话框 app = QApplication(sys.argv) msg_box = QMessageBox() msg_box.setIcon(QMessageBox.Critical) msg_box.setWindowTitle("系统崩溃") msg_box.setText("程序遇到致命错误,已终止运行") msg_box.setInformativeText(f"错误报告已保存到: {temp_file}") msg_box.exec() 运行以上代码时错先错误提示: 未解析的引用 'EnhancedDialectProcessor':164行
最新发布
09-09
全盘检索代码,是否存在错误,是否可执行,是否存在逻辑错误: import os import sys import re import json import gc import time import tempfile import concurrent.futures import difflib import threading import numpy as np import librosa import torch import psutil from typing import List, Dict, Tuple, Optional, Set from threading import Lock, Semaphore, RLock from datetime import datetime from pydub import AudioSegment from pydub.silence import split_on_silence from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks from transformers import AutoModelForSequenceClassification, AutoTokenizer from torch.utils.data import TensorDataset, DataLoader from PyQt5.QtWidgets import (QApplication, QMainWindow, QWidget, QVBoxLayout, QHBoxLayout, QPushButton, QLabel, QLineEdit, QTextEdit, QFileDialog, QProgressBar, QGroupBox, QMessageBox, QListWidget, QSplitter, QTabWidget, QTableWidget, QTableWidgetItem, QHeaderView, QAction, QMenu, QToolBar, QCheckBox, QComboBox, QSpinBox) from PyQt5.QtCore import QThread, pyqtSignal, Qt, QTimer, QSize from PyQt5.QtGui import QFont, QTextCursor, QColor, QIcon # ====================== 资源监控器 ====================== class ResourceMonitor: """统一资源监控器(精简版)""" def __init__(self): self.gpu_available = torch.cuda.is_available() def memory_percent(self) -> float: """获取内存使用百分比""" try: if self.gpu_available: allocated = torch.cuda.memory_allocated() / (1024 ** 3) total = torch.cuda.get_device_properties(0).total_memory / (1024 ** 3) return (allocated / total) * 100 if total > 0 else 0 else: return psutil.virtual_memory().percent except: return 0 # ====================== 方言配置中心(优化版) ====================== class DialectConfig: """集中管理方言配置,便于维护和扩展(带缓存)""" # 标准关键词 STANDARD_KEYWORDS = { "opening": ["您好", "很高兴为您服务", "请问有什么可以帮您"], "closing": ["感谢来电", "祝您生活愉快", "再见"], "forbidden": ["不知道", "没办法", "你投诉吧", "随便你"] } # 贵州方言关键词 GUIZHOU_KEYWORDS = { "opening": ["麻烦您喽", "请问搞哪样", "有咋个可以帮您", "多谢喽"], "closing": ["搞归一喽", "麻烦您喽", "再见喽", "慢走喽"], "forbidden": ["搞不成", "没得法", "随便你喽", "你投诉吧喽"] } # 方言到标准表达的映射 DIALECT_MAPPING = { "恼火得很": "非常生气", "鬼火戳": "很愤怒", "搞不成": "无法完成", "没得": "没有", "搞哪样嘛": "做什么呢", "归一喽": "完成了", "咋个": "怎么", "克哪点": "去哪里", "麻烦您喽": "麻烦您了", "多谢喽": "多谢了" } # 类属性缓存 _combined_keywords = None _compiled_opening = None _compiled_closing = None _hotwords = None _dialect_pattern = None @classmethod def get_combined_keywords(cls) -> Dict[str, List[str]]: """获取合并后的关键词集(带缓存)""" if cls._combined_keywords is None: cls._combined_keywords = { "opening": cls.STANDARD_KEYWORDS["opening"] + cls.GUIZHOU_KEYWORDS["opening"], "closing": cls.STANDARD_KEYWORDS["closing"] + cls.GUIZHOU_KEYWORDS["closing"], "forbidden": cls.STANDARD_KEYWORDS["forbidden"] + cls.GUIZHOU_KEYWORDS["forbidden"] } return cls._combined_keywords @classmethod def get_compiled_opening(cls) -> List[re.Pattern]: """获取预编译的开场关键词正则表达式(带缓存)""" if cls._compiled_opening is None: keywords = cls.get_combined_keywords()["opening"] cls._compiled_opening = [re.compile(re.escape(kw)) for kw in keywords] return cls._compiled_opening @classmethod def get_compiled_closing(cls) -> List[re.Pattern]: """获取预编译的结束关键词正则表达式(带缓存)""" if cls._compiled_closing is None: keywords = cls.get_combined_keywords()["closing"] cls._compiled_closing = [re.compile(re.escape(kw)) for kw in keywords] return cls._compiled_closing @classmethod def get_asr_hotwords(cls) -> List[str]: """获取ASR热词列表(带缓存)""" if cls._hotwords is None: combined = cls.get_combined_keywords() cls._hotwords = sorted(set( combined["opening"] + combined["closing"] )) return cls._hotwords @classmethod def 
preprocess_text(cls, texts: List[str]) -> List[str]: """将方言文本转换为标准表达(使用一次性替换)""" if cls._dialect_pattern is None: # 创建方言替换的正则表达式(一次性) keys = sorted(cls.DIALECT_MAPPING.keys(), key=len, reverse=True) pattern_str = "|".join(re.escape(key) for key in keys) cls._dialect_pattern = re.compile(pattern_str) def replace_match(match): return cls.DIALECT_MAPPING[match.group(0)] return [cls._dialect_pattern.sub(replace_match, text) for text in texts] # ====================== 系统配置管理器 ====================== class ConfigManager: """管理应用程序配置""" _instance = None def __new__(cls): if cls._instance is None: cls._instance = super().__new__(cls) cls._instance._init_config() return cls._instance def _init_config(self): """初始化默认配置""" self.config = { "model_paths": { "asr": "./models/iic-speech_paraformer-large-vad-punc-spk_asr_nat-zh-cn", "sentiment": "./models/IDEA-CCNL-Erlangshen-Roberta-110M-Sentiment" }, "sample_rate": 16000, "silence_thresh": -40, "min_silence_len": 1000, "max_concurrent": 1, "dialect_config": "guizhou" } self.load_config() def load_config(self): """从文件加载配置""" try: if os.path.exists("config.json"): with open("config.json", "r") as f: self.config.update(json.load(f)) except: pass def save_config(self): """保存配置到文件""" try: with open("config.json", "w") as f: json.dump(self.config, f, indent=2) except: pass def get(self, key: str, default=None): """获取配置值""" return self.config.get(key, default) def set(self, key: str, value): """设置配置值""" self.config[key] = value self.save_config() # ====================== 音频处理工具(优化版) ====================== class AudioProcessor: """处理音频转换和特征提取(避免重复加载)""" SUPPORTED_FORMATS = ('.mp3', '.wav', '.amr', '.m4a') @staticmethod def convert_to_wav(input_path: str, temp_dir: str) -> Optional[List[str]]: """将音频转换为WAV格式(在静音处分割)""" try: os.makedirs(temp_dir, exist_ok=True) # 检查文件格式 if not any(input_path.lower().endswith(ext) for ext in AudioProcessor.SUPPORTED_FORMATS): raise ValueError(f"不支持的音频格式: {os.path.splitext(input_path)[1]}") if input_path.lower().endswith('.wav'): return [input_path] # 已经是WAV格式 # 检查ffmpeg是否可用 try: AudioSegment.converter = "ffmpeg" # 显式指定ffmpeg audio = AudioSegment.from_file(input_path) except FileNotFoundError: print("错误: 未找到ffmpeg,请安装并添加到环境变量") return None # 长音频分段(超过10分钟) if len(audio) > 10 * 60 * 1000: # 10分钟 return AudioProcessor._split_long_audio(audio, input_path, temp_dir) else: return AudioProcessor._convert_single_audio(audio, input_path, temp_dir) except Exception as e: print(f"格式转换失败: {str(e)}") return None @staticmethod def _split_long_audio(audio: AudioSegment, input_path: str, temp_dir: str) -> List[str]: """分割长音频文件""" wav_paths = [] # 在静音处分割音频 chunks = split_on_silence( audio, min_silence_len=ConfigManager().get("min_silence_len", 1000), silence_thresh=ConfigManager().get("silence_thresh", -40), keep_silence=500 ) # 合并小片段,避免分段过多 merged_chunks = [] current_chunk = AudioSegment.empty() for chunk in chunks: if len(current_chunk) + len(chunk) < 5 * 60 * 1000: # 5分钟 current_chunk += chunk else: if len(current_chunk) > 0: merged_chunks.append(current_chunk) current_chunk = chunk if len(current_chunk) > 0: merged_chunks.append(current_chunk) # 导出分段音频 sample_rate = ConfigManager().get("sample_rate", 16000) for i, chunk in enumerate(merged_chunks): chunk = chunk.set_frame_rate(sample_rate).set_channels(1) chunk_path = os.path.join( temp_dir, f"{os.path.splitext(os.path.basename(input_path))[0]}_part{i + 1}.wav" ) chunk.export(chunk_path, format="wav") wav_paths.append(chunk_path) return wav_paths @staticmethod def _convert_single_audio(audio: 
AudioSegment, input_path: str, temp_dir: str) -> List[str]: """转换单个短音频文件""" sample_rate = ConfigManager().get("sample_rate", 16000) audio = audio.set_frame_rate(sample_rate).set_channels(1) wav_path = os.path.join(temp_dir, os.path.splitext(os.path.basename(input_path))[0] + ".wav") audio.export(wav_path, format="wav") return [wav_path] @staticmethod def extract_features_from_audio(y: np.ndarray, sr: int) -> Dict[str, float]: """从已加载的音频数据中提取特征(避免重复加载)""" try: duration = librosa.get_duration(y=y, sr=sr) segment_length = 60 # 60秒分段 total_segments = max(1, int(np.ceil(duration / segment_length))) syllable_rates = [] volume_stabilities = [] for i in range(total_segments): start = i * segment_length end = min((i + 1) * segment_length, duration) y_segment = y[int(start * sr):int(end * sr)] if len(y_segment) == 0: continue # 语速计算 intervals = librosa.effects.split(y_segment, top_db=20) speech_duration = sum(end - start for start, end in intervals) / sr syllable_rate = len(intervals) / speech_duration if speech_duration > 0 else 0 syllable_rates.append(syllable_rate) # 音量稳定性 rms = librosa.feature.rms(y=y_segment)[0] if len(rms) > 0 and np.mean(rms) > 0: volume_stability = np.std(rms) / np.mean(rms) volume_stabilities.append(volume_stability) return { "duration": duration, "syllable_rate": round(np.mean(syllable_rates) if syllable_rates else 0, 2), "volume_stability": round(np.mean(volume_stabilities) if volume_stabilities else 0, 4) } except: return {"duration": 0, "syllable_rate": 0, "volume_stability": 0} # ====================== 模型加载器(优化版) ====================== class ModelLoader: """加载和管理AI模型(使用RLock)""" asr_pipeline = None sentiment_model = None sentiment_tokenizer = None model_lock = RLock() # 使用RLock代替Lock @classmethod def load_models(cls): """加载所有模型""" config = ConfigManager() # 加载ASR模型 if not cls.asr_pipeline: with cls.model_lock: if not cls.asr_pipeline: # 双重检查锁定 cls.load_asr_model(config.get("model_paths")["asr"]) # 加载情感分析模型 if not cls.sentiment_model: with cls.model_lock: if not cls.sentiment_model: # 双重检查锁定 cls.load_sentiment_model(config.get("model_paths")["sentiment"]) @classmethod def reload_models(cls): """重新加载模型(配置变更后)""" with cls.model_lock: cls.asr_pipeline = None cls.sentiment_model = None cls.sentiment_tokenizer = None gc.collect() if torch.cuda.is_available(): torch.cuda.empty_cache() cls.load_models() @classmethod def load_asr_model(cls, model_path: str): """加载语音识别模型""" try: if not os.path.exists(model_path): raise FileNotFoundError(f"ASR模型路径不存在: {model_path}") asr_kwargs = {} if hasattr(torch, 'quantization'): asr_kwargs['quantize'] = 'int8' print("启用ASR模型量化") cls.asr_pipeline = pipeline( task=Tasks.auto_speech_recognition, model=model_path, device='cuda' if torch.cuda.is_available() else 'cpu', **asr_kwargs ) print("ASR模型加载完成") except Exception as e: print(f"加载ASR模型失败: {str(e)}") raise @classmethod def load_sentiment_model(cls, model_path: str): """加载情感分析模型""" try: if not os.path.exists(model_path): raise FileNotFoundError(f"情感分析模型路径不存在: {model_path}") cls.sentiment_model = AutoModelForSequenceClassification.from_pretrained(model_path) cls.sentiment_tokenizer = AutoTokenizer.from_pretrained(model_path) if torch.cuda.is_available(): cls.sentiment_model = cls.sentiment_model.cuda() print("情感分析模型加载完成") except Exception as e: print(f"加载情感分析模型失败: {str(e)}") raise # ====================== 核心分析线程(优化版) ====================== class AnalysisThread(QThread): progress_updated = pyqtSignal(int, str, str) result_ready = pyqtSignal(dict) finished_all = pyqtSignal() error_occurred = 
pyqtSignal(str, str) memory_warning = pyqtSignal() resource_cleanup = pyqtSignal() def __init__(self, audio_paths: List[str], temp_dir: str = "temp_wav"): super().__init__() self.audio_paths = audio_paths self.temp_dir = temp_dir self.is_running = True self.current_file = "" self.max_concurrent = min( ConfigManager().get("max_concurrent", 1), self.get_max_concurrent_tasks() ) self.resource_monitor = ResourceMonitor() self.semaphore = Semaphore(self.max_concurrent) os.makedirs(temp_dir, exist_ok=True) def run(self): try: if not (ModelLoader.asr_pipeline and ModelLoader.sentiment_model): self.error_occurred.emit("模型未加载", "请等待模型加载完成后再开始分析") return self.progress_updated.emit(0, f"最大并行任务数: {self.max_concurrent}", "") # 使用线程池并行处理 with concurrent.futures.ThreadPoolExecutor(max_workers=self.max_concurrent) as executor: # 创建任务 future_to_path = {} for path in self.audio_paths: if not self.is_running: break # 使用信号量控制并发 self.semaphore.acquire() batch_size = self.get_available_batch_size() future = executor.submit(self.analyze_audio, path, batch_size) future_to_path[future] = path future.add_done_callback(lambda f: self.semaphore.release()) # 处理完成的任务 for i, future in enumerate(concurrent.futures.as_completed(future_to_path)): if not self.is_running: break path = future_to_path[future] self.current_file = os.path.basename(path) # 内存检查 if self.check_memory_usage(): self.memory_warning.emit() self.is_running = False break try: result = future.result() if result: self.result_ready.emit(result) # 更新进度 progress = int((i + 1) / len(self.audio_paths) * 100) self.progress_updated.emit( progress, f"完成: {self.current_file} ({i + 1}/{len(self.audio_paths)})", self.current_file ) except Exception as e: result = { "file_name": self.current_file, "status": "error", "error": f"分析失败: {str(e)}" } self.result_ready.emit(result) # 分析完成后 if self.is_running: self.finished_all.emit() except Exception as e: self.error_occurred.emit("系统错误", str(e)) traceback.print_exc() finally: # 确保资源清理 self.resource_cleanup.emit() self.cleanup_resources() def analyze_audio(self, audio_path: str, batch_size: int) -> Dict: """分析单个音频文件(整合所有优化)""" result = { "file_name": os.path.basename(audio_path), "status": "processing" } wav_paths = [] try: # 1. 音频格式转换 wav_paths = AudioProcessor.convert_to_wav(audio_path, self.temp_dir) if not wav_paths: result["error"] = "格式转换失败(请检查ffmpeg是否安装)" result["status"] = "error" return result # 2. 提取音频特征(合并所有分段) audio_features = self._extract_audio_features(wav_paths) result.update(audio_features) result["duration_str"] = self._format_duration(audio_features["duration"]) # 3. 语音识别与处理 all_segments, full_text = self._process_asr_segments(wav_paths) # 4. 说话人区分(使用优化后的方法) agent_segments, customer_segments = self.identify_speakers(all_segments) # 5. 生成带说话人标签的文本 labeled_text = self._generate_labeled_text(all_segments, agent_segments, customer_segments) result["asr_text"] = labeled_text.strip() # 6. 文本分析(包含方言预处理) text_analysis = self._analyze_text(agent_segments, customer_segments, batch_size) result.update(text_analysis) # 7. 服务规范检查(使用方言适配的关键词) service_check = self._check_service_rules(agent_segments) result.update(service_check) # 8. 
问题解决率(上下文关联) result["issue_resolved"] = self._check_issue_resolution(customer_segments, agent_segments) result["status"] = "success" except Exception as e: result["error"] = f"分析失败: {str(e)}" result["status"] = "error" finally: # 清理临时文件 self._cleanup_temp_files(wav_paths) # 显式内存清理 self.cleanup_resources() return result def identify_speakers(self, segments: List[Dict]) -> Tuple[List[Dict], List[Dict]]: """区分客服与客户(优化版:子串匹配+提前终止)""" if not segments: return [], [] # 获取预编译的正则表达式 opening_patterns = DialectConfig.get_compiled_opening() closing_patterns = DialectConfig.get_compiled_closing() agent_id = None found_by_opening = False found_by_closing = False # 策略1:在前3段中查找开场白关键词(提前终止) for seg in segments[:3]: text = seg["text"] # 检查是否包含任意开场关键词 for pattern in opening_patterns: if pattern.search(text): agent_id = seg["spk_id"] found_by_opening = True break # 找到即终止内层循环 if found_by_opening: break # 找到即终止外层循环 # 策略2:在后3段中查找结束语关键词(提前终止) if not found_by_opening: # 逆序遍历最后3段 for seg in reversed(segments[-3:] if len(segments) >= 3 else segments): text = seg["text"] # 检查是否包含任意结束关键词 for pattern in closing_patterns: if pattern.search(text): agent_id = seg["spk_id"] found_by_closing = True break # 找到即终止内层循环 if found_by_closing: break # 找到即终止外层循环 # 策略3:如果前两种策略未找到,使用说话频率最高的作为客服 if agent_id is None: spk_counts = {} for seg in segments: spk_id = seg["spk_id"] spk_counts[spk_id] = spk_counts.get(spk_id, 0) + 1 if spk_counts: agent_id = max(spk_counts, key=spk_counts.get) else: return [], [] # 如果没有有效的agent_id,返回空列表 # 使用集合存储agent的spk_id,提高查询效率 agent_spk_ids = {agent_id} return ( [seg for seg in segments if seg["spk_id"] in agent_spk_ids], [seg for seg in segments if seg["spk_id"] not in agent_spk_ids] ) def _analyze_text(self, agent_segments: List[Dict], customer_segments: List[Dict], batch_size: int) -> Dict: """文本情感分析(优化版:向量化批处理)""" def analyze_speaker(segments: List[Dict], speaker_type: str) -> Dict: if not segments: return { f"{speaker_type}_negative": 0.0, f"{speaker_type}_neutral": 1.0, f"{speaker_type}_positive": 0.0, f"{speaker_type}_emotions": "无" } # 方言预处理 - 使用优化的一次性替换 texts = [seg["text"] for seg in segments] processed_texts = DialectConfig.preprocess_text(texts) # 使用DataLoader进行批处理 with ModelLoader.model_lock: inputs = ModelLoader.sentiment_tokenizer( processed_texts, padding=True, truncation=True, max_length=128, return_tensors="pt" ) # 创建TensorDataset和DataLoader dataset = TensorDataset(inputs['input_ids'], inputs['attention_mask']) dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=False) device = "cuda" if torch.cuda.is_available() else "cpu" sentiment_dist = [] emotions = [] # 批量处理 for batch in dataloader: input_ids, attention_mask = batch inputs = { 'input_ids': input_ids.to(device), 'attention_mask': attention_mask.to(device) } with torch.no_grad(): outputs = ModelLoader.sentiment_model(**inputs) batch_probs = torch.nn.functional.softmax(outputs.logits, dim=-1) sentiment_dist.append(batch_probs.cpu()) # 情绪识别(批量) emotion_keywords = ["愤怒", "生气", "鬼火", "不耐烦", "搞哪样嘛"] for text in processed_texts: if any(kw in text for kw in emotion_keywords): if any(kw in text for kw in ["愤怒", "生气", "鬼火"]): emotions.append("愤怒") elif any(kw in text for kw in ["不耐烦", "搞哪样嘛"]): emotions.append("不耐烦") # 合并结果 if sentiment_dist: all_probs = torch.cat(sentiment_dist, dim=0) avg_sentiment = torch.mean(all_probs, dim=0).tolist() else: avg_sentiment = [0.0, 1.0, 0.0] # 默认值 return { f"{speaker_type}_negative": round(avg_sentiment[0], 4), f"{speaker_type}_neutral": round(avg_sentiment[1], 4), f"{speaker_type}_positive": 
round(avg_sentiment[2], 4), f"{speaker_type}_emotions": ",".join(set(emotions)) if emotions else "无" } return { **analyze_speaker(agent_segments, "agent"), **analyze_speaker(customer_segments, "customer") } # ====================== 辅助方法 ====================== def get_available_batch_size(self) -> int: """根据GPU内存动态调整batch size(考虑并行)""" if not torch.cuda.is_available(): return 4 # CPU默认批次 total_mem = torch.cuda.get_device_properties(0).total_memory / (1024 ** 3) # GB per_task_mem = total_mem / self.max_concurrent # 修正批次大小逻辑:显存越少,批次越小 if per_task_mem < 2: return 2 elif per_task_mem < 4: return 4 else: return 8 def get_max_concurrent_tasks(self) -> int: """根据系统资源计算最大并行任务数""" if torch.cuda.is_available(): total_mem = torch.cuda.get_device_properties(0).total_memory / (1024 ** 3) if total_mem < 6: return 1 elif total_mem < 12: return 2 else: return 3 else: # CPU模式下根据核心数设置 return max(1, os.cpu_count() // 2) def check_memory_usage(self) -> bool: """检查内存使用(动态阈值)""" try: mem_percent = self.resource_monitor.memory_percent() return mem_percent > 85 # 超过85%则警告 except: return False def _extract_audio_features(self, wav_paths: List[str]) -> Dict[str, float]: """提取音频特征(合并所有分段)""" combined_y = np.array([], dtype=np.float32) sr = ConfigManager().get("sample_rate", 16000) for path in wav_paths: y, _ = librosa.load(path, sr=sr) combined_y = np.concatenate((combined_y, y)) return AudioProcessor.extract_features_from_audio(combined_y, sr) def _process_asr_segments(self, wav_paths: List[str]) -> Tuple[List[Dict], str]: """处理ASR分段""" segments = [] full_text = "" for path in wav_paths: result = ModelLoader.asr_pipeline( path, hotwords=DialectConfig.get_asr_hotwords(), output_dir=None ) for seg in result[0]["sentences"]: segments.append({ "start": seg["start"], "end": seg["end"], "text": seg["text"], "spk_id": seg.get("spk_id", "0") }) full_text += seg["text"] + " " return segments, full_text.strip() def _generate_labeled_text(self, all_segments: List[Dict], agent_segments: List[Dict], customer_segments: List[Dict]) -> str: """生成带说话人标签的文本""" agent_spk_id = agent_segments[0]["spk_id"] if agent_segments else None customer_spk_id = customer_segments[0]["spk_id"] if customer_segments else None labeled_text = [] for seg in all_segments: speaker = "客服" if seg["spk_id"] == agent_spk_id else "客户" labeled_text.append(f"[{speaker}]: {seg['text']}") return "\n".join(labeled_text) def _check_service_rules(self, agent_segments: List[Dict]) -> Dict: """检查服务规范""" forbidden_keywords = DialectConfig.get_combined_keywords()["forbidden"] found_forbidden = [] found_opening = False found_closing = False # 检查开场白(前3段) for seg in agent_segments[:3]: text = seg["text"] if any(kw in text for kw in DialectConfig.get_combined_keywords()["opening"]): found_opening = True break # 检查结束语(后3段) for seg in reversed(agent_segments[-3:] if len(agent_segments) >= 3 else agent_segments): text = seg["text"] if any(kw in text for kw in DialectConfig.get_combined_keywords()["closing"]): found_closing = True break # 检查禁用词 for seg in agent_segments: text = seg["text"] for kw in forbidden_keywords: if kw in text: found_forbidden.append(kw) break return { "opening_found": found_opening, "closing_found": found_closing, "forbidden_words": ", ".join(set(found_forbidden)) if found_forbidden else "无" } def _check_issue_resolution(self, customer_segments: List[Dict], agent_segments: List[Dict]) -> bool: """检查问题是否解决(上下文关联)""" # 简化实现:如果客户最后一段包含"谢谢"或"解决",则认为问题已解决 if customer_segments: last_customer_text = customer_segments[-1]["text"] resolution_keywords = ["谢谢", "解决", 
"可以", "好的", "明白了"] if any(kw in last_customer_text for kw in resolution_keywords): return True # 如果客服最后一段包含"还有什么问题"且客户没有回应 if agent_segments: last_agent_text = agent_segments[-1]["text"] if "还有什么问题" in last_agent_text: return True return False def _cleanup_temp_files(self, paths: List[str]): """清理临时文件""" for path in paths: try: if os.path.exists(path): os.remove(path) except: pass def _format_duration(self, seconds: float) -> str: """将秒转换为时分秒格式""" minutes, seconds = divmod(int(seconds), 60) hours, minutes = divmod(minutes, 60) return f"{hours:02d}:{minutes:02d}:{seconds:02d}" def cleanup_resources(self): """显式清理资源""" gc.collect() if torch.cuda.is_available(): torch.cuda.empty_cache() def stop(self): """停止分析""" self.is_running = False # ====================== 模型加载线程 ====================== class ModelLoadThread(QThread): progress_updated = pyqtSignal(int, str) finished = pyqtSignal(bool, str) def run(self): try: # 检查模型路径 config = ConfigManager().get("model_paths") if not os.path.exists(config["asr"]): self.finished.emit(False, "ASR模型路径不存在") return if not os.path.exists(config["sentiment"]): self.finished.emit(False, "情感分析模型路径不存在") return self.progress_updated.emit(20, "加载语音识别模型...") ModelLoader.load_asr_model(config["asr"]) self.progress_updated.emit(60, "加载情感分析模型...") ModelLoader.load_sentiment_model(config["sentiment"]) self.progress_updated.emit(100, "模型加载完成") self.finished.emit(True, "模型加载成功。建议:可通过设置界面修改模型路径") except Exception as e: self.finished.emit(False, f"模型加载失败: {str(e)}。建议:检查模型路径是否正确,或重新下载模型文件") # ====================== GUI主界面 ====================== class MainWindow(QMainWindow): def __init__(self): super().__init__() self.setWindowTitle("贵州方言客服质检系统") self.setGeometry(100, 100, 1200, 800) self.setup_ui() self.setup_menu() self.analysis_thread = None self.model_load_thread = None self.temp_dir = "temp_wav" os.makedirs(self.temp_dir, exist_ok=True) def setup_ui(self): """设置用户界面""" # 主布局 main_widget = QWidget() main_layout = QVBoxLayout() main_widget.setLayout(main_layout) self.setCentralWidget(main_widget) # 工具栏 toolbar = QToolBar("主工具栏") toolbar.setIconSize(QSize(24, 24)) self.addToolBar(toolbar) # 添加文件按钮 add_file_action = QAction(QIcon("icons/add.png"), "添加文件", self) add_file_action.triggered.connect(self.add_files) toolbar.addAction(add_file_action) # 开始分析按钮 analyze_action = QAction(QIcon("icons/start.png"), "开始分析", self) analyze_action.triggered.connect(self.start_analysis) toolbar.addAction(analyze_action) # 停止按钮 stop_action = QAction(QIcon("icons/stop.png"), "停止分析", self) stop_action.triggered.connect(self.stop_analysis) toolbar.addAction(stop_action) # 设置按钮 settings_action = QAction(QIcon("icons/settings.png"), "设置", self) settings_action.triggered.connect(self.open_settings) toolbar.addAction(settings_action) # 分割布局 splitter = QSplitter(Qt.Horizontal) main_layout.addWidget(splitter) # 左侧文件列表 left_widget = QWidget() left_layout = QVBoxLayout() left_widget.setLayout(left_layout) file_list_label = QLabel("待分析文件列表") file_list_label.setFont(QFont("Arial", 12, QFont.Bold)) left_layout.addWidget(file_list_label) self.file_list = QListWidget() self.file_list.setSelectionMode(QListWidget.ExtendedSelection) left_layout.addWidget(self.file_list) # 右侧结果区域 right_widget = QWidget() right_layout = QVBoxLayout() right_widget.setLayout(right_layout) # 进度条 progress_label = QLabel("分析进度") progress_label.setFont(QFont("Arial", 12, QFont.Bold)) right_layout.addWidget(progress_label) self.progress_bar = QProgressBar() self.progress_bar.setRange(0, 100) self.progress_bar.setTextVisible(True) 
right_layout.addWidget(self.progress_bar) # 当前文件标签 self.current_file_label = QLabel("当前文件: 无") right_layout.addWidget(self.current_file_label) # 结果标签页 self.tab_widget = QTabWidget() right_layout.addWidget(self.tab_widget, 1) # 文本结果标签页 text_tab = QWidget() text_layout = QVBoxLayout() text_tab.setLayout(text_layout) self.text_result = QTextEdit() self.text_result.setReadOnly(True) text_layout.addWidget(self.text_result) self.tab_widget.addTab(text_tab, "文本结果") # 详细结果标签页 detail_tab = QWidget() detail_layout = QVBoxLayout() detail_tab.setLayout(detail_layout) self.result_table = QTableWidget() self.result_table.setColumnCount(10) self.result_table.setHorizontalHeaderLabels([ "文件名", "时长", "语速", "音量稳定性", "客服情感", "客户情感", "开场白", "结束语", "禁用词", "问题解决" ]) self.result_table.horizontalHeader().setSectionResizeMode(QHeaderView.Stretch) detail_layout.addWidget(self.result_table) self.tab_widget.addTab(detail_tab, "详细结果") # 添加左右部件到分割器 splitter.addWidget(left_widget) splitter.addWidget(right_widget) splitter.setSizes([300, 900]) def setup_menu(self): """设置菜单栏""" menu_bar = self.menuBar() # 文件菜单 file_menu = menu_bar.addMenu("文件") add_file_action = QAction("添加文件", self) add_file_action.triggered.connect(self.add_files) file_menu.addAction(add_file_action) export_action = QAction("导出结果", self) export_action.triggered.connect(self.export_results) file_menu.addAction(export_action) exit_action = QAction("退出", self) exit_action.triggered.connect(self.close) file_menu.addAction(exit_action) # 分析菜单 analysis_menu = menu_bar.addMenu("分析") start_action = QAction("开始分析", self) start_action.triggered.connect(self.start_analysis) analysis_menu.addAction(start_action) stop_action = QAction("停止分析", self) stop_action.triggered.connect(self.stop_analysis) analysis_menu.addAction(stop_action) # 设置菜单 settings_menu = menu_bar.addMenu("设置") config_action = QAction("系统配置", self) config_action.triggered.connect(self.open_settings) settings_menu.addAction(config_action) model_action = QAction("加载模型", self) model_action.triggered.connect(self.load_models) settings_menu.addAction(model_action) def add_files(self): """添加文件到分析列表""" files, _ = QFileDialog.getOpenFileNames( self, "选择音频文件", "", "音频文件 (*.mp3 *.wav *.amr *.m4a)" ) if files: for file in files: self.file_list.addItem(file) def start_analysis(self): """开始分析""" if self.file_list.count() == 0: QMessageBox.warning(self, "警告", "请先添加要分析的音频文件") return if not (ModelLoader.asr_pipeline and ModelLoader.sentiment_model): QMessageBox.warning(self, "警告", "模型未加载,请先加载模型") return # 获取文件路径 audio_paths = [self.file_list.item(i).text() for i in range(self.file_list.count())] # 清空结果 self.text_result.clear() self.result_table.setRowCount(0) # 创建分析线程 self.analysis_thread = AnalysisThread(audio_paths, self.temp_dir) # 连接信号 self.analysis_thread.progress_updated.connect(self.update_progress) self.analysis_thread.result_ready.connect(self.handle_result) self.analysis_thread.finished_all.connect(self.analysis_finished) self.analysis_thread.error_occurred.connect(self.show_error) self.analysis_thread.memory_warning.connect(self.handle_memory_warning) self.analysis_thread.resource_cleanup.connect(self.cleanup_resources) # 启动线程 self.analysis_thread.start() def stop_analysis(self): """停止分析""" if self.analysis_thread and self.analysis_thread.isRunning(): self.analysis_thread.stop() self.analysis_thread.wait() QMessageBox.information(self, "信息", "分析已停止") def load_models(self): """加载模型""" if self.model_load_thread and self.model_load_thread.isRunning(): return self.model_load_thread = ModelLoadThread() 
self.model_load_thread.progress_updated.connect( lambda value, msg: self.progress_bar.setValue(value) ) self.model_load_thread.finished.connect(self.handle_model_load_result) self.model_load_thread.start() def update_progress(self, progress: int, message: str, current_file: str): """更新进度""" self.progress_bar.setValue(progress) self.current_file_label.setText(f"当前文件: {current_file}") def handle_result(self, result: Dict): """处理分析结果""" # 添加到文本结果 self.text_result.append(f"文件: {result['file_name']}") self.text_result.append(f"状态: {result['status']}") if result["status"] == "success": self.text_result.append(f"时长: {result['duration_str']}") self.text_result.append(f"语速: {result['syllable_rate']} 音节/秒") self.text_result.append(f"音量稳定性: {result['volume_stability']}") self.text_result.append(f"客服情感: 负面({result['agent_negative']:.2%}) " f"中性({result['agent_neutral']:.2%}) " f"正面({result['agent_positive']:.2%})") self.text_result.append(f"客服情绪: {result['agent_emotions']}") self.text_result.append(f"客户情感: 负面({result['customer_negative']:.2%}) " f"中性({result['customer_neutral']:.2%}) " f"正面({result['customer_positive']:.2%})") self.text_result.append(f"客户情绪: {result['customer_emotions']}") self.text_result.append(f"开场白: {'有' if result['opening_found'] else '无'}") self.text_result.append(f"结束语: {'有' if result['closing_found'] else '无'}") self.text_result.append(f"禁用词: {result['forbidden_words']}") self.text_result.append(f"问题解决: {'是' if result['issue_resolved'] else '否'}") self.text_result.append("\n=== 对话文本 ===\n") self.text_result.append(result["asr_text"]) self.text_result.append("\n" + "=" * 50 + "\n") # 添加到结果表格 row = self.result_table.rowCount() self.result_table.insertRow(row) self.result_table.setItem(row, 0, QTableWidgetItem(result["file_name"])) self.result_table.setItem(row, 1, QTableWidgetItem(result["duration_str"])) self.result_table.setItem(row, 2, QTableWidgetItem(str(result["syllable_rate"]))) self.result_table.setItem(row, 3, QTableWidgetItem(str(result["volume_stability"]))) self.result_table.setItem(row, 4, QTableWidgetItem( f"负:{result['agent_negative']:.2f} 中:{result['agent_neutral']:.2f} 正:{result['agent_positive']:.2f}" )) self.result_table.setItem(row, 5, QTableWidgetItem( f"负:{result['customer_negative']:.2f} 中:{result['customer_neutral']:.2f} 正:{result['customer_positive']:.2f}" )) self.result_table.setItem(row, 6, QTableWidgetItem("是" if result["opening_found"] else "否")) self.result_table.setItem(row, 7, QTableWidgetItem("是" if result["closing_found"] else "否")) self.result_table.setItem(row, 8, QTableWidgetItem(result["forbidden_words"])) self.result_table.setItem(row, 9, QTableWidgetItem("是" if result["issue_resolved"] else "否")) # 根据结果着色 if not result["opening_found"]: self.result_table.item(row, 6).setBackground(QColor(255, 200, 200)) if not result["closing_found"]: self.result_table.item(row, 7).setBackground(QColor(255, 200, 200)) if result["forbidden_words"] != "无": self.result_table.item(row, 8).setBackground(QColor(255, 200, 200)) if not result["issue_resolved"]: self.result_table.item(row, 9).setBackground(QColor(255, 200, 200)) def analysis_finished(self): """分析完成""" QMessageBox.information(self, "完成", "所有音频分析完成") self.progress_bar.setValue(100) def show_error(self, title: str, message: str): """显示错误信息""" QMessageBox.critical(self, title, message) def handle_memory_warning(self): """处理内存警告""" QMessageBox.warning(self, "内存警告", "内存使用过高,分析已停止。请关闭其他应用程序后重试") def cleanup_resources(self): """清理资源""" gc.collect() if torch.cuda.is_available(): torch.cuda.empty_cache() def 
    def handle_model_load_result(self, success: bool, message: str):
        """Handle the model loading result."""
        if success:
            QMessageBox.information(self, "成功", message)
        else:
            QMessageBox.critical(self, "错误", message)

    def open_settings(self):
        """Open the settings dialog."""
        settings_dialog = QDialog(self)
        settings_dialog.setWindowTitle("系统设置")
        settings_dialog.setFixedSize(500, 400)
        layout = QVBoxLayout()

        # ASR model path
        asr_layout = QHBoxLayout()
        asr_label = QLabel("ASR模型路径:")
        asr_line = QLineEdit(ConfigManager().get("model_paths")["asr"])
        asr_browse = QPushButton("浏览...")

        def browse_asr():
            path = QFileDialog.getExistingDirectory(self, "选择ASR模型目录")
            if path:
                asr_line.setText(path)

        asr_browse.clicked.connect(browse_asr)
        asr_layout.addWidget(asr_label)
        asr_layout.addWidget(asr_line)
        asr_layout.addWidget(asr_browse)
        layout.addLayout(asr_layout)

        # Sentiment-analysis model path
        sentiment_layout = QHBoxLayout()
        sentiment_label = QLabel("情感模型路径:")
        sentiment_line = QLineEdit(ConfigManager().get("model_paths")["sentiment"])
        sentiment_browse = QPushButton("浏览...")

        def browse_sentiment():
            path = QFileDialog.getExistingDirectory(self, "选择情感模型目录")
            if path:
                sentiment_line.setText(path)

        sentiment_browse.clicked.connect(browse_sentiment)
        sentiment_layout.addWidget(sentiment_label)
        sentiment_layout.addWidget(sentiment_line)
        sentiment_layout.addWidget(sentiment_browse)
        layout.addLayout(sentiment_layout)

        # Concurrency setting
        concurrent_layout = QHBoxLayout()
        concurrent_label = QLabel("最大并发任务:")
        concurrent_spin = QSpinBox()
        concurrent_spin.setRange(1, 8)
        concurrent_spin.setValue(ConfigManager().get("max_concurrent", 1))
        concurrent_layout.addWidget(concurrent_label)
        concurrent_layout.addWidget(concurrent_spin)
        layout.addLayout(concurrent_layout)

        # Dialect setting
        dialect_layout = QHBoxLayout()
        dialect_label = QLabel("方言设置:")
        dialect_combo = QComboBox()
        dialect_combo.addItems(["标准普通话", "贵州方言"])
        dialect_combo.setCurrentIndex(1 if ConfigManager().get("dialect_config") == "guizhou" else 0)
        dialect_layout.addWidget(dialect_label)
        dialect_layout.addWidget(dialect_combo)
        layout.addLayout(dialect_layout)

        # Buttons
        button_box = QDialogButtonBox(QDialogButtonBox.Ok | QDialogButtonBox.Cancel)
        button_box.accepted.connect(settings_dialog.accept)
        button_box.rejected.connect(settings_dialog.reject)
        layout.addWidget(button_box)

        settings_dialog.setLayout(layout)

        if settings_dialog.exec_() == QDialog.Accepted:
            # Save the settings
            ConfigManager().set("model_paths", {
                "asr": asr_line.text(),
                "sentiment": sentiment_line.text()
            })
            ConfigManager().set("max_concurrent", concurrent_spin.value())
            ConfigManager().set("dialect_config",
                                "guizhou" if dialect_combo.currentIndex() == 1 else "standard")
            # Reload the models
            ModelLoader.reload_models()

    def export_results(self):
        """Export the results to CSV."""
        if self.result_table.rowCount() == 0:
            QMessageBox.warning(self, "警告", "没有可导出的结果")
            return
        path, _ = QFileDialog.getSaveFileName(
            self, "保存结果", "", "CSV文件 (*.csv)"
        )
        if path:
            try:
                with open(path, "w", encoding="utf-8") as f:
                    # Write the header row
                    headers = []
                    for col in range(self.result_table.columnCount()):
                        headers.append(self.result_table.horizontalHeaderItem(col).text())
                    f.write(",".join(headers) + "\n")
                    # Write the data rows
                    for row in range(self.result_table.rowCount()):
                        row_data = []
                        for col in range(self.result_table.columnCount()):
                            item = self.result_table.item(row, col)
                            row_data.append(item.text() if item else "")
                        f.write(",".join(row_data) + "\n")
                QMessageBox.information(self, "成功", f"结果已导出到: {path}")
            except Exception as e:
                QMessageBox.critical(self, "错误", f"导出失败: {str(e)}")

    def closeEvent(self, event):
        """Handle the window close event."""
        if self.analysis_thread and self.analysis_thread.isRunning():
            self.analysis_thread.stop()
            self.analysis_thread.wait()
        # Clean up the temp directory
        try:
            for file in os.listdir(self.temp_dir):
                os.remove(os.path.join(self.temp_dir, file))
            os.rmdir(self.temp_dir)
        except Exception:
            pass
        event.accept()


# ====================== Program entry point ======================
if __name__ == "__main__":
    torch.set_num_threads(4)  # Limit the number of CPU threads
    app = QApplication(sys.argv)
    # Set the application style
    app.setStyle('Fusion')
    window = MainWindow()
    window.show()
    sys.exit(app.exec_())
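To smoke-test the result-rendering path above without running any models, handle_result can be fed a hand-built dict. This is a minimal sketch, not part of the original program: the values are invented, but the keys are exactly the ones handle_result reads:

# Hypothetical smoke test: drive handle_result with a fabricated result
# (values are made up; keys match what the method reads above).
sample_result = {
    "file_name": "demo.wav",
    "status": "success",
    "duration_str": "00:03:25",
    "syllable_rate": 4.2,
    "volume_stability": 0.18,
    "agent_negative": 0.05, "agent_neutral": 0.80, "agent_positive": 0.15,
    "agent_emotions": "无",
    "customer_negative": 0.30, "customer_neutral": 0.60, "customer_positive": 0.10,
    "customer_emotions": "不耐烦",
    "opening_found": True,
    "closing_found": False,
    "forbidden_words": "无",
    "issue_resolved": True,
    "asr_text": "[客服]: 您好,很高兴为您服务\n[客户]: 我的问题处理好了",
}
window.handle_result(sample_result)  # should add one table row and flag the missing closing in red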
Standardize and compress this code: cut unnecessary computation and redundancy, and reduce memory use and code size, provided that program stability and accuracy are not affected:

import os
import sys
import re
import json
import gc
import time
import tempfile
import concurrent.futures
import difflib
import threading
import traceback
import numpy as np
import librosa
import torch
import psutil
import requests
import hashlib
import shutil
from typing import List, Dict, Tuple, Optional, Set
from threading import Lock, Semaphore, RLock
from datetime import datetime
from pydub import AudioSegment
from pydub.silence import split_on_silence
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks
from transformers import AutoModelForSequenceClassification, AutoTokenizer
from torch.utils.data import TensorDataset, DataLoader
from PyQt5.QtWidgets import (QApplication, QMainWindow, QWidget, QVBoxLayout, QHBoxLayout,
                             QPushButton, QLabel, QLineEdit, QTextEdit, QFileDialog,
                             QProgressBar, QGroupBox, QMessageBox, QListWidget, QSplitter,
                             QTabWidget, QTableWidget, QTableWidgetItem, QHeaderView,
                             QAction, QMenu, QToolBar, QCheckBox, QComboBox, QSpinBox,
                             QDialog, QDialogButtonBox, QStatusBar)
from PyQt5.QtCore import QThread, pyqtSignal, Qt, QTimer, QSize
from PyQt5.QtGui import QFont, QTextCursor, QColor, QIcon


# ====================== Resource monitor ======================
class ResourceMonitor:
    """Unified resource monitor (enhanced)."""

    def __init__(self):
        self.gpu_available = torch.cuda.is_available()

    def memory_percent(self) -> Dict[str, float]:
        """Return memory usage percentages for both CPU and GPU."""
        try:
            result = {
                "cpu": psutil.virtual_memory().percent
            }
            if self.gpu_available:
                allocated = torch.cuda.memory_allocated() / (1024 ** 3)
                total = torch.cuda.get_device_properties(0).total_memory / (1024 ** 3)
                result["gpu"] = (allocated / total) * 100 if total > 0 else 0
            return result
        except Exception as e:
            print(f"获取内存使用百分比失败: {str(e)}")
            return {"cpu": 0, "gpu": 0}


# ====================== Dialect configuration center (optimized) ======================
class DialectConfig:
    """Centralized dialect configuration for easy maintenance and extension (with caching)."""

    # Standard-Mandarin keywords
    STANDARD_KEYWORDS = {
        "opening": ["您好", "很高兴为您服务", "请问有什么可以帮您"],
        "closing": ["感谢来电", "祝您生活愉快", "再见"],
        "forbidden": ["不知道", "没办法", "你投诉吧", "随便你"]
    }

    # Guizhou-dialect keywords
    GUIZHOU_KEYWORDS = {
        "opening": ["麻烦您喽", "请问搞哪样", "有咋个可以帮您", "多谢喽"],
        "closing": ["搞归一喽", "麻烦您喽", "再见喽", "慢走喽"],
        "forbidden": ["搞不成", "没得法", "随便你喽", "你投诉吧喽"]
    }

    # Mapping from dialect to standard expressions (extended with more Guizhou dialect)
    DIALECT_MAPPING = {
        "恼火得很": "非常生气",
        "鬼火戳": "很愤怒",
        "搞不成": "无法完成",
        "没得": "没有",
        "搞哪样嘛": "做什么呢",
        "归一喽": "完成了",
        "咋个": "怎么",
        "克哪点": "去哪里",
        "麻烦您喽": "麻烦您了",
        "多谢喽": "多谢了",
        "憨包": "傻瓜",
        "归一": "结束",
        "板扎": "很好",
        "鬼火冒": "非常生气",
        "背时": "倒霉",
        "吃豁皮": "占便宜"
    }

    # Class-level caches
    _combined_keywords = None
    _compiled_opening = None
    _compiled_closing = None
    _hotwords = None
    _dialect_trie = None  # A trie replaces the earlier regex-based matching

    class TrieNode:
        """Trie node."""

        def __init__(self):
            self.children = {}
            self.is_end = False
            self.value = ""

    @classmethod
    def _build_dialect_trie(cls):
        """Build the dialect trie."""
        root = cls.TrieNode()
        for dialect, standard in sorted(cls.DIALECT_MAPPING.items(),
                                        key=lambda x: len(x[0]), reverse=True):
            node = root
            for char in dialect:
                if char not in node.children:
                    node.children[char] = cls.TrieNode()
                node = node.children[char]
            node.is_end = True
            node.value = standard
        return root

    @classmethod
    def get_combined_keywords(cls) -> Dict[str, List[str]]:
        """Return the merged keyword sets (cached)."""
        if cls._combined_keywords is None:
            cls._combined_keywords = {
                "opening": cls.STANDARD_KEYWORDS["opening"] + cls.GUIZHOU_KEYWORDS["opening"],
                "closing": cls.STANDARD_KEYWORDS["closing"] + cls.GUIZHOU_KEYWORDS["closing"],
                "forbidden": cls.STANDARD_KEYWORDS["forbidden"] + cls.GUIZHOU_KEYWORDS["forbidden"]
            }
        return cls._combined_keywords

    @classmethod
    def get_compiled_opening(cls) -> List[re.Pattern]:
        """Return precompiled opening-keyword patterns (cached)."""
        if cls._compiled_opening is None:
            keywords = cls.get_combined_keywords()["opening"]
            cls._compiled_opening = [re.compile(re.escape(kw)) for kw in keywords]
        return cls._compiled_opening

    @classmethod
    def get_compiled_closing(cls) -> List[re.Pattern]:
        """Return precompiled closing-keyword patterns (cached)."""
        if cls._compiled_closing is None:
            keywords = cls.get_combined_keywords()["closing"]
            cls._compiled_closing = [re.compile(re.escape(kw)) for kw in keywords]
        return cls._compiled_closing

    @classmethod
    def get_asr_hotwords(cls) -> List[str]:
        """Return the ASR hotword list (cached)."""
        if cls._hotwords is None:
            combined = cls.get_combined_keywords()
            cls._hotwords = sorted(set(
                combined["opening"] + combined["closing"]
            ))
        return cls._hotwords

    @classmethod
    def preprocess_text(cls, texts: List[str]) -> List[str]:
        """Convert dialect text to standard expressions (trie-optimized)."""
        if cls._dialect_trie is None:
            cls._dialect_trie = cls._build_dialect_trie()
        processed_texts = []
        for text in texts:
            # Efficient replacement via the trie
            processed = []
            i = 0
            n = len(text)
            while i < n:
                node = cls._dialect_trie
                j = i
                # Walk down the trie, remembering the longest match seen so far
                # (stopping at the first is_end node would take the shortest match,
                # e.g. "归一" instead of "归一喽")
                match_end = -1
                match_value = ""
                while j < n and text[j] in node.children:
                    node = node.children[text[j]]
                    j += 1
                    if node.is_end:
                        match_end = j
                        match_value = node.value
                if match_end != -1:
                    processed.append(match_value)
                    i = match_end
                else:
                    processed.append(text[i])
                    i += 1
            processed_texts.append(''.join(processed))
        return processed_texts
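# Illustration (hypothetical input, using only DIALECT_MAPPING above): with the
# longest-match walk, DialectConfig.preprocess_text(["事情归一喽,恼火得很"])
# yields ["事情完成了,非常生气"]; "归一喽" wins over the shorter entry "归一".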
# ====================== System configuration manager ======================
class ConfigManager:
    """Manage the application configuration."""
    _instance = None

    def __new__(cls):
        if cls._instance is None:
            cls._instance = super().__new__(cls)
            cls._instance._init_config()
        return cls._instance

    def _init_config(self):
        """Initialize the default configuration."""
        self.config = {
            "model_paths": {
                "asr": "./models/iic-speech_paraformer-large-vad-punc-spk_asr_nat-zh-cn",
                "sentiment": "./models/IDEA-CCNL-Erlangshen-Roberta-110M-Sentiment"
            },
            "sample_rate": 16000,
            "silence_thresh": -40,
            "min_silence_len": 1000,
            "max_concurrent": 1,
            "dialect_config": "guizhou",
            "max_audio_duration": 3600  # Maximum audio duration in seconds
        }
        self.load_config()

    def load_config(self):
        """Load the configuration from file."""
        try:
            if os.path.exists("config.json"):
                with open("config.json", "r") as f:
                    self.config.update(json.load(f))
        except Exception:
            pass

    def save_config(self):
        """Save the configuration to file."""
        try:
            with open("config.json", "w") as f:
                json.dump(self.config, f, indent=2)
        except Exception:
            pass

    def get(self, key: str, default=None):
        """Get a configuration value."""
        return self.config.get(key, default)

    def set(self, key: str, value):
        """Set a configuration value."""
        self.config[key] = value
        self.save_config()


# ====================== Audio processing utilities (optimized) ======================
class AudioProcessor:
    """Handle audio conversion and feature extraction (avoids repeated loading)."""

    SUPPORTED_FORMATS = ('.mp3', '.wav', '.amr', '.m4a')

    @staticmethod
    def convert_to_wav(input_path: str, temp_dir: str) -> Optional[List[str]]:
        """Convert audio to WAV format (splitting on silence)."""
        try:
            os.makedirs(temp_dir, exist_ok=True)
            # Check the file format
            if not any(input_path.lower().endswith(ext) for ext in AudioProcessor.SUPPORTED_FORMATS):
                raise ValueError(f"不支持的音频格式: {os.path.splitext(input_path)[1]}")
            if input_path.lower().endswith('.wav'):
                return [input_path]  # Already WAV
            # Check whether ffmpeg is available
            try:
                AudioSegment.converter = "ffmpeg"  # Explicitly select ffmpeg
                audio = AudioSegment.from_file(input_path)
            except FileNotFoundError:
                print("错误: 未找到ffmpeg,请安装并添加到环境变量")
                return None
            # Check whether the audio exceeds the duration limit
            max_duration = ConfigManager().get("max_audio_duration", 3600) * 1000  # milliseconds
            if len(audio) > max_duration:
                return AudioProcessor._split_long_audio(audio, input_path, temp_dir)
            else:
                return AudioProcessor._convert_single_audio(audio, input_path, temp_dir)
        except Exception as e:
            print(f"格式转换失败: {str(e)}")
            return None

    @staticmethod
    def _split_long_audio(audio: AudioSegment, input_path: str, temp_dir: str) -> List[str]:
        """Split a long audio file."""
        wav_paths = []
        # Split the audio at silences
        chunks = split_on_silence(
            audio,
            min_silence_len=ConfigManager().get("min_silence_len", 1000),
            silence_thresh=ConfigManager().get("silence_thresh", -40),
            keep_silence=500
        )
        # Merge small chunks to avoid excessive segmentation
        merged_chunks = []
        current_chunk = AudioSegment.empty()
        for chunk in chunks:
            if len(current_chunk) + len(chunk) < 5 * 60 * 1000:  # 5 minutes
                current_chunk += chunk
            else:
                if len(current_chunk) > 0:
                    merged_chunks.append(current_chunk)
                current_chunk = chunk
        if len(current_chunk) > 0:
            merged_chunks.append(current_chunk)
        # Export the merged chunks
        sample_rate = ConfigManager().get("sample_rate", 16000)
        for i, chunk in enumerate(merged_chunks):
            chunk = chunk.set_frame_rate(sample_rate).set_channels(1)
            chunk_path = os.path.join(
                temp_dir,
                f"{os.path.splitext(os.path.basename(input_path))[0]}_part{i + 1}.wav"
            )
            chunk.export(chunk_path, format="wav")
            wav_paths.append(chunk_path)
        return wav_paths

    @staticmethod
    def _convert_single_audio(audio: AudioSegment, input_path: str, temp_dir: str) -> List[str]:
        """Convert a single short audio file."""
        sample_rate = ConfigManager().get("sample_rate", 16000)
        audio = audio.set_frame_rate(sample_rate).set_channels(1)
        wav_path = os.path.join(temp_dir, os.path.splitext(os.path.basename(input_path))[0] + ".wav")
        audio.export(wav_path, format="wav")
        return [wav_path]

    @staticmethod
    def extract_features_from_audio(y: np.ndarray, sr: int) -> Dict[str, float]:
        """Extract features from audio data (streaming-style segment processing)."""
        try:
            duration = librosa.get_duration(y=y, sr=sr)
            segment_length = 60  # 60-second segments
            total_segments = max(1, int(np.ceil(duration / segment_length)))
            syllable_rates = []
            volume_stabilities = []
            total_samples = len(y)
            samples_per_segment = int(segment_length * sr)
            # Process each segment in turn
            for i in range(total_segments):
                start = i * samples_per_segment
                end = min((i + 1) * samples_per_segment, total_samples)
                y_segment = y[start:end]
                if len(y_segment) == 0:
                    continue
                # Speech rate (VAD-detected speech intervals)
                intervals = librosa.effects.split(y_segment, top_db=20)
                speech_samples = sum(seg_end - seg_start for seg_start, seg_end in intervals)
                speech_duration = speech_samples / sr
                if speech_duration > 0.1:
                    syllable_rate = len(intervals) / speech_duration
                else:
                    syllable_rate = 0
                syllable_rates.append(syllable_rate)
                # Volume stability (RMS energy)
                rms = librosa.feature.rms(y=y_segment, frame_length=2048, hop_length=512)[0]
                if len(rms) > 0 and np.mean(rms) > 0:
                    volume_stability = np.std(rms) / np.mean(rms)
                    volume_stabilities.append(volume_stability)
            # Average the valid per-segment values
            valid_syllable = [r for r in syllable_rates if r > 0]
            valid_volume = [v for v in volume_stabilities if v > 0]
            return {
                "duration": duration,
                "syllable_rate": round(np.mean(valid_syllable) if valid_syllable else 0, 2),
                "volume_stability": round(np.mean(valid_volume) if valid_volume else 0, 4)
            }
        except Exception as e:
            print(f"特征提取错误: {str(e)}")
            return {"duration": 0, "syllable_rate": 0, "volume_stability": 0}
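# Illustration (hypothetical file): with the default config, a 2-hour MP3 exceeds
# max_audio_duration (3600 s), so it is resampled to 16 kHz mono, split at
# silences of >= 1000 ms below -40 dBFS, merged into chunks of up to 5 minutes,
# and exported as <name>_part1.wav, <name>_part2.wav, ... in the temp directory;
# a 10-minute file is exported as a single <name>.wav instead.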
# ====================== Model loader (optimized) ======================
class ModelLoader:
    """Load and manage the AI models (uses an RLock)."""
    asr_pipeline = None
    sentiment_model = None
    sentiment_tokenizer = None
    model_lock = RLock()  # RLock instead of Lock
    models_loaded = False  # Model-loading status flag

    @classmethod
    def load_models(cls):
        """Load all models."""
        config = ConfigManager()
        # Load the ASR model
        if not cls.asr_pipeline:
            with cls.model_lock:
                if not cls.asr_pipeline:  # Double-checked locking
                    cls.load_asr_model(config.get("model_paths")["asr"])
        # Load the sentiment-analysis model
        if not cls.sentiment_model:
            with cls.model_lock:
                if not cls.sentiment_model:  # Double-checked locking
                    cls.load_sentiment_model(config.get("model_paths")["sentiment"])
        cls.models_loaded = True

    @classmethod
    def reload_models(cls):
        """Reload the models (after a configuration change)."""
        with cls.model_lock:
            cls.asr_pipeline = None
            cls.sentiment_model = None
            cls.sentiment_tokenizer = None
            gc.collect()
            if torch.cuda.is_available():
                torch.cuda.empty_cache()
            cls.load_models()

    @classmethod
    def load_asr_model(cls, model_path: str):
        """Load the speech-recognition model."""
        try:
            if not os.path.exists(model_path):
                raise FileNotFoundError(f"ASR模型路径不存在: {model_path}")
            asr_kwargs = {}
            if hasattr(torch, 'quantization'):
                asr_kwargs['quantize'] = 'int8'
                print("启用ASR模型量化")
            cls.asr_pipeline = pipeline(
                task=Tasks.auto_speech_recognition,
                model=model_path,
                device='cuda' if torch.cuda.is_available() else 'cpu',
                **asr_kwargs
            )
            print("ASR模型加载完成")
        except Exception as e:
            print(f"加载ASR模型失败: {str(e)}")
            raise

    @classmethod
    def load_sentiment_model(cls, model_path: str):
        """Load the sentiment-analysis model."""
        try:
            if not os.path.exists(model_path):
                raise FileNotFoundError(f"情感分析模型路径不存在: {model_path}")
            cls.sentiment_model = AutoModelForSequenceClassification.from_pretrained(model_path)
            cls.sentiment_tokenizer = AutoTokenizer.from_pretrained(model_path)
            cls.sentiment_model.eval()  # Inference only: disable dropout etc.
            if torch.cuda.is_available():
                cls.sentiment_model = cls.sentiment_model.cuda()
            print("情感分析模型加载完成")
        except Exception as e:
            print(f"加载情感分析模型失败: {str(e)}")
            raise
"""分析单个音频文件(整合所有优化)""" result = { "file_name": os.path.basename(audio_path), "status": "processing" } wav_paths = [] try: # 1. 音频格式转换 wav_paths = AudioProcessor.convert_to_wav(audio_path, self.temp_dir) if not wav_paths: result["error"] = "格式转换失败(请检查ffmpeg是否安装)" result["status"] = "error" return result # 2. 提取音频特征(合并所有分段) audio_features = self._extract_audio_features(wav_paths) result.update(audio_features) result["duration_str"] = self._format_duration(audio_features["duration"]) # 3. 语音识别与处理(使用批处理优化) all_segments, full_text = self._process_asr_segments(wav_paths) # 4. 说话人区分(使用优化后的方法) agent_segments, customer_segments = self.identify_speakers(all_segments) # 5. 生成带说话人标签的文本 labeled_text = self._generate_labeled_text(all_segments, agent_segments, customer_segments) result["asr_text"] = labeled_text.strip() # 6. 文本分析(包含方言预处理) text_analysis = self._analyze_text(agent_segments, customer_segments, batch_size) result.update(text_analysis) # 7. 服务规范检查(使用方言适配的关键词) service_check = self._check_service_rules(agent_segments) result.update(service_check) # 8. 问题解决率(上下文关联) result["issue_resolved"] = self._check_issue_resolution(customer_segments, agent_segments) result["status"] = "success" except Exception as e: result["error"] = f"分析失败: {str(e)}" result["status"] = "error" finally: # 清理临时文件(使用优化后的清理方法self._cleanup_temp_files(wav_paths) # 显式内存清理 self.cleanup_resources() return result def identify_speakers(self, segments: List[Dict]) -> Tuple[List[Dict], List[Dict]]: """区分客服与客户(增强版)""" if not segments: return [], [] # 1. 基于关键词的识别 agent_id = self._identify_by_keywords(segments) # 2. 基于说话模式的识别(如果关键词识别失败) if agent_id is None and len(segments) >= 4: agent_id = self._identify_by_speech_patterns(segments) # 3. 使用说话频率最高的作为客服(最后手段) if agent_id is None: spk_counts = {} for seg in segments: spk_id = seg["spk_id"] spk_counts[spk_id] = spk_counts.get(spk_id, 0) + 1 agent_id = max(spk_counts, key=spk_counts.get) if spk_counts else None if agent_id is None: return [], [] # 使用集合存储agent的spk_id agent_spk_ids = {agent_id} return ( [seg for seg in segments if seg["spk_id"] in agent_spk_ids], [seg for seg in segments if seg["spk_id"] not in agent_spk_ids] ) def _identify_by_keywords(self, segments: List[Dict]) -> Optional[str]: """基于关键词识别客服""" opening_patterns = DialectConfig.get_compiled_opening() closing_patterns = DialectConfig.get_compiled_closing() # 策略1:在前3段中查找开场白关键词 for seg in segments[:3]: text = seg["text"] for pattern in opening_patterns: if pattern.search(text): return seg["spk_id"] # 策略2:在后3段中查找结束语关键词 for seg in reversed(segments[-3:] if len(segments) >= 3 else segments): text = seg["text"] for pattern in closing_patterns: if pattern.search(text): return seg["spk_id"] return None def _identify_by_speech_patterns(self, segments: List[Dict]) -> Optional[str]: """基于说话模式识别客服""" # 分析说话模式特征 speaker_features = {} for seg in segments: spk_id = seg["spk_id"] if spk_id not in speaker_features: speaker_features[spk_id] = { "total_duration": 0.0, "turn_count": 0, "question_count": 0 } features = speaker_features[spk_id] features["total_duration"] += (seg["end"] - seg["start"]) features["turn_count"] += 1 # 检测问题(包含疑问词) if any(q_word in seg["text"] for q_word in ["吗", "呢", "?", "?", "如何", "怎样"]): features["question_count"] += 1 # 客服通常说话时间更长、提问更多 if speaker_features: # 计算说话时间占比 max_duration = max(f["total_duration"] for f in speaker_features.values()) # 计算提问频率 question_rates = { spk_id: features["question_count"] / features["turn_count"] for spk_id, features in speaker_features.items() } # 综合评分 candidates = [] for spk_id, 
    def identify_speakers(self, segments: List[Dict]) -> Tuple[List[Dict], List[Dict]]:
        """Separate agent and customer speakers (enhanced)."""
        if not segments:
            return [], []
        # 1. Keyword-based identification
        agent_id = self._identify_by_keywords(segments)
        # 2. Speech-pattern-based identification (if keywords fail)
        if agent_id is None and len(segments) >= 4:
            agent_id = self._identify_by_speech_patterns(segments)
        # 3. Last resort: pick the most frequent speaker as the agent
        if agent_id is None:
            spk_counts = {}
            for seg in segments:
                spk_id = seg["spk_id"]
                spk_counts[spk_id] = spk_counts.get(spk_id, 0) + 1
            agent_id = max(spk_counts, key=spk_counts.get) if spk_counts else None
        if agent_id is None:
            return [], []
        # Keep the agent spk_ids in a set
        agent_spk_ids = {agent_id}
        return (
            [seg for seg in segments if seg["spk_id"] in agent_spk_ids],
            [seg for seg in segments if seg["spk_id"] not in agent_spk_ids]
        )

    def _identify_by_keywords(self, segments: List[Dict]) -> Optional[str]:
        """Identify the agent by keywords."""
        opening_patterns = DialectConfig.get_compiled_opening()
        closing_patterns = DialectConfig.get_compiled_closing()
        # Strategy 1: look for opening keywords in the first 3 segments
        for seg in segments[:3]:
            text = seg["text"]
            for pattern in opening_patterns:
                if pattern.search(text):
                    return seg["spk_id"]
        # Strategy 2: look for closing keywords in the last 3 segments
        for seg in reversed(segments[-3:] if len(segments) >= 3 else segments):
            text = seg["text"]
            for pattern in closing_patterns:
                if pattern.search(text):
                    return seg["spk_id"]
        return None

    def _identify_by_speech_patterns(self, segments: List[Dict]) -> Optional[str]:
        """Identify the agent by speaking patterns."""
        # Collect per-speaker features
        speaker_features = {}
        for seg in segments:
            spk_id = seg["spk_id"]
            if spk_id not in speaker_features:
                speaker_features[spk_id] = {
                    "total_duration": 0.0,
                    "turn_count": 0,
                    "question_count": 0
                }
            features = speaker_features[spk_id]
            features["total_duration"] += (seg["end"] - seg["start"])
            features["turn_count"] += 1
            # Detect questions (interrogative words)
            if any(q_word in seg["text"] for q_word in ["吗", "呢", "?", "?", "如何", "怎样"]):
                features["question_count"] += 1
        # The agent usually talks longer and asks more questions
        if speaker_features:
            # Longest total speaking time
            max_duration = max(f["total_duration"] for f in speaker_features.values())
            # Question rate per speaker
            question_rates = {
                spk_id: features["question_count"] / features["turn_count"]
                for spk_id, features in speaker_features.items()
            }
            # Combined score
            candidates = []
            for spk_id, features in speaker_features.items():
                score = (
                    0.6 * (features["total_duration"] / max_duration) +
                    0.4 * question_rates[spk_id]
                )
                candidates.append((spk_id, score))
            # Return the highest-scoring speaker
            return max(candidates, key=lambda x: x[1])[0]
        return None

    def _analyze_text(self, agent_segments: List[Dict], customer_segments: List[Dict],
                      batch_size: int) -> Dict:
        """Text sentiment analysis (optimized: vectorized batching)."""

        def analyze_speaker(segments: List[Dict], speaker_type: str) -> Dict:
            if not segments:
                return {
                    f"{speaker_type}_negative": 0.0,
                    f"{speaker_type}_neutral": 1.0,
                    f"{speaker_type}_positive": 0.0,
                    f"{speaker_type}_emotions": "无"
                }
            # Dialect preprocessing: single-pass replacement
            texts = [seg["text"] for seg in segments]
            processed_texts = DialectConfig.preprocess_text(texts)
            sentiment_dist = []
            emotions = []
            # Batch inference with a DataLoader
            with ModelLoader.model_lock:
                inputs = ModelLoader.sentiment_tokenizer(
                    processed_texts,
                    padding=True,
                    truncation=True,
                    max_length=128,
                    return_tensors="pt"
                )
                # Build a TensorDataset and DataLoader
                dataset = TensorDataset(inputs['input_ids'], inputs['attention_mask'])
                dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=False)
                device = "cuda" if torch.cuda.is_available() else "cpu"
                for batch in dataloader:
                    input_ids, attention_mask = batch
                    batch_inputs = {
                        'input_ids': input_ids.to(device),
                        'attention_mask': attention_mask.to(device)
                    }
                    with torch.no_grad():
                        outputs = ModelLoader.sentiment_model(**batch_inputs)
                        batch_probs = torch.nn.functional.softmax(outputs.logits, dim=-1)
                        sentiment_dist.append(batch_probs.cpu())
            # Emotion detection (keyword-based)
            emotion_keywords = ["愤怒", "生气", "鬼火", "不耐烦", "搞哪样嘛", "恼火", "背时"]
            for text in processed_texts:
                if any(kw in text for kw in emotion_keywords):
                    if any(kw in text for kw in ["愤怒", "生气", "鬼火", "恼火"]):
                        emotions.append("愤怒")
                    elif any(kw in text for kw in ["不耐烦", "搞哪样嘛"]):
                        emotions.append("不耐烦")
                    elif "背时" in text:
                        emotions.append("沮丧")
            # Merge the results
            if sentiment_dist:
                all_probs = torch.cat(sentiment_dist, dim=0)
                avg_sentiment = torch.mean(all_probs, dim=0).tolist()
            else:
                avg_sentiment = [0.0, 1.0, 0.0]  # Default
            return {
                f"{speaker_type}_negative": round(avg_sentiment[0], 4),
                f"{speaker_type}_neutral": round(avg_sentiment[1], 4),
                f"{speaker_type}_positive": round(avg_sentiment[2], 4),
                f"{speaker_type}_emotions": ",".join(set(emotions)) if emotions else "无"
            }

        return {
            **analyze_speaker(agent_segments, "agent"),
            **analyze_speaker(customer_segments, "customer")
        }

    # ====================== Helper methods ======================
    def get_available_batch_size(self) -> int:
        """Pick a batch size based on GPU memory (accounts for parallel tasks)."""
        if not torch.cuda.is_available():
            return 4  # Default batch size on CPU
        total_mem = torch.cuda.get_device_properties(0).total_memory / (1024 ** 3)  # GB
        per_task_mem = total_mem / self.max_concurrent
        # Less memory per task means smaller batches
        if per_task_mem < 2:
            return 2
        elif per_task_mem < 4:
            return 4
        else:
            return 8

    def get_max_concurrent_tasks(self) -> int:
        """Compute the maximum number of parallel tasks from system resources."""
        if torch.cuda.is_available():
            total_mem = torch.cuda.get_device_properties(0).total_memory / (1024 ** 3)
            if total_mem < 6:
                return 1
            elif total_mem < 12:
                return 2
            else:
                return 3
        else:
            # In CPU mode, derive it from the core count
            return max(1, os.cpu_count() // 2)

    def check_memory_usage(self) -> bool:
        try:
            mem_percent = self.resource_monitor.memory_percent()
            return mem_percent.get("cpu", 0) > 85 or mem_percent.get("gpu", 0) > 85
        except Exception:
            return False

    def _extract_audio_features(self, wav_paths: List[str]) -> Dict[str, float]:
        """Extract audio features (all chunks combined)."""
        combined_y = np.array([], dtype=np.float32)
        sr = ConfigManager().get("sample_rate", 16000)
        for path in wav_paths:
            y, _ = librosa.load(path, sr=sr)
            combined_y = np.concatenate((combined_y, y))
        return AudioProcessor.extract_features_from_audio(combined_y, sr)
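    # Illustration (hypothetical hardware): an 8 GB GPU falls in the 6-12 GB band,
    # so get_max_concurrent_tasks() returns 2; per_task_mem is then 4 GB and
    # get_available_batch_size() returns 8. A 6 GB GPU with the same concurrency
    # gives 3 GB per task and a batch size of 4.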
    def _process_asr_segments(self, wav_paths: List[str]) -> Tuple[List[Dict], str]:
        """Run ASR over the chunks (batched)."""
        segments = []
        full_text = ""
        # Batch the chunks (size adapted to GPU memory)
        batch_size = min(4, len(wav_paths), self.get_available_batch_size())
        for i in range(0, len(wav_paths), batch_size):
            if not self.is_running:
                break
            batch_paths = wav_paths[i:i + batch_size]
            try:
                # Batched ASR call
                results = ModelLoader.asr_pipeline(
                    batch_paths,
                    hotwords=DialectConfig.get_asr_hotwords(),
                    output_dir=None,
                    batch_size=batch_size
                )
                for result in results:
                    for seg in result[0]["sentences"]:
                        segments.append({
                            "start": seg["start"],
                            "end": seg["end"],
                            "text": seg["text"],
                            "spk_id": seg.get("spk_id", "0")
                        })
                        full_text += seg["text"] + " "
            except Exception as e:
                print(f"ASR批处理错误: {str(e)}")
                # Fall back to per-file processing on failure
                for path in batch_paths:
                    try:
                        result = ModelLoader.asr_pipeline(
                            path,
                            hotwords=DialectConfig.get_asr_hotwords(),
                            output_dir=None
                        )
                        for seg in result[0]["sentences"]:
                            segments.append({
                                "start": seg["start"],
                                "end": seg["end"],
                                "text": seg["text"],
                                "spk_id": seg.get("spk_id", "0")
                            })
                            full_text += seg["text"] + " "
                    except Exception:
                        continue
        return segments, full_text.strip()

    def _generate_labeled_text(self, all_segments: List[Dict], agent_segments: List[Dict],
                               customer_segments: List[Dict]) -> str:
        """Build the speaker-labeled transcript."""
        agent_spk_id = agent_segments[0]["spk_id"] if agent_segments else None
        customer_spk_id = customer_segments[0]["spk_id"] if customer_segments else None
        labeled_text = []
        for seg in all_segments:
            if seg["spk_id"] == agent_spk_id:
                speaker = "客服"
            elif seg["spk_id"] == customer_spk_id:
                speaker = "客户"
            else:
                speaker = f"说话人{seg['spk_id']}"
            labeled_text.append(f"[{speaker}]: {seg['text']}")
        return "\n".join(labeled_text)

    def _check_service_rules(self, agent_segments: List[Dict]) -> Dict:
        """Check service-script compliance."""
        forbidden_keywords = DialectConfig.get_combined_keywords()["forbidden"]
        found_forbidden = []
        found_opening = False
        found_closing = False
        # Opening (first 3 segments)
        for seg in agent_segments[:3]:
            text = seg["text"]
            if any(kw in text for kw in DialectConfig.get_combined_keywords()["opening"]):
                found_opening = True
                break
        # Closing (last 3 segments)
        for seg in reversed(agent_segments[-3:] if len(agent_segments) >= 3 else agent_segments):
            text = seg["text"]
            if any(kw in text for kw in DialectConfig.get_combined_keywords()["closing"]):
                found_closing = True
                break
        # Forbidden words
        for seg in agent_segments:
            text = seg["text"]
            for kw in forbidden_keywords:
                if kw in text:
                    found_forbidden.append(kw)
                    break
        return {
            "opening_found": found_opening,
            "closing_found": found_closing,
            "forbidden_words": ", ".join(set(found_forbidden)) if found_forbidden else "无"
        }

    def _check_issue_resolution(self, customer_segments: List[Dict],
                                agent_segments: List[Dict]) -> bool:
        """Check whether the issue was resolved (enhanced)."""
        if not customer_segments or not agent_segments:
            return False
        # Collect all text
        customer_texts = [seg["text"] for seg in customer_segments]
        agent_texts = [seg["text"] for seg in agent_segments]
        full_conversation = " ".join(customer_texts + agent_texts)
        # Keyword sets
        resolution_keywords = ["解决", "处理", "完成", "已", "好了", "可以了", "没问题"]
        thank_keywords = ["谢谢", "感谢", "多谢"]
        negative_keywords = ["没解决", "不行", "不对", "还是", "仍然", "再"]
        # Any negative wording?
        has_negative = any(kw in full_conversation for kw in negative_keywords)
        if has_negative:
            return False
        # Did the customer end with thanks?
        last_customer_text = customer_segments[-1]["text"]
        if any(kw in last_customer_text for kw in thank_keywords):
            return True
        # Any resolution keyword anywhere?
        if any(kw in full_conversation for kw in resolution_keywords):
            return True
        # Did the agent confirm resolution near the end?
        for agent_text in reversed(agent_texts[-3:]):  # Last 3 segments
            if any(kw in agent_text for kw in resolution_keywords):
                return True
        return False
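    # Illustration (hypothetical transcripts): a call whose final customer segment
    # is "好的,谢谢" returns True via the thanks check, while a call containing
    # "问题还是没解决" returns False immediately, because "还是" and "没解决" are
    # both negative keywords.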
    def _cleanup_temp_files(self, paths: List[str]):
        """Remove temporary files (cross-platform)."""

        def safe_remove(path):
            """Remove a file safely on every platform."""
            try:
                if os.path.exists(path):
                    if sys.platform == 'win32':
                        # Windows needs special handling
                        os.chmod(path, 0o777)  # Make sure we have permission
                        for _ in range(5):  # Try up to 5 times
                            try:
                                os.remove(path)
                                break
                            except PermissionError:
                                time.sleep(0.2)
                    else:
                        os.remove(path)
            except Exception:
                pass

        # Only touch files inside the temp directory: convert_to_wav passes the
        # original file straight through when it is already WAV, and that file
        # must not be deleted.
        temp_dir_abs = os.path.abspath(self.temp_dir)
        paths = [p for p in paths
                 if os.path.dirname(os.path.abspath(p)) == temp_dir_abs]
        # Delete in parallel with a thread pool
        with concurrent.futures.ThreadPoolExecutor(max_workers=4) as executor:
            executor.map(safe_remove, paths)
        # Extra cleanup: drop temp files older than one hour
        now = time.time()
        for file in os.listdir(self.temp_dir):
            file_path = os.path.join(self.temp_dir, file)
            if os.path.isfile(file_path):
                file_age = now - os.path.getmtime(file_path)
                if file_age > 3600:  # 1 hour
                    safe_remove(file_path)

    def _format_duration(self, seconds: float) -> str:
        """Convert seconds to hh:mm:ss."""
        minutes, seconds = divmod(int(seconds), 60)
        hours, minutes = divmod(minutes, 60)
        return f"{hours:02d}:{minutes:02d}:{seconds:02d}"

    def cleanup_resources(self):
        """Explicitly release resources."""
        gc.collect()
        if torch.cuda.is_available():
            torch.cuda.empty_cache()

    def stop(self):
        """Stop the analysis."""
        self.is_running = False


# ====================== Model loading thread ======================
class ModelLoadThread(QThread):
    progress_updated = pyqtSignal(int, str)
    finished = pyqtSignal(bool, str)

    def run(self):
        try:
            # Validate the model paths
            config = ConfigManager().get("model_paths")
            if not os.path.exists(config["asr"]):
                self.finished.emit(False, "ASR模型路径不存在")
                return
            if not os.path.exists(config["sentiment"]):
                self.finished.emit(False, "情感分析模型路径不存在")
                return
            self.progress_updated.emit(20, "加载语音识别模型...")
            ModelLoader.load_asr_model(config["asr"])
            self.progress_updated.emit(60, "加载情感分析模型...")
            ModelLoader.load_sentiment_model(config["sentiment"])
            self.progress_updated.emit(100, "模型加载完成")
            self.finished.emit(True, "模型加载成功。建议:可通过设置界面修改模型路径")
        except Exception as e:
            self.finished.emit(False, f"模型加载失败: {str(e)}。建议:检查模型路径是否正确,或重新下载模型文件")


# ====================== Main GUI window ======================
class MainWindow(QMainWindow):
    def __init__(self):
        super().__init__()
        self.setWindowTitle("贵州方言客服质检系统")
        self.setGeometry(100, 100, 1200, 800)
        self.setup_ui()
        self.setup_menu()
        self.analysis_thread = None
        self.model_load_thread = None
        self.temp_dir = "temp_wav"
        os.makedirs(self.temp_dir, exist_ok=True)
        self.model_loaded = False

    def setup_ui(self):
        """Build the user interface."""
        # Main layout
        main_widget = QWidget()
        main_layout = QVBoxLayout()
        main_widget.setLayout(main_layout)
        self.setCentralWidget(main_widget)

        # Toolbar
        toolbar = QToolBar("主工具栏")
        toolbar.setIconSize(QSize(24, 24))
        self.addToolBar(toolbar)

        # Add-files button
        add_file_action = QAction(QIcon("icons/add.png"), "添加文件", self)
        add_file_action.triggered.connect(self.add_files)
        toolbar.addAction(add_file_action)

        # Start button
        analyze_action = QAction(QIcon("icons/start.png"), "开始分析", self)
        analyze_action.triggered.connect(self.start_analysis)
        toolbar.addAction(analyze_action)

        # Stop button
        stop_action = QAction(QIcon("icons/stop.png"), "停止分析", self)
        stop_action.triggered.connect(self.stop_analysis)
        toolbar.addAction(stop_action)

        # Settings button
        settings_action = QAction(QIcon("icons/settings.png"), "设置", self)
        settings_action.triggered.connect(self.open_settings)
        toolbar.addAction(settings_action)

        # Split layout
        splitter = QSplitter(Qt.Horizontal)
        main_layout.addWidget(splitter)

        # Left: file list
        left_widget = QWidget()
        left_layout = QVBoxLayout()
        left_widget.setLayout(left_layout)
        file_list_label = QLabel("待分析文件列表")
        file_list_label.setFont(QFont("Arial", 12, QFont.Bold))
        left_layout.addWidget(file_list_label)
        self.file_list = QListWidget()
        self.file_list.setSelectionMode(QListWidget.ExtendedSelection)
        left_layout.addWidget(self.file_list)

        # Right: results area
        right_widget = QWidget()
        right_layout = QVBoxLayout()
        right_widget.setLayout(right_layout)

        # Progress bar
        progress_label = QLabel("分析进度")
        progress_label.setFont(QFont("Arial", 12, QFont.Bold))
        right_layout.addWidget(progress_label)
        self.progress_bar = QProgressBar()
        self.progress_bar.setRange(0, 100)
        self.progress_bar.setTextVisible(True)
        right_layout.addWidget(self.progress_bar)

        # Current-file label
        self.current_file_label = QLabel("当前文件: 无")
        right_layout.addWidget(self.current_file_label)

        # Result tabs
        self.tab_widget = QTabWidget()
        right_layout.addWidget(self.tab_widget, 1)

        # Text-result tab
        text_tab = QWidget()
        text_layout = QVBoxLayout()
        text_tab.setLayout(text_layout)
        self.text_result = QTextEdit()
        self.text_result.setReadOnly(True)
        text_layout.addWidget(self.text_result)
        self.tab_widget.addTab(text_tab, "文本结果")

        # Detailed-result tab
        detail_tab = QWidget()
        detail_layout = QVBoxLayout()
        detail_tab.setLayout(detail_layout)
        self.result_table = QTableWidget()
        self.result_table.setColumnCount(10)
        self.result_table.setHorizontalHeaderLabels([
            "文件名", "时长", "语速", "音量稳定性", "客服情感",
            "客户情感", "开场白", "结束语", "禁用词", "问题解决"
        ])
        self.result_table.horizontalHeader().setSectionResizeMode(QHeaderView.Stretch)
        detail_layout.addWidget(self.result_table)
        self.tab_widget.addTab(detail_tab, "详细结果")

        # Add both sides to the splitter
        splitter.addWidget(left_widget)
        splitter.addWidget(right_widget)
        splitter.setSizes([300, 900])

    def setup_menu(self):
        """Build the menu bar."""
        menu_bar = self.menuBar()

        # File menu
        file_menu = menu_bar.addMenu("文件")
        add_file_action = QAction("添加文件", self)
        add_file_action.triggered.connect(self.add_files)
        file_menu.addAction(add_file_action)
        export_action = QAction("导出结果", self)
        export_action.triggered.connect(self.export_results)
        file_menu.addAction(export_action)
        exit_action = QAction("退出", self)
        exit_action.triggered.connect(self.close)
        file_menu.addAction(exit_action)

        # Analysis menu
        analysis_menu = menu_bar.addMenu("分析")
        start_action = QAction("开始分析", self)
        start_action.triggered.connect(self.start_analysis)
        analysis_menu.addAction(start_action)
        stop_action = QAction("停止分析", self)
        stop_action.triggered.connect(self.stop_analysis)
        analysis_menu.addAction(stop_action)

        # Settings menu
        settings_menu = menu_bar.addMenu("设置")
        config_action = QAction("系统配置", self)
        config_action.triggered.connect(self.open_settings)
        settings_menu.addAction(config_action)
        model_action = QAction("加载模型", self)
        model_action.triggered.connect(self.load_models)
        settings_menu.addAction(model_action)

    def add_files(self):
        """Add files to the analysis list."""
        files, _ = QFileDialog.getOpenFileNames(
            self, "选择音频文件", "",
            "音频文件 (*.mp3 *.wav *.amr *.m4a)"
        )
        if files:
            for file in files:
                self.file_list.addItem(file)

    def start_analysis(self):
        """Start the analysis."""
        if self.file_list.count() == 0:
            QMessageBox.warning(self, "警告", "请先添加要分析的音频文件")
            return
        if not self.model_loaded:
            QMessageBox.warning(self, "警告", "模型未加载,请先加载模型")
            return
        # Gather the file paths
        audio_paths = [self.file_list.item(i).text() for i in range(self.file_list.count())]
        # Clear old results
        self.text_result.clear()
        self.result_table.setRowCount(0)
        # Create the analysis thread
        self.analysis_thread = AnalysisThread(audio_paths, self.temp_dir)
        # Wire up the signals
        self.analysis_thread.progress_updated.connect(self.update_progress)
        self.analysis_thread.result_ready.connect(self.handle_result)
        self.analysis_thread.finished_all.connect(self.analysis_finished)
        self.analysis_thread.error_occurred.connect(self.show_error)
        self.analysis_thread.memory_warning.connect(self.handle_memory_warning)
        self.analysis_thread.resource_cleanup.connect(self.cleanup_resources)
        # Start the thread
        self.analysis_thread.start()

    def stop_analysis(self):
        """Stop the analysis."""
        if self.analysis_thread and self.analysis_thread.isRunning():
            self.analysis_thread.stop()
            self.analysis_thread.wait()
            QMessageBox.information(self, "信息", "分析已停止")

    def load_models(self):
        """Load the models."""
        if self.model_load_thread and self.model_load_thread.isRunning():
            return
        self.model_load_thread = ModelLoadThread()
        self.model_load_thread.progress_updated.connect(
            lambda value, msg: self.progress_bar.setValue(value)
        )
        self.model_load_thread.finished.connect(self.handle_model_load_result)
        self.model_load_thread.start()

    def update_progress(self, progress: int, message: str, current_file: str):
        """Update the progress display."""
        self.progress_bar.setValue(progress)
        self.current_file_label.setText(f"当前文件: {current_file}")

    def handle_result(self, result: Dict):
        """Handle an analysis result."""
        # Append to the text results
        self.text_result.append(f"文件: {result['file_name']}")
        self.text_result.append(f"状态: {result['status']}")
        if result["status"] == "success":
            self.text_result.append(f"时长: {result['duration_str']}")
            self.text_result.append(f"语速: {result['syllable_rate']} 音节/秒")
            self.text_result.append(f"音量稳定性: {result['volume_stability']}")
            self.text_result.append(f"客服情感: 负面({result['agent_negative']:.2%}) "
                                    f"中性({result['agent_neutral']:.2%}) "
                                    f"正面({result['agent_positive']:.2%})")
            self.text_result.append(f"客服情绪: {result['agent_emotions']}")
            self.text_result.append(f"客户情感: 负面({result['customer_negative']:.2%}) "
                                    f"中性({result['customer_neutral']:.2%}) "
                                    f"正面({result['customer_positive']:.2%})")
            self.text_result.append(f"客户情绪: {result['customer_emotions']}")
            self.text_result.append(f"开场白: {'有' if result['opening_found'] else '无'}")
            self.text_result.append(f"结束语: {'有' if result['closing_found'] else '无'}")
            self.text_result.append(f"禁用词: {result['forbidden_words']}")
            self.text_result.append(f"问题解决: {'是' if result['issue_resolved'] else '否'}")
            self.text_result.append("\n=== 对话文本 ===\n")
            self.text_result.append(result["asr_text"])
            self.text_result.append("\n" + "=" * 50 + "\n")

            # Append to the result table
            row = self.result_table.rowCount()
            self.result_table.insertRow(row)
            self.result_table.setItem(row, 0, QTableWidgetItem(result["file_name"]))
            self.result_table.setItem(row, 1, QTableWidgetItem(result["duration_str"]))
            self.result_table.setItem(row, 2, QTableWidgetItem(str(result["syllable_rate"])))
            self.result_table.setItem(row, 3, QTableWidgetItem(str(result["volume_stability"])))
            self.result_table.setItem(row, 4, QTableWidgetItem(
                f"负:{result['agent_negative']:.2f} 中:{result['agent_neutral']:.2f} 正:{result['agent_positive']:.2f}"
            ))
            self.result_table.setItem(row, 5, QTableWidgetItem(
                f"负:{result['customer_negative']:.2f} 中:{result['customer_neutral']:.2f} 正:{result['customer_positive']:.2f}"
            ))
            self.result_table.setItem(row, 6, QTableWidgetItem("是" if result["opening_found"] else "否"))
            self.result_table.setItem(row, 7, QTableWidgetItem("是" if result["closing_found"] else "否"))
            self.result_table.setItem(row, 8, QTableWidgetItem(result["forbidden_words"]))
            self.result_table.setItem(row, 9, QTableWidgetItem("是" if result["issue_resolved"] else "否"))

            # Color cells according to the results
            if not result["opening_found"]:
                self.result_table.item(row, 6).setBackground(QColor(255, 200, 200))
            if not result["closing_found"]:
                self.result_table.item(row, 7).setBackground(QColor(255, 200, 200))
            if result["forbidden_words"] != "无":
                self.result_table.item(row, 8).setBackground(QColor(255, 200, 200))
            if not result["issue_resolved"]:
                self.result_table.item(row, 9).setBackground(QColor(255, 200, 200))
result["issue_resolved"]: self.result_table.item(row, 9).setBackground(QColor(255, 200, 200)) def analysis_finished(self): """分析完成""" QMessageBox.information(self, "完成", "所有音频分析完成") self.progress_bar.setValue(100) def show_error(self, title: str, message: str): """显示错误信息""" QMessageBox.critical(self, title, message) def handle_memory_warning(self): """处理内存警告""" QMessageBox.warning(self, "内存警告", "内存使用过高,分析已停止。请关闭其他应用程序后重试") def cleanup_resources(self): """清理资源""" gc.collect() if torch.cuda.is_available(): torch.cuda.empty_cache() def handle_model_load_result(self, success: bool, message: str): """处理模型加载结果""" if success: self.model_loaded = True QMessageBox.information(self, "成功", message) else: QMessageBox.critical(self, "错误", message) def open_settings(self): """打开设置对话框""" settings_dialog = QDialog(self) settings_dialog.setWindowTitle("系统设置") settings_dialog.setFixedSize(500, 400) layout = QVBoxLayout() # ASR模型路径 asr_layout = QHBoxLayout() asr_label = QLabel("ASR模型路径:") asr_line = QLineEdit(ConfigManager().get("model_paths")["asr"]) asr_browse = QPushButton("浏览...") def browse_asr(): path = QFileDialog.getExistingDirectory(self, "选择ASR模型目录") if path: asr_line.setText(path) asr_browse.clicked.connect(browse_asr) asr_layout.addWidget(asr_label) asr_layout.addWidget(asr_line) asr_layout.addWidget(asr_browse) layout.addLayout(asr_layout) # 情感分析模型路径 sentiment_layout = QHBoxLayout() sentiment_label = QLabel("情感模型路径:") sentiment_line = QLineEdit(ConfigManager().get("model_paths")["sentiment"]) sentiment_browse = QPushButton("浏览...") def browse_sentiment(): path = QFileDialog.getExistingDirectory(self, "选择情感模型目录") if path: sentiment_line.setText(path) sentiment_browse.clicked.connect(browse_sentiment) sentiment_layout.addWidget(sentiment_label) sentiment_layout.addWidget(sentiment_line) sentiment_layout.addWidget(sentiment_browse) layout.addLayout(sentiment_layout) # 并发设置 concurrent_layout = QHBoxLayout() concurrent_label = QLabel("最大并发任务:") concurrent_spin = QSpinBox() concurrent_spin.setRange(1, 8) concurrent_spin.setValue(ConfigManager().get("max_concurrent", 1)) concurrent_layout.addWidget(concurrent_label) concurrent_layout.addWidget(concurrent_spin) layout.addLayout(concurrent_layout) # 方言设置 dialect_layout = QHBoxLayout() dialect_label = QLabel("方言设置:") dialect_combo = QComboBox() dialect_combo.addItems(["标准普通话", "贵州方言"]) dialect_combo.setCurrentIndex(1 if ConfigManager().get("dialect_config") == "guizhou" else 0) dialect_layout.addWidget(dialect_label) dialect_layout.addWidget(dialect_combo) layout.addLayout(dialect_layout) # 音频时长限制 duration_layout = QHBoxLayout() duration_label = QLabel("最大音频时长(秒):") duration_spin = QSpinBox() duration_spin.setRange(60, 86400) # 1分钟到24小时 duration_spin.setValue(ConfigManager().get("max_audio_duration", 3600)) duration_layout.addWidget(duration_label) duration_layout.addWidget(duration_spin) layout.addLayout(duration_layout) # 按钮 button_box = QDialogButtonBox(QDialogButtonBox.Ok | QDialogButtonBox.Cancel) button_box.accepted.connect(settings_dialog.accept) button_box.rejected.connect(settings_dialog.reject) layout.addWidget(button_box) settings_dialog.setLayout(layout) if settings_dialog.exec_() == QDialog.Accepted: # 保存设置 ConfigManager().set("model_paths", { "asr": asr_line.text(), "sentiment": sentiment_line.text() }) ConfigManager().set("max_concurrent", concurrent_spin.value()) ConfigManager().set("dialect_config", "guizhou" if dialect_combo.currentIndex() == 1 else "standard") ConfigManager().set("max_audio_duration", duration_spin.value()) # 重新加载模型 
            ModelLoader.reload_models()

    def export_results(self):
        """Export the results to CSV."""
        if self.result_table.rowCount() == 0:
            QMessageBox.warning(self, "警告", "没有可导出的结果")
            return
        path, _ = QFileDialog.getSaveFileName(
            self, "保存结果", "", "CSV文件 (*.csv)"
        )
        if path:
            try:
                with open(path, "w", encoding="utf-8") as f:
                    # Write the header row
                    headers = []
                    for col in range(self.result_table.columnCount()):
                        headers.append(self.result_table.horizontalHeaderItem(col).text())
                    f.write(",".join(headers) + "\n")
                    # Write the data rows
                    for row in range(self.result_table.rowCount()):
                        row_data = []
                        for col in range(self.result_table.columnCount()):
                            item = self.result_table.item(row, col)
                            row_data.append(item.text() if item else "")
                        f.write(",".join(row_data) + "\n")
                QMessageBox.information(self, "成功", f"结果已导出到: {path}")
            except Exception as e:
                QMessageBox.critical(self, "错误", f"导出失败: {str(e)}")

    def closeEvent(self, event):
        """Handle the window close event."""
        if self.analysis_thread and self.analysis_thread.isRunning():
            self.analysis_thread.stop()
            self.analysis_thread.wait()
        # Clean up the temp directory (robust across platforms)
        try:
            for file in os.listdir(self.temp_dir):
                file_path = os.path.join(self.temp_dir, file)
                if os.path.isfile(file_path):
                    # Windows may need several attempts
                    for _ in range(3):
                        try:
                            os.remove(file_path)
                            break
                        except PermissionError:
                            time.sleep(0.1)
            os.rmdir(self.temp_dir)
        except Exception:
            pass
        event.accept()


# ====================== Program entry point ======================
if __name__ == "__main__":
    torch.set_num_threads(4)  # Limit the number of CPU threads
    app = QApplication(sys.argv)
    # Set the application style
    app.setStyle('Fusion')
    window = MainWindow()
    window.show()
    sys.exit(app.exec_())
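Since all of the dialect logic lives in DialectConfig, that layer can be exercised headlessly, without Qt or the models. A minimal sketch with made-up sample sentences, assuming the class definitions above have been imported:

# Hypothetical smoke test for the dialect layer only; no GUI or models needed.
samples = ["没得问题,板扎", "客户恼火得很"]
print(DialectConfig.preprocess_text(samples))
# expected: ['没有问题,很好', '客户非常生气']
print(DialectConfig.get_asr_hotwords())  # merged, deduplicated opening + closing keywords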