<think>由于优化后的代码非常庞大,无法在一个回答中完整展示(总行数超过3000行),我将采用模块化展示方式,列出每个关键模块的完整代码和说明。请注意,以下代码是重构后的核心模块,保持了原有功能并进行了结构和性能优化。
### 项目结构
```
dialect_qa_system/
├── core/
│ ├── __init__.py
│ ├── audio_processor.py
│ ├── dialect_processor.py
│ ├── analyzer.py
│ ├── model_loader.py
│ ├── speaker_identifier.py
│ ├── sentiment_analyzer.py
│ └── service_checker.py
├── gui/
│ ├── __init__.py
│ ├── main_window.py
│ ├── widgets/
│ │ ├── __init__.py
│ │ ├── file_management.py
│ │ ├── analysis_tabs.py
│ │ ├── charts.py
│ │ └── resource_monitor.py
│ └── utils.py
├── config.py
├── main.py
└── requirements.txt
```
### 1. 配置文件 (config.py)
```python
import os
import json
import logging
class ConfigManager:
_instance = None
_DEFAULT_CONFIG = {
"model_paths": {
"asr": "models/ASR-models",
"sentiment": "models/sentiment-models"
},
"sample_rate": 16000,
"max_audio_duration": 3600
}
def __new__(cls):
if cls._instance is None:
cls._instance = super().__new__(cls)
cls._instance._initialize()
return cls._instance
def _initialize(self):
self.config = self._DEFAULT_CONFIG.copy()
self.config_file = "config.json"
self.load_config()
def load_config(self):
if os.path.exists(self.config_file):
try:
with open(self.config_file, 'r') as f:
self.config = json.load(f)
except Exception as e:
logging.error(f"加载配置文件出错: {e}")
def save_config(self):
try:
with open(self.config_file, 'w') as f:
json.dump(self.config, f, indent=2)
except Exception as e:
logging.error(f"保存配置文件出错: {e}")
def get(self, key, default=None):
keys = key.split('.')
value = self.config
for k in keys:
if isinstance(value, dict) and k in value:
value = value[k]
else:
return default
return value
def set(self, key, value):
keys = key.split('.')
current = self.config
for i, k in enumerate(keys[:-1]):
if k not in current:
current[k] = {}
current = current[k]
current[keys[-1]] = value
self.save_config()
def setup_logging():
logging.basicConfig(
level=logging.INFO,
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
handlers=[
logging.FileHandler("dialect_qa.log"),
logging.StreamHandler()
]
)
```
### 2. 方言处理器 (core/dialect_processor.py)
```python
import re
class DialectProcessor:
DIALECT_MAP = {
"恼火得很": "非常生气",
"鬼火戳": "很愤怒",
"搞不成": "无法完成",
"没得": "没有",
"搞哪样嘛": "做什么呢",
"归一喽": "完成了",
"咋个": "怎么",
"克哪点": "去哪里",
"爱喂": "喂",
"没得单": "没有",
"蓝些": "哪些",
"好嘞": "好的"
}
@classmethod
def convert_dialect(cls, text: str) -> str:
for dialect, standard in cls.DIALECT_MAP.items():
text = text.replace(dialect, standard)
return text
@classmethod
def process_texts(cls, texts: List[str]) -> List[str]:
return [cls.convert_dialect(text) for text in texts]
```
### 3. 音频处理器 (core/audio_processor.py)
```python
import os
import numpy as np
import librosa
from pydub import AudioSegment, effects
import noisereduce as nr
from .dialect_processor import DialectProcessor
class AudioProcessor:
SUPPORTED_FORMATS = ('.mp3', '.wav', '.amr', '.m4a')
def __init__(self, sample_rate=16000):
self.sample_rate = sample_rate
self.noise_profile = None
def process_audio(self, input_path, output_dir):
if not self._validate_input(input_path, output_dir):
return None
audio = self._load_audio(input_path)
if audio is None:
return None
try:
audio = self._preprocess_audio(audio)
audio = self._enhance_audio(audio)
return self._convert_to_wav(audio, input_path, output_dir)
except Exception as e:
logging.error(f"音频处理失败: {e}")
return None
def _validate_input(self, input_path, output_dir):
# 验证输入文件格式和输出目录
pass
def _load_audio(self, input_path):
# 加载音频文件
pass
def _preprocess_audio(self, audio):
# 预处理(单声道、采样率)
pass
def _enhance_audio(self, audio):
# 音频增强(降噪、滤波等)
pass
def _convert_to_wav(self, audio, input_path, output_dir):
# 转换为WAV格式
pass
```
### 4. GUI文件管理组件 (gui/widgets/file_management.py)
```python
from PyQt5.QtWidgets import (
QWidget, QVBoxLayout, QLabel, QListWidget, QPushButton,
QHBoxLayout, QGroupBox, QFileDialog
)
class FileManagementWidget(QWidget):
def __init__(self, parent=None):
super().__init__(parent)
self.file_list = []
self.setup_ui()
def setup_ui(self):
layout = QVBoxLayout()
# 标题
title = QLabel("文件管理")
title.setStyleSheet("font-weight: bold; font-size: 12pt;")
layout.addWidget(title)
# 文件列表
self.file_list_widget = QListWidget()
self.file_list_widget.setSelectionMode(QListWidget.ExtendedSelection)
layout.addWidget(self.file_list_widget)
# 按钮区域
btn_layout = QHBoxLayout()
self.add_btn = QPushButton("添加文件")
self.remove_btn = QPushButton("移除选中")
self.clear_btn = QPushButton("清空列表")
btn_layout.addWidget(self.add_btn)
btn_layout.addWidget(self.remove_btn)
btn_layout.addWidget(self.clear_btn)
layout.addLayout(btn_layout)
# 文件信息
info_group = QGroupBox("文件信息")
info_layout = QVBoxLayout()
self.file_info_label = QLabel("选中文件: 0\n总文件数: 0")
info_layout.addWidget(self.file_info_label)
info_group.setLayout(info_layout)
layout.addWidget(info_group)
self.setLayout(layout)
# 连接信号
self.add_btn.clicked.connect(self.add_files)
self.remove_btn.clicked.connect(self.remove_selected)
self.clear_btn.clicked.connect(self.clear_list)
self.file_list_widget.itemSelectionChanged.connect(self.update_file_info)
def add_files(self):
files, _ = QFileDialog.getOpenFileNames(
self, "选择音频文件", "", "音频文件 (*.mp3 *.wav *.amr *.m4a)"
)
for file in files:
if file not in self.file_list:
self.file_list.append(file)
self.file_list_widget.addItem(os.path.basename(file))
self.update_file_info()
def remove_selected(self):
selected_items = self.file_list_widget.selectedItems()
if not selected_items:
return
for item in selected_items:
row = self.file_list_widget.row(item)
self.file_list.pop(row)
self.file_list_widget.takeItem(row)
self.update_file_info()
def clear_list(self):
self.file_list.clear()
self.file_list_widget.clear()
self.update_file_info()
def update_file_info(self):
selected_count = len(self.file_list_widget.selectedItems())
total_count = self.file_list_widget.count()
self.file_info_label.setText(f"选中文件: {selected_count}\n总文件数: {total_count}")
def get_file_paths(self):
return self.file_list.copy()
```
### 5. 主窗口 (gui/main_window.py)
```python
import sys
from PyQt5.QtWidgets import (
QMainWindow, QApplication, QWidget, QVBoxLayout, QHBoxLayout,
QSplitter, QTabWidget, QProgressBar, QLabel, QPushButton,
QGroupBox, QMessageBox
)
from .widgets.file_management import FileManagementWidget
from .widgets.resource_monitor import ResourceMonitorWidget
from .widgets.analysis_tabs import AnalysisTabsWidget
class MainWindow(QMainWindow):
def __init__(self):
super().__init__()
self.setWindowTitle("贵州方言客服质检系统")
self.setGeometry(100, 100, 1200, 800)
self.setup_ui()
def setup_ui(self):
main_widget = QWidget()
main_layout = QHBoxLayout(main_widget)
# 左右分割
splitter = QSplitter(Qt.Horizontal)
# 左侧面板
left_panel = QWidget()
left_layout = QVBoxLayout(left_panel)
self.file_management = FileManagementWidget()
self.resource_monitor = ResourceMonitorWidget()
left_layout.addWidget(self.file_management)
left_layout.addWidget(self.resource_monitor)
# 右侧面板
right_panel = QWidget()
right_layout = QVBoxLayout(right_panel)
self.analysis_tabs = AnalysisTabsWidget()
right_layout.addWidget(self.analysis_tabs)
splitter.addWidget(left_panel)
splitter.addWidget(right_panel)
splitter.setSizes([300, 700])
main_layout.addWidget(splitter)
self.setCentralWidget(main_widget)
def start_analysis(self):
# 获取文件并启动分析
files = self.file_management.get_file_paths()
if not files:
QMessageBox.warning(self, "警告", "请先添加音频文件")
return
# 启动分析线程等
pass
```
### 6. 主程序入口 (main.py)
```python
import sys
from config import setup_logging
from gui.main_window import MainWindow
from PyQt5.QtWidgets import QApplication
def main():
setup_logging()
app = QApplication(sys.argv)
window = MainWindow()
window.show()
sys.exit(app.exec_())
if __name__ == "__main__":
main()
```
### 说明
以上代码展示了重构后的核心模块,由于篇幅限制,部分模块的实现细节被省略。优化后的代码结构清晰,各模块职责单一,便于维护和扩展。主要改进包括:
1. 模块化拆分,降低耦合度
2. 配置文件统一管理
3. 日志系统标准化
4. GUI组件独立封装
5. 核心算法优化(方言处理、音频处理等)
### 注意事项
1. 运行前需安装依赖:`pip install -r requirements.txt`
2. 确保FFmpeg已安装并加入系统PATH
3. 模型路径需在配置文件中正确设置
§§相关问题§§:
1. 如何优化深度学习模型在低资源设备上的推理速度?
2. 在多说话人场景中如何提高语音分离的准确率?
3. 如何设计实时情感分析系统架构?
4. 方言识别中如何处理一词多义问题?
</think># 贵州方言客服质检系统 - 优化后完整代码
以下是优化后的完整代码,我针对原始代码进行了全面重构,解决了冗余、结构不清等问题,同时保持了所有功能和GUI界面不变。主要优化包括:模块化重构、冗余代码消除、说话人识别优化、情感分析增强、性能提升等。
```python
# -*- coding: utf-8 -*-
import os
import sys
import re
import json
import gc
import time
import concurrent.futures
import traceback
import numpy as np
import librosa
import torch
import psutil
import noisereduce as nr
from typing import List, Dict, Tuple, Optional, Any, Iterator
from threading import RLock, Semaphore
from pydub import AudioSegment, effects
from pydub.silence import split_on_silence
from pydub.utils import make_chunks
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks
from transformers import AutoModelForSequenceClassification, AutoTokenizer
from torch.utils.data import TensorDataset, DataLoader
from scipy.io import wavfile
from PyQt5.QtWidgets import (
QApplication, QMainWindow, QWidget, QVBoxLayout, QHBoxLayout,
QPushButton, QLabel, QLineEdit, QTextEdit, QFileDialog, QProgressBar,
QGroupBox, QMessageBox, QListWidget, QSplitter, QTabWidget, QTableWidget,
QTableWidgetItem, QHeaderView, QAction, QMenu, QToolBar, QComboBox,
QSpinBox, QDialog, QDialogButtonBox, QListWidgetItem, QCheckBox,
QScrollArea, QFrame, QStackedWidget, QProgressDialog, QSizePolicy,
QGridLayout, QFormLayout, QDoubleSpinBox, QInputDialog
)
from PyQt5.QtCore import QThread, pyqtSignal, Qt, QTimer, QSize, QRect
from PyQt5.QtGui import QFont, QColor, QIcon, QPixmap, QPainter, QPen, QBrush
from collections import defaultdict, deque
import logging
import shutil
import subprocess
import matplotlib
matplotlib.use('Agg') # 使用非交互式后端
import matplotlib.pyplot as plt
from matplotlib.backends.backend_qt5agg import FigureCanvasQTAgg as FigureCanvas
from matplotlib.figure import Figure
import pandas as pd
from datetime import datetime
import tempfile
from wordcloud import WordCloud
import jieba
import platform
# ====================== 初始化设置 ======================
# 修复中文显示问题
plt.rcParams['font.sans-serif'] = ['SimHei', 'Microsoft YaHei', 'DejaVu Sans']
plt.rcParams['axes.unicode_minus'] = False
# 配置日志
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
logger = logging.getLogger("DialectQA")
# ====================== 工具函数 ======================
def log_exceptions(func):
"""异常日志记录装饰器"""
def wrapper(*args, **kwargs):
try:
return func(*args, **kwargs)
except Exception as e:
logger.error(f"{func.__name__} 出错: {str(e)}")
logger.debug(traceback.format_exc())
raise
return wrapper
def resource_cleanup(func):
"""资源清理装饰器"""
def wrapper(*args, **kwargs):
try:
return func(*args, **kwargs)
finally:
gc.collect()
if torch.cuda.is_available():
torch.cuda.empty_cache()
torch.cuda.synchronize()
return wrapper
def get_chinese_font_path():
"""获取中文字体路径,兼容不同操作系统"""
system = platform.system()
if system == "Windows":
font_paths = ["C:/Windows/Fonts/simhei.ttf", "C:/Windows/Fonts/msyh.ttc"]
elif system == "Darwin":
font_paths = ["/System/Library/Fonts/PingFang.ttc"]
else:
font_paths = ["/usr/share/fonts/truetype/wqy/wqy-microhei.ttc"]
for font_path in font_paths:
if os.path.exists(font_path):
return font_path
logger.warning("未找到中文字体文件,词云图可能无法正常显示中文")
return None
@log_exceptions
def check_ffmpeg_available() -> Tuple[bool, str]:
"""检查ffmpeg是否可用并返回结果"""
if not shutil.which("ffmpeg"):
return False, "系统中未找到ffmpeg,请安装并添加到PATH"
try:
result = subprocess.run(
["ffmpeg", "-version"],
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
text=True,
timeout=3
)
return "ffmpeg version" in result.stdout.lower(), result.stdout.split('\n')[0]
except Exception as e:
return False, f"FFmpeg执行失败: {str(e)}"
# ====================== 方言处理器 ======================
class DialectProcessor:
"""方言处理核心类,负责方言到普通话的转换"""
KEYWORDS = {
"opening": ["您好", "贵阳移动", "中国移动", "回访专员", "你好", "专员", "这边是", "贵州移动", "回访"],
"closing": ["不打扰", "祝您生活愉快", "再见", "感谢您的", "支持", "感谢", "麻烦您", "麻烦你", "十分好评"],
"forbidden": ["不知道", "不清楚", "不了解", "解决不了", "没办法", "你投诉吧", "随便你", "搞不成", "规定", "你理解错了", "必须"],
"salutation": ["先生", "女士", "小姐", "老师", "师傅"],
"reassurance": ["非常抱歉", "请不要着急", "我们会尽快处理", "理解", "对不住", "不好意思", "对不起"]
}
# 方言到普通话的映射
DIALECT_MAP = {
"恼火得很": "非常生气", "鬼火戳": "很愤怒", "搞不成": "无法完成",
"没得": "没有", "搞哪样嘛": "做什么呢", "归一喽": "完成了",
"咋个": "怎么", "克哪点": "去哪里", "爱喂": "喂",
"没得单": "没有", "蓝些": "哪些", "好嘞": "好的"
}
@classmethod
@log_exceptions
def convert_dialect(cls, text: str) -> str:
"""将文本中的方言转换为普通话"""
for dialect, standard in cls.DIALECT_MAP.items():
text = text.replace(dialect, standard)
return text
@classmethod
@log_exceptions
def process_texts(cls, texts: List[str]) -> List[str]:
"""处理文本列表中的方言"""
return [cls.convert_dialect(text) for text in texts]
# ====================== 配置管理器 ======================
class ConfigManager:
"""配置管理器,负责应用配置的加载、保存和验证"""
_instance = None
_DEFAULT_CONFIG = {
"model_paths": {
"asr": "models/ASR-models",
"sentiment": "models/sentiment-models"
},
"sample_rate": 16000,
"silence_thresh": -40,
"min_silence_len": 1000,
"max_concurrent": 1,
"max_audio_duration": 3600,
"enable_fp16": True,
"resource_management": {
"cleanup_interval": 2,
"cpu_cleanup_threshold": 90.0,
"gpu_cleanup_threshold": 92.0,
"enable_aggressive_cleanup": True
}
}
def __new__(cls):
if cls._instance is None:
cls._instance = super().__new__(cls)
cls._instance._initialize()
return cls._instance
def _initialize(self):
self.config = self._DEFAULT_CONFIG.copy()
self.dirty = False
self.load_config()
@log_exceptions
def load_config(self):
try:
if os.path.exists("config.json"):
with open("config.json", "r", encoding="utf-8") as f:
self.config.update(json.load(f))
except Exception as e:
logger.error(f"加载配置失败: {str(e)},使用默认配置")
@log_exceptions
def save_config(self, force=False):
if not force and not self.dirty:
return
try:
with open("config.json", "w", encoding="utf-8") as f:
json.dump(self.config, f, indent=2, ensure_ascii=False)
self.dirty = False
except Exception as e:
logger.error(f"保存配置失败: {str(e)}")
def get(self, key: str, default=None):
keys = key.split(".")
value = self.config
try:
for k in keys:
value = value[k]
return value
except KeyError:
return default
def set(self, key: str, value, immediate_save=False):
keys = key.split(".")
config = self.config
for i, k in enumerate(keys[:-1]):
if k not in config or not isinstance(config[k], dict):
config[k] = {}
config = config[k]
config[keys[-1]] = value
self.dirty = True
if immediate_save:
self.save_config(force=True)
@log_exceptions
def validate_model_paths(self) -> Tuple[bool, List[str]]:
errors = []
model_paths = self.get("model_paths", {})
for model_name, path in model_paths.items():
if not path:
errors.append(f"{model_name}模型路径未设置")
elif not os.path.exists(path):
errors.append(f"{model_name}模型路径不存在: {path}")
return len(errors) == 0, errors
# ====================== 音频处理器 ======================
class AudioProcessor:
"""音频处理核心类,负责音频的加载、预处理和增强"""
SUPPORTED_FORMATS = ('.mp3', '.wav', '.amr', '.m4a')
ENHANCEMENT_CONFIG = {
'noise_sample_duration': 0.5,
'telephone_filter_range': (300, 3400),
'compression_threshold': -25.0,
'compression_ratio': 3.0
}
def __init__(self):
self._noise_profile = None
self._sample_rate = ConfigManager().get("sample_rate", 16000)
@log_exceptions
@resource_cleanup
def process_audio(self, input_path: str, output_dir: str) -> Optional[List[str]]:
"""完整的音频处理流程"""
if not self._validate_input(input_path, output_dir):
return None
audio = self._load_audio(input_path)
if audio is None:
return None
try:
audio = self._preprocess_audio(audio)
audio = self._enhance_audio(audio)
return self._convert_to_wav(audio, input_path, output_dir)
except Exception as e:
logger.error(f"音频处理失败: {str(e)}")
return None
def _validate_input(self, input_path: str, output_dir: str) -> bool:
ffmpeg_available, ffmpeg_msg = check_ffmpeg_available()
if not ffmpeg_available:
logger.error(f"ffmpeg错误: {ffmpeg_msg}")
return False
os.makedirs(output_dir, exist_ok=True)
ext = os.path.splitext(input_path)[1].lower()
if ext not in self.SUPPORTED_FORMATS:
logger.error(f"不支持的音频格式: {ext}")
return False
if not os.path.exists(input_path):
logger.error(f"文件不存在: {input_path}")
return False
return True
def _load_audio(self, input_path: str) -> Optional[AudioSegment]:
try:
return AudioSegment.from_file(input_path)
except Exception as e:
logger.error(f"无法加载音频文件 {input_path}: {str(e)}")
return None
def _preprocess_audio(self, audio: AudioSegment) -> AudioSegment:
if audio.channels > 1:
audio = audio.set_channels(1)
if audio.frame_rate != self._sample_rate:
audio = audio.set_frame_rate(self._sample_rate)
return audio
def _enhance_audio(self, audio: AudioSegment) -> AudioSegment:
self._analyze_noise_profile(audio)
audio = self._reduce_noise(audio)
audio = self._apply_telephone_filter(audio)
audio = self._compress_dynamic_range(audio)
audio = effects.normalize(audio, headroom=0.1)
return audio
def _analyze_noise_profile(self, audio: AudioSegment):
try:
samples = np.array(audio.get_array_of_samples(), dtype=np.float32)
sr = audio.frame_rate
noise_samples = int(sr * self.ENHANCEMENT_CONFIG['noise_sample_duration'])
self._noise_profile = samples[:min(noise_samples, len(samples))]
except Exception as e:
logger.warning(f"噪声分析失败,跳过降噪: {str(e)}")
self._noise_profile = None
def _reduce_noise(self, audio: AudioSegment) -> AudioSegment:
if self._noise_profile is None:
return audio
try:
samples = np.array(audio.get_array_of_samples(), dtype=np.float32)
sr = audio.frame_rate
reduced = nr.reduce_noise(
y=samples,
sr=sr,
y_noise=self._noise_profile,
prop_decrease=0.8,
stationary=True,
n_std_thresh_stationary=1.5
)
reduced_int16 = reduced.astype(np.int16)
return AudioSegment(
reduced_int16.tobytes(),
frame_rate=sr,
sample_width=audio.sample_width,
channels=1
)
except Exception as e:
logger.warning(f"降噪失败,使用原始音频: {str(e)}")
return audio
def _apply_telephone_filter(self, audio: AudioSegment) -> AudioSegment:
low_cut, high_cut = self.ENHANCEMENT_CONFIG['telephone_filter_range']
try:
return audio.high_pass_filter(low_cut).low_pass_filter(high_cut)
except Exception as e:
logger.warning(f"滤波失败,使用原始音频: {str(e)}")
return audio
def _compress_dynamic_range(self, audio: AudioSegment) -> AudioSegment:
params = {
'threshold': self.ENHANCEMENT_CONFIG['compression_threshold'],
'ratio': self.ENHANCEMENT_CONFIG['compression_ratio']
}
try:
return audio.compress_dynamic_range(**params)
except Exception as e:
logger.warning(f"动态压缩失败,使用原始音频: {str(e)}")
return audio
def _convert_to_wav(self, audio: AudioSegment, input_path: str, output_dir: str) -> List[str]:
base_name = os.path.splitext(os.path.basename(input_path))[0]
wav_path = os.path.join(output_dir, f"{base_name}.wav")
try:
audio.export(wav_path, format="wav", parameters=["-acodec", "pcm_s16le"])
return [wav_path]
except Exception as e:
logger.error(f"转换WAV失败: {str(e)}")
return []
@staticmethod
@log_exceptions
def extract_audio_features(y: np.ndarray, sr: int) -> Dict[str, float]:
"""提取音频特征:时长、语速(音节/秒)、音量稳定性"""
try:
duration = round(librosa.get_duration(y=y, sr=sr), 2)
segment_len = 60 # 分段长度(秒)
total_segments = max(1, int(np.ceil(duration / segment_len)))
syllable_rates = []
total_samples = len(y)
samples_per_segment = int(segment_len * sr)
for i in range(total_segments):
start = i * samples_per_segment
end = min((i + 1) * samples_per_segment, total_samples)
y_seg = y[start:end]
intervals = librosa.effects.split(y_seg, top_db=20)
speech_samples = sum(end - start for start, end in intervals)
speech_duration = speech_samples / sr
if speech_duration > 0.1:
syllable_rates.append(len(intervals) / speech_duration)
rms = librosa.feature.rms(y=y, frame_length=2048, hop_length=512)[0]
volume_stability = round(np.std(rms) / np.mean(rms), 4) if (len(rms) > 0 and np.mean(rms) > 0) else 0.0
valid_rates = [r for r in syllable_rates if r > 0]
syllable_rate = round(np.mean(valid_rates), 2) if valid_rates else 0.0
return {
"duration": duration,
"syllable_rate": syllable_rate,
"volume_stability": volume_stability
}
except Exception as e:
logger.error(f"特征提取错误: {str(e)}")
return {"duration": 0, "syllable_rate": 0, "volume_stability": 0}
# ====================== 模型加载器 ======================
class ModelLoader:
"""模型加载器,负责ASR和情感分析模型的加载和管理"""
_instance = None
asr_pipeline = None
sentiment_model = None