# Review request: check this code for errors, soundness, and redundancy.
import os
import sys
import re
import json
import gc
import time
import concurrent.futures
import traceback
import numpy as np
import librosa
import torch
import psutil
from typing import List, Dict, Tuple, Optional
from threading import RLock, Semaphore
from pydub import AudioSegment
from pydub.silence import split_on_silence
from pydub.utils import get_encoder_name_extension, make_chunks
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks
from transformers import AutoModelForSequenceClassification, AutoTokenizer
from torch.utils.data import TensorDataset, DataLoader
from PyQt5.QtWidgets import (QApplication, QMainWindow, QWidget, QVBoxLayout, QHBoxLayout,
QPushButton, QLabel, QLineEdit, QTextEdit, QFileDialog, QProgressBar,
QGroupBox, QMessageBox, QListWidget, QSplitter, QTabWidget, QTableWidget,
QTableWidgetItem, QHeaderView, QAction, QMenu, QToolBar, QComboBox,
QSpinBox, QDialog, QDialogButtonBox)
from PyQt5.QtCore import QThread, pyqtSignal, Qt
from PyQt5.QtGui import QFont, QColor, QIcon
# ====================== 工具函数 ======================
def check_ffmpeg_available() -> Tuple[bool, str]:
    """Report whether an ffmpeg-compatible converter can be found on PATH.

    The check looks for an ``ffmpeg`` (or legacy ``avconv``) executable with
    ``shutil.which``. A converter configured only through a custom
    ``AudioSegment.converter`` path is not considered here.

    Returns:
        ``(available, message)`` where ``message`` is a user-facing
        explanation of the outcome.
    """
    # Function-scope import keeps this helper self-contained.
    import shutil

    try:
        found = any(shutil.which(tool) for tool in ("ffmpeg", "avconv"))
    except Exception as e:
        # Defensive: a broken PATH/environment should not crash the caller.
        return False, f"ffmpeg检查失败: {str(e)}"
    if not found:
        return False, "未找到ffmpeg程序,请安装ffmpeg并确保其在系统PATH中"
    return True, "ffmpeg已正确安装并可用"
# ====================== 资源监控器 ======================
class ResourceMonitor:
    """Samples host RAM usage and, when CUDA is present, torch GPU memory."""

    def __init__(self):
        # Probe CUDA once; the flag is reused by every later sample.
        self.gpu_available = torch.cuda.is_available()

    def memory_percent(self) -> Dict[str, float]:
        """Return current memory usage as percentages.

        Returns:
            A dict with a ``"cpu"`` entry taken from
            ``psutil.virtual_memory().percent`` and, when CUDA is available,
            a ``"gpu"`` entry: memory allocated by torch divided by the total
            memory of device 0. If sampling raises, the error is printed and
            both entries are reported as 0.
        """
        gib = 1024 ** 3
        try:
            usage = {"cpu": psutil.virtual_memory().percent}
            if not self.gpu_available:
                return usage
            used_gib = torch.cuda.memory_allocated() / gib
            total_gib = torch.cuda.get_device_properties(0).total_memory / gib
            usage["gpu"] = (used_gib / total_gib) * 100 if total_gib > 0 else 0
            return usage
        except Exception as e:
            print(f"内存监控失败: {str(e)}")
            return {"cpu": 0, "gpu": 0}
# ====================== 方言处理器(简化版) ======================
class DialectProcessor:
    """Keyword tables and dialect-to-Mandarin text normalisation.

    ``KEYWORDS`` groups service-script phrases (Guizhou dialect and Mandarin
    mixed) by role; ``DIALECT_MAPPING`` lists fixed dialect phrases and their
    Mandarin replacements. ``preprocess_text`` applies the mapping with a
    longest-match trie scan.
    """

    # Combined Guizhou-dialect and Mandarin keywords, grouped by purpose.
    KEYWORDS = {
        "opening": ["您好", "很高兴为您服务", "请问有什么可以帮您", "麻烦您喽", "请问搞哪样", "有咋个可以帮您",
                    "多谢喽"],
        "closing": ["感谢来电", "祝您生活愉快", "再见", "搞归一喽", "麻烦您喽", "再见喽", "慢走喽"],
        "forbidden": ["不知道", "没办法", "你投诉吧", "随便你", "搞不成", "没得法", "随便你喽", "你投诉吧喽"],
        "salutation": ["先生", "女士", "小姐", "老师", "师傅", "哥", "姐", "兄弟", "妹儿"],
        "reassurance": ["非常抱歉", "请不要着急", "我们会尽快处理", "理解您的心情", "实在对不住", "莫急哈",
                        "马上帮您整", "理解您得很"]
    }
    # Fixed mapping from Guizhou dialect phrases to Mandarin.
    DIALECT_MAPPING = {
        "恼火得很": "非常生气", "鬼火戳": "很愤怒", "搞不成": "无法完成", "没得": "没有",
        "搞哪样嘛": "做什么呢", "归一喽": "完成了", "咋个": "怎么", "克哪点": "去哪里",
        "麻烦您喽": "麻烦您了", "多谢喽": "多谢了", "憨包": "傻瓜", "归一": "结束",
        "板扎": "很好", "鬼火冒": "非常生气", "背时": "倒霉", "吃豁皮": "占便宜"
    }
    # Lazily built trie root, shared by all callers.
    _trie_root = None

    class TrieNode:
        """One trie node: child links plus the replacement stored at phrase ends."""

        def __init__(self):
            self.children = {}
            self.is_end = False
            self.value = ""

    @classmethod
    def build_dialect_trie(cls):
        """Build (once) and return the trie holding every dialect phrase."""
        if cls._trie_root is not None:
            return cls._trie_root
        root = cls.TrieNode()
        # Insertion order is irrelevant for a trie; longest-match priority is
        # enforced by the scan in preprocess_text, not by sorting here.
        for dialect, standard in cls.DIALECT_MAPPING.items():
            node = root
            for char in dialect:
                node = node.children.setdefault(char, cls.TrieNode())
            node.is_end = True
            node.value = standard
        cls._trie_root = root
        return root

    @classmethod
    def preprocess_text(cls, texts: List[str]) -> List[str]:
        """Replace dialect phrases in each text with their Mandarin form.

        At every position the longest phrase from ``DIALECT_MAPPING`` that
        starts there wins (so "归一喽" becomes "完成了", not "结束喽");
        characters that start no phrase are copied through unchanged.

        Args:
            texts: Input strings.

        Returns:
            A new list with one converted string per input string.
        """
        root = cls.build_dialect_trie()
        converted = []
        for text in texts:
            pieces = []
            pos, size = 0, len(text)
            while pos < size:
                node = root
                cursor = pos
                match_end = None
                match_value = ""
                # Walk as deep as the text allows, remembering the most
                # recent (= longest) complete phrase seen on the way.
                while cursor < size and text[cursor] in node.children:
                    node = node.children[text[cursor]]
                    cursor += 1
                    if node.is_end:
                        match_end, match_value = cursor, node.value
                if match_end is None:
                    pieces.append(text[pos])
                    pos += 1
                else:
                    pieces.append(match_value)
                    pos = match_end
            converted.append(''.join(pieces))
        return converted
# ====================== 系统配置管理器 ======================
class ConfigManager:
    """Process-wide configuration singleton backed by ``config.json``.

    Defaults are defined in ``_init_config`` and overlaid with whatever the
    JSON file in the current working directory provides.
    """

    _instance = None

    def __new__(cls):
        if cls._instance is None:
            # Publish the instance only after it is fully initialised so a
            # failure in _init_config cannot leave a half-built singleton.
            instance = super().__new__(cls)
            instance._init_config()
            cls._instance = instance
        return cls._instance

    def _init_config(self):
        """Install the default settings, then overlay the file on disk."""
        self.config = {
            "model_paths": {
                "asr": "./models/iic-speech_paraformer-large-vad-punc-spk_asr_nat-zh-cn",
                "sentiment": "./models/IDEA-CCNL-Erlangshen-Roberta-110M-Sentiment"
            },
            "sample_rate": 16000,
            "silence_thresh": -40,
            "min_silence_len": 1000,
            "max_concurrent": 1,
            "max_audio_duration": 3600
        }
        self.load_config()

    @staticmethod
    def _deep_update(base: Dict, overrides: Dict) -> Dict:
        """Merge ``overrides`` into ``base`` in place, recursing into dicts.

        Nested dictionaries are merged key by key so that a partial section
        (for example only ``model_paths.asr``) keeps the remaining defaults.
        """
        for key, value in overrides.items():
            if isinstance(value, dict) and isinstance(base.get(key), dict):
                ConfigManager._deep_update(base[key], value)
            else:
                base[key] = value
        return base

    def load_config(self):
        """Overlay ``config.json`` (if present) on the current settings.

        Any problem reading or parsing the file is printed and the current
        settings are kept.
        """
        try:
            if os.path.exists("config.json"):
                with open("config.json", "r", encoding="utf-8") as f:
                    loaded = json.load(f)
                if not isinstance(loaded, dict):
                    # A JSON list/scalar cannot be merged into the settings.
                    print("配置文件格式错误,使用默认配置")
                    return
                self._deep_update(self.config, loaded)
        except json.JSONDecodeError:
            print("配置文件格式错误,使用默认配置")
        except Exception as e:
            print(f"加载配置失败: {str(e)},使用默认配置")

    def save_config(self):
        """Write the current settings to ``config.json``; failures are printed."""
        try:
            with open("config.json", "w", encoding="utf-8") as f:
                json.dump(self.config, f, indent=2, ensure_ascii=False)
        except Exception as e:
            print(f"保存配置失败: {str(e)}")

    def get(self, key: str, default=None):
        """Return the top-level setting ``key`` or ``default`` when absent."""
        return self.config.get(key, default)

    def set(self, key: str, value):
        """Store a top-level setting and persist the configuration."""
        self.config[key] = value
        self.save_config()

    def check_model_paths(self) -> Tuple[bool, List[str]]:
        """Validate that every configured model path is an existing directory.

        Returns:
            ``(ok, errors)`` where ``errors`` holds one user-facing message
            per invalid entry and ``ok`` is True when it is empty.
        """
        errors = []
        model_paths = self.get("model_paths", {}) or {}
        for model_name, path in model_paths.items():
            if not path:
                errors.append(f"{model_name}模型路径未设置")
            elif not os.path.exists(path):
                errors.append(f"{model_name}模型路径不存在: {path}")
            elif not os.path.isdir(path):
                errors.append(f"{model_name}模型路径不是有效的目录: {path}")
        return len(errors) == 0, errors
# ====================== 音频处理工具 ======================
class AudioProcessor:
    """Static helpers for converting call recordings and measuring them."""

    SUPPORTED_FORMATS = ('.mp3', '.wav', '.amr', '.m4a')

    @staticmethod
    def check_dependencies():
        """Return the ``(available, message)`` result of the ffmpeg check."""
        return check_ffmpeg_available()

    @staticmethod
    def convert_to_wav(input_path: str, temp_dir: str) -> Optional[List[str]]:
        """Produce WAV file(s) for ``input_path``.

        WAV inputs are returned untouched (the returned path is the caller's
        original file, not a temporary copy). Other supported formats are
        decoded, resampled to the configured rate, down-mixed to mono and
        written into ``temp_dir``; recordings longer than
        ``max_audio_duration`` seconds are split into several parts.

        Returns:
            The list of WAV paths, or ``None`` when conversion failed (the
            reason is printed).
        """
        try:
            os.makedirs(temp_dir, exist_ok=True)
            ext = os.path.splitext(input_path)[1].lower()
            if ext not in AudioProcessor.SUPPORTED_FORMATS:
                raise ValueError(f"不支持的音频格式: {ext},支持的格式为: {', '.join(AudioProcessor.SUPPORTED_FORMATS)}")
            if ext == '.wav':
                # Passthrough needs no converter, so do not require ffmpeg.
                return [input_path]
            ffmpeg_available, ffmpeg_msg = check_ffmpeg_available()
            if not ffmpeg_available:
                print(f"ffmpeg错误: {ffmpeg_msg}")
                return None
            try:
                audio = AudioSegment.from_file(input_path)
            except Exception as e:
                raise RuntimeError(f"无法加载音频文件: {str(e)}。请确认文件未损坏且ffmpeg支持该格式。") from e
            max_duration = ConfigManager().get("max_audio_duration", 3600) * 1000
            # len(AudioSegment) is in milliseconds, hence the * 1000 above.
            if len(audio) > max_duration:
                return AudioProcessor._split_long_audio(audio, input_path, temp_dir)
            return AudioProcessor._convert_single_audio(audio, input_path, temp_dir)
        except Exception as e:
            print(f"格式转换失败: {str(e)}")
            return None

    @staticmethod
    def _split_long_audio(audio: "AudioSegment", input_path: str, temp_dir: str) -> List[str]:
        """Split ``audio`` on silence and export parts of roughly five minutes.

        Silence-delimited chunks are greedily merged while the merged length
        stays under five minutes; each merged part is exported as
        ``<basename>_part<N>.wav`` in ``temp_dir``.
        """
        config = ConfigManager()
        chunks = split_on_silence(
            audio,
            min_silence_len=config.get("min_silence_len", 1000),
            silence_thresh=config.get("silence_thresh", -40),
            keep_silence=500
        )
        part_limit_ms = 5 * 60 * 1000
        merged_chunks = []
        current_chunk = AudioSegment.empty()
        for chunk in chunks:
            if len(current_chunk) + len(chunk) < part_limit_ms:
                current_chunk += chunk
            else:
                if len(current_chunk) > 0:
                    merged_chunks.append(current_chunk)
                current_chunk = chunk
        if len(current_chunk) > 0:
            merged_chunks.append(current_chunk)

        sample_rate = config.get("sample_rate", 16000)
        stem = os.path.splitext(os.path.basename(input_path))[0]
        wav_paths = []
        for index, chunk in enumerate(merged_chunks, start=1):
            chunk = chunk.set_frame_rate(sample_rate).set_channels(1)
            chunk_path = os.path.join(temp_dir, f"{stem}_part{index}.wav")
            chunk.export(chunk_path, format="wav")
            wav_paths.append(chunk_path)
        return wav_paths

    @staticmethod
    def _convert_single_audio(audio: "AudioSegment", input_path: str, temp_dir: str) -> List[str]:
        """Export ``audio`` as one mono WAV at the configured sample rate."""
        sample_rate = ConfigManager().get("sample_rate", 16000)
        audio = audio.set_frame_rate(sample_rate).set_channels(1)
        wav_path = os.path.join(temp_dir, os.path.splitext(os.path.basename(input_path))[0] + ".wav")
        audio.export(wav_path, format="wav")
        return [wav_path]

    @staticmethod
    def extract_features_from_audio(y: np.ndarray, sr: int) -> Dict[str, float]:
        """Compute duration, a speech-rate proxy and volume stability.

        The signal is processed in 60-second windows. ``syllable_rate`` is the
        mean, over windows with a positive value, of (number of non-silent
        intervals / seconds of non-silent audio); it is an interval count, not
        a true syllable count. ``volume_stability`` is the mean coefficient of
        variation (std / mean) of the RMS energy per window.

        Returns:
            A dict with ``duration``, ``syllable_rate`` and
            ``volume_stability``; all zeros when extraction fails.
        """
        try:
            duration = librosa.get_duration(y=y, sr=sr)
            segment_length = 60
            total_segments = max(1, int(np.ceil(duration / segment_length)))
            syllable_rates, volume_stabilities = [], []
            total_samples = len(y)
            samples_per_segment = int(segment_length * sr)
            for i in range(total_segments):
                start = i * samples_per_segment
                end = min((i + 1) * samples_per_segment, total_samples)
                y_segment = y[start:end]
                if len(y_segment) == 0:
                    continue
                intervals = librosa.effects.split(y_segment, top_db=20)
                speech_samples = sum(seg_end - seg_start for seg_start, seg_end in intervals)
                speech_duration = speech_samples / sr
                syllable_rates.append(len(intervals) / speech_duration if speech_duration > 0.1 else 0)
                rms = librosa.feature.rms(y=y_segment, frame_length=2048, hop_length=512)[0]
                if len(rms) > 0 and np.mean(rms) > 0:
                    volume_stabilities.append(np.std(rms) / np.mean(rms))
            # Filter before averaging: np.mean([]) would yield NaN when every
            # window was silent.
            voiced_rates = [rate for rate in syllable_rates if rate > 0]
            return {
                "duration": duration,
                "syllable_rate": round(np.mean(voiced_rates), 2) if voiced_rates else 0,
                "volume_stability": round(np.mean(volume_stabilities) if volume_stabilities else 0, 4)
            }
        except Exception as e:
            print(f"特征提取错误: {str(e)}")
            return {"duration": 0, "syllable_rate": 0, "volume_stability": 0}
# ====================== 模型加载器 ======================
class ModelLoader:
    """Class-level holder for the shared ASR pipeline and sentiment model."""

    asr_pipeline = None
    sentiment_model = None
    sentiment_tokenizer = None
    # Re-entrant so reload_models can call load_models while holding it.
    model_lock = RLock()
    # True only while both models are loaded and usable.
    models_loaded = False

    @classmethod
    def load_models(cls):
        """Load whichever model is still missing and mark loading complete.

        Raises:
            ValueError: If the configured model paths are invalid.
            Exception: Whatever the underlying loaders raise.
        """
        config = ConfigManager()
        paths_valid, errors = config.check_model_paths()
        if not paths_valid:
            raise ValueError(f"模型路径无效:\n{chr(10).join(errors)}")
        model_paths = config.get("model_paths")
        # Check, lock, re-check: a concurrent caller may have loaded the
        # model while this one was waiting for the lock.
        if cls.asr_pipeline is None:
            with cls.model_lock:
                if cls.asr_pipeline is None:
                    cls._load_asr_model(model_paths["asr"])
        if cls.sentiment_model is None:
            with cls.model_lock:
                if cls.sentiment_model is None:
                    cls._load_sentiment_model(model_paths["sentiment"])
        cls.models_loaded = True

    @classmethod
    def reload_models(cls):
        """Drop the loaded models, free memory and load them again."""
        with cls.model_lock:
            # Reset the flag first so a failed reload is not reported as loaded.
            cls.models_loaded = False
            cls.asr_pipeline = None
            cls.sentiment_model = None
            cls.sentiment_tokenizer = None
            gc.collect()
            if torch.cuda.is_available():
                torch.cuda.empty_cache()
            cls.load_models()

    @classmethod
    def _load_asr_model(cls, model_path: str):
        """Create the ASR pipeline from ``model_path``.

        Raises:
            FileNotFoundError: If ``model_path`` does not exist.
            Exception: Any error raised while building the pipeline (printed,
                then re-raised).
        """
        try:
            if not os.path.exists(model_path):
                raise FileNotFoundError(f"ASR模型路径不存在: {model_path}")
            # NOTE(review): hasattr(torch, 'quantization') is true on current
            # torch builds, so 'quantize' is effectively always passed —
            # confirm the pipeline accepts this keyword.
            asr_kwargs = {'quantize': 'int8'} if hasattr(torch, 'quantization') else {}
            cls.asr_pipeline = pipeline(
                task=Tasks.auto_speech_recognition,
                model=model_path,
                device='cuda' if torch.cuda.is_available() else 'cpu',
                **asr_kwargs
            )
        except Exception as e:
            print(f"加载ASR模型失败: {str(e)}")
            raise

    @classmethod
    def _load_sentiment_model(cls, model_path: str):
        """Load the sentiment classifier and tokenizer from ``model_path``.

        Raises:
            FileNotFoundError: If ``model_path`` does not exist.
            Exception: Any loader error (printed, then re-raised).
        """
        try:
            if not os.path.exists(model_path):
                raise FileNotFoundError(f"情感分析模型路径不存在: {model_path}")
            cls.sentiment_model = AutoModelForSequenceClassification.from_pretrained(model_path)
            cls.sentiment_tokenizer = AutoTokenizer.from_pretrained(model_path)
            if torch.cuda.is_available():
                cls.sentiment_model = cls.sentiment_model.cuda()
        except Exception as e:
            print(f"加载情感分析模型失败: {str(e)}")
            raise
# ====================== 核心分析线程(简化版) ======================
class AnalysisThread(QThread):
    """Worker thread that analyses a list of call recordings.

    For every file it converts the audio to WAV, extracts acoustic features,
    runs ASR, separates agent and customer turns, scores sentiment, checks
    service-script rules and guesses whether the issue was resolved. Results
    and progress are reported through the Qt signals below.
    """

    # (progress percent, status message, current file name)
    progress_updated = pyqtSignal(int, str, str)
    # One result dict per analysed file.
    result_ready = pyqtSignal(dict)
    # Emitted when all files were processed without the run being stopped.
    finished_all = pyqtSignal()
    # (title, detail)
    error_occurred = pyqtSignal(str, str)
    # Emitted when memory usage crosses the abort threshold.
    memory_warning = pyqtSignal()
    # Emitted right before the final cleanup at the end of run().
    resource_cleanup = pyqtSignal()

    # Both ASCII and full-width marks are listed; presumably the ASR
    # punctuation model emits full-width marks — verify against its output.
    _QUESTION_MARKERS = ("吗", "呢", "?", "?", "如何", "怎样")
    _SENTENCE_BREAK = re.compile(r'(?<=[。!?;,!?;,])')
    # Ordered rules: the first rule with a matching keyword labels the text.
    _EMOTION_RULES = (
        ("愤怒", ("愤怒", "生气", "鬼火", "恼火")),
        ("不耐烦", ("不耐烦", "搞哪样嘛")),
        ("沮丧", ("背时",)),
        ("失望", ("失望", "不满")),
    )

    def __init__(self, audio_paths: List[str], temp_dir: str = "temp_wav"):
        """Store the work list and derive the concurrency limit.

        Args:
            audio_paths: Files to analyse.
            temp_dir: Directory for converted WAV files (created if missing).
        """
        super().__init__()
        self.audio_paths = audio_paths
        self.temp_dir = temp_dir
        self.is_running = True
        self.current_file = ""
        try:
            configured = int(ConfigManager().get("max_concurrent", 1))
        except (TypeError, ValueError):
            configured = 1
        # Never drop below one worker, whatever the config file says.
        self.max_concurrent = max(1, min(configured, self._get_max_concurrent_tasks()))
        self.resource_monitor = ResourceMonitor()
        # Kept for backward compatibility; concurrency is bounded by the
        # executor's max_workers in run().
        self.semaphore = Semaphore(self.max_concurrent)
        os.makedirs(temp_dir, exist_ok=True)

    def run(self):
        """Analyse every file and stream results as they complete."""
        try:
            ffmpeg_available, ffmpeg_msg = check_ffmpeg_available()
            if not ffmpeg_available:
                self.error_occurred.emit("音频处理依赖缺失", f"无法处理音频: {ffmpeg_msg}\n\n请安装ffmpeg并确保其在系统PATH中。\nWindows用户可从https://ffmpeg.org/download.html下载并添加到环境变量。")
                return
            # Accept either the explicit flag or the presence of all model
            # objects, so models loaded through the private loaders count too.
            models_ready = ModelLoader.models_loaded or (
                ModelLoader.asr_pipeline is not None
                and ModelLoader.sentiment_model is not None
                and ModelLoader.sentiment_tokenizer is not None
            )
            if not models_ready:
                self.error_occurred.emit("模型未加载", "请等待模型加载完成后再开始分析")
                return
            self.progress_updated.emit(0, f"最大并行任务数: {self.max_concurrent}", "")
            total = len(self.audio_paths)
            batch_size = self._get_available_batch_size()
            with concurrent.futures.ThreadPoolExecutor(max_workers=self.max_concurrent) as executor:
                # Submit everything up front; the executor bounds concurrency
                # and results can be reported as soon as each file finishes.
                future_to_path = {
                    executor.submit(self.analyze_audio, path, batch_size): path
                    for path in self.audio_paths
                }
                try:
                    completed = concurrent.futures.as_completed(future_to_path)
                    for done, future in enumerate(completed, start=1):
                        if not self.is_running:
                            break
                        self.current_file = os.path.basename(future_to_path[future])
                        if self._check_memory_usage():
                            self.memory_warning.emit()
                            self.is_running = False
                            break
                        try:
                            result = future.result()
                            if result:
                                self.result_ready.emit(result)
                        except Exception as e:
                            self.result_ready.emit({
                                "file_name": self.current_file,
                                "status": "error",
                                "error": f"分析失败: {str(e)}"
                            })
                        # Report progress for failed files too.
                        self.progress_updated.emit(
                            int(done / total * 100),
                            f"完成: {self.current_file} ({done}/{total})",
                            self.current_file)
                finally:
                    # On stop/abort, drop work that has not started yet so the
                    # executor shutdown does not process the remaining files.
                    for pending in future_to_path:
                        pending.cancel()
            if self.is_running:
                self.finished_all.emit()
        except Exception as e:
            self.error_occurred.emit("系统错误", str(e))
            traceback.print_exc()
        finally:
            self.resource_cleanup.emit()
            self._cleanup_resources()

    def analyze_audio(self, audio_path: str, batch_size: int) -> Dict:
        """Run the full analysis for one file.

        Returns:
            A result dict whose ``status`` is ``"success"`` or ``"error"``;
            on error the ``error`` key carries the message.
        """
        result = {"file_name": os.path.basename(audio_path), "status": "processing"}
        wav_paths: List[str] = []
        try:
            wav_paths = AudioProcessor.convert_to_wav(audio_path, self.temp_dir) or []
            if not wav_paths:
                result["error"] = "格式转换失败,请检查文件是否损坏或格式是否支持"
                result["status"] = "error"
                return result
            audio_features = self._extract_audio_features(wav_paths)
            result.update(audio_features)
            result["duration_str"] = self._format_duration(audio_features["duration"])
            all_segments, _ = self._process_asr_segments(wav_paths)
            agent_segments, customer_segments = self._identify_speakers(all_segments)
            result["asr_text"] = self._generate_labeled_text(all_segments, agent_segments, customer_segments).strip()
            result.update(self._analyze_text(agent_segments, customer_segments, batch_size))
            result.update(self._check_service_rules(agent_segments))
            result["issue_resolved"] = self._check_issue_resolution(customer_segments, agent_segments)
            result["status"] = "success"
        except Exception as e:
            result["error"] = f"分析失败: {str(e)}"
            result["status"] = "error"
        finally:
            # WAV inputs are passed through unconverted, so the list may
            # contain the user's own file: never delete the source recording.
            source = os.path.abspath(audio_path)
            self._cleanup_temp_files([p for p in wav_paths if os.path.abspath(p) != source])
            self._cleanup_resources()
        return result

    # ---------------------- speaker identification ----------------------
    def _identify_speakers(self, segments: List[Dict]) -> Tuple[List[Dict], List[Dict]]:
        """Split segments into (agent, customer) lists.

        The agent is chosen by the first rule that yields a speaker: opening
        keywords, closing keywords, salutations, reassurance phrases, speech
        patterns (only with at least four segments), and finally the speaker
        with the most segments. Everyone else is treated as the customer.
        """
        if not segments:
            return [], []
        agent_id = None
        for rule in (self._identify_by_opening, self._identify_by_closing,
                     self._identify_by_salutation, self._identify_by_reassurance):
            agent_id = rule(segments)
            if agent_id is not None:
                break
        if agent_id is None and len(segments) >= 4:
            agent_id = self._identify_by_speech_patterns(segments)
        if agent_id is None:
            # Last resort: the speaker with the most segments.
            spk_counts = {}
            for seg in segments:
                spk_counts[seg["spk_id"]] = spk_counts.get(seg["spk_id"], 0) + 1
            agent_id = max(spk_counts, key=spk_counts.get) if spk_counts else None
        if agent_id is None:
            return [], []
        agent = [seg for seg in segments if seg["spk_id"] == agent_id]
        customer = [seg for seg in segments if seg["spk_id"] != agent_id]
        return agent, customer

    @staticmethod
    def _first_speaker_mentioning(segments, keywords) -> Optional[str]:
        """Return the speaker of the first segment containing any keyword."""
        for seg in segments:
            if any(kw in seg["text"] for kw in keywords):
                return seg["spk_id"]
        return None

    def _identify_by_opening(self, segments: List[Dict]) -> Optional[str]:
        """Rule 1: opening keywords within the first three segments."""
        return self._first_speaker_mentioning(segments[:3], DialectProcessor.KEYWORDS["opening"])

    def _identify_by_closing(self, segments: List[Dict]) -> Optional[str]:
        """Rule 2: closing keywords within the last three segments, latest first."""
        return self._first_speaker_mentioning(reversed(segments[-3:]), DialectProcessor.KEYWORDS["closing"])

    def _identify_by_salutation(self, segments: List[Dict]) -> Optional[str]:
        """Rule 3: salutation/honorific keywords anywhere in the call."""
        return self._first_speaker_mentioning(segments, DialectProcessor.KEYWORDS["salutation"])

    def _identify_by_reassurance(self, segments: List[Dict]) -> Optional[str]:
        """Rule 4: reassurance phrases anywhere in the call."""
        return self._first_speaker_mentioning(segments, DialectProcessor.KEYWORDS["reassurance"])

    def _identify_by_speech_patterns(self, segments: List[Dict]) -> Optional[str]:
        """Fallback: score speakers by talk time (60%) and question rate (40%)."""
        speaker_features = {}
        for seg in segments:
            features = speaker_features.setdefault(
                seg["spk_id"], {"total_duration": 0.0, "turn_count": 0, "question_count": 0})
            features["total_duration"] += (seg["end"] - seg["start"])
            features["turn_count"] += 1
            if any(marker in seg["text"] for marker in self._QUESTION_MARKERS):
                features["question_count"] += 1
        if not speaker_features:
            return None
        max_duration = max(f["total_duration"] for f in speaker_features.values())
        best_id, best_score = None, None
        for spk_id, features in speaker_features.items():
            # Guard against zero-length segments (max_duration == 0).
            duration_share = features["total_duration"] / max_duration if max_duration > 0 else 0.0
            question_rate = features["question_count"] / features["turn_count"]
            score = 0.6 * duration_share + 0.4 * question_rate
            if best_score is None or score > best_score:
                best_id, best_score = spk_id, score
        return best_id

    # ---------------------- sentiment analysis ----------------------
    def _analyze_text(self, agent_segments: List[Dict], customer_segments: List[Dict], batch_size: int) -> Dict:
        """Return sentiment scores and emotion labels for agent and customer."""
        return {
            **self._analyze_speaker(agent_segments, "agent", batch_size),
            **self._analyze_speaker(customer_segments, "customer", batch_size),
        }

    @classmethod
    def _split_long_sentences(cls, texts: List[str]) -> List[str]:
        """Break texts longer than 128 characters at punctuation marks."""
        pieces = []
        for text in texts:
            if len(text) <= 128:
                pieces.append(text)
                continue
            current = ""
            for part in cls._SENTENCE_BREAK.split(text):
                if len(current) + len(part) < 128:
                    current += part
                else:
                    if current:
                        pieces.append(current)
                    current = part
            if current:
                pieces.append(current)
        return pieces

    @staticmethod
    def _tag_with_keywords(texts: List[str]) -> List[str]:
        """Prefix each text with ``[positive|negative|neutral]`` on keyword hit."""
        emotion_keywords = {
            "positive": ["满意", "高兴", "感谢", "专业", "解决", "帮助", "谢谢", "很好", "不错"],
            "negative": ["生气", "愤怒", "不满", "投诉", "问题", "失望", "差劲", "糟糕", "投诉"],
            "neutral": ["了解", "明白", "知道", "确认", "查询", "记录", "需要", "提供"]
        }
        tagged = []
        for text in texts:
            label = next(
                (name for name, words in emotion_keywords.items() if any(kw in text for kw in words)),
                None)
            tagged.append(f"[{label}] {text}" if label else text)
        return tagged

    @classmethod
    def _detect_emotions(cls, texts: List[str]) -> List[str]:
        """Return the distinct emotion labels found, in first-seen order."""
        found = {}
        for text in texts:
            for label, words in cls._EMOTION_RULES:
                if any(kw in text for kw in words):
                    found.setdefault(label, None)
                    break
        return list(found)

    @staticmethod
    def _three_way_sentiment(probs: List[float]) -> Tuple[float, float, float]:
        """Map class probabilities to (negative, neutral, positive).

        NOTE(review): assumes index 0 is the negative class and the last
        index the positive class; middle classes (if any) are summed into
        neutral. Confirm against the model's ``id2label``.
        """
        if len(probs) < 2:
            return 0.0, 1.0, 0.0
        return probs[0], float(sum(probs[1:-1])), probs[-1]

    def _analyze_speaker(self, segments: List[Dict], speaker_type: str, batch_size: int) -> Dict:
        """Score one speaker's segments with the sentiment model."""
        if not segments:
            return {
                f"{speaker_type}_negative": 0.0,
                f"{speaker_type}_neutral": 1.0,
                f"{speaker_type}_positive": 0.0,
                f"{speaker_type}_emotions": "无"
            }
        texts = [seg["text"] for seg in segments]
        processed_texts = DialectProcessor.preprocess_text(texts)
        enhanced_texts = self._tag_with_keywords(self._split_long_sentences(processed_texts))
        # Scan the raw texts as well: dialect conversion rewrites phrases such
        # as "背时", so they can no longer be found afterwards.
        emotions = self._detect_emotions(texts + processed_texts)

        sentiment_dist = []
        with ModelLoader.model_lock:
            inputs = ModelLoader.sentiment_tokenizer(
                enhanced_texts, padding=True, truncation=True, max_length=128, return_tensors="pt"
            )
            dataset = TensorDataset(inputs['input_ids'], inputs['attention_mask'])
            dataloader = DataLoader(dataset, batch_size=max(1, int(batch_size)), shuffle=False)
            device = "cuda" if torch.cuda.is_available() else "cpu"
            for input_ids, attention_mask in dataloader:
                with torch.no_grad():
                    outputs = ModelLoader.sentiment_model(
                        input_ids=input_ids.to(device), attention_mask=attention_mask.to(device))
                sentiment_dist.append(torch.nn.functional.softmax(outputs.logits, dim=-1).cpu())

        if sentiment_dist:
            mean_probs = torch.mean(torch.cat(sentiment_dist, dim=0), dim=0).tolist()
            negative, neutral, positive = self._three_way_sentiment(mean_probs)
        else:
            negative, neutral, positive = 0.0, 1.0, 0.0
        return {
            f"{speaker_type}_negative": round(negative, 4),
            f"{speaker_type}_neutral": round(neutral, 4),
            f"{speaker_type}_positive": round(positive, 4),
            f"{speaker_type}_emotions": ",".join(emotions) if emotions else "无"
        }

    # ---------------------- rule checks ----------------------
    def _check_service_rules(self, agent_segments: List[Dict]) -> Dict:
        """Check the agent's turns for opening, closing and forbidden phrases."""
        keywords = DialectProcessor.KEYWORDS
        found_opening = any(
            kw in seg["text"] for seg in agent_segments[:3] for kw in keywords["opening"])
        found_closing = any(
            kw in seg["text"] for seg in agent_segments[-3:] for kw in keywords["closing"])
        # At most one forbidden phrase is recorded per segment (first hit);
        # a dict keeps them de-duplicated in first-seen order.
        found_forbidden = {}
        for seg in agent_segments:
            hit = next((kw for kw in keywords["forbidden"] if kw in seg["text"]), None)
            if hit is not None:
                found_forbidden.setdefault(hit, None)
        return {
            "opening_found": found_opening,
            "closing_found": found_closing,
            "forbidden_words": ", ".join(found_forbidden) if found_forbidden else "无"
        }

    def _check_issue_resolution(self, customer_segments: List[Dict], agent_segments: List[Dict]) -> bool:
        """Heuristically decide whether the customer's issue was resolved.

        Keyword based: an un-negated "unresolved" cue anywhere in the call
        means False; otherwise gratitude in the customer's last turn, a
        resolution phrase in the agent's last three turns, or agreement in
        the customer's last two turns means True.
        """
        if not customer_segments or not agent_segments:
            return False
        resolution_keywords = ["解决", "处理", "完成", "已", "好了", "可以了", "没问题", "明白", "清楚", "满意", "行"]
        unresolved_keywords = ["没解决", "不行", "不对", "还是", "仍然", "再", "未", "无法", "不能", "不行", "不满意"]
        negation_words = ["不", "没", "未", "非", "无"]
        gratitude_keywords = ["谢谢", "感谢", "多谢", "麻烦", "辛苦", "有劳"]
        full_conversation = " ".join(seg["text"] for seg in customer_segments + agent_segments)
        last_customer_text = customer_segments[-1]["text"]

        # NOTE(review): the inspected context includes the keyword itself, so
        # cues that contain a negation character ("不行", "未", ...) always
        # count as negated and never mark the call unresolved. Only the first
        # occurrence of each cue is inspected. Confirm the intended rule.
        for kw in unresolved_keywords:
            # "再见" (goodbye) must not count as the cue "再" (again).
            suffix = "(?!见)" if kw == "再" else ""
            match = re.search(rf".{{0,5}}{re.escape(kw)}{suffix}", full_conversation)
            if match and not any(neg in match.group(0) for neg in negation_words):
                return False

        # Gratitude counts unless that very keyword is directly negated.
        for kw in gratitude_keywords:
            if kw in last_customer_text and not any(
                    neg + kw in last_customer_text for neg in negation_words):
                return True
        for seg in agent_segments[-3:]:
            if any(kw in seg["text"] for kw in resolution_keywords):
                if not any(neg in seg["text"] for neg in negation_words):
                    return True
        for seg in customer_segments[-2:]:
            if any(kw in seg["text"] for kw in ["好", "行", "可以", "明白"]):
                if not any(neg in seg["text"] for neg in negation_words):
                    return True
        return False

    # ---------------------- helpers ----------------------
    def _get_available_batch_size(self) -> int:
        """Pick a batch size from GPU memory per concurrent task (4 on CPU)."""
        if not torch.cuda.is_available():
            return 4
        total_mem = torch.cuda.get_device_properties(0).total_memory / (1024 ** 3)
        per_task_mem = total_mem / self.max_concurrent
        if per_task_mem < 2:
            return 2
        return 4 if per_task_mem < 4 else 8

    def _get_max_concurrent_tasks(self) -> int:
        """Upper bound for parallel tasks based on GPU memory or CPU count."""
        if torch.cuda.is_available():
            total_mem = torch.cuda.get_device_properties(0).total_memory / (1024 ** 3)
            if total_mem < 6:
                return 1
            return 2 if total_mem < 12 else 3
        # os.cpu_count() may return None when the count is undetermined.
        return max(1, (os.cpu_count() or 1) // 2)

    def _check_memory_usage(self) -> bool:
        """Return True when CPU or GPU memory usage exceeds 85%."""
        try:
            mem_percent = self.resource_monitor.memory_percent()
            return mem_percent.get("cpu", 0) > 85 or mem_percent.get("gpu", 0) > 85
        except Exception:
            return False

    def _extract_audio_features(self, wav_paths: List[str]) -> Dict[str, float]:
        """Load all parts, join them and compute acoustic features."""
        sr = ConfigManager().get("sample_rate", 16000)
        waveforms = [librosa.load(path, sr=sr)[0] for path in wav_paths]
        # Concatenate once instead of growing an array inside the loop.
        combined_y = np.concatenate(waveforms) if waveforms else np.array([], dtype=np.float32)
        return AudioProcessor.extract_features_from_audio(combined_y, sr)

    @staticmethod
    def _sentences_of(asr_result) -> List[Dict]:
        """Convert one ASR result into segment dicts.

        NOTE(review): assumes the result is indexable and that
        ``asr_result[0]["sentences"]`` holds dicts with ``start``, ``end``
        and ``text`` — confirm against the pipeline's output format.
        """
        return [
            {
                "start": sentence["start"],
                "end": sentence["end"],
                "text": sentence["text"],
                "spk_id": sentence.get("spk_id", "0")
            }
            for sentence in asr_result[0]["sentences"]
        ]

    def _process_asr_segments(self, wav_paths: List[str]) -> Tuple[List[Dict], str]:
        """Transcribe the WAV files, batched with a per-file fallback.

        Returns:
            ``(segments, full_text)`` where ``full_text`` is the
            space-joined text of all segments.
        """
        segments: List[Dict] = []
        if not wav_paths:
            return segments, ""
        batch_size = max(1, min(4, len(wav_paths), self._get_available_batch_size()))
        for offset in range(0, len(wav_paths), batch_size):
            if not self.is_running:
                break
            batch_paths = wav_paths[offset:offset + batch_size]
            try:
                results = ModelLoader.asr_pipeline(batch_paths, output_dir=None, batch_size=batch_size)
                # Stage the batch so a mid-batch failure leaves nothing
                # behind that the per-file fallback would then duplicate.
                batch_segments = []
                for result in results:
                    batch_segments.extend(self._sentences_of(result))
                segments.extend(batch_segments)
            except Exception as e:
                print(f"ASR批处理错误: {str(e)}")
                for path in batch_paths:
                    try:
                        result = ModelLoader.asr_pipeline(path, output_dir=None)
                        segments.extend(self._sentences_of(result))
                    except Exception as inner:
                        # Best effort: report and continue with the next file.
                        print(f"ASR处理失败 ({path}): {str(inner)}")
        full_text = " ".join(seg["text"] for seg in segments).strip()
        return segments, full_text

    def _generate_labeled_text(self, all_segments: List[Dict], agent_segments: List[Dict],
                               customer_segments: List[Dict]) -> str:
        """Render the transcript with one ``[speaker]: text`` line per segment."""
        agent_spk_id = agent_segments[0]["spk_id"] if agent_segments else None
        customer_spk_id = customer_segments[0]["spk_id"] if customer_segments else None
        lines = []
        for seg in all_segments:
            if seg["spk_id"] == agent_spk_id:
                speaker = "客服"
            elif seg["spk_id"] == customer_spk_id:
                speaker = "客户"
            else:
                speaker = f"说话人{seg['spk_id']}"
            lines.append(f"[{speaker}]: {seg['text']}")
        return "\n".join(lines)

    def _cleanup_temp_files(self, paths: List[str]):
        """Delete the given files and any temp file older than one hour."""
        def safe_remove(path):
            try:
                os.remove(path)
            except OSError:
                pass  # best effort: already gone or still in use

        for path in paths or []:
            safe_remove(path)
        now = time.time()
        try:
            entries = os.listdir(self.temp_dir)
        except OSError:
            return
        for name in entries:
            file_path = os.path.join(self.temp_dir, name)
            try:
                stale = os.path.isfile(file_path) and (now - os.path.getmtime(file_path)) > 3600
            except OSError:
                continue
            if stale:
                safe_remove(file_path)

    def _format_duration(self, seconds: float) -> str:
        """Format a duration in seconds as ``HH:MM:SS``."""
        minutes, seconds = divmod(int(seconds), 60)
        hours, minutes = divmod(minutes, 60)
        return f"{hours:02d}:{minutes:02d}:{seconds:02d}"

    def _cleanup_resources(self):
        """Run garbage collection and release cached CUDA memory."""
        gc.collect()
        if torch.cuda.is_available():
            torch.cuda.empty_cache()

    def stop(self):
        """Ask the worker to stop; checked between files and ASR batches."""
        self.is_running = False
# ====================== 模型加载线程 ======================
class ModelLoadThread(QThread):
    """Background thread that loads the ASR and sentiment models."""

    # (progress percent, status message)
    progress_updated = pyqtSignal(int, str)
    # (success, message)
    # NOTE(review): this shadows QThread's built-in ``finished`` signal;
    # renaming would break existing connections, so it is kept as is.
    finished = pyqtSignal(bool, str)

    def run(self):
        """Validate the model paths, load both models and report the outcome."""
        try:
            config = ConfigManager()
            paths_valid, errors = config.check_model_paths()
            if not paths_valid:
                self.finished.emit(False, f"模型路径无效:\n{chr(10).join(errors)}")
                return
            model_paths = config.get("model_paths")
            with ModelLoader.model_lock:
                # Mark the models unusable while they are being replaced.
                ModelLoader.models_loaded = False
                self.progress_updated.emit(20, "加载语音识别模型...")
                ModelLoader._load_asr_model(model_paths["asr"])
                self.progress_updated.emit(60, "加载情感分析模型...")
                ModelLoader._load_sentiment_model(model_paths["sentiment"])
                # The analysis thread refuses to start until this flag is set.
                ModelLoader.models_loaded = True
            self.progress_updated.emit(100, "模型加载完成")
            self.finished.emit(True, "模型加载成功")
        except Exception as e:
            self.finished.emit(False, f"模型加载失败: {str(e)}")
# ====================== GUI主界面(简化版) ======================
class MainWindow(QMainWindow):
def __init__(self):
    """Build the main window, then verify dependencies and model paths."""
    super().__init__()
    self.setWindowTitle("贵州方言客服质检系统")
    self.setGeometry(100, 100, 1200, 800)

    # Widgets and menus first.
    self.setup_ui()
    self.setup_menu()

    # Worker handles and state; nothing is running at start-up.
    self.analysis_thread = None
    self.model_load_thread = None
    self.temp_dir = "temp_wav"
    os.makedirs(self.temp_dir, exist_ok=True)
    self.model_loaded = False

    # Last step: check dependencies and model configuration.
    self.check_initial_setup()
def setup_ui(self):
    """Create the toolbar, the file list and the progress/result panels."""
    # Central widget with a vertical root layout.
    central = QWidget()
    root_layout = QVBoxLayout()
    central.setLayout(root_layout)
    self.setCentralWidget(central)

    # Toolbar: (label, icon path, handler) in display order.
    toolbar = QToolBar("主工具栏")
    self.addToolBar(toolbar)
    toolbar_specs = (
        ("添加文件", "icons/add.png", self.add_files),
        ("开始分析", "icons/start.png", self.start_analysis),
        ("停止分析", "icons/stop.png", self.stop_analysis),
        ("设置", "icons/settings.png", self.open_settings),
    )
    for label, icon_path, handler in toolbar_specs:
        tool_action = QAction(QIcon(icon_path), label, self)
        tool_action.triggered.connect(handler)
        toolbar.addAction(tool_action)

    splitter = QSplitter(Qt.Horizontal)
    root_layout.addWidget(splitter)

    # Left pane: list of files waiting to be analysed.
    file_panel = QWidget()
    file_layout = QVBoxLayout()
    file_panel.setLayout(file_layout)
    file_layout.addWidget(QLabel("待分析文件列表"))
    self.file_list = QListWidget()
    self.file_list.setSelectionMode(QListWidget.ExtendedSelection)
    file_layout.addWidget(self.file_list)

    # Right pane: progress indicators above the result tabs.
    result_panel = QWidget()
    result_layout = QVBoxLayout()
    result_panel.setLayout(result_layout)
    result_layout.addWidget(QLabel("分析进度"))
    self.progress_bar = QProgressBar()
    self.progress_bar.setRange(0, 100)
    result_layout.addWidget(self.progress_bar)
    self.current_file_label = QLabel("当前文件: 无")
    result_layout.addWidget(self.current_file_label)
    self.tab_widget = QTabWidget()
    result_layout.addWidget(self.tab_widget, 1)

    # Tab 1: read-only text report.
    text_page = QWidget()
    text_page_layout = QVBoxLayout()
    text_page.setLayout(text_page_layout)
    self.text_result = QTextEdit()
    self.text_result.setReadOnly(True)
    text_page_layout.addWidget(self.text_result)
    self.tab_widget.addTab(text_page, "文本结果")

    # Tab 2: table with ten result columns.
    table_page = QWidget()
    table_page_layout = QVBoxLayout()
    table_page.setLayout(table_page_layout)
    column_titles = [
        "文件名", "时长", "语速", "音量稳定性", "客服情感", "客户情感",
        "开场白", "结束语", "禁用词", "问题解决"
    ]
    self.result_table = QTableWidget()
    self.result_table.setColumnCount(10)
    self.result_table.setHorizontalHeaderLabels(column_titles)
    self.result_table.horizontalHeader().setSectionResizeMode(QHeaderView.Stretch)
    table_page_layout.addWidget(self.result_table)
    self.tab_widget.addTab(table_page, "详细结果")

    # Assemble the splitter: files on the left, results on the right.
    splitter.addWidget(file_panel)
    splitter.addWidget(result_panel)
    splitter.setSizes([300, 900])
def setup_menu(self):
    """Populate the menu bar with the File, Analysis and Settings menus."""
    # (menu title, ((entry label, handler), ...)) in display order.
    menu_specs = (
        ("文件", (
            ("添加文件", self.add_files),
            ("导出结果", self.export_results),
            ("退出", self.close),
        )),
        ("分析", (
            ("开始分析", self.start_analysis),
            ("停止分析", self.stop_analysis),
        )),
        ("设置", (
            ("系统配置", self.open_settings),
            ("加载模型", self.load_models),
        )),
    )
    bar = self.menuBar()
    for title, entries in menu_specs:
        menu = bar.addMenu(title)
        for label, handler in entries:
            entry = QAction(label, self)
            entry.triggered.connect(handler)
            menu.addAction(entry)
def check_initial_setup(self):
    """Warn at start-up about a missing ffmpeg or invalid model paths."""
    # Dependency check: report a missing ffmpeg but keep going.
    has_ffmpeg, ffmpeg_detail = check_ffmpeg_available()
    if not has_ffmpeg:
        QMessageBox.critical(
            self,
            "音频处理依赖缺失",
            f"无法处理音频: {ffmpeg_detail}\n\n请安装ffmpeg并确保其在系统PATH中。\nWindows用户可从https://ffmpeg.org/download.html下载并添加到环境变量。"
        )

    # Model path check: offer to open the settings dialog when invalid.
    paths_ok, problems = ConfigManager().check_model_paths()
    if paths_ok:
        return
    prompt = QMessageBox()
    prompt.setIcon(QMessageBox.Warning)
    prompt.setText("模型路径配置不正确")
    prompt.setInformativeText(f"检测到以下问题:\n{chr(10).join(problems)}\n\n是否现在进行配置?")
    prompt.setWindowTitle("配置模型路径")
    prompt.setStandardButtons(QMessageBox.Yes | QMessageBox.No)
    if prompt.exec_() == QMessageBox.Yes:
        self.open_settings()
def add_files(self):
    """Let the user pick audio files and append them to the file list."""
    name_filter = "音频文件 (*.mp3 *.wav *.amr *.m4a)"
    selected, _ = QFileDialog.getOpenFileNames(self, "选择音频文件", "", name_filter)
    for audio_path in selected:
        self.file_list.addItem(audio_path)
def start_analysis(self):
    """Validate preconditions and start the analysis worker thread.

    Aborts (after informing the user) when an analysis is already running,
    ffmpeg is missing, no files are queued, the model paths are invalid, or
    the models are not loaded yet. When the models are missing the user may
    trigger loading; analysis must then be started again afterwards.
    """
    # Replacing a worker that is still running would drop the only reference
    # to a live QThread, so refuse to start a second analysis.
    if self.analysis_thread is not None and self.analysis_thread.isRunning():
        QMessageBox.warning(self, "警告", "分析正在进行中,请先等待完成或停止当前分析")
        return

    ffmpeg_available, ffmpeg_msg = check_ffmpeg_available()
    if not ffmpeg_available:
        QMessageBox.critical(
            self,
            "音频处理依赖缺失",
            f"无法开始分析: {ffmpeg_msg}\n\n请安装ffmpeg并确保其在系统PATH中。\nWindows用户可从https://ffmpeg.org/download.html下载并添加到环境变量。"
        )
        return
    if self.file_list.count() == 0:
        QMessageBox.warning(self, "警告", "请先添加要分析的音频文件")
        return

    config = ConfigManager()
    paths_valid, errors = config.check_model_paths()
    if not paths_valid:
        # Parent the dialog to the window so it stays attached to it.
        msg = QMessageBox(self)
        msg.setIcon(QMessageBox.Warning)
        msg.setText("模型路径配置不正确")
        msg.setInformativeText(f"检测到以下问题:\n{chr(10).join(errors)}\n\n是否现在进行配置?")
        msg.setWindowTitle("配置模型路径")
        msg.setStandardButtons(QMessageBox.Yes | QMessageBox.No)
        if msg.exec_() != QMessageBox.Yes:
            return
        self.open_settings()
        # Re-check after the settings dialog was closed.
        paths_valid, _ = config.check_model_paths()
        if not paths_valid:
            return

    if not self.model_loaded:
        reply = QMessageBox.question(
            self,
            "模型未加载",
            "模型尚未加载,是否立即加载?",
            QMessageBox.Yes | QMessageBox.No,
            QMessageBox.Yes
        )
        if reply == QMessageBox.Yes:
            self.load_models()
        # Loading is asynchronous; the user restarts analysis once it is done.
        return

    audio_paths = [self.file_list.item(i).text() for i in range(self.file_list.count())]
    self.text_result.clear()
    self.result_table.setRowCount(0)
    self.analysis_thread = AnalysisThread(audio_paths, self.temp_dir)
    self.analysis_thread.progress_updated.connect(self.update_progress)
    self.analysis_thread.result_ready.connect(self.handle_result)
    self.analysis_thread.finished_all.connect(self.analysis_finished)
    self.analysis_thread.error_occurred.connect(self.show_error)
    self.analysis_thread.memory_warning.connect(self.handle_memory_warning)
    self.analysis_thread.start()
def stop_analysis(self):
    """Stop the running analysis thread, wait for it to end, then notify the user.

    Does nothing when no analysis thread exists or it is not running.
    """
    worker = self.analysis_thread
    if not worker or not worker.isRunning():
        return
    worker.stop()
    worker.wait()
    QMessageBox.information(self, "信息", "分析已停止")
def load_models(self):
    """Validate the configured model paths and start loading models in a thread.

    Returns without doing anything when a load is already in progress. If the
    model paths are invalid the user is offered the settings dialog; loading
    only proceeds when the paths validate afterwards. The result of the load
    is delivered to ``handle_model_load_result``.
    """
    # A load that is already running makes prompting the user pointless.
    if self.model_load_thread and self.model_load_thread.isRunning():
        return

    # Check the configured model paths first.
    paths_valid, errors = ConfigManager().check_model_paths()
    if not paths_valid:
        msg = QMessageBox(self)
        msg.setIcon(QMessageBox.Warning)
        msg.setText("模型路径配置不正确")
        msg.setInformativeText(f"检测到以下问题:\n{chr(10).join(errors)}\n\n是否现在进行配置?")
        msg.setWindowTitle("配置模型路径")
        msg.setStandardButtons(QMessageBox.Yes | QMessageBox.No)
        if msg.exec_() != QMessageBox.Yes:
            return
        self.open_settings()
        # Validate again through a fresh ConfigManager so that values saved
        # by the settings dialog are the ones being checked.
        paths_valid, _ = ConfigManager().check_model_paths()
        if not paths_valid:
            return

    # open_settings() can itself trigger load_models(), so a loader may have
    # been started while the dialog was open; never start a second one.
    if self.model_load_thread and self.model_load_thread.isRunning():
        return

    self.model_load_thread = ModelLoadThread()
    self.model_load_thread.progress_updated.connect(lambda value, _: self.progress_bar.setValue(value))
    self.model_load_thread.finished.connect(self.handle_model_load_result)
    self.model_load_thread.start()
def update_progress(self, progress: int, message: str, current_file: str):
    """Show the given progress value and the file currently being processed.

    ``message`` is not used by this method.
    """
    file_caption = f"当前文件: {current_file}"
    self.progress_bar.setValue(progress)
    self.current_file_label.setText(file_caption)
def handle_result(self, result: Dict):
    """Render one per-file analysis result in the text log and the result table.

    A result whose ``status`` is ``"success"`` is appended to the text view
    as a multi-part report and added as a new table row. In that row the
    opening/closing/issue-resolved cells (columns 6, 7, 9) are tinted when
    they read "否", and the forbidden-words cell (column 8) is tinted when it
    is anything other than "无". A result whose ``status`` is ``"error"`` is
    only written to the text view together with its ``error`` reason. Any
    other status is ignored.
    """
    status = result["status"]

    if status == "error":
        self.text_result.append(f"文件: {result['file_name']}\n状态: 错误\n原因: {result['error']}\n" + "=" * 50 + "\n")
        return
    if status != "success":
        return

    # --- Text report: one append() call per section, in fixed order. ---
    log = self.text_result.append
    log(f"文件: {result['file_name']}\n状态: {status}\n时长: {result['duration_str']}")
    log(f"语速: {result['syllable_rate']} 音节/秒\n音量稳定性: {result['volume_stability']}")
    log(f"客服情感: 负面({result['agent_negative']:.2%}) 中性({result['agent_neutral']:.2%}) 正面({result['agent_positive']:.2%})")
    log(f"客服情绪: {result['agent_emotions']}")
    log(f"客户情感: 负面({result['customer_negative']:.2%}) 中性({result['customer_neutral']:.2%}) 正面({result['customer_positive']:.2%})")
    log(f"客户情绪: {result['customer_emotions']}")
    log(f"开场白: {'有' if result['opening_found'] else '无'}\n结束语: {'有' if result['closing_found'] else '无'}")
    log(f"禁用词: {result['forbidden_words']}\n问题解决: {'是' if result['issue_resolved'] else '否'}")
    log("\n=== 对话文本 ===\n" + result["asr_text"] + "\n" + "=" * 50 + "\n")

    # --- Table row: the list position is the table column index. ---
    row_index = self.result_table.rowCount()
    self.result_table.insertRow(row_index)
    cells = [
        result["file_name"],
        result["duration_str"],
        str(result["syllable_rate"]),
        str(result["volume_stability"]),
        f"负:{result['agent_negative']:.2f} 中:{result['agent_neutral']:.2f} 正:{result['agent_positive']:.2f}",
        f"负:{result['customer_negative']:.2f} 中:{result['customer_neutral']:.2f} 正:{result['customer_positive']:.2f}",
        "是" if result["opening_found"] else "否",
        "是" if result["closing_found"] else "否",
        result["forbidden_words"],
        "是" if result["issue_resolved"] else "否",
    ]
    for col, text in enumerate(cells):
        cell = QTableWidgetItem(text)
        flagged = (text == "否" and col in (6, 7, 9)) or (col == 8 and text != "无")
        if flagged:
            cell.setBackground(QColor(255, 200, 200))
        self.result_table.setItem(row_index, col, cell)
def analysis_finished(self):
    """Tell the user that all files were analysed, then set the progress bar to 100."""
    title, text = "完成", "所有音频分析完成"
    QMessageBox.information(self, title, text)
    self.progress_bar.setValue(100)
def show_error(self, title: str, message: str):
    """Display ``message`` in a critical message box titled ``title``."""
    QMessageBox.critical(self, title, message)
def handle_memory_warning(self):
    """Show a warning box saying memory usage is too high and analysis stopped.

    This method only displays the dialog; it does not stop anything itself.
    """
    title, text = "内存警告", "内存使用过高,分析已停止"
    QMessageBox.warning(self, title, text)
def handle_model_load_result(self, success: bool, message: str):
    """Report the outcome of a model load and record success.

    On success ``self.model_loaded`` is set to True and ``message`` is shown
    in an information box; otherwise ``message`` is shown in a critical box
    and ``self.model_loaded`` is left unchanged.
    """
    if not success:
        QMessageBox.critical(self, "错误", message)
        return
    self.model_loaded = True
    QMessageBox.information(self, "成功", message)
def open_settings(self):
    """Show the modal settings dialog and persist its values when accepted.

    The dialog edits the two model directories (``model_paths`` -> ``asr`` /
    ``sentiment``) and the numeric options ``max_concurrent`` and
    ``max_audio_duration``. Nothing is saved when the dialog is cancelled.
    After saving, if models are currently loaded and a model path was
    actually edited, the user is asked whether to reload the models.
    """
    config = ConfigManager()

    settings_dialog = QDialog(self)
    settings_dialog.setWindowTitle("系统设置")
    settings_dialog.setFixedSize(500, 300)
    layout = QVBoxLayout()

    # Tolerate a missing/empty "model_paths" entry instead of raising.
    model_paths = config.get("model_paths") or {}

    # Keep direct references to the input widgets so saving does not depend
    # on the position of each row inside the layout.
    path_edits = {}
    for label, key in (("ASR模型路径:", "asr"), ("情感模型路径:", "sentiment")):
        h_layout = QHBoxLayout()
        h_layout.addWidget(QLabel(label))
        line_edit = QLineEdit(str(model_paths.get(key) or ""))
        browse_btn = QPushButton("浏览...")
        # Bind the edit as a default argument so that each button fills its
        # own field rather than whatever the loop variable ends up as.
        browse_btn.clicked.connect(lambda _, le=line_edit: self.browse_directory(le))
        h_layout.addWidget(line_edit)
        h_layout.addWidget(browse_btn)
        layout.addLayout(h_layout)
        path_edits[key] = line_edit
    # Snapshot of what the dialog initially shows, used to detect edits.
    initial_paths = {key: edit.text() for key, edit in path_edits.items()}

    spin_boxes = {}
    spin_settings = [
        ("最大并发任务:", "max_concurrent", 1, 8),
        ("最大音频时长(秒):", "max_audio_duration", 60, 86400)
    ]
    for label, key, min_val, max_val in spin_settings:
        h_layout = QHBoxLayout()
        h_layout.addWidget(QLabel(label))
        spin_box = QSpinBox()
        spin_box.setRange(min_val, max_val)
        spin_box.setValue(config.get(key, min_val))
        h_layout.addWidget(spin_box)
        layout.addLayout(h_layout)
        spin_boxes[key] = spin_box

    button_box = QDialogButtonBox(QDialogButtonBox.Ok | QDialogButtonBox.Cancel)
    button_box.accepted.connect(settings_dialog.accept)
    button_box.rejected.connect(settings_dialog.reject)
    layout.addWidget(button_box)
    settings_dialog.setLayout(layout)

    if settings_dialog.exec_() != QDialog.Accepted:
        return

    # Save the model path configuration.
    new_paths = {key: edit.text() for key, edit in path_edits.items()}
    config.set("model_paths", new_paths)
    # Save the remaining options.
    for key, spin_box in spin_boxes.items():
        config.set(key, spin_box.value())

    # Offer a reload only when models are loaded and a path really changed;
    # the prompt text states that the model paths were modified.
    if self.model_loaded and new_paths != initial_paths:
        reply = QMessageBox.question(
            self,
            "配置已更新",
            "模型路径已更改,是否立即重新加载模型?",
            QMessageBox.Yes | QMessageBox.No,
            QMessageBox.Yes
        )
        if reply == QMessageBox.Yes:
            self.load_models()
def browse_directory(self, line_edit):
    """Let the user choose a directory and write it into ``line_edit``.

    The field is left untouched when the dialog yields an empty path.
    """
    chosen_dir = QFileDialog.getExistingDirectory(self, "选择目录")
    if not chosen_dir:
        return
    line_edit.setText(chosen_dir)
def export_results(self):
    """Export the result table to a UTF-8 CSV file chosen by the user.

    Warns and returns when the table has no rows, and returns silently when
    the save dialog yields no path. The header row is taken from the table's
    horizontal header, followed by one CSV row per table row. Fields are
    quoted and escaped by the ``csv`` module, so values containing commas,
    double quotes or line breaks stay intact. Any failure while writing is
    reported in an error dialog instead of being raised.
    """
    import csv  # local import keeps this method self-contained

    table = self.result_table
    if table.rowCount() == 0:
        QMessageBox.warning(self, "警告", "没有可导出的结果")
        return

    path, _ = QFileDialog.getSaveFileName(self, "保存结果", "", "CSV文件 (*.csv)")
    if not path:
        return

    def cell_text(item) -> str:
        # item()/horizontalHeaderItem() give None for cells never populated.
        return item.text() if item is not None else ""

    columns = range(table.columnCount())
    try:
        # newline="" hands line-ending control to the csv writer, as the csv
        # module documentation requires for files it writes to.
        with open(path, "w", encoding="utf-8", newline="") as f:
            writer = csv.writer(f)
            writer.writerow([cell_text(table.horizontalHeaderItem(col)) for col in columns])
            for row in range(table.rowCount()):
                writer.writerow([cell_text(table.item(row, col)) for col in columns])
        QMessageBox.information(self, "成功", f"结果已导出到: {path}")
    except Exception as e:
        # UI boundary: surface every failure to the user rather than letting
        # it escape from a Qt slot.
        QMessageBox.critical(self, "错误", f"导出失败: {str(e)}")
def closeEvent(self, event):
    """Stop a running analysis, clean up the temp directory and accept the close.

    Cleanup is best effort: regular files directly inside ``self.temp_dir``
    are removed (up to three attempts each, pausing briefly after a failed
    attempt), then the directory itself is removed. Filesystem errors are
    ignored so that closing the window can never fail because of leftover
    files, and ``event.accept()`` is always called.
    """
    if self.analysis_thread and self.analysis_thread.isRunning():
        self.analysis_thread.stop()
        self.analysis_thread.wait()

    # Guard against an unset temp dir: os.listdir(None) would silently list
    # the current working directory.
    temp_dir = getattr(self, "temp_dir", None)
    if temp_dir and os.path.isdir(temp_dir):
        try:
            for name in os.listdir(temp_dir):
                file_path = os.path.join(temp_dir, name)
                if not os.path.isfile(file_path):
                    continue
                for _ in range(3):
                    try:
                        os.remove(file_path)
                        break
                    except OSError:
                        # Possibly still in use by something else; retry shortly.
                        time.sleep(0.1)
            # Fails (and is ignored) when something could not be removed.
            os.rmdir(temp_dir)
        except OSError:
            pass  # best-effort cleanup only

    event.accept()
# ====================== Program entry point ======================
if __name__ == "__main__":
    # Limit PyTorch to 4 CPU threads.
    torch.set_num_threads(4)

    app = QApplication(sys.argv)
    app.setStyle('Fusion')

    window = MainWindow()
    window.show()

    # Run the Qt event loop and use its return value as the process exit code.
    exit_code = app.exec_()
    sys.exit(exit_code)
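# NOTE: stray web-page text ("最新发布", i.e. "latest release") left over from copy-paste; it is not part of the program.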