result_charts

本文介绍了一个性能数据采集到入库关键节点处理效率的报表系统。该系统通过JSP页面展示采集任务的结果,包括网元名称、采集粒度及各处理阶段的时间消耗等详细信息,并提供了图表展示不同处理阶段的时间对比。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

<%@ page language="java" contentType="text/html; charset=UTF-8"
    pageEncoding="UTF-8"%>
<%@ taglib prefix="c" uri="http://java.sun.com/jsp/jstl/core"%>
<%@ taglib prefix="fmt" uri="http://java.sun.com/jsp/jstl/fmt"%>
<%
    // Build an absolute base URL (scheme://host:port/context/) for the <base href> tag,
    // so every relative link/script path on this page resolves against the webapp root.
    String path = request.getContextPath();
    String basePath = request.getScheme()+"://"+request.getServerName()+":"+request.getServerPort()+path+"/";
%>
<!DOCTYPE html>
<html lang="zh">
<head>
<base href="<%=basePath%>">
<!-- jsp文件头和头部 -->
<%@ include file="../admin/top.jsp"%>
</head>
<body>
    <div class="container-fluid" id="main-container">
        <div id="page-content" class="clearfix">
            <div class="row-fluid">
                <div class="row-fluid">
                    <div id="container" style="min-width: 700px; height: 500px"></div>
                    <!-- 报表  -->
                    <table id="table_report"
                        class="table table-striped table-bordered table-hover center">
                        <thead>
                            <tr class="center">
                                <th>序号</th>
                                <th>网元</th>
                                <th>采集粒度</th>
                                <th>收到消息(receive)</th>
                                <th>获取数据(process 2)</th>
                                <th>转换数据(process 3)</th>
                                <th>存储数据(process 5)</th>
                                <th>入库完成(process 7)</th>
                                <th>总时间</th>
                                <th class="center">错误信息</th>
                            </tr>
                        </thead>
                        <tbody>
                            <!-- 开始循环 -->
                            <%-- Render one table row per collection result; show an
                                 empty-state row when there is no data. --%>
                            <c:choose>
                                <c:when test="${not empty pmResults}">
                                    <c:forEach items="${pmResults}" var="result" varStatus="res">
                                        <tr class="center">
                                            <td class='center' style="width: 30px;">${res.index+1}</td>
                                            <%-- FIX: href used the undefined ${PM_RESULT_ID} and a literal
                                                 stray " }" in the URL; use the current row's id, matching
                                                 the onclick handlers below. --%>
                                            <td class="center"><a
                                                href="task/pm_detail.do?pm_id=${result.PM_RESULT_ID}">${result.NEID}</a></td>
                                            <td class="center">${result.TIME_SPAN}</td>
                                            <td class="center"><a style="cursor: pointer;"
                                                target="mainFrame"
                                                onclick="result(${result.PM_RESULT_ID},${res.index+1},'task/pm_detail.do?step=1')">${result.RECEIVE_REPORT_MSG_TIME} 分钟</a></td>
                                            <td class="center"><a style="cursor: pointer;"
                                                target="mainFrame"
                                                onclick="result(${result.PM_RESULT_ID},${res.index+1},'task/pm_detail.do?step=2')">${result.PROCESS_GET_FILE_TIME} 分钟</a></td>
                                            <td class="center"><a style="cursor: pointer;"
                                                target="mainFrame"
                                                onclick="result(${result.PM_RESULT_ID},${res.index+1},'task/pm_detail.do?step=3')">${result.PROCESS_PARSE_TIME} 分钟</a></td>
                                            <td class="center"><a style="cursor: pointer;"
                                                target="mainFrame"
                                                onclick="result(${result.PM_RESULT_ID},${res.index+1},'task/pm_detail.do?step=5')">${result.PROCESS_STORE_TIME} 分钟</a></td>
                                            <td class="center"><a style="cursor: pointer;"
                                                target="mainFrame"
                                                onclick="result(${result.PM_RESULT_ID},${res.index+1},'task/pm_detail.do?step=7')">${result.PROCESS_FINISH_TIME} 分钟</a></td>
                                            <td style="width: 80px;" class="center">${result.TOTAL_TIME} 分钟</td>
                                            <td style="width: 60px;" class="center"><a
                                                style="cursor: pointer;" target="mainFrame"
                                                onclick="result(${result.PM_RESULT_ID},${res.index+1},'task/pm_reslut_log.do?')">查看日志</a></td>
                                        </tr>
                                    </c:forEach>
                                </c:when>
                                <c:otherwise>
                                    <tr class="main_info">
                                        <td colspan="100" class="center">没有相关数据</td>
                                    </tr>
                                </c:otherwise>
                            </c:choose>
                        </tbody>
                    </table>
                    <%-- FIX: removed a stray </form>; no matching <form> is opened on this page. --%>
                </div>


                <!-- PAGE CONTENT ENDS HERE -->
            </div>
            <!--/row-->

        </div>
        <!--/#page-content-->
    </div>
    <!--/.fluid-container#main-container-->


    <!-- 引入 -->
    <script type="text/javascript">window.jQuery || document.write("<script src='static/js/jquery-1.9.1.min.js'>\x3C/script>");</script>
    <script src="static/js/bootstrap.min.js"></script>
    <script src="static/js/ace-elements.min.js"></script>
    <script src="static/js/ace.min.js"></script>
    <!-- 引入 -->



    <script type="text/javascript">
        {
        $(top.hangge());
        
        //检索
        function search(){
            top.jzts();
            $("#Form").submit();
        }
        
        
        //删除
        function del(taskId){
            if(confirm("确定要删除?")){ 
                top.jzts();
                var url = '<%=basePath%>task/delete.do?taskId='+taskId;
                $.get(url,function(data){
                    /* nextPage(${page.currentPage}); */
                });
            }
        }
        </script>

    <script type="text/javascript">
        
        //全选 (是/否)
        function selectAll(){
             var checklist = document.getElementsByName ("ids");
               if(document.getElementById("zcheckbox").checked){
               for(var i=0;i<checklist.length;i++){
                  checklist[i].checked = 1;
               } 
             }else{
              for(var j=0;j<checklist.length;j++){
                 checklist[j].checked = 0;
              }
             }
        }

        
        
        //批量操作
        function makeAll(msg){
            
            if(confirm(msg)){ 
                
                    var str = '';
                    for(var i=0;i < document.getElementsByName('ids').length;i++)
                    {
                          if(document.getElementsByName('ids')[i].checked){
                              if(str=='') str += document.getElementsByName('ids')[i].value;
                              else str += ',' + document.getElementsByName('ids')[i].value;
                          }
                    }
                    if(str==''){
                        alert("您没有选择任何内容!"); 
                        return;
                    }else{
                        if(msg == '确定要删除选中的数据吗?'){
                            top.jzts();
                            $.ajax({
                                type: "POST",
                                url: '<%=basePath%>task/deleteAll.do?tm='+new Date().getTime(),
                                data: {DATA_IDS:str},
                                dataType:'json',
                                //beforeSend: validateData,
                                cache: false,
                                success: function(data){
                                     $.each(data.list, function(i, list){
                                            /* nextPage(${page.currentPage}); */
                                     });
                                     }
                            });
                        }
                    }
            }
        
        //导出excel
        function toExcel(){
            window.location.href='<%=basePath%>task/excel.do';
        }
        
        //查看报表
        function result(id,number,url){
                top.mainFrame.tabAddHandler(id,"节点详细-"+number,url);
            if(url != "druid/index.html"){
                jzts();
            }
        }
        </script>

    <script type="text/javascript">
    var legendData=[];
    var series="";
    var ser="";
    var legend="";
    var option={};
    function opSeries(){
        option ={
                chart: {
                    type: 'bar'
                },  
                title: {
                         text: '性能数据采集到入库关键节点处理效率报表'
                     },
                  xAxis: {  
                      categories: legendData  
                  },  
                  yAxis: { 
                         title: {
                             text: '关键节点处理时间 (分钟)'
                         } 
                  },
                  plotOptions: {
                      line: {
                          dataLabels: {
                              enabled: true
                          },
                          enableMouseTracking: false
                      }
                  },
                  plotLines: [{
                      value: 0,
                      width: 1,
                      color: '#808080'
                  }],
              tooltip: {
                  valueSuffix: '分钟'
              },
              legend: {
                  layout: 'vertical',
                  align: 'right',
                  verticalAlign: 'middle',
                  borderWidth: 0
              },
                  series: []
          }
    }
    
  $(function () {
      var taskId = ${pd.taskId};
      $.ajax({
          type: "post",
          async: false, //同步执行
          url: "task/result_charts",
          data:{taskId:taskId},
          dataType: "json", //返回数据形式为json
          success: function (dataResult) {
            var pmResults =dataResult;
            opSeries(); 

            console.log("pmResults.length:"+ pmResults.length);
            for(i=0;pmResults.length>i;i++){
                 //给legendData赋值
                console.log("pmResults[i].categorys.length:"+ pmResults[i].categorys.length);
                  for(j=0;pmResults[i].categorys.length > j;j++){
                     legendData.push(pmResults[i].categorys[j]);
                    // console.log("categorys:"+pmResults[i].categorys[j]);
                    // console.log("legendData:"+legendData[i]);
                 }
                  
                 //给series赋值
                     console.log("pmResults[i].series.length:"+ pmResults[i].series.length);
                 for(k=0;pmResults[i].series.length > k;k++){
                     console.log("series:"+ pmResults[i].series[k].name);
                    // console.log("series:"+ pmResults[i].series[k].data);
                     
                     option.series[i] = pmResults[i].series[k];
                     //console.log("series:"+ pmResults[i].series[k]);
                 }
             } 
              
              
          },
          error: function (errorMsg) {
                 legendDate = ["中国","美国","英国"];
                 series =  [ { "name":"2014", "type":"bar", "data":[51, 2, 43]} ,{ "name":"2015", "type":"bar", "data":[55, 36, 4]},{ "name":"2016", "type":"bar", "data":[5, 20, 40]}];
                 ser = [ { "name":"2014", "type":"bar", "data":[51, 2, 43]} ,{ "name":"2015", "type":"bar", "data":[55, 36, 4]},{ "name":"2016", "type":"bar", "data":[5, 20, 40]}];
                 //showChart();
                 opSeries(); 
                 for(i=0;series.length>i;i++){
                     legendData.push(series[i].name);
                      option.series[i]=series[i];
                 }
          }
      }); 
      $("#container").highcharts(option); 
  });
  </script>
    <style type="text/css">
li {
    list-style-type: none;
}
</style>
    <ul class="navigationTabs">
        <li><a></a></li>
        <li></li>
    </ul>
</body>
</html>

 

帮我分析以下代码逻辑 import tkinter as tk from tkinter import ttk, filedialog, messagebox, scrolledtext from datetime import datetime, timedelta import pandas as pd import numpy as np import matplotlib.pyplot as plt from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg import os import glob import sys import traceback import re import subprocess from openpyxl import Workbook from openpyxl.utils.dataframe import dataframe_to_rows from openpyxl.drawing.image import Image as XLImage import io import tempfile class CollapsibleFrame(ttk.Frame): """可折叠/展开的面板""" def __init__(self, parent, title="", *args, **kwargs): ttk.Frame.__init__(self, parent, *args, **kwargs) self.title = title self.is_expanded = tk.BooleanVar(value=True) # 标题栏 self.header = ttk.Frame(self) self.header.pack(fill="x", pady=(0, 5)) self.toggle_btn = ttk.Checkbutton( self.header, text=title, variable=self.is_expanded, command=self.toggle, style="Toggle.TButton" ) self.toggle_btn.pack(side="left", padx=5) # 内容区域 self.content = ttk.Frame(self) self.content.pack(fill="both", expand=True) # 初始状态 self.toggle() # 创建自定义样式 style = ttk.Style() style.configure("Toggle.TButton", font=("Arial", 10, "bold")) def toggle(self): """切换面板状态""" if self.is_expanded.get(): self.content.pack(fill="both", expand=True) else: self.content.pack_forget() class TensileAnalyzer: def __init__(self, root): self.root = root self.root.title("拉力数据分析系统") self.root.geometry("1200x900") self.root.state('zoomed') # 启动时最大化窗口 self.data = pd.DataFrame() self.all_data_details = [] # 存储所有详细数据 self.figures = [] # 存储图表对象 self.risk_files = [] # 存储有风险的文件路径 self.setup_ui() def setup_ui(self): # 主框架 main_frame = ttk.Frame(self.root) main_frame.pack(fill="both", expand=True, padx=10, pady=10) # 左侧控制面板 control_frame = ttk.Frame(main_frame, width=350) control_frame.pack(side="left", fill="y", padx=(0, 10)) # 文件选择区域 file_frame = ttk.LabelFrame(control_frame, text="数据源") file_frame.pack(fill="x", pady=5) # 文件夹选择 folder_frame = ttk.Frame(file_frame) 
folder_frame.pack(fill="x", pady=5) ttk.Label(folder_frame, text="数据文件夹:").pack(side="left", padx=5, pady=5) self.folder_entry = ttk.Entry(folder_frame, width=30) self.folder_entry.pack(side="left", padx=5, pady=5, fill="x", expand=True) ttk.Button(folder_frame, text="浏览", command=self.select_folder, width=8).pack(side="left", padx=5, pady=5) # 文件选择 file_sel_frame = ttk.Frame(file_frame) file_sel_frame.pack(fill="x", pady=5) ttk.Label(file_sel_frame, text="单个文件:").pack(side="left", padx=5, pady=5) self.file_entry = ttk.Entry(file_sel_frame, width=30) self.file_entry.pack(side="left", padx=5, pady=5, fill="x", expand=True) ttk.Button(file_sel_frame, text="浏览", command=self.select_file, width=8).pack(side="left", padx=5, pady=5) # 型号选择 model_frame = ttk.Frame(file_frame) model_frame.pack(fill="x", pady=5) ttk.Label(model_frame, text="产品型号:").pack(side="left", padx=(0, 5)) self.model_combobox = ttk.Combobox(model_frame, width=20) self.model_combobox.pack(side="left", padx=(0, 5)) ttk.Button(model_frame, text="刷新型号", command=self.refresh_models, width=8).pack(side="left") # 规格设置区域 spec_frame = ttk.LabelFrame(control_frame, text="规格设置") spec_frame.pack(fill="x", pady=5) ttk.Label(spec_frame, text="规格下限(LCL):").grid(row=0, column=0, padx=5, pady=5, sticky='w') self.lcl_entry = ttk.Entry(spec_frame, width=10) self.lcl_entry.grid(row=0, column=1, padx=5, pady=5, sticky='w') self.lcl_entry.insert(0, "180") # 默认值 ttk.Label(spec_frame, text="规格上限(USL):").grid(row=0, column=2, padx=5, pady=5, sticky='w') self.usl_entry = ttk.Entry(spec_frame, width=10) self.usl_entry.grid(row=0, column=3, padx=5, pady=5, sticky='w') self.usl_entry.insert(0, "250") # 默认值 # 风险阈值 ttk.Label(spec_frame, text="风险阈值(LCL+):").grid(row=1, column=0, padx=5, pady=5, sticky='w') self.risk_threshold_entry = ttk.Entry(spec_frame, width=10) self.risk_threshold_entry.grid(row=1, column=1, padx=5, pady=5, sticky='w') self.risk_threshold_entry.insert(0, "30") # 默认值 # 日期范围 date_frame = 
ttk.LabelFrame(control_frame, text="日期范围") date_frame.pack(fill="x", pady=5) ttk.Label(date_frame, text="开始日期:").grid(row=0, column=0, padx=5, pady=5, sticky='w') self.start_date = ttk.Entry(date_frame, width=12) self.start_date.grid(row=0, column=1, padx=5, pady=5, sticky='w') # 设置默认日期为60天前 default_start_date = (datetime.now() - timedelta(days=60)).strftime("%Y-%m-%d") self.start_date.insert(0, default_start_date) ttk.Label(date_frame, text="结束日期:").grid(row=0, column=2, padx=5, pady=5, sticky='w') self.end_date = ttk.Entry(date_frame, width=12) self.end_date.grid(row=0, column=3, padx=5, pady=5, sticky='w') self.end_date.insert(0, datetime.now().strftime("%Y-%m-%d")) # 日期范围调整按钮 date_btn_frame = ttk.Frame(date_frame) date_btn_frame.grid(row=1, column=0, columnspan=4, pady=5) ttk.Button(date_btn_frame, text="最近7天", width=8, command=lambda: self.set_date_range(7)).pack(side="left", padx=2) ttk.Button(date_btn_frame, text="最近30天", width=8, command=lambda: self.set_date_range(30)).pack(side="left", padx=2) ttk.Button(date_btn_frame, text="最近60天", width=8, command=lambda: self.set_date_range(60)).pack(side="left", padx=2) ttk.Button(date_btn_frame, text="所有日期", width=8, command=self.set_all_dates).pack(side="left", padx=2) # 操作按钮区域 btn_frame = ttk.LabelFrame(control_frame, text="操作") btn_frame.pack(fill="x", pady=5) # 分析按钮 ttk.Button(btn_frame, text="分析数据", command=self.analyze_data, width=15).pack(pady=5) # 调试模式 self.debug_var = tk.BooleanVar(value=True) # 默认启用调试模式 self.debug_check = ttk.Checkbutton(btn_frame, text="调试模式(显示详细信息)", variable=self.debug_var) self.debug_check.pack(pady=2) # 详细信息按钮 ttk.Button(btn_frame, text="查看详细信息", command=self.show_details, width=15).pack(pady=5) # 保存按钮 ttk.Button(btn_frame, text="保存结果", command=self.save_results, width=15).pack(pady=5) # 右侧结果显示区域 result_frame = ttk.Frame(main_frame) result_frame.pack(side="right", fill="both", expand=True) # 可折叠的结果面板 self.result_panel = CollapsibleFrame(result_frame, title="分析结果") 
self.result_panel.pack(fill="both", expand=True, pady=5) # 结果显示区域 self.result_text = scrolledtext.ScrolledText( self.result_panel.content, height=15, width=80, font=("Consolas", 10) # 使用等宽字体 ) self.result_text.pack(fill="both", expand=True, padx=5, pady=5) # 风险文件区域 self.risk_frame = ttk.LabelFrame(self.result_panel.content, text="风险文件") self.risk_frame.pack(fill="x", padx=5, pady=5) # 图表面板 self.chart_panel = CollapsibleFrame(result_frame, title="分析图表") self.chart_panel.pack(fill="both", expand=True, pady=5) # 图表区域 self.canvas_frame = ttk.Frame(self.chart_panel.content) self.canvas_frame.pack(fill="both", expand=True, padx=5, pady=5) def set_date_range(self, days): """设置日期范围为最近N天""" end_date = datetime.now() start_date = end_date - timedelta(days=days) self.start_date.delete(0, tk.END) self.start_date.insert(0, start_date.strftime("%Y-%m-%d")) self.end_date.delete(0, tk.END) self.end_date.insert(0, end_date.strftime("%Y-%m-%d")) def set_all_dates(self): """设置日期范围为所有日期""" self.start_date.delete(0, tk.END) self.start_date.insert(0, "2000-01-01") # 足够早的日期 self.end_date.delete(0, tk.END) self.end_date.insert(0, datetime.now().strftime("%Y-%m-%d")) def select_folder(self): folder_path = filedialog.askdirectory() if folder_path: self.folder_entry.delete(0, tk.END) self.folder_entry.insert(0, folder_path) self.file_entry.delete(0, tk.END) # 清除单个文件选择 self.refresh_models() def select_file(self): file_path = filedialog.askopenfilename(filetypes=[("Excel files", "*.xls;*.xlsx")]) if file_path: self.file_entry.delete(0, tk.END) self.file_entry.insert(0, file_path) self.folder_entry.delete(0, tk.END) # 清除文件夹选择 self.refresh_models() def refresh_models(self): folder_path = self.folder_entry.get() file_path = self.file_entry.get() # 清空风险文件显示 for widget in self.risk_frame.winfo_children(): widget.destroy() if folder_path: path_type = "文件夹" path = folder_path if not os.path.exists(folder_path): self.result_text.insert(tk.END, f"错误: 文件夹不存在: {folder_path}\n") return elif file_path: 
path_type = "文件" path = file_path if not os.path.exists(file_path): self.result_text.insert(tk.END, f"错误: 文件不存在: {file_path}\n") return else: return models = set() self.result_text.delete(1.0, tk.END) self.result_text.insert(tk.END, f"扫描{path_type}: {path}\n") file_paths = [] if folder_path: file_paths = glob.glob(os.path.join(folder_path, "*.xls*")) elif file_path: file_paths = [file_path] file_count = 0 for file_path in file_paths: file_count += 1 file_name = os.path.basename(file_path) self.result_text.insert(tk.END, f"发现文件: {file_name}\n") self.result_text.see(tk.END) self.root.update() try: # 尝试使用 openpyxl 读取所有 Excel 文件 try: df_meta = pd.read_excel(file_path, sheet_name='Sheet2', header=None, nrows=15, engine='openpyxl') except Exception as e: try: df_meta = pd.read_excel(file_path, sheet_name='Sheet2', header=None, nrows=15, engine='xlrd') except Exception as e2: self.result_text.insert(tk.END, f" 无法读取文件: {str(e2)}\n") continue # 查找规格型号行 - 更智能的搜索 model_value = None for i in range(len(df_meta)): # 检查是否包含"规格型号"或类似文本 if not pd.isna(df_meta.iloc[i, 0]) and "规格型号" in str(df_meta.iloc[i, 0]): if len(df_meta.columns) > 2: model_value = df_meta.iloc[i, 2] break # 检查是否包含"model"或类似文本 elif not pd.isna(df_meta.iloc[i, 0]) and any(word in str(df_meta.iloc[i, 0]).lower() for word in ["model", "type", "规格"]): if len(df_meta.columns) > 2: model_value = df_meta.iloc[i, 2] break if model_value is not None and pd.notna(model_value): model_str = str(model_value).strip() models.add(model_str) self.result_text.insert(tk.END, f" 发现型号: {model_str}\n") else: self.result_text.insert(tk.END, " 未找到型号信息\n") except Exception as e: self.result_text.insert(tk.END, f" 处理文件出错: {str(e)}\n") self.model_combobox['values'] = sorted(models) if models: self.model_combobox.current(0) self.result_text.insert(tk.END, f"\n找到 {len(models)} 个型号\n") else: self.result_text.insert(tk.END, "\n未找到任何型号信息\n") def validate_date(self, date_str): try: return datetime.strptime(date_str, "%Y-%m-%d") except 
ValueError: return None def parse_test_date(self, date_str): """多种日期格式解析""" date_str = str(date_str).strip() # 尝试常见格式 formats = [ "%Y%m%d", # 20250424 "%Y-%m-%d", # 2025-04-24 "%Y/%m/%d", # 2025/04/24 "%m/%d/%Y", # 04/24/2025 "%d-%m-%Y", # 24-04-2025 ] for fmt in formats: try: return datetime.strptime(date_str, fmt) except ValueError: continue # 尝试提取数字日期 digits = re.sub(r'\D', '', date_str) if len(digits) == 8: try: return datetime.strptime(digits, "%Y%m%d") except ValueError: pass return None def analyze_data(self): try: # 清除之前的图表和风险文件 for widget in self.canvas_frame.winfo_children(): widget.destroy() for widget in self.risk_frame.winfo_children(): widget.destroy() self.figures.clear() self.risk_files = [] # 重置风险文件列表 folder_path = self.folder_entry.get() file_path = self.file_entry.get() target_model = self.model_combobox.get().strip() start_date_str = self.start_date.get().strip() end_date_str = self.end_date.get().strip() debug_mode = self.debug_var.get() # 获取规格限 try: lcl = float(self.lcl_entry.get().strip()) except: messagebox.showerror("错误", "请输入有效的规格下限(LCL)") return try: usl = float(self.usl_entry.get().strip()) except: messagebox.showerror("错误", "请输入有效的规格上限(USL)") return # 获取风险阈值 try: risk_threshold = float(self.risk_threshold_entry.get().strip()) except: risk_threshold = 30 # 默认值 # 确定数据源 if folder_path: file_paths = glob.glob(os.path.join(folder_path, "*.xls*")) source_type = "文件夹" elif file_path: file_paths = [file_path] source_type = "文件" else: messagebox.showerror("错误", "请选择数据文件夹或文件") return if not os.path.exists(file_paths[0]): messagebox.showerror("错误", f"路径不存在: {file_paths[0]}") return if not target_model: messagebox.showerror("错误", "请选择产品型号") return start_date = self.validate_date(start_date_str) end_date = self.validate_date(end_date_str) if not start_date or not end_date: messagebox.showerror("错误", "请填写有效的日期范围 (格式: YYYY-MM-DD)") return # 收集所有文件数据 self.all_data_details = [] # 重置详细数据 all_data = [] # 仅拉力值 failure_counts = {'A': 0, 'B': 0, 'C': 0, 'D': 
0} total_files = 0 processed_files = 0 matched_files = 0 included_files = 0 self.result_text.delete(1.0, tk.END) self.result_text.insert(tk.END, f"开始分析...\n目标型号: {target_model}\n") self.result_text.insert(tk.END, f"规格下限(LCL): {lcl} N, 规格上限(USL): {usl} N\n") self.result_text.insert(tk.END, f"日期范围: {start_date.strftime('%Y-%m-%d')} 至 {end_date.strftime('%Y-%m-%d')}\n") self.result_text.insert(tk.END, f"数据源: {source_type} ({len(file_paths)}个文件)\n\n") for file_path in file_paths: total_files += 1 file_name = os.path.basename(file_path) self.result_text.insert(tk.END, f"处理文件: {file_name}\n") if debug_mode: self.result_text.see(tk.END) self.root.update() try: # 尝试使用 openpyxl 读取文件 try: df_meta = pd.read_excel(file_path, sheet_name='Sheet2', header=None, nrows=20, engine='openpyxl') except Exception as e: try: df_meta = pd.read_excel(file_path, sheet_name='Sheet2', header=None, nrows=20, engine='xlrd') except Exception as e2: self.result_text.insert(tk.END, f" 读取元数据失败: {str(e2)}\n") continue # 1. 获取型号 model_value = None model_row_index = None for i in range(len(df_meta)): if not pd.isna(df_meta.iloc[i, 0]) and "规格型号" in str(df_meta.iloc[i, 0]): if len(df_meta.columns) > 2: model_value = df_meta.iloc[i, 2] model_row_index = i break if model_value is None: self.result_text.insert(tk.END, " 未找到型号信息\n") continue model_str = str(model_value).strip() if debug_mode: self.result_text.insert(tk.END, f" 文件型号: {model_str}\n") if model_str != target_model: if debug_mode: self.result_text.insert(tk.END, f" 型号不匹配 (目标: {target_model})\n") continue matched_files += 1 # 2. 
获取日期 test_date = None date_row_index = None for i in range(len(df_meta)): if not pd.isna(df_meta.iloc[i, 0]) and "试验日期" in str(df_meta.iloc[i, 0]): if len(df_meta.columns) > 2: date_value = df_meta.iloc[i, 2] if date_value is not None: test_date = self.parse_test_date(date_value) date_row_index = i break if test_date is None: # 尝试从文件名中提取日期 try: # 匹配文件名中的日期模式 (如 ACP-3S-2025.4.24.xls) match = re.search(r'(\d{4})[\.\-]?(\d{1,2})[\.\-]?(\d{1,2})', file_name) if match: year = int(match.group(1)) month = int(match.group(2)) day = int(match.group(3)) test_date = datetime(year, month, day) self.result_text.insert(tk.END, f" 从文件名解析日期: {test_date.strftime('%Y-%m-%d')}\n") except: self.result_text.insert(tk.END, " 未找到有效日期\n") continue if test_date is None: self.result_text.insert(tk.END, " 未找到有效日期\n") continue if debug_mode: self.result_text.insert(tk.END, f" 文件日期: {test_date.strftime('%Y-%m-%d')}\n") # 检查日期范围 if test_date < start_date or test_date > end_date: if debug_mode: self.result_text.insert(tk.END, f" 日期不在范围内 (要求: {start_date.strftime('%Y-%m-%d')} 至 {end_date.strftime('%Y-%m-%d')})\n") continue included_files += 1 # 3. 定位数据起始行 - 改进版本 data_start_row = None # 查找包含"序号"或"No"的行 for i in range(len(df_meta)): if not pd.isna(df_meta.iloc[i, 0]) and any(keyword in str(df_meta.iloc[i, 0]) for keyword in ["序号", "No", "样本"]): data_start_row = i + 1 break # 如果没找到,尝试查找数值起始行 if data_start_row is None: for i in range(len(df_meta)): if pd.api.types.is_number(df_meta.iloc[i, 0]) and not pd.isna(df_meta.iloc[i, 0]): data_start_row = i break if data_start_row is None: # 尝试基于型号行和日期行定位 if model_row_index is not None and date_row_index is not None: data_start_row = max(model_row_index, date_row_index) + 5 else: data_start_row = 10 # 默认值 if debug_mode: self.result_text.insert(tk.END, f" 数据起始行: {data_start_row}\n") # 4. 
读取数据 try: # 尝试读取数据部分 df_data = pd.read_excel( file_path, sheet_name='Sheet2', skiprows=data_start_row, header=None, engine='openpyxl' ) except: try: df_data = pd.read_excel( file_path, sheet_name='Sheet2', skiprows=data_start_row, header=None, engine='xlrd' ) except Exception as e: self.result_text.insert(tk.END, f" 读取数据失败: {str(e)}\n") continue if debug_mode: self.result_text.insert(tk.END, f" 找到 {len(df_data)} 行数据\n") if len(df_data) > 0: self.result_text.insert(tk.END, f" 前3行数据预览:\n") for i in range(min(3, len(df_data))): self.result_text.insert(tk.END, f" 行 {i+1}: {df_data.iloc[i].values}\n") # 5. 处理数据 - 改进版本 sample_count = 0 file_has_risk = False # 标记文件是否有风险数据 for index, row in df_data.iterrows(): # 检查是否为有效数据行 if len(row) < 4: # 确保有足够的列 continue # 检查序号列是否为数字 try: if pd.isna(row[0]) or not isinstance(row[0], (int, float)): continue except: continue # 获取拉力值 (B列) force = row[1] if not pd.api.types.is_number(force) or pd.isna(force): continue # 获取失效模式 (D列) failure_mode = row[3] if pd.isna(failure_mode): failure_mode = None else: failure_mode = str(failure_mode).strip() if len(failure_mode) > 0: failure_mode = failure_mode[0] # 只取第一个字母 else: failure_mode = None # 检查是否低于风险阈值 if force < lcl + risk_threshold: file_has_risk = True # 添加到数据集 all_data.append(force) sample_count += 1 # 添加到详细数据 self.all_data_details.append({ '文件名称': file_name, '文件路径': file_path, # 保存完整路径用于风险文件打开 '测试日期': test_date.strftime('%Y-%m-%d'), '序号': row[0], '拉力值(N)': force, '失效模式': failure_mode }) # 统计失效模式 if failure_mode and failure_mode in failure_counts: failure_counts[failure_mode] += 1 # 如果有风险数据,添加到风险文件列表 if file_has_risk: self.risk_files.append(file_path) processed_files += 1 self.result_text.insert(tk.END, f" 成功读取 {sample_count} 个样本\n") except Exception as e: error_msg = f" 处理文件出错: {str(e)}" self.result_text.insert(tk.END, error_msg + "\n") if debug_mode: traceback.print_exc() continue # 结果汇总 self.result_text.insert(tk.END, "\n===== 分析结果汇总 =====\n") self.result_text.insert(tk.END, f"扫描文件总数: 
{total_files}\n") self.result_text.insert(tk.END, f"型号匹配文件数: {matched_files}\n") self.result_text.insert(tk.END, f"日期范围内文件数: {included_files}\n") self.result_text.insert(tk.END, f"成功处理文件数: {processed_files}\n") self.result_text.insert(tk.END, f"总样本数量: {len(all_data)}\n") if not all_data: # 给出调整日期范围的建议 self.result_text.insert(tk.END, "\n没有找到符合条件的数据,建议:\n") self.result_text.insert(tk.END, f"1. 调整日期范围(当前: {start_date_str} 至 {end_date_str})\n") self.result_text.insert(tk.END, "2. 使用'所有日期'按钮扩大范围\n") self.result_text.insert(tk.END, "3. 检查文件夹是否包含所需文件\n") return # 计算统计指标 data_series = pd.Series(all_data) mean_val = data_series.mean() median_val = data_series.median() std_val = data_series.std() min_val = data_series.min() max_val = data_series.max() # 计算过程能力 cp = (usl - lcl) / (6 * std_val) if std_val > 0 else 0 cpk = min((mean_val - lcl) / (3 * std_val), (usl - mean_val) / (3 * std_val)) if std_val > 0 else 0 # 计算失效模式百分比 total_failures = sum(failure_counts.values()) failure_percent = {k: (v / total_failures * 100) if total_failures > 0 else 0 for k, v in failure_counts.items()} # 显示详细结果 result = f""" === 详细分析结果 === 产品型号: {target_model} 规格范围: {lcl} N ~ {usl} N 日期范围: {start_date.strftime('%Y-%m-%d')} 至 {end_date.strftime('%Y-%m-%d')} 样本数量: {len(all_data)} 风险阈值: LCL + {risk_threshold} = {lcl + risk_threshold:.1f} N 统计指标: 最小值: {min_val:.2f} N 最大值: {max_val:.2f} N 平均值: {mean_val:.2f} N 中值: {median_val:.2f} N 标准差: {std_val:.2f} N 过程能力指数(CP): {cp:.3f} 过程能力指数(CPK): {cpk:.3f} 失效模式统计: A模式: {failure_counts['A']} 颗 ({failure_percent['A']:.1f}%) B模式: {failure_counts['B']} 颗 ({failure_percent['B']:.1f}%) C模式: {failure_counts['C']} 颗 ({failure_percent['C']:.1f}%) D模式: {failure_counts['D']} 颗 ({failure_percent['D']:.1f}%) """ self.result_text.insert(tk.END, result) # 显示风险文件 self.show_risk_files(lcl, risk_threshold) self.result_text.see(tk.END) # 滚动到底部 # 绘制图表 self.plot_data(data_series, target_model, lcl, usl, mean_val, std_val, risk_threshold) except Exception as e: error_msg = f"分析出错: 
def show_risk_files(self, lcl, risk_threshold):
    """Populate the risk panel with files containing pull-force values below lcl + risk_threshold.

    Rebuilds self.risk_frame from scratch on every call: clears previous
    widgets, shows a placeholder label when self.risk_files is empty,
    otherwise renders a scrollable list with an "open file" button per entry.
    self.risk_files is assumed to be filled by the preceding analysis pass
    — TODO confirm against the caller.
    """
    # Clear the risk-file area (widgets from the previous run).
    for widget in self.risk_frame.winfo_children():
        widget.destroy()
    if not self.risk_files:
        ttk.Label(self.risk_frame, text="未检测到风险文件").pack(padx=5, pady=5)
        return
    # Header: how many files contain values below the risk threshold.
    risk_title = ttk.Label(
        self.risk_frame,
        text=f"检测到 {len(self.risk_files)} 个文件包含拉力值 < LCL+{risk_threshold} = {lcl+risk_threshold:.1f}N 的数据:",
        font=("Arial", 10, "bold")
    )
    risk_title.pack(anchor="w", padx=5, pady=5)
    # Scrollable area: a Canvas hosting an inner Frame (standard Tkinter idiom).
    risk_canvas = tk.Canvas(self.risk_frame, height=100)
    scrollbar = ttk.Scrollbar(self.risk_frame, orient="vertical", command=risk_canvas.yview)
    scrollable_frame = ttk.Frame(risk_canvas)
    # Keep the canvas scrollregion in sync with the inner frame's size.
    scrollable_frame.bind(
        "<Configure>",
        lambda e: risk_canvas.configure(scrollregion=risk_canvas.bbox("all"))
    )
    risk_canvas.create_window((0, 0), window=scrollable_frame, anchor="nw")
    risk_canvas.configure(yscrollcommand=scrollbar.set)
    risk_canvas.pack(side="left", fill="both", expand=True, padx=5, pady=5)
    scrollbar.pack(side="right", fill="y")
    # One row per risk file: "index. name" label plus an open button.
    for i, file_path in enumerate(self.risk_files):
        file_name = os.path.basename(file_path)
        file_frame = ttk.Frame(scrollable_frame)
        file_frame.pack(fill="x", padx=5, pady=2)
        ttk.Label(file_frame, text=f"{i+1}. {file_name}").pack(side="left")
        # Default-argument binding (path=file_path) captures the current loop
        # value, avoiding the late-binding closure pitfall.
        open_btn = ttk.Button(
            file_frame,
            text="打开文件",
            width=10,
            command=lambda path=file_path: self.open_file(path)
        )
        open_btn.pack(side="right", padx=5)

def open_file(self, file_path):
    """Open *file_path* with the OS default application.

    Uses os.startfile on Windows, `open` on macOS and `xdg-open` elsewhere.
    Failures are reported via a message box instead of raising.
    """
    if not os.path.exists(file_path):
        messagebox.showerror("错误", f"文件不存在: {file_path}")
        return
    try:
        if sys.platform.startswith('win'):
            os.startfile(file_path)
        elif sys.platform.startswith('darwin'):
            subprocess.call(('open', file_path))
        else:
            subprocess.call(('xdg-open', file_path))
    except Exception as e:
        messagebox.showerror("错误", f"无法打开文件: {str(e)}")
color='brown', linestyle=':', label=f'风险阈值: {lcl+risk_threshold:.1f}N') ax2.axvline(x=mean_val, color='r', linestyle='-', label=f'平均值: {mean_val:.2f}N') ax2.legend() # 添加统计值 stats_text = f"样本数: {len(data)}\n最小值: {data.min():.2f} N\n最大值: {data.max():.2f} N\n平均值: {data.mean():.2f} N\n标准差: {data.std():.2f} N" ax2.text(0.95, 0.95, stats_text, transform=ax2.transAxes, verticalalignment='top', horizontalalignment='right', bbox=dict(boxstyle='round', facecolor='wheat', alpha=0.5), fontsize=9) plt.tight_layout(rect=[0, 0, 1, 0.95]) # 嵌入到Tkinter canvas = FigureCanvasTkAgg(fig, master=self.canvas_frame) canvas.draw() canvas.get_tk_widget().pack(fill="both", expand=True) # 保存图表对象 self.figures.append(fig) def show_details(self): """显示详细数据""" if not self.all_data_details: messagebox.showinfo("提示", "没有可用的详细数据,请先执行分析") return # 创建临时Excel文件 temp_file = tempfile.NamedTemporaryFile(suffix='.xlsx', delete=False) temp_path = temp_file.name temp_file.close() try: # 创建Excel工作簿 wb = Workbook() ws = wb.active ws.title = "详细数据" # 添加标题行 headers = ["文件名称", "测试日期", "序号", "拉力值(N)", "失效模式"] ws.append(headers) # 添加数据 for item in self.all_data_details: ws.append([ item['文件名称'], item['测试日期'], item['序号'], item['拉力值(N)'], item['失效模式'] ]) # 添加统计结果 ws.append([]) ws.append(["统计分析结果"]) ws.append(["产品型号", self.model_combobox.get().strip()]) ws.append(["规格下限(LCL)", float(self.lcl_entry.get())]) ws.append(["规格上限(USL)", float(self.usl_entry.get())]) # 获取文本结果 result_text = self.result_text.get("1.0", tk.END) result_lines = result_text.split('\n') for line in result_lines: if "===" in line or line.strip() == "": continue ws.append([line]) # 保存Excel文件 wb.save(temp_path) # 打开Excel文件 if sys.platform.startswith('win'): os.startfile(temp_path) elif sys.platform.startswith('darwin'): subprocess.call(('open', temp_path)) else: subprocess.call(('xdg-open', temp_path)) except Exception as e: messagebox.showerror("错误", f"创建详细数据文件失败: {str(e)}") def save_results(self): """保存分析结果""" if not self.all_data_details or not 
def check_dependencies():
    """Verify that all required third-party libraries are importable.

    Replaces four copy-pasted try/import blocks with a single data-driven
    loop; behavior is unchanged. On failure an error dialog lists the
    missing packages and the pip command to install them.

    Returns:
        bool: True when every dependency is present, False otherwise.
    """
    import importlib

    required = ("pandas", "matplotlib", "openpyxl", "xlrd")
    missing = []
    for name in required:
        try:
            importlib.import_module(name)
        except ImportError:
            missing.append(name)
    if missing:
        messagebox.showerror(
            "缺少依赖库",
            f"缺少必要的Python库: {', '.join(missing)}\n\n"
            "请运行以下命令安装:\n"
            "pip install pandas matplotlib openpyxl xlrd"
        )
        return False
    return True
05-30
根据修改建议进行修改: 1、AnalysisThread.add_charts 中生成图表后虽删除临时文件,但未显式释放图表对象,可能导致内存泄漏(尤其批量处理时)。使用 plt.close(fig) 显式关闭图表,释放内存 2、AnalysisThread.stop() 仅设置 stop_requested=True,但线程池中的任务仍会继续运行,可能导致资源占用。结合 concurrent.futures 的 Executor.shutdown(wait=False) 强制终止线程池。 3、AudioAnalyzer.convert_audio 中若临时目录创建失败,temp_dir 为 None,后续 temp_dir.cleanup() 会报错。添加容错处理,确保临时目录安全清理。 4、ModelLoader 中加载的 Whisper 模型未充分利用硬件性能(如未指定 language 参数,可能增加语言检测耗时)。指定 language=“chinese” 减少推理时间。 5、convert_audio 中无论原始音频格式如何,均强制转换为 WAV,部分格式(如 16kHz 单声道 WAV)可跳过转换。 6、短时间内同一说话人的片段可能被拆分(如停顿导致),影响后续文本关联准确性。合并连续相同说话人的片段(如间隔 < 1 秒)。 7、当前情感分析仅基于文本内容,未结合对话上下文(如客户抱怨后客服回应的情感)。增加上下文权重,如客户表达消极情绪后,客服的回应情感权重提升。 8、模型加载失败后无重试逻辑,用户需重启程序。添加重试按钮,允许用户重新加载模型。 9、未校验音频文件的有效性(如损坏文件、非音频文件),可能导致分析线程崩溃。添加文件合法性校验,过滤无效文件。 代码: import os import sys import time import json import traceback import numpy as np import pandas as pd import torch import librosa import jieba import tempfile from pydub import AudioSegment from transformers import pipeline, AutoModelForSequenceClassification, AutoTokenizer from pyannote.audio import Pipeline from concurrent.futures import ThreadPoolExecutor, as_completed from PyQt5.QtWidgets import (QApplication, QMainWindow, QWidget, QVBoxLayout, QHBoxLayout, QLabel, QLineEdit, QPushButton, QFileDialog, QTextEdit, QProgressBar, QGroupBox, QCheckBox, QListWidget, QMessageBox) from PyQt5.QtCore import QThread, pyqtSignal, Qt, QTimer from PyQt5.QtGui import QFont from docx import Document from docx.shared import Inches import matplotlib.pyplot as plt from matplotlib.backends.backend_qt5agg import FigureCanvasQTAgg as FigureCanvas from collections import Counter # 全局配置 MODEL_CONFIG = { "whisper_model": "openai/whisper-small", "diarization_model": "pyannote/speaker-diarization@2.1-base", # 使用更轻量模型 "sentiment_model": "IDEA-CCNL/Erlangshen-Roberta-110M-Sentiment", "chunk_size": 10, # 强制10秒分块 "sample_rate": 16000, "device": "cuda" if torch.cuda.is_available() else "cpu", "max_workers": 2 if torch.cuda.is_available() else 4, # 
GPU模式下并行度降低 "batch_size": 8 # 批处理大小 } # 初始化分词器 jieba.initialize() class ModelLoader(QThread): """模型加载线程""" progress = pyqtSignal(str) finished = pyqtSignal(bool, str) def __init__(self): super().__init__() self.models = {} self.error = None def run(self): try: self.progress.emit("正在加载语音识别模型...") # 语音识别模型 self.models["asr_pipeline"] = pipeline( "automatic-speech-recognition", model=MODEL_CONFIG["whisper_model"], torch_dtype=torch.float16, device=MODEL_CONFIG["device"], batch_size=MODEL_CONFIG["batch_size"] # 添加批处理支持 ) self.progress.emit("正在加载说话人分离模型...") # 说话人分离模型 - 使用更轻量版本 self.models["diarization_pipeline"] = Pipeline.from_pretrained( MODEL_CONFIG["diarization_model"], use_auth_token=True ).to(torch.device(MODEL_CONFIG["device"]), torch.float16) self.progress.emit("正在加载情感分析模型...") # 情感分析模型 self.models["sentiment_tokenizer"] = AutoTokenizer.from_pretrained( MODEL_CONFIG["sentiment_model"] ) self.models["sentiment_model"] = AutoModelForSequenceClassification.from_pretrained( MODEL_CONFIG["sentiment_model"], torch_dtype=torch.float16 ).to(MODEL_CONFIG["device"]) self.finished.emit(True, "模型加载完成!") except Exception as e: self.error = str(e) traceback.print_exc() self.finished.emit(False, f"模型加载失败: {str(e)}") class AudioAnalyzer: """深度优化的核心音频分析类""" def __init__(self, models): self.keywords = { "opening": ["您好", "请问是", "先生/女士", "很高兴为您服务"], "closing": ["感谢接听", "祝您生活愉快", "再见", "有问题随时联系"], "forbidden": ["不可能", "没办法", "我不管", "随便你", "投诉也没用"], "solution": ["解决", "处理好了", "已完成", "满意吗", "还有问题吗"] } self.synonyms = { "不可能": ["不可能", "没可能", "做不到", "无法做到"], "解决": ["解决", "处理", "完成", "搞定", "办妥"] } self.models = models self.models_loaded = True if models else False def load_keywords(self, excel_path): """从Excel加载关键词和同义词""" try: # 使用更健壮的Excel读取方式 df = pd.read_excel(excel_path, sheet_name=None) if "开场白" in df: self.keywords["opening"] = df["开场白"].dropna()["关键词"].tolist() if "结束语" in df: self.keywords["closing"] = df["结束语"].dropna()["关键词"].tolist() if "禁语" in df: self.keywords["forbidden"] 
= df["禁语"].dropna()["关键词"].tolist() if "解决关键词" in df: self.keywords["solution"] = df["解决关键词"].dropna()["关键词"].tolist() # 加载同义词表 if "同义词" in df: for _, row in df["同义词"].iterrows(): main_word = row["主词"] synonyms = row["同义词"].split("、") self.synonyms[main_word] = synonyms return True, "关键词加载成功" except Exception as e: error_msg = f"加载关键词失败: {str(e)}" return False, error_msg def convert_audio(self, input_path): """转换音频为WAV格式并分块,使用临时目录管理""" try: # 创建临时目录 temp_dir = tempfile.TemporaryDirectory() # 读取音频文件 audio = AudioSegment.from_file(input_path) # 转换为单声道16kHz audio = audio.set_frame_rate(MODEL_CONFIG["sample_rate"]) audio = audio.set_channels(1) # 计算总时长 duration = len(audio) / 1000.0 # 毫秒转秒 # 分块处理(10秒) chunks = [] chunk_size = MODEL_CONFIG["chunk_size"] * 1000 # 毫秒 for i in range(0, len(audio), chunk_size): chunk = audio[i:i + chunk_size] chunk_path = os.path.join(temp_dir.name, f"chunk_{i // chunk_size}.wav") chunk.export(chunk_path, format="wav") chunks.append({ "path": chunk_path, "start_time": i / 1000.0, # 全局起始时间(秒) "end_time": (i + len(chunk)) / 1000.0 # 全局结束时间(秒) }) return chunks, duration, temp_dir except Exception as e: error_msg = f"音频转换失败: {str(e)}" return [], 0, None def diarize_speakers(self, audio_path): """说话人分离""" try: diarization = self.models["diarization_pipeline"](audio_path) segments = [] for turn, _, speaker in diarization.itertracks(yield_label=True): segments.append({ "start": turn.start, "end": turn.end, "speaker": speaker, "text": "" }) return segments except Exception as e: error_msg = f"说话人分离失败: {str(e)}" raise Exception(error_msg) from e def transcribe_audio_batch(self, chunk_paths): """批量语音识别多个分块""" try: # 批量处理音频分块 results = self.models["asr_pipeline"]( chunk_paths, chunk_length_s=MODEL_CONFIG["chunk_size"], stride_length_s=(4, 2), batch_size=MODEL_CONFIG["batch_size"], return_timestamps=True ) # 整理结果 transcribed_data = [] for result in results: text = result["text"] chunks = result["chunks"] transcribed_data.append((text, chunks)) return 
transcribed_data except Exception as e: error_msg = f"语音识别失败: {str(e)}" raise Exception(error_msg) from e def analyze_sentiment_batch(self, texts): """批量情感分析 - 支持长文本处理""" try: if not texts: return [] # 预处理文本 - 截断并添加特殊token inputs = self.models["sentiment_tokenizer"]( texts, padding=True, truncation=True, max_length=512, return_tensors="pt" ).to(MODEL_CONFIG["device"]) # 批量推理 with torch.no_grad(): outputs = self.models["sentiment_model"](**inputs) # 计算概率 probs = torch.softmax(outputs.logits, dim=-1).cpu().numpy() # 处理结果 results = [] labels = ["积极", "消极", "中性"] for i, text in enumerate(texts): sentiment = labels[np.argmax(probs[i])] # 情感强度检测 strong_negative = probs[i][1] > 0.7 # 消极概率超过70% strong_positive = probs[i][0] > 0.7 # 积极概率超过70% # 特定情绪检测 specific_emotion = "无" if "生气" in text or "愤怒" in text or "气死" in text: specific_emotion = "愤怒" elif "不耐烦" in text or "快点" in text or "急死" in text: specific_emotion = "不耐烦" elif "失望" in text or "无奈" in text: specific_emotion = "失望" # 如果有强烈情感则覆盖平均结果 if strong_negative: sentiment = "强烈消极" elif strong_positive: sentiment = "强烈积极" results.append({ "sentiment": sentiment, "emotion": specific_emotion, "scores": probs[i].tolist() }) return results except Exception as e: error_msg = f"情感分析失败: {str(e)}" raise Exception(error_msg) from e def match_keywords(self, text, keyword_type): """高级关键词匹配 - 使用分词和同义词""" # 获取关键词列表 keywords = self.keywords.get(keyword_type, []) if not keywords: return False # 分词处理 words = jieba.lcut(text) # 检查每个关键词 for keyword in keywords: # 检查直接匹配 if keyword in text: return True # 检查同义词 synonyms = self.synonyms.get(keyword, []) for synonym in synonyms: if synonym in text: return True # 检查分词匹配(全词匹配) if keyword in words: return True return False def identify_agent(self, segments, full_text): """智能客服身份识别""" # 候选客服信息 candidates = {} # 特征1:开场白关键词 for i, segment in enumerate(segments[:5]): # 检查前5个片段 if self.match_keywords(segment["text"], "opening"): speaker = segment["speaker"] candidates.setdefault(speaker, {"score": 0, 
def associate_speaker_text(self, segments, full_text_chunks):
    """Assign recognized text to diarization segments by time overlap.

    A word is attached to a segment when more than half of the word's
    duration falls inside the segment, or when the overlap covers more
    than half of the segment itself. Each segment's "text" is rewritten
    in place.

    Fix: the original built each segment string with repeated `+=`
    (quadratic); words are now collected in a list and joined once.
    The final value is identical (single-space separators, outer
    whitespace stripped).

    Args:
        segments: dicts with "start"/"end" in seconds and a "text" slot.
        full_text_chunks: ASR word dicts carrying "global_start",
            "global_end" and "text"; entries lacking "global_start"
            (e.g. words without timestamps) are skipped.
    """
    for segment in segments:
        words = []  # collected word texts, joined once at the end
        segment_start = segment["start"]
        segment_end = segment["end"]
        segment_duration = segment_end - segment_start
        for word_info in full_text_chunks:
            if "global_start" not in word_info:
                continue
            word_start = word_info["global_start"]
            word_end = word_info["global_end"]
            # Length of the intersection of [segment_start, segment_end]
            # and [word_start, word_end]; <= 0 means disjoint.
            overlap = max(0, min(segment_end, word_end) - max(segment_start, word_start))
            if overlap <= 0:
                continue
            # overlap > 0 implies both durations are strictly positive,
            # so the divisions below cannot raise ZeroDivisionError.
            word_duration = word_end - word_start
            if (overlap / word_duration > 0.5) or (overlap / segment_duration > 0.5):
                words.append(word_info["text"])
        segment["text"] = " ".join(words).strip()
self.analyze_sentiment_batch([agent_text, customer_text]) if sentiment_results: agent_sentiment = sentiment_results[0]["sentiment"] agent_emotion = sentiment_results[0]["emotion"] customer_sentiment = sentiment_results[1]["sentiment"] customer_emotion = sentiment_results[1]["emotion"] else: agent_sentiment = "未知" agent_emotion = "无" customer_sentiment = "未知" customer_emotion = "无" # 问题解决率分析 solution_found = self.match_keywords(agent_text, "solution") # 语速分析 agent_words = len(agent_text.split()) agent_duration = sum([s["end"] - s["start"] for s in segments if s["speaker"] == agent_id]) agent_speed = agent_words / (agent_duration / 60) if agent_duration > 0 else 0 # 词/分钟 # 音量分析(简单版) try: y, sr = librosa.load(audio_path, sr=MODEL_CONFIG["sample_rate"]) rms = librosa.feature.rms(y=y) avg_volume = np.mean(rms) volume_stability = np.std(rms) / avg_volume if avg_volume > 0 else 0 except: avg_volume = 0 volume_stability = 0 # 构建结果 result = { "file_name": os.path.basename(audio_path), "duration": round(duration, 2), "opening_check": "是" if opening_found else "否", "closing_check": "是" if closing_found else "否", "forbidden_check": "是" if forbidden_found else "否", "agent_sentiment": agent_sentiment, "agent_emotion": agent_emotion, "customer_sentiment": customer_sentiment, "customer_emotion": customer_emotion, "agent_speed": round(agent_speed, 1), "volume_level": round(avg_volume, 4), "volume_stability": round(volume_stability, 2), "solution_rate": "是" if solution_found else "否", "agent_text": agent_text[:500] + "..." if len(agent_text) > 500 else agent_text, "customer_text": customer_text[:500] + "..." 
if len(customer_text) > 500 else customer_text } return result finally: # 自动清理临时目录 temp_dir.cleanup() except Exception as e: error_msg = f"分析文件 {os.path.basename(audio_path)} 时出错: {str(e)}" raise Exception(error_msg) from e class AnalysisThread(QThread): """分析线程 - 并行优化版本""" progress = pyqtSignal(int, str) result_ready = pyqtSignal(dict) finished_all = pyqtSignal() error_occurred = pyqtSignal(str, str) def __init__(self, audio_files, keywords_file, output_dir, models): super().__init__() self.audio_files = audio_files self.keywords_file = keywords_file self.output_dir = output_dir self.stop_requested = False self.analyzer = AudioAnalyzer(models) self.completed_count = 0 def run(self): try: total = len(self.audio_files) # 加载关键词 if self.keywords_file: success, msg = self.analyzer.load_keywords(self.keywords_file) if not success: self.error_occurred.emit("关键词加载", msg) results = [] errors = [] # 使用线程池进行并行处理 with ThreadPoolExecutor(max_workers=MODEL_CONFIG["max_workers"]) as executor: # 提交所有任务 future_to_file = { executor.submit(self.analyzer.analyze_audio, audio_file): audio_file for audio_file in self.audio_files } # 处理完成的任务 for future in as_completed(future_to_file): if self.stop_requested: break audio_file = future_to_file[future] try: result = future.result() if result: results.append(result) self.result_ready.emit(result) except Exception as e: error_msg = str(e) errors.append({ "file": audio_file, "error": error_msg }) self.error_occurred.emit(os.path.basename(audio_file), error_msg) # 更新进度 self.completed_count += 1 progress = int(self.completed_count / total * 100) self.progress.emit( progress, f"已完成 {self.completed_count}/{total} ({progress}%)" ) # 生成报告 if results: self.generate_reports(results, errors) self.finished_all.emit() except Exception as e: self.error_occurred.emit("全局错误", str(e)) def stop(self): self.stop_requested = True def generate_reports(self, results, errors): """生成Excel和Word报告 - 优化版本""" try: # 生成Excel报告 df = pd.DataFrame(results) excel_path = 
def add_charts(self, doc, df):
    """Append summary bar charts to the Word report *doc*.

    Renders three charts from the results DataFrame *df*: agent sentiment
    distribution, customer sentiment distribution, and compliance counts.

    Fixes over the original:
    - each figure is explicitly closed with plt.close(fig) so repeated
      report generation does not accumulate figures (memory leak);
    - PNGs go to a unique temporary file instead of fixed names in the
      current working directory (avoids collisions between runs) and are
      always removed, even if embedding fails.
    Any error is logged to stdout and swallowed, as before, so report
    generation continues without charts.
    """
    def _add_bar_chart(series, title, xlabel, ylabel, colors):
        # Render one bar chart into the document, then free all resources.
        fig, ax = plt.subplots(figsize=(6, 4))
        try:
            series.plot(kind='bar', ax=ax, color=colors)
            ax.set_title(title)
            ax.set_xlabel(xlabel)
            ax.set_ylabel(ylabel)
            fig.tight_layout()
            tmp = tempfile.NamedTemporaryFile(suffix='.png', delete=False)
            tmp.close()  # close handle so savefig can write on all platforms
            try:
                fig.savefig(tmp.name)
                doc.add_picture(tmp.name, width=Inches(5))
            finally:
                os.remove(tmp.name)
        finally:
            plt.close(fig)  # release matplotlib memory for this figure

    try:
        _add_bar_chart(df['agent_sentiment'].value_counts(),
                       '客服情感分布', '情感类型', '数量',
                       ['green', 'red', 'blue', 'darkred', 'darkgreen'])
        _add_bar_chart(df['customer_sentiment'].value_counts(),
                       '客户情感分布', '情感类型', '数量',
                       ['green', 'red', 'blue', 'darkred', 'darkgreen'])
        # Per-check count of compliant ("是") files.
        compliance = df[['opening_check', 'closing_check', 'forbidden_check']].apply(
            lambda x: x.value_counts().get('是', 0))
        _add_bar_chart(compliance, '合规性检查', '检查项', '合格数量',
                       ['blue', 'green', 'red'])
    except Exception as e:
        print(f"生成图表失败: {str(e)}")
5px; border-radius: 5px;") # 文件选择区域 file_group = QGroupBox("文件选择") file_layout = QVBoxLayout() file_layout.setSpacing(10) # 音频选择 audio_layout = QHBoxLayout() self.audio_label = QLabel("音频文件/文件夹:") self.audio_path_edit = QLineEdit() self.audio_path_edit.setReadOnly(True) self.audio_path_edit.setPlaceholderText("请选择音频文件或文件夹") self.audio_browse_btn = QPushButton("浏览...") self.audio_browse_btn.setFixedWidth(80) self.audio_browse_btn.clicked.connect(self.browse_audio) audio_layout.addWidget(self.audio_label) audio_layout.addWidget(self.audio_path_edit, 1) audio_layout.addWidget(self.audio_browse_btn) # 关键词选择 keyword_layout = QHBoxLayout() self.keyword_label = QLabel("关键词文件:") self.keyword_path_edit = QLineEdit() self.keyword_path_edit.setReadOnly(True) self.keyword_path_edit.setPlaceholderText("可选:选择关键词Excel文件") self.keyword_browse_btn = QPushButton("浏览...") self.keyword_browse_btn.setFixedWidth(80) self.keyword_browse_btn.clicked.connect(self.browse_keywords) keyword_layout.addWidget(self.keyword_label) keyword_layout.addWidget(self.keyword_path_edit, 1) keyword_layout.addWidget(self.keyword_browse_btn) # 输出目录 output_layout = QHBoxLayout() self.output_label = QLabel("输出目录:") self.output_path_edit = QLineEdit(os.getcwd()) self.output_path_edit.setReadOnly(True) self.output_browse_btn = QPushButton("浏览...") self.output_browse_btn.setFixedWidth(80) self.output_browse_btn.clicked.connect(self.browse_output) output_layout.addWidget(self.output_label) output_layout.addWidget(self.output_path_edit, 1) output_layout.addWidget(self.output_browse_btn) file_layout.addLayout(audio_layout) file_layout.addLayout(keyword_layout) file_layout.addLayout(output_layout) file_group.setLayout(file_layout) # 控制按钮区域 control_layout = QHBoxLayout() control_layout.setSpacing(15) self.start_btn = QPushButton("开始分析") self.start_btn.setFixedHeight(40) self.start_btn.setStyleSheet("background-color: #4CAF50; color: white; font-weight: bold;") self.start_btn.clicked.connect(self.start_analysis) 
self.stop_btn = QPushButton("停止分析") self.stop_btn.setFixedHeight(40) self.stop_btn.setStyleSheet("background-color: #f44336; color: white; font-weight: bold;") self.stop_btn.clicked.connect(self.stop_analysis) self.stop_btn.setEnabled(False) self.clear_btn = QPushButton("清空") self.clear_btn.setFixedHeight(40) self.clear_btn.setStyleSheet("background-color: #2196F3; color: white; font-weight: bold;") self.clear_btn.clicked.connect(self.clear_all) control_layout.addWidget(self.start_btn) control_layout.addWidget(self.stop_btn) control_layout.addWidget(self.clear_btn) # 进度条 self.progress_bar = QProgressBar() self.progress_bar.setRange(0, 100) self.progress_bar.setTextVisible(True) self.progress_bar.setStyleSheet("QProgressBar {border: 1px solid grey; border-radius: 5px; text-align: center;}" "QProgressBar::chunk {background-color: #4CAF50; width: 10px;}") # 结果展示区域 result_group = QGroupBox("分析结果") result_layout = QVBoxLayout() result_layout.setSpacing(10) # 结果标签 result_header = QHBoxLayout() self.result_label = QLabel("分析结果:") self.result_count_label = QLabel("0/0") self.result_count_label.setAlignment(Qt.AlignRight) result_header.addWidget(self.result_label) result_header.addWidget(self.result_count_label) self.result_text = QTextEdit() self.result_text.setReadOnly(True) self.result_text.setStyleSheet("font-family: Consolas, 'Microsoft YaHei';") # 错误列表 error_header = QHBoxLayout() self.error_label = QLabel("错误信息:") self.error_count_label = QLabel("0") self.error_count_label.setAlignment(Qt.AlignRight) error_header.addWidget(self.error_label) error_header.addWidget(self.error_count_label) self.error_list = QListWidget() self.error_list.setFixedHeight(120) self.error_list.setStyleSheet("color: #d32f2f;") result_layout.addLayout(result_header) result_layout.addWidget(self.result_text) result_layout.addLayout(error_header) result_layout.addWidget(self.error_list) result_group.setLayout(result_layout) # 添加到主布局 main_layout.addWidget(file_group) 
main_layout.addLayout(control_layout) main_layout.addWidget(self.progress_bar) main_layout.addWidget(self.status_label) main_layout.addWidget(result_group) main_widget.setLayout(main_layout) self.setCentralWidget(main_widget) # 启动模型加载 self.load_models() def load_models(self): """后台加载模型""" self.status_label.setText("正在加载AI模型,请稍候...") self.start_btn.setEnabled(False) self.model_loader = ModelLoader() self.model_loader.progress.connect(self.update_model_loading_status) self.model_loader.finished.connect(self.handle_model_loading_finished) self.model_loader.start() def update_model_loading_status(self, message): """更新模型加载状态""" self.status_label.setText(message) def handle_model_loading_finished(self, success, message): """处理模型加载完成""" if success: self.models = self.model_loader.models self.models_loaded = True # 修复标志位 self.status_label.setText(message) self.start_btn.setEnabled(True) else: self.status_label.setText(message) QMessageBox.critical(self, "模型加载失败", message) def browse_audio(self): """选择音频文件或文件夹""" options = QFileDialog.Options() files, _ = QFileDialog.getOpenFileNames( self, "选择音频文件", "", "音频文件 (*.mp3 *.wav *.amr *.flac *.m4a);;所有文件 (*)", options=options ) if files: self.audio_files = files self.audio_path_edit.setText(f"已选择 {len(files)} 个文件") self.result_count_label.setText(f"0/{len(files)}") def browse_keywords(self): """选择关键词文件""" options = QFileDialog.Options() file, _ = QFileDialog.getOpenFileName( self, "选择关键词文件", "", "Excel文件 (*.xlsx);;所有文件 (*)", options=options ) if file: self.keywords_file = file self.keyword_path_edit.setText(os.path.basename(file)) def browse_output(self): """选择输出目录""" options = QFileDialog.Options() directory = QFileDialog.getExistingDirectory( self, "选择输出目录", options=options ) if directory: self.output_dir = directory self.output_path_edit.setText(directory) def start_analysis(self): """开始分析""" if not self.audio_files: self.show_message("错误", "请先选择音频文件!") return if not self.models_loaded: # 使用修复后的标志位 self.show_message("错误", 
"AI模型尚未加载完成!") return # 检查输出目录 if not os.path.exists(self.output_dir): try: os.makedirs(self.output_dir) except Exception as e: self.show_message("错误", f"无法创建输出目录: {str(e)}") return # 更新UI状态 self.start_btn.setEnabled(False) self.stop_btn.setEnabled(True) self.result_text.clear() self.error_list.clear() self.error_count_label.setText("0") self.result_text.append("开始分析音频文件...") self.progress_bar.setValue(0) # 创建并启动分析线程 self.analysis_thread = AnalysisThread( self.audio_files, self.keywords_file, self.output_dir, self.models ) # 连接信号 self.analysis_thread.progress.connect(self.update_progress) self.analysis_thread.result_ready.connect(self.handle_result) self.analysis_thread.finished_all.connect(self.analysis_finished) self.analysis_thread.error_occurred.connect(self.handle_error) self.analysis_thread.start() def stop_analysis(self): """停止分析""" if self.analysis_thread and self.analysis_thread.isRunning(): self.analysis_thread.stop() self.analysis_thread.wait() self.result_text.append("分析已停止") self.status_label.setText("分析已停止") self.start_btn.setEnabled(True) self.stop_btn.setEnabled(False) def clear_all(self): """清空所有内容""" self.audio_files = [] self.keywords_file = "" self.audio_path_edit.clear() self.keyword_path_edit.clear() self.result_text.clear() self.error_list.clear() self.progress_bar.setValue(0) self.status_label.setText("准备就绪") self.result_count_label.setText("0/0") self.error_count_label.setText("0") def update_progress(self, value, message): """更新进度""" self.progress_bar.setValue(value) self.status_label.setText(message) # 更新结果计数 if "已完成" in message: parts = message.split() if len(parts) >= 2: self.result_count_label.setText(parts[1]) def handle_result(self, result): """处理单个结果""" summary = f""" 文件: {result['file_name']} 时长: {result['duration']}秒 ---------------------------------------- 开场白: {result['opening_check']} | 结束语: {result['closing_check']} | 禁语: {result['forbidden_check']} 客服情感: {result['agent_sentiment']} ({result['agent_emotion']}) | 语速: 
{result['agent_speed']}词/分 客户情感: {result['customer_sentiment']} ({result['customer_emotion']}) 问题解决: {result['solution_rate']} 音量水平: {result['volume_level']} | 稳定性: {result['volume_stability']} ---------------------------------------- """ self.result_text.append(summary) def handle_error(self, file_name, error): """处理错误""" self.error_list.addItem(f"{file_name}: {error}") self.error_count_label.setText(str(self.error_list.count())) def analysis_finished(self): """分析完成""" self.start_btn.setEnabled(True) self.stop_btn.setEnabled(False) self.status_label.setText(f"分析完成! 报告已保存到: {self.output_dir}") self.result_text.append("分析完成!") # 显示完成消息 self.show_message("完成", f"分析完成! 报告已保存到: {self.output_dir}") def show_message(self, title, message): """显示消息对话框""" msg = QMessageBox(self) msg.setWindowTitle(title) msg.setText(message) msg.setStandardButtons(QMessageBox.Ok) msg.exec_() if __name__ == "__main__": app = QApplication(sys.argv) # 检查GPU可用性 if MODEL_CONFIG["device"] == "cuda": try: gpu_mem = torch.cuda.get_device_properties(0).total_memory / (1024 ** 3) print(f"GPU内存: {gpu_mem:.2f}GB") # 根据GPU内存调整并行度 if gpu_mem < 4: # 确保有足够内存 MODEL_CONFIG["device"] = "cpu" MODEL_CONFIG["max_workers"] = 4 print("GPU内存不足,切换到CPU模式") elif gpu_mem < 8: MODEL_CONFIG["max_workers"] = 2 else: MODEL_CONFIG["max_workers"] = 4 except: MODEL_CONFIG["device"] = "cpu" MODEL_CONFIG["max_workers"] = 4 print("无法获取GPU信息,切换到CPU模式") window = MainWindow() window.show() sys.exit(app.exec_())
07-19
# Task note from the paste: "修改代码,将音量分析修改为仅针对客服部分" — volume
# analysis must cover the agent's speech only (done in AudioAnalyzer.analyze_audio).
# This block was recovered from a collapsed one-line paste with ad-injection
# corruption ("极狐" tokens); each corrupted token is fixed and commented below.
import os
import sys
import time
import json
import traceback
import numpy as np
import pandas as pd
import torch
import librosa
import jieba
import tempfile
from pydub import AudioSegment
from transformers import pipeline, AutoModelForSequenceClassification, AutoTokenizer
from pyannote.audio import Pipeline
from concurrent.futures import ThreadPoolExecutor, as_completed
from PyQt5.QtWidgets import (QApplication, QMainWindow, QWidget, QVBoxLayout, QHBoxLayout,
                             QLabel, QLineEdit, QPushButton, QFileDialog, QTextEdit,
                             QProgressBar, QGroupBox, QCheckBox, QListWidget, QMessageBox)
from PyQt5.QtCore import QThread, pyqtSignal, Qt, QTimer
from PyQt5.QtGui import QFont
from docx import Document
from docx.shared import Inches
import matplotlib.pyplot as plt
from matplotlib.backends.backend_qt5agg import FigureCanvasQTAgg as FigureCanvas
from collections import Counter

# Global model / runtime configuration.
MODEL_CONFIG = {
    "whisper_model": "openai/whisper-small",
    # NOTE(review): verify this Hugging Face id — the published pipeline is
    # "pyannote/speaker-diarization@2.1"; "@2.1-base" may not resolve.
    "diarization_model": "pyannote/speaker-diarization@2.1-base",
    "sentiment_model": "IDEA-CCNL/Erlangshen-Roberta-110M-Sentiment",
    "chunk_size": 10,                                            # seconds per audio chunk
    "sample_rate": 16000,
    "device": "cuda" if torch.cuda.is_available() else "cpu",
    "max_workers": 2 if torch.cuda.is_available() else 4,        # fewer workers on GPU
    "batch_size": 8,
}

# Warm up jieba's dictionary once at import time.
jieba.initialize()


class ModelLoader(QThread):
    """Background thread that loads all AI models and reports progress."""
    progress = pyqtSignal(str)
    finished = pyqtSignal(bool, str)

    def __init__(self):
        super().__init__()
        self.models = {}
        self.error = None

    def run(self):
        try:
            self.progress.emit("正在加载语音识别模型...")
            # ASR model. `pipeline()` does not accept a `language` constructor
            # kwarg; forcing Chinese decoding belongs in generate_kwargs.
            self.models["asr_pipeline"] = pipeline(
                "automatic-speech-recognition",
                model=MODEL_CONFIG["whisper_model"],
                torch_dtype=torch.float16,
                device=MODEL_CONFIG["device"],
                batch_size=MODEL_CONFIG["batch_size"],
                generate_kwargs={"language": "chinese"},
            )
            self.progress.emit("正在加载说话人分离模型...")
            # pyannote Pipeline.to() accepts a device only — the original
            # `.to(device, torch.float16)` would raise at runtime.
            self.models["diarization_pipeline"] = Pipeline.from_pretrained(
                MODEL_CONFIG["diarization_model"],
                use_auth_token=True,
            ).to(torch.device(MODEL_CONFIG["device"]))
            self.progress.emit("正在加载情感分析模型...")
            self.models["sentiment_tokenizer"] = AutoTokenizer.from_pretrained(
                MODEL_CONFIG["sentiment_model"]
            )
            # was garbled "tor极狐float16" in the paste -> torch.float16
            self.models["sentiment_model"] = AutoModelForSequenceClassification.from_pretrained(
                MODEL_CONFIG["sentiment_model"],
                torch_dtype=torch.float16,
            ).to(MODEL_CONFIG["device"])
            self.finished.emit(True, "模型加载完成!")
        except Exception as e:
            self.error = str(e)
            traceback.print_exc()
            self.finished.emit(False, f"模型加载失败: {str(e)}")


class AudioAnalyzer:
    """Core audio-analysis pipeline: convert, diarize, transcribe, score."""

    def __init__(self, models):
        # Default keyword lists; may be replaced via load_keywords().
        self.keywords = {
            "opening": ["您好", "请问是", "先生/女士", "很高兴为您服务"],
            "closing": ["感谢接听", "祝您生活愉快", "再见", "有问题随时联系"],
            "forbidden": ["不可能", "没办法", "我不管", "随便你", "投诉也没用"],
            "solution": ["解决", "处理好了", "已完成", "满意吗", "还有问题吗"],
        }
        # was garbled: first synonym of "解决" read "极狐" in the paste
        self.synonyms = {
            "不可能": ["不可能", "没可能", "做不到", "无法做到"],
            "解决": ["解决", "处理", "完成", "搞定", "办妥"],
        }
        self.models = models
        self.models_loaded = True if models else False

    def load_keywords(self, excel_path):
        """Load keyword lists and a synonym table from an Excel workbook.

        Returns (ok: bool, message: str)."""
        try:
            # Read every sheet at once; missing sheets keep the defaults.
            df = pd.read_excel(excel_path, sheet_name=None)
            if "开场白" in df:
                self.keywords["opening"] = df["开场白"].dropna()["关键词"].tolist()
            if "结束语" in df:
                self.keywords["closing"] = df["结束语"].dropna()["关键词"].tolist()
            if "禁语" in df:
                self.keywords["forbidden"] = df["禁语"].dropna()["关键词"].tolist()
            # was garbled: `in极狐 df` in the paste
            if "解决关键词" in df:
                self.keywords["solution"] = df["解决关键词"].dropna()["关键词"].tolist()
            if "同义词" in df:
                for _, row in df["同义词"].iterrows():
                    main_word = row["主词"]
                    synonyms = row["同义词"].split("、")
                    self.synonyms[main_word] = synonyms
            return True, "关键词加载成功"
        except Exception as e:
            error_msg = f"加载关键词失败: {str(e)}"
            return False, error_msg

    def convert_audio(self, input_path):
        """Convert audio to mono 16 kHz WAV and split into fixed-size chunks.

        Returns (chunks, duration_seconds, TemporaryDirectory) — the caller
        owns the temp dir and must .cleanup() it. On failure: ([], 0, None)."""
        # Input validation before any decoding.
        if not os.path.exists(input_path):
            raise Exception(f"文件不存在: {input_path}")
        if os.path.getsize(input_path) == 0:
            raise Exception("文件为空")
        valid_extensions = ['.mp3', '.wav', '.amr', '.flac', '.m4a', '.ogg']
        _, ext = os.path.splitext(input_path)
        if ext.lower() not in valid_extensions:
            raise Exception(f"不支持的文件格式: {ext}")
        temp_dir = None
        try:
            # Fast path: a WAV already in the target format skips re-encoding.
            _, ext = os.path.splitext(input_path)
            if ext.lower() in ['.wav', '.wave']:
                audio = AudioSegment.from_file(input_path)
                if (audio.frame_rate == MODEL_CONFIG["sample_rate"]
                        and audio.channels == 1
                        and audio.sample_width == 2):  # 16-bit PCM
                    chunks = []
                    chunk_size = MODEL_CONFIG["chunk_size"] * 1000  # ms
                    temp_dir = tempfile.TemporaryDirectory()
                    for i in range(0, len(audio), chunk_size):
                        chunk = audio[i:i + chunk_size]
                        chunk_path = os.path.join(temp_dir.name, f"chunk_{i // chunk_size}.wav")
                        chunk.export(chunk_path, format="wav")
                        chunks.append({
                            "path": chunk_path,
                            "start_time": i / 1000.0,
                            "end_time": (i + len(chunk)) / 1000.0,
                        })
                    return chunks, len(audio) / 1000.0, temp_dir
            # Slow path: decode, resample, downmix, then chunk.
            temp_dir = tempfile.TemporaryDirectory()
            audio = AudioSegment.from_file(input_path)
            audio = audio.set_frame_rate(MODEL_CONFIG["sample_rate"])
            audio = audio.set_channels(1)
            duration = len(audio) / 1000.0  # ms -> s
            chunks = []
            chunk_size = MODEL_CONFIG["chunk_size"] * 1000  # ms
            for i in range(0, len(audio), chunk_size):
                chunk = audio[i:i + chunk_size]
                chunk_path = os.path.join(temp_dir.name, f"chunk_{i // chunk_size}.wav")
                chunk.export(chunk_path, format="wav")
                chunks.append({
                    "path": chunk_path,
                    "start_time": i / 1000.0,            # global start (s)
                    "end_time": (i + len(chunk)) / 1000.0,  # global end (s)
                })
            return chunks, duration, temp_dir
        except Exception as e:
            error_msg = f"音频转换失败: {str(e)}"
            # Best-effort cleanup of the temp dir on failure.
            if temp_dir:
                try:
                    temp_dir.cleanup()
                except:
                    pass
            return [], 0, None

    def diarize_speakers(self, audio_path):
        """Run speaker diarization and merge adjacent same-speaker turns."""
        try:
            diarization = self.models["diarization_pipeline"](audio_path)
            segments = []
            current_segment = None
            for turn, _, speaker in diarization.itertracks(yield_label=True):
                if current_segment is None:
                    current_segment = {"start": turn.start, "end": turn.end, "speaker": speaker}
                elif (current_segment["speaker"] == speaker
                        and (turn.start - current_segment["end"]) < 1.0):
                    # Same speaker with < 1 s gap: extend the current segment.
                    current_segment["end"] = turn.end
                else:
                    segments.append(current_segment)
                    current_segment = {"start": turn.start, "end": turn.end, "speaker": speaker}
            if current_segment:
                segments.append(current_segment)
            # Text placeholders filled later by associate_speaker_text().
            for segment in segments:
                segment["text"] = ""
            return segments
        except Exception as e:
            error_msg = f"说话人分离失败: {str(e)}"
            raise Exception(error_msg) from e

    def transcribe_audio_batch(self, chunk_paths):
        """Batch-transcribe chunk files; returns [(text, word_chunks), ...]."""
        try:
            results = self.models["asr_pipeline"](
                chunk_paths,
                chunk_length_s=MODEL_CONFIG["chunk_size"],
                stride_length_s=(4, 2),
                batch_size=MODEL_CONFIG["batch_size"],
                return_timestamps=True,
            )
            transcribed_data = []
            for result in results:
                text = result["text"]
                chunks = result["chunks"]
                transcribed_data.append((text, chunks))
            return transcribed_data
        except Exception as e:
            error_msg = f"语音识别失败: {str(e)}"
            raise Exception(error_msg) from e

    def analyze_sentiment_batch(self, texts, context_weights=None):
        """Batch sentiment analysis with optional per-text context weights."""
        try:
            if not texts:
                return []
            if context_weights is None:
                context_weights = [1.0] * len(texts)
            inputs = self.models["sentiment_tokenizer"](
                texts, padding=True, truncation=True, max_length=512,
                return_tensors="pt",
            ).to(MODEL_CONFIG["device"])
            with torch.no_grad():
                outputs = self.models["sentiment_model"](**inputs)
            probs = torch.softmax(outputs.logits, dim=-1).cpu().numpy()
            results = []
            # NOTE(review): label order assumed 积极/消极/中性 — confirm against
            # the model card's id2label mapping.
            labels = ["积极", "消极", "中性"]
            for i, text in enumerate(texts):
                base_probs = probs[i]
                weight = context_weights[i]
                weighted_probs = base_probs * weight
                sentiment = labels[np.argmax(weighted_probs)]
                strong_negative = weighted_probs[1] > 0.7  # negative prob > 70%
                strong_positive = weighted_probs[0] > 0.7  # positive prob > 70%
                # Keyword-based specific-emotion detection.
                specific_emotion = "无"
                if "生气" in text or "愤怒" in text or "气死" in text:
                    specific_emotion = "愤怒"
                elif "不耐烦" in text or "快点" in text or "急死" in text:
                    specific_emotion = "不耐烦"
                elif "失望" in text or "无奈" in text:
                    specific_emotion = "失望"
                if strong_negative:
                    sentiment = "强烈消极"
                elif strong_positive:
                    sentiment = "强烈积极"
                results.append({
                    "sentiment": sentiment,
                    "emotion": specific_emotion,
                    # was garbled key "s极狐" in the paste -> "scores"
                    "scores": weighted_probs.tolist(),
                    "weight": weight,
                })
            return results
        except Exception as e:
            error_msg = f"情感分析失败: {str(e)}"
            raise Exception(error_msg) from e

    def match_keywords(self, text, keyword_type):
        """Keyword matching with jieba tokenization and synonym expansion."""
        keywords = self.keywords.get(keyword_type, [])
        if not keywords:
            return False
        words = jieba.lcut(text)
        for keyword in keywords:
            if keyword in text:          # direct substring match
                return True
            for synonym in self.synonyms.get(keyword, []):
                if synonym in text:      # synonym substring match
                    return True
            if keyword in words:         # whole-token match
                return True
        return False

    def identify_agent(self, segments, full_text):
        """Score candidate speakers and return the most likely agent id."""
        candidates = {}
        # Feature 1: opening phrases in the first 5 segments (weight 3).
        for i, segment in enumerate(segments[:5]):
            if self.match_keywords(segment["text"], "opening"):
                speaker = segment["speaker"]
                candidates.setdefault(speaker, {"score": 0, "segments": []})
                candidates[speaker]["score"] += 3
                candidates[speaker]["segments"].append(i)
        # Feature 2: closing phrases in the last 3 segments (weight 2).
        for i, segment in enumerate(segments[-3:]):
            if self.match_keywords(segment["text"], "closing"):
                speaker = segment["speaker"]
                candidates.setdefault(speaker, {"score": 0, "segments": []})
                candidates[speaker]["score"] += 2
                candidates[speaker]["segments"].append(len(segments) - 3 + i)
        # Feature 3: longest total talk time (weight 1).
        speaker_durations = {}
        for segment in segments:
            duration = segment["end"] - segment["start"]
            speaker_durations[segment["speaker"]] = speaker_durations.get(segment["speaker"], 0) + duration
        if speaker_durations:
            max_duration = max(speaker_durations.values())
            for speaker, duration in speaker_durations.items():
                candidates.setdefault(speaker, {"score": 0, "segments": []})
                if duration == max_duration:
                    candidates[speaker]["score"] += 1
        # Feature 4: frequency of agent-typical vocabulary (weight 1).
        agent_keywords = ["客服", "代表", "专员", "先生", "女士"]
        speaker_keyword_count = {}
        for segment in segments:
            text = segment["text"]
            speaker = segment["speaker"]
            for word in agent_keywords:
                if word in text:
                    speaker_keyword_count[speaker] = speaker_keyword_count.get(speaker, 0) + 1
        if speaker_keyword_count:
            max_count = max(speaker_keyword_count.values())
            for speaker, count in speaker_keyword_count.items():
                if count == max_count:
                    candidates.setdefault(speaker, {"score": 0, "segments": []})
                    candidates[speaker]["score"] += 1
        if candidates:
            best_speaker = max(candidates.items(), key=lambda x: x[1]["score"])[0]
            return best_speaker
        # Fallback: first speaker, or None when there are no segments.
        return segments[0]["speaker"] if segments else None

    def associate_speaker_text(self, segments, full_text_chunks):
        """Attach transcribed words to diarization segments by time overlap."""
        for segment in segments:
            segment_text = ""
            segment_start = segment["start"]
            segment_end = segment["end"]
            for word_info in full_text_chunks:
                if "global_start" not in word_info:
                    continue
                word_start = word_info["global_start"]
                word_end = word_info["global_end"]
                overlap_start = max(segment_start, word_start)
                overlap_end = min(segment_end, word_end)
                overlap = max(0, overlap_end - overlap_start)
                word_duration = word_end - word_start
                segment_duration = segment_end - segment_start
                if overlap > 0:
                    # Keep the word if >50% of it (or of the segment) overlaps.
                    if (overlap / word_duration > 0.5) or (overlap / segment_duration > 0.5):
                        segment_text += word_info["text"] + " "
            segment["text"] = segment_text.strip()

    def analyze_audio(self, audio_path):
        """Full per-file analysis; returns a flat result dict."""
        try:
            # Step 1: convert and chunk (temp dir owned here).
            chunks, duration, temp_dir = self.convert_audio(audio_path)
            if not chunks or not temp_dir:
                raise Exception("音频转换失败或未生成分块")
            try:
                # Step 2: diarize; Step 3: batch ASR.
                segments = self.diarize_speakers(audio_path)
                chunk_paths = [chunk["path"] for chunk in chunks]
                transcribed_data = self.transcribe_audio_batch(chunk_paths)
                # Step 4: shift word timestamps to the global timeline.
                full_text_chunks = []
                for idx, (text, chunk_data) in enumerate(transcribed_data):
                    chunk = chunks[idx]
                    for word_info in chunk_data:
                        if "timestamp" in word_info:
                            start, end = word_info["timestamp"]
                            word_info["global_start"] = chunk["start_time"] + start
                            word_info["global_end"] = chunk["start_time"] + end
                        else:
                            word_info["global_start"] = chunk["start_time"]
                            word_info["global_end"] = chunk["end_time"]
                    full_text_chunks.extend(chunk_data)
                # Steps 5-6: attach text to segments, then identify the agent.
                self.associate_speaker_text(segments, full_text_chunks)
                agent_id = self.identify_agent(segments, full_text_chunks)
                # Step 7: split text by role and run keyword checks.
                agent_text = ""
                customer_text = ""
                opening_found = False
                closing_found = False
                forbidden_found = False
                agent_weights = []
                customer_weights = []
                negative_context = False  # raises agent sentiment weight after negativity
                for i, segment in enumerate(segments):
                    if segment["speaker"] == agent_id:
                        agent_text += segment["text"] + " "
                        agent_weights.append(1.2 if negative_context else 1.0)
                    else:
                        customer_text += segment["text"] + " "
                        customer_weights.append(1.0)
                    if "生气" in segment["text"] or "愤怒" in segment["text"] or "失望" in segment["text"]:
                        negative_context = True
                    elif "解决" in segment["text"] or "满意" in segment["text"]:
                        negative_context = False
                    if not opening_found and self.match_keywords(segment["text"], "opening"):
                        opening_found = True
                    if not closing_found and self.match_keywords(segment["text"], "closing"):
                        closing_found = True
                    if not forbidden_found and self.match_keywords(segment["text"], "forbidden"):
                        forbidden_found = True
                # Step 8: batch sentiment with averaged context weights.
                agent_avg_weight = np.mean(agent_weights) if agent_weights else 1.0
                customer_avg_weight = np.mean(customer_weights) if customer_weights else 1.0
                sentiment_results = self.analyze_sentiment_batch(
                    [agent_text, customer_text],
                    context_weights=[agent_avg_weight, customer_avg_weight],
                )
                if sentiment_results:
                    agent_sentiment = sentiment_results[0]["sentiment"]
                    agent_emotion = sentiment_results[0]["emotion"]
                    customer_sentiment = sentiment_results[1]["sentiment"]
                    customer_emotion = sentiment_results[1]["emotion"]
                else:
                    agent_sentiment = "未知"
                    agent_emotion = "无"
                    customer_sentiment = "未知"
                    customer_emotion = "无"
                # Solution-rate and speaking-speed metrics.
                solution_found = self.match_keywords(agent_text, "solution")
                # NOTE(review): split() counts whitespace tokens, a rough proxy
                # for Chinese "words" — consider len(jieba.lcut(agent_text)).
                agent_words = len(agent_text.split())
                agent_duration = sum(s["end"] - s["start"] for s in segments if s["speaker"] == agent_id)
                agent_speed = agent_words / (agent_duration / 60) if agent_duration > 0 else 0
                # Volume analysis — AGENT SEGMENTS ONLY (the requested change):
                # RMS is computed over the concatenated agent samples instead of
                # the whole call, so customer/silence no longer skews the level.
                try:
                    y, sr = librosa.load(audio_path, sr=MODEL_CONFIG["sample_rate"])
                    agent_samples = [
                        y[int(s["start"] * sr):int(s["end"] * sr)]
                        for s in segments if s["speaker"] == agent_id
                    ]
                    agent_audio = np.concatenate(agent_samples) if agent_samples else y
                    rms = librosa.feature.rms(y=agent_audio)
                    avg_volume = np.mean(rms)
                    volume_stability = np.std(rms) / avg_volume if avg_volume > 0 else 0
                except:
                    avg_volume = 0
                    volume_stability = 0
                result = {
                    "file_name": os.path.basename(audio_path),
                    "duration": round(duration, 2),
                    "opening_check": "是" if opening_found else "否",
                    # was garbled: else-branch read "极狐" in the paste -> "否"
                    "closing_check": "是" if closing_found else "否",
                    "forbidden_check": "是" if forbidden_found else "否",
                    "agent_sentiment": agent_sentiment,
                    "agent_emotion": agent_emotion,
                    "customer_sentiment": customer_sentiment,
                    "customer_emotion": customer_emotion,
                    "agent_speed": round(agent_speed, 1),
                    "volume_level": round(avg_volume, 4),
                    "volume_stability": round(volume_stability, 2),
                    "solution_rate": "是" if solution_found else "否",
                    "agent_text": agent_text[:500] + "..." if len(agent_text) > 500 else agent_text,
                    "customer_text": customer_text[:500] + "..." if len(customer_text) > 500 else customer_text,
                }
                return result
            finally:
                # Always remove the chunk files.
                try:
                    temp_dir.cleanup()
                except Exception as e:
                    print(f"清理临时目录失败: {str(e)}")
        except Exception as e:
            error_msg = f"分析文件 {os.path.basename(audio_path)} 时出错: {str(e)}"
            raise Exception(error_msg) from e


class AnalysisThread(QThread):
    """Worker thread that fans analysis out over a thread pool."""
    progress = pyqtSignal(int, str)
    result_ready = pyqtSignal(dict)
    finished_all = pyqtSignal()
    error_occurred = pyqtSignal(str, str)

    def __init__(self, audio_files, keywords_file, output_dir, models):
        super().__init__()
        self.audio_files = audio_files
        self.keywords_file = keywords_file
        self.output_dir = output_dir
        self.stop_requested = False
        self.analyzer = AudioAnalyzer(models)
        self.completed_count = 0
        self.executor = None  # set while the pool is alive, for stop()

    def run(self):
        try:
            total = len(self.audio_files)
            if self.keywords_file:
                success, msg = self.analyzer.load_keywords(self.keywords_file)
                if not success:
                    self.error_occurred.emit("关键词加载", msg)
            results = []
            errors = []
            with ThreadPoolExecutor(max_workers=MODEL_CONFIG["max_workers"]) as executor:
                self.executor = executor
                future_to_file = {
                    executor.submit(self.analyzer.analyze_audio, audio_file): audio_file
                    for audio_file in self.audio_files
                }
                for future in as_completed(future_to_file):
                    if self.stop_requested:
                        break
                    audio_file = future_to_file[future]
                    try:
                        result = future.result()
                        if result:
                            results.append(result)
                            self.result_ready.emit(result)
                    except Exception as e:
                        error_msg = str(e)
                        errors.append({"file": audio_file, "error": error_msg})
                        self.error_occurred.emit(os.path.basename(audio_file), error_msg)
                    self.completed_count += 1
                    progress = int(self.completed_count / total * 100)
                    self.progress.emit(
                        progress,
                        f"已完成 {self.completed_count}/{total} ({progress}%)",
                    )
            if results:
                self.generate_reports(results, errors)
            self.finished_all.emit()
        except Exception as e:
            self.error_occurred.emit("全局错误", str(e))

    def stop(self):
        """Request a stop and cancel queued (not yet running) tasks."""
        self.stop_requested = True
        # Fix: ThreadPoolExecutor has no `_futures` attribute (the original
        # iterated it and would raise). cancel_futures (Py 3.9+) cancels all
        # pending futures in one call.
        if self.executor:
            try:
                self.executor.shutdown(wait=False, cancel_futures=True)
            except TypeError:
                # Python < 3.9: no cancel_futures parameter.
                self.executor.shutdown(wait=False)

    def generate_reports(self, results, errors):
        """Write the Excel summary and the Word report to output_dir."""
        try:
            df = pd.DataFrame(results)
            excel_path = os.path.join(self.output_dir, "质检分析报告.xlsx")
            with pd.ExcelWriter(excel_path, engine='xlsxwriter') as writer:
                df.to_excel(writer, sheet_name='详细结果', index=False)
                stats_data = {
                    "指标": ["分析文件总数", "成功分析文件数", "分析失败文件数",
                           "开场白合格率", "结束语合格率", "禁语出现率",
                           "客服积极情绪占比", "客户消极情绪占比", "问题解决率"],
                    "数值": [
                        len(results) + len(errors),
                        len(results),
                        len(errors),
                        f"{df['opening_check'].value_counts(normalize=True).get('是', 0) * 100:.1f}%",
                        f"{df['closing_check'].value_counts(normalize=True).get('是', 0) * 100:.1f}%",
                        # was garbled ":.1极狐%" in the paste -> ":.1f}%"
                        f"{df['forbidden_check'].value_counts(normalize=True).get('是', 0) * 100:.1f}%",
                        f"{df[df['agent_sentiment'] == '积极'].shape[0] / len(df) * 100:.1f}%",
                        f"{df[df['customer_sentiment'] == '消极'].shape[0] / len(df) * 100:.1f}%",
                        f"{df['solution_rate'].value_counts(normalize=True).get('是', 0) * 100:.1f}%",
                    ],
                }
                stats_df = pd.DataFrame(stats_data)
                stats_df.to_excel(writer, sheet_name='统计摘要', index=False)
            # Word report with the same statistics plus charts.
            doc = Document()
            doc.add_heading('外呼电话质检分析汇总报告', 0)
            doc.add_heading('整体统计', level=1)
            stats = [
                f"分析文件总数: {len(results) + len(errors)}",
                f"成功分析文件数: {len(results)}",
                f"分析失败文件数: {len(errors)}",
                f"开场白合格率: {stats_data['数值'][3]}",
                f"结束语合格率: {stats_data['数值'][4]}",
                f"禁语出现率: {stats_data['数值'][5]}",
                f"客服积极情绪占比: {stats_data['数值'][6]}",
                f"客户消极情绪占比: {stats_data['数值'][7]}",
                f"问题解决率: {stats_data['数值'][8]}",
            ]
            for stat in stats:
                doc.add_paragraph(stat)
            self.add_charts(doc, df)
            if errors:
                doc.add_heading('分析失败文件', level=1)
                table = doc.add_table(rows=1, cols=2)
                hdr_cells = table.rows[0].cells
                hdr_cells[0].text = '文件'
                hdr_cells[1].text = '错误原因'
                for error in errors:
                    row_cells = table.add_row().cells
                    row_cells[0].text = os.path.basename(error['file'])
                    row_cells[1].text = error['error']
            word_path = os.path.join(self.output_dir, "可视化分析报告.docx")
            doc.save(word_path)
            return True, f"报告已保存到: {self.output_dir}"
        except Exception as e:
            return False, f"生成报告失败: {str(e)}"

    def add_charts(self, doc, df):
        """Render the three summary charts into the Word document."""
        try:
            # Agent sentiment distribution.
            fig1, ax1 = plt.subplots(figsize=(6, 4))
            sentiment_counts = df['agent_sentiment'].value_counts()
            sentiment_counts.plot(kind='bar', ax=ax1,
                                  color=['green', 'red', 'blue', 'darkred', 'darkgreen'])
            ax1.set_title('客服情感分布')
            ax1.set_xlabel('情感类型')
            ax1.set_ylabel('数量')
            fig1.tight_layout()
            fig1.savefig('agent_sentiment.png')
            doc.add_picture('agent_sentiment.png', width=Inches(5))
            os.remove('agent_sentiment.png')
            plt.close(fig1)  # release figure memory
            # Customer sentiment distribution.
            fig2, ax2 = plt.subplots(figsize=(6, 4))
            df['customer_sentiment'].value_counts().plot(
                kind='bar', ax=ax2,
                color=['green', 'red', 'blue', 'darkred', 'darkgreen'])
            ax2.set_title('客户情感分布')
            ax2.set_xlabel('情感类型')
            ax2.set_ylabel('数量')
            fig2.tight_layout()
            fig2.savefig('customer_sentiment.png')
            doc.add_picture('customer_sentiment.png', width=Inches(5))
            os.remove('customer_sentiment.png')
            plt.close(fig2)
            # Compliance checks.
            fig3, ax3 = plt.subplots(figsize=(6, 4))
            compliance = df[['opening_check', 'closing_check', 'forbidden_check']].apply(
                lambda x: x.value_counts().get('是', 0))
            compliance.plot(kind='bar', ax=ax3, color=['blue', 'green', 'red'])
            ax3.set_title('合规性检查')
            ax3.set_xlabel('检查项')
            ax3.set_ylabel('合格数量')
            fig3.tight_layout()
            fig3.savefig('compliance.png')
            doc.add_picture('compliance.png', width=Inches(5))
            os.remove('compliance.png')
            plt.close(fig3)
        except Exception as e:
            print(f"生成图表失败: {str(e)}")
            # Close any figures that were created before the failure.
            if 'fig1' in locals():
                plt.close(fig1)
            if 'fig2' in locals():
                plt.close(fig2)
            if 'fig3' in locals():
                plt.close(fig3)


class MainWindow(QMainWindow):
    """Main application window."""

    def __init__(self):
        super().__init__()
        self.setWindowTitle("外呼电话录音质检分析系统")
        self.setGeometry(100, 100, 1000, 800)
        # State.
        self.audio_files = []
        self.keywords_file = ""
        self.output_dir = os.getcwd()
        self.analysis_thread = None
        self.model_loader = None
        self.models = {}
        self.models_loaded = False
        # Global font.
        app_font = QFont("Microsoft YaHei", 10)
        QApplication.setFont(app_font)
        # Main layout scaffolding.
        main_widget = QWidget()
        main_layout = QVBoxLayout()
        main_layout.setSpacing(10)
        main_layout.setContentsMargins(15, 15, 15, 15)
        # Status bar.
        self.status_label = QLabel("准备就绪")
        self.status_label.setAlignment(Qt.AlignCenter)
        self.status_label.setStyleSheet("background-color: #f0f0f0; padding: 5px; border-radius: 5px;")
        # File-selection group.
        file_group = QGroupBox("文件选择")
        file_layout = QVBoxLayout()
        file_layout.setSpacing(10)
        # Audio row.
        audio_layout = QHBoxLayout()
        self.audio_label = QLabel("音频文件/文件夹:")
        self.audio_path_edit = QLineEdit()
        # was garbled "self.audio极狐_edit" in the paste -> self.audio_path_edit
        self.audio_path_edit.setReadOnly(True)
        self.audio_path_edit.setPlaceholderText("请选择音频文件或文件夹")
        self.audio_browse_btn = QPushButton("浏览...")
        self.audio_browse_btn.setFixedWidth(80)
        self.audio_browse_btn.clicked.connect(self.browse_audio)
        audio_layout.addWidget(self.audio_label)
        audio_layout.addWidget(self.audio_path_edit, 1)
        audio_layout.addWidget(self.audio_browse_btn)
        # Keyword row.
        keyword_layout = QHBoxLayout()
        self.keyword_label = QLabel("关键词文件:")
        self.keyword_path_edit = QLineEdit()
        self.keyword_path_edit.setReadOnly(True)
        self.keyword_path_edit.setPlaceholderText("可选:选择关键词Excel文件")
        self.keyword_browse_btn = QPushButton("浏览...")
        self.keyword_browse_btn.setFixedWidth(80)
        self.keyword_browse_btn.clicked.connect(self.browse_keywords)
        keyword_layout.addWidget(self.keyword_label)
        keyword_layout.addWidget(self.keyword_path_edit, 1)
        keyword_layout.addWidget(self.keyword_browse_btn)
        # Output-directory row.
        output_layout = QHBoxLayout()
        self.output_label = QLabel("输出目录:")
        self.output_path_edit = QLineEdit(os.getcwd())
        self.output_path_edit.setReadOnly(True)
        self.output_browse_btn = QPushButton("浏览...")
        self.output_browse_btn.setFixedWidth(80)
        self.output_browse_btn.clicked.connect(self.browse_output)
        output_layout.addWidget(self.output_label)
        output_layout.addWidget(self.output_path_edit, 1)
        output_layout.addWidget(self.output_browse_btn)
        file_layout.addLayout(audio_layout)
        file_layout.addLayout(keyword_layout)
        file_layout.addLayout(output_layout)
        file_group.setLayout(file_layout)
        # Control buttons.
        control_layout = QHBoxLayout()
        control_layout.setSpacing(15)
        self.start_btn = QPushButton("开始分析")
        self.start_btn.setFixedHeight(40)
        self.start_btn.setStyleSheet("background-color: #4CAF50; color: white; font-weight: bold;")
        self.start_btn.clicked.connect(self.start_analysis)
        self.stop_btn = QPushButton("停止分析")
        self.stop_btn.setFixedHeight(40)
        self.stop_btn.setStyleSheet("background-color: #f44336; color: white; font-weight: bold;")
        self.stop_btn.clicked.connect(self.stop_analysis)
        self.stop_btn.setEnabled(False)
        self.clear_btn = QPushButton("清空")
        self.clear_btn.setFixedHeight(40)
        self.clear_btn.setStyleSheet("background-color: #2196F3; color: white; font-weight: bold;")
        self.clear_btn.clicked.connect(self.clear_all)
        # Retry button, shown only after a model-load failure.
        self.retry_btn = QPushButton("重试加载模型")
        self.retry_btn.setFixedHeight(40)
        self.retry_btn.setStyleSheet("background-color: #FF9800; color: white; font-weight: bold;")
        self.retry_btn.clicked.connect(self.retry_load_models)
        self.retry_btn.setVisible(False)
        control_layout.addWidget(self.start_btn)
        control_layout.addWidget(self.stop_btn)
        control_layout.addWidget(self.clear_btn)
        control_layout.addWidget(self.retry_btn)
        # Progress bar.
        self.progress_bar = QProgressBar()
        self.progress_bar.setRange(0, 100)
        self.progress_bar.setTextVisible(True)
        self.progress_bar.setStyleSheet(
            "QProgressBar {border: 1px solid grey; border-radius: 5px; text-align: center;}"
            "QProgressBar::chunk {background-color: #4CAF50; width: 10px;}")
        # Results group.
        result_group = QGroupBox("分析结果")
        result_layout = QVBoxLayout()
        result_layout.setSpacing(10)
        result_header = QHBoxLayout()
        self.result_label = QLabel("分析结果:")
        self.result_count_label = QLabel("0/0")
        self.result_count_label.setAlignment(Qt.AlignRight)
        result_header.addWidget(self.result_label)
        result_header.addWidget(self.result_count_label)
        self.result_text = QTextEdit()
        self.result_text.setReadOnly(True)
        self.result_text.setStyleSheet("font-family: Consolas, 'Microsoft YaHei';")
        # Error list.
        error_header = QHBoxLayout()
        self.error_label = QLabel("错误信息:")
        self.error_count_label = QLabel("0")
        self.error_count_label.setAlignment(Qt.AlignRight)
        error_header.addWidget(self.error_label)
        error_header.addWidget(self.error_count_label)
        self.error_list = QListWidget()
        self.error_list.setFixedHeight(120)
        self.error_list.setStyleSheet("color: #d32f2f;")
        result_layout.addLayout(result_header)
        result_layout.addWidget(self.result_text)
        result_layout.addLayout(error_header)
        result_layout.addWidget(self.error_list)
        result_group.setLayout(result_layout)
        # Final assembly.
        main_layout.addWidget(file_group)
        main_layout.addLayout(control_layout)
        main_layout.addWidget(self.progress_bar)
        main_layout.addWidget(self.status_label)
        main_layout.addWidget(result_group)
        main_widget.setLayout(main_layout)
        self.setCentralWidget(main_widget)
        # Kick off model loading at startup.
        self.load_models()

    def load_models(self):
        """Load models on a background thread."""
        self.status_label.setText("正在加载AI模型,请稍候...")
        self.start_btn.setEnabled(False)
        self.retry_btn.setVisible(False)
        self.model_loader = ModelLoader()
        self.model_loader.progress.connect(self.update_model_loading_status)
        self.model_loader.finished.connect(self.handle_model_loading_finished)
        self.model_loader.start()

    def retry_load_models(self):
        """Retry model loading after a failure."""
        self.retry_btn.setVisible(False)
        self.load_models()

    def update_model_loading_status(self, message):
        """Reflect model-loading progress in the status bar."""
        self.status_label.setText(message)

    def handle_model_loading_finished(self, success, message):
        """React to model loading completing (or failing)."""
        if success:
            self.models = self.model_loader.models
            self.models_loaded = True
            self.status_label.setText(message)
            self.start_btn.setEnabled(True)
            self.retry_btn.setVisible(False)
        else:
            self.status_label.setText(message)
            self.start_btn.setEnabled(False)
            self.retry_btn.setVisible(True)
            QMessageBox.critical(self, "模型加载失败",
                                 f"{message}\n\n点击'重试加载模型'按钮尝试重新加载")

    def browse_audio(self):
        """Pick one or more audio files."""
        options = QFileDialog.Options()
        files, _ = QFileDialog.getOpenFileNames(
            self, "选择音频文件", "",
            "音频文件 (*.mp3 *.wav *.amr *.flac *.m4a);;所有文件 (*)",
            options=options
        )
        if files:
            self.audio_files = files
            self.audio_path_edit.setText(f"已选择 {len(files)} 个文件")
            self.result_count_label.setText(f"0/{len(files)}")

    def browse_keywords(self):
        """Pick the optional keyword spreadsheet."""
        options = QFileDialog.Options()
        file, _ = QFileDialog.getOpenFileName(
            self, "选择关键词文件", "",
            "Excel文件 (*.xlsx);;所有文件 (*)",
            options=options
        )
        if file:
            self.keywords_file = file
            self.keyword_path_edit.setText(os.path.basename(file))

    def browse_output(self):
        """Pick the report output directory."""
        options = QFileDialog.Options()
        directory = QFileDialog.getExistingDirectory(self, "选择输出目录", options=options)
        if directory:
            self.output_dir = directory
            self.output_path_edit.setText(directory)

    def start_analysis(self):
        """Validate inputs, reset the UI, and launch the analysis thread."""
        if not self.audio_files:
            self.show_message("错误", "请先选择音频文件!")
            return
        if not self.models_loaded:
            self.show_message("错误", "AI模型尚未加载完成!")
            return
        if not os.path.exists(self.output_dir):
            try:
                os.makedirs(self.output_dir)
            except Exception as e:
                self.show_message("错误", f"无法创建输出目录: {str(e)}")
                return
        self.start_btn.setEnabled(False)
        self.stop_btn.setEnabled(True)
        self.result_text.clear()
        self.error_list.clear()
        self.error_count_label.setText("0")
        self.result_text.append("开始分析音频文件...")
        self.progress_bar.setValue(0)
        self.analysis_thread = AnalysisThread(
            self.audio_files, self.keywords_file, self.output_dir, self.models
        )
        self.analysis_thread.progress.connect(self.update_progress)
        self.analysis_thread.result_ready.connect(self.handle_result)
        self.analysis_thread.finished_all.connect(self.analysis_finished)
        self.analysis_thread.error_occurred.connect(self.handle_error)
        self.analysis_thread.start()

    def stop_analysis(self):
        """Request the worker thread to stop and wait for it."""
        if self.analysis_thread and self.analysis_thread.isRunning():
            self.analysis_thread.stop()
            self.analysis_thread.wait()
            self.result_text.append("分析已停止")
            self.status_label.setText("分析已停止")
            self.start_btn.setEnabled(True)
            self.stop_btn.setEnabled(False)

    def clear_all(self):
        """Reset every input and output widget to its initial state."""
        self.audio_files = []
        self.keywords_file = ""
        self.audio_path_edit.clear()
        self.keyword_path_edit.clear()
        self.result_text.clear()
        self.error_list.clear()
        self.progress_bar.setValue(0)
        self.status_label.setText("准备就绪")
        self.result_count_label.setText("0/0")
        self.error_count_label.setText("0")

    def update_progress(self, value, message):
        """Update the progress bar / status text from the worker."""
        self.progress_bar.setValue(value)
        self.status_label.setText(message)
        if "已完成" in message:
            parts = message.split()
            if len(parts) >= 2:
                self.result_count_label.setText(parts[1])

    def handle_result(self, result):
        """Append a one-file summary to the result pane."""
        summary = f"""
文件: {result['file_name']}
时长: {result['duration']}秒
----------------------------------------
开场白: {result['opening_check']} | 结束语: {result['closing_check']} | 禁语: {result['forbidden_check']}
客服情感: {result['agent_sentiment']} ({result['agent_emotion']}) | 语速: {result['agent_speed']}词/分
客户情感: {result['customer_sentiment']} ({result['customer_emotion']})
问题解决: {result['solution_rate']}
音量水平: {result['volume_level']} | 稳定性: {result['volume_stability']}
----------------------------------------
"""
        self.result_text.append(summary)

    def handle_error(self, file_name, error):
        """Record a per-file failure in the error list."""
        self.error_list.addItem(f"{file_name}: {error}")
        self.error_count_label.setText(str(self.error_list.count()))

    def analysis_finished(self):
        """Restore the UI once the whole batch is done."""
        self.start_btn.setEnabled(True)
        self.stop_btn.setEnabled(False)
        self.status_label.setText(f"分析完成! 报告已保存到: {self.output_dir}")
        self.result_text.append("分析完成!")
        self.show_message("完成", f"分析完成! 报告已保存到: {self.output_dir}")

    def show_message(self, title, message):
        """Show a modal information dialog."""
        msg = QMessageBox(self)
        msg.setWindowTitle(title)
        msg.setText(message)
        msg.setStandardButtons(QMessageBox.Ok)
        msg.exec_()


if __name__ == "__main__":
    app = QApplication(sys.argv)
    # Downgrade to CPU (or cap parallelism) when GPU memory is scarce.
    if MODEL_CONFIG["device"] == "cuda":
        try:
            gpu_mem = torch.cuda.get_device_properties(0).total_memory / (1024 ** 3)
            print(f"GPU内存: {gpu_mem:.2f}GB")
            if gpu_mem < 4:
                MODEL_CONFIG["device"] = "cpu"
                MODEL_CONFIG["max_workers"] = 4
                print("GPU内存不足,切换到CPU模式")
            elif gpu_mem < 8:
                MODEL_CONFIG["max_workers"] = 2
            else:
                MODEL_CONFIG["max_workers"] = 4
        except:
            MODEL_CONFIG["device"] = "cpu"
            MODEL_CONFIG["max_workers"] = 4
            print("无法获取GPU信息,切换到CPU模式")
    window = MainWindow()
    window.show()
    sys.exit(app.exec_())
07-22
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值