Developing Web Services with Axis2 #3 - Custom Fault Message

This article walks through a concrete web service interface design case, showing how to handle errors with a custom exception, WebServiceFault, and return a detailed error code and error message to the consumer. Both server-side and client-side code examples are provided.


public interface UserServices {
    UserInfoResp getUserInfo(String userId) throws WebServiceFault;

    UserInfoResp authUser(AuthUserReq authReq) throws WebServiceFault;
}

Take note:

All the methods in the interface that will be exposed as web service operations throw WebServiceFault.

// ==== WebServiceFault.java =====
package com.test.axis.bean;

public class WebServiceFault extends Exception {
    private String errCode;
    private String errMessage;

    public String getErrCode() { return errCode; }
    public void setErrCode(String errCode) { this.errCode = errCode; }
    public String getErrMessage() { return errMessage; }
    public void setErrMessage(String errMessage) { this.errMessage = errMessage; }
}

I want to return an error code and an error description to the web service consumer when something goes wrong.

---- wsdl

<wsdl:operation name="getUserInfo">
    <soap:operation soapAction="urn:getUserInfo" style="document"/>
    <wsdl:input>
        <soap:body use="literal"/>
    </wsdl:input>
    <wsdl:output>
        <soap:body use="literal"/>
    </wsdl:output>
    <wsdl:fault name="WebServiceFault">
        <soap:fault use="literal" name="WebServiceFault"/>
    </wsdl:fault>
</wsdl:operation>

---- skeleton code

    public com.test.axis.ws.bean.GetUserInfoResp getUserInfo(
            com.test.axis.ws.bean.GetUserInfo getUserInfo2)
            throws WebServiceFault {
        if (getUserInfo2 == null || getUserInfo2.getUserId() == null) {
            // build the custom fault: the xsd bean carries the error payload,
            // and WebServiceFaultE is the generated wrapper element around it
            WebServiceFault fault = new WebServiceFault();
            com.test.axis.ws.bean.WebServiceFaultE wsFaultE = new com.test.axis.ws.bean.WebServiceFaultE();
            com.test.axis.ws.bean.xsd.WebServiceFault param = new com.test.axis.ws.bean.xsd.WebServiceFault();
            param.setErrCode("Error Code 9999");
            param.setErrMessage("Error Desc: parameter is Null.");

            wsFaultE.setWebServiceFault(param);
            fault.setFaultMessage(wsFaultE);
            throw fault;
        }
        return null;
    }
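
Every operation repeats the same three-step wrapping (xsd payload bean, generated wrapper element, exception). A small helper can cut that boilerplate. A minimal sketch, assuming the generated class names used above; the helper name newFault is mine, not generated:

    private static WebServiceFault newFault(String errCode, String errMessage) {
        // the xsd bean carries the error payload
        com.test.axis.ws.bean.xsd.WebServiceFault param = new com.test.axis.ws.bean.xsd.WebServiceFault();
        param.setErrCode(errCode);
        param.setErrMessage(errMessage);

        // the generated wrapper element expected by the exception
        com.test.axis.ws.bean.WebServiceFaultE wsFaultE = new com.test.axis.ws.bean.WebServiceFaultE();
        wsFaultE.setWebServiceFault(param);

        WebServiceFault fault = new WebServiceFault();
        fault.setFaultMessage(wsFaultE);
        return fault;
    }

With it, the null check above reduces to one line: throw newFault("Error Code 9999", "Error Desc: parameter is Null.");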

---- request soap

<soapenv:Envelope xmlns:soapenv="http://schemas.xmlsoap.org/soap/envelope/" xmlns:q0="http://axis.test.com/ws" xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
<soapenv:Body>
  <q0:getUserInfo />
</soapenv:Body>
</soapenv:Envelope>

In this case, the request parameter is empty.

---- response soap

<soapenv:Envelope xmlns:soapenv="http://schemas.xmlsoap.org/soap/envelope/">
  <soapenv:Body>
    <soapenv:Fault>
      <faultcode>soapenv:Server</faultcode>
      <faultstring>WebServiceFault</faultstring>
      <detail>
        <ns2:WebServiceFault xmlns:ns2="http://axis.test.com/ws">
          <ns2:WebServiceFault xmlns:ns1="http://bean.axis.test.com/xsd" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:type="ns1:WebServiceFault">
            <ns1:errCode>Error Code 9999</ns1:errCode>
            <ns1:errMessage>Error Desc: parameter is Null.</ns1:errMessage>
          </ns2:WebServiceFault>
        </ns2:WebServiceFault>
      </detail>
    </soapenv:Fault>
  </soapenv:Body>
</soapenv:Envelope>

From the ns1:errCode and ns1:errMessage elements inside <detail>, we can see the custom error code and message were returned successfully.

---- stub code


try {
    _stub.getUserInfo(new GetUserInfo());
} catch (RemoteException e) {
    e.printStackTrace();
} catch (WebServiceFault e) {
    // print the custom error code and error message
    System.out.println(e.getFaultMessage().getWebServiceFault().getErrCode());
    System.out.println(e.getFaultMessage().getWebServiceFault().getErrMessage());
}
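
For completeness, _stub above is an instance of the Axis2-generated client stub. A minimal sketch of the wiring; the class name UserServicesStub and the endpoint URL are assumptions, not taken from the article:

// hypothetical wiring: stub class name and endpoint URL are placeholders
UserServicesStub _stub =
        new UserServicesStub("http://localhost:8080/axis2/services/UserServices");
GetUserInfo request = new GetUserInfo(); // userId left unset, which triggers the fault above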

------------------------------------------------------------

TODO:

a> consider using the following elements to customize the fault message

  <faultcode>ABC</faultcode>
  <faultstring>XYZ</faultstring>
  <detail>Detail of Fault</detail>

I have not tried it out; an untested sketch follows.
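
A rough, untested sketch of that idea, using org.apache.axis2.AxisFault plus Axiom to set the three elements directly; the class name, QName, and values below are placeholders:

import javax.xml.namespace.QName;

import org.apache.axiom.om.OMAbstractFactory;
import org.apache.axiom.om.OMElement;
import org.apache.axiom.om.OMFactory;
import org.apache.axiom.om.OMNamespace;
import org.apache.axis2.AxisFault;

public class CustomFaultSketch {
    static void fail() throws AxisFault {
        // build an arbitrary payload for the <detail> section
        OMFactory fac = OMAbstractFactory.getOMFactory();
        OMNamespace ns = fac.createOMNamespace("http://axis.test.com/ws", "ns");
        OMElement detail = fac.createOMElement("FaultDetail", ns);
        detail.setText("Detail of Fault");

        AxisFault fault = new AxisFault("XYZ");  // maps to <faultstring>
        fault.setFaultCode(new QName("ABC"));    // maps to <faultcode>
        fault.setDetail(detail);                 // rendered as a child of <detail>
        throw fault;
    }
}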

The following reference websites will probably be helpful:
http://www.cnblogs.com/huqingyu/archive/2008/04/09/1145868.html
http://apps.hi.baidu.com/share/detail/23219236
http://hi.baidu.com/hero%CD%F5%E6%DD/blog/item/ad3b47107dc024c1f6039eb7.html
