import os
import json
import logging
import traceback
import pandas as pd
import pytz
from datetime import datetime
from sqlalchemy import create_engine
from dotenv import load_dotenv
from sqlalchemy import inspect
from sqlalchemy.engine import Engine
# Logging setup: INFO level, timestamped messages.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(levelname)s - %(message)s"
)
# Load environment variables from a .env file (DB credentials and table names
# are read via os.getenv later, so this must run before any engine creation).
load_dotenv()
# Output file configuration (referenced by the script entry point's output path).
OUTPUT_DIR = r"D:\AEB Management\output"
OUTPUT_FILE = "processed_data.csv"
# Make sure the output directory exists (no-op if already present).
os.makedirs(OUTPUT_DIR, exist_ok=True)
# ------------------ 配置加载 ------------------
def load_config(config_path):
    """Load a JSON configuration file.

    :param config_path: path to the JSON configuration file
    :return: parsed configuration (as returned by ``json.load``)
    :raises Exception: re-raises any read/parse failure after logging it
    """
    try:
        with open(config_path, "r", encoding="utf-8") as fh:
            loaded = json.load(fh)
    except Exception as e:
        logging.error("加载配置文件失败,请检查路径或格式!")
        logging.error(e)
        raise
    logging.info(f"成功加载配置文件: {config_path}")
    return loaded
def load_bsw_staff_list(config_path):
    """Load the BSW staff list from a JSON configuration file.

    :param config_path: path to the JSON configuration file; the list is read
        from its ``bsw_staff_list`` key (missing key -> empty list)
    :return: the staff names as a ``set`` for O(1) membership tests
    :raises Exception: re-raises any read/parse failure after logging it
    """
    try:
        with open(config_path, "r", encoding="utf-8") as fh:
            staff_list = json.load(fh).get("bsw_staff_list", [])
    except Exception as e:
        logging.error("加载 BSW Staff List 配置失败,请检查路径或格式!")
        logging.error(e)
        raise
    logging.info(f"成功加载 BSW Staff List,包含 {len(staff_list)} 人")
    return set(staff_list)  # set for fast lookups
# ------------------ 数据列处理 ------------------
def add_columns(dataframe, bsw_staff_set):
    """
    Dynamically add derived analysis columns based on the 'Involvement of BSW'
    column and related business rules.

    :param dataframe: pandas DataFrame of a Jira issue export; the logic reads
        'Project key', 'Labels', 'Summary', 'Issue Type', 'Involvement of BSW',
        'Component/s', 'Custom field (Custom_2)', 'Custom field (Found By)',
        'Status', 'Priority', 'Assignee', 'Created' and 'Updated'
    :param bsw_staff_set: set of BSW staff names used to flag assignees
    :return: the same DataFrame with the new columns added (modified in place)
    """
    # Current UTC timestamp used for the age/delay calculations below.
    current_time = pd.Timestamp.now(tz='UTC')
    # ------------------ New 'HorizontalExpansion PN' column ------------------
    # Temporary column that will hold the extracted project number.
    dataframe['HorizontalExpansion PN'] = None
    # Rows in project KOCHI_PL whose labels request checking on another project.
    kochi_condition = (
        (dataframe['Project key'] == 'KOCHI_PL') &
        (dataframe['Labels'].str.contains('CheckItOnOtherProject', na=False, regex=False))
    )
    # Extract the project number and substitute it into 'Project key'.
    if kochi_condition.any():
        # Regex pulls a project number of the form 'P' + 5 digits out of Summary.
        extracted_projects = dataframe.loc[kochi_condition, 'Summary'].str.extract(r'(P\d{5})', expand=False)
        # Record the extracted value.
        dataframe.loc[kochi_condition, 'HorizontalExpansion PN'] = extracted_projects
        # Overwrite 'Project key' only where the extraction succeeded.
        valid_extraction = extracted_projects.notna()
        dataframe.loc[kochi_condition & valid_extraction, 'Project key'] = extracted_projects[valid_extraction]
    # ================== Eight new analysis columns ==================
    # 1. ProjectNum - unified project number (straight copy of 'Project key').
    dataframe['ProjectNum'] = dataframe['Project key']
    # 2. BSW Self Test - flags issues found by BSW self-testing.
    def is_bsw_self_test(row):
        if row['Involvement of BSW'] != 'Yes':
            return 0
        found_by = row.get('Custom field (Found By)', '')
        labels = row.get('Labels', '')
        # Condition 1: found via development audit, review, unit test or development.
        cond1 = found_by in ['Development Audit', 'Review', 'SW Unit Test', 'Development']
        # Condition 2: found in SW integration test and labeled BSW_IT.
        # NOTE(review): if 'Labels' is NaN (a float), the `in` test raises
        # TypeError — confirm Labels is always a string by this point.
        cond2 = (found_by == 'SW Integration Test') and ('BSW_IT' in labels)
        return 1 if cond1 or cond2 else 0
    dataframe['BSW Self Test'] = dataframe.apply(is_bsw_self_test, axis=1)
    # 3. BSW Issue - flags BSW-related issues by classification keyword.
    def is_bsw_issue(row):
        if not (row['Issue Type'] in ["KOCHI AE Issue", "Problem"] and
                row['Involvement of BSW'] == 'Yes'):
            return 0
        custom_field = str(row.get('Custom field (Custom_2)', ''))
        keywords = ['平台问题', '开发设计问题', '三方包问题',
                    'Development Design Issue', 'Platform Issue', 'Party Package Issue']
        return 1 if any(keyword in custom_field for keyword in keywords) else 0
    dataframe['BSW Issue'] = dataframe.apply(is_bsw_issue, axis=1)
    # 4. Stack BSW Analyzed - Stack-related issues already analyzed.
    def is_stack_analyzed(row):
        if not (row['Issue Type'] in ["KOCHI AE Issue", "Problem"] and
                row['Involvement of BSW'] == 'Yes'):
            return 0
        components = str(row.get('Component/s', ''))
        custom_field = str(row.get('Custom field (Custom_2)', ''))
        cond1 = 'Stack' in components
        cond2 = '非BSW问题' in custom_field or 'Non-BSW' in custom_field
        return 1 if cond1 or cond2 else 0
    dataframe['Stack BSW Analyzed'] = dataframe.apply(is_stack_analyzed, axis=1)
    # 5. Stack BSW Unanalyzed - complement of column 4 within the same base filter.
    def is_stack_unanalyzed(row):
        if not (row['Issue Type'] in ["KOCHI AE Issue", "Problem"] and
                row['Involvement of BSW'] == 'Yes'):
            return 0
        components = str(row.get('Component/s', ''))
        custom_field = str(row.get('Custom field (Custom_2)', ''))
        cond1 = 'Stack' in components
        cond2 = '非BSW问题' in custom_field or 'Non-BSW' in custom_field
        return 1 if not (cond1 or cond2) else 0
    dataframe['Stack BSW Unanalyzed'] = dataframe.apply(is_stack_unanalyzed, axis=1)
    # 6. Stack Total - flag for all Stack-related issues of the two issue types.
    dataframe['Stack Total'] = dataframe.apply(
        lambda row: 1 if (row['Issue Type'] in ["KOCHI AE Issue", "Problem"] and
                          'Stack' in str(row.get('Component/s', ''))) else 0,
        axis=1
    )
    # 7. BSW Involve - issues with BSW involvement.
    dataframe['BSW Involve'] = dataframe.apply(
        lambda row: 1 if (row['Issue Type'] in ["KOCHI AE Issue", "Problem"] and
                          row['Involvement of BSW'] == 'Yes') else 0,
        axis=1
    )
    # 8. BSW Involve Unclosed - BSW-involved issues whose status is not Closed.
    dataframe['BSW Involve Unclosed'] = dataframe.apply(
        lambda row: 1 if (row['Issue Type'] in ["KOCHI AE Issue", "Problem"] and
                          row['Involvement of BSW'] == 'Yes' and
                          row.get('Status', '') != 'Closed') else 0,
        axis=1
    )
    # ------------------ HorizontalExpansion analysis columns ------------------
    # 1. HorizontalExpansion Count - marks rows that have a horizontal expansion.
    dataframe['HorizontalExpansion Count'] = dataframe['HorizontalExpansion PN'].notna().astype(int)
    # 2. HorizontalExpansion Unfinished - expansion rows whose status is not Done.
    dataframe['HorizontalExpansion Unfinished'] = (
        dataframe['HorizontalExpansion PN'].notna() &
        (dataframe['Status'] != 'Done')
    ).astype(int)
    # 3. HorizontalExpansion Delay - expansion rows unresolved for over 30 days.
    # First normalize the time zone of the 'Created' column.
    if 'Created' in dataframe.columns and dataframe['Created'].dtype == 'object':
        # 'Created' arrived as strings; convert to datetime (invalid -> NaT).
        dataframe['Created'] = pd.to_datetime(dataframe['Created'], errors='coerce')
    # Ensure a consistent UTC time zone.
    if dataframe['Created'].dt.tz is None:
        created_dates = dataframe['Created'].dt.tz_localize('UTC')
    else:
        created_dates = dataframe['Created'].dt.tz_convert('UTC')
    days_since_creation = (current_time - created_dates).dt.days
    dataframe['HorizontalExpansion Delay'] = (
        dataframe['HorizontalExpansion PN'].notna() &
        (dataframe['Status'] != 'Done') &
        (days_since_creation > 30)
    ).astype(int)
    # ------------------ Pre-existing business-logic columns ------------------
    # 'BSW Analysis Conclusions' column: maps the free-text classification in
    # Custom_2 onto a small set of conclusion labels.
    def bsw_analysis_conclusions(row):
        if row['Involvement of BSW'] == 'No':
            return 'Non-BSW'
        elif row['Involvement of BSW'] == 'Yes':
            if pd.notna(row['Custom field (Custom_2)']):
                custom_field = row['Custom field (Custom_2)']
                if '非BSW问题' in custom_field or 'Non-BSW' in custom_field:
                    return 'Analyzed-Non-BSW'
                elif '开发设计问题' in custom_field or 'Development Design Issue' in custom_field:
                    return 'Analyzed-BSW'
                elif '需求问题' in custom_field or 'Requirement Issue' in custom_field:
                    return 'Analyzed-Non-BSW'
                elif '平台问题' in custom_field or 'Platform Issue' in custom_field:
                    return 'Analyzed-BSW'
                elif '三方包问题' in custom_field or 'Party Package Issue' in custom_field:
                    return 'Analyzed-三方包问题'
                else:
                    return 'Analyzing-Unclassified'
        return 'Analyzing-Unclassified'
    dataframe['BSW Analysis Conclusions'] = dataframe.apply(bsw_analysis_conclusions, axis=1)
    # 'Stack Classification' column: first matching *_Stack token wins.
    def stack_classification(component):
        if pd.isna(component):
            return 'Unclassed'
        elif 'Com_Stack' in component:
            return 'Com_Stack'
        elif 'Cdd_Stack' in component:
            return 'Cdd_Stack'
        elif 'CybSec_Stack' in component:
            return 'CybSec_Stack'
        elif 'Diag_Stack' in component:
            return 'Diag_Stack'
        elif 'Fbl_Stack' in component:
            return 'Fbl_Stack'
        elif 'FuSa_Stack' in component:
            return 'FuSa_Stack'
        elif 'IoHwAb_Stack' in component:
            return 'IoHwAb_Stack'
        elif 'Mem_Stack' in component:
            return 'Mem_Stack'
        elif 'ModeMgm_Stack' in component:
            return 'ModeMgm_Stack'
        elif 'OS_Stack' in component:
            return 'OS_Stack'
        elif 'Sys_Stack' in component:
            return 'Sys_Stack'
        else:
            return 'Unclassed'
    dataframe['Stack Classification'] = dataframe['Component/s'].apply(stack_classification)
    # 'BSW Reason Classification' column.
    def calculate_bsw_reason_classification(row):
        """
        Logic for the 'BSW Reason Classification' column.
        When 'Involvement of BSW' is 'No', return '底层未参与分析' directly.
        Otherwise classify the row by the keywords in 'Custom field (Custom_2)'.
        """
        if row['Involvement of BSW'] == 'No':
            return '底层未参与分析'  # BSW not involved -> fixed label
        if row['Involvement of BSW'] == 'Yes' and pd.notna(row['Custom field (Custom_2)']):
            custom_field = row['Custom field (Custom_2)']
            if '非BSW问题' in custom_field or 'Non-BSW' in custom_field:
                return '非BSW问题'
            elif '开发设计问题' in custom_field or 'Development Design Issue' in custom_field:
                return '开发设计问题'
            elif '需求问题' in custom_field or 'Requirement Issue' in custom_field:
                return '需求问题'
            elif '平台问题' in custom_field or 'Platform Issue' in custom_field:
                return '平台问题'
            elif '三方包问题' in custom_field or 'Party Package Issue' in custom_field:
                return '三方包问题'
            else:
                return 'Analyzing-Unclassified'
        else:
            # 'Custom field (Custom_2)' is empty -> still unclassified.
            return 'Analyzing-Unclassified'
    dataframe['BSW Reason Classification'] = dataframe.apply(calculate_bsw_reason_classification, axis=1)
    # New 'total' column: counts non-KOCHI_PL rows of the two issue types.
    def calculate_total(row):
        if row['Project key'] == 'KOCHI_PL':
            return 0
        elif row['Project key'] != 'KOCHI_PL' and row['Issue Type'] in ["KOCHI AE Issue", "Problem"]:
            return 1
        else:
            return 0
    dataframe['total'] = dataframe.apply(calculate_total, axis=1)
    # 'BSW Analyzed' column: Custom_2 contains a recognized classification keyword.
    def calculate_bsw_analyzed(row):
        if (
            row['Issue Type'] in ["KOCHI AE Issue", "Problem"]
            and row['Involvement of BSW'] == 'Yes'
            and pd.notna(row['Custom field (Custom_2)'])  # must be non-empty
        ):
            custom_field = row['Custom field (Custom_2)']
            # Regex of recognized classification keywords (Chinese + English).
            regex = r'需求问题|平台问题|开发设计问题|非BSW问题|三方包问题|' \
                    r'Non-BSW|Development Design Issue|Requirement Issue|Platform Issue|Party Package Issue'
            if pd.Series(custom_field).str.contains(regex, regex=True, na=False).iloc[0]:
                return 1
        return 0
    dataframe['BSW Analyzed'] = dataframe.apply(calculate_bsw_analyzed, axis=1)
    # 'BSW Unanalyzed' column: BSW-involved rows with no recognized keyword.
    def calculate_bsw_unanalyzed(row):
        if (
            row['Issue Type'] in ["KOCHI AE Issue", "Problem"]
            and row['Involvement of BSW'] == 'Yes'
        ):
            custom_field = row['Custom field (Custom_2)']
            # Unanalyzed when NULL, empty, or no classification keyword matches.
            if pd.isna(custom_field) or custom_field == '' or not pd.Series(custom_field).str.contains(
                r'需求问题|平台问题|开发设计问题|非BSW问题|三方包问题|'
                r'Non-BSW|Development Design Issue|Requirement Issue|Platform Issue|Party Package Issue',
                regex=True, na=False
            ).iloc[0]:
                return 1
        return 0
    dataframe['BSW Unanalyzed'] = dataframe.apply(calculate_bsw_unanalyzed, axis=1)
    # 'BSW Staff List' column: assignee is a known BSW staff member.
    dataframe['BSW Staff List'] = dataframe['Assignee'].apply(
        lambda x: "Yes" if x in bsw_staff_set else "No"
    )
    # 'Found By Classification' column: buckets the discovery channel.
    def classify_found_by(row):
        found_by = row['Custom field (Found By)']
        labels = row['Labels'] if pd.notna(row['Labels']) else ""
        if found_by == 'SW Integration Test' and 'BSW_IT' in labels:
            return 'BSW_IT'
        elif found_by == 'SW Integration Test':
            return 'SWIT'
        elif found_by == 'Cybersecurity Monitoring':
            return 'Cybersecurity Monitoring'
        elif found_by in ['HIL TEST', 'HILP TEST', 'System Test']:
            return 'System Test'
        elif found_by in ['OEM Test', 'OEM', 'End user']:
            return 'OEM&End user'
        elif found_by == 'Development Audit' and 'BSW_PT' in labels:
            return 'BSW_PT'
        elif found_by in ['Review', 'SW Unit Test', 'Development']:
            return '开发自测'
        elif found_by in ['Elec. HW-SW Integration Test', 'Elec. HW Unit Test']:
            return 'HW'
        elif pd.isna(found_by) or str(found_by).strip() == '':
            return 'Others'
        elif found_by in [
            'SW Test', 'Production', 'Enviromental qualification', 'Mechatronic Module Test',
            'ASPICE Assessment', 'Mech. Unit Test', 'Audit', 'SW Audit', 'Mechatronic Integration Test',
            'Functional Safety Assesment', 'Mech. Integration Test', 'Gap Analysis'
        ]:
            return 'Others'
        else:
            return 'Others'
    dataframe['Found By Classification'] = dataframe.apply(classify_found_by, axis=1)
    # 'High Priority Unanalyzed' column: high-priority BSW issues older than
    # 15 days with no recognized analysis conclusion yet.
    def calculate_high_priority_unanalyzed(row):
        # 1. Base conditions.
        if not (
            row['Issue Type'] in ["KOCHI AE Issue", "Problem"] and
            row['Involvement of BSW'] == 'Yes' and
            row['Priority'] == 'High'
        ):
            return 0
        # 2. Days since creation.
        created_date = row['Created']
        if pd.isna(created_date):
            return 0
        # Keep naive/aware timestamps consistent with "now".
        # NOTE(review): assumes 'Created' holds pd.Timestamp values by this
        # point (the conversion above only runs for object dtype) — confirm.
        now = pd.Timestamp.now(tz='UTC') if created_date.tz else pd.Timestamp.now()
        days_diff = (now - created_date).days
        if days_diff <= 15:
            return 0
        # 3. Does the custom field already contain an analysis conclusion?
        custom_field = row['Custom field (Custom_2)'] if pd.notna(row['Custom field (Custom_2)']) else ""
        # Keywords that mean "already analyzed" (excluded from this flag).
        exclude_keywords = [
            '需求问题', '平台问题', '开发设计问题', '非BSW问题', '三方包问题',
            'Non-BSW', 'Development Design Issue', 'Requirement Issue',
            'Platform Issue', 'Party Package Issue'
        ]
        # Any matching keyword disqualifies the row.
        if any(keyword in custom_field for keyword in exclude_keywords):
            return 0
        # All conditions satisfied.
        return 1
    dataframe['High Priority Unanalyzed'] = dataframe.apply(calculate_high_priority_unanalyzed, axis=1)
    # 'Stack_Analyzed' column (per-row variant of column 4 above).
    def calculate_stack_analyzed(row):
        if (
            row['Issue Type'] in ["KOCHI AE Issue", "Problem"]
            and row['Involvement of BSW'] == 'Yes'
        ):
            # Component list mentions 'Stack'?
            comp_has_stack = pd.notna(row['Component/s']) and 'Stack' in row['Component/s']
            # Custom field carries a non-BSW conclusion?
            custom_has_keywords = pd.notna(row['Custom field (Custom_2)']) and (
                '非BSW问题' in row['Custom field (Custom_2)'] or
                'Non-BSW' in row['Custom field (Custom_2)']
            )
            # Either condition counts.
            if comp_has_stack or custom_has_keywords:
                return 1
        return 0
    dataframe['Stack_Analyzed'] = dataframe.apply(calculate_stack_analyzed, axis=1)
    # 'Stack_Unanalyzed' column (complement of 'Stack_Analyzed' in the filter).
    def calculate_stack_unanalyzed(row):
        if (
            row['Issue Type'] in ["KOCHI AE Issue", "Problem"]
            and row['Involvement of BSW'] == 'Yes'
        ):
            # Component list mentions 'Stack'?
            comp_has_stack = pd.notna(row['Component/s']) and 'Stack' in row['Component/s']
            # Custom field carries a non-BSW conclusion?
            custom_has_keywords = pd.notna(row['Custom field (Custom_2)']) and (
                '非BSW问题' in row['Custom field (Custom_2)'] or
                'Non-BSW' in row['Custom field (Custom_2)']
            )
            # Flag only when neither condition holds.
            if not comp_has_stack and not custom_has_keywords:
                return 1
        return 0
    dataframe['Stack_Unanalyzed'] = dataframe.apply(calculate_stack_unanalyzed, axis=1)
    # Key fix: unified time-zone handling for the 'Updated' column.
    try:
        # Convert 'Updated' to a UTC-aware datetime (invalid values -> NaT).
        dataframe['Updated'] = pd.to_datetime(
            dataframe['Updated'],
            errors='coerce',
            utc=True
        )
        # Age in days since the last update.
        now = pd.Timestamp.now(tz='UTC')
        days_since_update = (now - dataframe['Updated']).dt.days
    except TypeError as e:
        logging.error(f"时区错误: {e}")
        if 'Updated' in dataframe and dataframe['Updated'].dt.tz is None:
            logging.warning("检测到Updated列缺少时区信息,将转换为UTC")
            dataframe['Updated'] = dataframe['Updated'].dt.tz_localize('UTC')
            now = pd.Timestamp.now(tz='UTC')
            days_since_update = (now - dataframe['Updated']).dt.days
    except Exception as e:
        logging.error(f"处理日期时发生未知错误: {e}")
        days_since_update = None  # avoid later NameError
        # NOTE(review): None still breaks the comparisons below
        # (None >= 15 raises TypeError) — confirm the intended fallback.
    # Base filter shared by the update-age columns.
    condition_base = (
        (dataframe['Issue Type'].isin(["KOCHI AE Issue", "Problem"])) &
        (dataframe['Involvement of BSW'] == 'Yes') &
        (dataframe['BSW Staff List'] == 'Yes')
    )
    # Date-difference flag columns.
    dataframe['update_over_15'] = (
        condition_base &
        (days_since_update >= 15)
    ).astype(int)
    dataframe['update_over_7'] = (
        condition_base &
        (days_since_update > 7) &
        (days_since_update < 15)
    ).astype(int)
    logging.info("成功添加所有分析列")
    return dataframe
def assign_responsibles(dataframe, config):
    """
    Add 'Responsible' and 'Email' columns based on a per-project config.

    :param dataframe: DataFrame containing a 'Project key' column
    :param config: mapping of project key -> {"Responsible": ..., "Email": ...};
        an optional "Default" entry is used for unknown project keys
    :return: the DataFrame with the two columns added
    """
    # FIX: don't KeyError when the config has no "Default" entry — fall back
    # to an empty dict so the per-field defaults below still apply.
    default_entry = config.get("Default", {})

    def get_responsible_and_email(project_key):
        # Known project keys use their own entry; others use "Default".
        entry = config.get(project_key, default_entry)
        responsible = entry.get("Responsible", "Unassigned")
        email = entry.get("Email", "unknown@example.com")
        return responsible, email

    # FIX: zip(*...) cannot be unpacked for an empty frame; assign empty
    # columns explicitly in that case.
    if dataframe.empty:
        dataframe['Responsible'] = []
        dataframe['Email'] = []
        return dataframe
    dataframe['Responsible'], dataframe['Email'] = zip(*dataframe['Project key'].apply(get_responsible_and_email))
    return dataframe
from sqlalchemy import inspect
def process_columns(
    dataframe: pd.DataFrame,
    config_path: str,
    bsw_staff_set: set,
    target_engine: "Engine | None" = None,
    target_table: "str | None" = None
) -> pd.DataFrame:
    """
    Process data columns: apply responsibility/BSW business rules and, when a
    target engine and table are supplied, align the frame with the target schema.

    :param dataframe: input DataFrame; must contain 'Assignee', 'Project key'
        and 'Component/s'; 'Updated' is optional
    :param config_path: path to the JSON config providing 'responsible_mapping'
        and 'email_mapping'
    :param bsw_staff_set: set of BSW staff names
    :param target_engine: optional SQLAlchemy engine for the target database.
        FIX: defaults to None (callers passed only three arguments, which
        previously raised TypeError); when None, step 5 is skipped.
    :param target_table: optional target table name; see target_engine
    :return: the processed DataFrame
    :raises Exception: re-raises failures from the schema-inspection step
    """
    # 1. Load the responsible-person configuration.
    # FIX: initialize `config` so a failed load does not raise NameError
    # when 'email_mapping' is read in step 3.
    config = {}
    try:
        with open(config_path, 'r', encoding='utf-8') as f:
            config = json.load(f)
        responsible_config = config.get("responsible_mapping", {})
    except Exception as e:
        logging.error(f"加载负责人配置文件失败: {str(e)}")
        responsible_config = {}
    # 2. Apply the responsibility mapping rules.
    if responsible_config:
        # Mapping based on assignee.
        dataframe['Responsible'] = dataframe['Assignee'].apply(
            lambda x: responsible_config.get(str(x), "Unassigned")
        )
        # Mapping based on project key (overrides the assignee mapping).
        dataframe['Responsible'] = dataframe.apply(
            lambda row: responsible_config.get(row['Project key'], row['Responsible']),
            axis=1
        )
        # Mapping based on component (highest precedence).
        dataframe['Responsible'] = dataframe.apply(
            lambda row: responsible_config.get(row['Component/s'], row['Responsible']),
            axis=1
        )
    else:
        dataframe['Responsible'] = "Unassigned"
    # 3. Add the email column (unknown responsibles get an empty string).
    email_mapping = config.get("email_mapping", {})
    dataframe['Email'] = dataframe['Responsible'].map(email_mapping).fillna("")
    # 4. BSW staff flag.
    dataframe['BSW Staff List'] = dataframe['Assignee'].apply(
        lambda x: "Yes" if x in bsw_staff_set else "No"
    )
    # 5. Dynamically match the target table structure (only when both the
    # engine and the table name were provided).
    if target_engine is not None and target_table is not None:
        try:
            inspector = inspect(target_engine)
            if inspector.has_table(target_table):
                # All column names of the target table.
                target_columns = [col['name'] for col in inspector.get_columns(target_table)]
                # Drop frame columns that do not exist in the target table.
                missing_columns = [col for col in dataframe.columns if col not in target_columns]
                if missing_columns:
                    logging.info(f"过滤 {len(missing_columns)} 个目标表不存在的列: {', '.join(missing_columns)}")
                    dataframe = dataframe.drop(columns=missing_columns)
            else:
                logging.warning(f"目标表 {target_table} 不存在,将自动创建")
                # Create an empty table carrying the frame's schema.
                dataframe.head(0).to_sql(
                    target_table,
                    target_engine,
                    index=False,
                    if_exists='fail'
                )
                logging.info(f"已创建目标表 {target_table}")
        except Exception as e:
            logging.error(f"表结构检查失败: {str(e)}")
            raise
    # 6. Date-based analysis columns.
    if 'Updated' in dataframe.columns:
        # FIX: compare naive timestamps against a naive "now" and tz-aware
        # ones against a UTC "now"; the original always used a naive now,
        # which raises TypeError on tz-aware values. The reference times are
        # also hoisted out of the per-row apply.
        now_naive = pd.Timestamp.now()
        now_utc = pd.Timestamp.now(tz='UTC')

        def _days_since_update(x):
            # Whole days since x, or None when x is not a valid timestamp.
            if not isinstance(x, pd.Timestamp) or pd.isna(x):
                return None
            now = now_utc if x.tzinfo is not None else now_naive
            return (now - x).days

        def _over(x, threshold):
            # 1 when the update is strictly older than `threshold` days.
            d = _days_since_update(x)
            return 1 if d is not None and d > threshold else 0

        dataframe['update_over_15'] = dataframe['Updated'].apply(lambda x: _over(x, 15))
        dataframe['update_over_7'] = dataframe['Updated'].apply(lambda x: _over(x, 7))
    # 7. Return the processed data.
    return dataframe
# ------------------ 数据库相关 ------------------
def get_database_engine(db_name="default"):
    """
    Create a database engine; supports multiple database configurations.

    Reads {prefix}DB_HOST / DB_USER / DB_PASSWORD / DB_NAME / DB_PORT from the
    environment, where prefix is "BSW_" for db_name == "bsw_jira", else "".

    :param db_name: configuration identifier ("default" or "bsw_jira")
    :return: a SQLAlchemy engine, or None on any failure (logged)
    """
    try:
        prefix = "BSW_" if db_name == "bsw_jira" else ""
        host = os.getenv(f"{prefix}DB_HOST")
        user = os.getenv(f"{prefix}DB_USER")
        password = os.getenv(f"{prefix}DB_PASSWORD")
        database = os.getenv(f"{prefix}DB_NAME", db_name)
        port = os.getenv(f"{prefix}DB_PORT", 3306)
        # Validate that the essential settings are present.
        # FIX: the original used locals().get(k) inside a list comprehension;
        # in Python 3 a comprehension has its own scope, so locals() there
        # never contained host/user/... and the message always listed all
        # four names. Use an explicit dict instead.
        settings = {"host": host, "user": user, "password": password, "database": database}
        missing = [name for name, value in settings.items() if not value]
        if missing:
            raise ValueError(f"缺少{db_name}数据库配置: {', '.join(missing)}")
        connection_uri = f"mysql+pymysql://{user}:{password}@{host}:{port}/{database}"
        engine = create_engine(connection_uri)
        logging.info(f"成功连接到数据库 {database}({db_name}配置)")
        return engine
    except Exception:
        logging.error(f"创建{db_name}数据库引擎失败")
        logging.error(traceback.format_exc())
        return None
def query_and_process_data(source_engine, target_engine, config_path):
    """
    Read issue rows from the source table in chunks, apply the business-rule
    processing, and append the results to the target table.

    Table names come from the SOURCE_TABLE_NAME / TARGET_TABLE_NAME
    environment variables.

    :param source_engine: SQLAlchemy engine for the source database
    :param target_engine: SQLAlchemy engine for the target database
    :param config_path: path to the JSON config (responsibles + BSW staff)
    :raises ValueError: when either table-name environment variable is missing
    :raises Exception: re-raises any failure after logging the traceback
    """
    try:
        # Table names from the environment.
        source_table = os.getenv("SOURCE_TABLE_NAME")
        target_table = os.getenv("TARGET_TABLE_NAME")
        if not source_table or not target_table:
            raise ValueError("缺少环境变量 SOURCE_TABLE_NAME 或 TARGET_TABLE_NAME")
        logging.info(f"源表: {source_table}, 目标表: {target_table}")
        # Load the BSW staff configuration.
        bsw_staff_set = load_bsw_staff_list(config_path)
        project_keys = ["P09135", "P09192", "P08499", "KOCHI_PL"]
        # Build the query (keys are hard-coded constants, not user input).
        formatted_keys = "','".join(project_keys)
        query = f"SELECT * FROM `{source_table}` WHERE `Project key` IN ('{formatted_keys}')"
        # Process in chunks to bound memory usage.
        chunks = pd.read_sql(query, con=source_engine, chunksize=500)
        for idx, chunk in enumerate(chunks):
            # FIX: process_columns is declared with five parameters; the
            # original call passed only three and raised TypeError on every
            # chunk. Pass the target engine/table through.
            processed_chunk = process_columns(
                chunk, config_path, bsw_staff_set, target_engine, target_table
            )
            processed_chunk.to_sql(
                name=target_table,
                con=target_engine,
                index=False,
                if_exists='append',
                chunksize=500
            )
            logging.info(f"已写入分块 {idx + 1} [{len(processed_chunk)}行] 到 {target_table}")
    except Exception:
        logging.error("数据处理流程失败")
        logging.error(traceback.format_exc())
        raise
if __name__ == "__main__":
    # Validate the required environment variables up front.
    required_vars = ["SOURCE_TABLE_NAME", "TARGET_TABLE_NAME"]
    missing_vars = [var for var in required_vars if not os.getenv(var)]
    if missing_vars:
        logging.error(f"缺少必要环境变量: {', '.join(missing_vars)}")
        exit(1)
    # Create the two database connections (source and target).
    source_engine = get_database_engine("default")
    target_engine = get_database_engine("bsw_jira")
    if not (source_engine and target_engine):
        logging.error("数据库连接失败,程序退出")
        exit(1)
    # Configuration file path (FIX: the variable is now actually passed below;
    # unused project_keys / target_table locals were removed — the real values
    # live inside query_and_process_data and the environment).
    config_path = "responsible_config.json"
    # Run the processing pipeline.
    try:
        query_and_process_data(
            source_engine=source_engine,
            target_engine=target_engine,
            config_path=config_path
        )
        logging.info(f"数据已成功从 {os.getenv('SOURCE_TABLE_NAME')} 处理并写入 {os.getenv('TARGET_TABLE_NAME')}")
    except Exception:
        logging.error("主流程执行失败")
        # FIX: log the traceback so failures are diagnosable.
        logging.error(traceback.format_exc())
        exit(1)
# NOTE: a second `if __name__ == "__main__":` block previously lived here — a
# leftover from an earlier CSV-export version of this script. Both __main__
# guards always executed together, and the stale block called
# query_and_process_data(engine, table_name, project_keys, output_path,
# config_path) with five arguments while the function only accepts
# (source_engine, target_engine, config_path), so it raised TypeError and
# forced exit(1) even after the pipeline above had succeeded. The duplicate
# entry point has therefore been removed.
# 最新发布  (stray banner text from a web-page copy; commented out — the bare line was a syntax error)