Setting the ANDROID_HOME environment variable on Mac OS X

本文详细介绍了在不同安装方式下查找Android SDK位置的方法,包括通过官网下载、使用Homebrew及作为Android Studio一部分自动安装的情况。并指导如何设置环境变量,确保开发环境正确配置。

安装Android-SDK的位置取决于你如何安装它。

  1. 如果通过官网下载SDK进行安装操作,那么它很可能在这里:

/Applications/ADT/sdk (大多数情况).

  2. 如果使用Homebrew (brew cask install android-sdk)安装SDK,那么它位于这里:

/usr/local/Caskroom/android-sdk/{YOUR_SDK_VERSION_NUMBER}

  3. 如果SDK是作为Android Studio的一部分自动安装的,那么它位于这里:

/Users/{YOUR_USER_NAME}/Library/Android/sdk

一旦确定了位置,打开一个终端(terminal)窗口,并输入以下内容(更改SDK的路径,无论你如何安装它):

vim ~/.bash_profile

export ANDROID_HOME={你的路径}
export PATH=$PATH:$ANDROID_HOME/tools:$ANDROID_HOME/platform-tools

保存退出(:wq)

最后,通过重新加载 .bash_profile 应用这些更改:

source ~/.bash_profile

验证:键入 echo $ANDROID_HOME,检查是否设置成功。

import os
import json
import logging
import re
import traceback

import pandas as pd
import pytz
from datetime import datetime
from dotenv import load_dotenv
from sqlalchemy import create_engine, inspect
from sqlalchemy.engine import Engine

# Initialize logging
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(levelname)s - %(message)s"
)

# Load environment variables from .env
load_dotenv()

# Output file configuration
OUTPUT_DIR = r"D:\AEB Management\output"
OUTPUT_FILE = "processed_data.csv"

# Make sure the output directory exists
os.makedirs(OUTPUT_DIR, exist_ok=True)

# Shared keyword regex used to decide whether 'Custom field (Custom_2)'
# contains an analysis conclusion (Chinese or English wording).
_ANALYSIS_KEYWORD_REGEX = (
    r'需求问题|平台问题|开发设计问题|非BSW问题|三方包问题|'
    r'Non-BSW|Development Design Issue|Requirement Issue|Platform Issue|Party Package Issue'
)


# ------------------ Configuration loading ------------------
def load_config(config_path):
    """Load a JSON configuration file and return it as a dict.

    :param config_path: path to the JSON file
    :raises Exception: re-raises any read/parse error after logging it
    """
    try:
        with open(config_path, "r", encoding="utf-8") as file:
            config = json.load(file)
        logging.info(f"成功加载配置文件: {config_path}")
        return config
    except Exception as e:
        logging.error("加载配置文件失败,请检查路径或格式!")
        logging.error(e)
        raise


def load_bsw_staff_list(config_path):
    """Load the BSW staff list from the JSON configuration file.

    :param config_path: path to the JSON file containing key "bsw_staff_list"
    :return: a set of staff names (set gives O(1) membership checks)
    :raises Exception: re-raises any read/parse error after logging it
    """
    try:
        with open(config_path, "r", encoding="utf-8") as file:
            config = json.load(file)
        staff_list = config.get("bsw_staff_list", [])
        logging.info(f"成功加载 BSW Staff List,包含 {len(staff_list)} 人")
        return set(staff_list)
    except Exception as e:
        logging.error("加载 BSW Staff List 配置失败,请检查路径或格式!")
        logging.error(e)
        raise


# ------------------ Column processing ------------------
def add_columns(dataframe, bsw_staff_set):
    """Add all derived analysis columns to the Jira issue DataFrame.

    Derivations are driven mainly by 'Involvement of BSW', 'Issue Type',
    'Custom field (Custom_2)', 'Component/s' and the date columns.

    :param dataframe: pandas DataFrame of Jira issues
    :param bsw_staff_set: set of BSW staff names (for 'BSW Staff List')
    :return: the same DataFrame with the new columns added
    """
    # Current UTC time, used for age/delay calculations
    current_time = pd.Timestamp.now(tz='UTC')

    # ------------------ HorizontalExpansion PN ------------------
    dataframe['HorizontalExpansion PN'] = None

    # KOCHI_PL rows labelled for cross-project checking
    kochi_condition = (
        (dataframe['Project key'] == 'KOCHI_PL')
        & (dataframe['Labels'].str.contains('CheckItOnOtherProject', na=False, regex=False))
    )

    if kochi_condition.any():
        # Extract a "P" + 5-digit project number from the summary text
        extracted_projects = dataframe.loc[kochi_condition, 'Summary'].str.extract(
            r'(P\d{5})', expand=False
        )
        dataframe.loc[kochi_condition, 'HorizontalExpansion PN'] = extracted_projects
        # Replace 'Project key' only where the extraction succeeded
        valid_extraction = extracted_projects.notna()
        dataframe.loc[kochi_condition & valid_extraction, 'Project key'] = (
            extracted_projects[valid_extraction]
        )

    # ================== Eight analysis columns ==================
    # 1. ProjectNum - unified project number (copy of Project key)
    dataframe['ProjectNum'] = dataframe['Project key']

    # 2. BSW Self Test - issues found by BSW's own testing
    def is_bsw_self_test(row):
        if row['Involvement of BSW'] != 'Yes':
            return 0
        found_by = row.get('Custom field (Found By)', '')
        labels = row.get('Labels', '')
        # Condition 1: found by dev audit, review, unit test or development
        cond1 = found_by in ['Development Audit', 'Review', 'SW Unit Test', 'Development']
        # Condition 2: found in integration test with the BSW_IT label
        cond2 = (found_by == 'SW Integration Test') and ('BSW_IT' in labels)
        return 1 if cond1 or cond2 else 0

    dataframe['BSW Self Test'] = dataframe.apply(is_bsw_self_test, axis=1)

    # 3. BSW Issue - issues attributed to BSW
    def is_bsw_issue(row):
        if not (row['Issue Type'] in ["KOCHI AE Issue", "Problem"]
                and row['Involvement of BSW'] == 'Yes'):
            return 0
        custom_field = str(row.get('Custom field (Custom_2)', ''))
        keywords = ['平台问题', '开发设计问题', '三方包问题',
                    'Development Design Issue', 'Platform Issue', 'Party Package Issue']
        return 1 if any(keyword in custom_field for keyword in keywords) else 0

    dataframe['BSW Issue'] = dataframe.apply(is_bsw_issue, axis=1)

    # Shared predicate for columns 4/5: "Stack component OR concluded non-BSW"
    def _stack_or_non_bsw(row):
        components = str(row.get('Component/s', ''))
        custom_field = str(row.get('Custom field (Custom_2)', ''))
        return ('Stack' in components) or ('非BSW问题' in custom_field) or ('Non-BSW' in custom_field)

    def _is_bsw_scope(row):
        # Base filter used by most analysis columns below
        return (row['Issue Type'] in ["KOCHI AE Issue", "Problem"]
                and row['Involvement of BSW'] == 'Yes')

    # 4. Stack BSW Analyzed - analyzed Stack-related issues
    dataframe['Stack BSW Analyzed'] = dataframe.apply(
        lambda row: 1 if _is_bsw_scope(row) and _stack_or_non_bsw(row) else 0, axis=1
    )

    # 5. Stack BSW Unanalyzed - unanalyzed Stack-related issues
    dataframe['Stack BSW Unanalyzed'] = dataframe.apply(
        lambda row: 1 if _is_bsw_scope(row) and not _stack_or_non_bsw(row) else 0, axis=1
    )

    # 6. Stack Total - total Stack-related issues
    dataframe['Stack Total'] = dataframe.apply(
        lambda row: 1 if (row['Issue Type'] in ["KOCHI AE Issue", "Problem"]
                          and 'Stack' in str(row.get('Component/s', ''))) else 0,
        axis=1
    )

    # 7. BSW Involve - issues with BSW involvement
    dataframe['BSW Involve'] = dataframe.apply(
        lambda row: 1 if _is_bsw_scope(row) else 0, axis=1
    )

    # 8. BSW Involve Unclosed - BSW-involved issues that are not closed
    dataframe['BSW Involve Unclosed'] = dataframe.apply(
        lambda row: 1 if (_is_bsw_scope(row) and row.get('Status', '') != 'Closed') else 0,
        axis=1
    )

    # ------------------ HorizontalExpansion analysis columns ------------------
    # 1. HorizontalExpansion Count - a horizontal-expansion PN exists
    dataframe['HorizontalExpansion Count'] = (
        dataframe['HorizontalExpansion PN'].notna().astype(int)
    )

    # 2. HorizontalExpansion Unfinished - expansion issue not yet Done
    dataframe['HorizontalExpansion Unfinished'] = (
        dataframe['HorizontalExpansion PN'].notna()
        & (dataframe['Status'] != 'Done')
    ).astype(int)

    # 3. HorizontalExpansion Delay - open expansion issue older than 30 days
    # Normalize the Created column to tz-aware UTC first.
    if 'Created' in dataframe.columns and dataframe['Created'].dtype == 'object':
        dataframe['Created'] = pd.to_datetime(dataframe['Created'], errors='coerce')
    if dataframe['Created'].dt.tz is None:
        created_dates = dataframe['Created'].dt.tz_localize('UTC')
    else:
        created_dates = dataframe['Created'].dt.tz_convert('UTC')
    days_since_creation = (current_time - created_dates).dt.days
    dataframe['HorizontalExpansion Delay'] = (
        dataframe['HorizontalExpansion PN'].notna()
        & (dataframe['Status'] != 'Done')
        & (days_since_creation > 30)
    ).astype(int)

    # ------------------ Original business columns ------------------
    def bsw_analysis_conclusions(row):
        """Map 'Custom field (Custom_2)' text onto a coarse conclusion label."""
        if row['Involvement of BSW'] == 'No':
            return 'Non-BSW'
        elif row['Involvement of BSW'] == 'Yes':
            if pd.notna(row['Custom field (Custom_2)']):
                custom_field = row['Custom field (Custom_2)']
                if '非BSW问题' in custom_field or 'Non-BSW' in custom_field:
                    return 'Analyzed-Non-BSW'
                elif '开发设计问题' in custom_field or 'Development Design Issue' in custom_field:
                    return 'Analyzed-BSW'
                elif '需求问题' in custom_field or 'Requirement Issue' in custom_field:
                    return 'Analyzed-Non-BSW'
                elif '平台问题' in custom_field or 'Platform Issue' in custom_field:
                    return 'Analyzed-BSW'
                elif '三方包问题' in custom_field or 'Party Package Issue' in custom_field:
                    return 'Analyzed-三方包问题'
                else:
                    return 'Analyzing-Unclassified'
        return 'Analyzing-Unclassified'

    dataframe['BSW Analysis Conclusions'] = dataframe.apply(bsw_analysis_conclusions, axis=1)

    def stack_classification(component):
        """Classify the component string into a stack name (first match wins)."""
        if pd.isna(component):
            return 'Unclassed'
        # Ordered list of stack markers; 'in' substring match as before
        for stack in ('Com_Stack', 'Cdd_Stack', 'CybSec_Stack', 'Diag_Stack',
                      'Fbl_Stack', 'FuSa_Stack', 'IoHwAb_Stack', 'Mem_Stack',
                      'ModeMgm_Stack', 'OS_Stack', 'Sys_Stack'):
            if stack in component:
                return stack
        return 'Unclassed'

    dataframe['Stack Classification'] = dataframe['Component/s'].apply(stack_classification)

    def calculate_bsw_reason_classification(row):
        """Return the reason classification for a BSW-involved issue.

        When 'Involvement of BSW' is 'No', return '底层未参与分析' directly;
        otherwise match 'Custom field (Custom_2)' against the known categories.
        """
        if row['Involvement of BSW'] == 'No':
            return '底层未参与分析'
        if row['Involvement of BSW'] == 'Yes' and pd.notna(row['Custom field (Custom_2)']):
            custom_field = row['Custom field (Custom_2)']
            if '非BSW问题' in custom_field or 'Non-BSW' in custom_field:
                return '非BSW问题'
            elif '开发设计问题' in custom_field or 'Development Design Issue' in custom_field:
                return '开发设计问题'
            elif '需求问题' in custom_field or 'Requirement Issue' in custom_field:
                return '需求问题'
            elif '平台问题' in custom_field or 'Platform Issue' in custom_field:
                return '平台问题'
            elif '三方包问题' in custom_field or 'Party Package Issue' in custom_field:
                return '三方包问题'
            else:
                return 'Analyzing-Unclassified'
        else:
            # Empty 'Custom field (Custom_2)' -> still unclassified
            return 'Analyzing-Unclassified'

    dataframe['BSW Reason Classification'] = dataframe.apply(
        calculate_bsw_reason_classification, axis=1
    )

    def calculate_total(row):
        """Count non-KOCHI_PL issues of the tracked issue types."""
        if row['Project key'] == 'KOCHI_PL':
            return 0
        return 1 if row['Issue Type'] in ["KOCHI AE Issue", "Problem"] else 0

    dataframe['total'] = dataframe.apply(calculate_total, axis=1)

    def calculate_bsw_analyzed(row):
        """1 when a BSW-involved issue has an analysis conclusion keyword."""
        if (
            row['Issue Type'] in ["KOCHI AE Issue", "Problem"]
            and row['Involvement of BSW'] == 'Yes'
            and pd.notna(row['Custom field (Custom_2)'])
        ):
            # re.search is the per-scalar equivalent of Series.str.contains
            if re.search(_ANALYSIS_KEYWORD_REGEX, str(row['Custom field (Custom_2)'])):
                return 1
        return 0

    dataframe['BSW Analyzed'] = dataframe.apply(calculate_bsw_analyzed, axis=1)

    def calculate_bsw_unanalyzed(row):
        """1 when a BSW-involved issue lacks any analysis conclusion keyword."""
        if (
            row['Issue Type'] in ["KOCHI AE Issue", "Problem"]
            and row['Involvement of BSW'] == 'Yes'
        ):
            custom_field = row['Custom field (Custom_2)']
            # NULL, empty string, or no keyword match -> unanalyzed
            if (pd.isna(custom_field) or custom_field == ''
                    or not re.search(_ANALYSIS_KEYWORD_REGEX, str(custom_field))):
                return 1
        return 0

    dataframe['BSW Unanalyzed'] = dataframe.apply(calculate_bsw_unanalyzed, axis=1)

    # 'BSW Staff List' - whether the assignee is on the BSW staff list
    dataframe['BSW Staff List'] = dataframe['Assignee'].apply(
        lambda x: "Yes" if x in bsw_staff_set else "No"
    )

    def classify_found_by(row):
        """Bucket 'Custom field (Found By)' + labels into reporting categories."""
        found_by = row['Custom field (Found By)']
        labels = row['Labels'] if pd.notna(row['Labels']) else ""
        if found_by == 'SW Integration Test' and 'BSW_IT' in labels:
            return 'BSW_IT'
        elif found_by == 'SW Integration Test':
            return 'SWIT'
        elif found_by == 'Cybersecurity Monitoring':
            return 'Cybersecurity Monitoring'
        elif found_by in ['HIL TEST', 'HILP TEST', 'System Test']:
            return 'System Test'
        elif found_by in ['OEM Test', 'OEM', 'End user']:
            return 'OEM&End user'
        elif found_by == 'Development Audit' and 'BSW_PT' in labels:
            return 'BSW_PT'
        elif found_by in ['Review', 'SW Unit Test', 'Development']:
            return '开发自测'
        elif found_by in ['Elec. HW-SW Integration Test', 'Elec. HW Unit Test']:
            return 'HW'
        # Everything else (missing/blank values and the long explicit list of
        # miscellaneous sources in the original) collapses to 'Others'.
        return 'Others'

    dataframe['Found By Classification'] = dataframe.apply(classify_found_by, axis=1)

    def calculate_high_priority_unanalyzed(row):
        """1 for high-priority BSW issues older than 15 days with no conclusion."""
        # 1. Base scope check
        if not (
            row['Issue Type'] in ["KOCHI AE Issue", "Problem"]
            and row['Involvement of BSW'] == 'Yes'
            and row['Priority'] == 'High'
        ):
            return 0
        # 2. Age since creation
        created_date = row['Created']
        if pd.isna(created_date):
            return 0
        # Pick a "now" with the same tz-awareness as the stored timestamp
        now = pd.Timestamp.now(tz='UTC') if created_date.tz else pd.Timestamp.now()
        days_diff = (now - created_date).days
        if days_diff <= 15:
            return 0
        # 3. Exclude rows that already carry an analysis conclusion
        custom_field = (row['Custom field (Custom_2)']
                        if pd.notna(row['Custom field (Custom_2)']) else "")
        exclude_keywords = [
            '需求问题', '平台问题', '开发设计问题', '非BSW问题', '三方包问题',
            'Non-BSW', 'Development Design Issue', 'Requirement Issue',
            'Platform Issue', 'Party Package Issue'
        ]
        if any(keyword in custom_field for keyword in exclude_keywords):
            return 0
        return 1

    dataframe['High Priority Unanalyzed'] = dataframe.apply(
        calculate_high_priority_unanalyzed, axis=1
    )

    def _stack_analysis_flags(row):
        """Return (has_stack_component, has_non_bsw_conclusion) for a row."""
        comp_has_stack = pd.notna(row['Component/s']) and 'Stack' in row['Component/s']
        custom_has_keywords = pd.notna(row['Custom field (Custom_2)']) and (
            '非BSW问题' in row['Custom field (Custom_2)']
            or 'Non-BSW' in row['Custom field (Custom_2)']
        )
        return comp_has_stack, custom_has_keywords

    def calculate_stack_analyzed(row):
        """1 when either the stack component or a non-BSW conclusion is present."""
        if _is_bsw_scope(row):
            comp_has_stack, custom_has_keywords = _stack_analysis_flags(row)
            if comp_has_stack or custom_has_keywords:
                return 1
        return 0

    dataframe['Stack_Analyzed'] = dataframe.apply(calculate_stack_analyzed, axis=1)

    def calculate_stack_unanalyzed(row):
        """1 when neither the stack component nor a non-BSW conclusion is present."""
        if _is_bsw_scope(row):
            comp_has_stack, custom_has_keywords = _stack_analysis_flags(row)
            if not comp_has_stack and not custom_has_keywords:
                return 1
        return 0

    dataframe['Stack_Unanalyzed'] = dataframe.apply(calculate_stack_unanalyzed, axis=1)

    # Key fix: unified timezone handling for 'Updated'.
    # Pre-seed a NaN series so the columns below never see an undefined or
    # None value even if every conversion attempt fails (the original code
    # could leave days_since_update unbound or None, crashing later).
    days_since_update = pd.Series(float('nan'), index=dataframe.index)
    try:
        # Convert 'Updated' to tz-aware UTC datetimes
        dataframe['Updated'] = pd.to_datetime(
            dataframe['Updated'], errors='coerce', utc=True
        )
        now = pd.Timestamp.now(tz='UTC')
        days_since_update = (now - dataframe['Updated']).dt.days
    except TypeError as e:
        logging.error(f"时区错误: {e}")
        if 'Updated' in dataframe and dataframe['Updated'].dt.tz is None:
            logging.warning("检测到Updated列缺少时区信息,将转换为UTC")
            dataframe['Updated'] = dataframe['Updated'].dt.tz_localize('UTC')
            now = pd.Timestamp.now(tz='UTC')
            days_since_update = (now - dataframe['Updated']).dt.days
    except Exception as e:
        logging.error(f"处理日期时发生未知错误: {e}")
        # days_since_update stays all-NaN: comparisons yield False below

    # Base condition shared by both staleness columns
    condition_base = (
        (dataframe['Issue Type'].isin(["KOCHI AE Issue", "Problem"]))
        & (dataframe['Involvement of BSW'] == 'Yes')
        & (dataframe['BSW Staff List'] == 'Yes')
    )
    dataframe['update_over_15'] = (
        condition_base & (days_since_update >= 15)
    ).astype(int)
    dataframe['update_over_7'] = (
        condition_base & (days_since_update > 7) & (days_since_update < 15)
    ).astype(int)

    logging.info("成功添加所有分析列")
    return dataframe


def assign_responsibles(dataframe, config):
    """Add 'Responsible' and 'Email' columns from a per-project config mapping.

    :param dataframe: DataFrame with a 'Project key' column
    :param config: dict mapping project keys to {"Responsible", "Email"};
        the "Default" entry (if present) is used for unknown project keys
    """
    # .get avoids a KeyError when the config has no "Default" entry
    default_entry = config.get("Default", {})

    def get_responsible_and_email(project_key):
        entry = config.get(project_key, default_entry)
        responsible = entry.get("Responsible", "Unassigned")
        email = entry.get("Email", "unknown@example.com")
        return responsible, email

    dataframe['Responsible'], dataframe['Email'] = zip(
        *dataframe['Project key'].apply(get_responsible_and_email)
    )
    return dataframe


def process_columns(
    dataframe: pd.DataFrame,
    config_path: str,
    bsw_staff_set: set,
    target_engine: Engine,
    target_table: str
) -> pd.DataFrame:
    """Apply business rules to the columns and align with the target table schema.

    :param dataframe: chunk of source data
    :param config_path: JSON file with "responsible_mapping"/"email_mapping"
    :param bsw_staff_set: set of BSW staff names
    :param target_engine: SQLAlchemy engine for the target database
    :param target_table: name of the target table
    :return: the processed DataFrame, restricted to the target table's columns
    :raises Exception: when the target-table schema check fails
    """
    # 1. Load the responsible-person configuration
    try:
        with open(config_path, 'r', encoding='utf-8') as f:
            config = json.load(f)
        responsible_config = config.get("responsible_mapping", {})
    except Exception as e:
        logging.error(f"加载负责人配置文件失败: {str(e)}")
        config = {}
        responsible_config = {}

    # 2. Apply responsible mappings (assignee, then project, then component;
    #    later matches override earlier ones)
    if responsible_config:
        dataframe['Responsible'] = dataframe['Assignee'].apply(
            lambda x: responsible_config.get(str(x), "Unassigned")
        )
        dataframe['Responsible'] = dataframe.apply(
            lambda row: responsible_config.get(row['Project key'], row['Responsible']),
            axis=1
        )
        dataframe['Responsible'] = dataframe.apply(
            lambda row: responsible_config.get(row['Component/s'], row['Responsible']),
            axis=1
        )
    else:
        dataframe['Responsible'] = "Unassigned"

    # 3. Email column derived from the responsible person
    email_mapping = config.get("email_mapping", {})
    dataframe['Email'] = dataframe['Responsible'].map(email_mapping).fillna("")

    # 4. BSW staff flag
    dataframe['BSW Staff List'] = dataframe['Assignee'].apply(
        lambda x: "Yes" if x in bsw_staff_set else "No"
    )

    # 5. Match the target table structure dynamically
    try:
        inspector = inspect(target_engine)
        if inspector.has_table(target_table):
            target_columns = [col['name'] for col in inspector.get_columns(target_table)]
            # Drop columns the target table does not have
            missing_columns = [col for col in dataframe.columns if col not in target_columns]
            if missing_columns:
                logging.info(
                    f"过滤 {len(missing_columns)} 个目标表不存在的列: {', '.join(missing_columns)}"
                )
                dataframe = dataframe.drop(columns=missing_columns)
        else:
            logging.warning(f"目标表 {target_table} 不存在,将自动创建")
            # Create the table from the DataFrame schema (no rows)
            dataframe.head(0).to_sql(
                target_table,
                target_engine,
                index=False,
                if_exists='fail'
            )
            logging.info(f"已创建目标表 {target_table}")
    except Exception as e:
        logging.error(f"表结构检查失败: {str(e)}")
        raise

    # 6. Date-based staleness columns. Pick a "now" with matching
    #    tz-awareness so tz-aware timestamps don't raise TypeError.
    if 'Updated' in dataframe.columns:
        now_naive = pd.Timestamp.now()
        now_utc = pd.Timestamp.now(tz='UTC')

        def _days_since(x):
            # Returns None for non-timestamps so the comparisons stay False
            if not isinstance(x, pd.Timestamp):
                return None
            now = now_utc if x.tz is not None else now_naive
            return (now - x).days

        dataframe['update_over_15'] = dataframe['Updated'].apply(
            lambda x: 1 if (d := _days_since(x)) is not None and d > 15 else 0
        )
        dataframe['update_over_7'] = dataframe['Updated'].apply(
            lambda x: 1 if (d := _days_since(x)) is not None and d > 7 else 0
        )

    # 7. Return the processed data
    return dataframe


# ------------------ Database helpers ------------------
def get_database_engine(db_name="default"):
    """Create a database engine; supports multiple configurations.

    :param db_name: configuration id ("default" or "bsw_jira"); "bsw_jira"
        reads the BSW_-prefixed environment variables
    :return: a SQLAlchemy Engine, or None when creation fails
    """
    try:
        prefix = "BSW_" if db_name == "bsw_jira" else ""
        settings = {
            "host": os.getenv(f"{prefix}DB_HOST"),
            "user": os.getenv(f"{prefix}DB_USER"),
            "password": os.getenv(f"{prefix}DB_PASSWORD"),
            "database": os.getenv(f"{prefix}DB_NAME", db_name),
        }
        port = os.getenv(f"{prefix}DB_PORT", 3306)

        # Validate that every required setting is present
        # (explicit dict instead of the fragile locals().get lookup)
        missing = [k for k, v in settings.items() if not v]
        if missing:
            raise ValueError(f"缺少{db_name}数据库配置: {', '.join(missing)}")

        connection_uri = (
            f"mysql+pymysql://{settings['user']}:{settings['password']}"
            f"@{settings['host']}:{port}/{settings['database']}"
        )
        engine = create_engine(connection_uri)
        logging.info(f"成功连接到数据库 {settings['database']}({db_name}配置)")
        return engine
    except Exception:
        logging.error(f"创建{db_name}数据库引擎失败")
        logging.error(traceback.format_exc())
        return None


def query_and_process_data(source_engine, target_engine, config_path):
    """Query the source table, process each chunk and write to the target table.

    :param source_engine: engine for the source database
    :param target_engine: engine for the target database
    :param config_path: path to the JSON configuration file
    :raises ValueError: when the table-name environment variables are missing
    """
    try:
        # Table names come from the environment
        source_table = os.getenv("SOURCE_TABLE_NAME")
        target_table = os.getenv("TARGET_TABLE_NAME")
        if not source_table or not target_table:
            raise ValueError("缺少环境变量 SOURCE_TABLE_NAME 或 TARGET_TABLE_NAME")
        logging.info(f"源表: {source_table}, 目标表: {target_table}")

        # Load the BSW staff configuration once
        bsw_staff_set = load_bsw_staff_list(config_path)

        project_keys = ["P09135", "P09192", "P08499", "KOCHI_PL"]
        formatted_keys = "','".join(project_keys)
        query = (
            f"SELECT * FROM `{source_table}` "
            f"WHERE `Project key` IN ('{formatted_keys}')"
        )

        # Stream the source table in chunks to bound memory use
        chunks = pd.read_sql(query, con=source_engine, chunksize=500)
        for idx, chunk in enumerate(chunks):
            # Fix: process_columns requires the target engine/table so it can
            # align the chunk with the target schema (the original call passed
            # only three arguments and raised TypeError at runtime).
            processed_chunk = process_columns(
                chunk, config_path, bsw_staff_set, target_engine, target_table
            )
            processed_chunk.to_sql(
                name=target_table,
                con=target_engine,
                index=False,
                if_exists='append',
                chunksize=500
            )
            logging.info(f"已写入分块 {idx + 1} [{len(processed_chunk)}行] 到 {target_table}")
    except Exception:
        logging.error("数据处理流程失败")
        logging.error(traceback.format_exc())
        raise


if __name__ == "__main__":
    # NOTE: the original file contained a second `if __name__ == "__main__"`
    # block that executed after this one and called query_and_process_data
    # with a five-argument signature that does not exist; it has been removed.

    # Validate required environment variables
    required_vars = ["SOURCE_TABLE_NAME", "TARGET_TABLE_NAME"]
    missing_vars = [var for var in required_vars if not os.getenv(var)]
    if missing_vars:
        logging.error(f"缺少必要环境变量: {', '.join(missing_vars)}")
        exit(1)

    # Create both database connections
    source_engine = get_database_engine("default")
    target_engine = get_database_engine("bsw_jira")
    if not (source_engine and target_engine):
        logging.error("数据库连接失败,程序退出")
        exit(1)

    # Configuration file path
    config_path = "responsible_config.json"

    # Run the pipeline
    try:
        query_and_process_data(
            source_engine=source_engine,
            target_engine=target_engine,
            config_path=config_path
        )
        logging.info(
            f"数据已成功从 {os.getenv('SOURCE_TABLE_NAME')} 处理并写入 {os.getenv('TARGET_TABLE_NAME')}"
        )
    except Exception:
        logging.error("主流程执行失败")
        exit(1)
最新发布
11-25
评论
成就一亿技术人!
拼手气红包6.0元
还能输入1000个字符
 
红包 添加红包
表情包 插入表情
 条评论被折叠 查看
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值