import openpyxl
import os
from openpyxl.utils import get_column_letter
from openpyxl.styles import PatternFill
def preprocess_classification_file(class_file_path):
"""
预处理分类文件,构建映射字典
:param class_file_path: 分类文件路径
:return: 映射字典 {三级分类: {四级分类: (二级分类, 四级编码)}}
"""
try:
wb = openpyxl.load_workbook(class_file_path)
ws = wb["工作职责分类、岗位说明书分类(20250730客户确认版)"]
mapping = {}
current_l2 = None
current_l3 = None
# 从第3行开始读取数据(跳过表头)
for row in range(3, ws.max_row + 1):
l2_code = ws.cell(row, 3).value # C列:二级分类编码
l2_name = ws.cell(row, 4).value # D列:二级分类名称
l3_code = ws.cell(row, 5).value # E列:三级分类编码
l3_name = ws.cell(row, 6).value # F列:三级分类名称
l4_code = ws.cell(row, 7).value # G列:四级分类编码
l4_name = ws.cell(row, 8).value # H列:四级分类名称
# 更新当前分类(处理合并单元格)
if l2_name:
current_l2 = l2_name
if l3_name:
current_l3 = l3_name
# 只处理有四级分类的行
if l4_name and current_l3 and current_l2:
if current_l3 not in mapping:
mapping[current_l3] = {}
mapping[current_l3][l4_name] = (current_l2, l4_code)
print(mapping)
return mapping
except Exception as e:
print(f"处理分类文件时出错: {str(e)}")
return {}
def process_all_files(root_source_folder, template_file, class_file_path,root_output_folder):
"""
处理根目录下所有子文件夹中的Excel文件并生成总表
:param root_source_folder: 源文件根目录路径
:param template_file: HRHB模板文件路径
:param root_output_folder: 输出根目录路径
"""
# 预处理分类文件(只处理一次)
class_mapping = preprocess_classification_file(class_file_path)
# 创建汇总工作簿
summary_wb = openpyxl.Workbook()
summary_ws = summary_wb.active
summary_ws.title = "汇总表"
# 设置汇总表表头
headers = ["文件名", "所属部门(文件名)","工作职责", "工作内容描述", "工作职责分类",'所属部门(表格)']
for col, header in enumerate(headers, start=1):
summary_ws.cell(row=1, column=col, value=header)
row_counter = 2 # 从第2行开始填充数据
# 遍历根目录下的所有子文件夹
for foldername, subfolders, filenames in os.walk(root_source_folder):
# 获取当前文件夹的直接父文件夹名称
current_parent_folder = os.path.basename(foldername)
# 找到第一个数字后的点号位置
dot_pos = current_parent_folder.find('.') + 1 # +1 跳过点号本身
# 找到"岗位说明书"的起始位置
title_pos = current_parent_folder.find('岗位说明书')
# 切片提取中间部分
pf = current_parent_folder[dot_pos:title_pos].strip() # strip() 去除首尾空格
for filename in filenames:
if filename.endswith('.xlsx') and not filename.startswith('~$'):
try:
source_file = os.path.join(foldername, filename)
relative_path = os.path.relpath(foldername, root_source_folder)
output_folder = os.path.join(root_output_folder, relative_path)
# 处理单个文件并获取生成的数据
generated_data = generate_hrhb_job_duties(
source_file, template_file, output_folder, class_mapping
)
# 将数据添加到汇总表
if generated_data:
for duty in generated_data:
summary_ws.cell(row=row_counter, column=1, value=filename)
summary_ws.cell(row=row_counter, column=2, value=pf)
summary_ws.cell(row=row_counter, column=3, value=duty["work_area"])
summary_ws.cell(row=row_counter, column=4, value=duty["duty_desc"])
summary_ws.cell(row=row_counter, column=5, value=duty["department"])
summary_ws.cell(row=row_counter, column=6, value=duty["department2"])
summary_ws.cell(row=row_counter, column=7, value=duty["position_class"])
summary_ws.cell(row=row_counter, column=8, value=duty["duty_class_code"])
row_counter += 1
except Exception as e:
print(f"处理文件 {filename} 时出错: {str(e)}")
# 保存汇总表
summary_file = os.path.join(root_output_folder, "HRHB_工作职责汇总表.xlsx")
summary_wb.save(summary_file)
print(f"成功生成汇总表: {summary_file}")
def generate_hrhb_job_duties(source_file, template_file, output_folder, class_mapping):
"""处理单个文件并返回提取的数据"""
try:
source_wb = openpyxl.load_workbook(source_file)
source_ws = source_wb["岗位职责"]
department = source_ws["B1"].value
department2 = source_ws["D1"].value
duties_data = []
# 动态检测职责结束位置
row = 4
while True:
importance = source_ws[f"A{row}"].value
work_area = source_ws[f"B{row}"].value
duty_desc = source_ws[f"C{row}"].value
# 检测数据结束:当连续3行无数据时停止
if not (importance or work_area or duty_desc):
# 检查后续行确认是否真正结束
next_rows_empty = True
for i in range(1, 4): # 检查后续3行
next_imp = source_ws[f"A{row + i}"].value
next_area = source_ws[f"B{row + i}"].value
next_desc = source_ws[f"C{row + i}"].value
if next_imp or next_area or next_desc:
next_rows_empty = False
break
if next_rows_empty:
break
# 收集有效数据
if all([importance, work_area, duty_desc]):
# 匹配分类信息
position_class = "未知"
duty_class_code = "未知"
if department2 in class_mapping:
dept_mapping = class_mapping[department2]
if work_area in dept_mapping:
position_class, duty_class_code = dept_mapping[work_area]
duties_data.append({
"importance": importance,
"work_area": work_area,
"duty_desc": duty_desc,
"department": department,
"department2": department2,
"position_class": position_class, # 新增字段
"duty_class_code": duty_class_code # 新增字段
})
row += 1
if not duties_data:
print(f"警告: 在 {source_file} 中未找到有效的工作职责数据")
return None
template_wb = openpyxl.load_workbook(template_file)
template_ws = template_wb["基本工作职责"]
# 清空模板数据
for row in range(2, template_ws.max_row + 1):
for col in range(1, 8): # 现在有7列
template_ws.cell(row=row, column=col).value = None
# 添加新增列的标题(如果不存在)
if template_ws["F1"].value is None:
template_ws["F1"] = "岗位分类"
template_ws["G1"] = "工作职责分类编码"
# 设置标题样式(黄色背景)
yellow_fill = PatternFill(start_color="FFFF00", end_color="FFFF00", fill_type="solid")
for col in ["F1", "G1"]:
template_ws[col].fill = yellow_fill
start_row = 2
for idx, duty in enumerate(duties_data, start=start_row):
template_ws[f"A{idx}"] = duty["work_area"]
template_ws[f"B{idx}"] = duty["duty_desc"]
template_ws[f"C{idx}"] = duty["department"]
template_ws[f"D{idx}"] = "江门市烟草分公司(本级)"
template_ws[f"E{idx}"] = duty["department2"]
template_ws[f"F{idx}"] = duty["position_class"] # 新增字段
template_ws[f"G{idx}"] = duty["duty_class_code"] # 新增字段
# 调整列宽(现在有7列)
for col in range(1, 8):
max_length = 0
column = get_column_letter(col)
for cell in template_ws[column]:
if cell.value:
max_length = max(max_length, len(str(cell.value)))
template_ws.column_dimensions[column].width = (max_length + 4) * 1.2
os.makedirs(output_folder, exist_ok=True)
source_name = os.path.splitext(os.path.basename(source_file))[0]
output_file = os.path.join(output_folder, f"HRHB_{source_name}_已填充.xlsx")
template_wb.save(output_file)
print(f"成功生成文件: {output_file}")
return duties_data
except Exception as e:
print(f"处理文件时出错: {str(e)}")
return None
并没有匹配,全都是未知
最新发布