import os
import re
import time
import logging
import tkinter as tk
from tkinter import ttk, messagebox, filedialog, scrolledtext
from collections import Counter
import openpyxl
import datetime
from openpyxl.styles import PatternFill
from openpyxl.utils import get_column_letter
# 增强的日志配置 - 每次启动清空日志
def setup_logger():
"""配置并返回日志记录器,每次启动清空日志"""
logger = logging.getLogger('SCLMultiProcessor')
logger.setLevel(logging.DEBUG)
# 清空日志文件
log_file = 'scl_processor.log'
with open(log_file, 'w', encoding='utf-8') as f:
f.write("") # 清空日志文件
# 创建文件处理器
file_handler = logging.FileHandler(log_file, encoding='utf-8')
file_handler.setLevel(logging.DEBUG)
# 创建控制台处理器
console_handler = logging.StreamHandler()
console_handler.setLevel(logging.INFO)
# 创建格式化器
formatter = logging.Formatter(
'%(asctime)s - %(name)s - %(levelname)s - %(message)s',
datefmt='%Y-%m-%d %H:%M:%S'
)
# 应用格式化器
file_handler.setFormatter(formatter)
console_handler.setFormatter(formatter)
# 添加处理器
logger.addHandler(file_handler)
logger.addHandler(console_handler)
# 记录启动信息
logger.info("=" * 50)
logger.info(f"SCL Multi-Processor 启动于 {datetime.datetime.now()}")
logger.info("=" * 50)
return logger
# 获取日志记录器
logger = setup_logger()
class ExcelColorDetector:
"""Excel单元格颜色检测器"""
def __init__(self):
self.NO_FILL = "无填充"
self.RED_FILL = PatternFill(start_color='FFFF0000', end_color='FFFF0000', fill_type='solid')
self.YELLOW_FILL = PatternFill(start_color='FFFFFF00', end_color='FFFFFF00', fill_type='solid')
self.PURPLE_FILL = PatternFill(start_color='FF800080', end_color='FF800080', fill_type='solid')
def is_no_fill(self, cell):
"""检查单元格是否无填充颜色"""
try:
# 检查是否存在填充属性
if not hasattr(cell, 'fill') or cell.fill is None:
return True
# 检查填充类型
if cell.fill.patternType is None or cell.fill.patternType == 'none':
return True
# 检查背景色是否为默认(白色或无)
if hasattr(cell.fill, 'bgColor') and cell.fill.bgColor.rgb == '00000000':
return True
return False
except Exception as e:
logger.error(f"颜色检测错误: {str(e)}")
return True
class EmptyCellDetector:
"""空单元格检测器 - 增强版:添加规则7"""
def __init__(self, color_detector):
self.color_detector = color_detector
# 固定列位置(列索引)
self.COLUMN_MAPPING = {
"A": 1, # A列
"B": 2, # B列
"C": 3, # C列
"D": 4, # D列
"E": 5, # E列
"F": 6, # F列
"G": 7, # G列
"H": 8, # H列
"I": 9, # I列
"J": 10, # J列
"K": 11, # K列
"L": 12, # L列 (差分種別)
"M": 13, # M列 (变更内容)
"N": 14, # N列 (判断列)
"O": 15, # O列 (判断理由)
"P": 16, # P列 (变更背景)
"Q": 17 # Q列 (备注)
}
self.HEADER_ROW = 3 # 表头固定在第三行
self.DATA_START_ROW = 4 # 数据从第四行开始
self.SPEC_PREFIX = "仕様書No.:" # I列排除条件前缀
def detect_empty_cells(self, file_path, checksheet_base_path=None):
"""
检测指定列的空单元格
返回: (missing_data, marked_file_path)
- missing_data: 缺失数据列表 [{'row': 行号, 'col': 列号, 'col_name': 列名, 'message': 错误信息}, ...]
- marked_file_path: 标记后的文件路径(如果有缺失数据)
"""
missing_data = []
marked_file_path = None
try:
logger.info(f"开始检测空单元格: {file_path}")
# 加载SCL文件
scl_wb = openpyxl.load_workbook(file_path)
scl_sheet = scl_wb.active
logger.info(f"工作表加载成功: {scl_sheet.title}, 总行数: {scl_sheet.max_row}")
# 检查是否有足够的数据行
if scl_sheet.max_row < self.DATA_START_ROW:
logger.info("文件没有数据行,跳过检测")
return missing_data, None
# 获取文件名用于错误信息
file_name = os.path.basename(file_path)
# 规则1: 检查A、B、C、K列的单元格是否为空
required_columns = ["A", "B", "C", "K"]
for row_idx in range(self.DATA_START_ROW, scl_sheet.max_row + 1):
for col_letter in required_columns:
col_idx = self.COLUMN_MAPPING[col_letter]
cell = scl_sheet.cell(row_idx, col_idx)
if cell.value is None or str(cell.value).strip() == "":
# 标记单元格为黄色
self.highlight_cell(scl_sheet, row_idx, col_idx, "FFFFFF00")
missing_data.append({
'row': row_idx,
'col': col_idx,
'col_name': col_letter,
'message': f"{file_name}: 行 {row_idx} 列 {col_letter} 数据缺失 (规则1)"
})
logger.warning(f"行 {row_idx} 列 {col_letter} 数据缺失 (规则1)")
# 规则5: 检查L-Q列是否为空(备注列表头可为空)
required_cols = ["L", "M", "N", "O", "P", "Q"]
for row_idx in range(self.DATA_START_ROW, scl_sheet.max_row + 1):
for col_letter in required_cols:
col_idx = self.COLUMN_MAPPING[col_letter]
# 备注列的表头可以为空,但数据需要检查
if col_letter == "Q" and row_idx == self.HEADER_ROW:
continue
cell = scl_sheet.cell(row_idx, col_idx)
if cell.value is None or str(cell.value).strip() == "":
# 标记单元格为黄色
self.highlight_cell(scl_sheet, row_idx, col_idx, "FFFFFF00")
missing_data.append({
'row': row_idx,
'col': col_idx,
'col_name': col_letter,
'message': f"{file_name}: 行 {row_idx} 列 {col_letter} 数据缺失 (规则5)"
})
logger.warning(f"行 {row_idx} 列 {col_letter} 数据缺失 (规则5)")
# 规则6: 当文件名后缀为.docx时,检查整个文件的F列(排除I列前缀为"仕様書No.:"的行)
file_name_cell = scl_sheet.cell(row=1, column=1)
if file_name_cell.value and str(file_name_cell.value).lower().endswith('.docx'):
logger.info(f"文件后缀为.docx,检查F列数据: {file_name}")
for row_idx in range(self.DATA_START_ROW, scl_sheet.max_row + 1):
# 检查I列是否有排除前缀
i_cell = scl_sheet.cell(row_idx, self.COLUMN_MAPPING["I"])
i_value = str(i_cell.value).strip() if i_cell.value else ""
# 如果I列以指定前缀开头,则跳过该行
if i_value.startswith(self.SPEC_PREFIX):
continue
# 检查F列是否为空
col_idx = self.COLUMN_MAPPING["F"]
cell = scl_sheet.cell(row_idx, col_idx)
if cell.value is None or str(cell.value).strip() == "":
# 标记单元格为黄色
self.highlight_cell(scl_sheet, row_idx, col_idx, "FFFFFF00")
missing_data.append({
'row': row_idx,
'col': col_idx,
'col_name': "F",
'message': f"{file_name}: 行 {row_idx} 列 F 数据缺失 (规则6: .docx文件, 排除条件:{self.SPEC_PREFIX})"
})
logger.warning(f"行 {row_idx} 列 F 数据缺失 (规则6: .docx文件, 排除条件:{self.SPEC_PREFIX})")
# 规则2-4: 基于L列的值检查其他列
for row_idx in range(self.DATA_START_ROW, scl_sheet.max_row + 1):
l_cell = scl_sheet.cell(row_idx, self.COLUMN_MAPPING["L"])
l_value = str(l_cell.value).strip() if l_cell.value else ""
# 规则2: 当L列为"変更"时,检查D、E、I、J列
if l_value == "変更":
# 应有数据的列
required_cols = ["D", "E", "I", "J"]
for col_letter in required_cols:
col_idx = self.COLUMN_MAPPING[col_letter]
cell = scl_sheet.cell(row_idx, col_idx)
if cell.value is None or str(cell.value).strip() == "":
# 标记单元格为黄色
self.highlight_cell(scl_sheet, row_idx, col_idx, "FFFFFF00")
missing_data.append({
'row': row_idx,
'col': col_idx,
'col_name': col_letter,
'message': f"{file_name}: 行 {row_idx} 列 {col_letter} 数据缺失 (规则2: L列='変更')"
})
logger.warning(f"行 {row_idx} 列 {col_letter} 数据缺失 (规则2: L列='変更')")
# 规则3: 当L列为"削除"时,检查D、I列应有数据,E、J列应无数据
elif l_value == "削除":
# 应有数据的列
required_cols = ["D", "I"]
for col_letter in required_cols:
col_idx = self.COLUMN_MAPPING[col_letter]
cell = scl_sheet.cell(row_idx, col_idx)
if cell.value is None or str(cell.value).strip() == "":
# 标记单元格为黄色
self.highlight_cell(scl_sheet, row_idx, col_idx, "FFFFFF00")
missing_data.append({
'row': row_idx,
'col': col_idx,
'col_name': col_letter,
'message': f"{file_name}: 行 {row_idx} 列 {col_letter} 数据缺失 (规则3: L列='削除')"
})
logger.warning(f"行 {row_idx} 列 {col_letter} 数据缺失 (规则3: L列='削除')")
# 应无数据的列
empty_cols = ["E", "J"]
for col_letter in empty_cols:
col_idx = self.COLUMN_MAPPING[col_letter]
cell = scl_sheet.cell(row_idx, col_idx)
if cell.value is not None and str(cell.value).strip() != "":
# 标记单元格为红色(不应有数据但有数据)
self.highlight_cell(scl_sheet, row_idx, col_idx, "FFFF0000")
missing_data.append({
'row': row_idx,
'col': col_idx,
'col_name': col_letter,
'message': f"{file_name}: 行 {row_idx} 列 {col_letter} 不应有数据 (规则3: L列='削除')"
})
logger.warning(f"行 {row_idx} 列 {col_letter} 不应有数据 (规则3: L列='削除')")
# 规则4: 当L列为"追加"时,检查E、J列应有数据,D、I列应无数据
elif l_value == "追加":
# 应有数据的列
required_cols = ["E", "J"]
for col_letter in required_cols:
col_idx = self.COLUMN_MAPPING[col_letter]
cell = scl_sheet.cell(row_idx, col_idx)
if cell.value is None or str(cell.value).strip() == "":
# 标记单元格为黄色
self.highlight_cell(scl_sheet, row_idx, col_idx, "FFFFFF00")
missing_data.append({
'row': row_idx,
'col': col_idx,
'col_name': col_letter,
'message': f"{file_name}: 行 {row_idx} 列 {col_letter} 数据缺失 (规则4: L列='追加')"
})
logger.warning(f"行 {row_idx} 列 {col_letter} 数据缺失 (规则4: L列='追加')")
# 应无数据的列
empty_cols = ["D", "I"]
for col_letter in empty_cols:
col_idx = self.COLUMN_MAPPING[col_letter]
cell = scl_sheet.cell(row_idx, col_idx)
if cell.value is not None and str(cell.value).strip() != "":
# 标记单元格为红色(不应有数据但有数据)
self.highlight_cell(scl_sheet, row_idx, col_idx, "FFFF0000")
missing_data.append({
'row': row_idx,
'col': col_idx,
'col_name': col_letter,
'message': f"{file_name}: 行 {row_idx} 列 {col_letter} 不应有数据 (规则4: L列='追加')"
})
logger.warning(f"行 {row_idx} 列 {col_letter} 不应有数据 (规则4: L列='追加')")
# 规则7: 变更背景检查
if checksheet_base_path:
background_errors = self.check_background_rule7(
scl_sheet, file_name, checksheet_base_path
)
missing_data.extend(background_errors)
# 如果有缺失数据,保存标记后的文件
if missing_data:
marked_file_path = file_path.replace(".xlsx", "_marked.xlsx")
scl_wb.save(marked_file_path)
logger.warning(f"检测到空单元格,文件已标记并保存为: {marked_file_path}")
return missing_data, marked_file_path
except Exception as e:
error_msg = f"空单元格检测失败: {str(e)}"
logger.exception(f"空单元格检测失败: {file_path} - {str(e)}")
return [{'row': 0, 'col': 0, 'col_name': '错误', 'message': error_msg}], None
def highlight_cell(self, sheet, row, col, color="FFFF0000"):
"""为单元格设置背景色"""
try:
fill = PatternFill(start_color=color, end_color=color, fill_type="solid")
sheet.cell(row=row, column=col).fill = fill
return True
except Exception as e:
logger.error(f"设置单元格颜色失败: {str(e)}")
return False
def extract_folder_name(self, file_name):
"""从文件名中提取文件夹名 (SCL_和.xlsx之间的部分)"""
pattern = r'SCL_(.*?)\.xlsx'
match = re.search(pattern, file_name)
if match:
return match.group(1)
return None
def read_checksheet_data(self, checksheet_path):
"""从CheckSheet文件中读取数据(第8行开始,B、C列)"""
try:
logger.info(f"读取CheckSheet文件: {checksheet_path}")
# 加载CheckSheet文件
wb = openpyxl.load_workbook(checksheet_path)
sheet = wb.active
# 表头在第7行
header_row = 7
# 数据从第8行开始
data_start_row = 8
# 存储读取到的数据
data = []
# 遍历数据行
for row_idx in range(data_start_row, sheet.max_row + 1):
# 读取B列数据(合并后的主数据)
b_cell = sheet.cell(row=row_idx, column=2)
b_value = str(b_cell.value).strip() if b_cell.value else ""
# 如果B列为空,停止读取
if not b_value:
break
# 读取C列数据
c_cell = sheet.cell(row=row_idx, column=3)
c_value = str(c_cell.value).strip() if c_cell.value else ""
# 只添加非空数据
if b_value or c_value:
data.append((b_value, c_value))
logger.info(f"从CheckSheet读取到 {len(data)} 条数据")
return data
except Exception as e:
logger.error(f"读取CheckSheet文件失败: {str(e)}")
return []
def check_background_rule7(self, scl_sheet, file_name, checksheet_base_path):
"""检查变更背景是否符合规则7"""
errors = []
try:
logger.info(f"开始检查规则7: {file_name}")
# 提取文件夹名
folder_name = self.extract_folder_name(file_name)
if not folder_name:
logger.warning(f"无法从文件名提取文件夹名: {file_name}")
return []
# 构建CheckSheet文件路径
checksheet_dir = os.path.join(checksheet_base_path, folder_name)
if not os.path.exists(checksheet_dir):
logger.warning(f"文件夹不存在: {checksheet_dir}")
return []
# 查找CheckSheet文件
checksheet_files = [f for f in os.listdir(checksheet_dir)
if f.startswith("CheckSheet_") and f.endswith((".xlsx", ".xls"))]
if not checksheet_files:
logger.warning(f"未找到CheckSheet文件 in {checksheet_dir}")
return []
# 使用第一个找到的CheckSheet文件
checksheet_file = checksheet_files[0]
checksheet_full_path = os.path.join(checksheet_dir, checksheet_file)
logger.info(f"使用CheckSheet文件: {checksheet_full_path}")
# 读取CheckSheet数据 - 使用修复后的方法
checksheet_data = self.read_checksheet_data(checksheet_full_path)
if not checksheet_data:
logger.warning(f"CheckSheet文件无有效数据: {checksheet_full_path}")
return []
# 打印读取到的sheet名数据
logger.info(f"读取到的CheckSheet数据: {checksheet_data}")
# 提取所有sheet名(来自CheckSheet的B列和C列)
sheet_names = set()
for b_value, c_value in checksheet_data:
if b_value:
sheet_names.add(b_value)
if c_value:
sheet_names.add(c_value)
logger.info(f"提取到 {len(sheet_names)} 个唯一sheet名")
# 创建变更背景错误列表
background_errors = []
# 遍历C列,查找匹配的sheet名
for row_idx in range(self.DATA_START_ROW, scl_sheet.max_row + 1):
c_cell = scl_sheet.cell(row_idx, self.COLUMN_MAPPING["C"])
c_value = str(c_cell.value).strip() if c_cell.value is not None else ""
# 检查是否在sheet名列表中
if c_value and c_value in sheet_names:
# 检查P列(变更背景)
p_cell = scl_sheet.cell(row_idx, self.COLUMN_MAPPING["P"])
p_value = str(p_cell.value).strip() if p_cell.value is not None else ""
# 检查是否为空或为"-"
if not p_value or p_value == "-":
error_msg = f"{file_name}: 行 {row_idx} - C列值 '{c_value}' 匹配, 但变更背景为空或'-' (规则7)"
background_errors.append({
'row': row_idx,
'col': self.COLUMN_MAPPING["P"],
'col_name': "P",
'message': error_msg
})
logger.warning(error_msg)
# 标记错误单元格
for error in background_errors:
self.highlight_cell(scl_sheet, error['row'], error['col'], "FFFFFF00")
return background_errors
except Exception as e:
error_msg = f"检查规则7失败: {str(e)}"
logger.exception(error_msg)
return [{
'row': 0,
'col': 0,
'col_name': '错误',
'message': f"{file_name}: {error_msg} (规则7)"
}]
class SCLStatsProcessor:
"""SCL统计处理器 - 优化版:固定列位置"""
def __init__(self, color_detector):
self.color_detector = color_detector
# 固定列位置
self.COLUMN_MAPPING = {
"差分種別": 12, # L列
"变更内容": 13, # M列
"判断列": 14, # N列
"判断理由": 15, # O列
"变更背景": 16, # P列
"备注": 17 # Q列
}
# 统计结果列映射
self.target_columns = {
"diff_no_fill": 16, # P列
"diff_fill": 23, # W列
"diff_add_no_fill": 27, # AA列
"diff_add_fill": 30, # AD列
"diff_change_no_fill": 34, # AH列
"diff_change_fill": 37, # AK列
"diff_delete_no_fill": 42, # AP列
"diff_delete_fill": 45, # AS列
"valid_yes_no_fill": 50, # AX列
"valid_yes_fill": 53, # BA列
"valid_no_no_fill": 57, # BE列
"valid_no_fill": 60, # BH列
"valid_yes_reason_no_fill": 62, # BL列
"valid_yes_reason_fill": 65, # BO列
"valid_no_reason_no_fill": 71, # BS列
"valid_no_reason_fill": 74, # BV列
"background_no_fill": 78, # BZ列
"background_fill": 85 # CG列
}
self.HEADER_ROW = 3 # 表头固定在第三行
self.DATA_START_ROW = 4 # 数据从第四行开始
def process_scl_file(self, file_path):
"""
处理单个SCL文件并返回所有统计结果
返回: (results, missing_data)
- results: 规则统计结果
- missing_data: 缺失数据列表 [{'row': 行号, 'col': 列号, 'col_name': 列名, 'message': 错误信息}, ...]
"""
results = {}
missing_data = [] # 存储缺失数据信息
try:
logger.info(f"开始处理SCL文件: {file_path}")
# 加载SCL文件
scl_wb = openpyxl.load_workbook(file_path)
scl_sheet = scl_wb.active
logger.info(f"工作表加载成功: {scl_sheet.title}, 总行数: {scl_sheet.max_row}")
# 检查是否有足够的数据行
if scl_sheet.max_row < self.DATA_START_ROW:
logger.info("文件没有数据行,跳过处理")
return results, missing_data
# 获取文件名用于错误信息
file_name = os.path.basename(file_path)
# 初始化统计结果
stats = {
"diff_no_fill": [], # 规则1: 変更内容无颜色填充
"diff_fill": [], # 规则2: 変更内容有颜色填充
"diff_add_no_fill": [], # 规则3: 差分种别="追加"且无颜色填充
"diff_add_fill": [], # 规则4: 差分种别="追加"且有颜色填充
"diff_change_no_fill": [], # 规则5: 差分种别="変更"且无颜色填充
"diff_change_fill": [], # 规则6: 差分种别="変更"且有颜色填充
"diff_delete_no_fill": [], # 规则7: 差分种别="削除"且无颜色填充
"diff_delete_fill": [], # 规则8: 差分种别="削除"且有颜色填充
"valid_yes_no_fill": [], # 规则9: 判断="有意"且无颜色填充
"valid_yes_fill": [], # 规则10: 判断="有意"且有颜色填充
"valid_no_no_fill": [], # 规则11: 判断="無効"且无颜色填充
"valid_no_fill": [], # 规则12: 判断="無効"且有颜色填充
"valid_yes_reason_no_fill": [], # 规则13: 判断="有意"且理由无颜色填充
"valid_yes_reason_fill": [], # 规则14: 判断="有意"且理由有颜色填充
"valid_no_reason_no_fill": [], # 规则15: 判断="無効"且理由无颜色填充
"valid_no_reason_fill": [], # 规则16: 判断="無効"且理由有颜色填充
"background_no_fill": [], # 规则17: 背景无颜色填充
"background_fill": [] # 规则18: 背景有颜色填充
}
# 遍历所有数据行
for row_idx in range(self.DATA_START_ROW, scl_sheet.max_row + 1):
# 获取所有需要的单元格
diff_content_cell = scl_sheet.cell(row_idx, self.COLUMN_MAPPING["变更内容"])
diff_cell = scl_sheet.cell(row_idx, self.COLUMN_MAPPING["差分種別"])
note_cell = scl_sheet.cell(row_idx, self.COLUMN_MAPPING["备注"])
valid_cell = scl_sheet.cell(row_idx, self.COLUMN_MAPPING["判断列"])
reason_cell = scl_sheet.cell(row_idx, self.COLUMN_MAPPING["判断理由"])
background_cell = scl_sheet.cell(row_idx, self.COLUMN_MAPPING["变更背景"])
# 获取备注值
note_value = str(note_cell.value).strip() if note_cell and note_cell.value else None
# 规则1: 変更内容无颜色填充
if self.color_detector.is_no_fill(diff_content_cell) and note_value:
stats["diff_no_fill"].append(note_value)
# 规则2: 変更内容有颜色填充
if not self.color_detector.is_no_fill(diff_content_cell) and note_value:
stats["diff_fill"].append(note_value)
# 规则3: 差分种别="追加"且无颜色填充
if (diff_cell.value == "追加" and
self.color_detector.is_no_fill(diff_cell) and note_value):
stats["diff_add_no_fill"].append(note_value)
# 规则4: 差分种别="追加"且有颜色填充
if (diff_cell.value == "追加" and
not self.color_detector.is_no_fill(diff_cell) and note_value):
stats["diff_add_fill"].append(note_value)
# 规则5: 差分种别="変更"且无颜色填充
if (diff_cell.value == "変更" and
self.color_detector.is_no_fill(diff_cell) and note_value):
stats["diff_change_no_fill"].append(note_value)
# 规则6: 差分种别="変更"且有颜色填充
if (diff_cell.value == "変更" and
not self.color_detector.is_no_fill(diff_cell) and note_value):
stats["diff_change_fill"].append(note_value)
# 规则7: 差分种别="削除"且无颜色填充
if (diff_cell.value == "削除" and
self.color_detector.is_no_fill(diff_cell) and note_value):
stats["diff_delete_no_fill"].append(note_value)
# 规则8: 差分种别="削除"且有颜色填充
if (diff_cell.value == "削除" and
not self.color_detector.is_no_fill(diff_cell) and note_value):
stats["diff_delete_fill"].append(note_value)
# 规则9: 判断="有意"且无颜色填充
if (valid_cell.value == "有意" and
self.color_detector.is_no_fill(valid_cell) and note_value):
stats["valid_yes_no_fill"].append(note_value)
# 规则10: 判断="有意"且有颜色填充
if (valid_cell.value == "有意" and
not self.color_detector.is_no_fill(valid_cell) and note_value):
stats["valid_yes_fill"].append(note_value)
# 规则11: 判断="無効"且无颜色填充
if (valid_cell.value == "無効" and
self.color_detector.is_no_fill(valid_cell) and note_value):
stats["valid_no_no_fill"].append(note_value)
# 规则12: 判断="無効"且有颜色填充
if (valid_cell.value == "無効" and
not self.color_detector.is_no_fill(valid_cell) and note_value):
stats["valid_no_fill"].append(note_value)
# 规则13: 判断="有意"且理由无颜色填充
if (valid_cell.value == "有意" and
self.color_detector.is_no_fill(reason_cell) and note_value):
stats["valid_yes_reason_no_fill"].append(note_value)
# 规则14: 判断="有意"且理由有颜色填充
if (valid_cell.value == "有意" and
not self.color_detector.is_no_fill(reason_cell) and note_value):
stats["valid_yes_reason_fill"].append(note_value)
# 规则15: 判断="無効"且理由无颜色填充
if (valid_cell.value == "無効" and
self.color_detector.is_no_fill(reason_cell) and note_value):
stats["valid_no_reason_no_fill"].append(note_value)
# 规则16: 判断="無効"且理由有颜色填充
if (valid_cell.value == "無効" and
not self.color_detector.is_no_fill(reason_cell) and note_value):
stats["valid_no_reason_fill"].append(note_value)
# 规则17: 背景无颜色填充
if self.color_detector.is_no_fill(background_cell) and note_value:
stats["background_no_fill"].append(note_value)
# 规则18: 背景有颜色填充
if not self.color_detector.is_no_fill(background_cell) and note_value:
stats["background_fill"].append(note_value)
# 处理统计结果
for rule, values in stats.items():
if not values:
results[rule] = "/"
logger.info(f"{rule}: 无数据")
else:
counter = Counter(values)
result_lines = [f"{value},{count}" for value, count in counter.most_common()]
results[rule] = "\n".join(result_lines)
logger.info(f"{rule}: 找到 {len(values)} 条数据")
return results, missing_data
except Exception as e:
error_msg = f"处理SCL文件失败: {str(e)}"
logger.exception(f"处理SCL文件失败: {file_path} - {str(e)}")
# 返回错误信息
return {rule: f"错误: {str(e)}" for rule in self.target_columns}, [{
'row': 0,
'col': 0,
'col_name': '错误',
'message': error_msg
}]
class SCLMultiProcessor:
def __init__(self, root):
self.root = root
self.root.title("SCL文件处理系统 - 增强版")
self.root.geometry("1000x700")
# 初始化变量
self.color_detector = ExcelColorDetector()
self.empty_cell_detector = EmptyCellDetector(self.color_detector)
self.stats_processor = SCLStatsProcessor(self.color_detector)
self.progress_var = tk.DoubleVar()
# 创建主框架
self.main_frame = ttk.Frame(root, padding="10")
self.main_frame.pack(fill=tk.BOTH, expand=True)
# 创建UI
self.create_ui()
# 记录UI初始化完成
logger.info("用户界面初始化完成")
def create_ui(self):
"""创建用户界面"""
# 文件选择区域
file_frame = ttk.LabelFrame(self.main_frame, text="文件选择", padding="10")
file_frame.pack(fill=tk.X, pady=5)
# 输入文件选择
input_frame = ttk.Frame(file_frame)
input_frame.pack(fill=tk.X, pady=5)
ttk.Label(input_frame, text="主输入文件:").pack(side=tk.LEFT, padx=5)
self.input_path_var = tk.StringVar()
input_entry = ttk.Entry(input_frame, textvariable=self.input_path_var, width=70)
input_entry.pack(side=tk.LEFT, fill=tk.X, expand=True, padx=5)
ttk.Button(input_frame, text="浏览...", command=self.browse_input_file).pack(side=tk.LEFT, padx=5)
# CheckSheet路径选择
checksheet_frame = ttk.Frame(file_frame)
checksheet_frame.pack(fill=tk.X, pady=5)
ttk.Label(checksheet_frame, text="CheckSheet路径:").pack(side=tk.LEFT, padx=5)
self.checksheet_path_var = tk.StringVar()
checksheet_entry = ttk.Entry(checksheet_frame, textvariable=self.checksheet_path_var, width=70)
checksheet_entry.pack(side=tk.LEFT, fill=tk.X, expand=True, padx=5)
ttk.Button(checksheet_frame, text="浏览...", command=self.browse_checksheet_path).pack(side=tk.LEFT, padx=5)
# 操作模式选择区域
mode_frame = ttk.LabelFrame(self.main_frame, text="操作模式", padding="10")
mode_frame.pack(fill=tk.X, pady=5)
# 添加操作模式单选按钮
self.operation_mode = tk.StringVar(value="stats") # 默认选择统计模式
ttk.Radiobutton(mode_frame, text="统计功能", variable=self.operation_mode, value="stats").pack(side=tk.LEFT, padx=10)
ttk.Radiobutton(mode_frame, text="检测SCL文件格式", variable=self.operation_mode, value="format_check").pack(side=tk.LEFT, padx=10)
# 配置区域
config_frame = ttk.LabelFrame(self.main_frame, text="处理配置", padding="10")
config_frame.pack(fill=tk.X, pady=5)
# 搜索选项
search_frame = ttk.Frame(config_frame)
search_frame.pack(fill=tk.X, pady=5)
ttk.Label(search_frame, text="文件前缀:").grid(row=0, column=0, padx=5, sticky=tk.W)
self.prefix_var = tk.StringVar(value="SCL_")
ttk.Entry(search_frame, textvariable=self.prefix_var, width=10).grid(row=0, column=1, padx=5, sticky=tk.W)
# 添加性能提示
ttk.Label(search_frame, text="(表头固定在第3行,数据从第4行开始)").grid(row=0, column=2, padx=5, sticky=tk.W)
# 日志选项
log_frame = ttk.Frame(config_frame)
log_frame.pack(fill=tk.X, pady=5)
ttk.Label(log_frame, text="日志级别:").grid(row=0, column=0, padx=5, sticky=tk.W)
self.log_level_var = tk.StringVar(value="INFO")
log_level_combo = ttk.Combobox(
log_frame, textvariable=self.log_level_var, width=10, state="readonly"
)
log_level_combo['values'] = ('DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL')
log_level_combo.grid(row=0, column=1, padx=5, sticky=tk.W)
log_level_combo.bind("<<ComboboxSelected>>", self.change_log_level)
# 处理按钮
btn_frame = ttk.Frame(self.main_frame)
btn_frame.pack(fill=tk.X, pady=10)
ttk.Button(btn_frame, text="开始处理", command=self.process_file).pack(side=tk.LEFT, padx=5)
ttk.Button(btn_frame, text="查看日志", command=self.view_log).pack(side=tk.LEFT, padx=5)
ttk.Button(btn_frame, text="导出配置", command=self.export_config).pack(side=tk.LEFT, padx=5)
ttk.Button(btn_frame, text="加载配置", command=self.load_config).pack(side=tk.LEFT, padx=5)
ttk.Button(btn_frame, text="退出", command=self.root.destroy).pack(side=tk.RIGHT, padx=5)
# 进度条
progress_frame = ttk.Frame(self.main_frame)
progress_frame.pack(fill=tk.X, pady=5)
ttk.Label(progress_frame, text="处理进度:").pack(side=tk.LEFT, padx=5)
self.progress_bar = ttk.Progressbar(
progress_frame, variable=self.progress_var, maximum=100, length=700
)
self.progress_bar.pack(side=tk.LEFT, fill=tk.X, expand=True, padx=5)
self.progress_label = ttk.Label(progress_frame, text="0%")
self.progress_label.pack(side=tk.LEFT, padx=5)
# 结果展示区域
result_frame = ttk.LabelFrame(self.main_frame, text="处理结果", padding="10")
result_frame.pack(fill=tk.BOTH, expand=True, pady=5)
# 结果文本框
self.result_text = scrolledtext.ScrolledText(
result_frame, wrap=tk.WORD, height=20
)
self.result_text.pack(fill=tk.BOTH, expand=True)
self.result_text.config(state=tk.DISABLED)
# 状态栏
self.status_var = tk.StringVar(value="就绪")
status_bar = ttk.Label(self.main_frame, textvariable=self.status_var, relief=tk.SUNKEN, anchor=tk.W)
status_bar.pack(fill=tk.X, pady=5)
logger.info("UI创建完成")
def browse_checksheet_path(self):
"""浏览CheckSheet路径"""
folder_path = filedialog.askdirectory()
if folder_path:
self.checksheet_path_var.set(folder_path)
logger.info(f"已选择CheckSheet路径: {folder_path}")
def change_log_level(self, event=None):
"""更改日志级别"""
level = self.log_level_var.get()
logger.setLevel(getattr(logging, level))
logger.info(f"日志级别已更改为: {level}")
def browse_input_file(self):
"""浏览输入文件"""
file_path = filedialog.askopenfilename(
filetypes=[("Excel 文件", "*.xlsx *.xls"), ("所有文件", "*.*")]
)
if file_path:
self.input_path_var.set(file_path)
self.input_file = file_path
logger.info(f"已选择输入文件: {file_path}")
def highlight_cell(self, sheet, row, col, color="FFFF0000"):
"""为单元格设置背景色"""
try:
fill = PatternFill(start_color=color, end_color=color, fill_type="solid")
sheet.cell(row=row, column=col).fill = fill
return True
except Exception as e:
logger.error(f"设置单元格颜色失败: {str(e)}")
return False
def process_file(self):
"""处理文件 - 每次处理保存数据,下次运行重新开始"""
if not self.input_path_var.get():
messagebox.showwarning("警告", "请先选择输入文件")
logger.warning("未选择输入文件")
return
try:
# 重置结果
self.result_text.config(state=tk.NORMAL)
self.result_text.delete(1.0, tk.END)
self.result_text.insert(tk.END, "开始处理...\n")
self.result_text.see(tk.END)
self.result_text.config(state=tk.DISABLED)
self.status_var.set("开始处理文件...")
self.root.update()
# 获取输入文件目录
input_file = self.input_path_var.get()
input_dir = os.path.dirname(input_file)
logger.info(f"开始处理文件: {input_file}")
logger.info(f"文件目录: {input_dir}")
# 使用openpyxl加载工作簿(保留格式)
wb = openpyxl.load_workbook(input_file)
sheet = wb.active
logger.info(f"工作簿加载成功, 工作表: {sheet.title}")
# 获取配置参数
prefix = self.prefix_var.get()
operation_mode = self.operation_mode.get()
checksheet_base_path = self.checksheet_path_var.get() if operation_mode == "format_check" else None
logger.info(f"配置参数: 文件前缀={prefix}, 操作模式={operation_mode}")
# 扫描E列(第5列)
total_rows = sheet.max_row
processed_count = 0
found_files = 0
problem_files = 0
logger.info(f"开始扫描E列, 总行数: {total_rows}")
start_time = time.time()
for row_idx in range(1, total_rows + 1):
# 更新进度
progress = (row_idx / total_rows) * 100
self.progress_var.set(progress)
self.progress_label.config(text=f"{progress:.1f}%")
self.root.update()
cell = sheet.cell(row=row_idx, column=5)
cell_value = str(cell.value) if cell.value else ""
# 检查是否包含前缀的文件名
if prefix in cell_value:
# 提取文件名(可能有多个以空格分隔)
file_names = re.findall(fr'{prefix}[^\s]+', cell_value)
logger.info(f"行 {row_idx}: 找到文件: {', '.join(file_names)}")
result_lines = []
file_has_problems = False # 标记当前行是否有问题文件
for file_name in file_names:
file_path = os.path.join(input_dir, file_name)
# 检查文件是否存在
if not os.path.exists(file_path):
result_lines.append(f"{file_name}: 文件不存在")
logger.warning(f"文件不存在: {file_path}")
# 标记文件不存在的单元格为紫色
self.highlight_cell(sheet, row_idx, 5, "FF800080")
file_has_problems = True
problem_files += 1
continue
# 根据操作模式选择处理方式
if operation_mode == "format_check":
# 检测SCL文件格式模式(包含规则1-7)
missing_data, marked_file_path = self.empty_cell_detector.detect_empty_cells(
file_path,
checksheet_base_path
)
if missing_data:
result_lines.append(f"{file_name}: 检测到格式问题!")
for item in missing_data:
result_lines.append(f" - {item['message']}")
logger.warning(item['message'])
file_has_problems = True
problem_files += 1
else:
result_lines.append(f"{file_name}: 格式检查通过")
else:
# 默认统计模式
results, missing_data = self.stats_processor.process_scl_file(file_path)
# 如果有数据缺失
if missing_data:
file_has_problems = True
problem_files += 1
result_lines.append(f"{file_name}: 数据缺失!")
for item in missing_data:
result_lines.append(f" - {item['message']}")
logger.warning(item['message'])
else:
result_lines.append(f"{file_name}: 处理完成")
# 将结果写入主Excel文件的不同列
for rule_name, result_str in results.items():
target_col = self.stats_processor.target_columns.get(rule_name)
if target_col:
target_cell = sheet.cell(row=row_idx, column=target_col)
target_cell.value = result_str
found_files += 1
# 如果该行有文件存在问题,将E列单元格标红
if file_has_problems:
self.highlight_cell(sheet, row_idx, 5)
logger.info(f"行 {row_idx} E列单元格标记为红色(存在问题)")
# 更新结果文本框
self.result_text.config(state=tk.NORMAL)
self.result_text.insert(
tk.END, f"行 {row_idx} 处理结果:\n" + "\n".join(result_lines) + "\n\n"
)
self.result_text.see(tk.END)
self.result_text.config(state=tk.DISABLED)
processed_count += 1
# 保存修改后的Excel文件 - 每次处理保存数据
output_path = input_file.replace(".xlsx", "_processed.xlsx")
wb.save(output_path)
logger.info(f"结果已保存到: {output_path}")
elapsed_time = time.time() - start_time
status_msg = f"处理完成! 处理了 {processed_count} 个文件项, 耗时 {elapsed_time:.2f} 秒"
if problem_files > 0:
status_msg += f", {problem_files} 个文件存在问题"
self.status_var.set(status_msg)
logger.info(status_msg)
# 更新结果文本框
self.result_text.config(state=tk.NORMAL)
self.result_text.insert(
tk.END, f"\n{status_msg}\n"
f"结果已保存到: {output_path}\n"
)
self.result_text.see(tk.END)
self.result_text.config(state=tk.DISABLED)
messagebox.showinfo("完成", status_msg)
except Exception as e:
error_msg = f"处理文件时出错: {str(e)}"
logger.exception(f"处理文件时出错: {str(e)}")
messagebox.showerror("错误", error_msg)
self.status_var.set(f"错误: {str(e)}")
# 更新结果文本框
self.result_text.config(state=tk.NORMAL)
self.result_text.insert(tk.END, f"\n错误: {error_msg}\n")
self.result_text.see(tk.END)
self.result_text.config(state=tk.DISABLED)
def view_log(self):
"""查看日志"""
log_window = tk.Toplevel(self.root)
log_window.title("处理日志")
log_window.geometry("800x600")
log_frame = ttk.Frame(log_window, padding="10")
log_frame.pack(fill=tk.BOTH, expand=True)
# 日志文本框
log_text = scrolledtext.ScrolledText(
log_frame, wrap=tk.WORD, height=30
)
log_text.pack(fill=tk.BOTH, expand=True)
# 读取日志文件
log_file = 'scl_processor.log'
try:
if not os.path.exists(log_file):
with open(log_file, 'w', encoding='utf-8') as f:
f.write("日志文件已创建,暂无记录\n")
with open(log_file, 'r', encoding='utf-8') as f:
log_content = f.read()
log_text.insert(tk.END, log_content)
except Exception as e:
log_text.insert(tk.END, f"无法读取日志文件: {str(e)}")
# 设置为只读
log_text.config(state=tk.DISABLED)
# 添加刷新按钮
refresh_btn = ttk.Button(log_frame, text="刷新日志", command=lambda: self.refresh_log(log_text))
refresh_btn.pack(pady=5)
logger.info("日志查看窗口已打开")
def refresh_log(self, log_text):
"""刷新日志内容"""
log_text.config(state=tk.NORMAL)
log_text.delete(1.0, tk.END)
try:
with open('scl_processor.log', 'r', encoding='utf-8') as f:
log_content = f.read()
log_text.insert(tk.END, log_content)
except Exception as e:
log_text.insert(tk.END, f"刷新日志失败: {str(e)}")
log_text.config(state=tk.DISABLED)
log_text.see(tk.END)
logger.info("日志已刷新")
def export_config(self):
"""导出配置到文件"""
config = {
"prefix": self.prefix_var.get(),
"log_level": self.log_level_var.get(),
"operation_mode": self.operation_mode.get(),
"checksheet_path": self.checksheet_path_var.get()
}
file_path = filedialog.asksaveasfilename(
defaultextension=".json",
filetypes=[("JSON 文件", "*.json"), ("所有文件", "*.*")]
)
if file_path:
try:
with open(file_path, 'w', encoding='utf-8') as f:
f.write(str(config))
messagebox.showinfo("成功", f"配置已导出到: {file_path}")
logger.info(f"配置已导出到: {file_path}")
except Exception as e:
messagebox.showerror("错误", f"导出配置失败: {str(e)}")
logger.error(f"导出配置失败: {str(e)}")
def load_config(self):
"""从文件加载配置"""
file_path = filedialog.askopenfilename(
filetypes=[("JSON 文件", "*.json"), ("所有文件", "*.*")]
)
if file_path:
try:
with open(file_path, 'r', encoding='utf-8') as f:
config = eval(f.read())
self.prefix_var.set(config.get("prefix", "SCL_"))
self.log_level_var.set(config.get("log_level", "INFO"))
self.operation_mode.set(config.get("operation_mode", "stats"))
self.checksheet_path_var.set(config.get("checksheet_path", ""))
self.change_log_level()
messagebox.showinfo("成功", "配置已加载")
logger.info(f"配置已从 {file_path} 加载")
except Exception as e:
messagebox.showerror("错误", f"加载配置失败: {str(e)}")
logger.error(f"加载配置失败: {str(e)}")
if __name__ == "__main__":
root = tk.Tk()
app = SCLMultiProcessor(root)
root.mainloop()
把刚刚的规则加入到程序中,给出完整代码