class SCLRuleProcessor:
"""SCL规则处理器,包含详细的颜色映射逻辑"""
def __init__(self, color_detector=None):
self.color_detector = color_detector or EnhancedColorDetector(tolerance=30)
# 创建专用日志记录器
self.logger = logging.getLogger('SCLProcessor.RuleProcessor')
if not self.logger.handlers:
# 如果没有处理器,添加控制台处理器
console_handler = logging.StreamHandler()
console_handler.setLevel(logging.INFO)
formatter = logging.Formatter(
'%(asctime)s - %(name)s - %(levelname)s - %(message)s',
datefmt='%Y-%m-%d %H:%M:%S'
)
console_handler.setFormatter(formatter)
self.logger.addHandler(console_handler)
self.logger.setLevel(logging.INFO)
# 固定列位置
self.COLUMN_MAPPING = {
"差分種別": 12, # L列
"变更内容": 13, # M列
"判断列": 14, # N列
"判断理由": 15, # O列
"变更背景": 16, # P列
"备注": 17 # Q列
}
# 新增规则的关键词 - 添加更多变体
self.keywords = [
"波形図の尾の延長、非機能変更と判定されました。",
"波形図の尾の延長",
"非機能変更",
"選択肢的补充说明,不属于功能变更。",
"补充说明",
"不属于功能变更",
"空白行が追加され、非機能変更と判定された",
"空白行追加",
"空行が削除され、非機能変更と判定された",
"空行削除",
"无效更改,判定为无功能变更。",
"无效更改",
"書式変更で機能変更ではないと判断されました。",
"書式変更",
"仅修改了背景色,不影响软件设计,判定为无功能变更。",
"仅修改背景色",
"背景色修改",
"非功能变更",
"无功能变更",
"書式調整",
"非機能修正"
]
# 创建关键词正则表达式模式
self.keyword_pattern = self._create_keyword_pattern()
# 规则映射到目标列
self.RULE_MAPPING = {
# 规则1-18: 备注值统计
"diff_no_fill": 16, # P列
"diff_fill": 23, # W列
"diff_add_no_fill": 27, # AA列
"diff_add_fill": 30, # AD列
"diff_change_no_fill": 34, # AH列
"diff_change_fill": 37, # AK列
"diff_delete_no_fill": 42, # AP列
"diff_delete_fill": 45, # AS列
"valid_yes_no_fill": 50, # AX列
"valid_yes_fill": 53, # BA列
"valid_no_no_fill": 57, # BE列
"valid_no_fill": 60, # BH列
"valid_yes_reason_no_fill": 62, # BL列
"valid_yes_reason_fill": 65, # BO列
"valid_no_reason_no_fill": 71, # BS列
"valid_no_reason_fill": 74, # BV列
"background_no_fill": 78, # BZ列
"background_fill": 85, # CG列,
# 规则19-59: 计数统计
"rule19": 14, # N列
"rule20": 15, # O列
"rule21": 17, # Q列
"rule22": 18, # R列
"rule23": 19, # S列
"rule24": 20, # T列
"rule25": 21, # U列
"rule26": 22, # V列
"rule27": 25, # Y列
"rule28": 26, # Z列
"rule29": 28, # AB列
"rule30": 29, # AC列
"rule31": 32, # AF列
"rule32": 33, # AG列
"rule33": 35, # AI列
"rule34": 36, # AJ列
"rule35": 47, # AU列
"rule36": 48, # AV列
"rule37": 49, # AW列
"rule38": 51, # AY列
"rule39": 52, # AZ列
"rule40": 55, # BC列
"rule41": 56, # BD列
"rule42": 58, # BF列
"rule43": 59, # BG列
"rule44": 62, # BJ列
"rule45": 63, # BK列
"rule46": 65, # BM列
"rule47": 66, # BN列
"rule48": 69, # BQ列
"rule49": 70, # BR列
"rule50": 72, # BT列
"rule51": 73, # BU列
"rule52": 76, # BX列
"rule53": 77, # BY列
"rule54": 79, # CA列
"rule55": 80, # CB列
"rule56": 81, # CC列
"rule57": 82, # CD列
"rule58": 83, # CE列
"rule59": 84, # CF列
}
# 规则统计字典
self.rule_stats = {rule: defaultdict(int) for rule in self.RULE_MAPPING.keys()}
self.HEADER_ROW = 3 # 表头固定在第三行
self.DATA_START_ROW = 4 # 数据从第四行开始
def reset_rule_stats(self):
"""重置规则统计"""
for rule in self.rule_stats:
self.rule_stats[rule].clear()
def get_cell_color_status(self, cell, column_name=None):
"""
获取单元格的颜色状态(增强版)
返回: (颜色名称, 颜色HEX值, 是否无填充)
column_name: 列名,用于确定是否进行特殊颜色判断
"""
try:
# 检查是否无填充
if self.color_detector.is_no_fill(cell):
return "无填充", None, True
# 获取单元格颜色
hex_color = self.color_detector.get_cell_color(cell)
if not hex_color:
return "未知", None, False
# 只有M列和P列需要特殊颜色判断
if column_name in ["变更内容", "变更背景"]:
# 检查是否为特定颜色
if self.color_detector.is_specific_color(cell, self.COLOR_CRIMSON):
return "猩红", self.COLOR_CRIMSON, False
if self.color_detector.is_specific_color(cell, self.COLOR_STEEL_BLUE):
return "钢蓝", self.COLOR_STEEL_BLUE, False
if self.color_detector.is_specific_color(cell, self.COLOR_CHOCOLATE):
return "巧克力黄", self.COLOR_CHOCOLATE, False
# 匹配最接近的已知颜色
closest_name = self.color_detector.match_color_name(hex_color)
return closest_name, hex_color, False
except Exception as e:
self.logger.error(f"获取单元格颜色状态失败: {str(e)}")
return "错误", None, False
def _create_keyword_pattern(self):
"""创建关键词匹配的正则表达式模式"""
# 转义特殊字符并创建模式
escaped_keywords = [re.escape(kw) for kw in self.keywords]
# 创建不区分大小写的模式
pattern_str = "|".join(escaped_keywords)
return re.compile(pattern_str, re.IGNORECASE)
def contains_keyword(self, text):
"""
使用正则表达式检查文本是否包含任意关键词(更健壮的匹配)
返回: 匹配结果和匹配到的关键词列表
"""
if not text:
return False, []
text = str(text).strip()
matches = self.keyword_pattern.findall(text)
return bool(matches), matches
def split_note_values(self, note_value):
"""拆分备注值,支持多种分隔符"""
if not note_value:
return []
# 支持的分隔符:中文顿号、全角逗号、半角逗号、分号、空格
separators = ['、', ',', ',', ';', ';', ' ']
# 替换所有分隔符为统一的分隔符
normalized = str(note_value)
for sep in separators:
normalized = normalized.replace(sep, '|')
# 分割并过滤空值
values = [v.strip() for v in normalized.split('|') if v.strip()]
# 过滤掉值为“-”的项
values = [v for v in values if v != "-"]
return values
def format_note_stats(self, data_dict):
"""格式化备注统计结果,确保每个值一行"""
try:
# 尝试按数值排序
sorted_items = sorted(
data_dict.items(),
key=lambda x: int(x[0]) if x[0].isdigit() else x[0]
)
except Exception:
# 如果无法转换为数字,则按字符串排序
sorted_items = sorted(data_dict.items())
# 格式化为每行一个值
return "\n".join([f"{value},{count}" for value, count in sorted_items])
def log_rule_stats(self, file_path):
"""记录规则统计信息到日志"""
self.logger.info(f"文件 {os.path.basename(file_path)} 规则统计结果:")
# 创建列号到列字母的映射
column_letter_map = self.create_column_letter_map()
for rule_name in self.RULE_MAPPING.keys():
# 获取目标列号
target_col = self.RULE_MAPPING[rule_name]
# 获取列字母表示
col_letter = column_letter_map.get(target_col, f"列{target_col}")
# 获取统计结果
if rule_name in self.rule_stats and self.rule_stats[rule_name]:
# 格式化统计结果
if rule_name.startswith("rule") and rule_name[4:].isdigit():
# 计数型规则
count = self.rule_stats[rule_name]["count"]
self.logger.info(f"规则 {rule_name} ({col_letter}): 总计 {count} 行")
else:
# 值统计型规则
stats_str = "; ".join(
[f"{value},{count}" for value, count in self.rule_stats[rule_name].items()]
)
self.logger.info(f"规则 {rule_name} ({col_letter}): {stats_str}")
else:
self.logger.info(f"规则 {rule_name} ({col_letter}): 无数据")
def create_column_letter_map(self):
"""创建列号到列字母的映射"""
column_letter_map = {}
# 生成A-Z列
for i in range(1, 27):
column_letter_map[i] = chr(64 + i)
# 生成AA-AZ列
for i in range(1, 27):
column_letter_map[26 + i] = f"A{chr(64 + i)}"
# 生成BA-BZ列
for i in range(1, 27):
column_letter_map[52 + i] = f"B{chr(64 + i)}"
# 生成CA-CZ列
for i in range(1, 27):
column_letter_map[78 + i] = f"C{chr(64 + i)}"
# 添加已知的特殊列
column_letter_map.update({
16: "P", 23: "W", 27: "AA", 30: "AD", 34: "AH", 37: "AK",
42: "AP", 45: "AS", 50: "AX", 53: "BA", 57: "BE", 60: "BH",
62: "BL", 65: "BO", 71: "BS", 74: "BV", 78: "BZ", 85: "CG"
})
return column_letter_map
def process_file(self, file_path):
"""
处理单个SCL文件并返回所有统计结果
返回: (results, color_report)
- results: 规则统计结果
- color_report: 颜色分析报告
"""
# 重置规则统计
self.reset_rule_stats()
results = {}
missing_data = [] # 存储缺失数据信息
# 重置规则统计
self.reset_rule_stats()
try:
self.logger.info(f"开始处理SCL文件: {file_path}")
# 加载SCL文件
scl_wb = openpyxl.load_workbook(file_path)
scl_sheet = scl_wb.active
self.logger.info(f"工作表加载成功: {scl_sheet.title}, 总行数: {scl_sheet.max_row}")
# 检查是否有足够的数据行
if scl_sheet.max_row < self.DATA_START_ROW:
self.logger.info("文件没有数据行,跳过处理")
return results, "", missing_data
# 初始化统计结果
stats = {
# 规则1-18: 备注值统计
"diff_no_fill": defaultdict(int), # 规则1: 変更内容无颜色填充
"diff_fill": defaultdict(int), # 规则2: 変更内容有颜色填充
"diff_add_no_fill": defaultdict(int), # 规则3: 差分种别="追加"且无颜色填充
"diff_add_fill": defaultdict(int), # 规则4: 差分种别="追加"且有颜色填充
"diff_change_no_fill": defaultdict(int), # 规则5: 差分种别="変更"且无颜色填充
"diff_change_fill": defaultdict(int), # 规则6: 差分种别="変更"且有颜色填充
"diff_delete_no_fill": defaultdict(int), # 规则7: 差分种别="削除"且无颜色填充
"diff_delete_fill": defaultdict(int), # 规则8: 差分种别="削除"且有颜色填充
"valid_yes_no_fill": defaultdict(int), # 规则9: 判断="有意"且无颜色填充
"valid_yes_fill": defaultdict(int), # 规则10: 判断="有意"且有颜色填充
"valid_no_no_fill": defaultdict(int), # 规则11: 判断="無効"且无颜色填充
"valid_no_fill": defaultdict(int), # 规则12: 判断="無効"且有颜色填充
"valid_yes_reason_no_fill": defaultdict(int), # 规则13: 判断="有意"且理由无颜色填充
"valid_yes_reason_fill": defaultdict(int), # 规则14: 判断="有意"且理由有颜色填充
"valid_no_reason_no_fill": defaultdict(int), # 规则15: 判断="無効"且理由无颜色填充
"valid_no_reason_fill": defaultdict(int), # 规则16: 判断="無効"且理由有颜色填充
"background_no_fill": defaultdict(int), # 规则17: 背景无颜色填充
"background_fill": defaultdict(int), # 规则18: 背景有颜色填充,
# 规则19-59: 计数统计
"rule19": 0, # 规则19: 包含关键词且无颜色填充
"rule20": 0, # 规则20: 不包含关键词且无颜色填充
"rule21": 0, # 规则21: 包含关键词且猩红填充
"rule22": 0, # 规则22: 不包含关键词且猩红填充
"rule23": 0, # 规则23: 包含关键词且钢蓝填充
"rule24": 0, # 规则24: 不包含关键词且钢蓝填充
"rule25": 0, # 规则25: 包含关键词且巧克力黄填充
"rule26": 0, # 规则26: 不包含关键词且巧克力黄填充
"rule27": 0, # L列="追加"且无颜色填充且M列包含关键词
"rule28": 0, # L列="追加"且无颜色填充且M列不包含关键词
"rule29": 0, # L列="追加"且有颜色填充且M列包含关键词
"rule30": 0, # L列="追加"且有颜色填充且M列不包含关键词
"rule31": 0, # L列="変更"且无颜色填充且M列包含关键词
"rule32": 0, # L列="変更"且无颜色填充且M列不包含关键词
"rule33": 0, # L列="変更"且有颜色填充且M列包含关键词
"rule34": 0, # L列="変更"且有颜色填充且M列不包含关键词
"rule35": 0, # L列="差分無し"的计数
"rule36": 0, # N列="有意"且无颜色填充且M列包含关键词
"rule37": 0, # N列="有意"且无颜色填充且M列不包含关键词
"rule38": 0, # N列="有意"且有颜色填充且M列包含关键词
"rule39": 0, # N列="有意"且有颜色填充且M列不包含关键词
"rule40": 0, # N列="無効"且无颜色填充且M列包含关键词
"rule41": 0, # N列="無効"且无颜色填充且M列不包含关键词
"rule42": 0, # N列="無効"且有颜色填充且M列包含关键词
"rule43": 0, # N列="無効"且有颜色填充且M列不包含关键词
"rule44": 0, # N列="有意"且O列无颜色填充且M列包含关键词
"rule45": 0, # N列="有意"且O列无颜色填充且M列不包含关键词
"rule46": 0, # N列="有意"且O列有颜色填充且M列包含关键词
"rule47": 0, # N列="有意"且O列有颜色填充且M列不包含关键词
"rule48": 0, # N列="無効"且O列无颜色填充且M列包含关键词
"rule49": 0, # N列="無効"且O列无颜色填充且M列不包含关键词
"rule50": 0, # N列="無効"且O列有颜色填充且M列包含关键词
"rule51": 0, # N列="無効"且O列有颜色填充且M列不包含关键词
"rule52": 0, # P列无颜色填充且M列包含关键词
"rule53": 0, # P列无颜色填充且M列不包含关键词
"rule54": 0, # P列颜色为猩红且M列包含关键词
"rule55": 0, # P列颜色为猩红且M列不包含关键词
"rule56": 0, # P列颜色为钢蓝且M列包含关键词
"rule57": 0, # P列颜色为钢蓝且M列不包含关键词
"rule58": 0, # P列颜色为巧克力黄且M列包含关键词
"rule59": 0, # P列颜色为巧克力黄且M列不包含关键词
}
# 遍历所有数据行
for row_idx in range(self.DATA_START_ROW, scl_sheet.max_row + 1):
# 获取所有需要的单元格
diff_content_cell = scl_sheet.cell(row_idx, self.COLUMN_MAPPING["变更内容"])
diff_cell = scl_sheet.cell(row_idx, self.COLUMN_MAPPING["差分種別"])
note_cell = scl_sheet.cell(row_idx, self.COLUMN_MAPPING["备注"])
valid_cell = scl_sheet.cell(row_idx, self.COLUMN_MAPPING["判断列"])
reason_cell = scl_sheet.cell(row_idx, self.COLUMN_MAPPING["判断理由"])
background_cell = scl_sheet.cell(row_idx, self.COLUMN_MAPPING["变更背景"])
# 获取单元格值
diff_value = str(diff_cell.value).strip() if diff_cell.value else ""
note_value = str(note_cell.value).strip() if note_cell.value else ""
valid_value = str(valid_cell.value).strip() if valid_cell.value else ""
# 获取备注值并处理多值情况
note_values = self.split_note_values(note_value)
# 获取M列值
m_value = str(diff_content_cell.value) if diff_content_cell.value else ""
# 检查是否包含关键词(使用增强版匹配)
contains_keyword, matched_keywords = self.contains_keyword(m_value)
# 记录匹配结果(调试用)
if contains_keyword:
self.logger.debug(f"行 {row_idx} 匹配到关键词: {', '.join(matched_keywords)}")
else:
self.logger.debug(f"行 {row_idx} 未匹配到关键词")
# 获取单元格颜色状态
l_color_name, l_color_hex, l_no_fill = self.get_cell_color_status(diff_cell)
m_color_name, m_color_hex, m_no_fill = self.get_cell_color_status(diff_content_cell)
n_color_name, n_color_hex, n_no_fill = self.get_cell_color_status(valid_cell)
o_color_name, o_color_hex, o_no_fill = self.get_cell_color_status(reason_cell)
p_color_name, p_color_hex, p_no_fill = self.get_cell_color_status(background_cell)
# 记录颜色信息用于调试
self.logger.debug(f"行 {row_idx} 颜色状态: "
f"L列={l_color_name}, "
f"M列={m_color_name}, "
f"N列={n_color_name}, "
f"O列={o_color_name}, "
f"P列={p_color_name}")
# 规则1-18处理
# 规则1: 変更内容无颜色填充
if m_no_fill and note_values:
for nv in note_values:
stats["diff_no_fill"][nv] += 1
# 记录到规则统计
self.rule_stats["diff_no_fill"][nv] += 1
# 规则2: 変更内容有颜色填充
if not m_no_fill and note_values:
for nv in note_values:
stats["diff_fill"][nv] += 1
self.rule_stats["diff_fill"][nv] += 1
# 规则3: 差分种别="追加"且无颜色填充
if diff_value == "追加" and l_no_fill and note_values:
for nv in note_values:
stats["diff_add_no_fill"][nv] += 1
self.rule_stats["diff_add_no_fill"][nv] += 1
# 规则4: 差分种别="追加"且有颜色填充
if diff_value == "追加" and not l_no_fill and note_values:
for nv in note_values:
stats["diff_add_fill"][nv] += 1
self.rule_stats["diff_add_fill"][nv] += 1
# 规则5: 差分种别="変更"且无颜色填充
if diff_value == "変更" and l_no_fill and note_values:
for nv in note_values:
stats["diff_change_no_fill"][nv] += 1
self.rule_stats["diff_change_no_fill"][nv] += 1
# 规则6: 差分种别="変更"且有颜色填充
if diff_value == "変更" and not l_no_fill and note_values:
for nv in note_values:
stats["diff_change_fill"][nv] += 1
self.rule_stats["diff_change_fill"][nv] += 1
# 规则7: 差分种别="削除"且无颜色填充
if diff_value == "削除" and l_no_fill and note_values:
for nv in note_values:
stats["diff_delete_no_fill"][nv] += 1
self.rule_stats["diff_delete_no_fill"][nv] += 1
# 规则8: 差分种别="削除"且有颜色填充
if diff_value == "削除" and not l_no_fill and note_values:
for nv in note_values:
stats["diff_delete_fill"][nv] += 1
self.rule_stats["diff_delete_fill"][nv] += 1
# 规则9: 判断="有意"且无颜色填充
if valid_value == "有意" and n_no_fill and note_values:
for nv in note_values:
stats["valid_yes_no_fill"][nv] += 1
self.rule_stats["valid_yes_no_fill"][nv] += 1
# 规则10: 判断="有意"且有颜色填充
if valid_value == "有意" and not n_no_fill and note_values:
for nv in note_values:
stats["valid_yes_fill"][nv] += 1
self.rule_stats["valid_yes_fill"][nv] += 1
# 规则11: 判断="無効"且无颜色填充
if valid_value == "無効" and n_no_fill and note_values:
for nv in note_values:
stats["valid_no_no_fill"][nv] += 1
self.rule_stats["valid_no_no_fill"][nv] += 1
# 规则12: 判断="無効"且有颜色填充
if valid_value == "無効" and not n_no_fill and note_values:
for nv in note_values:
stats["valid_no_fill"][nv] += 1
self.rule_stats["valid_no_fill"][nv] += 1
# 规则13: 判断="有意"且理由无颜色填充
if valid_value == "有意" and o_no_fill and note_values:
for nv in note_values:
stats["valid_yes_reason_no_fill"][nv] += 1
self.rule_stats["valid_yes_reason_no_fill"][nv] += 1
# 规则14: 判断="有意"且理由有颜色填充
if valid_value == "有意" and not o_no_fill and note_values:
for nv in note_values:
stats["valid_yes_reason_fill"][nv] += 1
self.rule_stats["valid_yes_reason_fill"][nv] += 1
# 规则15: 判断="無効"且理由无颜色填充
if valid_value == "無効" and o_no_fill and note_values:
for nv in note_values:
stats["valid_no_reason_no_fill"][nv] += 1
self.rule_stats["valid_no_reason_no_fill"][nv] += 1
# 规则16: 判断="無効"且理由有颜色填充
if valid_value == "無効" and not o_no_fill and note_values:
for nv in note_values:
stats["valid_no_reason_fill"][nv] += 1
self.rule_stats["valid_no_reason_fill"][nv] += 1
# 规则17: 背景无颜色填充
if p_no_fill and note_values:
for nv in note_values:
stats["background_no_fill"][nv] += 1
self.rule_stats["background_no_fill"][nv] += 1
# 规则18: 背景有颜色填充
if not p_no_fill and note_values:
for nv in note_values:
stats["background_fill"][nv] += 1
self.rule_stats["background_fill"][nv] += 1
# 规则19-59处理
# 规则19: 包含关键词且无颜色填充
if contains_keyword and m_no_fill:
stats["rule19"] += 1
self.rule_stats["rule19"]["count"] += 1
# 规则20: 不包含关键词且无颜色填充
if not contains_keyword and m_no_fill:
stats["rule20"] += 1
self.rule_stats["rule20"]["count"] += 1
# 规则21: 包含关键词且猩红填充
if contains_keyword and m_color_name == "猩红":
stats["rule21"] += 1
self.rule_stats["rule21"]["count"] += 1
# 规则22: 不包含关键词且猩红填充
if not contains_keyword and m_color_name == "猩红":
stats["rule22"] += 1
self.rule_stats["rule22"]["count"] += 1
# 规则23: 包含关键词且钢蓝填充
if contains_keyword and m_color_name == "钢蓝":
stats["rule23"] += 1
self.rule_stats["rule23"]["count"] += 1
# 规则24: 不包含关键词且钢蓝填充
if not contains_keyword and m_color_name == "钢蓝":
stats["rule24"] += 1
self.rule_stats["rule24"]["count"] += 1
# 规则25: 包含关键词且巧克力黄填充
if contains_keyword and m_color_name == "巧克力黄":
stats["rule25"] += 1
self.rule_stats["rule25"]["count"] += 1
# 规则26: 不包含关键词且巧克力黄填充
if not contains_keyword and m_color_name == "巧克力黄":
stats["rule26"] += 1
self.rule_stats["rule26"]["count"] += 1
# 规则27: L列="追加"且无颜色填充且M列包含关键词
if diff_value == "追加" and l_no_fill and contains_keyword:
stats["rule27"] += 1
self.rule_stats["rule27"]["count"] += 1
# 规则28: L列="追加"且无颜色填充且M列不包含关键词
if diff_value == "追加" and l_no_fill and not contains_keyword:
stats["rule28"] += 1
self.rule_stats["rule28"]["count"] += 1
# 规则29: L列="追加"且有颜色填充且M列包含关键词
if diff_value == "追加" and not l_no_fill and contains_keyword:
stats["rule29"] += 1
self.rule_stats["rule29"]["count"] += 1
# 规则30: L列="追加"且有颜色填充且M列不包含关键词
if diff_value == "追加" and not l_no_fill and not contains_keyword:
stats["rule30"] += 1
self.rule_stats["rule30"]["count"] += 1
# 规则31: L列="変更"且无颜色填充且M列包含关键词
if diff_value == "変更" and l_no_fill and contains_keyword:
stats["rule31"] += 1
self.rule_stats["rule31"]["count"] += 1
# 规则32: L列="変更"且无颜色填充且M列不包含关键词
if diff_value == "変更" and l_no_fill and not contains_keyword:
stats["rule32"] += 1
self.rule_stats["rule32"]["count"] += 1
# 规则33: L列="変更"且有颜色填充且M列包含关键词
if diff_value == "変更" and not l_no_fill and contains_keyword:
stats["rule33"] += 1
self.rule_stats["rule33"]["count"] += 1
# 规则34: L列="変更"且有颜色填充且M列不包含关键词
if diff_value == "変更" and not l_no_fill and not contains_keyword:
stats["rule34"] += 1
self.rule_stats["rule34"]["count"] += 1
# 规则35: L列="差分無し"的计数
if diff_value == "差分無し":
stats["rule35"] += 1
self.rule_stats["rule35"]["count"] += 1
# 规则36: N列="有意"且无颜色填充且M列包含关键词
if valid_value == "有意" and n_no_fill and contains_keyword:
stats["rule36"] += 1
self.rule_stats["rule36"]["count"] += 1
# 规则37: N列="有意"且无颜色填充且M列不包含关键词
if valid_value == "有意" and n_no_fill and not contains_keyword:
stats["rule37"] += 1
self.rule_stats["rule37"]["count"] += 1
# 规则38: N列="有意"且有颜色填充且M列包含关键词
if valid_value == "有意" and not n_no_fill and contains_keyword:
stats["rule38"] += 1
self.rule_stats["rule38"]["count"] += 1
# 规则39: N列="有意"且有颜色填充且M列不包含关键词
if valid_value == "有意" and not n_no_fill and not contains_keyword:
stats["rule39"] += 1
self.rule_stats["rule39"]["count"] += 1
# 规则40: N列="無効"且无颜色填充且M列包含关键词
if valid_value == "無効" and n_no_fill and contains_keyword:
stats["rule40"] += 1
self.rule_stats["rule40"]["count"] += 1
# 规则41: N列="無効"且无颜色填充且M列不包含关键词
if valid_value == "無効" and n_no_fill and not contains_keyword:
stats["rule41"] += 1
self.rule_stats["rule41"]["count"] += 1
# 规则42: N列="無効"且有颜色填充且M列包含关键词
if valid_value == "無効" and not n_no_fill and contains_keyword:
stats["rule42"] += 1
self.rule_stats["rule42"]["count"] += 1
# 规则43: N列="無効"且有颜色填充且M列不包含关键词
if valid_value == "無効" and not n_no_fill and not contains_keyword:
stats["rule43"] += 1
self.rule_stats["rule43"]["count"] += 1
# 规则44: N列="有意"且O列无颜色填充且M列包含关键词
if valid_value == "有意" and o_no_fill and contains_keyword:
stats["rule44"] += 1
self.rule_stats["rule44"]["count"] += 1
# 规则45: N列="有意"且O列无颜色填充且M列不包含关键词
if valid_value == "有意" and o_no_fill and not contains_keyword:
stats["rule45"] += 1
self.rule_stats["rule45"]["count"] += 1
# 规则46: N列="有意"且O列有颜色填充且M列包含关键词
if valid_value == "有意" and not o_no_fill and contains_keyword:
stats["rule46"] += 1
self.rule_stats["rule46"]["count"] += 1
# 规则47: N列="有意"且O列有颜色填充且M列不包含关键词
if valid_value == "有意" and not o_no_fill and not contains_keyword:
stats["rule47"] += 1
self.rule_stats["rule47"]["count"] += 1
# 规则48: N列="無効"且O列无颜色填充且M列包含关键词
if valid_value == "無効" and o_no_fill and contains_keyword:
stats["rule48"] += 1
self.rule_stats["rule48"]["count"] += 1
# 规则49: N列="無効"且O列无颜色填充且M列不包含关键词
if valid_value == "無効" and o_no_fill and not contains_keyword:
stats["rule49"] += 1
self.rule_stats["rule49"]["count"] += 1
# 规则50: N列="無効"且O列有颜色填充且M列包含关键词
if valid_value == "無効" and not o_no_fill and contains_keyword:
stats["rule50"] += 1
self.rule_stats["rule50"]["count"] += 1
# 规则51: N列="無効"且O列有颜色填充且M列不包含关键词
if valid_value == "無効" and not o_no_fill and not contains_keyword:
stats["rule51"] += 1
self.rule_stats["rule51"]["count"] += 1
# 规则52: P列无颜色填充且M列包含关键词
if p_no_fill and contains_keyword:
stats["rule52"] += 1
self.rule_stats["rule52"]["count"] += 1
# 规则53: P列无颜色填充且M列不包含关键词
if p_no_fill and not contains_keyword:
stats["rule53"] += 1
self.rule_stats["rule53"]["count"] += 1
# 规则54: P列颜色为猩红且M列包含关键词
if p_color_name == "猩红" and contains_keyword:
stats["rule54"] += 1
self.rule_stats["rule54"]["count"] += 1
# 规则55: P列颜色为猩红且M列不包含关键词
if p_color_name == "猩红" and not contains_keyword:
stats["rule55"] += 1
self.rule_stats["rule55"]["count"] += 1
# 规则56: P列颜色为钢蓝且M列包含关键词
if p_color_name == "钢蓝" and contains_keyword:
stats["rule56"] += 1
self.rule_stats["rule56"]["count"] += 1
# 规则57: P列颜色为钢蓝且M列不包含关键词
if p_color_name == "钢蓝" and not contains_keyword:
stats["rule57"] += 1
self.rule_stats["rule57"]["count"] += 1
# 规则58: P列颜色为巧克力黄且M列包含关键词
if p_color_name == "巧克力黄" and contains_keyword:
stats["rule58"] += 1
self.rule_stats["rule58"]["count"] += 1
# 规则59: P列颜色为巧克力黄且M列不包含关键词
if p_color_name == "巧克力黄" and not contains_keyword:
stats["rule59"] += 1
self.rule_stats["rule59"]["count"] += 1
# 在遍历所有数据行后,添加规则统计日志
self.log_rule_stats(file_path)
# 处理统计结果
for rule, data in stats.items():
# 对于规则1-18,输出备注值及其出现次数
if rule in ["diff_no_fill", "diff_fill", "diff_add_no_fill", "diff_add_fill",
"diff_change_no_fill", "diff_change_fill", "diff_delete_no_fill",
"diff_delete_fill", "valid_yes_no_fill", "valid_yes_fill",
"valid_no_no_fill", "valid_no_fill", "valid_yes_reason_no_fill",
"valid_yes_reason_fill", "valid_no_reason_no_fill",
"valid_no_reason_fill", "background_no_fill", "background_fill"]:
# 检查是否有数据
if not data: # 字典为空
results[rule] = "/"
self.logger.info(f"{rule}: 无数据,输出 '/'")
continue
# 格式化备注统计结果
results[rule] = self.format_note_stats(data)
# 优化日志输出:分行显示
self.logger.info(f"{rule}: 结果:")
for line in results[rule].split('\n'):
self.logger.info(f" {line}")
else:
# 对于规则19-59,直接输出计数值
results[rule] = str(data)
self.logger.info(f"{rule}: 结果 {results[rule]}")
# 生成颜色报告
color_report = self.color_detector.generate_color_report()
self.logger.info("\n" + color_report)
return results, color_report, missing_data
except Exception as e:
error_msg = f"处理SCL文件失败: {str(e)}"
logger.exception(f"处理SCL文件失败: {file_path} - {str(e)}")
# 返回错误信息
return {rule: f"错误: {str(e)}" for rule in self.RULE_MAPPING}, f"错误: {error_msg}", [{
'row': 0,
'col': 0,
'col_name': '错误',
'message': error_msg
}]
# 创建全局日志记录器
logger = logging.getLogger("SCLMultiProcessor")
logger.setLevel(logging.INFO)
class SCLMultiProcessor:
def __init__(self, root):
self.root = root
# 创建日志目录
self.log_dir = "logs"
self.root.title("SCL文件处理系统")
self.root.geometry("1100x750") # 增加窗口尺寸以适应新控件
# 初始化变量
self.color_detector = EnhancedColorDetector()
self.stats_processor = SCLRuleProcessor(self.color_detector)
self.empty_cell_detector = EnhancedEmptyCellDetector(self.color_detector)
self.progress_var = tk.DoubleVar()
os.makedirs(self.log_dir, exist_ok=True)
# 初始化日志系统
self.current_log_file = None
self.setup_logger()
# 创建主框架
self.main_frame = ttk.Frame(root, padding="10")
self.main_frame.pack(fill=tk.BOTH, expand=True)
# 创建UI
self.create_ui()
# 记录UI初始化完成
self.logger.info("用户界面初始化完成")
def toggle_config_fields(self):
"""根据操作模式显示/隐藏相关配置字段"""
mode = self.operation_mode.get()
# 统计模式:显示统计表路径,隐藏CheckSheet路径
if mode == "stats":
self.input_frame.pack(fill=tk.X, pady=5) # 显示统计表路径
self.checksheet_frame.pack_forget() # 隐藏CheckSheet路径
self.logger.info("切换到统计模式,显示统计表路径")
# SCL格式检查模式:隐藏统计表路径,显示CheckSheet路径
elif mode == "empty_check":
self.input_frame.pack_forget() # 隐藏统计表路径
self.checksheet_frame.pack(fill=tk.X, pady=5) # 显示CheckSheet路径
self.logger.info("切换到SCL格式检查模式,显示CheckSheet路径")
def create_ui(self):
"""创建用户界面"""
# 操作模式选择区域 - 放在最前面
mode_frame = ttk.LabelFrame(self.main_frame, text="操作模式", padding="10")
mode_frame.pack(fill=tk.X, pady=5)
# 添加操作模式单选按钮
self.operation_mode = tk.StringVar(value="stats") # 默认选择统计模式
ttk.Radiobutton(mode_frame, text="统计功能", variable=self.operation_mode,
value="stats", command=self.toggle_config_fields).pack(side=tk.LEFT, padx=10)
ttk.Radiobutton(mode_frame, text="SCL格式检查", variable=self.operation_mode,
value="empty_check", command=self.toggle_config_fields).pack(side=tk.LEFT, padx=10)
# 文件选择区域 - 放在操作模式后面
file_frame = ttk.LabelFrame(self.main_frame, text="文件选择", padding="10")
file_frame.pack(fill=tk.X, pady=5)
# 输入文件选择 - 统计表路径(统计模式需要)
self.input_frame = ttk.Frame(file_frame)
ttk.Label(self.input_frame, text="统计表:").pack(side=tk.LEFT, padx=5)
self.input_path_var = tk.StringVar()
input_entry = ttk.Entry(self.input_frame, textvariable=self.input_path_var, width=70)
input_entry.pack(side=tk.LEFT, fill=tk.X, expand=True, padx=5)
ttk.Button(self.input_frame, text="浏览...", command=self.browse_input_file).pack(side=tk.LEFT, padx=5)
self.input_frame.pack(fill=tk.X, pady=5) # 默认显示
# 配置区域
config_frame = ttk.LabelFrame(self.main_frame, text="处理配置", padding="10")
config_frame.pack(fill=tk.X, pady=5)
# 添加SCL文件夹路径输入
scl_folder_frame = ttk.Frame(config_frame)
scl_folder_frame.pack(fill=tk.X, pady=5)
ttk.Label(scl_folder_frame, text="SCL文件夹路径:").grid(row=0, column=0, padx=5, sticky=tk.W)
self.scl_folder_var = tk.StringVar()
scl_folder_entry = ttk.Entry(scl_folder_frame, textvariable=self.scl_folder_var, width=60)
scl_folder_entry.grid(row=0, column=1, padx=5, sticky=tk.W)
ttk.Button(scl_folder_frame, text="浏览...", command=self.browse_scl_folder).grid(row=0, column=2, padx=5, sticky=tk.W)
# 搜索选项
search_frame = ttk.Frame(config_frame)
search_frame.pack(fill=tk.X, pady=5)
ttk.Label(search_frame, text="文件前缀:").grid(row=0, column=0, padx=5, sticky=tk.W)
self.prefix_var = tk.StringVar(value="SCL_")
ttk.Entry(search_frame, textvariable=self.prefix_var, width=10).grid(row=0, column=1, padx=5, sticky=tk.W)
# 添加CheckSheet路径输入(SCL格式检查模式需要)
self.checksheet_frame = ttk.Frame(config_frame)
ttk.Label(self.checksheet_frame, text="CheckSheet路径:").grid(row=0, column=0, padx=5, sticky=tk.W)
self.checksheet_path_var = tk.StringVar()
checksheet_entry = ttk.Entry(self.checksheet_frame, textvariable=self.checksheet_path_var, width=60)
checksheet_entry.grid(row=0, column=1, padx=5, sticky=tk.W)
ttk.Button(self.checksheet_frame, text="浏览...", command=self.browse_checksheet_path).grid(row=0, column=2, padx=5, sticky=tk.W)
self.checksheet_frame.pack_forget() # 默认隐藏
# 添加性能提示
ttk.Label(config_frame, text="(表头固定在第3行,数据从第4行开始)").pack(anchor=tk.W, padx=5, pady=2)
# 日志选项
log_frame = ttk.Frame(config_frame)
log_frame.pack(fill=tk.X, pady=5)
ttk.Label(log_frame, text="日志级别:").grid(row=0, column=0, padx=5, sticky=tk.W)
self.log_level_var = tk.StringVar(value="INFO")
log_level_combo = ttk.Combobox(
log_frame, textvariable=self.log_level_var, width=10, state="readonly"
)
log_level_combo['values'] = ('DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL')
log_level_combo.grid(row=0, column=1, padx=5, sticky=tk.W)
log_level_combo.bind("<<ComboboxSelected>>", self.change_log_level)
# 处理按钮
btn_frame = ttk.Frame(self.main_frame)
btn_frame.pack(fill=tk.X, pady=10)
ttk.Button(btn_frame, text="开始处理", command=self.process_file).pack(side=tk.LEFT, padx=5)
ttk.Button(btn_frame, text="查看日志", command=self.view_log).pack(side=tk.LEFT, padx=5)
ttk.Button(btn_frame, text="导出配置", command=self.export_config).pack(side=tk.LEFT, padx=5)
ttk.Button(btn_frame, text="加载配置", command=self.load_config).pack(side=tk.LEFT, padx=5)
ttk.Button(btn_frame, text="退出", command=self.root.destroy).pack(side=tk.RIGHT, padx=5)
# 进度条
progress_frame = ttk.Frame(self.main_frame)
progress_frame.pack(fill=tk.X, pady=5)
ttk.Label(progress_frame, text="处理进度:").pack(side=tk.LEFT, padx=5)
self.progress_bar = ttk.Progressbar(
progress_frame, variable=self.progress_var, maximum=100, length=700
)
self.progress_bar.pack(side=tk.LEFT, fill=tk.X, expand=True, padx=5)
self.progress_label = ttk.Label(progress_frame, text="0%")
self.progress_label.pack(side=tk.LEFT, padx=5)
# 结果展示区域
result_frame = ttk.LabelFrame(self.main_frame, text="处理结果", padding="10")
result_frame.pack(fill=tk.BOTH, expand=True, pady=5)
# 结果文本框
self.result_text = scrolledtext.ScrolledText(
result_frame, wrap=tk.WORD, height=20
)
self.result_text.pack(fill=tk.BOTH, expand=True)
self.result_text.config(state=tk.DISABLED)
# 状态栏
self.status_var = tk.StringVar(value="就绪")
status_bar = ttk.Label(self.main_frame, textvariable=self.status_var, relief=tk.SUNKEN, anchor=tk.W)
status_bar.pack(fill=tk.X, pady=5)
self.logger.info("UI创建完成")
def browse_checksheet_path(self):
"""浏览CheckSheet文件夹"""
folder_path = filedialog.askdirectory(title="选择CheckSheet文件夹")
if folder_path:
self.checksheet_path_var.set(folder_path)
self.logger.info(f"已选择CheckSheet文件夹: {folder_path}")
def setup_logger(self):
"""配置日志记录器"""
# 创建唯一日志文件名
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
self.current_log_file = os.path.join(self.log_dir, f"scl_processor_{timestamp}.log")
# 创建或获取日志记录器
self.logger = logging.getLogger("SCLProcessor")
self.logger.setLevel(logging.INFO)
# 移除所有现有处理器
for handler in self.logger.handlers[:]:
self.logger.removeHandler(handler)
# 创建文件处理器
file_handler = logging.FileHandler(self.current_log_file, encoding='utf-8')
file_handler.setLevel(logging.INFO)
# 创建控制台处理器
console_handler = logging.StreamHandler()
console_handler.setLevel(logging.INFO)
# 创建日志格式
formatter = logging.Formatter(
'%(asctime)s - %(name)s - %(levelname)s - %(message)s',
datefmt='%Y-%m-%d %H:%M:%S'
)
file_handler.setFormatter(formatter)
console_handler.setFormatter(formatter)
# 添加处理器
self.logger.addHandler(file_handler)
self.logger.addHandler(console_handler)
# 记录日志初始化信息
self.logger.info(f"日志系统已初始化,日志文件: {self.current_log_file}")
self.logger.info(f"日志目录: {os.path.abspath(self.log_dir)}")
def change_log_level(self, event=None):
"""动态更改日志级别"""
try:
# 获取选择的日志级别
level_str = self.log_level_var.get()
log_level = getattr(logging, level_str.upper())
# 更新日志记录器级别
self.logger.setLevel(log_level)
# 更新所有处理器的级别
for handler in self.logger.handlers:
handler.setLevel(log_level)
self.logger.info(f"日志级别已成功更改为: {level_str}")
self.status_var.set(f"日志级别: {level_str}")
except AttributeError:
self.logger.error(f"无效的日志级别: {level_str}")
messagebox.showerror("错误", f"无效的日志级别: {level_str}")
except Exception as e:
self.logger.exception("更改日志级别时发生错误")
messagebox.showerror("错误", f"更改日志级别失败: {str(e)}")
def browse_input_file(self):
"""浏览输入文件"""
file_path = filedialog.askopenfilename(
filetypes=[("Excel 文件", "*.xlsx *.xls"), ("所有文件", "*.*")]
)
if file_path:
self.input_path_var.set(file_path)
self.logger.info(f"已选择输入文件: {file_path}")
def browse_scl_folder(self):
"""浏览SCL文件夹"""
folder_path = filedialog.askdirectory(title="选择SCL文件夹")
if folder_path:
self.scl_folder_var.set(folder_path)
self.logger.info(f"已选择SCL文件夹: {folder_path}")
def highlight_cell(self, sheet, row, col, color="FFFF0000"):
"""为单元格设置背景色"""
try:
fill = PatternFill(start_color=color, end_color=color, fill_type="solid")
sheet.cell(row=row, column=col).fill = fill
return True
except Exception as e:
self.logger.error(f"设置单元格颜色失败: {str(e)}")
return False
def process_file(self):
"""处理文件 - 根据操作模式执行不同处理流程"""
operation_mode = self.operation_mode.get()
# 重置结果
self.result_text.config(state=tk.NORMAL)
self.result_text.delete(1.0, tk.END)
self.result_text.insert(tk.END, "开始处理...\n")
self.result_text.see(tk.END)
self.result_text.config(state=tk.DISABLED)
self.status_var.set("开始处理文件...")
self.root.update()
try:
# 每次处理前重新初始化日志系统
self.setup_logger()
# 记录处理开始信息
self.logger.info("=" * 50)
self.logger.info(f"开始处理: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
self.logger.info(f"操作模式: {operation_mode}")
self.logger.info("=" * 50)
# 更新UI显示当前日志文件
self.status_var.set(f"当前日志: {os.path.basename(self.current_log_file)}")
# 获取公共配置参数
scl_folder = self.scl_folder_var.get()
prefix = self.prefix_var.get()
# 根据操作模式执行不同处理流程
if operation_mode == "stats":
self.process_stats_mode(scl_folder, prefix)
elif operation_mode == "empty_check":
self.process_empty_check_mode(scl_folder, prefix)
else:
messagebox.showerror("错误", f"未知操作模式: {operation_mode}")
self.logger.error(f"未知操作模式: {operation_mode}")
except Exception as e:
error_msg = f"处理文件时出错: {str(e)}"
logger.exception(f"处理文件时出错: {str(e)}")
messagebox.showerror("错误", error_msg)
self.status_var.set(f"错误: {str(e)}")
# 更新结果文本框
self.result_text.config(state=tk.NORMAL)
self.result_text.insert(tk.END, f"\n错误: {error_msg}\n")
self.result_text.see(tk.END)
self.result_text.config(state=tk.DISABLED)
def process_stats_mode(self, scl_folder, prefix):
"""处理统计模式:扫描统计表E列获取SCL文件名"""
# 检查输入文件是否存在
input_file = self.input_path_var.get()
if not input_file:
messagebox.showwarning("警告", "请先选择统计表")
self.logger.warning("未选择统计表")
return
if not os.path.exists(input_file):
messagebox.showerror("错误", f"统计表文件不存在: {input_file}")
self.logger.error(f"统计表文件不存在: {input_file}")
return
# 使用openpyxl加载工作簿(保留格式)
wb = openpyxl.load_workbook(input_file)
sheet = wb.active
self.logger.info(f"工作簿加载成功, 工作表: {sheet.title}")
# 扫描E列(第5列)
total_rows = sheet.max_row
processed_count = 0
found_files = 0
problem_files = 0
self.logger.info(f"开始扫描E列, 总行数: {total_rows}")
start_time = time.time()
for row_idx in range(1, total_rows + 1):
# 更新进度
progress = (row_idx / total_rows) * 100
self.progress_var.set(progress)
self.progress_label.config(text=f"{progress:.1f}%")
self.status_var.set(f"处理行: {row_idx}/{total_rows}")
self.root.update()
cell = sheet.cell(row=row_idx, column=5)
cell_value = str(cell.value) if cell.value else ""
# 检查是否包含前缀的文件名
if prefix in cell_value:
# 改进的正则表达式:匹配包含空格的文件名
# 匹配模式:以prefix开头,后跟任意字符(包括空格),直到文件扩展名
pattern = re.compile(
fr'{prefix}[^\s]*\s*[^\s]*\.(?:xlsx|xls|xlsm)',
re.IGNORECASE
)
file_names = pattern.findall(cell_value)
self.logger.info(f"行 {row_idx}: 找到文件: {', '.join(file_names)}")
result_lines = []
file_has_problems = False # 标记当前行是否有问题文件
for file_name in file_names:
# 在SCL文件夹及其子目录中查找文件(保留文件名中的空格)
file_path = self.find_single_scl_file(scl_folder, file_name)
# 检查文件是否存在
if not file_path or not os.path.exists(file_path):
result_lines.append(f"{file_name}: 文件不存在")
self.logger.warning(f"文件不存在: {file_name}")
# 标记文件不存在的单元格为紫色
self.highlight_cell(sheet, row_idx, 5, "FF800080")
file_has_problems = True
problem_files += 1
continue
# 执行统计处理
results, color_report, missing_data = self.stats_processor.process_file(file_path)
# 如果有数据缺失
if missing_data:
file_has_problems = True
problem_files += 1
result_lines.append(f"{file_name}: 数据缺失!")
for item in missing_data:
result_lines.append(f" - {item['message']}")
self.logger.warning(item['message'])
else:
result_lines.append(f"{file_name}: 处理完成")
# 将结果写入主Excel文件的不同列
for rule_name, result_str in results.items():
target_col = self.stats_processor.RULE_MAPPING.get(rule_name)
if target_col:
target_cell = sheet.cell(row=row_idx, column=target_col)
target_cell.value = result_str
found_files += 1
# 如果该行有文件存在问题,将E列单元格标红
if file_has_problems:
self.highlight_cell(sheet, row_idx, 5)
self.logger.info(f"行 {row_idx} E列单元格标记为红色(存在问题)")
# 更新结果文本框
self.result_text.config(state=tk.NORMAL)
self.result_text.insert(
tk.END, f"行 {row_idx} 处理结果:\n" + "\n".join(result_lines) + "\n\n"
)
self.result_text.see(tk.END)
self.result_text.config(state=tk.DISABLED)
processed_count += 1
# 保存修改后的Excel文件
output_path = input_file.replace(".xlsx", "_processed.xlsx")
wb.save(output_path)
self.logger.info(f"结果已保存到: {output_path}")
elapsed_time = time.time() - start_time
status_msg = f"统计处理完成! 处理了 {processed_count} 个文件项, 耗时 {elapsed_time:.2f} 秒"
if problem_files > 0:
status_msg += f", {problem_files} 个文件存在问题"
self.status_var.set(status_msg)
self.logger.info(status_msg)
# 更新结果文本框
self.result_text.config(state=tk.NORMAL)
self.result_text.insert(
tk.END, f"\n{status_msg}\n"
f"结果已保存到: {output_path}\n"
)
self.result_text.see(tk.END)
self.result_text.config(state=tk.DISABLED)
messagebox.showinfo("完成", status_msg)
把rule_stats的含义替换成统计统计表的特殊列,比如规则1是要写到统计表的P列里面的,SCL文件从第六行开始读取,那数据也从第六行开始统计,直到SCL文件名也读取完的行