第3章 伪列:COLUMN_VALUE

本文深入解析了在SQL查询中利用COLUMN_VALUE伪列的方法,包括XMLTABLE和TABLE函数的应用,以及在嵌套类型中的使用场景。通过实例演示了如何获取内部嵌套表的标量值,并解释了COLUMN_VALUE在不同情境下的作用。
部署运行你感兴趣的模型镜像

原文地址:http://docs.oracle.com/cd/B19306_01/server.102/b14200/pseudocolumns004.htm

如果引用不带COLUMNS子句的XMLTABLE构造函数,或者使用TABLE函数引用一个标量嵌套表类型,数据库返回一个具有单一列的虚表。这个伪列的名称就是COLUMN_VALUE。

对于XMLTABLE,返回值的类型为XMLType。例如,以下两个语句是等价的,输出都表明了返回的列名为COLUMN_VALUE:

SELECT * FROM XMLTABLE('123');

COLUMN_VALUE
---------------------------------------
123

SELECT COLUMN_VALUE FROM (XMLTable('123'));

COLUMN_VALUE
----------------------------------------
123

对于TABLE函数,返回值的类型为集合元素。以下语句创建了一个两层的嵌套表,展示了这种情况下COLUMN_VALUE的用法:


 

以下语句使用COLUMN_VALUE查询phone类型:

SELECT t.COLUMN_VALUE from table(phone(1,2,3)) t;

COLUMN_VALUE
------------
          1
          2
          3

在嵌套类型中,可以在选择列表以及TABLE函数中使用COLUMN_VALUE伪列:

SELECT t.COLUMN_VALUE FROM
   TABLE(phone_list(phone(1,2,3))) p, TABLE(p.COLUMN_VALUE) t;
COLUMN_VALUE
------------
           1
           2
           3

关键字COLUMN_VALUE也表示没有列名或者属性名的内部嵌套表的标量值的名称,如下例所示。此时,COLUMN_VALUE不是一个伪列,而是一个实际的列名。

CREATE TABLE my_customers (
   cust_id       NUMBER,
   name          VARCHAR2(25),
   phone_numbers phone_list,
   credit_limit  NUMBER)
   NESTED TABLE phone_numbers STORE AS outer_ntab
   (NESTED TABLE COLUMN_VALUE STORE AS inner_ntab);


来自 “ ITPUB博客 ” ,链接:http://blog.itpub.net/24945919/viewspace-747914/,如需转载,请注明出处,否则将追究法律责任。

转载于:http://blog.itpub.net/24945919/viewspace-747914/

您可能感兴趣的与本文相关的镜像

Stable-Diffusion-3.5

Stable-Diffusion-3.5

图片生成
Stable-Diffusion

Stable Diffusion 3.5 (SD 3.5) 是由 Stability AI 推出的新一代文本到图像生成模型,相比 3.0 版本,它提升了图像质量、运行速度和硬件效率

import os import math from datetime import datetime, date from openpyxl import load_workbook, Workbook from openpyxl.drawing.image import Image as OpenpyxlImage from openpyxl.styles import Font # ================== 配置路径 ================== folder_path = r&#39;D:\Class\能管比对\导出文件_月&#39; folder_path2 = r&#39;D:\Class\能管比对\导出文件合并&#39; output_result = r&#39;D:\Class\能管比对\每日结果\高变异列_CV大于0.3_整列分析.xlsx&#39; # 创建输出目录 os.makedirs(os.path.dirname(output_result), exist_ok=True) # 获取今天零点时间用于过滤 today = datetime.combine(date.today(), datetime.min.time()) # ========== 第一阶段:清洗数据并保存到 folder_path2 ========== processed_count = 0 for filename in os.listdir(folder_path): if not filename.endswith(&#39;.xlsx&#39;) or filename.startswith(&#39;~$&#39;): continue src_file = os.path.join(folder_path, filename) dst_file = os.path.join(folder_path2, filename) try: wb = load_workbook(src_file) ws = wb.active data = [list(row) for row in ws.iter_rows(values_only=True)] if len(data) <= 1: continue header, rows = data[0], data[1:] cleaned_rows = [header] # 保留表头 for row in rows: if not row: continue cell_val = row[0] try: # 统一解析为 datetime 类型 if isinstance(cell_val, datetime): dt = cell_val else: # 处理字符串格式如 &#39;2025/4/1&#39; 或 &#39;2025-04-01&#39; str_val = str(cell_val).strip() dt = datetime.fromisoformat(str_val.replace(&#39;/&#39;, &#39;-&#39;)) if dt >= today: continue # 排除今天及以后的数据 except Exception: continue # 解析失败则跳过该行 cleaned_rows.append(row) # 保存清洗后的数据 new_wb = Workbook() new_ws = new_wb.active for r in cleaned_rows: new_ws.append(r) new_wb.save(dst_file) processed_count += 1 except Exception as e: print(f"⚠️ 处理 {filename} 时出错:{e}") print(f"✅ 共清洗并保存了 {processed_count} 个文件") # ========== 第二阶段:分析 CV > 0.3 的列,并记录图像来源 ========== results = [] image_sources = [] for filename in os.listdir(folder_path2): if not filename.endswith(&#39;.xlsx&#39;) or filename.startswith(&#39;~$&#39;): continue file_path = os.path.join(folder_path2, filename) filename_no_ext = os.path.splitext(filename)[0] png_path = os.path.join(folder_path2, f"{filename_no_ext}.png") try: wb = load_workbook(file_path) data = [list(row) for row in wb.active.iter_rows(values_only=True)] if len(data) < 2: continue header, rows = data[0], data[1:] # 提取各列数值数据 col_data = [[] for _ in header] for row in rows: for i, v in enumerate(row): try: col_data[i].append(float(v)) except (TypeError, ValueError): pass # 非数值跳过 # 分析第2列开始(索引1起)的变异系数 for idx, col_name in enumerate(header[1:], start=1): vals = col_data[idx] n = len(vals) if n < 2: continue mean_val = sum(vals) / n if abs(mean_val) < 1e-8: # 避免除以0 continue variance = sum((x - mean_val) ** 2 for x in vals) / n std_val = math.sqrt(variance) cv = std_val / mean_val if cv > 0.3: max_val = max(vals) min_val = min(vals) # 构建待添加项 item = { &#39;来源文件&#39;: filename_no_ext, &#39;列名&#39;: col_name, &#39;有效数据点数&#39;: n, &#39;均值&#39;: round(mean_val, 6), &#39;标准差&#39;: round(std_val, 6), &#39;变异系数(CV)&#39;: round(cv, 6), &#39;最大值&#39;: round(max_val, 6), &#39;最小值&#39;: round(min_val, 6) } # === 特殊列白名单过滤逻辑 === # 替换原来的 if 判断 col_name_clean = str(col_name).strip() # 确保转字符串并去空格 if ( (&#39;1st DI耗用量&#39; in col_name_clean and max_val <= 1500) # (&#39;CCoolingTower用水量&#39; in col_name_clean and max_val <= 15000) or # (&#39;低温热水系统&#39; in col_name_clean and max_val <= 300000) ): continue # 跳过这些“伪异常” results.append(item) image_sources.append({ &#39;source_file&#39;: filename, &#39;png_path&#39;: png_path, &#39;exists&#39;: os.path.exists(png_path) }) except Exception as e: print(f"⚠️ 分析 {filename} 时出错:{e}") # ========== 第三阶段:生成最终结果 Excel ========== if results: result_wb = Workbook() sheet1 = result_wb.active sheet1.title = "高变异列数据" sheet1.append(list(results[0].keys())) for r in results: sheet1.append(list(r.values())) # 第二个工作表:展示图表 sheet2 = result_wb.create_sheet("对应图表展示") sheet2.cell(row=1, column=1, value="来源文件") sheet2.cell(row=1, column=2, value="图表展示") current_row = 2 inserted_images_count = 0 seen_files = set() # 去重,避免重复插入同一文件图表 for item in image_sources: source_file = item[&#39;source_file&#39;] if source_file in seen_files: continue seen_files.add(source_file) # 写入来源文件名(A列) sheet2.cell(row=current_row, column=1, value=os.path.splitext(source_file)[0]) # 插入图片或错误提示(B列) if item[&#39;exists&#39;]: try: img = OpenpyxlImage(item[&#39;png_path&#39;]) img.width *= 0.5 # 缩放图片防止过大 img.height *= 0.5 sheet2.add_image(img, f&#39;B{current_row}&#39;) inserted_images_count += 1 height_in_rows = int(img.height / 15) + 3 # 动态调整行高 except Exception as e: sheet2.cell(row=current_row, column=2, value=f"❌ 加载失败: {e}") sheet2.cell(row=current_row, column=2).font = Font(color="FF0000", size=9) height_in_rows = 5 else: sheet2.cell(row=current_row, column=2, value="❌ 图像未找到") sheet2.cell(row=current_row, column=2).font = Font(italic=True, color="888888", size=9) height_in_rows = 5 # 设置当前区域行高 for i in range(current_row, current_row + height_in_rows): sheet2.row_dimensions[i].height = 15 current_row += height_in_rows # 设置列宽 sheet2.column_dimensions[&#39;A&#39;].width = 50 sheet2.column_dimensions[&#39;B&#39;].width = 80 # 保存最终结果 result_wb.save(output_result) print(f"\n✅ 分析完成!共 {len(results)} 个高变异列已保存至:\n {output_result}") print(f"📊 共尝试插入 {len(seen_files)} 张图像,成功插入 {inserted_images_count} 张。") else: print("\n🟡 未发现变异系数大于 0.3 的列。") # ========== 第四阶段:后处理 - 按关键词+最大值条件过滤并删除对应行与图表 ========== import os from openpyxl.styles import Font # 输出路径 if not os.path.exists(output_result): print(f"❌ 后处理失败:未找到输出文件 {output_result}") else: from openpyxl.utils import coordinate_from_string, column_index_from_string wb = load_workbook(output_result) sheet1 = wb["高变异列数据"] sheet2 = wb["对应图表展示"] # 定义关键词与阈值映射关系:(keyword -> threshold) keyword_threshold_map = { &#39;纯水曲线看板-CCoolingTower用水量(中水+自来水&#39;: 1500, &#39;空调-6-冷热水-冷却水系统&#39;: 3500, # 可继续扩展:&#39;其他关键词&#39;: 1000 } # 获取表头以定位列索引 headers = [cell.value for cell in sheet1[1]] try: max_col_idx = headers.index(&#39;最大值&#39;) + 1 # openpyxl 列从1开始 source_col_idx = headers.index(&#39;来源文件&#39;) + 1 except ValueError as e: print(f"❌ 表头缺失: {e}") raise # 收集需要删除的“来源文件”名称(用于匹配 sheet2) files_to_remove = set() # 第一步:扫描 sheet1,标记要删除的行 rows_to_delete = [] # 存储行号(逆序删除) for row_idx, row in enumerate(sheet1.iter_rows(min_row=2, max_row=sheet1.max_row), start=2): source_cell = row[source_col_idx - 1] max_cell = row[max_col_idx - 1] source_value = str(source_cell.value) if source_cell.value else "" try: max_value = float(max_cell.value) except (TypeError, ValueError): max_value = float(&#39;inf&#39;) # 非数字视为不满足条件 # 检查是否匹配任一关键词 + 数值条件 for keyword, threshold in keyword_threshold_map.items(): if keyword in source_value and max_value < threshold: rows_to_delete.append(row_idx) files_to_remove.add(os.path.splitext(str(source_value))[0]) # 去掉扩展名 break # 匹配一个即可 # 第二步:从下往上删除 sheet1 的行(避免索引错乱) for row_idx in sorted(rows_to_delete, reverse=True): sheet1.delete_rows(row_idx) print(f"🗑️ 已删除 {len(rows_to_delete)} 行不符合条件的数据") # 第三步:清理 sheet2 中对应的图表块 current_row = 2 deleted_image_blocks = 0 while current_row <= sheet2.max_row: cell_a = sheet2.cell(row=current_row, column=1) if cell_a.value and str(cell_a.value) in files_to_remove: # 找到该区块的高度(查找下一个非空A列之前有多少行) block_height = 1 next_check_row = current_row + 1 while next_check_row <= sheet2.max_row: next_cell_a = sheet2.cell(row=next_check_row, column=1) if next_cell_a.value is not None: break block_height += 1 # 删除整个 block 占据的行 sheet2.delete_rows(current_row, block_height) deleted_image_blocks += 1 # 注意:current_row 不变,因为删除后新内容已上移 else: current_row += 1 print(f"🖼️ 已删除 {deleted_image_blocks} 个对应的图表区块") # 保存最终文件 wb.save(output_result) print(f"✅ 清理完成:符合条件的数据及图表已删除 → {output_result}") ——"D:\Program Files\Python39\python.exe" C:\Users\E918928\PycharmProjects\pythonProject\LZRR\代码1、\能管检查\数据检测.py ✅ 共清洗并保存了 30 个文件 ✅ 分析完成!共 16 个高变异列已保存至: D:\Class\能管比对\每日结果\高变异列_CV大于0.3_整列分析.xlsx 📊 共尝试插入 13 张图像,成功插入 13 张。 Traceback (most recent call last): File "C:\Users\E918928\PycharmProjects\pythonProject\LZRR\代码1、\能管检查\数据检测.py", line 427, in <module> from openpyxl.utils import coordinate_from_string, column_index_from_string ImportError: cannot import name &#39;coordinate_from_string&#39; from &#39;openpyxl.utils&#39; (D:\Program Files\Python39\lib\site-packages\openpyxl\utils\__init__.py) Process finished with exit code 1
11-26
import os import openpyxl from openpyxl.styles import Font from openpyxl.formatting.rule import ColorScaleRule from datetime import datetime, timedelta import pandas as pd # 测试计数器 test_counter = {"font_success": 0, "font_failure": 0, "region_found": 0, "region_not_found": 0} # 1. 获取前一天的日期并创建新文件名 today = datetime.now() yesterday = today - timedelta(days=1) yesterday_str = yesterday.strftime("%Y%m%d") new_file_name = f"系统质检日报{yesterday_str}.xlsx" wb_new = openpyxl.Workbook() # 删除默认sheet if "Sheet" in wb_new.sheetnames: wb_new.remove(wb_new["Sheet"]) # 定义文件夹路径 folder_negative = "D:/质检:/负向违规率" folder_sop = "D:/质检:/SOP执行率" # 辅助函数:提取文件名括号中的内容 def extract_bracket_content(filename): if &#39;(&#39; in filename and &#39;)&#39; in filename: start = filename.find(&#39;(&#39;) + 1 end = filename.find(&#39;)&#39;) return filename[start:end] return os.path.splitext(filename)[0] # 辅助函数:查找区域范围(基于标题关键词) def find_table_range(ws, keyword, max_search_rows=50): """ 智能定位表格区域 (支持合并单元格标题) 返回: (列头行, 数据结束行, 起始列, 结束列) """ # 1. 搜索标题行 (支持合并单元格) title_cell = None for row in range(1, max_search_rows + 1): for col in range(1, ws.max_column + 1): cell = ws.cell(row=row, column=col) # 检查合并单元格 if cell.value and keyword in str(cell.value): title_cell = cell break # 检查是否为合并区域的一部分 for merged in ws.merged_cells.ranges: if (row, col) in merged: merged_cell = ws.cell(merged.min_row, merged.min_col) if merged_cell.value and keyword in str(merged_cell.value): title_cell = merged_cell break if title_cell: break if not title_cell: print(f"⚠️ 未找到包含&#39;{keyword}&#39;的标题") return None # 2. 确定标题区域范围 start_col = title_cell.column end_col = start_col # 处理合并单元格 merged_range = None for merged in ws.merged_cells.ranges: if (title_cell.row, title_cell.column) in merged: merged_range = merged start_col = merged.min_col end_col = merged.max_col break # 未合并则向右扫描 if not merged_range: while end_col < ws.max_column: next_cell = ws.cell(row=title_cell.row, column=end_col + 1) # 允许空白单元格但必须存在列名 if next_cell.value is None and ws.cell(row=title_cell.row + 1, column=end_col + 1).value is None: break end_col += 1 # 3. 定位列头行和数据区域 header_row = title_cell.row + 1 # 列名在标题下一行 # 查找数据结束行 (从header_row+1开始) data_end_row = header_row for row in range(header_row + 1, ws.max_row + 1): # 检查起始列是否有值 (避免空行干扰) if ws.cell(row=row, column=start_col).value is not None: data_end_row = row else: break print(f"✅ 定位成功: {keyword}") print(f" 标题位置: {title_cell.coordinate} (合并区域: {f&#39;{start_col}-{end_col}&#39; if merged_range else &#39;无&#39;})") print(f" 列头行: {header_row}, 数据行: {header_row + 1}-{data_end_row}") print(f" 列范围: {start_col}-{end_col}") return (header_row, data_end_row, start_col, end_col) # 2. 遍历文件夹并处理文件 folders = [folder_negative, folder_sop] for folder in folders: files = [f for f in os.listdir(folder) if f.endswith((&#39;.xlsx&#39;, &#39;.xls&#39;))] print(f"\n处理文件夹: {folder}") print(f"找到 {len(files)} 个文件") for file_idx, file in enumerate(files, 1): file_path = os.path.join(folder, file) print(f"\n处理文件 {file_idx}/{len(files)}: {file}") try: wb_source = openpyxl.load_workbook(file_path, data_only=True) if "统计" in wb_source.sheetnames: ws_source = wb_source["统计"] sheet_name = "统计-" + extract_bracket_content(file) ws_new = wb_new.create_sheet(title=sheet_name) print(f"创建新工作表: {sheet_name}") # 复制内容和样式 for row in ws_source.iter_rows(): for cell in row: new_cell = ws_new.cell(row=cell.row, column=cell.column, value=cell.value) if cell.has_style: new_cell.font = cell.font.copy() new_cell.border = cell.border.copy() new_cell.fill = cell.fill.copy() new_cell.number_format = cell.number_format new_cell.alignment = cell.alignment.copy() # === 测试点1: 初始字体测试 === test_cell = ws_new.cell(row=1, column=1) print("\n[初始字体测试]") print(f"初始字体: 名称={test_cell.font.name}, 大小={test_cell.font.size}") # 应用全局格式:字体微软雅黑,字号10 font = Font(name=&#39;微软雅黑&#39;, size=10) for row in ws_new.iter_rows(): for cell in row: cell.font = font # === 测试点2: 字体设置验证 === test_cell = ws_new.cell(row=1, column=1) print("\n[字体设置测试]") if test_cell.font.name == &#39;微软雅黑&#39; and test_cell.font.size == 10: print("√ 字体设置成功: 微软雅黑/10号") test_counter["font_success"] += 1 else: print(f"× 字体设置失败: 当前字体={test_cell.font.name}, 大小={test_cell.font.size}") test_counter["font_failure"] += 1 # 5. 根据文件夹类型处理特定区域 if folder == folder_negative: print("\n处理负向违规率文件") # 坐席维度处理 range_seat = find_table_range(ws_new, "负向-坐席维度统计") if range_seat: start_row, end_row, start_col, end_col = range_seat headers = [ws_new.cell(row=start_row, column=c).value for c in range(start_col, end_col + 1)] if "汇总" in headers: col_idx = headers.index("汇总") + start_col print(f" 按&#39;汇总&#39;列排序 (列{col_idx})") # 提取数据 data = [] for r in range(start_row + 1, end_row + 1): row_data = [ws_new.cell(row=r, column=c).value for c in range(start_col, end_col + 1)] data.append(row_data) # === 测试点3: 排序前数据 === print(f" 排序前第一行汇总值: {data[0][headers.index(&#39;汇总&#39;)]}") # 创建DataFrame并排序 df = pd.DataFrame(data, columns=headers) df["汇总"] = pd.to_numeric(df["汇总"], errors=&#39;coerce&#39;) df_sorted = df.sort_values(by="汇总", ascending=True) # === 测试点4: 排序验证 === sorted_values = df_sorted["汇总"].tolist() print(f" 排序验证: 最小值={minc(sorted_values)}, 最大值={max(sorted_values)}") print(f" 排序后第一行汇总值: {sorted_values[0]}") # 写回排序后的数据 for r_idx, r in enumerate(range(start_row + 1, end_row + 1)): for c_idx, c in enumerate(range(start_col, end_col + 1)): ws_new.cell(row=r, column=c, value=df_sorted.iloc[r_idx, c_idx]) # 处理主任维度统计:设置“汇总”列色阶(绿-白-红) range_director = find_table_range(ws_new, "负向-主任维度统计") if range_director: start_row, end_row, start_col, end_col = range_director headers = [ws_new.cell(row=start_row, column=c).value for c in range(start_col, end_col + 1)] if "汇总" in headers: col_idx = headers.index("汇总") + start_col # 添加条件格式 rule = ColorScaleRule(start_type=&#39;min&#39;, start_color=&#39;00FF00&#39;, # 绿 mid_type=&#39;percentile&#39;, mid_value=50, mid_color=&#39;FFFFFF&#39;, # 白 end_type=&#39;max&#39;, end_color=&#39;FF0000&#39;) # 红 ws_new.conditional_formatting.add(f"{openpyxl.utils.get_column_letter(col_idx)}{start_row + 1}:{openpyxl.utils.get_column_letter(col_idx)}{end_row}", rule) # === 测试点5: 条件格式验证 === cf_ranges = [cf.cells for cf in ws_new.conditional_formatting] print(f" 条件格式规则数量: {len(ws_new.conditional_formatting)}") print(f" 最新规则范围: {ws_new.conditional_formatting._cf_rules[-1].cells}") # 如果文件名包含“说辞合规性-非成功单”,删除所有区域的“汇总”列 if "说辞合规性-非成功单" in file: keywords = ["负向-模型维度统计", "负向-科室维度统计", "负向-主任维度统计", "负向-坐席维度统计"] for keyword in keywords: region_range = find_table_range(ws_new, keyword) if region_range: start_row, end_row, start_col, end_col = region_range headers = [ws_new.cell(row=start_row, column=c).value for c in range(start_col, end_col + 1)] if "汇总" in headers: col_idx = headers.index("汇总") + start_col ws_new.delete_cols(col_idx, 1) # 删除列 # === 测试点6: 列删除验证 === new_headers = [ws_new.cell(row=start_row, column=c).value for c in range(start_col, end_col)] if "汇总" not in new_headers: print(" √ 汇总列删除成功") else: print(" × 汇总列仍然存在") elif folder == folder_sop: print("\n处理SOP执行率文件") # 处理坐席维度统计:按“需关联外部话术的执行率”列升序 range_seat = find_table_range(ws_new, "SOP-坐席维度统计") if range_seat: start_row, end_row, start_col, end_col = range_seat data = [] for r in range(start_row, end_row + 1): row_data = [ws_new.cell(row=r, column=c).value for c in range(start_col, end_col + 1)] data.append(row_data) df = pd.DataFrame(data[1:], columns=data[0]) if "需关联外部话术的执行率" in df.columns: df = df.sort_values(by="需关联外部话术的执行率", ascending=True) for r_idx in range(1, len(df) + 1): for c_idx, c in enumerate(range(start_col, end_col + 1)): ws_new.cell(row=start_row + r_idx, column=c).value = df.iloc[r_idx - 1, c_idx] # 处理主任维度统计:设置“汇总”列色阶(红-白-绿) range_director = find_table_range(ws_new, "SOP-主任维度统计") if range_director: start_row, end_row, start_col, end_col = range_director headers = [ws_new.cell(row=start_row, column=c).value for c in range(start_col, end_col + 1)] if "汇总" in headers: col_idx = headers.index("汇总") + start_col rule = ColorScaleRule(start_type=&#39;min&#39;, start_color=&#39;FF0000&#39;, # 红 mid_type=&#39;percentile&#39;, mid_value=50, mid_color=&#39;FFFFFF&#39;, # 白 end_type=&#39;max&#39;, end_color=&#39;00FF00&#39;) # 绿 ws_new.conditional_formatting.add(f"{openpyxl.utils.get_column_letter(col_idx)}{start_row + 1}:{openpyxl.utils.get_column_letter(col_idx)}{end_row}", rule) file_count += 1 else: print(f" 文件 {file} 中缺少&#39;统计&#39;工作表") except Exception as e: print(f"处理文件 {file} 时出错: {str(e)}") # === 错误分析 === import traceback error_type = type(e).__name__ print(f"错误类型: {error_type}") print("错误追踪:") traceback.print_exc() # 保存新文件 wb_new.save(new_file_name) print(f"\n文件已创建: {new_file_name}") # 最终测试报告 print("\n==== 测试报告 ====") print(f"字体设置成功: {test_counter[&#39;font_success&#39;]} 次") print(f"字体设置失败: {test_counter[&#39;font_failure&#39;]} 次") print(f"区域成功定位: {test_counter[&#39;region_found&#39;]} 次") print(f"区域未找到: {test_counter[&#39;region_not_found&#39;]} 次") print("==================") 这段代码,只有字体设置成功了,后面的色阶设置,排序和删除操作都没有成功,请帮我检查错误原因,并改正,表结构:第一行是标题,第二行是列名,第三行开始是数据,请加一个功能,帮我把标题行在当前有数据的区域合并居中,谢谢
11-18
class SCLRuleProcessor: """SCL规则处理器,包含详细的颜色映射逻辑""" def __init__(self, color_detector=None): self.color_detector = color_detector or EnhancedColorDetector(tolerance=30) # 创建专用日志记录器 self.logger = logging.getLogger(&#39;SCLProcessor.RuleProcessor&#39;) if not self.logger.handlers: # 如果没有处理器,添加控制台处理器 console_handler = logging.StreamHandler() console_handler.setLevel(logging.INFO) formatter = logging.Formatter( &#39;%(asctime)s - %(name)s - %(levelname)s - %(message)s&#39;, datefmt=&#39;%Y-%m-%d %H:%M:%S&#39; ) console_handler.setFormatter(formatter) self.logger.addHandler(console_handler) self.logger.setLevel(logging.INFO) # 固定列位置 self.COLUMN_MAPPING = { "差分種別": 12, # L列 "变更内容": 13, # M列 "判断列": 14, # N列 "判断理由": 15, # O列 "变更背景": 16, # P列 "备注": 17 # Q列 } # 新增规则的关键词 - 添加更多变体 self.keywords = [ "波形図の尾の延長、非機能変更と判定されました。", "波形図の尾の延長", "非機能変更", "選択肢的补充说明,不属于功能变更。", "补充说明", "不属于功能变更", "空白行が追加され、非機能変更と判定された", "空白行追加", "空行が削除され、非機能変更と判定された", "空行削除", "无效更改,判定为无功能变更。", "无效更改", "書式変更で機能変更ではないと判断されました。", "書式変更", "仅修改了背景色,不影响软件设计,判定为无功能变更。", "仅修改背景色", "背景色修改", "非功能变更", "无功能变更", "書式調整", "非機能修正" ] # 创建关键词正则表达式模式 self.keyword_pattern = self._create_keyword_pattern() # 规则映射到目标列 self.RULE_MAPPING = { # 规则1-18: 备注值统计 "diff_no_fill": 16, # P列 "diff_fill": 23, # W列 "diff_add_no_fill": 27, # AA列 "diff_add_fill": 30, # AD列 "diff_change_no_fill": 34, # AH列 "diff_change_fill": 37, # AK列 "diff_delete_no_fill": 42, # AP列 "diff_delete_fill": 45, # AS列 "valid_yes_no_fill": 50, # AX列 "valid_yes_fill": 53, # BA列 "valid_no_no_fill": 57, # BE列 "valid_no_fill": 60, # BH列 "valid_yes_reason_no_fill": 64, # BL列 "valid_yes_reason_fill": 67, # BO列 "valid_no_reason_no_fill": 71, # BS列 "valid_no_reason_fill": 74, # BV列 "background_no_fill": 78, # BZ列 "background_fill": 85, # CG列, # 规则19-59: 计数统计 "rule19": 14, # N列 "rule20": 15, # O列 "rule21": 17, # Q列 "rule22": 18, # R列 "rule23": 19, # S列 "rule24": 20, # T列 "rule25": 21, # U列 "rule26": 22, # V列 "rule27": 25, # Y列 "rule28": 26, # Z列 "rule29": 28, # AB列 "rule30": 29, # AC列 "rule31": 32, # AF列 "rule32": 33, # AG列 "rule33": 35, # AI列 "rule34": 36, # AJ列 "rule35": 47, # AU列 "rule36": 48, # AV列 "rule37": 49, # AW列 "rule38": 51, # AY列 "rule39": 52, # AZ列 "rule40": 55, # BC列 "rule41": 56, # BD列 "rule42": 58, # BF列 "rule43": 59, # BG列 "rule44": 62, # BJ列 "rule45": 63, # BK列 "rule46": 65, # BM列 "rule47": 66, # BN列 "rule48": 69, # BQ列 "rule49": 70, # BR列 "rule50": 72, # BT列 "rule51": 73, # BU列 "rule52": 76, # BX列 "rule53": 77, # BY列 "rule54": 79, # CA列 "rule55": 80, # CB列 "rule56": 81, # CC列 "rule57": 82, # CD列 "rule58": 83, # CE列 "rule59": 84, # CF列 "rule60": 40, # AF列 "rule61": 41, # AG列 "rule62": 43, # AI列 "rule63": 44, # AJ列 "rule64": 7, # G列 "rule65": 8, # H列 "rule66": 9, # I列 } self.HEADER_ROW = 3 # 表头固定在第三行 self.DATA_START_ROW = 4 # 数据从第四行开始 def get_cell_color_status(self, cell, column_name=None): """ 获取单元格的颜色状态(增强版) 返回: (颜色名称, 颜色HEX值, 是否无填充) column_name: 列名,用于确定是否进行特殊颜色判断 """ try: # 检查是否无填充 if self.color_detector.is_no_fill(cell): return "无填充", None, True # 获取单元格颜色 hex_color = self.color_detector.get_cell_color(cell) if not hex_color: return "未知", None, False # 只有M列和P列需要特殊颜色判断 if column_name in ["变更内容", "变更背景"]: # 检查是否为特定颜色 if self.color_detector.is_specific_color(cell, self.COLOR_CRIMSON): return "猩红", self.COLOR_CRIMSON, False if self.color_detector.is_specific_color(cell, self.COLOR_STEEL_BLUE): return "钢蓝", self.COLOR_STEEL_BLUE, False if self.color_detector.is_specific_color(cell, self.COLOR_CHOCOLATE): return "巧克力黄", self.COLOR_CHOCOLATE, False # 匹配最接近的已知颜色 closest_name = self.color_detector.match_color_name(hex_color) return closest_name, hex_color, False except Exception as e: self.logger.error(f"获取单元格颜色状态失败: {str(e)}") return "错误", None, False def _create_keyword_pattern(self): """创建关键词匹配的正则表达式模式""" # 转义特殊字符并创建模式 escaped_keywords = [re.escape(kw) for kw in self.keywords] # 创建不区分大小写的模式 pattern_str = "|".join(escaped_keywords) return re.compile(pattern_str, re.IGNORECASE) def contains_keyword(self, text): """ 使用正则表达式检查文本是否包含任意关键词(更健壮的匹配) 返回: 匹配结果和匹配到的关键词列表 """ if not text: return False, [] text = str(text).strip() matches = self.keyword_pattern.findall(text) return bool(matches), matches def split_note_values(self, note_value): """拆分备注值,支持多种分隔符""" if not note_value: return [] # 支持的分隔符:中文顿号、全角逗号、半角逗号、分号、空格 separators = [&#39;、&#39;, &#39;,&#39;, &#39;,&#39;, &#39;;&#39;, &#39;;&#39;, &#39; &#39;] # 替换所有分隔符为统一的分隔符 normalized = str(note_value) for sep in separators: normalized = normalized.replace(sep, &#39;|&#39;) # 分割并过滤空值 values = [v.strip() for v in normalized.split(&#39;|&#39;) if v.strip()] # 过滤掉值为“-”的项 values = [v for v in values if v != "-"] return values def format_note_stats(self, data_dict): """格式化备注统计结果,确保每个值一行""" try: # 尝试按数值排序 sorted_items = sorted( data_dict.items(), key=lambda x: int(x[0]) if x[0].isdigit() else x[0] ) except Exception: # 如果无法转换为数字,则按字符串排序 sorted_items = sorted(data_dict.items()) # 格式化为每行一个值 return "\n".join([f"{value},{count}" for value, count in sorted_items]) def create_column_letter_map(self): """创建列号到列字母的映射""" column_letter_map = {} # 生成A-Z列 for i in range(1, 27): column_letter_map[i] = chr(64 + i) # 生成AA-AZ列 for i in range(1, 27): column_letter_map[26 + i] = f"A{chr(64 + i)}" # 生成BA-BZ列 for i in range(1, 27): column_letter_map[52 + i] = f"B{chr(64 + i)}" # 生成CA-CZ列 for i in range(1, 27): column_letter_map[78 + i] = f"C{chr(64 + i)}" # 添加已知的特殊列 column_letter_map.update({ 16: "P", 23: "W", 27: "AA", 30: "AD", 34: "AH", 37: "AK", 42: "AP", 45: "AS", 50: "AX", 53: "BA", 57: "BE", 60: "BH", 62: "BL", 65: "BO", 71: "BS", 74: "BV", 78: "BZ", 85: "CG" }) return column_letter_map def log_cell_value_counts(self, rule_name, cell_stats): """记录规则对应的每个单元格值的计数""" total_count = sum(cell_stats.values()) self.logger.info(f"{rule_name}: 详细单元格统计 (共 {total_count} 个单元格)") # 按计数值降序排序 sorted_items = sorted(cell_stats.items(), key=lambda x: x[1], reverse=True) # 记录每个值的计数 for value, count in sorted_items: # 处理空值 display_value = "(空)" if value is None or value == "" else value self.logger.info(f" - {display_value}: {count} 次") # 记录唯一值计数 unique_values = len(cell_stats) self.logger.info(f" 共 {unique_values} 个唯一值") def process_file(self, file_path): """ 处理单个SCL文件并返回所有统计结果 返回: (results, color_report) - results: 规则统计结果 - color_report: 颜色分析报告 """ results = {} missing_data = [] # 存储缺失数据信息 try: self.logger.info(f"开始处理SCL文件: {file_path}") # 加载SCL文件 scl_wb = openpyxl.load_workbook(file_path) scl_sheet = scl_wb.active self.logger.info(f"工作表加载成功: {scl_sheet.title}, 总行数: {scl_sheet.max_row}") # 检查是否有足够的数据行 if scl_sheet.max_row < self.DATA_START_ROW: self.logger.info("文件没有数据行,跳过处理") return results, "", missing_data # 初始化统计结果 stats = { # 规则1-18: 备注值统计 "diff_no_fill": defaultdict(int), # 规则1: 変更内容无颜色填充 "diff_fill": defaultdict(int), # 规则2: 変更内容有颜色填充 "diff_add_no_fill": defaultdict(int), # 规则3: 差分种别="追加"且无颜色填充 "diff_add_fill": defaultdict(int), # 规则4: 差分种别="追加"且有颜色填充 "diff_change_no_fill": defaultdict(int), # 规则5: 差分种别="変更"且无颜色填充 "diff_change_fill": defaultdict(int), # 规则6: 差分种别="変更"且有颜色填充 "diff_delete_no_fill": defaultdict(int), # 规则7: 差分种别="削除"且无颜色填充 "diff_delete_fill": defaultdict(int), # 规则8: 差分种别="削除"且有颜色填充 "valid_yes_no_fill": defaultdict(int), # 规则9: 判断="有意"且无颜色填充 "valid_yes_fill": defaultdict(int), # 规则10: 判断="有意"且有颜色填充 "valid_no_no_fill": defaultdict(int), # 规则11: 判断="無効"且无颜色填充 "valid_no_fill": defaultdict(int), # 规则12: 判断="無効"且有颜色填充 "valid_yes_reason_no_fill": defaultdict(int), # 规则13: 判断="有意"且理由无颜色填充 "valid_yes_reason_fill": defaultdict(int), # 规则14: 判断="有意"且理由有颜色填充 "valid_no_reason_no_fill": defaultdict(int), # 规则15: 判断="無効"且理由无颜色填充 "valid_no_reason_fill": defaultdict(int), # 规则16: 判断="無効"且理由有颜色填充 "background_no_fill": defaultdict(int), # 规则17: 背景无颜色填充 "background_fill": defaultdict(int), # 规则18: 背景有颜色填充, # 规则19-59: 计数统计 "rule19": 0, # 规则19: 包含关键词且无颜色填充 "rule20": 0, # 规则20: 不包含关键词且无颜色填充 "rule21": 0, # 规则21: 包含关键词且猩红填充 "rule22": 0, # 规则22: 不包含关键词且猩红填充 "rule23": 0, # 规则23: 包含关键词且钢蓝填充 "rule24": 0, # 规则24: 不包含关键词且钢蓝填充 "rule25": 0, # 规则25: 包含关键词且巧克力黄填充 "rule26": 0, # 规则26: 不包含关键词且巧克力黄填充 "rule27": 0, # L列="追加"且无颜色填充且M列包含关键词 "rule28": 0, # L列="追加"且无颜色填充且M列不包含关键词 "rule29": 0, # L列="追加"且有颜色填充且M列包含关键词 "rule30": 0, # L列="追加"且有颜色填充且M列不包含关键词 "rule31": 0, # L列="変更"且无颜色填充且M列包含关键词 "rule32": 0, # L列="変更"且无颜色填充且M列不包含关键词 "rule33": 0, # L列="変更"且有颜色填充且M列包含关键词 "rule34": 0, # L列="変更"且有颜色填充且M列不包含关键词 "rule35": 0, # L列="差分無し"的计数 "rule36": 0, # N列="有意"且无颜色填充且M列包含关键词 "rule37": 0, # N列="有意"且无颜色填充且M列不包含关键词 "rule38": 0, # N列="有意"且有颜色填充且M列包含关键词 "rule39": 0, # N列="有意"且有颜色填充且M列不包含关键词 "rule40": 0, # N列="無効"且无颜色填充且M列包含关键词 "rule41": 0, # N列="無効"且无颜色填充且M列不包含关键词 "rule42": 0, # N列="無効"且有颜色填充且M列包含关键词 "rule43": 0, # N列="無効"且有颜色填充且M列不包含关键词 "rule44": 0, # N列="有意"且O列无颜色填充且M列包含关键词 "rule45": 0, # N列="有意"且O列无颜色填充且M列不包含关键词 "rule46": 0, # N列="有意"且O列有颜色填充且M列包含关键词 "rule47": 0, # N列="有意"且O列有颜色填充且M列不包含关键词 "rule48": 0, # N列="無効"且O列无颜色填充且M列包含关键词 "rule49": 0, # N列="無効"且O列无颜色填充且M列不包含关键词 "rule50": 0, # N列="無効"且O列有颜色填充且M列包含关键词 "rule51": 0, # N列="無効"且O列有颜色填充且M列不包含关键词 "rule52": 0, # P列无颜色填充且M列包含关键词 "rule53": 0, # P列无颜色填充且M列不包含关键词 "rule54": 0, # P列颜色为猩红且M列包含关键词 "rule55": 0, # P列颜色为猩红且M列不包含关键词 "rule56": 0, # P列颜色为钢蓝且M列包含关键词 "rule57": 0, # P列颜色为钢蓝且M列不包含关键词 "rule58": 0, # P列颜色为巧克力黄且M列包含关键词 "rule59": 0, # P列颜色为巧克力黄且M列不包含关键词 "rule60": 0, # L列="削除"且无颜色填充且M列包含关键词 "rule61": 0, # L列="削除"且无颜色填充且M列不包含关键词 "rule62": 0, # L列="削除"且有颜色填充且M列包含关键词 "rule63": 0, # L列="削除"且有颜色填充且M列不包含关键词 "rule64": 0, # M列有内容 "rule65": 0, # N列="有意" "rule66": 0, # 0 } all_notes_empty = True # 新增:标记整列备注是否全为空 # 遍历所有数据行 for row_idx in range(self.DATA_START_ROW, scl_sheet.max_row + 1): # 获取所有需要的单元格 diff_content_cell = scl_sheet.cell(row_idx, self.COLUMN_MAPPING["变更内容"]) diff_cell = scl_sheet.cell(row_idx, self.COLUMN_MAPPING["差分種別"]) note_cell = scl_sheet.cell(row_idx, self.COLUMN_MAPPING["备注"]) valid_cell = scl_sheet.cell(row_idx, self.COLUMN_MAPPING["判断列"]) reason_cell = scl_sheet.cell(row_idx, self.COLUMN_MAPPING["判断理由"]) background_cell = scl_sheet.cell(row_idx, self.COLUMN_MAPPING["变更背景"]) # 获取单元格值 diff_value = str(diff_cell.value).strip() if diff_cell.value else "" note_value = str(note_cell.value).strip() if note_cell.value else "" valid_value = str(valid_cell.value).strip() if valid_cell.value else "" # 获取备注值并处理多值情况 note_values = self.split_note_values(note_value) # 获取M列值 m_value = str(diff_content_cell.value) if diff_content_cell.value else "" # 检查是否包含关键词(使用增强版匹配) contains_keyword, matched_keywords = self.contains_keyword(m_value) # 记录匹配结果(调试用) if contains_keyword: self.logger.debug(f"行 {row_idx} 匹配到关键词: {&#39;, &#39;.join(matched_keywords)}") else: self.logger.debug(f"行 {row_idx} 未匹配到关键词") # 获取单元格颜色状态 l_color_name, l_color_hex, l_no_fill = self.get_cell_color_status(diff_cell) m_color_name, m_color_hex, m_no_fill = self.get_cell_color_status(diff_content_cell) n_color_name, n_color_hex, n_no_fill = self.get_cell_color_status(valid_cell) o_color_name, o_color_hex, o_no_fill = self.get_cell_color_status(reason_cell) p_color_name, p_color_hex, p_no_fill = self.get_cell_color_status(background_cell) # 检查备注列是否有数据 if note_value: all_notes_empty = False # 规则1-18处理 # 规则1: 変更内容无颜色填充 if m_no_fill and note_values: for nv in note_values: stats["diff_no_fill"][nv] += 1 # 记录到规则统计 # 规则2: 変更内容有颜色填充 if not m_no_fill and note_values: for nv in note_values: stats["diff_fill"][nv] += 1 # 规则3: 差分种别="追加"且无颜色填充 if diff_value == "追加" and l_no_fill and note_values: for nv in note_values: stats["diff_add_no_fill"][nv] += 1 # 规则4: 差分种别="追加"且有颜色填充 if diff_value == "追加" and not l_no_fill and note_values: for nv in note_values: stats["diff_add_fill"][nv] += 1 # 规则5: 差分种别="変更"且无颜色填充 if diff_value == "変更" and l_no_fill and note_values: for nv in note_values: stats["diff_change_no_fill"][nv] += 1 # 规则6: 差分种别="変更"且有颜色填充 if diff_value == "変更" and not l_no_fill and note_values: for nv in note_values: stats["diff_change_fill"][nv] += 1 # 规则7: 差分种别="削除"且无颜色填充 if diff_value == "削除" and l_no_fill and note_values: for nv in note_values: stats["diff_delete_no_fill"][nv] += 1 # 规则8: 差分种别="削除"且有颜色填充 if diff_value == "削除" and not l_no_fill and note_values: for nv in note_values: stats["diff_delete_fill"][nv] += 1 # 规则9: 判断="有意"且无颜色填充 if valid_value == "有意" and n_no_fill and note_values: for nv in note_values: stats["valid_yes_no_fill"][nv] += 1 # 规则10: 判断="有意"且有颜色填充 if valid_value == "有意" and not n_no_fill and note_values: for nv in note_values: stats["valid_yes_fill"][nv] += 1 # 规则11: 判断="無効"且无颜色填充 if valid_value == "無効" and n_no_fill and note_values: for nv in note_values: stats["valid_no_no_fill"][nv] += 1 # 规则12: 判断="無効"且有颜色填充 if valid_value == "無効" and not n_no_fill and note_values: for nv in note_values: stats["valid_no_fill"][nv] += 1 # 规则13: 判断="有意"且理由无颜色填充 if valid_value == "有意" and o_no_fill and note_values: for nv in note_values: stats["valid_yes_reason_no_fill"][nv] += 1 # 规则14: 判断="有意"且理由有颜色填充 if valid_value == "有意" and not o_no_fill and note_values: for nv in note_values: stats["valid_yes_reason_fill"][nv] += 1 # 规则15: 判断="無効"且理由无颜色填充 if valid_value == "無効" and o_no_fill and note_values: for nv in note_values: stats["valid_no_reason_no_fill"][nv] += 1 # 规则16: 判断="無効"且理由有颜色填充 if valid_value == "無効" and not o_no_fill and note_values: for nv in note_values: stats["valid_no_reason_fill"][nv] += 1 # 规则17: 背景无颜色填充 if p_no_fill and note_values: for nv in note_values: stats["background_no_fill"][nv] += 1 # 规则18: 背景有颜色填充 if not p_no_fill and note_values: for nv in note_values: stats["background_fill"][nv] += 1 # 规则19-59处理 # 规则19: 包含关键词且无颜色填充 if contains_keyword and m_no_fill: stats["rule19"] += 1 # 规则20: 不包含关键词且无颜色填充 if not contains_keyword and m_no_fill: stats["rule20"] += 1 # 规则21: 包含关键词且猩红填充 if contains_keyword and m_color_name == "猩红": stats["rule21"] += 1 # 规则22: 不包含关键词且猩红填充 if not contains_keyword and m_color_name == "猩红": stats["rule22"] += 1 # 规则23: 包含关键词且钢蓝填充 if contains_keyword and m_color_name == "钢蓝": stats["rule23"] += 1 # 规则24: 不包含关键词且钢蓝填充 if not contains_keyword and m_color_name == "钢蓝": stats["rule24"] += 1 # 规则25: 包含关键词且巧克力黄填充 if contains_keyword and m_color_name == "巧克力黄": stats["rule25"] += 1 # 规则26: 不包含关键词且巧克力黄填充 if not contains_keyword and m_color_name == "巧克力黄": stats["rule26"] += 1 # 规则27: L列="追加"且无颜色填充且M列包含关键词 if diff_value == "追加" and l_no_fill and contains_keyword: stats["rule27"] += 1 # 规则28: L列="追加"且无颜色填充且M列不包含关键词 if diff_value == "追加" and l_no_fill and not contains_keyword: stats["rule28"] += 1 # 规则29: L列="追加"且有颜色填充且M列包含关键词 if diff_value == "追加" and not l_no_fill and contains_keyword: stats["rule29"] += 1 # 规则30: L列="追加"且有颜色填充且M列不包含关键词 if diff_value == "追加" and not l_no_fill and not contains_keyword: stats["rule30"] += 1 # 规则31: L列="変更"且无颜色填充且M列包含关键词 if diff_value == "変更" and l_no_fill and contains_keyword: stats["rule31"] += 1 # 规则32: L列="変更"且无颜色填充且M列不包含关键词 if diff_value == "変更" and l_no_fill and not contains_keyword: stats["rule32"] += 1 # 规则33: L列="変更"且有颜色填充且M列包含关键词 if diff_value == "変更" and not l_no_fill and contains_keyword: stats["rule33"] += 1 # 规则34: L列="変更"且有颜色填充且M列不包含关键词 if diff_value == "変更" and not l_no_fill and not contains_keyword: stats["rule34"] += 1 # 规则35: L列="差分無し"的计数 if diff_value == "差分無し": stats["rule35"] += 1 # 规则36: N列="有意"且无颜色填充且M列包含关键词 if valid_value == "有意" and n_no_fill and contains_keyword: stats["rule36"] += 1 # 规则37: N列="有意"且无颜色填充且M列不包含关键词 if valid_value == "有意" and n_no_fill and not contains_keyword: stats["rule37"] += 1 # 规则38: N列="有意"且有颜色填充且M列包含关键词 if valid_value == "有意" and not n_no_fill and contains_keyword: stats["rule38"] += 1 # 规则39: N列="有意"且有颜色填充且M列不包含关键词 if valid_value == "有意" and not n_no_fill and not contains_keyword: stats["rule39"] += 1 # 规则40: N列="無効"且无颜色填充且M列包含关键词 if valid_value == "無効" and n_no_fill and contains_keyword: stats["rule40"] += 1 # 规则41: N列="無効"且无颜色填充且M列不包含关键词 if valid_value == "無効" and n_no_fill and not contains_keyword: stats["rule41"] += 1 # 规则42: N列="無効"且有颜色填充且M列包含关键词 if valid_value == "無効" and not n_no_fill and contains_keyword: stats["rule42"] += 1 # 规则43: N列="無効"且有颜色填充且M列不包含关键词 if valid_value == "無効" and not n_no_fill and not contains_keyword: stats["rule43"] += 1 # 规则44: N列="有意"且O列无颜色填充且M列包含关键词 if valid_value == "有意" and o_no_fill and contains_keyword: stats["rule44"] += 1 # 规则45: N列="有意"且O列无颜色填充且M列不包含关键词 if valid_value == "有意" and o_no_fill and not contains_keyword: stats["rule45"] += 1 # 规则46: N列="有意"且O列有颜色填充且M列包含关键词 if valid_value == "有意" and not o_no_fill and contains_keyword: stats["rule46"] += 1 # 规则47: N列="有意"且O列有颜色填充且M列不包含关键词 if valid_value == "有意" and not o_no_fill and not contains_keyword: stats["rule47"] += 1 # 规则48: N列="無効"且O列无颜色填充且M列包含关键词 if valid_value == "無効" and o_no_fill and contains_keyword: stats["rule48"] += 1 # 规则49: N列="無効"且O列无颜色填充且M列不包含关键词 if valid_value == "無効" and o_no_fill and not contains_keyword: stats["rule49"] += 1 # 规则50: N列="無効"且O列有颜色填充且M列包含关键词 if valid_value == "無効" and not o_no_fill and contains_keyword: stats["rule50"] += 1 # 规则51: N列="無効"且O列有颜色填充且M列不包含关键词 if valid_value == "無効" and not o_no_fill and not contains_keyword: stats["rule51"] += 1 # 规则52: P列无颜色填充且M列包含关键词 if p_no_fill and contains_keyword: stats["rule52"] += 1 # 规则53: P列无颜色填充且M列不包含关键词 if p_no_fill and not contains_keyword: stats["rule53"] += 1 # 规则54: P列颜色为猩红且M列包含关键词 if p_color_name == "猩红" and contains_keyword: stats["rule54"] += 1 # 规则55: P列颜色为猩红且M列不包含关键词 if p_color_name == "猩红" and not contains_keyword: stats["rule55"] += 1 # 规则56: P列颜色为钢蓝且M列包含关键词 if p_color_name == "钢蓝" and contains_keyword: stats["rule56"] += 1 # 规则57: P列颜色为钢蓝且M列不包含关键词 if p_color_name == "钢蓝" and not contains_keyword: stats["rule57"] += 1 # 规则58: P列颜色为巧克力黄且M列包含关键词 if p_color_name == "巧克力黄" and contains_keyword: stats["rule58"] += 1 # 规则59: P列颜色为巧克力黄且M列不包含关键词 if p_color_name == "巧克力黄" and not contains_keyword: stats["rule59"] += 1 # 规则60: L列="削除"且无颜色填充且M列包含关键词 if diff_value == "削除" and l_no_fill and contains_keyword: stats["rule60"] += 1 # 规则61: L列="変更"且无颜色填充且M列不包含关键词 if diff_value == "削除" and l_no_fill and not contains_keyword: stats["rule61"] += 1 # 规则62: L列="削除"且有颜色填充且M列包含关键词 if diff_value == "削除" and not l_no_fill and contains_keyword: stats["rule62"] += 1 # 规则63: L列="削除"且有颜色填充且M列不包含关键词 if diff_value == "削除" and not l_no_fill and not contains_keyword: stats["rule63"] += 1 # 规则64: M列有数据 if m_value : stats["rule64"] += 1 # 规则65: L列="削除"且有颜色填充且M列包含关键词 if valid_value == "有意": stats["rule65"] += 1 # 规则66: L列="削除"且有颜色填充且M列不包含关键词 stats["rule66"] = 0 # 处理统计结果 for rule, data in stats.items(): # 对于规则1-18,输出备注值及其出现次数 if rule in ["diff_no_fill", "diff_fill", "diff_add_no_fill", "diff_add_fill", "diff_change_no_fill", "diff_change_fill", "diff_delete_no_fill", "diff_delete_fill", "valid_yes_no_fill", "valid_yes_fill", "valid_no_no_fill", "valid_no_fill", "valid_yes_reason_no_fill", "valid_yes_reason_fill", "valid_no_reason_no_fill", "valid_no_reason_fill", "background_no_fill", "background_fill"]: # 新增功能:如果备注列全为空,不填入任何数据 if all_notes_empty: self.logger.info(f"{rule}: 备注列全为空,不填入数据") continue # 检查是否有数据 if not data: # 字典为空 results[rule] = "/" self.logger.info(f"{rule}: 无数据,输出 &#39;/&#39;") continue # 格式化备注统计结果 results[rule] = self.format_note_stats(data) self.log_cell_value_counts(rule, data) # 优化日志输出:分行显示 self.logger.info(f"{rule}: 结果:") for line in results[rule].split(&#39;\n&#39;): self.logger.info(f" {line}") else: # 对于规则19-59,直接输出计数值 results[rule] = data self.logger.info(f"{rule}: 结果 {results[rule]}") # 生成颜色报告 color_report = self.color_detector.generate_color_report() self.logger.info("\n" + color_report) return results, color_report, missing_data except Exception as e: error_msg = f"处理SCL文件失败: {str(e)}" logger.exception(f"处理SCL文件失败: {file_path} - {str(e)}") # 返回错误信息 return {rule: f"错误: {str(e)}" for rule in self.RULE_MAPPING}, f"错误: {error_msg}", [{ &#39;row&#39;: 0, &#39;col&#39;: 0, &#39;col_name&#39;: &#39;错误&#39;, &#39;message&#39;: error_msg }] 因为创建的EnhancedColorDetector类在SCL类里面创建了一个实例,现在EnhancedColorDetector类修改了,那么SCL类里面调用的方法也要相应的修改,不改变所有规则,给出修改后的完整代码
08-21
PS D:\check> & C:/Users/siyuanll2102/AppData/Local/Programs/Python/Python310/python.exe d:/check/check.py C:\Users\siyuanll2102\AppData\Local\Programs\Python\Python310\lib\site-packages\openpyxl\reader\drawings.py:33: UserWarning: DrawingML support is incomplete and limited to charts and images only. Shapes and drawings will be lost. warn(“DrawingML support is incomplete and limited to charts and images only. Shapes and drawings will be lost.”) import os import tkinter as tk from tkinter import filedialog, ttk, messagebox from openpyxl import load_workbook from openpyxl.styles import PatternFill import logging from datetime import datetime import difflib import csv from collections import defaultdict import re class EnhancedExcelComparatorApp: def init(self, root): self.root = root self.root.title(“增强型Excel比较工具”) self.root.geometry(“1000x700”) # 初始化相似度阈值 self.similarity_threshold = 0.7 # 创建UI组件 self.create_widgets() # 颜色定义 self.yellow_fill = PatternFill(start_color="FFFFFF00", end_color="FFFFFF00", fill_type="solid") # 新增行 (黄色) self.red_fill = PatternFill(start_color="FFFF0000", end_color="FFFF0000", fill_type="solid") # 删除行 (红色) self.blue_fill = PatternFill(start_color="FF0070C0", end_color="FF0070C0", fill_type="solid") # DE列差异 (蓝色0070C0) # 日志记录器 self.logger = logging.getLogger("ExcelComparator") self.logger.setLevel(logging.INFO) def create_widgets(self): # 文件夹选择区域 frame_folder = ttk.LabelFrame(self.root, text="文件夹选择") frame_folder.pack(padx=10, pady=5, fill="x") self.folder_path = tk.StringVar() ttk.Entry(frame_folder, textvariable=self.folder_path, width=80).pack(side="left", padx=5, pady=5, fill="x", expand=True) ttk.Button(frame_folder, text="浏览", command=self.select_folder).pack(side="left", padx=5, pady=5) # 设置区域 frame_settings = ttk.LabelFrame(self.root, text="比较设置") frame_settings.pack(padx=10, pady=5, fill="x") ttk.Label(frame_settings, text="相似度阈值:").pack(side="left", padx=5) self.similarity_var = tk.DoubleVar(value=self.similarity_threshold) ttk.Scale(frame_settings, from_=0.1, to=1.0, variable=self.similarity_var, orient="horizontal", length=200).pack(side="left", padx=5) ttk.Label(frame_settings, textvariable=self.similarity_var).pack(side="left", padx=5) # 比较按钮 frame_controls = ttk.Frame(self.root) frame_controls.pack(pady=10) ttk.Button(frame_controls, text="开始比较", command=self.start_comparison).pack(side="left", padx=5) ttk.Button(frame_controls, text="导出CSV报告", command=self.export_csv_report).pack(side="left", padx=5) # 结果展示区域 frame_results = ttk.LabelFrame(self.root, text="比较结果") frame_results.pack(padx=10, pady=5, fill="both", expand=True) # 创建带滚动条的文本区域 self.result_text = tk.Text(frame_results, wrap="word") scrollbar = ttk.Scrollbar(frame_results, orient="vertical", command=self.result_text.yview) self.result_text.configure(yscrollcommand=scrollbar.set) scrollbar.pack(side="right", fill="y") self.result_text.pack(fill="both", expand=True, padx=5, pady=5) # 底部按钮 frame_buttons = ttk.Frame(self.root) frame_buttons.pack(pady=10) ttk.Button(frame_buttons, text="打开标记文件夹", command=self.open_marked_folder).pack(side="left", padx=5) ttk.Button(frame_buttons, text="打开报告文件夹", command=self.open_reports_folder).pack(side="left", padx=5) ttk.Button(frame_buttons, text="清空结果", command=self.clear_results).pack(side="left", padx=5) def select_folder(self): folder_selected = filedialog.askdirectory() if folder_selected: self.folder_path.set(folder_selected) def start_comparison(self): folder = self.folder_path.get() if not folder: messagebox.showerror("错误", "请先选择文件夹") return # 更新相似度阈值 self.similarity_threshold = self.similarity_var.get() # 检查文件夹结构 before_dir = os.path.join(folder, "before") after_dir = os.path.join(folder, "after") if not os.path.exists(before_dir) or not os.path.exists(after_dir): messagebox.showerror("错误", "文件夹中必须包含before和after子文件夹") return # 创建目录 before_marked_dir = os.path.join(folder, "before_marked") after_marked_dir = os.path.join(folder, "after_marked") reports_dir = os.path.join(folder, "reports") os.makedirs(before_marked_dir, exist_ok=True) os.makedirs(after_marked_dir, exist_ok=True) os.makedirs(reports_dir, exist_ok=True) # 清空结果区域 self.result_text.delete(1.0, tk.END) self.result_text.insert(tk.END, f"开始比较: {datetime.now().strftime(&#39;%Y-%m-%d %H:%M:%S&#39;)}\n") self.result_text.insert(tk.END, f"比较目录: {folder}\n") self.result_text.insert(tk.END, f"相似度阈值: {self.similarity_threshold}\n") self.result_text.insert(tk.END, "-" * 80 + "\n") # 文件配对和比较 before_files = [f for f in os.listdir(before_dir) if f.endswith((&#39;.xlsx&#39;, &#39;.xls&#39;))] after_files = [f for f in os.listdir(after_dir) if f.endswith((&#39;.xlsx&#39;, &#39;.xls&#39;))] # 创建文件映射 file_map = self.create_file_mapping(before_files, after_files) # 比较每个文件对 total_changes = 0 for b_file, a_file in file_map.items(): b_path = os.path.join(before_dir, b_file) a_path = os.path.join(after_dir, a_file) if a_file else None # 处理文件存在性 b_exists = os.path.exists(b_path) a_exists = a_file and os.path.exists(a_path) if not b_exists and not a_exists: continue # 创建文件日志 log_filename = f"{b_file}_vs_{a_file}.log" if a_file else f"{b_file}_deleted.log" log_path = os.path.join(reports_dir, log_filename) self.setup_file_logger(log_path) # 执行比较 status = "完成" try: if b_exists and a_exists: self.logger.info(f"开始比较文件对: {b_file} ↔ {a_file}") self.logger.info(f"相似度阈值: {self.similarity_threshold}") result = self.compare_excel_files( b_path, a_path, os.path.join(before_marked_dir, b_file), os.path.join(after_marked_dir, a_file) ) elif b_exists: self.logger.info(f"处理删除文件: {b_file}") result = self.mark_deleted_file(b_path, os.path.join(before_marked_dir, b_file)) status = "删除" else: self.logger.info(f"处理新增文件: {a_file}") result = self.mark_added_file(a_path, os.path.join(after_marked_dir, a_file)) status = "新增" # 更新UI结果 file_pair = f"{b_file} ↔ {a_file}" if a_file else b_file summary = ( f"文件对: {file_pair}\n" f"状态: {status}\n" f"总变更点: {result[&#39;total_changes&#39;]}\n" f"新增行: {result[&#39;added_rows&#39;]}\n" f"删除行: {result[&#39;removed_rows&#39;]}\n" f"修改行: {result[&#39;modified_rows&#39;]}\n" f"相似度匹配: {result[&#39;link_style_matches&#39;]}\n" f"详细报告: {log_path}\n" + "-" * 80 + "\n" ) self.result_text.insert(tk.END, summary) total_changes += result["total_changes"] # 关闭文件日志 self.remove_file_logger() except Exception as e: status = f"错误: {str(e)}" self.logger.error(f"比较失败: {str(e)}") error_msg = f"文件对: {b_file} ↔ {a_file} - 错误: {str(e)}\n" self.result_text.insert(tk.END, error_msg) self.remove_file_logger() self.result_text.insert(tk.END, f"\n比较完成! 共发现 {total_changes} 处变更\n") messagebox.showinfo("完成", f"比较完成! 共发现 {total_changes} 处变更") def setup_file_logger(self, log_path): """为当前文件对设置日志记录器""" # 移除现有处理器 for handler in self.logger.handlers[:]: self.logger.removeHandler(handler) handler.close() # 创建文件处理器 file_handler = logging.FileHandler(log_path, mode=&#39;w&#39;, encoding=&#39;utf-8&#39;) file_handler.setLevel(logging.INFO) # 设置日志格式 formatter = logging.Formatter(&#39;%(asctime)s - %(levelname)s - %(message)s&#39;) file_handler.setFormatter(formatter) # 添加处理器 self.logger.addHandler(file_handler) def remove_file_logger(self): """移除文件日志处理器""" for handler in self.logger.handlers[:]: self.logger.removeHandler(handler) handler.close() def create_file_mapping(self, before_files, after_files): """创建文件映射,处理文件名相似度匹配""" file_map = {} used_after_files = set() # 优先匹配同名文件 for b_file in before_files: if b_file in after_files: file_map[b_file] = b_file used_after_files.add(b_file) # 匹配相似文件名 for b_file in before_files: if b_file in file_map: continue # 寻找最相似的文件 best_match = None best_score = 0 for a_file in after_files: if a_file in used_after_files: continue # 计算文件名相似度 score = difflib.SequenceMatcher(None, b_file, a_file).ratio() if score > best_score and score > 0.6: best_score = score best_match = a_file if best_match: file_map[b_file] = best_match used_after_files.add(best_match) else: file_map[b_file] = None # 处理未匹配的after文件 for a_file in after_files: if a_file not in used_after_files: file_map[a_file] = a_file # 标记为新增文件 return file_map def is_link_style(self, cell_value): """检查单元格值是否表示样式链接""" if cell_value is None: return False return "link" in str(cell_value).lower() def calculate_ij_similarity(self, ws_before, before_row_idx, ws_after, after_row_idx): """计算IJ列的相似度""" # 获取IJ列值 i_before = ws_before.cell(row=before_row_idx, column=9).value j_before = ws_before.cell(row=before_row_idx, column=10).value i_after = ws_after.cell(row=after_row_idx, column=9).value j_after = ws_after.cell(row=after_row_idx, column=10).value # 处理空值 text_before = f"{i_before or &#39;&#39;} {j_before or &#39;&#39;}".strip() text_after = f"{i_after or &#39;&#39;} {j_after or &#39;&#39;}".strip() if not text_before or not text_after: return 0.0 # 标准化文本 text_before = re.sub(r&#39;\s+&#39;, &#39; &#39;, str(text_before).lower()).strip() text_after = re.sub(r&#39;\s+&#39;, &#39; &#39;, str(text_after).lower()).strip() # 计算相似度 return difflib.SequenceMatcher(None, text_before, text_after).ratio() def compare_excel_files(self, before_path, after_path, before_marked_path, after_marked_path): """根据新规则比较Excel文件并记录详细日志""" # 加载工作簿 wb_before = load_workbook(before_path) wb_after = load_workbook(after_path) # 获取第一个工作表 ws_before = wb_before.active ws_after = wb_after.active # 记录工作表信息 self.logger.info(f"工作表名称: {ws_before.title}") self.logger.info(f"Before行数: {ws_before.max_row}, After行数: {ws_after.max_row}") self.logger.info("-" * 100) # 构建before文件的索引 exact_index = defaultdict(list) # 用于严格匹配的索引 (C, D, E) similarity_index = defaultdict(list) # 用于相似度匹配的索引 \(C\) for row_idx in range(1, ws_before.max_row + 1): c_value = ws_before.cell(row=row_idx, column=3).value d_value = ws_before.cell(row=row_idx, column=4).value e_value = ws_before.cell(row=row_idx, column=5).value # 检查D或E列是否为"link" is_link = (self.is_link_style(d_value) or self.is_link_style(e_value)) if is_link: # 链接行:添加到相似度索引 if c_value is not None: similarity_index[c_value].append(row_idx) else: # 非链接行:添加到严格匹配索引 key = (c_value, d_value, e_value) exact_index[key].append(row_idx) # 初始化结果统计 result = { "total_changes": 0, "added_rows": 0, "removed_rows": 0, "modified_rows": 0, "link_style_matches": 0 } # 记录匹配映射 match_map = {} # 记录before行的匹配状态 before_matched = [False] * (ws_before.max_row + 1) # 遍历after的每一行 for after_row_idx in range(1, ws_after.max_row + 1): c_value_after = ws_after.cell(row=after_row_idx, column=3).value d_value_after = ws_after.cell(row=after_row_idx, column=4).value e_value_after = ws_after.cell(row=after_row_idx, column=5).value matched = False matched_row = None match_type = "exact" # 检查是否为链接行(D或E列为"link") is_link = (self.is_link_style(d_value_after) or self.is_link_style(e_value_after)) # 根据行类型选择匹配策略 if is_link: # 链接行:使用相似度匹配策略 if c_value_after in similarity_index: best_match_idx = None best_similarity = 0.0 # 遍历所有具有相同C值的候选行 for before_row_idx in similarity_index[c_value_after]: if before_matched[before_row_idx]: continue # 计算IJ列相似度 similarity = self.calculate_ij_similarity( ws_before, before_row_idx, ws_after, after_row_idx ) if similarity > best_similarity: best_similarity = similarity best_match_idx = before_row_idx # 检查是否达到相似度阈值 if best_match_idx and best_similarity >= self.similarity_threshold: matched = True matched_row = best_match_idx before_matched[best_match_idx] = True match_map[after_row_idx] = best_match_idx match_type = "similarity" result["link_style_matches"] += 1 # 记录相似度匹配 self.logger.info(f"链接行相似度匹配: After行{after_row_idx} ↔ Before行{best_match_idx}") self.logger.info(f" 相似度: {best_similarity:.2f} (阈值: {self.similarity_threshold})") else: # 非链接行:使用严格匹配策略 key = (c_value_after, d_value_after, e_value_after) if key in exact_index: # 查找所有具有相同键值的候选行 for before_row_idx in exact_index[key]: if not before_matched[before_row_idx]: # 找到匹配行 matched = True matched_row = before_row_idx before_matched[before_row_idx] = True match_map[after_row_idx] = before_row_idx match_type = "exact" # 记录匹配 self.logger.info(f"非链接行严格匹配: After行{after_row_idx} ↔ Before行{before_row_idx}") break # 处理匹配结果 if matched: # 初始化变更列列表 changed_columns = [] # 比较IJLMNOP列 col_names = {9: "I", 10: "J", 12: "L", 13: "M", 14: "N", 15: "O", 16: "P"} for col in [9, 10, 12, 13, 14, 15, 16]: col_match, diff = self.compare_cells( ws_before, ws_after, matched_row, after_row_idx, col, col_names[col] ) if not col_match: changed_columns.append(col_names[col]) # 记录差异 self.logger.info(f" 列{col_names[col]}变更: {diff}") # 如果有变更列 if changed_columns: # 标记DE列为蓝色 self.mark_cell(ws_before, matched_row, 4, self.blue_fill) # Before D列 self.mark_cell(ws_before, matched_row, 5, self.blue_fill) # Before E列 self.mark_cell(ws_after, after_row_idx, 4, self.blue_fill) # After D列 self.mark_cell(ws_after, after_row_idx, 5, self.blue_fill) # After E列 # 将有变更的列字母写入R列 changed_str = ",".join(changed_columns) ws_before.cell(row=matched_row, column=18).value = changed_str ws_after.cell(row=after_row_idx, column=18).value = changed_str # 记录日志 self.logger.info(f" 行{after_row_idx}的D、E列标记蓝色,变更列: {changed_str}") result["modified_rows"] += 1 result["total_changes"] += len(changed_columns) # 未匹配处理 for after_row_idx in range(1, ws_after.max_row + 1): if after_row_idx not in match_map: # 未找到匹配行,标记为新增 self.mark_cell(ws_after, after_row_idx, 1, self.yellow_fill) self.mark_cell(ws_after, after_row_idx, 2, self.yellow_fill) self.mark_cell(ws_after, after_row_idx, 3, self.yellow_fill) # 记录新增行 row_type = "链接行" if is_link else "非链接行" c_val = ws_after.cell(row=after_row_idx, column=3).value self.logger.info(f"新增{row_type}: After行{after_row_idx} (C列值: {c_val}), 标记黄色") result["added_rows"] += 1 result["total_changes"] += 1 # 处理未匹配的before行(删除) for before_row_idx in range(1, ws_before.max_row + 1): if not before_matched[before_row_idx]: self.mark_cell(ws_before, before_row_idx, 1, self.red_fill) self.mark_cell(ws_before, before_row_idx, 2, self.red_fill) self.mark_cell(ws_before, before_row_idx, 3, self.red_fill) # 确定行类型 d_val = ws_before.cell(row=before_row_idx, column=4).value e_val = ws_before.cell(row=before_row_idx, column=5).value is_link = (self.is_link_style(d_val) or self.is_link_style(e_val)) row_type = "链接行" if is_link else "非链接行" # 记录删除行 c_val = ws_before.cell(row=before_row_idx, column=3).value self.logger.info(f"删除{row_type}: Before行{before_row_idx} (C列值: {c_val}), 标记黄色") result["removed_rows"] += 1 result["total_changes"] += 1 # 保存匹配映射到日志 self.logger.info("\n行匹配映射:") for after_idx, before_idx in match_map.items(): self.logger.info(f"After行{after_idx} ↔ Before行{before_idx}") # 保存标记后的文件 wb_before.save(before_marked_path) wb_after.save(after_marked_path) # 记录总结 self.logger.info("\n比较总结:") self.logger.info(f"总变更点: {result[&#39;total_changes&#39;]}") self.logger.info(f"新增行: {result[&#39;added_rows&#39;]}") self.logger.info(f"删除行: {result[&#39;removed_rows&#39;]}") self.logger.info(f"修改行: {result[&#39;modified_rows&#39;]}") self.logger.info(f"相似度匹配次数: {result[&#39;link_style_matches&#39;]}") return result def compare_cells(self, ws_before, ws_after, before_row, after_row, col, col_name): """比较两个工作表中指定单元格的值并返回差异详情""" val_before = ws_before.cell(row=before_row, column=col).value val_after = ws_after.cell(row=after_row, column=col).value # 处理空值 if val_before is None and val_after is None: return True, "" if val_before is None or val_after is None: return False, f"{col_name}列: 空值差异 (Before: {val_before}, After: {val_after})" # 数值比较 if isinstance(val_before, (int, float)) and isinstance(val_after, (int, float)): if abs(val_before - val_after) < 1e-9: return True, "" # 字符串比较 str_before = str(val_before).strip() str_after = str(val_after).strip() if str_before == str_after: return True, "" # 处理长文本差异 if len(str_before) > 50 or len(str_after) > 50: diff = f"{col_name}列: 内容长度不同 (Before: {len(str_before)}字符, After: {len(str_after)}字符)" else: diff = f"Before行{before_row} {col_name}列: [{str_before}]\nAfter行{after_row} {col_name}列: [{str_after}]" return False, diff def mark_cell(self, worksheet, row, col, fill): """标记单元格背景色""" cell = worksheet.cell(row=row, column=col) cell.fill = fill def mark_deleted_file(self, src_path, dest_path): """标记整个文件为删除状态""" wb = load_workbook(src_path) ws = wb.active # 记录日志 self.logger.info(f"文件被删除: {os.path.basename(src_path)}") self.logger.info(f"总行数: {ws.max_row}") self.logger.info("所有行标记为黄色") for row in range(1, ws.max_row + 1): self.mark_cell(ws, row, 3, self.red_fill) wb.save(dest_path) return { "total_changes": ws.max_row, "added_rows": 0, "removed_rows": ws.max_row, "modified_rows": 0, "link_style_matches": 0 } def mark_added_file(self, src_path, dest_path): """标记整个文件为新增状态""" wb = load_workbook(src_path) ws = wb.active # 记录日志 self.logger.info(f"文件为新增: {os.path.basename(src_path)}") self.logger.info(f"总行数: {ws.max_row}") self.logger.info("所有行标记为黄色") for row in range(1, ws.max_row + 1): self.mark_cell(ws, row, 3, self.yellow_fill) wb.save(dest_path) return { "total_changes": ws.max_row, "added_rows": ws.max_row, "removed_rows": 0, "modified_rows": 0, "link_style_matches": 0 } def open_marked_folder(self): folder = self.folder_path.get() if not folder: messagebox.showerror("错误", "请先选择文件夹") return marked_dir = os.path.join(folder, "before_marked") if not os.path.exists(marked_dir): marked_dir = os.path.join(folder, "after_marked") if os.path.exists(marked_dir): os.startfile(marked_dir) else: messagebox.showinfo("提示", "请先执行比较操作") def open_reports_folder(self): folder = self.folder_path.get() if not folder: messagebox.showerror("错误", "请先选择文件夹") return reports_dir = os.path.join(folder, "reports") if os.path.exists(reports_dir): os.startfile(reports_dir) else: messagebox.showinfo("提示", "报告目录不存在,请先执行比较操作") def clear_results(self): """清空结果区域""" self.result_text.delete(1.0, tk.END) def export_csv_report(self): """导出CSV格式的报告摘要""" folder = self.folder_path.get() if not folder: messagebox.showerror("错误", "请先选择文件夹") return reports_dir = os.path.join(folder, "reports") if not os.path.exists(reports_dir): messagebox.showinfo("提示", "请先执行比较操作") return csv_path = os.path.join(reports_dir, "comparison_summary.csv") try: # 收集日志文件中的摘要信息 summary_data = [] for log_file in os.listdir(reports_dir): if log_file.endswith(".log"): log_path = os.path.join(reports_dir, log_file) file_pair = log_file.replace(".log", "") added = removed = modified = total = link_matches = 0 # 从日志中提取摘要信息 with open(log_path, &#39;r&#39;, encoding=&#39;utf-8&#39;) as f: for line in f: if "总变更点:" in line: total = int(line.split(":")[1].strip()) elif "新增行:" in line: added = int(line.split(":")[1].strip()) elif "删除行:" in line: removed = int(line.split(":")[1].strip()) elif "修改行:" in line: modified = int(line.split(":")[1].strip()) elif "相似度匹配次数:" in line: link_matches = int(line.split(":")[1].strip()) summary_data.append({ "file_pair": file_pair, "total_changes": total, "added_rows": added, "removed_rows": removed, "modified_rows": modified, "link_style_matches": link_matches, "log_file": log_file }) # 写入CSV文件 with open(csv_path, &#39;w&#39;, newline=&#39;&#39;, encoding=&#39;utf-8&#39;) as csvfile: fieldnames = ["file_pair", "total_changes", "added_rows", "removed_rows", "modified_rows", "link_style_matches", "log_file"] writer = csv.DictWriter(csvfile, fieldnames=fieldnames) writer.writeheader() for row in summary_data: writer.writerow(row) messagebox.showinfo("成功", f"CSV报告已导出到: {csv_path}") os.startfile(reports_dir) except Exception as e: messagebox.showerror("错误", f"导出CSV报告失败: {str(e)}") if name == “main”: root = tk.Tk() app = EnhancedExcelComparatorApp(root) root.mainloop() 这个整不了图片,给我做一版可以处理图片的 比如说原after里面有批注,批注是一张图片,这个时候after_marked的文件夹里面的批注是空的,我希望解决这个问题
11-25
import os import math from datetime import datetime, date from openpyxl import load_workbook, Workbook from openpyxl.drawing.image import Image as OpenpyxlImage from openpyxl.styles import Font # ================== 配置路径 ================== folder_path = r&#39;D:\Class\能管比对\导出文件_月&#39; folder_path2 = r&#39;D:\Class\能管比对\导出文件合并&#39; output_result = r&#39;D:\Class\能管比对\每日结果\高变异列_CV大于0.3_整列分析.xlsx&#39; # 创建输出目录 os.makedirs(os.path.dirname(output_result), exist_ok=True) # 获取今天零点时间用于过滤 today = datetime.combine(date.today(), datetime.min.time()) # ========== 第一阶段:清洗数据并保存到 folder_path2 ========== processed_count = 0 for filename in os.listdir(folder_path): if not filename.endswith(&#39;.xlsx&#39;) or filename.startswith(&#39;~$&#39;): continue src_file = os.path.join(folder_path, filename) dst_file = os.path.join(folder_path2, filename) try: wb = load_workbook(src_file) ws = wb.active data = [list(row) for row in ws.iter_rows(values_only=True)] if len(data) <= 1: continue header, rows = data[0], data[1:] cleaned_rows = [header] # 保留表头 for row in rows: if not row: continue cell_val = row[0] try: # 统一解析为 datetime 类型 if isinstance(cell_val, datetime): dt = cell_val else: # 处理字符串格式如 &#39;2025/4/1&#39; 或 &#39;2025-04-01&#39; str_val = str(cell_val).strip() dt = datetime.fromisoformat(str_val.replace(&#39;/&#39;, &#39;-&#39;)) if dt >= today: continue # 排除今天及以后的数据 except Exception: continue # 解析失败则跳过该行 cleaned_rows.append(row) # 保存清洗后的数据 new_wb = Workbook() new_ws = new_wb.active for r in cleaned_rows: new_ws.append(r) new_wb.save(dst_file) processed_count += 1 except Exception as e: print(f"⚠️ 处理 {filename} 时出错:{e}") print(f"✅ 共清洗并保存了 {processed_count} 个文件") # ========== 第二阶段:分析 CV > 0.3 的列,并记录图像来源 ========== results = [] image_sources = [] for filename in os.listdir(folder_path2): if not filename.endswith(&#39;.xlsx&#39;) or filename.startswith(&#39;~$&#39;): continue file_path = os.path.join(folder_path2, filename) filename_no_ext = os.path.splitext(filename)[0] png_path = os.path.join(folder_path2, f"{filename_no_ext}.png") try: wb = load_workbook(file_path) data = [list(row) for row in wb.active.iter_rows(values_only=True)] if len(data) < 2: continue header, rows = data[0], data[1:] # 提取各列数值数据 col_data = [[] for _ in header] for row in rows: for i, v in enumerate(row): try: col_data[i].append(float(v)) except (TypeError, ValueError): pass # 非数值跳过 # 分析第2列开始(索引1起)的变异系数 for idx, col_name in enumerate(header[1:], start=1): vals = col_data[idx] n = len(vals) if n < 2: continue mean_val = sum(vals) / n if abs(mean_val) < 1e-8: # 避免除以0 continue variance = sum((x - mean_val) ** 2 for x in vals) / n std_val = math.sqrt(variance) cv = std_val / mean_val if cv > 0.3: max_val = max(vals) min_val = min(vals) # 构建待添加项 item = { &#39;来源文件&#39;: filename_no_ext, &#39;列名&#39;: col_name, &#39;有效数据点数&#39;: n, &#39;均值&#39;: round(mean_val, 6), &#39;标准差&#39;: round(std_val, 6), &#39;变异系数(CV)&#39;: round(cv, 6), &#39;最大值&#39;: round(max_val, 6), &#39;最小值&#39;: round(min_val, 6) } # === 特殊列白名单过滤逻辑 === # 替换原来的 if 判断 col_name_clean = str(filename_no_ext).strip() # 确保转字符串并去空格 if ( (&#39;CCoolingTower用水量&#39; in col_name_clean and max_val <= 15000) or (&#39;1st DI耗用量&#39; in col_name_clean and max_val <= 1500) or (&#39;冷却塔补水量&#39; in col_name and max_val <= 500) or (&#39;Fab1-电系统-2-行政后勤-其他辅助类-电梯&#39; in filename_no_ext and max_val <= 60) or (&#39;Fab1-电系统-1-行政后勤-生活类&#39; in filename_no_ext and &#39;食堂(KWH)&#39; in col_name and max_val <= 800) or (&#39;WWT曲线看板-WWT放流水质_Cu&#39; in filename_no_ext and max_val <= 0.15) # (&#39;低温热水系统&#39; in col_name_clean and max_val <= 300000) ): continue # 跳过这些“伪异常” results.append(item) image_sources.append({ &#39;source_file&#39;: filename_no_ext+&#39;-&#39;+col_name, # &#39;source_file&#39;: filename, &#39;png_path&#39;: png_path, &#39;exists&#39;: os.path.exists(png_path) }) except Exception as e: print(f"⚠️ 分析 {filename} 时出错:{e}") # ========== 第三阶段:生成最终结果 Excel(交换 Sheet 顺序) ========== if results: result_wb = Workbook() # --- 第一个 Sheet:对应图表展示 --- chart_sheet = result_wb.active # 新建的工作簿默认有一个 active sheet chart_sheet.title = "对应图表展示" chart_sheet.cell(row=1, column=1, value="来源文件") chart_sheet.cell(row=1, column=2, value="图表展示") current_row = 2 inserted_images_count = 0 seen_files = set() # 去重,避免重复插入同一文件图表 for item in image_sources: source_file = item[&#39;source_file&#39;] if source_file in seen_files: continue seen_files.add(source_file) # 写入来源文件名(A列) chart_sheet.cell(row=current_row, column=1, value=os.path.splitext(source_file)[0]) # 插入图片或错误提示(B列) if item[&#39;exists&#39;]: try: img = OpenpyxlImage(item[&#39;png_path&#39;]) img.width *= 0.5 img.height *= 0.5 chart_sheet.add_image(img, f&#39;B{current_row}&#39;) inserted_images_count += 1 height_in_rows = int(img.height / 15) + 3 except Exception as e: chart_sheet.cell(row=current_row, column=2, value=f"❌ 加载失败: {e}") chart_sheet.cell(row=current_row, column=2).font = Font(color="FF0000", size=9) height_in_rows = 5 else: chart_sheet.cell(row=current_row, column=2, value="❌ 图像未找到") chart_sheet.cell(row=current_row, column=2).font = Font(italic=True, color="888888", size=9) height_in_rows = 5 # 设置行高 for i in range(current_row, current_row + height_in_rows): chart_sheet.row_dimensions[i].height = 15 current_row += height_in_rows # --- 第二个 Sheet:高变异列数据 --- data_sheet = result_wb.create_sheet("高变异列数据") data_sheet.append(list(results[0].keys())) for r in results: data_sheet.append(list(r.values())) # 设置列宽(可选优化) chart_sheet.column_dimensions[&#39;A&#39;].width = 50 chart_sheet.column_dimensions[&#39;B&#39;].width = 80 # 保存最终结果 result_wb.save(output_result) print(f"\n✅ 分析完成!共 {len(results)} 个高变异列已保存至:\n {output_result}") print(f"📊 共尝试插入 {len(seen_files)} 张图像,成功插入 {inserted_images_count} 张。") else: print("\n🟡 未发现变异系数大于 0.3 的列。") ——在白名单过滤后,现在还需要生成一个txt文档记录来源文件A列的情况,分别统计来源文件中包含‘Fab1-电系统’、‘Fab1-水系统’、‘Fab1-气化系统’、‘Fab1-空调系统’各有多少条,并在txt文档中记录
最新发布
11-27
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值