02_工作簿筛选标记1.0

知识点:

  1. 方法调用,传参无返回值
  2. for each …next循环
  3. AutoFilter 筛选属性
  4. StrComp(str1,str2,0/-1) 字符串比较
  5. ActiveSheet.Tab.ColorIndex 工作表标色
  6. AutoFilterMode 工作表筛选状态
Option Explicit
Sub 自动筛选()

Dim Town As String
Dim wsh As Worksheet
 
Call 取消筛选标记  ' 初始化表格状态
  
Town = InputBox("请输入街道名称!")  ' 交互式输入

For Each wsh In Sheets     '表格循环

    wsh.Select
    If wsh.Range("G1").Value = "乡(镇、街道)" Then    'G列标题判定
        wsh.Range("G1").AutoFilter Field:=7, Criteria1:=Town
       
    Else:                                               'I列标题判定
        wsh.Range("I2").AutoFilter Field:=9, Criteria1:=Town
       
        Call SheetColor(wsh, Town)  'sheet标签标色--->方法调用
    End If
Next wsh

Sheet1.Select

End Sub
Sub SheetColor(wsh As Worksheet, Town As String)     '---->挨个对比太繁琐,进阶为find查找高效无遗漏 详情对比03

    Dim rage As Range
    For Each rage In wsh.Range("G2:I500")
        If StrComp(rage.Value, Town) = 0 Then
           ActiveSheet.Tab.ColorIndex = 6
           Exit For
        End If
    Next rage
End Sub
Sub 取消筛选标记()
    Dim wsh As Worksheet
    For Each wsh In Sheets
        wsh.Tab.ColorIndex = -4142 '判断,并取消筛选状态
        wsh.AutoFilterMode = False '判断,并取消筛选状态
    Next wsh
End Sub
import os import re import time import logging import tkinter as tk from tkinter import ttk, messagebox, filedialog, scrolledtext from collections import Counter import openpyxl import datetime from openpyxl.styles import PatternFill from openpyxl.utils import get_column_letter # 增强的日志配置 - 每次启动清空日志 def setup_logger(): """配置并返回日志记录器,每次启动清空日志""" logger = logging.getLogger('SCLMultiProcessor') logger.setLevel(logging.DEBUG) # 清空日志文件 log_file = 'scl_processor.log' with open(log_file, 'w', encoding='utf-8') as f: f.write("") # 清空日志文件 # 创建文件处理器 file_handler = logging.FileHandler(log_file, encoding='utf-8') file_handler.setLevel(logging.DEBUG) # 创建控制台处理器 console_handler = logging.StreamHandler() console_handler.setLevel(logging.INFO) # 创建格式化器 formatter = logging.Formatter( '%(asctime)s - %(name)s - %(levelname)s - %(message)s', datefmt='%Y-%m-%d %H:%M:%S' ) # 应用格式化器 file_handler.setFormatter(formatter) console_handler.setFormatter(formatter) # 添加处理器 logger.addHandler(file_handler) logger.addHandler(console_handler) # 记录启动信息 logger.info("=" * 50) logger.info(f"SCL Multi-Processor 启动于 {datetime.datetime.now()}") logger.info("=" * 50) return logger # 获取日志记录器 logger = setup_logger() class ExcelColorDetector: """Excel单元格颜色检测器""" def __init__(self): self.NO_FILL = "无填充" self.RED_FILL = PatternFill(start_color='FFFF0000', end_color='FFFF0000', fill_type='solid') self.YELLOW_FILL = PatternFill(start_color='FFFFFF00', end_color='FFFFFF00', fill_type='solid') self.PURPLE_FILL = PatternFill(start_color='FF800080', end_color='FF800080', fill_type='solid') def is_no_fill(self, cell): """检查单元格是否无填充颜色""" try: # 检查是否存在填充属性 if not hasattr(cell, 'fill') or cell.fill is None: return True # 检查填充类型 if cell.fill.patternType is None or cell.fill.patternType == 'none': return True # 检查背景色是否为默认(白色或无) if hasattr(cell.fill, 'bgColor') and cell.fill.bgColor.rgb == '00000000': return True return False except Exception as e: logger.error(f"颜色检测错误: {str(e)}") return True class EmptyCellDetector: """空单元格检测器 - 增强版:添加规则7""" def __init__(self, color_detector): self.color_detector = color_detector # 固定列位置(列索引) self.COLUMN_MAPPING = { "A": 1, # A列 "B": 2, # B列 "C": 3, # C列 "D": 4, # D列 "E": 5, # E列 "F": 6, # F列 "G": 7, # G列 "H": 8, # H列 "I": 9, # I列 "J": 10, # J列 "K": 11, # K列 "L": 12, # L列 (差分種別) "M": 13, # M列 (变更内容) "N": 14, # N列 (判断列) "O": 15, # O列 (判断理由) "P": 16, # P列 (变更背景) "Q": 17 # Q列 (备注) } self.HEADER_ROW = 3 # 表头固定在第三行 self.DATA_START_ROW = 4 # 数据从第四行开始 self.SPEC_PREFIX = "仕様書No.:" # I列排除条件前缀 def detect_empty_cells(self, file_path, checksheet_base_path=None): """ 检测指定列的空单元格 返回: (missing_data, marked_file_path) - missing_data: 缺失数据列表 [{'row': 行号, 'col': 列号, 'col_name': 列名, 'message': 错误信息}, ...] - marked_file_path: 标记后的文件路径(如果有缺失数据) """ missing_data = [] marked_file_path = None try: logger.info(f"开始检测空单元格: {file_path}") # 加载SCL文件 scl_wb = openpyxl.load_workbook(file_path) scl_sheet = scl_wb.active logger.info(f"工作表加载成功: {scl_sheet.title}, 总行数: {scl_sheet.max_row}") # 检查是否有足够的数据行 if scl_sheet.max_row < self.DATA_START_ROW: logger.info("文件没有数据行,跳过检测") return missing_data, None # 获取文件名用于错误信息 file_name = os.path.basename(file_path) # 规则1: 检查A、B、C、K列的单元格是否为空 required_columns = ["A", "B", "C", "K"] for row_idx in range(self.DATA_START_ROW, scl_sheet.max_row + 1): for col_letter in required_columns: col_idx = self.COLUMN_MAPPING[col_letter] cell = scl_sheet.cell(row_idx, col_idx) if cell.value is None or str(cell.value).strip() == "": # 标记单元格为黄色 self.highlight_cell(scl_sheet, row_idx, col_idx, "FFFFFF00") missing_data.append({ 'row': row_idx, 'col': col_idx, 'col_name': col_letter, 'message': f"{file_name}: 行 {row_idx} 列 {col_letter} 数据缺失 (规则1)" }) logger.warning(f"行 {row_idx} 列 {col_letter} 数据缺失 (规则1)") # 规则5: 检查L-Q列是否为空(备注列表头可为空) required_cols = ["L", "M", "N", "O", "P", "Q"] for row_idx in range(self.DATA_START_ROW, scl_sheet.max_row + 1): for col_letter in required_cols: col_idx = self.COLUMN_MAPPING[col_letter] # 备注列的表头可以为空,但数据需要检查 if col_letter == "Q" and row_idx == self.HEADER_ROW: continue cell = scl_sheet.cell(row_idx, col_idx) if cell.value is None or str(cell.value).strip() == "": # 标记单元格为黄色 self.highlight_cell(scl_sheet, row_idx, col_idx, "FFFFFF00") missing_data.append({ 'row': row_idx, 'col': col_idx, 'col_name': col_letter, 'message': f"{file_name}: 行 {row_idx} 列 {col_letter} 数据缺失 (规则5)" }) logger.warning(f"行 {row_idx} 列 {col_letter} 数据缺失 (规则5)") # 规则6: 当文件名后缀为.docx时,检查整个文件的F列(排除I列前缀为"仕様書No.:"的行) file_name_cell = scl_sheet.cell(row=1, column=1) if file_name_cell.value and str(file_name_cell.value).lower().endswith('.docx'): logger.info(f"文件后缀为.docx,检查F列数据: {file_name}") for row_idx in range(self.DATA_START_ROW, scl_sheet.max_row + 1): # 检查I列是否有排除前缀 i_cell = scl_sheet.cell(row_idx, self.COLUMN_MAPPING["I"]) i_value = str(i_cell.value).strip() if i_cell.value else "" # 如果I列以指定前缀开头,则跳过该行 if i_value.startswith(self.SPEC_PREFIX): continue # 检查F列是否为空 col_idx = self.COLUMN_MAPPING["F"] cell = scl_sheet.cell(row_idx, col_idx) if cell.value is None or str(cell.value).strip() == "": # 标记单元格为黄色 self.highlight_cell(scl_sheet, row_idx, col_idx, "FFFFFF00") missing_data.append({ 'row': row_idx, 'col': col_idx, 'col_name': "F", 'message': f"{file_name}: 行 {row_idx} 列 F 数据缺失 (规则6: .docx文件, 排除条件:{self.SPEC_PREFIX})" }) logger.warning(f"行 {row_idx} 列 F 数据缺失 (规则6: .docx文件, 排除条件:{self.SPEC_PREFIX})") # 规则2-4: 基于L列的值检查其他列 for row_idx in range(self.DATA_START_ROW, scl_sheet.max_row + 1): l_cell = scl_sheet.cell(row_idx, self.COLUMN_MAPPING["L"]) l_value = str(l_cell.value).strip() if l_cell.value else "" # 规则2: 当L列为"変更"时,检查D、E、I、J列 if l_value == "変更": # 应有数据的列 required_cols = ["D", "E", "I", "J"] for col_letter in required_cols: col_idx = self.COLUMN_MAPPING[col_letter] cell = scl_sheet.cell(row_idx, col_idx) if cell.value is None or str(cell.value).strip() == "": # 标记单元格为黄色 self.highlight_cell(scl_sheet, row_idx, col_idx, "FFFFFF00") missing_data.append({ 'row': row_idx, 'col': col_idx, 'col_name': col_letter, 'message': f"{file_name}: 行 {row_idx} 列 {col_letter} 数据缺失 (规则2: L列='変更')" }) logger.warning(f"行 {row_idx} 列 {col_letter} 数据缺失 (规则2: L列='変更')") # 规则3: 当L列为"削除"时,检查D、I列应有数据,E、J列应无数据 elif l_value == "削除": # 应有数据的列 required_cols = ["D", "I"] for col_letter in required_cols: col_idx = self.COLUMN_MAPPING[col_letter] cell = scl_sheet.cell(row_idx, col_idx) if cell.value is None or str(cell.value).strip() == "": # 标记单元格为黄色 self.highlight_cell(scl_sheet, row_idx, col_idx, "FFFFFF00") missing_data.append({ 'row': row_idx, 'col': col_idx, 'col_name': col_letter, 'message': f"{file_name}: 行 {row_idx} 列 {col_letter} 数据缺失 (规则3: L列='削除')" }) logger.warning(f"行 {row_idx} 列 {col_letter} 数据缺失 (规则3: L列='削除')") # 应无数据的列 empty_cols = ["E", "J"] for col_letter in empty_cols: col_idx = self.COLUMN_MAPPING[col_letter] cell = scl_sheet.cell(row_idx, col_idx) if cell.value is not None and str(cell.value).strip() != "": # 标记单元格为红色(不应有数据但有数据) self.highlight_cell(scl_sheet, row_idx, col_idx, "FFFF0000") missing_data.append({ 'row': row_idx, 'col': col_idx, 'col_name': col_letter, 'message': f"{file_name}: 行 {row_idx} 列 {col_letter} 不应有数据 (规则3: L列='削除')" }) logger.warning(f"行 {row_idx} 列 {col_letter} 不应有数据 (规则3: L列='削除')") # 规则4: 当L列为"追加"时,检查E、J列应有数据,D、I列应无数据 elif l_value == "追加": # 应有数据的列 required_cols = ["E", "J"] for col_letter in required_cols: col_idx = self.COLUMN_MAPPING[col_letter] cell = scl_sheet.cell(row_idx, col_idx) if cell.value is None or str(cell.value).strip() == "": # 标记单元格为黄色 self.highlight_cell(scl_sheet, row_idx, col_idx, "FFFFFF00") missing_data.append({ 'row': row_idx, 'col': col_idx, 'col_name': col_letter, 'message': f"{file_name}: 行 {row_idx} 列 {col_letter} 数据缺失 (规则4: L列='追加')" }) logger.warning(f"行 {row_idx} 列 {col_letter} 数据缺失 (规则4: L列='追加')") # 应无数据的列 empty_cols = ["D", "I"] for col_letter in empty_cols: col_idx = self.COLUMN_MAPPING[col_letter] cell = scl_sheet.cell(row_idx, col_idx) if cell.value is not None and str(cell.value).strip() != "": # 标记单元格为红色(不应有数据但有数据) self.highlight_cell(scl_sheet, row_idx, col_idx, "FFFF0000") missing_data.append({ 'row': row_idx, 'col': col_idx, 'col_name': col_letter, 'message': f"{file_name}: 行 {row_idx} 列 {col_letter} 不应有数据 (规则4: L列='追加')" }) logger.warning(f"行 {row_idx} 列 {col_letter} 不应有数据 (规则4: L列='追加')") # 规则7: 变更背景检查 if checksheet_base_path: background_errors = self.check_background_rule7( scl_sheet, file_name, checksheet_base_path ) missing_data.extend(background_errors) # 如果有缺失数据,保存标记后的文件 if missing_data: marked_file_path = file_path.replace(".xlsx", "_marked.xlsx") scl_wb.save(marked_file_path) logger.warning(f"检测到空单元格,文件已标记并保存为: {marked_file_path}") return missing_data, marked_file_path except Exception as e: error_msg = f"空单元格检测失败: {str(e)}" logger.exception(f"空单元格检测失败: {file_path} - {str(e)}") return [{'row': 0, 'col': 0, 'col_name': '错误', 'message': error_msg}], None def highlight_cell(self, sheet, row, col, color="FFFF0000"): """为单元格设置背景色""" try: fill = PatternFill(start_color=color, end_color=color, fill_type="solid") sheet.cell(row=row, column=col).fill = fill return True except Exception as e: logger.error(f"设置单元格颜色失败: {str(e)}") return False def extract_folder_name(self, file_name): """从文件名中提取文件夹名 (SCL_和.xlsx之间的部分)""" pattern = r'SCL_(.*?)\.xlsx' match = re.search(pattern, file_name) if match: return match.group(1) return None def read_checksheet_data(self, checksheet_path): """从CheckSheet文件中读取数据(第8行开始,B、C列)""" try: logger.info(f"读取CheckSheet文件: {checksheet_path}") # 加载CheckSheet文件 wb = openpyxl.load_workbook(checksheet_path) sheet = wb.active # 表头在第7行 header_row = 7 # 数据从第8行开始 data_start_row = 8 # 存储读取到的数据 data = [] # 遍历数据行 for row_idx in range(data_start_row, sheet.max_row + 1): # 读取B列数据(合并后的主数据) b_cell = sheet.cell(row=row_idx, column=2) b_value = str(b_cell.value).strip() if b_cell.value else "" # 如果B列为空,停止读取 if not b_value: break # 读取C列数据 c_cell = sheet.cell(row=row_idx, column=3) c_value = str(c_cell.value).strip() if c_cell.value else "" # 只添加非空数据 if b_value or c_value: data.append((b_value, c_value)) logger.info(f"从CheckSheet读取到 {len(data)} 条数据") return data except Exception as e: logger.error(f"读取CheckSheet文件失败: {str(e)}") return [] def check_background_rule7(self, scl_sheet, file_name, checksheet_base_path): """检查变更背景是否符合规则7""" errors = [] try: logger.info(f"开始检查规则7: {file_name}") # 提取文件夹名 folder_name = self.extract_folder_name(file_name) if not folder_name: logger.warning(f"无法从文件名提取文件夹名: {file_name}") return [] # 构建CheckSheet文件路径 checksheet_dir = os.path.join(checksheet_base_path, folder_name) if not os.path.exists(checksheet_dir): logger.warning(f"文件夹不存在: {checksheet_dir}") return [] # 查找CheckSheet文件 checksheet_files = [f for f in os.listdir(checksheet_dir) if f.startswith("CheckSheet_") and f.endswith((".xlsx", ".xls"))] if not checksheet_files: logger.warning(f"未找到CheckSheet文件 in {checksheet_dir}") return [] # 使用第一个找到的CheckSheet文件 checksheet_file = checksheet_files[0] checksheet_full_path = os.path.join(checksheet_dir, checksheet_file) logger.info(f"使用CheckSheet文件: {checksheet_full_path}") # 读取CheckSheet数据 - 使用修复后的方法 checksheet_data = self.read_checksheet_data(checksheet_full_path) if not checksheet_data: logger.warning(f"CheckSheet文件无有效数据: {checksheet_full_path}") return [] # 打印读取到的sheet名数据 logger.info(f"读取到的CheckSheet数据: {checksheet_data}") # 提取所有sheet名(来自CheckSheet的B列和C列) sheet_names = set() for b_value, c_value in checksheet_data: if b_value: sheet_names.add(b_value) if c_value: sheet_names.add(c_value) logger.info(f"提取到 {len(sheet_names)} 个唯一sheet名") # 创建变更背景错误列表 background_errors = [] # 遍历C列,查找匹配的sheet名 for row_idx in range(self.DATA_START_ROW, scl_sheet.max_row + 1): c_cell = scl_sheet.cell(row_idx, self.COLUMN_MAPPING["C"]) c_value = str(c_cell.value).strip() if c_cell.value is not None else "" # 检查是否在sheet名列表中 if c_value and c_value in sheet_names: # 检查P列(变更背景) p_cell = scl_sheet.cell(row_idx, self.COLUMN_MAPPING["P"]) p_value = str(p_cell.value).strip() if p_cell.value is not None else "" # 检查是否为空或为"-" if not p_value or p_value == "-": error_msg = f"{file_name}: 行 {row_idx} - C列值 '{c_value}' 匹配, 但变更背景为空或'-' (规则7)" background_errors.append({ 'row': row_idx, 'col': self.COLUMN_MAPPING["P"], 'col_name': "P", 'message': error_msg }) logger.warning(error_msg) # 标记错误单元格 for error in background_errors: self.highlight_cell(scl_sheet, error['row'], error['col'], "FFFFFF00") return background_errors except Exception as e: error_msg = f"检查规则7失败: {str(e)}" logger.exception(error_msg) return [{ 'row': 0, 'col': 0, 'col_name': '错误', 'message': f"{file_name}: {error_msg} (规则7)" }] class SCLStatsProcessor: """SCL统计处理器 - 优化版:固定列位置""" def __init__(self, color_detector): self.color_detector = color_detector # 固定列位置 self.COLUMN_MAPPING = { "差分種別": 12, # L列 "变更内容": 13, # M列 "判断列": 14, # N列 "判断理由": 15, # O列 "变更背景": 16, # P列 "备注": 17 # Q列 } # 统计结果列映射 self.target_columns = { "diff_no_fill": 16, # P列 "diff_fill": 23, # W列 "diff_add_no_fill": 27, # AA列 "diff_add_fill": 30, # AD列 "diff_change_no_fill": 34, # AH列 "diff_change_fill": 37, # AK列 "diff_delete_no_fill": 42, # AP列 "diff_delete_fill": 45, # AS列 "valid_yes_no_fill": 50, # AX列 "valid_yes_fill": 53, # BA列 "valid_no_no_fill": 57, # BE列 "valid_no_fill": 60, # BH列 "valid_yes_reason_no_fill": 62, # BL列 "valid_yes_reason_fill": 65, # BO列 "valid_no_reason_no_fill": 71, # BS列 "valid_no_reason_fill": 74, # BV列 "background_no_fill": 78, # BZ列 "background_fill": 85 # CG列 } self.HEADER_ROW = 3 # 表头固定在第三行 self.DATA_START_ROW = 4 # 数据从第四行开始 def process_scl_file(self, file_path): """ 处理单个SCL文件并返回所有统计结果 返回: (results, missing_data) - results: 规则统计结果 - missing_data: 缺失数据列表 [{'row': 行号, 'col': 列号, 'col_name': 列名, 'message': 错误信息}, ...] """ results = {} missing_data = [] # 存储缺失数据信息 try: logger.info(f"开始处理SCL文件: {file_path}") # 加载SCL文件 scl_wb = openpyxl.load_workbook(file_path) scl_sheet = scl_wb.active logger.info(f"工作表加载成功: {scl_sheet.title}, 总行数: {scl_sheet.max_row}") # 检查是否有足够的数据行 if scl_sheet.max_row < self.DATA_START_ROW: logger.info("文件没有数据行,跳过处理") return results, missing_data # 获取文件名用于错误信息 file_name = os.path.basename(file_path) # 初始化统计结果 stats = { "diff_no_fill": [], # 规则1: 変更内容无颜色填充 "diff_fill": [], # 规则2: 変更内容有颜色填充 "diff_add_no_fill": [], # 规则3: 差分种别="追加"且无颜色填充 "diff_add_fill": [], # 规则4: 差分种别="追加"且有颜色填充 "diff_change_no_fill": [], # 规则5: 差分种别="変更"且无颜色填充 "diff_change_fill": [], # 规则6: 差分种别="変更"且有颜色填充 "diff_delete_no_fill": [], # 规则7: 差分种别="削除"且无颜色填充 "diff_delete_fill": [], # 规则8: 差分种别="削除"且有颜色填充 "valid_yes_no_fill": [], # 规则9: 判断="有意"且无颜色填充 "valid_yes_fill": [], # 规则10: 判断="有意"且有颜色填充 "valid_no_no_fill": [], # 规则11: 判断="無効"且无颜色填充 "valid_no_fill": [], # 规则12: 判断="無効"且有颜色填充 "valid_yes_reason_no_fill": [], # 规则13: 判断="有意"且理由无颜色填充 "valid_yes_reason_fill": [], # 规则14: 判断="有意"且理由有颜色填充 "valid_no_reason_no_fill": [], # 规则15: 判断="無効"且理由无颜色填充 "valid_no_reason_fill": [], # 规则16: 判断="無効"且理由有颜色填充 "background_no_fill": [], # 规则17: 背景无颜色填充 "background_fill": [] # 规则18: 背景有颜色填充 } # 遍历所有数据行 for row_idx in range(self.DATA_START_ROW, scl_sheet.max_row + 1): # 获取所有需要的单元格 diff_content_cell = scl_sheet.cell(row_idx, self.COLUMN_MAPPING["变更内容"]) diff_cell = scl_sheet.cell(row_idx, self.COLUMN_MAPPING["差分種別"]) note_cell = scl_sheet.cell(row_idx, self.COLUMN_MAPPING["备注"]) valid_cell = scl_sheet.cell(row_idx, self.COLUMN_MAPPING["判断列"]) reason_cell = scl_sheet.cell(row_idx, self.COLUMN_MAPPING["判断理由"]) background_cell = scl_sheet.cell(row_idx, self.COLUMN_MAPPING["变更背景"]) # 获取备注值 note_value = str(note_cell.value).strip() if note_cell and note_cell.value else None # 规则1: 変更内容无颜色填充 if self.color_detector.is_no_fill(diff_content_cell) and note_value: stats["diff_no_fill"].append(note_value) # 规则2: 変更内容有颜色填充 if not self.color_detector.is_no_fill(diff_content_cell) and note_value: stats["diff_fill"].append(note_value) # 规则3: 差分种别="追加"且无颜色填充 if (diff_cell.value == "追加" and self.color_detector.is_no_fill(diff_cell) and note_value): stats["diff_add_no_fill"].append(note_value) # 规则4: 差分种别="追加"且有颜色填充 if (diff_cell.value == "追加" and not self.color_detector.is_no_fill(diff_cell) and note_value): stats["diff_add_fill"].append(note_value) # 规则5: 差分种别="変更"且无颜色填充 if (diff_cell.value == "変更" and self.color_detector.is_no_fill(diff_cell) and note_value): stats["diff_change_no_fill"].append(note_value) # 规则6: 差分种别="変更"且有颜色填充 if (diff_cell.value == "変更" and not self.color_detector.is_no_fill(diff_cell) and note_value): stats["diff_change_fill"].append(note_value) # 规则7: 差分种别="削除"且无颜色填充 if (diff_cell.value == "削除" and self.color_detector.is_no_fill(diff_cell) and note_value): stats["diff_delete_no_fill"].append(note_value) # 规则8: 差分种别="削除"且有颜色填充 if (diff_cell.value == "削除" and not self.color_detector.is_no_fill(diff_cell) and note_value): stats["diff_delete_fill"].append(note_value) # 规则9: 判断="有意"且无颜色填充 if (valid_cell.value == "有意" and self.color_detector.is_no_fill(valid_cell) and note_value): stats["valid_yes_no_fill"].append(note_value) # 规则10: 判断="有意"且有颜色填充 if (valid_cell.value == "有意" and not self.color_detector.is_no_fill(valid_cell) and note_value): stats["valid_yes_fill"].append(note_value) # 规则11: 判断="無効"且无颜色填充 if (valid_cell.value == "無効" and self.color_detector.is_no_fill(valid_cell) and note_value): stats["valid_no_no_fill"].append(note_value) # 规则12: 判断="無効"且有颜色填充 if (valid_cell.value == "無効" and not self.color_detector.is_no_fill(valid_cell) and note_value): stats["valid_no_fill"].append(note_value) # 规则13: 判断="有意"且理由无颜色填充 if (valid_cell.value == "有意" and self.color_detector.is_no_fill(reason_cell) and note_value): stats["valid_yes_reason_no_fill"].append(note_value) # 规则14: 判断="有意"且理由有颜色填充 if (valid_cell.value == "有意" and not self.color_detector.is_no_fill(reason_cell) and note_value): stats["valid_yes_reason_fill"].append(note_value) # 规则15: 判断="無効"且理由无颜色填充 if (valid_cell.value == "無効" and self.color_detector.is_no_fill(reason_cell) and note_value): stats["valid_no_reason_no_fill"].append(note_value) # 规则16: 判断="無効"且理由有颜色填充 if (valid_cell.value == "無効" and not self.color_detector.is_no_fill(reason_cell) and note_value): stats["valid_no_reason_fill"].append(note_value) # 规则17: 背景无颜色填充 if self.color_detector.is_no_fill(background_cell) and note_value: stats["background_no_fill"].append(note_value) # 规则18: 背景有颜色填充 if not self.color_detector.is_no_fill(background_cell) and note_value: stats["background_fill"].append(note_value) # 处理统计结果 for rule, values in stats.items(): if not values: results[rule] = "/" logger.info(f"{rule}: 无数据") else: counter = Counter(values) result_lines = [f"{value},{count}" for value, count in counter.most_common()] results[rule] = "\n".join(result_lines) logger.info(f"{rule}: 找到 {len(values)} 条数据") return results, missing_data except Exception as e: error_msg = f"处理SCL文件失败: {str(e)}" logger.exception(f"处理SCL文件失败: {file_path} - {str(e)}") # 返回错误信息 return {rule: f"错误: {str(e)}" for rule in self.target_columns}, [{ 'row': 0, 'col': 0, 'col_name': '错误', 'message': error_msg }] class SCLMultiProcessor: def __init__(self, root): self.root = root self.root.title("SCL文件处理系统 - 增强版") self.root.geometry("1000x700") # 初始化变量 self.color_detector = ExcelColorDetector() self.empty_cell_detector = EmptyCellDetector(self.color_detector) self.stats_processor = SCLStatsProcessor(self.color_detector) self.progress_var = tk.DoubleVar() # 创建主框架 self.main_frame = ttk.Frame(root, padding="10") self.main_frame.pack(fill=tk.BOTH, expand=True) # 创建UI self.create_ui() # 记录UI初始化完成 logger.info("用户界面初始化完成") def create_ui(self): """创建用户界面""" # 文件选择区域 file_frame = ttk.LabelFrame(self.main_frame, text="文件选择", padding="10") file_frame.pack(fill=tk.X, pady=5) # 输入文件选择 input_frame = ttk.Frame(file_frame) input_frame.pack(fill=tk.X, pady=5) ttk.Label(input_frame, text="主输入文件:").pack(side=tk.LEFT, padx=5) self.input_path_var = tk.StringVar() input_entry = ttk.Entry(input_frame, textvariable=self.input_path_var, width=70) input_entry.pack(side=tk.LEFT, fill=tk.X, expand=True, padx=5) ttk.Button(input_frame, text="浏览...", command=self.browse_input_file).pack(side=tk.LEFT, padx=5) # CheckSheet路径选择 checksheet_frame = ttk.Frame(file_frame) checksheet_frame.pack(fill=tk.X, pady=5) ttk.Label(checksheet_frame, text="CheckSheet路径:").pack(side=tk.LEFT, padx=5) self.checksheet_path_var = tk.StringVar() checksheet_entry = ttk.Entry(checksheet_frame, textvariable=self.checksheet_path_var, width=70) checksheet_entry.pack(side=tk.LEFT, fill=tk.X, expand=True, padx=5) ttk.Button(checksheet_frame, text="浏览...", command=self.browse_checksheet_path).pack(side=tk.LEFT, padx=5) # 操作模式选择区域 mode_frame = ttk.LabelFrame(self.main_frame, text="操作模式", padding="10") mode_frame.pack(fill=tk.X, pady=5) # 添加操作模式单选按钮 self.operation_mode = tk.StringVar(value="stats") # 默认选择统计模式 ttk.Radiobutton(mode_frame, text="统计功能", variable=self.operation_mode, value="stats").pack(side=tk.LEFT, padx=10) ttk.Radiobutton(mode_frame, text="检测SCL文件格式", variable=self.operation_mode, value="format_check").pack(side=tk.LEFT, padx=10) # 配置区域 config_frame = ttk.LabelFrame(self.main_frame, text="处理配置", padding="10") config_frame.pack(fill=tk.X, pady=5) # 搜索选项 search_frame = ttk.Frame(config_frame) search_frame.pack(fill=tk.X, pady=5) ttk.Label(search_frame, text="文件前缀:").grid(row=0, column=0, padx=5, sticky=tk.W) self.prefix_var = tk.StringVar(value="SCL_") ttk.Entry(search_frame, textvariable=self.prefix_var, width=10).grid(row=0, column=1, padx=5, sticky=tk.W) # 添加性能提示 ttk.Label(search_frame, text="(表头固定在第3行,数据从第4行开始)").grid(row=0, column=2, padx=5, sticky=tk.W) # 日志选项 log_frame = ttk.Frame(config_frame) log_frame.pack(fill=tk.X, pady=5) ttk.Label(log_frame, text="日志级别:").grid(row=0, column=0, padx=5, sticky=tk.W) self.log_level_var = tk.StringVar(value="INFO") log_level_combo = ttk.Combobox( log_frame, textvariable=self.log_level_var, width=10, state="readonly" ) log_level_combo['values'] = ('DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL') log_level_combo.grid(row=0, column=1, padx=5, sticky=tk.W) log_level_combo.bind("<<ComboboxSelected>>", self.change_log_level) # 处理按钮 btn_frame = ttk.Frame(self.main_frame) btn_frame.pack(fill=tk.X, pady=10) ttk.Button(btn_frame, text="开始处理", command=self.process_file).pack(side=tk.LEFT, padx=5) ttk.Button(btn_frame, text="查看日志", command=self.view_log).pack(side=tk.LEFT, padx=5) ttk.Button(btn_frame, text="导出配置", command=self.export_config).pack(side=tk.LEFT, padx=5) ttk.Button(btn_frame, text="加载配置", command=self.load_config).pack(side=tk.LEFT, padx=5) ttk.Button(btn_frame, text="退出", command=self.root.destroy).pack(side=tk.RIGHT, padx=5) # 进度条 progress_frame = ttk.Frame(self.main_frame) progress_frame.pack(fill=tk.X, pady=5) ttk.Label(progress_frame, text="处理进度:").pack(side=tk.LEFT, padx=5) self.progress_bar = ttk.Progressbar( progress_frame, variable=self.progress_var, maximum=100, length=700 ) self.progress_bar.pack(side=tk.LEFT, fill=tk.X, expand=True, padx=5) self.progress_label = ttk.Label(progress_frame, text="0%") self.progress_label.pack(side=tk.LEFT, padx=5) # 结果展示区域 result_frame = ttk.LabelFrame(self.main_frame, text="处理结果", padding="10") result_frame.pack(fill=tk.BOTH, expand=True, pady=5) # 结果文本框 self.result_text = scrolledtext.ScrolledText( result_frame, wrap=tk.WORD, height=20 ) self.result_text.pack(fill=tk.BOTH, expand=True) self.result_text.config(state=tk.DISABLED) # 状态栏 self.status_var = tk.StringVar(value="就绪") status_bar = ttk.Label(self.main_frame, textvariable=self.status_var, relief=tk.SUNKEN, anchor=tk.W) status_bar.pack(fill=tk.X, pady=5) logger.info("UI创建完成") def browse_checksheet_path(self): """浏览CheckSheet路径""" folder_path = filedialog.askdirectory() if folder_path: self.checksheet_path_var.set(folder_path) logger.info(f"已选择CheckSheet路径: {folder_path}") def change_log_level(self, event=None): """更改日志级别""" level = self.log_level_var.get() logger.setLevel(getattr(logging, level)) logger.info(f"日志级别已更改为: {level}") def browse_input_file(self): """浏览输入文件""" file_path = filedialog.askopenfilename( filetypes=[("Excel 文件", "*.xlsx *.xls"), ("所有文件", "*.*")] ) if file_path: self.input_path_var.set(file_path) self.input_file = file_path logger.info(f"已选择输入文件: {file_path}") def highlight_cell(self, sheet, row, col, color="FFFF0000"): """为单元格设置背景色""" try: fill = PatternFill(start_color=color, end_color=color, fill_type="solid") sheet.cell(row=row, column=col).fill = fill return True except Exception as e: logger.error(f"设置单元格颜色失败: {str(e)}") return False def process_file(self): """处理文件 - 每次处理保存数据,下次运行重新开始""" if not self.input_path_var.get(): messagebox.showwarning("警告", "请先选择输入文件") logger.warning("未选择输入文件") return try: # 重置结果 self.result_text.config(state=tk.NORMAL) self.result_text.delete(1.0, tk.END) self.result_text.insert(tk.END, "开始处理...\n") self.result_text.see(tk.END) self.result_text.config(state=tk.DISABLED) self.status_var.set("开始处理文件...") self.root.update() # 获取输入文件目录 input_file = self.input_path_var.get() input_dir = os.path.dirname(input_file) logger.info(f"开始处理文件: {input_file}") logger.info(f"文件目录: {input_dir}") # 使用openpyxl加载工作簿(保留格式) wb = openpyxl.load_workbook(input_file) sheet = wb.active logger.info(f"工作簿加载成功, 工作表: {sheet.title}") # 获取配置参数 prefix = self.prefix_var.get() operation_mode = self.operation_mode.get() checksheet_base_path = self.checksheet_path_var.get() if operation_mode == "format_check" else None logger.info(f"配置参数: 文件前缀={prefix}, 操作模式={operation_mode}") # 扫描E列(第5列) total_rows = sheet.max_row processed_count = 0 found_files = 0 problem_files = 0 logger.info(f"开始扫描E列, 总行数: {total_rows}") start_time = time.time() for row_idx in range(1, total_rows + 1): # 更新进度 progress = (row_idx / total_rows) * 100 self.progress_var.set(progress) self.progress_label.config(text=f"{progress:.1f}%") self.root.update() cell = sheet.cell(row=row_idx, column=5) cell_value = str(cell.value) if cell.value else "" # 检查是否包含前缀的文件名 if prefix in cell_value: # 提取文件名(可能有多个以空格分隔) file_names = re.findall(fr'{prefix}[^\s]+', cell_value) logger.info(f"行 {row_idx}: 找到文件: {', '.join(file_names)}") result_lines = [] file_has_problems = False # 标记当前行是否有问题文件 for file_name in file_names: file_path = os.path.join(input_dir, file_name) # 检查文件是否存在 if not os.path.exists(file_path): result_lines.append(f"{file_name}: 文件不存在") logger.warning(f"文件不存在: {file_path}") # 标记文件不存在的单元格为紫色 self.highlight_cell(sheet, row_idx, 5, "FF800080") file_has_problems = True problem_files += 1 continue # 根据操作模式选择处理方式 if operation_mode == "format_check": # 检测SCL文件格式模式(包含规则1-7) missing_data, marked_file_path = self.empty_cell_detector.detect_empty_cells( file_path, checksheet_base_path ) if missing_data: result_lines.append(f"{file_name}: 检测到格式问题!") for item in missing_data: result_lines.append(f" - {item['message']}") logger.warning(item['message']) file_has_problems = True problem_files += 1 else: result_lines.append(f"{file_name}: 格式检查通过") else: # 默认统计模式 results, missing_data = self.stats_processor.process_scl_file(file_path) # 如果有数据缺失 if missing_data: file_has_problems = True problem_files += 1 result_lines.append(f"{file_name}: 数据缺失!") for item in missing_data: result_lines.append(f" - {item['message']}") logger.warning(item['message']) else: result_lines.append(f"{file_name}: 处理完成") # 将结果写入主Excel文件的不同列 for rule_name, result_str in results.items(): target_col = self.stats_processor.target_columns.get(rule_name) if target_col: target_cell = sheet.cell(row=row_idx, column=target_col) target_cell.value = result_str found_files += 1 # 如果该行有文件存在问题,将E列单元格标红 if file_has_problems: self.highlight_cell(sheet, row_idx, 5) logger.info(f"行 {row_idx} E列单元格标记为红色(存在问题)") # 更新结果文本框 self.result_text.config(state=tk.NORMAL) self.result_text.insert( tk.END, f"行 {row_idx} 处理结果:\n" + "\n".join(result_lines) + "\n\n" ) self.result_text.see(tk.END) self.result_text.config(state=tk.DISABLED) processed_count += 1 # 保存修改后的Excel文件 - 每次处理保存数据 output_path = input_file.replace(".xlsx", "_processed.xlsx") wb.save(output_path) logger.info(f"结果已保存到: {output_path}") elapsed_time = time.time() - start_time status_msg = f"处理完成! 处理了 {processed_count} 个文件项, 耗时 {elapsed_time:.2f} 秒" if problem_files > 0: status_msg += f", {problem_files} 个文件存在问题" self.status_var.set(status_msg) logger.info(status_msg) # 更新结果文本框 self.result_text.config(state=tk.NORMAL) self.result_text.insert( tk.END, f"\n{status_msg}\n" f"结果已保存到: {output_path}\n" ) self.result_text.see(tk.END) self.result_text.config(state=tk.DISABLED) messagebox.showinfo("完成", status_msg) except Exception as e: error_msg = f"处理文件时出错: {str(e)}" logger.exception(f"处理文件时出错: {str(e)}") messagebox.showerror("错误", error_msg) self.status_var.set(f"错误: {str(e)}") # 更新结果文本框 self.result_text.config(state=tk.NORMAL) self.result_text.insert(tk.END, f"\n错误: {error_msg}\n") self.result_text.see(tk.END) self.result_text.config(state=tk.DISABLED) def view_log(self): """查看日志""" log_window = tk.Toplevel(self.root) log_window.title("处理日志") log_window.geometry("800x600") log_frame = ttk.Frame(log_window, padding="10") log_frame.pack(fill=tk.BOTH, expand=True) # 日志文本框 log_text = scrolledtext.ScrolledText( log_frame, wrap=tk.WORD, height=30 ) log_text.pack(fill=tk.BOTH, expand=True) # 读取日志文件 log_file = 'scl_processor.log' try: if not os.path.exists(log_file): with open(log_file, 'w', encoding='utf-8') as f: f.write("日志文件已创建,暂无记录\n") with open(log_file, 'r', encoding='utf-8') as f: log_content = f.read() log_text.insert(tk.END, log_content) except Exception as e: log_text.insert(tk.END, f"无法读取日志文件: {str(e)}") # 设置为只读 log_text.config(state=tk.DISABLED) # 添加刷新按钮 refresh_btn = ttk.Button(log_frame, text="刷新日志", command=lambda: self.refresh_log(log_text)) refresh_btn.pack(pady=5) logger.info("日志查看窗口已打开") def refresh_log(self, log_text): """刷新日志内容""" log_text.config(state=tk.NORMAL) log_text.delete(1.0, tk.END) try: with open('scl_processor.log', 'r', encoding='utf-8') as f: log_content = f.read() log_text.insert(tk.END, log_content) except Exception as e: log_text.insert(tk.END, f"刷新日志失败: {str(e)}") log_text.config(state=tk.DISABLED) log_text.see(tk.END) logger.info("日志已刷新") def export_config(self): """导出配置到文件""" config = { "prefix": self.prefix_var.get(), "log_level": self.log_level_var.get(), "operation_mode": self.operation_mode.get(), "checksheet_path": self.checksheet_path_var.get() } file_path = filedialog.asksaveasfilename( defaultextension=".json", filetypes=[("JSON 文件", "*.json"), ("所有文件", "*.*")] ) if file_path: try: with open(file_path, 'w', encoding='utf-8') as f: f.write(str(config)) messagebox.showinfo("成功", f"配置已导出到: {file_path}") logger.info(f"配置已导出到: {file_path}") except Exception as e: messagebox.showerror("错误", f"导出配置失败: {str(e)}") logger.error(f"导出配置失败: {str(e)}") def load_config(self): """从文件加载配置""" file_path = filedialog.askopenfilename( filetypes=[("JSON 文件", "*.json"), ("所有文件", "*.*")] ) if file_path: try: with open(file_path, 'r', encoding='utf-8') as f: config = eval(f.read()) self.prefix_var.set(config.get("prefix", "SCL_")) self.log_level_var.set(config.get("log_level", "INFO")) self.operation_mode.set(config.get("operation_mode", "stats")) self.checksheet_path_var.set(config.get("checksheet_path", "")) self.change_log_level() messagebox.showinfo("成功", "配置已加载") logger.info(f"配置已从 {file_path} 加载") except Exception as e: messagebox.showerror("错误", f"加载配置失败: {str(e)}") logger.error(f"加载配置失败: {str(e)}") if __name__ == "__main__": root = tk.Tk() app = SCLMultiProcessor(root) root.mainloop() 把刚刚的规则加入到程序中,给出完整代码
08-07
#!/usr/bin/env python # -*- coding: utf-8 -*- import os import re import sys import argparse import xlwt import locale from collections import defaultdict # 设置系统默认编码 if sys.version_info[0] < 3: reload(sys) sys.setdefaultencoding('utf-8') # 分区名称映射表(文件名 → 友好名称) PARTITION_NAME_MAP = { 'system.txt': 'system', 'system_ext.txt': 'system_ext', 'vendor.txt': 'vendor', 'product.txt': 'product', 'odm.txt': 'odm', 'my_product.txt': 'my_product', 'my_engineering.txt': 'my_engineering', 'my_stock.txt': 'my_stock', 'my_heytap.txt': 'my_heytap', 'my_company.txt': 'my_company', 'my_carrier.txt': 'my_carrier', 'my_region.txt': 'my_region', 'my_preload.txt': 'my_preload', 'my_manifest.txt': 'my_manifest', 'data.txt': 'data', 'my_bigball.txt': 'my_bigball', 'vendor_dlkm.txt': 'vendor_dlkm', 'system_dlkm.txt': 'system_dlkm' } # 分区显示顺序(按重要性排序) PARTITION_ORDER = [ 'system', 'system_ext', 'vendor', 'product', 'odm', 'my_product', 'my_engineering', 'my_stock', 'my_heytap', 'my_company', 'my_carrier', 'my_region', 'my_preload', 'my_manifest', 'my_bigball', 'vendor_dlkm', 'system_dlkm' ] def to_unicode(obj): """将对象转换为Unicode字符串""" if isinstance(obj, unicode): return obj if isinstance(obj, str): try: return obj.decode('utf-8') except UnicodeDecodeError: try: return obj.decode(locale.getpreferredencoding()) except: return obj.decode('utf-8', 'ignore') return unicode(obj) def parse_du_file(file_path): """ 解析du命令输出文件 修改:将输入值从KB转换为MB """ data = {} try: with open(file_path, 'r') as f: for line in f: # 跳过错误行和空行 if 'Permission denied' in line or 'No such file' in line or not line.strip(): continue # 使用灵活匹配模式处理不同格式(支持制表符) match = re.match(r'(\d+\.?\d*)[\s\t]+([^\s].*)', line.strip()) if match: size_kb, path = match.groups() size_kb = float(size_kb) size_mb = size_kb / 1024.0 # KB 转 MB # 路径规范化处理 path = path.strip() if path.startswith('y_'): path = 'my_' + path[2:] # 修复y_bigball → my_bigball path = os.path.normpath(path).replace('\\', '/') data[to_unicode(path)] = round(size_mb, 4) except IOError as e: print(u"警告: 无法读取文件 {}: {}".format(file_path, str(e))) return data def get_partition_name(filename): """从文件名获取分区名称""" return PARTITION_NAME_MAP.get(filename, filename.replace('.txt', '')) def is_main_partition_file(filename): """检查是否为主分区文件""" return filename in PARTITION_NAME_MAP def generate_dual_report(folder1, folder2, output_xlsx): """生成双机对比报告""" folder1_name = to_unicode(os.path.basename(os.path.normpath(folder1))) folder2_name = to_unicode(os.path.basename(os.path.normpath(folder2))) for folder in [folder1, folder2]: if not os.path.exists(folder): print(u"错误: 目录不存在 - {}".format(folder)) return u"目录 {} 不存在,请检查路径".format(folder) if not os.path.isdir(folder): print(u"错误: 路径不是目录 - {}".format(folder)) return u"{} 不是有效目录".format(folder) # 初始化数据结构 machine1_main_data = {} machine2_main_data = {} machine1_all_files = defaultdict(dict) machine2_all_files = defaultdict(dict) # 收集数据 for folder_path, main_dict, all_dict in [ (folder1, machine1_main_data, machine1_all_files), (folder2, machine2_main_data, machine2_all_files) ]: print(u"处理目录: {}".format(folder_path)) try: for filename in os.listdir(folder_path): if not filename.endswith('.txt'): continue file_path = os.path.join(folder_path, filename) filename_unicode = to_unicode(filename) partition_name = get_partition_name(filename_unicode) file_data = parse_du_file(file_path) all_dict[filename_unicode] = file_data if is_main_partition_file(filename_unicode): print(u"解析主分区文件: {}".format(file_path)) main_dict[filename_unicode] = file_data except OSError as e: print(u"目录访问错误: {}".format(str(e))) return u"无法访问目录 {}: {}".format(folder_path, str(e)) # 创建Excel工作簿 try: wb = xlwt.Workbook(encoding='utf-8') # 定义样式 header_style = xlwt.easyxf('font: bold on') title_style = xlwt.easyxf('font: bold on, height 280; align: wrap on, vert centre') normal_style = xlwt.easyxf() added_style = xlwt.easyxf('pattern: pattern solid, fore_colour light_green;') removed_style = xlwt.easyxf('pattern: pattern solid, fore_colour rose;') summary_style = xlwt.easyxf('font: bold on, color blue;') wrap_style = xlwt.easyxf('align: wrap on, vert centre') # ====== 创建总览Sheet页 ====== ws_overview = wb.add_sheet(u'总览') print(u"创建总览Sheet页(仅主文件数据)") current_row = 0 # 写入总览标题 ws_overview.write_merge( current_row, current_row, 0, 5, u"存储使用总览 (单位: MB)", title_style ) current_row += 1 # 写入文件夹名称 ws_overview.write(current_row, 1, folder1_name, header_style) ws_overview.write(current_row, 2, folder2_name, header_style) current_row += 1 # 写入表头(增加备注列) headers = [u'分区', u'总大小(MB)', u'总大小(MB)', u'差值(MB)', u'标记', u'备注(变化TOP5,>1MB)'] for col, header in enumerate(headers): ws_overview.write(current_row, col, header, header_style) current_row += 1 # 存储各分区汇总数据 overview_data = [] total_machine1 = 0.0 total_machine2 = 0.0 # 按分区顺序处理数据 for partition in PARTITION_ORDER: # 查找对应的文件名 filename = None for fname in machine1_main_data: if get_partition_name(fname) == partition: filename = fname break if not filename: continue # 获取主文件数据 data1 = machine1_main_data.get(filename, {}) data2 = machine2_main_data.get(filename, {}) # 关键修改:只取分区根目录大小(避免重复计算) partition_total1 = data1.get(partition, 0.0) partition_total2 = data2.get(partition, 0.0) diff = partition_total1 - partition_total2 # 更新总计 total_machine1 += partition_total1 total_machine2 += partition_total2 # 确定标记样式 if diff > 0: mark = u"增加" style = added_style elif diff < 0: mark = u"减少" style = removed_style else: mark = u"无变化" style = normal_style # 计算分区中变化大于1MB的TOP5路径(包括增大和减小) path_diffs = [] all_paths = set(data1.keys()) | set(data2.keys()) for path in all_paths: size1 = data1.get(path, 0.0) size2 = data2.get(path, 0.0) path_diff = size1 - size2 # 只记录变化量大于1MB的路径 if abs(path_diff) > 1.0: path_diffs.append((path, path_diff)) # 按变化量绝对值降序排序,取TOP5 path_diffs.sort(key=lambda x: abs(x[1]), reverse=True) top_notes = [] for i, (path, diff_val) in enumerate(path_diffs[:5]): # 截断过长的路径名 if len(path) > 50: path = u"..." + path[-47:] # 添加符号表示变化方向 sign = "+" if diff_val > 0 else "" top_notes.append(u"{}. {}: {}{:.2f}MB".format(i+1, path, sign, diff_val)) # 保存分区数据 overview_data.append({ 'name': partition, 'machine1': partition_total1, 'machine2': partition_total2, 'diff': diff, 'style': style, 'mark': mark, 'notes': u"\n".join(top_notes) if top_notes else u"无显著变化路径" }) # 写入行数据到总览页(增加备注列) ws_overview.write(current_row, 0, to_unicode(partition), style) ws_overview.write(current_row, 1, partition_total1, style) ws_overview.write(current_row, 2, partition_total2, style) ws_overview.write(current_row, 3, diff, style) ws_overview.write(current_row, 4, mark, style) ws_overview.write(current_row, 5, overview_data[-1]['notes'], wrap_style) current_row += 1 # 添加空行 current_row += 1 # 写入总计行 total_diff = total_machine1 - total_machine2 if total_diff > 0: total_mark = u"总增加" total_style = added_style elif total_diff < 0: total_mark = u"总减少" total_style = removed_style else: total_mark = u"无变化" total_style = normal_style ws_overview.write(current_row, 0, u"总计", header_style) ws_overview.write(current_row, 1, total_machine1, header_style) ws_overview.write(current_row, 2, total_machine2, header_style) ws_overview.write(current_row, 3, total_diff, header_style) ws_overview.write(current_row, 4, total_mark, header_style) ws_overview.write(current_row, 5, u"", header_style) # 备注列留空 # 设置备注列宽度(100字符) ws_overview.col(5).width = 256 * 100 # ====== 为每个文件创建单独的Sheet页 ====== # 获取所有唯一的文件名(两个文件夹的并集) all_filenames = sorted(set(machine1_all_files.keys()) | set(machine2_all_files.keys())) for filename in all_filenames: # 获取分区名称 partition_name = get_partition_name(filename) # 创建Sheet页名称(文件名不带扩展名) sheet_name = filename.replace('.txt', '') if len(sheet_name) > 31: # Excel sheet名称长度限制 sheet_name = sheet_name[:31] # 创建Sheet页 ws = wb.add_sheet(sheet_name) print(u"创建文件Sheet页: {}".format(sheet_name)) # 当前行指针 current_row = 0 # 写入分区标题 title = u"分区: {} - 文件: {} (单位: MB)".format(partition_name, filename) ws.write_merge( current_row, current_row, 0, 5, title, title_style ) current_row += 1 # 写入文件夹名称 ws.write_merge(current_row, current_row, 0, 1, folder1_name, header_style) ws.write_merge(current_row, current_row, 2, 3, folder2_name, header_style) ws.write(current_row, 4, u"差异(MB)", header_style) ws.write(current_row, 5, u"标记", header_style) current_row += 1 # 写入表头 headers = [u'路径', u'大小(MB)', u'路径', u'大小(MB)', u'差异(MB)', u'标记'] for col, header in enumerate(headers): ws.write(current_row, col, header, header_style) current_row += 1 # 获取文件数据 data1 = machine1_all_files.get(filename, {}) data2 = machine2_all_files.get(filename, {}) # 获取所有路径(合并两个文件夹的路径) all_paths = sorted(set(data1.keys()) | set(data2.keys())) # 初始化变化统计数据 total_increase = 0.0 # 增大总和 total_decrease = 0.0 # 减小总和 total_added = 0.0 # 新增文件总和 total_removed = 0.0 # 去除文件总和 # 写入数据行 for path in all_paths: size1 = data1.get(path, 0.0) size2 = data2.get(path, 0.0) # 计算差值 diff = size1 - size2 # 确定标记和样式 if size1 == 0 and size2 > 0: mark = u"除去" cell_style = removed_style total_removed += size2 elif size1 > 0 and size2 == 0: mark = u"新增" cell_style = added_style total_added += size1 else: if diff > 0: mark = u"增大" cell_style = added_style total_increase += diff elif diff < 0: mark = u"减小" cell_style = removed_style total_decrease += abs(diff) else: mark = u"相同" cell_style = normal_style # 写入行数据 # folder1列 if size1 > 0: ws.write(current_row, 0, path, cell_style) ws.write(current_row, 1, size1, cell_style) else: ws.write(current_row, 0, u"", cell_style) ws.write(current_row, 1, u"", cell_style) # folder2列 if size2 > 0: ws.write(current_row, 2, path, cell_style) ws.write(current_row, 3, size2, cell_style) else: ws.write(current_row, 2, u"", cell_style) ws.write(current_row, 3, u"", cell_style) # 差异和标记列 ws.write(current_row, 4, diff, cell_style) ws.write(current_row, 5, mark, cell_style) current_row += 1 # 添加文件汇总行(关键修改:只取分区根目录值) root_path = partition_name file_total1 = data1.get(root_path, 0.0) file_total2 = data2.get(root_path, 0.0) file_diff = file_total1 - file_total2 # 写入汇总行 ws.write(current_row, 0, u"文件汇总", header_style) ws.write(current_row, 1, file_total1, header_style) ws.write(current_row, 2, u"", header_style) ws.write(current_row, 3, file_total2, header_style) ws.write(current_row, 4, file_diff, header_style) ws.write(current_row, 5, u"", header_style) current_row += 1 # 添加变化分类统计行 message = ( u"{partition_name}路径下: " u"减小{total_decrease:.2f}MB " u"增大{total_increase:.2f}MB " u"新增文件{total_added:.2f}MB " u"减少文件{total_removed:.2f}MB" ).format( partition_name=partition_name, total_decrease=total_decrease, total_increase=total_increase, total_added=total_added, total_removed=total_removed ) ws.write_merge( current_row, current_row, 0, 5, message, summary_style ) # 保存文件 wb.save(output_xlsx) return u"对比报告已成功生成: {}".format(output_xlsx) except Exception as e: import traceback traceback.print_exc() return u"生成Excel文件时出错: {}".format(str(e)) def main(): """主函数:从本地目录读取数据并生成报告""" parser = argparse.ArgumentParser(description='设备存储空间对比工具') parser.add_argument('--test_dir', required=True, help='测试机数据目录') parser.add_argument('--ref_dir', required=True, help='对比机数据目录') parser.add_argument('--output', required=True, help='输出Excel文件路径') args = parser.parse_args() # 确保命令行参数正确转换为Unicode test_dir = to_unicode(args.test_dir) ref_dir = to_unicode(args.ref_dir) output = to_unicode(args.output) print(u"=" * 60) print(u"开始对比: 测试机({}) vs 对比机({})".format(test_dir, ref_dir)) print(u"=" * 60) result = generate_dual_report(test_dir, ref_dir, output) print(result) if __name__ == "__main__": main() 总览页的备注列从差异top改成增大大于1M的top5,并且只显示深层路径文件,例如 修改前: "1. my_product: +730.66MB 2. my_product/app: +322.14MB 3. my_product/priv-app: +277.58MB 4. my_product/del-app-pre: +193.83MB 5. my_product/priv-app/GoogleVelvet_CTS: +187.39MB" 修改后: 1. my_product/priv-app/GoogleVelvet_CTS: +187.39MB"
08-12
#!/usr/bin/env python -- coding: utf-8 -- import os import re import sys import argparse import xlwt import locale from collections import defaultdict 设置系统默认编码 if sys.version_info[0] < 3: reload(sys) sys.setdefaultencoding(‘utf-8’) 分区名称映射表(文件名 → 友好名称) PARTITION_NAME_MAP = { ‘system.txt’: ‘system’, ‘system_ext.txt’: ‘system_ext’, ‘vendor.txt’: ‘vendor’, ‘product.txt’: ‘product’, ‘odm.txt’: ‘odm’, ‘my_product.txt’: ‘my_product’, ‘my_engineering.txt’: ‘my_engineering’, ‘my_stock.txt’: ‘my_stock’, ‘my_heytap.txt’: ‘my_heytap’, ‘my_company.txt’: ‘my_company’, ‘my_carrier.txt’: ‘my_carrier’, ‘my_region.txt’: ‘my_region’, ‘my_preload.txt’: ‘my_preload’, ‘my_manifest.txt’: ‘my_manifest’, ‘data.txt’: ‘data’, ‘my_bigball.txt’: ‘my_bigball’, ‘vendor_dlkm.txt’: ‘vendor_dlkm’, ‘system_dlkm.txt’: ‘system_dlkm’ } 分区显示顺序(按重要性排序) PARTITION_ORDER = [ ‘system’, ‘system_ext’, ‘vendor’, ‘product’, ‘odm’, ‘my_product’, ‘my_engineering’, ‘my_stock’, ‘my_heytap’, ‘my_company’, ‘my_carrier’, ‘my_region’, ‘my_preload’, ‘my_manifest’, ‘my_bigball’, ‘vendor_dlkm’, ‘system_dlkm’ ] def to_unicode(obj): “”“将对象转换为Unicode字符串”“” if isinstance(obj, unicode): return obj if isinstance(obj, str): try: return obj.decode(‘utf-8’) except UnicodeDecodeError: try: return obj.decode(locale.getpreferredencoding()) except: return obj.decode(‘utf-8’, ‘ignore’) return unicode(obj) def parse_du_file(file_path): “”" 解析du命令输出文件 修改:将输入值从KB转换为MB “”" data = {} try: with open(file_path, ‘r’) as f: for line in f: # 跳过错误行和空行 if ‘Permission denied’ in line or ‘No such file’ in line or not line.strip(): continue # 使用灵活匹配模式处理不同格式(支持制表符) match = re.match(r'(\d+\.?\d*)[\s\t]+([^\s].*)', line.strip()) if match: size_kb, path = match.groups() size_kb = float(size_kb) size_mb = size_kb / 1024.0 # KB 转 MB # 路径规范化处理 path = path.strip() if path.startswith('y_'): path = 'my_' + path[2:] # 修复y_bigball → my_bigball path = os.path.normpath(path).replace('\\', '/') data[to_unicode(path)] = round(size_mb, 4) except IOError as e: print(u"警告: 无法读取文件 {}: {}".format(file_path, str(e))) return data def get_partition_name(filename): “”“从文件名获取分区名称”“” return PARTITION_NAME_MAP.get(filename, filename.replace(‘.txt’, ‘’)) def is_main_partition_file(filename): “”“检查是否为主分区文件”“” return filename in PARTITION_NAME_MAP def generate_dual_report(folder1, folder2, output_xlsx): “”“生成双机对比报告”“” folder1_name = to_unicode(os.path.basename(os.path.normpath(folder1))) folder2_name = to_unicode(os.path.basename(os.path.normpath(folder2))) for folder in [folder1, folder2]: if not os.path.exists(folder): print(u"错误: 目录不存在 - {}".format(folder)) return u"目录 {} 不存在,请检查路径".format(folder) if not os.path.isdir(folder): print(u"错误: 路径不是目录 - {}".format(folder)) return u"{} 不是有效目录".format(folder) # 初始化数据结构 machine1_main_data = {} machine2_main_data = {} machine1_all_files = defaultdict(dict) machine2_all_files = defaultdict(dict) # 收集数据 for folder_path, main_dict, all_dict in [ (folder1, machine1_main_data, machine1_all_files), (folder2, machine2_main_data, machine2_all_files) ]: print(u"处理目录: {}".format(folder_path)) try: for filename in os.listdir(folder_path): if not filename.endswith('.txt'): continue file_path = os.path.join(folder_path, filename) filename_unicode = to_unicode(filename) partition_name = get_partition_name(filename_unicode) file_data = parse_du_file(file_path) all_dict[filename_unicode] = file_data if is_main_partition_file(filename_unicode): print(u"解析主分区文件: {}".format(file_path)) main_dict[filename_unicode] = file_data except OSError as e: print(u"目录访问错误: {}".format(str(e))) return u"无法访问目录 {}: {}".format(folder_path, str(e)) # 创建Excel工作簿 try: wb = xlwt.Workbook(encoding='utf-8') # 定义样式 header_style = xlwt.easyxf('font: bold on') title_style = xlwt.easyxf('font: bold on, height 280; align: wrap on, vert centre') normal_style = xlwt.easyxf() added_style = xlwt.easyxf('pattern: pattern solid, fore_colour light_green;') removed_style = xlwt.easyxf('pattern: pattern solid, fore_colour rose;') summary_style = xlwt.easyxf('font: bold on, color blue;') wrap_style = xlwt.easyxf('align: wrap on, vert centre') # ====== 创建总览Sheet页 ====== ws_overview = wb.add_sheet(u'总览') print(u"创建总览Sheet页(仅主文件数据)") current_row = 0 # 写入总览标题 ws_overview.write_merge( current_row, current_row, 0, 5, u"存储使用总览 (单位: MB)", title_style ) current_row += 1 # 写入文件夹名称 ws_overview.write(current_row, 1, folder1_name, header_style) ws_overview.write(current_row, 2, folder2_name, header_style) current_row += 1 # 写入表头(增加备注列) headers = [u'分区', u'总大小(MB)', u'总大小(MB)', u'差值(MB)', u'标记', u'备注(变化TOP5,>1MB)'] for col, header in enumerate(headers): ws_overview.write(current_row, col, header, header_style) current_row += 1 # 存储各分区汇总数据 overview_data = [] total_machine1 = 0.0 total_machine2 = 0.0 # 按分区顺序处理数据 for partition in PARTITION_ORDER: # 查找对应的文件名 filename = None for fname in machine1_main_data: if get_partition_name(fname) == partition: filename = fname break if not filename: continue # 获取主文件数据 data1 = machine1_main_data.get(filename, {}) data2 = machine2_main_data.get(filename, {}) # 关键修改:只取分区根目录大小(避免重复计算) partition_total1 = data1.get(partition, 0.0) partition_total2 = data2.get(partition, 0.0) diff = partition_total1 - partition_total2 # 更新总计 total_machine1 += partition_total1 total_machine2 += partition_total2 # 确定标记样式 if diff > 0: mark = u"增加" style = added_style elif diff < 0: mark = u"减少" style = removed_style else: mark = u"无变化" style = normal_style # 计算分区中变化大于1MB的TOP5路径(包括增大和减小) path_diffs = [] all_paths = set(data1.keys()) | set(data2.keys()) for path in all_paths: size1 = data1.get(path, 0.0) size2 = data2.get(path, 0.0) path_diff = size1 - size2 # 只记录变化量大于1MB的路径 if abs(path_diff) > 1.0: path_diffs.append((path, path_diff)) # 按变化量绝对值降序排序,取TOP5 path_diffs.sort(key=lambda x: abs(x[1]), reverse=True) top_notes = [] for i, (path, diff_val) in enumerate(path_diffs[:5]): # 截断过长的路径名 if len(path) > 50: path = u"..." + path[-47:] # 添加符号表示变化方向 sign = "+" if diff_val > 0 else "" top_notes.append(u"{}. {}: {}{:.2f}MB".format(i+1, path, sign, diff_val)) # 保存分区数据 overview_data.append({ 'name': partition, 'machine1': partition_total1, 'machine2': partition_total2, 'diff': diff, 'style': style, 'mark': mark, 'notes': u"\n".join(top_notes) if top_notes else u"无显著变化路径" }) # 写入行数据到总览页(增加备注列) ws_overview.write(current_row, 0, to_unicode(partition), style) ws_overview.write(current_row, 1, partition_total1, style) ws_overview.write(current_row, 2, partition_total2, style) ws_overview.write(current_row, 3, diff, style) ws_overview.write(current_row, 4, mark, style) ws_overview.write(current_row, 5, overview_data[-1]['notes'], wrap_style) current_row += 1 # 添加空行 current_row += 1 # 写入总计行 total_diff = total_machine1 - total_machine2 if total_diff > 0: total_mark = u"总增加" total_style = added_style elif total_diff < 0: total_mark = u"总减少" total_style = removed_style else: total_mark = u"无变化" total_style = normal_style ws_overview.write(current_row, 0, u"总计", header_style) ws_overview.write(current_row, 1, total_machine1, header_style) ws_overview.write(current_row, 2, total_machine2, header_style) ws_overview.write(current_row, 3, total_diff, header_style) ws_overview.write(current_row, 4, total_mark, header_style) ws_overview.write(current_row, 5, u"", header_style) # 备注列留空 # 设置备注列宽度(100字符) ws_overview.col(5).width = 256 * 100 # ====== 为每个文件创建单独的Sheet页 ====== # 获取所有唯一的文件名(两个文件夹的并集) all_filenames = sorted(set(machine1_all_files.keys()) | set(machine2_all_files.keys())) for filename in all_filenames: # 获取分区名称 partition_name = get_partition_name(filename) # 创建Sheet页名称(文件名不带扩展名) sheet_name = filename.replace('.txt', '') if len(sheet_name) > 31: # Excel sheet名称长度限制 sheet_name = sheet_name[:31] # 创建Sheet页 ws = wb.add_sheet(sheet_name) print(u"创建文件Sheet页: {}".format(sheet_name)) # 当前行指针 current_row = 0 # 写入分区标题 title = u"分区: {} - 文件: {} (单位: MB)".format(partition_name, filename) ws.write_merge( current_row, current_row, 0, 5, title, title_style ) current_row += 1 # 写入文件夹名称 ws.write_merge(current_row, current_row, 0, 1, folder1_name, header_style) ws.write_merge(current_row, current_row, 2, 3, folder2_name, header_style) ws.write(current_row, 4, u"差异(MB)", header_style) ws.write(current_row, 5, u"标记", header_style) current_row += 1 # 写入表头 headers = [u'路径', u'大小(MB)', u'路径', u'大小(MB)', u'差异(MB)', u'标记'] for col, header in enumerate(headers): ws.write(current_row, col, header, header_style) current_row += 1 # 获取文件数据 data1 = machine1_all_files.get(filename, {}) data2 = machine2_all_files.get(filename, {}) # 获取所有路径(合并两个文件夹的路径) all_paths = sorted(set(data1.keys()) | set(data2.keys())) # 初始化变化统计数据 total_increase = 0.0 # 增大总和 total_decrease = 0.0 # 减小总和 total_added = 0.0 # 新增文件总和 total_removed = 0.0 # 去除文件总和 # 写入数据行 for path in all_paths: size1 = data1.get(path, 0.0) size2 = data2.get(path, 0.0) # 计算差值 diff = size1 - size2 # 确定标记和样式 if size1 == 0 and size2 > 0: mark = u"除去" cell_style = removed_style total_removed += size2 elif size1 > 0 and size2 == 0: mark = u"新增" cell_style = added_style total_added += size1 else: if diff > 0: mark = u"增大" cell_style = added_style total_increase += diff elif diff < 0: mark = u"减小" cell_style = removed_style total_decrease += abs(diff) else: mark = u"相同" cell_style = normal_style # 写入行数据 # folder1列 if size1 > 0: ws.write(current_row, 0, path, cell_style) ws.write(current_row, 1, size1, cell_style) else: ws.write(current_row, 0, u"", cell_style) ws.write(current_row, 1, u"", cell_style) # folder2列 if size2 > 0: ws.write(current_row, 2, path, cell_style) ws.write(current_row, 3, size2, cell_style) else: ws.write(current_row, 2, u"", cell_style) ws.write(current_row, 3, u"", cell_style) # 差异和标记列 ws.write(current_row, 4, diff, cell_style) ws.write(current_row, 5, mark, cell_style) current_row += 1 # 添加文件汇总行(关键修改:只取分区根目录值) root_path = partition_name file_total1 = data1.get(root_path, 0.0) file_total2 = data2.get(root_path, 0.0) file_diff = file_total1 - file_total2 # 写入汇总行 ws.write(current_row, 0, u"文件汇总", header_style) ws.write(current_row, 1, file_total1, header_style) ws.write(current_row, 2, u"", header_style) ws.write(current_row, 3, file_total2, header_style) ws.write(current_row, 4, file_diff, header_style) ws.write(current_row, 5, u"", header_style) current_row += 1 # 添加变化分类统计行 message = ( u"{partition_name}路径下: " u"减小{total_decrease:.2f}MB " u"增大{total_increase:.2f}MB " u"新增文件{total_added:.2f}MB " u"减少文件{total_removed:.2f}MB" ).format( partition_name=partition_name, total_decrease=total_decrease, total_increase=total_increase, total_added=total_added, total_removed=total_removed ) ws.write_merge( current_row, current_row, 0, 5, message, summary_style ) # 保存文件 wb.save(output_xlsx) return u"对比报告已成功生成: {}".format(output_xlsx) except Exception as e: import traceback traceback.print_exc() return u"生成Excel文件时出错: {}".format(str(e)) def main(): “”“主函数:从本地目录读取数据并生成报告”“” parser = argparse.ArgumentParser(description=‘设备存储空间对比工具’) parser.add_argument(‘–test_dir’, required=True, help=‘测试机数据目录’) parser.add_argument(‘–ref_dir’, required=True, help=‘对比机数据目录’) parser.add_argument(‘–output’, required=True, help=‘输出Excel文件路径’) args = parser.parse_args() # 确保命令行参数正确转换为Unicode test_dir = to_unicode(args.test_dir) ref_dir = to_unicode(args.ref_dir) output = to_unicode(args.output) print(u"=" * 60) print(u"开始对比: 测试机({}) vs 对比机({})".format(test_dir, ref_dir)) print(u"=" * 60) result = generate_dual_report(test_dir, ref_dir, output) print(result) if name == “main”: main() 总览页的备注列从差异top改成增大大于1M的top5,并且只显示深层路径文件,例如 修改前: “1. my_product: +730.66MB 2. my_product/app: +322.14MB 3. my_product/priv-app: +277.58MB 4. my_product/del-app-pre: +193.83MB 5. my_product/priv-app/GoogleVelvet_CTS: +187.39MB” 修改后: my_product/priv-app/GoogleVelvet_CTS: +187.39MB"数据修改会全部脚本内容
08-13
标题基于Spring Boot的音乐播放网站设计与实现研究AI更换标题第1章引言介绍音乐播放网站的研究背景、意义、国内外现状及论文方法与创新点。1.1研究背景与意义阐述音乐播放网站在当今数字化时代的重要性与市场需求。1.2国内外研究现状分析国内外音乐播放网站的发展现状及技术特点。1.3研究方法以及创新点概述论文采用的研究方法及在设计与实现上的创新点。第2章相关理论与技术基础总结音乐播放网站设计与实现所需的相关理论和技术。2.1Spring Boot框架介绍介绍Spring Boot框架的基本原理、特点及其在Web开发中的应用。2.2音乐播放技术概述概述音乐播放的基本原理、流媒体技术及音频处理技术。2.3数据库技术选型分析适合音乐播放网站的数据库技术,如MySQL、MongoDB等。第3章系统设计详细介绍音乐播放网站的整体设计方案。3.1系统架构设计阐述系统的层次结构、模块划分及各模块的功能。3.2数据库设计介绍数据库表结构、关系及数据存储方式。3.3界面设计用户界面的设计原则、布局及交互方式。第4章系统实现详细介绍音乐播放网站的具体实现过程。4.1开发环境与工具介绍开发所需的软件、硬件环境及开发工具。4.2核心功能实现阐述音乐播放、搜索、推荐等核心功能的实现细节。4.3系统测试与优化介绍系统测试的方法、过程及性能优化策略。第5章研究结果与分析呈现音乐播放网站设计与实现的研究结果。5.1系统功能测试结果展示系统各项功能的测试结果,包括功能完整性、稳定性等。5.2用户反馈与评价收集并分析用户对音乐播放网站的使用反馈与评价。5.3对比方法分析将本设计与实现与其他类似系统进行对比分析,突出优势与不足。第6章结论与展望总结音乐播放网站设计与实现的研究成果,并展望未来发展方向。6.1研究结论概括音乐播放网站设计与实现的主要成果及创新点。6.2展望指出当前研究的不足,提出未来改进方向及可
评论
成就一亿技术人!
拼手气红包6.0元
还能输入1000个字符
 
红包 添加红包
表情包 插入表情
 条评论被折叠 查看
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

pigerr杨

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值