cell去重 覆盖代码

是不是碰到过 Cell控件UILabel   UITextField  文字会重复叠加出现   多次进入改页面之后你会发现原本灰色字体,变成黑色字体了

那是因为重复alloc了那些控件导致此问题出现;

简单

一句话去重

if (cell.contentView.subviews.count == 0 || cell.contentView.subviews == nil)

import os import glob import zipfile from openpyxl import load_workbook, Workbook from openpyxl.drawing.image import Image from openpyxl.drawing.spreadsheet_drawing import AnchorMarker, TwoCellAnchor, OneCellAnchor from openpyxl.styles import PatternFill, Border, Side, Alignment, Font, colors from openpyxl.utils import get_column_letter import copy import re def is_valid_excel_file(file_path): """检查是否为有效的Excel文件""" try: with zipfile.ZipFile(file_path, 'r') as zip_ref: required_files = ['xl/workbook.xml', 'xl/styles.xml', 'xl/worksheets/'] return all(name in zip_ref.namelist() for name in required_files[:-1]) except zipfile.BadZipFile: return False except Exception: return False def copy_cell_style(src_cell, dest_cell): """复制单元格样式""" if src_cell.font: dest_cell.font = copy.copy(src_cell.font) if src_cell.fill: dest_cell.fill = copy.copy(src_cell.fill) if src_cell.border: dest_cell.border = copy.copy(src_cell.border) if src.cell.alignment: dest_cell.alignment = copy.copy(src_cell.alignment) if src_cell.number_format: dest_cell.number_format = src_cell.number_format def find_passport_column(ws): """ 查找包含'passport'关键字的列(不区分大小写) 返回列字母和列索引(从1开始) """ # 检查第一行(通常是标题行) for cell in ws[1]: if cell.value and re.search(r'passport', str(cell.value), re.IGNORECASE): return cell.column_letter, cell.column # 如果第一行没找到,检查所有单元格 for row in ws.iter_rows(min_row=1, max_row=min(10, ws.max_row)): for cell in row: if cell.value and re.search(r'passport', str(cell.value), re.IGNORECASE): return cell.column_letter, cell.column # 仍未找到,默认第一列 return 'A', 1 def create_header_style(): """创建第一列的标题样式""" return ( Font(bold=True, color=colors.WHITE), PatternFill(start_color="4F81BD", end_color="4F81BD", fill_type="solid"), Alignment(horizontal='center', vertical='center') ) def merge_excel_with_deduplication(input_dir, output_file): """ 合并Excel文件并基于Passport字段 保留格式和图片,第一列添加文件名和工作表名 """ # 创建新工作簿 wb_output = Workbook() ws_output = wb_output.active ws_output.title = "Merged Deduplicated" # 创建第一列标题的样式 header_font, header_fill, header_alignment = create_header_style() ws_output.column_dimensions['A'].width = 40 # 存储所有已处理的Passport值 seen_passports = set() # 存储行数据和图片信息:(passport_value, row_data, images_list, has_image) all_rows = [] # 存储列名映射关系 column_mapping = {} # 第一阶段:收集所有数据并进行预处理 try: for excel_file in glob.glob(os.path.join(input_dir, "*.xlsx")): file_name = os.path.basename(excel_file) if not is_valid_excel_file(excel_file): print(f"跳过无效文件: {file_name}") continue print(f"处理文件: {file_name}") try: wb_source = load_workbook(excel_file, keep_vba=False, data_only=True) except Exception as e: print(f" 加载失败: {str(e)}") continue for ws_name in wb_source.sheetnames: print(f" 处理工作表: {ws_name}") ws_source = wb_source[ws_name] # 1. 查找Passport列 passport_col_letter, passport_col_idx = find_passport_column(ws_source) print(f" 检测到Passport列: {passport_col_letter}") # 2. 收集图片信息 image_rows = {} if hasattr(ws_source, '_images'): for img in ws_source._images: # 获取图片所在行 if isinstance(img.anchor, (TwoCellAnchor, OneCellAnchor)): anchor = img.anchor img_row = anchor._from.row if hasattr(anchor, '_from') else anchor.row if img_row not in image_rows: image_rows[img_row] = [] image_rows[img_row].append(img) # 3. 处理每一行数据 for row_idx, row in enumerate(ws_source.iter_rows(), 1): passport_cell = ws_source.cell(row=row_idx, column=passport_col_idx) passport_value = str(passport_cell.value).strip() if passport_cell.value else "" # 跳过空Passport行 if not passport_value: continue has_image = row_idx in image_rows # 收集行数据 row_data = [] for cell in row: cell_data = { 'value': cell.value, 'font': copy.copy(cell.font), 'fill': copy.copy(cell.fill), 'border': copy.copy(cell.border), 'alignment': copy.copy(cell.alignment), 'number_format': cell.number_format } row_data.append(cell_data) # 记录列名映射(仅第一行) if row_idx == 1: for col_idx, cell in enumerate(row, 1): col_name = cell.value if cell.value else f"Column{col_idx}" column_mapping[col_idx] = col_name # 添加文件信息 source_info = f"{file_name}-{ws_name}" # 添加到所有行集合 all_rows.append({ 'passport': passport_value, 'source_info': source_info, 'row_data': row_data, 'images': image_rows.get(row_idx, []), 'has_image': has_image, 'row_idx': row_idx, 'file': file_name, 'sheet': ws_name }) except Exception as e: print(f"收集数据时出错: {str(e)}") raise # 第二阶段:基于Passport print("\n开始处理...") passport_groups = {} for row in all_rows: passport = row['passport'] if passport not in passport_groups: passport_groups[passport] = [] passport_groups[passport].append(row) # 按规则选择保留的行 dedup_rows = [] for passport, rows in passport_groups.items(): # 查找有图片的行 rows_with_images = [r for r in rows if r['has_image']] if rows_with_images: # 保留第一个有图片的行 dedup_rows.append(rows_with_images[0]) if len(rows_with_images) > 1: print(f" Passport {passport}: 保留有图片的第一行(共有 {len(rows_with_images)} 行含图片)") else: # 保留第一行 dedup_rows.append(rows[0]) print(f" Passport {passport}: 保留第一行(无图片行)") print(f"完成: 原始 {len(all_rows)} 行 -> 后 {len(dedup_rows)} 行") # 第三阶段:写入输出文件 current_row = 1 image_offset = 0 # 图片行偏移量 # 写入标题行 ws_output.cell(row=current_row, column=1, value="Source").font = header_font ws_output.cell(row=current_row, column=2, value="Passport").font = header_font for col_idx, col_name in column_mapping.items(): ws_output.cell(row=current_row, column=col_idx+2, value=col_name).font = header_font current_row += 1 # 写入数据行 for row_data in dedup_rows: # 添加源信息列 source_cell = ws_output.cell(row=current_row, column=1) source_cell.value = row_data['source_info'] source_cell.font = header_font source_cell.fill = header_fill source_cell.alignment = header_alignment # 添加Passport列 passport_cell = ws_output.cell(row=current_row, column=2) passport_cell.value = row_data['passport'] # 添加其他列数据 for col_idx, cell_data in enumerate(row_data['row_data'], 1): dest_cell = ws_output.cell(row=current_row, column=col_idx+2) dest_cell.value = cell_data['value'] # 应用样式 if cell_data['font']: dest_cell.font = cell_data['font'] if cell_data['fill']: dest_cell.fill = cell_data['fill'] if cell_data['border']: dest_cell.border = cell_data['border'] if cell_data['alignment']: dest_cell.alignment = cell_data['alignment'] if cell_data.get('number_format'): dest_cell.number_format = cell_data['number_format'] # 处理图片 if row_data['has_image']: for img in row_data['images']: img_copy = copy.deepcopy(img) anchor = img_copy.anchor # 调整锚点位置 if isinstance(anchor, TwoCellAnchor): new_anchor = TwoCellAnchor( _from=AnchorMarker( col=anchor._from.col + 2, # 新增了两列 row=anchor._from.row + current_row - row_data['row_idx'] - 1, colOff=anchor._from.colOff, rowOff=anchor._from.rowOff ), to=AnchorMarker( col=anchor.to.col + 2, # 新增了两列 row=anchor.to.row + current_row - row_data['row_idx'] - 1, colOff=anchor.to.colOff, rowOff=anchor.to.rowOff ) ) img_copy.anchor = new_anchor elif isinstance(anchor, OneCellAnchor): new_anchor = OneCellAnchor( _from=AnchorMarker( col=anchor._from.col + 2, # 新增了两列 row=anchor._from.row + current_row - row_data['row_idx'] - 1, colOff=anchor._from.colOff, rowOff=anchor._from.rowOff ) ) img_copy.anchor = new_anchor # 添加图片 ws_output.add_image(img_copy) current_row += 1 # 保存结果 try: wb_output.save(output_file) print(f"\n合并完成! 输出文件: {output_file}") print(f"原始行数: {len(all_rows)}, 后行数: {len(dedup_rows)}") except Exception as e: print(f"保存文件时出错: {str(e)}") raise if __name__ == "__main__": input_directory = "./input_excels" # Excel文件所在目录 output_filename = "merged_deduplicated.xlsx" merge_excel_with_deduplication(input_directory, output_filename) 这个代码图片图片会错位和缺失,修正下
10-05
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值