228. Summary Ranges

class Solution {
public:
    // Summarise nums as a list of range strings.
    //
    // The array is scanned left to right and split into maximal runs in
    // which every element is exactly one greater than its predecessor.
    // A run of a single element is emitted as "a"; a longer run is emitted
    // as "a->b", where a and b are the first and last elements of the run.
    // An empty input produces an empty result.
    vector<string> summaryRanges(vector<int>& nums) {
        vector<string> out;
        const int total = nums.size();
        int pos = 0;

        while (pos < total) {
            // Remember where this run begins so its length can be judged later.
            const int first = pos;

            // Move pos forward to the last index of the current run. The bound
            // check comes first so nums[pos + 1] is never read out of range.
            while (pos < total - 1 && nums[pos] + 1 == nums[pos + 1]) {
                ++pos;
            }

            string piece = to_string(nums[first]);
            if (pos != first) {
                // The run covers more than one element: append its upper end.
                piece += "->";
                piece += to_string(nums[pos]);
            }
            out.push_back(piece);

            // Step past the run's last element to start the next one.
            ++pos;
        }

        return out;
    }
};

由于数据清洗需要,我需要一个python脚本工具批量处理。我编写了代码如下: import os import pandas as pd from openpyxl import load_workbook from openpyxl.utils import get_column_letter from openpyxl.worksheet.merge import MergeCell def clean_excel_files(input_dir, output_dir, summary_keywords, target_content, delete_type='row'): os.makedirs(output_dir, exist_ok=True) for filename in os.listdir(input_dir): if filename.endswith(('.xlsx', '.xls')): input_path = os.path.join(input_dir, filename) output_path = os.path.join(output_dir, filename) wb = load_workbook(input_path) for sheet_name in wb.sheetnames: ws = wb[sheet_name] # 1. 删除汇总行 rows_to_delete = [] for row in ws.iter_rows(): for cell in row: if any(keyword in str(cell.value) for keyword in summary_keywords): rows_to_delete.append(cell.row) break for row_idx in sorted(set(rows_to_delete), reverse=True): ws.delete_rows(row_idx) # 2. 处理目标内容(改进合并单元格处理) if delete_type == 'row': # 存储待删除的行索引(使用集合避免重复) rows_to_delete_target = set() # 处理合并单元格区域 if ws.merged_cells.ranges: merged_ranges_to_remove = [] # 存储需要移除的合并区域 for merged_range in list(ws.merged_cells.ranges): top_left_cell = ws.cell(merged_range.min_row, merged_range.min_col) if top_left_cell.value == target_content: # 记录合并区域覆盖的所有行 for r in range(merged_range.min_row, merged_range.max_row + 1): rows_to_delete_target.add(r) # 标记该合并区域需要移除 merged_ranges_to_remove.append(merged_range) # 移除包含目标内容的合并区域 for merged_range in merged_ranges_to_remove: ws.unmerge_cells(str(merged_range)) # 处理非合并单元格 for row in ws.iter_rows(): for cell in row: # 只处理未在合并区域中处理过的单元格 if cell.value == target_content and cell.row not in rows_to_delete_target: rows_to_delete_target.add(cell.row) # 执行行删除 for row_idx in sorted(rows_to_delete_target, reverse=True): ws.delete_rows(row_idx) else: # 列删除逻辑保持不变 cells_to_delete = [] for row in ws.iter_rows(): for cell in row: if cell.value == target_content: cells_to_delete.append(cell.column) if cells_to_delete: for col_idx in sorted(set(cells_to_delete), reverse=True): ws.delete_cols(col_idx) 
wb.save(output_path) print(f"文件已处理: {filename}") # 使用示例 if __name__ == "__main__": input_directory = "C:\\Users\\结算风控部\\Desktop\\FuturesRebateSystem\\data\\input" output_directory = "C:\\Users\\结算风控部\\Desktop\\FuturesRebateSystem\\data\\cleaned" summary_keywords = ["总计", "合计", "汇总", "小计", "共计", "制表人","深圳市","日期"] target_content = "投资者资金对账" delete_type = "row" clean_excel_files(input_directory, output_directory, summary_keywords, target_content, delete_type) 现在要求先解除合并单元格区域再执行删除操作。请在我的代码基础上进行改进
最新发布
12-17
import os import pandas as pd from openpyxl import load_workbook from openpyxl.utils import get_column_letter from openpyxl.worksheet.merge import MergeCell def clean_excel_files(input_dir, output_dir, summary_keywords, target_content, delete_type='row'): os.makedirs(output_dir, exist_ok=True) for filename in os.listdir(input_dir): if filename.endswith(('.xlsx', '.xls')): input_path = os.path.join(input_dir, filename) output_path = os.path.join(output_dir, filename) wb = load_workbook(input_path) for sheet_name in wb.sheetnames: ws = wb[sheet_name] # 1. 删除汇总行 rows_to_delete = [] for row in ws.iter_rows(): for cell in row: if any(keyword in str(cell.value) for keyword in summary_keywords): rows_to_delete.append(cell.row) break for row_idx in sorted(set(rows_to_delete), reverse=True): ws.delete_rows(row_idx) # 2. 处理目标内容(改进合并单元格处理) if delete_type == 'row': # 存储待删除的行索引(使用集合避免重复) rows_to_delete_target = set() # 处理合并单元格区域 if ws.merged_cells.ranges: merged_ranges_to_remove = [] # 存储需要移除的合并区域 for merged_range in list(ws.merged_cells.ranges): top_left_cell = ws.cell(merged_range.min_row, merged_range.min_col) if top_left_cell.value == target_content: # 记录合并区域覆盖的所有行 for r in range(merged_range.min_row, merged_range.max_row + 1): rows_to_delete_target.add(r) # 标记该合并区域需要移除 merged_ranges_to_remove.append(merged_range) # 移除包含目标内容的合并区域 for merged_range in merged_ranges_to_remove: ws.unmerge_cells(str(merged_range)) # 处理非合并单元格 for row in ws.iter_rows(): for cell in row: # 只处理未在合并区域中处理过的单元格 if cell.value == target_content and cell.row not in rows_to_delete_target: rows_to_delete_target.add(cell.row) # 执行行删除 for row_idx in sorted(rows_to_delete_target, reverse=True): ws.delete_rows(row_idx) else: # 列删除逻辑保持不变 cells_to_delete = [] for row in ws.iter_rows(): for cell in row: if cell.value == target_content: cells_to_delete.append(cell.column) if cells_to_delete: for col_idx in sorted(set(cells_to_delete), reverse=True): ws.delete_cols(col_idx) wb.save(output_path) print(f"文件已处理: 
{filename}") # 使用示例 if __name__ == "__main__": input_directory = "C:\\Users\\结算风控部\\Desktop\\FuturesRebateSystem\\data\\input" output_directory = "C:\\Users\\结算风控部\\Desktop\\FuturesRebateSystem\\data\\cleaned" summary_keywords = ["总计", "合计", "汇总", "小计", "共计", "制表人","深圳市","日期"] target_content = "投资者资金对账" delete_type = "row" clean_excel_files(input_directory, output_directory, summary_keywords, target_content, delete_type) 现在target_content所在单元格为合并单元格,执行删除并未解除合并单元格(导致后续数据被挤掉只显示第一个单元格的值),请改进代码
12-17
评论
成就一亿技术人!
拼手气红包6.0元
还能输入1000个字符
 
红包 添加红包
表情包 插入表情
 条评论被折叠 查看
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值