228. Summary Ranges

本文介绍了一个用于处理整数数组的有效算法,该算法可以快速找出数组中连续递增元素的范围,并将其格式化为字符串列表返回。通过对数组进行一次遍历,实现1ms内完成任务的目标。适用于需要对大量连续数值进行区间划分的场景。

这道题我的代码写得有点乱。定义一个最初的起始位置(也就是范围的第一个元素)first,然后循环:如果 nums[i]==nums[i-1]+1,就继续循环;直到 nums[i]!=nums[i-1]+1 再做处理,此时范围就是 first 到 nums[i-1]。如果 first==nums[i-1],就输出 "first",否则输出 "first->nums[i-1]"。还有一种特殊情况,也就是 i==nums.length-1(到达数组末尾),需要再判断最后一次。这道题耗时 1ms。

/**
 * LeetCode 228 "Summary Ranges": collapses runs of consecutive integers
 * into compact range strings.
 */
public class Solution {
    /**
     * Summarizes runs of consecutive values ({@code nums[i] == nums[i-1] + 1})
     * in a single left-to-right pass.
     *
     * <p>Each run is rendered as {@code "a"} when it holds a single value and
     * as {@code "a->b"} when it spans from {@code a} to {@code b}.
     *
     * @param nums the input values; the problem statement supplies a sorted
     *             array without duplicates, but the method only relies on the
     *             "previous + 1" relation between neighbours
     * @return the range strings in input order; an empty list when
     *         {@code nums} is {@code null} or empty
     */
    public List<String> summaryRanges(int[] nums) {
        List<String> ranges = new ArrayList<>();
        if (nums == null || nums.length == 0) {
            return ranges;
        }

        int start = nums[0];
        // Iterate one step past the end so the final run is closed by the
        // same code path as every other run.
        for (int i = 1; i <= nums.length; i++) {
            boolean extendsRun = i < nums.length && nums[i] == nums[i - 1] + 1;
            if (extendsRun) {
                continue;
            }

            // The run that began at `start` ends at the previous element.
            int end = nums[i - 1];
            ranges.add(start == end ? String.valueOf(start) : start + "->" + end);

            if (i < nums.length) {
                start = nums[i];
            }
        }
        return ranges;
    }
}
由于数据清洗需要,我需要一个python脚本工具批量处理。我编写了代码如下: import os import pandas as pd from openpyxl import load_workbook from openpyxl.utils import get_column_letter from openpyxl.worksheet.merge import MergeCell def clean_excel_files(input_dir, output_dir, summary_keywords, target_content, delete_type='row'): os.makedirs(output_dir, exist_ok=True) for filename in os.listdir(input_dir): if filename.endswith(('.xlsx', '.xls')): input_path = os.path.join(input_dir, filename) output_path = os.path.join(output_dir, filename) wb = load_workbook(input_path) for sheet_name in wb.sheetnames: ws = wb[sheet_name] # 1. 删除汇总行 rows_to_delete = [] for row in ws.iter_rows(): for cell in row: if any(keyword in str(cell.value) for keyword in summary_keywords): rows_to_delete.append(cell.row) break for row_idx in sorted(set(rows_to_delete), reverse=True): ws.delete_rows(row_idx) # 2. 处理目标内容(改进合并单元格处理) if delete_type == 'row': # 存储待删除的行索引(使用集合避免重复) rows_to_delete_target = set() # 处理合并单元格区域 if ws.merged_cells.ranges: merged_ranges_to_remove = [] # 存储需要移除的合并区域 for merged_range in list(ws.merged_cells.ranges): top_left_cell = ws.cell(merged_range.min_row, merged_range.min_col) if top_left_cell.value == target_content: # 记录合并区域覆盖的所有行 for r in range(merged_range.min_row, merged_range.max_row + 1): rows_to_delete_target.add(r) # 标记该合并区域需要移除 merged_ranges_to_remove.append(merged_range) # 移除包含目标内容的合并区域 for merged_range in merged_ranges_to_remove: ws.unmerge_cells(str(merged_range)) # 处理非合并单元格 for row in ws.iter_rows(): for cell in row: # 只处理未在合并区域中处理过的单元格 if cell.value == target_content and cell.row not in rows_to_delete_target: rows_to_delete_target.add(cell.row) # 执行行删除 for row_idx in sorted(rows_to_delete_target, reverse=True): ws.delete_rows(row_idx) else: # 列删除逻辑保持不变 cells_to_delete = [] for row in ws.iter_rows(): for cell in row: if cell.value == target_content: cells_to_delete.append(cell.column) if cells_to_delete: for col_idx in sorted(set(cells_to_delete), reverse=True): ws.delete_cols(col_idx) 
wb.save(output_path) print(f"文件已处理: {filename}") # 使用示例 if __name__ == "__main__": input_directory = "C:\\Users\\结算风控部\\Desktop\\FuturesRebateSystem\\data\\input" output_directory = "C:\\Users\\结算风控部\\Desktop\\FuturesRebateSystem\\data\\cleaned" summary_keywords = ["总计", "合计", "汇总", "小计", "共计", "制表人","深圳市","日期"] target_content = "投资者资金对账" delete_type = "row" clean_excel_files(input_directory, output_directory, summary_keywords, target_content, delete_type) 现在要求先解除合并单元格区域再执行删除操作。请在我的代码基础上进行改进
最新发布
12-17
import os import pandas as pd from openpyxl import load_workbook from openpyxl.utils import get_column_letter from openpyxl.worksheet.merge import MergeCell def clean_excel_files(input_dir, output_dir, summary_keywords, target_content, delete_type='row'): os.makedirs(output_dir, exist_ok=True) for filename in os.listdir(input_dir): if filename.endswith(('.xlsx', '.xls')): input_path = os.path.join(input_dir, filename) output_path = os.path.join(output_dir, filename) wb = load_workbook(input_path) for sheet_name in wb.sheetnames: ws = wb[sheet_name] # 1. 删除汇总行 rows_to_delete = [] for row in ws.iter_rows(): for cell in row: if any(keyword in str(cell.value) for keyword in summary_keywords): rows_to_delete.append(cell.row) break for row_idx in sorted(set(rows_to_delete), reverse=True): ws.delete_rows(row_idx) # 2. 处理目标内容(改进合并单元格处理) if delete_type == 'row': # 存储待删除的行索引(使用集合避免重复) rows_to_delete_target = set() # 处理合并单元格区域 if ws.merged_cells.ranges: merged_ranges_to_remove = [] # 存储需要移除的合并区域 for merged_range in list(ws.merged_cells.ranges): top_left_cell = ws.cell(merged_range.min_row, merged_range.min_col) if top_left_cell.value == target_content: # 记录合并区域覆盖的所有行 for r in range(merged_range.min_row, merged_range.max_row + 1): rows_to_delete_target.add(r) # 标记该合并区域需要移除 merged_ranges_to_remove.append(merged_range) # 移除包含目标内容的合并区域 for merged_range in merged_ranges_to_remove: ws.unmerge_cells(str(merged_range)) # 处理非合并单元格 for row in ws.iter_rows(): for cell in row: # 只处理未在合并区域中处理过的单元格 if cell.value == target_content and cell.row not in rows_to_delete_target: rows_to_delete_target.add(cell.row) # 执行行删除 for row_idx in sorted(rows_to_delete_target, reverse=True): ws.delete_rows(row_idx) else: # 列删除逻辑保持不变 cells_to_delete = [] for row in ws.iter_rows(): for cell in row: if cell.value == target_content: cells_to_delete.append(cell.column) if cells_to_delete: for col_idx in sorted(set(cells_to_delete), reverse=True): ws.delete_cols(col_idx) wb.save(output_path) print(f"文件已处理: 
{filename}") # 使用示例 if __name__ == "__main__": input_directory = "C:\\Users\\结算风控部\\Desktop\\FuturesRebateSystem\\data\\input" output_directory = "C:\\Users\\结算风控部\\Desktop\\FuturesRebateSystem\\data\\cleaned" summary_keywords = ["总计", "合计", "汇总", "小计", "共计", "制表人","深圳市","日期"] target_content = "投资者资金对账" delete_type = "row" clean_excel_files(input_directory, output_directory, summary_keywords, target_content, delete_type) 现在target_content所在单元格为合并单元格,执行删除并未解除合并单元格(导致后续数据被挤掉只显示第一个单元格的值),请改进代码
12-17
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值