python 最常用的20个excel批处理脚本

最新推荐文章于 2025-04-14 21:21:16 发布

程序员CC_

最新推荐文章于 2025-04-14 21:21:16 发布

阅读量982

点赞数 18

分类专栏：学python Python零基础 Python教程文章标签： python 开发语言 Excel Python零基础

本文链接：https://blog.youkuaiyun.com/2401_85428892/article/details/145596357

版权

学python 同时被 3 个专栏收录

71 篇文章

订阅专栏

Python教程

66 篇文章

订阅专栏

Python零基础

58 篇文章

订阅专栏

👉大礼包🎁：Python安装包/pycharm教程免费分享👈

批量合并多个Excel文件

import pandas as pd
import os
def merge_excel_files(directory, output_file):
    # 获取目录下所有Excel文件的路径
    excel_files = [f for f in os.listdir(directory) if f.endswith('.xlsx')]
    # 创建一个空的DataFrame用于存储合并的数据
    all_data = pd.DataFrame()
    # 遍历每个Excel文件并读取数据
    for file_name in excel_files:
        print(f"正在处理: {file_name}")
        file_path = os.path.join(directory, file_name)
        df = pd.read_excel(file_path)
        all_data = pd.concat([all_data, df], ignore_index=True)
    # 将合并后的数据保存到新的Excel文件中
    all_data.to_excel(output_file, index=False)
    print(f"合并完成，结果已保存至: {output_file}")
# 使用方法
merge_excel_files('input_directory', 'merged_output.xlsx')

解释: 该脚本从指定目录中读取所有的Excel文件，并将它们的数据合并到一个新的Excel文件中。

在Excel中查找并替换内容

from openpyxl import load_workbook
def find_and_replace_in_excel(file_path, search_text, replace_text):
    wb = load_workbook(file_path)
    for sheet in wb.worksheets:
        print(f"正在处理工作表: {sheet.title}")
        for row in sheet.iter_rows():
            for cell in row:
                if cell.value and search_text in str(cell.value):
                    old_value = cell.value
                    new_value = old_value.replace(search_text, replace_text)
                    cell.value = new_value
                    print(f"在单元格 {cell.coordinate} 中找到 '{search_text}'，已替换为 '{replace_text}'")
    # 保存更改
    wb.save('replaced_' + os.path.basename(file_path))
    print("查找和替换操作已完成")
# 使用方法
find_and_replace_in_excel('example.xlsx', '旧文本', '新文本')

解释: 在指定的Excel文件中搜索特定文本并替换为新的文本，然后保存修改。

根据条件筛选数据并保存到新文件

import pandas as pd
def filter_excel_data(input_file, output_file, column_name, threshold):
    df = pd.read_excel(input_file)
    # 筛选出满足条件的数据
    filtered_df = df[df[column_name] > threshold]
    # 将筛选后的数据保存到新文件
    filtered_df.to_excel(output_file, index=False)
    print(f"筛选完成，符合条件的数据已保存至: {output_file}")
# 使用方法
filter_excel_data('data.xlsx', 'filtered_data.xlsx', 'Age', 30)

解释: 根据给定的列名和阈值筛选数据，并将筛选后的结果保存到一个新的Excel文件中。

向Excel添加公式

from openpyxl import load_workbook
def add_formula_to_excel(file_path, formula, target_cell):
    wb = load_workxl(file_path)
    ws = wb.active
    # 添加公式到指定单元格
    ws[target_cell] = formula
    print(f"公式 '{formula}' 已添加到单元格 {target_cell}")
    wb.save('formulated_' + os.path.basename(file_path))
# 使用方法
add_formula_to_excel('example.xlsx', '=SUM(A1:A10)', 'B1')

解释: 向Excel文件的指定单元格添加公式，并保存文件。

批量调整Excel格式

from openpyxl import load_workbook
from openpyxl.styles import Font, Color, Alignment
def format_excel_file(file_path):
    wb = load_workbook(file_path)
    ws = wb.active
    # 设置字体、颜色和对齐方式
    for row in ws.iter_rows():
        for cell in row:
            cell.font = Font(name='Arial', size=12, bold=True, color='FF0000')
            cell.alignment = Alignment(horizontal='center', vertical='center')
    wb.save('formatted_' + os.path.basename(file_path))
    print("格式调整完成")
# 使用方法
format_excel_file('example.xlsx')

解释: 批量调整Excel文件中的字体样式、颜色和对齐方式。

批量重命名工作表

from openpyxl import load_workbook
def rename_sheets(file_path, prefix):
    wb = load_workbook(file_path)
    for sheet in wb.worksheets:
        old_name = sheet.title
        new_name = f"{prefix}_{old_name}"
        sheet.title = new_name
        print(f"工作表 '{old_name}' 已重命名为: {new_name}")
    # 保存更改
    wb.save('renamed_' + os.path.basename(file_path))
    print("所有工作表重命名完成")
# 使用方法
rename_sheets('example.xlsx', 'New')

解释: 为Excel文件中的每个工作表添加前缀并重命名，然后保存文件。

从多个Excel文件中提取特定列的数据

import pandas as pd
import os
def extract_columns_from_multiple_files(directory, columns_to_extract, output_file):
    all_data = pd.DataFrame()
    # 遍历目录中的所有Excel文件
    for filename in os.listdir(directory):
        if filename.endswith('.xlsx'):
            file_path = os.path.join(directory, filename)
            df = pd.read_excel(file_path)
            # 提取指定的列
            extracted_df = df[columns_to_extract]
            all_data = pd.concat([all_data, extracted_df], ignore_index=True)
    # 将结果保存到新的Excel文件
    all_data.to_excel(output_file, index=False)
    print(f"数据提取完成，结果已保存至: {output_file}")
# 使用方法
extract_columns_from_multiple_files('input_directory', ['Name', 'Age'], 'extracted_output.xlsx')

解释: 从指定目录下的所有Excel文件中提取特定列的数据，并将它们合并保存到一个新的Excel文件中。

在Excel中插入新行或列

from openpyxl import load_workbook
def insert_row_or_column(file_path, location, dimension='row'):
    wb = load_workbook(file_path)
    ws = wb.active
    if dimension == 'row':
        ws.insert_rows(location)
        print(f"在第{location}行之前插入了一行")
    elif dimension == 'column':
        ws.insert_cols(location)
        print(f"在第{location}列之前插入了一列")
    wb.save('inserted_' + os.path.basename(file_path))
# 使用方法
insert_row_or_column('example.xlsx', 5, 'row')  # 在第5行之前插入一行
insert_row_or_column('example.xlsx', 3, 'column')  # 在第3列之前插入一列

解释: 根据给定的位置在Excel文件中插入新行或新列，并保存文件。

删除重复数据

import pandas as pd
def remove_duplicates(input_file, output_file, subset=None):
    df = pd.read_excel(input_file)
    # 删除重复项
    df.drop_duplicates(subset=subset, inplace=True)
    # 保存处理后的数据
    df.to_excel(output_file, index=False)
    print(f"重复数据删除完成，结果已保存至: {output_file}")
# 使用方法
remove_duplicates('data_with_duplicates.xlsx', 'cleaned_data.xlsx', subset=['Name', 'Email'])

解释: 删除Excel文件中的重复数据（可以根据指定列进行去重），并将清理后的数据保存到新的文件中。

跨文件复制工作表

from openpyxl import load_workbook
def copy_sheet(source_file, target_file, sheet_name):
    source_wb = load_workbook(source_file)
    target_wb = load_workbook(target_file)
    if sheet_name in source_wb.sheetnames:
        source_ws = source_wb[sheet_name]
        # 创建新的工作表
        target_ws = target_wb.create_sheet(title=sheet_name)
        # 复制单元格内容
        for row in source_ws.iter_rows():
            for cell in row:
                target_ws[cell.coordinate].value = cell.value
        # 保存目标文件
        target_wb.save(target_file)
        print(f"工作表 '{sheet_name}' 已从 {source_file} 复制到 {target_file}")
    else:
        print(f"源文件中不存在名为 '{sheet_name}' 的工作表")
# 使用方法
copy_sheet('source.xlsx', 'target.xlsx', 'Sheet1')

解释: 从一个Excel文件中复制指定的工作表到另一个Excel文件中，并保存修改。

批量调整列宽

from openpyxl import load_workbook
def adjust_column_width(file_path):
    wb = load_workbook(file_path)
    ws = wb.active
    for column in ws.columns:
        max_length = 0
        column_letter = column[0].column_letter  # 获取列字母
        for cell in column:
            try:
                if len(str(cell.value)) > max_length:
                    max_length = len(str(cell.value))
            except:
                pass
        adjusted_width = (max_length + 2)
        ws.column_dimensions[column_letter].width = adjusted_width
    wb.save('adjusted_' + os.path.basename(file_path))
    print("所有列宽调整完成")
# 使用方法
adjust_column_width('example.xlsx')

解释: 自动调整Excel文件中每列的宽度以适应最长的数据项长度。

数据透视表创建

import pandas as pd
def create_pivot_table(input_file, output_file, index_columns, values_columns):
    df = pd.read_excel(input_file)
    pivot_table = pd.pivot_table(df, values=values_columns, index=index_columns, aggfunc='sum')
    # 将透视表保存到新的Excel文件
    pivot_table.to_excel(output_file)
    print(f"数据透视表已生成并保存至: {output_file}")
# 使用方法
create_pivot_table('sales_data.xlsx', 'pivot_output.xlsx', ['Region'], ['Sales'])

解释: 根据指定的索引列和值列创建一个数据透视表，并将其保存为新的Excel文件。

批量添加图表

from openpyxl import load_workbook
from openpyxl.chart import BarChart, Reference
def add_chart_to_excel(file_path, sheet_name, chart_title, x_axis_title, y_axis_title, data_range):
    wb = load_workbook(file_path)
    ws = wb[sheet_name]
    chart = BarChart()
    chart.title = chart_title
    chart.x_axis.title = x_axis_title
    chart.y_axis.title = y_axis_title
    data = Reference(ws, min_col=data_range['min_col'], min_row=data_range['min_row'],
                     max_col=data_range['max_col'], max_row=data_range['max_row'])
    categories = Reference(ws, min_col=data_range['category_col'], min_row=data_range['category_min_row'],
                           max_row=data_range['category_max_row'])
    chart.add_data(data, titles_from_data=True)
    chart.set_categories(categories)
    ws.add_chart(chart, "E5")  # 在单元格E5处添加图表
    wb.save('chart_added_' + os.path.basename(file_path))
    print("图表添加完成")
# 使用方法
add_chart_to_excel('example.xlsx', 'Sheet1', '销售统计', '产品', '销售额', 
                   {'min_col': 1, 'min_row': 2, 'max_col': 2, 'max_row': 7,
                    'category_col': 1, 'category_min_row': 2, 'category_max_row': 7})

解释: 向指定的工作表添加一个柱状图，并设置图表标题、X轴和Y轴标题。

合并相同内容的单元格

from openpyxl import load_workbook
from openpyxl.utils import get_column_letter
def merge_cells_with_same_content(file_path):
    wb = load_workbook(file_path)
    ws = wb.active
    for col in range(1, ws.max_column + 1):
        start = None
        for row in range(1, ws.max_row + 1):
            current_cell = ws[f"{get_column_letter(col)}{row}"]
            next_cell = ws[f"{get_column_letter(col)}{row + 1}"] if row < ws.max_row else None
            if start is None and current_cell.value == next_cell.value:
                start = current_cell.coordinate
            elif start is not None and current_cell.value != next_cell.value:
                end = current_cell.coordinate
                ws.merge_cells(f"{start}:{end}")
                start = None
    wb.save('merged_' + os.path.basename(file_path))
    print("相同内容的单元格合并完成")
# 使用方法
merge_cells_with_same_content('example.xlsx')

解释: 合并同一列中具有相同内容的连续单元格。

从Excel中导出图片

from openpyxl import load_workbook
from openpyxl.drawing.image import Image
def extract_images_from_excel(file_path, output_dir):
    wb = load_workbook(file_path, keep_vba=True)
    ws = wb.active
    for img in ws._images:
        image = Image(img.ref)
        image_path = os.path.join(output_dir, f"image_{img.anchor._from.row}.png")
        image.save(image_path)
        print(f"图片已保存至: {image_path}")
# 使用方法
extract_images_from_excel('example_with_images.xlsx', 'output_images')

解释: 从Excel文件中提取所有的图片，并将它们保存到指定目录。

批量转换日期格式

import pandas as pd
def convert_date_format(input_file, output_file, date_column, current_format, target_format):
    df = pd.read_excel(input_file)
    # 转换日期格式
    df[date_column] = pd.to_datetime(df[date_column], format=current_format).dt.strftime(target_format)
    # 保存结果
    df.to_excel(output_file, index=False)
    print(f"日期格式转换完成，结果已保存至: {output_file}")
# 使用方法
convert_date_format('dates.xlsx', 'formatted_dates.xlsx', 'Date', '%Y-%m-%d', '%d/%m/%Y')

解释: 将指定列中的日期从一种格式转换为另一种格式，并将结果保存到新的文件中。

根据条件拆分Excel文件

import pandas as pd
def split_excel_by_condition(input_file, column_name, condition_value):
    df = pd.read_excel(input_file)
    # 根据条件筛选数据
    matching_df = df[df[column_name] == condition_value]
    non_matching_df = df[df[column_name] != condition_value]
    # 保存结果
    matching_df.to_excel(f'matching_{condition_value}.xlsx', index=False)
    non_matching_df.to_excel('non_matching.xlsx', index=False)
    print(f"符合条件的数据已保存至: matching_{condition_value}.xlsx")
    print("不符合条件的数据已保存至: non_matching.xlsx")
# 使用方法
split_excel_by_condition('data.xlsx', 'Category', 'A')

解释: 根据给定的条件（例如特定值）拆分Excel文件，并将符合条件与不符合条件的数据分别保存到两个文件中。

在Excel中添加水印

from openpyxl import load_workbook
from openpyxl.drawing.text import TextBlock, Paragraph, ParagraphProperties, CharacterProperties, Font
from openpyxl.drawing.spreadsheet_drawing import OneCellAnchor, AnchorMarker
from openpyxl.utils.units import cm_to_EMU
def add_watermark(file_path, watermark_text):
    wb = load_workbook(file_path)
    ws = wb.active
    drawing = TextBlock(
        Paragraph(paragraphProperties=ParagraphProperties(defRPr=CharacterProperties(sz=400))),
        [Paragraph(watermark_text, paragraphProperties=ParagraphProperties(defRPr=CharacterProperties(sz=400)))])
    anchor = OneCellAnchor(anchorRow=5, anchorCol=5,
                           _from=AnchorMarker(x=cm_to_EMU(2), y=cm_to_EMU(2)),
                           ext=AnchorMarker(x=cm_to_EMU(10), y=cm_to_EMU(1)))
    ws.add_drawing(drawing, anchor)
    wb.save('watermarked_' + os.path.basename(file_path))
    print("水印添加完成")
# 使用方法
add_watermark('example.xlsx', '机密')

注意: 上述代码仅为示意，实际实现可能需要调整以适应openpyxl库的功能限制。对于更复杂的文本块或水印效果，可能需要使用其他库如Pillow来生成图像水印。

批量加密Excel文件

from openpyxl import load_workbook
from openpyxl.workbook.protection import WorkbookProtection
def encrypt_excel_files(directory, password):
    for filename in os.listdir(directory):
        if filename.endswith('.xlsx'):
            file_path = os.path.join(directory, filename)
            wb = load_workbook(file_path)
            # 设置工作簿保护密码
            wb.security = WorkbookProtection(workbookPassword=password, lockStructure=True)
            # 保存加密后的文件
            encrypted_filename = f'encrypted_{filename}'
            wb.save(os.path.join(directory, encrypted_filename))
            print(f"{filename} 已加密并保存为 {encrypted_filename}")
# 使用方法
encrypt_excel_files('input_directory', 'your_password')

解释: 遍历指定目录下的所有Excel文件，并为其设置保护密码。

批量更新超链接

from openpyxl import load_workbook
def update_hyperlinks(file_path, old_url, new_url):
    wb = load_workbook(file_path)
    for sheet in wb.worksheets:
        for row in sheet.iter_rows():
            for cell in row:
                if cell.hyperlink and cell.hyperlink.target.startswith(old_url):
                    new_link = cell.hyperlink.target.replace(old_url, new_url)
                    cell.hyperlink = Hyperlink(display=None, ref=cell.coordinate, 
                                               location=None, tooltip=None, target=new_link)
                    print(f"超链接在单元格 {cell.coordinate} 中已从 {old_url} 更新为 {new_url}")
    wb.save('updated_links_' + os.path.basename(file_path))
# 使用方法
update_hyperlinks('example.xlsx', 'http://oldwebsite.com', 'http://newwebsite.com')

解释: 在指定的Excel文件中查找所有指向旧URL的超链接，并将其更新为新的URL。

这里分享Python 50G大礼包，里面还有Python面试真题，里面干货满满，一次全拿走！
在这里插入图片描述