python 常用的20个excel批处理脚本

# 批量合并多个Excel文件

import pandas as pd
import os
def merge_excel_files(directory, output_file):
    # 获取目录下所有Excel文件的路径
    excel_files = [f for f in os.listdir(directory) if f.endswith('.xlsx')]
    # 创建一个空的DataFrame用于存储合并的数据
    all_data = pd.DataFrame()
    # 遍历每个Excel文件并读取数据
    for file_name in excel_files:
        print(f"正在处理: {file_name}")
        file_path = os.path.join(directory, file_name)
        df = pd.read_excel(file_path)
        all_data = pd.concat([all_data, df], ignore_index=True)
    # 将合并后的数据保存到新的Excel文件中
    all_data.to_excel(output_file, index=False)
    print(f"合并完成,结果已保存至: {output_file}")
# 使用方法
merge_excel_files('input_directory', 'merged_output.xlsx')


# 在Excel中查找并替换内容

from openpyxl import load_workbook
def find_and_replace_in_excel(file_path, search_text, replace_text):
    wb = load_workbook(file_path)
    for sheet in wb.worksheets:
        print(f"正在处理工作表: {sheet.title}")
        for row in sheet.iter_rows():
            for cell in row:
                if cell.value and search_text in str(cell.value):
                    old_value = cell.value
                    new_value = old_value.replace(search_text, replace_text)
                    cell.value = new_value
                    print(f"在单元格 {cell.coordinate} 中找到 '{search_text}',已替换为 '{replace_text}'")
    # 保存更改
    wb.save('replaced_' + os.path.basename(file_path))
    print("查找和替换操作已完成")
# 使用方法
find_and_replace_in_excel('example.xlsx', '旧文本', '新文本')

# 根据条件筛选数据并保存到新文件

import pandas as pd
def filter_excel_data(input_file, output_file, column_name, threshold):
    df = pd.read_excel(input_file)
    # 筛选出满足条件的数据
    filtered_df = df[df[column_name] > threshold]
    # 将筛选后的数据保存到新文件
    filtered_df.to_excel(output_file, index=False)
    print(f"筛选完成,符合条件的数据已保存至: {output_file}")
# 使用方法
filter_excel_data('data.xlsx', 'filtered_data.xlsx', 'Age', 30)


# 向Excel添加公式

from openpyxl import load_workbook
def add_formula_to_excel(file_path, formula, target_cell):
    wb = load_workxl(file_path)
    ws = wb.active
    # 添加公式到指定单元格
    ws[target_cell] = formula
    print(f"公式 '{formula}' 已添加到单元格 {target_cell}")
    wb.save('formulated_' + os.path.basename(file_path))
# 使用方法
add_formula_to_excel('example.xlsx', '=SUM(A1:A10)', 'B1')

# 批量调整Excel格式
from openpyxl import load_workbook
from openpyxl.styles import Font, Color, Alignment
def format_excel_file(file_path):
    wb = load_workbook(file_path)
    ws = wb.active
    # 设置字体、颜色和对齐方式
    for row in ws.iter_rows():
        for cell in row:
            cell.font = Font(name='Arial', size=12, bold=True, color='FF0000')
            cell.alignment = Alignment(horizontal='center', vertical='center')
    wb.save('formatted_' + os.path.basename(file_path))
    print("格式调整完成")
# 使用方法
format_excel_file('example.xlsx')


# 批量重命名工作表

from openpyxl import load_workbook
def rename_sheets(file_path, prefix):
    wb = load_workbook(file_path)
    for sheet in wb.worksheets:
        old_name = sheet.title
        new_name = f"{prefix}_{old_name}"
        sheet.title = new_name
        print(f"工作表 '{old_name}' 已重命名为: {new_name}")
    # 保存更改
    wb.save('renamed_' + os.path.basename(file_path))
    print("所有工作表重命名完成")
# 使用方法
rename_sheets('example.xlsx', 'New')



# 从多个Excel文件中提取特定列的数据

import pandas as pd
import os
def extract_columns_from_multiple_files(directory, columns_to_extract, output_file):
    all_data = pd.DataFrame()
    # 遍历目录中的所有Excel文件
    for filename in os.listdir(directory):
        if filename.endswith('.xlsx'):
            file_path = os.path.join(directory, filename)
            df = pd.read_excel(file_path)
            # 提取指定的列
            extracted_df = df[columns_to_extract]
            all_data = pd.concat([all_data, extracted_df], ignore_index=True)
    # 将结果保存到新的Excel文件
    all_data.to_excel(output_file, index=False)
    print(f"数据提取完成,结果已保存至: {output_file}")
# 使用方法
extract_columns_from_multiple_files('input_directory', ['Name', 'Age'], 'extracted_output.xlsx')


# 在Excel中插入新行或列

from openpyxl import load_workbook
def insert_row_or_column(file_path, location, dimension='row'):
    wb = load_workbook(file_path)
    ws = wb.active
    if dimension == 'row':
        ws.insert_rows(location)
        print(f"在第{location}行之前插入了一行")
    elif dimension == 'column':
        ws.insert_cols(location)
        print(f"在第{location}列之前插入了一列")
    wb.save('inserted_' + os.path.basename(file_path))
# 使用方法
insert_row_or_column('example.xlsx', 5, 'row')  # 在第5行之前插入一行
insert_row_or_column('example.xlsx', 3, 'column')  # 在第3列之前插入一列

# 删除重复数据

import pandas as pd
def remove_duplicates(input_file, output_file, subset=None):
    df = pd.read_excel(input_file)
    # 删除重复项
    df.drop_duplicates(subset=subset, inplace=True)
    # 保存处理后的数据
    df.to_excel(output_file, index=False)
    print(f"重复数据删除完成,结果已保存至: {output_file}")
# 使用方法
remove_duplicates('data_with_duplicates.xlsx', 'cleaned_data.xlsx', subset=['Name', 'Email'])


# 跨文件复制工作表

from openpyxl import load_workbook
def copy_sheet(source_file, target_file, sheet_name):
    source_wb = load_workbook(source_file)
    target_wb = load_workbook(target_file)
    if sheet_name in source_wb.sheetnames:
        source_ws = source_wb[sheet_name]
        # 创建新的工作表
        target_ws = target_wb.create_sheet(title=sheet_name)
        # 复制单元格内容
        for row in source_ws.iter_rows():
            for cell in row:
                target_ws[cell.coordinate].value = cell.value
        # 保存目标文件
        target_wb.save(target_file)
        print(f"工作表 '{sheet_name}' 已从 {source_file} 复制到 {target_file}")
    else:
        print(f"源文件中不存在名为 '{sheet_name}' 的工作表")
# 使用方法
copy_sheet('source.xlsx', 'target.xlsx', 'Sheet1')


# 批量调整列宽

from openpyxl import load_workbook
def adjust_column_width(file_path):
    wb = load_workbook(file_path)
    ws = wb.active
    for column in ws.columns:
        max_length = 0
        column_letter = column[0].column_letter  # 获取列字母
        for cell in column:
            try:
                if len(str(cell.value)) > max_length:
                    max_length = len(str(cell.value))
            except:
                pass
        adjusted_width = (max_length + 2)
        ws.column_dimensions[column_letter].width = adjusted_width
    wb.save('adjusted_' + os.path.basename(file_path))
    print("所有列宽调整完成")
# 使用方法
adjust_column_width('example.xlsx')


# 数据透视表创建

import pandas as pd
def create_pivot_table(input_file, output_file, index_columns, values_columns):
    df = pd.read_excel(input_file)
    pivot_table = pd.pivot_table(df, values=values_columns, index=index_columns, aggfunc='sum')
    # 将透视表保存到新的Excel文件
    pivot_table.to_excel(output_file)
    print(f"数据透视表已生成并保存至: {output_file}")
# 使用方法
create_pivot_table('sales_data.xlsx', 'pivot_output.xlsx', ['Region'], ['Sales'])


# 批量添加图表

from openpyxl import load_workbook
from openpyxl.chart import BarChart, Reference
def add_chart_to_excel(file_path, sheet_name, chart_title, x_axis_title, y_axis_title, data_range):
    wb = load_workbook(file_path)
    ws = wb[sheet_name]
    chart = BarChart()
    chart.title = chart_title
    chart.x_axis.title = x_axis_title
    chart.y_axis.title = y_axis_title
    data = Reference(ws, min_col=data_range['min_col'], min_row=data_range['min_row'],
                     max_col=data_range['max_col'], max_row=data_range['max_row'])
    categories = Reference(ws, min_col=data_range['category_col'], min_row=data_range['category_min_row'],
                           max_row=data_range['category_max_row'])
    chart.add_data(data, titles_from_data=True)
    chart.set_categories(categories)
    ws.add_chart(chart, "E5")  # 在单元格E5处添加图表
    wb.save('chart_added_' + os.path.basename(file_path))
    print("图表添加完成")
# 使用方法
add_chart_to_excel('example.xlsx', 'Sheet1', '销售统计', '产品', '销售额', 
                   {'min_col': 1, 'min_row': 2, 'max_col': 2, 'max_row': 7,
                    'category_col': 1, 'category_min_row': 2, 'category_max_row': 7})

# 合并相同内容的单元格

from openpyxl import load_workbook
from openpyxl.utils import get_column_letter
def merge_cells_with_same_content(file_path):
    wb = load_workbook(file_path)
    ws = wb.active
    for col in range(1, ws.max_column + 1):
        start = None
        for row in range(1, ws.max_row + 1):
            current_cell = ws[f"{get_column_letter(col)}{row}"]
            next_cell = ws[f"{get_column_letter(col)}{row + 1}"] if row < ws.max_row else None
            if start is None and current_cell.value == next_cell.value:
                start = current_cell.coordinate
            elif start is not None and current_cell.value != next_cell.value:
                end = current_cell.coordinate
                ws.merge_cells(f"{start}:{end}")
                start = None
    wb.save('merged_' + os.path.basename(file_path))
    print("相同内容的单元格合并完成")
# 使用方法
merge_cells_with_same_content('example.xlsx')


# 从Excel中导出图片

from openpyxl import load_workbook
from openpyxl.drawing.image import Image
def extract_images_from_excel(file_path, output_dir):
    wb = load_workbook(file_path, keep_vba=True)
    ws = wb.active
    for img in ws._images:
        image = Image(img.ref)
        image_path = os.path.join(output_dir, f"image_{img.anchor._from.row}.png")
        image.save(image_path)
        print(f"图片已保存至: {image_path}")
# 使用方法
extract_images_from_excel('example_with_images.xlsx', 'output_images')


# 批量转换日期格式

import pandas as pd
def convert_date_format(input_file, output_file, date_column, current_format, target_format):
    df = pd.read_excel(input_file)
    # 转换日期格式
    df[date_column] = pd.to_datetime(df[date_column], format=current_format).dt.strftime(target_format)
    # 保存结果
    df.to_excel(output_file, index=False)
    print(f"日期格式转换完成,结果已保存至: {output_file}")
# 使用方法
convert_date_format('dates.xlsx', 'formatted_dates.xlsx', 'Date', '%Y-%m-%d', '%d/%m/%Y')


# 根据条件拆分Excel文件

import pandas as pd
def split_excel_by_condition(input_file, column_name, condition_value):
    df = pd.read_excel(input_file)
    # 根据条件筛选数据
    matching_df = df[df[column_name] == condition_value]
    non_matching_df = df[df[column_name] != condition_value]
    # 保存结果
    matching_df.to_excel(f'matching_{condition_value}.xlsx', index=False)
    non_matching_df.to_excel('non_matching.xlsx', index=False)
    print(f"符合条件的数据已保存至: matching_{condition_value}.xlsx")
    print("不符合条件的数据已保存至: non_matching.xlsx")
# 使用方法
split_excel_by_condition('data.xlsx', 'Category', 'A')

# 在Excel中添加水印

from openpyxl import load_workbook
from openpyxl.drawing.text import TextBlock, Paragraph, ParagraphProperties, CharacterProperties, Font
from openpyxl.drawing.spreadsheet_drawing import OneCellAnchor, AnchorMarker
from openpyxl.utils.units import cm_to_EMU
def add_watermark(file_path, watermark_text):
    wb = load_workbook(file_path)
    ws = wb.active
    drawing = TextBlock(
        Paragraph(paragraphProperties=ParagraphProperties(defRPr=CharacterProperties(sz=400))),
        [Paragraph(watermark_text, paragraphProperties=ParagraphProperties(defRPr=CharacterProperties(sz=400)))])
    anchor = OneCellAnchor(anchorRow=5, anchorCol=5,
                           _from=AnchorMarker(x=cm_to_EMU(2), y=cm_to_EMU(2)),
                           ext=AnchorMarker(x=cm_to_EMU(10), y=cm_to_EMU(1)))
    ws.add_drawing(drawing, anchor)
    wb.save('watermarked_' + os.path.basename(file_path))
    print("水印添加完成")
# 使用方法
add_watermark('example.xlsx', '机密')

# 批量加密Excel文件

from openpyxl import load_workbook
from openpyxl.workbook.protection import WorkbookProtection
def encrypt_excel_files(directory, password):
    for filename in os.listdir(directory):
        if filename.endswith('.xlsx'):
            file_path = os.path.join(directory, filename)
            wb = load_workbook(file_path)
            # 设置工作簿保护密码
            wb.security = WorkbookProtection(workbookPassword=password, lockStructure=True)
            # 保存加密后的文件
            encrypted_filename = f'encrypted_{filename}'
            wb.save(os.path.join(directory, encrypted_filename))
            print(f"{filename} 已加密并保存为 {encrypted_filename}")
# 使用方法
encrypt_excel_files('input_directory', 'your_password')

# 批量更新超链接

from openpyxl import load_workbook
def update_hyperlinks(file_path, old_url, new_url):
    wb = load_workbook(file_path)
    for sheet in wb.worksheets:
        for row in sheet.iter_rows():
            for cell in row:
                if cell.hyperlink and cell.hyperlink.target.startswith(old_url):
                    new_link = cell.hyperlink.target.replace(old_url, new_url)
                    cell.hyperlink = Hyperlink(display=None, ref=cell.coordinate, 
                                               location=None, tooltip=None, target=new_link)
                    print(f"超链接在单元格 {cell.coordinate} 中已从 {old_url} 更新为 {new_url}")
    wb.save('updated_links_' + os.path.basename(file_path))
# 使用方法
update_hyperlinks('example.xlsx', 'http://oldwebsite.com', 'http://newwebsite.com')

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值