学校的素拓分(学分统计代码)

        代码的主要目的是从指定目录中的Excel文件中搜索特定的字符串(如姓名或编号),并将搜索结果输出到新的Excel文件中。另外无法匹配的错误excel文件会把文件名保存下来储存到txt中。

import pandas as pd
import os
import re
import warnings
import time
"""
这里需要注意的是:
1、对于检查的文件不能是打开的状态
2、对于生成文件的地方要把之前的生成过的文件夹改一下文件夹的位置可以新建一个
3、error_log.txt这个文档里的文件需要自己进行单独查一下,可能有不知名的问题
"""
# 定义路径变量
#这个是对于放置要查的excel的名单表
BASE_DIRECTORY = r"D:\桌面\素拓\核对代码\excel文件"
#这个是统计完了之后的生成表
OUTPUT_DIRECTORY = r"D:\桌面\素拓\核对代码\output_excel\12"
#在python代码文件的地方会生成一个错误文件的txt文档
LOG_FILE = 'error_log.txt'


def replace_xls_with_xlsx(text):
    # 检查字符串是否以 '.xls' 结尾
    if text.endswith('.xls'):
        # 使用字符串的 replace 方法将 '.xls' 替换为 '.xlsx'
        new_text = text.replace('.xls', '.xlsx')
        return new_text
    else:
        return text


def open_excel(file_path):
    try:
        print(f"尝试打开文件: {file_path}")  # 添加调试信息
        xls = pd.ExcelFile(file_path)
        sheet_names = xls.sheet_names
        dataframes = {sheet_name: xls.parse(sheet_name) for sheet_name in sheet_names}
        return tuple(dataframes.values())
    except FileNotFoundError:
        print(f"文件未找到: {file_path}")
        return None
    except Exception as e:
        print(f"打开文件 {file_path} 时发生错误: {e}")
        return None


def scan_excel(directory_path):
    txt_files = []
    # 遍历指定文件夹内的所有文件和子文件夹
    for root, dirs, files in os.walk(directory_path):
        for file in files:
            if file.endswith(".xlsx") or file.endswith(".xls"):
                txt_files.append(file)
    return txt_files


def filter_rows_by_name_in_any_column(dataframe, name):
    """
    在任何文本列中匹配指定姓名并输出整行
    参数:
    dataframe (Pandas DataFrame): 要过滤的数据
    name (str): 要匹配的姓名
    返回:
    Pandas DataFrame: 包含匹配行的新DataFrame
    """
    # 构建包含指定姓名的正则表达式模式
    pattern = fr'.*{re.escape(name)}.*'
    regex = re.compile(pattern)

    # 定义一个函数来检查文本列是否包含指定姓名
    def has_name_in_text(cell):
        return bool(regex.search(str(cell)) if isinstance(cell, str) else '')

    # 使用函数筛选行
    filtered_indices = dataframe.apply(lambda row: any(has_name_in_text(cell) for cell in row), axis=1)
    filtered_dataframe = dataframe[filtered_indices]
    return filtered_dataframe


def search_dataframe(df, search_strings, search_mode):
    if search_mode == 1:
        # 创建一个布尔掩码,用于检查DataFrame中的每个单元格是否包含搜索字符串
        mask = df.apply(lambda row: row.str.contains('|'.join(search_strings), case=False, na=False).any(), axis=1)

        # 使用布尔掩码过滤DataFrame,返回包含匹配行的子DataFrame
        matching_rows = df[mask]

        if not matching_rows.empty:
            return matching_rows
        else:
            return 0
    elif search_mode == 2:
        # 创建一个布尔掩码,用于检查DataFrame中的每个单元格是否与搜索字符串完全匹配
        mask = df.apply(lambda row: any(cell in search_strings for cell in row), axis=1)

        # 使用布尔掩码过滤DataFrame,返回包含完全匹配行的子DataFrame
        matching_rows = df[mask]

        if not matching_rows.empty:
            return matching_rows
        else:
            return 0
    else:
        return 0


def remove_empty_columns(df):
    # 使用dropna(axis=1, how='all')来删除所有列中包含空值的列
    cleaned_df = df.dropna(axis=1, how='all')
    return cleaned_df


def merge_dataframes_with_filenames(data_frames_and_filenames):
    """
    合并多个Pandas对象,保留每个表格的列标题,并在每个表格的列标题上方一行的第一列写上文件名
    参数:
    data_frames_and_filenames (list of tuple): 包含多个元组 (pandas对象, 文件名) 的列表
    返回:
    Pandas DataFrame: 合并后的Pandas对象
    """
    data_frames = [df for df, _ in data_frames_and_filenames]
    file_names = [file_name for _, file_name in data_frames_and_filenames]
    # 为每个数据框添加文件名的列
    for i, df in enumerate(data_frames):
        df['文件名'] = file_names[i]
    # 合并多个数据框
    combined_df = pd.concat(data_frames, ignore_index=True)
    return combined_df


def output_as_name(filter_str):
    excel_opened_list, filtered_lists = [], []
    txt_file_paths = scan_excel(BASE_DIRECTORY)
    for num, file in enumerate(txt_file_paths):
        print(f"正在读取第{num + 1}个文件: {file}")
        t = open_excel(os.path.join(BASE_DIRECTORY, file))
        for j, i in enumerate(t):
            excel_opened_list.append(
                (i, f"[{str(j + 1)}] " + file))  # excel_opened_list每个元素为一个元祖,元祖:(excel已打开的pd对象,对应的文件名)
    for all_open_data_one in excel_opened_list:
        filtered_lists.append(
            (filter_rows_by_name_in_any_column(all_open_data_one[0], filter_str), all_open_data_one[1]))
    # 假设data_frames_and_filenames是一个包含多个元组 (pandas对象, 文件名) 的列表
    merged_dataframe = merge_dataframes_with_filenames(filtered_lists)
    cleaned_df = remove_empty_columns(merged_dataframe)
    output_path = os.path.join(OUTPUT_DIRECTORY, filter_str + '.xlsx')
    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    cleaned_df.to_excel(output_path, index=False)
    return None


def output_as_list(filter_str_list, search_mode, log_file=LOG_FILE):
    excel_opened_list, filtered_lists = [], []
    tt = type(0)
    txt_file_paths = scan_excel(BASE_DIRECTORY)

    # 打开错误日志文件
    with open(log_file, 'w') as error_log:
        for num, file in enumerate(txt_file_paths):
            print(f"正在读取第{num + 1}个文件: {file}")
            t = open_excel(os.path.join(BASE_DIRECTORY, file))
            if t is None:
                error_log.write(f"无法打开文件: {file}\n")
                continue
            for j, i in enumerate(t):
                excel_opened_list.append(
                    (i, f"[{str(j + 1)}] " + file))  # excel_opened_list每个元素为一个元祖,元祖:(excel已打开的pd对象,对应的文件名)

        for num, all_open_data_one in enumerate(excel_opened_list):
            print(f"正在处理第{num + 1}个文件")
            t = search_dataframe(all_open_data_one[0], filter_str_list, search_mode)
            if type(t) != tt:
                output_path = os.path.join(OUTPUT_DIRECTORY, replace_xls_with_xlsx(all_open_data_one[1]))
                # 确保目标目录存在
                os.makedirs(os.path.dirname(output_path), exist_ok=True)
                t.to_excel(output_path, index=False)


def list_excel_files(input_folder, output_file):
    with open(output_file, 'w') as output_file:
        for filename in os.listdir(input_folder):
            if filename.endswith('.xlsx') or filename.endswith('.xls'):
                output_file.write(f'{filename}\n')


def __main__():
    start_time = time.time()
    warnings.filterwarnings("ignore", category=UserWarning, module="openpyxl.styles.stylesheet")

    # 定义不同的模式
    mode = 2
    search_mode = 2
    find_key = "潘墨"

    # 定义一些示例名单和编号列表
    name_list = ["杨宇胜", "李意民", "袁潇雅", "黄哲远", "张道欢", "邵宇鹏", "栾逸晨", "周俊琪", "俞君豪", "叶鑫晖",
                 "陈寒旭", "俞佳辉", "金子涵", "赵崇杰", "夏一凡", "吕文强", "胡梦瑶", "郑丁铭", "胡温博", "王梁坤",
                 "边俊涛", "陈雨欣", "高帆", "唐任", "徐臣强", "吴昊天", "詹晟", "张诗林", "李梦雪", "邓万山", "甘青青"]
    num_list = ["2023b11031", "2023b11032", "2023b11033", "2023b11034", "2023b11035", "2023b11036", "2023b11037",
                "2023b11038", "2023b11039", "2023b11040", "2023b11041", "2023b11042", "2023b11043", "2023b11044",
                "2023b11045", "2023b11046", "2023b11047", "2023b11048", "2023b11049", "2023b11050", "2023b11051",
                "2023b11052", "2023b11053", "2023b11054", "2023b11055", "2023b11056", "2023b11057", "2023b11058",
                "2023b11059", "2023b11060", "2023b20062"]
    name_list_1 = ["江献越", "徐颖", "尉昌华", "沈吕柯", "徐康俊", "许倩倩", "安丽莹", "汤丽洁", "王梦莎", "李佳文",
                   "叶佳乐", "戴诗琪", "郭柳峰", "叶宁", "黄佳佳", "江雨杰", "洪宇翔", "占欣", "柳皓程", "金心怡",
                   "陈威妃", "张柯嘉", "向柏霖", "马菲菲", "曹佳怡", "谢星宇", "陈磊", "吴露萧", "潘墨", "王振东",
                   "骆成", "施晓艳", "陈鹏飞", "王成武", "刘建", "金有为", "余锦涛", "沈潮伟", "刘佳柱", "王中龙",
                   "王心雨"]
    num_list_1 = ["2023b32001", "2023b32002", "2023b32003", "2023b32004", "2023b32005", "2023b32006", "2023b32007",
                  "2023b32008", "2023b32009", "2023b32010", "2023b32011", "2023b32012", "2023b32013", "2023b32014",
                  "2023b32015", "2023b32016", "2023b32017", "2023b32018", "2023b32019", "2023b32020", "2023b32021",
                  "2023b32022", "2023b32023", "2023b32024", "2023b32025", "2023b32026", "2023b32027", "2023b32028",
                  "2023b32029", "2023b32030", "2023b32031", "2023b32032", "2023b32033", "2023b32034", "2023b32035",
                  "2023b32036", "2023b32037", "2023b32038", "2023b32039", "2023b32040", "2023b32041"]
    name_list_2 = ["陈永宽", "俞佚辉", "陈艺", "徐浩程", "吴昌城", "徐平平", "夏浩阳", "方浩炜", "张鑫灿", "何钧",
                   "袁轶乐", "张宇", "闻佳叶", "万新伟", "祝贺", "阮丹阳", "孙舒炜", "陈思远", "周丰林", "蔡烨",
                   "黄力伟", "方浩", "姜晨峰", "刘天赐", "何星佑", "周家逸", "王鑫鑫", "郑维刚", "叶彬豪", "黎勇",
                   "彭佳怡"]
    name_list_3 = ["余琴", "胡婷婷", "李佳坤", "刘思蕲", "王晶晶", "石俊逸", "陈雅丽", "樊双双", "黄俏伊", "陈依然",
                   "贾佑丁", "瞿爱涛", "蒋勇", "沈懿", "汪洋", "唐策爰", "戴诗琴", "许圭林", "吴瀚", "胡旖旎", "蒋杭波",
                   "王宇婷", "黄奕", "夏徐恺", "俞波雯", "沈宵", "包瑞阳", "罗红建", "许志辉", "谢振霄", "吴逸舟",
                   "姜怡敏"]

    # 根据模式执行不同的操作
    if mode == 0:
        # 模式0:扫描指定文件夹内的所有Excel文件并打印文件名
        txt_file_paths = scan_excel(BASE_DIRECTORY)
        print(txt_file_paths)
    elif mode == 1:
        # 模式1:根据提供的姓名列表逐个搜索并输出结果
        name_list_len = len(name_list_1)
        for num, name in enumerate(name_list_1, 1):
            output_as_name(name)
            print(f"*****\n已完成{num}人,共{name_list_len}人\n*****")
    elif mode == 2:
        # 模式2:根据提供的姓名列表批量搜索并输出结果
        output_as_list(name_list, search_mode)
    elif mode == 3:
        # 模式3:根据提供的单个关键字搜索并输出结果
        output_as_name(find_key)
    elif mode == -1:
        # 模式-1:列出指定文件夹内的所有Excel文件名并保存到指定文件
        input_folder_path = BASE_DIRECTORY
        output_file_path = os.path.join(OUTPUT_DIRECTORY, '1.txt')
        list_excel_files(input_folder_path, output_file_path)

    # 记录运行时间
    end_time = time.time()
    print(f"共用时{end_time - start_time}秒")
    print("运行结束")


if __name__ == "__main__":
    __main__()

升级之后的想法是打包之后生成exe文件然后通过命令行进行输入文件的地址等信息:

import pandas as pd
import os
import re
import warnings
import time
import sys

# 获取 .exe 文件所在的目录
BASE_SCRIPT_PATH = os.path.dirname(sys.executable) if hasattr(sys, '_MEIPASS') else os.getcwd()

# 提示用户输入参数
input_directory = input("请输入输入文件夹路径(包含要处理的Excel文件): ").strip()
output_directory = input("请输入输出文件夹路径(用于存放处理结果): ").strip()
name_list_input = input("请输入逗号分隔的姓名列表(用于模式1和模式2,可选): ").strip()
find_key_input = input("请输入单个关键字(用于模式3,可选): ").strip()
mode_input = input("请输入操作模式(0, 1, 2, 3, -1): ").strip()

# 处理输入
name_list = name_list_input.split(',') if name_list_input else []
find_key = find_key_input if find_key_input else None
mode = int(mode_input) if mode_input else None

LOG_FILE = 'error_log.txt'

def replace_xls_with_xlsx(text):
    if text.endswith('.xls'):
        return text.replace('.xls', '.xlsx')
    return text

def open_excel(file_path):
    try:
        print(f"尝试打开文件: {file_path}")
        xls = pd.ExcelFile(file_path)
        sheet_names = xls.sheet_names
        dataframes = {sheet_name: xls.parse(sheet_name) for sheet_name in sheet_names}
        return tuple(dataframes.values())
    except FileNotFoundError:
        print(f"文件未找到: {file_path}")
        return None
    except Exception as e:
        print(f"打开文件 {file_path} 时发生错误: {e}")
        return None

def scan_excel(directory_path):
    txt_files = []
    for root, dirs, files in os.walk(directory_path):
        for file in files:
            if file.endswith(".xlsx") or file.endswith(".xls"):
                txt_files.append(file)
    return txt_files

def filter_rows_by_name_in_any_column(dataframe, name):
    pattern = fr'.*{re.escape(name)}.*'
    regex = re.compile(pattern)
    def has_name_in_text(cell):
        return bool(regex.search(str(cell)) if isinstance(cell, str) else '')
    filtered_indices = dataframe.apply(lambda row: any(has_name_in_text(cell) for cell in row), axis=1)
    filtered_dataframe = dataframe[filtered_indices]
    return filtered_dataframe

def search_dataframe(df, search_strings, search_mode):
    if search_mode == 1:
        mask = df.apply(lambda row: row.str.contains('|'.join(search_strings), case=False, na=False).any(), axis=1)
        matching_rows = df[mask]
        return matching_rows if not matching_rows.empty else 0
    elif search_mode == 2:
        mask = df.apply(lambda row: any(cell in search_strings for cell in row), axis=1)
        matching_rows = df[mask]
        return matching_rows if not matching_rows.empty else 0
    else:
        return 0

def remove_empty_columns(df):
    cleaned_df = df.dropna(axis=1, how='all')
    return cleaned_df

def merge_dataframes_with_filenames(data_frames_and_filenames):
    data_frames = [df for df, _ in data_frames_and_filenames]
    file_names = [file_name for _, file_name in data_frames_and_filenames]
    for i, df in enumerate(data_frames):
        df['文件名'] = file_names[i]
    combined_df = pd.concat(data_frames, ignore_index=True)
    return combined_df

def output_as_name(filter_str):
    excel_opened_list, filtered_lists = [], []
    txt_file_paths = scan_excel(input_directory)
    for num, file in enumerate(txt_file_paths):
        print(f"正在读取第{num + 1}个文件: {file}")
        t = open_excel(os.path.join(input_directory, file))
        for j, i in enumerate(t):
            excel_opened_list.append((i, f"[{str(j + 1)}] " + file))
    for all_open_data_one in excel_opened_list:
        filtered_lists.append((filter_rows_by_name_in_any_column(all_open_data_one[0], filter_str), all_open_data_one[1]))
    merged_dataframe = merge_dataframes_with_filenames(filtered_lists)
    cleaned_df = remove_empty_columns(merged_dataframe)
    output_path = os.path.join(output_directory, filter_str + '.xlsx')
    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    cleaned_df.to_excel(output_path, index=False)
    return None

def output_as_list(filter_str_list, search_mode, log_file=LOG_FILE):
    excel_opened_list, filtered_lists = [], []
    tt = type(0)
    txt_file_paths = scan_excel(input_directory)

    with open(log_file, 'w') as error_log:
        for num, file in enumerate(txt_file_paths):
            print(f"正在读取第{num + 1}个文件: {file}")
            t = open_excel(os.path.join(input_directory, file))
            if t is None:
                error_log.write(f"无法打开文件: {file}\n")
                continue
            for j, i in enumerate(t):
                excel_opened_list.append((i, f"[{str(j + 1)}] " + file))

        for num, all_open_data_one in enumerate(excel_opened_list):
            print(f"正在处理第{num + 1}个文件")
            t = search_dataframe(all_open_data_one[0], filter_str_list, search_mode)
            if type(t) != tt:
                output_path = os.path.join(output_directory, replace_xls_with_xlsx(all_open_data_one[1]))
                os.makedirs(os.path.dirname(output_path), exist_ok=True)
                t.to_excel(output_path, index=False)

def list_excel_files(input_folder, output_file):
    with open(output_file, 'w') as output_file:
        for filename in os.listdir(input_folder):
            if filename.endswith('.xlsx') or filename.endswith('.xls'):
                output_file.write(f'{filename}\n')

def __main__():
    start_time = time.time()
    warnings.filterwarnings("ignore", category=UserWarning, module="openpyxl.styles.stylesheet")

    # 根据模式执行不同的操作
    if mode == 0:
        # 模式0:扫描指定文件夹内的所有Excel文件并打印文件名
        txt_file_paths = scan_excel(input_directory)
        print(txt_file_paths)
    elif mode == 1:
        # 模式1:根据提供的姓名列表逐个搜索并输出结果
        name_list_len = len(name_list)
        for num, name in enumerate(name_list, 1):
            output_as_name(name)
            print(f"*****\n已完成{num}人,共{name_list_len}人\n*****")
    elif mode == 2:
        # 模式2:根据提供的姓名列表批量搜索并输出结果
        output_as_list(name_list, 2)
    elif mode == 3:
        # 模式3:根据提供的单个关键字搜索并输出结果
        if find_key:
            output_as_name(find_key)
        else:
            print("请提供有效的单个关键字。")
    elif mode == -1:
        # 模式-1:列出指定文件夹内的所有Excel文件名并保存到指定文件
        input_folder_path = input_directory
        output_file_path = os.path.join(output_directory, '1.txt')
        list_excel_files(input_folder_path, output_file_path)

    # 记录运行时间
    end_time = time.time()
    print(f"共用时{end_time - start_time}秒")
    print("运行结束")

if __name__ == "__main__":
    __main__()

有点bug我改了一下:

import pandas as pd
import os
import re
import warnings
import time
import sys

# 获取 .exe 文件所在的目录
BASE_SCRIPT_PATH = os.path.dirname(sys.executable) if hasattr(sys, '_MEIPASS') else os.getcwd()

# 提示用户输入参数
input_directory = input("请输入输入文件夹路径(包含要处理的Excel文件): ").strip()
output_directory = input("请输入输出文件夹路径(用于存放处理结果): ").strip()
name_list_input = input("请输入逗号分隔的姓名列表(用于模式1和模式2,可选): ").strip()
find_key_input = input("请输入单个关键字(用于模式3,可选): ").strip()
mode_input = input("请输入操作模式(0, 1, 2, 3, -1): ").strip()

# 处理输入
name_list = name_list_input.split(',') if name_list_input else []
find_key = find_key_input if find_key_input else None
mode = int(mode_input) if mode_input else None

LOG_FILE = 'error_log.txt'

def replace_xls_with_xlsx(text):
    if text.endswith('.xls'):
        return text.replace('.xls', '.xlsx')
    return text

def open_excel(file_path):
    try:
        print(f"尝试打开文件: {file_path}")
        xls = pd.ExcelFile(file_path)
        sheet_names = xls.sheet_names
        dataframes = {sheet_name: xls.parse(sheet_name) for sheet_name in sheet_names}
        return tuple(dataframes.values())
    except FileNotFoundError:
        print(f"文件未找到: {file_path}")
        return None
    except Exception as e:
        print(f"打开文件 {file_path} 时发生错误: {e}")
        return None

def scan_excel(directory_path):
    txt_files = []
    for root, dirs, files in os.walk(directory_path):
        for file in files:
            if file.endswith(".xlsx") or file.endswith(".xls"):
                txt_files.append(file)
    return txt_files

def filter_rows_by_name_in_any_column(dataframe, name):
    pattern = fr'.*{re.escape(name)}.*'
    regex = re.compile(pattern)
    def has_name_in_text(cell):
        return bool(regex.search(str(cell)) if isinstance(cell, str) else '')
    filtered_indices = dataframe.apply(lambda row: any(has_name_in_text(cell) for cell in row), axis=1)
    filtered_dataframe = dataframe[filtered_indices]
    return filtered_dataframe

def search_dataframe(df, search_strings, search_mode):
    if search_mode == 1:
        mask = df.apply(lambda row: row.str.contains('|'.join(search_strings), case=False, na=False).any(), axis=1)
        matching_rows = df[mask]
        return matching_rows if not matching_rows.empty else 0
    elif search_mode == 2:
        mask = df.apply(lambda row: any(cell in search_strings for cell in row), axis=1)
        matching_rows = df[mask]
        return matching_rows if not matching_rows.empty else 0
    else:
        return 0

def remove_empty_columns(df):
    cleaned_df = df.dropna(axis=1, how='all')
    return cleaned_df

def merge_dataframes_with_filenames(data_frames_and_filenames):
    data_frames = [df for df, _ in data_frames_and_filenames]
    file_names = [file_name for _, file_name in data_frames_and_filenames]
    for i, df in enumerate(data_frames):
        df['文件名'] = file_names[i]
    combined_df = pd.concat(data_frames, ignore_index=True)
    return combined_df

def output_as_name(filter_str):
    excel_opened_list, filtered_lists = [], []
    txt_file_paths = scan_excel(input_directory)
    log_file = LOG_FILE  # 使用全局定义的 LOG_FILE

    with open(log_file, 'w', encoding='utf-8') as error_log:
        # 处理文件读取
        for num, file in enumerate(txt_file_paths):
            print(f"正在读取第{num + 1}个文件: {file}")
            try:
                t = open_excel(os.path.join(input_directory, file))
                if t is None:
                    error_log.write(f"无法打开文件: {file}\n")
                    print(f"处理文件 {file} 时出错: 文件读取失败")
                    continue
                for j, i in enumerate(t):
                    excel_opened_list.append((i, f"[{str(j + 1)}] " + file))
            except Exception as e:
                error_log.write(f"处理文件 {file} 时出错: {str(e)}\n")
                print(f"处理文件 {file} 时出错: {str(e)}")
                continue
        
        # 处理匹配项
        for num, (df, file_name) in enumerate(excel_opened_list):
            print(f"正在处理第{num + 1}个文件: {file_name}")
            filtered_df = filter_rows_by_name_in_any_column(df, filter_str)
            if not filtered_df.empty:
                output_path = os.path.join(output_directory, replace_xls_with_xlsx(file_name))
                os.makedirs(os.path.dirname(output_path), exist_ok=True)
                filtered_df.to_excel(output_path, index=False)
                print(f"已输出文件: {output_path}")

def output_as_list(filter_str_list, search_mode, log_file=LOG_FILE):
    excel_opened_list, filtered_lists = [], []
    tt = type(0)
    txt_file_paths = scan_excel(input_directory)

    with open(log_file, 'w') as error_log:
        for num, file in enumerate(txt_file_paths):
            print(f"正在读取第{num + 1}个文件: {file}")
            t = open_excel(os.path.join(input_directory, file))
            if t is None:
                error_log.write(f"无法打开文件: {file}\n")
                continue
            for j, i in enumerate(t):
                excel_opened_list.append((i, f"[{str(j + 1)}] " + file))

        for num, all_open_data_one in enumerate(excel_opened_list):
            print(f"正在处理第{num + 1}个文件")
            t = search_dataframe(all_open_data_one[0], filter_str_list, search_mode)
            if type(t) != tt:
                output_path = os.path.join(output_directory, replace_xls_with_xlsx(all_open_data_one[1]))
                os.makedirs(os.path.dirname(output_path), exist_ok=True)
                t.to_excel(output_path, index=False)

def list_excel_files(input_folder, output_file):
    with open(output_file, 'w') as output_file:
        for filename in os.listdir(input_folder):
            if filename.endswith('.xlsx') or filename.endswith('.xls'):
                output_file.write(f'{filename}\n')

def __main__():
    start_time = time.time()
    warnings.filterwarnings("ignore", category=UserWarning, module="openpyxl.styles.stylesheet")

    # 根据模式执行不同的操作
    if mode == 0:
        # 模式0:扫描指定文件夹内的所有Excel文件并保存到activity.txt文件中
        txt_file_paths = scan_excel(input_directory)
        activity_file_path = os.path.join(output_directory, 'activity.txt')
        with open(activity_file_path, 'w') as activity_file:
            for file_path in txt_file_paths:
                # 去除.xlsx后缀
                file_path_no_ext = os.path.splitext(file_path)[0]
                activity_file.write(f"{file_path_no_ext}\n")
    # elif mode == 1:
    #     # 模式1:根据提供的姓名列表逐个搜索并输出结果
    #     name_list_len = len(name_list)
    #     for num, name in enumerate(name_list, 1):
    #         output_as_name(name)
    #         print(f"*****\n已完成{num}人,共{name_list_len}人\n*****")
    elif mode == 2:
        # 模式2:根据提供的姓名列表批量搜索并输出结果
        output_as_list(name_list, 2)
    elif mode == 3:
        # 模式3:根据提供的单个关键字搜索并输出结果
        if find_key:
            output_as_name(find_key)
        else:
            print("请提供有效的单个关键字。")
    elif mode == -1:
        # 模式-1:列出指定文件夹内的所有Excel文件名并保存到指定文件
        input_folder_path = input_directory
        output_file_path = os.path.join(output_directory, '1.txt')
        list_excel_files(input_folder_path, output_file_path)

    # 记录运行时间
    end_time = time.time()
    print(f"共用时{end_time - start_time}秒")
    print("运行结束")

if __name__ == "__main__":
    __main__()

 

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值