详细意图
1. 输入:文件夹路径(base_dir)、原文件名(original_name),以及一个包含多个后缀的列表(suffixes,例如 ["BB", "CCC"])
2.在base_dir中找到名为original_name的文件,复制该文件
3. 为每个后缀生成一个副本:新文件名由原文件名替换"最后一个下划线之后、扩展名之前"的部分得到。例如原文件名为 "20250630_Moodys_AAA.csv" 时,被替换的部分是 "AAA",后缀 "BB" 对应的新文件名为 "20250630_Moodys_BB.csv"。
V 1.0
import os
import shutil
def batch_rename_and_copy(folder_path, original_filename, new_suffixes):
    """
    Copy one file several times, swapping the trailing name segment each time.

    The segment after the last underscore of the file name (extension
    excluded) is replaced by each entry of ``new_suffixes``; the extension is
    kept. For example ``"20250630_Moodys_AAA.csv"`` with ``["BB", "CCC"]``
    produces ``"20250630_Moodys_BB.csv"`` and ``"20250630_Moodys_CCC.csv"``
    in the same folder as the original.

    Parameters:
        folder_path (str): Folder holding the source file; the copies are
            written to the same folder.
        original_filename (str): Name of the source file.
        new_suffixes (list): Replacement segments, one copy per entry.

    Raises:
        FileNotFoundError: If the source file does not exist.
        ValueError: If the file name (without extension) has no underscore.
    """
    src_path = os.path.join(folder_path, original_filename)
    if not os.path.isfile(src_path):
        raise FileNotFoundError(f"原始文件不存在: {src_path}")

    # Split the extension off before looking for the separator, so that an
    # underscore that only appears inside the extension is not mistaken for it.
    filename, file_extension = os.path.splitext(original_filename)
    if '_' not in filename:
        raise ValueError("文件名格式不正确,缺少下划线分隔符")
    prefix = filename.rsplit('_', 1)[0]

    for suffix in new_suffixes:
        new_filename = f"{prefix}_{suffix}{file_extension}"
        dest_path = os.path.join(folder_path, new_filename)
        try:
            shutil.copy2(src_path, dest_path)
        except shutil.SameFileError:
            # A suffix equal to the original segment points back at the
            # source file; skip it instead of aborting the rest of the batch.
            print(f"跳过与原文件相同的目标: {new_filename}")
            continue
        print(f"已创建副本: {new_filename}")
# Example usage
if __name__ == "__main__":
    # Placeholder inputs; any file type is supported.
    folder, original = "/path/to/folder", "20250630_Moodys_AAA.csv"
    suffixes = ["BB", "CCC"]
    batch_rename_and_copy(
        folder_path=folder,
        original_filename=original,
        new_suffixes=suffixes,
    )
V 2.0
import os
import shutil
from pathlib import Path
def copy_file_with_suffixes(folder_path, original_filename, new_suffixes):
    """
    Copy one file several times, swapping the trailing name segment each time.

    The segment after the last underscore of the file stem is replaced by
    each entry of ``new_suffixes``; the extension is kept. For example
    ``"20250630_Moodys_AAA.csv"`` with ``["BB", "CCC"]`` produces
    ``"20250630_Moodys_BB.csv"`` and ``"20250630_Moodys_CCC.csv"`` in the
    same folder.

    Parameters:
        folder_path (str): Folder holding the source file; the copies are
            written to the same folder.
        original_filename (str): Name of the source file.
        new_suffixes (list): Replacement segments, one copy per entry.

    Raises:
        NotADirectoryError: If ``folder_path`` is not an existing directory.
        FileNotFoundError: If the source file does not exist.
        ValueError: If the file stem contains no underscore.
    """
    base_dir = Path(folder_path)
    if not base_dir.is_dir():
        raise NotADirectoryError(f"文件夹不存在: {folder_path}")

    src_path = base_dir / original_filename
    if not src_path.is_file():
        raise FileNotFoundError(f"原始文件不存在: {src_path}")

    stem = src_path.stem
    file_extension = src_path.suffix
    # Check the stem, not the full name: an underscore that only appears in
    # the extension would otherwise pass and the suffix would be appended
    # rather than substituted.
    if '_' not in stem:
        raise ValueError("文件名格式不正确,缺少下划线分隔符")
    prefix = stem.rsplit('_', 1)[0]

    for suffix in new_suffixes:
        dest_path = base_dir / f"{prefix}_{suffix}{file_extension}"
        try:
            shutil.copy2(src_path, dest_path)
        except shutil.SameFileError:
            # A suffix equal to the original segment points back at the
            # source file; skip it instead of aborting the rest of the batch.
            print(f"跳过与原文件相同的目标: {dest_path.name}")
            continue
        print(f"已创建副本: {dest_path.name}")
# Example usage: single task
if __name__ == "__main__":
    folder, original = "/path/to/folder", "20250630_Moodys_AAA.csv"
    # The list may hold any number of suffixes.
    suffixes = ["BB", "CCC", "DDD", "EEE"]
    copy_file_with_suffixes(
        folder_path=folder,
        original_filename=original,
        new_suffixes=suffixes,
    )
# Example usage: multiple tasks
if __name__ == "__main__":
    # One tuple per task: (folder path, original file name, new suffixes).
    file_tasks = [
        # Task 1: Moodys file in its own folder
        ("/path/to/moodys_folder", "20250630_Moodys_AAA.csv", ["BB", "CCC"]),
        # Task 2: SP file in its own folder
        ("/path/to/sp_folder", "20250630_SP_AAA.csv", ["DD", "EE", "FF"]),
        # Task 3: Fitch file in its own folder
        ("/path/to/fitch_folder", "20250630_Fitch_AAA.csv", ["GG", "HH", "II", "JJ"]),
    ]
    # Run the tasks one after another, printing a separator line per folder.
    for folder_path, original_filename, new_suffixes in file_tasks:
        copy_file_with_suffixes(
            folder_path=folder_path,
            original_filename=original_filename,
            new_suffixes=new_suffixes,
        )
        print(f"完成文件夹 {folder_path} 下的所有文件复制")
主要优化点:
- 使用Path对象:使用 `pathlib.Path` 替代 `os.path` 处理路径,更现代且高效
- 减少重复计算:预先计算所有目标路径,减少循环内的计算量
- 更高效的文件检查:使用Path对象的 `is_file()` 方法替代 `os.path.isfile()`
- 更简洁的文件名处理:使用Path对象的 `stem` 和 `suffix` 属性替代 `os.path.splitext()`
V2.0的多线程改进方案:使用 concurrent.futures.ThreadPoolExecutor
import os
import shutil
import concurrent.futures
from pathlib import Path
from tqdm import tqdm
import logging
# Configure logging at INFO level; each record is formatted as
# "timestamp - level - message" with a second-resolution timestamp.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(levelname)s - %(message)s",
    datefmt="%Y-%m-%d %H:%M:%S"
)
def copy_file_with_suffixes(folder_path, original_filename, new_suffixes, overwrite=True):
    """
    Copy one file several times, swapping the trailing name segment each time.

    The segment after the last underscore of the file stem is replaced by
    each entry of ``new_suffixes``; the extension is kept. For example
    ``"20250630_Moodys_AAA.csv"`` with ``["BB", "CCC"]`` produces
    ``"20250630_Moodys_BB.csv"`` and ``"20250630_Moodys_CCC.csv"`` in the
    same folder.

    Parameters:
        folder_path (str): Folder holding the source file; the copies are
            written to the same folder.
        original_filename (str): Name of the source file.
        new_suffixes (list): Replacement segments, one copy per entry.
        overwrite (bool): When False, destinations that already exist are
            skipped with a warning instead of being replaced.

    Returns:
        int: Number of files actually copied.

    Raises:
        NotADirectoryError: If ``folder_path`` is not an existing directory.
        FileNotFoundError: If the source file does not exist.
        ValueError: If the file stem contains no underscore.
    """
    base_dir = Path(folder_path)
    if not base_dir.is_dir():
        raise NotADirectoryError(f"文件夹不存在: {folder_path}")

    src_path = base_dir / original_filename
    if not src_path.is_file():
        raise FileNotFoundError(f"原始文件不存在: {src_path}")

    stem = src_path.stem                  # e.g. "20250630_Moodys_AAA"
    file_extension = src_path.suffix      # e.g. ".csv"
    # Check the stem, not the full name: an underscore that only appears in
    # the extension would otherwise pass and the suffix would be appended
    # rather than substituted.
    if '_' not in stem:
        raise ValueError("文件名格式不正确,缺少下划线分隔符")
    prefix = stem.rsplit('_', 1)[0]       # e.g. "20250630_Moodys"

    copied_count = 0
    for suffix in new_suffixes:
        dest_path = base_dir / f"{prefix}_{suffix}{file_extension}"
        if not overwrite and dest_path.exists():
            logging.warning(f"跳过已存在文件: {dest_path}")
            continue
        try:
            shutil.copy2(src_path, dest_path)
        except shutil.SameFileError:
            # A suffix equal to the original segment points back at the
            # source file; skip it instead of aborting the rest of the batch.
            logging.warning(f"跳过与原文件相同的目标: {dest_path}")
            continue
        logging.info(f"复制成功: {dest_path.name}")
        copied_count += 1
    return copied_count
def process_multiple_files_threadpool(file_tasks, max_workers=None):
    """
    Run several copy tasks concurrently on a thread pool with a progress bar.

    Parameters:
        file_tasks (list): Tasks given as tuples
            ``(folder_path, original_filename, new_suffixes)`` or
            ``(folder_path, original_filename, new_suffixes, overwrite)``.
            When ``overwrite`` is omitted it defaults to True.
        max_workers (int): Thread pool size. When None it is derived from the
            CPU count and the number of tasks, capped at 10.

    A task that raises is logged as an error and does not stop the others.
    An empty task list is a no-op.
    """
    file_tasks = list(file_tasks)
    if not file_tasks:
        # The automatic sizing below would yield max_workers=0 for an empty
        # list, which ThreadPoolExecutor rejects with ValueError.
        return

    if max_workers is None:
        cpu_count = os.cpu_count() or 1
        # Twice the core count, but never more than the task count or 10.
        max_workers = min(max(2, cpu_count * 2), len(file_tasks), 10)

    with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
        # Map each Future to the fields needed for error reporting.
        future_to_task = {}
        for task in file_tasks:
            # Accept both the 3-field and the 4-field task shape.
            folder_path, original_filename, new_suffixes, *rest = task
            overwrite = rest[0] if rest else True
            future = executor.submit(
                copy_file_with_suffixes,
                folder_path=folder_path,
                original_filename=original_filename,
                new_suffixes=new_suffixes,
                overwrite=overwrite
            )
            future_to_task[future] = (folder_path, original_filename)

        pbar = tqdm(
            total=len(file_tasks),
            desc="文件复制进度",
            unit="任务",
            bar_format="{l_bar}{bar}| {n_fmt}/{total_fmt} [{elapsed}<{remaining}]"
        )
        try:
            for future in concurrent.futures.as_completed(future_to_task):
                folder_path, original_filename = future_to_task[future]
                try:
                    copied = future.result()
                    logging.debug(f"任务完成: {original_filename},复制了 {copied} 个文件")
                except Exception as e:
                    logging.error(f"任务失败: 文件夹={folder_path}, 文件={original_filename}, 错误={str(e)}")
                finally:
                    # Advance the bar whether the task succeeded or failed.
                    pbar.update(1)
        finally:
            pbar.close()
# Example usage
if __name__ == "__main__":
    # One tuple per task: (folder path, original file name, new suffixes, overwrite).
    file_tasks = [
        (
            "/path/to/moodys_folder",
            "20250630_Moodys_AAA.csv",
            ["BB", "CCC"],
            True,
        ),
        (
            "/path/to/sp_folder",
            "20250630_SP_AAA.csv",
            ["DD", "EE", "FF"],
            True,
        ),
        (
            "/path/to/fitch_folder",
            "20250630_Fitch_AAA.csv",
            ["GG", "HH", "II", "JJ"],
            True,
        ),
    ]
    # Run the copies on a pool explicitly sized to three threads.
    process_multiple_files_threadpool(file_tasks, max_workers=3)
def copy_file_with_suffixes(folder_path, original_filename, new_suffixes, overwrite=True):
    """
    Copy one file several times, swapping the trailing name segment each time.

    The segment after the last underscore of the file stem is replaced by
    each entry of ``new_suffixes``; the extension is kept. For example
    ``"20250630_Moodys_AAA.csv"`` with ``["BB", "CCC"]`` produces
    ``"20250630_Moodys_BB.csv"`` and ``"20250630_Moodys_CCC.csv"`` in the
    same folder.

    Parameters:
        folder_path (str): Folder holding the source file; the copies are
            written to the same folder.
        original_filename (str): Name of the source file.
        new_suffixes (list): Replacement segments, one copy per entry.
        overwrite (bool): When False, destinations that already exist are
            skipped with a warning instead of being replaced.

    Returns:
        int: Number of files actually copied.

    Raises:
        NotADirectoryError: If ``folder_path`` is not an existing directory.
        FileNotFoundError: If the source file does not exist.
        ValueError: If the file stem contains no underscore.
    """
    base_dir = Path(folder_path)
    if not base_dir.is_dir():
        raise NotADirectoryError(f"Directory does not exist: {folder_path}")

    src_path = base_dir / original_filename
    if not src_path.is_file():
        raise FileNotFoundError(f"Source file does not exist: {src_path}")

    stem = src_path.stem                  # e.g. "20250630_Moodys_AAA"
    file_extension = src_path.suffix      # e.g. ".csv"
    # Check the stem, not the full name: an underscore that only appears in
    # the extension would otherwise pass and the suffix would be appended
    # rather than substituted.
    if '_' not in stem:
        raise ValueError("Filename format incorrect: missing underscore separator")
    prefix = stem.rsplit('_', 1)[0]       # e.g. "20250630_Moodys"

    copied_count = 0
    for suffix in new_suffixes:
        dest_path = base_dir / f"{prefix}_{suffix}{file_extension}"
        if not overwrite and dest_path.exists():
            logging.warning(f"Skipping existing file: {dest_path}")
            continue
        try:
            shutil.copy2(src_path, dest_path)
        except shutil.SameFileError:
            # A suffix equal to the original segment points back at the
            # source file; skip it instead of aborting the rest of the batch.
            logging.warning(f"Skipping target identical to source: {dest_path}")
            continue
        logging.info(f"Successfully copied: {dest_path.name}")
        copied_count += 1
    return copied_count
def process_multiple_files_threadpool(file_tasks, max_workers=None):
    """
    Run several copy tasks concurrently on a thread pool with a progress bar.

    Parameters:
        file_tasks (list): Tasks given as tuples
            ``(folder_path, original_filename, new_suffixes)`` or
            ``(folder_path, original_filename, new_suffixes, overwrite)``.
            When ``overwrite`` is omitted it defaults to True.
        max_workers (int): Thread pool size. When None it is derived from the
            CPU count and the number of tasks, capped at 10.

    A task that raises is logged as an error and does not stop the others.
    An empty task list is a no-op.
    """
    file_tasks = list(file_tasks)
    if not file_tasks:
        # The automatic sizing below would yield max_workers=0 for an empty
        # list, which ThreadPoolExecutor rejects with ValueError.
        return

    if max_workers is None:
        cpu_count = os.cpu_count() or 1
        # Twice the core count, but never more than the task count or 10.
        max_workers = min(max(2, cpu_count * 2), len(file_tasks), 10)

    with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
        # Map each Future to the fields needed for error reporting.
        future_to_task = {}
        for task in file_tasks:
            # Accept both the 3-field and the 4-field task shape.
            folder_path, original_filename, new_suffixes, *rest = task
            overwrite = rest[0] if rest else True
            future = executor.submit(
                copy_file_with_suffixes,
                folder_path=folder_path,
                original_filename=original_filename,
                new_suffixes=new_suffixes,
                overwrite=overwrite
            )
            future_to_task[future] = (folder_path, original_filename)

        pbar = tqdm(
            total=len(file_tasks),
            desc="File Copy Progress",
            unit="task",
            bar_format="{l_bar}{bar}| {n_fmt}/{total_fmt} [{elapsed}<{remaining}]"
        )
        try:
            for future in concurrent.futures.as_completed(future_to_task):
                folder_path, original_filename = future_to_task[future]
                try:
                    copied = future.result()
                    logging.debug(f"Task completed: {original_filename}, copied {copied} files")
                except Exception as e:
                    logging.error(f"Task failed: folder={folder_path}, file={original_filename}, error={str(e)}")
                finally:
                    # Advance the bar whether the task succeeded or failed.
                    pbar.update(1)
        finally:
            pbar.close()
from pathlib import Path
from typing import Union, List, Tuple
def generate_file_tasks(
    paths: Union[Union[str, Path], List[Union[str, Path]]],
    original_filenames: List[str],
    target_filenames: List[List[str]],
    overwrite: Union[bool, List[bool]] = True
) -> List[Tuple[Path, str, List[str], bool]]:
    """
    Build a list of ``(path, original_filename, targets, overwrite)`` tasks.

    Parameters:
        paths: Either one folder shared by every task, or one folder per task.
        original_filenames: Source file name of each task.
        target_filenames: For each task, the list of target entries. Entries
            equal to that task's original file name are dropped.
        overwrite: Either one flag shared by every task, or one flag per task.

    Returns:
        One tuple per task, with the folder converted to ``Path``.

    Raises:
        ValueError: If the per-task inputs do not all have the same length.
    """
    task_count = len(original_filenames)

    # Broadcast a single folder to every task.
    if isinstance(paths, (str, Path)):
        path_list = [Path(paths)] * task_count
    else:
        path_list = [Path(p) for p in paths]

    # Broadcast a single flag the same way a single folder is broadcast.
    if isinstance(overwrite, bool):
        overwrite_flags = [overwrite] * task_count
    else:
        overwrite_flags = list(overwrite)

    if not (len(path_list) == task_count == len(target_filenames) == len(overwrite_flags)):
        raise ValueError("The lengths of paths, original_filenames, target_filenames, and overwrite must be consistent")

    return [
        (path, orig, [t for t in targets if t != orig], flag)
        for path, orig, targets, flag in zip(
            path_list, original_filenames, target_filenames, overwrite_flags
        )
    ]

被折叠的 条评论
为什么被折叠?



