import os
import shutil
from datetime import datetime, timedelta
import concurrent.futures
import time
def get_recent_files(directory, days=30, executor=None):
    """Return the files under *directory* modified within the last *days* days.

    The tree is walked recursively, one ``os.scandir`` call per directory,
    with the directory scans fanned out over a thread pool.

    Args:
        directory: Root directory to scan (``str`` or ``os.PathLike``).
        days: Age limit in days; entries whose mtime is at or after
            ``now - days`` are kept.
        executor: Optional ``concurrent.futures.Executor`` to run the scans
            on. It is left open for the caller. When omitted, a private
            8-worker ``ThreadPoolExecutor`` is created and shut down here.
            NOTE: do not pass the executor whose worker is currently running
            this function; a saturated pool could then wait on itself.

    Returns:
        dict mapping each recent entry's path relative to *directory* to the
        full path produced by ``os.scandir``.

    Directories and entries that cannot be read (``OSError``) are skipped
    silently, so an unreachable root yields an empty dict rather than an
    exception. Symlinked directories are not descended into.
    """
    directory = os.fspath(directory)
    # Compare raw timestamps instead of building a datetime per file.
    cutoff_ts = (datetime.now() - timedelta(days=days)).timestamp()
    # os.scandir builds entry.path with os.path.join(directory, name), so the
    # prefix to strip is the root plus exactly one separator, whether or not
    # the caller already included a trailing separator.
    prefix_len = len(os.path.join(directory, ""))

    def scan_dir(current_dir):
        """Scan one directory level; return (recent_files, subdirectories)."""
        found = {}
        subdirs = []
        try:
            with os.scandir(current_dir) as entries:
                for entry in entries:
                    try:
                        if entry.is_dir(follow_symlinks=False):
                            # Hand the subdirectory back to the coordinator
                            # instead of submitting from a worker thread.
                            subdirs.append(entry.path)
                        elif entry.stat().st_mtime >= cutoff_ts:
                            found[entry.path[prefix_len:]] = entry.path
                    except OSError:
                        # Best effort: skip entries we cannot stat.
                        continue
        except OSError:
            # Best effort: skip directories we cannot list.
            pass
        return found, subdirs

    def collect(pool):
        """Drive the scan from the calling thread until no work remains."""
        recent = {}
        pending = {pool.submit(scan_dir, directory)}
        while pending:
            done, pending = concurrent.futures.wait(
                pending, return_when=concurrent.futures.FIRST_COMPLETED
            )
            for finished in done:
                found, subdirs = finished.result()
                recent.update(found)
                pending.update(pool.submit(scan_dir, path) for path in subdirs)
        return recent

    if executor is not None:
        return collect(executor)
    with concurrent.futures.ThreadPoolExecutor(max_workers=8) as pool:
        return collect(pool)
def copy_with_directory_structure(src, dst):
    """Copy *src* to *dst*, creating *dst*'s parent directories first.

    Args:
        src: Path of the file to copy.
        dst: Destination file path; missing parent directories are created.

    Raises:
        OSError: Propagated from ``os.makedirs`` or ``shutil.copy2``.
    """
    parent = os.path.dirname(dst)
    # A bare filename has no directory part, and os.makedirs("") would raise
    # FileNotFoundError, so only create the parent when there is one.
    if parent:
        os.makedirs(parent, exist_ok=True)
    shutil.copy2(src, dst)
def compare_and_copy(a_dir, b_dir, c_dir, days=5):
    """Copy recent files that exist under *a_dir* but not under *b_dir* into *c_dir*.

    Both trees are scanned in parallel with ``get_recent_files``. Files are
    matched by their path relative to the scanned root. Every relative path
    found under *a_dir* and absent from *b_dir* is copied to the same
    relative path under *c_dir*.

    Args:
        a_dir: Source root whose recent files are candidates for copying.
        b_dir: Reference root; relative paths present here are skipped.
        c_dir: Destination root for the copied files.
        days: Age limit in days passed to ``get_recent_files``.

    Returns:
        Number of files copied successfully. Copies that fail with
        ``OSError`` are reported on stdout and not counted.
    """
    print(f"扫描近期修改文件({datetime.now():%H:%M:%S})...")

    # Scan both trees concurrently; each scan is I/O-bound.
    start_scan = time.time()
    with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor:
        future_a = executor.submit(get_recent_files, a_dir, days)
        future_b = executor.submit(get_recent_files, b_dir, days)
        a_files = future_a.result()
        b_files = future_b.result()
    scan_time = time.time() - start_scan
    print(f"扫描完成! 用时: {scan_time:.2f}秒 | A目录文件数: {len(a_files)} | B目录文件数: {len(b_files)}")

    # Work out which relative paths are missing from the reference tree.
    print("文件名差异对比...")
    diff_files = [
        (a_path, os.path.join(c_dir, rel_path))
        for rel_path, a_path in a_files.items()
        if rel_path not in b_files
    ]
    total = len(diff_files)

    # Copy the missing files in parallel.
    print(f"开始拷贝 {total} 个差异文件...")
    copy_start = time.time()
    copied = 0
    with concurrent.futures.ThreadPoolExecutor(max_workers=6) as executor:
        futures = {
            executor.submit(copy_with_directory_structure, src, dst): src
            for src, dst in diff_files
        }
        for i, future in enumerate(concurrent.futures.as_completed(futures)):
            try:
                # Retrieve the result so a failed copy is not silently lost.
                future.result()
            except OSError as exc:  # shutil.Error is an OSError subclass
                print(f"拷贝失败: {futures[future]} ({exc})")
            else:
                copied += 1
            if i % 100 == 0:
                print(f"拷贝进度: {i + 1}/{total}")
    copy_time = time.time() - copy_start

    # The elapsed time can be exactly 0.0 on coarse clocks when there is
    # little or nothing to copy, so guard the division.
    rate = copied / copy_time if copy_time > 0 else 0.0
    print(f"拷贝完成! 用时: {copy_time:.2f}秒 | 平均速率: {rate:.1f}文件/秒")
    return copied
if __name__ == "__main__":
    # Path configuration (adjust to the actual environment).
    # Each job uses three folders, passed to compare_and_copy in this order:
    # source, reference, destination.

    # Cylinder block #1
    A_folder = r"\\192.168.190.241\csv"
    B_folder = r"\\192.168.10.119\sharecsv\ThreeD\#1\GT\output"
    C_folder = r"\\192.168.10.119\sharecsv\ThreeD\#1\GT"
    # Cylinder block #2
    D_folder = r"\\192.168.190.242\Report\CSV2"
    E_folder = r"\\192.168.10.119\sharecsv\ThreeD\#2\GT\output"
    F_folder = r"\\192.168.10.119\sharecsv\ThreeD\#2\GT"
    # Cylinder head #1
    G_folder = r"\\192.168.190.189\csv"
    H_folder = r"\\192.168.10.119\sharecsv\ThreeD\#1\GG\output"
    I_folder = r"\\192.168.10.119\sharecsv\ThreeD\#1\GG"
    # Cylinder head #2
    J_folder = r"\\192.168.190.116\csv2"
    K_folder = r"\\192.168.10.119\sharecsv\ThreeD\#2\GG\output"
    L_folder = r"\\192.168.10.119\sharecsv\ThreeD\#2\GG"
    # Crankshaft #1
    M_folder = r"\\192.168.190.169\Bigdata"
    N_folder = r"\\192.168.10.119\sharecsv\ThreeD\#1\QZ\output"
    O_folder = r"\\192.168.10.119\sharecsv\ThreeD\#1\QZ"
    # Crankshaft #2
    P_folder = r"\\192.168.190.170\tmc\Bigdata"
    Q_folder = r"\\192.168.10.119\sharecsv\ThreeD\#2\QZ\output"
    R_folder = r"\\192.168.10.119\sharecsv\ThreeD\#2\QZ"

    print("===== 开始#1缸体比较拷贝操作 =====")
    start_time = datetime.now()
    copied_count1 = compare_and_copy(A_folder, B_folder, C_folder)
    print(f"操作#1缸体完成! 共拷贝 {copied_count1} 个文件\n")

    print("===== 开始#2缸体比较拷贝操作 =====")
    copied_count2 = compare_and_copy(D_folder, E_folder, F_folder)
    print(f"操作#2缸体完成! 共拷贝 {copied_count2} 个文件")

    print("===== 开始#1缸盖比较拷贝操作 =====")
    copied_count3 = compare_and_copy(G_folder, H_folder, I_folder)
    print(f"操作#1缸盖完成! 共拷贝 {copied_count3} 个文件\n")

    print("===== 开始#2缸盖比较拷贝操作 =====")
    copied_count4 = compare_and_copy(J_folder, K_folder, L_folder)
    print(f"操作#2缸盖完成! 共拷贝 {copied_count4} 个文件")

    print("===== 开始#1曲轴比较拷贝操作 =====")
    copied_count5 = compare_and_copy(M_folder, N_folder, O_folder)
    print(f"操作#1曲轴完成! 共拷贝 {copied_count5} 个文件\n")

    print("===== 开始#2曲轴比较拷贝操作 =====")
    copied_count6 = compare_and_copy(P_folder, Q_folder, R_folder)
    print(f"操作#2曲轴完成! 共拷贝 {copied_count6} 个文件")

    # Total wall-clock time across all six jobs. The original printed the
    # undefined name `elapsed6`, which raised NameError.
    elapsed = (datetime.now() - start_time).total_seconds()
    print(f"耗时 {elapsed:.2f} 秒")
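# Runtime error reported alongside this script (pasted text, not code):
#   AttributeError: 'ThreadPoolExecutor' object has no attribute '_pending_work_items'
# Web-page residue from the original paste: "最新发布" ("Latest posts").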