序言
由于互联网上的游戏资源复杂多样(至于是什么资源自己体会),并且时常会有版本更替,因此在使用诸如360重复文件查找或者腾讯管家等重复文件查找时,通常比对的是文件的hash值,如果出现版本更替,文件的hash值出现变化,则无法分辨出哪些文件是重复的,比如说1.0版本的exe文件和1.2版本的exe文件很可能由于hash值不同而导致查找不出,浪费磁盘空间,这对于松鼠症+强迫症来说,并不友好。
因此借助ChatGPT生成下面的Python代码,以进行exe文件查重。
import datetime
import os
from collections import defaultdict
import logging
from multiprocessing import Pool, cpu_count
from pathlib import Path
# Logging configuration: write to a UTF-8 log file with a message-only
# format (no timestamps — the run's date/time is logged once by log_today).
logging.basicConfig(filename='duplicate_exe_files.log', level=logging.INFO, format='%(message)s', encoding='utf-8')
# Stamp the log with today's date and the current time.
def log_today():
    """Write the current date and time to the log.

    Uses lazy %-style logging arguments (instead of f-strings) so the
    values are only formatted if the record is actually emitted; the
    rendered output is identical.
    """
    logging.info("Today's date is: %s", datetime.date.today())
    logging.info("Current time is: %s", datetime.datetime.now().strftime('%H:%M:%S'))
# File names to skip when reporting duplicates: launchers, installers,
# uninstallers, engine/runtime helpers and tooling executables that
# legitimately appear in many folders and are not interesting duplicates.
IGNORED_FILES = {
    'WeChat.exe', "セーブデータフォルダを開く.exe", "BHVC.exe", "Game.exe", "setup.exe",
    'WechatAppLauncher.exe', "Uninstall.exe", 'conda.exe', 'conda-env.exe', 'wheel.exe', "config.exe",
    'tqdm.exe', 'pip3.exe', 'pip.exe', 'idle.exe', 'LEUpdater.exe', 'LEProc.exe',
    'LEInstaller.exe', 'LEGUI.exe', 'UE4PrereqSetup_x64.exe', 'settings.exe',
    'unins000.exe', 'SetupReiPatcherAndAutoTranslator.exe', 'TextractorCLI.exe',
    'UnityCrashHandler64.exe', 'Config.exe', 'Lec.ExtProtocol.exe', 'UnityCrashHandler32.exe',
    'ReiPatcher.exe', 'Common.ExtProtocol.Executor.exe', 'UEPrereqSetup_x64.exe',
    'ezTransXP.ExtProtocol.exe', 'acmp.exe', 'notification_helper.exe', "エンジン設定.exe", "uninstall.exe","build-script-build.exe",
    'curl.exe', 'uninst.exe', 'zsync.exe', 'zsyncmake.exe', "DXSETUP.exe", "Uninstaller.exe", "player.exe", "SiglusEngine.exe", "CrashReportClient.exe",
    "ファイル破損チェックツール.exe", "nwjc.exe", "payload.exe", "python.exe", "pythonw.exe","localization.exe","mask_trim_png.exe","png_alpha_colour_remover.exe"
}
# Path substrings to filter out: a duplicate path containing any of these
# keywords (tool folders, patches, backups, toolchain dirs) is not logged.
FILTER_KEYWORDS = {
    'Tool', '补丁', 'i686', 'conda', '备份', 'mingw64'
}
# Drive roots to scan for .exe files — edit to match the drives present
# on the machine running the script.
SEARCH_PATHS = [
    Path('D:/'),
    Path('E:/'),
    Path('F:/'),
    Path('G:/'),
    Path('H:/'),
]
def find_exe_files_in_folder(folder_path):
    """Recursively collect every ``*.exe`` file under *folder_path*.

    Parameters
    ----------
    folder_path : Path
        Root directory to scan.

    Returns
    -------
    defaultdict[str, list[Path]]
        Mapping of file name -> list of full paths carrying that name.

    Any OS-level error is logged and whatever was gathered so far is
    returned, instead of aborting the whole scan.
    """
    exe_files = defaultdict(list)
    try:
        for exe_path in folder_path.rglob('*.exe'):
            exe_files[exe_path.name].append(exe_path)
    except OSError as err:
        # rglob is lazy: on a real drive, PermissionError and other OSErrors
        # can surface mid-iteration, not only when the root is missing —
        # catching only FileNotFoundError would crash the worker.
        logging.error("Error scanning %s: %s", folder_path, err)
    return exe_files
def find_duplicate_exe_files_parallel(folder_paths):
    """Scan every folder in *folder_paths* concurrently and merge the results.

    One worker process per CPU core runs ``find_exe_files_in_folder``; the
    per-folder dictionaries are then folded into a single mapping of
    file name -> every path where that name was found.
    """
    with Pool(cpu_count()) as workers:
        per_folder = workers.map(find_exe_files_in_folder, folder_paths)
    merged = defaultdict(list)
    for folder_result in per_folder:
        for exe_name, locations in folder_result.items():
            merged[exe_name].extend(locations)
    return merged
def filter_and_log_duplicates(all_exe_files, ignored=None, keywords=None):
    """Log each duplicated .exe name together with its surviving paths.

    Parameters
    ----------
    all_exe_files : mapping[str, list[Path]]
        File name -> all paths where that name was found.
    ignored : set[str] | None
        File names to skip entirely; defaults to ``IGNORED_FILES``.
    keywords : set[str] | None
        Path substrings that disqualify a path; defaults to
        ``FILTER_KEYWORDS``.

    A name is only reported when, after ignoring and path filtering, at
    least two paths remain — previously the "重复的文件" header was logged
    before filtering, so it could appear with zero or one path under it.
    """
    # Resolve module-level defaults at call time so callers (and tests)
    # can inject their own sets.
    ignored = IGNORED_FILES if ignored is None else ignored
    keywords = FILTER_KEYWORDS if keywords is None else keywords
    for file_name, paths in all_exe_files.items():
        if file_name in ignored:
            continue
        kept = [p for p in paths if not any(k in str(p) for k in keywords)]
        if len(kept) <= 1:
            # Fewer than two paths survive: not a real duplicate.
            continue
        logging.info(f"重复的文件:{file_name}")
        for path in kept:
            logging.info(path)
        logging.info("")
def main():
    """Script entry point: stamp the log, scan all drives, report duplicates."""
    log_today()
    duplicates = find_duplicate_exe_files_parallel(SEARCH_PATHS)
    filter_and_log_duplicates(duplicates)


if __name__ == "__main__":
    main()
该代码采用多进程以加快重复文件的查找速度。如需运行,请使用 Python 3.11 及之后的版本,并自行修改需要查找的路径,例如:
Path('D:/'),
Path('E:/'),
Path('F:/'),
Path('G:/'),
Path('H:/'),