python shutil复制中文文件decodeerror_shutil.copy导致文件数据删除

博主在使用Python的shutil.copy函数复制文件时遇到问题,原始文件意外被删除,导致'文件中没有数据'的错误。尝试以可写模式打开文件也未成功。此外,代码中出现了random模块的错误,导致'Cannot choose from an empty sequence'的异常。问题可能与文件操作和随机数生成有关。

我正在使用shutil.copy将一个文件的内容复制到另一个文件中。但是它导致我的原始文件被删除,错误是“文件中没有数据”。

我第一次尝试的是这个:

import shutil

shutil.copy('keywords.txt', 'keywordsfinal.txt')

然后被告知文件需要以可写格式打开

^{pr2}$

但是对于这两段代码,即使我在每个.txt文件中都有一些内容,例如test1和test2,这两个文件最后都会变成空的。

我以前让它正常工作过;大约6个月后我回到我的程序时,发现了这个错误。感谢任何帮助。

但是最近下面的错误也开始出现,我不知道它是什么,也不知道它是否与我的代码有关。

Traceback (most recent call last):

File "C:\Python33\lib\random.py", line 249, in choice

i = self._randbelow(len(seq))

File "C:\Python33\lib\random.py", line 225, in _randbelow

r = getrandbits(k) # 0 <= r < 2**k

ValueError: number of bits must be greater than zero

During handling of the above exception, another exception occurred:

Traceback (most recent call last):

File "C:\Users\*******\Desktop\*******\*********Python\new\Final - Copy.py", line 84, in <module>

a = random.choice(list(f)).strip() #.strip cleans the line \n problem

File "C:\Python33\lib\random.py", line 251, in choice

raise IndexError('Cannot choose from an empty sequence')

IndexError: Cannot choose from an empty sequence

import os
import poplib
import ssl
import sys
from email.header import decode_header
from email.parser import Parser

from PyQt6 import QtCore, QtWidgets


class EmailDownloader(QtCore.QObject):
    """Worker that downloads attachments of matching messages from a POP3 mailbox.

    The object is meant to be moved to a ``QThread``; ``run`` is the slot that
    does the work and reports back through the signals below.
    """

    progress_updated = QtCore.pyqtSignal(int)      # percentage of messages processed
    file_downloaded = QtCore.pyqtSignal(str, str)  # (label or file name, detail or path)
    error_occurred = QtCore.pyqtSignal(str)        # message of a fatal error
    finished = QtCore.pyqtSignal()                 # emitted after a successful run
    started = QtCore.pyqtSignal()                  # emitted when run() begins

    def __init__(self, host, port, user, password, save_path):
        """Store the connection settings and build the SSL context.

        Args:
            host: POP3 server host name.
            port: POP3-over-SSL port.
            user: Mailbox login name.
            password: Mailbox password.
            save_path: Directory attachments are written to.
        """
        super().__init__()
        self.host = host
        self.port = port
        self.user = user
        self.password = password
        self.save_path = save_path
        # NOTE(review): the context lowers the OpenSSL security level and turns
        # off hostname and certificate verification, so the server identity is
        # not checked. Kept as in the original; confirm this is still required.
        self.context = ssl.create_default_context()
        self.context.set_ciphers('DEFAULT@SECLEVEL=1')
        self.context.check_hostname = False
        self.context.verify_mode = ssl.CERT_NONE

    def run(self):
        """Log in, scan every message and save attachments of matching mails.

        Emits ``finished`` on success and ``error_occurred`` when the session
        as a whole fails. A failure on a single message is printed and that
        message is skipped.
        """
        self.started.emit()
        server = None
        try:
            server = poplib.POP3_SSL(self.host, self.port, context=self.context)
            server.user(self.user)
            server.pass_(self.password)
            print("POP3_SSL 登录成功")
            num_messages = len(server.list()[1])
            self.file_downloaded.emit("邮箱中共有", f"{num_messages} 封邮件")
            for i in range(1, num_messages + 1):
                self.progress_updated.emit(i * 100 // num_messages)
                try:
                    self._process_message(server, i)
                except Exception as e:
                    print(f"处理邮件 {i} 失败: {str(e)}")
                    continue
            self.finished.emit()
        except Exception as e:
            self.error_occurred.emit(str(e))
        finally:
            if server is not None:
                # A broken connection must not raise out of the worker slot.
                try:
                    server.quit()
                except (poplib.error_proto, OSError) as e:
                    print(f"POP3 quit failed: {e}")

    def _process_message(self, server, index):
        """Retrieve message ``index`` and save its attachments if the subject matches."""
        _, lines, _ = server.retr(index)
        msg_content = b'\r\n'.join(lines).decode('utf-8', errors='ignore')
        msg = Parser().parsestr(msg_content)
        subject = self._decode_email_header(msg.get('Subject', ''))
        if subject != "光庭人力资源数字化平台(试用期目标制订)":
            self.file_downloaded.emit("邮件不符合 ", subject)
            return
        for part in msg.walk():
            content_disposition = str(part.get('Content-Disposition', ''))
            if 'attachment' not in content_disposition.lower():
                continue
            self.file_downloaded.emit("找到符合条件的邮件 ", subject)
            file_path = self._save_attachment(part, self.save_path)
            if file_path:
                self.file_downloaded.emit(part.get_filename(), file_path)

    @staticmethod
    def _decode_email_header(header_str):
        """Decode an RFC 2047 encoded header value into a plain string.

        Undecodable bytes are replaced and an unknown charset falls back to
        UTF-8, so a malformed header does not raise.
        """
        pieces = []
        for text, charset in decode_header(header_str):
            if isinstance(text, bytes):
                try:
                    pieces.append(text.decode(charset or 'utf-8', errors='replace'))
                except LookupError:
                    pieces.append(text.decode('utf-8', errors='replace'))
            else:
                pieces.append(text)
        return ''.join(pieces)

    @staticmethod
    def _save_attachment(part, save_path):
        """Write the payload of ``part`` into ``save_path``.

        Returns:
            The path of the written file, or ``None`` when the part has no
            usable file name or no payload.
        """
        filename = part.get_filename()
        if not filename:
            return None
        decoded = EmailDownloader._decode_email_header(filename)
        # The name comes from the e-mail, i.e. from an untrusted sender: keep
        # only the final path component so it cannot escape save_path.
        file_name = os.path.basename(decoded.replace('\\', '/'))
        if file_name in ('', '.', '..'):
            return None
        payload = part.get_payload(decode=True)
        if not payload:
            return None
        os.makedirs(save_path, exist_ok=True)
        base, ext = os.path.splitext(file_name)
        file_path = os.path.join(save_path, file_name)
        counter = 1
        # Never overwrite an existing file: append _1, _2, ... to the name.
        while os.path.exists(file_path):
            file_path = os.path.join(save_path, f"{base}_{counter}{ext}")
            counter += 1
        with open(file_path, 'wb') as f:
            f.write(payload)
        return file_path


class MainWindow(QtWidgets.QMainWindow):
    """Main window with a start button, a progress bar and a result list."""

    def __init__(self):
        super().__init__()
        self.setWindowTitle("邮件附件下载器")
        self.setGeometry(100, 100, 600, 400)
        self.setup_ui()
        self.downloader = None
        self.worker_thread = None

    def setup_ui(self):
        """Create the widgets and lay them out vertically."""
        main_widget = QtWidgets.QWidget()
        self.setCentralWidget(main_widget)
        layout = QtWidgets.QVBoxLayout(main_widget)

        self.start_button = QtWidgets.QPushButton("开始下载")
        self.start_button.clicked.connect(self.start_download)
        layout.addWidget(self.start_button)

        self.progress_bar = QtWidgets.QProgressBar()
        self.progress_bar.setMinimum(0)
        self.progress_bar.setMaximum(100)
        layout.addWidget(self.progress_bar)

        self.file_list = QtWidgets.QListWidget()
        layout.addWidget(QtWidgets.QLabel("下载的文件:"))
        layout.addWidget(self.file_list)

    def start_download(self):
        """Reset the UI and start a download in a fresh worker thread."""
        self.start_button.setEnabled(False)
        self.progress_bar.setValue(0)
        self.file_list.clear()

        # Create the thread and the worker object.
        # NOTE(review): the account and password are hard-coded in source;
        # they should come from configuration or the environment instead.
        self.worker_thread = QtCore.QThread()
        self.downloader = EmailDownloader(
            host="pop.263.net",
            port=995,
            user="guan.chaoguo@kotei.com.cn",
            password="A1bD5964bE622E8f",
            save_path="downloaded_attachments"
        )
        self.downloader.moveToThread(self.worker_thread)

        self.worker_thread.started.connect(self.downloader.run)
        self.worker_thread.finished.connect(self.worker_thread.deleteLater)
        self.downloader.progress_updated.connect(self.update_progress)
        self.downloader.file_downloaded.connect(self.add_downloaded_file)
        self.downloader.finished.connect(self.download_finished)
        self.downloader.error_occurred.connect(self.show_error)
        self.downloader.finished.connect(self.worker_thread.quit)
        self.downloader.finished.connect(self.downloader.deleteLater)
        # The failure path must also stop the thread and release the worker;
        # otherwise the thread keeps running after an error and is still alive
        # when the next download replaces self.worker_thread.
        self.downloader.error_occurred.connect(self.worker_thread.quit)
        self.downloader.error_occurred.connect(self.downloader.deleteLater)

        # Start the thread.
        self.worker_thread.start()

    def update_progress(self, value):
        """Show ``value`` (0-100) on the progress bar."""
        self.progress_bar.setValue(value)

    def add_downloaded_file(self, filename, file_path):
        """Append a row showing ``filename`` and, in grey, ``file_path``."""
        # Passing the list as parent already inserts the item into it.
        item = QtWidgets.QListWidgetItem(self.file_list)
        widget = QtWidgets.QWidget()
        layout = QtWidgets.QHBoxLayout(widget)
        label_name = QtWidgets.QLabel(filename)
        label_path = QtWidgets.QLabel(file_path)
        label_path.setStyleSheet("color: gray; font-size: 8pt;")
        layout.addWidget(label_name, 0)
        layout.addWidget(label_path, 1)
        layout.setContentsMargins(0, 0, 0, 0)
        item.setSizeHint(widget.sizeHint())
        self.file_list.setItemWidget(item, widget)

    def show_error(self, error_message):
        """Report a fatal error and re-enable the start button."""
        QtWidgets.QMessageBox.critical(self, "错误", error_message)
        self.start_button.setEnabled(True)

    def download_finished(self):
        """Report success, re-enable the start button and fill the progress bar."""
        QtWidgets.QMessageBox.information(self, "完成", "下载已完成")
        self.start_button.setEnabled(True)
        self.progress_bar.setValue(100)


if __name__ == "__main__":
    app = QtWidgets.QApplication(sys.argv)
    window = MainWindow()
    window.show()
    sys.exit(app.exec())

# Request that accompanied this snippet (not implemented here): change the
# download location to a folder created inside the system temporary directory,
# and delete that folder with all of its files before each download starts.
08-23
import os import datetime import shutil import subprocess import re import platform import hashlib def get_commit_id(): cmd = ['git', 'rev-parse', 'HEAD'] try: result = subprocess.check_output(cmd) commit_id = result.decode().strip() if not commit_id: raise Exception('commit id not found') return commit_id except Exception as e: print(e) raise e def remove_pycache(path): for root, dirs, files in os.walk(path): for dir in dirs: if dir == '__pycache__': pycache_dir = os.path.join(root, dir) shutil.rmtree(pycache_dir) def use_shell(): if platform.system() == "Linux": return False return True class SdkPacker: def __init__(self): self.starttime = datetime.datetime.now() self.record('*' * 10 + 'SDK pack init' + '*' * 10) # pip self.pipUrl = os.getenv('SDK_PIP_URL') self.pipTrustHost = os.getenv('SDK_PIP_TRUST_HOST') self.pipArgs = os.getenv('SDK_PIP_ARGS') self.record(f'pipUrl: {self.pipUrl}') self.record(f'pipTrustHost: {self.pipTrustHost}') self.record(f'pipArgs: {self.pipArgs}') # sdk path self.sdkPath = os.path.dirname(os.path.abspath(__file__)) self.outPath = os.path.join(self.sdkPath, 'out') self.enginePath = os.path.join(self.outPath, 'engine') self.remove_path(os.path.join(self.sdkPath, 'build')) self.remove_path(os.path.join(self.sdkPath, 'rpa', 'build')) self.remove_path(self.enginePath) # commit id self.commitId = get_commit_id() self.record(f"commit id:{self.commitId}") # cache path self.cachePath = os.path.join(self.sdkPath, 'out', 'cache') self.cacheZipPath = os.path.join(self.cachePath, f'{self.commitId}.7z') # env self.RPA_PACK_PLATFORM = self.get_env('RPA_PACK_PLATFORM') self.RPA_PACK_ARCH = self.get_env('RPA_PACK_ARCH') self.RPA_VERSION = self.get_env('RPA_PACK_VERSION') if not self.RPA_VERSION or not re.search(r"\d", self.RPA_VERSION): self.RPA_VERSION = "15.0.0" self.RPA_FORCE_REBUILD = self.get_env('RPA_PACK_FORCE_REBUILD') self.platform = platform.system() self.record(f"System: {self.platform}") # tools path self.sdkToolsPath = 
os.path.join(self.sdkPath, 'out', 'sdk_tools') # output path self.python_out = os.path.join(self.enginePath) if self.RPA_PACK_PLATFORM == 'windows': self.reqsPath = os.path.join(self.sdkPath, 'rpa', 'requirements.txt') self.site_packages = os.path.join(self.enginePath, 'Lib', 'site-packages') elif self.RPA_PACK_PLATFORM in ['linux', 'UOS', 'kylinOS']: self.reqsPath = os.path.join(self.sdkPath, 'rpa', 'requirements_uos.txt') self.site_packages = os.path.join(self.enginePath, 'lib', 'python3.7', 'site-packages') else: raise Exception(f'not support platform: {self.RPA_PACK_PLATFORM} and arch: {self.RPA_PACK_ARCH}') self.seven_zip_out = os.path.join(self.site_packages, 'rpa', 'file_folder') self.ffmpeg_out = os.path.join(self.site_packages, 'rpa', 'win32') self.db2_out = os.path.join(self.site_packages) self.db2_cli_out = os.path.join(self.site_packages, 'ibm_db-3.1.4') self.pip_args = [] if self.pipUrl: self.pip_args.extend(['-i', self.pipUrl]) if self.pipTrustHost: self.pip_args.extend(['--trusted-host', self.pipTrustHost]) if self.pipArgs: self.pip_args.extend(self.pipArgs.split(',')) # self.pip_args.extend(['--no-cache-dir', '--no-warn-script-location']) self.record("sdk pack init end") def run_command(self, command, cwd=None): process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, cwd=cwd, shell=use_shell()) while True: output = process.stdout.readline() if output == b'' and process.poll() is not None: break if output: print(output.strip().decode('utf-8', errors='replace')) process.wait() if process.returncode is not 0: raise Exception(f'run command {command} error, return code: {process.returncode}') self.record(f"run command: {command}") def get_env(self, env_key): env_value = os.getenv(env_key) if env_value: self.record(f'{env_key}: {env_value}') else: raise Exception(f'{env_key} not found') return env_value def remove_path(self, path): if os.path.exists(path): try: if os.path.isfile(path): os.remove(path) elif 
os.path.isdir(path): shutil.rmtree(path) self.record(f"remove {path}: successfully") except Exception as e: self.record(f'remove {path}: {e} error') raise e else: self.record(f"remove {path}: not exists") def record(self, title): end_time = datetime.datetime.now() diff = (end_time - self.starttime) print(f"[{end_time.time()} - {diff.seconds}] {title}") def unzip(self, src, dst): # self.run_command(['7z', 'x', src, '-o' + dst, '-bb0'], cwd=os.path.join(self.sdkToolsPath, '7z')) os.system(f"7z x {src} -o{dst}") self.record(f"unzip {src} to {dict}") def calculate_md5(self, file_path): with open(file_path, "rb") as f: md5_hash = hashlib.md5() for chunk in iter(lambda: f.read(4096), b""): md5_hash.update(chunk) return md5_hash.hexdigest() def copy(self, package_name, *rpa_dir): package = os.path.join(self.sdkToolsPath, package_name) package_out = os.path.join(self.site_packages, 'rpa', *rpa_dir) for file in os.listdir(package): package_file = os.path.join(package, file) shutil.copy(package_file, package_out) self.record(f"{package_file} >> {package_out}") self.record(f"copy {package_name}") def pack(self): # encrypt sdk self.record('*' * 10 + 'SDK pack' + '*' * 10) if self.RPA_FORCE_REBUILD == 'false' and os.path.exists(self.cacheZipPath): self.record('SDK use cache') self.unzip(self.cacheZipPath, self.outPath) else: self.encrypt_sdk() # add version version_path = os.path.join(self.site_packages, 'rpa', 'version', 'version') content = f'{self.RPA_VERSION}\n{datetime.datetime.now().strftime("%Y/%m/%d %H:%M:%S")}\n{self.commitId}' with open(version_path, 'w') as f: f.write(content) shutil.copy(version_path, self.enginePath) with open(os.path.join(self.enginePath, 'version'), 'r') as f: self.record(f.read()) # remove cache remove_pycache(self.enginePath) self.record(f"remove engine pycache: {self.enginePath}") self.record("SDK pack end") def link_python(self): pass def is_linux(self): return self.RPA_PACK_PLATFORM in ['linux', 'UOS', 'kylinOS'] def is_windows(self): return 
self.RPA_PACK_PLATFORM == 'windows' def encrypt_sdk(self): def fix_tk(): # remove mouseinfo tk sys.exit mouseinfo_init_path = os.path.join(self.site_packages, 'mouseinfo', '__init__.py') command = """sed -i "s/sys.exit.*/pass/g" """ + mouseinfo_init_path ret = os.system(command) msg = f"remove mouseinfo tk sys.exit code: {ret}" if ret: raise SystemError(msg) self.record(msg) def install_pywpsrpc(): wheel_path = 'pywpsrpc-2.3.3-cp37-cp37m-manylinux_2_5_x86_64.whl' if self.RPA_PACK_ARCH == "arm64": wheel_path = 'pywpsrpc-2.3.3-cp37-cp37m-manylinux_2_28_aarch64.whl' pywpsrpc_path = os.path.join(self.sdkToolsPath, 'pywpsrpc', wheel_path) self.run_command([python_path, '-m', 'pip', 'install', pywpsrpc_path]) def copy_depends(): shutil.copytree(os.path.join(self.sdkToolsPath, 'deps', 'at-spi2-core'), os.path.join(self.enginePath, 'deps', 'at-spi2-core')) shutil.copytree(os.path.join(self.sdkToolsPath, 'deps', 'wps'), os.path.join(self.enginePath, 'deps', 'wps')) shutil.copytree(os.path.join(self.sdkToolsPath, 'deps', 'xclip'), os.path.join(self.site_packages, 'xclip')) # move python self.record('SDK encrypt') use_cache = False requirements_md5 = self.calculate_md5(self.reqsPath) requirements_cache_path = os.path.join(self.sdkPath, 'out', 'cache', f'{requirements_md5}.7z') if self.RPA_FORCE_REBUILD == 'false': if os.path.exists(requirements_cache_path): use_cache = True python_source = "" python_path = "" if self.is_windows(): python_source = os.path.join(self.sdkToolsPath, 'python') python_path = os.path.join(self.enginePath, "python.exe") elif self.is_linux(): python_source = "/opt/python3.7" python_path = os.path.join(self.enginePath, "bin", "python") if not use_cache: shutil.copytree(python_source, self.enginePath) self.record(f"{python_source} >> {self.enginePath}") if self.is_linux(): os.system(f'apt-get install -y libcairo2-dev libgirepository1.0-dev unixodbc-dev') current_cwd = os.getcwd() self.record(f"current cwd:{current_cwd}") bin_path = 
os.path.join(self.enginePath, "bin") os.chdir(bin_path) os.system(f'ln -s python3.7 python') os.system(f'ln -s python3.7 python3') self.record("link python3.7 to python python3") os.chdir(current_cwd) # install requirements # comtypes<1.1.11 need 2to3, setuptools<58 support 2to3 self.run_command([python_path, '-m', 'pip', 'install', '--upgrade', 'pip'] + self.pip_args) self.run_command([python_path, '-m', 'pip', 'install', '--upgrade', 'setuptools < 58'] + self.pip_args) self.run_command([python_path, '-m', 'pip', 'install', '-r', self.reqsPath] + self.pip_args) if self.is_windows(): # install db2 shutil.copytree(os.path.join(self.sdkToolsPath, 'db2', 'ibm_db-3.1.4'), os.path.join(self.enginePath, 'Lib', 'site-packages', 'ibm_db-3.1.4')) shutil.copytree(os.path.join(self.sdkToolsPath, 'db2_cli', 'clidriver'), os.path.join(self.enginePath, 'Lib', 'site-packages', 'ibm_db-3.1.4', 'clidriver')) self.run_command([os.path.join(self.enginePath, 'python'), 'setup.py', 'install'], cwd=os.path.join(self.enginePath, 'Lib', 'site-packages', 'ibm_db-3.1.4')) elif self.is_linux(): # install db2 # shutil.copytree(os.path.join(self.sdkToolsPath, 'db2', 'ibm_db-3.1.4'), # os.path.join(self.site_packages, 'ibm_db-3.1.4')) # shutil.copytree(os.path.join(self.sdkToolsPath, 'db2_cli', 'clidriver'), # os.path.join(self.site_packages, 'ibm_db-3.1.4', 'clidriver')) # self.run_command([python_path, 'setup.py', 'install'], # cwd=os.path.join(self.site_packages, 'ibm_db-3.1.4')) fix_tk() install_pywpsrpc() copy_depends() # install cython self.run_command([python_path, '-m', 'pip', 'install', 'cython==0.29.24'] + self.pip_args) self.remove_path(requirements_cache_path) self.run_command(['7z', 'a', '-mx1', requirements_cache_path, self.enginePath], cwd=os.path.join(self.sdkToolsPath, '7z')) else: self.record("requirements use cache") self.unzip(requirements_cache_path, self.outPath) build_path = os.path.join(self.sdkPath, 'build', 'rpa') # encrypt sdk self.run_command([python_path, 
'setup.py'], cwd=os.path.join(self.sdkPath, 'rpa')) # uninstall cython self.run_command([python_path, '-m', 'pip', 'uninstall', 'cython', '-y']) # remove pycache remove_pycache(build_path) self.record(f"remove rpa pycache: {build_path}") # copy sdk rpa_path = os.path.join(self.site_packages, 'rpa') shutil.move(build_path, rpa_path) self.record(f"move {build_path} >> {rpa_path}") if self.RPA_PACK_PLATFORM == 'windows': self.copy('activexinput', 'uia', 'activexinput') self.copy("7z", "file_folder") self.copy("ffmpeg", "win32") # save cache self.remove_path(self.cacheZipPath) self.run_command(['7z', 'a', '-mx1', self.cacheZipPath, self.enginePath], cwd=os.path.join(self.sdkToolsPath, '7z')) # self.run_command(['7z', 'a', '-tzip', self.cacheZipPath, '-r', self.enginePath, '-y', '-bb0'], # cwd=os.path.join(self.sdkToolsPath, '7z')) # remove paths self.remove_path(os.path.join(self.sdkPath, 'build')) self.remove_path(build_path) self.record("SDK encrypt end") if __name__ == '__main__': import sys if sys.platform == "win32": # for tests os.environ['RPA_PACK_PLATFORM'] = 'windows' os.environ['RPA_PACK_ARCH'] = 'x64' os.environ['RPA_TARGET_FORMAT'] = 'zip' os.environ['RPA_PACK_GITOKEN'] = 'pack_gitoken' os.environ['RPA_VERSION'] = '1.0.0' os.environ['RPA_GIT_TOKEN'] = 'git_token' os.environ['RPA_FORCE_REBUILD'] = 'false' os.environ['RPA_TOOLS_HOME'] = 'C:\\Repos\\tools' elif sys.platform == "darwin": sys.exit(0) else: os.environ['RPA_PACK_PLATFORM'] = 'linux' os.environ['RPA_PACK_ARCH'] = 'x64' os.environ['RPA_TARGET_FORMAT'] = 'deb' os.environ['RPA_PACK_GITOKEN'] = 'pack_gitoken' os.environ['RPA_VERSION'] = '1.0.0' os.environ['RPA_GIT_TOKEN'] = 'git_token' os.environ['RPA_FORCE_REBUILD'] = 'false' os.environ['RPA_TOOLS_HOME'] = '/home/uos/tools' os.environ['SDK_PIP_URL'] = 'https://repo.datagrand.com/repository/py/simple' packer = SdkPacker() packer.pack() 分解一下项目打包的流程,介绍如何实现打包,如何配置环境,依赖等等,以及是否实现可执行文件,将整个流程用图表表示
07-10
import codecs
import csv
import fnmatch
import os
import re
import shutil
import subprocess
import sys
import tempfile
import threading
import time
import tkinter as tk
import traceback
import zipfile
from tkinter import ttk, filedialog, messagebox, scrolledtext
from tkinter.font import Font

# Optional third-party parsers.  Each one is only needed for a specific file
# type, so a missing package must not prevent the tool from starting; the
# affected handler reports the problem for that file instead.
try:
    import docx
except ImportError:
    docx = None
try:
    from openpyxl import load_workbook
except ImportError:
    load_workbook = None
try:
    import PyPDF2
except ImportError:
    PyPDF2 = None
try:
    import chardet
except ImportError:
    chardet = None
try:
    import xlrd  # support for legacy .xls workbooks
except ImportError:
    xlrd = None


class FileSearchApp:
    """Tkinter GUI that searches a directory tree for a keyword or regex.

    Plain-text files are searched line by line; Office documents, PDFs and
    archives are routed to dedicated handlers.  The search itself runs in a
    background thread and reports back to the widgets through
    ``master.after``.

    Attributes set by ``__init__`` that other methods rely on:
        results: dict mapping file path -> list of ``(location, text)`` tuples.
        all_files: list of candidate file paths of the current search.
        stop_requested: flag polled by the worker thread to cancel a search.
        search_thread: the running worker thread, or ``None``.
    """

    # Files larger than this are skipped when the size limit is enabled.
    _MAX_FILE_SIZE = 100 * 1024 * 1024
    _OFFICE_EXTS = ('.docx', '.xlsx', '.xls', '.xlsm', '.pptx', '.pdf', '.doc')
    _ARCHIVE_EXTS = ('.zip', '.rar', '.7z', '.tar', '.gz')
    _ZIP_EXTS = ('.zip', '.jar', '.war')
    _SEVENZIP_EXTS = ('.rar', '.7z', '.tar', '.gz')
    # Extensions searched inside an archive unpacked with 7z.
    _EXTRACTED_OFFICE_EXTS = ('.docx', '.xlsx', '.pptx', '.pdf')
    _EXTRACTED_TEXT_EXTS = ('', '.txt', '.py', '.java', '.c', '.cpp', '.h',
                            '.html', '.xml', '.json', '.csv')
    # Tried in order when UTF-8 and chardet give no usable answer.
    _FALLBACK_ENCODINGS = ('gbk', 'gb2312', 'gb18030', 'latin1')
    _WIDE_BOMS = (codecs.BOM_UTF32_LE, codecs.BOM_UTF32_BE,
                  codecs.BOM_UTF16_LE, codecs.BOM_UTF16_BE)

    # ------------------------------------------------------------------ UI

    def __init__(self, master):
        """Build the window inside *master* (a ``tk.Tk`` or ``Toplevel``)."""
        self.master = master
        master.title("高级文件搜索工具")
        master.geometry("1200x800")
        master.minsize(900, 650)

        # "aqua" only exists on macOS and "vista" only on Windows; asking for
        # a theme that is not installed raises TclError, so check first and
        # otherwise keep the platform default.
        self.style = ttk.Style()
        preferred_theme = "vista" if sys.platform == "win32" else "aqua"
        if preferred_theme in self.style.theme_names():
            self.style.theme_use(preferred_theme)

        main_frame = ttk.Frame(master, padding=10)
        main_frame.pack(fill=tk.BOTH, expand=True)

        self._build_search_panel(main_frame)
        self._build_results_panel(main_frame)

        # Search state.
        self.results = {}
        self.all_files = []
        self.file_paths = []
        self.stop_requested = False
        self.search_thread = None

    def _build_search_panel(self, parent):
        """Create the left-hand panel: inputs, options, buttons, status."""
        search_frame = ttk.LabelFrame(parent, text="搜索选项", padding=10)
        search_frame.pack(side=tk.LEFT, fill=tk.Y, padx=(0, 10), pady=5)

        def add_separator(at_row):
            ttk.Separator(search_frame, orient=tk.HORIZONTAL).grid(
                row=at_row, column=0, columnspan=3, sticky="ew", pady=10)

        row = 0
        ttk.Label(search_frame, text="搜索目录:").grid(
            row=row, column=0, sticky="w", pady=5)
        self.dir_entry = ttk.Entry(search_frame, width=40)
        self.dir_entry.grid(row=row, column=1, padx=5, pady=5, sticky="we")
        self.dir_entry.insert(0, os.getcwd())
        ttk.Button(search_frame, text="浏览...",
                   command=self.browse_directory).grid(
            row=row, column=2, padx=5, pady=5)

        row += 1
        ttk.Label(search_frame, text="关键词:").grid(
            row=row, column=0, sticky="w", pady=5)
        self.keyword_entry = ttk.Entry(search_frame, width=40)
        self.keyword_entry.grid(row=row, column=1, padx=5, pady=5, sticky="we")

        row += 1
        ttk.Label(search_frame, text="文件过滤:").grid(
            row=row, column=0, sticky="w", pady=5)
        self.filter_entry = ttk.Entry(search_frame, width=40)
        self.filter_entry.grid(row=row, column=1, padx=5, pady=5, sticky="we")
        self.filter_entry.insert(0, "*")

        row += 1
        add_separator(row)

        # Search option check boxes, laid out on one row.
        row += 1
        options_frame = ttk.Frame(search_frame)
        options_frame.grid(row=row, column=0, columnspan=3, sticky="we",
                           padx=5, pady=5)

        self.case_var = tk.BooleanVar(value=False)
        ttk.Checkbutton(options_frame, text="忽略大小写",
                        variable=self.case_var).grid(
            row=0, column=0, sticky="w", padx=(0, 10))

        self.regex_var = tk.BooleanVar(value=False)
        ttk.Checkbutton(options_frame, text="正则表达式",
                        variable=self.regex_var).grid(
            row=0, column=1, sticky="w", padx=(0, 10))

        self.binary_var = tk.BooleanVar(value=False)
        self.binary_check = ttk.Checkbutton(options_frame, text="包含二进制",
                                            variable=self.binary_var)
        self.binary_check.grid(row=0, column=2, sticky="w")

        self.limit_var = tk.BooleanVar(value=True)
        ttk.Checkbutton(options_frame, text="限制大小(100MB)",
                        variable=self.limit_var).grid(
            row=0, column=3, sticky="w", padx=(10, 0))

        row += 1
        add_separator(row)

        # Action buttons.
        row += 1
        button_frame = ttk.Frame(search_frame)
        button_frame.grid(row=row, column=0, columnspan=3, pady=10)

        self.search_button = ttk.Button(button_frame, text="开始搜索",
                                        command=self.start_search)
        self.search_button.pack(side=tk.LEFT, padx=5)

        self.stop_button = ttk.Button(button_frame, text="停止搜索",
                                      command=self.stop_search,
                                      state=tk.DISABLED)
        self.stop_button.pack(side=tk.LEFT, padx=5)

        self.export_button = ttk.Button(button_frame, text="导出结果",
                                        command=self.export_results)
        self.export_button.pack(side=tk.LEFT, padx=5)

        row += 1
        add_separator(row)

        # Status line: label (left), progress bar (stretching), stats (right).
        row += 1
        status_frame = ttk.Frame(search_frame)
        status_frame.grid(row=row, column=0, columnspan=3, sticky="we", pady=5)

        self.status_label = ttk.Label(status_frame, text="就绪",
                                      font=("Arial", 9))
        self.status_label.pack(side=tk.LEFT, anchor='w')

        self.progress_var = tk.DoubleVar()
        self.progress_bar = ttk.Progressbar(
            status_frame,
            variable=self.progress_var,
            length=200,
            mode='determinate'
        )
        self.progress_bar.pack(side=tk.LEFT, fill=tk.X, expand=True, padx=10)

        self.stats_label = ttk.Label(status_frame, text="", font=("Arial", 9))
        self.stats_label.pack(side=tk.RIGHT)

    def _build_results_panel(self, parent):
        """Create the right-hand panel: result list and content preview."""
        results_frame = ttk.LabelFrame(parent, text="搜索结果", padding=10)
        results_frame.pack(fill=tk.BOTH, expand=True, padx=(5, 0), pady=5)

        paned_window = ttk.PanedWindow(results_frame, orient=tk.HORIZONTAL)
        paned_window.pack(fill=tk.BOTH, expand=True)

        # Left pane: list of files that contain matches.
        file_list_frame = ttk.Frame(paned_window)
        paned_window.add(file_list_frame, weight=1)

        columns = ("filename", "path")
        self.file_tree = ttk.Treeview(file_list_frame, columns=columns,
                                      show="headings", selectmode="browse")
        self.file_tree.heading("filename", text="文件名")
        self.file_tree.heading("path", text="路径")
        self.file_tree.column("filename", width=200, anchor="w")
        self.file_tree.column("path", width=300, anchor="w")
        self.file_tree.pack(side=tk.LEFT, fill=tk.BOTH, expand=True)
        self.file_tree.bind('<<TreeviewSelect>>', self.show_file_content)
        self.file_tree.bind('<Double-1>', self.open_selected_file)

        file_scroll = ttk.Scrollbar(file_list_frame,
                                    command=self.file_tree.yview)
        file_scroll.pack(side=tk.RIGHT, fill=tk.Y)
        self.file_tree.config(yscrollcommand=file_scroll.set)

        # Context menu of the file list.
        self.file_menu = tk.Menu(self.master, tearoff=0)
        self.file_menu.add_command(label="打开文件",
                                   command=self.open_selected_file)
        self.file_menu.add_command(label="打开文件位置",
                                   command=self.open_file_location)
        self.file_tree.bind("<Button-3>", self.show_file_context_menu)

        # Right pane: preview of the selected file.
        content_frame = ttk.Frame(paned_window)
        paned_window.add(content_frame, weight=2)

        self.content_text = scrolledtext.ScrolledText(
            content_frame,
            wrap=tk.WORD,
            font=("Consolas", 10),
            padx=5,
            pady=5
        )
        self.content_text.pack(fill=tk.BOTH, expand=True)

        # Context menu of the preview.
        text_menu = tk.Menu(self.master, tearoff=0)
        text_menu.add_command(label="复制", command=self.copy_selected_text)
        self.content_text.bind(
            "<Button-3>", lambda e: text_menu.tk_popup(e.x_root, e.y_root))

        # Tags used when rendering the preview.
        self.content_text.tag_configure("match", background="yellow")
        self.content_text.tag_configure("linenum", foreground="blue")
        self.content_text.tag_configure(
            "header", foreground="darkgreen", font=("Arial", 10, "bold"))
        self.content_text.tag_configure(
            "warning", foreground="red", font=("Arial", 10, "italic"))

    def browse_directory(self):
        """Let the user pick the directory to search."""
        directory = filedialog.askdirectory(title="选择搜索目录")
        if directory:
            self.dir_entry.delete(0, tk.END)
            self.dir_entry.insert(0, directory)

    # ------------------------------------------------------ small helpers

    @staticmethod
    def _truncate(text, limit):
        """Return *text* cut to *limit* characters, with "..." if shortened."""
        return text if len(text) <= limit else text[:limit] + "..."

    @staticmethod
    def _column_letter(index):
        """Convert a 1-based column index to Excel letters (1->A, 27->AA)."""
        letters = ""
        while index > 0:
            index, remainder = divmod(index - 1, 26)
            letters = chr(ord("A") + remainder) + letters
        return letters

    @staticmethod
    def _parse_filter_patterns(file_filter):
        """Split the filter entry into a list of fnmatch patterns.

        Patterns may be separated by ';', '|' or ','.  Only when none of
        those is present is whitespace treated as the separator, so a
        pattern such as ``my file*.txt; *.py`` keeps its inner space.
        An empty filter means "match everything".
        """
        file_filter = file_filter.strip()
        if not file_filter:
            return ['*']
        if re.search(r'[;|,]', file_filter):
            parts = re.split(r'[;|,]', file_filter)
        else:
            parts = file_filter.split()
        patterns = [part.strip() for part in parts if part.strip()]
        return patterns or ['*']

    @staticmethod
    def _require(dependency, package_name):
        """Raise RuntimeError when an optional parser package is missing."""
        if dependency is None:
            raise RuntimeError(f"{package_name} is not installed")

    @classmethod
    def _iter_line_matches(cls, lines, pattern, skip_empty=False):
        """Yield ``(line_number, preview)`` for each line matching *pattern*.

        The preview is the stripped line, truncated to 150 characters.
        """
        for line_num, line in enumerate(lines, 1):
            if skip_empty and not line:
                continue
            if pattern.search(line):
                yield line_num, cls._truncate(line.strip(), 150)

    def _compile_pattern(self, keyword):
        """Compile *keyword* according to the regex / ignore-case options.

        Raises:
            re.error: if regex mode is on and *keyword* is not a valid regex.
        """
        flags = re.IGNORECASE if self.case_var.get() else 0
        source = keyword if self.regex_var.get() else re.escape(keyword)
        return re.compile(source, flags)

    def _set_searching(self, active):
        """Toggle the start/stop buttons for a running or idle search."""
        self.search_button.config(state=tk.DISABLED if active else tk.NORMAL)
        self.stop_button.config(state=tk.NORMAL if active else tk.DISABLED)

    def _selected_path(self):
        """Return the path of the selected result row, or None."""
        selection = self.file_tree.selection()
        if not selection:
            return None
        return self.file_tree.item(selection[0], 'values')[1]

    # ------------------------------------------------------------- search

    def start_search(self):
        """Validate the inputs and launch the search in a worker thread."""
        # Reset state left over from a previous search.
        self.progress_var.set(0)
        self.progress_bar.config(mode='determinate')
        self.stop_requested = False
        self.results = {}
        self.all_files = []
        self.file_paths = []
        self.file_tree.delete(*self.file_tree.get_children())
        self.content_text.delete(1.0, tk.END)
        self.status_label.config(text="正在搜索...")
        self._set_searching(True)
        self.stats_label.config(text="")

        directory = self.dir_entry.get().strip()
        keyword = self.keyword_entry.get().strip()
        file_filter = self.filter_entry.get().strip()

        error = None
        pattern = None
        if not directory or not os.path.isdir(directory):
            error = ("错误", "请选择有效的搜索目录")
        elif not keyword:
            error = ("错误", "请输入搜索关键词")
        else:
            try:
                pattern = self._compile_pattern(keyword)
            except re.error as e:
                error = ("正则表达式错误", f"无效的正则表达式: {str(e)}")

        if error is not None:
            messagebox.showerror(*error)
            # Nothing was started, so do not leave the UI in "searching" state.
            self.status_label.config(text="就绪")
            self._set_searching(False)
            return

        filter_patterns = self._parse_filter_patterns(file_filter)

        # Tk variables are read here, on the UI thread, and handed to the
        # worker as plain values.
        self.search_thread = threading.Thread(
            target=self.perform_search,
            args=(directory, filter_patterns, pattern),
            kwargs={
                'include_binary': bool(self.binary_var.get()),
                'limit_size': bool(self.limit_var.get()),
            },
            daemon=True
        )
        self.search_thread.start()

    def perform_search(self, directory, filter_patterns, pattern, *,
                       include_binary=None, limit_size=None):
        """Run the search; intended to execute in a background thread.

        Args:
            directory: root directory to walk.
            filter_patterns: list of fnmatch patterns; a file is considered
                when its name matches any of them.
            pattern: compiled regular expression to look for.
            include_binary: when false, files that look binary are not
                searched as text.  ``None`` reads the check box.
            limit_size: when true, files above 100 MB are skipped.
                ``None`` reads the check box.
        """
        if include_binary is None:
            include_binary = self.binary_var.get()
        if limit_size is None:
            limit_size = self.limit_var.get()

        try:
            # Pass 1: collect candidate files.
            all_files = []
            for root, _, files in os.walk(directory):
                if self.stop_requested:
                    self.master.after(
                        0, lambda: self.status_label.config(text="搜索已取消"))
                    return
                for file in files:
                    if not any(fnmatch.fnmatch(file, pat)
                               for pat in filter_patterns):
                        continue
                    file_path = os.path.join(root, file)
                    try:
                        file_size = os.path.getsize(file_path)
                    except OSError:
                        continue  # unreadable / vanished file
                    if limit_size and file_size > self._MAX_FILE_SIZE:
                        continue
                    all_files.append(file_path)

            self.all_files = all_files
            total_files = len(all_files)

            self.master.after(
                0, lambda: self.progress_bar.config(maximum=total_files))
            self.master.after(
                0, lambda: self.stats_label.config(
                    text=f"扫描到 {total_files} 个文件"))

            # Pass 2: search each candidate.
            self.results = {}
            processed = 0
            matches_found = 0

            for file_path in self.all_files:
                if self.stop_requested:
                    break

                processed += 1
                self.master.after(
                    0, lambda v=processed: self.progress_var.set(v))
                if processed % 10 == 0:  # refresh the counter every 10 files
                    self.master.after(
                        0, lambda p=processed, t=total_files:
                        self.stats_label.config(
                            text=f"处理中: {p}/{t} 文件 ({round(p/t*100,1)}%)"))

                ext_lower = os.path.splitext(file_path)[1].lower()

                if ext_lower in self._OFFICE_EXTS:
                    matches = self.search_in_office_file(file_path, pattern)
                elif ext_lower in self._ARCHIVE_EXTS:
                    matches = self.search_in_archive(file_path, pattern)
                else:
                    # The binary filter only applies to files read as text;
                    # Office documents and archives are binary by nature and
                    # have their own handlers above.
                    if not include_binary and self.is_binary(file_path):
                        continue
                    matches = self.search_in_text_file(file_path, pattern)

                if matches:
                    self.results[file_path] = matches
                    matches_found += len(matches)
                    filename = os.path.basename(file_path)
                    self.master.after(
                        0, lambda fp=file_path, fn=filename:
                        self.file_tree.insert("", "end", values=(fn, fp)))

            if self.stop_requested:
                status_text = f"搜索已取消 - 找到 {len(self.results)} 个文件, {matches_found} 个匹配项"
            else:
                status_text = f"搜索完成 - 找到 {len(self.results)} 个文件, {matches_found} 个匹配项"

            self.master.after(
                0, lambda: self.status_label.config(text=status_text))
            self.master.after(
                0, lambda: self.stats_label.config(
                    text=f"已处理 {processed}/{total_files} 文件"))
            self.master.after(
                0, lambda: self.progress_var.set(total_files))

        except Exception as e:
            # Last-resort boundary of the worker thread: log and tell the user.
            error_info = f"搜索错误: {type(e).__name__} - {str(e)}"
            print(error_info)
            try:
                with open("search_errors.log", "a", encoding="utf-8") as log:
                    log.write(
                        f"{time.strftime('%Y-%m-%d %H:%M:%S')} - {error_info}\n")
                    traceback.print_exc(file=log)
            except OSError as log_error:
                print(f"search_errors.log: {log_error}")

            self.master.after(0, lambda: messagebox.showerror(
                "搜索错误",
                f"发生严重错误: {error_info}\n详细信息已记录到日志"
            ))
        finally:
            self.master.after(
                0, lambda: self.search_button.config(state=tk.NORMAL))
            self.master.after(
                0, lambda: self.stop_button.config(state=tk.DISABLED))
            self.search_thread = None

    def search_in_text_file(self, filepath, pattern):
        """Return ``[(line_number, preview), ...]`` for matching lines.

        Undecodable bytes are replaced rather than aborting the file.  Read
        errors are printed and whatever was found so far is returned.
        """
        matches = []
        try:
            encoding = self.detect_encoding(filepath)
            with open(filepath, 'r', encoding=encoding,
                      errors='replace') as f:
                for hit in self._iter_line_matches(f, pattern):
                    matches.append(hit)
        except Exception as e:
            print(f"读取文本文件失败 {filepath}: {str(e)}")
        return matches

    # --------------------------------------------------- office documents

    def search_in_office_file(self, filepath, pattern):
        """Search the text content of an Office/PDF document.

        Supports .docx, .xlsx/.xlsm, .xls, .pptx, .pdf and .doc.  Returns a
        list of ``(location, description)`` tuples; failures (including a
        missing parser package) are printed and yield the matches collected
        up to that point.
        """
        matches = []
        ext_lower = os.path.splitext(filepath)[1].lower()
        handlers = {
            '.docx': self._search_docx,
            '.xlsx': self._search_xlsx,
            '.xlsm': self._search_xlsx,
            '.xls': self._search_xls,
            '.pptx': self._search_pptx,
            '.pdf': self._search_pdf,
            '.doc': self._search_doc,
        }
        handler = handlers.get(ext_lower)
        if handler is None:
            return matches
        try:
            handler(filepath, pattern, matches)
        except Exception as e:
            print(f"处理Office文件失败 {filepath}: {str(e)}")
        return matches

    def _search_docx(self, filepath, pattern, matches):
        """Append matches found in paragraphs and table cells of a .docx."""
        self._require(docx, "python-docx")
        doc = docx.Document(filepath)

        for i, para in enumerate(doc.paragraphs, 1):
            if para.text and pattern.search(para.text):
                matches.append(
                    (i, f"段落 {i}: {self._truncate(para.text, 100)}"))

        for table in doc.tables:
            for row_idx, row in enumerate(table.rows, 1):
                for cell_idx, cell in enumerate(row.cells, 1):
                    if cell.text and pattern.search(cell.text):
                        content = self._truncate(cell.text.strip(), 100)
                        matches.append(
                            (row_idx, f"表格 行{row_idx}列{cell_idx}: {content}"))

    def _search_xlsx(self, filepath, pattern, matches):
        """Append matches found in the cells of an .xlsx/.xlsm workbook."""
        self._require(load_workbook, "openpyxl")
        wb = load_workbook(filepath, read_only=True, data_only=True)
        try:
            for sheet_name in wb.sheetnames:
                sheet = wb[sheet_name]
                rows = sheet.iter_rows(values_only=True)
                for row_idx, row in enumerate(rows, 1):
                    for col_idx, cell in enumerate(row, 1):
                        if cell is None:
                            continue
                        text = str(cell)
                        if pattern.search(text):
                            cell_ref = f"{self._column_letter(col_idx)}{row_idx}"
                            cell_value = self._truncate(text.strip(), 100)
                            matches.append(
                                (row_idx, f"工作表 '{sheet_name}' 单元格 {cell_ref}: {cell_value}"))
        finally:
            # Read-only workbooks keep the file open until closed.
            wb.close()

    def _search_xls(self, filepath, pattern, matches):
        """Append matches found in the cells of a legacy .xls workbook."""
        self._require(xlrd, "xlrd")
        wb = xlrd.open_workbook(filepath)
        for sheet_idx in range(wb.nsheets):
            sheet = wb.sheet_by_index(sheet_idx)
            for row_idx in range(sheet.nrows):
                for col_idx in range(sheet.ncols):
                    cell = sheet.cell_value(row_idx, col_idx)
                    if cell and pattern.search(str(cell)):
                        cell_ref = f"{self._column_letter(col_idx + 1)}{row_idx+1}"
                        cell_value = self._truncate(str(cell).strip(), 100)
                        matches.append(
                            (row_idx+1, f"工作表 '{sheet.name}' 单元格 {cell_ref}: {cell_value}"))

    def _search_pptx(self, filepath, pattern, matches):
        """Append matches found in the text shapes of a .pptx deck."""
        from pptx import Presentation
        ppt = Presentation(filepath)
        for slide_idx, slide in enumerate(ppt.slides, 1):
            for shape in slide.shapes:
                if not hasattr(shape, "text"):
                    continue
                if shape.text and pattern.search(shape.text):
                    content = self._truncate(shape.text.strip(), 100)
                    matches.append(
                        (slide_idx, f"幻灯片 {slide_idx}: {content}"))

    def _search_pdf(self, filepath, pattern, matches):
        """Append one entry per PDF page that contains matches."""
        self._require(PyPDF2, "PyPDF2")
        with open(filepath, 'rb') as f:
            pdf = PyPDF2.PdfReader(f)
            for page_num, page in enumerate(pdf.pages, 1):
                page_text = page.extract_text()
                if not page_text:
                    continue
                # Each match is shown with 20 characters of context.
                contexts = [
                    page_text[max(0, m.start()-20):m.end()+20]
                    .replace('\n', ' ').strip()
                    for m in pattern.finditer(page_text)
                ]
                if not contexts:
                    continue
                preview = "; ".join(contexts[:3])  # first three matches
                if len(contexts) > 3:
                    preview += f" ... (+{len(contexts)-3} 更多)"
                matches.append((page_num, f"页面 {page_num}: {preview}"))

    def _search_doc(self, filepath, pattern, matches):
        """Append matching lines of a legacy .doc file.

        ``antiword`` is tried first; if it cannot be run, Word automation
        via ``win32com`` is used as a fallback.
        """
        try:
            result = subprocess.run(['antiword', filepath],
                                    capture_output=True,
                                    text=True,
                                    timeout=10)
        except (OSError, subprocess.SubprocessError, UnicodeError):
            doc_text = self._read_doc_via_word(filepath)
        else:
            if result.returncode != 0:
                return
            doc_text = result.stdout

        for hit in self._iter_line_matches(doc_text.split('\n'), pattern,
                                           skip_empty=True):
            matches.append(hit)

    @staticmethod
    def _read_doc_via_word(filepath):
        """Return the text of a .doc file using Word COM automation."""
        import win32com.client
        word = win32com.client.Dispatch("Word.Application")
        try:
            word.Visible = False
            # An absolute path avoids depending on Word's working directory.
            doc = word.Documents.Open(os.path.abspath(filepath))
            try:
                return doc.Content.Text
            finally:
                doc.Close()
        finally:
            # Always quit so no Word process is left behind.
            word.Quit()

    # ------------------------------------------------------------ archives

    def search_in_archive(self, filepath, pattern):
        """Search the members of an archive.

        Zip-style archives are read with ``zipfile``; .rar/.7z/.tar/.gz are
        unpacked with the external ``7z`` tool into a temporary directory.
        Returns a list of ``(member_name, description)`` tuples.
        """
        matches = []
        ext_lower = os.path.splitext(filepath)[1].lower()
        try:
            if ext_lower in self._ZIP_EXTS:
                self._search_zip(filepath, pattern, matches)
            elif ext_lower in self._SEVENZIP_EXTS:
                self._search_with_7z(filepath, pattern, matches)
        except Exception as e:
            print(f"处理压缩文件失败 {filepath}: {str(e)}")
        return matches

    def _search_zip(self, filepath, pattern, matches):
        """Append zip members whose first 4 KB of text match *pattern*."""
        with zipfile.ZipFile(filepath, 'r') as archive:
            for name in archive.namelist():
                if name.endswith('/'):
                    continue  # directory entry
                try:
                    with archive.open(name) as member:
                        sample = member.read(4096)  # only the first 4 KB
                except Exception:
                    # Best effort: an encrypted or damaged member must not
                    # stop the remaining members from being searched.
                    continue
                # Decide text/binary from the member's bytes; the member name
                # is not a path on disk.
                if self._looks_binary(sample):
                    continue
                encoding = self._guess_encoding(sample)
                text_content = sample.decode(encoding, errors='replace')
                if pattern.search(text_content):
                    matches.append((name, f"压缩文件中的文件: {name}"))

    def _search_with_7z(self, filepath, pattern, matches):
        """Unpack with ``7z`` and search the extracted text/Office files."""
        temp_dir = tempfile.mkdtemp()
        try:
            subprocess.run(['7z', 'x', filepath, f'-o{temp_dir}'],
                           stdout=subprocess.PIPE,
                           stderr=subprocess.PIPE,
                           check=True,
                           timeout=60)
            for root, _, files in os.walk(temp_dir):
                for file in files:
                    full_path = os.path.join(root, file)
                    file_ext = os.path.splitext(file)[1].lower()
                    if file_ext in self._EXTRACTED_OFFICE_EXTS:
                        file_matches = self.search_in_office_file(
                            full_path, pattern)
                    elif file_ext in self._EXTRACTED_TEXT_EXTS:
                        file_matches = self.search_in_text_file(
                            full_path, pattern)
                    else:
                        continue
                    if file_matches:
                        matches.append((file, f"压缩文件中的文件: {file}"))
        finally:
            shutil.rmtree(temp_dir, ignore_errors=True)

    # ------------------------------------------------- encoding detection

    @staticmethod
    def _decodes_cleanly(raw, encoding):
        """Return True if *raw* decodes strictly with *encoding*.

        An incremental decoder with ``final=False`` is used so that a
        multi-byte character cut off at the end of the sample does not count
        as an error.
        """
        try:
            decoder = codecs.getincrementaldecoder(encoding)(errors='strict')
            decoder.decode(raw, final=False)
        except (UnicodeError, LookupError):
            return False
        return True

    @classmethod
    def _guess_encoding(cls, raw):
        """Guess the text encoding of the byte sample *raw*.

        Order: UTF-8 BOM, strict UTF-8, chardet (confidence > 0.7, when
        installed), then encodings common for Chinese text, finally UTF-8.
        """
        if raw.startswith(codecs.BOM_UTF8):
            return 'utf-8-sig'
        if cls._decodes_cleanly(raw, 'utf-8'):
            return 'utf-8'
        if chardet is not None:
            result = chardet.detect(raw)
            detected = result.get('encoding')
            confidence = result.get('confidence') or 0
            if (detected and confidence > 0.7
                    and cls._decodes_cleanly(raw, detected)):
                return detected
        for encoding in cls._FALLBACK_ENCODINGS:
            if cls._decodes_cleanly(raw, encoding):
                return encoding
        return 'utf-8'

    def detect_encoding(self, filepath):
        """Return an encoding name for *filepath* based on its first 4 KB.

        Falls back to ``'utf-8'`` when the file cannot be read.
        """
        try:
            with open(filepath, 'rb') as f:
                raw_data = f.read(4096)
            return self._guess_encoding(raw_data)
        except Exception:
            return 'utf-8'

    @classmethod
    def _looks_binary(cls, chunk):
        """Return True if the byte sample *chunk* looks like binary data.

        A NUL byte marks binary content, except in UTF-16/UTF-32 text that
        announces itself with a BOM.  Bytes >= 0x80 are *not* a binary
        marker: they occur in any non-ASCII text (UTF-8, GBK, ...).
        """
        if chunk.startswith(cls._WIDE_BOMS):
            return False
        return b'\0' in chunk

    def is_binary(self, filepath):
        """Return True if the file at *filepath* looks binary.

        Only the first 1024 bytes are inspected.  Unreadable files are
        reported as not binary, leaving the error to the actual reader.
        """
        try:
            with open(filepath, 'rb') as f:
                chunk = f.read(1024)
        except OSError:
            return False
        return self._looks_binary(chunk)

    # ------------------------------------------------------------ actions

    def stop_search(self):
        """Ask the worker thread to stop after the current file."""
        self.stop_requested = True
        self.status_label.config(text="正在停止搜索...")
        self.stop_button.config(state=tk.DISABLED)
        self.progress_bar.config(
            mode='indeterminate' if self.progress_var.get() == 0
            else 'determinate')

    @staticmethod
    def _write_results_csv(file_path, results):
        """Write ``[(path, [(location, text), ...]), ...]`` as UTF-8 CSV.

        The csv module handles quoting, so commas, quotes and line breaks in
        the matched text are preserved instead of being rewritten.
        """
        with open(file_path, 'w', encoding='utf-8', newline='') as f:
            writer = csv.writer(f, lineterminator='\n')
            writer.writerow(["文件路径", "匹配行号", "匹配内容"])
            for path, matches in results:
                for location, content in matches:
                    writer.writerow([path, location, content])

    def export_results(self):
        """Save the current results to a CSV file chosen by the user."""
        if not self.results:
            messagebox.showinfo("导出结果", "没有可导出的搜索结果")
            return

        file_path = filedialog.asksaveasfilename(
            title="保存搜索结果为",
            defaultextension=".csv",
            filetypes=[("CSV 文件", "*.csv"), ("文本文件", "*.txt")]
        )
        if not file_path:
            return

        try:
            # Snapshot the dict: a search still running in the worker thread
            # may add entries while the file is being written.
            self._write_results_csv(file_path, list(self.results.items()))
            messagebox.showinfo("导出成功", f"搜索结果已保存到:\n{file_path}")
        except Exception as e:
            messagebox.showerror("导出错误", f"导出失败: {str(e)}")

    def _show_match_list(self, filepath, label):
        """Render the stored matches of *filepath* in the preview.

        *label* names the location field ("位置" or "文件").
        """
        matches = self.results.get(filepath, [])
        if not matches:
            self.content_text.insert(tk.END, "\n未找到匹配内容\n", "warning")
            return
        self.content_text.insert(
            tk.END, f"\n找到 {len(matches)} 个匹配项:\n\n", "header")
        for i, (location, content) in enumerate(matches, 1):
            self.content_text.insert(
                tk.END, f"[匹配项 {i}] {label}: {location}\n")
            self.content_text.insert(tk.END, f"{content}\n\n")

    def _show_text_preview(self, filepath):
        """Render up to 1000 lines of a text file with matches highlighted."""
        keyword = self.keyword_entry.get().strip()
        try:
            pattern = self._compile_pattern(keyword)
        except re.error:
            pattern = None  # show the file without highlighting

        self.content_text.insert(tk.END, "\n文件内容:\n\n", "header")

        max_preview_lines = 1000
        try:
            encoding = self.detect_encoding(filepath)
            with open(filepath, 'r', encoding=encoding,
                      errors='replace') as f:
                for line_count, line in enumerate(f, 1):
                    if line_count > max_preview_lines:
                        self.content_text.insert(
                            tk.END,
                            f"\n... (文件过大,仅显示前{max_preview_lines}行)\n",
                            "warning")
                        break

                    self.content_text.insert(
                        tk.END, f"{line_count:4d} | ", "linenum")

                    if pattern is None:
                        self.content_text.insert(tk.END, line)
                        continue

                    # Alternate plain and highlighted segments.
                    start_idx = 0
                    for match in pattern.finditer(line):
                        self.content_text.insert(
                            tk.END, line[start_idx:match.start()])
                        self.content_text.insert(
                            tk.END, match.group(), "match")
                        start_idx = match.end()
                    self.content_text.insert(tk.END, line[start_idx:])
        except UnicodeDecodeError:
            self.content_text.insert(
                tk.END, "\n无法解码此文件内容(可能是二进制文件)\n", "warning")
        except Exception as e:
            self.content_text.insert(
                tk.END, f"\n读取文件时出错: {str(e)}\n", "warning")

    def show_file_content(self, event=None):
        """Show the selected file in the preview pane.

        Office documents and archives list their stored matches; any other
        file is displayed as text with the matches highlighted.
        """
        filepath = self._selected_path()
        if filepath is None:
            return

        self.content_text.delete(1.0, tk.END)
        ext_lower = os.path.splitext(filepath)[1].lower()
        self.content_text.insert(tk.END, f"文件路径: {filepath}\n", "header")

        try:
            if ext_lower in self._OFFICE_EXTS:
                self._show_match_list(filepath, "位置")
            elif ext_lower in self._ARCHIVE_EXTS:
                self._show_match_list(filepath, "文件")
            else:
                self._show_text_preview(filepath)
        except Exception as e:
            self.content_text.insert(
                tk.END, f"\n加载文件内容出错: {str(e)}\n", "warning")

    def open_selected_file(self, event=None):
        """Open the selected file with the system's default application."""
        filepath = self._selected_path()
        if filepath is None:
            return
        try:
            if sys.platform == "win32":
                os.startfile(filepath)
            elif sys.platform == "darwin":  # macOS
                subprocess.run(["open", filepath])
            else:  # Linux
                subprocess.run(["xdg-open", filepath])
        except Exception as e:
            messagebox.showerror("打开文件失败", f"无法打开文件: {str(e)}")

    def open_file_location(self):
        """Reveal the selected file in the system file manager."""
        filepath = self._selected_path()
        if filepath is None:
            return
        folder = os.path.dirname(filepath)
        try:
            if sys.platform == "win32":
                subprocess.run(["explorer", "/select,", filepath])
            elif sys.platform == "darwin":  # macOS
                subprocess.run(["open", "-R", filepath])
            else:  # Linux
                subprocess.run(["xdg-open", folder])
        except Exception as e:
            messagebox.showerror("打开位置失败", f"无法打开位置: {str(e)}")

    def show_file_context_menu(self, event):
        """Select the row under the pointer and pop up its context menu."""
        item = self.file_tree.identify_row(event.y)
        if item:
            self.file_tree.selection_set(item)
            self.file_menu.tk_popup(event.x_root, event.y_root)

    def copy_selected_text(self):
        """Copy the text selected in the preview pane to the clipboard."""
        try:
            selected_text = self.content_text.get(tk.SEL_FIRST, tk.SEL_LAST)
        except tk.TclError:
            return  # nothing is selected
        if selected_text:
            self.master.clipboard_clear()
            self.master.clipboard_append(selected_text)


# Program entry point.
if __name__ == "__main__":
    root = tk.Tk()
    app = FileSearchApp(root)

    # The window icon is optional; a missing or unreadable icon file is not
    # an error.
    try:
        if sys.platform == "win32":
            root.iconbitmap("search_icon.ico")
        else:
            img = tk.PhotoImage(file="search_icon.png")
            root.iconphoto(True, img)
    except tk.TclError:
        pass

    root.mainloop()

# --- Page text that followed the code (original poster's request, kept verbatim) ---
# 这是我的代码,所有的功能都已经实现,但我觉得它太过于冗余,帮我优化一下,另外我觉得目前这个布局不够美观,也不够实用,帮我修改布局,另外,在搜索到关键字之后,我希望能够在文件预览那个窗口中,将搜索到的关键字进行高亮,或者可以选择高亮。在新增一些比较实用的功能
09-13
我想创建一个面板程序:执行以下步骤: 1、调用WinMerge生成HTML差异文件 2、将生成的HTML文件与目标Excel文件放在同一目录 3、将生成的HTML文件转换为excel文件,并且复制转换后的excel文件的A~F列数据,粘贴到目标Excel文件的“一覧”工作表第6行开始的A~F列 4、点击目标Excel文件“一覧”工作表上的“作成”按钮 5、等待处理完成 目前我的代码为:import os import subprocess import shutil import time import tkinter as tk from tkinter import filedialog, ttk, scrolledtext, messagebox, PhotoImage import pandas as pd import win32com.client as win32 from bs4 import BeautifulSoup import threading import tempfile import queue import traceback class DiffProcessorApp: def __init__(self, root): self.root = root root.title("高级文件夹比较工具") root.geometry("1000x700") root.configure(bg="#f5f5f5") # 创建现代风格主题 self.style = ttk.Style() self.style.theme_use('clam') # 自定义主题颜色 self.style.configure('TButton', font=('Segoe UI', 10, 'bold'), borderwidth=1, foreground="#333", background="#4CAF50", bordercolor="#388E3C", relief="flat", padding=8, anchor="center") self.style.map('TButton', background=[('active', '#388E3C'), ('disabled', '#BDBDBD')], foreground=[('disabled', '#9E9E9E')]) self.style.configure('TLabel', font=('Segoe UI', 9), background="#f5f5f5") self.style.configure('TLabelframe', font=('Segoe UI', 10, 'bold'), background="#f5f5f5", relief="flat", borderwidth=2) self.style.configure('TLabelframe.Label', font=('Segoe UI', 10, 'bold'), background="#f5f5f5", foreground="#2E7D32") self.style.configure('Treeview', font=('Segoe UI', 9), rowheight=25) self.style.configure('Treeview.Heading', font=('Segoe UI', 9, 'bold')) # 创建主框架 main_frame = ttk.Frame(root, padding="15") main_frame.pack(fill=tk.BOTH, expand=True, padx=10, pady=10) # 标题区域 header_frame = ttk.Frame(main_frame) header_frame.pack(fill=tk.X, pady=(0, 15)) # 添加标题图标 try: icon = PhotoImage(file="folder_icon.png") self.icon_label = ttk.Label(header_frame, image=icon) self.icon_label.image = icon self.icon_label.pack(side=tk.LEFT, padx=(0, 10)) except: self.icon_label = ttk.Label(header_frame, text="📁", font=("Arial", 24)) self.icon_label.pack(side=tk.LEFT, padx=(0, 10)) title_label = 
ttk.Label(header_frame, text="高级文件夹比较工具", font=("Segoe UI", 18, "bold"), foreground="#2E7D32") title_label.pack(side=tk.LEFT) # 文件选择区域 file_frame = ttk.LabelFrame(main_frame, text="文件夹选择", padding="12") file_frame.pack(fill=tk.X, pady=5) # 文件夹选择 self.old_folder_entry, self.new_folder_entry = self.create_folder_selector(file_frame, "原始文件夹:") self.new_folder_entry = self.create_folder_selector(file_frame, "修改后文件夹:")[0] # 比较选项区域 options_frame = ttk.LabelFrame(main_frame, text="比较选项", padding="12") options_frame.pack(fill=tk.X, pady=5) # 递归比较选项 self.recursive_var = tk.BooleanVar(value=True) recursive_check = ttk.Checkbutton(options_frame, text="递归比较子文件夹", variable=self.recursive_var) recursive_check.grid(row=0, column=0, padx=10, pady=5, sticky=tk.W) # 文件过滤 filter_frame = ttk.Frame(options_frame) filter_frame.grid(row=0, column=1, padx=10, pady=5, sticky=tk.W) ttk.Label(filter_frame, text="文件过滤:").pack(side=tk.LEFT, padx=(0, 5)) self.filter_var = tk.StringVar(value="*.*") filter_entry = ttk.Entry(filter_frame, textvariable=self.filter_var, width=15) filter_entry.pack(side=tk.LEFT) # 目标Excel选择 excel_frame = ttk.LabelFrame(main_frame, text="输出设置", padding="12") excel_frame.pack(fill=tk.X, pady=5) ttk.Label(excel_frame, text="目标Excel文件:").grid(row=0, column=0, sticky=tk.W, padx=5, pady=5) self.excel_file_entry = ttk.Entry(excel_frame, width=60) self.excel_file_entry.grid(row=0, column=1, padx=5, pady=5) ttk.Button(excel_frame, text="浏览...", command=lambda: self.select_file(self.excel_file_entry, [("Excel文件", "*.xlsx *.xlsm")])).grid(row=0, column=2, padx=5, pady=5) # 执行按钮区域 button_frame = ttk.Frame(main_frame) button_frame.pack(fill=tk.X, pady=10) self.run_button = ttk.Button(button_frame, text="执行比较", command=self.start_processing, width=20, style='TButton') self.run_button.pack(side=tk.LEFT) # 停止按钮 self.stop_button = ttk.Button(button_frame, text="停止", command=self.stop_processing, width=10, state=tk.DISABLED) self.stop_button.pack(side=tk.LEFT, padx=10) # 进度条 
self.progress = ttk.Progressbar(main_frame, orient=tk.HORIZONTAL, length=700, mode='determinate') self.progress.pack(fill=tk.X, pady=5) # 状态信息 status_frame = ttk.Frame(main_frame) status_frame.pack(fill=tk.X, pady=5) self.status_var = tk.StringVar(value="准备就绪") status_label = ttk.Label(status_frame, textvariable=self.status_var, font=("Segoe UI", 9), foreground="#2E7D32") status_label.pack(side=tk.LEFT) # 日志和预览区域 notebook = ttk.Notebook(main_frame) notebook.pack(fill=tk.BOTH, expand=True, pady=5) # 文件夹结构标签 tree_frame = ttk.Frame(notebook, padding="5") notebook.add(tree_frame, text="文件夹结构") # 创建树形视图 self.tree = ttk.Treeview(tree_frame, columns=("Status"), show="tree") self.tree.heading("#0", text="文件夹结构", anchor=tk.W) self.tree.heading("Status", text="状态", anchor=tk.W) self.tree.column("#0", width=400) self.tree.column("Status", width=100) vsb = ttk.Scrollbar(tree_frame, orient="vertical", command=self.tree.yview) hsb = ttk.Scrollbar(tree_frame, orient="horizontal", command=self.tree.xview) self.tree.configure(yscrollcommand=vsb.set, xscrollcommand=hsb.set) self.tree.grid(row=0, column=0, sticky="nsew") vsb.grid(row=0, column=1, sticky="ns") hsb.grid(row=1, column=0, sticky="ew") # 日志标签 log_frame = ttk.Frame(notebook, padding="5") notebook.add(log_frame, text="执行日志") self.log_text = scrolledtext.ScrolledText(log_frame, height=10, wrap=tk.WORD, font=("Consolas", 9)) self.log_text.pack(fill=tk.BOTH, expand=True) self.log_text.config(state=tk.DISABLED) # 设置网格权重 tree_frame.grid_rowconfigure(0, weight=1) tree_frame.grid_columnconfigure(0, weight=1) # 线程控制 self.processing = False self.queue = queue.Queue() # 启动队列处理 self.root.after(100, self.process_queue) def create_folder_selector(self, parent, label_text): """创建文件夹选择器组件""" frame = ttk.Frame(parent) frame.pack(fill=tk.X, pady=5) ttk.Label(frame, text=label_text).grid(row=0, column=0, sticky=tk.W, padx=5, pady=5) entry = ttk.Entry(frame, width=70) entry.grid(row=0, column=1, padx=5, pady=5) button = ttk.Button(frame, 
text="浏览文件...", command=lambda: self.select_folder(entry)) button.grid(row=0, column=2, padx=5, pady=5) return entry, button def select_folder(self, entry): """选择文件夹""" foldername = filedialog.askdirectory() if foldername: entry.delete(0, tk.END) entry.insert(0, foldername) # 自动填充文件夹结构 self.populate_folder_tree(foldername) def select_file(self, entry, filetypes=None): """选择文件""" if filetypes is None: filetypes = [("所有文件", "*.*")] filename = filedialog.askopenfilename(filetypes=filetypes) if filename: entry.delete(0, tk.END) entry.insert(0, filename) def populate_folder_tree(self, path): """填充文件夹结构树""" self.tree.delete(*self.tree.get_children()) if not os.path.isdir(path): return # 添加根节点 root_node = self.tree.insert("", "end", text=os.path.basename(path), values=("文件夹",), open=True) self.add_tree_nodes(root_node, path) def add_tree_nodes(self, parent, path): """递归添加树节点""" try: for item in os.listdir(path): item_path = os.path.join(path, item) if os.path.isdir(item_path): node = self.tree.insert(parent, "end", text=item, values=("文件夹",)) self.add_tree_nodes(node, item_path) else: self.tree.insert(parent, "end", text=item, values=("文件",)) except PermissionError: self.log_message(f"权限错误: 无法访问 {path}") def log_message(self, message): """记录日志消息""" self.queue.put(("log", message)) def update_progress(self, value): """更新进度条""" self.queue.put(("progress", value)) def update_status(self, message): """更新状态信息""" self.queue.put(("status", message)) def process_queue(self): """处理线程队列中的消息""" try: while not self.queue.empty(): msg_type, data = self.queue.get_nowait() if msg_type == "log": self.log_text.config(state=tk.NORMAL) self.log_text.insert(tk.END, data + "\n") self.log_text.see(tk.END) self.log_text.config(state=tk.DISABLED) elif msg_type == "progress": self.progress['value'] = data elif msg_type == "status": self.status_var.set(data) except queue.Empty: pass self.root.after(100, self.process_queue) def write_to_excel(self, excel_path, diff_data): """将差异数据写入Excel""" 
self.log_message("正在写入Excel文件...") try: # 使用win32com打开Excel excel = win32.gencache.EnsureDispatch('Excel.Application') excel.Visible = True workbook = excel.Workbooks.Open(os.path.abspath(excel_path)) sheet = workbook.Sheets("一覧") # 从第6行开始写入数据 start_row = 6 for i, row_data in enumerate(diff_data): for j, value in enumerate(row_data[:6]): # 确保值是字符串类型 sheet.Cells(start_row + i, j + 1).Value = str(value) # 保存Excel workbook.Save() self.log_message(f"数据已写入Excel第{start_row}行开始") # 触发"作成"按钮 self.log_message("正在触发'作成'按钮...") try: # 查找按钮并点击 button = sheet.Buttons("作成") button.OnAction = "作成按钮的处理" button.Click() self.log_message("已触发'作成'按钮") # 等待处理完成 self.update_status("处理中...请等待") # 简单等待机制 for _ in range(30): # 最多等待30秒 if not self.processing: break if excel.CalculationState == 0: # 0 = xlDone break time.sleep(1) self.log_message("处理中...") self.log_message("处理完成") self.update_status("处理完成") except Exception as e: # 修复TypeError: 使用f-string记录异常 self.log_message(f"按钮操作失败: {str(e)}. 请手动点击'作成'按钮") # 关闭Excel workbook.Close() excel.Quit() return True except Exception as e: # 修复TypeError: 使用f-string记录异常 self.log_message(f"Excel操作失败: {str(e)}\n{traceback.format_exc()}") return False def start_processing(self): """启动处理线程 - 修复无响应问题""" if self.processing: self.log_message("警告: 处理正在进行中") return # 获取路径 old_path = self.old_folder_entry.get() new_path = self.new_folder_entry.get() excel_file = self.excel_file_entry.get() # 详细路径验证 validation_errors = [] if not old_path: validation_errors.append("原始文件夹路径为空") elif not os.path.isdir(old_path): validation_errors.append(f"原始文件夹路径无效: {old_path}") if not new_path: validation_errors.append("新文件夹路径为空") elif not os.path.isdir(new_path): validation_errors.append(f"新文件夹路径无效: {new_path}") if not excel_file: validation_errors.append("Excel文件路径为空") elif not excel_file.lower().endswith(('.xlsx', '.xlsm')): validation_errors.append("Excel文件必须是.xlsx或.xlsm格式") if validation_errors: self.log_message("错误: " + "; ".join(validation_errors)) 
messagebox.showerror("输入错误", "\n".join(validation_errors)) return # 检查WinMerge安装 winmerge_path = r"E:\App\WinMerge\WinMerge2.16.12.0\WinMergeU.exe" if not os.path.exists(winmerge_path): self.log_message(f"错误: WinMerge未安装在默认位置 {winmerge_path}") messagebox.showwarning("WinMerge未安装", "请确保WinMerge已安装或更新路径配置") return # 禁用执行按钮,启用停止按钮 self.run_button.config(state=tk.DISABLED) self.stop_button.config(state=tk.NORMAL) self.processing = True # 启动处理线程 thread = threading.Thread(target=self.process_folders, args=(old_path, new_path, excel_file)) thread.daemon = True thread.start() self.log_message("处理线程已启动") def process_folders(self, old_path, new_path, excel_file): """处理文件夹比较的线程函数 - 增强异常处理""" output_html = None try: # 步骤1: 生成HTML差异文件 self.update_status("生成HTML差异文件...") self.update_progress(20) # 使用临时文件存储HTML报告 with tempfile.NamedTemporaryFile(suffix=".html", delete=False) as temp_file: output_html = temp_file.name if not self.run_winmerge(old_path, new_path, output_html): self.update_status("WinMerge执行失败") return # 步骤2: 将HTML文件与Excel放在同一目录 self.update_status("准备文件...") self.update_progress(40) excel_dir = os.path.dirname(excel_file) if excel_dir: target_html = os.path.join(excel_dir, "diff_report.html") try: shutil.copy(output_html, target_html) self.log_message(f"已将HTML文件复制到: {target_html}") except Exception as e: self.log_message(f"文件复制失败: {str(e)}") return # 步骤3: 解析HTML差异文件 self.update_status("解析差异数据...") self.update_progress(60) diff_data = self.parse_html_diff(output_html) if not diff_data: self.update_status("HTML解析失败") return # 步骤4: 写入Excel并触发按钮 self.update_status("写入Excel并触发处理...") self.update_progress(80) if not self.write_to_excel(excel_file, diff_data): self.update_status("Excel操作失败") return # 完成 self.update_progress(100) self.update_status("处理完成!") self.log_message("文件夹比较流程执行完毕") messagebox.showinfo("完成", "文件夹比较处理成功完成") except Exception as e: error_msg = f"执行过程中发生错误: {str(e)}\n{traceback.format_exc()}" self.log_message(error_msg) self.update_status("执行失败") 
messagebox.showerror("错误", f"处理失败: {str(e)}") finally: # 重新启用执行按钮 if self.processing: self.stop_processing() # 清理临时文件 if output_html and os.path.exists(output_html): try: os.remove(output_html) except: pass def run_winmerge(self, path1, path2, output_html): """增强的WinMerge调用方法 - 解决弹窗阻塞问题""" winmerge_path = r"E:\App\WinMerge\WinMerge2.16.12.0\WinMergeU.exe" # 验证WinMerge可执行文件 if not os.path.exists(winmerge_path): self.log_message(f"错误: WinMerge路径不存在 {winmerge_path}") return False # 构建抑制弹窗的命令参数 winmerge_cmd = [ winmerge_path, '/u', # 不显示GUI界面 '/minimize', # 最小化窗口 '/noprefs', # 不使用保存的选项 '/exit', # 完成后自动退出 - 关键参数[^1] '/dl', 'Base', '/dr', 'Modified', '/or', output_html, path1, path2 ] # 添加递归选项 if self.recursive_var.get(): winmerge_cmd.insert(1, '/r') self.log_message(f"执行命令: {' '.join(winmerge_cmd)}") try: # 使用Popen启动进程(非阻塞) proc = subprocess.Popen( winmerge_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, creationflags=subprocess.CREATE_NO_WINDOW ) # 设置超时监控 timeout = 120 # 秒 start_time = time.time() while proc.poll() is None: # 进程仍在运行 # 更新状态 elapsed = int(time.time() - start_time) self.update_status(f"生成报告中...({elapsed}秒)") # 超时处理 if elapsed > timeout: self.log_message("WinMerge执行超时,强制终止进程") proc.terminate() try: proc.wait(timeout=5) except subprocess.TimeoutExpired: proc.kill() return False # 定期检查进程状态 time.sleep(1) # 检查退出码 if proc.returncode == 0: # 验证报告文件是否生成 if not os.path.exists(output_html): self.log_message(f"错误: 报告文件未生成 {output_html}") return False # 验证报告内容有效性 with open(output_html, 'r', encoding='utf-8') as f: content = f.read(1024) # 只读取前1KB检查 if '<table' not in content: self.log_message("警告: 报告文件不包含表格数据") self.log_message(f"HTML差异报告生成成功: {output_html}") return True else: # 获取错误输出 stderr_output = proc.stderr.read().decode('utf-8', errors='ignore') error_msg = f"WinMerge异常退出(代码{proc.returncode}): {stderr_output}" self.log_message(error_msg) return False except Exception as e: self.log_message(f"WinMerge执行错误: {str(e)}") return False def parse_html_diff(self, 
html_file): """增强的HTML报告解析方法 - 解决表格查找失败问题""" try: # 验证文件是否存在 if not os.path.exists(html_file): self.log_message(f"错误: HTML文件不存在 {html_file}") return [] # 读取文件内容(处理可能的编码问题) with open(html_file, 'r', encoding='utf-8', errors='ignore') as f: content = f.read() # 检查是否有有效内容 if not content.strip(): self.log_message("警告: HTML文件为空") return [] soup = BeautifulSoup(content, 'html.parser') # 增强表格查找方法 - 尝试多种定位方式[^2] table = None # 方式1: 通过特定ID查找 table = soup.find('table', id='filediff') # 方式2: 通过特定class查找 if not table: table = soup.find('table', class_='filediff') # 方式3: 查找包含特定标题的表格 if not table: for t in soup.find_all('table'): if t.find('th', text='Filename'): table = t break if not table: self.log_message("未找到差异表格,可能是无差异或格式变化") return [] # 提取差异文件列表 diff_files = [] for row in table.find_all('tr')[1:]: # 跳过表头 cols = row.find_all('td') if len(cols) >= 3: # 获取文件名(第二列或第三列) filename = cols[1].get_text(strip=True) or cols[2].get_text(strip=True) if filename: diff_files.append(filename) self.log_message(f"解析到 {len(diff_files)} 个差异文件") return diff_files except Exception as e: self.log_message(f"解析HTML报告错误: {str(e)}") return [] def write_to_excel(self, excel_path, diff_data): """将差异数据写入Excel - 增强健壮性""" self.log_message("正在写入Excel文件...") excel = None workbook = None try: # 验证Excel文件存在 if not os.path.exists(excel_path): self.log_message(f"错误: Excel文件不存在 {excel_path}") return False # 使用win32com打开Excel excel = win32.gencache.EnsureDispatch('Excel.Application') excel.Visible = True excel.DisplayAlerts = False # 禁用警告提示 # 尝试打开工作簿 try: workbook = excel.Workbooks.Open(os.path.abspath(excel_path)) except Exception as e: self.log_message(f"打开Excel文件失败: {str(e)}") return False # 检查工作表是否存在 sheet_names = [sheet.Name for sheet in workbook.Sheets] if "一覧" not in sheet_names: self.log_message("错误: Excel文件中缺少'一覧'工作表") return False sheet = workbook.Sheets("一覧") # 从第6行开始写入数据 start_row = 6 for i, row_data in enumerate(diff_data): for j, value in enumerate(row_data[:6]): # 确保值是字符串类型 sheet.Cells(start_row + 
i, j + 1).Value = str(value) # 保存Excel workbook.Save() self.log_message(f"数据已写入Excel第{start_row}行开始") # 触发"作成"按钮 self.log_message("正在触发'作成'按钮...") try: # 查找按钮并点击 button = sheet.Buttons("作成") button.OnAction = "作成按钮的处理" button.Click() self.log_message("已触发'作成'按钮") # 等待处理完成 self.update_status("处理中...请等待") wait_time = 0 max_wait = 60 # 最大等待60秒 while self.processing and wait_time < max_wait: if excel.CalculationState == 0: # 0 = xlDone break time.sleep(1) wait_time += 1 self.log_message(f"处理中...({wait_time}秒)") if wait_time >= max_wait: self.log_message("警告: 处理超时") else: self.log_message("处理完成") return True except Exception as e: self.log_message(f"按钮操作失败: {str(e)}. 请手动点击'作成'按钮") return False except Exception as e: self.log_message(f"Excel操作失败: {str(e)}\n{traceback.format_exc()}") return False finally: # 确保正确关闭Excel try: if workbook: workbook.Close(SaveChanges=False) if excel: excel.Quit() except Exception as e: self.log_message(f"关闭Excel时出错: {str(e)}") def stop_processing(self): """停止处理""" self.processing = False self.stop_button.config(state=tk.DISABLED) self.run_button.config(state=tk.NORMAL) self.update_status("操作已停止") def process_folders(self, old_path, new_path, excel_file): """处理文件夹比较的线程函数""" try: # 步骤1: 生成HTML差异文件 self.update_status("生成HTML差异文件...") self.update_progress(20) # 使用临时文件存储HTML报告 with tempfile.NamedTemporaryFile(suffix=".html", delete=False) as temp_file: output_html = temp_file.name if not self.run_winmerge(old_path, new_path, output_html): return # 步骤2: 将HTML文件与Excel放在同一目录 self.update_status("准备文件...") self.update_progress(40) excel_dir = os.path.dirname(excel_file) if excel_dir: target_html = os.path.join(excel_dir, "diff_report.html") shutil.copy(output_html, target_html) self.log_message(f"已将HTML文件复制到: {target_html}") # 步骤3: 解析HTML差异文件 self.update_status("解析差异数据...") self.update_progress(60) diff_data = self.parse_html_diff(output_html) if not diff_data: return # 步骤4: 写入Excel并触发按钮 self.update_status("写入Excel并触发处理...") self.update_progress(80) 
self.write_to_excel(excel_file, diff_data) # 完成 self.update_progress(100) self.update_status("处理完成!") self.log_message("文件夹比较流程执行完毕") except Exception as e: # 修复TypeError: 使用f-string记录异常 error_msg = f"执行过程中发生错误: {str(e)}\n{traceback.format_exc()}" self.log_message(error_msg) self.update_status("执行失败") finally: # 重新启用执行按钮 if self.processing: self.stop_processing() # 清理临时文件 if os.path.exists(output_html): try: os.remove(output_html) except: pass if __name__ == "__main__": root = tk.Tk() app = DiffProcessorApp(root) root.mainloop() 检查代码的问题,并提供完整代码实现我的要求
07-11
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值