import fnmatch
import os
import queue
import re
import subprocess
import sys
import threading
import tkinter as tk
from tkinter import ttk, filedialog, messagebox, scrolledtext

import chardet
import docx
import PyPDF2
import xlrd
from openpyxl import load_workbook
class FileSearchApp:
    """Tkinter application that searches files under a directory for a keyword.

    The window offers a directory chooser, keyword and file-filter entries,
    option check boxes, a result list and a preview pane.  The search itself
    runs in a daemon thread.  That thread never touches a widget directly:
    every UI update it needs is queued with ``_post`` and executed on the Tk
    main thread by ``_drain_ui_queue``, which reschedules itself with
    ``after``.
    """

    # Files larger than this many bytes are skipped when the size limit is on.
    MAX_FILE_SIZE = 100 * 1024 * 1024
    # Extensions routed to search_in_office_file; everything else is read as text.
    OFFICE_EXTENSIONS = ('.docx', '.doc', '.xlsx', '.xls', '.pdf')
    # Byte-order marks identifying Unicode text even when it contains NUL bytes.
    TEXT_BOMS = (b'\xef\xbb\xbf', b'\xff\xfe', b'\xfe\xff')
    # Interval (ms) between polls of the worker-to-UI queue.
    UI_POLL_MS = 50
    # Maximum number of queued UI updates executed per poll.
    UI_BATCH_SIZE = 200
    # Maximum preview lengths before "..." is appended.
    MAX_LINE_PREVIEW = 150
    MAX_CELL_PREVIEW = 100

    def __init__(self, master):
        """Build the whole UI inside ``master`` and start the UI-queue poller.

        Args:
            master: the Tk root (or toplevel) window hosting the application.
        """
        self.master = master
        master.title("高级文件搜索工具")
        master.geometry("1200x800")
        master.minsize(900, 650)

        # Search state; created before any callback can run.
        self.results = {}
        self.all_files = []
        self.stop_requested = False
        self.search_thread = None
        self.search_pattern = None   # compiled pattern of the latest search
        self._item_paths = {}        # tree item id -> full file path
        self._ui_queue = queue.Queue()

        # "vista" / "aqua" only exist on Windows / macOS; asking for a theme
        # that is not installed raises TclError, so check availability first.
        self.style = ttk.Style()
        preferred_theme = "vista" if sys.platform == "win32" else "aqua"
        if preferred_theme in self.style.theme_names():
            self.style.theme_use(preferred_theme)

        main_frame = ttk.Frame(master, padding=10)
        main_frame.pack(fill=tk.BOTH, expand=True)
        self._build_search_panel(main_frame)
        self._build_status_bar(main_frame)
        self._build_results_panel(main_frame)

        master.after(self.UI_POLL_MS, self._drain_ui_queue)

    # ------------------------------------------------------------------ UI
    def _build_search_panel(self, parent):
        """Create the directory, keyword/filter, option and button rows."""
        search_frame = ttk.LabelFrame(parent, text="搜索选项", padding=10)
        search_frame.pack(fill=tk.X, padx=5, pady=5)

        # Search directory row.
        dir_frame = ttk.Frame(search_frame)
        dir_frame.pack(fill=tk.X, pady=5)
        ttk.Label(dir_frame, text="搜索目录:").pack(side=tk.LEFT, padx=(0, 5))
        self.dir_entry = ttk.Entry(dir_frame, width=50)
        self.dir_entry.pack(side=tk.LEFT, fill=tk.X, expand=True, padx=5)
        self.dir_entry.insert(0, os.getcwd())
        ttk.Button(dir_frame, text="浏览...", command=self.browse_directory).pack(side=tk.RIGHT)

        # Keyword and file-filter row.
        filter_frame = ttk.Frame(search_frame)
        filter_frame.pack(fill=tk.X, pady=5)
        ttk.Label(filter_frame, text="关键词:").pack(side=tk.LEFT, padx=(0, 5))
        self.keyword_entry = ttk.Entry(filter_frame)
        self.keyword_entry.pack(side=tk.LEFT, fill=tk.X, expand=True, padx=5)
        ttk.Label(filter_frame, text="文件过滤:").pack(side=tk.LEFT, padx=(10, 5))
        self.filter_entry = ttk.Entry(filter_frame, width=30)
        self.filter_entry.pack(side=tk.LEFT, padx=5)
        self.filter_entry.insert(0, "*.c;*.h;*.prm;*.xlsx;*.xls;*.doc;*.docx;*.pdf")

        # Option check boxes.
        options_frame = ttk.Frame(search_frame)
        options_frame.pack(fill=tk.X, pady=10)
        self.case_var = tk.BooleanVar(value=False)
        self.regex_var = tk.BooleanVar(value=False)
        self.binary_var = tk.BooleanVar(value=False)
        self.limit_var = tk.BooleanVar(value=True)
        self.highlight_var = tk.BooleanVar(value=True)
        for label, variable in (
            ("忽略大小写", self.case_var),
            ("正则表达式", self.regex_var),
            ("包含二进制", self.binary_var),
            ("限制大小(100MB)", self.limit_var),
            ("关键字高亮", self.highlight_var),
        ):
            ttk.Checkbutton(options_frame, text=label, variable=variable).pack(side=tk.LEFT, padx=10)

        # Action buttons.
        button_frame = ttk.Frame(search_frame)
        button_frame.pack(fill=tk.X, pady=10)
        self.search_button = ttk.Button(button_frame, text="开始搜索", command=self.start_search)
        self.search_button.pack(side=tk.LEFT, padx=5)
        self.stop_button = ttk.Button(button_frame, text="停止搜索", command=self.stop_search, state=tk.DISABLED)
        self.stop_button.pack(side=tk.LEFT, padx=5)
        self.export_button = ttk.Button(button_frame, text="导出结果", command=self.export_results)
        self.export_button.pack(side=tk.LEFT, padx=5)

    def _build_status_bar(self, parent):
        """Create the status label, progress bar and statistics label."""
        status_frame = ttk.Frame(parent)
        status_frame.pack(fill=tk.X, padx=5, pady=(0, 5))
        self.status_label = ttk.Label(status_frame, text="就绪", font=("Arial", 9))
        self.status_label.pack(side=tk.LEFT, anchor='w')
        # Pack the right-hand label before the expanding progress bar so the
        # bar only receives the space that is left between the two labels.
        self.stats_label = ttk.Label(status_frame, text="", font=("Arial", 9))
        self.stats_label.pack(side=tk.RIGHT, padx=(0, 10))
        self.progress_var = tk.DoubleVar()
        # The bar shows a percentage (0-100); see update_progress.
        self.progress_bar = ttk.Progressbar(
            status_frame,
            variable=self.progress_var,
            length=200,
            maximum=100,
            mode='determinate'
        )
        self.progress_bar.pack(side=tk.LEFT, fill=tk.X, expand=True, padx=10)

    def _build_results_panel(self, parent):
        """Create the result list, the preview pane and their context menus."""
        results_frame = ttk.LabelFrame(parent, text="搜索结果", padding=10)
        results_frame.pack(fill=tk.BOTH, expand=True, padx=5, pady=5)
        paned_window = ttk.PanedWindow(results_frame, orient=tk.HORIZONTAL)
        paned_window.pack(fill=tk.BOTH, expand=True)

        # File list.
        file_list_frame = ttk.Frame(paned_window)
        paned_window.add(file_list_frame, weight=1)
        self.file_tree = ttk.Treeview(
            file_list_frame,
            columns=("filename", "path"),
            show="headings",
            selectmode="browse"
        )
        self.file_tree.heading("filename", text="文件名")
        self.file_tree.heading("path", text="路径")
        self.file_tree.column("filename", width=200, anchor="w")
        self.file_tree.column("path", width=400, anchor="w")
        file_scroll = ttk.Scrollbar(
            file_list_frame,
            orient=tk.VERTICAL,
            command=self.file_tree.yview
        )
        self.file_tree.configure(yscrollcommand=file_scroll.set)
        self.file_tree.pack(side=tk.LEFT, fill=tk.BOTH, expand=True)
        file_scroll.pack(side=tk.RIGHT, fill=tk.Y)
        self.file_tree.bind('<<TreeviewSelect>>', self.show_file_content)
        self.file_tree.bind('<Double-1>', self.open_selected_file)

        # Context menu of the file list.
        self.file_menu = tk.Menu(self.master, tearoff=0)
        self.file_menu.add_command(label="打开文件", command=self.open_selected_file)
        self.file_menu.add_command(label="打开文件位置", command=self.open_file_location)
        self.file_tree.bind("<Button-3>", self.show_file_context_menu)

        # Match preview.
        content_frame = ttk.Frame(paned_window)
        paned_window.add(content_frame, weight=2)
        self.content_text = scrolledtext.ScrolledText(
            content_frame,
            wrap=tk.WORD,
            font=("Consolas", 10),
            padx=5,
            pady=5
        )
        self.content_text.pack(fill=tk.BOTH, expand=True)
        self.content_text.tag_configure("match", background="yellow")
        self.content_text.tag_configure("linenum", foreground="blue", font=("Consolas", 9))
        self.content_text.tag_configure("header", foreground="darkgreen", font=("Arial", 10, "bold"))
        self.content_text.tag_configure("warning", foreground="red", font=("Arial", 10, "italic"))

        # Context menu of the preview.
        self.text_menu = tk.Menu(self.master, tearoff=0)
        self.text_menu.add_command(label="复制", command=self.copy_selected_text)
        self.content_text.bind(
            "<Button-3>",
            lambda e: self.text_menu.tk_popup(e.x_root, e.y_root)
        )

    # ------------------------------------------------- worker -> UI bridge
    def _post(self, callback, *args):
        """Queue ``callback(*args)`` for execution on the Tk main thread."""
        self._ui_queue.put((callback, args))

    def _drain_ui_queue(self):
        """Run queued UI updates on the main thread, then reschedule itself."""
        try:
            # Bounded batch so a flood of updates cannot freeze the window.
            for _ in range(self.UI_BATCH_SIZE):
                try:
                    callback, args = self._ui_queue.get_nowait()
                except queue.Empty:
                    break
                callback(*args)
        finally:
            self.master.after(self.UI_POLL_MS, self._drain_ui_queue)

    # --------------------------------------------------------- UI helpers
    def browse_directory(self):
        """Let the user pick the search directory and show it in the entry."""
        directory = filedialog.askdirectory(title="选择搜索目录")
        if directory:
            self.dir_entry.delete(0, tk.END)
            self.dir_entry.insert(0, directory)

    def update_status(self, text):
        """Set the status label text (call on the main thread)."""
        self.status_label.config(text=text)

    def update_stats(self, text):
        """Set the statistics label text (call on the main thread)."""
        self.stats_label.config(text=text)

    def update_progress(self, value, total):
        """Show ``value`` of ``total`` processed files (call on the main thread).

        The progress bar maximum is 100, so the percentage - not the raw file
        count - is written to the bar's variable.
        """
        percentage = round((value / total) * 100, 1) if total > 0 else 0
        self.progress_var.set(percentage)
        self.stats_label.config(
            text=f"处理中: {value}/{total} 文件 ({percentage}%)"
        )

    def reset_search_state(self):
        """Re-enable the search button and disable the stop button."""
        self.search_button.config(state=tk.NORMAL)
        self.stop_button.config(state=tk.DISABLED)

    def add_file_to_list(self, file_path):
        """Append ``file_path`` to the result list (call on the main thread).

        The full path is also remembered per tree item, so later look-ups do
        not depend on how Tk converts the displayed column values.
        """
        item_id = self.file_tree.insert(
            "", tk.END, values=(os.path.basename(file_path), file_path)
        )
        self._item_paths[item_id] = file_path

    def _selected_path(self):
        """Return the path of the selected result row, or None."""
        selected = self.file_tree.selection()
        if not selected:
            return None
        return self._item_paths.get(selected[0])

    # ------------------------------------------------------------- search
    @staticmethod
    def _parse_filter_patterns(file_filter):
        """Split the filter entry into a list of fnmatch patterns.

        Patterns may be separated by ``;`` or ``,`` (ASCII or full-width).
        A bare extension such as ``.txt`` is treated as ``*.txt``.  An empty
        filter yields ``["*"]`` (match everything).
        """
        patterns = []
        for raw in re.split(r"[;；,，]", file_filter or ""):
            pat = raw.strip()
            if not pat:
                continue
            if pat.startswith(".") and not any(ch in pat for ch in "*?["):
                pat = "*" + pat
            patterns.append(pat)
        return patterns or ["*"]

    @staticmethod
    def _matches_filter(file_name, filter_patterns):
        """Return True if ``file_name`` matches any pattern, ignoring case."""
        lowered = file_name.lower()
        return any(fnmatch.fnmatchcase(lowered, pat.lower()) for pat in filter_patterns)

    def start_search(self):
        """Validate the inputs and start the background search thread."""
        directory = self.dir_entry.get().strip()
        keyword = self.keyword_entry.get().strip()
        file_filter = self.filter_entry.get().strip()

        # Validate before touching the previous results, so that a typo does
        # not wipe what is currently displayed.
        if not directory or not os.path.isdir(directory):
            messagebox.showerror("错误", "请选择有效的搜索目录")
            return
        if not keyword:
            messagebox.showerror("错误", "请输入搜索关键词")
            return
        flags = re.IGNORECASE if self.case_var.get() else 0
        try:
            if self.regex_var.get():
                pattern = re.compile(keyword, flags)
            else:
                pattern = re.compile(re.escape(keyword), flags)
        except re.error as e:
            messagebox.showerror("正则表达式错误", f"无效的正则表达式: {str(e)}")
            return
        filter_patterns = self._parse_filter_patterns(file_filter)

        # Reset the state of the previous search.
        self.progress_var.set(0)
        self.stop_requested = False
        self.search_pattern = pattern
        self.results.clear()
        self.all_files.clear()
        self._item_paths.clear()
        self.file_tree.delete(*self.file_tree.get_children())
        # A disabled Text widget ignores delete(), so enable it first.
        self.content_text.config(state=tk.NORMAL)
        self.content_text.delete("1.0", tk.END)
        self.content_text.config(state=tk.DISABLED)
        self.update_status("正在搜索...")
        self.search_button.config(state=tk.DISABLED)
        self.stop_button.config(state=tk.NORMAL)
        self.update_stats("扫描文件中...")

        # Tk variables are read here, on the main thread, and handed to the
        # worker as plain values.
        self.search_thread = threading.Thread(
            target=self.perform_search,
            args=(directory, filter_patterns, pattern),
            kwargs={
                "limit_size": bool(self.limit_var.get()),
                "include_binary": bool(self.binary_var.get()),
            },
            daemon=True
        )
        self.search_thread.start()

    def _collect_files(self, directory, filter_patterns, limit_size):
        """Walk ``directory`` and return the paths that pass filter and size limit.

        Stops early (returning what was collected so far) once
        ``stop_requested`` is set.
        """
        collected = []
        for root, _, files in os.walk(directory):
            if self.stop_requested:
                break
            for name in files:
                if self.stop_requested:
                    break
                if not self._matches_filter(name, filter_patterns):
                    continue
                file_path = os.path.join(root, name)
                if limit_size:
                    try:
                        if os.path.getsize(file_path) > self.MAX_FILE_SIZE:
                            continue
                    except OSError:
                        # Unreadable / vanished file: skip it.
                        continue
                collected.append(file_path)
        return collected

    def perform_search(self, directory, filter_patterns, pattern,
                       limit_size=None, include_binary=None):
        """Search all matching files; intended to run in the worker thread.

        Args:
            directory: root directory to walk.
            filter_patterns: list of fnmatch patterns for file names.
            pattern: compiled regular expression to look for.
            limit_size: skip files above MAX_FILE_SIZE; when None the value
                of the size-limit check box is read.
            include_binary: search files detected as binary too; when None
                the value of the binary check box is read.

        Results are stored in ``self.results`` (path -> list of matches) and
        all widget updates are queued with ``_post``.
        """
        try:
            if limit_size is None:
                limit_size = self.limit_var.get()
            if include_binary is None:
                include_binary = self.binary_var.get()

            self.all_files = self._collect_files(directory, filter_patterns, limit_size)
            if self.stop_requested:
                self._post(self.update_status, "搜索已取消")
                return
            total_files = len(self.all_files)
            self._post(self.update_progress, 0, total_files)
            self._post(self.update_stats, f"扫描到 {total_files} 个文件")

            files_found = 0
            matches_found = 0
            processed = 0
            # Report progress about 200 times per search instead of per file.
            progress_step = max(1, total_files // 200)
            for index, file_path in enumerate(self.all_files, 1):
                if self.stop_requested:
                    break
                processed = index
                if index % progress_step == 0 or index == total_files:
                    self._post(self.update_progress, index, total_files)
                # Office/PDF documents are binary containers handled by their
                # own parsers, so the binary check must not exclude them.
                ext_lower = os.path.splitext(file_path)[1].lower()
                if (not include_binary
                        and ext_lower not in self.OFFICE_EXTENSIONS
                        and self.is_binary(file_path)):
                    continue
                matches = self.search_file_content(file_path, pattern)
                if matches:
                    self.results[file_path] = matches
                    files_found += 1
                    matches_found += len(matches)
                    self._post(self.add_file_to_list, file_path)

            if self.stop_requested:
                status = f"搜索已取消 - 找到 {files_found} 个文件, {matches_found} 个匹配项"
            else:
                status = f"搜索完成 - 找到 {files_found} 个文件, {matches_found} 个匹配项"
            self._post(self.update_status, status)
            self._post(self.update_progress, processed, total_files)
        except Exception as e:
            # Top-level boundary of the worker thread: report, do not crash.
            self._post(self.update_status, f"搜索错误: {str(e)}")
        finally:
            self._post(self.reset_search_state)
            self.search_thread = None

    def search_file_content(self, file_path, pattern):
        """Search one file, choosing the reader by file extension.

        Office/PDF extensions go to ``search_in_office_file``; every other
        file is read as text.

        Returns:
            list of ``(position, text)`` tuples, empty when nothing matched.
        """
        ext_lower = os.path.splitext(file_path)[1].lower()
        if ext_lower in self.OFFICE_EXTENSIONS:
            return self.search_in_office_file(file_path, pattern)
        return self.search_in_text_file(file_path, pattern)

    @staticmethod
    def _shorten(text, limit):
        """Strip ``text`` and cut it to ``limit`` characters plus "..."."""
        text = text.strip()
        if len(text) > limit:
            return text[:limit] + "..."
        return text

    @staticmethod
    def _column_letter(col_idx):
        """Convert a 1-based column index to spreadsheet letters (27 -> "AA")."""
        letters = ""
        while col_idx > 0:
            col_idx, remainder = divmod(col_idx - 1, 26)
            letters = chr(65 + remainder) + letters
        return letters

    def search_in_text_file(self, file_path, pattern):
        """Return ``(line_number, line)`` for every line matching ``pattern``.

        Undecodable bytes are replaced rather than raising.  Any error while
        reading is printed and yields an empty list.
        """
        try:
            encoding = self.detect_encoding(file_path)
            matches = []
            with open(file_path, 'r', encoding=encoding, errors='replace') as f:
                for line_num, line in enumerate(f, 1):
                    if pattern.search(line):
                        matches.append((line_num, self._shorten(line, self.MAX_LINE_PREVIEW)))
            return matches
        except Exception as e:
            print(f"搜索文本文件出错: {file_path} | {str(e)}")
            return []

    def search_in_office_file(self, file_path, pattern):
        """Search a .docx, .xlsx, .xls or .pdf file.

        Extensions without a parser here (e.g. legacy ``.doc``) return an
        empty list.  Parser errors are printed and also yield an empty list.
        """
        searchers = {
            '.docx': self._search_docx,
            '.xlsx': self._search_xlsx,
            '.xls': self._search_xls,
            '.pdf': self._search_pdf,
        }
        searcher = searchers.get(os.path.splitext(file_path)[1].lower())
        if searcher is None:
            return []
        try:
            return searcher(file_path, pattern)
        except Exception as e:
            # Third-party parsers raise many exception types on damaged files.
            print(f"搜索Office文件出错: {file_path} | {str(e)}")
            return []

    def _search_docx(self, file_path, pattern):
        """Search paragraphs and table cells of a .docx document."""
        matches = []
        doc = docx.Document(file_path)
        for i, para in enumerate(doc.paragraphs, 1):
            if para.text and pattern.search(para.text):
                content = self._shorten(para.text, self.MAX_CELL_PREVIEW)
                matches.append((i, f"段落 {i}: {content}"))
        for table in doc.tables:
            for row_idx, row in enumerate(table.rows, 1):
                for cell_idx, cell in enumerate(row.cells, 1):
                    if cell.text and pattern.search(cell.text):
                        content = self._shorten(cell.text, self.MAX_CELL_PREVIEW)
                        matches.append((row_idx, f"表格 行{row_idx}列{cell_idx}: {content}"))
        return matches

    def _search_xlsx(self, file_path, pattern):
        """Search every cell value of an .xlsx workbook."""
        matches = []
        wb = load_workbook(file_path, read_only=True, data_only=True)
        try:
            for sheet_name in wb.sheetnames:
                sheet = wb[sheet_name]
                for row_idx, row in enumerate(sheet.iter_rows(values_only=True), 1):
                    for col_idx, cell in enumerate(row, 1):
                        if cell is not None and pattern.search(str(cell)):
                            cell_ref = f"{self._column_letter(col_idx)}{row_idx}"
                            content = self._shorten(str(cell), self.MAX_CELL_PREVIEW)
                            matches.append((row_idx, f"工作表 '{sheet_name}' 单元格 {cell_ref}: {content}"))
        finally:
            # Read-only workbooks keep the file open until closed explicitly.
            wb.close()
        return matches

    def _search_xls(self, file_path, pattern):
        """Search every cell value of a legacy .xls workbook."""
        matches = []
        wb = xlrd.open_workbook(file_path)
        for sheet_idx in range(wb.nsheets):
            sheet = wb.sheet_by_index(sheet_idx)
            for row_idx in range(sheet.nrows):
                for col_idx in range(sheet.ncols):
                    cell = sheet.cell_value(row_idx, col_idx)
                    # Skip only empty cells; a numeric 0 is a real value.
                    if cell is None or cell == "":
                        continue
                    if pattern.search(str(cell)):
                        cell_ref = f"{self._column_letter(col_idx + 1)}{row_idx+1}"
                        content = self._shorten(str(cell), self.MAX_CELL_PREVIEW)
                        matches.append((row_idx+1, f"工作表 '{sheet.name}' 单元格 {cell_ref}: {content}"))
        return matches

    def _search_pdf(self, file_path, pattern):
        """Search the extracted text of every page of a PDF."""
        matches = []
        with open(file_path, 'rb') as f:
            pdf = PyPDF2.PdfReader(f)
            for page_num, page in enumerate(pdf.pages, 1):
                page_text = page.extract_text()
                if not page_text:
                    continue
                for match in pattern.finditer(page_text):
                    # Keep 30 characters before and 70 after as context.
                    start = max(0, match.start() - 30)
                    end = min(len(page_text), match.end() + 70)
                    context = page_text[start:end].replace('\n', ' ').strip()
                    matches.append((page_num, f"页面 {page_num}: {context}"))
        return matches

    def is_binary(self, file_path):
        """Guess from the first 1024 bytes whether ``file_path`` is binary.

        Returns True for unreadable files, for data containing NUL bytes
        (unless it starts with a Unicode byte-order mark) and for data whose
        encoding chardet cannot determine.  Empty files and non-ASCII text
        (UTF-8, GBK, ...) count as text.
        """
        try:
            with open(file_path, 'rb') as f:
                chunk = f.read(1024)
        except OSError:
            return True
        if not chunk:
            return False
        if chunk.startswith(self.TEXT_BOMS):
            return False
        if b'\x00' in chunk:
            return True
        return chardet.detect(chunk)['encoding'] is None

    def detect_encoding(self, file_path):
        """Return a codec name for ``file_path`` based on its first 4096 bytes.

        Falls back to UTF-8 when detection fails or names a codec Python
        does not know.
        """
        with open(file_path, 'rb') as f:
            raw_data = f.read(4096)
        encoding = (chardet.detect(raw_data)['encoding'] or 'utf-8').lower()
        if encoding == 'ascii':
            # Only a sample was inspected; UTF-8 is a superset of ASCII and
            # still decodes non-ASCII text that appears later in the file.
            encoding = 'utf-8'
        elif encoding in ('gb2312', 'gbk'):
            # GB18030 is a superset of both.
            encoding = 'gb18030'
        try:
            "".encode(encoding)
        except LookupError:
            encoding = 'utf-8'
        return encoding

    def stop_search(self):
        """Ask the worker thread to stop at its next check."""
        self.stop_requested = True
        self.update_status("正在停止搜索...")

    # ------------------------------------------------------------ preview
    @staticmethod
    def _split_by_matches(pattern, text):
        """Split ``text`` into ``(segment, is_match)`` pieces for highlighting.

        With ``pattern`` set to None the whole text is one unmatched piece.
        Zero-width matches are ignored.
        """
        if pattern is None:
            return [(text, False)] if text else []
        segments = []
        cursor = 0
        for match in pattern.finditer(text):
            if match.end() == match.start():
                continue
            if match.start() > cursor:
                segments.append((text[cursor:match.start()], False))
            segments.append((match.group(0), True))
            cursor = match.end()
        if cursor < len(text):
            segments.append((text[cursor:], False))
        return segments

    def show_file_content(self, event=None):
        """Show the matches of the selected file in the preview pane.

        When highlighting is enabled the pattern of the latest search is
        applied to each match line, so regex searches and case-insensitive
        searches highlight exactly what matched.
        """
        file_path = self._selected_path()
        if file_path is None:
            return
        matches = self.results.get(file_path, [])
        pattern = self.search_pattern if self.highlight_var.get() else None

        preview = self.content_text
        preview.config(state=tk.NORMAL)
        preview.delete("1.0", tk.END)
        preview.insert(tk.END, f"文件: {file_path}\n", "header")
        preview.insert(tk.END, f"共找到 {len(matches)} 个匹配项\n\n", "header")
        for line_num, content in matches:
            preview.insert(tk.END, f"行 {line_num}: ", "linenum")
            for segment, is_match in self._split_by_matches(pattern, content):
                if is_match:
                    preview.insert(tk.END, segment, "match")
                else:
                    preview.insert(tk.END, segment)
            preview.insert(tk.END, "\n\n")
        preview.config(state=tk.DISABLED)

    # ------------------------------------------------------- file actions
    def _open_with_default_app(self, path):
        """Open ``path`` with the platform's default handler.

        The launcher is started with an argument list, never through a shell,
        so file names containing quotes or shell metacharacters are safe.
        """
        try:
            if sys.platform == "win32":
                os.startfile(path)
            elif sys.platform == "darwin":
                subprocess.run(["open", path], check=False)
            else:
                subprocess.run(["xdg-open", path], check=False)
        except OSError as e:
            messagebox.showerror("错误", f"无法打开: {path}\n{str(e)}")

    def open_selected_file(self, event=None):
        """Open the selected file with its default application."""
        file_path = self._selected_path()
        if file_path and os.path.exists(file_path):
            self._open_with_default_app(file_path)

    def open_file_location(self):
        """Open the directory containing the selected file."""
        file_path = self._selected_path()
        if file_path and os.path.exists(file_path):
            self._open_with_default_app(os.path.dirname(file_path))

    def copy_selected_text(self):
        """Copy the text selected in the preview pane to the clipboard."""
        if self.content_text.tag_ranges(tk.SEL):
            selected = self.content_text.get(tk.SEL_FIRST, tk.SEL_LAST)
            self.master.clipboard_clear()
            self.master.clipboard_append(selected)

    def show_file_context_menu(self, event):
        """Select the row under the pointer and pop up the file menu."""
        item = self.file_tree.identify_row(event.y)
        if item:
            self.file_tree.selection_set(item)
            try:
                self.file_menu.tk_popup(event.x_root, event.y_root)
            finally:
                self.file_menu.grab_release()

    def export_results(self):
        """Write the search parameters and all matches to a UTF-8 text file."""
        if not self.results:
            messagebox.showinfo("导出", "没有搜索结果可导出")
            return
        export_path = filedialog.asksaveasfilename(
            title="保存搜索结果",
            filetypes=[("文本文件", "*.txt"), ("所有文件", "*.*")],
            defaultextension=".txt"
        )
        if not export_path:
            return
        try:
            with open(export_path, 'w', encoding='utf-8') as f:
                f.write(f"搜索目录: {self.dir_entry.get()}\n")
                f.write(f"关键词: {self.keyword_entry.get()}\n")
                f.write(f"文件过滤: {self.filter_entry.get()}\n")
                f.write(f"匹配选项: {'忽略大小写' if self.case_var.get() else '区分大小写'}, "
                        f"{'正则表达式' if self.regex_var.get() else '普通文本'}\n\n")
                # Snapshot: a running search may still be adding results.
                for found_path, matches in list(self.results.items()):
                    f.write(f"文件: {found_path}\n")
                    f.write(f"匹配数: {len(matches)}\n")
                    for line_num, content in matches:
                        f.write(f"行 {line_num}: {content}\n")
                    f.write("\n")
        except (OSError, UnicodeError) as e:
            messagebox.showerror("导出错误", f"保存文件失败: {str(e)}")
        else:
            messagebox.showinfo("导出成功", f"搜索结果已保存到:\n{export_path}")
def main():
    """Create the Tk root window, attach the search UI and run the event loop."""
    window = tk.Tk()
    # Bound to a local so the application object stays referenced while the
    # event loop runs.
    search_app = FileSearchApp(window)  # noqa: F841
    window.mainloop()


# Start the GUI only when the file is executed as a script.
if __name__ == "__main__":
    main()
1、由于布局变化,进度条的显示出现问题。2、使用文件过滤时,即使文件夹里确实有文件,也检索不到文件。3、将文件过滤里面的类型去除掉后,能够搜索到文件,但是搜索不到文件里面的内容,也就是找不到匹配项。这几个问题仍然存在。我附上代码,请你排查问题。
最新发布