第七天 如何使用File类过滤操作系统目录下以.exe为后缀的可执行程序?

本文提供了一个Java程序示例,展示了如何使用File类和匿名内部类实现特定文件类型的过滤。程序遍历指定目录下的所有.exe文件,并打印出它们的绝对路径。
public class Exercise {
	public static void main(String[] args) {
		//windows平台的路径都是\形式,java的是/形式
		File file=new File("c:/windows");
		
		FileFilter f1=new FileFilter(){

		@Override
		public boolean accept(File pathname) {
			// TODO Auto-generated method stub
			return pathname.getName().endsWith(".exe")?true:false;
		}

	};
	File[] files=file.listFiles(f1);
	
	for(File f2 : files){
		System.out.println(f2.getAbsolutePath());
	}
	
	}
}




                
import os
import shutil
import tempfile
import threading
import tkinter as tk
from collections import defaultdict
from datetime import datetime
from io import BytesIO
from tkinter import filedialog, messagebox, scrolledtext, ttk

import pythoncom
import win32com.client
import wx  # not referenced by the code below; kept because other code may rely on it
from docx import Document
from PyPDF2 import PdfMerger, PdfReader, PdfWriter
from reportlab.lib.colors import black, red, white
from reportlab.lib.pagesizes import letter
from reportlab.pdfbase import pdfmetrics
from reportlab.pdfbase.ttfonts import TTFont
from reportlab.pdfgen import canvas
from reportlab.platypus import Table, TableStyle


class PDFConverterApp:
    """Tk GUI that converts the Word reports of several folders into one PDF.

    For every selected folder the Word documents are (optionally) renamed
    after the folder, (optionally) archived together with their Excel/CSV
    data, converted to PDF through Word automation, stamped with a red title,
    and finally merged behind a generated "Summary" page.

    NOTE(review): ``process_folders`` runs on a worker thread and touches Tk
    widgets (log, progress bar, message boxes) from there, exactly as the
    original did. Tkinter is only reliably usable this way with a threaded
    Tcl build; confirm, or marshal the UI updates through ``root.after``.
    """

    # Name of the archive directory created inside the output folder.
    BACKUP_DIR_NAME = "报告存档"
    # File extensions that count as measurement data when archiving.
    DATA_EXTENSIONS = ('.xlsx', '.csv')
    # Columns copied from each Word table into the summary page.
    SUMMARY_HEADERS = ("SPEC(dB)", "Simulation", "Pass/Fail")
    WINDOW_TITLE = "Word 转 PDF 合并工具"
    # Value of Word's wdFormatPDF constant for Document.SaveAs.
    WD_FORMAT_PDF = 17

    def __init__(self, root):
        """Store the Tk root, initialise state and build the widgets."""
        self.root = root
        self.root.title("audio_data")
        self.root.geometry("800x650")

        self.folders = []
        self.log_messages = []
        self.output_path = ""
        self.backup_mode = tk.BooleanVar(value=True)
        self.point_22_mode = tk.BooleanVar(value=False)
        self.rename_mode = tk.BooleanVar(value=True)
        self.output_filename = tk.StringVar(value="听筒磁干扰_Simulation_Result")

        self.process_btn = None  # set in create_widgets
        self._worker = None      # thread of the run in progress, if any

        self.create_widgets()

    def create_widgets(self):
        """Build the toolbar, output settings, folder list, log and progress bar."""
        top_frame = ttk.Frame(self.root, padding=10)
        top_frame.pack(fill=tk.X)

        output_frame = ttk.LabelFrame(self.root, text="输出设置", padding=10)
        output_frame.pack(fill=tk.X, padx=10, pady=(0, 5))

        ttk.Label(output_frame, text="文件名:").grid(
            row=0, column=0, sticky=tk.W, padx=(0, 5))
        filename_entry = ttk.Entry(
            output_frame, textvariable=self.output_filename, width=30)
        filename_entry.grid(row=0, column=1, sticky=tk.W, padx=5)

        ttk.Label(output_frame, text="输出路径:").grid(
            row=0, column=2, sticky=tk.W, padx=(20, 5))
        self.path_entry = ttk.Entry(output_frame, width=40, state='readonly')
        self.path_entry.grid(row=0, column=3, sticky=tk.EW, padx=5)

        browse_btn = ttk.Button(
            output_frame, text="浏览...", command=self.choose_output_path)
        browse_btn.grid(row=0, column=4, padx=(5, 0))
        output_frame.columnconfigure(3, weight=1)

        check_frame = ttk.Frame(output_frame)
        check_frame.grid(row=1, column=0, columnspan=5, sticky=tk.W, padx=0, pady=5)

        self.point_22_check = ttk.Checkbutton(
            check_frame, text="2号点位", variable=self.point_22_mode)
        self.point_22_check.pack(side=tk.LEFT, padx=(0, 15))

        self.backup_check = ttk.Checkbutton(
            check_frame, text="报告存档", variable=self.backup_mode)
        self.backup_check.pack(side=tk.LEFT, padx=(0, 15))

        self.rename_check = ttk.Checkbutton(
            check_frame, text="启用重命名", variable=self.rename_mode)
        self.rename_check.pack(side=tk.LEFT)

        add_btn = ttk.Button(top_frame, text="添加文件夹", command=self.add_folder)
        add_btn.pack(side=tk.LEFT, padx=5)
        remove_btn = ttk.Button(top_frame, text="移除选中", command=self.remove_selected)
        remove_btn.pack(side=tk.LEFT, padx=5)
        clear_btn = ttk.Button(top_frame, text="清空列表", command=self.clear_list)
        clear_btn.pack(side=tk.LEFT, padx=5)
        # Kept on self so that a run can disable/enable it.
        self.process_btn = ttk.Button(
            top_frame, text="开始处理", command=self.start_processing)
        self.process_btn.pack(side=tk.RIGHT, padx=5)

        list_frame = ttk.LabelFrame(self.root, text="待处理文件夹", padding=10)
        list_frame.pack(fill=tk.BOTH, expand=True, padx=10, pady=5)
        scrollbar = ttk.Scrollbar(list_frame)
        scrollbar.pack(side=tk.RIGHT, fill=tk.Y)
        self.folder_list = tk.Listbox(
            list_frame,
            selectmode=tk.EXTENDED,
            yscrollcommand=scrollbar.set,
            height=10,
        )
        self.folder_list.pack(fill=tk.BOTH, expand=True)
        scrollbar.config(command=self.folder_list.yview)

        log_frame = ttk.LabelFrame(self.root, text="处理日志", padding=10)
        log_frame.pack(fill=tk.BOTH, expand=True, padx=10, pady=5)
        self.log_text = scrolledtext.ScrolledText(
            log_frame, wrap=tk.WORD, state=tk.DISABLED)
        self.log_text.pack(fill=tk.BOTH, expand=True)

        self.progress = ttk.Progressbar(
            self.root, orient=tk.HORIZONTAL, mode='determinate')
        self.progress.pack(fill=tk.X, padx=10, pady=5)

    def choose_output_path(self):
        """Ask for the output directory and show it in the read-only entry."""
        path = filedialog.askdirectory(title="选择输出文件夹")
        if not path:
            return
        self.output_path = path
        # A read-only ttk.Entry must be unlocked before its text can change.
        self.path_entry.config(state='normal')
        self.path_entry.delete(0, tk.END)
        self.path_entry.insert(0, path)
        self.path_entry.config(state='readonly')
        self.log(f"已设置输出路径: {path}")

    @staticmethod
    def _same_path(first, second):
        """Return True when both paths designate the same file-system entry."""
        def canonical(path):
            return os.path.normcase(os.path.abspath(path))
        return canonical(first) == canonical(second)

    def apply_rename_rule(self, original_files):
        """Rename each Word file after its folder and return the new paths.

        The folder name is cut at its last '-' (``"A-B-3"`` -> ``"A-B"``).
        The first file of a folder becomes ``<name><ext>``, later ones
        ``<name>_<n><ext>``. A file that already carries its target name is
        left alone, so running the tool twice does not rename it again.

        Args:
            original_files: paths of the Word files to rename.

        Returns:
            One path per input, in input order: the new path, or the
            original path when the rename was skipped or failed.
        """
        renamed_files = []
        folder_file_counts = defaultdict(int)  # files seen so far, per folder

        for file_path in original_files:
            folder_path = os.path.dirname(file_path)
            original_folder_name = os.path.basename(folder_path)

            folder_name = original_folder_name
            if '-' in original_folder_name:
                # Drop the last dash and everything after it; keep the full
                # name if that would leave nothing (folder "-x").
                folder_name = original_folder_name.rpartition('-')[0] or original_folder_name
                self.log(f"已处理文件夹名: {original_folder_name} → {folder_name}")

            ext = os.path.splitext(file_path)[1]
            folder_file_counts[folder_path] += 1
            file_count = folder_file_counts[folder_path]

            if file_count == 1:
                new_name = f"{folder_name}{ext}"
            else:
                new_name = f"{folder_name}_{file_count}{ext}"
            new_path = os.path.join(folder_path, new_name)

            if self._same_path(file_path, new_path):
                # Already named correctly: without this check the "exists"
                # loop below would push the file to "<name>_1_1<ext>".
                renamed_files.append(file_path)
                continue

            counter = 1
            while os.path.exists(new_path):
                new_name = f"{folder_name}_{file_count}_{counter}{ext}"
                new_path = os.path.join(folder_path, new_name)
                counter += 1

            try:
                os.rename(file_path, new_path)
            except OSError as e:
                self.log(f"重命名失败: {str(e)}")
                renamed_files.append(file_path)  # keep working with the old path
            else:
                self.log(f"已将 '{os.path.basename(file_path)}' 重命名为 '{new_name}'")
                renamed_files.append(new_path)

        return renamed_files

    def add_folder(self):
        """Ask for a folder and append it to the processing list."""
        folder = filedialog.askdirectory(title="选择要处理的文件夹", mustexist=True)
        if folder:
            self.folders.append(folder)
            self.folder_list.insert(tk.END, folder)
            self.log(f"已添加文件夹: {folder}")

    def remove_selected(self):
        """Remove the selected folders from the list box and from ``self.folders``."""
        # Delete from the bottom up so the remaining indices stay valid. The
        # list box and self.folders are kept in the same order, so the index
        # is used for both (removing by value would hit the wrong entry when
        # a folder was added twice).
        for index in reversed(self.folder_list.curselection()):
            folder = self.folder_list.get(index)
            self.folder_list.delete(index)
            if index < len(self.folders):
                del self.folders[index]
            self.log(f"已移除文件夹: {folder}")

    def clear_list(self):
        """Empty the folder list."""
        self.folder_list.delete(0, tk.END)
        self.folders = []
        self.log("已清空文件夹列表")

    def log(self, message):
        """Append a time-stamped message to the log pane and to ``log_messages``."""
        timestamp = datetime.now().strftime("%H:%M:%S")
        log_entry = f"[{timestamp}] {message}"
        self.log_messages.append(log_entry)

        self.log_text.config(state=tk.NORMAL)
        self.log_text.insert(tk.END, log_entry + "\n")
        self.log_text.config(state=tk.DISABLED)
        self.log_text.yview(tk.END)  # keep the newest line visible
        self.root.update_idletasks()

    def start_processing(self):
        """Start ``process_folders`` on a daemon thread so the UI stays responsive."""
        if not self.folders:
            messagebox.showwarning("警告", "请先添加要处理的文件夹")
            return
        if self._worker is not None and self._worker.is_alive():
            # A second run would fight over the same files and Word instance.
            messagebox.showwarning("警告", "正在处理中,请等待当前任务完成")
            return

        if self.process_btn is not None:
            self.process_btn.config(state=tk.DISABLED)
        self.root.title("Word 转 PDF 合并工具 - 处理中...")
        self.progress["value"] = 0

        self._worker = threading.Thread(target=self.process_folders)
        self._worker.daemon = True
        self._worker.start()

    def backup_data_files(self, folder_path, backup_dir):
        """Recursively copy every .xlsx/.csv file below ``folder_path``.

        The sub-directory layout is reproduced below ``backup_dir`` so that
        equally named files from different sub-folders do not overwrite each
        other. Directories named ``BACKUP_DIR_NAME`` are not descended into,
        which keeps the tool from archiving its own archive when the output
        folder lies inside an input folder.

        Args:
            folder_path: folder to search.
            backup_dir: destination directory (created if missing).

        Returns:
            True when at least one file was copied, otherwise False.
        """
        self.log(f"开始在文件夹中搜索所有Excel和CSV文件: {folder_path}")
        backup_count = 0
        try:
            if not os.path.isdir(folder_path):
                # os.walk would silently yield nothing for a missing folder.
                raise NotADirectoryError(folder_path)

            self.log(f"开始备份Excel/CSV文件到Word文档备份目录: {backup_dir}")
            os.makedirs(backup_dir, exist_ok=True)

            for current_dir, dir_names, file_names in os.walk(folder_path):
                # In-place edit: os.walk only prunes when the list it yielded is modified.
                dir_names[:] = [d for d in dir_names if d != self.BACKUP_DIR_NAME]

                rel_dir = os.path.relpath(current_dir, folder_path)
                dest_dir = backup_dir if rel_dir == os.curdir else os.path.join(backup_dir, rel_dir)

                for file in sorted(file_names):
                    if file.startswith("~$"):
                        continue  # Office lock file of a currently open workbook
                    if not file.lower().endswith(self.DATA_EXTENSIONS):
                        continue
                    dest_path = os.path.join(dest_dir, file)
                    try:
                        os.makedirs(dest_dir, exist_ok=True)
                        shutil.copy2(os.path.join(current_dir, file), dest_path)
                    except OSError as e:
                        self.log(f"⚠️ 备份失败: {file} ({str(e)})")
                    else:
                        backup_count += 1
                        self.log(f"✅ 成功备份: {file} → {dest_path}")

            self.log(f"完成备份! 共处理 {backup_count} 个Excel/CSV文件")
            return backup_count > 0
        except Exception as e:
            self.log(f"❌ 备份过程出错: {str(e)}")
            return False

    def _resolve_output_folder(self):
        """Return the chosen output folder, else the first existing input folder, else the CWD."""
        if self.output_path:
            return self.output_path
        return next((p for p in self.folders if os.path.isdir(p)), os.getcwd())

    @staticmethod
    def _unique_name(name, used_names):
        """Return ``name``, or ``name (n)`` if it is taken, and record the result in ``used_names``."""
        candidate = name
        suffix = 2
        while candidate in used_names:
            candidate = f"{name} ({suffix})"
            suffix += 1
        used_names.add(candidate)
        return candidate

    def _backup_word_file(self, word_file, modified_word_path, dest_dir):
        """Archive one Word document and the data files of its folder into ``dest_dir``."""
        try:
            os.makedirs(dest_dir, exist_ok=True)
            word_dest = os.path.join(dest_dir, os.path.basename(modified_word_path))
            shutil.copy2(modified_word_path, word_dest)
            self.log(f"Word文件备份成功: {word_file} → {word_dest}")
            # The Excel/CSV data goes next to the archived document.
            self.backup_data_files(os.path.dirname(word_file), dest_dir)
        except OSError as e:
            self.log(f"备份过程中出错: {e}")

    def process_folders(self):
        """Run the whole pipeline: rename, archive, convert, stamp, merge.

        Intended to run on the worker thread started by ``start_processing``.
        Progress and failures are reported through the log pane and message
        boxes; nothing is returned.
        """
        try:
            output_folder = self._resolve_output_folder()
            self.log(f"开始处理 {len(self.folders)} 个文件夹...")

            word_files = self.get_all_word_files(self.folders)
            if not word_files:
                self.log("没有找到任何Word文档")
                return

            if self.rename_mode.get():
                self.log("应用重命名规则...")
                word_files = self.apply_rename_rule(word_files)

            self.log(f"共找到 {len(word_files)} 个Word文档")
            self.progress["maximum"] = len(word_files) + 5  # files + merge steps

            backup_enabled = self.backup_mode.get()
            backup_root = os.path.join(output_folder, self.BACKUP_DIR_NAME)
            if backup_enabled:
                os.makedirs(backup_root, exist_ok=True)
                for idx, folder_path in enumerate(self.folders):
                    backup_dir = os.path.join(backup_root, f"DataBackup_{idx + 1}")
                    self.backup_data_files(folder_path, backup_dir)

            with tempfile.TemporaryDirectory() as temp_dir:
                pdf_files_with_header = []
                toc_entries = []
                all_tables = {}
                used_names = set()
                current_page = 1

                for i, word_file in enumerate(word_files):
                    self.progress["value"] = i + 1
                    file_name = os.path.splitext(os.path.basename(word_file))[0]
                    # Two folders may contain equally named documents (always the
                    # case after renaming "X-1" and "X-2" to "X"); give each its
                    # own label and its own scratch directory so nothing collides.
                    display_name = self._unique_name(file_name, used_names)
                    work_dir = os.path.join(temp_dir, f"{i:03d}")
                    os.makedirs(work_dir, exist_ok=True)

                    modified_word_path = word_file
                    if self.point_22_mode.get() or "GSM" in file_name.upper():
                        # Edit a scratch copy; the user's document stays untouched.
                        temp_word_path = os.path.join(work_dir, os.path.basename(word_file))
                        shutil.copy2(word_file, temp_word_path)
                        if self.modify_word_spec(temp_word_path):
                            modified_word_path = temp_word_path

                    original_pdf = os.path.join(work_dir, f"{file_name}_original.pdf")
                    pdf_with_header = os.path.join(work_dir, f"{file_name}_with_header.pdf")

                    if backup_enabled:
                        # Exactly one archive per document (the previous nested loop
                        # copied the current document into every document's folder).
                        self._backup_word_file(
                            word_file, modified_word_path,
                            os.path.join(backup_root, display_name))

                    tables = self.extract_spec_table(modified_word_path)
                    if tables:
                        all_tables[display_name] = tables
                        self.log(f"已从 {display_name} 中提取 {len(tables)} 个数据表格")

                    if not self.word_to_pdf(modified_word_path, original_pdf):
                        self.log(f"跳过 {display_name},转换失败")
                        continue

                    # Fall back to the unstamped PDF when the title overlay fails.
                    if self.add_inline_header(original_pdf, display_name, pdf_with_header):
                        final_pdf = pdf_with_header
                    else:
                        final_pdf = original_pdf
                    pdf_files_with_header.append(final_pdf)
                    toc_entries.append((display_name, current_page))
                    current_page += self.get_pdf_page_count(final_pdf)

                self.progress["value"] = len(word_files) + 1

                if not pdf_files_with_header:
                    self.log("没有成功转换的PDF文件,无法进行合并")
                    return

                report_name = self.output_filename.get().strip()
                if not report_name:
                    report_name = self.get_folder_name_parts(self.folders)
                output_pdf = os.path.join(output_folder, f"{report_name}.pdf")

                success = self.merge_pdfs_with_summary(
                    pdf_files_with_header, toc_entries, all_tables, output_pdf)
                self.progress["value"] = len(word_files) + 3

                if success:
                    self.log(f"处理完成!输出文件: {output_pdf}")
                    messagebox.showinfo("完成", f"处理完成!输出文件: {output_pdf}")
                else:
                    self.log("处理失败")
                    messagebox.showerror("错误", "处理过程中出现错误")
        except Exception as e:
            self.log(f"处理过程中出现错误: {str(e)}")
            messagebox.showerror("错误", f"处理过程中出现错误: {str(e)}")
        finally:
            # Also reached on the early returns above, which used to leave the
            # window stuck on the "处理中..." title.
            self.root.title(self.WINDOW_TITLE)
            if self.process_btn is not None:
                self.process_btn.config(state=tk.NORMAL)

    def extract_spec_table(self, word_path):
        """Return every table of the document that has the three summary columns.

        Args:
            word_path: path of a document python-docx can open.

        Returns:
            A list of tables; each table is a list of rows (header row first)
            and each row a list of stripped cell texts. Empty on any error.
        """
        try:
            doc = Document(word_path)
            tables = []
            for table in doc.tables:
                if not table.rows:
                    continue
                headers = [cell.text.strip() for cell in table.rows[0].cells]
                if not all(name in headers for name in self.SUMMARY_HEADERS):
                    continue
                table_data = [headers]
                table_data.extend(
                    [cell.text.strip() for cell in row.cells]
                    for row in table.rows[1:]
                )
                tables.append(table_data)
            return tables
        except Exception as e:
            self.log(f"提取 {os.path.basename(word_path)} 中的表格时出错: {str(e)}")
            return []

    def modify_word_spec(self, word_path):
        """Overwrite the SPEC column and recompute Pass/Fail, saving in place.

        The SPEC value is 20 (GSM) / 18 (other) when the "2号点位" option is
        ticked and 22 (GSM) / 20 (other) otherwise; "GSM" is detected in the
        file name. A row passes when its Simulation value is strictly below
        the SPEC value; rows whose Simulation cell is not numeric keep their
        status.

        Returns:
            True when at least one matching table was found and the document
            was saved, False otherwise (including on errors).
        """
        try:
            doc = Document(word_path)
            has_gsm = "GSM" in os.path.basename(word_path).upper()

            if self.point_22_mode.get():
                spec_value = 20 if has_gsm else 18
            else:
                spec_value = 22 if has_gsm else 20

            modified = False
            for table in doc.tables:
                if not table.rows:
                    continue
                headers = [cell.text.strip() for cell in table.rows[0].cells]
                try:
                    spec_index = headers.index("SPEC(dB)")
                    sim_index = headers.index("Simulation")
                    pf_index = headers.index("Pass/Fail")
                except ValueError:
                    continue  # not a result table

                modified = True
                for row in table.rows[1:]:
                    cells = row.cells
                    if spec_index < len(cells):
                        cells[spec_index].text = str(spec_value)
                    if sim_index < len(cells) and pf_index < len(cells):
                        try:
                            sim_value = float(cells[sim_index].text)
                        except ValueError:
                            continue  # non-numeric cell: leave the status as it is
                        cells[pf_index].text = "PASS" if sim_value < spec_value else "FAIL"

            if modified:
                doc.save(word_path)
                self.log(f"已修改 {os.path.basename(word_path)} 的SPEC值为{spec_value}")
            return modified
        except Exception as e:
            self.log(f"修改 {os.path.basename(word_path)} 失败: {str(e)}")
            return False

    @staticmethod
    def _cjk_font(alias, fallback):
        """Register simsun.ttc under ``alias`` (once) and return the usable font name."""
        if alias in pdfmetrics.getRegisteredFontNames():
            return alias
        try:
            pdfmetrics.registerFont(TTFont(alias, 'simsun.ttc'))
        except Exception:
            # Font file missing or unreadable: fall back to a built-in font
            # (which cannot render Chinese glyphs).
            return fallback
        return alias

    def add_inline_header(self, pdf_path, title, output_path):
        """Stamp ``title`` in red at the top of the first page of a PDF.

        Returns:
            True when ``output_path`` was written, False when the PDF has no
            pages or an error occurred.
        """
        try:
            reader = PdfReader(pdf_path)
            if len(reader.pages) == 0:
                return False

            first_page = reader.pages[0]
            # Size the overlay like the real page: a Letter-sized overlay puts
            # the title 50pt too low on A4 output.
            page_width = float(first_page.mediabox.width)
            page_height = float(first_page.mediabox.height)

            packet = BytesIO()
            can = canvas.Canvas(packet, pagesize=(page_width, page_height))
            can.setFont(self._cjk_font("SimSun-Bold", "Helvetica-Bold"), 14)
            can.setFillColor(red)
            can.drawString(50, page_height - 50, title)
            can.save()
            packet.seek(0)

            first_page.merge_page(PdfReader(packet).pages[0])

            writer = PdfWriter()
            writer.add_page(first_page)
            for page in reader.pages[1:]:
                writer.add_page(page)
            with open(output_path, "wb") as f:
                writer.write(f)
            return True
        except Exception as e:
            self.log(f"PDF添加标题失败: {str(e)}")
            return False

    def create_summary_page(self, toc_entries, all_tables, output_path):
        """Write the "Summary" PDF: one three-column table per extracted table.

        Only the first data row of each table is shown. Values are looked up
        by column header, so tables with extra or reordered columns still
        show the right numbers.

        Args:
            toc_entries: accepted for interface compatibility; not used.
            all_tables: mapping of document name -> tables as returned by
                ``extract_spec_table``.
            output_path: where to write the PDF.

        Returns:
            ``output_path`` on success, None on failure.
        """
        try:
            c = canvas.Canvas(output_path, pagesize=letter)
            width, height = letter
            font_name = self._cjk_font("SimSun", "Helvetica")
            bottom_margin = 50

            def start_new_page():
                """Begin a page with the red 'Summary' banner; return the next free y."""
                c.showPage()
                c.setFont(font_name, 24)
                c.setFillColor(red)
                c.drawCentredString(width / 2.0, height - 50, "Summary")
                c.setFillColor(black)
                return height - 100

            c.setFont(font_name, 24)
            c.setFillColor(red)
            c.drawCentredString(width / 2.0, height - 50, "Summary")
            c.setFillColor(black)
            y_position = height - 100

            if all_tables:
                c.setFont(font_name, 16)
                c.drawString(50, y_position, "Data Summary:")
                y_position -= 30
                table_width = width - 100
                headers = list(self.SUMMARY_HEADERS)
                col_widths = [table_width / 3] * 3

                for doc_name, tables in all_tables.items():
                    if y_position - 20 < bottom_margin:
                        y_position = start_new_page()
                    c.setFont(font_name, 12)
                    c.setFillColor(red)
                    c.drawString(60, y_position, f"Document: {doc_name}")
                    y_position -= 20
                    c.setFillColor(black)
                    c.setFont(font_name, 10)

                    for table_data in tables:
                        if len(table_data) < 2:  # header only, nothing to show
                            continue
                        source_headers, first_row = table_data[0], table_data[1]

                        new_row = []
                        for header in headers:
                            try:
                                new_row.append(first_row[source_headers.index(header)])
                            except (ValueError, IndexError):
                                new_row.append("N/A")

                        table = Table([headers, new_row], colWidths=col_widths)
                        table.setStyle(TableStyle([
                            ('BACKGROUND', (0, 0), (-1, 0), white),
                            ('TEXTCOLOR', (0, 0), (-1, 0), black),
                            ('ALIGN', (0, 0), (-1, -1), 'CENTER'),
                            ('FONTNAME', (0, 0), (-1, 0), font_name),
                            ('FONTNAME', (0, 1), (-1, -1), font_name),
                            ('BOTTOMPADDING', (0, 0), (-1, 0), 12),
                            ('BACKGROUND', (0, 1), (-1, -1), white),
                            ('GRID', (0, 0), (-1, -1), 1, black),
                        ]))

                        table_height = table.wrap(0, 0)[1]
                        if y_position - table_height < bottom_margin:
                            y_position = start_new_page()
                        table.drawOn(c, 50, y_position - table_height)
                        y_position -= (table_height + 20)

            c.save()
            return output_path
        except Exception as e:
            self.log(f"创建Summary页失败: {str(e)}")
            return None

    def word_to_pdf(self, word_path, pdf_path):
        """Convert a Word document to PDF through Word COM automation.

        Returns:
            True on success, False on failure (the error is logged).
        """
        pythoncom.CoInitialize()
        word = None
        doc = None
        try:
            # NOTE(review): Dispatch may attach to a Word instance the user
            # already has open, which Quit() below would then close; consider
            # win32com.client.DispatchEx for a private instance.
            word = win32com.client.Dispatch("Word.Application")
            word.Visible = False
            doc = word.Documents.Open(os.path.abspath(word_path))
            doc.SaveAs(os.path.abspath(pdf_path), FileFormat=self.WD_FORMAT_PDF)
            self.log(f"已将 {os.path.basename(word_path)} 转换为PDF")
            return True
        except Exception as e:
            self.log(f"转换 {os.path.basename(word_path)} 时出错: {str(e)}")
            return False
        finally:
            # Always shut Word down: a failed Open/SaveAs used to leave an
            # invisible WINWORD.EXE behind for every broken document.
            for release in (
                (lambda: doc.Close(False)) if doc is not None else None,
                word.Quit if word is not None else None,
            ):
                if release is None:
                    continue
                try:
                    release()
                except Exception:
                    pass  # best-effort cleanup; the outcome was already decided
            pythoncom.CoUninitialize()

    def get_pdf_page_count(self, pdf_path):
        """Return the number of pages of a PDF, or 0 when it cannot be read."""
        try:
            return len(PdfReader(pdf_path).pages)
        except Exception:
            return 0

    def merge_pdfs_with_summary(self, pdf_files, toc_entries, all_tables, output_path):
        """Merge the PDFs behind a generated summary page, one bookmark per document.

        Args:
            pdf_files: PDFs to merge, in output order.
            toc_entries: ``(title, page)`` pairs parallel to ``pdf_files``;
                only the titles are used.
            all_tables: passed through to ``create_summary_page``.
            output_path: path of the merged PDF.

        Returns:
            True on success, False on failure (the error is logged).
        """
        summary_path = None
        merger = None
        try:
            # delete=False: on Windows the file must be closed before reportlab
            # can reopen it by name.
            with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf') as summary_file:
                summary_path = summary_file.name

            if not self.create_summary_page(toc_entries, all_tables, summary_path):
                self.log("合并PDF时出错: Summary页生成失败")
                return False

            merger = PdfMerger()
            merger.append(summary_path)
            current_page = self.get_pdf_page_count(summary_path)  # 0-based page of the next document

            for pdf, (title, _) in zip(pdf_files, toc_entries):
                merger.append(pdf)
                merger.add_outline_item(title, current_page)
                current_page += self.get_pdf_page_count(pdf)

            merger.write(output_path)
            self.log(f"已成功合并 {len(pdf_files)} 个PDF文件")
            return True
        except Exception as e:
            self.log(f"合并PDF时出错: {str(e)}")
            return False
        finally:
            if merger is not None:
                merger.close()
            # Remove the scratch file on every path, not only on success.
            if summary_path and os.path.exists(summary_path):
                try:
                    os.remove(summary_path)
                except OSError:
                    pass

    def get_all_word_files(self, folder_paths):
        """Return the .docx/.doc files found directly inside the given folders.

        Files are returned folder by folder, sorted by name within a folder.
        Word's ``~$`` lock files are ignored.
        """
        word_extensions = ('.docx', '.doc')
        word_files = []
        for folder_path in folder_paths:
            if not os.path.isdir(folder_path):
                continue
            for file in sorted(os.listdir(folder_path)):
                if file.startswith("~$"):
                    continue  # owner/lock file of a document that is open in Word
                if os.path.splitext(file)[1].lower() not in word_extensions:
                    continue
                word_path = os.path.join(folder_path, file)
                if os.path.isfile(word_path):
                    word_files.append(word_path)
        return word_files

    def get_folder_name_parts(self, folder_paths):
        """Build a default report name from the last path components of the first folder.

        Args:
            folder_paths: a list of folder paths, or a single path string.

        Returns:
            Up to the last three path components joined with '_' (the drive
            letter is excluded), or "听筒磁干扰仿真报告" when nothing usable is given.
        """
        default_name = "听筒磁干扰仿真报告"
        if isinstance(folder_paths, str):
            # Indexing a bare string would pick its first character.
            folder_paths = [folder_paths]
        if not folder_paths:
            return default_name

        norm_path = os.path.normpath(folder_paths[0])
        # "D:" must not end up in a file name.
        without_drive = os.path.splitdrive(norm_path)[1]
        parts = [p for p in without_drive.split(os.sep) if p]
        if not parts:
            return default_name
        return "_".join(parts[-3:])


if __name__ == "__main__":
    root = tk.Tk()
    app = PDFConverterApp(root)
    root.mainloop()  # start the Tk event loop

# ---------------------------------------------------------------------------
# Run log and question posted together with the original script (verbatim):
#
# [15:47:40] 已设置输出路径: D:/tes/新建文件夹/111
# [15:47:44] 已添加文件夹: D:/tes/新建文件夹/1
# [15:47:48] 已添加文件夹: D:/tes/新建文件夹/2
# [15:47:49] 开始处理 2 个文件夹...
# [15:47:49] 共找到 2 个Word文档
# [15:47:49] 开始在文件夹中搜索所有Excel和CSV文件: D:/tes/新建文件夹/1
# [15:47:49] 开始备份Excel/CSV文件到Word文档备份目录: D:/tes/新建文件夹/111\报告存档\DataBackup_1
# [15:47:49] 完成备份! 共处理 0 个Excel/CSV文件
# [15:47:49] 开始在文件夹中搜索所有Excel和CSV文件: D:/tes/新建文件夹/2
# [15:47:49] 开始备份Excel/CSV文件到Word文档备份目录: D:/tes/新建文件夹/111\报告存档\DataBackup_2
# [15:47:49] 完成备份! 共处理 0 个Excel/CSV文件
# [15:47:49] 已从 1 中提取 1 个数据表格
# [15:47:54] 已将 1.docx 转换为PDF
# [15:47:55] 已从 2 中提取 1 个数据表格
# [15:47:59] 已将 2.docx 转换为PDF
# [15:48:01] 已成功合并 2 个PDF文件
# [15:48:01] 处理完成!输出文件: D:/tes/新建文件夹/111\听筒磁干扰_Simulation_Result.pdf
# [15:48:37] 开始处理 2 个文件夹...
# [15:48:37] 共找到 2 个Word文档
# [15:48:37] 开始在文件夹中搜索所有Excel和CSV文件: D:/tes/新建文件夹/1
# [15:48:37] 开始备份Excel/CSV文件到Word文档备份目录: D:/tes/新建文件夹/111\报告存档\DataBackup_1
# [15:48:37] 完成备份! 共处理 0 个Excel/CSV文件
# [15:48:37] 开始在文件夹中搜索所有Excel和CSV文件: D:/tes/新建文件夹/2
# [15:48:37] 开始备份Excel/CSV文件到Word文档备份目录: D:/tes/新建文件夹/111\报告存档\DataBackup_2
# [15:48:37] 完成备份! 共处理 0 个Excel/CSV文件
# [15:48:37] 已从 1 中提取 1 个数据表格
# [15:48:41] 已将 1.docx 转换为PDF
# [15:48:42] 已从 2 中提取 1 个数据表格
# [15:48:46] 已将 2.docx 转换为PDF
# [15:48:48] 已成功合并 2 个PDF文件
# [15:48:48] 处理完成!输出文件: D:/tes/新建文件夹/111\听筒磁干扰_Simulation_Result.pdf
#
# 为什么没有成功备份数据
# ---------------------------------------------------------------------------
最新发布
10-19
import json
import logging
import os
import platform
import subprocess
import time
from datetime import datetime, timedelta

import pytz
import requests
from mutagen.mp3 import MP3
from mutagen.mp4 import MP4

# Log to broadcast.log and to the console. The file is written as UTF-8 so the
# Chinese messages stay readable regardless of the platform's default code page.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[
        logging.FileHandler("broadcast.log", encoding="utf-8"),
        logging.StreamHandler(),
    ],
)
logger = logging.getLogger(__name__)

# All schedule times are wall-clock times in this zone.
SHANGHAI_TZ = pytz.timezone('Asia/Shanghai')

# Attachment fields of one broadcast, in playback order, with the extension the
# download is saved under. The record key of each is "<field>file_token".
AUDIO_FIELDS = (
    ("开播音乐", "mp3"),
    ("开场白-播报", "mp4"),
    ("需更新文案-播报", "mp4"),
    ("壹首歌", "mp3"),
    ("需更新文案2-播报", "mp4"),
    ("贰首歌", "mp3"),
    ("结束语-播报", "mp4"),
    ("结束音乐", "mp3"),
)

# Player processes terminated by kill_previous_players, per platform.system().
PLAYER_KILL_COMMANDS = {
    "Windows": (
        ['taskkill', '/F', '/IM', 'wmplayer.exe'],
        ['taskkill', '/F', '/IM', 'vlc.exe'],
        ['taskkill', '/F', '/IM', 'Music.UI.exe'],
    ),
    "Darwin": (
        ['killall', 'afplay'],
        ['killall', 'Music'],
    ),
    "Linux": (
        ['pkill', 'mpg123'],
        ['pkill', 'vlc'],
    ),
}


def get_broadcast_data_with_token(tenant_access_token):
    """Fetch the broadcast schedule from the Feishu Bitable table.

    Args:
        tenant_access_token: Feishu tenant access token.

    Returns:
        A list with one dict per table record, holding "播音日期", "时间段" and
        one "<field>file_token" entry per AUDIO_FIELDS item. Empty on any
        error (the error is logged).
    """
    url = 'https://open.feishu.cn/open-apis/bitable/v1/apps/QCPUbGid0aBjqLsZDWMcYetinRg/tables/tblMFWB9leKGDxnF/records/search'
    headers = {
        'Content-Type': 'application/json',
        'Authorization': f'Bearer {tenant_access_token}',
    }
    payload = {}  # search conditions would go here

    try:
        logger.info(f"正在请求飞书数据,URL: {url}")
        response = requests.post(url, headers=headers, json=payload, timeout=30)
        response.raise_for_status()

        items = (response.json().get("data") or {}).get("items") or []
        logger.info(f"成功获取飞书数据,共 {len(items)} 条记录")

        records = []
        for item in items:
            fields = item.get("fields", {})
            record = {
                "播音日期": extract_broadcast_date(fields, '播音日期'),
                "时间段": extract_time_segment(fields, '时间段'),
            }
            for field_name, _extension in AUDIO_FIELDS:
                record[f"{field_name}file_token"] = extract_file_token(fields, field_name)
            records.append(record)
        return records
    except requests.exceptions.HTTPError as http_err:
        logger.error(f"HTTP 错误发生: {http_err}")
    except requests.exceptions.Timeout:
        logger.error("请求超时,服务器响应时间过长")
    except requests.exceptions.ConnectionError:
        logger.error("连接错误,无法连接到服务器")
    except Exception as err:
        logger.error(f"其他错误发生: {err}", exc_info=True)
    return []


def _first_dict_value(fields, field_name, key, default):
    """Return ``fields[field_name][0][key]`` when the field is a non-empty list of dicts."""
    field_data = fields.get(field_name, [])
    if isinstance(field_data, list) and field_data and isinstance(field_data[0], dict):
        return field_data[0].get(key, "")
    return default


def extract_file_token(fields, field_name):
    """Return the file_token of the first attachment of a field, or '' if absent."""
    return _first_dict_value(fields, field_name, "file_token", '')


def extract_time_segment(fields, field_name):
    """Return the text of the time-segment field (e.g. "08:10-08:15"), or None if absent."""
    return _first_dict_value(fields, field_name, "text", None)


def extract_broadcast_date(fields, field_name):
    """Convert a millisecond timestamp field to a "YYYY-MM-DD" date in Shanghai time.

    Returns None when the field is not an int or is out of range.
    """
    field_data = fields.get(field_name, 0)
    if not isinstance(field_data, int):
        return None
    try:
        moment = datetime.fromtimestamp(field_data / 1000, tz=pytz.utc)
    except (ValueError, OverflowError, OSError):
        return None
    return moment.astimezone(SHANGHAI_TZ).strftime("%Y-%m-%d")


def get_auth_token():
    """Request a Feishu tenant access token.

    Returns:
        The token string, or None on failure (the error is logged).
    """
    url = "https://open.feishu.cn/open-apis/auth/v3/tenant_access_token/internal"
    headers = {"Content-Type": "application/json; charset=utf-8"}
    # SECURITY: the app credentials were hard-coded in the script. They can now
    # be supplied through the environment; the literals remain only as a
    # fallback so existing deployments keep working. Rotate the secret and
    # remove the fallback.
    payload = {
        "app_id": os.environ.get("FEISHU_APP_ID", "cli_a882683e8779d00c"),
        "app_secret": os.environ.get("FEISHU_APP_SECRET", "3NKkALA7vyMRVnpKJinmrb1LJ7YuK4H0"),
    }

    try:
        logger.info("正在获取认证token")
        response = requests.post(url, json=payload, headers=headers, timeout=30)
        response.raise_for_status()
        data = response.json()
        if data.get("code") == 0:
            logger.info("成功获取认证token")
            return data["tenant_access_token"]
        logger.error(f"请求失败:{data.get('msg')}(错误码:{data.get('code')})")
    except requests.exceptions.HTTPError as http_err:
        logger.error(f"HTTP 错误发生: {http_err}")
    except requests.exceptions.Timeout:
        logger.error("获取token超时")
    except requests.exceptions.ConnectionError:
        logger.error("连接错误,无法获取token")
    except Exception as e:
        logger.error(f"获取token异常:{e}", exc_info=True)
    return None


def create_folder(folder_name):
    """Create the folder (and its parents) if it does not exist yet."""
    if not os.path.exists(folder_name):
        logger.info(f"创建文件夹: {folder_name}")
        os.makedirs(folder_name, exist_ok=True)


def download_file(file_token, save_path, authorization):
    """Download one Feishu media file to ``save_path``.

    Returns:
        True when the file was written completely, False otherwise.
    """
    url = f"https://open.feishu.cn/open-apis/drive/v1/medias/{file_token}/download"
    headers = {"Authorization": "Bearer " + authorization}

    try:
        logger.info(f"开始下载文件: {file_token}")
        # The context manager returns the streamed connection to the pool even
        # when the status is not 200 or writing fails.
        with requests.get(url, headers=headers, stream=True, timeout=60) as response:
            if response.status_code != 200:
                logger.error(f"请求失败,状态码: {response.status_code}")
                logger.error(f"错误信息: {response.text}")
                return False
            with open(save_path, 'wb') as f:
                for chunk in response.iter_content(chunk_size=8192):
                    f.write(chunk)
        logger.info(f"文件已成功下载到: {save_path}")
        return True
    except requests.exceptions.Timeout:
        logger.error(f"下载文件超时: {file_token}")
    except requests.exceptions.ConnectionError:
        logger.error(f"连接错误,无法下载文件: {file_token}")
    except Exception as e:
        logger.error(f"下载文件发生异常: {str(e)}", exc_info=True)
    return False


def get_audio_duration(file_path):
    """Return the length of an .mp3/.mp4 file in seconds, or 0 when it cannot be read."""
    try:
        lowered = file_path.lower()
        if lowered.endswith(".mp3"):
            audio = MP3(file_path)
        elif lowered.endswith(".mp4"):
            audio = MP4(file_path)
        else:
            logger.error(f"不支持的文件格式: {file_path}")
            return 0
        return audio.info.length
    except Exception as e:
        logger.error(f"获取音频时长失败: {e}", exc_info=True)
        return 0


def kill_previous_players():
    """Terminate media-player processes left over from an earlier playback.

    This is what actually closes the player window on Windows: the player is
    started through the shell's ``start`` command, so the process handle held
    by ``play_music_in_folder`` belongs to a short-lived ``cmd.exe``, not to
    the player itself.
    """
    system = platform.system()
    try:
        logger.info(f"清理之前残留的播放器进程,操作系统: {system}")
        for command in PLAYER_KILL_COMMANDS.get(system, ()):
            # A non-zero exit just means "no such process"; output is not needed.
            subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        logger.info("已清理之前残留的播放器进程")
    except Exception as e:
        logger.error(f"清理播放器进程时发生错误: {e}", exc_info=True)


def play_music_in_folder(folder_path):
    """Play every .mp3/.mp4 file of a folder, one after another, in name order.

    Each file is handed to the system player, then the function sleeps for the
    file's duration plus one second before starting the next one.
    """
    # os.listdir returns entries in arbitrary order; sort so that
    # file_1 ... file_8 are played in the intended sequence.
    audio_files = sorted(
        f for f in os.listdir(folder_path) if f.lower().endswith((".mp3", ".mp4"))
    )
    processes = []

    for file_name in audio_files:
        full_file_path = os.path.join(folder_path, file_name)
        try:
            duration = get_audio_duration(full_file_path)
            if duration <= 0:
                logger.error(f"无法获取 {file_name} 的时长,跳过播放")
                continue

            logger.info(f"播放 {file_name},预计播放时长:{duration} 秒")
            if os.name == 'nt':
                # Opens the file with its associated player via the shell.
                process = subprocess.Popen(['start', '', full_file_path], shell=True)
            elif os.name == 'posix':
                # NOTE(review): afplay exists on macOS only; on Linux this
                # raises FileNotFoundError, which is logged below.
                process = subprocess.Popen(['afplay', full_file_path])
            else:
                logger.error(f"不支持的操作系统类型: {os.name}")
                continue

            processes.append(process)
            time.sleep(duration + 1)  # let the file finish, plus 1 s of slack
        except Exception as e:
            logger.error(f"无法播放 {full_file_path}: {e}", exc_info=True)

    # Stop whatever is still running. On Windows these handles are the already
    # finished shells, so this is a no-op there; see kill_previous_players.
    for process in processes:
        try:
            if process.poll() is None:
                process.kill()
                logger.info("播放器已强制关闭")
        except Exception as e:
            logger.error(f"关闭播放器失败: {e}", exc_info=True)


def wait_until(target_time_str, target_name="目标时间", roll_over=True):
    """Block until the given wall-clock time (Asia/Shanghai).

    Args:
        target_time_str: target time as "HH:MM".
        target_name: label used in log messages.
        roll_over: what to do when the time has already passed today. True
            (the default, and the previous behaviour) waits for the same time
            tomorrow; False returns immediately.
    """
    now = datetime.now(SHANGHAI_TZ)
    time_part = datetime.strptime(target_time_str, "%H:%M").time()
    naive_target = datetime.combine(now.date(), time_part)
    target_time = SHANGHAI_TZ.localize(naive_target)

    if target_time <= now:
        if not roll_over:
            logger.info(f"{target_name} {target_time_str} 已过,立即继续")
            return
        target_time = SHANGHAI_TZ.localize(naive_target + timedelta(days=1))

    total_wait_seconds = (target_time - now).total_seconds()
    logger.info(
        f"当前时间: {now.strftime('%H:%M:%S')}, 等待 {target_name}: "
        f"{target_time.strftime('%H:%M:%S')}, 预计等待 {total_wait_seconds/60:.1f} 分钟")

    if total_wait_seconds > 600:
        # Long wait: sleep through most of it, then poll the last 5 minutes.
        initial_wait = total_wait_seconds - 300
        logger.info(f"先等待 {initial_wait/60:.1f} 分钟,然后进行精确检查")
        time.sleep(initial_wait)

        remaining_wait = total_wait_seconds - initial_wait
        logger.info(f"剩余 {remaining_wait:.1f} 秒,每秒检查一次")
        while datetime.now(SHANGHAI_TZ) < target_time:
            time.sleep(1)
    else:
        # Short wait: poll every second and report once per elapsed minute.
        last_reported_minute = None
        while True:
            current_time = datetime.now(SHANGHAI_TZ)
            if current_time >= target_time:
                break
            elapsed = (current_time - now).total_seconds()
            elapsed_minute = int(elapsed // 60)
            if elapsed_minute != last_reported_minute:
                last_reported_minute = elapsed_minute
                logger.info(
                    f"已等待 {elapsed/60:.1f} 分钟,当前时间: {current_time.strftime('%H:%M:%S')}, "
                    f"目标时间: {target_time.strftime('%H:%M:%S')}")
            time.sleep(1)

    logger.info(f"已到达 {target_name}: {target_time_str}")


def _offset_time(start_time_str, minutes_before):
    """Return "HH:MM" lying ``minutes_before`` minutes ahead of ``start_time_str``."""
    start = datetime.strptime(start_time_str, "%H:%M")
    return (start - timedelta(minutes=minutes_before)).strftime("%H:%M")


def _find_segment(records, segment):
    """Return the first record whose "时间段" equals ``segment``, or None."""
    # NOTE(review): "播音日期" is not considered; if the table holds several
    # days, this picks the first row of any day. Confirm against the table.
    return next((entry for entry in records if entry["时间段"] == segment), None)


def process_time_segment(segment, download_offset, play_offset, data, authorization, folder_name):
    """Download and play the audio of one time segment.

    Args:
        segment: segment label "HH:MM-HH:MM"; the first time is the start.
        download_offset: minutes before the start at which to download.
        play_offset: minutes before the start at which to begin playing.
        data: records fetched at program start (used for an early sanity check).
        authorization: token fetched at program start (a fresh one is
            requested right before downloading, so this is not used).
        folder_name: directory the files are downloaded to and played from.
    """
    try:
        logger.info(f"开始处理时间段: {segment}")
        if not _find_segment(data, segment):
            logger.error(f"未找到时间段 {segment} 的文件数据!")
            return

        current_time = datetime.now(SHANGHAI_TZ).strftime('%H:%M')
        segment_start_time = segment.split("-")[0]
        logger.info(f"当前时间: {current_time}, 时间段开始时间: {segment_start_time}")

        # Zero-padded "HH:MM" strings compare like times.
        if current_time > segment_start_time:
            logger.warning(f"当前时间已超过时间段 {segment} 的开始时间,跳过该时间段的处理")
            return

        # roll_over=False: when the program reaches this point after the
        # download time but before the start, download right away instead of
        # sleeping until tomorrow.
        wait_until(_offset_time(segment_start_time, download_offset), "下载时间", roll_over=False)

        # Tokens expire, so fetch a fresh one (and fresh data) right before downloading.
        new_authorization = get_auth_token()
        if not new_authorization:
            logger.error("获取新的 token 失败,无法继续执行操作。")
            return

        logger.info(f"使用新的 token 开始获取 {segment} 的数据")
        new_data = get_broadcast_data_with_token(new_authorization)
        if not new_data:
            logger.error("获取新的飞书数据失败!")
            return

        target_data = _find_segment(new_data, segment)
        if not target_data:
            logger.error(f"未找到时间段 {segment} 的文件数据!")
            return

        logger.info(f"开始下载 {segment} 的文件")
        files = []
        for position, (field_name, file_format) in enumerate(AUDIO_FIELDS, start=1):
            token = target_data.get(f"{field_name}file_token")
            if not token:
                continue
            save_path = os.path.join(folder_name, f"file_{position}.{file_format}")
            if download_file(token, save_path, new_authorization):
                files.append(save_path)

        kill_previous_players()

        # Same reasoning as above: if downloading ran past the play time,
        # start playing now rather than tomorrow.
        wait_until(_offset_time(segment_start_time, play_offset), "播放时间", roll_over=False)

        logger.info(f"开始播放 {segment} 的文件")
        play_music_in_folder(folder_name)

        kill_previous_players()

        for file in files:
            try:
                os.remove(file)
                logger.info(f"已删除文件: {file}")
            except Exception as e:
                logger.error(f"删除文件失败: {e}")

        logger.info(f"成功完成时间段: {segment} 的处理")
    except Exception as e:
        logger.error(f"处理时间段 {segment} 时发生异常: {e}", exc_info=True)


def main():
    """Fetch the schedule, then process each configured time segment in order."""
    logger.info("===== 广播自动化程序启动 =====")

    authorization = get_auth_token()
    if not authorization:
        logger.error("获取认证token失败,程序退出")
        return

    data = get_broadcast_data_with_token(authorization)
    if not data:
        logger.error("未获取到有效的数据!程序退出")
        return

    folder_name = "bobao"
    create_folder(folder_name)

    # (segment, minutes before start to download, minutes before start to play)
    segments = [
        ("08:10-08:15", 10, 0),
        ("10:30-10:40", 10, 0),
        ("13:00-13:10", 10, 0),
        ("15:00-15:10", 10, 0),
    ]

    for segment, download_offset, play_offset in segments:
        logger.info(f"===== 准备处理时间段: {segment} =====")
        process_time_segment(segment, download_offset, play_offset, data, authorization, folder_name)
        logger.info(f"===== 完成处理时间段: {segment} =====")

    kill_previous_players()
    logger.info("===== 广播自动化程序结束 =====")


if __name__ == "__main__":
    main()

# ---------------------------------------------------------------------------
# Question posted together with the original script (verbatim):
# 为什么上述代码每个时间段播放完后,可以关闭调用的Windows播放器的窗口,而下面代码却不能实现?
# ---------------------------------------------------------------------------
import json
import os
import logging
from datetime import datetime, timedelta
import pytz
import requests
from urllib.parse import quote
import time
from selenium import webdriver
from selenium.webdriver.edge.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.edge.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from bs4 import BeautifulSoup
import re
import shutil
import glob
import asyncio
import edge_tts
import threading
from mutagen.mp3 import MP3
from mutagen.mp4 import MP4
import subprocess
import platform

# Logging configuration: everything goes both to broadcast.log and to the console.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[
        logging.FileHandler("broadcast.log"),
        logging.StreamHandler()
    ]
)
logger = logging.getLogger(__name__)

# Download directory configuration
DOWNLOAD_DIR = os.path.expanduser("~/Downloads")  # default download directory of the browser
TARGET_DIR = "./bobao"  # directory the broadcast files are saved into


def _split_song_text(text):
    """Split a "<song> <artist>" string on its first space into (name, artist)."""
    name, _, artist = text.partition(' ')
    return name, artist


def get_broadcast_data_with_token(tenant_access_token):
    """Fetch the broadcast schedule records from the Feishu Bitable table.

    Args:
        tenant_access_token: bearer token returned by get_auth_token().

    Returns:
        A list with one dict per table record (date, time segment, file tokens,
        broadcast texts and song name/artist). An empty list is returned when
        the request fails for any reason; the error is logged.
    """
    url = 'https://open.feishu.cn/open-apis/bitable/v1/apps/E1zybPqiqa0TaesZjKKch5ZcnJd/tables/tblwFY4k3pmrV5WK/records/search'
    headers = {
        'Content-Type': 'application/json',
        'Authorization': f'Bearer {tenant_access_token}'
    }
    payload = {}

    try:
        logger.info(f"正在请求飞书数据,URL: {url}")
        response = requests.post(url, headers=headers, json=payload, timeout=30)
        response.raise_for_status()
        response_dict = response.json()
        items = response_dict.get("data", {}).get("items", [])
        logger.info(f"成功获取飞书数据,共 {len(items)} 条记录")

        records = []
        for item in items:
            fields = item.get("fields", {})
            time_segment = extract_time_segment(fields, '时间段')
            song1_text = extract_text_field(fields, '壹歌曲-歌手')
            song2_text = extract_text_field(fields, '贰歌曲-歌手')
            # Broadcast copy is kept as text so it can be converted to speech later.
            song1_broadcast = extract_text_field(fields, '需更新文案-播报')
            song2_broadcast = extract_text_field(fields, '需更新文案2-播报')

            if time_segment == "08:10-08:15":
                # This segment never gets song names, whatever the table holds.
                song1_name = song1_artist = ""
                song2_name = song2_artist = ""
            else:
                song1_name, song1_artist = _split_song_text(song1_text)
                song2_name, song2_artist = _split_song_text(song2_text)

            records.append({
                "播音日期": extract_broadcast_date(fields, '播音日期'),
                "时间段": time_segment,
                "开播音乐file_token": extract_file_token(fields, '开播音乐'),
                "开场白-播报file_token": extract_file_token(fields, '开场白-播报'),
                "需更新文案-播报": song1_broadcast,  # text content
                "需更新文案-播报file_token": extract_file_token(fields, '需更新文案-播报'),
                "壹歌曲-歌手": song1_text,
                "壹首歌file_token": extract_file_token(fields, '壹首歌'),
                "需更新文案2-播报": song2_broadcast,  # text content
                "需更新文案2-播报file_token": extract_file_token(fields, '需更新文案2-播报'),
                "贰歌曲-歌手": song2_text,
                "贰首歌file_token": extract_file_token(fields, '贰首歌'),
                "结束语-播报file_token": extract_file_token(fields, '结束语-播报'),
                "结束音乐file_token": extract_file_token(fields, '结束音乐'),
                "壹歌名": song1_name,
                "壹歌手": song1_artist,
                "贰歌名": song2_name,
                "贰歌手": song2_artist,
            })
        return records
    except requests.exceptions.HTTPError as http_err:
        logger.error(f"HTTP 错误发生: {http_err}")
    except requests.exceptions.Timeout:
        logger.error("请求超时,服务器响应时间过长")
    except requests.exceptions.ConnectionError:
        logger.error("连接错误,无法连接到服务器")
    except Exception as err:
        logger.error(f"其他错误发生: {err}", exc_info=True)
    return []


def _first_dict_value(fields, field_name):
    """Return the first element of a list-valued field if it is a dict, else None."""
    field_data = fields.get(field_name, [])
    if isinstance(field_data, list) and field_data and isinstance(field_data[0], dict):
        return field_data[0]
    return None


def extract_file_token(fields, field_name):
    """Return the file_token of an attachment field, or '' when absent."""
    value = _first_dict_value(fields, field_name)
    if value is None:
        return ''
    return value.get("file_token", "")


def extract_text_field(fields, field_name):
    """Return the text of a text field ("text", falling back to "content"), or ''."""
    value = _first_dict_value(fields, field_name)
    if value is None:
        return ''
    return value.get("text", value.get("content", ""))


def extract_time_segment(fields, field_name):
    """Return the text of the time-segment field, or None when absent."""
    value = _first_dict_value(fields, field_name)
    if value is None:
        return None
    return value.get("text", "")


def extract_broadcast_date(fields, field_name):
    """Convert a millisecond timestamp field to a "YYYY-MM-DD" date in Asia/Shanghai.

    Returns None when the field is not an int or cannot be converted.
    """
    field_data = fields.get(field_name, 0)
    if isinstance(field_data, int):
        try:
            timestamp = field_data / 1000
            parsed_date = datetime.fromtimestamp(timestamp, tz=pytz.utc).astimezone(pytz.timezone('Asia/Shanghai'))
            return parsed_date.strftime("%Y-%m-%d")
        except (ValueError, OverflowError):
            pass
    return None


def get_auth_token():
    """Request a Feishu tenant access token.

    Returns:
        The token string, or None when the request fails (the error is logged).
    """
    url = "https://open.feishu.cn/open-apis/auth/v3/tenant_access_token/internal"
    headers = {"Content-Type": "application/json; charset=utf-8"}
    # NOTE(review): credentials are embedded in the source. They can now be
    # overridden through FEISHU_APP_ID / FEISHU_APP_SECRET; the literals remain
    # only as a backward-compatible fallback and should be rotated and removed.
    payload = {
        "app_id": os.environ.get("FEISHU_APP_ID", "cli_a882683e8779d00c"),
        "app_secret": os.environ.get("FEISHU_APP_SECRET", "3NKkALA7vyMRVnpKJinmrb1LJ7YuK4H0"),
    }

    try:
        logger.info("正在获取认证token")
        response = requests.post(url, json=payload, headers=headers, timeout=30)
        response.raise_for_status()
        data = response.json()
        if data["code"] == 0:
            logger.info("成功获取认证token")
            return data["tenant_access_token"]
        logger.error(f"请求失败:{data['msg']}(错误码:{data['code']})")
    except requests.exceptions.HTTPError as http_err:
        logger.error(f"HTTP 错误发生: {http_err}")
    except requests.exceptions.Timeout:
        logger.error("获取token超时")
    except requests.exceptions.ConnectionError:
        logger.error("连接错误,无法获取token")
    except Exception as e:
        logger.error(f"获取token异常:{e}", exc_info=True)
    return None


def display_data(data):
    """Print every record; file tokens are truncated to their first 10 characters."""
    if not data:
        print("没有获取到任何数据")
        return

    print(f"共获取到 {len(data)} 条广播数据记录:")
    for i, item in enumerate(data, 1):
        print(f"\n--- 记录 {i} ---")
        for key, value in item.items():
            if key.endswith('file_token') and value:
                print(f"{key}: {value[:10]}...")
            else:
                print(f"{key}: {value}")


def format_song_info(record):
    """Print the song names/artists and the broadcast texts of one record."""
    print("\n歌曲信息:")
    print(f"壹歌名: {record['壹歌名']}")
    print(f"壹歌手: {record['壹歌手']}")
    print(f"贰歌名: {record['贰歌名']}")
    print(f"贰歌手: {record['贰歌手']}")

    print("\n播报内容:")
    print(f"需更新文案-播报: {record['需更新文案-播报']}")
    print(f"需更新文案2-播报: {record['需更新文案2-播报']}")


def wait_for_download(initial_files, timeout=60):
    """Poll DOWNLOAD_DIR until a new, finished file shows up.

    Args:
        initial_files: directory listing taken before the download started.
        timeout: maximum number of seconds to wait.

    Returns:
        Path of the first new file that is not a ".crdownload" temporary file,
        or None on timeout.
    """
    known_files = set(initial_files)
    start_time = time.time()
    while time.time() - start_time < timeout:
        valid_files = [
            f for f in os.listdir(DOWNLOAD_DIR)
            if f not in known_files and not f.endswith('.crdownload')
        ]
        if valid_files:
            return os.path.join(DOWNLOAD_DIR, valid_files[0])
        time.sleep(1)

    print(f"等待下载超时({timeout}秒)")
    return None


def _save_error_page(driver):
    """Dump the current page source to error_page.html for later analysis."""
    with open("error_page.html", "w", encoding="utf-8") as f:
        f.write(driver.page_source)
    print("已保存当前页面源码到error_page.html,可用于分析问题")


def download_song(song_name, artist=None, save_path=TARGET_DIR, custom_filename=None):
    """Search gequbao.com for a song, download it with Edge and rename the file.

    Args:
        song_name (str): song title.
        artist (str, optional): artist name, appended to the search query.
        save_path (str, optional): directory to move the file into.
        custom_filename (str, optional): file name without extension; defaults
            to "<artist> - <song>" or "<song>".

    Returns:
        True when the file was downloaded and moved to its target path,
        False otherwise.
    """
    if not os.path.exists(save_path):
        os.makedirs(save_path)

    search_query = song_name
    if artist:
        search_query += f" {artist}"
    search_query = search_query.replace('.html', '')
    print(f"正在搜索歌曲: {search_query}")

    # Snapshot of the download directory, used to detect the new file afterwards.
    initial_files = os.listdir(DOWNLOAD_DIR)

    edge_options = Options()
    edge_options.add_argument('--disable-gpu')
    edge_options.add_argument('--no-sandbox')
    edge_options.add_argument('--disable-dev-shm-usage')

    edge_driver_path = r"C:\Users\shaopeng.qi\Downloads\edgedriver_win64 (1)\msedgedriver.exe"
    if not os.path.exists(edge_driver_path):
        print(f"错误: EdgeDriver路径不存在 - {edge_driver_path}")
        print("请下载与您Edge浏览器版本匹配的EdgeDriver,并更新路径")
        print("下载地址: https://developer.microsoft.com/en-us/microsoft-edge/tools/webdriver/")
        return False

    service = Service(executable_path=edge_driver_path)
    service.log_path = "edgedriver.log"
    driver = webdriver.Edge(service=service, options=edge_options)

    try:
        search_url = f"https://www.gequbao.com/s/{quote(search_query)}"
        print(f"正在访问搜索结果页面: {search_url}")
        driver.get(search_url)

        WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.CSS_SELECTOR, ".card-body"))
        )

        # A "no results" banner means there is nothing to download.
        try:
            WebDriverWait(driver, 5).until(
                EC.presence_of_element_located((By.XPATH, "//div[contains(text(), '没有找到相关歌曲')]"))
            )
            print("未找到匹配的歌曲")
            return False
        except Exception:
            # Banner not present within 5 s: results exist, carry on.
            pass

        # Scroll to the bottom so lazily loaded elements are rendered.
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(1)

        print("正在查找歌曲链接...")
        song_links = WebDriverWait(driver, 10).until(
            EC.presence_of_all_elements_located((By.CSS_SELECTOR, ".music-link"))
        )
        if not song_links:
            print("未找到歌曲链接")
            _save_error_page(driver)
            return False

        first_song_link = song_links[0].get_attribute("href")
        print(f"找到歌曲链接: {first_song_link}")

        match = re.search(r'/music/(\d+)', first_song_link)
        if not match:
            print("无法从链接中提取歌曲ID")
            _save_error_page(driver)
            return False
        song_id = match.group(1)
        print(f"提取的歌曲ID: {song_id}")

        song_detail_url = f"https://www.gequbao.com/music/{song_id}"
        print(f"正在访问歌曲详情页: {song_detail_url}")

        # Open the detail page in a new window and switch to it.
        driver.execute_script(f"window.open('{song_detail_url}');")
        time.sleep(2)

        if len(driver.window_handles) > 1:
            for window in driver.window_handles:
                if window != driver.current_window_handle:
                    driver.switch_to.window(window)
                    print("已切换到歌曲详情页")
                    break
        else:
            print("未成功打开歌曲详情页,尝试直接访问")
            driver.get(song_detail_url)
            print("已直接访问歌曲详情页")

        print("等待页面加载完成...")
        time.sleep(5)

        # Confirm we are on the detail page: song title first, download link second.
        try:
            song_title = WebDriverWait(driver, 5).until(
                EC.presence_of_element_located((By.CSS_SELECTOR, ".song-title"))
            )
            print(f"已进入歌曲详情页,歌曲标题: {song_title.text}")
        except Exception:
            try:
                WebDriverWait(driver, 3).until(
                    EC.presence_of_element_located((By.XPATH, "//a[contains(text(), '下载')]"))
                )
                print("已进入歌曲详情页,找到下载按钮")
            except Exception:
                print("无法确认是否在歌曲详情页")
                _save_error_page(driver)
                return False

        print("正在查找并点击下载按钮...")
        try:
            download_btn = WebDriverWait(driver, 10).until(
                EC.element_to_be_clickable((By.XPATH, "//*[@id=\"btn-download-mp3\"]"))
            )
            print("找到下载按钮")
            download_btn.click()
            print("已点击下载按钮")

            print("等待下载对话框出现...")
            time.sleep(3)

            print("正在查找并点击低品质MP3按钮...")
            try:
                # The dialog id has a dynamic suffix, hence starts-with().
                low_quality_btn = WebDriverWait(driver, 10).until(
                    EC.element_to_be_clickable((By.XPATH, "//*[starts-with(@id, 'jconfirm-box')]/div/ul/li[2]/a"))
                )
                print("找到低品质MP3按钮")
                low_quality_btn.click()

                if custom_filename:
                    target_filename = f"{custom_filename}.mp3"
                elif artist:
                    target_filename = f"{artist} - {song_name}.mp3"
                else:
                    target_filename = f"{song_name}.mp3"
                target_path = os.path.join(save_path, target_filename)
                print(f"已点击下载按钮,等待下载完成后将保存为: {target_path}")

                print("等待下载开始...")
                downloaded_file = wait_for_download(initial_files, timeout=60)
                if not downloaded_file:
                    print("未检测到新的下载文件")
                    return False

                # Give the browser time to finish writing before moving the file.
                time.sleep(5)
                try:
                    shutil.move(downloaded_file, target_path)
                    print(f"歌曲已成功下载并重命名为: {target_path}")
                    return True
                except Exception as move_err:
                    print(f"移动文件时出错: {move_err}")
                    print(f"下载的文件保存在: {downloaded_file}")
                    return False
            except Exception as e:
                print(f"点击低品质MP3按钮时出错: {e}")
                _save_error_page(driver)
                return False
        except Exception as e:
            print(f"点击下载按钮时出错: {e}")
            _save_error_page(driver)
            return False
    except Exception as e:
        print(f"操作过程中发生错误: {e}")
        _save_error_page(driver)
        return False
    finally:
        # Always close the browser, whatever happened above.
        try:
            driver.quit()
            print("浏览器已关闭")
        except Exception as e:
            print(f"关闭浏览器时出错: {e}")


def download_required_songs(data, folder_name):
    """Download the two songs of the first record as file_4.mp3 and file_6.mp3.

    Returns:
        (success1, success2), or None when there is no data.
    """
    if not data:
        print("没有可下载的歌曲数据")
        return

    first_record = data[0]
    song_specs = (
        ("第一首歌", "壹歌名", "壹歌手", "file_4"),
        ("第二首歌", "贰歌名", "贰歌手", "file_6"),
    )

    results = []
    for ordinal, name_key, artist_key, stem in song_specs:
        name = first_record.get(name_key)
        artist = first_record.get(artist_key)
        success = False
        if name and artist:
            print(f"\n准备下载{ordinal}: {name} - {artist},保存为 {stem}.mp3")
            try:
                success = download_song(name, artist, folder_name, stem)
            except Exception as e:
                print(f"下载失败: {name} - {artist}, 错误: {e}")
        else:
            print(f"{ordinal}信息不完整,无法下载")
            print(f"歌名: {name}, 歌手: {artist}")
        results.append(success)

    return results[0], results[1]


# Module-level event loop kept for backward compatibility; run_async_task()
# creates its own loop per call and does not use this one.
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)


async def convert_text_to_speech(text, output_file):
    """Convert text to speech with edge-tts and save it as an MP3 file.

    Returns:
        True on success, False when the text is empty or the conversion fails.
    """
    if not text:
        print(f"文本为空,跳过转换: {output_file}")
        return False

    try:
        communicate = edge_tts.Communicate(text, voice="zh-CN-YunyangNeural")
        await communicate.save(output_file)
        print(f"语音已保存为:{output_file}")
        return True
    except Exception as e:
        print(f"文本转语音失败: {e}")
        return False


def run_async_task(coro):
    """Run a coroutine to completion on a private event loop in a worker thread.

    Returns:
        The coroutine's result, or None when it raised (the error is logged).
        Returning the result lets callers tell success from failure.
    """
    outcome = {}

    def run():
        worker_loop = asyncio.new_event_loop()
        asyncio.set_event_loop(worker_loop)
        try:
            outcome["result"] = worker_loop.run_until_complete(coro)
        except Exception as exc:
            logger.error(f"异步任务执行失败: {exc}", exc_info=True)
        finally:
            worker_loop.close()

    thread = threading.Thread(target=run)
    thread.start()
    thread.join()
    return outcome.get("result")


async def convert_broadcast_texts_async(data, folder_name):
    """Convert both broadcast texts of the first record to file_3.mp3 / file_5.mp3.

    Returns:
        (success1, success2), or None when there is no data.
    """
    if not data:
        print("没有可转换的播报文本数据")
        return

    first_record = data[0]

    if not os.path.exists(folder_name):
        os.makedirs(folder_name)

    text1 = first_record.get("需更新文案-播报", "")
    output_file1 = os.path.join(folder_name, "file_3.mp3")
    print(f"\n准备转换第一个播报文本,保存为 file_3.mp3")
    print(f"文本内容: {text1[:50]}...")  # preview: first 50 characters only
    success1 = await convert_text_to_speech(text1, output_file1)
    if success1:
        print("第一个播报文本转换成功")

    text2 = first_record.get("需更新文案2-播报", "")
    output_file2 = os.path.join(folder_name, "file_5.mp3")
    print(f"\n准备转换第二个播报文本,保存为 file_5.mp3")
    print(f"文本内容: {text2[:50]}...")  # preview: first 50 characters only
    success2 = await convert_text_to_speech(text2, output_file2)
    if success2:
        print("第二个播报文本转换成功")

    return success1, success2


def convert_broadcast_texts(data, folder_name):
    """Synchronous wrapper around convert_broadcast_texts_async().

    The coroutine runs in its own thread so it never clashes with an event
    loop that may already be running in the caller.
    """
    return run_async_task(convert_broadcast_texts_async(data, folder_name))


def create_folder(folder_name):
    """Create the folder if it does not exist yet."""
    if not os.path.exists(folder_name):
        logger.info(f"创建文件夹: {folder_name}")
        os.makedirs(folder_name)


def download_file(file_token, save_path, authorization):
    """Download a Feishu drive media file to save_path.

    Returns:
        True when the file was written, False on any failure (logged).
    """
    url = f"https://open.feishu.cn/open-apis/drive/v1/medias/{file_token}/download"
    headers = {"Authorization": "Bearer " + authorization}

    try:
        logger.info(f"开始下载文件: {file_token}")
        # The context manager releases the streamed connection in every case.
        with requests.get(url, headers=headers, stream=True, timeout=60) as response:
            if response.status_code == 200:
                with open(save_path, 'wb') as f:
                    for chunk in response.iter_content(chunk_size=8192):
                        f.write(chunk)
                logger.info(f"文件已成功下载到: {save_path}")
                return True
            logger.error(f"请求失败,状态码: {response.status_code}")
            logger.error(f"错误信息: {response.text}")
    except requests.exceptions.Timeout:
        logger.error(f"下载文件超时: {file_token}")
    except requests.exceptions.ConnectionError:
        logger.error(f"连接错误,无法下载文件: {file_token}")
    except Exception as e:
        logger.error(f"下载文件发生异常: {str(e)}", exc_info=True)
    return False


def get_audio_duration(file_path):
    """Return the duration in seconds of an .mp3/.mp4 file, or 0 on failure."""
    try:
        if file_path.endswith(".mp3"):
            audio = MP3(file_path)
        elif file_path.endswith(".mp4"):
            audio = MP4(file_path)
        else:
            logger.error(f"不支持的文件格式: {file_path}")
            return 0
        return audio.info.length
    except Exception as e:
        logger.error(f"获取音频时长失败: {e}", exc_info=True)
        return 0


def kill_previous_players():
    """Terminate known media-player processes for the current operating system."""
    kill_commands = {
        "Windows": [
            ['taskkill', '/F', '/IM', 'wmplayer.exe'],
            ['taskkill', '/F', '/IM', 'vlc.exe'],
            ['taskkill', '/F', '/IM', 'Music.UI.exe'],
        ],
        "Darwin": [  # macOS
            ['killall', 'afplay'],
            ['killall', 'Music'],
        ],
        "Linux": [
            ['pkill', 'mpg123'],
            ['pkill', 'vlc'],
        ],
    }
    system = platform.system()

    try:
        logger.info(f"清理之前残留的播放器进程,操作系统: {system}")
        for command in kill_commands.get(system, []):
            subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        logger.info("已清理之前残留的播放器进程")
    except Exception as e:
        logger.error(f"清理播放器进程时发生错误: {e}", exc_info=True)


def play_music_in_folder(folder_path):
    """Play every .mp3/.mp4 file of the folder in file-name order.

    Each file is given its own duration plus one second before the next one
    starts. On Windows the file is opened through the shell's `start`, so the
    Popen handle refers to the short-lived shell rather than the media player;
    the kill loop at the end therefore cannot close the player window, and
    callers should follow up with kill_previous_players().
    """
    audio_files = sorted(f for f in os.listdir(folder_path) if f.endswith((".mp3", ".mp4")))
    processes = []

    for file_name in audio_files:
        full_file_path = os.path.join(folder_path, file_name)
        try:
            duration = get_audio_duration(full_file_path)
            if duration <= 0:
                logger.error(f"无法获取 {file_name} 的时长,跳过播放")
                continue

            logger.info(f"播放 {file_name},预计播放时长:{duration} 秒")

            if os.name == 'nt':  # Windows
                process = subprocess.Popen(['start', '', full_file_path], shell=True)
            elif os.name == 'posix':  # macOS or Linux; afplay is the macOS player
                process = subprocess.Popen(['afplay', full_file_path])
            else:
                logger.error(f"不支持的操作系统类型: {os.name}")
                continue

            processes.append(process)
            time.sleep(duration + 1)  # wait for playback to finish, 1 s of slack
        except Exception as e:
            logger.error(f"无法播放 {full_file_path}: {e}", exc_info=True)

    # Kill whatever child processes are still alive.
    for process in processes:
        try:
            if process.poll() is None:
                process.kill()
                logger.info("播放器已强制关闭")
        except Exception as e:
            logger.error(f"关闭播放器失败: {e}", exc_info=True)


def wait_until(target_time_str, target_name="目标时间"):
    """Block until the given "HH:MM" wall-clock time in Asia/Shanghai.

    A time that has already passed today is taken to mean tomorrow.

    Args:
        target_time_str: target time formatted as "HH:MM".
        target_name: label used in the log messages.
    """
    tz = pytz.timezone('Asia/Shanghai')
    now = datetime.now(tz)

    time_part = datetime.strptime(target_time_str, "%H:%M").time()
    naive_target_time = datetime.combine(now.date(), time_part)
    target_time = tz.localize(naive_target_time)

    if target_time <= now:
        target_time = tz.localize(naive_target_time + timedelta(days=1))

    total_wait_seconds = (target_time - now).total_seconds()
    logger.info(f"当前时间: {now.strftime('%H:%M:%S')}, 等待 {target_name}: {target_time.strftime('%H:%M:%S')}, 预计等待 {total_wait_seconds/60:.1f} 分钟")

    if total_wait_seconds > 600:
        # Long wait: sleep through most of it, keep the last 5 minutes for
        # second-by-second checks.
        initial_wait = total_wait_seconds - 300
        logger.info(f"先等待 {initial_wait/60:.1f} 分钟,然后进行精确检查")
        time.sleep(initial_wait)

        remaining_wait = total_wait_seconds - initial_wait
        logger.info(f"剩余 {remaining_wait:.1f} 秒,每秒检查一次")
        while datetime.now(tz) < target_time:
            time.sleep(1)
    else:
        # Short wait: check every second and log once per elapsed minute.
        while datetime.now(tz) < target_time:
            current_time = datetime.now(tz)
            elapsed = (current_time - now).total_seconds()
            if int(elapsed) % 60 == 0:
                logger.info(f"已等待 {elapsed/60:.1f} 分钟,当前时间: {current_time.strftime('%H:%M:%S')}, 目标时间: {target_time.strftime('%H:%M:%S')}")
            time.sleep(1)

    logger.info(f"已到达 {target_name}: {target_time_str}")


def _prepare_broadcast_clip(text, file_token, save_path, authorization):
    """Produce one broadcast clip: TTS from text first, file_token download as fallback."""
    label = os.path.basename(save_path)
    if text:
        print(f"使用文本转语音生成 {label}")
        # convert_text_to_speech reports failure through its return value, so
        # the result (not an exception) decides whether to fall back.
        if run_async_task(convert_text_to_speech(text, save_path)):
            return True
        if file_token:
            print(f"尝试使用 file_token 下载 {label}")
            return download_file(file_token, save_path, authorization)
        return False
    if file_token:
        print(f"使用 file_token 下载 {label}")
        return download_file(file_token, save_path, authorization)
    return False


def _prepare_song(file_token, name, artist, folder_name, stem, ordinal, authorization):
    """Fetch one song: by file_token when present, otherwise by web search."""
    if file_token:
        return download_file(file_token, os.path.join(folder_name, f"{stem}.mp3"), authorization)
    if name and artist:
        print(f"从网络搜索下载{ordinal}: {name} - {artist}")
        return bool(download_song(name, artist, folder_name, stem))
    return False


def process_time_segment(segment, download_offset, play_offset, data, authorization, folder_name):
    """Download, play and clean up the files of one time segment.

    Args:
        segment: time segment such as "10:30-10:40".
        download_offset: minutes before the segment start at which to download.
        play_offset: minutes before the segment start at which to start playing.
        data: records previously fetched with get_broadcast_data_with_token().
        authorization: token obtained at start-up (a fresh one is requested
            right before downloading).
        folder_name: directory the files are written to and played from.
    """
    try:
        logger.info(f"开始处理时间段: {segment}")

        target_data = next((entry for entry in data if entry["时间段"] == segment), None)
        if not target_data:
            logger.error(f"未找到时间段 {segment} 的文件数据!")
            return

        current_time = datetime.now(pytz.timezone('Asia/Shanghai')).strftime('%H:%M')
        segment_start_time = segment.split("-")[0]
        logger.info(f"当前时间: {current_time}, 时间段开始时间: {segment_start_time}")

        # Zero-padded "HH:MM" strings compare correctly as text.
        if current_time > segment_start_time:
            logger.warning(f"当前时间已超过时间段 {segment} 的开始时间,跳过该时间段的处理")
            return

        segment_start = datetime.strptime(segment_start_time, "%H:%M")
        download_time = (segment_start - timedelta(minutes=download_offset)).strftime("%H:%M")
        wait_until(download_time, "下载时间")

        # Refresh the token right before downloading; the start-up one may have expired.
        new_authorization = get_auth_token()
        if not new_authorization:
            logger.error("获取新的 token 失败,无法继续执行操作。")
            return

        logger.info(f"使用新的 token 开始获取 {segment} 的数据")
        new_data = get_broadcast_data_with_token(new_authorization)
        if not new_data:
            logger.error("获取新的飞书数据失败!")
            return

        target_data = next((entry for entry in new_data if entry["时间段"] == segment), None)
        if not target_data:
            logger.error(f"未找到时间段 {segment} 的文件数据!")
            return

        logger.info(f"开始下载 {segment} 的文件")
        files = []

        # Fixed attachments: opening music / intro are file_1 / file_2, closing
        # words / closing music are file_7 / file_8.
        fixed_files = [
            ("开播音乐file_token", 1, "mp3"),
            ("开场白-播报file_token", 2, "mp3"),
            ("结束语-播报file_token", 7, "mp3"),
            ("结束音乐file_token", 8, "mp3"),
        ]
        for key, file_number, file_format in fixed_files:
            token = target_data.get(key)
            if token:
                save_path = os.path.join(folder_name, f"file_{file_number}.{file_format}")
                if download_file(token, save_path, new_authorization):
                    files.append(save_path)

        # Broadcast clips: file_3 and file_5.
        broadcast_clips = [
            ("需更新文案-播报", "需更新文案-播报file_token", "file_3.mp3"),
            ("需更新文案2-播报", "需更新文案2-播报file_token", "file_5.mp3"),
        ]
        for text_key, token_key, file_name in broadcast_clips:
            save_path = os.path.join(folder_name, file_name)
            if _prepare_broadcast_clip(target_data.get(text_key), target_data.get(token_key),
                                       save_path, new_authorization):
                files.append(save_path)

        # Songs: file_4 and file_6.
        songs = [
            ("壹首歌file_token", "壹歌名", "壹歌手", "file_4", "第一首歌"),
            ("贰首歌file_token", "贰歌名", "贰歌手", "file_6", "第二首歌"),
        ]
        for token_key, name_key, artist_key, stem, ordinal in songs:
            if _prepare_song(target_data.get(token_key), target_data.get(name_key),
                             target_data.get(artist_key), folder_name, stem, ordinal,
                             new_authorization):
                files.append(os.path.join(folder_name, f"{stem}.mp3"))

        if not files:
            logger.error(f"没有成功下载任何文件!")
            return

        # Order by the number embedded in the file name.
        files.sort(key=lambda x: int(re.search(r'file_(\d+)', x).group(1)))
        logger.info(f"所有文件下载完成,共下载 {len(files)} 个文件")

        play_time = (segment_start - timedelta(minutes=play_offset)).strftime("%H:%M")
        wait_until(play_time, "播放时间")

        # Clear any player left over from an earlier run.
        kill_previous_players()

        logger.info(f"开始播放 {segment} 的所有文件")
        play_music_in_folder(folder_name)

        # Close the player window once playback is over. The player is started
        # through the shell, so it can only be closed by process name; this
        # also releases the file handles before the cleanup below.
        kill_previous_players()
        logger.info(f"时间段 {segment} 处理完成")

        logger.info(f"开始清理时间段 {segment} 下载的文件")
        for file in files:
            if not os.path.exists(file):
                logger.warning(f"文件不存在,无法删除: {file}")
                continue
            try:
                os.remove(file)
                logger.info(f"已删除文件: {file}")
            except OSError as e:
                # One locked file must not stop the remaining ones from being removed.
                logger.error(f"删除文件失败: {e}")
        logger.info(f"时间段 {segment} 文件清理完成")

    except Exception as e:
        logger.error(f"处理时间段 {segment} 时发生错误: {e}", exc_info=True)


def process_day(data, authorization, process_past_segments=False):
    """Process every distinct time segment found in data, in sorted order.

    Args:
        data: records of the day.
        authorization: token passed through to process_time_segment().
        process_past_segments: when False, segments whose start time has
            already passed are skipped here.
    """
    if not data:
        logger.error("没有可处理的数据")
        return

    time_segments = sorted({item["时间段"] for item in data if item["时间段"]})
    if not time_segments:
        logger.error("没有找到任何时间段数据")
        return

    logger.info(f"找到 {len(time_segments)} 个时间段需要处理")

    for segment in time_segments:
        # One folder per segment; ":" is dropped from the folder name.
        segment_folder = os.path.join(TARGET_DIR, segment.replace(":", ""))
        create_folder(segment_folder)

        current_time = datetime.now(pytz.timezone('Asia/Shanghai')).strftime('%H:%M')
        segment_start_time = segment.split("-")[0]
        if not process_past_segments and current_time > segment_start_time:
            logger.warning(f"时间段 {segment} 已过去,跳过处理")
            continue

        process_time_segment(segment, download_offset=5, play_offset=0, data=data,
                             authorization=authorization, folder_name=segment_folder)

        # Remove the segment folder again when nothing is left in it.
        if os.path.exists(segment_folder) and not os.listdir(segment_folder):
            os.rmdir(segment_folder)
            logger.info(f"已删除空文件夹: {segment_folder}")


def main():
    """Entry point: fetch today's schedule and process its segments in order."""
    logger.info("===== 广播播放系统启动 =====")

    authorization = get_auth_token()
    if not authorization:
        logger.error("获取认证token失败,程序退出")
        return

    logger.info("获取广播数据...")
    data = get_broadcast_data_with_token(authorization)
    if not data:
        logger.error("没有获取到任何广播数据,程序退出")
        return

    logger.info(f"成功获取 {len(data)} 条广播数据")

    today = datetime.now(pytz.timezone('Asia/Shanghai')).strftime("%Y-%m-%d")
    today_data = [item for item in data if item["播音日期"] == today]
    if not today_data:
        logger.warning(f"今天 ({today}) 没有安排广播,程序退出")
        return

    logger.info(f"今天 ({today}) 有 {len(today_data)} 条广播数据")

    # Process today's remaining segments; segments already past are skipped.
    process_day(today_data, authorization, process_past_segments=False)

    logger.info("===== 广播播放系统完成所有任务 =====")


if __name__ == "__main__":
    main()
06-25
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值