Converting snake_case variable names to camelCase with an Excel formula

Excel variable-name style conversion tips
This post shows how to convert variable naming styles quickly with Excel's built-in functions: PROPER to capitalize the first letter of each word, SUBSTITUTE to remove the underscores, and REPLACE for the final adjustment.

Background

Variable naming conventions differ from one context to another, and sometimes, when we receive an Excel sheet, we want to convert names from one style to another quickly.
For example, we want to achieve the following effect.
[Screenshot: the desired snake_case to camelCase conversion]
Can Excel do this quickly? The answer is yes.
Excel is powerful on its own, and with VBA it can handle all kinds of complex data processing.
For today's requirement, though, combining a few of Excel's built-in functions is all we need.

Implementation steps

1- Use PROPER to capitalize the first letter of every word in the text
=PROPER("is_friend") returns Is_Friend
2- Use SUBSTITUTE to remove the underscores
=SUBSTITUTE("Is_Friend","_","") returns IsFriend
3- Restore the first letter of the first word to lowercase
First use LEFT(original_name,1) to get the original first character,
then use REPLACE(result_of_step_2,1,1,replacement_character) to swap the capitalized first letter back to the lowercase one.
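For example, with the original name "is_friend" in cell A1 (the same cell reference used in the combined formula below), LEFT(A1,1) returns "i", and
=REPLACE("IsFriend",1,1,"i") returns isFriend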

That completes the conversion. The combined formula is:

=REPLACE(SUBSTITUTE(PROPER(A1),"_",""),1,1,LEFT(A1,1))
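
This works as long as the source names start with a lowercase letter, as in the example above. As an untested variation for names that might start with an uppercase letter (e.g. "Is_Friend" or "IS_FRIEND"), you can wrap the first character in LOWER:

=REPLACE(SUBSTITUTE(PROPER(A1),"_",""),1,1,LOWER(LEFT(A1,1)))

Either way, fill the formula down the column to convert a whole list of variable names at once.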

The result looks like this:
[Screenshot: conversion results]
