Day_20 File操作、递归复制、序列化

1. File

1.1 概述

java.io.File类:文件和文件目录路径的抽象表示形式,与平台无关

File 能新建、删除、重命名文件和目录,但 File 不能访问文件内容本身。

如果需要访问文件内容本身,则需要使用输入/输出流

想要在Java程序中表示一个真实存在的文件或目录,那么必须有一个File对象,但是Java程序中的一个File对象,可能并不对应一个真实存在的文件或目录

File对象可以作为参数传递给流的构造器

1.2 方法

public String getAbsolutePath():获取绝对路径

 public String getName() :获取名称

 public String getParent():获取上层文件目录路径。若无,返回null

 file.getParentFile() : 上级目录对应的文件对象

 public boolean isFile() :判断是否是文件

 public boolean isDirectory():判断是否是文件目录

 public boolean exists() :判断是否存在

 file.createNewFile(): 创建文件

 file.delete() : 删除文件

 file.listFiles() : 获取所有子文件对象

  file.mkdirs() : 创建目录

 2. 递归复制

1 复制 : 就是输入和输出结合使用

2 获取文件夹下所有子文件

          如果子文件是文件,则复制

          如果子文件是目录,则再次获取该目录的所有子文件,做相同操作

注意 : 复制的时候,源目录和目标目录不能一致

判断是否是文件,是文件则进行如下操作

是目录,则进行如下操作

 

3. 对象流 

3.1 概述

创建对象的方式 :

1 new 用的最多

2 反射机制 可以通过一个字符串来创建对应的对象

3 clone Object中的方法,使用较少(注意:clone并未被废弃,但深拷贝通常用序列化实现更方便)

4 序列化

序列化 : 把堆内存的java对象,持久化保存在本地硬盘当中

反序列化 : 把硬盘当中的序列化文件,反序列化为堆内存对象

优点 :  可以长期保存   更利于数据传输

要序列化的类,必须实现Serializable接口

应用场景 :序列化是将数据转换为二进制流进行长期保存,如果不进行序列化,是不能进行长期存储和网络传递的

网络传输流程 : 数据对象 --> 序列化 --> 二进制流 --> 加密处理 --> 网络传输 --> 解密处理 --> 二进制流 --> 反序列化 --> 数据对象

3.2 序列化

3.3 反序列化 

 

# Dam-seepage water-level prediction GUI using a Physics-Informed Neural
# Network (PINN) with Monte-Carlo Dropout uncertainty estimation.
# NOTE(review): this source was recovered from a whitespace-mangled paste;
# formatting below is reconstructed, code tokens are unchanged.
import tkinter as tk
from tkinter import ttk, filedialog, messagebox
import pandas as pd
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, Lambda
from tensorflow.keras.optimizers import Adam
from sklearn.preprocessing import MinMaxScaler
import os
import time
import warnings
import matplotlib.dates as mdates

warnings.filterwarnings('ignore', category=UserWarning, module='tensorflow')
# Configure fonts that can render CJK labels; disable the unicode minus so
# negative ticks render with these fonts.
mpl.rcParams['font.sans-serif'] = ['SimHei', 'Microsoft YaHei', 'Arial Unicode MS']
mpl.rcParams['axes.unicode_minus'] = False
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False


class PINNModel(tf.keras.Model):
    """Feed-forward network plus four trainable physical parameters.

    The raw parameters (k1_raw, k2_raw, alpha_raw, beta_raw) are unconstrained
    tf.Variables; sigmoid squashing maps them into bounded physical ranges.
    """

    def __init__(self, num_layers=4, hidden_units=32, dropout_rate=0.1, **kwargs):
        super(PINNModel, self).__init__(**kwargs)
        self.dense_layers = []
        self.dropout_layers = []
        # Create hidden layers and a matching Dropout layer for each
        # (Dropout stays active at predict time to realize MC Dropout).
        for _ in range(num_layers):
            self.dense_layers.append(Dense(hidden_units, activation='tanh'))
            self.dropout_layers.append(tf.keras.layers.Dropout(dropout_rate))
        self.final_layer = Dense(1, activation='linear')
        # Physical parameters. NOTE(review): self.k1/k2/alpha/beta are
        # computed ONCE here from the initial raw values, so they are stale
        # tensors after training starts; physics_loss() correctly recomputes
        # the sigmoids, but the k1/k2 values logged in train_model() via
        # self.model.k1.numpy() will never change — verify intent.
        self.k1_raw = tf.Variable(0.1, trainable=True, dtype=tf.float32, name='k1_raw')
        self.k1 = tf.math.sigmoid(self.k1_raw) * 0.5      # seepage decay coefficient in (0, 0.5)
        self.k2_raw = tf.Variable(0.01, trainable=True, dtype=tf.float32, name='k2_raw')
        self.k2 = tf.math.sigmoid(self.k2_raw) * 0.1      # level-dependent decay term in (0, 0.1)
        self.alpha_raw = tf.Variable(0.1, trainable=True, dtype=tf.float32, name='alpha_raw')
        self.alpha = tf.math.sigmoid(self.alpha_raw) * 1.0  # external-recharge amplitude in (0, 1)
        self.beta_raw = tf.Variable(0.05, trainable=True, dtype=tf.float32, name='beta_raw')
        self.beta = tf.math.sigmoid(self.beta_raw) * 0.2    # external-recharge rate in (0, 0.2)

    def call(self, inputs, training=False):
        """Forward pass: 8 feature tensors in, scaled next-level prediction out."""
        # Input feature unpacking (all shaped (batch, 1)).
        year, month_sin, month_cos, day_sin, day_cos, h, dt_norm, log_dt_norm = inputs
        # Feature combination — note the current level h enters only through
        # the interaction terms h*dt_norm and h*log_dt_norm, not directly.
        x = tf.concat([
            year, month_sin, month_cos, day_sin, day_cos,
            dt_norm, log_dt_norm,
            h * dt_norm,
            h * log_dt_norm
        ], axis=1)
        # Pass through the hidden stack; Dropout is applied per layer.
        for dense_layer, dropout_layer in zip(self.dense_layers, self.dropout_layers):
            x = dense_layer(x)
            x = dropout_layer(x, training=training)
        return self.final_layer(x)

    def physics_loss(self, inputs, h_next_pred_scaled, scaler_h, scaler_dt, training=False):
        """Compute the physics residual loss in the original (un-normalized) space.

        Residual of h_{t+1} = h_t * exp(-(k1 + k2*h_t)*dt) + alpha*(1 - exp(-beta*dt)).
        NOTE(review): sklearn inverse_transform converts its input to a NumPy
        array, so gradients w.r.t. the network output do NOT flow through this
        loss — only the physical parameters receive gradients here. Confirm
        this is intended; eager mode is required for it to run at all.
        """
        # Input unpacking (only the current scaled level and dt are used).
        year, month_sin, month_cos, day_sin, day_cos, h_current_scaled, dt_norm, log_dt_norm = inputs
        # De-normalize the current water level.
        h_current_raw = scaler_h.inverse_transform(h_current_scaled)
        # De-normalize the time step (days).
        dt_raw = scaler_dt.inverse_transform(dt_norm)
        # De-normalize the prediction.
        h_next_pred_raw = scaler_h.inverse_transform(h_next_pred_scaled)
        # Physical parameters — recomputed from the raw variables each call
        # so they track training (unlike the cached self.k1 etc.).
        k1 = tf.math.sigmoid(self.k1_raw) * 0.5
        k2 = tf.math.sigmoid(self.k2_raw) * 0.1
        alpha = tf.math.sigmoid(self.alpha_raw) * 1.0
        beta = tf.math.sigmoid(self.beta_raw) * 0.2
        # Physics-equation terms; exponents are clipped to avoid overflow.
        exponent = - (k1 + k2 * h_current_raw) * dt_raw
        exponent = tf.clip_by_value(exponent, -50.0, 50.0)
        decay_term = h_current_raw * tf.exp(exponent)
        beta_exp = -beta * dt_raw
        beta_exp = tf.clip_by_value(beta_exp, -50.0, 50.0)
        external_term = alpha * (1 - tf.exp(beta_exp))
        residual = h_next_pred_raw - (decay_term + external_term)
        return tf.reduce_mean(tf.square(residual))


class DamSeepageModel:
    """Tkinter application: load Excel data, train the PINN, predict, plot.

    NOTE(review): calculate_metrics, save_results and reset are referenced
    below but were elided from this paste ("unchanged" placeholders) — the
    program will raise AttributeError until they are restored.
    """

    def __init__(self, root):
        self.root = root
        self.root.title("大坝渗流预测模型(PINNs)")
        self.root.geometry("1200x800")
        # Data holders and model state.
        self.train_df = None
        self.test_df = None
        self.model = None
        self.evaluation_metrics = {}
        # One MinMaxScaler per feature group; fitted on the training set only.
        self.scaler_year = MinMaxScaler(feature_range=(0, 1))
        self.scaler_month = MinMaxScaler(feature_range=(0, 1))
        self.scaler_day = MinMaxScaler(feature_range=(0, 1))
        self.scaler_dt = MinMaxScaler(feature_range=(0, 1))
        self.scaler_log_dt = MinMaxScaler(feature_range=(0, 1))
        self.scaler_h = MinMaxScaler(feature_range=(0, 1))
        # Build the UI.
        self.create_widgets()

    def create_widgets(self):
        """Build the whole UI: control panel on the left, plots on the right."""
        # Main container.
        main_frame = ttk.Frame(self.root, padding=10)
        main_frame.pack(fill=tk.BOTH, expand=True)
        # Left-hand control panel.
        control_frame = ttk.LabelFrame(main_frame, text="模型控制", padding=10)
        control_frame.pack(side=tk.LEFT, fill=tk.Y, padx=5, pady=5)
        # --- File selection section ---
        file_frame = ttk.LabelFrame(control_frame, text="数据文件", padding=10)
        file_frame.pack(fill=tk.X, pady=5)
        # Training-set picker.
        ttk.Label(file_frame, text="训练集:").grid(row=0, column=0, sticky=tk.W, pady=5)
        self.train_file_var = tk.StringVar()
        ttk.Entry(file_frame, textvariable=self.train_file_var, width=30, state='readonly').grid(
            row=0, column=1, padx=5)
        ttk.Button(file_frame, text="选择文件",
                   command=lambda: self.select_file("train")).grid(row=0, column=2)
        # Test-set picker.
        ttk.Label(file_frame, text="测试集:").grid(row=1, column=0, sticky=tk.W, pady=5)
        self.test_file_var = tk.StringVar()
        ttk.Entry(file_frame, textvariable=self.test_file_var, width=30,
                  state='readonly').grid(row=1, column=1, padx=5)
        ttk.Button(file_frame, text="选择文件",
                   command=lambda: self.select_file("test")).grid(row=1, column=2)
        # --- PINN hyper-parameter section ---
        param_frame = ttk.LabelFrame(control_frame, text="PINNs参数", padding=10)
        param_frame.pack(fill=tk.X, pady=10)
        # Validation split ratio.
        ttk.Label(param_frame, text="验证集比例:").grid(row=0, column=0, sticky=tk.W, pady=5)
        self.split_ratio_var = tk.DoubleVar(value=0.2)
        ttk.Spinbox(param_frame, from_=0, to=1, increment=0.05,
                    textvariable=self.split_ratio_var, width=10).grid(row=0, column=1, padx=5)
        # Number of hidden layers.
        ttk.Label(param_frame, text="网络层数:").grid(row=1, column=0, sticky=tk.W, pady=5)
        self.num_layers_var = tk.IntVar(value=4)
        ttk.Spinbox(param_frame, from_=2, to=8, increment=1,
                    textvariable=self.num_layers_var, width=10).grid(row=1, column=1, padx=5)
        # Units per hidden layer.
        ttk.Label(param_frame, text="神经元数/层:").grid(row=2, column=0, sticky=tk.W, pady=5)
        self.hidden_units_var = tk.IntVar(value=32)
        ttk.Spinbox(param_frame, from_=16, to=128, increment=4,
                    textvariable=self.hidden_units_var, width=10).grid(row=2, column=1, padx=5)
        # Training epochs.
        ttk.Label(param_frame, text="训练轮次:").grid(row=3, column=0, sticky=tk.W, pady=5)
        self.epochs_var = tk.IntVar(value=500)
        ttk.Spinbox(param_frame, from_=100, to=2000, increment=100,
                    textvariable=self.epochs_var, width=10).grid(row=3, column=1, padx=5)
        # Physics-loss weight.
        ttk.Label(param_frame, text="物理损失权重:").grid(row=4, column=0, sticky=tk.W, pady=5)
        self.physics_weight_var = tk.DoubleVar(value=0.5)
        ttk.Spinbox(param_frame, from_=0.1, to=1.0, increment=0.1,
                    textvariable=self.physics_weight_var, width=10).grid(row=4, column=1, padx=5)
        # --- Action buttons ---
        btn_frame = ttk.Frame(control_frame)
        btn_frame.pack(fill=tk.X, pady=10)
        ttk.Button(btn_frame, text="训练模型", command=self.train_model).pack(side=tk.LEFT, padx=5)
        ttk.Button(btn_frame, text="预测结果", command=self.predict).pack(side=tk.LEFT, padx=5)
        ttk.Button(btn_frame, text="保存结果", command=self.save_results).pack(side=tk.LEFT, padx=5)
        ttk.Button(btn_frame, text="重置", command=self.reset).pack(side=tk.RIGHT, padx=5)
        # Status bar.
        self.status_var = tk.StringVar(value="就绪")
        status_bar = ttk.Label(control_frame, textvariable=self.status_var,
                               relief=tk.SUNKEN, anchor=tk.W)
        status_bar.pack(fill=tk.X, side=tk.BOTTOM)
        # Right-hand results area.
        result_frame = ttk.Frame(main_frame)
        result_frame.pack(side=tk.RIGHT, fill=tk.BOTH, expand=True, padx=5, pady=5)
        # Notebook with two tabs: loss curves and predictions.
        self.notebook = ttk.Notebook(result_frame)
        self.notebook.pack(fill=tk.BOTH, expand=True)
        # Loss-curve tab.
        self.loss_frame = ttk.Frame(self.notebook)
        self.notebook.add(self.loss_frame, text="训练损失")
        # Prediction tab.
        self.prediction_frame = ttk.Frame(self.notebook)
        self.notebook.add(self.prediction_frame, text="预测结果")
        # Metrics readout above the prediction chart.
        self.metrics_var = tk.StringVar()
        metrics_label = ttk.Label(
            self.prediction_frame,
            textvariable=self.metrics_var,
            font=('TkDefaultFont', 10, 'bold'),
            relief='ridge',
            padding=5
        )
        metrics_label.pack(fill=tk.X, padx=5, pady=5)
        # Chart container frame.
        chart_frame = ttk.Frame(self.prediction_frame)
        chart_frame.pack(fill=tk.BOTH, expand=True, padx=5, pady=5)
        # Prediction figure + canvas.
        self.fig, self.ax = plt.subplots(figsize=(10, 6))
        self.canvas = FigureCanvasTkAgg(self.fig, master=chart_frame)
        self.canvas.get_tk_widget().pack(side=tk.TOP, fill=tk.BOTH, expand=True)
        # Matplotlib toolbar (zoom/pan). NOTE(review): the canvas widget is
        # pack()ed a second time after the toolbar — harmless but redundant.
        from matplotlib.backends.backend_tkagg import NavigationToolbar2Tk
        self.toolbar = NavigationToolbar2Tk(self.canvas, chart_frame)
        self.toolbar.update()
        self.canvas.get_tk_widget().pack(side=tk.TOP, fill=tk.BOTH, expand=True)
        # Loss-curve canvas, with its own toolbar.
        loss_chart_frame = ttk.Frame(self.loss_frame)
        loss_chart_frame.pack(fill=tk.BOTH, expand=True, padx=5, pady=5)
        self.loss_fig, self.loss_ax = plt.subplots(figsize=(10, 4))
        self.loss_canvas = FigureCanvasTkAgg(self.loss_fig, master=loss_chart_frame)
        self.loss_canvas.get_tk_widget().pack(side=tk.TOP, fill=tk.BOTH, expand=True)
        self.loss_toolbar = NavigationToolbar2Tk(self.loss_canvas, loss_chart_frame)
        self.loss_toolbar.update()
        self.loss_canvas.get_tk_widget().pack(side=tk.TOP, fill=tk.BOTH, expand=True)

    def preprocess_data(self, df, is_training=False):
        """Build time features, normalize, and return the indexed DataFrame.

        Expects columns 'year', 'month', 'day' and '水位' (water level);
        fits the scalers only when is_training is True.
        """
        # Build a datetime index from the date columns if not already present.
        if 'datetime' not in df.columns:
            time_cols = ['year', 'month', 'day']
            for col in ['hour', 'minute', 'second']:
                if col not in df.columns:
                    df[col] = 0
            df['datetime'] = pd.to_datetime(df[time_cols])
        df = df.set_index('datetime')
        # Time-step length in days between consecutive samples.
        if 'dt' not in df.columns:
            df['dt'] = df.index.to_series().diff().dt.total_seconds() / 86400
            df['dt'] = df['dt'].fillna(df['dt'].mean())
        # Replace non-positive or implausibly large (>30 days) steps by the mean.
        dt_mean = df['dt'].mean()
        df.loc[df['dt'] <= 0, 'dt'] = dt_mean
        df.loc[df['dt'] > 30, 'dt'] = dt_mean
        # Log-transformed step as an extra feature.
        df['log_dt'] = np.log1p(df['dt'])
        # Cyclic encodings for month and day; year kept linear.
        df['year_norm'] = df['year']
        df['month_sin'] = np.sin(2 * np.pi * df['month'] / 12)
        df['month_cos'] = np.cos(2 * np.pi * df['month'] / 12)
        df['day_sin'] = np.sin(2 * np.pi * df['day'] / 31)
        df['day_cos'] = np.cos(2 * np.pi * df['day'] / 31)
        # Fit scalers on training data only, so test data reuses train scaling.
        if is_training:
            self.scaler_year.fit(df[['year_norm']])
            self.scaler_month.fit(df[['month_sin', 'month_cos']])
            self.scaler_day.fit(df[['day_sin', 'day_cos']])
            self.scaler_dt.fit(df[['dt']])
            self.scaler_log_dt.fit(df[['log_dt']])
            self.scaler_h.fit(df[['水位']])
        # Apply the normalization.
        df[['year_norm']] = self.scaler_year.transform(df[['year_norm']])
        df[['month_sin', 'month_cos']] = self.scaler_month.transform(df[['month_sin', 'month_cos']])
        df[['day_sin', 'day_cos']] = self.scaler_day.transform(df[['day_sin', 'day_cos']])
        df[['dt_norm']] = self.scaler_dt.transform(df[['dt']])
        df[['log_dt_norm']] = self.scaler_log_dt.transform(df[['log_dt']])
        df[['水位_norm']] = self.scaler_h.transform(df[['水位']])
        return df

    def select_file(self, file_type):
        """Pick an Excel file ('train' or 'test'), validate columns, preprocess."""
        try:
            file_path = filedialog.askopenfilename(
                title=f"选择{file_type}集Excel文件",
                filetypes=[("Excel文件", "*.xlsx *.xls"), ("所有文件", "*.*")]
            )
            if not file_path:
                return
            df = pd.read_excel(file_path)
            # Validate required columns before any processing.
            required_cols = ['year', 'month', 'day', '水位']
            missing_cols = [col for col in required_cols if col not in df.columns]
            if missing_cols:
                messagebox.showerror("列名错误", f"缺少必需列: {', '.join(missing_cols)}")
                return
            # Preprocess; the training file also fits the scalers.
            is_training = (file_type == "train")
            df = self.preprocess_data(df, is_training=is_training)
            # Store the processed frame and update the UI.
            if file_type == "train":
                self.train_df = df
                self.train_file_var.set(os.path.basename(file_path))
                self.status_var.set(f"已加载训练集: {len(self.train_df)}条数据")
            else:
                self.test_df = df
                self.test_file_var.set(os.path.basename(file_path))
                self.status_var.set(f"已加载测试集: {len(self.test_df)}条数据")
        except Exception as e:
            error_msg = f"文件读取失败: {str(e)}\n\n请确保:\n1. 文件不是打开状态\n2. 文件格式正确\n3. 包含必需的时间和水位列"
            messagebox.showerror("文件错误", error_msg)

    # ... [calculate_metrics method unchanged in the original post — it is NOT
    #      defined in this snippet; it must return a dict with at least the
    #      keys 'MSE', 'RMSE', 'MAE', 'MAPE', 'R2' judging by its callers] ...

    def train_model(self):
        """Train the PINN with a custom loop, early stopping and live metrics."""
        if self.train_df is None:
            messagebox.showwarning("警告", "请先选择训练集文件")
            return
        try:
            self.status_var.set("正在预处理数据...")
            self.root.update()
            # Chronological split of the training file into train/validation.
            split_ratio = 1 - self.split_ratio_var.get()
            split_idx = int(len(self.train_df) * split_ratio)
            train_subset = self.train_df.iloc[:split_idx]
            valid_subset = self.train_df.iloc[split_idx:]
            # Need at least two time steps on each side to form (h_t, h_{t+1}) pairs.
            if len(train_subset) < 2 or len(valid_subset) < 2:
                messagebox.showerror("数据错误", "训练集数据量不足(至少需要2个时间步)")
                return
            # Training tensors: features at t+1 paired with level at t,
            # target is the level at t+1.
            train_inputs = [
                train_subset['year_norm'].values[1:].reshape(-1, 1).astype(np.float32),
                train_subset['month_sin'].values[1:].reshape(-1, 1).astype(np.float32),
                train_subset['month_cos'].values[1:].reshape(-1, 1).astype(np.float32),
                train_subset['day_sin'].values[1:].reshape(-1, 1).astype(np.float32),
                train_subset['day_cos'].values[1:].reshape(-1, 1).astype(np.float32),
                train_subset['水位_norm'].values[:-1].reshape(-1, 1).astype(np.float32),
                train_subset['dt_norm'].values[1:].reshape(-1, 1).astype(np.float32),
                train_subset['log_dt_norm'].values[1:].reshape(-1, 1).astype(np.float32)
            ]
            h_next_train_scaled = train_subset['水位_norm'].values[1:].reshape(-1, 1).astype(np.float32)
            h_next_train_true = train_subset['水位'].values[1:].reshape(-1, 1)
            # Validation tensors, built the same way.
            valid_inputs = [
                valid_subset['year_norm'].values[1:].reshape(-1, 1).astype(np.float32),
                valid_subset['month_sin'].values[1:].reshape(-1, 1).astype(np.float32),
                valid_subset['month_cos'].values[1:].reshape(-1, 1).astype(np.float32),
                valid_subset['day_sin'].values[1:].reshape(-1, 1).astype(np.float32),
                valid_subset['day_cos'].values[1:].reshape(-1, 1).astype(np.float32),
                valid_subset['水位_norm'].values[:-1].reshape(-1, 1).astype(np.float32),
                valid_subset['dt_norm'].values[1:].reshape(-1, 1).astype(np.float32),
                valid_subset['log_dt_norm'].values[1:].reshape(-1, 1).astype(np.float32)
            ]
            h_next_valid_scaled = valid_subset['水位_norm'].values[1:].reshape(-1, 1).astype(np.float32)
            h_next_valid_true = valid_subset['水位'].values[1:].reshape(-1, 1)
            # Model and optimizer.
            self.model = PINNModel(
                num_layers=self.num_layers_var.get(),
                hidden_units=self.hidden_units_var.get()
            )
            # Exponential learning-rate decay (x0.95 every 100 steps).
            initial_lr = 0.001
            lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
                initial_learning_rate=initial_lr,
                decay_steps=100,
                decay_rate=0.95,
                staircase=True
            )
            optimizer = Adam(learning_rate=lr_schedule)

            # Shuffled, batched tf.data pipeline over the multi-input tuples.
            def create_dataset(inputs, targets):
                dataset = tf.data.Dataset.from_tensor_slices((
                    tuple(inputs),
                    targets
                ))
                return dataset.shuffle(buffer_size=1024).batch(32)

            train_dataset = create_dataset(train_inputs, h_next_train_scaled)
            valid_dataset = create_dataset(valid_inputs, h_next_valid_scaled)
            # Training-history accumulators.
            train_data_loss_history = []
            physics_loss_history = []
            valid_data_loss_history = []
            train_metrics_history = []
            valid_metrics_history = []
            # Early-stopping state: patience is a third of the epoch budget.
            patience = int(self.epochs_var.get() / 3)
            min_delta = 1e-4
            best_valid_loss = float('inf')
            wait = 0
            best_epoch = 0
            best_weights = None
            start_time = time.time()
            # Custom training loop.
            for epoch in range(self.epochs_var.get()):
                # Current learning rate (for the status display only).
                current_lr = optimizer.learning_rate.numpy()
                # Physics weight is annealed down to half its value by the last epoch.
                current_epoch_ratio = epoch / self.epochs_var.get()
                physics_weight = self.physics_weight_var.get() * (1 - current_epoch_ratio * 0.5)
                # --- Training pass ---
                epoch_train_data_loss = []
                epoch_physics_loss = []
                train_pred_scaled = []
                for step, (inputs, h_next_batch) in enumerate(train_dataset):
                    with tf.GradientTape() as tape:
                        # Predict the next-step water level (scaled space).
                        h_pred = self.model(inputs, training=True)
                        data_loss = tf.reduce_mean(tf.square(h_next_batch - h_pred))
                        # Physics residual loss (see PINNModel.physics_loss note).
                        physics_loss = self.model.physics_loss(
                            inputs, h_pred, self.scaler_h, self.scaler_dt, training=True
                        )
                        # Weighted total loss.
                        loss = data_loss + physics_weight * physics_loss
                    grads = tape.gradient(loss, self.model.trainable_variables)
                    optimizer.apply_gradients(zip(grads, self.model.trainable_variables))
                    epoch_train_data_loss.append(data_loss.numpy())
                    epoch_physics_loss.append(physics_loss.numpy())
                    train_pred_scaled.append(h_pred.numpy())
                # Collect training predictions and compute epoch metrics.
                # NOTE(review): the dataset is shuffled, so the concatenated
                # prediction order does not match h_next_train_true's order —
                # these per-epoch training metrics are questionable; verify.
                train_pred_scaled = np.concatenate(train_pred_scaled, axis=0)
                train_pred_true = self.scaler_h.inverse_transform(train_pred_scaled)
                train_metrics = self.calculate_metrics(
                    h_next_train_true.flatten(), train_pred_true.flatten()
                )
                train_metrics_history.append(train_metrics)
                # --- Validation pass ---
                epoch_valid_data_loss = []
                valid_pred_scaled = []
                for (inputs, h_next_batch) in valid_dataset:
                    h_pred = self.model(inputs, training=False)
                    valid_data_loss = tf.reduce_mean(tf.square(h_next_batch - h_pred))
                    epoch_valid_data_loss.append(valid_data_loss.numpy())
                    valid_pred_scaled.append(h_pred.numpy())
                # Collect validation predictions and metrics.
                valid_pred_scaled = np.concatenate(valid_pred_scaled, axis=0)
                valid_pred_true = self.scaler_h.inverse_transform(valid_pred_scaled)
                valid_metrics = self.calculate_metrics(
                    h_next_valid_true.flatten(), valid_pred_true.flatten()
                )
                valid_metrics_history.append(valid_metrics)
                # Epoch-average losses.
                avg_train_data_loss = np.mean(epoch_train_data_loss)
                avg_physics_loss = np.mean(epoch_physics_loss)
                avg_valid_data_loss = np.mean(epoch_valid_data_loss)
                train_data_loss_history.append(avg_train_data_loss)
                physics_loss_history.append(avg_physics_loss)
                valid_data_loss_history.append(avg_valid_data_loss)
                # Early-stopping bookkeeping on validation loss.
                current_valid_loss = avg_valid_data_loss
                if current_valid_loss < best_valid_loss - min_delta:
                    best_valid_loss = current_valid_loss
                    best_epoch = epoch + 1
                    wait = 0
                    best_weights = self.model.get_weights()
                else:
                    wait += 1
                    if wait >= patience:
                        self.status_var.set(f"触发早停!最佳轮次: {best_epoch},最佳验证损失: {best_valid_loss:.4f}")
                        if best_weights is not None:
                            self.model.set_weights(best_weights)
                        break
                # Status-bar update every epoch.
                if epoch % 1 == 0:
                    train_rmse = train_metrics['RMSE']
                    valid_rmse = valid_metrics['RMSE']
                    train_r2 = train_metrics['R2']
                    valid_r2 = valid_metrics['R2']
                    elapsed = time.time() - start_time
                    self.status_var.set(
                        f"训练中 | 轮次: {epoch + 1}/{self.epochs_var.get()} | "
                        f"学习率: {current_lr:.6f} | "
                        f"训练RMSE: {train_rmse:.4f} | 验证RMSE: {valid_rmse:.4f} | "
                        f"训练R²: {train_r2:.4f} | 验证R²: {valid_r2:.4f} | "
                        f"k1: {self.model.k1.numpy():.6f}, k2: {self.model.k2.numpy():.6f} | "
                        f"时间: {elapsed:.1f}秒 | 早停等待: {wait}/{patience}"
                    )
                    self.root.update()
            # Plot the loss curves (log-scale y).
            self.loss_ax.clear()
            epochs_range = range(1, len(train_data_loss_history) + 1)
            self.loss_ax.plot(epochs_range, train_data_loss_history, 'b-', label='训练数据损失')
            self.loss_ax.plot(epochs_range, physics_loss_history, 'r--', label='物理损失')
            self.loss_ax.plot(epochs_range, valid_data_loss_history, 'g-.', label='验证数据损失')
            self.loss_ax.set_title('PINNs训练与验证损失')
            self.loss_ax.set_xlabel('轮次')
            self.loss_ax.set_ylabel('损失', rotation=0)
            self.loss_ax.legend()
            self.loss_ax.grid(True, alpha=0.3)
            self.loss_ax.set_yscale('log')
            self.loss_canvas.draw()
            # Completion message (early-stopped vs full run).
            elapsed = time.time() - start_time
            if wait >= patience:
                completion_msg = (
                    f"早停触发 | 最佳轮次: {best_epoch} | 最佳验证损失: {best_valid_loss:.4f} | "
                    f"最佳验证RMSE: {valid_metrics_history[best_epoch - 1]['RMSE']:.4f} | "
                    f"总时间: {elapsed:.1f}秒"
                )
            else:
                completion_msg = (
                    f"训练完成 | 总轮次: {self.epochs_var.get()} | "
                    f"最终训练RMSE: {train_metrics_history[-1]['RMSE']:.4f} | "
                    f"最终验证RMSE: {valid_metrics_history[-1]['RMSE']:.4f} | "
                    f"最终训练R²: {train_metrics_history[-1]['R2']:.4f} | "
                    f"最终验证R²: {valid_metrics_history[-1]['R2']:.4f} | "
                    f"总时间: {elapsed:.1f}秒"
                )
            # Keep the full training history for later saving/plotting.
            self.train_history = {
                'train_data_loss': train_data_loss_history,
                'physics_loss': physics_loss_history,
                'valid_data_loss': valid_data_loss_history,
                'train_metrics': train_metrics_history,
                'valid_metrics': valid_metrics_history
            }
            # Snapshot the learned physical parameters.
            # NOTE(review): these read the cached init-time tensors (see
            # PINNModel.__init__), so they reflect INITIAL, not trained, values.
            self.learned_params = {
                "k1": self.model.k1.numpy(),
                "k2": self.model.k2.numpy(),
                "alpha": self.model.alpha.numpy(),
                "beta": self.model.beta.numpy()
            }
            self.status_var.set(completion_msg)
            messagebox.showinfo("训练完成", f"PINNs模型训练成功完成!\n{completion_msg}")
        except Exception as e:
            messagebox.showerror("训练错误", f"模型训练失败:\n{str(e)}")
            self.status_var.set("训练失败")
            import traceback
            traceback.print_exc()

    def predict(self):
        """Recursive multi-step prediction with Teacher Forcing and MC Dropout."""
        if self.model is None:
            messagebox.showwarning("警告", "请先训练模型")
            return
        if self.test_df is None:
            messagebox.showwarning("警告", "请先选择测试集文件")
            return
        try:
            self.status_var.set("正在生成预测(使用Teacher Forcing和MC Dropout)...")
            self.root.update()
            # Test-set feature matrices.
            t_test = self.test_df[['year_norm', 'month_sin', 'month_cos', 'day_sin', 'day_cos']].values
            h_test_scaled = self.test_df['水位_norm'].values.reshape(-1, 1)
            dt_test = self.test_df[['dt_norm', 'log_dt_norm']].values
            actual_values = self.test_df['水位'].values.reshape(-1, 1)
            test_time = self.test_df.index
            # Prediction-loop parameters.
            n = len(t_test)
            mc_iterations = 100
            adaptive_forcing = True
            # One full recursive trajectory per MC sample.
            mc_predictions_scaled = np.zeros((mc_iterations, n, 1), dtype=np.float32)
            # Monte-Carlo sampling loop (Dropout kept active via training=True).
            for mc_iter in range(mc_iterations):
                predicted_scaled = np.zeros((n, 1), dtype=np.float32)
                predicted_scaled[0] = h_test_scaled[0]  # seed with the true first value
                # Recursive one-step-ahead prediction.
                for i in range(1, n):
                    # Adaptive teacher forcing: probability grows from 0.7
                    # toward 0.9 as i advances.
                    if adaptive_forcing:
                        teacher_forcing_prob = 0.7 + 0.2 * min(1.0, i / (0.7 * n))
                    else:
                        teacher_forcing_prob = 0.7
                    # Choose the true previous level vs the model's own output.
                    use_actual = np.random.rand() < teacher_forcing_prob
                    if use_actual and i < n - 1:
                        h_prev = h_test_scaled[i - 1:i]
                    else:
                        h_prev = predicted_scaled[i - 1:i]
                    # Assemble the 8-tensor input for this step.
                    inputs = [
                        t_test[i:i + 1, 0:1],  # year_norm
                        t_test[i:i + 1, 1:2],  # month_sin
                        t_test[i:i + 1, 2:3],  # month_cos
                        t_test[i:i + 1, 3:4],  # day_sin
                        t_test[i:i + 1, 4:5],  # day_cos
                        h_prev,
                        dt_test[i:i + 1, 0:1],  # dt_norm
                        dt_test[i:i + 1, 1:2]   # log_dt_norm
                    ]
                    # NN prediction; training=True deliberately keeps Dropout on.
                    h_pred = self.model(inputs, training=True).numpy()
                    # Physics-model prediction from the learned parameters.
                    k1 = self.learned_params['k1']
                    k2 = self.learned_params['k2']
                    alpha = self.learned_params['alpha']
                    beta = self.learned_params['beta']
                    # De-normalize the previous level and the time step.
                    h_prev_raw = self.scaler_h.inverse_transform(h_prev)
                    dt_i = self.scaler_dt.inverse_transform(dt_test[i:i + 1, 0:1])
                    # Same physics equation as in physics_loss (un-clipped here).
                    exponent = - (k1 + k2 * h_prev_raw) * dt_i
                    decay_term = h_prev_raw * np.exp(exponent)
                    external_term = alpha * (1 - np.exp(-beta * dt_i))
                    physics_pred = decay_term + external_term
                    # De-normalize the NN prediction.
                    nn_pred_raw = self.scaler_h.inverse_transform(h_pred)
                    # Blend: 30% physics, 70% neural network.
                    physics_weight = 0.3
                    final_pred_raw = physics_weight * physics_pred + (1 - physics_weight) * nn_pred_raw
                    final_pred_scaled = self.scaler_h.transform(final_pred_raw)
                    predicted_scaled[i] = final_pred_scaled
                mc_predictions_scaled[mc_iter] = predicted_scaled
            # MC statistics across samples.
            mean_pred_scaled = np.mean(mc_predictions_scaled, axis=0)
            std_pred_scaled = np.std(mc_predictions_scaled, axis=0)
            # De-normalize. NOTE(review): inverse-transforming the std with a
            # MinMaxScaler also adds the scaler's offset — the 95% band width
            # is therefore biased; verify this is acceptable.
            predictions = self.scaler_h.inverse_transform(mean_pred_scaled)
            uncertainty = self.scaler_h.inverse_transform(std_pred_scaled) * 1.96  # 95% CI
            # Plot.
            self.plot_results(test_time, actual_values, predictions, uncertainty)
            # Metrics (first point excluded — it is seeded with the truth).
            eval_actual = actual_values[1:].flatten()
            eval_pred = predictions[1:].flatten()
            self.evaluation_metrics = self.calculate_metrics(eval_actual, eval_pred)
            # Uncertainty summary metrics.
            avg_uncertainty = np.mean(uncertainty)
            max_uncertainty = np.max(uncertainty)
            self.evaluation_metrics['Avg Uncertainty'] = avg_uncertainty
            self.evaluation_metrics['Max Uncertainty'] = max_uncertainty
            # Show metrics. NOTE(review): teacher_forcing_prob here is the
            # loop variable's LAST value, and is undefined if n <= 1.
            self.display_metrics(teacher_forcing_prob)
            # Keep results for save_results().
            self.predictions = predictions
            self.uncertainty = uncertainty
            self.actual_values = actual_values
            self.test_time = test_time
            self.mc_predictions = mc_predictions_scaled
            self.status_var.set(f"预测完成(MC Dropout采样{mc_iterations}次)")
        except Exception as e:
            messagebox.showerror("预测错误", f"预测失败:\n{str(e)}")
            self.status_var.set("预测失败")
            import traceback
            traceback.print_exc()

    def plot_results(self, test_time, actual_values, predictions, uncertainty):
        """Plot actual vs predicted levels with the 95% uncertainty band."""
        self.ax.clear()
        # y-axis range centred on the median, at least ±10 units wide.
        median_val = np.median(actual_values)
        data_range = np.std(actual_values) * 4
        y_half_range = max(data_range, 10)
        y_min_adjusted = median_val - y_half_range
        y_max_adjusted = median_val + y_half_range
        if y_max_adjusted - y_min_adjusted < 1:
            y_min_adjusted -= 5
            y_max_adjusted += 5
        # Series and confidence band.
        self.ax.plot(test_time, actual_values, 'b-', label='真实值', linewidth=2)
        self.ax.plot(test_time, predictions, 'r--', label='预测均值', linewidth=2)
        self.ax.fill_between(
            test_time,
            (predictions - uncertainty).flatten(),
            (predictions + uncertainty).flatten(),
            color='orange', alpha=0.3, label='95%置信区间'
        )
        # Axes and labels.
        self.ax.set_ylim(y_min_adjusted, y_max_adjusted)
        self.ax.set_title('大坝渗流水位预测(PINNs with MC Dropout)')
        self.ax.set_xlabel('时间')
        self.ax.set_ylabel('测压管水位', rotation=0)
        self.ax.legend(loc='best')
        # Date-axis ticks: major every year, minor every 2 months.
        self.ax.xaxis.set_major_locator(mdates.YearLocator())
        self.ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y'))
        self.ax.xaxis.set_minor_locator(mdates.MonthLocator(interval=2))
        self.ax.grid(which='minor', axis='x', linestyle=':', color='gray', alpha=0.3)
        self.ax.grid(which='major', axis='y', linestyle='-', color='lightgray', alpha=0.5)
        # Redraw.
        self.canvas.draw()

    def display_metrics(self, teacher_forcing_prob):
        """Render evaluation metrics in the label and annotate the chart."""
        metrics_text = (
            f"MSE: {self.evaluation_metrics['MSE']:.4f} | "
            f"RMSE: {self.evaluation_metrics['RMSE']:.4f} | "
            f"MAE: {self.evaluation_metrics['MAE']:.4f} | "
            f"MAPE: {self.evaluation_metrics['MAPE']:.2f}% | "
            f"R²: {self.evaluation_metrics['R2']:.4f}\n"
            f"平均不确定性: {self.evaluation_metrics['Avg Uncertainty']:.4f} | "
            f"最大不确定性: {self.evaluation_metrics['Max Uncertainty']:.4f}"
        )
        self.metrics_var.set(metrics_text)
        # Metrics annotation above the axes.
        self.ax.text(
            0.5, 1.05, metrics_text,
            transform=self.ax.transAxes,
            ha='center', fontsize=8,
            bbox=dict(facecolor='white', alpha=0.8)
        )
        params_text = (
            f"物理参数: k1={self.learned_params['k1']:.4f}, "
            f"k2={self.learned_params['k2']:.4f}, "
            f"alpha={self.learned_params['alpha']:.4f}, "
            f"beta={self.learned_params['beta']:.4f} | "
            f"Teacher Forcing概率: {teacher_forcing_prob:.2f}"
        )
        self.ax.text(
            0.5, 1.12, params_text,
            transform=self.ax.transAxes,
            ha='center', fontsize=8,
            bbox=dict(facecolor='white', alpha=0.8)
        )
        # Re-layout so the annotations fit.
        plt.tight_layout(pad=2.0)

    # ... [save_results and reset methods unchanged in the original post —
    #      they are NOT defined in this snippet but are wired to the
    #      "保存结果" and "重置" buttons in create_widgets] ...


if __name__ == "__main__":
    root = tk.Tk()
    app = DamSeepageModel(root)
    root.mainloop()

# 检查错误  ("check errors" — leftover question text from the source page, not code)
08-01
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值