import tkinter as tk
from tkinter import ttk, filedialog, messagebox
import pandas as pd
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, Lambda
from tensorflow.keras.optimizers import Adam
from sklearn.preprocessing import MinMaxScaler
import os
import time
import warnings
import matplotlib.dates as mdates
# Silence UserWarnings attributed to the tensorflow module from this point on.
warnings.filterwarnings('ignore', category=UserWarning, module='tensorflow')
# Fonts able to render the Chinese plot labels, listed in order of preference.
# NOTE: plt.rcParams is the very same object as mpl.rcParams, so the list is
# set once only; a second assignment through plt would overwrite this
# fallback list with a single font.
mpl.rcParams['font.sans-serif'] = ['SimHei', 'Microsoft YaHei', 'Arial Unicode MS']
# Render the minus sign as an ASCII hyphen so it does not depend on the
# selected font providing the Unicode minus glyph.
mpl.rcParams['axes.unicode_minus'] = False
class PINNModel(tf.keras.Model):
    """Dense network that predicts the next (scaled) water level, plus four
    trainable parameters of a decay equation used as a physics penalty.

    The physics equation evaluated in ``physics_loss`` is::

        h_next = h * exp(-(k1 + k2 * h) * dt) + alpha * (1 - exp(-beta * dt))

    Each parameter is stored as an unconstrained ``*_raw`` variable and exposed
    through a property as ``sigmoid(raw) * upper_bound``.
    """

    # Upper bounds applied to the sigmoid of each raw parameter.
    _K1_MAX = 0.5
    _K2_MAX = 0.1
    _ALPHA_MAX = 1.0
    _BETA_MAX = 0.2

    def __init__(self, num_layers=4, hidden_units=32, dropout_rate=0.1, **kwargs):
        """Create the hidden stack and the raw physical parameters.

        Args:
            num_layers: number of tanh hidden layers, each followed by dropout.
            hidden_units: width of every hidden layer.
            dropout_rate: rate passed to every Dropout layer.
            **kwargs: forwarded to ``tf.keras.Model``.
        """
        super().__init__(**kwargs)
        # One Dropout layer per Dense layer; they are applied pairwise in call().
        self.dense_layers = [Dense(hidden_units, activation='tanh') for _ in range(num_layers)]
        self.dropout_layers = [tf.keras.layers.Dropout(dropout_rate) for _ in range(num_layers)]
        self.final_layer = Dense(1, activation='linear')

        # Unconstrained trainable variables behind the bounded parameters.
        self.k1_raw = tf.Variable(0.1, trainable=True, dtype=tf.float32, name='k1_raw')
        self.k2_raw = tf.Variable(0.01, trainable=True, dtype=tf.float32, name='k2_raw')
        self.alpha_raw = tf.Variable(0.1, trainable=True, dtype=tf.float32, name='alpha_raw')
        self.beta_raw = tf.Variable(0.05, trainable=True, dtype=tf.float32, name='beta_raw')

    # The bounded parameters are properties so that they always reflect the
    # current value of the raw variables; computing them once in __init__
    # would freeze them at their initial values.
    @property
    def k1(self):
        """Current k1 as a tensor in (0, 0.5)."""
        return tf.math.sigmoid(self.k1_raw) * self._K1_MAX

    @property
    def k2(self):
        """Current k2 as a tensor in (0, 0.1)."""
        return tf.math.sigmoid(self.k2_raw) * self._K2_MAX

    @property
    def alpha(self):
        """Current alpha as a tensor in (0, 1.0)."""
        return tf.math.sigmoid(self.alpha_raw) * self._ALPHA_MAX

    @property
    def beta(self):
        """Current beta as a tensor in (0, 0.2)."""
        return tf.math.sigmoid(self.beta_raw) * self._BETA_MAX

    def call(self, inputs, training=False):
        """Predict the next scaled water level.

        Args:
            inputs: sequence of eight tensors, concatenated along axis 1:
                year, month_sin, month_cos, day_sin, day_cos, current scaled
                level ``h``, ``dt_norm`` and ``log_dt_norm``.
            training: forwarded to the Dropout layers.

        Returns:
            Output of the final linear Dense(1) layer.
        """
        year, month_sin, month_cos, day_sin, day_cos, h, dt_norm, log_dt_norm = inputs
        # The level enters the network only through its interaction with the
        # two time-step features.
        x = tf.concat(
            [
                year,
                month_sin, month_cos,
                day_sin, day_cos,
                dt_norm,
                log_dt_norm,
                h * dt_norm,
                h * log_dt_norm,
            ],
            axis=1,
        )
        for dense_layer, dropout_layer in zip(self.dense_layers, self.dropout_layers):
            x = dropout_layer(dense_layer(x), training=training)
        return self.final_layer(x)

    @staticmethod
    def _inverse_min_max(values, scaler):
        """Undo a fitted MinMaxScaler with TensorFlow ops.

        MinMaxScaler.transform computes ``x * scale_ + min_``; inverting it
        with tensor arithmetic (instead of ``scaler.inverse_transform`` on
        NumPy arrays) keeps the result differentiable.
        """
        values = tf.cast(values, tf.float32)
        offset = tf.constant(scaler.min_, dtype=tf.float32)
        scale = tf.constant(scaler.scale_, dtype=tf.float32)
        return (values - offset) / scale

    def physics_loss(self, inputs, h_next_pred_scaled, scaler_h, scaler_dt, training=False):
        """Mean squared residual of the decay equation in unscaled units.

        Args:
            inputs: the same eight-element sequence accepted by ``call``; only
                the scaled current level and ``dt_norm`` are used.
            h_next_pred_scaled: network prediction in scaled units.
            scaler_h: fitted MinMaxScaler of the water level.
            scaler_dt: fitted MinMaxScaler of the time step.
            training: unused; kept for interface compatibility.

        Returns:
            Scalar tensor. It stays connected to ``h_next_pred_scaled`` and to
            the four raw parameters, so gradients reach both the network
            weights and the physical parameters.
        """
        _, _, _, _, _, h_current_scaled, dt_norm, _ = inputs

        h_current_raw = self._inverse_min_max(h_current_scaled, scaler_h)
        dt_raw = self._inverse_min_max(dt_norm, scaler_dt)
        h_next_pred_raw = self._inverse_min_max(h_next_pred_scaled, scaler_h)

        k1, k2, alpha, beta = self.k1, self.k2, self.alpha, self.beta

        # Exponents are clipped before exp() to avoid overflow.
        decay_exponent = tf.clip_by_value(-(k1 + k2 * h_current_raw) * dt_raw, -50.0, 50.0)
        decay_term = h_current_raw * tf.exp(decay_exponent)
        external_exponent = tf.clip_by_value(-beta * dt_raw, -50.0, 50.0)
        external_term = alpha * (1 - tf.exp(external_exponent))

        residual = h_next_pred_raw - (decay_term + external_term)
        return tf.reduce_mean(tf.square(residual))
class DamSeepageModel:
    """Tkinter application that trains a PINNModel on water-level records and
    plots a forecast for a test set.

    Button workflow: load a training and a test workbook (``select_file``),
    fit the network (``train_model``), run a Monte-Carlo dropout forecast
    (``predict``) and export tables and figures (``save_results``).
    """

    # Columns every workbook must contain.
    REQUIRED_COLUMNS = ('year', 'month', 'day', '水位')
    # Sub-day columns; created with value 0 when a workbook lacks them.
    OPTIONAL_TIME_COLUMNS = ('hour', 'minute', 'second')
    # Calendar features in the order PINNModel.call unpacks them.
    TIME_FEATURES = ('year_norm', 'month_sin', 'month_cos', 'day_sin', 'day_cos')
    # Time steps longer than this many days are replaced by the mean step.
    MAX_DT_DAYS = 30
    # Metric names exported per epoch by save_results.
    HISTORY_METRICS = ('MSE', 'RMSE', 'MAE', 'MAPE', 'R2')
    # Attributes created by train_model / predict and removed by reset.
    _RESULT_ATTRIBUTES = (
        'train_history', 'learned_params', 'predictions', 'uncertainty',
        'actual_values', 'test_time', 'mc_predictions',
    )

    def __init__(self, root):
        """Configure the root window, create empty state and build the UI.

        Args:
            root: the Tk root window hosting the application.
        """
        self.root = root
        self.root.title("大坝渗流预测模型(PINNs)")
        self.root.geometry("1200x800")

        self.train_df = None
        self.test_df = None
        self.model = None
        self.evaluation_metrics = {}
        self._init_scalers()

        self.create_widgets()

    def _init_scalers(self):
        """Create fresh, unfitted MinMaxScalers for every scaled feature."""
        self.scaler_year = MinMaxScaler(feature_range=(0, 1))
        self.scaler_month = MinMaxScaler(feature_range=(0, 1))
        self.scaler_day = MinMaxScaler(feature_range=(0, 1))
        self.scaler_dt = MinMaxScaler(feature_range=(0, 1))
        self.scaler_log_dt = MinMaxScaler(feature_range=(0, 1))
        self.scaler_h = MinMaxScaler(feature_range=(0, 1))

    # ------------------------------------------------------------------ UI

    def _add_file_row(self, parent, row, label, variable, file_type):
        """Add a label, a read-only entry and a file-picker button on one grid row."""
        ttk.Label(parent, text=label).grid(row=row, column=0, sticky=tk.W, pady=5)
        ttk.Entry(parent, textvariable=variable, width=30, state='readonly').grid(
            row=row, column=1, padx=5)
        ttk.Button(parent, text="选择文件",
                   command=lambda: self.select_file(file_type)).grid(row=row, column=2)

    @staticmethod
    def _add_spinbox(parent, row, label, variable, **spin_options):
        """Add a labelled Spinbox bound to ``variable`` on one grid row."""
        ttk.Label(parent, text=label).grid(row=row, column=0, sticky=tk.W, pady=5)
        ttk.Spinbox(parent, textvariable=variable, width=10, **spin_options).grid(
            row=row, column=1, padx=5)

    @staticmethod
    def _embed_figure(parent, figsize):
        """Create a matplotlib figure with canvas and toolbar inside ``parent``.

        Returns:
            ``(figure, axes, canvas, toolbar)``.
        """
        from matplotlib.backends.backend_tkagg import NavigationToolbar2Tk

        fig, ax = plt.subplots(figsize=figsize)
        canvas = FigureCanvasTkAgg(fig, master=parent)
        canvas.get_tk_widget().pack(side=tk.TOP, fill=tk.BOTH, expand=True)
        toolbar = NavigationToolbar2Tk(canvas, parent)
        toolbar.update()
        return fig, ax, canvas, toolbar

    def create_widgets(self):
        """Build the control panel (left) and the tabbed chart area (right)."""
        main_frame = ttk.Frame(self.root, padding=10)
        main_frame.pack(fill=tk.BOTH, expand=True)

        control_frame = ttk.LabelFrame(main_frame, text="模型控制", padding=10)
        control_frame.pack(side=tk.LEFT, fill=tk.Y, padx=5, pady=5)

        # Data file pickers.
        file_frame = ttk.LabelFrame(control_frame, text="数据文件", padding=10)
        file_frame.pack(fill=tk.X, pady=5)
        self.train_file_var = tk.StringVar()
        self.test_file_var = tk.StringVar()
        self._add_file_row(file_frame, 0, "训练集:", self.train_file_var, "train")
        self._add_file_row(file_frame, 1, "测试集:", self.test_file_var, "test")

        # Hyper-parameters.
        param_frame = ttk.LabelFrame(control_frame, text="PINNs参数", padding=10)
        param_frame.pack(fill=tk.X, pady=10)
        self.split_ratio_var = tk.DoubleVar(value=0.2)
        self.num_layers_var = tk.IntVar(value=4)
        self.hidden_units_var = tk.IntVar(value=32)
        self.epochs_var = tk.IntVar(value=500)
        self.physics_weight_var = tk.DoubleVar(value=0.5)
        self._add_spinbox(param_frame, 0, "验证集比例:", self.split_ratio_var,
                          from_=0, to=1, increment=0.05)
        self._add_spinbox(param_frame, 1, "网络层数:", self.num_layers_var,
                          from_=2, to=8, increment=1)
        self._add_spinbox(param_frame, 2, "神经元数/层:", self.hidden_units_var,
                          from_=16, to=128, increment=4)
        self._add_spinbox(param_frame, 3, "训练轮次:", self.epochs_var,
                          from_=100, to=2000, increment=100)
        self._add_spinbox(param_frame, 4, "物理损失权重:", self.physics_weight_var,
                          from_=0.1, to=1.0, increment=0.1)

        # Action buttons.
        btn_frame = ttk.Frame(control_frame)
        btn_frame.pack(fill=tk.X, pady=10)
        ttk.Button(btn_frame, text="训练模型", command=self.train_model).pack(side=tk.LEFT, padx=5)
        ttk.Button(btn_frame, text="预测结果", command=self.predict).pack(side=tk.LEFT, padx=5)
        ttk.Button(btn_frame, text="保存结果", command=self.save_results).pack(side=tk.LEFT, padx=5)
        ttk.Button(btn_frame, text="重置", command=self.reset).pack(side=tk.RIGHT, padx=5)

        # Status bar.
        self.status_var = tk.StringVar(value="就绪")
        status_bar = ttk.Label(control_frame, textvariable=self.status_var,
                               relief=tk.SUNKEN, anchor=tk.W)
        status_bar.pack(fill=tk.X, side=tk.BOTTOM)

        # Tabbed result area.
        result_frame = ttk.Frame(main_frame)
        result_frame.pack(side=tk.RIGHT, fill=tk.BOTH, expand=True, padx=5, pady=5)
        self.notebook = ttk.Notebook(result_frame)
        self.notebook.pack(fill=tk.BOTH, expand=True)
        self.loss_frame = ttk.Frame(self.notebook)
        self.notebook.add(self.loss_frame, text="训练损失")
        self.prediction_frame = ttk.Frame(self.notebook)
        self.notebook.add(self.prediction_frame, text="预测结果")

        # Metrics banner above the prediction chart.
        self.metrics_var = tk.StringVar()
        metrics_label = ttk.Label(
            self.prediction_frame,
            textvariable=self.metrics_var,
            font=('TkDefaultFont', 10, 'bold'),
            relief='ridge',
            padding=5,
        )
        metrics_label.pack(fill=tk.X, padx=5, pady=5)

        # Prediction chart (each canvas is packed exactly once).
        chart_frame = ttk.Frame(self.prediction_frame)
        chart_frame.pack(fill=tk.BOTH, expand=True, padx=5, pady=5)
        self.fig, self.ax, self.canvas, self.toolbar = self._embed_figure(chart_frame, (10, 6))

        # Loss chart.
        loss_chart_frame = ttk.Frame(self.loss_frame)
        loss_chart_frame.pack(fill=tk.BOTH, expand=True, padx=5, pady=5)
        (self.loss_fig, self.loss_ax,
         self.loss_canvas, self.loss_toolbar) = self._embed_figure(loss_chart_frame, (10, 4))

    # ------------------------------------------------------------- metrics

    def calculate_metrics(self, y_true, y_pred):
        """Compute regression metrics for two equally long 1-D sequences.

        Returns:
            dict with keys 'MSE', 'RMSE', 'MAE', 'MAPE' (percent, computed over
            non-zero targets only; NaN when every target is zero) and 'R2'.
        """
        from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

        y_true = np.asarray(y_true).ravel()
        y_pred = np.asarray(y_pred).ravel()

        mse = mean_squared_error(y_true, y_pred)
        non_zero = y_true != 0
        if non_zero.any():
            mape = np.mean(np.abs((y_true[non_zero] - y_pred[non_zero]) / y_true[non_zero])) * 100
        else:
            mape = float('nan')
        return {
            'MSE': mse,
            'RMSE': np.sqrt(mse),
            'MAE': mean_absolute_error(y_true, y_pred),
            'MAPE': mape,
            'R2': r2_score(y_true, y_pred),
        }

    # -------------------------------------------------------- preprocessing

    @staticmethod
    def _scale_columns(scaler, df, columns, fit):
        """Return ``columns`` of ``df`` scaled by ``scaler`` (fitting first if asked).

        Plain NumPy arrays are handed to the scaler so that later calls with
        arrays (during training and prediction) match how it was fitted.
        """
        values = df[list(columns)].to_numpy(dtype=np.float64)
        if fit:
            scaler.fit(values)
        return scaler.transform(values)

    def preprocess_data(self, df, is_training=False):
        """Derive and scale the model features; the input frame is not modified.

        Adds 'log_dt', 'year_norm', 'month_sin', 'month_cos', 'day_sin',
        'day_cos', 'dt_norm', 'log_dt_norm' and '水位_norm'. The raw 'year',
        'dt' and '水位' columns are kept unchanged.

        Args:
            df: frame with at least the required columns; it may already have
                a DatetimeIndex and a 'dt' column (as produced by select_file).
            is_training: when True the scalers are (re)fitted on ``df``;
                otherwise the previously fitted scalers are applied.

        Returns:
            A new DataFrame indexed by timestamp.

        Raises:
            RuntimeError: ``is_training`` is False and the scalers are unfitted.
        """
        if not is_training and not hasattr(self.scaler_h, 'data_min_'):
            raise RuntimeError("归一化器未初始化,请先处理训练集")

        df = df.copy()

        # Build the timestamp index unless the frame already has one.
        if not isinstance(df.index, pd.DatetimeIndex):
            if 'datetime' in df.columns:
                df['datetime'] = pd.to_datetime(df['datetime'])
            else:
                for col in self.OPTIONAL_TIME_COLUMNS:
                    if col not in df.columns:
                        df[col] = 0
                df['datetime'] = pd.to_datetime(
                    df[['year', 'month', 'day', *self.OPTIONAL_TIME_COLUMNS]])
            df = df.set_index('datetime').sort_index()

        # Time step in days; missing, non-positive or overly long steps are
        # replaced by the mean of the plausible ones.
        if 'dt' not in df.columns:
            df['dt'] = df.index.to_series().diff().dt.total_seconds() / 86400
        plausible = (df['dt'] > 0) & (df['dt'] <= self.MAX_DT_DAYS)
        dt_fill = df.loc[plausible, 'dt'].mean() if plausible.any() else 1.0
        df['dt'] = df['dt'].where(plausible, dt_fill)
        df['log_dt'] = np.log1p(df['dt'])

        # Cyclical encodings of the raw month and day numbers.
        month_angle = 2 * np.pi * df['month'] / 12
        day_angle = 2 * np.pi * df['day'] / 31
        df['month_sin'] = np.sin(month_angle)
        df['month_cos'] = np.cos(month_angle)
        df['day_sin'] = np.sin(day_angle)
        df['day_cos'] = np.cos(day_angle)

        # Scale every feature; scalers are fitted only on training data.
        fit = is_training
        df['year_norm'] = self._scale_columns(self.scaler_year, df, ['year'], fit).ravel()
        df[['month_sin', 'month_cos']] = self._scale_columns(
            self.scaler_month, df, ['month_sin', 'month_cos'], fit)
        df[['day_sin', 'day_cos']] = self._scale_columns(
            self.scaler_day, df, ['day_sin', 'day_cos'], fit)
        df['dt_norm'] = self._scale_columns(self.scaler_dt, df, ['dt'], fit).ravel()
        df['log_dt_norm'] = self._scale_columns(self.scaler_log_dt, df, ['log_dt'], fit).ravel()
        df['水位_norm'] = self._scale_columns(self.scaler_h, df, ['水位'], fit).ravel()
        return df

    # ------------------------------------------------------------- loading

    def select_file(self, file_type):
        """Ask for an Excel workbook, index it by time and store it.

        Rows lacking any required value are dropped and the remaining rows are
        sorted chronologically before the time step 'dt' (days) is derived.

        Args:
            file_type: "train" stores the frame as the training set; any other
                value stores it as the test set.
        """
        try:
            file_path = filedialog.askopenfilename(
                title=f"选择{file_type}集Excel文件",
                filetypes=[("Excel文件", "*.xlsx *.xls"), ("所有文件", "*.*")]
            )
            if not file_path:
                return

            df = pd.read_excel(file_path)

            missing_cols = [col for col in self.REQUIRED_COLUMNS if col not in df.columns]
            if missing_cols:
                messagebox.showerror("列名错误", f"缺少必需列: {', '.join(missing_cols)}")
                return

            # Incomplete rows cannot be time-stamped or used as targets.
            df = df.dropna(subset=list(self.REQUIRED_COLUMNS)).copy()
            if df.empty:
                raise ValueError("文件中没有有效数据行")

            for col in self.OPTIONAL_TIME_COLUMNS:
                if col not in df.columns:
                    df[col] = 0
            df['datetime'] = pd.to_datetime(
                df[['year', 'month', 'day', *self.OPTIONAL_TIME_COLUMNS]])
            # Sorting guarantees non-negative differences between rows.
            df = df.set_index('datetime').sort_index()

            # Whole days elapsed since the first record.
            df['days'] = (df.index - df.index[0]).days

            # Time step in days; the first row and non-positive steps receive
            # the mean positive step (1.0 when there is none).
            dt = df.index.to_series().diff().dt.total_seconds() / 86400
            positive_dt = dt[dt > 0]
            avg_dt = positive_dt.mean() if len(positive_dt) > 0 else 1.0
            df['dt'] = dt.where(dt > 0, avg_dt)

            if file_type == "train":
                self.train_df = df
                self.train_file_var.set(os.path.basename(file_path))
                self.status_var.set(f"已加载训练集: {len(self.train_df)}条数据")
            else:
                self.test_df = df
                self.test_file_var.set(os.path.basename(file_path))
                self.status_var.set(f"已加载测试集: {len(self.test_df)}条数据")
        except Exception as e:
            error_msg = f"文件读取失败: {str(e)}\n\n请确保:\n1. 文件不是打开状态\n2. 文件格式正确\n3. 包含必需的时间和水位列"
            messagebox.showerror("文件错误", error_msg)

    # ------------------------------------------------------------ training

    def _build_supervised_arrays(self, df):
        """Turn a preprocessed frame into one-step-ahead samples.

        Sample ``j`` pairs the features of row ``j + 1`` and the scaled level
        of row ``j`` with the scaled level of row ``j + 1`` as target.

        Returns:
            ``(inputs, targets)``: a tuple of eight float32 ``(n - 1, 1)``
            arrays in PINNModel.call order, and a float32 ``(n - 1, 1)`` array.
        """
        def column(name, rows):
            return df[name].to_numpy(dtype=np.float32)[rows].reshape(-1, 1)

        following = slice(1, None)
        preceding = slice(None, -1)
        inputs = tuple(column(name, following) for name in self.TIME_FEATURES) + (
            column('水位_norm', preceding),
            column('dt_norm', following),
            column('log_dt_norm', following),
        )
        return inputs, column('水位_norm', following)

    def _draw_loss_curves(self, train_losses, physics_losses, valid_losses):
        """Redraw the loss chart from the three per-epoch histories."""
        self.loss_ax.clear()
        epochs_range = range(1, len(train_losses) + 1)
        self.loss_ax.plot(epochs_range, train_losses, 'b-', label='训练数据损失')
        self.loss_ax.plot(epochs_range, physics_losses, 'r--', label='物理损失')
        self.loss_ax.plot(epochs_range, valid_losses, 'g-.', label='验证数据损失')
        self.loss_ax.set_title('PINNs训练与验证损失')
        self.loss_ax.set_xlabel('轮次')
        self.loss_ax.set_ylabel('损失', rotation=0)
        self.loss_ax.legend()
        self.loss_ax.grid(True, alpha=0.3)
        self.loss_ax.set_yscale('log')
        self.loss_canvas.draw()

    def train_model(self):
        """Train a new PINNModel with early stopping and live progress.

        The loaded training frame is split chronologically into a training and
        a validation part; the scalers are fitted on the training part only.
        Each epoch minimises ``data_loss + w * physics_loss`` where ``w``
        decays linearly from the configured weight to half of it. Training
        stops when the validation loss has not improved by 1e-4 for
        ``epochs / 3`` epochs, in which case the best weights are restored.

        On success ``self.train_history`` and ``self.learned_params`` are set.
        """
        if self.train_df is None:
            messagebox.showwarning("警告", "请先选择训练集文件")
            return
        try:
            self.status_var.set("正在预处理数据...")
            self.root.update()

            total_epochs = self.epochs_var.get()
            base_physics_weight = self.physics_weight_var.get()

            # Chronological split: the tail of the series is the validation set.
            split_idx = int(len(self.train_df) * (1 - self.split_ratio_var.get()))
            train_raw = self.train_df.iloc[:split_idx]
            valid_raw = self.train_df.iloc[split_idx:]
            if len(train_raw) < 2 or len(valid_raw) < 2:
                messagebox.showerror("数据错误", "训练集数据量不足(至少需要2个时间步)")
                return

            train_subset = self.preprocess_data(train_raw, is_training=True)
            valid_subset = self.preprocess_data(valid_raw, is_training=False)
            train_inputs, h_next_train_scaled = self._build_supervised_arrays(train_subset)
            valid_inputs, h_next_valid_scaled = self._build_supervised_arrays(valid_subset)

            self.model = PINNModel(
                num_layers=self.num_layers_var.get(),
                hidden_units=self.hidden_units_var.get()
            )
            lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
                initial_learning_rate=0.001,
                decay_steps=100,
                decay_rate=0.95,
                staircase=True
            )
            optimizer = Adam(learning_rate=lr_schedule)

            # Only the training set is shuffled. Targets travel with their
            # inputs, and metrics below are computed from the targets of each
            # batch so that predictions and truths stay aligned.
            train_dataset = tf.data.Dataset.from_tensor_slices(
                (train_inputs, h_next_train_scaled)).shuffle(buffer_size=1024).batch(32)
            valid_dataset = tf.data.Dataset.from_tensor_slices(
                (valid_inputs, h_next_valid_scaled)).batch(32)

            train_data_loss_history = []
            physics_loss_history = []
            valid_data_loss_history = []
            train_metrics_history = []
            valid_metrics_history = []

            # Early-stopping state.
            patience = int(total_epochs / 3)
            min_delta = 1e-4
            best_valid_loss = float('inf')
            wait = 0
            best_epoch = 0
            best_weights = None
            stopped_early = False
            start_time = time.time()

            for epoch in range(total_epochs):
                # Evaluate the schedule directly; how an optimizer exposes a
                # scheduled learning rate differs between Keras versions.
                current_lr = float(lr_schedule(optimizer.iterations))
                physics_weight = base_physics_weight * (1 - 0.5 * epoch / total_epochs)

                # ---- training pass
                batch_data_losses = []
                batch_physics_losses = []
                pred_batches = []
                true_batches = []
                for inputs, h_next_batch in train_dataset:
                    with tf.GradientTape() as tape:
                        h_pred = self.model(inputs, training=True)
                        data_loss = tf.reduce_mean(tf.square(h_next_batch - h_pred))
                        physics_loss = self.model.physics_loss(
                            inputs, h_pred,
                            self.scaler_h, self.scaler_dt,
                            training=True
                        )
                        loss = data_loss + physics_weight * physics_loss
                    variables = self.model.trainable_variables
                    grads = tape.gradient(loss, variables)
                    optimizer.apply_gradients(zip(grads, variables))

                    batch_data_losses.append(data_loss.numpy())
                    batch_physics_losses.append(physics_loss.numpy())
                    pred_batches.append(h_pred.numpy())
                    true_batches.append(h_next_batch.numpy())

                train_metrics = self.calculate_metrics(
                    self.scaler_h.inverse_transform(np.concatenate(true_batches, axis=0)).ravel(),
                    self.scaler_h.inverse_transform(np.concatenate(pred_batches, axis=0)).ravel()
                )
                train_metrics_history.append(train_metrics)

                # ---- validation pass
                batch_valid_losses = []
                pred_batches = []
                true_batches = []
                for inputs, h_next_batch in valid_dataset:
                    h_pred = self.model(inputs, training=False)
                    batch_valid_losses.append(
                        tf.reduce_mean(tf.square(h_next_batch - h_pred)).numpy())
                    pred_batches.append(h_pred.numpy())
                    true_batches.append(h_next_batch.numpy())

                valid_metrics = self.calculate_metrics(
                    self.scaler_h.inverse_transform(np.concatenate(true_batches, axis=0)).ravel(),
                    self.scaler_h.inverse_transform(np.concatenate(pred_batches, axis=0)).ravel()
                )
                valid_metrics_history.append(valid_metrics)

                train_data_loss_history.append(float(np.mean(batch_data_losses)))
                physics_loss_history.append(float(np.mean(batch_physics_losses)))
                current_valid_loss = float(np.mean(batch_valid_losses))
                valid_data_loss_history.append(current_valid_loss)

                # ---- early stopping
                if current_valid_loss < best_valid_loss - min_delta:
                    best_valid_loss = current_valid_loss
                    best_epoch = epoch + 1
                    wait = 0
                    best_weights = self.model.get_weights()
                else:
                    wait += 1
                    if wait >= patience:
                        self.status_var.set(
                            f"触发早停!最佳轮次: {best_epoch},最佳验证损失: {best_valid_loss:.4f}")
                        if best_weights is not None:
                            self.model.set_weights(best_weights)
                        stopped_early = True
                        break

                # ---- progress report
                elapsed = time.time() - start_time
                self.status_var.set(
                    f"训练中 | 轮次: {epoch + 1}/{total_epochs} | "
                    f"学习率: {current_lr:.6f} | "
                    f"训练RMSE: {train_metrics['RMSE']:.4f} | 验证RMSE: {valid_metrics['RMSE']:.4f} | "
                    f"训练R²: {train_metrics['R2']:.4f} | 验证R²: {valid_metrics['R2']:.4f} | "
                    f"k1: {self.model.k1.numpy():.6f}, k2: {self.model.k2.numpy():.6f} | "
                    f"时间: {elapsed:.1f}秒 | 早停等待: {wait}/{patience}"
                )
                self.root.update()
                self._draw_loss_curves(
                    train_data_loss_history, physics_loss_history, valid_data_loss_history)

            elapsed = time.time() - start_time
            if stopped_early:
                completion_msg = (
                    f"早停触发 | 最佳轮次: {best_epoch} | 最佳验证损失: {best_valid_loss:.4f} | "
                    f"最佳验证RMSE: {valid_metrics_history[best_epoch - 1]['RMSE']:.4f} | "
                    f"总时间: {elapsed:.1f}秒"
                )
            else:
                completion_msg = (
                    f"训练完成 | 总轮次: {total_epochs} | "
                    f"最终训练RMSE: {train_metrics_history[-1]['RMSE']:.4f} | "
                    f"最终验证RMSE: {valid_metrics_history[-1]['RMSE']:.4f} | "
                    f"最终训练R²: {train_metrics_history[-1]['R2']:.4f} | "
                    f"最终验证R²: {valid_metrics_history[-1]['R2']:.4f} | "
                    f"总时间: {elapsed:.1f}秒"
                )

            self.train_history = {
                'train_data_loss': train_data_loss_history,
                'physics_loss': physics_loss_history,
                'valid_data_loss': valid_data_loss_history,
                'train_metrics': train_metrics_history,
                'valid_metrics': valid_metrics_history
            }
            # Plain floats, read after any weight restoration above.
            self.learned_params = {
                "k1": float(self.model.k1.numpy()),
                "k2": float(self.model.k2.numpy()),
                "alpha": float(self.model.alpha.numpy()),
                "beta": float(self.model.beta.numpy())
            }
            self.status_var.set(completion_msg)
            messagebox.showinfo("训练完成", f"PINNs模型训练成功完成!\n{completion_msg}")
        except Exception as e:
            messagebox.showerror("训练错误", f"模型训练失败:\n{str(e)}")
            self.status_var.set("训练失败")
            import traceback
            traceback.print_exc()

    # ---------------------------------------------------------- prediction

    @staticmethod
    def _teacher_forcing_prob(step, total_steps):
        """Probability of feeding the observed previous level at ``step``.

        Rises linearly from 0.7 and saturates at 0.9 once ``step`` reaches
        70 % of ``total_steps``.
        """
        return 0.7 + 0.2 * min(1.0, step / (0.7 * total_steps))

    def predict(self):
        """Forecast the test series recursively with MC dropout and plot it.

        All Monte-Carlo samples advance together as one batch, so the network
        is called once per time step; dropout stays active (``training=True``)
        so every sample differs. At each step a sample is fed, at random, either
        the observed previous level from the test set (teacher forcing) or its
        own previous prediction; the last step always uses the prediction.
        The final value blends the physics equation (weight 0.3) with the
        network output.

        On success the predictions, the 95 % half-width, the actual values,
        the timestamps and the raw samples are stored on the instance and
        ``self.evaluation_metrics`` is updated.
        """
        if self.model is None or not hasattr(self, 'learned_params'):
            messagebox.showwarning("警告", "请先训练模型")
            return
        if self.test_df is None:
            messagebox.showwarning("警告", "请先选择测试集文件")
            return
        try:
            self.status_var.set("正在生成预测(使用Teacher Forcing和MC Dropout)...")
            self.root.update()

            # Scale the test set with the scalers fitted during training.
            test_df = self.preprocess_data(self.test_df, is_training=False)
            n = len(test_df)
            if n < 2:
                messagebox.showerror("数据错误", "测试集数据量不足(至少需要2个时间步)")
                self.status_var.set("预测失败")
                return

            time_features = test_df[list(self.TIME_FEATURES)].to_numpy(dtype=np.float32)
            h_test_scaled = test_df['水位_norm'].to_numpy(dtype=np.float32)
            dt_norm = test_df['dt_norm'].to_numpy(dtype=np.float32)
            log_dt_norm = test_df['log_dt_norm'].to_numpy(dtype=np.float32)
            dt_raw = test_df['dt'].to_numpy(dtype=np.float64)
            actual_values = test_df['水位'].to_numpy().reshape(-1, 1)
            test_time = test_df.index

            mc_iterations = 100
            adaptive_forcing = True
            physics_weight = 0.3
            k1 = self.learned_params['k1']
            k2 = self.learned_params['k2']
            alpha = self.learned_params['alpha']
            beta = self.learned_params['beta']

            mc_predictions_scaled = np.zeros((mc_iterations, n, 1), dtype=np.float32)
            mc_predictions_scaled[:, 0, 0] = h_test_scaled[0]  # first point is observed
            ones = np.ones((mc_iterations, 1), dtype=np.float32)
            step_probs = []

            for i in range(1, n):
                prob = self._teacher_forcing_prob(i, n) if adaptive_forcing else 0.7
                step_probs.append(prob)

                previous_pred = mc_predictions_scaled[:, i - 1, :]
                if i < n - 1:
                    use_actual = np.random.rand(mc_iterations, 1) < prob
                    h_prev = np.where(
                        use_actual, h_test_scaled[i - 1], previous_pred).astype(np.float32)
                else:
                    h_prev = previous_pred

                inputs = tuple(
                    tf.constant(ones * time_features[i, j])
                    for j in range(len(self.TIME_FEATURES))
                ) + (
                    tf.constant(h_prev),
                    tf.constant(ones * dt_norm[i]),
                    tf.constant(ones * log_dt_norm[i]),
                )
                nn_pred_raw = self.scaler_h.inverse_transform(
                    self.model(inputs, training=True).numpy())

                # Physics equation in unscaled units, clipped as in training.
                h_prev_raw = self.scaler_h.inverse_transform(h_prev)
                exponent = np.clip(-(k1 + k2 * h_prev_raw) * dt_raw[i], -50.0, 50.0)
                decay_term = h_prev_raw * np.exp(exponent)
                external_term = alpha * (1 - np.exp(np.clip(-beta * dt_raw[i], -50.0, 50.0)))
                physics_pred = decay_term + external_term

                final_pred_raw = physics_weight * physics_pred + (1 - physics_weight) * nn_pred_raw
                mc_predictions_scaled[:, i, :] = self.scaler_h.transform(final_pred_raw)

            avg_teacher_forcing_prob = float(np.mean(step_probs)) if step_probs else 0.7

            mean_pred_scaled = np.mean(mc_predictions_scaled, axis=0)
            std_pred_scaled = np.std(mc_predictions_scaled, axis=0)
            predictions = self.scaler_h.inverse_transform(mean_pred_scaled)
            # A spread is only rescaled, never shifted: inverse_transform would
            # add the data minimum to it.
            uncertainty = std_pred_scaled / self.scaler_h.scale_[0] * 1.96

            # ---- plot
            self.ax.clear()
            # y-range: median of the observations +/- max(4 std, 10).
            y_center = np.median(actual_values.flatten())
            y_half_range = max(np.std(actual_values.flatten()) * 4, 10)
            self.ax.plot(test_time, actual_values, 'b-', label='真实值', linewidth=2)
            self.ax.plot(test_time, predictions, 'r--', label='预测均值', linewidth=2)
            self.ax.fill_between(
                test_time,
                (predictions - uncertainty).flatten(),
                (predictions + uncertainty).flatten(),
                color='orange', alpha=0.3, label='95%置信区间'
            )
            self.ax.set_ylim(y_center - y_half_range, y_center + y_half_range)
            self.ax.set_title('大坝渗流水位预测(PINNs with MC Dropout)')
            self.ax.set_xlabel('时间')
            self.ax.set_ylabel('测压管水位', rotation=0)
            self.ax.legend(loc='best')
            self.ax.xaxis.set_major_locator(mdates.YearLocator())
            self.ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y'))
            self.ax.xaxis.set_minor_locator(mdates.MonthLocator(interval=2))
            self.ax.grid(which='minor', axis='x', linestyle=':', color='gray', alpha=0.3)
            self.ax.grid(which='major', axis='y', linestyle='-', color='lightgray', alpha=0.5)
            self.ax.tick_params(axis='x', which='major', rotation=0, labelsize=9)
            self.ax.tick_params(axis='x', which='minor', length=2)

            # ---- metrics (the first point is an observation, not a forecast)
            self.evaluation_metrics = self.calculate_metrics(
                actual_values[1:].flatten(), predictions[1:].flatten())
            avg_uncertainty = np.mean(uncertainty)
            max_uncertainty = np.max(uncertainty)
            self.evaluation_metrics['Avg Uncertainty'] = avg_uncertainty
            self.evaluation_metrics['Max Uncertainty'] = max_uncertainty
            metrics_text = (
                f"MSE: {self.evaluation_metrics['MSE']:.4f} | "
                f"RMSE: {self.evaluation_metrics['RMSE']:.4f} | "
                f"MAE: {self.evaluation_metrics['MAE']:.4f} | "
                f"MAPE: {self.evaluation_metrics['MAPE']:.2f}% | "
                f"R²: {self.evaluation_metrics['R2']:.4f}\n"
                f"平均不确定性: {avg_uncertainty:.4f} | 最大不确定性: {max_uncertainty:.4f}"
            )
            self.metrics_var.set(metrics_text)
            self.ax.text(
                0.5, 1.05, metrics_text,
                transform=self.ax.transAxes,
                ha='center', fontsize=8,
                bbox=dict(facecolor='white', alpha=0.8)
            )
            params_text = (
                f"物理参数: k1={self.learned_params['k1']:.4f}, "
                f"k2={self.learned_params['k2']:.4f}, "
                f"alpha={self.learned_params['alpha']:.4f}, "
                f"beta={self.learned_params['beta']:.4f} | "
                f"Teacher Forcing概率: {avg_teacher_forcing_prob:.4f}"
            )
            self.ax.text(
                0.5, 1.12, params_text,
                transform=self.ax.transAxes,
                ha='center', fontsize=8,
                bbox=dict(facecolor='white', alpha=0.8)
            )
            # Lay out this figure explicitly; pyplot's "current figure" is the
            # one created last, which is the loss chart.
            self.fig.tight_layout(pad=2.0)
            self.canvas.draw()

            self.predictions = predictions
            self.uncertainty = uncertainty
            self.actual_values = actual_values
            self.test_time = test_time
            self.mc_predictions = mc_predictions_scaled
            self.status_var.set(f"预测完成(MC Dropout采样{mc_iterations}次)")
        except Exception as e:
            messagebox.showerror("预测错误", f"预测失败:\n{str(e)}")
            self.status_var.set("预测失败")
            import traceback
            traceback.print_exc()

    # --------------------------------------------------------------- reset

    def reset(self):
        """Discard data, model, scalers and results, and clear both charts."""
        self._init_scalers()
        self.train_df = None
        self.test_df = None
        self.model = None
        self.evaluation_metrics = {}
        self.train_file_var.set("")
        self.test_file_var.set("")

        # Drop everything produced by earlier training / prediction runs.
        for name in self._RESULT_ATTRIBUTES:
            if hasattr(self, name):
                delattr(self, name)

        self.ax.clear()
        self.loss_ax.clear()
        self.canvas.draw()
        self.loss_canvas.draw()

        self.metrics_var.set("")
        self.status_var.set("已重置,请选择新数据")
        messagebox.showinfo("重置", "程序已重置,可以开始新的分析")

    # ---------------------------------------------------------------- save

    def save_results(self):
        """Export predictions, metrics and training history to Excel and PNG.

        The workbook gets three sheets; the prediction chart and the loss
        chart are saved next to it with '_chart.png' and '_loss.png' suffixes.
        Requires a finished training run and a finished prediction.
        """
        if not hasattr(self, 'predictions') or not hasattr(self, 'train_history'):
            messagebox.showwarning("警告", "请先生成预测结果并完成训练")
            return
        save_path = filedialog.asksaveasfilename(
            defaultextension=".xlsx",
            filetypes=[("Excel文件", "*.xlsx"), ("所有文件", "*.*")],
            title="保存结果"
        )
        if not save_path:
            return
        try:
            history = self.train_history

            result_df = pd.DataFrame({
                '时间': self.test_time,
                '实际水位': self.actual_values.flatten(),
                '预测水位': self.predictions.flatten()
            })
            metrics_df = pd.DataFrame([self.evaluation_metrics])

            history_data = {
                '轮次': list(range(1, len(history['train_data_loss']) + 1)),
                '训练数据损失': history['train_data_loss'],
                '物理损失': history['physics_loss'],
                '验证数据损失': history['valid_data_loss']
            }
            for metric in self.HISTORY_METRICS:
                history_data[f'训练集_{metric}'] = [
                    item[metric] for item in history['train_metrics']]
            for metric in self.HISTORY_METRICS:
                history_data[f'验证集_{metric}'] = [
                    item[metric] for item in history['valid_metrics']]
            history_df = pd.DataFrame(history_data)

            with pd.ExcelWriter(save_path) as writer:
                result_df.to_excel(writer, sheet_name='预测结果', index=False)
                metrics_df.to_excel(writer, sheet_name='评估指标', index=False)
                history_df.to_excel(writer, sheet_name='训练历史', index=False)

            base_path = os.path.splitext(save_path)[0]
            chart_path = base_path + "_chart.png"
            loss_path = base_path + "_loss.png"
            self.fig.savefig(chart_path, dpi=300)
            self.loss_fig.savefig(loss_path, dpi=300)

            self.status_var.set(f"结果已保存至: {os.path.basename(save_path)}")
            messagebox.showinfo("保存成功",
                                f"预测结果和图表已保存至:\n"
                                f"主文件: {save_path}\n"
                                f"预测图表: {chart_path}\n"
                                f"损失曲线: {loss_path}")
        except Exception as e:
            messagebox.showerror("保存错误", f"保存结果失败:\n{str(e)}")
def main():
    """Create the Tk root window, attach the application and run the event loop."""
    window = tk.Tk()
    # Keep a reference for the lifetime of the event loop.
    application = DamSeepageModel(window)
    window.mainloop()
    return application


if __name__ == "__main__":
    main()
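# NOTE: the two lines below are trailing text from the original paste, not code;
# they are kept as comments so the file still parses.
# "Check for errors and improve; partial fixes are fine." / "Latest release"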