import tkinter as tk
from tkinter import ttk, filedialog, messagebox
import pandas as pd
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
from matplotlib.font_manager import FontProperties
from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg
from sklearn.preprocessing import MinMaxScaler
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
import os
# Global matplotlib text settings: select the SimHei font for the
# sans-serif family and turn off the Unicode minus glyph for axis ticks.
plt.rcParams.update({
    'font.sans-serif': ['SimHei'],
    'axes.unicode_minus': False,
})
class DamSeepageModel:
    """Tkinter application that trains an LSTM on a '水位' series and plots predictions.

    The window has a left control panel (file pickers, hyper-parameter
    spinboxes, action buttons, status bar) and a right notebook with two
    tabs: the training-loss curve and the prediction chart.

    State kept on the instance:
        train_df / test_df: DataFrames loaded by ``select_file`` (datetime index).
        model / scaler / trained_window_size: set together once a training
            run finishes, so prediction always matches the trained model.
        test_predict / y_test_orig / test_time: results of the last
            ``predict`` call, consumed by ``save_results``.
        evaluation_metrics: dict produced by ``calculate_metrics``.
    """

    def __init__(self, root):
        """Store the Tk root window, initialise state and build the widgets."""
        self.root = root
        self.root.title("大坝渗流预测模型")
        self.root.geometry("1200x800")
        # Data and model state
        self.train_df = None
        self.test_df = None
        self.model = None
        self.scaler = MinMaxScaler(feature_range=(0, 1))
        self.evaluation_metrics = {}  # results of the last evaluation
        # Window size the current model was trained with (None until trained)
        self.trained_window_size = None
        # Results of the last prediction, needed by save_results()
        self.test_predict = None
        self.y_test_orig = None
        self.test_time = None
        # Guards against re-entering train_model() through root.update()
        self._is_training = False
        # Build the user interface
        self.create_widgets()

    def create_widgets(self):
        """Create the control panel, the result notebook and both figures."""
        # Figure is used directly (instead of pyplot) because the canvases
        # are embedded in Tk and must not be managed by pyplot.
        from matplotlib.figure import Figure

        main_frame = ttk.Frame(self.root, padding=10)
        main_frame.pack(fill=tk.BOTH, expand=True)

        # Left-hand control panel
        control_frame = ttk.LabelFrame(main_frame, text="模型控制", padding=10)
        control_frame.pack(side=tk.LEFT, fill=tk.Y, padx=5, pady=5)

        # File selection
        file_frame = ttk.LabelFrame(control_frame, text="数据文件", padding=10)
        file_frame.pack(fill=tk.X, pady=5)

        # Training set picker
        ttk.Label(file_frame, text="训练集:").grid(row=0, column=0, sticky=tk.W, pady=5)
        self.train_file_var = tk.StringVar()
        ttk.Entry(file_frame, textvariable=self.train_file_var, width=30,
                  state='readonly').grid(row=0, column=1, padx=5)
        ttk.Button(file_frame, text="选择文件",
                   command=lambda: self.select_file("train")).grid(row=0, column=2)

        # Test set picker
        ttk.Label(file_frame, text="测试集:").grid(row=1, column=0, sticky=tk.W, pady=5)
        self.test_file_var = tk.StringVar()
        ttk.Entry(file_frame, textvariable=self.test_file_var, width=30,
                  state='readonly').grid(row=1, column=1, padx=5)
        ttk.Button(file_frame, text="选择文件",
                   command=lambda: self.select_file("test")).grid(row=1, column=2)

        # Hyper-parameters
        param_frame = ttk.LabelFrame(control_frame, text="模型参数", padding=10)
        param_frame.pack(fill=tk.X, pady=10)

        # Time-window length
        ttk.Label(param_frame, text="时间窗口大小:").grid(row=0, column=0, sticky=tk.W, pady=5)
        self.window_size_var = tk.IntVar(value=60)
        ttk.Spinbox(param_frame, from_=10, to=200, increment=5,
                    textvariable=self.window_size_var, width=10).grid(row=0, column=1, padx=5)

        # Number of LSTM units
        ttk.Label(param_frame, text="LSTM单元数:").grid(row=1, column=0, sticky=tk.W, pady=5)
        self.lstm_units_var = tk.IntVar(value=50)
        ttk.Spinbox(param_frame, from_=10, to=200, increment=10,
                    textvariable=self.lstm_units_var, width=10).grid(row=1, column=1, padx=5)

        # Number of epochs
        ttk.Label(param_frame, text="训练轮次:").grid(row=2, column=0, sticky=tk.W, pady=5)
        self.epochs_var = tk.IntVar(value=100)
        ttk.Spinbox(param_frame, from_=10, to=500, increment=10,
                    textvariable=self.epochs_var, width=10).grid(row=2, column=1, padx=5)

        # Batch size
        ttk.Label(param_frame, text="批处理大小:").grid(row=3, column=0, sticky=tk.W, pady=5)
        self.batch_size_var = tk.IntVar(value=32)
        ttk.Spinbox(param_frame, from_=16, to=128, increment=16,
                    textvariable=self.batch_size_var, width=10).grid(row=3, column=1, padx=5)

        # Action buttons
        btn_frame = ttk.Frame(control_frame)
        btn_frame.pack(fill=tk.X, pady=10)
        ttk.Button(btn_frame, text="训练模型", command=self.train_model).pack(side=tk.LEFT, padx=5)
        ttk.Button(btn_frame, text="预测结果", command=self.predict).pack(side=tk.LEFT, padx=5)
        ttk.Button(btn_frame, text="保存结果", command=self.save_results).pack(side=tk.LEFT, padx=5)
        ttk.Button(btn_frame, text="重置", command=self.reset).pack(side=tk.RIGHT, padx=5)

        # Status bar
        self.status_var = tk.StringVar(value="就绪")
        status_bar = ttk.Label(control_frame, textvariable=self.status_var,
                               relief=tk.SUNKEN, anchor=tk.W)
        status_bar.pack(fill=tk.X, side=tk.BOTTOM)

        # Right-hand result area
        result_frame = ttk.Frame(main_frame)
        result_frame.pack(side=tk.RIGHT, fill=tk.BOTH, expand=True, padx=5, pady=5)

        self.notebook = ttk.Notebook(result_frame)
        self.notebook.pack(fill=tk.BOTH, expand=True)

        # Training-loss tab
        self.loss_frame = ttk.Frame(self.notebook)
        self.notebook.add(self.loss_frame, text="训练损失")

        # Prediction tab
        self.prediction_frame = ttk.Frame(self.notebook)
        self.notebook.add(self.prediction_frame, text="预测结果")

        # Label that shows the evaluation metrics of the last prediction
        self.metrics_var = tk.StringVar()
        metrics_label = ttk.Label(
            self.prediction_frame,
            textvariable=self.metrics_var,
            font=('TkDefaultFont', 10, 'bold'),
            relief='ridge',
            padding=5
        )
        metrics_label.pack(fill=tk.X, padx=5, pady=5)

        # Prediction figure, embedded in the prediction tab
        self.fig = Figure(figsize=(10, 6))
        self.ax = self.fig.add_subplot(111)
        self.canvas = FigureCanvasTkAgg(self.fig, master=self.prediction_frame)
        self.canvas.get_tk_widget().pack(fill=tk.BOTH, expand=True)

        # Loss figure, embedded in the loss tab
        self.loss_fig = Figure(figsize=(10, 4))
        self.loss_ax = self.loss_fig.add_subplot(111)
        self.loss_canvas = FigureCanvasTkAgg(self.loss_fig, master=self.loss_frame)
        self.loss_canvas.get_tk_widget().pack(fill=tk.BOTH, expand=True)

    def select_file(self, file_type):
        """Ask for an Excel file and load it as the training or test set.

        Args:
            file_type: "train" stores the data in ``train_df``; any other
                value stores it in ``test_df``.

        The file must contain a '水位' column and 'year', 'month', 'day'
        columns. When 'hour' and 'minute' (and optionally 'second') are
        also present they are included in the datetime index. Problems are
        reported through message boxes; nothing is raised.
        """
        # Show a readable label in the dialog title instead of the raw token
        set_label = "训练" if file_type == "train" else "测试"
        file_path = filedialog.askopenfilename(
            title=f"选择{set_label}集Excel文件",
            filetypes=[("Excel文件", "*.xlsx *.xls"), ("所有文件", "*.*")]
        )
        if not file_path:
            return
        try:
            df = pd.read_excel(file_path)

            if '水位' not in df.columns:
                messagebox.showerror("列名错误", "Excel文件必须包含'水位'列")
                return
            time_features = ['year', 'month', 'day']
            missing_time_features = [feat for feat in time_features if feat not in df.columns]
            if missing_time_features:
                messagebox.showerror("列名错误",
                                     f"Excel文件缺少预处理后的时间特征列: {', '.join(missing_time_features)}\n"
                                     "请确保已使用预处理功能添加这些列")
                return

            # Assemble the timestamp from whichever components are available.
            # Sub-day precision is only used when both hour and minute exist.
            time_columns = list(time_features)
            if 'hour' in df.columns and 'minute' in df.columns:
                time_columns += ['hour', 'minute']
                if 'second' in df.columns:
                    time_columns.append('second')
            df['datetime'] = pd.to_datetime(df[time_columns])
            df = df.set_index('datetime')

            if file_type == "train":
                self.train_df = df
                self.train_file_var.set(os.path.basename(file_path))
                self.status_var.set(f"已加载训练集: {len(self.train_df)}条数据")
            else:
                self.test_df = df
                self.test_file_var.set(os.path.basename(file_path))
                self.status_var.set(f"已加载测试集: {len(self.test_df)}条数据")
        except Exception as e:
            messagebox.showerror("文件错误", f"读取文件失败: {str(e)}")

    def calculate_metrics(self, y_true, y_pred):
        """Return MSE, RMSE, MAE, MAPE (percent) and R2 for two equal-length series.

        Both inputs are flattened first, so (n,) and (n, 1) arrays are
        handled identically. MAPE skips entries whose true value is zero
        and is NaN when every true value is zero.
        """
        from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

        y_true = np.asarray(y_true, dtype=float).ravel()
        y_pred = np.asarray(y_pred, dtype=float).ravel()

        mse = mean_squared_error(y_true, y_pred)
        rmse = np.sqrt(mse)
        mae = mean_absolute_error(y_true, y_pred)
        # Avoid division by zero in the percentage error
        nonzero = y_true != 0
        if nonzero.any():
            mape = np.mean(np.abs((y_true[nonzero] - y_pred[nonzero]) / y_true[nonzero])) * 100
        else:
            mape = float('nan')
        r2 = r2_score(y_true, y_pred)
        return {
            'MSE': float(mse),
            'RMSE': float(rmse),
            'MAE': float(mae),
            'MAPE': float(mape),
            'R2': float(r2)
        }

    def create_dataset(self, data, window_size):
        """Slice column 0 of ``data`` into sliding windows and next-step targets.

        Args:
            data: 2-D array-like; only column 0 is used.
            window_size: number of consecutive samples per input window.

        Returns:
            ``(X, y)`` where ``X`` has shape ``(n, window_size)``, ``y`` has
            shape ``(n,)`` and ``n = max(len(data) - window_size, 0)``.
            ``X`` stays 2-D even when ``n`` is 0 so callers can reshape it.
        """
        series = np.asarray(data)[:, 0]
        sample_count = len(series) - window_size
        if sample_count <= 0:
            return (np.empty((0, window_size), dtype=series.dtype),
                    np.empty((0,), dtype=series.dtype))
        X = np.stack([series[i:i + window_size] for i in range(sample_count)])
        y = series[window_size:]
        return X, np.array(y)

    def create_dynamic_plot_callback(self):
        """Return a Keras callback that redraws the loss curve after every epoch."""

        class DynamicPlotCallback(tf.keras.callbacks.Callback):
            def __init__(self, gui_app):
                super().__init__()
                self.gui_app = gui_app  # owning GUI instance
                self.train_loss = []    # per-epoch training loss
                self.val_loss = []      # per-epoch validation loss (may hold None)

            def on_epoch_end(self, epoch, logs=None):
                """Record the losses and schedule a redraw on the Tk event loop."""
                logs = logs or {}
                self.train_loss.append(logs.get('loss'))
                self.val_loss.append(logs.get('val_loss'))
                self.gui_app.root.after(0, self._update_plot)

            @staticmethod
            def _points(values):
                """Return (epochs, values) with missing (None) entries dropped."""
                pairs = [(i, v) for i, v in enumerate(values, start=1) if v is not None]
                return [p[0] for p in pairs], [p[1] for p in pairs]

            def _update_plot(self):
                """Redraw the loss axes and refresh the status bar."""
                try:
                    loss_ax = self.gui_app.loss_ax
                    loss_ax.clear()

                    train_x, train_y = self._points(self.train_loss)
                    val_x, val_y = self._points(self.val_loss)
                    loss_ax.plot(train_x, train_y, 'b-', label='训练损失')
                    if val_y:
                        loss_ax.plot(val_x, val_y, 'r-', label='验证损失')

                    loss_ax.set_title('模型训练损失')
                    loss_ax.set_xlabel('轮次')
                    loss_ax.set_ylabel('损失', rotation=0)
                    loss_ax.legend(loc='upper right')
                    loss_ax.grid(True, alpha=0.3)

                    # Fit the y-range to the recorded losses
                    all_losses = train_y + val_y
                    if all_losses:
                        min_loss = max(0, min(all_losses) * 0.9)
                        max_loss = max(all_losses) * 1.1
                        if max_loss > min_loss:
                            loss_ax.set_ylim(min_loss, max_loss)

                    self.gui_app.loss_canvas.draw()

                    current_epoch = len(self.train_loss)
                    if current_epoch > 0 and self.train_loss[-1] is not None:
                        latest_train_loss = self.train_loss[-1]
                        latest_val_loss = self.val_loss[-1] if val_y and self.val_loss[-1] is not None else 0
                        self.gui_app.status_var.set(
                            f"训练中 | 轮次: {current_epoch} | "
                            f"训练损失: {latest_train_loss:.6f} | "
                            f"验证损失: {latest_val_loss:.6f}"
                        )
                    self.gui_app.root.update()
                except Exception as e:
                    print(f"更新图表时出错: {str(e)}")

        return DynamicPlotCallback(self)

    def train_model(self):
        """Train a two-layer LSTM on the training set's '水位' column.

        Reads the hyper-parameters from the spinboxes, holds out the first
        20% of the windows for validation, trains with early stopping and
        keeps the weights of the epoch with the best validation R2. The
        model, its scaler and the window size are stored on the instance
        only after training succeeds, so a failed run leaves any earlier
        model usable. Errors are reported through message boxes.
        """
        if self.train_df is None:
            messagebox.showwarning("警告", "请先选择训练集文件")
            return
        # root.update() inside the callbacks keeps the UI alive, which also
        # lets the user click the button again; refuse a nested run.
        if self._is_training:
            messagebox.showwarning("警告", "模型正在训练中,请稍候")
            return
        self._is_training = True
        try:
            self.status_var.set("正在预处理数据...")
            self.root.update()

            window_size = self.window_size_var.get()
            lstm_units = self.lstm_units_var.get()
            epochs = self.epochs_var.get()
            batch_size = self.batch_size_var.get()

            # Fit a fresh scaler so a failed run cannot corrupt the one that
            # belongs to a previously trained model.
            scaler = MinMaxScaler(feature_range=(0, 1))
            train_scaled = scaler.fit_transform(self.train_df[['水位']])

            X_all, y_all = self.create_dataset(train_scaled, window_size)
            val_size = int(0.2 * len(X_all))
            if val_size < 1:
                raise ValueError(
                    f"训练数据不足: 时间窗口为{window_size}时至少需要"
                    f"{window_size + 5}条数据, 当前仅有{len(self.train_df)}条"
                )
            # LSTM expects (samples, timesteps, features)
            X_all = X_all.reshape(len(X_all), window_size, 1)

            # NOTE(review): validation uses the FIRST 20% of the windows, as
            # in the original code; for time series a trailing hold-out is
            # more common — confirm this is intended.
            X_val, y_val = X_all[:val_size], y_all[:val_size]
            X_train, y_train = X_all[val_size:], y_all[val_size:]

            model = Sequential()
            model.add(LSTM(
                lstm_units,
                return_sequences=True,
                input_shape=(window_size, 1)
            ))
            model.add(LSTM(lstm_units))
            model.add(Dense(1))
            model.compile(
                optimizer=Adam(learning_rate=0.001),
                loss='mean_squared_error'
            )

            class MetricsCallback(tf.keras.callbacks.Callback):
                """Evaluate the validation set each epoch and remember the best weights."""

                def __init__(self, X_val, y_val, scaler, gui_app):
                    super().__init__()
                    self.X_val = X_val
                    self.y_val = y_val
                    self.scaler = scaler
                    self.gui_app = gui_app
                    self.best_r2 = -float('inf')
                    self.best_weights = None

                def on_epoch_end(self, epoch, logs=None):
                    val_pred = self.model.predict(self.X_val, verbose=0)
                    # Back to original units before computing the metrics
                    val_pred_orig = self.scaler.inverse_transform(val_pred)
                    y_val_orig = self.scaler.inverse_transform(self.y_val.reshape(-1, 1))
                    metrics = self.gui_app.calculate_metrics(y_val_orig, val_pred_orig)

                    logs = logs or {}
                    logs.update({f'val_{k}': v for k, v in metrics.items()})

                    # Keep the weights of the best epoch by validation R2
                    if metrics['R2'] > self.best_r2:
                        self.best_r2 = metrics['R2']
                        self.best_weights = self.model.get_weights()

                    status = (f"训练中 | 轮次: {epoch + 1} | "
                              f"损失: {logs.get('loss', 0):.6f} | "
                              f"验证R²: {metrics['R2']:.4f}")
                    self.gui_app.status_var.set(status)
                    self.gui_app.root.update()

            metrics_callback = MetricsCallback(X_val, y_val, scaler, self)

            early_stopping = EarlyStopping(
                monitor='val_loss',
                # One third of the requested epochs, as a whole number >= 1
                patience=max(1, epochs // 3),
                min_delta=0.0001,
                restore_best_weights=True,
                verbose=1
            )

            history = model.fit(
                X_train,
                y_train,
                epochs=epochs,
                batch_size=batch_size,
                validation_data=(X_val, y_val),
                callbacks=[early_stopping, metrics_callback],
                verbose=0
            )

            if metrics_callback.best_weights is not None:
                model.set_weights(metrics_callback.best_weights)

            # Publish the trained artefacts together
            self.model = model
            self.scaler = scaler
            self.trained_window_size = window_size

            # Final loss curves, with epochs numbered from 1
            train_losses = history.history['loss']
            val_losses = history.history['val_loss']
            epoch_axis = range(1, len(train_losses) + 1)
            self.loss_ax.clear()
            self.loss_ax.plot(epoch_axis, train_losses, label='训练损失')
            self.loss_ax.plot(epoch_axis, val_losses, label='验证损失')
            self.loss_ax.set_title('模型训练损失')
            self.loss_ax.set_xlabel('轮次')
            self.loss_ax.set_ylabel('损失', rotation=0)
            self.loss_ax.legend()
            self.loss_ax.grid(True)
            self.loss_canvas.draw()

            final_loss = train_losses[-1]
            if early_stopping.stopped_epoch > 0:
                stopped_epoch = early_stopping.stopped_epoch
                # Derived from the history so it does not depend on the
                # Keras version exposing EarlyStopping.best_epoch.
                best_epoch = int(np.argmin(val_losses))
                best_loss = min(val_losses)
                self.status_var.set(
                    f"训练在{stopped_epoch + 1}轮提前终止 | "
                    f"最佳模型在第{best_epoch + 1}轮 | "
                    f"最终损失: {final_loss:.6f} | "
                    f"最佳验证损失: {best_loss:.6f}"
                )
                messagebox.showinfo(
                    "训练完成",
                    f"模型训练提前终止!\n"
                    f"最佳模型在第{best_epoch + 1}轮\n"
                    f"最佳验证损失: {best_loss:.6f}"
                )
            else:
                self.status_var.set(f"模型训练完成 | 最终损失: {final_loss:.6f}")
                messagebox.showinfo("训练完成", "模型训练成功完成!")
        except Exception as e:
            messagebox.showerror("训练错误", f"模型训练失败:\n{str(e)}")
            self.status_var.set("训练失败")
        finally:
            self._is_training = False

    def predict(self):
        """Predict the test set with the trained model and draw the result.

        Uses the window size the model was trained with, draws true and
        predicted values on the embedded prediction axes, shows the metrics
        in the label above the chart and stores the results for
        ``save_results``. Errors are reported through message boxes.
        """
        if self.model is None:
            messagebox.showwarning("警告", "请先训练模型")
            return
        if self.test_df is None:
            messagebox.showwarning("警告", "请先选择测试集文件")
            return
        try:
            self.status_var.set("正在生成预测...")
            self.root.update()

            # The spinbox may have changed since training; the model's input
            # shape is fixed by the window size used at training time.
            window_size = self.trained_window_size or self.window_size_var.get()
            if len(self.test_df) <= window_size:
                raise ValueError(
                    f"测试数据不足: 时间窗口为{window_size}时至少需要"
                    f"{window_size + 1}条数据, 当前仅有{len(self.test_df)}条"
                )

            test_scaled = self.scaler.transform(self.test_df[['水位']])
            X_test, y_test = self.create_dataset(test_scaled, window_size)
            X_test = X_test.reshape(len(X_test), window_size, 1)

            test_predict = self.model.predict(X_test, verbose=0)
            # Back to original units
            test_predict = self.scaler.inverse_transform(test_predict)
            y_test_orig = self.scaler.inverse_transform(y_test.reshape(-1, 1))
            # Timestamps of the predicted samples
            test_time = self.test_df.index[window_size:window_size + len(test_predict)]

            # Draw on the axes that belong to the embedded canvas; creating
            # a new figure here would leave the visible chart unchanged.
            self.ax.clear()
            self.ax.plot(test_time, y_test_orig, label='真实值')
            self.ax.plot(test_time, test_predict, label='预测值', linestyle='--')
            # Extra pad leaves room for the metrics box drawn above the axes
            self.ax.set_title('大坝渗流水位预测结果', pad=28)
            self.ax.set_xlabel('时间')
            self.ax.set_ylabel('测压管水位')
            self.ax.legend()
            self.ax.grid(True)
            self.ax.tick_params(axis='x', rotation=45)

            self.evaluation_metrics = self.calculate_metrics(
                y_test_orig.flatten(),
                test_predict.flatten()
            )
            metrics_text = (
                f"MSE: {self.evaluation_metrics['MSE']:.4f} | "
                f"RMSE: {self.evaluation_metrics['RMSE']:.4f} | "
                f"MAE: {self.evaluation_metrics['MAE']:.4f} | "
                f"MAPE: {self.evaluation_metrics['MAPE']:.2f}% | "
                f"R²: {self.evaluation_metrics['R2']:.4f}"
            )
            self.metrics_var.set(metrics_text)
            self.ax.text(
                0.5, 1.02, metrics_text,
                transform=self.ax.transAxes,
                ha='center',
                va='bottom',
                fontsize=10,
                bbox=dict(facecolor='white', alpha=0.8)
            )

            # Boundary marker at the first predicted timestamp. The x value
            # must be a timestamp: a numeric 0 on a date axis would stretch
            # the range back to the epoch.
            split_point = test_time[0]
            self.ax.axvline(x=split_point, color='k', linestyle='--', alpha=0.5)
            self.ax.text(
                split_point,
                float(np.min(y_test_orig)),
                ' 训练/测试分界',
                rotation=90,
                verticalalignment='bottom'
            )

            self.fig.tight_layout()
            self.canvas.draw()

            # Keep the results for save_results()
            self.test_predict = test_predict
            self.y_test_orig = y_test_orig
            self.test_time = test_time

            self.status_var.set("预测完成,结果已显示")
        except Exception as e:
            messagebox.showerror("预测错误", f"预测失败:\n{str(e)}")
            self.status_var.set("预测失败")

    def save_results(self):
        """Write the last prediction and its metrics to Excel and save the chart.

        The workbook gets a '预测结果' sheet (time, actual, predicted) and a
        '评估指标' sheet; the chart is saved next to it as
        ``<name>_chart.png``. Requires a prior successful ``predict``.
        """
        if getattr(self, 'test_predict', None) is None:
            messagebox.showwarning("警告", "请先生成预测结果")
            return
        save_path = filedialog.asksaveasfilename(
            defaultextension=".xlsx",
            filetypes=[("Excel文件", "*.xlsx"), ("所有文件", "*.*")]
        )
        if not save_path:
            return
        try:
            # Use the arrays captured at prediction time so the output stays
            # consistent even if the spinbox or test file changed afterwards.
            metrics_df = pd.DataFrame([self.evaluation_metrics])
            result_df = pd.DataFrame({
                '时间': self.test_time,
                '实际水位': np.asarray(self.y_test_orig).flatten(),
                '预测水位': np.asarray(self.test_predict).flatten()
            })
            with pd.ExcelWriter(save_path) as writer:
                result_df.to_excel(writer, sheet_name='预测结果', index=False)
                metrics_df.to_excel(writer, sheet_name='评估指标', index=False)

            chart_path = os.path.splitext(save_path)[0] + "_chart.png"
            self.fig.savefig(chart_path, dpi=300)

            self.status_var.set(f"结果已保存至: {os.path.basename(save_path)}")
            messagebox.showinfo("保存成功", f"预测结果和图表已保存至:\n{save_path}\n{chart_path}")
        except Exception as e:
            messagebox.showerror("保存错误", f"保存结果失败:\n{str(e)}")

    def reset(self):
        """Clear loaded data, the model, prediction results and both charts."""
        self.train_df = None
        self.test_df = None
        self.model = None
        self.scaler = MinMaxScaler(feature_range=(0, 1))
        self.trained_window_size = None
        self.test_predict = None
        self.y_test_orig = None
        self.test_time = None
        self.evaluation_metrics = {}
        self.train_file_var.set("")
        self.test_file_var.set("")
        self.metrics_var.set("")
        self.ax.clear()
        self.loss_ax.clear()
        self.canvas.draw()
        self.loss_canvas.draw()
        self.status_var.set("已重置,请选择新数据")
        messagebox.showinfo("重置", "程序已重置,可以开始新的分析")
# Script entry point: create the Tk root window, attach the application
# to it and run the Tk event loop.
if __name__ == "__main__":
    main_window = tk.Tk()
    app = DamSeepageModel(main_window)
    main_window.mainloop()
# Review request: please check the entire code line by line.