import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.metrics import (mean_squared_error, mean_absolute_error, r2_score,
accuracy_score, f1_score, precision_score, recall_score)
from sklearn.utils import resample
import re
import os
import tensorflow as tf
from keras.models import Model
from keras.layers import Input, GRU, Dense, Dropout, Concatenate
from keras.optimizers import Adam
from keras.callbacks import EarlyStopping, ReduceLROnPlateau
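# Note: depending on the installed TensorFlow/Keras versions, the "from keras..." imports
# above may need to be written as "from tensorflow.keras..." instead; both spellings refer
# to the same layers, optimizer and callbacks used below.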
# Runs successfully, but the R² is still not ideal.
# -------------------------- Custom skewness and kurtosis functions --------------------------
def calculate_skewness(data):
    """Compute the bias-corrected sample skewness of a 1-D array."""
mean = np.mean(data)
std = np.std(data, ddof=0)
if std == 0:
return 0.0
n = len(data)
skewness = (n / ((n - 1) * (n - 2))) * np.sum(((data - mean) / std) ** 3)
return skewness
def calculate_kurtosis(data):
    """Compute the bias-corrected sample excess kurtosis of a 1-D array."""
mean = np.mean(data)
std = np.std(data, ddof=0)
if std == 0:
return 0.0
n = len(data)
kurtosis = (n * (n + 1) / ((n - 1) * (n - 2) * (n - 3))) * np.sum(((data - mean) / std) ** 4) - (
3 * (n - 1) ** 2) / ((n - 2) * (n - 3))
return kurtosis
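# Note: both helpers plug the population standard deviation (np.std with ddof=0) into the
# textbook bias-corrected skewness/kurtosis formulas, which are usually written with the
# sample standard deviation (ddof=1). For the 48-month windows used below the difference is
# only a few percent, but the values will differ slightly from scipy.stats.skew/kurtosis
# with bias=False.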
# -------------------------- Automatic time-column detection --------------------------
def auto_detect_time_column(df):
    """Automatically identify the time column of a DataFrame."""
time_column_candidates = ["time", "Time", "TIME", "日期", "日期时间", "datetime", "Datetime", "DATETIME"]
for col in time_column_candidates:
if col in df.columns:
return col
time_keywords = ["时间", "日期", "datetime", "date", "年月", "年-月"]
for col in df.columns:
if any(keyword in str(col) for keyword in time_keywords):
return col
return None
# -------------------------- Load climate-factor data --------------------------
def load_climate_from_single_file(file_path, time_col=None, climate_cols=None):
    """Load and preprocess the climate-factor data."""
if not os.path.exists(file_path):
raise FileNotFoundError(f"气候因子文件不存在:{file_path}")
    # Read the file
if file_path.endswith(".xlsx"):
df = pd.read_excel(file_path)
elif file_path.endswith(".csv"):
df = pd.read_csv(file_path, encoding="utf-8")
else:
raise ValueError(f"文件格式不支持:{file_path}")
    # Auto-detect the time column if none was given
if time_col is None:
time_col = auto_detect_time_column(df)
if time_col is None:
raise ValueError(f"未自动识别到时间列!文件包含列:{df.columns.tolist()}")
print(f"自动识别到时间列:{time_col}")
    # Parse the time column and set it as the index
df[time_col] = pd.to_datetime(df[time_col], errors="coerce")
df = df.dropna(subset=[time_col])
df.set_index(time_col, inplace=True)
    # Select the climate-factor columns
if climate_cols is None:
climate_cols = df.select_dtypes(include=[np.number]).columns.tolist()
else:
missing_cols = [col for col in climate_cols if col not in df.columns]
if missing_cols:
raise ValueError(f"指定的气候因子列不存在:{missing_cols}")
    # Preprocessing: restrict the study period and fill gaps
    df_climate = df[climate_cols].copy()
    df_climate = df_climate.loc['1981-01':'2022-12']
    df_climate = df_climate.interpolate(method="linear").ffill().bfill()
    # Ensure a complete monthly time range
date_range = pd.date_range(start='1981-01-01', end='2022-12-31', freq='MS')
df_climate = df_climate.reindex(date_range).interpolate(method="linear")
print(f"气候因子数据处理完成:{df_climate.shape},包含{len(climate_cols)}个因子")
return df_climate, climate_cols
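# Expected layout of the climate file (an assumption for illustration, not taken from the
# data itself): one time column plus one numeric column per climate index, in monthly rows,
# e.g.  time | Nino3.4 | PDO | AO.  All numeric columns are used when climate_cols is None.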
# -------------------------- Climate-index feature expansion --------------------------
def add_climate_features(df_original, df_climate, climate_indices, lag_months=[1, 3, 6], window_size=12):
    """Add lagged and rolling-window features for each climate factor."""
df = df_original.copy()
df = df.join(df_climate, how="inner")
    # Lagged features
    for idx in climate_indices:
        for lag in lag_months:
            df[f"{idx}_lag{lag}"] = df[idx].shift(lag)
    # Rolling-window features
for idx in climate_indices:
df[f"{idx}_win_mean"] = df[idx].rolling(window=window_size).mean()
df[f"{idx}_win_std"] = df[idx].rolling(window=window_size).std()
df[f"{idx}_win_max"] = df[idx].rolling(window=window_size).max()
df[f"{idx}_win_min"] = df[idx].rolling(window=window_size).min()
        def rolling_trend(series):
            """Slope of a linear fit over the rolling window."""
if len(series.dropna()) < 2:
return 0.0
x = np.arange(len(series)).reshape(-1, 1)
model = LinearRegression()
model.fit(x, series.values)
return model.coef_[0]
df[f"{idx}_win_trend"] = df[idx].rolling(window=window_size).apply(rolling_trend)
df = df.dropna()
climate_feature_cols = [col for col in df.columns if any(idx in col for idx in climate_indices)]
print(f"添加气候特征完成:新增{len(climate_feature_cols)}个特征,总形状{df.shape}")
return df, climate_feature_cols
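# For a climate index column named, e.g., "PDO" and the default arguments, the generated
# columns are PDO_lag1, PDO_lag3, PDO_lag6, PDO_win_mean, PDO_win_std, PDO_win_max,
# PDO_win_min and PDO_win_trend. Note that climate_feature_cols also keeps the raw index
# column itself, since its name matches the substring filter above.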
# -------------------------- Build supervised samples with a sliding window --------------------------
def calculate_window_stats(window_data):
    """Compute summary statistics of one sliding window."""
stats = []
stats.append(np.mean(window_data))
stats.append(np.std(window_data))
stats.append(np.max(window_data))
stats.append(np.min(window_data))
stats.append(np.median(window_data))
    stats.append(np.ptp(window_data))  # range (max - min)
    stats.append(np.percentile(window_data, 25))
    stats.append(np.percentile(window_data, 75))
    # Trend feature: slope of a linear fit over the window
if len(window_data) >= 2:
x = np.arange(len(window_data)).reshape(-1, 1)
model = LinearRegression()
model.fit(x, window_data)
stats.append(model.coef_[0])
else:
stats.append(0.0)
stats.append(calculate_skewness(window_data))
stats.append(calculate_kurtosis(window_data))
return np.array(stats)
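# The 11 statistics, in order: mean, std, max, min, median, range, 25th percentile,
# 75th percentile, linear-trend slope, skewness and kurtosis. This count must stay in sync
# with n_stats_features inside sliding_window_with_climate().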
def sliding_window_with_climate(data, window_size=48, forecast_horizon=12, climate_feature_cols=None):
    """Build sliding-window samples."""
target_data = data["target"].values
y_data = data["target"].values
y_stage1_data = data["y_stage1"].values
month_sin_data = data["month_sin"].values
month_cos_data = data["month_cos"].values
climate_data = data[climate_feature_cols].values
n_samples = len(data) - window_size - forecast_horizon + 1
if n_samples <= 0:
raise ValueError(f"有效样本数为0!请调整窗口大小或预测步长")
    # Feature-dimension bookkeeping
n_hist_features = window_size
n_stats_features = 11
n_seasonal_features = 2
n_climate_features = len(climate_feature_cols)
n_total_features = n_hist_features + n_stats_features + n_seasonal_features + n_climate_features
    # Pre-allocate the arrays
X = np.zeros((n_samples, n_total_features))
y = np.zeros((n_samples, forecast_horizon))
y_stage1 = np.zeros((n_samples, forecast_horizon), dtype=int)
X_times = []
    # Build the samples
    for i in range(n_samples):
        # Raw historical window of the target
hist_features = target_data[i:i + window_size]
window_target = target_data[i:i + window_size]
stats_features = calculate_window_stats(window_target)
        # Seasonal features of the first forecast month
        target_month_idx = i + window_size
        seasonal_features = np.array([month_sin_data[target_month_idx], month_cos_data[target_month_idx]])
        # Climate features of the first forecast month
        climate_features = climate_data[target_month_idx]
        # Concatenate all features
X[i] = np.concatenate([hist_features, stats_features, seasonal_features, climate_features])
y[i] = y_data[i + window_size: i + window_size + forecast_horizon]
y_stage1[i] = y_stage1_data[i + window_size: i + window_size + forecast_horizon]
X_times.append(data.index[i + window_size - 1])
return X, y, y_stage1, pd.DatetimeIndex(X_times)
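# Layout of each sample produced above:
#   X[i] = [window_size raw target values | 11 window statistics | month_sin, month_cos |
#           climate features of the first forecast month]
#   y[i] = the next forecast_horizon target values, y_stage1[i] = their binary labels,
#   X_times[i] = timestamp of the last month inside the input window.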
# -------------------------- GRU model architecture --------------------------
def build_gru_model(time_steps, n_hist_features, n_static_features, forecast_horizon=12):
    """Build the two-branch GRU model."""
    # Sequence-input branch
seq_input = Input(shape=(time_steps, n_hist_features), name="sequence_input")
    # Stacked GRU layers
    gru1 = GRU(64, return_sequences=True, dropout=0.2)(seq_input)
    gru2 = GRU(32, dropout=0.2)(gru1)
    # Static-feature input branch
static_input = Input(shape=(n_static_features,), name="static_input")
dense_static = Dense(32, activation="relu")(static_input)
dense_static = Dropout(0.2)(dense_static)
    # Feature fusion
    merged = Concatenate()([gru2, dense_static])
    # Regression layers
dense1 = Dense(64, activation="relu")(merged)
dense1 = Dropout(0.3)(dense1)
dense2 = Dense(32, activation="relu")(dense1)
    # Output layer
    regression_output = Dense(forecast_horizon, name="regression_output")(dense2)
    # Assemble the model
model = Model(
inputs=[seq_input, static_input],
outputs=regression_output
)
    # Compile the model
model.compile(
optimizer=Adam(learning_rate=0.001),
loss="mse",
metrics=["mae", "mse"]
)
return model
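# Expected tensor shapes for the settings used in main() below (window_size=48,
# n_hist_features=1): sequence_input is (batch, 48, 1), static_input is
# (batch, n_static_features) with n_static_features = 11 statistics + 2 seasonal terms +
# the number of generated climate features, and the output is (batch, forecast_horizon).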
# -------------------------- Data resampling --------------------------
def improved_resample_data(X_train, y_train, y_stage1_train, strategy='oversample'):
    """Improved resampling to handle class imbalance."""
    # Stack features and targets so that rows stay aligned during resampling
combined_data = np.column_stack([X_train, y_train, y_stage1_train])
    # Split the classes based on the first forecast month's label
minority_mask = (y_stage1_train[:, 0] == 1)
majority_data = combined_data[~minority_mask]
minority_data = combined_data[minority_mask]
print(f"重采样前 - 多数类样本: {len(majority_data)}, 少数类样本: {len(minority_data)}")
if len(minority_data) > 0:
if strategy == 'oversample':
            # Oversample the minority class
minority_oversampled = resample(
minority_data,
replace=True,
n_samples=len(majority_data),
random_state=42
)
resampled_data = np.vstack([majority_data, minority_oversampled])
print(f"过采样后 - 少数类样本: {len(minority_oversampled)}")
elif strategy == 'undersample':
            # Undersample the majority class
majority_undersampled = resample(
majority_data,
replace=False,
n_samples=len(minority_data),
random_state=42
)
resampled_data = np.vstack([majority_undersampled, minority_data])
print(f"欠采样后 - 多数类样本: {len(majority_undersampled)}")
else:
raise ValueError("策略必须是 'oversample' 或 'undersample'")
else:
print("警告:少数类样本数为0,跳过重采样")
resampled_data = combined_data
    # Shuffle the resampled data
    np.random.shuffle(resampled_data)
    # Split back into features and targets
n_features = X_train.shape[1]
n_targets = y_train.shape[1]
X_resampled = resampled_data[:, :n_features]
y_resampled = resampled_data[:, n_features:n_features + n_targets]
y_stage1_resampled = resampled_data[:, n_features + n_targets:]
print(f"重采样后总样本数: {len(X_resampled)}")
return X_resampled, y_resampled, y_stage1_resampled.astype(int)
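# Note: oversampling duplicates entire rows (features plus all forecast_horizon targets),
# and the classes are balanced only with respect to the first forecast month's label.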
# -------------------------- Analyse the prediction results --------------------------
def analyze_prediction_issues(y_true, y_pred, y_true_class, y_pred_class):
    """Detailed analysis of prediction problems."""
print("\n" + "=" * 60)
print("预测问题详细分析")
print("=" * 60)
    # Basic statistics
print(f"真实值统计:")
print(f" 最小值: {y_true.min():.4f}, 最大值: {y_true.max():.4f}")
print(f" 均值: {y_true.mean():.4f}, 标准差: {y_true.std():.4f}")
print(f" 中位数: {np.median(y_true):.4f}")
print(f"\n预测值统计:")
print(f" 最小值: {y_pred.min():.4f}, 最大值: {y_pred.max():.4f}")
print(f" 均值: {y_pred.mean():.4f}, 标准差: {y_pred.std():.4f}")
print(f" 中位数: {np.median(y_pred):.4f}")
    # Class distribution
unique_true, counts_true = np.unique(y_true_class, return_counts=True)
unique_pred, counts_pred = np.unique(y_pred_class, return_counts=True)
print(f"\n类别分布:")
print(f" 真实 - 0: {counts_true[0]}, 1: {counts_true[1] if len(counts_true) > 1 else 0}")
print(f" 预测 - 0: {counts_pred[0]}, 1: {counts_pred[1] if len(counts_pred) > 1 else 0}")
    # R² analysis: compare against a mean-only baseline
y_mean = np.mean(y_true)
mse_mean = np.mean((y_true - y_mean) ** 2)
mse_model = mean_squared_error(y_true, y_pred)
print(f"\nR²分析:")
print(f" 均值预测的MSE: {mse_mean:.4f}")
print(f" 模型预测的MSE: {mse_model:.4f}")
print(f" 模型比均值预测{'好' if mse_model < mse_mean else '差'}")
# -------------------------- Forecast the 2023 data --------------------------
def predict_2023_data(model, df_combined, climate_feature_cols, scaler_X, scaler_y,
                      window_size=48, forecast_horizon=12):
    """Predict the 2023 drought-flood abrupt alternation series."""
print("\n" + "=" * 60)
print("开始预测2023年数据")
print("=" * 60)
    # Use the last window_size months of data as the forecast starting point
    last_data = df_combined.iloc[-window_size:].copy()
    # Prepare the prediction inputs
target_data = last_data["target"].values
month_sin_data = last_data["month_sin"].values
month_cos_data = last_data["month_cos"].values
climate_data = last_data[climate_feature_cols].values
    # Window summary statistics
stats_features = calculate_window_stats(target_data)
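    # Note: in the training samples the seasonal and climate features describe the first
    # forecast month, whereas here they are taken from the last observed month (presumably
    # because the 2023 climate indices are not yet available), so there is a one-month
    # offset relative to training.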
    # Seasonal features (taken from the last month of the window)
    seasonal_features = np.array([month_sin_data[-1], month_cos_data[-1]])
    # Climate features (taken from the last month of the window)
    climate_features = climate_data[-1]
    # Concatenate all features
X_pred = np.concatenate([target_data, stats_features, seasonal_features, climate_features]).reshape(1, -1)
    # Scale with the feature scaler fitted on the training data
    X_pred_scaled = scaler_X.transform(X_pred)
    # Split into the sequence part and the static part
time_steps = window_size
n_hist_features = 1
n_static_features = X_pred_scaled.shape[1] - time_steps
pred_hist = X_pred_scaled[:, :time_steps].reshape(-1, time_steps, n_hist_features)
pred_static = X_pred_scaled[:, time_steps:]
    # Predict and invert the target scaling
    y_pred_scaled = model.predict([pred_hist, pred_static], verbose=0)
    y_pred_2023 = scaler_y.inverse_transform(y_pred_scaled)
    # Binary event prediction (threshold = 0)
    y_pred_class_2023 = (y_pred_2023 > 0).astype(int)
    # Build the results DataFrame
dates_2023 = pd.date_range(start='2023-01-01', periods=forecast_horizon, freq='MS')
results_2023 = pd.DataFrame({
'date': dates_2023,
'predicted_value': y_pred_2023[0],
'predicted_class': y_pred_class_2023[0]
})
print("2023年预测结果:")
print(results_2023)
    # Summarise the forecast
n_events = np.sum(y_pred_class_2023[0])
print(f"\n2023年预测统计:")
print(f" 预测发生旱涝急转的月份数: {n_events}")
if n_events > 0:
event_months = dates_2023[y_pred_class_2023[0] == 1]
print(f" 预测发生旱涝急转的月份: {[m.strftime('%Y-%m') for m in event_months]}")
return results_2023
# -------------------------- Output detailed results --------------------------
def output_detailed_results(y_test, y_pred_original, y_stage1_test, y_pred_class, X_times_test, forecast_horizon):
    """Print and return detailed prediction results."""
print("\n" + "=" * 60)
print("详细预测结果输出")
print("=" * 60)
    # Create the detailed prediction-results DataFrame
detailed_results = []
for i in range(len(X_times_test)):
for month in range(forecast_horizon):
result = {
'预测时间点': X_times_test[i],
'预测月份': month + 1,
'真实值': y_test[i, month],
'预测值': y_pred_original[i, month],
'真实类别': '发生' if y_stage1_test[i, month] == 1 else '未发生',
'预测类别': '发生' if y_pred_class[i, month] == 1 else '未发生',
'预测正确': '是' if y_stage1_test[i, month] == y_pred_class[i, month] else '否',
'绝对误差': abs(y_test[i, month] - y_pred_original[i, month])
}
detailed_results.append(result)
detailed_df = pd.DataFrame(detailed_results)
    # Print the first 20 rows
    print("\n前20条预测结果:")
    print(detailed_df.head(20).to_string(index=False))
    # Per-month summary statistics
print("\n按月统计预测结果:")
monthly_stats = []
for month in range(forecast_horizon):
month_data = detailed_df[detailed_df['预测月份'] == month + 1]
month_stats = {
'月份': month + 1,
'样本数量': len(month_data),
'平均绝对误差': month_data['绝对误差'].mean(),
'预测准确率': (month_data['预测正确'] == '是').mean(),
'发生次数(真实)': (month_data['真实类别'] == '发生').sum(),
'发生次数(预测)': (month_data['预测类别'] == '发生').sum()
}
monthly_stats.append(month_stats)
monthly_df = pd.DataFrame(monthly_stats)
print(monthly_df.to_string(index=False))
return detailed_df, monthly_df
# -------------------------- Main program --------------------------
def main():
print("=" * 60)
print("旱涝急转预测模型 - GRU版本")
print("=" * 60)
    # -------------------------- Step 1: read the drought-flood abrupt alternation data --------------------------
print("\n1. 读取旱涝急转数据...")
df_drought_flood = pd.read_excel(
r"E:\pythonProject\预测20251119\merged_data.xlsx",
sheet_name=2,
usecols=[0, 1]
)
    # Basic preprocessing
df_drought_flood.rename(columns={
df_drought_flood.columns[0]: "time_str",
df_drought_flood.columns[1]: "target"
}, inplace=True)
    def extract_date_from_gridcode(grid_code):
        """Extract a YYYYMM date from the grid-code string."""
if isinstance(grid_code, str):
match = re.search(r'(\d{6})', grid_code)
if match:
date_str = match.group(1)
return pd.to_datetime(date_str, format='%Y%m')
return None
df_drought_flood["time"] = df_drought_flood["time_str"].apply(extract_date_from_gridcode)
df_drought_flood = df_drought_flood.dropna(subset=["time", "target"])
df_drought_flood.set_index("time", inplace=True)
df_drought_flood = df_drought_flood.sort_index()
df_drought_flood = df_drought_flood[["target"]]
df_drought_flood = df_drought_flood.loc['1981-01':'2022-12']
print(f"旱涝急转数据:{df_drought_flood.shape}")
    # -------------------------- Step 2: load the climate factors --------------------------
print("\n2. 加载气候因子数据...")
climate_file_path = r"E:\pythonProject\预测20251119\qixiang_data.xlsx"
df_climate, climate_indices = load_climate_from_single_file(
file_path=climate_file_path,
time_col=None,
climate_cols=None
)
    # -------------------------- Step 3: merge the climate-factor features --------------------------
print("\n3. 合并气候因子特征...")
df_combined, climate_feature_cols = add_climate_features(
df_original=df_drought_flood,
df_climate=df_climate,
climate_indices=climate_indices,
lag_months=[1, 3, 6],
window_size=12
)
    # -------------------------- Step 4: add the binary label and seasonal features --------------------------
print("\n4. 添加二分类标签和季节性特征...")
df_combined["y_stage1"] = (df_combined["target"] != 0).astype(int)
df_combined["month"] = df_combined.index.month
df_combined["month_sin"] = np.sin(2 * np.pi * df_combined["month"] / 12)
df_combined["month_cos"] = np.cos(2 * np.pi * df_combined["month"] / 12)
df_combined.drop("month", axis=1, inplace=True)
class_distribution = df_combined["y_stage1"].value_counts()
print(f"二分类标签分布:0={class_distribution[0]},1={class_distribution[1]}")
    # -------------------------- Step 5: build sliding-window samples --------------------------
print("\n5. 构造滑动窗口样本...")
window_size = 48
forecast_horizon = 12
X, y, y_stage1, X_times = sliding_window_with_climate(
data=df_combined,
window_size=window_size,
forecast_horizon=forecast_horizon,
climate_feature_cols=climate_feature_cols
)
print(f"滑动窗口样本:X={X.shape},y={y.shape},y_stage1={y_stage1.shape}")
    # -------------------------- Step 6: data split and scaling --------------------------
print("\n6. 数据划分与归一化...")
train_ratio = 0.7
n_train = int(len(X) * train_ratio)
    # Chronological train/test split
X_train, X_test = X[:n_train], X[n_train:]
y_train, y_test = y[:n_train], y[n_train:]
y_stage1_train, y_stage1_test = y_stage1[:n_train], y_stage1[n_train:]
X_times_train, X_times_test = X_times[:n_train], X_times[n_train:]
    # Standardise features and targets
scaler_X = StandardScaler()
X_train_scaled = scaler_X.fit_transform(X_train)
X_test_scaled = scaler_X.transform(X_test)
scaler_y = StandardScaler()
y_train_scaled = scaler_y.fit_transform(y_train)
y_test_scaled = scaler_y.transform(y_test)
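    # Both scalers are fitted on the training split only, so no information from the test
    # period leaks into the scaling. y_test_scaled is kept for completeness but is not used
    # below; the evaluation is carried out in the original units.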
print(f"训练集:X={X_train_scaled.shape},y={y_train_scaled.shape}")
print(f"测试集:X={X_test_scaled.shape},y={y_test_scaled.shape}")
    # -------------------------- Step 7: handle class imbalance --------------------------
print("\n7. 处理类别不平衡...")
X_train_resampled, y_train_resampled, y_stage1_train_resampled = improved_resample_data(
X_train_scaled, y_train_scaled, y_stage1_train, strategy='oversample'
)
    # -------------------------- Step 8: model training --------------------------
print("\n8. 模型训练...")
    # Reshape into sequence and static model inputs
time_steps = window_size
n_hist_features = 1
n_static_features = X_train_resampled.shape[1] - time_steps
train_hist = X_train_resampled[:, :time_steps].reshape(-1, time_steps, n_hist_features)
test_hist = X_test_scaled[:, :time_steps].reshape(-1, time_steps, n_hist_features)
train_static = X_train_resampled[:, time_steps:]
test_static = X_test_scaled[:, time_steps:]
    # Build the GRU model
model = build_gru_model(time_steps, n_hist_features, n_static_features, forecast_horizon)
    # Training callbacks
early_stopping = EarlyStopping(monitor="val_loss", patience=20, restore_best_weights=True,
mode='min', verbose=1)
reduce_lr = ReduceLROnPlateau(monitor="val_loss", factor=0.5, patience=10, min_lr=1e-6,
mode='min', verbose=1)
print("开始训练模型...")
history = model.fit(
x=[train_hist, train_static],
y=y_train_resampled,
epochs=150,
batch_size=32,
validation_split=0.2,
callbacks=[early_stopping, reduce_lr],
verbose=1
)
print("进行预测...")
y_pred_scaled = model.predict([test_hist, test_static], verbose=0)
y_pred_original = scaler_y.inverse_transform(y_pred_scaled)
y_pred_class = (y_pred_original > 0).astype(int)
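    # The binary event prediction uses a fixed threshold of 0 on the regression output,
    # while the true labels (y_stage1) mark any non-zero target as an event.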
    # -------------------------- Step 9: model evaluation and result output --------------------------
print("\n9. 模型评估和结果输出...")
    # Regression evaluation (per forecast month)
reg_metrics = []
for month in range(forecast_horizon):
mse = mean_squared_error(y_test[:, month], y_pred_original[:, month])
mae = mean_absolute_error(y_test[:, month], y_pred_original[:, month])
r2 = r2_score(y_test[:, month], y_pred_original[:, month])
reg_metrics.append({'month': month + 1, 'mse': mse, 'mae': mae, 'r2': r2})
    # Classification evaluation (per forecast month)
clf_metrics = []
for month in range(forecast_horizon):
acc = accuracy_score(y_stage1_test[:, month], y_pred_class[:, month])
f1 = f1_score(y_stage1_test[:, month], y_pred_class[:, month], zero_division=0)
precision = precision_score(y_stage1_test[:, month], y_pred_class[:, month], zero_division=0)
recall = recall_score(y_stage1_test[:, month], y_pred_class[:, month], zero_division=0)
clf_metrics.append({'month': month + 1, 'acc': acc, 'f1': f1, 'precision': precision, 'recall': recall})
    # Average the metrics over the forecast months
avg_reg_mse = np.mean([m['mse'] for m in reg_metrics])
avg_reg_mae = np.mean([m['mae'] for m in reg_metrics])
avg_reg_r2 = np.mean([m['r2'] for m in reg_metrics])
avg_clf_acc = np.mean([m['acc'] for m in clf_metrics])
avg_clf_f1 = np.mean([m['f1'] for m in clf_metrics])
avg_clf_precision = np.mean([m['precision'] for m in clf_metrics])
avg_clf_recall = np.mean([m['recall'] for m in clf_metrics])
    # Output the detailed results
detailed_df, monthly_df = output_detailed_results(y_test, y_pred_original, y_stage1_test, y_pred_class,
X_times_test, forecast_horizon)
    # Detailed analysis (first forecast month)
analyze_prediction_issues(y_test[:, 0], y_pred_original[:, 0], y_stage1_test[:, 0], y_pred_class[:, 0])
    # -------------------------- Step 10: predict the 2023 data --------------------------
print("\n10. 预测2023年旱涝急转数据...")
results_2023 = predict_2023_data(
model=model,
df_combined=df_combined,
climate_feature_cols=climate_feature_cols,
scaler_X=scaler_X,
scaler_y=scaler_y,
window_size=window_size,
forecast_horizon=forecast_horizon
)
    # -------------------------- Step 11: save all results --------------------------
print("\n11. 保存所有结果...")
    # Save the evaluation metrics
pd.DataFrame(reg_metrics).to_excel("regression_metrics_gru.xlsx", index=False)
pd.DataFrame(clf_metrics).to_excel("classification_metrics_gru.xlsx", index=False)
    # Save the detailed prediction results
detailed_df.to_excel("detailed_prediction_results_gru.xlsx", index=False)
monthly_df.to_excel("monthly_statistics_gru.xlsx", index=False)
    # Save the 2023 forecast
results_2023.to_excel("2023_prediction_results_gru.xlsx", index=False)
    # Save the training history
history_df = pd.DataFrame(history.history)
history_df.to_excel("training_history_gru.xlsx", index=False)
print("\n" + "=" * 60)
print("模型训练和评估完成!")
print("=" * 60)
    # Final summary
print("\n最终结果汇总:")
print(f"回归任务:")
print(f" 平均MSE: {avg_reg_mse:.4f}")
print(f" 平均MAE: {avg_reg_mae:.4f}")
print(f" 平均R²: {avg_reg_r2:.4f}")
print(f"\n分类任务:")
print(f" 平均准确率: {avg_clf_acc:.4f}")
print(f" 平均F1分数: {avg_clf_f1:.4f}")
print(f" 平均精确率: {avg_clf_precision:.4f}")
print(f" 平均召回率: {avg_clf_recall:.4f}")
print(f"\n结果文件已保存:")
print(f" - 回归指标: regression_metrics_gru.xlsx")
print(f" - 分类指标: classification_metrics_gru.xlsx")
print(f" - 详细预测结果: detailed_prediction_results_gru.xlsx")
print(f" - 月度统计: monthly_statistics_gru.xlsx")
print(f" - 2023年预测: 2023_prediction_results_gru.xlsx")
print(f" - 训练历史: training_history_gru.xlsx")
print(f" - GRU模型: gru_model.h5")
    # Save the trained model
model.save("gru_model.h5")
print("GRU模型保存完成!")
if __name__ == "__main__":
    main()