LintCode 360: Sliding Window Median (双堆 或 双Set, 难题!)

本文介绍了一种在滑动窗口中寻找中位数的高效算法,通过使用双堆(maxHeap和minHeap)和sliding window来维持窗口内元素的平衡状态,实现O(nlog(n))的时间复杂度。示例代码展示了如何处理重复元素和调整窗口位置。
  1. Sliding Window Median
    Given an array of n integers and a moving window of size k, slide the window one position at a time from the start of the array and find the median of the elements inside the window at each position. (If the window contains an even number of elements, return the N/2-th number after sorting the elements in the window.)

Example
For array [1,2,7,8,5], moving window size k = 3. return [2,7,7]

At first the window is at the start of the array like this

[ | 1,2,7 | ,8,5] , return the median 2;

then the window move one step forward.

[1, | 2,7,8 | ,5], return the median 7;

then the window move one step forward again.

[1,2, | 7,8,5 | ], return the median 7;

Challenge
O(nlog(n)) time

思路:
用双堆maxHeap和minHeap,分别装k/2左右的元素(maxHeap装较小的一半,minHeap装较大的一半)。同时维护一个sliding window,保证两个heap的总元素个数是K个。median就是maxHeap的堆顶,即较小一半中的最大元素(对应代码中smallSet的最后一个元素)。

注意:

  1. JAVA的PriorityQueue有remove(Object),但C++的priority_queue不提供删除任意元素的函数。所以只能用set。而有可能存在重复元素,故用multiset。
  2. 删除元素的时候要用erase(iterator)。因为虽然erase()也可以用元素值做参数,但有重复元素的话就把所有的重复元素都删掉了。
  3. set.end()指向最后一个元素的后一个位置,所以要得到set的最后一个元素必须用set.rbegin(),也可以用--set.end()。但erase()的参数必须是值或iterator,所以不能直接传set.rbegin(),因为它是reverse iterator。

代码如下:

class Solution {
public:
    /**
     * Compute the median of every length-k window of nums.
     *
     * The window is kept in two ordered multisets: `lower` holds the smaller
     * half and `upper` the larger half. After every insertion `lower` has
     * either the same number of elements as `upper` or exactly one more, so
     * the median (the N/2-th element for an even-sized window) is always the
     * largest element of `lower`.
     *
     * @param nums: A list of integers
     * @param k: An integer, the window size
     * @return: The median of the elements inside the window at each position;
     *          empty when k <= 0 or k exceeds nums.size()
     */
    std::vector<int> medianSlidingWindow(std::vector<int> &nums, int k) {
        std::vector<int> result;
        const int len = static_cast<int>(nums.size());
        // k <= 0 (not just k == 0): a negative k would otherwise pass the
        // guard and index nums[i - k] past the end of the array.
        if (k <= 0 || len < k) return result;
        result.reserve(len - k + 1);

        std::multiset<int> lower, upper;

        for (int i = 0; i < len; ++i) {
            // Slide the window: drop the element that just left it.
            if (i >= k) {
                const int leaving = nums[i - k];
                // Erase by iterator so only ONE copy of a duplicated value
                // is removed; look the value up once and reuse the iterator.
                std::multiset<int>::iterator it = lower.find(leaving);
                if (it != lower.end()) {
                    lower.erase(it);
                } else {
                    upper.erase(upper.find(leaving));
                }
            }

            const int incoming = nums[i];
            if (lower.size() <= upper.size()) {
                // `lower` must grow by one.
                if (upper.empty() || incoming <= *upper.begin()) {
                    lower.insert(incoming);
                } else {
                    // incoming belongs to the larger half: move the smallest
                    // element of `upper` down to make room for it.
                    lower.insert(*upper.begin());
                    upper.erase(upper.begin());
                    upper.insert(incoming);
                }
            } else {
                // `upper` must grow by one.
                if (lower.empty() || incoming >= *lower.rbegin()) {
                    upper.insert(incoming);
                } else {
                    // incoming belongs to the smaller half: move the largest
                    // element of `lower` up. erase() needs a forward
                    // iterator, hence --end() instead of rbegin().
                    upper.insert(*lower.rbegin());
                    lower.erase(--lower.end());
                    lower.insert(incoming);
                }
            }

            // The first full window ends at index k - 1.
            if (i >= k - 1) {
                result.push_back(*lower.rbegin());
            }
        }
        return result;
    }
};
import os
import re

import numpy as np
import pandas as pd
import tensorflow as tf
from keras.callbacks import EarlyStopping, ReduceLROnPlateau
from keras.layers import Input, GRU, Dense, Dropout, Concatenate
from keras.models import Model
from keras.optimizers import Adam
from sklearn.linear_model import LinearRegression
from sklearn.metrics import (mean_squared_error, mean_absolute_error, r2_score,
                             accuracy_score, f1_score, precision_score,
                             recall_score)
from sklearn.preprocessing import StandardScaler
from sklearn.utils import resample

# Author's note: the pipeline runs successfully, but the R² is not satisfactory.

# Seed shared by the resampling and shuffling steps so runs are repeatable.
_RANDOM_STATE = 42


# -------------------------- Skewness / kurtosis helpers --------------------------
def calculate_skewness(data):
    """Return the bias-adjusted skewness of *data*.

    Returns 0.0 when there are fewer than 3 values (the adjustment factor
    divides by (n - 1)(n - 2)) or when the standard deviation is zero.
    """
    data = np.asarray(data, dtype=float)
    n = len(data)
    if n < 3:
        return 0.0
    mean = np.mean(data)
    std = np.std(data, ddof=0)
    if std == 0:
        return 0.0
    return (n / ((n - 1) * (n - 2))) * np.sum(((data - mean) / std) ** 3)


def calculate_kurtosis(data):
    """Return the bias-adjusted excess kurtosis of *data*.

    Returns 0.0 when there are fewer than 4 values (the adjustment factor
    divides by (n - 1)(n - 2)(n - 3)) or when the standard deviation is zero.
    """
    data = np.asarray(data, dtype=float)
    n = len(data)
    if n < 4:
        return 0.0
    mean = np.mean(data)
    std = np.std(data, ddof=0)
    if std == 0:
        return 0.0
    scale = n * (n + 1) / ((n - 1) * (n - 2) * (n - 3))
    correction = (3 * (n - 1) ** 2) / ((n - 2) * (n - 3))
    return scale * np.sum(((data - mean) / std) ** 4) - correction


# -------------------------- Time-column detection --------------------------
def auto_detect_time_column(df):
    """Return the name of the column that looks like a time column, or None.

    Exact matches against a list of well-known names are tried first, then a
    keyword (substring) search over every column name.
    """
    time_column_candidates = ["time", "Time", "TIME", "日期", "日期时间",
                              "datetime", "Datetime", "DATETIME"]
    for col in time_column_candidates:
        if col in df.columns:
            return col

    time_keywords = ["时间", "日期", "datetime", "date", "年月", "年-月"]
    for col in df.columns:
        if any(keyword in str(col) for keyword in time_keywords):
            return col
    return None


# -------------------------- Climate-factor loading --------------------------
def load_climate_from_single_file(file_path, time_col=None, climate_cols=None):
    """Load climate factors from an .xlsx/.csv file and preprocess them.

    Args:
        file_path: Path to the climate-factor file.
        time_col: Name of the time column; auto-detected when None.
        climate_cols: Columns to keep; all numeric columns when None.

    Returns:
        (df_climate, climate_cols): the factors reindexed to month starts
        from 1981-01 to 2022-12, and the list of factor column names.

    Raises:
        FileNotFoundError: if *file_path* does not exist.
        ValueError: on an unsupported extension, an undetectable time column,
            or requested columns that are missing from the file.
    """
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"气候因子文件不存在:{file_path}")

    # Read the file
    if file_path.endswith(".xlsx"):
        df = pd.read_excel(file_path)
    elif file_path.endswith(".csv"):
        df = pd.read_csv(file_path, encoding="utf-8")
    else:
        raise ValueError(f"文件格式不支持:{file_path}")

    # Auto-detect the time column
    if time_col is None:
        time_col = auto_detect_time_column(df)
        if time_col is None:
            raise ValueError(f"未自动识别到时间列!文件包含列:{df.columns.tolist()}")
        print(f"自动识别到时间列:{time_col}")

    # Parse the time column; rows whose time cannot be parsed are dropped
    df[time_col] = pd.to_datetime(df[time_col], errors="coerce")
    df = df.dropna(subset=[time_col])
    df.set_index(time_col, inplace=True)
    # Label-based date slicing below needs a monotonic index.
    df = df.sort_index()

    # Select the climate-factor columns
    if climate_cols is None:
        climate_cols = df.select_dtypes(include=[np.number]).columns.tolist()
    else:
        missing_cols = [col for col in climate_cols if col not in df.columns]
        if missing_cols:
            raise ValueError(f"指定的气候因子列不存在:{missing_cols}")

    # Restrict to the study period and fill gaps
    df_climate = df[climate_cols].copy()
    df_climate = df_climate.loc['1981-01':'2022-12']
    df_climate = df_climate.interpolate(method="linear").ffill().bfill()

    # Make sure every month of the period is present
    date_range = pd.date_range(start='1981-01-01', end='2022-12-31', freq='MS')
    df_climate = df_climate.reindex(date_range).interpolate(method="linear")

    print(f"气候因子数据处理完成:{df_climate.shape},包含{len(climate_cols)}个因子")
    return df_climate, climate_cols


# -------------------------- Climate-index feature expansion --------------------------
def _rolling_trend(series):
    """Return the slope of a linear fit over one rolling window (0.0 if < 2 points)."""
    if len(series.dropna()) < 2:
        return 0.0
    x = np.arange(len(series)).reshape(-1, 1)
    model = LinearRegression()
    model.fit(x, series.values)
    return model.coef_[0]


def add_climate_features(df_original, df_climate, climate_indices,
                         lag_months=(1, 3, 6), window_size=12):
    """Join climate factors onto *df_original* and add lag / rolling features.

    For every index in *climate_indices* this adds one lagged column per entry
    of *lag_months* plus rolling mean, std, max, min and linear trend over
    *window_size* rows. Rows containing NaN afterwards are dropped.

    Returns:
        (df, climate_feature_cols): the expanded frame and the names of all
        columns whose name contains one of the climate index names.
    """
    df = df_original.copy()
    df = df.join(df_climate, how="inner")

    # Lag features
    for idx in climate_indices:
        for lag in lag_months:
            df[f"{idx}_lag{lag}"] = df[idx].shift(lag)

    # Rolling-window features
    for idx in climate_indices:
        rolling = df[idx].rolling(window=window_size)
        df[f"{idx}_win_mean"] = rolling.mean()
        df[f"{idx}_win_std"] = rolling.std()
        df[f"{idx}_win_max"] = rolling.max()
        df[f"{idx}_win_min"] = rolling.min()
        df[f"{idx}_win_trend"] = rolling.apply(_rolling_trend)

    df = df.dropna()

    # NOTE(review): this is a substring match, so a short index name could
    # also select unrelated columns - confirm against the real column names.
    climate_feature_cols = [col for col in df.columns
                            if any(idx in col for idx in climate_indices)]
    print(f"添加气候特征完成:新增{len(climate_feature_cols)}个特征,总形状{df.shape}")
    return df, climate_feature_cols


# -------------------------- Supervised samples via sliding window --------------------------
def calculate_window_stats(window_data):
    """Return 11 summary statistics of one window as a 1-D array.

    Order: mean, std, max, min, median, range, 25th percentile,
    75th percentile, linear-trend slope, skewness, kurtosis.
    """
    stats = [
        np.mean(window_data),
        np.std(window_data),
        np.max(window_data),
        np.min(window_data),
        np.median(window_data),
        np.ptp(window_data),  # range (max - min)
        np.percentile(window_data, 25),
        np.percentile(window_data, 75),
    ]

    # Trend feature
    if len(window_data) >= 2:
        x = np.arange(len(window_data)).reshape(-1, 1)
        model = LinearRegression()
        model.fit(x, window_data)
        stats.append(model.coef_[0])
    else:
        stats.append(0.0)

    stats.append(calculate_skewness(window_data))
    stats.append(calculate_kurtosis(window_data))
    return np.array(stats)


def sliding_window_with_climate(data, window_size=48, forecast_horizon=12,
                                climate_feature_cols=None):
    """Build sliding-window samples from *data*.

    Each sample's feature vector is the concatenation of: the *window_size*
    historical target values, their 11 window statistics, the (sin, cos)
    month encoding and the climate features. The seasonal and climate
    features are taken from the LAST OBSERVED month of the window, which is
    what ``predict_2023_data`` does at inference time; reading them from the
    first forecast month would leak information that is unavailable when
    forecasting and would not match the inference-time features.

    Returns:
        (X, y, y_stage1, X_times): features, regression targets, binary
        labels, and the timestamp of the last observed month of each sample.

    Raises:
        ValueError: if *data* is too short to yield a single sample.
    """
    climate_feature_cols = (list(climate_feature_cols)
                            if climate_feature_cols is not None else [])

    target_data = data["target"].values
    y_stage1_data = data["y_stage1"].values
    month_sin_data = data["month_sin"].values
    month_cos_data = data["month_cos"].values
    climate_data = data[climate_feature_cols].values

    n_samples = len(data) - window_size - forecast_horizon + 1
    if n_samples <= 0:
        raise ValueError(f"有效样本数为0!请调整窗口大小预测步长")

    # Feature dimensions
    n_hist_features = window_size
    n_stats_features = 11
    n_seasonal_features = 2
    n_climate_features = len(climate_feature_cols)
    n_total_features = (n_hist_features + n_stats_features
                        + n_seasonal_features + n_climate_features)

    # Output arrays
    X = np.zeros((n_samples, n_total_features))
    y = np.zeros((n_samples, forecast_horizon))
    y_stage1 = np.zeros((n_samples, forecast_horizon), dtype=int)
    X_times = []

    for i in range(n_samples):
        window_end = i + window_size          # first forecast month
        last_obs_idx = window_end - 1         # last month inside the window

        hist_features = target_data[i:window_end]
        stats_features = calculate_window_stats(hist_features)
        seasonal_features = np.array([month_sin_data[last_obs_idx],
                                      month_cos_data[last_obs_idx]])
        climate_features = climate_data[last_obs_idx]

        X[i] = np.concatenate([hist_features, stats_features,
                               seasonal_features, climate_features])
        y[i] = target_data[window_end:window_end + forecast_horizon]
        y_stage1[i] = y_stage1_data[window_end:window_end + forecast_horizon]
        X_times.append(data.index[last_obs_idx])

    return X, y, y_stage1, pd.DatetimeIndex(X_times)


# -------------------------- GRU model architecture --------------------------
def build_gru_model(time_steps, n_hist_features, n_static_features, forecast_horizon=12):
    """Build and compile the two-input GRU regression model.

    One branch runs the target history through two stacked GRU layers, the
    other passes the static features through a dense layer; both are
    concatenated and mapped to *forecast_horizon* regression outputs.
    """
    # Sequence-input branch
    seq_input = Input(shape=(time_steps, n_hist_features), name="sequence_input")
    gru1 = GRU(64, return_sequences=True, dropout=0.2)(seq_input)
    gru2 = GRU(32, dropout=0.2)(gru1)

    # Static-input branch
    static_input = Input(shape=(n_static_features,), name="static_input")
    dense_static = Dense(32, activation="relu")(static_input)
    dense_static = Dropout(0.2)(dense_static)

    # Feature fusion
    merged = Concatenate()([gru2, dense_static])

    # Regression head
    dense1 = Dense(64, activation="relu")(merged)
    dense1 = Dropout(0.3)(dense1)
    dense2 = Dense(32, activation="relu")(dense1)
    regression_output = Dense(forecast_horizon, name="regression_output")(dense2)

    model = Model(
        inputs=[seq_input, static_input],
        outputs=regression_output
    )
    model.compile(
        optimizer=Adam(learning_rate=0.001),
        loss="mse",
        metrics=["mae", "mse"]
    )
    return model


# -------------------------- Resampling --------------------------
def improved_resample_data(X_train, y_train, y_stage1_train, strategy='oversample'):
    """Balance the training set on the first-month binary label.

    Args:
        X_train, y_train, y_stage1_train: 2-D arrays with one row per sample.
        strategy: 'oversample' (replicate the label-1 rows up to the size of
            the other class) or 'undersample' (subsample the label-0 rows).

    Returns:
        (X_resampled, y_resampled, y_stage1_resampled) in shuffled row order.
        Resampling is skipped when either class has no samples.

    Raises:
        ValueError: if *strategy* is unknown and both classes are non-empty.
    """
    # Stack everything so rows stay aligned while resampling
    combined_data = np.column_stack([X_train, y_train, y_stage1_train])

    # Split classes on the first forecast month's label
    minority_mask = (y_stage1_train[:, 0] == 1)
    majority_data = combined_data[~minority_mask]
    minority_data = combined_data[minority_mask]

    print(f"重采样前 - 多数类样本: {len(majority_data)}, 少数类样本: {len(minority_data)}")

    if len(minority_data) == 0:
        print("警告:少数类样本数为0,跳过重采样")
        resampled_data = combined_data
    elif len(majority_data) == 0:
        # resample() cannot draw from / up to an empty class.
        print("警告:多数类样本数为0,跳过重采样")
        resampled_data = combined_data
    elif strategy == 'oversample':
        minority_oversampled = resample(
            minority_data,
            replace=True,
            n_samples=len(majority_data),
            random_state=_RANDOM_STATE
        )
        resampled_data = np.vstack([majority_data, minority_oversampled])
        print(f"过采样后 - 少数类样本: {len(minority_oversampled)}")
    elif strategy == 'undersample':
        majority_undersampled = resample(
            majority_data,
            replace=False,
            n_samples=len(minority_data),
            random_state=_RANDOM_STATE
        )
        resampled_data = np.vstack([majority_undersampled, minority_data])
        print(f"欠采样后 - 多数类样本: {len(majority_undersampled)}")
    else:
        raise ValueError("策略必须是 'oversample' 'undersample'")

    # Shuffle with a fixed seed, consistent with the seeded resample() above.
    # NOTE(review): after oversampling, Keras' validation_split takes the last
    # rows of this shuffled array, so validation rows can be duplicates of
    # training rows - consider splitting off validation data before resampling.
    np.random.RandomState(_RANDOM_STATE).shuffle(resampled_data)

    # Split the stacked array back into its parts
    n_features = X_train.shape[1]
    n_targets = y_train.shape[1]
    X_resampled = resampled_data[:, :n_features]
    y_resampled = resampled_data[:, n_features:n_features + n_targets]
    y_stage1_resampled = resampled_data[:, n_features + n_targets:]

    print(f"重采样后总样本数: {len(X_resampled)}")
    return X_resampled, y_resampled, y_stage1_resampled.astype(int)


# -------------------------- Prediction diagnostics --------------------------
def analyze_prediction_issues(y_true, y_pred, y_true_class, y_pred_class):
    """Print summary statistics comparing predictions with the ground truth."""
    print("\n" + "=" * 60)
    print("预测问题详细分析")
    print("=" * 60)

    # Basic statistics
    print(f"真实值统计:")
    print(f" 最小值: {y_true.min():.4f}, 最大值: {y_true.max():.4f}")
    print(f" 均值: {y_true.mean():.4f}, 标准差: {y_true.std():.4f}")
    print(f" 中位数: {np.median(y_true):.4f}")

    print(f"\n预测值统计:")
    print(f" 最小值: {y_pred.min():.4f}, 最大值: {y_pred.max():.4f}")
    print(f" 均值: {y_pred.mean():.4f}, 标准差: {y_pred.std():.4f}")
    print(f" 中位数: {np.median(y_pred):.4f}")

    # Class distribution: count each label explicitly so the numbers stay
    # correct even when one of the classes is absent.
    true_zero = int(np.sum(np.asarray(y_true_class) == 0))
    true_one = int(np.sum(np.asarray(y_true_class) == 1))
    pred_zero = int(np.sum(np.asarray(y_pred_class) == 0))
    pred_one = int(np.sum(np.asarray(y_pred_class) == 1))
    print(f"\n类别分布:")
    print(f" 真实 - 0: {true_zero}, 1: {true_one}")
    print(f" 预测 - 0: {pred_zero}, 1: {pred_one}")

    # R² analysis: compare the model with always predicting the mean
    y_mean = np.mean(y_true)
    mse_mean = np.mean((y_true - y_mean) ** 2)
    mse_model = mean_squared_error(y_true, y_pred)
    print(f"\nR²分析:")
    print(f" 均值预测的MSE: {mse_mean:.4f}")
    print(f" 模型预测的MSE: {mse_model:.4f}")
    print(f" 模型比均值预测{'好' if mse_model < mse_mean else '差'}")


# -------------------------- 2023 forecast --------------------------
def predict_2023_data(model, df_combined, climate_feature_cols, scaler_X, scaler_y,
                      window_size=48, forecast_horizon=12):
    """Forecast the months starting at 2023-01 from the last *window_size* rows.

    Returns:
        DataFrame with columns 'date', 'predicted_value', 'predicted_class'.
    """
    print("\n" + "=" * 60)
    print("开始预测2023年数据")
    print("=" * 60)

    # The last window_size months are the starting point of the forecast
    last_data = df_combined.iloc[-window_size:].copy()

    target_data = last_data["target"].values
    month_sin_data = last_data["month_sin"].values
    month_cos_data = last_data["month_cos"].values
    climate_data = last_data[climate_feature_cols].values

    stats_features = calculate_window_stats(target_data)
    # Seasonal and climate features come from the last observed month
    seasonal_features = np.array([month_sin_data[-1], month_cos_data[-1]])
    climate_features = climate_data[-1]

    X_pred = np.concatenate([target_data, stats_features,
                             seasonal_features, climate_features]).reshape(1, -1)
    X_pred_scaled = scaler_X.transform(X_pred)

    # Split into the sequence part and the static part
    time_steps = window_size
    n_hist_features = 1
    pred_hist = X_pred_scaled[:, :time_steps].reshape(-1, time_steps, n_hist_features)
    pred_static = X_pred_scaled[:, time_steps:]

    y_pred_scaled = model.predict([pred_hist, pred_static], verbose=0)
    y_pred_2023 = scaler_y.inverse_transform(y_pred_scaled)

    # Binary prediction (threshold = 0)
    y_pred_class_2023 = (y_pred_2023 > 0).astype(int)

    dates_2023 = pd.date_range(start='2023-01-01', periods=forecast_horizon, freq='MS')
    results_2023 = pd.DataFrame({
        'date': dates_2023,
        'predicted_value': y_pred_2023[0],
        'predicted_class': y_pred_class_2023[0]
    })

    print("2023年预测结果:")
    print(results_2023)

    n_events = np.sum(y_pred_class_2023[0])
    print(f"\n2023年预测统计:")
    print(f" 预测发生旱涝急转的月份数: {n_events}")
    if n_events > 0:
        event_months = dates_2023[y_pred_class_2023[0] == 1]
        print(f" 预测发生旱涝急转的月份: {[m.strftime('%Y-%m') for m in event_months]}")

    return results_2023


# -------------------------- Detailed result output --------------------------
def output_detailed_results(y_test, y_pred_original, y_stage1_test, y_pred_class,
                            X_times_test, forecast_horizon):
    """Print and return per-prediction results and per-horizon-month statistics.

    Returns:
        (detailed_df, monthly_df)
    """
    print("\n" + "=" * 60)
    print("详细预测结果输出")
    print("=" * 60)

    # One row per (sample, horizon month)
    detailed_results = []
    for i, time_point in enumerate(X_times_test):
        for month in range(forecast_horizon):
            detailed_results.append({
                '预测时间点': time_point,
                '预测月份': month + 1,
                '真实值': y_test[i, month],
                '预测值': y_pred_original[i, month],
                '真实类别': '发生' if y_stage1_test[i, month] == 1 else '未发生',
                '预测类别': '发生' if y_pred_class[i, month] == 1 else '未发生',
                '预测正确': '是' if y_stage1_test[i, month] == y_pred_class[i, month] else '否',
                '绝对误差': abs(y_test[i, month] - y_pred_original[i, month])
            })

    detailed_df = pd.DataFrame(detailed_results)

    print("\n前20条预测结果:")
    print(detailed_df.head(20).to_string(index=False))

    # Statistics grouped by horizon month
    print("\n按月统计预测结果:")
    monthly_stats = []
    for month in range(forecast_horizon):
        month_data = detailed_df[detailed_df['预测月份'] == month + 1]
        monthly_stats.append({
            '月份': month + 1,
            '样本数量': len(month_data),
            '平均绝对误差': month_data['绝对误差'].mean(),
            '预测准确率': (month_data['预测正确'] == '是').mean(),
            '发生次数(真实)': (month_data['真实类别'] == '发生').sum(),
            '发生次数(预测)': (month_data['预测类别'] == '发生').sum()
        })

    monthly_df = pd.DataFrame(monthly_stats)
    print(monthly_df.to_string(index=False))

    return detailed_df, monthly_df


# -------------------------- Main program --------------------------
def main():
    """Run the full pipeline: load data, build features, train, evaluate, save."""
    print("=" * 60)
    print("旱涝急转预测模型 - GRU版本")
    print("=" * 60)

    # ---------------- Step 1: read the drought-flood data ----------------
    print("\n1. 读取旱涝急转数据...")
    df_drought_flood = pd.read_excel(
        r"E:\pythonProject\预测20251119\merged_data.xlsx",
        sheet_name=2,
        usecols=[0, 1]
    )

    df_drought_flood.rename(columns={
        df_drought_flood.columns[0]: "time_str",
        df_drought_flood.columns[1]: "target"
    }, inplace=True)

    def extract_date_from_gridcode(grid_code):
        """Parse the first 6-digit run (YYYYMM) in a grid code into a timestamp."""
        if isinstance(grid_code, str):
            match = re.search(r'(\d{6})', grid_code)
            if match:
                date_str = match.group(1)
                # Six digits that are not a valid YYYYMM become NaT and are
                # dropped below instead of aborting the whole run.
                return pd.to_datetime(date_str, format='%Y%m', errors='coerce')
        return None

    df_drought_flood["time"] = df_drought_flood["time_str"].apply(extract_date_from_gridcode)
    df_drought_flood = df_drought_flood.dropna(subset=["time", "target"])
    df_drought_flood.set_index("time", inplace=True)
    df_drought_flood = df_drought_flood.sort_index()
    df_drought_flood = df_drought_flood[["target"]]
    df_drought_flood = df_drought_flood.loc['1981-01':'2022-12']

    print(f"旱涝急转数据:{df_drought_flood.shape}")

    # ---------------- Step 2: load the climate factors ----------------
    print("\n2. 加载气候因子数据...")
    climate_file_path = r"E:\pythonProject\预测20251119\qixiang_data.xlsx"
    df_climate, climate_indices = load_climate_from_single_file(
        file_path=climate_file_path,
        time_col=None,
        climate_cols=None
    )

    # ---------------- Step 3: merge climate features ----------------
    print("\n3. 合并气候因子特征...")
    df_combined, climate_feature_cols = add_climate_features(
        df_original=df_drought_flood,
        df_climate=df_climate,
        climate_indices=climate_indices,
        lag_months=[1, 3, 6],
        window_size=12
    )

    # ---------------- Step 4: binary label and seasonal features ----------------
    print("\n4. 添加二分类标签和季节性特征...")
    df_combined["y_stage1"] = (df_combined["target"] != 0).astype(int)
    df_combined["month"] = df_combined.index.month
    df_combined["month_sin"] = np.sin(2 * np.pi * df_combined["month"] / 12)
    df_combined["month_cos"] = np.cos(2 * np.pi * df_combined["month"] / 12)
    df_combined.drop("month", axis=1, inplace=True)

    # .get(): a class that never occurs must be reported as 0, not raise KeyError
    class_distribution = df_combined["y_stage1"].value_counts()
    print(f"二分类标签分布:0={class_distribution.get(0, 0)},1={class_distribution.get(1, 0)}")

    # ---------------- Step 5: sliding-window samples ----------------
    print("\n5. 构造滑动窗口样本...")
    window_size = 48
    forecast_horizon = 12

    X, y, y_stage1, X_times = sliding_window_with_climate(
        data=df_combined,
        window_size=window_size,
        forecast_horizon=forecast_horizon,
        climate_feature_cols=climate_feature_cols
    )

    print(f"滑动窗口样本:X={X.shape},y={y.shape},y_stage1={y_stage1.shape}")

    # ---------------- Step 6: split and standardize ----------------
    print("\n6. 数据划分与归一化...")
    train_ratio = 0.7
    n_train = int(len(X) * train_ratio)

    # Chronological split (no shuffling)
    X_train, X_test = X[:n_train], X[n_train:]
    y_train, y_test = y[:n_train], y[n_train:]
    y_stage1_train, y_stage1_test = y_stage1[:n_train], y_stage1[n_train:]
    X_times_test = X_times[n_train:]

    # Scalers are fitted on the training part only
    scaler_X = StandardScaler()
    X_train_scaled = scaler_X.fit_transform(X_train)
    X_test_scaled = scaler_X.transform(X_test)

    scaler_y = StandardScaler()
    y_train_scaled = scaler_y.fit_transform(y_train)
    y_test_scaled = scaler_y.transform(y_test)

    print(f"训练集:X={X_train_scaled.shape},y={y_train_scaled.shape}")
    print(f"测试集:X={X_test_scaled.shape},y={y_test_scaled.shape}")

    # ---------------- Step 7: class imbalance ----------------
    print("\n7. 处理类别不平衡...")
    X_train_resampled, y_train_resampled, _ = improved_resample_data(
        X_train_scaled, y_train_scaled, y_stage1_train, strategy='oversample'
    )

    # ---------------- Step 8: training ----------------
    print("\n8. 模型训练...")

    # The first window_size columns are the target history (sequence input);
    # the remaining columns are the static input.
    time_steps = window_size
    n_hist_features = 1
    n_static_features = X_train_resampled.shape[1] - time_steps

    train_hist = X_train_resampled[:, :time_steps].reshape(-1, time_steps, n_hist_features)
    test_hist = X_test_scaled[:, :time_steps].reshape(-1, time_steps, n_hist_features)

    train_static = X_train_resampled[:, time_steps:]
    test_static = X_test_scaled[:, time_steps:]

    model = build_gru_model(time_steps, n_hist_features, n_static_features, forecast_horizon)

    early_stopping = EarlyStopping(monitor="val_loss", patience=20, restore_best_weights=True,
                                   mode='min', verbose=1)
    reduce_lr = ReduceLROnPlateau(monitor="val_loss", factor=0.5, patience=10, min_lr=1e-6,
                                  mode='min', verbose=1)

    print("开始训练模型...")
    history = model.fit(
        x=[train_hist, train_static],
        y=y_train_resampled,
        epochs=150,
        batch_size=32,
        validation_split=0.2,
        callbacks=[early_stopping, reduce_lr],
        verbose=1
    )

    print("进行预测...")
    y_pred_scaled = model.predict([test_hist, test_static], verbose=0)
    y_pred_original = scaler_y.inverse_transform(y_pred_scaled)
    # NOTE(review): labels are defined as target != 0 but predictions are
    # thresholded with > 0; if target can be negative the two disagree - confirm.
    y_pred_class = (y_pred_original > 0).astype(int)

    # ---------------- Step 9: evaluation ----------------
    print("\n9. 模型评估和结果输出...")

    # Regression metrics per horizon month
    reg_metrics = []
    for month in range(forecast_horizon):
        mse = mean_squared_error(y_test[:, month], y_pred_original[:, month])
        mae = mean_absolute_error(y_test[:, month], y_pred_original[:, month])
        r2 = r2_score(y_test[:, month], y_pred_original[:, month])
        reg_metrics.append({'month': month + 1, 'mse': mse, 'mae': mae, 'r2': r2})

    # Classification metrics per horizon month
    clf_metrics = []
    for month in range(forecast_horizon):
        acc = accuracy_score(y_stage1_test[:, month], y_pred_class[:, month])
        f1 = f1_score(y_stage1_test[:, month], y_pred_class[:, month], zero_division=0)
        precision = precision_score(y_stage1_test[:, month], y_pred_class[:, month], zero_division=0)
        recall = recall_score(y_stage1_test[:, month], y_pred_class[:, month], zero_division=0)
        clf_metrics.append({'month': month + 1, 'acc': acc, 'f1': f1,
                            'precision': precision, 'recall': recall})

    # Averages over the horizon
    avg_reg_mse = np.mean([m['mse'] for m in reg_metrics])
    avg_reg_mae = np.mean([m['mae'] for m in reg_metrics])
    avg_reg_r2 = np.mean([m['r2'] for m in reg_metrics])

    avg_clf_acc = np.mean([m['acc'] for m in clf_metrics])
    avg_clf_f1 = np.mean([m['f1'] for m in clf_metrics])
    avg_clf_precision = np.mean([m['precision'] for m in clf_metrics])
    avg_clf_recall = np.mean([m['recall'] for m in clf_metrics])

    detailed_df, monthly_df = output_detailed_results(y_test, y_pred_original, y_stage1_test,
                                                      y_pred_class, X_times_test, forecast_horizon)

    # Diagnostics for the first forecast month
    analyze_prediction_issues(y_test[:, 0], y_pred_original[:, 0],
                              y_stage1_test[:, 0], y_pred_class[:, 0])

    # ---------------- Step 10: forecast 2023 ----------------
    print("\n10. 预测2023年旱涝急转数据...")
    results_2023 = predict_2023_data(
        model=model,
        df_combined=df_combined,
        climate_feature_cols=climate_feature_cols,
        scaler_X=scaler_X,
        scaler_y=scaler_y,
        window_size=window_size,
        forecast_horizon=forecast_horizon
    )

    # ---------------- Step 11: save everything ----------------
    print("\n11. 保存所有结果...")

    pd.DataFrame(reg_metrics).to_excel("regression_metrics_gru.xlsx", index=False)
    pd.DataFrame(clf_metrics).to_excel("classification_metrics_gru.xlsx", index=False)

    detailed_df.to_excel("detailed_prediction_results_gru.xlsx", index=False)
    monthly_df.to_excel("monthly_statistics_gru.xlsx", index=False)

    results_2023.to_excel("2023_prediction_results_gru.xlsx", index=False)

    history_df = pd.DataFrame(history.history)
    history_df.to_excel("training_history_gru.xlsx", index=False)

    # Save the model BEFORE reporting gru_model.h5 in the list of saved files.
    model.save("gru_model.h5")
    print("GRU模型保存完成!")

    print("\n" + "=" * 60)
    print("模型训练和评估完成!")
    print("=" * 60)

    # Final summary
    print("\n最终结果汇总:")
    print(f"回归任务:")
    print(f" 平均MSE: {avg_reg_mse:.4f}")
    print(f" 平均MAE: {avg_reg_mae:.4f}")
    print(f" 平均R²: {avg_reg_r2:.4f}")

    print(f"\n分类任务:")
    print(f" 平均准确率: {avg_clf_acc:.4f}")
    print(f" 平均F1分数: {avg_clf_f1:.4f}")
    print(f" 平均精确率: {avg_clf_precision:.4f}")
    print(f" 平均召回率: {avg_clf_recall:.4f}")

    print(f"\n结果文件已保存:")
    print(f" - 回归指标: regression_metrics_gru.xlsx")
    print(f" - 分类指标: classification_metrics_gru.xlsx")
    print(f" - 详细预测结果: detailed_prediction_results_gru.xlsx")
    print(f" - 月度统计: monthly_statistics_gru.xlsx")
    print(f" - 2023年预测: 2023_prediction_results_gru.xlsx")
    print(f" - 训练历史: training_history_gru.xlsx")
    print(f" - GRU模型: gru_model.h5")


if __name__ == "__main__":
    main()

# Original poster's question: "改哪些代码" (which parts of the code should be changed?)
11-21
评论
成就一亿技术人!
拼手气红包6.0元
还能输入1000个字符
 
红包 添加红包
表情包 插入表情
 条评论被折叠 查看
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值