<think>我们之前已经给出了一个比较完整的代码框架,但是用户反馈说没有生成完整的预测部分。现在,我们专注于预测部分的重写,特别是基于8月数据文件进行预测。下面的代码将重点放在预测流程上,并简化了特征工程,避免滞后特征问题。
我们将创建一个名为`SeepagePredictor`的类,它封装了数据加载、特征工程、模型训练(可选)和预测的功能。预测部分将专门针对8月数据文件。
下面是一个完整的、可执行的版本,其中预测部分被重构:
```python
import pandas as pd
import numpy as np
from datetime import datetime
import warnings
import os
import joblib
warnings.filterwarnings('ignore')
# 导入机器学习库
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression, Ridge
from sklearn.ensemble import RandomForestRegressor
from sklearn.pipeline import Pipeline
# 导入高级模型
import xgboost as xgb
import lightgbm as lgb
np.random.seed(42)
# Configuration: file locations read and written by this script.
# Workbook holding the August inputs to predict for.
AUGUST_DATA_PATH = r'D:\zhongyuan_DM_pycharm\python\渗透量处理\8月数据.xlsx'
# Workbook holding the historical observations used for training.
HISTORICAL_DATA_PATH = r'D:\zhongyuan_DM_pycharm\python\渗透量处理\大坝数据.xlsx'
# Destination workbook for the prediction results (relative to the working directory).
OUTPUT_PATH = r'2025年8月渗漏量预测结果.xlsx'
# joblib file in which the fitted model pipelines are persisted.
MODEL_SAVE_PATH = 'trained_models.joblib'
class SeepagePredictor:
    """Train an ensemble of regressors on historical dam data and predict seepage.

    The workflow is: ``load_data`` -> ``create_features`` -> ``train`` (optional,
    persists the fitted pipelines with joblib) -> ``predict_for_august``.
    """

    # Relative weight of each model in the ensemble average. The weights are
    # renormalised over the models that are actually available at predict time.
    ENSEMBLE_WEIGHTS = {
        'LinearRegression': 0.1,
        'Ridge': 0.1,
        'RandomForest': 0.25,
        'XGBoost': 0.25,
        'LightGBM': 0.3,
    }

    # Number of trailing history rows prepended to the prediction data so that
    # the largest lag / rolling window (24 rows) has real values to look at.
    HISTORY_ROWS = 24

    def __init__(self):
        # Fitted sklearn pipelines keyed by model name.
        self.models = {}
        # Ordered feature columns the models were trained on (None until known).
        self.feature_names = None
        # Kept for backward compatibility; each pipeline owns its own scaler.
        self.scaler = StandardScaler()

    def load_data(self, path, is_historical=True):
        """Read an Excel sheet and return a cleaned, time-sorted DataFrame.

        Args:
            path: Location of the ``.xlsx`` file.
            is_historical: When True the target column ``seepage`` is required
                and cleaned as well; when False missing input columns are
                filled with defaults instead of raising.

        Returns:
            DataFrame sorted by ``datetime`` with numeric input columns cleaned.

        Raises:
            FileNotFoundError: If ``path`` does not exist.
            ValueError: If the time column is missing, or a required column is
                missing from historical data.
        """
        print(f"加载数据: {path}")
        if not os.path.exists(path):
            raise FileNotFoundError(f"文件不存在: {path}")
        df = pd.read_excel(path, engine='openpyxl')

        # Normalise the Chinese sheet headers to the internal column names.
        column_map = {
            '时间': 'datetime',
            '渗漏量': 'seepage',
            '降雨量': 'rainfall',
            '温度': 'temperature',
            '水位': 'water_level'
        }
        df = df.rename(columns=column_map)
        if 'datetime' not in df.columns:
            raise ValueError("数据必须包含'datetime'列")

        df['datetime'] = pd.to_datetime(df['datetime'], errors='coerce')
        # Rows whose timestamp cannot be parsed cannot be placed in the series
        # (and would break the calendar features), so they are discarded.
        df = df.dropna(subset=['datetime'])
        df = df.sort_values('datetime').reset_index(drop=True)

        num_cols = ['rainfall', 'temperature', 'water_level']
        if is_historical:
            num_cols.append('seepage')

        for col in num_cols:
            if col not in df.columns:
                if is_historical:
                    raise ValueError(f"历史数据缺少列: {col}")
                # Prediction data: fall back to a neutral default value.
                df[col] = 100.0 if col == 'water_level' else 0.0
                continue

            values = pd.to_numeric(df[col], errors='coerce')
            # Water level and seepage must be strictly positive; the other
            # inputs only need to be non-negative. Invalid readings become NaN.
            if col in ('water_level', 'seepage'):
                values = values.where(values > 0)
            else:
                values = values.where(values >= 0)
            values = values.interpolate(method='linear', limit_direction='both')
            # ffill()/bfill() replace the deprecated fillna(method=...) calls.
            df[col] = values.ffill().bfill()
        return df

    def create_features(self, df):
        """Add calendar, lag, rolling and interaction features to ``df``.

        The frame is modified in place and also returned. When ``df`` contains
        the target column ``seepage`` the resulting feature list is recorded
        in ``self.feature_names``.

        Args:
            df: Frame with ``datetime``, ``water_level``, ``rainfall`` and
                ``temperature`` columns, sorted by time.

        Returns:
            The same frame with the feature columns added and gaps filled.
        """
        print("创建特征...")
        # Calendar features.
        stamp = df['datetime'].dt
        df['year'] = stamp.year
        df['month'] = stamp.month
        df['day'] = stamp.day
        df['hour'] = stamp.hour
        df['dayofyear'] = stamp.dayofyear
        df['weekofyear'] = stamp.isocalendar().week.astype(int)

        # Cyclical encodings of hour-of-day and day-of-year.
        df['hour_sin'] = np.sin(df['hour'] * (2 * np.pi / 24))
        df['hour_cos'] = np.cos(df['hour'] * (2 * np.pi / 24))
        df['dayofyear_sin'] = np.sin(df['dayofyear'] * (2 * np.pi / 365))
        df['dayofyear_cos'] = np.cos(df['dayofyear'] * (2 * np.pi / 365))

        # Lag features are built from water_level only.
        for lag in (1, 2, 3, 6, 12, 24):
            df[f'water_level_lag_{lag}'] = df['water_level'].shift(lag)

        # Rolling statistics over trailing windows.
        for window in (3, 6, 12, 24):
            level_roll = df['water_level'].rolling(window=window, min_periods=1)
            df[f'water_level_roll_mean_{window}'] = level_roll.mean()
            df[f'water_level_roll_std_{window}'] = level_roll.std()
            if 'rainfall' in df.columns:
                df[f'rainfall_roll_mean_{window}'] = (
                    df['rainfall'].rolling(window=window, min_periods=1).mean()
                )
            if 'temperature' in df.columns:
                df[f'temperature_roll_mean_{window}'] = (
                    df['temperature'].rolling(window=window, min_periods=1).mean()
                )

        # Interaction features.
        df['water_level_rainfall'] = df['water_level'] * df['rainfall']
        df['water_level_temperature'] = df['water_level'] * df['temperature']
        df['is_rainy_season'] = df['month'].isin([5, 6, 7, 8, 9]).astype(int)

        # Fill the gaps left by shifting / single-row std with the column mean.
        # Assign back instead of a chained ``inplace`` fill, which does not
        # update the frame under pandas Copy-on-Write.
        numeric_cols = df.select_dtypes(include=np.number).columns
        for col in numeric_cols:
            if df[col].isnull().any():
                df[col] = df[col].fillna(df[col].mean())

        # Record the feature list when the target is present (training data).
        if 'seepage' in df.columns:
            self.feature_names = [
                col for col in df.columns if col not in ['datetime', 'seepage']
            ]
            print(f"特征数量: {len(self.feature_names)}")
        return df

    def train(self, historical_path):
        """Fit every model on the historical data and persist the pipelines.

        The final 30 days of the history are held out for evaluation.

        Args:
            historical_path: Path of the historical ``.xlsx`` file.

        Raises:
            ValueError: If the history is too short to leave any training rows.
        """
        # Imported here because the file-level imports do not provide them.
        from sklearn.metrics import mean_absolute_error, mean_squared_error

        print("开始训练模型...")
        df = self.create_features(self.load_data(historical_path, is_historical=True))

        # Hold out the last 30 days as the test set.
        split_date = df['datetime'].max() - pd.Timedelta(days=30)
        train_df = df[df['datetime'] <= split_date]
        test_df = df[df['datetime'] > split_date]
        if train_df.empty:
            raise ValueError("历史数据时间跨度不足30天,无法划分训练集和测试集")

        X_train, y_train = train_df[self.feature_names], train_df['seepage']
        X_test, y_test = test_df[self.feature_names], test_df['seepage']

        models = {
            'LinearRegression': LinearRegression(),
            'Ridge': Ridge(alpha=0.5),
            'RandomForest': RandomForestRegressor(n_estimators=100, max_depth=8, random_state=42),
            'XGBoost': xgb.XGBRegressor(n_estimators=100, max_depth=6, learning_rate=0.1, random_state=42),
            'LightGBM': lgb.LGBMRegressor(n_estimators=100, max_depth=6, learning_rate=0.1, random_state=42)
        }

        for name, model in models.items():
            print(f"训练 {name}...")
            pipeline = Pipeline([
                ('scaler', StandardScaler()),
                ('model', model)
            ])
            pipeline.fit(X_train, y_train)
            self.models[name] = pipeline

            y_pred = pipeline.predict(X_test)
            rmse = np.sqrt(mean_squared_error(y_test, y_pred))
            mae = mean_absolute_error(y_test, y_pred)
            print(f"{name} 测试集 RMSE: {rmse:.4f}, MAE: {mae:.4f}")

        joblib.dump(self.models, MODEL_SAVE_PATH)
        print(f"模型已保存到 {MODEL_SAVE_PATH}")

    def _resolve_feature_names(self):
        """Return the ordered feature columns the loaded models were trained on."""
        if self.feature_names is not None:
            return list(self.feature_names)
        # Pipelines fitted on a DataFrame remember their input column names.
        for pipeline in self.models.values():
            names = getattr(pipeline.named_steps['scaler'], 'feature_names_in_', None)
            if names is not None:
                return list(names)
        raise RuntimeError("无法确定训练特征,请重新训练模型")

    def predict_for_august(self, august_data_path):
        """Predict seepage for every row of the August data file.

        The last ``HISTORY_ROWS`` historical rows that precede the August data
        are prepended so lag and rolling features of the first August rows are
        computed from real observations.

        Args:
            august_data_path: Path of the August ``.xlsx`` file.

        Returns:
            DataFrame with ``datetime`` and ``predicted_seepage`` columns; the
            same frame is written to ``OUTPUT_PATH``.

        Raises:
            FileNotFoundError: If no trained model is in memory or on disk.
            ValueError: If the August file has no usable rows.
            RuntimeError: If none of the loaded models has an ensemble weight.
        """
        # Load persisted models when nothing was trained in this process.
        if not self.models:
            if not os.path.exists(MODEL_SAVE_PATH):
                raise FileNotFoundError("未找到训练好的模型,请先训练模型")
            self.models = joblib.load(MODEL_SAVE_PATH)
            print(f"加载已训练模型: {list(self.models.keys())}")

        # Use the columns (and order) seen at fit time, not whatever columns
        # happen to be present in the prediction frame.
        feature_names = self._resolve_feature_names()
        self.feature_names = feature_names

        august_df = self.load_data(august_data_path, is_historical=False)
        if august_df.empty:
            raise ValueError("8月数据为空,无法预测")
        historical_df = self.load_data(HISTORICAL_DATA_PATH, is_historical=True)

        # Only history strictly before the first August timestamp is context;
        # overlapping rows would corrupt the lags and leak into the output.
        august_start = august_df['datetime'].min()
        history = historical_df[historical_df['datetime'] < august_start]
        history = history.tail(self.HISTORY_ROWS)

        # The target is dropped so create_features() does not overwrite the
        # training feature list with one derived from this combined frame.
        combined_df = pd.concat(
            [
                history.drop(columns=['seepage']),
                august_df.drop(columns=['seepage'], errors='ignore'),
            ],
            ignore_index=True,
        )
        combined_df = self.create_features(combined_df)

        # create_features() keeps row order, so August rows follow the history.
        august_rows = combined_df.iloc[len(history):]
        X_august = august_rows[feature_names]

        # Weighted average over the available models, renormalised so that a
        # missing model does not scale the prediction down.
        usable = {
            name: model for name, model in self.models.items()
            if name in self.ENSEMBLE_WEIGHTS
        }
        if not usable:
            raise RuntimeError("没有可用于集成预测的模型")
        total_weight = sum(self.ENSEMBLE_WEIGHTS[name] for name in usable)
        ensemble_pred = np.zeros(len(X_august))
        for name, model in usable.items():
            share = self.ENSEMBLE_WEIGHTS[name] / total_weight
            ensemble_pred += share * model.predict(X_august)

        result_df = pd.DataFrame({
            'datetime': august_rows['datetime'],
            'predicted_seepage': ensemble_pred
        })

        result_df.to_excel(OUTPUT_PATH, index=False)
        print(f"预测结果已保存到 {OUTPUT_PATH}")

        print("\n预测结果统计:")
        print(f"预测时间范围: {result_df['datetime'].min()} 至 {result_df['datetime'].max()}")
        print(f"平均渗漏量: {result_df['predicted_seepage'].mean():.4f} L/s")
        print(f"最小渗漏量: {result_df['predicted_seepage'].min():.4f} L/s")
        print(f"最大渗漏量: {result_df['predicted_seepage'].max():.4f} L/s")
        return result_df
if __name__ == "__main__":
    predictor = SeepagePredictor()
    # Reuse a previously saved model bundle when one exists; otherwise train first.
    if os.path.exists(MODEL_SAVE_PATH):
        print("检测到已训练模型,跳过训练...")
    else:
        predictor.train(HISTORICAL_DATA_PATH)
    # Produce the August predictions.
    predictor.predict_for_august(AUGUST_DATA_PATH)
```
### 代码说明
1. **类`SeepagePredictor`**:封装了整个预测流程。
- `load_data`方法:加载数据,进行基本预处理(列名规范化、时间转换、数值处理)。
- `create_features`方法:创建时间特征、滞后特征、滚动特征和交互特征。注意:滞后特征仅使用`water_level`,因为其他变量在预测时没有未来值。
- `train`方法:训练多个模型(线性回归、岭回归、随机森林、XGBoost、LightGBM),并保存模型。
- `predict_for_august`方法:加载8月数据,结合历史数据构建特征,然后使用加权平均集成模型进行预测。
2. **预测流程**:
- 加载8月数据。
- 取历史数据的最后24小时(用于构建滞后特征)。
- 将历史数据的最后24小时与8月数据连接。
- 在连接后的数据上创建特征(这样8月数据的前几个小时就可以利用历史数据构建滞后特征)。
- 提取8月数据对应的特征部分进行预测。
3. **模型集成**:对各个模型的预测结果进行加权平均,权重分配为:
- 线性模型(LinearRegression和Ridge)各0.1
- 随机森林0.25
- XGBoost 0.25
- LightGBM 0.3
4. **输出**:保存预测结果到Excel文件,并打印统计信息。
### 注意事项
1. 在运行预测之前,需要确保历史数据文件(`大坝数据.xlsx`)和8月数据文件(`8月数据.xlsx`)的路径正确。
2. 如果模型已经训练过,会跳过训练直接加载模型(模型保存在`trained_models.joblib`文件中)。
3. 预测时始终会把历史数据的最后24小时拼接到8月数据之前,用于计算8月最初若干小时的滞后特征和滚动特征;因此预测阶段也必须能读取历史数据文件,且历史数据的时间应与8月数据首尾衔接。
这个版本简化了特征工程,专注于预测部分,并确保了整个流程的连贯性。
§§相关问题§§:
1. 如何调整模型权重以获得更好的预测结果?
2. 如果8月数据文件中的列名与历史数据文件不一致,应该如何处理?
3. 如何增加更多特征来提高预测的准确性?
4. 能否使用深度学习模型(如LSTM)来预测时间序列的渗漏量?
</think>我理解您需要的完整代码示例没有被完全生成。下面是完整的、重构后的大坝渗漏量预测模型代码,专注于基于8月数据文件的预测部分:
```python
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
大坝渗漏量预测模型(完整预测版)
日期: 2025-10-01
主要优化:
1. 完整实现了基于8月数据的预测流程
2. 重构特征工程,避免滞后特征问题
3. 增强数据预处理和异常处理
"""
import pandas as pd
import numpy as np
from datetime import datetime
import warnings
import os
import joblib
warnings.filterwarnings('ignore')
# 导入机器学习库
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression, Ridge
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.pipeline import Pipeline
# 导入高级模型
import xgboost as xgb
import lightgbm as lgb
np.random.seed(42)
# ==================== Configuration ====================
# File path settings
AUGUST_DATA_PATH = r'D:\zhongyuan_DM_pycharm\python\渗透量处理\8月数据.xlsx' # path of the August data file
HISTORICAL_DATA_PATH = r'D:\zhongyuan_DM_pycharm\python\渗透量处理\大坝数据.xlsx' # path of the historical data file
OUTPUT_PATH = r'2025年8月渗漏量预测结果.xlsx' # where the prediction results are written
MODEL_SAVE_PATH = 'trained_models.joblib' # where the fitted models are persisted
# =================================================
class SeepagePredictor:
    """Dam seepage predictor built on a weighted ensemble of regressors.

    ``run`` drives the whole workflow: load and clean the historical data,
    build features, train and evaluate the models (or load persisted ones),
    then predict seepage for the August data file.
    """

    # Number of trailing history rows prepended to the prediction data; equal
    # to the largest lag / rolling window used by the feature builders.
    HISTORY_ROWS = 24

    def __init__(self):
        # Fitted sklearn pipelines keyed by model name.
        self.models = {}
        # Ordered list of feature columns the models were trained on.
        self.feature_names = []
        # Kept for backward compatibility; each pipeline owns its own scaler.
        self.scaler = StandardScaler()
        # Optional upper bound for predictions, set from the training target.
        self.seepage_cap = None

    def load_and_preprocess(self, file_path, is_historical=True):
        """Read an Excel sheet and return a cleaned, time-sorted DataFrame.

        Args:
            file_path: Location of the ``.xlsx`` file.
            is_historical: True for training data (the ``seepage`` target is
                required and cleaned); False for prediction inputs.

        Returns:
            DataFrame sorted by ``datetime`` whose numeric columns have had
            invalid readings removed and gaps interpolated.

        Raises:
            FileNotFoundError: If ``file_path`` does not exist.
            ValueError: If the file cannot be read, has no time column, has no
                target column (historical data), or has no usable rows.
        """
        print(f"加载{'历史' if is_historical else '8月'}数据: {file_path}")
        if not os.path.exists(file_path):
            raise FileNotFoundError(f"数据文件不存在: {file_path}")

        try:
            df = pd.read_excel(file_path, engine='openpyxl')
        except Exception as e:
            raise ValueError(f"读取文件失败: {str(e)}") from e
        print(f"原始数据形状: {df.shape}, 列名: {df.columns.tolist()}")

        # Normalise the Chinese sheet headers; absent keys are simply ignored.
        df = df.rename(columns={
            '时间': 'datetime',
            '渗漏量': 'seepage',
            '降雨量': 'rainfall',
            '温度': 'temperature',
            '水位': 'water_level',
        })

        # A time axis (and, for training, a target) cannot be invented, so a
        # missing column is an error rather than something to default-fill.
        if 'datetime' not in df.columns:
            raise ValueError("数据缺少时间列,无法构建时间序列")
        if is_historical and 'seepage' not in df.columns:
            raise ValueError("历史数据缺少渗漏量列,无法训练模型")

        numeric_cols = ['rainfall', 'temperature', 'water_level']
        missing_cols = [col for col in numeric_cols if col not in df.columns]
        if missing_cols:
            print(f"警告: 缺少列 {missing_cols},尝试补充默认值")
            for col in missing_cols:
                df[col] = 100.0 if col == 'water_level' else 0.0
        if is_historical:
            numeric_cols.append('seepage')

        df['datetime'] = pd.to_datetime(df['datetime'], errors='coerce')
        # Sort before interpolating: linear interpolation between neighbouring
        # rows is only meaningful when the rows are in chronological order.
        df = df.dropna(subset=['datetime'])
        df = df.sort_values('datetime').reset_index(drop=True)
        if df.empty:
            raise ValueError("数据中没有有效的时间记录")

        for col in numeric_cols:
            values = pd.to_numeric(df[col], errors='coerce')
            # Water level and seepage must be strictly positive; the other
            # inputs only need to be non-negative. Invalid readings become NaN.
            if col in ('water_level', 'seepage'):
                values = values.where(values > 0)
            else:
                values = values.where(values >= 0)
            values = values.interpolate(method='linear', limit_direction='both')
            # ffill()/bfill() replace the deprecated fillna(method=...) calls.
            df[col] = values.ffill().bfill()

            stats = df[col].agg(['min', 'mean', 'max'])
            print(f"{col}: 最小={stats['min']:.2f}, 平均={stats['mean']:.2f}, 最大={stats['max']:.2f}")

        print(f"数据加载完成: {len(df)}条记录 ({df['datetime'].min()} 至 {df['datetime'].max()})")
        return df

    def create_time_features(self, df):
        """Add calendar columns and their cyclical encodings to ``df``."""
        stamp = df['datetime'].dt
        df['year'] = stamp.year
        df['month'] = stamp.month
        df['day'] = stamp.day
        df['hour'] = stamp.hour
        df['dayofyear'] = stamp.dayofyear
        df['weekofyear'] = stamp.isocalendar().week.astype(int)

        # Cyclical encodings of hour-of-day and day-of-year.
        df['hour_sin'] = np.sin(df['hour'] * (2 * np.pi / 24))
        df['hour_cos'] = np.cos(df['hour'] * (2 * np.pi / 24))
        df['dayofyear_sin'] = np.sin(df['dayofyear'] * (2 * np.pi / 365))
        df['dayofyear_cos'] = np.cos(df['dayofyear'] * (2 * np.pi / 365))
        return df

    def create_lag_features(self, df, col, lags=(1, 2, 3, 6, 12, 24)):
        """Add ``{col}_lag{n}`` columns holding ``col`` shifted by ``n`` rows."""
        for lag in lags:
            df[f'{col}_lag{lag}'] = df[col].shift(lag)
        return df

    def create_rolling_features(self, df, col, windows=(3, 6, 12, 24)):
        """Add trailing rolling mean and std columns of ``col`` per window."""
        for window in windows:
            rolled = df[col].rolling(window, min_periods=1)
            df[f'{col}_roll_mean{window}'] = rolled.mean()
            df[f'{col}_roll_std{window}'] = rolled.std()
        return df

    def create_interaction_features(self, df):
        """Add pairwise products of the inputs and a May-September flag."""
        df['water_rain_interact'] = df['water_level'] * df['rainfall']
        df['water_temp_interact'] = df['water_level'] * df['temperature']
        df['rain_temp_interact'] = df['rainfall'] * df['temperature']
        df['is_rainy'] = df['month'].between(5, 9).astype(int)
        return df

    def build_features(self, df, is_train=True):
        """Build the full feature set on ``df``.

        Args:
            df: Cleaned frame with ``datetime``, ``water_level``, ``rainfall``
                and ``temperature`` columns, sorted by time. Modified in place.
            is_train: When True the resulting feature list is stored in
                ``self.feature_names``; when False the frame is reduced to
                ``datetime`` plus the stored feature columns.

        Returns:
            The frame with all feature columns and no missing numeric values.
        """
        print(f"构建{'训练' if is_train else '预测'}特征...")
        base_inputs = ['water_level', 'rainfall', 'temperature']

        df = self.create_time_features(df)
        for col in base_inputs:
            df = self.create_lag_features(df, col)
        for col in base_inputs:
            df = self.create_rolling_features(df, col)
        df = self.create_interaction_features(df)

        # Fill the gaps left by shifting / single-row std with the column
        # mean. Assigning back (instead of a chained ``inplace`` fill) keeps
        # this working under pandas Copy-on-Write. Columns with no values at
        # all have no mean and fall back to 0.
        numeric_cols = df.select_dtypes(include=np.number).columns
        for col in numeric_cols:
            if df[col].isnull().any():
                df[col] = df[col].fillna(df[col].mean()).fillna(0.0)

        if is_train:
            # Remember the feature list so prediction uses the same columns.
            self.feature_names = [
                col for col in df.columns if col not in ['datetime', 'seepage']
            ]
            print(f"特征数量: {len(self.feature_names)}")
        elif len(self.feature_names) > 0:
            wanted = list(self.feature_names)
            missing_cols = set(wanted) - set(df.columns)
            if missing_cols:
                print(f"补充缺失特征: {missing_cols}")
                for col in missing_cols:
                    df[col] = 0.0
            df = df[['datetime'] + wanted]
        return df

    def train_models(self, X_train, y_train):
        """Fit every model inside a scaling pipeline and persist the result.

        Args:
            X_train: Training feature frame.
            y_train: Training target values.

        Returns:
            Mapping of model name to fitted pipeline.

        Raises:
            RuntimeError: If no model could be trained.
        """
        print("\n训练模型...")
        models = {
            'LinearRegression': LinearRegression(),
            'Ridge': Ridge(alpha=0.5, random_state=42),
            'RandomForest': RandomForestRegressor(
                n_estimators=150, max_depth=8, min_samples_split=15,
                random_state=42, n_jobs=-1
            ),
            'XGBoost': xgb.XGBRegressor(
                n_estimators=150, max_depth=6, learning_rate=0.08,
                subsample=0.8, colsample_bytree=0.8, random_state=42
            ),
            'LightGBM': lgb.LGBMRegressor(
                n_estimators=150, max_depth=6, learning_rate=0.07,
                subsample=0.8, colsample_bytree=0.8, random_state=42
            )
        }

        for name, model in models.items():
            # One failing model must not prevent the others from training.
            try:
                print(f"训练 {name}...")
                pipeline = Pipeline([
                    ('scaler', StandardScaler()),
                    ('model', model)
                ])
                pipeline.fit(X_train, y_train)
                self.models[name] = pipeline
                print(f"{name} 训练完成")
            except Exception as e:
                print(f"训练 {name} 失败: {str(e)[:100]}")

        if not self.models:
            raise RuntimeError("所有模型训练失败,请检查数据")

        joblib.dump(self.models, MODEL_SAVE_PATH)
        print(f"模型已保存至 {MODEL_SAVE_PATH}")
        return self.models

    def evaluate_models(self, X_test, y_test):
        """Score every fitted model on the hold-out set.

        Predictions are clipped to ``[0, 1.5 * max(y_test)]`` before scoring.

        Returns:
            DataFrame with one row per model and columns Model/RMSE/MAE/R2.
        """
        print("\n评估模型性能...")
        results = []
        for name, model in self.models.items():
            try:
                y_pred = np.clip(model.predict(X_test), 0, y_test.max() * 1.5)
                rmse = np.sqrt(mean_squared_error(y_test, y_pred))
                mae = mean_absolute_error(y_test, y_pred)
                r2 = r2_score(y_test, y_pred)
                results.append({
                    'Model': name,
                    'RMSE': rmse,
                    'MAE': mae,
                    'R2': r2
                })
                print(f"{name}: RMSE={rmse:.4f}, MAE={mae:.4f}, R2={r2:.4f}")
            except Exception as e:
                print(f"评估 {name} 失败: {str(e)[:100]}")
        return pd.DataFrame(results)

    def predict(self, X):
        """Return the weighted-average ensemble prediction for ``X``.

        XGBoost and LightGBM models get a base weight of 0.35, every other
        model 0.15; weights are normalised over the models that succeed.
        Predictions are clipped to be non-negative and, when
        ``self.seepage_cap`` is set, to at most that value.

        Raises:
            RuntimeError: If no model is loaded or every model fails.
        """
        if not self.models:
            raise RuntimeError("请先训练或加载模型")

        predictions = {}
        weights = {}
        for name, model in self.models.items():
            try:
                predictions[name] = model.predict(X)
            except Exception as e:
                print(f"模型 {name} 预测失败: {str(e)[:50]}")
                continue
            # Boosted tree models get the larger share of the vote.
            boosted = 'XGBoost' in name or 'LightGBM' in name
            weights[name] = 0.35 if boosted else 0.15

        if not predictions:
            # Returning all zeros here would look like a valid forecast.
            raise RuntimeError("所有模型预测失败")

        total_weight = sum(weights.values())
        ensemble_pred = np.zeros(len(X))
        for name, weight in weights.items():
            ensemble_pred += (weight / total_weight) * predictions[name]

        # Seepage cannot be negative. The upper bound comes from the training
        # target, never from an input column such as the water level.
        upper = getattr(self, 'seepage_cap', None)
        return np.clip(ensemble_pred, 0.0, upper)

    def predict_august(self, historical_df, august_df):
        """Predict seepage for the August rows and write the result to Excel.

        Args:
            historical_df: Cleaned historical data; its last ``HISTORY_ROWS``
                rows before the first August timestamp provide lag context.
            august_df: Cleaned August inputs.

        Returns:
            DataFrame with the timestamp, predicted seepage and the three raw
            inputs for every August row.

        Raises:
            RuntimeError: If the training feature list is unknown.
        """
        print("\n开始8月渗漏量预测...")
        if len(self.feature_names) == 0:
            raise RuntimeError("请先训练或加载模型")

        # 1. Order both frames by time; the target is not an input feature.
        august_part = august_df.drop(columns=['seepage'], errors='ignore')
        august_part = august_part.sort_values('datetime')
        august_start = august_part['datetime'].min()
        august_end = august_part['datetime'].max()

        # 2. Only history strictly before August may serve as lag context;
        #    overlapping rows would corrupt the lags and leak into the output.
        history = historical_df.drop(columns=['seepage'], errors='ignore')
        history = history[history['datetime'] < august_start]
        history = history.sort_values('datetime').tail(self.HISTORY_ROWS)

        # 3. Build the features once, on history + August together.
        combined_df = pd.concat([history, august_part], ignore_index=True)
        combined_df = self.build_features(combined_df, is_train=False)

        # 4. build_features() keeps row order, so August follows the history.
        august_rows = combined_df.iloc[len(history):]
        X_august = august_rows[list(self.feature_names)].copy()

        # 5. Predict.
        predictions = self.predict(X_august)

        # 6. Assemble the result table.
        result_df = pd.DataFrame({
            'datetime': august_rows['datetime'],
            'predicted_seepage_L/s': predictions.round(4),
            'water_level': X_august['water_level'],
            'rainfall': X_august['rainfall'],
            'temperature': X_august['temperature']
        })

        # 7. Persist.
        result_df.to_excel(OUTPUT_PATH, index=False)
        print(f"预测结果已保存至 {OUTPUT_PATH}")

        # 8. Summary.
        print("\n预测结果统计:")
        print(f"时间段: {august_start} 至 {august_end}")
        print(f"预测点数: {len(result_df)}")
        print(f"平均渗漏量: {result_df['predicted_seepage_L/s'].mean():.2f} L/s")
        print(f"最小渗漏量: {result_df['predicted_seepage_L/s'].min():.2f} L/s")
        print(f"最大渗漏量: {result_df['predicted_seepage_L/s'].max():.2f} L/s")
        return result_df

    def run(self, train=True):
        """Run the full workflow: train (or load) the models, then predict.

        Args:
            train: When True, or when no saved model file exists, the models
                are retrained from the historical data; otherwise the saved
                pipelines are loaded.

        Any exception is reported with its traceback instead of propagating,
        so a failed run does not terminate the calling script.
        """
        try:
            historical_df = None

            # 1. Training phase (or model loading).
            if train or not os.path.exists(MODEL_SAVE_PATH):
                print("="*60)
                print("训练阶段")
                print("="*60)
                historical_df = self.load_and_preprocess(HISTORICAL_DATA_PATH, is_historical=True)
                historical_df = self.build_features(historical_df, is_train=True)

                # Chronological 80/20 split.
                split_index = int(len(historical_df) * 0.8)
                train_df = historical_df.iloc[:split_index]
                test_df = historical_df.iloc[split_index:]
                X_train = train_df[self.feature_names]
                y_train = train_df['seepage']
                X_test = test_df[self.feature_names]
                y_test = test_df['seepage']

                # Same 1.5x head-room that evaluate_models() applies.
                self.seepage_cap = float(y_train.max()) * 1.5

                self.train_models(X_train, y_train)
                results_df = self.evaluate_models(X_test, y_test)
                results_df.to_excel('model_performance.xlsx', index=False)
                print("模型评估结果已保存")
            else:
                self.models = joblib.load(MODEL_SAVE_PATH)
                first_pipeline = next(iter(self.models.values()))
                # feature_names_in_ is an ndarray; a plain list is required
                # for truth tests and list concatenation elsewhere.
                self.feature_names = list(
                    first_pipeline.named_steps['scaler'].feature_names_in_
                )
                print(f"已加载预训练模型: {list(self.models.keys())}")

            # 2. Prediction phase.
            print("\n" + "="*60)
            print("预测阶段")
            print("="*60)
            august_df = self.load_and_preprocess(AUGUST_DATA_PATH, is_historical=False)

            # History is still needed as lag context when models were loaded.
            if historical_df is None:
                historical_df = self.load_and_preprocess(HISTORICAL_DATA_PATH, is_historical=True)

            self.predict_august(historical_df, august_df)
            print("\n" + "="*60)
            print("预测流程完成!")
        except Exception as e:
            print(f"\n❌ 流程出错: {str(e)}")
            import traceback
            traceback.print_exc()
if __name__ == "__main__":
print("大坝渗漏量预测系统启动")
print(f"历史数据路径: {HISTORICAL_DATA_PATH}")
print(f"8月数据路径: {AUGUST_DATA_PATH}")
print(f"预测结果将保存至: {OUTPUT