import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
import tensorflow as tf
import os
from datetime import timedelta
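# Note: the plots below use Chinese labels and titles. If the default matplotlib font
# cannot render CJK characters, a CJK-capable font can be configured explicitly. This is
# an optional, environment-dependent tweak (it assumes a font such as SimHei is installed):
# plt.rcParams['font.sans-serif'] = ['SimHei']
# plt.rcParams['axes.unicode_minus'] = False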
# Set random seeds for reproducible results
tf.random.set_seed(42)
np.random.seed(42)
# 1. Data loading and preprocessing
# --------------------------------------------------
def load_and_preprocess_data():
"""加载并预处理所有数据源"""
    # Load the EC weather forecast data
ec_df = pd.read_csv('阿拉山口风电场_EC_data.csv', parse_dates=['生成日期', '预测日期'])
ec_df = ec_df[ec_df['场站名'] == '阿拉山口风电场']
    # Derive EC wind speed and direction from the U/V components
ec_df['EC风速(m/s)'] = np.sqrt(ec_df['U风分量(m/s)']**2 + ec_df['V风分量(m/s)']**2)
ec_df['EC风向(度)'] = np.degrees(np.arctan2(ec_df['V风分量(m/s)'], ec_df['U风分量(m/s)'])) % 360
    # EC availability time (issue time + 12 hours)
ec_df['可用时间'] = ec_df['生成日期'] + timedelta(hours=12)
    # Keep the key EC features
ec_features = [
'可用时间', '预测日期', 'EC风速(m/s)', 'EC风向(度)',
'位势高度_850hPa(gpm)', '温度_850hPa(K)', '相对湿度_850hPa(%)',
'位势高度_500hPa(gpm)', '温度_500hPa(K)'
]
ec_df = ec_df[ec_features]
    # Load the wind turbine data
turbine_df = pd.read_csv('阿拉山口风电场风机数据.csv', encoding='utf-8', parse_dates=[0])
turbine_df.columns = ['timestamp', 'wind_speed', 'active_power']
    # Load the SCADA (telecontrol) data
scada_df = pd.read_csv('阿拉山口风电场远动数据.csv', encoding='utf-8', parse_dates=[0])
scada_df.columns = ['timestamp', 'active_power_total']
    # Merge turbine and SCADA data
power_df = pd.merge(turbine_df[['timestamp', 'wind_speed']],
scada_df,
on='timestamp',
how='outer')
    # Sort by time and forward-fill missing values
    power_df.sort_values('timestamp', inplace=True)
    power_df['active_power_total'] = power_df['active_power_total'].ffill()
    power_df['wind_speed'] = power_df['wind_speed'].ffill()
    # Build a complete time index at 15-minute intervals
full_range = pd.date_range(
start=power_df['timestamp'].min(),
end=power_df['timestamp'].max(),
        freq='15min'
)
power_df = power_df.set_index('timestamp').reindex(full_range).reset_index()
power_df.rename(columns={'index': 'timestamp'}, inplace=True)
power_df[['wind_speed', 'active_power_total']] = power_df[['wind_speed', 'active_power_total']].ffill()
    # Attach EC forecasts to the main dataset (row-wise loop; a vectorized sketch follows this function)
ec_data = []
for idx, row in power_df.iterrows():
ts = row['timestamp']
        # EC forecasts already available at this timestamp (availability time <= current time)
available_ec = ec_df[ec_df['可用时间'] <= ts]
if not available_ec.empty:
            # Use the most recently issued EC run
latest_gen = available_ec['可用时间'].max()
latest_ec = available_ec[available_ec['可用时间'] == latest_gen]
            # Pick the forecast valid time closest to the current timestamp
time_diff = (latest_ec['预测日期'] - ts).abs()
closest_idx = time_diff.idxmin()
ec_point = latest_ec.loc[closest_idx].copy()
ec_point['timestamp'] = ts
ec_data.append(ec_point)
    # Build an EC time series DataFrame and merge it in
ec_ts_df = pd.DataFrame(ec_data)
merged_df = pd.merge(power_df, ec_ts_df, on='timestamp', how='left')
    # Fill missing EC values; time-based interpolation requires a DatetimeIndex
    merged_df = merged_df.set_index('timestamp')
    ec_cols = [col for col in ec_ts_df.columns if col not in ['timestamp', '可用时间', '预测日期']]
    for col in ec_cols:
        merged_df[col] = merged_df[col].interpolate(method='time')
    merged_df = merged_df.reset_index()
    # Add calendar features
merged_df['hour'] = merged_df['timestamp'].dt.hour
merged_df['day_of_week'] = merged_df['timestamp'].dt.dayofweek
merged_df['day_of_year'] = merged_df['timestamp'].dt.dayofyear
merged_df['month'] = merged_df['timestamp'].dt.month
    # Actual wind direction (no met-mast data available, so the EC wind direction is used as a proxy)
merged_df['风向(度)'] = merged_df['EC风向(度)']
return merged_df
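
# The row-wise loop above is clear but slow on long series. The helper below is an
# illustrative, hedged alternative (it is not called anywhere): it keeps only the most
# recently issued EC forecast for each forecast valid time and aligns it to the power
# timestamps with pandas.merge_asof. Unlike the loop, it does not enforce the
# per-timestamp availability check, so it only suits offline training-data preparation.
def attach_ec_with_merge_asof(power_df, ec_df):
    """Hypothetical vectorized variant of the EC merge (an approximation, see note above)."""
    # For each forecast valid time, keep the row with the latest availability time
    latest_ec = (ec_df.sort_values('可用时间')
                 .groupby('预测日期', as_index=False)
                 .last()
                 .sort_values('预测日期'))
    # Align each power timestamp with the nearest EC forecast valid time
    return pd.merge_asof(
        power_df.sort_values('timestamp'),
        latest_ec,
        left_on='timestamp',
        right_on='预测日期',
        direction='nearest'
    )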
# 2. Dataset preparation
# --------------------------------------------------
def prepare_dataset(df, look_back, forecast_steps, target_col='active_power_total'):
"""
准备LSTM训练数据集
:param df: 包含所有特征的DataFrame
:param look_back: 回溯时间步长
:param forecast_steps: 预测步长
:param target_col: 目标列名
:return: 标准化后的特征和目标数据集
"""
    # Feature columns
feature_cols = [
'wind_speed', 'active_power_total', 'EC风速(m/s)', '风向(度)',
'位势高度_850hPa(gpm)', '温度_850hPa(K)', '相对湿度_850hPa(%)',
'位势高度_500hPa(gpm)', '温度_500hPa(K)',
'hour', 'day_of_week', 'day_of_year', 'month'
]
    # Make sure the target column is among the features (autoregressive input)
if target_col not in feature_cols:
feature_cols.append(target_col)
    # Extract feature and target arrays
features = df[feature_cols].values
target = df[target_col].values
    # Build sliding-window samples
    X, y = [], []
    for i in range(len(features) - look_back - forecast_steps + 1):
X.append(features[i:i+look_back])
y.append(target[i+look_back:i+look_back+forecast_steps])
X = np.array(X)
y = np.array(y)
return X, y
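
# Shape note: with look_back=24, forecast_steps=16 and the 13 feature columns above,
# prepare_dataset returns X of shape (n_samples, 24, 13) and y of shape (n_samples, 16),
# where n_samples = len(features) - look_back - forecast_steps + 1. Standardization is
# applied later, inside train_and_evaluate_model.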
# 3. LSTM model definition
# --------------------------------------------------
def create_lstm_model(input_shape, output_steps):
"""
创建LSTM模型
:param input_shape: 输入形状 (time_steps, features)
:param output_steps: 输出时间步长
:return: 编译好的Keras模型
"""
model = Sequential([
LSTM(256, return_sequences=True, input_shape=input_shape),
Dropout(0.3),
LSTM(128, return_sequences=True),
Dropout(0.2),
LSTM(64, return_sequences=False),
Dropout(0.2),
Dense(64, activation='relu'),
Dense(32, activation='relu'),
Dense(output_steps)
])
model.compile(optimizer='adam', loss='mse', metrics=['mae'])
return model
# 4. Training and evaluation
# --------------------------------------------------
def train_and_evaluate_model(X_train, y_train, X_test, y_test, look_back, forecast_steps, model_name):
"""
训练和评估LSTM模型
:return: 训练好的模型和评估结果
"""
    # Standardize the data
X_scaler = StandardScaler()
y_scaler = StandardScaler()
    # Flatten the windows so the scalers see one row per time step
X_train_reshaped = X_train.reshape(-1, X_train.shape[2])
X_test_reshaped = X_test.reshape(-1, X_test.shape[2])
    # Fit the feature scaler on the training set and transform both sets
X_train_scaled = X_scaler.fit_transform(X_train_reshaped).reshape(X_train.shape)
X_test_scaled = X_scaler.transform(X_test_reshaped).reshape(X_test.shape)
    # Fit and transform the targets
y_train_scaled = y_scaler.fit_transform(y_train.reshape(-1, 1)).reshape(y_train.shape)
y_test_scaled = y_scaler.transform(y_test.reshape(-1, 1)).reshape(y_test.shape)
    # Build the model
model = create_lstm_model(
(look_back, X_train.shape[2]),
forecast_steps
)
    # Callbacks
callbacks = [
EarlyStopping(monitor='val_loss', patience=15, restore_best_weights=True),
ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=5, min_lr=1e-6)
]
    # Train the model
history = model.fit(
X_train_scaled, y_train_scaled,
epochs=100,
batch_size=64,
validation_split=0.2,
callbacks=callbacks,
verbose=1
)
    # Evaluate on the test set
y_pred_scaled = model.predict(X_test_scaled)
y_pred = y_scaler.inverse_transform(y_pred_scaled.reshape(-1, 1)).reshape(y_test.shape)
    # Performance metrics
mae = mean_absolute_error(y_test.flatten(), y_pred.flatten())
rmse = np.sqrt(mean_squared_error(y_test.flatten(), y_pred.flatten()))
print(f"{model_name} 模型评估结果:")
print(f"MAE: {mae:.2f} kW")
print(f"RMSE: {rmse:.2f} kW")
    # Save the model
model.save(f'{model_name}_wind_power_model.h5')
return model, y_pred, history, mae, rmse
# 5. Visualization
# --------------------------------------------------
def plot_results(y_true, y_pred, model_name, forecast_steps, mae):
"""
可视化预测结果
:param y_true: 实际值
:param y_pred: 预测值
:param model_name: 模型名称
:param forecast_steps: 预测步长
:param mae: 平均绝对误差
"""
    # Show a representative slice of the test series
start_idx = 500
end_idx = start_idx + 200
plt.figure(figsize=(15, 7))
    # Plot the actual values
plt.plot(y_true.flatten()[start_idx:end_idx],
label='实际功率',
color='blue',
alpha=0.7,
linewidth=2)
    # Plot the predicted values
plt.plot(y_pred.flatten()[start_idx:end_idx],
label='预测功率',
color='red',
alpha=0.7,
linestyle='--',
linewidth=2)
plt.title(f'{model_name}风功率预测 (预测步长: {forecast_steps}步, MAE: {mae:.2f} kW)', fontsize=14)
plt.xlabel('时间步', fontsize=12)
plt.ylabel('有功功率 (kW)', fontsize=12)
plt.legend(fontsize=12)
plt.grid(True, linestyle='--', alpha=0.7)
plt.tight_layout()
plt.savefig(f'{model_name}_prediction_plot.png', dpi=300)
plt.show()
def plot_training_history(history, model_name):
"""绘制训练过程中的损失曲线"""
plt.figure(figsize=(12, 6))
    # Plot the training loss
plt.plot(history.history['loss'], label='训练损失')
    # Plot the validation loss
if 'val_loss' in history.history:
plt.plot(history.history['val_loss'], label='验证损失')
plt.title(f'{model_name} 训练过程', fontsize=14)
plt.xlabel('训练轮次', fontsize=12)
plt.ylabel('损失 (MSE)', fontsize=12)
plt.legend(fontsize=12)
plt.grid(True, linestyle='--', alpha=0.7)
plt.tight_layout()
plt.savefig(f'{model_name}_training_history.png', dpi=300)
plt.show()
# 6. Main
# --------------------------------------------------
def main():
print("开始数据加载与预处理...")
df = load_and_preprocess_data()
    # Forecast configurations
ULTRA_SHORT_CONFIG = {
'name': '超短期',
        'look_back': 24,        # 6 h of history (24 * 15 min)
        'forecast_steps': 16,   # 4 h horizon (16 * 15 min)
        'test_size': 0.1        # last 10% as the test set
}
SHORT_TERM_CONFIG = {
'name': '短期',
        'look_back': 96,        # 24 h of history (96 * 15 min)
        'forecast_steps': 288,  # 72 h horizon (288 * 15 min)
        'test_size': 0.05       # last 5% as the test set (long-horizon forecasting needs more training history)
}
    # Prepare the ultra-short-term dataset
print("\n准备超短期预测数据集...")
X_ultra, y_ultra = prepare_dataset(
df,
ULTRA_SHORT_CONFIG['look_back'],
ULTRA_SHORT_CONFIG['forecast_steps']
)
    # Train/test split
split_idx_ultra = int(len(X_ultra) * (1 - ULTRA_SHORT_CONFIG['test_size']))
X_train_ultra, X_test_ultra = X_ultra[:split_idx_ultra], X_ultra[split_idx_ultra:]
y_train_ultra, y_test_ultra = y_ultra[:split_idx_ultra], y_ultra[split_idx_ultra:]
    # Prepare the short-term dataset
print("\n准备短期预测数据集...")
X_short, y_short = prepare_dataset(
df,
SHORT_TERM_CONFIG['look_back'],
SHORT_TERM_CONFIG['forecast_steps']
)
    # Train/test split
split_idx_short = int(len(X_short) * (1 - SHORT_TERM_CONFIG['test_size']))
X_train_short, X_test_short = X_short[:split_idx_short], X_short[split_idx_short:]
y_train_short, y_test_short = y_short[:split_idx_short], y_short[split_idx_short:]
print("\n数据集统计信息:")
print(f"超短期数据集: 总样本={len(X_ultra)}, 训练集={len(X_train_ultra)}, 测试集={len(X_test_ultra)}")
print(f"短期数据集: 总样本={len(X_short)}, 训练集={len(X_train_short)}, 测试集={len(X_test_short)}")
    # Train and evaluate the ultra-short-term model
print("\n训练超短期预测模型...")
ultra_model, ultra_pred, ultra_history, ultra_mae, ultra_rmse = train_and_evaluate_model(
X_train_ultra, y_train_ultra,
X_test_ultra, y_test_ultra,
ULTRA_SHORT_CONFIG['look_back'],
ULTRA_SHORT_CONFIG['forecast_steps'],
'ultra_short_term'
)
    # Visualize the ultra-short-term results
plot_results(y_test_ultra, ultra_pred,
'超短期', ULTRA_SHORT_CONFIG['forecast_steps'],
ultra_mae)
plot_training_history(ultra_history, '超短期模型')
    # Train and evaluate the short-term model
print("\n训练短期预测模型...")
short_model, short_pred, short_history, short_mae, short_rmse = train_and_evaluate_model(
X_train_short, y_train_short,
X_test_short, y_test_short,
SHORT_TERM_CONFIG['look_back'],
SHORT_TERM_CONFIG['forecast_steps'],
'short_term'
)
    # Visualize the short-term results
plot_results(y_test_short, short_pred,
'短期', SHORT_TERM_CONFIG['forecast_steps'],
short_mae)
plot_training_history(short_history, '短期模型')
    # Final report
print("\n" + "="*50)
print("风功率预测模型训练完成!")
print("="*50)
print(f"超短期模型 (4小时预测):")
print(f" - 回溯步长: {ULTRA_SHORT_CONFIG['look_back']} (6小时)")
print(f" - 预测步长: {ULTRA_SHORT_CONFIG['forecast_steps']} (4小时)")
print(f" - 测试集MAE: {ultra_mae:.2f} kW")
print(f" - 测试集RMSE: {ultra_rmse:.2f} kW")
print(f"\n短期模型 (72小时预测):")
print(f" - 回溯步长: {SHORT_TERM_CONFIG['look_back']} (24小时)")
print(f" - 预测步长: {SHORT_TERM_CONFIG['forecast_steps']} (72小时)")
print(f" - 测试集MAE: {short_mae:.2f} kW")
print(f" - 测试集RMSE: {short_rmse:.2f} kW")
print("="*50)
    # Save prediction results; use the first forecast step of each test window so that
    # every column has the same length (flattening whole horizons would give mismatched lengths)
    n_test = len(y_test_short)
    ts_start = split_idx_short + SHORT_TERM_CONFIG['look_back']
    results_df = pd.DataFrame({
        'timestamp': df['timestamp'].iloc[ts_start:ts_start + n_test].values,
        '实际功率': y_test_short[:, 0],
        '超短期预测': ultra_pred[:n_test, 0],  # length-aligned only; the ultra-short-term test windows differ
        '短期预测': short_pred[:, 0]
    })
results_df.to_csv('风功率预测结果.csv', index=False)
print("预测结果已保存到 '风功率预测结果.csv'")
if __name__ == "__main__":
    main()

Analyze the code above and regenerate the complete program according to the following requirements:
1. The data folder path is "G:\桌面映射文件夹\运行调控中心\两个细则\功率预测算法大赛\数据".
2. For each wind farm, only the EC data and the wind-farm turbine data are complete; the other two data sources are available for some farms and missing for others. The EC and turbine data must therefore always be used for training, and the remaining sources only as needed.
3. Do not use TensorFlow for the model; use PyTorch instead (see the sketch below).
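
For requirement 3, here is a minimal sketch of what a PyTorch counterpart to create_lstm_model above might look like, mirroring its layer sizes (three LSTM layers of 256/128/64 units, then 64/32 dense units and a linear output of forecast_steps). The class name WindPowerLSTM, the helper load_csv_if_exists, and the DATA_DIR constant for requirement 1 are illustrative assumptions, not part of the original code; scaling, windowing and the training loop would still follow the structure of the script above.

import os
import pandas as pd
import torch
import torch.nn as nn

# Requirement 1: assumed data folder (adjust if the layout differs)
DATA_DIR = r"G:\桌面映射文件夹\运行调控中心\两个细则\功率预测算法大赛\数据"

def load_csv_if_exists(filename, **kwargs):
    """Requirement 2: EC and turbine CSVs are mandatory, the other sources optional."""
    path = os.path.join(DATA_DIR, filename)
    return pd.read_csv(path, **kwargs) if os.path.exists(path) else None

class WindPowerLSTM(nn.Module):
    """Hypothetical PyTorch equivalent of the Keras model in create_lstm_model."""
    def __init__(self, n_features, output_steps):
        super().__init__()
        self.lstm1 = nn.LSTM(n_features, 256, batch_first=True)
        self.lstm2 = nn.LSTM(256, 128, batch_first=True)
        self.lstm3 = nn.LSTM(128, 64, batch_first=True)
        self.drop1 = nn.Dropout(0.3)
        self.drop2 = nn.Dropout(0.2)
        self.drop3 = nn.Dropout(0.2)
        self.head = nn.Sequential(
            nn.Linear(64, 64), nn.ReLU(),
            nn.Linear(64, 32), nn.ReLU(),
            nn.Linear(32, output_steps),
        )

    def forward(self, x):
        # x: (batch, look_back, n_features)
        out, _ = self.lstm1(x)
        out, _ = self.lstm2(self.drop1(out))
        out, _ = self.lstm3(self.drop2(out))
        out = self.drop3(out[:, -1, :])  # keep the last time step (as with return_sequences=False)
        return self.head(out)

# Example instantiation for the ultra-short-term configuration (13 features, 16 output steps)
model = WindPowerLSTM(n_features=13, output_steps=16)
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

The Adam/MSE setup mirrors model.compile(optimizer='adam', loss='mse') in the original; early stopping would need to be reimplemented manually, and learning-rate reduction could use torch.optim.lr_scheduler.ReduceLROnPlateau.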