import os
import time
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, TensorDataset
from torch.optim.lr_scheduler import ReduceLROnPlateau
# 设置随机种子确保结果可复现
torch.manual_seed(42)
np.random.seed(42)
sns.set_style('whitegrid')
# 设备配置
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"使用设备: {device}")
# 1. 数据加载与预处理函数
# --------------------------------------------------
def load_and_preprocess_data():
"""加载并预处理所有数据源"""
print("开始数据加载与预处理...")
start_time = time.time()
# 加载EC气象数据
ec_df = pd.read_csv('阿拉山口风电场_EC_data.csv', parse_dates=['生成日期', '预测日期'])
ec_df = ec_df[ec_df['场站名'] == '阿拉山口风电场']
# 计算EC风速和风向
ec_df['EC风速(m/s)'] = np.sqrt(ec_df['U风分量(m/s)']**2 + ec_df['V风分量(m/s)']**2)
ec_df['EC风向(度)'] = np.degrees(np.arctan2(ec_df['V风分量(m/s)'], ec_df['U风分量(m/s)'])) % 360
# 添加EC数据可用时间(生成时间+12小时)
ec_df['可用时间'] = ec_df['生成日期'] + pd.Timedelta(hours=12)
# 选择关键特征
ec_features = [
'可用时间', '预测日期', 'EC风速(m/s)', 'EC风向(度)',
'位势高度_850hPa(gpm)', '温度_850hPa(K)', '相对湿度_850hPa(%)',
'位势高度_500hPa(gpm)', '温度_500hPa(K)'
]
ec_df = ec_df[ec_features]
# 加载风机数据
turbine_df = pd.read_csv('阿拉山口风电场风机数据.csv', encoding='gbk', parse_dates=[0])
turbine_df.columns = ['timestamp', 'wind_speed', 'active_power']
# 加载远动数据
scada_df = pd.read_csv('阿拉山口风电场远动数据.csv', encoding='gbk', parse_dates=[0])
scada_df.columns = ['timestamp', 'active_power_total']
# 合并风机和远动数据
power_df = pd.merge(turbine_df[['timestamp', 'wind_speed']],
scada_df,
on='timestamp',
how='outer')
# 按时间排序并填充缺失值
power_df.sort_values('timestamp', inplace=True)
power_df['active_power_total'].ffill(inplace=True)
power_df['wind_speed'].ffill(inplace=True)
# 创建完整的时间序列索引(15分钟间隔)
full_range = pd.date_range(
start=power_df['timestamp'].min(),
end=power_df['timestamp'].max(),
freq='15T'
)
power_df = power_df.set_index('timestamp').reindex(full_range).reset_index()
power_df.rename(columns={'index': 'timestamp'}, inplace=True)
power_df[['wind_speed', 'active_power_total']] = power_df[['wind_speed', 'active_power_total']].ffill()
# 合并EC数据到主数据集
ec_data = []
for idx, row in power_df.iterrows():
ts = row['timestamp']
# 获取可用的EC预测(可用时间 <= 当前时间)
available_ec = ec_df[ec_df['可用时间'] <= ts]
if not available_ec.empty:
# 获取最近发布的EC数据
latest_gen = available_ec['可用时间'].max()
latest_ec = available_ec[available_ec['可用时间'] == latest_gen]
# 找到最接近当前时间点的预测
time_diff = (latest_ec['预测日期'] - ts).abs()
closest_idx = time_diff.idxmin()
ec_point = latest_ec.loc[closest_idx].copy()
ec_point['timestamp'] = ts
ec_data.append(ec_point)
# 创建EC数据DataFrame并合并
ec_ts_df = pd.DataFrame(ec_data)
merged_df = pd.merge(power_df, ec_ts_df, on='timestamp', how='left')
# 填充缺失的EC数据
ec_cols = [col for col in ec_ts_df.columns if col not in ['timestamp', '可用时间', '预测日期']]
for col in ec_cols:
merged_df[col] = merged_df[col].interpolate(method='time')
# 添加时间特征
merged_df['hour'] = merged_df['timestamp'].dt.hour
merged_df['day_of_week'] = merged_df['timestamp'].dt.dayofweek
merged_df['day_of_year'] = merged_df['timestamp'].dt.dayofyear
merged_df['month'] = merged_df['timestamp'].dt.month
# 计算实际风向(如果有测风塔数据,这里使用EC风向)
merged_df['风向(度)'] = merged_df['EC风向(度)']
# 移除包含NaN的行
merged_df.dropna(inplace=True)
# 特征选择
feature_cols = [
'wind_speed', 'active_power_total', 'EC风速(m/s)', '风向(度)',
'位势高度_850hPa(gpm)', '温度_850hPa(K)', '相对湿度_850hPa(%)',
'位势高度_500hPa(gpm)', '温度_500hPa(K)',
'hour', 'day_of_week', 'day_of_year', 'month'
]
target_col = 'active_power_total'
print(f"数据处理完成! 耗时: {time.time()-start_time:.2f}秒")
print(f"数据集形状: {merged_df.shape}")
print(f"特征数量: {len(feature_cols)}")
return merged_df[feature_cols], merged_df[target_col], merged_df['timestamp']
# 2. 数据准备类 (PyTorch Dataset)
# --------------------------------------------------
class WindPowerDataset(Dataset):
"""风功率预测数据集类"""
def __init__(self, X, y, look_back, forecast_steps):
"""
:param X: 特征数据 (n_samples, n_features)
:param y: 目标数据 (n_samples,)
:param look_back: 回溯时间步长
:param forecast_steps: 预测步长
"""
self.X = X
self.y = y
self.look_back = look_back
self.forecast_steps = forecast_steps
self.n_samples = len(X) - look_back - forecast_steps + 1
def __len__(self):
return self.n_samples
def __getitem__(self, idx):
# 获取历史序列
x_seq = self.X[idx:idx+self.look_back]
# 获取未来目标序列
y_seq = self.y[idx+self.look_back:idx+self.look_back+self.forecast_steps]
# 转换为PyTorch张量
x_tensor = torch.tensor(x_seq, dtype=torch.float32)
y_tensor = torch.tensor(y_seq, dtype=torch.float32)
return x_tensor, y_tensor
# 3. LSTM模型 (PyTorch实现)
# --------------------------------------------------
class WindPowerLSTM(nn.Module):
"""风功率预测LSTM模型"""
def __init__(self, input_size, hidden_size, num_layers, output_steps):
"""
:param input_size: 输入特征维度
:param hidden_size: LSTM隐藏层大小
:param num_layers: LSTM层数
:param output_steps: 输出步长
"""
super(WindPowerLSTM, self).__init__()
self.hidden_size = hidden_size
self.num_layers = num_layers
self.output_steps = output_steps
# LSTM层
self.lstm = nn.LSTM(
input_size=input_size,
hidden_size=hidden_size,
num_layers=num_layers,
batch_first=True,
dropout=0.2 if num_layers > 1 else 0
)
# 全连接层
self.fc = nn.Sequential(
nn.Linear(hidden_size, 128),
nn.ReLU(),
nn.Dropout(0.3),
nn.Linear(128, 64),
nn.ReLU(),
nn.Dropout(0.2),
nn.Linear(64, output_steps)
)
def forward(self, x):
# 初始化隐藏状态
h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(device)
c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(device)
# 前向传播LSTM
out, _ = self.lstm(x, (h0, c0))
# 只取最后一个时间步的输出
out = out[:, -1, :]
# 全连接层
out = self.fc(out)
return out
# 4. 训练和评估函数
# --------------------------------------------------
def train_model(model, train_loader, val_loader, criterion, optimizer, scheduler, epochs, model_name):
"""训练模型"""
print(f"\n开始训练 {model_name} 模型...")
start_time = time.time()
best_val_loss = float('inf')
history = {'train_loss': [], 'val_loss': []}
for epoch in range(epochs):
# 训练阶段
model.train()
train_loss = 0.0
for inputs, targets in train_loader:
inputs, targets = inputs.to(device), targets.to(device)
# 前向传播
outputs = model(inputs)
loss = criterion(outputs, targets)
# 反向传播和优化
optimizer.zero_grad()
loss.backward()
optimizer.step()
train_loss += loss.item() * inputs.size(0)
# 验证阶段
model.eval()
val_loss = 0.0
with torch.no_grad():
for inputs, targets in val_loader:
inputs, targets = inputs.to(device), targets.to(device)
outputs = model(inputs)
loss = criterion(outputs, targets)
val_loss += loss.item() * inputs.size(0)
# 计算平均损失
train_loss = train_loss / len(train_loader.dataset)
val_loss = val_loss / len(val_loader.dataset)
history['train_loss'].append(train_loss)
history['val_loss'].append(val_loss)
# 更新学习率
if scheduler:
scheduler.step(val_loss)
# 保存最佳模型
if val_loss < best_val_loss:
best_val_loss = val_loss
torch.save(model.state_dict(), f'best_{model_name}_model.pth')
# 打印进度
if (epoch + 1) % 5 == 0:
print(f"Epoch [{epoch+1}/{epochs}] - Train Loss: {train_loss:.6f}, Val Loss: {val_loss:.6f}")
print(f"训练完成! 耗时: {time.time()-start_time:.2f}秒")
print(f"最佳验证损失: {best_val_loss:.6f}")
return history
def evaluate_model(model, test_loader, scaler, model_name):
"""评估模型性能"""
model.eval()
actuals = []
predictions = []
with torch.no_grad():
for inputs, targets in test_loader:
inputs = inputs.to(device)
# 预测
outputs = model(inputs)
# 反归一化
outputs_np = outputs.cpu().numpy()
targets_np = targets.numpy()
# 反归一化
outputs_inv = scaler.inverse_transform(outputs_np)
targets_inv = scaler.inverse_transform(targets_np.reshape(-1, 1)).flatten()
# 收集结果
actuals.extend(targets_inv)
predictions.extend(outputs_inv.flatten())
# 转换为numpy数组
actuals = np.array(actuals)
predictions = np.array(predictions)
# 计算性能指标
mae = mean_absolute_error(actuals, predictions)
rmse = np.sqrt(mean_squared_error(actuals, predictions))
print(f"\n{model_name} 模型评估结果:")
print(f"MAE: {mae:.2f} kW")
print(f"RMSE: {rmse:.2f} kW")
return actuals, predictions, mae, rmse
# 5. 可视化函数
# --------------------------------------------------
def plot_predictions(actuals, predictions, timestamps, model_name, forecast_steps, mae):
"""可视化预测结果"""
# 创建结果DataFrame
results = pd.DataFrame({
'timestamp': timestamps,
'actual': actuals,
'predicted': predictions
})
# 设置时间索引
results.set_index('timestamp', inplace=True)
# 选择一段代表性的时间序列展示
sample = results.iloc[1000:1300]
plt.figure(figsize=(15, 7))
# 绘制实际值
plt.plot(sample.index, sample['actual'],
label='实际功率',
color='blue',
alpha=0.7,
linewidth=2)
# 绘制预测值
plt.plot(sample.index, sample['predicted'],
label='预测功率',
color='red',
alpha=0.7,
linestyle='--',
linewidth=2)
plt.title(f'{model_name}风功率预测 (预测步长: {forecast_steps}步, MAE: {mae:.2f} kW)', fontsize=14)
plt.xlabel('时间', fontsize=12)
plt.ylabel('有功功率 (kW)', fontsize=12)
plt.legend(fontsize=12)
plt.grid(True, linestyle='--', alpha=0.7)
plt.xticks(rotation=45)
plt.tight_layout()
plt.savefig(f'{model_name}_prediction_plot.png', dpi=300)
plt.show()
return results
def plot_training_history(history, model_name):
"""绘制训练过程中的损失曲线"""
plt.figure(figsize=(12, 6))
# 绘制训练损失
plt.plot(history['train_loss'], label='训练损失')
# 绘制验证损失
if 'val_loss' in history:
plt.plot(history['val_loss'], label='验证损失')
plt.title(f'{model_name} 训练过程', fontsize=14)
plt.xlabel('训练轮次', fontsize=12)
plt.ylabel('损失 (MSE)', fontsize=12)
plt.legend(fontsize=12)
plt.grid(True, linestyle='--', alpha=0.7)
plt.tight_layout()
plt.savefig(f'{model_name}_training_history.png', dpi=300)
plt.show()
# 6. 主函数
# --------------------------------------------------
def main():
# 加载数据
X, y, timestamps = load_and_preprocess_data()
# 定义预测配置
ULTRA_SHORT_CONFIG = {
'name': '超短期',
'look_back': 24, # 6小时历史 (24*15min)
'forecast_steps': 16, # 4小时预测 (16*15min)
'batch_size': 64,
'hidden_size': 128,
'num_layers': 2,
'epochs': 100,
'lr': 0.001
}
SHORT_TERM_CONFIG = {
'name': '短期',
'look_back': 96, # 24小时历史 (96*15min)
'forecast_steps': 288, # 72小时预测 (288*15min)
'batch_size': 32,
'hidden_size': 256,
'num_layers': 3,
'epochs': 150,
'lr': 0.0005
}
# 准备超短期预测数据集
print("\n准备超短期预测数据集...")
# 数据标准化
X_scaler = StandardScaler()
y_scaler = StandardScaler()
X_scaled = X_scaler.fit_transform(X)
y_scaled = y_scaler.fit_transform(y.values.reshape(-1, 1)).flatten()
# 创建数据集
dataset = WindPowerDataset(X_scaled, y_scaled,
ULTRA_SHORT_CONFIG['look_back'],
ULTRA_SHORT_CONFIG['forecast_steps'])
# 划分数据集
train_size = int(0.8 * len(dataset))
val_size = int(0.1 * len(dataset))
test_size = len(dataset) - train_size - val_size
train_dataset, val_dataset, test_dataset = torch.utils.data.random_split(
dataset, [train_size, val_size, test_size]
)
# 创建数据加载器
train_loader = DataLoader(train_dataset, batch_size=ULTRA_SHORT_CONFIG['batch_size'], shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=ULTRA_SHORT_CONFIG['batch_size'])
test_loader = DataLoader(test_dataset, batch_size=ULTRA_SHORT_CONFIG['batch_size'])
# 创建模型
model_ultra = WindPowerLSTM(
input_size=X.shape[1],
hidden_size=ULTRA_SHORT_CONFIG['hidden_size'],
num_layers=ULTRA_SHORT_CONFIG['num_layers'],
output_steps=ULTRA_SHORT_CONFIG['forecast_steps']
).to(device)
# 损失函数和优化器
criterion = nn.MSELoss()
optimizer = optim.Adam(model_ultra.parameters(), lr=ULTRA_SHORT_CONFIG['lr'])
scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=5, verbose=True)
# 训练模型
history_ultra = train_model(
model_ultra, train_loader, val_loader,
criterion, optimizer, scheduler,
ULTRA_SHORT_CONFIG['epochs'],
'ultra_short'
)
# 评估模型
actuals_ultra, preds_ultra, mae_ultra, rmse_ultra = evaluate_model(
model_ultra, test_loader, y_scaler, '超短期'
)
# 可视化结果
plot_training_history(history_ultra, '超短期模型')
# 获取对应的时间戳
ultra_timestamps = timestamps[ULTRA_SHORT_CONFIG['look_back']:][:len(actuals_ultra)]
results_ultra = plot_predictions(
actuals_ultra, preds_ultra, ultra_timestamps,
'超短期', ULTRA_SHORT_CONFIG['forecast_steps'], mae_ultra
)
# 准备短期预测数据集
print("\n准备短期预测数据集...")
# 创建数据集
dataset_short = WindPowerDataset(X_scaled, y_scaled,
SHORT_TERM_CONFIG['look_back'],
SHORT_TERM_CONFIG['forecast_steps'])
# 划分数据集
train_size_short = int(0.8 * len(dataset_short))
val_size_short = int(0.1 * len(dataset_short))
test_size_short = len(dataset_short) - train_size_short - val_size_short
train_dataset_short, val_dataset_short, test_dataset_short = torch.utils.data.random_split(
dataset_short, [train_size_short, val_size_short, test_size_short]
)
# 创建数据加载器
train_loader_short = DataLoader(train_dataset_short, batch_size=SHORT_TERM_CONFIG['batch_size'], shuffle=True)
val_loader_short = DataLoader(val_dataset_short, batch_size=SHORT_TERM_CONFIG['batch_size'])
test_loader_short = DataLoader(test_dataset_short, batch_size=SHORT_TERM_CONFIG['batch_size'])
# 创建模型
model_short = WindPowerLSTM(
input_size=X.shape[1],
hidden_size=SHORT_TERM_CONFIG['hidden_size'],
num_layers=SHORT_TERM_CONFIG['num_layers'],
output_steps=SHORT_TERM_CONFIG['forecast_steps']
).to(device)
# 损失函数和优化器
optimizer_short = optim.Adam(model_short.parameters(), lr=SHORT_TERM_CONFIG['lr'])
scheduler_short = ReduceLROnPlateau(optimizer_short, mode='min', factor=0.5, patience=10, verbose=True)
# 训练模型
history_short = train_model(
model_short, train_loader_short, val_loader_short,
criterion, optimizer_short, scheduler_short,
SHORT_TERM_CONFIG['epochs'],
'short_term'
)
# 评估模型
actuals_short, preds_short, mae_short, rmse_short = evaluate_model(
model_short, test_loader_short, y_scaler, '短期'
)
# 可视化结果
plot_training_history(history_short, '短期模型')
# 获取对应的时间戳
short_timestamps = timestamps[SHORT_TERM_CONFIG['look_back']:][:len(actuals_short)]
results_short = plot_predictions(
actuals_short, preds_short, short_timestamps,
'短期', SHORT_TERM_CONFIG['forecast_steps'], mae_short
)
# 最终报告
print("\n" + "="*50)
print("风功率预测模型训练完成!")
print("="*50)
print(f"超短期模型 (4小时预测):")
print(f" - 回溯步长: {ULTRA_SHORT_CONFIG['look_back']} (6小时)")
print(f" - 预测步长: {ULTRA_SHORT_CONFIG['forecast_steps']} (4小时)")
print(f" - 测试集MAE: {mae_ultra:.2f} kW")
print(f" - 测试集RMSE: {rmse_ultra:.2f} kW")
print(f"\n短期模型 (72小时预测):")
print(f" - 回溯步长: {SHORT_TERM_CONFIG['look_back']} (24小时)")
print(f" - 预测步长: {SHORT_TERM_CONFIG['forecast_steps']} (72小时)")
print(f" - 测试集MAE: {mae_short:.2f} kW")
print(f" - 测试集RMSE: {rmse_short:.2f} kW")
print("="*50)
# 保存预测结果
results_df = pd.DataFrame({
'timestamp': short_timestamps,
'实际功率': actuals_short,
'超短期预测': results_ultra['predicted'].values[:len(actuals_short)],
'短期预测': preds_short
})
results_df.to_csv('风功率预测结果.csv', index=False)
print("预测结果已保存到 '风功率预测结果.csv'")
if __name__ == "__main__":
main() 请在上述的代码基础上修改,本次修改要求只使用"阿拉山口风电场_EC_data"和"阿拉山口风电场风机数据"这两个数据集进行训练和预测。
最新发布