import os
os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'
import torch
import torch.nn as nn
import torch.optim as optim
import torch.utils.data as Data
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from joblib import dump, load
from time import time
from mealpy.swarm_based import PSO
from mealpy.evolutionary_based import GA
from sko.SA import SA as SKO_SA
from mealpy.swarm_based import ACOR
from mealpy.swarm_based import WOA
from mealpy.swarm_based import GWO
# ==================== 1. 设备设置与随机种子 ====================
torch.manual_seed(100)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# ==================== 2. 数据加载与预处理 ====================
data = pd.read_csv('D:/PycharmProjects/PythonProject3/liaotou4.csv')
feature_columns = data.columns[1:-2]
target_columns = data.columns[-2:] # 可多目标
X_all, y_all = data[feature_columns].values, data[target_columns].values
# 特征归一化
scaler_x = StandardScaler()
X_all = scaler_x.fit_transform(X_all)
dump(scaler_x, 'scaler_x')
# 目标归一化
scaler_y = StandardScaler()
y_all = scaler_y.fit_transform(y_all)
dump(scaler_y, 'scaler_y')
# 构建序列数据
seq_len = 60
X_seq, y_seq = [], []
for i in range(len(X_all) - seq_len):
X_seq.append(X_all[i:i+seq_len])
y_seq.append(y_all[i+seq_len])
X_seq = torch.tensor(np.array(X_seq), dtype=torch.float32)
y_seq = torch.tensor(np.array(y_seq), dtype=torch.float32)
# 数据集划分
# 数据集按时序划分
train_size = int(0.7 * len(X_seq))
train_x, test_x = X_seq[:train_size], X_seq[train_size:]
train_y, test_y = y_seq[:train_size], y_seq[train_size:]
batch_size = 64
train_loader = Data.DataLoader(Data.TensorDataset(train_x, train_y), batch_size=batch_size, shuffle=True, drop_last=True)
test_loader = Data.DataLoader(Data.TensorDataset(test_x, test_y), batch_size=batch_size, drop_last=True)
# ==================== 3. 定义模型 ====================
from torch.nn import TransformerEncoder, TransformerEncoderLayer
class TransformerBiLSTM(nn.Module):
def __init__(self, input_dim, hidden_layer_sizes, hidden_dim, num_layers, num_heads, output_dim, dropout_rate=0.5):
super().__init__()
self.transformer = TransformerEncoder(
TransformerEncoderLayer(input_dim, num_heads, hidden_dim, dropout=dropout_rate, batch_first=True),
num_layers
)
self.num_layers = len(hidden_layer_sizes)
self.bilstm_layers = nn.ModuleList()
self.bilstm_layers.append(nn.LSTM(input_dim, hidden_layer_sizes[0], batch_first=True, bidirectional=True))
for i in range(1, self.num_layers):
self.bilstm_layers.append(nn.LSTM(hidden_layer_sizes[i-1]*2, hidden_layer_sizes[i], batch_first=True, bidirectional=True))
self.linear = nn.Linear(hidden_layer_sizes[-1]*2, output_dim)
def forward(self, input_seq):
transformer_output = self.transformer(input_seq)
bilstm_out = transformer_output
for bilstm in self.bilstm_layers:
bilstm_out, _ = bilstm(bilstm_out)
predict = self.linear(bilstm_out[:, -1, :])
return predict
# ==================== 4. VPPSO算法实现 ====================
def vppso(
func,
dim,
bounds,
N=12,
N1=6,
N2=6,
T=16,
a=0.3,
c1=2.0,
c2=2.0,
b=1.0,
verbose=True
):
X = np.random.uniform([b[0] for b in bounds], [b[1] for b in bounds], (N, dim))
V = np.zeros((N, dim))
Pbest = X.copy()
Pbest_f = np.array([func(x) for x in X])
gbest_idx = np.argmin(Pbest_f)
Gbest = Pbest[gbest_idx].copy()
Gbest_f = Pbest_f[gbest_idx]
best_curve = [Gbest_f]
for t in range(T):
alpha_t = np.exp(-b * (t / T) ** b)
for i in range(N):
if i < N1:
if np.random.rand() < a:
V[i] = V[i]
else:
r1, r2, r3 = np.random.rand(3)
V[i] = (V[i] * r1 * alpha_t
+ c1 * r2 * (Pbest[i] - X[i])
+ c2 * r3 * (Gbest - X[i]))
X[i] = X[i] + V[i]
else:
if np.random.rand() < 0.5:
X[i] = Gbest + alpha_t * np.random.rand(dim) * np.abs(Gbest * alpha_t)
else:
X[i] = Gbest - alpha_t * np.random.rand(dim) * np.abs(Gbest * alpha_t)
# 边界处理
for d in range(dim):
if X[i, d] < bounds[d][0]:
X[i, d] = bounds[d][0]
if X[i, d] > bounds[d][1]:
X[i, d] = bounds[d][1]
# 适应度
F = np.array([func(x) for x in X])
for i in range(N):
if i < N1:
if F[i] < Pbest_f[i]:
Pbest[i] = X[i].copy()
Pbest_f[i] = F[i]
if F[i] < Gbest_f:
Gbest = X[i].copy()
Gbest_f = F[i]
else:
if F[i] < Gbest_f:
Gbest = X[i].copy()
Gbest_f = F[i]
best_curve.append(Gbest_f)
if verbose and (t % 4 == 0 or t == T-1):
print(f"Iter {t+1}/{T}, Best fitness: {Gbest_f}")
return Gbest, Gbest_f, best_curve
# ==================== 5. 定义超参数搜索空间与适应度函数 ====================
param_bounds = [
(32, 128), # hidden_layer_sizes[0]
(32, 128), # hidden_layer_sizes[1]
(64, 256), # hidden_dim
(1, 4), # num_layers
(1, 4), # num_heads
(0.05, 0.5), # dropout_rate
(0.00005, 0.005) # learning rate
]
def eval_model_hyperparams(x):
h1 = int(round(x[0]))
h2 = int(round(x[1]))
hidden_dim = int(round(x[2]))
num_layers = int(round(x[3]))
num_heads = int(round(x[4]))
dropout = float(x[5])
lr = float(x[6])
try:
model = TransformerBiLSTM(
input_dim=X_seq.shape[2],
hidden_layer_sizes=[h1, h2],
hidden_dim=hidden_dim,
num_layers=num_layers,
num_heads=num_heads,
output_dim=y_seq.shape[1],
dropout_rate=dropout
).to(device)
optimizer = optim.Adam(model.parameters(), lr)
loss_function = nn.MSELoss(reduction='sum')
best_mse = 1000.
for epoch in range(4): # 搜索时只训练4个epoch
model.train()
for seq, labels in train_loader:
seq, labels = seq.to(device), labels.to(device)
optimizer.zero_grad()
y_pred = model(seq)
loss = loss_function(y_pred, labels)
loss.backward()
optimizer.step()
model.eval()
with torch.no_grad():
test_loss = 0.
test_size = len(test_loader.dataset)
for data, label in test_loader:
data, label = data.to(device), label.to(device)
pre = model(data)
test_loss += loss_function(pre, label).item()
test_av_mseloss = test_loss / test_size
if test_av_mseloss < best_mse:
best_mse = test_av_mseloss
return best_mse
except Exception as e:
print("Exception in eval:", e)
return 9999.
def run_sa():
bounds = np.array(param_bounds)
x0 = [(b[0] + b[1]) / 2 for b in param_bounds]
sa = SKO_SA(
func=lambda x: eval_model_hyperparams(np.clip(x, bounds[:, 0], bounds[:, 1])),
x0=x0, T_max=50, T_min=1, L=30, max_stay_counter=20
)
best_param, best_loss = sa.run()
return best_param, best_loss
from mealpy import Problem
problem = Problem(
fit_func=eval_model_hyperparams,
bounds=param_bounds,
minmax="min"
)
optimizer_dict = {
'GA': lambda: GA.BaseGA(problem, epoch=16, pop_size=12).solve().solution[:2],
'PSO': lambda: PSO.BasePSO(problem, epoch=16, pop_size=12).solve().solution[:2],
'ACO': lambda: ACOR.BaseACOR(problem, epoch=16, pop_size=12).solve().solution[:2],
'WOA': lambda: WOA.BaseWOA(problem, epoch=16, pop_size=12).solve().solution[:2],
'GWO': lambda: GWO.BaseGWO(problem, epoch=16, pop_size=12).solve().solution[:2],
'SA': run_sa,
'VPPSO': lambda: vppso(eval_model_hyperparams, len(param_bounds), param_bounds,
N=12, N1=6, N2=6, T=16, a=0.3, c1=2.0, c2=2.0, b=1.0, verbose=False)[:2]
}
final_results = {}
show_n = 100 # 展示前100个样本
alg_colors = {
'VPPSO': 'blue', 'GA': 'red', 'PSO': 'green', 'SA': 'purple',
'ACOR': 'orange', 'WOA': 'deepskyblue', 'GWO': 'brown'
}
for alg_name, alg_func in optimizer_dict.items():
print(f"\n------ 开始{alg_name}优化Transformer-BiLSTM超参数 ------")
best_param, best_loss = alg_func()
# 还原参数
h1 = int(round(best_param[0]))
h2 = int(round(best_param[1]))
hidden_dim = int(round(best_param[2]))
num_layers = int(round(best_param[3]))
num_heads = int(round(best_param[4]))
dropout = float(best_param[5])
lr = float(best_param[6])
print(f'{alg_name}最优超参数: {best_param}, 验证loss: {best_loss}')
# 训练模型
model = TransformerBiLSTM(
input_dim=X_seq.shape[2],
hidden_layer_sizes=[h1, h2],
hidden_dim=hidden_dim,
num_layers=num_layers,
num_heads=num_heads,
output_dim=y_seq.shape[1],
dropout_rate=dropout
).to(device)
optimizer = optim.Adam(model.parameters(), lr)
loss_function = nn.MSELoss(reduction='sum')
# 为加快总流程,只训练epochs=40,你可调整
def train_short(model, epochs):
train_size = len(train_loader.dataset)
test_size = len(test_loader.dataset)
minimum_mse = 1000.
best_model_wts = model.state_dict()
for epoch in range(epochs):
model.train()
for seq, labels in train_loader:
seq, labels = seq.to(device), labels.to(device)
optimizer.zero_grad()
y_pred = model(seq)
loss = loss_function(y_pred, labels)
loss.backward()
optimizer.step()
model.eval()
with torch.no_grad():
test_loss = 0.
for data, label in test_loader:
data, label = data.to(device), label.to(device)
pre = model(data)
test_loss += loss_function(pre, label).item()
test_av_mseloss = test_loss / test_size
if test_av_mseloss < minimum_mse:
minimum_mse = test_av_mseloss
best_model_wts = model.state_dict()
model.load_state_dict(best_model_wts)
train_short(model, epochs=40)
torch.save(model.state_dict(), f'best_model_{alg_name}.pt')
# 预测前100个
model.eval()
original_data = []
pre_data = []
with torch.no_grad():
for data, label in test_loader:
data, label = data.to(device), label.to(device)
y_pred = model(data)
original_data.append(label.cpu().numpy())
pre_data.append(y_pred.cpu().numpy())
original_data = np.concatenate(original_data, axis=0)
pre_data = np.concatenate(pre_data, axis=0)
scaler_y = load('scaler_y')
original_100 = scaler_y.inverse_transform(original_data)[:show_n, 0]
pre_100 = scaler_y.inverse_transform(pre_data)[:show_n, 0]
final_results[alg_name] = (original_100, pre_100)
# ======================= 结果可视化对比 ======================
plt.figure(figsize=(14, 7))
plt.plot(final_results['VPPSO'][0], color='gray', label='真实值', linewidth=2, linestyle='--')
for alg_name, (orig, pred) in final_results.items():
plt.plot(pred, color=alg_colors[alg_name], label=f'{alg_name}优化', alpha=0.85)
plt.xlabel('样本编号')
plt.ylabel('预测输出')
plt.title('不同智能优化算法下Transformer-BiLSTM预测对比(前100样本)')
plt.legend()
plt.tight_layout()
plt.show()
最新发布