# Data loading and preprocessing
# Default operating-condition keys stored in the .mat file.
DEFAULT_CONDITIONS = (
    'defaulttop', 'tem958top', 'tem998top', 'onecto5top', 'onecto6top', 'onecto7top',
    'twocto7top', 'twocto8top', 'twocto9top', 'cap80top', 'cap120top'
)

def dataset(file_name, conditions=None):
    """Load a .mat file and stack every condition's samples into one dataset.

    Parameters
    ----------
    file_name : str
        Path to the .mat file.
    conditions : sequence of str, optional
        Keys to read from the file. Defaults to the 11 built-in
        operating-condition names (DEFAULT_CONDITIONS).

    Returns
    -------
    (insim, outsim) : tuple of np.ndarray
        insim  -- columns 0-5 (the 6 input features) of all conditions,
                  vertically stacked.
        outsim -- column 6 (the target) of all conditions, stacked as a
                  2-D column array.
    """
    if conditions is None:
        conditions = DEFAULT_CONDITIONS
    mat_data = loadmat(file_name)
    # Columns 1-6 are inputs, column 7 is the output.
    inputs = [mat_data[cond][:, 0:6] for cond in conditions]
    outputs = [mat_data[cond][:, 6:7] for cond in conditions]
    return np.vstack(inputs), np.vstack(outputs)
# MLP model
class MLP(nn.Module):
    """Fully connected regression network.

    Architecture: repeated [Linear -> BatchNorm1d -> ReLU (-> Dropout)]
    blocks followed by a final Linear head. The defaults reproduce the
    original 256-128-64 topology exactly (including the Sequential module
    indices, so existing checkpoints still load).

    Parameters
    ----------
    in_size : int
        Number of input features.
    out_size : int
        Number of outputs.
    hidden_sizes : tuple of int
        Width of each hidden block.
    dropouts : tuple of float
        Dropout probability after each hidden block; 0 means no Dropout
        layer is inserted for that block.
    """

    def __init__(self, in_size, out_size,
                 hidden_sizes=(256, 128, 64), dropouts=(0.25, 0.2, 0.0)):
        super(MLP, self).__init__()
        layers = []
        prev = in_size
        for width, p in zip(hidden_sizes, dropouts):
            layers += [nn.Linear(prev, width), nn.BatchNorm1d(width), nn.ReLU()]
            if p > 0:
                layers.append(nn.Dropout(p))
            prev = width
        layers.append(nn.Linear(prev, out_size))
        self.network = nn.Sequential(*layers)

    def forward(self, x):
        return self.network(x)
# Train the model
def train_MLP(model, in_train, in_val, out_train, out_val, epochs=500, lr=0.001,
              batch=128, patience=6, scaler_dir="top scaler",
              model_path="top_model.pth"):
    """Standardize the data, train `model` with early stopping, and report
    validation metrics.

    Parameters
    ----------
    model : nn.Module
        Network to train. Modified in place; the best checkpoint is
        restored before the final evaluation.
    in_train, in_val, out_train, out_val : array-like
        Raw (unscaled) inputs / targets for the train and validation splits.
    epochs : int
        Maximum number of training epochs.
    lr : float
        Initial Adam learning rate.
    batch : int
        Mini-batch size.
    patience : int
        Epochs without validation improvement before early stopping.
    scaler_dir : str
        Directory where the fitted StandardScalers are saved.
    model_path : str
        File path for the best-model checkpoint.

    Returns
    -------
    (best_loss, train_loss, val_loss) : tuple of float
        Best validation loss and the last epoch's train/val losses
        (all computed on standardized targets).
    """
    # Standardize inputs and targets (fit on the training split only,
    # then apply the same transform to validation data).
    in_scaler = StandardScaler()
    out_scaler = StandardScaler()
    in_train_scaled = in_scaler.fit_transform(in_train)
    out_train_scaled = out_scaler.fit_transform(out_train)
    in_val_scaled = in_scaler.transform(in_val)
    out_val_scaled = out_scaler.transform(out_val)
    # Persist the scalers for inference-time use. Create the directory
    # first: joblib.dump raises FileNotFoundError if it does not exist.
    os.makedirs(scaler_dir, exist_ok=True)
    joblib.dump(in_scaler, os.path.join(scaler_dir, "in_scaler.pkl"))
    joblib.dump(out_scaler, os.path.join(scaler_dir, "out_scaler.pkl"))
    # Wrap as PyTorch tensors and loaders.
    train_data = TensorDataset(torch.FloatTensor(in_train_scaled),
                               torch.FloatTensor(out_train_scaled))
    val_data = TensorDataset(torch.FloatTensor(in_val_scaled),
                             torch.FloatTensor(out_val_scaled))
    train_loader = DataLoader(train_data, batch_size=batch, shuffle=True)
    val_loader = DataLoader(val_data, batch_size=batch, shuffle=False)
    # Adam with L2 regularization, MSE loss, and LR halving on plateau.
    optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=1e-4)
    criterion = nn.MSELoss()
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer,
        mode='min',
        factor=0.5,
        patience=5
    )
    # Training loop with early stopping.
    best_loss = float('inf')
    counter = 0  # epochs since the last validation improvement
    train_losses = []
    val_losses = []
    for epoch in range(epochs):
        # --- training pass ---
        model.train()
        train_loss = 0.0
        for inputs, targets in train_loader:
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            loss.backward()
            # Clip gradients to stabilize training.
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
            optimizer.step()
            # Weight by batch size so the epoch average is per-sample.
            train_loss += loss.item() * inputs.size(0)
        train_loss /= len(train_loader.dataset)
        train_losses.append(train_loss)
        # --- validation pass ---
        model.eval()
        val_loss = 0.0
        with torch.no_grad():
            for inputs, targets in val_loader:
                outputs = model(inputs)
                loss = criterion(outputs, targets)
                val_loss += loss.item() * inputs.size(0)
        val_loss /= len(val_loader.dataset)
        val_losses.append(val_loss)
        # Let the scheduler react to the validation loss.
        scheduler.step(val_loss)
        # Progress report every 10 epochs.
        if (epoch + 1) % 10 == 0:
            print(f'Epoch {epoch + 1}/{epochs} | Train Loss: {train_loss:.6f} | '
                  f'Val Loss: {val_loss:.6f}')
        # Early stopping: checkpoint on improvement, stop after `patience`
        # stagnant epochs.
        if val_loss < best_loss:
            best_loss = val_loss
            counter = 0
            torch.save(model.state_dict(), model_path)
        else:
            counter += 1
            if counter >= patience:
                print(f'{patience}轮无改善,在 epoch {epoch+1} 提前停止')
                break
    # Restore the best checkpoint for the final evaluation.
    model.load_state_dict(torch.load(model_path))
    # Evaluate generalization on the validation set, in original units.
    model.eval()
    with torch.no_grad():
        val_inputs = torch.FloatTensor(in_val_scaled)
        val_pre = model(val_inputs).numpy()
    # Undo the target standardization before computing metrics.
    val_pre = out_scaler.inverse_transform(val_pre)
    val_target = out_val
    # Metrics expect tensors for the MSE criterion.
    if not isinstance(val_pre, torch.Tensor):
        val_pre = torch.tensor(val_pre, dtype=torch.float32)
    if not isinstance(val_target, torch.Tensor):
        val_target = torch.tensor(val_target, dtype=torch.float32)
    mse = criterion(val_pre, val_target)
    print(f"MSE:{mse:.6f}")
    r2 = r2_score(val_target, val_pre)
    print(f"R2:{r2:.6f}")
    return best_loss, train_loss, val_loss
# 4. Main entry point
if __name__ == "__main__":
    file_name = "rentopdata191212.mat"
    insim, outsim = dataset(file_name)
    # Hold-out split: 80% training, 20% validation.
    in_train, in_val, out_train, out_val = train_test_split(
        insim, outsim, test_size=0.2, random_state=2
    )
    # Build the model: 6 input features -> 1 output.
    model = MLP(in_size=6, out_size=1)
    # Train with early stopping.
    best_loss, epoch_train_loss, epoch_val_loss = train_MLP(
        model,
        in_train, in_val,
        out_train, out_val,
        epochs=800,
        lr=0.001
    )
    print(f"训练完成!最佳验证损失:{best_loss:.6f}")