import math
import os
import shutil
import struct
import socket
from itertools import chain
import torch.nn.init as init
import numpy as np
import pandas as pd
import torch
import random
import openpyxl
import torch.nn as nn
from torch.utils.data import DataLoader
from hyper_parameter import program_path, input_size, hidden_size, num_layers, output_size, batch_size, n_ahead, \
learning_rate, a, a1, train_window, name_tr, epochs, patience, shared_path, path_tr, min_error0, min_error1, \
min_error2
import threading
"""
设置随机数种子
"""
seed = 42
np.random.seed(seed)
random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
global n_ahead
n_ahead = n_ahead # 从超参数导入预测步长
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"当前使用设备: {device}")
"""
获取共享训练文件
"""
def copyfile(root, filename, dst):
    """Recursively search *root* for a file named *filename* and copy it to *dst*.

    Args:
        root: directory tree to walk.
        filename: exact file name to look for.
        dst: destination path (file or directory) passed to shutil.copy.

    Returns:
        True after copying the first match, False if no match exists.
    """
    # The original code shadowed the `root` parameter with os.walk's loop
    # variable; use a distinct name for the per-directory path instead.
    for dirpath, _dirnames, files in os.walk(root):
        if filename in files:
            print('target found')
            src = os.path.join(dirpath, filename)
            print('src:', src)
            shutil.copy(src, dst)
            print('copy that')
            return True
    return False
# Fetch the shared training-data workbook from the network share into the
# local training path (best effort: copyfile returns False if not found).
copyfile(root=shared_path, filename=name_tr, dst=path_tr)
"""
定义数据处理函数
"""
def nn_seq(name, B, train_window, n_ahead, data_name):
    """Load one motion channel from an Excel sheet and build a train DataLoader.

    Args:
        name: Excel workbook file name, resolved relative to program_path.
        B: batch size; the sample count is truncated to a multiple of B.
        train_window: length of each input sequence.
        n_ahead: number of future steps used as the label.
        data_name: column name of the motion channel to read.

    Returns:
        (DataLoader, max_value, min_value) — min/max are returned so callers
        can de-normalize predictions later.
    """
    file = os.path.join(program_path, name)
    mon_data = pd.read_excel(file, sheet_name=0, usecols=None)
    all_data = mon_data[data_name].values.astype(float)
    # Moving-average smoothing (denoising). mode='same' keeps the length, but
    # edges are convolved against implicit zeros, so boundary values shrink.
    window_size = 5  # smoothing window (5-10 is reasonable for this data)
    if window_size > 1:
        all_data = np.convolve(
            all_data,
            np.ones(window_size) / window_size,  # uniform weights
            mode='same'
        )
    # Min-max normalization; a constant series (scalar == 0) is left untouched
    # rather than dividing by zero. The original identity `map(lambda x: x, ...)`
    # and the redundant re-wrapping of tensors in torch.FloatTensor (a
    # deprecated pattern that also makes extra copies) were removed.
    max_value = np.max(all_data)
    min_value = np.min(all_data)
    scalar = max_value - min_value
    if scalar == 0:
        all_data_normalized = torch.FloatTensor(all_data).view(-1)
    else:
        all_data_normalized = torch.FloatTensor((all_data - min_value) / scalar).view(-1)
    # Build (input, label) window pairs with a stride of 10.
    seq = []
    l_data = len(all_data_normalized)
    # NOTE(review): this upper bound skips the final valid window (it would be
    # `l_data - train_window - n_ahead + 1`); kept as-is to preserve behavior.
    for i in range(0, l_data - train_window - n_ahead, 10):
        data_seq = all_data_normalized[i:i + train_window].view(-1, 1)
        data_label = all_data_normalized[i + train_window:i + train_window + n_ahead].view(-1)
        seq.append((data_seq, data_label))
    # Drop the tail so every batch is full, then wrap in a DataLoader.
    train_len = int(len(seq) / B) * B
    train = MyDataset(seq[:train_len])
    train_data = DataLoader(dataset=train, batch_size=B, shuffle=False, num_workers=0)
    return train_data, max_value, min_value
class MyDataset(torch.utils.data.Dataset):
    """Minimal Dataset over a pre-built in-memory list of samples."""

    def __init__(self, data):
        # Keep a reference to the ready-made (seq, label) sample list.
        self.data = data

    def __len__(self):
        # Dataset size == number of stored samples.
        return len(self.data)

    def __getitem__(self, item):
        # Direct index into the backing list.
        return self.data[item]
"""
构造LSTM模型
"""
class CNN_LSTM_attention(nn.Module):
    """CNN + attention + LSTM regressor for multi-step time-series prediction.

    Pipeline: Conv1d feature extraction -> softmax attention over time steps ->
    LayerNorm over channels -> unidirectional LSTM with a linear residual
    connection -> per-step regression head; forward() returns the last
    n_ahead time steps.
    """
    def __init__(self, input_size, hidden_size, num_layers, output_size,train_window, dim=1):
        super(CNN_LSTM_attention, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.output_size = output_size
        self.bidirectional = False  # explicitly unidirectional LSTM
        self.lstm_output_dim = hidden_size  # unidirectional => no x2 factor
        # LayerNorm over the channel dimension applied after the attention
        # step (normalized size = CNN output channels = hidden_size).
        self.se_norm = nn.LayerNorm(hidden_size)
        # 1. CNN block: local temporal feature extraction.
        # NOTE(review): kernel_size=7 with padding=1 does NOT keep the
        # sequence length — it shrinks it by 4 (see conv_out_len below);
        # the original comment claimed the length was preserved.
        self.cnn_block = nn.Sequential(
            nn.Conv1d(in_channels=dim,
                      out_channels=hidden_size,
                      kernel_size=7,
                      padding=1),
            nn.Sigmoid(),  # NOTE(review): Sigmoid saturates as a conv activation; ReLU is the usual choice
            nn.MaxPool1d(kernel_size=2, stride=1),  # pooling shortens length by 1
            nn.Dropout(0.3)  # regularization
        )
        # 2. "SE"-style attention. NOTE(review): despite the name, the
        # weights produced here are per TIME STEP, not per channel — see
        # the einsum in forward().
        # Conv output length: (in_len + 2*padding - kernel_size) // stride + 1
        conv_out_len = (train_window + 2 * 1 - 7) // 1 + 1
        # Pool output length: (in_len - kernel_size) // stride + 1
        self.pooled_seq_len = (conv_out_len - 2) // 1 + 1
        self.se_attention = nn.Sequential(
            nn.Linear(self.pooled_seq_len, self.pooled_seq_len // 8),
            nn.BatchNorm1d(self.pooled_seq_len // 8),  # batch norm after squeeze
            nn.ReLU(),
            nn.Linear(self.pooled_seq_len // 8, self.pooled_seq_len),
            nn.BatchNorm1d(self.pooled_seq_len),  # batch norm after excite
            nn.Softmax(dim=-1)
        )
        # LSTM stack (unidirectional).
        self.lstm = nn.LSTM(
            input_size=hidden_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
            bidirectional=self.bidirectional  # explicitly off
        )
        # Residual projection: LSTM input and output are both hidden_size wide.
        self.residual_proj = nn.Linear(hidden_size, self.lstm_output_dim)
        # Regression head; operates on flattened (batch*seq_len, hidden) rows.
        self.reg = nn.Sequential(
            nn.BatchNorm1d(self.lstm_output_dim),  # input = LSTM output width
            nn.Linear(self.lstm_output_dim, self.lstm_output_dim),  # feature mix
            nn.LeakyReLU(0.1),
            nn.Linear(self.lstm_output_dim, output_size)  # final projection
        )
        # NOTE(review): Tanh bounds predictions to (-1, 1); targets here are
        # min-max normalized to [0, 1], so the range fits — confirm if the
        # normalization ever changes.
        self.reg_act = nn.Tanh()
        self.dropout = nn.Dropout(0.3)  # applied before the regression head
        self.post_se_dropout = nn.Dropout(0.3)  # applied after attention+norm
        # (A classification head was present here but commented out; removed.)
        # Weight initialization.
        self._init_weights()

    def _init_weights(self):
        """Xavier init for LSTM weights, zero biases with forget gate = 1,
        Xavier/unit init for the regression head."""
        for name, param in self.lstm.named_parameters():
            if 'weight' in name:
                init.xavier_normal_(param)
            elif 'bias' in name:
                # PyTorch packs LSTM biases as (input, forget, cell, output)
                # gates; the second quarter is the forget gate, set to 1.
                n = param.size(0)
                init.constant_(param, 0)
                param.data[n // 4: n // 2] = 1
        # Regression-head initialization.
        for m in self.reg.modules():
            if isinstance(m, nn.Linear):
                init.xavier_normal_(m.weight)
                if m.bias is not None:
                    init.constant_(m.bias, 0)
            elif isinstance(m, nn.BatchNorm1d):
                init.constant_(m.weight, 1)
                init.constant_(m.bias, 0)

    def forward(self, input_seq, n_ahead):
        """Run the full pipeline and return the last *n_ahead* predicted steps.

        Args:
            input_seq: input window, transposed to (batch, dim, train_window)
                internally — assumes (batch, train_window, dim) on entry.
            n_ahead: number of trailing time steps to keep in the output.

        Returns:
            Tensor of shape (batch, n_ahead, output_size).
        """
        # 1. CNN + time-step attention.
        x = input_seq.transpose(-1, -2)  # (batch, dim=1, train_window)
        x = self.cnn_block(x)  # (batch, hidden_size, pooled_seq_len)
        max_ = x.max(dim=1)[0]  # max over channels -> (batch, pooled_seq_len)
        se_attn = self.se_attention(max_)  # softmax weights per time step
        x = torch.einsum("bnd,bd->bnd", x, se_attn)  # weight each time step
        # Channel-dim LayerNorm: move channels last, normalize, move back.
        x = x.transpose(1, 2)  # (batch, pooled_seq_len, hidden_size)
        x = self.se_norm(x)  # normalize over hidden_size
        x = x.transpose(1, 2)  # back to (batch, hidden_size, pooled_seq_len)
        x = self.post_se_dropout(x)
        x = x.transpose(-1, -2)  # (batch, pooled_seq_len, hidden_size) for LSTM
        # 2. LSTM + residual connection.
        batch_size = x.size(0)
        seq_len = x.shape[1]
        h0 = torch.zeros(self.num_layers, batch_size, self.hidden_size).to(x.device)
        c0 = torch.zeros(self.num_layers, batch_size, self.hidden_size).to(x.device)
        lstm_output, _ = self.lstm(x, (h0, c0))  # (batch, seq_len, lstm_output_dim)
        residual = self.residual_proj(x)  # linear projection of the LSTM input
        residual_output = lstm_output + residual  # (batch, seq_len, lstm_output_dim)
        # 3. Regression head on flattened (batch*seq_len, hidden) features.
        reg_feat = residual_output.contiguous().view(-1, self.lstm_output_dim)
        reg_feat = self.dropout(reg_feat)
        reg_pred = self.reg(reg_feat)  # (batch*seq_len, output_size)
        pred = self.reg_act(reg_pred)  # Tanh-bounded activations
        # Restore the time dimension, keep only the last n_ahead steps.
        pred = pred.view(batch_size, seq_len, self.output_size)[:, -n_ahead:, :]
        return pred
"""
训练函数
"""
def train(k, model, optimizer, scheduler, loss_function):
    """Train *model* on motion channel a[k] and report per-epoch metrics.

    Args:
        k: index into the motion-name lists a / a1 (also selects the
           per-motion error threshold min_error{0,1,2}).
        model: the CNN_LSTM_attention instance to optimize.
        optimizer: Adam optimizer over model's parameters.
        scheduler: ReduceLROnPlateau stepped on R² (mode='max').
        loss_function: per-batch training loss (MSE).

    Side effects: saves checkpoints to disk, and sends (epoch loss sum,
    RMSE, R²) over the module-level UDP socket after each epoch.
    """
    motion_name = a[k]
    try:
        train_data, max_value, min_value = nn_seq(
            name=name_tr,
            B=batch_size,
            train_window=train_window,
            n_ahead=n_ahead,
            data_name=motion_name,
        )
        print('training motion', k, motion_name)
    except Exception as e:
        print(f"数据加载错误: {str(e)}")
        return
    save_file = 'save_' + str(a1[k]) + '_model.pt'
    # Early-stopping bookkeeping; only index k of each list is used here.
    counters = [0, 0, 0]
    best_losses = [100, 100, 100]
    min_errors = [min_error0, min_error1, min_error2]
    switch_epoch = 300  # epochs trained before the LR scheduler kicks in
    for i in range(epochs):
        model.train()  # switch to training mode
        cnt = 0
        tra_loss = 0.0
        # --- training pass ---
        for (seq, label) in train_data:
            cnt += 1
            seq = seq.to(device)
            label = label.to(device)
            # forward() requires the prediction horizon as an argument.
            y_pred = model(seq, n_ahead).squeeze()
            loss = loss_function(y_pred, label)
            if torch.isnan(loss):
                print(f"!!!在epoch {i + 1},第 {cnt} 个batch中检测到loss为nan,终止训练!!!")
                torch.save({'model': model.state_dict(), 'optimizer': optimizer.state_dict()},
                           f"nan_error_epoch_{i + 1}_motion_{k}.pt")
                return
            optimizer.zero_grad()
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=10.0)
            optimizer.step()
            if cnt % 20 == 0:
                print(f'epoch {i + 1} 当前是自由度{k}的训练结果:', cnt - 20, '~', cnt, loss.item())
            tra_loss += loss.item() * seq.size(0)
        # --- evaluation pass (re-runs the training set) ---
        model.eval()
        pre = []
        y = []
        with torch.no_grad():
            for (seq, target) in train_data:  # seq: model input, target: ground truth
                target = list(chain.from_iterable(target.data.tolist()))  # flatten to 1-D
                y.extend(target)
                seq = seq.to(device)
                y_pre = model(seq, n_ahead).squeeze()  # forward also needs n_ahead
                y_pre = list(chain.from_iterable(y_pre.data.tolist()))
                pre.extend(y_pre)
        y, pre = np.array(y), np.array(pre)
        y = y * (max_value - min_value) + min_value  # de-normalize ground truth
        pre = pre * (max_value - min_value) + min_value  # de-normalize predictions
        # Metrics on the original (de-normalized) scale.
        raw_mse = np.mean((pre - y) **2)
        y_flat = y.flatten()
        pre_flat = pre.flatten()
        # R² = 1 - SS_res / SS_tot (coefficient of determination, not the
        # squared correlation coefficient).
        ss_res = np.sum((y_flat - pre_flat) ** 2)  # residual sum of squares
        ss_tot = np.sum((y_flat - np.mean(y_flat)) ** 2)  # total sum of squares
        R2 = 1 - (ss_res / ss_tot) if ss_tot != 0 else 0  # guard against /0
        rmse_mean = np.sqrt(np.mean((y - pre) ** 2))  # RMSE on de-normalized data
        if i < switch_epoch:
            # First switch_epoch epochs: hold the initial learning rate fixed.
            current_lr = learning_rate
        else:
            # Afterwards: let the plateau scheduler adapt the LR based on R².
            scheduler.step(R2)
            current_lr = optimizer.param_groups[0]['lr']
        # Lower bound on the learning rate.
        if current_lr < 0.0001:
            current_lr = 0.0001
            optimizer.param_groups[0]['lr'] = current_lr
        # --- early stopping / checkpointing ---
        if rmse_mean <= best_losses[k]:
            best_losses[k] = rmse_mean
            if rmse_mean <= min_errors[k]:
                counters[k] += 1
                state = {'model': model.state_dict(), 'optimizer': optimizer.state_dict()}
                torch.save(state, save_file)
            else:
                counters[k] = 0
        elif rmse_mean <= min_errors[k]:
            counters[k] += 1
        else:
            counters[k] = 0
        # Stop at the last epoch, or when patience is exhausted.
        if i == epochs - 1:
            print(f"✅ 已达到最大训练轮次 {epochs},强制保存最优模型")
            state = {'model': model.state_dict(), 'optimizer': optimizer.state_dict()}
            torch.save(state, f"motion_{a[k]}_{save_file}_final.pt")
            break
        elif counters[k] >= patience:
            state = {'model': model.state_dict(), 'optimizer': optimizer.state_dict()}
            torch.save(state, save_file)
            print(f"motion{k} Early stopping! (patience={patience})")
            break
        print(f'epoch: {i + 1}, training motion {k} {motion_name}, '
              f"Loss: {raw_mse:.4f}, RMSE: {rmse_mean:.4f}, R2: {R2:.4f}")
        # Stream (epoch loss sum, RMSE, R²) to the monitor endpoint over UDP;
        # failures are logged but never abort training.
        try:
            all_data = np.array([tra_loss, rmse_mean, R2], dtype=float)
            pre_data_bytes = struct.pack('ddd', *all_data)
            udpSerSock.sendto(pre_data_bytes, predict_addr)
        except Exception as e:
            print(f"UDP发送错误: {str(e)}")
"""
主程序
"""
if __name__ == "__main__":
udpSerSock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
predict_addr = ('192.168.112.1', 25001)
n_ahead = 80
torch.set_num_threads(8)
loss_function = nn.MSELoss().to(device)
def thread_task(k):
# 每个线程独立初始化模型
model = CNN_LSTM_attention(
input_size=input_size,
hidden_size=hidden_size,
num_layers=num_layers,
output_size=output_size,
train_window=train_window
).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=1e-4)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
optimizer,
mode='max',
factor=0.90,
patience=80,
threshold=0.0001
)
# 确保Excel写入正常
try:
train(k, model=model,optimizer=optimizer, scheduler=scheduler,loss_function=loss_function)
except Exception as e:
print(f"线程{k}错误: {str(e)}")
print(f"线程完成:运动类型k={k}")
# 创建并启动线程
threads = []
for k in range(3):
t = threading.Thread(target=thread_task, args=(k,))
threads.append(t)
t.start()
# 等待所有线程完成
for t in threads:
t.join()
udpSerSock.close()
print(f"所有训练完成")
# NOTE(review): the two lines below are stray pasted forum text, not code;
# left uncommented they are a SyntaxError that makes the module unimportable.
# 这个模型的问题在哪,为什么训练很不稳定 ("where is the problem with this model; why is training so unstable")
# 最新发布 ("latest release")