"""Train a stacked-LSTM regressor that predicts bearing remaining useful life.

Each CSV file in a data folder is one sample: one column holds the vibration
signal and the last column holds the RUL label.  The script trains on one
folder, evaluates on another, prints MSE / MAE / R2 and saves a plot of the
true versus predicted RUL curve.
"""
import os

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset


class BearingDataset(Dataset):
    """One sample per CSV file: a min-max scaled signal and a scalar label.

    Args:
        data_folder: Directory scanned (non-recursively) for ``*.csv`` files.
            Files are used in sorted path order.
        file_limit: If not ``None``, keep only the first ``file_limit`` files.
    """

    def __init__(self, data_folder, file_limit=None):
        self.files = sorted(
            os.path.join(data_folder, name)
            for name in os.listdir(data_folder)
            if name.endswith('.csv')
        )
        if file_limit is not None:
            self.files = self.files[:file_limit]

        signals = []
        labels = []
        for path in self.files:
            df = pd.read_csv(path, header=None)
            # Column index 4 is the vibration signal.
            signal = df.iloc[:, 4].values
            # Per-file min-max scaling; the epsilon avoids dividing by zero
            # when the signal is constant.
            low = np.min(signal)
            high = np.max(signal)
            signal = (signal - low) / (high - low + 1e-8)
            signals.append(signal)
            # Assumes the last column carries one label value for the whole
            # file, so only the first row is read.
            labels.append(df.iloc[0, -1])

        self.data = np.array(signals)
        self.labels = np.array(labels)

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``(signal, label)``; the signal has shape ``(seq_len, 1)``."""
        # The trailing axis is the single input feature per time step.
        data = torch.tensor(self.data[idx], dtype=torch.float32).unsqueeze(-1)
        label = torch.tensor(self.labels[idx], dtype=torch.float32)
        return data, label


class RNNRULPredictor(nn.Module):
    """Four stacked LSTM layers followed by a linear regression head.

    The layers are stacked by hand (instead of ``num_layers=4``) so that each
    layer can have a different hidden size.

    Args:
        input_dim: Number of features per time step.
        hidden_dim1: Hidden size of the first LSTM layer.
        hidden_dim2: Hidden size of the second LSTM layer.
        hidden_dim3: Hidden size of the third LSTM layer.
        hidden_dim4: Hidden size of the fourth LSTM layer.
        output_dim: Size of the prediction produced per sample.
    """

    def __init__(self, input_dim, hidden_dim1, hidden_dim2, hidden_dim3,
                 hidden_dim4, output_dim):
        super().__init__()
        self.lstm1 = nn.LSTM(input_dim, hidden_dim1, batch_first=True)
        self.lstm2 = nn.LSTM(hidden_dim1, hidden_dim2, batch_first=True)
        self.lstm3 = nn.LSTM(hidden_dim2, hidden_dim3, batch_first=True)
        self.lstm4 = nn.LSTM(hidden_dim3, hidden_dim4, batch_first=True)
        self.fc = nn.Linear(hidden_dim4, output_dim)

    def forward(self, x):
        """Map ``(batch, seq_len, input_dim)`` to ``(batch, output_dim)``."""
        x, _ = self.lstm1(x)
        x, _ = self.lstm2(x)
        x, _ = self.lstm3(x)
        x, _ = self.lstm4(x)
        # Only the output of the last time step feeds the regression head.
        x = x[:, -1, :]
        return self.fc(x)


def loss_function(pred_rul, true_rul):
    """Return the mean squared error between predictions and labels.

    The model emits ``(batch, 1)`` while the data loader yields labels of
    shape ``(batch,)``.  Passing those directly to ``nn.MSELoss`` broadcasts
    them to ``(batch, batch)`` and averages over every prediction/label pair,
    which is not the regression loss.  When both tensors hold the same number
    of elements the label is therefore reshaped to the prediction's shape.
    """
    if pred_rul.shape != true_rul.shape and pred_rul.numel() == true_rul.numel():
        true_rul = true_rul.reshape(pred_rul.shape)
    return nn.MSELoss()(pred_rul, true_rul)


# Data folder paths.
train_data_folder = r"E:\pycharm code\1-1label"
test_data_folder = r"E:\pycharm code\1-6label"

# Model and optimisation hyper-parameters.
input_dim = 1      # one feature (the vibration signal) per time step
hidden_dim1 = 256  # hidden size of LSTM layer 1
hidden_dim2 = 128  # hidden size of LSTM layer 2
hidden_dim3 = 64   # hidden size of LSTM layer 3
hidden_dim4 = 32   # hidden size of LSTM layer 4
output_dim = 1     # the predicted RUL is a scalar
lr = 0.0001        # learning rate
epochs = 2         # number of training epochs


def train_model(model, train_data_loader, optimizer, device, num_epochs):
    """Train ``model`` in place and print the mean batch loss of each epoch.

    Raises:
        ValueError: If the loader yields no batches, because the per-epoch
            average would otherwise divide by zero.
    """
    if len(train_data_loader) == 0:
        raise ValueError('training data loader is empty')

    for epoch in range(num_epochs):
        model.train()
        train_loss = 0
        for data, label in train_data_loader:
            data = data.to(device)
            label = label.to(device)

            optimizer.zero_grad()
            pred_rul = model(data)
            loss = loss_function(pred_rul, label)
            loss.backward()
            train_loss += loss.item()
            optimizer.step()

        print(f'Epoch {epoch + 1}, Loss: {train_loss / len(train_data_loader)}')


def evaluate(model, test_data_loader, device):
    """Run inference over the whole loader.

    Returns:
        ``(actual_values, predicted_values)`` as 1-D NumPy arrays with one
        entry per sample, in loader order.
    """
    actual_batches = []
    predicted_batches = []

    model.eval()
    with torch.no_grad():
        for data, label in test_data_loader:
            pred_rul = model(data.to(device))
            # Keep every sample of the batch, not only the first one, so that
            # each file contributes exactly one point to metrics and plot.
            actual_batches.append(label.cpu().numpy().reshape(-1))
            predicted_batches.append(pred_rul.cpu().numpy().reshape(-1))

    if not actual_batches:
        return np.array([]), np.array([])
    return np.concatenate(actual_batches), np.concatenate(predicted_batches)


def plot_rul(file_indices, actual_values, predicted_values, mse):
    """Plot true versus predicted RUL and save the figure under ``./my``."""
    # Imported lazily so the dataset and model can be imported without a
    # plotting backend being available.
    import matplotlib.pyplot as plt

    plt.figure(figsize=(12, 6))
    plt.plot(file_indices, actual_values, label='True RUL', color='green',
             linestyle='-.')
    plt.plot(file_indices, predicted_values, label='Predicted RUL (RNN)',
             color='blue')
    plt.xlabel('Time (10s)')
    plt.ylabel('Scaled RUL')
    plt.legend()

    output_path = './my/{} RUL Prediction with LSTM.png'.format(round(mse, 3))
    # savefig does not create missing directories; without this the whole run
    # would fail at the very last step.
    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    plt.savefig(output_path)


def main():
    """Train on the training folder, evaluate on the test folder, and plot."""
    # Imported lazily: the metrics are needed only for the final report.
    from sklearn.metrics import (
        mean_absolute_error,
        mean_squared_error,
        r2_score,
    )

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # file_limit=None loads every CSV in the folder; pass an int to cap it.
    train_dataset = BearingDataset(train_data_folder, file_limit=None)
    train_data_loader = DataLoader(train_dataset, batch_size=16, shuffle=False)

    model = RNNRULPredictor(input_dim, hidden_dim1, hidden_dim2, hidden_dim3,
                            hidden_dim4, output_dim).to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    train_model(model, train_data_loader, optimizer, device, epochs)

    test_dataset = BearingDataset(test_data_folder)
    test_data_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)
    actual_values, predicted_values = evaluate(model, test_data_loader, device)
    # File numbers start at 1 and follow the sorted file order.
    file_indices = np.arange(1, len(actual_values) + 1)

    mse = mean_squared_error(actual_values, predicted_values)
    mae = mean_absolute_error(actual_values, predicted_values)
    r2 = r2_score(actual_values, predicted_values)

    print(f'Mean Squared Error (MSE): {mse:.4f}')
    print(f'Mean Absolute Error (MAE): {mae:.4f}')
    print(f'R-squared (R²): {r2:.4f}')

    plot_rul(file_indices, actual_values, predicted_values, mse)


if __name__ == '__main__':
    main()