A record of my learning process only; the results were not good, so keep that in mind.
import keras
import numpy as np
import optuna
import tensorflow as tf
from keras import layers
from sklearn.metrics import accuracy_score, f1_score
from sklearn.model_selection import train_test_split

from basic_func import get_data


# ========== 2. Attention module ==========
class AttentionLayer(keras.layers.Layer):
    """Multi-head self-attention. Assumes the embedding dim is divisible by heads."""

    def __init__(self, dim, heads, dim_head, dropout=0.0):
        super().__init__()
        self.heads = heads
        self.scale = dim_head ** -0.5
        self.qkv = layers.Dense(dim * 3, use_bias=False)
        self.attend = layers.Softmax(axis=-1)
        self.to_out = keras.Sequential([
            layers.Dense(dim),
            layers.Dropout(dropout)
        ])

    def call(self, x):
        b, n, d = tf.shape(x)[0], tf.shape(x)[1], tf.shape(x)[2]
        # Project to Q, K, V and split into heads: (batch, heads, seq, head_dim)
        qkv = tf.split(self.qkv(x), num_or_size_splits=3, axis=-1)
        q, k, v = [tf.reshape(t, [b, n, self.heads, d // self.heads]) for t in qkv]
        q = tf.transpose(q, [0, 2, 1, 3])
        k = tf.transpose(k, [0, 2, 1, 3])
        v = tf.transpose(v, [0, 2, 1, 3])
        # Scaled dot-product attention
        attn = self.attend(tf.matmul(q, k, transpose_b=True) * self.scale)
        out = tf.matmul(attn, v)
        out = tf.transpose(out, [0, 2, 1, 3])
        out = tf.reshape(out, [b, n, d])
        return self.to_out(out)


# ========== 3. CNN + Transformer model ==========
class CNNTransformer(keras.Model):
    def __init__(self, num_patches, dim, depth, heads, cnn_dim, dim_head,
                 num_filters, kernel_size, input_dim, num_classes, dropout=0.0):
        super().__init__()
        self.num_patches = num_patches
        self.dim = dim
        self.heads = heads
        self.dim_head = dim_head
        self.dropout_rate = dropout

        # Patch embedding: split the time series into patches and project to dim
        self.to_patch_embedding = keras.Sequential([
            layers.Reshape((num_patches, input_dim // num_patches)),
            layers.Dense(dim)
        ])

        # CLS token and learned positional encoding
        self.cls_token = self.add_weight(shape=(1, 1, dim), initializer='random_normal', trainable=True)
        self.pos_embedding = self.add_weight(shape=(1, num_patches + 1, dim), initializer='random_normal', trainable=True)
        self.dropout = layers.Dropout(dropout)

        # Transformer blocks. The original built the attention as a plain closure,
        # whose inner Dense layers Keras does not track (so their weights would
        # never be trained); AttentionLayer above performs the identical
        # computation as a proper Layer, so it is used here instead.
        self.attention_layers = [
            [AttentionLayer(dim, heads, dim_head, dropout),
             self.CNNFeedForward(dim, cnn_dim, kernel_size, dropout)]
            for _ in range(depth)
        ]

        # CNN classification head
        self.cnn_head = keras.Sequential([
            layers.Conv1D(filters=num_filters, kernel_size=kernel_size, activation='relu', padding='same'),
            layers.Dropout(dropout),
            layers.Conv1D(filters=num_filters * 2, kernel_size=kernel_size, activation='relu', padding='same'),
            layers.Dropout(dropout),
            layers.Conv1D(filters=num_filters * 2, kernel_size=kernel_size, activation='relu', padding='same'),
            layers.GlobalAveragePooling1D(),
            layers.Dense(128, activation="relu"),
            layers.Dense(num_classes, activation="softmax")
        ])

    def CNNFeedForward(self, dim, hidden_dim, kernel_size, dropout):
        """CNN feed-forward block (replaces the usual Transformer MLP)."""
        return keras.Sequential([
            layers.Conv1D(hidden_dim, kernel_size, padding="same", activation="relu"),
            layers.Dropout(dropout),
            layers.Conv1D(dim, kernel_size, padding="same"),
            layers.Dropout(dropout),
        ])

    def call(self, x):
        # Patch embedding
        x = self.to_patch_embedding(x)
        cls_tokens = tf.repeat(self.cls_token, repeats=tf.shape(x)[0], axis=0)
        x = tf.concat([cls_tokens, x], axis=1)
        x = x + self.pos_embedding
        x = self.dropout(x)

        # Transformer blocks with residual connections
        for attn, ff in self.attention_layers:
            x = attn(x) + x
            x = ff(x) + x

        # CNN classification head
        return self.cnn_head(x)
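
# A minimal shape sanity check for the classes above; a sketch with assumed
# values (batch of 8, sequence length 5120 to match the data loaded below).
# Not part of the original script; call it manually if useful.
def _shape_smoke_test():
    model = CNNTransformer(
        num_patches=4, dim=64, depth=1, heads=4, cnn_dim=64, dim_head=32,
        num_filters=16, kernel_size=3, input_dim=5120, num_classes=4,
    )
    dummy = np.random.randn(8, 5120, 1).astype("float32")
    out = model(dummy)
    # 5120 points are split into 4 patches of 1280, embedded to dim=64, passed
    # through one Transformer block, and pooled to class probabilities.
    assert out.shape == (8, 4)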
padding="same"), layers.Dropout(dropout), ]) def call(self, x): # **时间片嵌入** x = self.to_patch_embedding(x) cls_tokens = tf.repeat(self.cls_token, repeats=tf.shape(x)[0], axis=0) x = tf.concat([cls_tokens, x], axis=1) x += self.pos_embedding x = self.dropout(x) # **Transformer 层** for attn, ff in self.attention_layers: x = attn(x) + x x = ff(x) + x # **CNN 分类头** x = self.cnn_head(x) return x # ========== 4. Optuna 目标函数 ========== def make_objective(x_train, y_train, x_val, y_val, x_test, y_test, input_shape, num_classes): def objective(trial): #需要优化的参数及范围 dim = trial.suggest_categorical("dim", [64, 128]) depth = trial.suggest_int("depth", 1, 2, 3) heads = trial.suggest_categorical("heads", [2, 4, 8]) dim_head = trial.suggest_categorical("dim_head", [32, 64]) cnn_dim = trial.suggest_categorical("cnn_dim", [64, 128]) num_filters = trial.suggest_categorical("num_filters", [16, 32, 64]) kernel_size = trial.suggest_categorical("kernel_size", [3,5,7,9,11]) dropout = trial.suggest_float("dropout", 0.1, 0.5) learning_rate = trial.suggest_float("lr", 1e-4, 1e-2, log=True) batch_size = trial.suggest_categorical("batch_size", [16, 32, 64, 128]) num_patches = trial.suggest_categorical("num_patches", [4, 5, 10]) if input_shape[0] % num_patches != 0: raise optuna.exceptions.TrialPruned() model = CNNTransformer( num_patches = num_patches, dim = dim, depth = depth, heads = heads, cnn_dim = cnn_dim, dim_head = dim_head, num_filters = num_filters, kernel_size = kernel_size, input_dim = input_shape[0], num_classes= num_classes, dropout=dropout, ) model.compile( optimizer=keras.optimizers.Adam(learning_rate), loss="sparse_categorical_crossentropy", metrics=["accuracy"] ) history = model.fit( x_train, y_train, validation_data=(x_val, y_val), epochs=100, batch_size=batch_size, ) # 预测结果 y_pred = np.argmax(model.predict(x_val), axis=1) # 平均准确率 class_accuracies = [] for i in range(num_classes): indices = (y_val == i) if np.sum(indices) > 0: acc_i = accuracy_score(y_val[indices], y_pred[indices]) class_accuracies.append(acc_i) avg_acc = np.mean(class_accuracies) # F1-score (macro) f1 = f1_score(y_val, y_pred, average='macro') # 合成目标(你可以权重调节) score = 0.5 * avg_acc + 0.5 * f1 return score return objective # ========== 5. 
# ========== 5. Launch the optimizer ==========
if __name__ == "__main__":
    # 1. Load the data.
    # Each file under the folder is assumed to represent a different fault type.
    # file_path1 = 'E:\\paper_writing\\paper1\\data\\recogdata(singledata)'
    # channel = [0, 1, 2, 3, 4]
    # Fs = 10240
    # sample_len = 1250

    # file_path1 = 'E:\\data-experiment\\CWRU\\1HP'
    # channel = [0, 1]
    # Fs = 10000
    # sample_len = 1250

    file_path1 = 'E:\\data-experiment\\Jiangnan_bearing'
    channel = [0]
    Fs = 50000
    sample_len = 5120

    x_list, y_list = [], []
    for k in channel:
        matri, cla_name = get_data(file_path1, [k])
        # Label per file (e.g. 0 = normal, 1 = fault A, 2 = fault B)
        labels = list(range(len(cla_name)))

        # Count how many full segments each file yields; using the smallest
        # count keeps the classes balanced.
        num = []
        for i in range(len(matri)):
            temp = np.array(matri[i]).flatten()
            num.append(len(temp) // sample_len)
        num.sort()

        for i in range(len(matri)):
            data = np.array(matri[i]).flatten()
            # Trim and reshape the raw signal directly; feature extraction
            # could be done here instead
            data = data[0:sample_len * num[0]]
            data = data.reshape(num[0], sample_len, 1)
            # Every sample from this file gets the same label
            labels_array = np.full((data.shape[0],), labels[i])
            x_list.append(data)
            y_list.append(labels_array)

    print('train_begin')
    num_classes = len(cla_name)

    # Merge the data from all files
    x = np.vstack(x_list)       # stack feature arrays vertically
    y = np.concatenate(y_list)  # concatenate labels
    # y = to_categorical(y, num_classes=6)  # one-hot labels (unused; the loss expects sparse labels)

    shuffle_idx = np.random.permutation(len(x))
    x, y = x[shuffle_idx], y[shuffle_idx]

    # Split 8:1:1 into train / validation / test
    x_train, x_temp, y_train, y_temp = train_test_split(x, y, test_size=0.2, random_state=42)
    x_val, x_test, y_val, y_test = train_test_split(x_temp, y_temp, test_size=0.5, random_state=42)

    input_shape = x_train.shape[1:]  # inferred automatically, e.g. (5120, 1)

    objective = make_objective(x_train, y_train, x_val, y_val, x_test, y_test,
                               input_shape, num_classes)
    study = optuna.create_study(direction="maximize")
    study.optimize(objective, n_trials=50)

    print("Best trial:")
    print(study.best_trial.params)
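
    # Follow-up sketch, not in the original script: the test split is never used
    # during the search, so retrain with the best parameters and report held-out
    # accuracy. Key names match the suggest_* calls in the objective.
    best = study.best_trial.params
    best_model = CNNTransformer(
        num_patches=best["num_patches"], dim=best["dim"], depth=best["depth"],
        heads=best["heads"], cnn_dim=best["cnn_dim"], dim_head=best["dim_head"],
        num_filters=best["num_filters"], kernel_size=best["kernel_size"],
        input_dim=input_shape[0], num_classes=num_classes, dropout=best["dropout"],
    )
    best_model.compile(optimizer=keras.optimizers.Adam(best["lr"]),
                       loss="sparse_categorical_crossentropy",
                       metrics=["accuracy"])
    best_model.fit(x_train, y_train, epochs=100, batch_size=best["batch_size"])
    print("Held-out test accuracy:", best_model.evaluate(x_test, y_test)[1])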