我的数包含4个类别,每个类别有700张图片,使用下面的代码进行训练,训练了100个epoch,训练集准确率维持在0.95以上,但是验证集准确率会剧烈波动,没有收敛,给出解决方案,修改以下代码,并展示能完整运行的代码。# 超参数配置
NUM_CLASSES = 4 # 类别数量
IMG_SIZE = (224, 224) # 图像尺寸
BATCH_SIZE = 32 # 批大小
EPOCHS = 100 # 训练轮数
INIT_LR = 1e-3 # 初始学习率
L2_REG = 1e-3 # L2正则化系数
DROPOUT_RATE = 0.5 # Dropout比率
TEST_RATIO = 0.2 # 测试集比例
VAL_RATIO = 0.25 # 验证集比例(占非测试集部分)
SEED = 42 # 随机种子
# 路径配置
INPUT_DIR = "./input_images1" # 原始数据目录
OUTPUT_DIR = "./output_results" # 输出目录
SPLIT_DIR = os.path.join(OUTPUT_DIR, "split_data") # 划分后数据
MODEL_DIR = os.path.join(OUTPUT_DIR, "models") # 模型保存
EVAL_DIR = os.path.join(OUTPUT_DIR, "evaluation") # 评估结果
# 创建目录结构
os.makedirs(SPLIT_DIR, exist_ok=True)
os.makedirs(MODEL_DIR, exist_ok=True)
os.makedirs(EVAL_DIR, exist_ok=True)
# ================== 数据划分与准备 ==================
def split_dataset():
# 清空旧数据
for subset in ['train', 'val', 'test']:
shutil.rmtree(os.path.join(SPLIT_DIR, subset), ignore_errors=True)
# 遍历每个类别
for class_name in os.listdir(INPUT_DIR):
class_path = os.path.join(INPUT_DIR, class_name)
if not os.path.isdir(class_path):
continue
# 获取所有图像文件
all_files = [f for f in os.listdir(class_path)
if f.lower().endswith(('.png', '.jpg', '.jpeg'))]
np.random.shuffle(all_files)
# 首次划分:测试集分离
non_test, test = train_test_split(
all_files,
test_size=TEST_RATIO,
random_state=SEED
)
# 二次划分:训练集/验证集
train, val = train_test_split(
non_test,
test_size=VAL_RATIO,
random_state=SEED
)
# 文件复制函数
def copy_files(files, subset):
dest_dir = os.path.join(SPLIT_DIR, subset, class_name)
os.makedirs(dest_dir, exist_ok=True)
for f in files:
shutil.copy2(
os.path.join(class_path, f),
os.path.join(dest_dir, f)
)
# 执行复制
copy_files(train, 'train')
copy_files(val, 'val')
copy_files(test, 'test')
# 执行数据划分
split_dataset()
train_datagen = ImageDataGenerator(
rescale=1./255,
rotation_range=45,
width_shift_range=0.4,
height_shift_range=0.4,
brightness_range=[0.7,1.3],
shear_range=0.4,
zoom_range=0.4,
horizontal_flip=True,
vertical_flip=True,
fill_mode='reflect',
preprocessing_function=lambda x: x + np.random.normal(0, 0.03, x.shape)
)
val_datagen = ImageDataGenerator(rescale=1. / 255)
test_datagen = ImageDataGenerator(rescale=1. / 255)
# 数据生成器函数
def create_generator(datagen, subset):
return datagen.flow_from_directory(
os.path.join(SPLIT_DIR, subset),
target_size=IMG_SIZE,
color_mode='rgb', # 修改为RGB模式
batch_size=BATCH_SIZE,
class_mode='categorical',
shuffle=(subset == 'train'),
seed=SEED
)
# 创建数据管道
train_generator = create_generator(train_datagen, 'train')
val_generator = create_generator(val_datagen, 'val')
test_generator = create_generator(test_datagen, 'test')
# ================== 模型构建 ==================
def build_alexnet():
model = tf.keras.Sequential([
# 输入层适配灰度图(比原版增加边缘填充)
tf.keras.layers.Conv2D(32, (5, 5), activation='relu',
input_shape=(*IMG_SIZE, 3), padding='same'),
tf.keras.layers.BatchNormalization(),
tf.keras.layers.MaxPooling2D((3, 3), strides=2), # 增大下采样幅度
# 第二卷积层(通道数减半以降低冗余)
tf.keras.layers.Conv2D(64, (3, 3), activation='relu', padding='same',
kernel_regularizer=regularizers.l2(L2_REG)),
tf.keras.layers.BatchNormalization(),
tf.keras.layers.MaxPooling2D((3, 3), strides=2),
# 新增第三卷积层(针对小数据集增加特征复用)
tf.keras.layers.Conv2D(128, (3, 3), activation='relu', padding='same'),
tf.keras.layers.BatchNormalization(),
tf.keras.layers.Dropout(0.3), # 提前加入Dropout
# 分类器部分(参考论文微调策略)
tf.keras.layers.GlobalAveragePooling2D(),
tf.keras.layers.Dense(256, activation='relu',
kernel_initializer='he_normal'),
tf.keras.layers.Dropout(DROPOUT_RATE),
tf.keras.layers.Dense(NUM_CLASSES, activation='softmax')
])
# model = tf.keras.Model(inputs, outputs)
# 优化器改进
optimizer = tf.keras.optimizers.Adam(
learning_rate=tf.keras.optimizers.schedules.CosineDecay(
INIT_LR, EPOCHS * 100)
)
model.compile(
optimizer=optimizer,
loss='categorical_crossentropy',
metrics=['accuracy',
tf.keras.metrics.AUC(name='auc'),
tf.keras.metrics.Precision(name='precision'),
tf.keras.metrics.Recall(name='recall')]
)
return model
# 初始化模型
model = build_alexnet()
model.summary()
# ================== 训练配置 ==================
callbacks = [
tf.keras.callbacks.ModelCheckpoint(
os.path.join(MODEL_DIR, 'best_model.h5'),
save_best_only=True,
monitor='val_loss'
),
tf.keras.callbacks.EarlyStopping(
monitor='val_loss',
patience=100,
restore_best_weights=True
),
tf.keras.callbacks.TensorBoard(
log_dir=os.path.join(OUTPUT_DIR, 'logs')
)
]
# ================== 模型训练 ==================
history = model.fit(
train_generator,
steps_per_epoch=train_generator.samples // BATCH_SIZE,
validation_data=val_generator,
validation_steps=val_generator.samples // BATCH_SIZE,
epochs=EPOCHS,
callbacks=callbacks
)
with open("training_history.json", "w") as f:
json.dump(history.history, f)
# ================== 可视化评估 ==================
# 训练过程可视化
plt.figure(figsize=(12, 5))
plt.subplot(1, 2, 1)
plt.plot(history.history['loss'], label='Train Loss')
plt.plot(history.history['val_loss'], label='Val Loss')
plt.title('Training & Validation Loss')
plt.legend()
plt.subplot(1, 2, 2)
plt.plot(history.history['accuracy'], label='Train Accuracy')
plt.plot(history.history['val_accuracy'], label='Val Accuracy')
plt.title('Training & Validation Accuracy')
plt.legend()
plt.savefig(os.path.join(EVAL_DIR, 'training_curves.png'))
plt.close()
# 测试集评估
test_results = model.evaluate(test_generator)
print(f'''
=== 最终测试集评估结果 ===
Loss: {test_results[0]:.4f}
Accuracy: {test_results[1]:.4f}
AUC: {test_results[2]:.4f}
Precision: {test_results[3]:.4f}
Recall: {test_results[4]:.4f}
''')
# 混淆矩阵可视化
y_pred = np.argmax(model.predict(test_generator), axis=1)
y_true = test_generator.classes
cm = confusion_matrix(y_true, y_pred)
plt.figure(figsize=(10, 8))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
xticklabels=test_generator.class_indices.keys(),
yticklabels=test_generator.class_indices.keys())
plt.title('Confusion Matrix - Test Set')
plt.xlabel('Predicted Label')
plt.ylabel('True Label')
plt.savefig(os.path.join(EVAL_DIR, 'confusion_matrix.png'))
plt.close()
# 保存最终模型
model.save(os.path.join(MODEL_DIR, 'final_model.h5'))