Dropout_layer.cpp (Preventing Overfitting)

This post walks through how the Dropout layer works and how it is implemented. By randomly deactivating a portion of the neurons, dropout reduces overfitting and improves the model's ability to generalize. The post also examines the layer's different behavior at training and test time.

The dropout layer's job is to prevent overfitting during training. In conventional training, every iteration passes through a layer using all of its units, updating the whole network. With a dropout layer inserted, each unit of the preceding weight layer is instead randomly sampled with a retaining probability p, and only the sampled units participate in the update; this sampled sub-network becomes the target network for the current iteration. The benefit is that, because random units are switched off, no feature can become useful only in one fixed combination with other features: the network is pushed toward learning broadly shared patterns rather than quirks of particular training samples, which makes the trained model more robust.
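
As a concrete illustration, here is a minimal standalone C++ sketch of this "inverted dropout" scheme (independent of Caffe; the values and keep probability are made up). At training time a keep/drop mask is sampled and the surviving activations are rescaled by 1/p, so that at test time the layer can simply pass inputs through unchanged:

// Standalone inverted-dropout sketch (not Caffe code).
#include <iostream>
#include <random>
#include <vector>

int main() {
  const float keep_prob = 0.5f;                  // retaining probability p
  std::vector<float> x = {1.f, 2.f, 3.f, 4.f};   // example activations
  std::vector<float> y(x.size());

  std::mt19937 rng(0);
  std::bernoulli_distribution keep(keep_prob);

  // Training-time forward pass: zero dropped units and rescale kept
  // ones so the expected output equals the test-time output.
  for (size_t i = 0; i < x.size(); ++i) {
    y[i] = keep(rng) ? x[i] / keep_prob : 0.f;
  }

  // The test-time forward pass would simply be y = x.
  for (float v : y) std::cout << v << ' ';
  std::cout << '\n';
  return 0;
}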

Below are the notes I wrote while reading the dropout layer's source; if you spot any mistakes, please point them out.

Dropout_layer.hpp:

#ifndef CAFFE_DROPOUT_LAYER_HPP_
#define CAFFE_DROPOUT_LAYER_HPP_

#include <vector>

#include "caffe/blob.hpp"
#include "caffe/layer.hpp"
#include "caffe/proto/caffe.pb.h"

#include "caffe/layers/neuron_layer.hpp"

namespace caffe {

/**
 * @brief During training only, sets a random portion of @f$x@f$ to 0, adjusting
 *        the rest of the vector magnitude accordingly.
 *
 * @param bottom input Blob vector (length 1)
 *   -# @f$ (N \times C \times H \times W) @f$
 *      the inputs @f$ x @f$
 * @param top output Blob vector (length 1)
 *   -# @f$ (N \times C \times H \times W) @f$
 *      the computed outputs @f$ y @f$ (see Forward_cpu below for the
 *      train- and test-time formulas)
 */
 /* DropoutLayer inherits from NeuronLayer */
template <typename Dtype>
class DropoutLayer : public NeuronLayer<Dtype> {
 public:
  /**
   * @param param provides DropoutParameter dropout_param,
   *     with DropoutLayer options:
   *   - dropout_ratio (\b optional, default 0.5).
   *     Sets the probability @f$ p @f$ that any given unit is dropped.
   */
   /* Constructor */
  explicit DropoutLayer(const LayerParameter& param)
      : NeuronLayer<Dtype>(param) {}
  /* Layer setup */
  virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  /* Reshapes top to match bottom and allocates memory */
  virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);

  /* Returns the layer type string */
  virtual inline const char* type() const { return "Dropout"; }

 protected:
  /**
   * @param bottom input Blob vector (length 1)
   *   -# @f$ (N \times C \times H \times W) @f$
   *      the inputs @f$ x @f$
   * @param top output Blob vector (length 1)
   *   -# @f$ (N \times C \times H \times W) @f$
   *      the computed outputs. At training time, we have @f$
   *      y_{\mbox{train}} = \left\{
   *         \begin{array}{ll}
   *            \frac{x}{1 - p} & \mbox{if } u > p \\
   *            0 & \mbox{otherwise}
   *         \end{array} \right.
   *      @f$, where @f$ u \sim U(0, 1)@f$ is generated independently for each
   *      input at each iteration. At test time, we simply have
   *      @f$ y_{\mbox{test}} = \mathbb{E}[y_{\mbox{train}}] = x @f$.
   */
   /* CPU forward pass */
  virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  /* GPU forward pass */
  virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  /* CPU backward pass */
  virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);

  /* GPU backward pass */
  virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);

  /// when divided by UINT_MAX, the randomly generated values @f$u\sim U(0,1)@f$
  /* Blob holding the Bernoulli-distributed random mask */
  Blob<unsigned int> rand_vec_;
  /// the probability @f$ p @f$ of dropping any input
  /* Probability that any given input is dropped (skipped for this training iteration) */
  Dtype threshold_;
  /// the scale for undropped inputs at train time @f$ 1 / (1 - p) @f$
  /*scale_ == 1 / (1 - threshold_)*/
  Dtype scale_;
  /* Integer threshold used by the GPU implementation: there rand_vec_ holds raw
     random uints that are compared against this value (see dropout_layer.cu) */
  unsigned int uint_thres_;
};

}  // namespace caffe

#endif  // CAFFE_DROPOUT_LAYER_HPP_
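
Before moving on to the implementation, here is how the layer is typically enabled in a net definition. This is a minimal prototxt snippet (the layer and blob names are illustrative); dropout_ratio is the probability that a unit is dropped, as documented above, and Dropout is commonly applied in-place so bottom and top share a blob:

layer {
  name: "drop1"
  type: "Dropout"
  bottom: "fc1"
  top: "fc1"            # in-place: overwrites fc1's data
  dropout_param {
    dropout_ratio: 0.5  # probability p of dropping each unit
  }
}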

Dropout_layer.cpp:

// TODO (sergeyk): effect should not be dependent on phase. wasted memcpy.

#include <vector>

#include "caffe/layers/dropout_layer.hpp"
#include "caffe/util/math_functions.hpp"

namespace caffe {

/* Sets up the dropout layer; the base NeuronLayer setup runs first */
template <typename Dtype>
void DropoutLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top) {
  NeuronLayer<Dtype>::LayerSetUp(bottom, top);
  /* The dropout probability passed in via the prototxt (dropout_param.dropout_ratio) */
  /* Since drops are sampled independently, each input is dropped with probability threshold_ */
  threshold_ = this->layer_param_.dropout_param().dropout_ratio();
  DCHECK(threshold_ > 0.);
  DCHECK(threshold_ < 1.);
  /* (1. - threshold_) is the probability that an input is kept */
  scale_ = 1. / (1. - threshold_);
  uint_thres_ = static_cast<unsigned int>(UINT_MAX * threshold_);/* used by the GPU implementation (see note below) */
}
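
Looking at this file alone, uint_thres_ does appear unused, because the CPU path below draws 0/1 Bernoulli variables directly. The GPU path (dropout_layer.cu) instead fills rand_vec_ with raw uniform unsigned ints and keeps an input only when its random value exceeds uint_thres_ = UINT_MAX * threshold_. Condensed from the Caffe source (check dropout_layer.cu for the exact code), the forward kernel looks roughly like this:

// Condensed sketch of the kernel in dropout_layer.cu: mask[] holds raw
// uniform random uints, so (mask[index] > threshold) is true with
// probability 1 - p, matching the CPU path's Bernoulli draw.
template <typename Dtype>
__global__ void DropoutForward(const int n, const Dtype* in,
    const unsigned int* mask, const unsigned int threshold,
    const float scale, Dtype* out) {
  CUDA_KERNEL_LOOP(index, n) {
    out[index] = in[index] * (mask[index] > threshold) * scale;
  }
}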

/* Reshape and memory allocation; as before, the base NeuronLayer Reshape first shapes top like bottom */
template <typename Dtype>
void DropoutLayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top) {
  NeuronLayer<Dtype>::Reshape(bottom, top);
  // Set up the cache for random number generation
  // ReshapeLike does not work because rand_vec_ is of Dtype uint
  // This layer needs a separate buffer to store the Bernoulli-distributed random mask
  rand_vec_.Reshape(bottom[0]->shape());
}

/* Forward pass of the dropout layer */
template <typename Dtype>
void DropoutLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  const Dtype* bottom_data = bottom[0]->cpu_data();/* input data from the previous layer */
  Dtype* top_data = top[0]->mutable_cpu_data();/* output data handed to the next layer */
  unsigned int* mask = rand_vec_.mutable_cpu_data();/* buffer for the Bernoulli random mask */
  const int count = bottom[0]->count();/* number of elements in the input blob */
  if (this->phase_ == TRAIN) {/* training phase */
    // Create random numbers
    caffe_rng_bernoulli(count, 1. - threshold_, mask); /* draw Bernoulli(1 - threshold_) variables: 1 = keep, 0 = drop */
    for (int i = 0; i < count; ++i) {
      top_data[i] = bottom_data[i] * mask[i] * scale_;  /* kept inputs are scaled by scale_, dropped inputs become 0 */
    }
  } else {
    caffe_copy(bottom[0]->count(), bottom_data, top_data); /* at test time every input is passed through unchanged */
  }
}
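
The factor scale_ = 1/(1 - threshold_) is what makes this "inverted dropout": it turns the training-time output into an unbiased estimate of the test-time output, exactly the expectation stated in the header's doxygen comment. With drop probability p = threshold_, each unit is kept with probability 1 - p, so

\mathbb{E}\big[y_i^{\mathrm{train}}\big] = (1-p)\cdot\frac{x_i}{1-p} + p\cdot 0 = x_i = y_i^{\mathrm{test}},

which is why the test branch can simply copy the data through.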

/* Backward pass of the dropout layer */
template <typename Dtype>
void DropoutLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down,  /* records whether each bottom blob needs its gradient computed */
    const vector<Blob<Dtype>*>& bottom) {
  if (propagate_down[0]) {/* only if gradients are required */
    const Dtype* top_diff = top[0]->cpu_diff();/* gradient flowing in from the layer above */
    Dtype* bottom_diff = bottom[0]->mutable_cpu_diff();/* gradient to pass down to the layer below */
    if (this->phase_ == TRAIN) {/* training phase */
      const unsigned int* mask = rand_vec_.cpu_data();/* the Bernoulli mask saved by the forward pass */
      const int count = bottom[0]->count();/* number of elements */
      for (int i = 0; i < count; ++i) {
        bottom_diff[i] = top_diff[i] * mask[i] * scale_;/* gradients flow only through the units that were kept */
      }
    } else {
      caffe_copy(top[0]->count(), top_diff, bottom_diff);/* outside training the gradient is copied through unchanged */
    }
  }
}
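
The backward rule mirrors the forward one. Since y_i = x_i * m_i * scale_, with the mask m_i and the scale fixed for the current iteration, differentiating gives

\frac{\partial L}{\partial x_i} = \frac{\partial L}{\partial y_i}\cdot m_i\cdot\frac{1}{1-p},

i.e. gradients pass through the kept units (rescaled) and are blocked at the dropped ones, using the same mask drawn in Forward_cpu.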


#ifdef CPU_ONLY
STUB_GPU(DropoutLayer);
#endif

INSTANTIATE_CLASS(DropoutLayer);
REGISTER_LAYER_CLASS(Dropout);

}  // namespace caffe