Caffe Source Code Learning - Solver::Solve()

最新推荐文章于 2020-09-24 04:09:11 发布

原创最新推荐文章于 2020-09-24 04:09:11 发布 · 117 阅读

0 ·

CC 4.0 BY-SA版权

Caffe 专栏收录该内容

3 篇文章

订阅专栏

本文详细解析了Caffe深度学习框架中Solver类的Solve方法，介绍了如何通过迭代进行模型训练，包括参数初始化、损失计算、梯度更新及模型快照保存等关键步骤。同时，阐述了前向和后向传播的实现原理，以及如何通过调整参数控制训练过程。

初始化完Solver类之后，调用基类Solver::Solve()进行迭代

/**
 * @brief Runs the optimization from the current iteration up to
 *        param_.max_iter(), optionally resuming from a saved solver state.
 *
 * Must be called on the root solver (enforced by the CHECK below). After the
 * training loop finishes, a final snapshot is written if configured, and,
 * unless an early exit was requested, a final loss display and test pass are
 * run when the iteration count lines up with the display / test intervals.
 *
 * @param resume_file Path of a solver state to restore before training, or a
 *        null pointer to continue from the solver's current state.
 */
template <typename Dtype>
void Solver<Dtype>::Solve(const char* resume_file) {
  CHECK(Caffe::root_solver());
  LOG(INFO) << "Solving " << net_->name();
  LOG(INFO) << "Learning Rate Policy: " << param_.lr_policy();

  // Initialize to false every time we start solving.
  requested_early_exit_ = false;

  if (resume_file) {
    LOG(INFO) << "Restoring previous solver status from " << resume_file;
    Restore(resume_file);
  }

  // Remember the iteration we start from (possibly changed by Restore());
  // it is passed to UpdateSmoothedLoss() for the final loss display below.
  int start_iter = iter_;

  // Run the training iterations that remain until max_iter.
  Step(param_.max_iter() - iter_);

  // If we haven't already, save a snapshot after optimization, unless
  // overridden by setting snapshot_after_train := false.
  // The second condition skips the snapshot when iter_ is a multiple of the
  // snapshot interval, i.e. when Step() has just written one.
  if (param_.snapshot_after_train()
      && (!param_.snapshot() || iter_ % param_.snapshot() != 0)) {
    Snapshot();
  }
  if (requested_early_exit_) {
    LOG(INFO) << "Optimization stopped early.";
    return;
  }

  // After the optimization is done, run an additional train and test pass to
  // display the train and test loss/outputs if appropriate (based on the
  // display and test_interval settings, respectively). Unlike in the rest of
  // training, for the train net we only run a forward pass as we've already
  // updated the parameters "max_iter" times -- this final pass is only done to
  // display the loss, which is computed in the forward pass.
  if (param_.display() && iter_ % param_.display() == 0) {
    int average_loss = this->param_.average_loss();
    Dtype loss;
    net_->Forward(&loss);

    UpdateSmoothedLoss(loss, start_iter, average_loss);

    LOG(INFO) << "Iteration " << iter_ << ", loss = " << smoothed_loss_;
  }
  if (param_.test_interval() && iter_ % param_.test_interval() == 0) {
    TestAll();
  }
  LOG(INFO) << "Optimization Done.";
}

/**
 * @brief Runs up to @p iters training iterations starting at the current
 *        iter_.
 *
 * Each iteration clears the parameter gradients, optionally runs the test
 * nets, accumulates loss and gradients over param_.iter_size()
 * forward/backward passes, updates the smoothed loss, optionally logs the
 * train net outputs, applies the weight update, increments iter_, and
 * snapshots if due or requested. The loop ends early when a STOP action is
 * requested (also when requested while testing).
 *
 * @param iters Number of iterations to run; the loop body is skipped when
 *        this is not positive.
 */
template <typename Dtype>
void Solver<Dtype>::Step(int iters) {
  const int start_iter = iter_;
  const int stop_iter = iter_ + iters;

  // Number of most recent losses averaged for reporting
  // (average_loss defaults to 1 in the solver parameters).
  int average_loss = this->param_.average_loss();
  losses_.clear();
  smoothed_loss_ = 0;

  while (iter_ < stop_iter) {
    // zero-init the params:
    // clear the gradient data left over from the previous backward pass.
    net_->ClearParamDiffs();

    // Run the test nets every test_interval iterations (at iteration 0 only
    // when test_initialization is set), and only on the root solver.
    if (param_.test_interval() && iter_ % param_.test_interval() == 0
        && (iter_ > 0 || param_.test_initialization())
        && Caffe::root_solver()) {
      TestAll();
      if (requested_early_exit_) {
        // Break out of the while loop because stop was requested while testing.
        break;
      }
    }

    for (int i = 0; i < callbacks_.size(); ++i) {
      callbacks_[i]->on_start();
    }
    const bool display = param_.display() && iter_ % param_.display() == 0;
    net_->set_debug_info(display && param_.debug_info());

    // accumulate the loss and gradient
    Dtype loss = 0;
    // Accumulate over iter_size batches so that a large effective batch does
    // not have to fit in memory at once.
    for (int i = 0; i < param_.iter_size(); ++i) {
      // One forward pass followed by one backward pass.
      loss += net_->ForwardBackward();
    }
    loss /= param_.iter_size();

    // average the loss across iterations for smoothed reporting.
    // With average_loss == 1, losses_ holds only the current loss and
    // smoothed_loss_ equals it; with average_loss == n, losses_ holds the
    // last n losses and smoothed_loss_ is their average.
    UpdateSmoothedLoss(loss, start_iter, average_loss);
    if (display) {
      LOG_IF(INFO, Caffe::root_solver()) << "Iteration " << iter_
          << ", loss = " << smoothed_loss_;
      const vector<Blob<Dtype>*>& result = net_->output_blobs();
      int score_index = 0;
      for (int j = 0; j < result.size(); ++j) {
        const Dtype* result_vec = result[j]->cpu_data();
        const string& output_name =
            net_->blob_names()[net_->output_blob_indices()[j]];
        const Dtype loss_weight =
            net_->blob_loss_weights()[net_->output_blob_indices()[j]];
        for (int k = 0; k < result[j]->count(); ++k) {
          ostringstream loss_msg_stream;
          // Only outputs with a non-zero loss weight get the weighted-loss
          // suffix appended to their log line.
          if (loss_weight) {
            loss_msg_stream << " (* " << loss_weight
                            << " = " << loss_weight * result_vec[k] << " loss)";
          }
          LOG_IF(INFO, Caffe::root_solver()) << " Train net output #"
              << score_index++ << ": " << output_name << " = "
              << result_vec[k] << loss_msg_stream.str();
        }
      }
    }
    for (int i = 0; i < callbacks_.size(); ++i) {
      callbacks_[i]->on_gradients_ready();
    }

    // Apply the weight update.
    ApplyUpdate();

    // Increment the internal iter_ counter -- its value should always indicate
    // the number of times the weights have been updated.
    ++iter_;

    SolverAction::Enum request = GetRequestedAction();

    // Save a snapshot if needed.
    if ((param_.snapshot()
         && iter_ % param_.snapshot() == 0
         && Caffe::root_solver()) ||
        (request == SolverAction::SNAPSHOT)) {
      Snapshot();
    }
    if (SolverAction::STOP == request) {
      requested_early_exit_ = true;
      // Break out of training loop.
      break;
    }
  }
}

# net.hpp（ForwardBackward 以内联方式定义在 include/caffe/net.hpp 中）

// Runs one forward pass followed by one backward pass over the net and
// returns the loss value written by the forward pass.
Dtype ForwardBackward() {
  Dtype loss;
  // Forward(Dtype* loss) -> Net<Dtype>::ForwardFromTo(int start, int end)
  Forward(&loss);
  // Backward() -> Net<Dtype>::BackwardFromTo(int start, int end)
  Backward();
  return loss;
}

前向计算卷积实现原理（im2col）：以每个像素位置作为卷积模板中心时，将被卷积模板覆盖的区域按一维展开（K*K），并按BGR通道依次拼接（C*K*K）。这样的一维数组一共有H*W个，构成（H*W）*（C*K*K）的矩阵。每一组卷积模板同样展开成一维形式（K*K），并按BGR通道依次拼接为一维数组（C*K*K），共Cout个，构成Cout*（C*K*K）的矩阵。两个矩阵相乘（即逐行做点积）之后，再reshape就可以得到Cout*(H*W)的特征图。