System environment: Ubuntu 14.04 or later, CUDA 8.0 or later, Python 2.
First, you need to put together a Caffe build that meets these requirements, i.e. one that supports networks such as ResNet and SegNet (support for other networks can be added the same way). In old versions of Caffe, layers were created through the common_layers and layer_factory implementations of the individual layer classes; in newer versions it is simpler: whatever you need to add, just put the corresponding header and source files under src/caffe/layers (and include/caffe/layers). The prerequisite is that your system can already compile the various versions of Caffe normally.
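For orientation, here is the complete set of files touched in the steps below (paths relative to the Caffe root):

include/caffe/data_reader.hpp              (new)
src/caffe/data_reader.cpp                  (new)
src/caffe/util/blocking_queue.cpp          (modified)
include/caffe/layers/bn_layer.hpp          (new)
src/caffe/layers/bn_layer.cpp              (new)
src/caffe/layers/bn_layer.cu               (new)
include/caffe/layers/upsample_layer.hpp    (new)
src/caffe/layers/upsample_layer.cpp        (new)
src/caffe/layers/upsample_layer.cu         (new)
src/caffe/proto/caffe.proto                (modified)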
Step 1: add data_reader.cpp under src/caffe and data_reader.hpp under include/caffe.
data_reader.cpp implementation:
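// NOTE: DataReader was dropped from upstream Caffe when the data-layer and
// multi-GPU code was reworked, but older layers (SegNet's among them) still
// expect it, so it is restored here. It runs one background reading thread
// per data source and feeds Datums to the data layers through free/full
// blocking-queue pairs, round-robin across solvers.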
#include <boost/thread.hpp>
#include <map>
#include <string>
#include <vector>
#include "caffe/common.hpp"
#include "caffe/data_reader.hpp"
#include "caffe/layers/data_layer.hpp"
#include "caffe/proto/caffe.pb.h"
namespace caffe {
using boost::weak_ptr;
map<const string, weak_ptr<DataReader::Body> > DataReader::bodies_;
static boost::mutex bodies_mutex_;
DataReader::DataReader(const LayerParameter& param)
: queue_pair_(new QueuePair( //
param.data_param().prefetch() * param.data_param().batch_size())) {
// Get or create a body
boost::mutex::scoped_lock lock(bodies_mutex_);
string key = source_key(param);
weak_ptr<Body>& weak = bodies_[key];
body_ = weak.lock();
if (!body_) {
body_.reset(new Body(param));
bodies_[key] = weak_ptr<Body>(body_);
}
body_->new_queue_pairs_.push(queue_pair_);
}
DataReader::~DataReader() {
string key = source_key(body_->param_);
body_.reset();
boost::mutex::scoped_lock lock(bodies_mutex_);
if (bodies_[key].expired()) {
bodies_.erase(key);
}
}
//
DataReader::QueuePair::QueuePair(int size) {
// Initialize the free queue with requested number of datums
for (int i = 0; i < size; ++i) {
free_.push(new Datum());
}
}
DataReader::QueuePair::~QueuePair() {
Datum* datum;
while (free_.try_pop(&datum)) {
delete datum;
}
while (full_.try_pop(&datum)) {
delete datum;
}
}
//
DataReader::Body::Body(const LayerParameter& param)
: param_(param),
new_queue_pairs_() {
StartInternalThread();
}
DataReader::Body::~Body() {
StopInternalThread();
}
void DataReader::Body::InternalThreadEntry() {
shared_ptr<db::DB> db(db::GetDB(param_.data_param().backend()));
db->Open(param_.data_param().source(), db::READ);
shared_ptr<db::Cursor> cursor(db->NewCursor());
vector<shared_ptr<QueuePair> > qps;
try {
int solver_count = param_.phase() == TRAIN ? Caffe::solver_count() : 1;
// To ensure deterministic runs, only start running once all solvers
// are ready. But solvers need to peek on one item during initialization,
// so read one item, then wait for the next solver.
for (int i = 0; i < solver_count; ++i) {
shared_ptr<QueuePair> qp(new_queue_pairs_.pop());
read_one(cursor.get(), qp.get());
qps.push_back(qp);
}
// Main loop
while (!must_stop()) {
for (int i = 0; i < solver_count; ++i) {
read_one(cursor.get(), qps[i].get());
}
// Check no additional readers have been created. This can happen if
// more than one net is trained at a time per process, whether single
// or multi solver. It might also happen if two data layers have same
// name and same source.
CHECK_EQ(new_queue_pairs_.size(), 0);
}
} catch (boost::thread_interrupted&) {
// Interrupted exception is expected on shutdown
}
}
void DataReader::Body::read_one(db::Cursor* cursor, QueuePair* qp) {
Datum* datum = qp->free_.pop();
// TODO deserialize in-place instead of copy?
datum->ParseFromString(cursor->value());
qp->full_.push(datum);
// go to the next iter
cursor->Next();
if (!cursor->valid()) {
DLOG(INFO) << "Restarting data prefetching from start.";
cursor->SeekToFirst();
}
}
} // namespace caffe
data_reader.hpp implementation:
#ifndef CAFFE_DATA_READER_HPP_
#define CAFFE_DATA_READER_HPP_
#include <map>
#include <string>
#include <vector>
#include "caffe/common.hpp"
#include "caffe/internal_thread.hpp"
#include "caffe/util/blocking_queue.hpp"
#include "caffe/util/db.hpp"
namespace caffe {
/**
* @brief Reads data from a source to queues available to data layers.
* A single reading thread is created per source, even if multiple solvers
* are running in parallel, e.g. for multi-GPU training. This makes sure
* databases are read sequentially, and that each solver accesses a different
* subset of the database. Data is distributed to solvers in a round-robin
* way to keep parallel training deterministic.
*/
class DataReader {
public:
explicit DataReader(const LayerParameter& param);
~DataReader();
inline BlockingQueue<Datum*>& free() const {
return queue_pair_->free_;
}
inline BlockingQueue<Datum*>& full() const {
return queue_pair_->full_;
}
protected:
// Queue pairs are shared between a body and its readers
class QueuePair {
public:
explicit QueuePair(int size);
~QueuePair();
BlockingQueue<Datum*> free_;
BlockingQueue<Datum*> full_;
DISABLE_COPY_AND_ASSIGN(QueuePair);
};
// A single body is created per source
class Body : public InternalThread {
public:
explicit Body(const LayerParameter& param);
virtual ~Body();
protected:
void InternalThreadEntry();
void read_one(db::Cursor* cursor, QueuePair* qp);
const LayerParameter param_;
BlockingQueue<shared_ptr<QueuePair> > new_queue_pairs_;
friend class DataReader;
DISABLE_COPY_AND_ASSIGN(Body);
};
// A source is uniquely identified by its layer name + path, in case
// the same database is read from two different locations in the net.
static inline string source_key(const LayerParameter& param) {
return param.name() + ":" + param.data_param().source();
}
const shared_ptr<QueuePair> queue_pair_;
shared_ptr<Body> body_;
static map<const string, boost::weak_ptr<DataReader::Body> > bodies_;
DISABLE_COPY_AND_ASSIGN(DataReader);
};
} // namespace caffe
#endif // CAFFE_DATA_READER_HPP_
Next, modify the implementation of blocking_queue.cpp under src/caffe/util:
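// NOTE: relative to a stock Caffe tree, the substantive changes in this file
// are the #include of "caffe/data_reader.hpp" and the explicit template
// instantiations at the bottom. BlockingQueue's methods are defined in this
// .cpp rather than the header, so every element type used anywhere in the
// build (Batch<Dtype>*, Datum*, shared_ptr<DataReader::QueuePair>) must be
// instantiated here, or the linker will report undefined references.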
#include <boost/thread.hpp>
#include <string>
#include "caffe/data_reader.hpp"
#include "caffe/proto/caffe.pb.h"
#include "caffe/layers/base_data_layer.hpp"
#include "caffe/parallel.hpp"
#include "caffe/util/blocking_queue.hpp"
namespace caffe {
template<typename T>
class BlockingQueue<T>::sync {
public:
mutable boost::mutex mutex_;
boost::condition_variable condition_;
};
template<typename T>
BlockingQueue<T>::BlockingQueue()
: sync_(new sync()) {
}
template<typename T>
void BlockingQueue<T>::push(const T& t) {
boost::mutex::scoped_lock lock(sync_->mutex_);
queue_.push(t);
lock.unlock();
sync_->condition_.notify_one();
}
template<typename T>
bool BlockingQueue<T>::try_pop(T* t) {
boost::mutex::scoped_lock lock(sync_->mutex_);
if (queue_.empty()) {
return false;
}
*t = queue_.front();
queue_.pop();
return true;
}
template<typename T>
T BlockingQueue<T>::pop(const string& log_on_wait) {
boost::mutex::scoped_lock lock(sync_->mutex_);
while (queue_.empty()) {
if (!log_on_wait.empty()) {
LOG_EVERY_N(INFO, 1000)<< log_on_wait;
}
sync_->condition_.wait(lock);
}
T t = queue_.front();
queue_.pop();
return t;
}
template<typename T>
bool BlockingQueue<T>::try_peek(T* t) {
boost::mutex::scoped_lock lock(sync_->mutex_);
if (queue_.empty()) {
return false;
}
*t = queue_.front();
return true;
}
template<typename T>
T BlockingQueue<T>::peek() {
boost::mutex::scoped_lock lock(sync_->mutex_);
while (queue_.empty()) {
sync_->condition_.wait(lock);
}
return queue_.front();
}
template<typename T>
size_t BlockingQueue<T>::size() const {
boost::mutex::scoped_lock lock(sync_->mutex_);
return queue_.size();
}
template class BlockingQueue<Batch<float>*>;
template class BlockingQueue<Batch<double>*>;
template class BlockingQueue<Datum*>;
template class BlockingQueue<shared_ptr<DataReader::QueuePair> >;
} // namespace caffe
Next, add bn_layer.cpp, bn_layer.cu, upsample_layer.cpp, and upsample_layer.cu under src/caffe/layers.
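Before the code, here is a compact summary of what the LEARN-mode forward pass below computes per channel c, using the blob names from the implementation:

mean_c = E[x]                 (batch_mean_, averaged over N, H, W)
var_c  = E[x^2] - (E[x])^2    (batch_variance_, the shortcut the code uses)
y      = scale_c * (x - mean_c) / sqrt(var_c + var_eps_) + shift_c

In INFERENCE mode only y = scale_c * x + shift_c is applied, so the batch statistics must already be folded into the scale/shift parameter blobs.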
bn_layer.cpp implementation:
#include <algorithm>
#include <vector>
#include "caffe/common.hpp"
#include "caffe/layers/bn_layer.hpp"
#include "caffe/filler.hpp"
#include "caffe/layer.hpp"
#include "caffe/util/math_functions.hpp"
namespace caffe {
template <typename Dtype>
void BNLayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top) {
top[0]->Reshape(bottom[0]->num(), bottom[0]->channels(),
bottom[0]->height(), bottom[0]->width());
if (top.size() > 1) {
// top blob for batch mean
top[1]->Reshape(1, C_, 1, 1);
}
if (top.size() > 2) {
// top blob for batch variance
top[2]->Reshape(1, C_, 1, 1);
}
x_norm_.Reshape(bottom[0]->num(), bottom[0]->channels(),
bottom[0]->height(), bottom[0]->width());
// mean
spatial_mean_.Reshape(N_, C_, 1, 1);
batch_mean_.Reshape(1, C_, 1, 1);
// variance
spatial_variance_.Reshape(N_, C_, 1, 1);
batch_variance_.Reshape(1, C_, 1, 1);
// buffer blob
buffer_blob_.Reshape(N_, C_, H_, W_);
// fill spatial multiplier
spatial_sum_multiplier_.Reshape(1, 1, H_, W_);
Dtype* spatial_multipl_data = spatial_sum_multiplier_.mutable_cpu_data();
caffe_set(spatial_sum_multiplier_.count(), Dtype(1),
spatial_multipl_data);
caffe_set(spatial_sum_multiplier_.count(), Dtype(0),
spatial_sum_multiplier_.mutable_cpu_diff());
// fill batch multiplier
batch_sum_multiplier_.Reshape(N_, 1, 1, 1);
Dtype* batch_multiplier_data = batch_sum_multiplier_.mutable_cpu_data();
caffe_set(batch_sum_multiplier_.count(), Dtype(1),
batch_multiplier_data);
caffe_set(batch_sum_multiplier_.count(), Dtype(0),
batch_sum_multiplier_.mutable_cpu_diff());
}
template <typename Dtype>
void BNLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top) {
// Figure out the dimensions
N_ = bottom[0]->num();
C_ = bottom[0]->channels();
H_ = bottom[0]->height();
W_ = bottom[0]->width();
var_eps_ = 1e-9;
// Check if we need to set up the weights
if (this->blobs_.size() > 0) {
LOG(INFO) << "Skipping parameter initialization";
} else {
this->blobs_.resize(2);
// fill scale with scale_filler
this->blobs_[0].reset(new Blob<Dtype>(1, C_, 1, 1));
shared_ptr<Filler<Dtype> > scale_filler(GetFiller<Dtype>(
this->layer_param_.bn_param().scale_filler()));
scale_filler->Fill(this->blobs_[0].get());
// fill shift with shift_filler
this->blobs_[1].reset(new Blob<Dtype>(1, C_, 1, 1));
shared_ptr<Filler<Dtype> > shift_filler(GetFiller<Dtype>(
this->layer_param_.bn_param().shift_filler()));
shift_filler->Fill(this->blobs_[1].get());
} // parameter initialization
this->param_propagate_down_.resize(this->blobs_.size(), true);
}
template <typename Dtype>
void BNLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top) {
const Dtype* bottom_data = bottom[0]->cpu_data();
Dtype* top_data = top[0]->mutable_cpu_data();
const Dtype* const_top_data = top[0]->cpu_data();
const Dtype* scale_data = this->blobs_[0]->cpu_data();
const Dtype* shift_data = this->blobs_[1]->cpu_data();
switch (this->layer_param_.bn_param().bn_mode()) {
case BNParameter_BNMode_LEARN:
// put the squares of bottom into buffer_blob_
caffe_powx(bottom[0]->count(), bottom_data, Dtype(2),
buffer_blob_.mutable_cpu_data());
// computes variance using var(X) = E(X^2) - (EX)^2
// EX across spatial
caffe_cpu_gemv<Dtype>(CblasNoTrans, N_ * C_, H_ * W_,
Dtype(1. / (H_ * W_)), bottom_data,
spatial_sum_multiplier_.cpu_data(), Dtype(0),
spatial_mean_.mutable_cpu_data());
// EX across batch
caffe_cpu_gemv<Dtype>(CblasTrans, N_, C_, Dtype(1. / N_),
spatial_mean_.cpu_data(),
batch_sum_multiplier_.cpu_data(), Dtype(0),
batch_mean_.mutable_cpu_data());
// E(X^2) across spatial
caffe_cpu_gemv<Dtype>(CblasNoTrans, N_ * C_, H_ * W_,
Dtype(1. / (H_ * W_)), buffer_blob_.cpu_data(),
spatial_sum_multiplier_.cpu_data(), Dtype(0),
spatial_variance_.mutable_cpu_data());
// E(X^2) across batch
caffe_cpu_gemv<Dtype>(CblasTrans, N_, C_, Dtype(1. / N_),
spatial_variance_.cpu_data(),
batch_sum_multiplier_.cpu_data(), Dtype(0),
batch_variance_.mutable_cpu_data());
caffe_powx(batch_mean_.count(), batch_mean_.cpu_data(), Dtype(2),
buffer_blob_.mutable_cpu_data()); // (EX)^2
caffe_sub(batch_mean_.count(), batch_variance_.cpu_data(),
buffer_blob_.cpu_data(),
batch_variance_.mutable_cpu_data()); // variance
// save top[1] (batch_mean) and top[2] (batch_variance)
if (top.size() > 1) {
caffe_copy(batch_mean_.count(), batch_mean_.cpu_data(),
top[1]->mutable_cpu_data());
}
if (top.size() > 2) {
caffe_copy(batch_variance_.count(), batch_variance_.cpu_data(),
top[2]->mutable_cpu_data());
}
// do mean and variance normalization
// subtract mean
caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, N_,
C_, 1, Dtype(1),
batch_sum_multiplier_.cpu_data(),
batch_mean_.cpu_data(), Dtype(0),
spatial_mean_.mutable_cpu_data());
caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, N_ * C_,
H_ * W_, 1, Dtype(-1),
spatial_mean_.cpu_data(),
spatial_sum_multiplier_.cpu_data(), Dtype(0),
buffer_blob_.mutable_cpu_data());
caffe_add(buffer_blob_.count(), bottom_data,
buffer_blob_.cpu_data(), top_data);
// normalize variance
caffe_add_scalar(batch_variance_.count(), var_eps_,
batch_variance_.mutable_cpu_data());
caffe_powx(batch_variance_.count(),
batch_variance_.cpu_data(), Dtype(0.5),
batch_variance_.mutable_cpu_data());
caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, N_,
C_, 1, Dtype(1),
batch_sum_multiplier_.cpu_data(),
batch_variance_.cpu_data(), Dtype(0),
spatial_variance_.mutable_cpu_data());
caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans,
N_ * C_, H_ * W_, 1, Dtype(1),
spatial_variance_.cpu_data(),
spatial_sum_multiplier_.cpu_data(), Dtype(0),
buffer_blob_.mutable_cpu_data());
caffe_div(buffer_blob_.count(), const_top_data,
buffer_blob_.cpu_data(), top_data);
// Saving x_norm
caffe_copy(buffer_blob_.count(), const_top_data,
x_norm_.mutable_cpu_data());
// scale
caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, N_, C_, 1, Dtype(1),
batch_sum_multiplier_.cpu_data(), scale_data, Dtype(0),
spatial_variance_.mutable_cpu_data());
caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, N_ * C_,
H_ * W_, 1, Dtype(1),
spatial_variance_.cpu_data(),
spatial_sum_multiplier_.cpu_data(), Dtype(0),
buffer_blob_.mutable_cpu_data());
caffe_mul(buffer_blob_.count(), top_data,
buffer_blob_.cpu_data(), top_data);
// shift
caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, N_, C_, 1, Dtype(1),
batch_sum_multiplier_.cpu_data(), shift_data, Dtype(0),
spatial_mean_.mutable_cpu_data());
caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans,
N_ * C_, H_ * W_, 1, Dtype(1),
spatial_mean_.cpu_data(),
spatial_sum_multiplier_.cpu_data(), Dtype(0),
buffer_blob_.mutable_cpu_data());
caffe_add(buffer_blob_.count(), const_top_data,
buffer_blob_.cpu_data(), top_data);
break;
case BNParameter_BNMode_INFERENCE:
// scale
caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, N_, C_, 1, Dtype(1),
batch_sum_multiplier_.cpu_data(), scale_data, Dtype(0),
spatial_variance_.mutable_cpu_data());
caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, N_ * C_,
H_ * W_, 1, Dtype(1),
spatial_variance_.cpu_data(),
spatial_sum_multiplier_.cpu_data(), Dtype(0),
buffer_blob_.mutable_cpu_data());
caffe_mul(buffer_blob_.count(), bottom_data,
buffer_blob_.cpu_data(), top_data);
// shift
caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, N_, C_, 1, Dtype(1),
batch_sum_multiplier_.cpu_data(), shift_data, Dtype(0),
spatial_mean_.mutable_cpu_data());
caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans,
N_ * C_, H_ * W_, 1, Dtype(1),
spatial_mean_.cpu_data(),
spatial_sum_multiplier_.cpu_data(), Dtype(0),
buffer_blob_.mutable_cpu_data());
caffe_add(buffer_blob_.count(), const_top_data,
buffer_blob_.cpu_data(), top_data);
break;
default:
LOG(FATAL) << "Unknown BN mode.";
}
}
template <typename Dtype>
void BNLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
const vector<bool>& propagate_down,
const vector<Blob<Dtype>*>& bottom) {
const Dtype* top_diff = top[0]->cpu_diff();
const Dtype* bottom_data = bottom[0]->cpu_data();
Dtype* bottom_diff = bottom[0]->mutable_cpu_diff();
Dtype* scale_diff = this->blobs_[0]->mutable_cpu_diff();
Dtype* shift_diff = this->blobs_[1]->mutable_cpu_diff();
const Dtype* scale_data = this->blobs_[0]->cpu_data();
switch (this->layer_param_.bn_param().bn_mode()) {
case BNParameter_BNMode_LEARN:
// Propagate gradients to layer parameters
// gradient w.r.t. scale
caffe_mul(buffer_blob_.count(), x_norm_.cpu_data(),
top_diff, buffer_blob_.mutable_cpu_data());
// EX across spatial
caffe_cpu_gemv<Dtype>(CblasNoTrans, N_ * C_,
H_ * W_, Dtype(1), buffer_blob_.cpu_data(),
spatial_sum_multiplier_.cpu_data(), Dtype(0),
spatial_variance_.mutable_cpu_diff());
// EX across batch
caffe_cpu_gemv<Dtype>(CblasTrans, N_, C_, Dtype(1),
spatial_variance_.cpu_diff(),
batch_sum_multiplier_.cpu_data(), Dtype(0), scale_diff);
// gradient w.r.t. shift
// EX across spatial
caffe_cpu_gemv<Dtype>(CblasNoTrans, N_ * C_,
H_ * W_, Dtype(1), top_diff,
spatial_sum_multiplier_.cpu_data(),
Dtype(0), spatial_mean_.mutable_cpu_diff());
// EX across batch
caffe_cpu_gemv<Dtype>(CblasTrans, N_, C_,
Dtype(1), spatial_mean_.cpu_diff(),
batch_sum_multiplier_.cpu_data(),
Dtype(0), shift_diff);
// Propagate down
// put scale * top_diff to buffer_blob_
caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, N_, C_, 1, Dtype(1),
batch_sum_multiplier_.cpu_data(), scale_data, Dtype(0),
spatial_variance_.mutable_cpu_data());
caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, N_ * C_,
H_ * W_, 1, Dtype(1),
spatial_variance_.cpu_data(),
spatial_sum_multiplier_.cpu_data(), Dtype(0),
buffer_blob_.mutable_cpu_data());
caffe_mul(buffer_blob_.count(), top_diff, buffer_blob_.cpu_data(),
buffer_blob_.mutable_cpu_data());
// use new top diff for computation
caffe_mul(buffer_blob_.count(), x_norm_.cpu_data(),
buffer_blob_.cpu_data(), bottom_diff);
// EX across spatial
caffe_cpu_gemv<Dtype>(CblasNoTrans, N_ * C_, H_ * W_,
Dtype(1), bottom_diff,
spatial_sum_multiplier_.cpu_data(), Dtype(0),
spatial_mean_.mutable_cpu_data());
// EX across batch
caffe_cpu_gemv<Dtype>(CblasTrans, N_, C_, Dtype(1),
spatial_mean_.cpu_data(),
batch_sum_multiplier_.cpu_data(), Dtype(0),
batch_mean_.mutable_cpu_data());
caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans,
N_, C_, 1, Dtype(1),
batch_sum_multiplier_.cpu_data(),
batch_mean_.cpu_data(), Dtype(0),
spatial_mean_.mutable_cpu_data());
caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, N_ * C_,
H_ * W_, 1, Dtype(1),
spatial_mean_.cpu_data(),
spatial_sum_multiplier_.cpu_data(), Dtype(0),
bottom_diff);
caffe_mul(buffer_blob_.count(),
x_norm_.cpu_data(), bottom_diff, bottom_diff);
// EX across spatial
caffe_cpu_gemv<Dtype>(CblasNoTrans, N_ * C_,
H_ * W_, Dtype(1), buffer_blob_.cpu_data(),
spatial_sum_multiplier_.cpu_data(), Dtype(0),
spatial_mean_.mutable_cpu_data());
// EX across batch
caffe_cpu_gemv<Dtype>(CblasTrans, N_, C_, Dtype(1),
spatial_mean_.cpu_data(),
batch_sum_multiplier_.cpu_data(), Dtype(0),
batch_mean_.mutable_cpu_data());
caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans,
N_, C_, 1, Dtype(1),
batch_sum_multiplier_.cpu_data(),
batch_mean_.cpu_data(), Dtype(0),
spatial_mean_.mutable_cpu_data());
caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans,
N_ * C_, H_ * W_, 1, Dtype(1),
spatial_mean_.cpu_data(),
spatial_sum_multiplier_.cpu_data(), Dtype(1), bottom_diff);
caffe_cpu_axpby(buffer_blob_.count(), Dtype(1),
buffer_blob_.cpu_data(), Dtype(-1. / (N_ * H_ * W_)),
bottom_diff);
// put the squares of bottom into buffer_blob_
caffe_powx(buffer_blob_.count(), bottom_data, Dtype(2),
buffer_blob_.mutable_cpu_data());
caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans,
N_, C_, 1, Dtype(1),
batch_sum_multiplier_.cpu_data(),
batch_variance_.cpu_data(), Dtype(0),
spatial_variance_.mutable_cpu_data());
caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans,
N_ * C_, H_ * W_, 1, Dtype(1),
spatial_variance_.cpu_data(),
spatial_sum_multiplier_.cpu_data(), Dtype(0),
buffer_blob_.mutable_cpu_data());
caffe_div(buffer_blob_.count(), bottom_diff,
buffer_blob_.cpu_data(), bottom_diff);
break;
case BNParameter_BNMode_INFERENCE:
// Propagate gradients to layer parameters
// gradient w.r.t. scale
caffe_mul(buffer_blob_.count(), bottom_data,
top_diff, buffer_blob_.mutable_cpu_data());
// EX across spatial
caffe_cpu_gemv<Dtype>(CblasNoTrans, N_ * C_,
H_ * W_, Dtype(1), buffer_blob_.cpu_data(),
spatial_sum_multiplier_.cpu_data(), Dtype(0),
spatial_variance_.mutable_cpu_diff());
// EX across batch
caffe_cpu_gemv<Dtype>(CblasTrans, N_, C_, Dtype(1),
spatial_variance_.cpu_diff(),
batch_sum_multiplier_.cpu_data(), Dtype(0), scale_diff);
// gradient w.r.t. shift
// EX across spatial
caffe_cpu_gemv<Dtype>(CblasNoTrans, N_ * C_,
H_ * W_, Dtype(1), top_diff,
spatial_sum_multiplier_.cpu_data(),
Dtype(0), spatial_mean_.mutable_cpu_diff());
// EX across batch
caffe_cpu_gemv<Dtype>(CblasTrans, N_, C_,
Dtype(1), spatial_mean_.cpu_diff(),
batch_sum_multiplier_.cpu_data(),
Dtype(0), shift_diff);
// Propagate down
// put scale * top_diff to buffer_blob_
caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, N_, C_, 1, Dtype(1),
batch_sum_multiplier_.cpu_data(), scale_data, Dtype(0),
spatial_variance_.mutable_cpu_data());
caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, N_ * C_,
H_ * W_, 1, Dtype(1),
spatial_variance_.cpu_data(),
spatial_sum_multiplier_.cpu_data(), Dtype(0),
buffer_blob_.mutable_cpu_data());
caffe_mul(buffer_blob_.count(), top_diff, buffer_blob_.cpu_data(),
bottom_diff);
break;
default:
LOG(FATAL) << "Unknown BN mode.";
}
}
#ifdef CPU_ONLY
STUB_GPU(BNLayer);
#endif
INSTANTIATE_CLASS(BNLayer);
REGISTER_LAYER_CLASS(BN);
} // namespace caffe
bn_layer.cu implementation:
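// The GPU path mirrors Forward_cpu/Backward_cpu step for step, swapping each
// caffe_* math wrapper for its caffe_gpu_* counterpart. No hand-written CUDA
// kernels are needed here, because every operation reduces to a GEMM/GEMV or
// an element-wise call already provided by math_functions.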
#include <algorithm>
#include <vector>
#include "caffe/common.hpp"
#include "caffe/layers/bn_layer.hpp"
#include "caffe/filler.hpp"
#include "caffe/layer.hpp"
#include "caffe/util/math_functions.hpp"
namespace caffe {
template <typename Dtype>
void BNLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top) {
const Dtype* bottom_data = bottom[0]->gpu_data();
const Dtype* const_top_data = top[0]->gpu_data();
Dtype* top_data = top[0]->mutable_gpu_data();
Dtype* spatial_mean_data = spatial_mean_.mutable_gpu_data();
Dtype* buffer_data = buffer_blob_.mutable_gpu_data();
const Dtype* const_buffer_data = buffer_blob_.gpu_data();
switch (this->layer_param_.bn_param().bn_mode()) {
case BNParameter_BNMode_LEARN:
// put the squares of bottom into buffer_blob_
caffe_gpu_powx(bottom[0]->count(), bottom_data, Dtype(2),
buffer_blob_.mutable_gpu_data());
// computes variance using var(X) = E(X^2) - (EX)^2
// EX across spatial
caffe_gpu_gemv<Dtype>(CblasNoTrans, N_ * C_, H_ * W_,
Dtype(1. / (H_ * W_)),
bottom_data, spatial_sum_multiplier_.gpu_data(),
Dtype(0), spatial_mean_data);
// EX across batch
caffe_gpu_gemv<Dtype>(CblasTrans, N_, C_, Dtype(1. / N_),
spatial_mean_.gpu_data(),
batch_sum_multiplier_.gpu_data(), Dtype(0),
batch_mean_.mutable_gpu_data());
// E(X^2) across spatial
caffe_gpu_gemv<Dtype>(CblasNoTrans, N_ * C_, H_ * W_,
Dtype(1. / (H_ * W_)), buffer_data,
spatial_sum_multiplier_.gpu_data(), Dtype(0),
spatial_variance_.mutable_gpu_data());
// E(X^2) across batch
caffe_gpu_gemv<Dtype>(CblasTrans, N_, C_, Dtype(1. / N_),
spatial_variance_.gpu_data(),
batch_sum_multiplier_.gpu_data(), Dtype(0),
batch_variance_.mutable_gpu_data());
caffe_gpu_powx(batch_mean_.count(), batch_mean_.gpu_data(),
Dtype(2), buffer_blob_.mutable_gpu_data()); // (EX)^2
caffe_gpu_sub(batch_mean_.count(), batch_variance_.gpu_data(),
buffer_data, batch_variance_.mutable_gpu_data()); // variance
// save top[1] (batch_mean) and top[2] (batch_variance)
if (top.size() > 1) {
caffe_copy(batch_mean_.count(), batch_mean_.gpu_data(),
top[1]->mutable_gpu_data());
}
if (top.size() > 2) {
caffe_copy(batch_variance_.count(), batch_variance_.gpu_data(),
top[2]->mutable_gpu_data());
}
// do mean and variance normalization
// subtract mean
caffe_gpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, N_, C_, 1, Dtype(1),
batch_sum_multiplier_.gpu_data(), batch_mean_.gpu_data(), Dtype(0),
spatial_mean_data);
caffe_gpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, N_ * C_, H_ * W_,
1, -Dtype(1),
spatial_mean_.gpu_data(), spatial_sum_multiplier_.gpu_data(), Dtype(0),
buffer_blob_.mutable_gpu_data());
caffe_gpu_add(buffer_blob_.count(), bottom_data, buffer_data, top_data);
// normalize variance
caffe_gpu_add_scalar(batch_variance_.count(), var_eps_,
batch_variance_.mutable_gpu_data());
caffe_gpu_powx(batch_variance_.count(), batch_variance_.gpu_data(),
Dtype(0.5), batch_variance_.mutable_gpu_data());
caffe_gpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, N_, C_, 1, Dtype(1),
batch_sum_multiplier_.gpu_data(), batch_variance_.gpu_data(), Dtype(0),
spatial_variance_.mutable_gpu_data());
caffe_gpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, N_ * C_,
H_ * W_, 1, Dtype(1),
spatial_variance_.gpu_data(), spatial_sum_multiplier_.gpu_data(),
Dtype(0), buffer_blob_.mutable_gpu_data());
caffe_gpu_div(buffer_blob_.count(), top_data, buffer_data, top_data);
// Saving x_norm
caffe_copy(top[0]->count(), const_top_data, x_norm_.mutable_gpu_data());
// scale
caffe_gpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, N_, C_, 1, Dtype(1),
batch_sum_multiplier_.gpu_data(), this->blobs_[0]->gpu_data(),
Dtype(0), spatial_variance_.mutable_gpu_data());
caffe_gpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, N_ * C_,
H_ * W_, 1, Dtype(1),
spatial_variance_.gpu_data(), spatial_sum_multiplier_.gpu_data(),
Dtype(0), buffer_blob_.mutable_gpu_data());
caffe_gpu_mul(buffer_blob_.count(), top_data, buffer_data, top_data);
// shift
caffe_gpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, N_, C_, 1, Dtype(1),
batch_sum_multiplier_.gpu_data(),
this->blobs_[1]->gpu_data(), Dtype(0),
spatial_mean_data);
caffe_gpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, N_ * C_, H_ * W_, 1,
Dtype(1),
spatial_mean_.gpu_data(), spatial_sum_multiplier_.gpu_data(), Dtype(0),
buffer_blob_.mutable_gpu_data());
caffe_gpu_add(buffer_blob_.count(), top_data, buffer_data, top_data);
break;
case BNParameter_BNMode_INFERENCE:
// scale
caffe_gpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, N_, C_, 1, Dtype(1),
batch_sum_multiplier_.gpu_data(), this->blobs_[0]->gpu_data(),
Dtype(0), spatial_variance_.mutable_gpu_data());
caffe_gpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, N_ * C_,
H_ * W_, 1, Dtype(1),
spatial_variance_.gpu_data(), spatial_sum_multiplier_.gpu_data(),
Dtype(0), buffer_blob_.mutable_gpu_data());
caffe_gpu_mul(buffer_blob_.count(), bottom_data, buffer_data, top_data);
// shift
caffe_gpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, N_, C_, 1, Dtype(1),
batch_sum_multiplier_.gpu_data(),
this->blobs_[1]->gpu_data(), Dtype(0),
spatial_mean_data);
caffe_gpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, N_ * C_, H_ * W_, 1,
Dtype(1),
spatial_mean_.gpu_data(), spatial_sum_multiplier_.gpu_data(), Dtype(0),
buffer_blob_.mutable_gpu_data());
caffe_gpu_add(buffer_blob_.count(), top_data, buffer_data, top_data);
break;
default:
LOG(FATAL) << "Unknown BN mode.";
}
}
template <typename Dtype>
void BNLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
const vector<bool>& propagate_down,
const vector<Blob<Dtype>*>& bottom) {
const Dtype* top_diff = top[0]->gpu_diff();
const Dtype* top_data = top[0]->gpu_data();
const Dtype* bottom_data = bottom[0]->gpu_data();
Dtype* bottom_diff = bottom[0]->mutable_gpu_diff();
const Dtype* const_bottom_diff = bottom[0]->gpu_diff();
Dtype* spatial_mean_data = spatial_mean_.mutable_gpu_data();
Dtype* buffer_data = buffer_blob_.mutable_gpu_data();
const Dtype* const_buffer_data = buffer_blob_.gpu_data();
switch (this->layer_param_.bn_param().bn_mode()) {
case BNParameter_BNMode_LEARN:
// Propagate to layer params
// gradient w.r.t. scale
caffe_gpu_mul(buffer_blob_.count(), x_norm_.gpu_data(),
top_diff, buffer_blob_.mutable_gpu_data());
// EX across spatial
caffe_gpu_gemv<Dtype>(CblasNoTrans, N_ * C_, H_ * W_, Dtype(1),
buffer_data, spatial_sum_multiplier_.gpu_data(), Dtype(0),
spatial_variance_.mutable_gpu_data());
// EX across batch
caffe_gpu_gemv<Dtype>(CblasTrans, N_, C_, Dtype(1),
spatial_variance_.gpu_data(),
batch_sum_multiplier_.gpu_data(), Dtype(0),
this->blobs_[0]->mutable_gpu_diff());
// gradient w.r.t. shift
// EX across spatial
caffe_gpu_gemv<Dtype>(CblasNoTrans, N_ * C_, H_ * W_, Dtype(1),
top_diff, spatial_sum_multiplier_.gpu_data(),
Dtype(0), spatial_mean_data);
// EX across batch
caffe_gpu_gemv<Dtype>(CblasTrans, N_, C_, Dtype(1),
spatial_mean_.gpu_data(),
batch_sum_multiplier_.gpu_data(), Dtype(0),
this->blobs_[1]->mutable_gpu_diff());
// Propagate down
// scale top diff
caffe_gpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, N_, C_, 1, Dtype(1),
batch_sum_multiplier_.gpu_data(), this->blobs_[0]->gpu_data(),
Dtype(0), spatial_variance_.mutable_gpu_data());
caffe_gpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, N_ * C_,
H_ * W_, 1, Dtype(1),
spatial_variance_.gpu_data(), spatial_sum_multiplier_.gpu_data(),
Dtype(0),
buffer_blob_.mutable_gpu_data());
caffe_gpu_mul(buffer_blob_.count(), top_diff, buffer_data,
buffer_blob_.mutable_gpu_data());
// use new top diff for computation
caffe_gpu_mul(buffer_blob_.count(), x_norm_.gpu_data(),
buffer_data, bottom_diff);
// EX across spatial
caffe_gpu_gemv<Dtype>(CblasNoTrans, N_ * C_, H_ * W_,
Dtype(1), bottom_diff,
spatial_sum_multiplier_.gpu_data(), Dtype(0), spatial_mean_data);
// EX across batch
caffe_gpu_gemv<Dtype>(CblasTrans, N_, C_, Dtype(1),
spatial_mean_.gpu_data(),
batch_sum_multiplier_.gpu_data(), Dtype(0),
batch_mean_.mutable_gpu_data());
caffe_gpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, N_, C_, 1, Dtype(1),
batch_sum_multiplier_.gpu_data(),
batch_mean_.gpu_data(), Dtype(0),
spatial_mean_data);
caffe_gpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, N_ * C_,
H_ * W_, 1, Dtype(1), spatial_mean_.gpu_data(),
spatial_sum_multiplier_.gpu_data(), Dtype(0),
bottom_diff);
caffe_gpu_mul(buffer_blob_.count(), x_norm_.gpu_data(),
bottom_diff, bottom_diff);
// EX across spatial
caffe_gpu_gemv<Dtype>(CblasNoTrans, N_ * C_, H_ * W_, Dtype(1),
buffer_data, spatial_sum_multiplier_.gpu_data(),
Dtype(0), spatial_mean_data);
// EX across batch
caffe_gpu_gemv<Dtype>(CblasTrans, N_, C_, Dtype(1),
spatial_mean_.gpu_data(),
batch_sum_multiplier_.gpu_data(), Dtype(0),
batch_mean_.mutable_gpu_data());
caffe_gpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, N_,
C_, 1, Dtype(1),
batch_sum_multiplier_.gpu_data(),
batch_mean_.gpu_data(), Dtype(0),
spatial_mean_data);
caffe_gpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, N_ * C_,
H_ * W_, 1, Dtype(1),
spatial_mean_.gpu_data(), spatial_sum_multiplier_.gpu_data(),
Dtype(1),
bottom_diff);
caffe_gpu_axpby(buffer_blob_.count(), Dtype(1), buffer_data,
Dtype(-1. / (N_ * H_ * W_)),
bottom_diff);
// put the squares of bottom into buffer_blob_
caffe_gpu_powx(buffer_blob_.count(), bottom_data, Dtype(2),
buffer_blob_.mutable_gpu_data());
caffe_gpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, N_, C_, 1, Dtype(1),
batch_sum_multiplier_.gpu_data(), batch_variance_.gpu_data(), Dtype(0),
spatial_variance_.mutable_gpu_data());
caffe_gpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, N_ * C_,
H_ * W_, 1, Dtype(1),
spatial_variance_.gpu_data(), spatial_sum_multiplier_.gpu_data(),
Dtype(0),
buffer_blob_.mutable_gpu_data());
caffe_gpu_div(buffer_blob_.count(), const_bottom_diff,
const_buffer_data, bottom_diff);
break;
case BNParameter_BNMode_INFERENCE:
// Propagate to layer params
// gradient w.r.t. scale
caffe_gpu_mul(buffer_blob_.count(), bottom_data,
top_diff, buffer_blob_.mutable_gpu_data());
// EX across spatial
caffe_gpu_gemv<Dtype>(CblasNoTrans, N_ * C_, H_ * W_, Dtype(1),
buffer_data, spatial_sum_multiplier_.gpu_data(), Dtype(0),
spatial_variance_.mutable_gpu_data());
// EX across batch
caffe_gpu_gemv<Dtype>(CblasTrans, N_, C_, Dtype(1),
spatial_variance_.gpu_data(),
batch_sum_multiplier_.gpu_data(), Dtype(0),
this->blobs_[0]->mutable_gpu_diff());
// gradient w.r.t. shift
// EX across spatial
caffe_gpu_gemv<Dtype>(CblasNoTrans, N_ * C_, H_ * W_, Dtype(1),
top_diff, spatial_sum_multiplier_.gpu_data(),
Dtype(0), spatial_mean_data);
// EX across batch
caffe_gpu_gemv<Dtype>(CblasTrans, N_, C_, Dtype(1),
spatial_mean_.gpu_data(),
batch_sum_multiplier_.gpu_data(), Dtype(0),
this->blobs_[1]->mutable_gpu_diff());
// Propagate down
// scale top diff
caffe_gpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, N_, C_, 1, Dtype(1),
batch_sum_multiplier_.gpu_data(), this->blobs_[0]->gpu_data(),
Dtype(0), spatial_variance_.mutable_gpu_data());
caffe_gpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, N_ * C_,
H_ * W_, 1, Dtype(1),
spatial_variance_.gpu_data(), spatial_sum_multiplier_.gpu_data(),
Dtype(0),
buffer_blob_.mutable_gpu_data());
caffe_gpu_mul(buffer_blob_.count(), top_diff, buffer_data,
bottom_diff);
break;
default:
LOG(FATAL) << "Unknown BN mode.";
}
}
INSTANTIATE_LAYER_GPU_FUNCS(BNLayer);
} // namespace caffe
upsample_layer.cpp implementation:
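// Upsample is the decoder-side counterpart of MAX pooling in SegNet-style
// nets: bottom[0] carries the features and bottom[1] the pooling mask (the
// argmax indices recorded by the matching pooling layer). Forward scatters
// each input value to the output position stored in the mask and leaves the
// rest of the enlarged output at zero; Backward gathers gradients back from
// those same positions.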
#include <algorithm>
#include <cfloat>
#include <vector>
#include <iostream>
#include "caffe/layers/upsample_layer.hpp"
#include "caffe/util/math_functions.hpp"
namespace caffe {
template <typename Dtype>
void UpsampleLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top) {
UpsampleParameter upsample_param = this->layer_param_.upsample_param();
CHECK((upsample_param.has_upsample_h() && upsample_param.has_upsample_w())
|| (!upsample_param.has_scale() && upsample_param.has_scale_h()
&& upsample_param.has_scale_w())
|| (!upsample_param.has_scale_h() && !upsample_param.has_scale_w()))
<< "upsample_h & upsample_w are required, else (DEPRECATED) "
<< "scale OR scale_h & scale_w are required.";
if (upsample_param.has_upsample_h() && upsample_param.has_upsample_w()) {
upsample_h_ = upsample_param.upsample_h();
upsample_w_ = upsample_param.upsample_w();
CHECK_GT(upsample_h_, 1);
CHECK_GT(upsample_w_, 1);
} else {
LOG(INFO) << "Params 'pad_out_{}_' are deprecated. Please declare upsample"
<< " height and width useing the upsample_h, upsample_w parameters.";
if (!upsample_param.has_scale_h()) {
scale_h_ = scale_w_ = upsample_param.scale();
CHECK_GT(scale_h_, 1);
} else {
scale_h_ = upsample_param.scale_h();
scale_w_ = upsample_param.scale_w();
CHECK_GT(scale_h_, 1);
CHECK_GT(scale_w_, 1);
}
pad_out_h_ = upsample_param.pad_out_h();
pad_out_w_ = upsample_param.pad_out_w();
CHECK(!pad_out_h_ || scale_h_ == 2)
<< "Output height padding compensation requires scale_h == 2, otherwise "
<< "the output size is ill-defined.";
CHECK(!pad_out_w_ || scale_w_ == 2)
<< "Output width padding compensation requires scale_w == 2, otherwise "
<< "the output size is ill-defined.";
upsample_h_ = upsample_w_ = -1; // flag to calculate in Reshape
}
}
template <typename Dtype>
void UpsampleLayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top) {
CHECK_EQ(4, bottom[0]->num_axes()) << "Input must have 4 axes, "
<< "corresponding to (num, channels, height, width)";
CHECK_EQ(4, bottom[1]->num_axes()) << "Input mask must have 4 axes, "
<< "corresponding to (num, channels, height, width)";
CHECK_EQ(bottom[0]->num(), bottom[1]->num());
CHECK_EQ(bottom[0]->channels(), bottom[1]->channels());
CHECK_EQ(bottom[0]->height(), bottom[1]->height());
CHECK_EQ(bottom[0]->width(), bottom[1]->width());
if (upsample_h_ <= 0 || upsample_w_ <= 0) {
upsample_h_ = bottom[0]->height() * scale_h_ - int(pad_out_h_);
upsample_w_ = bottom[0]->width() * scale_w_ - int(pad_out_w_);
}
top[0]->Reshape(bottom[0]->num(), bottom[0]->channels(), upsample_h_,
upsample_w_);
channels_ = bottom[0]->channels();
height_ = bottom[0]->height();
width_ = bottom[0]->width();
}
template <typename Dtype>
void UpsampleLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top) {
const Dtype* bottom_data = bottom[0]->cpu_data();
const Dtype* bottom_mask_data = bottom[1]->cpu_data();
Dtype* top_data = top[0]->mutable_cpu_data();
// Initialize
const int top_count = top[0]->count();
caffe_set(top_count, Dtype(0), top_data);
// The main loop
for (int n = 0; n < bottom[0]->num(); ++n) {
for (int c = 0; c < channels_; ++c) {
for (int i = 0; i < height_ * width_; ++i) {
const int idx = static_cast<int>(bottom_mask_data[i]);
if (idx >= upsample_h_ * upsample_w_) {
// this can happen if the pooling layer that created the input mask
// had an input with different size to top[0]
LOG(FATAL) << "upsample top index " << idx << " out of range - "
<< "check scale settings match input pooling layer's "
<< "downsample setup";
}
top_data[idx] = bottom_data[i];
}
// compute offset
bottom_data += bottom[0]->offset(0, 1);
bottom_mask_data += bottom[1]->offset(0, 1);
top_data += top[0]->offset(0, 1);
}
}
}
template <typename Dtype>
void UpsampleLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
if (propagate_down[0]) {
const Dtype* top_diff = top[0]->cpu_diff();
const Dtype* bottom_mask_data = bottom[1]->cpu_data();
Dtype* bottom_diff = bottom[0]->mutable_cpu_diff();
const int bottom_count = bottom[0]->count();
caffe_set(bottom_count, Dtype(0), bottom_diff);
// The main loop
for (int n = 0; n < bottom[0]->num(); ++n) {
for (int c = 0; c < channels_; ++c) {
for (int i = 0; i < height_ * width_; ++i) {
const int idx = static_cast<int>(bottom_mask_data[i]);
if (idx >= height_ * width_ * scale_h_ * scale_w_) {
// this can happen if the pooling layer that created
// the input mask had an input with different size to top[0]
LOG(FATAL) << "upsample top index " << idx << " out of range - "
<< "check scale settings match input pooling layer's downsample setup";
}
bottom_diff[i] = top_diff[idx];
}
// compute offset
bottom_diff += bottom[0]->offset(0, 1);
bottom_mask_data += bottom[1]->offset(0, 1);
top_diff += top[0]->offset(0, 1);
}
}
}
}
#ifdef CPU_ONLY
STUB_GPU(UpsampleLayer);
#endif
INSTANTIATE_CLASS(UpsampleLayer);
REGISTER_LAYER_CLASS(Upsample);
} // namespace caffe
upsample_layer.cu implementation:
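// One CUDA thread per bottom element: index / (in_w * in_h) is the (n, c)
// plane number, so `offset` is where that plane starts in the output, and
// the mask value picks the position inside that plane. The backward kernel
// gathers top gradients from the same scattered locations.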
#include <algorithm>
#include <cfloat>
#include <vector>
#include "caffe/layers/upsample_layer.hpp"
#include "caffe/util/math_functions.hpp"
namespace caffe {
template <typename Dtype>
__global__ void UpsampleForward(const int nthreads, int in_w, int in_h,
int out_w, int out_h, const Dtype* bottom_data,
const Dtype* bottom_mask, Dtype* top_data) {
CUDA_KERNEL_LOOP(index, nthreads) {
int offset = index / (in_w * in_h) * out_w * out_h;
int upsample_idx = static_cast<int>(bottom_mask[index]);
top_data[offset + upsample_idx] = bottom_data[index];
}
}
template <typename Dtype>
void UpsampleLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top) {
const Dtype* bottom_data = bottom[0]->gpu_data();
const Dtype* bottom_mask = bottom[1]->gpu_data();
Dtype* top_data = top[0]->mutable_gpu_data();
caffe_gpu_set(top[0]->count(), Dtype(0), top_data);
int bottom_count = bottom[0]->count();
UpsampleForward<Dtype><<<CAFFE_GET_BLOCKS(bottom_count), CAFFE_CUDA_NUM_THREADS>>>(
bottom_count, bottom[0]->width(), bottom[0]->height(),
top[0]->width(), top[0]->height(), bottom_data, bottom_mask, top_data);
CUDA_POST_KERNEL_CHECK;
}
template <typename Dtype>
__global__ void UpsampleBackward(const int nthreads, int in_w, int in_h,
int out_w, int out_h, const Dtype* top_diff,
const Dtype* bottom_mask, Dtype* bottom_diff) {
CUDA_KERNEL_LOOP(index, nthreads) {
int offset = index / (in_w * in_h) * out_w * out_h;
int upsample_idx = static_cast<int>(bottom_mask[index]);
bottom_diff[index] = top_diff[offset + upsample_idx];
}
}
template <typename Dtype>
void UpsampleLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
if (propagate_down[0]) {
const Dtype* top_diff = top[0]->gpu_diff();
const Dtype* bottom_mask = bottom[1]->gpu_data();
Dtype* bottom_diff = bottom[0]->mutable_gpu_diff();
const int bottom_count = bottom[0]->count();
caffe_gpu_set(bottom_count, Dtype(0.), bottom_diff);
UpsampleBackward<Dtype><<<CAFFE_GET_BLOCKS(bottom_count), CAFFE_CUDA_NUM_THREADS>>>(
bottom_count, bottom[0]->width(), bottom[0]->height(),
top[0]->width(), top[0]->height(), top_diff, bottom_mask, bottom_diff);
CUDA_POST_KERNEL_CHECK;
}
}
INSTANTIATE_LAYER_GPU_FUNCS(UpsampleLayer);
} // namespace caffe
Next, add the two headers, bn_layer.hpp and upsample_layer.hpp, under include/caffe/layers.
bn_layer.hpp implementation:
#ifndef CAFFE_BN_LAYER_HPP_
#define CAFFE_BN_LAYER_HPP_
#include <vector>
#include "caffe/blob.hpp"
#include "caffe/layer.hpp"
#include "caffe/proto/caffe.pb.h"
namespace caffe {
/**
 * @brief Normalizes the input to have zero mean and unit variance across the
 * batch, then applies a learned per-channel scale and shift.
 *
 * This layer implements Batch Normalization as described in [1]. For each
 * channel in the data (i.e. axis 1), it subtracts the mean and divides by the
 * standard deviation, where both statistics are computed across the spatial
 * dimensions and across the examples in the batch.
 *
 * Unlike Caffe's built-in BatchNormLayer, the per-channel scale and shift of
 * the original paper are part of this layer: they are stored as its two
 * parameter blobs, (0) scale and (1) shift, so no separate ScaleLayer is
 * needed. In LEARN mode the batch statistics are computed on the fly (and
 * optionally exposed as top[1]/top[2]); in INFERENCE mode only the scale and
 * shift are applied, so the statistics must already be folded into them.
 *
 * [1] S. Ioffe and C. Szegedy, "Batch Normalization: Accelerating Deep Network
 * Training by Reducing Internal Covariate Shift." arXiv preprint
 * arXiv:1502.03167 (2015).
 *
 * TODO(dox): thorough documentation for Forward, Backward, and proto params.
 */
template <typename Dtype>
class BNLayer : public Layer<Dtype> {
public:
explicit BNLayer(const LayerParameter& param)
: Layer<Dtype>(param) {}
virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);
virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);
virtual inline const char* type() const { return "BN"; }
virtual inline int ExactNumBottomBlobs() const { return 1; }
virtual inline int MinTopBlobs() const { return 1; }
// if the BNMode is "LEARN", a maximum of 3 top blobs are available
virtual inline int MaxTopBlobs() const {
return (this->layer_param_.bn_param().bn_mode() ==
BNParameter_BNMode_LEARN) ? 3 : 1;
}
protected:
virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);
virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);
virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
// spatial mean & variance
Blob<Dtype> spatial_mean_, spatial_variance_;
// batch mean & variance
Blob<Dtype> batch_mean_, batch_variance_;
// buffer blob
Blob<Dtype> buffer_blob_;
Blob<Dtype> x_norm_;
// x_sum_multiplier is used to carry out sum using BLAS
Blob<Dtype> spatial_sum_multiplier_, batch_sum_multiplier_;
// dimension
int N_;
int C_;
int H_;
int W_;
// eps
Dtype var_eps_;
};
} // namespace caffe
#endif // CAFFE_BN_LAYER_HPP_
upsample_layer.hpp implementation:
#ifndef CAFFE_UPSAMPLE_LAYER_HPP_
#define CAFFE_UPSAMPLE_LAYER_HPP_
#include <vector>
#include "caffe/blob.hpp"
#include "caffe/layer.hpp"
#include "caffe/proto/caffe.pb.h"
namespace caffe {
template <typename Dtype>
class UpsampleLayer : public Layer<Dtype> {
public:
explicit UpsampleLayer(const LayerParameter& param)
: Layer<Dtype>(param) {}
virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);
virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);
virtual inline const char* type() const { return "Upsample"; }
// [input, encoder max-pooling mask]
virtual inline int ExactNumBottomBlobs() const { return 2; }
virtual inline int ExactNumTopBlobs() const { return 1; }
protected:
virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);
virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);
virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
int channels_;
int height_;
int width_;
int scale_h_, scale_w_;
bool pad_out_h_, pad_out_w_;
int upsample_h_, upsample_w_;
};
} // namespace caffe
#endif // CAFFE_UPSAMPLE_LAYER_HPP_
Finally, register the two new parameter entries in src/caffe/proto/caffe.proto; the full LayerParameter message becomes:
message LayerParameter {
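// NOTE: the two additions below are bn_param (field 150) and upsample_param
// (field 151). The numbers only have to be unique within LayerParameter --
// prototxt files refer to fields by name -- so if 150 or 151 are already
// taken in your caffe.proto, pick any free IDs.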
optional string name = 1; // the layer name
optional string type = 2; // the layer type
repeated string bottom = 3; // the name of each bottom blob
repeated string top = 4; // the name of each top blob
// The train / test phase for computation.
optional Phase phase = 10;
// The amount of weight to assign each top blob in the objective.
// Each layer assigns a default value, usually of either 0 or 1,
// to each top blob.
repeated float loss_weight = 5;
// Specifies training parameters (multipliers on global learning constants,
// and the name and other settings used for weight sharing).
repeated ParamSpec param = 6;
// The blobs containing the numeric parameters of the layer.
repeated BlobProto blobs = 7;
// Specifies whether to backpropagate to each bottom. If unspecified,
// Caffe will automatically infer whether each input needs backpropagation
// to compute parameter gradients. If set to true for some inputs,
// backpropagation to those inputs is forced; if set false for some inputs,
// backpropagation to those inputs is skipped.
//
// The size must be either 0 or equal to the number of bottoms.
repeated bool propagate_down = 11;
// Rules controlling whether and when a layer is included in the network,
// based on the current NetState. You may specify a non-zero number of rules
// to include OR exclude, but not both. If no include or exclude rules are
// specified, the layer is always included. If the current NetState meets
// ANY (i.e., one or more) of the specified rules, the layer is
// included/excluded.
repeated NetStateRule include = 8;
repeated NetStateRule exclude = 9;
// Parameters for data pre-processing.
optional TransformationParameter transform_param = 100;
// Parameters shared by loss layers.
optional LossParameter loss_param = 101;
// Layer type-specific parameters.
//
// Note: certain layers may have more than one computational engine
// for their implementation. These layers include an Engine type and
// engine parameter for selecting the implementation.
// The default for the engine is set by the ENGINE switch at compile-time.
optional AccuracyParameter accuracy_param = 102;
optional ArgMaxParameter argmax_param = 103;
//////////////// added for the BN layer ////////////////
optional BNParameter bn_param = 150;
optional BatchNormParameter batch_norm_param = 139;
optional BiasParameter bias_param = 141;
optional ConcatParameter concat_param = 104;
optional ContrastiveLossParameter contrastive_loss_param = 105;
optional ConvolutionParameter convolution_param = 106;
optional CropParameter crop_param = 144;
optional DataParameter data_param = 107;
optional DropoutParameter dropout_param = 108;
optional DummyDataParameter dummy_data_param = 109;
optional EltwiseParameter eltwise_param = 110;
optional ELUParameter elu_param = 140;
optional EmbedParameter embed_param = 137;
optional ExpParameter exp_param = 111;
optional FlattenParameter flatten_param = 135;
optional HDF5DataParameter hdf5_data_param = 112;
optional HDF5OutputParameter hdf5_output_param = 113;
optional HingeLossParameter hinge_loss_param = 114;
optional ImageDataParameter image_data_param = 115;
optional InfogainLossParameter infogain_loss_param = 116;
optional InnerProductParameter inner_product_param = 117;
optional InputParameter input_param = 143;
optional LogParameter log_param = 134;
optional LRNParameter lrn_param = 118;
optional MemoryDataParameter memory_data_param = 119;
optional MVNParameter mvn_param = 120;
optional ParameterParameter parameter_param = 145;
optional PoolingParameter pooling_param = 121;
optional PowerParameter power_param = 122;
optional PReLUParameter prelu_param = 131;
optional PythonParameter python_param = 130;
optional RecurrentParameter recurrent_param = 146;
optional ReductionParameter reduction_param = 136;
optional ReLUParameter relu_param = 123;
optional ReshapeParameter reshape_param = 133;
optional ScaleParameter scale_param = 142;
optional SigmoidParameter sigmoid_param = 124;
optional SoftmaxParameter softmax_param = 125;
optional SPPParameter spp_param = 132;
optional SliceParameter slice_param = 126;
optional SwishParameter swish_param = 147;
optional TanHParameter tanh_param = 127;
optional ThresholdParameter threshold_param = 128;
optional TileParameter tile_param = 138;
//////////////// added for the Upsample layer ////////////////
optional UpsampleParameter upsample_param = 151;
optional WindowDataParameter window_data_param = 129;
}
The parameters inside these two messages are as follows:
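// Note: with this BN layer, SegNet-style pipelines typically train with
// bn_mode: LEARN, then compute the final batch statistics offline, fold them
// into the scale/shift blobs, and switch bn_mode to INFERENCE in the deploy
// prototxt.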
message BNParameter {
enum BNMode {
LEARN = 0;
INFERENCE = 1;
}
optional BNMode bn_mode = 3 [default = LEARN];
optional FillerParameter scale_filler = 1; // The filler for the scale
optional FillerParameter shift_filler = 2; // The filler for the shift
}
message UpsampleParameter {
// DEPRECATED. No need to specify upsampling scale factors when
// exact output shape is given by upsample_h, upsample_w parameters.
optional uint32 scale = 1 [default = 2];
// DEPRECATED. No need to specify upsampling scale factors when
// exact output shape is given by upsample_h, upsample_w parameters.
optional uint32 scale_h = 2;
// DEPRECATED. No need to specify upsampling scale factors when
// exact output shape is given by upsample_h, upsample_w parameters.
optional uint32 scale_w = 3;
// DEPRECATED. Specify exact output height using upsample_h. This
// parameter only works when scale is 2
optional bool pad_out_h = 4 [default = false];
// DEPRECATED. Specify exact output width using upsample_w. This
// parameter only works when scale is 2
optional bool pad_out_w = 5 [default = false];
optional uint32 upsample_h = 6;
optional uint32 upsample_w = 7;
}
Once all of the additions above are in place, simply rebuild Caffe and the new layers are ready to use. Links that helped resolve problems along the way:
https://blog.youkuaiyun.com/kevin_darkelf/article/details/51683456
https://github.com/BVLC/caffe/issues/5451
https://github.com/BVLC/caffe
http://www.cnblogs.com/yqyouqing/p/7231405.html
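As a quick sanity check after rebuilding, a minimal SegNet-style prototxt fragment exercising both new layers might look like this (layer and blob names are illustrative only, not from any particular model):

layer {
  name: "conv1_bn"
  type: "BN"
  bottom: "conv1"
  top: "conv1_bn"
  bn_param {
    bn_mode: LEARN
    scale_filler { type: "constant" value: 1 }
    shift_filler { type: "constant" value: 0 }
  }
}
layer {
  name: "pool1"
  type: "Pooling"
  bottom: "conv1_bn"
  top: "pool1"
  top: "pool1_mask"   # second top: max-pooling indices, consumed by Upsample
  pooling_param { pool: MAX kernel_size: 2 stride: 2 }
}
# ... decoder layers ...
layer {
  name: "upsample1"
  type: "Upsample"
  bottom: "conv_decode1"  # features to upsample
  bottom: "pool1_mask"    # mask from the matching pooling layer
  top: "upsample1"
  upsample_param { scale: 2 }
}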