在caffe的深度学习平台上做了性别预测和年龄预测,性别预测正确率70%左右,年龄预测正确率只有百分之几。以性别预测为例:
首先是得到训练数据集。因为caffe读取数据都要基于lmdb数据库,所以要将数据转换为lmdb格式。附上相关代码:
#include <stdint.h>
#include <sys/stat.h>

#include <fstream>
#include <iostream>
#include <string>
#include <vector>

#include <boost/scoped_ptr.hpp>
#include <gflags/gflags.h>
#include <glog/logging.h>
#include <google/protobuf/text_format.h>
#if defined(USE_LEVELDB) && defined(USE_LMDB)
#include <leveldb/db.h>
#include <leveldb/write_batch.h>
#include <lmdb.h>
#endif

#include "caffe/proto/caffe.pb.h"
#include "caffe/util/db.hpp"
#include "caffe/util/format.hpp"

using namespace std;
using namespace caffe;
using boost::scoped_ptr;
// Command-line flag: which database backend to write ("lmdb" or "leveldb").
DEFINE_string(backend, "lmdb", "The backend for data result");
void conver_dataset(const char* data_filename, const char* label_filename, const char* db_path, const string& db_backend)
{
ifstream data_file(data_filename, std::ios::in);
ifstream label_file(label_filename, std::ios::in);
CHECK(data_file) << "Uable to open file " << data_file;
CHECK(label_file) << "Unable to open file " <<label_file;
//get data
string dataStr;
vector<string> data;
while(data_file >> dataStr)
{
data.push_back(dataStr);
}
int num_items = data.size() / 26;
uint32_t rows = 1;
uint32_t cols = 26;
//read label
string tmp_lable;
vector<int> vLabel;
int temp;
while(label_file >> tmp_lable)
{
temp = atoi(tmp_lable.c_str());
vLabel.push_back(temp);
}
scoped_ptr<db::DB> db(db::GetDB(db_backend));
db->Open(db_path, db::NEW);
scoped_ptr<db::Transaction> txn(db->NewTransaction());
char* pixels = new char[rows * cols];
int count = 0;
string value;
Datum datum;
datum.set_channels(1);
datum.set_height(rows);
datum.set_width(cols);
LOG(INFO) << "A total of " << num_items << " items. " ;
LOG(INFO) << "Rows: " << rows << " Cols: " << cols;
for(int i = 0; i < num_items; i++)
{
for(int j = 0; j < 26; j++)
{
pixels[j] = atof(data[i * 26 + j].c_str());
}
datum.set_data(pixels, rows * cols);
datum.set_label(vLabel[i]);
string key_str = caffe::format_int(i,8);
datum.SerializeToString(&value);
txn->Put(key_str, value);
if(++count % 1000 == 0)
{
txn->Commit();
}
}
if(count % 1000 != 0)
{
txn->Commit();
}
LOG(INFO) << "Processed " << count << " files.";
delete [] pixels;
db->Close();
}
int main(int argc, char** argv)
{
namespace gflags = google;
FLAGS_alsologtostderr = 1;
gflags::ParseCommandLineFlags(&argc, &argv, true);
google::InitGoogleLogging(argv[0]);
const string& db_backend = FLAGS_backend;
conver_dataset(argv[1], argv[2], argv[3],db_backend);
return 0;
}
其次是定义神经网络。caffe内部已经写好了各种层的实现,所以我们要做的非常简单,就是将不同的层组合在一起,形成一个整体的神经网络。我用的就是最简单的BP神经网络,仅仅用三个全连接层就可以达到70%的准确率。附上相关代码:
name: "IDataNet"
# Training-phase data layer: reads 1x1x26 byte feature vectors plus integer
# labels from the lmdb produced by the converter above. scale 1/256
# (0.00390625) maps byte values into [0, 1).
layer {
name: "IData"
type: "Data"
top: "data"
top: "label"
include {
phase: TRAIN
}
transform_param {
scale: 0.00390625
}
data_param {
source: "examples/PredictSex/IData_train_lmdb"
batch_size: 32
backend: LMDB
}
}
# Test-phase data layer: same preprocessing, separate lmdb, batch of 10.
# (Reusing the name "IData" for both phases is legal since only one is
# instantiated per phase.)
layer {
name: "IData"
type: "Data"
top: "data"
top: "label"
include {
phase: TEST
}
transform_param {
scale: 0.00390625
}
data_param {
source: "examples/PredictSex/IData_test_lmdb"
batch_size: 10
backend: LMDB
}
}
# Fully-connected layer 1: 26 -> 500. lr_mult 1/2 = weight/bias learning
# rate multipliers; xavier init for weights, zeros for biases.
layer {
name: "ip1"
type: "InnerProduct"
bottom: "data"
top: "ip1"
param {
lr_mult: 1
}
param {
lr_mult: 2
}
inner_product_param {
num_output: 500
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
}
}
}
# In-place ReLU after ip1.
layer {
name: "relu1"
type: "ReLU"
bottom: "ip1"
top: "ip1"
}
# Fully-connected layer 2: 500 -> 800.
layer {
name: "ip2"
type: "InnerProduct"
bottom: "ip1"
top: "ip2"
param {
lr_mult: 1
}
param {
lr_mult: 2
}
inner_product_param {
num_output: 800
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
}
}
}
# In-place ReLU after ip2.
layer {
name: "relu2"
type: "ReLU"
bottom: "ip2"
top: "ip2"
}
# Output layer: 800 -> 2 class scores (binary gender prediction).
layer {
name: "ip3"
type: "InnerProduct"
bottom: "ip2"
top: "ip3"
param {
lr_mult: 1
}
param {
lr_mult: 2
}
inner_product_param {
num_output: 2
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
}
}
}
# Top-1 accuracy, reported during the TEST phase only.
layer {
name: "accuracy"
type: "Accuracy"
bottom: "ip3"
bottom: "label"
top: "accuracy"
include {
phase: TEST
}
}
# Softmax + multinomial logistic loss on the raw ip3 scores.
layer {
name: "loss"
type: "SoftmaxWithLoss"
bottom: "ip3"
bottom: "label"
top: "loss"
}
# The train/test net protocol buffer definition
net: "examples/PredictSex/IData_train_test.prototxt"
# test_iter specifies how many forward passes the test should carry out.
# With the TEST batch size of 10 defined in the net, 20 iterations cover
# 200 test samples per evaluation pass.
test_iter: 20
# Carry out testing every 30 training iterations.
test_interval: 30
# The base learning rate, momentum and the weight decay of the network.
base_lr: 0.01
momentum: 0.9
weight_decay: 0.0005
# The learning rate policy: inverse decay,
# lr = base_lr * (1 + gamma * iter) ^ (-power)
lr_policy: "inv"
gamma: 0.0001
power: 0.75
# Display training progress every 20 iterations
display: 20
# The maximum number of iterations
max_iter: 10000
# snapshot intermediate results every 5000 iterations
snapshot: 5000
snapshot_prefix: "examples/PredictSex/IData"
# solver mode: CPU or GPU
solver_mode: CPU