在caffe的深度学习平台上做了性别预测和年龄预测,性别预测正确率70%左右,年龄预测正确率只有百分之几。以性别预测为例:
首先是得到训练数据集。因为caffe读取数据都要基于lmdb数据库,所以要将数据转换为lmdb格式。附上相关代码:
#include <stdint.h>
#include <sys/stat.h>

#include <fstream>
#include <iostream>
#include <string>
#include <vector>

#include <boost/scoped_ptr.hpp>
#include <gflags/gflags.h>
#include <glog/logging.h>
#include <google/protobuf/text_format.h>
#if defined(USE_LEVELDB) && defined(USE_LMDB)
#include <leveldb/db.h>
#include <leveldb/write_batch.h>
#include <lmdb.h>
#endif

#include "caffe/proto/caffe.pb.h"
#include "caffe/util/db.hpp"
#include "caffe/util/format.hpp"

using namespace std;
using namespace caffe;
using boost::scoped_ptr;
// Command-line flag: which database backend to write ("lmdb" or "leveldb").
DEFINE_string(backend, "lmdb", "The backend for data result");
void conver_dataset(const char* data_filename, const char* label_filename, const char* db_path, const string& db_backend)
{
ifstream data_file(data_filename, std::ios::in);
ifstream label_file(label_filename, std::ios::in);
CHECK(data_file) << "Uable to open file " << data_file;
CHECK(label_file) << "Unable to open file " <<label_file;
//get data
string dataStr;
vector<string> data;
while(data_file >> dataStr)
{
data.push_back(dataStr);
}
int num_items = data.size() / 26;
uint32_t rows = 1;
uint32_t cols = 26;
//read label
string tmp_lable;
vector<int> vLabel;
int temp;
while(label_file >> tmp_lable)
{
temp = atoi(tmp_lable.c_str());
vLabel.push_back(temp);
}
scoped_ptr<db::DB> db(db::GetDB(db_backend));
db->Open(db_path, db::NEW);
scoped_ptr<db::Transaction> txn(db->NewTransaction());
char* pixels = new char[rows * cols];
int count = 0;
string value;
Datum datum;
datum.set_channels(1);
datum.set_height(rows);
datum.set_width(cols);
LOG(INFO) << "A total of " << num_items << " items. " ;
LOG(INFO) << "Rows: " << rows << " Cols: " << cols;
for(int i = 0; i < num_items; i++)
{
for(int j = 0; j < 26; j++)
{
pixels[j] = atof(data[i * 26 + j].c_str());
}
datum.set_data(pixels, rows * cols);
datum.set_label(vLabel[i]);
string key_str = caffe::format_int(i,8);
datum.SerializeToString(&value);
txn->Put(key_str, value);
if(++count % 1000 == 0)
{
txn->Commit();
}
}
if(count % 1000 != 0)
{
txn->Commit();
}
LOG(INFO) << "Processed " << count << " files.";
delete [] pixels;
db->Close();
}
int main(int argc, char** argv)
{
namespace gflags = google;
FLAGS_alsologtostderr = 1;
gflags::ParseCommandLineFlags(&argc, &argv, true);
google::InitGoogleLogging(argv[0]);
const string& db_backend = FLAGS_backend;
conver_dataset(argv[1], argv[2], argv[3],db_backend);
return 0;
}
其次是定义神经网络。caffe内部已经写好了各种层的实现,所以我们要做的非常简单,就是将不同的层组合在一起,形成一个整体的神经网络。我用的就是最简单的BP神经网络,仅仅用三个全连接层就可以达到70%的准确率。附上相关代码:
name: "IDataNet"
# Training-phase data layer: reads 1x1x26 byte feature vectors plus integer
# labels from the lmdb produced by the converter above. scale 1/256
# (0.00390625) maps byte values into [0, 1).
layer {
name: "IData"
type: "Data"
top: "data"
top: "label"
include {
phase: TRAIN
}
transform_param {
scale: 0.00390625
}
data_param {
source: "examples/PredictSex/IData_train_lmdb"
batch_size: 32
backend: LMDB
}
}
# Test-phase data layer: same preprocessing, separate lmdb, batch of 10.
# (Reusing the name "IData" for both phases is legal since only one is
# instantiated per phase.)
layer {
name: "IData"
type: "Data"
top: "data"
top: "label"
include {
phase: TEST
}
transform_param {
scale: 0.00390625
}
data_param {
source: "examples/PredictSex/IData_test_lmdb"
batch_size: 10
backend: LMDB
}
}
# Fully-connected layer 1: 26 -> 500. lr_mult 1/2 = weight/bias learning
# rate multipliers; xavier init for weights, zeros for biases.
layer {
name: "ip1"
type: "InnerProduct"
bottom: "data"
top: "ip1"
param {
lr_mult: 1
}
param {
lr_mult: 2
}
inner_product_param {
num_output: 500
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
}
}
}
# In-place ReLU after ip1.
layer {
name: "relu1"
type: "ReLU"
bottom: "ip1"
top: "ip1"
}
# Fully-connected layer 2: 500 -> 800.
layer {
name: "ip2"
type: "InnerProduct"
bottom: "ip1"
top: "ip2"
param {
lr_mult: 1
}
param {
lr_mult: 2
}
inner_product_param {
num_output: 800
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
}
}
}
# In-place ReLU after ip2.
layer {
name: "relu2"
type: "ReLU"
bottom: "ip2"
top: "ip2"
}
# Output layer: 800 -> 2 class scores (binary gender prediction).
layer {
name: "ip3"
type: "InnerProduct"
bottom: "ip2"
top: "ip3"
param {
lr_mult: 1
}
param {
lr_mult: 2
}
inner_product_param {
num_output: 2
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
}
}
}
# Top-1 accuracy, reported during the TEST phase only.
layer {
name: "accuracy"
type: "Accuracy"
bottom: "ip3"
bottom: "label"
top: "accuracy"
include {
phase: TEST
}
}
# Softmax + multinomial logistic loss on the raw ip3 scores.
layer {
name: "loss"
type: "SoftmaxWithLoss"
bottom: "ip3"
bottom: "label"
top: "loss"
}
# The train/test net protocol buffer definition
net: "examples/PredictSex/IData_train_test.prototxt"
# test_iter specifies how many forward passes the test should carry out.
# With the TEST batch size of 10 defined in the net, 20 iterations cover
# 200 test samples per evaluation pass.
test_iter: 20
# Carry out testing every 30 training iterations.
test_interval: 30
# The base learning rate, momentum and the weight decay of the network.
base_lr: 0.01
momentum: 0.9
weight_decay: 0.0005
# The learning rate policy: inverse decay,
# lr = base_lr * (1 + gamma * iter) ^ (-power)
lr_policy: "inv"
gamma: 0.0001
power: 0.75
# Display training progress every 20 iterations
display: 20
# The maximum number of iterations
max_iter: 10000
# snapshot intermediate results every 5000 iterations
snapshot: 5000
snapshot_prefix: "examples/PredictSex/IData"
# solver mode: CPU or GPU
solver_mode: CPU