I had been training models with theano, but recently I needed to port the code to C or C++. After looking at other implementations online, I decided I still prefer C++. However, the several cpp versions I read all had bugs of one kind or another, which was frustrating, and since my coding skills are fairly weak it took me quite a while to fix them. I also added functions to write and read the weights, so the results of training can be saved. Below is a description of what the code does.
Problem description:
Rewrite an MLP (an ordinary multi-layer neural network) in cpp. It needs to support multiple hidden layers connected to an output layer, with a softmax layer doing the classification.
Test case:
The test case is hand-built: a 3-bit binary number converted to decimal has 8 possible values, and I map them to labels 0-7. For example, 001 corresponds to label 1, 111 corresponds to label 7, and so on.
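To make the test data concrete, here is a minimal sketch of how the 8 inputs and their one-hot labels could be laid out (the array names are mine, not from the code below; the actual program builds its labels with makeLabels):
// 8 input patterns, one per row
double train_x[8][3] = {
    {0,0,0}, {0,0,1}, {0,1,0}, {0,1,1},
    {1,0,0}, {1,0,1}, {1,1,0}, {1,1,1}
};
// one-hot labels: pattern i gets a 1 in position i,
// e.g. 001 -> label 1 -> {0,1,0,0,0,0,0,0}
double train_y[8][8] = {
    {1,0,0,0,0,0,0,0}, {0,1,0,0,0,0,0,0},
    {0,0,1,0,0,0,0,0}, {0,0,0,1,0,0,0,0},
    {0,0,0,0,1,0,0,0}, {0,0,0,0,0,1,0,0},
    {0,0,0,0,0,0,1,0}, {0,0,0,0,0,0,0,1}
};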
Now let's look at the code:
main.cpp
#include <iostream>
#include "NeuralNetwork.h"
#include "util.h"
using namespace std;
/*The two functions called in main do the same thing:
 *they classify a 3-bit binary number into its decimal value.
 *test_lr uses a single softmax regression layer;
 *mlp is a neural network with multiple hidden layers.
 */
int main()
{
    cout << "****softmax****" << endl;
    test_lr();
    cout << "****mlp****" << endl;
    mlp();
    return 0;
}
The test_lr function tests the softmax layer; let's look at its files first.
LogisticRegression.h
#ifndef LOGISTICREGRESSIONLAYER
#define LOGISTICREGRESSIONLAYER
class LogisticRegression
{
public:
    LogisticRegression(int n_i, int n_o, int n_t);
    ~LogisticRegression();
    void forward_propagation(double* input_data);
    void back_propagation(double* input_data, double* label, double lr);
    void softmax(double* x);
    void printwb();
    void writewb(const char *pcname);
    long readwb(const char *pcname, long);
    void setwb(double ppdw[][3], double [8]);
    void train(double *x, double *y, double lr);
    int predict(double *);
    double cal_error(double **ppdtest, double* pdlabel, int ibatch);
    //double cal_error(double* label);
    void makeLabels(int* pimax, double (*pplabels)[8]);
    //output of this layer's forward pass, which is also the final prediction
    double* output_data;
    //values needed during back-propagation
    double* delta;
public:
    int n_in;
    int n_out;
    int n_train;
    double** w;
    double* b;
};
void test_lr();
void testwb();
#endif
The testwb declared in the header only exists to test reading and writing the weights; you can ignore it for now.
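For reference, here is a rough sketch of how the save/load pair might be called. The file name is made up, the constructor arguments assume 3 inputs, 8 classes and 8 training samples, and I am assuming readwb's second argument is the byte offset to start reading from and the returned long is the offset just past this layer's data (that is how the declaration reads, but the implementation is not shown here):
LogisticRegression lr(3, 8, 8);
// ... training ...
lr.writewb("lr_weights.dat");                  // save w and b after training

LogisticRegression lr2(3, 8, 8);
long next = lr2.readwb("lr_weights.dat", 0);   // assumed: read from offset 0, return next offset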
LogisticRegression.cpp
#include <cmath>
#include <ctime>
#include <iostream>
#include "LogisticRegression.h"
#include "util.h"
using namespace std;
LogisticRegression::LogisticRegression(int n_i, int n_o, int n_t)
{
    n_in = n_i;
    n_out = n_o;
    n_train = n_t;
    // allocate an n_out x n_in weight matrix and an n_out bias vector
    w = new double* [n_out];
    for(int i = 0; i < n_out; ++i)
    {
        w[i] = new double [n_in];
    }
    b = new double [n_out];
    // initialize weights and biases uniformly in [-1/n_in, 1/n_in]
    double a = 1.0 / n_in;
    srand((unsigned)time(NULL));
    for(int i = 0; i < n_out; ++i)
    {
        for(int j = 0; j < n_in; ++j)
            w[i][j] = uniform(-a, a);
        b[i] = uniform(-a, a);
    }
    delta = new double [n_out];
    output_data = new double [n_out];
}
LogisticRegression::~LogisticRegression()
{
    for(int i = 0; i < n_out; i++)
        delete[] w[i];
    delete[] w;
    delete[] b;
    delete[] output_data;
    delete[] delta;
}
void LogisticRegression::printwb()
{
    cout << "****w****\n";
    for(int i = 0; i < n_out; ++i)
    {
        for(int j = 0; j < n_in; ++j)
            cout << w[i][j] << ' ';
        cout << endl;
    }
    cout << "****b****\n";
    for(int i = 0; i < n_out; ++i)
    {
        cout << b[i] << ' ';
    }
    cout << endl;
    cout << "****output****\n";
    for(int i = 0; i < n_out; ++i)
    {
        cout << output_data[i] << ' ';
    }
    cout << endl;
}
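// Note: softmax subtracts the largest input before exponentiating.
// Since softmax(x) == softmax(x - c) for any constant c, this does not
// change the result, but it keeps exp() from overflowing for large inputs.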
void LogisticRegression::softmax(double* x)
{
    // start from x[0] so the shift also works when every input is negative
    double _max = x[0];
    double _sum = 0.0;
    for(int i = 1; i < n_out; ++i)
    {
        if(_max < x[i])
            _max = x[i];
    }
    for(int i = 0; i < n_out; ++i)
    {
        x[i] = exp(x[i] - _max);
        _sum += x[i];
    }
    for(int i = 0; i < n_out; ++i)
    {
        x[i] /= _sum;
    }
}
void LogisticRegression::forward_propagation(double* input_data)
{
    for(int i = 0; i < n_out; ++i)
    {
        output_data[i] = 0.0;
        for(int j = 0; j < n_in; ++j)
        {
            output_data[i] += w[i][j] * input_data[j];
        }
        output_data[i] += b[i];
    }
    softmax(output_data);
}
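// For a softmax output trained with the cross-entropy (negative log-likelihood)
// loss, the gradient of the loss with respect to the pre-softmax activation of
// unit i is (output[i] - label[i]). The update below therefore moves w and b in
// the direction (label - output), i.e. gradient descent on that loss, scaled by
// lr and divided by n_train to average over the training samples.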
void LogisticRegression::back_propagation(double* input_data, double* label, double lr)
{
    for(int i = 0; i < n_out; ++i)
    {
        delta[i] = label[i] - output_data[i];
        for(int j = 0; j < n_in; ++j)
        {
            w[i][j] += lr * delta[i] * input_data[j] / n_train;
        }
        b[i] += lr * delta[i] / n_train;
    }
}
int LogisticRegression::predict(double *x)
{
    forward_propagation(x);
    cout << "***result is ***" << en