This implementation is written in C++ with OpenCV 2.7.13 and is based on the book "Neural Networks and Deep Learning". The training data is a set of digit images downloaded from the internet; you can collect a similar set yourself.
It is recommended to read the book before going through this code. The code is similar to the book's Python implementation and follows its overall structure, but adds some extra features; comparing the two versions shows the differences.
Corrections and suggestions for improving this code are welcome.
#include <cstdio>
#include <iostream>
#include <cmath>
#include <ctime>
#include <algorithm>
#include <vector>
#include <string>
#include <opencv2/opencv.hpp>
using namespace std;
using namespace cv;
struct Traning_data
{
Mat x;//input: flattened, normalized grayscale image, (rows*cols)x1
Mat y;//label: 10x1 one-hot vector
};
class Network
{
public:
//read the training data and the test data
void read_data();
/* sizes number of neurons in each layer of the network
eta learning rate
cost selects the cost function C: 1 for the cross-entropy cost, 2 for the quadratic cost
lmbda L2 regularization parameter; defaults to 0 (regularization disabled)
dropout dropout setting: 1 disables it; otherwise pass a real number between 0 and 1 giving the fraction of parameters that take part in each update, so a fraction 1-dropout of the parameters is left out
*/
void init(vector<int> &sizes,double _eta,int _cost=1,double _lmbda=0.0,double _dropout=1.0);
/*
epchos number of training epochs
mini_batch_num mini-batch size
track_cost whether to track the training data; if true, the network is evaluated on the training data at the end of each epoch
test whether to track the test data; if true, the network is evaluated on the test data at the end of each epoch
*/
void SGD(int epchos,int mini_batch_num,bool track_cost=false,bool test=false);
private:
vector<Mat>weights;//weight matrices
vector<Mat>bases;//bias vectors
vector<Traning_data>test_data;//test data
vector<Traning_data>traning_data;//training data
int traning_num;//number of training samples
int test_num;//number of test samples
double eta;//learning rate
int cost;//cost function type
double lmbda;//L2 regularization coefficient
double dropout;//dropout keep fraction
Mat feedforward(Mat &x);//forward propagation
void weights_init();//initialize weights and biases
void updata_mini_data(vector<Traning_data>mini_batch);//train on one mini-batch
int accury(vector<Traning_data>&_data); //count correctly classified samples
double total_cost(vector<Traning_data>&_data);//compute the current total cost
int getMaxnum(Mat &t);//return the index of the largest entry of the vector t
void backprop(Mat &batch_traning_x,Mat &batch_traning_y,vector<Mat>& nabla_w,vector<Mat>& nabla_b);//backpropagation
Mat drop_out(int rows,int cols);//return a dropout mask matrix
Mat sigmoid(Mat &z);
Mat sigmoid_prime(Mat &z);
};
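//Forward pass: starting from the input column vector x, repeatedly apply
//a = sigmoid(W*a + b) for every layer and return the final activation.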
Mat Network::feedforward(Mat &x)
{
int i;
Mat y=x;
for(i=0;i<weights.size();i++)
{
y=weights[i]*y+bases[i];
y=sigmoid(y);
}
return y;
}
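//Load the digit images: for each digit 0-9 the files data\i\i_j.bmp are read
//as grayscale, flattened into a (rows*cols)x1 column vector, scaled to [0,1],
//and paired with a 10x1 one-hot label (missing files are skipped). The first
//400 images per digit go to the training set, the rest to the test set.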
void Network::read_data()
{
int i,j;
for(i=0;i<10;i++)
for(j=1;j<=500;j++)
{
String c;
c="data\\"+to_string(i)+"\\"+to_string(i)+"_"+to_string(j)+".bmp";
Mat img=imread(c,IMREAD_GRAYSCALE);
if(!img.data)
continue;
Traning_data _traning_data;
_traning_data.x.create(img.rows*img.cols,1,CV_64FC1);
int num=0;
int k,l;
for(k=0;k<img.rows;k++)
for(l=0;l<img.cols;l++)
{
_traning_data.x.at<double>(num,0)=(double)img.at<uchar>(k,l);
num++;
}
_traning_data.x=_traning_data.x/255.0;
_traning_data.y=Mat::zeros(10,1,CV_64FC1);
_traning_data.y.at<double>(i,0)=1.0;
if(j<=400)
{
traning_data.push_back(_traning_data);
}
else
{
test_data.push_back(_traning_data);
}
}
traning_num=traning_data.size();
test_num=test_data.size();
cout<<"traning_data_num="<<traning_num<<endl;
cout<<"test_data_num="<<test_num<<endl;
}
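//Initialize every weight and bias with Gaussian noise of standard deviation
//1/sqrt(n_in), where n_in is the number of inputs feeding that layer.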
void Network::weights_init()
{
int k;
int i,j;
RNG rng(time(0)%100);
for(k=0;k<bases.size();k++)
{
double sqrt_w_cols=sqrt(weights[k].cols);
for(i=0;i<bases[k].rows;i++)
for(j=0;j<bases[k].cols;j++)
bases[k].at<double>(i,j)=rng.gaussian(1.0/sqrt_w_cols);
for(i=0;i<weights[k].rows;i++)
for(j=0;j<weights[k].cols;j++)
weights[k].at<double>(i,j)=rng.gaussian(1.0/sqrt_w_cols);
}
}
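//Store the hyperparameters and allocate one weight matrix (sizes[i] x sizes[i-1])
//and one bias vector (sizes[i] x 1) per layer transition, then randomize them.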
void Network::init(vector<int> &sizes,double _eta,int _cost,double _lmbda,double _dropout)
{
eta=_eta;
cost=_cost;
lmbda=_lmbda;
dropout=_dropout;
int i;
for(i=1;i<sizes.size();i++)
{
Mat _w;
Mat _b;
_w.create(sizes[i],sizes[i-1],CV_64FC1);
_b.create(sizes[i],1,CV_64FC1);
weights.push_back(_w);
bases.push_back(_b);
}
weights_init();
}
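//Stochastic gradient descent: in each epoch the training data is shuffled,
//split into mini-batches of mini_batch_num samples (a trailing partial batch
//is skipped), and each mini-batch triggers one parameter update. Optionally
//the cost and accuracy on the training/test data are reported per epoch.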
void Network::SGD(int epchos,int mini_batch_num,bool track_cost,bool test)
{
int i,j;
for(i=0;i<epchos;i++)
{
random_shuffle(traning_data.begin(),traning_data.end());
for(j=0;j<traning_num;j+=mini_batch_num)
{
vector<Traning_data>mini_batch;
if(j+mini_batch_num<=traning_num)
{
mini_batch.assign(traning_data.begin()+j,traning_data.begin()+j+mini_batch_num);
updata_mini_data(mini_batch);
}
}
cout<<"Epoch "<<i+1<<" training complete"<<endl;
if(track_cost)
{
int correct_traning_num=accury(traning_data);
double total_traning_cost=total_cost(traning_data);
cout<<" Cost on traning data:"<<total_traning_cost<<endl;
cout<<" Accuracy on traning data:"<<correct_traning_num<<"/"<<traning_num<<endl;
}
if(test)
{
int correct_test_num=accury(test_data);
double total_test_cost=total_cost(test_data);
cout<<" Cost on test data:"<<total_test_cost<<endl;
cout<<" Accuracy on test data:"<<correct_test_num<<"/"<<test_num<<endl;
}
}
}
int Network::accury(vector<Traning_data>&_data)
{
int ans=0;
int i;
for(i=0;i<_data.size();i++)
{
Mat out=feedforward(_data[i].x);//store the output so it can be passed by reference
if(getMaxnum(out)==getMaxnum(_data[i].y))
ans++;
}
return ans;
}
int Network::getMaxnum(Mat &t)
{
int ans=0;
int i;
for(i=1;i<t.rows;i++)
if(t.at<double>(i,0)>t.at<double>(ans,0))
ans=i;
return ans;
}
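//Total cost over a data set:
//cost==1: cross-entropy, -1/n * sum over samples of [y*ln(a) + (1-y)*ln(1-a)]
//cost==2: quadratic cost, 1/(2n) * sum over samples of ||a - y||^2
//plus, if lmbda>0, the L2 term lmbda/(2n) * sum of all squared weights.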
double Network::total_cost(vector<Traning_data>&_data)
{
double ans=0;
int i;
Mat t;
if(cost==2)
{
for(i=0;i<_data.size();i++)
{
pow(feedforward(_data[i].x)-_data[i].y,2.0,t);
ans+=0.5*sum(t).val[0];
}
ans/=1.0*_data.size();//the factor 1/2 is already applied above, so divide by n only
}
else if(cost==1)
{
Mat t1,t2;
for(i=0;i<_data.size();i++)
{
t=feedforward(_data[i].x);
log(t,t1);
log(1-t,t2);
ans+=sum(_data[i].y.mul(t1)+(1-_data[i].y).mul(t2)).val[0];
}
ans/=-1.0*_data.size();
}
if(lmbda!=0.0)
{
for(i=0;i<weights.size();i++)
{
pow(weights[i],2.0,t);
ans+=lmbda/_data.size()/2.0*sum(t).val[0];
}
}
return ans;
}
void Network::updata_mini_data(vector<Traning_data>mini_batch)
{
vector<Mat>nabla_w;
vector<Mat>nabla_b;
int i;
for(i=0;i<weights.size();i++)
{
Mat _w,_b;
_w.create(weights[i].rows,weights[i].cols,CV_64FC1);
_b.create(bases[i].rows,bases[i].cols,CV_64FC1);
nabla_w.push_back(_w);
nabla_b.push_back(_b);
}
Mat batch_traning_data_x;
Mat batch_traning_data_y;
batch_traning_data_x.create(mini_batch[0].x.rows,mini_batch.size(),CV_64FC1);
batch_traning_data_y.create(mini_batch[0].y.rows,mini_batch.size(),CV_64FC1);
for(i=0;i<mini_batch.size();i++)
{
mini_batch[i].x.col(0).copyTo(batch_traning_data_x.col(i));
mini_batch[i].y.col(0).copyTo(batch_traning_data_y.col(i));
}
backprop(batch_traning_data_x,batch_traning_data_y,nabla_w,nabla_b);
for(i=0;i<weights.size();i++)
{
weights[i]=(1.0-eta*lmbda/traning_num)*weights[i]-eta/mini_batch.size()*nabla_w[i];
bases[i]=bases[i]-eta/mini_batch.size()*nabla_b[i];
}
}
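//Backpropagation over a whole mini-batch at once (each column is one sample).
//Output-layer error: delta = a - y for the cross-entropy cost, or
//delta = (a - y) .* sigmoid'(z) for the quadratic cost; earlier layers use
//delta_l = (W_{l+1}^T * delta_{l+1}) .* sigmoid'(z_l). Weight gradients are
//delta * a_{l-1}^T and bias gradients are the sums of delta over the batch.
//If dropout<1, the gradients are finally masked so that only a random subset
//of parameters is updated.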
void Network::backprop(Mat &batch_traning_x,Mat &batch_traning_y,vector<Mat>& nabla_w,vector<Mat>& nabla_b)
{
vector<Mat>as;
vector<Mat>zs;
Mat a=batch_traning_x;
as.push_back(a);
int i,j;
for(i=0;i<weights.size();i++)
{
Mat z=weights[i]*a;
for(j=0;j<z.cols;j++)
z.col(j)+=bases[i];
zs.push_back(z);
a=sigmoid(z);
as.push_back(a);
}
Mat delta;
if(cost==1)
delta=as[as.size()-1]-batch_traning_y;
else if(cost==2)
delta=(as[as.size()-1]-batch_traning_y).mul(sigmoid_prime(zs[zs.size()-1]));//element-wise product, not a matrix product
nabla_w[nabla_w.size()-1]=delta*as[as.size()-2].t();
reduce(delta,nabla_b[nabla_b.size()-1],1,CV_REDUCE_SUM);
for(i=2;i<=weights.size();i++)
{
delta=(weights[weights.size()-i+1].t()*delta).mul(sigmoid_prime(zs[zs.size()-i]));
nabla_w[nabla_w.size()-i]=delta*as[as.size()-i-1].t();
reduce(delta,nabla_b[nabla_b.size()-i],1,CV_REDUCE_SUM);
}
if(dropout<1.0)
{
Mat drop_out_mat;
for(i=0;i<nabla_w.size();i++)
{
drop_out_mat=drop_out(nabla_w[i].rows,nabla_w[i].cols);
nabla_w[i]=nabla_w[i].mul(drop_out_mat);
drop_out_mat=drop_out(nabla_b[i].rows,nabla_b[i].cols);
nabla_b[i]=nabla_b[i].mul(drop_out_mat);
}
}
}
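//Build a random 0/1 mask: each entry is 1 with probability `dropout` and 0
//otherwise, so on average a fraction 1-dropout of the gradients is zeroed out.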
Mat Network::drop_out(int rows,int cols)
{
Mat ans;
RNG& rng=theRNG();//use the global RNG so repeated calls produce different masks
ans.create(rows,cols,CV_64FC1);
int i,j;
for(i=0;i<rows;i++)
for(j=0;j<cols;j++)
{
double t=rng.uniform(double(0.0),double(1.0));
if(t>=dropout)
t=0;
else
t=1;
ans.at<double>(i,j)=t;
}
return ans;
}
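//Element-wise logistic function 1/(1+exp(-z)) and its derivative
//sigmoid(z)*(1-sigmoid(z)).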
Mat Network::sigmoid(Mat &z)
{
Mat ans;
exp(-z,ans);
return 1.0/(1.0+ans);
}
Mat Network::sigmoid_prime(Mat &z)
{
Mat ans=sigmoid(z);
return ans.mul(1.0-ans);
}
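//Example: a 784-50-10 network trained with learning rate 0.1, cross-entropy
//cost, L2 parameter lmbda=1 and dropout keep fraction 0.9, for 100 epochs with
//mini-batches of 20 samples, reporting training and test statistics per epoch.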
int main()
{
Network t;
t.read_data();
vector<int>sizes;
sizes.push_back(784);
sizes.push_back(50);
sizes.push_back(10);
t.init(sizes,0.1,1,1,0.9);
t.SGD(100,20,true,true);
getchar();
return 0;
}