/*
这次的版本更优秀了的样子!
按照老板说的,每个节点是单独的导出节点。(注意:sigmoid 层数多了会让训练变慢,
因为梯度会逐层衰减得很快,出现梯度消失;换个激活函数就行了。)
net_t<10> 表示网络有 10 个节点。
input.txt 里的文件格式大概是:
n
0 1
1 2
3 4
....
n 表示有 n 行,每行 2 个数字,表示这 2 个点之间有边。
(节点编号从 0 开始,不超过 net 初始化的节点数量。)任意拓扑结构都可以运行。
*/
#include <bits/stdc++.h>
#include "recordlog.h"
#include <memory>
using std::cin;
using std::endl;
using std::cout;
#define pr(x) cout<<#x<<" = "<<x<<" "
#define prln(x) cout<<#x<<" = "<<x<<endl
// Shorthand macros used inside net_t's forward/backward passes.  They assume
// a local `int node` (current node id) and a local `PID nextnode`
// (outgoing edge: first = target id, second = edge weight) are in scope.
#define NODE (neurons[node])
#define NODE_GAIN (NODE.energy)
#define NODE_THETA (NODE.loss_energy) // theta (bias/threshold) of `node`
#define NODE_VALUE (NODE_GAIN + NODE_THETA) // total pre-activation input of `node` (gain + theta)
#define NODE_OUTPUT (NODE.output) // activation output of `node`
#define NODE_PE (NODE.partial_derivative) // d(error)/d(output) for `node`
#define D_NODE (derivative(NODE_VALUE)) // d(output)/d(pre-activation) for `node`
#define NEXT_NODE (neurons[nextnode.first])
#define NEXT_NODE_OUTPUT (NEXT_NODE.output) // activation output of the edge target
#define NEXT_NODE_GAIN (NEXT_NODE.energy)
#define NEXT_NODE_PE (NEXT_NODE.partial_derivative) // d(error)/d(output) for the edge target
#define NEXT_NODE_THETA (NEXT_NODE.loss_energy) // theta (bias/threshold) of the edge target
#define NEXT_NODE_VALUE (NEXT_NODE_GAIN + NEXT_NODE_THETA) // total pre-activation input of the target
#define D_NEXT_NODE (derivative(NEXT_NODE_VALUE)) // d(output)/d(pre-activation) for the target
#define NODE_TO_NEXTNODE_WEIGHT (nextnode.second) // weight of the edge node -> nextnode
class neuron_t;
typedef std::pair<int, double> PID;          // outgoing edge: (target node id, edge weight)
typedef std::vector<PID> neuron_array_t;     // weighted outgoing edges of one neuron
typedef std::vector<int> vector_map_t;       // plain (unweighted) adjacency list
typedef std::unique_ptr<neuron_t> neuron_ptr_t;

/// One neuron of the network.
/// FIX: all scalar members now carry default initializers — previously a
/// default-constructed neuron_t held garbage until the various net_t passes
/// happened to overwrite each field.
class neuron_t
{
public:
    double energy{0.0};              // summed weighted input ("gain") for the current pass
    double output{0.0};              // activation(energy + loss_energy)
    int number{0};                   // node index inside the net
    double loss_energy{0.0};         // theta: bias / threshold added to the gain
    neuron_array_t neuron_array;     // outgoing edges (target id, weight)
    double partial_derivative{0.0};  // d(error)/d(output), filled during backprop
    bool is_input{false};            // true if this node receives raw input
    bool is_output{false};           // true if this node produces a network output
};
template<int neuron_size>
class net_t
{
public:
neuron_t neurons[neuron_size];
vector_map_t vector_map[neuron_size];
std::string activation_way; //激活函数的选择,默认ReLU
std::vector<double> input_weight;
std::vector<int> output_number;
std::vector<int> input_number;
int tmp[neuron_size]; //临时数组,生成过n的全排列,和拓扑排序中记录入度。
int height[neuron_size]; //辅助构图的高度数组
int topology[neuron_size]; //拓扑序
double rate; //学习率
int userful_neuron_size;
static double sigmoid(double x)
{
return 1.0/(1.0 + exp(-x));
}
static double line(double x)
{
return x;
}
static double ReLU(double x)
{
if (x<=0) return 0;
return x;
}
double derivative(double x)
{
if (activation_way == "sigmoid"){
return sigmoid(x) * (1 - sigmoid(x));
}
if (activation_way == "ReLU"){
if (x<0) return 0;
return 1;
}
if (activation_way == "line"){
return 1;
}
cout<<"no activationFunction!"<<endl;
return 0;
}
double activationFunction(double sum, double theta)
{
if (activation_way == "sigmoid"){
return sigmoid(sum + theta);
}
if (activation_way == "ReLU"){
return ReLU(sum + theta);
}
if (activation_way == "line"){
return line(sum + theta);
}
cout<<"no activationWay !" << endl;
return 0;
}
static double randomDouble(double l, double r)
{
return randomInt(l*10000, r * 10000)/10000.0;
}
static long long randomInt(long long L, long long R)
{
long long tmp = (unsigned long long)rand()
*(unsigned long long)rand()
*(unsigned long long)rand()
*(unsigned long long)rand() % (R - L + 1);
return L + tmp;
}
~net_t()
{
}
net_t (std::string file_name)
{
//初始化新的网络
FILE *file = fopen(file_name.c_str(), "r");
printf("[%s]\n", file_name.c_str());
int n;
fscanf(file, "%d", &n);
this -> activation_way = "sigmoid";
this -> rate = 0.1; //xuexilv
for (int i = 0; i < neuron_size; ++ i){
vector_map[i].clear();
tmp[i] = i;
neurons[i].number = i;
neurons[i].is_input = false;
neurons[i].is_output = false;
}
this -> output_number.clear();
this -> input_number.clear();
prln(neuron_size);
while (n--){
int s, t;
fscanf(file, "%d%d", &s, &t);
cout<<s<<" "<<t<<endl;
vector_map[s].push_back(t);
vector_map[t].push_back(s);
}
fclose(file);
}
net_t()
{
*this = net_t("input.txt");
}
void initInputNeuron(std::vector<int> &input_num){
int sz = input_num.size();
input_weight.resize(sz);
for (int i = 0; i < sz; ++ i){
input_weight[i] = randomDouble(-1, 1);
neurons[input_num[i]].is_input = true;
}
}
void setIO(std::vector<double> &input, std::vector<double> &output, std::vector<int> *input_num = NULL, std::vector<int> *output_num = NULL){
if (input.size() == 0){
//throws something TODO
return;
}
if (output.size() == 0){
//throws something TODO
return;
}
if (input_num && output_num)
{
output_number = *output_num;
input_number = *input_num;
}
else
{
std::random_shuffle(tmp, tmp + neuron_size);
printf("output nodes are: ");
for (int i = 0; i < output.size(); ++ i){
output_number.push_back(tmp[i]);
printf("%d ",tmp[i]);
}
printf("\n");
printf("input nodes are:");
for (int i = output.size(); i < input.size() + output.size(); ++ i){
input_number.push_back(tmp[i]);
printf("%d ",tmp[i]);
}
printf("\n");
}
initInputNeuron(*input_num);
for (int i = 0; i < output.size(); ++ i){
neurons[output_number[i]].is_output = true;
//pr(i),prln(output_number[i]);
}
std::queue<int>q[output.size() + input.size()];
memset(height, -1, sizeof(height));
int painted = output.size();
int cnt=0;
for (auto curnode : output_number){
q[cnt++].push(curnode);
height[curnode] = 0;
}
for (auto curnode : input_number){
q[cnt++].push(curnode);
height[curnode] = neuron_size;
}
bool flag = true;
while (flag){
int cnt = 0;
flag = false;
for (auto curnode : output_number){
flag |= bfs(q[cnt++], 1);
}
for (auto curnode : input_number){
flag |= bfs(q[cnt++], -1);
}
}
auto build_map = [=](int from, int to){
neurons[from].neuron_array.push_back(std::make_pair(to, randomDouble(-1,1)));
};
for (int i = 0; i < neuron_size; ++ i){
for (auto curnode : vector_map[i]){
if (height[i] > height[curnode]){
build_map(i, curnode);
}
}
}
for (int i = 0; i < neuron_size; ++ i){
neurons[i].loss_energy = randomDouble(-1, 1);
}
getTopology();
//至此构造完网络的拓扑结构
}
void getTopology()
{
memset(tmp, 0, sizeof(tmp));
for (int i = 0; i < neuron_size; ++ i){
for (auto nextnode : neurons[i].neuron_array){
++ tmp[nextnode.first];
}
}
std::queue<int>q;
for (auto curnode : input_number){
q.push(curnode);
}
int pos = 0;
while (!q.empty())
{
int curnode = q.front();
q.pop();
topology[pos++] = curnode;
for (auto nextnode : neurons[curnode].neuron_array){
if(-- tmp[nextnode.first] == 0){
q.push(nextnode.first);
}
}
}
userful_neuron_size = pos;
//DEBUG
//for (int i = 0; i < neuron_size; ++ i)
// pr(i),prln(topology[i]);
}
bool bfs(std::queue<int> &q, int delta){
if (q.empty()){
return false;
}
int h = height[q.front()];
while (!q.empty() && height[q.front()] == h){
int curnode = q.front();
q.pop();
for (auto nextnode : vector_map[curnode]){
if (height[nextnode] != -1){
continue;
}
height[nextnode] = h + delta;
q.push(nextnode);
}
}
return true;
}
void cal_propagate(int node){
NODE_OUTPUT = activationFunction(NODE_GAIN, NODE_THETA);
for (auto nextnode : NODE.neuron_array){
NEXT_NODE_GAIN += NODE_OUTPUT * NODE_TO_NEXTNODE_WEIGHT;
}
}
void propagate(std::vector<double> &input){
//TODO
/*
for (int i = 0; i < input_number.size(); ++i)
{
input_weight[i] = 1;
}
*/
for (int i = 0; i < neuron_size; ++ i){
neurons[i].energy = 0;
neurons[i].output = 0;
}
for (int i = 0; i != input.size(); ++ i){
int node = input_number[i];
NODE_GAIN += input_weight[i] * input[i];
}
for (int i = 0; i < userful_neuron_size; ++ i){
int node = topology[i];
cal_propagate(node);
}
}
void cal_back(int node){
for (auto &nextnode : NODE.neuron_array){
NODE_PE += NEXT_NODE_PE * NODE_TO_NEXTNODE_WEIGHT * D_NEXT_NODE;
}
for (auto &nextnode : NODE.neuron_array){
NODE_TO_NEXTNODE_WEIGHT -= NODE_OUTPUT * D_NEXT_NODE * NEXT_NODE_PE * rate;
}
NODE_THETA -= NODE_PE * D_NODE * rate;
}
void back(std::vector<double> &input, std::vector<double> &output){
for (int i = 0; i < neuron_size; ++ i){
neurons[i].partial_derivative = 0;
}
for (int i = 0; i != output.size(); ++ i)
{
int node = output_number[i];
NODE_PE = NODE_OUTPUT - output[i];
NODE_THETA -= NODE_PE * D_NODE * rate;
}
for (int i = userful_neuron_size - 1; i >= 0; -- i){
int node = topology[i];
if (NODE.is_output){
continue;
}
else{
cal_back(node);
}
}
for (int i = 0; i < input.size(); ++ i){
int node = input_number[i];
double tmp = input[i] * NODE_PE * D_NODE;
//prln(tmp);
//prln(input_weight[i]);
input_weight[i] -= tmp * rate;
//prln(input_weight[i]);
}
}
double train(std::vector<double> &input, std::vector<double> &output){
propagate(input);
// outputNetwork();
back(input, output);
double error=0;
for (int i = 0; i < output.size(); ++ i){
error += 0.5*pow((neurons[output_number[i]].output - output[i]), 2);
}
return error;
}
void outputNetwork(){
printf("---------------input nodes------------:\n");
for (int i = 0; i < input_number.size(); ++ i)
{
printf("[%d] weight:(%.7lf) \n", input_number[i], input_weight[i]);
}
printf("---------------other nodes------------\n");
printf("other nodes\n");
for (int i = 0; i < neuron_size; ++ i){
pr(topology[i]),prln(i);
int node = topology[i];
printf("[%d] gain(%.7lf) theta(%.7lf) par_derivative(%.7lf) output(%.7lf) d(%.7lf)\n",
node,
NODE_GAIN,
NODE_THETA,
NODE_PE,
NODE_OUTPUT,
D_NODE);
for (auto nextnode : NODE.neuron_array){
printf(" -> %d (%.7lf)\n", nextnode.first, nextnode.second);
}
}
printf("=============End====================\n");
}
void testOutput(std::vector<double> &input)
{
propagate(input);
cout<<"output: ";
for (auto curnode : output_number)
{
printf("%.7lf ", neurons[curnode].output);
}
cout<<endl;
}
std::vector<double> getTest(std::vector<double> &input)
{
std::vector<double> q;
for (auto curnode : output_number)
{
q.push_back(neurons[curnode].energy);
}
return move(q);
}
};
void doit251()
{
std::vector<int>in({0,1});
std::vector<int>out({7});
net_t<8> net;//("input.txt");
srand(0);
std::vector<double>input1({0,0});
std::vector<double>input2({0,1});
std::vector<double>input3({1,0});
std::vector<double>input4({1,1});
std::vector<double>output1({0});
std::vector<double>output2({1});
std::vector<double>output3({1});
std::vector<double>output4({0});
net.setIO(input1, output1, &in, &out);
net.activation_way = "sigmoid";
net.rate = 20;
/*
net.propagate(input3);
net.outputNetwork();
net.back(input3, output3);
net.outputNetwork();
return;
*/
double error=0;
for (int i = 1;i<=20000;++i){
error = 0;
error += net.train(input1, output1);
error += net.train(input2, output2);
error += net.train(input3, output3);
error += net.train(input4, output4);
error/=4;
cout<<error<<"\r";
}
prln(error);
//prln(net.userful_neuron_size);
net.testOutput(input1);
net.testOutput(input2);
net.testOutput(input3);
net.testOutput(input4);
}
/// Entry point: run the XOR training demo.
int main()
{
    doit251();
}