/*
这次的版本更优秀了的样子!
按照老板说的,每个节点是单独的导出节点。(注意:sigmoid 层数多了会让训练变慢,
因为梯度会逐层衰减得很快,出现梯度消失;换个激活函数就行了。)
net_t<10> 表示网络有 10 个节点。
input.txt 里的文件格式大概是:
n
0 1
1 2
3 4
....
n 表示有 n 行,每行 2 个数字,表示这 2 个点之间有边。
(节点编号从 0 开始,不超过 net 初始化的节点数量。)任意拓扑结构都可以运行。
*/
#include <bits/stdc++.h>
#include "recordlog.h"
#include <memory>
using std::cin;
using std::endl;
using std::cout;
#define pr(x) cout<<#x<<" = "<<x<<" "
#define prln(x) cout<<#x<<" = "<<x<<endl
// Shorthand macros used inside net_t's forward/backward passes.  They assume
// a local `int node` (current node id) and a local `PID nextnode`
// (outgoing edge: first = target id, second = edge weight) are in scope.
#define NODE (neurons[node])
#define NODE_GAIN (NODE.energy)
#define NODE_THETA (NODE.loss_energy) // theta (bias/threshold) of `node`
#define NODE_VALUE (NODE_GAIN + NODE_THETA) // total pre-activation input of `node` (gain + theta)
#define NODE_OUTPUT (NODE.output) // activation output of `node`
#define NODE_PE (NODE.partial_derivative) // d(error)/d(output) for `node`
#define D_NODE (derivative(NODE_VALUE)) // d(output)/d(pre-activation) for `node`
#define NEXT_NODE (neurons[nextnode.first])
#define NEXT_NODE_OUTPUT (NEXT_NODE.output) // activation output of the edge target
#define NEXT_NODE_GAIN (NEXT_NODE.energy)
#define NEXT_NODE_PE (NEXT_NODE.partial_derivative) // d(error)/d(output) for the edge target
#define NEXT_NODE_THETA (NEXT_NODE.loss_energy) // theta (bias/threshold) of the edge target
#define NEXT_NODE_VALUE (NEXT_NODE_GAIN + NEXT_NODE_THETA) // total pre-activation input of the target
#define D_NEXT_NODE (derivative(NEXT_NODE_VALUE)) // d(output)/d(pre-activation) for the target
#define NODE_TO_NEXTNODE_WEIGHT (nextnode.second) // weight of the edge node -> nextnode
class neuron_t;
typedef std::pair<int, double> PID;          // outgoing edge: (target node id, edge weight)
typedef std::vector<PID> neuron_array_t;     // weighted outgoing edges of one neuron
typedef std::vector<int> vector_map_t;       // plain (unweighted) adjacency list
typedef std::unique_ptr<neuron_t> neuron_ptr_t;

/// One neuron of the network.
/// FIX: all scalar members now carry default initializers — previously a
/// default-constructed neuron_t held garbage until the various net_t passes
/// happened to overwrite each field.
class neuron_t
{
public:
    double energy{0.0};              // summed weighted input ("gain") for the current pass
    double output{0.0};              // activation(energy + loss_energy)
    int number{0};                   // node index inside the net
    double loss_energy{0.0};         // theta: bias / threshold added to the gain
    neuron_array_t neuron_array;     // outgoing edges (target id, weight)
    double partial_derivative{0.0};  // d(error)/d(output), filled during backprop
    bool is_input{false};            // true if this node receives raw input
    bool is_output{false};           // true if this node produces a network output
};
template<int neuron_size>
class net_t
{
public:
neuron_t neurons[neuron_size];
vector_map_t vector_map[neuron_size];
std::string activation_way; //激活函数的选择,默认ReLU
std::vector<double> input_weight;
std::vector<int> output_number;
std::vector<int> input_number;
int tmp[neuron_size]; //临时数组,生成过n的全排列,和拓扑排序中记录入度。
int height[neuron_size]; //辅助构图的高度数组
int topology[neuron_size]; //拓扑序
double rate; //学习率
int userful_neuron_size;
static double sigmoid(double x)
{
return 1.0/(1.0 + exp(-x));
}
static double line(double x)
{
return x;
}
static double ReLU(double x)
{
if (x<=0) return 0;
return x;
}
double derivative(double x)
{
if (activation_way == "sigmoid"){
return sigmoid(x) * (1 - sigmoid(x));
}
if (activation_way == "ReLU"){
if (x<0) return 0;
return 1;
}
if (activation_way == "line"){
return 1;
}
cout<<"no activationFunction!"<<endl;
return 0;
}
double activationFunction(double sum, double theta)
{
if (activation_way == "sigmoid"){
return sigmoid(sum + theta);
}
if (activation_way == "ReLU"){
return ReLU(sum + theta);
}
if (activation_way == "line"){
return line(sum + theta);
}
cout<<"no activationWay !" << endl;
return 0;
}
static double randomDouble(double l, double r)
{
return randomInt(l*10000, r * 10000)/10000.0;
}
static long long randomInt(long long L, long long R)
{
long long tmp = (unsigned long long)rand()
*(unsigned long long)rand()
*(unsigned long long)rand()
*(unsigned long long)rand() % (R - L + 1);
return L + tmp;
}
~net_t()
{
}
net_t (std::string file_name)
{
//初始化新的网络
FILE *file = fopen(file_name.c_str(), "r");
printf("[%s]\n", file_name.c_str());
int n;
fscanf(file, "%d", &n);
this -> activation_way = "sigmoid";
this -> rate = 0.1; //xuexilv
for (int i = 0; i < neuron_size; ++ i){
vector_map[i].clear();
tmp[i] = i;
neurons[i].number = i;
neurons[i].is_input = false;
neurons[i].is_output = false;
}
this -> output_number.clear();
this -> input_number.clear();
prln(neuron_size);
while (n--){
int s, t;
fscanf(file, "%d%d", &s, &t);
cout<<s<<" "<<t<<endl;
vector_map[s].push_back(t);
vector_map[t].push_back(s);
}
fclose(file);
}
net_t()
{
*this = net_t("input.txt");
}
void initInputNeuron(std::vector<int> &input_num){
int sz = input_num.size();
input_weight.resize(sz);
for (int i = 0; i < sz; ++ i){
input_weight[i] = randomDouble(-1, 1);
neurons[input_num[i]].is_input = true;
}
}
void setIO(std::vector<double> &input, std::vector<double> &output, std::vector<int> *input_num = NULL, std::vector<int> *output_num = NULL){
if (input.size() == 0){
//throws something TODO
return;
}
if (output.size() == 0){
//throws something TODO
return;
}
if (input_num && output_num)
{
output_number = *output_num;
input_number = *input_num;
}
else
{
std::random_shuffle(tmp, tmp + neuron_size);
printf("output nodes are: ");
for (int i = 0; i < output.size(); ++ i){
output_number.push_back(tmp[i]);
printf("%d ",tmp[i]);
}
printf("\n");
printf("input nodes are:");
for (int i = output.size(); i < input.size() + output.size(); ++ i){
input_number.push_back(tmp[i]);
printf("%d ",tmp[i]);
}
printf("\n");
}
initInputNeuron(*input_num);
for (int i = 0; i < output.size(); ++ i){
neurons[output_number[i]].is_output = true;
//pr(i),prln(output_number[i]);
}
std::queue<int>q[output.size() + input.size()];
memset(height, -1, sizeof(height));
int painted = output.size();
int cnt=0;
for (auto curnode : output_number){
q[cnt++].push(curnode);
height[curnode] = 0;
}
for (auto curnode : input_number){
q[cnt++].push(curnode);
height[curnode] = neuron_size;
}
bool flag = true;
while (flag){
int cnt = 0;
flag = false;
for (auto curnode : output_number){
flag |= bfs(q[cnt++], 1);
}
for (auto curnode : input_number){
flag |= bfs(q[cnt++], -1);
}
}
auto build_map = [=](int from, int to){
neurons[from].neuron_array.push_back(std::make_pair(to, randomDouble(-1,1)));
};
for (int i = 0; i < neuron_size; ++ i){
for (auto curnode : vector_map[i]){
if (height[i] > height[curnode]){
build_map(i, curnode);
}
}
}
for (int i = 0; i < neuron_size; ++ i){
neurons[i].loss_energy = randomDouble(-1, 1);
}
getTopology();
//至此构造完网络的拓扑结构
}
void getTopology()
{
memset(tmp, 0, sizeof(tmp));
for (int i = 0; i < neuron_size; ++ i){
for (auto nextnode : neurons[i].neuron_array){
++ tmp[nextnode.first];
}
}
std::queue<int>q;
for (auto curnode : input_number){
q.push(curnode);
}
int pos = 0;
while (!q.empty())
{
int curnode = q.front();
q.pop();
topology[pos++] = curnode;
for (auto nextnode : neurons[curnode].neuron_array){
if(-- tmp[nextnode.first] == 0){
q.push(nextnode.first);
}
}
}
userful_neuron_size = pos;
//DEBUG
//for (int i = 0; i < neuron_size; ++ i)
// pr(i),prln(topology[i]);
}
bool bfs(std::queue<int> &q, int delta){
if (q.empty()){
return false;
}
int h = height[q.front()];
while (!q.empty() && height[q.front()] == h){
int curnode = q.front();
q.pop();
for (auto nextnode : vector_map[curnode]){
if (height[nextnode] != -1){
continue;
}
height[nextnode] = h + delta;
q.push(nextnode);
}
}
return true;
}
void cal_propagate(int node){
NODE_OUTPUT = activationFunction(NODE_GAIN, NODE_THETA);
for (auto nextnode : NODE.neuron_array){
NEXT_NODE_GAIN += NODE_OUTPUT * NODE_TO_NEXTNODE_WEIGHT;
}
}
void propagate(std::vector<double> &input){
//TODO
/*
for (int i = 0; i < input_number.size(); ++i)
{
input_weight[i] = 1;
}
*/
for (int i = 0; i < neuron_size; ++ i){
neurons[i].energy = 0;
neurons[i].output = 0;
}
for (int i = 0; i != input.size(); ++ i){
int node = input_number[i];
NODE_GAIN += input_weight[i] * input[i];
}
for (int i = 0; i < userful_neuron_size; ++ i){
int node = topology[i];
cal_propagate(node);
}
}
void cal_back(int node){
for (auto &nextnode : NODE.neuron_array){
NODE_PE += NEXT_NODE_PE * NODE_TO_NEXTNODE_WEIGHT * D_NEXT_NODE;
}
for (auto &nextnode : NODE.neuron_array){
NODE_TO_NEXTNODE_WEIGHT -= NODE_OUTPUT * D_NEXT_NODE * NEXT_NODE_PE * rate;
}
NODE_THETA -= NODE_PE * D_NODE * rate;
}
void back(std::vector<double> &input, std::vector<double> &output){
for (int i = 0; i < neuron_size; ++ i){
neurons[i].partial_derivative = 0;
}
for (int i = 0; i != output.size(); ++ i)
{
int node = output_number[i];
NODE_PE = NODE_OUTPUT - output[i];
NODE_THETA -= NODE_PE * D_NODE * rate;
}
for (int i = userful_neuron_size - 1; i >= 0; -- i){
int node = topology[i];
if (NODE.is_output){
continue;
}
else{
cal_back(node);
}
}
for (int i = 0; i < input.size(); ++ i){
int node = input_number[i];
double tmp = input[i] * NODE_PE * D_NODE;
//prln(tmp);
//prln(input_weight[i]);
input_weight[i] -= tmp * rate;
//prln(input_weight[i]);
}
}
double train(std::vector<double> &input, std::vector<double> &output){
propagate(input);
// outputNetwork();
back(input, output);
double error=0;
for (int i = 0; i < output.size(); ++ i){
error += 0.5*pow((neurons[output_number[i]].output - output[i]), 2);
}
return error;
}
void outputNetwork(){
printf("---------------input nodes------------:\n");
for (int i = 0; i < input_number.size(); ++ i)
{
printf("[%d] weight:(%.7lf) \n", input_number[i], input_weight[i]);
}
printf("---------------other nodes------------\n");
printf("other nodes\n");
for (int i = 0; i < neuron_size; ++ i){
pr(topology[i]),prln(i);
int node = topology[i];
printf("[%d] gain(%.7lf) theta(%.7lf) par_derivative(%.7lf) output(%.7lf) d(%.7lf)\n",
node,
NODE_GAIN,
NODE_THETA,
NODE_PE,
NODE_OUTPUT,
D_NODE);
for (auto nextnode : NODE.neuron_array){
printf(" -> %d (%.7lf)\n", nextnode.first, nextnode.second);
}
}
printf("=============End====================\n");
}
void testOutput(std::vector<double> &input)
{
propagate(input);
cout<<"output: ";
for (auto curnode : output_number)
{
printf("%.7lf ", neurons[curnode].output);
}
cout<<endl;
}
std::vector<double> getTest(std::vector<double> &input)
{
std::vector<double> q;
for (auto curnode : output_number)
{
q.push_back(neurons[curnode].energy);
}
return move(q);
}
};
void doit251()
{
std::vector<int>in({0,1});
std::vector<int>out({7});
net_t<8> net;//("input.txt");
srand(0);
std::vector<double>input1({0,0});
std::vector<double>input2({0,1});
std::vector<double>input3({1,0});
std::vector<double>input4({1,1});
std::vector<double>output1({0});
std::vector<double>output2({1});
std::vector<double>output3({1});
std::vector<double>output4({0});
net.setIO(input1, output1, &in, &out);
net.activation_way = "sigmoid";
net.rate = 20;
/*
net.propagate(input3);
net.outputNetwork();
net.back(input3, output3);
net.outputNetwork();
return;
*/
double error=0;
for (int i = 1;i<=20000;++i){
error = 0;
error += net.train(input1, output1);
error += net.train(input2, output2);
error += net.train(input3, output3);
error += net.train(input4, output4);
error/=4;
cout<<error<<"\r";
}
prln(error);
//prln(net.userful_neuron_size);
net.testOutput(input1);
net.testOutput(input2);
net.testOutput(input3);
net.testOutput(input4);
}
/// Entry point: run the XOR training demo.
int main()
{
    doit251();
}