Implementing a Shallow Fully-Connected Neural Network in C
1. Introduction
The past year or two have been busy, and it has been a long time since I last wrote a blog post~. While preparing for my graduate school interview, I remembered that I wrote a neural network in C two years ago, so I decided to write this post to revisit the low-level mechanics of neural networks. If you clicked on this post, you probably also want to understand, from a fairly low-level language, exactly what happens during forward and backward propagation while a network trains.
PS1: I still remember that back then, having just learned the basic theory of ML and DL, I asked my advisor whether I should practice with PyTorch or TensorFlow, and he told me to use C *_*.
It took me a week of spare time to write this code and get it running. Nowadays large language models can generate similar code with ease, which stirs up a lot of feelings; but the reason we study deep learning is precisely the hope that one day we can take part in the world of AI magic ourselves.
PS2: This post does not re-explain basic neural-network concepts; if any of them are unfamiliar, please look them up first.
2. Data Preparation and Preprocessing
Create two txt files in the working directory.
trainData.txt: each line is one sample with three values num1, num2, y, where y labels how close num1 and num2 are (0 if they are the same, 1 if they differ).
0 0 0
0 1 1
1 0 1
1 1 0
0.8 0.8 0
0.6 0.6 0
0.4 0.4 0
0.2 0.2 0
1.0 0.8 1
1.0 0.6 1
1.0 0.4 1
1.0 0.2 1
0.8 0.6 1
0.6 0.4 1
0.4 0.2 1
0.2 0 1
0.999 0.666 1
0.666 0.333 1
0.333 0 1
0.8 0.4 1
0.4 0 1
0 0.123 1
0.12 0.23 1
0.23 0.34 1
0.34 0.45 1
0.45 0.56 1
0.56 0.67 1
0.67 0.78 1
0.78 0.89 1
0.89 0.99 1
testData.txt: each line is one sample with two values num1, num2. The goal of building the network is to predict y' for data like this, i.e. to judge how close num1 and num2 are.
0.111 0.112
0.001 0.999
0.123 0.345
0.123 0.456
0.123 0.789
0.234 0.567
0.234 0.678
0.387 0.401
0.616 0.717
0.701 0.919
1.01 1.01
PS: Some readers may ask: couldn't I just compare num1 and num2 with an if/else to get y?
Indeed. This admittedly naive data set was designed purely for convenience; the focus here is on how to build the neural network, isn't it?
3. Loading the Data Sets
- Define a struct for the data set that stores both the inputs and the outputs (note the fixed capacity of 30 samples):
/*
Data-set struct that stores both inputs and outputs
*/
typedef struct Sample{
    double out[30][OUTNODE]; // outputs (at most 30 samples)
    double in[30][INNODE];   // inputs (at most 30 samples)
}Sample;
- Read the training and test sets from file into a Sample struct (trainSize and testSize are global counters, declared in the full listing in section 8):
/*
Read the training set from file into the Sample struct
*/
Sample* getTrainData(const char * filename){
    Sample* result = (Sample*)malloc(sizeof(Sample));
    FILE * file = fopen(filename, "r");
    if(file != NULL){
        int count = 0;
        // expect 3 values per line; the count < 30 guard keeps the fixed-size buffers from overflowing
        while (count < 30 && fscanf(file, "%lf %lf %lf", &result->in[count][0], &result->in[count][1], &result->out[count][0]) == 3){
            ++count;
        }
        trainSize = count;
        printf("%s loaded\n", filename);
        fclose(file);
        return result;
    } else{
        // note: never fclose(NULL), that is undefined behavior
        printf("failed to open %s!\n\a", filename);
        free(result);
        return NULL;
    }
}
Sample * getTestData(const char * filename){
    Sample * result = (Sample*)malloc(sizeof(Sample));
    FILE * file = fopen(filename, "r");
    if(file != NULL){
        int count = 0;
        // expect 2 values per line; guard against overflowing the fixed-size buffers
        while (count < 30 && fscanf(file, "%lf %lf", &result->in[count][0], &result->in[count][1]) == 2){
            ++count;
        }
        testSize = count;
        printf("%s loaded\n", filename);
        fclose(file);
        return result;
    } else{
        printf("failed to open %s!\n\a", filename);
        free(result);
        return NULL;
    }
}
- Print the data sets:
void printTrainData(Sample * data, int size){
    if(data == NULL){
        printf("sample is NULL!\n\a");
        return;
    }
    for (int i = 0; i < size; ++i) {
        printf("%d, x1 = %f, x2 = %f, y = %f\n", i + 1, data->in[i][0], data->in[i][1], data->out[i][0]);
    }
}
void printTestData(Sample * data, int size){
    if(data == NULL){
        printf("sample is NULL!\n\a");
        return;
    }
    for (int i = 0; i < size; ++i) {
        printf("%d, x1 = %f, x2 = %f\n", i + 1, data->in[i][0], data->in[i][1]);
    }
}
4. Defining the Network
Hyperparameters
- We build a simple three-layer network (input layer, one hidden layer, output layer).
#define INNODE 2            // number of input neurons
#define HIDENODE 4          // number of hidden neurons
#define OUTNODE 1           // number of output neurons
#define LEARNING_RATE 0.05  // learning rate
Neuron
- Define a neuron struct holding the weights, bias, pre-activation, activation and gradients that the initialization and training code below will use; the layer arrays built from it are shown right after the struct.
typedef struct NODE{ // neuron
    double *W;   // weights (one per input from the previous layer)
    double b;    // bias
    double Z;    // pre-activation (linear combination)
    int g;       // activation function type, 1: sigmoid (unused in this demo)
    double A;    // activation (output value)
    double dZ;   // gradient of the loss w.r.t. Z
    double *dW;  // gradient of the loss w.r.t. W
    double db;   // gradient of the loss w.r.t. b
}Node;
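The network itself is just three global arrays of these nodes (taken from the full listing in section 8):
Node inputLayer[INNODE];  // input layer
Node hideLayer[HIDENODE]; // hidden layer
Node outLayer[OUTNODE];   // output layer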
Activation Function
- Implement the Sigmoid activation used for the neurons' outputs:
double sigmoid(double x){
return 1.0 / (1.0 + exp(-x));
}
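The backward pass later needs the derivative of sigmoid, which has the convenient closed form sigmoid'(x) = sigmoid(x) * (1 - sigmoid(x)). The original code inlines this expression; purely as a minimal sketch (this helper is not part of the original program), it could be factored out:
double sigmoid_prime(double x){
    double s = sigmoid(x);  // reuse the activation value
    return s * (1.0 - s);   // d/dx sigmoid(x) = sigmoid(x) * (1 - sigmoid(x))
}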
Initialization
- Initialize the network's weights and biases to prepare for training.
// initialize the neurons
void init(){
    // seed the random number generator with the current time
    srand(time(NULL));
    // hidden layer
    for (int i = 0; i < HIDENODE; ++i)
    {
        // weights: each neuron gets one weight per output of the previous layer (this layer's inputs)
        hideLayer[i].W = (double*)malloc(sizeof(double)*INNODE);
        hideLayer[i].dW = (double*)malloc(sizeof(double)*INNODE);
        for (int j = 0; j < INNODE; ++j)
        {
            hideLayer[i].W[j] = rand() % 10000 / (double)10000 * 2 - 1.0; // uniform in [-1, 1)
            hideLayer[i].dW[j] = 0.0;
        }
        // bias, activation and gradients
        hideLayer[i].b = 0.0;
        hideLayer[i].db = 0.0;
        hideLayer[i].A = 0.0;
        hideLayer[i].dZ = 0.0;
    }
    // output layer
    for (int i = 0; i < OUTNODE; ++i)
    {
        // weights
        outLayer[i].W = (double*)malloc(sizeof(double)*HIDENODE);
        outLayer[i].dW = (double*)malloc(sizeof(double)*HIDENODE);
        for (int j = 0; j < HIDENODE; ++j)
        {
            outLayer[i].W[j] = rand() % 10000 / (double)10000 * 2 - 1.0; // uniform in [-1, 1)
            outLayer[i].dW[j] = 0.0;
        }
        // bias, activation and gradients
        outLayer[i].b = 0.0;
        outLayer[i].db = 0.0;
        outLayer[i].A = 0.0;
        outLayer[i].dZ = 0.0;
    }
}
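A note on the initialization scale: uniform weights in [-1, 1) work fine for a network this small, but for wider or deeper layers a fan-in-scaled scheme usually converges more reliably (LeCun-style scales by 1/sqrt(fan_in); Xavier/Glorot uses a sqrt(6/(fan_in+fan_out)) range). A hypothetical drop-in helper, not part of the original code:
// scale a uniform [-1, 1) draw by sqrt(1 / fan_in), where fan_in is the
// number of inputs feeding the neuron (INNODE for the hidden layer)
double scaled_init(int fan_in){
    double r = rand() % 10000 / (double)10000 * 2 - 1.0; // uniform in [-1, 1)
    return r * sqrt(1.0 / fan_in);
}
Usage would be hideLayer[i].W[j] = scaled_init(INNODE); in the loop above.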
5. Forward and Backward Propagation
Forward Propagation & Loss Function
- Propagate from the input layer to the output layer, computing each neuron's activation; the binary cross-entropy loss is computed at the end for monitoring.
// forward pass for one training sample (data set, sample index, current epoch)
void forward(Sample * trainData, int current_train_pos, int num){
    double loss = 0.0; // loss for this sample
    // load the sample into the input layer
    for (int inputLayer_pos = 0; inputLayer_pos < INNODE; ++inputLayer_pos)
    {
        inputLayer[inputLayer_pos].A = trainData->in[current_train_pos][inputLayer_pos];
    }
    // input layer -> hidden layer
    for (int hideLayer_pos = 0; hideLayer_pos < HIDENODE; ++hideLayer_pos)
    {
        double sum = 0.0;
        for (int inputLayer_pos = 0; inputLayer_pos < INNODE; ++inputLayer_pos)
        {
            sum += inputLayer[inputLayer_pos].A * hideLayer[hideLayer_pos].W[inputLayer_pos];
        }
        sum += hideLayer[hideLayer_pos].b;
        hideLayer[hideLayer_pos].Z = sum;
        hideLayer[hideLayer_pos].A = sigmoid(hideLayer[hideLayer_pos].Z); // apply the activation
    }
    // hidden layer -> output layer
    for (int outLayer_pos = 0; outLayer_pos < OUTNODE; ++outLayer_pos){
        double sum = 0.0;
        for (int hideLayer_pos = 0; hideLayer_pos < HIDENODE; ++hideLayer_pos){
            sum += hideLayer[hideLayer_pos].A * outLayer[outLayer_pos].W[hideLayer_pos];
        }
        sum += outLayer[outLayer_pos].b;
        outLayer[outLayer_pos].Z = sum;
        outLayer[outLayer_pos].A = sigmoid(outLayer[outLayer_pos].Z); // apply the activation
    }
    // binary cross-entropy loss (for monitoring only; backward() computes its gradient directly)
    for (int outLayer_pos = 0; outLayer_pos < OUTNODE; ++outLayer_pos){
        loss += -(trainData->out[current_train_pos][outLayer_pos] * log(outLayer[outLayer_pos].A) + (1 - trainData->out[current_train_pos][outLayer_pos]) * log(1 - outLayer[outLayer_pos].A));
        // squared-error alternative:
        // loss += (outLayer[outLayer_pos].A - trainData->out[current_train_pos][outLayer_pos]) * (outLayer[outLayer_pos].A - trainData->out[current_train_pos][outLayer_pos]) / 2.0;
    }
    loss /= OUTNODE; // average over the output neurons
    // print the loss of the last sample every 10000 epochs
    if (num % 10000 == 0 && current_train_pos == trainSize - 1)
    {
        printf("epoch %d, loss = %f\n", num, loss);
    }
}
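Why can the backward pass below start directly from dZ = A - y, with no explicit sigmoid derivative at the output? For a single sigmoid output A = sigmoid(Z) under the binary cross-entropy loss L = -(y*log(A) + (1-y)*log(1-A)), the chain rule gives
    dL/dA = (A - y) / (A * (1 - A))
    dA/dZ = A * (1 - A)
so the two factors cancel and dL/dZ = A - y. This cancellation is a standard property of pairing a sigmoid output with cross-entropy; it is also why the commented-out squared-error loss would require a different dZ.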
Backpropagation & Gradient Descent
- Propagate gradients from the output layer back toward the input layer, then update the weights and biases by gradient descent.
// backward pass for one training sample (data set, sample index)
void backward(Sample * trainData, int current_train_pos){
    // output-layer gradients
    for (int outLayer_pos = 0; outLayer_pos < OUTNODE; ++outLayer_pos){
        outLayer[outLayer_pos].dZ = outLayer[outLayer_pos].A - trainData->out[current_train_pos][0]; // dZ = A - y (see the derivation above)
        for (int hideLayer_pos = 0; hideLayer_pos < HIDENODE; ++hideLayer_pos){
            outLayer[outLayer_pos].dW[hideLayer_pos] = outLayer[outLayer_pos].dZ * hideLayer[hideLayer_pos].A;
        }
        outLayer[outLayer_pos].db = outLayer[outLayer_pos].dZ;
    }
    // hidden-layer gradients
    for (int hideLayer_pos = 0; hideLayer_pos < HIDENODE; ++hideLayer_pos){
        // with a single output neuron this loop is not strictly necessary; with several
        // output neurons the contributions would have to be summed rather than overwritten
        for (int outLayer_pos = 0; outLayer_pos < OUTNODE; ++outLayer_pos){
            hideLayer[hideLayer_pos].dZ = outLayer[outLayer_pos].W[hideLayer_pos] * outLayer[outLayer_pos].dZ * (sigmoid(hideLayer[hideLayer_pos].Z) * (1-sigmoid(hideLayer[hideLayer_pos].Z))); // dZ[1] = W[2]*dZ[2]*g[1]'(Z[1])
        }
        for (int inputLayer_pos = 0; inputLayer_pos < INNODE; ++inputLayer_pos){
            hideLayer[hideLayer_pos].dW[inputLayer_pos] = hideLayer[hideLayer_pos].dZ * trainData->in[current_train_pos][inputLayer_pos];
        }
        // dW = dW / INNODE; // averaging variant (not used)
        hideLayer[hideLayer_pos].db = hideLayer[hideLayer_pos].dZ;
    }
    // gradient descent (updating after every training sample --> stochastic gradient descent)
    for (int hideLayer_pos = 0; hideLayer_pos < HIDENODE; hideLayer_pos++)
    {
        for (int inputLayer_pos = 0; inputLayer_pos < INNODE; inputLayer_pos++)
        {
            hideLayer[hideLayer_pos].W[inputLayer_pos] -= LEARNING_RATE * hideLayer[hideLayer_pos].dW[inputLayer_pos];
        }
        hideLayer[hideLayer_pos].b -= LEARNING_RATE * hideLayer[hideLayer_pos].db;
    }
    for (int outLayer_pos = 0; outLayer_pos < OUTNODE; outLayer_pos++)
    {
        for (int hideLayer_pos = 0; hideLayer_pos < HIDENODE; hideLayer_pos++)
        {
            outLayer[outLayer_pos].W[hideLayer_pos] -= LEARNING_RATE * outLayer[outLayer_pos].dW[hideLayer_pos];
        }
        outLayer[outLayer_pos].b -= LEARNING_RATE * outLayer[outLayer_pos].db;
    }
}
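Hand-written backprop is easy to get subtly wrong, so a quick finite-difference sanity check is worth a few lines. The sketch below is not part of the original program; sampleLoss and gradCheck are hypothetical helpers built on the forward() function and global layers above. It compares the analytic gradient of one output weight against a central difference:
// recompute the forward pass and return the BCE loss for one sample
// (passing num = 1 keeps forward() from printing)
double sampleLoss(Sample *d, int pos){
    forward(d, pos, 1);
    double y = d->out[pos][0];
    double A = outLayer[0].A;
    return -(y * log(A) + (1.0 - y) * log(1.0 - A));
}
// compare analytic vs. numeric gradient for outLayer[0].W[0]
void gradCheck(Sample *d, int pos){
    const double eps = 1e-5;
    forward(d, pos, 1);
    // analytic: dL/dW = dZ * A_hidden, with dZ = A - y
    double analytic = (outLayer[0].A - d->out[pos][0]) * hideLayer[0].A;
    // numeric: central difference on the same weight
    double saved = outLayer[0].W[0];
    outLayer[0].W[0] = saved + eps;
    double lossPlus = sampleLoss(d, pos);
    outLayer[0].W[0] = saved - eps;
    double lossMinus = sampleLoss(d, pos);
    outLayer[0].W[0] = saved; // restore the weight
    printf("grad check: analytic = %.8f, numeric = %.8f\n",
           analytic, (lossPlus - lossMinus) / (2.0 * eps));
}
Calling gradCheck(train, 0) once after init() should print two nearly identical numbers; a large gap points to a bug in backward().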
6. Training and Predicting with the Network
Prediction Function
- Run the test set through the network and record the predictions, i.e. a plain forward pass using the weights that the propagation algorithms just trained.
void predict(Sample * testData){
    // prediction is just a forward pass
    for (int current_test_pos = 0; current_test_pos < testSize; ++current_test_pos){
        // load the test sample into the input layer
        for (int inputLayer_pos = 0; inputLayer_pos < INNODE; ++inputLayer_pos)
        {
            inputLayer[inputLayer_pos].A = testData->in[current_test_pos][inputLayer_pos];
        }
        // input layer -> hidden layer
        for (int hideLayer_pos = 0; hideLayer_pos < HIDENODE; ++hideLayer_pos){
            double sum = 0.0;
            for (int inputLayer_pos = 0; inputLayer_pos < INNODE; ++inputLayer_pos)
            {
                sum += inputLayer[inputLayer_pos].A * hideLayer[hideLayer_pos].W[inputLayer_pos];
            }
            sum += hideLayer[hideLayer_pos].b;
            hideLayer[hideLayer_pos].Z = sum;
            hideLayer[hideLayer_pos].A = sigmoid(hideLayer[hideLayer_pos].Z); // apply the activation
        }
        // hidden layer -> output layer
        for (int outLayer_pos = 0; outLayer_pos < OUTNODE; ++outLayer_pos){
            double sum = 0.0;
            for (int hideLayer_pos = 0; hideLayer_pos < HIDENODE; ++hideLayer_pos){
                sum += hideLayer[hideLayer_pos].A * outLayer[outLayer_pos].W[hideLayer_pos];
            }
            sum += outLayer[outLayer_pos].b;
            outLayer[outLayer_pos].Z = sum;
            outLayer[outLayer_pos].A = sigmoid(outLayer[outLayer_pos].Z); // apply the activation
            // store the prediction
            testData->out[current_test_pos][0] = outLayer[outLayer_pos].A;
        }
    }
}
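Since the output neuron is a sigmoid, predict_y is a score in (0, 1). If a hard 0/1 label is wanted, it can be thresholded at 0.5 inside the print loop of main() below (an optional step, not in the original code):
int label = test->out[i][0] >= 0.5 ? 1 : 0; // 1: different, 0: same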
Main Function
- The main function ties everything together: load the data, initialize the network, train, and predict.
int main(){
    Sample* train = getTrainData("trainData.txt");
    // printTrainData(train, trainSize);
    Sample* test = getTestData("testData.txt");
    // printTestData(test, testSize);
    init();
    // train for 100000 epochs over the whole training set
    for (int train_times = 0; train_times < 100000; ++train_times)
    {
        for (int train_dataNum = 0; train_dataNum < trainSize; train_dataNum++)
        {
            forward(train, train_dataNum, train_times);
            backward(train, train_dataNum);
        }
    }
    predict(test);
    for (int i = 0; i < testSize; i++)
    {
        printf("%d: x1=%f, x2=%f, predict_y=%f\n", i+1, test->in[i][0], test->in[i][1], test->out[i][0]);
    }
    return 0;
}
7. Results
trainData.txt loaded
testData.txt loaded
epoch 0, loss = 0.483835
epoch 10000, loss = 0.004757
epoch 20000, loss = 0.001843
epoch 30000, loss = 0.001124
epoch 40000, loss = 0.000803
epoch 50000, loss = 0.000623
epoch 60000, loss = 0.000508
epoch 70000, loss = 0.000428
epoch 80000, loss = 0.000369
epoch 90000, loss = 0.000325
1: x1=0.111000, x2=0.112000, predict_y=0.000218
2: x1=0.001000, x2=0.999000, predict_y=1.000000
3: x1=0.123000, x2=0.345000, predict_y=1.000000
4: x1=0.123000, x2=0.456000, predict_y=1.000000
5: x1=0.123000, x2=0.789000, predict_y=1.000000
6: x1=0.234000, x2=0.567000, predict_y=1.000000
7: x1=0.234000, x2=0.678000, predict_y=1.000000
8: x1=0.387000, x2=0.401000, predict_y=0.000869
9: x1=0.616000, x2=0.717000, predict_y=0.999683
10: x1=0.701000, x2=0.919000, predict_y=1.000000
11: x1=1.010000, x2=1.010000, predict_y=0.000308
- The loss keeps decreasing during training, showing that the model is converging.
- On the test set, the predictions are close to the true labels, so the model generalizes reasonably well.
- Because the random seed differs between runs, the exact numbers will vary from run to run; that is normal. As long as the two trends above hold, the implementation is working.
8. Complete Source
network.c:
#include<stdio.h>
#include<stdlib.h>
#include<math.h>
#include<time.h>
#define INNODE 2            // number of input neurons
#define HIDENODE 4          // number of hidden neurons
#define OUTNODE 1           // number of output neurons
#define LEARNING_RATE 0.05  // learning rate
typedef struct NODE{ // neuron
    double *W;   // weights (one per input from the previous layer)
    double b;    // bias
    double Z;    // pre-activation (linear combination)
    int g;       // activation function type, 1: sigmoid (unused in this demo)
    double A;    // activation (output value)
    double dZ;   // gradient of the loss w.r.t. Z
    double *dW;  // gradient of the loss w.r.t. W
    double db;   // gradient of the loss w.r.t. b
}Node;
// when to make a member a pointer: when it holds many values (one per connection)
/**
 * sample (fixed capacity of 30)
 */
typedef struct Sample{
    double out[30][OUTNODE]; // outputs
    double in[30][INNODE];   // inputs
}Sample;
/**
 * input layer
 */
Node inputLayer[INNODE];
/**
 * hidden layer
 */
Node hideLayer[HIDENODE];
/**
 * output layer
 */
Node outLayer[OUTNODE];
/**
 * sigmoid activation function
 */
double sigmoid(double x){
return 1.0 / (1.0 + exp(-x));
}
// ------------------------------------------------------ network structure definition
int trainSize = 0; // number of training samples
Sample* getTrainData(const char * filename){
    Sample* result = (Sample*)malloc(sizeof(Sample));
    FILE * file = fopen(filename, "r");
    if(file != NULL){
        int count = 0;
        // expect 3 values per line; the count < 30 guard keeps the fixed-size buffers from overflowing
        while (count < 30 && fscanf(file, "%lf %lf %lf", &result->in[count][0], &result->in[count][1], &result->out[count][0]) == 3){
            ++count;
        }
        trainSize = count;
        printf("%s loaded\n", filename);
        fclose(file);
        return result;
    } else{
        // note: never fclose(NULL), that is undefined behavior
        printf("failed to open %s!\n\a", filename);
        free(result);
        return NULL;
    }
}
int testSize = 0; // number of test samples
Sample * getTestData(const char * filename){
    Sample * result = (Sample*)malloc(sizeof(Sample));
    FILE * file = fopen(filename, "r");
    if(file != NULL){
        int count = 0;
        // expect 2 values per line; guard against overflowing the fixed-size buffers
        while (count < 30 && fscanf(file, "%lf %lf", &result->in[count][0], &result->in[count][1]) == 2){
            ++count;
        }
        testSize = count;
        printf("%s loaded\n", filename);
        fclose(file);
        return result;
    } else{
        printf("failed to open %s!\n\a", filename);
        free(result);
        return NULL;
    }
}
/**
 * print the samples
 */
void printTrainData(Sample * data, int size){
    if(data == NULL){
        printf("sample is NULL!\n\a");
        return;
    }
    for (int i = 0; i < size; ++i) {
        printf("%d, x1 = %f, x2 = %f, y = %f\n", i + 1, data->in[i][0], data->in[i][1], data->out[i][0]);
    }
}
void printTestData(Sample * data, int size){
    if(data == NULL){
        printf("sample is NULL!\n\a");
        return;
    }
    for (int i = 0; i < size; ++i) {
        printf("%d, x1 = %f, x2 = %f\n", i + 1, data->in[i][0], data->in[i][1]);
    }
}
// -------------------------------------------------------- training / test data handling
// initialize the neurons
void init(){
    // seed the random number generator with the current time
    srand(time(NULL));
    // hidden layer
    for (int i = 0; i < HIDENODE; ++i)
    {
        // weights: each neuron gets one weight per output of the previous layer (this layer's inputs)
        hideLayer[i].W = (double*)malloc(sizeof(double)*INNODE);
        hideLayer[i].dW = (double*)malloc(sizeof(double)*INNODE);
        for (int j = 0; j < INNODE; ++j)
        {
            hideLayer[i].W[j] = rand() % 10000 / (double)10000 * 2 - 1.0; // uniform in [-1, 1)
            hideLayer[i].dW[j] = 0.0;
        }
        // bias, activation and gradients
        hideLayer[i].b = 0.0;
        hideLayer[i].db = 0.0;
        hideLayer[i].A = 0.0;
        hideLayer[i].dZ = 0.0;
    }
    // output layer
    for (int i = 0; i < OUTNODE; ++i)
    {
        // weights
        outLayer[i].W = (double*)malloc(sizeof(double)*HIDENODE);
        outLayer[i].dW = (double*)malloc(sizeof(double)*HIDENODE);
        for (int j = 0; j < HIDENODE; ++j)
        {
            outLayer[i].W[j] = rand() % 10000 / (double)10000 * 2 - 1.0; // uniform in [-1, 1)
            outLayer[i].dW[j] = 0.0;
        }
        // bias, activation and gradients
        outLayer[i].b = 0.0;
        outLayer[i].db = 0.0;
        outLayer[i].A = 0.0;
        outLayer[i].dZ = 0.0;
    }
}
// ---------------------------------------------------------------------------------
// forward pass for one training sample (data set, sample index, current epoch)
void forward(Sample * trainData, int current_train_pos, int num){
    double loss = 0.0; // loss for this sample
    // load the sample into the input layer
    for (int inputLayer_pos = 0; inputLayer_pos < INNODE; ++inputLayer_pos)
    {
        inputLayer[inputLayer_pos].A = trainData->in[current_train_pos][inputLayer_pos];
    }
    // input layer -> hidden layer
    for (int hideLayer_pos = 0; hideLayer_pos < HIDENODE; ++hideLayer_pos)
    {
        double sum = 0.0;
        for (int inputLayer_pos = 0; inputLayer_pos < INNODE; ++inputLayer_pos)
        {
            sum += inputLayer[inputLayer_pos].A * hideLayer[hideLayer_pos].W[inputLayer_pos];
        }
        sum += hideLayer[hideLayer_pos].b;
        hideLayer[hideLayer_pos].Z = sum;
        hideLayer[hideLayer_pos].A = sigmoid(hideLayer[hideLayer_pos].Z); // apply the activation
    }
    // hidden layer -> output layer
    for (int outLayer_pos = 0; outLayer_pos < OUTNODE; ++outLayer_pos){
        double sum = 0.0;
        for (int hideLayer_pos = 0; hideLayer_pos < HIDENODE; ++hideLayer_pos){
            sum += hideLayer[hideLayer_pos].A * outLayer[outLayer_pos].W[hideLayer_pos];
        }
        sum += outLayer[outLayer_pos].b;
        outLayer[outLayer_pos].Z = sum;
        outLayer[outLayer_pos].A = sigmoid(outLayer[outLayer_pos].Z); // apply the activation
    }
    // binary cross-entropy loss (for monitoring only; backward() computes its gradient directly)
    for (int outLayer_pos = 0; outLayer_pos < OUTNODE; ++outLayer_pos){
        loss += -(trainData->out[current_train_pos][outLayer_pos] * log(outLayer[outLayer_pos].A) + (1 - trainData->out[current_train_pos][outLayer_pos]) * log(1 - outLayer[outLayer_pos].A));
        // squared-error alternative:
        // loss += (outLayer[outLayer_pos].A - trainData->out[current_train_pos][outLayer_pos]) * (outLayer[outLayer_pos].A - trainData->out[current_train_pos][outLayer_pos]) / 2.0;
    }
    loss /= OUTNODE; // average over the output neurons
    // print the loss of the last sample every 10000 epochs
    if (num % 10000 == 0 && current_train_pos == trainSize - 1)
    {
        printf("epoch %d, loss = %f\n", num, loss);
    }
}
// backward pass for one training sample (data set, sample index)
void backward(Sample * trainData, int current_train_pos){
    // output-layer gradients
    for (int outLayer_pos = 0; outLayer_pos < OUTNODE; ++outLayer_pos){
        outLayer[outLayer_pos].dZ = outLayer[outLayer_pos].A - trainData->out[current_train_pos][0]; // dZ = A - y
        for (int hideLayer_pos = 0; hideLayer_pos < HIDENODE; ++hideLayer_pos){
            outLayer[outLayer_pos].dW[hideLayer_pos] = outLayer[outLayer_pos].dZ * hideLayer[hideLayer_pos].A;
        }
        outLayer[outLayer_pos].db = outLayer[outLayer_pos].dZ;
    }
    // hidden-layer gradients
    for (int hideLayer_pos = 0; hideLayer_pos < HIDENODE; ++hideLayer_pos){
        // with a single output neuron this loop is not strictly necessary; with several
        // output neurons the contributions would have to be summed rather than overwritten
        for (int outLayer_pos = 0; outLayer_pos < OUTNODE; ++outLayer_pos){
            hideLayer[hideLayer_pos].dZ = outLayer[outLayer_pos].W[hideLayer_pos] * outLayer[outLayer_pos].dZ * (sigmoid(hideLayer[hideLayer_pos].Z) * (1-sigmoid(hideLayer[hideLayer_pos].Z))); // dZ[1] = W[2]*dZ[2]*g[1]'(Z[1])
        }
        for (int inputLayer_pos = 0; inputLayer_pos < INNODE; ++inputLayer_pos){
            hideLayer[hideLayer_pos].dW[inputLayer_pos] = hideLayer[hideLayer_pos].dZ * trainData->in[current_train_pos][inputLayer_pos];
        }
        // dW = dW / INNODE; // averaging variant (not used)
        hideLayer[hideLayer_pos].db = hideLayer[hideLayer_pos].dZ;
    }
    // gradient descent (updating after every training sample --> stochastic gradient descent)
    for (int hideLayer_pos = 0; hideLayer_pos < HIDENODE; hideLayer_pos++)
    {
        for (int inputLayer_pos = 0; inputLayer_pos < INNODE; inputLayer_pos++)
        {
            hideLayer[hideLayer_pos].W[inputLayer_pos] -= LEARNING_RATE * hideLayer[hideLayer_pos].dW[inputLayer_pos];
        }
        hideLayer[hideLayer_pos].b -= LEARNING_RATE * hideLayer[hideLayer_pos].db;
    }
    for (int outLayer_pos = 0; outLayer_pos < OUTNODE; outLayer_pos++)
    {
        for (int hideLayer_pos = 0; hideLayer_pos < HIDENODE; hideLayer_pos++)
        {
            outLayer[outLayer_pos].W[hideLayer_pos] -= LEARNING_RATE * outLayer[outLayer_pos].dW[hideLayer_pos];
        }
        outLayer[outLayer_pos].b -= LEARNING_RATE * outLayer[outLayer_pos].db;
    }
}
// ------------------------------------------------------------- propagation algorithms done
void predict(Sample * testData){
    // prediction is just a forward pass
    for (int current_test_pos = 0; current_test_pos < testSize; ++current_test_pos){
        // load the test sample into the input layer
        for (int inputLayer_pos = 0; inputLayer_pos < INNODE; ++inputLayer_pos)
        {
            inputLayer[inputLayer_pos].A = testData->in[current_test_pos][inputLayer_pos];
        }
        // input layer -> hidden layer
        for (int hideLayer_pos = 0; hideLayer_pos < HIDENODE; ++hideLayer_pos){
            double sum = 0.0;
            for (int inputLayer_pos = 0; inputLayer_pos < INNODE; ++inputLayer_pos)
            {
                sum += inputLayer[inputLayer_pos].A * hideLayer[hideLayer_pos].W[inputLayer_pos];
            }
            sum += hideLayer[hideLayer_pos].b;
            hideLayer[hideLayer_pos].Z = sum;
            hideLayer[hideLayer_pos].A = sigmoid(hideLayer[hideLayer_pos].Z); // apply the activation
        }
        // hidden layer -> output layer
        for (int outLayer_pos = 0; outLayer_pos < OUTNODE; ++outLayer_pos){
            double sum = 0.0;
            for (int hideLayer_pos = 0; hideLayer_pos < HIDENODE; ++hideLayer_pos){
                sum += hideLayer[hideLayer_pos].A * outLayer[outLayer_pos].W[hideLayer_pos];
            }
            sum += outLayer[outLayer_pos].b;
            outLayer[outLayer_pos].Z = sum;
            outLayer[outLayer_pos].A = sigmoid(outLayer[outLayer_pos].Z); // apply the activation
            // store the prediction
            testData->out[current_test_pos][0] = outLayer[outLayer_pos].A;
        }
    }
}
int main(){
    Sample* train = getTrainData("trainData.txt");
    // printTrainData(train, trainSize);
    Sample* test = getTestData("testData.txt");
    // printTestData(test, testSize);
    init();
    // train for 100000 epochs over the whole training set
    for (int train_times = 0; train_times < 100000; ++train_times)
    {
        for (int train_dataNum = 0; train_dataNum < trainSize; train_dataNum++)
        {
            forward(train, train_dataNum, train_times);
            backward(train, train_dataNum);
        }
    }
    predict(test);
    for (int i = 0; i < testSize; i++)
    {
        printf("%d: x1=%f, x2=%f, predict_y=%f\n", i+1, test->in[i][0], test->in[i][1], test->out[i][0]);
    }
    return 0;
}
// Summary
// 1. Most of the issues in this experiment traced back to getting the data dimensions right
// 2. e.g. whether one forward/backward call should sweep the whole data set, or handle a single sample per call
// 3. Be careful with dimensions in backpropagation, especially since the output layer currently has one neuron; with several output neurons, the output layer's dZ would need to become a pointer
9. How to Reproduce
Create trainData.txt, testData.txt and network.c in the same directory, then compile and run network.c, as shown below.
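On a typical Linux/macOS setup with gcc, that means (note -lm, which links the math library needed for exp and log):
gcc network.c -o network -lm
./network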
10. Summary and Outlook
(1) Summary
I hope this post helps with your work or study~ If anything is unclear, feel free to ask in the comments.
Another note: as I recall, the data set borrowed an idea from a Bilibili video, but it has been so long that I can no longer find the original.
(2) Outlook
May we all make it through our exams and shine in the AI era!~~