Preface
The maximum number of training iterations, the stopping condition, the learning rates, and the number of hidden-layer neurons in the program can all be adjusted further, so readers are welcome to take the code and experiment with them (a lighter example configuration follows this preface). The program below uses fairly large hidden layers, so it takes quite a while to run. If you have suggestions for improving the accuracy of the price prediction, I would be delighted to hear them and look forward to the exchange.
The theory behind BP neural networks and the derivation of the formulas are not covered here. The data file can be found in my earlier article: 多元线性回归求解波士顿房价问题 (solving the Boston housing problem with multiple linear regression).
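For example, if you only want a quick sanity run rather than a full training session, the hidden layers and the iteration cap can be shrunk; a sketch of lighter macro settings (the values are illustrative, not tuned):

#define Neuron1 10 //fewer neurons in hidden layer 1, much faster to train
#define Neuron2 10 //fewer neurons in hidden layer 2
#define TrainC 20000 //smaller iteration budget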
Source Code
#include<math.h>
#include<stdio.h>
#include<time.h>
#include<stdlib.h>
#define Data 380 //number of training samples
#define TestData 126 //number of test samples
#define In 13 //number of input-layer neurons
#define Out 1 //number of output-layer neurons
#define Neuron1 40 //number of neurons in hidden layer 1 (the program takes a long time to run)
#define Neuron2 40 //number of neurons in hidden layer 2 (the program takes a long time to run)
#define TrainC 100000 //maximum number of training iterations
#define WAlta 0.1 //learning rate for weights w (hidden layer 2 to output layer)
#define V1Alta 0.5 //learning rate for weights v1 (input layer to hidden layer 1)
#define V2Alta 0.5 //learning rate for weights v2 (hidden layer 1 to hidden layer 2)
#define FeatureNumber 14 //number of features plus the house price
char FeatureName[FeatureNumber][15]; //feature names
double d_in[Data+TestData][In]; //all input data, shuffled and then split into training and test sets
double d_out[Data+TestData][Out]; //all output data, shuffled and then split into training and test sets
double t_in[TestData][In]; //test-set inputs
double t_out[TestData][Out]; //test-set outputs
double pre[TestData][Out]; //predicted outputs for the test samples
double v1[Neuron1][In]; //weights from input layer to hidden layer 1
double v2[Neuron2][Neuron1]; //weights from hidden layer 1 to hidden layer 2
double Y1[Neuron1]; //outputs of hidden layer 1
double Y2[Neuron2]; //outputs of hidden layer 2
double w[Out][Neuron2]; //weights from hidden layer 2 to output layer
double Maxin[In], Minin[In]; //max and min of each input feature over the samples
double Maxout[Out], Minout[Out]; //max and min of the sample outputs
double OutputData[Out]; //output of the neural network
double dw[Out][Neuron2], dv2[Neuron2][Neuron1], dv1[Neuron1][In]; //weight update terms
double mse; //mean squared error
double rmse; //root mean squared error
void ReadData() { //read the data
srand((int)time(0));
int i, j, k;
FILE* fp;
if ((fp = fopen("housing.txt", "r")) == NULL) { //打开数据文件
printf("不能打开文件!\n");
exit(0);
}
for (i = 0; i < FeatureNumber; ++i) { //read the names of the 13 features plus the house price MEDV
fscanf(fp, "%s", FeatureName[i]);
}
for (i = 0; i < Data+TestData; ++i) { //copy the data from the file into the arrays
for (j = 0; j < In; ++j) {
fscanf(fp, "%lf", &d_in[i][j]);
}
for (k = 0; k < Out; ++k) {
fscanf(fp, "%lf", &d_out[i][k]);
}
}
for (i = 0; i < Data + TestData; ++i) { //shuffle the data set
int k = rand() % (Data + TestData);
for (int l = 0; l < In; ++l) {
double tmp = d_in[i][l];
d_in[i][l] = d_in[k][l];
d_in[k][l] = tmp;
}
for (int l = 0; l < Out; ++l) {
double tmp = d_out[i][l];
d_out[i][l] = d_out[k][l];
d_out[k][l] = tmp;
}
}
for (i = Data; i < TestData+Data; ++i) { //split the data set into training and test sets
for (j = 0; j < In; ++j) {
t_in[i-Data][j] = d_in[i][j];
}
for (k = 0; k < Out; ++k) {
t_out[i-Data][k] = d_out[i][k];
}
}
fclose(fp);
}
void InitBPNetwork() { //initialization
int i, j;
srand((int)time(0));
for (i = 0; i < In; ++i) { //find the max and min of each input feature over the training set
Minin[i] = Maxin[i] = d_in[0][i];
for (j = 0; j < Data; ++j) {
Maxin[i] = Maxin[i] > d_in[j][i] ? Maxin[i] : d_in[j][i];
Minin[i] = Minin[i] < d_in[j][i] ? Minin[i] : d_in[j][i];
}
}
for (i = 0; i < Out; ++i) { //find the max and min of the training-set output (house price)
Minout[i] = Maxout[i] = d_out[0][i];
for (j = 0; j < Data; ++j) {
Maxout[i] = Maxout[i] > d_out[j][i] ? Maxout[i] : d_out[j][i];
Minout[i] = Minout[i] < d_out[j][i] ? Minout[i] : d_out[j][i];
}
}
for (i = 0; i < In; ++i) { //normalize the training-set inputs
for (j = 0; j < Data; ++j) {
d_in[j][i] = (d_in[j][i] - Minin[i]) / (Maxin[i] - Minin[i]);
}
}
for (i = 0; i < Out; ++i) { //normalize the training-set outputs
for (j = 0; j < Data; ++j) {
d_out[j][i] = (d_out[j][i] - Minout[i]) / (Maxout[i] - Minout[i]);
}
}
for (i = 0; i < Neuron1; ++i) { //initialize the input-layer-to-hidden-layer-1 weights and their update terms
for (j = 0; j < In; ++j) {
v1[i][j] = rand() * 2.0 / RAND_MAX - 1;
dv1[i][j] = 0;
}
}
for (i = 0; i < Neuron2; ++i) { //initialize the hidden-layer-1-to-hidden-layer-2 weights and their update terms
for (j = 0; j < Neuron1; ++j) {
v2[i][j] = rand() * 2.0 / RAND_MAX - 1;
dv2[i][j] = 0;
}
}
for (i = 0; i < Out; ++i) { //initialize the hidden-layer-2-to-output-layer weights and their update terms
for (j = 0; j < Neuron2; ++j) {
w[i][j] = rand() * 2.0 / RAND_MAX - 1;
dw[i][j] = 0;
}
}
}
void ComputO(int var) { //forward propagation
int i, j;
double sum;
for (i = 0; i < Neuron1; ++i) { //compute the outputs of hidden layer 1
sum = 0;
for (j = 0; j < In; ++j) {
sum += d_in[var][j] * v1[i][j];
}
Y1[i] = 1 / (1 + exp(-1 * sum));
}
for (i = 0; i < Neuron2; ++i) { //compute the outputs of hidden layer 2
sum = 0;
for (j = 0; j < Neuron1; ++j) {
sum += Y1[j] * v2[i][j];
}
Y2[i] = 1 / (1 + exp(-1 * sum));
}
for (i = 0; i < Out; ++i) { //compute the output layer
sum = 0;
for (j = 0; j < Neuron2; ++j) {
sum += Y2[j] * w[i][j];
}
OutputData[i] = 1 / (1 + exp(-1 * sum)); //output of the neural network
}
}
void BackUpdata(int var) { //back-propagation weight updates
int i, j, k;
double s;
double t = 0;
for (k = 0; k < Neuron1; ++k) {
s = 0;
for (i = 0; i < Neuron2; ++i) {
t = 0;
for (j = 0; j < Out; ++j) {
dw[j][i] = WAlta * (d_out[var][j] - OutputData[j]) * OutputData[j] * (1 - OutputData[j]) * Y2[i]; //update term for weights w
t += (d_out[var][j] - OutputData[j]) * OutputData[j] * (1 - OutputData[j]) * w[j][i];
}
for (j = 0; j < Neuron1; ++j) {
dv2[i][j] = V2Alta * t * Y2[i] * (1 - Y2[i]) * Y1[j]; //update term for weights v2
}
s += t * Y2[i] * (1 - Y2[i]) * v2[i][k];
}
for (i = 0; i < In; ++i) {
dv1[k][i] = V1Alta * s * Y1[k] * (1 - Y1[k]) * d_in[var][i]; //update term for weights v1
}
}
for (i = 0; i < In; ++i) { //apply the weight updates
for (j = 0; j < Neuron1; ++j) {
v1[j][i] += dv1[j][i];
}
}
for (i = 0; i < Neuron1; ++i) {
for (j = 0; j < Neuron2; ++j) {
v2[j][i] += dv2[j][i];
}
}
for (i = 0; i < Neuron2; ++i) {
for (j = 0; j < Out; ++j) {
w[j][i] += dw[j][i];
}
}
}
void TrainNetwork() { //train the network
int count = 1;
int i, j;
do {
mse = 0;
for (i = 0; i < Data; ++i) {
ComputO(i);
BackUpdata(i);
for (j = 0; j < Out; ++j) {
double tmp1 = OutputData[j] * (Maxout[j] - Minout[j]) + Minout[j];
double tmp2 = d_out[i][j] * (Maxout[j] - Minout[j]) + Minout[j];
mse += (tmp1 - tmp2) * (tmp1 - tmp2); //accumulate the squared error
}
}
mse /= (double)Data * Out; //mean squared error
if (count % 1000 == 0) { //progress report
printf("Iterations: %d\tMSE: %lf\n", count, mse);
}
count++;
} while (count <= TrainC && mse >= 1);
printf("\n训练结束\n\n");
}
void TestNetwork() { //testing
int i, j, k;
double sum;
for (i = 0; i < In; ++i) { //normalize the test-set inputs
for (j = 0; j < TestData; ++j) {
t_in[j][i] = (t_in[j][i] - Minin[i]) / (Maxin[i] - Minin[i]);
}
}
for (k = 0; k < TestData; ++k) { //compute the network output for each test sample
for (i = 0; i < Neuron1; ++i) {
sum = 0;
for (j = 0; j < In; ++j) {
sum += t_in[k][j] * v1[i][j];
}
Y1[i] = 1 / (1 + exp(-1 * sum));
}
for (i = 0; i < Neuron2; ++i) {
sum = 0;
for (j = 0; j < Neuron1; ++j) {
sum += Y1[j] * v2[i][j];
}
Y2[i] = 1 / (1 + exp(-1 * sum));
}
for (i = 0; i < Out; ++i) {
sum = 0;
for (j = 0; j < Neuron2; ++j) {
sum += Y2[j] * w[i][j];
}
pre[k][i] = (double)1 / (1 + exp(-1 * sum)) * (Maxout[i] - Minout[i]) + Minout[i]; //predicted value (de-normalized)
printf("No. %d\tPredicted: %lf  Actual: %lf\n", k + 1, pre[k][i], t_out[k][i]); //compare predicted and actual values
}
}
rmse = 0.0;
for (k = 0; k < TestData; ++k) {
for (i = 0; i < Out; ++i) {
rmse += (pre[k][i] - t_out[k][i]) * (pre[k][i] - t_out[k][i]);
}
}
rmse = sqrt(rmse / TestData / Out); //root mean squared error
printf("\nrmse: %.4lf\n", rmse);
}
int main() {
ReadData();
InitBPNetwork();
TrainNetwork();
TestNetwork();
return 0;
}
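A note on building: since the program uses math.h, with gcc or clang the math library usually has to be linked explicitly, for example gcc bp.c -o bp -lm (the file name bp.c is just an example), and housing.txt must be in the working directory.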
Run Screenshot
Because the error obtained in the end was quite large, I suspected overfitting, so I changed the training stop condition to mse > 10; the results shown below further support this.
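For reference, that change only touches the termination test at the end of the do-while loop in TrainNetwork; a minimal sketch of the modified line, with the threshold taken from the experiment above:

} while (count <= TrainC && mse > 10); //stop earlier: quit once the training MSE falls to 10 or below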