[Deep Learning Study Notes] Annotating yusugomori's LR code --- LogisticRegression.cpp

This post walks through a simple logistic regression implementation, focusing on its two key functions, train and predict. The C++ code below shows how the weights and bias are initialized, how the forward pass computes class probabilities, and how the parameters are updated.

The model implementation follows. The key pieces are the train function and the predict function, both straightforward; a usage sketch is given after the listing.

 

#include <iostream>
#include <cmath>
#include "LogisticRegression.h"
using namespace std;


LogisticRegression::LogisticRegression(
						int size, 	// N
						int in, 	// n_in
						int out		// n_out
							) 
{
  	N = size;
  	n_in = in;
  	n_out = out;

  	// initialize W, b
  	// W[n_out][n_in], b[n_out]
  	W = new double*[n_out];
  	for(int i=0; i<n_out; i++) 
	  	W[i] = new double[n_in];
  	b = new double[n_out];

  	for(int i=0; i<n_out; i++) 
  	{
    	for(int j=0; j<n_in; j++) 
		{
      		W[i][j] = 0;
    	}
    	b[i] = 0;
  	}
}

LogisticRegression::~LogisticRegression() 
{
  	for(int i=0; i<n_out; i++) 
	  	delete[] W[i];
  	delete[] W;
  	delete[] b;
}


void LogisticRegression::train (
				int *x, 	// one input vector from the training set
				int *y, 	// its one-hot label vector
				double lr	// the learning rate
				) 
{
	// p_y_given_x[i] will hold the probability P(y = i | x)
  	double *p_y_given_x = new double[n_out];
  	// temporary error term (it does not strictly need to be an array)
  	double *dy = new double[n_out];

	// step 1: compute the activations W*x + b that feed the softmax
  	for(int i=0; i<n_out; i++) 
  	{
  		// initialize
    	p_y_given_x[i] = 0;
    	for(int j=0; j<n_in; j++) 
		{
			// accumulate the weighted sum of the inputs
      		p_y_given_x[i] += W[i][j] * x[j];
    	}
    	// the bias
    	p_y_given_x[i] += b[i];
  	}
  	// the softmax value
  	softmax(p_y_given_x);

	// step 2: update the weights by gradient ascent on the log-likelihood
	// w_new = w_old + learningRate * gradient (the derivative)
	//		 = w_old + learningRate * x * (1{y_i=y} - p_yi_given_x)
	//		 = w_old + learningRate * x * (y - p_y_given_x)	// y is one-hot
	// (see the derivation sketch after this function)
  	for(int i=0; i<n_out; i++) 
  	{
    	dy[i] = y[i] - p_y_given_x[i];
    	for(int j=0; j<n_in; j++) 
		{
      		W[i][j] += lr * dy[i] * x[j] / N;
    	}
    	b[i] += lr * dy[i] / N;
  	}
  	delete[] p_y_given_x;
  	delete[] dy;
}
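
The update in step 2 is just gradient ascent on the per-sample log-likelihood of softmax regression. As a sketch of the standard derivation (my notation, not part of the original file): with a one-hot label $y$ and $p = \mathrm{softmax}(Wx + b)$,

$$\ell = \sum_i y_i \log p_i, \qquad \frac{\partial \ell}{\partial W_{ij}} = (y_i - p_i)\,x_j, \qquad \frac{\partial \ell}{\partial b_i} = y_i - p_i,$$

which is exactly the dy[i] = y[i] - p_y_given_x[i] computed above; the extra / N averages the step over the N training samples.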

void LogisticRegression::softmax (double *x) 
{
  	double max = 0.0;
  	double sum = 0.0;
  
  	// step 1: find the max activation in the vector
  	for(int i=0; i<n_out; i++) 
	  	if(max < x[i]) 
		  	max = x[i];
  	// step 2: exponentiate and normalize.
  	// Subtracting the max leaves the softmax output unchanged (the common
  	// factor exp(-max) cancels in the normalization); it is done purely so
  	// that exp() cannot overflow on large activations.
  	for(int i=0; i<n_out; i++) 
  	{
    	x[i] = exp(x[i] - max);
    	sum += x[i];
  	} 
  	for(int i=0; i<n_out; i++) 
	  x[i] /= sum;
}
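
To see why the max is subtracted before exponentiating: exp() on a double overflows to inf once its argument exceeds roughly 709, so a naive softmax turns large activations into inf/inf = nan. A minimal standalone check (my own illustration, not part of yusugomori's code):

#include <cstdio>
#include <cmath>

int main()
{
	double a[2] = {1000.0, 999.0};

	// naive softmax: exp(1000) overflows to inf, and inf/inf yields nan
	double naive = exp(a[0]) / (exp(a[0]) + exp(a[1]));

	// shifted softmax: exp(a[i] - max) = exp(a[i]) * exp(-max), and the
	// common factor exp(-max) cancels, so the result is unchanged
	double m = (a[0] > a[1]) ? a[0] : a[1];
	double e0 = exp(a[0] - m), e1 = exp(a[1] - m);
	double shifted = e0 / (e0 + e1);

	printf("naive:   %f\n", naive);		// nan
	printf("shifted: %f\n", shifted);	// ~0.731059
	return 0;
}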

void LogisticRegression::predict(
				int *x, 	// one input vector from the test set
				double *y	// output: the softmax probability of each class
				) 
{
	// forward pass with the current parameters: W*x + b, then softmax
  	for(int i=0; i<n_out; i++) 
  	{
    	y[i] = 0;
    	for(int j=0; j<n_in; j++) 
		{
      		y[i] += W[i][j] * x[j];
    	}
    	y[i] += b[i];
  	}

  	softmax(y);
}
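
Finally, a sketch of how the class can be driven end to end. This main() is hypothetical -- the toy data, learning rate, and epoch count are made up for illustration -- and it only assumes the constructor and the train/predict signatures shown above:

#include "LogisticRegression.h"
#include <cstdio>

int main()
{
	double learning_rate = 0.1;
	int n_epochs = 500;

	// 6 training samples, 6 binary input features, 2 classes
	int train_X[6][6] = {
		{1, 1, 1, 0, 0, 0},
		{1, 0, 1, 0, 0, 0},
		{1, 1, 1, 0, 0, 0},
		{0, 0, 1, 1, 1, 0},
		{0, 0, 1, 1, 0, 0},
		{0, 0, 1, 1, 1, 0}
	};
	// one-hot labels, one row per sample
	int train_Y[6][2] = {
		{1, 0}, {1, 0}, {1, 0},
		{0, 1}, {0, 1}, {0, 1}
	};

	// N = 6 samples, n_in = 6 features, n_out = 2 classes
	LogisticRegression classifier(6, 6, 2);

	// each call to train() takes one gradient step on one sample
	for(int epoch = 0; epoch < n_epochs; epoch++)
		for(int i = 0; i < 6; i++)
			classifier.train(train_X[i], train_Y[i], learning_rate);

	// predict() fills y with the softmax probability of each class
	int test_X[2][6] = {
		{1, 0, 1, 0, 0, 0},
		{0, 0, 1, 1, 1, 0}
	};
	double test_Y[2];
	for(int i = 0; i < 2; i++)
	{
		classifier.predict(test_X[i], test_Y);
		printf("P(y|x) = [%f, %f]\n", test_Y[0], test_Y[1]);
	}
	return 0;
}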

