// 啊啊
//
// 最新推荐文章于 2025-10-27 18:30:00 发布
// 转载最新推荐文章于 2025-10-27 18:30:00 发布 · 312 阅读
// 0 篇文章
// 订阅专栏
// naiveBayesClassify.cpp : Defines the entry point for the console application.
//

//#include "stdafx.h"
#include<iostream>
#include<iomanip>
#include<string>
#include<map>
#include<vector>
#include <tchar.h>
//#include<math.h>

using namespace std;



// Training table: one row per record, one string per column.
using strDVect = std::vector<std::vector<std::string>>;
// Innermost level of the statistics: attribute value -> number
// (a count first, later overwritten with a conditional probability).
using strMap = std::map<std::string, double>;
// Middle level: reference attribute name -> strMap.
using doubleMap = std::map<std::string, strMap>;
// Outer level holding every conditional statistic:
// aim (class) attribute value -> doubleMap.
using thriMap = std::map<std::string, doubleMap>;
// Aim (class) attribute value -> integer tally.
using aimMap = std::map<std::string, int>;
// Attribute name (the aim attribute included) -> number of kinds of value.
using attMap = std::map<std::string, int>;
// Constant added to the numerators and denominators of the estimates;
// deliberately mutable because it is re-initialised at run time.
double K = 1;
// One sample to classify: its attribute values in column order.
using sampleVect = std::vector<std::string>;


//check the three dimensions map and return 4 states
int existRecord(thriMap prob, string aimValue, string attName, string attValue)
{
	thriMap::const_iterator iterThri = prob.find(aimValue);
	if (iterThri == prob.end())
		return 1;
	doubleMap::const_iterator iterDouble = iterThri->second.find(attName);
	if (iterDouble == iterThri->second.end())
		return 2;
	strMap::const_iterator iterSingle = iterDouble->second.find(attValue);
	if (iterSingle == iterDouble->second.end())
		return 3;
	return 4;
}

void show(thriMap prob)
{
	thriMap::iterator iterThri = prob.begin();
	for (; iterThri != prob.end(); iterThri++)
	{
		doubleMap::iterator iterDouble = iterThri->second.begin();
		for (; iterDouble != iterThri->second.end(); iterDouble++)
		{
			strMap::iterator iterSingle = iterDouble->second.begin();
			for (; iterSingle != iterDouble->second.end(); iterSingle++)
			{
				cout << iterThri->first << " " << iterDouble->first << " " << iterSingle->first << " " << iterSingle->second << endl;
			}
		}
	}
}

//get all possible statistics
//three dimensions hash prob, first key notes aim attribute value,second key notes refence attribute name,
//third key notes reference attribute value, the value notes the refence attribute value number
void getConditionStat(strDVect datas, thriMap &prob, aimMap &aimNum)
{
	int i, j, k, m, n;
	m = datas.size();
	n = datas[0].size();

	doubleMap doubleMapIns;
	strMap strMapIns;
	thriMap::iterator iterThri;
	doubleMap::iterator iterDouble;
	for (i = 1; i < m; i++)
	{
		//save all possible statistics
		for (j = 0; j < n - 1; j++)
		{
			switch (existRecord(prob, datas[i][n - 1], datas[0][j], datas[i][j]))
			{
			case 1:
				//must clear
				strMapIns.clear();
				doubleMapIns.clear();
				strMapIns.insert(make_pair(datas[i][j], 1));
				doubleMapIns.insert(make_pair(datas[0][j], strMapIns));
				prob.insert(make_pair(datas[i][n - 1], doubleMapIns));
				break;
			case 2:
				strMapIns.clear();
				strMapIns.insert(make_pair(datas[i][j], 1));
				prob.find(datas[i][n - 1])->second.insert(make_pair(datas[0][j], strMapIns));
				break;
			case 3:
				prob.find(datas[i][n - 1])->second.find(datas[0][j])->second.insert(make_pair(datas[i][j], 1));
				break;
			case 4:
				prob.find(datas[i][n - 1])->second.find(datas[0][j])->second.find(datas[i][j])->second++;
				break;
			}
		}
		//save the kind of aim attribute value
		if (aimNum.find(datas[i][n - 1]) != aimNum.end())
		{
			aimNum.find(datas[i][n - 1])->second++;
		}
		else
		{
			aimNum.insert(make_pair(datas[i][n - 1], 1));
		}
	}
}

void getConditionProb(thriMap &prob, aimMap aimNum, attMap attKind)
{
	//save all possible condition probabilitys
	thriMap::iterator iterThri = prob.begin();
	for (; iterThri != prob.end(); iterThri++)
	{
		doubleMap::iterator iterDouble = iterThri->second.begin();
		for (; iterDouble != iterThri->second.end(); iterDouble++)
		{
			strMap::iterator iterSingle = iterDouble->second.begin();
			for (; iterSingle != iterDouble->second.end(); iterSingle++)
			{
				//add const K,L to  denominator and numerator
				//the part of "attKind.find(iterDouble->first)->second*K" add the weight of kind of refence attribute。
				iterSingle->second = (iterSingle->second + K) / (attKind.find(iterDouble->first)->second*K + aimNum.find(iterThri->first)->second);
			}
		}
	}
}

void getClassification(strDVect datas, thriMap prob, aimMap aimNum, attMap attKind, sampleVect sampleIns, int records)
{

	//save all the probability
	double sum = 0;
	double max = 0;
	double pp;
	string classKind;
	//获得极大后验假设
	double h_map;
	aimMap::const_iterator iterAim = aimNum.begin();
	for (; iterAim != aimNum.end(); iterAim++)
	{
		//get prior probability
		//the part of "((--attKind.end())->second*K" add the weight of the kind of aim attribute。
		pp = (double)(iterAim->second + K) / ((--attKind.end())->second*K + records);
		//获得极大使然假设
		double h_ml = 1;
		for (int i = 0; i < sampleIns.size(); i++)
		{
			if (existRecord(prob, iterAim->first, datas[0][i], sampleIns.at(i)) != 4)
			{
				cout << "there is no value of attrubute " << datas[0][i] << endl;
				h_ml *= K;
			}
			else
			{
				h_ml *= prob.find(iterAim->first)->second.find(datas[0][i])->second.find(sampleIns.at(i))->second;
			}
		}
		h_map = pp*h_ml;
		if (h_map > max)
		{
			max = h_map;
			classKind = iterAim->first;
		}
		sum += h_map;
	}
	//normalize
	max = max / sum;
	cout << "the the most class is " << classKind << ", and the probability is " << max << endl;



}

// Program entry point: builds the fixed training table, prints it, gathers
// the statistics, converts them to estimates, prints those, and classifies
// one fixed sample.
int _tmain(int argc, _TCHAR* argv[])
{
	// m: number of table rows (header included), n: number of columns
	const int m = 15;
	const int n = 5;

	// Row 0 names the attributes; the last column is the class attribute.
	static const char *const table[15][5] = {
		{ "age",    "inco", "student", "credit_rating", "class:buys_computer" },
		{ "<=30",   "high", "no",      "fair",          "no"  },
		{ "<=30",   "high", "no",      "excellent",     "no"  },
		{ "31..40", "high", "no",      "fair",          "yes" },
		{ ">40",    "medi", "no",      "fair",          "yes" },
		{ ">40",    "low",  "yes",     "fair",          "yes" },
		{ ">40",    "low",  "yes",     "excellent",     "no"  },
		{ "31..40", "low",  "yes",     "excellent",     "yes" },
		{ "<=30",   "medi", "no",      "fair",          "no"  },
		{ "<=30",   "low",  "yes",     "fair",          "yes" },
		{ ">40",    "medi", "yes",     "fair",          "yes" },
		{ "<=30",   "medi", "yes",     "excellent",     "yes" },
		{ "31..40", "medi", "no",      "excellent",     "yes" },
		{ "31..40", "high", "yes",     "fair",          "yes" },
		{ ">40",    "medi", "no",      "excellent",     "no"  }
	};

	strDVect datas;
	for (int row = 0; row < m; ++row)
	{
		datas.push_back(vector<string>(table[row], table[row] + n));
	}

	// echo the table, every cell left-aligned in a 15-character field
	for (int row = 0; row < m; ++row)
	{
		for (int col = 0; col < n; ++col)
		{
			cout << setiosflags(ios::left) << setw(15) << datas[row][col];
		}
		cout << endl;
	}

	thriMap prob;
	aimMap aimNum;

	// number of kinds of value per attribute
	attMap attKind;
	attKind["age"] = 3;
	attKind["inco"] = 3;
	attKind["student"] = 2;
	attKind["credit_rating"] = 2;
	attKind["class:buys_computer"] = 2;

	getConditionStat(datas, prob, aimNum);
	// K is set to the reciprocal of the number of training records
	K = 1.0 / (m - 1);
	getConditionProb(prob, aimNum, attKind);
	show(prob);

	static const char *const sampleValues[] = { "31..40", "high", "no", "fair" };
	sampleVect sampleIns(sampleValues, sampleValues + 4);
	getClassification(datas, prob, aimNum, attKind, sampleIns, m - 1);

	system("pause");
}