// naiveBayesClassify.cpp : Defines the entry point for the console application.
//
//#include "stdafx.h"
#include<iostream>
#include<iomanip>
#include<string>
#include<map>
#include<vector>
#include <tchar.h>
//#include<math.h>
using namespace std;
//training table: row 0 holds the attribute names, every later row is one record
using strDVect = vector<vector<string>>;
//innermost level of the model: attribute value -> count (later overwritten with a probability)
using strMap = map<string, double>;
//middle level of the model: attribute name -> strMap
using doubleMap = map<string, strMap>;
//full model: class value -> attribute name -> attribute value -> count / conditional probability
using thriMap = map<string, doubleMap>;
//class (aim attribute) value -> number of training records carrying that value
using aimMap = map<string, int>;
//attribute name (class attribute included) -> number of distinct values it can take
using attMap = map<string, int>;
//smoothing constant added to the numerator and, weighted, to the denominator
double K = 1;
//one sample to classify: attribute values in column order
using sampleVect = vector<string>;
//check the three dimensions map and return 4 states
int existRecord(thriMap prob, string aimValue, string attName, string attValue)
{
thriMap::const_iterator iterThri = prob.find(aimValue);
if (iterThri == prob.end())
return 1;
doubleMap::const_iterator iterDouble = iterThri->second.find(attName);
if (iterDouble == iterThri->second.end())
return 2;
strMap::const_iterator iterSingle = iterDouble->second.find(attValue);
if (iterSingle == iterDouble->second.end())
return 3;
return 4;
}
void show(thriMap prob)
{
thriMap::iterator iterThri = prob.begin();
for (; iterThri != prob.end(); iterThri++)
{
doubleMap::iterator iterDouble = iterThri->second.begin();
for (; iterDouble != iterThri->second.end(); iterDouble++)
{
strMap::iterator iterSingle = iterDouble->second.begin();
for (; iterSingle != iterDouble->second.end(); iterSingle++)
{
cout << iterThri->first << " " << iterDouble->first << " " << iterSingle->first << " " << iterSingle->second << endl;
}
}
}
}
//get all possible statistics
//three dimensions hash prob, first key notes aim attribute value,second key notes refence attribute name,
//third key notes reference attribute value, the value notes the refence attribute value number
void getConditionStat(strDVect datas, thriMap &prob, aimMap &aimNum)
{
int i, j, k, m, n;
m = datas.size();
n = datas[0].size();
doubleMap doubleMapIns;
strMap strMapIns;
thriMap::iterator iterThri;
doubleMap::iterator iterDouble;
for (i = 1; i < m; i++)
{
//save all possible statistics
for (j = 0; j < n - 1; j++)
{
switch (existRecord(prob, datas[i][n - 1], datas[0][j], datas[i][j]))
{
case 1:
//must clear
strMapIns.clear();
doubleMapIns.clear();
strMapIns.insert(make_pair(datas[i][j], 1));
doubleMapIns.insert(make_pair(datas[0][j], strMapIns));
prob.insert(make_pair(datas[i][n - 1], doubleMapIns));
break;
case 2:
strMapIns.clear();
strMapIns.insert(make_pair(datas[i][j], 1));
prob.find(datas[i][n - 1])->second.insert(make_pair(datas[0][j], strMapIns));
break;
case 3:
prob.find(datas[i][n - 1])->second.find(datas[0][j])->second.insert(make_pair(datas[i][j], 1));
break;
case 4:
prob.find(datas[i][n - 1])->second.find(datas[0][j])->second.find(datas[i][j])->second++;
break;
}
}
//save the kind of aim attribute value
if (aimNum.find(datas[i][n - 1]) != aimNum.end())
{
aimNum.find(datas[i][n - 1])->second++;
}
else
{
aimNum.insert(make_pair(datas[i][n - 1], 1));
}
}
}
void getConditionProb(thriMap &prob, aimMap aimNum, attMap attKind)
{
//save all possible condition probabilitys
thriMap::iterator iterThri = prob.begin();
for (; iterThri != prob.end(); iterThri++)
{
doubleMap::iterator iterDouble = iterThri->second.begin();
for (; iterDouble != iterThri->second.end(); iterDouble++)
{
strMap::iterator iterSingle = iterDouble->second.begin();
for (; iterSingle != iterDouble->second.end(); iterSingle++)
{
//add const K,L to denominator and numerator
//the part of "attKind.find(iterDouble->first)->second*K" add the weight of kind of refence attribute。
iterSingle->second = (iterSingle->second + K) / (attKind.find(iterDouble->first)->second*K + aimNum.find(iterThri->first)->second);
}
}
}
}
void getClassification(strDVect datas, thriMap prob, aimMap aimNum, attMap attKind, sampleVect sampleIns, int records)
{
//save all the probability
double sum = 0;
double max = 0;
double pp;
string classKind;
//获得极大后验假设
double h_map;
aimMap::const_iterator iterAim = aimNum.begin();
for (; iterAim != aimNum.end(); iterAim++)
{
//get prior probability
//the part of "((--attKind.end())->second*K" add the weight of the kind of aim attribute。
pp = (double)(iterAim->second + K) / ((--attKind.end())->second*K + records);
//获得极大使然假设
double h_ml = 1;
for (int i = 0; i < sampleIns.size(); i++)
{
if (existRecord(prob, iterAim->first, datas[0][i], sampleIns.at(i)) != 4)
{
cout << "there is no value of attrubute " << datas[0][i] << endl;
h_ml *= K;
}
else
{
h_ml *= prob.find(iterAim->first)->second.find(datas[0][i])->second.find(sampleIns.at(i))->second;
}
}
h_map = pp*h_ml;
if (h_map > max)
{
max = h_map;
classKind = iterAim->first;
}
sum += h_map;
}
//normalize
max = max / sum;
cout << "the the most class is " << classKind << ", and the probability is " << max << endl;
}
int _tmain(int argc, _TCHAR* argv[])
{
int i, j, m, n;
//m:number of training data,n: number of attributes
m = 15; n = 5;
strDVect datas(m);
for (i = 0; i < m; i++)
{
datas[i].resize(n);
}
//first row save attributes
datas[0][0] = "age";
datas[0][1] = "inco";
datas[0][2] = "student";
datas[0][3] = "credit_rating";
datas[0][4] = "class:buys_computer";
datas[1][0] = "<=30";
datas[1][1] = "high";
datas[1][2] = "no";
datas[1][3] = "fair";
datas[1][4] = "no";
datas[2][0] = "<=30";
datas[2][1] = "high";
datas[2][2] = "no";
datas[2][3] = "excellent";
datas[2][4] = "no";
datas[3][0] = "31..40";
datas[3][1] = "high";
datas[3][2] = "no";
datas[3][3] = "fair";
datas[3][4] = "yes";
datas[4][0] = ">40";
datas[4][1] = "medi";
datas[4][2] = "no";
datas[4][3] = "fair";
datas[4][4] = "yes";
datas[5][0] = ">40";
datas[5][1] = "low";
datas[5][2] = "yes";
datas[5][3] = "fair";
datas[5][4] = "yes";
datas[6][0] = ">40";
datas[6][1] = "low";
datas[6][2] = "yes";
datas[6][3] = "excellent";
datas[6][4] = "no";
datas[7][0] = "31..40";
datas[7][1] = "low";
datas[7][2] = "yes";
datas[7][3] = "excellent";
datas[7][4] = "yes";
datas[8][0] = "<=30";
datas[8][1] = "medi";
datas[8][2] = "no";
datas[8][3] = "fair";
datas[8][4] = "no";
datas[9][0] = "<=30";
datas[9][1] = "low";
datas[9][2] = "yes";
datas[9][3] = "fair";
datas[9][4] = "yes";
datas[10][0] = ">40";
datas[10][1] = "medi";
datas[10][2] = "yes";
datas[10][3] = "fair";
datas[10][4] = "yes";
datas[11][0] = "<=30";
datas[11][1] = "medi";
datas[11][2] = "yes";
datas[11][3] = "excellent";
datas[11][4] = "yes";
datas[12][0] = "31..40";
datas[12][1] = "medi";
datas[12][2] = "no";
datas[12][3] = "excellent";
datas[12][4] = "yes";
datas[13][0] = "31..40";
datas[13][1] = "high";
datas[13][2] = "yes";
datas[13][3] = "fair";
datas[13][4] = "yes";
datas[14][0] = ">40";
datas[14][1] = "medi";
datas[14][2] = "no";
datas[14][3] = "excellent";
datas[14][4] = "no";
for (i = 0; i < m; i++)
{
for (j = 0; j < n; j++)
{
cout.width(15);
cout << setiosflags(ios::left) << datas[i][j];
}
cout << endl;
}
thriMap prob;
aimMap aimNum;
attMap attKind;
attKind.insert(make_pair("age", 3));
attKind.insert(make_pair("inco", 3));
attKind.insert(make_pair("student", 2));
attKind.insert(make_pair("credit_rating", 2));
attKind.insert(make_pair("class:buys_computer", 2));
getConditionStat(datas, prob, aimNum);
//init K
K = (double)1 / (m - 1);
getConditionProb(prob, aimNum, attKind);
show(prob);
sampleVect sampleIns;
sampleIns.push_back("31..40");
sampleIns.push_back("high");
sampleIns.push_back("no");
sampleIns.push_back("fair");
getClassification(datas, prob, aimNum, attKind, sampleIns, m - 1);
system("pause");
}