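/*
 * Task: given a sentence in which some Chinese characters have been replaced
 * by the ZhuYin initial of their pronunciation, restore the original sentence.
 *
 * Lessons learned:
 *  - When calling a function that takes a char*, remember to terminate the
 *    buffer with '\0'.
 *  - Remember to close files promptly.
 *  - Scope may matter.
 *  - map<string,string> with Big5 text is painful ("sucks").
 *  - char* to int.
 */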
#include <algorithm>
#include <cstring>
#include <fstream>
#include <iostream>
#include <map>
#include <sstream>
#include <string>
#include <vector>
#include "Ngram.h"
#include "Prob.h"
using namespace std;
// Forward declarations of the helpers defined after main().
// Returns a copy of s with every ' ' character removed.
string deleteBlank(string s);
// Reads the file named by mapPath and returns zyMap extended with one entry per line.
map<int, string> readMap(map<int, string> zyMap);
// Looks up w1 and w2 in voc and returns lm.wordProb for w2 given the context w1.
double gramProb(const char *w1, const char *w2);
// Decodes one sentence (spaces already removed) into the best-scoring character sequence.
string bestStr(string s,map<int,string> zyMap);
// Packs the two bytes ch[0], ch[1] into a single int (ch[0] in the high byte).
int big5ToInt(char *ch);
// Directory prefix of the input files; main() reads "<testPath><n>.txt".
string testPath="./testdata/";
// Directory prefix of the output files; main() writes "<resultPath><n>.txt".
string resultPath="./result2/";
// File opened by readMap().
string mapPath="ZhuYin-Big5.map";
// Vocabulary shared by the language model and gramProb().
// Must stay declared before lm, whose constructor receives it.
Vocab voc;
// Language model built over voc; the 2 is presumably the n-gram order (bigram) — confirm against Ngram.h.
Ngram lm( voc,2);
int main()
{
map<int, string> zyMap;
zyMap=readMap(zyMap);
File lmFile("./bigram.lm", "r" );
lm.read(lmFile);
lmFile.close();
int i=1;
for(i=1;i<11;i++)
{
stringstream ss;
string s;
ss<<i;
ss>>s;
string path1=testPath+s+".txt";
string path2=resultPath+s+".txt";
ifstream infile(path1.c_str());
ofstream outfile(path2.c_str(),ios::app);
string line;
while(getline(infile,line))
{
// const char *cline=line.c_str();
string newline=deleteBlank(line);
string bestString=bestStr(newline,zyMap);
outfile<<bestString.c_str();
}
infile.close();
outfile.close();
}
return 0;
}
// Packs the two bytes ch[0] and ch[1] into one int: ch[0] * 256 + ch[1].
// ch must point to at least two readable bytes.
// The bytes are used as plain char, so on platforms where char is signed a
// byte >= 0x80 contributes a negative value; distinct byte pairs still give
// distinct results.
int big5ToInt(char *ch)
{
    // Multiply instead of shifting: left-shifting a negative value is
    // undefined before C++20, and the lead byte may be negative here.
    return static_cast<int>(ch[0]) * 256 + ch[1];
}
// Returns a copy of s with every ' ' (space) character removed.
// Other whitespace such as tabs or '\r' is left untouched.
string deleteBlank(string s)
{
    // Erase-remove compacts the string in one pass instead of shifting the
    // tail once per removed space.
    s.erase(std::remove(s.begin(), s.end(), ' '), s.end());
    return s;
}
//read the map
map<int, string> readMap(map<int, string> zyMap)
{
ifstream infile(mapPath.c_str());
string line;
int i=0;
while(getline(infile,line))
{
char w[2];
line=deleteBlank(line);
w[0]=line.c_str()[0];
w[1]=line.c_str()[1];
int key=w[1]+(w[0]<<8);
string value=line.substr(2,line.length()-2);
zyMap[key]=value;
}
return zyMap;
}
//compute the most possible string
string bestStr(string s,map<int,string> zyMap)
{
int k=0;
string bestString;
// double candidateNum[s.length()/2];
int trackMatrix[s.length()/2][10000];
double vMatrix[s.length()/2][10000];
memset(vMatrix,0.0,5000*s.length()*sizeof(double));
memset(trackMatrix,0,5000*s.length()*sizeof(int));
int maxId=0;
double max=LogP_Zero;;
double prob;
char wo1[3];
char wo2[3];
wo1[2]='\0';
wo2[2]='\0';
char w1[2];
char w2[2];
int word1;
int word2;
int i=0;
int j=0;
double maxPro=LogP_Zero;
string values1;
string values2;
int l2;
int l1;
int sl=s.length();
for(k=0;k<=sl-4;k+=2)
{
w1[0]=s.c_str()[k];
w1[1]=s.c_str()[k+1];
w2[0]=s.c_str()[k+2];
w2[1]=s.c_str()[k+3];
word1=w1[1]+(w1[0]<<8);
word2=w2[1]+(w2[0]<<8);
values1=zyMap[word1];
values2=zyMap[word2];
l2=values2.length();
l1=values1.length();
for(j=0;j<l2/2;j++)
{
maxPro=LogP_Zero;
wo2[0]=values2.c_str()[2*j];
wo2[1]=values2.c_str()[2*j+1];
for(i=0;i<l1/2;i++)
{
wo1[0]=values1.c_str()[2*i];
wo1[1]=values1.c_str()[2*i+1];
prob=gramProb(wo1,wo2);
if((prob+vMatrix[k/2][i])>maxPro)
{
maxPro=prob+vMatrix[k/2][i];
trackMatrix[k/2+1][j]=i;
}
}
if(k==(sl-4)&&maxPro>max)
{
maxId=j;
max=maxPro;
}
vMatrix[k/2+1][j]=maxPro;
}
}
//cout<<"maxId:"<<maxId<<endl;
//back tracking
string resultStr="";
int word;
char bestWord[3];
bestWord[2]='\0';
string values;
for(k=sl/2-1;k>=0;k--)
{
word=s.c_str()[2*k+1]+(s.c_str()[2*k]<<8);
values=zyMap[word];
bestWord[0]=values.c_str()[maxId*2];
bestWord[1]=values.c_str()[maxId*2+1];
resultStr=" "+string(bestWord)+resultStr ;
if(k>0)
maxId=trackMatrix[k][maxId];
}
resultStr="<s> "+resultStr+" </s>\n";
return resultStr;
}
double gramProb(const char *w1, const char *w2)
{
VocabIndex wid1 = voc.getIndex(w1);
VocabIndex wid2 = voc.getIndex(w2);
if(wid1 == Vocab_None) //OOV
wid1 = voc.getIndex(Vocab_Unknown);
if(wid2 == Vocab_None) //OOV
wid2 = voc.getIndex(Vocab_Unknown);
VocabIndex context[] = { wid1, Vocab_None };
return lm.wordProb( wid2, context);
}