英语语法检查工具C++

最新推荐文章于 2023-10-23 17:48:20 发布

zerokkqq

最新推荐文章于 2023-10-23 17:48:20 发布

阅读量1.6k

点赞数

分类专栏： VC6 文章标签：工具 string vector vb c++ c

VC6 专栏收录该内容

33 篇文章

订阅专栏

英语语法检查工具,主要检查英语中的主谓一致,如第三人称、非第三人称,以及情态动词(如 should、can、may、must)和 there be 句型等。
先用句法分析工具进行词性标注,然后进行句法分析,结果如:(TOP (S (NP (PRP$ my) (NN name) ) (VP (VBP are) (NP (NNS tom) ) ) ) )
求高手帮小弟解决此程序

#include <iostream>
#include <fstream>
#include <sstream>
#include <string>
#include <vector>

using namespace std;

//function declaration
int evaluate(string line);
vector<string> sepString(string str);
int countWrong(vector<string> &abstract,int pos,int loc);
int findMaxNP(vector <string> &abstract,int pos);

int evaluate(string line)
{
int num = 0;
//string line;
vector <string> abstract;

int k = 4;//for can may ?
int M=0; //for may can ?
int canWrong = 0;
int VBZWrong = 0;
int VBPWrong = 0;
int articleWrong = 0;

int VBZ = 0;
int VBP = 0;
int article = 0;//record num of the whole abstract记录全部抽象数

num++;
if(line == "")
{
cout<<"句子为空"<<endl;
//continue;
}
// outFile<<"wrong of the abstract number:"<<num<<endl;
abstract = sepString(line);//调用向量函数
for(int i = 0; i < abstract.size();i++)
{
M = 0;//init
if(abstract[i] =="(VBZ"){VBZ++;}
if(abstract[i] =="(VBP"){VBP++;}
if(abstract[i] =="(DT"){article++;}
if(abstract[i] == "can)" || abstract[i] == "may)")//用规则判断类拟的情态动情后面是否是原型
{
for(int j = 1;j < k;j++)
{
if(i+j < abstract.size())
{
if(abstract[i+j] != "(VB" && abstract[i+j] != "(RB")
{
M++;
}
else
{
break;
}
}
}
if(M == 6)
{
canWrong++;
}
}
else if(abstract[i] == "(VBZ" || abstract[i] == "(VBP")
{
int locationMaxNP = 0;
int wrongFlag = 0;
locationMaxNP = findMaxNP(abstract,i);//
if(locationMaxNP != -1 && locationMaxNP != -2)
{
if(abstract[locationMaxNP] == "(NP")
{
wrongFlag = countWrong(abstract,locationMaxNP,i);
if(wrongFlag == 1)
{
VBZWrong++;
}
if(wrongFlag == 2)
{
VBPWrong++;
}
if(wrongFlag == 3)
{

articleWrong++;
}
if(wrongFlag == 4)
{

VBZWrong++;
articleWrong++;
}
if(wrongFlag == 5)
{

VBPWrong++;
articleWrong++;
}
}
else//to do next
{
if(abstract[locationMaxNP] == "(ADVP")
{
locationMaxNP = findMaxNP(abstract,locationMaxNP);
if(locationMaxNP != -1 && locationMaxNP != -2)
{
if(abstract[locationMaxNP] == "(NP")
{
wrongFlag = countWrong(abstract,locationMaxNP,i);
//////////
if(wrongFlag == 1)
{
VBZWrong++;
}
if(wrongFlag == 2)
{

VBPWrong++;
}
if(wrongFlag == 3)
{

articleWrong++;
}
if(wrongFlag == 4)
{

VBZWrong++;
articleWrong++;
}
if(wrongFlag == 5)
{

VBPWrong++;
articleWrong++;
}

}
else
{
continue;
}
}
else
{
continue;
}
}
else
{
continue;
//maybe wrong of the parser
}
}
}
else
{
continue;
}

}
}
//outFile<<endl;
// outFile<<"wrong of the abstract number:"<<num<<endl;
cout<<"VBZWrong:"<<VBZWrong<<endl;
cout<<"VBPWrong:"<<VBPWrong<<endl;
cout<<"articleWrong:"<<articleWrong<<endl;
cout<<"canWrong:"<<canWrong<<endl;
cout<<"VBZ"<<VBZ<<" "<<"VBP"<<VBP<<" "<<"article"<<article<<endl;

return 1;
}

// Returns tokens[index], or an empty string when index is out of range.
static std::string tokenOrEmpty(const std::vector<std::string> &tokens, int index)
{
    if (index < 0 || index >= static_cast<int>(tokens.size()))
    {
        return std::string();
    }
    return tokens[index];
}

/**
 * Classify the agreement problem between the noun phrase starting at `pos`
 * and the verb tag at `loc`, printing a diagnostic line for each finding.
 *
 * @param abstract  token list of one parse (not modified).
 * @param pos       index of an "(NP" token.
 * @param loc       index of the "(VBZ" / "(VBP" token being checked.
 * @return 0 when nothing is reported, otherwise the sum of
 *         1 (plural or "and"-joined subject with "(VBZ"),
 *         2 (no "(NNS" and no "and)" with "(VBP"),
 *         3 (no pronoun in the NP and its first child is neither "(DT" nor "(CD").
 *         Out-of-range `pos` or `loc` yields 0.
 */
int countWrong(std::vector<std::string> &abstract, int pos, int loc)
{
    const int size = static_cast<int>(abstract.size());
    if (pos < 0 || pos >= size || loc < 0 || loc >= size)
    {
        return 0;
    }

    int sum = 0;

    // Skip the opening "(NP" tag(s); the token after them is the first child.
    while (pos < size && abstract[pos] == "(NP")
    {
        pos++;
    }
    const int article = pos;

    int depth = 1;              // open brackets still to be closed
    bool hasPlural = false;     // saw "(NNS"
    bool hasPronoun = false;    // saw "(PRP" or "(PRP$"
    bool nounBeforeAnd = false; // saw "and)" preceded by an NN* tag
    int NNSpos = 0;
    int andpos = 0;

    // Walk the phrase until its bracket closes (or the tokens run out on
    // unbalanced input).
    while (depth != 0 && pos < size)
    {
        const std::string &tok = abstract[pos];
        if (tok.find(')') != std::string::npos)
        {
            depth--;
        }
        if (tok.find('(') != std::string::npos)
        {
            depth++;
        }
        if (tok == "(NNS")
        {
            hasPlural = true;
            NNSpos = pos;
        }
        if (tok == "(PRP" || tok == "(PRP$")
        {
            hasPronoun = true;
        }
        if (tok == "and)")
        {
            // Step back over "(CC" to the word before it, then on to that
            // word's opening tag.
            int back = pos - 2;
            while (back >= 0 && abstract[back].find('(') == std::string::npos)
            {
                back--;
            }
            if (back >= 0 && abstract[back].find("NN") != std::string::npos)
            {
                nounBeforeAnd = true; // "and" joins nouns: VBP is expected
                andpos = pos;
            }
        }
        pos++;
    }

    const bool verbIsVBZ = (abstract[loc] == "(VBZ");

    if (hasPlural && verbIsVBZ)
    {
        sum += 1; // VBZWrong
        std::cout << "location " << NNSpos << " have " << abstract[NNSpos] << "----" << abstract[loc]
                  << std::endl;
    }

    if (!hasPlural)
    {
        if (nounBeforeAnd)
        {
            if (verbIsVBZ)
            {
                std::cout << "location " << andpos << " have " << abstract[andpos] << "----" << loc << " "
                          << tokenOrEmpty(abstract, loc + 1) << " " << tokenOrEmpty(abstract, loc + 2) << std::endl;
                sum += 1; // VBZWrong
            }
        }
        else
        {
            // No conjunction inside the scanned phrase: look for one between
            // the end of the scan and the verb.
            bool andBeforeVerb = false;
            while (pos < loc)
            {
                if (abstract[pos] == "and)")
                {
                    andBeforeVerb = true;
                    andpos = pos;
                    break;
                }
                pos++;
            }
            if (andBeforeVerb && verbIsVBZ)
            {
                std::cout << "location " << andpos << " have " << abstract[andpos] << "----" << loc << " "
                          << tokenOrEmpty(abstract, loc + 1) << " " << tokenOrEmpty(abstract, loc + 2) << std::endl;
                sum += 1; // VBZWrong
            }
            else if (!andBeforeVerb && abstract[loc] == "(VBP")
            {
                std::cout << "have no [and] [NNS] " << "-----" << loc << " " << tokenOrEmpty(abstract, loc + 1) << " "
                          << tokenOrEmpty(abstract, loc + 2) << std::endl;
                sum += 2; // VBPWrong
            }
        }
    }

    if (!hasPronoun && article < size && abstract[article] != "(DT" && abstract[article] != "(CD")
    {
        std::cout << "NP have no DT" << std::endl;
        sum += 3; // article wrong
    }
    return sum;
}

/**
 * Find the opening tag of the constituent that closes just before `pos`.
 *
 * Walks backwards from `pos` to the nearest token containing ')' and then
 * further back, balancing brackets, to the token that opened it.
 *
 * @param abstract  token list of one parse (not modified).
 * @param pos       index to start searching backwards from.
 * @return index of the matching opening token;
 *         -1 when no closing bracket lies before `pos` (or only at index 0,
 *            or `pos` is past the end);
 *         -2 when the brackets before `pos` do not balance.
 */
int findMaxNP(std::vector<std::string> &abstract, int pos)
{
    if (pos >= static_cast<int>(abstract.size()))
    {
        return -1;
    }

    // Nearest token at or before pos that closes a bracket.
    while (pos >= 0 && abstract[pos].find(')') == std::string::npos)
    {
        pos--;
    }
    // pos < 0: the scan ran off the front, i.e. nothing precedes the verb.
    // The original tested only pos == 0 and reported this case as -2.
    if (pos <= 0)
    {
        return -1;
    }

    int depth = 1; // closing brackets still waiting for their opener
    pos--;
    while (pos >= 0 && depth != 0)
    {
        if (abstract[pos].find('(') != std::string::npos)
        {
            depth--;
        }
        else if (abstract[pos].find(')') != std::string::npos)
        {
            depth++;
        }
        pos--;
    }

    if (depth != 0)
    {
        return -2; // ( and ) do not match
    }
    return pos + 1; // undo the final decrement
}

/**
 * Split a string on whitespace and return the pieces in order.
 *
 * @param str  text to split.
 * @return the non-empty whitespace-separated tokens; empty when `str` has
 *         none, in which case "separate string wrong" is printed.
 */
std::vector<std::string> sepString(std::string str)
{
    std::istringstream in(str);
    std::vector<std::string> temp;
    std::string word;

    // Test the extraction itself: checking eof() before reading pushed the
    // last token a second time whenever the input ended in whitespace.
    while (in >> word)
    {
        temp.push_back(word);
    }

    if (temp.empty())
    {
        std::cout << "separate string wrong" << std::endl;
    }
    return temp;
}

/**
 * Runs evaluate() on one hard-coded sample parse.
 */
int main()
{
    // Other sample parses (assign one of these to s to try it):
    // "(TOP (S (NP (EX there) ) (VP (VBZ is) (NP (NP (DT a) (NN book) ) (PP (IN on) (NP (DT the) (NN desk) ) ) ) ) (. .) ) ) "
    // "(TOP (S (NP (PRP he) ) (VP (MD can) (VP (VB speak) (VP (VB english) ) ) ) ) )"
    std::string s = "(TOP (S (NP (PRP$ my) (NN name) ) (VP (VBP are) (NP (NNS tom) ) ) ) )";

    evaluate(s);
    return 0; // a non-zero exit status would report failure to the caller
}

ＮＰ：名词短语
ＶＰ：动词短语
ＰＰ：介词短语
ＣＣ：并列连词
ＣＤ：基数
ＤＴ：限定词
ＥＸ：存在
ＦＷ：外来词
ＩＮ：前置／从属连词
ＪＪ：形容词
ＪＪＲ：形容词比较级
ＪＪＳ：形容词最高级
ＬＳ：列表符号
ＭＤ：情态
ＮＮ：名词，单数或物质名词
ＮＮ：Ｕ：物质名词
ＮＮ：ＵＮ：可作为物质名词的名词
ＮＮＰ：专有名词　单数
ＮＮＰＳ：专有名词复数
ＮＮＳ：名词复数
ＰＤＴ：前限定词
ＰＲＰ：人称代词
ＰＲＰ$：所有格代词
ＲＢ：副词
ＲＢＲ：副词比较级
ＲＢＳ：副词最高级
ＲＰ：小品词
ＳＹＭ：符号
ＴＯ： to
ＵＨ：感叹词
ＶＢ：动词原型
ＶＢＤ：动词过去时
ＶＢＧ：动词现在分词
ＶＢＮ：动词过去分词
ＶＢＰ：动词　非第三人称
ＶＢＺ：动词　　第三人称
ＷＤＴ：　wh- 限定词
ＷＰ： wh- 代名词
ＷＰ$： wh- 代名词所有格
ＷＲＢ： wh- 副词