英语语法检查工具,主要检查英语中的主谓一致,如第三人称、非第三人称,以及情态动词(如 should、can、may、must)和 there be 等结构。
先用句法分析工具进行词性标注,然后进行句法分析,结果如:(TOP (S (NP (PRP$ my) (NN name) ) (VP (VBP are) (NP (NNS tom) ) ) ) )
求高手帮小弟解决此程序
#include <iostream>
#include <fstream>
#include <sstream>
#include <string>
#include <vector>
using namespace std;
// Forward declarations for the functions defined later in this file.
// evaluate: tokenizes one bracketed parse string, prints error counters, returns 1.
int evaluate(string line);
// sepString: splits a string on whitespace into a vector of tokens.
vector<string> sepString(string str);
// countWrong: inspects the noun phrase starting at pos against the verb tag at loc
// and returns a summed error code.
int countWrong(vector<string> &abstract,int pos,int loc);
// findMaxNP: walks backwards from pos and returns a token index, or a negative
// value when no matching opening token is found.
int findMaxNP(vector <string> &abstract,int pos);
// Adds one error code returned by countWrong to the running counters.
// Codes: 1 = VBZ error, 2 = VBP error, 3 = article error,
//        4 = VBZ + article error, 5 = VBP + article error; anything else is ignored.
static void tallyWrongFlag(int wrongFlag, int &VBZWrong, int &VBPWrong, int &articleWrong)
{
    switch (wrongFlag)
    {
    case 1: ++VBZWrong; break;
    case 2: ++VBPWrong; break;
    case 3: ++articleWrong; break;
    case 4: ++VBZWrong; ++articleWrong; break;
    case 5: ++VBPWrong; ++articleWrong; break;
    default: break;
    }
}

// Checks one bracketed parse (e.g. "(TOP (S (NP (PRP he) ) (VP (VBZ is) ...")
// for agreement problems and prints the resulting counters to stdout.
//
// The parse is split on whitespace and each token is examined in order:
//  - "(VBZ", "(VBP" and "(DT" tokens are counted;
//  - after the words "can)" / "may)" the next three tokens are inspected and
//    the modal is flagged when none of them is "(VB" or "(RB";
//  - for every "(VBZ" / "(VBP" the constituent closing right before the verb is
//    located with findMaxNP (skipping one "(ADVP" if present) and, when it is an
//    "(NP", countWrong classifies the error.
//
// line: the bracketed parse text; an empty string only triggers a message.
// Returns 1 unconditionally.
int evaluate(std::string line)
{
    // Number of tokens inspected after a modal verb.
    const int lookAhead = 3;

    int canWrong = 0;
    int VBZWrong = 0;
    int VBPWrong = 0;
    int articleWrong = 0;
    int VBZ = 0;
    int VBP = 0;
    int article = 0;

    if (line == "")
    {
        std::cout << "句子为空" << std::endl;
    }

    std::vector<std::string> abstract = sepString(line);
    const int total = static_cast<int>(abstract.size());

    for (int i = 0; i < total; ++i)
    {
        const std::string &token = abstract[i];

        if (token == "(VBZ") { ++VBZ; }
        if (token == "(VBP") { ++VBP; }
        if (token == "(DT") { ++article; }

        if (token == "can)" || token == "may)")
        {
            // Count look-ahead tokens seen before a base-form verb or adverb tag.
            int misses = 0;
            for (int j = 1; j <= lookAhead; ++j)
            {
                if (i + j >= total)
                {
                    break; // ran past the end of the sentence
                }
                if (abstract[i + j] == "(VB" || abstract[i + j] == "(RB")
                {
                    break;
                }
                ++misses;
            }
            // The original compared against 6, which a 3-token window can never
            // reach, so modal errors were never reported.
            if (misses == lookAhead)
            {
                ++canWrong;
            }
        }
        else if (token == "(VBZ" || token == "(VBP")
        {
            int subject = findMaxNP(abstract, i);
            // An adverb phrase may sit between subject and verb; look one
            // constituent further back in that case.
            if (subject >= 0 && abstract[subject] == "(ADVP")
            {
                subject = findMaxNP(abstract, subject);
            }
            if (subject >= 0 && abstract[subject] == "(NP")
            {
                tallyWrongFlag(countWrong(abstract, subject, i),
                               VBZWrong, VBPWrong, articleWrong);
            }
            // Anything else is left alone (possibly a parser error).
        }
    }

    std::cout << "VBZWrong:" << VBZWrong << std::endl;
    std::cout << "VBPWrong:" << VBPWrong << std::endl;
    std::cout << "articleWrong:" << articleWrong << std::endl;
    std::cout << "canWrong:" << canWrong << std::endl;
    std::cout << "VBZ" << VBZ << " " << "VBP" << VBP << " " << "article" << article << std::endl;
    return 1;
}
// Returns tokens[idx], or an empty string when idx is outside the vector.
static std::string tokenOrEmpty(const std::vector<std::string> &tokens, int idx)
{
    if (idx < 0 || idx >= static_cast<int>(tokens.size()))
    {
        return std::string();
    }
    return tokens[idx];
}

// Classifies the agreement between a subject noun phrase and a verb tag.
//
// abstract: whitespace-separated tokens of a bracketed parse.
// pos:      index of the "(NP" token that opens the subject phrase.
// loc:      index of the verb tag token ("(VBZ" or "(VBP").
//
// Leading "(NP" tokens are skipped, then the phrase is scanned until its
// brackets balance, recording whether it contains "(NNS", a "(PRP"/"(PRP$",
// or an "and)" whose preceding tagged token contains "NN".
//
// Returns the sum of:
//   1  plural / coordinated subject used with "(VBZ"
//   2  subject without "(NNS" or "and" used with "(VBP"
//   3  phrase has no pronoun and does not start with "(DT" or "(CD"
// so the possible results are 0..5. Diagnostics are printed to stdout.
// Indices outside the vector yield 0 instead of undefined behaviour.
int countWrong(std::vector<std::string> &abstract, int pos, int loc)
{
    const int size = static_cast<int>(abstract.size());
    if (pos < 0 || loc < 0 || loc >= size)
    {
        return 0;
    }

    // Step over the opening "(NP" token(s); the next token is where a
    // determiner is expected.
    while (pos < size && abstract[pos] == "(NP")
    {
        ++pos;
    }
    const int article = pos;

    bool hasPlural = false;   // saw "(NNS"
    bool hasPronoun = false;  // saw "(PRP" or "(PRP$"
    bool hasNounAnd = false;  // saw "and)" following a noun-tagged token
    int NNSpos = 0;
    int andpos = 0;

    // depth starts at 1 for the already-consumed "(NP"; the scan ends when the
    // phrase closes (or the tokens run out on malformed input).
    int depth = 1;
    while (depth != 0 && pos < size)
    {
        const std::string &tok = abstract[pos];
        if (tok.find(')') != std::string::npos)
        {
            --depth;
        }
        if (tok.find('(') != std::string::npos)
        {
            ++depth;
        }
        if (tok == "(NNS")
        {
            hasPlural = true;
            NNSpos = pos;
        }
        if (tok == "(PRP" || tok == "(PRP$")
        {
            hasPronoun = true;
        }
        if (tok == "and)")
        {
            // Walk back from two tokens before "and)" to the nearest tag token
            // and test whether it is a noun tag.
            int back = pos - 2;
            while (back >= 0 && abstract[back].find('(') == std::string::npos)
            {
                --back;
            }
            if (back >= 0 && abstract[back].find("NN") != std::string::npos)
            {
                hasNounAnd = true; // "and" joins two nouns: VBP is expected
                andpos = pos;
            }
        }
        ++pos;
    }

    const bool verbIsVBZ = (abstract[loc] == "(VBZ");
    const bool verbIsVBP = (abstract[loc] == "(VBP");
    const std::string next1 = tokenOrEmpty(abstract, loc + 1);
    const std::string next2 = tokenOrEmpty(abstract, loc + 2);

    int sum = 0;
    if (hasPlural)
    {
        if (verbIsVBZ)
        {
            sum += 1; // VBZ error
            std::cout << "location " << NNSpos << " have " << abstract[NNSpos]
                      << "----" << abstract[loc] << std::endl;
        }
    }
    else if (hasNounAnd)
    {
        if (verbIsVBZ)
        {
            std::cout << "location " << andpos << " have " << abstract[andpos]
                      << "----" << loc << " " << next1 << " " << next2 << std::endl;
            sum += 1; // VBZ error
        }
    }
    else
    {
        // Look for an "and)" between the end of the phrase and the verb.
        bool andBeforeVerb = false;
        while (pos < loc)
        {
            if (abstract[pos] == "and)")
            {
                andBeforeVerb = true;
                andpos = pos;
                break;
            }
            ++pos;
        }
        if (andBeforeVerb && verbIsVBZ)
        {
            std::cout << "location " << andpos << "have " << abstract[pos]
                      << "----" << loc << " " << next1 << " " << next2 << std::endl;
            sum += 1; // VBZ error
        }
        else if (!andBeforeVerb && verbIsVBP)
        {
            std::cout << "have no [and] [NNS] " << "-----" << loc << " " << next1
                      << " " << next2 << std::endl;
            sum += 2; // VBP error
        }
    }

    const std::string first = tokenOrEmpty(abstract, article);
    if (!hasPronoun && first != "(DT" && first != "(CD")
    {
        std::cout << "NP have no DT" << std::endl;
        sum += 3; // article error
    }
    return sum;
}
// Finds the constituent that closes immediately before position pos.
//
// Starting at pos, the tokens are walked backwards to the nearest one that
// contains ')'. From there the brackets are matched backwards until the
// opening token of that constituent is reached. The caller decides whether
// the token found is actually "(NP".
//
// Returns:
//   >= 0  index of the opening token of the preceding constituent
//   -1    nothing precedes pos that could be matched (e.g. the verb is the
//         first token, or pos is outside the vector)
//   -2    brackets before pos do not balance
int findMaxNP(std::vector<std::string> &abstract, int pos)
{
    const int size = static_cast<int>(abstract.size());
    if (pos >= size)
    {
        return -1;
    }

    // Back up to the closest token that closes a bracket.
    while (pos >= 0 && abstract[pos].find(')') == std::string::npos)
    {
        --pos;
    }
    // pos == -1: no closing token at all; pos == 0: nothing left to match.
    // (The original tested only pos == 0 and reported -2 for the former.)
    if (pos <= 0)
    {
        return -1;
    }

    int open = 1; // closing brackets still waiting for their opener
    --pos;
    while (pos >= 0 && open != 0)
    {
        const std::string &tok = abstract[pos];
        if (tok.find('(') != std::string::npos)
        {
            --open;
        }
        else if (tok.find(')') != std::string::npos)
        {
            ++open;
        }
        --pos;
    }

    if (open != 0)
    {
        return -2; // '(' and ')' do not match
    }
    return pos + 1; // undo the final decrement
}
// Splits str on whitespace and returns the tokens in order.
//
// Extraction itself drives the loop, so a failed read at end of input never
// re-appends the previous token (the original `while(!in.eof())` form
// duplicated the last token whenever str ended with whitespace).
// When no token can be read (empty or all-blank input) the diagnostic
// "separate string wrong" is printed and an empty vector is returned.
std::vector<std::string> sepString(std::string str)
{
    std::istringstream in(str);
    std::vector<std::string> tokens;
    std::string word;
    while (in >> word)
    {
        tokens.push_back(word);
    }
    if (tokens.empty())
    {
        std::cout << "separate string wrong" << std::endl;
    }
    return tokens;
}
// Runs the checker on one hard-coded sample parse.
// Returns 0 so the process reports success to the shell.
int main()
{
    // Alternative sample parses, kept for manual experiments:
    // s="(TOP (S (NP (EX there) ) (VP (VBZ is) (NP (NP (DT a) (NN book) ) (PP (IN on) (NP (DT the) (NN desk) ) ) ) ) (. .) ) ) ";
    // s="(TOP (S (NP (PRP he) ) (VP (MD can) (VP (VB speak) (VP (VB english) ) ) ) ) )";
    std::string s = "(TOP (S (NP (PRP$ my) (NN name) ) (VP (VBP are) (NP (NNS tom) ) ) ) )";
    evaluate(s);
    return 0;
}
NP:名词短语
VP:动词短语
PP:介词短语
CC:并列连词
CD:基数
DT:限定词
EX:存在
FW:外来词
IN:前置/从属连词
JJ:形容词
JJR:形容词比较级
JJS:形容词最高级
LS:列表符号
MD:情态
NN:名词(单数或不可数)
NN:U:物资名词
NN:UN:可作为物资名词的名词
NNP:专有名词 单数
NNPS:专有名词复数
NNS:名词复数
PDT:前限定词
PRP:人称代词
PRP$:所有格代词
RB:副词
RBR:副词比较级
RBS:副词最高级
RP:小品词
SYM:符号
TO: to
UH:感叹词
VB:动词原型
VBD:动词过去时
VBG:动词现在分词
VBN:动词过去分词
VBP:动词 非第三人称
VBZ:动词 第三人称
WDT: wh- 限定词
WP: wh- 代名词
WP$: wh- 代名词所有格
WRB: wh- 副词
http://topic.youkuaiyun.com/u/20080429/15/0214cf94-cfa2-49d4-bcb9-a06ca2715c0f.html#top