英语语法检查工具C++

英语语法检查工具,主要检查英语中的主谓一致,如第三人称、非第三人称,以及情态动词(如:should、can、may、must)和 there be 句型等。
先用句法分析工具进行词性标注,其次进行句法分析,结果如:(TOP (S (NP (PRP$ my) (NN name) ) (VP (VBP are) (NP (NNS tom) ) ) ) )
求高手帮小弟解决此程序


#include <iostream>
#include <fstream>
#include <sstream>
#include <string>
#include <vector>

using namespace std;

// Forward declarations of the functions defined later in this file.

// Tokenizes one bracketed parse line, counts agreement/article/modal problems,
// prints the counters to stdout and returns 1.
int evaluate(string line);
// Splits a string on whitespace and returns the tokens in order.
vector<string> sepString(string str);
// Inspects the noun phrase starting at `pos` against the verb tag at `loc`;
// returns a sum code: +1 VBZ problem, +2 VBP problem, +3 article problem.
int countWrong(vector<string> &abstract,int pos,int loc);
// Walks backwards from `pos` to the opening token of the bracket group that
// closes just before it; returns that index, or -1 / -2 when none is found.
int findMaxNP(vector <string> &abstract,int pos);

int evaluate(string line)
{
  int num = 0;
  //string line;
  vector <string> abstract;

  int k = 4;//for can may ?
  int M=0; //for may can ?
  int canWrong = 0;
  int VBZWrong = 0;
  int VBPWrong = 0;
  int articleWrong = 0;
   
  int VBZ = 0;
  int VBP = 0;
  int article = 0;//record num of the whole abstract记录全部抽象数

  num++; 
  if(line == "")
  {
cout<<"句子为空"<<endl;
  //continue;
  }
  // outFile<<"wrong of the abstract number:"<<num<<endl;
  abstract = sepString(line);//调用向量函数
  for(int i = 0; i < abstract.size();i++)
  {
  M = 0;//init
  if(abstract[i] =="(VBZ"){VBZ++;} 
  if(abstract[i] =="(VBP"){VBP++;}
  if(abstract[i] =="(DT"){article++;}
  if(abstract[i] == "can)" || abstract[i] == "may)")//用规则判断类拟的情态动情后面是否是原型
  {
  for(int j = 1;j < k;j++)
  {
  if(i+j < abstract.size())
  {
  if(abstract[i+j] != "(VB" && abstract[i+j] != "(RB")
  {
  M++;
  }
  else
  {
  break;
  }
  }
  } 
  if(M == 6)  
  {
  canWrong++;
  }
  }
  else if(abstract[i] == "(VBZ" || abstract[i] == "(VBP")
  {
  int locationMaxNP = 0;
  int wrongFlag = 0;
  locationMaxNP = findMaxNP(abstract,i);//
  if(locationMaxNP != -1 && locationMaxNP != -2)
  {
  if(abstract[locationMaxNP] == "(NP")
  {
  wrongFlag = countWrong(abstract,locationMaxNP,i);  
  if(wrongFlag == 1)
  {
  VBZWrong++;
  }  
  if(wrongFlag == 2)
  {
  VBPWrong++;
  }  
  if(wrongFlag == 3)
  {

  articleWrong++;  
  }  
  if(wrongFlag == 4)
  {

  VBZWrong++;
  articleWrong++;
  }
  if(wrongFlag == 5)
  {

  VBPWrong++;
  articleWrong++;
  }
  }
  else//to do next
  {
  if(abstract[locationMaxNP] == "(ADVP") 
  {
  locationMaxNP = findMaxNP(abstract,locationMaxNP);
  if(locationMaxNP != -1 && locationMaxNP != -2)
{
  if(abstract[locationMaxNP] == "(NP")
  {
  wrongFlag = countWrong(abstract,locationMaxNP,i);
  //////////
  if(wrongFlag == 1)
  {
  VBZWrong++;
  }
if(wrongFlag == 2)
  {

  VBPWrong++;
  }
if(wrongFlag == 3)
  {

  articleWrong++;
  }
  if(wrongFlag == 4)
  {

  VBZWrong++;
  articleWrong++;
  }
  if(wrongFlag == 5)
  {

  VBPWrong++;
  articleWrong++;
  }

  }
  else
  {
  continue;
  }
  }
  else
  {
  continue;
  }
  }
  else
  {
  continue;
  //maybe wrong of the parser
  }
  } 
  }
  else
  {
  continue;
  }
   
  }
  }
  //outFile<<endl;  
// outFile<<"wrong of the abstract number:"<<num<<endl;
  cout<<"VBZWrong:"<<VBZWrong<<endl;
  cout<<"VBPWrong:"<<VBPWrong<<endl;
  cout<<"articleWrong:"<<articleWrong<<endl;
  cout<<"canWrong:"<<canWrong<<endl;
  cout<<"VBZ"<<VBZ<<" "<<"VBP"<<VBP<<" "<<"article"<<article<<endl;

  return 1;
}

// Returns abstract[index], or an empty string when index is outside the vector.
static std::string tokenOrEmpty(const std::vector<std::string> &abstract, int index)
{
  if (index < 0 || index >= static_cast<int>(abstract.size()))
  {
    return std::string();
  }
  return abstract[index];
}

// Examines the noun phrase that starts at `pos` against the verb tag at `loc`
// and prints a diagnostic for every problem found.
//
// abstract: whitespace-separated tokens of a bracketed parse.
// pos:      index of the "(NP" token that opens the subject noun phrase.
// loc:      index of the verb tag token ("(VBZ" or "(VBP").
//
// Returns a sum code: +1 when a "(VBZ" verb follows a plural noun or an
// "and"-joined subject, +2 when a "(VBP" verb follows a subject with neither
// a plural noun nor "and", +3 when the noun phrase has no pronoun and does not
// start with "(DT" or "(CD". Returns 0 when `pos` is outside the vector.
int countWrong(std::vector<std::string> &abstract, int pos, int loc)
{
  const int tokenCount = static_cast<int>(abstract.size());
  if (pos < 0 || pos >= tokenCount)
  {
    return 0;
  }

  const std::string verbTag = tokenOrEmpty(abstract, loc);
  int sum = 0;

  // Skip the (possibly nested) "(NP" openers; the token after them is where
  // a determiner would be.
  while (pos < tokenCount && abstract[pos] == "(NP")
  {
    pos++;
  }
  const int article = pos;

  bool hasPluralNoun = false;      // saw "(NNS"
  bool hasPronoun = false;         // saw "(PRP" or "(PRP$"
  bool hasNounConjunction = false; // saw "and" right after a noun
  int NNSpos = 0;
  int andpos = 0;

  // Walk to the end of the noun phrase by tracking bracket depth. The bound
  // on pos keeps an unbalanced parse from running off the end of the vector.
  int depth = 1;
  while (depth != 0 && pos < tokenCount)
  {
    const std::string &token = abstract[pos];
    if (token.find(')') != std::string::npos)
    {
      depth--;
    }
    if (token.find('(') != std::string::npos)
    {
      depth++;
    }
    if (token == "(NNS")
    {
      hasPluralNoun = true;
      NNSpos = pos;
    }
    if (token == "(PRP" || token == "(PRP$")
    {
      hasPronoun = true;
    }
    if (token == "and)")
    {
      // pos-1 is the tag of "and" itself; search backwards from pos-2 for the
      // tag of the word before it.
      int tagPos = pos - 2;
      while (tagPos >= 0 && abstract[tagPos].find('(') == std::string::npos)
      {
        tagPos--;
      }
      if (tagPos >= 0 && abstract[tagPos].find("NN") != std::string::npos)
      {
        hasNounConjunction = true; // "and" joins two nouns: VBP is expected
        andpos = pos;
      }
    }
    pos++;
  }

  if (hasPluralNoun && verbTag == "(VBZ")
  {
    sum += 1; // VBZWrong
    std::cout << "location " << NNSpos << " have " << abstract[NNSpos] << "----" << verbTag
              << std::endl;
  }

  if (!hasPluralNoun)
  {
    if (hasNounConjunction)
    {
      if (verbTag == "(VBZ")
      {
        std::cout << "location " << andpos << " have " << abstract[andpos] << "----" << loc << " "
                  << tokenOrEmpty(abstract, loc + 1) << " " << tokenOrEmpty(abstract, loc + 2)
                  << std::endl;
        sum += 1; // VBZWrong
      }
    }
    else
    {
      // No conjunction inside the noun phrase: look for "and" between the end
      // of the noun phrase and the verb.
      bool andBeforeVerb = false;
      while (pos < loc && pos < tokenCount)
      {
        if (abstract[pos] == "and)")
        {
          andBeforeVerb = true;
          andpos = pos;
          break;
        }
        pos++;
      }
      if (andBeforeVerb && verbTag == "(VBZ")
      {
        std::cout << "location " << andpos << "have " << abstract[andpos] << "----" << loc << " "
                  << tokenOrEmpty(abstract, loc + 1) << " " << tokenOrEmpty(abstract, loc + 2)
                  << std::endl;
        sum += 1; // VBZWrong
      }
      else if (!andBeforeVerb && verbTag == "(VBP")
      {
        std::cout << "have no [and] [NNS] " << "-----" << loc << " "
                  << tokenOrEmpty(abstract, loc + 1) << " " << tokenOrEmpty(abstract, loc + 2)
                  << std::endl;
        sum += 2; // VBPWrong
      }
    }
  }

  const std::string firstInNP = tokenOrEmpty(abstract, article);
  if (!hasPronoun && firstInNP != "(DT" && firstInNP != "(CD")
  {
    std::cout << "NP have no DT" << std::endl;
    sum += 3; // article wrong
  }
  return sum;
}


//找最长的NP
// Finds the bracket group that closes immediately before position `pos`.
//
// Starting at `pos`, scans backwards to the nearest token containing ')',
// then keeps walking backwards, matching '(' against ')', until the opening
// token of that group is reached.
//
// abstract: whitespace-separated tokens of a bracketed parse.
// pos:      index to start from (the verb tag, or a group opener).
//
// Returns the index of the group's opening token (e.g. "(NP"),
//         -1 when no group precedes `pos` (including an out-of-range `pos`),
//         -2 when the brackets before `pos` do not balance.
int findMaxNP(std::vector<std::string> &abstract, int pos)
{
  if (pos < 0 || pos >= static_cast<int>(abstract.size()))
  {
    return -1;
  }

  // Step back to the closest token that closes a bracket.
  while (pos >= 0 && abstract[pos].find(')') == std::string::npos)
  {
    pos--;
  }
  // pos < 0: no closing token before the start position; pos == 0: nothing
  // is left in front of it to match. Previously the pos < 0 case fell through
  // and was reported as -2 (unbalanced).
  if (pos <= 0)
  {
    return -1;
  }

  int unmatched = 1; // closing brackets still waiting for their opener
  pos--;
  while (pos >= 0 && unmatched != 0)
  {
    if (abstract[pos].find('(') != std::string::npos)
    {
      unmatched--;
    }
    else if (abstract[pos].find(')') != std::string::npos)
    {
      unmatched++;
    }
    pos--;
  }

  if (unmatched != 0)
  {
    return -2; // ( and ) do not match
  }
  return pos + 1; // undo the final decrement: index of the opener
}

//字符串以空格分开,放到向量里面
// Splits `str` on whitespace and returns the tokens in their original order.
// Prints "separate string wrong" when the input contains no token at all.
std::vector<std::string> sepString(std::string str)
{
  std::istringstream in(str);
  std::vector<std::string> tokens;
  std::string word;
  // Loop on the extraction itself: testing eof() before reading pushed the
  // last token twice whenever the input ended with whitespace, because the
  // failed extraction leaves `word` unchanged.
  while (in >> word)
  {
    tokens.push_back(word);
  }
  if (tokens.empty())
  {
    std::cout << "separate string wrong" << std::endl;
  }
  return tokens;
}


// Runs the checker on one hard-coded sample parse. The commented-out
// assignments are alternative samples; each is kept on a single line because a
// wrapped "//" comment would leave its continuation as live code.
int main()
{
std::string s;
// s="(TOP (S (NP (EX there) ) (VP (VBZ is) (NP (NP (DT a) (NN book) ) (PP (IN on) (NP (DT the) (NN desk) ) ) ) ) (. .) ) ) ";
// s="(TOP (S (NP (PRP he) ) (VP (MD can) (VP (VB speak) (VP (VB english) ) ) ) ) )";
  s="(TOP (S (NP (PRP$ my) (NN name) ) (VP (VBP are) (NP (NNS tom) ) ) ) )";

evaluate(s);
return 0; // 0 reports success to the caller; the previous value 1 signalled failure
}

 

 


NP:名词短语
VP:动词短语
PP:介词短语
CC:并列连词
CD:基数
DT:限定词
EX:存在
FW:外来词
IN:前置/从属连词
JJ:形容词
JJR:形容词比较级
JJS:形容词最高级
LS:列表符号
MD:情态
NN:名词单数或不可数名词
NN:U:物质名词
NN:UN:可作为物质名词的名词
NNP:专有名词 单数
NNPS:专有名词复数
NNS:名词复数
PDT:前限定词
PRP:人称代词
PRP$:所有格代词
RB:副词
RBR:副词比较级
RBS:副词最高级
RP:小品词
SYM:符号
TO: to
UH:感叹词
VB:动词原型
VBD:动词过去时
VBG:动词现在分词
VBN:动词过去分词
VBP:动词 非第三人称单数现在时
VBZ:动词 第三人称单数现在时
WDT: wh- 限定词
WP: wh- 代名词
WP$: wh- 代名词所有格
WRB: wh- 副词

http://topic.youkuaiyun.com/u/20080429/15/0214cf94-cfa2-49d4-bcb9-a06ca2715c0f.html#top

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值