一个简单的C语言词法分析与语法分析器【原】


词法分析
可识别内容:
标识符:id
数字:num
关键字:int,char,if,else,while,do,for
标号:`,`、`.`、`;`
算术运算符号:=,+,-,*,/,&,!,|,&&,||
关系运算符:<,<=,>=,>,==,!=
注释://

内码定义:
单个符号,如{,+,*,> 等,均使用其ascii码做内码,占双或多个字节的符号(包括保留字,标号,数字,运算符等)为其取名如下:
enum { END=0,INT,CHAR,IF,ELSE,WHILE=5,
DO,FOR,ARGAND,ARGOR,NUM=10,
ID,LESSEQUAL,EQUAL,GREATEQUAL,NOTEQUAL=15 };
其中NUM代表数字,ID代表标识符.

测试程序1-1的词法分析结果如下:

内码表
123{
11X
61=
1012
43+
43+
11b
47/
1013
45-
105
42*
104
42*
109
59;
11Y
61=
104
42*
101024
59;
3if
40(
11X
14>=
11Y
41)
123{
3if40(
11i
13==
11i
41)
123{
11X
61=
11Y
125}
125}
59;
59;
59;
59;
5while
40(
11X
60<
11Y
41)
123{
11X
61=
11X
43+
101
59;
125}
125}


语法分析
C语言子集,可支持
语句块,语句,条件语句,While循环语句,赋值语句,基本算术表达式等。例如:
{
// Comment Supported : This is only a Test ^_^

X = 12 + b / 13 - 5 * 4 * 9;// A AssignmentStatement
Y = 4 * 1024;
if( X >= Y){
if( i == i){// This is nested if Statement
X=Y;
}
}
;;;;// This is Null Statement
while( X < Y){// This is while Statement
X = X +1;
}
}
测试程序1-1

支持错误检测,如将上面例子中X = 12 + b / 13 - 5 * 4 * 9;
故意修改为:X = 12 ++ b / 13 - 5 * 4 * 9; 则会出现如下错误提示,指示了出错行数和行内偏移位置:

规则如下:
<StatementBlock> ::= '{'<StatementSequence>'}'
<StatementSequence> ::= {<NullStatement>|<CommonStatement>|<VariantStatement>}

<NullStatement> ::= ';'
<CommonStatement> ::= <AssignmentStatement>
<VariantStatement> ::= <ConditionStatement>| <LoopWhileStatement>

<AssignmentStatement> ::= ID=<Expression>
<ConditionStatement> ::= if(<Condition>)<StatementBlock>
<LoopWhileStatement> ::= while(<Condition>)<StatementBlock>

<Condition> ::= <Expression><RelationOperator><Expression>
<Expression> ::= <Item>{+<Item>|-<Item>}
<Item> ::= <Factor>{*<Factor>|/<Factor>}
<Factor> ::= ID|NUM|(<Expression>)
<RelationOperator> ::= <|<=|>=|>|==|!=

//非终结符的英文定义
void StatementBlock();//语句块
void StatementSequence();//语句串

// XxxxxStatement() 为三类语句
void NullStatement();//空语句--仅仅含有一个;号
void CommonStatement();//语句
void VariantStatement();//变种语句--包括 if(){},while(){},它们都不以;结尾

// 下面的属于CommonStatement
void AssignmentStatement();//赋值语句

// 下面两种属于VariantStatement
void ConditionStatement();//条件语句
void LoopWhileStatement();//while循环语句

void Condition();//条件
void Expression();//表达式
void Item();//项
void Factor();//因子
void RelationOperator();//关系运算符


不能支持的主要方面:函数调用的识别,逗号表达式,for循环,switch语句。


词法分析:

// LexAly.cpp:C子集词法分析程序
/**/ /*

支持内容:
标识符:id
关键字:int,char,if,else,while,do,for
标号:,,.,;
算术运算符号:=,+,-,&,!,|,&&,||

全局字符串:
instr记录待解析的字符串
string存贮当前被解析到的ID

接口:
gettoken();



Sample:
输入:
instr=
for(i=0;i<10;i++){
j=i+10;
printf("%d",j);
}
输出:
for
(
i
……
}


注意:
要记得处理程序中不可见的字符,如空格(' ')、换行('\n')、制表符('\t')
*/


#include "stdafx.h"

#include <ctype.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "Constant.h"

// Entry point of the grammar checker; main_grammar() calls it after the
// source text has been loaded into the input buffer.
extern
void grammar_check();


// Printable spelling of every token that has a named internal code, indexed
// by that code. NUM and ID have no fixed spelling, so their slots are empty.
// NOTE: the named codes must stay below 32 so that they never collide with
// single-character tokens, which use their own ASCII value as internal code.
char index[][20] = {
    "ENDOFFile", /*  0 END        */
    "int",       /*  1 INT        */
    "char",      /*  2 CHAR       */
    "if",        /*  3 IF         */
    "else",      /*  4 ELSE       */
    "while",     /*  5 WHILE      */
    "do",        /*  6 DO         */
    "for",       /*  7 FOR        */
    "&&",        /*  8 ARGAND     */
    "||",        /*  9 ARGOR      */
    "",          /* 10 NUM        */
    "",          /* 11 ID         */
    "<=",        /* 12 LESSEQUAL  */
    "==",        /* 13 EQUAL      */
    ">=",        /* 14 GREATEQUAL */
    "!=",        /* 15 NOTEQUAL   */
    ""           /* 16 spare slot (no token uses it) */
};

// Whole source text under analysis, NUL-terminated.
char input[10000] = {0};

// Scan cursor: points at the next character the lexer will look at.
char *instr = input;
// Fixed pointer to the first character of the source text.
char *const start_of_instr = input;

// string holds the text of the identifier or number most recently scanned by gettoken
// sym holds the internal code of the token most recently returned by gettoken
// current_line holds the current line number (starts at 1)
char string[MAX_INDENT];
int sym;
int current_line = 1 ;
// Value of the scan cursor (converted to int) at the start of the current
// line; _gettoken() stores it each time it consumes a line break.
int start_pos_of_current_line;

char * strstart; // start of the number/identifier currently being scanned

// Public scanner entry point and the worker that does the actual scanning.
int gettoken();
int _gettoken();

// Helpers for error reporting and line/position bookkeeping.
void error( char * cur);
char * getlinestring( int line, char * in_buf);
int nextline();
int getline();
int getcurrentpos();


int nextline() ... {return++current_line;}
int getline() ... {returncurrent_line;}
int getcurrentpos() ... {return(int)instr;}

char * getlinestring( int line, char * in_buf)
... {
char*t=input;
inti=1;

while(*t!=0&&i<line)...{
if(*t==' ')i++;
t
++;
}


intlen=0;
while(*t!=' ')...{
in_buf[len]
=*t;
len
++;
t
++;
}

in_buf[len]
=0;
returnin_buf;

}


void error( char * cur)
... {
printf(
"SpellErrorfoundatline%d ",getline());
exit(
0);

}



// 语法分析
int main_grammar( char * filename)
... {
inti;

FILE
*f;
if(!(f=fopen(filename,"r")))...{
printf(
"Failtoopensourcefile%s! ",filename);
exit(
0);
}

intk=0;
charc;
while((c=fgetc(f))!=EOF)
...{
input[k]
=c;
k
++;
}

input[k]
=0;

//打印出程序
printf("%s ",start_of_instr);

//开始语法检查
grammar_check();

printf(
"Success! ");
return0;
}


// 词法分析
int main_spell( char * filename)
... {
inti;

FILE
*f;
if(!(f=fopen(filename,"r")))...{
printf(
"Failtoopensourcefile%s! ",filename);
exit(
0);
}




intk=0;
charc;
while((c=fgetc(f))!=EOF)
...{
input[k]
=c;
k
++;
}


input[k]
=0;

printf(
"%s ",start_of_instr);


while((i=gettoken())!=END)
...{
if(i==ID)...{
printf(
"%d %s ",i,string);
continue;
}

if(i==NUM)...{
printf(
"%d %s ",i,string);
continue;
}


if(i<20)...{
printf(
"%d %s ",i,index[i]);
}
else...{
printf(
"%d %c ",i,i);
}

}


return0;
}


int gettoken()
... {
inti=(sym=_gettoken());

#
if0
if(i==ID)...{
printf(
"%s",string);
}

if(i==NUM)...{
printf(
"%s",string);
}


if(i<20)...{
printf(
"%s",index[i]);
}
else...{
printf(
"%c",i);
}

#endif
returnsym;
}



int _gettoken()
... {
char*cp=instr;

for(;;)...{
if(*instr==0)
returnEND;

/**//*
if(可能读入的字符>当前可用缓冲区大小)
扩展缓冲区
*/

//int,char,if,else,while,do,for
switch(*instr)
...{
case'i':
if(instr[1]=='f'&&notda(instr[2]))
...{
instr
+=2;returnIF;
}

if(instr[1]=='n'&&instr[2]=='t'&&notda(instr[3]))
...{
instr
+=3;returnINT;
}

//notakeyword.butanid.
strstart=instr;
instr
++;
gotoid_label;
case'c':
if(instr[1]=='h'&&instr[2]=='a'&&instr[3]=='r'&&notda(instr[4]))
...{instr+=4;returnCHAR;}
strstart
=instr;
instr
++;
gotoid_label;
break;
case'e':
if(instr[1]=='l'&&instr[2]=='s'&&instr[3]=='e'&&notda(instr[4]))
...{instr+=4;returnELSE;}
strstart
=instr;
instr
++;
gotoid_label;
break;
case'w':
if(instr[1]=='h'&&instr[2]=='i'&&instr[3]=='l'&&instr[4]=='e'&&notda(instr[5]))
...{instr+=5;returnWHILE;}
strstart
=instr;
instr
++;
gotoid_label;
case'd':
if(instr[1]=='o'&&notda(instr[4]))
...{instr+=2;returnDO;}
strstart
=instr;
instr
++;
gotoid_label;
case'f':
if(instr[1]=='o'&&instr[2]=='r'&&notda(instr[3]))
...{instr+=3;returnFOR;}
strstart
=instr;
instr
++;
gotoid_label;
//dealwithIDs.
//EXCLUDE:i,c,d,e,w,f
case'a':;case'b':;
case'g':;case'h':;
case'j':;case'k':;case'l':;
case'm':;case'n':;case'o':;
case'p':;case'q':;case'r':;
case's':;case't':;case'u':;
case'v':;case'x':;
case'y':;case'z':;
case'A':;case'B':;
case'C':;case'D':;case'E':;
case'F':;case'G':;case'H':;
case'I':;case'J':;case'K':;
case'L':;case'M':;case'N':;
case'O':;case'P':;case'Q':;
case'R':;case'S':;case'T':;
case'U':;case'V':;case'W':;
case'X':;case'Y':;case'Z':;
strstart
=instr;
instr
++;
gotoid_label;

case'0':;
case'1':;case'2':;case'3':;
case'4':;case'5':;case'6':;
case'7':;case'8':;case'9':;
strstart
=instr;
instr
++;
gotonum_label;

case'{':
instr
++;
return'{';
case'}':
instr
++;
return'}';
case'(':
instr
++;
return'(';
case')':
instr
++;
return')';
case'+':
instr
++;
return'+';
case'-':
instr
++;
return'-';
case'*':
instr
++;
return'*';
case'/':
if(instr[1]=='/')...{////’形式的注释
instr+=2;
while(*(instr)!=10&&*(instr)!=0)
instr
++;
//instr++;
}
else...{//除号'/'
instr++;
return'/';
}

break;
case'=':
if(instr[1]=='=')...{instr+=2;returnEQUAL;}
else...{instr++;return'=';}
break;
case'<':
if(instr[1]=='=')...{instr+=2;returnLESSEQUAL;}
else...{instr++;return'<';}
break;
case'>':
if(instr[1]=='=')...{instr+=2;returnGREATEQUAL;}
else...{instr++;return'>';}
break;
case'!':
if(instr[1]=='=')...{instr+=2;returnNOTEQUAL;}
else...{instr++;return'!';}
break;

case'&':
if(instr[1]=='&')...{instr+=2;returnARGAND;}
if(instr[1]=='&'&&(isid(instr[2])||isspace(instr[2])))...{instr++;return'&';}
error(instr);
break;

case'|':
if(instr[1]=='|')...{instr+=2;returnARGAND;}
if(instr[1]=='|'&&(isid(instr[2])||isspace(instr[2])))...{instr++;return'|';}
error(instr);
break;

case';':
instr
++;
return';';
case' ':
//printf("newline(%d) ",getline());
nextline();
instr
++;
start_pos_of_current_line
=(int)instr;
break;
default:
instr
++;
break;
id_label:
while(isid(*instr))
instr
++;
strncpy(string,strstart,instr
-strstart);
string[instr
-strstart]=0;
returnID;
num_label:
while(isdigit(*instr))
instr
++;

//if(isalpha(*(instr+1))
//error(instr);让语法分析来做吧~

strncpy(string,strstart,instr
-strstart);
string[instr
-strstart]=0;
returnNUM;
}




}

}



int main( int argc, char * argv[])
... {
if(argc<=1||argc>=4)...{
printf(
"Usage:>LexAly[g|s][filename] ");
exit(
0);
}


if(argc==3)...{
argv[
1][0]=='g'?main_grammar(argv[2]):main_spell(argv[2]);
}
elseif(argc==2)...{
argv[
1][0]=='g'?main_grammar("source2.txt"):main_spell("source2.txt");
;
}

return0;
}


// grammar.cpp:C子集语法分析程序
/**/ /*
C语言子集,可支持
语句块,语句,条件语句,While循环语句,赋值语句,基本算术表达式等。例如:
{
//CommentSupported:ThisisonlyaTest^_^

X=12+b/13-5*4*9;//AAssignmentStatement
Y=4*1024;

if(X>=Y){
if(i==i){//ThisisnestedifStatement
X=Y;
}
}

;;;;//ThisisNullStatement

while(X<Y){//ThisiswhileStatement
X=X+1;
}
}

规则如下:

<StatementBlock>::='{'<StatementSequence>'}'
<StatementSequence>::={<NullStatement>|<CommonStatement>|<VariantStatement>}

<NullStatement>::=';'
<CommonStatement>::=<AssignmentStatement>
<VariantStatement>::=<ConditionStatement>|<LoopWhileStatement>

<AssignmentStatement>::=ID=<Expression>
<ConditionStatement>::=if(<Condition>)<StatementBlock>
<LoopWhileStatement>::=while(<Condition>)<StatementBlock>

<Condition>::=<Expression><RelationOperator><Expression>
<Expression>::=<Item>{+<Item>|-<Item>}
<Item>::=<Factor>{*<Factor>|/<Factor>}
<Factor>::=ID|NUM|(<Expression>)
<RelationOperator>::=<|<=|>=|>|==|!=

*/




#include
" stdafx.h "
#include
< ctype.h >
#include
< conio.h >
#include
< stdlib.h >
#include
< string.h >

#include
" Constant.h "

// Scanner interface provided by the lexer (LexAly.cpp).
extern int gettoken();
extern int getcurrentpos();
// Declared as an array to match the lexer's definition; declaring it as a
// pointer would make this file misread the array's bytes as an address.
extern char string[];
extern int sym;
extern int current_line;
extern int start_pos_of_current_line;
extern char *getlinestring(int line, char *in_buf);
extern char input[];

// Parser routines, one per grammar non-terminal

void StatementBlock(); // statement block
void StatementSequence(); // statement sequence

// The XxxxxStatement() routines cover the three kinds of statement
void NullStatement(); // null statement -- consists of a single ';'
void CommonStatement(); // ordinary statement
void VariantStatement(); // variant statement -- if(){} and while(){}, neither ends with ';'

// The following belongs to CommonStatement
void AssignmentStatement(); // assignment statement

// The following two belong to VariantStatement
void ConditionStatement(); // conditional (if) statement
void LoopWhileStatement(); // while loop statement

void Condition(); // condition
void Expression(); // expression
void Item(); // item (term)
void Factor(); // factor
void RelationOperator(); // relational operator

/**/ /*
注:以上未考虑函数调用表达式
*/



void match_error( char * c)
... {
charerror_buf[1024];
intin_line_pos=getcurrentpos()-start_pos_of_current_line;

printf(
"GrammarError! ");
printf(
"Line%d[%d]:%sexpected ",current_line,in_line_pos,c);

//获取错误行并打印出来
getlinestring(current_line,error_buf);
printf(
"%s ",error_buf);
//输出错误指示点(NotExact!)
for(inti=1;i<in_line_pos;i++)
printf(
"%c",'^');
printf(
" ");

exit(
0);
}


// expecetedSym期望符号
// msg出错时给出的消息
void match( int expecetedSym, char * msg)
... {
if(expecetedSym!=sym)...{
#
if0
if(sym<0x20)
printf(
" Fail:ExpecetedSym=%dsym=%d ",expecetedSym,sym);
else
printf(
" Fail:ExpecetedSym=%dsym=%c ",expecetedSym,sym);
#endif
match_error(msg);
}

gettoken();
//预读一个符号

}

void grammar_check()
... {

//printf("%s",input);
gettoken();//开始检查,填充预读区
//match('-',"DK");
StatementBlock();
if(sym!=END)match(END,"EndOfFile");
}


void StatementBlock() // 语句块
... {
match(
'{',"{");//和预读符号比较
StatementSequence();
match(
'}',"}");
}


void StatementSequence() // 语句串
... {

while(sym==ID||
sym
==IF||
sym
==WHILE||
sym
==';')
...{
while(sym==ID)//也可以用if(),但从统计角度看,while效率会略高。
//因为一般普通CommonStatement()出现概率较大
...{
CommonStatement();
match(
';',";");
}

while(sym==IF||
sym
==WHILE)
...{
VariantStatement();
}

while(sym==';')
...{
NullStatement();
}

}

}


void VariantStatement() // 变种语句--包括if(){},while{},他们都不以;结尾
... {
switch(sym)...{//若sym与下面两种均不匹配,什么也不做,空语句是也~
caseIF:
ConditionStatement();
//条件语句
break;
caseWHILE:
LoopWhileStatement();
break;
}

return;
}


void NullStatement() // 空语句--仅仅含有一个;号
... {
match(
';',";");
}


void CommonStatement() // 语句,以;结尾,但不以;开头
... {
switch(sym)...{//若sym与下面任何一种均不匹配,什么也不做,空语句是也~
caseID:
AssignmentStatement();
break;
}

return;
}


void AssignmentStatement() // 赋值语句
... {
match(ID,
"ID");
match(
'=',"=");
Expression();
}


void ConditionStatement() // 条件语句
... {
match(IF,
"if");
match(
'(',"(");
Condition();
match(
')',")");
StatementBlock();
}


void LoopWhileStatement() // 循环语句
... {
match(WHILE,
"while");
match(
'(',"(");
Condition();
match(
')',")");
StatementBlock();
}


void Condition() // 条件
... {
Expression();
RelationOperator();
Expression();
}


// <Expression>::=<Item>{+<Item>|-<Item>}
void Expression() // 表达式
... {
Item();
while(true)...{//{+<Item>|-<Item>}可以有多个
switch(sym)...{
case'+':
match(
'+',"+");
Item();
break;
case'-':
match(
'-',"-");
Item();
break;
default:
return;
}

}

return;

}

// <Item>::=<Factor>{*<Factor>|/<Factor>}
void Item() //
... {
Factor();
while(true)...{//{*<Factor>|/<Factor>}--可以有多个
switch(sym)...{
case'*':
match(
'*',"*");
Factor();
break;
case'/':
match(
'/',"/");
Factor();
break;
default:
return;
}

}

return;
}

// <Factor>::=ID|NUM|(<Expression>)
void Factor() // 因子
... {
switch(sym)...{
caseID:
match(ID,
"ID");
break;
caseNUM:
match(NUM,
"NUM");
break;
case'(':
match(
'(',"(");
Expression();
match(
')',")");
break;
default:
match(ID,
"Afactor");//ID在这里肯定不match,利用它来报错(找不到因子)
break;
}

}

// <RelationOperator>::=<|<=|>=|>|==|!=
void RelationOperator() // 关系运算符
... {
switch(sym)...{
case'<':
match(
'<',"<");
break;
case'>':
match(
'>',">");
break;
caseLESSEQUAL:
match(LESSEQUAL,
"<=");
break;
caseGREATEQUAL:
match(GREATEQUAL,
">=");
break;
caseEQUAL:
match(EQUAL,
"==");
break;
caseNOTEQUAL:
match(NOTEQUAL,
"!=");
break;
}

}

核心:规则----

<StatementBlock> ::= '{'<StatementSequence>'}'
<StatementSequence> ::= {<NullStatement>|<CommonStatement>|<VariantStatement>}

<NullStatement> ::= ';'
<CommonStatement> ::= <AssignmentStatement>
<VariantStatement> ::= <ConditionStatement>| <LoopWhileStatement>

<AssignmentStatement> ::= ID=<Expression>
<ConditionStatement> ::= if(<Condition>)<StatementBlock>
<LoopWhileStatement> ::= while(<Condition>)<StatementBlock>

<Condition> ::= <Expression><RelationOperator><Expression>
<Expression> ::= <Item>{+<Item>|-<Item>}
<Item> ::= <Factor>{*<Factor>|/<Factor>}
<Factor> ::= ID|NUM|(<Expression>)
<RelationOperator> ::= <|<=|>=|>|==|!=



休息一下:


评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值