编译原理词法分析器C源代码

C语言词法分析器
本文介绍了一个使用C语言编写的简单词法分析器程序,该程序能够从输入文件中读取代码,并将代码分解为一系列有意义的符号,如关键字、标识符、数字等。这些符号随后被写入到输出文件中。
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define MAX 500  /* capacity of the lexeme buffer, terminating NUL included */

/* Category codes that are not derived from a lookup table. */
enum {
    LEX_ASSIGN = 27,   /* =  */
    LEX_NUMBER = 28,   /* run of decimal digits */
    LEX_IDENT  = 29,   /* identifier that is not a reserved word */
    LEX_DIV    = 34,   /* /  */
    LEX_LT     = 35,   /* <  */
    LEX_LE     = 36,   /* <= */
    LEX_GT     = 37,   /* >  */
    LEX_GE     = 38,   /* >= */
    LEX_EQ     = 39,   /* == */
    LEX_NE     = 40    /* != */
};

/* Reserved words; the category code of lex_keywords[k] is k + 1 (1..24). */
static const char *const lex_keywords[] = {
    "main", "void", "int", "float", "double", "char", "struct", "const",
    "extern", "register", "static", "if", "else", "switch", "case", "for",
    "do", "while", "goto", "continue", "break", "default", "sizeof", "return"
};

/* Non-zero when c is an ASCII decimal digit. */
static int lex_is_digit(int c)
{
    return c >= '0' && c <= '9';
}

/* Non-zero when c may start an identifier: an ASCII letter or '_'. */
static int lex_is_ident_start(int c)
{
    return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_';
}

/*
 * Non-zero when c legally ends a number or identifier: blank, tab, newline,
 * one of the operator/punctuation characters the scanner knows, or EOF.
 * EOF must count as a delimiter, otherwise a lexeme that ends the file
 * would never be terminated.
 */
static int lex_is_delimiter(int c)
{
    if (c == EOF)
        return 1;
    /* strchr() would also match the string's own terminator for c == 0. */
    return c != '\0' && strchr(" \t\n=+-*/><!;:,{}[]()", c) != NULL;
}

/*
 * Stores c at word[len] if the buffer still has room for it plus a NUL and
 * returns the new length.  Characters beyond MAX - 1 are dropped, so an
 * over-long lexeme is truncated instead of overflowing the buffer.
 */
static int lex_append(char *word, int len, int c)
{
    if (len < MAX - 1)
        word[len++] = (char)c;
    return len;
}

/* Writes one recognised token as ("lexeme",code). */
static void lex_emit(FILE *out, const char *word, int code)
{
    fprintf(out, "(\"%s\",%d)\n", word, code);
}

/* Writes the diagnostic line for a lexeme that could not be classified. */
static void lex_emit_error(FILE *out, const char *word)
{
    fprintf(out, "error: \"%s\"不是合法的标识符\n", word);
}

/* Returns the category code (1..24) of a reserved word, or 0 if word is not one. */
static int lex_keyword_code(const char *word)
{
    size_t k;

    for (k = 0; k < sizeof lex_keywords / sizeof lex_keywords[0]; k++) {
        if (strcmp(word, lex_keywords[k]) == 0)
            return (int)k + 1;
    }
    return 0;
}

/*
 * Returns the category code of a one-character operator or punctuator that
 * never begins a longer token, or 0 when c is not such a character.
 */
static int lex_single_code(int c)
{
    switch (c) {
    case '+': return 31;
    case '-': return 32;
    case '*': return 33;
    case ';': return 41;
    case ':': return 42;
    case ',': return 43;
    case '{': return 44;
    case '}': return 45;
    case '[': return 46;
    case ']': return 47;
    case '(': return 48;
    case ')': return 49;
    default:  return 0;
    }
}

/*
 * Category code for '=', '<', '>' or '!' taken alone (with_eq == 0) or
 * followed by '=' (with_eq != 0).  Returns 0 for a lone '!', which the
 * scanner reports as an error.
 */
static int lex_compare_code(int first, int with_eq)
{
    switch (first) {
    case '=': return with_eq ? LEX_EQ : LEX_ASSIGN;
    case '<': return with_eq ? LEX_LE : LEX_LT;
    case '>': return with_eq ? LEX_GE : LEX_GT;
    case '!': return with_eq ? LEX_NE : 0;
    default:  return 0;
    }
}

/*
 * Error recovery for a malformed number/identifier: keeps consuming input
 * up to the next delimiter (or EOF), reports everything collected in word
 * as one bad lexeme and returns the first character that was not consumed.
 */
static int lex_report_bad_lexeme(FILE *in, FILE *out, char *word, int len, int c)
{
    while (!lex_is_delimiter(c)) {
        len = lex_append(word, len, c);
        c = fgetc(in);
    }
    word[len] = '\0';
    lex_emit_error(out, word);
    return c;
}

/*
 * Skips the body of a block comment whose opening slash-star has already
 * been read.  Stops after the closing star-slash pair, or at EOF when the
 * comment is unterminated.  Returns the first character after the comment
 * (EOF if the input ended).
 */
static int lex_skip_comment(FILE *in)
{
    int prev = 0;
    int c = fgetc(in);

    while (c != EOF && !(prev == '*' && c == '/')) {
        prev = c;
        c = fgetc(in);
    }
    return c == EOF ? EOF : fgetc(in);
}

/*
 * Lexical analyser driver.
 *
 * Reads source text from "in.txt" and writes one line per lexeme to
 * "out.txt": ("lexeme",code) for recognised tokens, the same form prefixed
 * with a label for user identifiers, and an "error: ..." line for anything
 * that cannot be classified.  Block comments produce no output.
 *
 * Returns 0 on success and EXIT_FAILURE when a file cannot be opened or the
 * output cannot be flushed.
 */
int main(void)
{
    FILE *in, *out;
    char word[MAX];   /* text of the lexeme being assembled */
    int cp;           /* current character; int so that EOF stays distinct from every byte */
    int first;        /* leading character of a possibly two-character operator */
    int code;
    int i;

    if ((in = fopen("in.txt", "r")) == NULL) {
        printf("不能打开文档in.txt,请检查根目录下是否存在该文档\n");
        return EXIT_FAILURE;
    }
    printf("成功打开文档in.txt\n");

    if ((out = fopen("out.txt", "w")) == NULL) {
        printf("不能打开文档out.txt,请检查根目录下是否存在该文档\n");
        fclose(in);   /* do not leak the already opened input stream */
        return EXIT_FAILURE;
    }
    printf("成功打开文档out.txt\n");

    cp = fgetc(in);
    while (cp != EOF) {
        /* Skip blanks, tabs and newlines; looping back re-checks for EOF. */
        if (cp == ' ' || cp == '\t' || cp == '\n') {
            cp = fgetc(in);
            continue;
        }

        /* Start a fresh lexeme in word[]. */
        i = 0;

        /* Integer constant: digits that must be followed by a delimiter. */
        if (lex_is_digit(cp)) {
            while (lex_is_digit(cp)) {
                i = lex_append(word, i, cp);
                cp = fgetc(in);
            }
            if (lex_is_delimiter(cp)) {
                word[i] = '\0';
                lex_emit(out, word, LEX_NUMBER);
            } else {
                cp = lex_report_bad_lexeme(in, out, word, i, cp);
            }
            continue;
        }

        /* Reserved word or identifier: letters, digits and underscores. */
        if (lex_is_ident_start(cp)) {
            while (lex_is_ident_start(cp) || lex_is_digit(cp)) {
                i = lex_append(word, i, cp);
                cp = fgetc(in);
            }
            if (lex_is_delimiter(cp)) {
                word[i] = '\0';
                code = lex_keyword_code(word);
                if (code != 0)
                    lex_emit(out, word, code);
                else
                    fprintf(out, "自定义标识符:(\"%s\",%d)\n", word, LEX_IDENT);
            } else {
                /* Something other than a letter, digit or '_' is glued on. */
                cp = lex_report_bad_lexeme(in, out, word, i, cp);
            }
            continue;
        }

        /* '/' is either the division operator or the start of a comment. */
        if (cp == '/') {
            cp = fgetc(in);
            if (cp == '*')
                cp = lex_skip_comment(in);
            else
                lex_emit(out, "/", LEX_DIV);   /* cp already holds the next character */
            continue;
        }

        /* '=', '<', '>' and '!' may be followed by '=' to form one token. */
        if (cp == '=' || cp == '<' || cp == '>' || cp == '!') {
            first = cp;
            word[i++] = (char)cp;
            cp = fgetc(in);
            if (cp == '=') {
                word[i++] = '=';
                cp = fgetc(in);
            }
            word[i] = '\0';
            code = lex_compare_code(first, i == 2);
            if (code != 0)
                lex_emit(out, word, code);
            else
                lex_emit_error(out, word);   /* a lone '!' is not a token here */
            continue;
        }

        /* Remaining one-character operators and punctuators, else an unknown character. */
        word[0] = (char)cp;
        word[1] = '\0';
        code = lex_single_code(cp);
        if (code != 0)
            lex_emit(out, word, code);
        else
            lex_emit_error(out, word);
        cp = fgetc(in);
    }

    fclose(in);
    /* fclose() flushes buffered output; a failure here means out.txt is incomplete. */
    if (fclose(out) != 0) {
        perror("out.txt");
        return EXIT_FAILURE;
    }
    printf("成功对in.txt文档内代码进行词法分析,分析结果保存在out.txt文档中\n");
    return 0;
}

实验一:词法分析程序 一、实验目的     通过设计编制调试一个具体的词法分析程序,加深对词法分析原理的理解。并掌握在对程序设计语言源程序进行扫描过程中将其分解为各类单词的词法分析方法。 编制一个读单词过程,从输入的源程序中,识别出各个具有独立意义的单词,即基本保留字、标识符、常数、运算符、分隔符五大类。并依次输出各个单词的类型码及单词符号的自身值。(遇到错误时可显示“Error”,然后跳过错误部分继续显示) 二、实验要求 用C或C++写一个简单的词法分析程序,程序可以满足下列要求: 1、能分析如下几种简单的语言词法 (1) 标识符: ID=letter(letter|digit)* (2) 关键字(全部小写) main int float double char if then else switch case break continue while do for (3)整型常量:NUM=digit digit* (4)运算符 = + - * / < <= == != > >= ; ( )? : (5)空格由空白、制表符和换行符组成,用以分隔ID、NUM、运算符等,字符分析时被忽略。 2、单词符号和相应的类别码 假定单词符号和相应的类别码如下: 单词符号 种别码 int 1 = 17 float 2 < 20 if 3 <= 21 switch 4 == 22 while 5 != 23 do 6 > 24 标识符 10 >= 25 整型常量 11 ; 26 + 13 ( 27 - 14 ) 28 * 15 ? 29 / 16 : 30 3、词法分析程序实现的功能 输入:单词序列(以文件形式提供),输出识别的单词的二元组序列到文件和屏幕 输出:二元组构成: (syn,token或sum) 其中: syn 为单词的种别码 token 为存放的单词自身符号串 sum 为整型常数 例: 源程序: int ab; float ef=20; ab=10+ef; 输出: (保留字--1,int) (标识符--10,ab) (分号--26,;) (保留字--2,float) (标识符--10,ef) (等号--17,=) (整数--11,20) (分号--26,;) (标识符--10,ab) (等号--17,=) (整数--11,10) (加号--13,+) (标识符--10,ef) (分号--26,;) 4、自己准备测试数据存放于TestData.txt文件中,测试数据中应覆盖有以上5种数据,测试结果要求以原数据与结果对照的形式输出并保存在Result.txt中,同时要把结果输出到屏幕。 5、提前准备 ① 实验前,先编制好程序,上机时输入并调试程序。 准备好多组测试数据(存放于文件TestData.txt中)。 6、写出实验报告 报告格式:要求有实验名称、实验目的、实验要求、实验内容、实验小结。 其中实验内容包括算法分析、程序流程图及程序代码。
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值