词法分析

本文档详细介绍了编译原理实验中词法分析器的实现过程,覆盖关键字、标识符、运算符、分隔符和整数的识别,以及双目运算符、小数、多位数和负数等特殊情况的处理。主要涉及的类有Type.java、FileUtil.java、Lexer.java、State.java以及主程序Main.java,实验输入为test.c文件,输出结果保存为output.txt。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

编译原理实验:实现词法分析

除了实现关键字、标识符、运算符、分隔符和整数的识别之外,还实现了双目运算符、小数、多位数、负数等特殊情况的识别。

类型处理类:Type.java

/**
 * Classification tables and predicates used by the lexer.
 *
 * Holds the keyword, separator and operator tables and offers simple
 * membership tests over them, plus digit/letter tests for single characters.
 */
public class Type
{
   // Keyword table; entries are compared case-sensitively against a whole word.
   public static final String[] keyword =
   {
       "abstract", "boolean", "break", "byte", "case", "catch", "char", "class",
       "const", "continue", "default", "do", "double", "else", "enum", "extends",
       "false", "final", "finally", "float", "for", "if", "implements", "import",
       "int", "interface", "long", "new", "null", "package", "private", "protected",
       "public", "return", "short", "static", "super", "switch", "this", "throw",
       "throws", "try", "true", "void", "while"
   };

   // Separator table.
   public static final char[] separator = {',', ';', '{', '}'};

   // Operator table. Brackets and the double quote are classified as operators here.
   // "!" is appended at the end (existing indices are unchanged) so that a lone
   // logical-not is recognised, not only the two-character "!=".
   public static final String[] operator =
   {
       "+", "-", "*", "/", "%", ">", "<", "=", "&", "|", "~", ">=", "<=", "==", "!=",
       "&&", "||", "++", "--", "+=", "-=", "(", ")", "[", "]", "\"", "!"
   };

   /**
    * Tells whether a character is a digit.
    *
    * @param ch character to test
    * @return the result of {@link Character#isDigit(char)} for {@code ch}
    */
   public static boolean isDigit(char ch)
   {
       return Character.isDigit(ch);
   }

   /**
    * Tells whether a character is a letter.
    *
    * @param ch character to test
    * @return the result of {@link Character#isLetter(char)} for {@code ch}
    */
   public static boolean isLetter(char ch)
   {
       return Character.isLetter(ch);
   }

   /**
    * Tells whether a word is listed in the keyword table.
    *
    * @param ch word to look up (the name is historical; it is a whole string)
    * @return true if {@code ch} equals one of the keyword entries
    */
   public static boolean isKeyword(String ch)
   {
       for(String key : keyword)
       {
           if(key.equals(ch))
               return true;
       }
       return false;
   }

   /**
    * Tells whether a string is listed in the operator table.
    *
    * @param ch candidate operator text
    * @return true if {@code ch} equals one of the operator entries
    */
   public static boolean isOperator(String ch)
   {
       for(String op : operator)
       {
           if(op.equals(ch))
               return true;
       }
       return false;
   }

   /**
    * Tells whether a character is listed in the separator table.
    *
    * @param ch character to test
    * @return true if {@code ch} is one of the separator entries
    */
   public static boolean isSeparator(char ch)
   {
       // Iterate as primitive char to avoid boxing every table entry.
       for(char se : separator)
       {
           if(se == ch)
               return true;
       }
       return false;
   }
}

文件处理类:FileUtil.java

import java.io.*;

/**
 * Handles the lexer's input and output files.
 *
 * Each instance reads the source file named in its constructor. All instances
 * share one static writer that targets "output.txt" in the working directory.
 */
public class FileUtil
{
   private BufferedReader reader;
   private static BufferedWriter writer;
   // Allocated eagerly so read() returns an empty buffer, never null, when the
   // source file could not be opened.
   private StringBuffer buffer = new StringBuffer();

   /**
    * Opens the source file for reading and (re)creates output.txt for writing.
    * If either step fails a message is printed and the instance is left without
    * a reader and/or the class without a writer; read(), write() and finish()
    * tolerate that state instead of throwing NullPointerException.
    *
    * @param path path of the source file to analyse
    */
   public FileUtil(String path)
   {
       try
       {
           reader = new BufferedReader(new FileReader(path));

           // Release a writer left open by an earlier instance before replacing it.
           finish();
           // FileWriter creates or truncates the file itself.
           writer = new BufferedWriter(new FileWriter("output.txt"));
       }
       catch (IOException e)
       {
           System.out.println("Can't find the file!");
       }
   }

   /**
    * Reads the whole source file into the buffer and closes the reader.
    *
    * readLine() strips line terminators, so a single space is appended after
    * every line; without it the last token of one line and the first token of
    * the next would fuse. A space is used rather than '\n' because ' ' is the
    * character Lexer's start state explicitly discards.
    *
    * @return the buffer holding the file content; empty if the file could not
    *         be opened. Calling this again returns the same buffer unchanged.
    */
   public StringBuffer read()
   {
       if (reader == null)
           return buffer;

       try
       {
           String line;
           while ((line = reader.readLine()) != null)
               buffer.append(line).append(' ');
       }
       catch (IOException e)
       {
           e.printStackTrace();
       }
       finally
       {
           try
           {
               reader.close();
           }
           catch (IOException e)
           {
               e.printStackTrace();
           }
           // Marks the input as consumed so a later call is a no-op.
           reader = null;
       }

       return buffer;
   }

   /**
    * Writes one analysed token to the output file as "(category, text)".
    *
    * @param catelog token category label
    * @param content token text; leading and trailing whitespace is trimmed
    */
   public void write(String catelog, String content)
   {
       if (writer == null)
       {
           // Output file was never opened, or finish() has already closed it.
           System.out.println("Write failed!");
           return;
       }

       try
       {
           String token = "(" + catelog + ", " + content.trim() + ")\n";
           writer.write(token);
       }
       catch (IOException e)
       {
           System.out.println("Write failed!");
           e.printStackTrace();
       }
   }

   /**
    * Closes the shared output writer, flushing anything still buffered.
    * Does nothing if no writer is open.
    */
   public static void finish()
   {
       if (writer == null)
           return;

       try
       {
           writer.close();
       }
       catch (IOException e)
       {
           e.printStackTrace();
       }
       finally
       {
           writer = null;
       }
   }
}

具体词法分析类:Lexer.java

/**
 * DFA-driven lexical analyser.
 *
 * Reads the whole source file through FileUtil, walks it one character at a
 * time and writes each recognised token through FileUtil.write(). The states
 * are: STATE0 start, STATE1 identifier/keyword, STATE2 number, STATE3 operator,
 * STATE4 separator.
 */
public class Lexer
{
   private final FileUtil fileUtil;
   private final StringBuffer buffer;

   /**
    * Opens the source file and loads its content.
    *
    * @param path path of the source file to analyse
    */
   public Lexer(String path)
   {
       fileUtil = new FileUtil(path);
       buffer = fileUtil.read();
   }

   /**
    * Runs the automaton over the buffered source and emits one output record
    * per token ("Keyword", "Identifier", "Number", "Operator", "Separator" or
    * "Error!").
    */
   public void analyse()
   {
       int i = 0;
       State state = State.STATE0;
       String word = "";

       // The loop runs one step past the end with ch == 0 as a sentinel so that
       // a token still being accumulated at end of input is flushed.
       while(i <= buffer.length())
       {
           char ch = 0;

           if(i != buffer.length())
               ch = buffer.charAt(i);

           switch(state)
           {
               case STATE0:
                   if(Type.isLetter(ch))
                   {
                       state = State.STATE1;
                       word += ch;
                   }
                   else if(Type.isDigit(ch))
                   {
                       state = State.STATE2;
                       word += ch;
                   }
                   else if (Type.isSeparator(ch))
                   {
                       state = State.STATE4;
                       word += ch;
                   }
                   // NOTE: ch is appended to word even when the result is not an
                   // operator. An unrecognised character therefore stays in word
                   // and prefixes the next token; this is how ".5" reaches the
                   // number state with its leading dot and "#include" comes out as
                   // one identifier. Kept as-is because existing output relies on it.
                   else if(Type.isOperator((word+=ch)) )
                   {
                       state = State.STATE3;
                   }
                   else if(Character.isWhitespace(ch))
                   {
                       // Discard any whitespace, not just ' ': a tab left in word
                       // would otherwise stop the following operator from matching.
                       word = "";
                   }
                   break;
               case STATE1:
                   // '.' is accepted inside a word, so "math.h" is one identifier.
                   if(Type.isDigit(ch) || Type.isLetter(ch) || ch == '.')
                   {
                       word += ch;
                   }
                   else
                   {
                       state = State.STATE0;
                       if(Type.isKeyword(word.trim()))
                       {
                           fileUtil.write("Keyword",word);
                       }
                       else
                       {
                           fileUtil.write("Identifier",word);
                       }
                       word = "";
                       // Re-process the terminating character from the start state.
                       i--;
                   }
                   break;
               case STATE2:
                   if(Type.isDigit(ch) || ch == '.')
                   {
                       word +=ch;
                   }
                   else if(Type.isLetter(ch))
                   {
                       // A letter may not follow a digit. It is reported on stderr
                       // only: the letter is skipped (not appended) and scanning
                       // stays in the number state.
                       System.err.println("Error! Letter cannot appear after digit");
                   }
                   else
                   {
                       if(isWellFormedNumber(word))
                       {
                           fileUtil.write("Number",word);
                       }
                       else
                       {
                           fileUtil.write("Error!",word);
                       }
                       word = "";
                       state = State.STATE0;
                       i--;
                   }
                   break;
               case STATE3:
                   // Try to extend to a two-character operator such as "+=".
                   String temp = word+ch;
                   if(Type.isOperator(temp))
                   {
                       word = temp;
                   }
                   // A '-' directly followed by a digit starts a negative number.
                   // NOTE(review): this also captures a binary minus, e.g. "a-1"
                   // yields identifier "a" then number "-1".
                   else if(word.equals("-") && Type.isDigit(ch))
                   {
                       state = State.STATE2;
                       word += ch;
                   }
                   else
                   {
                       state = State.STATE0;
                       fileUtil.write("Operator",word);
                       word = "";
                       i--;
                   }
                   break;
               case STATE4:
                   // The separator was stored on entry; emit it and re-process
                   // the current character from the start state.
                   state = State.STATE0;
                   fileUtil.write("Separator",word);
                   word = "";
                   i--;
                   break;
           }
           i++;

       }
   }

   /**
    * Checks the dot placement of an accumulated number: at most one '.', and
    * not as the first or last character.
    *
    * @param text accumulated number text, as collected by the number state
    * @return true if the text should be emitted as "Number", false for "Error!"
    */
   private static boolean isWellFormedNumber(String text)
   {
       if(text.isEmpty())
           return true;
       if(text.charAt(0) == '.' || text.charAt(text.length() - 1) == '.')
           return false;
       // Equal first and last positions mean zero or one dot.
       return text.indexOf('.') == text.lastIndexOf('.');
   }

}

枚举类:State.java

/**
 * States of the lexer's finite automaton, in the order the lexer declares them.
 */
public enum State
{
   // Start state: decides which kind of token the next character begins.
   STATE0,
   // Accumulating an identifier or keyword.
   STATE1,
   // Accumulating a number.
   STATE2,
   // Accumulating an operator.
   STATE3,
   // A separator has been read and is about to be emitted.
   STATE4
}

主函数类:Main.java

/**
 * Program entry point: runs the lexer over one source file.
 */
public class Main
{
   /**
    * Analyses a source file and closes the output when done.
    *
    * @param args optional; args[0] is the path of the file to analyse.
    *             When absent, "test.c" is used.
    */
   public static void main(String[] args)
   {
       String path = (args != null && args.length > 0) ? args[0] : "test.c";

       Lexer lex = new Lexer(path);
       try
       {
           lex.analyse();
       }
       finally
       {
           // Close the output even if analysis throws, so tokens already
           // written are not lost in the writer's buffer.
           FileUtil.finish();
       }
   }
}

输入处理文件: test.c

#include "math.h"
int main()
{
   float a,b,y1,y2;
   int c,d,e;
   a += 3.14.15926;
   b = -25.1;
   c = .5;
   scanf("%d,%d",&c,&d);
   y1 = cos(a/3);
   y2 = sqrt(b);
   e = d+(-1);
   if(c>=d && c>0)
       e = c;
   printf("y1 = %f,y2 = %f,e = %d,c = %x\n",y1,y2,e,c);
}

输出结果:output.txt

(Identifier, #include)
(Operator, ")
(Identifier, math.h)
(Operator, ")
(Keyword, int)
(Identifier, main)
(Operator, ()
(Operator, ))
(Separator, {)
(Keyword, float)
(Identifier, a)
(Separator, ,)
(Identifier, b)
(Separator, ,)
(Identifier, y1)
(Separator, ,)
(Identifier, y2)
(Separator, ;)
(Keyword, int)
(Identifier, c)
(Separator, ,)
(Identifier, d)
(Separator, ,)
(Identifier, e)
(Separator, ;)
(Identifier, a)
(Operator, +=)
(Error!, 3.14.15926)
(Separator, ;)
(Identifier, b)
(Operator, =)
(Number, -25.1)
(Separator, ;)
(Identifier, c)
(Operator, =)
(Error!, .5)
(Separator, ;)
(Identifier, scanf)
(Operator, ()
(Operator, ")
(Operator, %)
(Identifier, d)
(Separator, ,)
(Operator, %)
(Identifier, d)
(Operator, ")
(Separator, ,)
(Operator, &)
(Identifier, c)
(Separator, ,)
(Operator, &)
(Identifier, d)
(Operator, ))
(Separator, ;)
(Identifier, y1)
(Operator, =)
(Identifier, cos)
(Operator, ()
(Identifier, a)
(Operator, /)
(Number, 3)
(Operator, ))
(Separator, ;)
(Identifier, y2)
(Operator, =)
(Identifier, sqrt)
(Operator, ()
(Identifier, b)
(Operator, ))
(Separator, ;)
(Identifier, e)
(Operator, =)
(Identifier, d)
(Operator, +)
(Operator, ()
(Number, -1)
(Operator, ))
(Separator, ;)
(Keyword, if)
(Operator, ()
(Identifier, c)
(Operator, >=)
(Identifier, d)
(Operator, &&)
(Identifier, c)
(Operator, >)
(Number, 0)
(Operator, ))
(Identifier, e)
(Operator, =)
(Identifier, c)
(Separator, ;)
(Identifier, printf)
(Operator, ()
(Operator, ")
(Identifier, y1)
(Operator, =)
(Operator, %)
(Identifier, f)
(Separator, ,)
(Identifier, y2)
(Operator, =)
(Operator, %)
(Identifier, f)
(Separator, ,)
(Identifier, e)
(Operator, =)
(Operator, %)
(Identifier, d)
(Separator, ,)
(Identifier, c)
(Operator, =)
(Operator, %)
(Identifier, x)
(Identifier, \n)
(Operator, ")
(Separator, ,)
(Identifier, y1)
(Separator, ,)
(Identifier, y2)
(Separator, ,)
(Identifier, e)
(Separator, ,)
(Identifier, c)
(Operator, ))
(Separator, ;)
(Separator, })
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值