编译原理实验:实现词法分析
除了实现关键字、标识符、运算符、分隔符、整数的识别,还实现了双目运算符、小数、多位数、负数等特殊情况的识别。
类型处理类:Type.java
/**
 * Lookup tables for the token categories used by the lexer, together with
 * the classification helpers that consult them.
 */
public class Type {

    /** Reserved words that are reported as keywords. */
    public static final String[] keyword = {
        "abstract", "boolean", "break", "byte", "case", "catch", "char",
        "class", "const", "continue", "default", "do", "double", "else",
        "enum", "extends", "false", "final", "finally", "float", "for",
        "if", "implements", "import", "int", "interface", "long", "new",
        "null", "package", "private", "protected", "public", "return",
        "short", "static", "super", "switch", "this", "throw", "throws",
        "try", "true", "void", "while"
    };

    /** Single characters that are reported as separators. */
    public static final char[] separator = {',', ';', '{', '}'};

    /**
     * Operator spellings. Brackets, parentheses and the double quote are
     * listed here as well, so they are classified as operators.
     */
    public static final String[] operator = {
        "+", "-", "*", "/", "%", ">", "<", "=", "&", "|", "~",
        ">=", "<=", "==", "!=", "&&", "||", "++", "--", "+=", "-=",
        "(", ")", "[", "]", "\""
    };

    /**
     * Tells whether a character is a digit.
     *
     * @param ch character to classify
     * @return the result of {@link Character#isDigit(char)}
     */
    public static boolean isDigit(char ch) {
        return Character.isDigit(ch);
    }

    /**
     * Tells whether a character is a letter.
     *
     * @param ch character to classify
     * @return the result of {@link Character#isLetter(char)}
     */
    public static boolean isLetter(char ch) {
        return Character.isLetter(ch);
    }

    /**
     * Tells whether a word is one of the entries of {@link #keyword}.
     *
     * @param ch candidate word (despite the name, a whole string)
     * @return {@code true} if the word equals an entry of the keyword table
     */
    public static boolean isKeyword(String ch) {
        return listed(keyword, ch);
    }

    /**
     * Tells whether a string is one of the entries of {@link #operator}.
     *
     * @param ch candidate operator text
     * @return {@code true} if the text equals an entry of the operator table
     */
    public static boolean isOperator(String ch) {
        return listed(operator, ch);
    }

    /**
     * Tells whether a character is one of the entries of {@link #separator}.
     *
     * @param ch character to classify
     * @return {@code true} if the character appears in the separator table
     */
    public static boolean isSeparator(char ch) {
        for (int idx = 0; idx < separator.length; idx++) {
            if (separator[idx] == ch) {
                return true;
            }
        }
        return false;
    }

    /** Linear search of a string table; a {@code null} candidate never matches. */
    private static boolean listed(String[] table, String candidate) {
        for (int idx = 0; idx < table.length; idx++) {
            if (table[idx].equals(candidate)) {
                return true;
            }
        }
        return false;
    }
}
文件处理类:FileUtil.java
import java.io.*;

/**
 * File I/O for the lexer: loads the source file into a buffer and writes the
 * recognised tokens to {@code output.txt}.
 *
 * <p>The writer is a static field, so every instance shares the most recently
 * opened {@code output.txt}; {@link #finish()} closes it.
 */
public class FileUtil {
    private BufferedReader reader;
    private static BufferedWriter writer;
    private StringBuffer buffer;

    /**
     * Opens the input file for reading and {@code output.txt} for writing.
     *
     * <p>If either file cannot be opened a message is printed and the object
     * stays usable: {@link #read()} then returns whatever was read so far
     * (an empty buffer when the input is missing).
     *
     * @param path path of the source file to analyse
     */
    public FileUtil(String path) {
        // Created before the try block so that read() never returns null.
        buffer = new StringBuffer();
        try {
            reader = new BufferedReader(new FileReader(path));
            // FileWriter creates the file when it does not exist yet.
            writer = new BufferedWriter(new FileWriter(new File("output.txt")));
        } catch (IOException e) {
            System.out.println("Can't find the file!");
        }
    }

    /**
     * Reads the whole input file into the buffer and closes the reader.
     *
     * <p>{@code readLine()} strips the line terminator, so a single space is
     * appended after every line; without it the last token of one line and
     * the first token of the next would be glued together. Calling this
     * method again returns the same buffer without re-reading.
     *
     * @return the buffer holding the file content
     */
    public StringBuffer read() {
        if (reader == null) {
            // Input could not be opened, or it was already consumed.
            return buffer;
        }
        try {
            String line;
            while ((line = reader.readLine()) != null) {
                buffer.append(line).append(' ');
            }
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            try {
                reader.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
            reader = null;
        }
        return buffer;
    }

    /**
     * Writes one token as a line of the form {@code (category, text)}.
     *
     * @param catelog token category label
     * @param content token text; leading and trailing whitespace is trimmed
     */
    public void write(String catelog, String content) {
        if (writer == null) {
            // Output file was never opened; report instead of throwing.
            System.out.println("Write failed!");
            return;
        }
        try {
            String token = "(" + catelog + ", " + content.trim() + ")\n";
            writer.write(token);
        } catch (IOException e) {
            System.out.println("Write failed!");
            e.printStackTrace();
        }
    }

    /**
     * Closes the shared output writer, flushing buffered tokens.
     * Does nothing when no output file was opened.
     */
    public static void finish() {
        if (writer == null) {
            return;
        }
        try {
            writer.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
具体词法分析类:Lexer.java
/** * 具体根据DFA进行词法分析 * */ public class Lexer { private FileUtil fileUtil; private StringBuffer buffer; public Lexer(String path) { fileUtil = new FileUtil(path); buffer = fileUtil.read(); } public void analyse() { int i = 0; State state = State.STATE0; String word = ""; while(i <= buffer.length()) { char ch = 0; //charAt() 方法用于返回指定索引处的字符 if(i != buffer.length()) ch = buffer.charAt(i); switch(state) { case STATE0: if(Type.isLetter(ch)) { state = State.STATE1; word += ch; } else if(Type.isDigit(ch)) { state = State.STATE2; word += ch; } else if (Type.isSeparator(ch)) { state = State.STATE4; word += ch; } else if(Type.isOperator((word+=ch)) ) { state = State.STATE3; } else if(ch == ' ') { //防止空白符号的干扰 word = ""; } break; case STATE1: if(Type.isDigit(ch) || Type.isLetter(ch) || ch == '.') { word += ch; } else { state = State.STATE0; if(Type.isKeyword(word.trim())) { fileUtil.write("Keyword",word); } else { fileUtil.write("Identifier",word); } word = ""; i--; } break; case STATE2: if(Type.isDigit(ch) || ch == '.') { word +=ch; } else if(Type.isLetter(ch)) { /** * 数字开头后面不允许出现字母,异常报错 */ System.err.println("Error! Letter cannot appear after digit"); } else { int flag = 0; int len = word.length(); for(int j=0; j<len; j++) { if(word.charAt(j) == '.') { if(flag > 0) { fileUtil.write("Error!",word); word = ""; flag ++; break; } else flag ++; } if (word.charAt(len - 1) == '.' || word.charAt(0) == '.') { fileUtil.write("Error!",word); flag = 2; word=""; break; } } //.. if(flag < 2) { fileUtil.write("Number",word); word = ""; } state = State.STATE0; i--; } break; case STATE3: //考虑类似+=的情况 String temp = word+ch; if(Type.isOperator(temp)) { word = temp; } else if(word.equals("-") && Type.isDigit(ch)) { state = State.STATE2; word += ch; } else { state = State.STATE0; fileUtil.write("Operator",word); word = ""; i--; } break; case STATE4: state = State.STATE0; fileUtil.write("Separator",word); word = ""; i--; break; } i++; } } }
枚举类:State.java
/**
 * States of the DFA that {@code Lexer.analyse()} switches on.
 */
public enum State {
    /** Start state: no token in progress. */
    STATE0,
    /** Collecting an identifier or keyword. */
    STATE1,
    /** Collecting a number. */
    STATE2,
    /** Collecting an operator. */
    STATE3,
    /** A separator character has been read and is about to be emitted. */
    STATE4
}
主函数类:Main.java
/**
 * Program entry point: runs the lexer over one source file.
 */
public class Main {

    /**
     * Analyses the file named by the first command-line argument, or
     * {@code test.c} when no argument is given.
     *
     * @param args optional; {@code args[0]} is the path of the file to analyse
     */
    public static void main(String[] args) {
        String path = (args != null && args.length > 0) ? args[0] : "test.c";
        try {
            Lexer lex = new Lexer(path);
            lex.analyse();
        } finally {
            // Close the output even if analysis fails, so that tokens
            // written so far are flushed to disk.
            FileUtil.finish();
        }
    }
}
输入处理文件: test.c
#include "math.h" int main() { float a,b,y1,y2; int c,d,e; a += 3.14.15926; b = -25.1; c = .5; scanf("%d,%d",&c,&d); y1 = cos(a/3); y2 = sqrt(b); e = d+(-1); if(c>=d && c>0) e = c; printf("y1 = %f,y2 = %f,e = %d,c = %x\n",y1,y2,e,c); }
输出结果:output.txt
(Identifier, #include) (Operator, ") (Identifier, math.h) (Operator, ") (Keyword, int) (Identifier, main) (Operator, () (Operator, )) (Separator, {) (Keyword, float) (Identifier, a) (Separator, ,) (Identifier, b) (Separator, ,) (Identifier, y1) (Separator, ,) (Identifier, y2) (Separator, ;) (Keyword, int) (Identifier, c) (Separator, ,) (Identifier, d) (Separator, ,) (Identifier, e) (Separator, ;) (Identifier, a) (Operator, +=) (Error!, 3.14.15926) (Separator, ;) (Identifier, b) (Operator, =) (Number, -25.1) (Separator, ;) (Identifier, c) (Operator, =) (Error!, .5) (Separator, ;) (Identifier, scanf) (Operator, () (Operator, ") (Operator, %) (Identifier, d) (Separator, ,) (Operator, %) (Identifier, d) (Operator, ") (Separator, ,) (Operator, &) (Identifier, c) (Separator, ,) (Operator, &) (Identifier, d) (Operator, )) (Separator, ;) (Identifier, y1) (Operator, =) (Identifier, cos) (Operator, () (Identifier, a) (Operator, /) (Number, 3) (Operator, )) (Separator, ;) (Identifier, y2) (Operator, =) (Identifier, sqrt) (Operator, () (Identifier, b) (Operator, )) (Separator, ;) (Identifier, e) (Operator, =) (Identifier, d) (Operator, +) (Operator, () (Number, -1) (Operator, )) (Separator, ;) (Keyword, if) (Operator, () (Identifier, c) (Operator, >=) (Identifier, d) (Operator, &&) (Identifier, c) (Operator, >) (Number, 0) (Operator, )) (Identifier, e) (Operator, =) (Identifier, c) (Separator, ;) (Identifier, printf) (Operator, () (Operator, ") (Identifier, y1) (Operator, =) (Operator, %) (Identifier, f) (Separator, ,) (Identifier, y2) (Operator, =) (Operator, %) (Identifier, f) (Separator, ,) (Identifier, e) (Operator, =) (Operator, %) (Identifier, d) (Separator, ,) (Identifier, c) (Operator, =) (Operator, %) (Identifier, x) (Identifier, \n) (Operator, ") (Separator, ,) (Identifier, y1) (Separator, ,) (Identifier, y2) (Separator, ,) (Identifier, e) (Separator, ,) (Identifier, c) (Operator, )) (Separator, ;) (Separator, })