想了解更多内容,移步至编译原理专栏
==========================2021.12.22 更新===================================
整理了一下代码,同步到了gitee
https://gitee.com/godelgnis/lrparser
--------------------------------------------------分割线---------------------------------------------------------------------
终于做完了这学期最后的综合实验,不想再动了,参考附录C4.5做的,主要是能够识别和分析多条赋值语句、循环语句和判断语句。参照书本上的C代码(书上的代码有几个地方是错的,估计是排版的时候没检查出来)改写成Java版本的,主要修改的还是IrParser.java这个类,毕竟改动的是语法规则,词法分析器还是跟之前的差不多,修改一下种别码就可以了。
其实我这个分析器主要的一个思想就是,先分析处理源程序(识别单词,处理换行注释等),把单词先抽取出来放到一个表里面。然后再进行语法和语义的处理,语法分析时,先从main()这个固定的入口进行分析,然后再进入语句块分析,语句块里面再进行语句串的分析,只能识别三种语法规则,即赋值语句,判断语句,循环语句。
判断语句和循环语句需要进行条件判断。一旦进入了条件判断,就意味着之后的程序运行会出现分支,即跳转,所以需要记录好条件为真的入口和条件为假的入口。因为当分析器分析到这个地方时,已经生成了相应的四元式,但跳转目标还不知道,只有当分析器继续分析下去之后,才可以将出口位置回填到入口位置(我是这么理解的,可能有误)。在我的程序中,我主要是在statement()方法中用一个tempFalse来记录条件为假的入口的位置,等程序递归返回时,才能将正确的出口位置回填到入口位置,如果不记录的话就会出错。至于条件为真的入口位置可以很快就回填,因为条件判断完之后从condition()中返回就能立刻把出口回填。
可以这样子理解,进入条件判断时,
记录条件为真的入口位置trueCondition=当前四元式下标,
生成条件为真的四元式(goto 关系运算符,参数1,参数2,0)
条件为假的四元式下标falseCondition = 当前四元式下标
生成条件为假的四元式(goto , , ,0),四元式中的0就是待回填的,
生成四元式之后,列表下标是自动往后移动的。
然后从方法中返回之后,就可以将当前的四元式下标回填给条件为真的入口,成为它的跳转位置
然后后面基本就是,记录位置——递归——返回位置——回填,基本上就是这样了,直到程序出错或者正常结束运行。
C语言里的递归真的很有毒,不出错的情况下没什么感觉,一旦出错能把人烦死。。。。。。
各位读者如果有兴趣的话可以参考一下我的代码,建议在我的基础之上进行修改,完成属于自己的一个小型语言分析器。
由于我做的这个分析器只是一个简单的分析器,所以只能识别部分错误,在某些测试代码下会出现死循环、抛出异常等等。
所以最后,强烈建议读者能够自行修改并完成自己的分析器。以下是测试说明
测试数据:
main()
{
//A处缺赋值号,D处缺分号错误
A 2 + 2; B=4; C=10; D=100
A= q +(q*j+ 0)* 2-V*c;
while (a<0){
while(C>0){
a=1;
}
//缺左括号和左花括号
if k=j)
C = p;
}
//缺右括号
while(c<d {
a = 0;
}
}
}
#
程序运行效果如下:
代码清单
/**
 * A single lexical token: the text that was scanned together with the
 * numeric category code assigned to it.
 */
public class Word {

    /** Numeric category code of the token. */
    private int typenum;

    /** Text of the token as it was scanned. */
    private String word;

    /**
     * @return the text of the token
     */
    public String getWord() {
        return this.word;
    }

    /**
     * @param word the text of the token
     */
    public void setWord(String word) {
        this.word = word;
    }

    /**
     * @return the category code of the token
     */
    public int getTypenum() {
        return this.typenum;
    }

    /**
     * @param typenum the category code of the token
     */
    public void setTypenum(int typenum) {
        this.typenum = typenum;
    }
}
package codescanner;
/**
 * Hand-written lexical scanner for a small C-like language.
 *
 * <p>Each call to {@link #scan()} consumes one token from the character
 * array given to the constructor and returns it as a {@link Word}.
 *
 * <p>Reading past the end of the input is safe: the current character
 * becomes {@code '\0'} and {@link #scan()} reports the end marker
 * (type code 0), so loops that wait for a terminating character cannot
 * spin forever on truncated input.
 */
public class CodeScanner {

    /** Reserved words; a keyword's type code is its position in this table plus one. */
    private static final String[] KEYWORDS = {"main", "int", "char", "if", "else", "for", "while"};

    /** Value stored in {@code ch} when a read is attempted past the end of the input. */
    private static final char END_OF_INPUT = '\0';

    private final char[] input;
    /** Characters of the token currently being assembled. */
    private final StringBuilder token = new StringBuilder();
    /** Index of the next character to read from {@code input}. */
    private int p_input = 0;
    /** Most recently read character. */
    private char ch;

    /**
     * @param input the complete source text to tokenize
     */
    public CodeScanner(char[] input) {
        this.input = input;
    }

    /**
     * Reads the next character into the current-character slot.
     *
     * <p>The read position always advances, even past the end of the input,
     * so that a following {@link #retract()} stays symmetric.
     *
     * @return the character read, or {@code '\0'} when the input is exhausted
     */
    public char m_getch() {
        ch = p_input < input.length ? input[p_input] : END_OF_INPUT;
        p_input++;
        return ch;
    }

    /**
     * Skips blanks: while the current character is a space, tab or carriage
     * return, reads the next character.
     */
    public void getbc() {
        while (ch == ' ' || ch == '\t' || ch == '\r') {
            m_getch();
        }
    }

    /**
     * Appends the current character to the token being assembled.
     */
    public void concat() {
        token.append(ch);
    }

    /**
     * @return {@code true} if the current character is an ASCII letter
     */
    public boolean letter() {
        return (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z');
    }

    /**
     * @return {@code true} if the current character is an ASCII digit
     */
    public boolean digit() {
        return ch >= '0' && ch <= '9';
    }

    /**
     * Steps the read position back by one character.
     */
    public void retract() {
        if (p_input > 0) {
            p_input--;
        }
    }

    /**
     * Converts the decimal digit string held in the current token to its
     * binary representation.
     *
     * @return the binary digits of the token's value; {@code "0"} for zero
     */
    public String dtb() {
        int num = 0;
        for (int i = 0; i < token.length(); i++) {
            num = num * 10 + (token.charAt(i) - '0');
        }
        return Integer.toBinaryString(num);
    }

    /**
     * Looks the current token up in the reserved-word table.
     *
     * @return the keyword's type code (table position plus one), or 10 if the
     *         token is not a reserved word
     */
    public int reserve() {
        String text = token.toString().trim();
        for (int i = 0; i < KEYWORDS.length; i++) {
            if (KEYWORDS[i].equals(text)) {
                return i + 1;
            }
        }
        return 10;
    }

    /**
     * Scans and returns the next token.
     *
     * <p>Type codes produced:
     * <ul>
     *   <li>0 - end marker {@code #}, also returned when the input is exhausted</li>
     *   <li>1..7 - reserved words {@code main int char if else for while}</li>
     *   <li>10 - identifier, 20 - number</li>
     *   <li>21 {@code =}, 22 {@code +}, 23 {@code -}, 24 {@code *}, 25 {@code /}</li>
     *   <li>26 {@code (}, 27 {@code )}, 28 {@code [}, 29 {@code ]}, 30 <code>{</code>, 31 <code>}</code></li>
     *   <li>32 {@code ,}, 33 {@code :}, 34 {@code ;}</li>
     *   <li>35 {@code >}, 36 {@code <}, 37 {@code >=}, 38 {@code <=}, 39 {@code ==}, 40 {@code !=}</li>
     *   <li>41 - a line break or a single-line comment (word is the two characters {@code \n})</li>
     *   <li>42 - a multi-line comment (word holds one {@code \n} pair per line break inside it)</li>
     *   <li>-1 - unrecognized input</li>
     * </ul>
     *
     * @return the scanned token, never {@code null}
     */
    public Word scan() {
        token.setLength(0);
        m_getch();
        getbc();
        if (atEnd()) {
            // Stay parked at the end so repeated calls keep reporting the end marker.
            retract();
            return makeWord(0, "#");
        }
        if (letter()) {
            while (letter() || digit()) {
                concat();
                m_getch();
            }
            retract();
            return makeWord(reserve(), token.toString());
        }
        if (digit()) {
            while (digit()) {
                concat();
                m_getch();
            }
            retract();
            return makeWord(20, token.toString());
        }
        switch (ch) {
            case '=':
                return oneOrTwoChars(39, "==", 21, "=");
            case '+':
                return makeWord(22, "+");
            case '-':
                return makeWord(23, "-");
            case '*':
                return makeWord(24, "*");
            case '/':
                return scanSlash();
            case '(':
                return makeWord(26, "(");
            case ')':
                return makeWord(27, ")");
            case '[':
                return makeWord(28, "[");
            case ']':
                return makeWord(29, "]");
            case '{':
                return makeWord(30, "{");
            case '}':
                return makeWord(31, "}");
            case ',':
                return makeWord(32, ",");
            case ':':
                return makeWord(33, ":");
            case '<':
                return oneOrTwoChars(38, "<=", 36, "<");
            case '>':
                return oneOrTwoChars(37, ">=", 35, ">");
            case ';':
                return makeWord(34, ";");
            case '!':
                m_getch();
                if (ch == '=') {
                    return makeWord(40, "!=");
                }
                // A lone '!' is the offending text; give the following character back.
                retract();
                return errorWord("!");
            case '\n':
                return makeWord(41, "\\n");
            case '#':
                return makeWord(0, "#");
            default:
                concat();
                return errorWord(token.toString().trim());
        }
    }

    /** Returns true once a read has been attempted past the last input character. */
    private boolean atEnd() {
        return p_input > input.length;
    }

    /** Scans an operator that is either {@code <op>=} (two characters) or {@code <op>} alone. */
    private Word oneOrTwoChars(int twoCharType, String twoCharText, int oneCharType, String oneCharText) {
        m_getch();
        if (ch == '=') {
            return makeWord(twoCharType, twoCharText);
        }
        retract();
        return makeWord(oneCharType, oneCharText);
    }

    /** Scans what follows a '/': a single-line comment, a multi-line comment, or the division operator. */
    private Word scanSlash() {
        m_getch();
        if (ch == '/') {
            // The comment runs to the end of the line or, if there is none, to the end of the input.
            do {
                m_getch();
            } while (ch != '\n' && !atEnd());
            if (atEnd()) {
                retract();
            }
            return makeWord(41, "\\n");
        }
        if (ch == '*') {
            StringBuilder lineBreaks = new StringBuilder();
            // The opening '*' must not double as the closing one, so "/*/" stays open.
            char previous = END_OF_INPUT;
            while (true) {
                m_getch();
                if (atEnd()) {
                    // Unterminated comment: stop here instead of looping forever.
                    retract();
                    break;
                }
                if (previous == '*' && ch == '/') {
                    break;
                }
                if (ch == '\n') {
                    lineBreaks.append("\\n");
                }
                previous = ch;
            }
            return makeWord(42, lineBreaks.toString());
        }
        retract();
        return makeWord(25, "/");
    }

    /** Builds a token with the given type code and text. */
    private static Word makeWord(int typenum, String text) {
        Word result = new Word();
        result.setTypenum(typenum);
        result.setWord(text);
        return result;
    }

    /** Builds an error token (type code -1) that quotes the offending text. */
    private static Word errorWord(String text) {
        return makeWord(-1, "ERROR INFO: WORD = \"" + text + "\"");
    }
}
package codescanner;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileWriter;
import java.io.IOException;
import java.io.Writer;
import java.util.ArrayList;
import java.util.Scanner;
/**
 * Drives the lexical scan of one source file: reads the file, feeds it to a
 * {@link CodeScanner}, collects the resulting tokens and writes them to an
 * output file.
 */
public class Analyzer {
    private File inputFile;
    private File outputFile;
    private String fileContent;
    private ArrayList<Word> list = new ArrayList<>();

    /**
     * @param input  path of the source file to read
     * @param output path of the file the token list is written to
     */
    public Analyzer(String input, String output) {
        inputFile = new File(input);
        outputFile = new File(output);
    }

    /**
     * Reads the whole input file, terminating every line with a single
     * {@code '\n'}. If the file cannot be found the stack trace is printed
     * and the content read so far (empty) is returned.
     *
     * @return the file content
     */
    public String getContent() {
        StringBuilder content = new StringBuilder();
        try (Scanner lines = new Scanner(inputFile)) {
            while (lines.hasNextLine()) {
                content.append(lines.nextLine() + "\n");
            }
        } catch (FileNotFoundException e) {
            e.printStackTrace();
        }
        fileContent = content.toString();
        return fileContent;
    }

    /**
     * Scans the given text token by token, appending every token to the
     * list up to and including the first one whose type code is 0, then
     * saves the list to the output file.
     *
     * @param fileContent the source text to tokenize
     */
    public void analyze(String fileContent) {
        CodeScanner scanner = new CodeScanner(fileContent.toCharArray());
        Word current;
        do {
            current = scanner.scan();
            list.add(current);
        } while (current.getTypenum() != 0);
        saveResult();
    }

    /**
     * Writes every collected token to the output file, one
     * {@code (type ,text)} entry per line. The file is created first if it
     * does not exist; I/O failures are reported by printing the stack trace.
     */
    public void saveResult() {
        createOutputFileIfMissing();
        try (Writer out = new FileWriter(outputFile)) {
            for (int i = 0; i < list.size(); i++) {
                Word entry = list.get(i);
                out.write("(" + entry.getTypenum() + " ," + entry.getWord() + ")\n");
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /** Creates the output file when it is not there yet. */
    private void createOutputFileIfMissing() {
        if (outputFile.exists()) {
            return;
        }
        try {
            outputFile.createNewFile();
        } catch (IOException e1) {
            e1.printStackTrace();
        }
    }

    /**
     * @return the list of tokens collected so far
     */
    public ArrayList<Word> getList() {
        return list;
    }
}
package codescanner;
import java.util.ArrayList;
/**
 * Recursive-descent parser that checks a small C-like program
 * ({@code main() { ... }} containing assignments, {@code if} and
 * {@code while} statements) and emits quadruples (op, arg1, arg2, result)
 * for it.
 *
 * <p>Jump targets that are not yet known when a jump quadruple is created
 * are written as {@code "0"} and filled in later by {@link #backFill}.
 */
public class IrParser {
    // Token type codes used by the grammar.
    private static final int TYPE_END = 0;
    private static final int TYPE_MAIN = 1;
    private static final int TYPE_IF = 4;
    private static final int TYPE_WHILE = 7;
    private static final int TYPE_IDENTIFIER = 10;
    private static final int TYPE_NUMBER = 20;
    private static final int TYPE_ASSIGN = 21;
    private static final int TYPE_PLUS = 22;
    private static final int TYPE_MINUS = 23;
    private static final int TYPE_TIMES = 24;
    private static final int TYPE_DIVIDE = 25;
    private static final int TYPE_LPAREN = 26;
    private static final int TYPE_RPAREN = 27;
    private static final int TYPE_LBRACE = 30;
    private static final int TYPE_RBRACE = 31;
    private static final int TYPE_SEMICOLON = 34;
    private static final int TYPE_RELOP_FIRST = 35;
    private static final int TYPE_RELOP_LAST = 40;
    private static final int TYPE_NEWLINE = 41;
    private static final int TYPE_BLOCK_COMMENT = 42;

    private final Analyzer analyzer;
    private final ArrayList<Word> list;       // tokens produced by the scanner
    private Word word;                        // token currently being examined
    private int index = 0;                    // position of the next token to fetch from the list
    private int rowNum = 1;                   // current source line, for error messages
    private int tempVariousNum = 1;           // number of the next temporary variable
    private final ArrayList<ArrayList<String>> quaternaryList = new ArrayList<>(); // quadruple table
    private int nextQuaternaryIndex = 1;      // index the next quadruple will get
    private int trueCondition = 0;            // index of the latest "condition true" jump, 0 = none
    private int falseCondition = 0;           // index of the latest "condition false" jump, 0 = none

    /**
     * Scans {@code input.txt} (token list saved to {@code output.txt}) and
     * prepares the parser.
     */
    public IrParser() {
        this("input.txt", "output.txt");
    }

    /**
     * Scans the given source file and prepares the parser.
     *
     * @param inputPath  path of the source file to parse
     * @param outputPath path of the file the token list is written to
     */
    public IrParser(String inputPath, String outputPath) {
        analyzer = new Analyzer(inputPath, outputPath);
        analyzer.analyze(analyzer.getContent());
        list = analyzer.getList();
        // Placeholder at index 0 so that real quadruples are numbered from 1
        // and 0 can mean "no quadruple".
        quaternaryList.add(new ArrayList<String>());
    }

    /**
     * Appends a quadruple to the table and advances the next-quadruple index.
     *
     * @param op     operator
     * @param arg1   first operand
     * @param arg2   second operand
     * @param result result name or jump target
     */
    public void createAQuaternary(String op, String arg1, String arg2, String result) {
        ArrayList<String> quaternary = new ArrayList<>();
        quaternary.add(op);
        quaternary.add(arg1);
        quaternary.add(arg2);
        quaternary.add(result);
        quaternaryList.add(quaternary);
        nextQuaternaryIndex++;
    }

    /**
     * Merges two jump chains identified by quadruple indices.
     *
     * @param q1 index of the first quadruple
     * @param q2 index of the second quadruple, 0 meaning "none"
     * @return {@code q1} when {@code q2} is 0; otherwise {@code q2}, after
     *         {@code q1} has been written into the result field of {@code q2}
     */
    public int merge(int q1, int q2) {
        int result;
        if (q2 == 0) {
            result = q1;
        } else {
            result = q2;
            backFill(q2, q1);
        }
        return result;
    }

    /**
     * Writes {@code result} into the fourth field of the quadruple at
     * {@code qIndex}. Indices that do not refer to an existing quadruple
     * (0, negative, or beyond the table) are ignored.
     *
     * @param qIndex index of the quadruple to patch
     * @param result value to store as its result / jump target
     */
    public void backFill(int qIndex, int result) {
        if (qIndex > 0 && qIndex < quaternaryList.size()) {
            quaternaryList.get(qIndex).set(3, String.valueOf(result));
        }
    }

    /**
     * Parses {@code term { (+|-) term }} and emits one quadruple per operator.
     *
     * @return the name holding the value of the expression
     */
    public String expression() {
        String op, arg1, arg2, result;
        arg1 = term();
        result = arg1;
        while (word.getTypenum() == TYPE_PLUS || word.getTypenum() == TYPE_MINUS) {
            op = word.getWord();
            word = getNext(list);
            arg2 = term();
            result = newTemp();
            createAQuaternary(op, arg1, arg2, result);
            arg1 = result;
        }
        return result;
    }

    /**
     * Parses {@code factor { (*|/) factor }} and emits one quadruple per operator.
     *
     * @return the name holding the value of the term
     */
    public String term() {
        String op, arg1, arg2, result;
        result = arg1 = factor();
        while (word.getTypenum() == TYPE_TIMES || word.getTypenum() == TYPE_DIVIDE) {
            op = word.getWord();
            word = getNext(list);
            arg2 = factor();
            result = newTemp();
            createAQuaternary(op, arg1, arg2, result);
            arg1 = result;
        }
        return result;
    }

    /**
     * Advances to the next token if the current one has the given type.
     *
     * @param typeNum expected type code
     * @return {@code true} if the current token matched and was consumed
     */
    public boolean match(int typeNum) {
        if (typeNum == word.getTypenum()) {
            word = getNext(list);
            return true;
        } else {
            return false;
        }
    }

    /**
     * Parses an identifier, a number, or a parenthesized expression.
     *
     * @return the name holding the value of the factor; an empty string if
     *         the token stream ended where a factor was expected
     */
    public String factor() {
        String result;
        if (word.getTypenum() == TYPE_IDENTIFIER || word.getTypenum() == TYPE_NUMBER) {
            result = word.getWord();
            word = getNext(list);
        } else {
            if (!match(TYPE_LPAREN)) {
                System.out.print("'('错误");
                locateError();
                if (word.getTypenum() == TYPE_END) {
                    // Nothing left to skip: stop here, otherwise the recovery
                    // below would recurse on the end marker without progress.
                    return "";
                }
                word = getNext(list);
            }
            result = expression();
            expect(TYPE_RPAREN, "')'错误");
        }
        return result;
    }

    /**
     * Parses {@code expression relop expression} and emits the two jump
     * quadruples of the condition: the conditional jump taken when it holds
     * and the unconditional jump taken when it does not. Their indices are
     * left in {@code trueCondition} and {@code falseCondition}; both are set
     * to 0 when the relational operator is missing, so that callers do not
     * back-patch jumps left over from an earlier condition.
     */
    public void condition() {
        String op, arg1, arg2;
        arg1 = expression();
        if (word.getTypenum() >= TYPE_RELOP_FIRST && word.getTypenum() <= TYPE_RELOP_LAST) {
            op = word.getWord();
            word = getNext(list);
            arg2 = expression();
            trueCondition = nextQuaternaryIndex;
            falseCondition = nextQuaternaryIndex + 1;
            op = "goto " + op;
            createAQuaternary(op, arg1, arg2, "0");
            createAQuaternary("goto", "", "", "0");
        } else {
            System.out.print("关系运算符错误");
            locateError();
            trueCondition = 0;
            falseCondition = 0;
        }
    }

    /**
     * Parses one statement: an assignment, an {@code if} or a {@code while}.
     *
     * @param chain incoming chain value
     * @return index of the jump quadruple still waiting for its target, or 0
     *         for an assignment; the incoming value if the statement is invalid
     */
    public int statement(int chain) {
        String result, arg1;
        int chainTemp = 0;
        int tempFalse = 0;
        switch (word.getTypenum()) {
            case TYPE_IDENTIFIER:
                result = word.getWord();
                word = getNext(list);
                expect(TYPE_ASSIGN, "赋值号错误");
                arg1 = expression();
                expect(TYPE_SEMICOLON, "';'错误");
                createAQuaternary("=", arg1, "", result);
                chain = 0;
                break;
            case TYPE_IF:
                match(TYPE_IF);
                expect(TYPE_LPAREN, "'('错误");
                condition();
                // The body starts right after the two jumps of the condition.
                backFill(trueCondition, nextQuaternaryIndex);
                expect(TYPE_RPAREN, "')'错误");
                // Saved locally because nested conditions overwrite falseCondition.
                tempFalse = falseCondition;
                chainTemp = statement_Block(chainTemp);
                chain = merge(chainTemp, tempFalse);
                break;
            case TYPE_WHILE: {
                match(TYPE_WHILE);
                // Where the loop jumps back to: the first quadruple of the condition.
                int loopStart = nextQuaternaryIndex;
                expect(TYPE_LPAREN, "'('错误");
                condition();
                backFill(trueCondition, nextQuaternaryIndex);
                expect(TYPE_RPAREN, "')'错误");
                // Saved locally because nested conditions overwrite falseCondition.
                tempFalse = falseCondition;
                chainTemp = statement_Block(chainTemp);
                backFill(chainTemp, loopStart);
                result = "" + loopStart;
                createAQuaternary("goto", "", "", result);
                chain = tempFalse;
                break;
            }
            default:
                System.out.print("语句错误");
                locateError();
                jumpError();
                break;
        }
        return chain;
    }

    /**
     * Parses one or more consecutive statements, back-filling each
     * statement's pending jump with the index of the quadruple that follows it.
     *
     * @param chain incoming chain value
     * @return the chain value of the last statement parsed
     */
    public int statement_Sequence(int chain) {
        chain = statement(chain);
        while (word.getTypenum() == TYPE_IDENTIFIER
                || word.getTypenum() == TYPE_IF
                || word.getTypenum() == TYPE_WHILE) {
            backFill(chain, nextQuaternaryIndex);
            chain = statement(chain);
        }
        backFill(chain, nextQuaternaryIndex);
        return chain;
    }

    /**
     * Parses a brace-delimited statement sequence.
     *
     * @param chain incoming chain value
     * @return the chain value of the enclosed sequence
     */
    public int statement_Block(int chain) {
        expect(TYPE_LBRACE, "'{'错误");
        chain = statement_Sequence(chain);
        expect(TYPE_RBRACE, "'}'错误");
        return chain;
    }

    /**
     * Fetches the next significant token, skipping line-break and comment
     * tokens while adding the lines they span to the row counter.
     *
     * @param list token list to read from
     * @return the next token; an end marker (type code 0) once the list is
     *         exhausted, never {@code null}
     */
    public Word getNext(ArrayList<Word> list) {
        while (index < list.size()) {
            Word currentWord = list.get(index++);
            int type = currentWord.getTypenum();
            if (type != TYPE_NEWLINE && type != TYPE_BLOCK_COMMENT) {
                return currentWord;
            }
            // Each line break is stored as the two characters "\n", hence the division by 2.
            rowNum += currentWord.getWord().length() / 2;
        }
        Word end = new Word();
        end.setTypenum(TYPE_END);
        end.setWord("#");
        return end;
    }

    /**
     * Parses the whole program starting at {@code main()} and prints the
     * generated quadruples followed by the same code in three-address form.
     */
    public void parse() {
        int chain = 0;
        word = getNext(list);
        match(TYPE_MAIN);
        match(TYPE_LPAREN);
        match(TYPE_RPAREN);
        statement_Block(chain);
        if (word.getTypenum() != TYPE_END) {
            System.out.println("程序非正常结束!");
        }
        System.out.println("**************四元式序列*****************");
        printQuaternaryList();
        System.out.println("****************************************");
        System.out.println("*************三元地址代码***************");
        printCode();
        System.out.println("****************************************");
    }

    /**
     * Error recovery: skips tokens up to and including the next {@code ;},
     * or to the end of the token list if there is none.
     */
    public void jumpError() {
        while (!word.getWord().equals(";") && index < list.size()) {
            word = getNext(list);
        }
        if (index < list.size()) {
            word = getNext(list);
        }
    }

    /**
     * Prints the current token and line number to complete an error message.
     */
    public void locateError() {
        System.out.println(" 识别符号为 " + word.getWord() + " " + "位置: " + rowNum + "行");
    }

    /**
     * Prints every quadruple with its index.
     */
    public void printQuaternaryList() {
        for (int i = 1; i < quaternaryList.size(); i++) {
            ArrayList<String> quaternary = quaternaryList.get(i);
            System.out.printf("%3d (", i);
            for (int j = 0; j < 3; j++) {
                System.out.printf("%7s, ", quaternary.get(j));
            }
            System.out.printf("%7s)\n", quaternary.get(3));
        }
    }

    /**
     * Prints every quadruple as a line of three-address code. The form is
     * chosen from the operator text: one character is an arithmetic
     * operation or assignment, four characters is an unconditional
     * {@code goto}, anything else is a conditional jump {@code "goto <relop>"}.
     */
    public void printCode() {
        for (int i = 1; i < quaternaryList.size(); i++) {
            ArrayList<String> quaternary = quaternaryList.get(i);
            System.out.printf("%3d ", i);
            if (quaternary.get(0).length() == 1) {
                if (quaternary.get(2).trim().length() > 0) {
                    System.out.println(quaternary.get(3) + " = " + quaternary.get(1) + quaternary.get(0) + quaternary.get(2));
                } else {
                    System.out.println(quaternary.get(3) + " " + quaternary.get(0) + " " + quaternary.get(1) + quaternary.get(2));
                }
            } else if (quaternary.get(0).length() == 4) {
                System.out.println(quaternary.get(0) + " " + quaternary.get(3));
            } else {
                String[] token = quaternary.get(0).split(" ");
                System.out.println("if (" + quaternary.get(1) + " "
                        + token[1] + " "
                        + quaternary.get(2) + ") "
                        + token[0] + " "
                        + quaternary.get(3));
            }
        }
    }

    /**
     * @return a fresh temporary variable name: {@code t1}, {@code t2}, ...
     */
    public String newTemp() {
        return "t" + (tempVariousNum++);
    }

    /** Consumes a token of the given type, or prints the message and error location if it is absent. */
    private void expect(int typeNum, String message) {
        if (!match(typeNum)) {
            System.out.print(message);
            locateError();
        }
    }

    public static void main(String[] args) {
        IrParser parser = new IrParser();
        parser.parse();
    }
}