当一个分析器A调用另一个分析器B时,如果称A处于B的“外层”,那么除开测试文件中产生的那个FakeDefaultAnalyser之外,LR分析器是整个语法分析结构中最外层的语法分析器。因此当其它分析器有无法处理的错误时,这些烂摊子都得它来收拾。比如变量分析中可能有冗余的反方括号,表达式分析中可能有冗余的反圆括号,等等,甚至包括其它的LR分析器也有可能把错误扔给更外层的LR分析器。这样一来,LR分析器中至少得对付这样一些错误
static ErrMsg excessiveRParent = "Excessive close parenthese.", excessiveRBracket = "Excessive close bracket.", missingRBrace = "Missing close brace.";
此外,应该还要考虑一些简单的内部错误,比如掉分号,if语句或while语句条件掉了括号之类的static ErrMsg unknownSyntaxError = "Unknown error.",
static ErrMsg missingLParent = "Missing opening parenthese.", missingRParent = "Missing close parenthese.", missingRBrace = "Missing close brace.";
如果还有其它类型的错误,又不能抛给外层分析器,这些错误就统统归类为
static ErrMsg unknownSyntaxError = "Unknown error.";
好了,下面是错误处理的正文。
一个错误处理方案跟OperationAnalyser很类似,那就是增加一些fakeXXX,比如fakeShift,fakeReduce之类的。不过坦白说,LR分析器状态分析表太大,一个个地加太麻烦,如果也弄个如同fakeShiftFirstSentence之类的,那么就会一不小心跟正常的那些移进规约什么的名字冲突,所以这并不是一个好办法。
于是,我们得转头寻求一个较为简单,但是很可能比较混乱的方法,专门弄一个函数来处理错误。这里引入这样几个函数
static void wrapname(errorHandler)(struct LRAnalyser*, struct Token*); static void wrapname(fatalError)(struct LRAnalyser*, struct Token*);
前面一个就是错误处理器,后面一个比较纠结,那就是当发生了错误处理器难以理解的unknownSyntaxError时,就认为发生了fatalError,然后就把处理权转移给这个函数。现在先来看看wrapname(errorHandler)的一个实现方式。
static void wrapname(errorHandler)(struct LRAnalyser* self, struct Token* token) { struct LRState* currentState = (struct LRState*) (self->stateStack->peek(self->stateStack)); LRStateFamily stateNo = currentState - jerryLRStates; if(IfElseBranch_1 == stateNo || WhileLoop_1 == stateNo) { syntaxError(missingLParent, token); struct Token fakeLParent = {token->line, LPARENT, NULL, "("}; self->consumeToken(self, &fakeLParent); self->consumeToken(self, token); } else if(IfElseBranch_3 == stateNo || WhileLoop_3 == stateNo) { syntaxError(missingRParent, token); struct Token fakeRParent = {token->line, RPARENT, NULL, ")"}; self->consumeToken(self, &fakeRParent); self->consumeToken(self, token); } else if(Sentence_LBraceBasicBlockRBrace_2 == stateNo && END == token->type) { syntaxError(missingRBrace, token); struct Token fakeRBrace = {token->line, RBRACE, NULL, "}"}; self->consumeToken(self, &fakeRBrace); self->consumeToken(self, token); } else { if(VariableRegister_VariableInitialization_1 == stateNo || DeclarationVariableRegister == stateNo || Sentence_IOAssignmentEoS_2 == stateNo || Sentence_AssignmentEoS_1 == stateNo || IfElseBranch_4 == stateNo || WhileLoop_4 == stateNo) { if(RPARENT == token->type) { syntaxError(excessiveRParent, token); return; } else if(RBRACKET == token->type) { syntaxError(excessiveRBracket, token); return; } } if(DeclarationVariableRegister == stateNo || Sentence_IOAssignmentEoS_2 == stateNo || Sentence_AssignmentEoS_1 == stateNo || Sentence_BreakEoS_1 == stateNo) { syntaxError(missingSemicolon, token); struct Token fakeEoS = {token->line, EOS, NULL, ";"}; self->consumeToken(self, &fakeEoS); self->consumeToken(self, token); return; } wrapname(fatalError)(self, token); // 致命错误 } }
进入函数后,首先用两句话获取当前状态栈栈顶的状态编号,这个编号在后面某些分支中可能会用到。接下来就是一堆很恶心的分支。第一个分支的条件成立的话,那么发生错误很可能是因为漏掉了条件外面的左括号,于是在分支中首先报错,接着故伎重演构造一个假的左括号哄一哄LR分析器,然后再继续分析。第二个分支与第一个类似。第三个分支与前两个不太一样,它验证当一个基本块返回后,紧接下来的不是一个反话括号,而是文件结束符号,所以这时就判定缺少一个反花括号。现在终于来到了最后一个,看起来肮脏腐臭的块(如果你有更好的实现方式,请不吝赐教~)。
本来这个块也得分开写的,然而这两种情况中有些状态是相同的,分开写就会很麻烦,所以最后我把它们整成这个样子了。第一分支语句条件中的那些状态都是表达式返回以后Goto的状态,因此都有可能有冗余反括号,在这个子块里面就检查当前这个符号是不是反括号,然后作出对应的错误处理,并返回;但是这些检查如果失败,那么检查会进入下一分支。而第二分支语句条件中的那些状态都是应该遇到<EOS>的状态,然而这里出错就说明它们没有遇到,所以塞一个假的分号去并返回。如果这些拦截都失效了,那么就只好认为这是一个致命错误了。
产生了一个致命错误,往往是因为程序员做了一些奇妙的事情,比如删除了多行注释的开头但是忘记删除其它部分。于是我们得采取一些极端的手段,不如干脆把接下来的所有终结符跳过——直到某个特殊的符号,比如句子结尾的分号,或者反花括号。这样就需要一个过滤器,当发生致命错误时,就替换掉LR分析器的consumeToken成员函数为这个东西
static void wrapname(skipToken)(void* self, struct Token* token) { if(EOS == token->type) { ((struct LRAnalyser*)self)->consumeToken = wrapname(consumeToken); // 重置 } else if(RBRACE == token->type || END == token->type) { wrapname(consumeToken)(self, token); // 当前基本块结束,这样调用之后会让当前分析器弹出栈 } }
那么,wrapname(fatalError)就可以这样来写
static void wrapname(fatalError)(struct LRAnalyser* self, struct Token* t) { syntaxError(unknownSyntaxError, t); self->stateStack->pops(self->stateStack, self->stateStack->height(self->stateStack) - 1, NULL); struct AbstractSyntaxNode* node; while(0 < self->symbolStack->height(self->symbolStack)) { node = (struct AbstractSyntaxNode*) (self->symbolStack->pop(self->symbolStack)); node->delNode(node); } self->consumeToken = wrapname(skipToken); self->consumeToken(self, t); }
有一点重要的是,进入这个函数之后首先会重置LR分析器的状态栈和符号栈,这样避免以后遇到句子结束符之后重新开始分析时,使得当前栈里的东西影响到后续分析——反正现在已经有错误出现了,所以栈里面的东西都算是废品了。
最后还有一点需要修改的,那就是wrapname(consumeToken)
static void wrapname(consumeToken)(void* self, struct Token* token) { struct LRAnalyser* lra = (struct LRAnalyser*)self; struct LRState* lrState = (struct LRState*) (lra->stateStack->peek(lra->stateStack)); if(NULL != lrState->actions[token->type]) { lrState->actions[token->type](lra, token); } else { struct AbstractSyntaxNode* ret; struct LRState* topState = (struct LRState*) (lra->stateStack->peek(lra->stateStack)); // pop -> peek if((jerryLRStates + LRState0 == topState && RBRACE == token->type) || jerryLRStates + LRState1 == topState) { ret = lra->symbolStack->pop(lra->symbolStack); wrapname(cleanup)(lra); struct SyntaxAnalyser* analyser = (struct SyntaxAnalyser*) (analyserStack->peek(analyserStack)); analyser->consumeNonTerminal(analyser, ret); analyser = (struct SyntaxAnalyser*) (analyserStack->peek(analyserStack)); analyser->consumeToken(analyser, token); } else { wrapname(errorHandler)(lra, token); // here goes to the error handler } } }
有一个小地方,就是上面的注释有 pop -> peek 的那一行,以前是弹栈而现在只是看一看,这跟变量分析器那个改动很类似,因为现在揉进了容错处理,错误处理会尽可能地纠正它,所以如果这时弹栈是不明智的。
附 lr-analyser.c 修改后的代码,只包含分析器部分,分析预测表没有改变
#include<stdlib.h> #include"datastruct.h" #include"syntax-analyser.h" #include"syntax-node.h" #include"COOL/MemoryAllocationDebugger.h" #include"COOL/Stack/Stack.h" extern struct Stack* analyserStack; extern struct SyntaxAnalyser* newVariableAnalyser(void); extern struct SyntaxAnalyser* newOperationAnalyser(void); extern void syntaxError(ErrMsg, struct Token*); static ErrMsg unknownSyntaxError = "Unknown error.", missingLParent = "Missing opening parenthese.", missingRParent = "Missing close parenthese.", excessiveRParent = "Excessive close parenthese.", excessiveRBracket = "Excessive close bracket.", missingSemicolon = "Missing Semicolon at the end of sentence.", missingRBrace = "Missing close brace."; #define wrapname(name) name ## _LRAnalyser static void wrapname(consumeNonTerminal)(void*, struct AbstractSyntaxNode*); static void wrapname(consumeToken)(void*, struct Token*); static void wrapname(cleanup)(struct LRAnalyser*); static void wrapname(errorHandler)(struct LRAnalyser*, struct Token*); static void wrapname(fatalError)(struct LRAnalyser*, struct Token*); static void wrapname(skipToken)(void*, struct Token*); static void wrapname(cleanup)(struct LRAnalyser* self) { struct AbstractSyntaxNode* node; self->symbolStack->finalize(self->symbolStack); self->stateStack->finalize(self->stateStack); analyserStack->pop(analyserStack); revert(self); } static void wrapname(consumeToken)(void* self, struct Token* token) { struct LRAnalyser* lra = (struct LRAnalyser*)self; struct LRState* lrState = (struct LRState*) (lra->stateStack->peek(lra->stateStack)); if(NULL != lrState->actions[token->type]) { lrState->actions[token->type](lra, token); } else { // printf("Interrupt LR\n"); struct AbstractSyntaxNode* ret; struct LRState* topState = (struct LRState*) (lra->stateStack->peek(lra->stateStack)); // pop to peek if((jerryLRStates + LRState0 == topState && RBRACE == token->type) || jerryLRStates + LRState1 == topState) { ret = lra->symbolStack->pop(lra->symbolStack); wrapname(cleanup)(lra); // printf("Cleaned...\n"); struct SyntaxAnalyser* analyser = (struct SyntaxAnalyser*) (analyserStack->peek(analyserStack)); analyser->consumeNonTerminal(analyser, ret); analyser = (struct SyntaxAnalyser*) (analyserStack->peek(analyserStack)); analyser->consumeToken(analyser, token); } else { wrapname(errorHandler)(lra, token); } } } static void wrapname(consumeNonTerminal)(void* self, struct AbstractSyntaxNode* nonTerminal) { struct LRAnalyser* lra = (struct LRAnalyser*)self; struct LRState* currentState = (struct LRState*) (lra->stateStack->peek(lra->stateStack)); lra->symbolStack->push(lra->symbolStack, nonTerminal); lra->stateStack->push(lra->stateStack, currentState->gotoes[NR_GOTO_TABLE_COLUMN]); // printf(" Delegate returns. Goto %d\n", currentState->gotoes[NR_GOTO_TABLE_COLUMN] - jerryLRStates); } struct SyntaxAnalyser* newLRAnalyser(void) { struct LRAnalyser* analyser = (struct LRAnalyser*) allocate(sizeof(struct LRAnalyser)); analyser->symbolStack = newStack(); analyser->stateStack = newStack(); analyser->stateStack->push(analyser->stateStack, jerryLRStates); analyser->consumeToken = wrapname(consumeToken); analyser->consumeNonTerminal = wrapname(consumeNonTerminal); return (struct SyntaxAnalyser*)analyser; } static void wrapname(errorHandler)(struct LRAnalyser* self, struct Token* token) { struct LRState* currentState = (struct LRState*) (self->stateStack->peek(self->stateStack)); LRStateFamily stateNo = currentState - jerryLRStates; if(IfElseBranch_1 == stateNo || WhileLoop_1 == stateNo) { syntaxError(missingLParent, token); struct Token fakeLParent = {token->line, LPARENT, NULL, "("}; self->consumeToken(self, &fakeLParent); self->consumeToken(self, token); } else if(IfElseBranch_3 == stateNo || WhileLoop_3 == stateNo) { syntaxError(missingRParent, token); struct Token fakeRParent = {token->line, RPARENT, NULL, ")"}; self->consumeToken(self, &fakeRParent); self->consumeToken(self, token); } else if(Sentence_LBraceBasicBlockRBrace_2 == stateNo && END == token->type) { syntaxError(missingRBrace, token); struct Token fakeRBrace = {token->line, RBRACE, NULL, "}"}; self->consumeToken(self, &fakeRBrace); self->consumeToken(self, token); } else { if(VariableRegister_VariableInitialization_1 == stateNo || DeclarationVariableRegister == stateNo || Sentence_IOAssignmentEoS_2 == stateNo || Sentence_AssignmentEoS_1 == stateNo || IfElseBranch_4 == stateNo || WhileLoop_4 == stateNo) { if(RPARENT == token->type) { syntaxError(excessiveRParent, token); return; } else if(RBRACKET == token->type) { syntaxError(excessiveRBracket, token); return; } } if(DeclarationVariableRegister == stateNo || Sentence_IOAssignmentEoS_2 == stateNo || Sentence_AssignmentEoS_1 == stateNo || Sentence_BreakEoS_1 == stateNo) { syntaxError(missingSemicolon, token); struct Token fakeEoS = {token->line, EOS, NULL, ";"}; self->consumeToken(self, &fakeEoS); self->consumeToken(self, token); return; } wrapname(fatalError)(self, token); } } static void wrapname(fatalError)(struct LRAnalyser* self, struct Token* t) { syntaxError(unknownSyntaxError, t); self->stateStack->pops(self->stateStack, self->stateStack->height(self->stateStack) - 1, NULL); struct AbstractSyntaxNode* node; while(0 < self->symbolStack->height(self->symbolStack)) { node = (struct AbstractSyntaxNode*) (self->symbolStack->pop(self->symbolStack)); node->delNode(node); } self->consumeToken = wrapname(skipToken); self->consumeToken(self, t); } static void wrapname(skipToken)(void* self, struct Token* token) { if(EOS == token->type) { ((struct LRAnalyser*)self)->consumeToken = wrapname(consumeToken); } else if(RBRACE == token->type || END == token->type) { wrapname(consumeToken)(self, token); } } #undef wrapname