由于语法分析模块由多个不同的分析器协同完成,当一个分析器中发现语法错误,或者仅仅是该识别的非终结符识别完成,即将返回时,该分析器会将从这里开始的终结符都扔给委托它的分析器去处理。这会导致一个问题,那就是错误会从当前分析器扩散到调用栈中的其它分析器。比如输入:
if ( x == 1 2 ) { // other codes ...
在没有任何保护措施的情况下,表达式分析器遇到“2”时,会认为表达式识别结束,然后返回一个表达式节点,表征
x == 1
然后将“2”传入LR分析器。接着LR分析器一看也傻眼了,怎么来了个数字,于是也开始报错。而这一切的根源也许仅仅是因为多键入了一个空格。
为了尽可能地防止错误扩散,需要改造分析器,使它们能够容忍错误。就表达式分析而言,如果发现有连续两个因子出现,或者连续两个不应连续出现的运算符(可以连续出现的情况如 1 * -1),那么仍然继续表达式的识别。或者说,需要因子时遇到了运算符,或者当需要运算符时遇到了因子,当这两种非致命伤出现时,要用一种手法迷惑分析器,让它认为还可以继续分析下去。在这里给出一种很简单的方法,就是在终结符流中插入一个伪造的终结符
/* Case 1: an operator was expected but a factor arrived (struct Token* token) */
/* Forge a "+" token on the same line as the offending token. */
struct Token fakeOp = {token->line, PLUS, NULL, "+"};
// report the error here
/* Feed the forged operator first, then replay the real token. */
self->consumeToken(self, &fakeOp);
self->consumeToken(self, token);
/* Case 2: a factor was expected but an operator arrived (struct Token* token) */
/* Forge an integer "0" token on the same line as the offending token. */
struct Token fakeNum = {token->line, INTEGER, NULL, "0"};
// report the error here
/* Feed the forged factor first, then replay the real token. */
self->consumeToken(self, &fakeNum);
self->consumeToken(self, token);
首先伪造一个终结符传入分析器,这时分析器会接受这个伪造的终结符,然后再把原来由词法分析模块传入的真正的终结符重新交给分析器处理一次,这样就达到了目的。
此外,表达式分析还有一个大敌,就是左括号太多。这一点其实也不需要什么办法去容错,只用报个错,然后把滞留在符号栈里的那些左括号忽略掉就行了。
最后给出OperationAnalyser加入容错机制后需要修改的wrapname(consumeFactor)和wrapname(consumeOperator)两个函数。它们会用到的宏包括:
/*
 * Token-kind predicates used by the expression analyser's error recovery.
 *
 * isFirstFactor(x)   - true when x is IDENT or lies in the closed range
 *                      [INTEGER, REAL].
 * isFirstOperator(x) - true when x lies in the closed range [PLUS, OR].
 *
 * Both tests depend on the relative order of the token-kind constants, so
 * they must be revisited whenever that enumeration is reordered.
 * The argument is expanded more than once: pass an expression without side
 * effects.
 */
#define isFirstFactor(x) ( ( (x) == IDENT ) || \
                           ( ( (x) >= INTEGER ) && ( (x) <= REAL ) ) )
#define isFirstOperator(x) ( ( (x) >= PLUS ) && ( (x) <= OR ) )
最好将它们放到AcceptType枚举的附近,因为它们跟AcceptType枚举中某些常量的顺序关系非常密切,如果AcceptType有修改的需要,那么就得对应地修改这两个宏。
这里是修改后的函数
/*
 * Diagnostic message texts for malformed expressions.
 * NOTE(review): "a operator" and "parenthese" look like misspellings of
 * "an operator" and "parenthesis"; the text is left untouched here because
 * it is emitted at runtime.
 */
static ErrMsg wantFactor = "Incorrect expression: should have been a factor";
static ErrMsg wantOperator = "Incorrect expression: should have been a operator";
static ErrMsg excessLParent = "Excessive opening parenthese.";
static void wrapname(consumeFactor)(struct OperationAnalyser* self,
struct Token* token)
{
if(NOT == token->type) {
self->opStack->push(self->opStack,
newOperator(token->type,
PRIORITY[token->type],
unaryOperate));
self->needFactor = 1;
} else if(MINUS == token->type || PLUS == token->type) {
self->opStack->push(self->opStack,
newOperator(token->type, 0, unaryOperate));
self->needFactor = 1;
} else if(IDENT == token->type) {
struct SyntaxAnalyser* analyser = newVariableAnalyser();
analyserStack->push(analyserStack, analyser);
analyser->consumeToken(analyser, token);
} else if(INTEGER == token->type) {
self->numStack->push(self->numStack,
newIntegerNode(atoi(token->image)));
self->needFactor = 0;
} else if(REAL == token->type) {
self->numStack->push(self->numStack, newRealNode(atof(token->image)));
self->needFactor = 0;
} else if(LPARENT == token->type) {
self->opStack->push(self->opStack,
newOperator(token->type, 0x7fffffff, nullOperate));
self->needFactor = 1;
} else {
struct AbstractSyntaxNode* ret = (struct AbstractSyntaxNode*)
(self->numStack->peek(self->numStack));
if(NULL == ret && 1 == self->opStack->height(self->opStack)) {
self->numStack->pop(self->numStack); // 弹出 ret
wrapname(cleanup)(self);
struct SyntaxAnalyser* analyser = (struct SyntaxAnalyser*)
(analyserStack->peek(analyserStack));
analyser->consumeNonTerminal(analyser, ret);
analyser = (struct SyntaxAnalyser*)
(analyserStack->peek(analyserStack));
analyser->consumeToken(analyser, token);
} else {
/* 容错处理 */
struct Token fakeNum = {token->line, INTEGER, NULL, "0"};
fprintf(stderr, "Before `%s' ",
NULL == token->image ? "End of the file." : token->image);
fprintf(stderr, "Error @ line %d\n"
" %s\n",
token->line, wantFactor);
self->consumeToken(self, &fakeNum);
self->consumeToken(self, token);
}
}
}
static void wrapname(consumeOperator)(struct OperationAnalyser* self,
struct Token* token)
{
int priority = PRIORITY[token->type];
if(0 < priority && priority < PRIORITY[LPARENT]) {
/* token 是运算符 */
int push = 0;
struct Operator* topOp = (struct Operator*)
(self->opStack->peek(self->opStack));
push |= (priority < topOp->priority);
push |= (priority == topOp->priority && topOp->rightCombination);
while(!push) {
topOp = (struct Operator*)(self->opStack->pop(self->opStack));
topOp->operate(topOp, self->numStack);
topOp = (struct Operator*)(self->opStack->peek(self->opStack));
push |= (priority < topOp->priority);
push |= (priority == topOp->priority && topOp->rightCombination);
}
self->opStack->push(self->opStack, newOperator(token->type,
priority,
OPER_FUNCS[token->type]));
self->needFactor = 1;
} else if(RPARENT == token->type) {
struct Operator* topOp = (struct Operator*)
(self->opStack->pop(self->opStack));
while(nullOperate != topOp->operate) {
topOp->operate(topOp, self->numStack);
topOp = (struct Operator*)(self->opStack->pop(self->opStack));
}
topOp->operate(topOp, self->numStack);
self->needFactor = 0;
if(0 == self->opStack->height(self->opStack)) {
struct AbstractSyntaxNode* ret = (struct AbstractSyntaxNode*)
(self->numStack->pop(self->numStack));
wrapname(cleanup)(self);
struct SyntaxAnalyser* analyser = (struct SyntaxAnalyser*)
(analyserStack->peek(analyserStack));
analyser->consumeNonTerminal(analyser, ret);
analyser = (struct SyntaxAnalyser*)
(analyserStack->peek(analyserStack));
analyser->consumeToken(analyser, token);
return;
}
} else if (isFirstFactor(token->type)) {
/* 容错处理 */
struct Token fakeOp = {token->line, PLUS, NULL, "+"};
fprintf(stderr, "Before `%s' ",
NULL == token->image ? "End of the file." : token->image);
fprintf(stderr, "Error @ line %d\n"
" %s\n",
token->line, e);
self->consumeToken(self, &fakeOp);
self->consumeToken(self, token);
} else {
struct AbstractSyntaxNode* ret;
struct Operator* topOp = (struct Operator*)
(self->opStack->pop(self->opStack));
while(LPARENT != topOp->op) {
topOp->operate(topOp, self->numStack);
topOp = (struct Operator*)(self->opStack->pop(self->opStack));
}
topOp->operate(topOp, NULL); // 左括号
ret = (struct AbstractSyntaxNode*)(self->numStack->pop(self->numStack));
if(0 != self->opStack->height(self->opStack)) {
fprintf(stderr, "Error @ line %d\n"
" %s\n",
token->line, excessLParent);
}
wrapname(cleanup)(self);
struct SyntaxAnalyser* analyser = (struct SyntaxAnalyser*)
(analyserStack->peek(analyserStack));
analyser->consumeNonTerminal(analyser, ret);
analyser = (struct SyntaxAnalyser*)(analyserStack->peek(analyserStack));
analyser->consumeToken(analyser, token);
}
}
这里的错误报告部分写得有点不太好,都是硬邦邦的fprintf,以后需要将它们抽取出来专门处理。
语法分析器容错机制
本文探讨了语法分析器在面对错误输入时如何通过插入伪造的终结符来避免错误扩散,确保解析过程的稳定性。文章详细介绍了如何针对表达式分析器进行容错改造,并提供了具体的代码实现。
346

被折叠的 条评论
为什么被折叠?



