编译原理之美 --- 03 | 语法分析（一）：纯手工打造公式计算器

最新推荐文章于 2021-06-29 21:02:21 发布

谛听-

最新推荐文章于 2021-06-29 21:02:21 发布

阅读量1.2k

点赞数

分类专栏：编译原理之美

本文链接：https://blog.youkuaiyun.com/u012319493/article/details/103450085

版权

编译原理之美专栏收录该内容

4 篇文章

订阅专栏

语法分析：生成 AST(Abstract Syntax Tree, 抽象语法树)。

可通过递归下降算法生成 AST，然后遍历 AST 进行求值。

上下文无关文法允许规则递归地引用自身，而正则文法（正则表达式）不允许这种递归；正则文法是上下文无关文法的一个子集。

simple_calculator.py

#!/usr/bin/env python
# -*- coding: utf-8 -*-
from play_with_compiler.craft.base_type import ASTNode
from play_with_compiler.craft.base_type import Token, TokenReader, TokenType, ASTNodeType
from play_with_compiler.craft.simple_lexer import SimpleLexer

'''
一个简单的AST节点的实现。
属性包括：类型、文本值、父节点、子节点。
'''
class SimpleASTNode(ASTNode):
    """A simple AST node holding a type, a text value, a parent and children."""

    def __init__(self, node_type, text):
        # Payload supplied by the parser.
        self.node_type = node_type
        self.text = text
        # Tree links; ``parent`` is filled in when this node is passed to
        # another node's addChild().
        self.children = []
        self.parent = None

    def addChild(self, child):
        """Append ``child`` to this node's children and make this node its parent."""
        self.children.append(child)
        child.parent = self

'''
实现一个计算器，但计算的结合性是有问题的。因为它使用了下面的语法规则：
additive -> multiplicative | multiplicative + additive
multiplicative -> primary | primary * multiplicative    
递归项在右边，会自然的对应右结合。我们真正需要的是左结合。
'''
class SimpleCalculator(object):
    '''
    执行脚本，并打印输出AST和求值过程。
    '''
    def evaluate(self, script):
        """Parse ``script``, print its AST, then print the evaluation trace.

        Any exception raised while parsing, dumping or evaluating is caught
        here and reported by printing ``err``. Nothing is returned.
        """
        try:
            ast_root = self.parse(script)
            # AST lines are prefixed with "@", evaluation trace lines with "|".
            self.dump_ast(ast_root, "@")
            self._evaluate(ast_root, "|")
        except Exception:
            print('err')

    '''
    解析脚本，并返回根节点
    '''
    def parse(self, code):
        """Tokenize ``code`` with SimpleLexer and return the root node built by prog()."""
        token_stream = SimpleLexer().tokenize(code)
        return self.prog(token_stream)

    '''
    打印输出AST的树状结构
    '''
    def dump_ast(self, node, indent):
        if node == None:
            return
        print('{}{} {}'.format(indent, node.node_type, node.text))
        for item in node.children:
            self.dump_ast(item, indent + "\t")

    '''
    对某个AST节点求值，并打印求值过程。
    @param indent  打印输出时的缩进量
    '''
    def _evaluate(self, node, indent):
        """Evaluate the subtree rooted at ``node`` and print a trace of the work.

        A ``Calculating`` line is printed on entry and a ``Result`` line on
        exit; children are traced with one extra tab of indentation.

        Evaluation rules by node type:

        * ``Programm``: evaluates every child in order, result is the last one.
        * ``Additive``: ``+`` when the node text is ``"+"``, otherwise ``-``.
        * ``Multiplicative``: ``*`` when the node text is ``"*"``, otherwise
          integer division truncated toward zero.
        * ``IntLiteral``: the node text converted to ``int``.
        * anything else (including ``Identifier``): ``0``.

        :param node: AST node to evaluate.
        :param indent: prefix for the trace lines printed for this node.
        :returns: the integer value of the subtree.
        :raises ZeroDivisionError: when a division has a zero right operand.
        :raises ValueError: when an integer literal's text is not a valid int.
        """
        print("{} Calculating: {}".format(indent, node.node_type))
        child_indent = indent + "\t"
        result = 0

        if node.node_type == ASTNodeType.Programm:
            for child in node.children:
                result = self._evaluate(child, child_indent)
        elif node.node_type == ASTNodeType.Additive:
            left = self._evaluate(node.children[0], child_indent)
            right = self._evaluate(node.children[1], child_indent)
            if node.text == "+":
                result = left + right
            else:
                result = left - right
        elif node.node_type == ASTNodeType.Multiplicative:
            left = self._evaluate(node.children[0], child_indent)
            right = self._evaluate(node.children[1], child_indent)
            if node.text == "*":
                result = left * right
            else:
                # This is an integer calculator: "/" would yield a float that
                # a parent node then silently truncates. Divide in integers,
                # rounding toward zero, so every level agrees on the value.
                quotient = abs(left) // abs(right)
                result = quotient if (left < 0) == (right < 0) else -quotient
        elif node.node_type == ASTNodeType.IntLiteral:
            # Convert once at the leaf so every result is an int.
            result = int(node.text)

        print("{} Result: {}".format(indent, result))
        return result

    '''
    语法解析：根节点
    '''
    def prog(self, tokens):
        """Parse the token stream into a root ``Programm`` node.

        The root has text ``"Calculator"`` and at most one child: the
        expression returned by additive(). Tokens left over after that
        expression are not inspected.

        :param tokens: token reader to consume from.
        :returns: the root node (never ``None``).
        """
        root = SimpleASTNode(ASTNodeType.Programm, "Calculator")
        expression = self.additive(tokens)
        if expression is not None:
            root.addChild(expression)
        return root

    '''
    整型变量声明语句，如：
    int a;
    int b = 2*3;
    '''
    def intDeclare(self, tokens):
        node = None
        token = tokens.peek()    # 预读
        if (token != None and token.token_type == TokenType.Int): # 匹配Int
            token = tokens.read()      # 消耗掉int
            if (tokens.peek().token_type == TokenType.Identifier): # 匹配标识符
                token = tokens.read()  # 消耗掉标识符
                # 创建当前节点，并把变量名记到AST节点的文本值中，这里新建一个变量子节点也是可以的
                node = SimpleASTNode(ASTNodeType.IntDeclaration, token.token_text)
                token = tokens.peek()  # 预读
                if (token != None and token.token_type == TokenType.Assignment):
                    tokens.read()      # 消耗掉等号
                    child = self.additive(tokens)  # 匹配一个表达式
                    if (child == None):
                        raise "invalide variable initialization, expecting an expression"
                    else:
                        node.addChild(child)
            else:
                raise "variable name expected"

            if (node != None):
                token = tokens.peek()
                if (token != None and token.token_type == TokenType.SemiColon):
                    tokens.read()
                else:
                    raise "invalid statement, expecting semicolon"
        return node

    '''
    语法解析：加法表达式
    '''
    def additive(self, tokens):
        child1 = self.multiplicative(tokens)
        node = child1

        token = tokens.peek()
        if (child1 != None and token != None):
            if (token.token_type == TokenType.Plus or token.token_type == TokenType.Minus):
                token = tokens.read()
                child2 = self.additive(tokens)
                if (child2 != None):
                    node = SimpleASTNode(ASTNodeType.Additive, token.token_text)
                    node.addChild(child1)
                    node.addChild(child2)
                else:
                    raise "invalid additive expression, expecting the right part."
        return node

    '''
    语法解析：乘法表达式
    '''
    def multiplicative(self, tokens):
        child1 = self.primary(tokens)
        node = child1
        token = tokens.peek()
        if (child1 != None and token != None):
            if (token.token_type == TokenType.Star or token.token_type == TokenType.Slash):
                token = tokens.read()
                child2 = self.multiplicative(tokens)
                if (child2 != None):
                    node = SimpleASTNode(ASTNodeType.Multiplicative, token.token_text)
                    node.addChild(child1)
                    node.addChild(child2)
                else:
                    raise "invalid additive expression, expecting the right part."
        return node
    '''
    
    语法解析：基础表达式
    '''
    def primary(self, tokens):
        node = None
        token = tokens.peek()
        if (token != None):
            if (token.token_type == TokenType.IntLiteral):
                token = tokens.read()
                node = SimpleASTNode(ASTNodeType.IntLiteral, token.token_text)
            elif (token.token_type == TokenType.Identifier): 
                token = tokens.read()
                node = SimpleASTNode(ASTNodeType.Identifier, token.token_text)
            elif (token.token_type == TokenType.LeftParen):
                tokens.read()
                node = self.additive(tokens)
                if (node != None):
                    token = tokens.peek()
                    if (token != None and token.token_type == TokenType.RightParen):
                        tokens.read()
                    else:
                        raise "expecting right parenthesis"
                else:
                    raise "expecting an additive expression inside parenthesis"
        return node  # 这个方法也做了AST的简化，就是不用构造一个primary节点，直接返回子节点。因为它只有一个子节点

test.py

#!/usr/bin/env python
# -*- coding: utf-8 -*-
from play_with_compiler.craft.simple_lexer import SimpleLexer
from play_with_compiler.craft.simple_calculator import SimpleCalculator

def test_simple_calculator():
    """Exercise SimpleCalculator: one declaration parse, then three expressions."""
    calculator = SimpleCalculator()

    # Parse a variable declaration statement and dump the resulting AST.
    declaration = "int a = b+3;"
    print("解析变量声明语句: {}".format(declaration))
    token_stream = SimpleLexer().tokenize(declaration)
    try:
        calculator.dump_ast(calculator.intDeclare(token_stream), "")
    except Exception:
        print('err')

    # Each case is (script, banner template). In order: a well-formed
    # expression, an incomplete expression, and a chained addition that
    # shows how the parser groups operands.
    cases = (
        ("2+3*5", "\n计算: {}，看上去一切正常。"),
        ("2+", "\n{} ，应该有语法错误。"),
        ("2+3+4", "\n计算: {}，结合性出现错误。"),
    )
    for script, banner in cases:
        print(banner.format(script))
        calculator.evaluate(script)

# Run the demo only when this file is executed as a script.
if __name__ == "__main__":
    test_simple_calculator()

结果：

[root@VM_30_144_centos craft]# python3 test.py
解析变量声明语句: int a = b+3;
ASTNodeType.IntDeclaration a
	ASTNodeType.Additive +
		ASTNodeType.Identifier b
		ASTNodeType.IntLiteral 3

计算: 2+3*5，看上去一切正常。
@ASTNodeType.Programm Calculator
@	ASTNodeType.Additive +
@		ASTNodeType.IntLiteral 2
@		ASTNodeType.Multiplicative *
@			ASTNodeType.IntLiteral 3
@			ASTNodeType.IntLiteral 5
| Calculating: ASTNodeType.Programm
|	 Calculating: ASTNodeType.Additive
|		 Calculating: ASTNodeType.IntLiteral
|		 Result: 2
|		 Calculating: ASTNodeType.Multiplicative
|			 Calculating: ASTNodeType.IntLiteral
|			 Result: 3
|			 Calculating: ASTNodeType.IntLiteral
|			 Result: 5
|		 Result: 15
|	 Result: 17
| Result: 17

2+ ，应该有语法错误。
err

计算: 2+3+4，结合性出现错误。
@ASTNodeType.Programm Calculator
@	ASTNodeType.Additive +
@		ASTNodeType.IntLiteral 2
@		ASTNodeType.Additive +
@			ASTNodeType.IntLiteral 3
@			ASTNodeType.IntLiteral 4
| Calculating: ASTNodeType.Programm
|	 Calculating: ASTNodeType.Additive
|		 Calculating: ASTNodeType.IntLiteral
|		 Result: 2
|		 Calculating: ASTNodeType.Additive
|			 Calculating: ASTNodeType.IntLiteral
|			 Result: 3
|			 Calculating: ASTNodeType.IntLiteral
|			 Result: 4
|		 Result: 7
|	 Result: 9
| Result: 9
[root@VM_30_144_centos craft]#