语法分析:生成 AST(Abstract Syntax Tree, 抽象语法树)。
可通过递归下降算法生成 AST,然后遍历 AST 进行求值。
上下文无关文法允许任意的递归(包括嵌套递归),而正则文法不允许;正则文法是上下文无关文法的一个子集。
simple_calculator.py
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from play_with_compiler.craft.base_type import ASTNode
from play_with_compiler.craft.base_type import Token, TokenReader, TokenType, ASTNodeType
from play_with_compiler.craft.simple_lexer import SimpleLexer
'''
一个简单的AST节点的实现。
属性包括:类型、文本值、父节点、子节点。
'''
class SimpleASTNode(ASTNode):
    """A minimal AST node.

    Attributes:
        node_type: the kind of node (the parser passes an ``ASTNodeType`` member).
        text: the text value attached to the node.
        parent: the node this one was added to, or ``None`` for a root.
        children: child nodes, in the order they were added.
    """

    def __init__(self, node_type, text):
        """Create a detached node with no parent and no children."""
        self.node_type, self.text = node_type, text
        self.parent, self.children = None, []

    def addChild(self, child):
        """Append ``child`` to this node's children and point its parent here."""
        siblings = self.children
        siblings.append(child)
        child.parent = self
'''
实现一个计算器,但计算的结合性是有问题的。因为它使用了下面的语法规则:
additive -> multiplicative | multiplicative + additive
multiplicative -> primary | primary * multiplicative
递归项在右边,会自然的对应右结合。我们真正需要的是左结合。
'''
class SimpleCalculator(object):
    """A small recursive-descent calculator for integer expressions.

    The parsing methods implement this grammar:

        additive       -> multiplicative | multiplicative (+|-) additive
        multiplicative -> primary | primary (*|/) multiplicative

    Because the recursive term sits on the right-hand side, the resulting
    AST is right-associative (``2+3+4`` parses as ``2+(3+4)``), whereas
    arithmetic normally calls for left associativity.  This is a known
    limitation of the grammar above and is preserved here.

    Syntax errors are reported by raising ``ValueError``.
    """

    def evaluate(self, script):
        """Parse ``script``, print its AST, then print the evaluation trace.

        Any exception raised while parsing or evaluating is reported by
        printing ``err``; nothing is returned.
        """
        try:
            tree = self.parse(script)
            self.dump_ast(tree, "@")
            self._evaluate(tree, "|")
        except Exception:
            print('err')

    def parse(self, code):
        """Tokenize ``code`` with ``SimpleLexer`` and return the AST root node."""
        lexer = SimpleLexer()
        tokens = lexer.tokenize(code)
        return self.prog(tokens)

    def dump_ast(self, node, indent):
        """Print the tree rooted at ``node``, one node per line.

        Each line is ``indent`` followed by the node type and text; children
        are printed with one extra tab appended to ``indent``.  A ``None``
        node prints nothing.
        """
        if node is None:
            return
        print('{}{} {}'.format(indent, node.node_type, node.text))
        for item in node.children:
            self.dump_ast(item, indent + "\t")

    def _evaluate(self, node, indent):
        """Evaluate ``node`` recursively, printing each step.

        Args:
            node: the AST node to evaluate.
            indent: prefix used for the printed trace lines; children are
                traced with one extra tab appended.

        Returns:
            The computed value.  An IntLiteral yields its text unchanged, a
            Programm node yields the value of its last child, and node types
            with no branch below (e.g. Identifier) yield 0.
        """
        print("{} Calculating: {}".format(indent, node.node_type))
        result = 0
        children = node.children
        child_indent = indent + "\t"
        if node.node_type == ASTNodeType.Programm:
            for child in children:
                result = self._evaluate(child, child_indent)
        elif node.node_type == ASTNodeType.Additive:
            left = self._evaluate(children[0], child_indent)
            right = self._evaluate(children[1], child_indent)
            if node.text == "+":
                result = int(left) + int(right)
            else:
                result = int(left) - int(right)
        elif node.node_type == ASTNodeType.Multiplicative:
            left = self._evaluate(children[0], child_indent)
            right = self._evaluate(children[1], child_indent)
            if node.text == "*":
                result = int(left) * int(right)
            else:
                # NOTE: true division, so this may produce a float; an
                # enclosing operation truncates it again through int().
                result = int(left) / int(right)
        elif node.node_type == ASTNodeType.IntLiteral:
            result = node.text
        print("{} Result: {}".format(indent, result))
        return result

    def prog(self, tokens):
        """Parse the root rule: a Programm node wrapping one additive expression.

        The returned node has no children when no expression could be parsed.
        """
        node = SimpleASTNode(ASTNodeType.Programm, "Calculator")
        child = self.additive(tokens)
        if child is not None:
            node.addChild(child)
        return node

    def intDeclare(self, tokens):
        """Parse an integer variable declaration such as ``int a;`` or ``int b = 2*3;``.

        Returns:
            An IntDeclaration node whose text is the variable name and whose
            optional single child is the initializer expression, or ``None``
            when the next token is not ``int``.

        Raises:
            ValueError: if the variable name, the initializer expression or
                the terminating semicolon is missing.
        """
        node = None
        token = tokens.peek()  # look ahead without consuming
        if token is not None and token.token_type == TokenType.Int:
            tokens.read()  # consume 'int'
            token = tokens.peek()
            # Guard against the input ending right after 'int'.
            if token is None or token.token_type != TokenType.Identifier:
                raise ValueError("variable name expected")
            token = tokens.read()  # consume the identifier
            # The variable name is stored as the node text rather than in a
            # separate child node.
            node = SimpleASTNode(ASTNodeType.IntDeclaration, token.token_text)
            token = tokens.peek()
            if token is not None and token.token_type == TokenType.Assignment:
                tokens.read()  # consume '='
                child = self.additive(tokens)
                if child is None:
                    raise ValueError(
                        "invalid variable initialization, expecting an expression")
                node.addChild(child)
        if node is not None:
            token = tokens.peek()
            if token is not None and token.token_type == TokenType.SemiColon:
                tokens.read()
            else:
                raise ValueError("invalid statement, expecting semicolon")
        return node

    def additive(self, tokens):
        """Parse an additive expression (``+`` / ``-``), right-recursively.

        Returns:
            An Additive node with two children, the bare multiplicative node
            when no operator follows, or ``None`` when nothing was parsed.

        Raises:
            ValueError: if an operator is not followed by an expression.
        """
        child1 = self.multiplicative(tokens)
        node = child1
        token = tokens.peek()
        if child1 is not None and token is not None:
            if token.token_type in (TokenType.Plus, TokenType.Minus):
                token = tokens.read()
                child2 = self.additive(tokens)
                if child2 is None:
                    raise ValueError(
                        "invalid additive expression, expecting the right part.")
                node = SimpleASTNode(ASTNodeType.Additive, token.token_text)
                node.addChild(child1)
                node.addChild(child2)
        return node

    def multiplicative(self, tokens):
        """Parse a multiplicative expression (``*`` / ``/``), right-recursively.

        Returns:
            A Multiplicative node with two children, the bare primary node
            when no operator follows, or ``None`` when nothing was parsed.

        Raises:
            ValueError: if an operator is not followed by an expression.
        """
        child1 = self.primary(tokens)
        node = child1
        token = tokens.peek()
        if child1 is not None and token is not None:
            if token.token_type in (TokenType.Star, TokenType.Slash):
                token = tokens.read()
                child2 = self.multiplicative(tokens)
                if child2 is None:
                    raise ValueError(
                        "invalid multiplicative expression, expecting the right part.")
                node = SimpleASTNode(ASTNodeType.Multiplicative, token.token_text)
                node.addChild(child1)
                node.addChild(child2)
        return node

    def primary(self, tokens):
        """Parse a primary expression: int literal, identifier or ``( additive )``.

        No dedicated "primary" node is built; the single child node is
        returned directly, which keeps the AST smaller.

        Returns:
            The parsed node, or ``None`` when the next token cannot start a
            primary expression (including end of input).

        Raises:
            ValueError: if a parenthesis is empty or left unclosed.
        """
        token = tokens.peek()
        if token is None:
            return None
        node = None
        if token.token_type == TokenType.IntLiteral:
            token = tokens.read()
            node = SimpleASTNode(ASTNodeType.IntLiteral, token.token_text)
        elif token.token_type == TokenType.Identifier:
            token = tokens.read()
            node = SimpleASTNode(ASTNodeType.Identifier, token.token_text)
        elif token.token_type == TokenType.LeftParen:
            tokens.read()  # consume '('
            node = self.additive(tokens)
            if node is None:
                raise ValueError(
                    "expecting an additive expression inside parenthesis")
            token = tokens.peek()
            if token is None or token.token_type != TokenType.RightParen:
                raise ValueError("expecting right parenthesis")
            tokens.read()  # consume ')'
        return node
test.py
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from play_with_compiler.craft.simple_lexer import SimpleLexer
from play_with_compiler.craft.simple_calculator import SimpleCalculator
def test_simple_calculator():
    """Demonstrate SimpleCalculator on a declaration and three expressions.

    Prints the AST of a variable declaration, then evaluates a well-formed
    expression, an incomplete expression, and an expression whose result
    tree shows the calculator's right-associativity.
    """
    calc = SimpleCalculator()

    # Parse a variable declaration statement and dump its AST.
    declaration = "int a = b+3;"
    print("解析变量声明语句: {}".format(declaration))
    token_reader = SimpleLexer().tokenize(declaration)
    try:
        calc.dump_ast(calc.intDeclare(token_reader), "")
    except Exception:
        print('err')

    # (expression, banner template) pairs: a normal expression, a syntax
    # error, and an associativity showcase, evaluated in this order.
    cases = (
        ("2+3*5", "\n计算: {},看上去一切正常。"),
        ("2+", "\n{} ,应该有语法错误。"),
        ("2+3+4", "\n计算: {},结合性出现错误。"),
    )
    for expression, banner in cases:
        print(banner.format(expression))
        calc.evaluate(expression)
# Run the demo only when this file is executed as a script.
if __name__ == '__main__':
    test_simple_calculator()
结果:
[root@VM_30_144_centos craft]# python3 test.py
解析变量声明语句: int a = b+3;
ASTNodeType.IntDeclaration a
ASTNodeType.Additive +
ASTNodeType.Identifier b
ASTNodeType.IntLiteral 3
计算: 2+3*5,看上去一切正常。
@ASTNodeType.Programm Calculator
@ ASTNodeType.Additive +
@ ASTNodeType.IntLiteral 2
@ ASTNodeType.Multiplicative *
@ ASTNodeType.IntLiteral 3
@ ASTNodeType.IntLiteral 5
| Calculating: ASTNodeType.Programm
| Calculating: ASTNodeType.Additive
| Calculating: ASTNodeType.IntLiteral
| Result: 2
| Calculating: ASTNodeType.Multiplicative
| Calculating: ASTNodeType.IntLiteral
| Result: 3
| Calculating: ASTNodeType.IntLiteral
| Result: 5
| Result: 15
| Result: 17
| Result: 17
2+ ,应该有语法错误。
err
计算: 2+3+4,结合性出现错误。
@ASTNodeType.Programm Calculator
@ ASTNodeType.Additive +
@ ASTNodeType.IntLiteral 2
@ ASTNodeType.Additive +
@ ASTNodeType.IntLiteral 3
@ ASTNodeType.IntLiteral 4
| Calculating: ASTNodeType.Programm
| Calculating: ASTNodeType.Additive
| Calculating: ASTNodeType.IntLiteral
| Result: 2
| Calculating: ASTNodeType.Additive
| Calculating: ASTNodeType.IntLiteral
| Result: 3
| Calculating: ASTNodeType.IntLiteral
| Result: 4
| Result: 7
| Result: 9
| Result: 9
[root@VM_30_144_centos craft]#
课程:https://time.geekbang.org/column/article/119891
代码:https://github.com/buyouran1/PlayWithCompiler