脚本语言支持变量,通过使用字典作为变量存储区实现。
赋值语句中的等号后面可匹配表达式。
尝试一个规则不成功之后,恢复到原样,再去尝试另外的规则,这个现象就叫做“回溯”。
simple_script.py
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from play_with_compiler.craft.simple_parser import SimpleParser
from play_with_compiler.craft.base_type import ASTNodeType
import sys
'''
* 一个简单的脚本解释器。
* 所支持的语法,请参见simple_parser.py
*
* 运行脚本:
* 在命令行下,键入:python simple_script.py
* 则进入一个REPL界面。你可以依次敲入命令。比如:
* > 2+3;
* > int age = 10;
* > int b;
* > b = 10*2;
* > age = age + b;
* > exit(); //退出REPL界面。
*
* 你还可以使用一个参数 -v,让每次执行脚本的时候,都输出AST和整个计算过程。
'''
class SimpleScript(object):
    """A small tree-walking interpreter for the AST produced by SimpleParser.

    Variables are kept in a dictionary that maps a variable name to its
    integer value. A variable declared without an initializer is stored
    with the value None until something is assigned to it.
    """

    def __init__(self, verbose):
        """Create an interpreter.

        :param verbose: when true, evaluate() prints every visited node and
            every intermediate result.
        """
        self._variables = {}
        self._verbose = verbose

    def evaluate(self, node, indent):
        """Walk the AST rooted at *node* and compute its value.

        :param node: AST node exposing get_type(), get_text() and
            get_children().
        :param indent: prefix used for verbose output; the empty string
            marks a top-level statement, whose result is echoed.
        :returns: the integer value of the node, or None for a declaration
            without an initializer or an empty program.
        :raises Exception: when an identifier has no value yet, or when an
            assignment targets a variable that was never declared.
        """
        result = None
        node_type = node.get_type()
        if self._verbose:
            print('%s Calcalationg: %s:' %(indent, node_type))

        if node_type == ASTNodeType.Programm:
            # Statements run at the caller's indent so that each one is
            # still treated as a top-level statement.
            for child in node.get_children():
                result = self.evaluate(child, indent)
        elif node_type == ASTNodeType.Additive:
            left, right = self._evaluate_operands(node, indent)
            if node.get_text() == '+':
                result = left + right
            else:
                result = left - right
        elif node_type == ASTNodeType.Multiplicative:
            left, right = self._evaluate_operands(node, indent)
            if node.get_text() == '*':
                result = left * right
            else:
                # Floor division keeps the result an int on Python 3 and
                # matches what '/' does for ints on Python 2.
                result = left // right
        elif node_type == ASTNodeType.IntLiteral:
            result = int(node.get_text())
        elif node_type == ASTNodeType.Identifier:
            var_name = node.get_text()
            value = self._variables.get(var_name)
            if value is None:
                raise Exception('variavle ' + var_name + ' has not been set any value')
            result = int(value)
        elif node_type in (ASTNodeType.AssignmentStmt, ASTNodeType.IntDeclaration):
            var_name = node.get_text()
            # An assignment needs a previously declared variable; after
            # that check it stores its value exactly like a declaration.
            if node_type == ASTNodeType.AssignmentStmt and var_name not in self._variables:
                raise Exception('unknown variable: ' + var_name)
            var_value = None
            children = node.get_children()
            if children:
                result = self.evaluate(children[0], indent + '\t')
                var_value = int(result)
            self._variables[var_name] = var_value

        if self._verbose:
            print('%sResult: %s' %(indent, result))
        elif indent == '':
            # Non-verbose mode only echoes the result of top-level statements.
            if node_type in (ASTNodeType.IntDeclaration, ASTNodeType.AssignmentStmt):
                print('%s: %s' %(node.get_text(), result))
            elif node_type != ASTNodeType.Programm:
                print(result)
        return result

    def _evaluate_operands(self, node, indent):
        """Evaluate the two children of a binary node and return them as ints."""
        children = node.get_children()
        left = self.evaluate(children[0], indent + "\t")
        right = self.evaluate(children[1], indent + "\t")
        return int(left), int(right)
def play(args):
    """Run a simple read-eval-print loop (REPL) on standard input.

    Input lines are accumulated until one ends with ';'; the buffered text
    is then parsed and evaluated. Entering 'exit();' or reaching end of
    input leaves the loop. Errors raised while parsing or evaluating are
    printed and the buffered text is discarded.

    :param args: command-line arguments without the program name; when the
        first one is '-v', the AST and every evaluation step are printed.
    """
    verbose = len(args) > 0 and args[0] == '-v'
    if verbose:
        print('verbose mode')
    print('Simple script language!')

    # raw_input exists only on Python 2; Python 3 calls it input.
    try:
        read_line = raw_input
    except NameError:
        read_line = input

    parser = SimpleParser()
    script = SimpleScript(verbose)
    script_text = ""
    while True:
        try:
            line = read_line(">")
            if line == 'exit();':
                print("good bye!")
                break
            script_text += line + "\n"
            if line.endswith(";"):
                tree = parser.parse(script_text)
                if verbose:
                    parser.dump_AST(tree, "")
                script.evaluate(tree, "")
                script_text = ""
        except EOFError:
            # End of input (Ctrl-D or an exhausted pipe). Without this the
            # generic handler below would catch it and the loop would
            # never terminate.
            break
        except Exception as e:
            print('119: %s' %e)
            script_text = ''
# Start the REPL, forwarding the command-line arguments (e.g. -v) to play().
play(sys.argv[1:])
simple_parser.py
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from play_with_compiler.craft.simple_lexer import SimpleLexer
from play_with_compiler.craft.base_type import Token, TokenReader, ASTNodeType, TokenType
from play_with_compiler.craft.simple_calculator import SimpleASTNode
'''
* 一个简单的语法解析器。
* 能够解析简单的表达式、变量声明和初始化语句、赋值语句。
* 它支持的语法规则为:
*
* programm -> ( int_declare | expressionStatement | assignmentStatement )*
* int_declare -> 'int' Id ( '=' additive )? ';'
* expressionStatement -> additive ';'
* assignmentStatement -> Id '=' additive ';'
* additive -> multiplicative ( ( '+' | '-' ) multiplicative )*
* multiplicative -> primary ( ( '*' | '/' ) primary )*
* primary -> IntLiteral | Id | '(' additive ')'
'''
class SimpleParser(object):
    """A recursive-descent parser for a tiny scripting language.

    It handles expressions, int variable declarations (with an optional
    initializer) and assignment statements. Grammar:

        programm            -> ( int_declare | expressionStatement
                                 | assignmentStatement )*
        int_declare         -> 'int' Id ( '=' additive )? ';'
        expressionStatement -> additive ';'
        assignmentStatement -> Id '=' additive ';'
        additive            -> multiplicative ( ('+' | '-') multiplicative )*
        multiplicative      -> primary ( ('*' | '/') primary )*
        primary             -> IntLiteral | Id | '(' additive ')'

    Every rule method takes the token reader, returns an AST node on a
    match and returns None (leaving the reader where it was) otherwise.
    """

    def parse(self, script):
        """Tokenize *script* and return the root node of its AST."""
        lexer = SimpleLexer()
        tokens = lexer.tokenize(script)
        return self.prog(tokens)

    def prog(self, tokens):
        """Parse a whole program; this is the entry rule.

        :returns: a Programm node with one child per statement.
        :raises Exception: when the upcoming tokens match no statement rule.
        """
        node = SimpleASTNode(ASTNodeType.Programm, 'pwc')
        while tokens.peek() is not None:
            # Try each statement rule in turn; a rule that does not match
            # leaves the token reader untouched so the next one can try.
            child = self.int_declare(tokens)
            if child is None:
                child = self.expression_statement(tokens)
            if child is None:
                child = self.assignment_statement(tokens)
            if child is None:
                raise Exception('unknown statement')
            node.add_child(child)
        return node

    def expression_statement(self, tokens):
        """Parse an expression statement: an expression followed by ';'.

        :returns: the expression node, or None when the tokens do not form
            an expression statement.
        """
        pos = tokens.get_position()
        node = self.additive(tokens)
        if node is not None:
            if self._next_is(tokens, TokenType.SemiColon):
                tokens.read()
            else:
                # Backtrack: this may still be an assignment statement.
                node = None
                tokens.set_position(pos)
        return node

    def assignment_statement(self, tokens):
        """Parse an assignment statement such as ``age = 10*2;``.

        :returns: an AssignmentStmt node whose text is the variable name
            and whose only child is the assigned expression, or None when
            the tokens do not start with ``Id =``.
        :raises Exception: when the right-hand side or the trailing
            semicolon is missing.
        """
        if not self._next_is(tokens, TokenType.Identifier):
            return None
        name_token = tokens.read()
        if not self._next_is(tokens, TokenType.Assignment):
            # Backtrack: give back the identifier that was just consumed.
            tokens.unread()
            return None
        tokens.read()  # consume '='

        node = SimpleASTNode(ASTNodeType.AssignmentStmt, name_token.get_text())
        child = self.additive(tokens)
        if child is None:
            raise Exception('invalide assignment statement, expecting an expression')
        node.add_child(child)
        if not self._next_is(tokens, TokenType.SemiColon):
            raise Exception('invalid statement, expecting semicolon')
        tokens.read()  # consume ';'
        return node

    def int_declare(self, tokens):
        """Parse an int declaration such as ``int a;`` or ``int b = 2*3;``.

        :returns: an IntDeclaration node whose text is the variable name
            and whose optional child is the initializer, or None when the
            next token is not 'int'.
        :raises Exception: when the variable name, the initializer
            expression after '=' or the trailing semicolon is missing.
        """
        if not self._next_is(tokens, TokenType.Int):
            return None
        tokens.read()  # consume 'int'

        # Also covers running out of tokens right after 'int'.
        if not self._next_is(tokens, TokenType.Identifier):
            raise Exception('variable name expected')
        name_token = tokens.read()
        node = SimpleASTNode(ASTNodeType.IntDeclaration, name_token.get_text())

        if self._next_is(tokens, TokenType.Assignment):
            tokens.read()  # consume '='
            child = self.additive(tokens)
            if child is None:
                raise Exception('invlide variable initialization, expecting an expression')
            node.add_child(child)

        if not self._next_is(tokens, TokenType.SemiColon):
            raise Exception('invalid statemennt, expecting semicolon')
        tokens.read()  # consume ';'
        return node

    def additive(self, tokens):
        """Parse an additive expression (left-associative '+' and '-').

        :returns: the expression node, or None when no expression starts
            at the current token.
        :raises Exception: when an operator has no right operand.
        """
        return self._parse_binary_chain(
            tokens,
            self.multiplicative,
            (TokenType.Plus, TokenType.Minus),
            ASTNodeType.Additive,
            'invlide additive expression, expecting the right part.')

    def multiplicative(self, tokens):
        """Parse a multiplicative expression (left-associative '*' and '/').

        :returns: the expression node, or None when no expression starts
            at the current token.
        :raises Exception: when an operator has no right operand.
        """
        return self._parse_binary_chain(
            tokens,
            self.primary,
            (TokenType.Star, TokenType.Slash),
            ASTNodeType.Multiplicative,
            'invalid multiplicative expression, expecting the right part.')

    def primary(self, tokens):
        """Parse a primary expression: literal, identifier or '(' additive ')'.

        No dedicated "primary" node is created; the single child node is
        returned directly to keep the AST small.

        :returns: the node, or None when the next token starts no primary.
        :raises Exception: when a parenthesis is left empty or unclosed.
        """
        token = tokens.peek()
        if token is None:
            return None

        token_type = token.get_type()
        if token_type == TokenType.IntLiteral:
            return SimpleASTNode(ASTNodeType.IntLiteral, tokens.read().get_text())
        if token_type == TokenType.Identifier:
            return SimpleASTNode(ASTNodeType.Identifier, tokens.read().get_text())
        if token_type == TokenType.LeftParen:
            tokens.read()  # consume '('
            node = self.additive(tokens)
            if node is None:
                raise Exception('expecting an additive expression inside parenthesis')
            if not self._next_is(tokens, TokenType.RightParen):
                raise Exception('expecting right parenthesis')
            tokens.read()  # consume ')'
            return node
        return None

    def dump_AST(self, node, indent):
        """Print the tree structure of the AST rooted at *node*.

        :param node: node to print; None prints nothing.
        :param indent: prefix for this level; each level below adds a tab.
        """
        if node is None:
            return
        print("%s%s %s" %(indent, node.node_type, node.text))
        for child in node.get_children():
            self.dump_AST(child, indent + "\t")

    @staticmethod
    def _next_is(tokens, token_type):
        """Return True when the next token exists and has type *token_type*."""
        token = tokens.peek()
        return token is not None and token.get_type() == token_type

    def _parse_binary_chain(self, tokens, parse_operand, operator_types,
                            node_type, error_message):
        """Parse ``operand ( operator operand )*`` into a left-leaning tree.

        Returns None without consuming any operator when the first operand
        is missing, so that input starting with an operator is rejected by
        the parser instead of producing a node with a None child.
        """
        node = parse_operand(tokens)
        if node is None:
            return None
        while True:
            token = tokens.peek()
            if token is None or token.get_type() not in operator_types:
                break
            operator = tokens.read()
            right = parse_operand(tokens)
            if right is None:
                raise Exception(error_message)
            # The tree built so far becomes the left child, which makes
            # the operators left-associative.
            parent = SimpleASTNode(node_type, operator.get_text())
            parent.add_child(node)
            parent.add_child(right)
            node = parent
        return node
结果:
# python simple_script.py
Simple script language!
>2;
2
>2+3*5;
17
>age;
119: variavle age has not been set any value
>int a = 5;
a: 5
>int b = a + 2;
b: 7
>b;
7
>exit();
good bye!
课程:https://time.geekbang.org/column/article/125926
代码:https://github.com/buyouran1/PlayWithCompiler