成功解决了字节码指令生成的问题,掌握一个原则:
1、数值常量expr100一律是MovImm #imm, R0,但把要不要执行Push R0推迟到expr20-60里做判断;
2、假设任何表达式的指令生成结果都对应于其值在R0里;需要执行Push R0,当且仅当:
此子表达式出现在二元运算的左侧,且右侧需要递归。(注意,右侧如果只是一个数值常量的话,倒是可以直接MovImm #right_imm, R1,不需要对左侧的结果进行Push R0)
/**
 * Calculator state holder: collects button presses into an infix token
 * stream and owns the Assembler that receives the generated instructions.
 */
function AdvancedCalculator(){
    // Parser input: infix tokens with parentheses. Three kinds of entries are
    // stored here: the strings "(" and ")", Number values, and operator tokens.
    this.tokens = [];
    // Read cursor into `tokens`.
    this.tokens_scan_index = 0;
    // Saved cursor positions, used by the parser to rewind after a lookahead.
    this.saved_tokens_scan_index_stack = [];
    // Characters of the numeric literal currently being typed.
    this.value_buffer = [];
    // Instruction sink for the generated bytecode.
    this.assember = new Assembler();
}
AdvancedCalculator.prototype = {
//复杂的运算符定义为单独的Object:
SQRT: "Sqrt",
SIN: "Sin",
COS: "Cos",
TAN: "Tan",
COT: "Cot",
LOG: "Log", //以10为底
LN: "Ln", //以e为底
POW: "Pow",//x^y
PI: Math.PI, //这是数值常量,不是运算符,不过也可以映射为0个输入的函数??
mapUnaryOperator2UnaryFunction: function(opToken){
if(opToken==this.SQRT)
return Math.sqrt;
else if(opToken==this.SIN)
return Math.sin;
else if(opToken==this.COS)
return Math.cos;
else if(opToken==this.TAN)
return Math.tan;
else if(opToken==this.COT)
return function(a){return 1/Math.tan(a);};
else if(opToken==this.LOG)
return Math.log10;
else if(opToken==this.LN)
return Math.log;
else
throw "未识别的一元运算符: "+opToken;
},
mapBinaryOperator2BinaryFunction: function(opToken){
if(opToken==this.POW)
return Math.pow;
else if(opToken=="+")
return function(a,b){return a+b;};
else if(opToken=="-")
return function(a,b){return a-b;};
else if(opToken=="*")
return function(a,b){return a*b;};
else if(opToken=="/")
return function(a,b){return a/b;};
else
throw "未识别的二元运算符: "+opToken;
},
nextToken: function(){
if (this.tokens_scan_index>=0 && this.tokens_scan_index<this.tokens.length){
var token_next = this.tokens[this.tokens_scan_index++];
return token_next;
}
return null;//throw "错误的调用:token流已经结束";
},
hasMoreTokens: function(){
return this.tokens_scan_index>=0 && this.tokens_scan_index<this.tokens.length;
},
pushTokenScanIndex: function(){
assert( this.tokens_scan_index>=0 && this.tokens_scan_index<this.tokens.length);
this.saved_tokens_scan_index_stack.push(this.tokens_scan_index);
return this.saved_tokens_scan_index_stack.length-1;
},
popTokenScanIndexAt: function(stack_index){
assert( stack_index>=0 && stack_index<this.saved_tokens_scan_index_stack.length);
while(this.saved_tokens_scan_index_stack.length>stack_index)
this.tokens_scan_index = this.saved_tokens_scan_index_stack.pop();
},
popTokenScanIndex: function(){
this.tokens_scan_index = this.saved_tokens_scan_index_stack.pop();
},
discardLastTokenScanIndex: function(){
this.saved_tokens_scan_index_stack.pop();
},
isUnaryOperator: function(token){
return token==this.SIN || token== this.COS || token==this.TAN || token==this.COT || token==this.LOG || token==this.LN;
},
isBinaryFunctionToken: function(token){//特殊的二元函数
return token==this.POW;
},
isBinaryOperator: function(token){//所有的二元中缀操作符(包括二元函数)
return token=="+" || token=="-" || token=="*" || token=="/" || this.isBinaryFunctionToken(token);
},
isOperator: function(token){//返回:0/1单元运算符包括函数/2元运算符
if (this.isUnaryOperator(token))
return 1;
if (this.isBinaryOperator(token))
return 2;
return 0;
},
emitToken: function(token){
this.tokens.push(token);
},
emitValueTokenIfAny: function(){
//检查之前缓存的value_buffer
if (this.value_buffer.length>0) {
var value_str = this.value_buffer.join('');
var value = Number(value_str); //a Number
this.emitToken(value);
this.value_buffer = []; //reset;
}
},
emitButton: function(btn){
if (btn=="(" || btn==")"){//括号是一种特殊的优先级运算符
this.emitValueTokenIfAny();
this.emitToken(btn);
}
else if (this.isOperator(btn)){
this.emitValueTokenIfAny();
this.emitToken(btn);
}else{//0,1,2,3,4,5,6,7,8,9,.
this.value_buffer.push(btn);
}
},
emitButtons: function(btns){
for(var i=0; i<btns.length; ++i){
var btn = btns[i];
this.emitButton(btn);
}
},
//核心算法:如何把一个中缀的混合value和operator的流转换为分离的value和operator的求值栈?
concat: function(target, source){
while(source.length>0){
var item = source.shift();
target.push(item);
}
},
evalExpr: function(){
return this.evalExpr20();//利用短路特性,前一个得到true的话后续子表达式不会执行
},
evalExpr100: function(){
if( !this.hasMoreTokens() )
return [false,];//push操作暂存流位置之前最好都检查一下?区分2种情况:流中无可解析token VS token语法错误
this.pushTokenScanIndex();
var next_token = this.nextToken();
if(next_token==null)
throw "流非正常结束,此处应有一数值value!";
assert(typeof next_token=="number");
if(typeof next_token=="number"){
this.assember.emitInstruction({type: "MovImm", arg: next_token, arg1: "R0"});
//this.assember.emitInstruction({type: "Push", arg: "R0"});
//正常情况下不需要push,只有发现此常量参与了一个二元原语函数的运算左端,而右端是一个需要递归的子表达式的时候
return [true,next_token];
}
this.popTokenScanIndex();
return [false,];
},
evalExpr80: function(){//括号表达式: 似乎不需要特殊处理?因为它只是改变了子表达式的优先级而已
if( !this.hasMoreTokens() )
return [false,];//push操作暂存流位置之前最好都检查一下?区分2种情况:流中无可解析token VS token语法错误
this.pushTokenScanIndex();
var next_token = this.nextToken();
if (next_token==null){//流已经结束
return false;
}
if (next_token=="("){
var result = this.evalExpr(); //if has ES6 destructing, can write as var [success, value] = ...
if(!result[0])
throw "TODO: fixme";//此时saved_tokens_scan_index需要维护成一个栈了
var next_next_token = this.nextToken();
if(next_next_token==null)
throw "流异常结束:expect a )";
assert( next_next_token==")" );
{
//this.assember.emitInstruction({type: "Pop", arg: "R0"});
//this.assember.emitInstruction({type: "Push", arg: "R0"});
}
return result;
}
//else:
this.popTokenScanIndex();
var result = this.evalExpr100();
//this.assember.emitInstruction({type: "Pop", arg: "R0"});
return result;
},
evalExpr60: function(){//一元函数
if( !this.hasMoreTokens() )
return [false,];//push操作暂存流位置之前最好都检查一下?区分2种情况:流中无可解析token VS token语法错误
this.pushTokenScanIndex();
var next_token = this.nextToken();
if (next_token==null) {
throw "流异常结束:期望一个Expr60";
}
if(this.isUnaryOperator(next_token)){
var unaryOp = next_token;
var result = this.evalExpr80();
if (!result[0])
throw "非法表达式!";//此时saved_tokens_scan_index需要维护成一个栈了, TODO: 支持 sin sin 1的语法?
var unaryFunc = this.mapUnaryOperator2UnaryFunction(unaryOp);
{
//this.assember.emitInstruction({type: "Pop", arg: "R0"});
this.assember.emitInstruction({type: "CallPrimitiveFunction", arg: next_token});;
//this.assember.emitInstruction({type: "Push", arg: "R0"});
}
return [true, unaryFunc(result[1])];
}
this.popTokenScanIndex();
var result = this.evalExpr80();
//this.assember.emitInstruction({type: "Push", arg: "R0"});
return result;
},
evalExpr50: function(){//二元函数,如x^y(Pow求幂)
if( !this.hasMoreTokens() )
return [false,];
//expr50 := expr60 x^y expr50 | expr60
var result = this.evalExpr60();
if (result[0]) {
var tmp_value = result[1];
if( !this.hasMoreTokens() ) {
//this.assember.emitInstruction({type: "Push", arg: "R0"})
return [true, tmp_value];
}
this.pushTokenScanIndex();
var next_token = this.nextToken();//should use let;
while(this.isBinaryFunctionToken(next_token)){
//右递归之前,需要将当前的R0压栈:
this.assember.emitInstruction({type: "Push", arg: "R0"});
var result2 = this.evalExpr50();
if (!result2[0]) {
//Here: 二元函数运算符(如x^y)已经匹配,但右边的子表达式不匹配,则输入无效
throw "Input Invalid";
}
//这里的递归已经处理了结合性的问题
var binFunc = this.mapBinaryOperator2BinaryFunction(next_token);
tmp_value = binFunc(tmp_value, result2[1]);
{
//将当前右递归的运算结果(R0)移动到R1:
this.assember.emitInstruction({type: "Mov", arg: "R0", arg1: "R1"});
//将之前压栈的左侧值出栈:
this.assember.emitInstruction({type: "Pop", arg: "R0"});
this.assember.emitInstruction({type: "CallPrimitiveFunction", arg: next_token});
}
//
this.discardLastTokenScanIndex();
if( !this.hasMoreTokens() ){
//this.assember.emitInstruction({type: "Push", arg: "R0"})
return [true, tmp_value];
}
}
this.popTokenScanIndex();
//this.assember.emitInstruction({type: "Push", arg: "R0"})
return [true,tmp_value];
}
return [false,];
},
evalExpr40: function(){//二元乘除, 乘除运算都认为是左结合的
//expr40 := expr50 | expr50 ( '*' expr50 )* | expr50 ( '/' expr50)*
//如果解析失败,恢复输入token流的扫描初始位置
if( !this.hasMoreTokens() )
return [false,];//push操作暂存流位置之前最好都检查一下?区分2种情况:流中无可解析token VS token语法错误
this.pushTokenScanIndex();
var result = this.evalExpr50();
if (result[0]) {
var tmp_value = result[1];
if( !this.hasMoreTokens() )
return [true, tmp_value];
this.pushTokenScanIndex();
var next_token = this.nextToken();//should use let;
while(next_token=="*" || next_token=="/"){
//右递归之前,需要将当前的R0压栈:
this.assember.emitInstruction({type: "Push", arg: "R0"});
//
var result2 = this.evalExpr50();
if (!result2[0]) {
//Here: *或/运算符已经匹配,后右边的子表达式不匹配,则输入无效
throw "Input Invalid";
}
//成功:
//var binFunc = this.mapBinaryOperator2BinaryFunction(next_token);
//tmp_value = binFunc(tmp_value, result2[1]);
if(next_token=="*")
tmp_value *= result2[1];
else
tmp_value /= result2[1];
{
//乘法和除法运算都是左结合的,问题是,这里子表达式的优先级都大于*/
//正常情况下,先算左边的子表达式,压栈,再算右边的,压栈,所以:
this.assember.emitInstruction({type: "Mov", arg: "R0", arg1: "R1"});
this.assember.emitInstruction({type: "Pop", arg: "R0"})
this.assember.emitInstruction({type: "CallPrimitiveFunction", arg: next_token})
//注意,这里assembler的写法与二元函数expr60的类似,不同之处在于parser的控制流程:一个是递归,一个是while
}
//
this.discardLastTokenScanIndex();
//下一次循环:
if( !this.hasMoreTokens() )
return [true, tmp_value];//流已经结束,当前expr40子表达式解析完成(但不代表整体成功)
this.pushTokenScanIndex();
next_token = this.nextToken();
}//end while;
assert( next_token!="*" && next_token!="/");
this.popTokenScanIndex();//回退一个* /的位置,注意,这时可以清除流解析回退栈了(不清除其实也没关系)
return [true,tmp_value];//最顶层的push不用pop了;
}
this.popTokenScanIndex();
throw "Invalid Input: expect expr40 here";
},
//TODO: FIXME 对二元运算符而言,不管其结合性如何、是否满足交换律,优先级高的子表达式先运算!!!
// 但是现在不需要以“编译器”的行为来考虑问题,只是解释器,表达式可以认为没有负作用(赋值语句),则可以直接一边语法解析一边求值
evalExpr20: function(){//二元加减
if( !this.hasMoreTokens() )
return [false,];
//如果解析失败,不用恢复,直接报错
//expr20 := expr40 | expr40 ('+' expr40)* | expr40 ('-' expr40)* //加法可以是右结合的,减法不行, 这里把expr20改为expr40使得加法左结合
// | expr40 '+' expr20 //这么一来,加法将变成右结合的,不对;
//this.pushTokenScanIndex();
var result = this.evalExpr40();
if (!result[0]) {
return [false,];//整个表达式解析失败
}
//循环地向前看一个运算符,或者是+,或者是-
var tmp_value = result[1];
if( !this.hasMoreTokens() )
return [true, tmp_value];
this.pushTokenScanIndex();
var next_token=this.nextToken();
if(next_token==null){
//注意,前面已经有一个expr40解析成功,所以这里即使流已经结束,仍然可以成功返回
this.discardLastTokenScanIndex();
return [true, tmp_value];
}
while(next_token=="+" || next_token=="-"){
if(next_token=="+"){
//右递归之前,需要将当前的R0压栈:
this.assember.emitInstruction({type: "Push", arg: "R0"});
//
var result2 = this.evalExpr40(); //<-- 必须把+运算parse为右递归,否则无法处理 1+2+3 这种情况
if (!result2[0]) {
return false;//整个表达式解析失败
}
tmp_value += result2[1];
{
this.assember.emitInstruction({type: "Mov", arg: "R0", arg1: "R1"});
this.assember.emitInstruction({type: "Pop", arg: "R0"});
this.assember.emitInstruction({type: "CallPrimitiveFunction", arg: "+"});
}
}else{//"-"
//右递归之前,需要将当前的R0压栈:
this.assember.emitInstruction({type: "Push", arg: "R0"});
//
var result2 = this.evalExpr40();
if (!result2[0]) {
return false;//整个表达式解析失败
}
tmp_value -= result2[1];
{
this.assember.emitInstruction({type: "Mov", arg: "R0", arg1: "R1"});
this.assember.emitInstruction({type: "Pop", arg: "R0"});
this.assember.emitInstruction({type: "CallPrimitiveFunction", arg: "-"});
}
}
//成功的情况:
this.discardLastTokenScanIndex();
//下一次循环:
if( !this.hasMoreTokens() )
return [true, tmp_value];//流已经结束,当前expr20子表达式解析完成(但不代表整体成功)
this.pushTokenScanIndex();
next_token = this.nextToken();
}//end while
assert( next_token!="+" && next_token!="-"); //非法期刊:1+2-
this.popTokenScanIndex();
return [true, tmp_value];
},
calc: function(){
this.emitValueTokenIfAny();//!!!
//输入全部在tokens里,视为一个正确的表达式输入流,后期也可以考虑错误处理
var result = this.evalExpr();//[success/fail, value]
{
alert(this.assember.toString());
var intercepter = new BytecodeIntercepter();
var interceptEvalResult = intercepter.eval(this.assember.getResult());
alert("字节码解释器求值结果="+interceptEvalResult+" \r\n直接递归下降解释执行结果="+result);
}
//assert( result[0] );
return result[1];
}
}
parser的代码目前同时做2件事情:(1)老的直接在递归下降解析过程中求值,(2)新的通过Assembler生成字节码指令。
测试代码:
// Sample 7: three parenthesised unary calls combined with + and -.
alert("7: sin(1+2)+cos(3-4)-tan(5*6)");
var ac = new AdvancedCalculator();
// The button sequence is fed term by term; emitButtons simply forwards each
// button in order, so splitting the calls yields the same token stream.
ac.emitButtons([ac.SIN, "(", "1", "+", "2", ")"]);
ac.emitButtons(["+", ac.COS, "(", "3", "-", "4", ")"]);
ac.emitButtons(["-", ac.TAN, "(", "5", "*", "6", ")"]);
var result = ac.calc();
// The direct evaluation performs the same operations in the same order,
// so the comparison against the Math.* expression is exact.
assertEquals(result, Math.sin(1+2)+Math.cos(3-4)-Math.tan(5*6));
成功输出:
MovImm 1 R0
Push R0
MovImm 2 R0
Mov R0 R1
Pop R0
CallPrimitiveFunction +
CallPrimitiveFunction Sin
Push R0
MovImm 3 R0
Push R0
MovImm 4 R0
Mov R0 R1
Pop R0
CallPrimitiveFunction -
CallPrimitiveFunction Cos
Mov R0 R1
Pop R0
CallPrimitiveFunction +
Push R0
MovImm 5 R0
Push R0
MovImm 6 R0
Mov R0 R1
Pop R0
CallPrimitiveFunction *
CallPrimitiveFunction Tan
Mov R0 R1
Pop R0
CallPrimitiveFunction -
字节码解释器求值结果=7.086753510574282
直接递归下降解释执行结果=true,7.086753510574282
下一步工作:编写一个可视化界面?将JS代码格式化一下,然后变量命名再重构一下?加上AST生成和转换成JS运算表达式的支持?