【手搓一个脚本语言】八、用C语言抽象语法树AST解析标识符和有符号数(负数)

【手搓一个脚本语言】八、用C语言抽象语法树AST解析标识符和有符号数(负数)

1、定义标识符类型

  • Token是记号,可以是运算符、数字或字母组成的标识符Identifier!
  • 标识符可以是变量名、函数名、关键字!
  • 定义T_IDNT代表标识符!
  • 在union {…} V;中定义 char *sval; 与T_IDNT对应!
/*
 * Token type tags, stored in Token.type.
 * Each tag tells which member of the union Token.V is meaningful.
 */
/* operator token (presumably paired with V.oval) */
#define T_OPER  0x21
/* number token (presumably paired with V.ival) */
#define T_NUMB  0x22
/* sub-AST token: V.aobj holds a nested AstNode */
#define T_SUBA  0x23
/* identifier token: V.sval holds the identifier string */
#define T_IDNT  0x24

/*
 * AstNode is a POINTER type: it aliases `struct _AstNode *`.
 * Declared before the struct body so that Token can hold an AstNode member.
 */
typedef struct _AstNode *AstNode;

/*
 * Token: one lexical unit of the expression.
 * `type` is a T_* tag; `V` is a union, so only the member that matches
 * the tag holds a valid value at any time.
 */
typedef struct _Token Token;
struct _Token {
   
  union {
   
       void *nval;    // null / no value
       char  oval[8]; // operator text
       long  ival;    // integer value
    AstNode  aobj;    // nested sub-AST (used with T_SUBA)
       char *sval; // identifier string, e.g. a variable name (used with T_IDNT)
  } V;
  char type;       // T_* tag selecting the active member of V
  char fill[7];    // unused filler bytes
};

/*
 * AST node: a token stored by value plus links to the left and right
 * child nodes (NULL when a child is absent).
 */
struct _AstNode {
   
  Token token;
  AstNode left, right;
};

2、释放AST节点

  • 由于sval是字符指针,创建时要分配内存,释放节点时要同时释放掉!
/*
 * Recursively release an AST.
 *
 * Frees both child subtrees first, then whatever the node's token owns
 * (a nested sub-AST for T_SUBA, the identifier string for T_IDNT), and
 * finally the node itself.  Passing NULL is allowed and does nothing.
 */
void
astnode_free (AstNode node)
{
  if (node == NULL)
    return;

  /* children before the node that links to them */
  astnode_free (node->left);
  astnode_free (node->right);

  /* release the payload owned by the token, if it has one */
  switch (node->token.type)
    {
    case T_SUBA:
      astnode_free (node->token.V.aobj);
      break;
    case T_IDNT:
      free (node->token.V.sval);
      break;
    default:
      break;
    }

  free (node);
}

3、遍历函数

  • 直观遍历
...
        case T_IDNT: PFI("[%s]\n", node->token.V.sval); break;
...
  • 前序、中序、后序遍历
...
        case T_IDNT: PFI( " %s", node->token.V.sval); break;
...

4、解析标识符

  • 标识符由大小写字母构成,长度不超过32字节!!!
	/*
	 * Identifier character (ASCII letter).  The `'A'...'Z'` label form is
	 * the GNU C case-range extension.
	 *
	 * Each letter is appended to tbuf; when the NEXT input character is
	 * not a letter the identifier is complete and is turned into a T_IDNT
	 * node that is linked into the tree for the current level `lv`.
	 *
	 * NOTE(review): no bounds check on tdx is visible here; the text says
	 * identifiers are at most 32 bytes, so confirm that tbuf's capacity is
	 * enforced elsewhere.
	 * NOTE(review): last_token is not assigned in this fragment, although
	 * the article says it should be set after each identifier is parsed;
	 * confirm against the full source.
	 */
	case 'A'...'Z': case 'a'...'z':
	  {
   
	    char n = estr[idx+1]; // look ahead at the next input character
	    tbuf[tdx] = c; tdx++; // accumulate the current letter
	    // next char is not a letter => the identifier ends here
	    if (!((n >= 'A' && n <='Z')||(n >= 'a' && n <='z')))
	      {
   
		// the token owns a heap copy of the name; astnode_free releases it
		// NOTE(review): strdup result is not checked for NULL
		Token tk = {
    .type = T_IDNT, .V.sval = strdup(tbuf) };
		AstNode node = astnode_new (tk);
		if (st[lv] == NULL)
		  {
   
		    // st[lv] is empty: store the node in tp[lv] if that slot is
		    // free, otherwise only report a possible syntax error
		    // NOTE(review): in the error branch `node` is not stored or
		    // freed here; looks like a leak, confirm
		    if (tp[lv] == NULL) tp[lv] = node;
		    else
		      PFI("Info: At index %d, Maybe Syntax error!\n", idx);
		  }
		else
		  {
   
		    // attach the node at the end of the chain of right children
		    // starting from st[lv]
		    AstNode tmp = st[lv]->right;
		    if (tmp == NULL)
		      {
   
			st[lv]->right = node;
		      }
		    else
		      {
   
			while (tmp->right != NULL)
			  tmp = tmp->right;
			tmp->right = node;
		      }
		  }
		PFI("IDNT: %s\n", tbuf);
		// clear the buffer and reset the write index for the next identifier
		memset (tbuf, 0, IDNTSIZE); tdx = 0;
	      }
	  }
	  break;

5、编译测试

  • 在main函数中调用test_parse函数
  • 在test_parse函数中定义表达式:“area + width * height / 2”;
  • 运行输出结果如下:
EXPR: area + width * height / 2
--------------------
IDNT: area
  OP: +
IDNT: width
  OP: *
IDNT: height
  OP: /
NUMB: 2
--------------------
 MID:  area + width * height / 2
--------------------
PREV:  ( + area ( / ( * width height ) 2 ) )
--------------------
POST:  area width height * 2 / +
--------------------
    <[area]
[+]
            <[width]
        <[*]
            >[height]
    >[/]
        >[2]
--------------------

6、检查内存分配情况

==15991== Memcheck, a memory error detector
==15991== Copyright (C) 2002-2017, and GNU GPL'd, by Julian Seward et al.
==15991== Using Valgrind-3.13.0 and LibVEX; rerun with -h for copyright info
==15991== Command: ./gt
==15991== 
EXPR: area + width * height / 2
--------------------
IDNT: area
  OP: +
IDNT: width
  OP: *
IDNT: height
  OP: /
NUMB: 2
--------------------
 MID:  area + width * height / 2
--------------------
PREV:  ( + area ( / ( * width height ) 2 ) )
--------------------
POST:  area width height * 2 / +
--------------------
    <[area]
[+]
            <[width]
        <[*]
            >[height]
    >[/]
        >[2]
--------------------
==15991== 
==15991== HEAP SUMMARY:
==15991==     in use at exit: 0 bytes in 0 blocks
==15991==   total heap usage: 10 allocs, 10 frees, 242 bytes allocated
==15991== 
==15991== All heap blocks were freed -- no leaks are possible
==15991== 
==15991== For counts of detected and suppressed errors, rerun with: -v
==15991== ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 0 from 0)

7、解析有符号的数字(负数)

  • 在parse_string函数前面定义last_token,在每次解析数字、运算符、标识符后为last_token赋值解析的类型:
char last_token = 0;
  • 在case ‘0’…‘9’:后面修改输出解析得到的数字:
        PFI("NUMB: %ld\n", lt);
  • 判断是否为负数:
	case '+': case '-': case '*': case '/': //operator
	  {
   
	    if (c == '-') //negative?
	      {
   
		if (last_token == 0)
		  {
   
		    char n = estr[idx+1];
		    if (n >= '0' && n <= '9') {
    sign = 1; break; }
		  }
		else if (last_token == T_OPER)
		  {
   
		    char n = estr[idx+1];
		    if (n >= '0' && n <= '9') {
    sign = 1; break; }
		  }
		else
		  {
   } //todo ???
	      }

8、测试负数

EXPR: (-100*-200)-(-300*-20)
--------------------
LPAR: (
NUMB: -100
  OP: *
NUMB: -200
RPAR: )
  OP: -
LPAR: (
NUMB: -300
  OP: *
NUMB: -20
RPAR: )
--------------------
 MID:  ( -100 * -200 ) - ( -300 * -20 )
--------------------
PREV:  ( - ( * -100 -200 ) ( * -300 -20 ) )
--------------------
POST:  -100 -200 * -300 -20 * -
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值