【手搓一个脚本语言】八、用C语言抽象语法树AST解析标识符和有符号数（负数）-CSDN博客

本文链接：https://blog.csdn.net/gwsong52/article/details/144964954

【手搓一个脚本语言】八、用C语言抽象语法树AST解析标识符和有符号数（负数）

接上一篇【手搓一个脚本语言】七、用C语言抽象语法树AST实现一个可交互运行的表达式计算器-CSDN博客的代码，再进一步改进！！！
目标：正确判断负数和由大小写字母[A-Za-z]组成的标识符！

1、定义标识符类型

Token是记号，可以是运算符、数字或由字母组成的标识符Identifier！
标识符可以是变量名、函数名、关键字！
定义T_IDNT代表标识符！
在union {…} V;中定义 char *sval; 与T_IDNT对应！

/* define token type */
/* Type codes stored in Token.type; they tell which kind of value the token carries. */
#define T_OPER  0x21  /* operator token */
#define T_NUMB  0x22  /* number token */
#define T_SUBA  0x23  /* token that holds a nested sub-AST (V.aobj) */
#define T_IDNT  0x24  /* identifier token (V.sval) */

/* Handle to an AST node; the node structure itself is defined further down. */
typedef struct _AstNode *AstNode;

/*
 * A token: one lexical unit together with its payload.
 * `type` holds one of the T_* codes and tells which member of V is in use.
 */
typedef struct _Token
{
  union
  {
    void    *nval;     /* no value (null) */
    char     oval[8];  /* operator text */
    long     ival;     /* integer value */
    AstNode  aobj;     /* nested sub-AST */
    char    *sval;     /* identifier text, e.g. a variable name */
  } V;
  char type;           /* kind of value stored in V */
  char fill[7];        /* unused */
} Token;

/* An AST node: a token plus its left and right links. */
struct _AstNode
{
  Token   token;
  AstNode left;
  AstNode right;
};

2、释放AST节点

由于sval是字符指针，创建时要分配内存，释放节点时要同时释放掉！

/*
 * Release an AST node and everything reachable from it.
 *
 * The left and right subtrees are released first, then the token payload
 * (a nested sub-AST for T_SUBA, the identifier string for T_IDNT), and
 * finally the node itself.  Passing NULL is a no-op.
 */
void
astnode_free (AstNode node)
{
  if (node == NULL)
    return;

  astnode_free (node->left);
  astnode_free (node->right);

  switch (node->token.type)
    {
    case T_SUBA:   /* token owns a nested AST */
      astnode_free (node->token.V.aobj);
      break;
    case T_IDNT:   /* token owns the identifier string */
      free (node->token.V.sval);
      break;
    default:       /* other token kinds own no extra memory */
      break;
    }

  free (node);
}

3、遍历函数

直观遍历

...
        case T_IDNT: PFI("[%s]\n", node->token.V.sval); break;
...

前序、中序、后序遍历

...
        case T_IDNT: PFI( " %s", node->token.V.sval); break;
...

4、解析标识符

标识符由大小写字母构成，长度不超过32字节！！！

	/*
	 * Identifier: a run of ASCII letters.  Letters are collected in tbuf one
	 * per loop iteration; when the next input character is not a letter the
	 * identifier is complete, and a T_IDNT node is created and attached to
	 * the tree of the current nesting level (lv).
	 */
	case 'A'...'Z': case 'a'...'z':
	  {
	    char n = estr[idx+1]; //next char
	    /* Keep the last byte of tbuf as the NUL terminator; letters beyond
	       IDNTSIZE - 1 are dropped instead of overflowing the buffer. */
	    if (tdx < IDNTSIZE - 1)
	      {
		tbuf[tdx] = c; tdx++;
	      }
	    if (!((n >= 'A' && n <='Z')||(n >= 'a' && n <='z')))
	      {
		/* the node takes ownership of the duplicated string;
		   astnode_free releases it for T_IDNT tokens */
		Token tk = { .type = T_IDNT, .V.sval = strdup(tbuf) };
		AstNode node = astnode_new (tk);
		if (st[lv] == NULL)
		  {
		    if (tp[lv] == NULL) tp[lv] = node;
		    else
		      {
			PFI("Info: At index %d, Maybe Syntax error!\n", idx);
			/* the node was not linked anywhere: release it (and its
			   string) here, otherwise it would leak */
			astnode_free (node);
		      }
		  }
		else
		  {
		    /* append at the end of the right-hand chain of st[lv] */
		    AstNode tmp = st[lv]->right;
		    if (tmp == NULL)
		      {
			st[lv]->right = node;
		      }
		    else
		      {
			while (tmp->right != NULL)
			  tmp = tmp->right;
			tmp->right = node;
		      }
		  }
		PFI("IDNT: %s\n", tbuf);
		/* reset the buffer for the next identifier */
		memset (tbuf, 0, IDNTSIZE); tdx = 0;
	      }
	  }
	  break;

5、编译测试

在main函数中调用test_parse函数
在test_parse函数中定义表达式：“area + width * height / 2”;
运行输出结果如下：

EXPR: area + width * height / 2
--------------------
IDNT: area
  OP: +
IDNT: width
  OP: *
IDNT: height
  OP: /
NUMB: 2
--------------------
 MID:  area + width * height / 2
--------------------
PREV:  ( + area ( / ( * width height ) 2 ) )
--------------------
POST:  area width height * 2 / +
--------------------
    <[area]
[+]
            <[width]
        <[*]
            >[height]
    >[/]
        >[2]
--------------------

6、检查内存分配情况

==15991== Memcheck, a memory error detector
==15991== Copyright (C) 2002-2017, and GNU GPL'd, by Julian Seward et al.
==15991== Using Valgrind-3.13.0 and LibVEX; rerun with -h for copyright info
==15991== Command: ./gt
==15991== 
EXPR: area + width * height / 2
--------------------
IDNT: area
  OP: +
IDNT: width
  OP: *
IDNT: height
  OP: /
NUMB: 2
--------------------
 MID:  area + width * height / 2
--------------------
PREV:  ( + area ( / ( * width height ) 2 ) )
--------------------
POST:  area width height * 2 / +
--------------------
    <[area]
[+]
            <[width]
        <[*]
            >[height]
    >[/]
        >[2]
--------------------
==15991== 
==15991== HEAP SUMMARY:
==15991==     in use at exit: 0 bytes in 0 blocks
==15991==   total heap usage: 10 allocs, 10 frees, 242 bytes allocated
==15991== 
==15991== All heap blocks were freed -- no leaks are possible
==15991== 
==15991== For counts of detected and suppressed errors, rerun with: -v
==15991== ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 0 from 0)

7、解析有符号的数字（负数）

在parse_string函数前面定义last_token，在每次解析数字、运算符、标识符后为last_token赋值解析的类型：

/* Type code (e.g. T_OPER) of the most recently parsed token; stays 0 until one is assigned. */
char last_token = 0;

在case '0'...'9':后面修改输出解析得到的数字：

        PFI("NUMB: %ld\n", lt);

判断是否为负数：

	case '+': case '-': case '*': case '/': //operator
	  {
   
	    if (c == '-') //negative?
	      {
   
		if (last_token == 0)
		  {
   
		    char n = estr[idx+1];
		    if (n >= '0' && n <= '9') {
    sign = 1; break; }
		  }
		else if (last_token == T_OPER)
		  {
   
		    char n = estr[idx+1];
		    if (n >= '0' && n <= '9') {
    sign = 1; break; }
		  }
		else
		  {
   } //todo ???
	      }

8、测试负数

EXPR: (-100*-200)-(-300*-20)
--------------------
LPAR: (
NUMB: -100
  OP: *
NUMB: -200
RPAR: )
  OP: -
LPAR: (
NUMB: -300
  OP: *
NUMB: -20
RPAR: )
--------------------
 MID:  ( -100 * -200 ) - ( -300 * -20 )
--------------------
PREV:  ( - ( * -100 -200 ) ( * -300 -20 ) )
--------------------
POST:  -100 -200 * -300 -20 * -