【手搓一个脚本语言】八、用C语言抽象语法树AST解析标识符和有符号数(负数)
1、定义标识符类型
- Token是记号,可以是运算符、数字,或由字母组成的标识符(Identifier)!
- 标识符可以是变量名、函数名、关键字!
- 定义T_IDNT代表标识符!
- 在union {…} V;中定义 char *sval; 与T_IDNT对应!
/* Tags stored in Token.type; they tell which member of Token.V is in use. */
#define T_OPER 0x21 /* operator token (presumably kept in V.oval -- confirm) */
#define T_NUMB 0x22 /* number token (presumably kept in V.ival -- confirm) */
#define T_SUBA 0x23 /* token that owns a nested AST through V.aobj */
#define T_IDNT 0x24 /* identifier token; its text is pointed to by V.sval */

/* AstNode is a POINTER to a tree node; Token is the plain struct type. */
typedef struct _AstNode *AstNode;
typedef struct _Token Token;

/* One lexical token: an 8-byte payload union followed by a one-byte tag. */
struct _Token
{
  union
  {
    void *nval;
    char oval[8];
    long ival;
    AstNode aobj;  /* nested AST, released recursively by astnode_free */
    char *sval;    /* heap string for T_IDNT, released by astnode_free */
  } V;
  char type;       /* one of the T_* tags above */
  char fill[7];    /* explicit filler bytes after the one-byte tag */
};

/* Binary tree node: the token it carries plus its two children. */
struct _AstNode
{
  Token token;
  AstNode left;
  AstNode right;
};
2、释放AST节点
- 由于sval是字符指针,创建时要分配内存,释放节点时要同时释放掉!
/*
 * Release an AST subtree.
 *
 * Frees both children first, then whatever the node's token owns
 * (a nested AST for T_SUBA, the heap-allocated name for T_IDNT),
 * and finally the node itself.  Passing NULL is a no-op.
 */
void
astnode_free (AstNode node)
{
  if (node == NULL)
    return;

  astnode_free (node->left);
  astnode_free (node->right);

  /* Only these two token kinds carry a payload that must be released. */
  switch (node->token.type)
    {
    case T_SUBA:
      astnode_free (node->token.V.aobj);
      break;
    case T_IDNT:
      free (node->token.V.sval);
      break;
    default:
      break;
    }

  free (node);
}
3、遍历函数
...
case T_IDNT: PFI("[%s]\n", node->token.V.sval); break;
...
...
case T_IDNT: PFI( " %s", node->token.V.sval); break;
...
4、解析标识符
case 'A'...'Z': case 'a'...'z':
  {
    /* Look one character ahead: the identifier ends as soon as the next
       character is not an ASCII letter. */
    char n = estr[idx+1];

    /* Collect the current letter, always leaving room for the terminating
       NUL.  tbuf is assumed to hold IDNTSIZE bytes (the memset below clears
       exactly that many); letters beyond that are dropped instead of being
       written past the end of the buffer. */
    if (tdx < IDNTSIZE - 1)
      tbuf[tdx++] = c;
    else
      PFI("Info: At index %d, identifier too long, truncated!\n", idx);

    if (!((n >= 'A' && n <='Z')||(n >= 'a' && n <='z')))
      {
        /* End of identifier: build a T_IDNT node holding its own copy of
           the name; astnode_free releases that copy for T_IDNT tokens. */
        Token tk = {
          .type = T_IDNT, .V.sval = strdup(tbuf) };
        AstNode node = astnode_new (tk);

        if (st[lv] == NULL)
          {
            /* No operator node yet at this level: the identifier becomes
               the pending operand unless one is already waiting. */
            if (tp[lv] == NULL)
              tp[lv] = node;
            else
              {
                PFI("Info: At index %d, Maybe Syntax error!\n", idx);
                /* The node was never linked into the tree, so release it
                   here rather than leaking it together with its name. */
                astnode_free (node);
              }
          }
        else
          {
            /* Hang the identifier on the first free right-hand slot found
               by following the right links from the current operator. */
            AstNode tmp = st[lv]->right;
            if (tmp == NULL)
              {
                st[lv]->right = node;
              }
            else
              {
                while (tmp->right != NULL)
                  tmp = tmp->right;
                tmp->right = node;
              }
          }

        PFI("IDNT: %s\n", tbuf);
        /* Reset the scratch buffer for the next identifier. */
        memset (tbuf, 0, IDNTSIZE); tdx = 0;
      }
  }
  break;
5、编译测试
- 在main函数中调用test_parse函数
- 在test_parse函数中定义表达式:“area + width * height / 2”;
- 运行输出结果如下:
EXPR: area + width * height / 2
--------------------
IDNT: area
OP: +
IDNT: width
OP: *
IDNT: height
OP: /
NUMB: 2
--------------------
MID: area + width * height / 2
--------------------
PREV: ( + area ( / ( * width height ) 2 ) )
--------------------
POST: area width height * 2 / +
--------------------
<[area]
[+]
<[width]
<[*]
>[height]
>[/]
>[2]
--------------------
6、检查内存分配情况
==15991== Memcheck, a memory error detector
==15991== Copyright (C) 2002-2017, and GNU GPL'd, by Julian Seward et al.
==15991== Using Valgrind-3.13.0 and LibVEX; rerun with -h for copyright info
==15991== Command: ./gt
==15991==
EXPR: area + width * height / 2
--------------------
IDNT: area
OP: +
IDNT: width
OP: *
IDNT: height
OP: /
NUMB: 2
--------------------
MID: area + width * height / 2
--------------------
PREV: ( + area ( / ( * width height ) 2 ) )
--------------------
POST: area width height * 2 / +
--------------------
<[area]
[+]
<[width]
<[*]
>[height]
>[/]
>[2]
--------------------
==15991==
==15991== HEAP SUMMARY:
==15991== in use at exit: 0 bytes in 0 blocks
==15991== total heap usage: 10 allocs, 10 frees, 242 bytes allocated
==15991==
==15991== All heap blocks were freed -- no leaks are possible
==15991==
==15991== For counts of detected and suppressed errors, rerun with: -v
==15991== ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 0 from 0)
7、解析有符号的数字(负数)
- 在parse_string函数前面定义last_token;每次解析完数字、运算符或标识符后,把刚解析出的记号类型赋给last_token:
char last_token = 0;
- 在case '0'...'9':分支中,修改输出解析所得数字的语句:
PFI("NUMB: %ld\n", lt);
case '+': case '-': case '*': case '/':
{
if (c == '-')
{
if (last_token == 0)
{
char n = estr[idx+1];
if (n >= '0' && n <= '9') {
sign = 1; break; }
}
else if (last_token == T_OPER)
{
char n = estr[idx+1];
if (n >= '0' && n <= '9') {
sign = 1; break; }
}
else
{
}
}
8、测试负数
EXPR: (-100*-200)-(-300*-20)
--------------------
LPAR: (
NUMB: -100
OP: *
NUMB: -200
RPAR: )
OP: -
LPAR: (
NUMB: -300
OP: *
NUMB: -20
RPAR: )
--------------------
MID: ( -100 * -200 ) - ( -300 * -20 )
--------------------
PREV: ( - ( * -100 -200 ) ( * -300 -20 ) )
--------------------
POST: -100 -200 * -300 -20 * -