flex
yytext是匹配的字符串,yyleng是该字符串的长度
- scan的规则:
If action code returns, scanning resumes on the next call to yylex(); if it doesn’t return, scanning resumes immediately.
统计代码行数,字符数,数字字符数
%{
#include <stdio.h> /* everything between %{ and %} is copied verbatim into the generated scanner */
#include <math.h>
/* Running totals updated by the rule actions below. */
int num_lines = 0, num_chars = 0, digit_num = 0;
%}
%%
\n      ++num_lines;                 /* a newline bumps the line count only, not num_chars */
[0-9]   ++digit_num; ++num_chars;    /* a digit is also counted as a character */
.       ++num_chars;                 /* any other character except newline */
%%
/*
 * Scan standard input with yylex() (no action returns, so scanning only
 * stops at end of input) and then print the collected totals.
 */
int main(void) {
    yylex();
    printf("# of lines = %d, # of chars = %d, # of digits = %d\n", num_lines, num_chars, digit_num);
    return 0;
}
pascal语言的flex文件
pascal.lex
%{
#include <math.h>
%}
/* Name definitions. Flex takes everything from the first non-blank character
 * after the name up to the end of the line as the definition, so comments
 * are kept on their own lines instead of trailing a definition. */
DIGIT [0-9]
ID [a-z][a-z0-9]*
/* Patterns listed earlier are matched first; the matched text is stored in yytext. */
%%
{DIGIT}+ {
    printf("An integer: %s (%d)\n", yytext, atoi(yytext));
}
{DIGIT}+"."{DIGIT}* { /* braces around a name expand its definition */
    printf("A float: %s (%g)\n", yytext, atof(yytext));
}
if|then|begin|end|procedure|function {
    printf("A keyword: %s\n", yytext);
}
{ID} printf("An identifier: %s\n", yytext);
"+"|"-"|"*"|"/" printf("An operator: %s\n", yytext);
"{"[^}\n]*"}"    /* no action: one-line { ... } comments are discarded */
[ \t\n]+         /* no action: whitespace is discarded */
. printf("Unrecognized character: %s\n", yytext);
%%
/* Run the scanner over standard input until end of input. */
int main(void)
{
    yyin = stdin;
    yylex();
    return 0;
}
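正则表达式(以下为通用/Perl 风格的速查表;其中 \A、\z、\s、\S、\d、\D、\w、\W、\b 在 flex 中不被支持,需改用字符类,如 [0-9]、[ \t\n];flex 中的 () 只用于分组,不做捕获)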
()表示一个表达式的匹配,如(abc)需要遇到字符串abc时才匹配,遇到a不会匹配
[]表示单个字符的匹配,如[abc]遇到其中任何一个字母就匹配
匹配以abc开头的字符串,(^abc)
匹配以abc结尾的字符串,(abc$)
[abc] A single character of: a, b, or c
[^abc] Any single character except: a, b, or c
[a-z] Any single character in the range a-z
[a-zA-Z] Any single character in the range a-z or A-Z
^ Start of line
$ End of line
\A Start of string
\z End of string
. Any single character
\s Any whitespace character
\S Any non-whitespace character
\d Any digit
\D Any non-digit
\w Any word character (letter, number, underscore) 匹配字母、数字和下划线
\W Any non-word character
\b Any word boundary
(...) Capture everything enclosed
(a|b) a or b
a? Zero or one of a
a* Zero or more of a
a+ One or more of a
a{3} Exactly 3 of a
a{3,} 3 or more of a
a{3,6} Between 3 and 6 of a
Start Condition
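INITIAL 是自动定义的初始状态。%s 声明的是包含型(inclusive)起始状态:处于该状态时,不带任何 <状态> 前缀的规则同样有效;%x 声明的是排他型(exclusive)起始状态:处于该状态时,只有带该 <状态> 前缀的规则才会匹配。例如声明 %x COMMENT 后,规则 <COMMENT>{str} 只有在已经执行 BEGIN(COMMENT) 的情况下才会匹配 str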
%{
#include <unistd.h>
%}
/* Exclusive start condition: while it is active only rules prefixed with <COMMENT> can match. */
%x COMMENT
%%
"/*" BEGIN(COMMENT);
"username" printf("username\n");
<COMMENT>[^*\n]*         /* eat anything that is not a '*' */
<COMMENT>"*"+[^*/\n]*    /* eat runs of '*' that are not followed by '/' */
<COMMENT>\n              /* eat newlines inside the comment instead of echoing them */
<COMMENT>"*"+"/" { printf("INITIAL\n"); BEGIN(INITIAL); }
. printf("strange char\n");
%%
/* Scan standard input until end of input. */
int main(void) {
    yylex();
    return 0;
}
bison
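%union 声明语义值可以取的各种类型;声明 token(%token)或非终结符(%type)时,用 <成员名> 指定其语义值使用 union 中的哪个成员。若完全不使用 %union,语义值类型默认为 int;使用了 %union 之后,未用 <> 指定成员的符号(如下面的 EOL)没有语义值类型。比如下面的例子中 NUMBER 使用成员 d,即 double 类型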
%{
#include <stdio.h>
#include <stdlib.h>
#include "ACalculator.h"
%}
/* Semantic value type: a value is stored in one of these union members. */
%union {
struct ast *a;
double d;
}
/* NUMBER carries its value in member d (a double). */
%token <d> NUMBER
/* EOL is declared without a <member> tag. */
%token EOL
/* exp, factor and term carry their value in member a (a struct ast pointer). */
%type <a> exp factor term
Calculator Practice
ACalculator.h
#ifndef ACALCULATOR_H
#define ACALCULATOR_H

/* Current input line number; used by yyerror() to prefix its messages. */
extern int yylineno;

/* Scanner and parser entry points (generated by flex and bison). */
int yylex(void);
int yyparse(void);

/* printf-style error reporter: s is a format string followed by its arguments. */
void yyerror(char *s, ...);

/* Interior AST node: nodetype is the operator tag, l and r are the operands. */
struct ast {
    int nodetype;
    struct ast *l;
    struct ast *r;
};

/*
 * Numeric leaf. nodetype is the first member, as in struct ast, so a leaf
 * can be passed around as a struct ast pointer and recognized by its tag.
 */
struct numval
{
    int nodetype;
    double number;
};

/* Build an interior node with the given tag and children. */
struct ast *newast(int nodetype, struct ast *l, struct ast *r);
/* Build a numeric leaf holding d. */
struct ast *newnum(double d);
/* Evaluate the tree and return its value. */
double eval(struct ast *a);
/* Release a tree built with newast()/newnum(). */
void treefree(struct ast *a);

#endif /* ACALCULATOR_H */
ACalculator.c
#include <stdio.h>
#include <stdlib.h>
#include <stdarg.h>
#include "ACalculator.h"
/*
 * Allocate an interior AST node.
 *   nodetype  operator tag stored in the node
 *   l, r      child subtrees, stored as given (callers pass NULL for r on
 *             one-operand nodes)
 * Returns the new node. If allocation fails, the error is reported through
 * yyerror() and the program terminates with a failure status.
 */
struct ast* newast(int nodetype, struct ast* l, struct ast* r) {
    struct ast *node = malloc(sizeof *node);
    if (!node) {
        yyerror("out of space"); /* yyerror() appends the newline itself */
        exit(EXIT_FAILURE);      /* running out of memory must not exit with status 0 */
    }
    node->nodetype = nodetype;
    node->l = l;
    node->r = r;
    return node;
}
/*
 * Allocate a numeric leaf holding d, tagged with node type 'K'.
 * Returns the leaf as a struct ast pointer; eval() and treefree() recognize
 * it by the 'K' tag. If allocation fails, the error is reported through
 * yyerror() and the program terminates with a failure status.
 */
struct ast* newnum(double d) {
    struct numval *leaf = malloc(sizeof *leaf);
    if (!leaf) {
        yyerror("out of space");
        exit(EXIT_FAILURE); /* running out of memory must not exit with status 0 */
    }
    leaf->nodetype = 'K';
    leaf->number = d;
    return (struct ast*)leaf; /* note the returned type: callers see a generic node */
}
double
eval(struct ast* a) {
double v;
switch (a->nodetype)
{
case 'K':
v = ((struct numval*)a)->number;
break;
case '+':
v = eval(a->l) + eval(a->r);
break;
case '-':
v = eval(a->l) - eval(a->r);
break;
case '*':
v = eval(a->l) * eval(a->r);
break;
case '/':
v = eval(a->l) / eval(a->r);
break;
case '|': // abs
v = eval(a->l);
if (v < 0)
v = -v;
break;
case 'M':
v = -eval(a->l);
break;
default:
printf("internal errror: bad node %c\n", a->nodetype);
break;
}
return v;
}
/*
 * Release the tree rooted at a.
 * Binary nodes ('+', '-', '*', '/') free the right subtree, then the left
 * subtree, then themselves; one-operand nodes ('|', 'M') free the left
 * subtree and themselves; leaves ('K') free only themselves. A node with
 * any other tag is left untouched.
 */
void
treefree(struct ast* a) {
    const int tag = a->nodetype;
    const int two_operands = (tag == '+' || tag == '-' || tag == '*' || tag == '/');
    const int one_operand = (tag == '|' || tag == 'M');

    if (!two_operands && !one_operand && tag != 'K') {
        return;
    }
    if (two_operands) {
        treefree(a->r);
    }
    if (two_operands || one_operand) {
        treefree(a->l);
    }
    free(a);
}
/*
 * Report an error on stderr as "<yylineno>: error: <message>" followed by a
 * newline. s is a printf-style format string; the remaining arguments are
 * its values.
 */
void
yyerror(char* s, ...) {
    va_list ap;
    va_start(ap, s);
    fprintf(stderr, "%d: error: ", yylineno);
    vfprintf(stderr, s, ap);
    fprintf(stderr, "\n");
    va_end(ap); /* every va_start must be paired with va_end before returning */
}
/* Parser entry point; declared here so the call below has a prototype in scope. */
int yyparse(void);

/* Print the first prompt, then run the parser; its result becomes the exit status. */
int main(void) {
    printf("> ");
    return yyparse();
}
ACalculator.flex
%option noyywrap nodefault yylineno
%{
#include "ACalculator.h"
#include "Parser.tab.h"
%}
/* Exponent suffix of a number: e or E, an optional sign, then digits. */
EXP ([Ee][-+]?[0-9]+)
%%
"+" |
"-" |
"*" |
"/" |
"|" |
"(" |
")" { return yytext[0]; } // note this idiom: an action of "|" means "use the action of the next rule", so all seven patterns return the matched character itself
[0-9]+"."[0-9]*{EXP}? |
"."?[0-9]+{EXP}? {yylval.d = atof(yytext); return NUMBER; } /* numeric value goes to the parser through yylval.d */
\n { return EOL; } /* a newline ends the current input line for the parser */
"//".*
[ \t] {} /* spaces and tabs are skipped */
. { yyerror("Mystery character %c\n", *yytext); } /* any other character is reported */
%%
Parser.y
%{
#include <stdio.h>
#include <stdlib.h>
#include "ACalculator.h"
/* The generated parser calls yylex(); declare it so the call has a prototype in scope. */
int yylex(void);
%}
/* Semantic values: a holds an AST node, d holds a number from the scanner. */
%union {
    struct ast *a;
    double d;
}
%token <d> NUMBER
%token EOL
%type <a> exp factor term
%%
/* A sequence of lines. Each expression line is evaluated, printed and freed, then a new prompt is shown. */
calclist: /* empty */
    | calclist exp EOL {
        printf("= %4.4g\n", eval($2));
        treefree($2);
        printf("> ");
    }
    | calclist EOL { printf("> "); } /* empty line: just prompt again */
    ;
/* Additive level; a lone factor uses the default action ($$ = $1). */
exp: factor
    | exp '+' factor { $$ = newast('+', $1, $3); }
    | exp '-' factor { $$ = newast('-', $1, $3); }
    ;
/* Multiplicative level. */
factor: term
    | factor '*' term { $$ = newast('*', $1, $3); }
    | factor '/' term { $$ = newast('/', $1, $3); }
    ;
/* Numbers, prefix '|' (evaluated as absolute value), parentheses, and unary minus ('M' node). */
term: NUMBER { $$ = newnum($1); }
    | '|' term { $$ = newast('|', $2, NULL); }
    | '(' exp ')' { $$ = $2; }
    | '-' term { $$ = newast('M', $2, NULL); }
    ;
%%
Makefile
# Generate the parser and scanner sources, then compile and link the calculator.
# bison runs first because the scanner source includes the generated Parser.tab.h.
# Each recipe line must start with a tab character.
.PHONY: all
all:
	bison -d Parser.y
	flex ACalculator.flex
	gcc lex.yy.c ACalculator.c Parser.tab.c -o ACalculator -lfl
- 如何处理冲突:
定义运算符的优先级和结合性:优先级由上往下逐级递增(越靠后声明优先级越高)。%left 声明该运算符有左结合性,%nonassoc 声明该运算符不可结合(不允许连续使用);一元运算符的优先级通常通过在规则后加 %prec 并引用这里声明的名字(如 UMINUS)来指定
/* Declared first, so lowest precedence: '+' and '-', left-associative. */
%left '+' '-'
/* Declared later, so higher precedence: '*' and '/', left-associative. */
%left '*' '/'
/* Declared last, so highest precedence, non-associative: '|' and the name UMINUS
 * (presumably referenced with %prec on a unary-minus rule; no such rule is shown here). */
%nonassoc '|' UMINUS
- %token和%type的区别
%type用于非终结符和带有属性的终结符,%token用于