数据库系统有一个核心部件,那就是SQL解释器。用过MySQL的同学都知道,我们需要编写由SQL语言组成的代码来驱动数据库运行,因此数据库必须有一个SQL解释器来解读SQL代码,并根据代码的意图驱动数据库执行相应的操作。本节我们就完成一个简单的SQL解释器。
解释器的原理基于编译原理,我在B站上专门有视频讲解编译原理算法,因此在这里不再赘述。实现一个解释器的首要步骤是完成一个词法解析器。我在B站编译原理视频中实现过一个小型编译器(dragon-compiler),因此我将其对应的词法解析器直接拿过来稍作改动,让它能对SQL代码进行词法解析。首先我们把其中的lexer部分直接拷贝到现在的项目中,打开其中的token.go文件,修改其中token的定义:将SQL语言关键字的定义添加进去,并去除与SQL无关的定义,修改后代码如下:
package lexer
// Tag identifies the lexical category of a token.
type Tag uint32

// Token tags. Values are assigned sequentially starting at 256.
//
// Tags inherited from the C-language lexer that SQL does not need (BREAK, DO,
// IF, ELSE, WHILE, TEMP, SEMICOLON) stay commented out. OR and TRUE remain
// declared because token_map below and the keyword table still refer to them.
// Every name is declared exactly once: INT and INDEX are shared between the
// original tag list and the SQL keyword list.
const (
	// AND is the SQL AND keyword.
	AND Tag = iota + 256
	//BREAK
	//DO
	EQ
	FALSE
	GE
	ID
	//IF
	//ELSE
	INDEX
	LE
	INT
	FLOAT
	MINUS
	PLUS
	NE
	NUM
	OR
	REAL
	TRUE
	//WHILE

	// LEFT_BRACE is "{".
	LEFT_BRACE
	// RIGHT_BRACE is "}".
	RIGHT_BRACE
	// LEFT_BRACKET is "(".
	LEFT_BRACKET
	// RIGHT_BRACKET is ")".
	RIGHT_BRACKET
	AND_OPERATOR
	OR_OPERATOR
	ASSIGN_OPERATOR
	NEGATE_OPERATOR
	LESS_OPERATOR
	GREATER_OPERATOR
	// BASIC covers basic type definitions such as int, float, bool and char.
	BASIC
	//TEMP (temporary register variable of the intermediate code)
	//SEMICOLON

	// SQL keywords added for the SQL lexer. INT and INDEX are SQL keywords as
	// well; they are already declared above and must not be repeated here.
	SELECT
	FROM
	WHERE
	INSERT
	INTO
	VALUES
	DELETE
	UPDATE
	SET
	CREATE
	TABLE
	VARCHAR
	VIEW
	AS
	ON
	COMMA
	STRING
	// End of SQL keywords.

	EOF
	ERROR
)

// token_map maps every Tag to the text returned for a token that carries no
// lexeme of its own.
//
// It is a map literal rather than a series of assignments in init so that the
// compiler rejects a tag that is listed twice.
var token_map = map[Tag]string{
	// SQL keywords.
	AND:     "AND",
	SELECT:  "SELECT",
	FROM:    "FROM",
	WHERE:   "WHERE",
	INSERT:  "INSERT",
	INTO:    "INTO",
	VALUES:  "VALUES",
	DELETE:  "DELETE",
	UPDATE:  "UPDATE",
	SET:     "SET",
	CREATE:  "CREATE",
	TABLE:   "TABLE",
	INT:     "INT",
	VARCHAR: "VARCHAR",
	VIEW:    "VIEW",
	AS:      "AS",
	INDEX:   "INDEX",
	ON:      "ON",
	COMMA:   ",",
	STRING:  "STRING",

	// Literals, identifiers and multi-character operators.
	BASIC: "BASIC",
	EQ:    "EQ",
	FALSE: "FALSE",
	GE:    "GE",
	ID:    "ID",
	FLOAT: "float",
	LE:    "<=",
	MINUS: "-",
	PLUS:  "+",
	NE:    "!=",
	NUM:   "NUM",
	OR:    "OR",
	REAL:  "REAL",
	TRUE:  "TRUE",

	// Single-character operators and delimiters.
	AND_OPERATOR:     "&",
	OR_OPERATOR:      "|",
	ASSIGN_OPERATOR:  "=",
	NEGATE_OPERATOR:  "!",
	LESS_OPERATOR:    "<",
	GREATER_OPERATOR: ">",
	LEFT_BRACE:       "{",
	RIGHT_BRACE:      "}",
	LEFT_BRACKET:     "(",
	RIGHT_BRACKET:    ")",

	// Stream markers.
	EOF:   "EOF",
	ERROR: "ERROR",
}
// Token is a lexical token: a Tag plus the lexeme text attached to it.
type Token struct {
	// lexeme is the text carried by the token; it is empty for tokens
	// created with NewToken.
	lexeme string
	// Tag is the token's category.
	Tag Tag
}

// ToString returns the token's lexeme when it is non-empty; otherwise it
// returns the token_map entry for the token's Tag.
func (t *Token) ToString() string {
	if t.lexeme != "" {
		return t.lexeme
	}
	return token_map[t.Tag]
}

// NewToken returns a Token value with the given tag and an empty lexeme.
func NewToken(tag Tag) Token {
	return Token{Tag: tag}
}

// NewTokenWithString returns a pointer to a newly allocated Token holding the
// given tag and lexeme.
func NewTokenWithString(tag Tag, lexeme string) *Token {
	tok := new(Token)
	tok.Tag = tag
	tok.lexeme = lexeme
	return tok
}
在上面代码修改中,我们把原来C语言的关键字去掉,增加了一系列SQL语言对应的关键字。打开文件word_token.go,做如下修改:
package lexer
// Word pairs a lexeme with the Token built for its tag.
type Word struct {
	// lexeme is the text of the word.
	lexeme string
	// Tag holds the Token created from the tag passed to NewWordToken.
	// Note that, despite its name, this field is a Token and not a Tag.
	Tag Token
}

// NewWordToken returns a Word whose lexeme is s and whose Tag field is the
// Token produced by NewToken(tag).
func NewWordToken(s string, tag Tag) Word {
	var w Word
	w.lexeme = s
	w.Tag = NewToken(tag)
	return w
}

// ToString returns the word's lexeme.
func (w *Word) ToString() string { return w.lexeme }
func GetKeyWords() []Word {
key_words := []Word{
}
key_words = append(key_words, NewWordToken("||", OR))
key_words = append(key_words, NewWordToken("==", EQ))
key_words = append(key_words, NewWordToken("!=", NE))
key_words = append(key_words, NewWordToken("<=", LE))
key_words = append(key_words, NewWordToken(">=", GE))
//增加SQL语言对应关键字
key_words = append(key_words, NewWordToken("AND", AND))
key_words = append(key_words, NewWordToken("SELECT", SELECT))
key_words = append(key_words, NewWordToken("FROM", FROM))
key_words = append(key_words, NewWordToken("INSERT", INSERT))
key_words = append(key_words, NewWordToken("INTO", INTO))
key_words = append(key_words, NewWordToken("VALUES", VALUES))
key_words = append(key_words, NewWordToken("DELETE", DELETE))
key_words = append(key_words, NewWordToken("UPDATE", UPDATE))
key_words = append(key_words, NewWordToken("SET", SET))
key_words = append(key_words, NewWordToken("CREATE", CREATE))
key_words = append(key_words, NewWordToken("TABLE", TABLE))
key_words = append(key_words, NewWordToken("INT", INT))
key_words = append(key_words, NewWordToken("VARCHAR", VARCHAR))
key_words = append(key_words, NewWordToken("VIEW", VIEW))
key_words = append(key_words, NewWordToken("AS", AS))
key_words = append(key_words, NewWordToken("INDEX", INDEX))
key_words = append(key_words, NewWordToken("ON", ON))
//key_words = append(key_words, NewWordToken("minus", MINUS))
//key_words = append(key_words, NewWordToken("true", TRUE))
//key_words = append(key_words, NewWordToken("false", FALSE))
//key_words = append(key_words, NewWordToken("if", IF))
//key_words = append(key_words, NewWordToken("else", ELSE))
//增加while, do关键字
//key_words = append(key_words, NewWordToken("while", WHILE))
//key_words = append(key_words, NewWordToken("do", DO))
//key_words = append(key_words, NewWordToken("break", BREAK))
//添加类型定义
//key_w