自己动手写数据库系统:实现一个小型SQL解释器(上)

本文介绍了如何将一个用于C语言的词法解析器改造为SQL解释器,主要涉及修改词法解析器以支持SQL关键字,如SELECT、FROM、WHERE等,并展示了如何解析TERM、PREDICATE和QUERY等SQL语句的部分结构。通过示例代码和测试,演示了词法解析器和初步的语法解析器的工作流程。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

数据库系统有一个核心部件,那就是SQL解释器。用过MySQL的同学都知道,我们需要写一系列由SQL语言组成的代码来驱动数据库的运行,由此它就必须要有一个SQL语言解释器来解读SQL代码,然后根据代码的意图来驱动数据库执行相应的操作,本节我们就完成一个简单的SQL解释器。

解释器的原理基于编译原理,我在B站上专门有视频解释编译原理算法,因此我在这里不再赘述。实现一个解释器的首要步骤就是完成一个词法解析器,我在B站编译原理视频中实现过一个小型编译器(dragon-compiler),因此我将其对应的词法解析器直接拿过来稍作改动,让其能对SQL代码进行词法解析。首先我们把其中的lexer部分直接拷贝到我们现在的项目,打开其中的token.go文件,我们首先修改其中token的定义,将SQL语言中关键字的定义添加进去,然后去除与SQL无关的定义,修改后代码如下:

package lexer

// Tag identifies the kind of a token produced by the lexer.
type Tag uint32

// Token tags, numbered consecutively from 256 upward.
//
// The C-only keywords of the lexer this file was adapted from (BREAK, DO, IF,
// ELSE, WHILE, TEMP, SEMICOLON) are intentionally not declared. OR and TRUE
// are declared because token_map and the keyword list still refer to them.
// Every name is declared exactly once: INT and INDEX serve both the original
// lexer and the SQL keyword set.
const (
	// AND is the SQL keyword AND.
	AND Tag = iota + 256
	EQ
	FALSE
	GE
	ID
	INDEX
	LE
	INT
	FLOAT
	MINUS
	PLUS
	NE
	NUM
	OR
	REAL
	TRUE
	LEFT_BRACE    // "{"
	RIGHT_BRACE   // "}"
	LEFT_BRACKET  // "("
	RIGHT_BRACKET // ")"
	AND_OPERATOR
	OR_OPERATOR
	ASSIGN_OPERATOR
	NEGATE_OPERATOR
	LESS_OPERATOR
	GREATER_OPERATOR
	BASIC // type definitions such as int, float, bool, char

	// SQL keywords added for the interpreter (INT and INDEX are declared above).
	SELECT
	FROM
	WHERE
	INSERT
	INTO
	VALUES
	DELETE
	UPDATE
	SET
	CREATE
	TABLE
	VARCHAR
	VIEW
	AS
	ON
	COMMA
	STRING
	// End of SQL keyword definitions.
	EOF

	ERROR
)

// token_map gives the printable text of each tag; Token.ToString falls back to
// it when a token carries no lexeme of its own.
//
// It is written as a map literal so that listing the same tag twice is a
// compile-time error instead of a silent overwrite.
var token_map = map[Tag]string{
	// SQL keywords.
	AND:     "AND",
	OR:      "OR",
	SELECT:  "SELECT",
	FROM:    "FROM",
	WHERE:   "WHERE",
	INSERT:  "INSERT",
	INTO:    "INTO",
	VALUES:  "VALUES",
	DELETE:  "DELETE",
	UPDATE:  "UPDATE",
	SET:     "SET",
	CREATE:  "CREATE",
	TABLE:   "TABLE",
	INT:     "INT",
	VARCHAR: "VARCHAR",
	VIEW:    "VIEW",
	AS:      "AS",
	INDEX:   "INDEX",
	ON:      "ON",
	COMMA:   ",",
	STRING:  "STRING",

	// Tags carried over from the original lexer.
	BASIC: "BASIC",
	EQ:    "EQ",
	FALSE: "FALSE",
	TRUE:  "TRUE",
	GE:    "GE",
	ID:    "ID",
	FLOAT: "float",
	LE:    "<=",
	MINUS: "-",
	PLUS:  "+",
	NE:    "!=",
	NUM:   "NUM",
	REAL:  "REAL",

	// Single-character operators and punctuation.
	AND_OPERATOR:     "&",
	OR_OPERATOR:      "|",
	ASSIGN_OPERATOR:  "=",
	NEGATE_OPERATOR:  "!",
	LESS_OPERATOR:    "<",
	GREATER_OPERATOR: ">",
	LEFT_BRACE:       "{",
	RIGHT_BRACE:      "}",
	LEFT_BRACKET:     "(",
	RIGHT_BRACKET:    ")",

	EOF:   "EOF",
	ERROR: "ERROR",
}

// Token is a lexical token: a tag plus an optional lexeme string.
type Token struct {
   
   
	// lexeme is the token's own text; when it is empty, ToString looks the
	// text up in token_map by Tag instead.
	lexeme string
	// Tag is the kind of the token.
	Tag    Tag
}

// ToString returns the token's lexeme when it has one, and otherwise the text
// registered for its tag in token_map.
func (t *Token) ToString() string {
	if t.lexeme != "" {
		return t.lexeme
	}
	return token_map[t.Tag]
}

// NewToken returns a Token with the given tag and an empty lexeme.
func NewToken(tag Tag) Token {
	// The zero value of lexeme is already the empty string.
	return Token{Tag: tag}
}

// NewTokenWithString returns a pointer to a new Token holding both the given
// tag and the given lexeme.
func NewTokenWithString(tag Tag, lexeme string) *Token {
	tok := Token{Tag: tag, lexeme: lexeme}
	return &tok
}

在上面代码修改中,我们把原来C语言的关键字去掉,增加了一系列SQL语言对应的关键字。打开文件word_token.go,做如下修改:

package lexer

// Word associates a piece of source text with the token that represents it.
type Word struct {
   
   
	// lexeme is the text of the word, as returned by ToString.
	lexeme string
	// Tag holds the Token for this word (despite the field name, it is a
	// Token value, not a bare Tag); NewWordToken fills it via NewToken.
	Tag    Token
}

// NewWordToken builds a Word whose lexeme is s and whose token is a fresh
// Token created from tag.
func NewWordToken(s string, tag Tag) Word {
	var word Word
	word.lexeme = s
	word.Tag = NewToken(tag)
	return word
}

// ToString returns the word's lexeme.
func (w *Word) ToString() string {
	text := w.lexeme
	return text
}

func GetKeyWords() []Word {
   
   
	key_words := []Word{
   
   }
	key_words = append(key_words, NewWordToken("||", OR))
	key_words = append(key_words, NewWordToken("==", EQ))
	key_words = append(key_words, NewWordToken("!=", NE))
	key_words = append(key_words, NewWordToken("<=", LE))
	key_words = append(key_words, NewWordToken(">=", GE))
	//增加SQL语言对应关键字
	key_words = append(key_words, NewWordToken("AND", AND))
	key_words = append(key_words, NewWordToken("SELECT", SELECT))
	key_words = append(key_words, NewWordToken("FROM", FROM))
	key_words = append(key_words, NewWordToken("INSERT", INSERT))
	key_words = append(key_words, NewWordToken("INTO", INTO))
	key_words = append(key_words, NewWordToken("VALUES", VALUES))
	key_words = append(key_words, NewWordToken("DELETE", DELETE))
	key_words = append(key_words, NewWordToken("UPDATE", UPDATE))
	key_words = append(key_words, NewWordToken("SET", SET))
	key_words = append(key_words, NewWordToken("CREATE", CREATE))
	key_words = append(key_words, NewWordToken("TABLE", TABLE))
	key_words = append(key_words, NewWordToken("INT", INT))
	key_words = append(key_words, NewWordToken("VARCHAR", VARCHAR))
	key_words = append(key_words, NewWordToken("VIEW", VIEW))
	key_words = append(key_words, NewWordToken("AS", AS))
	key_words = append(key_words, NewWordToken("INDEX", INDEX))
	key_words = append(key_words, NewWordToken("ON", ON))

	//key_words = append(key_words, NewWordToken("minus", MINUS))
	//key_words = append(key_words, NewWordToken("true", TRUE))
	//key_words = append(key_words, NewWordToken("false", FALSE))
	//key_words = append(key_words, NewWordToken("if", IF))
	//key_words = append(key_words, NewWordToken("else", ELSE))
	//增加while, do关键字
	//key_words = append(key_words, NewWordToken("while", WHILE))
	//key_words = append(key_words, NewWordToken("do", DO))
	//key_words = append(key_words, NewWordToken("break", BREAK))
	//添加类型定义
	//key_w
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值