项目概述
这是一个用Basic语言实现的自举编译器项目,支持解释执行和编译执行两种模式。项目旨在展示Basic语言的强大能力,同时向这门历史悠久且影响深远的编程语言致敬。
项目结构
BasicCompiler/
├── src/                        # 源代码目录
│   ├── core/                   # 核心组件
│   │   ├── lexer.bas           # 词法分析器
│   │   ├── parser.bas          # 语法分析器
│   │   ├── ast.bas             # 抽象语法树定义
│   │   ├── interpreter.bas     # 解释执行引擎
│   │   └── codegen.bas         # 代码生成器
│   ├── frontend/               # 前端界面
│   │   ├── repl.bas            # 交互式解释器
│   │   └── compiler.bas        # 编译器主程序
│   └── utils/                  # 工具函数
│       ├── error.bas           # 错误处理
│       ├── symbols.bas         # 符号表管理
│       └── memory.bas          # 内存管理
├── lib/                        # 库文件目录
│   └── runtime.c               # C运行时库
├── examples/                   # 示例代码
│   ├── hello.bas               # Hello World示例
│   ├── fibonacci.bas           # 斐波那契数列示例
│   └── selfhost.bas            # 自举测试示例
└── docs/                       # 文档
    ├── design.md               # 设计文档
    └── manual.md               # 用户手册
核心组件详细实现
词法分析器 (lexer.bas)
' Token categories produced by the lexer.
ENUM TokenType
TOKEN_EOF ' end-of-input marker; Tokenize appends one as the last token
TOKEN_IDENTIFIER ' a letter followed by letters, digits or underscores
TOKEN_NUMBER ' a digit run containing at most one decimal point
TOKEN_STRING ' quoted text, stored without the surrounding quotes
TOKEN_KEYWORD ' an identifier whose upper-cased text matches an entry of keywords()
TOKEN_OPERATOR ' any other character, plus the pairs >=, <=, <> and ==
TOKEN_SYMBOL ' NOTE(review): never emitted by the Tokenize shown in this listing
END ENUM
' A single lexical token together with its source position.
TYPE Token
type AS TokenType ' category of the token
value AS STRING ' token text
line AS INTEGER ' line number; Tokenize starts counting at 1
column AS INTEGER ' column number; Tokenize starts counting at 1
END TYPE
' Reserved-word table (indices 0..20, 21 entries).
' Tokenize compares the upper-cased identifier text against these entries,
' so every entry must be written in upper case.
DIM SHARED keywords(20) AS STRING
keywords(0) = "PRINT"
keywords(1) = "IF"
keywords(2) = "THEN"
keywords(3) = "ELSE"
keywords(4) = "END"
keywords(5) = "FOR"
keywords(6) = "TO"
keywords(7) = "STEP"
keywords(8) = "NEXT"
keywords(9) = "WHILE"
keywords(10) = "WEND"
keywords(11) = "DO"
keywords(12) = "LOOP"
keywords(13) = "FUNCTION"
keywords(14) = "SUB"
keywords(15) = "DIM"
keywords(16) = "AS"
keywords(17) = "INTEGER"
keywords(18) = "STRING"
keywords(19) = "BOOLEAN"
keywords(20) = "RETURN"
' Lexer entry point: splits source text into tokens.
'
' Parameters:
'   source   - complete program text
'   tokens() - dynamic array that receives the tokens (grown through AddToken)
' Returns:
'   the number of tokens stored, including the trailing TOKEN_EOF.
'
' Behaviour:
'   * Spaces, tabs and line breaks (LF, CR, CR+LF) produce no tokens; line breaks
'     advance the line counter and reset the column to 1.
'   * An apostrophe starts a comment that runs to the end of the line.
'   * Double-quoted text becomes a TOKEN_STRING without its quotes.
'   * Keywords are stored upper-cased so later stages can compare them against
'     upper-case literals; identifiers keep their original spelling.
'   * Every other character is a TOKEN_OPERATOR; >=, <=, <> and == are merged
'     into one token.
FUNCTION Tokenize(source AS STRING, tokens() AS Token) AS INTEGER
    DIM position AS INTEGER, line AS INTEGER, column AS INTEGER
    DIM currentChar AS STRING, currentToken AS STRING
    DIM tokenCount AS INTEGER, inString AS BOOLEAN
    DIM i AS INTEGER

    position = 1
    line = 1
    column = 1
    tokenCount = 0
    inString = FALSE

    WHILE position <= LEN(source)
        currentChar = MID$(source, position, 1)

        ' Inside a string literal: collect characters until the closing quote.
        IF inString THEN
            IF currentChar = CHR$(34) THEN
                inString = FALSE
                AddToken tokens(), tokenCount, TOKEN_STRING, currentToken, line, column - LEN(currentToken)
                currentToken = ""
            ELSE
                currentToken = currentToken + currentChar
            END IF
            position = position + 1
            column = column + 1
            CONTINUE WHILE
        END IF

        ' Skip blanks and tabs.
        IF currentChar = " " OR currentChar = CHR$(9) THEN
            position = position + 1
            column = column + 1
            CONTINUE WHILE
        END IF

        ' Line breaks: CR+LF counts as a single break.
        IF currentChar = CHR$(10) OR currentChar = CHR$(13) THEN
            IF currentChar = CHR$(13) AND position < LEN(source) AND MID$(source, position + 1, 1) = CHR$(10) THEN
                position = position + 1
            END IF
            position = position + 1
            line = line + 1
            column = 1
            CONTINUE WHILE
        END IF

        ' An apostrophe starts a comment: discard everything up to (but not
        ' including) the line break, which the branch above then handles.
        IF currentChar = "'" THEN
            WHILE position <= LEN(source)
                currentChar = MID$(source, position, 1)
                IF currentChar = CHR$(10) OR currentChar = CHR$(13) THEN EXIT WHILE
                position = position + 1
                column = column + 1
            WEND
            CONTINUE WHILE
        END IF

        ' Opening quote of a string literal.
        IF currentChar = CHR$(34) THEN
            inString = TRUE
            position = position + 1
            column = column + 1
            CONTINUE WHILE
        END IF

        ' Identifiers and keywords.
        IF IsAlpha(currentChar) THEN
            currentToken = currentChar
            position = position + 1
            column = column + 1
            WHILE position <= LEN(source)
                currentChar = MID$(source, position, 1)
                IF IsAlpha(currentChar) OR IsDigit(currentChar) OR currentChar = "_" THEN
                    currentToken = currentToken + currentChar
                    position = position + 1
                    column = column + 1
                ELSE
                    EXIT WHILE
                END IF
            WEND

            ' Case-insensitive keyword lookup.
            DIM isKeyword AS BOOLEAN
            DIM upperToken AS STRING
            isKeyword = FALSE
            upperToken = UCASE$(currentToken)
            FOR i = 0 TO UBOUND(keywords)
                IF upperToken = keywords(i) THEN
                    isKeyword = TRUE
                    EXIT FOR
                END IF
            NEXT i

            IF isKeyword THEN
                ' Store the canonical upper-case spelling for keywords.
                AddToken tokens(), tokenCount, TOKEN_KEYWORD, upperToken, line, column - LEN(currentToken)
            ELSE
                AddToken tokens(), tokenCount, TOKEN_IDENTIFIER, currentToken, line, column - LEN(currentToken)
            END IF
            currentToken = ""
            CONTINUE WHILE
        END IF

        ' Numeric literals: digits with at most one decimal point.
        IF IsDigit(currentChar) THEN
            currentToken = currentChar
            position = position + 1
            column = column + 1
            DIM hasDecimal AS BOOLEAN
            hasDecimal = FALSE
            WHILE position <= LEN(source)
                currentChar = MID$(source, position, 1)
                IF IsDigit(currentChar) THEN
                    currentToken = currentToken + currentChar
                    position = position + 1
                    column = column + 1
                ELSEIF currentChar = "." AND NOT hasDecimal THEN
                    currentToken = currentToken + currentChar
                    position = position + 1
                    column = column + 1
                    hasDecimal = TRUE
                ELSE
                    EXIT WHILE
                END IF
            WEND
            AddToken tokens(), tokenCount, TOKEN_NUMBER, currentToken, line, column - LEN(currentToken)
            currentToken = ""
            CONTINUE WHILE
        END IF

        ' Operators and punctuation.
        DIM symbol AS STRING
        DIM symbolColumn AS INTEGER
        symbol = currentChar
        symbolColumn = column ' remember where the operator starts

        ' Merge two-character operators.
        IF position < LEN(source) THEN
            DIM twoCharOp AS STRING
            twoCharOp = currentChar + MID$(source, position + 1, 1)
            IF twoCharOp = ">=" OR twoCharOp = "<=" OR twoCharOp = "<>" OR twoCharOp = "==" THEN
                symbol = twoCharOp
                position = position + 1
                column = column + 1
            END IF
        END IF

        AddToken tokens(), tokenCount, TOKEN_OPERATOR, symbol, line, symbolColumn
        position = position + 1
        column = column + 1
    WEND

    ' An unterminated string literal is emitted with whatever was collected
    ' instead of being dropped without a trace.
    IF inString THEN
        AddToken tokens(), tokenCount, TOKEN_STRING, currentToken, line, column - LEN(currentToken)
    END IF

    ' Terminating EOF token.
    AddToken tokens(), tokenCount, TOKEN_EOF, "", line, column
    Tokenize = tokenCount
END FUNCTION
' Appends one token to tokens() and increments count.
'
' Parameters:
'   tokens()           - dynamic token array; enlarged here when it is full
'   count              - number of tokens stored so far (BYREF, incremented)
'   type, value,
'   line, column       - contents of the new token
'
' The array is only resized when index `count` does not exist yet, and it then
' grows geometrically. This avoids copying the whole array for every token and
' never shrinks an array the caller has already sized. Callers must rely on
' `count`, not UBOUND(tokens), for the number of valid entries.
' NOTE(review): relies on UBOUND returning -1 for a not-yet-allocated dynamic
' array (FreeBASIC behaviour) - confirm for other dialects.
SUB AddToken(tokens() AS Token, BYREF count AS INTEGER, type AS TokenType, value AS STRING, line AS INTEGER, column AS INTEGER)
    IF count > UBOUND(tokens) THEN
        REDIM PRESERVE tokens(count * 2 + 15)
    END IF
    tokens(count).type = type
    tokens(count).value = value
    tokens(count).line = line
    tokens(count).column = column
    count = count + 1
END SUB
' Returns TRUE when c compares within "a".."z" or "A".."Z"
' (ASCII letters for a one-character string), FALSE otherwise.
FUNCTION IsAlpha(c AS STRING) AS BOOLEAN
    SELECT CASE c
        CASE "a" TO "z", "A" TO "Z"
            IsAlpha = TRUE
        CASE ELSE
            IsAlpha = FALSE
    END SELECT
END FUNCTION
' Returns TRUE when c compares within "0".."9"
' (a decimal digit for a one-character string), FALSE otherwise.
FUNCTION IsDigit(c AS STRING) AS BOOLEAN
    SELECT CASE c
        CASE "0" TO "9"
            IsDigit = TRUE
        CASE ELSE
            IsDigit = FALSE
    END SELECT
END FUNCTION
语法分析器 (parser.bas)
' Kinds of abstract-syntax-tree nodes.
ENUM ASTNodeType
NODE_PROGRAM ' root node; its children are the top-level statements
NODE_PRINT ' PRINT statement; child 0 is the expression to print
NODE_ASSIGNMENT ' assignment; child 0 is the target variable, child 1 the value
NODE_VARIABLE ' variable reference; value holds the variable name
NODE_LITERAL ' number or string literal; value holds the token text
NODE_BINARY_OP ' binary operation; value holds the operator, children are the operands
NODE_IF
NODE_IF_ELSE
NODE_FOR_LOOP
NODE_WHILE_LOOP
NODE_FUNCTION_DECL
NODE_FUNCTION_CALL
NODE_RETURN
END ENUM
' One node of the abstract syntax tree.
TYPE ASTNode
type AS ASTNodeType ' node kind
value AS STRING ' node text: variable name, literal text or operator, depending on type
children() AS ASTNode ' child nodes; appended through AddChild
childCount AS INTEGER ' number of valid entries in children()
END TYPE
' Cursor over the token stream used by all parsing routines.
TYPE ParserState
tokens() AS Token ' token stream being parsed
position AS INTEGER ' zero-based index of the current token
tokenCount AS INTEGER ' number of valid entries in tokens()
currentToken AS Token ' copy of tokens(position), or a synthetic EOF token past the end
END TYPE
' Prepares a parser state for a freshly tokenized program.
'
' Parameters:
'   state    - parser state to initialise
'   tokens() - token stream
'   count    - number of valid tokens in tokens()
'
' The cursor is placed on the first token. When count is 0 there is no first
' token to read, so the current token is set to a synthetic EOF, the same
' convention ParserNext uses once it runs past the end.
' NOTE(review): the whole-array assignment below is dialect-dependent - confirm
' it copies the array in the target compiler.
SUB ParserInit(state AS ParserState, tokens() AS Token, count AS INTEGER)
    state.tokens = tokens
    state.tokenCount = count
    state.position = 0
    IF count > 0 THEN
        state.currentToken = tokens(0)
    ELSE
        state.currentToken.type = TOKEN_EOF
        state.currentToken.value = ""
    END IF
END SUB
' Moves the cursor one token forward.
' Once the cursor is past the last token, the current token becomes an EOF
' token with an empty value (its line/column fields are left as they were).
SUB ParserNext(state AS ParserState)
    DIM nextIndex AS INTEGER
    nextIndex = state.position + 1
    state.position = nextIndex

    IF nextIndex >= state.tokenCount THEN
        state.currentToken.type = TOKEN_EOF
        state.currentToken.value = ""
    ELSE
        state.currentToken = state.tokens(nextIndex)
    END IF
END SUB
' Parses a whole program: statements are read until the EOF token and become
' the children of a NODE_PROGRAM root, which is returned.
'
' Some error paths of the statement parsers report a problem without consuming
' the offending token. To guarantee termination, the loop skips one token
' whenever a call to ParseStatement made no progress.
FUNCTION ParseProgram(state AS ParserState) AS ASTNode
    DIM program AS ASTNode
    DIM startPosition AS INTEGER

    program.type = NODE_PROGRAM
    program.childCount = 0

    WHILE state.currentToken.type <> TOKEN_EOF
        DIM statement AS ASTNode
        startPosition = state.position
        statement = ParseStatement(state)
        AddChild program, statement

        ' No token consumed: step over it so the loop cannot spin forever.
        IF state.position = startPosition THEN
            ParserNext state
        END IF
    WEND

    ParseProgram = program
END FUNCTION
' Parses one statement and returns its AST node.
'
' Dispatch rules:
'   * A TOKEN_KEYWORD selects the matching statement parser; the keyword text is
'     compared case-insensitively. Tokens of any other type never select a
'     keyword parser, so a string literal such as "PRINT" is not mistaken for
'     the PRINT statement.
'   * An identifier immediately followed by the "=" operator is an assignment.
'   * Anything else is parsed as an expression.
FUNCTION ParseStatement(state AS ParserState) AS ASTNode
    DIM keyword AS STRING
    DIM isAssignment AS BOOLEAN

    keyword = ""
    IF state.currentToken.type = TOKEN_KEYWORD THEN
        keyword = UCASE$(state.currentToken.value)
    END IF

    SELECT CASE keyword
        CASE "PRINT"
            ParseStatement = ParsePrint(state)
        CASE "IF"
            ParseStatement = ParseIf(state)
        CASE "FOR"
            ParseStatement = ParseFor(state)
        CASE "WHILE"
            ParseStatement = ParseWhile(state)
        CASE "DIM"
            ParseStatement = ParseDim(state)
        CASE "FUNCTION"
            ParseStatement = ParseFunction(state)
        CASE "RETURN"
            ParseStatement = ParseReturn(state)
        CASE ELSE
            ' One-token lookahead; the nested IFs keep the array access behind
            ' the bounds check (AND does not short-circuit).
            isAssignment = FALSE
            IF state.currentToken.type = TOKEN_IDENTIFIER THEN
                IF state.position + 1 < state.tokenCount THEN
                    IF state.tokens(state.position + 1).type = TOKEN_OPERATOR THEN
                        IF state.tokens(state.position + 1).value = "=" THEN
                            isAssignment = TRUE
                        END IF
                    END IF
                END IF
            END IF

            IF isAssignment THEN
                ParseStatement = ParseAssignment(state)
            ELSE
                ParseStatement = ParseExpression(state)
            END IF
    END SELECT
END FUNCTION
' Parses "PRINT <expression>".
' The current token is consumed as the PRINT keyword; the returned NODE_PRINT
' node has the parsed expression as its only child.
FUNCTION ParsePrint(state AS ParserState) AS ASTNode
    DIM result AS ASTNode
    DIM argument AS ASTNode

    ' Step over the PRINT keyword, then read what is to be printed.
    ParserNext state
    argument = ParseExpression(state)

    result.type = NODE_PRINT
    result.childCount = 0
    AddChild result, argument

    ParsePrint = result
END FUNCTION
' Parses "<variable> = <expression>".
' Returns a NODE_ASSIGNMENT whose child 0 is a NODE_VARIABLE carrying the target
' name and whose child 1 is the parsed right-hand side.
FUNCTION ParseAssignment(state AS ParserState) AS ASTNode
    DIM target AS ASTNode
    DIM rhs AS ASTNode
    DIM result AS ASTNode

    ' The current token names the variable being assigned.
    target.type = NODE_VARIABLE
    target.value = state.currentToken.value

    ParserNext state ' step over the variable name
    ParserNext state ' step over the "=" sign

    rhs = ParseExpression(state)

    result.type = NODE_ASSIGNMENT
    result.childCount = 0
    AddChild result, target
    AddChild result, rhs

    ParseAssignment = result
END FUNCTION
' Parses an additive-level expression:
'     term { ("+" | "-" | "OR" | "AND") term }
' Operators are left-associative; each one yields a NODE_BINARY_OP whose value
' is the operator text and whose children are the left and right operands.
' NOTE(review): comparison operators (=, <, >, <=, >=, <>) are not parsed here
' or in ParseTerm/ParseFactor, although EvaluateBinaryOp evaluates them.
FUNCTION ParseExpression(state AS ParserState) AS ASTNode
    DIM lhs AS ASTNode
    DIM opText AS STRING

    lhs = ParseTerm(state)

    WHILE state.currentToken.value = "+" OR state.currentToken.value = "-" OR _
          state.currentToken.value = "OR" OR state.currentToken.value = "AND"
        ' Remember the operator before stepping over it.
        opText = state.currentToken.value
        ParserNext state

        DIM rhs AS ASTNode
        rhs = ParseTerm(state)

        DIM binOp AS ASTNode
        binOp.type = NODE_BINARY_OP
        binOp.value = opText
        binOp.childCount = 0
        AddChild binOp, lhs
        AddChild binOp, rhs

        ' The combined node becomes the left operand of the next operator.
        lhs = binOp
    WEND

    ParseExpression = lhs
END FUNCTION
' Parses a multiplicative-level expression:
'     factor { ("*" | "/" | "MOD") factor }
' Operators are left-associative; each one yields a NODE_BINARY_OP whose value
' is the operator text and whose children are the left and right operands.
FUNCTION ParseTerm(state AS ParserState) AS ASTNode
    DIM lhs AS ASTNode
    DIM opText AS STRING

    lhs = ParseFactor(state)

    WHILE state.currentToken.value = "*" OR state.currentToken.value = "/" OR _
          state.currentToken.value = "MOD"
        ' Remember the operator before stepping over it.
        opText = state.currentToken.value
        ParserNext state

        DIM rhs AS ASTNode
        rhs = ParseFactor(state)

        DIM binOp AS ASTNode
        binOp.type = NODE_BINARY_OP
        binOp.value = opText
        binOp.childCount = 0
        AddChild binOp, lhs
        AddChild binOp, rhs

        ' The combined node becomes the left operand of the next operator.
        lhs = binOp
    WEND

    ParseTerm = lhs
END FUNCTION
' Parses a primary expression:
'   * number or string literal      -> NODE_LITERAL
'   * identifier followed by "("    -> delegated to ParseFunctionCall
'   * any other identifier          -> NODE_VARIABLE
'   * "(" expression ")"            -> the inner expression
' Anything else is reported through ReportError; the error paths for an
' unexpected operator or token do not consume the offending token.
FUNCTION ParseFactor(state AS ParserState) AS ASTNode
    DIM node AS ASTNode
    DIM isCall AS BOOLEAN

    SELECT CASE state.currentToken.type
        CASE TOKEN_NUMBER, TOKEN_STRING
            node.type = NODE_LITERAL
            node.value = state.currentToken.value
            ParserNext state

        CASE TOKEN_IDENTIFIER
            ' One-token lookahead for a call. The IFs are nested because AND
            ' does not short-circuit: the array access must only happen after
            ' the bounds check has succeeded.
            isCall = FALSE
            IF state.position + 1 < state.tokenCount THEN
                IF state.tokens(state.position + 1).value = "(" THEN
                    isCall = TRUE
                END IF
            END IF

            IF isCall THEN
                node = ParseFunctionCall(state)
            ELSE
                node.type = NODE_VARIABLE
                node.value = state.currentToken.value
                ParserNext state
            END IF

        CASE TOKEN_OPERATOR
            IF state.currentToken.value = "(" THEN
                ParserNext state ' step over "("
                node = ParseExpression(state)
                IF state.currentToken.value <> ")" THEN
                    ReportError "Expected ')'"
                END IF
                ParserNext state ' step over ")"
            ELSE
                ReportError "Unexpected operator: " + state.currentToken.value
            END IF

        CASE ELSE
            ReportError "Unexpected token: " + state.currentToken.value
    END SELECT

    ParseFactor = node
END FUNCTION
' Appends child to parent.children() and increments parent.childCount.
' The children array is resized so that its last index is the new child's slot.
SUB AddChild(parent AS ASTNode, child AS ASTNode)
    DIM slot AS INTEGER
    slot = parent.childCount

    REDIM PRESERVE parent.children(slot)
    parent.children(slot) = child
    parent.childCount = slot + 1
END SUB
解释执行引擎 (interpreter.bas)
' Tagged runtime value: `type` selects which of the value fields is meaningful.
TYPE VariableValue
type AS INTEGER ' 0=integer, 1=float, 2=string, 3=boolean
intValue AS INTEGER ' payload when type = 0
floatValue AS SINGLE ' payload when type = 1
stringValue AS STRING ' payload when type = 2
boolValue AS BOOLEAN ' payload when type = 3
END TYPE
' Interpreter state shared by all Interpret*/Evaluate* routines.
TYPE RuntimeEnvironment
variables AS Collection ' variable table
functions AS Collection ' function table
callStack() AS STRING ' call stack; InitRuntime sizes it with REDIM (100)
stackSize AS INTEGER ' call-stack size; InitRuntime sets it to 0
END TYPE
' Resets env to an empty runtime: an empty call stack (storage sized with
' REDIM (100), stackSize 0) and fresh, empty variable and function tables.
SUB InitRuntime(env AS RuntimeEnvironment)
    ' Call stack first ...
    REDIM env.callStack(100)
    env.stackSize = 0

    ' ... then the two lookup tables.
    env.functions = NEW Collection
    env.variables = NEW Collection
END SUB
' Executes every top-level statement of program, in order, against env.
SUB InterpretProgram(program AS ASTNode, env AS RuntimeEnvironment)
    DIM index AS INTEGER
    DIM total AS INTEGER

    ' The statement count is read once, before the first statement runs.
    total = program.childCount
    index = 0
    WHILE index < total
        InterpretStatement program.children(index), env
        index = index + 1
    WEND
END SUB
' Executes a single statement node by handing it to the routine that matches
' its node type. Node types without a handler are reported via ReportError.
SUB InterpretStatement(stmt AS ASTNode, env AS RuntimeEnvironment)
    IF stmt.type = NODE_PRINT THEN
        InterpretPrint stmt, env
    ELSEIF stmt.type = NODE_ASSIGNMENT THEN
        InterpretAssignment stmt, env
    ELSEIF stmt.type = NODE_IF THEN
        InterpretIf stmt, env
    ELSEIF stmt.type = NODE_IF_ELSE THEN
        InterpretIfElse stmt, env
    ELSEIF stmt.type = NODE_FOR_LOOP THEN
        InterpretFor stmt, env
    ELSEIF stmt.type = NODE_WHILE_LOOP THEN
        InterpretWhile stmt, env
    ELSEIF stmt.type = NODE_FUNCTION_CALL THEN
        InterpretFunctionCall stmt, env
    ELSE
        ReportError "Unknown statement type: " + STR$(stmt.type)
    END IF
END SUB
' Executes a PRINT statement: evaluates child 0 of stmt and prints the result
' according to its runtime type.
'
' EvaluateExpression is a function, so it is called with parentheses.
' Booleans are printed as TRUE / FALSE, the same text GetStringValue produces
' and the same text the generated C code prints, so interpreted and compiled
' programs agree.
SUB InterpretPrint(stmt AS ASTNode, env AS RuntimeEnvironment)
    DIM value AS VariableValue
    value = EvaluateExpression(stmt.children(0), env)

    SELECT CASE value.type
        CASE 0: PRINT value.intValue
        CASE 1: PRINT value.floatValue
        CASE 2: PRINT value.stringValue
        CASE 3: PRINT IIF(value.boolValue, "TRUE", "FALSE")
    END SELECT
END SUB
' Executes an assignment: child 0 of stmt names the target variable, child 1 is
' the expression whose value is stored through SetVariable.
'
' EvaluateExpression is a function, so it is called with parentheses.
SUB InterpretAssignment(stmt AS ASTNode, env AS RuntimeEnvironment)
    DIM varName AS STRING
    DIM value AS VariableValue

    varName = stmt.children(0).value
    value = EvaluateExpression(stmt.children(1), env)
    SetVariable env, varName, value
END SUB
' Evaluates an expression node and returns its runtime value.
' Literals, variables, binary operations and function calls are delegated to
' their dedicated routines; any other node type is reported via ReportError,
' in which case the function result is left unassigned.
'
' The delegated routines are functions, so they are called with parentheses.
FUNCTION EvaluateExpression(expr AS ASTNode, env AS RuntimeEnvironment) AS VariableValue
    SELECT CASE expr.type
        CASE NODE_LITERAL
            EvaluateExpression = EvaluateLiteral(expr)
        CASE NODE_VARIABLE
            EvaluateExpression = GetVariable(env, expr.value)
        CASE NODE_BINARY_OP
            EvaluateExpression = EvaluateBinaryOp(expr, env)
        CASE NODE_FUNCTION_CALL
            EvaluateExpression = EvaluateFunctionCall(expr, env)
        CASE ELSE
            ReportError "Unknown expression type: " + STR$(expr.type)
    END SELECT
END FUNCTION
' Converts the text of a literal node into a runtime value.
'   * numeric text containing "."      -> float   (type 1)
'   * any other numeric text           -> integer (type 0)
'   * exactly "TRUE" or "FALSE"        -> boolean (type 3)
'   * everything else                  -> string  (type 2)
FUNCTION EvaluateLiteral(node AS ASTNode) AS VariableValue
    DIM literal AS VariableValue
    DIM raw AS STRING
    raw = node.value

    IF IsNumeric(raw) THEN
        ' A decimal point distinguishes floats from integers.
        IF INSTR(raw, ".") > 0 THEN
            literal.type = 1
            literal.floatValue = VAL(raw)
        ELSE
            literal.type = 0
            literal.intValue = VAL(raw)
        END IF
    ELSE
        IF raw = "TRUE" OR raw = "FALSE" THEN
            literal.type = 3
            literal.boolValue = (raw = "TRUE")
        ELSE
            literal.type = 2
            literal.stringValue = raw
        END IF
    END IF

    EvaluateLiteral = literal
END FUNCTION
' Private helper: three-way comparison of two runtime values.
' Returns -1, 0 or 1. Two strings are compared as text; every other
' combination is compared numerically through GetFloatValue.
FUNCTION CompareValues(a AS VariableValue, b AS VariableValue) AS INTEGER
    IF a.type = 2 AND b.type = 2 THEN
        IF a.stringValue < b.stringValue THEN
            CompareValues = -1
        ELSEIF a.stringValue > b.stringValue THEN
            CompareValues = 1
        ELSE
            CompareValues = 0
        END IF
    ELSE
        DIM x AS SINGLE, y AS SINGLE
        x = GetFloatValue(a)
        y = GetFloatValue(b)
        IF x < y THEN
            CompareValues = -1
        ELSEIF x > y THEN
            CompareValues = 1
        ELSE
            CompareValues = 0
        END IF
    END IF
END FUNCTION

' Evaluates a NODE_BINARY_OP: both children are evaluated, then combined
' according to the operator stored in node.value.
'
'   +        string concatenation when either operand is a string; integer
'            addition for two integers; float addition otherwise
'   - *      integer arithmetic for two integers, float arithmetic otherwise
'   /        always float; division by zero is reported via ReportError
'   MOD      integer remainder; a zero divisor is reported via ReportError
'   AND OR   boolean result; an operand counts as true when its numeric value
'            is non-zero
'   = == < > <= >= <>
'            boolean result; two strings compare as text, anything else
'            numerically
' Unknown operators are reported via ReportError instead of silently yielding
' a default value.
FUNCTION EvaluateBinaryOp(node AS ASTNode, env AS RuntimeEnvironment) AS VariableValue
    DIM lhs AS VariableValue, rhs AS VariableValue
    DIM result AS VariableValue
    DIM divisor AS INTEGER

    lhs = EvaluateExpression(node.children(0), env)
    rhs = EvaluateExpression(node.children(1), env)

    SELECT CASE node.value
        CASE "+"
            ' The string test comes first so that float + string concatenates
            ' instead of being swallowed by the float branch.
            IF lhs.type = 2 OR rhs.type = 2 THEN
                result.type = 2
                result.stringValue = GetStringValue(lhs) + GetStringValue(rhs)
            ELSEIF lhs.type = 0 AND rhs.type = 0 THEN
                result.type = 0
                result.intValue = lhs.intValue + rhs.intValue
            ELSE
                result.type = 1
                result.floatValue = GetFloatValue(lhs) + GetFloatValue(rhs)
            END IF

        CASE "-"
            IF lhs.type = 0 AND rhs.type = 0 THEN
                result.type = 0
                result.intValue = lhs.intValue - rhs.intValue
            ELSE
                result.type = 1
                result.floatValue = GetFloatValue(lhs) - GetFloatValue(rhs)
            END IF

        CASE "*"
            IF lhs.type = 0 AND rhs.type = 0 THEN
                result.type = 0
                result.intValue = lhs.intValue * rhs.intValue
            ELSE
                result.type = 1
                result.floatValue = GetFloatValue(lhs) * GetFloatValue(rhs)
            END IF

        CASE "/"
            result.type = 1
            IF GetFloatValue(rhs) = 0 THEN
                ReportError "Division by zero"
            ELSE
                result.floatValue = GetFloatValue(lhs) / GetFloatValue(rhs)
            END IF

        CASE "MOD"
            result.type = 0
            divisor = GetFloatValue(rhs)
            IF divisor = 0 THEN
                ReportError "Division by zero"
            ELSE
                result.intValue = GetFloatValue(lhs) MOD divisor
            END IF

        CASE "AND"
            result.type = 3
            result.boolValue = (GetFloatValue(lhs) <> 0) AND (GetFloatValue(rhs) <> 0)

        CASE "OR"
            result.type = 3
            result.boolValue = (GetFloatValue(lhs) <> 0) OR (GetFloatValue(rhs) <> 0)

        CASE "=", "=="
            result.type = 3
            result.boolValue = (CompareValues(lhs, rhs) = 0)

        CASE "<"
            result.type = 3
            result.boolValue = (CompareValues(lhs, rhs) < 0)

        CASE ">"
            result.type = 3
            result.boolValue = (CompareValues(lhs, rhs) > 0)

        CASE "<="
            result.type = 3
            result.boolValue = (CompareValues(lhs, rhs) <= 0)

        CASE ">="
            result.type = 3
            result.boolValue = (CompareValues(lhs, rhs) >= 0)

        CASE "<>"
            result.type = 3
            result.boolValue = (CompareValues(lhs, rhs) <> 0)

        CASE ELSE
            ReportError "Unknown operator: " + node.value
    END SELECT

    EvaluateBinaryOp = result
END FUNCTION
' Numeric view of a runtime value:
'   integer -> its value, float -> its value,
'   string  -> VAL of the text, boolean -> 1 for true and 0 for false.
' Other type tags leave the result unassigned.
FUNCTION GetFloatValue(value AS VariableValue) AS SINGLE
    IF value.type = 0 THEN
        GetFloatValue = value.intValue
    ELSEIF value.type = 1 THEN
        GetFloatValue = value.floatValue
    ELSEIF value.type = 2 THEN
        GetFloatValue = VAL(value.stringValue)
    ELSEIF value.type = 3 THEN
        IF value.boolValue THEN
            GetFloatValue = 1
        ELSE
            GetFloatValue = 0
        END IF
    END IF
END FUNCTION
' Text view of a runtime value:
'   integer / float -> STR$ of the number, string -> the text itself,
'   boolean -> "TRUE" or "FALSE".
' Other type tags leave the result unassigned.
FUNCTION GetStringValue(value AS VariableValue) AS STRING
    IF value.type = 0 THEN
        GetStringValue = STR$(value.intValue)
    ELSEIF value.type = 1 THEN
        GetStringValue = STR$(value.floatValue)
    ELSEIF value.type = 2 THEN
        GetStringValue = value.stringValue
    ELSEIF value.type = 3 THEN
        IF value.boolValue THEN
            GetStringValue = "TRUE"
        ELSE
            GetStringValue = "FALSE"
        END IF
    END IF
END FUNCTION
代码生成器 (codegen.bas)
' Mutable state of the C code generator.
TYPE CodeGenState
output AS STRING ' generated C text; CodeGenAddLine appends one line at a time
indentLevel AS INTEGER ' current nesting depth; CodeGenAddLine emits one space per level
labelCount AS INTEGER ' counter used by GenerateLiteralCode to number temporaries
END TYPE
' Puts a code-generator state into its starting condition: counters at zero
' and no output text.
SUB CodeGenInit(state AS CodeGenState)
    state.labelCount = 0
    state.indentLevel = 0
    state.output = ""
END SUB
' Translates a program AST into the text of a complete C source file.
'
' Parameters:
'   program - NODE_PROGRAM root whose children are the statements to translate
' Returns:
'   the generated C source: a fixed preamble (includes, the ValueType enum, the
'   VariableValue struct and the global variable table), followed by main()
'   containing the code produced by GenerateProgramCode.
FUNCTION GenerateCCode(program AS ASTNode) AS STRING
DIM state AS CodeGenState
CodeGenInit state
' Emit the headers and the runtime type definitions.
CodeGenAddLine state, "#include <stdio.h>"
CodeGenAddLine state, "#include <stdlib.h>"
CodeGenAddLine state, "#include <string.h>"
CodeGenAddLine state, ""
CodeGenAddLine state, "// 运行时类型定义"
CodeGenAddLine state, "typedef enum {"
CodeGenAddLine state, " TYPE_INT,"
CodeGenAddLine state, " TYPE_FLOAT,"
CodeGenAddLine state, " TYPE_STRING,"
CodeGenAddLine state, " TYPE_BOOL"
CodeGenAddLine state, "} ValueType;"
CodeGenAddLine state, ""
CodeGenAddLine state, "typedef struct {"
CodeGenAddLine state, " ValueType type;"
CodeGenAddLine state, " union {"
CodeGenAddLine state, " int intValue;"
CodeGenAddLine state, " float floatValue;"
CodeGenAddLine state, " char* stringValue;"
CodeGenAddLine state, " int boolValue;"
CodeGenAddLine state, " };"
CodeGenAddLine state, "} VariableValue;"
CodeGenAddLine state, ""
' Global variable table of the generated program: parallel arrays of values
' and names with 100 slots each.
CodeGenAddLine state, "// 全局变量存储"
CodeGenAddLine state, "VariableValue* variables[100];"
CodeGenAddLine state, "char* variableNames[100];"
CodeGenAddLine state, "int variableCount = 0;"
CodeGenAddLine state, ""
CodeGenAddLine state, "// 函数声明"
CodeGenAddLine state, "VariableValue* evaluateExpression();"
CodeGenAddLine state, ""
CodeGenAddLine state, "int main() {"
state.indentLevel = state.indentLevel + 1
' Emit the translated statements inside main().
GenerateProgramCode state, program
state.indentLevel = state.indentLevel - 1
' The return line carries its own leading space because the indent level has
' already been restored.
CodeGenAddLine state, " return 0;"
CodeGenAddLine state, "}"
GenerateCCode = state.output
END FUNCTION
' Emits C code for every top-level statement of program, in order.
SUB GenerateProgramCode(state AS CodeGenState, program AS ASTNode)
    DIM index AS INTEGER
    DIM total AS INTEGER

    ' The statement count is read once, before any code is generated.
    total = program.childCount
    index = 0
    WHILE index < total
        GenerateStatementCode state, program.children(index)
        index = index + 1
    WEND
END SUB
' Emits C code for one statement by handing it to the generator that matches
' its node type. Node types without a generator are reported via ReportError.
SUB GenerateStatementCode(state AS CodeGenState, stmt AS ASTNode)
    IF stmt.type = NODE_PRINT THEN
        GeneratePrintCode state, stmt
    ELSEIF stmt.type = NODE_ASSIGNMENT THEN
        GenerateAssignmentCode state, stmt
    ELSEIF stmt.type = NODE_IF THEN
        GenerateIfCode state, stmt
    ELSEIF stmt.type = NODE_FOR_LOOP THEN
        GenerateForCode state, stmt
    ELSE
        ReportError "Unsupported statement type for code generation: " + STR$(stmt.type)
    END IF
END SUB
' Emits C code for a PRINT statement.
'
' The expression code is generated first (GenerateExpressionCode returns the C
' expression text that yields a VariableValue*), then a braced block is emitted
' that stores the pointer in `result` and prints it with a printf chosen by
' result->type. Booleans are printed as TRUE / FALSE.
SUB GeneratePrintCode(state AS CodeGenState, stmt AS ASTNode)
DIM exprCode AS STRING
exprCode = GenerateExpressionCode(state, stmt.children(0))
CodeGenAddLine state, "// PRINT语句"
CodeGenAddLine state, "{"
state.indentLevel = state.indentLevel + 1
CodeGenAddLine state, "VariableValue* result = " + exprCode + ";"
' Doubled quotes inside the BASIC literals become single quotes in the C text.
CodeGenAddLine state, "switch (result->type) {"
CodeGenAddLine state, " case TYPE_INT:"
CodeGenAddLine state, " printf(""%d\n"", result->intValue);"
CodeGenAddLine state, " break;"
CodeGenAddLine state, " case TYPE_FLOAT:"
CodeGenAddLine state, " printf(""%f\n"", result->floatValue);"
CodeGenAddLine state, " break;"
CodeGenAddLine state, " case TYPE_STRING:"
CodeGenAddLine state, " printf(""%s\n"", result->stringValue);"
CodeGenAddLine state, " break;"
CodeGenAddLine state, " case TYPE_BOOL:"
CodeGenAddLine state, " printf(result->boolValue ? ""TRUE\n"" : ""FALSE\n"");"
CodeGenAddLine state, " break;"
CodeGenAddLine state, "}"
state.indentLevel = state.indentLevel - 1
CodeGenAddLine state, "}"
END SUB
' Emits C code for an assignment statement.
'
' Child 0 of stmt names the target variable, child 1 is the value expression.
' The expression code is generated first; the emitted block then searches the
' generated program's variable table by name and either overwrites the
' existing entry or appends a new one.
'
' Before appending, the emitted code checks that the table still has a free
' slot. The capacity is derived with sizeof in the generated C, so the check
' follows whatever size the table is declared with; without it, a program with
' more distinct variables than slots would write past the end of both arrays.
SUB GenerateAssignmentCode(state AS CodeGenState, stmt AS ASTNode)
    DIM varName AS STRING
    DIM exprCode AS STRING

    varName = stmt.children(0).value
    exprCode = GenerateExpressionCode(state, stmt.children(1))

    CodeGenAddLine state, "// 赋值语句: " + varName
    CodeGenAddLine state, "{"
    state.indentLevel = state.indentLevel + 1

    ' Look for an existing variable with this name.
    CodeGenAddLine state, "// 检查变量是否已存在"
    CodeGenAddLine state, "int found = 0;"
    CodeGenAddLine state, "for (int i = 0; i < variableCount; i++) {"
    CodeGenAddLine state, " if (strcmp(variableNames[i], """ + varName + """) == 0) {"
    CodeGenAddLine state, " // 更新现有变量"
    CodeGenAddLine state, " *variables[i] = *" + exprCode + ";"
    CodeGenAddLine state, " found = 1;"
    CodeGenAddLine state, " break;"
    CodeGenAddLine state, " }"
    CodeGenAddLine state, "}"
    CodeGenAddLine state, ""

    ' Not found: append a new entry, refusing to overflow the table.
    CodeGenAddLine state, "if (!found) {"
    CodeGenAddLine state, " // 创建新变量"
    CodeGenAddLine state, " if (variableCount >= (int)(sizeof(variables) / sizeof(variables[0]))) {"
    CodeGenAddLine state, " fprintf(stderr, ""Too many variables\n"");"
    CodeGenAddLine state, " exit(1);"
    CodeGenAddLine state, " }"
    CodeGenAddLine state, " variables[variableCount] = malloc(sizeof(VariableValue));"
    CodeGenAddLine state, " *variables[variableCount] = *" + exprCode + ";"
    CodeGenAddLine state, " variableNames[variableCount] = malloc(strlen(""" + varName + """) + 1);"
    CodeGenAddLine state, " strcpy(variableNames[variableCount], """ + varName + """);"
    CodeGenAddLine state, " variableCount++;"
    CodeGenAddLine state, "}"

    state.indentLevel = state.indentLevel - 1
    CodeGenAddLine state, "}"
END SUB
' Emits whatever C statements an expression needs and returns the C text that
' refers to its value. Literals, variables and binary operations are delegated
' to their generators; any other node type is reported via ReportError and
' yields the text "NULL".
FUNCTION GenerateExpressionCode(state AS CodeGenState, expr AS ASTNode) AS STRING
    IF expr.type = NODE_LITERAL THEN
        GenerateExpressionCode = GenerateLiteralCode(state, expr)
    ELSEIF expr.type = NODE_VARIABLE THEN
        GenerateExpressionCode = GenerateVariableCode(state, expr)
    ELSEIF expr.type = NODE_BINARY_OP THEN
        GenerateExpressionCode = GenerateBinaryOpCode(state, expr)
    ELSE
        ReportError "Unsupported expression type for code generation: " + STR$(expr.type)
        GenerateExpressionCode = "NULL"
    END IF
END FUNCTION
' Private helper: makes text safe to place between double quotes in C source.
' Backslash and double quote are prefixed with a backslash; line feed and
' carriage return become \n and \r. CHR$ codes are used for the special
' characters so the result does not depend on string-escape settings.
FUNCTION EscapeCString(text AS STRING) AS STRING
    DIM escaped AS STRING, ch AS STRING, backslash AS STRING
    DIM i AS INTEGER

    backslash = CHR$(92)
    escaped = ""
    FOR i = 1 TO LEN(text)
        ch = MID$(text, i, 1)
        SELECT CASE ch
            CASE CHR$(92), CHR$(34)
                escaped = escaped + backslash + ch
            CASE CHR$(10)
                escaped = escaped + backslash + "n"
            CASE CHR$(13)
                escaped = escaped + backslash + "r"
            CASE ELSE
                escaped = escaped + ch
        END SELECT
    NEXT i
    EscapeCString = escaped
END FUNCTION

' Emits C statements that allocate and fill a VariableValue for a literal node
' and returns the name of the C temporary that points to it.
'
'   * numeric text containing "."   -> TYPE_FLOAT
'   * any other numeric text        -> TYPE_INT
'   * exactly "TRUE" or "FALSE"     -> TYPE_BOOL
'   * everything else               -> TYPE_STRING (text escaped for C)
'
' The temporary is named tempN, N taken from state.labelCount. STR$ puts a
' leading blank in front of non-negative numbers, so the number is trimmed to
' keep the name a valid C identifier.
FUNCTION GenerateLiteralCode(state AS CodeGenState, expr AS ASTNode) AS STRING
    DIM tempVar AS STRING
    DIM quoted AS STRING

    tempVar = "temp" + LTRIM$(STR$(state.labelCount))
    state.labelCount = state.labelCount + 1

    ' Every kind of literal starts with the same allocation.
    CodeGenAddLine state, "VariableValue* " + tempVar + " = malloc(sizeof(VariableValue));"

    IF IsNumeric(expr.value) THEN
        IF INSTR(expr.value, ".") > 0 THEN
            CodeGenAddLine state, tempVar + "->type = TYPE_FLOAT;"
            CodeGenAddLine state, tempVar + "->floatValue = " + expr.value + ";"
        ELSE
            CodeGenAddLine state, tempVar + "->type = TYPE_INT;"
            CodeGenAddLine state, tempVar + "->intValue = " + expr.value + ";"
        END IF
    ELSEIF expr.value = "TRUE" OR expr.value = "FALSE" THEN
        CodeGenAddLine state, tempVar + "->type = TYPE_BOOL;"
        CodeGenAddLine state, tempVar + "->boolValue = " + IIF(expr.value = "TRUE", "1", "0") + ";"
    ELSE
        ' Quote the text as a C string literal, escaping special characters.
        quoted = CHR$(34) + EscapeCString(expr.value) + CHR$(34)
        CodeGenAddLine state, tempVar + "->type = TYPE_STRING;"
        CodeGenAddLine state, tempVar + "->stringValue = malloc(strlen(" + quoted + ") + 1);"
        CodeGenAddLine state, "strcpy(" + tempVar + "->stringValue, " + quoted + ");"
    END IF

    GenerateLiteralCode = tempVar
END FUNCTION
' Appends one line of text to state.output: one space per indent level, the
' line itself, then a line feed (CHR$(10)).
SUB CodeGenAddLine(state AS CodeGenState, line AS STRING)
    DIM padding AS STRING

    ' No padding at level zero (or below).
    padding = ""
    IF state.indentLevel > 0 THEN
        padding = SPACE$(state.indentLevel)
    END IF

    state.output = state.output + padding + line + CHR$(10)
END SUB
编译器主程序 (compiler.bas)
' Settings collected from the command line by ParseCommandLine.
TYPE CompilerOptions
inputFile AS STRING ' path of the BASIC source file to process
outputFile AS STRING ' base name of the output; ParseCommandLine defaults it to "a.out"
mode AS INTEGER ' 0=interpret, 1=compile
verbose AS BOOLEAN ' TRUE to print progress messages
END TYPE
' Program entry point: reads the options, loads the source file, runs the lexer
' and parser, then either interprets the program (mode 0) or generates C code,
' writes it to "<outputFile>.c" and invokes gcc on it (mode 1).
'
' The token array is declared dynamic (REDIM) because the lexer resizes it
' through AddToken; a fixed-size array cannot be resized.
' File names are wrapped in double quotes on the gcc command line so that paths
' containing spaces are passed as single arguments.
SUB Main()
    DIM options AS CompilerOptions
    DIM sourceCode AS STRING
    REDIM tokens(0) AS Token
    DIM tokenCount AS INTEGER
    DIM ast AS ASTNode
    DIM env AS RuntimeEnvironment

    ' Command-line options.
    ParseCommandLine options

    ' Load the source text; an empty result is treated as a read failure.
    sourceCode = ReadFile(options.inputFile)
    IF sourceCode = "" THEN
        PRINT "无法读取文件: "; options.inputFile
        EXIT SUB
    END IF

    ' Lexical analysis.
    tokenCount = Tokenize(sourceCode, tokens())
    IF options.verbose THEN
        PRINT "词法分析完成,找到"; tokenCount; "个令牌"
    END IF

    ' Syntax analysis.
    DIM state AS ParserState
    ParserInit state, tokens(), tokenCount
    ast = ParseProgram(state)
    IF options.verbose THEN
        PRINT "语法分析完成,生成AST"
    END IF

    IF options.mode = 0 THEN
        ' Interpreter mode.
        InitRuntime env
        InterpretProgram ast, env
    ELSE
        ' Compiler mode: generate C, save it, hand it to the C compiler.
        DIM cCode AS STRING
        cCode = GenerateCCode(ast)

        IF WriteFile(options.outputFile + ".c", cCode) THEN
            IF options.verbose THEN
                PRINT "C代码已生成: "; options.outputFile; ".c"
            END IF

            DIM cmd AS STRING
            DIM quote AS STRING
            quote = CHR$(34)
            cmd = "gcc -o " + quote + options.outputFile + quote + " " + quote + options.outputFile + ".c" + quote
            SHELL cmd

            IF options.verbose THEN
                PRINT "编译完成,生成可执行文件: "; options.outputFile
            END IF
        ELSE
            PRINT "无法写入文件: "; options.outputFile; ".c"
        END IF
    END IF
END SUB
' Reads a whole text file.
'
' Parameters:
'   filename - path of the file to read
' Returns:
'   the file content with every line terminated by a line feed (CHR$(10)),
'   or "" when the file does not exist. An existing but empty file also
'   yields "".
'
' The file number comes from FREEFILE instead of a hard-coded #1, so the
' routine also works while the caller has other files open.
FUNCTION ReadFile(filename AS STRING) AS STRING
    DIM content AS STRING, line AS STRING
    DIM fileNum AS INTEGER

    content = ""
    IF _FILEEXISTS(filename) THEN
        fileNum = FREEFILE
        OPEN filename FOR INPUT AS #fileNum
        WHILE NOT EOF(fileNum)
            LINE INPUT #fileNum, line
            content = content + line + CHR$(10)
        WEND
        CLOSE #fileNum
    END IF

    ReadFile = content
END FUNCTION
' Writes content to a file, replacing any existing file of that name.
'
' Parameters:
'   filename - path of the file to write
'   content  - text to write; nothing is appended after it
' Returns:
'   TRUE on success, FALSE when opening or writing raised an error.
'
' The file number comes from FREEFILE instead of a hard-coded #1. The error
' trap is removed again on both exits so it cannot catch unrelated errors
' later in the program, and a file that was opened before the failure is
' closed.
FUNCTION WriteFile(filename AS STRING, content AS STRING) AS BOOLEAN
    DIM fileNum AS INTEGER
    DIM opened AS BOOLEAN

    opened = FALSE
    fileNum = FREEFILE

    ON ERROR GOTO ErrorHandler
    OPEN filename FOR OUTPUT AS #fileNum
    opened = TRUE
    PRINT #fileNum, content;
    CLOSE #fileNum
    opened = FALSE
    ON ERROR GOTO 0

    WriteFile = TRUE
    EXIT FUNCTION

ErrorHandler:
    ' Disable the trap first so a failure in here cannot re-enter the handler.
    ON ERROR GOTO 0
    IF opened THEN
        CLOSE #fileNum
    END IF
    WriteFile = FALSE
END FUNCTION
' Fills options from the program's command-line arguments.
'
' Defaults: interpret mode, not verbose, output file "a.out", no input file.
' Recognised arguments:
'   -c          compile mode
'   -o <name>   output file name
'   -v          verbose output
'   other       taken as the input file (the last one wins)
' With no arguments at all, the usage text is printed and the program ends.
' A trailing -o without a file name keeps the default and prints a warning
' instead of being ignored silently.
SUB ParseCommandLine(options AS CompilerOptions)
    DIM i AS INTEGER, argCount AS INTEGER
    DIM arg AS STRING

    ' Defaults.
    options.inputFile = ""
    options.mode = 0
    options.verbose = FALSE
    options.outputFile = "a.out"

    argCount = COMMANDCOUNT
    IF argCount = 0 THEN
        PRINT "用法: bc [选项] 输入文件"
        PRINT "选项:"
        PRINT " -c 编译模式(默认解释模式)"
        PRINT " -o 输出文件名"
        PRINT " -v 详细输出"
        END
    END IF

    ' A WHILE loop is used because -o consumes an extra argument.
    i = 1
    WHILE i <= argCount
        arg = COMMAND$(i)
        SELECT CASE arg
            CASE "-c"
                options.mode = 1
            CASE "-o"
                IF i < argCount THEN
                    i = i + 1
                    options.outputFile = COMMAND$(i)
                ELSE
                    PRINT "警告: 选项 -o 缺少输出文件名,使用默认值: "; options.outputFile
                END IF
            CASE "-v"
                options.verbose = TRUE
            CASE ELSE
                options.inputFile = arg
        END SELECT
        i = i + 1
    WEND
END SUB
自举测试示例 (selfhost.bas)
' Self-hosting smoke test: runs the compiler pipeline (lexer, parser, C code
' generator, gcc) on the compiler's own source file "compiler.bas" and reports
' each stage.
'
' The token array is declared dynamic (REDIM) because the lexer resizes it
' through AddToken; a fixed-size array cannot be resized.
PRINT "开始自举编译测试..."

DIM sourceCode AS STRING
REDIM tokens(0) AS Token
DIM tokenCount AS INTEGER

' Load the compiler's own source code.
sourceCode = ReadFile("compiler.bas")
IF sourceCode = "" THEN
    PRINT "错误:无法读取编译器源代码"
    END
END IF
PRINT "源代码长度:"; LEN(sourceCode); "字符"

' Lexical analysis.
tokenCount = Tokenize(sourceCode, tokens())
PRINT "生成"; tokenCount; "个令牌"

' Syntax analysis.
DIM state AS ParserState
ParserInit state, tokens(), tokenCount
DIM ast AS ASTNode
ast = ParseProgram(state)
PRINT "AST生成完成,包含"; ast.childCount; "个语句"

' Code generation.
DIM cCode AS STRING
cCode = GenerateCCode(ast)

' Save the generated C code and compile it with gcc.
IF WriteFile("selfhost.c", cCode) THEN
    PRINT "C代码生成成功,长度:"; LEN(cCode); "字符"
    SHELL "gcc -o selfhost selfhost.c"
    ' Success is judged by the presence of the output file.
    IF _FILEEXISTS("selfhost") THEN
        PRINT "自举编译成功!生成可执行文件: selfhost"
    ELSE
        PRINT "自举编译失败:无法生成可执行文件"
    END IF
ELSE
    PRINT "错误:无法写入C代码文件"
END IF
项目构建与使用
构建步骤
1. 使用FreeBASIC编译编译器核心:
   fbc compiler.bas -x basicc
2. 测试解释器模式:
   basicc hello.bas
3. 测试编译器模式:
   basicc -c -o hello hello.bas
   ./hello
4. 自举测试:
   basicc -c -o selfhost compiler.bas
扩展计划
- 语言特性扩展:
  - 添加数组支持
  - 实现结构体和自定义类型
  - 支持模块和命名空间
  - 添加异常处理机制
- 性能优化:
  - 实现字节码编译和虚拟机
  - 添加JIT编译支持
  - 优化内存管理
- 工具生态:
  - 开发IDE插件
  - 构建包管理器
  - 创建调试器和性能分析工具
- 跨平台支持:
  - 支持Windows、Linux和macOS
  - 开发WebAssembly后端
  - 移动端支持
结语
这个Basic自举编译器项目不仅是对Basic语言的致敬,也展示了如何用相对简单的工具构建复杂的系统。通过实现解释器和编译器的双重功能,项目体现了Basic语言从交互式编程到系统级开发的演进路径。
对于从Basic开始编程生涯的开发者来说,这个项目既是一次技术挑战,也是一种情怀实践。它证明了无论技术如何变迁,基础原理和创造精神始终是编程的核心价值。
在退休后的时光里,从事这样的项目既能保持思维活跃,又能延续与Basic语言的特殊情缘,实现技术追求与个人情怀的完美结合。
832

被折叠的 条评论
为什么被折叠?



