#include <bits/stdc++.h>
#include <unordered_map>
#include <stdexcept>
using namespace std;
/*--------------------------- Lexical analysis ---------------------------*/
// Reserved words of the source language, mapped to the token-type code the
// lexer writes for each of them.
unordered_map<string, string> key_word_map = {
    {"void",   "VOIDTK"},
    {"int",    "INTTK"},
    {"char",   "CHARTK"},
    {"const",  "CONSTTK"},
    {"main",   "MAINTK"},
    {"if",     "IFTK"},
    {"else",   "ELSETK"},
    {"do",     "DOTK"},
    {"while",  "WHILETK"},
    {"for",    "FORTK"},
    {"return", "RETURNTK"},
    {"scanf",  "SCANFTK"},
    {"printf", "PRINTFTK"},
};
// Operators and delimiters, mapped to the token-type code the lexer writes
// for each of them.
unordered_map<string, string> operator_map = {
    // arithmetic and assignment
    {"+", "PLUS"},
    {"-", "MINU"},
    {"*", "MULT"},
    {"/", "DIV"},
    {"=", "ASSIGN"},
    // relational
    {"==", "EQL"},
    {"!=", "NEQ"},
    {"<",  "LSS"},
    {"<=", "LEQ"},
    {">",  "GRE"},
    {">=", "GEQ"},
    // brackets
    {"(", "LPARENT"},
    {")", "RPARENT"},
    {"[", "LBRACK"},
    {"]", "RBRACK"},
    {"{", "LBRACE"},
    {"}", "RBRACE"},
    // separators
    {";", "SEMICN"},
    {",", "COMMA"},
};
// Returns true when `token` is one of the reserved words in key_word_map.
bool isKeyWord(const string& token) {
    const bool reserved = key_word_map.count(token) != 0;
    return reserved;
}
// Builds the operator lexeme that starts with the already-read character `c`.
// For '=', '!', '<' and '>' the next unread character is inspected and, when
// it is '=', consumed to form the two-character operator; every other `c`
// is returned as a one-character string without touching the stream.
string getOperator(ifstream& fin, char c) {
    const bool may_take_equals = (c == '=' || c == '!' || c == '<' || c == '>');
    if (may_take_equals && fin.peek() == '=') {
        fin.get();
        return string{c, '='};
    }
    return string(1, c);
}
/**
 * Lexical analysis: reads `input_file` character by character and writes one
 * token per line to `lex_temp_file` in the form "<TYPE> <lexeme>".
 *
 * Token classes: identifiers and reserved words, unsigned integer literals,
 * character literals ('x'), string literals ("...") and the operators and
 * delimiters listed in operator_map. Unknown symbols and malformed literals
 * are reported on stderr; lexing continues where possible.
 *
 * Terminates the process with exit(1) if either file cannot be opened.
 */
void lexAnalyze(const string& input_file, const string& lex_temp_file) {
    ifstream fin(input_file);
    ofstream fout(lex_temp_file);
    if (!fin.is_open()) { cerr << "无法打开输入文件: " << input_file << endl; exit(1); }
    if (!fout.is_open()) { cerr << "无法创建临时词法文件: " << lex_temp_file << endl; exit(1); }

    // The <cctype> classifiers have undefined behavior for negative char
    // values, so every call goes through an unsigned char conversion.
    const auto is_space = [](char ch) { return isspace(static_cast<unsigned char>(ch)) != 0; };
    const auto is_digit = [](char ch) { return isdigit(static_cast<unsigned char>(ch)) != 0; };
    const auto is_ident_start = [](char ch) {
        return isalpha(static_cast<unsigned char>(ch)) != 0 || ch == '_';
    };
    const auto is_ident_part = [](char ch) {
        return isalnum(static_cast<unsigned char>(ch)) != 0 || ch == '_';
    };

    // Appends characters to `text` for as long as the next unread character
    // satisfies `accept`; the first rejected character stays in the stream.
    const auto append_while = [&fin](string& text, bool (*accept)(char)) {
        const int eof = ifstream::traits_type::eof();
        for (int next = fin.peek(); next != eof && accept(static_cast<char>(next)); next = fin.peek()) {
            text += static_cast<char>(fin.get());
        }
    };

    char c;
    while (fin.get(c)) {
        if (is_space(c)) continue;

        if (is_ident_start(c)) {
            string token(1, c);
            append_while(token, is_ident_part);
            const auto keyword = key_word_map.find(token);
            if (keyword != key_word_map.end()) fout << keyword->second << ' ' << token << '\n';
            else fout << "IDENFR " << token << '\n';
        } else if (is_digit(c)) {
            string num(1, c);
            append_while(num, is_digit);
            fout << "INTCON " << num << '\n';
        } else if (c == '\'') {
            char ch;
            if (!fin.get(ch)) {
                // Input ended right after the opening quote: there is no
                // character to emit.
                cerr << "警告: 字符常量未闭合" << endl;
                break;
            }
            fout << "CHARCON " << ch << '\n';
            // Consume the closing quote; any other character is left in the
            // stream so it is lexed normally.
            if (fin.peek() == '\'') fin.get();
            else cerr << "警告: 字符常量缺少闭合单引号" << endl;
        } else if (c == '\"') {
            string str;
            char ch;
            bool closed = false;
            while (fin.get(ch)) {
                if (ch == '\"') { closed = true; break; }
                str += ch;
            }
            if (!closed) cerr << "警告: 字符串常量未闭合" << endl;
            fout << "STRCON " << str << '\n';
        } else {
            const string op = getOperator(fin, c);
            const auto found = operator_map.find(op);
            if (found != operator_map.end()) fout << found->second << ' ' << op << '\n';
            else cerr << "警告: 未知符号 '" << c << "'" << endl;
        }
    }
    fin.close();
    fout.close();
}
/*--------------------------- Syntax analysis ---------------------------*/
// One lexical token: its type code and its lexeme.
struct Token {
    string type;
    string value;
};

// Token currently under the parser's cursor.
Token current_token;

// Token file read by the parser.
ifstream lex_input;

// File the parser writes its output to.
ofstream syntax_output;

// Offset in lex_input recorded by save_pos(); -1 means nothing is recorded.
long long prev_pos = -1;
// Records the current read offset of lex_input in prev_pos.
void save_pos() {
    const auto here = lex_input.tellg();
    prev_pos = here;
}
// Seeks lex_input back to the offset recorded in prev_pos and then forgets
// that offset. Does nothing when no offset is recorded (prev_pos == -1).
void restore_pos() {
    if (prev_pos == -1) {
        return;
    }
    lex_input.seekg(prev_pos);
    prev_pos = -1;
}
void next_token() {
save_pos();
if (lex_input >> current_token.type >> current_token.value) {
syntax_output << current_token.type << " " << current_token.value << endl;
} else {
current_token.type = "";
current_token.value = "";
}
}
// Returns the token that follows current_token without consuming it.
//
// The stream offset is restored before returning, so calling this function
// repeatedly always yields the same token. Returns {"", ""} when no further
// token exists. The lexeme is read as the rest of the line so that lexemes
// containing spaces are returned intact.
Token peek_token() {
    const auto start = lex_input.tellg();
    Token next{"", ""};
    if (lex_input >> next.type) {
        string rest;
        getline(lex_input, rest);
        // Drop the single separator blank between type and lexeme.
        if (!rest.empty() && rest.front() == ' ') rest.erase(0, 1);
        next.value = rest;
    } else {
        next = Token{"", ""};
    }
    // Reading may have set eofbit/failbit; clear them so the seek can succeed.
    lex_input.clear();
    // tellg() reports -1 when the stream had already failed; seeking there
    // would only put the stream back into a failed state.
    if (start != ifstream::pos_type(-1)) lex_input.seekg(start);
    return next;
}
// Checks that the current token has type `expected` and advances to the next
// token. Throws runtime_error describing the expected and actual token when
// the types differ.
void match(const string& expected) {
    if (current_token.type == expected) {
        next_token();
        return;
    }
    string message = "语法错误: 预期 '";
    message += expected;
    message += "', 实际 '";
    message += current_token.type;
    message += "' (值: ";
    message += current_token.value;
    message += ")";
    throw runtime_error(message);
}
/* Forward declarations of the recursive-descent routines */

// Program structure and declarations
void parse_program();
void parse_const_decl();
void parse_const_def();
void parse_var_decl();
void parse_var_def();
void parse_type_identifier();
void parse_integer();
void parse_unsigned_integer();

// Functions
void parse_func_def_with_return();
void parse_func_def_without_return();
void parse_decl_head();
void parse_param_list();
void parse_main_func();

// Statements
void parse_compound_stmt();
void parse_stmt_list();
void parse_stmt();
void parse_assign_stmt();
void parse_condition_stmt();
void parse_loop_stmt();
void parse_return_stmt();
void parse_read_stmt();
void parse_write_stmt();
void parse_expr_stmt();
void parse_empty_stmt();

// Expressions
void parse_expr();
void parse_term();
void parse_factor();
void parse_arg_list();
void parse_string();
void parse_condition();
void parse_step();

// Operators
void parse_add_op();
void parse_mul_op();
void parse_rel_op();
/* 程序入口 */
void parse_program() {
// 解析常量说明
if (current_token.type == "CONSTTK") {
parse_const_decl();
}
// 解析变量说明
if (current_token.type == "INTTK" || current_token.type == "CHARTK") {
parse_var_decl();
}
// 解析函数定义
while (true) {
Token next = peek_token();
// 判断是否为函数定义(类型后接标识符和左括号)
if ((current_token.type == "INTTK" || current_token.type == "CHARTK") &&
next.type == "IDENFR" && peek_token().type == "LPARENT") {
parse_func_def_with_return();
} else if (current_token.type == "VOIDTK" && next.type == "IDENFR" &&
peek_token().type == "LPARENT") {
parse_func_def_without_return();
} else {
break;
}
}
// 解析主函数
parse_main_func();
syntax_output << "<程序>" << endl;
}
/* Constant declarations: a run of "const <constant definition> ;" */
void parse_const_decl() {
    // Each pass handles one "const ... ;"; the semicolon is matched in the
    // loop's step expression.
    for (; current_token.type == "CONSTTK"; match("SEMICN")) {
        match("CONSTTK");
        parse_const_def();
    }
    const char* const label = "<常量说明>";
    syntax_output << label << endl;
}
/* 常量定义 */
void parse_const_def() {
if (current_token.type == "INTTK") {
match("INTTK");
match("IDENFR");
match("ASSIGN");
parse_integer();
while (current_token.type == "COMMA") {
match("COMMA");
match("IDENFR");
match("ASSIGN");
parse_integer();
}
} else if (current_token.type == "CHARTK") {
match("CHARTK");
match("IDENFR");
match("ASSIGN");
match("CHARCON");
while (current_token.type == "COMMA") {
match("COMMA");
match("IDENFR");
match("ASSIGN");
match("CHARCON");
}
}
syntax_output << "<常量定义>" << endl;
}
/* Integer: optional '+' or '-' followed by an unsigned integer */
void parse_integer() {
    const string lead = current_token.type;
    const bool has_sign = (lead == "PLUS" || lead == "MINU");
    if (has_sign) {
        match(lead);
    }
    parse_unsigned_integer();
    const char* const label = "<整数>";
    syntax_output << label << endl;
}
/* Unsigned integer: a single INTCON token */
void parse_unsigned_integer() {
    match("INTCON");
    const char* const label = "<无符号整数>";
    syntax_output << label << endl;
}
/* 变量说明 */
void parse_var_decl() {
while (true) {
// 预读下一个token,判断是变量定义还是函数定义
Token next = peek_token();
if ((current_token.type == "INTTK" || current_token.type == "CHARTK") &&
next.type == "IDENFR") {
// 再预读一个token,判断标识符后是分号/逗号还是左括号
Token next_next = peek_token();
if (next_next.type == "SEMICN" || next_next.type == "COMMA") {
parse_var_def();
match("SEMICN");
} else {
break; // 是函数定义,退出变量说明解析
}
} else {
break;
}
}
syntax_output << "<变量说明>" << endl;
}
/* 变量定义 */
void parse_var_def() {
parse_type_identifier();
match("IDENFR");
while (current_token.type == "COMMA") {
match("COMMA");
match("IDENFR");
}
syntax_output << "<变量定义>" << endl;
}
/* Type identifier: "int" or "char". Throws runtime_error for anything else.
 * No label is written for this construct. */
void parse_type_identifier() {
    const string kind = current_token.type;
    if (kind != "INTTK" && kind != "CHARTK") {
        throw runtime_error("预期类型标识符,实际为: " + kind);
    }
    match(kind);
}
/* Function definition with return value:
 * <declaration head> ( <parameter list> ) { <compound statement> } */
void parse_func_def_with_return() {
    parse_decl_head();
    match("LPARENT");
    parse_param_list();
    for (const char* expected : {"RPARENT", "LBRACE"}) {
        match(expected);
    }
    parse_compound_stmt();
    match("RBRACE");
    const char* const label = "<有返回值函数定义>";
    syntax_output << label << endl;
}
/* Declaration head: <type identifier> IDENFR */
void parse_decl_head() {
    parse_type_identifier();
    match("IDENFR");
    const char* const label = "<声明头部>";
    syntax_output << label << endl;
}
/* Parameter list: empty, or "<type> IDENFR {, <type> IDENFR}" */
void parse_param_list() {
    const auto parse_param = [] {
        parse_type_identifier();
        match("IDENFR");
    };

    const bool has_params =
        current_token.type == "INTTK" || current_token.type == "CHARTK";
    if (has_params) {
        parse_param();
        while (current_token.type == "COMMA") {
            match("COMMA");
            parse_param();
        }
    }
    const char* const label = "<参数表>";
    syntax_output << label << endl;
}
/* Main function: void main ( ) { <compound statement> } */
void parse_main_func() {
    for (const char* expected : {"VOIDTK", "MAINTK", "LPARENT", "RPARENT", "LBRACE"}) {
        match(expected);
    }
    parse_compound_stmt();
    match("RBRACE");
    const char* const label = "<主函数>";
    syntax_output << label << endl;
}
/* Compound statement: [const declarations] [variable declarations] <statement list> */
void parse_compound_stmt() {
    if (current_token.type == "CONSTTK") parse_const_decl();

    // Evaluated after the constant declarations have been consumed.
    const bool at_type =
        current_token.type == "INTTK" || current_token.type == "CHARTK";
    if (at_type) parse_var_decl();

    parse_stmt_list();
    const char* const label = "<复合语句>";
    syntax_output << label << endl;
}
/* Statement list: statements up to the closing '}' or the end of input */
void parse_stmt_list() {
    for (;;) {
        const string& kind = current_token.type;
        if (kind == "RBRACE" || kind.empty()) break;
        parse_stmt();
    }
    const char* const label = "<语句列>";
    syntax_output << label << endl;
}
/* 语句 */
void parse_stmt() {
if (current_token.type == "IFTK") {
parse_condition_stmt();
} else if (current_token.type == "WHILETK" || current_token.type == "FORTK" || current_token.type == "DOTK") {
parse_loop_stmt();
} else if (current_token.type == "LBRACE") {
match("LBRACE");
parse_stmt_list();
match("RBRACE");
} else if (current_token.type == "IDENFR") {
Token next = peek_token();
if (next.type == "LPARENT") {
parse_expr_stmt();
} else {
parse_assign_stmt();
}
} else if (current_token.type == "SCANFTK") {
parse_read_stmt();
} else if (current_token.type == "PRINTFTK") {
parse_write_stmt();
} else if (current_token.type == "RETURNTK") {
parse_return_stmt();
} else if (current_token.type == "SEMICN") {
parse_empty_stmt();
} else {
throw runtime_error("未知语句类型: " + current_token.type);
}
syntax_output << "<语句>" << endl;
}
/* Assignment statement: IDENFR [ '[' <expression> ']' ] = <expression> ; */
void parse_assign_stmt() {
    match("IDENFR");
    const bool indexed = current_token.type == "LBRACK";
    if (indexed) {
        match("LBRACK");
        parse_expr();
        match("RBRACK");
    }
    match("ASSIGN");
    parse_expr();
    match("SEMICN");
    const char* const label = "<赋值语句>";
    syntax_output << label << endl;
}
/* Conditional statement: if ( <condition> ) <statement> [ else <statement> ] */
void parse_condition_stmt() {
    for (const char* expected : {"IFTK", "LPARENT"}) {
        match(expected);
    }
    parse_condition();
    match("RPARENT");
    parse_stmt();

    const bool has_else = current_token.type == "ELSETK";
    if (has_else) {
        match("ELSETK");
        parse_stmt();
    }
    const char* const label = "<条件语句>";
    syntax_output << label << endl;
}
/* 循环语句 */
void parse_loop_stmt() {
if (current_token.type == "WHILETK") {
match("WHILETK");
match("LPARENT");
parse_condition();
match("RPARENT");
parse_stmt();
} else if (current_token.type == "DOTK") {
match("DOTK");
parse_stmt();
match("WHILETK");
match("LPARENT");
parse_condition();
match("RPARENT");
match("SEMICN");
} else if (current_token.type == "FORTK") {
match("FORTK");
match("LPARENT");
match("IDENFR");
match("ASSIGN");
parse_expr();
match("SEMICN");
parse_condition();
match("SEMICN");
match("IDENFR");
match("ASSIGN");
match("IDENFR");
if (current_token.type == "PLUS" || current_token.type == "MINU") {
match(current_token.type);
parse_step();
}
match("RPARENT");
parse_stmt();
}
syntax_output << "<循环语句>" << endl;
}
/* Return statement: return [ ( <expression> ) ] ; */
void parse_return_stmt() {
    match("RETURNTK");
    const bool has_value = current_token.type == "LPARENT";
    if (has_value) {
        match("LPARENT");
        parse_expr();
        match("RPARENT");
    }
    match("SEMICN");
    const char* const label = "<返回语句>";
    syntax_output << label << endl;
}
/* Read statement: scanf ( IDENFR {, IDENFR} ) ; */
void parse_read_stmt() {
    for (const char* expected : {"SCANFTK", "LPARENT", "IDENFR"}) {
        match(expected);
    }
    while (current_token.type == "COMMA") {
        for (const char* expected : {"COMMA", "IDENFR"}) {
            match(expected);
        }
    }
    for (const char* expected : {"RPARENT", "SEMICN"}) {
        match(expected);
    }
    const char* const label = "<读语句>";
    syntax_output << label << endl;
}
/* 写语句 */
void parse_write_stmt() {
match("PRINTFTK");
match("LPARENT");
if (current_token.type == "STRCON") {
parse_string();
if (current_token.type == "COMMA") {
match("COMMA");
parse_expr();
}
} else {
parse_expr();
}
match("RPARENT");
match("SEMICN");
syntax_output << "<写语句>" << endl;
}
/* Function call statement: IDENFR ( <argument list> ) ; */
void parse_expr_stmt() {
    for (const char* expected : {"IDENFR", "LPARENT"}) {
        match(expected);
    }
    parse_arg_list();
    for (const char* expected : {"RPARENT", "SEMICN"}) {
        match(expected);
    }
    const char* const label = "<有返回值函数调用语句>";
    syntax_output << label << endl;
}
/* Empty statement: a lone ';' */
void parse_empty_stmt() {
    match("SEMICN");
    const char* const label = "<空>";
    syntax_output << label << endl;
}
/* Expression: [+|-] <term> { <additive operator> <term> } */
void parse_expr() {
    const auto at_sign = [] {
        return current_token.type == "PLUS" || current_token.type == "MINU";
    };

    // A leading sign is consumed directly, not through parse_add_op().
    if (at_sign()) {
        match(current_token.type);
    }
    for (parse_term(); at_sign(); parse_term()) {
        parse_add_op();
    }
    const char* const label = "<表达式>";
    syntax_output << label << endl;
}
/* 项 */
void parse_term() {
parse_factor();
while (current_token.type == "MULT" || current_token.type == "DIV") {
parse_mul_op();
parse_factor();
}
syntax_output << "<项>" << endl;
}
/* Factor, one of:
 *   IDENFR
 *   IDENFR [ <expression> ]
 *   IDENFR ( <argument list> )
 *   ( <expression> )
 *   <integer>        (optionally signed, e.g. the "-3" in "a * -3")
 *   CHARCON
 * Throws runtime_error when the current token cannot start a factor. */
void parse_factor() {
    const string kind = current_token.type;
    if (kind == "IDENFR") {
        match("IDENFR");
        if (current_token.type == "LBRACK") {
            match("LBRACK");
            parse_expr();
            match("RBRACK");
        } else if (current_token.type == "LPARENT") {
            match("LPARENT");
            parse_arg_list();
            match("RPARENT");
        }
    } else if (kind == "LPARENT") {
        match("LPARENT");
        parse_expr();
        match("RPARENT");
    } else if (kind == "INTCON" || kind == "PLUS" || kind == "MINU") {
        // parse_integer() accepts an optional sign; dispatching on INTCON
        // alone made that sign unreachable from a factor.
        parse_integer();
    } else if (kind == "CHARCON") {
        match("CHARCON");
    } else {
        throw runtime_error("无效的因子类型: " + kind);
    }
    syntax_output << "<因子>" << endl;
}
/* Argument list: empty (next token is ')'), or <expression> {, <expression>} */
void parse_arg_list() {
    const bool empty_list = current_token.type == "RPARENT";
    if (!empty_list) {
        for (parse_expr(); current_token.type == "COMMA"; parse_expr()) {
            match("COMMA");
        }
    }
    const char* const label = "<值参数表>";
    syntax_output << label << endl;
}
/* String: a single STRCON token */
void parse_string() {
    match("STRCON");
    const char* const label = "<字符串>";
    syntax_output << label << endl;
}
/* Condition: <expression> [ <relational operator> <expression> ].
 * No label is written for this construct. */
void parse_condition() {
    parse_expr();
    const string op = current_token.type;
    const bool relational = op == "EQL" || op == "NEQ" || op == "LSS" ||
                            op == "LEQ" || op == "GRE" || op == "GEQ";
    if (!relational) {
        return;
    }
    parse_rel_op();
    parse_expr();
}
/* Step of a for loop: an unsigned integer.
 * No label of its own is written for this construct. */
void parse_step() {
    parse_unsigned_integer();
}
/* Additive operator: '+' or '-'. Throws runtime_error for anything else. */
void parse_add_op() {
    const string kind = current_token.type;
    if (kind != "PLUS" && kind != "MINU") {
        throw runtime_error("预期加法运算符,实际为: " + kind);
    }
    match(kind);
    const char* const label = "<加法运算符>";
    syntax_output << label << endl;
}
/* Multiplicative operator: '*' or '/'. Throws runtime_error for anything else. */
void parse_mul_op() {
    const string kind = current_token.type;
    if (kind != "MULT" && kind != "DIV") {
        throw runtime_error("预期乘法运算符,实际为: " + kind);
    }
    match(kind);
    const char* const label = "<乘法运算符>";
    syntax_output << label << endl;
}
/* Relational operator: one of == != < <= > >=.
 * Throws runtime_error for anything else. */
void parse_rel_op() {
    static const char* const kRelOps[] = {"EQL", "NEQ", "LSS", "LEQ", "GRE", "GEQ"};

    const string kind = current_token.type;
    bool known = false;
    for (const char* op : kRelOps) {
        if (kind == op) {
            known = true;
            break;
        }
    }
    if (!known) {
        throw runtime_error("预期关系运算符,实际为: " + kind);
    }
    match(kind);
    const char* const label = "<关系运算符>";
    syntax_output << label << endl;
}
/* Function definition without return value:
 * void IDENFR ( <parameter list> ) { <compound statement> } */
void parse_func_def_without_return() {
    for (const char* expected : {"VOIDTK", "IDENFR", "LPARENT"}) {
        match(expected);
    }
    parse_param_list();
    for (const char* expected : {"RPARENT", "LBRACE"}) {
        match(expected);
    }
    parse_compound_stmt();
    match("RBRACE");
    const char* const label = "<无返回值函数定义>";
    syntax_output << label << endl;
}
/*--------------------------- Entry point ---------------------------*/
// Runs the lexer on testfile.txt, then parses the resulting token file and
// writes the analysis to output.txt. Returns 0 on success and 1 when a file
// cannot be opened or the parser reports an error. The intermediate token
// file is removed on every path that reaches the end of this function.
int main() {
    const string input_file = "testfile.txt";
    const string output_file = "output.txt";
    const string lex_temp_file = "lex_temp.txt";

    lexAnalyze(input_file, lex_temp_file);

    lex_input.open(lex_temp_file);
    syntax_output.open(output_file, ios::binary);
    if (!lex_input.is_open() || !syntax_output.is_open()) {
        cerr << "文件打开失败" << endl;
        // Close before removing: an open file cannot be deleted on every platform.
        lex_input.close();
        syntax_output.close();
        remove(lex_temp_file.c_str());
        return 1;
    }

    int status = 0;
    try {
        next_token();
        parse_program();
        cout << "分析完成,结果已写入 " << output_file << endl;
    } catch (const exception& e) {
        cerr << "\n分析错误: " << e.what() << endl;
        status = 1;
    }

    lex_input.close();
    syntax_output.close();
    remove(lex_temp_file.c_str());
    return status;
}