#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <stdbool.h>
#define MAX_PROD_LEN 20 // size of a production's right-hand-side buffer (terminator included)
#define MAX_SYMBOLS 26 // capacity of the symbol list and dimension of the FIRST/FOLLOW/parsing tables
#define MAX_PRODUCTIONS 20 // maximum number of productions in a grammar
#define MAX_INPUT_LEN 100 // maximum input string length; also scales the derivation buffer
#define MAX_STACK_SIZE 100 // capacity of the parse stack
// Kind of a grammar symbol.
typedef enum {
TERMINAL, // terminal symbol
NON_TERMINAL // non-terminal symbol
} SymbolType;
// One production rule of the form left -> right.
typedef struct {
char left; // left-hand-side non-terminal
char right[MAX_PROD_LEN]; // right-hand-side symbols as a NUL-terminated string; '@' stands for epsilon
} Production;
// Node of the parse tree.
typedef struct TreeNode {
char symbol; // grammar symbol held by this node
bool is_terminal; // true when the node represents a terminal
struct TreeNode **children; // heap array of child pointers (NULL while the node has no children)
int child_count; // number of entries in children
} TreeNode;
// LL(1) grammar together with its derived tables and the results of the last parse.
typedef struct {
char start_symbol; // start symbol
char symbols[MAX_SYMBOLS]; // every registered grammar symbol, in insertion order
SymbolType symbol_types[MAX_SYMBOLS]; // kind of symbols[i], kept parallel to symbols
int symbol_count; // number of used entries in symbols / symbol_types
Production productions[MAX_PRODUCTIONS]; // production list
int production_count; // number of used entries in productions
char first_set[MAX_SYMBOLS][MAX_SYMBOLS]; // FIRST sets: one NUL-terminated string per row, row = symbol - 'A'
char follow_set[MAX_SYMBOLS][MAX_SYMBOLS]; // FOLLOW sets, same layout as first_set
int parsing_table[MAX_SYMBOLS][MAX_SYMBOLS]; // predictive parsing table: production index, or -1 for "no entry"
TreeNode *parse_tree_root; // root of the parse tree
char derivation[MAX_INPUT_LEN * 10]; // text of the derivation steps
int der_len; // length of the derivation text
} Grammar;
// Allocate a parse-tree node that has no children yet.
//
// symbol:      grammar symbol stored in the node
// is_terminal: whether the node represents a terminal
//
// Returns the new heap-allocated node, or NULL when allocation fails.
// The caller owns the node; free_tree() releases a node together with its
// whole subtree.
TreeNode *create_tree_node(char symbol, bool is_terminal) {
    TreeNode *node = malloc(sizeof *node);
    if (node == NULL) {
        return NULL;
    }
    node->symbol = symbol;
    node->is_terminal = is_terminal;
    node->children = NULL;
    node->child_count = 0;
    return node;
}
// Append child to the end of parent's child array.
//
// The array is grown by one slot with realloc. If parent is NULL or the
// array cannot be grown, parent is left exactly as it was (same array, same
// child_count) and child is NOT attached; the caller keeps ownership of
// child in that case.
void add_child(TreeNode *parent, TreeNode *child) {
    if (parent == NULL) {
        return;
    }

    size_t new_count = (size_t)parent->child_count + 1;
    // Grow through a temporary so the existing array is not lost on failure.
    TreeNode **grown = realloc(parent->children, new_count * sizeof *grown);
    if (grown == NULL) {
        fprintf(stderr, "错误: 内存分配失败\n");
        return;
    }

    grown[parent->child_count] = child;
    parent->children = grown;
    parent->child_count++;
}
// Reset a grammar to its empty state.
//
// Every byte of the structure is zeroed, the start symbol defaults to 'E',
// the derivation text is emptied, and every cell of the predictive parsing
// table is set to -1, the "no entry" marker.
void init_grammar(Grammar *grammar) {
    memset(grammar, 0, sizeof *grammar);

    grammar->derivation[0] = '\0';
    grammar->der_len = 0;
    grammar->start_symbol = 'E';

    // Walk the table as one flat sequence of MAX_SYMBOLS * MAX_SYMBOLS cells.
    for (int cell = 0; cell < MAX_SYMBOLS * MAX_SYMBOLS; cell++) {
        grammar->parsing_table[cell / MAX_SYMBOLS][cell % MAX_SYMBOLS] = -1;
    }
}
// Register a grammar symbol and its kind.
//
// A symbol that is already registered is left untouched (its recorded kind
// is not changed). When the symbol list already holds MAX_SYMBOLS entries,
// an error is printed and the symbol is not added.
void add_symbol(Grammar *grammar, char symbol, SymbolType type) {
    // Nothing to do if the symbol is already known.
    for (int i = 0; i < grammar->symbol_count; i++) {
        if (grammar->symbols[i] == symbol) {
            return;
        }
    }

    // symbols[] and symbol_types[] both have exactly MAX_SYMBOLS slots.
    if (grammar->symbol_count >= MAX_SYMBOLS) {
        printf("错误: 符号数量超过限制\n");
        return;
    }

    grammar->symbols[grammar->symbol_count] = symbol;
    grammar->symbol_types[grammar->symbol_count] = type;
    grammar->symbol_count++;
}
// Add the production left -> right to the grammar.
//
// left:  left-hand-side symbol; registered as a non-terminal
// right: NUL-terminated right-hand side; '@' denotes epsilon and is not
//        registered as a symbol, upper-case letters are registered as
//        non-terminals and every other character as a terminal
//
// The production is rejected with an error message, and the grammar is left
// unchanged, when the production list is full or when right does not fit
// into the MAX_PROD_LEN-byte buffer together with its terminator. Storing a
// silently truncated right-hand side would describe a different grammar.
void add_production(Grammar *grammar, char left, const char *right) {
    if (grammar->production_count >= MAX_PRODUCTIONS) {
        printf("错误: 产生式数量超过限制\n");
        return;
    }

    size_t right_len = strlen(right);
    if (right_len >= MAX_PROD_LEN) {
        printf("错误: 产生式右部长度超过限制\n");
        return;
    }

    // Register the left-hand side as a non-terminal.
    add_symbol(grammar, left, NON_TERMINAL);

    // Register every right-hand-side symbol except the epsilon marker.
    for (size_t i = 0; i < right_len; i++) {
        if (right[i] == '@') {
            continue;
        }
        // <ctype.h> functions require a value representable as unsigned char.
        if (isupper((unsigned char)right[i])) {
            add_symbol(grammar, right[i], NON_TERMINAL);
        } else {
            add_symbol(grammar, right[i], TERMINAL);
        }
    }

    // Store the production; the terminator is copied along with the text.
    Production *slot = &grammar->productions[grammar->production_count];
    slot->left = left;
    memcpy(slot->right, right, right_len + 1);
    grammar->production_count++;
}
// 计算FIRST集
void compute_first(Grammar *grammar, char symbol) {
int idx = symbol - 'A';
// 如果已经计算过则返回
if (grammar->first_set[idx][0] != '\0') return;
// 如果是终结符或ε
if (!isupper(symbol) || symbol == '@') {
grammar->first_set[idx][0] = symbol;
grammar->first_set[idx][1] = '\0';
return;
}
// 遍历所有产生式
for (int i = 0; i < grammar->production_count; i++) {
if (grammar->productions[i].left != symbol) continue;
char *right = grammar->productions[i].right;
// 处理ε产生式
if (right[0] == '@') {
strcat(grammar->first_set[idx], "@");
continue;
}
// 处理右部的每个符号
for (int j = 0; right[j] != '\0'; j++) {
char curr = right[j];
// 递归计算FIRST集
compute_first(grammar, curr);
// 获取当前符号的FIRST集
char *curr_first = grammar->first_set[curr - 'A'];
// 将当前符号的FIRST集加入目标符号的FIRST集(不包括ε)
for (int k = 0; curr_first[k] != '\0'; k++) {
if (curr_first[k] != '@') {
char tmp[2] = {curr_first[k], '\0'};
// 避免重复添加
if (strchr(grammar->first_set[idx], curr_first[k]) == NULL) {
strcat(grammar->first_set[idx], tmp);
}
}
}
// 如果当前符号的FIRST集不包含ε,则停止处理
if (strchr(curr_first, '@') == NULL) {
break;
}
// 如果到达最后一个符号且FIRST集包含ε,则添加ε
if (right[j + 1] == '\0' && strchr(curr_first, '@') != NULL) {
strcat(grammar->first_set[idx], "@");
}
}
}
}
// 计算FOLLOW集
void compute_follow(Grammar *grammar, char symbol) {
int idx = symbol - 'A';
// 如果已经计算过则返回
if (grammar->follow_set[idx][0] != '\0') return;
// 如果是开始符号,添加$
if (symbol == grammar->start_symbol) {
strcat(grammar->follow_set[idx], "$");
}
// 遍历所有产生式
for (int i = 0; i < grammar->production_count; i++) {
char *right = grammar->productions[i].right;
char *pos = strchr(right, symbol);
while (pos != NULL) {
char next = *(pos + 1);
// A → αBβ
if (next != '\0' && next != '@') {
// 将FIRST(β)中的非ε元素加入FOLLOW(B)
compute_first(grammar, next);
char *next_first = grammar->first_set[next - 'A'];
for (int j = 0; next_first[j] != '\0'; j++) {
if (next_first[j] != '@') {
char tmp[2] = {next_first[j], '\0'};
// 避免重复添加
if (strchr(grammar->follow_set[idx], next_first[j]) == NULL) {
strcat(grammar->follow_set[idx], tmp);
}
}
}
// 如果FIRST(β)包含ε,将FOLLOW(A)加入FOLLOW(B)
if (strchr(next_first, '@') != NULL) {
compute_follow(grammar, grammar->productions[i].left);
char *left_follow = grammar->follow_set[grammar->productions[i].left - 'A'];
for (int j = 0; left_follow[j] != '\0'; j++) {
char tmp[2] = {left_follow[j], '\0'};
if (strchr(grammar->follow_set[idx], left_follow[j]) == NULL) {
strcat(grammar->follow_set[idx], tmp);
}
}
}
}
// A → αB 或 A → αBβ且β可以推出ε
else if (next == '\0' || next == '@') {
compute_follow(grammar, grammar->productions[i].left);
char *left_follow = grammar->follow_set[grammar->productions[i].left - 'A'];
for (int j = 0; left_follow[j] != '\0'; j++) {
char tmp[2] = {left_follow[j], '\0'};
if (strchr(grammar->follow_set[idx], left_follow[j]) == NULL) {
strcat(grammar->follow_set[idx], tmp);
}
}
}
// 继续查找下一个出现位置
pos = strchr(pos + 1, symbol);
}
}
}
// 构建预测分析表
void build_parsing_table(Grammar *grammar) {
// 初始化分析表
for (int i = 0; i < MAX_SYMBOLS; i++) {
for (int j = 0; j < MAX_SYMBOLS; j++) {
grammar->parsing_table[i][j] = -1;
}
}
// 遍历每个产生式
for (int i = 0; i < grammar->production_count; i++) {
char left = grammar->productions[i].left;
char *right = grammar->productions[i].right;
int left_idx = left - 'A';
// 计算FIRST(α)
char first_alpha[MAX_SYMBOLS] = {0};
char *ptr = right;
while (*ptr != '\0' && *ptr != '@') {
// 对于终结符
if (!isupper(*ptr)) {
char tmp[2] = {*ptr, '\0'};
strcat(first_alpha, tmp);
break;
}
// 对于非终结符
compute_first(grammar, *ptr);
char *curr_first = grammar->first_set[*ptr - 'A'];
// 检查是否包含ε
bool has_epsilon = false;
for (int j = 0; curr_first[j] != '\0'; j++) {
if (curr_first[j] == '@') {
has_epsilon = true;
} else {
char tmp[2] = {curr_first[j], '\0'};
// 避免重复添加
if (strchr(first_alpha, curr_first[j]) == NULL) {
strcat(first_alpha, tmp);
}
}
}
// 如果当前符号不包含ε,停止处理
if (!has_epsilon) {
break;
}
ptr++;
}
// 如果产生式可以推导出ε,则添加FOLLOW(A)
if (strcmp(right, "@") == 0 || (strlen(right) > 0 && strchr(first_alpha, '@') != NULL)) {
compute_follow(grammar, left);
char *follow = grammar->follow_set[left_idx];
for (int j = 0; follow[j] != '\0'; j++) {
char tmp[2] = {follow[j], '\0'};
if (strchr(first_alpha, follow[j]) == NULL) {
strcat(first_alpha, tmp);
}
}
}
// 为FIRST集(α)中的每个元素添加产生式
for (int j = 0; first_alpha[j] != '\0'; j++) {
if (first_alpha[j] != '@') {
int col = first_alpha[j] - 'A';
grammar->parsing_table[left_idx][col] = i;
}
}
}
}
// Recursive helper that prints one subtree of the parse tree.
//
// Each node is written on its own line as "|-<symbol>", preceded by one
// space per level of depth and followed by "(T)" when the node is a
// terminal. Children are printed in order, one level deeper. A NULL node
// prints nothing.
void print_tree_recursive(TreeNode *node, int depth) {
    if (!node) {
        return;
    }

    // Indentation.
    for (int pad = depth; pad > 0; pad--) {
        printf(" ");
    }

    // The node itself.
    printf("|-%c", node->symbol);
    if (node->is_terminal) {
        printf("(T)");
    }
    printf("\n");

    // Then each child, one level deeper.
    int next = 0;
    while (next < node->child_count) {
        print_tree_recursive(node->children[next], depth + 1);
        next++;
    }
}
// Print the whole parse tree: a heading line, the tree starting at depth 0,
// then a blank line.
void print_parse_tree(TreeNode *root) {
    fputs("\n语法分析树:\n", stdout);
    print_tree_recursive(root, 0);
    fputs("\n", stdout);
}
// Release a parse tree: every child subtree first, then the node's child
// array (only when the node has children), then the node itself. A NULL
// root is ignored.
void free_tree(TreeNode *root) {
    if (!root) {
        return;
    }

    int next = 0;
    while (next < root->child_count) {
        free_tree(root->children[next]);
        next++;
    }

    if (root->child_count >= 1) {
        free(root->children);
    }

    free(root);
}
// LL(1)分析过程
bool parse_string(Grammar *grammar, char *input) {
char stack[MAX_STACK_SIZE]; // 分析栈
int top = -1; // 栈顶指针
TreeNode *node_stack[MAX_STACK_SIZE]; // 语法树节点栈
int node_top = -1;
// 初始化栈
stack[++top] = '$';
stack[++top] = grammar->start_symbol;
// 初始化语法树根节点
grammar->parse_tree_root = create_tree_node(grammar->start_symbol, false);
node_stack[++node_top] = grammar->parse_tree_root;
// 初始化推导过程
grammar->der_len = sprintf(grammar->derivation, "%c", grammar->start_symbol);
// 输入指针
char *input_ptr = input;
printf("\n分析过程:\n");
printf("栈\t\t| 输入\t\t| 动作\n");
printf("-----------------------------\n");
while (top >= 0) {
// 打印当前状态
char stack_str[MAX_STACK_SIZE] = {0};
for (int i = top; i >= 0; i--) {
strncat(stack_str, &stack[i], 1);
}
printf("%-10s\t| %-10s\t| ", stack_str, input_ptr);
char X = stack[top--];
TreeNode *curr_node = node_stack[node_top--];
// 如果是终结符
if (!isupper(X) || X == '$') {
if (X == *input_ptr) {
// 匹配终结符
if (*input_ptr != '$') {
printf("匹配 '%c'\n", *input_ptr);
// 创建终结符节点
TreeNode *term_node = create_tree_node(*input_ptr, true);
add_child(curr_node, term_node);
input_ptr++;
} else {
printf("接受\n");
return true; // 成功分析
}
} else {
printf("错误: 期望 '%c', 但找到 '%c'\n", X, *input_ptr);
return false; // 分析失败
}
}
// 如果是非终结符
else {
char a = *input_ptr;
int row = X - 'A';
int col = (a >= 'a' && a <= 'z') ? a - 'a' : -1; // 终结符在表中的位置
// 检查预测分析表
if (col == -1 || grammar->parsing_table[row][col] == -1) {
printf("错误: 没有 %c -> ? 的规则 (当前输入 '%c')\n", X, a);
return false; // 分析失败
}
int prod_idx = grammar->parsing_table[row][col];
Production prod = grammar->productions[prod_idx];
printf("应用规则: %c -> %s\n", prod.left, prod.right);
// 记录推导过程
char temp[50];
sprintf(temp, " => %s", prod.right);
strcat(grammar->derivation, temp);
// 处理ε产生式
if (strcmp(prod.right, "@") == 0) {
// 创建ε节点
TreeNode *epsilon_node = create_tree_node('@', true);
add_child(curr_node, epsilon_node);
continue;
}
// 将产生式右部逆序压入栈
int len = strlen(prod.right);
for (int i = len - 1; i >= 0; i--) {
char symbol = prod.right[i];
if (symbol != '@') {
stack[++top] = symbol;
// 创建新节点并添加到树
bool is_term = !isupper(symbol);
TreeNode *new_node = create_tree_node(symbol, is_term);
add_child(curr_node, new_node);
// 非终结符需要继续扩展