关于语法节点Tree、类型Type和符号Symbol

本文深入探讨Java编译过程中语法树(Tree)、符号(Symbol)及类型(Type)三者间的关系,详细解析Symbol与Type的属性关联,以及Node、Symbol、Type之间的互动机制。通过具体示例,阐述了访问者模式在符号和类型处理中的应用。

 

每个语法节点Tree都有Type属性,部分的语法节点有Symbol属性,如下:

 

Symbol类型与Type类型之间的关系如下:

 

 

下面是Symbol与Type之间的关系:

(1)MethodSymbol("finalize").type = MethodType("()void").tsym = ClassSymbol("Method").type = ClassType("Method").tsym = ClassSymbol("Method")

(2)TypeVar("M").tsym=TypeSymbol("M").type=TypeVar("M").tsym

(3)PackageSymbol("java").type = PackageType("java").tsym=PackageSymbol("java")

(4)VarSymbol("length").type=Type("int").tsym=ClassSymbol("int").type=Type("int").tsym=ClassSymbol("int")

 

下面是Node与Type之间的关系:

 

// Outer declares a non-static member class, so every Inner instance
// is tied to an enclosing Outer instance.
class Outer{
	class Inner{}
}

// Test01 extends the inner class Outer.Inner, so its constructor has to
// supply the enclosing Outer instance for the superclass part.
public class Test01 extends Outer.Inner{
	public Test01(Outer o){
		// Qualified superclass constructor invocation: o becomes the
		// enclosing instance of the Inner superclass.
		o.super();
	}
}

 

Outer.Inner是一个树节点,如果这个节点中的type有值,则直接返回即可,不用再进行标注。  

 

 

 

关于Node、Symbol与Type举一个例子,如下:

import java.io.FileInputStream;
import java.io.InputStream;

// Generic class with two type parameters: T1 is bounded by InputStream,
// T2 has no explicit bound.
public class TestScope<T1 extends InputStream,T2>{	
    public void test(){
    	// Parameterized use of TestScope: FileInputStream for T1 and an
    	// unbounded wildcard for T2.
    	TestScope<FileInputStream,?> x = null;
    }    
}

截图如下: 

JCTypeApply的Node结点中没有Symbol属性,但是每个Node中都有Type属性,其值如上图蓝色部分。

在ClassSymbol这个Symbol结点中,由于每个Symbol中都有Type类型的属性(type),这个属性的值为com.test18.TestScope<T1,T2>

在ClassType这个Type结点中,由于每个Type中都有TypeSymbol类型的属性,这个属性的值为com.test18.TestScope 

1、Symbol

对于Symbol来说:

Symbol中既有Symbol类型属性也有Type类型的属性,如下:

   /** The type of this symbol.
     */
    public Type type;

    /** The owner of this symbol.
     */
    public Symbol owner;

    /** The completer of this symbol.
     */
    public Completer completer;

    /** A cache for the type erasure of this symbol.
     */
    public Type erasure_field;  

所以每个Symbol类型都有type属性。

而用来标注Symbol种类的是Kinds类中定义的一组整型常量(Kinds本身是普通类,并不是枚举类型),代码如下:

/** Internal symbol kinds, which distinguish between elements of
 *  different subclasses of Symbol. Symbol kinds are organized so they can be
 *  or'ed to sets.
 */
public class Kinds {

    private Kinds() {} // uninstantiable

    /** The empty set of kinds.
     */
    public static final int NIL = 0;

    /** The kind of package symbols.
     */
    public static final int PCK = 1 << 0;

    /** The kind of type symbols (classes, interfaces and type variables).
     */
    public static final int TYP = 1 << 1;

    /** The kind of variable symbols.
     */
    public static final int VAR = 1 << 2;

    /** The kind of values (variables or non-variable expressions), includes VAR.
     *  Its bit pattern is bit 3 plus the VAR bit.
     */
    public static final int VAL = (1 << 3) | VAR;

    /** The kind of methods.
     */
    public static final int MTH = 1 << 4;

    /** The error kind, which includes all other kinds
     *  (bits 0 to 4 are all set).
     */
    public static final int ERR = (1 << 5) - 1;

    /** The set of all kinds.
     */
    public static final int AllKinds = ERR;

    /** Kinds for erroneous symbols that complement the above.
     *  The values below are consecutive numbers starting at ERRONEOUS,
     *  not independent bit flags.
     */
    public static final int ERRONEOUS = 1 << 6;
    public static final int AMBIGUOUS    = ERRONEOUS+1; // ambiguous reference
    public static final int HIDDEN       = ERRONEOUS+2; // hidden method or field
    public static final int STATICERR    = ERRONEOUS+3; // nonstatic member from static context
    public static final int ABSENT_VAR   = ERRONEOUS+4; // missing variable
    public static final int WRONG_MTHS   = ERRONEOUS+5; // methods with wrong arguments
    public static final int WRONG_MTH    = ERRONEOUS+6; // one method with wrong arguments
    public static final int ABSENT_MTH   = ERRONEOUS+7; // missing method
    public static final int ABSENT_TYP   = ERRONEOUS+8; // missing type

    /** Printable names of symbol kinds. Each constant carries a key
     *  suffix that is appended to "compiler.misc." when the name is localized.
     */
    public enum KindName implements Formattable {
        ANNOTATION("kindname.annotation"),
        CONSTRUCTOR("kindname.constructor"),
        INTERFACE("kindname.interface"),
        ENUM("kindname.enum"),
        STATIC("kindname.static"),
        TYPEVAR("kindname.type.variable"),
        BOUND("kindname.type.variable.bound"),
        VAR("kindname.variable"),
        VAL("kindname.value"),
        METHOD("kindname.method"),
        CLASS("kindname.class"),
        STATIC_INIT("kindname.static.init"),
        INSTANCE_INIT("kindname.instance.init"),
        PACKAGE("kindname.package");

        // Assigned once in the constructor and never changed afterwards.
        private final String name;

        KindName(String name) {
            this.name = name;
        }

        /** Returns the key suffix given to the constructor. */
        @Override
        public String toString() {
            return name;
        }

        public String getKind() {
            return "Kindname";
        }

        /** Looks up the localized text under "compiler.misc." + the key suffix. */
        public String toString(Locale locale, Messages messages) {
            String s = toString();
            return messages.getLocalizedString(locale, "compiler.misc." + s);
        }
    }

    /** A KindName representing a given symbol kind.
     *
     *  @param kind one of PCK, TYP, VAR, VAL or MTH
     *  @throws AssertionError for any other value, including or'ed sets
     */
    public static KindName kindName(int kind) {
        switch (kind) {
        case PCK: return KindName.PACKAGE;
        case TYP: return KindName.CLASS;
        case VAR: return KindName.VAR;
        case VAL: return KindName.VAL;
        case MTH: return KindName.METHOD;
        default: throw new AssertionError("Unexpected kind: "+kind);
        }
    }

    /** A KindName representing a given symbol.
     *
     *  @throws AssertionError if the element kind of the symbol is not
     *          handled below and the symbol's kind is not VAL
     */
    public static KindName kindName(Symbol sym) {
        switch (sym.getKind()) {
        case PACKAGE:
            return KindName.PACKAGE;

        case ENUM:
            return KindName.ENUM;

        case ANNOTATION_TYPE:
        case CLASS:
            return KindName.CLASS;

        case INTERFACE:
            return KindName.INTERFACE;

        case TYPE_PARAMETER:
            return KindName.TYPEVAR;

        case ENUM_CONSTANT:
        case FIELD:
        case PARAMETER:
        case LOCAL_VARIABLE:
        case EXCEPTION_PARAMETER:
        case RESOURCE_VARIABLE:
            return KindName.VAR;

        case CONSTRUCTOR:
            return KindName.CONSTRUCTOR;

        case METHOD:
            return KindName.METHOD;
        case STATIC_INIT:
            return KindName.STATIC_INIT;
        case INSTANCE_INIT:
            return KindName.INSTANCE_INIT;

        default:
            if (sym.kind == VAL) {
                // I don't think this can happen but it can't harm
                // playing it safe --ahe
                return KindName.VAL;
            } else {
                throw new AssertionError("Unexpected kind: "+sym.getKind());
            }
        }
    }

    /** A set of KindName(s) representing a set of symbol's kinds.
     *
     *  @param kind an or'ed combination of the kind constants
     */
    public static EnumSet<KindName> kindNames(int kind) {
        EnumSet<KindName> kinds = EnumSet.noneOf(KindName.class);
        // VAL contains the VAR bit: report VAR only when the VAR bit is the
        // sole VAL bit present, otherwise report the broader VAL.
        if ((kind & VAL) != 0) {
            kinds.add(((kind & VAL) == VAR) ? KindName.VAR : KindName.VAL);
        }
        if ((kind & MTH) != 0) kinds.add(KindName.METHOD);
        if ((kind & TYP) != 0) kinds.add(KindName.CLASS);
        if ((kind & PCK) != 0) kinds.add(KindName.PACKAGE);
        return kinds;
    }

    /** A KindName representing the kind of a given class/interface type.
     */
    public static KindName typeKindName(Type t) {
        // Parentheses spell out the precedence the original relied on:
        // && binds tighter than ||.
        if (t.tag == TYPEVAR ||
            (t.tag == CLASS && (t.tsym.flags() & COMPOUND) != 0))
            return KindName.BOUND;
        else if (t.tag == PACKAGE)
            return KindName.PACKAGE;
        else if ((t.tsym.flags_field & ANNOTATION) != 0)
            return KindName.ANNOTATION;
        else if ((t.tsym.flags_field & INTERFACE) != 0)
            return KindName.INTERFACE;
        else
            return KindName.CLASS;
    }

    /** A KindName representing the kind of a missing symbol, given an
     *  error kind.
     *
     *  @throws AssertionError if kind is not one of the ABSENT_* or WRONG_* constants
     */
    public static KindName absentKind(int kind) {
        switch (kind) {
        case ABSENT_VAR:
            return KindName.VAR;
        case WRONG_MTHS: case WRONG_MTH: case ABSENT_MTH:
            return KindName.METHOD;
        case ABSENT_TYP:
            return KindName.CLASS;
        default:
            throw new AssertionError("Unexpected kind: "+kind);
        }
    }
}

Symbol可以通过访问者模式来访问各个结点,定义如下:

(1)Symbol.Visitor<R, P>

符号类中定义的访问者模式接口如下:

/**
     * A visitor for symbols.  A visitor is used to implement operations
     * (or relations) on symbols.  Most common operations on types are
     * binary relations and this interface is designed for binary
     * relations, that is, operations on the form
     * Symbol × P → R.
     * <!-- In plain text: Symbol x P -> R -->
     *
     * @param <R> the return type of the operation implemented by this
     * visitor; use Void if no return type is needed.
     * @param <P> the type of the second argument (the first being the
     * symbol itself) of the operation implemented by this visitor; use
     * Void if a second argument is not needed.
     */
    public interface Visitor<R,P> {
        R visitClassSymbol(ClassSymbol s, P arg);
        R visitMethodSymbol(MethodSymbol s, P arg);
        R visitPackageSymbol(PackageSymbol s, P arg);
        R visitOperatorSymbol(OperatorSymbol s, P arg);
        R visitVarSymbol(VarSymbol s, P arg);
        R visitTypeSymbol(TypeSymbol s, P arg);
        // Catch-all method: the DefaultSymbolVisitor shown in this file
        // forwards every other visitXxx call here.
        R visitSymbol(Symbol s, P arg);
    }  

 

(2)Types中的DefaultSymbolVisitor<R,S>

 

 /**
     * Skeleton implementation of {@code Symbol.Visitor}.  Every
     * kind-specific method forwards to {@code visitSymbol}, which is the
     * one method a concrete subclass has to supply; any of the forwarding
     * methods may be overridden where a symbol kind needs special handling.
     *
     * @param <R> the result type of the operation; use Void when the
     * operation produces no result.
     * @param <S> the type of the extra argument passed along with the
     * symbol; use Void when no extra argument is needed.
     */
    public abstract static class DefaultSymbolVisitor<R,S> implements Symbol.Visitor<R,S> {

        /** Entry point: hands this visitor and {@code arg} to {@code s.accept}. */
        public final R visit(Symbol s, S arg) {
            return s.accept(this, arg);
        }

        public R visitClassSymbol(ClassSymbol s, S arg) {
            return visitSymbol(s, arg);
        }

        public R visitMethodSymbol(MethodSymbol s, S arg) {
            return visitSymbol(s, arg);
        }

        public R visitOperatorSymbol(OperatorSymbol s, S arg) {
            return visitSymbol(s, arg);
        }

        public R visitPackageSymbol(PackageSymbol s, S arg) {
            return visitSymbol(s, arg);
        }

        public R visitTypeSymbol(TypeSymbol s, S arg) {
            return visitSymbol(s, arg);
        }

        public R visitVarSymbol(VarSymbol s, S arg) {
            return visitSymbol(s, arg);
        }
    }

 

 

 

2、Type

对于每个Type类型来说,只限定有TypeSymbol类型的属性,也就是包、类和类型变量对应的符号。

// The defining class / interface / package / type variable
public TypeSymbol typeSymbol; // Only ClassSymbol and PackageSymbol extend TypeSymbol  

标注每个Type类型的为TypeTags,代码如下:

/**
 * Integer tag values that distinguish the different sorts of types.
 * The tags are consecutive: each one is defined as its predecessor plus one,
 * starting from {@link #BYTE} = 1.
 */
public class TypeTags {

    // Constants holder only; never instantiated.
    private TypeTags() {
    }

    /** Tag of the basic type {@code byte}. */
    public static final int BYTE = 1;

    /** Tag of the basic type {@code char}. */
    public static final int CHAR = BYTE + 1;

    /** Tag of the basic type {@code short}. */
    public static final int SHORT = CHAR + 1;

    /** Tag of the basic type {@code int}. */
    public static final int INT = SHORT + 1;

    /** Tag of the basic type {@code long}. */
    public static final int LONG = INT + 1;

    /** Tag of the basic type {@code float}. */
    public static final int FLOAT = LONG + 1;

    /** Tag of the basic type {@code double}. */
    public static final int DOUBLE = FLOAT + 1;

    /** Tag of the basic type {@code boolean}. */
    public static final int BOOLEAN = DOUBLE + 1;

    /** Tag of the type {@code void}. */
    public static final int VOID = BOOLEAN + 1;

    /** Tag shared by all class and interface types. */
    public static final int CLASS = VOID + 1;

    /** Tag shared by all array types. */
    public static final int ARRAY = CLASS + 1;

    /** Tag shared by all (monomorphic) method types. */
    public static final int METHOD = ARRAY + 1;

    /** Tag shared by all package "types". */
    public static final int PACKAGE = METHOD + 1;

    /** Tag shared by all (source-level) type variables. */
    public static final int TYPEVAR = PACKAGE + 1;

    /** Tag shared by all type arguments. */
    public static final int WILDCARD = TYPEVAR + 1;

    /** Tag shared by all polymorphic (method-) types. */
    public static final int FORALL = WILDCARD + 1;

    /** Tag of the bottom type, i.e. the type of {@code null}. */
    public static final int BOT = FORALL + 1;

    /** Tag of a missing type. */
    public static final int NONE = BOT + 1;

    /** Tag of the error type. */
    public static final int ERROR = NONE + 1;

    /** Tag of an unknown type. */
    public static final int UNKNOWN = ERROR + 1;

    /** Tag shared by all instantiatable type variables. */
    public static final int UNDETVAR = UNKNOWN + 1;

    /** Number of type tags (one past the last tag). */
    public static final int TypeTagCount = UNDETVAR + 1;

    /** Largest tag that denotes a basic type. */
    public static final int lastBaseTag = BOOLEAN;

    /** Smallest tag that denotes a partial type. */
    public static final int firstPartialTag = ERROR;
}

Javac为Type结构定义了访问者接口,如下:

(1)Type.Visitor<R, S>

类型Type中定义的访问者模式:

 /**
     * A visitor for types.  A visitor is used to implement operations
     * (or relations) on types.  Most common operations on types are
     * binary relations and this interface is designed for binary
     * relations, that is, operations on the form
     * Type × S → R.
     * <!-- In plain text: Type x S -> R -->
     *
     * @param <R> the return type of the operation implemented by this
     * visitor; use Void if no return type is needed.
     * @param <S> the type of the second argument (the first being the
     * type itself) of the operation implemented by this visitor; use
     * Void if a second argument is not needed.
     */
    public interface Visitor<R,S> {
        R visitClassType(ClassType t, S s);
        R visitWildcardType(WildcardType t, S s);
        R visitArrayType(ArrayType t, S s);
        R visitMethodType(MethodType t, S s);
        R visitPackageType(PackageType t, S s);
        R visitTypeVar(TypeVar t, S s);
        R visitCapturedType(CapturedType t, S s);
        R visitForAll(ForAll t, S s);
        R visitUndeterminedVar(UndeterminedVar t, S s);
        R visitErrorType(ErrorType t, S s);
        // Catch-all method: the DefaultTypeVisitor shown in this file
        // forwards every other visitXxx call here.
        R visitType(Type t, S s);
    }

 

(2)Types中的DefaultTypeVisitor<R,S>,SimpleTypeVisitor<R,S>

 

/**
     * Skeleton implementation of {@code Type.Visitor}.  Every
     * kind-specific method forwards to {@code visitType}, which is the
     * one method a concrete subclass has to supply; any of the forwarding
     * methods may be overridden where a sort of type needs special handling.
     *
     * @param <R> the result type of the operation; use Void when the
     * operation produces no result.
     * @param <S> the type of the extra argument passed along with the
     * type; use Void when no extra argument is needed.
     */
    public abstract static class DefaultTypeVisitor<R,S> implements Type.Visitor<R,S> {

        /** Entry point: hands this visitor and {@code s} to {@code t.accept}. */
        public final R visit(Type t, S s) {
            return t.accept(this, s);
        }

        public R visitClassType(ClassType t, S s) {
            return visitType(t, s);
        }

        public R visitWildcardType(WildcardType t, S s) {
            return visitType(t, s);
        }

        public R visitArrayType(ArrayType t, S s) {
            return visitType(t, s);
        }

        public R visitMethodType(MethodType t, S s) {
            return visitType(t, s);
        }

        public R visitPackageType(PackageType t, S s) {
            return visitType(t, s);
        }

        public R visitTypeVar(TypeVar t, S s) {
            return visitType(t, s);
        }

        public R visitCapturedType(CapturedType t, S s) {
            return visitType(t, s);
        }

        public R visitForAll(ForAll t, S s) {
            return visitType(t, s);
        }

        public R visitUndeterminedVar(UndeterminedVar t, S s) {
            return visitType(t, s);
        }

        public R visitErrorType(ErrorType t, S s) {
            return visitType(t, s);
        }
    }

 

/**
     * A <em>simple</em> visitor for types.  This visitor is simple as
     * captured wildcards, for-all types (generic methods), and
     * undetermined type variables (part of inference) are hidden.
     * Captured wildcards are hidden by treating them as type
     * variables and the rest are hidden by visiting their qtypes.
     *
     * @param <R> the return type of the operation implemented by this
     * visitor; use Void if no return type is needed.
     * @param <S> the type of the second argument (the first being the
     * type itself) of the operation implemented by this visitor; use
     * Void if a second argument is not needed.
     */
    public static abstract class SimpleTypeVisitor<R,S> extends DefaultTypeVisitor<R,S> {
        // A captured type is handled exactly like a type variable.
        @Override
        public R visitCapturedType(CapturedType t, S s) {
            return visitTypeVar(t, s);
        }
        // The ForAll wrapper is skipped; its qtype is visited instead.
        @Override
        public R visitForAll(ForAll t, S s) {
            return visit(t.qtype, s);
        }
        // The UndeterminedVar wrapper is skipped; its qtype is visited instead.
        @Override
        public R visitUndeterminedVar(UndeterminedVar t, S s) {
            return visit(t.qtype, s);
        }
    }

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

转载于:https://www.cnblogs.com/extjs4/p/6897234.html

#include <stdio.h> #include <stdlib.h> #include <string.h> #include <ctype.h> #include <stdbool.h> #define MAX_PROD_LEN 20 // 产生式最大长度 #define MAX_SYMBOLS 26 // 文法符号最大数量 #define MAX_PRODUCTIONS 20 // 产生式最大数量 #define MAX_INPUT_LEN 100 // 输入串最大长度 #define MAX_STACK_SIZE 100 // 分析栈最大大小 // 文法符号类型 typedef enum { TERMINAL, // 终结符 NON_TERMINAL // 非终结符 } SymbolType; // 产生式结构 typedef struct { char left; // 左部非终结符 char right[MAX_PROD_LEN]; // 右部符号串 } Production; // 语法节点 typedef struct TreeNode { char symbol; // 符号 bool is_terminal; // 是否是终结符 struct TreeNode **children; // 子节点指针数组 int child_count; // 子节点数量 } TreeNode; // LL(1)文法结构 typedef struct { char start_symbol; // 开始符号 char symbols[MAX_SYMBOLS]; // 所有符号 SymbolType symbol_types[MAX_SYMBOLS]; // 符号类型 int symbol_count; // 符号总数 Production productions[MAX_PRODUCTIONS]; // 产生式数组 int production_count; // 产生式数量 char first_set[MAX_SYMBOLS][MAX_SYMBOLS]; // FIRST集 char follow_set[MAX_SYMBOLS][MAX_SYMBOLS]; // FOLLOW集 int parsing_table[MAX_SYMBOLS][MAX_SYMBOLS]; // 预测分析表 TreeNode *parse_tree_root; // 语法树根节点 char derivation[MAX_INPUT_LEN * 10]; // 推导过程字符串 int der_len; // 推导过程长度 } Grammar; // 创建树节点 TreeNode *create_tree_node(char symbol, bool is_terminal) { TreeNode *node = (TreeNode *)malloc(sizeof(TreeNode)); node->symbol = symbol; node->is_terminal = is_terminal; node->children = NULL; node->child_count = 0; return node; } // 添加子节点 void add_child(TreeNode *parent, TreeNode *child) { parent->child_count++; parent->children = realloc(parent->children, parent->child_count * sizeof(TreeNode *)); parent->children[parent->child_count - 1] = child; } // 初始化文法 void init_grammar(Grammar *grammar) { memset(grammar, 0, sizeof(Grammar)); grammar->start_symbol = 'E'; // 默认开始符号 grammar->der_len = 0; grammar->derivation[0] = '\0'; // 初始化预测分析表为-1(表示错误) for (int i = 0; i < MAX_SYMBOLS; i++) { for (int j = 0; j < MAX_SYMBOLS; j++) { grammar->parsing_table[i][j] = -1; } } } // 添加符号到文法 void add_symbol(Grammar *grammar, char symbol, SymbolType type) { 
// 检查符号是否已存在 for (int i = 0; i < grammar->symbol_count; i++) { if (grammar->symbols[i] == symbol) return; } grammar->symbols[grammar->symbol_count] = symbol; grammar->symbol_types[grammar->symbol_count] = type; grammar->symbol_count++; } // 添加产生式 void add_production(Grammar *grammar, char left, const char *right) { if (grammar->production_count >= MAX_PRODUCTIONS) { printf("错误: 产生式数量超过限制\n"); return; } // 添加左部符号(非终结符) add_symbol(grammar, left, NON_TERMINAL); // 添加右部符号 for (int i = 0; right[i] != '\0'; i++) { if (right[i] != '@') { // '@'表示ε if (isupper(right[i])) { add_symbol(grammar, right[i], NON_TERMINAL); } else { add_symbol(grammar, right[i], TERMINAL); } } } // 存储产生式 grammar->productions[grammar->production_count].left = left; strncpy(grammar->productions[grammar->production_count].right, right, MAX_PROD_LEN - 1); grammar->production_count++; } // 计算FIRST集 void compute_first(Grammar *grammar, char symbol) { int idx = symbol - 'A'; // 如果已经计算过则返回 if (grammar->first_set[idx][0] != '\0') return; // 如果是终结符或ε if (!isupper(symbol) || symbol == '@') { grammar->first_set[idx][0] = symbol; grammar->first_set[idx][1] = '\0'; return; } // 遍历所有产生式 for (int i = 0; i < grammar->production_count; i++) { if (grammar->productions[i].left != symbol) continue; char *right = grammar->productions[i].right; // 处理ε产生式 if (right[0] == '@') { strcat(grammar->first_set[idx], "@"); continue; } // 处理右部的每个符号 for (int j = 0; right[j] != '\0'; j++) { char curr = right[j]; // 递归计算FIRST集 compute_first(grammar, curr); // 获取当前符号的FIRST集 char *curr_first = grammar->first_set[curr - 'A']; // 将当前符号的FIRST集加入目标符号的FIRST集(不包括ε) for (int k = 0; curr_first[k] != '\0'; k++) { if (curr_first[k] != '@') { char tmp[2] = {curr_first[k], '\0'}; // 避免重复添加 if (strchr(grammar->first_set[idx], curr_first[k]) == NULL) { strcat(grammar->first_set[idx], tmp); } } } // 如果当前符号的FIRST集不包含ε,则停止处理 if (strchr(curr_first, '@') == NULL) { break; } // 如果到达最后一个符号且FIRST集包含ε,则添加ε if (right[j + 1] == '\0' && strchr(curr_first, 
'@') != NULL) { strcat(grammar->first_set[idx], "@"); } } } } // 计算FOLLOW集 void compute_follow(Grammar *grammar, char symbol) { int idx = symbol - 'A'; // 如果已经计算过则返回 if (grammar->follow_set[idx][0] != '\0') return; // 如果是开始符号,添加$ if (symbol == grammar->start_symbol) { strcat(grammar->follow_set[idx], "$"); } // 遍历所有产生式 for (int i = 0; i < grammar->production_count; i++) { char *right = grammar->productions[i].right; char *pos = strchr(right, symbol); while (pos != NULL) { char next = *(pos + 1); // A → αBβ if (next != '\0' && next != '@') { // 将FIRST(β)中的非ε元素加入FOLLOW(B) compute_first(grammar, next); char *next_first = grammar->first_set[next - 'A']; for (int j = 0; next_first[j] != '\0'; j++) { if (next_first[j] != '@') { char tmp[2] = {next_first[j], '\0'}; // 避免重复添加 if (strchr(grammar->follow_set[idx], next_first[j]) == NULL) { strcat(grammar->follow_set[idx], tmp); } } } // 如果FIRST(β)包含ε,将FOLLOW(A)加入FOLLOW(B) if (strchr(next_first, '@') != NULL) { compute_follow(grammar, grammar->productions[i].left); char *left_follow = grammar->follow_set[grammar->productions[i].left - 'A']; for (int j = 0; left_follow[j] != '\0'; j++) { char tmp[2] = {left_follow[j], '\0'}; if (strchr(grammar->follow_set[idx], left_follow[j]) == NULL) { strcat(grammar->follow_set[idx], tmp); } } } } // A → αB 或 A → αBβ且β可以推出ε else if (next == '\0' || next == '@') { compute_follow(grammar, grammar->productions[i].left); char *left_follow = grammar->follow_set[grammar->productions[i].left - 'A']; for (int j = 0; left_follow[j] != '\0'; j++) { char tmp[2] = {left_follow[j], '\0'}; if (strchr(grammar->follow_set[idx], left_follow[j]) == NULL) { strcat(grammar->follow_set[idx], tmp); } } } // 继续查找下一个出现位置 pos = strchr(pos + 1, symbol); } } } // 构建预测分析表 void build_parsing_table(Grammar *grammar) { // 初始化分析表 for (int i = 0; i < MAX_SYMBOLS; i++) { for (int j = 0; j < MAX_SYMBOLS; j++) { grammar->parsing_table[i][j] = -1; } } // 遍历每个产生式 for (int i = 0; i < grammar->production_count; i++) { char left = 
grammar->productions[i].left; char *right = grammar->productions[i].right; int left_idx = left - 'A'; // 计算FIRST(α) char first_alpha[MAX_SYMBOLS] = {0}; char *ptr = right; while (*ptr != '\0' && *ptr != '@') { // 对于终结符 if (!isupper(*ptr)) { char tmp[2] = {*ptr, '\0'}; strcat(first_alpha, tmp); break; } // 对于非终结符 compute_first(grammar, *ptr); char *curr_first = grammar->first_set[*ptr - 'A']; // 检查是否包含ε bool has_epsilon = false; for (int j = 0; curr_first[j] != '\0'; j++) { if (curr_first[j] == '@') { has_epsilon = true; } else { char tmp[2] = {curr_first[j], '\0'}; // 避免重复添加 if (strchr(first_alpha, curr_first[j]) == NULL) { strcat(first_alpha, tmp); } } } // 如果当前符号不包含ε,停止处理 if (!has_epsilon) { break; } ptr++; } // 如果产生式可以推导出ε,则添加FOLLOW(A) if (strcmp(right, "@") == 0 || (strlen(right) > 0 && strchr(first_alpha, '@') != NULL)) { compute_follow(grammar, left); char *follow = grammar->follow_set[left_idx]; for (int j = 0; follow[j] != '\0'; j++) { char tmp[2] = {follow[j], '\0'}; if (strchr(first_alpha, follow[j]) == NULL) { strcat(first_alpha, tmp); } } } // 为FIRST集(α)中的每个元素添加产生式 for (int j = 0; first_alpha[j] != '\0'; j++) { if (first_alpha[j] != '@') { int col = first_alpha[j] - 'A'; grammar->parsing_table[left_idx][col] = i; } } } } // 打印语法树(递归辅助函数) void print_tree_recursive(TreeNode *node, int depth) { if (node == NULL) return; // 打印缩进 for (int i = 0; i < depth; i++) { printf(" "); } // 打印节点 printf("|-%c", node->symbol); if (node->is_terminal) { printf("(T)"); } printf("\n"); // 递归打印子节点 for (int i = 0; i < node->child_count; i++) { print_tree_recursive(node->children[i], depth + 1); } } // 打印语法树 void print_parse_tree(TreeNode *root) { printf("\n语法分析树:\n"); print_tree_recursive(root, 0); printf("\n"); } // 释放语法树内存 void free_tree(TreeNode *root) { if (root == NULL) return; for (int i = 0; i < root->child_count; i++) { free_tree(root->children[i]); } if (root->child_count > 0) { free(root->children); } free(root); } // LL(1)分析过程 bool parse_string(Grammar *grammar, 
char *input) { char stack[MAX_STACK_SIZE]; // 分析栈 int top = -1; // 栈顶指针 TreeNode *node_stack[MAX_STACK_SIZE]; // 语法节点栈 int node_top = -1; // 初始化栈 stack[++top] = '$'; stack[++top] = grammar->start_symbol; // 初始化语法树根节点 grammar->parse_tree_root = create_tree_node(grammar->start_symbol, false); node_stack[++node_top] = grammar->parse_tree_root; // 初始化推导过程 grammar->der_len = sprintf(grammar->derivation, "%c", grammar->start_symbol); // 输入指针 char *input_ptr = input; printf("\n分析过程:\n"); printf("栈\t\t| 输入\t\t| 动作\n"); printf("-----------------------------\n"); while (top >= 0) { // 打印当前状态 char stack_str[MAX_STACK_SIZE] = {0}; for (int i = top; i >= 0; i--) { strncat(stack_str, &stack[i], 1); } printf("%-10s\t| %-10s\t| ", stack_str, input_ptr); char X = stack[top--]; TreeNode *curr_node = node_stack[node_top--]; // 如果是终结符 if (!isupper(X) || X == '$') { if (X == *input_ptr) { // 匹配终结符 if (*input_ptr != '$') { printf("匹配 '%c'\n", *input_ptr); // 创建终结符节点 TreeNode *term_node = create_tree_node(*input_ptr, true); add_child(curr_node, term_node); input_ptr++; } else { printf("接受\n"); return true; // 成功分析 } } else { printf("错误: 期望 '%c', 但找到 '%c'\n", X, *input_ptr); return false; // 分析失败 } } // 如果是非终结符 else { char a = *input_ptr; int row = X - 'A'; int col = (a >= 'a' && a <= 'z') ? a - 'a' : -1; // 终结符在表中的位置 // 检查预测分析表 if (col == -1 || grammar->parsing_table[row][col] == -1) { printf("错误: 没有 %c -> ? 
的规则 (当前输入 '%c')\n", X, a); return false; // 分析失败 } int prod_idx = grammar->parsing_table[row][col]; Production prod = grammar->productions[prod_idx]; printf("应用规则: %c -> %s\n", prod.left, prod.right); // 记录推导过程 char temp[50]; sprintf(temp, " => %s", prod.right); strcat(grammar->derivation, temp); // 处理ε产生式 if (strcmp(prod.right, "@") == 0) { // 创建ε节点 TreeNode *epsilon_node = create_tree_node('@', true); add_child(curr_node, epsilon_node); continue; } // 将产生式右部逆序压入栈 int len = strlen(prod.right); for (int i = len - 1; i >= 0; i--) { char symbol = prod.right[i]; if (symbol != '@') { stack[++top] = symbol; // 创建新节点并添加到树 bool is_term = !isupper(symbol); TreeNode *new_node = create_tree_node(symbol, is_term); add_child(curr_node, new_node); // 非终结符需要继续扩展
最新发布
06-05
评论
成就一亿技术人!
拼手气红包6.0元
还能输入1000个字符  | 博主筛选后可见
 
红包 添加红包
表情包 插入表情
 条评论被折叠 查看
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值