解析class文件-方法

本文介绍了一个Java类文件解析器的设计与实现,详细展示了类文件结构、常量池解析及方法解析的过程。通过C++语言实现,重点讲解了如何解析类文件中的不同元素,并处理各种异常情况。

目前完成了对class文件方法的解析。注意,无论是对接口,域,方法,还是对常量池的解析,目前所做的工作都是试验性的。

我只是按照class格式分析,至于解析结果如何保存为合理,则到开发执行引擎时才能清楚。此次修改也调整了一些其它代码,故此贴出完整代码。

 

ClassFileParser.h

#pragma once

#include "util.h"

//u1, u2 and u4 denote unsigned quantities of 1, 2 and 4 bytes respectively.
//They must be defined to match the compile platform exactly.
//The JVM specification uses them extensively to describe the class file format.
typedef unsigned int u4;
typedef unsigned short u2;
typedef unsigned char u1;

//Define, according to the compile platform, a suitable 4-byte float,
//8-byte long and 8-byte double-precision floating point type.
typedef float JFLOAT;
typedef long long JLONG;
typedef double JDOUBLE;

//Forward declarations of the class file structure types.
struct cp_info;
struct field_info;
struct method_info;
struct attribute_info;

//Request 1-byte packing for the structures declared after this point
//(compiler-specific pragma; restored by the matching "#pragma pack()").
#pragma pack(1)

/* Class文件结构,参考JVM规范

//class文件的结构,直接明了。
//class文件中u2,u4,u8的存储都是big-endian顺序(高字节在前,低字节在后)
ClassFile
{
u4 magic;
u2 minor_version;
u2 major_version;
u2 constant_pool_count; //按照JVM规范,此值cp_count等于cp_info的记录数+1
cp_info constant_pool[constant_pool_count-1];
u2 access_flags;
u2 this_class;
u2 super_class;
u2 interfaces_count;
u2 interfaces[interfaces_count];
u2 fields_count;
field_info fields[fields_count];
u2 methods_count;
method_info methods[methods_count];
u2 attributes_count;
attribute_info attributes[attributes_count];
}

cp_info
{
u1 tag;
u1 info[];
}

CONSTANT_Class_info {
u1 tag;
u2 name_index;  //常量池索引,该索引处的常量项必须是一个CONSTANT_Utf8_info
}

CONSTANT_Fieldref_info {
    u1 tag;
    u2 class_index; //常量池索引,必须指向一个CONSTANT_Class_info
    u2 name_and_type_index; //常量池索引,必须指向一个CONSTANT_NameAndType_info
}
CONSTANT_Methodref_info {
    u1 tag;
    u2 class_index; //常量池索引,必须指向一个CONSTANT_Class_info
    u2 name_and_type_index; //常量池索引,必须指向一个CONSTANT_NameAndType_info
}
CONSTANT_InterfaceMethodref_info {
    u1 tag;
    u2 class_index; //常量池索引,必须指向一个CONSTANT_Class_info
    u2 name_and_type_index; //常量池索引,必须指向一个CONSTANT_NameAndType_info
}

CONSTANT_String_info {
    u1 tag;
    u2 string_index;    //常量池索引,必须指向一个CONSTANT_Utf8_info
}

CONSTANT_Integer_info {
    u1 tag;
    u4 bytes;   //4字节整数,高字节在前
}
CONSTANT_Float_info {
    u1 tag;
    u4 bytes;   //4字节浮点数,IEEE 754格式,高字节在前
}

CONSTANT_Long_info {
    u1 tag;
    u4 high_bytes;  //8字节整数,高字节在前
    u4 low_bytes;
}
CONSTANT_Double_info {
    u1 tag;
    u4 high_bytes;  //双精度浮点数,IEEE 754格式,高字节在前
    u4 low_bytes;
}

CONSTANT_NameAndType_info {
    u1 tag;
    u2 name_index;  //field或method的简单名字。该索引必须指向一个CONSTANT_Utf8_info
    u2 descriptor_index;    //field或method的描述符。该索引必须指向一个CONSTANT_Utf8_info
}

CONSTANT_Utf8_info {
    u1 tag;
    u2 length;
    u1 bytes[length];
}

attribute_info
{
u2 attribute_name_index;    //索引必须指向常量池的一个CONSTANT_Utf8_info
u4 attribute_length;
u1 info[attribute_length];
}

field_info
{
u2 access_flags;
u2 name_index;  //field simple name, 索引必须指向常量池的一个CONSTANT_Utf8_info
u2 descriptor_index;    //field descriptor, 索引必须指向常量池的一个CONSTANT_Utf8_info
u2 attributes_count;
attribute_info attributes[attributes_count];
}

struct method_info
{
u2 access_flags;
u2 name_index;
u2 descriptor_index;
u2 attributes_count;
attribute_info attributes[attributes_count];
}

Code_attribute
{
u2 attribute_name_index;
u4 attribute_length;
u2 max_stack;
u2 max_locals;
u4 code_length;
u1 code[code_length];
u2 exception_table_length;
Exception_table exception_table[exception_table_length];
u2 attributes_count;
attribute_info attributes[attributes_count];
};

*/

//General layout of a constant pool entry.
//Because C cannot describe a variable-length member exactly, a placeholder
//array is used below; the other variable-length structures use the same trick.
struct cp_info
{
u1 tag;      //one of the CONSTANT_* tag values
u1 info[0];  //placeholder with no concrete length (or declared as 0); requires compiler support
};

//Tag values that identify the kind of a constant pool entry.
//The parser switches on these values when reading cp_info::tag.
//Values are consecutive starting at 1.
enum 
{
    CONSTANT_Utf8 = 1,
    CONSTANT_Unicode,   //not used
    CONSTANT_Integer,
    CONSTANT_Float,
    CONSTANT_Long,
    CONSTANT_Double,
    CONSTANT_Class,
    CONSTANT_String,
    CONSTANT_Fieldref,
    CONSTANT_Methodref,
    CONSTANT_InterfaceMethodref,
    CONSTANT_NameAndType,
};

//In-memory form of a CONSTANT_Class constant pool entry.
struct CONSTANT_Class_info
{
u1 tag;
u2 name_index;  //constant pool index; saveConstantPool requires it to refer to a CONSTANT_Utf8 entry
};

//In-memory form of a CONSTANT_Fieldref constant pool entry.
struct CONSTANT_Fieldref_info
{
u1 tag;
u2 class_index;          //constant pool index; must refer to a CONSTANT_Class entry
u2 name_and_type_index;  //constant pool index; must refer to a CONSTANT_NameAndType entry
};

//In-memory form of a CONSTANT_Methodref constant pool entry.
struct CONSTANT_Methodref_info
{
u1 tag;
u2 class_index;          //constant pool index; must refer to a CONSTANT_Class entry
u2 name_and_type_index;  //constant pool index; must refer to a CONSTANT_NameAndType entry
};

//In-memory form of a CONSTANT_InterfaceMethodref constant pool entry.
struct CONSTANT_InterfaceMethodref_info
{
u1 tag;
u2 class_index;          //constant pool index; must refer to a CONSTANT_Class entry
u2 name_and_type_index;  //constant pool index; must refer to a CONSTANT_NameAndType entry
};

//In-memory form of a CONSTANT_String constant pool entry.
struct CONSTANT_String_info
{
u1 tag;
u2 string_index;  //constant pool index; must refer to a CONSTANT_Utf8 entry
};

//In-memory form of a CONSTANT_Integer constant pool entry.
struct CONSTANT_Integer_info
{
u1 tag;
u4 ivalue;  //the 4-byte value as returned by ClassBufferInput::read_u4
};

//In-memory form of a CONSTANT_Float constant pool entry.
struct CONSTANT_Float_info
{
u1 tag;
JFLOAT fvalue;  //the 4 bytes read from the class file, reinterpreted as a JFLOAT via a union
};

//In-memory form of a CONSTANT_Long constant pool entry.
//Counts as two constant pool slots.
struct CONSTANT_Long_info
{
u1 tag;
JLONG lvalue;  //(high_bytes << 32) | low_bytes, as assembled by saveConstantPool
};

//In-memory form of a CONSTANT_Double constant pool entry.
//Counts as two constant pool slots.
struct CONSTANT_Double_info
{
u1 tag;
JDOUBLE dvalue;  //the 8 bytes (high word first) reinterpreted as a JDOUBLE via a union
};

//In-memory form of a CONSTANT_NameAndType constant pool entry.
struct CONSTANT_NameAndType_info
{
u1 tag;
u2 name_index;        //constant pool index; must refer to a CONSTANT_Utf8 entry
u2 descriptor_index;  //constant pool index; must refer to a CONSTANT_Utf8 entry
};

//In-memory form of a CONSTANT_Utf8 constant pool entry.
//The string bytes are stored directly after the fixed header.
struct CONSTANT_Utf8_info
{
u1 tag;
u2 length;    //number of bytes that follow in bytes[]
u1 bytes[0];  //[length]
};

//Internal representation of a field_info record.
//Currently, associated attributes are: ConstantValue, Synthetic, Deprecated.
//Other unknown attributes will be ignored according to JVM Specification.
struct FieldInfo
{
u2 access_flags;
u2 name_index;           //constant pool index of the field name (CONSTANT_Utf8)
u2 descriptor_index;     //constant pool index of the field descriptor (CONSTANT_Utf8)
u1 is_ConstantValue;     //1 when a ConstantValue attribute was present, otherwise 0
u2 constantvalue_index;  //value read from the ConstantValue attribute; 0 when absent
u1 is_Synthetic;         //1 when a Synthetic attribute was present, otherwise 0
u1 is_Deprecated;        //1 when a Deprecated attribute was present, otherwise 0
};

//One entry of a Code attribute's exception table.
//parseCodeAttribute fills the members from four consecutive u2 values, in this order.
struct Exception_table
{
u2 start_pc;
u2 end_pc;
u2 handler_pc;
u2 catch_type;
};

//One entry of a LineNumberTable attribute (debugging information attached to a Code attribute).
struct LineNumber_table
{
u2 start_pc;
u2 line_number;
};

//One entry of a LocalVariableTable attribute (debugging information attached to a Code attribute).
struct LocalVariable_table
{
u2 start_pc;
u2 length;
u2 name_index;
u2 descriptor_index;
u2 index;
};

//Internal representation of a method_info record.
//Currently, associated attributes are: Code, Exceptions, Synthetic, Deprecated.
//Other unknown attributes will be ignored according to JVM Specification.
struct MethodInfo
{
u2 access_flags;
u2 name_index;
u2 descriptor_index;
u1 has_Code;                 //cleared to 0 by init_method_info
u2 max_stack;
u2 max_locals;
u4 code_length;              //number of bytes in code
u1 *code;                    //NULL until a code array is attached
u2 exception_table_length;   //number of entries in exception_table
Exception_table *exception_table;
//Currently,the possible attributes that associated with Code attribute are: 
//LineNumberTable and LocalVariableTable. Both of each store debugging information.
int total_line_number_table;
LineNumber_table *line_number_table;
int total_local_variable_table;
LocalVariable_table *local_variable_table;
u2 number_of_exceptions;    //number of checked exceptions the method may throw
u2 *exception_index_table;  //class name of each exception (each index refers to a CONSTANT_Class_info)
u1 has_Exceptions;
u1 is_Synthetic;
u1 is_Deprecated;
};

#pragma pack()

class ClassBufferInput;

//Parses a Java class file held in a memory buffer and keeps the parsed
//constant pool, interfaces, fields and methods in its members.
class ClassFileParser
{
public:
//classInBuffer/length describe the class file image; the buffer is referenced, not copied.
ClassFileParser(const u1 *classInBuffer, int length);
virtual ~ClassFileParser(void);

//Because of the file format, a class file can only be parsed sequentially:
//magic, version, constant pool, ..., etc.
void parseClassFile() throw (Exception);

//releaseResource releases the resources already allocated when parsing fails
void releaseResource();

void printSummary();

private:
const u1 * _class_buffer;       //class file image passed to the constructor
int _buffer_length;             //length of _class_buffer in bytes
ClassBufferInput *_classInput;  //sequential reader over _class_buffer; deleted in the destructor

//True when index lies in [1, cp_count).
bool valid_cp_index(int index, int cp_count) {
    return (index >= 1 && index < cp_count);
}
//Same check against the parsed constant pool count (_cp_count).
bool valid_cp_index(int index) {
    return (index >= 1 && index < _cp_count);
}

bool is_supported_version(u2 majorVersion, u2 minorVersion);
bool check_utf8_string(const u1 *bytes, int length);
//First pass: measures and checks the constant pool, then calls saveConstantPool.
void parseConstantPool() throw (Exception);
//Second pass: copies the constant pool into _constant_pool and builds _cp_index.
void saveConstantPool() throw (Exception);
void check_this_class(u2 this_class) throw (Exception);
void check_access_flags(u2 access_flags) throw (Exception);
void check_super_class(u2 super_class) throw (Exception);
//Accessors taking a 1-based constant pool index; they throw when the index is out of range.
const cp_info * get_cp_info(int index) throw (Exception);
u1 get_cp_tag(int index) throw (Exception);
const u1 * get_cp_utf8(int index, u2 & length) throw (Exception);
void parseInterfaces() throw (Exception);
void parseFields() throw (Exception);
void parseMethods() throw (Exception);
void init_method_info(MethodInfo *minfo);
u4 parseCodeAttribute(MethodInfo *minfo) throw (Exception);
void releaseMethods();

private:
u2 _major_version;
u2 _minor_version;

int _cp_count;  //constant_pool_count as read from the file (number of slots + 1)
//The runtime constant pool is stored in two arrays: an index array and the constant pool data
cp_info ** _cp_index;  //_cp_count - 1 pointers, one per slot, into _constant_pool
u1 * _constant_pool;   //packed constant pool entries
int _cp_length;        //size of _constant_pool in bytes; -1 until computed
u1 _tag_0;             //placeholder entry (tag 0) used for the second slot of Long/Double constants

u2 _access_flags;
u2 _this_class;
u2 _super_class;

int _interfaces_count;
u1 * _interfaces;  //raw copy of the interfaces table (_interfaces_count * 2 bytes, as stored in the file)

int _fields_count;
FieldInfo *_fields;

int _methods_count;
MethodInfo *_methods;

};

//Sequential reader over a class file buffer.
//NOTE(review): the member functions are defined outside this view; the
//descriptions below are inferred from their names and call sites - confirm.
class ClassBufferInput
{
public:
ClassBufferInput(const u1* buffer, int length);

const u1* buffer() const { return _buffer_start; }                //start of the underlying buffer
int length() const { return _buffer_end - _buffer_start; }        //total buffer length in bytes
const u1* current() const { return _current; }                    //current read position

//Read a value at the current position (presumably advancing past it).
u1 read_u1() throw (Exception);
u2 read_u2() throw (Exception);
u4 read_u4() throw (Exception);
void read_bytes(u1 *buf, int size) throw (Exception);

//Skip over data without returning it.
void skip_u1() throw (Exception);
void skip_u2() throw (Exception);
void skip_u4() throw (Exception);
void skip_bytes(int size) throw (Exception);
//parseConstantPool calls mark() before its first pass over the constant pool
//and reset() before the second pass so the same bytes can be read again.
void mark();
void reset();

static u2 read_java_u2(const u1 *buffer);
static u4 read_java_u4(const u1 *buffer);

private:
const u1* _buffer_start;
const u1* _buffer_end;
const u1* _current;
const u1* _marked;

void guarantee_size(int size) throw (Exception);
};


 

ClassFileParser.cpp

#include "ClassFileParser.h"
#include <stdio.h>
#include <string.h>

#define CLASS_FILE_MAGIC_U4 0xCAFEBABE

/**
 * Creates a parser over an in-memory class file image.
 *
 * The buffer is referenced, not copied, so it must stay valid for as long as
 * the parser reads from it.
 *
 * Every data member is given a defined value here. Previously the counts,
 * _access_flags/_this_class/_super_class and _methods were left indeterminate,
 * so code running after a failed or partial parse could read garbage (for
 * example a non-NULL _methods pointer that was never allocated).
 *
 * @param classInBuffer  start of the class file bytes
 * @param length         number of bytes available at classInBuffer
 */
ClassFileParser::ClassFileParser(const u1 *classInBuffer, int length)
    : _class_buffer(classInBuffer),
      _buffer_length(length),
      _classInput(NULL),
      _major_version(0),
      _minor_version(0),
      _cp_count(0),
      _cp_index(NULL),
      _constant_pool(NULL),
      _cp_length(-1),   // -1 marks "constant pool not measured yet"
      _tag_0(0),
      _access_flags(0),
      _this_class(0),
      _super_class(0),
      _interfaces_count(0),
      _interfaces(NULL),
      _fields_count(0),
      _fields(NULL),
      _methods_count(0),
      _methods(NULL)
{
    // Allocated last so that all other members are already initialized.
    _classInput = new ClassBufferInput(_class_buffer, _buffer_length);
}

/**
 * Destroys the parser and the buffer reader it owns.
 *
 * TODO: on parse failure the resources allocated so far need to be released,
 * while on success they need to be kept; until that is settled the constant
 * pool (_constant_pool) is intentionally not deleted here.
 */
ClassFileParser::~ClassFileParser(void)
{
    // Deleting a null pointer is a no-op, so no explicit NULL test is required.
    delete _classInput;
}

/**
 * Tells whether a class file version is accepted by this parser.
 *
 * Class file major versions do not follow the JDK release numbers (1.5 is not
 * major=1, minor=5); the numbering starts at 45. The previous test used
 * "major > 45" and therefore rejected major version 45 even though it is the
 * first valid one.
 *
 * @param major  major_version read from the class file
 * @param minor  minor_version read from the class file (currently not checked)
 * @return true when 45 <= major <= 51
 */
bool ClassFileParser::is_supported_version(u2 major, u2 minor)
{
    const u2 min_supported_major = 45;
    const u2 max_supported_major = 51;

    (void)minor;    // the minor version does not take part in the decision yet
    return major >= min_supported_major && major <= max_supported_major;
}

/**
 * Checks the bytes of a CONSTANT_Utf8 entry.
 *
 * TODO: the check is not implemented yet; every input is accepted.
 *
 * @param bytes   start of the string bytes
 * @param length  number of bytes
 * @return always true for now
 */
bool ClassFileParser::check_utf8_string(const u1 *bytes, int length)
{
    (void)bytes;
    (void)length;

    const bool accepted = true;
    return accepted;
}

/**
 * Returns the constant pool entry for a 1-based constant pool index.
 *
 * @param index  constant pool index as used in the class file (first entry is 1)
 * @return the entry stored in slot index - 1 of _cp_index
 * @throws Exception when index is outside [1, _cp_count)
 */
const cp_info * ClassFileParser::get_cp_info(int index) throw (Exception)
{
    const bool in_range = valid_cp_index(index);
    assert_exception(in_range, "cp index out of cp range");

    // Class file indices are 1-based, the index array is 0-based.
    return _cp_index[index - 1];
}

/**
 * Returns the tag of the constant pool entry at a 1-based index.
 *
 * @param index  constant pool index
 * @return the entry's tag byte
 * @throws Exception when index is out of range (raised by get_cp_info)
 */
u1 ClassFileParser::get_cp_tag(int index) throw (Exception)
{
    return get_cp_info(index)->tag;
}

/**
 * Looks up a CONSTANT_Utf8 entry and returns its bytes.
 *
 * @param index   1-based constant pool index
 * @param length  receives the number of bytes in the string
 * @return pointer to the string bytes inside the stored constant pool
 * @throws Exception when index is out of range or the entry is not CONSTANT_Utf8
 */
const u1 * ClassFileParser::get_cp_utf8(int index, u2 & length) throw (Exception)
{
    const cp_info *entry = get_cp_info(index);
    assert_exception(entry->tag == CONSTANT_Utf8, "cp indexed is not CONSTANT_Utf8");

    const CONSTANT_Utf8_info *utf8 = (const CONSTANT_Utf8_info *)entry;
    length = utf8->length;
    return utf8->bytes;
}

void ClassFileParser::saveConstantPool() throw (Exception)
{
    ClassBufferInput *in = _classInput;

    //现在已知常量池的长度,保存到一个数组中(目前运行时常量池与原始常量池的长度相同!)
    //另用一个数组保存各个常量项的索引
    _constant_pool = new u1  [_cp_length];
    assert_exception(_constant_pool != NULL, "out of memory");

    _cp_index = new cp_info * [_cp_count - 1];
    assert_exception(_cp_index != NULL, "out of memory");

    cp_info * current = (cp_info *)_constant_pool;
    for (int index = 0; index < _cp_count - 1; index++)
    {
        u1 tag = in->read_u1();
        switch (tag)
        {
        case CONSTANT_Utf8:
            {
                CONSTANT_Utf8_info *pUtf8 = (CONSTANT_Utf8_info *)current;
                u2 length = in->read_u2();

                _cp_index[index] = current;
                pUtf8->tag = tag;
                pUtf8->length = length;
                in->read_bytes(pUtf8->bytes, length);
                current += (sizeof(CONSTANT_Utf8_info) + length);
            }
            break;
        case CONSTANT_Integer:
            {
                CONSTANT_Integer_info *pInteger = (CONSTANT_Integer_info *)current;
                u4 bytes = in->read_u4();

                _cp_index[index] = current;
                pInteger->tag = tag;
                pInteger->ivalue = bytes;
                current += sizeof(CONSTANT_Integer_info);
            }
            break;
        case CONSTANT_Float:
            {
                CONSTANT_Float_info *pFloat = (CONSTANT_Float_info *)current;
                u4 bytes = in->read_u4();

                _cp_index[index] = current;
                pFloat->tag = tag;
                union {
                    JFLOAT f;
                    u4 bytes;
                } u;
                u.bytes = bytes;
                pFloat->fvalue = u.f;
                current += sizeof(CONSTANT_Float_info);
            }
            break;
        case CONSTANT_Long:
            {
                CONSTANT_Long_info *pLong = (CONSTANT_Long_info *)current;
                u4 high_bytes = in->read_u4();
                u4 low_bytes = in->read_u4();

                _cp_index[index] = current;
                pLong->tag = tag;
                pLong->lvalue = (JLONG)high_bytes << 32 | (JLONG)low_bytes;
                index++;    //JVM规范:8字节的常量池项在计数上占两个
                _cp_index[index] = (cp_info *)&_tag_0;
                current += sizeof(CONSTANT_Long_info);
            }
            break;
        case CONSTANT_Double:
            {
                CONSTANT_Double_info *pDouble = (CONSTANT_Double_info *)current;
                u4 high_bytes = in->read_u4();
                u4 low_bytes = in->read_u4();

                _cp_index[index] = current;
                pDouble->tag = tag;
                union {
                    JDOUBLE d;
                    JLONG l;
                } u;
                JLONG l = (JLONG)high_bytes << 32 | (JLONG)low_bytes;
                u.l = l;
                pDouble->dvalue = u.d;
                index++;    //JVM规范:8字节的常量池项在计数上占两个
                _cp_index[index] = (cp_info *)&_tag_0;
                current += sizeof(CONSTANT_Double_info);
            }
            break;
        case CONSTANT_Class:
            {
                CONSTANT_Class_info *pClass = (CONSTANT_Class_info *)current;
                u2 name_index = in->read_u2();

                _cp_index[index] = current;
                pClass->tag = tag;
                pClass->name_index = name_index;
                current += sizeof(CONSTANT_Class_info);
            }
            break;
        case CONSTANT_String:
            {
                CONSTANT_String_info *pString = (CONSTANT_String_info *)current;
                u2 string_index = in->read_u2();

                _cp_index[index] = current;
                pString->tag = tag;
                pString->string_index = string_index;
                current += sizeof(CONSTANT_String_info);
            }
            break;
        case CONSTANT_Fieldref:
            {
                CONSTANT_Fieldref_info *pFieldref 
                    = (CONSTANT_Fieldref_info *)current;
                u2 class_index = in->read_u2();
                u2 name_and_type_index = in->read_u2();

                _cp_index[index] = current;
                pFieldref->tag = tag;
                pFieldref->class_index = class_index;
                pFieldref->name_and_type_index = name_and_type_index;
                current += sizeof(CONSTANT_Fieldref_info);
            }
            break;
        case CONSTANT_Methodref:
            {
                CONSTANT_Methodref_info *pMethodref 
                    = (CONSTANT_Methodref_info *)current;
                u2 class_index = in->read_u2();
                u2 name_and_type_index = in->read_u2();

                _cp_index[index] = current;
                pMethodref->tag = tag;
                pMethodref->class_index = class_index;
                pMethodref->name_and_type_index = name_and_type_index;
                current += sizeof(CONSTANT_Methodref_info);
            }
            break;
        case CONSTANT_InterfaceMethodref:
            {
                CONSTANT_InterfaceMethodref_info *pInterfaceMethodref 
                    = (CONSTANT_InterfaceMethodref_info *)current;
                u2 class_index = in->read_u2();
                u2 name_and_type_index = in->read_u2();

                _cp_index[index] = current;
                pInterfaceMethodref->tag = tag;
                pInterfaceMethodref->class_index = class_index;
                pInterfaceMethodref->name_and_type_index = name_and_type_index;
                current += sizeof(CONSTANT_InterfaceMethodref_info);
            }
            break;
        case CONSTANT_NameAndType:
            {
                CONSTANT_NameAndType_info *pNameAndType
                    = (CONSTANT_NameAndType_info *)current;
                u2 name_index = in->read_u2();
                u2 descriptor_index = in->read_u2();

                _cp_index[index] = current;
                pNameAndType->tag = tag;
                pNameAndType->name_index = name_index;
                pNameAndType->descriptor_index = descriptor_index;
                current += sizeof(CONSTANT_NameAndType_info);
            }
            break;
        default:
            {
                char msg[30];
                sprintf(msg, "unknown tag: %d", tag);
                assert_exception(false, msg);
            }
            break;
        }
    }

    //验证相等(目前必定不相等,因为已经从big-endian顺序转化为本机顺序)
    //int magic_version_length = sizeof(u4) + sizeof(u2) + sizeof(u2);
    //const u1 * p1 = _class_buffer + magic_version_length + 2;
    //int cmp = memcmp(p1, _constant_pool, _cp_length);

    //检查常量项相互之间的引用是否正确
    for (int index = 1; index < _cp_count; index++)
    {
        cp_info * current = _cp_index[index - 1];
        u1 tag = current->tag;
        switch (tag)
        {
        case CONSTANT_Utf8:
            break;
        case CONSTANT_Integer:
            break;
        case CONSTANT_Float:
            break;
        case CONSTANT_Long:
            {
                index++;
            }
            break;
        case CONSTANT_Double:
            {
                index++;
            }
            break;
        case CONSTANT_Class:
            {
                CONSTANT_Class_info *pClass = (CONSTANT_Class_info *)current;
                u1 ref_tag = get_cp_tag(pClass->name_index);
                assert_exception(ref_tag == CONSTANT_Utf8, 
                    "name_index refered is not CONSTANT_Utf8");
            }
            break;
        case CONSTANT_String:
            {
                CONSTANT_String_info *pString = (CONSTANT_String_info *)current;
                u1 ref_tag = get_cp_tag(pString->string_index);
                assert_exception(ref_tag == CONSTANT_Utf8,
                    "string_index refered is not CONSTANT_Utf8");
            }
            break;
        case CONSTANT_Fieldref:
            {
                CONSTANT_Fieldref_info *pFieldref 
                    = (CONSTANT_Fieldref_info *)current;
                u1 ref_tag1 = get_cp_tag(pFieldref->class_index);
                assert_exception(ref_tag1 == CONSTANT_Class,
                    "class_index refered is not CONSTANT_Class");
                u1 ref_tag2 = get_cp_tag(pFieldref->name_and_type_index);
                assert_exception(ref_tag2 == CONSTANT_NameAndType,
                    "name_and_type_index refered is not CONSTANT_NameAndType");
            }
            break;
        case CONSTANT_Methodref:
            {
                CONSTANT_Methodref_info *pMethodref 
                    = (CONSTANT_Methodref_info *)current;
                u1 ref_tag1 = get_cp_tag(pMethodref->class_index);
                assert_exception(ref_tag1 == CONSTANT_Class,
                    "class_index refered is not CONSTANT_Class");
                u1 ref_tag2 = get_cp_tag(pMethodref->name_and_type_index);
                assert_exception(ref_tag2 == CONSTANT_NameAndType,
                    "name_and_type_index refered is not CONSTANT_NameAndType");
            }
            break;
        case CONSTANT_InterfaceMethodref:
            {
                CONSTANT_InterfaceMethodref_info *pInterfaceMethodref 
                    = (CONSTANT_InterfaceMethodref_info *)current;
                u1 ref_tag1 = get_cp_tag(pInterfaceMethodref->class_index);
                assert_exception(ref_tag1 == CONSTANT_Class,
                    "class_index refered is not CONSTANT_Class");
                u1 ref_tag2 = get_cp_tag(pInterfaceMethodref->name_and_type_index);
                assert_exception(ref_tag2 == CONSTANT_NameAndType,
                    "name_and_type_index refered is not CONSTANT_NameAndType");
            }
            break;
        case CONSTANT_NameAndType:
            {
                CONSTANT_NameAndType_info *pNameAndType 
                    = (CONSTANT_NameAndType_info *)current;
                u1 ref_tag1 = get_cp_tag(pNameAndType->name_index);
                assert_exception(ref_tag1 == CONSTANT_Utf8,
                    "name_index refered is not CONSTANT_Utf8");
                u1 ref_tag2 = get_cp_tag(pNameAndType->descriptor_index);
                assert_exception(ref_tag2 == CONSTANT_Utf8,
                    "descriptor_index refered is not CONSTANT_Utf8");
            }
            break;
        }
    }
}

void ClassFileParser::parseConstantPool() throw (Exception)
{
    ClassBufferInput *in = _classInput;
    u2 cp_count = in->read_u2();  //常量池项目数 + 1
    assert_exception(cp_count >= 1, "bad constant pool size");
    //下面遍历一遍常量池,为了统计常量池的长度(字节数),顺便执行一些检查
    in->mark(); //第二遍将重读常量池,所以先标记一下
    int cp_length = 0;
    int cp_info_length;
    for (int index = 1; index < cp_count; index++)
    {
        cp_info_length = -1;
        u1 tag = in->read_u1();
        printf("index:%d, tag: %d, ", index, tag);
        switch (tag)
        {
        case CONSTANT_Utf8:
            {
                u2 length = in->read_u2();
                //检查utf8字符串
                bool isUtf8 = check_utf8_string(in->current(), length);
                assert_exception(isUtf8, "bad utf8 string");
                in->skip_bytes(length);
                cp_info_length = 2 + length;
            }
            break;
        case CONSTANT_Integer:
            {
                in->skip_u4();
                cp_info_length = 4;
            }
            break;
        case CONSTANT_Float:
            {
                in->skip_u4();
                cp_info_length = 4;
            }
            break;
        case CONSTANT_Long:
            {
                in->skip_bytes(8);
                cp_info_length = 8;
                index++;    //JVM规范:8字节的常量池项在计数上占两个
            }
            break;
        case CONSTANT_Double:
            {
                in->skip_bytes(8);
                cp_info_length = 8;
                index++;    //JVM规范:8字节的常量池项在计数上占两个
            }
            break;
        case CONSTANT_Class:
            {
                u2 name_index = in->read_u2();
                assert_exception(valid_cp_index(name_index, cp_count), 
                    "bad constant pool index");
                cp_info_length = 2;
            }
            break;
        case CONSTANT_String:
            {
                u2 string_index = in->read_u2();
                assert_exception(valid_cp_index(string_index, cp_count), 
                    "bad constant pool index");
                cp_info_length = 2;
            }
            break;
        case CONSTANT_Fieldref:
        case CONSTANT_Methodref:
        case CONSTANT_InterfaceMethodref:
            {
                u2 class_index = in->read_u2();
                u2 name_and_type_index = in->read_u2();
                assert_exception(valid_cp_index(class_index, cp_count),
                    "bad constant pool index");
                assert_exception(valid_cp_index(name_and_type_index, cp_count),
                    "bad constant pool index");
                cp_info_length = 4;
            }
            break;
        case CONSTANT_NameAndType:
            {
                u2 name_index = in->read_u2();
                u2 descriptor_index = in->read_u2();
                assert_exception(valid_cp_index(name_index, cp_count),
                    "bad constant pool index");
                assert_exception(valid_cp_index(descriptor_index, cp_count),
                    "bad constant pool index");
                cp_info_length = 4;
            }
            break;
        default:
            {
                char msg[30];
                sprintf(msg, "unknown tag: %d", tag);
                assert_exception(false, msg);
            }
            break;
        }
        assert_exception(cp_info_length != -1, "internal error");
        printf("length: %d\n", cp_info_length);
        cp_length += (cp_info_length + 1);
    }

    _cp_count = cp_count;
    _cp_length = cp_length;

    in->reset();    //重读常量池(第二遍)
    saveConstantPool();

}

/**
 * Checks the class access flags.
 *
 * TODO: not implemented yet; every value is currently accepted.
 *
 * @param access_flags  access_flags value read from the class file
 */
void ClassFileParser::check_access_flags(u2 access_flags) throw (Exception)
{
    (void)access_flags;
}

/**
 * Verifies that this_class is a valid constant pool index referring to a
 * CONSTANT_Class entry.
 *
 * @param this_class  this_class value read from the class file
 * @throws Exception when the index is out of range or refers to another kind of entry
 */
void ClassFileParser::check_this_class(u2 this_class) throw (Exception)
{
    const bool in_range = valid_cp_index(this_class);
    assert_exception(in_range, "this_class out of cp range");

    const bool is_class = (get_cp_tag(this_class) == CONSTANT_Class);
    assert_exception(is_class, "this_class refered is not CONSTANT_Class");
}

/**
 * Verifies the super_class item.
 *
 * The class file format allows super_class to be zero, which means the class
 * has no superclass (the case of java.lang.Object). The previous version
 * rejected zero as "out of cp range", so such a class file could not be
 * parsed. Any non-zero value must be a valid constant pool index referring to
 * a CONSTANT_Class entry.
 *
 * NOTE(review): zero is accepted for any class here; restricting it to
 * java.lang.Object would need the class name, which is not checked at this
 * point.
 *
 * @param super_class  super_class value read from the class file
 * @throws Exception when a non-zero index is out of range or refers to
 *         another kind of entry
 */
void ClassFileParser::check_super_class(u2 super_class) throw (Exception)
{
    if (super_class == 0) {
        return;     //no superclass
    }

    assert_exception(valid_cp_index(super_class), 
        "super_class out of cp range");
    u1 tag  = get_cp_tag(super_class);
    assert_exception(tag == CONSTANT_Class, 
        "super_class refered is not CONSTANT_Class");
}

void ClassFileParser::parseInterfaces() throw (Exception)
{
    ClassBufferInput *in = _classInput;
    u2 interface_count = in->read_u2();
    const u1 * interfaces = in->current();
    for (int index = 0; index < interface_count; index++)
    {
        u2 this_interface = in->read_u2();
        assert_exception(valid_cp_index(this_interface), 
            "interface index out of cp range");
        u1 tag  = get_cp_tag(this_interface);
        assert_exception(tag == CONSTANT_Class, 
            "interface refered is not CONSTANT_Class");
    }
    _interfaces_count = interface_count;
    int length = interface_count * sizeof(u2);
    _interfaces = new u1 [length];
    memcpy(_interfaces, interfaces, length);
}

void ClassFileParser::parseFields() throw (Exception)
{
    const char *ConstantValue = "ConstantValue";
    const char *Synthetic = "Synthetic";
    const char *Deprecated = "Deprecated";
    int ConstantValue_length = strlen(ConstantValue);
    int Synthetic_length = strlen(Synthetic);
    int Deprecated_length = strlen(Deprecated);

    ClassBufferInput *in = _classInput;
    u2 fields_count = in->read_u2();
    FieldInfo *fields = new FieldInfo [fields_count];
    assert_exception(fields != NULL, "out of memory");
    _fields_count = fields_count;
    _fields = fields;

    for (int index = 0; index < fields_count; index++)
    {
        u2 access_flags = in->read_u2();

        u2 name_index = in->read_u2();
        u1 tag1 = get_cp_tag(name_index);
        assert_exception(tag1 == CONSTANT_Utf8, 
            "field name_index refered is not CONSTANT_Utf8");

        u2 descriptor_index = in->read_u2();
        u1 tag2 = get_cp_tag(descriptor_index);
        assert_exception(tag2 == CONSTANT_Utf8,
            "field descriptor_index refered is not CONSTANT_Utf8");

        u1 is_ConstantValue = 0;
        u1 is_Synthetic = 0;
        u1 is_Deprecated = 0;
        u2 constantvalue_index = 0;
        u2 attributes_count = in->read_u2();
        for (int attr_index = 0; attr_index < attributes_count; attr_index++)
        {
            u2 attribute_name_index = in->read_u2();
            u4 attribute_length = in->read_u4();
            u2 length;
            const u1 * name = get_cp_utf8(attribute_name_index, length);
            if (length == ConstantValue_length
                && (memcmp(ConstantValue, name, length)) == 0) 
            {
                assert_exception(is_ConstantValue == 0, 
                    "no more than one  ConstantValue attribute");
                is_ConstantValue = 1;
                assert_exception(attribute_length == 2, 
                    "bad ConstantValue attribute length");
                constantvalue_index = in->read_u2();

            } else if (length == Synthetic_length
                && (memcmp(Synthetic, name, length)) == 0) 
            {
                is_Synthetic = 1;
                assert_exception(attribute_length == 0,
                    "bad Synthetic attribute length");

            } else if (length == Deprecated_length
                && (memcmp(Deprecated, name, length)) == 0)
            {
                is_Deprecated = 1;
                assert_exception(attribute_length == 0,
                    "bad Deprecated attribute length");
            } else {
                //Ignore any attribute that does not recongnize
                in->skip_bytes(attribute_length);
            }
        }

        fields[index].access_flags = access_flags;
        fields[index].name_index = name_index;
        fields[index].descriptor_index = descriptor_index;
        fields[index].is_ConstantValue = is_ConstantValue;
        fields[index].is_Synthetic = is_Synthetic;
        fields[index].is_Deprecated = is_Deprecated;
        fields[index].constantvalue_index = constantvalue_index;
    }
}

//Puts a MethodInfo record into a fully defined "empty" state.
//Every scalar field is zeroed and every owned buffer pointer is set to NULL,
//so the record can be copied safely and releaseMethods() can run on it at any
//point, even if parsing of the method was never started or failed half way.
//  minfo: record to reset; must not be NULL. Buffers it may currently point to
//         are NOT freed here.
void ClassFileParser::init_method_info(MethodInfo *minfo)
{
    //Header fields and flags filled in by parseMethods()
    minfo->access_flags = 0;
    minfo->name_index = 0;
    minfo->descriptor_index = 0;
    minfo->has_Code = 0;
    minfo->has_Exceptions = 0;
    minfo->is_Synthetic = 0;
    minfo->is_Deprecated = 0;

    //Fields filled in by parseCodeAttribute()
    minfo->max_stack = 0;
    minfo->max_locals = 0;
    minfo->code_length = 0;
    minfo->code = NULL;
    minfo->exception_table_length = 0;
    minfo->exception_table = NULL;
    minfo->total_line_number_table = 0;
    minfo->line_number_table = NULL;
    minfo->total_local_variable_table = 0;
    minfo->local_variable_table = NULL;

    //Fields filled in from the Exceptions attribute
    minfo->number_of_exceptions = 0;
    minfo->exception_index_table = NULL;
}

//Parses the body of a Code attribute starting at the current input position
//(the caller has already consumed attribute_name_index and attribute_length)
//and stores the result in *minfo.
//
//All LineNumberTable / LocalVariableTable attributes nested in the Code
//attribute are merged into one array each. The nested attributes are scanned
//twice: the first pass counts entries (and validates the attribute lengths),
//the second pass, after rewinding with mark()/reset(), reads the entries.
//
//  minfo:   record that receives the parsed data. It is only written after
//           everything has been read successfully.
//  returns: number of bytes consumed from the input, so the caller can compare
//           it against the declared attribute_length.
//  throws:  Exception on malformed input; every buffer allocated here is freed
//           before the exception is passed on.
u4 ClassFileParser::parseCodeAttribute(MethodInfo *minfo) throw (Exception)
{
    //Size in bytes of one table entry as stored in the class file. Pass 2 reads
    //exactly two / five u2 values per entry, so pass 1 must skip the same
    //amount instead of relying on the in-memory struct size.
    const u4 LineNumber_entry_size = 4;
    const u4 LocalVariable_entry_size = 10;

    u2 max_stack = 0, max_locals = 0;
    u4 code_length = 0;
    u1 *code = NULL;
    u2 exception_table_length = 0;
    Exception_table *exception_table = NULL;

    int total_line_number_table = 0;
    LineNumber_table *line_number_table = NULL;
    int total_local_variable_table = 0;
    LocalVariable_table *local_variable_table = NULL;

    ClassBufferInput *in = _classInput;
    const u1 * start = in->current();
    try {
        max_stack = in->read_u2();
        max_locals = in->read_u2();
        code_length = in->read_u4();
        if (code_length > 0) {
            code = new u1 [code_length];
            assert_exception(code != NULL, "out of memory");
            in->read_bytes(code, code_length);
        }
        exception_table_length = in->read_u2();
        if (exception_table_length > 0) {
            exception_table = new Exception_table [exception_table_length];
            assert_exception(exception_table != NULL, "out of memory");
        }
        for (int except_index = 0; except_index < exception_table_length; 
            except_index++) {
                exception_table[except_index].start_pc = in->read_u2();
                exception_table[except_index].end_pc = in->read_u2();
                exception_table[except_index].handler_pc = in->read_u2();
                exception_table[except_index].catch_type = in->read_u2();
        }

        //Read the attributes nested in the Code attribute (currently only
        //LineNumberTable and LocalVariableTable are recognized).
        //Pass 1: count the LineNumber_table / LocalVariable_table entries.
        const char *LineNumberTable = "LineNumberTable";
        const char *LocalVariableTable = "LocalVariableTable";
        int LineNumberTable_length = strlen(LineNumberTable);
        int LocalVariableTable_length = strlen(LocalVariableTable);
        int attributes_count = in->read_u2();
        in->mark();
        for (int attr_index = 0; attr_index < attributes_count; attr_index++)
        {
            u2 attribute_name_index = in->read_u2();
            u4 attribute_length = in->read_u4();
            u2 length;
            const u1 * name = get_cp_utf8(attribute_name_index, length);

            if (length == LineNumberTable_length
                && (memcmp(LineNumberTable, name, length)) == 0) 
            {
                u2 line_number_table_length = in->read_u2();
                u4 table_bytes = line_number_table_length * LineNumber_entry_size;
                assert_exception(attribute_length == 2 + table_bytes,
                    "bad LineNumberTable attribute length");
                in->skip_bytes(table_bytes);
                total_line_number_table += line_number_table_length;

            } else if (length == LocalVariableTable_length
                && (memcmp(LocalVariableTable, name, length)) == 0) 
            {
                u2 local_variable_table_length = in->read_u2();
                u4 table_bytes = local_variable_table_length * LocalVariable_entry_size;
                assert_exception(attribute_length == 2 + table_bytes,
                    "bad LocalVariableTable attribute length");
                in->skip_bytes(table_bytes);
                total_local_variable_table += local_variable_table_length;

            } else {
                //Ignore any attribute that is not recognized
                in->skip_bytes(attribute_length);
            }
        }

        if (total_line_number_table > 0) {
            line_number_table = new LineNumber_table [total_line_number_table];
            assert_exception(line_number_table != NULL, "out of memory");
        }
        if (total_local_variable_table > 0) {
            local_variable_table = new LocalVariable_table [total_local_variable_table];
            assert_exception(local_variable_table != NULL, "out of memory");
        }

        //Pass 2: rewind to the first nested attribute and read the entries,
        //appending the tables of successive attributes one after another.
        in->reset();
        int line_number_pos = 0;
        int local_variable_pos = 0;
        for (int attr_index = 0; attr_index < attributes_count; attr_index++)
        {
            u2 attribute_name_index = in->read_u2();
            u4 attribute_length = in->read_u4();
            u2 length;
            const u1 * name = get_cp_utf8(attribute_name_index, length);

            if (length == LineNumberTable_length
                && (memcmp(LineNumberTable, name, length)) == 0) 
            {
                u2 line_number_table_length = in->read_u2();
                for (int i = line_number_pos; 
                    i < (line_number_pos + line_number_table_length); i++) {
                    line_number_table[i].start_pc = in->read_u2();
                    line_number_table[i].line_number = in->read_u2();
                }
                line_number_pos += line_number_table_length;

            } else if (length == LocalVariableTable_length
                && (memcmp(LocalVariableTable, name, length)) == 0) 
            {
                u2 local_variable_table_length = in->read_u2();
                for (int i = local_variable_pos; 
                    i < (local_variable_pos + local_variable_table_length); i++) {
                    local_variable_table[i].start_pc = in->read_u2();
                    local_variable_table[i].length = in->read_u2();
                    local_variable_table[i].name_index = in->read_u2();
                    local_variable_table[i].descriptor_index = in->read_u2();
                    local_variable_table[i].index = in->read_u2();
                }
                local_variable_pos += local_variable_table_length;

            } else {
                //Ignore any attribute that is not recognized
                in->skip_bytes(attribute_length);
            }
        }

        //Everything was read successfully: hand the buffers over to *minfo.
        minfo->max_stack = max_stack;
        minfo->max_locals = max_locals;
        minfo->code_length = code_length;
        minfo->code = code;
        minfo->exception_table_length = exception_table_length;
        minfo->exception_table = exception_table;
        minfo->total_line_number_table = total_line_number_table;
        minfo->line_number_table = line_number_table;
        minfo->total_local_variable_table = total_local_variable_table;
        minfo->local_variable_table = local_variable_table;

        const u1 *end = in->current();
        u4 code_attr_length = (u4)(end - start);
        return code_attr_length;

    } catch (Exception &) {
        //Nothing has been stored in *minfo yet, so the buffers are still owned
        //by this function. delete [] on a NULL pointer is a no-op.
        delete [] code;
        delete [] exception_table;
        delete [] line_number_table;
        delete [] local_variable_table;
        //Rethrow the original exception object rather than a copy of it.
        throw;
    }
}

//Parses methods_count and the following method_info records from the input
//and stores them in _methods / _methods_count.
//
//For each method the access flags, name index and descriptor index are read,
//then its attributes: Code (delegated to parseCodeAttribute), Exceptions,
//Synthetic and Deprecated are recognized, anything else is skipped.
//
//  throws: Exception on malformed input. Buffers that were already allocated
//          are reachable through _methods, so releaseMethods() can free them.
void ClassFileParser::parseMethods() throw (Exception)
{
    const char *Code = "Code";
    const char *Exceptions = "Exceptions";
    const char *Synthetic = "Synthetic";
    const char *Deprecated = "Deprecated";
    int Code_length = strlen(Code);
    int Exceptions_length = strlen(Exceptions);
    int Synthetic_length = strlen(Synthetic);
    int Deprecated_length = strlen(Deprecated);

    ClassBufferInput *in = _classInput;
    u2 methods_count = in->read_u2();
    MethodInfo *methods = new MethodInfo [methods_count];
    assert_exception(methods != NULL, "out of memory");

    //Initialize every record in place before publishing the array, so that
    //resources acquired so far can be released if parsing fails later.
    for (int i = 0; i < methods_count; i++) {
        init_method_info(&methods[i]);
    }
    _methods_count = methods_count;
    _methods = methods;

    for (int index = 0; index < methods_count; index++)
    {
        u2 access_flags = in->read_u2();

        u2 name_index = in->read_u2();
        u1 tag1 = get_cp_tag(name_index);
        assert_exception(tag1 == CONSTANT_Utf8, 
            "method name_index refered is not CONSTANT_Utf8");

        u2 descriptor_index = in->read_u2();
        u1 tag2 = get_cp_tag(descriptor_index);
        assert_exception(tag2 == CONSTANT_Utf8,
            "method descriptor_index refered is not CONSTANT_Utf8");

        u1 has_Code = 0;
        u1 has_Exceptions = 0;
        u1 is_Synthetic = 0;
        u1 is_Deprecated = 0;
        u2 number_of_exceptions = 0;

        u2 attributes_count = in->read_u2();
        for (int attr_index = 0; attr_index < attributes_count; attr_index++)
        {
            u2 attribute_name_index = in->read_u2();
            u4 attribute_length = in->read_u4();
            u2 length;
            const u1 * name = get_cp_utf8(attribute_name_index, length);

            if (length == Code_length 
                && (memcmp(Code, name, length)) == 0) 
            {
                assert_exception(has_Code == 0, 
                    "no more than one Code attribute");
                has_Code = 1;
                u4 code_attr_length = parseCodeAttribute(&methods[index]);
                assert_exception(attribute_length == code_attr_length,
                    "bad Code attribute length");

            } else if (length == Exceptions_length
                && (memcmp(Exceptions, name, length)) == 0) 
            {
                assert_exception(has_Exceptions == 0,
                    "no more than one Exceptions attribute");
                has_Exceptions = 1;
                number_of_exceptions = in->read_u2();
                if (number_of_exceptions > 0) {
                    //Store the buffer in the method record immediately: if a
                    //later read or check throws, releaseMethods() still finds
                    //and frees it instead of it being leaked.
                    methods[index].exception_index_table = new u2 [number_of_exceptions];
                    assert_exception(methods[index].exception_index_table != NULL,
                        "out of memory");
                }
                u2 *exception_index_table = methods[index].exception_index_table;
                for (int i = 0; i < number_of_exceptions; i++) {
                    exception_index_table[i] = in->read_u2();
                }
                u4 exceptions_attr_length = 2 + (number_of_exceptions * 2);
                assert_exception(attribute_length == exceptions_attr_length,
                    "bad Exceptions attribute length");

            } else if (length == Synthetic_length
                && (memcmp(Synthetic, name, length)) == 0) 
            {
                is_Synthetic = 1;
                assert_exception(attribute_length == 0,
                    "bad Synthetic attribute length");

            } else if (length == Deprecated_length
                && (memcmp(Deprecated, name, length)) == 0) 
            {
                is_Deprecated = 1;
                assert_exception(attribute_length == 0,
                    "bad Deprecated attribute length");

            } else {
                //Ignore any attribute that is not recognized
                in->skip_bytes(attribute_length);
            }
        }

        //exception_index_table was already stored when it was allocated.
        methods[index].access_flags = access_flags;
        methods[index].name_index = name_index;
        methods[index].descriptor_index = descriptor_index;
        methods[index].has_Code = has_Code;
        methods[index].has_Exceptions = has_Exceptions;
        methods[index].is_Synthetic = is_Synthetic;
        methods[index].is_Deprecated = is_Deprecated;
        methods[index].number_of_exceptions = number_of_exceptions;
    }
}

void ClassFileParser::parseClassFile() throw (Exception)
{
    //parse class magic, version
    ClassBufferInput *in = _classInput;
    u4 magic = in->read_u4();
    assert_exception(magic == CLASS_FILE_MAGIC_U4, "bad magic value");
    u2 minorVersion = in->read_u2();
    u2 majorVersion = in->read_u2();
    assert_exception(is_supported_version(majorVersion, minorVersion), 
        "unsupported class version");
    _major_version = majorVersion;
    _minor_version = minorVersion;

    //parse constant pool
    parseConstantPool();

    //parse access_flags, this_clsss, super_class
    _access_flags = in->read_u2();
    //Todo: need check now?
    check_access_flags(_access_flags);

    _this_class = in->read_u2();
    check_this_class(_this_class);

    _super_class = in->read_u2();
    check_super_class(_super_class);

    //parse interfaces
    parseInterfaces();

    //parse fields
    parseFields();

    //parse methods
    parseMethods();
}

void ClassFileParser::releaseMethods()
{
    for (int i = 0; i < _methods_count; i++) 
    {
        u1 *code = _methods[i].code;
        Exception_table *exception_table = _methods[i].exception_table;
        LineNumber_table *line_number_table = _methods[i].line_number_table;
        LocalVariable_table *local_variable_table = _methods[i].local_variable_table;
        u2 *exception_index_table = _methods[i].exception_index_table;
        if (code != NULL) {
            delete [] code;
        }
        if (exception_table != NULL) {
            delete [] exception_table;
        }
        if (line_number_table != NULL) {
            delete [] line_number_table;
        }
        if (local_variable_table != NULL) {
            delete [] local_variable_table;
        }
        if (exception_index_table != NULL) {
            delete [] exception_index_table;
        }
    }
}

void ClassFileParser::releaseResource()
{
    if (_cp_index != NULL) {
        delete [] _cp_index;
    }
    if (_constant_pool != NULL) {
        delete [] _constant_pool;
    }
    if (_interfaces != NULL) {
        delete [] _interfaces;
    }
    if (_fields != NULL) {
        delete [] _fields;
    }
    if (_methods != NULL) {
        releaseMethods();
    }
}

//Prints a two-line summary of the parsed class to stdout: class version and
//constant pool figures on the first line, interface/field/method counts on
//the second.
void ClassFileParser::printSummary()
{
    //Adjacent string literals are used instead of a backslash continuation
    //inside one literal, which would copy the source indentation into the
    //printed text.
    printf("class version: %d.%d, cp count: %d, cp length: %d\n"
           "interface count: %d, field count: %d, method count: %d\n",
        _major_version, _minor_version, _cp_count, _cp_length,
        _interfaces_count, _fields_count, _methods_count);
}


//ClassBufferInput

//Creates a reader over the byte range [buffer, buffer + length).
//The read position and the mark both start at the beginning of the buffer.
//The buffer is only referenced, not copied.
ClassBufferInput::ClassBufferInput(const u1* buffer, int length)
{
    const u1 *first = buffer;
    const u1 *last = first + length;

    this->_buffer_start = first;
    this->_buffer_end = last;
    this->_current = first;
    this->_marked = first;
}

void ClassBufferInput::guarantee_size(int size) throw (Exception)
{
    if (size > (_buffer_end - _buffer_start)) {
        throw Exception("unexpected end of file");
    }
}

//Reads one byte at the current position and advances past it.
//  throws: Exception if guarantee_size(1) rejects the request.
u1 ClassBufferInput::read_u1() throw (Exception)
{
    guarantee_size(1);
    const u1 value = _current[0];
    _current += 1;
    return value;
}

//Reads a 2-byte value (decoded by read_java_u2) at the current position and
//advances past it.
//  throws: Exception if guarantee_size(2) rejects the request.
u2 ClassBufferInput::read_u2() throw (Exception)
{
    guarantee_size(2);
    const u1 *pos = _current;
    const u2 value = read_java_u2(pos);
    _current = pos + 2;
    return value;
}

//Reads a 4-byte value (decoded by read_java_u4) at the current position and
//advances past it.
//  throws: Exception if guarantee_size(4) rejects the request.
u4 ClassBufferInput::read_u4() throw (Exception)
{
    guarantee_size(4);
    const u1 *pos = _current;
    const u4 value = read_java_u4(pos);
    _current = pos + 4;
    return value;
}

//Copies `size` bytes from the current position into buf and advances past
//them.
//  buf:    destination; must have room for `size` bytes.
//  size:   number of bytes to copy.
//  throws: Exception if guarantee_size(size) rejects the request.
void ClassBufferInput::read_bytes(u1 *buf, int size) throw (Exception)
{
    guarantee_size(size);
    const u1 *source = _current;
    memcpy(buf, source, size);
    _current = source + size;
}

//Decodes a 2-byte big-endian value: buffer[0] is the high byte, buffer[1]
//the low byte. Performs no bounds checking and does not move any position.
u2 ClassBufferInput::read_java_u2(const u1 *buffer)
{
    const u2 high = buffer[0];
    const u2 low = buffer[1];
    return (u2)((high << 8) | low);
}

//Decodes a 4-byte big-endian value: buffer[0] is the most significant byte,
//buffer[3] the least significant. Performs no bounds checking and does not
//move any position.
u4 ClassBufferInput::read_java_u4(const u1 *buffer)
{
    u4 value = 0;
    for (int i = 0; i < 4; i++) {
        //Shift what has been collected so far and append the next byte.
        value = (value << 8) | (u4)buffer[i];
    }
    return value;
}

//Advances the current position by one byte without reading it.
//  throws: Exception if guarantee_size(1) rejects the request.
void ClassBufferInput::skip_u1() throw (Exception)
{
    const int width = 1;
    guarantee_size(width);
    _current = _current + width;
}

//Advances the current position by two bytes without reading them.
//  throws: Exception if guarantee_size(2) rejects the request.
void ClassBufferInput::skip_u2() throw (Exception)
{
    const int width = 2;
    guarantee_size(width);
    _current = _current + width;
}

//Advances the current position by four bytes without reading them.
//  throws: Exception if guarantee_size(4) rejects the request.
void ClassBufferInput::skip_u4() throw (Exception)
{
    const int width = 4;
    guarantee_size(width);
    _current = _current + width;
}

void ClassBufferInput::skip_bytes(int size) throw (Exception)
{
    guarantee_size(size);
    _current += size;
}

void ClassBufferInput::mark()
{
    _marked = _current;
}

void ClassBufferInput::reset()
{
    _current = _marked;
}


JVMTest.cpp

(只贴出有修改的部分)

//Test driver: loads the class file named by argv[1] into memory, parses it
//and prints a summary.
//  returns: 0 when only the usage text was printed or the file was parsed,
//           -1 when the file could not be loaded or could not be parsed.
int main(int argc, char *argv[])
{
    printf("JVM Test, JVM Utilities test...\n");
    if (argc < 2) {
        printf("Usage: JVMTest classfile\n");
        return 0;
    }
    const char *classFileName = argv[1];
    int bufferLength;
    char *classBuffer = loadClassToBuffer(classFileName, &bufferLength);
    if (classBuffer == NULL) {
        printf("\nload class file failed");
        return -1;
    }

    //NOTE(review): classBuffer is never released here. How it must be freed
    //depends on how loadClassToBuffer allocates it - confirm and release it.
    ClassFileParser parser((const u1 *)classBuffer, bufferLength);
    try {
        parser.parseClassFile();
        parser.printSummary();
    } catch (Exception &e) {
        //Catch by reference so the exception object is not copied.
        printf("parse class file failed: %s", e.getMessage());
        parser.releaseResource();
        //Report the failure to the caller, like the load failure above.
        return -1;
    }

    //test
    //Exception e("abcdef");

    return 0;
}


 

 

 

评论
成就一亿技术人!
拼手气红包6.0元
还能输入1000个字符
 
红包 添加红包
表情包 插入表情
 条评论被折叠 查看
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值