目前完成了对class文件方法的解析。注意,无论是对接口,域,方法,还是对常量池的解析,目前所做的工作都是试验性的。
我只是按照class文件格式进行解析,至于解析结果如何保存才合理,要到开发执行引擎时才能清楚。此次修改也调整了一些其它代码,故此贴出完整代码。
ClassFileParser.h
#pragma once
#include "util.h"
// u1, u2 and u4 are unsigned integers of 1, 2 and 4 bytes respectively.
// They must be defined to match the target compilation platform exactly.
// The JVM specification uses them extensively to describe the class file format.
typedef unsigned int u4;
typedef unsigned short u2;
typedef unsigned char u1;
// Platform-appropriate definitions for the 4-byte float, the 8-byte long and
// the 8-byte double-precision floating-point value.
typedef float JFLOAT;
typedef long long JLONG;
typedef double JDOUBLE;
struct cp_info;
struct field_info;
struct method_info;
struct attribute_info;
#pragma pack(1)
/* Class文件结构,参考JVM规范
//class文件的结构,直接明了。
//class文件中u2,u4,u8的存储都是big-endian顺序(高字节在前,低字节在后)
ClassFile
{
u4 magic;
u2 minor_version;
u2 major_version;
u2 constant_pool_count; //按照JVM规范,此值cp_count等于cp_info的记录数+1
cp_info constant_pool[constant_pool_count-1];
u2 access_flags;
u2 this_class;
u2 super_class;
u2 interfaces_count;
u2 interfaces[interfaces_count];
u2 fields_count;
field_info fields[fields_count];
u2 methods_count;
method_info methods[methods_count];
u2 attributes_count;
attribute_info attributes[attributes_count];
}
cp_info
{
u1 tag;
u1 info[];
}
CONSTANT_Class_info {
u1 tag;
u2 name_index; //常量池索引,该索引处的常量项必须是一个CONSTANT_Utf8_info
}
CONSTANT_Fieldref_info {
u1 tag;
u2 class_index; //常量池索引,必须指向一个CONSTANT_Class_info
u2 name_and_type_index; //常量池索引,必须指向一个CONSTANT_NameAndType_info
}
CONSTANT_Methodref_info {
u1 tag;
u2 class_index; //常量池索引,必须指向一个CONSTANT_Class_info
u2 name_and_type_index; //常量池索引,必须指向一个CONSTANT_NameAndType_info
}
CONSTANT_InterfaceMethodref_info {
u1 tag;
u2 class_index; //常量池索引,必须指向一个CONSTANT_Class_info
u2 name_and_type_index; //常量池索引,必须指向一个CONSTANT_NameAndType_info
}
CONSTANT_String_info {
u1 tag;
u2 string_index; //常量池索引,必须指向一个CONSTANT_Utf8_info
}
CONSTANT_Integer_info {
u1 tag;
u4 bytes; //4字节整数,高字节在前
}
CONSTANT_Float_info {
u1 tag;
u4 bytes; //4字节浮点数,IEEE 754格式,高字节在前
}
CONSTANT_Long_info {
u1 tag;
u4 high_bytes; //8字节整数,高字节在前
u4 low_bytes;
}
CONSTANT_Double_info {
u1 tag;
u4 high_bytes; //双精度浮点数,IEEE 754格式,高字节在前
u4 low_bytes;
}
CONSTANT_NameAndType_info {
u1 tag;
u2 name_index; //field或method的简单名字。该索引必须指向一个CONSTANT_Utf8_info
u2 descriptor_index; //field或method的描述符。该索引必须指向一个CONSTANT_Utf8_info
}
CONSTANT_Utf8_info {
u1 tag;
u2 length;
u1 bytes[length];
}
attribute_info
{
u2 attribute_name_index; //索引必须指向常量池的一个CONSTANT_Utf8_info
u4 attribute_length;
u1 info[attribute_length];
}
field_info
{
u2 access_flags;
u2 name_index; //field simple name, 索引必须指向常量池的一个CONSTANT_Utf8_info
u2 descriptor_index; //field descriptor, 索引必须指向常量池的一个CONSTANT_Utf8_info
u2 attributes_count;
attribute_info attributes[attributes_count];
}
struct method_info
{
u2 access_flags;
u2 name_index;
u2 descriptor_index;
u2 attributes_count;
attribute_info attributes[attributes_count];
}
Code_attribute
{
u2 attribute_name_index;
u4 attribute_length;
u2 max_stack;
u2 max_locals;
u4 code_length;
u1 code[code_length];
u2 exception_table_length;
Exception_table exception_table[exception_table_length];
u2 attributes_count;
attribute_info attributes[attributes_count];
};
*/
// General layout of a constant pool entry: a one-byte tag followed by
// tag-specific data.
// C cannot describe a variable-length member directly, so a placeholder array
// is used instead; the other variable-length structures use the same trick.
struct cp_info
{
u1 tag;
u1 info[0]; // placeholder with no fixed length (declared as 0); requires compiler support
};
// Constant pool tag values. The enumerators are numbered consecutively
// starting at 1; parseConstantPool and saveConstantPool switch on them.
enum
{
CONSTANT_Utf8 = 1,
CONSTANT_Unicode, //not used
CONSTANT_Integer,
CONSTANT_Float,
CONSTANT_Long,
CONSTANT_Double,
CONSTANT_Class,
CONSTANT_String,
CONSTANT_Fieldref,
CONSTANT_Methodref,
CONSTANT_InterfaceMethodref,
CONSTANT_NameAndType,
};
// Stored form of a CONSTANT_Class entry.
struct CONSTANT_Class_info
{
u1 tag;
u2 name_index; // constant pool index; saveConstantPool requires it to refer to a CONSTANT_Utf8 entry
};
// Stored form of a CONSTANT_Fieldref entry.
struct CONSTANT_Fieldref_info
{
u1 tag;
u2 class_index; // constant pool index; checked to refer to a CONSTANT_Class entry
u2 name_and_type_index; // constant pool index; checked to refer to a CONSTANT_NameAndType entry
};
// Stored form of a CONSTANT_Methodref entry.
struct CONSTANT_Methodref_info
{
u1 tag;
u2 class_index; // constant pool index; checked to refer to a CONSTANT_Class entry
u2 name_and_type_index; // constant pool index; checked to refer to a CONSTANT_NameAndType entry
};
// Stored form of a CONSTANT_InterfaceMethodref entry.
struct CONSTANT_InterfaceMethodref_info
{
u1 tag;
u2 class_index; // constant pool index; checked to refer to a CONSTANT_Class entry
u2 name_and_type_index; // constant pool index; checked to refer to a CONSTANT_NameAndType entry
};
// Stored form of a CONSTANT_String entry.
struct CONSTANT_String_info
{
u1 tag;
u2 string_index; // constant pool index; checked to refer to a CONSTANT_Utf8 entry
};
// Stored form of a CONSTANT_Integer entry.
struct CONSTANT_Integer_info
{
u1 tag;
u4 ivalue; // the 4-byte value as returned by read_u4 (already converted from big-endian)
};
// Stored form of a CONSTANT_Float entry.
struct CONSTANT_Float_info
{
u1 tag;
JFLOAT fvalue; // saveConstantPool reinterprets the 4 bytes it read as a JFLOAT
};
// Stored form of a CONSTANT_Long entry. It occupies two constant pool indices.
struct CONSTANT_Long_info
{
u1 tag;
JLONG lvalue; // high 4 bytes shifted left by 32, OR-ed with the low 4 bytes
};
// Stored form of a CONSTANT_Double entry. It occupies two constant pool indices.
struct CONSTANT_Double_info
{
u1 tag;
JDOUBLE dvalue; // saveConstantPool reinterprets the combined 8 bytes as a JDOUBLE
};
// Stored form of a CONSTANT_NameAndType entry.
struct CONSTANT_NameAndType_info
{
u1 tag;
u2 name_index; // constant pool index; checked to refer to a CONSTANT_Utf8 entry
u2 descriptor_index; // constant pool index; checked to refer to a CONSTANT_Utf8 entry
};
// Stored form of a CONSTANT_Utf8 entry: a length followed by that many bytes.
// The bytes are copied verbatim and are not NUL-terminated.
struct CONSTANT_Utf8_info
{
u1 tag;
u2 length;
u1 bytes[0]; // placeholder; the entry actually carries [length] bytes
};
// Internal representation of a field_info record, filled in by parseFields.
//Currently, associated attributes are: ConstantValue, Synthetic, Deprecated.
//Other unknown attributes will be ignored according to JVM Specification.
struct FieldInfo
{
u2 access_flags;
u2 name_index; // constant pool index of the field name (checked to be CONSTANT_Utf8)
u2 descriptor_index; // constant pool index of the field descriptor (checked to be CONSTANT_Utf8)
u1 is_ConstantValue; // 1 when a ConstantValue attribute was present, otherwise 0
u2 constantvalue_index; // value read from the ConstantValue attribute; 0 when absent
u1 is_Synthetic; // 1 when a Synthetic attribute was present, otherwise 0
u1 is_Deprecated; // 1 when a Deprecated attribute was present, otherwise 0
};
// One row of a Code attribute's exception table. parseCodeAttribute fills the
// members from four consecutive u2 reads, in declaration order.
struct Exception_table
{
u2 start_pc;
u2 end_pc;
u2 handler_pc;
u2 catch_type;
};
// One row of a LineNumberTable attribute. parseCodeAttribute also uses
// sizeof(LineNumber_table) as the on-disk row size when skipping rows.
struct LineNumber_table
{
u2 start_pc;
u2 line_number;
};
// One row of a LocalVariableTable attribute. parseCodeAttribute also uses
// sizeof(LocalVariable_table) as the on-disk row size when skipping rows.
struct LocalVariable_table
{
u2 start_pc;
u2 length;
u2 name_index;
u2 descriptor_index;
u2 index;
};
// Internal representation of a method_info record, filled in by parseMethods
// and parseCodeAttribute. The pointer members are heap arrays released by
// ClassFileParser::releaseMethods.
//Currently, associated attributes are: Code, Exceptions, Synthetic, Deprecated.
//Other unknown attributes will be ignored according to JVM Specification.
struct MethodInfo
{
u2 access_flags;
u2 name_index;
u2 descriptor_index;
u1 has_Code; // 1 when a Code attribute was present, otherwise 0
u2 max_stack;
u2 max_locals;
u4 code_length;
u1 *code; // code_length bytes copied from the Code attribute, or NULL
u2 exception_table_length;
Exception_table *exception_table; // exception_table_length rows, or NULL
//Currently,the possible attributes that associated with Code attribute are:
//LineNumberTable and LocalVariableTable. Both of each store debugging information.
int total_line_number_table; // rows summed over all LineNumberTable attributes
LineNumber_table *line_number_table;
int total_local_variable_table; // rows summed over all LocalVariableTable attributes
LocalVariable_table *local_variable_table;
u2 number_of_exceptions; // number of checked exceptions the method may throw
u2 *exception_index_table; // class name of each exception (each index refers to a CONSTANT_Class_info)
u1 has_Exceptions; // 1 when an Exceptions attribute was present, otherwise 0
u1 is_Synthetic;
u1 is_Deprecated;
};
#pragma pack()
class ClassBufferInput;
// Parses a class file held in a caller-supplied memory buffer and keeps the
// parsed constant pool, interfaces, fields and methods in heap arrays.
class ClassFileParser
{
public:
// Stores the buffer pointer and length; the buffer itself is not copied.
ClassFileParser(const u1 *classInBuffer, int length);
virtual ~ClassFileParser(void);
// The class file format can only be parsed in order:
// magic, version, constant pool, ..., etc.
// Throws Exception when a check fails.
void parseClassFile() throw (Exception);
// releaseResource frees the resources already allocated; intended for use
// after a failed parse.
void releaseResource();
// Prints the version and the entry counts gathered by parseClassFile.
void printSummary();
private:
const u1 * _class_buffer;
int _buffer_length;
ClassBufferInput *_classInput;
// True when index lies in [1, cp_count).
bool valid_cp_index(int index, int cp_count) {
return (index >= 1 && index < cp_count);
}
// Same check against the parsed constant pool count.
bool valid_cp_index(int index) {
return (index >= 1 && index < _cp_count);
}
bool is_supported_version(u2 majorVersion, u2 minorVersion);
bool check_utf8_string(const u1 *bytes, int length);
// First pass over the constant pool: measures and checks it, then calls
// saveConstantPool for the second pass.
void parseConstantPool() throw (Exception);
void saveConstantPool() throw (Exception);
void check_this_class(u2 this_class) throw (Exception);
void check_access_flags(u2 access_flags) throw (Exception);
void check_super_class(u2 super_class) throw (Exception);
// Constant pool accessors; index is the 1-based class file index.
const cp_info * get_cp_info(int index) throw (Exception);
u1 get_cp_tag(int index) throw (Exception);
const u1 * get_cp_utf8(int index, u2 & length) throw (Exception);
void parseInterfaces() throw (Exception);
void parseFields() throw (Exception);
void parseMethods() throw (Exception);
void init_method_info(MethodInfo *minfo);
// Returns the number of bytes consumed while parsing the Code attribute body.
u4 parseCodeAttribute(MethodInfo *minfo) throw (Exception);
void releaseMethods();
private:
u2 _major_version;
u2 _minor_version;
int _cp_count; // constant_pool_count as read from the file (number of entries + 1)
// The runtime constant pool is kept in two arrays: one holds the entry data,
// the other holds a pointer to each entry.
cp_info ** _cp_index;
u1 * _constant_pool;
int _cp_length; // size of _constant_pool in bytes
u1 _tag_0; // zero tag; its address fills the second slot of Long/Double entries
u2 _access_flags;
u2 _this_class;
u2 _super_class;
int _interfaces_count;
u1 * _interfaces; // raw copy of the interfaces table bytes from the class file
int _fields_count;
FieldInfo *_fields;
int _methods_count;
MethodInfo *_methods;
};
// Sequential reader over a memory buffer holding class file bytes.
// Multi-byte values are decoded high byte first.
class ClassBufferInput
{
public:
// Stores the buffer bounds; the buffer is not copied.
ClassBufferInput(const u1* buffer, int length);
const u1* buffer() const { return _buffer_start; }
int length() const { return _buffer_end - _buffer_start; }
// Current read position.
const u1* current() const { return _current; }
// Each read_* call decodes a value at the current position and advances past it.
u1 read_u1() throw (Exception);
u2 read_u2() throw (Exception);
u4 read_u4() throw (Exception);
// Copies size bytes into buf and advances.
void read_bytes(u1 *buf, int size) throw (Exception);
// Each skip_* call advances without decoding.
void skip_u1() throw (Exception);
void skip_u2() throw (Exception);
void skip_u4() throw (Exception);
void skip_bytes(int size) throw (Exception);
// mark remembers the current position; reset returns to the remembered one.
// Only a single mark is kept.
void mark();
void reset();
// Decode a big-endian value from buffer without any bounds check.
static u2 read_java_u2(const u1 *buffer);
static u4 read_java_u4(const u1 *buffer);
private:
const u1* _buffer_start;
const u1* _buffer_end; // one past the last byte
const u1* _current;
const u1* _marked;
// Throws Exception("unexpected end of file") when the size check fails.
void guarantee_size(int size) throw (Exception);
};
ClassFileParser.cpp
#include "ClassFileParser.h"
#include <stdio.h>
#include <string.h>
#define CLASS_FILE_MAGIC_U4 0xCAFEBABE
// Creates a parser over an in-memory class file image.
//   classInBuffer - start of the class file bytes; only the pointer is stored,
//                   the bytes are not copied
//   length        - number of bytes available at classInBuffer
// Every member is given a defined value here so that releaseResource() and
// printSummary() are safe even when parsing fails early (for example on a bad
// magic number) or never runs at all.
ClassFileParser::ClassFileParser(const u1 *classInBuffer, int length)
{
    _class_buffer = classInBuffer;
    _buffer_length = length;
    _classInput = new ClassBufferInput(_class_buffer, _buffer_length);
    _major_version = 0;
    _minor_version = 0;
    _cp_count = 0;
    _cp_length = -1;
    _constant_pool = NULL;
    _cp_index = NULL;
    _tag_0 = 0;
    _access_flags = 0;
    _this_class = 0;
    _super_class = 0;
    _interfaces_count = 0;
    _interfaces = NULL;
    _fields_count = 0;
    _fields = NULL;
    // Previously left uninitialised: releaseResource() tests _methods against
    // NULL and then walks _methods_count entries.
    _methods_count = 0;
    _methods = NULL;
}
// Destroys the buffer reader. The parsed data (constant pool, fields, methods)
// is deliberately not freed here; see releaseResource().
ClassFileParser::~ClassFileParser(void)
{
    // Deleting a null pointer is a no-op, so no explicit check is required.
    delete _classInput;
    // TODO: after a failed parse the allocated resources must be released;
    // after a successful parse they must be kept.
    //if (_constant_pool != NULL) {
    //    delete [] _constant_pool;
    //}
}
// Reports whether a class file version is accepted by this parser.
//   major - major_version from the class file
//   minor - minor_version from the class file (currently not examined)
// Returns true for major versions 45 through 51 inclusive.
//
// Real JVM version numbers are not intuitive: a "1.5" class file does not have
// major=1, minor=5. HotSpot's own minimum supported major version is 45, so 45
// itself must be accepted (the previous "major > 45" test rejected it).
bool ClassFileParser::is_supported_version(u2 major, u2 minor)
{
    const u2 min_major = 45;
    const u2 max_major = 51;
    (void)minor;
    return major >= min_major && major <= max_major;
}
// Placeholder for validation of a CONSTANT_Utf8 byte sequence.
//   bytes  - first byte of the string
//   length - number of bytes in the string
// Always returns true; no byte is inspected yet.
bool ClassFileParser::check_utf8_string(const u1 *bytes, int length)
{
    // TODO: check utf8 string
    (void)bytes;
    (void)length;
    return true;
}
// Returns the stored constant pool entry for a 1-based class file index.
// Raises an Exception through assert_exception when index is outside
// [1, _cp_count).
const cp_info * ClassFileParser::get_cp_info(int index) throw (Exception)
{
    assert_exception(valid_cp_index(index), "cp index out of cp range");
    // _cp_index is 0-based while class file indices start at 1.
    return _cp_index[index - 1];
}
// Returns the tag byte of the constant pool entry at a 1-based index.
// The index check is performed by get_cp_info.
u1 ClassFileParser::get_cp_tag(int index) throw (Exception)
{
    const cp_info *entry = get_cp_info(index);
    return entry->tag;
}
// Looks up a CONSTANT_Utf8 entry.
//   index  - 1-based constant pool index
//   length - receives the number of bytes in the string
// Returns a pointer to the string bytes inside the stored constant pool; the
// bytes are not NUL-terminated. Raises an Exception through assert_exception
// when the index is invalid or the entry is not CONSTANT_Utf8.
const u1 * ClassFileParser::get_cp_utf8(int index, u2 & length) throw (Exception)
{
    const cp_info *entry = get_cp_info(index);
    const u1 entry_tag = entry->tag;
    assert_exception(entry_tag == CONSTANT_Utf8, "cp indexed is not CONSTANT_Utf8");
    const CONSTANT_Utf8_info *utf8 = (const CONSTANT_Utf8_info *)entry;
    length = utf8->length;
    return utf8->bytes;
}
void ClassFileParser::saveConstantPool() throw (Exception)
{
ClassBufferInput *in = _classInput;
//现在已知常量池的长度,保存到一个数组中(目前运行时常量池与原始常量池的长度相同!)
//另用一个数组保存各个常量项的索引
_constant_pool = new u1 [_cp_length];
assert_exception(_constant_pool != NULL, "out of memory");
_cp_index = new cp_info * [_cp_count - 1];
assert_exception(_cp_index != NULL, "out of memory");
cp_info * current = (cp_info *)_constant_pool;
for (int index = 0; index < _cp_count - 1; index++)
{
u1 tag = in->read_u1();
switch (tag)
{
case CONSTANT_Utf8:
{
CONSTANT_Utf8_info *pUtf8 = (CONSTANT_Utf8_info *)current;
u2 length = in->read_u2();
_cp_index[index] = current;
pUtf8->tag = tag;
pUtf8->length = length;
in->read_bytes(pUtf8->bytes, length);
current += (sizeof(CONSTANT_Utf8_info) + length);
}
break;
case CONSTANT_Integer:
{
CONSTANT_Integer_info *pInteger = (CONSTANT_Integer_info *)current;
u4 bytes = in->read_u4();
_cp_index[index] = current;
pInteger->tag = tag;
pInteger->ivalue = bytes;
current += sizeof(CONSTANT_Integer_info);
}
break;
case CONSTANT_Float:
{
CONSTANT_Float_info *pFloat = (CONSTANT_Float_info *)current;
u4 bytes = in->read_u4();
_cp_index[index] = current;
pFloat->tag = tag;
union {
JFLOAT f;
u4 bytes;
} u;
u.bytes = bytes;
pFloat->fvalue = u.f;
current += sizeof(CONSTANT_Float_info);
}
break;
case CONSTANT_Long:
{
CONSTANT_Long_info *pLong = (CONSTANT_Long_info *)current;
u4 high_bytes = in->read_u4();
u4 low_bytes = in->read_u4();
_cp_index[index] = current;
pLong->tag = tag;
pLong->lvalue = (JLONG)high_bytes << 32 | (JLONG)low_bytes;
index++; //JVM规范:8字节的常量池项在计数上占两个
_cp_index[index] = (cp_info *)&_tag_0;
current += sizeof(CONSTANT_Long_info);
}
break;
case CONSTANT_Double:
{
CONSTANT_Double_info *pDouble = (CONSTANT_Double_info *)current;
u4 high_bytes = in->read_u4();
u4 low_bytes = in->read_u4();
_cp_index[index] = current;
pDouble->tag = tag;
union {
JDOUBLE d;
JLONG l;
} u;
JLONG l = (JLONG)high_bytes << 32 | (JLONG)low_bytes;
u.l = l;
pDouble->dvalue = u.d;
index++; //JVM规范:8字节的常量池项在计数上占两个
_cp_index[index] = (cp_info *)&_tag_0;
current += sizeof(CONSTANT_Double_info);
}
break;
case CONSTANT_Class:
{
CONSTANT_Class_info *pClass = (CONSTANT_Class_info *)current;
u2 name_index = in->read_u2();
_cp_index[index] = current;
pClass->tag = tag;
pClass->name_index = name_index;
current += sizeof(CONSTANT_Class_info);
}
break;
case CONSTANT_String:
{
CONSTANT_String_info *pString = (CONSTANT_String_info *)current;
u2 string_index = in->read_u2();
_cp_index[index] = current;
pString->tag = tag;
pString->string_index = string_index;
current += sizeof(CONSTANT_String_info);
}
break;
case CONSTANT_Fieldref:
{
CONSTANT_Fieldref_info *pFieldref
= (CONSTANT_Fieldref_info *)current;
u2 class_index = in->read_u2();
u2 name_and_type_index = in->read_u2();
_cp_index[index] = current;
pFieldref->tag = tag;
pFieldref->class_index = class_index;
pFieldref->name_and_type_index = name_and_type_index;
current += sizeof(CONSTANT_Fieldref_info);
}
break;
case CONSTANT_Methodref:
{
CONSTANT_Methodref_info *pMethodref
= (CONSTANT_Methodref_info *)current;
u2 class_index = in->read_u2();
u2 name_and_type_index = in->read_u2();
_cp_index[index] = current;
pMethodref->tag = tag;
pMethodref->class_index = class_index;
pMethodref->name_and_type_index = name_and_type_index;
current += sizeof(CONSTANT_Methodref_info);
}
break;
case CONSTANT_InterfaceMethodref:
{
CONSTANT_InterfaceMethodref_info *pInterfaceMethodref
= (CONSTANT_InterfaceMethodref_info *)current;
u2 class_index = in->read_u2();
u2 name_and_type_index = in->read_u2();
_cp_index[index] = current;
pInterfaceMethodref->tag = tag;
pInterfaceMethodref->class_index = class_index;
pInterfaceMethodref->name_and_type_index = name_and_type_index;
current += sizeof(CONSTANT_InterfaceMethodref_info);
}
break;
case CONSTANT_NameAndType:
{
CONSTANT_NameAndType_info *pNameAndType
= (CONSTANT_NameAndType_info *)current;
u2 name_index = in->read_u2();
u2 descriptor_index = in->read_u2();
_cp_index[index] = current;
pNameAndType->tag = tag;
pNameAndType->name_index = name_index;
pNameAndType->descriptor_index = descriptor_index;
current += sizeof(CONSTANT_NameAndType_info);
}
break;
default:
{
char msg[30];
sprintf(msg, "unknown tag: %d", tag);
assert_exception(false, msg);
}
break;
}
}
//验证相等(目前必定不相等,因为已经从big-endian顺序转化为本机顺序)
//int magic_version_length = sizeof(u4) + sizeof(u2) + sizeof(u2);
//const u1 * p1 = _class_buffer + magic_version_length + 2;
//int cmp = memcmp(p1, _constant_pool, _cp_length);
//检查常量项相互之间的引用是否正确
for (int index = 1; index < _cp_count; index++)
{
cp_info * current = _cp_index[index - 1];
u1 tag = current->tag;
switch (tag)
{
case CONSTANT_Utf8:
break;
case CONSTANT_Integer:
break;
case CONSTANT_Float:
break;
case CONSTANT_Long:
{
index++;
}
break;
case CONSTANT_Double:
{
index++;
}
break;
case CONSTANT_Class:
{
CONSTANT_Class_info *pClass = (CONSTANT_Class_info *)current;
u1 ref_tag = get_cp_tag(pClass->name_index);
assert_exception(ref_tag == CONSTANT_Utf8,
"name_index refered is not CONSTANT_Utf8");
}
break;
case CONSTANT_String:
{
CONSTANT_String_info *pString = (CONSTANT_String_info *)current;
u1 ref_tag = get_cp_tag(pString->string_index);
assert_exception(ref_tag == CONSTANT_Utf8,
"string_index refered is not CONSTANT_Utf8");
}
break;
case CONSTANT_Fieldref:
{
CONSTANT_Fieldref_info *pFieldref
= (CONSTANT_Fieldref_info *)current;
u1 ref_tag1 = get_cp_tag(pFieldref->class_index);
assert_exception(ref_tag1 == CONSTANT_Class,
"class_index refered is not CONSTANT_Class");
u1 ref_tag2 = get_cp_tag(pFieldref->name_and_type_index);
assert_exception(ref_tag2 == CONSTANT_NameAndType,
"name_and_type_index refered is not CONSTANT_NameAndType");
}
break;
case CONSTANT_Methodref:
{
CONSTANT_Methodref_info *pMethodref
= (CONSTANT_Methodref_info *)current;
u1 ref_tag1 = get_cp_tag(pMethodref->class_index);
assert_exception(ref_tag1 == CONSTANT_Class,
"class_index refered is not CONSTANT_Class");
u1 ref_tag2 = get_cp_tag(pMethodref->name_and_type_index);
assert_exception(ref_tag2 == CONSTANT_NameAndType,
"name_and_type_index refered is not CONSTANT_NameAndType");
}
break;
case CONSTANT_InterfaceMethodref:
{
CONSTANT_InterfaceMethodref_info *pInterfaceMethodref
= (CONSTANT_InterfaceMethodref_info *)current;
u1 ref_tag1 = get_cp_tag(pInterfaceMethodref->class_index);
assert_exception(ref_tag1 == CONSTANT_Class,
"class_index refered is not CONSTANT_Class");
u1 ref_tag2 = get_cp_tag(pInterfaceMethodref->name_and_type_index);
assert_exception(ref_tag2 == CONSTANT_NameAndType,
"name_and_type_index refered is not CONSTANT_NameAndType");
}
break;
case CONSTANT_NameAndType:
{
CONSTANT_NameAndType_info *pNameAndType
= (CONSTANT_NameAndType_info *)current;
u1 ref_tag1 = get_cp_tag(pNameAndType->name_index);
assert_exception(ref_tag1 == CONSTANT_Utf8,
"name_index refered is not CONSTANT_Utf8");
u1 ref_tag2 = get_cp_tag(pNameAndType->descriptor_index);
assert_exception(ref_tag2 == CONSTANT_Utf8,
"descriptor_index refered is not CONSTANT_Utf8");
}
break;
}
}
}
void ClassFileParser::parseConstantPool() throw (Exception)
{
ClassBufferInput *in = _classInput;
u2 cp_count = in->read_u2(); //常量池项目数 + 1
assert_exception(cp_count >= 1, "bad constant pool size");
//下面遍历一遍常量池,为了统计常量池的长度(字节数),顺便执行一些检查
in->mark(); //第二遍将重读常量池,所以先标记一下
int cp_length = 0;
int cp_info_length;
for (int index = 1; index < cp_count; index++)
{
cp_info_length = -1;
u1 tag = in->read_u1();
printf("index:%d, tag: %d, ", index, tag);
switch (tag)
{
case CONSTANT_Utf8:
{
u2 length = in->read_u2();
//检查utf8字符串
bool isUtf8 = check_utf8_string(in->current(), length);
assert_exception(isUtf8, "bad utf8 string");
in->skip_bytes(length);
cp_info_length = 2 + length;
}
break;
case CONSTANT_Integer:
{
in->skip_u4();
cp_info_length = 4;
}
break;
case CONSTANT_Float:
{
in->skip_u4();
cp_info_length = 4;
}
break;
case CONSTANT_Long:
{
in->skip_bytes(8);
cp_info_length = 8;
index++; //JVM规范:8字节的常量池项在计数上占两个
}
break;
case CONSTANT_Double:
{
in->skip_bytes(8);
cp_info_length = 8;
index++; //JVM规范:8字节的常量池项在计数上占两个
}
break;
case CONSTANT_Class:
{
u2 name_index = in->read_u2();
assert_exception(valid_cp_index(name_index, cp_count),
"bad constant pool index");
cp_info_length = 2;
}
break;
case CONSTANT_String:
{
u2 string_index = in->read_u2();
assert_exception(valid_cp_index(string_index, cp_count),
"bad constant pool index");
cp_info_length = 2;
}
break;
case CONSTANT_Fieldref:
case CONSTANT_Methodref:
case CONSTANT_InterfaceMethodref:
{
u2 class_index = in->read_u2();
u2 name_and_type_index = in->read_u2();
assert_exception(valid_cp_index(class_index, cp_count),
"bad constant pool index");
assert_exception(valid_cp_index(name_and_type_index, cp_count),
"bad constant pool index");
cp_info_length = 4;
}
break;
case CONSTANT_NameAndType:
{
u2 name_index = in->read_u2();
u2 descriptor_index = in->read_u2();
assert_exception(valid_cp_index(name_index, cp_count),
"bad constant pool index");
assert_exception(valid_cp_index(descriptor_index, cp_count),
"bad constant pool index");
cp_info_length = 4;
}
break;
default:
{
char msg[30];
sprintf(msg, "unknown tag: %d", tag);
assert_exception(false, msg);
}
break;
}
assert_exception(cp_info_length != -1, "internal error");
printf("length: %d\n", cp_info_length);
cp_length += (cp_info_length + 1);
}
_cp_count = cp_count;
_cp_length = cp_length;
in->reset(); //重读常量池(第二遍)
saveConstantPool();
}
// Intended to validate the class-level access_flags value.
// Currently empty: every value is accepted and nothing is thrown.
void ClassFileParser::check_access_flags(u2 access_flags) throw (Exception)
{
// TODO: not implemented yet
}
// Verifies that this_class is a valid constant pool index whose entry is a
// CONSTANT_Class. Raises an Exception through assert_exception otherwise.
void ClassFileParser::check_this_class(u2 this_class) throw (Exception)
{
    assert_exception(valid_cp_index(this_class),
        "this_class out of cp range");
    const u1 entry_tag = get_cp_tag(this_class);
    assert_exception(entry_tag == CONSTANT_Class,
        "this_class refered is not CONSTANT_Class");
}
// Verifies the super_class item of the class file.
//   super_class - value read from the class file
// A value of zero is accepted: the class file format allows it for a class
// without a superclass (java.lang.Object). Any other value must be a valid
// constant pool index whose entry is a CONSTANT_Class; otherwise an Exception
// is raised through assert_exception.
void ClassFileParser::check_super_class(u2 super_class) throw (Exception)
{
    if (super_class == 0) {
        // TODO: confirm that this class really is java/lang/Object; no other
        // class may legally omit its superclass.
        return;
    }
    assert_exception(valid_cp_index(super_class),
        "super_class out of cp range");
    u1 tag = get_cp_tag(super_class);
    assert_exception(tag == CONSTANT_Class,
        "super_class refered is not CONSTANT_Class");
}
void ClassFileParser::parseInterfaces() throw (Exception)
{
ClassBufferInput *in = _classInput;
u2 interface_count = in->read_u2();
const u1 * interfaces = in->current();
for (int index = 0; index < interface_count; index++)
{
u2 this_interface = in->read_u2();
assert_exception(valid_cp_index(this_interface),
"interface index out of cp range");
u1 tag = get_cp_tag(this_interface);
assert_exception(tag == CONSTANT_Class,
"interface refered is not CONSTANT_Class");
}
_interfaces_count = interface_count;
int length = interface_count * sizeof(u2);
_interfaces = new u1 [length];
memcpy(_interfaces, interfaces, length);
}
void ClassFileParser::parseFields() throw (Exception)
{
const char *ConstantValue = "ConstantValue";
const char *Synthetic = "Synthetic";
const char *Deprecated = "Deprecated";
int ConstantValue_length = strlen(ConstantValue);
int Synthetic_length = strlen(Synthetic);
int Deprecated_length = strlen(Deprecated);
ClassBufferInput *in = _classInput;
u2 fields_count = in->read_u2();
FieldInfo *fields = new FieldInfo [fields_count];
assert_exception(fields != NULL, "out of memory");
_fields_count = fields_count;
_fields = fields;
for (int index = 0; index < fields_count; index++)
{
u2 access_flags = in->read_u2();
u2 name_index = in->read_u2();
u1 tag1 = get_cp_tag(name_index);
assert_exception(tag1 == CONSTANT_Utf8,
"field name_index refered is not CONSTANT_Utf8");
u2 descriptor_index = in->read_u2();
u1 tag2 = get_cp_tag(descriptor_index);
assert_exception(tag2 == CONSTANT_Utf8,
"field descriptor_index refered is not CONSTANT_Utf8");
u1 is_ConstantValue = 0;
u1 is_Synthetic = 0;
u1 is_Deprecated = 0;
u2 constantvalue_index = 0;
u2 attributes_count = in->read_u2();
for (int attr_index = 0; attr_index < attributes_count; attr_index++)
{
u2 attribute_name_index = in->read_u2();
u4 attribute_length = in->read_u4();
u2 length;
const u1 * name = get_cp_utf8(attribute_name_index, length);
if (length == ConstantValue_length
&& (memcmp(ConstantValue, name, length)) == 0)
{
assert_exception(is_ConstantValue == 0,
"no more than one ConstantValue attribute");
is_ConstantValue = 1;
assert_exception(attribute_length == 2,
"bad ConstantValue attribute length");
constantvalue_index = in->read_u2();
} else if (length == Synthetic_length
&& (memcmp(Synthetic, name, length)) == 0)
{
is_Synthetic = 1;
assert_exception(attribute_length == 0,
"bad Synthetic attribute length");
} else if (length == Deprecated_length
&& (memcmp(Deprecated, name, length)) == 0)
{
is_Deprecated = 1;
assert_exception(attribute_length == 0,
"bad Deprecated attribute length");
} else {
//Ignore any attribute that does not recongnize
in->skip_bytes(attribute_length);
}
}
fields[index].access_flags = access_flags;
fields[index].name_index = name_index;
fields[index].descriptor_index = descriptor_index;
fields[index].is_ConstantValue = is_ConstantValue;
fields[index].is_Synthetic = is_Synthetic;
fields[index].is_Deprecated = is_Deprecated;
fields[index].constantvalue_index = constantvalue_index;
}
}
// Puts a MethodInfo into a well-defined empty state: all counters and flags
// zero, all array pointers NULL.
//   minfo - record to initialise; must not be NULL
// Every member is set. Earlier only a subset was, which left members such as
// max_stack, max_locals and the has_Exceptions/is_* flags indeterminate when
// the record was copied, and permanently so for methods without a Code
// attribute.
void ClassFileParser::init_method_info(MethodInfo *minfo)
{
    minfo->access_flags = 0;
    minfo->name_index = 0;
    minfo->descriptor_index = 0;
    minfo->has_Code = 0;
    minfo->max_stack = 0;
    minfo->max_locals = 0;
    minfo->code_length = 0;
    minfo->code = NULL;
    minfo->exception_table_length = 0;
    minfo->exception_table = NULL;
    minfo->total_line_number_table = 0;
    minfo->line_number_table = NULL;
    minfo->total_local_variable_table = 0;
    minfo->local_variable_table = NULL;
    minfo->number_of_exceptions = 0;
    minfo->exception_index_table = NULL;
    minfo->has_Exceptions = 0;
    minfo->is_Synthetic = 0;
    minfo->is_Deprecated = 0;
}
u4 ClassFileParser::parseCodeAttribute(MethodInfo *minfo) throw (Exception)
{
u2 max_stack = 0, max_locals = 0;
u4 code_length = 0;
u1 *code = NULL;
u2 exception_table_length;
Exception_table *exception_table = NULL;
int total_line_number_table = 0;
LineNumber_table *line_number_table = NULL;
int total_local_variable_table = 0;
LocalVariable_table *local_variable_table = NULL;
ClassBufferInput *in = _classInput;
const u1 * start = in->current();
try {
max_stack = in->read_u2();
max_locals = in->read_u2();
code_length = in->read_u4();
if (code_length > 0) {
code = new u1 [code_length];
assert_exception(code != NULL, "out of memory");
in->read_bytes(code, code_length);
}
exception_table_length = in->read_u2();
if (exception_table_length > 0) {
exception_table = new Exception_table [exception_table_length];
assert_exception(exception_table != NULL, "out of memory");
}
for (int except_index = 0; except_index < exception_table_length;
except_index++) {
exception_table[except_index].start_pc = in->read_u2();
exception_table[except_index].end_pc = in->read_u2();
exception_table[except_index].handler_pc = in->read_u2();
exception_table[except_index].catch_type = in->read_u2();
}
//读取与Code attribute关联的属性(目前只有LineNumberTable和LocalVariableTable)
//扫描两遍,第一遍统计LineNumber_table或LocalVariable_table的数量
const char *LineNumberTable = "LineNumberTable";
const char *LocalVariableTable = "LocalVariableTable";
int LineNumberTable_length = strlen(LineNumberTable);
int LocalVariableTable_length = strlen(LocalVariableTable);
int attributes_count = in->read_u2();
in->mark();
for (int attr_index = 0; attr_index < attributes_count; attr_index++)
{
u2 attribute_name_index = in->read_u2();
u4 attribute_length = in->read_u4();
u2 length;
const u1 * name = get_cp_utf8(attribute_name_index, length);
if (length == LineNumberTable_length
&& (memcmp(LineNumberTable, name, length)) == 0)
{
u2 line_number_table_length = in->read_u2();
in->skip_bytes(line_number_table_length * sizeof(LineNumber_table));
total_line_number_table += line_number_table_length;
} else if (length == LocalVariableTable_length
&& (memcmp(LocalVariableTable, name, length)) == 0)
{
u2 local_variable_table_length = in->read_u2();
in->skip_bytes(local_variable_table_length * sizeof(LocalVariable_table));
total_local_variable_table += local_variable_table_length;
} else {
//Ignore any attribute that does not recongnize
in->skip_bytes(attribute_length);
}
}
if (total_line_number_table > 0) {
line_number_table = new LineNumber_table [total_line_number_table];
assert_exception(line_number_table != NULL, "out of memory");
}
if (total_local_variable_table > 0) {
local_variable_table = new LocalVariable_table [total_local_variable_table];
assert_exception(local_variable_table != NULL, "out of memory");
}
in->reset();
int line_number_pos = 0;
int local_variable_pos = 0;
for (int attr_index = 0; attr_index < attributes_count; attr_index++)
{
u2 attribute_name_index = in->read_u2();
u4 attribute_length = in->read_u4();
u2 length;
const u1 * name = get_cp_utf8(attribute_name_index, length);
if (length == LineNumberTable_length
&& (memcmp(LineNumberTable, name, length)) == 0)
{
u2 line_number_table_length = in->read_u2();
for (int i = line_number_pos;
i < (line_number_pos + line_number_table_length); i++) {
line_number_table[i].start_pc = in->read_u2();
line_number_table[i].line_number = in->read_u2();
}
line_number_pos += line_number_table_length;
} else if (length == LocalVariableTable_length
&& (memcmp(LocalVariableTable, name, length)) == 0)
{
u2 local_variable_table_length = in->read_u2();
for (int i = local_variable_pos;
i < (local_variable_pos + local_variable_table_length); i++) {
local_variable_table[i].start_pc = in->read_u2();
local_variable_table[i].length = in->read_u2();
local_variable_table[i].name_index = in->read_u2();
local_variable_table[i].descriptor_index = in->read_u2();
local_variable_table[i].index = in->read_u2();
}
local_variable_pos += local_variable_table_length;
} else {
//Ignore any attribute that does not recongnize
in->skip_bytes(attribute_length);
}
}
minfo->max_stack = max_stack;
minfo->max_locals = max_locals;
minfo->code_length = code_length;
minfo->code = code;
minfo->exception_table_length = exception_table_length;
minfo->exception_table = exception_table;
minfo->total_line_number_table = total_line_number_table;
minfo->line_number_table = line_number_table;
minfo->total_local_variable_table = total_local_variable_table;
minfo->local_variable_table = local_variable_table;
const u1 *end = in->current();
u4 code_attr_length = end - start;
return code_attr_length;
} catch (Exception e) {
if (code != NULL) {
delete [] code;
}
if (exception_table != NULL) {
delete [] exception_table;
}
if (line_number_table != NULL) {
delete [] line_number_table;
}
if (local_variable_table != NULL) {
delete [] local_variable_table;
}
throw e;
}
return 0;
}
void ClassFileParser::parseMethods() throw (Exception)
{
const char *Code = "Code";
const char *Exceptions = "Exceptions";
const char *Synthetic = "Synthetic";
const char *Deprecated = "Deprecated";
int Code_length = strlen(Code);
int Exceptions_length = strlen(Exceptions);
int Synthetic_length = strlen(Synthetic);
int Deprecated_length = strlen(Deprecated);
ClassBufferInput *in = _classInput;
u2 methods_count = in->read_u2();
MethodInfo *methods = new MethodInfo [methods_count];
assert_exception(methods != NULL, "out of memory");
_methods_count = methods_count;
_methods = methods;
//初始化methods,当解析失败,可以释放已申请的资源
MethodInfo minfo;
init_method_info(&minfo);
for (int i = 0; i < methods_count; i++) {
methods[i] = minfo;
}
for (int index = 0; index < methods_count; index++)
{
u2 access_flags = in->read_u2();
u2 name_index = in->read_u2();
u1 tag1 = get_cp_tag(name_index);
assert_exception(tag1 == CONSTANT_Utf8,
"method name_index refered is not CONSTANT_Utf8");
u2 descriptor_index = in->read_u2();
u1 tag2 = get_cp_tag(descriptor_index);
assert_exception(tag2 == CONSTANT_Utf8,
"method descriptor_index refered is not CONSTANT_Utf8");
u1 has_Code = 0;
u1 has_Exceptions = 0;
u1 is_Synthetic = 0;
u1 is_Deprecated = 0;
u2 number_of_exceptions = 0;
u2 *exception_index_table = NULL;
u2 attributes_count = in->read_u2();
for (int attr_index = 0; attr_index < attributes_count; attr_index++)
{
u2 attribute_name_index = in->read_u2();
u4 attribute_length = in->read_u4();
u2 length;
const u1 * name = get_cp_utf8(attribute_name_index, length);
if (length == Code_length
&& (memcmp(Code, name, length)) == 0)
{
assert_exception(has_Code == 0,
"no more than one Code attribute");
has_Code = 1;
u4 code_attr_length = parseCodeAttribute(&methods[index]);
assert_exception(attribute_length == code_attr_length,
"bad Code attribute length");
} else if (length == Exceptions_length
&& (memcmp(Exceptions, name, length)) == 0)
{
assert_exception(has_Exceptions == 0,
"no more than one Exceptions attribute");
has_Exceptions = 1;
number_of_exceptions = in->read_u2();
if (number_of_exceptions > 0) {
exception_index_table = new u2 [number_of_exceptions];
assert_exception(exception_index_table != NULL, "out of memory");
}
for (int i = 0; i < number_of_exceptions; i++) {
exception_index_table[i] = in->read_u2();
}
u4 exceptions_attr_length = 2 + (number_of_exceptions * 2);
assert_exception(attribute_length == exceptions_attr_length,
"bad Exceptions attribute length");
} else if (length == Synthetic_length
&& (memcmp(Synthetic, name, length)) == 0)
{
is_Synthetic = 1;
assert_exception(attribute_length == 0,
"bad Synthetic attribute length");
} else if (length == Deprecated_length
&& (memcmp(Deprecated, name, length)) == 0)
{
is_Deprecated = 1;
assert_exception(attribute_length == 0,
"bad Deprecated attribute length");
} else {
//Ignore any attribute that does not recongnize
in->skip_bytes(attribute_length);
}
}
methods[index].access_flags = access_flags;
methods[index].name_index = name_index;
methods[index].descriptor_index = descriptor_index;
methods[index].has_Code = has_Code;
methods[index].has_Exceptions = has_Exceptions;
methods[index].is_Synthetic = is_Synthetic;
methods[index].is_Deprecated = is_Deprecated;
methods[index].number_of_exceptions = number_of_exceptions;
methods[index].exception_index_table = exception_index_table;
}
}
void ClassFileParser::parseClassFile() throw (Exception)
{
//parse class magic, version
ClassBufferInput *in = _classInput;
u4 magic = in->read_u4();
assert_exception(magic == CLASS_FILE_MAGIC_U4, "bad magic value");
u2 minorVersion = in->read_u2();
u2 majorVersion = in->read_u2();
assert_exception(is_supported_version(majorVersion, minorVersion),
"unsupported class version");
_major_version = majorVersion;
_minor_version = minorVersion;
//parse constant pool
parseConstantPool();
//parse access_flags, this_clsss, super_class
_access_flags = in->read_u2();
//Todo: need check now?
check_access_flags(_access_flags);
_this_class = in->read_u2();
check_this_class(_this_class);
_super_class = in->read_u2();
check_super_class(_super_class);
//parse interfaces
parseInterfaces();
//parse fields
parseFields();
//parse methods
parseMethods();
}
void ClassFileParser::releaseMethods()
{
for (int i = 0; i < _methods_count; i++)
{
u1 *code = _methods[i].code;
Exception_table *exception_table = _methods[i].exception_table;
LineNumber_table *line_number_table = _methods[i].line_number_table;
LocalVariable_table *local_variable_table = _methods[i].local_variable_table;
u2 *exception_index_table = _methods[i].exception_index_table;
if (code != NULL) {
delete [] code;
}
if (exception_table != NULL) {
delete [] exception_table;
}
if (line_number_table != NULL) {
delete [] line_number_table;
}
if (local_variable_table != NULL) {
delete [] local_variable_table;
}
if (exception_index_table != NULL) {
delete [] exception_index_table;
}
}
}
void ClassFileParser::releaseResource()
{
if (_cp_index != NULL) {
delete [] _cp_index;
}
if (_constant_pool != NULL) {
delete [] _constant_pool;
}
if (_interfaces != NULL) {
delete [] _interfaces;
}
if (_fields != NULL) {
delete [] _fields;
}
if (_methods != NULL) {
releaseMethods();
}
}
// Prints the class version, the constant pool count and byte length, and the
// interface, field and method counts to stdout.
void ClassFileParser::printSummary()
{
    // Adjacent literals are concatenated; the result is the same format
    // string as a single literal joined with a line continuation.
    const char *summary_format =
        "class version: %d.%d, cp count: %d, cp length: %d\n "
        "interface count: %d, field count: %d, method count: %d\n";
    printf(summary_format,
        _major_version, _minor_version, _cp_count, _cp_length,
        _interfaces_count, _fields_count, _methods_count);
}
// ClassBufferInput
// Creates a reader over [buffer, buffer + length). Both the read position and
// the mark start at the beginning of the buffer. The buffer is not copied.
ClassBufferInput::ClassBufferInput(const u1* buffer, int length)
    : _buffer_start(buffer),
      _buffer_end(buffer + length),
      _current(buffer),
      _marked(buffer)
{
}
void ClassBufferInput::guarantee_size(int size) throw (Exception)
{
if (size > (_buffer_end - _buffer_start)) {
throw Exception("unexpected end of file");
}
}
// Reads one byte at the current position and advances past it.
// guarantee_size throws when its size check fails.
u1 ClassBufferInput::read_u1() throw (Exception)
{
    guarantee_size(1);
    const u1 value = *_current;
    ++_current;
    return value;
}
// Reads a big-endian 2-byte value at the current position and advances past it.
// guarantee_size throws when its size check fails.
u2 ClassBufferInput::read_u2() throw (Exception)
{
    guarantee_size(2);
    const u1 *at = _current;
    _current = at + 2;
    return read_java_u2(at);
}
// Reads a big-endian 4-byte value at the current position and advances past it.
// guarantee_size throws when its size check fails.
u4 ClassBufferInput::read_u4() throw (Exception)
{
    guarantee_size(4);
    const u1 *at = _current;
    _current = at + 4;
    return read_java_u4(at);
}
// Copies size bytes from the current position into buf and advances past them.
//   buf  - destination; must have room for size bytes
//   size - number of bytes to copy
// guarantee_size throws when its size check fails.
void ClassBufferInput::read_bytes(u1 *buf, int size) throw (Exception)
{
    guarantee_size(size);
    const u1 *source = _current;
    memcpy(buf, source, size);
    _current = source + size;
}
// Decodes a big-endian 2-byte value from buffer[0..1].
// No bounds check is made; the caller must ensure two bytes are readable.
u2 ClassBufferInput::read_java_u2(const u1 *buffer)
{
    const u2 high = buffer[0];
    const u2 low = buffer[1];
    return (u2)((high << 8) | low);
}
// Decodes a big-endian 4-byte value from buffer[0..3].
// No bounds check is made; the caller must ensure four bytes are readable.
u4 ClassBufferInput::read_java_u4(const u1 *buffer)
{
    u4 value = 0;
    // Fold in one byte at a time, most significant byte first.
    for (int i = 0; i < 4; i++) {
        value = (value << 8) | (u4)buffer[i];
    }
    return value;
}
// Advances the read position by one byte without decoding it.
// guarantee_size throws when its size check fails.
void ClassBufferInput::skip_u1() throw (Exception)
{
    guarantee_size(1);
    _current += 1;
}
// Advances the read position by two bytes without decoding them.
// guarantee_size throws when its size check fails.
void ClassBufferInput::skip_u2() throw (Exception)
{
    const int width = 2;
    guarantee_size(width);
    _current = _current + width;
}
// Advances the read position by four bytes without decoding them.
// guarantee_size throws when its size check fails.
void ClassBufferInput::skip_u4() throw (Exception)
{
    const int width = 4;
    guarantee_size(width);
    _current = _current + width;
}
// Advances the read position by size bytes without decoding them.
// guarantee_size throws when its size check fails.
void ClassBufferInput::skip_bytes(int size) throw (Exception)
{
    guarantee_size(size);
    _current = _current + size;
}
void ClassBufferInput::mark()
{
_marked = _current;
}
void ClassBufferInput::reset()
{
_current = _marked;
}
JVMTest.cpp
(只贴出有修改的部分)
// Test driver: loads the class file named by argv[1], parses it and prints a
// summary.
// Returns 0 on success and when only the usage text is shown, -1 when the file
// cannot be loaded or cannot be parsed. A parse failure used to return 0,
// which made it indistinguishable from success.
int main(int argc, char *argv[])
{
    printf("JVM Test, JVM Utilities test...\n");
    if (argc < 2) {
        printf("Usage: JVMTest classfile\n");
        return 0;
    }
    const char *classFileName = argv[1];
    int bufferLength;
    // NOTE(review): classBuffer is never released here; how loadClassToBuffer
    // allocates it is not visible in this file - confirm and free accordingly.
    char *classBuffer = loadClassToBuffer(classFileName, &bufferLength);
    if (classBuffer == NULL) {
        printf("\nload class file failed");
        return -1;
    }
    ClassFileParser parser((const u1 *)classBuffer, bufferLength);
    int exit_code = 0;
    try {
        parser.parseClassFile();
        parser.printSummary();
    } catch (Exception &e) {
        // Caught by reference so the exception object is not copied.
        printf("parse class file failed: %s", e.getMessage());
        parser.releaseResource();
        exit_code = -1;
    }
    //test
    //Exception e("abcdef");
    return exit_code;
}
本文介绍了一个Java类文件解析器的设计与实现,详细展示了类文件结构、常量池解析及方法解析的过程。通过C++实现,重点讲解了如何解析类文件中的不同元素,并处理各种异常情况。
1645

被折叠的 条评论
为什么被折叠?



