作者:王智通(阿里云安全工程师)
这两天在class文件解析器的基础上加上了Java反汇编的功能。反汇编器是指令解释器的基础, 通过编写反汇编器可以熟悉JVM的指令系统; 不过JVM的指令一共有201个, 反汇编过程基本就是个体力活。《Java虚拟机规范》对每一条指令都有详细的描述, 下面说说我是如何解析bytecode的:
一个java文件经过javac编译后会生成class格式文件, 在class格式中method字段里会有Code属性,Code属性包含了java的指令码和长度。 首先用class解析器将指令码提取出来, 举个例子:
test.java
06 | public static void main(String args[]) { |
09 | for (i = 0 ; i < 5 ; i++) |
10 | System.out.println( "hehe" ); |
我们用class文件解析器把test对应的bytecode打印出来:
len: 5
0x2a0xb70x00x10xb1
这一串bytecode依次为0x2a、0xb7、0x00、0x01、0xb1(打印时各字节之间没有分隔符, 且0x00和0x01被打印成了0x0和0x1), 长度是5个字节。
对照《java虚拟机规范》我们来一步步手工解析:
0x2a代表aload_0指令, 它将局部变量表中下标为0的引用类型变量压入操作数栈。这个指令本身长度就是一个字节, 没有参数, 因此0x2a的解析非常简单, 直接在屏幕上打印出aload_0即可:
printf("%s\n", symbol);
0xb7代表invokespecial指令, 它用来调用超类方法、实例初始化方法和私有方法。它的用法如下:
invokespecial indexbyte1 indexbyte2, 其中indexbyte1和indexbyte2各占一个字节, 用(indexbyte1 << 8) | indexbyte2来构建一个常量池中的索引。每条JVM指令的操作码本身占用一个字节, 加上它的两个参数, 一条invokespecial指令一共占用3个字节。所以它的解析算法如下:
3 | index = ((*(u1 *)(base + 1)) << 8) | (*(u1 *)(base + 2)); |
4 | printf ( "%s #%x\n" , symbol, index); |
注意0xb7解析完后, 要把当前解析位置向后移动3个字节(1个字节的操作码加2个字节的参数), 下一条指令就是0xb1了。它是return指令, 没有参数, 因此它的解析方法跟aload_0一样:
printf("%s\n", symbol);
以上是我们手工解析的过程, 但是jvm有201条指令, 我们需要建立一个合适的数据结构:
1 | typedef int (*interp_func)(u2 opcode_len, char *symbol, void *base); |
3 | typedef struct bytecode_st { |
5 | u2 opcode_len; // 指令总的长度,包括参数 |
6 | char symbol[OPCODE_SYMBOL_LEN]; // 指令对应的助记符 |
7 | interp_func func; // 解析指令的回调函数 |
我们可以直接建立一个大的BYTECODE数组:
001 | BYTECODE jvm_byte_code[OPCODE_LEN] = { |
002 | {0x00, 1, "nop" , jvm_interp_nop}, |
003 | {0x01, 1, "aconst_null" , jvm_interp_aconst_null}, |
004 | {0x02, 1, "iconst_m1" , jvm_interp_iconst_m1}, |
005 | {0x03, 1, "iconst_0" , jvm_interp_iconst_0}, |
006 | {0x04, 1, "iconst_1" , jvm_interp_iconst_1}, |
007 | {0x05, 1, "iconst_2" , jvm_interp_iconst_2}, |
008 | {0x06, 1, "iconst_3" , jvm_interp_iconst_3}, |
009 | {0x07, 1, "iconst_4" , jvm_interp_iconst_4}, |
010 | {0x08, 1, "iconst_5" , jvm_interp_iconst_5}, |
011 | {0x09, 1, "lconst_0" , jvm_interp_lconst_0}, |
012 | {0x0a, 1, "lconst_1" , jvm_interp_lconst_1}, |
013 | {0x0b, 1, "fconst_0" , jvm_interp_fconst_0}, |
014 | {0x0c, 1, "fconst_1" , jvm_interp_fconst_1}, |
015 | {0x0d, 1, "fconst_2" , jvm_interp_fconst_2}, |
016 | {0x0e, 1, "dconst_0" , jvm_interp_dconst_0}, |
017 | {0x0f, 1, "dconst_1" , jvm_interp_dconst_1}, |
018 | {0x10, 1, "bipush" , jvm_interp_bipush}, |
019 | {0x11, 1, "sipush" , jvm_interp_sipush}, |
020 | {0x12, 2, "ldc" , jvm_interp_ldc}, |
021 | {0x13, 1, "ldc_w" , jvm_interp_ldc_w}, |
022 | {0x14, 1, "ldc2_w" , jvm_interp_ldc2_w}, |
023 | {0x15, 1, "iload" , jvm_interp_iload}, |
024 | {0x16, 1, "lload" , jvm_interp_lload}, |
025 | {0x17, 1, "fload" , jvm_interp_fload}, |
026 | {0x18, 1, "dload" , jvm_interp_dload}, |
027 | {0x19, 1, "aload" , jvm_interp_aload}, |
028 | {0x1a, 1, "iload_0" , jvm_interp_iload_0}, |
029 | {0x1b, 1, "iload_1" , jvm_interp_iload_1}, |
030 | {0x1c, 1, "iload_2" , jvm_interp_iload_2}, |
031 | {0x1d, 1, "iload_3" , jvm_interp_iload_3}, |
032 | {0x1e, 1, "lload_0" , jvm_interp_lload_0}, |
033 | {0x1f, 1, "lload_1" , jvm_interp_lload_1}, |
034 | {0x20, 1, "lload_2" , jvm_interp_lload_2}, |
035 | {0x21, 1, "lload_3" , jvm_interp_lload_3}, |
036 | {0x22, 1, "fload_0" , jvm_interp_fload_0}, |
037 | {0x23, 1, "fload_1" , jvm_interp_fload_1}, |
038 | {0x24, 1, "fload_2" , jvm_interp_fload_2}, |
039 | {0x25, 1, "fload_3" , jvm_interp_fload_3}, |
040 | {0x26, 1, "dload_0" , jvm_interp_dload_0}, |
041 | {0x27, 1, "dload_1" , jvm_interp_dload_1}, |
042 | {0x28, 1, "dload_2" , jvm_interp_dload_2}, |
043 | {0x29, 1, "dload_3" , jvm_interp_dload_3}, |
044 | {0x2a, 1, "aload_0" , jvm_interp_aload_0}, |
045 | {0x2b, 1, "aload_1" , jvm_interp_aload_1}, |
046 | {0x2c, 1, "aload_2" , jvm_interp_aload_2}, |
047 | {0x2d, 1, "aload_3" , jvm_interp_aload_3}, |
048 | {0x2e, 1, "iaload" , jvm_interp_iaload}, |
049 | {0x2f, 1, "laload" , jvm_interp_laload}, |
050 | {0x30, 1, "faload" , jvm_interp_faload}, |
051 | {0x31, 1, "daload" , jvm_interp_daload}, |
052 | {0x32, 1, "aaload" , jvm_interp_aaload}, |
053 | {0x33, 1, "baload" , jvm_interp_baload}, |
054 | {0x34, 1, "caload" , jvm_interp_caload}, |
055 | {0x35, 1, "saload" , jvm_interp_saload}, |
056 | {0x36, 1, "istore" , jvm_interp_istore}, |
057 | {0x37, 1, "lstore" , jvm_interp_lstore}, |
058 | {0x38, 1, "fstore" , jvm_interp_fstore}, |
059 | {0x39, 1, "dstore" , jvm_interp_dstore}, |
060 | {0x3a, 1, "astore" , jvm_interp_astore}, |
061 | {0x3b, 1, "istore_0" , jvm_interp_istore_0}, |
062 | {0x3c, 1, "istore_1" , jvm_interp_istore_1}, |
063 | {0x3d, 1, "istore_2" , jvm_interp_istore_2}, |
064 | {0x3e, 1, "istore_3" , jvm_interp_istore_3}, |
065 | {0x3f, 1, "lstore_0" , jvm_interp_lstore_0}, |
066 | {0x40, 1, "lstore_1" , jvm_interp_lstore_1}, |
067 | {0x41, 1, "lstore_2" , jvm_interp_lstore_2}, |
068 | {0x42, 1, "lstore_3" , jvm_interp_lstore_3}, |
069 | {0x43, 1, "fstore_0" , jvm_interp_fstore_0}, |
070 | {0x44, 1, "fstore_1" , jvm_interp_fstore_1}, |
071 | {0x45, 1, "fstore_2" , jvm_interp_fstore_2}, |
072 | {0x46, 1, "fstore_3" , jvm_interp_fstore_3}, |
073 | {0x47, 1, "dstore_0" , jvm_interp_dstore_0}, |
074 | {0x48, 1, "dstore_1" , jvm_interp_dstore_1}, |
075 | {0x49, 1, "dstore_2" , jvm_interp_dstore_2}, |
076 | {0x4a, 1, "dstore_3" , jvm_interp_dstore_3}, |
077 | {0x4b, 1, "astore_0" , jvm_interp_astore_0}, |
078 | {0x4c, 1, "astore_1" , jvm_interp_astore_1}, |
079 | {0x4d, 1, "astore_2" , jvm_interp_astore_2}, |
080 | {0x4e, 1, "astore_3" , jvm_interp_astore_3}, |
081 | {0x4f, 1, "iastore" , jvm_interp_iastore}, |
082 | {0x50, 1, "lastore" , jvm_interp_lastore}, |
083 | {0x51, 1, "fastore" , jvm_interp_fastore}, |
084 | {0x52, 1, "dastore" , jvm_interp_dastore}, |
085 | {0x53, 1, "aastore" , jvm_interp_aastore}, |
086 | {0x54, 1, "bastore" , jvm_interp_bastore}, |
087 | {0x55, 1, "castore" , jvm_interp_castore}, |
088 | {0x56, 1, "sastore" , jvm_interp_sastore}, |
089 | {0x57, 1, "pop" , jvm_interp_pop}, |
090 | {0x58, 1, "pop2" , jvm_interp_pop2}, |
091 | {0x59, 1, "dup" , jvm_interp_dup}, |
092 | {0x5a, 1, "dup_x1" , jvm_interp_dup_x1}, |
093 | {0x5b, 1, "dup_x2" , jvm_interp_dup_x2}, |
094 | {0x5c, 1, "dup2" , jvm_interp_dup2}, |
095 | {0x5d, 1, "dup2_x1" , jvm_interp_dup2_x1}, |
096 | {0x5e, 1, "dup2_x2" , jvm_interp_dup2_x2}, |
097 | {0x5f, 1, "swap" , jvm_interp_swap}, |
098 | {0x60, 1, "iadd" , jvm_interp_iadd}, |
099 | {0x61, 1, "ladd" , jvm_interp_ladd}, |
100 | {0x62, 1, "fadd" , jvm_interp_fadd}, |
101 | {0x63, 1, "dadd" , jvm_interp_dadd}, |
102 | {0x64, 1, "isub" , jvm_interp_isub}, |
103 | {0x65, 1, "lsub" , jvm_interp_lsub}, |
104 | {0x66, 1, "fsub" , jvm_interp_fsub}, |
105 | {0x67, 1, "dsub" , jvm_interp_dsub}, |
106 | {0x68, 1, "imul" , jvm_interp_imul}, |
107 | {0x69, 1, "lmul" , jvm_interp_lmul}, |
108 | {0x6a, 1, "fmul" , jvm_interp_fmul}, |
109 | {0x6b, 1, "dmul" , jvm_interp_dmul}, |
110 | {0x6c, 1, "idiv" , jvm_interp_idiv}, |
111 | {0x6d, 1, "ldiv" , jvm_interp_ldiv}, |
112 | {0x6e, 1, "fdiv" , jvm_interp_fdiv}, |
113 | {0x6f, 1, "ddiv" , jvm_interp_ddiv}, |
114 | {0x70, 1, "irem" , jvm_interp_irem}, |
115 | {0x71, 1, "lrem" , jvm_interp_lrem}, |
116 | {0x72, 1, "frem" , jvm_interp_frem}, |
117 | {0x73, 1, "drem" , jvm_interp_drem}, |
118 | {0x74, 1, "ineg" , jvm_interp_ineg}, |
119 | {0x75, 1, "lneg" , jvm_interp_lneg}, |
120 | {0x76, 1, "fneg" , jvm_interp_fneg}, |
121 | {0x77, 1, "dneg" , jvm_interp_dneg}, |
122 | {0x78, 1, "ishl" , jvm_interp_ishl}, |
123 | {0x79, 1, "lshl" , jvm_interp_lshl}, |
124 | {0x7a, 1, "ishr" , jvm_interp_ishr}, |
125 | {0x7b, 1, "lshr" , jvm_interp_lshr}, |
126 | {0x7c, 1, "iushr" , jvm_interp_iushr}, |
127 | {0x7d, 1, "lushr" , jvm_interp_lushr}, |
128 | {0x7e, 1, "iand" , jvm_interp_iand}, |
129 | {0x7f, 1, "land" , jvm_interp_land}, |
130 | {0x80, 1, "ior" , jvm_interp_ior}, |
131 | {0x81, 1, "lor" , jvm_interp_lor}, |
132 | {0x82, 1, "ixor" , jvm_interp_ixor}, |
133 | {0x83, 1, "lxor" , jvm_interp_lxor}, |
134 | {0x84, 3, "iinc" , jvm_interp_iinc}, |
135 | {0x85, 1, "i2l" , jvm_interp_i2l}, |
136 | {0x86, 1, "i2f" , jvm_interp_i2f}, |
137 | {0x87, 1, "i2d" , jvm_interp_i2d}, |
138 | {0x88, 1, "l2i" , jvm_interp_l2i}, |
139 | {0x89, 1, "l2f" , jvm_interp_l2f}, |
140 | {0x8a, 1, "l2d" , jvm_interp_l2d}, |
141 | {0x8b, 1, "f2i" , jvm_interp_f2i}, |
142 | {0x8c, 1, "f2l" , jvm_interp_f2l}, |
143 | {0x8d, 1, "f2d" , jvm_interp_f2d}, |
144 | {0x8e, 1, "d2i" , jvm_interp_d2i}, |
145 | {0x8f, 1, "d2l" , jvm_interp_d2l}, |
146 | {0x90, 1, "d2f" , jvm_interp_d2f}, |
147 | {0x91, 1, "i2b" , jvm_interp_i2b}, |
148 | {0x92, 1, "i2c" , jvm_interp_i2c}, |
149 | {0x93, 1, "i2s" , jvm_interp_i2s}, |
150 | {0x94, 1, "lcmp" , jvm_interp_lcmp}, |
151 | {0x95, 1, "fcmpl" , jvm_interp_fcmpl}, |
152 | {0x96, 1, "fcmpg" , jvm_interp_fcmpg}, |
153 | {0x97, 1, "dcmpl" , jvm_interp_dcmpl}, |
154 | {0x98, 1, "dcmpg" , jvm_interp_dcmpg}, |
155 | {0x99, 1, "ifeq" , jvm_interp_ifeq}, |
156 | {0x9a, 1, "ifne" , jvm_interp_ifne}, |
157 | {0x9b, 1, "iflt" , jvm_interp_iflt}, |
158 | {0x9c, 1, "ifge" , jvm_interp_ifge}, |
159 | {0x9d, 1, "ifgt" , jvm_interp_ifgt}, |
160 | {0x9e, 1, "ifle" , jvm_interp_ifle}, |
161 | {0x9f, 1, "if_icmpeq" , jvm_interp_if_icmpeq}, |
162 | {0xa0, 1, "if_icmpne" , jvm_interp_if_icmpne}, |