Method invocation is one of the most fundamental operations in the Java Virtual Machine. This article walks through the OpenJDK 17 C++ source to trace the full execution path of the invokevirtual bytecode, focusing on how the method index is loaded into the RBX register and how the target method is ultimately executed.
## 1. Loading the method index: how RBX gets its value
Processing of the invokevirtual bytecode starts in the TemplateTable::invokevirtual function:
```cpp
void TemplateTable::invokevirtual(int byte_no) {
  prepare_invoke(byte_no, rbx, noreg, rcx, rdx);  // the key call
  invokevirtual_helper(rbx, rcx, rdx);
}
```
The key call here is prepare_invoke, whose second argument, rbx, designates the register that will receive the method index. The actual load happens in load_resolved_method_at_index:
```cpp
void InterpreterMacroAssembler::load_resolved_method_at_index(int byte_no,
                                                              Register method,
                                                              Register cache,
                                                              Register index) {
  // method_offset selects the f2 slot for invokevirtual (see the full source below)
  movptr(method, Address(cache, index, Address::times_ptr, method_offset));
}
```
With method = rbx and byte_no = f2_byte (the case for invokevirtual), this movptr instruction loads the f2 slot of the resolved constant pool cache entry into the RBX register: a vtable index for a regular virtual method, or a Method* for a vfinal call. The load is reached through a four-level call chain (a sketch of the resulting address computation follows the list):
- invokevirtual →
- prepare_invoke →
- load_invoke_cp_cache_entry →
- load_resolved_method_at_index
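For a concrete picture of the x86 addressing mode involved, here is a minimal sketch (plain C++, not HotSpot code; the function name is invented for illustration) of the effective address the movptr above computes, assuming a 64-bit build where Address::times_ptr scales the index by sizeof(void*):

```cpp
#include <cstddef>
#include <cstdint>

// Models Address(cache, index, Address::times_ptr, method_offset) on x86-64:
//   effective address = cache + index * sizeof(void*) + method_offset
// where cache plays the role of rcx (the ConstantPoolCache base), index the role
// of rdx (the cache-entry index in pointer-sized words), and method_offset is
// ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::f2_offset().
std::uintptr_t resolved_method_slot(std::uintptr_t cache,
                                    std::uintptr_t index,
                                    std::size_t method_offset) {
  return cache + index * sizeof(void*) + method_offset;
}
```

The movptr then dereferences this address, so RBX ends up holding whatever the resolved f2 slot contains.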
## 2. Virtual method dispatch: from RBX index to method address
With the index in hand, invokevirtual_helper performs the virtual-method dispatch:
```cpp
void TemplateTable::invokevirtual_helper(Register index, Register recv, Register flags) {
  const Register method = index;   // interpreter convention: the Method* travels in rbx
  Label notFinal;
  __ movl(rax, flags);
  __ andl(rax, (1 << ConstantPoolCacheEntry::is_vfinal_shift));
  __ jcc(Assembler::zero, notFinal);
  // final method: rbx already holds the Method*
  __ jump_from_interpreted(method, rax);
  __ bind(notFinal);
  // regular virtual method: look the Method* up in the receiver's vtable
  __ load_klass(rax, recv, rscratch1);           // receiver klass (tmp register on LP64)
  __ lookup_virtual_method(rax, index, method);  // rbx: vtable index -> Method*
  __ jump_from_interpreted(method, rdx);
}
```
Two cases matter here:
- Final methods: RBX already holds the Method* pointer itself.
- Regular virtual methods: lookup_virtual_method converts the vtable index in RBX into a Method* (a conceptual model follows the snippet):
```cpp
void MacroAssembler::lookup_virtual_method(Register recv_klass,
                                           RegisterOrConstant vtable_index,
                                           Register method_result) {
  // vtable slot = recv_klass + vtable_start_offset() + vtable_index * wordSize
  Address vtable_entry(recv_klass, vtable_index, Address::times_ptr,
                       in_bytes(Klass::vtable_start_offset()) +
                       vtableEntry::method_offset_in_bytes());
  movptr(method_result, vtable_entry);  // load the Method* into method_result (rbx)
}
```
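As a rough mental model of what lookup_virtual_method computes, the sketch below re-expresses the lookup in plain C++. All of the types and the offset constant are hypothetical stand-ins invented for illustration; the real lookup is emitted as assembly against HotSpot's Klass and vtableEntry layouts:

```cpp
#include <cstddef>

// Hypothetical stand-ins (the real Klass / vtableEntry / Method live inside HotSpot).
struct MethodModel {};                               // stands in for Method
struct VtableEntryModel { MethodModel* method; };    // stands in for vtableEntry

const std::size_t kVtableStartOffset = 64;           // made-up Klass::vtable_start_offset()

// Index into the vtable embedded at a fixed offset inside the receiver's Klass and
// read the Method* out of the selected entry -- this pointer is what lands in rbx.
MethodModel* lookup_virtual_method_model(void* recv_klass, int vtable_index) {
  VtableEntryModel* vtable = reinterpret_cast<VtableEntryModel*>(
      static_cast<char*>(recv_klass) + kVtableStartOffset);
  return vtable[vtable_index].method;
}
```

Because a virtual method keeps the same vtable index in every subclass, this single scaled load resolves the call no matter what the receiver's concrete class is.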
## 3. Method execution: jumping to the target code
The final transfer of control happens in jump_from_interpreted:
```cpp
void InterpreterMacroAssembler::jump_from_interpreted(Register method, Register temp) {
  // abridged: the JVMTI single-stepping path is omitted here (see the full source below)
  jmp(Address(method, Method::from_interpreted_offset()));
}
```
With method = rbx, this jmp instruction is what actually executes the method referenced by RBX (a C++ analogy follows the list):
- Compute the slot address rbx + Method::from_interpreted_offset()
- Jump indirectly through that slot, i.e. to the entry address stored there
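To spell out what the indirect jump does, here is a small C++ analogy. The struct and the function-pointer type are stand-ins invented for illustration; real entry points use the interpreter's own calling convention, and the control transfer is a jump rather than a call:

```cpp
// Analogy for: jmp [rbx + Method::from_interpreted_offset()]
// The slot holds the address control transfers to when the method is invoked from
// interpreted code (the i2i entry, or an i2c adapter once the method is compiled).
using EntryPoint = void (*)();          // stand-in signature, for illustration only

struct MethodEntryModel {               // hypothetical stand-in for Method
  EntryPoint from_interpreted_entry;    // the slot read by the jmp above
};

void call_from_interpreted_model(MethodEntryModel* m /* what rbx points to */) {
  m->from_interpreted_entry();          // the interpreter tail-jumps; this sketch just calls
}
```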
https://example.com/invokevirtual_flow.png
(Figure: the complete invokevirtual flow, from index loading to method execution)
## 4. The key offset: locating the method entry point
Method::from_interpreted_offset() supplies the key offset used for method execution:
```cpp
class Method : public Metadata {
  // ...
  static ByteSize from_interpreted_offset() {
    return byte_offset_of(Method, _from_interpreted_entry);
  }
};
```
This offset identifies the slot in the Method object that stores the interpreter entry address (_from_interpreted_entry). The exact byte value depends on the build's Method field layout, but because it is a compile-time constant, the JVM can locate any method's execution entry with a single fixed-offset memory access.
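The mechanism behind this is an ordinary compile-time field offset, as in the following minimal sketch (the struct is hypothetical and merely stands in for the real Method layout):

```cpp
#include <cstddef>

struct MethodLayoutModel {          // hypothetical stand-in for the real Method layout
  void* earlier_fields[4];          // preceding fields, simplified
  void* from_interpreted_entry;     // the interpreter entry slot
};

// Known at compile time, so the template interpreter can emit a single
// instruction of the form: jmp qword ptr [rbx + kEntryOffset]
constexpr std::size_t kEntryOffset = offsetof(MethodLayoutModel, from_interpreted_entry);
```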
## 5. Design analysis
This layered design reflects the core ideas behind the JVM's template interpreter:
- Decoupling: bytecode handling (TemplateTable), method dispatch (invokevirtual_helper), and actual execution (InterpreterMacroAssembler) are kept separate.
- Register convention: RBX is dedicated to carrying the method index or Method* pointer.
- Performance: method metadata is reached through fixed offsets.
- Specialized paths: final methods and regular virtual methods take different execution paths, so final calls skip the vtable lookup entirely.
## Conclusion
In OpenJDK 17, execution of the invokevirtual bytecode is a highly optimized sequence:
- Load phase: movptr loads the method index into RBX.
- Conversion phase: the vtable turns that index into a Method*.
- Execution phase: jmp transfers control through the entry slot located at a fixed offset.
This design, built on register conventions and fixed offsets, lets the JVM remain cross-platform while executing this dispatch path with close-to-native efficiency. Understanding the role the RBX register plays throughout the call sequence is key to understanding how the JVM interpreter works.
## Source code
```cpp
void TemplateTable::invokevirtual(int byte_no) {
  transition(vtos, vtos);
  assert(byte_no == f2_byte, "use this argument");
  prepare_invoke(byte_no,
                 rbx,       // method or vtable index
                 noreg,     // unused itable index
                 rcx, rdx); // recv, flags

  // rbx: index
  // rcx: receiver
  // rdx: flags
  invokevirtual_helper(rbx, rcx, rdx);
}

void TemplateTable::prepare_invoke(int byte_no,
                                   Register method,  // linked method (or i-klass)
                                   Register index,   // itable index, MethodType, etc.
                                   Register recv,    // if caller wants to see it
                                   Register flags    // if caller wants to test it
                                   ) {
  // determine flags
  const Bytecodes::Code code = bytecode();
  const bool is_invokeinterface = code == Bytecodes::_invokeinterface;
  const bool is_invokedynamic   = code == Bytecodes::_invokedynamic;
  const bool is_invokehandle    = code == Bytecodes::_invokehandle;
  const bool is_invokevirtual   = code == Bytecodes::_invokevirtual;
  const bool is_invokespecial   = code == Bytecodes::_invokespecial;
  const bool load_receiver      = (recv != noreg);
  const bool save_flags         = (flags != noreg);
  assert(load_receiver == (code != Bytecodes::_invokestatic && code != Bytecodes::_invokedynamic), "");
  assert(save_flags == (is_invokeinterface || is_invokevirtual), "need flags for vfinal");
  assert(flags == noreg || flags == rdx, "");
  assert(recv == noreg || recv == rcx, "");

  // setup registers & access constant pool cache
  if (recv == noreg)  recv  = rcx;
  if (flags == noreg) flags = rdx;
  assert_different_registers(method, index, recv, flags);

  // save 'interpreter return address'
  __ save_bcp();

  load_invoke_cp_cache_entry(byte_no, method, index, flags, is_invokevirtual, false, is_invokedynamic);

  // maybe push appendix to arguments (just before return address)
  if (is_invokedynamic || is_invokehandle) {
    Label L_no_push;
    __ testl(flags, (1 << ConstantPoolCacheEntry::has_appendix_shift));
    __ jcc(Assembler::zero, L_no_push);
    // Push the appendix as a trailing parameter.
    // This must be done before we get the receiver,
    // since the parameter_size includes it.
    __ push(rbx);
    __ mov(rbx, index);
    __ load_resolved_reference_at_index(index, rbx);
    __ pop(rbx);
    __ push(index); // push appendix (MethodType, CallSite, etc.)
    __ bind(L_no_push);
  }

  // load receiver if needed (after appendix is pushed so parameter size is correct)
  // Note: no return address pushed yet
  if (load_receiver) {
    __ movl(recv, flags);
    __ andl(recv, ConstantPoolCacheEntry::parameter_size_mask);
    const int no_return_pc_pushed_yet = -1; // argument slot correction before we push return address
    const int receiver_is_at_end      = -1; // back off one slot to get receiver
    Address recv_addr = __ argument_address(recv, no_return_pc_pushed_yet + receiver_is_at_end);
    __ movptr(recv, recv_addr);
    __ verify_oop(recv);
  }

  if (save_flags) {
    __ movl(rbcp, flags);
  }

  // compute return type
  __ shrl(flags, ConstantPoolCacheEntry::tos_state_shift);
  // Make sure we don't need to mask flags after the above shift
  ConstantPoolCacheEntry::verify_tos_state_shift();
  // load return address
  {
    const address table_addr = (address) Interpreter::invoke_return_entry_table_for(code);
    ExternalAddress table(table_addr);
    LP64_ONLY(__ lea(rscratch1, table));
    LP64_ONLY(__ movptr(flags, Address(rscratch1, flags, Address::times_ptr)));
    NOT_LP64(__ movptr(flags, ArrayAddress(table, Address(noreg, flags, Address::times_ptr))));
  }

  // push return address
  __ push(flags);

  // Restore flags value from the constant pool cache, and restore rsi
  // for later null checks. r13 is the bytecode pointer
  if (save_flags) {
    __ movl(flags, rbcp);
    __ restore_bcp();
  }
}

void TemplateTable::load_invoke_cp_cache_entry(int byte_no,
                                               Register method,
                                               Register itable_index,
                                               Register flags,
                                               bool is_invokevirtual,
                                               bool is_invokevfinal, /*unused*/
                                               bool is_invokedynamic) {
  // setup registers
  const Register cache = rcx;
  const Register index = rdx;
  assert_different_registers(method, flags);
  assert_different_registers(method, cache, index);
  assert_different_registers(itable_index, flags);
  assert_different_registers(itable_index, cache, index);
  // determine constant pool cache field offsets
  assert(is_invokevirtual == (byte_no == f2_byte), "is_invokevirtual flag redundant");
  const int flags_offset = in_bytes(ConstantPoolCache::base_offset() +
                                    ConstantPoolCacheEntry::flags_offset());
  // access constant pool cache fields
  const int index_offset = in_bytes(ConstantPoolCache::base_offset() +
                                    ConstantPoolCacheEntry::f2_offset());

  size_t index_size = (is_invokedynamic ? sizeof(u4) : sizeof(u2));
  resolve_cache_and_index(byte_no, cache, index, index_size);
  __ load_resolved_method_at_index(byte_no, method, cache, index);

  if (itable_index != noreg) {
    // pick up itable or appendix index from f2 also:
    __ movptr(itable_index, Address(cache, index, Address::times_ptr, index_offset));
  }
  __ movl(flags, Address(cache, index, Address::times_ptr, flags_offset));
}

void TemplateTable::resolve_cache_and_index(int byte_no,
                                            Register cache,
                                            Register index,
                                            size_t index_size) {
  const Register temp = rbx;
  assert_different_registers(cache, index, temp);

  Label L_clinit_barrier_slow;
  Label resolved;

  Bytecodes::Code code = bytecode();
  switch (code) {
    case Bytecodes::_nofast_getfield: code = Bytecodes::_getfield; break;
    case Bytecodes::_nofast_putfield: code = Bytecodes::_putfield; break;
    default: break;
  }

  assert(byte_no == f1_byte || byte_no == f2_byte, "byte_no out of range");
  __ get_cache_and_index_and_bytecode_at_bcp(cache, index, temp, byte_no, 1, index_size);
  __ cmpl(temp, code); // have we resolved this bytecode?
  __ jcc(Assembler::equal, resolved);

  // resolve first time through
  // Class initialization barrier slow path lands here as well.
  __ bind(L_clinit_barrier_slow);
  // std::cout << "@@@@yym%%%%" << "method begin" << "----begin" << std::endl;
  address entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_from_cache);
  __ movl(temp, code);
  __ call_VM(noreg, entry, temp);
  // std::cout << "@@@@yym%%%%" << "method end" << "----end" << std::endl;
  // Update registers with resolved info
  __ get_cache_and_index_at_bcp(cache, index, 1, index_size);
  __ bind(resolved);

  // Class initialization barrier for static methods
  if (VM_Version::supports_fast_class_init_checks() && bytecode() == Bytecodes::_invokestatic) {
    const Register method = temp;
    const Register klass  = temp;
    const Register thread = LP64_ONLY(r15_thread) NOT_LP64(noreg);
    assert(thread != noreg, "x86_32 not supported");

    __ load_resolved_method_at_index(byte_no, method, cache, index);
    __ load_method_holder(klass, method);
    __ clinit_barrier(klass, thread, NULL /*L_fast_path*/, &L_clinit_barrier_slow);
  }
}

void InterpreterMacroAssembler::load_resolved_method_at_index(int byte_no,
                                                              Register method,
                                                              Register cache,
                                                              Register index) {
  assert_different_registers(cache, index);

  const int method_offset = in_bytes(
    ConstantPoolCache::base_offset() +
      ((byte_no == TemplateTable::f2_byte)
       ? ConstantPoolCacheEntry::f2_offset()
       : ConstantPoolCacheEntry::f1_offset()));

  movptr(method, Address(cache, index, Address::times_ptr, method_offset)); // get f1 Method*
}

void TemplateTable::invokevirtual_helper(Register index,
                                         Register recv,
                                         Register flags) {
  // Uses temporary registers rax, rdx
  assert_different_registers(index, recv, rax, rdx);
  assert(index == rbx, "");
  assert(recv == rcx, "");

  // Test for an invoke of a final method
  Label notFinal;
  __ movl(rax, flags);
  __ andl(rax, (1 << ConstantPoolCacheEntry::is_vfinal_shift));
  __ jcc(Assembler::zero, notFinal);

  const Register method = index; // method must be rbx
  assert(method == rbx,
         "Method* must be rbx for interpreter calling convention");

  // do the call - the index is actually the method to call
  // that is, f2 is a vtable index if !is_vfinal, else f2 is a Method*

  // It's final, need a null check here!
  __ null_check(recv);

  // profile this call
  __ profile_final_call(rax);
  __ profile_arguments_type(rax, method, rbcp, true);

  __ jump_from_interpreted(method, rax);

  __ bind(notFinal);

  // get receiver klass
  __ null_check(recv, oopDesc::klass_offset_in_bytes());
  Register tmp_load_klass = LP64_ONLY(rscratch1) NOT_LP64(noreg);
  __ load_klass(rax, recv, tmp_load_klass);

  // profile this call
  __ profile_virtual_call(rax, rlocals, rdx);
  // get target Method* & entry point
  __ lookup_virtual_method(rax, index, method);

  __ profile_arguments_type(rdx, method, rbcp, true);
  __ jump_from_interpreted(method, rdx);
}

// Jump to from_interpreted entry of a call unless single stepping is possible
// in this thread in which case we must call the i2i entry
void InterpreterMacroAssembler::jump_from_interpreted(Register method, Register temp) {
  prepare_to_jump_from_interpreted();

  if (JvmtiExport::can_post_interpreter_events()) {
    Label run_compiled_code;
    // JVMTI events, such as single-stepping, are implemented partly by avoiding running
    // compiled code in threads for which the event is enabled. Check here for
    // interp_only_mode if these events CAN be enabled.
    // interp_only is an int, on little endian it is sufficient to test the byte only
    // Is a cmpl faster?
    LP64_ONLY(temp = r15_thread;)
    NOT_LP64(get_thread(temp);)
    cmpb(Address(temp, JavaThread::interp_only_mode_offset()), 0);
    jccb(Assembler::zero, run_compiled_code);
    jmp(Address(method, Method::interpreter_entry_offset()));
    bind(run_compiled_code);
  }

  jmp(Address(method, Method::from_interpreted_offset()));
}
```