背景
想要了解jvm的bytecode 的汇编实现 ,目标平台是x86
汇编格式
同样一个汇编语句:将1赋值给rax
汇编有两种表达方式
desc/描述 | intel | AT&T |
---|---|---|
将1写入eax寄存器(rax的低32位) | mov eax,1 | movl $1,%eax |
将ebx+3 地址处的值写入eax | mov eax,[ebx+3] | movl 3(%ebx),%eax |
stack frame
在x86 64 位的模式下 rbcp
是用r13
, 指向当前正在执行的bytecode(bcp = bytecode pointer;下一个bytecode 通过 bcp + 当前bytecode 的长度 读取)
r14
则存了本地变量指针
// Global Register Names
// rbcp: register used as the bytecode pointer (see at_bcp); r13 when built
// for LP64 (64-bit), rsi otherwise.
static const Register rbcp = LP64_ONLY(r13) NOT_LP64(rsi);
// rlocals: base register used when addressing local variables (see
// iaddress); r14 when built for LP64 (64-bit), rdi otherwise.
static const Register rlocals = LP64_ONLY(r14) NOT_LP64(rdi);
这里LP64_ONLY()和NOT_LP64()是通过宏_LP64来确定的
__LP64__
_LP64
These macros are defined, with value 1, if (and only if) the compilation is for a target where long int and pointer both use 64-bits and int uses 32-bit.
amd64 下面的寄存器
java的stack frame
寄存器 | 含义、描述 |
---|---|
r14 | 存了本地变量的基地址 |
r13 | 指向当前正在执行的bytecode(rbcp),下一个bytecode 在 rbcp + 当前bytecode 长度 处 |
类似c的堆栈,java 的栈如下:
frame 用下面的结构描述
主要包括:
- _sp :栈指针,指向该frame 的栈顶
- _pc :程序计数器,指向调用之后的下一条指令
jdk/src/hotspot/share/runtime/frame.hpp
// Describes a single stack frame (excerpt: further members are elided at
// the "..." below).
class frame {
private:
// Instance variables:
intptr_t* _sp; // stack pointer (from Thread::last_Java_sp), i.e. the Java stack pointer
address _pc; // program counter (the next instruction after the call)
CodeBlob* _cb; // CodeBlob that "owns" pc
// Values that _deopt_state can take.
// NOTE(review): presumably records whether this frame has been
// deoptimized, with `unknown` meaning not yet determined — confirm.
enum deopt_state {
not_deoptimized,
is_deoptimized,
unknown
};
deopt_state _deopt_state;
...
};
bytecode
// Top-of-stack (tos) cache states. Each template declares the state it
// expects on entry and the state it leaves behind (see transition(...)).
enum TosState { // describes the tos cache contents
btos = 0, // byte, bool tos cached
ztos = 1, // byte, bool tos cached (NOTE(review): same comment as btos; presumably the boolean-specific state — confirm)
ctos = 2, // char tos cached
stos = 3, // short tos cached
itos = 4, // int tos cached
ltos = 5, // long tos cached
ftos = 6, // float tos cached
dtos = 7, // double tos cached
atos = 8, // object cached
vtos = 9, // tos not cached
number_of_states, // == 10, the number of states listed above
ilgl // illegal state: should not occur
};
iload
bytecode | enum | asm |
---|---|---|
iload | 21 |
$65 = (address) 0x7fffe1012693 "A\017\266]\002\203\373\025\017\204J"
(gdb) x/20i 0x7fffe1012693
0x7fffe1012693: movzbl 0x2(%r13),%ebx
0x7fffe1012698: cmp $0x15,%ebx <--- 判断下一个bytecode 是否是iload(0x15 = 21)
0x7fffe101269b: je 0x7fffe10126eb <-- 跳转到 done
0x7fffe10126a1: cmp $0xe0,%ebx <-- 判断下一个是否是_fast_iload
0x7fffe10126a7: mov $0xe1,%ecx <------ 下一个是_fast_iload 则重写成fast_iload2
0x7fffe10126ac: je 0x7fffe10126bd <-------- 跳转到rewrite label
0x7fffe10126ae: cmp $0x34,%ebx
0x7fffe10126b1: mov $0xe2,%ecx
0x7fffe10126b6: je 0x7fffe10126bd
0x7fffe10126b8: mov $0xe0,%ecx
0x7fffe10126bd: movzbl 0x0(%r13),%ebx
0x7fffe10126c2: cmp $0x15,%ebx
0x7fffe10126c5: je 0x7fffe10126e7
0x7fffe10126cb: cmp %ecx,%ebx
0x7fffe10126cd: je 0x7fffe10126e7
0x7fffe10126d3: movabs $0x7ffff74ef9d7,%rdi
0x7fffe10126dd: and $0xfffffffffffffff0,%rsp
0x7fffe10126e1: call 0x7ffff694f3c0 <_ZN14MacroAssembler7debug64EPclPl>
0x7fffe10126e6: hlt
0x7fffe10126e7: mov %cl,0x0(%r13)
源码分析(注意:下面贴的源码使用r0/r1/r4 寄存器以及br/ldr 指令,应该是aarch64 版本的实现;逻辑与上面的x86 汇编相同,只是寄存器名不同)
// Generates the template for iload (tos state: vtos -> itos).
//
// When RewriteFrequentPairs is enabled and `rc == may_rewrite`, the emitted
// code first looks at the bytecode that follows this iload and patches the
// current bytecode into a faster variant:
//   next == _iload      -> leave unchanged (jump to `done`)
//   next == _fast_iload -> rewrite to _fast_iload2
//   next == _caload     -> rewrite to _fast_icaload
//   otherwise           -> rewrite to _fast_iload
// Afterwards the local variable's value is loaded into r0.
//
// NOTE(review): the register names (r0/r1/r4) and the br/ldr mnemonics
// suggest this excerpt comes from the AArch64 port, not x86 — confirm.
void TemplateTable::iload_internal(RewriteControl rc) {
  transition(vtos, itos);
  if (RewriteFrequentPairs && rc == may_rewrite) {
    Label rewrite, done;
    Register bc = r4;

    // get next bytecode
    __ load_unsigned_byte(r1, at_bcp(Bytecodes::length_for(Bytecodes::_iload)));

    // if _iload, wait to rewrite to iload2. We only want to rewrite the
    // last two iloads in a pair. Comparing against fast_iload means that
    // the next bytecode is neither an iload or a caload, and therefore
    // an iload pair.
    __ cmpw(r1, Bytecodes::_iload);        // is the next bytecode an iload?
    __ br(Assembler::EQ, done);            // yes: do not rewrite, jump to done

    // if _fast_iload rewrite to _fast_iload2
    __ cmpw(r1, Bytecodes::_fast_iload);   // is the next bytecode _fast_iload?
    __ movw(bc, Bytecodes::_fast_iload2);  // candidate replacement: _fast_iload2
    __ br(Assembler::EQ, rewrite);         // yes: jump to the rewrite label

    // if _caload rewrite to _fast_icaload
    __ cmpw(r1, Bytecodes::_caload);
    __ movw(bc, Bytecodes::_fast_icaload);
    __ br(Assembler::EQ, rewrite);

    // else rewrite to _fast_iload
    __ movw(bc, Bytecodes::_fast_iload);

    // rewrite
    // bc: new bytecode
    __ bind(rewrite);
    patch_bytecode(Bytecodes::_iload, bc, r1, false);
    __ bind(done);
  }

  // do iload, get the local value into tos
  locals_index(r1);
  __ ldr(r0, iaddress(r1));
}
aconst_null
bytecode | desc | enum |
---|---|---|
aconst_null | push a null reference onto the stack | 0x01 |
// Generates the template for aconst_null (tos state: vtos -> atos).
void TemplateTable::aconst_null() {
transition(vtos, atos);
__ xorl(rax, rax); // rax is the top of the stack; xor-ing it with itself zeroes it
}
istore
bytecode | desc | enum |
---|---|---|
istore | Store int into local variable | 54, // 0x36 |
可以通过这个bytecode 了解怎么访问本地变量
// Generates the template for istore (tos state: itos -> vtos): writes rax
// into the local-variable slot addressed by iaddress(rbx).
void TemplateTable::istore() {
transition(itos, vtos); // only an assertion: the state before is itos and after is vtos; the states themselves are defined by the def(...) entry
locals_index(rbx); // put the local variable's offset, i.e. its index, into rbx
__ movl(iaddress(rbx), rax); // iaddress(rbx) is the slot address built from rlocals and rbx (not a jump address); store rax there
}
这里iaddress(rbx)
其实是以rlocals 为基址、rbx 为索引(按Address::times_ptr 缩放)算出来的地址,也就是对应本地变量槽的地址
// Returns the address operand for the local variable whose index is held in
// register `r`: base rlocals, index `r`, scale Address::times_ptr.
static inline Address iaddress(Register r) {
return Address(rlocals, r, Address::times_ptr);
}
iaddress
的源码在 src/hotspot/cpu/x86/templateTable_x86.cpp,它调用的Address 构造函数在 src/hotspot/cpu/x86/assembler_x86.hpp
调用顺序是iaddress
-> Address
// Builds the operand for a local variable: rlocals is the base register,
// `r` is the index register and Address::times_ptr is the scale factor.
static inline Address iaddress(Register r) {
return Address(rlocals, r, Address::times_ptr);
}
// Constructs an address operand from a base register, an index register,
// a scale factor and an optional displacement (default 0).
Address(Register base, Register index, ScaleFactor scale, int disp = 0)
: _base (base),
_index(index),
// this form has no XMM index register
_xmmindex(xnoreg),
_scale(scale),
_disp (disp),
_isxmmindex(false) {
// the index register must be invalid exactly when scale is no_scale
assert(!index->is_valid() == (scale == Address::no_scale),
"inconsistent address");
}
def istore展开
前面我们看到了transition(itos, vtos); ,这个transition 只是一个类似测试时用的断言,真正的入口/出口栈顶状态是在def 里定义的
// Table entry for istore. NOTE(review): the arguments presumably map to
// def's (code, flags, in, out, gen, arg) parameters, i.e. tos-in = itos,
// tos-out = vtos, generator = istore — confirm against the def overloads.
def(Bytecodes::_istore , ubcp|____|clvm|____, itos, vtos, istore , _ );
下面我们看看def 的定义,它最终会调用Template 的initialize 把flags、in、out 和生成函数保存起来:
// Registers the template for bytecode `code` (excerpt: the start of the
// body is elided at the "..." below).
//   code  - bytecode being defined
//   flags - template flags, passed through to Template::initialize
//   in    - tos state on entry
//   out   - tos state on exit
//   gen   - generator function taking one int argument
//   arg   - argument stored for `gen`
void TemplateTable::def(Bytecodes::Code code, int flags, TosState in, TosState out, void (*gen)(int arg), int arg) {
...
// pick the wide or the normal template slot (is_wide is set in the elided part)
Template* t = is_wide ? template_for_wide(code) : template_for(code);
// setup entry
t->initialize(flags, in, out, gen, arg);
assert(t->bytecode() == code, "just checkin'");
}
这里的 in 和out 会在TemplateInterpreterGenerator::generate_and_dispatch
的时候使用
//------------------------------------------------------------------------------------------------------------------------
// Emits the code for one template: optional debugging instrumentation
// (non-PRODUCT builds only), the dispatch prolog, the template body, and
// finally the dispatch epilog.
//   t       - template to generate
//   tos_out - outgoing tos state; ilgl means "use the template's own tos_out"
void TemplateInterpreterGenerator::generate_and_dispatch(Template* t, TosState tos_out) {
#ifndef PRODUCT
// debugging code
if (CountBytecodes || TraceBytecodes || StopInterpreterAt > 0) count_bytecode();
if (PrintBytecodeHistogram) histogram_bytecode(t);
if (PrintBytecodePairHistogram) histogram_bytecode_pair(t);
if (TraceBytecodes) trace_bytecode(t);
if (StopInterpreterAt > 0) stop_interpreter_at();
__ verify_FPU(1, t->tos_in());
#endif // !PRODUCT
// step stays 0 for templates that do their own dispatch
int step = 0;
if (!t->does_dispatch()) {
// length of this bytecode, using the wide length for wide templates
step = t->is_wide() ? Bytecodes::wide_length_for(t->bytecode()) : Bytecodes::length_for(t->bytecode());
// caller did not supply an outgoing state: fall back to the template's
if (tos_out == ilgl) tos_out = t->tos_out();
// compute bytecode size
assert(step > 0, "just checkin'");
// setup stuff for dispatching next bytecode
if (ProfileInterpreter && VerifyDataPointer
&& MethodData::bytecode_has_profile(t->bytecode())) {
__ verify_method_data_pointer();
}
__ dispatch_prolog(tos_out, step);
}
// generate template
t->generate(_masm);
// advance
if (t->does_dispatch()) {
#ifdef ASSERT
// make sure execution doesn't go beyond this point if code is broken
__ should_not_reach_here();
#endif // ASSERT
} else {
// dispatch to next bytecode
__ dispatch_epilog(tos_out, step);
}
}