Skip to content

How each JVM bytecode is implemented in x86 assembly

Posted on:March 9, 2023 at 07:15 AM

背景

想要了解jvm的bytecode 的汇编实现 ,目标平台是x86

汇编格式

同样一个汇编语句:将1赋值给rax

汇编有两种表达方式

| desc/描述 | Intel | AT&T |
| --- | --- | --- |
| 将1写入eax寄存器 | mov eax,1 | movl $1,%eax |
| 将ebx+3 地址处的值写入eax | mov eax,[ebx+3] | movl 3(%ebx),%eax |

stack frame

在x86 64位模式下,rbcp(bcp = bytecode pointer)使用r13,指向当前正在执行的bytecode,下一个bytecode通过rbcp加上当前bytecode的长度取得;它的作用类似于原生代码里的rip(i = instruction)
r14则存了本地变量指针

// Global Register Names
// rbcp is r13 when LP64_ONLY applies and rsi otherwise;
// rlocals is r14 when LP64_ONLY applies and rdi otherwise.
static const Register rbcp     = LP64_ONLY(r13) NOT_LP64(rsi);
static const Register rlocals  = LP64_ONLY(r14) NOT_LP64(rdi);

这里LP64_ONLY()和NOT_LP64()是通过宏_LP64来确定的

__LP64__
_LP64
These macros are defined, with value 1, if (and only if) the compilation is for a target where long int and pointer both use 64-bits and int uses 32-bit.

amd64 下面的寄存器

java的stack frame

| 寄存器 | 含义、描述 |
| --- | --- |
| r14 | 存了本地变量的基地址 |
| r13 | 指向当前正在执行的bytecode(下一个bytecode通过加上当前bytecode的长度得到) |

类似c的堆栈,java 的栈如下:

stack

相关阅读

frame 用下面的结构描述

主要包括:

jdk/src/hotspot/share/runtime/frame.hpp
// Excerpt of the frame class: only the instance variables are shown here,
// the rest of the class is elided.
class frame {
 private:
  // Instance variables:
  intptr_t* _sp; // stack pointer (from Thread::last_Java_sp), i.e. the Java stack pointer
  address   _pc; // program counter (the next instruction after the call), i.e. pointer to the next instruction

  CodeBlob* _cb; // CodeBlob that "owns" pc
  enum deopt_state {
    not_deoptimized,
    is_deoptimized,
    unknown
  };

  deopt_state _deopt_state;

...

};

bytecode

// Top-of-stack (tos) cache states. Each template is declared with an "in" and
// an "out" TosState.
// NOTE(review): btos and ztos carry the same comment text in this listing;
// presumably ztos is the boolean variant -- confirm against the JDK source.
enum TosState {         // describes the tos cache contents
  btos = 0,             // byte, bool tos cached
  ztos = 1,             // byte, bool tos cached
  ctos = 2,             // char tos cached
  stos = 3,             // short tos cached
  itos = 4,             // int tos cached
  ltos = 5,             // long tos cached
  ftos = 6,             // float tos cached
  dtos = 7,             // double tos cached
  atos = 8,             // object cached
  vtos = 9,             // tos not cached
  number_of_states,
  ilgl                  // illegal state: should not occur
};

iload

| bytecode | enum | asm |
| --- | --- | --- |
| iload | 21 (0x15) | 见下面的 gdb 反汇编 |
$65 = (address) 0x7fffe1012693 "A\017\266]\002\203\373\025\017\204J"
(gdb) x/20i 0x7fffe1012693
   0x7fffe1012693:	movzbl 0x2(%r13),%ebx
   0x7fffe1012698:	cmp    $0x15,%ebx            <---  下一个bytecode
   0x7fffe101269b:	je     0x7fffe10126eb        <--   跳转到 done
   0x7fffe10126a1:	cmp    $0xe0,%ebx             <-- 判断下一个是否是_fast_iload
   0x7fffe10126a7:	mov    $0xe1,%ecx              <------ 下一个是_fast_iload 则重写成fast_iload2
   0x7fffe10126ac:	je     0x7fffe10126bd           <--------  跳转到rewrite label
   0x7fffe10126ae:	cmp    $0x34,%ebx
   0x7fffe10126b1:	mov    $0xe2,%ecx
   0x7fffe10126b6:	je     0x7fffe10126bd
   0x7fffe10126b8:	mov    $0xe0,%ecx
   0x7fffe10126bd:	movzbl 0x0(%r13),%ebx
   0x7fffe10126c2:	cmp    $0x15,%ebx
   0x7fffe10126c5:	je     0x7fffe10126e7
   0x7fffe10126cb:	cmp    %ecx,%ebx
   0x7fffe10126cd:	je     0x7fffe10126e7
   0x7fffe10126d3:	movabs $0x7ffff74ef9d7,%rdi
   0x7fffe10126dd:	and    $0xfffffffffffffff0,%rsp
   0x7fffe10126e1:	call   0x7ffff694f3c0 <_ZN14MacroAssembler7debug64EPclPl>
   0x7fffe10126e6:	hlt    
   0x7fffe10126e7:	mov    %cl,0x0(%r13)

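源码分析

注意:下面贴的是 aarch64 版本的 TemplateTable::iload_internal(使用 r0/r1/r4、cmpw、br、ldr),而上面的反汇编是 x86 的。两者流程相同,只是寄存器和指令不同:反汇编里的 %ebx 对应这里的 r1(下一个bytecode),%ecx 对应 bc(要改写成的bytecode)。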

// Generates the iload template. When RewriteFrequentPairs is on and rc allows
// rewriting, it looks at the next bytecode and patches the current iload to
// _fast_iload, _fast_iload2 or _fast_icaload (or leaves it alone when the next
// bytecode is also an iload); it then loads the indexed local into r0.
void TemplateTable::iload_internal(RewriteControl rc) {
  transition(vtos, itos);
  if (RewriteFrequentPairs && rc == may_rewrite) {
    Label rewrite, done;
    Register bc = r4;

    // get next bytecode
    __ load_unsigned_byte(r1, at_bcp(Bytecodes::length_for(Bytecodes::_iload)));     

    // if _iload, wait to rewrite to iload2.  We only want to rewrite the
    // last two iloads in a pair.  Comparing against fast_iload means that
    // the next bytecode is neither an iload or a caload, and therefore
    // an iload pair.
    __ cmpw(r1, Bytecodes::_iload);         <--- 下一个bytecode
    __ br(Assembler::EQ, done);             <---- 跳转到done

    // if _fast_iload rewrite to _fast_iload2
    __ cmpw(r1, Bytecodes::_fast_iload);       <-- 判断下一个是否是_fast_iload
    __ movw(bc, Bytecodes::_fast_iload2);    <------ 下一个是_fast_iload 则重写成fast_iload2
    __ br(Assembler::EQ, rewrite);           <--------  跳转到rewrite label  

    // if _caload rewrite to _fast_icaload
    __ cmpw(r1, Bytecodes::_caload);
    __ movw(bc, Bytecodes::_fast_icaload);
    __ br(Assembler::EQ, rewrite);

    // else rewrite to _fast_iload
    __ movw(bc, Bytecodes::_fast_iload);

    // rewrite
    // bc: new bytecode
    __ bind(rewrite);
    patch_bytecode(Bytecodes::_iload, bc, r1, false);
    __ bind(done);

  }

  // do iload, get the local value into tos
  locals_index(r1);
  __ ldr(r0, iaddress(r1));

}

aconst_null

| bytecode | desc | enum |
| --- | --- | --- |
| aconst_null | push a null reference onto the stack | 0x01 |
// Template for aconst_null: goes from vtos to atos by zeroing rax.
void TemplateTable::aconst_null() {
  transition(vtos, atos);
  __ xorl(rax, rax);   // rax is the top-of-stack value here; xor with itself clears it
}

istore

| bytecode | desc | enum |
| --- | --- | --- |
| istore | Store int into local variable | 54 (0x36) |

可以通过这个bytecode 了解怎么访问本地变量

// Template for istore: writes rax (the int on top of stack, per itos) into the
// local variable slot addressed through rbx.
void TemplateTable::istore() {
  transition(itos, vtos);   // assertion only: the state is itos before and vtos after; the real in/out states are declared by def()
  locals_index(rbx); // puts the local variable index (the offset) into rbx
  __ movl(iaddress(rbx), rax); // iaddress(rbx) is the address of the local slot relative to rlocals (not a jump target); rax is stored there
}

这里iaddress(rbx) 其实是以rlocals为基址、rbx为索引、按指针大小(Address::times_ptr)缩放得到的地址,也就是该本地变量相对于rlocals的位置

// Builds the Address of a local variable slot: base rlocals, index register r,
// scale Address::times_ptr.
static inline Address iaddress(Register r) {
  return Address(rlocals, r, Address::times_ptr);
}

iaddress 的源码在 src/hotspot/cpu/x86/templateTable_x86.cpp,它调用的 Address 构造函数在 src/hotspot/cpu/x86/assembler_x86.hpp,调用顺序是iaddress -> Address

// Builds the Address of a local variable slot: base rlocals, index register r,
// scale Address::times_ptr.
static inline Address iaddress(Register r) {
  return Address(rlocals, r, Address::times_ptr);
}
  // Constructor used by iaddress: records base, index, scale and displacement
  // (disp defaults to 0) and marks the address as having no XMM index.
  Address(Register base, Register index, ScaleFactor scale, int disp = 0)
    : _base (base),
      _index(index),
      _xmmindex(xnoreg),
      _scale(scale),
      _disp (disp),
      _isxmmindex(false) {
    // an invalid index register must come with no_scale, and a valid one with a real scale
    assert(!index->is_valid() == (scale == Address::no_scale),
           "inconsistent address");
  }

def istore展开

前面看到的 transition(itos, vtos); 只是一个类似调试时使用的断言,真正的 in/out 栈顶状态是在 def 里定义的

  def(Bytecodes::_istore              , ubcp|____|clvm|____, itos, vtos, istore              ,  _           );

下面我们看看def的展开:def 最终会调用 Template::initialize,把 flags、in、out 和生成函数 gen 记录到对应的 Template 里

void TemplateTable::def(Bytecodes::Code code, int flags, TosState in, TosState out, void (*gen)(int arg), int arg) {
  ...
  Template* t = is_wide ? template_for_wide(code) : template_for(code);
  // setup entry
  t->initialize(flags, in, out, gen, arg);
  assert(t->bytecode() == code, "just checkin'");
}

这里的 in 和out 会在TemplateInterpreterGenerator::generate_and_dispatch的时候使用


//------------------------------------------------------------------------------------------------------------------------

// Emits the code for one bytecode template t and, unless the template
// dispatches by itself, the prolog/epilog that dispatches to the next bytecode.
// tos_out: tos state used for the dispatch; ilgl means "take t->tos_out()".
void TemplateInterpreterGenerator::generate_and_dispatch(Template* t, TosState tos_out) {
#ifndef PRODUCT
  // debugging code
  if (CountBytecodes || TraceBytecodes || StopInterpreterAt > 0) count_bytecode();
  if (PrintBytecodeHistogram)                                    histogram_bytecode(t);
  if (PrintBytecodePairHistogram)                                histogram_bytecode_pair(t);
  if (TraceBytecodes)                                            trace_bytecode(t);
  if (StopInterpreterAt > 0)                                     stop_interpreter_at();
  __ verify_FPU(1, t->tos_in());
#endif // !PRODUCT
  // length of this bytecode (wide or normal form); stays 0 when the template dispatches itself
  int step = 0;
  if (!t->does_dispatch()) {
    step = t->is_wide() ? Bytecodes::wide_length_for(t->bytecode()) : Bytecodes::length_for(t->bytecode());
    if (tos_out == ilgl) tos_out = t->tos_out();
    // compute bytecode size
    assert(step > 0, "just checkin'");
    // setup stuff for dispatching next bytecode
    if (ProfileInterpreter && VerifyDataPointer
        && MethodData::bytecode_has_profile(t->bytecode())) {
      __ verify_method_data_pointer();
    }
    __ dispatch_prolog(tos_out, step);
  }
  // generate template
  t->generate(_masm);
  // advance
  if (t->does_dispatch()) {
#ifdef ASSERT
    // make sure execution doesn't go beyond this point if code is broken
    __ should_not_reach_here();
#endif // ASSERT
  } else {
    // dispatch to next bytecode
    __ dispatch_epilog(tos_out, step);
  }
}

reference