Skip to main content

One post tagged with "bytecode"

View All Tags

How each JVM bytecode is implemented in x86 assembly

· 7 min read

背景

想要了解jvm的bytecode 的汇编实现 ,目标平台是x86

汇编格式

同样一个汇编语句:将1赋值给rax

汇编有两种表达方式

| desc/描述 | intel | AT&T |
| --- | --- | --- |
| 将1写入eax寄存器 | mov eax,1 | movl $1,%eax |
| 将ebx+3 的地址的值写入eax | mov eax,[ebx+3] | movl 3(%ebx),%eax |

stack frame

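在 x86 64 位模式下,rbcp(bcp = bytecode pointer)使用 r13,指向当前正在执行的 bytecode;下一个 bytecode 通过 r13 加上当前 bytecode 的长度读取(例如下文 iload 中的 movzbl 0x2(%r13),%ebx)
r14 则存了本地变量指针(rlocals)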

// Global Register Names
// Each alias selects its register through the LP64_ONLY(...) / NOT_LP64(...)
// macro pair, so a different register is named depending on whether the
// build is LP64.
// rbcp: r13 under LP64_ONLY, rsi under NOT_LP64.
static const Register rbcp = LP64_ONLY(r13) NOT_LP64(rsi);
// rlocals: r14 under LP64_ONLY, rdi under NOT_LP64.
static const Register rlocals = LP64_ONLY(r14) NOT_LP64(rdi);

这里LP64_ONLY()和NOT_LP64()是通过宏_LP64来确定的

__LP64__
_LP64
These macros are defined, with value 1, if (and only if) the compilation is for a target where long int and pointer both use 64-bits and int uses 32-bit.

amd64 下面的寄存器

java的stack frame

| 寄存器 | 含义、描述 |
| --- | --- |
| r14 | 存了本地变量的基地址 |
| r13 | 指向当前正在执行的bytecode(下一个bytecode由 r13 加上当前指令长度得到) |

类似c的堆栈,java 的栈如下:

stack

相关阅读

frame 用下面的结构描述

主要包括:

  • _sp :指向栈
  • _pc : 指向指令
jdk/src/hotspot/share/runtime/frame.hpp
// Describes one stack frame; only the leading instance variables are shown
// in this excerpt (the rest of the class is elided below).
class frame {
private:
// Instance variables:
intptr_t* _sp; // stack pointer (from Thread::last_Java_sp), i.e. the Java stack pointer
address _pc; // program counter (the next instruction after the call), i.e. pointer to the next instruction

CodeBlob* _cb; // CodeBlob that "owns" pc
// Possible values of _deopt_state.
// NOTE(review): presumably tracks whether this frame has been deoptimized — confirm against the full header.
enum deopt_state {
not_deoptimized,
is_deoptimized,
unknown
};

deopt_state _deopt_state;

...

};

bytecode

// Top-of-stack (tos) cache states: each value names what kind of value, if
// any, is currently held in the tos cache.
enum TosState {
  btos = 0,                             // tos cache holds a byte/bool
  ztos = 1,                             // tos cache holds a byte/bool
  ctos = 2,                             // tos cache holds a char
  stos = 3,                             // tos cache holds a short
  itos = 4,                             // tos cache holds an int
  ltos = 5,                             // tos cache holds a long
  ftos = 6,                             // tos cache holds a float
  dtos = 7,                             // tos cache holds a double
  atos = 8,                             // tos cache holds an object
  vtos = 9,                             // nothing is cached in tos
  number_of_states = vtos + 1,          // count of the real states above
  ilgl = number_of_states + 1           // illegal state: must never be observed
};

iload

| bytecode | enum | asm |
| --- | --- | --- |
| iload | 21 (0x15) | 见下方 gdb 反汇编 |
$65 = (address) 0x7fffe1012693 "A\017\266]\002\203\373\025\017\204J"
(gdb) x/20i 0x7fffe1012693
0x7fffe1012693: movzbl 0x2(%r13),%ebx
0x7fffe1012698: cmp $0x15,%ebx <--- 下一个bytecode
0x7fffe101269b: je 0x7fffe10126eb <-- 跳转到 done
0x7fffe10126a1: cmp $0xe0,%ebx <-- 判断下一个是否是_fast_iload
0x7fffe10126a7: mov $0xe1,%ecx <------ 下一个是_fast_iload 则重写成fast_iload2
0x7fffe10126ac: je 0x7fffe10126bd <-------- 跳转到rewrite label
0x7fffe10126ae: cmp $0x34,%ebx
0x7fffe10126b1: mov $0xe2,%ecx
0x7fffe10126b6: je 0x7fffe10126bd
0x7fffe10126b8: mov $0xe0,%ecx
0x7fffe10126bd: movzbl 0x0(%r13),%ebx
0x7fffe10126c2: cmp $0x15,%ebx
0x7fffe10126c5: je 0x7fffe10126e7
0x7fffe10126cb: cmp %ecx,%ebx
0x7fffe10126cd: je 0x7fffe10126e7
0x7fffe10126d3: movabs $0x7ffff74ef9d7,%rdi
0x7fffe10126dd: and $0xfffffffffffffff0,%rsp
0x7fffe10126e1: call 0x7ffff694f3c0 <_ZN14MacroAssembler7debug64EPclPl>
0x7fffe10126e6: hlt
0x7fffe10126e7: mov %cl,0x0(%r13)

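源码分析

注意:下面贴出的是 AArch64 版本的 iload_internal(使用 r0/r1/r4 寄存器以及 cmpw/movw/br/ldr 指令),逻辑与上面 x86 的反汇编一一对应;x86 版本使用的是 ebx/ecx(见上面的反汇编)。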

// Generates the interpreter template for iload: the tos state goes from vtos
// to itos and the local selected by locals_index() is loaded into r0.
// When RewriteFrequentPairs is set and rc == may_rewrite, the template first
// looks at the bytecode that follows this iload and picks a replacement
// bytecode (_fast_iload, _fast_iload2 or _fast_icaload) that is handed to
// patch_bytecode(); if the following bytecode is itself _iload, no rewrite
// is done.
// NOTE(review): the mnemonics (cmpw/movw/br/ldr) and registers (r0/r1/r4)
// look like the AArch64 port rather than x86 — confirm which port this
// excerpt was taken from.
void TemplateTable::iload_internal(RewriteControl rc) {
transition(vtos, itos);
if (RewriteFrequentPairs && rc == may_rewrite) {
Label rewrite, done;
// bc carries the replacement bytecode passed to patch_bytecode() below
Register bc = r4;

// get next bytecode
__ load_unsigned_byte(r1, at_bcp(Bytecodes::length_for(Bytecodes::_iload)));

// if _iload, wait to rewrite to iload2. We only want to rewrite the
// last two iloads in a pair. Comparing against fast_iload means that
// the next bytecode is neither an iload or a caload, and therefore
// an iload pair.
__ cmpw(r1, Bytecodes::_iload); <--- 下一个bytecode
__ br(Assembler::EQ, done); <---- 跳转到done

// if _fast_iload rewrite to _fast_iload2
__ cmpw(r1, Bytecodes::_fast_iload); <-- 判断下一个是否是_fast_iload
__ movw(bc, Bytecodes::_fast_iload2); <------ 下一个是_fast_iload 则重写成fast_iload2
__ br(Assembler::EQ, rewrite); <-------- 跳转到rewrite label

// if _caload rewrite to _fast_icaload
__ cmpw(r1, Bytecodes::_caload);
__ movw(bc, Bytecodes::_fast_icaload);
__ br(Assembler::EQ, rewrite);

// else rewrite to _fast_iload
__ movw(bc, Bytecodes::_fast_iload);

// rewrite
// bc: new bytecode
__ bind(rewrite);
patch_bytecode(Bytecodes::_iload, bc, r1, false);
__ bind(done);

}

// do iload, get the local value into tos
locals_index(r1);
__ ldr(r0, iaddress(r1));

}

aconst_null

| bytecode | desc | enum |
| --- | --- | --- |
| aconst_null | push a null reference onto the stack | 0x01 |
// Generates the template for aconst_null: the tos state goes from vtos to
// atos and rax is cleared.
void TemplateTable::aconst_null() {
transition(vtos, atos);
__ xorl(rax, rax); // rax is the top-of-stack register; xor-ing it with itself leaves zero in it
}

istore

| bytecode | desc | enum |
| --- | --- | --- |
| istore | Store int into local variable | 54, // 0x36 |

可以通过这个bytecode 了解怎么访问本地变量

// Generates the template for istore: stores the int held in rax into the
// local variable slot addressed by iaddress(rbx).
void TemplateTable::istore() {
transition(itos, vtos); // only an assertion: the state before is itos and after is vtos; the states themselves are defined through def()
locals_index(rbx); // put the local variable index into rbx
__ movl(iaddress(rbx), rax); // iaddress(rbx) addresses the local slot (base rlocals, index rbx, scale Address::times_ptr); rax is written to that slot
}

这里iaddress(rbx) 其实是以 rlocals 为基址、rbx 为索引并按指针大小缩放(Address::times_ptr)得到的地址,也就是相对于本地变量基址的偏移

// Builds the Address of a local variable slot: base register rlocals,
// index register r, scaled by Address::times_ptr.
static inline Address iaddress(Register r) {
return Address(rlocals, r, Address::times_ptr);
}

iaddress 定义在 src/hotspot/cpu/x86/templateTable_x86.cpp,它构造的 Address 定义在 src/hotspot/cpu/x86/assembler_x86.hpp,调用顺序是 iaddress -> Address

// Builds the Address of a local variable slot: base register rlocals,
// index register r, scaled by Address::times_ptr.
static inline Address iaddress(Register r) {
return Address(rlocals, r, Address::times_ptr);
}
// Constructs a base + index*scale + disp address operand.
//   base  - base register
//   index - index register
//   scale - scale factor applied to index
//   disp  - displacement, defaults to 0
// The xmm index is set to xnoreg and _isxmmindex to false.
Address(Register base, Register index, ScaleFactor scale, int disp = 0)
: _base (base),
_index(index),
_xmmindex(xnoreg),
_scale(scale),
_disp (disp),
_isxmmindex(false) {
// an invalid index register must go together with no_scale, and vice versa
assert(!index->is_valid() == (scale == Address::no_scale),
"inconsistent address");
}

def istore展开

前面看到的 transition(itos, vtos); 只是一个类似测试用的断言(assert),栈顶状态的 in/out 实际上是通过 def 来定义的

  // Template table entry for istore; arguments follow def()'s order (code, flags, in, out, gen, arg): tos state in = itos, out = vtos, generator = istore.
  def(Bytecodes::_istore              , ubcp|____|clvm|____, itos, vtos, istore              ,  _           );

下面我们看看 def 是如何一步步展开的:

// Registers the template for one bytecode.
//   code  - bytecode the template is for
//   flags - template flags
//   in    - tos state on entry to the template
//   out   - tos state on exit from the template
//   gen   - generator function for the template
//   arg   - argument passed along with gen
// (part of the body is elided in this excerpt)
void TemplateTable::def(Bytecodes::Code code, int flags, TosState in, TosState out, void (*gen)(int arg), int arg) {
...
// pick the wide or the normal template slot for this bytecode
Template* t = is_wide ? template_for_wide(code) : template_for(code);
// setup entry
t->initialize(flags, in, out, gen, arg);
assert(t->bytecode() == code, "just checkin'");
}

这里的 in 和out 会在TemplateInterpreterGenerator::generate_and_dispatch的时候使用


//------------------------------------------------------------------------------------------------------------------------

// Generates the code for one bytecode template and, unless the template
// dispatches by itself, wraps it in the prolog/epilog that dispatches to the
// next bytecode.
//   t       - template to generate
//   tos_out - tos state used for dispatching; if ilgl is passed and the
//             template does not dispatch itself, t->tos_out() is used instead
void TemplateInterpreterGenerator::generate_and_dispatch(Template* t, TosState tos_out) {
#ifndef PRODUCT
// debugging code
if (CountBytecodes || TraceBytecodes || StopInterpreterAt > 0) count_bytecode();
if (PrintBytecodeHistogram) histogram_bytecode(t);
if (PrintBytecodePairHistogram) histogram_bytecode_pair(t);
if (TraceBytecodes) trace_bytecode(t);
if (StopInterpreterAt > 0) stop_interpreter_at();
__ verify_FPU(1, t->tos_in());
#endif // !PRODUCT
// step stays 0 for templates that dispatch themselves
int step = 0;
if (!t->does_dispatch()) {
// step is the length of this bytecode (wide length for wide templates)
step = t->is_wide() ? Bytecodes::wide_length_for(t->bytecode()) : Bytecodes::length_for(t->bytecode());
// ilgl means "no explicit tos_out given": fall back to the template's own
if (tos_out == ilgl) tos_out = t->tos_out();
// the bytecode length must be positive
assert(step > 0, "just checkin'");
// setup stuff for dispatching next bytecode
if (ProfileInterpreter && VerifyDataPointer
&& MethodData::bytecode_has_profile(t->bytecode())) {
__ verify_method_data_pointer();
}
__ dispatch_prolog(tos_out, step);
}
// generate template
t->generate(_masm);
// advance
if (t->does_dispatch()) {
#ifdef ASSERT
// make sure execution doesn't go beyond this point if code is broken
__ should_not_reach_here();
#endif // ASSERT
} else {
// dispatch to next bytecode
__ dispatch_epilog(tos_out, step);
}
}

reference