7

JVM之创建对象源码分析

 3 years ago
source link: https://zhuanlan.zhihu.com/p/34213112
Go to the source link to view the article. You can view the picture content, updated content and better typesetting reading experience. If the link is broken, please click the button below to view the snapshot at that time.

JVM之创建对象源码分析

中华儿女多奇志,不爱无码爱代码

欢迎关注我的专栏:半栈工程师

之前对Java对象的创建一直都是概念上的了解,没有在源码层面进行过分析,这段时间在看HotSpot,就顺便了解了下JVM究竟是如何创建Java对象的。

一:Java对象创建流程

  1. 检查对象所属类是否已经被加载解析;
  2. 为对象分配内存空间;
  3. 将分配给对象的内存初始化为零值;
  4. 执行对象的<init>方法,用来初始化对象。

我之前收藏过一张图,忘记出处了,此处引用下:

v2-30cd25f9d2d01cb61568f489b2dbcb1b_720w.jpg

二:对象创建源码分析

2.1:了解对象创建指令

我们先从一个简单的Demo入手:

public class Test {

    public static void main(String[] args) {
        Dog dog = new Dog();
    }

    static class Dog{
        int age;
    }
}

上面代码很简单,在main()中创建了一个Dog对象,写这个Demo是为了让我们看看new Dog()在编译成字节码后会变成什么。

public static void main(java.lang.String[]);
    Code:
       0: new           #2                  // class com/wangxiandeng/test/Test$Dog
       3: dup
       4: invokespecial #3                  // Method com/wangxiandeng/test/Test$Dog."<init>":()V
       7: astore_1
       8: return
}

可见new Dog()变成了new #2,new 是java众多字节码中用来实例化对象的字节码,不用我说大家肯定也清楚,关键后面的 #2 是个啥?

类在编译成字节码时,会生成类所属的常量池,常量池中记录了各种符号引用及常量,#2 其实就是常量池中索引为2的常量项,此处指向的是Dog类的符号。

2.2:new指令源码分析

上面已经对类创建的字节码进行了简单介绍,我们已经知道了用于对象创建的字节码指令为new,接下来就可以对new指令进行源码分析了。

在我上一篇博客中对java字节码指令的运行进行了介绍,主要讲的是模板解释器,今天我们仍然对模板解释器中new指令的运行进行讲解,不清楚模板解释器的读者可以看看《JVM之模板解释器》

我们先来看看new指令对应的汇编代码,代码很长,我们稍后会进行逐步分析,不想直接看代码的同学可以先跳过。

/hotspot/src/cpu/x86/vm/templateTable_x86.cpp

void TemplateTable::_new() {
  transition(vtos, atos);
  __ get_unsigned_2_byte_index_at_bcp(rdx, 1);
  Label slow_case;
  Label slow_case_no_pop;
  Label done;
  Label initialize_header;
  Label initialize_object;  // including clearing the fields
  Label allocate_shared;

  __ get_cpool_and_tags(rcx, rax);

  // Make sure the class we're about to instantiate has been resolved.
  // This is done before loading InstanceKlass to be consistent with the order
  // how Constant Pool is updated (see ConstantPool::klass_at_put)
  const int tags_offset = Array<u1>::base_offset_in_bytes();
  __ cmpb(Address(rax, rdx, Address::times_1, tags_offset), JVM_CONSTANT_Class);
  __ jcc(Assembler::notEqual, slow_case_no_pop);

  // get InstanceKlass
  __ movptr(rcx, Address(rcx, rdx, Address::times_ptr, sizeof(ConstantPool)));
  __ push(rcx);  // save the contexts of klass for initializing the header

  // make sure klass is initialized & doesn't have finalizer
  // make sure klass is fully initialized
  __ cmpb(Address(rcx, InstanceKlass::init_state_offset()), InstanceKlass::fully_initialized);
  __ jcc(Assembler::notEqual, slow_case);

  // get instance_size in InstanceKlass (scaled to a count of bytes)
  __ movl(rdx, Address(rcx, Klass::layout_helper_offset()));
  // test to see if it has a finalizer or is malformed in some way
  __ testl(rdx, Klass::_lh_instance_slow_path_bit);
  __ jcc(Assembler::notZero, slow_case);

  //
  // Allocate the instance
  // 1) Try to allocate in the TLAB
  // 2) if fail and the object is large allocate in the shared Eden
  // 3) if the above fails (or is not applicable), go to a slow case
  // (creates a new TLAB, etc.)

  const bool allow_shared_alloc =
    Universe::heap()->supports_inline_contig_alloc();

  const Register thread = LP64_ONLY(r15_thread) NOT_LP64(rcx);
#ifndef _LP64
  if (UseTLAB || allow_shared_alloc) {
    __ get_thread(thread);
  }
#endif // _LP64

  if (UseTLAB) {
    __ movptr(rax, Address(thread, in_bytes(JavaThread::tlab_top_offset())));
    __ lea(rbx, Address(rax, rdx, Address::times_1));
    __ cmpptr(rbx, Address(thread, in_bytes(JavaThread::tlab_end_offset())));
    __ jcc(Assembler::above, allow_shared_alloc ? allocate_shared : slow_case);
    __ movptr(Address(thread, in_bytes(JavaThread::tlab_top_offset())), rbx);
    if (ZeroTLAB) {
      // the fields have been already cleared
      __ jmp(initialize_header);
    } else {
      // initialize both the header and fields
      __ jmp(initialize_object);
    }
  }

  // Allocation in the shared Eden, if allowed.
  //
  // rdx: instance size in bytes
  if (allow_shared_alloc) {
    __ bind(allocate_shared);

    ExternalAddress heap_top((address)Universe::heap()->top_addr());
    ExternalAddress heap_end((address)Universe::heap()->end_addr());

    Label retry;
    __ bind(retry);
    __ movptr(rax, heap_top);
    __ lea(rbx, Address(rax, rdx, Address::times_1));
    __ cmpptr(rbx, heap_end);
    __ jcc(Assembler::above, slow_case);

    // Compare rax, with the top addr, and if still equal, store the new
    // top addr in rbx, at the address of the top addr pointer. Sets ZF if was
    // equal, and clears it otherwise. Use lock prefix for atomicity on MPs.
    //
    // rax,: object begin
    // rbx,: object end
    // rdx: instance size in bytes
    __ locked_cmpxchgptr(rbx, heap_top);

    // if someone beat us on the allocation, try again, otherwise continue
    __ jcc(Assembler::notEqual, retry);

    __ incr_allocated_bytes(thread, rdx, 0);
  }

  if (UseTLAB || Universe::heap()->supports_inline_contig_alloc()) {
    // The object is initialized before the header.  If the object size is
    // zero, go directly to the header initialization.
    __ bind(initialize_object);
    __ decrement(rdx, sizeof(oopDesc));
    __ jcc(Assembler::zero, initialize_header);

    // Initialize topmost object field, divide rdx by 8, check if odd and
    // test if zero.
    __ xorl(rcx, rcx);    // use zero reg to clear memory (shorter code)
    __ shrl(rdx, LogBytesPerLong); // divide by 2*oopSize and set carry flag if odd

    // rdx must have been multiple of 8
#ifdef ASSERT
    // make sure rdx was multiple of 8
    Label L;
    // Ignore partial flag stall after shrl() since it is debug VM
    __ jccb(Assembler::carryClear, L);
    __ stop("object size is not multiple of 2 - adjust this code");
    __ bind(L);
    // rdx must be > 0, no extra check needed here
#endif

    // initialize remaining object fields: rdx was a multiple of 8
    { Label loop;
    __ bind(loop);
    __ movptr(Address(rax, rdx, Address::times_8, sizeof(oopDesc) - 1*oopSize), rcx);
    NOT_LP64(__ movptr(Address(rax, rdx, Address::times_8, sizeof(oopDesc) - 2*oopSize), rcx));
    __ decrement(rdx);
    __ jcc(Assembler::notZero, loop);
    }

    // initialize object header only.
    __ bind(initialize_header);
    if (UseBiasedLocking) {
      __ pop(rcx);   // get saved klass back in the register.
      __ movptr(rbx, Address(rcx, Klass::prototype_header_offset()));
      __ movptr(Address(rax, oopDesc::mark_offset_in_bytes ()), rbx);
    } else {
      __ movptr(Address(rax, oopDesc::mark_offset_in_bytes ()),
                (intptr_t)markOopDesc::prototype()); // header
      __ pop(rcx);   // get saved klass back in the register.
    }
#ifdef _LP64
    __ xorl(rsi, rsi); // use zero reg to clear memory (shorter code)
    __ store_klass_gap(rax, rsi);  // zero klass gap for compressed oops
#endif
    __ store_klass(rax, rcx);  // klass

    {
      SkipIfEqual skip_if(_masm, &DTraceAllocProbes, 0);
      // Trigger dtrace event for fastpath
      __ push(atos);
      __ call_VM_leaf(
           CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_object_alloc), rax);
      __ pop(atos);
    }

    __ jmp(done);
  }

  // slow case
  __ bind(slow_case);
  __ pop(rcx);   // restore stack pointer to what it was when we came in.
  __ bind(slow_case_no_pop);

  Register rarg1 = LP64_ONLY(c_rarg1) NOT_LP64(rax);
  Register rarg2 = LP64_ONLY(c_rarg2) NOT_LP64(rdx);

  __ get_constant_pool(rarg1);
  __ get_unsigned_2_byte_index_at_bcp(rarg2, 1);
  call_VM(rax, CAST_FROM_FN_PTR(address, InterpreterRuntime::_new), rarg1, rarg2);
   __ verify_oop(rax);

  // continue
  __ bind(done);
}

下面我们来逐步看看上面这些代码主要干了啥。

1:获取new指令后的操作数,即类在常量池的索引,放入rdx寄存器中。bcp即rsi寄存器,用来记录当前解释器运行的字节码指令地址,类似SS:IP寄存器,用来进行pc计数。这个方法主要就是获取当前运行指令地址偏移一个字节处内容。

__ get_unsigned_2_byte_index_at_bcp(rdx, 1);

2:获取常量池首地址放入rcx寄存器,获取常量池中元素类型数组_tags首地址,放入rax中。_tags数组按顺序存放了每个常量池元素的类型。

__ get_cpool_and_tags(rcx, rax);

3:判断_tags数组中对应元素类型是否为JVM_CONSTANT_Class,不是则跳往slow_case_no_pop处。

const int tags_offset = Array<u1>::base_offset_in_bytes();
__ cmpb(Address(rax, rdx, Address::times_1, tags_offset), JVM_CONSTANT_Class);
__ jcc(Assembler::notEqual, slow_case_no_pop);

4:获取创建对象所属类地址,放入rcx中,即类的运行时数据结构InstanceKlass,并将其入栈。

__ movptr(rcx, Address(rcx, rdx, Address::times_ptr, sizeof(ConstantPool)));
__ push(rcx);  // save the contexts of klass for initializing the header

5:判断类是否已经被解析过,没有解析的话直接跳往slow_close,slow_case即慢速分配,如果对象所属类已经被解析过,则会进入快速分配,否则会进入慢速分配,去进行类的解析。

__ cmpb(Address(rcx, InstanceKlass::init_state_offset()), InstanceKlass::fully_initialized);
__ jcc(Assembler::notEqual, slow_case);

6:此时rcx中存放的是类InstanceKlass的内存地址,利用偏移获取类实例大小,存入rdx寄存器,对象的大小早在类加载时就已经确定了。

__ movl(rdx, Address(rcx, Klass::layout_helper_offset()));

7:尝试在TLAB区为对象分配内存,TLAB即ThreadLocalAllocationBuffers(线程局部分配缓存)。每个线程都有自己的一块内存区域,用于分配对象,这块内存区域便为TLAB区。这样的好处是在分配内存时,无需对一整块内存进行加锁。TLAB只是在分配对象时的操作属于线程私有,分配的对象对于其他线程仍是可读的。

if (UseTLAB) {
    // 获取TLAB区剩余空间首地址,放入rax寄存器。
    __ movptr(rax, Address(thread, in_bytes(JavaThread::tlab_top_offset())));
    // rdx寄存器已经记录了对象大小,此处及根据TLAB空闲区首地址,计算出对象分配后,对象尾地址,放入rbx中
    __ lea(rbx, Address(rax, rdx, Address::times_1));
    // 将rbx中内容与TLAB空闲区尾地址进行比较。
    __ cmpptr(rbx, Address(thread, in_bytes(JavaThread::tlab_end_offset())));
    // 如果上面比较结果表明rbx > TLAB空闲区尾地址,则表明TLAB区空闲区大小不足以分配该对象,那么在allow_shared_alloc(允许在Eden区分配)情况下,就直接跳往Eden区分配内存标号处运行,即第8步
    __ jcc(Assembler::above, allow_shared_alloc ? allocate_shared : slow_case);
   // 因为对象分配后,TLAB区空间变小,此处更新TLAB空闲区首地址为对象尾地址
    __ movptr(Address(thread, in_bytes(JavaThread::tlab_top_offset())), rbx);
   // 如果TLAB区默认会对回收的空闲区清零,那么就不需要在为对象变量进行清零操作了,直接跳往对象头初始化处运行。有同学可能会问为什么要进行清零操作呢?因为分配的内存可能还保留着上次分配给其他对象时的数据,内存块虽然被回收了,但是之前的数据没有被清除,会污染新对象。
   if (ZeroTLAB) {
      // the fields have been already cleared
      __ jmp(initialize_header);
   } else {
      // initialize both the header and fields
      __ jmp(initialize_object);
   }
}

8:如果在TLAB区分配失败,会直接在Eden区进行分配,具体过程和第7步很像。

if (allow_shared_alloc) {
    // TLAB区分配失败会跳到这。
    __ bind(allocate_shared);
    // 获取Eden区剩余空间的首地址和结束地址。
    ExternalAddress heap_top((address)Universe::heap()->top_addr());
    ExternalAddress heap_end((address)Universe::heap()->end_addr());

    Label retry;
    __ bind(retry);
    
    // 将空闲区首地址放入rax中,用作对象分配开始处。
    __ movptr(rax, heap_top);
    
    // 计算对象尾地址,与空闲区尾地址进行比较,内存不足则跳往慢速分配。
    __ lea(rbx, Address(rax, rdx, Address::times_1));
    __ cmpptr(rbx, heap_end);
    __ jcc(Assembler::above, slow_case);

    // rax,: object begin,rax此时记录了对象分配的内存首地址
    // rbx,: object end    rbx此时记录了对象分配的内存尾地址
    // rdx: instance size in bytes rdx记录了对象大小

    // 利用CAS操作,更新Eden空闲区首地址为对象尾地址,因为Eden区是线程共用的,所以需要加锁。
    __ locked_cmpxchgptr(rbx, heap_top);

    // if someone beat us on the allocation, try again, otherwise continue
    __ jcc(Assembler::notEqual, retry);

    __ incr_allocated_bytes(thread, rdx, 0);
}

9:对象所需内存已经分配好后,就会进行对象的初始化了,先初始化对象实例数据。

// 开始初始化对象处
__ bind(initialize_object);
// 如果rdx和sizeof(oopDesc)大小一样,即对象所需大小和对象头大小一样,则表明对象真正的实例数据内存为0,那么就不需要进行对象实例数据的初始化了,直接跳往对象头初始化处即可。Hotspot中虽然对象头在内存中排在对象实例数据前,但是会先初始化对象实例数据,再初始化对象头。
__ decrement(rdx, sizeof(oopDesc));
__ jcc(Assembler::zero, initialize_header);

// 执行异或,使得rcx为0,为之后给对象变量赋零值做准备
__ xorl(rcx, rcx);    // use zero reg to clear memory (shorter code)
__ shrl(rdx, LogBytesPerLong); // divide by 2*oopSize and set carry flag if odd

Label L;
__ jccb(Assembler::carryClear, L);
__ stop("object size is not multiple of 2 - adjust this code");
__ bind(L);

// 此处以rdx(对象大小)递减,按字节进行循环遍历对内存,初始化对象实例内存为零值。
{ Label loop;
__ bind(loop);
__ movptr(Address(rax, rdx, Address::times_8, sizeof(oopDesc) - 1*oopSize), rcx);
NOT_LP64(__ movptr(Address(rax, rdx, Address::times_8, sizeof(oopDesc) - 2*oopSize), rcx));
__ decrement(rdx);
__ jcc(Assembler::notZero, loop);
}

10:对象实例数据初始化好后,就开始进行对象头的初始化了。

// 初始化对象头标号处
__ bind(initialize_header);

// 是否使用偏向锁,大多时一个对象只会被同一个线程访问,所以在对象头中记录获取锁的线程id,下次线程获取锁时就不需要加锁了。
if (UseBiasedLocking) {
    // 第4步中有将类数据InstanceKlass的地址入栈,此时重新出栈,放入rcx寄存器。
    __ pop(rcx);  
    // 接下来两步将类的偏向锁相关数据移动到对象头部
    __ movptr(rbx, Address(rcx, Klass::prototype_header_offset()));
    __ movptr(Address(rax, oopDesc::mark_offset_in_bytes ()), rbx);
} else {
    __ movptr(Address(rax, oopDesc::mark_offset_in_bytes ()),
                (intptr_t)markOopDesc::prototype()); // header
    __ pop(rcx);   // get saved klass back in the register.
}
// 此时rcx保存了InstanceKlass,rax保存了对象首地址,此处保存对象所属的类数据InstanceKlass放入对象头中,对象头尾oopDesc类型,里面有个_metadata联合体,_metadata中专门有个Klass指针用来指向类所属对象,此处其实就是将InstanceKlass地址放入该指针中。
__ store_klass(rax, rcx);  // klass

{
  SkipIfEqual skip_if(_masm, &DTraceAllocProbes, 0);
  // Trigger dtrace event for fastpath
  __ push(atos);
  __ call_VM_leaf(
           CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_object_alloc), rax);
  __ pop(atos);
}

__ jmp(done);

11:慢速分配,经过上面分析可知,如果类没有被加载解析,会跳到此处执行。

__ bind(slow_case);
 // 因为第4步有将InsanceKlass入栈,这里用不上,重新出栈,还原栈顶数据。
 __ pop(rcx);   // restore stack pointer to what it was when we came in.
 __ bind(slow_case_no_pop);

 Register rarg1 = LP64_ONLY(c_rarg1) NOT_LP64(rax);
 Register rarg2 = LP64_ONLY(c_rarg2) NOT_LP64(rdx);

 // 获取常量池地址,存入rarg1寄存器。
 __ get_constant_pool(rarg1);
 // 获取new 指令后操作数,即类在常量池中的索引,放入rarg2寄存器。
 __ get_unsigned_2_byte_index_at_bcp(rarg2, 1);
 // 进入InterpreterRuntime::_new生成的机器指令地址处,开始执行,里面会进行类的加载和对象分配,并将分配的对象地址返回,存入rax寄存器中。
 call_VM(rax, CAST_FROM_FN_PTR(address, InterpreterRuntime::_new), rarg1, rarg2);
 __ verify_oop(rax);

 // 创建结束
 __ bind(done);

对象的创建到这就结束了,希望大家能对java对象创建有了更多的了解。因为上面拿模板解释器进行讲解的,都是汇编语言,其实大家也可以直接看看字节码解释器中对象创建的方法,比较好理解。本人能力有限,如有错误,请多指正。目前正在研读HotSpot源码,如果有同学比较感兴趣,也可以一起交流,附上wechat:wang_atbeijing

欢迎关注我的专栏:半栈工程师

听说喜欢点关注的同学都长得帅


Recommend

About Joyk


Aggregate valuable and interesting links.
Joyk means Joy of geeK