diff --git a/src/hotspot/cpu/aarch64/aarch64.ad b/src/hotspot/cpu/aarch64/aarch64.ad index 5d5afbbfc63..4e96793751e 100644 --- a/src/hotspot/cpu/aarch64/aarch64.ad +++ b/src/hotspot/cpu/aarch64/aarch64.ad @@ -1964,7 +1964,7 @@ void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { C2_MacroAssembler _masm(&cbuf); int framesize = C->output()->frame_slots() << LogBytesPerInt; - __ remove_frame(framesize); + __ remove_frame(framesize, C->needs_stack_repair(), C->output()->sp_inc_offset()); if (StackReservedPages > 0 && C->has_reserved_stack_access()) { __ reserved_stack_check(); diff --git a/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp index bb840eb9cdb..801c0bf2904 100644 --- a/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp @@ -376,10 +376,7 @@ void LIR_Assembler::jobject2reg_with_patching(Register reg, CodeEmitInfo *info) int LIR_Assembler::initial_frame_size_in_bytes() const { // if rounding, must let FrameMap know! - // The frame_map records size in slots (32bit word) - - // subtract two words to account for return address and link - return (frame_map()->framesize() - (2*VMRegImpl::slots_per_word)) * VMRegImpl::stack_slot_size; + return in_bytes(frame_map()->framesize_in_bytes()); } @@ -461,7 +458,8 @@ int LIR_Assembler::emit_unwind_handler() { // remove the activation and dispatch to the unwind handler __ block_comment("remove_frame and dispatch to the unwind handler"); int initial_framesize = initial_frame_size_in_bytes(); - __ remove_frame(initial_framesize, needs_stack_repair(), initial_framesize - wordSize); + int sp_inc_offset = initial_framesize - 3*wordSize; // Below saved FP and LR + __ remove_frame(initial_framesize, needs_stack_repair(), sp_inc_offset); __ far_jump(RuntimeAddress(Runtime1::entry_for(Runtime1::unwind_exception_id))); // Emit the slow path assembly @@ -528,7 +526,8 @@ void LIR_Assembler::return_op(LIR_Opr result, C1SafepointPollStub* code_stub) { // Pop the stack before the safepoint code int initial_framesize = initial_frame_size_in_bytes(); - __ remove_frame(initial_framesize, needs_stack_repair(), initial_framesize - wordSize); + int sp_inc_offset = initial_framesize - 3*wordSize; // Below saved FP and LR + __ remove_frame(initial_framesize, needs_stack_repair(), sp_inc_offset); if (StackReservedPages > 0 && compilation()->has_reserved_stack_access()) { __ reserved_stack_check(); diff --git a/src/hotspot/cpu/aarch64/c1_MacroAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/c1_MacroAssembler_aarch64.cpp index b01a10e7033..f4ba39d2f6b 100644 --- a/src/hotspot/cpu/aarch64/c1_MacroAssembler_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/c1_MacroAssembler_aarch64.cpp @@ -349,35 +349,44 @@ void C1_MacroAssembler::inline_cache_check(Register receiver, Register iCache) { } void C1_MacroAssembler::build_frame_helper(int frame_size_in_bytes, int sp_inc, bool needs_stack_repair) { - MacroAssembler::build_frame(frame_size_in_bytes + 2 * wordSize); + MacroAssembler::build_frame(frame_size_in_bytes); if (needs_stack_repair) { - Unimplemented(); + int sp_inc_offset = frame_size_in_bytes - 3 * wordSize; // Immediately below saved LR and FP + save_stack_increment(sp_inc, frame_size_in_bytes, sp_inc_offset); } } void C1_MacroAssembler::build_frame(int frame_size_in_bytes, int bang_size_in_bytes, int sp_offset_for_orig_pc, bool needs_stack_repair, bool has_scalarized_args, Label* verified_inline_entry_label) { - assert(bang_size_in_bytes >= 
frame_size_in_bytes, "stack bang size incorrect"); - // Make sure there is enough stack space for this method's activation. - // Note that we do this before doing an enter(). - generate_stack_overflow_check(bang_size_in_bytes); - - guarantee(needs_stack_repair == false, "Stack repair should not be true"); - if (verified_inline_entry_label != NULL) { + if (has_scalarized_args) { + // Initialize orig_pc to detect deoptimization during buffering in the entry points + str(zr, Address(sp, sp_offset_for_orig_pc - frame_size_in_bytes)); + } + if (!needs_stack_repair && verified_inline_entry_label != NULL) { bind(*verified_inline_entry_label); } + // Make sure there is enough stack space for this method's activation. + // Note that we do this before creating a frame. + assert(bang_size_in_bytes >= frame_size_in_bytes, "stack bang size incorrect"); + generate_stack_overflow_check(bang_size_in_bytes); + build_frame_helper(frame_size_in_bytes, 0, needs_stack_repair); // Insert nmethod entry barrier into frame. BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); bs->nmethod_entry_barrier(this); + + if (needs_stack_repair && verified_inline_entry_label != NULL) { + // Jump here from the scalarized entry points that require additional stack space + // for packing scalarized arguments and therefore already created the frame. + bind(*verified_inline_entry_label); + } } void C1_MacroAssembler::remove_frame(int frame_size_in_bytes, bool needs_stack_repair, int sp_inc_offset) { - MacroAssembler::remove_frame(frame_size_in_bytes + 2 * wordSize, - needs_stack_repair, sp_inc_offset); + MacroAssembler::remove_frame(frame_size_in_bytes, needs_stack_repair, sp_inc_offset); } void C1_MacroAssembler::verified_entry() { @@ -410,7 +419,7 @@ int C1_MacroAssembler::scalarized_entry(const CompiledEntrySignature* ces, int f // Check if we need to extend the stack for packing int sp_inc = 0; if (args_on_stack > args_on_stack_cc) { - Unimplemented(); + sp_inc = extend_stack_for_inline_args(args_on_stack); } // Create a temp frame so we can call into the runtime. It must be properly set up to accommodate GC. @@ -423,8 +432,16 @@ int C1_MacroAssembler::scalarized_entry(const CompiledEntrySignature* ces, int f BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); bs->nmethod_entry_barrier(this); + // The runtime call returns the new array in r0 which is also j_rarg7 + // so we must avoid clobbering that. Temporarily save r0 in a + // non-argument register and pass the buffered array in r20 instead. + // This is safe because the runtime stub saves all registers. + Register val_array = r20; + Register tmp1 = r21; + mov(tmp1, j_rarg7); + // FIXME -- call runtime only if we cannot in-line allocate all the incoming inline type args. - mov(r1, (intptr_t) ces->method()); + mov(r19, (intptr_t) ces->method()); if (is_inline_ro_entry) { far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::buffer_inline_args_no_receiver_id))); } else { @@ -432,16 +449,21 @@ int C1_MacroAssembler::scalarized_entry(const CompiledEntrySignature* ces, int f } int rt_call_offset = offset(); + mov(val_array, r0); + mov(j_rarg7, tmp1); + // Remove the temp frame - add(sp, sp, frame_size_in_bytes); + MacroAssembler::remove_frame(frame_size_in_bytes); shuffle_inline_args(true, is_inline_ro_entry, sig_cc, args_passed_cc, args_on_stack_cc, regs_cc, // from args_passed, args_on_stack, regs, // to - sp_inc); + sp_inc, val_array); if (ces->c1_needs_stack_repair()) { - Unimplemented(); + // Create the real frame. 
Below jump will then skip over the stack banging and frame + // setup code in the verified_inline_entry (which has a different real_frame_size). + build_frame_helper(frame_size_in_bytes, sp_inc, true); } b(verified_inline_entry_label); diff --git a/src/hotspot/cpu/aarch64/c1_Runtime1_aarch64.cpp b/src/hotspot/cpu/aarch64/c1_Runtime1_aarch64.cpp index da0a875a3e1..bc27175c8c2 100644 --- a/src/hotspot/cpu/aarch64/c1_Runtime1_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/c1_Runtime1_aarch64.cpp @@ -885,7 +885,7 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) { "buffer_inline_args" : "buffer_inline_args_no_receiver"; StubFrame f(sasm, name, dont_gc_arguments); OopMap* map = save_live_registers(sasm); - Register method = r1; + Register method = r19; // Incoming address entry = (id == buffer_inline_args_id) ? CAST_FROM_FN_PTR(address, buffer_inline_args) : CAST_FROM_FN_PTR(address, buffer_inline_args_no_receiver); diff --git a/src/hotspot/cpu/aarch64/frame_aarch64.cpp b/src/hotspot/cpu/aarch64/frame_aarch64.cpp index 5bdd0ffcd5a..7b56e1288b0 100644 --- a/src/hotspot/cpu/aarch64/frame_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/frame_aarch64.cpp @@ -150,12 +150,15 @@ bool frame::safe_for_sender(JavaThread *thread) { if (!thread->is_in_full_stack_checked((address)sender_sp)) { return false; } - sender_unextended_sp = sender_sp; sender_pc = (address) *(sender_sp-1); // Note: frame::sender_sp_offset is only valid for compiled frame - saved_fp = (intptr_t*) *(sender_sp - frame::sender_sp_offset); - } + intptr_t **saved_fp_addr = (intptr_t**) (sender_sp - frame::sender_sp_offset); + saved_fp = *saved_fp_addr; + // Repair the sender sp if this is a method with scalarized inline type args + sender_sp = repair_sender_sp(sender_sp, saved_fp_addr); + sender_unextended_sp = sender_sp; + } // If the potential sender is the interpreter then we can do some more checking if (Interpreter::contains(sender_pc)) { @@ -449,21 +452,50 @@ frame frame::sender_for_compiled_frame(RegisterMap* map) const { assert(_cb->frame_size() >= 0, "must have non-zero frame size"); intptr_t* l_sender_sp = unextended_sp() + _cb->frame_size(); - intptr_t* unextended_sp = l_sender_sp; - // the return_address is always the word on the stack - address sender_pc = (address) *(l_sender_sp-1); +#ifdef ASSERT + address sender_pc_copy = (address) *(l_sender_sp-1); +#endif intptr_t** saved_fp_addr = (intptr_t**) (l_sender_sp - frame::sender_sp_offset); // assert (sender_sp() == l_sender_sp, "should be"); // assert (*saved_fp_addr == link(), "should be"); + // Repair the sender sp if the frame has been extended + l_sender_sp = repair_sender_sp(l_sender_sp, saved_fp_addr); + + // The return address is always the first word on the stack + address sender_pc = (address) *(l_sender_sp-1); + +#ifdef ASSERT + if (sender_pc != sender_pc_copy) { + // When extending the stack in the callee method entry to make room for unpacking of value + // type args, we keep a copy of the sender pc at the expected location in the callee frame. + // If the sender pc is patched due to deoptimization, the copy is not consistent anymore. + nmethod* nm = CodeCache::find_blob(sender_pc)->as_nmethod(); + assert(sender_pc == nm->deopt_mh_handler_begin() || sender_pc == nm->deopt_handler_begin(), "unexpected sender pc"); + } +#endif + if (map->update_map()) { // Tell GC to use argument oopmaps for some runtime stubs that need it. // For C1, the runtime stub might not have oop maps, so set this flag // outside of update_register_map. 
- map->set_include_argument_oops(_cb->caller_must_gc_arguments(map->thread())); + bool caller_args = _cb->caller_must_gc_arguments(map->thread()); +#ifdef COMPILER1 + if (!caller_args) { + nmethod* nm = _cb->as_nmethod_or_null(); + if (nm != NULL && nm->is_compiled_by_c1() && nm->method()->has_scalarized_args() && + pc() < nm->verified_inline_entry_point()) { + // The VEP and VIEP(RO) of C1-compiled methods call buffer_inline_args_xxx + // before doing any argument shuffling, so we need to scan the oops + // as the caller passes them. + caller_args = true; + } + } +#endif + map->set_include_argument_oops(caller_args); if (_cb->oop_maps() != NULL) { OopMapSet::update_register_map(this, map); } @@ -475,7 +507,7 @@ frame frame::sender_for_compiled_frame(RegisterMap* map) const { update_map_with_saved_link(map, saved_fp_addr); } - return frame(l_sender_sp, unextended_sp, *saved_fp_addr, sender_pc); + return frame(l_sender_sp, l_sender_sp, *saved_fp_addr, sender_pc); } //------------------------------------------------------------------------------ @@ -797,6 +829,22 @@ frame::frame(void* sp, void* fp, void* pc) { void frame::pd_ps() {} #endif +// Check for a method with scalarized inline type arguments that needs +// a stack repair and return the repaired sender stack pointer. +intptr_t* frame::repair_sender_sp(intptr_t* sender_sp, intptr_t** saved_fp_addr) const { + CompiledMethod* cm = _cb->as_compiled_method_or_null(); + if (cm != NULL && cm->needs_stack_repair()) { + // The stack increment resides just below the saved FP on the stack and + // records the total frame size excluding the two words for saving FP and LR. + intptr_t* sp_inc_addr = (intptr_t*) (saved_fp_addr - 1); + assert(*sp_inc_addr % StackAlignmentInBytes == 0, "sp_inc not aligned"); + int real_frame_size = (*sp_inc_addr / wordSize) + 2; + assert(real_frame_size >= _cb->frame_size() && real_frame_size <= 1000000, "invalid frame size"); + sender_sp = unextended_sp() + real_frame_size; + } + return sender_sp; +} + void JavaFrameAnchor::make_walkable(JavaThread* thread) { // last frame set? if (last_Java_sp() == NULL) return; diff --git a/src/hotspot/cpu/aarch64/frame_aarch64.hpp b/src/hotspot/cpu/aarch64/frame_aarch64.hpp index e2490d28611..19149c27cf3 100644 --- a/src/hotspot/cpu/aarch64/frame_aarch64.hpp +++ b/src/hotspot/cpu/aarch64/frame_aarch64.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2021, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2014, Red Hat Inc. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * @@ -128,6 +128,9 @@ return (intptr_t*) addr_at(offset); } + // Support for scalarized inline type calling convention + intptr_t* repair_sender_sp(intptr_t* sender_sp, intptr_t** saved_fp_addr) const; + #ifdef ASSERT // Used in frame::sender_for_{interpreter,compiled}_frame static void verify_deopt_original_pc( CompiledMethod* nm, intptr_t* unextended_sp); diff --git a/src/hotspot/cpu/aarch64/gc/g1/g1BarrierSetAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/gc/g1/g1BarrierSetAssembler_aarch64.cpp index 60bb11586b7..23c5e68a4cd 100644 --- a/src/hotspot/cpu/aarch64/gc/g1/g1BarrierSetAssembler_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/gc/g1/g1BarrierSetAssembler_aarch64.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2018, 2021, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -193,8 +193,7 @@ void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm, Register tmp, Register tmp2) { assert(thread == rthread, "must be"); - assert_different_registers(store_addr, new_val, thread, tmp, tmp2, - rscratch1); + assert_different_registers(store_addr, new_val, thread, tmp, rscratch1); assert(store_addr != noreg && new_val != noreg && tmp != noreg && tmp2 != noreg, "expecting a register"); @@ -220,6 +219,8 @@ void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm, // storing region crossing non-NULL, is card already dirty? + assert_different_registers(store_addr, thread, tmp, tmp2, rscratch1); + const Register card_addr = tmp; __ lsr(card_addr, store_addr, CardTable::card_shift); @@ -290,17 +291,15 @@ void G1BarrierSetAssembler::oop_store_at(MacroAssembler* masm, DecoratorSet deco bool in_heap = (decorators & IN_HEAP) != 0; bool as_normal = (decorators & AS_NORMAL) != 0; - assert((decorators & IS_DEST_UNINITIALIZED) == 0, "unsupported"); + bool dest_uninitialized = (decorators & IS_DEST_UNINITIALIZED) != 0; - bool needs_pre_barrier = as_normal; + bool needs_pre_barrier = as_normal && !dest_uninitialized; bool needs_post_barrier = (val != noreg && in_heap); - - if (tmp3 == noreg) { - tmp3 = rscratch2; - } - // assert_different_registers(val, tmp1, tmp2, tmp3, rscratch1, rscratch2); - assert_different_registers(val, tmp1, tmp2, tmp3); + if (tmp3 == noreg) { + tmp3 = rscratch2; + } + assert_different_registers(val, tmp1, tmp2, tmp3); // flatten object address if needed if (dst.index() == noreg && dst.offset() == 0) { @@ -311,7 +310,6 @@ void G1BarrierSetAssembler::oop_store_at(MacroAssembler* masm, DecoratorSet deco __ lea(tmp1, dst); } - if (needs_pre_barrier) { g1_write_barrier_pre(masm, tmp1 /* obj */, @@ -329,23 +327,22 @@ void G1BarrierSetAssembler::oop_store_at(MacroAssembler* masm, DecoratorSet deco Register new_val = val; if (needs_post_barrier) { if (UseCompressedOops) { - // FIXME: Refactor the code to avoid usage of r19 and stay within tmpX - new_val = r19; + new_val = tmp3; __ mov(new_val, val); } - } + } - BarrierSetAssembler::store_at(masm, decorators, type, Address(tmp1, 0), val, noreg, noreg, noreg); + BarrierSetAssembler::store_at(masm, decorators, type, Address(tmp1, 0), val, noreg, noreg, noreg); if (needs_post_barrier) { - g1_write_barrier_post(masm, - tmp1 /* store_adr */, - new_val /* new_val */, - rthread /* thread */, - tmp2 /* tmp */, - tmp3 /* tmp2 */); - } - } + g1_write_barrier_post(masm, + tmp1 /* store_adr */, + new_val /* new_val */, + rthread /* thread */, + tmp2 /* tmp */, + tmp3 /* tmp2 */); + } + } } diff --git a/src/hotspot/cpu/aarch64/globals_aarch64.hpp b/src/hotspot/cpu/aarch64/globals_aarch64.hpp index 75f79c5f65e..9efc0a1cad3 100644 --- a/src/hotspot/cpu/aarch64/globals_aarch64.hpp +++ b/src/hotspot/cpu/aarch64/globals_aarch64.hpp @@ -64,7 +64,7 @@ define_pd_global(bool, RewriteFrequentPairs, true); define_pd_global(bool, PreserveFramePointer, false); -define_pd_global(bool, InlineTypePassFieldsAsArgs, false); +define_pd_global(bool, InlineTypePassFieldsAsArgs, true); define_pd_global(bool, InlineTypeReturnedAsFields, false); define_pd_global(uintx, TypeProfileLevel, 111); diff --git a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp index b0a7eeb3353..fe4a71fe53b 100644 --- 
a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp @@ -4781,7 +4781,8 @@ void MacroAssembler::load_byte_map_base(Register reg) { } void MacroAssembler::build_frame(int framesize) { - assert(framesize > 0, "framesize must be > 0"); + assert(framesize >= 2 * wordSize, "framesize must include space for FP/LR"); + assert(framesize % (2*wordSize) == 0, "must preserve 2*wordSize alignment"); if (framesize < ((1 << 9) + 2 * wordSize)) { sub(sp, sp, framesize); stp(rfp, lr, Address(sp, framesize - 2 * wordSize)); @@ -4800,7 +4801,8 @@ void MacroAssembler::build_frame(int framesize) { } void MacroAssembler::remove_frame(int framesize) { - assert(framesize > 0, "framesize must be > 0"); + assert(framesize >= 2 * wordSize, "framesize must include space for FP/LR"); + assert(framesize % (2*wordSize) == 0, "must preserve 2*wordSize alignment"); if (framesize < ((1 << 9) + 2 * wordSize)) { ldp(rfp, lr, Address(sp, framesize - 2 * wordSize)); add(sp, sp, framesize); @@ -4815,6 +4817,53 @@ void MacroAssembler::remove_frame(int framesize) { } } +void MacroAssembler::remove_frame(int initial_framesize, bool needs_stack_repair, int sp_inc_offset) { + if (needs_stack_repair) { + // Remove the extension of the caller's frame used for inline type unpacking + // + // Right now the stack looks like this: + // + // | Arguments from caller | + // |---------------------------| <-- caller's SP + // | Saved LR #1 | + // | Saved FP #1 | + // |---------------------------| + // | Extension space for | + // | inline arg (un)packing | + // |---------------------------| <-- start of this method's frame + // | Saved LR #2 | + // | Saved FP #2 | + // |---------------------------| <-- FP + // | sp_inc | + // | method locals | + // |---------------------------| <-- SP + // + // There are two copies of FP and LR on the stack. They will be identical + // unless the caller has been deoptimized, in which case LR #1 will be patched + // to point at the deopt blob, and LR #2 will still point into the old method. + // + // The sp_inc stack slot holds the total size of the frame including the + // extension space minus two words for the saved FP and LR. + + ldr(rscratch1, Address(sp, sp_inc_offset)); + add(sp, sp, rscratch1); + ldp(rfp, lr, Address(post(sp, 2 * wordSize))); + } else { + remove_frame(initial_framesize); + } +} + +void MacroAssembler::save_stack_increment(int sp_inc, int frame_size, int sp_inc_offset) { + int real_frame_size = frame_size + sp_inc; + assert(sp_inc == 0 || sp_inc > 2*wordSize, "invalid sp_inc value"); + assert(real_frame_size >= 2*wordSize, "frame size must include FP/LR space"); + assert((real_frame_size & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); + + // Subtract two words for the saved FP and LR as these will be popped + // separately. See remove_frame above. + mov(rscratch1, real_frame_size - 2*wordSize); + str(rscratch1, Address(sp, sp_inc_offset)); +} // This method checks if provided byte array contains byte with highest bit set. address MacroAssembler::has_negatives(Register ary1, Register len, Register result) { @@ -5624,9 +5673,8 @@ void MacroAssembler::get_thread(Register dst) { // Moved here from aarch64.ad to support Valhalla code belows void MacroAssembler::verified_entry(Compile* C, int sp_inc) { -// n.b. frame size includes space for return pc and rfp + // n.b. 
frame size includes space for return pc and rfp const long framesize = C->output()->frame_size_in_bytes(); - assert(framesize % (2 * wordSize) == 0, "must preserve 2 * wordSize alignment"); // insert a nop at the start of the prolog so we can patch in a // branch if we need to invalidate the method later @@ -5634,12 +5682,12 @@ void MacroAssembler::verified_entry(Compile* C, int sp_inc) { int bangsize = C->output()->bang_size_in_bytes(); if (C->output()->need_stack_bang(bangsize)) - generate_stack_overflow_check(bangsize); + generate_stack_overflow_check(bangsize); build_frame(framesize); if (C->needs_stack_repair()) { - Unimplemented(); + save_stack_increment(sp_inc, framesize, C->output()->sp_inc_offset()); } if (VerifyStackAtCalls) { @@ -5733,9 +5781,15 @@ bool MacroAssembler::move_helper(VMReg from, VMReg to, BasicType bt, RegState re } if (from->is_reg()) { if (to->is_reg()) { - mov(to->as_Register(), from->as_Register()); + if (from->is_Register() && to->is_Register()) { + mov(to->as_Register(), from->as_Register()); + } else if (from->is_FloatRegister() && to->is_FloatRegister()) { + fmovd(to->as_FloatRegister(), from->as_FloatRegister()); + } else { + ShouldNotReachHere(); + } } else { - int st_off = to->reg2stack() * VMRegImpl::stack_slot_size + wordSize; + int st_off = to->reg2stack() * VMRegImpl::stack_slot_size; Address to_addr = Address(sp, st_off); if (from->is_FloatRegister()) { if (bt == T_DOUBLE) { @@ -5749,11 +5803,11 @@ bool MacroAssembler::move_helper(VMReg from, VMReg to, BasicType bt, RegState re } } } else { - Address from_addr = Address(sp, from->reg2stack() * VMRegImpl::stack_slot_size + wordSize); + Address from_addr = Address(sp, from->reg2stack() * VMRegImpl::stack_slot_size); if (to->is_reg()) { if (to->is_FloatRegister()) { if (bt == T_DOUBLE) { - ldrd(to->as_FloatRegister(), from_addr); + ldrd(to->as_FloatRegister(), from_addr); } else { assert(bt == T_FLOAT, "must be float"); ldrs(to->as_FloatRegister(), from_addr); @@ -5762,7 +5816,7 @@ bool MacroAssembler::move_helper(VMReg from, VMReg to, BasicType bt, RegState re ldr(to->as_Register(), from_addr); } } else { - int st_off = to->reg2stack() * VMRegImpl::stack_slot_size + wordSize; + int st_off = to->reg2stack() * VMRegImpl::stack_slot_size; ldr(rscratch1, from_addr); str(rscratch1, Address(sp, st_off)); } @@ -5775,19 +5829,41 @@ bool MacroAssembler::move_helper(VMReg from, VMReg to, BasicType bt, RegState re return true; } +// Calculate the extra stack space required for packing or unpacking inline +// args and adjust the stack pointer +int MacroAssembler::extend_stack_for_inline_args(int args_on_stack) { + int sp_inc = args_on_stack * VMRegImpl::stack_slot_size; + sp_inc = align_up(sp_inc, StackAlignmentInBytes); + assert(sp_inc > 0, "sanity"); + + // Save a copy of the FP and LR here for deoptimization patching and frame walking + stp(rfp, lr, Address(pre(sp, -2 * wordSize))); + + // Adjust the stack pointer. 
This will be repaired on return by MacroAssembler::remove_frame + if (sp_inc < (1 << 9)) { + sub(sp, sp, sp_inc); // Fits in an immediate + } else { + mov(rscratch1, sp_inc); + sub(sp, sp, rscratch1); + } + + return sp_inc + 2 * wordSize; // Account for the FP/LR space +} + // Read all fields from an inline type oop and store the values in registers/stack slots bool MacroAssembler::unpack_inline_helper(const GrowableArray* sig, int& sig_index, VMReg from, int& from_index, VMRegPair* to, int to_count, int& to_index, RegState reg_state[]) { assert(sig->at(sig_index)._bt == T_VOID, "should be at end delimiter"); assert(from->is_valid(), "source must bevalid"); + Register tmp1 = r10, tmp2 = r11; Register fromReg; if (from->is_reg()) { fromReg = from->as_Register(); } else { - int st_off = from->reg2stack() * VMRegImpl::stack_slot_size + wordSize; - ldr(r10, Address(sp, st_off)); - fromReg = r10; + int st_off = from->reg2stack() * VMRegImpl::stack_slot_size; + ldr(tmp1, Address(sp, st_off)); + fromReg = tmp1; } ScalarizedInlineArgsStream stream(sig, sig_index, to, to_count, to_index, -1); @@ -5802,11 +5878,11 @@ bool MacroAssembler::unpack_inline_helper(const GrowableArray* sig, in int idx = (int)toReg->value(); if (reg_state[idx] == reg_readonly) { - if (idx != from->value()) { - mark_done = false; - } - done = false; - continue; + if (idx != from->value()) { + mark_done = false; + } + done = false; + continue; } else if (reg_state[idx] == reg_written) { continue; } else { @@ -5815,7 +5891,7 @@ bool MacroAssembler::unpack_inline_helper(const GrowableArray* sig, in } if (!toReg->is_FloatRegister()) { - Register dst = toReg->is_stack() ? r13 : toReg->as_Register(); + Register dst = toReg->is_stack() ? tmp2 : toReg->as_Register(); if (is_reference_type(bt)) { load_heap_oop(dst, fromAddr); } else { @@ -5823,7 +5899,7 @@ bool MacroAssembler::unpack_inline_helper(const GrowableArray* sig, in load_sized_value(dst, fromAddr, type2aelembytes(bt), is_signed); } if (toReg->is_stack()) { - int st_off = toReg->reg2stack() * VMRegImpl::stack_slot_size + wordSize; + int st_off = toReg->reg2stack() * VMRegImpl::stack_slot_size; str(dst, Address(sp, st_off)); } } else if (bt == T_DOUBLE) { @@ -5847,7 +5923,7 @@ bool MacroAssembler::unpack_inline_helper(const GrowableArray* sig, in // Pack fields back into an inline type oop bool MacroAssembler::pack_inline_helper(const GrowableArray* sig, int& sig_index, int vtarg_index, VMRegPair* from, int from_count, int& from_index, VMReg to, - RegState reg_state[]) { + RegState reg_state[], Register val_array) { assert(sig->at(sig_index)._bt == T_INLINE_TYPE, "should be at end delimiter"); assert(to->is_valid(), "destination must be valid"); @@ -5856,14 +5932,15 @@ bool MacroAssembler::pack_inline_helper(const GrowableArray* sig, int& return true; // Already written } - Register val_array = r0; Register val_obj_tmp = r11; Register from_reg_tmp = r10; Register tmp1 = r14; Register tmp2 = r13; - Register tmp3 = r1; + Register tmp3 = r12; Register val_obj = to->is_stack() ? 
val_obj_tmp : to->as_Register(); + assert_different_registers(val_obj_tmp, from_reg_tmp, tmp1, tmp2, tmp3, val_array); + if (reg_state[to->value()] == reg_readonly) { if (!is_reg_in_unpacked_fields(sig, sig_index, to, from, from_count, from_index)) { skip_unpacked_fields(sig, sig_index, from, from_count, from_index); @@ -5890,7 +5967,7 @@ bool MacroAssembler::pack_inline_helper(const GrowableArray* sig, int& Register src; if (fromReg->is_stack()) { src = from_reg_tmp; - int ld_off = fromReg->reg2stack() * VMRegImpl::stack_slot_size + wordSize; + int ld_off = fromReg->reg2stack() * VMRegImpl::stack_slot_size; load_sized_value(src, Address(sp, ld_off), size_in_bytes, /* is_signed */ false); } else { src = fromReg->as_Register(); @@ -5923,15 +6000,6 @@ VMReg MacroAssembler::spill_reg_for(VMReg reg) { return (reg->is_FloatRegister()) ? v0->as_VMReg() : r14->as_VMReg(); } -void MacroAssembler::remove_frame(int initial_framesize, bool needs_stack_repair, int sp_inc_offset) { - assert((initial_framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); - if (needs_stack_repair) { - Unimplemented(); - } else { - remove_frame(initial_framesize); - } -} - void MacroAssembler::cache_wb(Address line) { assert(line.getMode() == Address::base_plus_offset, "mode should be base_plus_offset"); assert(line.index() == noreg, "index should be noreg"); diff --git a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp index 6809b7c591e..1159801dc35 100644 --- a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp +++ b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp @@ -1265,9 +1265,11 @@ class MacroAssembler: public Assembler { RegState reg_state[]); bool pack_inline_helper(const GrowableArray* sig, int& sig_index, int vtarg_index, VMRegPair* from, int from_count, int& from_index, VMReg to, - RegState reg_state[]); + RegState reg_state[], Register val_array); + int extend_stack_for_inline_args(int args_on_stack); void remove_frame(int initial_framesize, bool needs_stack_repair, int sp_inc_offset); VMReg spill_reg_for(VMReg reg); + void save_stack_increment(int sp_inc, int frame_size, int sp_inc_offset); void tableswitch(Register index, jint lowbound, jint highbound, Label &jumptable, Label &jumptable_end, int stride = 1) { diff --git a/src/hotspot/cpu/aarch64/methodHandles_aarch64.cpp b/src/hotspot/cpu/aarch64/methodHandles_aarch64.cpp index 415d3774b9c..4ca9082bb68 100644 --- a/src/hotspot/cpu/aarch64/methodHandles_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/methodHandles_aarch64.cpp @@ -115,7 +115,11 @@ void MethodHandles::jump_from_method_handle(MacroAssembler* _masm, Register meth __ BIND(run_compiled_code); } - const ByteSize entry_offset = for_compiler_entry ? Method::from_compiled_offset() : + // The following jump might pass an inline type argument that was erased to Object as oop to a + // callee that expects inline type arguments to be passed as fields. We need to call the compiled + // value entry (_code->inline_entry_point() or _adapter->c2i_inline_entry()) which will take care + // of translating between the calling conventions. + const ByteSize entry_offset = for_compiler_entry ? 
Method::from_compiled_inline_offset() : Method::from_interpreted_offset(); __ ldr(rscratch1,Address(method, entry_offset)); __ br(rscratch1); diff --git a/src/hotspot/cpu/aarch64/sharedRuntime_aarch64.cpp b/src/hotspot/cpu/aarch64/sharedRuntime_aarch64.cpp index 523bbed21b9..94f261b7e6b 100644 --- a/src/hotspot/cpu/aarch64/sharedRuntime_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/sharedRuntime_aarch64.cpp @@ -517,58 +517,68 @@ static int compute_total_args_passed_int(const GrowableArray* sig_exte } -static void gen_c2i_adapter_helper(MacroAssembler* masm, BasicType bt, const VMRegPair& reg_pair, int extraspace, const Address& to) { - - assert(bt != T_INLINE_TYPE || !InlineTypePassFieldsAsArgs, "no inline type here"); - - // Say 4 args: - // i st_off - // 0 32 T_LONG - // 1 24 T_VOID - // 2 16 T_OBJECT - // 3 8 T_BOOL - // - 0 return address - // - // However to make thing extra confusing. Because we can fit a Java long/double in - // a single slot on a 64 bt vm and it would be silly to break them up, the interpreter - // leaves one slot empty and only stores to a single slot. In this case the - // slot that is occupied is the T_VOID slot. See I said it was confusing. - - // int next_off = st_off - Interpreter::stackElementSize; - - VMReg r_1 = reg_pair.first(); - VMReg r_2 = reg_pair.second(); - - if (!r_1->is_valid()) { - assert(!r_2->is_valid(), ""); - return; - } - +static void gen_c2i_adapter_helper(MacroAssembler* masm, + BasicType bt, + BasicType prev_bt, + size_t size_in_bytes, + const VMRegPair& reg_pair, + const Address& to, + Register tmp1, + Register tmp2, + Register tmp3, + int extraspace, + bool is_oop) { + assert(bt != T_INLINE_TYPE || !InlineTypePassFieldsAsArgs, "no inline type here"); + if (bt == T_VOID) { + assert(prev_bt == T_LONG || prev_bt == T_DOUBLE, "missing half"); + return; + } + + // Say 4 args: + // i st_off + // 0 32 T_LONG + // 1 24 T_VOID + // 2 16 T_OBJECT + // 3 8 T_BOOL + // - 0 return address + // + // However to make thing extra confusing. Because we can fit a Java long/double in + // a single slot on a 64 bt vm and it would be silly to break them up, the interpreter + // leaves one slot empty and only stores to a single slot. In this case the + // slot that is occupied is the T_VOID slot. See I said it was confusing. + + bool wide = (size_in_bytes == wordSize); + VMReg r_1 = reg_pair.first(); + VMReg r_2 = reg_pair.second(); + assert(r_2->is_valid() == wide, "invalid size"); + if (!r_1->is_valid()) { + assert(!r_2->is_valid(), ""); + return; + } + + if (!r_1->is_FloatRegister()) { + Register val = tmp3; if (r_1->is_stack()) { - // memory to memory use rscratch1 - // words_pushed is always 0 so we don't use it. - int ld_off = (r_1->reg2stack() * VMRegImpl::stack_slot_size + extraspace /* + word_pushed * wordSize */); - if (!r_2->is_valid()) { - // sign extend?? 
- __ ldrw(rscratch1, Address(sp, ld_off)); - __ str(rscratch1, to); - - } else { - __ ldr(rscratch1, Address(sp, ld_off)); - __ str(rscratch1, to); - } - } else if (r_1->is_Register()) { - Register r = r_1->as_Register(); - __ str(r, to); + // memory to memory use tmp3 (scratch registers are used by store_heap_oop) + int ld_off = r_1->reg2stack() * VMRegImpl::stack_slot_size + extraspace; + __ load_sized_value(val, Address(sp, ld_off), size_in_bytes, /* is_signed */ false); } else { - assert(r_1->is_FloatRegister(), ""); - if (!r_2->is_valid()) { - // only a float use just part of the slot - __ strs(r_1->as_FloatRegister(), to); - } else { - __ strd(r_1->as_FloatRegister(), to); - } - } + val = r_1->as_Register(); + } + assert_different_registers(to.base(), val, rscratch2, tmp1, tmp2); + if (is_oop) { + __ store_heap_oop(to, val, rscratch2, tmp1, tmp2, IN_HEAP | ACCESS_WRITE | IS_DEST_UNINITIALIZED); + } else { + __ store_sized_value(to, val, size_in_bytes); + } + } else { + if (wide) { + __ strd(r_1->as_FloatRegister(), to); + } else { + // only a float use just part of the slot + __ strs(r_1->as_FloatRegister(), to); + } + } } static void gen_c2i_adapter(MacroAssembler *masm, @@ -590,14 +600,23 @@ static void gen_c2i_adapter(MacroAssembler *masm, __ bind(skip_fixup); - bool has_inline_argument = false; + // Name some registers to be used in the following code. We can use + // anything except r0-r7 which are arguments in the Java calling + // convention, rmethod (r12), and r13 which holds the outgoing sender + // SP for the interpreter. + Register buf_array = r10; // Array of buffered inline types + Register buf_oop = r11; // Buffered inline type oop + Register tmp1 = r15; + Register tmp2 = r16; + Register tmp3 = r17; if (InlineTypePassFieldsAsArgs) { - // Is there an inline type argument? - for (int i = 0; i < sig_extended->length() && !has_inline_argument; i++) { - has_inline_argument = (sig_extended->at(i)._bt == T_INLINE_TYPE); - } - if (has_inline_argument) { + // Is there an inline type argument? + bool has_inline_argument = false; + for (int i = 0; i < sig_extended->length() && !has_inline_argument; i++) { + has_inline_argument = (sig_extended->at(i)._bt == T_INLINE_TYPE); + } + if (has_inline_argument) { // There is at least an inline type argument: we're coming from // compiled code so we have no buffers to back the inline types // Allocate the buffers here with a runtime call. 
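[Note, not part of the patch] The "Say 4 args" layout comment in gen_c2i_adapter_helper above is easier to follow with the offsets written out. The standalone sketch below is mine, not HotSpot code: it recomputes the interpreter-side slot offsets for that example signature, assuming an 8-byte Interpreter::stackElementSize and no reserved return-address slot (the new extraspace above is just total_args_passed * Interpreter::stackElementSize, so unlike the x86-style table the offsets start at 0). It shows why a long/double value lands in the slot whose sig_extended entry is the trailing T_VOID.

// Standalone sketch: interpreter stack-slot offsets for the example signature.
#include <cstdio>
#include <cstring>

int main() {
  const int elem = 8;  // Interpreter::stackElementSize on a 64-bit VM (assumption)
  const char* sig[] = { "T_LONG", "T_VOID", "T_OBJECT", "T_BOOL" };
  const int total_args_passed = 4;

  for (int i = 0; i < total_args_passed; i++) {
    int st_off   = (total_args_passed - i - 1) * elem;
    int next_off = st_off - elem;
    if (strcmp(sig[i], "T_VOID") == 0) {
      // No store for the T_VOID marker; this slot already holds the long/double value.
      printf("%-8s st_off=%2d  (slot occupied by the preceding long/double)\n", sig[i], st_off);
      continue;
    }
    bool two_slot = strcmp(sig[i], "T_LONG") == 0;  // same rule applies to T_DOUBLE
    // A long/double is written once, into the lower slot (next_off); the upper slot stays unused.
    printf("%-8s st_off=%2d  value stored at %2d\n",
           sig[i], st_off, two_slot ? next_off : st_off);
  }
  return 0;
}

Running it prints the T_LONG value at offset 16 (the T_VOID slot), T_OBJECT at 8 and T_BOOL at 0, matching the offset selection `(bt == T_LONG || bt == T_DOUBLE) ? next_off : st_off` used by the adapter.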
@@ -607,23 +626,25 @@ static void gen_c2i_adapter(MacroAssembler *masm, frame_complete = __ offset(); address the_pc = __ pc(); - __ set_last_Java_frame(noreg, noreg, the_pc, rscratch1); + Label retaddr; + __ set_last_Java_frame(sp, noreg, retaddr, rscratch1); __ mov(c_rarg0, rthread); - __ mov(c_rarg1, r1); + __ mov(c_rarg1, rmethod); __ mov(c_rarg2, (int64_t)alloc_inline_receiver); __ lea(rscratch1, RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::allocate_inline_types))); __ blr(rscratch1); + __ bind(retaddr); - oop_maps->add_gc_map((int)(__ pc() - start), map); + oop_maps->add_gc_map(__ pc() - start, map); __ reset_last_Java_frame(false); reg_save.restore_live_registers(masm); Label no_exception; - __ ldr(r0, Address(rthread, Thread::pending_exception_offset())); - __ cbz(r0, no_exception); + __ ldr(rscratch1, Address(rthread, Thread::pending_exception_offset())); + __ cbz(rscratch1, no_exception); __ str(zr, Address(rthread, JavaThread::vm_result_offset())); __ ldr(r0, Address(rthread, Thread::pending_exception_offset())); @@ -632,60 +653,68 @@ static void gen_c2i_adapter(MacroAssembler *masm, __ bind(no_exception); // We get an array of objects from the runtime call - __ get_vm_result(r10, rthread); - __ get_vm_result_2(r1, rthread); // TODO: required to keep the callee Method live? + __ get_vm_result(buf_array, rthread); + __ get_vm_result_2(rmethod, rthread); // TODO: required to keep the callee Method live? } } - int words_pushed = 0; - // Since all args are passed on the stack, total_args_passed * // Interpreter::stackElementSize is the space we need. int total_args_passed = compute_total_args_passed_int(sig_extended); - int extraspace = (total_args_passed * Interpreter::stackElementSize) + wordSize; + int extraspace = total_args_passed * Interpreter::stackElementSize; // stack is aligned, keep it that way - extraspace = align_up(extraspace, 2 * wordSize); + extraspace = align_up(extraspace, StackAlignmentInBytes); + // set senderSP value __ mov(r13, sp); - if (extraspace) - __ sub(sp, sp, extraspace); + __ sub(sp, sp, extraspace); // Now write the args into the outgoing interpreter space - int ignored = 0, next_vt_arg = 0, next_arg_int = 0; - bool has_oop_field = false; - - for (int next_arg_comp = 0; next_arg_comp < total_args_passed; next_arg_comp++) { + // next_arg_comp is the next argument from the compiler point of + // view (inline type fields are passed in registers/on the stack). In + // sig_extended, an inline type argument starts with: T_INLINE_TYPE, + // followed by the types of the fields of the inline type and T_VOID + // to mark the end of the inline type. ignored counts the number of + // T_INLINE_TYPE/T_VOID. next_vt_arg is the next inline type argument: + // used to get the buffer for that argument from the pool of buffers + // we allocated above and want to pass to the + // interpreter. next_arg_int is the next argument from the + // interpreter point of view (inline types are passed by reference). 
+ for (int next_arg_comp = 0, ignored = 0, next_vt_arg = 0, next_arg_int = 0; + next_arg_comp < sig_extended->length(); next_arg_comp++) { + assert(ignored <= next_arg_comp, "shouldn't skip over more slots than there are arguments"); + assert(next_arg_int <= total_args_passed, "more arguments for the interpreter than expected?"); BasicType bt = sig_extended->at(next_arg_comp)._bt; - // offset to start parameters - int st_off = (total_args_passed - next_arg_int - 1) * Interpreter::stackElementSize; - + int st_off = (total_args_passed - next_arg_int - 1) * Interpreter::stackElementSize; if (!InlineTypePassFieldsAsArgs || bt != T_INLINE_TYPE) { - if (bt == T_VOID) { - assert(next_arg_comp > 0 && (sig_extended->at(next_arg_comp - 1)._bt == T_LONG || sig_extended->at(next_arg_comp - 1)._bt == T_DOUBLE), "missing half"); - next_arg_int ++; - continue; - } - - int next_off = st_off - Interpreter::stackElementSize; - int offset = (bt == T_LONG || bt == T_DOUBLE) ? next_off : st_off; - - gen_c2i_adapter_helper(masm, bt, regs[next_arg_comp], extraspace, Address(sp, offset)); - next_arg_int ++; - } else { - ignored++; + int next_off = st_off - Interpreter::stackElementSize; + const int offset = (bt == T_LONG || bt == T_DOUBLE) ? next_off : st_off; + const VMRegPair reg_pair = regs[next_arg_comp-ignored]; + size_t size_in_bytes = reg_pair.second()->is_valid() ? 8 : 4; + gen_c2i_adapter_helper(masm, bt, next_arg_comp > 0 ? sig_extended->at(next_arg_comp-1)._bt : T_ILLEGAL, + size_in_bytes, reg_pair, Address(sp, offset), tmp1, tmp2, tmp3, extraspace, false); + next_arg_int++; +#ifdef ASSERT + if (bt == T_LONG || bt == T_DOUBLE) { + // Overwrite the unused slot with known junk + __ mov(rscratch1, CONST64(0xdeadffffdeadaaaa)); + __ str(rscratch1, Address(sp, st_off)); + } +#endif /* ASSERT */ + } else { + ignored++; // get the buffer from the just allocated pool of buffers int index = arrayOopDesc::base_offset_in_bytes(T_OBJECT) + next_vt_arg * type2aelembytes(T_INLINE_TYPE); - __ load_heap_oop(rscratch1, Address(r10, index)); - next_vt_arg++; - next_arg_int++; + __ load_heap_oop(buf_oop, Address(buf_array, index)); + next_vt_arg++; next_arg_int++; int vt = 1; // write fields we get from compiled code in registers/stack // slots to the buffer: we know we are done with that inline type - // argument when we hit the T_VOID that acts as an end of value + // argument when we hit the T_VOID that acts as an end of inline // type delimiter for this inline type. Inline types are flattened // so we might encounter embedded inline types. Each entry in // sig_extended contains a field offset in the buffer. @@ -702,34 +731,15 @@ static void gen_c2i_adapter(MacroAssembler *masm, } else { int off = sig_extended->at(next_arg_comp)._offset; assert(off > 0, "offset in object should be positive"); - - bool is_oop = (bt == T_OBJECT || bt == T_ARRAY); - has_oop_field = has_oop_field || is_oop; - - gen_c2i_adapter_helper(masm, bt, regs[next_arg_comp - ignored], extraspace, Address(r11, off)); + size_t size_in_bytes = is_java_primitive(bt) ? type2aelembytes(bt) : wordSize; + bool is_oop = is_reference_type(bt); + gen_c2i_adapter_helper(masm, bt, next_arg_comp > 0 ? 
sig_extended->at(next_arg_comp-1)._bt : T_ILLEGAL, + size_in_bytes, regs[next_arg_comp-ignored], Address(buf_oop, off), tmp1, tmp2, tmp3, extraspace, is_oop); } } while (vt != 0); // pass the buffer to the interpreter - __ str(rscratch1, Address(sp, st_off)); - } - - } - -// If an inline type was allocated and initialized, apply post barrier to all oop fields - if (has_inline_argument && has_oop_field) { - __ push(r13); // save senderSP - __ push(r1); // save callee - // Allocate argument register save area - if (frame::arg_reg_save_area_bytes != 0) { - __ sub(sp, sp, frame::arg_reg_save_area_bytes); - } - __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::apply_post_barriers), rthread, r10); - // De-allocate argument register save area - if (frame::arg_reg_save_area_bytes != 0) { - __ add(sp, sp, frame::arg_reg_save_area_bytes); + __ str(buf_oop, Address(sp, st_off)); } - __ pop(r1); // restore callee - __ pop(r13); // restore sender SP } __ mov(esp, sp); // Interp expects args on caller's expression stack @@ -808,7 +818,7 @@ void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm, int comp_args_on_stack // Will jump to the compiled code just as if compiled code was doing it. // Pre-load the register-jump target early, to schedule it better. - __ ldr(rscratch1, Address(rmethod, in_bytes(Method::from_compiled_offset()))); + __ ldr(rscratch1, Address(rmethod, in_bytes(Method::from_compiled_inline_offset()))); #if INCLUDE_JVMCI if (EnableJVMCI) { @@ -1031,7 +1041,7 @@ AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm // the GC knows about the location of oop argument locations passed to the c2i adapter. bool caller_must_gc_arguments = (regs != regs_cc); - new_adapter = AdapterBlob::create(masm->code(), frame_complete, frame_size_in_words + 10, oop_maps, caller_must_gc_arguments); + new_adapter = AdapterBlob::create(masm->code(), frame_complete, frame_size_in_words, oop_maps, caller_must_gc_arguments); return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_inline_entry, c2i_inline_ro_entry, c2i_unverified_entry, c2i_unverified_inline_entry, c2i_no_clinit_check_entry); } diff --git a/src/hotspot/cpu/x86/c1_MacroAssembler_x86.cpp b/src/hotspot/cpu/x86/c1_MacroAssembler_x86.cpp index e37b8bd1b41..5b82cadbc25 100644 --- a/src/hotspot/cpu/x86/c1_MacroAssembler_x86.cpp +++ b/src/hotspot/cpu/x86/c1_MacroAssembler_x86.cpp @@ -414,15 +414,7 @@ int C1_MacroAssembler::scalarized_entry(const CompiledEntrySignature* ces, int f // Check if we need to extend the stack for packing int sp_inc = 0; if (args_on_stack > args_on_stack_cc) { - // Two additional slots to account for return address - sp_inc = (args_on_stack + 2) * VMRegImpl::stack_slot_size; - sp_inc = align_up(sp_inc, StackAlignmentInBytes); - // Save the return address, adjust the stack (make sure it is properly - // 16-byte aligned) and copy the return address to the new top of the stack. - // The stack will be repaired on return (see MacroAssembler::remove_frame). - pop(r13); - subptr(rsp, sp_inc); - push(r13); + sp_inc = extend_stack_for_inline_args(args_on_stack); } // Create a temp frame so we can call into the runtime. It must be properly set up to accommodate GC. 
@@ -451,7 +443,7 @@ int C1_MacroAssembler::scalarized_entry(const CompiledEntrySignature* ces, int f shuffle_inline_args(true, is_inline_ro_entry, sig_cc, args_passed_cc, args_on_stack_cc, regs_cc, // from args_passed, args_on_stack, regs, // to - sp_inc); + sp_inc, rax); if (ces->c1_needs_stack_repair()) { // Create the real frame. Below jump will then skip over the stack banging and frame diff --git a/src/hotspot/cpu/x86/macroAssembler_x86.cpp b/src/hotspot/cpu/x86/macroAssembler_x86.cpp index 316ead39e1d..b09c347c819 100644 --- a/src/hotspot/cpu/x86/macroAssembler_x86.cpp +++ b/src/hotspot/cpu/x86/macroAssembler_x86.cpp @@ -5572,6 +5572,22 @@ bool MacroAssembler::move_helper(VMReg from, VMReg to, BasicType bt, RegState re return true; } +// Calculate the extra stack space required for packing or unpacking inline +// args and adjust the stack pointer +int MacroAssembler::extend_stack_for_inline_args(int args_on_stack) { + // Two additional slots to account for return address + int sp_inc = (args_on_stack + 2) * VMRegImpl::stack_slot_size; + sp_inc = align_up(sp_inc, StackAlignmentInBytes); + // Save the return address, adjust the stack (make sure it is properly + // 16-byte aligned) and copy the return address to the new top of the stack. + // The stack will be repaired on return (see MacroAssembler::remove_frame). + assert(sp_inc > 0, "sanity"); + pop(r13); + subptr(rsp, sp_inc); + push(r13); + return sp_inc; +} + // Read all fields from an inline type buffer and store the field values in registers/stack slots. bool MacroAssembler::unpack_inline_helper(const GrowableArray* sig, int& sig_index, VMReg from, int& from_index, VMRegPair* to, int to_count, int& to_index, @@ -5643,7 +5659,7 @@ bool MacroAssembler::unpack_inline_helper(const GrowableArray* sig, in bool MacroAssembler::pack_inline_helper(const GrowableArray* sig, int& sig_index, int vtarg_index, VMRegPair* from, int from_count, int& from_index, VMReg to, - RegState reg_state[]) { + RegState reg_state[], Register val_array) { assert(sig->at(sig_index)._bt == T_INLINE_TYPE, "should be at end delimiter"); assert(to->is_valid(), "destination must be valid"); @@ -5652,7 +5668,6 @@ bool MacroAssembler::pack_inline_helper(const GrowableArray* sig, int& return true; // Already written } - Register val_array = rax; Register val_obj_tmp = r11; Register from_reg_tmp = r14; // Be careful with r14 because it's used for spilling Register tmp1 = r10; @@ -5660,6 +5675,8 @@ bool MacroAssembler::pack_inline_helper(const GrowableArray* sig, int& Register tmp3 = rbx; Register val_obj = to->is_stack() ? 
val_obj_tmp : to->as_Register(); + assert_different_registers(val_obj_tmp, from_reg_tmp, tmp1, tmp2, tmp3, val_array); + if (reg_state[to->value()] == reg_readonly) { if (!is_reg_in_unpacked_fields(sig, sig_index, to, from, from_count, from_index)) { skip_unpacked_fields(sig, sig_index, from, from_count, from_index); diff --git a/src/hotspot/cpu/x86/macroAssembler_x86.hpp b/src/hotspot/cpu/x86/macroAssembler_x86.hpp index 5f1c8ba4987..0e3d7fd0bf4 100644 --- a/src/hotspot/cpu/x86/macroAssembler_x86.hpp +++ b/src/hotspot/cpu/x86/macroAssembler_x86.hpp @@ -1760,7 +1760,8 @@ class MacroAssembler: public Assembler { RegState reg_state[]); bool pack_inline_helper(const GrowableArray* sig, int& sig_index, int vtarg_index, VMRegPair* from, int from_count, int& from_index, VMReg to, - RegState reg_state[]); + RegState reg_state[], Register val_array); + int extend_stack_for_inline_args(int args_on_stack); void remove_frame(int initial_framesize, bool needs_stack_repair, int sp_inc_offset); VMReg spill_reg_for(VMReg reg); diff --git a/src/hotspot/share/asm/macroAssembler_common.cpp b/src/hotspot/share/asm/macroAssembler_common.cpp index 49d0c91d78b..83326e7fb02 100644 --- a/src/hotspot/share/asm/macroAssembler_common.cpp +++ b/src/hotspot/share/asm/macroAssembler_common.cpp @@ -132,25 +132,12 @@ int MacroAssembler::unpack_inline_args(Compile* C, bool receiver_only) { // Check if we need to extend the stack for unpacking int sp_inc = 0; if (args_on_stack_cc > args_on_stack) { - // Two additional slots to account for return address - sp_inc = (args_on_stack_cc + 2) * VMRegImpl::stack_slot_size; - sp_inc = align_up(sp_inc, StackAlignmentInBytes); - // Save the return address, adjust the stack (make sure it is properly - // 16-byte aligned) and copy the return address to the new top of the stack. - // The stack will be repaired on return (see MacroAssembler::remove_frame). - assert(sp_inc > 0, "sanity"); -#ifdef X86 - pop(r13); - subptr(rsp, sp_inc); - push(r13); -#else - Unimplemented(); -#endif + sp_inc = extend_stack_for_inline_args(args_on_stack_cc); } shuffle_inline_args(false, receiver_only, sig, args_passed, args_on_stack, regs, // from args_passed_cc, args_on_stack_cc, regs_cc, // to - sp_inc); + sp_inc, noreg); return sp_inc; } #endif // COMPILER2 @@ -159,7 +146,7 @@ void MacroAssembler::shuffle_inline_args(bool is_packing, bool receiver_only, const GrowableArray* sig, int args_passed, int args_on_stack, VMRegPair* regs, int args_passed_to, int args_on_stack_to, VMRegPair* regs_to, - int sp_inc) { + int sp_inc, Register val_array) { int max_stack = MAX2(args_on_stack + sp_inc/VMRegImpl::stack_slot_size, args_on_stack_to); RegState* reg_state = init_reg_state(regs, args_passed, sp_inc, max_stack); @@ -190,10 +177,11 @@ void MacroAssembler::shuffle_inline_args(bool is_packing, bool receiver_only, to_index += step; from_index += step; } else if (is_packing) { + assert(val_array != noreg, "must be"); VMReg reg_to = regs_to[to_index].first(); done &= pack_inline_helper(sig, sig_index, vtarg_index, regs, args_passed, from_index, reg_to, - reg_state); + reg_state, val_array); vtarg_index++; to_index++; } else if (!receiver_only || (from_index == 0 && bt == T_VOID)) { diff --git a/src/hotspot/share/asm/macroAssembler_common.hpp b/src/hotspot/share/asm/macroAssembler_common.hpp index 86c77ef3ba7..23f37501597 100644 --- a/src/hotspot/share/asm/macroAssembler_common.hpp +++ b/src/hotspot/share/asm/macroAssembler_common.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019, 2020, Oracle and/or its affiliates. 
All rights reserved. + * Copyright (c) 2019, 2021, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -46,7 +46,7 @@ const GrowableArray* sig, int args_passed, int args_on_stack, VMRegPair* regs, int args_passed_to, int args_on_stack_to, VMRegPair* regs_to, - int sp_inc); + int sp_inc, Register val_array); bool shuffle_inline_args_spill(bool is_packing, const GrowableArray* sig, int sig_index, VMRegPair* regs_from, int from_index, int regs_from_count, RegState* reg_state); // }; diff --git a/src/hotspot/share/runtime/arguments.cpp b/src/hotspot/share/runtime/arguments.cpp index 0620d32df0f..261f06dab22 100644 --- a/src/hotspot/share/runtime/arguments.cpp +++ b/src/hotspot/share/runtime/arguments.cpp @@ -2010,7 +2010,7 @@ bool Arguments::check_vm_args_consistency() { } #endif - if (AMD64_ONLY(false &&) !FLAG_IS_DEFAULT(InlineTypePassFieldsAsArgs)) { + if (AMD64_ONLY(false &&) AARCH64_ONLY(false &&) !FLAG_IS_DEFAULT(InlineTypePassFieldsAsArgs)) { FLAG_SET_CMDLINE(InlineTypePassFieldsAsArgs, false); warning("InlineTypePassFieldsAsArgs is not supported on this platform"); } diff --git a/src/hotspot/share/runtime/frame.cpp b/src/hotspot/share/runtime/frame.cpp index 5b59e87d7ee..453837b5556 100644 --- a/src/hotspot/share/runtime/frame.cpp +++ b/src/hotspot/share/runtime/frame.cpp @@ -300,7 +300,7 @@ void frame::deoptimize(JavaThread* thread) { // Also, if the method is synchronized, we first need to acquire the lock. // Don't patch the return pc to delay deoptimization until we enter the method body (the check // addedin LIRGenerator::do_Base will detect the pending deoptimization by checking the original_pc). -#ifdef ASSERT +#if defined ASSERT && !defined AARCH64 // Stub call site does not look like NativeCall on AArch64 NativeCall* call = nativeCall_before(this->pc()); address dest = call->destination(); assert(dest == Runtime1::entry_for(Runtime1::buffer_inline_args_no_receiver_id) || diff --git a/src/hotspot/share/runtime/sharedRuntime.cpp b/src/hotspot/share/runtime/sharedRuntime.cpp index 4579a34c41a..9d4f2d21ee0 100644 --- a/src/hotspot/share/runtime/sharedRuntime.cpp +++ b/src/hotspot/share/runtime/sharedRuntime.cpp @@ -3544,31 +3544,6 @@ JRT_ENTRY(void, SharedRuntime::allocate_inline_types(JavaThread* current, Method current->set_vm_result_2(callee()); // TODO: required to keep callee live? JRT_END -// TODO remove this once the AARCH64 dependency is gone -// Iterate over the array of heap allocated inline types and apply the GC post barrier to all reference fields. -// This is called from the C2I adapter after inline type arguments are heap allocated and initialized. 
-JRT_LEAF(void, SharedRuntime::apply_post_barriers(JavaThread* current, objArrayOopDesc* array)) -{ - assert(InlineTypePassFieldsAsArgs, "no reason to call this"); - assert(oopDesc::is_oop(array), "should be oop"); - for (int i = 0; i < array->length(); ++i) { - instanceOop valueOop = (instanceOop)array->obj_at(i); - InlineKlass* vk = InlineKlass::cast(valueOop->klass()); - if (vk->contains_oops()) { - const address dst_oop_addr = ((address) (void*) valueOop); - OopMapBlock* map = vk->start_of_nonstatic_oop_maps(); - OopMapBlock* const end = map + vk->nonstatic_oop_map_count(); - while (map != end) { - address doop_address = dst_oop_addr + map->offset(); - barrier_set_cast(BarrierSet::barrier_set())-> - write_ref_array((HeapWord*) doop_address, map->count()); - map++; - } - } - } -} -JRT_END - // We're returning from an interpreted method: load each field into a // register following the calling convention JRT_LEAF(void, SharedRuntime::load_inline_type_fields_in_regs(JavaThread* current, oopDesc* res)) diff --git a/src/hotspot/share/runtime/sharedRuntime.hpp b/src/hotspot/share/runtime/sharedRuntime.hpp index 6ed336f1dea..2fb7aa440f9 100644 --- a/src/hotspot/share/runtime/sharedRuntime.hpp +++ b/src/hotspot/share/runtime/sharedRuntime.hpp @@ -533,7 +533,6 @@ class SharedRuntime: AllStatic { static address handle_wrong_method_ic_miss(JavaThread* current); static void allocate_inline_types(JavaThread* current, Method* callee, bool allocate_receiver); static oop allocate_inline_types_impl(JavaThread* current, methodHandle callee, bool allocate_receiver, TRAPS); - static void apply_post_barriers(JavaThread* current, objArrayOopDesc* array); static address handle_unsafe_access(JavaThread* thread, address next_pc);