Path: blob/master/src/hotspot/cpu/x86/gc/shenandoah/shenandoahBarrierSetAssembler_x86.cpp
41155 views
/*1* Copyright (c) 2018, 2021, Red Hat, Inc. All rights reserved.2* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.3*4* This code is free software; you can redistribute it and/or modify it5* under the terms of the GNU General Public License version 2 only, as6* published by the Free Software Foundation.7*8* This code is distributed in the hope that it will be useful, but WITHOUT9* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or10* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License11* version 2 for more details (a copy is included in the LICENSE file that12* accompanied this code).13*14* You should have received a copy of the GNU General Public License version15* 2 along with this work; if not, write to the Free Software Foundation,16* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.17*18* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA19* or visit www.oracle.com if you need additional information or have any20* questions.21*22*/2324#include "precompiled.hpp"25#include "gc/shenandoah/shenandoahBarrierSet.hpp"26#include "gc/shenandoah/shenandoahBarrierSetAssembler.hpp"27#include "gc/shenandoah/shenandoahForwarding.hpp"28#include "gc/shenandoah/shenandoahHeap.inline.hpp"29#include "gc/shenandoah/shenandoahHeapRegion.hpp"30#include "gc/shenandoah/shenandoahRuntime.hpp"31#include "gc/shenandoah/shenandoahThreadLocalData.hpp"32#include "gc/shenandoah/heuristics/shenandoahHeuristics.hpp"33#include "interpreter/interpreter.hpp"34#include "runtime/sharedRuntime.hpp"35#include "runtime/thread.hpp"36#include "utilities/macros.hpp"37#ifdef COMPILER138#include "c1/c1_LIRAssembler.hpp"39#include "c1/c1_MacroAssembler.hpp"40#include "gc/shenandoah/c1/shenandoahBarrierSetC1.hpp"41#endif4243#define __ masm->4445static void save_machine_state(MacroAssembler* masm, bool handle_gpr, bool handle_fp) {46if (handle_gpr) {47__ push_IU_state();48}4950if (handle_fp) {51// Some paths can be reached from the c2i adapter with live fp arguments in registers.52LP64_ONLY(assert(Argument::n_float_register_parameters_j == 8, "8 fp registers to save at java call"));5354if (UseSSE >= 2) {55const int xmm_size = wordSize * LP64_ONLY(2) NOT_LP64(4);56__ subptr(rsp, xmm_size * 8);57__ movdbl(Address(rsp, xmm_size * 0), xmm0);58__ movdbl(Address(rsp, xmm_size * 1), xmm1);59__ movdbl(Address(rsp, xmm_size * 2), xmm2);60__ movdbl(Address(rsp, xmm_size * 3), xmm3);61__ movdbl(Address(rsp, xmm_size * 4), xmm4);62__ movdbl(Address(rsp, xmm_size * 5), xmm5);63__ movdbl(Address(rsp, xmm_size * 6), xmm6);64__ movdbl(Address(rsp, xmm_size * 7), xmm7);65} else if (UseSSE >= 1) {66const int xmm_size = wordSize * LP64_ONLY(1) NOT_LP64(2);67__ subptr(rsp, xmm_size * 8);68__ movflt(Address(rsp, xmm_size * 0), xmm0);69__ movflt(Address(rsp, xmm_size * 1), xmm1);70__ movflt(Address(rsp, xmm_size * 2), xmm2);71__ movflt(Address(rsp, xmm_size * 3), xmm3);72__ movflt(Address(rsp, xmm_size * 4), xmm4);73__ movflt(Address(rsp, xmm_size * 5), xmm5);74__ movflt(Address(rsp, xmm_size * 6), xmm6);75__ movflt(Address(rsp, xmm_size * 7), xmm7);76} else {77__ push_FPU_state();78}79}80}8182static void restore_machine_state(MacroAssembler* masm, bool handle_gpr, bool handle_fp) {83if (handle_fp) {84if (UseSSE >= 2) {85const int xmm_size = wordSize * LP64_ONLY(2) NOT_LP64(4);86__ movdbl(xmm0, Address(rsp, xmm_size * 0));87__ movdbl(xmm1, Address(rsp, xmm_size * 1));88__ movdbl(xmm2, Address(rsp, xmm_size * 2));89__ movdbl(xmm3, Address(rsp, xmm_size * 3));90__ movdbl(xmm4, Address(rsp, xmm_size * 4));91__ movdbl(xmm5, Address(rsp, xmm_size * 5));92__ movdbl(xmm6, Address(rsp, xmm_size * 6));93__ movdbl(xmm7, Address(rsp, xmm_size * 7));94__ addptr(rsp, xmm_size * 8);95} else if (UseSSE >= 1) {96const int xmm_size = wordSize * LP64_ONLY(1) NOT_LP64(2);97__ movflt(xmm0, Address(rsp, xmm_size * 0));98__ movflt(xmm1, Address(rsp, xmm_size * 1));99__ movflt(xmm2, Address(rsp, xmm_size * 2));100__ movflt(xmm3, Address(rsp, xmm_size * 3));101__ movflt(xmm4, Address(rsp, xmm_size * 4));102__ movflt(xmm5, Address(rsp, xmm_size * 5));103__ movflt(xmm6, Address(rsp, xmm_size * 6));104__ movflt(xmm7, Address(rsp, xmm_size * 7));105__ addptr(rsp, xmm_size * 8);106} else {107__ pop_FPU_state();108}109}110111if (handle_gpr) {112__ pop_IU_state();113}114}115116void ShenandoahBarrierSetAssembler::arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, BasicType type,117Register src, Register dst, Register count) {118119bool dest_uninitialized = (decorators & IS_DEST_UNINITIALIZED) != 0;120121if (is_reference_type(type)) {122123if ((ShenandoahSATBBarrier && !dest_uninitialized) || ShenandoahIUBarrier || ShenandoahLoadRefBarrier) {124#ifdef _LP64125Register thread = r15_thread;126#else127Register thread = rax;128if (thread == src || thread == dst || thread == count) {129thread = rbx;130}131if (thread == src || thread == dst || thread == count) {132thread = rcx;133}134if (thread == src || thread == dst || thread == count) {135thread = rdx;136}137__ push(thread);138__ get_thread(thread);139#endif140assert_different_registers(src, dst, count, thread);141142Label done;143// Short-circuit if count == 0.144__ testptr(count, count);145__ jcc(Assembler::zero, done);146147// Avoid runtime call when not active.148Address gc_state(thread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));149int flags;150if (ShenandoahSATBBarrier && dest_uninitialized) {151flags = ShenandoahHeap::HAS_FORWARDED;152} else {153flags = ShenandoahHeap::HAS_FORWARDED | ShenandoahHeap::MARKING;154}155__ testb(gc_state, flags);156__ jcc(Assembler::zero, done);157158save_machine_state(masm, /* handle_gpr = */ true, /* handle_fp = */ false);159160#ifdef _LP64161assert(src == rdi, "expected");162assert(dst == rsi, "expected");163assert(count == rdx, "expected");164if (UseCompressedOops) {165__ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::arraycopy_barrier_narrow_oop_entry),166src, dst, count);167} else168#endif169{170__ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::arraycopy_barrier_oop_entry),171src, dst, count);172}173174restore_machine_state(masm, /* handle_gpr = */ true, /* handle_fp = */ false);175176__ bind(done);177NOT_LP64(__ pop(thread);)178}179}180181}182183void ShenandoahBarrierSetAssembler::shenandoah_write_barrier_pre(MacroAssembler* masm,184Register obj,185Register pre_val,186Register thread,187Register tmp,188bool tosca_live,189bool expand_call) {190191if (ShenandoahSATBBarrier) {192satb_write_barrier_pre(masm, obj, pre_val, thread, tmp, tosca_live, expand_call);193}194}195196void ShenandoahBarrierSetAssembler::satb_write_barrier_pre(MacroAssembler* masm,197Register obj,198Register pre_val,199Register thread,200Register tmp,201bool tosca_live,202bool expand_call) {203// If expand_call is true then we expand the call_VM_leaf macro204// directly to skip generating the check by205// InterpreterMacroAssembler::call_VM_leaf_base that checks _last_sp.206207#ifdef _LP64208assert(thread == r15_thread, "must be");209#endif // _LP64210211Label done;212Label runtime;213214assert(pre_val != noreg, "check this code");215216if (obj != noreg) {217assert_different_registers(obj, pre_val, tmp);218assert(pre_val != rax, "check this code");219}220221Address in_progress(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_active_offset()));222Address index(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_index_offset()));223Address buffer(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_buffer_offset()));224225Address gc_state(thread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));226__ testb(gc_state, ShenandoahHeap::MARKING);227__ jcc(Assembler::zero, done);228229// Do we need to load the previous value?230if (obj != noreg) {231__ load_heap_oop(pre_val, Address(obj, 0), noreg, noreg, AS_RAW);232}233234// Is the previous value null?235__ cmpptr(pre_val, (int32_t) NULL_WORD);236__ jcc(Assembler::equal, done);237238// Can we store original value in the thread's buffer?239// Is index == 0?240// (The index field is typed as size_t.)241242__ movptr(tmp, index); // tmp := *index_adr243__ cmpptr(tmp, 0); // tmp == 0?244__ jcc(Assembler::equal, runtime); // If yes, goto runtime245246__ subptr(tmp, wordSize); // tmp := tmp - wordSize247__ movptr(index, tmp); // *index_adr := tmp248__ addptr(tmp, buffer); // tmp := tmp + *buffer_adr249250// Record the previous value251__ movptr(Address(tmp, 0), pre_val);252__ jmp(done);253254__ bind(runtime);255// save the live input values256if(tosca_live) __ push(rax);257258if (obj != noreg && obj != rax)259__ push(obj);260261if (pre_val != rax)262__ push(pre_val);263264// Calling the runtime using the regular call_VM_leaf mechanism generates265// code (generated by InterpreterMacroAssember::call_VM_leaf_base)266// that checks that the *(ebp+frame::interpreter_frame_last_sp) == NULL.267//268// If we care generating the pre-barrier without a frame (e.g. in the269// intrinsified Reference.get() routine) then ebp might be pointing to270// the caller frame and so this check will most likely fail at runtime.271//272// Expanding the call directly bypasses the generation of the check.273// So when we do not have have a full interpreter frame on the stack274// expand_call should be passed true.275276NOT_LP64( __ push(thread); )277278#ifdef _LP64279// We move pre_val into c_rarg0 early, in order to avoid smashing it, should280// pre_val be c_rarg1 (where the call prologue would copy thread argument).281// Note: this should not accidentally smash thread, because thread is always r15.282assert(thread != c_rarg0, "smashed arg");283if (c_rarg0 != pre_val) {284__ mov(c_rarg0, pre_val);285}286#endif287288if (expand_call) {289LP64_ONLY( assert(pre_val != c_rarg1, "smashed arg"); )290#ifdef _LP64291if (c_rarg1 != thread) {292__ mov(c_rarg1, thread);293}294// Already moved pre_val into c_rarg0 above295#else296__ push(thread);297__ push(pre_val);298#endif299__ MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre_entry), 2);300} else {301__ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre_entry), LP64_ONLY(c_rarg0) NOT_LP64(pre_val), thread);302}303304NOT_LP64( __ pop(thread); )305306// save the live input values307if (pre_val != rax)308__ pop(pre_val);309310if (obj != noreg && obj != rax)311__ pop(obj);312313if(tosca_live) __ pop(rax);314315__ bind(done);316}317318void ShenandoahBarrierSetAssembler::load_reference_barrier(MacroAssembler* masm, Register dst, Address src, DecoratorSet decorators) {319assert(ShenandoahLoadRefBarrier, "Should be enabled");320321bool is_strong = ShenandoahBarrierSet::is_strong_access(decorators);322bool is_weak = ShenandoahBarrierSet::is_weak_access(decorators);323bool is_phantom = ShenandoahBarrierSet::is_phantom_access(decorators);324bool is_native = ShenandoahBarrierSet::is_native_access(decorators);325bool is_narrow = UseCompressedOops && !is_native;326327Label heap_stable, not_cset;328329__ block_comment("load_reference_barrier { ");330331// Check if GC is active332#ifdef _LP64333Register thread = r15_thread;334#else335Register thread = rcx;336if (thread == dst) {337thread = rbx;338}339__ push(thread);340__ get_thread(thread);341#endif342343Address gc_state(thread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));344int flags = ShenandoahHeap::HAS_FORWARDED;345if (!is_strong) {346flags |= ShenandoahHeap::WEAK_ROOTS;347}348__ testb(gc_state, flags);349__ jcc(Assembler::zero, heap_stable);350351Register tmp1 = noreg, tmp2 = noreg;352if (is_strong) {353// Test for object in cset354// Allocate temporary registers355for (int i = 0; i < 8; i++) {356Register r = as_Register(i);357if (r != rsp && r != rbp && r != dst && r != src.base() && r != src.index()) {358if (tmp1 == noreg) {359tmp1 = r;360} else {361tmp2 = r;362break;363}364}365}366assert(tmp1 != noreg, "tmp1 allocated");367assert(tmp2 != noreg, "tmp2 allocated");368assert_different_registers(tmp1, tmp2, src.base(), src.index());369assert_different_registers(tmp1, tmp2, dst);370371__ push(tmp1);372__ push(tmp2);373374// Optimized cset-test375__ movptr(tmp1, dst);376__ shrptr(tmp1, ShenandoahHeapRegion::region_size_bytes_shift_jint());377__ movptr(tmp2, (intptr_t) ShenandoahHeap::in_cset_fast_test_addr());378__ movbool(tmp1, Address(tmp1, tmp2, Address::times_1));379__ testbool(tmp1);380__ jcc(Assembler::zero, not_cset);381}382383save_machine_state(masm, /* handle_gpr = */ false, /* handle_fp = */ true);384385// The rest is saved with the optimized path386387uint num_saved_regs = 4 + (dst != rax ? 1 : 0) LP64_ONLY(+4);388__ subptr(rsp, num_saved_regs * wordSize);389uint slot = num_saved_regs;390if (dst != rax) {391__ movptr(Address(rsp, (--slot) * wordSize), rax);392}393__ movptr(Address(rsp, (--slot) * wordSize), rcx);394__ movptr(Address(rsp, (--slot) * wordSize), rdx);395__ movptr(Address(rsp, (--slot) * wordSize), rdi);396__ movptr(Address(rsp, (--slot) * wordSize), rsi);397#ifdef _LP64398__ movptr(Address(rsp, (--slot) * wordSize), r8);399__ movptr(Address(rsp, (--slot) * wordSize), r9);400__ movptr(Address(rsp, (--slot) * wordSize), r10);401__ movptr(Address(rsp, (--slot) * wordSize), r11);402// r12-r15 are callee saved in all calling conventions403#endif404assert(slot == 0, "must use all slots");405406// Shuffle registers such that dst is in c_rarg0 and addr in c_rarg1.407#ifdef _LP64408Register arg0 = c_rarg0, arg1 = c_rarg1;409#else410Register arg0 = rdi, arg1 = rsi;411#endif412if (dst == arg1) {413__ lea(arg0, src);414__ xchgptr(arg1, arg0);415} else {416__ lea(arg1, src);417__ movptr(arg0, dst);418}419420if (is_strong) {421if (is_narrow) {422__ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_strong_narrow), arg0, arg1);423} else {424__ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_strong), arg0, arg1);425}426} else if (is_weak) {427if (is_narrow) {428__ super_call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_weak_narrow), arg0, arg1);429} else {430__ super_call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_weak), arg0, arg1);431}432} else {433assert(is_phantom, "only remaining strength");434assert(!is_narrow, "phantom access cannot be narrow");435__ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_phantom), arg0, arg1);436}437438#ifdef _LP64439__ movptr(r11, Address(rsp, (slot++) * wordSize));440__ movptr(r10, Address(rsp, (slot++) * wordSize));441__ movptr(r9, Address(rsp, (slot++) * wordSize));442__ movptr(r8, Address(rsp, (slot++) * wordSize));443#endif444__ movptr(rsi, Address(rsp, (slot++) * wordSize));445__ movptr(rdi, Address(rsp, (slot++) * wordSize));446__ movptr(rdx, Address(rsp, (slot++) * wordSize));447__ movptr(rcx, Address(rsp, (slot++) * wordSize));448449if (dst != rax) {450__ movptr(dst, rax);451__ movptr(rax, Address(rsp, (slot++) * wordSize));452}453454assert(slot == num_saved_regs, "must use all slots");455__ addptr(rsp, num_saved_regs * wordSize);456457restore_machine_state(masm, /* handle_gpr = */ false, /* handle_fp = */ true);458459__ bind(not_cset);460461if (is_strong) {462__ pop(tmp2);463__ pop(tmp1);464}465466__ bind(heap_stable);467468__ block_comment("} load_reference_barrier");469470#ifndef _LP64471__ pop(thread);472#endif473}474475void ShenandoahBarrierSetAssembler::iu_barrier(MacroAssembler* masm, Register dst, Register tmp) {476if (ShenandoahIUBarrier) {477iu_barrier_impl(masm, dst, tmp);478}479}480481void ShenandoahBarrierSetAssembler::iu_barrier_impl(MacroAssembler* masm, Register dst, Register tmp) {482assert(ShenandoahIUBarrier, "should be enabled");483484if (dst == noreg) return;485486if (ShenandoahIUBarrier) {487save_machine_state(masm, /* handle_gpr = */ true, /* handle_fp = */ true);488489#ifdef _LP64490Register thread = r15_thread;491#else492Register thread = rcx;493if (thread == dst || thread == tmp) {494thread = rdi;495}496if (thread == dst || thread == tmp) {497thread = rbx;498}499__ get_thread(thread);500#endif501assert_different_registers(dst, tmp, thread);502503satb_write_barrier_pre(masm, noreg, dst, thread, tmp, true, false);504505restore_machine_state(masm, /* handle_gpr = */ true, /* handle_fp = */ true);506}507}508509//510// Arguments:511//512// Inputs:513// src: oop location, might be clobbered514// tmp1: scratch register, might not be valid.515//516// Output:517// dst: oop loaded from src location518//519// Kill:520// tmp1 (if it is valid)521//522void ShenandoahBarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,523Register dst, Address src, Register tmp1, Register tmp_thread) {524// 1: non-reference load, no additional barrier is needed525if (!is_reference_type(type)) {526BarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp_thread);527return;528}529530assert((decorators & ON_UNKNOWN_OOP_REF) == 0, "Not expected");531532// 2: load a reference from src location and apply LRB if needed533if (ShenandoahBarrierSet::need_load_reference_barrier(decorators, type)) {534Register result_dst = dst;535bool use_tmp1_for_dst = false;536537// Preserve src location for LRB538if (dst == src.base() || dst == src.index()) {539// Use tmp1 for dst if possible, as it is not used in BarrierAssembler::load_at()540if (tmp1->is_valid() && tmp1 != src.base() && tmp1 != src.index()) {541dst = tmp1;542use_tmp1_for_dst = true;543} else {544dst = rdi;545__ push(dst);546}547assert_different_registers(dst, src.base(), src.index());548}549550BarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp_thread);551552load_reference_barrier(masm, dst, src, decorators);553554// Move loaded oop to final destination555if (dst != result_dst) {556__ movptr(result_dst, dst);557558if (!use_tmp1_for_dst) {559__ pop(dst);560}561562dst = result_dst;563}564} else {565BarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp_thread);566}567568// 3: apply keep-alive barrier if needed569if (ShenandoahBarrierSet::need_keep_alive_barrier(decorators, type)) {570save_machine_state(masm, /* handle_gpr = */ true, /* handle_fp = */ true);571572Register thread = NOT_LP64(tmp_thread) LP64_ONLY(r15_thread);573assert_different_registers(dst, tmp1, tmp_thread);574if (!thread->is_valid()) {575thread = rdx;576}577NOT_LP64(__ get_thread(thread));578// Generate the SATB pre-barrier code to log the value of579// the referent field in an SATB buffer.580shenandoah_write_barrier_pre(masm /* masm */,581noreg /* obj */,582dst /* pre_val */,583thread /* thread */,584tmp1 /* tmp */,585true /* tosca_live */,586true /* expand_call */);587588restore_machine_state(masm, /* handle_gpr = */ true, /* handle_fp = */ true);589}590}591592void ShenandoahBarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,593Address dst, Register val, Register tmp1, Register tmp2) {594595bool on_oop = is_reference_type(type);596bool in_heap = (decorators & IN_HEAP) != 0;597bool as_normal = (decorators & AS_NORMAL) != 0;598if (on_oop && in_heap) {599bool needs_pre_barrier = as_normal;600601Register tmp3 = LP64_ONLY(r8) NOT_LP64(rsi);602Register rthread = LP64_ONLY(r15_thread) NOT_LP64(rcx);603// flatten object address if needed604// We do it regardless of precise because we need the registers605if (dst.index() == noreg && dst.disp() == 0) {606if (dst.base() != tmp1) {607__ movptr(tmp1, dst.base());608}609} else {610__ lea(tmp1, dst);611}612613assert_different_registers(val, tmp1, tmp2, tmp3, rthread);614615#ifndef _LP64616__ get_thread(rthread);617InterpreterMacroAssembler *imasm = static_cast<InterpreterMacroAssembler*>(masm);618imasm->save_bcp();619#endif620621if (needs_pre_barrier) {622shenandoah_write_barrier_pre(masm /*masm*/,623tmp1 /* obj */,624tmp2 /* pre_val */,625rthread /* thread */,626tmp3 /* tmp */,627val != noreg /* tosca_live */,628false /* expand_call */);629}630if (val == noreg) {631BarrierSetAssembler::store_at(masm, decorators, type, Address(tmp1, 0), val, noreg, noreg);632} else {633iu_barrier(masm, val, tmp3);634BarrierSetAssembler::store_at(masm, decorators, type, Address(tmp1, 0), val, noreg, noreg);635}636NOT_LP64(imasm->restore_bcp());637} else {638BarrierSetAssembler::store_at(masm, decorators, type, dst, val, tmp1, tmp2);639}640}641642void ShenandoahBarrierSetAssembler::try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env,643Register obj, Register tmp, Label& slowpath) {644Label done;645// Resolve jobject646BarrierSetAssembler::try_resolve_jobject_in_native(masm, jni_env, obj, tmp, slowpath);647648// Check for null.649__ testptr(obj, obj);650__ jcc(Assembler::zero, done);651652Address gc_state(jni_env, ShenandoahThreadLocalData::gc_state_offset() - JavaThread::jni_environment_offset());653__ testb(gc_state, ShenandoahHeap::EVACUATION);654__ jccb(Assembler::notZero, slowpath);655__ bind(done);656}657658// Special Shenandoah CAS implementation that handles false negatives659// due to concurrent evacuation.660void ShenandoahBarrierSetAssembler::cmpxchg_oop(MacroAssembler* masm,661Register res, Address addr, Register oldval, Register newval,662bool exchange, Register tmp1, Register tmp2) {663assert(ShenandoahCASBarrier, "Should only be used when CAS barrier is enabled");664assert(oldval == rax, "must be in rax for implicit use in cmpxchg");665assert_different_registers(oldval, tmp1, tmp2);666assert_different_registers(newval, tmp1, tmp2);667668Label L_success, L_failure;669670// Remember oldval for retry logic below671#ifdef _LP64672if (UseCompressedOops) {673__ movl(tmp1, oldval);674} else675#endif676{677__ movptr(tmp1, oldval);678}679680// Step 1. Fast-path.681//682// Try to CAS with given arguments. If successful, then we are done.683684if (os::is_MP()) __ lock();685#ifdef _LP64686if (UseCompressedOops) {687__ cmpxchgl(newval, addr);688} else689#endif690{691__ cmpxchgptr(newval, addr);692}693__ jcc(Assembler::equal, L_success);694695// Step 2. CAS had failed. This may be a false negative.696//697// The trouble comes when we compare the to-space pointer with the from-space698// pointer to the same object. To resolve this, it will suffice to resolve699// the value from memory -- this will give both to-space pointers.700// If they mismatch, then it was a legitimate failure.701//702// Before reaching to resolve sequence, see if we can avoid the whole shebang703// with filters.704705// Filter: when offending in-memory value is NULL, the failure is definitely legitimate706__ testptr(oldval, oldval);707__ jcc(Assembler::zero, L_failure);708709// Filter: when heap is stable, the failure is definitely legitimate710#ifdef _LP64711const Register thread = r15_thread;712#else713const Register thread = tmp2;714__ get_thread(thread);715#endif716Address gc_state(thread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));717__ testb(gc_state, ShenandoahHeap::HAS_FORWARDED);718__ jcc(Assembler::zero, L_failure);719720#ifdef _LP64721if (UseCompressedOops) {722__ movl(tmp2, oldval);723__ decode_heap_oop(tmp2);724} else725#endif726{727__ movptr(tmp2, oldval);728}729730// Decode offending in-memory value.731// Test if-forwarded732__ testb(Address(tmp2, oopDesc::mark_offset_in_bytes()), markWord::marked_value);733__ jcc(Assembler::noParity, L_failure); // When odd number of bits, then not forwarded734__ jcc(Assembler::zero, L_failure); // When it is 00, then also not forwarded735736// Load and mask forwarding pointer737__ movptr(tmp2, Address(tmp2, oopDesc::mark_offset_in_bytes()));738__ shrptr(tmp2, 2);739__ shlptr(tmp2, 2);740741#ifdef _LP64742if (UseCompressedOops) {743__ decode_heap_oop(tmp1); // decode for comparison744}745#endif746747// Now we have the forwarded offender in tmp2.748// Compare and if they don't match, we have legitimate failure749__ cmpptr(tmp1, tmp2);750__ jcc(Assembler::notEqual, L_failure);751752// Step 3. Need to fix the memory ptr before continuing.753//754// At this point, we have from-space oldval in the register, and its to-space755// address is in tmp2. Let's try to update it into memory. We don't care if it756// succeeds or not. If it does, then the retrying CAS would see it and succeed.757// If this fixup fails, this means somebody else beat us to it, and necessarily758// with to-space ptr store. We still have to do the retry, because the GC might759// have updated the reference for us.760761#ifdef _LP64762if (UseCompressedOops) {763__ encode_heap_oop(tmp2); // previously decoded at step 2.764}765#endif766767if (os::is_MP()) __ lock();768#ifdef _LP64769if (UseCompressedOops) {770__ cmpxchgl(tmp2, addr);771} else772#endif773{774__ cmpxchgptr(tmp2, addr);775}776777// Step 4. Try to CAS again.778//779// This is guaranteed not to have false negatives, because oldval is definitely780// to-space, and memory pointer is to-space as well. Nothing is able to store781// from-space ptr into memory anymore. Make sure oldval is restored, after being782// garbled during retries.783//784#ifdef _LP64785if (UseCompressedOops) {786__ movl(oldval, tmp2);787} else788#endif789{790__ movptr(oldval, tmp2);791}792793if (os::is_MP()) __ lock();794#ifdef _LP64795if (UseCompressedOops) {796__ cmpxchgl(newval, addr);797} else798#endif799{800__ cmpxchgptr(newval, addr);801}802if (!exchange) {803__ jccb(Assembler::equal, L_success); // fastpath, peeking into Step 5, no need to jump804}805806// Step 5. If we need a boolean result out of CAS, set the flag appropriately.807// and promote the result. Note that we handle the flag from both the 1st and 2nd CAS.808// Otherwise, failure witness for CAE is in oldval on all paths, and we can return.809810if (exchange) {811__ bind(L_failure);812__ bind(L_success);813} else {814assert(res != NULL, "need result register");815816Label exit;817__ bind(L_failure);818__ xorptr(res, res);819__ jmpb(exit);820821__ bind(L_success);822__ movptr(res, 1);823__ bind(exit);824}825}826827#undef __828829#ifdef COMPILER1830831#define __ ce->masm()->832833void ShenandoahBarrierSetAssembler::gen_pre_barrier_stub(LIR_Assembler* ce, ShenandoahPreBarrierStub* stub) {834ShenandoahBarrierSetC1* bs = (ShenandoahBarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1();835// At this point we know that marking is in progress.836// If do_load() is true then we have to emit the837// load of the previous value; otherwise it has already838// been loaded into _pre_val.839840__ bind(*stub->entry());841assert(stub->pre_val()->is_register(), "Precondition.");842843Register pre_val_reg = stub->pre_val()->as_register();844845if (stub->do_load()) {846ce->mem2reg(stub->addr(), stub->pre_val(), T_OBJECT, stub->patch_code(), stub->info(), false /*wide*/, false /*unaligned*/);847}848849__ cmpptr(pre_val_reg, (int32_t)NULL_WORD);850__ jcc(Assembler::equal, *stub->continuation());851ce->store_parameter(stub->pre_val()->as_register(), 0);852__ call(RuntimeAddress(bs->pre_barrier_c1_runtime_code_blob()->code_begin()));853__ jmp(*stub->continuation());854855}856857void ShenandoahBarrierSetAssembler::gen_load_reference_barrier_stub(LIR_Assembler* ce, ShenandoahLoadReferenceBarrierStub* stub) {858ShenandoahBarrierSetC1* bs = (ShenandoahBarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1();859__ bind(*stub->entry());860861DecoratorSet decorators = stub->decorators();862bool is_strong = ShenandoahBarrierSet::is_strong_access(decorators);863bool is_weak = ShenandoahBarrierSet::is_weak_access(decorators);864bool is_phantom = ShenandoahBarrierSet::is_phantom_access(decorators);865bool is_native = ShenandoahBarrierSet::is_native_access(decorators);866867Register obj = stub->obj()->as_register();868Register res = stub->result()->as_register();869Register addr = stub->addr()->as_pointer_register();870Register tmp1 = stub->tmp1()->as_register();871Register tmp2 = stub->tmp2()->as_register();872assert_different_registers(obj, res, addr, tmp1, tmp2);873874Label slow_path;875876assert(res == rax, "result must arrive in rax");877878if (res != obj) {879__ mov(res, obj);880}881882if (is_strong) {883// Check for object being in the collection set.884__ mov(tmp1, res);885__ shrptr(tmp1, ShenandoahHeapRegion::region_size_bytes_shift_jint());886__ movptr(tmp2, (intptr_t) ShenandoahHeap::in_cset_fast_test_addr());887#ifdef _LP64888__ movbool(tmp2, Address(tmp2, tmp1, Address::times_1));889__ testbool(tmp2);890#else891// On x86_32, C1 register allocator can give us the register without 8-bit support.892// Do the full-register access and test to avoid compilation failures.893__ movptr(tmp2, Address(tmp2, tmp1, Address::times_1));894__ testptr(tmp2, 0xFF);895#endif896__ jcc(Assembler::zero, *stub->continuation());897}898899__ bind(slow_path);900ce->store_parameter(res, 0);901ce->store_parameter(addr, 1);902if (is_strong) {903if (is_native) {904__ call(RuntimeAddress(bs->load_reference_barrier_strong_native_rt_code_blob()->code_begin()));905} else {906__ call(RuntimeAddress(bs->load_reference_barrier_strong_rt_code_blob()->code_begin()));907}908} else if (is_weak) {909__ call(RuntimeAddress(bs->load_reference_barrier_weak_rt_code_blob()->code_begin()));910} else {911assert(is_phantom, "only remaining strength");912__ call(RuntimeAddress(bs->load_reference_barrier_phantom_rt_code_blob()->code_begin()));913}914__ jmp(*stub->continuation());915}916917#undef __918919#define __ sasm->920921void ShenandoahBarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm) {922__ prologue("shenandoah_pre_barrier", false);923// arg0 : previous value of memory924925__ push(rax);926__ push(rdx);927928const Register pre_val = rax;929const Register thread = NOT_LP64(rax) LP64_ONLY(r15_thread);930const Register tmp = rdx;931932NOT_LP64(__ get_thread(thread);)933934Address queue_index(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_index_offset()));935Address buffer(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_buffer_offset()));936937Label done;938Label runtime;939940// Is SATB still active?941Address gc_state(thread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));942__ testb(gc_state, ShenandoahHeap::MARKING);943__ jcc(Assembler::zero, done);944945// Can we store original value in the thread's buffer?946947__ movptr(tmp, queue_index);948__ testptr(tmp, tmp);949__ jcc(Assembler::zero, runtime);950__ subptr(tmp, wordSize);951__ movptr(queue_index, tmp);952__ addptr(tmp, buffer);953954// prev_val (rax)955__ load_parameter(0, pre_val);956__ movptr(Address(tmp, 0), pre_val);957__ jmp(done);958959__ bind(runtime);960961__ save_live_registers_no_oop_map(true);962963// load the pre-value964__ load_parameter(0, rcx);965__ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre_entry), rcx, thread);966967__ restore_live_registers(true);968969__ bind(done);970971__ pop(rdx);972__ pop(rax);973974__ epilogue();975}976977void ShenandoahBarrierSetAssembler::generate_c1_load_reference_barrier_runtime_stub(StubAssembler* sasm, DecoratorSet decorators) {978__ prologue("shenandoah_load_reference_barrier", false);979// arg0 : object to be resolved980981__ save_live_registers_no_oop_map(true);982983bool is_strong = ShenandoahBarrierSet::is_strong_access(decorators);984bool is_weak = ShenandoahBarrierSet::is_weak_access(decorators);985bool is_phantom = ShenandoahBarrierSet::is_phantom_access(decorators);986bool is_native = ShenandoahBarrierSet::is_native_access(decorators);987988#ifdef _LP64989__ load_parameter(0, c_rarg0);990__ load_parameter(1, c_rarg1);991if (is_strong) {992if (is_native) {993__ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_strong), c_rarg0, c_rarg1);994} else {995if (UseCompressedOops) {996__ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_strong_narrow), c_rarg0, c_rarg1);997} else {998__ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_strong), c_rarg0, c_rarg1);999}1000}1001} else if (is_weak) {1002assert(!is_native, "weak must not be called off-heap");1003if (UseCompressedOops) {1004__ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_weak_narrow), c_rarg0, c_rarg1);1005} else {1006__ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_weak), c_rarg0, c_rarg1);1007}1008} else {1009assert(is_phantom, "only remaining strength");1010assert(is_native, "phantom must only be called off-heap");1011__ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_phantom), c_rarg0, c_rarg1);1012}1013#else1014__ load_parameter(0, rax);1015__ load_parameter(1, rbx);1016if (is_strong) {1017__ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_strong), rax, rbx);1018} else if (is_weak) {1019__ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_weak), rax, rbx);1020} else {1021assert(is_phantom, "only remaining strength");1022__ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_phantom), rax, rbx);1023}1024#endif10251026__ restore_live_registers_except_rax(true);10271028__ epilogue();1029}10301031#undef __10321033#endif // COMPILER1103410351036