Path: blob/master/src/hotspot/cpu/x86/assembler_x86.hpp
41144 views
/*1* Copyright (c) 1997, 2021, Oracle and/or its affiliates. All rights reserved.2* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.3*4* This code is free software; you can redistribute it and/or modify it5* under the terms of the GNU General Public License version 2 only, as6* published by the Free Software Foundation.7*8* This code is distributed in the hope that it will be useful, but WITHOUT9* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or10* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License11* version 2 for more details (a copy is included in the LICENSE file that12* accompanied this code).13*14* You should have received a copy of the GNU General Public License version15* 2 along with this work; if not, write to the Free Software Foundation,16* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.17*18* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA19* or visit www.oracle.com if you need additional information or have any20* questions.21*22*/2324#ifndef CPU_X86_ASSEMBLER_X86_HPP25#define CPU_X86_ASSEMBLER_X86_HPP2627#include "asm/register.hpp"28#include "utilities/powerOfTwo.hpp"2930class BiasedLockingCounters;3132// Contains all the definitions needed for x86 assembly code generation.3334// Calling convention35class Argument {36public:37enum {38#ifdef _LP6439#ifdef _WIN6440n_int_register_parameters_c = 4, // rcx, rdx, r8, r9 (c_rarg0, c_rarg1, ...)41n_float_register_parameters_c = 4, // xmm0 - xmm3 (c_farg0, c_farg1, ... )42n_int_register_returns_c = 1, // rax43n_float_register_returns_c = 1, // xmm044#else45n_int_register_parameters_c = 6, // rdi, rsi, rdx, rcx, r8, r9 (c_rarg0, c_rarg1, ...)46n_float_register_parameters_c = 8, // xmm0 - xmm7 (c_farg0, c_farg1, ... 
)47n_int_register_returns_c = 2, // rax, rdx48n_float_register_returns_c = 2, // xmm0, xmm149#endif // _WIN6450n_int_register_parameters_j = 6, // j_rarg0, j_rarg1, ...51n_float_register_parameters_j = 8 // j_farg0, j_farg1, ...52#else53n_register_parameters = 0 // 0 registers used to pass arguments54#endif // _LP6455};56};575859#ifdef _LP6460// Symbolically name the register arguments used by the c calling convention.61// Windows is different from linux/solaris. So much for standards...6263#ifdef _WIN646465REGISTER_DECLARATION(Register, c_rarg0, rcx);66REGISTER_DECLARATION(Register, c_rarg1, rdx);67REGISTER_DECLARATION(Register, c_rarg2, r8);68REGISTER_DECLARATION(Register, c_rarg3, r9);6970REGISTER_DECLARATION(XMMRegister, c_farg0, xmm0);71REGISTER_DECLARATION(XMMRegister, c_farg1, xmm1);72REGISTER_DECLARATION(XMMRegister, c_farg2, xmm2);73REGISTER_DECLARATION(XMMRegister, c_farg3, xmm3);7475#else7677REGISTER_DECLARATION(Register, c_rarg0, rdi);78REGISTER_DECLARATION(Register, c_rarg1, rsi);79REGISTER_DECLARATION(Register, c_rarg2, rdx);80REGISTER_DECLARATION(Register, c_rarg3, rcx);81REGISTER_DECLARATION(Register, c_rarg4, r8);82REGISTER_DECLARATION(Register, c_rarg5, r9);8384REGISTER_DECLARATION(XMMRegister, c_farg0, xmm0);85REGISTER_DECLARATION(XMMRegister, c_farg1, xmm1);86REGISTER_DECLARATION(XMMRegister, c_farg2, xmm2);87REGISTER_DECLARATION(XMMRegister, c_farg3, xmm3);88REGISTER_DECLARATION(XMMRegister, c_farg4, xmm4);89REGISTER_DECLARATION(XMMRegister, c_farg5, xmm5);90REGISTER_DECLARATION(XMMRegister, c_farg6, xmm6);91REGISTER_DECLARATION(XMMRegister, c_farg7, xmm7);9293#endif // _WIN649495// Symbolically name the register arguments used by the Java calling convention.96// We have control over the convention for java so we can do what we please.97// What pleases us is to offset the java calling convention so that when98// we call a suitable jni method the arguments are lined up and we don't99// have to do little shuffling. 
A suitable jni method is non-static and a100// small number of arguments (two fewer args on windows)101//102// |-------------------------------------------------------|103// | c_rarg0 c_rarg1 c_rarg2 c_rarg3 c_rarg4 c_rarg5 |104// |-------------------------------------------------------|105// | rcx rdx r8 r9 rdi* rsi* | windows (* not a c_rarg)106// | rdi rsi rdx rcx r8 r9 | solaris/linux107// |-------------------------------------------------------|108// | j_rarg5 j_rarg0 j_rarg1 j_rarg2 j_rarg3 j_rarg4 |109// |-------------------------------------------------------|110111REGISTER_DECLARATION(Register, j_rarg0, c_rarg1);112REGISTER_DECLARATION(Register, j_rarg1, c_rarg2);113REGISTER_DECLARATION(Register, j_rarg2, c_rarg3);114// Windows runs out of register args here115#ifdef _WIN64116REGISTER_DECLARATION(Register, j_rarg3, rdi);117REGISTER_DECLARATION(Register, j_rarg4, rsi);118#else119REGISTER_DECLARATION(Register, j_rarg3, c_rarg4);120REGISTER_DECLARATION(Register, j_rarg4, c_rarg5);121#endif /* _WIN64 */122REGISTER_DECLARATION(Register, j_rarg5, c_rarg0);123124REGISTER_DECLARATION(XMMRegister, j_farg0, xmm0);125REGISTER_DECLARATION(XMMRegister, j_farg1, xmm1);126REGISTER_DECLARATION(XMMRegister, j_farg2, xmm2);127REGISTER_DECLARATION(XMMRegister, j_farg3, xmm3);128REGISTER_DECLARATION(XMMRegister, j_farg4, xmm4);129REGISTER_DECLARATION(XMMRegister, j_farg5, xmm5);130REGISTER_DECLARATION(XMMRegister, j_farg6, xmm6);131REGISTER_DECLARATION(XMMRegister, j_farg7, xmm7);132133REGISTER_DECLARATION(Register, rscratch1, r10); // volatile134REGISTER_DECLARATION(Register, rscratch2, r11); // volatile135136REGISTER_DECLARATION(Register, r12_heapbase, r12); // callee-saved137REGISTER_DECLARATION(Register, r15_thread, r15); // callee-saved138139#else140// rscratch1 will apear in 32bit code that is dead but of course must compile141// Using noreg ensures if the dead code is incorrectly live and executed it142// will cause an assertion failure143#define rscratch1 
noreg144#define rscratch2 noreg145146#endif // _LP64147148// JSR 292149// On x86, the SP does not have to be saved when invoking method handle intrinsics150// or compiled lambda forms. We indicate that by setting rbp_mh_SP_save to noreg.151REGISTER_DECLARATION(Register, rbp_mh_SP_save, noreg);152153// Address is an abstraction used to represent a memory location154// using any of the amd64 addressing modes with one object.155//156// Note: A register location is represented via a Register, not157// via an address for efficiency & simplicity reasons.158159class ArrayAddress;160161class Address {162public:163enum ScaleFactor {164no_scale = -1,165times_1 = 0,166times_2 = 1,167times_4 = 2,168times_8 = 3,169times_ptr = LP64_ONLY(times_8) NOT_LP64(times_4)170};171static ScaleFactor times(int size) {172assert(size >= 1 && size <= 8 && is_power_of_2(size), "bad scale size");173if (size == 8) return times_8;174if (size == 4) return times_4;175if (size == 2) return times_2;176return times_1;177}178static int scale_size(ScaleFactor scale) {179assert(scale != no_scale, "");180assert(((1 << (int)times_1) == 1 &&181(1 << (int)times_2) == 2 &&182(1 << (int)times_4) == 4 &&183(1 << (int)times_8) == 8), "");184return (1 << (int)scale);185}186187private:188Register _base;189Register _index;190XMMRegister _xmmindex;191ScaleFactor _scale;192int _disp;193bool _isxmmindex;194RelocationHolder _rspec;195196// Easily misused constructors make them private197// %%% can we make these go away?198NOT_LP64(Address(address loc, RelocationHolder spec);)199Address(int disp, address loc, relocInfo::relocType rtype);200Address(int disp, address loc, RelocationHolder spec);201202public:203204int disp() { return _disp; }205// creation206Address()207: _base(noreg),208_index(noreg),209_xmmindex(xnoreg),210_scale(no_scale),211_disp(0),212_isxmmindex(false){213}214215// No default displacement otherwise Register can be implicitly216// converted to 0(Register) which is quite a different 
animal.217218Address(Register base, int disp)219: _base(base),220_index(noreg),221_xmmindex(xnoreg),222_scale(no_scale),223_disp(disp),224_isxmmindex(false){225}226227Address(Register base, Register index, ScaleFactor scale, int disp = 0)228: _base (base),229_index(index),230_xmmindex(xnoreg),231_scale(scale),232_disp (disp),233_isxmmindex(false) {234assert(!index->is_valid() == (scale == Address::no_scale),235"inconsistent address");236}237238Address(Register base, RegisterOrConstant index, ScaleFactor scale = times_1, int disp = 0)239: _base (base),240_index(index.register_or_noreg()),241_xmmindex(xnoreg),242_scale(scale),243_disp (disp + (index.constant_or_zero() * scale_size(scale))),244_isxmmindex(false){245if (!index.is_register()) scale = Address::no_scale;246assert(!_index->is_valid() == (scale == Address::no_scale),247"inconsistent address");248}249250Address(Register base, XMMRegister index, ScaleFactor scale, int disp = 0)251: _base (base),252_index(noreg),253_xmmindex(index),254_scale(scale),255_disp(disp),256_isxmmindex(true) {257assert(!index->is_valid() == (scale == Address::no_scale),258"inconsistent address");259}260261// The following overloads are used in connection with the262// ByteSize type (see sizes.hpp). 
They simplify the use of263// ByteSize'd arguments in assembly code.264265Address(Register base, ByteSize disp)266: Address(base, in_bytes(disp)) {}267268Address(Register base, Register index, ScaleFactor scale, ByteSize disp)269: Address(base, index, scale, in_bytes(disp)) {}270271Address(Register base, RegisterOrConstant index, ScaleFactor scale, ByteSize disp)272: Address(base, index, scale, in_bytes(disp)) {}273274Address plus_disp(int disp) const {275Address a = (*this);276a._disp += disp;277return a;278}279Address plus_disp(RegisterOrConstant disp, ScaleFactor scale = times_1) const {280Address a = (*this);281a._disp += disp.constant_or_zero() * scale_size(scale);282if (disp.is_register()) {283assert(!a.index()->is_valid(), "competing indexes");284a._index = disp.as_register();285a._scale = scale;286}287return a;288}289bool is_same_address(Address a) const {290// disregard _rspec291return _base == a._base && _disp == a._disp && _index == a._index && _scale == a._scale;292}293294// accessors295bool uses(Register reg) const { return _base == reg || _index == reg; }296Register base() const { return _base; }297Register index() const { return _index; }298XMMRegister xmmindex() const { return _xmmindex; }299ScaleFactor scale() const { return _scale; }300int disp() const { return _disp; }301bool isxmmindex() const { return _isxmmindex; }302303// Convert the raw encoding form into the form expected by the constructor for304// Address. 
An index of 4 (rsp) corresponds to having no index, so convert305// that to noreg for the Address constructor.306static Address make_raw(int base, int index, int scale, int disp, relocInfo::relocType disp_reloc);307308static Address make_array(ArrayAddress);309310private:311bool base_needs_rex() const {312return _base->is_valid() && _base->encoding() >= 8;313}314315bool index_needs_rex() const {316return _index->is_valid() &&_index->encoding() >= 8;317}318319bool xmmindex_needs_rex() const {320return _xmmindex->is_valid() && _xmmindex->encoding() >= 8;321}322323relocInfo::relocType reloc() const { return _rspec.type(); }324325friend class Assembler;326friend class MacroAssembler;327friend class LIR_Assembler; // base/index/scale/disp328};329330//331// AddressLiteral has been split out from Address because operands of this type332// need to be treated specially on 32bit vs. 64bit platforms. By splitting it out333// the few instructions that need to deal with address literals are unique and the334// MacroAssembler does not have to implement every instruction in the Assembler335// in order to search for address literals that may need special handling depending336// on the instruction and the platform. As small step on the way to merging i486/amd64337// directories.338//339class AddressLiteral {340friend class ArrayAddress;341RelocationHolder _rspec;342// Typically we use AddressLiterals we want to use their rval343// However in some situations we want the lval (effect address) of the item.344// We provide a special factory for making those lvals.345bool _is_lval;346347// If the target is far we'll need to load the ea of this to348// a register to reach it. 
Otherwise if near we can do rip349// relative addressing.350351address _target;352353protected:354// creation355AddressLiteral()356: _is_lval(false),357_target(NULL)358{}359360public:361362363AddressLiteral(address target, relocInfo::relocType rtype);364365AddressLiteral(address target, RelocationHolder const& rspec)366: _rspec(rspec),367_is_lval(false),368_target(target)369{}370371AddressLiteral addr() {372AddressLiteral ret = *this;373ret._is_lval = true;374return ret;375}376377378private:379380address target() { return _target; }381bool is_lval() { return _is_lval; }382383relocInfo::relocType reloc() const { return _rspec.type(); }384const RelocationHolder& rspec() const { return _rspec; }385386friend class Assembler;387friend class MacroAssembler;388friend class Address;389friend class LIR_Assembler;390};391392// Convience classes393class RuntimeAddress: public AddressLiteral {394395public:396397RuntimeAddress(address target) : AddressLiteral(target, relocInfo::runtime_call_type) {}398399};400401class ExternalAddress: public AddressLiteral {402private:403static relocInfo::relocType reloc_for_target(address target) {404// Sometimes ExternalAddress is used for values which aren't405// exactly addresses, like the card table base.406// external_word_type can't be used for values in the first page407// so just skip the reloc in that case.408return external_word_Relocation::can_be_relocated(target) ? relocInfo::external_word_type : relocInfo::none;409}410411public:412413ExternalAddress(address target) : AddressLiteral(target, reloc_for_target(target)) {}414415};416417class InternalAddress: public AddressLiteral {418419public:420421InternalAddress(address target) : AddressLiteral(target, relocInfo::internal_word_type) {}422423};424425// x86 can do array addressing as a single operation since disp can be an absolute426// address amd64 can't. 
We create a class that expresses the concept but does extra427// magic on amd64 to get the final result428429class ArrayAddress {430private:431432AddressLiteral _base;433Address _index;434435public:436437ArrayAddress() {};438ArrayAddress(AddressLiteral base, Address index): _base(base), _index(index) {};439AddressLiteral base() { return _base; }440Address index() { return _index; }441442};443444class InstructionAttr;445446// 64-bit refect the fxsave size which is 512 bytes and the new xsave area on EVEX which is another 2176 bytes447// See fxsave and xsave(EVEX enabled) documentation for layout448const int FPUStateSizeInWords = NOT_LP64(27) LP64_ONLY(2688 / wordSize);449450// The Intel x86/Amd64 Assembler: Pure assembler doing NO optimizations on the instruction451// level (e.g. mov rax, 0 is not translated into xor rax, rax!); i.e., what you write452// is what you get. The Assembler is generating code into a CodeBuffer.453454class Assembler : public AbstractAssembler {455friend class AbstractAssembler; // for the non-virtual hack456friend class LIR_Assembler; // as_Address()457friend class StubGenerator;458459public:460enum Condition { // The x86 condition codes used for conditional jumps/moves.461zero = 0x4,462notZero = 0x5,463equal = 0x4,464notEqual = 0x5,465less = 0xc,466lessEqual = 0xe,467greater = 0xf,468greaterEqual = 0xd,469below = 0x2,470belowEqual = 0x6,471above = 0x7,472aboveEqual = 0x3,473overflow = 0x0,474noOverflow = 0x1,475carrySet = 0x2,476carryClear = 0x3,477negative = 0x8,478positive = 0x9,479parity = 0xa,480noParity = 0xb481};482483enum Prefix {484// segment overrides485CS_segment = 0x2e,486SS_segment = 0x36,487DS_segment = 0x3e,488ES_segment = 0x26,489FS_segment = 0x64,490GS_segment = 0x65,491492REX = 0x40,493494REX_B = 0x41,495REX_X = 0x42,496REX_XB = 0x43,497REX_R = 0x44,498REX_RB = 0x45,499REX_RX = 0x46,500REX_RXB = 0x47,501502REX_W = 0x48,503504REX_WB = 0x49,505REX_WX = 0x4A,506REX_WXB = 0x4B,507REX_WR = 0x4C,508REX_WRB = 0x4D,509REX_WRX = 
0x4E,510REX_WRXB = 0x4F,511512VEX_3bytes = 0xC4,513VEX_2bytes = 0xC5,514EVEX_4bytes = 0x62,515Prefix_EMPTY = 0x0516};517518enum VexPrefix {519VEX_B = 0x20,520VEX_X = 0x40,521VEX_R = 0x80,522VEX_W = 0x80523};524525enum ExexPrefix {526EVEX_F = 0x04,527EVEX_V = 0x08,528EVEX_Rb = 0x10,529EVEX_X = 0x40,530EVEX_Z = 0x80531};532533enum VexSimdPrefix {534VEX_SIMD_NONE = 0x0,535VEX_SIMD_66 = 0x1,536VEX_SIMD_F3 = 0x2,537VEX_SIMD_F2 = 0x3538};539540enum VexOpcode {541VEX_OPCODE_NONE = 0x0,542VEX_OPCODE_0F = 0x1,543VEX_OPCODE_0F_38 = 0x2,544VEX_OPCODE_0F_3A = 0x3,545VEX_OPCODE_MASK = 0x1F546};547548enum AvxVectorLen {549AVX_128bit = 0x0,550AVX_256bit = 0x1,551AVX_512bit = 0x2,552AVX_NoVec = 0x4553};554555enum EvexTupleType {556EVEX_FV = 0,557EVEX_HV = 4,558EVEX_FVM = 6,559EVEX_T1S = 7,560EVEX_T1F = 11,561EVEX_T2 = 13,562EVEX_T4 = 15,563EVEX_T8 = 17,564EVEX_HVM = 18,565EVEX_QVM = 19,566EVEX_OVM = 20,567EVEX_M128 = 21,568EVEX_DUP = 22,569EVEX_ETUP = 23570};571572enum EvexInputSizeInBits {573EVEX_8bit = 0,574EVEX_16bit = 1,575EVEX_32bit = 2,576EVEX_64bit = 3,577EVEX_NObit = 4578};579580enum WhichOperand {581// input to locate_operand, and format code for relocations582imm_operand = 0, // embedded 32-bit|64-bit immediate operand583disp32_operand = 1, // embedded 32-bit displacement or address584call32_operand = 2, // embedded 32-bit self-relative displacement585#ifndef _LP64586_WhichOperand_limit = 3587#else588narrow_oop_operand = 3, // embedded 32-bit immediate narrow oop589_WhichOperand_limit = 4590#endif591};592593// Comparison predicates for integral types & FP types when using SSE594enum ComparisonPredicate {595eq = 0,596lt = 1,597le = 2,598_false = 3,599neq = 4,600nlt = 5,601nle = 6,602_true = 7603};604605// Comparison predicates for FP types when using AVX606// O means ordered. U is unordered. When using ordered, any NaN comparison is false. Otherwise, it is true.607// S means signaling. Q means non-signaling. 
When signaling is true, instruction signals #IA on NaN.608enum ComparisonPredicateFP {609EQ_OQ = 0,610LT_OS = 1,611LE_OS = 2,612UNORD_Q = 3,613NEQ_UQ = 4,614NLT_US = 5,615NLE_US = 6,616ORD_Q = 7,617EQ_UQ = 8,618NGE_US = 9,619NGT_US = 0xA,620FALSE_OQ = 0XB,621NEQ_OQ = 0xC,622GE_OS = 0xD,623GT_OS = 0xE,624TRUE_UQ = 0xF,625EQ_OS = 0x10,626LT_OQ = 0x11,627LE_OQ = 0x12,628UNORD_S = 0x13,629NEQ_US = 0x14,630NLT_UQ = 0x15,631NLE_UQ = 0x16,632ORD_S = 0x17,633EQ_US = 0x18,634NGE_UQ = 0x19,635NGT_UQ = 0x1A,636FALSE_OS = 0x1B,637NEQ_OS = 0x1C,638GE_OQ = 0x1D,639GT_OQ = 0x1E,640TRUE_US =0x1F641};642643enum Width {644B = 0,645W = 1,646D = 2,647Q = 3648};649650//---< calculate length of instruction >---651// As instruction size can't be found out easily on x86/x64,652// we just use '4' for len and maxlen.653// instruction must start at passed address654static unsigned int instr_len(unsigned char *instr) { return 4; }655656//---< longest instructions >---657// Max instruction length is not specified in architecture documentation.658// We could use a "safe enough" estimate (15), but just default to659// instruction length guess from above.660static unsigned int instr_maxlen() { return 4; }661662// NOTE: The general philopsophy of the declarations here is that 64bit versions663// of instructions are freely declared without the need for wrapping them an ifdef.664// (Some dangerous instructions are ifdef's out of inappropriate jvm's.)665// In the .cpp file the implementations are wrapped so that they are dropped out666// of the resulting jvm. This is done mostly to keep the footprint of MINIMAL667// to the size it was prior to merging up the 32bit and 64bit assemblers.668//669// This does mean you'll get a linker/runtime error if you use a 64bit only instruction670// in a 32bit vm. 
This is somewhat unfortunate but keeps the ifdef noise down.671672private:673674bool _legacy_mode_bw;675bool _legacy_mode_dq;676bool _legacy_mode_vl;677bool _legacy_mode_vlbw;678NOT_LP64(bool _is_managed;)679680class InstructionAttr *_attributes;681682// 64bit prefixes683void prefix(Register reg);684void prefix(Register dst, Register src, Prefix p);685void prefix(Register dst, Address adr, Prefix p);686687void prefix(Address adr);688void prefix(Address adr, Register reg, bool byteinst = false);689void prefix(Address adr, XMMRegister reg);690691int prefix_and_encode(int reg_enc, bool byteinst = false);692int prefix_and_encode(int dst_enc, int src_enc) {693return prefix_and_encode(dst_enc, false, src_enc, false);694}695int prefix_and_encode(int dst_enc, bool dst_is_byte, int src_enc, bool src_is_byte);696697// Some prefixq variants always emit exactly one prefix byte, so besides a698// prefix-emitting method we provide a method to get the prefix byte to emit,699// which can then be folded into a byte stream.700int8_t get_prefixq(Address adr);701int8_t get_prefixq(Address adr, Register reg);702703void prefixq(Address adr);704void prefixq(Address adr, Register reg);705void prefixq(Address adr, XMMRegister reg);706707int prefixq_and_encode(int reg_enc);708int prefixq_and_encode(int dst_enc, int src_enc);709710void rex_prefix(Address adr, XMMRegister xreg,711VexSimdPrefix pre, VexOpcode opc, bool rex_w);712int rex_prefix_and_encode(int dst_enc, int src_enc,713VexSimdPrefix pre, VexOpcode opc, bool rex_w);714715void vex_prefix(bool vex_r, bool vex_b, bool vex_x, int nds_enc, VexSimdPrefix pre, VexOpcode opc);716717void evex_prefix(bool vex_r, bool vex_b, bool vex_x, bool evex_r, bool evex_v,718int nds_enc, VexSimdPrefix pre, VexOpcode opc);719720void vex_prefix(Address adr, int nds_enc, int xreg_enc,721VexSimdPrefix pre, VexOpcode opc,722InstructionAttr *attributes);723724int vex_prefix_and_encode(int dst_enc, int nds_enc, int src_enc,725VexSimdPrefix pre, VexOpcode 
opc,726InstructionAttr *attributes);727728void simd_prefix(XMMRegister xreg, XMMRegister nds, Address adr, VexSimdPrefix pre,729VexOpcode opc, InstructionAttr *attributes);730731int simd_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegister src, VexSimdPrefix pre,732VexOpcode opc, InstructionAttr *attributes);733734// Helper functions for groups of instructions735void emit_arith_b(int op1, int op2, Register dst, int imm8);736737void emit_arith(int op1, int op2, Register dst, int32_t imm32);738// Force generation of a 4 byte immediate value even if it fits into 8bit739void emit_arith_imm32(int op1, int op2, Register dst, int32_t imm32);740void emit_arith(int op1, int op2, Register dst, Register src);741742bool emit_compressed_disp_byte(int &disp);743744void emit_modrm(int mod, int dst_enc, int src_enc);745void emit_modrm_disp8(int mod, int dst_enc, int src_enc,746int disp);747void emit_modrm_sib(int mod, int dst_enc, int src_enc,748Address::ScaleFactor scale, int index_enc, int base_enc);749void emit_modrm_sib_disp8(int mod, int dst_enc, int src_enc,750Address::ScaleFactor scale, int index_enc, int base_enc,751int disp);752753void emit_operand_helper(int reg_enc,754int base_enc, int index_enc, Address::ScaleFactor scale,755int disp,756RelocationHolder const& rspec,757int rip_relative_correction = 0);758759void emit_operand(Register reg,760Register base, Register index, Address::ScaleFactor scale,761int disp,762RelocationHolder const& rspec,763int rip_relative_correction = 0);764765void emit_operand(Register reg,766Register base, XMMRegister index, Address::ScaleFactor scale,767int disp,768RelocationHolder const& rspec);769770void emit_operand(XMMRegister xreg,771Register base, XMMRegister xindex, Address::ScaleFactor scale,772int disp,773RelocationHolder const& rspec);774775void emit_operand(Register reg, Address adr,776int rip_relative_correction = 0);777778void emit_operand(XMMRegister reg,779Register base, Register index, Address::ScaleFactor 
scale,780int disp,781RelocationHolder const& rspec);782783void emit_operand(XMMRegister reg, Address adr);784785// Immediate-to-memory forms786void emit_arith_operand(int op1, Register rm, Address adr, int32_t imm32);787788protected:789#ifdef ASSERT790void check_relocation(RelocationHolder const& rspec, int format);791#endif792793void emit_data(jint data, relocInfo::relocType rtype, int format);794void emit_data(jint data, RelocationHolder const& rspec, int format);795void emit_data64(jlong data, relocInfo::relocType rtype, int format = 0);796void emit_data64(jlong data, RelocationHolder const& rspec, int format = 0);797798bool reachable(AddressLiteral adr) NOT_LP64({ return true;});799800// These are all easily abused and hence protected801802// 32BIT ONLY SECTION803#ifndef _LP64804// Make these disappear in 64bit mode since they would never be correct805void cmp_literal32(Register src1, int32_t imm32, RelocationHolder const& rspec); // 32BIT ONLY806void cmp_literal32(Address src1, int32_t imm32, RelocationHolder const& rspec); // 32BIT ONLY807808void mov_literal32(Register dst, int32_t imm32, RelocationHolder const& rspec); // 32BIT ONLY809void mov_literal32(Address dst, int32_t imm32, RelocationHolder const& rspec); // 32BIT ONLY810811void push_literal32(int32_t imm32, RelocationHolder const& rspec); // 32BIT ONLY812#else813// 64BIT ONLY SECTION814void mov_literal64(Register dst, intptr_t imm64, RelocationHolder const& rspec); // 64BIT ONLY815816void cmp_narrow_oop(Register src1, int32_t imm32, RelocationHolder const& rspec);817void cmp_narrow_oop(Address src1, int32_t imm32, RelocationHolder const& rspec);818819void mov_narrow_oop(Register dst, int32_t imm32, RelocationHolder const& rspec);820void mov_narrow_oop(Address dst, int32_t imm32, RelocationHolder const& rspec);821#endif // _LP64822823// These are unique in that we are ensured by the caller that the 32bit824// relative in these instructions will always be able to reach the potentially825// 64bit 
address described by entry. Since they can take a 64bit address they826// don't have the 32 suffix like the other instructions in this class.827828void call_literal(address entry, RelocationHolder const& rspec);829void jmp_literal(address entry, RelocationHolder const& rspec);830831// Avoid using directly section832// Instructions in this section are actually usable by anyone without danger833// of failure but have performance issues that are addressed my enhanced834// instructions which will do the proper thing base on the particular cpu.835// We protect them because we don't trust you...836837// Don't use next inc() and dec() methods directly. INC & DEC instructions838// could cause a partial flag stall since they don't set CF flag.839// Use MacroAssembler::decrement() & MacroAssembler::increment() methods840// which call inc() & dec() or add() & sub() in accordance with841// the product flag UseIncDec value.842843void decl(Register dst);844void decl(Address dst);845void decq(Address dst);846847void incl(Register dst);848void incl(Address dst);849void incq(Register dst);850void incq(Address dst);851852// New cpus require use of movsd and movss to avoid partial register stall853// when loading from memory. 
But for old Opteron use movlpd instead of movsd.854// The selection is done in MacroAssembler::movdbl() and movflt().855856// Move Scalar Single-Precision Floating-Point Values857void movss(XMMRegister dst, Address src);858void movss(XMMRegister dst, XMMRegister src);859void movss(Address dst, XMMRegister src);860861// Move Scalar Double-Precision Floating-Point Values862void movsd(XMMRegister dst, Address src);863void movsd(XMMRegister dst, XMMRegister src);864void movsd(Address dst, XMMRegister src);865void movlpd(XMMRegister dst, Address src);866867// New cpus require use of movaps and movapd to avoid partial register stall868// when moving between registers.869void movaps(XMMRegister dst, XMMRegister src);870void movapd(XMMRegister dst, XMMRegister src);871872// End avoid using directly873874875// Instruction prefixes876void prefix(Prefix p);877878public:879880// Creation881Assembler(CodeBuffer* code) : AbstractAssembler(code) {882init_attributes();883}884885// Decoding886static address locate_operand(address inst, WhichOperand which);887static address locate_next_instruction(address inst);888889// Utilities890static bool query_compressed_disp_byte(int disp, bool is_evex_inst, int vector_len,891int cur_tuple_type, int in_size_in_bits, int cur_encoding);892893// Generic instructions894// Does 32bit or 64bit as needed for the platform. 
In some sense these895// belong in macro assembler but there is no need for both varieties to exist896897void init_attributes(void);898899void set_attributes(InstructionAttr *attributes) { _attributes = attributes; }900void clear_attributes(void) { _attributes = NULL; }901902void set_managed(void) { NOT_LP64(_is_managed = true;) }903void clear_managed(void) { NOT_LP64(_is_managed = false;) }904bool is_managed(void) {905NOT_LP64(return _is_managed;)906LP64_ONLY(return false;) }907908void lea(Register dst, Address src);909910void mov(Register dst, Register src);911912#ifdef _LP64913// support caching the result of some routines914915// must be called before pusha(), popa(), vzeroupper() - checked with asserts916static void precompute_instructions();917918void pusha_uncached();919void popa_uncached();920#endif921void vzeroupper_uncached();922void decq(Register dst);923924void pusha();925void popa();926927void pushf();928void popf();929930void push(int32_t imm32);931932void push(Register src);933934void pop(Register dst);935936// These are dummies to prevent surprise implicit conversions to Register937void push(void* v);938void pop(void* v);939940// These do register sized moves/scans941void rep_mov();942void rep_stos();943void rep_stosb();944void repne_scan();945#ifdef _LP64946void repne_scanl();947#endif948949// Vanilla instructions in lexical order950951void adcl(Address dst, int32_t imm32);952void adcl(Address dst, Register src);953void adcl(Register dst, int32_t imm32);954void adcl(Register dst, Address src);955void adcl(Register dst, Register src);956957void adcq(Register dst, int32_t imm32);958void adcq(Register dst, Address src);959void adcq(Register dst, Register src);960961void addb(Address dst, int imm8);962void addw(Register dst, Register src);963void addw(Address dst, int imm16);964965void addl(Address dst, int32_t imm32);966void addl(Address dst, Register src);967void addl(Register dst, int32_t imm32);968void addl(Register dst, Address src);969void 
addl(Register dst, Register src);970971void addq(Address dst, int32_t imm32);972void addq(Address dst, Register src);973void addq(Register dst, int32_t imm32);974void addq(Register dst, Address src);975void addq(Register dst, Register src);976977#ifdef _LP64978//Add Unsigned Integers with Carry Flag979void adcxq(Register dst, Register src);980981//Add Unsigned Integers with Overflow Flag982void adoxq(Register dst, Register src);983#endif984985void addr_nop_4();986void addr_nop_5();987void addr_nop_7();988void addr_nop_8();989990// Add Scalar Double-Precision Floating-Point Values991void addsd(XMMRegister dst, Address src);992void addsd(XMMRegister dst, XMMRegister src);993994// Add Scalar Single-Precision Floating-Point Values995void addss(XMMRegister dst, Address src);996void addss(XMMRegister dst, XMMRegister src);997998// AES instructions999void aesdec(XMMRegister dst, Address src);1000void aesdec(XMMRegister dst, XMMRegister src);1001void aesdeclast(XMMRegister dst, Address src);1002void aesdeclast(XMMRegister dst, XMMRegister src);1003void aesenc(XMMRegister dst, Address src);1004void aesenc(XMMRegister dst, XMMRegister src);1005void aesenclast(XMMRegister dst, Address src);1006void aesenclast(XMMRegister dst, XMMRegister src);1007// Vector AES instructions1008void vaesenc(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);1009void vaesenclast(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);1010void vaesdec(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);1011void vaesdeclast(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);10121013void andw(Register dst, Register src);1014void andb(Address dst, Register src);10151016void andl(Address dst, int32_t imm32);1017void andl(Register dst, int32_t imm32);1018void andl(Register dst, Address src);1019void andl(Register dst, Register src);1020void andl(Address dst, Register src);10211022void andq(Address dst, int32_t imm32);1023void andq(Register dst, 
int32_t imm32);1024void andq(Register dst, Address src);1025void andq(Register dst, Register src);1026void andq(Address dst, Register src);10271028// BMI instructions1029void andnl(Register dst, Register src1, Register src2);1030void andnl(Register dst, Register src1, Address src2);1031void andnq(Register dst, Register src1, Register src2);1032void andnq(Register dst, Register src1, Address src2);10331034void blsil(Register dst, Register src);1035void blsil(Register dst, Address src);1036void blsiq(Register dst, Register src);1037void blsiq(Register dst, Address src);10381039void blsmskl(Register dst, Register src);1040void blsmskl(Register dst, Address src);1041void blsmskq(Register dst, Register src);1042void blsmskq(Register dst, Address src);10431044void blsrl(Register dst, Register src);1045void blsrl(Register dst, Address src);1046void blsrq(Register dst, Register src);1047void blsrq(Register dst, Address src);10481049void bsfl(Register dst, Register src);1050void bsrl(Register dst, Register src);10511052#ifdef _LP641053void bsfq(Register dst, Register src);1054void bsrq(Register dst, Register src);1055#endif10561057void bswapl(Register reg);10581059void bswapq(Register reg);10601061void call(Label& L, relocInfo::relocType rtype);1062void call(Register reg); // push pc; pc <- reg1063void call(Address adr); // push pc; pc <- adr10641065void cdql();10661067void cdqq();10681069void cld();10701071void clflush(Address adr);1072void clflushopt(Address adr);1073void clwb(Address adr);10741075void cmovl(Condition cc, Register dst, Register src);1076void cmovl(Condition cc, Register dst, Address src);10771078void cmovq(Condition cc, Register dst, Register src);1079void cmovq(Condition cc, Register dst, Address src);108010811082void cmpb(Address dst, int imm8);10831084void cmpl(Address dst, int32_t imm32);10851086void cmp(Register dst, int32_t imm32);1087void cmpl(Register dst, int32_t imm32);1088void cmpl(Register dst, Register src);1089void cmpl(Register dst, Address 
src);10901091void cmpq(Address dst, int32_t imm32);1092void cmpq(Address dst, Register src);10931094void cmpq(Register dst, int32_t imm32);1095void cmpq(Register dst, Register src);1096void cmpq(Register dst, Address src);10971098// these are dummies used to catch attempting to convert NULL to Register1099void cmpl(Register dst, void* junk); // dummy1100void cmpq(Register dst, void* junk); // dummy11011102void cmpw(Address dst, int imm16);11031104void cmpxchg8 (Address adr);11051106void cmpxchgb(Register reg, Address adr);1107void cmpxchgl(Register reg, Address adr);11081109void cmpxchgq(Register reg, Address adr);1110void cmpxchgw(Register reg, Address adr);11111112// Ordered Compare Scalar Double-Precision Floating-Point Values and set EFLAGS1113void comisd(XMMRegister dst, Address src);1114void comisd(XMMRegister dst, XMMRegister src);11151116// Ordered Compare Scalar Single-Precision Floating-Point Values and set EFLAGS1117void comiss(XMMRegister dst, Address src);1118void comiss(XMMRegister dst, XMMRegister src);11191120// Identify processor type and features1121void cpuid();11221123// CRC32C1124void crc32(Register crc, Register v, int8_t sizeInBytes);1125void crc32(Register crc, Address adr, int8_t sizeInBytes);11261127// Convert Scalar Double-Precision Floating-Point Value to Scalar Single-Precision Floating-Point Value1128void cvtsd2ss(XMMRegister dst, XMMRegister src);1129void cvtsd2ss(XMMRegister dst, Address src);11301131// Convert Doubleword Integer to Scalar Double-Precision Floating-Point Value1132void cvtsi2sdl(XMMRegister dst, Register src);1133void cvtsi2sdl(XMMRegister dst, Address src);1134void cvtsi2sdq(XMMRegister dst, Register src);1135void cvtsi2sdq(XMMRegister dst, Address src);11361137// Convert Doubleword Integer to Scalar Single-Precision Floating-Point Value1138void cvtsi2ssl(XMMRegister dst, Register src);1139void cvtsi2ssl(XMMRegister dst, Address src);1140void cvtsi2ssq(XMMRegister dst, Register src);1141void cvtsi2ssq(XMMRegister 
dst, Address src);11421143// Convert Packed Signed Doubleword Integers to Packed Double-Precision Floating-Point Value1144void cvtdq2pd(XMMRegister dst, XMMRegister src);1145void vcvtdq2pd(XMMRegister dst, XMMRegister src, int vector_len);11461147// Convert Packed Signed Doubleword Integers to Packed Single-Precision Floating-Point Value1148void cvtdq2ps(XMMRegister dst, XMMRegister src);1149void vcvtdq2ps(XMMRegister dst, XMMRegister src, int vector_len);11501151// Convert Scalar Single-Precision Floating-Point Value to Scalar Double-Precision Floating-Point Value1152void cvtss2sd(XMMRegister dst, XMMRegister src);1153void cvtss2sd(XMMRegister dst, Address src);11541155// Convert with Truncation Scalar Double-Precision Floating-Point Value to Doubleword Integer1156void cvttsd2sil(Register dst, Address src);1157void cvttsd2sil(Register dst, XMMRegister src);1158void cvttsd2siq(Register dst, Address src);1159void cvttsd2siq(Register dst, XMMRegister src);11601161// Convert with Truncation Scalar Single-Precision Floating-Point Value to Doubleword Integer1162void cvttss2sil(Register dst, XMMRegister src);1163void cvttss2siq(Register dst, XMMRegister src);11641165// Convert vector double to int1166void cvttpd2dq(XMMRegister dst, XMMRegister src);11671168// Convert vector float and double1169void vcvtps2pd(XMMRegister dst, XMMRegister src, int vector_len);1170void vcvtpd2ps(XMMRegister dst, XMMRegister src, int vector_len);11711172// Convert vector long to vector FP1173void evcvtqq2ps(XMMRegister dst, XMMRegister src, int vector_len);1174void evcvtqq2pd(XMMRegister dst, XMMRegister src, int vector_len);11751176// Evex casts with truncation1177void evpmovwb(XMMRegister dst, XMMRegister src, int vector_len);1178void evpmovdw(XMMRegister dst, XMMRegister src, int vector_len);1179void evpmovdb(XMMRegister dst, XMMRegister src, int vector_len);1180void evpmovqd(XMMRegister dst, XMMRegister src, int vector_len);1181void evpmovqb(XMMRegister dst, XMMRegister src, int 
vector_len);1182void evpmovqw(XMMRegister dst, XMMRegister src, int vector_len);11831184//Abs of packed Integer values1185void pabsb(XMMRegister dst, XMMRegister src);1186void pabsw(XMMRegister dst, XMMRegister src);1187void pabsd(XMMRegister dst, XMMRegister src);1188void vpabsb(XMMRegister dst, XMMRegister src, int vector_len);1189void vpabsw(XMMRegister dst, XMMRegister src, int vector_len);1190void vpabsd(XMMRegister dst, XMMRegister src, int vector_len);1191void evpabsq(XMMRegister dst, XMMRegister src, int vector_len);11921193// Divide Scalar Double-Precision Floating-Point Values1194void divsd(XMMRegister dst, Address src);1195void divsd(XMMRegister dst, XMMRegister src);11961197// Divide Scalar Single-Precision Floating-Point Values1198void divss(XMMRegister dst, Address src);1199void divss(XMMRegister dst, XMMRegister src);120012011202#ifndef _LP641203private:12041205void emit_farith(int b1, int b2, int i);12061207public:1208void emms();12091210void fabs();12111212void fadd(int i);12131214void fadd_d(Address src);1215void fadd_s(Address src);12161217// "Alternate" versions of x87 instructions place result down in FPU1218// stack instead of on TOS12191220void fadda(int i); // "alternate" fadd1221void faddp(int i = 1);12221223void fchs();12241225void fcom(int i);12261227void fcomp(int i = 1);1228void fcomp_d(Address src);1229void fcomp_s(Address src);12301231void fcompp();12321233void fcos();12341235void fdecstp();12361237void fdiv(int i);1238void fdiv_d(Address src);1239void fdivr_s(Address src);1240void fdiva(int i); // "alternate" fdiv1241void fdivp(int i = 1);12421243void fdivr(int i);1244void fdivr_d(Address src);1245void fdiv_s(Address src);12461247void fdivra(int i); // "alternate" reversed fdiv12481249void fdivrp(int i = 1);12501251void ffree(int i = 0);12521253void fild_d(Address adr);1254void fild_s(Address adr);12551256void fincstp();12571258void finit();12591260void fist_s (Address adr);1261void fistp_d(Address adr);1262void fistp_s(Address 
adr);12631264void fld1();12651266void fld_d(Address adr);1267void fld_s(Address adr);1268void fld_s(int index);12691270void fldcw(Address src);12711272void fldenv(Address src);12731274void fldlg2();12751276void fldln2();12771278void fldz();12791280void flog();1281void flog10();12821283void fmul(int i);12841285void fmul_d(Address src);1286void fmul_s(Address src);12871288void fmula(int i); // "alternate" fmul12891290void fmulp(int i = 1);12911292void fnsave(Address dst);12931294void fnstcw(Address src);12951296void fnstsw_ax();12971298void fprem();1299void fprem1();13001301void frstor(Address src);13021303void fsin();13041305void fsqrt();13061307void fst_d(Address adr);1308void fst_s(Address adr);13091310void fstp_d(Address adr);1311void fstp_d(int index);1312void fstp_s(Address adr);13131314void fsub(int i);1315void fsub_d(Address src);1316void fsub_s(Address src);13171318void fsuba(int i); // "alternate" fsub13191320void fsubp(int i = 1);13211322void fsubr(int i);1323void fsubr_d(Address src);1324void fsubr_s(Address src);13251326void fsubra(int i); // "alternate" reversed fsub13271328void fsubrp(int i = 1);13291330void ftan();13311332void ftst();13331334void fucomi(int i = 1);1335void fucomip(int i = 1);13361337void fwait();13381339void fxch(int i = 1);13401341void fyl2x();1342void frndint();1343void f2xm1();1344void fldl2e();1345#endif // !_LP6413461347// operands that only take the original 32bit registers1348void emit_operand32(Register reg, Address adr);13491350void fld_x(Address adr); // extended-precision (80-bit) format1351void fstp_x(Address adr); // extended-precision (80-bit) format1352void fxrstor(Address src);1353void xrstor(Address src);13541355void fxsave(Address dst);1356void xsave(Address dst);13571358void hlt();13591360void idivl(Register src);1361void divl(Register src); // Unsigned division13621363#ifdef _LP641364void idivq(Register src);1365#endif13661367void imull(Register src);1368void imull(Register dst, Register src);1369void 
imull(Register dst, Register src, int value);1370void imull(Register dst, Address src, int value);1371void imull(Register dst, Address src);13721373#ifdef _LP641374void imulq(Register dst, Register src);1375void imulq(Register dst, Register src, int value);1376void imulq(Register dst, Address src, int value);1377void imulq(Register dst, Address src);1378void imulq(Register dst);1379#endif13801381// jcc is the generic conditional branch generator to run-1382// time routines, jcc is used for branches to labels. jcc1383// takes a branch opcode (cc) and a label (L) and generates1384// either a backward branch or a forward branch and links it1385// to the label fixup chain. Usage:1386//1387// Label L; // unbound label1388// jcc(cc, L); // forward branch to unbound label1389// bind(L); // bind label to the current pc1390// jcc(cc, L); // backward branch to bound label1391// bind(L); // illegal: a label may be bound only once1392//1393// Note: The same Label can be used for forward and backward branches1394// but it may be bound only once.13951396void jcc(Condition cc, Label& L, bool maybe_short = true);13971398// Conditional jump to a 8-bit offset to L.1399// WARNING: be very careful using this for forward jumps. If the label is1400// not bound within an 8-bit offset of this instruction, a run-time error1401// will occur.14021403// Use macro to record file and line number.1404#define jccb(cc, L) jccb_0(cc, L, __FILE__, __LINE__)14051406void jccb_0(Condition cc, Label& L, const char* file, int line);14071408void jmp(Address entry); // pc <- entry14091410// Label operations & relative jumps (PPUM Appendix D)1411void jmp(Label& L, bool maybe_short = true); // unconditional jump to L14121413void jmp(Register entry); // pc <- entry14141415// Unconditional 8-bit offset jump to L.1416// WARNING: be very careful using this for forward jumps. 
If the label is1417// not bound within an 8-bit offset of this instruction, a run-time error1418// will occur.14191420// Use macro to record file and line number.1421#define jmpb(L) jmpb_0(L, __FILE__, __LINE__)14221423void jmpb_0(Label& L, const char* file, int line);14241425void ldmxcsr( Address src );14261427void leal(Register dst, Address src);14281429void leaq(Register dst, Address src);14301431void lfence();14321433void lock();1434void size_prefix();14351436void lzcntl(Register dst, Register src);14371438#ifdef _LP641439void lzcntq(Register dst, Register src);1440#endif14411442enum Membar_mask_bits {1443StoreStore = 1 << 3,1444LoadStore = 1 << 2,1445StoreLoad = 1 << 1,1446LoadLoad = 1 << 01447};14481449// Serializes memory and blows flags1450void membar(Membar_mask_bits order_constraint);14511452void mfence();1453void sfence();14541455// Moves14561457void mov64(Register dst, int64_t imm64);1458void mov64(Register dst, int64_t imm64, relocInfo::relocType rtype, int format);14591460void movb(Address dst, Register src);1461void movb(Address dst, int imm8);1462void movb(Register dst, Address src);14631464void movddup(XMMRegister dst, XMMRegister src);14651466void kmovbl(KRegister dst, Register src);1467void kmovbl(Register dst, KRegister src);1468void kmovwl(KRegister dst, Register src);1469void kmovwl(KRegister dst, Address src);1470void kmovwl(Register dst, KRegister src);1471void kmovwl(Address dst, KRegister src);1472void kmovwl(KRegister dst, KRegister src);1473void kmovdl(KRegister dst, Register src);1474void kmovdl(Register dst, KRegister src);1475void kmovql(KRegister dst, KRegister src);1476void kmovql(Address dst, KRegister src);1477void kmovql(KRegister dst, Address src);1478void kmovql(KRegister dst, Register src);1479void kmovql(Register dst, KRegister src);14801481void knotwl(KRegister dst, KRegister src);14821483void kortestbl(KRegister dst, KRegister src);1484void kortestwl(KRegister dst, KRegister src);1485void kortestdl(KRegister dst, KRegister 
src);1486void kortestql(KRegister dst, KRegister src);14871488void ktestq(KRegister src1, KRegister src2);1489void ktestd(KRegister src1, KRegister src2);14901491void ktestql(KRegister dst, KRegister src);14921493void movdl(XMMRegister dst, Register src);1494void movdl(Register dst, XMMRegister src);1495void movdl(XMMRegister dst, Address src);1496void movdl(Address dst, XMMRegister src);14971498// Move Double Quadword1499void movdq(XMMRegister dst, Register src);1500void movdq(Register dst, XMMRegister src);15011502// Move Aligned Double Quadword1503void movdqa(XMMRegister dst, XMMRegister src);1504void movdqa(XMMRegister dst, Address src);15051506// Move Unaligned Double Quadword1507void movdqu(Address dst, XMMRegister src);1508void movdqu(XMMRegister dst, Address src);1509void movdqu(XMMRegister dst, XMMRegister src);15101511// Move Unaligned 256bit Vector1512void vmovdqu(Address dst, XMMRegister src);1513void vmovdqu(XMMRegister dst, Address src);1514void vmovdqu(XMMRegister dst, XMMRegister src);15151516// Move Unaligned 512bit Vector1517void evmovdqub(Address dst, XMMRegister src, bool merge, int vector_len);1518void evmovdqub(XMMRegister dst, Address src, bool merge, int vector_len);1519void evmovdqub(XMMRegister dst, XMMRegister src, bool merge, int vector_len);1520void evmovdqub(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len);1521void evmovdqub(Address dst, KRegister mask, XMMRegister src, bool merge, int vector_len);1522void evmovdquw(Address dst, XMMRegister src, bool merge, int vector_len);1523void evmovdquw(Address dst, KRegister mask, XMMRegister src, bool merge, int vector_len);1524void evmovdquw(XMMRegister dst, Address src, bool merge, int vector_len);1525void evmovdquw(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len);1526void evmovdqul(Address dst, XMMRegister src, int vector_len);1527void evmovdqul(XMMRegister dst, Address src, int vector_len);1528void evmovdqul(XMMRegister dst, XMMRegister src, 
int vector_len);1529void evmovdqul(Address dst, KRegister mask, XMMRegister src, bool merge, int vector_len);1530void evmovdqul(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len);1531void evmovdqul(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len);1532void evmovdquq(Address dst, XMMRegister src, int vector_len);1533void evmovdquq(XMMRegister dst, Address src, int vector_len);1534void evmovdquq(XMMRegister dst, XMMRegister src, int vector_len);1535void evmovdquq(Address dst, KRegister mask, XMMRegister src, bool merge, int vector_len);1536void evmovdquq(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len);1537void evmovdquq(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len);15381539// Move lower 64bit to high 64bit in 128bit register1540void movlhps(XMMRegister dst, XMMRegister src);15411542void movl(Register dst, int32_t imm32);1543void movl(Address dst, int32_t imm32);1544void movl(Register dst, Register src);1545void movl(Register dst, Address src);1546void movl(Address dst, Register src);15471548// These dummies prevent using movl from converting a zero (like NULL) into Register1549// by giving the compiler two choices it can't resolve15501551void movl(Address dst, void* junk);1552void movl(Register dst, void* junk);15531554#ifdef _LP641555void movq(Register dst, Register src);1556void movq(Register dst, Address src);1557void movq(Address dst, Register src);1558void movq(Address dst, int32_t imm32);1559void movq(Register dst, int32_t imm32);15601561// These dummies prevent using movq from converting a zero (like NULL) into Register1562// by giving the compiler two choices it can't resolve15631564void movq(Address dst, void* dummy);1565void movq(Register dst, void* dummy);1566#endif15671568// Move Quadword1569void movq(Address dst, XMMRegister src);1570void movq(XMMRegister dst, Address src);1571void movq(XMMRegister dst, XMMRegister src);1572void movq(Register dst, 
XMMRegister src);1573void movq(XMMRegister dst, Register src);15741575void movsbl(Register dst, Address src);1576void movsbl(Register dst, Register src);15771578#ifdef _LP641579void movsbq(Register dst, Address src);1580void movsbq(Register dst, Register src);15811582// Move signed 32bit immediate to 64bit extending sign1583void movslq(Address dst, int32_t imm64);1584void movslq(Register dst, int32_t imm64);15851586void movslq(Register dst, Address src);1587void movslq(Register dst, Register src);1588void movslq(Register dst, void* src); // Dummy declaration to cause NULL to be ambiguous1589#endif15901591void movswl(Register dst, Address src);1592void movswl(Register dst, Register src);15931594#ifdef _LP641595void movswq(Register dst, Address src);1596void movswq(Register dst, Register src);1597#endif15981599void movw(Address dst, int imm16);1600void movw(Register dst, Address src);1601void movw(Address dst, Register src);16021603void movzbl(Register dst, Address src);1604void movzbl(Register dst, Register src);16051606#ifdef _LP641607void movzbq(Register dst, Address src);1608void movzbq(Register dst, Register src);1609#endif16101611void movzwl(Register dst, Address src);1612void movzwl(Register dst, Register src);16131614#ifdef _LP641615void movzwq(Register dst, Address src);1616void movzwq(Register dst, Register src);1617#endif16181619// Unsigned multiply with RAX destination register1620void mull(Address src);1621void mull(Register src);16221623#ifdef _LP641624void mulq(Address src);1625void mulq(Register src);1626void mulxq(Register dst1, Register dst2, Register src);1627#endif16281629// Multiply Scalar Double-Precision Floating-Point Values1630void mulsd(XMMRegister dst, Address src);1631void mulsd(XMMRegister dst, XMMRegister src);16321633// Multiply Scalar Single-Precision Floating-Point Values1634void mulss(XMMRegister dst, Address src);1635void mulss(XMMRegister dst, XMMRegister src);16361637void negl(Register dst);1638void negl(Address 
dst);16391640#ifdef _LP641641void negq(Register dst);1642void negq(Address dst);1643#endif16441645void nop(int i = 1);16461647void notl(Register dst);16481649#ifdef _LP641650void notq(Register dst);16511652void btsq(Address dst, int imm8);1653void btrq(Address dst, int imm8);1654#endif16551656void orw(Register dst, Register src);16571658void orl(Address dst, int32_t imm32);1659void orl(Register dst, int32_t imm32);1660void orl(Register dst, Address src);1661void orl(Register dst, Register src);1662void orl(Address dst, Register src);16631664void orb(Address dst, int imm8);1665void orb(Address dst, Register src);16661667void orq(Address dst, int32_t imm32);1668void orq(Address dst, Register src);1669void orq(Register dst, int32_t imm32);1670void orq(Register dst, Address src);1671void orq(Register dst, Register src);16721673// Pack with signed saturation1674void packsswb(XMMRegister dst, XMMRegister src);1675void vpacksswb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);1676void packssdw(XMMRegister dst, XMMRegister src);1677void vpackssdw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);16781679// Pack with unsigned saturation1680void packuswb(XMMRegister dst, XMMRegister src);1681void packuswb(XMMRegister dst, Address src);1682void packusdw(XMMRegister dst, XMMRegister src);1683void vpackuswb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);1684void vpackusdw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);16851686// Permutations1687void vpermq(XMMRegister dst, XMMRegister src, int imm8, int vector_len);1688void vpermq(XMMRegister dst, XMMRegister src, int imm8);1689void vpermq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);1690void vpermb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);1691void vpermw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);1692void vpermd(XMMRegister dst, XMMRegister nds, Address src, int 
vector_len);1693void vpermd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);1694void vperm2i128(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8);1695void vperm2f128(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8);1696void vpermilps(XMMRegister dst, XMMRegister src, int imm8, int vector_len);1697void vpermilpd(XMMRegister dst, XMMRegister src, int imm8, int vector_len);1698void vpermpd(XMMRegister dst, XMMRegister src, int imm8, int vector_len);1699void evpermi2q(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);17001701void pause();17021703// Undefined Instruction1704void ud2();17051706// SSE4.2 string instructions1707void pcmpestri(XMMRegister xmm1, XMMRegister xmm2, int imm8);1708void pcmpestri(XMMRegister xmm1, Address src, int imm8);17091710void pcmpeqb(XMMRegister dst, XMMRegister src);1711void vpcmpCCbwd(XMMRegister dst, XMMRegister nds, XMMRegister src, int cond_encoding, int vector_len);17121713void vpcmpeqb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);1714void evpcmpeqb(KRegister kdst, XMMRegister nds, XMMRegister src, int vector_len);1715void evpcmpeqb(KRegister kdst, XMMRegister nds, Address src, int vector_len);1716void evpcmpeqb(KRegister kdst, KRegister mask, XMMRegister nds, Address src, int vector_len);17171718void vpcmpgtb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);1719void evpcmpgtb(KRegister kdst, XMMRegister nds, Address src, int vector_len);1720void evpcmpgtb(KRegister kdst, KRegister mask, XMMRegister nds, Address src, int vector_len);17211722void evpcmpuw(KRegister kdst, XMMRegister nds, XMMRegister src, ComparisonPredicate vcc, int vector_len);1723void evpcmpuw(KRegister kdst, XMMRegister nds, Address src, ComparisonPredicate vcc, int vector_len);17241725void pcmpeqw(XMMRegister dst, XMMRegister src);1726void vpcmpeqw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);1727void evpcmpeqw(KRegister kdst, XMMRegister nds, 
XMMRegister src, int vector_len);1728void evpcmpeqw(KRegister kdst, XMMRegister nds, Address src, int vector_len);17291730void vpcmpgtw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);17311732void pcmpeqd(XMMRegister dst, XMMRegister src);1733void vpcmpeqd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);1734void evpcmpeqd(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src, int vector_len);1735void evpcmpeqd(KRegister kdst, KRegister mask, XMMRegister nds, Address src, int vector_len);17361737void pcmpeqq(XMMRegister dst, XMMRegister src);1738void vpcmpCCq(XMMRegister dst, XMMRegister nds, XMMRegister src, int cond_encoding, int vector_len);1739void vpcmpeqq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);1740void evpcmpeqq(KRegister kdst, XMMRegister nds, XMMRegister src, int vector_len);1741void evpcmpeqq(KRegister kdst, XMMRegister nds, Address src, int vector_len);17421743void pcmpgtq(XMMRegister dst, XMMRegister src);1744void vpcmpgtq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);17451746void pmovmskb(Register dst, XMMRegister src);1747void vpmovmskb(Register dst, XMMRegister src, int vec_enc);17481749// SSE 4.1 extract1750void pextrd(Register dst, XMMRegister src, int imm8);1751void pextrq(Register dst, XMMRegister src, int imm8);1752void pextrd(Address dst, XMMRegister src, int imm8);1753void pextrq(Address dst, XMMRegister src, int imm8);1754void pextrb(Register dst, XMMRegister src, int imm8);1755void pextrb(Address dst, XMMRegister src, int imm8);1756// SSE 2 extract1757void pextrw(Register dst, XMMRegister src, int imm8);1758void pextrw(Address dst, XMMRegister src, int imm8);17591760// SSE 4.1 insert1761void pinsrd(XMMRegister dst, Register src, int imm8);1762void pinsrq(XMMRegister dst, Register src, int imm8);1763void pinsrb(XMMRegister dst, Register src, int imm8);1764void pinsrd(XMMRegister dst, Address src, int imm8);1765void pinsrq(XMMRegister dst, Address 
src, int imm8);1766void pinsrb(XMMRegister dst, Address src, int imm8);1767void insertps(XMMRegister dst, XMMRegister src, int imm8);1768// SSE 2 insert1769void pinsrw(XMMRegister dst, Register src, int imm8);1770void pinsrw(XMMRegister dst, Address src, int imm8);17711772// AVX insert1773void vpinsrd(XMMRegister dst, XMMRegister nds, Register src, int imm8);1774void vpinsrb(XMMRegister dst, XMMRegister nds, Register src, int imm8);1775void vpinsrq(XMMRegister dst, XMMRegister nds, Register src, int imm8);1776void vpinsrw(XMMRegister dst, XMMRegister nds, Register src, int imm8);1777void vinsertps(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8);17781779// Zero extend moves1780void pmovzxbw(XMMRegister dst, XMMRegister src);1781void pmovzxbw(XMMRegister dst, Address src);1782void pmovzxbd(XMMRegister dst, XMMRegister src);1783void vpmovzxbw( XMMRegister dst, Address src, int vector_len);1784void pmovzxdq(XMMRegister dst, XMMRegister src);1785void vpmovzxbw(XMMRegister dst, XMMRegister src, int vector_len);1786void vpmovzxdq(XMMRegister dst, XMMRegister src, int vector_len);1787void vpmovzxbd(XMMRegister dst, XMMRegister src, int vector_len);1788void vpmovzxbq(XMMRegister dst, XMMRegister src, int vector_len);1789void evpmovzxbw(XMMRegister dst, KRegister mask, Address src, int vector_len);17901791// Sign extend moves1792void pmovsxbd(XMMRegister dst, XMMRegister src);1793void pmovsxbq(XMMRegister dst, XMMRegister src);1794void pmovsxbw(XMMRegister dst, XMMRegister src);1795void pmovsxwd(XMMRegister dst, XMMRegister src);1796void vpmovsxbd(XMMRegister dst, XMMRegister src, int vector_len);1797void vpmovsxbq(XMMRegister dst, XMMRegister src, int vector_len);1798void vpmovsxbw(XMMRegister dst, XMMRegister src, int vector_len);1799void vpmovsxwd(XMMRegister dst, XMMRegister src, int vector_len);1800void vpmovsxwq(XMMRegister dst, XMMRegister src, int vector_len);1801void vpmovsxdq(XMMRegister dst, XMMRegister src, int vector_len);18021803void 
evpmovwb(Address dst, XMMRegister src, int vector_len);1804void evpmovwb(Address dst, KRegister mask, XMMRegister src, int vector_len);18051806void vpmovzxwd(XMMRegister dst, XMMRegister src, int vector_len);18071808void evpmovdb(Address dst, XMMRegister src, int vector_len);18091810// Multiply add1811void pmaddwd(XMMRegister dst, XMMRegister src);1812void vpmaddwd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);1813// Multiply add accumulate1814void evpdpwssd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);18151816#ifndef _LP64 // no 32bit push/pop on amd641817void popl(Address dst);1818#endif18191820#ifdef _LP641821void popq(Address dst);1822void popq(Register dst);1823#endif18241825void popcntl(Register dst, Address src);1826void popcntl(Register dst, Register src);18271828void vpopcntd(XMMRegister dst, XMMRegister src, int vector_len);18291830#ifdef _LP641831void popcntq(Register dst, Address src);1832void popcntq(Register dst, Register src);1833#endif18341835// Prefetches (SSE, SSE2, 3DNOW only)18361837void prefetchnta(Address src);1838void prefetchr(Address src);1839void prefetcht0(Address src);1840void prefetcht1(Address src);1841void prefetcht2(Address src);1842void prefetchw(Address src);18431844// Shuffle Bytes1845void pshufb(XMMRegister dst, XMMRegister src);1846void pshufb(XMMRegister dst, Address src);1847void vpshufb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);18481849// Shuffle Packed Doublewords1850void pshufd(XMMRegister dst, XMMRegister src, int mode);1851void pshufd(XMMRegister dst, Address src, int mode);1852void vpshufd(XMMRegister dst, XMMRegister src, int mode, int vector_len);18531854// Shuffle Packed High/Low Words1855void pshufhw(XMMRegister dst, XMMRegister src, int mode);1856void pshuflw(XMMRegister dst, XMMRegister src, int mode);1857void pshuflw(XMMRegister dst, Address src, int mode);18581859//shuffle floats and doubles1860void pshufps(XMMRegister, XMMRegister, int);1861void 
pshufpd(XMMRegister, XMMRegister, int);1862void vpshufps(XMMRegister, XMMRegister, XMMRegister, int, int);1863void vpshufpd(XMMRegister, XMMRegister, XMMRegister, int, int);18641865// Shuffle packed values at 128 bit granularity1866void evshufi64x2(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8, int vector_len);18671868// Shift Right by bytes Logical DoubleQuadword Immediate1869void psrldq(XMMRegister dst, int shift);1870// Shift Left by bytes Logical DoubleQuadword Immediate1871void pslldq(XMMRegister dst, int shift);18721873// Logical Compare 128bit1874void ptest(XMMRegister dst, XMMRegister src);1875void ptest(XMMRegister dst, Address src);1876// Logical Compare 256bit1877void vptest(XMMRegister dst, XMMRegister src);1878void vptest(XMMRegister dst, Address src);18791880// Vector compare1881void vptest(XMMRegister dst, XMMRegister src, int vector_len);18821883// Interleave Low Bytes1884void punpcklbw(XMMRegister dst, XMMRegister src);1885void punpcklbw(XMMRegister dst, Address src);18861887// Interleave Low Doublewords1888void punpckldq(XMMRegister dst, XMMRegister src);1889void punpckldq(XMMRegister dst, Address src);18901891// Interleave Low Quadwords1892void punpcklqdq(XMMRegister dst, XMMRegister src);18931894#ifndef _LP64 // no 32bit push/pop on amd641895void pushl(Address src);1896#endif18971898void pushq(Address src);18991900void rcll(Register dst, int imm8);19011902void rclq(Register dst, int imm8);19031904void rcrq(Register dst, int imm8);19051906void rcpps(XMMRegister dst, XMMRegister src);19071908void rcpss(XMMRegister dst, XMMRegister src);19091910void rdtsc();19111912void ret(int imm16);19131914void roll(Register dst);19151916void roll(Register dst, int imm8);19171918void rorl(Register dst);19191920void rorl(Register dst, int imm8);19211922#ifdef _LP641923void rolq(Register dst);1924void rolq(Register dst, int imm8);1925void rorq(Register dst);1926void rorq(Register dst, int imm8);1927void rorxq(Register dst, Register src, int 
imm8);1928void rorxd(Register dst, Register src, int imm8);1929#endif19301931void sahf();19321933void sall(Register dst, int imm8);1934void sall(Register dst);1935void sall(Address dst, int imm8);1936void sall(Address dst);19371938void sarl(Address dst, int imm8);1939void sarl(Address dst);1940void sarl(Register dst, int imm8);1941void sarl(Register dst);19421943#ifdef _LP641944void salq(Register dst, int imm8);1945void salq(Register dst);1946void salq(Address dst, int imm8);1947void salq(Address dst);19481949void sarq(Address dst, int imm8);1950void sarq(Address dst);1951void sarq(Register dst, int imm8);1952void sarq(Register dst);1953#endif19541955void sbbl(Address dst, int32_t imm32);1956void sbbl(Register dst, int32_t imm32);1957void sbbl(Register dst, Address src);1958void sbbl(Register dst, Register src);19591960void sbbq(Address dst, int32_t imm32);1961void sbbq(Register dst, int32_t imm32);1962void sbbq(Register dst, Address src);1963void sbbq(Register dst, Register src);19641965void setb(Condition cc, Register dst);19661967void sete(Register dst);1968void setl(Register dst);1969void setne(Register dst);19701971void palignr(XMMRegister dst, XMMRegister src, int imm8);1972void vpalignr(XMMRegister dst, XMMRegister src1, XMMRegister src2, int imm8, int vector_len);1973void evalignq(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8);19741975void pblendw(XMMRegister dst, XMMRegister src, int imm8);1976void vblendps(XMMRegister dst, XMMRegister src1, XMMRegister src2, int imm8, int vector_len);19771978void sha1rnds4(XMMRegister dst, XMMRegister src, int imm8);1979void sha1nexte(XMMRegister dst, XMMRegister src);1980void sha1msg1(XMMRegister dst, XMMRegister src);1981void sha1msg2(XMMRegister dst, XMMRegister src);1982// xmm0 is implicit additional source to the following instruction.1983void sha256rnds2(XMMRegister dst, XMMRegister src);1984void sha256msg1(XMMRegister dst, XMMRegister src);1985void sha256msg2(XMMRegister dst, XMMRegister 
src);19861987void shldl(Register dst, Register src);1988void shldl(Register dst, Register src, int8_t imm8);1989void shrdl(Register dst, Register src);1990void shrdl(Register dst, Register src, int8_t imm8);19911992void shll(Register dst, int imm8);1993void shll(Register dst);19941995void shlq(Register dst, int imm8);1996void shlq(Register dst);19971998void shrl(Register dst, int imm8);1999void shrl(Register dst);2000void shrl(Address dst);2001void shrl(Address dst, int imm8);20022003void shrq(Register dst, int imm8);2004void shrq(Register dst);2005void shrq(Address dst);2006void shrq(Address dst, int imm8);20072008void smovl(); // QQQ generic?20092010// Compute Square Root of Scalar Double-Precision Floating-Point Value2011void sqrtsd(XMMRegister dst, Address src);2012void sqrtsd(XMMRegister dst, XMMRegister src);20132014void roundsd(XMMRegister dst, Address src, int32_t rmode);2015void roundsd(XMMRegister dst, XMMRegister src, int32_t rmode);20162017// Compute Square Root of Scalar Single-Precision Floating-Point Value2018void sqrtss(XMMRegister dst, Address src);2019void sqrtss(XMMRegister dst, XMMRegister src);20202021void std();20222023void stmxcsr( Address dst );20242025void subl(Address dst, int32_t imm32);2026void subl(Address dst, Register src);2027void subl(Register dst, int32_t imm32);2028void subl(Register dst, Address src);2029void subl(Register dst, Register src);20302031void subq(Address dst, int32_t imm32);2032void subq(Address dst, Register src);2033void subq(Register dst, int32_t imm32);2034void subq(Register dst, Address src);2035void subq(Register dst, Register src);20362037// Force generation of a 4 byte immediate value even if it fits into 8bit2038void subl_imm32(Register dst, int32_t imm32);2039void subq_imm32(Register dst, int32_t imm32);20402041// Subtract Scalar Double-Precision Floating-Point Values2042void subsd(XMMRegister dst, Address src);2043void subsd(XMMRegister dst, XMMRegister src);20442045// Subtract Scalar Single-Precision 
Floating-Point Values2046void subss(XMMRegister dst, Address src);2047void subss(XMMRegister dst, XMMRegister src);20482049void testb(Register dst, int imm8);2050void testb(Address dst, int imm8);20512052void testl(Register dst, int32_t imm32);2053void testl(Register dst, Register src);2054void testl(Register dst, Address src);20552056void testq(Address dst, int32_t imm32);2057void testq(Register dst, int32_t imm32);2058void testq(Register dst, Register src);2059void testq(Register dst, Address src);20602061// BMI - count trailing zeros2062void tzcntl(Register dst, Register src);2063void tzcntq(Register dst, Register src);20642065// Unordered Compare Scalar Double-Precision Floating-Point Values and set EFLAGS2066void ucomisd(XMMRegister dst, Address src);2067void ucomisd(XMMRegister dst, XMMRegister src);20682069// Unordered Compare Scalar Single-Precision Floating-Point Values and set EFLAGS2070void ucomiss(XMMRegister dst, Address src);2071void ucomiss(XMMRegister dst, XMMRegister src);20722073void xabort(int8_t imm8);20742075void xaddb(Address dst, Register src);2076void xaddw(Address dst, Register src);2077void xaddl(Address dst, Register src);2078void xaddq(Address dst, Register src);20792080void xbegin(Label& abort, relocInfo::relocType rtype = relocInfo::none);20812082void xchgb(Register reg, Address adr);2083void xchgw(Register reg, Address adr);2084void xchgl(Register reg, Address adr);2085void xchgl(Register dst, Register src);20862087void xchgq(Register reg, Address adr);2088void xchgq(Register dst, Register src);20892090void xend();20912092// Get Value of Extended Control Register2093void xgetbv();20942095void xorl(Register dst, int32_t imm32);2096void xorl(Address dst, int32_t imm32);2097void xorl(Register dst, Address src);2098void xorl(Register dst, Register src);2099void xorl(Address dst, Register src);21002101void xorb(Address dst, Register src);2102void xorb(Register dst, Address src);2103void xorw(Register dst, Register src);21042105void 
xorq(Register dst, Address src);2106void xorq(Address dst, int32_t imm32);2107void xorq(Register dst, Register src);2108void xorq(Register dst, int32_t imm32);2109void xorq(Address dst, Register src);21102111void set_byte_if_not_zero(Register dst); // sets reg to 1 if not zero, otherwise 021122113// AVX 3-operands scalar instructions (encoded with VEX prefix)21142115void vaddsd(XMMRegister dst, XMMRegister nds, Address src);2116void vaddsd(XMMRegister dst, XMMRegister nds, XMMRegister src);2117void vaddss(XMMRegister dst, XMMRegister nds, Address src);2118void vaddss(XMMRegister dst, XMMRegister nds, XMMRegister src);2119void vdivsd(XMMRegister dst, XMMRegister nds, Address src);2120void vdivsd(XMMRegister dst, XMMRegister nds, XMMRegister src);2121void vdivss(XMMRegister dst, XMMRegister nds, Address src);2122void vdivss(XMMRegister dst, XMMRegister nds, XMMRegister src);2123void vfmadd231sd(XMMRegister dst, XMMRegister nds, XMMRegister src);2124void vfmadd231ss(XMMRegister dst, XMMRegister nds, XMMRegister src);2125void vmulsd(XMMRegister dst, XMMRegister nds, Address src);2126void vmulsd(XMMRegister dst, XMMRegister nds, XMMRegister src);2127void vmulss(XMMRegister dst, XMMRegister nds, Address src);2128void vmulss(XMMRegister dst, XMMRegister nds, XMMRegister src);2129void vsubsd(XMMRegister dst, XMMRegister nds, Address src);2130void vsubsd(XMMRegister dst, XMMRegister nds, XMMRegister src);2131void vsubss(XMMRegister dst, XMMRegister nds, Address src);2132void vsubss(XMMRegister dst, XMMRegister nds, XMMRegister src);21332134void vmaxss(XMMRegister dst, XMMRegister nds, XMMRegister src);2135void vmaxsd(XMMRegister dst, XMMRegister nds, XMMRegister src);2136void vminss(XMMRegister dst, XMMRegister nds, XMMRegister src);2137void vminsd(XMMRegister dst, XMMRegister nds, XMMRegister src);21382139void shlxl(Register dst, Register src1, Register src2);2140void shlxq(Register dst, Register src1, Register src2);2141void shrxq(Register dst, Register src1, Register 
src2);21422143void bzhiq(Register dst, Register src1, Register src2);21442145//====================VECTOR ARITHMETIC=====================================2146void evpmovd2m(KRegister kdst, XMMRegister src, int vector_len);2147void evpmovq2m(KRegister kdst, XMMRegister src, int vector_len);21482149// Add Packed Floating-Point Values2150void addpd(XMMRegister dst, XMMRegister src);2151void addpd(XMMRegister dst, Address src);2152void addps(XMMRegister dst, XMMRegister src);2153void vaddpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);2154void vaddps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);2155void vaddpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len);2156void vaddps(XMMRegister dst, XMMRegister nds, Address src, int vector_len);21572158// Subtract Packed Floating-Point Values2159void subpd(XMMRegister dst, XMMRegister src);2160void subps(XMMRegister dst, XMMRegister src);2161void vsubpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);2162void vsubps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);2163void vsubpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len);2164void vsubps(XMMRegister dst, XMMRegister nds, Address src, int vector_len);21652166// Multiply Packed Floating-Point Values2167void mulpd(XMMRegister dst, XMMRegister src);2168void mulpd(XMMRegister dst, Address src);2169void mulps(XMMRegister dst, XMMRegister src);2170void vmulpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);2171void vmulps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);2172void vmulpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len);2173void vmulps(XMMRegister dst, XMMRegister nds, Address src, int vector_len);21742175void vfmadd231pd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);2176void vfmadd231ps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);2177void vfmadd231pd(XMMRegister 
dst, XMMRegister nds, Address src, int vector_len);2178void vfmadd231ps(XMMRegister dst, XMMRegister nds, Address src, int vector_len);21792180// Divide Packed Floating-Point Values2181void divpd(XMMRegister dst, XMMRegister src);2182void divps(XMMRegister dst, XMMRegister src);2183void vdivpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);2184void vdivps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);2185void vdivpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len);2186void vdivps(XMMRegister dst, XMMRegister nds, Address src, int vector_len);21872188// Sqrt Packed Floating-Point Values2189void vsqrtpd(XMMRegister dst, XMMRegister src, int vector_len);2190void vsqrtpd(XMMRegister dst, Address src, int vector_len);2191void vsqrtps(XMMRegister dst, XMMRegister src, int vector_len);2192void vsqrtps(XMMRegister dst, Address src, int vector_len);21932194// Round Packed Double precision value.2195void vroundpd(XMMRegister dst, XMMRegister src, int32_t rmode, int vector_len);2196void vroundpd(XMMRegister dst, Address src, int32_t rmode, int vector_len);2197void vrndscalepd(XMMRegister dst, XMMRegister src, int32_t rmode, int vector_len);2198void vrndscalepd(XMMRegister dst, Address src, int32_t rmode, int vector_len);21992200// Bitwise Logical AND of Packed Floating-Point Values2201void andpd(XMMRegister dst, XMMRegister src);2202void andps(XMMRegister dst, XMMRegister src);2203void vandpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);2204void vandps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);2205void vandpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len);2206void vandps(XMMRegister dst, XMMRegister nds, Address src, int vector_len);22072208void unpckhpd(XMMRegister dst, XMMRegister src);2209void unpcklpd(XMMRegister dst, XMMRegister src);22102211// Bitwise Logical XOR of Packed Floating-Point Values2212void xorpd(XMMRegister dst, XMMRegister src);2213void 
xorps(XMMRegister dst, XMMRegister src);2214void vxorpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);2215void vxorps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);2216void vxorpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len);2217void vxorps(XMMRegister dst, XMMRegister nds, Address src, int vector_len);22182219// Add horizontal packed integers2220void vphaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);2221void vphaddd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);2222void phaddw(XMMRegister dst, XMMRegister src);2223void phaddd(XMMRegister dst, XMMRegister src);22242225// Add packed integers2226void paddb(XMMRegister dst, XMMRegister src);2227void paddw(XMMRegister dst, XMMRegister src);2228void paddd(XMMRegister dst, XMMRegister src);2229void paddd(XMMRegister dst, Address src);2230void paddq(XMMRegister dst, XMMRegister src);2231void vpaddb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);2232void vpaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);2233void vpaddd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);2234void vpaddq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);2235void vpaddb(XMMRegister dst, XMMRegister nds, Address src, int vector_len);2236void vpaddw(XMMRegister dst, XMMRegister nds, Address src, int vector_len);2237void vpaddd(XMMRegister dst, XMMRegister nds, Address src, int vector_len);2238void vpaddq(XMMRegister dst, XMMRegister nds, Address src, int vector_len);22392240// Sub packed integers2241void psubb(XMMRegister dst, XMMRegister src);2242void psubw(XMMRegister dst, XMMRegister src);2243void psubd(XMMRegister dst, XMMRegister src);2244void psubq(XMMRegister dst, XMMRegister src);2245void vpsubb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);2246void vpsubw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);2247void 
vpsubd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);2248void vpsubq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);2249void vpsubb(XMMRegister dst, XMMRegister nds, Address src, int vector_len);2250void vpsubw(XMMRegister dst, XMMRegister nds, Address src, int vector_len);2251void vpsubd(XMMRegister dst, XMMRegister nds, Address src, int vector_len);2252void vpsubq(XMMRegister dst, XMMRegister nds, Address src, int vector_len);22532254// Multiply packed integers (only shorts and ints)2255void pmullw(XMMRegister dst, XMMRegister src);2256void pmulld(XMMRegister dst, XMMRegister src);2257void pmuludq(XMMRegister dst, XMMRegister src);2258void vpmullw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);2259void vpmulld(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);2260void vpmullq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);2261void vpmuludq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);2262void vpmullw(XMMRegister dst, XMMRegister nds, Address src, int vector_len);2263void vpmulld(XMMRegister dst, XMMRegister nds, Address src, int vector_len);2264void vpmullq(XMMRegister dst, XMMRegister nds, Address src, int vector_len);22652266// Minimum of packed integers2267void pminsb(XMMRegister dst, XMMRegister src);2268void vpminsb(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);2269void pminsw(XMMRegister dst, XMMRegister src);2270void vpminsw(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);2271void pminsd(XMMRegister dst, XMMRegister src);2272void vpminsd(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);2273void vpminsq(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);2274void minps(XMMRegister dst, XMMRegister src);2275void vminps(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);2276void minpd(XMMRegister dst, XMMRegister src);2277void 
vminpd(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);22782279// Maximum of packed integers2280void pmaxsb(XMMRegister dst, XMMRegister src);2281void vpmaxsb(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);2282void pmaxsw(XMMRegister dst, XMMRegister src);2283void vpmaxsw(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);2284void pmaxsd(XMMRegister dst, XMMRegister src);2285void vpmaxsd(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);2286void vpmaxsq(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);2287void maxps(XMMRegister dst, XMMRegister src);2288void vmaxps(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);2289void maxpd(XMMRegister dst, XMMRegister src);2290void vmaxpd(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);22912292// Shift left packed integers2293void psllw(XMMRegister dst, int shift);2294void pslld(XMMRegister dst, int shift);2295void psllq(XMMRegister dst, int shift);2296void psllw(XMMRegister dst, XMMRegister shift);2297void pslld(XMMRegister dst, XMMRegister shift);2298void psllq(XMMRegister dst, XMMRegister shift);2299void vpsllw(XMMRegister dst, XMMRegister src, int shift, int vector_len);2300void vpslld(XMMRegister dst, XMMRegister src, int shift, int vector_len);2301void vpsllq(XMMRegister dst, XMMRegister src, int shift, int vector_len);2302void vpsllw(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);2303void vpslld(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);2304void vpsllq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);2305void vpslldq(XMMRegister dst, XMMRegister src, int shift, int vector_len);23062307// Logical shift right packed integers2308void psrlw(XMMRegister dst, int shift);2309void psrld(XMMRegister dst, int shift);2310void psrlq(XMMRegister dst, int shift);2311void psrlw(XMMRegister dst, XMMRegister shift);2312void 
psrld(XMMRegister dst, XMMRegister shift);2313void psrlq(XMMRegister dst, XMMRegister shift);2314void vpsrlw(XMMRegister dst, XMMRegister src, int shift, int vector_len);2315void vpsrld(XMMRegister dst, XMMRegister src, int shift, int vector_len);2316void vpsrlq(XMMRegister dst, XMMRegister src, int shift, int vector_len);2317void vpsrlw(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);2318void vpsrld(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);2319void vpsrlq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);2320void vpsrldq(XMMRegister dst, XMMRegister src, int shift, int vector_len);2321void evpsrlvw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);2322void evpsllvw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);23232324// Arithmetic shift right packed integers (only shorts and ints, no instructions for longs)2325void psraw(XMMRegister dst, int shift);2326void psrad(XMMRegister dst, int shift);2327void psraw(XMMRegister dst, XMMRegister shift);2328void psrad(XMMRegister dst, XMMRegister shift);2329void vpsraw(XMMRegister dst, XMMRegister src, int shift, int vector_len);2330void vpsrad(XMMRegister dst, XMMRegister src, int shift, int vector_len);2331void vpsraw(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);2332void vpsrad(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);2333void evpsravw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);2334void evpsraq(XMMRegister dst, XMMRegister src, int shift, int vector_len);2335void evpsraq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);23362337// Variable shift left packed integers2338void vpsllvd(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);2339void vpsllvq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);23402341// Variable shift right packed integers2342void vpsrlvd(XMMRegister dst, XMMRegister 
src, XMMRegister shift, int vector_len);2343void vpsrlvq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);23442345// Variable shift right arithmetic packed integers2346void vpsravd(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);2347void evpsravq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);23482349void vpshldvd(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);2350void vpshrdvd(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);23512352// And packed integers2353void pand(XMMRegister dst, XMMRegister src);2354void vpand(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);2355void vpand(XMMRegister dst, XMMRegister nds, Address src, int vector_len);2356void evpandd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);2357void vpandq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);23582359// Andn packed integers2360void pandn(XMMRegister dst, XMMRegister src);2361void vpandn(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);23622363// Or packed integers2364void por(XMMRegister dst, XMMRegister src);2365void vpor(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);2366void vpor(XMMRegister dst, XMMRegister nds, Address src, int vector_len);2367void vporq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);23682369void evpord(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);2370void evpord(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);23712372// Xor packed integers2373void pxor(XMMRegister dst, XMMRegister src);2374void vpxor(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);2375void vpxor(XMMRegister dst, XMMRegister nds, Address src, int vector_len);2376void vpxorq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);2377void 
evpxord(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);2378void evpxorq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);2379void evpxorq(XMMRegister dst, XMMRegister nds, Address src, int vector_len);23802381// Ternary logic instruction.2382void vpternlogd(XMMRegister dst, int imm8, XMMRegister src2, XMMRegister src3, int vector_len);2383void vpternlogd(XMMRegister dst, int imm8, XMMRegister src2, Address src3, int vector_len);2384void vpternlogq(XMMRegister dst, int imm8, XMMRegister src2, XMMRegister src3, int vector_len);23852386// Vector Rotate Left/Right instruction.2387void evprolvd(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);2388void evprolvq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);2389void evprorvd(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);2390void evprorvq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);2391void evprold(XMMRegister dst, XMMRegister src, int shift, int vector_len);2392void evprolq(XMMRegister dst, XMMRegister src, int shift, int vector_len);2393void evprord(XMMRegister dst, XMMRegister src, int shift, int vector_len);2394void evprorq(XMMRegister dst, XMMRegister src, int shift, int vector_len);23952396// vinserti forms2397void vinserti128(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8);2398void vinserti128(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8);2399void vinserti32x4(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8);2400void vinserti32x4(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8);2401void vinserti64x4(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8);24022403// vinsertf forms2404void vinsertf128(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8);2405void vinsertf128(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8);2406void vinsertf32x4(XMMRegister dst, XMMRegister 
nds, XMMRegister src, uint8_t imm8);2407void vinsertf32x4(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8);2408void vinsertf64x4(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8);2409void vinsertf64x4(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8);24102411// vextracti forms2412void vextracti128(XMMRegister dst, XMMRegister src, uint8_t imm8);2413void vextracti128(Address dst, XMMRegister src, uint8_t imm8);2414void vextracti32x4(XMMRegister dst, XMMRegister src, uint8_t imm8);2415void vextracti32x4(Address dst, XMMRegister src, uint8_t imm8);2416void vextracti64x2(XMMRegister dst, XMMRegister src, uint8_t imm8);2417void vextracti64x4(XMMRegister dst, XMMRegister src, uint8_t imm8);2418void vextracti64x4(Address dst, XMMRegister src, uint8_t imm8);24192420// vextractf forms2421void vextractf128(XMMRegister dst, XMMRegister src, uint8_t imm8);2422void vextractf128(Address dst, XMMRegister src, uint8_t imm8);2423void vextractf32x4(XMMRegister dst, XMMRegister src, uint8_t imm8);2424void vextractf32x4(Address dst, XMMRegister src, uint8_t imm8);2425void vextractf64x2(XMMRegister dst, XMMRegister src, uint8_t imm8);2426void vextractf64x4(XMMRegister dst, XMMRegister src, uint8_t imm8);2427void vextractf64x4(Address dst, XMMRegister src, uint8_t imm8);24282429// xmm/mem sourced byte/word/dword/qword replicate2430void vpbroadcastb(XMMRegister dst, XMMRegister src, int vector_len);2431void vpbroadcastb(XMMRegister dst, Address src, int vector_len);2432void vpbroadcastw(XMMRegister dst, XMMRegister src, int vector_len);2433void vpbroadcastw(XMMRegister dst, Address src, int vector_len);2434void vpbroadcastd(XMMRegister dst, XMMRegister src, int vector_len);2435void vpbroadcastd(XMMRegister dst, Address src, int vector_len);2436void vpbroadcastq(XMMRegister dst, XMMRegister src, int vector_len);2437void vpbroadcastq(XMMRegister dst, Address src, int vector_len);24382439void evbroadcasti32x4(XMMRegister dst, Address src, int 
vector_len);2440void evbroadcasti64x2(XMMRegister dst, XMMRegister src, int vector_len);2441void evbroadcasti64x2(XMMRegister dst, Address src, int vector_len);24422443// scalar single/double/128bit precision replicate2444void vbroadcastss(XMMRegister dst, XMMRegister src, int vector_len);2445void vbroadcastss(XMMRegister dst, Address src, int vector_len);2446void vbroadcastsd(XMMRegister dst, XMMRegister src, int vector_len);2447void vbroadcastsd(XMMRegister dst, Address src, int vector_len);2448void vbroadcastf128(XMMRegister dst, Address src, int vector_len);24492450// gpr sourced byte/word/dword/qword replicate2451void evpbroadcastb(XMMRegister dst, Register src, int vector_len);2452void evpbroadcastw(XMMRegister dst, Register src, int vector_len);2453void evpbroadcastd(XMMRegister dst, Register src, int vector_len);2454void evpbroadcastq(XMMRegister dst, Register src, int vector_len);24552456// Gather AVX2 and AVX32457void vpgatherdd(XMMRegister dst, Address src, XMMRegister mask, int vector_len);2458void vpgatherdq(XMMRegister dst, Address src, XMMRegister mask, int vector_len);2459void vgatherdpd(XMMRegister dst, Address src, XMMRegister mask, int vector_len);2460void vgatherdps(XMMRegister dst, Address src, XMMRegister mask, int vector_len);2461void evpgatherdd(XMMRegister dst, KRegister mask, Address src, int vector_len);2462void evpgatherdq(XMMRegister dst, KRegister mask, Address src, int vector_len);2463void evgatherdpd(XMMRegister dst, KRegister mask, Address src, int vector_len);2464void evgatherdps(XMMRegister dst, KRegister mask, Address src, int vector_len);24652466//Scatter AVX3 only2467void evpscatterdd(Address dst, KRegister mask, XMMRegister src, int vector_len);2468void evpscatterdq(Address dst, KRegister mask, XMMRegister src, int vector_len);2469void evscatterdps(Address dst, KRegister mask, XMMRegister src, int vector_len);2470void evscatterdpd(Address dst, KRegister mask, XMMRegister src, int vector_len);24712472// Carry-Less 
Multiplication Quadword2473void pclmulqdq(XMMRegister dst, XMMRegister src, int mask);2474void vpclmulqdq(XMMRegister dst, XMMRegister nds, XMMRegister src, int mask);2475void evpclmulqdq(XMMRegister dst, XMMRegister nds, XMMRegister src, int mask, int vector_len);2476// AVX instruction which is used to clear upper 128 bits of YMM registers and2477// to avoid transition penalty between AVX and SSE states. There is no2478// penalty if legacy SSE instructions are encoded using VEX prefix because2479// they always clear upper 128 bits. It should be used before calling2480// runtime code and native libraries.2481void vzeroupper();24822483// Vector double compares2484void vcmppd(XMMRegister dst, XMMRegister nds, XMMRegister src, int cop, int vector_len);2485void evcmppd(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src,2486ComparisonPredicateFP comparison, int vector_len);24872488// Vector float compares2489void vcmpps(XMMRegister dst, XMMRegister nds, XMMRegister src, int comparison, int vector_len);2490void evcmpps(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src,2491ComparisonPredicateFP comparison, int vector_len);24922493// Vector integer compares2494void vpcmpgtd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);2495void evpcmpd(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src,2496int comparison, bool is_signed, int vector_len);2497void evpcmpd(KRegister kdst, KRegister mask, XMMRegister nds, Address src,2498int comparison, bool is_signed, int vector_len);24992500// Vector long compares2501void evpcmpq(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src,2502int comparison, bool is_signed, int vector_len);2503void evpcmpq(KRegister kdst, KRegister mask, XMMRegister nds, Address src,2504int comparison, bool is_signed, int vector_len);25052506// Vector byte compares2507void evpcmpb(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src,2508int comparison, bool is_signed, int 
vector_len);2509void evpcmpb(KRegister kdst, KRegister mask, XMMRegister nds, Address src,2510int comparison, bool is_signed, int vector_len);25112512// Vector short compares2513void evpcmpw(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src,2514int comparison, bool is_signed, int vector_len);2515void evpcmpw(KRegister kdst, KRegister mask, XMMRegister nds, Address src,2516int comparison, bool is_signed, int vector_len);25172518void evpmovb2m(KRegister dst, XMMRegister src, int vector_len);25192520// Vector blends2521void blendvps(XMMRegister dst, XMMRegister src);2522void blendvpd(XMMRegister dst, XMMRegister src);2523void pblendvb(XMMRegister dst, XMMRegister src);2524void blendvpb(XMMRegister dst, XMMRegister nds, XMMRegister src1, XMMRegister src2, int vector_len);2525void vblendvps(XMMRegister dst, XMMRegister nds, XMMRegister src, XMMRegister mask, int vector_len);2526void vblendvpd(XMMRegister dst, XMMRegister nds, XMMRegister src1, XMMRegister src2, int vector_len);2527void vpblendvb(XMMRegister dst, XMMRegister nds, XMMRegister src, XMMRegister mask, int vector_len);2528void vpblendd(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8, int vector_len);2529void evblendmpd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);2530void evblendmps(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);2531void evpblendmb(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);2532void evpblendmw(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);2533void evpblendmd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);2534void evpblendmq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);2535protected:2536// Next instructions require address alignment 16 bytes SSE mode.2537// They should be 
called only from corresponding MacroAssembler instructions.2538void andpd(XMMRegister dst, Address src);2539void andps(XMMRegister dst, Address src);2540void xorpd(XMMRegister dst, Address src);2541void xorps(XMMRegister dst, Address src);25422543};25442545// The Intel x86/Amd64 Assembler attributes: All fields enclosed here are to guide encoding level decisions.2546// Specific set functions are for specialized use, else defaults or whatever was supplied to object construction2547// are applied.2548class InstructionAttr {2549public:2550InstructionAttr(2551int vector_len, // The length of vector to be applied in encoding - for both AVX and EVEX2552bool rex_vex_w, // Width of data: if 32-bits or less, false, else if 64-bit or specially defined, true2553bool legacy_mode, // Details if either this instruction is conditionally encoded to AVX or earlier if true else possibly EVEX2554bool no_reg_mask, // when true, k0 is used when EVEX encoding is chosen, else embedded_opmask_register_specifier is used2555bool uses_vl) // This instruction may have legacy constraints based on vector length for EVEX2556:2557_rex_vex_w(rex_vex_w),2558_legacy_mode(legacy_mode || UseAVX < 3),2559_no_reg_mask(no_reg_mask),2560_uses_vl(uses_vl),2561_rex_vex_w_reverted(false),2562_is_evex_instruction(false),2563_is_clear_context(true),2564_is_extended_context(false),2565_avx_vector_len(vector_len),2566_tuple_type(Assembler::EVEX_ETUP),2567_input_size_in_bits(Assembler::EVEX_NObit),2568_evex_encoding(0),2569_embedded_opmask_register_specifier(0), // hard code k02570_current_assembler(NULL) { }25712572~InstructionAttr() {2573if (_current_assembler != NULL) {2574_current_assembler->clear_attributes();2575}2576_current_assembler = NULL;2577}25782579private:2580bool _rex_vex_w;2581bool _legacy_mode;2582bool _no_reg_mask;2583bool _uses_vl;2584bool _rex_vex_w_reverted;2585bool _is_evex_instruction;2586bool _is_clear_context;2587bool _is_extended_context;2588int _avx_vector_len;2589int 
_tuple_type;2590int _input_size_in_bits;2591int _evex_encoding;2592int _embedded_opmask_register_specifier;25932594Assembler *_current_assembler;25952596public:2597// query functions for field accessors2598bool is_rex_vex_w(void) const { return _rex_vex_w; }2599bool is_legacy_mode(void) const { return _legacy_mode; }2600bool is_no_reg_mask(void) const { return _no_reg_mask; }2601bool uses_vl(void) const { return _uses_vl; }2602bool is_rex_vex_w_reverted(void) { return _rex_vex_w_reverted; }2603bool is_evex_instruction(void) const { return _is_evex_instruction; }2604bool is_clear_context(void) const { return _is_clear_context; }2605bool is_extended_context(void) const { return _is_extended_context; }2606int get_vector_len(void) const { return _avx_vector_len; }2607int get_tuple_type(void) const { return _tuple_type; }2608int get_input_size(void) const { return _input_size_in_bits; }2609int get_evex_encoding(void) const { return _evex_encoding; }2610int get_embedded_opmask_register_specifier(void) const { return _embedded_opmask_register_specifier; }26112612// Set the vector len manually2613void set_vector_len(int vector_len) { _avx_vector_len = vector_len; }26142615// Set revert rex_vex_w for avx encoding2616void set_rex_vex_w_reverted(void) { _rex_vex_w_reverted = true; }26172618// Set rex_vex_w based on state2619void set_rex_vex_w(bool state) { _rex_vex_w = state; }26202621// Set the instruction to be encoded in AVX mode2622void set_is_legacy_mode(void) { _legacy_mode = true; }26232624// Set the current instuction to be encoded as an EVEX instuction2625void set_is_evex_instruction(void) { _is_evex_instruction = true; }26262627// Internal encoding data used in compressed immediate offset programming2628void set_evex_encoding(int value) { _evex_encoding = value; }26292630// When the Evex.Z field is set (true), it is used to clear all non directed XMM/YMM/ZMM components.2631// This method unsets it so that merge semantics are used instead.2632void 
reset_is_clear_context(void) { _is_clear_context = false; }26332634// Map back to current asembler so that we can manage object level assocation2635void set_current_assembler(Assembler *current_assembler) { _current_assembler = current_assembler; }26362637// Address modifiers used for compressed displacement calculation2638void set_address_attributes(int tuple_type, int input_size_in_bits);26392640// Set embedded opmask register specifier.2641void set_embedded_opmask_register_specifier(KRegister mask) {2642_embedded_opmask_register_specifier = (*mask).encoding() & 0x7;2643}26442645};26462647#endif // CPU_X86_ASSEMBLER_X86_HPP264826492650