Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
PojavLauncherTeam
GitHub Repository: PojavLauncherTeam/mobile
Path: blob/master/src/hotspot/cpu/x86/assembler_x86.hpp
41144 views
1
/*
2
* Copyright (c) 1997, 2021, Oracle and/or its affiliates. All rights reserved.
3
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4
*
5
* This code is free software; you can redistribute it and/or modify it
6
* under the terms of the GNU General Public License version 2 only, as
7
* published by the Free Software Foundation.
8
*
9
* This code is distributed in the hope that it will be useful, but WITHOUT
10
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12
* version 2 for more details (a copy is included in the LICENSE file that
13
* accompanied this code).
14
*
15
* You should have received a copy of the GNU General Public License version
16
* 2 along with this work; if not, write to the Free Software Foundation,
17
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18
*
19
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20
* or visit www.oracle.com if you need additional information or have any
21
* questions.
22
*
23
*/
24
25
#ifndef CPU_X86_ASSEMBLER_X86_HPP
26
#define CPU_X86_ASSEMBLER_X86_HPP
27
28
#include "asm/register.hpp"
29
#include "utilities/powerOfTwo.hpp"
30
31
class BiasedLockingCounters;
32
33
// Contains all the definitions needed for x86 assembly code generation.
34
35
// Calling convention
//
// Describes, per platform, how many arguments are passed in registers for
// the C calling convention (ABI-defined) and for the Java calling
// convention (chosen by the VM).
class Argument {
 public:
  enum {
#ifdef _LP64
#ifdef _WIN64
    // Win64 ABI: only four parameter slots, shared between int and FP args.
    n_int_register_parameters_c   = 4, // rcx, rdx, r8, r9 (c_rarg0, c_rarg1, ...)
    n_float_register_parameters_c = 4, // xmm0 - xmm3 (c_farg0, c_farg1, ... )
    n_int_register_returns_c      = 1, // rax
    n_float_register_returns_c    = 1, // xmm0
#else
    // System V AMD64 ABI (linux/solaris/bsd).
    n_int_register_parameters_c   = 6, // rdi, rsi, rdx, rcx, r8, r9 (c_rarg0, c_rarg1, ...)
    n_float_register_parameters_c = 8, // xmm0 - xmm7 (c_farg0, c_farg1, ... )
    n_int_register_returns_c      = 2, // rax, rdx
    n_float_register_returns_c    = 2, // xmm0, xmm1
#endif // _WIN64
    n_int_register_parameters_j   = 6, // j_rarg0, j_rarg1, ...
    n_float_register_parameters_j = 8  // j_farg0, j_farg1, ...
#else
    n_register_parameters = 0   // 0 registers used to pass arguments
#endif // _LP64
  };
};
58
59
60
#ifdef _LP64
61
// Symbolically name the register arguments used by the c calling convention.
62
// Windows is different from linux/solaris. So much for standards...
63
64
#ifdef _WIN64
65
66
REGISTER_DECLARATION(Register, c_rarg0, rcx);
67
REGISTER_DECLARATION(Register, c_rarg1, rdx);
68
REGISTER_DECLARATION(Register, c_rarg2, r8);
69
REGISTER_DECLARATION(Register, c_rarg3, r9);
70
71
REGISTER_DECLARATION(XMMRegister, c_farg0, xmm0);
72
REGISTER_DECLARATION(XMMRegister, c_farg1, xmm1);
73
REGISTER_DECLARATION(XMMRegister, c_farg2, xmm2);
74
REGISTER_DECLARATION(XMMRegister, c_farg3, xmm3);
75
76
#else
77
78
REGISTER_DECLARATION(Register, c_rarg0, rdi);
79
REGISTER_DECLARATION(Register, c_rarg1, rsi);
80
REGISTER_DECLARATION(Register, c_rarg2, rdx);
81
REGISTER_DECLARATION(Register, c_rarg3, rcx);
82
REGISTER_DECLARATION(Register, c_rarg4, r8);
83
REGISTER_DECLARATION(Register, c_rarg5, r9);
84
85
REGISTER_DECLARATION(XMMRegister, c_farg0, xmm0);
86
REGISTER_DECLARATION(XMMRegister, c_farg1, xmm1);
87
REGISTER_DECLARATION(XMMRegister, c_farg2, xmm2);
88
REGISTER_DECLARATION(XMMRegister, c_farg3, xmm3);
89
REGISTER_DECLARATION(XMMRegister, c_farg4, xmm4);
90
REGISTER_DECLARATION(XMMRegister, c_farg5, xmm5);
91
REGISTER_DECLARATION(XMMRegister, c_farg6, xmm6);
92
REGISTER_DECLARATION(XMMRegister, c_farg7, xmm7);
93
94
#endif // _WIN64
95
96
// Symbolically name the register arguments used by the Java calling convention.
97
// We have control over the convention for java so we can do what we please.
98
// What pleases us is to offset the java calling convention so that when
99
// we call a suitable jni method the arguments are lined up and we don't
100
// have to do little shuffling. A suitable jni method is non-static and a
101
// small number of arguments (two fewer args on windows)
102
//
103
// |-------------------------------------------------------|
104
// | c_rarg0 c_rarg1 c_rarg2 c_rarg3 c_rarg4 c_rarg5 |
105
// |-------------------------------------------------------|
106
// | rcx rdx r8 r9 rdi* rsi* | windows (* not a c_rarg)
107
// | rdi rsi rdx rcx r8 r9 | solaris/linux
108
// |-------------------------------------------------------|
109
// | j_rarg5 j_rarg0 j_rarg1 j_rarg2 j_rarg3 j_rarg4 |
110
// |-------------------------------------------------------|
111
112
REGISTER_DECLARATION(Register, j_rarg0, c_rarg1);
113
REGISTER_DECLARATION(Register, j_rarg1, c_rarg2);
114
REGISTER_DECLARATION(Register, j_rarg2, c_rarg3);
115
// Windows runs out of register args here
116
#ifdef _WIN64
117
REGISTER_DECLARATION(Register, j_rarg3, rdi);
118
REGISTER_DECLARATION(Register, j_rarg4, rsi);
119
#else
120
REGISTER_DECLARATION(Register, j_rarg3, c_rarg4);
121
REGISTER_DECLARATION(Register, j_rarg4, c_rarg5);
122
#endif /* _WIN64 */
123
REGISTER_DECLARATION(Register, j_rarg5, c_rarg0);
124
125
REGISTER_DECLARATION(XMMRegister, j_farg0, xmm0);
126
REGISTER_DECLARATION(XMMRegister, j_farg1, xmm1);
127
REGISTER_DECLARATION(XMMRegister, j_farg2, xmm2);
128
REGISTER_DECLARATION(XMMRegister, j_farg3, xmm3);
129
REGISTER_DECLARATION(XMMRegister, j_farg4, xmm4);
130
REGISTER_DECLARATION(XMMRegister, j_farg5, xmm5);
131
REGISTER_DECLARATION(XMMRegister, j_farg6, xmm6);
132
REGISTER_DECLARATION(XMMRegister, j_farg7, xmm7);
133
134
REGISTER_DECLARATION(Register, rscratch1, r10); // volatile
135
REGISTER_DECLARATION(Register, rscratch2, r11); // volatile
136
137
REGISTER_DECLARATION(Register, r12_heapbase, r12); // callee-saved
138
REGISTER_DECLARATION(Register, r15_thread, r15); // callee-saved
139
140
#else
141
// rscratch1 will apear in 32bit code that is dead but of course must compile
142
// Using noreg ensures if the dead code is incorrectly live and executed it
143
// will cause an assertion failure
144
#define rscratch1 noreg
145
#define rscratch2 noreg
146
147
#endif // _LP64
148
149
// JSR 292
150
// On x86, the SP does not have to be saved when invoking method handle intrinsics
151
// or compiled lambda forms. We indicate that by setting rbp_mh_SP_save to noreg.
152
REGISTER_DECLARATION(Register, rbp_mh_SP_save, noreg);
153
154
// Address is an abstraction used to represent a memory location
155
// using any of the amd64 addressing modes with one object.
156
//
157
// Note: A register location is represented via a Register, not
158
// via an address for efficiency & simplicity reasons.
159
160
class ArrayAddress;
161
162
class Address {
163
public:
164
enum ScaleFactor {
165
no_scale = -1,
166
times_1 = 0,
167
times_2 = 1,
168
times_4 = 2,
169
times_8 = 3,
170
times_ptr = LP64_ONLY(times_8) NOT_LP64(times_4)
171
};
172
static ScaleFactor times(int size) {
173
assert(size >= 1 && size <= 8 && is_power_of_2(size), "bad scale size");
174
if (size == 8) return times_8;
175
if (size == 4) return times_4;
176
if (size == 2) return times_2;
177
return times_1;
178
}
179
static int scale_size(ScaleFactor scale) {
180
assert(scale != no_scale, "");
181
assert(((1 << (int)times_1) == 1 &&
182
(1 << (int)times_2) == 2 &&
183
(1 << (int)times_4) == 4 &&
184
(1 << (int)times_8) == 8), "");
185
return (1 << (int)scale);
186
}
187
188
private:
189
Register _base;
190
Register _index;
191
XMMRegister _xmmindex;
192
ScaleFactor _scale;
193
int _disp;
194
bool _isxmmindex;
195
RelocationHolder _rspec;
196
197
// Easily misused constructors make them private
198
// %%% can we make these go away?
199
NOT_LP64(Address(address loc, RelocationHolder spec);)
200
Address(int disp, address loc, relocInfo::relocType rtype);
201
Address(int disp, address loc, RelocationHolder spec);
202
203
public:
204
205
int disp() { return _disp; }
206
// creation
207
Address()
208
: _base(noreg),
209
_index(noreg),
210
_xmmindex(xnoreg),
211
_scale(no_scale),
212
_disp(0),
213
_isxmmindex(false){
214
}
215
216
// No default displacement otherwise Register can be implicitly
217
// converted to 0(Register) which is quite a different animal.
218
219
Address(Register base, int disp)
220
: _base(base),
221
_index(noreg),
222
_xmmindex(xnoreg),
223
_scale(no_scale),
224
_disp(disp),
225
_isxmmindex(false){
226
}
227
228
Address(Register base, Register index, ScaleFactor scale, int disp = 0)
229
: _base (base),
230
_index(index),
231
_xmmindex(xnoreg),
232
_scale(scale),
233
_disp (disp),
234
_isxmmindex(false) {
235
assert(!index->is_valid() == (scale == Address::no_scale),
236
"inconsistent address");
237
}
238
239
Address(Register base, RegisterOrConstant index, ScaleFactor scale = times_1, int disp = 0)
240
: _base (base),
241
_index(index.register_or_noreg()),
242
_xmmindex(xnoreg),
243
_scale(scale),
244
_disp (disp + (index.constant_or_zero() * scale_size(scale))),
245
_isxmmindex(false){
246
if (!index.is_register()) scale = Address::no_scale;
247
assert(!_index->is_valid() == (scale == Address::no_scale),
248
"inconsistent address");
249
}
250
251
Address(Register base, XMMRegister index, ScaleFactor scale, int disp = 0)
252
: _base (base),
253
_index(noreg),
254
_xmmindex(index),
255
_scale(scale),
256
_disp(disp),
257
_isxmmindex(true) {
258
assert(!index->is_valid() == (scale == Address::no_scale),
259
"inconsistent address");
260
}
261
262
// The following overloads are used in connection with the
263
// ByteSize type (see sizes.hpp). They simplify the use of
264
// ByteSize'd arguments in assembly code.
265
266
Address(Register base, ByteSize disp)
267
: Address(base, in_bytes(disp)) {}
268
269
Address(Register base, Register index, ScaleFactor scale, ByteSize disp)
270
: Address(base, index, scale, in_bytes(disp)) {}
271
272
Address(Register base, RegisterOrConstant index, ScaleFactor scale, ByteSize disp)
273
: Address(base, index, scale, in_bytes(disp)) {}
274
275
Address plus_disp(int disp) const {
276
Address a = (*this);
277
a._disp += disp;
278
return a;
279
}
280
Address plus_disp(RegisterOrConstant disp, ScaleFactor scale = times_1) const {
281
Address a = (*this);
282
a._disp += disp.constant_or_zero() * scale_size(scale);
283
if (disp.is_register()) {
284
assert(!a.index()->is_valid(), "competing indexes");
285
a._index = disp.as_register();
286
a._scale = scale;
287
}
288
return a;
289
}
290
bool is_same_address(Address a) const {
291
// disregard _rspec
292
return _base == a._base && _disp == a._disp && _index == a._index && _scale == a._scale;
293
}
294
295
// accessors
296
bool uses(Register reg) const { return _base == reg || _index == reg; }
297
Register base() const { return _base; }
298
Register index() const { return _index; }
299
XMMRegister xmmindex() const { return _xmmindex; }
300
ScaleFactor scale() const { return _scale; }
301
int disp() const { return _disp; }
302
bool isxmmindex() const { return _isxmmindex; }
303
304
// Convert the raw encoding form into the form expected by the constructor for
305
// Address. An index of 4 (rsp) corresponds to having no index, so convert
306
// that to noreg for the Address constructor.
307
static Address make_raw(int base, int index, int scale, int disp, relocInfo::relocType disp_reloc);
308
309
static Address make_array(ArrayAddress);
310
311
private:
312
bool base_needs_rex() const {
313
return _base->is_valid() && _base->encoding() >= 8;
314
}
315
316
bool index_needs_rex() const {
317
return _index->is_valid() &&_index->encoding() >= 8;
318
}
319
320
bool xmmindex_needs_rex() const {
321
return _xmmindex->is_valid() && _xmmindex->encoding() >= 8;
322
}
323
324
relocInfo::relocType reloc() const { return _rspec.type(); }
325
326
friend class Assembler;
327
friend class MacroAssembler;
328
friend class LIR_Assembler; // base/index/scale/disp
329
};
330
331
//
332
// AddressLiteral has been split out from Address because operands of this type
333
// need to be treated specially on 32bit vs. 64bit platforms. By splitting it out
334
// the few instructions that need to deal with address literals are unique and the
335
// MacroAssembler does not have to implement every instruction in the Assembler
336
// in order to search for address literals that may need special handling depending
337
// on the instruction and the platform. As small step on the way to merging i486/amd64
338
// directories.
339
//
340
class AddressLiteral {
341
friend class ArrayAddress;
342
RelocationHolder _rspec;
343
// Typically we use AddressLiterals we want to use their rval
344
// However in some situations we want the lval (effect address) of the item.
345
// We provide a special factory for making those lvals.
346
bool _is_lval;
347
348
// If the target is far we'll need to load the ea of this to
349
// a register to reach it. Otherwise if near we can do rip
350
// relative addressing.
351
352
address _target;
353
354
protected:
355
// creation
356
AddressLiteral()
357
: _is_lval(false),
358
_target(NULL)
359
{}
360
361
public:
362
363
364
AddressLiteral(address target, relocInfo::relocType rtype);
365
366
AddressLiteral(address target, RelocationHolder const& rspec)
367
: _rspec(rspec),
368
_is_lval(false),
369
_target(target)
370
{}
371
372
AddressLiteral addr() {
373
AddressLiteral ret = *this;
374
ret._is_lval = true;
375
return ret;
376
}
377
378
379
private:
380
381
address target() { return _target; }
382
bool is_lval() { return _is_lval; }
383
384
relocInfo::relocType reloc() const { return _rspec.type(); }
385
const RelocationHolder& rspec() const { return _rspec; }
386
387
friend class Assembler;
388
friend class MacroAssembler;
389
friend class Address;
390
friend class LIR_Assembler;
391
};
392
393
// Convience classes
394
class RuntimeAddress: public AddressLiteral {
395
396
public:
397
398
RuntimeAddress(address target) : AddressLiteral(target, relocInfo::runtime_call_type) {}
399
400
};
401
402
class ExternalAddress: public AddressLiteral {
403
private:
404
static relocInfo::relocType reloc_for_target(address target) {
405
// Sometimes ExternalAddress is used for values which aren't
406
// exactly addresses, like the card table base.
407
// external_word_type can't be used for values in the first page
408
// so just skip the reloc in that case.
409
return external_word_Relocation::can_be_relocated(target) ? relocInfo::external_word_type : relocInfo::none;
410
}
411
412
public:
413
414
ExternalAddress(address target) : AddressLiteral(target, reloc_for_target(target)) {}
415
416
};
417
418
class InternalAddress: public AddressLiteral {
419
420
public:
421
422
InternalAddress(address target) : AddressLiteral(target, relocInfo::internal_word_type) {}
423
424
};
425
426
// x86 can do array addressing as a single operation since disp can be an absolute
427
// address amd64 can't. We create a class that expresses the concept but does extra
428
// magic on amd64 to get the final result
429
430
class ArrayAddress {
431
private:
432
433
AddressLiteral _base;
434
Address _index;
435
436
public:
437
438
ArrayAddress() {};
439
ArrayAddress(AddressLiteral base, Address index): _base(base), _index(index) {};
440
AddressLiteral base() { return _base; }
441
Address index() { return _index; }
442
443
};
444
445
class InstructionAttr;
446
447
// 64-bit refect the fxsave size which is 512 bytes and the new xsave area on EVEX which is another 2176 bytes
448
// See fxsave and xsave(EVEX enabled) documentation for layout
449
const int FPUStateSizeInWords = NOT_LP64(27) LP64_ONLY(2688 / wordSize);
450
451
// The Intel x86/Amd64 Assembler: Pure assembler doing NO optimizations on the instruction
452
// level (e.g. mov rax, 0 is not translated into xor rax, rax!); i.e., what you write
453
// is what you get. The Assembler is generating code into a CodeBuffer.
454
455
class Assembler : public AbstractAssembler {
456
friend class AbstractAssembler; // for the non-virtual hack
457
friend class LIR_Assembler; // as_Address()
458
friend class StubGenerator;
459
460
public:
461
enum Condition { // The x86 condition codes used for conditional jumps/moves.
462
zero = 0x4,
463
notZero = 0x5,
464
equal = 0x4,
465
notEqual = 0x5,
466
less = 0xc,
467
lessEqual = 0xe,
468
greater = 0xf,
469
greaterEqual = 0xd,
470
below = 0x2,
471
belowEqual = 0x6,
472
above = 0x7,
473
aboveEqual = 0x3,
474
overflow = 0x0,
475
noOverflow = 0x1,
476
carrySet = 0x2,
477
carryClear = 0x3,
478
negative = 0x8,
479
positive = 0x9,
480
parity = 0xa,
481
noParity = 0xb
482
};
483
484
enum Prefix {
485
// segment overrides
486
CS_segment = 0x2e,
487
SS_segment = 0x36,
488
DS_segment = 0x3e,
489
ES_segment = 0x26,
490
FS_segment = 0x64,
491
GS_segment = 0x65,
492
493
REX = 0x40,
494
495
REX_B = 0x41,
496
REX_X = 0x42,
497
REX_XB = 0x43,
498
REX_R = 0x44,
499
REX_RB = 0x45,
500
REX_RX = 0x46,
501
REX_RXB = 0x47,
502
503
REX_W = 0x48,
504
505
REX_WB = 0x49,
506
REX_WX = 0x4A,
507
REX_WXB = 0x4B,
508
REX_WR = 0x4C,
509
REX_WRB = 0x4D,
510
REX_WRX = 0x4E,
511
REX_WRXB = 0x4F,
512
513
VEX_3bytes = 0xC4,
514
VEX_2bytes = 0xC5,
515
EVEX_4bytes = 0x62,
516
Prefix_EMPTY = 0x0
517
};
518
519
enum VexPrefix {
520
VEX_B = 0x20,
521
VEX_X = 0x40,
522
VEX_R = 0x80,
523
VEX_W = 0x80
524
};
525
526
enum ExexPrefix {
527
EVEX_F = 0x04,
528
EVEX_V = 0x08,
529
EVEX_Rb = 0x10,
530
EVEX_X = 0x40,
531
EVEX_Z = 0x80
532
};
533
534
enum VexSimdPrefix {
535
VEX_SIMD_NONE = 0x0,
536
VEX_SIMD_66 = 0x1,
537
VEX_SIMD_F3 = 0x2,
538
VEX_SIMD_F2 = 0x3
539
};
540
541
enum VexOpcode {
542
VEX_OPCODE_NONE = 0x0,
543
VEX_OPCODE_0F = 0x1,
544
VEX_OPCODE_0F_38 = 0x2,
545
VEX_OPCODE_0F_3A = 0x3,
546
VEX_OPCODE_MASK = 0x1F
547
};
548
549
enum AvxVectorLen {
550
AVX_128bit = 0x0,
551
AVX_256bit = 0x1,
552
AVX_512bit = 0x2,
553
AVX_NoVec = 0x4
554
};
555
556
enum EvexTupleType {
557
EVEX_FV = 0,
558
EVEX_HV = 4,
559
EVEX_FVM = 6,
560
EVEX_T1S = 7,
561
EVEX_T1F = 11,
562
EVEX_T2 = 13,
563
EVEX_T4 = 15,
564
EVEX_T8 = 17,
565
EVEX_HVM = 18,
566
EVEX_QVM = 19,
567
EVEX_OVM = 20,
568
EVEX_M128 = 21,
569
EVEX_DUP = 22,
570
EVEX_ETUP = 23
571
};
572
573
enum EvexInputSizeInBits {
574
EVEX_8bit = 0,
575
EVEX_16bit = 1,
576
EVEX_32bit = 2,
577
EVEX_64bit = 3,
578
EVEX_NObit = 4
579
};
580
581
enum WhichOperand {
582
// input to locate_operand, and format code for relocations
583
imm_operand = 0, // embedded 32-bit|64-bit immediate operand
584
disp32_operand = 1, // embedded 32-bit displacement or address
585
call32_operand = 2, // embedded 32-bit self-relative displacement
586
#ifndef _LP64
587
_WhichOperand_limit = 3
588
#else
589
narrow_oop_operand = 3, // embedded 32-bit immediate narrow oop
590
_WhichOperand_limit = 4
591
#endif
592
};
593
594
// Comparison predicates for integral types & FP types when using SSE
595
enum ComparisonPredicate {
596
eq = 0,
597
lt = 1,
598
le = 2,
599
_false = 3,
600
neq = 4,
601
nlt = 5,
602
nle = 6,
603
_true = 7
604
};
605
606
// Comparison predicates for FP types when using AVX
607
// O means ordered. U is unordered. When using ordered, any NaN comparison is false. Otherwise, it is true.
608
// S means signaling. Q means non-signaling. When signaling is true, instruction signals #IA on NaN.
609
enum ComparisonPredicateFP {
610
EQ_OQ = 0,
611
LT_OS = 1,
612
LE_OS = 2,
613
UNORD_Q = 3,
614
NEQ_UQ = 4,
615
NLT_US = 5,
616
NLE_US = 6,
617
ORD_Q = 7,
618
EQ_UQ = 8,
619
NGE_US = 9,
620
NGT_US = 0xA,
621
FALSE_OQ = 0XB,
622
NEQ_OQ = 0xC,
623
GE_OS = 0xD,
624
GT_OS = 0xE,
625
TRUE_UQ = 0xF,
626
EQ_OS = 0x10,
627
LT_OQ = 0x11,
628
LE_OQ = 0x12,
629
UNORD_S = 0x13,
630
NEQ_US = 0x14,
631
NLT_UQ = 0x15,
632
NLE_UQ = 0x16,
633
ORD_S = 0x17,
634
EQ_US = 0x18,
635
NGE_UQ = 0x19,
636
NGT_UQ = 0x1A,
637
FALSE_OS = 0x1B,
638
NEQ_OS = 0x1C,
639
GE_OQ = 0x1D,
640
GT_OQ = 0x1E,
641
TRUE_US =0x1F
642
};
643
644
enum Width {
645
B = 0,
646
W = 1,
647
D = 2,
648
Q = 3
649
};
650
651
//---< calculate length of instruction >---
652
// As instruction size can't be found out easily on x86/x64,
653
// we just use '4' for len and maxlen.
654
// instruction must start at passed address
655
static unsigned int instr_len(unsigned char *instr) { return 4; }
656
657
//---< longest instructions >---
658
// Max instruction length is not specified in architecture documentation.
659
// We could use a "safe enough" estimate (15), but just default to
660
// instruction length guess from above.
661
static unsigned int instr_maxlen() { return 4; }
662
663
// NOTE: The general philopsophy of the declarations here is that 64bit versions
664
// of instructions are freely declared without the need for wrapping them an ifdef.
665
// (Some dangerous instructions are ifdef's out of inappropriate jvm's.)
666
// In the .cpp file the implementations are wrapped so that they are dropped out
667
// of the resulting jvm. This is done mostly to keep the footprint of MINIMAL
668
// to the size it was prior to merging up the 32bit and 64bit assemblers.
669
//
670
// This does mean you'll get a linker/runtime error if you use a 64bit only instruction
671
// in a 32bit vm. This is somewhat unfortunate but keeps the ifdef noise down.
672
673
private:
674
675
bool _legacy_mode_bw;
676
bool _legacy_mode_dq;
677
bool _legacy_mode_vl;
678
bool _legacy_mode_vlbw;
679
NOT_LP64(bool _is_managed;)
680
681
class InstructionAttr *_attributes;
682
683
// 64bit prefixes
684
void prefix(Register reg);
685
void prefix(Register dst, Register src, Prefix p);
686
void prefix(Register dst, Address adr, Prefix p);
687
688
void prefix(Address adr);
689
void prefix(Address adr, Register reg, bool byteinst = false);
690
void prefix(Address adr, XMMRegister reg);
691
692
int prefix_and_encode(int reg_enc, bool byteinst = false);
693
int prefix_and_encode(int dst_enc, int src_enc) {
694
return prefix_and_encode(dst_enc, false, src_enc, false);
695
}
696
int prefix_and_encode(int dst_enc, bool dst_is_byte, int src_enc, bool src_is_byte);
697
698
// Some prefixq variants always emit exactly one prefix byte, so besides a
699
// prefix-emitting method we provide a method to get the prefix byte to emit,
700
// which can then be folded into a byte stream.
701
int8_t get_prefixq(Address adr);
702
int8_t get_prefixq(Address adr, Register reg);
703
704
void prefixq(Address adr);
705
void prefixq(Address adr, Register reg);
706
void prefixq(Address adr, XMMRegister reg);
707
708
int prefixq_and_encode(int reg_enc);
709
int prefixq_and_encode(int dst_enc, int src_enc);
710
711
void rex_prefix(Address adr, XMMRegister xreg,
712
VexSimdPrefix pre, VexOpcode opc, bool rex_w);
713
int rex_prefix_and_encode(int dst_enc, int src_enc,
714
VexSimdPrefix pre, VexOpcode opc, bool rex_w);
715
716
void vex_prefix(bool vex_r, bool vex_b, bool vex_x, int nds_enc, VexSimdPrefix pre, VexOpcode opc);
717
718
void evex_prefix(bool vex_r, bool vex_b, bool vex_x, bool evex_r, bool evex_v,
719
int nds_enc, VexSimdPrefix pre, VexOpcode opc);
720
721
void vex_prefix(Address adr, int nds_enc, int xreg_enc,
722
VexSimdPrefix pre, VexOpcode opc,
723
InstructionAttr *attributes);
724
725
int vex_prefix_and_encode(int dst_enc, int nds_enc, int src_enc,
726
VexSimdPrefix pre, VexOpcode opc,
727
InstructionAttr *attributes);
728
729
void simd_prefix(XMMRegister xreg, XMMRegister nds, Address adr, VexSimdPrefix pre,
730
VexOpcode opc, InstructionAttr *attributes);
731
732
int simd_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegister src, VexSimdPrefix pre,
733
VexOpcode opc, InstructionAttr *attributes);
734
735
// Helper functions for groups of instructions
736
void emit_arith_b(int op1, int op2, Register dst, int imm8);
737
738
void emit_arith(int op1, int op2, Register dst, int32_t imm32);
739
// Force generation of a 4 byte immediate value even if it fits into 8bit
740
void emit_arith_imm32(int op1, int op2, Register dst, int32_t imm32);
741
void emit_arith(int op1, int op2, Register dst, Register src);
742
743
bool emit_compressed_disp_byte(int &disp);
744
745
void emit_modrm(int mod, int dst_enc, int src_enc);
746
void emit_modrm_disp8(int mod, int dst_enc, int src_enc,
747
int disp);
748
void emit_modrm_sib(int mod, int dst_enc, int src_enc,
749
Address::ScaleFactor scale, int index_enc, int base_enc);
750
void emit_modrm_sib_disp8(int mod, int dst_enc, int src_enc,
751
Address::ScaleFactor scale, int index_enc, int base_enc,
752
int disp);
753
754
void emit_operand_helper(int reg_enc,
755
int base_enc, int index_enc, Address::ScaleFactor scale,
756
int disp,
757
RelocationHolder const& rspec,
758
int rip_relative_correction = 0);
759
760
void emit_operand(Register reg,
761
Register base, Register index, Address::ScaleFactor scale,
762
int disp,
763
RelocationHolder const& rspec,
764
int rip_relative_correction = 0);
765
766
void emit_operand(Register reg,
767
Register base, XMMRegister index, Address::ScaleFactor scale,
768
int disp,
769
RelocationHolder const& rspec);
770
771
void emit_operand(XMMRegister xreg,
772
Register base, XMMRegister xindex, Address::ScaleFactor scale,
773
int disp,
774
RelocationHolder const& rspec);
775
776
void emit_operand(Register reg, Address adr,
777
int rip_relative_correction = 0);
778
779
void emit_operand(XMMRegister reg,
780
Register base, Register index, Address::ScaleFactor scale,
781
int disp,
782
RelocationHolder const& rspec);
783
784
void emit_operand(XMMRegister reg, Address adr);
785
786
// Immediate-to-memory forms
787
void emit_arith_operand(int op1, Register rm, Address adr, int32_t imm32);
788
789
protected:
790
#ifdef ASSERT
791
void check_relocation(RelocationHolder const& rspec, int format);
792
#endif
793
794
void emit_data(jint data, relocInfo::relocType rtype, int format);
795
void emit_data(jint data, RelocationHolder const& rspec, int format);
796
void emit_data64(jlong data, relocInfo::relocType rtype, int format = 0);
797
void emit_data64(jlong data, RelocationHolder const& rspec, int format = 0);
798
799
bool reachable(AddressLiteral adr) NOT_LP64({ return true;});
800
801
// These are all easily abused and hence protected
802
803
// 32BIT ONLY SECTION
804
#ifndef _LP64
805
// Make these disappear in 64bit mode since they would never be correct
806
void cmp_literal32(Register src1, int32_t imm32, RelocationHolder const& rspec); // 32BIT ONLY
807
void cmp_literal32(Address src1, int32_t imm32, RelocationHolder const& rspec); // 32BIT ONLY
808
809
void mov_literal32(Register dst, int32_t imm32, RelocationHolder const& rspec); // 32BIT ONLY
810
void mov_literal32(Address dst, int32_t imm32, RelocationHolder const& rspec); // 32BIT ONLY
811
812
void push_literal32(int32_t imm32, RelocationHolder const& rspec); // 32BIT ONLY
813
#else
814
// 64BIT ONLY SECTION
815
void mov_literal64(Register dst, intptr_t imm64, RelocationHolder const& rspec); // 64BIT ONLY
816
817
void cmp_narrow_oop(Register src1, int32_t imm32, RelocationHolder const& rspec);
818
void cmp_narrow_oop(Address src1, int32_t imm32, RelocationHolder const& rspec);
819
820
void mov_narrow_oop(Register dst, int32_t imm32, RelocationHolder const& rspec);
821
void mov_narrow_oop(Address dst, int32_t imm32, RelocationHolder const& rspec);
822
#endif // _LP64
823
824
// These are unique in that we are ensured by the caller that the 32bit
825
// relative in these instructions will always be able to reach the potentially
826
// 64bit address described by entry. Since they can take a 64bit address they
827
// don't have the 32 suffix like the other instructions in this class.
828
829
void call_literal(address entry, RelocationHolder const& rspec);
830
void jmp_literal(address entry, RelocationHolder const& rspec);
831
832
// Avoid using directly section
833
// Instructions in this section are actually usable by anyone without danger
834
// of failure but have performance issues that are addressed my enhanced
835
// instructions which will do the proper thing base on the particular cpu.
836
// We protect them because we don't trust you...
837
838
// Don't use next inc() and dec() methods directly. INC & DEC instructions
839
// could cause a partial flag stall since they don't set CF flag.
840
// Use MacroAssembler::decrement() & MacroAssembler::increment() methods
841
// which call inc() & dec() or add() & sub() in accordance with
842
// the product flag UseIncDec value.
843
844
void decl(Register dst);
845
void decl(Address dst);
846
void decq(Address dst);
847
848
void incl(Register dst);
849
void incl(Address dst);
850
void incq(Register dst);
851
void incq(Address dst);
852
853
// New cpus require use of movsd and movss to avoid partial register stall
854
// when loading from memory. But for old Opteron use movlpd instead of movsd.
855
// The selection is done in MacroAssembler::movdbl() and movflt().
856
857
// Move Scalar Single-Precision Floating-Point Values
858
void movss(XMMRegister dst, Address src);
859
void movss(XMMRegister dst, XMMRegister src);
860
void movss(Address dst, XMMRegister src);
861
862
// Move Scalar Double-Precision Floating-Point Values
863
void movsd(XMMRegister dst, Address src);
864
void movsd(XMMRegister dst, XMMRegister src);
865
void movsd(Address dst, XMMRegister src);
866
void movlpd(XMMRegister dst, Address src);
867
868
// New cpus require use of movaps and movapd to avoid partial register stall
869
// when moving between registers.
870
void movaps(XMMRegister dst, XMMRegister src);
871
void movapd(XMMRegister dst, XMMRegister src);
872
873
// End avoid using directly
874
875
876
// Instruction prefixes
877
void prefix(Prefix p);
878
879
public:
880
881
// Creation
882
Assembler(CodeBuffer* code) : AbstractAssembler(code) {
883
init_attributes();
884
}
885
886
// Decoding
887
static address locate_operand(address inst, WhichOperand which);
888
static address locate_next_instruction(address inst);
889
890
// Utilities
891
static bool query_compressed_disp_byte(int disp, bool is_evex_inst, int vector_len,
892
int cur_tuple_type, int in_size_in_bits, int cur_encoding);
893
894
// Generic instructions
895
// Does 32bit or 64bit as needed for the platform. In some sense these
896
// belong in macro assembler but there is no need for both varieties to exist
897
898
void init_attributes(void);
899
900
void set_attributes(InstructionAttr *attributes) { _attributes = attributes; }
901
void clear_attributes(void) { _attributes = NULL; }
902
903
void set_managed(void) { NOT_LP64(_is_managed = true;) }
904
void clear_managed(void) { NOT_LP64(_is_managed = false;) }
905
bool is_managed(void) {
906
NOT_LP64(return _is_managed;)
907
LP64_ONLY(return false;) }
908
909
void lea(Register dst, Address src);
910
911
void mov(Register dst, Register src);
912
913
#ifdef _LP64
914
// support caching the result of some routines
915
916
// must be called before pusha(), popa(), vzeroupper() - checked with asserts
917
static void precompute_instructions();
918
919
void pusha_uncached();
920
void popa_uncached();
921
#endif
922
void vzeroupper_uncached();
923
void decq(Register dst);
924
925
void pusha();
926
void popa();
927
928
void pushf();
929
void popf();
930
931
void push(int32_t imm32);
932
933
void push(Register src);
934
935
void pop(Register dst);
936
937
// These are dummies to prevent surprise implicit conversions to Register
938
void push(void* v);
939
void pop(void* v);
940
941
// These do register sized moves/scans
942
void rep_mov();
943
void rep_stos();
944
void rep_stosb();
945
void repne_scan();
946
#ifdef _LP64
947
void repne_scanl();
948
#endif
949
950
// Vanilla instructions in lexical order
951
952
void adcl(Address dst, int32_t imm32);
953
void adcl(Address dst, Register src);
954
void adcl(Register dst, int32_t imm32);
955
void adcl(Register dst, Address src);
956
void adcl(Register dst, Register src);
957
958
void adcq(Register dst, int32_t imm32);
959
void adcq(Register dst, Address src);
960
void adcq(Register dst, Register src);
961
962
void addb(Address dst, int imm8);
963
void addw(Register dst, Register src);
964
void addw(Address dst, int imm16);
965
966
void addl(Address dst, int32_t imm32);
967
void addl(Address dst, Register src);
968
void addl(Register dst, int32_t imm32);
969
void addl(Register dst, Address src);
970
void addl(Register dst, Register src);
971
972
void addq(Address dst, int32_t imm32);
973
void addq(Address dst, Register src);
974
void addq(Register dst, int32_t imm32);
975
void addq(Register dst, Address src);
976
void addq(Register dst, Register src);
977
978
#ifdef _LP64
979
//Add Unsigned Integers with Carry Flag
980
void adcxq(Register dst, Register src);
981
982
//Add Unsigned Integers with Overflow Flag
983
void adoxq(Register dst, Register src);
984
#endif
985
986
void addr_nop_4();
987
void addr_nop_5();
988
void addr_nop_7();
989
void addr_nop_8();
990
991
// Add Scalar Double-Precision Floating-Point Values
992
void addsd(XMMRegister dst, Address src);
993
void addsd(XMMRegister dst, XMMRegister src);
994
995
// Add Scalar Single-Precision Floating-Point Values
996
void addss(XMMRegister dst, Address src);
997
void addss(XMMRegister dst, XMMRegister src);
998
999
// AES instructions
1000
void aesdec(XMMRegister dst, Address src);
1001
void aesdec(XMMRegister dst, XMMRegister src);
1002
void aesdeclast(XMMRegister dst, Address src);
1003
void aesdeclast(XMMRegister dst, XMMRegister src);
1004
void aesenc(XMMRegister dst, Address src);
1005
void aesenc(XMMRegister dst, XMMRegister src);
1006
void aesenclast(XMMRegister dst, Address src);
1007
void aesenclast(XMMRegister dst, XMMRegister src);
1008
// Vector AES instructions
1009
void vaesenc(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1010
void vaesenclast(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1011
void vaesdec(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1012
void vaesdeclast(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1013
1014
void andw(Register dst, Register src);
1015
void andb(Address dst, Register src);
1016
1017
void andl(Address dst, int32_t imm32);
1018
void andl(Register dst, int32_t imm32);
1019
void andl(Register dst, Address src);
1020
void andl(Register dst, Register src);
1021
void andl(Address dst, Register src);
1022
1023
void andq(Address dst, int32_t imm32);
1024
void andq(Register dst, int32_t imm32);
1025
void andq(Register dst, Address src);
1026
void andq(Register dst, Register src);
1027
void andq(Address dst, Register src);
1028
1029
// BMI instructions
1030
void andnl(Register dst, Register src1, Register src2);
1031
void andnl(Register dst, Register src1, Address src2);
1032
void andnq(Register dst, Register src1, Register src2);
1033
void andnq(Register dst, Register src1, Address src2);
1034
1035
void blsil(Register dst, Register src);
1036
void blsil(Register dst, Address src);
1037
void blsiq(Register dst, Register src);
1038
void blsiq(Register dst, Address src);
1039
1040
void blsmskl(Register dst, Register src);
1041
void blsmskl(Register dst, Address src);
1042
void blsmskq(Register dst, Register src);
1043
void blsmskq(Register dst, Address src);
1044
1045
void blsrl(Register dst, Register src);
1046
void blsrl(Register dst, Address src);
1047
void blsrq(Register dst, Register src);
1048
void blsrq(Register dst, Address src);
1049
1050
void bsfl(Register dst, Register src);
1051
void bsrl(Register dst, Register src);
1052
1053
#ifdef _LP64
1054
void bsfq(Register dst, Register src);
1055
void bsrq(Register dst, Register src);
1056
#endif
1057
1058
void bswapl(Register reg);
1059
1060
void bswapq(Register reg);
1061
1062
void call(Label& L, relocInfo::relocType rtype);
1063
void call(Register reg); // push pc; pc <- reg
1064
void call(Address adr); // push pc; pc <- adr
1065
1066
void cdql();
1067
1068
void cdqq();
1069
1070
void cld();
1071
1072
void clflush(Address adr);
1073
void clflushopt(Address adr);
1074
void clwb(Address adr);
1075
1076
void cmovl(Condition cc, Register dst, Register src);
1077
void cmovl(Condition cc, Register dst, Address src);
1078
1079
void cmovq(Condition cc, Register dst, Register src);
1080
void cmovq(Condition cc, Register dst, Address src);
1081
1082
1083
void cmpb(Address dst, int imm8);
1084
1085
void cmpl(Address dst, int32_t imm32);
1086
1087
void cmp(Register dst, int32_t imm32);
1088
void cmpl(Register dst, int32_t imm32);
1089
void cmpl(Register dst, Register src);
1090
void cmpl(Register dst, Address src);
1091
1092
void cmpq(Address dst, int32_t imm32);
1093
void cmpq(Address dst, Register src);
1094
1095
void cmpq(Register dst, int32_t imm32);
1096
void cmpq(Register dst, Register src);
1097
void cmpq(Register dst, Address src);
1098
1099
// these are dummies used to catch attempting to convert NULL to Register
1100
void cmpl(Register dst, void* junk); // dummy
1101
void cmpq(Register dst, void* junk); // dummy
1102
1103
void cmpw(Address dst, int imm16);
1104
1105
void cmpxchg8 (Address adr);
1106
1107
void cmpxchgb(Register reg, Address adr);
1108
void cmpxchgl(Register reg, Address adr);
1109
1110
void cmpxchgq(Register reg, Address adr);
1111
void cmpxchgw(Register reg, Address adr);
1112
1113
// Ordered Compare Scalar Double-Precision Floating-Point Values and set EFLAGS
1114
void comisd(XMMRegister dst, Address src);
1115
void comisd(XMMRegister dst, XMMRegister src);
1116
1117
// Ordered Compare Scalar Single-Precision Floating-Point Values and set EFLAGS
1118
void comiss(XMMRegister dst, Address src);
1119
void comiss(XMMRegister dst, XMMRegister src);
1120
1121
// Identify processor type and features
1122
void cpuid();
1123
1124
// CRC32C
1125
void crc32(Register crc, Register v, int8_t sizeInBytes);
1126
void crc32(Register crc, Address adr, int8_t sizeInBytes);
1127
1128
// Convert Scalar Double-Precision Floating-Point Value to Scalar Single-Precision Floating-Point Value
1129
void cvtsd2ss(XMMRegister dst, XMMRegister src);
1130
void cvtsd2ss(XMMRegister dst, Address src);
1131
1132
// Convert Doubleword Integer to Scalar Double-Precision Floating-Point Value
1133
void cvtsi2sdl(XMMRegister dst, Register src);
1134
void cvtsi2sdl(XMMRegister dst, Address src);
1135
void cvtsi2sdq(XMMRegister dst, Register src);
1136
void cvtsi2sdq(XMMRegister dst, Address src);
1137
1138
// Convert Doubleword Integer to Scalar Single-Precision Floating-Point Value
1139
void cvtsi2ssl(XMMRegister dst, Register src);
1140
void cvtsi2ssl(XMMRegister dst, Address src);
1141
void cvtsi2ssq(XMMRegister dst, Register src);
1142
void cvtsi2ssq(XMMRegister dst, Address src);
1143
1144
// Convert Packed Signed Doubleword Integers to Packed Double-Precision Floating-Point Value
1145
void cvtdq2pd(XMMRegister dst, XMMRegister src);
1146
void vcvtdq2pd(XMMRegister dst, XMMRegister src, int vector_len);
1147
1148
// Convert Packed Signed Doubleword Integers to Packed Single-Precision Floating-Point Value
1149
void cvtdq2ps(XMMRegister dst, XMMRegister src);
1150
void vcvtdq2ps(XMMRegister dst, XMMRegister src, int vector_len);
1151
1152
// Convert Scalar Single-Precision Floating-Point Value to Scalar Double-Precision Floating-Point Value
1153
void cvtss2sd(XMMRegister dst, XMMRegister src);
1154
void cvtss2sd(XMMRegister dst, Address src);
1155
1156
// Convert with Truncation Scalar Double-Precision Floating-Point Value to Doubleword Integer
1157
void cvttsd2sil(Register dst, Address src);
1158
void cvttsd2sil(Register dst, XMMRegister src);
1159
void cvttsd2siq(Register dst, Address src);
1160
void cvttsd2siq(Register dst, XMMRegister src);
1161
1162
// Convert with Truncation Scalar Single-Precision Floating-Point Value to Doubleword Integer
1163
void cvttss2sil(Register dst, XMMRegister src);
1164
void cvttss2siq(Register dst, XMMRegister src);
1165
1166
// Convert vector double to int
1167
void cvttpd2dq(XMMRegister dst, XMMRegister src);
1168
1169
// Convert vector float and double
1170
void vcvtps2pd(XMMRegister dst, XMMRegister src, int vector_len);
1171
void vcvtpd2ps(XMMRegister dst, XMMRegister src, int vector_len);
1172
1173
// Convert vector long to vector FP
1174
void evcvtqq2ps(XMMRegister dst, XMMRegister src, int vector_len);
1175
void evcvtqq2pd(XMMRegister dst, XMMRegister src, int vector_len);
1176
1177
// Evex casts with truncation
1178
void evpmovwb(XMMRegister dst, XMMRegister src, int vector_len);
1179
void evpmovdw(XMMRegister dst, XMMRegister src, int vector_len);
1180
void evpmovdb(XMMRegister dst, XMMRegister src, int vector_len);
1181
void evpmovqd(XMMRegister dst, XMMRegister src, int vector_len);
1182
void evpmovqb(XMMRegister dst, XMMRegister src, int vector_len);
1183
void evpmovqw(XMMRegister dst, XMMRegister src, int vector_len);
1184
1185
//Abs of packed Integer values
1186
void pabsb(XMMRegister dst, XMMRegister src);
1187
void pabsw(XMMRegister dst, XMMRegister src);
1188
void pabsd(XMMRegister dst, XMMRegister src);
1189
void vpabsb(XMMRegister dst, XMMRegister src, int vector_len);
1190
void vpabsw(XMMRegister dst, XMMRegister src, int vector_len);
1191
void vpabsd(XMMRegister dst, XMMRegister src, int vector_len);
1192
void evpabsq(XMMRegister dst, XMMRegister src, int vector_len);
1193
1194
// Divide Scalar Double-Precision Floating-Point Values
1195
void divsd(XMMRegister dst, Address src);
1196
void divsd(XMMRegister dst, XMMRegister src);
1197
1198
// Divide Scalar Single-Precision Floating-Point Values
1199
void divss(XMMRegister dst, Address src);
1200
void divss(XMMRegister dst, XMMRegister src);
1201
1202
1203
// x87 floating-point instructions — compiled only for 32-bit builds
// (guarded by !_LP64).
#ifndef _LP64
private:

// Emit a two-byte x87 arithmetic opcode (b1, b2+i) for stack slot i.
void emit_farith(int b1, int b2, int i);

public:
void emms();

void fabs();

void fadd(int i);

void fadd_d(Address src);
void fadd_s(Address src);

// "Alternate" versions of x87 instructions place result down in FPU
// stack instead of on TOS

void fadda(int i); // "alternate" fadd
void faddp(int i = 1);

void fchs();

void fcom(int i);

void fcomp(int i = 1);
void fcomp_d(Address src);
void fcomp_s(Address src);

void fcompp();

void fcos();

void fdecstp();

void fdiv(int i);
void fdiv_d(Address src);
void fdivr_s(Address src);
void fdiva(int i);  // "alternate" fdiv
void fdivp(int i = 1);

void fdivr(int i);
void fdivr_d(Address src);
void fdiv_s(Address src);

void fdivra(int i); // "alternate" reversed fdiv

void fdivrp(int i = 1);

void ffree(int i = 0);

void fild_d(Address adr);
void fild_s(Address adr);

void fincstp();

void finit();

void fist_s (Address adr);
void fistp_d(Address adr);
void fistp_s(Address adr);

void fld1();

void fld_d(Address adr);
void fld_s(Address adr);
void fld_s(int index);

void fldcw(Address src);

void fldenv(Address src);

void fldlg2();

void fldln2();

void fldz();

void flog();
void flog10();

void fmul(int i);

void fmul_d(Address src);
void fmul_s(Address src);

void fmula(int i);  // "alternate" fmul

void fmulp(int i = 1);

void fnsave(Address dst);

void fnstcw(Address src);

void fnstsw_ax();

void fprem();
void fprem1();

void frstor(Address src);

void fsin();

void fsqrt();

void fst_d(Address adr);
void fst_s(Address adr);

void fstp_d(Address adr);
void fstp_d(int index);
void fstp_s(Address adr);

void fsub(int i);
void fsub_d(Address src);
void fsub_s(Address src);

void fsuba(int i);  // "alternate" fsub

void fsubp(int i = 1);

void fsubr(int i);
void fsubr_d(Address src);
void fsubr_s(Address src);

void fsubra(int i); // "alternate" reversed fsub

void fsubrp(int i = 1);

void ftan();

void ftst();

void fucomi(int i = 1);
void fucomip(int i = 1);

void fwait();

void fxch(int i = 1);

void fyl2x();
void frndint();
void f2xm1();
void fldl2e();
#endif // !_LP64

// operands that only take the original 32bit registers
1349
void emit_operand32(Register reg, Address adr);
1350
1351
void fld_x(Address adr); // extended-precision (80-bit) format
1352
void fstp_x(Address adr); // extended-precision (80-bit) format
1353
void fxrstor(Address src);
1354
void xrstor(Address src);
1355
1356
void fxsave(Address dst);
1357
void xsave(Address dst);
1358
1359
void hlt();
1360
1361
void idivl(Register src);
1362
void divl(Register src); // Unsigned division
1363
1364
#ifdef _LP64
1365
void idivq(Register src);
1366
#endif
1367
1368
void imull(Register src);
1369
void imull(Register dst, Register src);
1370
void imull(Register dst, Register src, int value);
1371
void imull(Register dst, Address src, int value);
1372
void imull(Register dst, Address src);
1373
1374
#ifdef _LP64
1375
void imulq(Register dst, Register src);
1376
void imulq(Register dst, Register src, int value);
1377
void imulq(Register dst, Address src, int value);
1378
void imulq(Register dst, Address src);
1379
void imulq(Register dst);
1380
#endif
1381
1382
// jcc is the generic conditional branch generator to run-
1383
// time routines, jcc is used for branches to labels. jcc
1384
// takes a branch opcode (cc) and a label (L) and generates
1385
// either a backward branch or a forward branch and links it
1386
// to the label fixup chain. Usage:
1387
//
1388
// Label L; // unbound label
1389
// jcc(cc, L); // forward branch to unbound label
1390
// bind(L); // bind label to the current pc
1391
// jcc(cc, L); // backward branch to bound label
1392
// bind(L); // illegal: a label may be bound only once
1393
//
1394
// Note: The same Label can be used for forward and backward branches
1395
// but it may be bound only once.
1396
1397
void jcc(Condition cc, Label& L, bool maybe_short = true);
1398
1399
// Conditional jump to a 8-bit offset to L.
1400
// WARNING: be very careful using this for forward jumps. If the label is
1401
// not bound within an 8-bit offset of this instruction, a run-time error
1402
// will occur.
1403
1404
// Use macro to record file and line number.
1405
#define jccb(cc, L) jccb_0(cc, L, __FILE__, __LINE__)
1406
1407
void jccb_0(Condition cc, Label& L, const char* file, int line);
1408
1409
void jmp(Address entry); // pc <- entry
1410
1411
// Label operations & relative jumps (PPUM Appendix D)
1412
void jmp(Label& L, bool maybe_short = true); // unconditional jump to L
1413
1414
void jmp(Register entry); // pc <- entry
1415
1416
// Unconditional 8-bit offset jump to L.
1417
// WARNING: be very careful using this for forward jumps. If the label is
1418
// not bound within an 8-bit offset of this instruction, a run-time error
1419
// will occur.
1420
1421
// Use macro to record file and line number.
1422
#define jmpb(L) jmpb_0(L, __FILE__, __LINE__)
1423
1424
void jmpb_0(Label& L, const char* file, int line);
1425
1426
void ldmxcsr( Address src );
1427
1428
void leal(Register dst, Address src);
1429
1430
void leaq(Register dst, Address src);
1431
1432
void lfence();
1433
1434
void lock();
1435
void size_prefix();
1436
1437
void lzcntl(Register dst, Register src);
1438
1439
#ifdef _LP64
1440
void lzcntq(Register dst, Register src);
1441
#endif
1442
1443
enum Membar_mask_bits {
1444
StoreStore = 1 << 3,
1445
LoadStore = 1 << 2,
1446
StoreLoad = 1 << 1,
1447
LoadLoad = 1 << 0
1448
};
1449
1450
// Serializes memory and blows flags
1451
void membar(Membar_mask_bits order_constraint);
1452
1453
void mfence();
1454
void sfence();
1455
1456
// Moves
1457
1458
void mov64(Register dst, int64_t imm64);
1459
void mov64(Register dst, int64_t imm64, relocInfo::relocType rtype, int format);
1460
1461
void movb(Address dst, Register src);
1462
void movb(Address dst, int imm8);
1463
void movb(Register dst, Address src);
1464
1465
void movddup(XMMRegister dst, XMMRegister src);
1466
1467
void kmovbl(KRegister dst, Register src);
1468
void kmovbl(Register dst, KRegister src);
1469
void kmovwl(KRegister dst, Register src);
1470
void kmovwl(KRegister dst, Address src);
1471
void kmovwl(Register dst, KRegister src);
1472
void kmovwl(Address dst, KRegister src);
1473
void kmovwl(KRegister dst, KRegister src);
1474
void kmovdl(KRegister dst, Register src);
1475
void kmovdl(Register dst, KRegister src);
1476
void kmovql(KRegister dst, KRegister src);
1477
void kmovql(Address dst, KRegister src);
1478
void kmovql(KRegister dst, Address src);
1479
void kmovql(KRegister dst, Register src);
1480
void kmovql(Register dst, KRegister src);
1481
1482
void knotwl(KRegister dst, KRegister src);
1483
1484
void kortestbl(KRegister dst, KRegister src);
1485
void kortestwl(KRegister dst, KRegister src);
1486
void kortestdl(KRegister dst, KRegister src);
1487
void kortestql(KRegister dst, KRegister src);
1488
1489
void ktestq(KRegister src1, KRegister src2);
1490
void ktestd(KRegister src1, KRegister src2);
1491
1492
void ktestql(KRegister dst, KRegister src);
1493
1494
void movdl(XMMRegister dst, Register src);
1495
void movdl(Register dst, XMMRegister src);
1496
void movdl(XMMRegister dst, Address src);
1497
void movdl(Address dst, XMMRegister src);
1498
1499
// Move Double Quadword
1500
void movdq(XMMRegister dst, Register src);
1501
void movdq(Register dst, XMMRegister src);
1502
1503
// Move Aligned Double Quadword
1504
void movdqa(XMMRegister dst, XMMRegister src);
1505
void movdqa(XMMRegister dst, Address src);
1506
1507
// Move Unaligned Double Quadword
1508
void movdqu(Address dst, XMMRegister src);
1509
void movdqu(XMMRegister dst, Address src);
1510
void movdqu(XMMRegister dst, XMMRegister src);
1511
1512
// Move Unaligned 256bit Vector
1513
void vmovdqu(Address dst, XMMRegister src);
1514
void vmovdqu(XMMRegister dst, Address src);
1515
void vmovdqu(XMMRegister dst, XMMRegister src);
1516
1517
// Move Unaligned 512bit Vector
1518
void evmovdqub(Address dst, XMMRegister src, bool merge, int vector_len);
1519
void evmovdqub(XMMRegister dst, Address src, bool merge, int vector_len);
1520
void evmovdqub(XMMRegister dst, XMMRegister src, bool merge, int vector_len);
1521
void evmovdqub(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len);
1522
void evmovdqub(Address dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
1523
void evmovdquw(Address dst, XMMRegister src, bool merge, int vector_len);
1524
void evmovdquw(Address dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
1525
void evmovdquw(XMMRegister dst, Address src, bool merge, int vector_len);
1526
void evmovdquw(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len);
1527
void evmovdqul(Address dst, XMMRegister src, int vector_len);
1528
void evmovdqul(XMMRegister dst, Address src, int vector_len);
1529
void evmovdqul(XMMRegister dst, XMMRegister src, int vector_len);
1530
void evmovdqul(Address dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
1531
void evmovdqul(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len);
1532
void evmovdqul(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
1533
void evmovdquq(Address dst, XMMRegister src, int vector_len);
1534
void evmovdquq(XMMRegister dst, Address src, int vector_len);
1535
void evmovdquq(XMMRegister dst, XMMRegister src, int vector_len);
1536
void evmovdquq(Address dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
1537
void evmovdquq(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len);
1538
void evmovdquq(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
1539
1540
// Move lower 64bit to high 64bit in 128bit register
1541
void movlhps(XMMRegister dst, XMMRegister src);
1542
1543
void movl(Register dst, int32_t imm32);
1544
void movl(Address dst, int32_t imm32);
1545
void movl(Register dst, Register src);
1546
void movl(Register dst, Address src);
1547
void movl(Address dst, Register src);
1548
1549
// These dummies prevent using movl from converting a zero (like NULL) into Register
1550
// by giving the compiler two choices it can't resolve
1551
1552
void movl(Address dst, void* junk);
1553
void movl(Register dst, void* junk);
1554
1555
#ifdef _LP64
1556
void movq(Register dst, Register src);
1557
void movq(Register dst, Address src);
1558
void movq(Address dst, Register src);
1559
void movq(Address dst, int32_t imm32);
1560
void movq(Register dst, int32_t imm32);
1561
1562
// These dummies prevent using movq from converting a zero (like NULL) into Register
1563
// by giving the compiler two choices it can't resolve
1564
1565
void movq(Address dst, void* dummy);
1566
void movq(Register dst, void* dummy);
1567
#endif
1568
1569
// Move Quadword
1570
void movq(Address dst, XMMRegister src);
1571
void movq(XMMRegister dst, Address src);
1572
void movq(XMMRegister dst, XMMRegister src);
1573
void movq(Register dst, XMMRegister src);
1574
void movq(XMMRegister dst, Register src);
1575
1576
void movsbl(Register dst, Address src);
1577
void movsbl(Register dst, Register src);
1578
1579
#ifdef _LP64
1580
void movsbq(Register dst, Address src);
1581
void movsbq(Register dst, Register src);
1582
1583
// Move signed 32bit immediate to 64bit extending sign
1584
void movslq(Address dst, int32_t imm64);
1585
void movslq(Register dst, int32_t imm64);
1586
1587
void movslq(Register dst, Address src);
1588
void movslq(Register dst, Register src);
1589
void movslq(Register dst, void* src); // Dummy declaration to cause NULL to be ambiguous
1590
#endif
1591
1592
void movswl(Register dst, Address src);
1593
void movswl(Register dst, Register src);
1594
1595
#ifdef _LP64
1596
void movswq(Register dst, Address src);
1597
void movswq(Register dst, Register src);
1598
#endif
1599
1600
void movw(Address dst, int imm16);
1601
void movw(Register dst, Address src);
1602
void movw(Address dst, Register src);
1603
1604
void movzbl(Register dst, Address src);
1605
void movzbl(Register dst, Register src);
1606
1607
#ifdef _LP64
1608
void movzbq(Register dst, Address src);
1609
void movzbq(Register dst, Register src);
1610
#endif
1611
1612
void movzwl(Register dst, Address src);
1613
void movzwl(Register dst, Register src);
1614
1615
#ifdef _LP64
1616
void movzwq(Register dst, Address src);
1617
void movzwq(Register dst, Register src);
1618
#endif
1619
1620
// Unsigned multiply with RAX destination register
1621
void mull(Address src);
1622
void mull(Register src);
1623
1624
#ifdef _LP64
1625
void mulq(Address src);
1626
void mulq(Register src);
1627
void mulxq(Register dst1, Register dst2, Register src);
1628
#endif
1629
1630
// Multiply Scalar Double-Precision Floating-Point Values
1631
void mulsd(XMMRegister dst, Address src);
1632
void mulsd(XMMRegister dst, XMMRegister src);
1633
1634
// Multiply Scalar Single-Precision Floating-Point Values
1635
void mulss(XMMRegister dst, Address src);
1636
void mulss(XMMRegister dst, XMMRegister src);
1637
1638
void negl(Register dst);
1639
void negl(Address dst);
1640
1641
#ifdef _LP64
1642
void negq(Register dst);
1643
void negq(Address dst);
1644
#endif
1645
1646
void nop(int i = 1);
1647
1648
void notl(Register dst);
1649
1650
#ifdef _LP64
1651
void notq(Register dst);
1652
1653
void btsq(Address dst, int imm8);
1654
void btrq(Address dst, int imm8);
1655
#endif
1656
1657
void orw(Register dst, Register src);
1658
1659
void orl(Address dst, int32_t imm32);
1660
void orl(Register dst, int32_t imm32);
1661
void orl(Register dst, Address src);
1662
void orl(Register dst, Register src);
1663
void orl(Address dst, Register src);
1664
1665
void orb(Address dst, int imm8);
1666
void orb(Address dst, Register src);
1667
1668
void orq(Address dst, int32_t imm32);
1669
void orq(Address dst, Register src);
1670
void orq(Register dst, int32_t imm32);
1671
void orq(Register dst, Address src);
1672
void orq(Register dst, Register src);
1673
1674
// Pack with signed saturation
1675
void packsswb(XMMRegister dst, XMMRegister src);
1676
void vpacksswb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1677
void packssdw(XMMRegister dst, XMMRegister src);
1678
void vpackssdw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1679
1680
// Pack with unsigned saturation
1681
void packuswb(XMMRegister dst, XMMRegister src);
1682
void packuswb(XMMRegister dst, Address src);
1683
void packusdw(XMMRegister dst, XMMRegister src);
1684
void vpackuswb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1685
void vpackusdw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1686
1687
// Permutations
1688
void vpermq(XMMRegister dst, XMMRegister src, int imm8, int vector_len);
1689
void vpermq(XMMRegister dst, XMMRegister src, int imm8);
1690
void vpermq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1691
void vpermb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1692
void vpermw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1693
void vpermd(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
1694
void vpermd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1695
void vperm2i128(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8);
1696
void vperm2f128(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8);
1697
void vpermilps(XMMRegister dst, XMMRegister src, int imm8, int vector_len);
1698
void vpermilpd(XMMRegister dst, XMMRegister src, int imm8, int vector_len);
1699
void vpermpd(XMMRegister dst, XMMRegister src, int imm8, int vector_len);
1700
void evpermi2q(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1701
1702
void pause();
1703
1704
// Undefined Instruction
1705
void ud2();
1706
1707
// SSE4.2 string instructions
1708
void pcmpestri(XMMRegister xmm1, XMMRegister xmm2, int imm8);
1709
void pcmpestri(XMMRegister xmm1, Address src, int imm8);
1710
1711
void pcmpeqb(XMMRegister dst, XMMRegister src);
1712
void vpcmpCCbwd(XMMRegister dst, XMMRegister nds, XMMRegister src, int cond_encoding, int vector_len);
1713
1714
void vpcmpeqb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1715
void evpcmpeqb(KRegister kdst, XMMRegister nds, XMMRegister src, int vector_len);
1716
void evpcmpeqb(KRegister kdst, XMMRegister nds, Address src, int vector_len);
1717
void evpcmpeqb(KRegister kdst, KRegister mask, XMMRegister nds, Address src, int vector_len);
1718
1719
void vpcmpgtb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1720
void evpcmpgtb(KRegister kdst, XMMRegister nds, Address src, int vector_len);
1721
void evpcmpgtb(KRegister kdst, KRegister mask, XMMRegister nds, Address src, int vector_len);
1722
1723
void evpcmpuw(KRegister kdst, XMMRegister nds, XMMRegister src, ComparisonPredicate vcc, int vector_len);
1724
void evpcmpuw(KRegister kdst, XMMRegister nds, Address src, ComparisonPredicate vcc, int vector_len);
1725
1726
void pcmpeqw(XMMRegister dst, XMMRegister src);
1727
void vpcmpeqw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1728
void evpcmpeqw(KRegister kdst, XMMRegister nds, XMMRegister src, int vector_len);
1729
void evpcmpeqw(KRegister kdst, XMMRegister nds, Address src, int vector_len);
1730
1731
void vpcmpgtw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1732
1733
void pcmpeqd(XMMRegister dst, XMMRegister src);
1734
void vpcmpeqd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1735
void evpcmpeqd(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src, int vector_len);
1736
void evpcmpeqd(KRegister kdst, KRegister mask, XMMRegister nds, Address src, int vector_len);
1737
1738
void pcmpeqq(XMMRegister dst, XMMRegister src);
1739
void vpcmpCCq(XMMRegister dst, XMMRegister nds, XMMRegister src, int cond_encoding, int vector_len);
1740
void vpcmpeqq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1741
void evpcmpeqq(KRegister kdst, XMMRegister nds, XMMRegister src, int vector_len);
1742
void evpcmpeqq(KRegister kdst, XMMRegister nds, Address src, int vector_len);
1743
1744
void pcmpgtq(XMMRegister dst, XMMRegister src);
1745
void vpcmpgtq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1746
1747
void pmovmskb(Register dst, XMMRegister src);
1748
void vpmovmskb(Register dst, XMMRegister src, int vec_enc);
1749
1750
// SSE 4.1 extract
1751
void pextrd(Register dst, XMMRegister src, int imm8);
1752
void pextrq(Register dst, XMMRegister src, int imm8);
1753
void pextrd(Address dst, XMMRegister src, int imm8);
1754
void pextrq(Address dst, XMMRegister src, int imm8);
1755
void pextrb(Register dst, XMMRegister src, int imm8);
1756
void pextrb(Address dst, XMMRegister src, int imm8);
1757
// SSE 2 extract
1758
void pextrw(Register dst, XMMRegister src, int imm8);
1759
void pextrw(Address dst, XMMRegister src, int imm8);
1760
1761
// SSE 4.1 insert
1762
void pinsrd(XMMRegister dst, Register src, int imm8);
1763
void pinsrq(XMMRegister dst, Register src, int imm8);
1764
void pinsrb(XMMRegister dst, Register src, int imm8);
1765
void pinsrd(XMMRegister dst, Address src, int imm8);
1766
void pinsrq(XMMRegister dst, Address src, int imm8);
1767
void pinsrb(XMMRegister dst, Address src, int imm8);
1768
void insertps(XMMRegister dst, XMMRegister src, int imm8);
1769
// SSE 2 insert
1770
void pinsrw(XMMRegister dst, Register src, int imm8);
1771
void pinsrw(XMMRegister dst, Address src, int imm8);
1772
1773
// AVX insert
1774
void vpinsrd(XMMRegister dst, XMMRegister nds, Register src, int imm8);
1775
void vpinsrb(XMMRegister dst, XMMRegister nds, Register src, int imm8);
1776
void vpinsrq(XMMRegister dst, XMMRegister nds, Register src, int imm8);
1777
void vpinsrw(XMMRegister dst, XMMRegister nds, Register src, int imm8);
1778
void vinsertps(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8);
1779
1780
// Zero extend moves
1781
void pmovzxbw(XMMRegister dst, XMMRegister src);
1782
void pmovzxbw(XMMRegister dst, Address src);
1783
void pmovzxbd(XMMRegister dst, XMMRegister src);
1784
void vpmovzxbw( XMMRegister dst, Address src, int vector_len);
1785
void pmovzxdq(XMMRegister dst, XMMRegister src);
1786
void vpmovzxbw(XMMRegister dst, XMMRegister src, int vector_len);
1787
void vpmovzxdq(XMMRegister dst, XMMRegister src, int vector_len);
1788
void vpmovzxbd(XMMRegister dst, XMMRegister src, int vector_len);
1789
void vpmovzxbq(XMMRegister dst, XMMRegister src, int vector_len);
1790
void evpmovzxbw(XMMRegister dst, KRegister mask, Address src, int vector_len);
1791
1792
// Sign extend moves
1793
void pmovsxbd(XMMRegister dst, XMMRegister src);
1794
void pmovsxbq(XMMRegister dst, XMMRegister src);
1795
void pmovsxbw(XMMRegister dst, XMMRegister src);
1796
void pmovsxwd(XMMRegister dst, XMMRegister src);
1797
void vpmovsxbd(XMMRegister dst, XMMRegister src, int vector_len);
1798
void vpmovsxbq(XMMRegister dst, XMMRegister src, int vector_len);
1799
void vpmovsxbw(XMMRegister dst, XMMRegister src, int vector_len);
1800
void vpmovsxwd(XMMRegister dst, XMMRegister src, int vector_len);
1801
void vpmovsxwq(XMMRegister dst, XMMRegister src, int vector_len);
1802
void vpmovsxdq(XMMRegister dst, XMMRegister src, int vector_len);
1803
1804
void evpmovwb(Address dst, XMMRegister src, int vector_len);
1805
void evpmovwb(Address dst, KRegister mask, XMMRegister src, int vector_len);
1806
1807
void vpmovzxwd(XMMRegister dst, XMMRegister src, int vector_len);
1808
1809
void evpmovdb(Address dst, XMMRegister src, int vector_len);
1810
1811
// Multiply add
1812
void pmaddwd(XMMRegister dst, XMMRegister src);
1813
void vpmaddwd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1814
// Multiply add accumulate
1815
void evpdpwssd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1816
1817
#ifndef _LP64 // no 32bit push/pop on amd64
1818
void popl(Address dst);
1819
#endif
1820
1821
#ifdef _LP64
1822
void popq(Address dst);
1823
void popq(Register dst);
1824
#endif
1825
1826
void popcntl(Register dst, Address src);
1827
void popcntl(Register dst, Register src);
1828
1829
void vpopcntd(XMMRegister dst, XMMRegister src, int vector_len);
1830
1831
#ifdef _LP64
1832
void popcntq(Register dst, Address src);
1833
void popcntq(Register dst, Register src);
1834
#endif
1835
1836
// Prefetches (SSE, SSE2, 3DNOW only)
1837
1838
void prefetchnta(Address src);
1839
void prefetchr(Address src);
1840
void prefetcht0(Address src);
1841
void prefetcht1(Address src);
1842
void prefetcht2(Address src);
1843
void prefetchw(Address src);
1844
1845
// Shuffle Bytes
1846
void pshufb(XMMRegister dst, XMMRegister src);
1847
void pshufb(XMMRegister dst, Address src);
1848
void vpshufb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1849
1850
// Shuffle Packed Doublewords
1851
void pshufd(XMMRegister dst, XMMRegister src, int mode);
1852
void pshufd(XMMRegister dst, Address src, int mode);
1853
void vpshufd(XMMRegister dst, XMMRegister src, int mode, int vector_len);
1854
1855
// Shuffle Packed High/Low Words
1856
void pshufhw(XMMRegister dst, XMMRegister src, int mode);
1857
void pshuflw(XMMRegister dst, XMMRegister src, int mode);
1858
void pshuflw(XMMRegister dst, Address src, int mode);
1859
1860
//shuffle floats and doubles
1861
void pshufps(XMMRegister, XMMRegister, int);
1862
void pshufpd(XMMRegister, XMMRegister, int);
1863
void vpshufps(XMMRegister, XMMRegister, XMMRegister, int, int);
1864
void vpshufpd(XMMRegister, XMMRegister, XMMRegister, int, int);
1865
1866
// Shuffle packed values at 128 bit granularity
1867
void evshufi64x2(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8, int vector_len);
1868
1869
// Shift Right by bytes Logical DoubleQuadword Immediate
1870
void psrldq(XMMRegister dst, int shift);
1871
// Shift Left by bytes Logical DoubleQuadword Immediate
1872
void pslldq(XMMRegister dst, int shift);
1873
1874
// Logical Compare 128bit
1875
void ptest(XMMRegister dst, XMMRegister src);
1876
void ptest(XMMRegister dst, Address src);
1877
// Logical Compare 256bit
1878
void vptest(XMMRegister dst, XMMRegister src);
1879
void vptest(XMMRegister dst, Address src);
1880
1881
// Vector compare
1882
void vptest(XMMRegister dst, XMMRegister src, int vector_len);
1883
1884
// Interleave Low Bytes
1885
void punpcklbw(XMMRegister dst, XMMRegister src);
1886
void punpcklbw(XMMRegister dst, Address src);
1887
1888
// Interleave Low Doublewords
1889
void punpckldq(XMMRegister dst, XMMRegister src);
1890
void punpckldq(XMMRegister dst, Address src);
1891
1892
// Interleave Low Quadwords
1893
void punpcklqdq(XMMRegister dst, XMMRegister src);
1894
1895
#ifndef _LP64 // no 32bit push/pop on amd64
1896
void pushl(Address src);
1897
#endif
1898
1899
void pushq(Address src);
1900
1901
void rcll(Register dst, int imm8);
1902
1903
void rclq(Register dst, int imm8);
1904
1905
void rcrq(Register dst, int imm8);
1906
1907
void rcpps(XMMRegister dst, XMMRegister src);
1908
1909
void rcpss(XMMRegister dst, XMMRegister src);
1910
1911
void rdtsc();
1912
1913
void ret(int imm16);
1914
1915
void roll(Register dst);
1916
1917
void roll(Register dst, int imm8);
1918
1919
void rorl(Register dst);
1920
1921
void rorl(Register dst, int imm8);
1922
1923
#ifdef _LP64
1924
void rolq(Register dst);
1925
void rolq(Register dst, int imm8);
1926
void rorq(Register dst);
1927
void rorq(Register dst, int imm8);
1928
void rorxq(Register dst, Register src, int imm8);
1929
void rorxd(Register dst, Register src, int imm8);
1930
#endif
1931
1932
void sahf();
1933
1934
void sall(Register dst, int imm8);
1935
void sall(Register dst);
1936
void sall(Address dst, int imm8);
1937
void sall(Address dst);
1938
1939
void sarl(Address dst, int imm8);
1940
void sarl(Address dst);
1941
void sarl(Register dst, int imm8);
1942
void sarl(Register dst);
1943
1944
#ifdef _LP64
1945
void salq(Register dst, int imm8);
1946
void salq(Register dst);
1947
void salq(Address dst, int imm8);
1948
void salq(Address dst);
1949
1950
void sarq(Address dst, int imm8);
1951
void sarq(Address dst);
1952
void sarq(Register dst, int imm8);
1953
void sarq(Register dst);
1954
#endif
1955
1956
void sbbl(Address dst, int32_t imm32);
1957
void sbbl(Register dst, int32_t imm32);
1958
void sbbl(Register dst, Address src);
1959
void sbbl(Register dst, Register src);
1960
1961
void sbbq(Address dst, int32_t imm32);
1962
void sbbq(Register dst, int32_t imm32);
1963
void sbbq(Register dst, Address src);
1964
void sbbq(Register dst, Register src);
1965
1966
void setb(Condition cc, Register dst);
1967
1968
void sete(Register dst);
1969
void setl(Register dst);
1970
void setne(Register dst);
1971
1972
void palignr(XMMRegister dst, XMMRegister src, int imm8);
1973
void vpalignr(XMMRegister dst, XMMRegister src1, XMMRegister src2, int imm8, int vector_len);
1974
void evalignq(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8);
1975
1976
void pblendw(XMMRegister dst, XMMRegister src, int imm8);
1977
void vblendps(XMMRegister dst, XMMRegister src1, XMMRegister src2, int imm8, int vector_len);
1978
1979
void sha1rnds4(XMMRegister dst, XMMRegister src, int imm8);
1980
void sha1nexte(XMMRegister dst, XMMRegister src);
1981
void sha1msg1(XMMRegister dst, XMMRegister src);
1982
void sha1msg2(XMMRegister dst, XMMRegister src);
1983
// xmm0 is implicit additional source to the following instruction.
1984
void sha256rnds2(XMMRegister dst, XMMRegister src);
1985
void sha256msg1(XMMRegister dst, XMMRegister src);
1986
void sha256msg2(XMMRegister dst, XMMRegister src);
1987
1988
void shldl(Register dst, Register src);
1989
void shldl(Register dst, Register src, int8_t imm8);
1990
void shrdl(Register dst, Register src);
1991
void shrdl(Register dst, Register src, int8_t imm8);
1992
1993
void shll(Register dst, int imm8);
1994
void shll(Register dst);
1995
1996
void shlq(Register dst, int imm8);
1997
void shlq(Register dst);
1998
1999
void shrl(Register dst, int imm8);
2000
void shrl(Register dst);
2001
void shrl(Address dst);
2002
void shrl(Address dst, int imm8);
2003
2004
void shrq(Register dst, int imm8);
2005
void shrq(Register dst);
2006
void shrq(Address dst);
2007
void shrq(Address dst, int imm8);
2008
2009
void smovl(); // QQQ generic?
2010
2011
// Compute Square Root of Scalar Double-Precision Floating-Point Value
2012
void sqrtsd(XMMRegister dst, Address src);
2013
void sqrtsd(XMMRegister dst, XMMRegister src);
2014
2015
void roundsd(XMMRegister dst, Address src, int32_t rmode);
2016
void roundsd(XMMRegister dst, XMMRegister src, int32_t rmode);
2017
2018
// Compute Square Root of Scalar Single-Precision Floating-Point Value
2019
void sqrtss(XMMRegister dst, Address src);
2020
void sqrtss(XMMRegister dst, XMMRegister src);
2021
2022
void std();
2023
2024
void stmxcsr( Address dst );
2025
2026
void subl(Address dst, int32_t imm32);
2027
void subl(Address dst, Register src);
2028
void subl(Register dst, int32_t imm32);
2029
void subl(Register dst, Address src);
2030
void subl(Register dst, Register src);
2031
2032
void subq(Address dst, int32_t imm32);
2033
void subq(Address dst, Register src);
2034
void subq(Register dst, int32_t imm32);
2035
void subq(Register dst, Address src);
2036
void subq(Register dst, Register src);
2037
2038
// Force generation of a 4 byte immediate value even if it fits into 8bit
2039
void subl_imm32(Register dst, int32_t imm32);
2040
void subq_imm32(Register dst, int32_t imm32);
2041
2042
// Subtract Scalar Double-Precision Floating-Point Values
2043
void subsd(XMMRegister dst, Address src);
2044
void subsd(XMMRegister dst, XMMRegister src);
2045
2046
// Subtract Scalar Single-Precision Floating-Point Values
2047
void subss(XMMRegister dst, Address src);
2048
void subss(XMMRegister dst, XMMRegister src);
2049
2050
void testb(Register dst, int imm8);
2051
void testb(Address dst, int imm8);
2052
2053
void testl(Register dst, int32_t imm32);
2054
void testl(Register dst, Register src);
2055
void testl(Register dst, Address src);
2056
2057
void testq(Address dst, int32_t imm32);
2058
void testq(Register dst, int32_t imm32);
2059
void testq(Register dst, Register src);
2060
void testq(Register dst, Address src);
2061
2062
// BMI - count trailing zeros
2063
void tzcntl(Register dst, Register src);
2064
void tzcntq(Register dst, Register src);
2065
2066
// Unordered Compare Scalar Double-Precision Floating-Point Values and set EFLAGS
2067
void ucomisd(XMMRegister dst, Address src);
2068
void ucomisd(XMMRegister dst, XMMRegister src);
2069
2070
// Unordered Compare Scalar Single-Precision Floating-Point Values and set EFLAGS
2071
void ucomiss(XMMRegister dst, Address src);
2072
void ucomiss(XMMRegister dst, XMMRegister src);
2073
2074
void xabort(int8_t imm8);
2075
2076
void xaddb(Address dst, Register src);
2077
void xaddw(Address dst, Register src);
2078
void xaddl(Address dst, Register src);
2079
void xaddq(Address dst, Register src);
2080
2081
void xbegin(Label& abort, relocInfo::relocType rtype = relocInfo::none);
2082
2083
void xchgb(Register reg, Address adr);
2084
void xchgw(Register reg, Address adr);
2085
void xchgl(Register reg, Address adr);
2086
void xchgl(Register dst, Register src);
2087
2088
void xchgq(Register reg, Address adr);
2089
void xchgq(Register dst, Register src);
2090
2091
void xend();
2092
2093
// Get Value of Extended Control Register
2094
void xgetbv();
2095
2096
void xorl(Register dst, int32_t imm32);
2097
void xorl(Address dst, int32_t imm32);
2098
void xorl(Register dst, Address src);
2099
void xorl(Register dst, Register src);
2100
void xorl(Address dst, Register src);
2101
2102
void xorb(Address dst, Register src);
2103
void xorb(Register dst, Address src);
2104
void xorw(Register dst, Register src);
2105
2106
void xorq(Register dst, Address src);
2107
void xorq(Address dst, int32_t imm32);
2108
void xorq(Register dst, Register src);
2109
void xorq(Register dst, int32_t imm32);
2110
void xorq(Address dst, Register src);
2111
2112
void set_byte_if_not_zero(Register dst); // sets reg to 1 if not zero, otherwise 0
2113
2114
// AVX 3-operands scalar instructions (encoded with VEX prefix)
2115
2116
void vaddsd(XMMRegister dst, XMMRegister nds, Address src);
2117
void vaddsd(XMMRegister dst, XMMRegister nds, XMMRegister src);
2118
void vaddss(XMMRegister dst, XMMRegister nds, Address src);
2119
void vaddss(XMMRegister dst, XMMRegister nds, XMMRegister src);
2120
void vdivsd(XMMRegister dst, XMMRegister nds, Address src);
2121
void vdivsd(XMMRegister dst, XMMRegister nds, XMMRegister src);
2122
void vdivss(XMMRegister dst, XMMRegister nds, Address src);
2123
void vdivss(XMMRegister dst, XMMRegister nds, XMMRegister src);
2124
void vfmadd231sd(XMMRegister dst, XMMRegister nds, XMMRegister src);
2125
void vfmadd231ss(XMMRegister dst, XMMRegister nds, XMMRegister src);
2126
void vmulsd(XMMRegister dst, XMMRegister nds, Address src);
2127
void vmulsd(XMMRegister dst, XMMRegister nds, XMMRegister src);
2128
void vmulss(XMMRegister dst, XMMRegister nds, Address src);
2129
void vmulss(XMMRegister dst, XMMRegister nds, XMMRegister src);
2130
void vsubsd(XMMRegister dst, XMMRegister nds, Address src);
2131
void vsubsd(XMMRegister dst, XMMRegister nds, XMMRegister src);
2132
void vsubss(XMMRegister dst, XMMRegister nds, Address src);
2133
void vsubss(XMMRegister dst, XMMRegister nds, XMMRegister src);
2134
2135
void vmaxss(XMMRegister dst, XMMRegister nds, XMMRegister src);
2136
void vmaxsd(XMMRegister dst, XMMRegister nds, XMMRegister src);
2137
void vminss(XMMRegister dst, XMMRegister nds, XMMRegister src);
2138
void vminsd(XMMRegister dst, XMMRegister nds, XMMRegister src);
2139
2140
void shlxl(Register dst, Register src1, Register src2);
2141
void shlxq(Register dst, Register src1, Register src2);
2142
void shrxq(Register dst, Register src1, Register src2);
2143
2144
void bzhiq(Register dst, Register src1, Register src2);
2145
2146
//====================VECTOR ARITHMETIC=====================================
2147
void evpmovd2m(KRegister kdst, XMMRegister src, int vector_len);
2148
void evpmovq2m(KRegister kdst, XMMRegister src, int vector_len);
2149
2150
// Add Packed Floating-Point Values
2151
void addpd(XMMRegister dst, XMMRegister src);
2152
void addpd(XMMRegister dst, Address src);
2153
void addps(XMMRegister dst, XMMRegister src);
2154
void vaddpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2155
void vaddps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2156
void vaddpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2157
void vaddps(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2158
2159
// Subtract Packed Floating-Point Values
2160
void subpd(XMMRegister dst, XMMRegister src);
2161
void subps(XMMRegister dst, XMMRegister src);
2162
void vsubpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2163
void vsubps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2164
void vsubpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2165
void vsubps(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2166
2167
// Multiply Packed Floating-Point Values
2168
void mulpd(XMMRegister dst, XMMRegister src);
2169
void mulpd(XMMRegister dst, Address src);
2170
void mulps(XMMRegister dst, XMMRegister src);
2171
void vmulpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2172
void vmulps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2173
void vmulpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2174
void vmulps(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2175
2176
void vfmadd231pd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2177
void vfmadd231ps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2178
void vfmadd231pd(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2179
void vfmadd231ps(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2180
2181
// Divide Packed Floating-Point Values
2182
void divpd(XMMRegister dst, XMMRegister src);
2183
void divps(XMMRegister dst, XMMRegister src);
2184
void vdivpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2185
void vdivps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2186
void vdivpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2187
void vdivps(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2188
2189
// Sqrt Packed Floating-Point Values
2190
void vsqrtpd(XMMRegister dst, XMMRegister src, int vector_len);
2191
void vsqrtpd(XMMRegister dst, Address src, int vector_len);
2192
void vsqrtps(XMMRegister dst, XMMRegister src, int vector_len);
2193
void vsqrtps(XMMRegister dst, Address src, int vector_len);
2194
2195
// Round Packed Double precision value.
2196
void vroundpd(XMMRegister dst, XMMRegister src, int32_t rmode, int vector_len);
2197
void vroundpd(XMMRegister dst, Address src, int32_t rmode, int vector_len);
2198
void vrndscalepd(XMMRegister dst, XMMRegister src, int32_t rmode, int vector_len);
2199
void vrndscalepd(XMMRegister dst, Address src, int32_t rmode, int vector_len);
2200
2201
// Bitwise Logical AND of Packed Floating-Point Values
2202
void andpd(XMMRegister dst, XMMRegister src);
2203
void andps(XMMRegister dst, XMMRegister src);
2204
void vandpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2205
void vandps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2206
void vandpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2207
void vandps(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2208
2209
void unpckhpd(XMMRegister dst, XMMRegister src);
2210
void unpcklpd(XMMRegister dst, XMMRegister src);
2211
2212
// Bitwise Logical XOR of Packed Floating-Point Values
2213
void xorpd(XMMRegister dst, XMMRegister src);
2214
void xorps(XMMRegister dst, XMMRegister src);
2215
void vxorpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2216
void vxorps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2217
void vxorpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2218
void vxorps(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2219
2220
// Add horizontal packed integers
2221
void vphaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2222
void vphaddd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2223
void phaddw(XMMRegister dst, XMMRegister src);
2224
void phaddd(XMMRegister dst, XMMRegister src);
2225
2226
// Add packed integers
2227
void paddb(XMMRegister dst, XMMRegister src);
2228
void paddw(XMMRegister dst, XMMRegister src);
2229
void paddd(XMMRegister dst, XMMRegister src);
2230
void paddd(XMMRegister dst, Address src);
2231
void paddq(XMMRegister dst, XMMRegister src);
2232
void vpaddb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2233
void vpaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2234
void vpaddd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2235
void vpaddq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2236
void vpaddb(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2237
void vpaddw(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2238
void vpaddd(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2239
void vpaddq(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2240
2241
// Sub packed integers
2242
void psubb(XMMRegister dst, XMMRegister src);
2243
void psubw(XMMRegister dst, XMMRegister src);
2244
void psubd(XMMRegister dst, XMMRegister src);
2245
void psubq(XMMRegister dst, XMMRegister src);
2246
void vpsubb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2247
void vpsubw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2248
void vpsubd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2249
void vpsubq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2250
void vpsubb(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2251
void vpsubw(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2252
void vpsubd(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2253
void vpsubq(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2254
2255
// Multiply packed integers (only shorts and ints)
2256
void pmullw(XMMRegister dst, XMMRegister src);
2257
void pmulld(XMMRegister dst, XMMRegister src);
2258
void pmuludq(XMMRegister dst, XMMRegister src);
2259
void vpmullw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2260
void vpmulld(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2261
void vpmullq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2262
void vpmuludq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2263
void vpmullw(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2264
void vpmulld(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2265
void vpmullq(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2266
2267
// Minimum of packed integers
2268
void pminsb(XMMRegister dst, XMMRegister src);
2269
void vpminsb(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
2270
void pminsw(XMMRegister dst, XMMRegister src);
2271
void vpminsw(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
2272
void pminsd(XMMRegister dst, XMMRegister src);
2273
void vpminsd(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
2274
void vpminsq(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
2275
void minps(XMMRegister dst, XMMRegister src);
2276
void vminps(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
2277
void minpd(XMMRegister dst, XMMRegister src);
2278
void vminpd(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
2279
2280
// Maximum of packed integers
2281
void pmaxsb(XMMRegister dst, XMMRegister src);
2282
void vpmaxsb(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
2283
void pmaxsw(XMMRegister dst, XMMRegister src);
2284
void vpmaxsw(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
2285
void pmaxsd(XMMRegister dst, XMMRegister src);
2286
void vpmaxsd(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
2287
void vpmaxsq(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
2288
void maxps(XMMRegister dst, XMMRegister src);
2289
void vmaxps(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
2290
void maxpd(XMMRegister dst, XMMRegister src);
2291
void vmaxpd(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
2292
2293
// Shift left packed integers
2294
void psllw(XMMRegister dst, int shift);
2295
void pslld(XMMRegister dst, int shift);
2296
void psllq(XMMRegister dst, int shift);
2297
void psllw(XMMRegister dst, XMMRegister shift);
2298
void pslld(XMMRegister dst, XMMRegister shift);
2299
void psllq(XMMRegister dst, XMMRegister shift);
2300
void vpsllw(XMMRegister dst, XMMRegister src, int shift, int vector_len);
2301
void vpslld(XMMRegister dst, XMMRegister src, int shift, int vector_len);
2302
void vpsllq(XMMRegister dst, XMMRegister src, int shift, int vector_len);
2303
void vpsllw(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2304
void vpslld(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2305
void vpsllq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2306
void vpslldq(XMMRegister dst, XMMRegister src, int shift, int vector_len);
2307
2308
// Logical shift right packed integers
2309
void psrlw(XMMRegister dst, int shift);
2310
void psrld(XMMRegister dst, int shift);
2311
void psrlq(XMMRegister dst, int shift);
2312
void psrlw(XMMRegister dst, XMMRegister shift);
2313
void psrld(XMMRegister dst, XMMRegister shift);
2314
void psrlq(XMMRegister dst, XMMRegister shift);
2315
void vpsrlw(XMMRegister dst, XMMRegister src, int shift, int vector_len);
2316
void vpsrld(XMMRegister dst, XMMRegister src, int shift, int vector_len);
2317
void vpsrlq(XMMRegister dst, XMMRegister src, int shift, int vector_len);
2318
void vpsrlw(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2319
void vpsrld(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2320
void vpsrlq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2321
void vpsrldq(XMMRegister dst, XMMRegister src, int shift, int vector_len);
2322
void evpsrlvw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2323
void evpsllvw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2324
2325
// Arithmetic shift right packed integers (only shorts and ints, no instructions for longs)
2326
void psraw(XMMRegister dst, int shift);
2327
void psrad(XMMRegister dst, int shift);
2328
void psraw(XMMRegister dst, XMMRegister shift);
2329
void psrad(XMMRegister dst, XMMRegister shift);
2330
void vpsraw(XMMRegister dst, XMMRegister src, int shift, int vector_len);
2331
void vpsrad(XMMRegister dst, XMMRegister src, int shift, int vector_len);
2332
void vpsraw(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2333
void vpsrad(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2334
void evpsravw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2335
void evpsraq(XMMRegister dst, XMMRegister src, int shift, int vector_len);
2336
void evpsraq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2337
2338
// Variable shift left packed integers
2339
void vpsllvd(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2340
void vpsllvq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2341
2342
// Variable shift right packed integers
2343
void vpsrlvd(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2344
void vpsrlvq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2345
2346
// Variable shift right arithmetic packed integers
2347
void vpsravd(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2348
void evpsravq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2349
2350
void vpshldvd(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2351
void vpshrdvd(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2352
2353
// And packed integers
2354
void pand(XMMRegister dst, XMMRegister src);
2355
void vpand(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2356
void vpand(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2357
void evpandd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2358
void vpandq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2359
2360
// Andn packed integers
2361
void pandn(XMMRegister dst, XMMRegister src);
2362
void vpandn(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2363
2364
// Or packed integers
2365
void por(XMMRegister dst, XMMRegister src);
2366
void vpor(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2367
void vpor(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2368
void vporq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2369
2370
void evpord(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2371
void evpord(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2372
2373
// Xor packed integers
2374
void pxor(XMMRegister dst, XMMRegister src);
2375
void vpxor(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2376
void vpxor(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2377
void vpxorq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2378
void evpxord(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2379
void evpxorq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2380
void evpxorq(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2381
2382
// Ternary logic instruction.
2383
void vpternlogd(XMMRegister dst, int imm8, XMMRegister src2, XMMRegister src3, int vector_len);
2384
void vpternlogd(XMMRegister dst, int imm8, XMMRegister src2, Address src3, int vector_len);
2385
void vpternlogq(XMMRegister dst, int imm8, XMMRegister src2, XMMRegister src3, int vector_len);
2386
2387
// Vector Rotate Left/Right instruction.
2388
void evprolvd(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2389
void evprolvq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2390
void evprorvd(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2391
void evprorvq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2392
void evprold(XMMRegister dst, XMMRegister src, int shift, int vector_len);
2393
void evprolq(XMMRegister dst, XMMRegister src, int shift, int vector_len);
2394
void evprord(XMMRegister dst, XMMRegister src, int shift, int vector_len);
2395
void evprorq(XMMRegister dst, XMMRegister src, int shift, int vector_len);
2396
2397
// vinserti forms
2398
void vinserti128(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8);
2399
void vinserti128(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8);
2400
void vinserti32x4(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8);
2401
void vinserti32x4(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8);
2402
void vinserti64x4(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8);
2403
2404
// vinsertf forms
2405
void vinsertf128(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8);
2406
void vinsertf128(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8);
2407
void vinsertf32x4(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8);
2408
void vinsertf32x4(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8);
2409
void vinsertf64x4(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8);
2410
void vinsertf64x4(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8);
2411
2412
// vextracti forms
2413
void vextracti128(XMMRegister dst, XMMRegister src, uint8_t imm8);
2414
void vextracti128(Address dst, XMMRegister src, uint8_t imm8);
2415
void vextracti32x4(XMMRegister dst, XMMRegister src, uint8_t imm8);
2416
void vextracti32x4(Address dst, XMMRegister src, uint8_t imm8);
2417
void vextracti64x2(XMMRegister dst, XMMRegister src, uint8_t imm8);
2418
void vextracti64x4(XMMRegister dst, XMMRegister src, uint8_t imm8);
2419
void vextracti64x4(Address dst, XMMRegister src, uint8_t imm8);
2420
2421
// vextractf forms
2422
void vextractf128(XMMRegister dst, XMMRegister src, uint8_t imm8);
2423
void vextractf128(Address dst, XMMRegister src, uint8_t imm8);
2424
void vextractf32x4(XMMRegister dst, XMMRegister src, uint8_t imm8);
2425
void vextractf32x4(Address dst, XMMRegister src, uint8_t imm8);
2426
void vextractf64x2(XMMRegister dst, XMMRegister src, uint8_t imm8);
2427
void vextractf64x4(XMMRegister dst, XMMRegister src, uint8_t imm8);
2428
void vextractf64x4(Address dst, XMMRegister src, uint8_t imm8);
2429
2430
// xmm/mem sourced byte/word/dword/qword replicate
2431
// xmm/mem sourced byte/word/dword/qword replicate (AVX2 VPBROADCASTB/W/D/Q)
void vpbroadcastb(XMMRegister dst, XMMRegister src, int vector_len);
void vpbroadcastb(XMMRegister dst, Address src, int vector_len);
void vpbroadcastw(XMMRegister dst, XMMRegister src, int vector_len);
void vpbroadcastw(XMMRegister dst, Address src, int vector_len);
void vpbroadcastd(XMMRegister dst, XMMRegister src, int vector_len);
void vpbroadcastd(XMMRegister dst, Address src, int vector_len);
void vpbroadcastq(XMMRegister dst, XMMRegister src, int vector_len);
void vpbroadcastq(XMMRegister dst, Address src, int vector_len);

// 128-bit lane replicate (EVEX-encoded broadcast of a 16-byte element group)
void evbroadcasti32x4(XMMRegister dst, Address src, int vector_len);
void evbroadcasti64x2(XMMRegister dst, XMMRegister src, int vector_len);
void evbroadcasti64x2(XMMRegister dst, Address src, int vector_len);

// scalar single/double/128bit precision replicate
void vbroadcastss(XMMRegister dst, XMMRegister src, int vector_len);
void vbroadcastss(XMMRegister dst, Address src, int vector_len);
void vbroadcastsd(XMMRegister dst, XMMRegister src, int vector_len);
void vbroadcastsd(XMMRegister dst, Address src, int vector_len);
void vbroadcastf128(XMMRegister dst, Address src, int vector_len);

// gpr sourced byte/word/dword/qword replicate
void evpbroadcastb(XMMRegister dst, Register src, int vector_len);
void evpbroadcastw(XMMRegister dst, Register src, int vector_len);
void evpbroadcastd(XMMRegister dst, Register src, int vector_len);
void evpbroadcastq(XMMRegister dst, Register src, int vector_len);

// Gather AVX2 and AVX3
// AVX2 forms take an XMM register mask; AVX3 (EVEX) forms take a KRegister opmask.
void vpgatherdd(XMMRegister dst, Address src, XMMRegister mask, int vector_len);
void vpgatherdq(XMMRegister dst, Address src, XMMRegister mask, int vector_len);
void vgatherdpd(XMMRegister dst, Address src, XMMRegister mask, int vector_len);
void vgatherdps(XMMRegister dst, Address src, XMMRegister mask, int vector_len);
void evpgatherdd(XMMRegister dst, KRegister mask, Address src, int vector_len);
void evpgatherdq(XMMRegister dst, KRegister mask, Address src, int vector_len);
void evgatherdpd(XMMRegister dst, KRegister mask, Address src, int vector_len);
void evgatherdps(XMMRegister dst, KRegister mask, Address src, int vector_len);

// Scatter AVX3 only
void evpscatterdd(Address dst, KRegister mask, XMMRegister src, int vector_len);
void evpscatterdq(Address dst, KRegister mask, XMMRegister src, int vector_len);
void evscatterdps(Address dst, KRegister mask, XMMRegister src, int vector_len);
void evscatterdpd(Address dst, KRegister mask, XMMRegister src, int vector_len);

// Carry-Less Multiplication Quadword
void pclmulqdq(XMMRegister dst, XMMRegister src, int mask);
void vpclmulqdq(XMMRegister dst, XMMRegister nds, XMMRegister src, int mask);
void evpclmulqdq(XMMRegister dst, XMMRegister nds, XMMRegister src, int mask, int vector_len);

// AVX instruction which is used to clear upper 128 bits of YMM registers and
// to avoid transaction penalty between AVX and SSE states. There is no
// penalty if legacy SSE instructions are encoded using VEX prefix because
// they always clear upper 128 bits. It should be used before calling
// runtime code and native libraries.
void vzeroupper();

// Vector double compares
void vcmppd(XMMRegister dst, XMMRegister nds, XMMRegister src, int cop, int vector_len);
void evcmppd(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src,
ComparisonPredicateFP comparison, int vector_len);

// Vector float compares
void vcmpps(XMMRegister dst, XMMRegister nds, XMMRegister src, int comparison, int vector_len);
void evcmpps(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src,
ComparisonPredicateFP comparison, int vector_len);

// Vector integer compares
void vpcmpgtd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void evpcmpd(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src,
int comparison, bool is_signed, int vector_len);
void evpcmpd(KRegister kdst, KRegister mask, XMMRegister nds, Address src,
int comparison, bool is_signed, int vector_len);

// Vector long compares
void evpcmpq(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src,
int comparison, bool is_signed, int vector_len);
void evpcmpq(KRegister kdst, KRegister mask, XMMRegister nds, Address src,
int comparison, bool is_signed, int vector_len);

// Vector byte compares
void evpcmpb(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src,
int comparison, bool is_signed, int vector_len);
void evpcmpb(KRegister kdst, KRegister mask, XMMRegister nds, Address src,
int comparison, bool is_signed, int vector_len);

// Vector short compares
void evpcmpw(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src,
int comparison, bool is_signed, int vector_len);
void evpcmpw(KRegister kdst, KRegister mask, XMMRegister nds, Address src,
int comparison, bool is_signed, int vector_len);

void evpmovb2m(KRegister dst, XMMRegister src, int vector_len);

// Vector blends
void blendvps(XMMRegister dst, XMMRegister src);
void blendvpd(XMMRegister dst, XMMRegister src);
void pblendvb(XMMRegister dst, XMMRegister src);
void blendvpb(XMMRegister dst, XMMRegister nds, XMMRegister src1, XMMRegister src2, int vector_len);
void vblendvps(XMMRegister dst, XMMRegister nds, XMMRegister src, XMMRegister mask, int vector_len);
void vblendvpd(XMMRegister dst, XMMRegister nds, XMMRegister src1, XMMRegister src2, int vector_len);
void vpblendvb(XMMRegister dst, XMMRegister nds, XMMRegister src, XMMRegister mask, int vector_len);
void vpblendd(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8, int vector_len);
// EVEX opmask-controlled blends; 'merge' selects merge-masking vs zero-masking semantics.
void evblendmpd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
void evblendmps(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
void evpblendmb(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
void evpblendmw(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
void evpblendmd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
void evpblendmq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
protected:
// Next instructions require address alignment 16 bytes SSE mode.
// They should be called only from corresponding MacroAssembler instructions.
void andpd(XMMRegister dst, Address src);
void andps(XMMRegister dst, Address src);
void xorpd(XMMRegister dst, Address src);
void xorps(XMMRegister dst, Address src);

};
2545
2546
// The Intel x86/AMD64 Assembler attributes: All fields enclosed here are to guide encoding level decisions.
// Specific set functions are for specialized use, else defaults or whatever was supplied to object construction
// are applied.
class InstructionAttr {
public:
  InstructionAttr(
    int vector_len,     // The length of vector to be applied in encoding - for both AVX and EVEX
    bool rex_vex_w,     // Width of data: if 32-bits or less, false, else if 64-bit or specially defined, true
    bool legacy_mode,   // Details if either this instruction is conditionally encoded to AVX or earlier if true else possibly EVEX
    bool no_reg_mask,   // when true, k0 is used when EVEX encoding is chosen, else embedded_opmask_register_specifier is used
    bool uses_vl)       // This instruction may have legacy constraints based on vector length for EVEX
    :
    _rex_vex_w(rex_vex_w),
    // Anything below AVX-512 (UseAVX < 3) can never be EVEX-encoded.
    _legacy_mode(legacy_mode || UseAVX < 3),
    _no_reg_mask(no_reg_mask),
    _uses_vl(uses_vl),
    _rex_vex_w_reverted(false),
    _is_evex_instruction(false),
    _is_clear_context(true),
    _is_extended_context(false),
    _avx_vector_len(vector_len),
    _tuple_type(Assembler::EVEX_ETUP),
    _input_size_in_bits(Assembler::EVEX_NObit),
    _evex_encoding(0),
    _embedded_opmask_register_specifier(0), // hard code k0
    _current_assembler(NULL) { }

  // On destruction, detach from the owning assembler so stale attributes
  // cannot leak into the encoding of a subsequent instruction.
  ~InstructionAttr() {
    if (_current_assembler != NULL) {
      _current_assembler->clear_attributes();
    }
    _current_assembler = NULL;
  }

private:
  bool _rex_vex_w;
  bool _legacy_mode;
  bool _no_reg_mask;
  bool _uses_vl;
  bool _rex_vex_w_reverted;
  bool _is_evex_instruction;
  bool _is_clear_context;
  bool _is_extended_context;
  int  _avx_vector_len;
  int  _tuple_type;
  int  _input_size_in_bits;
  int  _evex_encoding;
  int  _embedded_opmask_register_specifier;

  Assembler *_current_assembler;   // assembler this attribute object is bound to, if any

public:
  // query functions for field accessors
  bool is_rex_vex_w(void) const { return _rex_vex_w; }
  bool is_legacy_mode(void) const { return _legacy_mode; }
  bool is_no_reg_mask(void) const { return _no_reg_mask; }
  bool uses_vl(void) const { return _uses_vl; }
  bool is_rex_vex_w_reverted(void) { return _rex_vex_w_reverted; }
  bool is_evex_instruction(void) const { return _is_evex_instruction; }
  bool is_clear_context(void) const { return _is_clear_context; }
  bool is_extended_context(void) const { return _is_extended_context; }
  int  get_vector_len(void) const { return _avx_vector_len; }
  int  get_tuple_type(void) const { return _tuple_type; }
  int  get_input_size(void) const { return _input_size_in_bits; }
  int  get_evex_encoding(void) const { return _evex_encoding; }
  int  get_embedded_opmask_register_specifier(void) const { return _embedded_opmask_register_specifier; }

  // Set the vector len manually
  void set_vector_len(int vector_len) { _avx_vector_len = vector_len; }

  // Set revert rex_vex_w for avx encoding
  void set_rex_vex_w_reverted(void) { _rex_vex_w_reverted = true; }

  // Set rex_vex_w based on state
  void set_rex_vex_w(bool state) { _rex_vex_w = state; }

  // Set the instruction to be encoded in AVX mode
  void set_is_legacy_mode(void) { _legacy_mode = true; }

  // Set the current instruction to be encoded as an EVEX instruction
  void set_is_evex_instruction(void) { _is_evex_instruction = true; }

  // Internal encoding data used in compressed immediate offset programming
  void set_evex_encoding(int value) { _evex_encoding = value; }

  // When the Evex.Z field is set (true), it is used to clear all non directed XMM/YMM/ZMM components.
  // This method unsets it so that merge semantics are used instead.
  void reset_is_clear_context(void) { _is_clear_context = false; }

  // Map back to current assembler so that we can manage object level association
  void set_current_assembler(Assembler *current_assembler) { _current_assembler = current_assembler; }

  // Address modifiers used for compressed displacement calculation
  void set_address_attributes(int tuple_type, int input_size_in_bits);

  // Set embedded opmask register specifier.
  // Only the low 3 bits are kept: the EVEX aaa field addresses k0-k7.
  void set_embedded_opmask_register_specifier(KRegister mask) {
    _embedded_opmask_register_specifier = (*mask).encoding() & 0x7;
  }

};
2647
2648
#endif // CPU_X86_ASSEMBLER_X86_HPP
2649
2650