GitHub Repository: hrydgard/ppsspp
Path: blob/master/Common/ArmEmitter.h
// Copyright (C) 2003 Dolphin Project.

// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0.

// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.

// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/

// Official SVN repository and contact information can be found at
// http://code.google.com/p/dolphin-emu/

#pragma once

#include <vector>
#include <cstdint>

#include "Common/CommonTypes.h"
#include "Common/Log.h"
#include "Common/ArmCommon.h"
#include "Common/CodeBlock.h"

// VCVT flags
#define TO_FLOAT      0
#define TO_INT        (1 << 0)
#define IS_SIGNED     (1 << 1)
#define ROUND_TO_ZERO (1 << 2)
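// Example (illustrative sketch, not from the original source): these flags are passed to the
// VCVT(Dest, Src, flags) member declared further down, e.g.
//   VCVT(S0, S0, TO_INT | IS_SIGNED | ROUND_TO_ZERO);  // float -> signed int, truncating
//   VCVT(S1, S1, TO_FLOAT | IS_SIGNED);                // signed int -> float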

// Unclear why we suddenly need this.
#undef VMIN

namespace ArmGen
{
enum ARMReg
{
	// GPRs
	R0 = 0, R1, R2, R3, R4, R5,
	R6, R7, R8, R9, R10, R11,

	// SPRs
	// R13 - R15 are SP, LR, and PC.
	// Almost always referred to by name instead of register number
	R12 = 12, R13 = 13, R14 = 14, R15 = 15,
	R_IP = 12, R_SP = 13, R_LR = 14, R_PC = 15,


	// VFP single precision registers
	S0, S1, S2, S3, S4, S5, S6,
	S7, S8, S9, S10, S11, S12, S13,
	S14, S15, S16, S17, S18, S19, S20,
	S21, S22, S23, S24, S25, S26, S27,
	S28, S29, S30, S31,

	// VFP Double Precision registers
	D0, D1, D2, D3, D4, D5, D6, D7,
	D8, D9, D10, D11, D12, D13, D14, D15,
	D16, D17, D18, D19, D20, D21, D22, D23,
	D24, D25, D26, D27, D28, D29, D30, D31,

	// ASIMD Quad-Word registers
	Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7,
	Q8, Q9, Q10, Q11, Q12, Q13, Q14, Q15,

	// for NEON VLD/VST instructions
	REG_UPDATE = R13,
	INVALID_REG = 0xFFFFFFFF
};

enum ShiftType
{
	ST_LSL = 0,
	ST_ASL = 0,
	ST_LSR = 1,
	ST_ASR = 2,
	ST_ROR = 3,
	ST_RRX = 4
};
enum IntegerSize
{
	I_I8 = 0,
	I_I16,
	I_I32,
	I_I64
};

enum
{
	NUMGPRs = 13,
};

class ARMXEmitter;

enum OpType
{
	TYPE_IMM = 0,
	TYPE_REG,
	TYPE_IMMSREG,
	TYPE_RSR,
	TYPE_MEM
};

// This is no longer a proper operand2 class. Need to split up.
class Operand2
{
	friend class ARMXEmitter;
protected:
	u32 Value;

private:
	OpType Type;

	// IMM types
	u8 Rotation = 0; // Only for u8 values

	// Register types
	u8 IndexOrShift = 0;
	ShiftType Shift = ST_LSL;
public:
	OpType GetType() const {
		return Type;
	}
	Operand2() {
		Type = TYPE_IMM;
		Value = 0;
	}
	Operand2(u32 imm, OpType type = TYPE_IMM) {
		Type = type;
		Value = imm;
	}

	Operand2(ARMReg Reg) {
		Type = TYPE_REG;
		Value = Reg;
	}
	Operand2(u8 imm, u8 rotation) {
		Type = TYPE_IMM;
		Value = imm;
		Rotation = rotation;
	}
	Operand2(ARMReg base, ShiftType type, ARMReg shift) // RSR
	{
		Type = TYPE_RSR;
		_assert_msg_(type != ST_RRX, "Invalid Operand2: RRX does not take a register shift amount");
		IndexOrShift = shift;
		Shift = type;
		Value = base;
	}

	Operand2(ARMReg base, ShiftType type, u8 shift) // For IMM shifted register
	{
		if (shift == 32) shift = 0;
		switch (type)
		{
		case ST_LSL:
			_assert_msg_(shift < 32, "Invalid Operand2: LSL %u", shift);
			break;
		case ST_LSR:
			_assert_msg_(shift <= 32, "Invalid Operand2: LSR %u", shift);
			if (!shift)
				type = ST_LSL;
			if (shift == 32)
				shift = 0;
			break;
		case ST_ASR:
			_assert_msg_(shift < 32, "Invalid Operand2: ASR %u", shift);
			if (!shift)
				type = ST_LSL;
			if (shift == 32)
				shift = 0;
			break;
		case ST_ROR:
			_assert_msg_(shift < 32, "Invalid Operand2: ROR %u", shift);
			if (!shift)
				type = ST_LSL;
			break;
		case ST_RRX:
			_assert_msg_(shift == 0, "Invalid Operand2: RRX does not take an immediate shift amount");
			type = ST_ROR;
			break;
		}
		IndexOrShift = shift;
		Shift = type;
		Value = base;
		Type = TYPE_IMMSREG;
	}
	u32 GetData()
	{
		switch (Type)
		{
		case TYPE_IMM:
			return Imm12Mod(); // This'll need to be changed later
		case TYPE_REG:
			return Rm();
		case TYPE_IMMSREG:
			return IMMSR();
		case TYPE_RSR:
			return RSR();
		default:
			_assert_msg_(false, "GetData with Invalid Type");
			return 0;
		}
	}
	u32 IMMSR() // IMM shifted register
	{
		_assert_msg_(Type == TYPE_IMMSREG, "IMMSR must be imm shifted register");
		return ((IndexOrShift & 0x1f) << 7 | (Shift << 5) | Value);
	}
	u32 RSR() // Register shifted register
	{
		_assert_msg_(Type == TYPE_RSR, "RSR must be RSR Of Course");
		return (IndexOrShift << 8) | (Shift << 5) | 0x10 | Value;
	}
	u32 Rm() const
	{
		_assert_msg_(Type == TYPE_REG, "Rm must be with Reg");
		return Value;
	}

	u32 Imm5() const
	{
		_assert_msg_((Type == TYPE_IMM), "Imm5 not IMM value");
		return ((Value & 0x0000001F) << 7);
	}
	u32 Imm8() const
	{
		_assert_msg_((Type == TYPE_IMM), "Imm8 not IMM value");
		return Value & 0xFF;
	}
	u32 Imm8Rot() const // IMM8 with Rotation
	{
		_assert_msg_((Type == TYPE_IMM), "Imm8Rot not IMM value");
		_assert_msg_((Rotation & 0xE1) != 0, "Invalid Operand2: immediate rotation %u", Rotation);
		return (1 << 25) | (Rotation << 7) | (Value & 0x000000FF);
	}
	u32 Imm12() const
	{
		_assert_msg_((Type == TYPE_IMM), "Imm12 not IMM");
		return (Value & 0x00000FFF);
	}

	u32 Imm12Mod() const
	{
		// This is an IMM12 with the top four bits being rotation and the
		// bottom eight being an IMM. This is for instructions that need to
		// expand an 8-bit IMM to a 32-bit value and gives you some rotation as
		// well.
		// Each rotation rotates to the right by 2 bits.
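		// Worked example (illustrative, not from the original source): with the 4-bit
		// rotation field set to 4, the hardware rotates the 8-bit immediate right by 8,
		// so imm8 = 0xFF with rotation 4 encodes the 32-bit constant 0xFF000000.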
		_assert_msg_((Type == TYPE_IMM), "Imm12Mod not IMM");
		return ((Rotation & 0xF) << 8) | (Value & 0xFF);
	}
	u32 Imm16() const
	{
		_assert_msg_((Type == TYPE_IMM), "Imm16 not IMM");
		return ((Value & 0xF000) << 4) | (Value & 0x0FFF);
	}
	u32 Imm16Low() const
	{
		return Imm16();
	}
	u32 Imm16High() const // Returns the high 16 bits
	{
		_assert_msg_((Type == TYPE_IMM), "Imm16 not IMM");
		return (((Value >> 16) & 0xF000) << 4) | ((Value >> 16) & 0x0FFF);
	}
	u32 Imm24() const
	{
		_assert_msg_((Type == TYPE_IMM), "Imm24 not IMM");
		return (Value & 0x0FFFFFFF);
	}
	// NEON and ASIMD specific
	u32 Imm8ASIMD() const
	{
		_assert_msg_((Type == TYPE_IMM), "Imm8ASIMD not IMM");
		return ((Value & 0x80) << 17) | ((Value & 0x70) << 12) | (Value & 0xF);
	}
	u32 Imm8VFP() const
	{
		_assert_msg_((Type == TYPE_IMM), "Imm8VFP not IMM");
		return ((Value & 0xF0) << 12) | (Value & 0xF);
	}
};
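
// Example (illustrative sketch, not from the original source): the shifted-register form
// above lets data-processing ops take a scaled index, e.g.
//   ADD(R0, R1, Operand2(R2, ST_LSL, 2));  // R0 = R1 + (R2 << 2), i.e. index a 4-byte array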

// Use these when you don't know if an imm can be represented as an operand2.
// This lets you generate both an optimal and a fallback solution by checking
// the return value, which will be false if these fail to find an Operand2 that
// represents your 32-bit imm value.
bool TryMakeOperand2(u32 imm, Operand2 &op2);
bool TryMakeOperand2_AllowInverse(u32 imm, Operand2 &op2, bool *inverse);
bool TryMakeOperand2_AllowNegation(s32 imm, Operand2 &op2, bool *negated);
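// Usage sketch (illustrative, not from the original source): emit an optimal form when the
// immediate fits, otherwise fall back to loading it into a scratch register first.
//   Operand2 op2;
//   if (TryMakeOperand2(imm, op2)) {
//       ADD(R0, R1, op2);            // single instruction
//   } else {
//       MOVI2R(R12, imm);            // scratch register load (MOVI2R is declared below)
//       ADD(R0, R1, R12);
//   }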

// Use this only when you know imm can be made into an Operand2.
Operand2 AssumeMakeOperand2(u32 imm);

inline Operand2 R(ARMReg Reg) { return Operand2(Reg, TYPE_REG); }
inline Operand2 IMM(u32 Imm) { return Operand2(Imm, TYPE_IMM); }
inline Operand2 Mem(void *ptr) { return Operand2((u32)(uintptr_t)ptr, TYPE_IMM); }
// usage: struct {int e;} s; STRUCT_OFF(s,e)
#define STRUCT_OFF(str,elem) ((u32)((u32)&(str).elem-(u32)&(str)))

struct FixupBranch
{
	u8 *ptr;
	u32 condition; // Remembers our condition at the time
	int type; // 0 = B, 1 = BL
};

struct LiteralPool
{
	intptr_t loc;
	u8* ldr_address;
	u32 val;
};

typedef const u8* JumpTarget;

// XXX: Stop polluting the global namespace
const u32 I_8 = (1 << 0);
const u32 I_16 = (1 << 1);
const u32 I_32 = (1 << 2);
const u32 I_64 = (1 << 3);
const u32 I_SIGNED = (1 << 4);
const u32 I_UNSIGNED = (1 << 5);
const u32 F_32 = (1 << 6);
const u32 I_POLYNOMIAL = (1 << 7); // Only used in VMUL/VMULL

enum VIMMMode {
	VIMM___x___x = 0x0, // 0000 VMOV
	VIMM__x___x_ = 0x2, // 0010
	VIMM_x___x__ = 0x4, // 0100
	VIMMx___x___ = 0x6, // 0110
	VIMM_x_x_x_x = 0x8, // 1000
	VIMMx_x_x_x_ = 0xA, // 1010
	VIMM__x1__x1 = 0xC, // 1100
	VIMM_x11_x11 = 0xD, // 1101
	VIMMxxxxxxxx = 0xE, // 1110 // op == 0
	VIMMf000f000 = 0xF, // 1111 // op == 0 ( really aBbbbbbc defgh 00000000 00000000 ) where B = NOT b
	VIMMbits2bytes = 0x1E, // Bit replication into bytes! Easily creates 11111111 00000000 masks!
};

u32 EncodeVd(ARMReg Vd);
u32 EncodeVn(ARMReg Vn);
u32 EncodeVm(ARMReg Vm);

u32 encodedSize(u32 value);

// Subtracts the base from the register to give us the real one
ARMReg SubBase(ARMReg Reg);

inline bool IsQ(ARMReg r) {
	return r >= Q0 && r <= Q15;
}

inline bool IsD(ARMReg r) {
	return r >= D0 && r <= D31;
}

// See A.7.1 in the ARMv7-A Architecture Reference Manual.
// VMUL F32 scalars can only be up to D15[0], D15[1] - higher scalars cannot be individually addressed.
ARMReg DScalar(ARMReg dreg, int subScalar);
ARMReg QScalar(ARMReg qreg, int subScalar);
inline ARMReg XScalar(ARMReg reg, int subScalar) {
	if (IsQ(reg))
		return QScalar(reg, subScalar);
	else
		return DScalar(reg, subScalar);
}

const char *ARMRegAsString(ARMReg reg);

// Get the two halves of a Q register.
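// For example (illustrative), D_0(Q2) == D4 and D_1(Q2) == D5.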
inline ARMReg D_0(ARMReg q) {
	if (q >= Q0 && q <= Q15) {
		return ARMReg(D0 + (q - Q0) * 2);
	} else if (q >= D0 && q <= D31) {
		return q;
	} else {
		return INVALID_REG;
	}
}
inline ARMReg D_1(ARMReg q) {
	return ARMReg(D0 + (q - Q0) * 2 + 1);
}

enum NEONAlignment {
	ALIGN_NONE = 0,
	ALIGN_64 = 1,
	ALIGN_128 = 2,
	ALIGN_256 = 3
};


class NEONXEmitter;

class ARMXEmitter
{
	friend struct OpArg;  // for Write8 etc
	friend class NEONXEmitter;
private:
	u8 *code, *startcode;
	u8 *lastCacheFlushEnd;
	u32 condition;
	std::vector<LiteralPool> currentLitPool;

	void WriteStoreOp(u32 Op, ARMReg Rt, ARMReg Rn, Operand2 op2, bool RegAdd);
	void WriteRegStoreOp(u32 op, ARMReg dest, bool WriteBack, u16 RegList);
	void WriteVRegStoreOp(u32 op, ARMReg dest, bool Double, bool WriteBack, ARMReg firstreg, u8 numregs);
	void WriteShiftedDataOp(u32 op, bool SetFlags, ARMReg dest, ARMReg src, ARMReg op2);
	void WriteShiftedDataOp(u32 op, bool SetFlags, ARMReg dest, ARMReg src, Operand2 op2);
	void WriteSignedMultiply(u32 Op, u32 Op2, u32 Op3, ARMReg dest, ARMReg r1, ARMReg r2);

	void WriteVFPDataOp(u32 Op, ARMReg Vd, ARMReg Vn, ARMReg Vm);

	void Write4OpMultiply(u32 op, ARMReg destLo, ARMReg destHi, ARMReg rn, ARMReg rm);

	// New Ops
	void WriteInstruction(u32 op, ARMReg Rd, ARMReg Rn, Operand2 Rm, bool SetFlags = false);

	void WriteVLDST1(bool load, u32 Size, ARMReg Vd, ARMReg Rn, int regCount, NEONAlignment align, ARMReg Rm);
	void WriteVLDST1_lane(bool load, u32 Size, ARMReg Vd, ARMReg Rn, int lane, bool aligned, ARMReg Rm);

	void WriteVimm(ARMReg Vd, int cmode, u8 imm, int op);

	void EncodeShiftByImm(u32 Size, ARMReg Vd, ARMReg Vm, int shiftAmount, u8 opcode, bool quad, bool inverse, bool halve);

protected:
	inline void Write32(u32 value) { *(u32 *)code = value; code += 4; }

public:
	ARMXEmitter() : code(0), startcode(0), lastCacheFlushEnd(0) {
		condition = CC_AL << 28;
	}
	ARMXEmitter(u8 *code_ptr) {
		code = code_ptr;
		lastCacheFlushEnd = code_ptr;
		startcode = code_ptr;
		condition = CC_AL << 28;
	}
	virtual ~ARMXEmitter() {}

	void SetCodePointer(u8 *ptr, u8 *writePtr);
	const u8 *GetCodePointer() const;

	void ReserveCodeSpace(u32 bytes);
	const u8 *AlignCode16();
	const u8 *AlignCodePage();
	const u8 *NopAlignCode16();

	void FlushIcache();
	void FlushIcacheSection(u8 *start, u8 *end);
	u8 *GetWritableCodePtr();

	void FlushLitPool();
	void AddNewLit(u32 val);
	bool TrySetValue_TwoOp(ARMReg reg, u32 val);

	CCFlags GetCC() const { return CCFlags(condition >> 28); }
	void SetCC(CCFlags cond = CC_AL);

	// Special purpose instructions

	// Dynamic Endian Switching
	void SETEND(bool BE);
	// Debug Breakpoint
	void BKPT(u16 arg);

	// Hint instruction
	void YIELD();

	// Do nothing
	void NOP(int count = 1); // nop padding - TODO: fast nop slides, for amd and intel (check their manuals)

#ifdef CALL
#undef CALL
#endif

	// Branching
	FixupBranch B();
	FixupBranch B_CC(CCFlags Cond);
	void B_CC(CCFlags Cond, const void *fnptr);
	FixupBranch BL();
	FixupBranch BL_CC(CCFlags Cond);
	void SetJumpTarget(FixupBranch const &branch);
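	// Typical fixup pattern (illustrative sketch, not from the original source):
	//   FixupBranch skip = B_CC(CC_EQ);  // forward branch, destination not yet known
	//   ...emit the code that runs when the condition is false...
	//   SetJumpTarget(skip);             // patch the branch to land here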

	void B (const void *fnptr);
	void B (ARMReg src);
	void BL(const void *fnptr);
	void BL(ARMReg src);
	bool BLInRange(const void *fnptr) const;

	void PUSH(const int num, ...);
	void POP(const int num, ...);
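	// These take a register count followed by that many registers, e.g. (illustrative sketch):
	//   PUSH(2, R4, R_LR);   // prologue
	//   ...
	//   POP(2, R4, R_LR);    // epilogue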

	// New Data Ops
	void AND (ARMReg Rd, ARMReg Rn, Operand2 Rm);
	void ANDS(ARMReg Rd, ARMReg Rn, Operand2 Rm);
	void EOR (ARMReg dest, ARMReg src, Operand2 op2);
	void EORS(ARMReg dest, ARMReg src, Operand2 op2);
	void SUB (ARMReg dest, ARMReg src, Operand2 op2);
	void SUBS(ARMReg dest, ARMReg src, Operand2 op2);
	void RSB (ARMReg dest, ARMReg src, Operand2 op2);
	void RSBS(ARMReg dest, ARMReg src, Operand2 op2);
	void ADD (ARMReg dest, ARMReg src, Operand2 op2);
	void ADDS(ARMReg dest, ARMReg src, Operand2 op2);
	void ADC (ARMReg dest, ARMReg src, Operand2 op2);
	void ADCS(ARMReg dest, ARMReg src, Operand2 op2);
	void LSL (ARMReg dest, ARMReg src, Operand2 op2);
	void LSL (ARMReg dest, ARMReg src, ARMReg op2);
	void LSLS(ARMReg dest, ARMReg src, Operand2 op2);
	void LSLS(ARMReg dest, ARMReg src, ARMReg op2);
	void LSR (ARMReg dest, ARMReg src, Operand2 op2);
	void LSRS(ARMReg dest, ARMReg src, Operand2 op2);
	void LSR (ARMReg dest, ARMReg src, ARMReg op2);
	void LSRS(ARMReg dest, ARMReg src, ARMReg op2);
	void ASR (ARMReg dest, ARMReg src, Operand2 op2);
	void ASRS(ARMReg dest, ARMReg src, Operand2 op2);
	void ASR (ARMReg dest, ARMReg src, ARMReg op2);
	void ASRS(ARMReg dest, ARMReg src, ARMReg op2);

	void SBC (ARMReg dest, ARMReg src, Operand2 op2);
	void SBCS(ARMReg dest, ARMReg src, Operand2 op2);
	void RBIT(ARMReg dest, ARMReg src);
	void REV (ARMReg dest, ARMReg src);
	void REV16(ARMReg dest, ARMReg src);
	void RSC (ARMReg dest, ARMReg src, Operand2 op2);
	void RSCS(ARMReg dest, ARMReg src, Operand2 op2);
	void TST (ARMReg src, Operand2 op2);
	void TEQ (ARMReg src, Operand2 op2);
	void CMP (ARMReg src, Operand2 op2);
	void CMN (ARMReg src, Operand2 op2);
	void ORR (ARMReg dest, ARMReg src, Operand2 op2);
	void ORRS(ARMReg dest, ARMReg src, Operand2 op2);
	void MOV (ARMReg dest, Operand2 op2);
	void MOVS(ARMReg dest, Operand2 op2);
	void BIC (ARMReg dest, ARMReg src, Operand2 op2); // BIC = ANDN
	void BICS(ARMReg dest, ARMReg src, Operand2 op2);
	void MVN (ARMReg dest, Operand2 op2);
	void MVNS(ARMReg dest, Operand2 op2);
	void MOVW(ARMReg dest, Operand2 op2);
	void MOVT(ARMReg dest, Operand2 op2, bool TopBits = false);

	// UDIV and SDIV are only available on CPUs that have
	// the IDIVA hardware capability.
	void UDIV(ARMReg dest, ARMReg dividend, ARMReg divisor);
	void SDIV(ARMReg dest, ARMReg dividend, ARMReg divisor);

	void MUL (ARMReg dest, ARMReg src, ARMReg op2);
	void MULS(ARMReg dest, ARMReg src, ARMReg op2);

	void UMULL(ARMReg destLo, ARMReg destHi, ARMReg rn, ARMReg rm);
	void SMULL(ARMReg destLo, ARMReg destHi, ARMReg rn, ARMReg rm);

	void UMLAL(ARMReg destLo, ARMReg destHi, ARMReg rn, ARMReg rm);
	void SMLAL(ARMReg destLo, ARMReg destHi, ARMReg rn, ARMReg rm);

	void SXTB(ARMReg dest, ARMReg op2);
	void SXTH(ARMReg dest, ARMReg op2, u8 rotation = 0);
	void SXTAH(ARMReg dest, ARMReg src, ARMReg op2, u8 rotation = 0);
	void BFI(ARMReg rd, ARMReg rn, u8 lsb, u8 width);
	void BFC(ARMReg rd, u8 lsb, u8 width);
	void UBFX(ARMReg dest, ARMReg op2, u8 lsb, u8 width);
	void SBFX(ARMReg dest, ARMReg op2, u8 lsb, u8 width);
	void CLZ(ARMReg rd, ARMReg rm);
	void PLD(ARMReg rd, int offset, bool forWrite = false);

	// Using just MSR here messes with our defines on the PPC side of stuff (when this code was in Dolphin...)
	// Just need to put an underscore here, bit annoying.
	void _MSR (bool nzcvq, bool g, Operand2 op2);
	void _MSR (bool nzcvq, bool g, ARMReg src);
	void MRS (ARMReg dest);

	// Memory load/store operations
	void LDR (ARMReg dest, ARMReg base, Operand2 op2 = 0, bool RegAdd = true);
	void LDRB (ARMReg dest, ARMReg base, Operand2 op2 = 0, bool RegAdd = true);
	void LDRH (ARMReg dest, ARMReg base, Operand2 op2 = 0, bool RegAdd = true);
	void LDRSB(ARMReg dest, ARMReg base, Operand2 op2 = 0, bool RegAdd = true);
	void LDRSH(ARMReg dest, ARMReg base, Operand2 op2 = 0, bool RegAdd = true);
	void STR (ARMReg result, ARMReg base, Operand2 op2 = 0, bool RegAdd = true);
	void STRB (ARMReg result, ARMReg base, Operand2 op2 = 0, bool RegAdd = true);
	void STRH (ARMReg result, ARMReg base, Operand2 op2 = 0, bool RegAdd = true);

	void STMFD(ARMReg dest, bool WriteBack, const int Regnum, ...);
	void LDMFD(ARMReg dest, bool WriteBack, const int Regnum, ...);
	void STMIA(ARMReg dest, bool WriteBack, const int Regnum, ...);
	void LDMIA(ARMReg dest, bool WriteBack, const int Regnum, ...);
	void STM(ARMReg dest, bool Add, bool Before, bool WriteBack, const int Regnum, ...);
	void LDM(ARMReg dest, bool Add, bool Before, bool WriteBack, const int Regnum, ...);
	void STMBitmask(ARMReg dest, bool Add, bool Before, bool WriteBack, const u16 RegList);
	void LDMBitmask(ARMReg dest, bool Add, bool Before, bool WriteBack, const u16 RegList);

	// Exclusive Access operations
	void LDREX(ARMReg dest, ARMReg base);
	// result is set to 0 if the store succeeded, 1 if it failed.
	void STREX(ARMReg result, ARMReg base, ARMReg op);
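	// Atomic read-modify-write sketch (illustrative, not from the original source;
	// condition-code names come from Common/ArmCommon.h):
	//   const u8 *retry = GetCodePointer();
	//   LDREX(R1, R0);              // R1 = [R0], marks exclusive access
	//   ADD(R1, R1, Operand2(1));
	//   STREX(R2, R0, R1);          // try to store R1 to [R0]; R2 = 0 on success
	//   CMP(R2, Operand2(0));
	//   B_CC(CC_NEQ, retry);        // lost the exclusive access, try again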
	void DMB ();
	void SVC(Operand2 op);

	// NEON and ASIMD instructions
	// None of these will be created with conditional since ARM
	// is deprecating conditional execution of ASIMD instructions.
	// ASIMD instructions don't even have a conditional encoding.

	// NEON Only
	void VABD(IntegerSize size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VADD(IntegerSize size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VSUB(IntegerSize size, ARMReg Vd, ARMReg Vn, ARMReg Vm);

	// VFP Only
	void VLDMIA(ARMReg dest, bool WriteBack, ARMReg firstreg, int numregs);
	void VSTMIA(ARMReg dest, bool WriteBack, ARMReg firstreg, int numregs);
	void VLDMDB(ARMReg dest, bool WriteBack, ARMReg firstreg, int numregs);
	void VSTMDB(ARMReg dest, bool WriteBack, ARMReg firstreg, int numregs);
	void VPUSH(ARMReg firstvreg, int numvregs) {
		VSTMDB(R_SP, true, firstvreg, numvregs);
	}
	void VPOP(ARMReg firstvreg, int numvregs) {
		VLDMIA(R_SP, true, firstvreg, numvregs);
	}
	void VLDR(ARMReg Dest, ARMReg Base, s16 offset);
	void VSTR(ARMReg Src, ARMReg Base, s16 offset);
	void VCMP(ARMReg Vd, ARMReg Vm);
	void VCMPE(ARMReg Vd, ARMReg Vm);
	// Compares against zero
	void VCMP(ARMReg Vd);
	void VCMPE(ARMReg Vd);

	void VNMLA(ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VNMLS(ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VNMUL(ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VDIV(ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VSQRT(ARMReg Vd, ARMReg Vm);

	// NEON and VFP
	void VADD(ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VSUB(ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VABS(ARMReg Vd, ARMReg Vm);
	void VNEG(ARMReg Vd, ARMReg Vm);
	void VMUL(ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VMLA(ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VMLS(ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VMOV(ARMReg Dest, Operand2 op2);
	void VMOV(ARMReg Dest, ARMReg Src, bool high);
	void VMOV(ARMReg Dest, ARMReg Src);
	// Either Vd, Rt, Rt2 or Rt, Rt2, Vd.
	void VMOV(ARMReg Dest, ARMReg Src1, ARMReg Src2);
	void VCVT(ARMReg Dest, ARMReg Src, int flags);

	// NEON, need to check for this (supported if VFP4 is supported)
	void VCVTF32F16(ARMReg Dest, ARMReg Src);
	void VCVTF16F32(ARMReg Dest, ARMReg Src);

	void VABA(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VABAL(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VABD(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VABDL(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VABS(u32 Size, ARMReg Vd, ARMReg Vm);
	void VACGE(ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VACGT(ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VACLE(ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VACLT(ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VADD(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VADDHN(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VADDL(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VADDW(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VBIF(ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VBIT(ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VBSL(ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VCEQ(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VCEQ(u32 Size, ARMReg Vd, ARMReg Vm);
	void VCGE(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VCGE(u32 Size, ARMReg Vd, ARMReg Vm);
	void VCGT(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VCGT(u32 Size, ARMReg Vd, ARMReg Vm);
	void VCLE(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VCLE(u32 Size, ARMReg Vd, ARMReg Vm);
	void VCLS(u32 Size, ARMReg Vd, ARMReg Vm);
	void VCLT(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VCLT(u32 Size, ARMReg Vd, ARMReg Vm);
	void VCLZ(u32 Size, ARMReg Vd, ARMReg Vm);
	void VCNT(u32 Size, ARMReg Vd, ARMReg Vm);
	void VDUP(u32 Size, ARMReg Vd, ARMReg Vm, u8 index);
	void VDUP(u32 Size, ARMReg Vd, ARMReg Rt);
	void VEXT(ARMReg Vd, ARMReg Vn, ARMReg Vm, u8 index);
	void VFMA(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VFMS(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VHADD(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VHSUB(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VMAX(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VMIN(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);

	// Three registers
	void VMLA(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VMLS(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VMLAL(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VMLSL(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VMUL(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VMULL(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VQDMLAL(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VQDMLSL(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VQDMULH(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VQDMULL(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VQRDMULH(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);

	// Two registers and a scalar
	// These two are super useful for matrix multiplication
	void VMUL_scalar(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VMLA_scalar(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);

	// TODO:
	/*
	void VMLS_scalar(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VMLAL_scalar(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VMLSL_scalar(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VMULL_scalar(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VQDMLAL_scalar(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VQDMLSL_scalar(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VQDMULH_scalar(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VQDMULL_scalar(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VQRDMULH_scalar(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
	*/

	// Vector bitwise. These don't have an element size for obvious reasons.
	void VAND(ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VBIC(ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VEOR(ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VORN(ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VORR(ARMReg Vd, ARMReg Vn, ARMReg Vm);
	inline void VMOV_neon(ARMReg Dest, ARMReg Src) {
		VORR(Dest, Src, Src);
	}
	void VMOV_neon(u32 Size, ARMReg Vd, u32 imm);
	void VMOV_neon(u32 Size, ARMReg Vd, float imm) {
		_dbg_assert_msg_(Size == F_32, "Expecting F_32 immediate for VMOV_neon float arg.");
		union {
			float f;
			u32 u;
		} val;
		val.f = imm;
		VMOV_neon(I_32, Vd, val.u);
	}
	void VMOV_neon(u32 Size, ARMReg Vd, ARMReg Rt, int lane);

	void VNEG(u32 Size, ARMReg Vd, ARMReg Vm);
	void VMVN(ARMReg Vd, ARMReg Vm);
	void VPADAL(u32 Size, ARMReg Vd, ARMReg Vm);
	void VPADD(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VPADDL(u32 Size, ARMReg Vd, ARMReg Vm);
	void VPMAX(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VPMIN(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VQABS(u32 Size, ARMReg Vd, ARMReg Vm);
	void VQADD(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VQNEG(u32 Size, ARMReg Vd, ARMReg Vm);
	void VQRSHL(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VQSHL(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VQSUB(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VRADDHN(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VRECPE(u32 Size, ARMReg Vd, ARMReg Vm);
	void VRECPS(ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VRHADD(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VRSHL(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VRSQRTE(u32 Size, ARMReg Vd, ARMReg Vm);
	void VRSQRTS(ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VRSUBHN(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VSHL(u32 Size, ARMReg Vd, ARMReg Vm, ARMReg Vn); // Register shift
	void VSUB(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VSUBHN(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VSUBL(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VSUBW(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VSWP(ARMReg Vd, ARMReg Vm);
	void VTRN(u32 Size, ARMReg Vd, ARMReg Vm);
	void VTST(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VUZP(u32 Size, ARMReg Vd, ARMReg Vm);
	void VZIP(u32 Size, ARMReg Vd, ARMReg Vm);
	void VREVX(u32 size, u32 Size, ARMReg Vd, ARMReg Vm);
	void VREV64(u32 Size, ARMReg Vd, ARMReg Vm);
	void VREV32(u32 Size, ARMReg Vd, ARMReg Vm);
	void VREV16(u32 Size, ARMReg Vd, ARMReg Vm);


	// NEON immediate instructions

	void VMOV_imm(u32 Size, ARMReg Vd, VIMMMode type, int imm);
	void VMOV_immf(ARMReg Vd, float value); // This only works with a select few values (1.0f and -1.0f).

	void VORR_imm(u32 Size, ARMReg Vd, VIMMMode type, int imm);
	void VMVN_imm(u32 Size, ARMReg Vd, VIMMMode type, int imm);
	void VBIC_imm(u32 Size, ARMReg Vd, VIMMMode type, int imm);

	// Widening and narrowing moves
	void VMOVL(u32 Size, ARMReg Vd, ARMReg Vm);
	void VMOVN(u32 Size, ARMReg Vd, ARMReg Vm);
	void VQMOVN(u32 Size, ARMReg Vd, ARMReg Vm);
	void VQMOVUN(u32 Size, ARMReg Vd, ARMReg Vm);

	// Shifts by immediate
	void VSHL(u32 Size, ARMReg Vd, ARMReg Vm, int shiftAmount);
	void VSHLL(u32 Size, ARMReg Vd, ARMReg Vm, int shiftAmount); // widening
	void VSHR(u32 Size, ARMReg Vd, ARMReg Vm, int shiftAmount);
	void VSHRN(u32 Size, ARMReg Vd, ARMReg Vm, int shiftAmount); // narrowing

	// Vector VCVT
	void VCVT(u32 DestSize, ARMReg Dest, ARMReg Src);


	// Notes:
	// Rm == R_PC is interpreted as no offset; otherwise the effective address is the sum of Rn and Rm.
	// Rm == R13 is interpreted as writeback addressing (VLD1 ..., [Rn]!); use the REG_UPDATE pseudo register for this.

	// Load/store multiple registers full of elements (a register is a D register)
	// Specifying alignment when it can be guaranteed is documented to improve load/store performance.
	// For example, when loading a set of four 64-bit registers that we know is 32-byte aligned, we should specify ALIGN_256.
	void VLD1(u32 Size, ARMReg Vd, ARMReg Rn, int regCount, NEONAlignment align = ALIGN_NONE, ARMReg Rm = R_PC);
	void VST1(u32 Size, ARMReg Vd, ARMReg Rn, int regCount, NEONAlignment align = ALIGN_NONE, ARMReg Rm = R_PC);
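	// Example of the alignment note above (illustrative sketch, not from the original source):
	//   VLD1(F_32, D0, R0, 4, ALIGN_256, REG_UPDATE);  // load D0-D3 from a 32-byte-aligned [R0], then advance R0 past the data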

	// Load/store single lanes of D registers
	void VLD1_lane(u32 Size, ARMReg Vd, ARMReg Rn, int lane, bool aligned, ARMReg Rm = R_PC);
	void VST1_lane(u32 Size, ARMReg Vd, ARMReg Rn, int lane, bool aligned, ARMReg Rm = R_PC);

	// Load one value into all lanes of a D or a Q register (either supported, all formats should work).
	void VLD1_all_lanes(u32 Size, ARMReg Vd, ARMReg Rn, bool aligned, ARMReg Rm = R_PC);

	/*
	// Deinterleave two loads... or something. TODO
	void VLD2(u32 Size, ARMReg Vd, ARMReg Rn, int regCount, NEONAlignment align = ALIGN_NONE, ARMReg Rm = R_PC);
	void VST2(u32 Size, ARMReg Vd, ARMReg Rn, int regCount, NEONAlignment align = ALIGN_NONE, ARMReg Rm = R_PC);

	void VLD2_lane(u32 Size, ARMReg Vd, ARMReg Rn, int lane, ARMReg Rm = R_PC);
	void VST2_lane(u32 Size, ARMReg Vd, ARMReg Rn, int lane, ARMReg Rm = R_PC);

	void VLD3(u32 Size, ARMReg Vd, ARMReg Rn, int regCount, NEONAlignment align = ALIGN_NONE, ARMReg Rm = R_PC);
	void VST3(u32 Size, ARMReg Vd, ARMReg Rn, int regCount, NEONAlignment align = ALIGN_NONE, ARMReg Rm = R_PC);

	void VLD3_lane(u32 Size, ARMReg Vd, ARMReg Rn, int lane, ARMReg Rm = R_PC);
	void VST3_lane(u32 Size, ARMReg Vd, ARMReg Rn, int lane, ARMReg Rm = R_PC);

	void VLD4(u32 Size, ARMReg Vd, ARMReg Rn, int regCount, NEONAlignment align = ALIGN_NONE, ARMReg Rm = R_PC);
	void VST4(u32 Size, ARMReg Vd, ARMReg Rn, int regCount, NEONAlignment align = ALIGN_NONE, ARMReg Rm = R_PC);

	void VLD4_lane(u32 Size, ARMReg Vd, ARMReg Rn, int lane, ARMReg Rm = R_PC);
	void VST4_lane(u32 Size, ARMReg Vd, ARMReg Rn, int lane, ARMReg Rm = R_PC);
	*/

	void VMRS_APSR();
	void VMRS(ARMReg Rt);
	void VMSR(ARMReg Rt);

	void QuickCallFunction(ARMReg scratchreg, const void *func);
	template <typename T> void QuickCallFunction(ARMReg scratchreg, T func) {
		QuickCallFunction(scratchreg, (const void *)func);
	}

	// Wrapper around MOVT/MOVW with fallbacks.
	void MOVI2R(ARMReg reg, u32 val, bool optimize = true);
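	// For example (illustrative, not from the original source), MOVI2R(R0, 0xDEADBEEF)
	// emits a MOVW/MOVT pair where available; the fallbacks cover constants that can be
	// built more cheaply, e.g. a single MOV or MVN with a rotated immediate.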
	void MOVI2FR(ARMReg dest, float val, bool negate = false);
	void MOVI2F(ARMReg dest, float val, ARMReg tempReg, bool negate = false);
	void MOVI2F_neon(ARMReg dest, float val, ARMReg tempReg, bool negate = false);

	// Load pointers without casting
	template <class T> void MOVP2R(ARMReg reg, T *val) {
		MOVI2R(reg, (u32)(uintptr_t)(void *)val);
	}

	void MOVIU2F(ARMReg dest, u32 val, ARMReg tempReg, bool negate = false) {
		union {
			u32 u;
			float f;
		} v = {val};
		MOVI2F(dest, v.f, tempReg, negate);
	}

	void ADDI2R(ARMReg rd, ARMReg rs, u32 val, ARMReg scratch);
	bool TryADDI2R(ARMReg rd, ARMReg rs, u32 val);
	void SUBI2R(ARMReg rd, ARMReg rs, u32 val, ARMReg scratch);
	bool TrySUBI2R(ARMReg rd, ARMReg rs, u32 val);
	void ANDI2R(ARMReg rd, ARMReg rs, u32 val, ARMReg scratch);
	bool TryANDI2R(ARMReg rd, ARMReg rs, u32 val);
	void CMPI2R(ARMReg rs, u32 val, ARMReg scratch);
	bool TryCMPI2R(ARMReg rs, u32 val);
	void TSTI2R(ARMReg rs, u32 val, ARMReg scratch);
	bool TryTSTI2R(ARMReg rs, u32 val);
	void ORI2R(ARMReg rd, ARMReg rs, u32 val, ARMReg scratch);
	bool TryORI2R(ARMReg rd, ARMReg rs, u32 val);
	void EORI2R(ARMReg rd, ARMReg rs, u32 val, ARMReg scratch);
	bool TryEORI2R(ARMReg rd, ARMReg rs, u32 val);
};  // class ARMXEmitter


// Everything that needs to generate machine code should inherit from this.
// You get memory management for free, and you can use all the MOV etc. functions
// without having to prefix them with gen-> or something similar.

class ARMXCodeBlock : public CodeBlock<ARMXEmitter> {
public:
	void PoisonMemory(int offset) override;
};
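
// Minimal usage sketch (illustrative only, not part of the original header). It assumes
// the CodeBlock<> base in Common/CodeBlock.h provides AllocCodeSpace() for grabbing
// executable memory, which is how emitters in this code base are typically set up:
//   class AddRoutine : public ARMXCodeBlock {
//   public:
//       const u8 *Generate() {
//           AllocCodeSpace(4096);      // assumed to come from CodeBlock<>
//           const u8 *start = AlignCode16();
//           ADD(R0, R0, R1);           // R0 = first arg + second arg (AAPCS)
//           MOV(R_PC, R_LR);           // return to caller
//           FlushIcache();
//           return start;
//       }
//   };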

// VFP Specific
struct VFPEnc {
	s16 opc1;
	s16 opc2;
};
extern const VFPEnc VFPOps[16][2];
extern const char *VFPOpNames[16];

}  // namespace ArmGen