GitHub Repository: hrydgard/ppsspp
Path: blob/master/Core/MIPS/LoongArch64/LoongArch64RegCache.cpp
// Copyright (c) 2023- PPSSPP Project.

// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0 or later versions.

// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.

// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/

// Official git repository and contact information can be found at
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.

#ifndef offsetof
#include <cstddef>
#endif

#include "Common/CPUDetect.h"
#include "Core/MIPS/IR/IRInst.h"
#include "Core/MIPS/IR/IRAnalysis.h"
#include "Core/MIPS/LoongArch64/LoongArch64RegCache.h"
#include "Core/MIPS/JitCommon/JitState.h"
#include "Core/Reporting.h"

using namespace LoongArch64Gen;
using namespace LoongArch64JitConstants;

LoongArch64RegCache::LoongArch64RegCache(MIPSComp::JitOptions *jo)
	: IRNativeRegCacheBase(jo) {
	// The V(LSX) regs overlap F regs, so we just use one slot.
	config_.totalNativeRegs = NUM_LAGPR + NUM_LAFPR;
	// F regs are used for both FPU and Vec, so we don't need VREGs.
	config_.mapUseVRegs = false;
	config_.mapFPUSIMD = true;
}

void LoongArch64RegCache::Init(LoongArch64Emitter *emitter) {
	emit_ = emitter;
}

void LoongArch64RegCache::SetupInitialRegs() {
	IRNativeRegCacheBase::SetupInitialRegs();

	// Treat R_ZERO a bit specially, but it's basically static alloc too.
	nrInitial_[R_ZERO].mipsReg = MIPS_REG_ZERO;
	nrInitial_[R_ZERO].normalized32 = true;

	// Since we also have a fixed zero, mark it as a static allocation.
	mrInitial_[MIPS_REG_ZERO].loc = MIPSLoc::REG_IMM;
	mrInitial_[MIPS_REG_ZERO].nReg = R_ZERO;
	mrInitial_[MIPS_REG_ZERO].imm = 0;
	mrInitial_[MIPS_REG_ZERO].isStatic = true;
}

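// Allocation order for dynamically mapped registers. Callee-saved registers
// (R22-R27, F24-F31 in the LoongArch64 ABI) are preferred so mapped values survive
// C calls; the registers claimed by GetStaticAllocations() are omitted from the GPR
// order when static allocation is on. R12/R13 and F8/F9 are never handed out here,
// presumably because they are kept free as scratch (SCRATCH1/SCRATCHF1 are used below.)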
const int *LoongArch64RegCache::GetAllocationOrder(MIPSLoc type, MIPSMap flags, int &count, int &base) const {
	base = R0;

	if (type == MIPSLoc::REG) {
		// R22-R26 (also R27) are the most suitable for static allocation. Those that are
		// chosen for static allocation are left out of the dynamic order below.
		static const int allocationOrder[] = {
			R22, R23, R24, R25, R26, R27, R4, R5, R6, R7, R8, R9, R10, R11, R14, R15, R16, R17, R18, R19, R20,
		};
		static const int allocationOrderStaticAlloc[] = {
			R4, R5, R6, R7, R8, R9, R10, R11, R14, R15, R16, R17, R18, R19, R20,
		};

		if (jo_->useStaticAlloc) {
			count = ARRAY_SIZE(allocationOrderStaticAlloc);
			return allocationOrderStaticAlloc;
		} else {
			count = ARRAY_SIZE(allocationOrder);
			return allocationOrder;
		}
	} else if (type == MIPSLoc::FREG) {
		static const int allocationOrder[] = {
			F24, F25, F26, F27, F28, F29, F30, F31,
			F0, F1, F2, F3, F4, F5, F6, F7,
			F10, F11, F12, F13, F14, F15, F16, F17, F18, F19, F20, F21, F22, F23,
		};

		count = ARRAY_SIZE(allocationOrder);
		return allocationOrder;
	} else {
		_assert_msg_(false, "Allocation order not yet implemented");
		count = 0;
		return nullptr;
	}
}

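// With jo_->useStaticAlloc, these frequently used MIPS regs stay pinned in callee-saved
// host registers across blocks: SP is kept pointerified (a host pointer into PSP memory),
// plus V0/V1, A0/A1 and RA as plain 32-bit values in R23-R27.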
const LoongArch64RegCache::StaticAllocation *LoongArch64RegCache::GetStaticAllocations(int &count) const {
	static const StaticAllocation allocs[] = {
		{ MIPS_REG_SP, R22, MIPSLoc::REG, true },
		{ MIPS_REG_V0, R23, MIPSLoc::REG },
		{ MIPS_REG_V1, R24, MIPSLoc::REG },
		{ MIPS_REG_A0, R25, MIPSLoc::REG },
		{ MIPS_REG_A1, R26, MIPSLoc::REG },
		{ MIPS_REG_RA, R27, MIPSLoc::REG },
	};

	if (jo_->useStaticAlloc) {
		count = ARRAY_SIZE(allocs);
		return allocs;
	}
	return IRNativeRegCacheBase::GetStaticAllocations(count);
}

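// Loads the static allocations from the MIPS context. For a plain reg this emits
//   LD_W   nr, CTXREG, offset
// and for a pointerified reg (SP, when jo_->enablePointerify is set):
//   LD_WU  nr, CTXREG, offset    // zero-extend the guest address
//   ADD_D  nr, nr, MEMBASEREG    // turn it into a host pointer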
void LoongArch64RegCache::EmitLoadStaticRegisters() {
	int count;
	const StaticAllocation *allocs = GetStaticAllocations(count);
	for (int i = 0; i < count; i++) {
		int offset = GetMipsRegOffset(allocs[i].mr);
		if (allocs[i].pointerified && jo_->enablePointerify) {
			emit_->LD_WU((LoongArch64Reg)allocs[i].nr, CTXREG, offset);
			emit_->ADD_D((LoongArch64Reg)allocs[i].nr, (LoongArch64Reg)allocs[i].nr, MEMBASEREG);
		} else {
			emit_->LD_W((LoongArch64Reg)allocs[i].nr, CTXREG, offset);
		}
	}
}

void LoongArch64RegCache::EmitSaveStaticRegisters() {
	int count;
	const StaticAllocation *allocs = GetStaticAllocations(count);
	// This only needs to run once (by Asm) so checks don't need to be fast.
	for (int i = 0; i < count; i++) {
		int offset = GetMipsRegOffset(allocs[i].mr);
		emit_->ST_W((LoongArch64Reg)allocs[i].nr, CTXREG, offset);
	}
}

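// R4-R20 (a0-a7, t0-t8) and F0-F23 (fa0-fa7, ft0-ft15) are caller-saved in the
// LoongArch64 ABI, so anything mapped there has to be written back before calling
// out to C++ code.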
void LoongArch64RegCache::FlushBeforeCall() {
	// These registers are not preserved by function calls.
	// They match between R0 and F0, conveniently.
	for (int i = 4; i <= 20; ++i) {
		FlushNativeReg(R0 + i);
	}
	for (int i = 0; i <= 23; ++i) {
		FlushNativeReg(F0 + i);
	}
}

bool LoongArch64RegCache::IsNormalized32(IRReg mipsReg) {
	_dbg_assert_(IsValidGPR(mipsReg));
	if (mr[mipsReg].loc == MIPSLoc::REG || mr[mipsReg].loc == MIPSLoc::REG_IMM) {
		return nr[mr[mipsReg].nReg].normalized32;
	}
	return false;
}

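// A GPR is "normalized32" when its upper 32 bits are the sign-extension of the low
// 32 bits, which is the representation IR code expects. ADDI_W rd, rs, 0 (a 32-bit
// add of zero) re-establishes that, since 32-bit ops on LA64 sign-extend their result.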
LoongArch64Gen::LoongArch64Reg LoongArch64RegCache::Normalize32(IRReg mipsReg, LoongArch64Gen::LoongArch64Reg destReg) {
	_dbg_assert_(IsValidGPR(mipsReg));
	_dbg_assert_(destReg == INVALID_REG || (destReg > R0 && destReg <= R31));

	LoongArch64Reg reg = (LoongArch64Reg)mr[mipsReg].nReg;

	switch (mr[mipsReg].loc) {
	case MIPSLoc::IMM:
	case MIPSLoc::MEM:
		_assert_msg_(false, "Cannot normalize an imm or mem");
		return INVALID_REG;

	case MIPSLoc::REG:
	case MIPSLoc::REG_IMM:
		if (!nr[mr[mipsReg].nReg].normalized32) {
			if (destReg == INVALID_REG) {
				emit_->ADDI_W((LoongArch64Reg)mr[mipsReg].nReg, (LoongArch64Reg)mr[mipsReg].nReg, 0);
				nr[mr[mipsReg].nReg].normalized32 = true;
				nr[mr[mipsReg].nReg].pointerified = false;
			} else {
				emit_->ADDI_W(destReg, (LoongArch64Reg)mr[mipsReg].nReg, 0);
			}
		} else if (destReg != INVALID_REG) {
			emit_->ADDI_W(destReg, (LoongArch64Reg)mr[mipsReg].nReg, 0);
		}
		break;

	case MIPSLoc::REG_AS_PTR:
		_dbg_assert_(nr[mr[mipsReg].nReg].normalized32 == false);
		if (destReg == INVALID_REG) {
			// If we can pointerify, ADDI_W will be enough.
			if (!jo_->enablePointerify)
				AdjustNativeRegAsPtr(mr[mipsReg].nReg, false);
			emit_->ADDI_W((LoongArch64Reg)mr[mipsReg].nReg, (LoongArch64Reg)mr[mipsReg].nReg, 0);
			mr[mipsReg].loc = MIPSLoc::REG;
			nr[mr[mipsReg].nReg].normalized32 = true;
			nr[mr[mipsReg].nReg].pointerified = false;
		} else if (!jo_->enablePointerify) {
			emit_->SUB_D(destReg, (LoongArch64Reg)mr[mipsReg].nReg, MEMBASEREG);
			emit_->ADDI_W(destReg, destReg, 0);
		} else {
			emit_->ADDI_W(destReg, (LoongArch64Reg)mr[mipsReg].nReg, 0);
		}
		break;

	default:
		_assert_msg_(false, "Should not normalize32 floats");
		break;
	}

	return destReg == INVALID_REG ? reg : destReg;
}

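// Returns a register that already holds the value of IR reg r without emitting any
// code: the reg it's mapped to, R_ZERO for 0, or another reg known to hold the same
// immediate. Returns INVALID_REG if a load would be required.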
LoongArch64Reg LoongArch64RegCache::TryMapTempImm(IRReg r) {
	_dbg_assert_(IsValidGPR(r));
	// If already mapped, no need for a temporary.
	if (IsGPRMapped(r)) {
		return R(r);
	}

	if (mr[r].loc == MIPSLoc::IMM) {
		if (mr[r].imm == 0) {
			return R_ZERO;
		}

		// Try our luck - check for an exact match in another LoongArch reg.
		for (int i = 0; i < TOTAL_MAPPABLE_IRREGS; ++i) {
			if (mr[i].loc == MIPSLoc::REG_IMM && mr[i].imm == mr[r].imm) {
				// Awesome, let's just use this reg.
				return (LoongArch64Reg)mr[i].nReg;
			}
		}
	}

	return INVALID_REG;
}

LoongArch64Reg LoongArch64RegCache::GetAndLockTempGPR() {
	LoongArch64Reg reg = (LoongArch64Reg)AllocateReg(MIPSLoc::REG, MIPSMap::INIT);
	if (reg != INVALID_REG) {
		nr[reg].tempLockIRIndex = irIndex_;
	}
	return reg;
}

LoongArch64Reg LoongArch64RegCache::MapWithFPRTemp(const IRInst &inst) {
	return (LoongArch64Reg)MapWithTemp(inst, MIPSLoc::FREG);
}

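// Mapping helpers. FPRs live at index 32 and up in the IR-side table, which is why
// MapFPR/MapVec4 below add 32 to the IR register number.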
LoongArch64Reg LoongArch64RegCache::MapGPR(IRReg mipsReg, MIPSMap mapFlags) {
	_dbg_assert_(IsValidGPR(mipsReg));

	// Okay, not mapped, so we need to allocate an LA register.
	IRNativeReg nreg = MapNativeReg(MIPSLoc::REG, mipsReg, 1, mapFlags);
	return (LoongArch64Reg)nreg;
}

LoongArch64Reg LoongArch64RegCache::MapGPRAsPointer(IRReg reg) {
	return (LoongArch64Reg)MapNativeRegAsPointer(reg);
}

LoongArch64Reg LoongArch64RegCache::MapFPR(IRReg mipsReg, MIPSMap mapFlags) {
	_dbg_assert_(IsValidFPR(mipsReg));
	_dbg_assert_(mr[mipsReg + 32].loc == MIPSLoc::MEM || mr[mipsReg + 32].loc == MIPSLoc::FREG);

	IRNativeReg nreg = MapNativeReg(MIPSLoc::FREG, mipsReg + 32, 1, mapFlags);
	if (nreg != -1)
		return (LoongArch64Reg)nreg;
	return INVALID_REG;
}

LoongArch64Reg LoongArch64RegCache::MapVec4(IRReg first, MIPSMap mapFlags) {
	_dbg_assert_(IsValidFPR(first));
	_dbg_assert_((first & 3) == 0);
	_dbg_assert_(mr[first + 32].loc == MIPSLoc::MEM || mr[first + 32].loc == MIPSLoc::FREG);

	IRNativeReg nreg = MapNativeReg(MIPSLoc::FREG, first + 32, 4, mapFlags);
	if (nreg != -1)
		return EncodeRegToV((LoongArch64Reg)nreg);
	return INVALID_REG;
}

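// Converts a mapped guest address into a host pointer (state == true: clear/mask the
// upper bits, then add MEMBASEREG) or back into a guest address (subtract MEMBASEREG).
// With MASKED_PSP_MEMORY the masking throws away the top bits of the original value,
// hence the isDirty asserts.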
void LoongArch64RegCache::AdjustNativeRegAsPtr(IRNativeReg nreg, bool state) {
	LoongArch64Reg r = (LoongArch64Reg)(R0 + nreg);
	_assert_(r >= R0 && r <= R31);
	if (state) {
#ifdef MASKED_PSP_MEMORY
		// This destroys the value...
		_dbg_assert_(!nr[nreg].isDirty);
		emit_->SLLI_W(r, r, 2);
		emit_->SRLI_W(r, r, 2);
		emit_->ADD_D(r, r, MEMBASEREG);
#else
		// Clear the top bits to be safe.
		emit_->SLLI_D(r, r, 32);
		emit_->SRLI_D(r, r, 32);
		emit_->ADD_D(r, r, MEMBASEREG);
#endif
		nr[nreg].normalized32 = false;
	} else {
#ifdef MASKED_PSP_MEMORY
		_dbg_assert_(!nr[nreg].isDirty);
#endif
		emit_->SUB_D(r, r, MEMBASEREG);
		nr[nreg].normalized32 = false;
	}
}

bool LoongArch64RegCache::IsNativeRegCompatible(IRNativeReg nreg, MIPSLoc type, MIPSMap flags, int lanes) {
	// No special flags, skip the check for a little speed.
	return IRNativeRegCacheBase::IsNativeRegCompatible(nreg, type, flags, lanes);
}

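// Loads 'lanes' consecutive values from the MIPS context into a host reg. lanes == 1 is
// a single 32-bit value, lanes == 2 a 64-bit pair (only LO/HI for GPRs, or two floats),
// and lanes == 4 a full Vec4 loaded into an LSX vector register with VLD.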
void LoongArch64RegCache::LoadNativeReg(IRNativeReg nreg, IRReg first, int lanes) {
	LoongArch64Reg r = (LoongArch64Reg)(R0 + nreg);
	_dbg_assert_(r > R0);
	_dbg_assert_(first != MIPS_REG_ZERO);
	if (r <= R31) {
		_assert_(lanes == 1 || (lanes == 2 && first == IRREG_LO));
		if (lanes == 1)
			emit_->LD_W(r, CTXREG, GetMipsRegOffset(first));
		else if (lanes == 2)
			emit_->LD_D(r, CTXREG, GetMipsRegOffset(first));
		else
			_assert_(false);
		nr[nreg].normalized32 = true;
	} else {
		_dbg_assert_(r >= F0 && r <= F31);
		_assert_msg_(mr[first].loc == MIPSLoc::FREG, "Cannot load this type: %d", (int)mr[first].loc);
		if (lanes == 1)
			emit_->FLD_S(r, CTXREG, GetMipsRegOffset(first));
		else if (lanes == 2)
			emit_->FLD_D(r, CTXREG, GetMipsRegOffset(first));
		else if (lanes == 4)
			emit_->VLD(EncodeRegToV(r), CTXREG, GetMipsRegOffset(first));
		else
			_assert_(false);
	}
}

void LoongArch64RegCache::StoreNativeReg(IRNativeReg nreg, IRReg first, int lanes) {
	LoongArch64Reg r = (LoongArch64Reg)(R0 + nreg);
	_dbg_assert_(r > R0);
	_dbg_assert_(first != MIPS_REG_ZERO);
	if (r <= R31) {
		_assert_(lanes == 1 || (lanes == 2 && first == IRREG_LO));
		_assert_(mr[first].loc == MIPSLoc::REG || mr[first].loc == MIPSLoc::REG_IMM);
		if (lanes == 1)
			emit_->ST_W(r, CTXREG, GetMipsRegOffset(first));
		else if (lanes == 2)
			emit_->ST_D(r, CTXREG, GetMipsRegOffset(first));
		else
			_assert_(false);
	} else {
		_dbg_assert_(r >= F0 && r <= F31);
		_assert_msg_(mr[first].loc == MIPSLoc::FREG, "Cannot store this type: %d", (int)mr[first].loc);
		if (lanes == 1)
			emit_->FST_S(r, CTXREG, GetMipsRegOffset(first));
		else if (lanes == 2)
			emit_->FST_D(r, CTXREG, GetMipsRegOffset(first));
		else if (lanes == 4)
			emit_->VST(EncodeRegToV(r), CTXREG, GetMipsRegOffset(first));
		else
			_assert_(false);
	}
}

void LoongArch64RegCache::SetNativeRegValue(IRNativeReg nreg, uint32_t imm) {
	LoongArch64Reg r = (LoongArch64Reg)(R0 + nreg);
	if (r == R_ZERO && imm == 0)
		return;
	_dbg_assert_(r > R0 && r <= R31);
	emit_->LI(r, (int32_t)imm);

	// We always use 32-bit immediates, so this is normalized now.
	nr[nreg].normalized32 = true;
}

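// Writes an immediate directly to the MIPS context without mapping the IR reg:
// zero goes through R_ZERO, a matching already-known immediate reuses that reg,
// otherwise the value is materialized in SCRATCH1 first.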
void LoongArch64RegCache::StoreRegValue(IRReg mreg, uint32_t imm) {
	_assert_(IsValidGPRNoZero(mreg));
	// Try to optimize using a different reg.
	LoongArch64Reg storeReg = INVALID_REG;

	// Zero is super easy.
	if (imm == 0) {
		storeReg = R_ZERO;
	} else {
		// Could we get lucky? Check for an exact match in another LoongArch reg.
		for (int i = 0; i < TOTAL_MAPPABLE_IRREGS; ++i) {
			if (mr[i].loc == MIPSLoc::REG_IMM && mr[i].imm == imm) {
				// Awesome, let's just store this reg.
				storeReg = (LoongArch64Reg)mr[i].nReg;
				break;
			}
		}

		if (storeReg == INVALID_REG) {
			emit_->LI(SCRATCH1, imm);
			storeReg = SCRATCH1;
		}
	}

	emit_->ST_W(storeReg, CTXREG, GetMipsRegOffset(mreg));
}

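// Tries to change the lane count of an existing FREG mapping in place (scalar <-> Vec4)
// rather than flushing and reloading through memory. Static mappings and non-FREG
// types fall through to the generic base implementation.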
bool LoongArch64RegCache::TransferNativeReg(IRNativeReg nreg, IRNativeReg dest, MIPSLoc type, IRReg first, int lanes, MIPSMap flags) {
	bool allowed = !mr[nr[nreg].mipsReg].isStatic;
	// There's currently no support for non-FREGs here.
	allowed = allowed && type == MIPSLoc::FREG;

	if (dest == -1)
		dest = nreg;

	if (allowed && (flags == MIPSMap::INIT || flags == MIPSMap::DIRTY)) {
		// Alright, changing lane count (possibly including lane position.)
		IRReg oldfirst = nr[nreg].mipsReg;
		int oldlanes = 0;
		while (mr[oldfirst + oldlanes].nReg == nreg)
			oldlanes++;
		_assert_msg_(oldlanes != 0, "TransferNativeReg encountered nreg mismatch");
		_assert_msg_(oldlanes != lanes, "TransferNativeReg transfer to same lanecount, misaligned?");

		if (lanes == 1 && TransferVecTo1(nreg, dest, first, oldlanes))
			return true;
		if (oldlanes == 1 && Transfer1ToVec(nreg, dest, first, lanes))
			return true;
	}

	return IRNativeRegCacheBase::TransferNativeReg(nreg, dest, type, first, lanes, flags);
}

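// Narrows a vec mapping down to the single lane 'first', ending up in 'dest'.
// Other lanes are split off into free FPRs when that's cheap (VREPLVEI_W), the vec is
// stored if still dirty, and the wanted lane is shuffled into element 0 of dest.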
bool LoongArch64RegCache::TransferVecTo1(IRNativeReg nreg, IRNativeReg dest, IRReg first, int oldlanes) {
	IRReg oldfirst = nr[nreg].mipsReg;

	// Is it worth preserving any of the old regs?
	int numKept = 0;
	for (int i = 0; i < oldlanes; ++i) {
		// Skip whichever one this is extracting.
		if (oldfirst + i == first)
			continue;
		// If 0 isn't being transferred, it's easy to keep in its original reg.
		if (i == 0 && dest != nreg) {
			numKept++;
			continue;
		}

		IRNativeReg freeReg = FindFreeReg(MIPSLoc::FREG, MIPSMap::INIT);
		if (freeReg != -1 && IsRegRead(MIPSLoc::FREG, oldfirst + i)) {
			// If there's one free, use it. Don't modify nreg, though.
			emit_->VREPLVEI_W(FromNativeReg(freeReg), FromNativeReg(nreg), i);

			// Update accounting.
			nr[freeReg].isDirty = nr[nreg].isDirty;
			nr[freeReg].mipsReg = oldfirst + i;
			mr[oldfirst + i].lane = -1;
			mr[oldfirst + i].nReg = freeReg;
			numKept++;
		}
	}

	// Unless all other lanes were kept, store.
	if (nr[nreg].isDirty && numKept < oldlanes - 1) {
		StoreNativeReg(nreg, oldfirst, oldlanes);
		// Set false even for regs that were split out, since they were flushed too.
		for (int i = 0; i < oldlanes; ++i) {
			if (mr[oldfirst + i].nReg != -1)
				nr[mr[oldfirst + i].nReg].isDirty = false;
		}
	}

	// Next, shuffle the desired element into first place.
	if (mr[first].lane > 0) {
		emit_->VREPLVEI_W(FromNativeReg(dest), FromNativeReg(nreg), mr[first].lane);
	} else if (mr[first].lane <= 0 && dest != nreg) {
		emit_->VREPLVEI_W(FromNativeReg(dest), FromNativeReg(nreg), 0);
	}

	// Now update accounting.
	for (int i = 0; i < oldlanes; ++i) {
		auto &mreg = mr[oldfirst + i];
		if (oldfirst + i == first) {
			mreg.lane = -1;
			mreg.nReg = dest;
		} else if (mreg.nReg == nreg && i == 0 && nreg != dest) {
			// Still in the same register, but no longer a vec.
			mreg.lane = -1;
		} else if (mreg.nReg == nreg) {
			// No longer in a register.
			mreg.nReg = -1;
			mreg.lane = -1;
			mreg.loc = MIPSLoc::MEM;
		}
	}

	if (dest != nreg) {
		nr[dest].isDirty = nr[nreg].isDirty;
		if (oldfirst == first) {
			nr[nreg].mipsReg = -1;
			nr[nreg].isDirty = false;
		}
	}
	nr[dest].mipsReg = first;

	return true;
}

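// Builds a Vec4 mapping in 'dest' from four lanes that are currently spread across
// scalar FPRs and memory. Only lanes == 4 is handled: each blendMask case below picks
// a short VILVL/VEXTRINS/VLD sequence that ends with xyzw in dest, and it returns
// false whenever no cheap sequence applies so TransferNativeReg falls back to the
// generic path.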
bool LoongArch64RegCache::Transfer1ToVec(IRNativeReg nreg, IRNativeReg dest, IRReg first, int lanes) {
	LoongArch64Reg destReg = FromNativeReg(dest);
	LoongArch64Reg cur[4]{};
	int numInRegs = 0;
	u8 blendMask = 0;
	for (int i = 0; i < lanes; ++i) {
		if (mr[first + i].lane != -1 || (i != 0 && mr[first + i].spillLockIRIndex >= irIndex_)) {
			// Can't do it, either double mapped or overlapping vec.
			return false;
		}

		if (mr[first + i].nReg == -1) {
			cur[i] = INVALID_REG;
			blendMask |= 1 << i;
		} else {
			cur[i] = FromNativeReg(mr[first + i].nReg);
			numInRegs++;
		}
	}

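	// At this point blendMask has bit i set for each lane that has to come from memory,
	// and cur[i] holds the scalar FPR currently backing each mapped lane.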
	// Shouldn't happen, this should only get called to transfer one in a reg.
	if (numInRegs == 0)
		return false;

	// If everything's currently in a reg, move it into this reg.
	if (lanes == 4) {
		// Go with an exhaustive approach, only 15 possibilities...
		if (blendMask == 0) {
			// y = yw##, x = xz##, dest = xyzw.
			emit_->VILVL_W(EncodeRegToV(cur[1]), EncodeRegToV(cur[3]), EncodeRegToV(cur[1]));
			emit_->VILVL_W(EncodeRegToV(cur[0]), EncodeRegToV(cur[2]), EncodeRegToV(cur[0]));
			emit_->VILVL_W(EncodeRegToV(destReg), EncodeRegToV(cur[1]), EncodeRegToV(cur[0]));
		} else if (blendMask == 0b0001) {
			// y = yw##, w = x###, w = xz##, dest = xyzw.
			emit_->VILVL_W(EncodeRegToV(cur[1]), EncodeRegToV(cur[3]), EncodeRegToV(cur[1]));
			emit_->FLD_S(SCRATCHF1, CTXREG, GetMipsRegOffset(first + 0));
			emit_->VEXTRINS_W(EncodeRegToV(cur[3]), EncodeRegToV(SCRATCHF1), 0);
			emit_->VILVL_W(EncodeRegToV(cur[3]), EncodeRegToV(cur[2]), EncodeRegToV(cur[3]));
			emit_->VILVL_W(EncodeRegToV(destReg), EncodeRegToV(cur[1]), EncodeRegToV(cur[3]));
		} else if (blendMask == 0b0010) {
			// x = xz##, z = y###, z = yw##, dest = xyzw.
			emit_->VILVL_W(EncodeRegToV(cur[0]), EncodeRegToV(cur[2]), EncodeRegToV(cur[0]));
			emit_->FLD_S(SCRATCHF1, CTXREG, GetMipsRegOffset(first + 1));
			emit_->VEXTRINS_W(EncodeRegToV(cur[2]), EncodeRegToV(SCRATCHF1), 0);
			emit_->VILVL_W(EncodeRegToV(cur[2]), EncodeRegToV(cur[3]), EncodeRegToV(cur[2]));
			emit_->VILVL_W(EncodeRegToV(destReg), EncodeRegToV(cur[2]), EncodeRegToV(cur[0]));
		} else if (blendMask == 0b0011 && (first & 1) == 0) {
			// z = zw##, w = xy##, dest = xyzw. Mixed lane sizes.
			emit_->VILVL_W(EncodeRegToV(cur[2]), EncodeRegToV(cur[3]), EncodeRegToV(cur[2]));
			emit_->FLD_D(SCRATCHF1, CTXREG, GetMipsRegOffset(first + 0));
			emit_->VEXTRINS_D(EncodeRegToV(cur[3]), EncodeRegToV(SCRATCHF1), 0);
			emit_->VILVL_D(EncodeRegToV(destReg), EncodeRegToV(cur[2]), EncodeRegToV(cur[3]));
		} else if (blendMask == 0b0100) {
			// y = yw##, w = z###, x = xz##, dest = xyzw.
			emit_->VILVL_W(EncodeRegToV(cur[1]), EncodeRegToV(cur[3]), EncodeRegToV(cur[1]));
			emit_->FLD_S(SCRATCHF1, CTXREG, GetMipsRegOffset(first + 2));
			emit_->VEXTRINS_W(EncodeRegToV(cur[3]), EncodeRegToV(SCRATCHF1), 0);
			emit_->VILVL_W(EncodeRegToV(cur[0]), EncodeRegToV(cur[3]), EncodeRegToV(cur[0]));
			emit_->VILVL_W(EncodeRegToV(destReg), EncodeRegToV(cur[1]), EncodeRegToV(cur[0]));
		} else if (blendMask == 0b0101 && (first & 3) == 0) {
			// y = yw##, w=x#z#, w = xz##, dest = xyzw.
			emit_->VILVL_W(EncodeRegToV(cur[1]), EncodeRegToV(cur[3]), EncodeRegToV(cur[1]));
			emit_->VLD(EncodeRegToV(cur[3]), CTXREG, GetMipsRegOffset(first));
			emit_->VPICKEV_W(EncodeRegToV(cur[3]), EncodeRegToV(cur[3]), EncodeRegToV(cur[3]));
			emit_->VILVL_W(EncodeRegToV(destReg), EncodeRegToV(cur[1]), EncodeRegToV(cur[3]));
		} else if (blendMask == 0b0110 && (first & 3) == 0) {
			if (destReg == cur[0]) {
				// w = wx##, dest = #yz#, dest = xyz#, dest = xyzw.
				emit_->VILVL_W(EncodeRegToV(cur[3]), EncodeRegToV(cur[0]), EncodeRegToV(cur[3]));
				emit_->VLD(EncodeRegToV(destReg), CTXREG, GetMipsRegOffset(first));
				emit_->VEXTRINS_W(EncodeRegToV(destReg), EncodeRegToV(cur[3]), 1);
				emit_->VEXTRINS_W(EncodeRegToV(destReg), EncodeRegToV(cur[3]), (3 << 4));
			} else {
				// Assumes destReg may equal cur[3].
				// x = xw##, dest = #yz#, dest = xyz#, dest = xyzw.
				emit_->VILVL_W(EncodeRegToV(cur[0]), EncodeRegToV(cur[3]), EncodeRegToV(cur[0]));
				emit_->VLD(EncodeRegToV(destReg), CTXREG, GetMipsRegOffset(first));
				emit_->VEXTRINS_W(EncodeRegToV(destReg), EncodeRegToV(cur[0]), 0);
				emit_->VEXTRINS_W(EncodeRegToV(destReg), EncodeRegToV(cur[0]), (3 << 4 | 1));
			}
		} else if (blendMask == 0b0111 && (first & 3) == 0 && destReg != cur[3]) {
			// dest = xyz#, dest = xyzw.
			emit_->VLD(EncodeRegToV(destReg), CTXREG, GetMipsRegOffset(first));
			emit_->VEXTRINS_W(EncodeRegToV(destReg), EncodeRegToV(cur[3]), (3 << 4));
		} else if (blendMask == 0b1000) {
			// x = xz##, z = w###, y = yw##, dest = xyzw.
			emit_->VILVL_W(EncodeRegToV(cur[0]), EncodeRegToV(cur[2]), EncodeRegToV(cur[0]));
			emit_->FLD_S(SCRATCHF1, CTXREG, GetMipsRegOffset(first + 3));
			emit_->VEXTRINS_W(EncodeRegToV(cur[2]), EncodeRegToV(SCRATCHF1), 0);
			emit_->VILVL_W(EncodeRegToV(cur[1]), EncodeRegToV(cur[2]), EncodeRegToV(cur[1]));
			emit_->VILVL_W(EncodeRegToV(destReg), EncodeRegToV(cur[1]), EncodeRegToV(cur[0]));
		} else if (blendMask == 0b1001 && (first & 3) == 0) {
			if (destReg == cur[1]) {
				// w = zy##, dest = x##w, dest = xy#w, dest = xyzw.
				emit_->VILVL_W(EncodeRegToV(cur[2]), EncodeRegToV(cur[1]), EncodeRegToV(cur[2]));
				emit_->VLD(EncodeRegToV(destReg), CTXREG, GetMipsRegOffset(first));
				emit_->VEXTRINS_W(EncodeRegToV(destReg), EncodeRegToV(cur[2]), (1 << 4 | 1));
				emit_->VEXTRINS_W(EncodeRegToV(destReg), EncodeRegToV(cur[2]), (2 << 4));
			} else {
				// Assumes destReg may equal cur[2].
				// y = yz##, dest = x##w, dest = xy#w, dest = xyzw.
				emit_->VILVL_W(EncodeRegToV(cur[1]), EncodeRegToV(cur[2]), EncodeRegToV(cur[1]));
				emit_->VLD(EncodeRegToV(destReg), CTXREG, GetMipsRegOffset(first));
				emit_->VEXTRINS_W(EncodeRegToV(destReg), EncodeRegToV(cur[1]), (1 << 4));
				emit_->VEXTRINS_W(EncodeRegToV(destReg), EncodeRegToV(cur[1]), (2 << 4 | 1));
			}
		} else if (blendMask == 0b1010 && (first & 3) == 0) {
			// x = xz##, z = #y#w, z=yw##, dest = xyzw.
			emit_->VILVL_W(EncodeRegToV(cur[0]), EncodeRegToV(cur[2]), EncodeRegToV(cur[0]));
			emit_->VLD(EncodeRegToV(cur[2]), CTXREG, GetMipsRegOffset(first));
			emit_->VPICKOD_W(EncodeRegToV(cur[2]), EncodeRegToV(cur[2]), EncodeRegToV(cur[2]));
			emit_->VILVL_W(EncodeRegToV(destReg), EncodeRegToV(cur[2]), EncodeRegToV(cur[0]));
		} else if (blendMask == 0b1011 && (first & 3) == 0 && destReg != cur[2]) {
			// dest = xy#w, dest = xyzw.
			emit_->VLD(EncodeRegToV(destReg), CTXREG, GetMipsRegOffset(first));
			emit_->VEXTRINS_W(EncodeRegToV(destReg), EncodeRegToV(cur[2]), (2 << 4));
		} else if (blendMask == 0b1100 && (first & 1) == 0) {
			// x = xy##, y = zw##, dest = xyzw. Mixed lane sizes.
			emit_->VILVL_W(EncodeRegToV(cur[0]), EncodeRegToV(cur[1]), EncodeRegToV(cur[0]));
			emit_->FLD_D(SCRATCHF1, CTXREG, GetMipsRegOffset(first + 2));
			emit_->VEXTRINS_D(EncodeRegToV(cur[1]), EncodeRegToV(SCRATCHF1), 0);
			emit_->VILVL_D(EncodeRegToV(destReg), EncodeRegToV(cur[1]), EncodeRegToV(cur[0]));
		} else if (blendMask == 0b1101 && (first & 3) == 0 && destReg != cur[1]) {
			// dest = x#zw, dest = xyzw.
			emit_->VLD(EncodeRegToV(destReg), CTXREG, GetMipsRegOffset(first));
			emit_->VEXTRINS_W(EncodeRegToV(destReg), EncodeRegToV(cur[1]), (1 << 4));
		} else if (blendMask == 0b1110 && (first & 3) == 0 && destReg != cur[0]) {
			// dest = #yzw, dest = xyzw.
			emit_->VLD(EncodeRegToV(destReg), CTXREG, GetMipsRegOffset(first));
			emit_->VEXTRINS_W(EncodeRegToV(destReg), EncodeRegToV(cur[0]), 0);
		} else if (blendMask == 0b1110 && (first & 3) == 0) {
			// If dest == cur[0] (which may be common), we need a temp...
			IRNativeReg freeReg = FindFreeReg(MIPSLoc::FREG, MIPSMap::INIT);
			// Very unfortunate.
			if (freeReg == INVALID_REG)
				return false;

			// free = x###, dest = #yzw, dest = xyzw.
			emit_->VREPLVEI_W(EncodeRegToV(FromNativeReg(freeReg)), EncodeRegToV(cur[0]), 0);
			emit_->VLD(EncodeRegToV(destReg), CTXREG, GetMipsRegOffset(first));
			emit_->VEXTRINS_W(EncodeRegToV(destReg), EncodeRegToV(FromNativeReg(freeReg)), 0);
		} else {
			return false;
		}
	} else {
		return false;
	}

	mr[first].lane = 0;
	for (int i = 0; i < lanes; ++i) {
		if (mr[first + i].nReg != -1) {
			// If this was dirty, the combined reg is now dirty.
			if (nr[mr[first + i].nReg].isDirty)
				nr[dest].isDirty = true;

			// Throw away the other register we're no longer using.
			if (i != 0)
				DiscardNativeReg(mr[first + i].nReg);
		}

		// And set it as using the new one.
		mr[first + i].lane = i;
		mr[first + i].loc = MIPSLoc::FREG;
		mr[first + i].nReg = dest;
	}

	if (dest != nreg) {
		nr[dest].mipsReg = first;
		nr[nreg].mipsReg = -1;
		nr[nreg].isDirty = false;
	}

	return true;
}

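// The accessors below return the host register currently backing a MIPS reg and
// assume it has already been mapped: R()/RPtr() for GPRs (RPtr expects the value to
// be usable as a host pointer, i.e. REG_AS_PTR or a pointerified reg), F() for FPRs
// and V() for their LSX vector view.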
LoongArch64Reg LoongArch64RegCache::R(IRReg mipsReg) {
	_dbg_assert_(IsValidGPR(mipsReg));
	_dbg_assert_(mr[mipsReg].loc == MIPSLoc::REG || mr[mipsReg].loc == MIPSLoc::REG_IMM);
	if (mr[mipsReg].loc == MIPSLoc::REG || mr[mipsReg].loc == MIPSLoc::REG_IMM) {
		return (LoongArch64Reg)mr[mipsReg].nReg;
	} else {
		ERROR_LOG_REPORT(Log::JIT, "Reg %i not in LoongArch64 reg", mipsReg);
		return INVALID_REG;  // BAAAD
	}
}

LoongArch64Reg LoongArch64RegCache::RPtr(IRReg mipsReg) {
	_dbg_assert_(IsValidGPR(mipsReg));
	_dbg_assert_(mr[mipsReg].loc == MIPSLoc::REG || mr[mipsReg].loc == MIPSLoc::REG_IMM || mr[mipsReg].loc == MIPSLoc::REG_AS_PTR);
	if (mr[mipsReg].loc == MIPSLoc::REG_AS_PTR) {
		return (LoongArch64Reg)mr[mipsReg].nReg;
	} else if (mr[mipsReg].loc == MIPSLoc::REG || mr[mipsReg].loc == MIPSLoc::REG_IMM) {
		int la = mr[mipsReg].nReg;
		_dbg_assert_(nr[la].pointerified);
		if (nr[la].pointerified) {
			return (LoongArch64Reg)mr[mipsReg].nReg;
		} else {
			ERROR_LOG(Log::JIT, "Tried to use a non-pointer register as a pointer");
			return INVALID_REG;
		}
	} else {
		ERROR_LOG_REPORT(Log::JIT, "Reg %i not in LoongArch64 reg", mipsReg);
		return INVALID_REG;  // BAAAD
	}
}

LoongArch64Reg LoongArch64RegCache::F(IRReg mipsReg) {
	_dbg_assert_(IsValidFPR(mipsReg));
	_dbg_assert_(mr[mipsReg + 32].loc == MIPSLoc::FREG);
	if (mr[mipsReg + 32].loc == MIPSLoc::FREG) {
		return (LoongArch64Reg)mr[mipsReg + 32].nReg;
	} else {
		ERROR_LOG_REPORT(Log::JIT, "Reg %i not in LoongArch64 reg", mipsReg);
		return INVALID_REG;  // BAAAD
	}
}

LoongArch64Reg LoongArch64RegCache::V(IRReg mipsReg) {
	return EncodeRegToV(F(mipsReg));
}