Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
hrydgard
GitHub Repository: hrydgard/ppsspp
Path: blob/master/GPU/Software/RasterizerRegCache.cpp
3186 views
1
// Copyright (c) 2021- PPSSPP Project.
2
3
// This program is free software: you can redistribute it and/or modify
4
// it under the terms of the GNU General Public License as published by
5
// the Free Software Foundation, version 2.0 or later versions.
6
7
// This program is distributed in the hope that it will be useful,
8
// but WITHOUT ANY WARRANTY; without even the implied warranty of
9
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10
// GNU General Public License 2.0 for more details.
11
12
// A copy of the GPL 2.0 should have been included with the program.
13
// If not, see http://www.gnu.org/licenses/
14
15
// Official git repository and contact information can be found at
16
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
17
18
#include "GPU/Software/RasterizerRegCache.h"
19
20
#include "Common/Arm64Emitter.h"
21
22
namespace Rasterizer {
23
24
void RegCache::SetupABI(const std::vector<Purpose> &args, bool forceRetain) {
25
#if PPSSPP_ARCH(ARM)
26
_assert_msg_(false, "Not yet implemented");
27
#elif PPSSPP_ARCH(ARM64_NEON)
28
using namespace Arm64Gen;
29
30
// ARM64 has a generous allotment of registers.
31
static const Reg genArgs[] = { X0, X1, X2, X3, X4, X5, X6, X7 };
32
static const Reg vecArgs[] = { Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7 };
33
size_t genIndex = 0;
34
size_t vecIndex = 0;
35
36
for (const Purpose &p : args) {
37
if ((p & FLAG_GEN) != 0) {
38
if (genIndex < ARRAY_SIZE(genArgs)) {
39
Add(genArgs[genIndex++], p);
40
if (forceRetain)
41
ForceRetain(p);
42
}
43
} else {
44
if (vecIndex < ARRAY_SIZE(vecArgs)) {
45
Add(vecArgs[vecIndex++], p);
46
if (forceRetain)
47
ForceRetain(p);
48
}
49
}
50
}
51
52
// Any others are free and purposeless.
53
for (size_t i = genIndex; i < ARRAY_SIZE(genArgs); ++i)
54
Add(genArgs[i], GEN_INVALID);
55
for (size_t i = vecIndex; i < ARRAY_SIZE(vecArgs); ++i)
56
Add(vecArgs[i], VEC_INVALID);
57
58
// Add all other caller saved regs without purposes yet.
59
static const Reg genTemps[] = { X8, X9, X10, X11, X12, X13, X14, X15 };
60
for (Reg r : genTemps)
61
Add(r, GEN_INVALID);
62
static const Reg vecTemps[] = { Q16, Q17, Q18, Q19, Q20, Q21, Q22, Q23 };
63
for (Reg r : vecTemps)
64
Add(r, VEC_INVALID);
65
// We also have X16-17 and Q24-Q31, but leave those for ordered paired instructions.
66
#elif PPSSPP_ARCH(X86)
67
_assert_msg_(false, "Not yet implemented");
68
#elif PPSSPP_ARCH(AMD64)
69
using namespace Gen;
70
71
#if PPSSPP_PLATFORM(WINDOWS)
72
// The Windows convention is annoying, as it wastes registers and keeps to "positions."
73
Reg genArgs[] = { RCX, RDX, R8, R9 };
74
Reg vecArgs[] = { XMM0, XMM1, XMM2, XMM3, XMM4, XMM5 };
75
76
for (size_t i = 0; i < args.size(); ++i) {
77
const Purpose &p = args[i];
78
if ((p & FLAG_GEN) != 0) {
79
if (i < ARRAY_SIZE(genArgs)) {
80
Add(genArgs[i], p);
81
genArgs[i] = INVALID_REG;
82
if (forceRetain)
83
ForceRetain(p);
84
}
85
} else {
86
if (i < ARRAY_SIZE(vecArgs)) {
87
Add(vecArgs[i], p);
88
vecArgs[i] = INVALID_REG;
89
if (forceRetain)
90
ForceRetain(p);
91
}
92
}
93
}
94
95
// Any unused regs can be used freely as temps.
96
for (Reg r : genArgs) {
97
if (r != INVALID_REG)
98
Add(r, GEN_INVALID);
99
}
100
for (Reg r : vecArgs) {
101
if (r != INVALID_REG)
102
Add(r, VEC_INVALID);
103
}
104
105
// Additionally, these three are volatile.
106
// Must save: RBX, RSP, RBP, RDI, RSI, R12-R15, XMM6-15
107
static const Reg genTemps[] = { RAX, R10, R11 };
108
for (Reg r : genTemps)
109
Add(r, GEN_INVALID);
110
#else
111
// Okay, first, allocate args. SystemV gives to the first of each usable pool.
112
static const Reg genArgs[] = { RDI, RSI, RDX, RCX, R8, R9 };
113
static const Reg vecArgs[] = { XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7 };
114
size_t genIndex = 0;
115
size_t vecIndex = 0;
116
117
for (const Purpose &p : args) {
118
if ((p & FLAG_GEN) != 0) {
119
if (genIndex < ARRAY_SIZE(genArgs)) {
120
Add(genArgs[genIndex++], p);
121
if (forceRetain)
122
ForceRetain(p);
123
}
124
} else {
125
if (vecIndex < ARRAY_SIZE(vecArgs)) {
126
Add(vecArgs[vecIndex++], p);
127
if (forceRetain)
128
ForceRetain(p);
129
}
130
}
131
}
132
133
// Any others are free and purposeless.
134
for (size_t i = genIndex; i < ARRAY_SIZE(genArgs); ++i)
135
Add(genArgs[i], GEN_INVALID);
136
for (size_t i = vecIndex; i < ARRAY_SIZE(vecArgs); ++i)
137
Add(vecArgs[i], VEC_INVALID);
138
139
// Add all other caller saved regs without purposes yet.
140
// Must save: RBX, RSP, RBP, R12-R15
141
static const Reg genTemps[] = { RAX, R10, R11 };
142
for (Reg r : genTemps)
143
Add(r, GEN_INVALID);
144
static const Reg vecTemps[] = { XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15 };
145
for (Reg r : vecTemps)
146
Add(r, VEC_INVALID);
147
#endif
148
#elif PPSSPP_ARCH(RISCV64)
149
_assert_msg_(false, "Not yet implemented (no vector calling standard yet)");
150
#elif PPSSPP_ARCH(LOONGARCH64)
151
_assert_msg_(false, "Not yet implemented");
152
#elif PPSSPP_ARCH(MIPS)
153
_assert_msg_(false, "Not yet implemented");
154
#else
155
_assert_msg_(false, "Not yet implemented");
156
#endif
157
}
158
159
void RegCache::Reset(bool validate) {
160
if (validate) {
161
for (auto &reg : regs) {
162
_assert_msg_(reg.locked == 0, "softjit: Reset() with reg still locked (%04X)", reg.purpose);
163
_assert_msg_(!reg.forceRetained, "softjit: Reset() with reg force retained (%04X)", reg.purpose);
164
}
165
}
166
regs.clear();
167
}
168
169
// Starts tracking register r with purpose p.
//
// Asserts if r is already tracked for the same kind (general vs. vector, per
// FLAG_GEN) or if r is REG_INVALID_VALUE.  The entry is appended either way.
void RegCache::Add(Reg r, Purpose p) {
	for (const RegStatus &existing : regs) {
		const bool sameReg = existing.reg == r;
		const bool sameKind = (existing.purpose & FLAG_GEN) == (p & FLAG_GEN);
		_assert_msg_(!(sameReg && sameKind), "softjit Add() reg duplicate (%04X)", p);
	}
	_assert_msg_(r != REG_INVALID_VALUE, "softjit Add() invalid reg (%04X)", p);

	RegStatus entry;
	entry.purpose = p;
	entry.reg = r;
	regs.push_back(entry);
}
182
183
// Relabels the first register whose purpose is history so that it has purpose
// destiny instead.  Asserts if no register has purpose history.
void RegCache::Change(Purpose history, Purpose destiny) {
	for (size_t i = 0; i < regs.size(); ++i) {
		if (regs[i].purpose != history)
			continue;

		regs[i].purpose = destiny;
		return;
	}

	_assert_msg_(false, "softjit Change() reg that isn't there (%04X)", history);
}
193
194
// Drops one lock on register r held for purpose p.  When the last lock goes
// away, the register's purpose is reset to the INVALID value of its kind.
//
// r is overwritten with REG_INVALID_VALUE on return.
// Asserts if the register isn't tracked with that purpose, isn't locked, or
// is force retained.
void RegCache::Release(Reg &r, Purpose p) {
	RegStatus *entry = FindReg(r, p);
	_assert_msg_(entry != nullptr, "softjit Release() reg that isn't there (%04X)", p);
	_assert_msg_(entry->locked > 0, "softjit Release() reg that isn't locked (%04X)", p);
	_assert_msg_(!entry->forceRetained, "softjit Release() reg that is force retained (%04X)", p);

	entry->locked--;
	const bool fullyUnlocked = entry->locked == 0;
	if (fullyUnlocked) {
		const bool isGen = (entry->purpose & FLAG_GEN) != 0;
		entry->purpose = isGen ? GEN_INVALID : VEC_INVALID;
	}

	r = REG_INVALID_VALUE;
}
210
211
// Drops one lock on register r held for purpose p, leaving its purpose as is.
//
// r is overwritten with REG_INVALID_VALUE when the register was found.
// Asserts if p has FLAG_TEMP set, if the register isn't tracked with that
// purpose, or if it isn't locked.
void RegCache::Unlock(Reg &r, Purpose p) {
	_assert_msg_((p & FLAG_TEMP) == 0, "softjit Unlock() temp reg (%04X)", p);

	RegStatus *entry = FindReg(r, p);
	if (entry == nullptr) {
		_assert_msg_(false, "softjit Unlock() reg that isn't there (%04X)", p);
		return;
	}

	_assert_msg_(entry->locked > 0, "softjit Unlock() reg that isn't locked (%04X)", p);
	entry->locked--;
	r = REG_INVALID_VALUE;
}
223
224
// Returns true when some tracked register currently has purpose p.
bool RegCache::Has(Purpose p) {
	for (size_t i = 0; i < regs.size(); ++i) {
		if (regs[i].purpose == p)
			return true;
	}
	return false;
}
232
233
// Looks up the register that currently has purpose p, adds a lock to it,
// marks it as having been used, and returns it.
//
// Asserts and returns REG_INVALID_VALUE when no register has purpose p.
RegCache::Reg RegCache::Find(Purpose p) {
	for (auto &reg : regs) {
		if (reg.purpose != p)
			continue;

		// The count is checked before it is incremented, so it has to be
		// strictly below 255 for the result to stay within 255.  The previous
		// "<= 255" form let the count step past that limit.
		// NOTE(review): the type of `locked` is declared in the header, which
		// isn't visible here; if it is 8 bits wide, "<= 255" was always true
		// and the counter could wrap to 0 unnoticed - confirm.
		_assert_msg_(reg.locked < 255, "softjit Find() reg has lots of locks (%04X)", p);
		reg.locked++;
		reg.everLocked = true;
		return reg.reg;
	}

	_assert_msg_(false, "softjit Find() reg that isn't there (%04X)", p);
	return REG_INVALID_VALUE;
}
245
246
// Picks an unlocked, non-retained register of the right kind (general vs.
// vector, per FLAG_GEN), assigns it purpose p with a single lock, and returns
// it.
//
// Selection: the first candidate with FLAG_TEMP set wins immediately;
// otherwise the candidate with the lowest purpose value seen is used.
// Asserts if p is already present, or (returning REG_INVALID_VALUE) if no
// candidate exists.
RegCache::Reg RegCache::Alloc(Purpose p) {
	_assert_msg_(!Has(p), "softjit Alloc() reg duplicate (%04X)", p);

	RegStatus *chosen = nullptr;
	for (auto &candidate : regs) {
		const bool busy = candidate.locked != 0 || candidate.forceRetained;
		// Needs to be the same type.
		const bool sameKind = (candidate.purpose & FLAG_GEN) == (p & FLAG_GEN);
		if (busy || !sameKind)
			continue;

		// A free/purposeless reg (includes INVALID) is the best we can get.
		if ((candidate.purpose & FLAG_TEMP) != 0) {
			chosen = &candidate;
			break;
		}

		// Otherwise take the first match, then prefer lower priority regs.
		if (chosen == nullptr || candidate.purpose < chosen->purpose)
			chosen = &candidate;
	}

	if (chosen == nullptr) {
		_assert_msg_(false, "softjit Alloc() reg with none free (%04X)", p);
		return REG_INVALID_VALUE;
	}

	chosen->purpose = p;
	chosen->locked = 1;
	chosen->everLocked = true;
	return chosen->reg;
}
278
279
// Flags the first register with purpose p as force retained.
// Asserts if no register has purpose p.
void RegCache::ForceRetain(Purpose p) {
	for (size_t i = 0; i < regs.size(); ++i) {
		if (regs[i].purpose != p)
			continue;

		regs[i].forceRetained = true;
		return;
	}

	_assert_msg_(false, "softjit ForceRetain() reg that isn't there (%04X)", p);
}
289
290
// Clears the force retained flag on the first register with purpose p and
// resets its purpose to the INVALID value of its kind.
//
// Asserts if that register is still locked, or if no register has purpose p.
void RegCache::ForceRelease(Purpose p) {
	for (auto &entry : regs) {
		if (entry.purpose != p)
			continue;

		_assert_msg_(entry.locked == 0, "softjit ForceRelease() while locked (%04X)", p);
		entry.forceRetained = false;
		const bool isGen = (entry.purpose & FLAG_GEN) != 0;
		entry.purpose = isGen ? GEN_INVALID : VEC_INVALID;
		return;
	}

	_assert_msg_(false, "softjit ForceRelease() reg that isn't there (%04X)", p);
}
305
306
// Claims the specific register r for purpose p, with a single lock.
//
// r / p:       register to claim and the purpose to give it.  Only an entry
//              of the same kind (general vs. vector, per FLAG_GEN) matches.
// needsSwap:   set to false when r was free and was simply relabeled; set to
//              true when r was in use and its bookkeeping had to be moved.
// swapReg /
// swapPurpose: where the displaced bookkeeping goes.  If swapReg is tracked
//              with swapPurpose, that entry takes over r's old purpose, lock
//              count and retain flag; otherwise a new entry for swapReg is
//              appended carrying r's old state.
//
// Only the cache's bookkeeping is updated here; presumably the caller emits
// the actual register move when needsSwap comes back true - verify at the
// call sites.
// Asserts if r isn't tracked for the requested kind.
void RegCache::GrabReg(Reg r, Purpose p, bool &needsSwap, Reg swapReg, Purpose swapPurpose) {
	for (auto &reg : regs) {
		if (reg.reg != r)
			continue;
		if ((reg.purpose & FLAG_GEN) != (p & FLAG_GEN))
			continue;

		// Easy version, it's free.
		if (reg.locked == 0 && !reg.forceRetained) {
			needsSwap = false;
			reg.purpose = p;
			reg.locked = 1;
			reg.everLocked = true;
			return;
		}

		// Okay, we need to swap.  Find that reg.
		needsSwap = true;
		RegStatus *swap = FindReg(swapReg, swapPurpose);
		const bool appendDisplaced = swap == nullptr;

		// Snapshot of the current occupant, only used when swapReg has no
		// entry yet and one has to be appended.
		RegStatus displaced = reg;
		if (swap) {
			swap->purpose = reg.purpose;
			swap->forceRetained = reg.forceRetained;
			swap->locked = reg.locked;
			swap->everLocked = true;
		} else {
			_assert_msg_(!Has(swapPurpose), "softjit GrabReg() wrong purpose (%04X)", swapPurpose);
			displaced.reg = swapReg;
			displaced.everLocked = true;
		}

		reg.purpose = p;
		reg.locked = 1;
		reg.everLocked = true;
		reg.forceRetained = false;

		// Append only after every write through `reg` is done: growing regs
		// can move its storage (as std::vector::push_back does), which would
		// leave `reg` dangling.  Nothing touches `reg` past this point.
		if (appendDisplaced)
			regs.push_back(displaced);
		return;
	}

	_assert_msg_(false, "softjit GrabReg() reg that isn't there");
}
347
348
// Tries to relabel the specific register r (matched by kind, per FLAG_GEN)
// with purpose p.
//
// Returns true if r already had purpose p or was relabeled; false if r is
// locked or force retained.  Asserts if another register already has purpose
// p, or (returning false) if r isn't tracked for that kind.
bool RegCache::ChangeReg(Reg r, Purpose p) {
	for (auto &entry : regs) {
		const bool sameKind = (entry.purpose & FLAG_GEN) == (p & FLAG_GEN);
		if (entry.reg != r || !sameKind)
			continue;

		if (entry.purpose == p)
			return true;
		_assert_msg_(!Has(p), "softjit ChangeReg() duplicate purpose (%04X)", p);

		const bool busy = entry.locked != 0 || entry.forceRetained;
		if (busy)
			return false;

		// Since we're setting its purpose, we must've used it.
		entry.everLocked = true;
		entry.purpose = p;
		return true;
	}

	_assert_msg_(false, "softjit ChangeReg() reg that isn't there");
	return false;
}
371
372
// Reports whether register r has ever been locked (its everLocked flag).
//
// flag: only its FLAG_GEN bit is looked at, to pick between the general and
//       vector entry for r.
// Asserts and returns false if r isn't tracked for that kind.
bool RegCache::UsedReg(Reg r, Purpose flag) {
	for (const RegStatus &entry : regs) {
		const bool sameKind = (entry.purpose & FLAG_GEN) == (flag & FLAG_GEN);
		if (entry.reg == r && sameKind)
			return entry.everLocked;
	}

	_assert_msg_(false, "softjit UsedReg() reg that isn't there");
	return false;
}
384
385
// Returns the entry that tracks register r with exactly purpose p, or nullptr
// when there is none.  Does not change any lock state.
RegCache::RegStatus *RegCache::FindReg(Reg r, Purpose p) {
	for (size_t i = 0; i < regs.size(); ++i) {
		RegStatus &entry = regs[i];
		if (entry.purpose != p || entry.reg != r)
			continue;
		return &entry;
	}

	return nullptr;
}
394
395
// Sets up the code space: AllocCodeSpace(size) followed by ClearCodeSpace(0),
// then writes a little filler code on some platforms.
CodeBlock::CodeBlock(int size)
#if PPSSPP_ARCH(ARM64_NEON)
	: fp(this)
#endif
{
	AllocCodeSpace(size);
	ClearCodeSpace(0);

	// Add some random code to "help" MSVC's buggy disassembler :(
#if defined(_WIN32) && (PPSSPP_ARCH(X86) || PPSSPP_ARCH(AMD64)) && !PPSSPP_PLATFORM(UWP)
	using namespace Gen;
	// 100 repetitions of a register move followed by a return.
	for (int remaining = 100; remaining > 0; --remaining) {
		MOV(32, R(EAX), R(EBX));
		RET();
	}
#elif PPSSPP_ARCH(ARM)
	BKPT(0);
	BKPT(0);
#endif
}
415
416
int CodeBlock::WriteProlog(int extraStack, const std::vector<RegCache::Reg> &vec, const std::vector<RegCache::Reg> &gen) {
417
savedStack_ = 0;
418
firstVecStack_ = extraStack;
419
prologVec_ = vec;
420
prologGen_ = gen;
421
422
int totalStack = 0;
423
424
#if PPSSPP_ARCH(X86) || PPSSPP_ARCH(AMD64)
425
using namespace Gen;
426
427
BeginWrite(32768);
428
AlignCode16();
429
lastPrologStart_ = GetWritableCodePtr();
430
431
for (X64Reg r : gen) {
432
PUSH(r);
433
regCache_.Add(r, RegCache::GEN_INVALID);
434
totalStack += 8;
435
}
436
437
savedStack_ = 16 * (int)vec.size() + extraStack;
438
// We want to align if possible. It starts out unaligned.
439
if ((totalStack & 8) == 0)
440
savedStack_ += 8;
441
totalStack += savedStack_;
442
if (savedStack_ != 0)
443
SUB(64, R(RSP), Imm32(savedStack_));
444
445
int nextOffset = extraStack;
446
for (X64Reg r : vec) {
447
MOVUPS(MDisp(RSP, nextOffset), r);
448
regCache_.Add(r, RegCache::VEC_INVALID);
449
nextOffset += 16;
450
}
451
452
lastPrologEnd_ = GetWritableCodePtr();
453
#else
454
_assert_msg_(false, "Not yet implemented");
455
#endif
456
457
return totalStack;
458
}
459
460
// Emits the epilog matching the last WriteProlog() and ends the write: the
// saved vector registers that were actually used are reloaded, the stack is
// released, the used general registers are popped, and RET is emitted.
//
// If some registers saved by the prolog were never used (per
// RegCache::UsedReg), the prolog is then rewritten in place without them and
// shifted so that it ends where the original prolog ended.
//
// Returns the address where the (possibly shifted) prolog begins.
// Only implemented for x86/x64; asserts elsewhere.
const u8 *CodeBlock::WriteFinalizedEpilog() {
	u8 *prologPtr = lastPrologStart_;
	const ptrdiff_t prologMaxSize = lastPrologEnd_ - lastPrologStart_;
	lastPrologEnd_ = nullptr;
	lastPrologStart_ = nullptr;

#if PPSSPP_ARCH(X86) || PPSSPP_ARCH(AMD64)
	using namespace Gen;

	bool mustRewriteProlog = false;

	// Reload only the vector regs that were used; they are packed in order.
	int vecOffset = firstVecStack_;
	for (size_t i = 0; i < prologVec_.size(); ++i) {
		const X64Reg vecReg = prologVec_[i];
		if (!regCache_.UsedReg(vecReg, RegCache::VEC_INVALID)) {
			mustRewriteProlog = true;
			continue;
		}
		MOVUPS(vecReg, MDisp(RSP, vecOffset));
		vecOffset += 16;
	}

	// We use the stack offset in generated code, so maintain any difference:
	// every push that gets dropped is made up for in the reserved area.
	int unusedGenSpace = 0;
	for (size_t i = 0; i < prologGen_.size(); ++i) {
		if (regCache_.UsedReg(prologGen_[i], RegCache::GEN_INVALID))
			continue;
		unusedGenSpace += 8;
	}
	if (unusedGenSpace != 0)
		mustRewriteProlog = true;

	const int stackAdjust = savedStack_ + unusedGenSpace;
	if (stackAdjust != 0)
		ADD(64, R(RSP), Imm32(stackAdjust));
	// Pop in the reverse of the push order.
	for (auto it = prologGen_.rbegin(); it != prologGen_.rend(); ++it) {
		const X64Reg genReg = *it;
		if (regCache_.UsedReg(genReg, RegCache::GEN_INVALID))
			POP(genReg);
	}

	RET();
	EndWrite();

	if (mustRewriteProlog) {
		// Okay, now let's rewrite the prolog since we didn't need all those regs.
		XEmitter prolog(prologPtr);
		if (PlatformIsWXExclusive()) {
			ProtectMemoryPages(prologPtr, 128, MEM_PROT_READ | MEM_PROT_WRITE);
		}

		// First, write the new prolog at the original position.
		for (size_t i = 0; i < prologGen_.size(); ++i) {
			const X64Reg genReg = prologGen_[i];
			if (regCache_.UsedReg(genReg, RegCache::GEN_INVALID))
				prolog.PUSH(genReg);
		}

		// Even if less of the stack is actually used, we want the number to match to references.
		if (stackAdjust != 0)
			prolog.SUB(64, R(RSP), Imm32(stackAdjust));

		vecOffset = firstVecStack_;
		for (size_t i = 0; i < prologVec_.size(); ++i) {
			const X64Reg vecReg = prologVec_[i];
			if (!regCache_.UsedReg(vecReg, RegCache::VEC_INVALID))
				continue;
			prolog.MOVUPS(MDisp(RSP, vecOffset), vecReg);
			vecOffset += 16;
		}

		const ptrdiff_t prologLen = prolog.GetWritableCodePtr() - prologPtr;
		if (prologLen < prologMaxSize) {
			// We wrote it at the start, but we actually want it at the end.
			const ptrdiff_t slack = prologMaxSize - prologLen;
			u8 *oldPrologPtr = prologPtr;
			prologPtr += slack;
			memmove(prologPtr, oldPrologPtr, prologLen);
			// Set INT3s before the new start to be safe.
			memset(oldPrologPtr, 0xCC, slack);
		}

		if (PlatformIsWXExclusive()) {
			ProtectMemoryPages(prologPtr, 128, MEM_PROT_READ | MEM_PROT_EXEC);
		}
	}
#else
	_assert_msg_(false, "Not yet implemented");
#endif

	return prologPtr;
}
545
546
// Returns a locked register with purpose VEC_ZERO.
//
// If the cache already has one, it is locked via Find() and returned.
// Otherwise, on x86/x64, a register is allocated and PXOR'd with itself; on
// other architectures REG_INVALID_VALUE is returned.
RegCache::Reg CodeBlock::GetZeroVec() {
	if (regCache_.Has(RegCache::VEC_ZERO))
		return regCache_.Find(RegCache::VEC_ZERO);

#if PPSSPP_ARCH(X86) || PPSSPP_ARCH(AMD64)
	using namespace Gen;
	const X64Reg zeroReg = regCache_.Alloc(RegCache::VEC_ZERO);
	PXOR(zeroReg, R(zeroReg));
	return zeroReg;
#else
	return RegCache::REG_INVALID_VALUE;
#endif
}
559
560
void CodeBlock::Describe(const std::string &message) {
561
descriptions_[GetCodePointer()] = message;
562
}
563
564
std::string CodeBlock::DescribeCodePtr(const u8 *ptr) {
565
ptrdiff_t dist = 0x7FFFFFFF;
566
std::string found;
567
for (const auto &it : descriptions_) {
568
ptrdiff_t it_dist = ptr - it.first;
569
if (it_dist >= 0 && it_dist < dist) {
570
found = it.second;
571
dist = it_dist;
572
}
573
}
574
return found;
575
}
576
577
void CodeBlock::Clear() {
578
ClearCodeSpace(0);
579
descriptions_.clear();
580
}
581
582
void CodeBlock::WriteSimpleConst16x8(const u8 *&ptr, uint8_t value) {
583
if (ptr == nullptr)
584
WriteDynamicConst16x8(ptr, value);
585
}
586
587
void CodeBlock::WriteSimpleConst8x16(const u8 *&ptr, uint16_t value) {
588
if (ptr == nullptr)
589
WriteDynamicConst8x16(ptr, value);
590
}
591
592
void CodeBlock::WriteSimpleConst4x32(const u8 *&ptr, uint32_t value) {
593
if (ptr == nullptr)
594
WriteDynamicConst4x32(ptr, value);
595
}
596
597
void CodeBlock::WriteDynamicConst16x8(const u8 *&ptr, uint8_t value) {
598
#if PPSSPP_ARCH(X86) || PPSSPP_ARCH(AMD64)
599
ptr = AlignCode16();
600
for (int i = 0; i < 16; ++i)
601
Write8(value);
602
#else
603
_assert_msg_(false, "Not yet implemented");
604
#endif
605
}
606
607
void CodeBlock::WriteDynamicConst8x16(const u8 *&ptr, uint16_t value) {
608
#if PPSSPP_ARCH(X86) || PPSSPP_ARCH(AMD64)
609
ptr = AlignCode16();
610
for (int i = 0; i < 8; ++i)
611
Write16(value);
612
#else
613
_assert_msg_(false, "Not yet implemented");
614
#endif
615
}
616
617
void CodeBlock::WriteDynamicConst4x32(const u8 *&ptr, uint32_t value) {
618
#if PPSSPP_ARCH(X86) || PPSSPP_ARCH(AMD64)
619
ptr = AlignCode16();
620
for (int i = 0; i < 4; ++i)
621
Write32(value);
622
#else
623
_assert_msg_(false, "Not yet implemented");
624
#endif
625
}
626
627
};
628
629