Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
hrydgard
GitHub Repository: hrydgard/ppsspp
Path: blob/master/GPU/Software/RasterizerRegCache.h
3186 views
1
// Copyright (c) 2021- PPSSPP Project.
2
3
// This program is free software: you can redistribute it and/or modify
4
// it under the terms of the GNU General Public License as published by
5
// the Free Software Foundation, version 2.0 or later versions.
6
7
// This program is distributed in the hope that it will be useful,
8
// but WITHOUT ANY WARRANTY; without even the implied warranty of
9
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10
// GNU General Public License 2.0 for more details.
11
12
// A copy of the GPL 2.0 should have been included with the program.
13
// If not, see http://www.gnu.org/licenses/
14
15
// Official git repository and contact information can be found at
16
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
17
18
#pragma once
19
20
#include "ppsspp_config.h"
21
22
#include <cstdint>
23
#include <string>
24
#include <unordered_map>
25
#include <vector>
26
27
#include "Common/Common.h"
28
#include "Common/Math/SIMDHeaders.h"
29
30
#if PPSSPP_ARCH(ARM64_NEON)
31
#if defined(_MSC_VER) && PPSSPP_ARCH(ARM64)
32
#include <arm64_neon.h>
33
#else
34
#include <arm_neon.h>
35
#endif
36
#endif
37
38
#if PPSSPP_ARCH(ARM)
39
#include "Common/ArmEmitter.h"
40
#elif PPSSPP_ARCH(ARM64_NEON)
41
#include "Common/Arm64Emitter.h"
42
#elif PPSSPP_ARCH(X86) || PPSSPP_ARCH(AMD64)
43
#include "Common/x64Emitter.h"
44
#elif PPSSPP_ARCH(MIPS)
45
#include "Common/MipsEmitter.h"
46
#elif PPSSPP_ARCH(RISCV64)
47
#include "Common/RiscVEmitter.h"
48
#elif PPSSPP_ARCH(LOONGARCH64)
49
#include "Common/LoongArch64Emitter.h"
50
#else
51
#include "Common/FakeEmitter.h"
52
#endif
53
#include "GPU/Math3D.h"
54
55
namespace Rasterizer {
56
57
// While not part of the reg cache proper, this is the type it is built for.
58
#if PPSSPP_ARCH(ARM)
59
typedef ArmGen::ARMXCodeBlock BaseCodeBlock;
60
#elif PPSSPP_ARCH(ARM64_NEON)
61
typedef Arm64Gen::ARM64CodeBlock BaseCodeBlock;
62
#elif PPSSPP_ARCH(X86) || PPSSPP_ARCH(AMD64)
63
typedef Gen::XCodeBlock BaseCodeBlock;
64
#elif PPSSPP_ARCH(MIPS)
65
typedef MIPSGen::MIPSCodeBlock BaseCodeBlock;
66
#elif PPSSPP_ARCH(RISCV64)
67
typedef RiscVGen::RiscVCodeBlock BaseCodeBlock;
68
#elif PPSSPP_ARCH(LOONGARCH64)
69
typedef LoongArch64Gen::LoongArch64CodeBlock BaseCodeBlock;
70
#else
71
typedef FakeGen::FakeXCodeBlock BaseCodeBlock;
72
#endif
73
74
// We also have the types of things that end up in regs.
75
#if PPSSPP_ARCH(ARM64_NEON)
76
typedef int32x4_t Vec4IntArg;
77
typedef int32x4_t Vec4IntResult;
78
typedef float32x4_t Vec4FloatArg;
79
static inline Vec4IntArg ToVec4IntArg(const Math3D::Vec4<int> &a) { return vld1q_s32(a.AsArray()); }
80
static inline Vec4IntArg ToVec4IntArg(const Vec4IntResult &a) { return a; }
81
static inline Vec4IntResult ToVec4IntResult(const Math3D::Vec4<int> &a) { return vld1q_s32(a.AsArray()); }
82
static inline Vec4FloatArg ToVec4FloatArg(const Math3D::Vec4<float> &a) { return vld1q_f32(a.AsArray()); }
83
#elif PPSSPP_ARCH(X86) || PPSSPP_ARCH(AMD64)
84
typedef __m128i Vec4IntArg;
85
typedef __m128i Vec4IntResult;
86
typedef __m128 Vec4FloatArg;
87
static inline Vec4IntArg ToVec4IntArg(const Math3D::Vec4<int> &a) { return a.ivec; }
88
static inline Vec4IntArg ToVec4IntArg(const Vec4IntResult &a) { return a; }
89
static inline Vec4IntResult ToVec4IntResult(const Math3D::Vec4<int> &a) { return a.ivec; }
90
static inline Vec4FloatArg ToVec4FloatArg(const Math3D::Vec4<float> &a) { return a.vec; }
91
#else
92
typedef const Math3D::Vec4<int> &Vec4IntArg;
93
typedef Math3D::Vec4<int> Vec4IntResult;
94
typedef const Math3D::Vec4<float> &Vec4FloatArg;
95
static inline Vec4IntArg ToVec4IntArg(const Math3D::Vec4<int> &a) { return a; }
96
static inline Vec4IntResult ToVec4IntResult(const Math3D::Vec4<int> &a) { return a; }
97
static inline Vec4FloatArg ToVec4FloatArg(const Math3D::Vec4<float> &a) { return a; }
98
#endif
99
100
#if PPSSPP_ARCH(AMD64) && PPSSPP_PLATFORM(WINDOWS) && (defined(_MSC_VER) || defined(__clang__) || defined(__INTEL_COMPILER))
101
#define SOFTRAST_CALL __vectorcall
102
#else
103
#define SOFTRAST_CALL
104
#endif
105
106
// Keeps track of registers and the purpose each one is currently assigned to.
struct RegCache {
	// What a tracked register holds.
	// Values with the FLAG_GEN bit set are the GEN_* purposes; values without it are VEC_*.
	// Values with the FLAG_TEMP bit set are the *_TEMP* purposes.
	enum Purpose {
		FLAG_GEN = 0x0100,
		FLAG_TEMP = 0x1000,

		// Vector purposes.
		VEC_ZERO = 0x0000,
		VEC_RESULT = 0x0001,
		VEC_RESULT1 = 0x0002,
		VEC_U1 = 0x0003,
		VEC_V1 = 0x0004,
		VEC_INDEX = 0x0005,
		VEC_INDEX1 = 0x0006,

		// General purposes.
		GEN_SRC_ALPHA = 0x0100,
		GEN_ID = 0x0101,
		GEN_STENCIL = 0x0103,
		GEN_COLOR_OFF = 0x0104,
		GEN_DEPTH_OFF = 0x0105,
		GEN_RESULT = 0x0106,
		GEN_SHIFTVAL = 0x0107,

		// General argument purposes.
		GEN_ARG_X = 0x0180,
		GEN_ARG_Y = 0x0181,
		GEN_ARG_Z = 0x0182,
		GEN_ARG_FOG = 0x0183,
		GEN_ARG_ID = 0x0184,
		GEN_ARG_U = 0x0185,
		GEN_ARG_V = 0x0186,
		GEN_ARG_TEXPTR = 0x0187,
		GEN_ARG_BUFW = 0x0188,
		GEN_ARG_LEVEL = 0x0189,
		GEN_ARG_TEXPTR_PTR = 0x018A,
		GEN_ARG_BUFW_PTR = 0x018B,
		GEN_ARG_LEVELFRAC = 0x018C,

		// Vector argument purposes.
		VEC_ARG_COLOR = 0x0080,
		VEC_ARG_MASK = 0x0081,
		VEC_ARG_U = 0x0082,
		VEC_ARG_V = 0x0083,
		VEC_ARG_S = 0x0084,
		VEC_ARG_T = 0x0085,
		VEC_FRAC = 0x0086,

		// Vector temporaries.
		VEC_TEMP0 = 0x1000,
		VEC_TEMP1 = 0x1001,
		VEC_TEMP2 = 0x1002,
		VEC_TEMP3 = 0x1003,
		VEC_TEMP4 = 0x1004,
		VEC_TEMP5 = 0x1005,

		// General temporaries.
		GEN_TEMP0 = 0x1100,
		GEN_TEMP1 = 0x1101,
		GEN_TEMP2 = 0x1102,
		GEN_TEMP3 = 0x1103,
		GEN_TEMP4 = 0x1104,
		GEN_TEMP5 = 0x1105,
		GEN_TEMP_HELPER = 0x1106,

		// Invalid markers, one without and one with the FLAG_GEN bit.
		VEC_INVALID = 0xFEFF,
		GEN_INVALID = 0xFFFF,
	};

	// Register type and its "no register" value, taken from the emitter for the target arch.
#if PPSSPP_ARCH(ARM)
	using Reg = ArmGen::ARMReg;
	static constexpr Reg REG_INVALID_VALUE = ArmGen::INVALID_REG;
#elif PPSSPP_ARCH(ARM64_NEON)
	using Reg = Arm64Gen::ARM64Reg;
	static constexpr Reg REG_INVALID_VALUE = Arm64Gen::INVALID_REG;
#elif PPSSPP_ARCH(X86) || PPSSPP_ARCH(AMD64)
	using Reg = Gen::X64Reg;
	static constexpr Reg REG_INVALID_VALUE = Gen::INVALID_REG;
#elif PPSSPP_ARCH(MIPS)
	using Reg = MIPSGen::MIPSReg;
	static constexpr Reg REG_INVALID_VALUE = MIPSGen::INVALID_REG;
#elif PPSSPP_ARCH(RISCV64)
	using Reg = RiscVGen::RiscVReg;
	static constexpr Reg REG_INVALID_VALUE = RiscVGen::INVALID_REG;
#elif PPSSPP_ARCH(LOONGARCH64)
	using Reg = LoongArch64Gen::LoongArch64Reg;
	static constexpr Reg REG_INVALID_VALUE = LoongArch64Gen::INVALID_REG;
#else
	using Reg = int;
	static constexpr Reg REG_INVALID_VALUE = -1;
#endif

	// Per-register tracking entry.
	struct RegStatus {
		Reg reg;
		Purpose purpose;
		uint8_t locked = 0;
		bool forceRetained = false;
		bool everLocked = false;
	};

	// Sets up tracking for the given argument purposes.  Assumes __vectorcall on Windows.
	// Only what lands in registers is tracked; arguments that don't fit in regs (stack) are ignored.
	void SetupABI(const std::vector<Purpose> &args, bool forceRetain = true);
	// Call once compilation is complete.  Pass validate = false if the compile failed.
	void Reset(bool validate);
	// Starts tracking a register with an initial purpose (neither locked nor force retained.)
	void Add(Reg r, Purpose p);
	// Switches registers that have the purpose `history` over to the purpose `destiny`.
	void Change(Purpose history, Purpose destiny);
	// Gives up a register that was previously found or allocated; its purpose becomes invalid.
	void Release(Reg &r, Purpose p);
	// Unlocks a register that was previously found or allocated, while trying to keep it around.
	void Unlock(Reg &r, Purpose p);
	// Whether some register currently holds the given purpose.
	bool Has(Purpose p);
	// Gets the register for a purpose.  Check Has() first unless it is certain to be there.
	Reg Find(Purpose p);
	// Allocates a fresh register for the given purpose.
	Reg Alloc(Purpose p);
	// Marks a register as retained even when registers run short.
	void ForceRetain(Purpose p);
	// Undoes ForceRetain, releasing the register back to invalid.
	void ForceRelease(Purpose p);

	// Obtains a specific register.  WARNING: the register may be locked, so the caller must check.
	void GrabReg(Reg r, Purpose p, bool &needsSwap, Reg swapReg, Purpose swapPurpose);
	// Sets the purpose of a specific register.  Returns false if it is locked.
	bool ChangeReg(Reg r, Purpose p);
	// Reports whether the register was ever used.
	bool UsedReg(Reg r, Purpose flag);

private:
	RegStatus *FindReg(Reg r, Purpose p);

	std::vector<RegStatus> regs;
};
234
235
class CodeBlock : public BaseCodeBlock {
236
public:
237
virtual std::string DescribeCodePtr(const u8 *ptr);
238
virtual void Clear();
239
240
protected:
241
CodeBlock(int size);
242
243
RegCache::Reg GetZeroVec();
244
245
void Describe(const std::string &message);
246
// Returns amount of stack space used.
247
int WriteProlog(int extraStack, const std::vector<RegCache::Reg> &vec, const std::vector<RegCache::Reg> &gen);
248
// Returns updated function start position, modifies prolog and finishes writing.
249
const u8 *WriteFinalizedEpilog();
250
251
void WriteSimpleConst16x8(const u8 *&ptr, uint8_t value);
252
void WriteSimpleConst8x16(const u8 *&ptr, uint16_t value);
253
void WriteSimpleConst4x32(const u8 *&ptr, uint32_t value);
254
void WriteDynamicConst16x8(const u8 *&ptr, uint8_t value);
255
void WriteDynamicConst8x16(const u8 *&ptr, uint16_t value);
256
void WriteDynamicConst4x32(const u8 *&ptr, uint32_t value);
257
258
#if PPSSPP_ARCH(ARM64_NEON)
259
Arm64Gen::ARM64FloatEmitter fp;
260
#endif
261
262
std::unordered_map<const u8 *, std::string> descriptions_;
263
Rasterizer::RegCache regCache_;
264
265
private:
266
u8 *lastPrologStart_ = nullptr;
267
u8 *lastPrologEnd_ = nullptr;
268
int savedStack_;
269
int firstVecStack_;
270
std::vector<RegCache::Reg> prologVec_;
271
std::vector<RegCache::Reg> prologGen_;
272
};
273
274
};
275
276