Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
hrydgard
GitHub Repository: hrydgard/ppsspp
Path: blob/master/GPU/GPUCommon.h
3185 views
1
#pragma once
2
3
#include <vector>
4
#include <list>
5
6
#include "ppsspp_config.h"
7
#include "Common/Common.h"
8
#include "Common/Swap.h"
9
#include "Core/MemMap.h"
10
#include "Common/MemoryUtil.h"
11
#include "GPU/ge_constants.h"
12
#include "GPU/GPU.h"
13
#include "GPU/GPUCommon.h"
14
#include "GPU/GPUState.h"
15
#include "GPU/Debugger/Record.h"
16
#include "GPU/Debugger/Breakpoints.h"
17
#include "GPU/Common/ShaderCommon.h"
18
#include "GPU/Common/GPUDebugInterface.h"
19
#include "GPU/GPUDefinitions.h"
20
21
#if defined(__ANDROID__)
22
#include <atomic>
23
#endif
24
25
// X11, sigh.
26
#ifdef None
27
#undef None
28
#endif
29
30
// Forward declarations of types that this header only refers to by
// pointer or reference.
class FramebufferManagerCommon;
class TextureCacheCommon;
class DrawEngineCommon;
class GraphicsContext;
struct PspGeListArgs;
struct GPUgstate;
class PointerWrap;
struct VirtualFramebuffer;

namespace Draw {
class DrawContext;
}
42
43
// Signal behavior codes. Every enumerator has an explicit value, so the
// numbers are part of the contract and entries must not be renumbered.
// NOTE(review): presumably these mirror the PSP GE signal encodings - confirm.
enum SignalBehavior {
	PSP_GE_SIGNAL_NONE = 0x00,
	PSP_GE_SIGNAL_HANDLER_SUSPEND = 0x01,
	PSP_GE_SIGNAL_HANDLER_CONTINUE = 0x02,
	PSP_GE_SIGNAL_HANDLER_PAUSE = 0x03,
	PSP_GE_SIGNAL_SYNC = 0x08,
	PSP_GE_SIGNAL_JUMP = 0x10,
	PSP_GE_SIGNAL_CALL = 0x11,
	PSP_GE_SIGNAL_RET = 0x12,
	PSP_GE_SIGNAL_RJUMP = 0x13,
	PSP_GE_SIGNAL_RCALL = 0x14,
	PSP_GE_SIGNAL_OJUMP = 0x15,
	PSP_GE_SIGNAL_OCALL = 0x16,

	PSP_GE_SIGNAL_RTBP0 = 0x20,
	PSP_GE_SIGNAL_RTBP1 = 0x21,
	PSP_GE_SIGNAL_RTBP2 = 0x22,
	PSP_GE_SIGNAL_RTBP3 = 0x23,
	PSP_GE_SIGNAL_RTBP4 = 0x24,
	PSP_GE_SIGNAL_RTBP5 = 0x25,
	PSP_GE_SIGNAL_RTBP6 = 0x26,
	PSP_GE_SIGNAL_RTBP7 = 0x27,
	PSP_GE_SIGNAL_OTBP0 = 0x28,
	PSP_GE_SIGNAL_OTBP1 = 0x29,
	PSP_GE_SIGNAL_OTBP2 = 0x2A,
	PSP_GE_SIGNAL_OTBP3 = 0x2B,
	PSP_GE_SIGNAL_OTBP4 = 0x2C,
	PSP_GE_SIGNAL_OTBP5 = 0x2D,
	PSP_GE_SIGNAL_OTBP6 = 0x2E,
	PSP_GE_SIGNAL_OTBP7 = 0x2F,
	PSP_GE_SIGNAL_RCBP = 0x30,
	PSP_GE_SIGNAL_OCBP = 0x38,
	PSP_GE_SIGNAL_BREAK1 = 0xF0,
	PSP_GE_SIGNAL_BREAK2 = 0xFF,
};
78
79
// Run state values for the GPU. Values are explicit and must stay stable.
enum GPURunState {
	GPUSTATE_RUNNING = 0,
	GPUSTATE_DONE = 1,
	GPUSTATE_STALL = 2,
	GPUSTATE_INTERRUPT = 3,
	GPUSTATE_ERROR = 4,
};
86
87
// Kind of sync being waited on; passed to GPUCommon::SyncEnd() as waitType.
enum GPUSyncType {
	GPU_SYNC_DRAW,
	GPU_SYNC_LIST,
};
91
92
// Bit flags for stencil writes (each value is a distinct bit so they can
// be combined).
enum class WriteStencil {
	NEEDS_CLEAR = 1,
	STENCIL_IS_ZERO = 2,
	IGNORE_ALPHA = 4,
};
97
ENUM_CLASS_BITOPS(WriteStencil);
98
99
// Bit flags for memory copies (see the `flags` parameter of
// GPUCommon::PerformMemoryCopy). Each non-zero value is a distinct bit.
enum class GPUCopyFlag {
	NONE = 0,
	FORCE_SRC_MATCH_MEM = 1,
	FORCE_DST_MATCH_MEM = 2,
	// Note: implies src == dst and FORCE_SRC_MATCH_MEM.
	MEMSET = 4,
	DEPTH_REQUESTED = 8,
	DEBUG_NOTIFIED = 16,
	DISALLOW_CREATE_VFB = 32,
};
109
ENUM_CLASS_BITOPS(GPUCopyFlag);
110
111
struct DisplayListStackEntry {
112
u32 pc;
113
u32 offsetAddr;
114
u32 baseAddr;
115
};
116
117
struct DisplayList {
118
int id;
119
u32 startpc;
120
u32 pc;
121
u32 stall;
122
DisplayListState state;
123
SignalBehavior signal;
124
int subIntrBase;
125
u16 subIntrToken;
126
DisplayListStackEntry stack[32];
127
int stackptr;
128
bool interrupted;
129
u64 waitUntilTicks;
130
bool interruptsEnabled;
131
bool pendingInterrupt;
132
bool started;
133
PSPPointer<u32_le> context;
134
u32 offsetAddr;
135
bool bboxResult;
136
u32 stackAddr;
137
138
u32 padding; // Android x86-32 does not round the structure size up to the closest multiple of 8 like the other platforms.
139
};
140
141
// Forward declaration of the draw context type used by GPUCommon.
namespace Draw {
class DrawContext;
}
144
145
// Category of the most recent draw; tracked by GPUCommon::SetDrawType().
enum DrawType {
	DRAW_UNKNOWN,
	DRAW_PRIM,
	DRAW_SPLINE,
	DRAW_BEZIER,
};
151
152
// Bit flags (each value is a distinct bit).
// NOTE(review): presumably these annotate entries of the GE command table - confirm.
enum {
	FLAG_FLUSHBEFOREONCHANGE = 2,
	FLAG_EXECUTE = 4,
	FLAG_EXECUTEONCHANGE = 8,
	FLAG_READS_PC = 16,
	FLAG_WRITES_PC = 32,
	FLAG_DIRTYONCHANGE = 64, // NOTE: Either this or FLAG_EXECUTE*, not both!
};
160
161
struct TransformedVertex {
162
union {
163
struct {
164
float x, y, z, pos_w; // in case of morph, preblend during decode
165
};
166
float pos[4];
167
};
168
union {
169
struct {
170
float u; float v; float uv_w; // scaled by uscale, vscale, if there
171
};
172
float uv[3];
173
};
174
float fog;
175
union {
176
u8 color0[4]; // prelit
177
u32 color0_32;
178
};
179
union {
180
u8 color1[4]; // prelit
181
u32 color1_32;
182
};
183
184
void CopyFromWithOffset(const TransformedVertex &other, float xoff, float yoff) {
185
this->x = other.x + xoff;
186
this->y = other.y + yoff;
187
memcpy(&this->z, &other.z, sizeof(*this) - sizeof(float) * 2);
188
}
189
};
190
191
// Returns true when `prim` compares greater than GE_PRIM_LINE_STRIP and is
// not GE_PRIM_RECTANGLES.
inline bool IsTrianglePrim(GEPrimitiveType prim) {
	// TODO: KEEP_PREVIOUS is mistakenly treated as TRIANGLE here... This isn't new.
	//
	// Interesting optimization, but not confident in performance:
	// static const bool p[8] = { false, false, false, true, true, true, false, true };
	// 10111000 = 0xB8;
	// return (0xB8U >> (u8)prim) & 1;

	return prim > GE_PRIM_LINE_STRIP && prim != GE_PRIM_RECTANGLES;
}
201
202
class GPUCommon : public GPUDebugInterface {
203
public:
204
// The constructor might run on the loader thread.
205
GPUCommon(GraphicsContext *gfxCtx, Draw::DrawContext *draw);
206
207
// FinishInitOnMainThread runs on the main thread, of course.
208
virtual void FinishInitOnMainThread() {}
209
210
virtual ~GPUCommon() {}
211
212
Draw::DrawContext *GetDrawContext() {
213
return draw_;
214
}
215
216
virtual void DeviceLost() = 0;
217
virtual void DeviceRestore(Draw::DrawContext *draw) = 0;
218
219
virtual u32 CheckGPUFeatures() const = 0;
220
221
virtual void UpdateCmdInfo() = 0;
222
223
virtual bool IsStarted() {
224
return true;
225
}
226
virtual void Reinitialize();
227
228
virtual void BeginHostFrame();
229
virtual void EndHostFrame();
230
231
void InterruptStart(int listid);
232
void InterruptEnd(int listid);
233
void SyncEnd(GPUSyncType waitType, int listid, bool wokeThreads);
234
void EnableInterrupts(bool enable) {
235
interruptsEnabled_ = enable;
236
}
237
238
virtual void CheckDisplayResized() = 0;
239
virtual void CheckConfigChanged() = 0;
240
241
virtual void NotifyDisplayResized();
242
virtual void NotifyRenderResized();
243
virtual void NotifyConfigChanged();
244
245
void DumpNextFrame();
246
247
virtual void PreExecuteOp(u32 op, u32 diff) {}
248
249
DLResult ProcessDLQueue();
250
251
u32 UpdateStall(int listid, u32 newstall, bool *runList);
252
u32 EnqueueList(u32 listpc, u32 stall, int subIntrBase, PSPPointer<PspGeListArgs> args, bool head, bool *runList);
253
u32 DequeueList(int listid);
254
virtual int ListSync(int listid, int mode);
255
virtual u32 DrawSync(int mode);
256
int GetStack(int index, u32 stackPtr);
257
virtual bool GetMatrix24(GEMatrixType type, u32_le *result, u32 cmdbits);
258
virtual void ResetMatrices();
259
virtual void DoState(PointerWrap &p);
260
bool BusyDrawing();
261
u32 Continue(bool *runList);
262
u32 Break(int mode);
263
264
virtual bool FramebufferDirty() = 0;
265
virtual bool FramebufferReallyDirty() = 0;
266
267
virtual void ReapplyGfxState();
268
269
// Returns true if we should split the call across GE execution.
270
// For example, a debugger is active.
271
bool ShouldSplitOverGe() const;
272
273
uint32_t SetAddrTranslation(uint32_t value) override;
274
uint32_t GetAddrTranslation() override;
275
276
virtual void SetDisplayFramebuffer(u32 framebuf, u32 stride, GEBufferFormat format) = 0;
277
virtual void CopyDisplayToOutput(bool reallyDirty) = 0;
278
virtual bool PresentedThisFrame() const = 0;
279
280
// Invalidate any cached content sourced from the specified range.
281
// If size = -1, invalidate everything.
282
virtual void InvalidateCache(u32 addr, int size, GPUInvalidationType type) = 0;
283
284
virtual bool PerformMemoryCopy(u32 dest, u32 src, int size, GPUCopyFlag flags = GPUCopyFlag::NONE);
285
virtual bool PerformMemorySet(u32 dest, u8 v, int size);
286
virtual bool PerformReadbackToMemory(u32 dest, int size);
287
virtual bool PerformWriteColorFromMemory(u32 dest, int size);
288
289
virtual void PerformWriteFormattedFromMemory(u32 addr, int size, int width, GEBufferFormat format);
290
virtual bool PerformWriteStencilFromMemory(u32 dest, int size, WriteStencil flags);
291
292
virtual void ExecuteOp(u32 op, u32 diff) = 0;
293
294
void Execute_OffsetAddr(u32 op, u32 diff);
295
void Execute_Vaddr(u32 op, u32 diff);
296
void Execute_Iaddr(u32 op, u32 diff);
297
void Execute_Origin(u32 op, u32 diff);
298
void Execute_Jump(u32 op, u32 diff);
299
void Execute_BJump(u32 op, u32 diff);
300
void Execute_Call(u32 op, u32 diff);
301
void Execute_Ret(u32 op, u32 diff);
302
void Execute_End(u32 op, u32 diff);
303
304
void Execute_BoundingBox(u32 op, u32 diff);
305
306
void Execute_MorphWeight(u32 op, u32 diff);
307
308
void Execute_ImmVertexAlphaPrim(u32 op, u32 diff);
309
310
void Execute_Unknown(u32 op, u32 diff);
311
312
static int EstimatePerVertexCost();
313
314
void Flush() override;
315
316
#ifdef USE_CRT_DBG
317
#undef new
318
#endif
319
void *operator new(size_t s) {
320
return AllocateAlignedMemory(s, 16);
321
}
322
void operator delete(void *p) {
323
FreeAlignedMemory(p);
324
}
325
#ifdef USE_CRT_DBG
326
#define new DBG_NEW
327
#endif
328
329
// From GPUDebugInterface.
330
bool GetCurrentDisplayList(DisplayList &list) override;
331
bool GetCurrentDrawAsDebugVertices(int count, std::vector<GPUDebugVertex> &vertices, std::vector<u16> &indices) override;
332
int GetCurrentPrimCount() override;
333
FramebufferManagerCommon *GetFramebufferManagerCommon() override {
334
return nullptr;
335
}
336
337
TextureCacheCommon *GetTextureCacheCommon() override {
338
return nullptr;
339
}
340
341
std::vector<std::string> DebugGetShaderIDs(DebugShaderType shader) override { return std::vector<std::string>(); };
342
std::string DebugGetShaderString(std::string id, DebugShaderType shader, DebugShaderStringType stringType) override {
343
return "N/A";
344
}
345
bool DescribeCodePtr(const u8 *ptr, std::string &name) override;
346
347
std::vector<DisplayList> ActiveDisplayLists() override;
348
void ResetListPC(int listID, u32 pc) override;
349
void ResetListStall(int listID, u32 stall) override;
350
void ResetListState(int listID, DisplayListState state) override;
351
352
GPUDebugOp DisassembleOp(u32 pc, u32 op) override;
353
std::vector<GPUDebugOp> DisassembleOpRange(u32 startpc, u32 endpc) override;
354
355
u32 GetRelativeAddress(u32 data) override;
356
u32 GetVertexAddress() override;
357
u32 GetIndexAddress() override;
358
const GPUgstate &GetGState() override;
359
void SetCmdValue(u32 op) override;
360
361
DisplayList* getList(int listid) {
362
return &dls[listid];
363
}
364
365
const std::list<int> &GetDisplayListQueue() override {
366
return dlQueue;
367
}
368
const DisplayList &GetDisplayList(int index) override {
369
return dls[index];
370
}
371
372
s64 GetListTicks(int listid) const {
373
if (listid >= 0 && listid < DisplayListMaxCount) {
374
return dls[listid].waitUntilTicks;
375
}
376
return -1;
377
}
378
379
virtual void GetReportingInfo(std::string &primaryInfo, std::string &fullInfo) const {
380
primaryInfo = reportingPrimaryInfo_;
381
fullInfo = reportingFullInfo_;
382
}
383
384
void PSPFrame();
385
386
GPURecord::Recorder *GetRecorder() override {
387
return &recorder_;
388
}
389
GPUBreakpoints *GetBreakpoints() override {
390
return &breakpoints_;
391
}
392
393
void ClearBreakNext() override;
394
void SetBreakNext(GPUDebug::BreakNext next) override;
395
void SetBreakCount(int c, bool relative = false) override;
396
GPUDebug::BreakNext GetBreakNext() const override {
397
return breakNext_;
398
}
399
int GetBreakCount() const override {
400
return breakAtCount_;
401
}
402
bool SetRestrictPrims(std::string_view rule) override;
403
std::string_view GetRestrictPrims() override {
404
return restrictPrimRule_;
405
}
406
407
int PrimsThisFrame() const override {
408
return primsThisFrame_;
409
}
410
int PrimsLastFrame() const override {
411
return primsLastFrame_;
412
}
413
414
void NotifyFlush();
415
416
protected:
417
// While debugging is active, these may block.
418
void NotifyDisplay(u32 framebuf, u32 stride, int format);
419
420
bool NeedsSlowInterpreter() const;
421
GPUDebug::NotifyResult NotifyCommand(u32 pc, GPUBreakpoints *breakpoints);
422
423
virtual void ClearCacheNextFrame() {}
424
425
virtual void CheckRenderResized() {}
426
427
void SetDrawType(DrawType type, GEPrimitiveType prim) {
428
if (type != lastDraw_) {
429
// We always flush when drawing splines/beziers so no need to do so here
430
gstate_c.Dirty(DIRTY_UVSCALEOFFSET | DIRTY_VERTEXSHADER_STATE | DIRTY_GEOMETRYSHADER_STATE);
431
lastDraw_ = type;
432
}
433
// Prim == RECTANGLES can cause CanUseHardwareTransform to flip, so we need to dirty.
434
// Also, culling may be affected so dirty the raster state.
435
if (IsTrianglePrim(prim) != IsTrianglePrim(lastPrim_)) {
436
Flush();
437
gstate_c.Dirty(DIRTY_RASTER_STATE | DIRTY_VERTEXSHADER_STATE | DIRTY_GEOMETRYSHADER_STATE);
438
lastPrim_ = prim;
439
}
440
}
441
442
virtual void CheckDepthUsage(VirtualFramebuffer *vfb) {}
443
virtual void FastRunLoop(DisplayList &list) = 0;
444
445
bool SlowRunLoop(DisplayList &list); // Returns false on breakpoint.
446
void UpdatePC(u32 currentPC, u32 newPC);
447
void UpdateState(GPURunState state);
448
void FastLoadBoneMatrix(u32 target);
449
void FlushImm();
450
void DoBlockTransfer(u32 skipDrawReason);
451
452
// TODO: Unify this. Vulkan and OpenGL are different due to how they buffer data.
453
virtual void FinishDeferred() {}
454
455
void AdvanceVerts(u32 vertType, int count, int bytesRead) {
456
if ((vertType & GE_VTYPE_IDX_MASK) != GE_VTYPE_IDX_NONE) {
457
const int indexShift = ((vertType & GE_VTYPE_IDX_MASK) >> GE_VTYPE_IDX_SHIFT) - 1;
458
gstate_c.indexAddr += count << indexShift;
459
} else {
460
gstate_c.vertexAddr += bytesRead;
461
}
462
}
463
464
virtual void BuildReportingInfo() = 0;
465
466
virtual void UpdateMSAALevel(Draw::DrawContext *draw) {}
467
468
enum {
469
DisplayListMaxCount = 64
470
};
471
472
DrawEngineCommon *drawEngineCommon_ = nullptr;
473
474
// TODO: These should live in GPUCommonHW.
475
FramebufferManagerCommon *framebufferManager_ = nullptr;
476
TextureCacheCommon *textureCache_ = nullptr;
477
478
bool flushOnParams_ = true;
479
480
GraphicsContext *gfxCtx_;
481
Draw::DrawContext *draw_;
482
483
typedef std::list<int> DisplayListQueue;
484
485
int nextListID;
486
DisplayList dls[DisplayListMaxCount];
487
DisplayList *currentList;
488
DisplayListQueue dlQueue;
489
490
bool interruptRunning = false;
491
GPURunState gpuState = GPUSTATE_RUNNING;
492
bool isbreak; // This doesn't mean debugger breakpoints.
493
u64 drawCompleteTicks;
494
u64 busyTicks;
495
496
int downcount;
497
u64 startingTicks;
498
u32 cycleLastPC;
499
int cyclesExecuted;
500
501
bool resumingFromDebugBreak_ = false;
502
bool dumpNextFrame_ = false;
503
bool dumpThisFrame_ = false;
504
bool useFastRunLoop_ = false;
505
bool interruptsEnabled_ = false;
506
bool displayResized_ = false;
507
bool renderResized_ = false;
508
bool configChanged_ = false;
509
DrawType lastDraw_ = DRAW_UNKNOWN;
510
GEPrimitiveType lastPrim_ = GE_PRIM_INVALID;
511
512
int vertexCost_ = 0;
513
514
// No idea how big this buffer needs to be.
515
enum {
516
MAX_IMMBUFFER_SIZE = 32,
517
};
518
519
TransformedVertex immBuffer_[MAX_IMMBUFFER_SIZE];
520
int immCount_ = 0;
521
GEPrimitiveType immPrim_ = GE_PRIM_INVALID;
522
uint32_t immFlags_ = 0;
523
bool immFirstSent_ = false;
524
525
uint32_t edramTranslation_ = 0x400;
526
527
// When matrix data overflows, the CPU visible values wrap and bleed between matrices.
528
// But this doesn't actually change the values used by rendering.
529
// The CPU visible values affect the GPU when list contexts are restored.
530
// Note: not maintained by all backends, here for save stating.
531
union {
532
struct {
533
u32 bone[12 * 8];
534
u32 world[12];
535
u32 view[12];
536
u32 proj[16];
537
u32 tgen[12];
538
};
539
u32 all[12 * 8 + 12 + 12 + 16 + 12];
540
} matrixVisible;
541
542
std::string reportingPrimaryInfo_;
543
std::string reportingFullInfo_;
544
545
// Debugging state
546
bool debugRecording_ = false;
547
548
GPURecord::Recorder recorder_;
549
GPUBreakpoints breakpoints_;
550
551
GPUDebug::BreakNext breakNext_ = GPUDebug::BreakNext::NONE;
552
int breakAtCount_ = -1;
553
554
int primsLastFrame_ = 0;
555
int primsThisFrame_ = 0;
556
int thisFlipNum_ = 0;
557
558
bool primAfterDraw_ = false;
559
560
uint32_t skipPcOnce_ = 0;
561
562
std::vector<std::pair<int, int>> restrictPrimRanges_;
563
std::string restrictPrimRule_;
564
565
private:
566
void DoExecuteCall(u32 target);
567
void PopDLQueue();
568
void CheckDrawSync();
569
int GetNextListIndex();
570
};
571
572