GitHub Repository: hrydgard/ppsspp
Path: blob/master/GPU/GPUCommon.cpp

#include "ppsspp_config.h"

#include <algorithm> // std::remove

#include "Common/Profiler/Profiler.h"

#include "Common/GraphicsContext.h"
#include "Common/LogReporting.h"
#include "Common/Math/SIMDHeaders.h"
#include "Common/Serialize/Serializer.h"
#include "Common/Serialize/SerializeFuncs.h"
#include "Common/Serialize/SerializeList.h"
#include "Common/TimeUtil.h"
#include "GPU/GeDisasm.h"
#include "GPU/GPU.h"
#include "GPU/GPUCommon.h"
#include "GPU/GPUState.h"
#include "Core/Config.h"
#include "Core/Core.h"
#include "Core/CoreTiming.h"
#include "Core/Debugger/MemBlockInfo.h"
#include "Core/MemMap.h"
#include "Core/Reporting.h"
#include "Core/HLE/HLE.h"
#include "Core/HLE/ErrorCodes.h"
#include "Core/HLE/sceKernelMemory.h"
#include "Core/HLE/sceKernelInterrupt.h"
#include "Core/HLE/sceGe.h"
#include "Core/Util/PPGeDraw.h"
#include "Core/MemMapHelpers.h"
#include "GPU/Common/DrawEngineCommon.h"
#include "GPU/Common/FramebufferManagerCommon.h"
#include "GPU/Common/TextureCacheCommon.h"
#include "GPU/Common/SoftwareTransformCommon.h"
#include "GPU/Debugger/Debugger.h"
#include "GPU/Debugger/Record.h"
#include "GPU/Debugger/Stepping.h"

bool __KernelIsDispatchEnabled();

void GPUCommon::Flush() {
	drawEngineCommon_->Flush();
}

GPUCommon::GPUCommon(GraphicsContext *gfxCtx, Draw::DrawContext *draw) :
	gfxCtx_(gfxCtx),
	draw_(draw)
{
	// This assert failed on GCC x86 32-bit (but not MSVC 32-bit!) before adding the
	// "padding" field at the end. This is important for save state compatibility.
	// The compiler was not rounding the struct size up to an 8 byte boundary, which
	// you'd expect due to the int64 field, but the Linux ABI apparently does not require that.
	static_assert(sizeof(DisplayList) == 456, "Bad DisplayList size");

	Reinitialize();
	gstate.Reset();
	gstate_c.Reset();
	gpuStats.Reset();

	PPGeSetDrawContext(draw);
	ResetMatrices();
}

void GPUCommon::BeginHostFrame() {
	ReapplyGfxState();

	// TODO: Assume config may have changed - maybe move to resize.
	gstate_c.Dirty(DIRTY_ALL);

	UpdateCmdInfo();

	UpdateMSAALevel(draw_);
	CheckConfigChanged();
	CheckDisplayResized();
	CheckRenderResized();
}

void GPUCommon::EndHostFrame() {
	// Probably not necessary.
	if (draw_) {
		draw_->Invalidate(InvalidationFlags::CACHED_RENDER_STATE);
	}
}

void GPUCommon::Reinitialize() {
	memset(dls, 0, sizeof(dls));
	for (int i = 0; i < DisplayListMaxCount; ++i) {
		dls[i].state = PSP_GE_DL_STATE_NONE;
		dls[i].waitUntilTicks = 0;
	}

	nextListID = 0;
	currentList = nullptr;
	isbreak = false;
	drawCompleteTicks = 0;
	busyTicks = 0;
	interruptsEnabled_ = true;

	if (textureCache_)
		textureCache_->Clear(true);
	if (framebufferManager_)
		framebufferManager_->DestroyAllFBOs();
}

int GPUCommon::EstimatePerVertexCost() {
	// TODO: This is transform cost, also account for rasterization cost somehow... although it probably
	// runs in parallel with transform.

	// Also, this is all pure guesswork. If we can find a way to do measurements, that would be great.

	// GTA wants a low value to run smooth, GoW wants a high value (otherwise it thinks things
	// went too fast and starts doing all the work over again).

	int cost = 20;
	if (gstate.isLightingEnabled()) {
		cost += 10;

		for (int i = 0; i < 4; i++) {
			if (gstate.isLightChanEnabled(i))
				cost += 7;
		}
	}

	if (gstate.getUVGenMode() != GE_TEXMAP_TEXTURE_COORDS) {
		cost += 20;
	}
	int morphCount = gstate.getNumMorphWeights();
	if (morphCount > 1) {
		cost += 5 * morphCount;
	}
	return cost;
}

void GPUCommon::PopDLQueue() {
	if (!dlQueue.empty()) {
		dlQueue.pop_front();
		if (!dlQueue.empty()) {
			bool running = currentList->state == PSP_GE_DL_STATE_RUNNING;
			currentList = &dls[dlQueue.front()];
			if (running)
				currentList->state = PSP_GE_DL_STATE_RUNNING;
		} else {
			currentList = nullptr;
		}
	}
}

bool GPUCommon::BusyDrawing() {
	u32 state = DrawSync(1);
	if (state == PSP_GE_LIST_DRAWING || state == PSP_GE_LIST_STALLING) {
		if (currentList && currentList->state != PSP_GE_DL_STATE_PAUSED) {
			return true;
		}
	}
	return false;
}

void GPUCommon::NotifyConfigChanged() {
	configChanged_ = true;
}

void GPUCommon::NotifyRenderResized() {
	renderResized_ = true;
}

void GPUCommon::NotifyDisplayResized() {
	displayResized_ = true;
}

void GPUCommon::DumpNextFrame() {
	dumpNextFrame_ = true;
}

u32 GPUCommon::DrawSync(int mode) {
	gpuStats.numDrawSyncs++;

	if (mode < 0 || mode > 1)
		return SCE_KERNEL_ERROR_INVALID_MODE;

	if (mode == 0) {
		if (!__KernelIsDispatchEnabled()) {
			return SCE_KERNEL_ERROR_CAN_NOT_WAIT;
		}
		if (__IsInInterrupt()) {
			return SCE_KERNEL_ERROR_ILLEGAL_CONTEXT;
		}

		if (drawCompleteTicks > CoreTiming::GetTicks()) {
			__GeWaitCurrentThread(GPU_SYNC_DRAW, 1, "GeDrawSync");
		} else {
			for (int i = 0; i < DisplayListMaxCount; ++i) {
				if (dls[i].state == PSP_GE_DL_STATE_COMPLETED) {
					dls[i].state = PSP_GE_DL_STATE_NONE;
				}
			}
		}
		return 0;
	}

	// If there's no current list, it must be complete.
	DisplayList *top = nullptr;
	for (int i : dlQueue) {
		if (dls[i].state != PSP_GE_DL_STATE_COMPLETED) {
			top = &dls[i];
			break;
		}
	}
	if (!top || top->state == PSP_GE_DL_STATE_COMPLETED)
		return PSP_GE_LIST_COMPLETED;

	if (currentList->pc == currentList->stall)
		return PSP_GE_LIST_STALLING;

	return PSP_GE_LIST_DRAWING;
}

void GPUCommon::CheckDrawSync() {
	if (dlQueue.empty()) {
		for (int i = 0; i < DisplayListMaxCount; ++i)
			dls[i].state = PSP_GE_DL_STATE_NONE;
	}
}

int GPUCommon::ListSync(int listid, int mode) {
	gpuStats.numListSyncs++;

	if (listid < 0 || listid >= DisplayListMaxCount)
		return SCE_KERNEL_ERROR_INVALID_ID;

	if (mode < 0 || mode > 1)
		return SCE_KERNEL_ERROR_INVALID_MODE;

	DisplayList& dl = dls[listid];
	if (mode == 1) {
		switch (dl.state) {
		case PSP_GE_DL_STATE_QUEUED:
			if (dl.interrupted)
				return PSP_GE_LIST_PAUSED;
			return PSP_GE_LIST_QUEUED;

		case PSP_GE_DL_STATE_RUNNING:
			if (dl.pc == dl.stall)
				return PSP_GE_LIST_STALLING;
			return PSP_GE_LIST_DRAWING;

		case PSP_GE_DL_STATE_COMPLETED:
			return PSP_GE_LIST_COMPLETED;

		case PSP_GE_DL_STATE_PAUSED:
			return PSP_GE_LIST_PAUSED;

		default:
			return SCE_KERNEL_ERROR_INVALID_ID;
		}
	}

	if (!__KernelIsDispatchEnabled()) {
		return SCE_KERNEL_ERROR_CAN_NOT_WAIT;
	}
	if (__IsInInterrupt()) {
		return SCE_KERNEL_ERROR_ILLEGAL_CONTEXT;
	}

	if (dl.waitUntilTicks > CoreTiming::GetTicks()) {
		__GeWaitCurrentThread(GPU_SYNC_LIST, listid, "GeListSync");
	}
	return PSP_GE_LIST_COMPLETED;
}

int GPUCommon::GetStack(int index, u32 stackPtr) {
	if (!currentList) {
		// Seems like it doesn't return an error code?
		return 0;
	}

	if (currentList->stackptr <= index) {
		return SCE_KERNEL_ERROR_INVALID_INDEX;
	}

	if (index >= 0) {
		auto stack = PSPPointer<u32_le>::Create(stackPtr);
		if (stack.IsValid()) {
			auto entry = currentList->stack[index];
			// Not really sure what most of these values are.
			stack[0] = 0;
			stack[1] = entry.pc + 4;
			stack[2] = entry.offsetAddr;
			stack[7] = entry.baseAddr;
		}
	}

	return currentList->stackptr;
}

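// Helper for GetMatrix24 below: packs each float as a float24 value in the low
// 24 bits of a command word, with the GE command bits in the top byte.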
static void CopyMatrix24(u32_le *result, const float *mtx, u32 count, u32 cmdbits) {
	// Screams out for simple SIMD, but probably not called often enough to be worth it.
	for (u32 i = 0; i < count; ++i) {
		result[i] = toFloat24(mtx[i]) | cmdbits;
	}
}

bool GPUCommon::GetMatrix24(GEMatrixType type, u32_le *result, u32 cmdbits) {
	switch (type) {
	case GE_MTX_BONE0:
	case GE_MTX_BONE1:
	case GE_MTX_BONE2:
	case GE_MTX_BONE3:
	case GE_MTX_BONE4:
	case GE_MTX_BONE5:
	case GE_MTX_BONE6:
	case GE_MTX_BONE7:
		CopyMatrix24(result, gstate.boneMatrix + (type - GE_MTX_BONE0) * 12, 12, cmdbits);
		break;
	case GE_MTX_TEXGEN:
		CopyMatrix24(result, gstate.tgenMatrix, 12, cmdbits);
		break;
	case GE_MTX_WORLD:
		CopyMatrix24(result, gstate.worldMatrix, 12, cmdbits);
		break;
	case GE_MTX_VIEW:
		CopyMatrix24(result, gstate.viewMatrix, 12, cmdbits);
		break;
	case GE_MTX_PROJECTION:
		CopyMatrix24(result, gstate.projMatrix, 16, cmdbits);
		break;
	default:
		return false;
	}
	return true;
}

void GPUCommon::ResetMatrices() {
	// This means we restored a context, so update the visible matrix data.
	for (size_t i = 0; i < ARRAY_SIZE(gstate.boneMatrix); ++i)
		matrixVisible.bone[i] = toFloat24(gstate.boneMatrix[i]);
	for (size_t i = 0; i < ARRAY_SIZE(gstate.worldMatrix); ++i)
		matrixVisible.world[i] = toFloat24(gstate.worldMatrix[i]);
	for (size_t i = 0; i < ARRAY_SIZE(gstate.viewMatrix); ++i)
		matrixVisible.view[i] = toFloat24(gstate.viewMatrix[i]);
	for (size_t i = 0; i < ARRAY_SIZE(gstate.projMatrix); ++i)
		matrixVisible.proj[i] = toFloat24(gstate.projMatrix[i]);
	for (size_t i = 0; i < ARRAY_SIZE(gstate.tgenMatrix); ++i)
		matrixVisible.tgen[i] = toFloat24(gstate.tgenMatrix[i]);

	// Assume all the matrices changed, so dirty things related to them.
	gstate_c.Dirty(DIRTY_WORLDMATRIX | DIRTY_VIEWMATRIX | DIRTY_PROJMATRIX | DIRTY_TEXMATRIX | DIRTY_FRAGMENTSHADER_STATE | DIRTY_BONE_UNIFORMS);
}

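// Backend for sceGeListEnQueue (and its head variant): validates the addresses,
// finds a free display list slot, and either queues the list or makes it current.
// *runList tells the caller whether to start executing it right away.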
u32 GPUCommon::EnqueueList(u32 listpc, u32 stall, int subIntrBase, PSPPointer<PspGeListArgs> args, bool head, bool *runList) {
	*runList = false;

	// TODO: Check the stack values in missing arg and adjust the stack depth.

	// Check alignment
	// TODO: Check the context and stack alignment too.
	if (((listpc | stall) & 3) != 0 || !Memory::IsValidAddress(listpc)) {
		ERROR_LOG_REPORT(Log::G3D, "sceGeListEnqueue: invalid address %08x", listpc);
		return SCE_KERNEL_ERROR_INVALID_POINTER;
	}

	// If args->size is below 16, it's the old struct without stack info.
	if (args.IsValid() && args->size >= 16 && args->numStacks >= 256) {
		ERROR_LOG(Log::G3D, "invalid stack depth %d", args->numStacks);
		return SCE_KERNEL_ERROR_INVALID_SIZE;
	}

	int id = -1;
	u64 currentTicks = CoreTiming::GetTicks();
	u32 stackAddr = args.IsValid() && args->size >= 16 ? (u32)args->stackAddr : 0;
	// Check compatibility
	if (sceKernelGetCompiledSdkVersion() > 0x01FFFFFF) {
		//numStacks = 0;
		//stack = NULL;
		for (int i = 0; i < DisplayListMaxCount; ++i) {
			if (dls[i].state != PSP_GE_DL_STATE_NONE && dls[i].state != PSP_GE_DL_STATE_COMPLETED) {
				// Logically, if the CPU has not interrupted yet, it hasn't seen the latest pc either.
				// Exit enqueues right after an END, which fails without ignoring pendingInterrupt lists.
				if (dls[i].pc == listpc && !dls[i].pendingInterrupt) {
					ERROR_LOG(Log::G3D, "sceGeListEnqueue: can't enqueue, list address %08X already used", listpc);
					return 0x80000021;
				} else if (stackAddr != 0 && dls[i].stackAddr == stackAddr && !dls[i].pendingInterrupt) {
					ERROR_LOG(Log::G3D, "sceGeListEnqueue: can't enqueue, stack address %08X already used", stackAddr);
					return 0x80000021;
				}
			}
		}
	}
	// TODO: Check if the list stack dls[i].stack is already used, then return 0x80000021 as above.

	for (int i = 0; i < DisplayListMaxCount; ++i) {
		int possibleID = (i + nextListID) % DisplayListMaxCount;
		auto possibleList = dls[possibleID];
		if (possibleList.pendingInterrupt) {
			continue;
		}

		if (possibleList.state == PSP_GE_DL_STATE_NONE) {
			id = possibleID;
			break;
		}
		if (possibleList.state == PSP_GE_DL_STATE_COMPLETED && possibleList.waitUntilTicks < currentTicks) {
			id = possibleID;
		}
	}
	if (id < 0) {
		ERROR_LOG_REPORT(Log::G3D, "No DL ID available to enqueue");
		for (int i : dlQueue) {
			DisplayList &dl = dls[i];
			DEBUG_LOG(Log::G3D, "DisplayList %d status %d pc %08x stall %08x", i, dl.state, dl.pc, dl.stall);
		}
		return SCE_KERNEL_ERROR_OUT_OF_MEMORY;
	}
	nextListID = id + 1;

	DisplayList &dl = dls[id];
	dl.id = id;
	dl.startpc = listpc & 0x0FFFFFFF;
	dl.pc = listpc & 0x0FFFFFFF;
	dl.stall = stall & 0x0FFFFFFF;
	dl.subIntrBase = std::max(subIntrBase, -1);
	dl.stackptr = 0;
	dl.signal = PSP_GE_SIGNAL_NONE;
	dl.interrupted = false;
	dl.waitUntilTicks = (u64)-1;
	dl.interruptsEnabled = interruptsEnabled_;
	dl.started = false;
	dl.offsetAddr = 0;
	dl.bboxResult = false;
	dl.stackAddr = stackAddr;

	if (args.IsValid() && args->context.IsValid())
		dl.context = args->context;
	else
		dl.context = 0;

	if (head) {
		if (currentList) {
			if (currentList->state != PSP_GE_DL_STATE_PAUSED)
				return SCE_KERNEL_ERROR_INVALID_VALUE;
			currentList->state = PSP_GE_DL_STATE_QUEUED;
			// Make sure we clear the signal so we don't try to pause it again.
			currentList->signal = PSP_GE_SIGNAL_NONE;
		}

		dl.state = PSP_GE_DL_STATE_PAUSED;

		currentList = &dl;
		dlQueue.push_front(id);
	} else if (currentList) {
		dl.state = PSP_GE_DL_STATE_QUEUED;
		dlQueue.push_back(id);
	} else {
		dl.state = PSP_GE_DL_STATE_RUNNING;
		currentList = &dl;
		dlQueue.push_front(id);

		drawCompleteTicks = (u64)-1;

		// TODO: Save context when starting the list if the param is set.
		// LATER: Wait, what? Please explain.
		*runList = true;
	}
	return id;
}

u32 GPUCommon::DequeueList(int listid) {
	if (listid < 0 || listid >= DisplayListMaxCount || dls[listid].state == PSP_GE_DL_STATE_NONE)
		return SCE_KERNEL_ERROR_INVALID_ID;

	auto &dl = dls[listid];
	if (dl.started)
		return SCE_KERNEL_ERROR_BUSY;

	dl.state = PSP_GE_DL_STATE_NONE;

	if (listid == dlQueue.front())
		PopDLQueue();
	else
		dlQueue.remove(listid);

	dl.waitUntilTicks = 0;
	__GeTriggerWait(GPU_SYNC_LIST, listid);

	CheckDrawSync();
	return 0;
}

u32 GPUCommon::UpdateStall(int listid, u32 newstall, bool *runList) {
	*runList = false;
	if (listid < 0 || listid >= DisplayListMaxCount || dls[listid].state == PSP_GE_DL_STATE_NONE)
		return SCE_KERNEL_ERROR_INVALID_ID;
	auto &dl = dls[listid];
	if (dl.state == PSP_GE_DL_STATE_COMPLETED)
		return SCE_KERNEL_ERROR_ALREADY;

	dl.stall = newstall & 0x0FFFFFFF;

	*runList = true;
	return 0;
}

u32 GPUCommon::Continue(bool *runList) {
	*runList = false;
	if (!currentList)
		return 0;

	if (currentList->state == PSP_GE_DL_STATE_PAUSED)
	{
		if (!isbreak) {
			// TODO: Supposedly this returns SCE_KERNEL_ERROR_BUSY in some case, previously it had
			// currentList->signal == PSP_GE_SIGNAL_HANDLER_PAUSE, but it doesn't reproduce.

			currentList->state = PSP_GE_DL_STATE_RUNNING;
			currentList->signal = PSP_GE_SIGNAL_NONE;

			// TODO: Restore the context of the DL if necessary.
			// TODO: Restore BASE.

			// We have a list now, so it's not complete.
			drawCompleteTicks = (u64)-1;
		} else {
			currentList->state = PSP_GE_DL_STATE_QUEUED;
			currentList->signal = PSP_GE_SIGNAL_NONE;
		}
	}
	else if (currentList->state == PSP_GE_DL_STATE_RUNNING)
	{
		if (sceKernelGetCompiledSdkVersion() >= 0x02000000)
			return 0x80000020;
		return -1;
	}
	else
	{
		if (sceKernelGetCompiledSdkVersion() >= 0x02000000)
			return 0x80000004; // matches SCE_KERNEL_ERROR_BAD_ARGUMENT but doesn't really seem like it. Maybe that error code is more general.
		return -1;
	}

	*runList = true;
	return 0;
}

u32 GPUCommon::Break(int mode) {
	if (mode < 0 || mode > 1)
		return SCE_KERNEL_ERROR_INVALID_MODE;

	if (!currentList)
		return SCE_KERNEL_ERROR_ALREADY;

	if (mode == 1)
	{
		// Clear the queue
		dlQueue.clear();
		for (int i = 0; i < DisplayListMaxCount; ++i)
		{
			dls[i].state = PSP_GE_DL_STATE_NONE;
			dls[i].signal = PSP_GE_SIGNAL_NONE;
		}

		nextListID = 0;
		currentList = nullptr;
		return 0;
	}

	if (currentList->state == PSP_GE_DL_STATE_NONE || currentList->state == PSP_GE_DL_STATE_COMPLETED)
	{
		if (sceKernelGetCompiledSdkVersion() >= 0x02000000)
			return 0x80000004;
		return -1;
	}

	if (currentList->state == PSP_GE_DL_STATE_PAUSED)
	{
		if (sceKernelGetCompiledSdkVersion() > 0x02000010)
		{
			if (currentList->signal == PSP_GE_SIGNAL_HANDLER_PAUSE)
			{
				ERROR_LOG_REPORT(Log::G3D, "sceGeBreak: can't break signal-pausing list");
			}
			else
				return SCE_KERNEL_ERROR_ALREADY;
		}
		return SCE_KERNEL_ERROR_BUSY;
	}

	if (currentList->state == PSP_GE_DL_STATE_QUEUED)
	{
		currentList->state = PSP_GE_DL_STATE_PAUSED;
		return currentList->id;
	}

	// TODO: Save BASE.
	// TODO: Adjust pc to be just before SIGNAL/END.

	// TODO: Is this right?
	if (currentList->signal == PSP_GE_SIGNAL_SYNC)
		currentList->pc += 8;

	currentList->interrupted = true;
	currentList->state = PSP_GE_DL_STATE_PAUSED;
	currentList->signal = PSP_GE_SIGNAL_HANDLER_SUSPEND;
	isbreak = true;

	return currentList->id;
}

void GPUCommon::PSPFrame() {
	immCount_ = 0;
	if (dumpNextFrame_) {
		NOTICE_LOG(Log::G3D, "DUMPING THIS FRAME");
		dumpThisFrame_ = true;
		dumpNextFrame_ = false;
	} else if (dumpThisFrame_) {
		dumpThisFrame_ = false;
	}

	if (breakNext_ == GPUDebug::BreakNext::VSYNC) {
		// Just start stepping as soon as we can once the vblank finishes.
		breakNext_ = GPUDebug::BreakNext::OP;
	}
	recorder_.NotifyBeginFrame();
}

// Returns false on breakpoint.
bool GPUCommon::SlowRunLoop(DisplayList &list) {
	const bool dumpThisFrame = dumpThisFrame_;
	while (downcount > 0) {
		GPUDebug::NotifyResult result = NotifyCommand(list.pc, &breakpoints_);
		if (result == GPUDebug::NotifyResult::Break) {
			return false;
		}

		recorder_.NotifyCommand(list.pc);
		u32 op = Memory::ReadUnchecked_U32(list.pc);
		u32 cmd = op >> 24;

		u32 diff = op ^ gstate.cmdmem[cmd];
		PreExecuteOp(op, diff);
		if (dumpThisFrame) {
			char temp[256];
			u32 prev;
			if (Memory::IsValidAddress(list.pc - 4)) {
				prev = Memory::ReadUnchecked_U32(list.pc - 4);
			} else {
				prev = 0;
			}
			GeDisassembleOp(list.pc, op, prev, temp, 256);
			NOTICE_LOG(Log::G3D, "%08x: %s", op, temp);
		}
		gstate.cmdmem[cmd] = op;

		ExecuteOp(op, diff);

		list.pc += 4;
		--downcount;
	}
	return true;
}

// The newPC parameter is used for jumps, we don't count cycles between.
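// Also recomputes downcount: how many command words we may run before reaching the
// stall address (effectively unbounded when no stall is set).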
void GPUCommon::UpdatePC(u32 currentPC, u32 newPC) {
	// Rough estimate, 2 CPU ticks (it's double the clock rate) per GPU instruction.
	u32 executed = (currentPC - cycleLastPC) / 4;
	cyclesExecuted += 2 * executed;
	cycleLastPC = newPC;

	// Exit the runloop and recalculate things. This happens a lot in some games.
	if (currentList)
		downcount = currentList->stall == 0 ? 0x0FFFFFFF : (currentList->stall - newPC) / 4;
	else
		downcount = 0;
}

void GPUCommon::ReapplyGfxState() {
	// The commands are embedded in the command memory so we can just reexecute the words. Convenient.
	// To be safe we pass 0xFFFFFFFF as the diff.

	// TODO: Consider whether any of this should really be done. We might be able to get all the way
	// by simply dirtying the appropriate gstate_c dirty flags.

	for (int i = GE_CMD_VERTEXTYPE; i < GE_CMD_BONEMATRIXNUMBER; i++) {
		if (i != GE_CMD_ORIGIN && i != GE_CMD_OFFSETADDR) {
			ExecuteOp(gstate.cmdmem[i], 0xFFFFFFFF);
		}
	}

	// Can't write to bonematrixnumber here.

	for (int i = GE_CMD_MORPHWEIGHT0; i <= GE_CMD_PATCHFACING; i++) {
		ExecuteOp(gstate.cmdmem[i], 0xFFFFFFFF);
	}

	// There are a few here in the middle that we shouldn't execute...

	// 0x42 to 0xEA
	for (int i = GE_CMD_VIEWPORTXSCALE; i < GE_CMD_TRANSFERSTART; i++) {
		switch (i) {
		case GE_CMD_LOADCLUT:
		case GE_CMD_TEXSYNC:
		case GE_CMD_TEXFLUSH:
			break;
		default:
			ExecuteOp(gstate.cmdmem[i], 0xFFFFFFFF);
			break;
		}
	}

	// Let's just skip the transfer size stuff, it's just values.
}

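// Swaps in a new EDRAM address translation value, returning the previous one.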
uint32_t GPUCommon::SetAddrTranslation(uint32_t value) {
	std::swap(edramTranslation_, value);
	return value;
}

uint32_t GPUCommon::GetAddrTranslation() {
	return edramTranslation_;
}

inline void GPUCommon::UpdateState(GPURunState state) {
	gpuState = state;
	if (state != GPUSTATE_RUNNING)
		downcount = 0;
}

int GPUCommon::GetNextListIndex() {
	auto iter = dlQueue.begin();
	if (iter != dlQueue.end()) {
		return *iter;
	} else {
		return -1;
	}
}

// This is now called when coreState == CORE_RUNNING_GE, in addition to from the various sceGe commands.
DLResult GPUCommon::ProcessDLQueue() {
	if (!resumingFromDebugBreak_) {
		startingTicks = CoreTiming::GetTicks();
		cyclesExecuted = 0;

		// ?? Seems to be correct behaviour to process the list anyway?
		if (startingTicks < busyTicks) {
			DEBUG_LOG(Log::G3D, "Can't execute a list yet, still busy for %lld ticks", busyTicks - startingTicks);
			//return;
		}
	}

	TimeCollector collectStat(&gpuStats.msProcessingDisplayLists, coreCollectDebugStats);

	for (int listIndex = GetNextListIndex(); listIndex != -1; listIndex = GetNextListIndex()) {
		DisplayList &list = dls[listIndex];

		if (list.state == PSP_GE_DL_STATE_PAUSED) {
			return DLResult::Done;
		}

		// Temporary workaround for Crazy Taxi, see #19894
		if (list.state == PSP_GE_DL_STATE_NONE) {
			WARN_LOG(Log::G3D, "Discarding display list with state NONE (pc=%08x). This is odd.", list.pc);
			dlQueue.erase(std::remove(dlQueue.begin(), dlQueue.end(), listIndex), dlQueue.end());
			return DLResult::Done;
		}

		DEBUG_LOG(Log::G3D, "%s DL execution at %08x - stall = %08x (startingTicks=%lld)",
			list.pc == list.startpc ? "Starting" : "Resuming", list.pc, list.stall, startingTicks);

		if (!resumingFromDebugBreak_) {
			// TODO: Need to be careful when *resuming* a list (when it wasn't from a stall...)
			currentList = &list;

			if (!list.started && list.context.IsValid()) {
				gstate.Save(list.context);
			}
			list.started = true;

			gstate_c.offsetAddr = list.offsetAddr;

			if (!Memory::IsValidAddress(list.pc)) {
				ERROR_LOG(Log::G3D, "DL PC = %08x WTF!!!!", list.pc);
				return DLResult::Done;
			}

			cycleLastPC = list.pc;
			cyclesExecuted += 60;
			downcount = list.stall == 0 ? 0x0FFFFFFF : (list.stall - list.pc) / 4;
			list.state = PSP_GE_DL_STATE_RUNNING;
			list.interrupted = false;

			gpuState = list.pc == list.stall ? GPUSTATE_STALL : GPUSTATE_RUNNING;

			// To enable breakpoints, we don't do fast matrix loads while debugger active.
			debugRecording_ = recorder_.IsActive();
			useFastRunLoop_ = !(dumpThisFrame_ || debugRecording_ || NeedsSlowInterpreter() || breakpoints_.HasBreakpoints());
		} else {
			resumingFromDebugBreak_ = false;
			// The bottom part of the gpuState loop below, that wasn't executed
			// when we bailed.
			downcount = list.stall == 0 ? 0x0FFFFFFF : (list.stall - list.pc) / 4;
			if (gpuState == GPUSTATE_STALL && list.pc != list.stall) {
				// Unstalled (Can this happen?)
				gpuState = GPUSTATE_RUNNING;
			}
			// Proceed...
		}

		const bool useFastRunLoop = useFastRunLoop_;

		while (gpuState == GPUSTATE_RUNNING) {
			if (list.pc == list.stall) {
				gpuState = GPUSTATE_STALL;
				downcount = 0;
			}

			if (useFastRunLoop) {
				// When no Ge debugger is active, we go full speed.
				FastRunLoop(list);
			} else {
				// When a Ge debugger is active (or similar), we do more checking.
				if (!SlowRunLoop(list)) {
					// Hit a breakpoint, so we set the state and bail. We can resume later.
					// TODO: Cycle counting might need some more care?
					FinishDeferred();
					_dbg_assert_(!recorder_.IsActive());

					resumingFromDebugBreak_ = true;
					return DLResult::DebugBreak;
				}
			}

			downcount = list.stall == 0 ? 0x0FFFFFFF : (list.stall - list.pc) / 4;
			if (gpuState == GPUSTATE_STALL && list.pc != list.stall) {
				// Unstalled (Can this happen?)
				gpuState = GPUSTATE_RUNNING;
			}
		}

		FinishDeferred();
		if (debugRecording_)
			recorder_.NotifyCPU();

		// We haven't run the op at list.pc, so it shouldn't count.
		if (cycleLastPC != list.pc) {
			UpdatePC(list.pc - 4, list.pc);
		}

		list.offsetAddr = gstate_c.offsetAddr;

		switch (gpuState) {
		case GPUSTATE_DONE:
		case GPUSTATE_ERROR:
			// don't do anything - though dunno about error...
			break;
		case GPUSTATE_STALL:
			// Resume work on this same display list later.
			return DLResult::Done;
		default:
			return DLResult::Error;
		}

		// Some other list could've taken the spot while we dilly-dallied around, so we need the check.
		// Yes, this does happen.
		if (list.state != PSP_GE_DL_STATE_QUEUED) {
			// At the end, we can remove it from the queue and continue.
			dlQueue.erase(std::remove(dlQueue.begin(), dlQueue.end(), listIndex), dlQueue.end());
		}
	}

	currentList = nullptr;

	if (coreCollectDebugStats) {
		gpuStats.otherGPUCycles += cyclesExecuted;
	}

	drawCompleteTicks = startingTicks + cyclesExecuted;
	busyTicks = std::max(busyTicks, drawCompleteTicks);

	__GeTriggerSync(GPU_SYNC_DRAW, 1, drawCompleteTicks);
	// Since the event is in CoreTiming, we're in sync. Just set 0 now.
	return DLResult::Done;
}

bool GPUCommon::ShouldSplitOverGe() const {
	// Check for debugger active.
	// We only need to do this if we want to be able to step through Ge display lists using the Ge debuggers.
	return NeedsSlowInterpreter() || breakpoints_.HasBreakpoints();
}

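// OFFSETADDR: the low 24 bits of the op become bits 8-31 of the offset used for
// relative addressing (the command byte shifts out of the 32-bit result).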
void GPUCommon::Execute_OffsetAddr(u32 op, u32 diff) {
	gstate_c.offsetAddr = op << 8;
}

void GPUCommon::Execute_Vaddr(u32 op, u32 diff) {
	gstate_c.vertexAddr = gstate_c.getRelativeAddress(op & 0x00FFFFFF);
}

void GPUCommon::Execute_Iaddr(u32 op, u32 diff) {
	gstate_c.indexAddr = gstate_c.getRelativeAddress(op & 0x00FFFFFF);
}

void GPUCommon::Execute_Origin(u32 op, u32 diff) {
	if (currentList)
		gstate_c.offsetAddr = currentList->pc;
}

void GPUCommon::Execute_Jump(u32 op, u32 diff) {
	const u32 target = gstate_c.getRelativeAddress(op & 0x00FFFFFC);
	if (!Memory::IsValidAddress(target)) {
		ERROR_LOG(Log::G3D, "JUMP to illegal address %08x - ignoring! data=%06x", target, op & 0x00FFFFFF);
		UpdateState(GPUSTATE_ERROR);
		return;
	}
	UpdatePC(currentList->pc, target - 4);
	currentList->pc = target - 4; // pc will be increased after we return, counteract that
}

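// BJUMP: conditional jump, taken only if the preceding bounding box test failed.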
void GPUCommon::Execute_BJump(u32 op, u32 diff) {
	if (!currentList->bboxResult) {
		// bounding box jump.
		const u32 target = gstate_c.getRelativeAddress(op & 0x00FFFFFC);
		gpuStats.numBBOXJumps++;
		if (Memory::IsValidAddress(target)) {
			UpdatePC(currentList->pc, target - 4);
			currentList->pc = target - 4; // pc will be increased after we return, counteract that
		} else {
			ERROR_LOG(Log::G3D, "BJUMP to illegal address %08x - ignoring! data=%06x", target, op & 0x00FFFFFF);
			UpdateState(GPUSTATE_ERROR);
		}
	}
}

void GPUCommon::Execute_Call(u32 op, u32 diff) {
	PROFILE_THIS_SCOPE("gpu_call");

	const u32 target = gstate_c.getRelativeAddress(op & 0x00FFFFFC);
	if (!Memory::IsValidAddress(target)) {
		ERROR_LOG(Log::G3D, "CALL to illegal address %08x - ignoring! data=%06x", target, op & 0x00FFFFFF);
		if (g_Config.bIgnoreBadMemAccess) {
			return;
		}
		UpdateState(GPUSTATE_ERROR);
		return;
	}
	DoExecuteCall(target);
}

void GPUCommon::DoExecuteCall(u32 target) {
	// Local variable for better codegen
	DisplayList *currentList = this->currentList;

	// Bone matrix optimization - many games will CALL a bone matrix (!).
	// We don't optimize during recording or debugging - so the matrix data gets recorded.
	if (useFastRunLoop_ && Memory::IsValidRange(target, 13 * 4) && (Memory::ReadUnchecked_U32(target) >> 24) == GE_CMD_BONEMATRIXDATA) {
		// Check for the end
		if ((Memory::ReadUnchecked_U32(target + 11 * 4) >> 24) == GE_CMD_BONEMATRIXDATA &&
			(Memory::ReadUnchecked_U32(target + 12 * 4) >> 24) == GE_CMD_RET &&
			(gstate.boneMatrixNumber & 0x00FFFFFF) <= 96 - 12) {
			// Yep, pretty sure this is a bone matrix call. Double check stall first.
			if (target > currentList->stall || target + 12 * 4 < currentList->stall) {
				FastLoadBoneMatrix(target);
				return;
			}
		}
	}

	if (currentList->stackptr == ARRAY_SIZE(currentList->stack)) {
		ERROR_LOG(Log::G3D, "CALL: Stack full!");
		// TODO: UpdateState(GPUSTATE_ERROR) ?
	} else {
		auto &stackEntry = currentList->stack[currentList->stackptr++];
		stackEntry.pc = currentList->pc + 4;
		stackEntry.offsetAddr = gstate_c.offsetAddr;
		// The base address is NOT saved/restored for a regular call.
		UpdatePC(currentList->pc, target - 4);
		currentList->pc = target - 4; // pc will be increased after we return, counteract that
	}
}

void GPUCommon::Execute_Ret(u32 op, u32 diff) {
	// Local variable for better codegen
	DisplayList *currentList = this->currentList;
	if (currentList->stackptr == 0) {
		DEBUG_LOG(Log::G3D, "RET: Stack empty!");
	} else {
		auto &stackEntry = currentList->stack[--currentList->stackptr];
		gstate_c.offsetAddr = stackEntry.offsetAddr;
		// We always clear the top (uncached/etc.) bits
		const u32 target = stackEntry.pc & 0x0FFFFFFF;
		UpdatePC(currentList->pc, target - 4);
		currentList->pc = target - 4;
#ifdef _DEBUG
		if (!Memory::IsValidAddress(currentList->pc)) {
			ERROR_LOG_REPORT(Log::G3D, "Invalid DL PC %08x on return", currentList->pc);
			UpdateState(GPUSTATE_ERROR);
		}
#endif
	}
}

void GPUCommon::Execute_End(u32 op, u32 diff) {
	if (flushOnParams_) {
		drawEngineCommon_->FlushQueuedDepth();
		Flush();
	}

	const u32 prev = Memory::ReadUnchecked_U32(currentList->pc - 4);
	UpdatePC(currentList->pc, currentList->pc);
	// Count in a few extra cycles on END.
	cyclesExecuted += 60;

	switch (prev >> 24) {
	case GE_CMD_SIGNAL:
	{
		// TODO: see http://code.google.com/p/jpcsp/source/detail?r=2935#
		SignalBehavior behaviour = static_cast<SignalBehavior>((prev >> 16) & 0xFF);
		const int signal = prev & 0xFFFF;
		const int enddata = op & 0xFFFF;
		bool trigger = true;
		currentList->subIntrToken = signal;

		switch (behaviour) {
		case PSP_GE_SIGNAL_HANDLER_SUSPEND:
			// Suspend the list, and call the signal handler. When it's done, resume.
			// Before sdkver 0x02000010, listsync should return paused.
			if (sceKernelGetCompiledSdkVersion() <= 0x02000010)
				currentList->state = PSP_GE_DL_STATE_PAUSED;
			currentList->signal = behaviour;
			DEBUG_LOG(Log::G3D, "Signal with wait. signal/end: %04x %04x", signal, enddata);
			break;
		case PSP_GE_SIGNAL_HANDLER_CONTINUE:
			// Resume the list right away, then call the handler.
			currentList->signal = behaviour;
			DEBUG_LOG(Log::G3D, "Signal without wait. signal/end: %04x %04x", signal, enddata);
			break;
		case PSP_GE_SIGNAL_HANDLER_PAUSE:
			// Pause the list instead of ending at the next FINISH.
			// Call the handler with the PAUSE signal value at that FINISH.
			// Technically, this ought to trigger an interrupt, but it won't do anything.
			// But right now, signal is always reset by interrupts, so that causes pause to not work.
			trigger = false;
			currentList->signal = behaviour;
			DEBUG_LOG(Log::G3D, "Signal with Pause. signal/end: %04x %04x", signal, enddata);
			break;
		case PSP_GE_SIGNAL_SYNC:
			// Acts as a memory barrier, never calls any user code.
			// Technically, this ought to trigger an interrupt, but it won't do anything.
			// Triggering here can cause incorrect rescheduling, which breaks 3rd Birthday.
			// However, this is likely a bug in how GE signal interrupts are handled.
			trigger = false;
			currentList->signal = behaviour;
			DEBUG_LOG(Log::G3D, "Signal with Sync. signal/end: %04x %04x", signal, enddata);
			break;
		case PSP_GE_SIGNAL_JUMP:
		case PSP_GE_SIGNAL_RJUMP:
		case PSP_GE_SIGNAL_OJUMP:
		{
			trigger = false;
			currentList->signal = behaviour;
			// pc will be increased after we return, counteract that.
			u32 target = (((signal << 16) | enddata) & 0xFFFFFFFC) - 4;
			const char *targetType = "absolute";
			if (behaviour == PSP_GE_SIGNAL_RJUMP) {
				target += currentList->pc - 4;
				targetType = "relative";
			} else if (behaviour == PSP_GE_SIGNAL_OJUMP) {
				target = gstate_c.getRelativeAddress(target);
				targetType = "origin";
			}

			if (!Memory::IsValidAddress(target)) {
				ERROR_LOG_REPORT(Log::G3D, "Signal with Jump (%s): bad address. signal/end: %04x %04x", targetType, signal, enddata);
				UpdateState(GPUSTATE_ERROR);
			} else {
				UpdatePC(currentList->pc, target);
				currentList->pc = target;
				DEBUG_LOG(Log::G3D, "Signal with Jump (%s). signal/end: %04x %04x", targetType, signal, enddata);
			}
		}
		break;
		case PSP_GE_SIGNAL_CALL:
		case PSP_GE_SIGNAL_RCALL:
		case PSP_GE_SIGNAL_OCALL:
		{
			trigger = false;
			currentList->signal = behaviour;
			// pc will be increased after we return, counteract that.
			u32 target = (((signal << 16) | enddata) & 0xFFFFFFFC) - 4;
			const char *targetType = "absolute";
			if (behaviour == PSP_GE_SIGNAL_RCALL) {
				target += currentList->pc - 4;
				targetType = "relative";
			} else if (behaviour == PSP_GE_SIGNAL_OCALL) {
				target = gstate_c.getRelativeAddress(target);
				targetType = "origin";
			}

			if (currentList->stackptr == ARRAY_SIZE(currentList->stack)) {
				ERROR_LOG_REPORT(Log::G3D, "Signal with Call (%s): stack full. signal/end: %04x %04x", targetType, signal, enddata);
			} else if (!Memory::IsValidAddress(target)) {
				ERROR_LOG_REPORT(Log::G3D, "Signal with Call (%s): bad address. signal/end: %04x %04x", targetType, signal, enddata);
				UpdateState(GPUSTATE_ERROR);
			} else {
				// TODO: This might save/restore other state...
				auto &stackEntry = currentList->stack[currentList->stackptr++];
				stackEntry.pc = currentList->pc;
				stackEntry.offsetAddr = gstate_c.offsetAddr;
				stackEntry.baseAddr = gstate.base;
				UpdatePC(currentList->pc, target);
				currentList->pc = target;
				DEBUG_LOG(Log::G3D, "Signal with Call (%s). signal/end: %04x %04x", targetType, signal, enddata);
			}
		}
		break;
		case PSP_GE_SIGNAL_RET:
		{
			trigger = false;
			currentList->signal = behaviour;
			if (currentList->stackptr == 0) {
				ERROR_LOG_REPORT(Log::G3D, "Signal with Return: stack empty. signal/end: %04x %04x", signal, enddata);
			} else {
				// TODO: This might save/restore other state...
				auto &stackEntry = currentList->stack[--currentList->stackptr];
				gstate_c.offsetAddr = stackEntry.offsetAddr;
				gstate.base = stackEntry.baseAddr;
				UpdatePC(currentList->pc, stackEntry.pc);
				currentList->pc = stackEntry.pc;
				DEBUG_LOG(Log::G3D, "Signal with Return. signal/end: %04x %04x", signal, enddata);
			}
		}
		break;
		default:
			ERROR_LOG_REPORT(Log::G3D, "UNKNOWN Signal UNIMPLEMENTED %i ! signal/end: %04x %04x", behaviour, signal, enddata);
			break;
		}
		// TODO: Technically, jump/call/ret should generate an interrupt, but before the pc change maybe?
		if (currentList->interruptsEnabled && trigger) {
			if (__GeTriggerInterrupt(currentList->id, currentList->pc, startingTicks + cyclesExecuted)) {
				currentList->pendingInterrupt = true;
				UpdateState(GPUSTATE_INTERRUPT);
			}
		}
	}
	break;
	case GE_CMD_FINISH:
		switch (currentList->signal) {
		case PSP_GE_SIGNAL_HANDLER_PAUSE:
			currentList->state = PSP_GE_DL_STATE_PAUSED;
			if (currentList->interruptsEnabled) {
				if (__GeTriggerInterrupt(currentList->id, currentList->pc, startingTicks + cyclesExecuted)) {
					currentList->pendingInterrupt = true;
					UpdateState(GPUSTATE_INTERRUPT);
				}
			}
			break;

		case PSP_GE_SIGNAL_SYNC:
			currentList->signal = PSP_GE_SIGNAL_NONE;
			// TODO: Technically this should still cause an interrupt. Probably for memory sync.
			break;

		default:
			FlushImm();
			currentList->subIntrToken = prev & 0xFFFF;
			UpdateState(GPUSTATE_DONE);
			// Since we marked done, we have to restore the context now before the next list runs.
			if (currentList->started && currentList->context.IsValid()) {
				gstate.Restore(currentList->context);
				ReapplyGfxState();
				// Don't restore the context again.
				currentList->started = false;
			}

			if (currentList->interruptsEnabled && __GeTriggerInterrupt(currentList->id, currentList->pc, startingTicks + cyclesExecuted)) {
				currentList->pendingInterrupt = true;
			} else {
				currentList->state = PSP_GE_DL_STATE_COMPLETED;
				currentList->waitUntilTicks = startingTicks + cyclesExecuted;
				busyTicks = std::max(busyTicks, currentList->waitUntilTicks);
				__GeTriggerSync(GPU_SYNC_LIST, currentList->id, currentList->waitUntilTicks);
			}
			break;
		}
		break;
	default:
		DEBUG_LOG(Log::G3D, "END: Not finished: %06x", prev & 0xFFFFFF);
		break;
	}
}

void GPUCommon::Execute_BoundingBox(u32 op, u32 diff) {
	// Just resetting, nothing to check bounds for.
	const u32 count = op & 0xFFFF;
	if (count == 0) {
		currentList->bboxResult = false;
		return;
	}

	// Approximate based on timings of several counts on a PSP.
	cyclesExecuted += count * 22;

	const u32 vertType = gstate.vertType;

	const bool useInds = (vertType & GE_VTYPE_IDX_MASK) != 0;
	const VertexDecoder *dec = drawEngineCommon_->GetVertexDecoder(vertType);
	int bytesRead = (useInds ? 1 : dec->VertexSize()) * count;

	if (!Memory::IsValidRange(gstate_c.vertexAddr, bytesRead)) {
		ERROR_LOG_REPORT_ONCE(boundingbox, Log::G3D, "Bad bounding box data: %06x", count);
		// Data seems invalid. Let's assume the box test passed.
		currentList->bboxResult = true;
		return;
	}
	const void *control_points = Memory::GetPointerUnchecked(gstate_c.vertexAddr); // we checked the range above.

	const void *inds = nullptr;
	if (useInds) {
		const int indexSizeShift = ((vertType & GE_VTYPE_IDX_MASK) >> GE_VTYPE_IDX_SHIFT) - 1;
		if (!Memory::IsValidRange(gstate_c.indexAddr, count << indexSizeShift)) {
			ERROR_LOG_REPORT_ONCE(boundingboxInds, Log::G3D, "Invalid inds in bounding box check");
			currentList->bboxResult = true;
			return;
		}
		inds = Memory::GetPointerUnchecked(gstate_c.indexAddr);
	}

	// Test if the bounding box is within the drawing region.
	// The PSP only seems to vary the result based on a single range of 0x100.
	if (count > 0x200) {
		// The second to last set of 0x100 is checked (even for odd counts.)
		size_t skipSize = (count - 0x200) * dec->VertexSize();
		currentList->bboxResult = drawEngineCommon_->TestBoundingBox((const uint8_t *)control_points + skipSize, inds, 0x100, dec, vertType);
	} else if (count > 0x100) {
		int checkSize = count - 0x100;
		currentList->bboxResult = drawEngineCommon_->TestBoundingBox(control_points, inds, checkSize, dec, vertType);
	} else {
		currentList->bboxResult = drawEngineCommon_->TestBoundingBox(control_points, inds, count, dec, vertType);
	}
	AdvanceVerts(gstate.vertType, count, bytesRead);
}

void GPUCommon::Execute_MorphWeight(u32 op, u32 diff) {
	gstate_c.morphWeights[(op >> 24) - GE_CMD_MORPHWEIGHT0] = getFloat24(op);
}

void GPUCommon::Execute_ImmVertexAlphaPrim(u32 op, u32 diff) {
	// Safety check.
	if (immCount_ >= MAX_IMMBUFFER_SIZE) {
		// Only print once for each overrun.
		if (immCount_ == MAX_IMMBUFFER_SIZE) {
			ERROR_LOG_REPORT_ONCE(exceed_imm_buffer, Log::G3D, "Exceeded immediate draw buffer size. gstate.imm_ap=%06x , prim=%d", gstate.imm_ap & 0xFFFFFF, (int)immPrim_);
		}
		if (immCount_ < 0x7fffffff) // Paranoia :)
			immCount_++;
		return;
	}

	const int prim = (op >> 8) & 0x7;
	if (prim != GE_PRIM_KEEP_PREVIOUS) {
		// Flush before changing the prim type. Only continue can be used to continue a prim.
		FlushImm();
	}

	TransformedVertex &v = immBuffer_[immCount_++];

	// ThrillVille does a clear with this, additional parameters found via tests.
	// The current vtype affects how the coordinate is processed.
	if (gstate.isModeThrough()) {
		v.x = ((int)(gstate.imm_vscx & 0xFFFF) - 0x8000) / 16.0f;
		v.y = ((int)(gstate.imm_vscy & 0xFFFF) - 0x8000) / 16.0f;
	} else {
		int offsetX = gstate.getOffsetX16();
		int offsetY = gstate.getOffsetY16();
		v.x = ((int)(gstate.imm_vscx & 0xFFFF) - offsetX) / 16.0f;
		v.y = ((int)(gstate.imm_vscy & 0xFFFF) - offsetY) / 16.0f;
	}
	v.z = gstate.imm_vscz & 0xFFFF;
	v.pos_w = 1.0f;
	v.u = getFloat24(gstate.imm_vtcs);
	v.v = getFloat24(gstate.imm_vtct);
	v.uv_w = getFloat24(gstate.imm_vtcq);
	v.color0_32 = (gstate.imm_cv & 0xFFFFFF) | (gstate.imm_ap << 24);
	// TODO: When !gstate.isModeThrough(), direct fog coefficient (0 = entirely fog), ignore fog flag (also GE_IMM_FOG.)
	v.fog = (gstate.imm_fc & 0xFF) / 255.0f;
	// TODO: Apply if gstate.isUsingSecondaryColor() && !gstate.isModeThrough(), ignore lighting flag.
	v.color1_32 = gstate.imm_scv & 0xFFFFFF;
	if (prim != GE_PRIM_KEEP_PREVIOUS) {
		immPrim_ = (GEPrimitiveType)prim;
		// Flags seem to only be respected from the first prim.
		immFlags_ = op & 0x00FFF800;
		immFirstSent_ = false;
	} else if (prim == GE_PRIM_KEEP_PREVIOUS && immPrim_ != GE_PRIM_INVALID) {
		static constexpr int flushPrimCount[] = { 1, 2, 0, 3, 0, 0, 2, 0 };
		// Instead of finding a proper point to flush, we just emit prims when we can.
		if (immCount_ == flushPrimCount[immPrim_ & 7])
			FlushImm();
	} else {
		ERROR_LOG_REPORT_ONCE(imm_draw_prim, Log::G3D, "Immediate draw: Unexpected primitive %d at count %d", prim, immCount_);
	}
}

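// Submits any accumulated immediate-mode vertices as a regular draw, temporarily
// applying the raster state captured in immFlags_ and restoring it afterwards.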
void GPUCommon::FlushImm() {
	if (immCount_ == 0 || immPrim_ == GE_PRIM_INVALID)
		return;

	if (gstate_c.skipDrawReason & (SKIPDRAW_SKIPFRAME | SKIPDRAW_NON_DISPLAYED_FB)) {
		// No idea how many cycles to skip, heh.
		immCount_ = 0;
		return;
	}

	// Ignore immediate point primitives with a X/Y of 0.
	// These are accidentally created when games clear the graphics state.
	if (immCount_ == 1 && immPrim_ == GE_PRIM_POINTS && immBuffer_[0].x == 0 && immBuffer_[0].y == 0 && immBuffer_[0].z == 0 && immBuffer_[0].color0_32 == 0) {
		immCount_ = 0;
		return;
	}

	SetDrawType(DRAW_PRIM, immPrim_);

	gstate_c.UpdateUVScaleOffset();

	VirtualFramebuffer *vfb = nullptr;
	if (framebufferManager_) {
		bool changed;
		vfb = framebufferManager_->SetRenderFrameBuffer(gstate_c.IsDirty(DIRTY_FRAMEBUF), gstate_c.skipDrawReason, &changed);
	}
	if (vfb) {
		CheckDepthUsage(vfb);
	}

	bool antialias = (immFlags_ & GE_IMM_ANTIALIAS) != 0;
	bool prevAntialias = gstate.isAntiAliasEnabled();
	bool shading = (immFlags_ & GE_IMM_SHADING) != 0;
	bool prevShading = gstate.getShadeMode() == GE_SHADE_GOURAUD;
	bool cullEnable = (immFlags_ & GE_IMM_CULLENABLE) != 0;
	bool prevCullEnable = gstate.isCullEnabled();
	int cullMode = (immFlags_ & GE_IMM_CULLFACE) != 0 ? 1 : 0;
	bool texturing = (immFlags_ & GE_IMM_TEXTURE) != 0;
	bool prevTexturing = gstate.isTextureMapEnabled();
	bool fog = (immFlags_ & GE_IMM_FOG) != 0;
	bool prevFog = gstate.isFogEnabled();
	bool dither = (immFlags_ & GE_IMM_DITHER) != 0;
	bool prevDither = gstate.isDitherEnabled();

	if ((immFlags_ & GE_IMM_CLIPMASK) != 0) {
		WARN_LOG_REPORT_ONCE(geimmclipvalue, Log::G3D, "Imm vertex used clip value, flags=%06x", immFlags_);
	}

	bool changed = texturing != prevTexturing || cullEnable != prevCullEnable || dither != prevDither;
	changed = changed || prevShading != shading || prevFog != fog;
	if (changed) {
		Flush();
		gstate.antiAliasEnable = (GE_CMD_ANTIALIASENABLE << 24) | (int)antialias;
		gstate.shademodel = (GE_CMD_SHADEMODE << 24) | (int)shading;
		gstate.cullfaceEnable = (GE_CMD_CULLFACEENABLE << 24) | (int)cullEnable;
		gstate.textureMapEnable = (GE_CMD_TEXTUREMAPENABLE << 24) | (int)texturing;
		gstate.fogEnable = (GE_CMD_FOGENABLE << 24) | (int)fog;
		gstate.ditherEnable = (GE_CMD_DITHERENABLE << 24) | (int)dither;
		gstate_c.Dirty(DIRTY_VERTEXSHADER_STATE | DIRTY_FRAGMENTSHADER_STATE | DIRTY_RASTER_STATE | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_UVSCALEOFFSET | DIRTY_CULLRANGE);
	}

	drawEngineCommon_->DispatchSubmitImm(immPrim_, immBuffer_, immCount_, cullMode, immFirstSent_);
	immCount_ = 0;
	immFirstSent_ = true;

	if (changed) {
		Flush();
		gstate.antiAliasEnable = (GE_CMD_ANTIALIASENABLE << 24) | (int)prevAntialias;
		gstate.shademodel = (GE_CMD_SHADEMODE << 24) | (int)prevShading;
		gstate.cullfaceEnable = (GE_CMD_CULLFACEENABLE << 24) | (int)prevCullEnable;
		gstate.textureMapEnable = (GE_CMD_TEXTUREMAPENABLE << 24) | (int)prevTexturing;
		gstate.fogEnable = (GE_CMD_FOGENABLE << 24) | (int)prevFog;
		gstate.ditherEnable = (GE_CMD_DITHERENABLE << 24) | (int)prevDither;
		gstate_c.Dirty(DIRTY_VERTEXSHADER_STATE | DIRTY_FRAGMENTSHADER_STATE | DIRTY_RASTER_STATE | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_UVSCALEOFFSET | DIRTY_CULLRANGE);
	}
}

void GPUCommon::Execute_Unknown(u32 op, u32 diff) {
	// Do nothing. We used to report here, but we're confident we have them all so no need to report unknown.
}

void GPUCommon::FastLoadBoneMatrix(u32 target) {
	const u32 num = gstate.boneMatrixNumber & 0x7F;
	_dbg_assert_msg_(num + 12 <= 96, "FastLoadBoneMatrix would corrupt memory");
	const u32 mtxNum = num / 12;
	u32 uniformsToDirty = DIRTY_BONEMATRIX0 << mtxNum;
	if (num != 12 * mtxNum) {
		uniformsToDirty |= DIRTY_BONEMATRIX0 << ((mtxNum + 1) & 7);
	}

	if (!g_Config.bSoftwareSkinning) {
		if (flushOnParams_) {
			Flush();
		}
		gstate_c.Dirty(uniformsToDirty);
	} else {
		gstate_c.deferredVertTypeDirty |= uniformsToDirty;
	}
	gstate.FastLoadBoneMatrix(target);

	cyclesExecuted += 2 * 14; // one to reset the counter, 12 to load the matrix, and a return.

	if (coreCollectDebugStats) {
		gpuStats.otherGPUCycles += 2 * 14;
	}
}

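// Older save-state layouts of DisplayList, kept so DoState() below can still load
// states written before the context pointer and stackAddr changes.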
struct DisplayList_v1 {
	int id;
	u32 startpc;
	u32 pc;
	u32 stall;
	DisplayListState state;
	SignalBehavior signal;
	int subIntrBase;
	u16 subIntrToken;
	DisplayListStackEntry stack[32];
	int stackptr;
	bool interrupted;
	u64 waitUntilTicks;
	bool interruptsEnabled;
	bool pendingInterrupt;
	bool started;
	size_t contextPtr;
	u32 offsetAddr;
	bool bboxResult;
};

struct DisplayList_v2 {
	int id;
	u32 startpc;
	u32 pc;
	u32 stall;
	DisplayListState state;
	SignalBehavior signal;
	int subIntrBase;
	u16 subIntrToken;
	DisplayListStackEntry stack[32];
	int stackptr;
	bool interrupted;
	u64 waitUntilTicks;
	bool interruptsEnabled;
	bool pendingInterrupt;
	bool started;
	PSPPointer<u32_le> context;
	u32 offsetAddr;
	bool bboxResult;
};

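// Save-state serialization. Section versions 1-3 stored older DisplayList layouts,
// which are upconverted to the current padded struct below.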
void GPUCommon::DoState(PointerWrap &p) {
	auto s = p.Section("GPUCommon", 1, 6);
	if (!s)
		return;

	Do<int>(p, dlQueue);
	if (s >= 4) {
		DoArray(p, dls, ARRAY_SIZE(dls));
	} else if (s >= 3) {
		// This may have been saved with or without padding, depending on platform.
		// We need to upconvert it to our consistently-padded struct.
		static const size_t DisplayList_v3_size = 452;
		static const size_t DisplayList_v4_size = 456;
		static_assert(DisplayList_v4_size == sizeof(DisplayList), "Make sure to change here when updating DisplayList");

		p.DoVoid(&dls[0], DisplayList_v3_size);
		dls[0].padding = 0;

		const u8 *savedPtr = *p.GetPPtr();
		const u32 *savedPtr32 = (const u32 *)savedPtr;
		// Here's the trick: the first member (id) is always the same as the index.
		// The second member (startpc) is always an address, or 0, never 1. So we can see the padding.
		const bool hasPadding = savedPtr32[1] == 1;
		if (hasPadding) {
			u32 padding;
			Do(p, padding);
		}

		for (size_t i = 1; i < ARRAY_SIZE(dls); ++i) {
			p.DoVoid(&dls[i], DisplayList_v3_size);
			dls[i].padding = 0;
			if (hasPadding) {
				u32 padding;
				Do(p, padding);
			}
		}
	} else if (s >= 2) {
		for (size_t i = 0; i < ARRAY_SIZE(dls); ++i) {
			DisplayList_v2 oldDL;
			Do(p, oldDL);
			// Copy over everything except the last, new member (stackAddr.)
			memcpy(&dls[i], &oldDL, sizeof(DisplayList_v2));
			dls[i].stackAddr = 0;
		}
	} else {
		// Can only be in read mode here.
		for (size_t i = 0; i < ARRAY_SIZE(dls); ++i) {
			DisplayList_v1 oldDL;
			Do(p, oldDL);
			// On 32-bit, they're the same, on 64-bit oldDL is bigger.
			memcpy(&dls[i], &oldDL, sizeof(DisplayList_v1));
			// Fix the other fields. Let's hope context wasn't important, it was a pointer.
			dls[i].context = 0;
			dls[i].offsetAddr = oldDL.offsetAddr;
			dls[i].bboxResult = oldDL.bboxResult;
			dls[i].stackAddr = 0;
		}
	}
	int currentID = 0;
	if (currentList != nullptr) {
		currentID = (int)(currentList - &dls[0]);
	}
	Do(p, currentID);
	if (currentID == 0) {
		currentList = nullptr;
	} else {
		currentList = &dls[currentID];
	}
	Do(p, interruptRunning);
	Do(p, gpuState);
	Do(p, isbreak);
	Do(p, drawCompleteTicks);
	Do(p, busyTicks);

	if (s >= 5) {
		Do(p, matrixVisible.all);
	}
	if (s >= 6) {
		Do(p, edramTranslation_);
	}
}

void GPUCommon::InterruptStart(int listid) {
	interruptRunning = true;
}

void GPUCommon::InterruptEnd(int listid) {
	interruptRunning = false;
	isbreak = false;

	DisplayList &dl = dls[listid];
	dl.pendingInterrupt = false;
	// TODO: Unless the signal handler could change it?
	if (dl.state == PSP_GE_DL_STATE_COMPLETED || dl.state == PSP_GE_DL_STATE_NONE) {
		if (dl.started && dl.context.IsValid()) {
			gstate.Restore(dl.context);
			ReapplyGfxState();
		}
		dl.waitUntilTicks = 0;
		__GeTriggerWait(GPU_SYNC_LIST, listid);

		// Make sure the list isn't still queued since it's now completed.
		if (!dlQueue.empty()) {
			if (listid == dlQueue.front())
				PopDLQueue();
			else
				dlQueue.remove(listid);
		}
	}
}

// TODO: Maybe cleaner to keep this in GE and trigger the clear directly?
void GPUCommon::SyncEnd(GPUSyncType waitType, int listid, bool wokeThreads) {
	if (waitType == GPU_SYNC_DRAW && wokeThreads)
	{
		for (int i = 0; i < DisplayListMaxCount; ++i) {
			if (dls[i].state == PSP_GE_DL_STATE_COMPLETED) {
				dls[i].state = PSP_GE_DL_STATE_NONE;
			}
		}
	}
}

bool GPUCommon::GetCurrentDisplayList(DisplayList &list) {
	if (!currentList) {
		return false;
	}
	list = *currentList;
	return true;
}

int GPUCommon::GetCurrentPrimCount() {
	DisplayList list;
	if (GetCurrentDisplayList(list)) {
		u32 cmd = Memory::Read_U32(list.pc);
		if ((cmd >> 24) == GE_CMD_PRIM || (cmd >> 24) == GE_CMD_BOUNDINGBOX) {
			return cmd & 0xFFFF;
		} else if ((cmd >> 24) == GE_CMD_BEZIER || (cmd >> 24) == GE_CMD_SPLINE) {
			u32 u = (cmd & 0x00FF) >> 0;
			u32 v = (cmd & 0xFF00) >> 8;
			return u * v;
		}
		return 1; // Not a known draw command.
	} else {
		// Current prim value.
		return gstate.cmdmem[GE_CMD_PRIM] & 0xFFFF;
	}
}

std::vector<DisplayList> GPUCommon::ActiveDisplayLists() {
	std::vector<DisplayList> result;

	for (int it : dlQueue) {
		result.push_back(dls[it]);
	}

	return result;
}

void GPUCommon::ResetListPC(int listID, u32 pc) {
	if (listID < 0 || listID >= DisplayListMaxCount) {
		_dbg_assert_msg_(false, "listID out of range: %d", listID);
		return;
	}

	Reporting::NotifyDebugger();
	dls[listID].pc = pc;
	downcount = 0;
}

void GPUCommon::ResetListStall(int listID, u32 stall) {
	if (listID < 0 || listID >= DisplayListMaxCount) {
		_dbg_assert_msg_(false, "listID out of range: %d", listID);
		return;
	}

	Reporting::NotifyDebugger();
	dls[listID].stall = stall;
	downcount = 0;
}

void GPUCommon::ResetListState(int listID, DisplayListState state) {
	if (listID < 0 || listID >= DisplayListMaxCount) {
		_dbg_assert_msg_(false, "listID out of range: %d", listID);
		return;
	}

	Reporting::NotifyDebugger();
	dls[listID].state = state;
	downcount = 0;
}

GPUDebugOp GPUCommon::DisassembleOp(u32 pc, u32 op) {
	char buffer[1024];
	u32 prev = Memory::IsValidAddress(pc - 4) ? Memory::ReadUnchecked_U32(pc - 4) : 0;
	GeDisassembleOp(pc, op, prev, buffer, sizeof(buffer));

	GPUDebugOp info;
	info.pc = pc;
	info.cmd = op >> 24;
	info.op = op;
	info.desc = buffer;
	return info;
}

std::vector<GPUDebugOp> GPUCommon::DisassembleOpRange(u32 startpc, u32 endpc) {
	char buffer[1024];
	std::vector<GPUDebugOp> result;
	GPUDebugOp info;

	// Don't trigger a pause.
	u32 prev = Memory::IsValidAddress(startpc - 4) ? Memory::Read_U32(startpc - 4) : 0;
	result.reserve((endpc - startpc) / 4);
	for (u32 pc = startpc; pc < endpc; pc += 4) {
		u32 op = Memory::IsValidAddress(pc) ? Memory::Read_U32(pc) : 0;
		GeDisassembleOp(pc, op, prev, buffer, sizeof(buffer));
		prev = op;

		info.pc = pc;
		info.cmd = op >> 24;
		info.op = op;
		info.desc = buffer;
		result.push_back(info);
	}
	return result;
}

u32 GPUCommon::GetRelativeAddress(u32 data) {
	return gstate_c.getRelativeAddress(data);
}

u32 GPUCommon::GetVertexAddress() {
	return gstate_c.vertexAddr;
}

u32 GPUCommon::GetIndexAddress() {
	return gstate_c.indexAddr;
}

const GPUgstate &GPUCommon::GetGState() {
	return gstate;
}

void GPUCommon::SetCmdValue(u32 op) {
	u32 cmd = op >> 24;
	u32 diff = op ^ gstate.cmdmem[cmd];

	Reporting::NotifyDebugger();
	PreExecuteOp(op, diff);
	gstate.cmdmem[cmd] = op;
	ExecuteOp(op, diff);
	downcount = 0;
}

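// Executes a GE block transfer (TRANSFERSTART): copies a width x height rectangle
// between two possibly-strided regions of PSP memory, with special cases for VRAM
// mirror wrapping and framebuffer-aware copies.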
void GPUCommon::DoBlockTransfer(u32 skipDrawReason) {
1704
u32 srcBasePtr = gstate.getTransferSrcAddress();
1705
u32 srcStride = gstate.getTransferSrcStride();
1706
1707
u32 dstBasePtr = gstate.getTransferDstAddress();
1708
u32 dstStride = gstate.getTransferDstStride();
1709
1710
int srcX = gstate.getTransferSrcX();
1711
int srcY = gstate.getTransferSrcY();
1712
1713
int dstX = gstate.getTransferDstX();
1714
int dstY = gstate.getTransferDstY();
1715
1716
int width = gstate.getTransferWidth();
1717
int height = gstate.getTransferHeight();
1718
1719
int bpp = gstate.getTransferBpp();
1720
1721
DEBUG_LOG(Log::G3D, "Block transfer: %08x/%x -> %08x/%x, %ix%ix%i (%i,%i)->(%i,%i)", srcBasePtr, srcStride, dstBasePtr, dstStride, width, height, bpp, srcX, srcY, dstX, dstY);
1722
gpuStats.numBlockTransfers++;
1723
1724
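	// The masking below handles VRAM mirroring: VRAM itself is 2MB at 0x04000000,
	// and an address with the 0x00800000 bit set lies in a mirror, so clearing that
	// bit folds it back into range (e.g. 0x04880000 -> 0x04080000).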
	// For VRAM, we wrap around when outside valid memory (mirrors still work.)
	if ((srcBasePtr & 0x04800000) == 0x04800000)
		srcBasePtr &= ~0x00800000;
	if ((dstBasePtr & 0x04800000) == 0x04800000)
		dstBasePtr &= ~0x00800000;

	// Use height less one to account for width, which can be greater or less than stride, and then add it on for the last line.
	// NOTE: The sizes are only used for validity checks and memory info tracking.
	const uint32_t src = srcBasePtr + (srcY * srcStride + srcX) * bpp;
	const uint32_t dst = dstBasePtr + (dstY * dstStride + dstX) * bpp;
	const uint32_t srcSize = ((height - 1) * srcStride + width) * bpp;
	const uint32_t dstSize = ((height - 1) * dstStride + width) * bpp;
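	// Worked example (hypothetical values): a 128x64 copy at 16bpp with a 256-pixel
	// source stride gives srcSize = (63 * 256 + 128) * 2 = 32512 bytes -- full rows
	// are stride apart, but the last row only contributes its width.
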
	bool srcDstOverlap = src + srcSize > dst && dst + dstSize > src;
	bool srcValid = Memory::IsValidRange(src, srcSize);
	bool dstValid = Memory::IsValidRange(dst, dstSize);
	bool srcWraps = Memory::IsVRAMAddress(srcBasePtr) && !srcValid;
	bool dstWraps = Memory::IsVRAMAddress(dstBasePtr) && !dstValid;

	char tag[128];
	size_t tagSize = 0;

	// Tell the framebuffer manager to take action if possible. If it does the entire thing, let's just return.
	if (!framebufferManager_ || !framebufferManager_->NotifyBlockTransferBefore(dstBasePtr, dstStride, dstX, dstY, srcBasePtr, srcStride, srcX, srcY, width, height, bpp, skipDrawReason)) {
		// Do the copy! (Hm, if we detect a drawn video frame (see below) then we could maybe skip this?)
		// Can use GetPointerUnchecked because we checked the addresses above. We could also avoid them
		// entirely by walking a couple of pointers...

		// Simple case: just a straight copy, no overlap or wrapping.
		if (srcStride == dstStride && (u32)width == srcStride && !srcDstOverlap && srcValid && dstValid) {
			u32 srcLineStartAddr = srcBasePtr + (srcY * srcStride + srcX) * bpp;
			u32 dstLineStartAddr = dstBasePtr + (dstY * dstStride + dstX) * bpp;
			u32 bytesToCopy = width * height * bpp;

			const u8 *srcp = Memory::GetPointer(srcLineStartAddr);
			u8 *dstp = Memory::GetPointerWrite(dstLineStartAddr);
			memcpy(dstp, srcp, bytesToCopy);

			if (MemBlockInfoDetailed(bytesToCopy)) {
				NotifyMemInfoCopy(dst, src, bytesToCopy, "GPUBlockTransfer/");
			}
		} else if ((srcDstOverlap || srcWraps || dstWraps) && (srcValid || srcWraps) && (dstValid || dstWraps)) {
			// This path means we have either src/dst overlap, OR one or both of src and dst wrap.
			// This should be uncommon so it's the slowest path.
			u32 bytesToCopy = width * bpp;
			bool notifyDetail = MemBlockInfoDetailed(srcWraps || dstWraps ? 64 : bytesToCopy);
			bool notifyAll = !notifyDetail && MemBlockInfoDetailed(srcSize, dstSize);
			if (notifyDetail || notifyAll) {
				tagSize = FormatMemWriteTagAt(tag, sizeof(tag), "GPUBlockTransfer/", src, srcSize);
			}

			auto notifyingMemmove = [&](u32 d, u32 s, u32 sz) {
				const u8 *srcp = Memory::GetPointer(s);
				u8 *dstp = Memory::GetPointerWrite(d);
				memmove(dstp, srcp, sz);

				if (notifyDetail) {
					NotifyMemInfo(MemBlockFlags::READ, s, sz, tag, tagSize);
					NotifyMemInfo(MemBlockFlags::WRITE, d, sz, tag, tagSize);
				}
			};

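			// Strategy for the loop below: copy each line in 64-byte chunks. A chunk
			// may straddle the end of valid memory on the source side, the destination
			// side, or both, so it gets split into up to three regions: bytes valid on
			// both sides, bytes valid on one side only (the other has wrapped to the
			// start of VRAM), and bytes where both sides have wrapped.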
			for (int y = 0; y < height; y++) {
				u32 srcLineStartAddr = srcBasePtr + ((y + srcY) * srcStride + srcX) * bpp;
				u32 dstLineStartAddr = dstBasePtr + ((y + dstY) * dstStride + dstX) * bpp;
				// If we already passed a wrap, we can use the quicker path.
				if ((srcLineStartAddr & 0x04800000) == 0x04800000)
					srcLineStartAddr &= ~0x00800000;
				if ((dstLineStartAddr & 0x04800000) == 0x04800000)
					dstLineStartAddr &= ~0x00800000;
				// These flags mean there's a wrap inside this line.
				bool srcLineWrap = !Memory::IsValidRange(srcLineStartAddr, bytesToCopy);
				bool dstLineWrap = !Memory::IsValidRange(dstLineStartAddr, bytesToCopy);

				if (!srcLineWrap && !dstLineWrap) {
					const u8 *srcp = Memory::GetPointer(srcLineStartAddr);
					u8 *dstp = Memory::GetPointerWrite(dstLineStartAddr);
					for (u32 i = 0; i < bytesToCopy; i += 64) {
						u32 chunk = i + 64 > bytesToCopy ? bytesToCopy - i : 64;
						memmove(dstp + i, srcp + i, chunk);
					}

					// If we're tracking detail, it's useful to have the gaps illustrated properly.
					if (notifyDetail) {
						NotifyMemInfo(MemBlockFlags::READ, srcLineStartAddr, bytesToCopy, tag, tagSize);
						NotifyMemInfo(MemBlockFlags::WRITE, dstLineStartAddr, bytesToCopy, tag, tagSize);
					}
				} else {
					// We can wrap at any point, so along with overlap this gets a bit complicated.
					// We're just going to do this the slow and easy way.
					u32 srcLinePos = srcLineStartAddr;
					u32 dstLinePos = dstLineStartAddr;
					for (u32 i = 0; i < bytesToCopy; i += 64) {
						u32 chunk = i + 64 > bytesToCopy ? bytesToCopy - i : 64;
						u32 srcValid = Memory::ValidSize(srcLinePos, chunk);
						u32 dstValid = Memory::ValidSize(dstLinePos, chunk);

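						// Example (hypothetical numbers): with chunk = 64, srcValid = 64
						// and dstValid = 40, the first 40 bytes copy in place, the next
						// 24 source bytes land at the start of VRAM (dst wrapped), and
						// no both-wrapped region remains (64 - 40 - 24 = 0).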
						// First chunk, for which both are valid.
						u32 bothSize = std::min(srcValid, dstValid);
						if (bothSize != 0)
							notifyingMemmove(dstLinePos, srcLinePos, bothSize);

						// Now, whichever side has more valid (or the rest, if only one side must wrap.)
						u32 exclusiveSize = std::max(srcValid, dstValid) - bothSize;
						if (exclusiveSize != 0 && srcValid >= dstValid) {
							notifyingMemmove(PSP_GetVidMemBase(), srcLinePos + bothSize, exclusiveSize);
						} else if (exclusiveSize != 0 && srcValid < dstValid) {
							notifyingMemmove(dstLinePos + bothSize, PSP_GetVidMemBase(), exclusiveSize);
						}

						// Finally, if both src and dst wrapped, that portion.
						u32 wrappedSize = chunk - bothSize - exclusiveSize;
						if (wrappedSize != 0 && srcValid >= dstValid) {
							notifyingMemmove(PSP_GetVidMemBase() + exclusiveSize, PSP_GetVidMemBase(), wrappedSize);
						} else if (wrappedSize != 0 && srcValid < dstValid) {
							notifyingMemmove(PSP_GetVidMemBase(), PSP_GetVidMemBase() + exclusiveSize, wrappedSize);
						}

						srcLinePos += chunk;
						dstLinePos += chunk;
						if ((srcLinePos & 0x04800000) == 0x04800000)
							srcLinePos &= ~0x00800000;
						if ((dstLinePos & 0x04800000) == 0x04800000)
							dstLinePos &= ~0x00800000;
					}
				}
			}

			if (notifyAll) {
				if (srcWraps) {
					u32 validSize = Memory::ValidSize(src, srcSize);
					NotifyMemInfo(MemBlockFlags::READ, src, validSize, tag, tagSize);
					NotifyMemInfo(MemBlockFlags::READ, PSP_GetVidMemBase(), srcSize - validSize, tag, tagSize);
				} else {
					NotifyMemInfo(MemBlockFlags::READ, src, srcSize, tag, tagSize);
				}
				if (dstWraps) {
					u32 validSize = Memory::ValidSize(dst, dstSize);
					NotifyMemInfo(MemBlockFlags::WRITE, dst, validSize, tag, tagSize);
					NotifyMemInfo(MemBlockFlags::WRITE, PSP_GetVidMemBase(), dstSize - validSize, tag, tagSize);
				} else {
					NotifyMemInfo(MemBlockFlags::WRITE, dst, dstSize, tag, tagSize);
				}
			}
		} else if (srcValid && dstValid) {
			u32 bytesToCopy = width * bpp;
			bool notifyDetail = MemBlockInfoDetailed(bytesToCopy);
			bool notifyAll = !notifyDetail && MemBlockInfoDetailed(srcSize, dstSize);
			if (notifyDetail || notifyAll) {
				tagSize = FormatMemWriteTagAt(tag, sizeof(tag), "GPUBlockTransfer/", src, srcSize);
			}

			for (int y = 0; y < height; y++) {
				u32 srcLineStartAddr = srcBasePtr + ((y + srcY) * srcStride + srcX) * bpp;
				u32 dstLineStartAddr = dstBasePtr + ((y + dstY) * dstStride + dstX) * bpp;

				const u8 *srcp = Memory::GetPointer(srcLineStartAddr);
				u8 *dstp = Memory::GetPointerWrite(dstLineStartAddr);
				memcpy(dstp, srcp, bytesToCopy);

				// If we're tracking detail, it's useful to have the gaps illustrated properly.
				if (notifyDetail) {
					NotifyMemInfo(MemBlockFlags::READ, srcLineStartAddr, bytesToCopy, tag, tagSize);
					NotifyMemInfo(MemBlockFlags::WRITE, dstLineStartAddr, bytesToCopy, tag, tagSize);
				}
			}

			if (notifyAll) {
				NotifyMemInfo(MemBlockFlags::READ, src, srcSize, tag, tagSize);
				NotifyMemInfo(MemBlockFlags::WRITE, dst, dstSize, tag, tagSize);
			}
		} else {
			// This seems to cause the GE to require a break/reset on a PSP.
			// TODO: Handle that and figure out which bytes are still copied?
			ERROR_LOG_REPORT_ONCE(invalidtransfer, Log::G3D, "Block transfer invalid: %08x/%x -> %08x/%x, %ix%ix%i (%i,%i)->(%i,%i)", srcBasePtr, srcStride, dstBasePtr, dstStride, width, height, bpp, srcX, srcY, dstX, dstY);
		}

		if (framebufferManager_) {
			// Fixes Gran Turismo's funky text issue, since it overwrites the current texture.
			textureCache_->Invalidate(dstBasePtr + (dstY * dstStride + dstX) * bpp, height * dstStride * bpp, GPU_INVALIDATE_HINT);
			framebufferManager_->NotifyBlockTransferAfter(dstBasePtr, dstStride, dstX, dstY, srcBasePtr, srcStride, srcX, srcY, width, height, bpp, skipDrawReason);
		}
	}

	// TODO: Correct timing appears to be about 1.9 cycles per byte transferred, but erring a bit low since some of our other timing is inaccurate.
	cyclesExecuted += ((height * width * bpp) * 16) / 10;
}

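// Returns true when the range touched framebuffer memory and the copy was handled here
// (or absorbed by the framebuffer manager); returns false when it was treated as plain
// memory, in which case only caches and debug info are updated and no bytes are moved.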
bool GPUCommon::PerformMemoryCopy(u32 dest, u32 src, int size, GPUCopyFlag flags) {
	/*
	// TODO: Should add this. But let's do it after the 1.18 release.
	if (dest == 0 || src == 0) {
		_dbg_assert_msg_(false, "Bad PerformMemoryCopy: %08x -> %08x, size %d (flag: %d)", src, dest, size, (int)flags);
		return false;
	}
	*/
	if (size == 0) {
		_dbg_assert_msg_(false, "Zero-sized PerformMemoryCopy: %08x -> %08x, size %d (flag: %d)", src, dest, size, (int)flags);
		// Let's not ignore this yet but if we hit this, we should investigate.
	}

	// Track stray copies of a framebuffer in RAM. MotoGP does this.
	if (framebufferManager_->MayIntersectFramebufferColor(src) || framebufferManager_->MayIntersectFramebufferColor(dest)) {
		if (!framebufferManager_->NotifyFramebufferCopy(src, dest, size, flags, gstate_c.skipDrawReason)) {
			// We use matching values in PerformReadbackToMemory/PerformWriteColorFromMemory.
			// Since they're identical we don't need to copy.
			if (dest != src) {
				if (Memory::IsValidRange(dest, size) && Memory::IsValidRange(src, size)) {
					memcpy(Memory::GetPointerWriteUnchecked(dest), Memory::GetPointerUnchecked(src), size);
				}
				if (MemBlockInfoDetailed(size)) {
					NotifyMemInfoCopy(dest, src, size, "GPUMemcpy/");
				}
			}
		}
		InvalidateCache(dest, size, GPU_INVALIDATE_HINT);
		return true;
	}

	if (MemBlockInfoDetailed(size)) {
		NotifyMemInfoCopy(dest, src, size, "GPUMemcpy/");
	}
	InvalidateCache(dest, size, GPU_INVALIDATE_HINT);
	if (!(flags & GPUCopyFlag::DEBUG_NOTIFIED))
		recorder_.NotifyMemcpy(dest, src, size);
	return false;
}

bool GPUCommon::PerformMemorySet(u32 dest, u8 v, int size) {
	// This may indicate a memset, usually to 0, of a framebuffer.
	if (framebufferManager_->MayIntersectFramebufferColor(dest)) {
		Memory::Memset(dest, v, size, "GPUMemset");
		if (!framebufferManager_->NotifyFramebufferCopy(dest, dest, size, GPUCopyFlag::MEMSET, gstate_c.skipDrawReason)) {
			InvalidateCache(dest, size, GPU_INVALIDATE_HINT);
		}
		return true;
	}

	NotifyMemInfo(MemBlockFlags::WRITE, dest, size, "GPUMemset");
	// Or perhaps a texture, let's invalidate.
	InvalidateCache(dest, size, GPU_INVALIDATE_HINT);
	recorder_.NotifyMemset(dest, v, size);
	return false;
}

bool GPUCommon::PerformReadbackToMemory(u32 dest, int size) {
	if (Memory::IsVRAMAddress(dest)) {
		return PerformMemoryCopy(dest, dest, size, GPUCopyFlag::FORCE_DST_MATCH_MEM);
	}
	return false;
}

bool GPUCommon::PerformWriteColorFromMemory(u32 dest, int size) {
	if (Memory::IsVRAMAddress(dest)) {
		recorder_.NotifyUpload(dest, size);
		return PerformMemoryCopy(dest, dest, size, GPUCopyFlag::FORCE_SRC_MATCH_MEM | GPUCopyFlag::DEBUG_NOTIFIED);
	}
	return false;
}

void GPUCommon::PerformWriteFormattedFromMemory(u32 addr, int size, int frameWidth, GEBufferFormat format) {
	if (Memory::IsVRAMAddress(addr)) {
		framebufferManager_->PerformWriteFormattedFromMemory(addr, size, frameWidth, format);
	}
	textureCache_->NotifyWriteFormattedFromMemory(addr, size, frameWidth, format);
	InvalidateCache(addr, size, GPU_INVALIDATE_SAFE);
}

bool GPUCommon::PerformWriteStencilFromMemory(u32 dest, int size, WriteStencil flags) {
	if (framebufferManager_->MayIntersectFramebufferColor(dest)) {
		framebufferManager_->PerformWriteStencilFromMemory(dest, size, flags);
		return true;
	}
	return false;
}

bool GPUCommon::GetCurrentDrawAsDebugVertices(int count, std::vector<GPUDebugVertex> &vertices, std::vector<u16> &indices) {
	gstate_c.UpdateUVScaleOffset();
	return ::GetCurrentDrawAsDebugVertices(drawEngineCommon_, count, vertices, indices);
}

bool GPUCommon::DescribeCodePtr(const u8 *ptr, std::string &name) {
	// The only part of GPU emulation (other than software) that jits is the vertex decoder, currently,
	// which is owned by the drawengine.
	return drawEngineCommon_->DescribeCodePtr(ptr, name);
}

bool GPUCommon::NeedsSlowInterpreter() const {
	return breakNext_ != GPUDebug::BreakNext::NONE;
}

void GPUCommon::ClearBreakNext() {
	breakNext_ = GPUDebug::BreakNext::NONE;
	breakAtCount_ = -1;
	GPUStepping::ResumeFromStepping();
}

void GPUCommon::SetBreakNext(GPUDebug::BreakNext next) {
	breakNext_ = next;
	breakAtCount_ = -1;
	switch (next) {
	case GPUDebug::BreakNext::TEX:
		breakpoints_.AddTextureChangeTempBreakpoint();
		break;
	case GPUDebug::BreakNext::PRIM:
	case GPUDebug::BreakNext::COUNT:
		breakpoints_.AddCmdBreakpoint(GE_CMD_PRIM, true);
		breakpoints_.AddCmdBreakpoint(GE_CMD_BEZIER, true);
		breakpoints_.AddCmdBreakpoint(GE_CMD_SPLINE, true);
		breakpoints_.AddCmdBreakpoint(GE_CMD_VAP, true);
		breakpoints_.AddCmdBreakpoint(GE_CMD_TRANSFERSTART, true); // We count block transfers as prims, too.
		break;
	case GPUDebug::BreakNext::CURVE:
		breakpoints_.AddCmdBreakpoint(GE_CMD_BEZIER, true);
		breakpoints_.AddCmdBreakpoint(GE_CMD_SPLINE, true);
		break;
	case GPUDebug::BreakNext::DRAW:
		// This is now handled by switching to BreakNext::PRIM when we encounter a flush.
		// This will take us to the following actual draw.
		primAfterDraw_ = true;
		break;
	case GPUDebug::BreakNext::BLOCK_TRANSFER:
		breakpoints_.AddCmdBreakpoint(GE_CMD_TRANSFERSTART, true);
		break;
	default:
		break;
	}

	if (GPUStepping::IsStepping()) {
		GPUStepping::ResumeFromStepping();
	}
}

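// Sets the prim count to break at. With relative == true it's counted from the current
// position in the frame, so SetBreakCount(1, true) together with BreakNext::COUNT stops
// at the next draw-type command.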
void GPUCommon::SetBreakCount(int c, bool relative) {
	if (relative) {
		breakAtCount_ = primsThisFrame_ + c;
	} else {
		breakAtCount_ = c;
	}
}

GPUDebug::NotifyResult GPUCommon::NotifyCommand(u32 pc, GPUBreakpoints *breakpoints) {
	using namespace GPUDebug;

	u32 op = Memory::ReadUnchecked_U32(pc);
	u32 cmd = op >> 24;
	if (thisFlipNum_ != gpuStats.numFlips) {
		primsLastFrame_ = primsThisFrame_;
		primsThisFrame_ = 0;
		thisFlipNum_ = gpuStats.numFlips;
	}

	bool isPrim = false;

	bool process = true;  // Process is only for the restrictPrimRanges functionality.
	if (cmd == GE_CMD_PRIM || cmd == GE_CMD_BEZIER || cmd == GE_CMD_SPLINE || cmd == GE_CMD_VAP || cmd == GE_CMD_TRANSFERSTART) { // VAP is immediate mode prims.
		isPrim = true;
		primsThisFrame_++;

		// TODO: Should restricted prim ranges also avoid breakpoints?

		if (!restrictPrimRanges_.empty()) {
			process = false;
			for (const auto &range : restrictPrimRanges_) {
				if ((primsThisFrame_ + 1) >= range.first && (primsThisFrame_ + 1) <= range.second) {
					process = true;
					break;
				}
			}
		}
	}

	bool debugBreak = false;
	if (breakNext_ == BreakNext::OP) {
		debugBreak = true;
	} else if (breakNext_ == BreakNext::COUNT) {
		debugBreak = primsThisFrame_ == breakAtCount_;
	} else if (breakpoints->HasBreakpoints()) {
		debugBreak = breakpoints->IsBreakpoint(pc, op);
	}

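	// skipPcOnce_ remembers the pc of the previous break: when execution resumes,
	// we'd immediately re-break on the same op before making progress, so one match
	// at that exact address is skipped and the marker is cleared.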
	if (debugBreak && pc == skipPcOnce_) {
		INFO_LOG(Log::GeDebugger, "Skipping GE break at %08x (last break was here)", skipPcOnce_);
		skipPcOnce_ = 0;
		if (isPrim)
			primsThisFrame_--;  // Compensate for the wrong increment above - we didn't run anything.
		return process ? NotifyResult::Execute : NotifyResult::Skip;
	}
	skipPcOnce_ = 0;

	if (debugBreak) {
		breakpoints->ClearTempBreakpoints();

		u32 op = Memory::Read_U32(pc);
		auto info = DisassembleOp(pc, op);
		NOTICE_LOG(Log::GeDebugger, "Waiting at %08x, %s", pc, info.desc.c_str());

		skipPcOnce_ = pc;
		breakNext_ = BreakNext::NONE;
		// Not incrementing the prim counter!
		return NotifyResult::Break;  // Caller will call GPUStepping::EnterStepping().
	}

	return process ? NotifyResult::Execute : NotifyResult::Skip;
}

void GPUCommon::NotifyFlush() {
	using namespace GPUDebug;
	if (breakNext_ == BreakNext::DRAW && !GPUStepping::IsStepping()) {
		// Break on the first PRIM after a flush.
		if (primAfterDraw_) {
			NOTICE_LOG(Log::GeDebugger, "Flush detected, breaking at next PRIM");
			primAfterDraw_ = false;

			// We've got one to rewind.
			primsThisFrame_--;

			// Switch to PRIM mode.
			SetBreakNext(BreakNext::PRIM);
		}
	}
}

void GPUCommon::NotifyDisplay(u32 framebuf, u32 stride, int format) {
	using namespace GPUDebug;
	if (breakNext_ == BreakNext::FRAME) {
		// Start stepping at the first op of the new frame.
		breakNext_ = BreakNext::OP;
	}
	recorder_.NotifyDisplay(framebuf, stride, format);
}

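// An empty rule or "*" clears the restriction; anything else is handed to
// GPUDebug::ParsePrimRanges, and the rule text is kept only if it parses.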
bool GPUCommon::SetRestrictPrims(std::string_view rule) {
	if (rule.empty() || rule == "*") {
		restrictPrimRanges_.clear();
		restrictPrimRule_.clear();
		return true;
	}

	if (GPUDebug::ParsePrimRanges(rule, &restrictPrimRanges_)) {
		restrictPrimRule_ = rule;
		return true;
	} else {
		return false;
	}
}