GitHub Repository: hrydgard/ppsspp
Path: blob/master/GPU/Software/BinManager.cpp

// Copyright (c) 2022- PPSSPP Project.

// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0 or later versions.

// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.

// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/

// Official git repository and contact information can be found at
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.

#include <atomic>
#include <mutex>
#include <condition_variable>

#include "Common/Profiler/Profiler.h"
#include "Common/Thread/ThreadManager.h"
#include "Common/TimeUtil.h"
#include "Core/System.h"
#include "GPU/Common/TextureDecoder.h"
#include "GPU/Software/BinManager.h"
#include "GPU/Software/Rasterizer.h"
#include "GPU/Software/RasterizerRectangle.h"

// Sometimes useful for debugging.
static constexpr bool FORCE_SINGLE_THREAD = false;

using namespace Rasterizer;

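// A simple countdown latch: Fill() registers an in-flight task, Drain() retires one,
// and Wait() blocks until every registered task has drained.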
struct BinWaitable : public Waitable {
public:
	BinWaitable() {
		count_ = 0;
	}

	void Fill() {
		count_++;
	}

	bool Empty() {
		return count_ == 0;
	}

	void Drain() {
		int result = --count_;
		if (result == 0) {
			// We were the last one to drain, so wake up any waiters.
			std::unique_lock<std::mutex> lock(mutex_);
			cond_.notify_all();
		}
	}

	void Wait() override {
		std::unique_lock<std::mutex> lock(mutex_);
		while (count_ != 0) {
			cond_.wait(lock);
		}
	}

	std::atomic<int> count_;
	std::mutex mutex_;
	std::condition_variable cond_;
};

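// Draws one binned primitive using the rasterizer state captured when it was queued.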
static inline void DrawBinItem(const BinItem &item, const RasterizerState &state) {
	switch (item.type) {
	case BinItemType::TRIANGLE:
		DrawTriangle(item.v0, item.v1, item.v2, item.range, state);
		break;

	case BinItemType::CLEAR_RECT:
		ClearRectangle(item.v0, item.v1, item.range, state);
		break;

	case BinItemType::RECT:
		DrawRectangle(item.v0, item.v1, item.range, state);
		break;

	case BinItemType::SPRITE:
		DrawSprite(item.v0, item.v1, item.range, state);
		break;

	case BinItemType::LINE:
		DrawLine(item.v0, item.v1, item.range, state);
		break;

	case BinItemType::POINT:
		DrawPoint(item.v0, item.range, state);
		break;
	}
}

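// Worker task that drains one per-thread bin queue. Instances are owned by
// BinManager (see taskLists_), which is why Release() deliberately does nothing.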
class DrawBinItemsTask : public Task {
public:
	DrawBinItemsTask(BinWaitable *notify, BinManager::BinItemQueue &items, std::atomic<bool> &status, const BinManager::BinStateQueue &states)
		: notify_(notify), items_(items), status_(status), states_(states) {
	}

	TaskType Type() const override {
		return TaskType::CPU_COMPUTE;
	}

	TaskPriority Priority() const override {
		// Let higher-priority emulation tasks win over this.
		return TaskPriority::NORMAL;
	}

	void Run() override {
		ProcessItems();
		status_ = false;
		// In case of any atomic issues, do another pass.
		ProcessItems();
		notify_->Drain();
	}

	void Release() override {
		// Don't delete, this is statically allocated.
	}

private:
	void ProcessItems() {
		while (!items_.Empty()) {
			const BinItem &item = items_.PeekNext();
			DrawBinItem(item, states_[item.stateIndex]);
			items_.SkipNext();
		}
	}

	BinWaitable *notify_;
	BinManager::BinItemQueue &items_;
	std::atomic<bool> &status_;
	const BinManager::BinStateQueue &states_;
};

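// Out-of-line definition for ODR-use; under C++17 static constexpr members are
// implicitly inline, so this is redundant there but harmless.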
constexpr int BinManager::MAX_POSSIBLE_TASKS;

BinManager::BinManager() {
	queueRange_.x1 = 0x7FFFFFFF;
	queueRange_.y1 = 0x7FFFFFFF;
	queueRange_.x2 = 0;
	queueRange_.y2 = 0;

	waitable_ = new BinWaitable();
	for (auto &s : taskStatus_)
		s = false;

	int maxInitTasks = std::min(g_threadManager.GetNumLooperThreads(), MAX_POSSIBLE_TASKS);
	for (int i = 0; i < maxInitTasks; ++i) {
		taskQueues_[i].Setup();
		for (DrawBinItemsTask *&task : taskLists_[i].tasks)
			task = new DrawBinItemsTask(waitable_, taskQueues_[i], taskStatus_[i], states_);
	}
	states_.Setup();
	cluts_.Setup();
	queue_.Setup();
}

BinManager::~BinManager() {
	delete waitable_;

	for (int i = 0; i < MAX_POSSIBLE_TASKS; ++i) {
		for (DrawBinItemsTask *task : taskLists_[i].tasks)
			delete task;
	}
}

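// Rebuilds the cached RasterizerState when dirty flags demand it, and decides when
// pending texture/framebuffer overlap forces a flush or single-threaded drawing.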
void BinManager::UpdateState() {
	PROFILE_THIS_SCOPE("bin_state");
	if (HasDirty(SoftDirty::PIXEL_ALL | SoftDirty::SAMPLER_ALL | SoftDirty::RAST_ALL)) {
		if (states_.Full())
			Flush("states");
		creatingState_ = true;
		stateIndex_ = (uint16_t)states_.Push(RasterizerState());
		// When new funcs are compiled, we need to flush if WX exclusive.
		ComputeRasterizerState(&states_[stateIndex_], this);
		states_[stateIndex_].samplerID.cached.clut = cluts_[clutIndex_].readable;
		creatingState_ = false;

		ClearDirty(SoftDirty::PIXEL_ALL | SoftDirty::SAMPLER_ALL | SoftDirty::RAST_ALL);
	}

	if (lastFlipstats_ != gpuStats.numFlips) {
		lastFlipstats_ = gpuStats.numFlips;
		ResetStats();
	}

	const auto &state = State();
	const bool hadDepth = pendingWrites_[1].base != 0;

	if (HasDirty(SoftDirty::BINNER_RANGE)) {
		DrawingCoords scissorTL(gstate.getScissorX1(), gstate.getScissorY1());
		DrawingCoords scissorBR(std::min(gstate.getScissorX2(), gstate.getRegionX2()), std::min(gstate.getScissorY2(), gstate.getRegionY2()));
		ScreenCoords screenScissorTL = TransformUnit::DrawingToScreen(scissorTL, 0);
		ScreenCoords screenScissorBR = TransformUnit::DrawingToScreen(scissorBR, 0);

		scissor_.x1 = screenScissorTL.x;
		scissor_.y1 = screenScissorTL.y;
		scissor_.x2 = screenScissorBR.x + SCREEN_SCALE_FACTOR - 1;
		scissor_.y2 = screenScissorBR.y + SCREEN_SCALE_FACTOR - 1;

		// If we're about to texture from something still pending (i.e. depth), flush.
		if (HasTextureWrite(state))
			Flush("tex");

		// Okay, now update what's pending.
		MarkPendingWrites(state);

		ClearDirty(SoftDirty::BINNER_RANGE);
	} else if (pendingOverlap_) {
		if (HasTextureWrite(state)) {
			Flush("tex");

			// We need the pending writes set, which flushing cleared. Set them again.
			MarkPendingWrites(state);
		}
	}

	if (HasDirty(SoftDirty::BINNER_OVERLAP)) {
		// This is a good place to record any dependencies for block transfer overlap.
		MarkPendingReads(state);

		// Disallow threads when rendering to the target, even offset.
		bool selfRender = HasTextureWrite(state);
		int newMaxTasks = selfRender || FORCE_SINGLE_THREAD ? 1 : g_threadManager.GetNumLooperThreads();
		if (newMaxTasks > MAX_POSSIBLE_TASKS)
			newMaxTasks = MAX_POSSIBLE_TASKS;
		// We don't want to overlap wrong, so flush any pending.
		if (maxTasks_ != newMaxTasks) {
			maxTasks_ = newMaxTasks;
			Flush("selfrender");
		}
		pendingOverlap_ = pendingOverlap_ || selfRender;

		// Lastly, we have to check if we're newly writing depth we were texturing before.
		// This happens in Call of Duty (depth clear after depth texture), for example.
		if (!hadDepth && state.pixelID.depthWrite) {
			for (size_t i = 0; i < states_.Size(); ++i) {
				if (HasTextureWrite(states_.Peek(i))) {
					Flush("selfdepth");
				}
			}
		}
		ClearDirty(SoftDirty::BINNER_OVERLAP);
	}
}

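// True if any mip level of the current texture overlaps a pending framebuffer or depth write.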
bool BinManager::HasTextureWrite(const RasterizerState &state) {
	if (!state.enableTextures)
		return false;

	const uint8_t textureBits = textureBitsPerPixel[state.samplerID.texfmt];
	for (int i = 0; i <= state.maxTexLevel; ++i) {
		int byteStride = (state.texbufw[i] * textureBits) / 8;
		int byteWidth = (state.samplerID.cached.sizes[i].w * textureBits) / 8;
		int h = state.samplerID.cached.sizes[i].h;
		if (HasPendingWrite(state.texaddr[i], byteStride, byteWidth, h))
			return true;
	}

	return false;
}

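// Detects a sprite/rect that copies the framebuffer exactly onto itself (same address,
// matching pixel size, texel coords equal to screen coords). Drain() uses this to
// re-enable threading for draws that would otherwise count as self-rendering.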
bool BinManager::IsExactSelfRender(const Rasterizer::RasterizerState &state, const BinItem &item) {
	if (item.type != BinItemType::SPRITE && item.type != BinItemType::RECT)
		return false;
	if (state.textureProj || state.maxTexLevel > 0)
		return false;

	// Only possible if the texture is 1:1.
	if ((state.texaddr[0] & 0x0F1FFFFF) != (gstate.getFrameBufAddress() & 0x0F1FFFFF))
		return false;
	int bufferPixelWidth = BufferFormatBytesPerPixel(state.pixelID.FBFormat());
	int texturePixelWidth = textureBitsPerPixel[state.samplerID.texfmt] / 8;
	if (bufferPixelWidth != texturePixelWidth)
		return false;

	Vec4f tc = Vec4f(item.v0.texturecoords.x, item.v0.texturecoords.y, item.v1.texturecoords.x, item.v1.texturecoords.y);
	if (state.throughMode) {
		// Already at texels, convert to screen.
		tc = tc * SCREEN_SCALE_FACTOR;
	} else {
		// Need to also multiply by width/height in transform mode.
		int w = state.samplerID.cached.sizes[0].w * SCREEN_SCALE_FACTOR;
		int h = state.samplerID.cached.sizes[0].h * SCREEN_SCALE_FACTOR;
		tc = tc * Vec4f(w, h, w, h);
	}

	Vec4<int> tci = tc.Cast<int>();
	if (tci.x != item.v0.screenpos.x || tci.y != item.v0.screenpos.y)
		return false;
	if (tci.z != item.v1.screenpos.x || tci.w != item.v1.screenpos.y)
		return false;

	return true;
}

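// Records the current texture levels as pending reads, widening any existing range
// registered at the same base address.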
void BinManager::MarkPendingReads(const Rasterizer::RasterizerState &state) {
	if (!state.enableTextures)
		return;

	const uint8_t textureBits = textureBitsPerPixel[state.samplerID.texfmt];
	for (int i = 0; i <= state.maxTexLevel; ++i) {
		uint32_t byteStride = (state.texbufw[i] * textureBits) / 8;
		uint32_t byteWidth = (state.samplerID.cached.sizes[i].w * textureBits) / 8;
		uint32_t h = state.samplerID.cached.sizes[i].h;
		auto it = pendingReads_.find(state.texaddr[i]);
		if (it != pendingReads_.end()) {
			uint32_t total = byteStride * (h - 1) + byteWidth;
			uint32_t existing = it->second.strideBytes * (it->second.height - 1) + it->second.widthBytes;
			if (existing < total) {
				it->second.strideBytes = std::max(it->second.strideBytes, byteStride);
				it->second.widthBytes = std::max(it->second.widthBytes, byteWidth);
				it->second.height = std::max(it->second.height, h);
			}
		} else {
			auto &range = pendingReads_[state.texaddr[i]];
			range.base = state.texaddr[i];
			range.strideBytes = byteStride;
			range.widthBytes = byteWidth;
			range.height = h;
		}
	}
}

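// Expands the pending color write range to the current scissor, and the depth range
// too when depth writes are enabled. Addresses are masked to ignore VRAM mirrors.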
void BinManager::MarkPendingWrites(const Rasterizer::RasterizerState &state) {
	DrawingCoords scissorTL(gstate.getScissorX1(), gstate.getScissorY1());
	DrawingCoords scissorBR(std::min(gstate.getScissorX2(), gstate.getRegionX2()), std::min(gstate.getScissorY2(), gstate.getRegionY2()));

	constexpr uint32_t mirrorMask = 0x041FFFFF;
	const uint32_t bpp = state.pixelID.FBFormat() == GE_FORMAT_8888 ? 4 : 2;
	pendingWrites_[0].Expand(gstate.getFrameBufAddress() & mirrorMask, bpp, gstate.FrameBufStride(), scissorTL, scissorBR);
	if (state.pixelID.depthWrite)
		pendingWrites_[1].Expand(gstate.getDepthBufAddress() & mirrorMask, 2, gstate.DepthBufStride(), scissorTL, scissorBR);
}

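// Merges a new rectangle into the dirty range. When the base or stride differs,
// the result is widened conservatively (width grows to the full stride).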
inline void BinDirtyRange::Expand(uint32_t newBase, uint32_t bpp, uint32_t stride, const DrawingCoords &tl, const DrawingCoords &br) {
	const uint32_t w = br.x - tl.x + 1;
	const uint32_t h = br.y - tl.y + 1;

	newBase += tl.y * stride * bpp + tl.x * bpp;
	if (base == 0) {
		base = newBase;
		strideBytes = stride * bpp;
		widthBytes = w * bpp;
		height = h;
		return;
	}

	height = std::max(height, h);
	if (base == newBase && strideBytes == stride * bpp) {
		widthBytes = std::max(widthBytes, w * bpp);
		return;
	}

	if (stride != 0)
		height += ((int)base - (int)newBase) / (stride * bpp);
	base = std::min(base, newBase);
	strideBytes = std::max(strideBytes, stride * bpp);
	widthBytes = strideBytes;
}

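// Snapshots the CLUT so draws already in the queue keep sampling the palette they
// were issued with, even if the game uploads a new one right after.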
void BinManager::UpdateClut(const void *src) {
	PROFILE_THIS_SCOPE("bin_clut");
	if (cluts_.Full())
		Flush("cluts");
	BinClut &clut = cluts_.PeekPush();
	memcpy(clut.readable, src, sizeof(BinClut));
	clutIndex_ = (uint16_t)cluts_.PushPeeked();
}

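// Queues a triangle after backface and degenerate rejection. Each Add* call below
// drains the queue if full, updates per-state raster flags, and expands the queued bounds.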
void BinManager::AddTriangle(const VertexData &v0, const VertexData &v1, const VertexData &v2) {
	Vec2<int> d01((int)v0.screenpos.x - (int)v1.screenpos.x, (int)v0.screenpos.y - (int)v1.screenpos.y);
	Vec2<int> d02((int)v0.screenpos.x - (int)v2.screenpos.x, (int)v0.screenpos.y - (int)v2.screenpos.y);
	Vec2<int> d12((int)v1.screenpos.x - (int)v2.screenpos.x, (int)v1.screenpos.y - (int)v2.screenpos.y);

	// Drop primitives which are not in CCW order by checking the cross product.
	static_assert(SCREEN_SCALE_FACTOR <= 16, "Fails if scale factor is too high");
	if (d01.x * d02.y - d01.y * d02.x < 0)
		return;
	// If all points have identical coords, we'll have 0 weights and not skip properly, so skip here.
	if ((d01.x == 0 && d02.x == 0) || (d01.y == 0 && d02.y == 0))
		return;

	// Was it fully outside the scissor?
	const BinCoords range = Range(v0, v1, v2);
	if (range.Invalid())
		return;

	if (queue_.Full())
		Drain();
	queue_.Push(BinItem{ BinItemType::TRIANGLE, stateIndex_, range, v0, v1, v2 });
	CalculateRasterStateFlags(&states_[stateIndex_], v0, v1, v2);
	Expand(range);
}

void BinManager::AddClearRect(const VertexData &v0, const VertexData &v1) {
	const BinCoords range = Range(v0, v1);
	if (range.Invalid())
		return;

	if (queue_.Full())
		Drain();
	queue_.Push(BinItem{ BinItemType::CLEAR_RECT, stateIndex_, range, v0, v1 });
	CalculateRasterStateFlags(&states_[stateIndex_], v0, v1, true);
	Expand(range);
}

void BinManager::AddRect(const VertexData &v0, const VertexData &v1) {
	const BinCoords range = Range(v0, v1);
	if (range.Invalid())
		return;

	if (queue_.Full())
		Drain();
	queue_.Push(BinItem{ BinItemType::RECT, stateIndex_, range, v0, v1 });
	CalculateRasterStateFlags(&states_[stateIndex_], v0, v1, true);
	Expand(range);
}

void BinManager::AddSprite(const VertexData &v0, const VertexData &v1) {
	const BinCoords range = Range(v0, v1);
	if (range.Invalid())
		return;

	if (queue_.Full())
		Drain();
	queue_.Push(BinItem{ BinItemType::SPRITE, stateIndex_, range, v0, v1 });
	CalculateRasterStateFlags(&states_[stateIndex_], v0, v1, true);
	Expand(range);
}

void BinManager::AddLine(const VertexData &v0, const VertexData &v1) {
	const BinCoords range = Range(v0, v1);
	if (range.Invalid())
		return;

	if (queue_.Full())
		Drain();
	queue_.Push(BinItem{ BinItemType::LINE, stateIndex_, range, v0, v1 });
	CalculateRasterStateFlags(&states_[stateIndex_], v0, v1, false);
	Expand(range);
}

void BinManager::AddPoint(const VertexData &v0) {
	const BinCoords range = Range(v0);
	if (range.Invalid())
		return;

	if (queue_.Full())
		Drain();
	queue_.Push(BinItem{ BinItemType::POINT, stateIndex_, range, v0 });
	CalculateRasterStateFlags(&states_[stateIndex_], v0);
	Expand(range);
}

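// Distributes queued items to the per-thread task queues. The drawn area is split
// into horizontal or vertical strips, chosen once per batch while no tasks are in
// flight, and each strip is then rasterized by its own worker thread.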
void BinManager::Drain(bool flushing) {
	PROFILE_THIS_SCOPE("bin_drain");

	// If the waitable has fully drained, we can update our binning decisions.
	if (!tasksSplit_ || waitable_->Empty()) {
		int w2 = (queueRange_.x2 - queueRange_.x1 + (SCREEN_SCALE_FACTOR * 2 - 1)) / (SCREEN_SCALE_FACTOR * 2);
		int h2 = (queueRange_.y2 - queueRange_.y1 + (SCREEN_SCALE_FACTOR * 2 - 1)) / (SCREEN_SCALE_FACTOR * 2);

		// Always bin the entire possible range, but focus on the drawn area.
		ScreenCoords tl(0, 0, 0);
		ScreenCoords br(1024 * SCREEN_SCALE_FACTOR, 1024 * SCREEN_SCALE_FACTOR, 0);

		if (pendingOverlap_ && maxTasks_ == 1 && flushing && queue_.Size() == 1 && !FORCE_SINGLE_THREAD) {
			// If the drawing is 1:1, we can potentially use threads. It's worth checking.
			const auto &item = queue_.PeekNext();
			const auto &state = states_[item.stateIndex];
			if (IsExactSelfRender(state, item))
				maxTasks_ = std::min(g_threadManager.GetNumLooperThreads(), MAX_POSSIBLE_TASKS);
		}

		taskRanges_.clear();
		if (h2 >= 18 && w2 >= h2 * 4) {
			int bin_w = std::max(4, (w2 + maxTasks_ - 1) / maxTasks_) * SCREEN_SCALE_FACTOR * 2;
			taskRanges_.push_back(BinCoords{ tl.x, tl.y, queueRange_.x1 + bin_w - 1, br.y - 1 });
			for (int x = queueRange_.x1 + bin_w; x <= queueRange_.x2; x += bin_w) {
				int x2 = x + bin_w > queueRange_.x2 ? br.x : x + bin_w;
				taskRanges_.push_back(BinCoords{ x, tl.y, x2 - 1, br.y - 1 });
			}
		} else if (h2 >= 18 && w2 >= 18) {
			int bin_h = std::max(4, (h2 + maxTasks_ - 1) / maxTasks_) * SCREEN_SCALE_FACTOR * 2;
			taskRanges_.push_back(BinCoords{ tl.x, tl.y, br.x - 1, queueRange_.y1 + bin_h - 1 });
			for (int y = queueRange_.y1 + bin_h; y <= queueRange_.y2; y += bin_h) {
				int y2 = y + bin_h > queueRange_.y2 ? br.y : y + bin_h;
				taskRanges_.push_back(BinCoords{ tl.x, y, br.x - 1, y2 - 1 });
			}
		}

		tasksSplit_ = true;
	}

	// Let's try to optimize states, if we can.
	OptimizePendingStates(pendingStateIndex_, stateIndex_);
	pendingStateIndex_ = stateIndex_;

	if (taskRanges_.size() <= 1) {
		PROFILE_THIS_SCOPE("bin_drain_single");
		while (!queue_.Empty()) {
			const BinItem &item = queue_.PeekNext();
			DrawBinItem(item, states_[item.stateIndex]);
			queue_.SkipNext();
		}
	} else {
		int max = flushing ? QUEUED_PRIMS : QUEUED_PRIMS / 2;
		while (!queue_.Empty()) {
			const BinItem &item = queue_.PeekNext();
			for (int i = 0; i < (int)taskRanges_.size(); ++i) {
				const BinCoords range = taskRanges_[i].Intersect(item.range);
				if (range.Invalid())
					continue;

				if (taskQueues_[i].NearFull()) {
					// This shouldn't often happen, but if it does, wait for space.
					if (taskQueues_[i].Full())
						waitable_->Wait();
					// If we're not flushing and not near full, let's just continue later.
					// Near full means we'd drain on next prim, so better to finish it now.
					else if (!flushing && !queue_.NearFull())
						max = 0;
				}

				BinItem &taskItem = taskQueues_[i].PeekPush();
				taskItem = item;
				taskItem.range = range;
				taskQueues_[i].PushPeeked();
			}
			queue_.SkipNext();
			if (--max <= 0)
				break;
		}

		int threads = 0;
		for (int i = 0; i < (int)taskRanges_.size(); ++i) {
			if (taskQueues_[i].Empty())
				continue;
			threads++;
			if (taskStatus_[i])
				continue;

			waitable_->Fill();
			taskStatus_[i] = true;
			g_threadManager.EnqueueTaskOnThread(i, taskLists_[i].Next());
			enqueues_++;
		}

		mostThreads_ = std::max(mostThreads_, threads);
	}
}

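// Fully drains the queue and waits for all workers, then resets per-flush state.
// The reason string feeds the debug stats reported by GetStats().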
void BinManager::Flush(const char *reason) {
	if (queueRange_.x1 == 0x7FFFFFFF)
		return;

	double st = 0.0;
	if (coreCollectDebugStats)
		st = time_now_d();
	Drain(true);
	waitable_->Wait();
	taskRanges_.clear();
	tasksSplit_ = false;

	queue_.Reset();
	while (states_.Size() > 1)
		states_.SkipNext();
	while (cluts_.Size() > 1)
		cluts_.SkipNext();

	Rasterizer::FlushJit();
	Sampler::FlushJit();

	queueRange_.x1 = 0x7FFFFFFF;
	queueRange_.y1 = 0x7FFFFFFF;
	queueRange_.x2 = 0;
	queueRange_.y2 = 0;

	for (auto &pending : pendingWrites_)
		pending.base = 0;
	pendingOverlap_ = false;
	pendingReads_.clear();

	// We'll need to set the pending writes and reads again, since we just flushed it.
	dirty_ |= SoftDirty::BINNER_RANGE | SoftDirty::BINNER_OVERLAP;

	if (coreCollectDebugStats) {
		double et = time_now_d();
		flushReasonTimes_[reason] += et - st;
		if (et - st > slowestFlushTime_) {
			slowestFlushTime_ = et - st;
			slowestFlushReason_ = reason;
		}
	}
}

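// Runs OptimizeRasterState over the circular range [first, last], skipping a state
// that is still being constructed.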
void BinManager::OptimizePendingStates(uint16_t first, uint16_t last) {
	// We can sometimes hit this when compiling new funcs while creating a state.
	// At that point, the state isn't loaded fully yet, so don't touch it.
	if (creatingState_ && last == stateIndex_) {
		if (first == last)
			return;
		last--;
	}

	int count = (QUEUED_STATES + last - first) % QUEUED_STATES + 1;
	for (int i = 0; i < count; ++i) {
		size_t pos = (first + i) % QUEUED_STATES;
		OptimizeRasterState(&states_[pos]);
	}
}

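// Overlap test against pending framebuffer/depth writes. Walks row by row so that
// reads landing entirely within another surface's stride gap don't count as overlap.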
bool BinManager::HasPendingWrite(uint32_t start, uint32_t stride, uint32_t w, uint32_t h) {
	// We can only write to VRAM.
	if (!Memory::IsVRAMAddress(start))
		return false;
	// Ignore mirrors for overlap detection.
	start &= 0x041FFFFF;

	uint32_t size = stride * (h - 1) + w;
	for (const auto &range : pendingWrites_) {
		if (range.base == 0 || range.strideBytes == 0)
			continue;
		if (start >= range.base + range.height * range.strideBytes || start + size <= range.base)
			continue;

		// Let's simply go through each line. Might be in the stride gap.
		uint32_t row = start;
		for (uint32_t y = 0; y < h; ++y) {
			int32_t offset = row - range.base;
			int32_t rangeY = offset / (int32_t)range.strideBytes;
			uint32_t rangeX = offset % (int32_t)range.strideBytes;
			if (rangeY >= 0 && (uint32_t)rangeY < range.height) {
				// If this row is either within width, or extends beyond stride, overlap.
				if (rangeX < range.widthBytes || rangeX + w >= range.strideBytes)
					return true;
			}

			row += stride;
		}
	}

	return false;
}

bool BinManager::HasPendingRead(uint32_t start, uint32_t stride, uint32_t w, uint32_t h) {
	if (Memory::IsVRAMAddress(start)) {
		// Ignore VRAM mirrors.
		start &= 0x041FFFFF;
	} else {
		// Ignore only regular RAM mirrors.
		start &= 0x3FFFFFFF;
	}

	uint32_t size = stride * (h - 1) + w;
	for (const auto &pair : pendingReads_) {
		const auto &range = pair.second;
		if (start >= range.base + range.height * range.strideBytes || start + size <= range.base)
			continue;

		// Stride gaps are uncommon with reads, so don't bother.
		return true;
	}

	return false;
}

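// Formats flush timing stats. The percentages express total flush time relative to
// a 59.94 Hz frame (6000.0 / 1.001 converts seconds to percent of one frame; the
// "last 2" figure averages over two frames for the benefit of 30 FPS games).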
void BinManager::GetStats(char *buffer, size_t bufsize) {
	double allTotal = 0.0;
	double slowestTotalTime = 0.0;
	const char *slowestTotalReason = nullptr;
	for (auto &it : flushReasonTimes_) {
		if (it.second > slowestTotalTime) {
			slowestTotalTime = it.second;
			slowestTotalReason = it.first;
		}
		allTotal += it.second;
	}

	// Many games are 30 FPS, so check last frame too for better stats.
	double recentTotal = allTotal;
	double slowestRecentTime = slowestTotalTime;
	const char *slowestRecentReason = slowestTotalReason;
	for (auto &it : lastFlushReasonTimes_) {
		if (it.second > slowestRecentTime) {
			slowestRecentTime = it.second;
			slowestRecentReason = it.first;
		}
		recentTotal += it.second;
	}

	snprintf(buffer, bufsize,
		"Slowest individual flush: %s (%0.4f)\n"
		"Slowest frame flush: %s (%0.4f)\n"
		"Slowest recent flush: %s (%0.4f)\n"
		"Total flush time: %0.4f (%05.2f%%, last 2: %05.2f%%)\n"
		"Thread enqueues: %d, count %d",
		slowestFlushReason_, slowestFlushTime_,
		slowestTotalReason, slowestTotalTime,
		slowestRecentReason, slowestRecentTime,
		allTotal, allTotal * (6000.0 / 1.001), recentTotal * (3000.0 / 1.001),
		enqueues_, mostThreads_);
}

void BinManager::ResetStats() {
	lastFlushReasonTimes_ = std::move(flushReasonTimes_);
	flushReasonTimes_.clear();
	slowestFlushReason_ = nullptr;
	slowestFlushTime_ = 0.0;
	enqueues_ = 0;
	mostThreads_ = 0;
}

inline BinCoords BinCoords::Intersect(const BinCoords &range) const {
	BinCoords sub;
	sub.x1 = std::max(x1, range.x1);
	sub.y1 = std::max(y1, range.y1);
	sub.x2 = std::min(x2, range.x2);
	sub.y2 = std::min(y2, range.y2);
	return sub;
}

BinCoords BinManager::Scissor(BinCoords range) {
	return range.Intersect(scissor_);
}

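// The Range() overloads snap primitive bounds outward to the subpixel grid (AND down
// to a multiple of SCREEN_SCALE_FACTOR for the min corner, OR up for the max), then
// clip to the current scissor.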
BinCoords BinManager::Range(const VertexData &v0, const VertexData &v1, const VertexData &v2) {
	BinCoords range;
	range.x1 = std::min(std::min(v0.screenpos.x, v1.screenpos.x), v2.screenpos.x) & ~(SCREEN_SCALE_FACTOR - 1);
	range.y1 = std::min(std::min(v0.screenpos.y, v1.screenpos.y), v2.screenpos.y) & ~(SCREEN_SCALE_FACTOR - 1);
	range.x2 = std::max(std::max(v0.screenpos.x, v1.screenpos.x), v2.screenpos.x) | (SCREEN_SCALE_FACTOR - 1);
	range.y2 = std::max(std::max(v0.screenpos.y, v1.screenpos.y), v2.screenpos.y) | (SCREEN_SCALE_FACTOR - 1);
	return Scissor(range);
}

BinCoords BinManager::Range(const VertexData &v0, const VertexData &v1) {
	BinCoords range;
	range.x1 = std::min(v0.screenpos.x, v1.screenpos.x) & ~(SCREEN_SCALE_FACTOR - 1);
	range.y1 = std::min(v0.screenpos.y, v1.screenpos.y) & ~(SCREEN_SCALE_FACTOR - 1);
	range.x2 = std::max(v0.screenpos.x, v1.screenpos.x) | (SCREEN_SCALE_FACTOR - 1);
	range.y2 = std::max(v0.screenpos.y, v1.screenpos.y) | (SCREEN_SCALE_FACTOR - 1);
	return Scissor(range);
}

BinCoords BinManager::Range(const VertexData &v0) {
	BinCoords range;
	range.x1 = v0.screenpos.x & ~(SCREEN_SCALE_FACTOR - 1);
	range.y1 = v0.screenpos.y & ~(SCREEN_SCALE_FACTOR - 1);
	range.x2 = v0.screenpos.x | (SCREEN_SCALE_FACTOR - 1);
	range.y2 = v0.screenpos.y | (SCREEN_SCALE_FACTOR - 1);
	return Scissor(range);
}

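// Grows the queued bounds to cover a new primitive. When single-threaded, or once
// the covered height spans most of a 272-line frame (and enqueues haven't hit their
// cap), work is kicked off early: Flush() if self-render overlap demands it,
// otherwise just a Drain().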
void BinManager::Expand(const BinCoords &range) {
	queueRange_.x1 = std::min(queueRange_.x1, range.x1);
	queueRange_.y1 = std::min(queueRange_.y1, range.y1);
	queueRange_.x2 = std::max(queueRange_.x2, range.x2);
	queueRange_.y2 = std::max(queueRange_.y2, range.y2);

	if (maxTasks_ == 1 || (queueRange_.y2 - queueRange_.y1 >= 224 * SCREEN_SCALE_FACTOR && enqueues_ < 36 * maxTasks_)) {
		if (pendingOverlap_)
			Flush("expand");
		else
			Drain();
	}
}