Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
hrydgard
GitHub Repository: hrydgard/ppsspp
Path: blob/master/Common/GPU/OpenGL/GLRenderManager.cpp
3189 views
1
#include "ppsspp_config.h"
2
#include "GLRenderManager.h"
3
#include "Common/GPU/OpenGL/GLFeatures.h"
4
#include "Common/GPU/thin3d.h"
5
#include "Common/Thread/ThreadUtil.h"
6
#include "Common/VR/PPSSPPVR.h"
7
8
#include "Common/Log.h"
9
#include "Common/TimeUtil.h"
10
#include "Common/MemoryUtil.h"
11
#include "Common/StringUtils.h"
12
#include "Common/Math/math_util.h"
13
14
// Verbose logging for the render/emu thread handshake. Compiled out by default;
// change "#if 0" to "#if 1" (or restore the _DEBUG condition) to enable.
#if 0 // def _DEBUG
#define VLOG(...) INFO_LOG(Log::G3D, __VA_ARGS__)
#else
#define VLOG(...)
#endif

// Identity of the thread that runs the GL render queue; assigned in
// GLRenderManager::ThreadStart().
std::thread::id renderThreadId;
21
22
// Records the texture's dimensions and mip count, and decides whether GL wrap
// modes are usable: always if NPOT textures are fully supported, otherwise
// only when both width and height are powers of two.
GLRTexture::GLRTexture(const Draw::DeviceCaps &caps, int width, int height, int depth, int numMips) {
	canWrap = caps.textureNPOTFullySupported || (isPowerOf2(width) && isPowerOf2(height));
	w = width;
	h = height;
	d = depth;
	this->numMips = numMips;
}
33
34
// Releases the underlying GL texture object, if one was ever created.
GLRTexture::~GLRTexture() {
	if (!texture)
		return;
	glDeleteTextures(1, &texture);
}
39
40
// NOTE: Stores a reference to the caller-owned frame time history buffer, so
// it must outlive this manager.
GLRenderManager::GLRenderManager(HistoryBuffer<FrameTimeData, FRAME_TIME_HISTORY_LENGTH> &frameTimeHistory) : frameTimeHistory_(frameTimeHistory) {
	// size_t sz = sizeof(GLRRenderData);
	// _dbg_assert_(sz == 88);
}
44
45
// The render thread must already have been stopped (runCompileThread_ false)
// and all per-frame deleters drained before destruction.
GLRenderManager::~GLRenderManager() {
	_dbg_assert_(!runCompileThread_);

	for (int frame = 0; frame < MAX_INFLIGHT_FRAMES; frame++) {
		_assert_(frameData_[frame].deleter.IsEmpty());
		_assert_(frameData_[frame].deleter_prev.IsEmpty());
	}
	// Flush out anything that got queued for deletion during shutdown itself.
	deleter_.Perform(this, skipGLCalls_);
	_assert_(deleter_.IsEmpty());
}
56
57
// Called as the render thread starts up: creates device objects, applies any
// pending inflight-frame-count change, and picks the buffer upload strategy
// based on driver quirks and available extensions.
void GLRenderManager::ThreadStart(Draw::DrawContext *draw) {
	queueRunner_.CreateDeviceObjects();
	renderThreadId = std::this_thread::get_id();

	if (newInflightFrames_ != -1) {
		INFO_LOG(Log::G3D, "Updating inflight frames to %d", newInflightFrames_);
		inflightFrames_ = newInflightFrames_;
		newInflightFrames_ = -1;
	}

	// Don't save draw, we don't want any thread safety confusion.
	const bool hasBufferStorage = gl_extensions.ARB_buffer_storage || gl_extensions.EXT_buffer_storage;
	bool useMapping = draw->GetBugs().Has(Draw::Bugs::ANY_MAP_BUFFER_RANGE_SLOW);
	if (gl_extensions.IsGLES && !hasBufferStorage && !gl_extensions.VersionGEThan(3, 0, 0)) {
		// Mapping can't work on this combination anyway, so force it off.
		useMapping = false;
	}

	// Notes on buffer mapping:
	// NVIDIA GTX 9xx / 2017-10 drivers - mapping improves speed, basic unmap seems best.
	// PowerVR GX6xxx / iOS 10.3 - mapping has little improvement, explicit flush is slower.
	//
	// Qualcomm's FLUSH_INVALIDATE_UNMAP strategy is temporarily disabled because it
	// doesn't work with task switching on Android. The mapped buffer seems to just be
	// pulled out like a rug from under us, crashing as soon as any write happens, which
	// can happen during shutdown since we write from the Emu thread which may not yet
	// have shut down. There may be solutions to this, but for now we avoid that strategy.
	bufferStrategy_ = GLBufferStrategy::SUBDATA;
	if (useMapping && gl_extensions.gpuVendor == GPU_VENDOR_NVIDIA) {
		bufferStrategy_ = GLBufferStrategy::FRAME_UNMAP;
	}
}
100
101
void GLRenderManager::ThreadEnd() {
102
INFO_LOG(Log::G3D, "ThreadEnd");
103
104
queueRunner_.DestroyDeviceObjects();
105
VLOG(" PULL: Quitting");
106
107
// Good time to run all the deleters to get rid of leftover objects.
108
for (int i = 0; i < MAX_INFLIGHT_FRAMES; i++) {
109
// Since we're in shutdown, we should skip the GL calls on Android.
110
frameData_[i].deleter.Perform(this, skipGLCalls_);
111
frameData_[i].deleter_prev.Perform(this, skipGLCalls_);
112
}
113
deleter_.Perform(this, skipGLCalls_);
114
for (int i = 0; i < (int)steps_.size(); i++) {
115
delete steps_[i];
116
}
117
steps_.clear();
118
initSteps_.clear();
119
}
120
121
// Unlike in Vulkan, this isn't a full independent function, instead it gets called every frame.
122
//
123
// This means that we have to block and run the render queue until we've presented one frame,
124
// at which point we can leave.
125
//
126
// NOTE: If run_ is true, we WILL run a task!
127
bool GLRenderManager::ThreadFrame() {
128
if (!runCompileThread_) {
129
return false;
130
}
131
132
GLRRenderThreadTask *task = nullptr;
133
134
// In case of syncs or other partial completion, we keep going until we complete a frame.
135
while (true) {
136
// Pop a task of the queue and execute it.
137
// NOTE: We need to actually wait for a task, we can't just bail!
138
{
139
std::unique_lock<std::mutex> lock(pushMutex_);
140
while (renderThreadQueue_.empty()) {
141
pushCondVar_.wait(lock);
142
}
143
task = std::move(renderThreadQueue_.front());
144
renderThreadQueue_.pop();
145
}
146
147
// We got a task! We can now have pushMutex_ unlocked, allowing the host to
148
// push more work when it feels like it, and just start working.
149
if (task->runType == GLRRunType::EXIT) {
150
delete task;
151
// Oh, host wanted out. Let's leave, and also let's notify the host.
152
// This is unlike Vulkan too which can just block on the thread existing.
153
std::unique_lock<std::mutex> lock(syncMutex_);
154
syncCondVar_.notify_one();
155
syncDone_ = true;
156
break;
157
}
158
159
// Render the scene.
160
VLOG(" PULL: Frame %d RUN (%0.3f)", task->frame, time_now_d());
161
if (Run(*task)) {
162
// Swap requested, so we just bail the loop.
163
delete task;
164
break;
165
}
166
delete task;
167
};
168
169
return true;
170
}
171
172
void GLRenderManager::StopThread() {
173
// There's not really a lot to do here anymore.
174
INFO_LOG(Log::G3D, "GLRenderManager::StopThread()");
175
if (runCompileThread_) {
176
runCompileThread_ = false;
177
178
std::unique_lock<std::mutex> lock(pushMutex_);
179
renderThreadQueue_.push(new GLRRenderThreadTask(GLRRunType::EXIT));
180
pushCondVar_.notify_one();
181
} else {
182
WARN_LOG(Log::G3D, "GL submission thread was already paused.");
183
}
184
}
185
186
void GLRenderManager::StartThread() {
187
// There's not really a lot to do here anymore.
188
INFO_LOG(Log::G3D, "GLRenderManager::StartThread()");
189
if (!runCompileThread_) {
190
runCompileThread_ = true;
191
} else {
192
INFO_LOG(Log::G3D, "GL submission thread was already running.");
193
}
194
}
195
196
std::string GLRenderManager::GetGpuProfileString() const {
197
int curFrame = curFrame_;
198
const GLQueueProfileContext &profile = frameData_[curFrame].profile;
199
200
float cputime_ms = 1000.0f * (profile.cpuEndTime - profile.cpuStartTime);
201
return StringFromFormat("CPU time to run the list: %0.2f ms\n\n%s", cputime_ms, profilePassesString_.c_str());
202
}
203
204
// Starts a new RENDER step targeting fb (or reuses the current one when the
// target is unchanged and no clear is requested). CLEAR pass actions are
// translated into a single CLEAR command at the head of the step.
// NOTE(review): fb may be null here - presumably the backbuffer; confirm in the queue runner.
void GLRenderManager::BindFramebufferAsRenderTarget(GLRFramebuffer *fb, GLRRenderPassAction color, GLRRenderPassAction depth, GLRRenderPassAction stencil, uint32_t clearColor, float clearDepth, uint8_t clearStencil, const char *tag) {
	_assert_(insideFrame_);
#ifdef _DEBUG
	curProgram_ = nullptr;
#endif

	// Eliminate dupes.
	if (steps_.size() && steps_.back()->stepType == GLRStepType::RENDER && steps_.back()->render.framebuffer == fb) {
		if (color != GLRRenderPassAction::CLEAR && depth != GLRRenderPassAction::CLEAR && stencil != GLRRenderPassAction::CLEAR) {
			// We don't move to a new step, this bind was unnecessary and we can safely skip it.
			curRenderStep_ = steps_.back();
			return;
		}
	}
	if (curRenderStep_ && curRenderStep_->commands.size() == 0) {
		VLOG("Empty render step. Usually happens after uploading pixels.");
	}

	GLRStep *step = new GLRStep{ GLRStepType::RENDER };
	// This is what queues up new passes, and can end previous ones.
	step->render.framebuffer = fb;
	step->render.color = color;
	step->render.depth = depth;
	step->render.stencil = stencil;
	step->tag = tag;
	steps_.push_back(step);

	// Collect all requested clears into one CLEAR command.
	GLuint clearMask = 0;
	GLRRenderData data(GLRRenderCommand::CLEAR);
	if (color == GLRRenderPassAction::CLEAR) {
		clearMask |= GL_COLOR_BUFFER_BIT;
		data.clear.clearColor = clearColor;
	}
	if (depth == GLRRenderPassAction::CLEAR) {
		clearMask |= GL_DEPTH_BUFFER_BIT;
		data.clear.clearZ = clearDepth;
	}
	if (stencil == GLRRenderPassAction::CLEAR) {
		clearMask |= GL_STENCIL_BUFFER_BIT;
		data.clear.clearStencil = clearStencil;
	}
	if (clearMask) {
		// NOTE(review): a 0,0,0,0 scissor here presumably means "no scissor"
		// (full-target clear) - confirm against the queue runner's CLEAR handling.
		data.clear.scissorX = 0;
		data.clear.scissorY = 0;
		data.clear.scissorW = 0;
		data.clear.scissorH = 0;
		data.clear.clearMask = clearMask;
		data.clear.colorMask = 0xF;
		step->commands.push_back(data);
	}
	curRenderStep_ = step;

	if (fb) {
		// KEEP means we read the previous contents, creating a dependency on fb.
		if (color == GLRRenderPassAction::KEEP || depth == GLRRenderPassAction::KEEP || stencil == GLRRenderPassAction::KEEP) {
			step->dependencies.insert(fb);
		}
	}

	if (invalidationCallback_) {
		invalidationCallback_(InvalidationCallbackFlags::RENDER_PASS_STATE);
	}
}
266
267
// aspectBit: GL_COLOR_BUFFER_BIT etc
268
void GLRenderManager::BindFramebufferAsTexture(GLRFramebuffer *fb, int binding, int aspectBit) {
269
_dbg_assert_(curRenderStep_ && curRenderStep_->stepType == GLRStepType::RENDER);
270
_dbg_assert_(binding < MAX_GL_TEXTURE_SLOTS);
271
GLRRenderData data{ GLRRenderCommand::BIND_FB_TEXTURE };
272
data.bind_fb_texture.slot = binding;
273
data.bind_fb_texture.framebuffer = fb;
274
data.bind_fb_texture.aspect = aspectBit;
275
curRenderStep_->commands.push_back(data);
276
curRenderStep_->dependencies.insert(fb);
277
}
278
279
// Queues a COPY step transferring srcRect from src to dstPos in dst.
void GLRenderManager::CopyFramebuffer(GLRFramebuffer *src, GLRect2D srcRect, GLRFramebuffer *dst, GLOffset2D dstPos, int aspectMask, const char *tag) {
	GLRStep *step = new GLRStep{ GLRStepType::COPY };
	step->copy.src = src;
	step->copy.dst = dst;
	step->copy.srcRect = srcRect;
	step->copy.dstPos = dstPos;
	step->copy.aspectMask = aspectMask;
	step->tag = tag;
	step->dependencies.insert(src);
	// Only depend on the destination's previous contents if the copy doesn't
	// completely overwrite it.
	const bool coversDst = dst && srcRect.x == 0 && srcRect.y == 0 && srcRect.w == dst->width && srcRect.h == dst->height;
	if (dstPos.x != 0 || dstPos.y != 0 || !coversDst) {
		step->dependencies.insert(dst);
	}
	steps_.push_back(step);
}
293
294
// Queues a BLIT step (scaled copy, optional linear filter) from src to dst.
void GLRenderManager::BlitFramebuffer(GLRFramebuffer *src, GLRect2D srcRect, GLRFramebuffer *dst, GLRect2D dstRect, int aspectMask, bool filter, const char *tag) {
	GLRStep *step = new GLRStep{ GLRStepType::BLIT };
	step->blit.src = src;
	step->blit.dst = dst;
	step->blit.srcRect = srcRect;
	step->blit.dstRect = dstRect;
	step->blit.aspectMask = aspectMask;
	step->blit.filter = filter;
	step->tag = tag;
	step->dependencies.insert(src);
	// Only depend on the destination's previous contents if the blit doesn't
	// cover the whole target.
	const bool coversDst = dst && dstRect.x == 0 && dstRect.y == 0 && dstRect.w == dst->width && dstRect.h == dst->height;
	if (!coversDst) {
		step->dependencies.insert(dst);
	}
	steps_.push_back(step);
}
309
310
// Synchronously reads back a rectangle of a framebuffer aspect into pixels,
// converting to destFormat. Returns false if aspectBits names no readable aspect.
bool GLRenderManager::CopyFramebufferToMemory(GLRFramebuffer *src, int aspectBits, int x, int y, int w, int h, Draw::DataFormat destFormat, uint8_t *pixels, int pixelStride, Draw::ReadbackMode mode, const char *tag) {
	_assert_(pixels);

	GLRStep *readbackStep = new GLRStep{ GLRStepType::READBACK };
	readbackStep->readback.src = src;
	readbackStep->readback.srcRect = { x, y, w, h };
	readbackStep->readback.aspectMask = aspectBits;
	readbackStep->readback.dstFormat = destFormat;
	readbackStep->dependencies.insert(src);
	readbackStep->tag = tag;
	steps_.push_back(readbackStep);

	// Readbacks are synchronous: end the current render step and drain the queue.
	curRenderStep_ = nullptr;
	FlushSync();

	// Pick the source format the readback buffer holds for the requested aspect.
	Draw::DataFormat readbackFormat;
	if (aspectBits & GL_COLOR_BUFFER_BIT) {
		readbackFormat = Draw::DataFormat::R8G8B8A8_UNORM;
	} else if (aspectBits & GL_STENCIL_BUFFER_BIT) {
		// Copies from stencil are always S8.
		readbackFormat = Draw::DataFormat::S8;
	} else if (aspectBits & GL_DEPTH_BUFFER_BIT) {
		// TODO: Do this properly.
		readbackFormat = Draw::DataFormat::D24_S8;
	} else {
		return false;
	}
	queueRunner_.CopyFromReadbackBuffer(src, w, h, readbackFormat, destFormat, pixelStride, pixels);
	return true;
}
340
341
// Synchronously reads back a rectangle of a texture mip level into pixels,
// converting to destFormat.
void GLRenderManager::CopyImageToMemorySync(GLRTexture *texture, int mipLevel, int x, int y, int w, int h, Draw::DataFormat destFormat, uint8_t *pixels, int pixelStride, const char *tag) {
	_assert_(texture);
	_assert_(pixels);

	GLRStep *readbackStep = new GLRStep{ GLRStepType::READBACK_IMAGE };
	readbackStep->readback_image.texture = texture;
	readbackStep->readback_image.mipLevel = mipLevel;
	readbackStep->readback_image.srcRect = { x, y, w, h };
	readbackStep->tag = tag;
	steps_.push_back(readbackStep);

	// Readbacks are synchronous: end the current render step and drain the queue.
	curRenderStep_ = nullptr;
	FlushSync();

	// Image readbacks are fetched as RGBA8888 and converted to destFormat here.
	queueRunner_.CopyFromReadbackBuffer(nullptr, w, h, Draw::DataFormat::R8G8B8A8_UNORM, destFormat, pixelStride, pixels);
}
356
357
void GLRenderManager::BeginFrame(bool enableProfiling) {
358
#ifdef _DEBUG
359
curProgram_ = nullptr;
360
#endif
361
362
// Shouldn't call BeginFrame unless we're in a run state.
363
_dbg_assert_(runCompileThread_);
364
365
int curFrame = GetCurFrame();
366
367
FrameTimeData &frameTimeData = frameTimeHistory_.Add(frameIdGen_);
368
frameTimeData.frameBegin = time_now_d();
369
frameTimeData.afterFenceWait = frameTimeData.frameBegin;
370
371
GLFrameData &frameData = frameData_[curFrame];
372
frameData.frameId = frameIdGen_;
373
frameData.profile.enabled = enableProfiling;
374
375
frameIdGen_++;
376
{
377
std::unique_lock<std::mutex> lock(frameData.fenceMutex);
378
VLOG("PUSH: BeginFrame (curFrame = %d, readyForFence = %d, time=%0.3f)", curFrame, (int)frameData.readyForFence, time_now_d());
379
while (!frameData.readyForFence) {
380
frameData.fenceCondVar.wait(lock);
381
}
382
frameData.readyForFence = false;
383
}
384
385
insideFrame_ = true;
386
}
387
388
// Ends the CPU-side frame: collects profiling output, hands this frame's
// pending deletes to its per-frame deleter, and pushes a SUBMIT task (with all
// queued init and render steps) to the render thread.
void GLRenderManager::Finish() {
	curRenderStep_ = nullptr; // EndCurRenderStep is this simple here.

	int curFrame = curFrame_;
	GLFrameData &frameData = frameData_[curFrame];

	frameTimeHistory_[frameData.frameId].firstSubmit = time_now_d();

	// Hand the globally accumulated deletes over to this frame's deleter.
	frameData_[curFrame].deleter.Take(deleter_);

	if (frameData.profile.enabled) {
		profilePassesString_ = std::move(frameData.profile.passesString);

#ifdef _DEBUG
		// In debug builds, prepend per-command-type counts to the profile string.
		std::string cmdString;
		for (int i = 0; i < ARRAY_SIZE(frameData.profile.commandCounts); i++) {
			if (frameData.profile.commandCounts[i] > 0) {
				cmdString += StringFromFormat("%s: %d\n", RenderCommandToString((GLRRenderCommand)i), frameData.profile.commandCounts[i]);
			}
		}
		memset(frameData.profile.commandCounts, 0, sizeof(frameData.profile.commandCounts));
		profilePassesString_ = cmdString + profilePassesString_;
#endif

		frameData.profile.passesString.clear();
	}

	VLOG("PUSH: Finish, pushing task. curFrame = %d", curFrame);
	GLRRenderThreadTask *task = new GLRRenderThreadTask(GLRRunType::SUBMIT);
	task->frame = curFrame;
	{
		// Move the accumulated steps into the task and wake the render thread.
		std::unique_lock<std::mutex> lock(pushMutex_);
		renderThreadQueue_.push(task);
		renderThreadQueue_.back()->initSteps = std::move(initSteps_);
		renderThreadQueue_.back()->steps = std::move(steps_);
		initSteps_.clear();
		steps_.clear();
		pushCondVar_.notify_one();
	}
}
428
429
void GLRenderManager::Present() {
430
GLRRenderThreadTask *presentTask = new GLRRenderThreadTask(GLRRunType::PRESENT);
431
presentTask->frame = curFrame_;
432
{
433
std::unique_lock<std::mutex> lock(pushMutex_);
434
renderThreadQueue_.push(presentTask);
435
pushCondVar_.notify_one();
436
}
437
438
int newCurFrame = curFrame_ + 1;
439
if (newCurFrame >= inflightFrames_) {
440
newCurFrame = 0;
441
}
442
curFrame_ = newCurFrame;
443
444
insideFrame_ = false;
445
}
446
447
// Render thread. Returns true if the caller should handle a swap.
bool GLRenderManager::Run(GLRRenderThreadTask &task) {
	_dbg_assert_(task.frame >= 0);

	GLFrameData &frameData = frameData_[task.frame];

	// PRESENT tasks: perform the swap (unless flagged to skip it), then release
	// this frame slot back to the emu thread via the fence condvar.
	if (task.runType == GLRRunType::PRESENT) {
		bool swapRequest = false;
		if (!frameData.skipSwap) {
			frameTimeHistory_[frameData.frameId].queuePresent = time_now_d();
			// Apply a pending swap-interval (vsync) change before swapping.
			if (swapIntervalChanged_) {
				swapIntervalChanged_ = false;
				if (swapIntervalFunction_) {
					swapIntervalFunction_(swapInterval_);
				}
			}
			// This is the swapchain framebuffer flip.
			if (swapFunction_) {
				VLOG(" PULL: SwapFunction()");
				swapFunction_();
			}
			swapRequest = true;
		} else {
			// skipSwap is a one-shot flag; reset it.
			frameData.skipSwap = false;
		}
		frameData.hasBegun = false;

		VLOG(" PULL: Frame %d.readyForFence = true", task.frame);

		{
			// Wake up BeginFrame, which waits on readyForFence.
			std::lock_guard<std::mutex> lock(frameData.fenceMutex);
			frameData.readyForFence = true;
			frameData.fenceCondVar.notify_one();
			// At this point, we're done with this framedata (for now).
		}
		return swapRequest;
	}

	// First task for this frame slot: run the deletes taken over last time this
	// slot ran, then move the currently queued deletes up for next time.
	if (!frameData.hasBegun) {
		frameData.hasBegun = true;

		frameData.deleter_prev.Perform(this, skipGLCalls_);
		frameData.deleter_prev.Take(frameData.deleter);
	}

	// queueRunner_.LogSteps(stepsOnThread);
	queueRunner_.RunInitSteps(task.initSteps, skipGLCalls_);

	// Run this after RunInitSteps so any fresh GLRBuffers for the pushbuffers can get created.
	if (!skipGLCalls_) {
		for (auto iter : frameData.activePushBuffers) {
			iter->Flush();
			iter->UnmapDevice();
		}
	}

	if (frameData.profile.enabled) {
		frameData.profile.cpuStartTime = time_now_d();
	}

	// In VR mode the step list is run once per VR pass.
	if (IsVREnabled()) {
		int passes = GetVRPassesCount();
		for (int i = 0; i < passes; i++) {
			PreVRFrameRender(i);
			queueRunner_.RunSteps(task.steps, frameData, skipGLCalls_, i < passes - 1, true);
			PostVRFrameRender();
		}
	} else {
		queueRunner_.RunSteps(task.steps, frameData, skipGLCalls_, false, false);
	}

	if (frameData.profile.enabled) {
		frameData.profile.cpuEndTime = time_now_d();
	}

	// Re-map the push buffers so the emu thread can write into them again.
	if (!skipGLCalls_) {
		for (auto iter : frameData.activePushBuffers) {
			iter->MapDevice(bufferStrategy_);
		}
	}

	switch (task.runType) {
	case GLRRunType::SUBMIT:
		break;

	case GLRRunType::SYNC:
		frameData.hasBegun = false;

		// glFinish is not actually necessary here, and won't be unless we start using
		// glBufferStorage. Then we need to use fences.
		{
			// Wake up FlushSync, which waits on syncDone_.
			std::lock_guard<std::mutex> lock(syncMutex_);
			syncDone_ = true;
			syncCondVar_.notify_one();
		}
		break;

	default:
		_assert_(false);
	}
	VLOG(" PULL: ::Run(): Done running tasks");
	return false;
}
550
551
void GLRenderManager::FlushSync() {
552
{
553
VLOG("PUSH: Frame[%d].readyForRun = true (sync)", curFrame_);
554
555
GLRRenderThreadTask *task = new GLRRenderThreadTask(GLRRunType::SYNC);
556
task->frame = curFrame_;
557
558
std::unique_lock<std::mutex> lock(pushMutex_);
559
renderThreadQueue_.push(task);
560
renderThreadQueue_.back()->initSteps = std::move(initSteps_);
561
renderThreadQueue_.back()->steps = std::move(steps_);
562
pushCondVar_.notify_one();
563
steps_.clear();
564
}
565
566
{
567
std::unique_lock<std::mutex> lock(syncMutex_);
568
// Wait for the flush to be hit, since we're syncing.
569
while (!syncDone_) {
570
VLOG("PUSH: Waiting for frame[%d].readyForFence = 1 (sync)", curFrame_);
571
syncCondVar_.wait(lock);
572
}
573
syncDone_ = false;
574
}
575
}
576
577