Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
hrydgard
GitHub Repository: hrydgard/ppsspp
Path: blob/master/Common/GPU/OpenGL/GLRenderManager.cpp
3189 views
1
#include "ppsspp_config.h"
2
#include "GLRenderManager.h"
3
#include "Common/GPU/OpenGL/GLFeatures.h"
4
#include "Common/GPU/thin3d.h"
5
#include "Common/Thread/ThreadUtil.h"
6
#include "Common/VR/PPSSPPVR.h"
7
8
#include "Common/Log.h"
9
#include "Common/TimeUtil.h"
10
#include "Common/MemoryUtil.h"
11
#include "Common/StringUtils.h"
12
#include "Common/Math/math_util.h"
13
14
// Verbose logging for the render/emu thread handshake. Compiled out by default;
// change "#if 0" to "#if 1" (or restore the _DEBUG condition) to enable.
#if 0 // def _DEBUG
#define VLOG(...) INFO_LOG(Log::G3D, __VA_ARGS__)
#else
#define VLOG(...)
#endif

// Identity of the thread that runs the GL render queue; assigned in
// GLRenderManager::ThreadStart().
std::thread::id renderThreadId;
21
22
// Records the texture's dimensions and mip count, and decides whether GL wrap
// modes are usable: always if NPOT textures are fully supported, otherwise
// only when both width and height are powers of two.
GLRTexture::GLRTexture(const Draw::DeviceCaps &caps, int width, int height, int depth, int numMips) {
	canWrap = caps.textureNPOTFullySupported || (isPowerOf2(width) && isPowerOf2(height));
	w = width;
	h = height;
	d = depth;
	this->numMips = numMips;
}
33
34
// Releases the underlying GL texture object, if one was ever created.
GLRTexture::~GLRTexture() {
	if (!texture)
		return;
	glDeleteTextures(1, &texture);
}
39
40
// NOTE: Stores a reference to the caller-owned frame time history buffer, so
// it must outlive this manager.
GLRenderManager::GLRenderManager(HistoryBuffer<FrameTimeData, FRAME_TIME_HISTORY_LENGTH> &frameTimeHistory) : frameTimeHistory_(frameTimeHistory) {
	// size_t sz = sizeof(GLRRenderData);
	// _dbg_assert_(sz == 88);
}
44
45
// The render thread must already have been stopped (runCompileThread_ false)
// and all per-frame deleters drained before destruction.
GLRenderManager::~GLRenderManager() {
	_dbg_assert_(!runCompileThread_);

	for (int frame = 0; frame < MAX_INFLIGHT_FRAMES; frame++) {
		_assert_(frameData_[frame].deleter.IsEmpty());
		_assert_(frameData_[frame].deleter_prev.IsEmpty());
	}
	// Flush out anything that got queued for deletion during shutdown itself.
	deleter_.Perform(this, skipGLCalls_);
	_assert_(deleter_.IsEmpty());
}
56
57
// Called as the render thread starts up: creates device objects, applies any
// pending inflight-frame-count change, and picks the buffer upload strategy
// based on driver quirks and available extensions.
void GLRenderManager::ThreadStart(Draw::DrawContext *draw) {
	queueRunner_.CreateDeviceObjects();
	renderThreadId = std::this_thread::get_id();

	if (newInflightFrames_ != -1) {
		INFO_LOG(Log::G3D, "Updating inflight frames to %d", newInflightFrames_);
		inflightFrames_ = newInflightFrames_;
		newInflightFrames_ = -1;
	}

	// Don't save draw, we don't want any thread safety confusion.
	const bool hasBufferStorage = gl_extensions.ARB_buffer_storage || gl_extensions.EXT_buffer_storage;
	bool useMapping = draw->GetBugs().Has(Draw::Bugs::ANY_MAP_BUFFER_RANGE_SLOW);
	if (gl_extensions.IsGLES && !hasBufferStorage && !gl_extensions.VersionGEThan(3, 0, 0)) {
		// Mapping can't work on this combination anyway, so force it off.
		useMapping = false;
	}

	// Notes on buffer mapping:
	// NVIDIA GTX 9xx / 2017-10 drivers - mapping improves speed, basic unmap seems best.
	// PowerVR GX6xxx / iOS 10.3 - mapping has little improvement, explicit flush is slower.
	//
	// Qualcomm's FLUSH_INVALIDATE_UNMAP strategy is temporarily disabled because it
	// doesn't work with task switching on Android. The mapped buffer seems to just be
	// pulled out like a rug from under us, crashing as soon as any write happens, which
	// can happen during shutdown since we write from the Emu thread which may not yet
	// have shut down. There may be solutions to this, but for now we avoid that strategy.
	bufferStrategy_ = GLBufferStrategy::SUBDATA;
	if (useMapping && gl_extensions.gpuVendor == GPU_VENDOR_NVIDIA) {
		bufferStrategy_ = GLBufferStrategy::FRAME_UNMAP;
	}
}
100
101
void GLRenderManager::ThreadEnd() {
102
INFO_LOG(Log::G3D, "ThreadEnd");
103
104
queueRunner_.DestroyDeviceObjects();
105
VLOG(" PULL: Quitting");
106
107
// Good time to run all the deleters to get rid of leftover objects.
108
for (int i = 0; i < MAX_INFLIGHT_FRAMES; i++) {
109
// Since we're in shutdown, we should skip the GL calls on Android.
110
frameData_[i].deleter.Perform(this, skipGLCalls_);
111
frameData_[i].deleter_prev.Perform(this, skipGLCalls_);
112
}
113
deleter_.Perform(this, skipGLCalls_);
114
for (int i = 0; i < (int)steps_.size(); i++) {
115
delete steps_[i];
116
}
117
steps_.clear();
118
initSteps_.clear();
119
}
120
121
// Unlike in Vulkan, this isn't a full independent function, instead it gets called every frame.
122
//
123
// This means that we have to block and run the render queue until we've presented one frame,
124
// at which point we can leave.
125
//
126
// NOTE: If run_ is true, we WILL run a task!
127
bool GLRenderManager::ThreadFrame() {
128
if (!runCompileThread_) {
129
return false;
130
}
131
132
GLRRenderThreadTask *task = nullptr;
133
134
// In case of syncs or other partial completion, we keep going until we complete a frame.
135
while (true) {
136
// Pop a task of the queue and execute it.
137
// NOTE: We need to actually wait for a task, we can't just bail!
138
{
139
std::unique_lock<std::mutex> lock(pushMutex_);
140
while (renderThreadQueue_.empty()) {
141
pushCondVar_.wait(lock);
142
}
143
task = std::move(renderThreadQueue_.front());
144
renderThreadQueue_.pop();
145
}
146
147
// We got a task! We can now have pushMutex_ unlocked, allowing the host to
148
// push more work when it feels like it, and just start working.
149
if (task->runType == GLRRunType::EXIT) {
150
delete task;
151
// Oh, host wanted out. Let's leave, and also let's notify the host.
152
// This is unlike Vulkan too which can just block on the thread existing.
153
std::unique_lock<std::mutex> lock(syncMutex_);
154
syncCondVar_.notify_one();
155
syncDone_ = true;
156
break;
157
}
158
159
// Render the scene.
160
VLOG(" PULL: Frame %d RUN (%0.3f)", task->frame, time_now_d());
161
if (Run(*task)) {
162
// Swap requested, so we just bail the loop.
163
delete task;
164
break;
165
}
166
delete task;
167
};
168
169
return true;
170
}
171
172
void GLRenderManager::StopThread() {
173
// There's not really a lot to do here anymore.
174
INFO_LOG(Log::G3D, "GLRenderManager::StopThread()");
175
if (runCompileThread_) {
176
runCompileThread_ = false;
177
178
std::unique_lock<std::mutex> lock(pushMutex_);
179
renderThreadQueue_.push(new GLRRenderThreadTask(GLRRunType::EXIT));
180
pushCondVar_.notify_one();
181
} else {
182
WARN_LOG(Log::G3D, "GL submission thread was already paused.");
183
}
184
}
185
186
void GLRenderManager::StartThread() {
187
// There's not really a lot to do here anymore.
188
INFO_LOG(Log::G3D, "GLRenderManager::StartThread()");
189
if (!runCompileThread_) {
190
runCompileThread_ = true;
191
} else {
192
INFO_LOG(Log::G3D, "GL submission thread was already running.");
193
}
194
}
195
196
std::string GLRenderManager::GetGpuProfileString() const {
197
int curFrame = curFrame_;
198
const GLQueueProfileContext &profile = frameData_[curFrame].profile;
199
200
float cputime_ms = 1000.0f * (profile.cpuEndTime - profile.cpuStartTime);
201
return StringFromFormat("CPU time to run the list: %0.2f ms\n\n%s", cputime_ms, profilePassesString_.c_str());
202
}
203
204
// Starts a new RENDER step targeting fb (or reuses the current one when the
// target is unchanged and no clear is requested). CLEAR pass actions are
// translated into a single CLEAR command at the head of the step.
// NOTE(review): fb may be null here - presumably the backbuffer; confirm in the queue runner.
void GLRenderManager::BindFramebufferAsRenderTarget(GLRFramebuffer *fb, GLRRenderPassAction color, GLRRenderPassAction depth, GLRRenderPassAction stencil, uint32_t clearColor, float clearDepth, uint8_t clearStencil, const char *tag) {
	_assert_(insideFrame_);
#ifdef _DEBUG
	curProgram_ = nullptr;
#endif

	// Eliminate dupes.
	if (steps_.size() && steps_.back()->stepType == GLRStepType::RENDER && steps_.back()->render.framebuffer == fb) {
		if (color != GLRRenderPassAction::CLEAR && depth != GLRRenderPassAction::CLEAR && stencil != GLRRenderPassAction::CLEAR) {
			// We don't move to a new step, this bind was unnecessary and we can safely skip it.
			curRenderStep_ = steps_.back();
			return;
		}
	}
	if (curRenderStep_ && curRenderStep_->commands.size() == 0) {
		VLOG("Empty render step. Usually happens after uploading pixels.");
	}

	GLRStep *step = new GLRStep{ GLRStepType::RENDER };
	// This is what queues up new passes, and can end previous ones.
	step->render.framebuffer = fb;
	step->render.color = color;
	step->render.depth = depth;
	step->render.stencil = stencil;
	step->tag = tag;
	steps_.push_back(step);

	// Collect all requested clears into one CLEAR command.
	GLuint clearMask = 0;
	GLRRenderData data(GLRRenderCommand::CLEAR);
	if (color == GLRRenderPassAction::CLEAR) {
		clearMask |= GL_COLOR_BUFFER_BIT;
		data.clear.clearColor = clearColor;
	}
	if (depth == GLRRenderPassAction::CLEAR) {
		clearMask |= GL_DEPTH_BUFFER_BIT;
		data.clear.clearZ = clearDepth;
	}
	if (stencil == GLRRenderPassAction::CLEAR) {
		clearMask |= GL_STENCIL_BUFFER_BIT;
		data.clear.clearStencil = clearStencil;
	}
	if (clearMask) {
		// NOTE(review): a 0,0,0,0 scissor here presumably means "no scissor"
		// (full-target clear) - confirm against the queue runner's CLEAR handling.
		data.clear.scissorX = 0;
		data.clear.scissorY = 0;
		data.clear.scissorW = 0;
		data.clear.scissorH = 0;
		data.clear.clearMask = clearMask;
		data.clear.colorMask = 0xF;
		step->commands.push_back(data);
	}
	curRenderStep_ = step;

	if (fb) {
		// KEEP means we read the previous contents, creating a dependency on fb.
		if (color == GLRRenderPassAction::KEEP || depth == GLRRenderPassAction::KEEP || stencil == GLRRenderPassAction::KEEP) {
			step->dependencies.insert(fb);
		}
	}

	if (invalidationCallback_) {
		invalidationCallback_(InvalidationCallbackFlags::RENDER_PASS_STATE);
	}
}
266
267
// aspectBit: GL_COLOR_BUFFER_BIT etc
268
void GLRenderManager::BindFramebufferAsTexture(GLRFramebuffer *fb, int binding, int aspectBit) {
269
_dbg_assert_(curRenderStep_ && curRenderStep_->stepType == GLRStepType::RENDER);
270
_dbg_assert_(binding < MAX_GL_TEXTURE_SLOTS);
271
GLRRenderData data{ GLRRenderCommand::BIND_FB_TEXTURE };
272
data.bind_fb_texture.slot = binding;
273
data.bind_fb_texture.framebuffer = fb;
274
data.bind_fb_texture.aspect = aspectBit;
275
curRenderStep_->commands.push_back(data);
276
curRenderStep_->dependencies.insert(fb);
277
}
278
279
// Queues a COPY step transferring srcRect from src to dstPos in dst.
void GLRenderManager::CopyFramebuffer(GLRFramebuffer *src, GLRect2D srcRect, GLRFramebuffer *dst, GLOffset2D dstPos, int aspectMask, const char *tag) {
	GLRStep *step = new GLRStep{ GLRStepType::COPY };
	step->copy.src = src;
	step->copy.dst = dst;
	step->copy.srcRect = srcRect;
	step->copy.dstPos = dstPos;
	step->copy.aspectMask = aspectMask;
	step->tag = tag;
	step->dependencies.insert(src);
	// Only depend on the destination's previous contents if the copy doesn't
	// completely overwrite it.
	const bool coversDst = dst && srcRect.x == 0 && srcRect.y == 0 && srcRect.w == dst->width && srcRect.h == dst->height;
	if (dstPos.x != 0 || dstPos.y != 0 || !coversDst) {
		step->dependencies.insert(dst);
	}
	steps_.push_back(step);
}
293
294
// Queues a BLIT step (scaled copy, optional linear filter) from src to dst.
void GLRenderManager::BlitFramebuffer(GLRFramebuffer *src, GLRect2D srcRect, GLRFramebuffer *dst, GLRect2D dstRect, int aspectMask, bool filter, const char *tag) {
	GLRStep *step = new GLRStep{ GLRStepType::BLIT };
	step->blit.src = src;
	step->blit.dst = dst;
	step->blit.srcRect = srcRect;
	step->blit.dstRect = dstRect;
	step->blit.aspectMask = aspectMask;
	step->blit.filter = filter;
	step->tag = tag;
	step->dependencies.insert(src);
	// Only depend on the destination's previous contents if the blit doesn't
	// cover the whole target.
	const bool coversDst = dst && dstRect.x == 0 && dstRect.y == 0 && dstRect.w == dst->width && dstRect.h == dst->height;
	if (!coversDst) {
		step->dependencies.insert(dst);
	}
	steps_.push_back(step);
}
309
310
// Synchronously reads back a rectangle of a framebuffer aspect into pixels,
// converting to destFormat. Returns false if aspectBits names no readable aspect.
bool GLRenderManager::CopyFramebufferToMemory(GLRFramebuffer *src, int aspectBits, int x, int y, int w, int h, Draw::DataFormat destFormat, uint8_t *pixels, int pixelStride, Draw::ReadbackMode mode, const char *tag) {
	_assert_(pixels);

	GLRStep *readbackStep = new GLRStep{ GLRStepType::READBACK };
	readbackStep->readback.src = src;
	readbackStep->readback.srcRect = { x, y, w, h };
	readbackStep->readback.aspectMask = aspectBits;
	readbackStep->readback.dstFormat = destFormat;
	readbackStep->dependencies.insert(src);
	readbackStep->tag = tag;
	steps_.push_back(readbackStep);

	// Readbacks are synchronous: end the current render step and drain the queue.
	curRenderStep_ = nullptr;
	FlushSync();

	// Pick the source format the readback buffer holds for the requested aspect.
	Draw::DataFormat readbackFormat;
	if (aspectBits & GL_COLOR_BUFFER_BIT) {
		readbackFormat = Draw::DataFormat::R8G8B8A8_UNORM;
	} else if (aspectBits & GL_STENCIL_BUFFER_BIT) {
		// Copies from stencil are always S8.
		readbackFormat = Draw::DataFormat::S8;
	} else if (aspectBits & GL_DEPTH_BUFFER_BIT) {
		// TODO: Do this properly.
		readbackFormat = Draw::DataFormat::D24_S8;
	} else {
		return false;
	}
	queueRunner_.CopyFromReadbackBuffer(src, w, h, readbackFormat, destFormat, pixelStride, pixels);
	return true;
}
340
341
// Synchronously reads back a rectangle of a texture mip level into pixels,
// converting to destFormat.
void GLRenderManager::CopyImageToMemorySync(GLRTexture *texture, int mipLevel, int x, int y, int w, int h, Draw::DataFormat destFormat, uint8_t *pixels, int pixelStride, const char *tag) {
	_assert_(texture);
	_assert_(pixels);

	GLRStep *readbackStep = new GLRStep{ GLRStepType::READBACK_IMAGE };
	readbackStep->readback_image.texture = texture;
	readbackStep->readback_image.mipLevel = mipLevel;
	readbackStep->readback_image.srcRect = { x, y, w, h };
	readbackStep->tag = tag;
	steps_.push_back(readbackStep);

	// Readbacks are synchronous: end the current render step and drain the queue.
	curRenderStep_ = nullptr;
	FlushSync();

	// Image readbacks are fetched as RGBA8888 and converted to destFormat here.
	queueRunner_.CopyFromReadbackBuffer(nullptr, w, h, Draw::DataFormat::R8G8B8A8_UNORM, destFormat, pixelStride, pixels);
}
356
357
void GLRenderManager::BeginFrame(bool enableProfiling) {
358
#ifdef _DEBUG
359
curProgram_ = nullptr;
360
#endif
361
362
// Shouldn't call BeginFrame unless we're in a run state.
363
_dbg_assert_(runCompileThread_);
364
365
int curFrame = GetCurFrame();
366
367
FrameTimeData &frameTimeData = frameTimeHistory_.Add(frameIdGen_);
368
frameTimeData.frameBegin = time_now_d();
369
frameTimeData.afterFenceWait = frameTimeData.frameBegin;
370
371
GLFrameData &frameData = frameData_[curFrame];
372
frameData.frameId = frameIdGen_;
373
frameData.profile.enabled = enableProfiling;
374
375
frameIdGen_++;
376
{
377
std::unique_lock<std::mutex> lock(frameData.fenceMutex);
378
VLOG("PUSH: BeginFrame (curFrame = %d, readyForFence = %d, time=%0.3f)", curFrame, (int)frameData.readyForFence, time_now_d());
379
while (!frameData.readyForFence) {
380
frameData.fenceCondVar.wait(lock);
381
}
382
frameData.readyForFence = false;
383
}
384
385
insideFrame_ = true;
386
}
387
388
// Ends the CPU-side frame: collects profiling output, hands this frame's
// pending deletes to its per-frame deleter, and pushes a SUBMIT task (with all
// queued init and render steps) to the render thread.
void GLRenderManager::Finish() {
	curRenderStep_ = nullptr; // EndCurRenderStep is this simple here.

	int curFrame = curFrame_;
	GLFrameData &frameData = frameData_[curFrame];

	frameTimeHistory_[frameData.frameId].firstSubmit = time_now_d();

	// Hand the globally accumulated deletes over to this frame's deleter.
	frameData_[curFrame].deleter.Take(deleter_);

	if (frameData.profile.enabled) {
		profilePassesString_ = std::move(frameData.profile.passesString);

#ifdef _DEBUG
		// In debug builds, prepend per-command-type counts to the profile string.
		std::string cmdString;
		for (int i = 0; i < ARRAY_SIZE(frameData.profile.commandCounts); i++) {
			if (frameData.profile.commandCounts[i] > 0) {
				cmdString += StringFromFormat("%s: %d\n", RenderCommandToString((GLRRenderCommand)i), frameData.profile.commandCounts[i]);
			}
		}
		memset(frameData.profile.commandCounts, 0, sizeof(frameData.profile.commandCounts));
		profilePassesString_ = cmdString + profilePassesString_;
#endif

		frameData.profile.passesString.clear();
	}

	VLOG("PUSH: Finish, pushing task. curFrame = %d", curFrame);
	GLRRenderThreadTask *task = new GLRRenderThreadTask(GLRRunType::SUBMIT);
	task->frame = curFrame;
	{
		// Move the accumulated steps into the task and wake the render thread.
		std::unique_lock<std::mutex> lock(pushMutex_);
		renderThreadQueue_.push(task);
		renderThreadQueue_.back()->initSteps = std::move(initSteps_);
		renderThreadQueue_.back()->steps = std::move(steps_);
		initSteps_.clear();
		steps_.clear();
		pushCondVar_.notify_one();
	}
}
428
429
void GLRenderManager::Present() {
430
GLRRenderThreadTask *presentTask = new GLRRenderThreadTask(GLRRunType::PRESENT);
431
presentTask->frame = curFrame_;
432
{
433
std::unique_lock<std::mutex> lock(pushMutex_);
434
renderThreadQueue_.push(presentTask);
435
pushCondVar_.notify_one();
436
}
437
438
int newCurFrame = curFrame_ + 1;
439
if (newCurFrame >= inflightFrames_) {
440
newCurFrame = 0;
441
}
442
curFrame_ = newCurFrame;
443
444
insideFrame_ = false;
445
}
446
447
// Render thread. Returns true if the caller should handle a swap.
bool GLRenderManager::Run(GLRRenderThreadTask &task) {
	_dbg_assert_(task.frame >= 0);

	GLFrameData &frameData = frameData_[task.frame];

	// PRESENT tasks: perform the swap (unless flagged to skip it), then release
	// this frame slot back to the emu thread via the fence condvar.
	if (task.runType == GLRRunType::PRESENT) {
		bool swapRequest = false;
		if (!frameData.skipSwap) {
			frameTimeHistory_[frameData.frameId].queuePresent = time_now_d();
			// Apply a pending swap-interval (vsync) change before swapping.
			if (swapIntervalChanged_) {
				swapIntervalChanged_ = false;
				if (swapIntervalFunction_) {
					swapIntervalFunction_(swapInterval_);
				}
			}
			// This is the swapchain framebuffer flip.
			if (swapFunction_) {
				VLOG(" PULL: SwapFunction()");
				swapFunction_();
			}
			swapRequest = true;
		} else {
			// skipSwap is a one-shot flag; reset it.
			frameData.skipSwap = false;
		}
		frameData.hasBegun = false;

		VLOG(" PULL: Frame %d.readyForFence = true", task.frame);

		{
			// Wake up BeginFrame, which waits on readyForFence.
			std::lock_guard<std::mutex> lock(frameData.fenceMutex);
			frameData.readyForFence = true;
			frameData.fenceCondVar.notify_one();
			// At this point, we're done with this framedata (for now).
		}
		return swapRequest;
	}

	// First task for this frame slot: run the deletes taken over last time this
	// slot ran, then move the currently queued deletes up for next time.
	if (!frameData.hasBegun) {
		frameData.hasBegun = true;

		frameData.deleter_prev.Perform(this, skipGLCalls_);
		frameData.deleter_prev.Take(frameData.deleter);
	}

	// queueRunner_.LogSteps(stepsOnThread);
	queueRunner_.RunInitSteps(task.initSteps, skipGLCalls_);

	// Run this after RunInitSteps so any fresh GLRBuffers for the pushbuffers can get created.
	if (!skipGLCalls_) {
		for (auto iter : frameData.activePushBuffers) {
			iter->Flush();
			iter->UnmapDevice();
		}
	}

	if (frameData.profile.enabled) {
		frameData.profile.cpuStartTime = time_now_d();
	}

	// In VR mode the step list is run once per VR pass.
	if (IsVREnabled()) {
		int passes = GetVRPassesCount();
		for (int i = 0; i < passes; i++) {
			PreVRFrameRender(i);
			queueRunner_.RunSteps(task.steps, frameData, skipGLCalls_, i < passes - 1, true);
			PostVRFrameRender();
		}
	} else {
		queueRunner_.RunSteps(task.steps, frameData, skipGLCalls_, false, false);
	}

	if (frameData.profile.enabled) {
		frameData.profile.cpuEndTime = time_now_d();
	}

	// Re-map the push buffers so the emu thread can write into them again.
	if (!skipGLCalls_) {
		for (auto iter : frameData.activePushBuffers) {
			iter->MapDevice(bufferStrategy_);
		}
	}

	switch (task.runType) {
	case GLRRunType::SUBMIT:
		break;

	case GLRRunType::SYNC:
		frameData.hasBegun = false;

		// glFinish is not actually necessary here, and won't be unless we start using
		// glBufferStorage. Then we need to use fences.
		{
			// Wake up FlushSync, which waits on syncDone_.
			std::lock_guard<std::mutex> lock(syncMutex_);
			syncDone_ = true;
			syncCondVar_.notify_one();
		}
		break;

	default:
		_assert_(false);
	}
	VLOG(" PULL: ::Run(): Done running tasks");
	return false;
}
550
551
void GLRenderManager::FlushSync() {
552
{
553
VLOG("PUSH: Frame[%d].readyForRun = true (sync)", curFrame_);
554
555
GLRRenderThreadTask *task = new GLRRenderThreadTask(GLRRunType::SYNC);
556
task->frame = curFrame_;
557
558
std::unique_lock<std::mutex> lock(pushMutex_);
559
renderThreadQueue_.push(task);
560
renderThreadQueue_.back()->initSteps = std::move(initSteps_);
561
renderThreadQueue_.back()->steps = std::move(steps_);
562
pushCondVar_.notify_one();
563
steps_.clear();
564
}
565
566
{
567
std::unique_lock<std::mutex> lock(syncMutex_);
568
// Wait for the flush to be hit, since we're syncing.
569
while (!syncDone_) {
570
VLOG("PUSH: Waiting for frame[%d].readyForFence = 1 (sync)", curFrame_);
571
syncCondVar_.wait(lock);
572
}
573
syncDone_ = false;
574
}
575
}
576
577