// Path: blob/master/Common/GPU/OpenGL/GLRenderManager.cpp
// (3189 views)
#include "ppsspp_config.h"
#include "GLRenderManager.h"
#include "Common/GPU/OpenGL/GLFeatures.h"
#include "Common/GPU/thin3d.h"
#include "Common/Thread/ThreadUtil.h"
#include "Common/VR/PPSSPPVR.h"

#include "Common/Log.h"
#include "Common/TimeUtil.h"
#include "Common/MemoryUtil.h"
#include "Common/StringUtils.h"
#include "Common/Math/math_util.h"

// Verbose logging for the push/pull handshake. Compiled out unless the "#if 0" is flipped.
#if 0 // def _DEBUG
#define VLOG(...) INFO_LOG(Log::G3D, __VA_ARGS__)
#else
#define VLOG(...)
#endif

// Id of the thread that last called GLRenderManager::ThreadStart().
std::thread::id renderThreadId;

// Records the texture dimensions and mip count. Wrapping is allowed unconditionally when the
// device reports full NPOT support, otherwise only if both width and height are powers of two.
GLRTexture::GLRTexture(const Draw::DeviceCaps &caps, int width, int height, int depth, int numMips) {
	if (caps.textureNPOTFullySupported) {
		canWrap = true;
	} else {
		canWrap = isPowerOf2(width) && isPowerOf2(height);
	}
	w = width;
	h = height;
	d = depth;
	this->numMips = numMips;
}

// Deletes the GL texture object, if one was ever assigned (non-zero name).
GLRTexture::~GLRTexture() {
	if (texture) {
		glDeleteTextures(1, &texture);
	}
}

// Only stores a reference to the shared frame time history; no GL work happens here.
GLRenderManager::GLRenderManager(HistoryBuffer<FrameTimeData, FRAME_TIME_HISTORY_LENGTH> &frameTimeHistory) : frameTimeHistory_(frameTimeHistory) {
	// size_t sz = sizeof(GLRRenderData);
	// _dbg_assert_(sz == 88);
}

// Expects StopThread() to have cleared the run flag and all per-frame deleters to be empty.
GLRenderManager::~GLRenderManager() {
	_dbg_assert_(!runCompileThread_);

	for (int i = 0; i < MAX_INFLIGHT_FRAMES; i++) {
		_assert_(frameData_[i].deleter.IsEmpty());
		_assert_(frameData_[i].deleter_prev.IsEmpty());
	}
	// Was anything deleted during shutdown?
	deleter_.Perform(this, skipGLCalls_);
	_assert_(deleter_.IsEmpty());
}

// Creates the queue runner's device objects, records the calling thread in renderThreadId,
// applies a pending inflight-frame count change and picks the buffer upload strategy.
// Presumably called on the render thread right after the GL context becomes current - confirm with caller.
void GLRenderManager::ThreadStart(Draw::DrawContext *draw) {
	queueRunner_.CreateDeviceObjects();
	renderThreadId = std::this_thread::get_id();

	// -1 means "no change requested".
	if (newInflightFrames_ != -1) {
		INFO_LOG(Log::G3D, "Updating inflight frames to %d", newInflightFrames_);
		inflightFrames_ = newInflightFrames_;
		newInflightFrames_ = -1;
	}

	// Don't save draw, we don't want any thread safety confusion.
	// NOTE(review): mapping is enabled when the ANY_MAP_BUFFER_RANGE_SLOW bug flag IS set,
	// which reads inverted relative to the flag's name - confirm this is intended.
	bool mapBuffers = draw->GetBugs().Has(Draw::Bugs::ANY_MAP_BUFFER_RANGE_SLOW);
	bool hasBufferStorage = gl_extensions.ARB_buffer_storage || gl_extensions.EXT_buffer_storage;
	if (!gl_extensions.VersionGEThan(3, 0, 0) && gl_extensions.IsGLES && !hasBufferStorage) {
		// Force disable if it wouldn't work anyway.
		mapBuffers = false;
	}

	// Notes on buffer mapping:
	// NVIDIA GTX 9xx / 2017-10 drivers - mapping improves speed, basic unmap seems best.
	// PowerVR GX6xxx / iOS 10.3 - mapping has little improvement, explicit flush is slower.
	if (mapBuffers) {
		switch (gl_extensions.gpuVendor) {
		case GPU_VENDOR_NVIDIA:
			bufferStrategy_ = GLBufferStrategy::FRAME_UNMAP;
			break;

		// Temporarily disabled because it doesn't work with task switching on Android.
		// The mapped buffer seems to just be pulled out like a rug from under us, crashing
		// as soon as any write happens, which can happen during shutdown since we write from the
		// Emu thread which may not yet have shut down. There may be solutions to this, but for now,
		// disable this strategy to avoid crashing.
		//case GPU_VENDOR_QUALCOMM:
		//	bufferStrategy_ = GLBufferStrategy::FLUSH_INVALIDATE_UNMAP;
		//	break;

		default:
			bufferStrategy_ = GLBufferStrategy::SUBDATA;
			break;
		}
	} else {
		bufferStrategy_ = GLBufferStrategy::SUBDATA;
	}
}

// Tears down the queue runner's device objects, runs every pending deleter and frees
// any steps that were recorded but never handed to the render queue.
void GLRenderManager::ThreadEnd() {
	INFO_LOG(Log::G3D, "ThreadEnd");

	queueRunner_.DestroyDeviceObjects();
	VLOG(" PULL: Quitting");

	// Good time to run all the deleters to get rid of leftover objects.
	for (int i = 0; i < MAX_INFLIGHT_FRAMES; i++) {
		// Since we're in shutdown, we should skip the GL calls on Android.
		frameData_[i].deleter.Perform(this, skipGLCalls_);
		frameData_[i].deleter_prev.Perform(this, skipGLCalls_);
	}
	deleter_.Perform(this, skipGLCalls_);

	// steps_ holds owning raw pointers (allocated with new when recorded), so free them here.
	for (GLRStep *step : steps_) {
		delete step;
	}
	steps_.clear();
	initSteps_.clear();
}

// Unlike in Vulkan, this isn't a full independent function, instead it gets called every frame.
//
// This means that we have to block and run the render queue until we've presented one frame,
// at which point
we can leave.124//125// NOTE: If run_ is true, we WILL run a task!126bool GLRenderManager::ThreadFrame() {127if (!runCompileThread_) {128return false;129}130131GLRRenderThreadTask *task = nullptr;132133// In case of syncs or other partial completion, we keep going until we complete a frame.134while (true) {135// Pop a task of the queue and execute it.136// NOTE: We need to actually wait for a task, we can't just bail!137{138std::unique_lock<std::mutex> lock(pushMutex_);139while (renderThreadQueue_.empty()) {140pushCondVar_.wait(lock);141}142task = std::move(renderThreadQueue_.front());143renderThreadQueue_.pop();144}145146// We got a task! We can now have pushMutex_ unlocked, allowing the host to147// push more work when it feels like it, and just start working.148if (task->runType == GLRRunType::EXIT) {149delete task;150// Oh, host wanted out. Let's leave, and also let's notify the host.151// This is unlike Vulkan too which can just block on the thread existing.152std::unique_lock<std::mutex> lock(syncMutex_);153syncCondVar_.notify_one();154syncDone_ = true;155break;156}157158// Render the scene.159VLOG(" PULL: Frame %d RUN (%0.3f)", task->frame, time_now_d());160if (Run(*task)) {161// Swap requested, so we just bail the loop.162delete task;163break;164}165delete task;166};167168return true;169}170171void GLRenderManager::StopThread() {172// There's not really a lot to do here anymore.173INFO_LOG(Log::G3D, "GLRenderManager::StopThread()");174if (runCompileThread_) {175runCompileThread_ = false;176177std::unique_lock<std::mutex> lock(pushMutex_);178renderThreadQueue_.push(new GLRRenderThreadTask(GLRRunType::EXIT));179pushCondVar_.notify_one();180} else {181WARN_LOG(Log::G3D, "GL submission thread was already paused.");182}183}184185void GLRenderManager::StartThread() {186// There's not really a lot to do here anymore.187INFO_LOG(Log::G3D, "GLRenderManager::StartThread()");188if (!runCompileThread_) {189runCompileThread_ = true;190} else {191INFO_LOG(Log::G3D, "GL 
submission thread was already running.");192}193}194195std::string GLRenderManager::GetGpuProfileString() const {196int curFrame = curFrame_;197const GLQueueProfileContext &profile = frameData_[curFrame].profile;198199float cputime_ms = 1000.0f * (profile.cpuEndTime - profile.cpuStartTime);200return StringFromFormat("CPU time to run the list: %0.2f ms\n\n%s", cputime_ms, profilePassesString_.c_str());201}202203void GLRenderManager::BindFramebufferAsRenderTarget(GLRFramebuffer *fb, GLRRenderPassAction color, GLRRenderPassAction depth, GLRRenderPassAction stencil, uint32_t clearColor, float clearDepth, uint8_t clearStencil, const char *tag) {204_assert_(insideFrame_);205#ifdef _DEBUG206curProgram_ = nullptr;207#endif208209// Eliminate dupes.210if (steps_.size() && steps_.back()->stepType == GLRStepType::RENDER && steps_.back()->render.framebuffer == fb) {211if (color != GLRRenderPassAction::CLEAR && depth != GLRRenderPassAction::CLEAR && stencil != GLRRenderPassAction::CLEAR) {212// We don't move to a new step, this bind was unnecessary and we can safely skip it.213curRenderStep_ = steps_.back();214return;215}216}217if (curRenderStep_ && curRenderStep_->commands.size() == 0) {218VLOG("Empty render step. 
Usually happens after uploading pixels.");219}220221GLRStep *step = new GLRStep{ GLRStepType::RENDER };222// This is what queues up new passes, and can end previous ones.223step->render.framebuffer = fb;224step->render.color = color;225step->render.depth = depth;226step->render.stencil = stencil;227step->tag = tag;228steps_.push_back(step);229230GLuint clearMask = 0;231GLRRenderData data(GLRRenderCommand::CLEAR);232if (color == GLRRenderPassAction::CLEAR) {233clearMask |= GL_COLOR_BUFFER_BIT;234data.clear.clearColor = clearColor;235}236if (depth == GLRRenderPassAction::CLEAR) {237clearMask |= GL_DEPTH_BUFFER_BIT;238data.clear.clearZ = clearDepth;239}240if (stencil == GLRRenderPassAction::CLEAR) {241clearMask |= GL_STENCIL_BUFFER_BIT;242data.clear.clearStencil = clearStencil;243}244if (clearMask) {245data.clear.scissorX = 0;246data.clear.scissorY = 0;247data.clear.scissorW = 0;248data.clear.scissorH = 0;249data.clear.clearMask = clearMask;250data.clear.colorMask = 0xF;251step->commands.push_back(data);252}253curRenderStep_ = step;254255if (fb) {256if (color == GLRRenderPassAction::KEEP || depth == GLRRenderPassAction::KEEP || stencil == GLRRenderPassAction::KEEP) {257step->dependencies.insert(fb);258}259}260261if (invalidationCallback_) {262invalidationCallback_(InvalidationCallbackFlags::RENDER_PASS_STATE);263}264}265266// aspectBit: GL_COLOR_BUFFER_BIT etc267void GLRenderManager::BindFramebufferAsTexture(GLRFramebuffer *fb, int binding, int aspectBit) {268_dbg_assert_(curRenderStep_ && curRenderStep_->stepType == GLRStepType::RENDER);269_dbg_assert_(binding < MAX_GL_TEXTURE_SLOTS);270GLRRenderData data{ GLRRenderCommand::BIND_FB_TEXTURE };271data.bind_fb_texture.slot = binding;272data.bind_fb_texture.framebuffer = fb;273data.bind_fb_texture.aspect = aspectBit;274curRenderStep_->commands.push_back(data);275curRenderStep_->dependencies.insert(fb);276}277278void GLRenderManager::CopyFramebuffer(GLRFramebuffer *src, GLRect2D srcRect, GLRFramebuffer *dst, GLOffset2D 
dstPos, int aspectMask, const char *tag) {279GLRStep *step = new GLRStep{ GLRStepType::COPY };280step->copy.srcRect = srcRect;281step->copy.dstPos = dstPos;282step->copy.src = src;283step->copy.dst = dst;284step->copy.aspectMask = aspectMask;285step->dependencies.insert(src);286step->tag = tag;287bool fillsDst = dst && srcRect.x == 0 && srcRect.y == 0 && srcRect.w == dst->width && srcRect.h == dst->height;288if (dstPos.x != 0 || dstPos.y != 0 || !fillsDst)289step->dependencies.insert(dst);290steps_.push_back(step);291}292293void GLRenderManager::BlitFramebuffer(GLRFramebuffer *src, GLRect2D srcRect, GLRFramebuffer *dst, GLRect2D dstRect, int aspectMask, bool filter, const char *tag) {294GLRStep *step = new GLRStep{ GLRStepType::BLIT };295step->blit.srcRect = srcRect;296step->blit.dstRect = dstRect;297step->blit.src = src;298step->blit.dst = dst;299step->blit.aspectMask = aspectMask;300step->blit.filter = filter;301step->dependencies.insert(src);302step->tag = tag;303bool fillsDst = dst && dstRect.x == 0 && dstRect.y == 0 && dstRect.w == dst->width && dstRect.h == dst->height;304if (!fillsDst)305step->dependencies.insert(dst);306steps_.push_back(step);307}308309bool GLRenderManager::CopyFramebufferToMemory(GLRFramebuffer *src, int aspectBits, int x, int y, int w, int h, Draw::DataFormat destFormat, uint8_t *pixels, int pixelStride, Draw::ReadbackMode mode, const char *tag) {310_assert_(pixels);311312GLRStep *step = new GLRStep{ GLRStepType::READBACK };313step->readback.src = src;314step->readback.srcRect = { x, y, w, h };315step->readback.aspectMask = aspectBits;316step->readback.dstFormat = destFormat;317step->dependencies.insert(src);318step->tag = tag;319steps_.push_back(step);320321curRenderStep_ = nullptr;322FlushSync();323324Draw::DataFormat srcFormat;325if (aspectBits & GL_COLOR_BUFFER_BIT) {326srcFormat = Draw::DataFormat::R8G8B8A8_UNORM;327} else if (aspectBits & GL_STENCIL_BUFFER_BIT) {328// Copies from stencil are always S8.329srcFormat = 
Draw::DataFormat::S8;330} else if (aspectBits & GL_DEPTH_BUFFER_BIT) {331// TODO: Do this properly.332srcFormat = Draw::DataFormat::D24_S8;333} else {334return false;335}336queueRunner_.CopyFromReadbackBuffer(src, w, h, srcFormat, destFormat, pixelStride, pixels);337return true;338}339340void GLRenderManager::CopyImageToMemorySync(GLRTexture *texture, int mipLevel, int x, int y, int w, int h, Draw::DataFormat destFormat, uint8_t *pixels, int pixelStride, const char *tag) {341_assert_(texture);342_assert_(pixels);343GLRStep *step = new GLRStep{ GLRStepType::READBACK_IMAGE };344step->readback_image.texture = texture;345step->readback_image.mipLevel = mipLevel;346step->readback_image.srcRect = { x, y, w, h };347step->tag = tag;348steps_.push_back(step);349350curRenderStep_ = nullptr;351FlushSync();352353queueRunner_.CopyFromReadbackBuffer(nullptr, w, h, Draw::DataFormat::R8G8B8A8_UNORM, destFormat, pixelStride, pixels);354}355356void GLRenderManager::BeginFrame(bool enableProfiling) {357#ifdef _DEBUG358curProgram_ = nullptr;359#endif360361// Shouldn't call BeginFrame unless we're in a run state.362_dbg_assert_(runCompileThread_);363364int curFrame = GetCurFrame();365366FrameTimeData &frameTimeData = frameTimeHistory_.Add(frameIdGen_);367frameTimeData.frameBegin = time_now_d();368frameTimeData.afterFenceWait = frameTimeData.frameBegin;369370GLFrameData &frameData = frameData_[curFrame];371frameData.frameId = frameIdGen_;372frameData.profile.enabled = enableProfiling;373374frameIdGen_++;375{376std::unique_lock<std::mutex> lock(frameData.fenceMutex);377VLOG("PUSH: BeginFrame (curFrame = %d, readyForFence = %d, time=%0.3f)", curFrame, (int)frameData.readyForFence, time_now_d());378while (!frameData.readyForFence) {379frameData.fenceCondVar.wait(lock);380}381frameData.readyForFence = false;382}383384insideFrame_ = true;385}386387void GLRenderManager::Finish() {388curRenderStep_ = nullptr; // EndCurRenderStep is this simple here.389390int curFrame = curFrame_;391GLFrameData 
&frameData = frameData_[curFrame];392393frameTimeHistory_[frameData.frameId].firstSubmit = time_now_d();394395frameData_[curFrame].deleter.Take(deleter_);396397if (frameData.profile.enabled) {398profilePassesString_ = std::move(frameData.profile.passesString);399400#ifdef _DEBUG401std::string cmdString;402for (int i = 0; i < ARRAY_SIZE(frameData.profile.commandCounts); i++) {403if (frameData.profile.commandCounts[i] > 0) {404cmdString += StringFromFormat("%s: %d\n", RenderCommandToString((GLRRenderCommand)i), frameData.profile.commandCounts[i]);405}406}407memset(frameData.profile.commandCounts, 0, sizeof(frameData.profile.commandCounts));408profilePassesString_ = cmdString + profilePassesString_;409#endif410411frameData.profile.passesString.clear();412}413414VLOG("PUSH: Finish, pushing task. curFrame = %d", curFrame);415GLRRenderThreadTask *task = new GLRRenderThreadTask(GLRRunType::SUBMIT);416task->frame = curFrame;417{418std::unique_lock<std::mutex> lock(pushMutex_);419renderThreadQueue_.push(task);420renderThreadQueue_.back()->initSteps = std::move(initSteps_);421renderThreadQueue_.back()->steps = std::move(steps_);422initSteps_.clear();423steps_.clear();424pushCondVar_.notify_one();425}426}427428void GLRenderManager::Present() {429GLRRenderThreadTask *presentTask = new GLRRenderThreadTask(GLRRunType::PRESENT);430presentTask->frame = curFrame_;431{432std::unique_lock<std::mutex> lock(pushMutex_);433renderThreadQueue_.push(presentTask);434pushCondVar_.notify_one();435}436437int newCurFrame = curFrame_ + 1;438if (newCurFrame >= inflightFrames_) {439newCurFrame = 0;440}441curFrame_ = newCurFrame;442443insideFrame_ = false;444}445446// Render thread. 
Returns true if the caller should handle a swap.447bool GLRenderManager::Run(GLRRenderThreadTask &task) {448_dbg_assert_(task.frame >= 0);449450GLFrameData &frameData = frameData_[task.frame];451452if (task.runType == GLRRunType::PRESENT) {453bool swapRequest = false;454if (!frameData.skipSwap) {455frameTimeHistory_[frameData.frameId].queuePresent = time_now_d();456if (swapIntervalChanged_) {457swapIntervalChanged_ = false;458if (swapIntervalFunction_) {459swapIntervalFunction_(swapInterval_);460}461}462// This is the swapchain framebuffer flip.463if (swapFunction_) {464VLOG(" PULL: SwapFunction()");465swapFunction_();466}467swapRequest = true;468} else {469frameData.skipSwap = false;470}471frameData.hasBegun = false;472473VLOG(" PULL: Frame %d.readyForFence = true", task.frame);474475{476std::lock_guard<std::mutex> lock(frameData.fenceMutex);477frameData.readyForFence = true;478frameData.fenceCondVar.notify_one();479// At this point, we're done with this framedata (for now).480}481return swapRequest;482}483484if (!frameData.hasBegun) {485frameData.hasBegun = true;486487frameData.deleter_prev.Perform(this, skipGLCalls_);488frameData.deleter_prev.Take(frameData.deleter);489}490491// queueRunner_.LogSteps(stepsOnThread);492queueRunner_.RunInitSteps(task.initSteps, skipGLCalls_);493494// Run this after RunInitSteps so any fresh GLRBuffers for the pushbuffers can get created.495if (!skipGLCalls_) {496for (auto iter : frameData.activePushBuffers) {497iter->Flush();498iter->UnmapDevice();499}500}501502if (frameData.profile.enabled) {503frameData.profile.cpuStartTime = time_now_d();504}505506if (IsVREnabled()) {507int passes = GetVRPassesCount();508for (int i = 0; i < passes; i++) {509PreVRFrameRender(i);510queueRunner_.RunSteps(task.steps, frameData, skipGLCalls_, i < passes - 1, true);511PostVRFrameRender();512}513} else {514queueRunner_.RunSteps(task.steps, frameData, skipGLCalls_, false, false);515}516517if (frameData.profile.enabled) {518frameData.profile.cpuEndTime 
= time_now_d();519}520521if (!skipGLCalls_) {522for (auto iter : frameData.activePushBuffers) {523iter->MapDevice(bufferStrategy_);524}525}526527switch (task.runType) {528case GLRRunType::SUBMIT:529break;530531case GLRRunType::SYNC:532frameData.hasBegun = false;533534// glFinish is not actually necessary here, and won't be unless we start using535// glBufferStorage. Then we need to use fences.536{537std::lock_guard<std::mutex> lock(syncMutex_);538syncDone_ = true;539syncCondVar_.notify_one();540}541break;542543default:544_assert_(false);545}546VLOG(" PULL: ::Run(): Done running tasks");547return false;548}549550void GLRenderManager::FlushSync() {551{552VLOG("PUSH: Frame[%d].readyForRun = true (sync)", curFrame_);553554GLRRenderThreadTask *task = new GLRRenderThreadTask(GLRRunType::SYNC);555task->frame = curFrame_;556557std::unique_lock<std::mutex> lock(pushMutex_);558renderThreadQueue_.push(task);559renderThreadQueue_.back()->initSteps = std::move(initSteps_);560renderThreadQueue_.back()->steps = std::move(steps_);561pushCondVar_.notify_one();562steps_.clear();563}564565{566std::unique_lock<std::mutex> lock(syncMutex_);567// Wait for the flush to be hit, since we're syncing.568while (!syncDone_) {569VLOG("PUSH: Waiting for frame[%d].readyForFence = 1 (sync)", curFrame_);570syncCondVar_.wait(lock);571}572syncDone_ = false;573}574}575576577