// Path: blob/master/Common/GPU/Vulkan/VulkanRenderManager.cpp
// 3187 views
#include <cstdint>12#include <map>3#include <sstream>45#include "Common/Log.h"6#include "Common/StringUtils.h"7#include "Common/TimeUtil.h"89#include "Common/GPU/Vulkan/VulkanAlloc.h"10#include "Common/GPU/Vulkan/VulkanContext.h"11#include "Common/GPU/Vulkan/VulkanRenderManager.h"1213#include "Common/LogReporting.h"14#include "Common/Thread/ThreadUtil.h"1516#if 0 // def _DEBUG17#define VLOG(...) NOTICE_LOG(Log::G3D, __VA_ARGS__)18#else19#define VLOG(...)20#endif2122#ifndef UINT64_MAX23#define UINT64_MAX 0xFFFFFFFFFFFFFFFFULL24#endif2526using namespace PPSSPP_VK;2728// renderPass is an example of the "compatibility class" or RenderPassType type.29bool VKRGraphicsPipeline::Create(VulkanContext *vulkan, VkRenderPass compatibleRenderPass, RenderPassType rpType, VkSampleCountFlagBits sampleCount, double scheduleTime, int countToCompile) {30// Good torture test to test the shutdown-while-precompiling-shaders issue on PC where it's normally31// hard to catch because shaders compile so fast.32// sleep_ms(200);3334bool multisample = RenderPassTypeHasMultisample(rpType);35if (multisample) {36if (sampleCount_ != VK_SAMPLE_COUNT_FLAG_BITS_MAX_ENUM) {37_assert_(sampleCount == sampleCount_);38} else {39sampleCount_ = sampleCount;40}41}4243// Sanity check.44// Seen in crash reports from PowerVR GE8320, presumably we failed creating some shader modules.45if (!desc->vertexShader || !desc->fragmentShader) {46ERROR_LOG(Log::G3D, "Failed creating graphics pipeline - missing vs/fs shader module pointers!");47pipeline[(size_t)rpType]->Post(VK_NULL_HANDLE);48return false;49}5051// Fill in the last part of the desc since now it's time to block.52VkShaderModule vs = desc->vertexShader->BlockUntilReady();53VkShaderModule fs = desc->fragmentShader->BlockUntilReady();54VkShaderModule gs = desc->geometryShader ? 
desc->geometryShader->BlockUntilReady() : VK_NULL_HANDLE;5556if (!vs || !fs || (!gs && desc->geometryShader)) {57ERROR_LOG(Log::G3D, "Failed creating graphics pipeline - missing shader modules");58pipeline[(size_t)rpType]->Post(VK_NULL_HANDLE);59return false;60}6162if (!compatibleRenderPass) {63ERROR_LOG(Log::G3D, "Failed creating graphics pipeline - compatible render pass was nullptr");64pipeline[(size_t)rpType]->Post(VK_NULL_HANDLE);65return false;66}6768uint32_t stageCount = 2;69VkPipelineShaderStageCreateInfo ss[3]{};70ss[0].sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;71ss[0].stage = VK_SHADER_STAGE_VERTEX_BIT;72ss[0].pSpecializationInfo = nullptr;73ss[0].module = vs;74ss[0].pName = "main";75ss[1].sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;76ss[1].stage = VK_SHADER_STAGE_FRAGMENT_BIT;77ss[1].pSpecializationInfo = nullptr;78ss[1].module = fs;79ss[1].pName = "main";80if (gs) {81stageCount++;82ss[2].sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;83ss[2].stage = VK_SHADER_STAGE_GEOMETRY_BIT;84ss[2].pSpecializationInfo = nullptr;85ss[2].module = gs;86ss[2].pName = "main";87}8889VkGraphicsPipelineCreateInfo pipe{ VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO };90pipe.pStages = ss;91pipe.stageCount = stageCount;92pipe.renderPass = compatibleRenderPass;93pipe.basePipelineIndex = 0;94pipe.pColorBlendState = &desc->cbs;95pipe.pDepthStencilState = &desc->dss;96pipe.pRasterizationState = &desc->rs;9798VkPipelineMultisampleStateCreateInfo ms{ VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO };99ms.rasterizationSamples = multisample ? 
sampleCount : VK_SAMPLE_COUNT_1_BIT;100if (multisample && (flags_ & PipelineFlags::USES_DISCARD)) {101// Extreme quality102ms.sampleShadingEnable = true;103ms.minSampleShading = 1.0f;104}105106VkPipelineInputAssemblyStateCreateInfo inputAssembly{ VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO };107inputAssembly.topology = desc->topology;108109// We will use dynamic viewport state.110pipe.pVertexInputState = &desc->vis;111pipe.pViewportState = &desc->views;112pipe.pTessellationState = nullptr;113pipe.pDynamicState = &desc->ds;114pipe.pInputAssemblyState = &inputAssembly;115pipe.pMultisampleState = &ms;116pipe.layout = desc->pipelineLayout->pipelineLayout;117pipe.basePipelineHandle = VK_NULL_HANDLE;118pipe.basePipelineIndex = 0;119pipe.subpass = 0;120121double start = time_now_d();122VkPipeline vkpipeline;123VkResult result = vkCreateGraphicsPipelines(vulkan->GetDevice(), desc->pipelineCache, 1, &pipe, nullptr, &vkpipeline);124125double now = time_now_d();126double taken_ms_since_scheduling = (now - scheduleTime) * 1000.0;127double taken_ms = (now - start) * 1000.0;128129#ifndef _DEBUG130if (taken_ms < 0.1) {131DEBUG_LOG(Log::G3D, "Pipeline (x/%d) time on %s: %0.2f ms, %0.2f ms since scheduling (fast) rpType: %04x sampleBits: %d (%s)",132countToCompile, GetCurrentThreadName(), taken_ms, taken_ms_since_scheduling, (u32)rpType, (u32)sampleCount, tag_.c_str());133} else {134INFO_LOG(Log::G3D, "Pipeline (x/%d) time on %s: %0.2f ms, %0.2f ms since scheduling rpType: %04x sampleBits: %d (%s)",135countToCompile, GetCurrentThreadName(), taken_ms, taken_ms_since_scheduling, (u32)rpType, (u32)sampleCount, tag_.c_str());136}137#endif138139bool success = true;140if (result == VK_INCOMPLETE) {141// Bad (disallowed by spec) return value seen on Adreno in Burnout :( Try to ignore?142// Would really like to log more here, we could probably attach more info to desc.143//144// At least create a null placeholder to avoid creating over and over if something is 
broken.145pipeline[(size_t)rpType]->Post(VK_NULL_HANDLE);146ERROR_LOG(Log::G3D, "Failed creating graphics pipeline! VK_INCOMPLETE");147LogCreationFailure();148success = false;149} else if (result != VK_SUCCESS) {150pipeline[(size_t)rpType]->Post(VK_NULL_HANDLE);151ERROR_LOG(Log::G3D, "Failed creating graphics pipeline! result='%s'", VulkanResultToString(result));152LogCreationFailure();153success = false;154} else {155// Success!156if (!tag_.empty()) {157vulkan->SetDebugName(vkpipeline, VK_OBJECT_TYPE_PIPELINE, tag_.c_str());158}159pipeline[(size_t)rpType]->Post(vkpipeline);160}161162return success;163}164165void VKRGraphicsPipeline::DestroyVariants(VulkanContext *vulkan, bool msaaOnly) {166for (size_t i = 0; i < (size_t)RenderPassType::TYPE_COUNT; i++) {167if (!this->pipeline[i])168continue;169if (msaaOnly && (i & (int)RenderPassType::MULTISAMPLE) == 0)170continue;171172VkPipeline pipeline = this->pipeline[i]->BlockUntilReady();173// pipeline can be nullptr here, if it failed to compile before.174if (pipeline) {175vulkan->Delete().QueueDeletePipeline(pipeline);176}177this->pipeline[i] = nullptr;178}179sampleCount_ = VK_SAMPLE_COUNT_FLAG_BITS_MAX_ENUM;180}181182void VKRGraphicsPipeline::DestroyVariantsInstant(VkDevice device) {183for (size_t i = 0; i < (size_t)RenderPassType::TYPE_COUNT; i++) {184if (pipeline[i]) {185vkDestroyPipeline(device, pipeline[i]->BlockUntilReady(), nullptr);186delete pipeline[i];187pipeline[i] = nullptr;188}189}190}191192VKRGraphicsPipeline::~VKRGraphicsPipeline() {193// This is called from the callbacked queued in QueueForDeletion.194// When we reach here, we should already be empty, so let's assert on that.195for (size_t i = 0; i < (size_t)RenderPassType::TYPE_COUNT; i++) {196_assert_(!pipeline[i]);197}198if (desc)199desc->Release();200}201202void VKRGraphicsPipeline::BlockUntilCompiled() {203for (size_t i = 0; i < (size_t)RenderPassType::TYPE_COUNT; i++) {204if (pipeline[i]) {205pipeline[i]->BlockUntilReady();206}207}208}209210void 
VKRGraphicsPipeline::QueueForDeletion(VulkanContext *vulkan) {211// Can't destroy variants here, the pipeline still lives for a while.212vulkan->Delete().QueueCallback([](VulkanContext *vulkan, void *p) {213VKRGraphicsPipeline *pipeline = (VKRGraphicsPipeline *)p;214pipeline->DestroyVariantsInstant(vulkan->GetDevice());215delete pipeline;216}, this);217}218219u32 VKRGraphicsPipeline::GetVariantsBitmask() const {220u32 bitmask = 0;221for (size_t i = 0; i < (size_t)RenderPassType::TYPE_COUNT; i++) {222if (pipeline[i]) {223bitmask |= 1 << i;224}225}226return bitmask;227}228229void VKRGraphicsPipeline::LogCreationFailure() const {230ERROR_LOG(Log::G3D, "vs: %s\n[END VS]", desc->vertexShaderSource.c_str());231ERROR_LOG(Log::G3D, "fs: %s\n[END FS]", desc->fragmentShaderSource.c_str());232if (desc->geometryShader) {233ERROR_LOG(Log::G3D, "gs: %s\n[END GS]", desc->geometryShaderSource.c_str());234}235// TODO: Maybe log various other state?236ERROR_LOG(Log::G3D, "======== END OF PIPELINE ==========");237}238239struct SinglePipelineTask {240VKRGraphicsPipeline *pipeline;241VkRenderPass compatibleRenderPass;242RenderPassType rpType;243VkSampleCountFlagBits sampleCount;244double scheduleTime;245int countToCompile;246};247248class CreateMultiPipelinesTask : public Task {249public:250CreateMultiPipelinesTask(VulkanContext *vulkan, std::vector<SinglePipelineTask> tasks) : vulkan_(vulkan), tasks_(std::move(tasks)) {251tasksInFlight_.fetch_add(1);252}253~CreateMultiPipelinesTask() = default;254255TaskType Type() const override {256return TaskType::CPU_COMPUTE;257}258259TaskPriority Priority() const override {260return TaskPriority::HIGH;261}262263void Run() override {264for (auto &task : tasks_) {265task.pipeline->Create(vulkan_, task.compatibleRenderPass, task.rpType, task.sampleCount, task.scheduleTime, task.countToCompile);266}267tasksInFlight_.fetch_sub(1);268}269270VulkanContext *vulkan_;271std::vector<SinglePipelineTask> tasks_;272273// Use during shutdown to make sure there 
aren't any leftover tasks sitting queued.274// Could probably be done more elegantly. Like waiting for all tasks of a type, or saving pointers to them, or something...275// Returns the maximum value of tasks in flight seen during the wait.276static int WaitForAll();277static std::atomic<int> tasksInFlight_;278};279280int CreateMultiPipelinesTask::WaitForAll() {281int inFlight = 0;282int maxInFlight = 0;283while ((inFlight = tasksInFlight_.load()) > 0) {284if (inFlight > maxInFlight) {285maxInFlight = inFlight;286}287sleep_ms(2, "create-multi-pipelines-wait");288}289return maxInFlight;290}291292std::atomic<int> CreateMultiPipelinesTask::tasksInFlight_;293294VulkanRenderManager::VulkanRenderManager(VulkanContext *vulkan, bool useThread, HistoryBuffer<FrameTimeData, FRAME_TIME_HISTORY_LENGTH> &frameTimeHistory)295: vulkan_(vulkan), queueRunner_(vulkan),296initTimeMs_("initTimeMs"),297totalGPUTimeMs_("totalGPUTimeMs"),298renderCPUTimeMs_("renderCPUTimeMs"),299descUpdateTimeMs_("descUpdateCPUTimeMs"),300useRenderThread_(useThread),301frameTimeHistory_(frameTimeHistory)302{303inflightFramesAtStart_ = vulkan_->GetInflightFrames();304305// For present timing experiments. 
Disabled for now.306measurePresentTime_ = false;307308frameDataShared_.Init(vulkan, useThread, measurePresentTime_);309310for (int i = 0; i < inflightFramesAtStart_; i++) {311frameData_[i].Init(vulkan, i);312}313314queueRunner_.CreateDeviceObjects();315}316317bool VulkanRenderManager::CreateBackbuffers() {318if (!vulkan_->IsSwapchainInited()) {319ERROR_LOG(Log::G3D, "No swapchain - can't create backbuffers");320return false;321}322323VkCommandBuffer cmdInit = GetInitCmd();324325if (vulkan_->HasRealSwapchain()) {326if (!CreateSwapchainViewsAndDepth(cmdInit, &postInitBarrier_, frameDataShared_)) {327return false;328}329}330331curWidthRaw_ = -1;332curHeightRaw_ = -1;333334if (newInflightFrames_ != -1) {335INFO_LOG(Log::G3D, "Updating inflight frames to %d", newInflightFrames_);336vulkan_->UpdateInflightFrames(newInflightFrames_);337newInflightFrames_ = -1;338}339340outOfDateFrames_ = 0;341342for (int i = 0; i < vulkan_->GetInflightFrames(); i++) {343auto &frameData = frameData_[i];344frameData.readyForFence = true; // Just in case.345}346347// Start the thread(s).348StartThreads();349return true;350}351352bool VulkanRenderManager::CreateSwapchainViewsAndDepth(VkCommandBuffer cmdInit, VulkanBarrierBatch *barriers, FrameDataShared &frameDataShared) {353VkResult res = vkGetSwapchainImagesKHR(vulkan_->GetDevice(), vulkan_->GetSwapchain(), &frameDataShared.swapchainImageCount_, nullptr);354_dbg_assert_(res == VK_SUCCESS);355356VkImage *swapchainImages = new VkImage[frameDataShared.swapchainImageCount_];357res = vkGetSwapchainImagesKHR(vulkan_->GetDevice(), vulkan_->GetSwapchain(), &frameDataShared.swapchainImageCount_, swapchainImages);358if (res != VK_SUCCESS) {359ERROR_LOG(Log::G3D, "vkGetSwapchainImagesKHR failed");360delete[] swapchainImages;361return false;362}363364static const VkSemaphoreCreateInfo semaphoreCreateInfo = { VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO };365for (uint32_t i = 0; i < frameDataShared.swapchainImageCount_; i++) {366SwapchainImageData 
sc_buffer{};367sc_buffer.image = swapchainImages[i];368res = vkCreateSemaphore(vulkan_->GetDevice(), &semaphoreCreateInfo, nullptr, &sc_buffer.renderingCompleteSemaphore);369_dbg_assert_(res == VK_SUCCESS);370371VkImageViewCreateInfo color_image_view = { VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO };372color_image_view.format = vulkan_->GetSwapchainFormat();373color_image_view.components.r = VK_COMPONENT_SWIZZLE_IDENTITY;374color_image_view.components.g = VK_COMPONENT_SWIZZLE_IDENTITY;375color_image_view.components.b = VK_COMPONENT_SWIZZLE_IDENTITY;376color_image_view.components.a = VK_COMPONENT_SWIZZLE_IDENTITY;377color_image_view.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;378color_image_view.subresourceRange.baseMipLevel = 0;379color_image_view.subresourceRange.levelCount = 1;380color_image_view.subresourceRange.baseArrayLayer = 0;381color_image_view.subresourceRange.layerCount = 1; // TODO: Investigate hw-assisted stereo.382color_image_view.viewType = VK_IMAGE_VIEW_TYPE_2D;383color_image_view.flags = 0;384color_image_view.image = sc_buffer.image;385386// We leave the images as UNDEFINED, there's no need to pre-transition them as387// the backbuffer renderpass starts out with them being auto-transitioned from UNDEFINED anyway.388// Also, turns out it's illegal to transition un-acquired images, thanks Hans-Kristian. 
See #11417.389390res = vkCreateImageView(vulkan_->GetDevice(), &color_image_view, nullptr, &sc_buffer.view);391vulkan_->SetDebugName(sc_buffer.view, VK_OBJECT_TYPE_IMAGE_VIEW, "swapchain_view");392frameDataShared.swapchainImages_.push_back(sc_buffer);393_dbg_assert_(res == VK_SUCCESS);394}395delete[] swapchainImages;396397// Must be before InitBackbufferRenderPass.398if (queueRunner_.InitDepthStencilBuffer(cmdInit, barriers)) {399queueRunner_.InitBackbufferFramebuffers(vulkan_->GetBackbufferWidth(), vulkan_->GetBackbufferHeight(), frameDataShared);400}401return true;402}403404void VulkanRenderManager::StartThreads() {405{406std::unique_lock<std::mutex> lock(compileQueueMutex_);407_assert_(compileQueue_.empty());408}409410runCompileThread_ = true; // For controlling the compiler thread's exit411412if (useRenderThread_) {413INFO_LOG(Log::G3D, "Starting Vulkan submission thread");414renderThread_ = std::thread(&VulkanRenderManager::RenderThreadFunc, this);415}416INFO_LOG(Log::G3D, "Starting Vulkan compiler thread");417compileThread_ = std::thread(&VulkanRenderManager::CompileThreadFunc, this);418419if (measurePresentTime_ && vulkan_->Extensions().KHR_present_wait && vulkan_->GetPresentMode() == VK_PRESENT_MODE_FIFO_KHR) {420INFO_LOG(Log::G3D, "Starting Vulkan present wait thread");421presentWaitThread_ = std::thread(&VulkanRenderManager::PresentWaitThreadFunc, this);422}423}424425// Called from main thread.426void VulkanRenderManager::StopThreads() {427// Make sure we don't have an open non-backbuffer render pass428if (curRenderStep_ && curRenderStep_->render.framebuffer != nullptr) {429EndCurRenderStep();430}431// Not sure this is a sensible check - should be ok even if not.432// _dbg_assert_(steps_.empty());433434if (useRenderThread_) {435_dbg_assert_(renderThread_.joinable());436// Tell the render thread to quit when it's done.437VKRRenderThreadTask *task = new VKRRenderThreadTask(VKRRunType::EXIT);438task->frame = 
vulkan_->GetCurFrame();439{440std::unique_lock<std::mutex> lock(pushMutex_);441renderThreadQueue_.push(task);442}443pushCondVar_.notify_one();444// Once the render thread encounters the above exit task, it'll exit.445renderThread_.join();446INFO_LOG(Log::G3D, "Vulkan submission thread joined. Frame=%d", vulkan_->GetCurFrame());447}448449for (int i = 0; i < vulkan_->GetInflightFrames(); i++) {450auto &frameData = frameData_[i];451// Zero the queries so we don't try to pull them later.452frameData.profile.timestampDescriptions.clear();453}454455{456std::unique_lock<std::mutex> lock(compileQueueMutex_);457runCompileThread_ = false; // Compiler and present thread both look at this bool.458_assert_(compileThread_.joinable());459compileCond_.notify_one();460}461compileThread_.join();462463if (presentWaitThread_.joinable()) {464presentWaitThread_.join();465}466467INFO_LOG(Log::G3D, "Vulkan compiler thread joined. Now wait for any straggling compile tasks. runCompileThread_ = %d", (int)runCompileThread_);468CreateMultiPipelinesTask::WaitForAll();469470{471std::unique_lock<std::mutex> lock(compileQueueMutex_);472_assert_(compileQueue_.empty());473}474}475476void VulkanRenderManager::DestroyBackbuffers() {477StopThreads();478vulkan_->WaitUntilQueueIdle();479480for (auto &image : frameDataShared_.swapchainImages_) {481vulkan_->Delete().QueueDeleteImageView(image.view);482vkDestroySemaphore(vulkan_->GetDevice(), image.renderingCompleteSemaphore, nullptr);483}484frameDataShared_.swapchainImages_.clear();485frameDataShared_.swapchainImageCount_ = 0;486487queueRunner_.DestroyBackBuffers();488}489490// Hm, I'm finding the occasional report of these asserts.491void VulkanRenderManager::CheckNothingPending() {492_assert_(pipelinesToCheck_.empty());493{494std::unique_lock<std::mutex> lock(compileQueueMutex_);495_assert_(compileQueue_.empty());496}497}498499VulkanRenderManager::~VulkanRenderManager() {500INFO_LOG(Log::G3D, "VulkanRenderManager 
destructor");501502{503std::unique_lock<std::mutex> lock(compileQueueMutex_);504_assert_(compileQueue_.empty());505}506507if (useRenderThread_) {508_dbg_assert_(!renderThread_.joinable());509}510511_dbg_assert_(!runCompileThread_); // StopThread should already have been called from DestroyBackbuffers.512513vulkan_->WaitUntilQueueIdle();514515_dbg_assert_(pipelineLayouts_.empty());516517VkDevice device = vulkan_->GetDevice();518frameDataShared_.Destroy(vulkan_);519for (int i = 0; i < inflightFramesAtStart_; i++) {520frameData_[i].Destroy(vulkan_);521}522queueRunner_.DestroyDeviceObjects();523}524525void VulkanRenderManager::CompileThreadFunc() {526SetCurrentThreadName("ShaderCompile");527while (true) {528bool exitAfterCompile = false;529std::vector<CompileQueueEntry> toCompile;530{531std::unique_lock<std::mutex> lock(compileQueueMutex_);532while (compileQueue_.empty() && runCompileThread_) {533compileCond_.wait(lock);534}535toCompile = std::move(compileQueue_);536compileQueue_.clear();537if (!runCompileThread_) {538exitAfterCompile = true;539}540}541542int countToCompile = (int)toCompile.size();543544// Here we sort the pending pipelines by vertex and fragment shaders,545std::map<std::pair<Promise<VkShaderModule> *, Promise<VkShaderModule> *>, std::vector<SinglePipelineTask>> map;546547double scheduleTime = time_now_d();548549// Here we sort pending graphics pipelines by vertex and fragment shaders, and split up further.550// Those with the same pairs of shaders should be on the same thread, at least on NVIDIA.551// I don't think PowerVR cares though, it doesn't seem to reuse information between the compiles,552// so we might want a different splitting algorithm there.553for (auto &entry : toCompile) {554switch (entry.type) {555case CompileQueueEntry::Type::GRAPHICS:556{557map[std::make_pair(entry.graphics->desc->vertexShader, 
entry.graphics->desc->fragmentShader)].push_back(558SinglePipelineTask{559entry.graphics,560entry.compatibleRenderPass,561entry.renderPassType,562entry.sampleCount,563scheduleTime, // these two are for logging purposes.564countToCompile,565}566);567break;568}569}570}571572for (const auto &iter : map) {573auto &shaders = iter.first;574auto &entries = iter.second;575576// NOTICE_LOG(Log::G3D, "For this shader pair, we have %d pipelines to create", (int)entries.size());577578Task *task = new CreateMultiPipelinesTask(vulkan_, entries);579g_threadManager.EnqueueTask(task);580}581582if (exitAfterCompile) {583break;584}585586// Hold off just a bit before we check again, to allow bunches of pipelines to collect.587sleep_ms(1, "pipeline-collect");588}589590std::unique_lock<std::mutex> lock(compileQueueMutex_);591_assert_(compileQueue_.empty());592}593594void VulkanRenderManager::RenderThreadFunc() {595SetCurrentThreadName("VulkanRenderMan");596while (true) {597_dbg_assert_(useRenderThread_);598599// Pop a task of the queue and execute it.600VKRRenderThreadTask *task = nullptr;601{602std::unique_lock<std::mutex> lock(pushMutex_);603while (renderThreadQueue_.empty()) {604pushCondVar_.wait(lock);605}606task = renderThreadQueue_.front();607renderThreadQueue_.pop();608}609610// Oh, we got a task! We can now have pushMutex_ unlocked, allowing the host to611// push more work when it feels like it, and just start working.612if (task->runType == VKRRunType::EXIT) {613// Oh, host wanted out. Let's leave.614delete task;615// In this case, there should be no more tasks.616break;617}618619Run(*task);620delete task;621}622623// Wait for the device to be done with everything, before tearing stuff down.624// TODO: Do we really need this? 
It's probably a good idea, though.625vkDeviceWaitIdle(vulkan_->GetDevice());626VLOG("PULL: Quitting");627}628629void VulkanRenderManager::PresentWaitThreadFunc() {630SetCurrentThreadName("PresentWait");631632#if !PPSSPP_PLATFORM(IOS_APP_STORE)633_dbg_assert_(vkWaitForPresentKHR != nullptr);634635uint64_t waitedId = frameIdGen_;636while (runCompileThread_) {637const uint64_t timeout = 1000000000ULL; // 1 sec638if (VK_SUCCESS == vkWaitForPresentKHR(vulkan_->GetDevice(), vulkan_->GetSwapchain(), waitedId, timeout)) {639frameTimeHistory_[waitedId].actualPresent = time_now_d();640frameTimeHistory_[waitedId].waitCount++;641waitedId++;642} else {643// We caught up somehow, which is a bad sign (we should have blocked, right?). Maybe we should break out of the loop?644sleep_ms(1, "present-wait-problem");645frameTimeHistory_[waitedId].waitCount++;646}647_dbg_assert_(waitedId <= frameIdGen_);648}649#endif650651INFO_LOG(Log::G3D, "Leaving PresentWaitThreadFunc()");652}653654void VulkanRenderManager::PollPresentTiming() {655// For VK_GOOGLE_display_timing, we need to poll.656657// Poll for information about completed frames.658// NOTE: We seem to get the information pretty late! Like after 6 frames, which is quite weird.659// Tested on POCO F4.660// TODO: Getting validation errors that this should be called from the thread doing the presenting.661// Probably a fair point. 
For now, we turn it off.662if (measurePresentTime_ && vulkan_->Extensions().GOOGLE_display_timing) {663uint32_t count = 0;664vkGetPastPresentationTimingGOOGLE(vulkan_->GetDevice(), vulkan_->GetSwapchain(), &count, nullptr);665if (count > 0) {666VkPastPresentationTimingGOOGLE *timings = new VkPastPresentationTimingGOOGLE[count];667vkGetPastPresentationTimingGOOGLE(vulkan_->GetDevice(), vulkan_->GetSwapchain(), &count, timings);668for (uint32_t i = 0; i < count; i++) {669uint64_t presentId = timings[i].presentID;670frameTimeHistory_[presentId].actualPresent = from_time_raw(timings[i].actualPresentTime);671frameTimeHistory_[presentId].desiredPresentTime = from_time_raw(timings[i].desiredPresentTime);672frameTimeHistory_[presentId].earliestPresentTime = from_time_raw(timings[i].earliestPresentTime);673double presentMargin = from_time_raw_relative(timings[i].presentMargin);674frameTimeHistory_[presentId].presentMargin = presentMargin;675}676delete[] timings;677}678}679}680681void VulkanRenderManager::BeginFrame(bool enableProfiling, bool enableLogProfiler) {682double frameBeginTime = time_now_d()683VLOG("BeginFrame");684VkDevice device = vulkan_->GetDevice();685686int curFrame = vulkan_->GetCurFrame();687FrameData &frameData = frameData_[curFrame];688VLOG("PUSH: Fencing %d", curFrame);689690// Makes sure the submission from the previous time around has happened. 
Otherwise691// we are not allowed to wait from another thread here..692if (useRenderThread_) {693std::unique_lock<std::mutex> lock(frameData.fenceMutex);694while (!frameData.readyForFence) {695frameData.fenceCondVar.wait(lock);696}697frameData.readyForFence = false;698}699700// This must be the very first Vulkan call we do in a new frame.701// Makes sure the very last command buffer from the frame before the previous has been fully executed.702if (vkWaitForFences(device, 1, &frameData.fence, true, UINT64_MAX) == VK_ERROR_DEVICE_LOST) {703_assert_msg_(false, "Device lost in vkWaitForFences");704}705vkResetFences(device, 1, &frameData.fence);706707uint64_t frameId = frameIdGen_++;708709PollPresentTiming();710711ResetDescriptorLists(curFrame);712713int validBits = vulkan_->GetQueueFamilyProperties(vulkan_->GetGraphicsQueueFamilyIndex()).timestampValidBits;714715FrameTimeData &frameTimeData = frameTimeHistory_.Add(frameId);716frameTimeData.frameId = frameId;717frameTimeData.frameBegin = frameBeginTime;718frameTimeData.afterFenceWait = time_now_d();719720// Can't set this until after the fence.721frameData.profile.enabled = enableProfiling;722frameData.profile.timestampsEnabled = enableProfiling && validBits > 0;723frameData.frameId = frameId;724725uint64_t queryResults[MAX_TIMESTAMP_QUERIES];726727if (enableProfiling) {728// Pull the profiling results from last time and produce a summary!729if (!frameData.profile.timestampDescriptions.empty() && frameData.profile.timestampsEnabled) {730int numQueries = (int)frameData.profile.timestampDescriptions.size();731VkResult res = vkGetQueryPoolResults(732vulkan_->GetDevice(),733frameData.profile.queryPool, 0, numQueries, sizeof(uint64_t) * numQueries, &queryResults[0], sizeof(uint64_t),734VK_QUERY_RESULT_64_BIT);735if (res == VK_SUCCESS) {736double timestampConversionFactor = (double)vulkan_->GetPhysicalDeviceProperties().properties.limits.timestampPeriod * (1.0 / 1000000.0);737uint64_t timestampDiffMask = validBits == 64 ? 
0xFFFFFFFFFFFFFFFFULL : ((1ULL << validBits) - 1);738std::stringstream str;739740char line[256];741totalGPUTimeMs_.Update(((double)((queryResults[numQueries - 1] - queryResults[0]) & timestampDiffMask) * timestampConversionFactor));742totalGPUTimeMs_.Format(line, sizeof(line));743str << line;744renderCPUTimeMs_.Update((frameData.profile.cpuEndTime - frameData.profile.cpuStartTime) * 1000.0);745renderCPUTimeMs_.Format(line, sizeof(line));746str << line;747descUpdateTimeMs_.Update(frameData.profile.descWriteTime * 1000.0);748descUpdateTimeMs_.Format(line, sizeof(line));749str << line;750snprintf(line, sizeof(line), "Descriptors written: %d (dedup: %d)\n", frameData.profile.descriptorsWritten, frameData.profile.descriptorsDeduped);751str << line;752snprintf(line, sizeof(line), "Resource deletions: %d\n", vulkan_->GetLastDeleteCount());753str << line;754for (int i = 0; i < numQueries - 1; i++) {755uint64_t diff = (queryResults[i + 1] - queryResults[i]) & timestampDiffMask;756double milliseconds = (double)diff * timestampConversionFactor;757758// Can't use SimpleStat for these very easily since these are dynamic per frame.759// Only the first one is static, the initCmd.760// Could try some hashtable tracking for the rest, later.761if (i == 0) {762initTimeMs_.Update(milliseconds);763initTimeMs_.Format(line, sizeof(line));764} else {765snprintf(line, sizeof(line), "%s: %0.3f ms\n", frameData.profile.timestampDescriptions[i + 1].c_str(), milliseconds);766}767str << line;768}769frameData.profile.profileSummary = str.str();770} else {771frameData.profile.profileSummary = "(error getting GPU profile - not ready?)";772}773} else {774std::stringstream str;775char line[256];776renderCPUTimeMs_.Update((frameData.profile.cpuEndTime - frameData.profile.cpuStartTime) * 1000.0);777renderCPUTimeMs_.Format(line, sizeof(line));778str << line;779descUpdateTimeMs_.Update(frameData.profile.descWriteTime * 1000.0);780descUpdateTimeMs_.Format(line, sizeof(line));781str << 
line;782snprintf(line, sizeof(line), "Descriptors written: %d\n", frameData.profile.descriptorsWritten);783str << line;784frameData.profile.profileSummary = str.str();785}786787#ifdef _DEBUG788std::string cmdString;789for (int i = 0; i < ARRAY_SIZE(frameData.profile.commandCounts); i++) {790if (frameData.profile.commandCounts[i] > 0) {791cmdString += StringFromFormat("%s: %d\n", VKRRenderCommandToString((VKRRenderCommand)i), frameData.profile.commandCounts[i]);792}793}794memset(frameData.profile.commandCounts, 0, sizeof(frameData.profile.commandCounts));795frameData.profile.profileSummary += cmdString;796#endif797}798799frameData.profile.descriptorsWritten = 0;800frameData.profile.descriptorsDeduped = 0;801802// Must be after the fence - this performs deletes.803VLOG("PUSH: BeginFrame %d", curFrame);804805insideFrame_ = true;806vulkan_->BeginFrame(enableLogProfiler ? GetInitCmd() : VK_NULL_HANDLE);807808frameData.profile.timestampDescriptions.clear();809if (frameData.profile.timestampsEnabled) {810// For various reasons, we need to always use an init cmd buffer in this case to perform the vkCmdResetQueryPool,811// unless we want to limit ourselves to only measure the main cmd buffer.812// Later versions of Vulkan have support for clearing queries on the CPU timeline, but we don't want to rely on that.813// Reserve the first two queries for initCmd.814frameData.profile.timestampDescriptions.emplace_back("initCmd Begin");815frameData.profile.timestampDescriptions.emplace_back("initCmd");816VkCommandBuffer initCmd = GetInitCmd();817}818}819820VkCommandBuffer VulkanRenderManager::GetInitCmd() {821int curFrame = vulkan_->GetCurFrame();822return frameData_[curFrame].GetInitCmd(vulkan_);823}824825void VulkanRenderManager::ReportBadStateForDraw() {826const char *cause1 = "";827char cause2[256];828cause2[0] = '\0';829if (!curRenderStep_) {830cause1 = "No current render step";831}832if (curRenderStep_ && curRenderStep_->stepType != VKRStepType::RENDER) {833cause1 = "Not a 
render step: ";834std::string str = VulkanQueueRunner::StepToString(vulkan_, *curRenderStep_);835truncate_cpy(cause2, str.c_str());836}837ERROR_LOG_REPORT_ONCE(baddraw, Log::G3D, "Can't draw: %s%s. Step count: %d", cause1, cause2, (int)steps_.size());838}839840int VulkanRenderManager::WaitForPipelines() {841return CreateMultiPipelinesTask::WaitForAll();842}843844VKRGraphicsPipeline *VulkanRenderManager::CreateGraphicsPipeline(VKRGraphicsPipelineDesc *desc, PipelineFlags pipelineFlags, uint32_t variantBitmask, VkSampleCountFlagBits sampleCount, bool cacheLoad, const char *tag) {845if (!desc->vertexShader || !desc->fragmentShader) {846ERROR_LOG(Log::G3D, "Can't create graphics pipeline with missing vs/ps: %p %p", desc->vertexShader, desc->fragmentShader);847return nullptr;848}849850VKRGraphicsPipeline *pipeline = new VKRGraphicsPipeline(pipelineFlags, tag);851pipeline->desc = desc;852pipeline->desc->AddRef();853if (curRenderStep_ && !cacheLoad) {854// The common case during gameplay.855pipelinesToCheck_.push_back(pipeline);856} else {857if (!variantBitmask) {858WARN_LOG(Log::G3D, "WARNING: Will not compile any variants of pipeline, not in renderpass and empty variantBitmask");859}860// Presumably we're in initialization, loading the shader cache.861// Look at variantBitmask to see what variants we should queue up.862RPKey key{863VKRRenderPassLoadAction::CLEAR, VKRRenderPassLoadAction::CLEAR, VKRRenderPassLoadAction::CLEAR,864VKRRenderPassStoreAction::STORE, VKRRenderPassStoreAction::DONT_CARE, VKRRenderPassStoreAction::DONT_CARE,865};866VKRRenderPass *compatibleRenderPass = queueRunner_.GetRenderPass(key);867std::unique_lock<std::mutex> lock(compileQueueMutex_);868_dbg_assert_(runCompileThread_);869bool needsCompile = false;870for (size_t i = 0; i < (size_t)RenderPassType::TYPE_COUNT; i++) {871if (!(variantBitmask & (1 << i)))872continue;873RenderPassType rpType = (RenderPassType)i;874875// Sanity check - don't compile incompatible types (could be caused by corrupt 
caches, changes in data structures, etc).876if ((pipelineFlags & PipelineFlags::USES_DEPTH_STENCIL) && !RenderPassTypeHasDepth(rpType)) {877WARN_LOG(Log::G3D, "Not compiling pipeline that requires depth, for non depth renderpass type");878continue;879}880// Shouldn't hit this, these should have been filtered elsewhere. However, still a good check to do.881if (sampleCount == VK_SAMPLE_COUNT_1_BIT && RenderPassTypeHasMultisample(rpType)) {882WARN_LOG(Log::G3D, "Not compiling single sample pipeline for a multisampled render pass type");883continue;884}885886if (rpType == RenderPassType::BACKBUFFER) {887sampleCount = VK_SAMPLE_COUNT_1_BIT;888}889890// Sanity check891if (runCompileThread_) {892pipeline->pipeline[i] = Promise<VkPipeline>::CreateEmpty();893compileQueue_.emplace_back(pipeline, compatibleRenderPass->Get(vulkan_, rpType, sampleCount), rpType, sampleCount);894}895needsCompile = true;896}897if (needsCompile)898compileCond_.notify_one();899}900return pipeline;901}902903void VulkanRenderManager::EndCurRenderStep() {904if (!curRenderStep_)905return;906907_dbg_assert_(runCompileThread_);908909RPKey key{910curRenderStep_->render.colorLoad, curRenderStep_->render.depthLoad, curRenderStep_->render.stencilLoad,911curRenderStep_->render.colorStore, curRenderStep_->render.depthStore, curRenderStep_->render.stencilStore,912};913// Save the accumulated pipeline flags so we can use that to configure the render pass.914// We'll often be able to avoid loading/saving the depth/stencil buffer.915curRenderStep_->render.pipelineFlags = curPipelineFlags_;916bool depthStencil = (curPipelineFlags_ & PipelineFlags::USES_DEPTH_STENCIL) != 0;917RenderPassType rpType = depthStencil ? 
RenderPassType::HAS_DEPTH : RenderPassType::DEFAULT;918919if (curRenderStep_->render.framebuffer && (rpType & RenderPassType::HAS_DEPTH) && !curRenderStep_->render.framebuffer->HasDepth()) {920WARN_LOG(Log::G3D, "Trying to render with a depth-writing pipeline to a framebuffer without depth: %s", curRenderStep_->render.framebuffer->Tag());921rpType = RenderPassType::DEFAULT;922}923924if (!curRenderStep_->render.framebuffer) {925rpType = RenderPassType::BACKBUFFER;926} else {927// Framebuffers can be stereo, and if so, will control the render pass type to match.928// Pipelines can be mono and render fine to stereo etc, so not checking them here.929// Note that we don't support rendering to just one layer of a multilayer framebuffer!930if (curRenderStep_->render.framebuffer->numLayers > 1) {931rpType = (RenderPassType)(rpType | RenderPassType::MULTIVIEW);932}933934if (curRenderStep_->render.framebuffer->sampleCount != VK_SAMPLE_COUNT_1_BIT) {935rpType = (RenderPassType)(rpType | RenderPassType::MULTISAMPLE);936}937}938939VKRRenderPass *renderPass = queueRunner_.GetRenderPass(key);940curRenderStep_->render.renderPassType = rpType;941942VkSampleCountFlagBits sampleCount = curRenderStep_->render.framebuffer ? 
curRenderStep_->render.framebuffer->sampleCount : VK_SAMPLE_COUNT_1_BIT;943944bool needsCompile = false;945for (VKRGraphicsPipeline *pipeline : pipelinesToCheck_) {946if (!pipeline) {947// Not good, but let's try not to crash.948continue;949}950std::unique_lock<std::mutex> lock(pipeline->mutex_);951if (!pipeline->pipeline[(size_t)rpType]) {952pipeline->pipeline[(size_t)rpType] = Promise<VkPipeline>::CreateEmpty();953lock.unlock();954955_assert_(renderPass);956compileQueueMutex_.lock();957compileQueue_.emplace_back(pipeline, renderPass->Get(vulkan_, rpType, sampleCount), rpType, sampleCount);958compileQueueMutex_.unlock();959needsCompile = true;960}961}962963compileQueueMutex_.lock();964if (needsCompile)965compileCond_.notify_one();966compileQueueMutex_.unlock();967pipelinesToCheck_.clear();968969// We don't do this optimization for very small targets, probably not worth it.970if (!curRenderArea_.Empty() && (curWidth_ > 32 && curHeight_ > 32)) {971curRenderStep_->render.renderArea = curRenderArea_.ToVkRect2D();972} else {973curRenderStep_->render.renderArea.offset = {};974curRenderStep_->render.renderArea.extent = { (uint32_t)curWidth_, (uint32_t)curHeight_ };975}976curRenderArea_.Reset();977978// We no longer have a current render step.979curRenderStep_ = nullptr;980curPipelineFlags_ = (PipelineFlags)0;981}982983void VulkanRenderManager::BindFramebufferAsRenderTarget(VKRFramebuffer *fb, VKRRenderPassLoadAction color, VKRRenderPassLoadAction depth, VKRRenderPassLoadAction stencil, uint32_t clearColor, float clearDepth, uint8_t clearStencil, const char *tag) {984_dbg_assert_(insideFrame_);985986// Eliminate dupes (bind of the framebuffer we already are rendering to), instantly convert to a clear if possible.987if (!steps_.empty() && steps_.back()->stepType == VKRStepType::RENDER && steps_.back()->render.framebuffer == fb) {988u32 clearMask = 0;989if (color == VKRRenderPassLoadAction::CLEAR) {990clearMask |= VK_IMAGE_ASPECT_COLOR_BIT;991}992if (depth == 
VKRRenderPassLoadAction::CLEAR) {993clearMask |= VK_IMAGE_ASPECT_DEPTH_BIT;994curPipelineFlags_ |= PipelineFlags::USES_DEPTH_STENCIL;995}996if (stencil == VKRRenderPassLoadAction::CLEAR) {997clearMask |= VK_IMAGE_ASPECT_STENCIL_BIT;998curPipelineFlags_ |= PipelineFlags::USES_DEPTH_STENCIL;999}10001001// If we need a clear and the previous step has commands already, it's best to just add a clear and keep going.1002// If there's no clear needed, let's also do that.1003//1004// However, if we do need a clear and there are no commands in the previous pass,1005// we want the queuerunner to have the opportunity to merge, so we'll go ahead and make a new renderpass.1006if (clearMask == 0 || !steps_.back()->commands.empty()) {1007curRenderStep_ = steps_.back();1008curStepHasViewport_ = false;1009curStepHasScissor_ = false;1010for (const auto &c : steps_.back()->commands) {1011if (c.cmd == VKRRenderCommand::VIEWPORT) {1012curStepHasViewport_ = true;1013} else if (c.cmd == VKRRenderCommand::SCISSOR) {1014curStepHasScissor_ = true;1015}1016}1017if (clearMask != 0) {1018VkRenderData data{ VKRRenderCommand::CLEAR };1019data.clear.clearColor = clearColor;1020data.clear.clearZ = clearDepth;1021data.clear.clearStencil = clearStencil;1022data.clear.clearMask = clearMask;1023curRenderStep_->commands.push_back(data);1024curRenderArea_.SetRect(0, 0, curWidth_, curHeight_);1025}1026return;1027}1028}10291030#ifdef _DEBUG1031SanityCheckPassesOnAdd();1032#endif10331034// More redundant bind elimination.1035if (curRenderStep_) {1036if (curRenderStep_->commands.empty()) {1037if (curRenderStep_->render.colorLoad != VKRRenderPassLoadAction::CLEAR && curRenderStep_->render.depthLoad != VKRRenderPassLoadAction::CLEAR && curRenderStep_->render.stencilLoad != VKRRenderPassLoadAction::CLEAR) {1038// Can trivially kill the last empty render step.1039_dbg_assert_(steps_.back() == curRenderStep_);1040delete steps_.back();1041steps_.pop_back();1042curRenderStep_ = nullptr;1043}1044VLOG("Empty render 
step. Usually happens after uploading pixels..");1045}10461047EndCurRenderStep();1048}10491050// Sanity check that we don't have binds to the backbuffer before binds to other buffers. It must always be bound last.1051if (steps_.size() >= 1 && steps_.back()->stepType == VKRStepType::RENDER && steps_.back()->render.framebuffer == nullptr && fb != nullptr) {1052_dbg_assert_(false);1053}10541055// Older Mali drivers have issues with depth and stencil don't match load/clear/etc.1056// TODO: Determine which versions and do this only where necessary.1057u32 lateClearMask = 0;1058if (depth != stencil && vulkan_->GetPhysicalDeviceProperties().properties.vendorID == VULKAN_VENDOR_ARM) {1059if (stencil == VKRRenderPassLoadAction::DONT_CARE) {1060stencil = depth;1061} else if (depth == VKRRenderPassLoadAction::DONT_CARE) {1062depth = stencil;1063} else if (stencil == VKRRenderPassLoadAction::CLEAR) {1064depth = stencil;1065lateClearMask |= VK_IMAGE_ASPECT_STENCIL_BIT;1066} else if (depth == VKRRenderPassLoadAction::CLEAR) {1067stencil = depth;1068lateClearMask |= VK_IMAGE_ASPECT_DEPTH_BIT;1069}1070}10711072VKRStep *step = new VKRStep{ VKRStepType::RENDER };1073step->render.framebuffer = fb;1074step->render.colorLoad = color;1075step->render.depthLoad = depth;1076step->render.stencilLoad = stencil;1077step->render.colorStore = VKRRenderPassStoreAction::STORE;1078step->render.depthStore = VKRRenderPassStoreAction::STORE;1079step->render.stencilStore = VKRRenderPassStoreAction::STORE;1080step->render.clearColor = clearColor;1081step->render.clearDepth = clearDepth;1082step->render.clearStencil = clearStencil;1083step->render.numDraws = 0;1084step->render.numReads = 0;1085step->render.finalColorLayout = !fb ? VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL : VK_IMAGE_LAYOUT_UNDEFINED;1086step->render.finalDepthStencilLayout = !fb ? 
VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL : VK_IMAGE_LAYOUT_UNDEFINED;1087// pipelineFlags, renderArea and renderPassType get filled in when we finalize the step. Do not read from them before that.1088step->tag = tag;1089steps_.push_back(step);10901091if (fb) {1092// If there's a KEEP, we naturally read from the framebuffer.1093if (color == VKRRenderPassLoadAction::KEEP || depth == VKRRenderPassLoadAction::KEEP || stencil == VKRRenderPassLoadAction::KEEP) {1094step->dependencies.insert(fb);1095}1096}10971098curRenderStep_ = step;1099curStepHasViewport_ = false;1100curStepHasScissor_ = false;1101if (fb) {1102curWidthRaw_ = fb->width;1103curHeightRaw_ = fb->height;1104curWidth_ = fb->width;1105curHeight_ = fb->height;1106} else {1107curWidthRaw_ = vulkan_->GetBackbufferWidth();1108curHeightRaw_ = vulkan_->GetBackbufferHeight();1109if (g_display.rotation == DisplayRotation::ROTATE_90 ||1110g_display.rotation == DisplayRotation::ROTATE_270) {1111curWidth_ = curHeightRaw_;1112curHeight_ = curWidthRaw_;1113} else {1114curWidth_ = curWidthRaw_;1115curHeight_ = curHeightRaw_;1116}1117}11181119if (color == VKRRenderPassLoadAction::CLEAR || depth == VKRRenderPassLoadAction::CLEAR || stencil == VKRRenderPassLoadAction::CLEAR) {1120curRenderArea_.SetRect(0, 0, curWidth_, curHeight_);1121}11221123// See above - we add a clear afterward if only one side for depth/stencil CLEAR/KEEP.1124if (lateClearMask != 0) {1125VkRenderData data{ VKRRenderCommand::CLEAR };1126data.clear.clearColor = clearColor;1127data.clear.clearZ = clearDepth;1128data.clear.clearStencil = clearStencil;1129data.clear.clearMask = lateClearMask;1130curRenderStep_->commands.push_back(data);1131}11321133if (invalidationCallback_) {1134invalidationCallback_(InvalidationCallbackFlags::RENDER_PASS_STATE);1135}1136}11371138bool VulkanRenderManager::CopyFramebufferToMemory(VKRFramebuffer *src, VkImageAspectFlags aspectBits, int x, int y, int w, int h, Draw::DataFormat destFormat, uint8_t *pixels, int 
pixelStride, Draw::ReadbackMode mode, const char *tag) {1139_dbg_assert_(insideFrame_);11401141for (int i = (int)steps_.size() - 1; i >= 0; i--) {1142if (steps_[i]->stepType == VKRStepType::RENDER && steps_[i]->render.framebuffer == src) {1143steps_[i]->render.numReads++;1144break;1145}1146}11471148EndCurRenderStep();11491150VKRStep *step = new VKRStep{ VKRStepType::READBACK };1151step->readback.aspectMask = aspectBits;1152step->readback.src = src;1153step->readback.srcRect.offset = { x, y };1154step->readback.srcRect.extent = { (uint32_t)w, (uint32_t)h };1155step->readback.delayed = mode == Draw::ReadbackMode::OLD_DATA_OK;1156step->dependencies.insert(src);1157step->tag = tag;1158steps_.push_back(step);11591160if (mode == Draw::ReadbackMode::BLOCK) {1161FlushSync();1162}11631164Draw::DataFormat srcFormat = Draw::DataFormat::UNDEFINED;1165if (aspectBits & VK_IMAGE_ASPECT_COLOR_BIT) {1166if (src) {1167switch (src->color.format) {1168case VK_FORMAT_R8G8B8A8_UNORM: srcFormat = Draw::DataFormat::R8G8B8A8_UNORM; break;1169default: _assert_(false);1170}1171} else {1172// Backbuffer.1173if (!(vulkan_->GetSurfaceCapabilities().supportedUsageFlags & VK_IMAGE_USAGE_TRANSFER_SRC_BIT)) {1174ERROR_LOG(Log::G3D, "Copying from backbuffer not supported, can't take screenshots");1175return false;1176}1177switch (vulkan_->GetSwapchainFormat()) {1178case VK_FORMAT_B8G8R8A8_UNORM: srcFormat = Draw::DataFormat::B8G8R8A8_UNORM; break;1179case VK_FORMAT_R8G8B8A8_UNORM: srcFormat = Draw::DataFormat::R8G8B8A8_UNORM; break;1180// NOTE: If you add supported formats here, make sure to also support them in VulkanQueueRunner::CopyReadbackBuffer.1181default:1182ERROR_LOG(Log::G3D, "Unsupported backbuffer format for screenshots");1183return false;1184}1185}1186} else if (aspectBits & VK_IMAGE_ASPECT_STENCIL_BIT) {1187// Copies from stencil are always S8.1188srcFormat = Draw::DataFormat::S8;1189} else if (aspectBits & VK_IMAGE_ASPECT_DEPTH_BIT) {1190switch (src->depth.format) {1191case 
VK_FORMAT_D24_UNORM_S8_UINT: srcFormat = Draw::DataFormat::D24_S8; break;1192case VK_FORMAT_D32_SFLOAT_S8_UINT: srcFormat = Draw::DataFormat::D32F; break;1193case VK_FORMAT_D16_UNORM_S8_UINT: srcFormat = Draw::DataFormat::D16; break;1194default: _assert_(false);1195}1196} else {1197_assert_(false);1198}11991200// Need to call this after FlushSync so the pixels are guaranteed to be ready in CPU-accessible VRAM.1201return queueRunner_.CopyReadbackBuffer(frameData_[vulkan_->GetCurFrame()],1202mode == Draw::ReadbackMode::OLD_DATA_OK ? src : nullptr, w, h, srcFormat, destFormat, pixelStride, pixels);1203}12041205void VulkanRenderManager::CopyImageToMemorySync(VkImage image, int mipLevel, int x, int y, int w, int h, Draw::DataFormat destFormat, uint8_t *pixels, int pixelStride, const char *tag) {1206_dbg_assert_(insideFrame_);12071208EndCurRenderStep();12091210VKRStep *step = new VKRStep{ VKRStepType::READBACK_IMAGE };1211step->readback_image.image = image;1212step->readback_image.srcRect.offset = { x, y };1213step->readback_image.srcRect.extent = { (uint32_t)w, (uint32_t)h };1214step->readback_image.mipLevel = mipLevel;1215step->tag = tag;1216steps_.push_back(step);12171218FlushSync();12191220// Need to call this after FlushSync so the pixels are guaranteed to be ready in CPU-accessible VRAM.1221queueRunner_.CopyReadbackBuffer(frameData_[vulkan_->GetCurFrame()], nullptr, w, h, destFormat, destFormat, pixelStride, pixels);12221223_dbg_assert_(steps_.empty());1224}12251226static void RemoveDrawCommands(FastVec<VkRenderData> *cmds) {1227// Here we remove any DRAW type commands when we hit a CLEAR.1228for (auto &c : *cmds) {1229if (c.cmd == VKRRenderCommand::DRAW || c.cmd == VKRRenderCommand::DRAW_INDEXED) {1230c.cmd = VKRRenderCommand::REMOVED;1231}1232}1233}12341235static void CleanupRenderCommands(FastVec<VkRenderData> *cmds) {1236size_t lastCommand[(int)VKRRenderCommand::NUM_RENDER_COMMANDS];1237memset(lastCommand, -1, sizeof(lastCommand));12381239// Find any duplicate 
state commands (likely from RemoveDrawCommands.)1240for (size_t i = 0; i < cmds->size(); ++i) {1241auto &c = cmds->at(i);1242auto &lastOfCmd = lastCommand[(uint8_t)c.cmd];12431244switch (c.cmd) {1245case VKRRenderCommand::REMOVED:1246continue;12471248case VKRRenderCommand::VIEWPORT:1249case VKRRenderCommand::SCISSOR:1250case VKRRenderCommand::BLEND:1251case VKRRenderCommand::STENCIL:1252if (lastOfCmd != -1) {1253cmds->at(lastOfCmd).cmd = VKRRenderCommand::REMOVED;1254}1255break;12561257case VKRRenderCommand::PUSH_CONSTANTS:1258// TODO: For now, we have to keep this one (it has an offset.) Still update lastCommand.1259break;12601261case VKRRenderCommand::CLEAR:1262// Ignore, doesn't participate in state.1263continue;12641265case VKRRenderCommand::DRAW_INDEXED:1266case VKRRenderCommand::DRAW:1267default:1268// Boundary - must keep state before this.1269memset(lastCommand, -1, sizeof(lastCommand));1270continue;1271}12721273lastOfCmd = i;1274}12751276// At this point, anything in lastCommand can be cleaned up too.1277// Note that it's safe to remove the last unused PUSH_CONSTANTS here.1278for (size_t i = 0; i < ARRAY_SIZE(lastCommand); ++i) {1279auto &lastOfCmd = lastCommand[i];1280if (lastOfCmd != -1) {1281cmds->at(lastOfCmd).cmd = VKRRenderCommand::REMOVED;1282}1283}1284}12851286void VulkanRenderManager::Clear(uint32_t clearColor, float clearZ, int clearStencil, int clearMask) {1287_dbg_assert_(curRenderStep_ && curRenderStep_->stepType == VKRStepType::RENDER);1288if (!clearMask)1289return;12901291// If this is the first drawing command or clears everything, merge it into the pass.1292int allAspects = VK_IMAGE_ASPECT_COLOR_BIT | VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT;1293if (curRenderStep_->render.numDraws == 0 || clearMask == allAspects) {1294curRenderStep_->render.clearColor = clearColor;1295curRenderStep_->render.clearDepth = clearZ;1296curRenderStep_->render.clearStencil = clearStencil;1297curRenderStep_->render.colorLoad = (clearMask & 
VK_IMAGE_ASPECT_COLOR_BIT) ? VKRRenderPassLoadAction::CLEAR : VKRRenderPassLoadAction::KEEP;1298curRenderStep_->render.depthLoad = (clearMask & VK_IMAGE_ASPECT_DEPTH_BIT) ? VKRRenderPassLoadAction::CLEAR : VKRRenderPassLoadAction::KEEP;1299curRenderStep_->render.stencilLoad = (clearMask & VK_IMAGE_ASPECT_STENCIL_BIT) ? VKRRenderPassLoadAction::CLEAR : VKRRenderPassLoadAction::KEEP;13001301if (clearMask & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {1302if (curRenderStep_->render.framebuffer && !curRenderStep_->render.framebuffer->HasDepth()) {1303WARN_LOG(Log::G3D, "Trying to clear depth/stencil on a non-depth framebuffer: %s", curRenderStep_->render.framebuffer->Tag());1304} else {1305curPipelineFlags_ |= PipelineFlags::USES_DEPTH_STENCIL;1306}1307}13081309// In case there were commands already.1310curRenderStep_->render.numDraws = 0;1311RemoveDrawCommands(&curRenderStep_->commands);1312} else {1313VkRenderData data{ VKRRenderCommand::CLEAR };1314data.clear.clearColor = clearColor;1315data.clear.clearZ = clearZ;1316data.clear.clearStencil = clearStencil;1317data.clear.clearMask = clearMask;1318curRenderStep_->commands.push_back(data);1319}13201321curRenderArea_.SetRect(0, 0, curWidth_, curHeight_);1322}13231324void VulkanRenderManager::CopyFramebuffer(VKRFramebuffer *src, VkRect2D srcRect, VKRFramebuffer *dst, VkOffset2D dstPos, VkImageAspectFlags aspectMask, const char *tag) {1325#ifdef _DEBUG1326SanityCheckPassesOnAdd();1327#endif13281329_dbg_assert_msg_(srcRect.offset.x >= 0, "srcrect offset x (%d) < 0", srcRect.offset.x);1330_dbg_assert_msg_(srcRect.offset.y >= 0, "srcrect offset y (%d) < 0", srcRect.offset.y);1331_dbg_assert_msg_(srcRect.offset.x + srcRect.extent.width <= (uint32_t)src->width, "srcrect offset x (%d) + extent (%d) > width (%d)", srcRect.offset.x, srcRect.extent.width, (uint32_t)src->width);1332_dbg_assert_msg_(srcRect.offset.y + srcRect.extent.height <= (uint32_t)src->height, "srcrect offset y (%d) + extent (%d) > height (%d)", 
srcRect.offset.y, srcRect.extent.height, (uint32_t)src->height);13331334_dbg_assert_msg_(srcRect.extent.width > 0, "copy srcwidth == 0");1335_dbg_assert_msg_(srcRect.extent.height > 0, "copy srcheight == 0");13361337_dbg_assert_msg_(dstPos.x >= 0, "dstPos offset x (%d) < 0", dstPos.x);1338_dbg_assert_msg_(dstPos.y >= 0, "dstPos offset y (%d) < 0", dstPos.y);1339_dbg_assert_msg_(dstPos.x + srcRect.extent.width <= (uint32_t)dst->width, "dstPos + extent x > width");1340_dbg_assert_msg_(dstPos.y + srcRect.extent.height <= (uint32_t)dst->height, "dstPos + extent y > height");13411342for (int i = (int)steps_.size() - 1; i >= 0; i--) {1343if (steps_[i]->stepType == VKRStepType::RENDER && steps_[i]->render.framebuffer == src) {1344if (aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) {1345if (steps_[i]->render.finalColorLayout == VK_IMAGE_LAYOUT_UNDEFINED) {1346steps_[i]->render.finalColorLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL;1347}1348}1349if (aspectMask & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {1350if (steps_[i]->render.finalDepthStencilLayout == VK_IMAGE_LAYOUT_UNDEFINED) {1351steps_[i]->render.finalDepthStencilLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL;1352}1353}1354steps_[i]->render.numReads++;1355break;1356}1357}1358for (int i = (int)steps_.size() - 1; i >= 0; i--) {1359if (steps_[i]->stepType == VKRStepType::RENDER && steps_[i]->render.framebuffer == dst) {1360if (aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) {1361if (steps_[i]->render.finalColorLayout == VK_IMAGE_LAYOUT_UNDEFINED) {1362steps_[i]->render.finalColorLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;1363}1364}1365if (aspectMask & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {1366if (steps_[i]->render.finalDepthStencilLayout == VK_IMAGE_LAYOUT_UNDEFINED) {1367steps_[i]->render.finalDepthStencilLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;1368}1369}1370break;1371}1372}13731374EndCurRenderStep();13751376VKRStep *step = new VKRStep{ VKRStepType::COPY };13771378step->copy.aspectMask 
= aspectMask;1379step->copy.src = src;1380step->copy.srcRect = srcRect;1381step->copy.dst = dst;1382step->copy.dstPos = dstPos;1383step->dependencies.insert(src);1384step->tag = tag;1385bool fillsDst = dst && srcRect.offset.x == 0 && srcRect.offset.y == 0 && srcRect.extent.width == dst->width && srcRect.extent.height == dst->height;1386if (dstPos.x != 0 || dstPos.y != 0 || !fillsDst)1387step->dependencies.insert(dst);13881389steps_.push_back(step);1390}13911392void VulkanRenderManager::BlitFramebuffer(VKRFramebuffer *src, VkRect2D srcRect, VKRFramebuffer *dst, VkRect2D dstRect, VkImageAspectFlags aspectMask, VkFilter filter, const char *tag) {1393#ifdef _DEBUG1394SanityCheckPassesOnAdd();1395#endif13961397_dbg_assert_msg_(srcRect.offset.x >= 0, "srcrect offset x (%d) < 0", srcRect.offset.x);1398_dbg_assert_msg_(srcRect.offset.y >= 0, "srcrect offset y (%d) < 0", srcRect.offset.y);1399_dbg_assert_msg_(srcRect.offset.x + srcRect.extent.width <= (uint32_t)src->width, "srcrect offset x (%d) + extent (%d) > width (%d)", srcRect.offset.x, srcRect.extent.width, (uint32_t)src->width);1400_dbg_assert_msg_(srcRect.offset.y + srcRect.extent.height <= (uint32_t)src->height, "srcrect offset y (%d) + extent (%d) > height (%d)", srcRect.offset.y, srcRect.extent.height, (uint32_t)src->height);14011402_dbg_assert_msg_(srcRect.extent.width > 0, "blit srcwidth == 0");1403_dbg_assert_msg_(srcRect.extent.height > 0, "blit srcheight == 0");14041405_dbg_assert_msg_(dstRect.offset.x >= 0, "dstrect offset x < 0");1406_dbg_assert_msg_(dstRect.offset.y >= 0, "dstrect offset y < 0");1407_dbg_assert_msg_(dstRect.offset.x + dstRect.extent.width <= (uint32_t)dst->width, "dstrect offset x + extent > width");1408_dbg_assert_msg_(dstRect.offset.y + dstRect.extent.height <= (uint32_t)dst->height, "dstrect offset y + extent > height");14091410_dbg_assert_msg_(dstRect.extent.width > 0, "blit dstwidth == 0");1411_dbg_assert_msg_(dstRect.extent.height > 0, "blit dstheight == 0");14121413// TODO: Seem to 
be missing final layouts here like in Copy...14141415for (int i = (int)steps_.size() - 1; i >= 0; i--) {1416if (steps_[i]->stepType == VKRStepType::RENDER && steps_[i]->render.framebuffer == src) {1417steps_[i]->render.numReads++;1418break;1419}1420}14211422// Sanity check. Added an assert to try to gather more info.1423// Got this assert in NPJH50443 FINAL FANTASY TYPE-0, but pretty rare. Moving back to debug assert.1424if (aspectMask & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {1425_dbg_assert_msg_(src->depth.image != VK_NULL_HANDLE, "%s", src->Tag());1426_dbg_assert_msg_(dst->depth.image != VK_NULL_HANDLE, "%s", dst->Tag());14271428if (!src->depth.image || !dst->depth.image) {1429// Something has gone wrong, but let's try to stumble along.1430return;1431}1432}14331434EndCurRenderStep();14351436VKRStep *step = new VKRStep{ VKRStepType::BLIT };1437step->blit.aspectMask = aspectMask;1438step->blit.src = src;1439step->blit.srcRect = srcRect;1440step->blit.dst = dst;1441step->blit.dstRect = dstRect;1442step->blit.filter = filter;1443step->dependencies.insert(src);1444step->tag = tag;1445bool fillsDst = dst && dstRect.offset.x == 0 && dstRect.offset.y == 0 && dstRect.extent.width == dst->width && dstRect.extent.height == dst->height;1446if (!fillsDst)1447step->dependencies.insert(dst);14481449steps_.push_back(step);1450}14511452VkImageView VulkanRenderManager::BindFramebufferAsTexture(VKRFramebuffer *fb, int binding, VkImageAspectFlags aspectBit, int layer) {1453_dbg_assert_(curRenderStep_ != nullptr);1454_dbg_assert_(fb != nullptr);14551456// We don't support texturing from stencil, neither do we support texturing from depth|stencil together (nonsensical).1457_dbg_assert_(aspectBit == VK_IMAGE_ASPECT_COLOR_BIT || aspectBit == VK_IMAGE_ASPECT_DEPTH_BIT);14581459// Mark the dependency, check for required transitions, and return the image.14601461// Optimization: If possible, use final*Layout to put the texture into the correct layout "early".1462for 
(int i = (int)steps_.size() - 1; i >= 0; i--) {1463if (steps_[i]->stepType == VKRStepType::RENDER && steps_[i]->render.framebuffer == fb) {1464if (aspectBit == VK_IMAGE_ASPECT_COLOR_BIT) {1465// If this framebuffer was rendered to earlier in this frame, make sure to pre-transition it to the correct layout.1466if (steps_[i]->render.finalColorLayout == VK_IMAGE_LAYOUT_UNDEFINED) {1467steps_[i]->render.finalColorLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;1468}1469// If we find some other layout, a copy after this is likely involved. It's fine though,1470// we'll just transition it right as we need it and lose a tiny optimization.1471} else if (aspectBit == VK_IMAGE_ASPECT_DEPTH_BIT) {1472// If this framebuffer was rendered to earlier in this frame, make sure to pre-transition it to the correct layout.1473if (steps_[i]->render.finalDepthStencilLayout == VK_IMAGE_LAYOUT_UNDEFINED) {1474steps_[i]->render.finalDepthStencilLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;1475}1476} // We don't (yet?) support texturing from stencil images.1477steps_[i]->render.numReads++;1478break;1479}1480}14811482// Track dependencies fully.1483curRenderStep_->dependencies.insert(fb);14841485// Add this pretransition unless we already have it.1486TransitionRequest rq{ fb, aspectBit, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL };1487curRenderStep_->preTransitions.insert(rq); // Note that insert avoids inserting duplicates.14881489if (layer == -1) {1490return aspectBit == VK_IMAGE_ASPECT_COLOR_BIT ? fb->color.texAllLayersView : fb->depth.texAllLayersView;1491} else {1492return aspectBit == VK_IMAGE_ASPECT_COLOR_BIT ? fb->color.texLayerViews[layer] : fb->depth.texLayerViews[layer];1493}1494}14951496// Called on main thread.1497// Sends the collected commands to the render thread. 
Submit-latency should be1498// measured from here, probably.1499void VulkanRenderManager::Finish() {1500EndCurRenderStep();15011502// Let's do just a bit of cleanup on render commands now.1503// TODO: Should look into removing this.1504for (auto &step : steps_) {1505if (step->stepType == VKRStepType::RENDER) {1506CleanupRenderCommands(&step->commands);1507}1508}15091510int curFrame = vulkan_->GetCurFrame();1511FrameData &frameData = frameData_[curFrame];15121513if (!postInitBarrier_.empty()) {1514VkCommandBuffer buffer = frameData.GetInitCmd(vulkan_);1515postInitBarrier_.Flush(buffer);1516}15171518VLOG("PUSH: Frame[%d]", curFrame);1519VKRRenderThreadTask *task = new VKRRenderThreadTask(VKRRunType::SUBMIT);1520task->frame = curFrame;1521if (useRenderThread_) {1522std::unique_lock<std::mutex> lock(pushMutex_);1523renderThreadQueue_.push(task);1524renderThreadQueue_.back()->steps = std::move(steps_);1525pushCondVar_.notify_one();1526} else {1527// Just do it!1528task->steps = std::move(steps_);1529Run(*task);1530delete task;1531}15321533steps_.clear();1534}15351536void VulkanRenderManager::Present() {1537int curFrame = vulkan_->GetCurFrame();15381539VKRRenderThreadTask *task = new VKRRenderThreadTask(VKRRunType::PRESENT);1540task->frame = curFrame;1541if (useRenderThread_) {1542std::unique_lock<std::mutex> lock(pushMutex_);1543renderThreadQueue_.push(task);1544pushCondVar_.notify_one();1545} else {1546// Just do it!1547Run(*task);1548delete task;1549}15501551vulkan_->EndFrame();1552insideFrame_ = false;1553}15541555// Called on the render thread.1556//1557// Can be called again after a VKRRunType::SYNC on the same frame.1558void VulkanRenderManager::Run(VKRRenderThreadTask &task) {1559FrameData &frameData = frameData_[task.frame];15601561if (task.runType == VKRRunType::PRESENT) {1562if (!frameData.skipSwap) {1563VkResult res = frameData.QueuePresent(vulkan_, frameDataShared_);1564frameTimeHistory_[frameData.frameId].queuePresent = time_now_d();1565if (res == 
VK_ERROR_OUT_OF_DATE_KHR) {1566// We clearly didn't get this in vkAcquireNextImageKHR because of the skipSwap check above.1567// Do the increment.1568outOfDateFrames_++;1569} else if (res == VK_SUBOPTIMAL_KHR) {1570outOfDateFrames_++;1571} else if (res != VK_SUCCESS) {1572_assert_msg_(false, "vkQueuePresentKHR failed! result=%s", VulkanResultToString(res));1573} else {1574// Success1575outOfDateFrames_ = 0;1576}1577} else {1578// We only get here if vkAcquireNextImage returned VK_ERROR_OUT_OF_DATE.1579if (vulkan_->HasRealSwapchain()) {1580outOfDateFrames_++;1581}1582frameData.skipSwap = false;1583}1584return;1585}15861587_dbg_assert_(!frameData.hasPresentCommands);15881589if (!frameTimeHistory_[frameData.frameId].firstSubmit) {1590frameTimeHistory_[frameData.frameId].firstSubmit = time_now_d();1591}1592frameData.Submit(vulkan_, FrameSubmitType::Pending, frameDataShared_);15931594// Flush descriptors.1595double descStart = time_now_d();1596FlushDescriptors(task.frame);1597frameData.profile.descWriteTime = time_now_d() - descStart;15981599if (!frameData.hasMainCommands) {1600// Effectively resets both main and present command buffers, since they both live in this pool.1601// We always record main commands first, so we don't need to reset the present command buffer separately.1602vkResetCommandPool(vulkan_->GetDevice(), frameData.cmdPoolMain, 0);16031604VkCommandBufferBeginInfo begin{ VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO };1605begin.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;1606VkResult res = vkBeginCommandBuffer(frameData.mainCmd, &begin);1607frameData.hasMainCommands = true;1608_assert_msg_(res == VK_SUCCESS, "vkBeginCommandBuffer failed! 
result=%s", VulkanResultToString(res));1609}16101611queueRunner_.PreprocessSteps(task.steps);1612// Likely during shutdown, happens in headless.1613if (task.steps.empty() && !frameData.hasAcquired)1614frameData.skipSwap = true;1615//queueRunner_.LogSteps(stepsOnThread, false);1616queueRunner_.RunSteps(task.steps, task.frame, frameData, frameDataShared_);16171618switch (task.runType) {1619case VKRRunType::SUBMIT:1620frameData.Submit(vulkan_, FrameSubmitType::FinishFrame, frameDataShared_);1621break;16221623case VKRRunType::SYNC:1624// The submit will trigger the readbackFence, and also do the wait for it.1625frameData.Submit(vulkan_, FrameSubmitType::Sync, frameDataShared_);16261627if (useRenderThread_) {1628std::unique_lock<std::mutex> lock(syncMutex_);1629syncCondVar_.notify_one();1630}16311632// At this point the GPU is idle, and we can resume filling the command buffers for the1633// current frame since and thus all previously enqueued command buffers have been1634// processed. No need to switch to the next frame number, would just be confusing.1635break;16361637default:1638_dbg_assert_(false);1639}16401641VLOG("PULL: Finished running frame %d", task.frame);1642}16431644// Called from main thread.1645void VulkanRenderManager::FlushSync() {1646_dbg_assert_(!curRenderStep_);16471648if (invalidationCallback_) {1649invalidationCallback_(InvalidationCallbackFlags::COMMAND_BUFFER_STATE);1650}16511652int curFrame = vulkan_->GetCurFrame();1653FrameData &frameData = frameData_[curFrame];16541655if (!postInitBarrier_.empty()) {1656VkCommandBuffer buffer = frameData.GetInitCmd(vulkan_);1657postInitBarrier_.Flush(buffer);1658}16591660if (useRenderThread_) {1661{1662VLOG("PUSH: Frame[%d]", curFrame);1663VKRRenderThreadTask *task = new VKRRenderThreadTask(VKRRunType::SYNC);1664task->frame = curFrame;1665{1666std::unique_lock<std::mutex> lock(pushMutex_);1667renderThreadQueue_.push(task);1668renderThreadQueue_.back()->steps = 
std::move(steps_);1669pushCondVar_.notify_one();1670}1671steps_.clear();1672}16731674{1675std::unique_lock<std::mutex> lock(syncMutex_);1676// Wait for the flush to be hit, since we're syncing.1677while (!frameData.syncDone) {1678VLOG("PUSH: Waiting for frame[%d].syncDone = 1 (sync)", curFrame);1679syncCondVar_.wait(lock);1680}1681frameData.syncDone = false;1682}1683} else {1684VKRRenderThreadTask task(VKRRunType::SYNC);1685task.frame = curFrame;1686task.steps = std::move(steps_);1687Run(task);1688steps_.clear();1689}1690}16911692void VulkanRenderManager::ResetStats() {1693initTimeMs_.Reset();1694totalGPUTimeMs_.Reset();1695renderCPUTimeMs_.Reset();1696}16971698VKRPipelineLayout *VulkanRenderManager::CreatePipelineLayout(BindingType *bindingTypes, size_t bindingTypesCount, bool geoShadersEnabled, const char *tag) {1699VKRPipelineLayout *layout = new VKRPipelineLayout();1700layout->SetTag(tag);1701layout->bindingTypesCount = (uint32_t)bindingTypesCount;17021703_dbg_assert_(bindingTypesCount <= ARRAY_SIZE(layout->bindingTypes));1704memcpy(layout->bindingTypes, bindingTypes, sizeof(BindingType) * bindingTypesCount);17051706VkDescriptorSetLayoutBinding bindings[VKRPipelineLayout::MAX_DESC_SET_BINDINGS];1707for (int i = 0; i < (int)bindingTypesCount; i++) {1708bindings[i].binding = i;1709bindings[i].descriptorCount = 1;1710bindings[i].pImmutableSamplers = nullptr;17111712switch (bindingTypes[i]) {1713case BindingType::COMBINED_IMAGE_SAMPLER:1714bindings[i].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;1715bindings[i].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT;1716break;1717case BindingType::UNIFORM_BUFFER_DYNAMIC_VERTEX:1718bindings[i].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC;1719bindings[i].stageFlags = VK_SHADER_STAGE_VERTEX_BIT;1720break;1721case BindingType::UNIFORM_BUFFER_DYNAMIC_ALL:1722bindings[i].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC;1723bindings[i].stageFlags = VK_SHADER_STAGE_VERTEX_BIT | 
VK_SHADER_STAGE_FRAGMENT_BIT;1724if (geoShadersEnabled) {1725bindings[i].stageFlags |= VK_SHADER_STAGE_GEOMETRY_BIT;1726}1727break;1728case BindingType::STORAGE_BUFFER_VERTEX:1729bindings[i].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;1730bindings[i].stageFlags = VK_SHADER_STAGE_VERTEX_BIT;1731break;1732case BindingType::STORAGE_BUFFER_COMPUTE:1733bindings[i].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;1734bindings[i].stageFlags = VK_SHADER_STAGE_COMPUTE_BIT;1735break;1736case BindingType::STORAGE_IMAGE_COMPUTE:1737bindings[i].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;1738bindings[i].stageFlags = VK_SHADER_STAGE_COMPUTE_BIT;1739break;1740default:1741_dbg_assert_(false);1742break;1743}1744}17451746VkDescriptorSetLayoutCreateInfo dsl = { VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO };1747dsl.bindingCount = (uint32_t)bindingTypesCount;1748dsl.pBindings = bindings;1749VkResult res = vkCreateDescriptorSetLayout(vulkan_->GetDevice(), &dsl, nullptr, &layout->descriptorSetLayout);1750_assert_(VK_SUCCESS == res && layout->descriptorSetLayout);17511752VkPipelineLayoutCreateInfo pl = { VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO };1753VkDescriptorSetLayout setLayouts[1] = { layout->descriptorSetLayout };1754pl.setLayoutCount = ARRAY_SIZE(setLayouts);1755pl.pSetLayouts = setLayouts;1756res = vkCreatePipelineLayout(vulkan_->GetDevice(), &pl, nullptr, &layout->pipelineLayout);1757_assert_(VK_SUCCESS == res && layout->pipelineLayout);17581759vulkan_->SetDebugName(layout->descriptorSetLayout, VK_OBJECT_TYPE_DESCRIPTOR_SET_LAYOUT, tag);1760vulkan_->SetDebugName(layout->pipelineLayout, VK_OBJECT_TYPE_PIPELINE_LAYOUT, tag);17611762for (int i = 0; i < VulkanContext::MAX_INFLIGHT_FRAMES; i++) {1763// Some games go beyond 1024 and end up having to resize like GTA, but most stay below so we start there.1764layout->frameData[i].pool.Create(vulkan_, bindingTypes, (uint32_t)bindingTypesCount, 1024);1765}17661767pipelineLayouts_.push_back(layout);1768return 
layout;1769}17701771void VulkanRenderManager::DestroyPipelineLayout(VKRPipelineLayout *layout) {1772for (auto iter = pipelineLayouts_.begin(); iter != pipelineLayouts_.end(); iter++) {1773if (*iter == layout) {1774pipelineLayouts_.erase(iter);1775break;1776}1777}1778vulkan_->Delete().QueueCallback([](VulkanContext *vulkan, void *userdata) {1779VKRPipelineLayout *layout = (VKRPipelineLayout *)userdata;1780for (int i = 0; i < VulkanContext::MAX_INFLIGHT_FRAMES; i++) {1781layout->frameData[i].pool.DestroyImmediately();1782}1783vkDestroyPipelineLayout(vulkan->GetDevice(), layout->pipelineLayout, nullptr);1784vkDestroyDescriptorSetLayout(vulkan->GetDevice(), layout->descriptorSetLayout, nullptr);17851786delete layout;1787}, layout);1788}17891790void VulkanRenderManager::FlushDescriptors(int frame) {1791for (auto iter : pipelineLayouts_) {1792iter->FlushDescSets(vulkan_, frame, &frameData_[frame].profile);1793}1794}17951796void VulkanRenderManager::ResetDescriptorLists(int frame) {1797for (auto iter : pipelineLayouts_) {1798VKRPipelineLayout::FrameData &data = iter->frameData[frame];17991800data.flushedDescriptors_ = 0;1801data.descSets_.clear();1802data.descData_.clear();1803}1804}18051806VKRPipelineLayout::~VKRPipelineLayout() {1807_assert_(frameData[0].pool.IsDestroyed());1808}18091810void VKRPipelineLayout::FlushDescSets(VulkanContext *vulkan, int frame, QueueProfileContext *profile) {1811_dbg_assert_(frame < VulkanContext::MAX_INFLIGHT_FRAMES);18121813FrameData &data = frameData[frame];18141815VulkanDescSetPool &pool = data.pool;1816FastVec<PackedDescriptor> &descData = data.descData_;1817FastVec<PendingDescSet> &descSets = data.descSets_;18181819pool.Reset();18201821VkDescriptorSet setCache[8];1822VkDescriptorSetLayout layoutsForAlloc[ARRAY_SIZE(setCache)];1823for (int i = 0; i < ARRAY_SIZE(setCache); i++) {1824layoutsForAlloc[i] = descriptorSetLayout;1825}1826int setsUsed = ARRAY_SIZE(setCache); // To allocate immediately.18271828// This will write all 
descriptors.1829// Initially, we just do a simple look-back comparing to the previous descriptor to avoid sequential dupes.1830// In theory, we could multithread this. Gotta be a lot of descriptors for that to be worth it though.18311832// Initially, let's do naive single desc set writes.1833VkWriteDescriptorSet writes[MAX_DESC_SET_BINDINGS];1834VkDescriptorImageInfo imageInfo[MAX_DESC_SET_BINDINGS]; // just picked a practical number1835VkDescriptorBufferInfo bufferInfo[MAX_DESC_SET_BINDINGS];18361837// Preinitialize fields that won't change.1838for (size_t i = 0; i < ARRAY_SIZE(writes); i++) {1839writes[i].descriptorCount = 1;1840writes[i].dstArrayElement = 0;1841writes[i].pTexelBufferView = nullptr;1842writes[i].pNext = nullptr;1843writes[i].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;1844}18451846size_t start = data.flushedDescriptors_;1847int writeCount = 0, dedupCount = 0;18481849for (size_t index = start; index < descSets.size(); index++) {1850auto &d = descSets[index];18511852// This is where we look up to see if we already have an identical descriptor previously in the array.1853// We could do a simple custom hash map here that doesn't handle collisions, since those won't matter.1854// Instead, for now we just check history one item backwards. 
Good enough, it seems.1855if (index > start + 1) {1856if (descSets[index - 1].count == d.count) {1857if (!memcmp(descData.data() + d.offset, descData.data() + descSets[index - 1].offset, d.count * sizeof(PackedDescriptor))) {1858d.set = descSets[index - 1].set;1859dedupCount++;1860continue;1861}1862}1863}18641865if (setsUsed < ARRAY_SIZE(setCache)) {1866d.set = setCache[setsUsed++];1867} else {1868// Allocate in small batches.1869bool success = pool.Allocate(setCache, ARRAY_SIZE(setCache), layoutsForAlloc);1870_dbg_assert_(success);1871d.set = setCache[0];1872setsUsed = 1;1873}18741875// TODO: Build up bigger batches of writes.1876const PackedDescriptor *data = descData.begin() + d.offset;1877int numWrites = 0;1878int numBuffers = 0;1879int numImages = 0;1880for (int i = 0; i < d.count; i++) {1881if (!data[i].image.view) { // This automatically also checks for an null buffer due to the union.1882continue;1883}1884switch (this->bindingTypes[i]) {1885case BindingType::COMBINED_IMAGE_SAMPLER:1886_dbg_assert_(data[i].image.sampler != VK_NULL_HANDLE);1887_dbg_assert_(data[i].image.view != VK_NULL_HANDLE);1888imageInfo[numImages].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;1889imageInfo[numImages].imageView = data[i].image.view;1890imageInfo[numImages].sampler = data[i].image.sampler;1891writes[numWrites].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;1892writes[numWrites].pImageInfo = &imageInfo[numImages];1893writes[numWrites].pBufferInfo = nullptr;1894numImages++;1895break;1896case BindingType::STORAGE_IMAGE_COMPUTE:1897_dbg_assert_(data[i].image.view != VK_NULL_HANDLE);1898imageInfo[numImages].imageLayout = VK_IMAGE_LAYOUT_GENERAL;1899imageInfo[numImages].imageView = data[i].image.view;1900imageInfo[numImages].sampler = VK_NULL_HANDLE;1901writes[numWrites].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;1902writes[numWrites].pImageInfo = &imageInfo[numImages];1903writes[numWrites].pBufferInfo = nullptr;1904numImages++;1905break;1906case 
BindingType::STORAGE_BUFFER_VERTEX:1907case BindingType::STORAGE_BUFFER_COMPUTE:1908_dbg_assert_(data[i].buffer.buffer != VK_NULL_HANDLE);1909bufferInfo[numBuffers].buffer = data[i].buffer.buffer;1910bufferInfo[numBuffers].range = data[i].buffer.range;1911bufferInfo[numBuffers].offset = data[i].buffer.offset;1912writes[numWrites].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;1913writes[numWrites].pBufferInfo = &bufferInfo[numBuffers];1914writes[numWrites].pImageInfo = nullptr;1915numBuffers++;1916break;1917case BindingType::UNIFORM_BUFFER_DYNAMIC_ALL:1918case BindingType::UNIFORM_BUFFER_DYNAMIC_VERTEX:1919_dbg_assert_(data[i].buffer.buffer != VK_NULL_HANDLE);1920bufferInfo[numBuffers].buffer = data[i].buffer.buffer;1921bufferInfo[numBuffers].range = data[i].buffer.range;1922bufferInfo[numBuffers].offset = 0;1923writes[numWrites].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC;1924writes[numWrites].pBufferInfo = &bufferInfo[numBuffers];1925writes[numWrites].pImageInfo = nullptr;1926numBuffers++;1927break;1928}1929writes[numWrites].dstBinding = i;1930writes[numWrites].dstSet = d.set;1931numWrites++;1932}19331934vkUpdateDescriptorSets(vulkan->GetDevice(), numWrites, writes, 0, nullptr);19351936writeCount++;1937}19381939data.flushedDescriptors_ = (int)descSets.size();1940profile->descriptorsWritten += writeCount;1941profile->descriptorsDeduped += dedupCount;1942}19431944void VulkanRenderManager::SanityCheckPassesOnAdd() {1945#if _DEBUG1946// Check that we don't have any previous passes that write to the backbuffer, that must ALWAYS be the last one.1947for (int i = 0; i < (int)steps_.size(); i++) {1948if (steps_[i]->stepType == VKRStepType::RENDER) {1949_dbg_assert_msg_(steps_[i]->render.framebuffer != nullptr, "Adding second backbuffer pass? Not good!");1950}1951}1952#endif1953}195419551956