// Path: blob/master/servers/rendering/renderer_rd/effects/fsr2.cpp
// 10279 views
/**************************************************************************/1/* fsr2.cpp */2/**************************************************************************/3/* This file is part of: */4/* GODOT ENGINE */5/* https://godotengine.org */6/**************************************************************************/7/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */8/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */9/* */10/* Permission is hereby granted, free of charge, to any person obtaining */11/* a copy of this software and associated documentation files (the */12/* "Software"), to deal in the Software without restriction, including */13/* without limitation the rights to use, copy, modify, merge, publish, */14/* distribute, sublicense, and/or sell copies of the Software, and to */15/* permit persons to whom the Software is furnished to do so, subject to */16/* the following conditions: */17/* */18/* The above copyright notice and this permission notice shall be */19/* included in all copies or substantial portions of the Software. */20/* */21/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */22/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */23/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */24/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */25/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */26/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */27/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/28/**************************************************************************/2930#include "fsr2.h"3132#include "../storage_rd/material_storage.h"33#include "../uniform_set_cache_rd.h"3435using namespace RendererRD;3637#ifndef _MSC_VER38#include <cwchar>39#define wcscpy_s wcscpy40#endif4142static RD::TextureType ffx_resource_type_to_rd_texture_type(FfxResourceType p_type) {43switch (p_type) {44case FFX_RESOURCE_TYPE_TEXTURE1D:45return RD::TEXTURE_TYPE_1D;46case FFX_RESOURCE_TYPE_TEXTURE2D:47return RD::TEXTURE_TYPE_2D;48case FFX_RESOURCE_TYPE_TEXTURE3D:49return RD::TEXTURE_TYPE_3D;50default:51return RD::TEXTURE_TYPE_MAX;52}53}5455static FfxResourceType rd_texture_type_to_ffx_resource_type(RD::TextureType p_type) {56switch (p_type) {57case RD::TEXTURE_TYPE_1D:58return FFX_RESOURCE_TYPE_TEXTURE1D;59case RD::TEXTURE_TYPE_2D:60return FFX_RESOURCE_TYPE_TEXTURE2D;61case RD::TEXTURE_TYPE_3D:62return FFX_RESOURCE_TYPE_TEXTURE3D;63default:64return FFX_RESOURCE_TYPE_BUFFER;65}66}6768static RD::DataFormat ffx_surface_format_to_rd_format(FfxSurfaceFormat p_format) {69switch (p_format) {70case FFX_SURFACE_FORMAT_R32G32B32A32_TYPELESS:71return RD::DATA_FORMAT_R32G32B32A32_SFLOAT;72case FFX_SURFACE_FORMAT_R32G32B32A32_FLOAT:73return RD::DATA_FORMAT_R32G32B32A32_SFLOAT;74case FFX_SURFACE_FORMAT_R16G16B16A16_FLOAT:75return RD::DATA_FORMAT_R16G16B16A16_SFLOAT;76case FFX_SURFACE_FORMAT_R16G16B16A16_UNORM:77return RD::DATA_FORMAT_R16G16B16A16_UNORM;78case FFX_SURFACE_FORMAT_R32G32_FLOAT:79return RD::DATA_FORMAT_R32G32_SFLOAT;80case FFX_SURFACE_FORMAT_R32_UINT:81return RD::DATA_FORMAT_R32_UINT;82case FFX_SURFACE_FORMAT_R8G8B8A8_TYPELESS:83return RD::DATA_FORMAT_R8G8B8A8_UNORM;84case FFX_SURFACE_FORMAT_R8G8B8A8_UNORM:85return RD::DATA_FORMAT_R8G8B8A8_UNORM;86case FFX_SURFACE_FORMAT_R11G11B10_FLOAT:87return RD::DATA_FORMAT_B10G11R11_UFLOAT_PACK32;88case FFX_SURFACE_FORMAT_R16G16_FLOAT:89return RD::DATA_FORMAT_R16G16_SFLOAT;90case FFX_SURFACE_FORMAT_R16G16_UINT:91return 
RD::DATA_FORMAT_R16G16_UINT;92case FFX_SURFACE_FORMAT_R16_FLOAT:93return RD::DATA_FORMAT_R16_SFLOAT;94case FFX_SURFACE_FORMAT_R16_UINT:95return RD::DATA_FORMAT_R16_UINT;96case FFX_SURFACE_FORMAT_R16_UNORM:97return RD::DATA_FORMAT_R16_UNORM;98case FFX_SURFACE_FORMAT_R16_SNORM:99return RD::DATA_FORMAT_R16_SNORM;100case FFX_SURFACE_FORMAT_R8_UNORM:101return RD::DATA_FORMAT_R8_UNORM;102case FFX_SURFACE_FORMAT_R8_UINT:103return RD::DATA_FORMAT_R8_UINT;104case FFX_SURFACE_FORMAT_R8G8_UNORM:105return RD::DATA_FORMAT_R8G8_UNORM;106case FFX_SURFACE_FORMAT_R32_FLOAT:107return RD::DATA_FORMAT_R32_SFLOAT;108default:109return RD::DATA_FORMAT_MAX;110}111}112113static FfxSurfaceFormat rd_format_to_ffx_surface_format(RD::DataFormat p_format) {114switch (p_format) {115case RD::DATA_FORMAT_R32G32B32A32_SFLOAT:116return FFX_SURFACE_FORMAT_R32G32B32A32_FLOAT;117case RD::DATA_FORMAT_R16G16B16A16_SFLOAT:118return FFX_SURFACE_FORMAT_R16G16B16A16_FLOAT;119case RD::DATA_FORMAT_R16G16B16A16_UNORM:120return FFX_SURFACE_FORMAT_R16G16B16A16_UNORM;121case RD::DATA_FORMAT_R32G32_SFLOAT:122return FFX_SURFACE_FORMAT_R32G32_FLOAT;123case RD::DATA_FORMAT_R32_UINT:124return FFX_SURFACE_FORMAT_R32_UINT;125case RD::DATA_FORMAT_R8G8B8A8_UNORM:126return FFX_SURFACE_FORMAT_R8G8B8A8_UNORM;127case RD::DATA_FORMAT_B10G11R11_UFLOAT_PACK32:128return FFX_SURFACE_FORMAT_R11G11B10_FLOAT;129case RD::DATA_FORMAT_R16G16_SFLOAT:130return FFX_SURFACE_FORMAT_R16G16_FLOAT;131case RD::DATA_FORMAT_R16G16_UINT:132return FFX_SURFACE_FORMAT_R16G16_UINT;133case RD::DATA_FORMAT_R16_SFLOAT:134return FFX_SURFACE_FORMAT_R16_FLOAT;135case RD::DATA_FORMAT_R16_UINT:136return FFX_SURFACE_FORMAT_R16_UINT;137case RD::DATA_FORMAT_R16_UNORM:138return FFX_SURFACE_FORMAT_R16_UNORM;139case RD::DATA_FORMAT_R16_SNORM:140return FFX_SURFACE_FORMAT_R16_SNORM;141case RD::DATA_FORMAT_R8_UNORM:142return FFX_SURFACE_FORMAT_R8_UNORM;143case RD::DATA_FORMAT_R8_UINT:144return FFX_SURFACE_FORMAT_R8_UINT;145case RD::DATA_FORMAT_R8G8_UNORM:146return 
FFX_SURFACE_FORMAT_R8G8_UNORM;147case RD::DATA_FORMAT_R32_SFLOAT:148return FFX_SURFACE_FORMAT_R32_FLOAT;149default:150return FFX_SURFACE_FORMAT_UNKNOWN;151}152}153154static uint32_t ffx_usage_to_rd_usage_flags(uint32_t p_flags) {155uint32_t ret = RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_CAN_UPDATE_BIT;156157if (p_flags & FFX_RESOURCE_USAGE_RENDERTARGET) {158ret |= RD::TEXTURE_USAGE_COLOR_ATTACHMENT_BIT;159}160161if (p_flags & FFX_RESOURCE_USAGE_UAV) {162ret |= RD::TEXTURE_USAGE_STORAGE_BIT;163ret |= RD::TEXTURE_USAGE_CAN_COPY_FROM_BIT;164ret |= RD::TEXTURE_USAGE_CAN_COPY_TO_BIT;165}166167return ret;168}169170static FfxErrorCode create_backend_context_rd(FfxFsr2Interface *p_backend_interface, FfxDevice p_device) {171FSR2Context::Scratch &scratch = *reinterpret_cast<FSR2Context::Scratch *>(p_backend_interface->scratchBuffer);172173// Store pointer to the device common to all contexts.174scratch.device = p_device;175176// Create a ring buffer of uniform buffers.177// FIXME: This could be optimized to be a single memory block if it was possible for RD to create views into a particular memory range of a UBO.178for (uint32_t i = 0; i < FSR2_UBO_RING_BUFFER_SIZE; i++) {179scratch.ubo_ring_buffer[i] = RD::get_singleton()->uniform_buffer_create(FFX_MAX_CONST_SIZE * sizeof(uint32_t));180ERR_FAIL_COND_V(scratch.ubo_ring_buffer[i].is_null(), FFX_ERROR_BACKEND_API_ERROR);181}182183return FFX_OK;184}185186static FfxErrorCode get_device_capabilities_rd(FfxFsr2Interface *p_backend_interface, FfxDeviceCapabilities *p_out_device_capabilities, FfxDevice p_device) {187FSR2Effect::Device &effect_device = *reinterpret_cast<FSR2Effect::Device *>(p_device);188189*p_out_device_capabilities = effect_device.capabilities;190191return FFX_OK;192}193194static FfxErrorCode destroy_backend_context_rd(FfxFsr2Interface *p_backend_interface) {195FSR2Context::Scratch &scratch = *reinterpret_cast<FSR2Context::Scratch *>(p_backend_interface->scratchBuffer);196197for (uint32_t i = 0; i < 
FSR2_UBO_RING_BUFFER_SIZE; i++) {198RD::get_singleton()->free(scratch.ubo_ring_buffer[i]);199}200201return FFX_OK;202}203204static FfxErrorCode create_resource_rd(FfxFsr2Interface *p_backend_interface, const FfxCreateResourceDescription *p_create_resource_description, FfxResourceInternal *p_out_resource) {205// FSR2's base implementation won't issue a call to create a heap type that isn't just default on its own,206// so we can safely ignore it as RD does not expose this concept.207ERR_FAIL_COND_V(p_create_resource_description->heapType != FFX_HEAP_TYPE_DEFAULT, FFX_ERROR_INVALID_ARGUMENT);208209RenderingDevice *rd = RD::get_singleton();210FSR2Context::Scratch &scratch = *reinterpret_cast<FSR2Context::Scratch *>(p_backend_interface->scratchBuffer);211FfxResourceDescription res_desc = p_create_resource_description->resourceDescription;212213// FSR2's base implementation never requests buffer creation.214ERR_FAIL_COND_V(res_desc.type != FFX_RESOURCE_TYPE_TEXTURE1D && res_desc.type != FFX_RESOURCE_TYPE_TEXTURE2D && res_desc.type != FFX_RESOURCE_TYPE_TEXTURE3D, FFX_ERROR_INVALID_ARGUMENT);215216if (res_desc.mipCount == 0) {217// Mipmap count must be derived from the resource's dimensions.218res_desc.mipCount = uint32_t(1 + std::floor(std::log2(MAX(MAX(res_desc.width, res_desc.height), res_desc.depth))));219}220221Vector<PackedByteArray> initial_data;222if (p_create_resource_description->initDataSize) {223PackedByteArray byte_array;224byte_array.resize(p_create_resource_description->initDataSize);225memcpy(byte_array.ptrw(), p_create_resource_description->initData, p_create_resource_description->initDataSize);226initial_data.push_back(byte_array);227}228229RD::TextureFormat texture_format;230texture_format.texture_type = ffx_resource_type_to_rd_texture_type(res_desc.type);231texture_format.format = ffx_surface_format_to_rd_format(res_desc.format);232texture_format.usage_bits = ffx_usage_to_rd_usage_flags(p_create_resource_description->usage);233texture_format.width = 
res_desc.width;234texture_format.height = res_desc.height;235texture_format.depth = res_desc.depth;236texture_format.mipmaps = res_desc.mipCount;237texture_format.is_discardable = true;238239RID texture = rd->texture_create(texture_format, RD::TextureView(), initial_data);240ERR_FAIL_COND_V(texture.is_null(), FFX_ERROR_BACKEND_API_ERROR);241242rd->set_resource_name(texture, String(p_create_resource_description->name));243244// Add the resource to the storage and use the internal index to reference it.245p_out_resource->internalIndex = scratch.resources.add(texture, false, p_create_resource_description->id, res_desc);246247return FFX_OK;248}249250static FfxErrorCode register_resource_rd(FfxFsr2Interface *p_backend_interface, const FfxResource *p_in_resource, FfxResourceInternal *p_out_resource) {251if (p_in_resource->resource == nullptr) {252// Null resource case.253p_out_resource->internalIndex = -1;254return FFX_OK;255}256257FSR2Context::Scratch &scratch = *reinterpret_cast<FSR2Context::Scratch *>(p_backend_interface->scratchBuffer);258const RID &rid = *reinterpret_cast<const RID *>(p_in_resource->resource);259ERR_FAIL_COND_V(rid.is_null(), FFX_ERROR_INVALID_ARGUMENT);260261// Add the resource to the storage and use the internal index to reference it.262p_out_resource->internalIndex = scratch.resources.add(rid, true, FSR2Context::RESOURCE_ID_DYNAMIC, p_in_resource->description);263264return FFX_OK;265}266267static FfxErrorCode unregister_resources_rd(FfxFsr2Interface *p_backend_interface) {268FSR2Context::Scratch &scratch = *reinterpret_cast<FSR2Context::Scratch *>(p_backend_interface->scratchBuffer);269LocalVector<uint32_t> dynamic_list_copy = scratch.resources.dynamic_list;270for (uint32_t i : dynamic_list_copy) {271scratch.resources.remove(i);272}273274return FFX_OK;275}276277static FfxResourceDescription get_resource_description_rd(FfxFsr2Interface *p_backend_interface, FfxResourceInternal p_resource) {278if (p_resource.internalIndex != -1) 
{279FSR2Context::Scratch &scratch = *reinterpret_cast<FSR2Context::Scratch *>(p_backend_interface->scratchBuffer);280return scratch.resources.descriptions[p_resource.internalIndex];281} else {282return {};283}284}285286static FfxErrorCode destroy_resource_rd(FfxFsr2Interface *p_backend_interface, FfxResourceInternal p_resource) {287if (p_resource.internalIndex != -1) {288FSR2Context::Scratch &scratch = *reinterpret_cast<FSR2Context::Scratch *>(p_backend_interface->scratchBuffer);289if (scratch.resources.rids[p_resource.internalIndex].is_valid()) {290RD::get_singleton()->free(scratch.resources.rids[p_resource.internalIndex]);291scratch.resources.remove(p_resource.internalIndex);292}293}294295return FFX_OK;296}297298static FfxErrorCode create_pipeline_rd(FfxFsr2Interface *p_backend_interface, FfxFsr2Pass p_pass, const FfxPipelineDescription *p_pipeline_description, FfxPipelineState *p_out_pipeline) {299FSR2Context::Scratch &scratch = *reinterpret_cast<FSR2Context::Scratch *>(p_backend_interface->scratchBuffer);300FSR2Effect::Device &device = *reinterpret_cast<FSR2Effect::Device *>(scratch.device);301FSR2Effect::Pass &effect_pass = device.passes[p_pass];302303if (effect_pass.pipeline.pipeline_rid.is_null()) {304// Create pipeline for the device if it hasn't been created yet.305effect_pass.root_signature.shader_rid = effect_pass.shader->version_get_shader(effect_pass.shader_version, effect_pass.shader_variant);306ERR_FAIL_COND_V(effect_pass.root_signature.shader_rid.is_null(), FFX_ERROR_BACKEND_API_ERROR);307308effect_pass.pipeline.pipeline_rid = RD::get_singleton()->compute_pipeline_create(effect_pass.root_signature.shader_rid);309ERR_FAIL_COND_V(effect_pass.pipeline.pipeline_rid.is_null(), FFX_ERROR_BACKEND_API_ERROR);310}311312// While this is not their intended use, we use the pipeline and root signature pointers to store the313// RIDs to the pipeline and shader that RD needs for the compute pipeline.314p_out_pipeline->pipeline = 
reinterpret_cast<FfxPipeline>(&effect_pass.pipeline);315p_out_pipeline->rootSignature = reinterpret_cast<FfxRootSignature>(&effect_pass.root_signature);316317p_out_pipeline->srvCount = effect_pass.sampled_bindings.size();318ERR_FAIL_COND_V(p_out_pipeline->srvCount > FFX_MAX_NUM_SRVS, FFX_ERROR_OUT_OF_RANGE);319memcpy(p_out_pipeline->srvResourceBindings, effect_pass.sampled_bindings.ptr(), sizeof(FfxResourceBinding) * p_out_pipeline->srvCount);320321p_out_pipeline->uavCount = effect_pass.storage_bindings.size();322ERR_FAIL_COND_V(p_out_pipeline->uavCount > FFX_MAX_NUM_UAVS, FFX_ERROR_OUT_OF_RANGE);323memcpy(p_out_pipeline->uavResourceBindings, effect_pass.storage_bindings.ptr(), sizeof(FfxResourceBinding) * p_out_pipeline->uavCount);324325p_out_pipeline->constCount = effect_pass.uniform_bindings.size();326ERR_FAIL_COND_V(p_out_pipeline->constCount > FFX_MAX_NUM_CONST_BUFFERS, FFX_ERROR_OUT_OF_RANGE);327memcpy(p_out_pipeline->cbResourceBindings, effect_pass.uniform_bindings.ptr(), sizeof(FfxResourceBinding) * p_out_pipeline->constCount);328329bool low_resolution_mvs = (p_pipeline_description->contextFlags & FFX_FSR2_ENABLE_DISPLAY_RESOLUTION_MOTION_VECTORS) == 0;330331if (p_pass == FFX_FSR2_PASS_ACCUMULATE || p_pass == FFX_FSR2_PASS_ACCUMULATE_SHARPEN) {332// Change the binding for motion vectors in this particular pass if low resolution MVs are used.333if (low_resolution_mvs) {334FfxResourceBinding &binding = p_out_pipeline->srvResourceBindings[2];335wcscpy_s(binding.name, L"r_dilated_motion_vectors");336}337}338339return FFX_OK;340}341342static FfxErrorCode destroy_pipeline_rd(FfxFsr2Interface *p_backend_interface, FfxPipelineState *p_pipeline) {343// We don't want to destroy pipelines when the FSR2 API deems it necessary as it'll do so whenever the context is destroyed.344345return FFX_OK;346}347348static FfxErrorCode schedule_gpu_job_rd(FfxFsr2Interface *p_backend_interface, const FfxGpuJobDescription *p_job) {349ERR_FAIL_NULL_V(p_backend_interface, 
FFX_ERROR_INVALID_ARGUMENT);350ERR_FAIL_NULL_V(p_job, FFX_ERROR_INVALID_ARGUMENT);351352FSR2Context::Scratch &scratch = *reinterpret_cast<FSR2Context::Scratch *>(p_backend_interface->scratchBuffer);353scratch.gpu_jobs.push_back(*p_job);354355return FFX_OK;356}357358static FfxErrorCode execute_gpu_job_clear_float_rd(FSR2Context::Scratch &p_scratch, const FfxClearFloatJobDescription &p_job) {359RID resource = p_scratch.resources.rids[p_job.target.internalIndex];360FfxResourceDescription &desc = p_scratch.resources.descriptions[p_job.target.internalIndex];361362ERR_FAIL_COND_V(desc.type == FFX_RESOURCE_TYPE_BUFFER, FFX_ERROR_INVALID_ARGUMENT);363364Color color(p_job.color[0], p_job.color[1], p_job.color[2], p_job.color[3]);365RD::get_singleton()->texture_clear(resource, color, 0, desc.mipCount, 0, 1);366367return FFX_OK;368}369370static FfxErrorCode execute_gpu_job_copy_rd(FSR2Context::Scratch &p_scratch, const FfxCopyJobDescription &p_job) {371RID src = p_scratch.resources.rids[p_job.src.internalIndex];372RID dst = p_scratch.resources.rids[p_job.dst.internalIndex];373FfxResourceDescription &src_desc = p_scratch.resources.descriptions[p_job.src.internalIndex];374FfxResourceDescription &dst_desc = p_scratch.resources.descriptions[p_job.dst.internalIndex];375376ERR_FAIL_COND_V(src_desc.type == FFX_RESOURCE_TYPE_BUFFER, FFX_ERROR_INVALID_ARGUMENT);377ERR_FAIL_COND_V(dst_desc.type == FFX_RESOURCE_TYPE_BUFFER, FFX_ERROR_INVALID_ARGUMENT);378379for (uint32_t mip_level = 0; mip_level < src_desc.mipCount; mip_level++) {380RD::get_singleton()->texture_copy(src, dst, Vector3(0, 0, 0), Vector3(0, 0, 0), Vector3(src_desc.width, src_desc.height, src_desc.depth), mip_level, mip_level, 0, 0);381}382383return FFX_OK;384}385386static FfxErrorCode execute_gpu_job_compute_rd(FSR2Context::Scratch &p_scratch, const FfxComputeJobDescription &p_job) {387UniformSetCacheRD *uniform_set_cache = UniformSetCacheRD::get_singleton();388ERR_FAIL_NULL_V(uniform_set_cache, 
FFX_ERROR_BACKEND_API_ERROR);389390FSR2Effect::RootSignature &root_signature = *reinterpret_cast<FSR2Effect::RootSignature *>(p_job.pipeline.rootSignature);391ERR_FAIL_COND_V(root_signature.shader_rid.is_null(), FFX_ERROR_INVALID_ARGUMENT);392393FSR2Effect::Pipeline &backend_pipeline = *reinterpret_cast<FSR2Effect::Pipeline *>(p_job.pipeline.pipeline);394ERR_FAIL_COND_V(backend_pipeline.pipeline_rid.is_null(), FFX_ERROR_INVALID_ARGUMENT);395396thread_local LocalVector<RD::Uniform> compute_uniforms;397compute_uniforms.clear();398399for (uint32_t i = 0; i < p_job.pipeline.srvCount; i++) {400RID texture_rid = p_scratch.resources.rids[p_job.srvs[i].internalIndex];401RD::Uniform texture_uniform(RD::UNIFORM_TYPE_TEXTURE, p_job.pipeline.srvResourceBindings[i].slotIndex, texture_rid);402compute_uniforms.push_back(texture_uniform);403}404405for (uint32_t i = 0; i < p_job.pipeline.uavCount; i++) {406RID image_rid = p_scratch.resources.rids[p_job.uavs[i].internalIndex];407RD::Uniform storage_uniform;408storage_uniform.uniform_type = RD::UNIFORM_TYPE_IMAGE;409storage_uniform.binding = p_job.pipeline.uavResourceBindings[i].slotIndex;410411if (p_job.uavMip[i] > 0) {412LocalVector<RID> &mip_slice_rids = p_scratch.resources.mip_slice_rids[p_job.uavs[i].internalIndex];413if (mip_slice_rids.is_empty()) {414mip_slice_rids.resize(p_scratch.resources.descriptions[p_job.uavs[i].internalIndex].mipCount);415}416417ERR_FAIL_COND_V(p_job.uavMip[i] >= mip_slice_rids.size(), FFX_ERROR_INVALID_ARGUMENT);418419if (mip_slice_rids[p_job.uavMip[i]].is_null()) {420mip_slice_rids[p_job.uavMip[i]] = RD::get_singleton()->texture_create_shared_from_slice(RD::TextureView(), image_rid, 0, p_job.uavMip[i]);421}422423ERR_FAIL_COND_V(mip_slice_rids[p_job.uavMip[i]].is_null(), FFX_ERROR_BACKEND_API_ERROR);424425storage_uniform.append_id(mip_slice_rids[p_job.uavMip[i]]);426} else {427storage_uniform.append_id(image_rid);428}429430compute_uniforms.push_back(storage_uniform);431}432433for (uint32_t i = 0; i < 
p_job.pipeline.constCount; i++) {434RID buffer_rid = p_scratch.ubo_ring_buffer[p_scratch.ubo_ring_buffer_index];435p_scratch.ubo_ring_buffer_index = (p_scratch.ubo_ring_buffer_index + 1) % FSR2_UBO_RING_BUFFER_SIZE;436437RD::get_singleton()->buffer_update(buffer_rid, 0, p_job.cbs[i].uint32Size * sizeof(uint32_t), p_job.cbs[i].data);438439RD::Uniform buffer_uniform(RD::UNIFORM_TYPE_UNIFORM_BUFFER, p_job.pipeline.cbResourceBindings[i].slotIndex, buffer_rid);440compute_uniforms.push_back(buffer_uniform);441}442443FSR2Effect::Device &device = *reinterpret_cast<FSR2Effect::Device *>(p_scratch.device);444RD::Uniform u_point_clamp_sampler(RD::UniformType::UNIFORM_TYPE_SAMPLER, 0, device.point_clamp_sampler);445RD::Uniform u_linear_clamp_sampler(RD::UniformType::UNIFORM_TYPE_SAMPLER, 1, device.linear_clamp_sampler);446447RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin();448RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, backend_pipeline.pipeline_rid);449RD::get_singleton()->compute_list_bind_uniform_set(compute_list, uniform_set_cache->get_cache(root_signature.shader_rid, 0, u_point_clamp_sampler, u_linear_clamp_sampler), 0);450RD::get_singleton()->compute_list_bind_uniform_set(compute_list, uniform_set_cache->get_cache_vec(root_signature.shader_rid, 1, compute_uniforms), 1);451RD::get_singleton()->compute_list_dispatch(compute_list, p_job.dimensions[0], p_job.dimensions[1], p_job.dimensions[2]);452RD::get_singleton()->compute_list_end();453454return FFX_OK;455}456457static FfxErrorCode execute_gpu_jobs_rd(FfxFsr2Interface *p_backend_interface, FfxCommandList p_command_list) {458ERR_FAIL_NULL_V(p_backend_interface, FFX_ERROR_INVALID_ARGUMENT);459460FSR2Context::Scratch &scratch = *reinterpret_cast<FSR2Context::Scratch *>(p_backend_interface->scratchBuffer);461FfxErrorCode error_code = FFX_OK;462for (const FfxGpuJobDescription &job : scratch.gpu_jobs) {463switch (job.jobType) {464case FFX_GPU_JOB_CLEAR_FLOAT: {465error_code = 
execute_gpu_job_clear_float_rd(scratch, job.clearJobDescriptor);466} break;467case FFX_GPU_JOB_COPY: {468error_code = execute_gpu_job_copy_rd(scratch, job.copyJobDescriptor);469} break;470case FFX_GPU_JOB_COMPUTE: {471error_code = execute_gpu_job_compute_rd(scratch, job.computeJobDescriptor);472} break;473default: {474error_code = FFX_ERROR_INVALID_ARGUMENT;475} break;476}477478if (error_code != FFX_OK) {479scratch.gpu_jobs.clear();480return error_code;481}482}483484scratch.gpu_jobs.clear();485486return FFX_OK;487}488489static FfxResource get_resource_rd(RID *p_rid, const wchar_t *p_name) {490FfxResource res = {};491if (p_rid->is_null()) {492return res;493}494495wcscpy_s(res.name, p_name);496497RD::TextureFormat texture_format = RD::get_singleton()->texture_get_format(*p_rid);498res.description.type = rd_texture_type_to_ffx_resource_type(texture_format.texture_type);499res.description.format = rd_format_to_ffx_surface_format(texture_format.format);500res.description.width = texture_format.width;501res.description.height = texture_format.height;502res.description.depth = texture_format.depth;503res.description.mipCount = texture_format.mipmaps;504res.description.flags = FFX_RESOURCE_FLAGS_NONE;505res.resource = reinterpret_cast<void *>(p_rid);506res.isDepth = texture_format.usage_bits & RD::TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT;507508return res;509}510511FSR2Context::~FSR2Context() {512ffxFsr2ContextDestroy(&fsr_context);513}514515FSR2Effect::FSR2Effect() {516FfxDeviceCapabilities &capabilities = device.capabilities;517capabilities.minimumSupportedShaderModel = FFX_SHADER_MODEL_5_1;518capabilities.waveLaneCountMin = 32;519capabilities.waveLaneCountMax = 32;520capabilities.fp16Supported = RD::get_singleton()->has_feature(RD::Features::SUPPORTS_HALF_FLOAT);521capabilities.raytracingSupported = false;522523String general_defines =524"\n#define FFX_GPU\n"525"\n#define FFX_GLSL 1\n"526"\n#define FFX_FSR2_OPTION_LOW_RESOLUTION_MOTION_VECTORS 1\n"527"\n#define 
FFX_FSR2_OPTION_HDR_COLOR_INPUT 1\n"528"\n#define FFX_FSR2_OPTION_INVERTED_DEPTH 1\n"529"\n#define FFX_FSR2_OPTION_GODOT_REACTIVE_MASK_CLAMP 1\n"530"\n#define FFX_FSR2_OPTION_GODOT_DERIVE_INVALID_MOTION_VECTORS 1\n";531532Vector<String> modes_single;533modes_single.push_back("");534535Vector<String> modes_with_fp16;536modes_with_fp16.push_back("");537modes_with_fp16.push_back("\n#define FFX_HALF 1\n");538539// Since Godot currently lacks a shader reflection mechanism to persist the name of the bindings in the shader cache and540// there's also no mechanism to compile the shaders offline, the bindings are created manually by looking at the GLSL541// files included in FSR2 and mapping the macro bindings (#define FSR2_BIND_*) to their respective implementation names.542//543// It is not guaranteed these will remain consistent at all between versions of FSR2, so it'll be necessary to keep these544// bindings up to date whenever the library is updated. In such cases, it is very likely the validation layer will throw an545// error if the bindings do not match.546547{548Pass &pass = device.passes[FFX_FSR2_PASS_DEPTH_CLIP];549pass.shader = &shaders.depth_clip;550pass.shader->initialize(modes_with_fp16, general_defines);551pass.shader_version = pass.shader->version_create();552pass.shader_variant = capabilities.fp16Supported ? 
1 : 0;553554pass.sampled_bindings = {555FfxResourceBinding{ 0, 0, L"r_reconstructed_previous_nearest_depth" },556FfxResourceBinding{ 1, 0, L"r_dilated_motion_vectors" },557FfxResourceBinding{ 2, 0, L"r_dilatedDepth" },558FfxResourceBinding{ 3, 0, L"r_reactive_mask" },559FfxResourceBinding{ 4, 0, L"r_transparency_and_composition_mask" },560FfxResourceBinding{ 6, 0, L"r_previous_dilated_motion_vectors" },561FfxResourceBinding{ 7, 0, L"r_input_motion_vectors" },562FfxResourceBinding{ 8, 0, L"r_input_color_jittered" },563FfxResourceBinding{ 9, 0, L"r_input_depth" },564FfxResourceBinding{ 10, 0, L"r_input_exposure" }565};566567pass.storage_bindings = {568// FSR2_BIND_UAV_DEPTH_CLIP (11) does not point to anything.569FfxResourceBinding{ 12, 0, L"rw_dilated_reactive_masks" },570FfxResourceBinding{ 13, 0, L"rw_prepared_input_color" }571};572573pass.uniform_bindings = {574FfxResourceBinding{ 14, 0, L"cbFSR2" }575};576}577578{579Pass &pass = device.passes[FFX_FSR2_PASS_RECONSTRUCT_PREVIOUS_DEPTH];580pass.shader = &shaders.reconstruct_previous_depth;581pass.shader->initialize(modes_with_fp16, general_defines);582pass.shader_version = pass.shader->version_create();583pass.shader_variant = capabilities.fp16Supported ? 
1 : 0;584585pass.sampled_bindings = {586FfxResourceBinding{ 0, 0, L"r_input_motion_vectors" },587FfxResourceBinding{ 1, 0, L"r_input_depth" },588FfxResourceBinding{ 2, 0, L"r_input_color_jittered" },589FfxResourceBinding{ 3, 0, L"r_input_exposure" },590FfxResourceBinding{ 4, 0, L"r_luma_history" }591};592593pass.storage_bindings = {594FfxResourceBinding{ 5, 0, L"rw_reconstructed_previous_nearest_depth" },595FfxResourceBinding{ 6, 0, L"rw_dilated_motion_vectors" },596FfxResourceBinding{ 7, 0, L"rw_dilatedDepth" },597FfxResourceBinding{ 8, 0, L"rw_prepared_input_color" },598FfxResourceBinding{ 9, 0, L"rw_luma_history" },599// FSR2_BIND_UAV_LUMA_INSTABILITY (10) does not point to anything.600FfxResourceBinding{ 11, 0, L"rw_lock_input_luma" }601};602603pass.uniform_bindings = {604FfxResourceBinding{ 12, 0, L"cbFSR2" }605};606}607608{609Pass &pass = device.passes[FFX_FSR2_PASS_LOCK];610pass.shader = &shaders.lock;611pass.shader->initialize(modes_with_fp16, general_defines);612pass.shader_version = pass.shader->version_create();613pass.shader_variant = capabilities.fp16Supported ? 
1 : 0;614615pass.sampled_bindings = {616FfxResourceBinding{ 0, 0, L"r_lock_input_luma" }617};618619pass.storage_bindings = {620FfxResourceBinding{ 1, 0, L"rw_new_locks" },621FfxResourceBinding{ 2, 0, L"rw_reconstructed_previous_nearest_depth" }622};623624pass.uniform_bindings = {625FfxResourceBinding{ 3, 0, L"cbFSR2" }626};627}628629{630Vector<String> accumulate_modes_with_fp16;631accumulate_modes_with_fp16.push_back("\n");632accumulate_modes_with_fp16.push_back("\n#define FFX_FSR2_OPTION_APPLY_SHARPENING 1\n");633accumulate_modes_with_fp16.push_back("\n#define FFX_HALF 1\n");634accumulate_modes_with_fp16.push_back("\n#define FFX_HALF 1\n#define FFX_FSR2_OPTION_APPLY_SHARPENING 1\n");635636// Workaround: Disable FP16 path for the accumulate pass on NVIDIA due to reduced occupancy and high VRAM throughput.637const bool fp16_path_supported = RD::get_singleton()->get_device_vendor_name() != "NVIDIA";638Pass &pass = device.passes[FFX_FSR2_PASS_ACCUMULATE];639pass.shader = &shaders.accumulate;640pass.shader->initialize(accumulate_modes_with_fp16, general_defines);641pass.shader_version = pass.shader->version_create();642pass.shader_variant = capabilities.fp16Supported && fp16_path_supported ? 
2 : 0;643644pass.sampled_bindings = {645FfxResourceBinding{ 0, 0, L"r_input_exposure" },646FfxResourceBinding{ 1, 0, L"r_dilated_reactive_masks" },647FfxResourceBinding{ 2, 0, L"r_input_motion_vectors" },648FfxResourceBinding{ 3, 0, L"r_internal_upscaled_color" },649FfxResourceBinding{ 4, 0, L"r_lock_status" },650FfxResourceBinding{ 5, 0, L"r_input_depth" },651FfxResourceBinding{ 6, 0, L"r_prepared_input_color" },652// FSR2_BIND_SRV_LUMA_INSTABILITY(7) does not point to anything.653FfxResourceBinding{ 8, 0, L"r_lanczos_lut" },654FfxResourceBinding{ 9, 0, L"r_upsample_maximum_bias_lut" },655FfxResourceBinding{ 10, 0, L"r_imgMips" },656FfxResourceBinding{ 11, 0, L"r_auto_exposure" },657FfxResourceBinding{ 12, 0, L"r_luma_history" }658};659660pass.storage_bindings = {661FfxResourceBinding{ 13, 0, L"rw_internal_upscaled_color" },662FfxResourceBinding{ 14, 0, L"rw_lock_status" },663FfxResourceBinding{ 15, 0, L"rw_upscaled_output" },664FfxResourceBinding{ 16, 0, L"rw_new_locks" },665FfxResourceBinding{ 17, 0, L"rw_luma_history" }666};667668pass.uniform_bindings = {669FfxResourceBinding{ 18, 0, L"cbFSR2" }670};671672// Sharpen pass is a clone of the accumulate pass with the sharpening variant.673Pass &sharpen_pass = device.passes[FFX_FSR2_PASS_ACCUMULATE_SHARPEN];674sharpen_pass = pass;675sharpen_pass.shader_variant = pass.shader_variant + 1;676}677678{679Pass &pass = device.passes[FFX_FSR2_PASS_RCAS];680pass.shader = &shaders.rcas;681pass.shader->initialize(modes_single, general_defines);682pass.shader_version = pass.shader->version_create();683684pass.sampled_bindings = {685FfxResourceBinding{ 0, 0, L"r_input_exposure" },686FfxResourceBinding{ 1, 0, L"r_rcas_input" }687};688689pass.storage_bindings = {690FfxResourceBinding{ 2, 0, L"rw_upscaled_output" }691};692693pass.uniform_bindings = {694FfxResourceBinding{ 3, 0, L"cbFSR2" },695FfxResourceBinding{ 4, 0, L"cbRCAS" }696};697}698699{700Pass &pass = device.passes[FFX_FSR2_PASS_COMPUTE_LUMINANCE_PYRAMID];701pass.shader = 
&shaders.compute_luminance_pyramid;702pass.shader->initialize(modes_single, general_defines);703pass.shader_version = pass.shader->version_create();704705pass.sampled_bindings = {706FfxResourceBinding{ 0, 0, L"r_input_color_jittered" }707};708709pass.storage_bindings = {710FfxResourceBinding{ 1, 0, L"rw_spd_global_atomic" },711FfxResourceBinding{ 2, 0, L"rw_img_mip_shading_change" },712FfxResourceBinding{ 3, 0, L"rw_img_mip_5" },713FfxResourceBinding{ 4, 0, L"rw_auto_exposure" }714};715716pass.uniform_bindings = {717FfxResourceBinding{ 5, 0, L"cbFSR2" },718FfxResourceBinding{ 6, 0, L"cbSPD" }719};720}721722{723Pass &pass = device.passes[FFX_FSR2_PASS_GENERATE_REACTIVE];724pass.shader = &shaders.autogen_reactive;725pass.shader->initialize(modes_with_fp16, general_defines);726pass.shader_version = pass.shader->version_create();727pass.shader_variant = capabilities.fp16Supported ? 1 : 0;728729pass.sampled_bindings = {730FfxResourceBinding{ 0, 0, L"r_input_opaque_only" },731FfxResourceBinding{ 1, 0, L"r_input_color_jittered" }732};733734pass.storage_bindings = {735FfxResourceBinding{ 2, 0, L"rw_output_autoreactive" }736};737738pass.uniform_bindings = {739FfxResourceBinding{ 3, 0, L"cbGenerateReactive" },740FfxResourceBinding{ 4, 0, L"cbFSR2" }741};742}743744{745Pass &pass = device.passes[FFX_FSR2_PASS_TCR_AUTOGENERATE];746pass.shader = &shaders.tcr_autogen;747pass.shader->initialize(modes_with_fp16, general_defines);748pass.shader_version = pass.shader->version_create();749pass.shader_variant = capabilities.fp16Supported ? 
1 : 0;750751pass.sampled_bindings = {752FfxResourceBinding{ 0, 0, L"r_input_opaque_only" },753FfxResourceBinding{ 1, 0, L"r_input_color_jittered" },754FfxResourceBinding{ 2, 0, L"r_input_motion_vectors" },755FfxResourceBinding{ 3, 0, L"r_input_prev_color_pre_alpha" },756FfxResourceBinding{ 4, 0, L"r_input_prev_color_post_alpha" },757FfxResourceBinding{ 5, 0, L"r_reactive_mask" },758FfxResourceBinding{ 6, 0, L"r_transparency_and_composition_mask" },759FfxResourceBinding{ 13, 0, L"r_input_depth" }760};761762pass.storage_bindings = {763FfxResourceBinding{ 7, 0, L"rw_output_autoreactive" },764FfxResourceBinding{ 8, 0, L"rw_output_autocomposition" },765FfxResourceBinding{ 9, 0, L"rw_output_prev_color_pre_alpha" },766FfxResourceBinding{ 10, 0, L"rw_output_prev_color_post_alpha" }767};768769pass.uniform_bindings = {770FfxResourceBinding{ 11, 0, L"cbFSR2" },771FfxResourceBinding{ 12, 0, L"cbGenerateReactive" }772};773}774775RD::SamplerState state;776state.mag_filter = RD::SAMPLER_FILTER_NEAREST;777state.min_filter = RD::SAMPLER_FILTER_NEAREST;778state.repeat_u = RD::SAMPLER_REPEAT_MODE_CLAMP_TO_EDGE;779state.repeat_v = RD::SAMPLER_REPEAT_MODE_CLAMP_TO_EDGE;780state.repeat_w = RD::SAMPLER_REPEAT_MODE_CLAMP_TO_EDGE;781state.min_lod = -1000.0f;782state.max_lod = 1000.0f;783state.anisotropy_max = 1.0;784device.point_clamp_sampler = RD::get_singleton()->sampler_create(state);785ERR_FAIL_COND(device.point_clamp_sampler.is_null());786787state.mag_filter = RD::SAMPLER_FILTER_LINEAR;788state.min_filter = RD::SAMPLER_FILTER_LINEAR;789device.linear_clamp_sampler = RD::get_singleton()->sampler_create(state);790ERR_FAIL_COND(device.linear_clamp_sampler.is_null());791}792793FSR2Effect::~FSR2Effect() {794RD::get_singleton()->free(device.point_clamp_sampler);795RD::get_singleton()->free(device.linear_clamp_sampler);796797for (uint32_t i = 0; i < FFX_FSR2_PASS_COUNT; i++) {798device.passes[i].shader->version_free(device.passes[i].shader_version);799}800}801802FSR2Context 
*FSR2Effect::create_context(Size2i p_internal_size, Size2i p_target_size) {803FSR2Context *context = memnew(RendererRD::FSR2Context);804context->fsr_desc.flags = FFX_FSR2_ENABLE_HIGH_DYNAMIC_RANGE | FFX_FSR2_ENABLE_DEPTH_INVERTED;805context->fsr_desc.maxRenderSize.width = p_internal_size.x;806context->fsr_desc.maxRenderSize.height = p_internal_size.y;807context->fsr_desc.displaySize.width = p_target_size.x;808context->fsr_desc.displaySize.height = p_target_size.y;809context->fsr_desc.device = &device;810811FfxFsr2Interface &functions = context->fsr_desc.callbacks;812functions.fpCreateBackendContext = create_backend_context_rd;813functions.fpGetDeviceCapabilities = get_device_capabilities_rd;814functions.fpDestroyBackendContext = destroy_backend_context_rd;815functions.fpCreateResource = create_resource_rd;816functions.fpRegisterResource = register_resource_rd;817functions.fpUnregisterResources = unregister_resources_rd;818functions.fpGetResourceDescription = get_resource_description_rd;819functions.fpDestroyResource = destroy_resource_rd;820functions.fpCreatePipeline = create_pipeline_rd;821functions.fpDestroyPipeline = destroy_pipeline_rd;822functions.fpScheduleGpuJob = schedule_gpu_job_rd;823functions.fpExecuteGpuJobs = execute_gpu_jobs_rd;824functions.scratchBuffer = &context->scratch;825functions.scratchBufferSize = sizeof(context->scratch);826827FfxErrorCode result = ffxFsr2ContextCreate(&context->fsr_context, &context->fsr_desc);828if (result == FFX_OK) {829return context;830} else {831memdelete(context);832return nullptr;833}834}835836void FSR2Effect::upscale(const Parameters &p_params) {837// TODO: Transparency & Composition mask is not implemented.838FfxFsr2DispatchDescription dispatch_desc = {};839RID color = p_params.color;840RID depth = p_params.depth;841RID velocity = p_params.velocity;842RID reactive = p_params.reactive;843RID exposure = p_params.exposure;844RID output = p_params.output;845dispatch_desc.commandList = nullptr;846dispatch_desc.color = 
get_resource_rd(&color, L"color");847dispatch_desc.depth = get_resource_rd(&depth, L"depth");848dispatch_desc.motionVectors = get_resource_rd(&velocity, L"velocity");849dispatch_desc.reactive = get_resource_rd(&reactive, L"reactive");850dispatch_desc.exposure = get_resource_rd(&exposure, L"exposure");851dispatch_desc.transparencyAndComposition = {};852dispatch_desc.output = get_resource_rd(&output, L"output");853dispatch_desc.colorOpaqueOnly = {};854dispatch_desc.jitterOffset.x = p_params.jitter.x;855dispatch_desc.jitterOffset.y = p_params.jitter.y;856dispatch_desc.motionVectorScale.x = float(p_params.internal_size.width);857dispatch_desc.motionVectorScale.y = float(p_params.internal_size.height);858dispatch_desc.reset = p_params.reset_accumulation;859dispatch_desc.renderSize.width = p_params.internal_size.width;860dispatch_desc.renderSize.height = p_params.internal_size.height;861dispatch_desc.enableSharpening = (p_params.sharpness > 1e-6f);862dispatch_desc.sharpness = p_params.sharpness;863dispatch_desc.frameTimeDelta = p_params.delta_time;864dispatch_desc.preExposure = 1.0f;865dispatch_desc.cameraNear = p_params.z_near;866dispatch_desc.cameraFar = p_params.z_far;867dispatch_desc.cameraFovAngleVertical = p_params.fovy;868dispatch_desc.viewSpaceToMetersFactor = 1.0f;869dispatch_desc.enableAutoReactive = false;870dispatch_desc.autoTcThreshold = 1.0f;871dispatch_desc.autoTcScale = 1.0f;872dispatch_desc.autoReactiveScale = 1.0f;873dispatch_desc.autoReactiveMax = 1.0f;874875RendererRD::MaterialStorage::store_camera(p_params.reprojection, dispatch_desc.reprojectionMatrix);876877FfxErrorCode result = ffxFsr2ContextDispatch(&p_params.context->fsr_context, &dispatch_desc);878ERR_FAIL_COND(result != FFX_OK);879}880881882