Path: blob/master/servers/rendering/renderer_rd/cluster_builder_rd.h
10278 views
/**************************************************************************/1/* cluster_builder_rd.h */2/**************************************************************************/3/* This file is part of: */4/* GODOT ENGINE */5/* https://godotengine.org */6/**************************************************************************/7/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */8/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */9/* */10/* Permission is hereby granted, free of charge, to any person obtaining */11/* a copy of this software and associated documentation files (the */12/* "Software"), to deal in the Software without restriction, including */13/* without limitation the rights to use, copy, modify, merge, publish, */14/* distribute, sublicense, and/or sell copies of the Software, and to */15/* permit persons to whom the Software is furnished to do so, subject to */16/* the following conditions: */17/* */18/* The above copyright notice and this permission notice shall be */19/* included in all copies or substantial portions of the Software. */20/* */21/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */22/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */23/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */24/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */25/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */26/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */27/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */28/**************************************************************************/2930#pragma once3132#include "servers/rendering/renderer_rd/shaders/cluster_debug.glsl.gen.h"33#include "servers/rendering/renderer_rd/shaders/cluster_render.glsl.gen.h"34#include "servers/rendering/renderer_rd/shaders/cluster_store.glsl.gen.h"35#include "servers/rendering/renderer_rd/storage_rd/material_storage.h"3637class ClusterBuilderSharedDataRD {38friend class ClusterBuilderRD;3940RID sphere_vertex_buffer;41RID sphere_vertex_array;42RID sphere_index_buffer;43RID sphere_index_array;44float sphere_overfit = 0.0; // Because an icosphere is not a perfect sphere, we need to enlarge it to cover the sphere area.4546RID cone_vertex_buffer;47RID cone_vertex_array;48RID cone_index_buffer;49RID cone_index_array;50float cone_overfit = 0.0; // Because an cone mesh is not a perfect cone, we need to enlarge it to cover the actual cone area.5152RID box_vertex_buffer;53RID box_vertex_array;54RID box_index_buffer;55RID box_index_array;5657enum Divisor {58DIVISOR_1,59DIVISOR_2,60DIVISOR_4,61};6263struct ClusterRender {64struct PushConstant {65uint32_t base_index;66uint32_t pad0;67uint32_t pad1;68uint32_t pad2;69};7071ClusterRenderShaderRD cluster_render_shader;72RID shader_version;73RID shader;7475enum ShaderVariant {76SHADER_NORMAL,77SHADER_USE_ATTACHMENT,78SHADER_NORMAL_MOLTENVK,79SHADER_USE_ATTACHMENT_MOLTENVK,80SHADER_NORMAL_NO_ATOMICS,81SHADER_USE_ATTACHMENT_NO_ATOMICS,82};8384enum PipelineVersion {85PIPELINE_NORMAL,86PIPELINE_MSAA,87PIPELINE_MAX88};8990RID shader_pipelines[PIPELINE_MAX];91} cluster_render;9293struct ClusterStore {94struct PushConstant {95uint32_t cluster_render_data_size; // how much data for a single cluster takes96uint32_t max_render_element_count_div_32; // divided by 3297uint32_t cluster_screen_size[2];98uint32_t render_element_count_div_32; // divided by 3299uint32_t max_cluster_element_count_div_32; // divided by 32100101uint32_t pad1;102uint32_t pad2;103};104105ClusterStoreShaderRD cluster_store_shader;106RID shader_version;107RID shader;108RID shader_pipeline;109} cluster_store;110111struct ClusterDebug {112struct PushConstant {113uint32_t screen_size[2];114uint32_t cluster_screen_size[2];115116uint32_t cluster_shift;117uint32_t cluster_type;118float z_near;119float z_far;120121uint32_t orthogonal;122uint32_t max_cluster_element_count_div_32;123124uint32_t pad1;125uint32_t pad2;126};127128ClusterDebugShaderRD cluster_debug_shader;129RID shader_version;130RID shader;131RID shader_pipeline;132} cluster_debug;133134public:135ClusterBuilderSharedDataRD();136~ClusterBuilderSharedDataRD();137};138139class ClusterBuilderRD {140public:141static constexpr float WIDE_SPOT_ANGLE_THRESHOLD_DEG = 60.0f;142143enum LightType {144LIGHT_TYPE_OMNI,145LIGHT_TYPE_SPOT146};147148enum BoxType {149BOX_TYPE_REFLECTION_PROBE,150BOX_TYPE_DECAL,151};152153enum ElementType {154ELEMENT_TYPE_OMNI_LIGHT,155ELEMENT_TYPE_SPOT_LIGHT,156ELEMENT_TYPE_DECAL,157ELEMENT_TYPE_REFLECTION_PROBE,158ELEMENT_TYPE_MAX,159};160161private:162ClusterBuilderSharedDataRD *shared = nullptr;163164struct RenderElementData {165uint32_t type; // 0-4166uint32_t touches_near;167uint32_t touches_far;168uint32_t original_index;169float transform_inv[12]; // Transposed transform for less space.170float scale[3];171uint32_t has_wide_spot_angle;172}; // Keep aligned to 32 bytes.173174uint32_t cluster_count_by_type[ELEMENT_TYPE_MAX] = {};175uint32_t max_elements_by_type = 0;176177RenderElementData *render_elements = nullptr;178uint32_t render_element_count = 0;179uint32_t render_element_max = 0;180181Transform3D view_xform;182Projection adjusted_projection;183Projection projection;184float z_far = 0;185float z_near = 0;186bool camera_orthogonal = false;187188enum Divisor {189DIVISOR_1,190DIVISOR_2,191DIVISOR_4,192};193194uint32_t cluster_size = 32;195#if defined(MACOS_ENABLED) || defined(APPLE_EMBEDDED_ENABLED)196// Results in visual artifacts on macOS and iOS/visionOS when using MSAA and subgroups.197// Using subgroups and disabling MSAA is the optimal solution for now and also works198// with MoltenVK.199bool use_msaa = false;200#else201bool use_msaa = true;202#endif203Divisor divisor = DIVISOR_4;204205Size2i screen_size;206Size2i cluster_screen_size;207208RID framebuffer;209RID cluster_render_buffer; // Used for creating.210RID cluster_buffer; // Used for rendering.211RID element_buffer; // Used for storing, to hint element touches far plane or near plane.212uint32_t cluster_render_buffer_size = 0;213uint32_t cluster_buffer_size = 0;214215RID cluster_render_uniform_set;216RID cluster_store_uniform_set;217218// Persistent data.219220void _clear();221222struct StateUniform {223float projection[16];224float inv_z_far;225uint32_t screen_to_clusters_shift; // Shift to obtain coordinates in block indices.226uint32_t cluster_screen_width;227uint32_t cluster_data_size; // How much data is needed for a single cluster.228uint32_t cluster_depth_offset;229230uint32_t pad0;231uint32_t pad1;232uint32_t pad2;233};234235RID state_uniform;236237RID debug_uniform_set;238239public:240void setup(Size2i p_screen_size, uint32_t p_max_elements, RID p_depth_buffer, RID p_depth_buffer_sampler, RID p_color_buffer);241242void begin(const Transform3D &p_view_transform, const Projection &p_cam_projection, bool p_flip_y);243244_FORCE_INLINE_ void add_light(LightType p_type, const Transform3D &p_transform, float p_radius, float p_spot_aperture) {245if (p_type == LIGHT_TYPE_OMNI && cluster_count_by_type[ELEMENT_TYPE_OMNI_LIGHT] == max_elements_by_type) {246return; // Max number elements reached.247}248if (p_type == LIGHT_TYPE_SPOT && cluster_count_by_type[ELEMENT_TYPE_SPOT_LIGHT] == max_elements_by_type) {249return; // Max number elements reached.250}251252RenderElementData &e = render_elements[render_element_count];253254Transform3D xform = view_xform * p_transform;255256float radius = xform.basis.get_uniform_scale();257if (radius < 0.98 || radius > 1.02) {258xform.basis.orthonormalize();259}260261radius *= p_radius;262263// Spotlights with wide angle are trated as Omni lights.264// If the spot angle is above the threshold, we need a sphere instead of a cone for building the clusters265// since the cone gets too flat/large (spot angle close to 90 degrees) or266// can't even cover the affected area of the light (spot angle above 90 degrees).267if (p_type == LIGHT_TYPE_OMNI || (p_type == LIGHT_TYPE_SPOT && p_spot_aperture > WIDE_SPOT_ANGLE_THRESHOLD_DEG)) {268radius *= shared->sphere_overfit; // Overfit icosphere.269270float depth = -xform.origin.z;271if (camera_orthogonal) {272e.touches_near = (depth - radius) < z_near;273} else {274// Contains camera inside light.275float radius2 = radius * shared->sphere_overfit; // Overfit again for outer size (camera may be outside actual sphere but behind an icosphere vertex)276e.touches_near = xform.origin.length_squared() < radius2 * radius2;277}278279e.touches_far = (depth + radius) > z_far;280e.scale[0] = radius;281e.scale[1] = radius;282e.scale[2] = radius;283if (p_type == LIGHT_TYPE_OMNI) {284e.type = ELEMENT_TYPE_OMNI_LIGHT;285e.original_index = cluster_count_by_type[ELEMENT_TYPE_OMNI_LIGHT];286cluster_count_by_type[ELEMENT_TYPE_OMNI_LIGHT]++;287} else { // LIGHT_TYPE_SPOT with wide angle.288e.type = ELEMENT_TYPE_SPOT_LIGHT;289e.has_wide_spot_angle = true;290e.original_index = cluster_count_by_type[ELEMENT_TYPE_SPOT_LIGHT];291cluster_count_by_type[ELEMENT_TYPE_SPOT_LIGHT]++;292}293294RendererRD::MaterialStorage::store_transform_transposed_3x4(xform, e.transform_inv);295296} else /*LIGHT_TYPE_SPOT with no wide angle*/ {297radius *= shared->cone_overfit; // Overfit cone.298299real_t len = Math::tan(Math::deg_to_rad(p_spot_aperture)) * radius;300// Approximate, probably better to use a cone support function.301float max_d = -1e20;302float min_d = 1e20;303#define CONE_MINMAX(m_x, m_y) \304{ \305float d = -xform.xform(Vector3(len * m_x, len * m_y, -radius)).z; \306min_d = MIN(d, min_d); \307max_d = MAX(d, max_d); \308}309310CONE_MINMAX(1, 1);311CONE_MINMAX(-1, 1);312CONE_MINMAX(-1, -1);313CONE_MINMAX(1, -1);314315if (camera_orthogonal) {316e.touches_near = min_d < z_near;317} else {318Plane base_plane(-xform.basis.get_column(Vector3::AXIS_Z), xform.origin);319float dist = base_plane.distance_to(Vector3());320if (dist >= 0 && dist < radius) {321// Contains camera inside light, check angle.322float angle = Math::rad_to_deg(Math::acos((-xform.origin.normalized()).dot(-xform.basis.get_column(Vector3::AXIS_Z))));323e.touches_near = angle < p_spot_aperture * 1.05; //overfit aperture a little due to cone overfit324} else {325e.touches_near = false;326}327}328329e.touches_far = max_d > z_far;330e.scale[0] = len * shared->cone_overfit;331e.scale[1] = len * shared->cone_overfit;332e.scale[2] = radius;333e.has_wide_spot_angle = false;334e.type = ELEMENT_TYPE_SPOT_LIGHT;335e.original_index = cluster_count_by_type[ELEMENT_TYPE_SPOT_LIGHT];336337RendererRD::MaterialStorage::store_transform_transposed_3x4(xform, e.transform_inv);338339cluster_count_by_type[ELEMENT_TYPE_SPOT_LIGHT]++;340}341342render_element_count++;343}344345_FORCE_INLINE_ void add_box(BoxType p_box_type, const Transform3D &p_transform, const Vector3 &p_half_size) {346if (p_box_type == BOX_TYPE_DECAL && cluster_count_by_type[ELEMENT_TYPE_DECAL] == max_elements_by_type) {347return; // Max number elements reached.348}349if (p_box_type == BOX_TYPE_REFLECTION_PROBE && cluster_count_by_type[ELEMENT_TYPE_REFLECTION_PROBE] == max_elements_by_type) {350return; // Max number elements reached.351}352353RenderElementData &e = render_elements[render_element_count];354Transform3D xform = view_xform * p_transform;355356// Extract scale and scale the matrix by it, makes things simpler.357Vector3 scale = p_half_size;358for (uint32_t i = 0; i < 3; i++) {359float s = xform.basis.rows[i].length();360scale[i] *= s;361xform.basis.rows[i] /= s;362};363364float box_depth = Math::abs(xform.basis.xform_inv(Vector3(0, 0, -1)).dot(scale));365float depth = -xform.origin.z;366367if (camera_orthogonal) {368e.touches_near = depth - box_depth < z_near;369} else {370// Contains camera inside box.371Vector3 inside = xform.xform_inv(Vector3(0, 0, 0)).abs();372e.touches_near = inside.x < scale.x && inside.y < scale.y && inside.z < scale.z;373}374375e.touches_far = depth + box_depth > z_far;376377e.scale[0] = scale.x;378e.scale[1] = scale.y;379e.scale[2] = scale.z;380381e.type = (p_box_type == BOX_TYPE_DECAL) ? ELEMENT_TYPE_DECAL : ELEMENT_TYPE_REFLECTION_PROBE;382e.original_index = cluster_count_by_type[e.type];383384RendererRD::MaterialStorage::store_transform_transposed_3x4(xform, e.transform_inv);385386cluster_count_by_type[e.type]++;387render_element_count++;388}389390void bake_cluster();391void debug(ElementType p_element);392393RID get_cluster_buffer() const;394uint32_t get_cluster_size() const;395uint32_t get_max_cluster_elements() const;396397void set_shared(ClusterBuilderSharedDataRD *p_shared);398399ClusterBuilderRD();400~ClusterBuilderRD();401};402403404