Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
godotengine
GitHub Repository: godotengine/godot
Path: blob/master/servers/rendering/renderer_rd/cluster_builder_rd.h
10278 views
1
/**************************************************************************/
2
/* cluster_builder_rd.h */
3
/**************************************************************************/
4
/* This file is part of: */
5
/* GODOT ENGINE */
6
/* https://godotengine.org */
7
/**************************************************************************/
8
/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */
9
/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */
10
/* */
11
/* Permission is hereby granted, free of charge, to any person obtaining */
12
/* a copy of this software and associated documentation files (the */
13
/* "Software"), to deal in the Software without restriction, including */
14
/* without limitation the rights to use, copy, modify, merge, publish, */
15
/* distribute, sublicense, and/or sell copies of the Software, and to */
16
/* permit persons to whom the Software is furnished to do so, subject to */
17
/* the following conditions: */
18
/* */
19
/* The above copyright notice and this permission notice shall be */
20
/* included in all copies or substantial portions of the Software. */
21
/* */
22
/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
23
/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
24
/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */
25
/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
26
/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
27
/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
28
/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
29
/**************************************************************************/
30
31
#pragma once
32
33
#include "servers/rendering/renderer_rd/shaders/cluster_debug.glsl.gen.h"
34
#include "servers/rendering/renderer_rd/shaders/cluster_render.glsl.gen.h"
35
#include "servers/rendering/renderer_rd/shaders/cluster_store.glsl.gen.h"
36
#include "servers/rendering/renderer_rd/storage_rd/material_storage.h"
37
38
class ClusterBuilderSharedDataRD {
39
friend class ClusterBuilderRD;
40
41
RID sphere_vertex_buffer;
42
RID sphere_vertex_array;
43
RID sphere_index_buffer;
44
RID sphere_index_array;
45
float sphere_overfit = 0.0; // Because an icosphere is not a perfect sphere, we need to enlarge it to cover the sphere area.
46
47
RID cone_vertex_buffer;
48
RID cone_vertex_array;
49
RID cone_index_buffer;
50
RID cone_index_array;
51
float cone_overfit = 0.0; // Because an cone mesh is not a perfect cone, we need to enlarge it to cover the actual cone area.
52
53
RID box_vertex_buffer;
54
RID box_vertex_array;
55
RID box_index_buffer;
56
RID box_index_array;
57
58
enum Divisor {
59
DIVISOR_1,
60
DIVISOR_2,
61
DIVISOR_4,
62
};
63
64
struct ClusterRender {
65
struct PushConstant {
66
uint32_t base_index;
67
uint32_t pad0;
68
uint32_t pad1;
69
uint32_t pad2;
70
};
71
72
ClusterRenderShaderRD cluster_render_shader;
73
RID shader_version;
74
RID shader;
75
76
enum ShaderVariant {
77
SHADER_NORMAL,
78
SHADER_USE_ATTACHMENT,
79
SHADER_NORMAL_MOLTENVK,
80
SHADER_USE_ATTACHMENT_MOLTENVK,
81
SHADER_NORMAL_NO_ATOMICS,
82
SHADER_USE_ATTACHMENT_NO_ATOMICS,
83
};
84
85
enum PipelineVersion {
86
PIPELINE_NORMAL,
87
PIPELINE_MSAA,
88
PIPELINE_MAX
89
};
90
91
RID shader_pipelines[PIPELINE_MAX];
92
} cluster_render;
93
94
struct ClusterStore {
95
struct PushConstant {
96
uint32_t cluster_render_data_size; // how much data for a single cluster takes
97
uint32_t max_render_element_count_div_32; // divided by 32
98
uint32_t cluster_screen_size[2];
99
uint32_t render_element_count_div_32; // divided by 32
100
uint32_t max_cluster_element_count_div_32; // divided by 32
101
102
uint32_t pad1;
103
uint32_t pad2;
104
};
105
106
ClusterStoreShaderRD cluster_store_shader;
107
RID shader_version;
108
RID shader;
109
RID shader_pipeline;
110
} cluster_store;
111
112
struct ClusterDebug {
113
struct PushConstant {
114
uint32_t screen_size[2];
115
uint32_t cluster_screen_size[2];
116
117
uint32_t cluster_shift;
118
uint32_t cluster_type;
119
float z_near;
120
float z_far;
121
122
uint32_t orthogonal;
123
uint32_t max_cluster_element_count_div_32;
124
125
uint32_t pad1;
126
uint32_t pad2;
127
};
128
129
ClusterDebugShaderRD cluster_debug_shader;
130
RID shader_version;
131
RID shader;
132
RID shader_pipeline;
133
} cluster_debug;
134
135
public:
136
ClusterBuilderSharedDataRD();
137
~ClusterBuilderSharedDataRD();
138
};
139
140
class ClusterBuilderRD {
141
public:
142
static constexpr float WIDE_SPOT_ANGLE_THRESHOLD_DEG = 60.0f;
143
144
enum LightType {
145
LIGHT_TYPE_OMNI,
146
LIGHT_TYPE_SPOT
147
};
148
149
enum BoxType {
150
BOX_TYPE_REFLECTION_PROBE,
151
BOX_TYPE_DECAL,
152
};
153
154
enum ElementType {
155
ELEMENT_TYPE_OMNI_LIGHT,
156
ELEMENT_TYPE_SPOT_LIGHT,
157
ELEMENT_TYPE_DECAL,
158
ELEMENT_TYPE_REFLECTION_PROBE,
159
ELEMENT_TYPE_MAX,
160
};
161
162
private:
163
ClusterBuilderSharedDataRD *shared = nullptr;
164
165
struct RenderElementData {
166
uint32_t type; // 0-4
167
uint32_t touches_near;
168
uint32_t touches_far;
169
uint32_t original_index;
170
float transform_inv[12]; // Transposed transform for less space.
171
float scale[3];
172
uint32_t has_wide_spot_angle;
173
}; // Keep aligned to 32 bytes.
174
175
uint32_t cluster_count_by_type[ELEMENT_TYPE_MAX] = {};
176
uint32_t max_elements_by_type = 0;
177
178
RenderElementData *render_elements = nullptr;
179
uint32_t render_element_count = 0;
180
uint32_t render_element_max = 0;
181
182
Transform3D view_xform;
183
Projection adjusted_projection;
184
Projection projection;
185
float z_far = 0;
186
float z_near = 0;
187
bool camera_orthogonal = false;
188
189
enum Divisor {
190
DIVISOR_1,
191
DIVISOR_2,
192
DIVISOR_4,
193
};
194
195
uint32_t cluster_size = 32;
196
#if defined(MACOS_ENABLED) || defined(APPLE_EMBEDDED_ENABLED)
197
// Results in visual artifacts on macOS and iOS/visionOS when using MSAA and subgroups.
198
// Using subgroups and disabling MSAA is the optimal solution for now and also works
199
// with MoltenVK.
200
bool use_msaa = false;
201
#else
202
bool use_msaa = true;
203
#endif
204
Divisor divisor = DIVISOR_4;
205
206
Size2i screen_size;
207
Size2i cluster_screen_size;
208
209
RID framebuffer;
210
RID cluster_render_buffer; // Used for creating.
211
RID cluster_buffer; // Used for rendering.
212
RID element_buffer; // Used for storing, to hint element touches far plane or near plane.
213
uint32_t cluster_render_buffer_size = 0;
214
uint32_t cluster_buffer_size = 0;
215
216
RID cluster_render_uniform_set;
217
RID cluster_store_uniform_set;
218
219
// Persistent data.
220
221
void _clear();
222
223
struct StateUniform {
224
float projection[16];
225
float inv_z_far;
226
uint32_t screen_to_clusters_shift; // Shift to obtain coordinates in block indices.
227
uint32_t cluster_screen_width;
228
uint32_t cluster_data_size; // How much data is needed for a single cluster.
229
uint32_t cluster_depth_offset;
230
231
uint32_t pad0;
232
uint32_t pad1;
233
uint32_t pad2;
234
};
235
236
RID state_uniform;
237
238
RID debug_uniform_set;
239
240
public:
241
void setup(Size2i p_screen_size, uint32_t p_max_elements, RID p_depth_buffer, RID p_depth_buffer_sampler, RID p_color_buffer);
242
243
void begin(const Transform3D &p_view_transform, const Projection &p_cam_projection, bool p_flip_y);
244
245
_FORCE_INLINE_ void add_light(LightType p_type, const Transform3D &p_transform, float p_radius, float p_spot_aperture) {
246
if (p_type == LIGHT_TYPE_OMNI && cluster_count_by_type[ELEMENT_TYPE_OMNI_LIGHT] == max_elements_by_type) {
247
return; // Max number elements reached.
248
}
249
if (p_type == LIGHT_TYPE_SPOT && cluster_count_by_type[ELEMENT_TYPE_SPOT_LIGHT] == max_elements_by_type) {
250
return; // Max number elements reached.
251
}
252
253
RenderElementData &e = render_elements[render_element_count];
254
255
Transform3D xform = view_xform * p_transform;
256
257
float radius = xform.basis.get_uniform_scale();
258
if (radius < 0.98 || radius > 1.02) {
259
xform.basis.orthonormalize();
260
}
261
262
radius *= p_radius;
263
264
// Spotlights with wide angle are trated as Omni lights.
265
// If the spot angle is above the threshold, we need a sphere instead of a cone for building the clusters
266
// since the cone gets too flat/large (spot angle close to 90 degrees) or
267
// can't even cover the affected area of the light (spot angle above 90 degrees).
268
if (p_type == LIGHT_TYPE_OMNI || (p_type == LIGHT_TYPE_SPOT && p_spot_aperture > WIDE_SPOT_ANGLE_THRESHOLD_DEG)) {
269
radius *= shared->sphere_overfit; // Overfit icosphere.
270
271
float depth = -xform.origin.z;
272
if (camera_orthogonal) {
273
e.touches_near = (depth - radius) < z_near;
274
} else {
275
// Contains camera inside light.
276
float radius2 = radius * shared->sphere_overfit; // Overfit again for outer size (camera may be outside actual sphere but behind an icosphere vertex)
277
e.touches_near = xform.origin.length_squared() < radius2 * radius2;
278
}
279
280
e.touches_far = (depth + radius) > z_far;
281
e.scale[0] = radius;
282
e.scale[1] = radius;
283
e.scale[2] = radius;
284
if (p_type == LIGHT_TYPE_OMNI) {
285
e.type = ELEMENT_TYPE_OMNI_LIGHT;
286
e.original_index = cluster_count_by_type[ELEMENT_TYPE_OMNI_LIGHT];
287
cluster_count_by_type[ELEMENT_TYPE_OMNI_LIGHT]++;
288
} else { // LIGHT_TYPE_SPOT with wide angle.
289
e.type = ELEMENT_TYPE_SPOT_LIGHT;
290
e.has_wide_spot_angle = true;
291
e.original_index = cluster_count_by_type[ELEMENT_TYPE_SPOT_LIGHT];
292
cluster_count_by_type[ELEMENT_TYPE_SPOT_LIGHT]++;
293
}
294
295
RendererRD::MaterialStorage::store_transform_transposed_3x4(xform, e.transform_inv);
296
297
} else /*LIGHT_TYPE_SPOT with no wide angle*/ {
298
radius *= shared->cone_overfit; // Overfit cone.
299
300
real_t len = Math::tan(Math::deg_to_rad(p_spot_aperture)) * radius;
301
// Approximate, probably better to use a cone support function.
302
float max_d = -1e20;
303
float min_d = 1e20;
304
#define CONE_MINMAX(m_x, m_y) \
305
{ \
306
float d = -xform.xform(Vector3(len * m_x, len * m_y, -radius)).z; \
307
min_d = MIN(d, min_d); \
308
max_d = MAX(d, max_d); \
309
}
310
311
CONE_MINMAX(1, 1);
312
CONE_MINMAX(-1, 1);
313
CONE_MINMAX(-1, -1);
314
CONE_MINMAX(1, -1);
315
316
if (camera_orthogonal) {
317
e.touches_near = min_d < z_near;
318
} else {
319
Plane base_plane(-xform.basis.get_column(Vector3::AXIS_Z), xform.origin);
320
float dist = base_plane.distance_to(Vector3());
321
if (dist >= 0 && dist < radius) {
322
// Contains camera inside light, check angle.
323
float angle = Math::rad_to_deg(Math::acos((-xform.origin.normalized()).dot(-xform.basis.get_column(Vector3::AXIS_Z))));
324
e.touches_near = angle < p_spot_aperture * 1.05; //overfit aperture a little due to cone overfit
325
} else {
326
e.touches_near = false;
327
}
328
}
329
330
e.touches_far = max_d > z_far;
331
e.scale[0] = len * shared->cone_overfit;
332
e.scale[1] = len * shared->cone_overfit;
333
e.scale[2] = radius;
334
e.has_wide_spot_angle = false;
335
e.type = ELEMENT_TYPE_SPOT_LIGHT;
336
e.original_index = cluster_count_by_type[ELEMENT_TYPE_SPOT_LIGHT];
337
338
RendererRD::MaterialStorage::store_transform_transposed_3x4(xform, e.transform_inv);
339
340
cluster_count_by_type[ELEMENT_TYPE_SPOT_LIGHT]++;
341
}
342
343
render_element_count++;
344
}
345
346
_FORCE_INLINE_ void add_box(BoxType p_box_type, const Transform3D &p_transform, const Vector3 &p_half_size) {
347
if (p_box_type == BOX_TYPE_DECAL && cluster_count_by_type[ELEMENT_TYPE_DECAL] == max_elements_by_type) {
348
return; // Max number elements reached.
349
}
350
if (p_box_type == BOX_TYPE_REFLECTION_PROBE && cluster_count_by_type[ELEMENT_TYPE_REFLECTION_PROBE] == max_elements_by_type) {
351
return; // Max number elements reached.
352
}
353
354
RenderElementData &e = render_elements[render_element_count];
355
Transform3D xform = view_xform * p_transform;
356
357
// Extract scale and scale the matrix by it, makes things simpler.
358
Vector3 scale = p_half_size;
359
for (uint32_t i = 0; i < 3; i++) {
360
float s = xform.basis.rows[i].length();
361
scale[i] *= s;
362
xform.basis.rows[i] /= s;
363
};
364
365
float box_depth = Math::abs(xform.basis.xform_inv(Vector3(0, 0, -1)).dot(scale));
366
float depth = -xform.origin.z;
367
368
if (camera_orthogonal) {
369
e.touches_near = depth - box_depth < z_near;
370
} else {
371
// Contains camera inside box.
372
Vector3 inside = xform.xform_inv(Vector3(0, 0, 0)).abs();
373
e.touches_near = inside.x < scale.x && inside.y < scale.y && inside.z < scale.z;
374
}
375
376
e.touches_far = depth + box_depth > z_far;
377
378
e.scale[0] = scale.x;
379
e.scale[1] = scale.y;
380
e.scale[2] = scale.z;
381
382
e.type = (p_box_type == BOX_TYPE_DECAL) ? ELEMENT_TYPE_DECAL : ELEMENT_TYPE_REFLECTION_PROBE;
383
e.original_index = cluster_count_by_type[e.type];
384
385
RendererRD::MaterialStorage::store_transform_transposed_3x4(xform, e.transform_inv);
386
387
cluster_count_by_type[e.type]++;
388
render_element_count++;
389
}
390
391
void bake_cluster();
392
void debug(ElementType p_element);
393
394
RID get_cluster_buffer() const;
395
uint32_t get_cluster_size() const;
396
uint32_t get_max_cluster_elements() const;
397
398
void set_shared(ClusterBuilderSharedDataRD *p_shared);
399
400
ClusterBuilderRD();
401
~ClusterBuilderRD();
402
};
403
404