#[versions]
version_float = "#define VER_FLOAT";
version_half = "#define VER_HALF";
version_unorm8 = "#define VER_UINT8";
version_unorm16 = "#define VER_UINT16";
#[compute]
#version 450
#VERSION_DEFINES
layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in;
layout(std430, binding = 0) buffer Source {
#if defined(VER_FLOAT)
float data[];
#else
uint data[];
#endif
}
source;
#if defined(VER_FLOAT)
layout(binding = 1, rgba32f) uniform writeonly image2D dest;
#elif defined(VER_HALF)
layout(binding = 1, rgba16f) uniform writeonly image2D dest;
#elif defined(VER_UINT8)
layout(binding = 1, rgba8) uniform writeonly image2D dest;
#elif defined(VER_UINT16)
layout(binding = 1, rgba16) uniform writeonly image2D dest;
#endif
layout(push_constant, std430) uniform Params {
uint p_width;
uint p_height;
uint p_padding[2];
}
params;
void main() {
// gl_GlobalInvocationID is equivalent to the current texel coordinates.
if (gl_GlobalInvocationID.x >= params.p_width || gl_GlobalInvocationID.y >= params.p_height) {
return;
}
// The index of a texel in the source buffer, NOT an index of source.data[]
const int texel_index = int(gl_GlobalInvocationID.y * params.p_width + gl_GlobalInvocationID.x);
#if defined(VER_FLOAT)
// Since 32-bit floats are aligned with RGBF texel data, just retrieve the values from the array.
// Multiply by 3 to align with the components.
int data_index = texel_index * 3;
vec3 color_rgb = vec3(source.data[data_index], source.data[data_index + 1], source.data[data_index + 2]);
#elif defined(VER_UINT8)
// RGB8 texel data and 32-bit uints are not aligned, so we have to use a bit of magic.
// The source texel can be in either of 4 alignment 'states':
// 0 - [ XYZ_-____ ]
// 1 - [ _YZW-____ ]
// 2 - [ __ZW-X___ ]
// 3 - [ ___W-XY__ ]
// The texel index additionally needs to be decremented after every 'cycle' in order to properly fit into the source array.
vec3 color_rgb = vec3(0.0);
int data_index = texel_index - (texel_index / 4);
switch ((texel_index * 3) % 4) {
case 0:
color_rgb = unpackUnorm4x8(source.data[data_index]).xyz;
break;
case 1:
color_rgb = unpackUnorm4x8(source.data[data_index - 1]).yzw;
break;
case 2:
color_rgb.rg = unpackUnorm4x8(source.data[data_index - 1]).zw;
color_rgb.b = unpackUnorm4x8(source.data[data_index]).x;
break;
case 3:
color_rgb.r = unpackUnorm4x8(source.data[data_index - 1]).w;
color_rgb.gb = unpackUnorm4x8(source.data[data_index]).xy;
break;
default:
break;
}
#else
// In a similar vein to RGB8, the RGBH/RGB16 source texel can be in either of 2 alignment 'states':
// 0 - [ XY-X_ ]
// 1 - [ _Y-XY ]
// The texel index has to be incremented this time, as the size of a texel (6 bytes) is greater than that of a 32-bit uint (4 bytes).
vec3 color_rgb = vec3(0.0);
int data_index = texel_index + (texel_index / 2);
switch ((texel_index * 3) % 2) {
#if defined(VER_HALF)
case 0:
color_rgb.xy = unpackHalf2x16(source.data[data_index]);
color_rgb.z = unpackHalf2x16(source.data[data_index + 1]).x;
break;
case 1:
color_rgb.x = unpackHalf2x16(source.data[data_index]).y;
color_rgb.yz = unpackHalf2x16(source.data[data_index + 1]);
break;
#elif defined(VER_UINT16)
case 0:
color_rgb.xy = unpackUnorm2x16(source.data[data_index]);
color_rgb.z = unpackUnorm2x16(source.data[data_index + 1]).x;
break;
case 1:
color_rgb.x = unpackUnorm2x16(source.data[data_index]).y;
color_rgb.yz = unpackUnorm2x16(source.data[data_index + 1]);
break;
#endif
default:
break;
}
#endif
// Store the resulting RGBA color.
imageStore(dest, ivec2(gl_GlobalInvocationID.xy), vec4(color_rgb, 1.0));
}