Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
hrydgard
GitHub Repository: hrydgard/ppsspp
Path: blob/master/GPU/Common/DepalettizeShaderCommon.cpp
3186 views
1
// Copyright (c) 2014- PPSSPP Project.
2
3
// This program is free software: you can redistribute it and/or modify
4
// it under the terms of the GNU General Public License as published by
5
// the Free Software Foundation, version 2.0 or later versions.
6
7
// This program is distributed in the hope that it will be useful,
8
// but WITHOUT ANY WARRANTY; without even the implied warranty of
9
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10
// GNU General Public License 2.0 for more details.
11
12
// A copy of the GPL 2.0 should have been included with the program.
13
// If not, see http://www.gnu.org/licenses/
14
15
// Official git repository and contact information can be found at
16
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
17
18
#include <cstdio>
19
20
#include "Common/GPU/Shader.h"
21
#include "Common/GPU/ShaderWriter.h"
22
23
#include "Common/StringUtils.h"
24
#include "Common/Log.h"
25
#include "Common/LogReporting.h"
26
#include "GPU/Common/GPUStateUtils.h"
27
#include "GPU/Common/DepalettizeShaderCommon.h"
28
#include "GPU/Common/Draw2D.h"
29
30
// Input attribute table: a vec2 position plus one vec2 texture coordinate.
// NOTE(review): presumably consumed by the vertex shader paired with the depal
// fragment shaders (per the "vs" prefix) - confirm against the users of this table.
static const InputDef vsInputs[2] = {
	{ "vec2", "a_position", Draw::SEM_POSITION },
	{ "vec2", "a_texcoord0", Draw::SEM_TEXCOORD0 },
};
34
35
// TODO: Deduplicate with TextureShaderCommon.cpp
36
static const SamplerDef samplers[2] = {
37
{ 0, "tex", SamplerFlags::ARRAY_ON_VULKAN },
38
{ 1, "pal" },
39
};
40
41
// The only varying handed to BeginFSMain: the vec2 texture coordinate "v_texcoord",
// declared with the "highp" qualifier string.
static const VaryingDef varyings[1] = {
	{ "vec2", "v_texcoord", Draw::SEM_TEXCOORD0, 0, "highp" },
};
44
45
// Uses integer instructions available since OpenGL 3.0, ES 3.0 (and 2.0 with extensions), and of course Vulkan and D3D11.
46
void GenerateDepalShader300(ShaderWriter &writer, const DepalConfig &config) {
47
const int shift = config.shift;
48
const int mask = config.mask;
49
50
writer.C(" vec2 texcoord = v_texcoord;\n");
51
52
// Implement the swizzle we need to simulate, if a game uses 8888 framebuffers and any other mode than "6" to access depth textures.
53
// This implements the "2" mode swizzle (it fixes up the Y direction but not X. See comments on issue #15898, Tantalus games)
54
// NOTE: This swizzle can be made to work with any power-of-2 resolution scaleFactor by shifting
55
// the bits around, but not sure how to handle 3x scaling. For now this is 1x-only (rough edges at higher resolutions).
56
if (config.bufferFormat == GE_FORMAT_DEPTH16) {
57
if (config.depthUpperBits == 0x2) {
58
writer.C(R"(
59
int x = int((texcoord.x / scaleFactor) * texSize.x);
60
int xclear = x & 0x01F0;
61
int temp = (x - xclear) | ((x >> 1) & 0xF0) | ((x << 4) & 0x100);
62
texcoord.x = (float(temp) / texSize.x) * scaleFactor;
63
)");
64
}
65
}
66
67
// Sampling turns our texture into floating point. To avoid this, might be able
68
// to declare them as isampler2D objects, but these require integer textures, which needs more work.
69
// Anyhow, we simply work around this by converting back to integer, which is fine.
70
// Use the mask to skip reading some components.
71
72
// TODO: Since we actually have higher precision color data here, we might want to apply a dithering pattern here
73
// in the 5551, 565 and 4444 modes. This would benefit Test Drive which renders at 16-bit on the real hardware
74
// and dithers immediately, while we render at higher color depth and thus don't dither resulting in banding
75
// when we sample it at low color depth like this.
76
77
// An alternative would be to have a special mode where we keep some extra precision here and sample the CLUT linearly - works for ramps such
78
// as those that Test Drive uses for its color remapping. But would need game specific flagging.
79
80
writer.C(" vec4 color = ").SampleTexture2D("tex", "texcoord").C(";\n");
81
82
int shiftedMask = mask << shift;
83
switch (config.bufferFormat) {
84
case GE_FORMAT_CLUT8:
85
writer.C(" int index = int(color.r * 255.99);\n");
86
break;
87
case GE_FORMAT_8888:
88
if (shiftedMask & 0xFF) writer.C(" int r = int(color.r * 255.99);\n"); else writer.C(" int r = 0;\n");
89
if (shiftedMask & 0xFF00) writer.C(" int g = int(color.g * 255.99);\n"); else writer.C(" int g = 0;\n");
90
if (shiftedMask & 0xFF0000) writer.C(" int b = int(color.b * 255.99);\n"); else writer.C(" int b = 0;\n");
91
if (shiftedMask & 0xFF000000) writer.C(" int a = int(color.a * 255.99);\n"); else writer.C(" int a = 0;\n");
92
writer.C(" int index = (a << 24) | (b << 16) | (g << 8) | (r);\n");
93
break;
94
case GE_FORMAT_4444:
95
if (shiftedMask & 0xF) writer.C(" int r = int(color.r * 15.99);\n"); else writer.C(" int r = 0;\n");
96
if (shiftedMask & 0xF0) writer.C(" int g = int(color.g * 15.99);\n"); else writer.C(" int g = 0;\n");
97
if (shiftedMask & 0xF00) writer.C(" int b = int(color.b * 15.99);\n"); else writer.C(" int b = 0;\n");
98
if (shiftedMask & 0xF000) writer.C(" int a = int(color.a * 15.99);\n"); else writer.C(" int a = 0;\n");
99
writer.C(" int index = (a << 12) | (b << 8) | (g << 4) | (r);\n");
100
break;
101
case GE_FORMAT_565:
102
if (shiftedMask & 0x1F) writer.C(" int r = int(color.r * 31.99);\n"); else writer.C(" int r = 0;\n");
103
if (shiftedMask & 0x7E0) writer.C(" int g = int(color.g * 63.99);\n"); else writer.C(" int g = 0;\n");
104
if (shiftedMask & 0xF800) writer.C(" int b = int(color.b * 31.99);\n"); else writer.C(" int b = 0;\n");
105
writer.C(" int index = (b << 11) | (g << 5) | (r);\n");
106
break;
107
case GE_FORMAT_5551:
108
if (config.textureFormat == GE_TFMT_CLUT8) {
109
// SOCOM case. We need to make sure the next few lines load the right bits, see below.
110
shiftedMask <<= 8;
111
}
112
if (shiftedMask & 0x1F) writer.C(" int r = int(color.r * 31.99);\n"); else writer.C(" int r = 0;\n");
113
if (shiftedMask & 0x3E0) writer.C(" int g = int(color.g * 31.99);\n"); else writer.C(" int g = 0;\n");
114
if (shiftedMask & 0x7C00) writer.C(" int b = int(color.b * 31.99);\n"); else writer.C(" int b = 0;\n");
115
if (shiftedMask & 0x8000) writer.C(" int a = int(color.a);\n"); else writer.C(" int a = 0;\n");
116
writer.C(" int index = (a << 15) | (b << 10) | (g << 5) | (r);\n");
117
118
if (config.textureFormat == GE_TFMT_CLUT8) {
119
// SOCOM case. #16210
120
// To debug the issue, remove this shift to see the texture (check for clamping etc).
121
writer.C(" index >>= 8;\n");
122
}
123
124
break;
125
case GE_FORMAT_DEPTH16:
126
// Decode depth buffer.
127
writer.C(" float depth = (color.x - z_offset) * z_scale * 65535.0f;\n");
128
129
if (config.bufferFormat == GE_FORMAT_DEPTH16 && config.textureFormat == GE_TFMT_5650) {
130
// Convert depth to 565, without going through a CLUT.
131
// TODO: Make "depal without a CLUT" a separate concept, to avoid redundantly creating a CLUT texture.
132
writer.C(" int idepth = int(clamp(depth, 0.0, 65535.0));\n");
133
writer.C(" float r = float(idepth & 31) / 31.0;\n");
134
writer.C(" float g = float((idepth >> 5) & 63) / 63.0;\n");
135
writer.C(" float b = float((idepth >> 11) & 31) / 31.0;\n");
136
writer.C(" vec4 outColor = vec4(r, g, b, 1.0);\n");
137
return;
138
}
139
140
writer.C(" int index = int(clamp(depth, 0.0, 65535.0));\n");
141
break;
142
default:
143
break;
144
}
145
146
float texturePixels = 512.0f;
147
148
if (shift) {
149
writer.F(" index = (int(uint(index) >> uint(%d)) & 0x%02x)", shift, mask);
150
} else {
151
writer.F(" index = (index & 0x%02x)", mask);
152
}
153
if (config.startPos) {
154
writer.F(" | %d;\n", config.startPos); // '|' matches what we have in gstate.h
155
} else {
156
writer.F(";\n");
157
}
158
159
writer.F(" vec2 uv = vec2((float(index) + 0.5) * %f, 0.0);\n", 1.0f / texturePixels);
160
writer.C(" vec4 outColor = ").SampleTexture2D("pal", "uv").C(";\n");
161
}
162
163
// FP only, to suit GL(ES) 2.0 and DX9
164
void GenerateDepalShaderFloat(ShaderWriter &writer, const DepalConfig &config) {
165
char lookupMethod[128] = "index.r";
166
167
const int shift = config.shift;
168
const int mask = config.mask;
169
170
if (config.bufferFormat == GE_FORMAT_DEPTH16) {
171
DepthScaleFactors factors = GetDepthScaleFactors(gstate_c.UseFlags());
172
writer.ConstFloat("z_scale", factors.ScaleU16());
173
writer.ConstFloat("z_offset", factors.Offset());
174
}
175
176
writer.C(" vec4 index = ").SampleTexture2D("tex", "v_texcoord").C(";\n");
177
178
float index_multiplier = 1.0f;
179
// pixelformat is the format of the texture we are sampling.
180
bool formatOK = true;
181
switch (config.bufferFormat) {
182
case GE_FORMAT_CLUT8:
183
if (shift == 0 && mask == 0xFF) {
184
// Easy peasy.
185
snprintf(lookupMethod, sizeof(lookupMethod), "index.r");
186
formatOK = true;
187
} else {
188
// Deal with this if we find it.
189
formatOK = false;
190
}
191
break;
192
case GE_FORMAT_8888:
193
if ((mask & (mask + 1)) == 0) {
194
// If the value has all bits contiguous (bitmask check above), we can mod by it + 1.
195
const char *rgba = "rrrrrrrrggggggggbbbbbbbbaaaaaaaa";
196
const u8 rgba_shift = shift & 7;
197
if (rgba_shift == 0 && mask == 0xFF) {
198
snprintf(lookupMethod, sizeof(lookupMethod), "index.%c", rgba[shift]);
199
} else {
200
snprintf(lookupMethod, sizeof(lookupMethod), "mod(index.%c * %f, %d.0)", rgba[shift], 255.99f / (1 << rgba_shift), mask + 1);
201
index_multiplier = 1.0f / 256.0f;
202
// Format was OK if there weren't bits from another component.
203
formatOK = mask <= 255 - (1 << rgba_shift);
204
}
205
} else {
206
formatOK = false;
207
}
208
break;
209
case GE_FORMAT_4444:
210
if ((mask & (mask + 1)) == 0 && shift < 16) {
211
const char *rgba = "rrrrggggbbbbaaaa";
212
const u8 rgba_shift = shift & 3;
213
if (rgba_shift == 0 && mask == 0xF) {
214
snprintf(lookupMethod, sizeof(lookupMethod), "index.%c", rgba[shift]);
215
index_multiplier = 15.0f / 256.0f;
216
} else {
217
// Let's divide and mod to get the right bits. A common case is shift=0, mask=01.
218
snprintf(lookupMethod, sizeof(lookupMethod), "mod(index.%c * %f, %d.0)", rgba[shift], 15.99f / (1 << rgba_shift), mask + 1);
219
index_multiplier = 1.0f / 256.0f;
220
formatOK = mask <= 15 - (1 << rgba_shift);
221
}
222
} else {
223
formatOK = false;
224
}
225
break;
226
case GE_FORMAT_565:
227
if ((mask & (mask + 1)) == 0 && shift < 16) {
228
const u8 shifts[16] = { 0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4 };
229
const int multipliers[16] = { 31, 31, 31, 31, 31, 63, 63, 63, 63, 63, 63, 31, 31, 31, 31, 31 };
230
const char *rgba = "rrrrrggggggbbbbb";
231
const u8 rgba_shift = shifts[shift];
232
if (rgba_shift == 0 && mask == multipliers[shift]) {
233
snprintf(lookupMethod, sizeof(lookupMethod), "index.%c", rgba[shift]);
234
index_multiplier = multipliers[shift] / 256.0f;
235
} else {
236
// We just need to divide the right component by the right value, and then mod against the mask.
237
// A common case is shift=1, mask=0f.
238
snprintf(lookupMethod, sizeof(lookupMethod), "mod(index.%c * %f, %d.0)", rgba[shift], ((float)multipliers[shift] + 0.99f) / (1 << rgba_shift), mask + 1);
239
index_multiplier = 1.0f / 256.0f;
240
formatOK = mask <= multipliers[shift] - (1 << rgba_shift);
241
}
242
} else {
243
formatOK = false;
244
}
245
break;
246
case GE_FORMAT_5551:
247
if (config.textureFormat == GE_TFMT_CLUT8 && mask == 0xFF && shift == 0) {
248
// Follow the intent here, and ignore g (and let's not round unnecessarily).
249
snprintf(lookupMethod, sizeof(lookupMethod), "floor(floor(index.a) * 128.0 + index.b * 64.0)");
250
index_multiplier = 1.0f / 256.0f;
251
// SOCOM case. #16210
252
} else if ((mask & (mask + 1)) == 0 && shift < 16) {
253
const char *rgba = "rrrrrgggggbbbbba";
254
const u8 rgba_shift = shift % 5;
255
if (rgba_shift == 0 && mask == 0x1F) {
256
snprintf(lookupMethod, sizeof(lookupMethod), "index.%c", rgba[shift]);
257
index_multiplier = 31.0f / 256.0f;
258
} else if (shift == 15 && mask == 1) {
259
snprintf(lookupMethod, sizeof(lookupMethod), "index.%c", rgba[shift]);
260
index_multiplier = 1.0f / 256.0f;
261
} else {
262
// A isn't possible here.
263
snprintf(lookupMethod, sizeof(lookupMethod), "mod(index.%c * %f, %d.0)", rgba[shift], 31.99f / (1 << rgba_shift), mask + 1);
264
index_multiplier = 1.0f / 256.0f;
265
formatOK = mask <= 31 - (1 << rgba_shift);
266
}
267
} else {
268
formatOK = false;
269
}
270
break;
271
case GE_FORMAT_DEPTH16:
272
{
273
// TODO: I think we can handle most scenarios here, but texturing from depth buffers requires an extension on ES 2.0 anyway.
274
// Not on D3D9 though, so this path is still relevant.
275
276
if (config.bufferFormat == GE_FORMAT_DEPTH16 && config.textureFormat == GE_TFMT_5650) {
277
// Convert depth to 565, without going through a CLUT.
278
writer.C(" float depth = (index.x - z_offset) * z_scale;\n");
279
writer.C(" float idepth = floor(clamp(depth, 0.0, 65535.0));\n");
280
writer.C(" float r = mod(idepth, 32.0) / 31.0;\n");
281
writer.C(" float g = mod(floor(idepth / 32.0), 64.0) / 63.0;\n");
282
writer.C(" float b = mod(floor(idepth / 2048.0), 32.0) / 31.0;\n");
283
writer.C(" vec4 outColor = vec4(r, g, b, 1.0);\n");
284
return;
285
}
286
287
if (shift < 16) {
288
index_multiplier = 1.0f / (float)(1 << shift);
289
truncate_cpy(lookupMethod, "((index.x - z_offset) * z_scale)");
290
291
if ((mask & (mask + 1)) != 0) {
292
// But we'll try with the above anyway.
293
formatOK = false;
294
}
295
} else {
296
formatOK = false;
297
}
298
break;
299
}
300
default:
301
break;
302
}
303
304
// We always use 512-sized textures now.
305
float texturePixels = 512.f;
306
index_multiplier *= 0.5f;
307
308
// Adjust index_multiplier, similar to the use of 15.99 instead of 16 in the ES 3 path.
309
// index_multiplier -= 0.01f / texturePixels;
310
311
if (!formatOK) {
312
ERROR_LOG_REPORT_ONCE(depal, Log::G3D, "%s depal unsupported: shift=%d mask=%02x offset=%d", GeBufferFormatToString(config.bufferFormat), shift, mask, config.startPos);
313
}
314
315
// Offset by half a texel (plus clutBase) to turn NEAREST filtering into FLOOR.
316
// Technically, the clutBase should be |'d, not added, but that's hard with floats.
317
float texel_offset = ((float)config.startPos + 0.5f) / texturePixels;
318
writer.F(" float coord = (%s * %f) + %f;\n", lookupMethod, index_multiplier, texel_offset);
319
writer.C(" vec4 outColor = ").SampleTexture2D("pal", "vec2(coord, 0.0)").C(";\n");
320
}
321
322
// Writes the body of the "smoothed" depal fragment shader: one color channel of "tex" is
// scaled to its integer range (31 or 63) without being truncated to an integer, and used
// as the coordinate into "pal".
//
// Only whole-channel reads of the r/g/b channels of 5551 and 565 buffers are handled.
// Anything else trips a debug assert and leaves the channel name as "error" in the
// emitted code.
void GenerateDepalSmoothed(ShaderWriter &writer, const DepalConfig &config) {
	const char *channel = "error";
	float channelRange = 31.0f;

	switch (config.bufferFormat) {
	case GE_FORMAT_5551:
		_dbg_assert_(config.mask == 0x1F);
		if (config.shift == 0) {
			channel = "r";
		} else if (config.shift == 5) {
			channel = "g";
		} else if (config.shift == 10) {
			channel = "b";
		} else {
			_dbg_assert_(false);
		}
		break;

	case GE_FORMAT_565:
		_dbg_assert_(config.mask == 0x1F || config.mask == 0x3F);
		if (config.shift == 0) {
			channel = "r";
		} else if (config.shift == 5) {
			// Green is the 6-bit channel in 565.
			channel = "g";
			channelRange = 63.0f;
		} else if (config.shift == 11) {
			channel = "b";
		} else {
			_dbg_assert_(false);
		}
		break;

	default:
		_dbg_assert_(false);
		break;
	}

	writer.C(" float index = ").SampleTexture2D("tex", "v_texcoord").F(".%s * %0.1f;\n", channel, channelRange);

	// Width, in texels, of the palette texture; +0.5 targets the texel center.
	float paletteWidth = 512.f;
	writer.F(" float coord = (index + 0.5) * %f;\n", 1.0 / paletteWidth);
	writer.C(" vec4 outColor = ").SampleTexture2D("pal", "vec2(coord, 0.0)").C(";\n");
}
351
352
// Writes a complete depal fragment shader into `writer`: sampler declarations, the main
// function prologue, one of the three body generators, and the epilogue returning outColor.
//
// Body selection:
//  - config.smoothedDepal: GenerateDepalSmoothed.
//  - GLSL_1xx: GenerateDepalShaderFloat.
//  - GLSL_VULKAN / GLSL_3xx / HLSL_D3D11: GenerateDepalShader300, except for the
//    5551-buffer-as-CLUT8 ("SOCOM") case which uses the float generator.
//  - Any other shader language asserts.
void GenerateDepalFs(ShaderWriter &writer, const DepalConfig &config) {
	// Only depth sources get the draw2D uniforms declared.
	const bool readsDepth = config.bufferFormat == GE_FORMAT_DEPTH16;

	writer.DeclareSamplers(samplers);
	writer.HighPrecisionFloat();
	writer.BeginFSMain(readsDepth ? g_draw2Duniforms : Slice<UniformDef>::empty(), varyings);

	if (!config.smoothedDepal) {
		const bool socomSpecial = config.bufferFormat == GE_FORMAT_5551 && config.textureFormat == GE_TFMT_CLUT8;

		switch (writer.Lang().shaderLanguage) {
		case GLSL_1xx:
			GenerateDepalShaderFloat(writer, config);
			break;

		case GLSL_VULKAN:
		case GLSL_3xx:
		case HLSL_D3D11:
			if (socomSpecial) {
				// Use the float shader for the SOCOM special.
				GenerateDepalShaderFloat(writer, config);
			} else {
				GenerateDepalShader300(writer, config);
			}
			break;

		default:
			_assert_msg_(false, "Shader language not supported for depal: %d", (int)writer.Lang().shaderLanguage);
			break;
		}
	} else {
		// Handles a limited set of cases, but doesn't need any integer math so we don't
		// need two variants.
		GenerateDepalSmoothed(writer, config);
	}

	writer.EndFSMain("outColor");
}
381
382