Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
hrydgard
GitHub Repository: hrydgard/ppsspp
Path: blob/master/GPU/Common/GPUStateUtils.cpp
3187 views
1
// Copyright (c) 2015- PPSSPP Project.
2
3
// This program is free software: you can redistribute it and/or modify
4
// it under the terms of the GNU General Public License as published by
5
// the Free Software Foundation, version 2.0 or later versions.
6
7
// This program is distributed in the hope that it will be useful,
8
// but WITHOUT ANY WARRANTY; without even the implied warranty of
9
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10
// GNU General Public License 2.0 for more details.
11
12
// A copy of the GPL 2.0 should have been included with the program.
13
// If not, see http://www.gnu.org/licenses/
14
15
// Official git repository and contact information can be found at
16
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
17
18
#include "ppsspp_config.h"
19
#include <algorithm>
20
#include <limits>
21
22
#include "Core/ConfigValues.h"
23
#include "Core/System.h"
24
#include "Core/Config.h"
25
#include "Core/Reporting.h"
26
27
#include "GPU/ge_constants.h"
28
#include "GPU/GPUState.h"
29
#include "GPU/Math3D.h"
30
#include "GPU/Common/PresentationCommon.h"
31
32
#include "GPU/Common/GPUStateUtils.h"
33
34
bool IsStencilTestOutputDisabled() {
35
// The mask applies on all stencil ops.
36
if (gstate.isStencilTestEnabled() && (gstate.pmska & 0xFF) != 0xFF) {
37
if (gstate_c.framebufFormat == GE_FORMAT_565) {
38
return true;
39
}
40
return gstate.getStencilOpZPass() == GE_STENCILOP_KEEP && gstate.getStencilOpZFail() == GE_STENCILOP_KEEP && gstate.getStencilOpSFail() == GE_STENCILOP_KEEP;
41
}
42
return true;
43
}
44
45
bool NeedsTestDiscard() {
46
// We assume this is called only when enabled and not trivially true (may also be for color testing.)
47
if (gstate.isStencilTestEnabled() && (gstate.pmska & 0xFF) != 0xFF)
48
return true;
49
if (gstate.isDepthTestEnabled() && gstate.isDepthWriteEnabled())
50
return true;
51
if (!gstate.isAlphaBlendEnabled())
52
return true;
53
if (gstate.getBlendFuncA() != GE_SRCBLEND_SRCALPHA && gstate.getBlendFuncA() != GE_SRCBLEND_DOUBLESRCALPHA)
54
return true;
55
// GE_DSTBLEND_DOUBLEINVSRCALPHA is actually inverse double src alpha, and doubling zero is still zero.
56
if (gstate.getBlendFuncB() != GE_DSTBLEND_INVSRCALPHA && gstate.getBlendFuncB() != GE_DSTBLEND_DOUBLEINVSRCALPHA) {
57
if (gstate.getBlendFuncB() != GE_DSTBLEND_FIXB || gstate.getFixB() != 0xFFFFFF)
58
return true;
59
}
60
if (gstate.getBlendEq() != GE_BLENDMODE_MUL_AND_ADD && gstate.getBlendEq() != GE_BLENDMODE_MUL_AND_SUBTRACT_REVERSE)
61
return true;
62
if (gstate.isLogicOpEnabled() && gstate.getLogicOp() != GE_LOGIC_COPY)
63
return true;
64
65
return false;
66
}
67
68
bool IsAlphaTestTriviallyTrue() {
69
switch (gstate.getAlphaTestFunction()) {
70
case GE_COMP_NEVER:
71
return false;
72
73
case GE_COMP_ALWAYS:
74
return true;
75
76
case GE_COMP_GEQUAL:
77
if (gstate_c.vertexFullAlpha && (gstate_c.textureFullAlpha || !gstate.isTextureAlphaUsed()))
78
return true; // If alpha is full, it doesn't matter what the ref value is.
79
return gstate.getAlphaTestRef() == 0;
80
81
// Non-zero check. If we have no depth testing (and thus no depth writing), and an alpha func that will result in no change if zero alpha, get rid of the alpha test.
82
// Speeds up Lumines by a LOT on PowerVR.
83
case GE_COMP_NOTEQUAL:
84
if (gstate.getAlphaTestRef() == 255) {
85
// Likely to be rare. Let's just skip the vertexFullAlpha optimization here instead of adding
86
// complicated code to discard the draw or whatnot.
87
return false;
88
}
89
// Fallthrough on purpose
90
[[fallthrough]];
91
case GE_COMP_GREATER:
92
{
93
// If the texture and vertex only use 1.0 alpha, then the ref value doesn't matter.
94
if (gstate_c.vertexFullAlpha && (gstate_c.textureFullAlpha || !gstate.isTextureAlphaUsed()))
95
return true;
96
return gstate.getAlphaTestRef() == 0 && !NeedsTestDiscard();
97
}
98
99
case GE_COMP_LEQUAL:
100
return gstate.getAlphaTestRef() == 255;
101
102
case GE_COMP_EQUAL:
103
case GE_COMP_LESS:
104
return false;
105
106
default:
107
return false;
108
}
109
}
110
111
bool IsAlphaTestAgainstZero() {
112
return gstate.getAlphaTestRef() == 0 && gstate.getAlphaTestMask() == 0xFF;
113
}
114
115
bool IsColorTestAgainstZero() {
116
return gstate.getColorTestRef() == 0 && gstate.getColorTestMask() == 0xFFFFFF;
117
}
118
119
bool IsColorTestTriviallyTrue() {
120
switch (gstate.getColorTestFunction()) {
121
case GE_COMP_NEVER:
122
return false;
123
124
case GE_COMP_ALWAYS:
125
return true;
126
127
case GE_COMP_EQUAL:
128
case GE_COMP_NOTEQUAL:
129
return false;
130
default:
131
return false;
132
}
133
}
134
135
bool IsDepthTestEffectivelyDisabled() {
136
if (!gstate.isDepthTestEnabled())
137
return true;
138
// We can ignore stencil, because ALWAYS and disabled choose the same stencil path.
139
if (gstate.getDepthTestFunction() != GE_COMP_ALWAYS)
140
return false;
141
return !gstate.isDepthWriteEnabled();
142
}
143
144
// Indexed by the source blend factor value (gstate.getBlendFuncA()).
// An entry is false for the four factors that read source alpha
// (SRCALPHA, INVSRCALPHA, DOUBLESRCALPHA, DOUBLEINVSRCALPHA) and true for
// everything else. The table has 16 entries; the five after FIXA carry no
// enumerator name here and are all true.
const bool nonAlphaSrcFactors[16] = {
	true,   //  0: GE_SRCBLEND_DSTCOLOR
	true,   //  1: GE_SRCBLEND_INVDSTCOLOR
	false,  //  2: GE_SRCBLEND_SRCALPHA
	false,  //  3: GE_SRCBLEND_INVSRCALPHA
	true,   //  4: GE_SRCBLEND_DSTALPHA
	true,   //  5: GE_SRCBLEND_INVDSTALPHA
	false,  //  6: GE_SRCBLEND_DOUBLESRCALPHA
	false,  //  7: GE_SRCBLEND_DOUBLEINVSRCALPHA
	true,   //  8: GE_SRCBLEND_DOUBLEDSTALPHA
	true,   //  9: GE_SRCBLEND_DOUBLEINVDSTALPHA
	true,   // 10: GE_SRCBLEND_FIXA
	true,   // 11: (unnamed)
	true,   // 12: (unnamed)
	true,   // 13: (unnamed)
	true,   // 14: (unnamed)
	true,   // 15: (unnamed)
};
162
163
// Indexed by the destination blend factor value (gstate.getBlendFuncB()).
// An entry is false for the four factors that read source alpha
// (SRCALPHA, INVSRCALPHA, DOUBLESRCALPHA, DOUBLEINVSRCALPHA) and true for
// everything else. The table has 16 entries; the five after FIXB carry no
// enumerator name here and are all true.
const bool nonAlphaDestFactors[16] = {
	true,   //  0: GE_DSTBLEND_SRCCOLOR
	true,   //  1: GE_DSTBLEND_INVSRCCOLOR
	false,  //  2: GE_DSTBLEND_SRCALPHA
	false,  //  3: GE_DSTBLEND_INVSRCALPHA
	true,   //  4: GE_DSTBLEND_DSTALPHA
	true,   //  5: GE_DSTBLEND_INVDSTALPHA
	false,  //  6: GE_DSTBLEND_DOUBLESRCALPHA
	false,  //  7: GE_DSTBLEND_DOUBLEINVSRCALPHA
	true,   //  8: GE_DSTBLEND_DOUBLEDSTALPHA
	true,   //  9: GE_DSTBLEND_DOUBLEINVDSTALPHA
	true,   // 10: GE_DSTBLEND_FIXB
	true,   // 11: (unnamed)
	true,   // 12: (unnamed)
	true,   // 13: (unnamed)
	true,   // 14: (unnamed)
	true,   // 15: (unnamed)
};
181
182
// Decides whether (and how) the output alpha should be replaced with the
// stencil value, given the blend replacement mode already chosen.
//
// replaceBlend: the result of ReplaceBlendWithShader() for this draw.
// Returns REPLACE_ALPHA_NO, REPLACE_ALPHA_YES or REPLACE_ALPHA_DUALSOURCE.
ReplaceAlphaType ReplaceAlphaWithStencil(ReplaceBlendType replaceBlend) {
	// Nothing to replace if stencil can't be written, or in clear mode.
	if (IsStencilTestOutputDisabled() || gstate.isModeClear())
		return REPLACE_ALPHA_NO;

	const bool checkFactors = replaceBlend != REPLACE_BLEND_NO && replaceBlend != REPLACE_BLEND_READ_FRAMEBUFFER;
	if (checkFactors) {
		// When neither factor reads source alpha, alpha can simply be replaced.
		const bool neitherFactorReadsSrcAlpha =
			nonAlphaSrcFactors[gstate.getBlendFuncA()] && nonAlphaDestFactors[gstate.getBlendFuncB()];
		if (neitherFactorReadsSrcAlpha)
			return REPLACE_ALPHA_YES;

		// Otherwise dual source blending is required, if available.
		return gstate_c.Use(GPU_USE_DUALSOURCE_BLEND) ? REPLACE_ALPHA_DUALSOURCE : REPLACE_ALPHA_NO;
	}

	// NOTE(review): the branch above returns on every path and BLUE_TO_ALPHA is neither NO nor
	// READ_FRAMEBUFFER, so this check looks unreachable - confirm before relying on it.
	if (replaceBlend == ReplaceBlendType::REPLACE_BLEND_BLUE_TO_ALPHA)
		return REPLACE_ALPHA_NO;  // irrelevant

	return REPLACE_ALPHA_YES;
}
205
206
// Maps the z-pass stencil op to the kind of value that should end up in alpha,
// taking the framebuffer format into account. Falls back to STENCIL_VALUE_KEEP
// for anything not handled explicitly.
StencilValueType ReplaceAlphaWithStencilType() {
	switch (gstate_c.framebufFormat) {
	case GE_FORMAT_565:
		// There's never a stencil value. Maybe the right alpha is 1?
		return STENCIL_VALUE_ONE;

	case GE_FORMAT_5551:
		// There's only one bit, so technically this should only ever use zero/one.
		switch (gstate.getStencilOpZPass()) {
		case GE_STENCILOP_REPLACE:
			if ((gstate.getStencilTestRef() & 0x80) != 0)
				return STENCIL_VALUE_ONE;
			return STENCIL_VALUE_ZERO;

		// Decrementing always zeros, since there's only one bit.
		case GE_STENCILOP_ZERO:
		case GE_STENCILOP_DECR:
			return STENCIL_VALUE_ZERO;

		// Incrementing always fills, since there's only one bit.
		case GE_STENCILOP_INCR:
			return STENCIL_VALUE_ONE;

		case GE_STENCILOP_INVERT:
			return STENCIL_VALUE_INVERT;

		case GE_STENCILOP_KEEP:
			return STENCIL_VALUE_KEEP;
		}
		break;

	case GE_FORMAT_4444:
	case GE_FORMAT_8888:
	case GE_FORMAT_INVALID:
	case GE_FORMAT_DEPTH16:
	case GE_FORMAT_CLUT8:
	{
		// 4444 gets the *_4 increment/decrement variants, everything else the *_8 ones.
		const bool isFormat4444 = gstate_c.framebufFormat == GE_FORMAT_4444;
		switch (gstate.getStencilOpZPass()) {
		case GE_STENCILOP_REPLACE:
			// TODO: Could detect zero here and force ZERO - less uniform updates?
			return STENCIL_VALUE_UNIFORM;

		case GE_STENCILOP_ZERO:
			return STENCIL_VALUE_ZERO;

		case GE_STENCILOP_DECR:
			if (isFormat4444)
				return STENCIL_VALUE_DECR_4;
			return STENCIL_VALUE_DECR_8;

		case GE_STENCILOP_INCR:
			if (isFormat4444)
				return STENCIL_VALUE_INCR_4;
			return STENCIL_VALUE_INCR_8;

		case GE_STENCILOP_INVERT:
			return STENCIL_VALUE_INVERT;

		case GE_STENCILOP_KEEP:
			return STENCIL_VALUE_KEEP;
		}
		break;
	}
	}

	return STENCIL_VALUE_KEEP;
}
265
266
// Chooses how the current GE blend state is to be emulated: not at all
// (REPLACE_BLEND_NO), with standard blend factors, with one of the shader
// assisted variants (PRE_SRC, 2X_ALPHA, PRE_SRC_2X_ALPHA), by reading the
// framebuffer, or via the blue-to-alpha path.
//
// bufferFormat: format of the target framebuffer. GE_FORMAT_565 has no
//               alpha, which makes several dst-alpha cases representable.
ReplaceBlendType ReplaceBlendWithShader(GEBufferFormat bufferFormat) {
	if (gstate_c.blueToAlpha)
		return REPLACE_BLEND_BLUE_TO_ALPHA;

	if (!gstate.isAlphaBlendEnabled() || gstate.isModeClear())
		return REPLACE_BLEND_NO;

	// Let's get the non-factor modes out of the way first.
	const GEBlendMode blendEq = gstate.getBlendEq();
	switch (blendEq) {
	case GE_BLENDMODE_ABSDIFF:
		return REPLACE_BLEND_READ_FRAMEBUFFER;

	case GE_BLENDMODE_MIN:
	case GE_BLENDMODE_MAX:
		return gstate_c.Use(GPU_USE_BLEND_MINMAX) ? REPLACE_BLEND_STANDARD : REPLACE_BLEND_READ_FRAMEBUFFER;

	case GE_BLENDMODE_MUL_AND_ADD:
	case GE_BLENDMODE_MUL_AND_SUBTRACT:
	case GE_BLENDMODE_MUL_AND_SUBTRACT_REVERSE:
		// These are the factor-based modes, handled below.
		break;

	default:
		// Other blend equations simply don't blend on hardware.
		return REPLACE_BLEND_NO;
	}

	const GEBlendSrcFactor srcFactor = gstate.getBlendFuncA();
	const GEBlendDstFactor dstFactor = gstate.getBlendFuncB();
	const bool is565 = bufferFormat == GE_FORMAT_565;

	switch (srcFactor) {
	case GE_SRCBLEND_DOUBLESRCALPHA:
	case GE_SRCBLEND_DOUBLEINVSRCALPHA:
		// 2x alpha in the source function and not in the dest = source color doubling.
		// Even dest alpha is safe, since we're moving the * 2.0 into the src color.
		switch (dstFactor) {
		case GE_DSTBLEND_SRCCOLOR:
		case GE_DSTBLEND_INVSRCCOLOR:
			// When inversing, alpha clamping isn't an issue.
			if (srcFactor == GE_SRCBLEND_DOUBLEINVSRCALPHA)
				return REPLACE_BLEND_2X_ALPHA;
			// Can't double, we need the source color to be correct.
			// Doubling only alpha would clamp the src alpha incorrectly.
			return REPLACE_BLEND_READ_FRAMEBUFFER;

		case GE_DSTBLEND_DOUBLEDSTALPHA:
		case GE_DSTBLEND_DOUBLEINVDSTALPHA:
			return is565 ? REPLACE_BLEND_2X_ALPHA : REPLACE_BLEND_READ_FRAMEBUFFER;

		case GE_DSTBLEND_DOUBLESRCALPHA:
			// We can't technically do this correctly (due to clamping) without reading the dst color.
			// Using a copy isn't accurate either, though, when there's overlap.
			if (gstate_c.Use(GPU_USE_FRAMEBUFFER_FETCH))
				return REPLACE_BLEND_READ_FRAMEBUFFER;
			return REPLACE_BLEND_PRE_SRC_2X_ALPHA;

		case GE_DSTBLEND_DOUBLEINVSRCALPHA:
			// For the inverse, doubling alpha is safe, because it will clamp correctly.
			return REPLACE_BLEND_PRE_SRC_2X_ALPHA;

		case GE_DSTBLEND_SRCALPHA:
		case GE_DSTBLEND_INVSRCALPHA:
		case GE_DSTBLEND_DSTALPHA:
		case GE_DSTBLEND_INVDSTALPHA:
		case GE_DSTBLEND_FIXB:
		default:
			// TODO: Could use vertexFullAlpha, but it's not calculated yet.
			// This outputs the original alpha for the dest factor.
			return REPLACE_BLEND_PRE_SRC;
		}

	case GE_SRCBLEND_DOUBLEDSTALPHA:
	case GE_SRCBLEND_DOUBLEINVDSTALPHA:
		// Both doubled dst alpha source factors resolve identically.
		// Inverse double dst alpha is tricky. Doubling the src color is probably the wrong direction,
		// halving might be more correct. We really need to read the dst color.
		// In general we can't do either correctly (due to clamping) without reading the dst alpha.
		if (!is565)
			return REPLACE_BLEND_READ_FRAMEBUFFER;

		// With 565, dest alpha should be zero, so dst-alpha based factors are known
		// (0 or 1) and doubling them has no effect.
		switch (dstFactor) {
		case GE_DSTBLEND_DOUBLESRCALPHA:
		case GE_DSTBLEND_DOUBLEINVSRCALPHA:
			// Only the dst factor's src alpha needs doubling.
			return REPLACE_BLEND_2X_ALPHA;

		default:
			return REPLACE_BLEND_STANDARD;
		}

	case GE_SRCBLEND_FIXA:
	default:
		switch (dstFactor) {
		case GE_DSTBLEND_DOUBLESRCALPHA:
			// L.A. Rush ends up here (detail textures at the end of the frame). It uses FIXA = 0
			// (no src color contribution) but I still can't find a way to replicate the formula.
			// If our framebuffer was floating point we could make it work (since that turns off
			// clamping before blending) by just doubling src_alpha in the shader.
			//
			// It might be possible to replicate it if we implement a 2-pass decomposition:
			// * First pass just does:
			//   src=ZERO dst=SRC_ALPHA.
			// * Second pass renders with white input color. To double the resulting destination color:
			//   src=DST_COLOR dst=ONE
			return REPLACE_BLEND_READ_FRAMEBUFFER;

		case GE_DSTBLEND_DOUBLEINVSRCALPHA:
			// Doubling alpha is safe for the inverse, will clamp to zero either way.
			return REPLACE_BLEND_2X_ALPHA;

		case GE_DSTBLEND_DOUBLEDSTALPHA:
		case GE_DSTBLEND_DOUBLEINVDSTALPHA:
			// Alpha is irrelevant with 565.
			return is565 ? REPLACE_BLEND_STANDARD : REPLACE_BLEND_READ_FRAMEBUFFER;

		case GE_DSTBLEND_FIXB:
		default:
		{
			const auto fixA = gstate.getFixA();
			const auto fixB = gstate.getFixB();
			// Some games specify this. Some GPUs may prefer blending off entirely.
			if (fixA == 0xFFFFFF && fixB == 0x000000)
				return REPLACE_BLEND_NO;
			// We can represent this with standard factors.
			if (fixA == 0xFFFFFF || fixA == 0x000000 || fixB == 0xFFFFFF || fixB == 0x000000)
				return REPLACE_BLEND_STANDARD;
			// Multiply the src color in the shader, that way it's always accurate.
			return REPLACE_BLEND_PRE_SRC;
		}

		case GE_DSTBLEND_SRCCOLOR:
		case GE_DSTBLEND_INVSRCCOLOR:
		case GE_DSTBLEND_SRCALPHA:
		case GE_DSTBLEND_INVSRCALPHA:
		case GE_DSTBLEND_DSTALPHA:
		case GE_DSTBLEND_INVDSTALPHA:
			return REPLACE_BLEND_STANDARD;
		}

	case GE_SRCBLEND_DSTCOLOR:
	case GE_SRCBLEND_INVDSTCOLOR:
	case GE_SRCBLEND_SRCALPHA:
	case GE_SRCBLEND_INVSRCALPHA:
	case GE_SRCBLEND_DSTALPHA:
	case GE_SRCBLEND_INVDSTALPHA:
	{
		const bool srcUsesSrcAlpha = srcFactor == GE_SRCBLEND_SRCALPHA || srcFactor == GE_SRCBLEND_INVSRCALPHA;
		switch (dstFactor) {
		case GE_DSTBLEND_DOUBLESRCALPHA:
			// Can't safely double alpha, will clamp. However, a copy may easily be worse due to
			// overlap (copying here causes overlap problems in Silent Hill games, it seems?)
			if (gstate_c.Use(GPU_USE_FRAMEBUFFER_FETCH))
				return REPLACE_BLEND_READ_FRAMEBUFFER;
			// Hm, this is similar to the L.A. Rush case above. This will not be accurate.
			// When dst alpha/color is used in the src factor, we will just hope that doubling
			// alpha for the dst factor will not clamp too badly. One example is MotorStorm.
			return srcUsesSrcAlpha ? REPLACE_BLEND_PRE_SRC_2X_ALPHA : REPLACE_BLEND_2X_ALPHA;

		case GE_DSTBLEND_DOUBLEINVSRCALPHA:
			// For inverse, things are simpler. Clamping isn't an issue, as long as we avoid
			// messing with the other factor's components.
			return srcUsesSrcAlpha ? REPLACE_BLEND_PRE_SRC_2X_ALPHA : REPLACE_BLEND_2X_ALPHA;

		case GE_DSTBLEND_DOUBLEDSTALPHA:
		case GE_DSTBLEND_DOUBLEINVDSTALPHA:
			return is565 ? REPLACE_BLEND_STANDARD : REPLACE_BLEND_READ_FRAMEBUFFER;

		default:
			return REPLACE_BLEND_STANDARD;
		}
	}
	}

	// Should never get here.
	return REPLACE_BLEND_STANDARD;
}
520
521
static const float DEPTH_SLICE_FACTOR_HIGH = 4.0f;
522
static const float DEPTH_SLICE_FACTOR_16BIT = 256.0f;
523
524
// The supported flag combinations. TODO: Maybe they should be distilled down into an enum.
525
//
526
// 0 - "Old"-style GL depth.
527
// Or "Non-accurate depth" : effectively ignore minz / maxz. Map Z values based on viewport, which clamps.
528
// This skews depth in many instances. Depth can be inverted in this mode if viewport says.
529
// This is completely wrong, but works in some cases (probably because some game devs assumed it was how it worked)
530
// and avoids some depth clamp issues.
531
//
532
// GPU_USE_ACCURATE_DEPTH:
533
// Accurate depth: Z in the framebuffer matches the range of Z used on the PSP linearly in some way. We choose
534
// a centered range, to simulate clamping by letting otherwise out-of-range pixels survive the 0 and 1 cutoffs.
535
// Clip depth based on minz/maxz, and viewport is just a means to scale and center the value, not clipping or mapping to stored values.
536
//
537
// GPU_USE_ACCURATE_DEPTH | GPU_USE_DEPTH_CLAMP:
538
// Variant of GPU_USE_ACCURATE_DEPTH, just the range is the nice and convenient 0-1 since we can use
539
// hardware depth clamp. only viable in accurate depth mode, clamps depth and therefore uses the full 0-1 range. Using the full 0-1 range is not what accurate means, it's implied by depth clamp (which also means we're clamping.)
540
//
541
// GPU_USE_ACCURATE_DEPTH | GPU_SCALE_DEPTH_FROM_24BIT_TO_16BIT:
542
// GPU_USE_ACCURATE_DEPTH | GPU_SCALE_DEPTH_FROM_24BIT_TO_16BIT | GPU_USE_DEPTH_CLAMP:
543
// Only viable in accurate depth mode, means to use a range of the 24-bit depth values available
544
// from the GPU to represent the 16-bit values the PSP had, to try to make everything round and
545
// z-fight (close to) the same way as on hardware, cheaply (cheaper than rounding depth in fragment shader).
546
// We automatically switch to this if Z tests for equality are used.
547
// Depth clamp has no effect on the depth scaling here if set, though will still be enabled
548
// and clamp wildly out of line values.
549
//
550
// Any other combinations of these particular flags are bogus (like for example a lonely GPU_USE_DEPTH_CLAMP).
551
552
float DepthSliceFactor(u32 useFlags) {
553
if (!(useFlags & GPU_USE_ACCURATE_DEPTH)) {
554
// Old style depth.
555
return 1.0f;
556
}
557
if (useFlags & GPU_SCALE_DEPTH_FROM_24BIT_TO_16BIT) {
558
// Accurate depth but 16-bit resolution, so squish.
559
return DEPTH_SLICE_FACTOR_16BIT;
560
}
561
if (useFlags & GPU_USE_DEPTH_CLAMP) {
562
// Accurate depth, but we can use the full range since clamping is available.
563
return 1.0f;
564
}
565
566
// Standard accurate depth.
567
return DEPTH_SLICE_FACTOR_HIGH;
568
}
569
570
// See class DepthScaleFactors for how to apply.
571
DepthScaleFactors GetDepthScaleFactors(u32 useFlags) {
572
if (!(useFlags & GPU_USE_ACCURATE_DEPTH)) {
573
return DepthScaleFactors(0.0f, 65535.0f);
574
}
575
576
if (useFlags & GPU_SCALE_DEPTH_FROM_24BIT_TO_16BIT) {
577
const double offset = 0.5 * (DEPTH_SLICE_FACTOR_16BIT - 1.0) / DEPTH_SLICE_FACTOR_16BIT;
578
// Use one bit for each value, rather than 1.0 / (65535.0 * 256.0).
579
const double scale = 16777215.0;
580
return DepthScaleFactors(offset, scale);
581
} else if (useFlags & GPU_USE_DEPTH_CLAMP) {
582
return DepthScaleFactors(0.0f, 65535.0f);
583
} else {
584
const double offset = 0.5f * (DEPTH_SLICE_FACTOR_HIGH - 1.0f) * (1.0f / DEPTH_SLICE_FACTOR_HIGH);
585
return DepthScaleFactors(offset, (float)(DEPTH_SLICE_FACTOR_HIGH * 65535.0));
586
}
587
}
588
589
void ConvertViewportAndScissor(bool useBufferedRendering, float renderWidth, float renderHeight, int bufferWidth, int bufferHeight, ViewportAndScissor &out) {
590
out.throughMode = gstate.isModeThrough();
591
592
float renderWidthFactor, renderHeightFactor;
593
float renderX = 0.0f, renderY = 0.0f;
594
float displayOffsetX, displayOffsetY;
595
if (useBufferedRendering) {
596
displayOffsetX = 0.0f;
597
displayOffsetY = 0.0f;
598
renderWidthFactor = (float)renderWidth / (float)bufferWidth;
599
renderHeightFactor = (float)renderHeight / (float)bufferHeight;
600
} else {
601
float pixelW = PSP_CoreParameter().pixelWidth;
602
float pixelH = PSP_CoreParameter().pixelHeight;
603
FRect frame = GetScreenFrame(pixelW, pixelH);
604
FRect rc;
605
CalculateDisplayOutputRect(&rc, 480, 272, frame, ROTATION_LOCKED_HORIZONTAL);
606
displayOffsetX = rc.x;
607
displayOffsetY = rc.y;
608
renderWidth = rc.w;
609
renderHeight = rc.h;
610
renderWidthFactor = renderWidth / 480.0f;
611
renderHeightFactor = renderHeight / 272.0f;
612
}
613
614
// We take care negative offsets of in the projection matrix.
615
// These come from split framebuffers (Killzone).
616
// TODO: Might be safe to do get rid of this here and do the same for positive offsets?
617
renderX = std::max(gstate_c.curRTOffsetX, 0);
618
renderY = std::max(gstate_c.curRTOffsetY, 0);
619
620
// Scissor
621
int scissorX1 = gstate.getScissorX1();
622
int scissorY1 = gstate.getScissorY1();
623
int scissorX2 = gstate.getScissorX2() + 1;
624
int scissorY2 = gstate.getScissorY2() + 1;
625
626
if (scissorX2 < scissorX1 || scissorY2 < scissorY1) {
627
out.scissorX = 0;
628
out.scissorY = 0;
629
out.scissorW = 0;
630
out.scissorH = 0;
631
} else {
632
out.scissorX = (renderX * renderWidthFactor) + displayOffsetX + scissorX1 * renderWidthFactor;
633
out.scissorY = (renderY * renderHeightFactor) + displayOffsetY + scissorY1 * renderHeightFactor;
634
out.scissorW = (scissorX2 - scissorX1) * renderWidthFactor;
635
out.scissorH = (scissorY2 - scissorY1) * renderHeightFactor;
636
}
637
638
int curRTWidth = gstate_c.curRTWidth;
639
int curRTHeight = gstate_c.curRTHeight;
640
641
float offsetX = gstate.getOffsetX();
642
float offsetY = gstate.getOffsetY();
643
644
DepthScaleFactors depthScale = GetDepthScaleFactors(gstate_c.UseFlags());
645
646
if (out.throughMode) {
647
// If renderX/renderY are offset to compensate for a split framebuffer,
648
// applying the offset to the viewport isn't enough, since the viewport clips.
649
// We need to apply either directly to the vertices, or to the "through" projection matrix.
650
out.viewportX = renderX * renderWidthFactor + displayOffsetX;
651
out.viewportY = renderY * renderHeightFactor + displayOffsetY;
652
out.viewportW = curRTWidth * renderWidthFactor;
653
out.viewportH = curRTHeight * renderHeightFactor;
654
out.depthRangeMin = depthScale.EncodeFromU16(0.0f);
655
out.depthRangeMax = depthScale.EncodeFromU16(65536.0f);
656
} else {
657
// These we can turn into a glViewport call, offset by offsetX and offsetY. Math after.
658
float vpXScale = gstate.getViewportXScale();
659
float vpXCenter = gstate.getViewportXCenter();
660
float vpYScale = gstate.getViewportYScale();
661
float vpYCenter = gstate.getViewportYCenter();
662
663
// The viewport transform appears to go like this:
664
// Xscreen = -offsetX + vpXCenter + vpXScale * Xview
665
// Yscreen = -offsetY + vpYCenter + vpYScale * Yview
666
// Zscreen = vpZCenter + vpZScale * Zview
667
668
// The viewport is normally centered at 2048,2048 but can also be centered at other locations.
669
// Offset is subtracted from the viewport center and is also set to values in those ranges, and is set so that the viewport will cover
670
// the desired screen area ([0-480)x[0-272)), so 1808,1912.
671
672
// This means that to get the analogue glViewport we must:
673
float vpX0 = vpXCenter - offsetX - fabsf(vpXScale);
674
float vpY0 = vpYCenter - offsetY - fabsf(vpYScale);
675
gstate_c.vpWidth = vpXScale * 2.0f;
676
gstate_c.vpHeight = vpYScale * 2.0f;
677
678
float vpWidth = fabsf(gstate_c.vpWidth);
679
float vpHeight = fabsf(gstate_c.vpHeight);
680
681
float left = renderX + vpX0;
682
float top = renderY + vpY0;
683
float right = left + vpWidth;
684
float bottom = top + vpHeight;
685
686
out.widthScale = 1.0f;
687
out.xOffset = 0.0f;
688
out.heightScale = 1.0f;
689
out.yOffset = 0.0f;
690
691
// If we're within the bounds, we want clipping the viewport way. So leave it be.
692
{
693
float overageLeft = std::max(-left, 0.0f);
694
float overageRight = std::max(right - bufferWidth, 0.0f);
695
696
// Expand viewport to cover scissor region. The viewport doesn't clip on the PSP.
697
if (right < scissorX2) {
698
overageRight -= scissorX2 - right;
699
}
700
if (left > scissorX1) {
701
overageLeft += scissorX1 - left;
702
}
703
704
// Our center drifted by the difference in overages.
705
float drift = overageRight - overageLeft;
706
707
if (overageLeft != 0.0f || overageRight != 0.0f) {
708
left += overageLeft;
709
right -= overageRight;
710
711
// Protect against the viewport being entirely outside the scissor.
712
// Emit a tiny but valid viewport. Really, we should probably emit a flag to ignore draws.
713
if (right <= left) {
714
right = left + 1.0f;
715
}
716
717
out.widthScale = vpWidth / (right - left);
718
out.xOffset = drift / (right - left);
719
}
720
}
721
722
{
723
float overageTop = std::max(-top, 0.0f);
724
float overageBottom = std::max(bottom - bufferHeight, 0.0f);
725
726
// Expand viewport to cover scissor region. The viewport doesn't clip on the PSP.
727
if (bottom < scissorY2) {
728
overageBottom -= scissorY2 - bottom;
729
}
730
if (top > scissorY1) {
731
overageTop += scissorY1 - top;
732
}
733
// Our center drifted by the difference in overages.
734
float drift = overageBottom - overageTop;
735
736
if (overageTop != 0.0f || overageBottom != 0.0f) {
737
top += overageTop;
738
bottom -= overageBottom;
739
740
// Protect against the viewport being entirely outside the scissor.
741
// Emit a tiny but valid viewport. Really, we should probably emit a flag to ignore draws.
742
if (bottom <= top) {
743
bottom = top + 1.0f;
744
}
745
746
out.heightScale = vpHeight / (bottom - top);
747
out.yOffset = drift / (bottom - top);
748
}
749
}
750
751
out.viewportX = left * renderWidthFactor + displayOffsetX;
752
out.viewportY = top * renderHeightFactor + displayOffsetY;
753
out.viewportW = (right - left) * renderWidthFactor;
754
out.viewportH = (bottom - top) * renderHeightFactor;
755
756
// The depth viewport parameters are the same, but we handle it a bit differently.
757
// When clipping is enabled, depth is clamped to [0, 65535]. And minz/maxz discard.
758
// So, we apply the depth range as minz/maxz, and transform for the viewport.
759
float vpZScale = gstate.getViewportZScale();
760
float vpZCenter = gstate.getViewportZCenter();
761
// TODO: This clip the entire draw if minz > maxz.
762
float minz = gstate.getDepthRangeMin();
763
float maxz = gstate.getDepthRangeMax();
764
765
if (gstate.isDepthClampEnabled() && (minz == 0 || maxz == 65535)) {
766
// Here, we should "clamp." But clamping per fragment would be slow.
767
// So, instead, we just increase the available range and hope.
768
// If depthSliceFactor is 4, it means (75% / 2) of the depth lies in each direction.
769
float fullDepthRange = 65535.0f * (depthScale.Scale() - 1.0f) * (1.0f / 2.0f);
770
if (minz == 0) {
771
minz -= fullDepthRange;
772
}
773
if (maxz == 65535) {
774
maxz += fullDepthRange;
775
}
776
} else if (maxz == 65535) {
777
// This means clamp isn't enabled, but we still want to allow values up to 65535.99.
778
// If DepthSliceFactor() is 1.0, though, this would make out.depthRangeMax exceed 1.
779
// Since that would clamp, it would make Z=1234 not match between draws when maxz changes.
780
if (depthScale.Scale() > 1.0f)
781
maxz = 65535.99f;
782
}
783
784
// Okay. So, in our shader, -1 will map to minz, and +1 will map to maxz.
785
float halfActualZRange = (maxz - minz) * (1.0f / 2.0f);
786
out.depthScale = halfActualZRange < std::numeric_limits<float>::epsilon() ? 1.0f : vpZScale / halfActualZRange;
787
// This adjusts the center from halfActualZRange to vpZCenter.
788
out.zOffset = halfActualZRange < std::numeric_limits<float>::epsilon() ? 0.0f : (vpZCenter - (minz + halfActualZRange)) / halfActualZRange;
789
790
if (!gstate_c.Use(GPU_USE_ACCURATE_DEPTH)) {
791
out.depthScale = 1.0f;
792
out.zOffset = 0.0f;
793
out.depthRangeMin = depthScale.EncodeFromU16(vpZCenter - vpZScale);
794
out.depthRangeMax = depthScale.EncodeFromU16(vpZCenter + vpZScale);
795
} else {
796
out.depthRangeMin = depthScale.EncodeFromU16(minz);
797
out.depthRangeMax = depthScale.EncodeFromU16(maxz);
798
}
799
800
// OpenGL will clamp these for us anyway, and Direct3D will error if not clamped.
801
// Of course, if this happens we've skewed out.depthScale/out.zOffset and may get z-fighting.
802
out.depthRangeMin = std::max(out.depthRangeMin, 0.0f);
803
out.depthRangeMax = std::min(out.depthRangeMax, 1.0f);
804
}
805
}
806
807
void UpdateCachedViewportState(const ViewportAndScissor &vpAndScissor) {
808
if (vpAndScissor.throughMode)
809
return;
810
811
bool scaleChanged = gstate_c.vpWidthScale != vpAndScissor.widthScale || gstate_c.vpHeightScale != vpAndScissor.heightScale;
812
bool offsetChanged = gstate_c.vpXOffset != vpAndScissor.xOffset || gstate_c.vpYOffset != vpAndScissor.yOffset;
813
bool depthChanged = gstate_c.vpDepthScale != vpAndScissor.depthScale || gstate_c.vpZOffset != vpAndScissor.zOffset;
814
if (scaleChanged || offsetChanged || depthChanged) {
815
gstate_c.vpWidthScale = vpAndScissor.widthScale;
816
gstate_c.vpHeightScale = vpAndScissor.heightScale;
817
gstate_c.vpDepthScale = vpAndScissor.depthScale;
818
gstate_c.vpXOffset = vpAndScissor.xOffset;
819
gstate_c.vpYOffset = vpAndScissor.yOffset;
820
gstate_c.vpZOffset = vpAndScissor.zOffset;
821
822
gstate_c.Dirty(DIRTY_PROJMATRIX);
823
if (depthChanged) {
824
gstate_c.Dirty(DIRTY_DEPTHRANGE);
825
}
826
}
827
}
828
829
// Source blend factor lookup, one entry per GE_SRCBLEND_* value (0..10).
// The "double" variants map to the same factor as their plain counterpart.
static constexpr BlendFactor genericALookup[11] = {
	BlendFactor::DST_COLOR,            //  0: GE_SRCBLEND_DSTCOLOR
	BlendFactor::ONE_MINUS_DST_COLOR,  //  1: GE_SRCBLEND_INVDSTCOLOR
	BlendFactor::SRC_ALPHA,            //  2: GE_SRCBLEND_SRCALPHA
	BlendFactor::ONE_MINUS_SRC_ALPHA,  //  3: GE_SRCBLEND_INVSRCALPHA
	BlendFactor::DST_ALPHA,            //  4: GE_SRCBLEND_DSTALPHA
	BlendFactor::ONE_MINUS_DST_ALPHA,  //  5: GE_SRCBLEND_INVDSTALPHA
	BlendFactor::SRC_ALPHA,            //  6: GE_SRCBLEND_DOUBLESRCALPHA
	BlendFactor::ONE_MINUS_SRC_ALPHA,  //  7: GE_SRCBLEND_DOUBLEINVSRCALPHA
	BlendFactor::DST_ALPHA,            //  8: GE_SRCBLEND_DOUBLEDSTALPHA
	BlendFactor::ONE_MINUS_DST_ALPHA,  //  9: GE_SRCBLEND_DOUBLEINVDSTALPHA
	BlendFactor::CONSTANT_COLOR,       // 10: GE_SRCBLEND_FIXA
};
842
843
// Host blend factor for each PSP destination blend factor. ConvertBlendState indexes this
// with the destination factor after clamping it to GE_DSTBLEND_FIXB, hence the 11 entries.
// The "double" modes map to the same factor as their plain counterparts.
// Note that ConvertBlendState resolves GE_DSTBLEND_FIXB through blendColor2Func instead
// of reading the last entry.
static const BlendFactor genericBLookup[11] = {
	BlendFactor::SRC_COLOR,
	BlendFactor::ONE_MINUS_SRC_COLOR,
	BlendFactor::SRC_ALPHA,
	BlendFactor::ONE_MINUS_SRC_ALPHA,
	BlendFactor::DST_ALPHA,
	BlendFactor::ONE_MINUS_DST_ALPHA,
	BlendFactor::SRC_ALPHA,            // GE_DSTBLEND_DOUBLESRCALPHA
	BlendFactor::ONE_MINUS_SRC_ALPHA,  // GE_DSTBLEND_DOUBLEINVSRCALPHA
	BlendFactor::DST_ALPHA,            // GE_DSTBLEND_DOUBLEDSTALPHA
	BlendFactor::ONE_MINUS_DST_ALPHA,  // GE_DSTBLEND_DOUBLEINVDSTALPHA
	BlendFactor::CONSTANT_COLOR,       // FIXB
};
856
857
// Blend equation table used by ConvertBlendState when GPU_USE_BLEND_MINMAX is not
// available. Indexed by gstate.getBlendEq(); MIN, MAX and ABSDIFF all fall back to ADD,
// as do the two trailing entries that have no named mode here.
static const BlendEq eqLookupNoMinMax[] = {
	BlendEq::ADD,
	BlendEq::SUBTRACT,
	BlendEq::REVERSE_SUBTRACT,
	BlendEq::ADD,  // GE_BLENDMODE_MIN
	BlendEq::ADD,  // GE_BLENDMODE_MAX
	BlendEq::ADD,  // GE_BLENDMODE_ABSDIFF
	BlendEq::ADD,
	BlendEq::ADD,
};
867
868
// Blend equation table used by ConvertBlendState when GPU_USE_BLEND_MINMAX is available.
// Indexed by gstate.getBlendEq(). ABSDIFF is mapped to MAX here, and the two trailing
// entries that have no named mode fall back to ADD.
static const BlendEq eqLookup[] = {
	BlendEq::ADD,
	BlendEq::SUBTRACT,
	BlendEq::REVERSE_SUBTRACT,
	BlendEq::MIN,  // GE_BLENDMODE_MIN
	BlendEq::MAX,  // GE_BLENDMODE_MAX
	BlendEq::MAX,  // GE_BLENDMODE_ABSDIFF
	BlendEq::ADD,
	BlendEq::ADD,
};
878
879
// Swaps the two source-alpha factors for their SRC1 (dual source) counterparts.
// Every other factor is returned unchanged.
static BlendFactor toDualSource(BlendFactor blendfunc) {
	if (blendfunc == BlendFactor::SRC_ALPHA)
		return BlendFactor::SRC1_ALPHA;
	if (blendfunc == BlendFactor::ONE_MINUS_SRC_ALPHA)
		return BlendFactor::ONE_MINUS_SRC1_ALPHA;
	return blendfunc;
}
889
890
// Tries to express a fixed blend color as a plain ONE or ZERO factor.
//
// fix:    the fixed color; exact matches are 0xFFFFFF (ONE) and 0 (ZERO).
// approx: set to true whenever fix is not one of the two exact values. It is never
//         cleared here, so callers must initialize it themselves.
//
// Returns ONE/ZERO for exact matches and for colors whose three components are all
// >= 0.99 or all <= 0.01, and BlendFactor::INVALID otherwise.
static BlendFactor blendColor2Func(u32 fix, bool &approx) {
	switch (fix) {
	case 0xFFFFFF:
		return BlendFactor::ONE;
	case 0:
		return BlendFactor::ZERO;
	default:
		break;
	}

	// From here on, picking ONE/ZERO is only an approximation.
	approx = true;

	const Vec3f color = Vec3f::FromRGB(fix);
	if (color.x >= 0.99 && color.y >= 0.99 && color.z >= 0.99) {
		return BlendFactor::ONE;
	}
	if (color.x <= 0.01 && color.y <= 0.01 && color.z <= 0.01) {
		return BlendFactor::ZERO;
	}
	return BlendFactor::INVALID;
}
906
907
// abs is a quagmire of compiler incompatibilities, so...
inline int iabs(int x) {
	return x < 0 ? -x : x;
}

// Returns true when each of the three low bytes of a and b differs by at most margin.
// Bits 24-31 of both values are ignored.
//
// The per-channel difference is computed in int: subtracting the masked uint32_t values
// directly would wrap around whenever b's channel is larger, and then depend on how an
// out-of-range unsigned value converts to int.
static inline bool blendColorSimilar(uint32_t a, uint32_t b, int margin = 25) {  // 25 ~= 0.1 * 255
	for (int shift = 0; shift < 24; shift += 8) {
		const int channelA = static_cast<int>((a >> shift) & 0xff);
		const int channelB = static_cast<int>((b >> shift) & 0xff);
		if (iabs(channelA - channelB) > margin)
			return false;
	}
	return true;
}
920
921
// Try to simulate some common logic ops by using blend, if needed.
922
// The shader might also need modification, the below function SimulateLogicOpShaderTypeIfNeeded
923
// takes care of that.
924
static bool SimulateLogicOpIfNeeded(BlendFactor &srcBlend, BlendFactor &dstBlend, BlendEq &blendEq) {
925
if (!gstate.isLogicOpEnabled())
926
return false;
927
928
// Note: our shader solution applies logic ops BEFORE blending, not correctly after.
929
// This is however fine for the most common ones, like CLEAR/NOOP/SET, etc.
930
if (!gstate_c.Use(GPU_USE_LOGIC_OP)) {
931
switch (gstate.getLogicOp()) {
932
case GE_LOGIC_CLEAR:
933
srcBlend = BlendFactor::ZERO;
934
dstBlend = BlendFactor::ZERO;
935
blendEq = BlendEq::ADD;
936
return true;
937
case GE_LOGIC_AND:
938
case GE_LOGIC_AND_REVERSE:
939
WARN_LOG_REPORT_ONCE(d3dLogicOpAnd, Log::G3D, "Unsupported AND logic op: %x", gstate.getLogicOp());
940
break;
941
case GE_LOGIC_COPY:
942
// This is the same as off.
943
break;
944
case GE_LOGIC_COPY_INVERTED:
945
// Handled in the shader.
946
break;
947
case GE_LOGIC_AND_INVERTED:
948
case GE_LOGIC_NOR:
949
case GE_LOGIC_NAND:
950
case GE_LOGIC_EQUIV:
951
// Handled in the shader.
952
WARN_LOG_REPORT_ONCE(d3dLogicOpAndInverted, Log::G3D, "Attempted invert for logic op: %x", gstate.getLogicOp());
953
break;
954
case GE_LOGIC_INVERTED:
955
srcBlend = BlendFactor::ONE;
956
dstBlend = BlendFactor::ONE;
957
blendEq = BlendEq::SUBTRACT;
958
WARN_LOG_REPORT_ONCE(d3dLogicOpInverted, Log::G3D, "Attempted inverse for logic op: %x", gstate.getLogicOp());
959
return true;
960
case GE_LOGIC_NOOP:
961
srcBlend = BlendFactor::ZERO;
962
dstBlend = BlendFactor::ONE;
963
blendEq = BlendEq::ADD;
964
return true;
965
case GE_LOGIC_XOR:
966
WARN_LOG_REPORT_ONCE(d3dLogicOpOrXor, Log::G3D, "Unsupported XOR logic op: %x", gstate.getLogicOp());
967
break;
968
case GE_LOGIC_OR:
969
case GE_LOGIC_OR_INVERTED:
970
// Inverted in shader.
971
srcBlend = BlendFactor::ONE;
972
dstBlend = BlendFactor::ONE;
973
blendEq = BlendEq::ADD;
974
WARN_LOG_REPORT_ONCE(d3dLogicOpOr, Log::G3D, "Attempted or for logic op: %x", gstate.getLogicOp());
975
return true;
976
case GE_LOGIC_OR_REVERSE:
977
WARN_LOG_REPORT_ONCE(d3dLogicOpOrReverse, Log::G3D, "Unsupported OR REVERSE logic op: %x", gstate.getLogicOp());
978
break;
979
case GE_LOGIC_SET:
980
srcBlend = BlendFactor::ONE;
981
dstBlend = BlendFactor::ONE;
982
blendEq = BlendEq::ADD;
983
WARN_LOG_REPORT_ONCE(d3dLogicOpSet, Log::G3D, "Attempted set for logic op: %x", gstate.getLogicOp());
984
return true;
985
}
986
} else {
987
// Even if we support hardware logic ops, alpha is handled wrong.
988
// It's better to override blending for the simple cases.
989
switch (gstate.getLogicOp()) {
990
case GE_LOGIC_CLEAR:
991
srcBlend = BlendFactor::ZERO;
992
dstBlend = BlendFactor::ZERO;
993
blendEq = BlendEq::ADD;
994
return true;
995
case GE_LOGIC_NOOP:
996
srcBlend = BlendFactor::ZERO;
997
dstBlend = BlendFactor::ONE;
998
blendEq = BlendEq::ADD;
999
return true;
1000
1001
default:
1002
// Let's hope hardware gets it right.
1003
return false;
1004
}
1005
}
1006
return false;
1007
}
1008
1009
// Choose the shader part of the above logic op fallback simulation.
1010
SimulateLogicOpType SimulateLogicOpShaderTypeIfNeeded() {
1011
if (!gstate_c.Use(GPU_USE_LOGIC_OP) && gstate.isLogicOpEnabled()) {
1012
switch (gstate.getLogicOp()) {
1013
case GE_LOGIC_COPY_INVERTED:
1014
case GE_LOGIC_AND_INVERTED:
1015
case GE_LOGIC_OR_INVERTED:
1016
case GE_LOGIC_NOR:
1017
case GE_LOGIC_NAND:
1018
case GE_LOGIC_EQUIV:
1019
return LOGICOPTYPE_INVERT;
1020
case GE_LOGIC_INVERTED:
1021
return LOGICOPTYPE_ONE;
1022
case GE_LOGIC_SET:
1023
return LOGICOPTYPE_ONE;
1024
default:
1025
return LOGICOPTYPE_NORMAL;
1026
}
1027
}
1028
return LOGICOPTYPE_NORMAL;
1029
}
1030
1031
// Sets up blendState for the case where PSP blending itself is not applied.
// When replaceAlphaWithStencil is REPLACE_ALPHA_YES and the stencil type is one of
// INCR/DECR/INVERT, blending is enabled purely so the alpha channel can be combined
// with the existing value; color keeps the src * ONE + dst * ZERO setup.
void ApplyStencilReplaceAndLogicOpIgnoreBlend(ReplaceAlphaType replaceAlphaWithStencil, GenericBlendState &blendState) {
	const StencilValueType stencilType =
		replaceAlphaWithStencil == REPLACE_ALPHA_YES ? ReplaceAlphaWithStencilType() : STENCIL_VALUE_KEEP;

	// Normally, we would add src + 0 with blending off, but the logic op may have us do differently.
	BlendFactor srcBlend = BlendFactor::ONE;
	BlendFactor dstBlend = BlendFactor::ZERO;
	BlendEq blendEq = BlendEq::ADD;

	// We're not blending, but we may still want to "blend" for stencil.
	// This is only useful for INCR/DECR/INVERT.  Others can write directly.
	BlendEq alphaEq = BlendEq::ADD;
	switch (stencilType) {
	case STENCIL_VALUE_INCR_4:
	case STENCIL_VALUE_INCR_8:
		// We'll add the incremented value output by the shader.
		alphaEq = BlendEq::ADD;
		break;

	case STENCIL_VALUE_DECR_4:
	case STENCIL_VALUE_DECR_8:
		// We'll subtract the incremented value output by the shader.
		alphaEq = BlendEq::SUBTRACT;
		break;

	case STENCIL_VALUE_INVERT:
		// The shader will output one, and reverse subtracting will essentially invert.
		alphaEq = BlendEq::REVERSE_SUBTRACT;
		break;

	default:
		// No alpha trick needed: blending is only kept on if the color setup is non-trivial.
		if (srcBlend == BlendFactor::ONE && dstBlend == BlendFactor::ZERO && blendEq == BlendEq::ADD) {
			blendState.blendEnabled = false;
		} else {
			blendState.blendEnabled = true;
			blendState.setFactors(srcBlend, dstBlend, BlendFactor::ONE, BlendFactor::ZERO);
			blendState.setEquation(blendEq, BlendEq::ADD);
		}
		return;
	}

	// Shared tail for INCR/DECR/INVERT: alpha is combined as src * ONE (op) dst * ONE.
	blendState.blendEnabled = true;
	blendState.setFactors(srcBlend, dstBlend, BlendFactor::ONE, BlendFactor::ONE);
	blendState.setEquation(blendEq, alphaEq);
}
1079
1080
// Tells the Convert*State helpers whether they may rely on reading the framebuffer
// in the shader.
enum class FBReadSetting {
	// An earlier conversion step already requires the framebuffer read.
	Forced,
	// The framebuffer read may be used if a step needs it.
	Allowed,
	// The framebuffer read must not be used; steps fall back to approximations.
	Disallowed,
};
1085
1086
// If we can we emulate the colorMask by simply toggling the full R G B A masks offered
1087
// by modern hardware, we do that. This is 99.9% of the time.
1088
// When that's not enough, we fall back on a technique similar to shader blending,
1089
// we read from the framebuffer (or a copy of it).
1090
// We also prepare uniformMask so that if doing this in the shader gets forced-on,
1091
// we have the right mask already.
1092
static void ConvertMaskState(GenericMaskState &maskState, FBReadSetting useShader) {
1093
if (gstate_c.blueToAlpha) {
1094
maskState.applyFramebufferRead = false;
1095
maskState.uniformMask = 0xFF000000;
1096
maskState.channelMask = 0x8;
1097
return;
1098
}
1099
1100
// Invert to convert masks from the PSP's format where 1 is don't draw to PC where 1 is draw.
1101
uint32_t colorMask = ~((gstate.pmskc & 0xFFFFFF) | (gstate.pmska << 24));
1102
1103
maskState.uniformMask = colorMask;
1104
maskState.applyFramebufferRead = false;
1105
maskState.channelMask = 0;
1106
for (int i = 0; i < 4; i++) {
1107
uint32_t channelMask = (colorMask >> (i * 8)) & 0xFF;
1108
switch (channelMask) {
1109
case 0x0:
1110
break;
1111
case 0xFF:
1112
maskState.channelMask |= 1 << i;
1113
break;
1114
default:
1115
if (useShader != FBReadSetting::Disallowed && PSP_CoreParameter().compat.flags().ShaderColorBitmask) {
1116
// Shaders can emulate masking accurately. Let's make use of that.
1117
maskState.applyFramebufferRead = true;
1118
maskState.channelMask |= 1 << i;
1119
} else {
1120
// Use the old inaccurate heuristic.
1121
if (channelMask >= 128) {
1122
maskState.channelMask |= 1 << i;
1123
}
1124
}
1125
}
1126
}
1127
1128
// Let's not write to alpha if stencil isn't enabled.
1129
// Also if the stencil type is set to KEEP, we shouldn't write to the stencil/alpha channel.
1130
if (IsStencilTestOutputDisabled() || ReplaceAlphaWithStencilType() == STENCIL_VALUE_KEEP) {
1131
maskState.channelMask &= ~8;
1132
maskState.uniformMask &= ~0xFF000000;
1133
}
1134
1135
// For 5551, only the top alpha bit matters. We might even want to swizzle 4444.
1136
// Alpha should correctly read as 255 from a 5551 texture.
1137
if (gstate.FrameBufFormat() == GE_FORMAT_5551) {
1138
if ((maskState.uniformMask & 0x80000000) != 0)
1139
maskState.uniformMask |= 0xFF000000;
1140
else
1141
maskState.uniformMask &= ~0xFF000000;
1142
}
1143
}
1144
1145
// Called even if AlphaBlendEnable == false - it also deals with stencil-related blend state.
1146
static void ConvertBlendState(GenericBlendState &blendState, FBReadSetting useFBRead) {
1147
// Blending is a bit complex to emulate. This is due to several reasons:
1148
//
1149
// * Doubled blend modes (src, dst, inversed) aren't supported in OpenGL.
1150
// If possible, we double the src color or src alpha in the shader to account for these.
1151
// These may clip incorrectly, so we avoid unfortunately.
1152
// * OpenGL only has one arbitrary fixed color. We premultiply the other in the shader.
1153
// * The written output alpha should actually be the stencil value. Alpha is not written.
1154
//
1155
// If we can't apply blending, we make a copy of the framebuffer and do it manually.
1156
1157
blendState.applyFramebufferRead = false;
1158
blendState.dirtyShaderBlendFixValues = false;
1159
blendState.useBlendColor = false;
1160
1161
ReplaceBlendType replaceBlend = ReplaceBlendWithShader(gstate_c.framebufFormat);
1162
if (useFBRead == FBReadSetting::Forced) {
1163
// Enforce blend replacement if enabled. If not, shouldn't do anything of course.
1164
replaceBlend = gstate.isAlphaBlendEnabled() ? REPLACE_BLEND_READ_FRAMEBUFFER : REPLACE_BLEND_NO;
1165
}
1166
1167
blendState.replaceBlend = replaceBlend;
1168
1169
blendState.simulateLogicOpType = SimulateLogicOpShaderTypeIfNeeded();
1170
1171
ReplaceAlphaType replaceAlphaWithStencil = ReplaceAlphaWithStencil(replaceBlend);
1172
blendState.replaceAlphaWithStencil = replaceAlphaWithStencil;
1173
1174
bool usePreSrc = false;
1175
1176
bool blueToAlpha = false;
1177
1178
switch (replaceBlend) {
1179
case REPLACE_BLEND_NO:
1180
// We may still want to do something about stencil -> alpha.
1181
ApplyStencilReplaceAndLogicOpIgnoreBlend(replaceAlphaWithStencil, blendState);
1182
1183
if (useFBRead == FBReadSetting::Forced) {
1184
// If this is true, the logic and mask replacements will be applied, at least. In that case,
1185
// we should not apply any logic op simulation.
1186
blendState.simulateLogicOpType = LOGICOPTYPE_NORMAL;
1187
}
1188
return;
1189
1190
case REPLACE_BLEND_BLUE_TO_ALPHA:
1191
blueToAlpha = true;
1192
blendState.blendEnabled = gstate.isAlphaBlendEnabled();
1193
// We'll later convert the color blend to blend in the alpha channel.
1194
break;
1195
1196
case REPLACE_BLEND_READ_FRAMEBUFFER:
1197
blendState.blendEnabled = true;
1198
blendState.applyFramebufferRead = true;
1199
blendState.simulateLogicOpType = LOGICOPTYPE_NORMAL;
1200
break;
1201
1202
case REPLACE_BLEND_PRE_SRC:
1203
case REPLACE_BLEND_PRE_SRC_2X_ALPHA:
1204
blendState.blendEnabled = true;
1205
usePreSrc = true;
1206
break;
1207
1208
case REPLACE_BLEND_STANDARD:
1209
case REPLACE_BLEND_2X_ALPHA:
1210
case REPLACE_BLEND_2X_SRC:
1211
blendState.blendEnabled = true;
1212
break;
1213
}
1214
1215
const GEBlendMode blendFuncEq = gstate.getBlendEq();
1216
GEBlendSrcFactor blendFuncA = gstate.getBlendFuncA();
1217
GEBlendDstFactor blendFuncB = gstate.getBlendFuncB();
1218
const u32 fixA = gstate.getFixA();
1219
const u32 fixB = gstate.getFixB();
1220
1221
if (blendFuncA > GE_SRCBLEND_FIXA)
1222
blendFuncA = GE_SRCBLEND_FIXA;
1223
if (blendFuncB > GE_DSTBLEND_FIXB)
1224
blendFuncB = GE_DSTBLEND_FIXB;
1225
1226
int constantAlpha = 255;
1227
BlendFactor constantAlphaGL = BlendFactor::ONE;
1228
if (!IsStencilTestOutputDisabled() && replaceAlphaWithStencil == REPLACE_ALPHA_NO) {
1229
switch (ReplaceAlphaWithStencilType()) {
1230
case STENCIL_VALUE_UNIFORM:
1231
constantAlpha = gstate.getStencilTestRef();
1232
break;
1233
1234
case STENCIL_VALUE_INCR_4:
1235
case STENCIL_VALUE_DECR_4:
1236
constantAlpha = 16;
1237
break;
1238
1239
case STENCIL_VALUE_INCR_8:
1240
case STENCIL_VALUE_DECR_8:
1241
constantAlpha = 1;
1242
break;
1243
1244
default:
1245
break;
1246
}
1247
1248
// Otherwise it will stay GL_ONE.
1249
if (constantAlpha <= 0) {
1250
constantAlphaGL = BlendFactor::ZERO;
1251
} else if (constantAlpha < 255) {
1252
constantAlphaGL = BlendFactor::CONSTANT_ALPHA;
1253
}
1254
}
1255
1256
// Shortcut by using GL_ONE where possible, no need to set blendcolor
1257
bool approxFuncA = false;
1258
BlendFactor glBlendFuncA = blendFuncA == GE_SRCBLEND_FIXA ? blendColor2Func(fixA, approxFuncA) : genericALookup[blendFuncA];
1259
bool approxFuncB = false;
1260
BlendFactor glBlendFuncB = blendFuncB == GE_DSTBLEND_FIXB ? blendColor2Func(fixB, approxFuncB) : genericBLookup[blendFuncB];
1261
1262
if (gstate_c.framebufFormat == GE_FORMAT_565) {
1263
if (blendFuncA == GE_SRCBLEND_DSTALPHA || blendFuncA == GE_SRCBLEND_DOUBLEDSTALPHA) {
1264
glBlendFuncA = BlendFactor::ZERO;
1265
}
1266
if (blendFuncA == GE_SRCBLEND_INVDSTALPHA || blendFuncA == GE_SRCBLEND_DOUBLEINVDSTALPHA) {
1267
glBlendFuncA = BlendFactor::ONE;
1268
}
1269
if (blendFuncB == GE_DSTBLEND_DSTALPHA || blendFuncB == GE_DSTBLEND_DOUBLEDSTALPHA) {
1270
glBlendFuncB = BlendFactor::ZERO;
1271
}
1272
if (blendFuncB == GE_DSTBLEND_INVDSTALPHA || blendFuncB == GE_DSTBLEND_DOUBLEINVDSTALPHA) {
1273
glBlendFuncB = BlendFactor::ONE;
1274
}
1275
}
1276
1277
if (usePreSrc) {
1278
glBlendFuncA = BlendFactor::ONE;
1279
// Need to pull in the fixed color. TODO: If it hasn't changed, no need to dirty.
1280
if (blendFuncA == GE_SRCBLEND_FIXA) {
1281
blendState.dirtyShaderBlendFixValues = true;
1282
}
1283
}
1284
1285
if (replaceAlphaWithStencil == REPLACE_ALPHA_DUALSOURCE) {
1286
glBlendFuncA = toDualSource(glBlendFuncA);
1287
glBlendFuncB = toDualSource(glBlendFuncB);
1288
}
1289
1290
if (blendFuncA == GE_SRCBLEND_FIXA || blendFuncB == GE_DSTBLEND_FIXB) {
1291
if (glBlendFuncA == BlendFactor::INVALID && glBlendFuncB != BlendFactor::INVALID) {
1292
// Can use blendcolor trivially.
1293
blendState.setBlendColor(fixA, constantAlpha);
1294
glBlendFuncA = BlendFactor::CONSTANT_COLOR;
1295
} else if (glBlendFuncA != BlendFactor::INVALID && glBlendFuncB == BlendFactor::INVALID) {
1296
// Can use blendcolor trivially.
1297
blendState.setBlendColor(fixB, constantAlpha);
1298
glBlendFuncB = BlendFactor::CONSTANT_COLOR;
1299
} else if (glBlendFuncA == BlendFactor::INVALID && glBlendFuncB == BlendFactor::INVALID) {
1300
if (blendColorSimilar(fixA, 0xFFFFFF ^ fixB)) {
1301
glBlendFuncA = BlendFactor::CONSTANT_COLOR;
1302
glBlendFuncB = BlendFactor::ONE_MINUS_CONSTANT_COLOR;
1303
blendState.setBlendColor(fixA, constantAlpha);
1304
} else if (blendColorSimilar(fixA, fixB)) {
1305
glBlendFuncA = BlendFactor::CONSTANT_COLOR;
1306
glBlendFuncB = BlendFactor::CONSTANT_COLOR;
1307
blendState.setBlendColor(fixA, constantAlpha);
1308
} else {
1309
DEBUG_LOG(Log::G3D, "ERROR INVALID blendcolorstate: FixA=%06x FixB=%06x FuncA=%i FuncB=%i", fixA, fixB, blendFuncA, blendFuncB);
1310
// Let's approximate, at least. Close is better than totally off.
1311
const bool nearZeroA = blendColorSimilar(fixA, 0, 64);
1312
const bool nearZeroB = blendColorSimilar(fixB, 0, 64);
1313
if (nearZeroA || blendColorSimilar(fixA, 0xFFFFFF, 64)) {
1314
glBlendFuncA = nearZeroA ? BlendFactor::ZERO : BlendFactor::ONE;
1315
glBlendFuncB = BlendFactor::CONSTANT_COLOR;
1316
blendState.setBlendColor(fixB, constantAlpha);
1317
} else {
1318
// We need to pick something. Let's go with A as the fixed color.
1319
glBlendFuncA = BlendFactor::CONSTANT_COLOR;
1320
glBlendFuncB = nearZeroB ? BlendFactor::ZERO : BlendFactor::ONE;
1321
blendState.setBlendColor(fixA, constantAlpha);
1322
}
1323
}
1324
} else {
1325
// We optimized both, but that's probably not necessary, so let's pick one to be constant.
1326
if (blendFuncA == GE_SRCBLEND_FIXA && !usePreSrc && approxFuncA) {
1327
glBlendFuncA = BlendFactor::CONSTANT_COLOR;
1328
blendState.setBlendColor(fixA, constantAlpha);
1329
} else if (approxFuncB) {
1330
glBlendFuncB = BlendFactor::CONSTANT_COLOR;
1331
blendState.setBlendColor(fixB, constantAlpha);
1332
} else {
1333
if (constantAlphaGL == BlendFactor::CONSTANT_ALPHA) {
1334
blendState.defaultBlendColor(constantAlpha);
1335
}
1336
}
1337
}
1338
} else {
1339
if (constantAlphaGL == BlendFactor::CONSTANT_ALPHA) {
1340
blendState.defaultBlendColor(constantAlpha);
1341
}
1342
}
1343
1344
// Some Android devices (especially old Mali, it seems) composite badly if there's alpha in the backbuffer.
1345
// So in non-buffered rendering, we will simply consider the dest alpha to be zero in blending equations.
1346
#if PPSSPP_PLATFORM(ANDROID)
1347
if (g_Config.bSkipBufferEffects) {
1348
if (glBlendFuncA == BlendFactor::DST_ALPHA) glBlendFuncA = BlendFactor::ZERO;
1349
if (glBlendFuncB == BlendFactor::DST_ALPHA) glBlendFuncB = BlendFactor::ZERO;
1350
if (glBlendFuncA == BlendFactor::ONE_MINUS_DST_ALPHA) glBlendFuncA = BlendFactor::ONE;
1351
if (glBlendFuncB == BlendFactor::ONE_MINUS_DST_ALPHA) glBlendFuncB = BlendFactor::ONE;
1352
}
1353
#endif
1354
1355
// At this point, through all paths above, glBlendFuncA and glBlendFuncB will be set right somehow.
1356
BlendEq colorEq;
1357
if (gstate_c.Use(GPU_USE_BLEND_MINMAX)) {
1358
colorEq = eqLookup[blendFuncEq];
1359
} else {
1360
colorEq = eqLookupNoMinMax[blendFuncEq];
1361
}
1362
1363
// The stencil-to-alpha in fragment shader doesn't apply here (blending is enabled), and we shouldn't
1364
// do any blending in the alpha channel as that doesn't seem to happen on PSP. So, we attempt to
1365
// apply the stencil to the alpha, since that's what should be stored.
1366
BlendEq alphaEq = BlendEq::ADD;
1367
if (replaceAlphaWithStencil != REPLACE_ALPHA_NO) {
1368
// Let the fragment shader take care of it.
1369
switch (ReplaceAlphaWithStencilType()) {
1370
case STENCIL_VALUE_INCR_4:
1371
case STENCIL_VALUE_INCR_8:
1372
// We'll add the increment value.
1373
blendState.setFactors(glBlendFuncA, glBlendFuncB, BlendFactor::ONE, BlendFactor::ONE);
1374
break;
1375
1376
case STENCIL_VALUE_DECR_4:
1377
case STENCIL_VALUE_DECR_8:
1378
// Like add with a small value, but subtracting.
1379
blendState.setFactors(glBlendFuncA, glBlendFuncB, BlendFactor::ONE, BlendFactor::ONE);
1380
alphaEq = BlendEq::SUBTRACT;
1381
break;
1382
1383
case STENCIL_VALUE_INVERT:
1384
// This will subtract by one, effectively inverting the bits.
1385
blendState.setFactors(glBlendFuncA, glBlendFuncB, BlendFactor::ONE, BlendFactor::ONE);
1386
alphaEq = BlendEq::REVERSE_SUBTRACT;
1387
break;
1388
1389
default:
1390
blendState.setFactors(glBlendFuncA, glBlendFuncB, BlendFactor::ONE, BlendFactor::ZERO);
1391
break;
1392
}
1393
} else if (!IsStencilTestOutputDisabled()) {
1394
StencilValueType stencilValue = ReplaceAlphaWithStencilType();
1395
if (stencilValue == STENCIL_VALUE_UNIFORM && constantAlpha == 0x00) {
1396
stencilValue = STENCIL_VALUE_ZERO;
1397
} else if (stencilValue == STENCIL_VALUE_UNIFORM && constantAlpha == 0xFF) {
1398
stencilValue = STENCIL_VALUE_ONE;
1399
}
1400
switch (stencilValue) {
1401
case STENCIL_VALUE_KEEP:
1402
blendState.setFactors(glBlendFuncA, glBlendFuncB, BlendFactor::ZERO, BlendFactor::ONE);
1403
break;
1404
case STENCIL_VALUE_ONE:
1405
// This won't give one but it's our best shot...
1406
blendState.setFactors(glBlendFuncA, glBlendFuncB, BlendFactor::ONE, BlendFactor::ONE);
1407
break;
1408
case STENCIL_VALUE_ZERO:
1409
blendState.setFactors(glBlendFuncA, glBlendFuncB, BlendFactor::ZERO, BlendFactor::ZERO);
1410
break;
1411
case STENCIL_VALUE_UNIFORM:
1412
// This won't give a correct value (it multiplies) but it may be better than random values.
1413
blendState.setFactors(glBlendFuncA, glBlendFuncB, constantAlphaGL, BlendFactor::ZERO);
1414
break;
1415
case STENCIL_VALUE_INCR_4:
1416
case STENCIL_VALUE_INCR_8:
1417
// This won't give a correct value always, but it will try to increase at least.
1418
blendState.setFactors(glBlendFuncA, glBlendFuncB, constantAlphaGL, BlendFactor::ONE);
1419
break;
1420
case STENCIL_VALUE_DECR_4:
1421
case STENCIL_VALUE_DECR_8:
1422
// This won't give a correct value always, but it will try to decrease at least.
1423
blendState.setFactors(glBlendFuncA, glBlendFuncB, constantAlphaGL, BlendFactor::ONE);
1424
alphaEq = BlendEq::SUBTRACT;
1425
break;
1426
case STENCIL_VALUE_INVERT:
1427
blendState.setFactors(glBlendFuncA, glBlendFuncB, BlendFactor::ONE, BlendFactor::ONE);
1428
// If the output alpha is near 1, this will basically invert. It's our best shot.
1429
alphaEq = BlendEq::REVERSE_SUBTRACT;
1430
break;
1431
}
1432
} else if (blueToAlpha) {
1433
blendState.setFactors(BlendFactor::ZERO, BlendFactor::ZERO, BlendFactor::ONE, glBlendFuncB);
1434
blendState.setEquation(BlendEq::ADD, colorEq);
1435
return;
1436
} else {
1437
// Retain the existing value when stencil testing is off.
1438
blendState.setFactors(glBlendFuncA, glBlendFuncB, BlendFactor::ZERO, BlendFactor::ONE);
1439
}
1440
1441
blendState.setEquation(colorEq, alphaEq);
1442
}
1443
1444
// Decides how the current GE logic op is carried out and records it in logicOpState:
// not at all (disabled or COPY), in the shader via a framebuffer read, with the hardware
// logic op, or left to the blend-based simulation fallback.
//
// logicSupported:        hardware logic ops can be used.
// shaderBitOpsSupported: the shader can perform the bitwise operation itself.
// useFBRead:             whether a shader framebuffer read is forced, allowed or disallowed.
static void ConvertLogicOpState(GenericLogicState &logicOpState, bool logicSupported, bool shaderBitOpsSupported, FBReadSetting useFBRead) {
	// TODO: We can get more detailed with checks here. Some logic ops don't involve the destination at all.
	// Several can be trivially supported even without any bitwise logic.
	if (!gstate.isLogicOpEnabled() || gstate.getLogicOp() == GE_LOGIC_COPY) {
		// No matter what, don't need to do anything.
		logicOpState.logicOpEnabled = false;
		logicOpState.logicOp = GE_LOGIC_COPY;
		logicOpState.applyFramebufferRead = useFBRead == FBReadSetting::Forced;
		return;
	}

	// From here on the logic op is known to be enabled and not COPY.

	// TODO: Brave story uses GE_INVERTED, this is easy to convert to a blend function - unless blend is also enabled simultaneously.

	// Shared by both shader paths below.
	const auto emulateInShader = [&]() {
		logicOpState.logicOpEnabled = false;  // Don't use any hardware logic op, supported or not.
		logicOpState.applyFramebufferRead = true;
		logicOpState.logicOp = gstate.getLogicOp();
	};

	if (useFBRead == FBReadSetting::Forced && shaderBitOpsSupported) {
		// We have to emulate logic ops in the shader.
		emulateInShader();
	} else if (logicSupported) {
		// We can use hardware logic ops directly.
		logicOpState.applyFramebufferRead = false;
		logicOpState.logicOpEnabled = true;
		logicOpState.logicOp = gstate.getLogicOp();
	} else if (shaderBitOpsSupported && useFBRead != FBReadSetting::Disallowed) {
		// D3D11 and some OpenGL versions will end up here.
		// Logic ops not supported, bitops supported. Let's punt to the shader.
		// We should possibly always do this and never use the hardware ops, since they'll mishandle the alpha channel..
		emulateInShader();
	} else {
		// In this case, the SIMULATE fallback should kick in.
		// Need to make sure this is checking for the same things though...
		logicOpState.logicOpEnabled = false;
		logicOpState.logicOp = GE_LOGIC_COPY;
		logicOpState.applyFramebufferRead = false;
	}
}
1487
1488
// Rewrites the stencil test and ops in state for a 5551 framebuffer, where the buffer
// value is treated as either zero or 255 (see the comment before the switch below).
static void ConvertStencilFunc5551(GenericStencilFuncState &state) {
	// Flaws:
	// - INVERT should convert 1, 5, 0xFF to 0.  Currently it won't always.
	// - INCR twice shouldn't change the value.
	// - REPLACE should write 0 for 0x00 - 0x7F, and non-zero for 0x80 - 0xFF.
	// - Write mask may need double checking, but likely only the top bit matters.

	const bool opsReadRef = state.sFail == GE_STENCILOP_REPLACE || state.zFail == GE_STENCILOP_REPLACE || state.zPass == GE_STENCILOP_REPLACE;
	const u8 maskedRef = state.testRef & state.testMask;
	const u8 replaceRef = (state.testRef & 0x80) != 0 ? 0xFF : 0x00;

	// Both predicates are evaluated against the unmodified ref/mask, before any rewrite.
	const bool refIsZero = maskedRef == 0;
	const bool refIsMax = maskedRef == (0xFF & state.testMask) && state.testMask != 0;

	// Switches to the given test against ref, with the mask removed.
	auto setTest = [&](GEComparison func, u8 ref) {
		// We can only safely rewrite if it doesn't use the ref, or if the ref is the same.
		if (opsReadRef && replaceRef != ref)
			return;
		state.testFunc = func;
		state.testRef = ref;
		state.testMask = 0xFF;
	};
	// Turns the test into ALWAYS or NEVER. This will only be called if the test doesn't need the ref.
	auto setConstantResult = [&](bool pass) {
		state.testFunc = pass ? GE_COMP_ALWAYS : GE_COMP_NEVER;
		// If REPLACE needs the ref, rewrite it to 0x00 or 0xFF (the "best" values).
		// Otherwise it's not used, so let's make it 0xFF which is a useful value later.
		u8 ref = 0xFF;
		if (opsReadRef)
			ref = replaceRef;
		state.testRef = ref;
		// Nuke the mask as well, since this is always/never, just for consistency.
		state.testMask = 0xFF;
	};

	// For 5551, we treat any non-zero value in the buffer as 255.  Only zero is treated as zero.
	// See: https://github.com/hrydgard/ppsspp/pull/4150#issuecomment-26211193
	switch (state.testFunc) {
	case GE_COMP_NEVER:
	case GE_COMP_ALWAYS:
		// Fine as is.
		setConstantResult(state.testFunc == GE_COMP_ALWAYS);
		break;

	case GE_COMP_EQUAL: // maskedRef == maskedBuffer
		if (refIsZero) {
			// Remove any mask, we might have bits less than 255 but that should not match.
			setTest(GE_COMP_EQUAL, 0);
		} else if (refIsMax) {
			// Equal to 255, for our buffer, means not equal to zero.
			setTest(GE_COMP_NOTEQUAL, 0);
		} else {
			// This should never pass, regardless of buffer value.  Only 0 and 255 are directly equal.
			setConstantResult(false);
		}
		break;

	case GE_COMP_NOTEQUAL: // maskedRef != maskedBuffer
		if (refIsZero) {
			// Remove the mask, since our buffer might not be exactly 255.
			setTest(GE_COMP_NOTEQUAL, 0);
		} else if (refIsMax) {
			// The only value != 255 is 0, in our buffer.
			setTest(GE_COMP_EQUAL, 0);
		} else {
			// Every other value evaluates as not equal, always.
			setConstantResult(true);
		}
		break;

	case GE_COMP_LESS: // maskedRef < maskedBuffer
		if (refIsMax) {
			// No possible value is less than 255.
			setConstantResult(false);
		} else {
			// "0 < (0 or 255)" and "254 < (0 or 255)" can only work for non zero.
			setTest(GE_COMP_NOTEQUAL, 0);
		}
		break;

	case GE_COMP_LEQUAL: // maskedRef <= maskedBuffer
		if (refIsZero) {
			// 0 is <= every possible value.
			setConstantResult(true);
		} else {
			// "1 <= (0 or 255)" and "255 <= (0 or 255)" simply mean, anything but zero.
			setTest(GE_COMP_NOTEQUAL, 0);
		}
		break;

	case GE_COMP_GREATER: // maskedRef > maskedBuffer
		if (!refIsZero) {
			// "1 > (0 or 255)" and "255 > (0 or 255)" can only match 0.
			setTest(GE_COMP_EQUAL, 0);
		} else {
			// 0 is never greater than any possible value.
			setConstantResult(false);
		}
		break;

	case GE_COMP_GEQUAL: // maskedRef >= maskedBuffer
		if (refIsMax) {
			// 255 is >= every possible value.
			setConstantResult(true);
		} else {
			// "0 >= (0 or 255)" and "254 >= (0 or 255)" are the same, equal to zero.
			setTest(GE_COMP_EQUAL, 0);
		}
		break;
	}

	// Replaces every occurrence of one stencil op with another across sFail/zFail/zPass.
	auto replaceOp = [&](GEStencilOp from, GEStencilOp to) {
		if (state.sFail == from)
			state.sFail = to;
		if (state.zFail == from)
			state.zFail = to;
		if (state.zPass == from)
			state.zPass = to;
	};

	// Decrement always zeros, so let's rewrite those to be safe (even if it's not 1.)
	replaceOp(GE_STENCILOP_DECR, GE_STENCILOP_ZERO);

	// These look at the test as rewritten by the switch above.
	const bool comparesWithZero = state.testRef == 0 && state.testMask != 0;
	if (comparesWithZero && state.testFunc == GE_COMP_NOTEQUAL) {
		// If it's != 0 (as optimized above), then we can rewrite INVERT to ZERO.
		// With 1 bit of stencil, INVERT != 0 can only make it 0.
		replaceOp(GE_STENCILOP_INVERT, GE_STENCILOP_ZERO);
	}
	if (comparesWithZero && state.testFunc == GE_COMP_EQUAL) {
		// If it's == 0 (as optimized above), then we can rewrite INCR to INVERT.
		// Otherwise we get 1, which we mostly handle, but won't INVERT correctly.
		replaceOp(GE_STENCILOP_INCR, GE_STENCILOP_INVERT);
	}
	if (!opsReadRef && state.testRef == 0xFF) {
		// Safe to use REPLACE instead of INCR.
		replaceOp(GE_STENCILOP_INCR, GE_STENCILOP_REPLACE);
	}
}
1619
1620
// Collapses the stencil write mask to all-or-nothing for 5551: a mask with the top bit
// set (>= 0x80) becomes 0xff, anything else becomes 0x00.
static void ConvertStencilMask5551(GenericStencilFuncState &state) {
	if (state.writeMask >= 0x80) {
		state.writeMask = 0xff;
	} else {
		state.writeMask = 0x00;
	}
}
1623
1624
// Fills state from the current GE stencil registers, then adjusts it for the framebuffer
// format: 565 gets a zero write mask and 5551 gets the 5551-specific mask/test rewrites.
// When the stencil test is disabled only writeMask and enabled are updated.
void ConvertStencilFuncState(GenericStencilFuncState &state) {
	// The PSP's mask is reversed (bits not to write.)  Ignore enabled, used for clears too.
	state.writeMask = (~gstate.getStencilWriteMask()) & 0xFF;
	state.enabled = gstate.isStencilTestEnabled();
	if (!state.enabled) {
		if (gstate_c.framebufFormat == GE_FORMAT_5551)
			ConvertStencilMask5551(state);
		return;
	}

	state.sFail = gstate.getStencilOpSFail();
	state.zFail = gstate.getStencilOpZFail();
	state.zPass = gstate.getStencilOpZPass();

	state.testFunc = gstate.getStencilTestFunction();
	state.testRef = gstate.getStencilTestRef();
	state.testMask = gstate.getStencilTestMask();

	const bool depthTest = gstate.isDepthTestEnabled();
	// zFail only matters while the depth test is on.
	const bool allOpsSame = (state.sFail == state.zFail || !depthTest) && state.sFail == state.zPass;
	if (allOpsSame) {
		// Common case: we're writing only to stencil (usually REPLACE/REPLACE/REPLACE.)
		// We want to write stencil to alpha in this case, so switch to ALWAYS if already masked.
		const bool depthWrite = gstate.isDepthWriteEnabled();
		const bool colorFullyMasked = (gstate.getColorMask() & 0x00FFFFFF) == 0x00FFFFFF;
		if (colorFullyMasked && !(depthTest && depthWrite)) {
			state.testFunc = GE_COMP_ALWAYS;
		}
	}

	if (gstate_c.framebufFormat == GE_FORMAT_565) {
		state.writeMask = 0;
	} else if (gstate_c.framebufFormat == GE_FORMAT_5551) {
		ConvertStencilMask5551(state);
		ConvertStencilFunc5551(state);
	}
	// Hard to do anything useful for 4444, and 8888 is fine.
}
1667
1668
void GenericMaskState::Log() {
1669
WARN_LOG(Log::G3D, "Mask: %08x %01X readfb=%d", uniformMask, channelMask, applyFramebufferRead);
1670
}
1671
1672
void GenericBlendState::Log() {
1673
WARN_LOG(Log::G3D, "Blend: hwenable=%d readfb=%d replblend=%d replalpha=%d",
1674
blendEnabled, applyFramebufferRead, replaceBlend, (int)replaceAlphaWithStencil);
1675
}
1676
1677
void ComputedPipelineState::Convert(bool shaderBitOpsSupported, bool fbReadAllowed) {
1678
// Passing on the previous applyFramebufferRead as forceFrameBuffer read in the next one,
1679
// thus propagating forward.
1680
FBReadSetting readFB = (fbReadAllowed && shaderBitOpsSupported) ? FBReadSetting::Allowed : FBReadSetting::Disallowed;
1681
ConvertMaskState(maskState, readFB);
1682
readFB = maskState.applyFramebufferRead ? FBReadSetting::Forced : (fbReadAllowed ? FBReadSetting::Allowed : FBReadSetting::Disallowed);
1683
ConvertLogicOpState(logicState, gstate_c.Use(GPU_USE_LOGIC_OP), shaderBitOpsSupported, readFB);
1684
readFB = logicState.applyFramebufferRead ? FBReadSetting::Forced : (fbReadAllowed ? FBReadSetting::Allowed : FBReadSetting::Disallowed);
1685
ConvertBlendState(blendState, readFB);
1686
1687
// Note: If the blend state decided it had to use framebuffer reads,
1688
// we need to make sure that both mask and logic also use it, otherwise things will go wrong.
1689
if (blendState.applyFramebufferRead || logicState.applyFramebufferRead) {
1690
_dbg_assert_(fbReadAllowed);
1691
maskState.ConvertToShaderBlend();
1692
logicState.ConvertToShaderBlend();
1693
} else {
1694
// If it isn't a read, we may need to change blending to apply the logic op.
1695
logicState.ApplyToBlendState(blendState);
1696
}
1697
}
1698
1699
// Lets SimulateLogicOpIfNeeded() adjust the color blend factors/equation of
// `blendState`. If it reports success, blending is turned on (when it was not
// already) and this logic state is reset to a disabled plain COPY.
void GenericLogicState::ApplyToBlendState(GenericBlendState &blendState) {
	const bool simulated = SimulateLogicOpIfNeeded(blendState.srcColor, blendState.dstColor, blendState.eqColor);
	if (!simulated) {
		return;
	}

	if (!blendState.blendEnabled) {
		// Blending was off, so switch it on and give alpha pass-through
		// settings (ONE * src + ZERO * dst, ADD).
		blendState.blendEnabled = true;
		blendState.srcAlpha = BlendFactor::ONE;
		blendState.dstAlpha = BlendFactor::ZERO;
		blendState.eqAlpha = BlendEq::ADD;
	}

	// Blending now carries the logic op, so it is no longer needed here.
	logicOpEnabled = false;
	logicOp = GE_LOGIC_COPY;
}
1712
1713