Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
hrydgard
GitHub Repository: hrydgard/ppsspp
Path: blob/master/GPU/GPUState.h
3185 views
1
// Copyright (c) 2012- PPSSPP Project.
2
3
// This program is free software: you can redistribute it and/or modify
4
// it under the terms of the GNU General Public License as published by
5
// the Free Software Foundation, version 2.0 or later versions.
6
7
// This program is distributed in the hope that it will be useful,
8
// but WITHOUT ANY WARRANTY; without even the implied warranty of
9
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10
// GNU General Public License 2.0 for more details.
11
12
// A copy of the GPL 2.0 should have been included with the program.
13
// If not, see http://www.gnu.org/licenses/
14
15
// Official git repository and contact information can be found at
16
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
17
18
#pragma once
19
20
#include "ppsspp_config.h"
21
22
#include "Common/CommonTypes.h"
23
#include "Common/Swap.h"
24
#include "GPU/GPU.h"
25
#include "GPU/ge_constants.h"
26
#include "GPU/Common/ShaderCommon.h"
27
#include "Common/Math/SIMDHeaders.h"
28
29
class PointerWrap;
30
31
struct GPUgstate {
32
// Getting rid of this ugly union in favor of the accessor functions
33
// might be a good idea....
34
union {
35
u32 cmdmem[256];
36
struct {
37
u32 nop,
38
vaddr,
39
iaddr,
40
pad00,
41
prim,
42
bezier,
43
spline,
44
boundBox,
45
jump,
46
bjump,
47
call,
48
ret,
49
end,
50
pad01,
51
signal,
52
finish,
53
base,
54
pad02,
55
vertType,
56
offsetAddr,
57
origin,
58
region1,
59
region2,
60
lightingEnable,
61
lightEnable[4],
62
depthClampEnable,
63
cullfaceEnable,
64
textureMapEnable, // 0x1E GE_CMD_TEXTUREMAPENABLE
65
fogEnable,
66
ditherEnable,
67
alphaBlendEnable,
68
alphaTestEnable,
69
zTestEnable,
70
stencilTestEnable,
71
antiAliasEnable,
72
patchCullEnable,
73
colorTestEnable,
74
logicOpEnable,
75
pad03,
76
boneMatrixNumber,
77
boneMatrixData,
78
morphwgt[8], //dont use
79
pad04[2],
80
patchdivision,
81
patchprimitive,
82
patchfacing,
83
pad04_a,
84
85
worldmtxnum, // 0x3A
86
worldmtxdata, // 0x3B
87
viewmtxnum, // 0x3C
88
viewmtxdata, // 0x3D
89
projmtxnum, // 0x3E
90
projmtxdata, // 0x3F
91
texmtxnum, // 0x40
92
texmtxdata, // 0x41
93
94
viewportxscale, // 0x42
95
viewportyscale, // 0x43
96
viewportzscale, // 0x44
97
viewportxcenter, // 0x45
98
viewportycenter, // 0x46
99
viewportzcenter, // 0x47
100
texscaleu, // 0x48
101
texscalev, // 0x49
102
texoffsetu, // 0x4A
103
texoffsetv, // 0x4B
104
offsetx, // 0x4C
105
offsety, // 0x4D
106
pad111[2],
107
shademodel, // 0x50
108
reversenormals, // 0x51
109
pad222,
110
materialupdate, // 0x53
111
materialemissive, // 0x54
112
materialambient, // 0x55
113
materialdiffuse, // 0x56
114
materialspecular, // 0x57
115
materialalpha, // 0x58
116
pad333[2],
117
materialspecularcoef, // 0x5B
118
ambientcolor, // 0x5C
119
ambientalpha, // 0x5D
120
lmode, // 0x5E GE_CMD_LIGHTMODE
121
ltype[4], // 0x5F-0x62 GE_CMD_LIGHTTYPEx
122
lpos[12], // 0x63-0x6E
123
ldir[12], // 0x6F-0x7A
124
latt[12], // 0x7B-0x86
125
lconv[4], // 0x87-0x8A
126
lcutoff[4], // 0x8B-0x8E
127
lcolor[12], // 0x8F-0x9A
128
cullmode, // 0x9B
129
fbptr, // 0x9C
130
fbwidth, // 0x9D
131
zbptr, // 0x9E
132
zbwidth, // 0x9F
133
texaddr[8], // 0xA0-0xA7
134
texbufwidth[8], // 0xA8-0xAF
135
clutaddr, // 0xB0
136
clutaddrupper, // 0xB1
137
transfersrc, // 0xB2
138
transfersrcw, // 0xB3
139
transferdst, // 0xB4
140
transferdstw, // 0xB5
141
padxxx[2],
142
texsize[8], // 0xB8-BF
143
texmapmode, // 0xC0
144
texshade, // 0xC1
145
texmode, // 0xC2 GE_CMD_TEXMODE
146
texformat, // 0xC3
147
loadclut, // 0xC4
148
clutformat, // 0xC5
149
texfilter, // 0xC6
150
texwrap, // 0xC7
151
texlevel, // 0xC8
152
texfunc, // 0xC9
153
texenvcolor, // 0xCA
154
texflush, // 0xCB
155
texsync, // 0xCC
156
fog1, // 0xCD
157
fog2, // 0xCE
158
fogcolor, // 0xCF
159
texlodslope, // 0xD0
160
padxxxxxx, // 0xD1
161
framebufpixformat, // 0xD2
162
clearmode, // 0xD3 GE_CMD_CLEARMODE
163
scissor1,
164
scissor2,
165
minz,
166
maxz,
167
colortest,
168
colorref,
169
colortestmask,
170
alphatest,
171
stenciltest,
172
stencilop,
173
ztestfunc,
174
blend,
175
blendfixa,
176
blendfixb,
177
dithmtx[4],
178
lop, // 0xE6
179
zmsk,
180
pmskc,
181
pmska,
182
transferstart,
183
transfersrcpos,
184
transferdstpos,
185
pad99,
186
transfersize, // 0xEE
187
pad100, // 0xEF
188
imm_vscx, // 0xF0
189
imm_vscy,
190
imm_vscz,
191
imm_vtcs,
192
imm_vtct,
193
imm_vtcq,
194
imm_cv,
195
imm_ap,
196
imm_fc,
197
imm_scv; // 0xF9
198
// In the unlikely case we ever add anything else here, don't forget to update the padding on the next line!
199
u32 pad05[0xFF- 0xF9];
200
};
201
};
202
203
// These are not directly mapped, instead these are loaded one-by-one through special commands.
204
// However, these are actual state, and can be read back.
205
float worldMatrix[12]; // 4x3
206
float viewMatrix[12]; // 4x3
207
float projMatrix[16]; // 4x4
208
float tgenMatrix[12]; // 4x3
209
float boneMatrix[12 * 8]; // Eight 4x3 bone matrices.
210
211
// We ignore the high bits of the framebuffer in fbwidth - even 0x08000000 renders to vRAM.
212
// The top bits of mirroring are also not respected, so we mask them away.
213
u32 getFrameBufRawAddress() const { return fbptr & 0x1FFFF0; }
214
// 0x44000000 is uncached VRAM.
215
u32 getFrameBufAddress() const { return 0x44000000 | getFrameBufRawAddress(); }
216
GEBufferFormat FrameBufFormat() const { return static_cast<GEBufferFormat>(framebufpixformat & 3); }
217
int FrameBufStride() const { return fbwidth&0x7FC; }
218
u32 getDepthBufRawAddress() const { return zbptr & 0x1FFFF0; }
219
u32 getDepthBufAddress() const { return 0x44600000 | getDepthBufRawAddress(); }
220
int DepthBufStride() const { return zbwidth&0x7FC; }
221
222
// Pixel Pipeline
223
bool isModeClear() const { return clearmode & 1; }
224
bool isFogEnabled() const { return fogEnable & 1; }
225
float getFogCoef1() const { return getFloat24(fog1); }
226
float getFogCoef2() const { return getFloat24(fog2); }
227
228
// Cull
229
bool isCullEnabled() const { return cullfaceEnable & 1; }
230
GECullMode getCullMode() const { return (GECullMode)(cullmode & 1); }
231
232
// Color Mask
233
bool isClearModeColorMask() const { return (clearmode&0x100) != 0; }
234
bool isClearModeAlphaMask() const { return (clearmode&0x200) != 0; }
235
bool isClearModeDepthMask() const { return (clearmode&0x400) != 0; }
236
u32 getClearModeColorMask() const { return ((clearmode&0x100) ? 0 : 0xFFFFFF) | ((clearmode&0x200) ? 0 : 0xFF000000); }
237
238
// Blend
239
GEBlendSrcFactor getBlendFuncA() const { return (GEBlendSrcFactor)(blend & 0xF); }
240
GEBlendDstFactor getBlendFuncB() const { return (GEBlendDstFactor)((blend >> 4) & 0xF); }
241
u32 getFixA() const { return blendfixa & 0xFFFFFF; }
242
u32 getFixB() const { return blendfixb & 0xFFFFFF; }
243
GEBlendMode getBlendEq() const { return static_cast<GEBlendMode>((blend >> 8) & 0x7); }
244
bool isAlphaBlendEnabled() const { return alphaBlendEnable & 1; }
245
246
// AntiAlias
247
bool isAntiAliasEnabled() const { return antiAliasEnable & 1; }
248
249
// Dither
250
bool isDitherEnabled() const { return ditherEnable & 1; }
251
int getDitherValue(int x, int y) const {
252
u8 raw = (dithmtx[y & 3] >> ((x & 3) * 4)) & 0xF;
253
// Apply sign extension to make 8-F negative, 0-7 positive.
254
return ((s8)(raw << 4)) >> 4;
255
}
256
257
// Color Mask
258
u32 getColorMask() const { return (pmskc & 0xFFFFFF) | ((pmska & 0xFF) << 24); }
259
u8 getStencilWriteMask() const { return pmska & 0xFF; }
260
bool isLogicOpEnabled() const { return logicOpEnable & 1; }
261
GELogicOp getLogicOp() const { return static_cast<GELogicOp>(lop & 0xF); }
262
263
// Depth Test
264
bool isDepthTestEnabled() const { return zTestEnable & 1; }
265
bool isDepthWriteEnabled() const { return !(zmsk & 1); }
266
GEComparison getDepthTestFunction() const { return static_cast<GEComparison>(ztestfunc & 0x7); }
267
u16 getDepthRangeMin() const { return minz & 0xFFFF; }
268
u16 getDepthRangeMax() const { return maxz & 0xFFFF; }
269
270
// Stencil Test
271
bool isStencilTestEnabled() const { return stencilTestEnable & 1; }
272
GEComparison getStencilTestFunction() const { return static_cast<GEComparison>(stenciltest & 0x7); }
273
int getStencilTestRef() const { return (stenciltest>>8) & 0xFF; }
274
int getStencilTestMask() const { return (stenciltest>>16) & 0xFF; }
275
GEStencilOp getStencilOpSFail() const { return static_cast<GEStencilOp>(stencilop & 0x7); }
276
GEStencilOp getStencilOpZFail() const { return static_cast<GEStencilOp>((stencilop>>8) & 0x7); }
277
GEStencilOp getStencilOpZPass() const { return static_cast<GEStencilOp>((stencilop>>16) & 0x7); }
278
279
// Alpha Test
280
bool isAlphaTestEnabled() const { return alphaTestEnable & 1; }
281
GEComparison getAlphaTestFunction() const { return static_cast<GEComparison>(alphatest & 0x7); }
282
int getAlphaTestRef() const { return (alphatest >> 8) & 0xFF; }
283
int getAlphaTestMask() const { return (alphatest >> 16) & 0xFF; }
284
285
// Color Test
286
bool isColorTestEnabled() const { return colorTestEnable & 1; }
287
GEComparison getColorTestFunction() const { return static_cast<GEComparison>(colortest & 0x3); }
288
u32 getColorTestRef() const { return colorref & 0xFFFFFF; }
289
u32 getColorTestMask() const { return colortestmask & 0xFFFFFF; }
290
291
// Texturing
292
// TODO: Verify getTextureAddress() alignment?
293
u32 getTextureAddress(int level) const { return (texaddr[level] & 0xFFFFF0) | ((texbufwidth[level] << 8) & 0x0F000000); }
294
int getTextureWidth(int level) const { return 1 << (texsize[level] & 0xf);}
295
int getTextureHeight(int level) const { return 1 << ((texsize[level] >> 8) & 0xf);}
296
u16 getTextureDimension(int level) const { return texsize[level] & 0xf0f;}
297
GETexLevelMode getTexLevelMode() const { return static_cast<GETexLevelMode>(texlevel & 0x3); }
298
int getTexLevelOffset16() const { return (int)(s8)((texlevel >> 16) & 0xFF); }
299
bool isTextureMapEnabled() const { return textureMapEnable & 1; }
300
GETexFunc getTextureFunction() const { return static_cast<GETexFunc>(texfunc & 0x7); }
301
bool isColorDoublingEnabled() const { return (texfunc & 0x10000) != 0; }
302
bool isTextureAlphaUsed() const { return (texfunc & 0x100) != 0; }
303
GETextureFormat getTextureFormat() const { return static_cast<GETextureFormat>(texformat & 0xF); }
304
bool isTextureFormatIndexed() const { return (texformat & 4) != 0; } // GE_TFMT_CLUT4 - GE_TFMT_CLUT32 are 0b1xx.
305
int getTextureEnvColRGB() const { return texenvcolor & 0x00FFFFFF; }
306
u32 getClutAddress() const { return (clutaddr & 0x00FFFFF0) | ((clutaddrupper << 8) & 0x0F000000); }
307
int getClutLoadBytes() const { return getClutLoadBlocks() * 32; }
308
int getClutLoadBlocks() const {
309
// The PSP only supports 0x3F, but Misshitsu no Sacrifice has extra color data (see #15727.)
310
// 0x40 would be 0, which would be a no-op, so we allow it.
311
if ((loadclut & 0x7F) == 0x40)
312
return 0x40;
313
return loadclut & 0x3F;
314
}
315
GEPaletteFormat getClutPaletteFormat() const { return static_cast<GEPaletteFormat>(clutformat & 3); }
316
int getClutIndexShift() const { return (clutformat >> 2) & 0x1F; }
317
int getClutIndexMask() const { return (clutformat >> 8) & 0xFF; }
318
int getClutIndexStartPos() const { return ((clutformat >> 16) & 0x1F) << 4; }
319
u32 transformClutIndex(u32 index) const {
320
// We need to wrap any entries beyond the first 1024 bytes.
321
u32 mask = getClutPaletteFormat() == GE_CMODE_32BIT_ABGR8888 ? 0xFF : 0x1FF;
322
return ((index >> getClutIndexShift()) & getClutIndexMask()) | (getClutIndexStartPos() & mask);
323
}
324
bool isClutIndexSimple() const { return (clutformat & ~3) == 0xC500FF00; } // Meaning, no special mask, shift, or start pos.
325
bool isTextureSwizzled() const { return texmode & 1; }
326
bool isClutSharedForMipmaps() const { return (texmode & 0x100) == 0; }
327
bool isMipmapEnabled() const { return (texfilter & 4) != 0; }
328
bool isMipmapFilteringEnabled() const { return (texfilter & 2) != 0; }
329
bool isMinifyFilteringEnabled() const { return (texfilter & 1) != 0; }
330
bool isMagnifyFilteringEnabled() const { return (texfilter >> 8) & 1; }
331
int getTextureMaxLevel() const { return (texmode >> 16) & 0x7; }
332
float getTextureLodSlope() const { return getFloat24(texlodslope); }
333
334
// Lighting
335
bool isLightingEnabled() const { return lightingEnable & 1; }
336
bool isLightChanEnabled(int chan) const { return lightEnable[chan] & 1; }
337
GELightComputation getLightComputation(int chan) const { return static_cast<GELightComputation>(ltype[chan] & 0x3); }
338
bool isUsingPoweredDiffuseLight(int chan) const { return getLightComputation(chan) == GE_LIGHTCOMP_ONLYPOWDIFFUSE; }
339
bool isUsingSpecularLight(int chan) const { return getLightComputation(chan) == GE_LIGHTCOMP_BOTH; }
340
bool isUsingSecondaryColor() const { return lmode & 1; }
341
GELightType getLightType(int chan) const { return static_cast<GELightType>((ltype[chan] >> 8) & 3); }
342
bool isDirectionalLight(int chan) const { return getLightType(chan) == GE_LIGHTTYPE_DIRECTIONAL; }
343
bool isPointLight(int chan) const { return getLightType(chan) == GE_LIGHTTYPE_POINT; }
344
bool isSpotLight(int chan) const { return getLightType(chan) >= GE_LIGHTTYPE_SPOT; }
345
GEShadeMode getShadeMode() const { return static_cast<GEShadeMode>(shademodel & 1); }
346
unsigned int getAmbientA() const { return ambientalpha&0xFF; }
347
unsigned int getAmbientRGBA() const { return (ambientcolor&0xFFFFFF) | ((ambientalpha&0xFF)<<24); }
348
unsigned int getMaterialUpdate() const { return materialupdate & 7; }
349
unsigned int getMaterialAmbientR() const { return materialambient&0xFF; }
350
unsigned int getMaterialAmbientG() const { return (materialambient>>8)&0xFF; }
351
unsigned int getMaterialAmbientB() const { return (materialambient>>16)&0xFF; }
352
unsigned int getMaterialAmbientA() const { return materialalpha&0xFF; }
353
unsigned int getMaterialAmbientRGBA() const { return (materialambient & 0x00FFFFFF) | (materialalpha << 24); }
354
unsigned int getMaterialDiffuse() const { return materialdiffuse & 0xffffff; }
355
unsigned int getMaterialEmissive() const { return materialemissive & 0xffffff; }
356
unsigned int getMaterialSpecular() const { return materialspecular & 0xffffff; }
357
float getMaterialSpecularCoef() const { return getFloat24(materialspecularcoef); }
358
unsigned int getLightAmbientColor(int chan) const { return lcolor[chan*3]&0xFFFFFF; }
359
unsigned int getDiffuseColor(int chan) const { return lcolor[1+chan*3]&0xFFFFFF; }
360
unsigned int getSpecularColor(int chan) const { return lcolor[2+chan*3]&0xFFFFFF; }
361
362
int getPatchDivisionU() const { return patchdivision & 0x7F; }
363
int getPatchDivisionV() const { return (patchdivision >> 8) & 0x7F; }
364
365
// UV gen
366
GETexMapMode getUVGenMode() const { return static_cast<GETexMapMode>(texmapmode & 3);} // 2 bits
367
GETexProjMapMode getUVProjMode() const { return static_cast<GETexProjMapMode>((texmapmode >> 8) & 3);} // 2 bits
368
int getUVLS0() const { return texshade & 0x3; } // 2 bits
369
int getUVLS1() const { return (texshade >> 8) & 0x3; } // 2 bits
370
371
bool isTexCoordClampedS() const { return texwrap & 1; }
372
bool isTexCoordClampedT() const { return (texwrap >> 8) & 1; }
373
374
int getScissorX1() const { return scissor1 & 0x3FF; }
375
int getScissorY1() const { return (scissor1 >> 10) & 0x3FF; }
376
int getScissorX2() const { return scissor2 & 0x3FF; }
377
int getScissorY2() const { return (scissor2 >> 10) & 0x3FF; }
378
int getRegionRateX() const { return 0x100 + (region1 & 0x3FF); }
379
int getRegionRateY() const { return 0x100 + ((region1 >> 10) & 0x3FF); }
380
int getRegionX2() const { return (region2 & 0x3FF); }
381
int getRegionY2() const { return (region2 >> 10) & 0x3FF; }
382
383
bool isDepthClampEnabled() const { return depthClampEnable & 1; }
384
385
// Note that the X1/Y1/Z1 here does not mean the upper-left corner, but half the dimensions. X2/Y2/Z2 are the center.
386
float getViewportXScale() const { return getFloat24(viewportxscale); }
387
float getViewportYScale() const { return getFloat24(viewportyscale); }
388
float getViewportZScale() const { return getFloat24(viewportzscale); }
389
float getViewportXCenter() const { return getFloat24(viewportxcenter); }
390
float getViewportYCenter() const { return getFloat24(viewportycenter); }
391
float getViewportZCenter() const { return getFloat24(viewportzcenter); }
392
393
// Fixed 12.4 point.
394
int getOffsetX16() const { return offsetx & 0xFFFF; }
395
int getOffsetY16() const { return offsety & 0xFFFF; }
396
float getOffsetX() const { return (float)getOffsetX16() / 16.0f; }
397
float getOffsetY() const { return (float)getOffsetY16() / 16.0f; }
398
399
// Vertex type
400
bool isModeThrough() const { return (vertType & GE_VTYPE_THROUGH) != 0; }
401
bool areNormalsReversed() const { return reversenormals & 1; }
402
bool isSkinningEnabled() const { return ((vertType & GE_VTYPE_WEIGHT_MASK) != GE_VTYPE_WEIGHT_NONE); }
403
int getNumMorphWeights() const { return ((vertType & GE_VTYPE_MORPHCOUNT_MASK) >> GE_VTYPE_MORPHCOUNT_SHIFT) + 1; }
404
405
GEPatchPrimType getPatchPrimitiveType() const { return static_cast<GEPatchPrimType>(patchprimitive & 3); }
406
bool isPatchNormalsReversed() const { return patchfacing & 1; }
407
408
// Transfers
409
u32 getTransferSrcAddress() const { return (transfersrc & 0xFFFFF0) | ((transfersrcw & 0xFF0000) << 8); }
410
// Bits 0xf800 are ignored, > 0x400 is treated as 0.
411
u32 getTransferSrcStride() const { int stride = transfersrcw & 0x7F8; return stride > 0x400 ? 0 : stride; }
412
int getTransferSrcX() const { return (transfersrcpos >> 0) & 0x3FF; }
413
int getTransferSrcY() const { return (transfersrcpos >> 10) & 0x3FF; }
414
u32 getTransferDstAddress() const { return (transferdst & 0xFFFFF0) | ((transferdstw & 0xFF0000) << 8); }
415
// Bits 0xf800 are ignored, > 0x400 is treated as 0.
416
u32 getTransferDstStride() const { int stride = transferdstw & 0x7F8; return stride > 0x400 ? 0 : stride; }
417
int getTransferDstX() const { return (transferdstpos >> 0) & 0x3FF; }
418
int getTransferDstY() const { return (transferdstpos >> 10) & 0x3FF; }
419
int getTransferWidth() const { return ((transfersize >> 0) & 0x3FF) + 1; }
420
int getTransferHeight() const { return ((transfersize >> 10) & 0x3FF) + 1; }
421
int getTransferBpp() const { return (transferstart & 1) ? 4 : 2; }
422
423
void FastLoadBoneMatrix(u32 addr);
424
425
// Real data in the context ends here
426
427
static void Reset();
428
void Save(u32_le *ptr);
429
void Restore(const u32_le *ptr);
430
};
431
432
bool vertTypeIsSkinningEnabled(u32 vertType);
433
434
// Extracts the weight-count field from a vertex type word and returns it plus one
// (the field stores the count minus one).
inline int vertTypeGetNumBoneWeights(u32 vertType) {
	const u32 countField = (vertType & GE_VTYPE_WEIGHTCOUNT_MASK) >> GE_VTYPE_WEIGHTCOUNT_SHIFT;
	return 1 + countField;
}
435
// Returns only the bits of vertType selected by GE_VTYPE_WEIGHT_MASK, unshifted.
inline int vertTypeGetWeightMask(u32 vertType) {
	const u32 weightBits = vertType & GE_VTYPE_WEIGHT_MASK;
	return weightBits;
}
436
437
// The rest is cached simplified/converted data for fast access.
438
// Does not need to be saved when saving/restoring context.
439
//
440
// Lots of this, however, is actual emulator state which must be saved when savestating.
441
// vertexAddr, indexAddr, offsetAddr for example.
442
443
// Texture coordinate scale and offset, kept as four consecutive floats.
// The member order matters: GPUStateCache::UpdateUVScaleOffset() writes all four values
// with a single 128-bit store in the order uScale, vScale, uOff, vOff.
struct UVScale {
	float uScale;
	float vScale;
	float uOff;
	float vOff;
};

// Guard the layout the 128-bit store relies on.
static_assert(sizeof(UVScale) == 4 * sizeof(float), "UVScale must be exactly four packed floats");
447
448
// Builds a single-bit mask for bit index x.
// The argument is parenthesized so compound expressions such as FLAG_BIT(a | b) expand
// correctly, and the literal is unsigned so that FLAG_BIT(31) does not shift into the sign
// bit of an int (which would give a negative enumerator below).
#define FLAG_BIT(x) (1u << (x))

// These flags are mainly to make sure that we make decisions on code path in a single
// location. Sometimes we need to take things into account in multiple places, it helps
// to centralize into flags like this. They're also fast to check since the cache line
// will be hot.
// NOTE: Do not forget to update the string array at the end of GPUState.cpp!
enum {
	GPU_USE_DUALSOURCE_BLEND = FLAG_BIT(0),
	GPU_USE_LIGHT_UBERSHADER = FLAG_BIT(1),
	GPU_USE_FRAGMENT_TEST_CACHE = FLAG_BIT(2),
	GPU_USE_VS_RANGE_CULLING = FLAG_BIT(3),
	GPU_USE_BLEND_MINMAX = FLAG_BIT(4),
	GPU_USE_LOGIC_OP = FLAG_BIT(5),
	GPU_USE_FRAGMENT_UBERSHADER = FLAG_BIT(6),
	GPU_USE_TEXTURE_NPOT = FLAG_BIT(7),
	GPU_USE_ANISOTROPY = FLAG_BIT(8),
	GPU_USE_CLEAR_RAM_HACK = FLAG_BIT(9),
	GPU_USE_INSTANCE_RENDERING = FLAG_BIT(10),
	GPU_USE_VERTEX_TEXTURE_FETCH = FLAG_BIT(11),
	GPU_USE_TEXTURE_FLOAT = FLAG_BIT(12),
	GPU_USE_16BIT_FORMATS = FLAG_BIT(13),
	GPU_USE_DEPTH_CLAMP = FLAG_BIT(14),
	GPU_USE_TEXTURE_LOD_CONTROL = FLAG_BIT(15),
	GPU_USE_DEPTH_TEXTURE = FLAG_BIT(16),
	GPU_USE_ACCURATE_DEPTH = FLAG_BIT(17),
	GPU_USE_GS_CULLING = FLAG_BIT(18),  // Geometry shader
	GPU_USE_FRAMEBUFFER_ARRAYS = FLAG_BIT(19),
	GPU_USE_FRAMEBUFFER_FETCH = FLAG_BIT(20),
	GPU_SCALE_DEPTH_FROM_24BIT_TO_16BIT = FLAG_BIT(21),
	GPU_ROUND_FRAGMENT_DEPTH_TO_16BIT = FLAG_BIT(22),
	GPU_ROUND_DEPTH_TO_16BIT = FLAG_BIT(23),  // Can be disabled either per game or if we use a real 16-bit depth buffer
	GPU_USE_CLIP_DISTANCE = FLAG_BIT(24),
	GPU_USE_CULL_DISTANCE = FLAG_BIT(25),
	GPU_USE_SHADER_BLENDING = FLAG_BIT(26),  // This is set to false when skip buffer effects is enabled and GPU_USE_FRAMEBUFFER_FETCH is not.

	// Bits 27 and 28 are currently unassigned.

	// VR flags (reserved or in-use)
	GPU_USE_VIRTUAL_REALITY = FLAG_BIT(29),
	GPU_USE_SINGLE_PASS_STEREO = FLAG_BIT(30),
	GPU_USE_SIMPLE_STEREO_PERSPECTIVE = FLAG_BIT(31),
};
489
490
// Note that this takes a flag index, not the bit value.
491
const char *GpuUseFlagToString(int useFlag);
492
493
struct KnownVertexBounds {
494
u16 minU;
495
u16 minV;
496
u16 maxU;
497
u16 maxV;
498
};
499
500
// Kind of geometry submission, stored in GPUStateCache::submitType.
// NOTE(review): the HW_ variants presumably mark hardware-tessellated bezier/spline
// patches as opposed to the plain BEZIER/SPLINE paths - confirm against the draw code.
enum class SubmitType {
	DRAW,
	BEZIER,
	SPLINE,
	HW_BEZIER,
	HW_SPLINE,
};
507
508
extern GPUgstate gstate;
509
510
struct GPUStateCache {
511
bool Use(u32 flags) const { return (useFlags_ & flags) != 0; } // Return true if ANY of flags are true.
512
bool UseAll(u32 flags) const { return (useFlags_ & flags) == flags; } // Return true if ALL flags are true.
513
514
u32 UseFlags() const { return useFlags_; }
515
516
uint64_t GetDirtyUniforms() { return dirty & DIRTY_ALL_UNIFORMS; }
517
void Dirty(u64 what) {
518
dirty |= what;
519
}
520
void CleanUniforms() {
521
dirty &= ~DIRTY_ALL_UNIFORMS;
522
}
523
void Clean(u64 what) {
524
dirty &= ~what;
525
}
526
bool IsDirty(u64 what) const {
527
return (dirty & what) != 0ULL;
528
}
529
void SetUseShaderDepal(ShaderDepalMode mode) {
530
if (mode != shaderDepalMode) {
531
shaderDepalMode = mode;
532
Dirty(DIRTY_FRAGMENTSHADER_STATE);
533
}
534
}
535
void SetTextureFullAlpha(bool fullAlpha) {
536
if (fullAlpha != textureFullAlpha) {
537
textureFullAlpha = fullAlpha;
538
Dirty(DIRTY_FRAGMENTSHADER_STATE | DIRTY_TEX_ALPHA_MUL);
539
}
540
}
541
void SetNeedShaderTexclamp(bool need) {
542
if (need != needShaderTexClamp) {
543
needShaderTexClamp = need;
544
Dirty(DIRTY_FRAGMENTSHADER_STATE);
545
if (need)
546
Dirty(DIRTY_TEXCLAMP);
547
}
548
}
549
void SetTextureIs3D(bool is3D) {
550
if (is3D != curTextureIs3D) {
551
curTextureIs3D = is3D;
552
Dirty(DIRTY_FRAGMENTSHADER_STATE | (is3D ? DIRTY_MIPBIAS : 0));
553
}
554
}
555
void SetTextureIsArray(bool isArrayTexture) { // VK only
556
if (textureIsArray != isArrayTexture) {
557
textureIsArray = isArrayTexture;
558
Dirty(DIRTY_FRAGMENTSHADER_STATE);
559
}
560
}
561
void SetTextureIsVideo(bool isVideo) {
562
textureIsVideo = isVideo;
563
}
564
void SetTextureIsBGRA(bool isBGRA) {
565
if (bgraTexture != isBGRA) {
566
bgraTexture = isBGRA;
567
Dirty(DIRTY_FRAGMENTSHADER_STATE);
568
}
569
}
570
void SetTextureIsFramebuffer(bool isFramebuffer) {
571
if (textureIsFramebuffer != isFramebuffer) {
572
textureIsFramebuffer = isFramebuffer;
573
Dirty(DIRTY_UVSCALEOFFSET);
574
} else if (isFramebuffer) {
575
// Always dirty if it's a framebuffer, since the uniform value depends both
576
// on the specified texture size and the bound texture size. Makes things easier.
577
// TODO: Look at this again later.
578
Dirty(DIRTY_UVSCALEOFFSET);
579
}
580
}
581
bool SetUseFlags(u32 newFlags);
582
583
// When checking for a single flag, use Use()/UseAll().
584
u32 GetUseFlags() const {
585
return useFlags_;
586
}
587
588
void UpdateUVScaleOffset() {
589
#if defined(_M_SSE)
590
__m128i values = _mm_slli_epi32(_mm_load_si128((const __m128i *)&gstate.texscaleu), 8);
591
_mm_storeu_si128((__m128i *)&uv, values);
592
#elif PPSSPP_ARCH(ARM_NEON)
593
const uint32x4_t values = vshlq_n_u32(vld1q_u32((const u32 *)&gstate.texscaleu), 8);
594
vst1q_u32((u32 *)&uv, values);
595
#else
596
uv.uScale = getFloat24(gstate.texscaleu);
597
uv.vScale = getFloat24(gstate.texscalev);
598
uv.uOff = getFloat24(gstate.texoffsetu);
599
uv.vOff = getFloat24(gstate.texoffsetv);
600
#endif
601
}
602
603
private:
604
u32 useFlags_;
605
public:
606
u32 vertexAddr;
607
u32 indexAddr;
608
u32 offsetAddr;
609
610
uint64_t dirty;
611
612
bool usingDepth; // For deferred depth copies.
613
bool clearingDepth;
614
615
bool textureFullAlpha;
616
bool vertexFullAlpha;
617
618
int skipDrawReason;
619
620
UVScale uv;
621
622
bool bgraTexture;
623
bool needShaderTexClamp;
624
bool textureIsArray;
625
bool textureIsFramebuffer;
626
bool textureIsVideo;
627
bool useFlagsChanged;
628
629
float morphWeights[8];
630
u32 deferredVertTypeDirty;
631
632
u32 curTextureWidth;
633
u32 curTextureHeight;
634
u32 actualTextureHeight;
635
// Only applied when needShaderTexClamp = true.
636
int curTextureXOffset;
637
int curTextureYOffset;
638
bool curTextureIs3D;
639
640
float vpWidth;
641
float vpHeight;
642
643
float vpXOffset;
644
float vpYOffset;
645
float vpZOffset;
646
float vpWidthScale;
647
float vpHeightScale;
648
float vpDepthScale;
649
650
KnownVertexBounds vertBounds;
651
652
GEBufferFormat framebufFormat;
653
// Some games use a very specific masking setup to draw into the alpha channel of a 4444 target using the blue channel of a 565 target.
654
// This is done because on PSP you can't write to destination alpha, other than stencil values, which can't be set from a texture.
655
// Examples of games that do this: Outrun, Split/Second.
656
// We detect this case and go into a special drawing mode.
657
bool blueToAlpha;
658
659
// U/V is 1:1 to pixels. Can influence texture sampling.
660
bool pixelMapped;
661
662
// TODO: These should be accessed from the current VFB object directly.
663
u32 curRTWidth;
664
u32 curRTHeight;
665
u32 curRTRenderWidth;
666
u32 curRTRenderHeight;
667
668
void SetCurRTOffset(int xoff, int yoff) {
669
if (xoff != curRTOffsetX || yoff != curRTOffsetY) {
670
curRTOffsetX = xoff;
671
curRTOffsetY = yoff;
672
Dirty(DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_PROJTHROUGHMATRIX);
673
}
674
}
675
int curRTOffsetX;
676
int curRTOffsetY;
677
678
// Set if we are doing hardware bezier/spline.
679
SubmitType submitType;
680
int spline_num_points_u;
681
682
ShaderDepalMode shaderDepalMode;
683
GEBufferFormat depalFramebufferFormat;
684
685
u32 getRelativeAddress(u32 data) const;
686
static void Reset();
687
void DoState(PointerWrap &p);
688
};
689
690
class GPUInterface;
691
class GPUDebugInterface;
692
693
extern GPUStateCache gstate_c;
694
695
// Resolves a 24-bit address operand into a full address.
// Bits 16-19 of the BASE register supply address bits 24-27, the current offset address is
// added, and the result is wrapped to 28 bits.
// NOTE(review): this reads gstate_c.offsetAddr (the global instance) rather than this
// object's own offsetAddr; the two only agree when called on gstate_c itself.
inline u32 GPUStateCache::getRelativeAddress(u32 data) const {
	const u32 baseExtended = ((gstate.base & 0x000F0000) << 8) | data;
	return (gstate_c.offsetAddr + baseExtended) & 0x0FFFFFFF;
}
699
700