Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
hrydgard
GitHub Repository: hrydgard/ppsspp
Path: blob/master/GPU/GPUCommonHW.cpp
3185 views
1
#include "Common/Profiler/Profiler.h"
2
3
#include "Common/GPU/thin3d.h"
4
#include "Common/Serialize/Serializer.h"
5
#include "Common/System/System.h"
6
7
#include "Core/System.h"
8
#include "Core/Config.h"
9
#include "Core/Util/PPGeDraw.h"
10
11
#include "GPU/GPUCommonHW.h"
12
#include "GPU/Common/SplineCommon.h"
13
#include "GPU/Common/DrawEngineCommon.h"
14
#include "GPU/Common/TextureCacheCommon.h"
15
#include "GPU/Common/FramebufferManagerCommon.h"
16
17
// One row of the static command table below: maps a GE command byte to its
// execution flags, the state bits it dirties, and an optional handler.
struct CommonCommandTableEntry {
	uint8_t cmd;    // GE command id (GE_CMD_*).
	uint8_t flags;  // FLAG_* bits controlling flush/execute behavior.
	uint64_t dirty; // DIRTY_* state bits to invalidate when the value changes.
	GPUCommonHW::CmdFunc func;  // Handler; required when a FLAG_EXECUTE* bit is set.
};
23
24
// Runtime per-command lookup record, built from commonCommandTable in the
// GPUCommonHW constructor. The low 8 bits of 'flags' hold the FLAG_* bits;
// the DIRTY_* bits are packed above them, shifted left by 8.
struct CommandInfo {
	uint64_t flags;
	GPUCommonHW::CmdFunc func;

	// Dirty flags are mashed into the regular flags by a left shift of 8.
	void AddDirty(u64 dirty) {
		flags |= dirty << 8;
	}
	void RemoveDirty(u64 dirty) {
		flags &= ~(dirty << 8);
	}
};
36
37
// One lookup entry per possible GE command byte (0x00-0xFF); zeroed and then
// populated from commonCommandTable in the GPUCommonHW constructor.
static CommandInfo cmdInfo_[256];
39
// Static dispatch table for GE commands: per-command execute flags, dirty
// state bits, and handlers. Commands absent here (or listed with no flags)
// are treated as no-ops or are consumed by other handlers. This table is
// converted into the faster cmdInfo_ array at construction time.
const CommonCommandTableEntry commonCommandTable[] = {
	// From Common. No flushing but definitely need execute.
	{ GE_CMD_OFFSETADDR, FLAG_EXECUTE, 0, &GPUCommon::Execute_OffsetAddr },
	{ GE_CMD_ORIGIN, FLAG_EXECUTE | FLAG_READS_PC, 0, &GPUCommon::Execute_Origin },
	{ GE_CMD_JUMP, FLAG_EXECUTE | FLAG_READS_PC | FLAG_WRITES_PC, 0, &GPUCommon::Execute_Jump },
	{ GE_CMD_CALL, FLAG_EXECUTE | FLAG_READS_PC | FLAG_WRITES_PC, 0, &GPUCommon::Execute_Call },
	{ GE_CMD_RET, FLAG_EXECUTE | FLAG_READS_PC | FLAG_WRITES_PC, 0, &GPUCommon::Execute_Ret },
	{ GE_CMD_END, FLAG_EXECUTE | FLAG_READS_PC | FLAG_WRITES_PC, 0, &GPUCommon::Execute_End },
	{ GE_CMD_VADDR, FLAG_EXECUTE, 0, &GPUCommon::Execute_Vaddr },
	{ GE_CMD_IADDR, FLAG_EXECUTE, 0, &GPUCommon::Execute_Iaddr },
	{ GE_CMD_BJUMP, FLAG_EXECUTE | FLAG_READS_PC | FLAG_WRITES_PC, 0, &GPUCommon::Execute_BJump }, // EXECUTE
	{ GE_CMD_BOUNDINGBOX, FLAG_EXECUTE, 0, &GPUCommonHW::Execute_BoundingBox }, // Shouldn't need to FLUSHBEFORE.

	{ GE_CMD_PRIM, FLAG_EXECUTE, 0, &GPUCommonHW::Execute_Prim },
	{ GE_CMD_BEZIER, FLAG_EXECUTE, 0, &GPUCommonHW::Execute_Bezier },
	{ GE_CMD_SPLINE, FLAG_EXECUTE, 0, &GPUCommonHW::Execute_Spline },

	// Changing the vertex type requires us to flush.
	{ GE_CMD_VERTEXTYPE, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, 0, &GPUCommonHW::Execute_VertexType },

	{ GE_CMD_LOADCLUT, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTE, 0, &GPUCommonHW::Execute_LoadClut},

	// These two are actually processed in CMD_END.
	{ GE_CMD_SIGNAL },
	{ GE_CMD_FINISH },

	// Changes that dirty the framebuffer
	{ GE_CMD_FRAMEBUFPTR, FLAG_FLUSHBEFOREONCHANGE, DIRTY_FRAMEBUF | DIRTY_TEXTURE_PARAMS },
	{ GE_CMD_FRAMEBUFWIDTH, FLAG_FLUSHBEFOREONCHANGE, DIRTY_FRAMEBUF | DIRTY_TEXTURE_PARAMS | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_CULLRANGE },
	{ GE_CMD_FRAMEBUFPIXFORMAT, FLAG_FLUSHBEFOREONCHANGE, DIRTY_FRAMEBUF | DIRTY_TEXTURE_PARAMS | DIRTY_BLEND_STATE | DIRTY_DEPTHSTENCIL_STATE | DIRTY_FRAGMENTSHADER_STATE },
	{ GE_CMD_ZBUFPTR, FLAG_FLUSHBEFOREONCHANGE },
	{ GE_CMD_ZBUFWIDTH, FLAG_FLUSHBEFOREONCHANGE },

	{ GE_CMD_FOGCOLOR, FLAG_FLUSHBEFOREONCHANGE, DIRTY_FOGCOLOR },
	{ GE_CMD_FOG1, FLAG_FLUSHBEFOREONCHANGE, DIRTY_FOGCOEF },
	{ GE_CMD_FOG2, FLAG_FLUSHBEFOREONCHANGE, DIRTY_FOGCOEF },

	// These affect the fragment shader so need flushing.
	{ GE_CMD_CLEARMODE, FLAG_FLUSHBEFOREONCHANGE, DIRTY_BLEND_STATE | DIRTY_DEPTHSTENCIL_STATE | DIRTY_RASTER_STATE | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_CULLRANGE | DIRTY_VERTEXSHADER_STATE | DIRTY_FRAGMENTSHADER_STATE | DIRTY_GEOMETRYSHADER_STATE },
	{ GE_CMD_TEXTUREMAPENABLE, FLAG_FLUSHBEFOREONCHANGE, DIRTY_VERTEXSHADER_STATE | DIRTY_FRAGMENTSHADER_STATE | DIRTY_GEOMETRYSHADER_STATE },
	{ GE_CMD_FOGENABLE, FLAG_FLUSHBEFOREONCHANGE, DIRTY_FRAGMENTSHADER_STATE },
	{ GE_CMD_TEXMODE, FLAG_FLUSHBEFOREONCHANGE, DIRTY_TEXTURE_PARAMS | DIRTY_FRAGMENTSHADER_STATE },
	{ GE_CMD_TEXSHADELS, FLAG_FLUSHBEFOREONCHANGE, DIRTY_VERTEXSHADER_STATE },
	// Raster state for Direct3D 9, uncommon.
	{ GE_CMD_SHADEMODE, FLAG_FLUSHBEFOREONCHANGE, DIRTY_VERTEXSHADER_STATE | DIRTY_FRAGMENTSHADER_STATE | DIRTY_RASTER_STATE },
	{ GE_CMD_TEXFUNC, FLAG_FLUSHBEFOREONCHANGE, DIRTY_FRAGMENTSHADER_STATE | DIRTY_TEX_ALPHA_MUL },
	{ GE_CMD_COLORTEST, FLAG_FLUSHBEFOREONCHANGE, DIRTY_FRAGMENTSHADER_STATE },
	{ GE_CMD_ALPHATESTENABLE, FLAG_FLUSHBEFOREONCHANGE, DIRTY_FRAGMENTSHADER_STATE },
	{ GE_CMD_COLORTESTENABLE, FLAG_FLUSHBEFOREONCHANGE, DIRTY_FRAGMENTSHADER_STATE },
	{ GE_CMD_COLORTESTMASK, FLAG_FLUSHBEFOREONCHANGE, DIRTY_ALPHACOLORMASK | DIRTY_FRAGMENTSHADER_STATE },

	// These change the vertex shader so need flushing.
	{ GE_CMD_REVERSENORMAL, FLAG_FLUSHBEFOREONCHANGE, DIRTY_VERTEXSHADER_STATE },
	{ GE_CMD_LIGHTINGENABLE, FLAG_FLUSHBEFOREONCHANGE, DIRTY_VERTEXSHADER_STATE | DIRTY_FRAGMENTSHADER_STATE | DIRTY_GEOMETRYSHADER_STATE },
	{ GE_CMD_LIGHTENABLE0, FLAG_FLUSHBEFOREONCHANGE, DIRTY_VERTEXSHADER_STATE },
	{ GE_CMD_LIGHTENABLE1, FLAG_FLUSHBEFOREONCHANGE, DIRTY_VERTEXSHADER_STATE },
	{ GE_CMD_LIGHTENABLE2, FLAG_FLUSHBEFOREONCHANGE, DIRTY_VERTEXSHADER_STATE },
	{ GE_CMD_LIGHTENABLE3, FLAG_FLUSHBEFOREONCHANGE, DIRTY_VERTEXSHADER_STATE },
	{ GE_CMD_LIGHTTYPE0, FLAG_FLUSHBEFOREONCHANGE, DIRTY_VERTEXSHADER_STATE | DIRTY_LIGHT0 },
	{ GE_CMD_LIGHTTYPE1, FLAG_FLUSHBEFOREONCHANGE, DIRTY_VERTEXSHADER_STATE | DIRTY_LIGHT1 },
	{ GE_CMD_LIGHTTYPE2, FLAG_FLUSHBEFOREONCHANGE, DIRTY_VERTEXSHADER_STATE | DIRTY_LIGHT2 },
	{ GE_CMD_LIGHTTYPE3, FLAG_FLUSHBEFOREONCHANGE, DIRTY_VERTEXSHADER_STATE | DIRTY_LIGHT3 },
	{ GE_CMD_MATERIALUPDATE, FLAG_FLUSHBEFOREONCHANGE, DIRTY_VERTEXSHADER_STATE },

	// These change all shaders so need flushing.
	{ GE_CMD_LIGHTMODE, FLAG_FLUSHBEFOREONCHANGE, DIRTY_VERTEXSHADER_STATE | DIRTY_FRAGMENTSHADER_STATE | DIRTY_GEOMETRYSHADER_STATE },

	{ GE_CMD_TEXFILTER, FLAG_FLUSHBEFOREONCHANGE, DIRTY_TEXTURE_PARAMS },
	{ GE_CMD_TEXWRAP, FLAG_FLUSHBEFOREONCHANGE, DIRTY_TEXTURE_PARAMS | DIRTY_FRAGMENTSHADER_STATE },

	// Uniform changes. though the fragmentshader optimizes based on these sometimes.
	{ GE_CMD_ALPHATEST, FLAG_FLUSHBEFOREONCHANGE, DIRTY_ALPHACOLORREF | DIRTY_ALPHACOLORMASK | DIRTY_FRAGMENTSHADER_STATE },
	{ GE_CMD_COLORREF, FLAG_FLUSHBEFOREONCHANGE, DIRTY_ALPHACOLORREF | DIRTY_FRAGMENTSHADER_STATE },
	{ GE_CMD_TEXENVCOLOR, FLAG_FLUSHBEFOREONCHANGE, DIRTY_TEXENV },

	// Simple render state changes. Handled in StateMapping.cpp.
	{ GE_CMD_CULL, FLAG_FLUSHBEFOREONCHANGE, DIRTY_RASTER_STATE },
	{ GE_CMD_CULLFACEENABLE, FLAG_FLUSHBEFOREONCHANGE, DIRTY_RASTER_STATE },
	{ GE_CMD_DITHERENABLE, FLAG_FLUSHBEFOREONCHANGE, DIRTY_RASTER_STATE },
	{ GE_CMD_STENCILOP, FLAG_FLUSHBEFOREONCHANGE, DIRTY_BLEND_STATE | DIRTY_DEPTHSTENCIL_STATE | DIRTY_FRAGMENTSHADER_STATE },
	{ GE_CMD_STENCILTEST, FLAG_FLUSHBEFOREONCHANGE, DIRTY_STENCILREPLACEVALUE | DIRTY_BLEND_STATE | DIRTY_DEPTHSTENCIL_STATE },
	{ GE_CMD_STENCILTESTENABLE, FLAG_FLUSHBEFOREONCHANGE, DIRTY_BLEND_STATE | DIRTY_DEPTHSTENCIL_STATE | DIRTY_FRAGMENTSHADER_STATE },
	{ GE_CMD_ALPHABLENDENABLE, FLAG_FLUSHBEFOREONCHANGE, DIRTY_BLEND_STATE | DIRTY_FRAGMENTSHADER_STATE },
	{ GE_CMD_BLENDMODE, FLAG_FLUSHBEFOREONCHANGE, DIRTY_BLEND_STATE | DIRTY_FRAGMENTSHADER_STATE },
	{ GE_CMD_BLENDFIXEDA, FLAG_FLUSHBEFOREONCHANGE, DIRTY_BLEND_STATE | DIRTY_FRAGMENTSHADER_STATE },
	{ GE_CMD_BLENDFIXEDB, FLAG_FLUSHBEFOREONCHANGE, DIRTY_BLEND_STATE | DIRTY_FRAGMENTSHADER_STATE },
	{ GE_CMD_MASKRGB, FLAG_FLUSHBEFOREONCHANGE, DIRTY_BLEND_STATE | DIRTY_FRAGMENTSHADER_STATE | DIRTY_DEPTHSTENCIL_STATE | DIRTY_COLORWRITEMASK },
	{ GE_CMD_MASKALPHA, FLAG_FLUSHBEFOREONCHANGE, DIRTY_BLEND_STATE | DIRTY_FRAGMENTSHADER_STATE | DIRTY_DEPTHSTENCIL_STATE | DIRTY_COLORWRITEMASK },
	{ GE_CMD_ZTEST, FLAG_FLUSHBEFOREONCHANGE, DIRTY_DEPTHSTENCIL_STATE | DIRTY_FRAGMENTSHADER_STATE },
	{ GE_CMD_ZTESTENABLE, FLAG_FLUSHBEFOREONCHANGE, DIRTY_DEPTHSTENCIL_STATE | DIRTY_FRAGMENTSHADER_STATE },
	{ GE_CMD_ZWRITEDISABLE, FLAG_FLUSHBEFOREONCHANGE, DIRTY_DEPTHSTENCIL_STATE | DIRTY_FRAGMENTSHADER_STATE },
	{ GE_CMD_LOGICOP, FLAG_FLUSHBEFOREONCHANGE, DIRTY_BLEND_STATE | DIRTY_FRAGMENTSHADER_STATE },
	{ GE_CMD_LOGICOPENABLE, FLAG_FLUSHBEFOREONCHANGE, DIRTY_BLEND_STATE | DIRTY_FRAGMENTSHADER_STATE },

	{ GE_CMD_TEXMAPMODE, FLAG_FLUSHBEFOREONCHANGE, DIRTY_VERTEXSHADER_STATE | DIRTY_FRAGMENTSHADER_STATE },

	// These are read on every SubmitPrim, no need for dirtying or flushing.
	{ GE_CMD_TEXSCALEU },
	{ GE_CMD_TEXSCALEV },
	{ GE_CMD_TEXOFFSETU },
	{ GE_CMD_TEXOFFSETV },

	{ GE_CMD_TEXSIZE0, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTE, 0, &GPUCommonHW::Execute_TexSize0 },
	{ GE_CMD_TEXSIZE1, FLAG_FLUSHBEFOREONCHANGE, DIRTY_TEXTURE_PARAMS },
	{ GE_CMD_TEXSIZE2, FLAG_FLUSHBEFOREONCHANGE, DIRTY_TEXTURE_PARAMS },
	{ GE_CMD_TEXSIZE3, FLAG_FLUSHBEFOREONCHANGE, DIRTY_TEXTURE_PARAMS },
	{ GE_CMD_TEXSIZE4, FLAG_FLUSHBEFOREONCHANGE, DIRTY_TEXTURE_PARAMS },
	{ GE_CMD_TEXSIZE5, FLAG_FLUSHBEFOREONCHANGE, DIRTY_TEXTURE_PARAMS },
	{ GE_CMD_TEXSIZE6, FLAG_FLUSHBEFOREONCHANGE, DIRTY_TEXTURE_PARAMS },
	{ GE_CMD_TEXSIZE7, FLAG_FLUSHBEFOREONCHANGE, DIRTY_TEXTURE_PARAMS },
	{ GE_CMD_TEXFORMAT, FLAG_FLUSHBEFOREONCHANGE, DIRTY_TEXTURE_IMAGE },
	{ GE_CMD_TEXLEVEL, FLAG_EXECUTEONCHANGE, DIRTY_TEXTURE_PARAMS, &GPUCommonHW::Execute_TexLevel },
	{ GE_CMD_TEXLODSLOPE, FLAG_FLUSHBEFOREONCHANGE, DIRTY_TEXTURE_PARAMS },
	{ GE_CMD_TEXADDR0, FLAG_FLUSHBEFOREONCHANGE, DIRTY_TEXTURE_IMAGE },
	{ GE_CMD_TEXADDR1, FLAG_FLUSHBEFOREONCHANGE, DIRTY_TEXTURE_PARAMS },
	{ GE_CMD_TEXADDR2, FLAG_FLUSHBEFOREONCHANGE, DIRTY_TEXTURE_PARAMS },
	{ GE_CMD_TEXADDR3, FLAG_FLUSHBEFOREONCHANGE, DIRTY_TEXTURE_PARAMS },
	{ GE_CMD_TEXADDR4, FLAG_FLUSHBEFOREONCHANGE, DIRTY_TEXTURE_PARAMS },
	{ GE_CMD_TEXADDR5, FLAG_FLUSHBEFOREONCHANGE, DIRTY_TEXTURE_PARAMS },
	{ GE_CMD_TEXADDR6, FLAG_FLUSHBEFOREONCHANGE, DIRTY_TEXTURE_PARAMS },
	{ GE_CMD_TEXADDR7, FLAG_FLUSHBEFOREONCHANGE, DIRTY_TEXTURE_PARAMS },
	{ GE_CMD_TEXBUFWIDTH0, FLAG_FLUSHBEFOREONCHANGE, DIRTY_TEXTURE_IMAGE },
	{ GE_CMD_TEXBUFWIDTH1, FLAG_FLUSHBEFOREONCHANGE, DIRTY_TEXTURE_PARAMS },
	{ GE_CMD_TEXBUFWIDTH2, FLAG_FLUSHBEFOREONCHANGE, DIRTY_TEXTURE_PARAMS },
	{ GE_CMD_TEXBUFWIDTH3, FLAG_FLUSHBEFOREONCHANGE, DIRTY_TEXTURE_PARAMS },
	{ GE_CMD_TEXBUFWIDTH4, FLAG_FLUSHBEFOREONCHANGE, DIRTY_TEXTURE_PARAMS },
	{ GE_CMD_TEXBUFWIDTH5, FLAG_FLUSHBEFOREONCHANGE, DIRTY_TEXTURE_PARAMS },
	{ GE_CMD_TEXBUFWIDTH6, FLAG_FLUSHBEFOREONCHANGE, DIRTY_TEXTURE_PARAMS },
	{ GE_CMD_TEXBUFWIDTH7, FLAG_FLUSHBEFOREONCHANGE, DIRTY_TEXTURE_PARAMS },

	// These must flush on change, so that LoadClut doesn't have to always flush.
	{ GE_CMD_CLUTADDR, FLAG_FLUSHBEFOREONCHANGE },
	{ GE_CMD_CLUTADDRUPPER, FLAG_FLUSHBEFOREONCHANGE },
	{ GE_CMD_CLUTFORMAT, FLAG_FLUSHBEFOREONCHANGE, DIRTY_TEXTURE_PARAMS | DIRTY_DEPAL },

	// Morph weights. TODO: Remove precomputation?
	{ GE_CMD_MORPHWEIGHT0, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, 0, &GPUCommon::Execute_MorphWeight },
	{ GE_CMD_MORPHWEIGHT1, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, 0, &GPUCommon::Execute_MorphWeight },
	{ GE_CMD_MORPHWEIGHT2, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, 0, &GPUCommon::Execute_MorphWeight },
	{ GE_CMD_MORPHWEIGHT3, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, 0, &GPUCommon::Execute_MorphWeight },
	{ GE_CMD_MORPHWEIGHT4, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, 0, &GPUCommon::Execute_MorphWeight },
	{ GE_CMD_MORPHWEIGHT5, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, 0, &GPUCommon::Execute_MorphWeight },
	{ GE_CMD_MORPHWEIGHT6, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, 0, &GPUCommon::Execute_MorphWeight },
	{ GE_CMD_MORPHWEIGHT7, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, 0, &GPUCommon::Execute_MorphWeight },

	// Control spline/bezier patches. Don't really require flushing as such, but meh.
	{ GE_CMD_PATCHDIVISION, FLAG_FLUSHBEFOREONCHANGE },
	{ GE_CMD_PATCHPRIMITIVE, FLAG_FLUSHBEFOREONCHANGE },
	{ GE_CMD_PATCHFACING, FLAG_FLUSHBEFOREONCHANGE, DIRTY_VERTEXSHADER_STATE },
	{ GE_CMD_PATCHCULLENABLE, FLAG_FLUSHBEFOREONCHANGE },

	// Can probably ignore this one as we don't support AA lines.
	{ GE_CMD_ANTIALIASENABLE, FLAG_FLUSHBEFOREONCHANGE },

	// Viewport.
	{ GE_CMD_OFFSETX, FLAG_FLUSHBEFOREONCHANGE, DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_CULLRANGE | DIRTY_CULL_PLANES },
	{ GE_CMD_OFFSETY, FLAG_FLUSHBEFOREONCHANGE, DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_CULLRANGE | DIRTY_CULL_PLANES },
	{ GE_CMD_VIEWPORTXSCALE, FLAG_FLUSHBEFOREONCHANGE, DIRTY_FRAMEBUF | DIRTY_TEXTURE_PARAMS | DIRTY_CULLRANGE | DIRTY_PROJMATRIX | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_CULL_PLANES },
	{ GE_CMD_VIEWPORTYSCALE, FLAG_FLUSHBEFOREONCHANGE, DIRTY_FRAMEBUF | DIRTY_TEXTURE_PARAMS | DIRTY_CULLRANGE | DIRTY_PROJMATRIX | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_CULL_PLANES },
	{ GE_CMD_VIEWPORTXCENTER, FLAG_FLUSHBEFOREONCHANGE, DIRTY_FRAMEBUF | DIRTY_TEXTURE_PARAMS | DIRTY_CULLRANGE | DIRTY_PROJMATRIX | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_CULL_PLANES },
	{ GE_CMD_VIEWPORTYCENTER, FLAG_FLUSHBEFOREONCHANGE, DIRTY_FRAMEBUF | DIRTY_TEXTURE_PARAMS | DIRTY_CULLRANGE | DIRTY_PROJMATRIX | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_CULL_PLANES },
	{ GE_CMD_VIEWPORTZSCALE, FLAG_FLUSHBEFOREONCHANGE, DIRTY_FRAMEBUF | DIRTY_TEXTURE_PARAMS | DIRTY_CULLRANGE | DIRTY_DEPTHRANGE | DIRTY_PROJMATRIX | DIRTY_VIEWPORTSCISSOR_STATE },
	{ GE_CMD_VIEWPORTZCENTER, FLAG_FLUSHBEFOREONCHANGE, DIRTY_FRAMEBUF | DIRTY_TEXTURE_PARAMS | DIRTY_CULLRANGE | DIRTY_DEPTHRANGE | DIRTY_PROJMATRIX | DIRTY_VIEWPORTSCISSOR_STATE },
	{ GE_CMD_DEPTHCLAMPENABLE, FLAG_FLUSHBEFOREONCHANGE, DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_CULLRANGE | DIRTY_RASTER_STATE },

	// Z clip
	{ GE_CMD_MINZ, FLAG_FLUSHBEFOREONCHANGE, DIRTY_DEPTHRANGE | DIRTY_RASTER_STATE | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_CULLRANGE },
	{ GE_CMD_MAXZ, FLAG_FLUSHBEFOREONCHANGE, DIRTY_DEPTHRANGE | DIRTY_RASTER_STATE | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_CULLRANGE },

	// Region
	{ GE_CMD_REGION1, FLAG_FLUSHBEFOREONCHANGE, DIRTY_FRAMEBUF | DIRTY_TEXTURE_PARAMS | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_CULLRANGE | DIRTY_CULL_PLANES },
	{ GE_CMD_REGION2, FLAG_FLUSHBEFOREONCHANGE, DIRTY_FRAMEBUF | DIRTY_TEXTURE_PARAMS | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_CULLRANGE | DIRTY_CULL_PLANES },

	// Scissor
	{ GE_CMD_SCISSOR1, FLAG_FLUSHBEFOREONCHANGE, DIRTY_FRAMEBUF | DIRTY_TEXTURE_PARAMS | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_CULLRANGE | DIRTY_CULL_PLANES },
	{ GE_CMD_SCISSOR2, FLAG_FLUSHBEFOREONCHANGE, DIRTY_FRAMEBUF | DIRTY_TEXTURE_PARAMS | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_CULLRANGE | DIRTY_CULL_PLANES },

	// Lighting base colors
	{ GE_CMD_AMBIENTCOLOR, FLAG_FLUSHBEFOREONCHANGE, DIRTY_AMBIENT },
	{ GE_CMD_AMBIENTALPHA, FLAG_FLUSHBEFOREONCHANGE, DIRTY_AMBIENT },
	{ GE_CMD_MATERIALDIFFUSE, FLAG_FLUSHBEFOREONCHANGE, DIRTY_MATDIFFUSE },
	{ GE_CMD_MATERIALEMISSIVE, FLAG_FLUSHBEFOREONCHANGE, DIRTY_MATEMISSIVE },
	{ GE_CMD_MATERIALAMBIENT, FLAG_FLUSHBEFOREONCHANGE, DIRTY_MATAMBIENTALPHA },
	{ GE_CMD_MATERIALALPHA, FLAG_FLUSHBEFOREONCHANGE, DIRTY_MATAMBIENTALPHA },
	{ GE_CMD_MATERIALSPECULAR, FLAG_FLUSHBEFOREONCHANGE, DIRTY_MATSPECULAR },
	{ GE_CMD_MATERIALSPECULARCOEF, FLAG_FLUSHBEFOREONCHANGE, DIRTY_MATSPECULAR },

	// Light parameters
	{ GE_CMD_LX0, FLAG_FLUSHBEFOREONCHANGE, DIRTY_LIGHT0 },
	{ GE_CMD_LY0, FLAG_FLUSHBEFOREONCHANGE, DIRTY_LIGHT0 },
	{ GE_CMD_LZ0, FLAG_FLUSHBEFOREONCHANGE, DIRTY_LIGHT0 },
	{ GE_CMD_LX1, FLAG_FLUSHBEFOREONCHANGE, DIRTY_LIGHT1 },
	{ GE_CMD_LY1, FLAG_FLUSHBEFOREONCHANGE, DIRTY_LIGHT1 },
	{ GE_CMD_LZ1, FLAG_FLUSHBEFOREONCHANGE, DIRTY_LIGHT1 },
	{ GE_CMD_LX2, FLAG_FLUSHBEFOREONCHANGE, DIRTY_LIGHT2 },
	{ GE_CMD_LY2, FLAG_FLUSHBEFOREONCHANGE, DIRTY_LIGHT2 },
	{ GE_CMD_LZ2, FLAG_FLUSHBEFOREONCHANGE, DIRTY_LIGHT2 },
	{ GE_CMD_LX3, FLAG_FLUSHBEFOREONCHANGE, DIRTY_LIGHT3 },
	{ GE_CMD_LY3, FLAG_FLUSHBEFOREONCHANGE, DIRTY_LIGHT3 },
	{ GE_CMD_LZ3, FLAG_FLUSHBEFOREONCHANGE, DIRTY_LIGHT3 },

	{ GE_CMD_LDX0, FLAG_FLUSHBEFOREONCHANGE, DIRTY_LIGHT0 },
	{ GE_CMD_LDY0, FLAG_FLUSHBEFOREONCHANGE, DIRTY_LIGHT0 },
	{ GE_CMD_LDZ0, FLAG_FLUSHBEFOREONCHANGE, DIRTY_LIGHT0 },
	{ GE_CMD_LDX1, FLAG_FLUSHBEFOREONCHANGE, DIRTY_LIGHT1 },
	{ GE_CMD_LDY1, FLAG_FLUSHBEFOREONCHANGE, DIRTY_LIGHT1 },
	{ GE_CMD_LDZ1, FLAG_FLUSHBEFOREONCHANGE, DIRTY_LIGHT1 },
	{ GE_CMD_LDX2, FLAG_FLUSHBEFOREONCHANGE, DIRTY_LIGHT2 },
	{ GE_CMD_LDY2, FLAG_FLUSHBEFOREONCHANGE, DIRTY_LIGHT2 },
	{ GE_CMD_LDZ2, FLAG_FLUSHBEFOREONCHANGE, DIRTY_LIGHT2 },
	{ GE_CMD_LDX3, FLAG_FLUSHBEFOREONCHANGE, DIRTY_LIGHT3 },
	{ GE_CMD_LDY3, FLAG_FLUSHBEFOREONCHANGE, DIRTY_LIGHT3 },
	{ GE_CMD_LDZ3, FLAG_FLUSHBEFOREONCHANGE, DIRTY_LIGHT3 },

	{ GE_CMD_LKA0, FLAG_FLUSHBEFOREONCHANGE, DIRTY_LIGHT0 },
	{ GE_CMD_LKB0, FLAG_FLUSHBEFOREONCHANGE, DIRTY_LIGHT0 },
	{ GE_CMD_LKC0, FLAG_FLUSHBEFOREONCHANGE, DIRTY_LIGHT0 },
	{ GE_CMD_LKA1, FLAG_FLUSHBEFOREONCHANGE, DIRTY_LIGHT1 },
	{ GE_CMD_LKB1, FLAG_FLUSHBEFOREONCHANGE, DIRTY_LIGHT1 },
	{ GE_CMD_LKC1, FLAG_FLUSHBEFOREONCHANGE, DIRTY_LIGHT1 },
	{ GE_CMD_LKA2, FLAG_FLUSHBEFOREONCHANGE, DIRTY_LIGHT2 },
	{ GE_CMD_LKB2, FLAG_FLUSHBEFOREONCHANGE, DIRTY_LIGHT2 },
	{ GE_CMD_LKC2, FLAG_FLUSHBEFOREONCHANGE, DIRTY_LIGHT2 },
	{ GE_CMD_LKA3, FLAG_FLUSHBEFOREONCHANGE, DIRTY_LIGHT3 },
	{ GE_CMD_LKB3, FLAG_FLUSHBEFOREONCHANGE, DIRTY_LIGHT3 },
	{ GE_CMD_LKC3, FLAG_FLUSHBEFOREONCHANGE, DIRTY_LIGHT3 },

	{ GE_CMD_LKS0, FLAG_FLUSHBEFOREONCHANGE, DIRTY_LIGHT0 },
	{ GE_CMD_LKS1, FLAG_FLUSHBEFOREONCHANGE, DIRTY_LIGHT1 },
	{ GE_CMD_LKS2, FLAG_FLUSHBEFOREONCHANGE, DIRTY_LIGHT2 },
	{ GE_CMD_LKS3, FLAG_FLUSHBEFOREONCHANGE, DIRTY_LIGHT3 },

	{ GE_CMD_LKO0, FLAG_FLUSHBEFOREONCHANGE, DIRTY_LIGHT0 },
	{ GE_CMD_LKO1, FLAG_FLUSHBEFOREONCHANGE, DIRTY_LIGHT1 },
	{ GE_CMD_LKO2, FLAG_FLUSHBEFOREONCHANGE, DIRTY_LIGHT2 },
	{ GE_CMD_LKO3, FLAG_FLUSHBEFOREONCHANGE, DIRTY_LIGHT3 },

	{ GE_CMD_LAC0, FLAG_FLUSHBEFOREONCHANGE, DIRTY_LIGHT0 },
	{ GE_CMD_LDC0, FLAG_FLUSHBEFOREONCHANGE, DIRTY_LIGHT0 },
	{ GE_CMD_LSC0, FLAG_FLUSHBEFOREONCHANGE, DIRTY_LIGHT0 },
	{ GE_CMD_LAC1, FLAG_FLUSHBEFOREONCHANGE, DIRTY_LIGHT1 },
	{ GE_CMD_LDC1, FLAG_FLUSHBEFOREONCHANGE, DIRTY_LIGHT1 },
	{ GE_CMD_LSC1, FLAG_FLUSHBEFOREONCHANGE, DIRTY_LIGHT1 },
	{ GE_CMD_LAC2, FLAG_FLUSHBEFOREONCHANGE, DIRTY_LIGHT2 },
	{ GE_CMD_LDC2, FLAG_FLUSHBEFOREONCHANGE, DIRTY_LIGHT2 },
	{ GE_CMD_LSC2, FLAG_FLUSHBEFOREONCHANGE, DIRTY_LIGHT2 },
	{ GE_CMD_LAC3, FLAG_FLUSHBEFOREONCHANGE, DIRTY_LIGHT3 },
	{ GE_CMD_LDC3, FLAG_FLUSHBEFOREONCHANGE, DIRTY_LIGHT3 },
	{ GE_CMD_LSC3, FLAG_FLUSHBEFOREONCHANGE, DIRTY_LIGHT3 },

	// Ignored commands
	{ GE_CMD_TEXFLUSH, FLAG_EXECUTE, 0, &GPUCommonHW::Execute_TexFlush },
	{ GE_CMD_TEXSYNC, 0 },

	// These are just nop or part of other later commands.
	{ GE_CMD_NOP, 0 },
	{ GE_CMD_BASE, 0 },
	{ GE_CMD_TRANSFERSRC, 0 },
	{ GE_CMD_TRANSFERSRCW, 0 },
	{ GE_CMD_TRANSFERDST, 0 },
	{ GE_CMD_TRANSFERDSTW, 0 },
	{ GE_CMD_TRANSFERSRCPOS, 0 },
	{ GE_CMD_TRANSFERDSTPOS, 0 },
	{ GE_CMD_TRANSFERSIZE, 0 },
	{ GE_CMD_TRANSFERSTART, FLAG_EXECUTE | FLAG_READS_PC, 0, &GPUCommonHW::Execute_BlockTransferStart },

	// We don't use the dither table.
	{ GE_CMD_DITH0 },
	{ GE_CMD_DITH1 },
	{ GE_CMD_DITH2 },
	{ GE_CMD_DITH3 },

	// These handle their own flushing.
	{ GE_CMD_WORLDMATRIXNUMBER, FLAG_EXECUTE | FLAG_READS_PC | FLAG_WRITES_PC, 0, &GPUCommonHW::Execute_WorldMtxNum },
	{ GE_CMD_WORLDMATRIXDATA, FLAG_EXECUTE, 0, &GPUCommonHW::Execute_WorldMtxData },
	{ GE_CMD_VIEWMATRIXNUMBER, FLAG_EXECUTE | FLAG_READS_PC | FLAG_WRITES_PC, 0, &GPUCommonHW::Execute_ViewMtxNum },
	{ GE_CMD_VIEWMATRIXDATA, FLAG_EXECUTE, 0, &GPUCommonHW::Execute_ViewMtxData },
	{ GE_CMD_PROJMATRIXNUMBER, FLAG_EXECUTE | FLAG_READS_PC | FLAG_WRITES_PC, 0, &GPUCommonHW::Execute_ProjMtxNum },
	{ GE_CMD_PROJMATRIXDATA, FLAG_EXECUTE, 0, &GPUCommonHW::Execute_ProjMtxData },
	{ GE_CMD_TGENMATRIXNUMBER, FLAG_EXECUTE | FLAG_READS_PC | FLAG_WRITES_PC, 0, &GPUCommonHW::Execute_TgenMtxNum },
	{ GE_CMD_TGENMATRIXDATA, FLAG_EXECUTE, 0, &GPUCommonHW::Execute_TgenMtxData },
	{ GE_CMD_BONEMATRIXNUMBER, FLAG_EXECUTE | FLAG_READS_PC | FLAG_WRITES_PC, 0, &GPUCommonHW::Execute_BoneMtxNum },
	{ GE_CMD_BONEMATRIXDATA, FLAG_EXECUTE, 0, &GPUCommonHW::Execute_BoneMtxData },

	// Vertex Screen/Texture/Color
	{ GE_CMD_VSCX },
	{ GE_CMD_VSCY },
	{ GE_CMD_VSCZ },
	{ GE_CMD_VTCS },
	{ GE_CMD_VTCT },
	{ GE_CMD_VTCQ },
	{ GE_CMD_VCV },
	{ GE_CMD_VAP, FLAG_EXECUTE, 0, &GPUCommon::Execute_ImmVertexAlphaPrim },
	{ GE_CMD_VFC },
	{ GE_CMD_VSCV },

	// "Missing" commands (gaps in the sequence)
	{ GE_CMD_UNKNOWN_03, FLAG_EXECUTE, 0, &GPUCommon::Execute_Unknown },
	{ GE_CMD_UNKNOWN_0D, FLAG_EXECUTE, 0, &GPUCommon::Execute_Unknown },
	{ GE_CMD_UNKNOWN_11, FLAG_EXECUTE, 0, &GPUCommon::Execute_Unknown },
	{ GE_CMD_UNKNOWN_29, FLAG_EXECUTE, 0, &GPUCommon::Execute_Unknown },
	{ GE_CMD_UNKNOWN_34, FLAG_EXECUTE, 0, &GPUCommon::Execute_Unknown },
	{ GE_CMD_UNKNOWN_35, FLAG_EXECUTE, 0, &GPUCommon::Execute_Unknown },
	{ GE_CMD_UNKNOWN_39, FLAG_EXECUTE, 0, &GPUCommon::Execute_Unknown },
	{ GE_CMD_UNKNOWN_4E, FLAG_EXECUTE, 0, &GPUCommon::Execute_Unknown },
	{ GE_CMD_UNKNOWN_4F, FLAG_EXECUTE, 0, &GPUCommon::Execute_Unknown },
	{ GE_CMD_UNKNOWN_52, FLAG_EXECUTE, 0, &GPUCommon::Execute_Unknown },
	{ GE_CMD_UNKNOWN_59, FLAG_EXECUTE, 0, &GPUCommon::Execute_Unknown },
	{ GE_CMD_UNKNOWN_5A, FLAG_EXECUTE, 0, &GPUCommon::Execute_Unknown },
	{ GE_CMD_UNKNOWN_B6, FLAG_EXECUTE, 0, &GPUCommon::Execute_Unknown },
	{ GE_CMD_UNKNOWN_B7, FLAG_EXECUTE, 0, &GPUCommon::Execute_Unknown },
	{ GE_CMD_UNKNOWN_D1, FLAG_EXECUTE, 0, &GPUCommon::Execute_Unknown },
	{ GE_CMD_UNKNOWN_ED, FLAG_EXECUTE, 0, &GPUCommon::Execute_Unknown },
	{ GE_CMD_UNKNOWN_EF, FLAG_EXECUTE, 0, &GPUCommon::Execute_Unknown },
	{ GE_CMD_UNKNOWN_FA, FLAG_EXECUTE, 0, &GPUCommon::Execute_Unknown },
	{ GE_CMD_UNKNOWN_FB, FLAG_EXECUTE, 0, &GPUCommon::Execute_Unknown },
	{ GE_CMD_UNKNOWN_FC, FLAG_EXECUTE, 0, &GPUCommon::Execute_Unknown },
	{ GE_CMD_UNKNOWN_FD, FLAG_EXECUTE, 0, &GPUCommon::Execute_Unknown },
	{ GE_CMD_UNKNOWN_FE, FLAG_EXECUTE, 0, &GPUCommon::Execute_Unknown },
	// Appears to be debugging related or something? Hit a lot in GoW.
	{ GE_CMD_NOP_FF, 0 },
};
361
362
GPUCommonHW::GPUCommonHW(GraphicsContext *gfxCtx, Draw::DrawContext *draw) : GPUCommon(gfxCtx, draw) {
	memset(cmdInfo_, 0, sizeof(cmdInfo_));

	// Convert the command table to a faster format, complaining about any
	// duplicate entries along the way.
	std::set<u8> seenCmds;
	for (const CommonCommandTableEntry &entry : commonCommandTable) {
		const u8 cmd = entry.cmd;
		// insert().second is false when the command was already present.
		if (!seenCmds.insert(cmd).second) {
			ERROR_LOG(Log::G3D, "Command table Dupe: %02x (%i)", (int)cmd, (int)cmd);
		}
		// Low 8 bits: FLAG_* bits. Dirty bits are packed in shifted up by 8.
		cmdInfo_[cmd].flags |= (uint64_t)entry.flags | (entry.dirty << 8);
		cmdInfo_[cmd].func = entry.func;
		// Can't have FLAG_EXECUTE commands without a function pointer to execute.
		if ((cmdInfo_[cmd].flags & (FLAG_EXECUTE | FLAG_EXECUTEONCHANGE)) && !cmdInfo_[cmd].func) {
			Crash();
		}
	}
	// Find commands missing from the table.
	for (int cmd = 0; cmd < 0xEF; cmd++) {
		if (seenCmds.find((u8)cmd) == seenCmds.end()) {
			ERROR_LOG(Log::G3D, "Command missing from table: %02x (%i)", cmd, cmd);
		}
	}

	UpdateCmdInfo();
	UpdateMSAALevel(draw);
}
391
392
GPUCommonHW::~GPUCommonHW() {
	// Clear features so they're not visible in system info.
	gstate_c.SetUseFlags(0);

	// Delete the various common managers.
	// FBOs must be destroyed while the framebuffer manager is still alive.
	framebufferManager_->DestroyAllFBOs();
	delete framebufferManager_;
	delete textureCache_;
	if (shaderManager_) {
		// Release compiled shaders before deleting the manager itself.
		shaderManager_->ClearShaders();
		delete shaderManager_;
	}
}
405
406
// Called once per frame. Might also get called during the pause screen
407
// if "transparent".
408
// Called once per frame. Might also get called during the pause screen
// if "transparent".
void GPUCommonHW::CheckConfigChanged() {
	if (configChanged_) {
		// Settings changed: invalidate caches, re-derive feature flags, and
		// let each subsystem pick up the new config before the next draw.
		ClearCacheNextFrame();
		gstate_c.SetUseFlags(CheckGPUFeatures());
		drawEngineCommon_->NotifyConfigChanged();
		textureCache_->NotifyConfigChanged();
		framebufferManager_->NotifyConfigChanged();
		BuildReportingInfo();
		configChanged_ = false;
	}

	// Check needed when running tests.
	if (framebufferManager_) {
		framebufferManager_->CheckPostShaders();
	}
}
424
425
void GPUCommonHW::CheckDisplayResized() {
426
if (displayResized_) {
427
framebufferManager_->NotifyDisplayResized();
428
displayResized_ = false;
429
}
430
}
431
432
void GPUCommonHW::CheckRenderResized() {
433
if (renderResized_) {
434
framebufferManager_->NotifyRenderResized(msaaLevel_);
435
renderResized_ = false;
436
}
437
}
438
439
// Call at the END of the GPU implementation's DeviceLost
440
// Call at the END of the GPU implementation's DeviceLost
void GPUCommonHW::DeviceLost() {
	// Release GPU-side resources in each subsystem. draw_ is nulled out here,
	// so nothing below may touch the draw context after this point.
	framebufferManager_->DeviceLost();
	draw_ = nullptr;
	textureCache_->Clear(false);
	textureCache_->DeviceLost();
	shaderManager_->DeviceLost();
	drawEngineCommon_->DeviceLost();
}
448
449
// Call at the start of the GPU implementation's DeviceRestore
450
// Call at the start of the GPU implementation's DeviceRestore
void GPUCommonHW::DeviceRestore(Draw::DrawContext *draw) {
	draw_ = draw;
	displayResized_ = true; // re-check display bounds.
	renderResized_ = true;
	// Hand the new draw context to each subsystem.
	framebufferManager_->DeviceRestore(draw_);
	textureCache_->DeviceRestore(draw_);
	shaderManager_->DeviceRestore(draw_);
	drawEngineCommon_->DeviceRestore(draw_);

	PPGeSetDrawContext(draw_);

	// Feature flags and command dispatch may differ on the new device.
	gstate_c.SetUseFlags(CheckGPUFeatures());
	BuildReportingInfo();
	UpdateCmdInfo();
}
465
466
void GPUCommonHW::UpdateCmdInfo() {
467
if (g_Config.bSoftwareSkinning) {
468
cmdInfo_[GE_CMD_VERTEXTYPE].flags &= ~FLAG_FLUSHBEFOREONCHANGE;
469
cmdInfo_[GE_CMD_VERTEXTYPE].func = &GPUCommonHW::Execute_VertexTypeSkinning;
470
} else {
471
cmdInfo_[GE_CMD_VERTEXTYPE].flags |= FLAG_FLUSHBEFOREONCHANGE;
472
cmdInfo_[GE_CMD_VERTEXTYPE].func = &GPUCommonHW::Execute_VertexType;
473
}
474
475
// Reconfigure for light ubershader or not.
476
for (int i = 0; i < 4; i++) {
477
if (gstate_c.Use(GPU_USE_LIGHT_UBERSHADER)) {
478
cmdInfo_[GE_CMD_LIGHTENABLE0 + i].RemoveDirty(DIRTY_VERTEXSHADER_STATE);
479
cmdInfo_[GE_CMD_LIGHTENABLE0 + i].AddDirty(DIRTY_LIGHT_CONTROL);
480
cmdInfo_[GE_CMD_LIGHTTYPE0 + i].RemoveDirty(DIRTY_VERTEXSHADER_STATE);
481
cmdInfo_[GE_CMD_LIGHTTYPE0 + i].AddDirty(DIRTY_LIGHT_CONTROL);
482
} else {
483
cmdInfo_[GE_CMD_LIGHTENABLE0 + i].RemoveDirty(DIRTY_LIGHT_CONTROL);
484
cmdInfo_[GE_CMD_LIGHTENABLE0 + i].AddDirty(DIRTY_VERTEXSHADER_STATE);
485
cmdInfo_[GE_CMD_LIGHTTYPE0 + i].RemoveDirty(DIRTY_LIGHT_CONTROL);
486
cmdInfo_[GE_CMD_LIGHTTYPE0 + i].AddDirty(DIRTY_VERTEXSHADER_STATE);
487
}
488
}
489
490
if (gstate_c.Use(GPU_USE_LIGHT_UBERSHADER)) {
491
cmdInfo_[GE_CMD_MATERIALUPDATE].RemoveDirty(DIRTY_VERTEXSHADER_STATE);
492
cmdInfo_[GE_CMD_MATERIALUPDATE].AddDirty(DIRTY_LIGHT_CONTROL);
493
} else {
494
cmdInfo_[GE_CMD_MATERIALUPDATE].RemoveDirty(DIRTY_LIGHT_CONTROL);
495
cmdInfo_[GE_CMD_MATERIALUPDATE].AddDirty(DIRTY_VERTEXSHADER_STATE);
496
}
497
498
if (gstate_c.Use(GPU_USE_FRAGMENT_UBERSHADER)) {
499
// Texfunc controls both texalpha and doubling. The rest is not dynamic yet so can't remove fragment shader dirtying.
500
cmdInfo_[GE_CMD_TEXFUNC].AddDirty(DIRTY_TEX_ALPHA_MUL);
501
} else {
502
cmdInfo_[GE_CMD_TEXFUNC].RemoveDirty(DIRTY_TEX_ALPHA_MUL);
503
}
504
}
505
506
void GPUCommonHW::BeginHostFrame() {
	GPUCommon::BeginHostFrame();
	// If the game has ever used an exact depth-equality test, recompute the
	// feature flags once so depth handling can adapt (see how
	// sawExactEqualDepth_ is consumed in CheckGPUFeaturesLate). Latched:
	// triggers at most once.
	if (drawEngineCommon_->EverUsedExactEqualDepth() && !sawExactEqualDepth_) {
		sawExactEqualDepth_ = true;
		gstate_c.SetUseFlags(CheckGPUFeatures());
	}
}
513
514
// Records the framebuffer that will be scanned out to the screen, both in the
// framebuffer manager and via the base-class notification.
void GPUCommonHW::SetDisplayFramebuffer(u32 framebuf, u32 stride, GEBufferFormat format) {
	framebufferManager_->SetDisplayFramebuffer(framebuf, stride, format);
	NotifyDisplay(framebuf, stride, format);
}
518
519
void GPUCommonHW::CheckFlushOp(int cmd, u32 diff) {
520
const u8 cmdFlags = cmdInfo_[cmd].flags;
521
if (diff && (cmdFlags & FLAG_FLUSHBEFOREONCHANGE)) {
522
if (dumpThisFrame_) {
523
NOTICE_LOG(Log::G3D, "================ FLUSH ================");
524
}
525
drawEngineCommon_->Flush();
526
}
527
}
528
529
// Runs before an op executes: op >> 24 is the command byte, diff the bits
// that changed; flushes pending draws when the command requires it.
void GPUCommonHW::PreExecuteOp(u32 op, u32 diff) {
	CheckFlushOp(op >> 24, diff);
}
532
533
// Presents the current display framebuffer to the backbuffer.
void GPUCommonHW::CopyDisplayToOutput(bool reallyDirty) {
	drawEngineCommon_->FlushQueuedDepth();
	// Flush anything left over.
	drawEngineCommon_->Flush();

	// Force the next draw to rebind its shader state.
	shaderManager_->DirtyLastShader();

	// after this, render pass is active.
	framebufferManager_->CopyDisplayToOutput(reallyDirty);

	// The presented framebuffer may be sampled as a texture later.
	gstate_c.Dirty(DIRTY_TEXTURE_IMAGE);
}
545
546
// Whether a frame has been presented; delegates to the framebuffer manager.
bool GPUCommonHW::PresentedThisFrame() const {
	return framebufferManager_->PresentedThisFrame();
}
549
550
// Savestate serialization. The base class handles the actual GPU state;
// on load we additionally invalidate all cached textures and framebuffers.
void GPUCommonHW::DoState(PointerWrap &p) {
	GPUCommon::DoState(p);

	// TODO: Some of these things may not be necessary.
	// None of these are necessary when saving.
	if (p.mode == p.MODE_READ && !PSP_CoreParameter().frozen) {
		textureCache_->Clear(true);

		gstate_c.Dirty(DIRTY_TEXTURE_IMAGE);
		framebufferManager_->DestroyAllFBOs();
	}
}
562
563
// Defers a full texture cache clear until the start of the next frame.
void GPUCommonHW::ClearCacheNextFrame() {
	textureCache_->ClearNextFrame();
}
566
567
// Needs to be called on GPU thread, not reporting thread.
568
// Needs to be called on GPU thread, not reporting thread.
void GPUCommonHW::BuildReportingInfo() {
	using namespace Draw;

	// Short info is the GPU vendor string; full info also appends the driver
	// version and the shading language version.
	reportingPrimaryInfo_ = draw_->GetInfoString(InfoField::VENDORSTRING);
	reportingFullInfo_ = reportingPrimaryInfo_ + " - " + System_GetProperty(SYSPROP_GPUDRIVER_VERSION) + " - " + draw_->GetInfoString(InfoField::SHADELANGVERSION);
}
574
575
// Derives the GPU_USE_* feature mask from backend capabilities, known driver
// bugs, config settings, and per-game compat flags.
u32 GPUCommonHW::CheckGPUFeatures() const {
	const Draw::DeviceCaps &caps = draw_->GetDeviceCaps();

	u32 features = 0;
	if (caps.logicOpSupported)
		features |= GPU_USE_LOGIC_OP;
	if (caps.anisoSupported)
		features |= GPU_USE_ANISOTROPY;
	if (caps.textureNPOTFullySupported)
		features |= GPU_USE_TEXTURE_NPOT;
	if (caps.dualSourceBlend) {
		// Skip dual-source blending on drivers known to implement it badly.
		if (!g_Config.bVendorBugChecksEnabled || !draw_->GetBugs().Has(Draw::Bugs::DUAL_SOURCE_BLENDING_BROKEN))
			features |= GPU_USE_DUALSOURCE_BLEND;
	}
	if (caps.blendMinMaxSupported)
		features |= GPU_USE_BLEND_MINMAX;

	if (caps.clipDistanceSupported)
		features |= GPU_USE_CLIP_DISTANCE;

	if (caps.cullDistanceSupported)
		features |= GPU_USE_CULL_DISTANCE;

	if (caps.textureDepthSupported)
		features |= GPU_USE_DEPTH_TEXTURE;

	if (caps.depthClampSupported) {
		// Some backends always do GPU_USE_ACCURATE_DEPTH, but it's required for depth clamp.
		features |= GPU_USE_DEPTH_CLAMP | GPU_USE_ACCURATE_DEPTH;
	}

	const bool canClipOrCull = caps.clipDistanceSupported || caps.cullDistanceSupported;
	const bool canDiscardVertex = !draw_->GetBugs().Has(Draw::Bugs::BROKEN_NAN_IN_CONDITIONAL);
	if ((canClipOrCull || canDiscardVertex) && !g_Config.bDisableRangeCulling) {
		// We'll dynamically use the parts that are supported, to reduce artifacts as much as possible.
		features |= GPU_USE_VS_RANGE_CULLING;
	}

	if (caps.framebufferFetchSupported) {
		features |= GPU_USE_FRAMEBUFFER_FETCH;
		features |= GPU_USE_SHADER_BLENDING; // doesn't matter if we are buffered or not here.
	} else if (!g_Config.bSkipBufferEffects) {
		features |= GPU_USE_SHADER_BLENDING;
	}

	if (draw_->GetShaderLanguageDesc().bitwiseOps && g_Config.bUberShaderVertex)
		features |= GPU_USE_LIGHT_UBERSHADER;

	if (PSP_CoreParameter().compat.flags().ClearToRAM)
		features |= GPU_USE_CLEAR_RAM_HACK;

	// Even without depth clamp, force accurate depth on for some games that break without it.
	if (PSP_CoreParameter().compat.flags().DepthRangeHack)
		features |= GPU_USE_ACCURATE_DEPTH;

	// Some backends will turn this off again in the calling function.
	if (g_Config.bUberShaderFragment)
		features |= GPU_USE_FRAGMENT_UBERSHADER;

	return features;
}
648
649
// Second-pass feature selection. Decides how to emulate the PSP's 16-bit depth
// precision on top of the host's preferred depth buffer format, taking into
// account what has been observed so far (sawExactEqualDepth_) and compat flags.
u32 GPUCommonHW::CheckGPUFeaturesLate(u32 features) const {
	// If we already have a 16-bit depth buffer, we don't need to round.
	bool prefer24 = draw_->GetDeviceCaps().preferredDepthBufferFormat == Draw::DataFormat::D24_S8;
	bool prefer16 = draw_->GetDeviceCaps().preferredDepthBufferFormat == Draw::DataFormat::D16;
	if (!prefer16) {
		if (sawExactEqualDepth_ && (features & GPU_USE_ACCURATE_DEPTH) != 0 && !PSP_CoreParameter().compat.flags().ForceMaxDepthResolution) {
			// Exact equal tests tend to have issues unless we use the PSP's depth range.
			// We use 24-bit depth virtually everywhere, the fallback is just for safety.
			if (prefer24)
				features |= GPU_SCALE_DEPTH_FROM_24BIT_TO_16BIT;
			else
				features |= GPU_ROUND_FRAGMENT_DEPTH_TO_16BIT;
		} else if (!g_Config.bHighQualityDepth && (features & GPU_USE_ACCURATE_DEPTH) != 0) {
			features |= GPU_SCALE_DEPTH_FROM_24BIT_TO_16BIT;
		} else if (PSP_CoreParameter().compat.flags().PixelDepthRounding) {
			if (prefer24 && (features & GPU_USE_ACCURATE_DEPTH) != 0) {
				// Here we can simulate a 16 bit depth buffer by scaling.
				// Note that the depth buffer is fixed point, not floating, so dividing by 256 is pretty good.
				features |= GPU_SCALE_DEPTH_FROM_24BIT_TO_16BIT;
			} else {
				// Use fragment rounding on where available otherwise.
				features |= GPU_ROUND_FRAGMENT_DEPTH_TO_16BIT;
			}
		} else if (PSP_CoreParameter().compat.flags().VertexDepthRounding) {
			features |= GPU_ROUND_DEPTH_TO_16BIT;
		}
	}

	return features;
}
679
680
// Applies the user-configured MSAA level if the backend reports support for it
// in its sample-count mask; otherwise falls back to no MSAA (level 0).
void GPUCommonHW::UpdateMSAALevel(Draw::DrawContext *draw) {
	const int requested = g_Config.iMultiSampleLevel;
	const bool supported = draw && (draw->GetDeviceCaps().multiSampleLevelsMask & (1 << requested)) != 0;
	// Didn't support the configured level? Revert to 0.
	msaaLevel_ = supported ? requested : 0;
}
689
690
// Debugger: list shader IDs of the given type, routed to whichever subsystem
// owns that shader type (vertex loaders, texture shaders, or regular shaders).
std::vector<std::string> GPUCommonHW::DebugGetShaderIDs(DebugShaderType type) {
	if (type == SHADER_TYPE_VERTEXLOADER)
		return drawEngineCommon_->DebugGetVertexLoaderIDs();
	if (type == SHADER_TYPE_TEXTURE)
		return textureCache_->GetTextureShaderCache()->DebugGetShaderIDs(type);
	return shaderManager_->DebugGetShaderIDs(type);
}
700
701
std::string GPUCommonHW::DebugGetShaderString(std::string id, DebugShaderType type, DebugShaderStringType stringType) {
702
switch (type) {
703
case SHADER_TYPE_VERTEXLOADER:
704
return drawEngineCommon_->DebugGetVertexLoaderString(id, stringType);
705
case SHADER_TYPE_TEXTURE:
706
return textureCache_->GetTextureShaderCache()->DebugGetShaderString(id, type, stringType);
707
default:
708
return shaderManager_->DebugGetShaderString(id, type, stringType);
709
}
710
}
711
712
// Debugger: read back either the current render target or the display
// framebuffer into `buffer`, depending on `type`.
bool GPUCommonHW::GetCurrentFramebuffer(GPUDebugBuffer &buffer, GPUDebugFramebufferType type, int maxRes) {
	u32 fb_address;
	int fb_stride;
	GEBufferFormat format;
	if (type == GPU_DBG_FRAMEBUF_RENDER) {
		// Current render target, straight from GE state.
		fb_address = gstate.getFrameBufRawAddress() | 0x04000000;
		fb_stride = gstate.FrameBufStride();
		format = gstate_c.framebufFormat;
	} else {
		// The framebuffer currently being displayed.
		fb_address = framebufferManager_->DisplayFramebufAddr();
		fb_stride = framebufferManager_->DisplayFramebufStride();
		format = framebufferManager_->DisplayFramebufFormat();
	}
	return framebufferManager_->GetFramebuffer(fb_address, fb_stride, format, buffer, maxRes);
}
718
719
// Debugger: read back the current depth buffer contents into `buffer`.
bool GPUCommonHW::GetCurrentDepthbuffer(GPUDebugBuffer &buffer) {
	const u32 colorAddr = gstate.getFrameBufRawAddress() | 0x04000000;
	const int colorStride = gstate.FrameBufStride();

	const u32 depthAddr = gstate.getDepthBufRawAddress() | 0x04000000;
	const int depthStride = gstate.DepthBufStride();

	return framebufferManager_->GetDepthbuffer(colorAddr, colorStride, depthAddr, depthStride, buffer);
}
728
729
// Debugger: read back the current stencil buffer contents into `buffer`.
bool GPUCommonHW::GetCurrentStencilbuffer(GPUDebugBuffer &buffer) {
	const u32 colorAddr = gstate.getFrameBufRawAddress() | 0x04000000;
	const int colorStride = gstate.FrameBufStride();

	return framebufferManager_->GetStencilbuffer(colorAddr, colorStride, buffer);
}
735
736
// Reads back the final output (display) image, e.g. for screenshots.
bool GPUCommonHW::GetOutputFramebuffer(GPUDebugBuffer &buffer) {
	// framebufferManager_ can be null here when taking screens in software rendering mode.
	// TODO: Actually grab the framebuffer anyway.
	return framebufferManager_ ? framebufferManager_->GetOutputFramebuffer(buffer) : false;
}
741
742
// Returns the framebuffer manager's current list of virtual framebuffers (for the debugger UI).
std::vector<const VirtualFramebuffer *> GPUCommonHW::GetFramebufferList() const {
	return framebufferManager_->GetFramebufferList();
}
745
746
// Debugger: fetch the currently loaded CLUT (palette) contents into `buffer`.
bool GPUCommonHW::GetCurrentClut(GPUDebugBuffer &buffer) {
	return textureCache_->GetCurrentClutBuffer(buffer);
}
749
750
// Debugger: read back the currently bound texture at the given mip level.
// Sets *isFramebuffer when the "texture" is actually a render target.
bool GPUCommonHW::GetCurrentTexture(GPUDebugBuffer &buffer, int level, bool *isFramebuffer) {
	// No texture mapped - nothing to fetch.
	if (!gstate.isTextureMapEnabled())
		return false;
	return textureCache_->GetCurrentTextureDebug(buffer, level, isFramebuffer);
}
756
757
// Detects the first depth read/write on the current render target and latches
// gstate_c.usingDepth/clearingDepth accordingly, updating the framebuffer's
// depth usage timestamps. Does nothing once usingDepth is already set.
void GPUCommonHW::CheckDepthUsage(VirtualFramebuffer *vfb) {
	if (gstate_c.usingDepth)
		return;

	bool readsDepth = false;
	bool clearsDepth = false;
	bool writesDepth = false;
	if (gstate.isModeClear()) {
		clearsDepth = gstate.isClearModeDepthMask();
		writesDepth = clearsDepth;
	} else if (gstate.isDepthTestEnabled()) {
		writesDepth = gstate.isDepthWriteEnabled();
		// Any test other than ALWAYS actually reads the depth buffer.
		readsDepth = gstate.getDepthTestFunction() > GE_COMP_ALWAYS;
	}

	if (!writesDepth && !readsDepth)
		return;

	gstate_c.usingDepth = true;
	gstate_c.clearingDepth = clearsDepth;
	vfb->last_frame_depth_render = gpuStats.numFlips;
	if (writesDepth) {
		vfb->last_frame_depth_updated = gpuStats.numFlips;
	}
	framebufferManager_->SetDepthFrameBuffer(clearsDepth);
}
781
782
// Invalidates cached textures (and possibly framebuffer data) for the range
// [addr, addr+size). A non-positive size invalidates everything.
void GPUCommonHW::InvalidateCache(u32 addr, int size, GPUInvalidationType type) {
	if (size > 0) {
		textureCache_->Invalidate(addr, size, type);
	} else {
		textureCache_->InvalidateAll(type);
	}

	if (type != GPU_INVALIDATE_ALL && framebufferManager_->MayIntersectFramebufferColor(addr)) {
		// Vempire invalidates (with writeback) after drawing, but before blitting.
		// TODO: Investigate whether we can get this to work some other way.
		if (type == GPU_INVALIDATE_SAFE) {
			framebufferManager_->UpdateFromMemory(addr, size);
		}
	}
}
796
797
bool GPUCommonHW::FramebufferDirty() {
798
VirtualFramebuffer *vfb = framebufferManager_->GetDisplayVFB();
799
if (vfb) {
800
bool dirty = vfb->dirtyAfterDisplay;
801
vfb->dirtyAfterDisplay = false;
802
return dirty;
803
}
804
return true;
805
}
806
807
bool GPUCommonHW::FramebufferReallyDirty() {
808
VirtualFramebuffer *vfb = framebufferManager_->GetDisplayVFB();
809
if (vfb) {
810
bool dirty = vfb->reallyDirtyAfterDisplay;
811
vfb->reallyDirtyAfterDisplay = false;
812
return dirty;
813
}
814
return true;
815
}
816
817
// Executes a single GE command outside the fast run loop (used when stepping,
// recording, etc.). Looks up the command's flags and handler in cmdInfo_ and
// either calls the handler or just accumulates dirty flags.
void GPUCommonHW::ExecuteOp(u32 op, u32 diff) {
	const u8 cmd = op >> 24;
	const CommandInfo info = cmdInfo_[cmd];
	const u8 cmdFlags = info.flags;
	// Run the handler if the command always executes, or if the value changed
	// and the command is execute-on-change.
	if ((cmdFlags & FLAG_EXECUTE) || (diff && (cmdFlags & FLAG_EXECUTEONCHANGE))) {
		(this->*info.func)(op, diff);
	} else if (diff) {
		// Dirty flags are mashed into the flags word shifted left by 8 (see CommandInfo).
		uint64_t dirty = info.flags >> 8;
		if (dirty)
			gstate_c.Dirty(dirty);
	}
}
829
830
// The hot display-list interpreter loop. Processes commands until downcount
// is exhausted; handlers may themselves modify downcount, so it is synced
// around each handler call.
void GPUCommonHW::FastRunLoop(DisplayList &list) {
	PROFILE_THIS_SCOPE("gpuloop");

	if (!Memory::IsValidAddress(list.pc)) {
		// We're having some serious problems here, just bail and try to limp along and not crash the app.
		downcount = 0;
		return;
	}

	const CommandInfo *cmdInfo = cmdInfo_;
	int dc = downcount;
	for (; dc > 0; --dc) {
		// We know that display list PCs have the upper nibble == 0 - no need to mask the pointer
		const u32 op = *(const u32_le *)(Memory::base + list.pc);
		const u32 cmd = op >> 24;
		const CommandInfo &info = cmdInfo[cmd];
		const u32 diff = op ^ gstate.cmdmem[cmd];
		if (diff == 0) {
			// Value unchanged: only commands flagged FLAG_EXECUTE need to run.
			if (info.flags & FLAG_EXECUTE) {
				downcount = dc;
				(this->*info.func)(op, diff);
				dc = downcount;
			}
		} else {
			uint64_t flags = info.flags;
			if (flags & FLAG_FLUSHBEFOREONCHANGE) {
				// Flush pending draws before the state change takes effect.
				drawEngineCommon_->Flush();
			}
			gstate.cmdmem[cmd] = op;
			if (flags & (FLAG_EXECUTE | FLAG_EXECUTEONCHANGE)) {
				// Handlers can modify downcount (jumps, stalls), so sync around the call.
				downcount = dc;
				(this->*info.func)(op, diff);
				dc = downcount;
			} else {
				// Dirty flags live in the upper bits of flags (shifted by 8, see CommandInfo).
				uint64_t dirty = flags >> 8;
				if (dirty)
					gstate_c.Dirty(dirty);
			}
		}
		list.pc += 4;
	}
	downcount = 0;
}
873
874
// Handles GE_CMD_VERTEXTYPE (non-software-skinning path). Only dirties shader
// and raster state; the vertex format itself is read at draw time.
void GPUCommonHW::Execute_VertexType(u32 op, u32 diff) {
	if (diff) {
		// TODO: We only need to dirty vshader-state here if the output format will be different.
		gstate_c.Dirty(DIRTY_VERTEXSHADER_STATE);

		if (diff & GE_VTYPE_THROUGH_MASK) {
			// Switching between through and non-through, we need to invalidate a bunch of stuff.
			gstate_c.Dirty(DIRTY_RASTER_STATE | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_FRAGMENTSHADER_STATE | DIRTY_GEOMETRYSHADER_STATE | DIRTY_CULLRANGE);
		}
	}
}
885
886
// Handles GE_CMD_VERTEXTYPE when software skinning is enabled. Changes that
// only affect the weight count can be batched without flushing.
void GPUCommonHW::Execute_VertexTypeSkinning(u32 op, u32 diff) {
	// Don't flush when weight count changes.
	if (diff & ~GE_VTYPE_WEIGHTCOUNT_MASK) {
		// Restore and flush: XOR with diff temporarily restores the previous
		// vertType so the flush runs with the state the queued draws used,
		// then XOR again re-applies the new value.
		gstate.vertType ^= diff;
		Flush();
		gstate.vertType ^= diff;
		// In this case, we may be doing weights and morphs.
		// Update any bone matrix uniforms so it uses them correctly.
		if ((op & GE_VTYPE_MORPHCOUNT_MASK) != 0) {
			gstate_c.Dirty(gstate_c.deferredVertTypeDirty);
			gstate_c.deferredVertTypeDirty = 0;
		}
		gstate_c.Dirty(DIRTY_VERTEXSHADER_STATE);
	}
	if (diff & GE_VTYPE_THROUGH_MASK)
		gstate_c.Dirty(DIRTY_RASTER_STATE | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_FRAGMENTSHADER_STATE | DIRTY_GEOMETRYSHADER_STATE | DIRTY_CULLRANGE);
}
904
905
// Handles GE_CMD_PRIM - the command that actually draws geometry. Contains an
// inner mini-interpreter that greedily consumes runs of PRIM and cheap state
// commands following this one, merging them into larger draw batches.
void GPUCommonHW::Execute_Prim(u32 op, u32 diff) {
	// This drives all drawing. All other state we just buffer up, then we apply it only
	// when it's time to draw. As most PSP games set state redundantly ALL THE TIME, this is a huge optimization.

	PROFILE_THIS_SCOPE("execprim");

	FlushImm();

	// Upper bits are ignored.
	const GEPrimitiveType prim = static_cast<GEPrimitiveType>((op >> 16) & 7);
	SetDrawType(DRAW_PRIM, prim);

	// Discard AA lines as we can't do anything that makes sense with these anyway. The SW plugin might, though.
	if (gstate.isAntiAliasEnabled()) {
		// Heuristic derived from discussions in #6483 and #12588.
		// Discard AA lines in Persona 3 Portable, DOA Paradise and Summon Night 5, while still keeping AA lines in Echochrome.
		if ((prim == GE_PRIM_LINE_STRIP || prim == GE_PRIM_LINES) && gstate.getTextureFunction() == GE_TEXFUNC_REPLACE)
			return;
	}

	// Update cached framebuffer format.
	// We store it in the cache so it can be modified for blue-to-alpha, next.
	gstate_c.framebufFormat = gstate.FrameBufFormat();

	if (!Memory::IsValidAddress(gstate_c.vertexAddr)) {
		ERROR_LOG(Log::G3D, "Bad vertex address %08x!", gstate_c.vertexAddr);
		return;
	}

	// See the documentation for gstate_c.blueToAlpha.
	bool blueToAlpha = false;
	if (PSP_CoreParameter().compat.flags().BlueToAlpha) {
		if (gstate_c.framebufFormat == GEBufferFormat::GE_FORMAT_565 && gstate.getColorMask() == 0x0FFFFF && !gstate.isLogicOpEnabled()) {
			blueToAlpha = true;
			gstate_c.framebufFormat = GEBufferFormat::GE_FORMAT_4444;
		}
		if (blueToAlpha != gstate_c.blueToAlpha) {
			gstate_c.blueToAlpha = blueToAlpha;
			gstate_c.Dirty(DIRTY_FRAMEBUF | DIRTY_FRAGMENTSHADER_STATE | DIRTY_BLEND_STATE);
		}
	}

	if (PSP_CoreParameter().compat.flags().SplitFramebufferMargin) {
		switch (gstate.vertType & 0xFFFFFF) {
		case 0x00800102: // through, u16 uv, u16 pos (used for the framebuffer effect in-game)
		case 0x0080011c: // through, 8888 color, s16 pos (used for clearing in the margin of the title screen)
		case 0x00000183: // float uv, float pos (used for drawing in the margin of the title screen)
			// Need to re-check the framebuffer every one of these draws, to update the split if needed.
			gstate_c.Dirty(DIRTY_FRAMEBUF);
		}
	}

	// This also makes skipping drawing very effective.
	bool changed;
	VirtualFramebuffer *const vfb = framebufferManager_->SetRenderFrameBuffer(gstate_c.IsDirty(DIRTY_FRAMEBUF), gstate_c.skipDrawReason, &changed);
	if (blueToAlpha) {
		vfb->usageFlags |= FB_USAGE_BLUE_TO_ALPHA;
	}

	if (changed) {
		drawEngineCommon_->FlushQueuedDepth();
	}

	if (gstate_c.dirty & DIRTY_VERTEXSHADER_STATE) {
		vertexCost_ = EstimatePerVertexCost();
	}

	const u32 count = op & 0xFFFF;
	// Must check this after SetRenderFrameBuffer so we know SKIPDRAW_NON_DISPLAYED_FB.
	if (gstate_c.skipDrawReason & (SKIPDRAW_SKIPFRAME | SKIPDRAW_NON_DISPLAYED_FB)) {
		// Rough estimate, not sure what's correct.
		cyclesExecuted += vertexCost_ * count;
		if (gstate.isModeClear()) {
			gpuStats.numClears++;
		}
		return;
	}

	CheckDepthUsage(vfb);

	const void *verts = Memory::GetPointerUnchecked(gstate_c.vertexAddr);
	const void *inds = nullptr;

	const bool isTriangle = IsTrianglePrim(prim);

	// canExtend: whether consecutive non-indexed draws may be merged into one.
	bool canExtend = isTriangle;
	u32 vertexType = gstate.vertType;
	if ((vertexType & GE_VTYPE_IDX_MASK) != GE_VTYPE_IDX_NONE) {
		u32 indexAddr = gstate_c.indexAddr;
		const int indexShift = ((vertexType & GE_VTYPE_IDX_MASK) >> GE_VTYPE_IDX_SHIFT) - 1;
		if (!Memory::IsValidRange(indexAddr, count << indexShift)) {
			ERROR_LOG(Log::G3D, "Bad index address %08x (%d)!", indexAddr, count);
			return;
		}
		inds = Memory::GetPointerUnchecked(indexAddr);
		canExtend = false;
	}

	gstate_c.UpdateUVScaleOffset();

	// cull mode
	int cullMode = gstate.getCullMode();

	uint32_t vertTypeID = GetVertTypeID(vertexType, gstate.getUVGenMode(), g_Config.bSoftwareSkinning);
	VertexDecoder *decoder = drawEngineCommon_->GetVertexDecoder(vertTypeID);

	// Through mode early-out for simple float 2D draws, like in Fate Extra CCC (very beneficial there due to avoiding texture loads)
	if ((vertexType & (GE_VTYPE_THROUGH_MASK | GE_VTYPE_POS_MASK | GE_VTYPE_IDX_MASK)) == (GE_VTYPE_THROUGH_MASK | GE_VTYPE_POS_FLOAT | GE_VTYPE_IDX_NONE)) {
		int bytesRead = 0;
		if (!drawEngineCommon_->TestBoundingBoxThrough(verts, count, decoder, vertexType, &bytesRead)) {
			gpuStats.numCulledDraws++;
			int cycles = vertexCost_ * count;
			gpuStats.vertexGPUCycles += cycles;
			cyclesExecuted += cycles;
			// NOTE! We still have to advance vertex pointers!
			gstate_c.vertexAddr += bytesRead; // We know from the above check that it's not an indexed draw.
			return;
		}
	}

#define MAX_CULL_CHECK_COUNT 6

// For now, turn off culling on platforms where we don't have SIMD bounding box tests, like RISC-V.
#if PPSSPP_ARCH(ARM_NEON) || PPSSPP_ARCH(SSE2)

// True when we should skip the software bounding-box cull test (indexed,
// through, morph/weighted, or big enough that testing isn't worth it).
#define PASSES_CULLING ((vertexType & (GE_VTYPE_THROUGH_MASK | GE_VTYPE_MORPHCOUNT_MASK | GE_VTYPE_WEIGHT_MASK | GE_VTYPE_IDX_MASK)) || count > MAX_CULL_CHECK_COUNT)

#else

#define PASSES_CULLING true

#endif

	// If certain conditions are true, do frustum culling.
	bool passCulling = PASSES_CULLING;
	if (!passCulling) {
		// Do software culling.
		if (drawEngineCommon_->TestBoundingBoxFast(verts, count, decoder, vertexType)) {
			passCulling = true;
		} else {
			gpuStats.numCulledDraws++;
		}
	}

	int bytesRead = 0;

	// If the first one in a batch passes, let's assume the whole batch passes.
	// Cuts down on checking, while not losing that much efficiency.
	bool onePassed = false;
	if (passCulling) {
		if (!drawEngineCommon_->SubmitPrim(verts, inds, prim, count, decoder, vertTypeID, true, &bytesRead)) {
			canExtend = false;
		}
		onePassed = true;
	} else {
		// Still need to advance bytesRead.
		drawEngineCommon_->SkipPrim(prim, count, decoder, vertTypeID, &bytesRead);
		canExtend = false;
	}

	// After drawing, we advance the vertexAddr (when non indexed) or indexAddr (when indexed).
	// Some games rely on this, they don't bother reloading VADDR and IADDR.
	// The VADDR/IADDR registers are NOT updated.
	AdvanceVerts(vertexType, count, bytesRead);

	int totalVertCount = count;

	// PRIMs are often followed by more PRIMs. Save some work and submit them immediately.
	const u32_le *start = (const u32_le *)Memory::GetPointerUnchecked(currentList->pc + 4);
	const u32_le *src = start;
	const u32_le *stall = currentList->stall ? (const u32_le *)Memory::GetPointerUnchecked(currentList->stall) : 0;

	// Optimized submission of sequences of PRIM. Allows us to avoid going through all the mess
	// above for each one. This can be expanded to support additional games that intersperse
	// PRIM commands with other commands. A special case is Earth Defence Force 2 that changes culling mode
	// between each prim, we just change the triangle winding right here to still be able to join draw calls.

	const uint32_t vtypeCheckMask = g_Config.bSoftwareSkinning ? (~GE_VTYPE_WEIGHTCOUNT_MASK) : 0xFFFFFFFF;

	if (!useFastRunLoop_)
		goto bail; // we're either recording or stepping.

	// Inner interpreter: handle the follow-on commands we know are cheap,
	// bail to the regular loop on anything else.
	while (src != stall) {
		uint32_t data = *src;
		switch (data >> 24) {
		case GE_CMD_PRIM:
		{
			GEPrimitiveType newPrim = static_cast<GEPrimitiveType>((data >> 16) & 7);
			if (IsTrianglePrim(newPrim) != isTriangle)
				goto bail; // Can't join over this boundary. Might as well exit and get this on the next time around.
			// TODO: more efficient updating of verts/inds

			u32 count = data & 0xFFFF; // NOTE: intentionally shadows the outer count.
			bool clockwise = !gstate.isCullEnabled() || gstate.getCullMode() == cullMode;
			if (canExtend) {
				// Non-indexed draws can be cheaply merged if vertexAddr hasn't changed, that means the vertices
				// are consecutive in memory. We also ignore culling here.
				_dbg_assert_((vertexType & GE_VTYPE_IDX_MASK) == GE_VTYPE_IDX_NONE);
				int commandsExecuted = drawEngineCommon_->ExtendNonIndexedPrim(src, stall, decoder, vertTypeID, clockwise, &bytesRead, isTriangle);
				if (!commandsExecuted) {
					goto bail;
				}
				src += commandsExecuted - 1;
				gstate_c.vertexAddr += bytesRead;
				totalVertCount += count;
				break;
			}

			verts = Memory::GetPointerUnchecked(gstate_c.vertexAddr);
			inds = nullptr;
			if ((vertexType & GE_VTYPE_IDX_MASK) != GE_VTYPE_IDX_NONE) {
				const u32 indexAddr = gstate_c.indexAddr;
				const int indexShift = ((vertexType & GE_VTYPE_IDX_MASK) >> GE_VTYPE_IDX_SHIFT) - 1;
				if (!Memory::IsValidRange(gstate_c.indexAddr, count << indexShift)) {
					// Bad index range. Let's give up the fast loop.
					goto bail;
				}
				inds = Memory::GetPointerUnchecked(indexAddr);
			} else {
				// We can extend again after submitting a normal draw.
				canExtend = isTriangle;
			}

			bool passCulling = onePassed || PASSES_CULLING;
			if (!passCulling) {
				// Do software culling.
				_dbg_assert_((vertexType & GE_VTYPE_IDX_MASK) == GE_VTYPE_IDX_NONE);
				if (drawEngineCommon_->TestBoundingBoxFast(verts, count, decoder, vertexType)) {
					passCulling = true;
				} else {
					gpuStats.numCulledDraws++;
				}
			}
			if (passCulling) {
				if (!drawEngineCommon_->SubmitPrim(verts, inds, newPrim, count, decoder, vertTypeID, clockwise, &bytesRead)) {
					canExtend = false;
				}
				// As soon as one passes, assume we don't need to check the rest of this batch.
				onePassed = true;
			} else {
				// Still need to advance bytesRead.
				drawEngineCommon_->SkipPrim(newPrim, count, decoder, vertTypeID, &bytesRead);
				canExtend = false;
			}
			AdvanceVerts(vertexType, count, bytesRead);
			totalVertCount += count;
			break;
		}
		case GE_CMD_VERTEXTYPE:
		{
			uint32_t diff = data ^ vertexType;
			// don't mask upper bits, vertexType is unmasked
			if (diff) {
				if (diff & vtypeCheckMask)
					goto bail;
				drawEngineCommon_->FlushSkin();
				canExtend = false; // TODO: Might support extending between some vertex types in the future.
				vertexType = data;
				vertTypeID = GetVertTypeID(vertexType, gstate.getUVGenMode(), g_Config.bSoftwareSkinning);
				decoder = drawEngineCommon_->GetVertexDecoder(vertTypeID);
			}
			break;
		}
		case GE_CMD_VADDR:
		{
			gstate.cmdmem[GE_CMD_VADDR] = data;
			uint32_t newAddr = gstate_c.getRelativeAddress(data & 0x00FFFFFF);
			if (gstate_c.vertexAddr != newAddr) {
				canExtend = false;
				gstate_c.vertexAddr = newAddr;
			}
			break;
		}
		case GE_CMD_IADDR:
			gstate.cmdmem[GE_CMD_IADDR] = data;
			gstate_c.indexAddr = gstate_c.getRelativeAddress(data & 0x00FFFFFF);
			break;
		case GE_CMD_OFFSETADDR:
			gstate.cmdmem[GE_CMD_OFFSETADDR] = data;
			gstate_c.offsetAddr = data << 8;
			break;
		case GE_CMD_BASE:
			gstate.cmdmem[GE_CMD_BASE] = data;
			break;
		case GE_CMD_CULLFACEENABLE:
			// Earth Defence Force 2
			if (gstate.cmdmem[GE_CMD_CULLFACEENABLE] != data) {
				goto bail;
			}
			break;
		case GE_CMD_CULL:
			// flip face by indices for triangles
			cullMode = data & 1;
			break;
		case GE_CMD_TEXFLUSH:
		case GE_CMD_NOP:
		case GE_CMD_NOP_FF:
			gstate.cmdmem[data >> 24] = data;
			break;
		case GE_CMD_BONEMATRIXNUMBER:
			gstate.cmdmem[GE_CMD_BONEMATRIXNUMBER] = data;
			break;
		case GE_CMD_TEXSCALEU:
			// We don't "dirty-check" - we could avoid getFloat24 and setting canExtend=false, but usually
			// when texscale commands are in line with the prims like this, they actually have an effect
			// and requires us to stop extending strips anyway.
			gstate.cmdmem[GE_CMD_TEXSCALEU] = data;
			gstate_c.uv.uScale = getFloat24(data);
			canExtend = false;
			break;
		case GE_CMD_TEXSCALEV:
			gstate.cmdmem[GE_CMD_TEXSCALEV] = data;
			gstate_c.uv.vScale = getFloat24(data);
			canExtend = false;
			break;
		case GE_CMD_TEXOFFSETU:
			gstate.cmdmem[GE_CMD_TEXOFFSETU] = data;
			gstate_c.uv.uOff = getFloat24(data);
			canExtend = false;
			break;
		case GE_CMD_TEXOFFSETV:
			gstate.cmdmem[GE_CMD_TEXOFFSETV] = data;
			gstate_c.uv.vOff = getFloat24(data);
			canExtend = false;
			break;
		case GE_CMD_TEXLEVEL:
			// Same Gran Turismo hack from Execute_TexLevel
			if ((data & 3) != GE_TEXLEVEL_MODE_AUTO && (0x00FF0000 & data) != 0) {
				goto bail;
			}
			gstate.cmdmem[GE_CMD_TEXLEVEL] = data;
			break;
		case GE_CMD_CALL:
		{
			// A bone matrix probably. If not we bail.
			const u32 target = gstate_c.getRelativeAddress(data & 0x00FFFFFC);
			if ((Memory::ReadUnchecked_U32(target) >> 24) == GE_CMD_BONEMATRIXDATA &&
				(Memory::ReadUnchecked_U32(target + 11 * 4) >> 24) == GE_CMD_BONEMATRIXDATA &&
				(Memory::ReadUnchecked_U32(target + 12 * 4) >> 24) == GE_CMD_RET &&
				(target > currentList->stall || target + 12 * 4 < currentList->stall) &&
				(gstate.boneMatrixNumber & 0x00FFFFFF) <= 96 - 12) {
				drawEngineCommon_->FlushSkin();
				canExtend = false;
				FastLoadBoneMatrix(target);
			} else {
				goto bail;
			}
			break;
		}

		case GE_CMD_TEXBUFWIDTH0:
		case GE_CMD_TEXADDR0:
			if (data != gstate.cmdmem[data >> 24])
				goto bail;
			break;

		default:
			// All other commands might need a flush or something, stop this inner loop.
			goto bail;
		}
		src++;
	}

bail:
	drawEngineCommon_->FlushSkin();
	// Write back the (possibly changed) vertex type we tracked locally.
	gstate.cmdmem[GE_CMD_VERTEXTYPE] = vertexType;
	const int cmdCount = src - start;
	// Skip over the commands we just read out manually.
	if (cmdCount > 0) {
		UpdatePC(currentList->pc, currentList->pc + cmdCount * 4);
		currentList->pc += cmdCount * 4;
		// flush back cull mode
		if (cullMode != gstate.getCullMode()) {
			// We rewrote everything to the old cull mode, so flush first.
			drawEngineCommon_->Flush();

			// Now update things for next time.
			gstate.cmdmem[GE_CMD_CULL] ^= 1;
			gstate_c.Dirty(DIRTY_RASTER_STATE);
		}
	}

	int cycles = vertexCost_ * totalVertCount;
	gpuStats.vertexGPUCycles += cycles;
	cyclesExecuted += cycles;
}
1291
1292
// Handles GE_CMD_BEZIER: tessellates and draws a bezier patch surface.
// op encodes num_points_u in bits 0-7 and num_points_v in bits 8-15.
void GPUCommonHW::Execute_Bezier(u32 op, u32 diff) {
	// We don't dirty on normal changes anymore as we prescale, but it's needed for splines/bezier.
	gstate_c.framebufFormat = gstate.FrameBufFormat();

	// This also make skipping drawing very effective.
	bool changed;
	VirtualFramebuffer *vfb = framebufferManager_->SetRenderFrameBuffer(gstate_c.IsDirty(DIRTY_FRAMEBUF), gstate_c.skipDrawReason, &changed);
	if (changed) {
		drawEngineCommon_->FlushQueuedDepth();
	}
	if (gstate_c.skipDrawReason & (SKIPDRAW_SKIPFRAME | SKIPDRAW_NON_DISPLAYED_FB)) {
		// TODO: Should this eat some cycles? Probably yes. Not sure if important.
		return;
	}

	CheckDepthUsage(vfb);

	if (!Memory::IsValidAddress(gstate_c.vertexAddr)) {
		ERROR_LOG(Log::G3D, "Bad vertex address %08x!", gstate_c.vertexAddr);
		return;
	}

	const void *control_points = Memory::GetPointerUnchecked(gstate_c.vertexAddr);
	const void *indices = NULL;
	if ((gstate.vertType & GE_VTYPE_IDX_MASK) != GE_VTYPE_IDX_NONE) {
		if (!Memory::IsValidAddress(gstate_c.indexAddr)) {
			ERROR_LOG(Log::G3D, "Bad index address %08x!", gstate_c.indexAddr);
			return;
		}
		indices = Memory::GetPointerUnchecked(gstate_c.indexAddr);
	}

	if (vertTypeIsSkinningEnabled(gstate.vertType)) {
		DEBUG_LOG_REPORT(Log::G3D, "Unusual bezier/spline vtype: %08x, morph: %d, bones: %d", gstate.vertType, (gstate.vertType & GE_VTYPE_MORPHCOUNT_MASK) >> GE_VTYPE_MORPHCOUNT_SHIFT, vertTypeGetNumBoneWeights(gstate.vertType));
	}

	// Can't flush after setting gstate_c.submitType below since it'll be a mess - it must be done already.
	if (flushOnParams_)
		drawEngineCommon_->Flush();

	Spline::BezierSurface surface;
	surface.tess_u = gstate.getPatchDivisionU();
	surface.tess_v = gstate.getPatchDivisionV();
	surface.num_points_u = op & 0xFF;
	surface.num_points_v = (op >> 8) & 0xFF;
	// Adjacent bezier patches share an edge of control points: 3 new points per patch.
	surface.num_patches_u = (surface.num_points_u - 1) / 3;
	surface.num_patches_v = (surface.num_points_v - 1) / 3;
	surface.primType = gstate.getPatchPrimitiveType();
	surface.patchFacing = gstate.patchfacing & 1;

	SetDrawType(DRAW_BEZIER, PatchPrimToPrim(surface.primType));

	// We need to dirty UVSCALEOFFSET here because we look at the submit type when setting that uniform.
	gstate_c.Dirty(DIRTY_RASTER_STATE | DIRTY_VERTEXSHADER_STATE | DIRTY_GEOMETRYSHADER_STATE | DIRTY_UVSCALEOFFSET);
	if (drawEngineCommon_->CanUseHardwareTessellation(surface.primType)) {
		gstate_c.submitType = SubmitType::HW_BEZIER;
		if (gstate_c.spline_num_points_u != surface.num_points_u) {
			gstate_c.Dirty(DIRTY_BEZIERSPLINE);
			gstate_c.spline_num_points_u = surface.num_points_u;
		}
	} else {
		gstate_c.submitType = SubmitType::BEZIER;
	}

	int bytesRead = 0;
	gstate_c.UpdateUVScaleOffset();
	drawEngineCommon_->SubmitCurve(control_points, indices, surface, gstate.vertType, &bytesRead, "bezier");

	// Restore regular draw state after the curve submission.
	gstate_c.Dirty(DIRTY_RASTER_STATE | DIRTY_VERTEXSHADER_STATE | DIRTY_GEOMETRYSHADER_STATE | DIRTY_UVSCALEOFFSET);
	gstate_c.submitType = SubmitType::DRAW;

	// After drawing, we advance pointers - see SubmitPrim which does the same.
	const int count = surface.num_points_u * surface.num_points_v;
	AdvanceVerts(gstate.vertType, count, bytesRead);
}
1367
1368
// Handles GE_CMD_SPLINE: tessellates and draws a spline patch surface.
// op encodes num_points_u (bits 0-7), num_points_v (bits 8-15) and the
// u/v spline knot types (bits 16-17 and 18-19).
void GPUCommonHW::Execute_Spline(u32 op, u32 diff) {
	// We don't dirty on normal changes anymore as we prescale, but it's needed for splines/bezier.
	gstate_c.framebufFormat = gstate.FrameBufFormat();

	// This also make skipping drawing very effective.
	bool changed;
	VirtualFramebuffer *vfb = framebufferManager_->SetRenderFrameBuffer(gstate_c.IsDirty(DIRTY_FRAMEBUF), gstate_c.skipDrawReason, &changed);
	if (changed) {
		drawEngineCommon_->FlushQueuedDepth();
	}
	if (gstate_c.skipDrawReason & (SKIPDRAW_SKIPFRAME | SKIPDRAW_NON_DISPLAYED_FB)) {
		// TODO: Should this eat some cycles? Probably yes. Not sure if important.
		return;
	}

	CheckDepthUsage(vfb);

	if (!Memory::IsValidAddress(gstate_c.vertexAddr)) {
		ERROR_LOG(Log::G3D, "Bad vertex address %08x!", gstate_c.vertexAddr);
		return;
	}

	const void *control_points = Memory::GetPointerUnchecked(gstate_c.vertexAddr);
	const void *indices = NULL;
	if ((gstate.vertType & GE_VTYPE_IDX_MASK) != GE_VTYPE_IDX_NONE) {
		if (!Memory::IsValidAddress(gstate_c.indexAddr)) {
			ERROR_LOG(Log::G3D, "Bad index address %08x!", gstate_c.indexAddr);
			return;
		}
		indices = Memory::GetPointerUnchecked(gstate_c.indexAddr);
	}

	if (vertTypeIsSkinningEnabled(gstate.vertType)) {
		WARN_LOG_ONCE(unusualcurve, Log::G3D, "Unusual bezier/spline vtype: %08x, morph: %d, bones: %d", gstate.vertType, (gstate.vertType & GE_VTYPE_MORPHCOUNT_MASK) >> GE_VTYPE_MORPHCOUNT_SHIFT, vertTypeGetNumBoneWeights(gstate.vertType));
	}

	// Can't flush after setting gstate_c.submitType below since it'll be a mess - it must be done already.
	if (flushOnParams_)
		drawEngineCommon_->Flush();

	Spline::SplineSurface surface;
	surface.tess_u = gstate.getPatchDivisionU();
	surface.tess_v = gstate.getPatchDivisionV();
	surface.type_u = (op >> 16) & 0x3;
	surface.type_v = (op >> 18) & 0x3;
	surface.num_points_u = op & 0xFF;
	surface.num_points_v = (op >> 8) & 0xFF;
	// Splines use a sliding window of 4 control points: points - 3 patches per axis.
	surface.num_patches_u = surface.num_points_u - 3;
	surface.num_patches_v = surface.num_points_v - 3;
	surface.primType = gstate.getPatchPrimitiveType();
	surface.patchFacing = gstate.patchfacing & 1;

	SetDrawType(DRAW_SPLINE, PatchPrimToPrim(surface.primType));

	// We need to dirty UVSCALEOFFSET here because we look at the submit type when setting that uniform.
	gstate_c.Dirty(DIRTY_RASTER_STATE | DIRTY_VERTEXSHADER_STATE | DIRTY_GEOMETRYSHADER_STATE | DIRTY_UVSCALEOFFSET);
	if (drawEngineCommon_->CanUseHardwareTessellation(surface.primType)) {
		gstate_c.submitType = SubmitType::HW_SPLINE;
		if (gstate_c.spline_num_points_u != surface.num_points_u) {
			gstate_c.Dirty(DIRTY_BEZIERSPLINE);
			gstate_c.spline_num_points_u = surface.num_points_u;
		}
	} else {
		gstate_c.submitType = SubmitType::SPLINE;
	}

	int bytesRead = 0;
	gstate_c.UpdateUVScaleOffset();
	drawEngineCommon_->SubmitCurve(control_points, indices, surface, gstate.vertType, &bytesRead, "spline");

	// Restore regular draw state after the curve submission.
	gstate_c.Dirty(DIRTY_RASTER_STATE | DIRTY_VERTEXSHADER_STATE | DIRTY_GEOMETRYSHADER_STATE | DIRTY_UVSCALEOFFSET);
	gstate_c.submitType = SubmitType::DRAW;

	// After drawing, we advance pointers - see SubmitPrim which does the same.
	int count = surface.num_points_u * surface.num_points_v;
	AdvanceVerts(gstate.vertType, count, bytesRead);
}
1445
1446
void GPUCommonHW::Execute_BlockTransferStart(u32 op, u32 diff) {
	// A block transfer copies a rectangle of pixels between RAM and VRAM (or
	// vice versa). All queued draws and depth work must be flushed first so the
	// source data is up to date before the transfer is emulated.
	drawEngineCommon_->FlushQueuedDepth();
	Flush();

	PROFILE_THIS_SCOPE("block"); // don't include the flush in the profile, would be misleading.

	// Cache the current framebuffer format; presumably consulted by
	// DoBlockTransfer when the transfer overlaps a framebuffer - it's set
	// immediately before the call.
	gstate_c.framebufFormat = gstate.FrameBufFormat();

	// and take appropriate action. This is a block transfer between RAM and VRAM, or vice versa.
	// Can we skip this on SkipDraw?
	DoBlockTransfer(gstate_c.skipDrawReason);
}
1458
1459
void GPUCommonHW::Execute_TexSize0(u32 op, u32 diff) {
	// Render to texture may have overridden the width/height.
	// Only refresh the cached dimensions when the register actually changed,
	// or when the texture image/params are already marked dirty anyway.
	if (!diff && !gstate_c.IsDirty(DIRTY_TEXTURE_IMAGE | DIRTY_TEXTURE_PARAMS))
		return;

	gstate_c.curTextureWidth = gstate.getTextureWidth(0);
	gstate_c.curTextureHeight = gstate.getTextureHeight(0);
	// We will need to reset the texture now.
	gstate_c.Dirty(DIRTY_TEXTURE_PARAMS);
}
1469
1470
void GPUCommonHW::Execute_TexLevel(u32 op, u32 diff) {
	// TODO: If you change the rules here, don't forget to update the inner interpreter in Execute_Prim.
	// NOTE(review): diff == 0xFFFFFFFF appears to act as a "nothing to do" sentinel
	// from some call site - confirm against the dispatcher.
	if (diff == 0xFFFFFFFF)
		return;

	// Temporarily apply the incoming change so the checks below see the *new*
	// register value. It's undone again further down.
	gstate.texlevel ^= diff;

	if (diff & 0xFF0000) {
		// The bias byte (bits 16-23) changed. Piggyback on this flag for 3D textures.
		gstate_c.Dirty(DIRTY_MIPBIAS);
	}
	// A non-auto level mode with a nonzero bias changes sampling behavior, so any
	// geometry queued under the old setting has to be drawn first.
	if (gstate.getTexLevelMode() != GE_TEXLEVEL_MODE_AUTO && (0x00FF0000 & gstate.texlevel) != 0) {
		Flush();
	}

	// Undo the temporary change - the caller is presumably responsible for
	// committing the final register value.
	gstate.texlevel ^= diff;

	gstate_c.Dirty(DIRTY_TEXTURE_PARAMS | DIRTY_FRAGMENTSHADER_STATE);
}
1489
1490
void GPUCommonHW::Execute_LoadClut(u32 op, u32 diff) {
	// Loading a new CLUT (palette) invalidates any texture state that depends on it.
	gstate_c.Dirty(DIRTY_TEXTURE_PARAMS);
	// The texture cache handles the actual load; the recorder is passed along,
	// presumably so the load can be captured for GE debugging.
	textureCache_->LoadClut(gstate.getClutAddress(), gstate.getClutLoadBytes(), &recorder_);
}
1494
1495
1496
void GPUCommonHW::Execute_WorldMtxNum(u32 op, u32 diff) {
	// Sets the write index for world matrix uploads. With no active display
	// list, all we can do is store the index.
	if (!currentList) {
		gstate.worldmtxnum = (GE_CMD_WORLDMATRIXNUMBER << 24) | (op & 0xF);
		return;
	}

	// This is almost always followed by GE_CMD_WORLDMATRIXDATA.
	// Fast path: read the upcoming WORLDMATRIXDATA commands straight out of the
	// display list and apply them in a batch instead of dispatching each one.
	const u32_le *src = (const u32_le *)Memory::GetPointerUnchecked(currentList->pc + 4);
	u32 *dst = (u32 *)(gstate.worldMatrix + (op & 0xF));
	// Remaining elements of the 4x3 (12-entry) matrix from the start index.
	const int end = 12 - (op & 0xF);
	int i = 0;

	// We must record the individual data commands while debugRecording_.
	bool fastLoad = !debugRecording_ && end > 0;
	// Stalling in the middle of a matrix would be stupid, I doubt this check is necessary.
	if (currentList->pc < currentList->stall && currentList->pc + end * 4 >= currentList->stall) {
		fastLoad = false;
	}

	if (fastLoad) {
		while ((src[i] >> 24) == GE_CMD_WORLDMATRIXDATA) {
			// The matrix data payload is the low 24 bits, shifted up to float position.
			const u32 newVal = src[i] << 8;
			if (dst[i] != newVal) {
				// Only flush and dirty when a value actually changes.
				Flush();
				dst[i] = newVal;
				gstate_c.Dirty(DIRTY_WORLDMATRIX);
			}
			if (++i >= end) {
				break;
			}
		}
	}

	// Store the advanced write index: start index plus elements consumed.
	const int count = i;
	gstate.worldmtxnum = (GE_CMD_WORLDMATRIXNUMBER << 24) | ((op & 0xF) + count);

	// Skip over the loaded data, it's done now.
	UpdatePC(currentList->pc, currentList->pc + count * 4);
	currentList->pc += count * 4;
}
1536
1537
void GPUCommonHW::Execute_WorldMtxData(u32 op, u32 diff) {
	// Note: it's uncommon to get here now - Execute_WorldMtxNum batches most
	// matrix data directly.
	int index = gstate.worldmtxnum & 0x00FFFFFF;
	const u32 shifted = op << 8;  // payload occupies the top 24 bits
	u32 *matrix = (u32 *)gstate.worldMatrix;
	if (index < 12) {
		if (matrix[index] != shifted) {
			Flush();
			matrix[index] = shifted;
			gstate_c.Dirty(DIRTY_WORLDMATRIX);
		}
	}
	index++;
	gstate.worldmtxnum = (GE_CMD_WORLDMATRIXNUMBER << 24) | (index & 0x00FFFFFF);
	gstate.worldmtxdata = GE_CMD_WORLDMATRIXDATA << 24;
}
1550
1551
void GPUCommonHW::Execute_ViewMtxNum(u32 op, u32 diff) {
	// Sets the write index for view matrix uploads. With no active display
	// list, all we can do is store the index.
	if (!currentList) {
		gstate.viewmtxnum = (GE_CMD_VIEWMATRIXNUMBER << 24) | (op & 0xF);
		return;
	}

	// This is almost always followed by GE_CMD_VIEWMATRIXDATA.
	// Fast path: batch-load the upcoming VIEWMATRIXDATA commands directly from
	// the display list instead of dispatching them one by one.
	const u32_le *src = (const u32_le *)Memory::GetPointerUnchecked(currentList->pc + 4);
	u32 *dst = (u32 *)(gstate.viewMatrix + (op & 0xF));
	// Remaining elements of the 4x3 (12-entry) matrix from the start index.
	const int end = 12 - (op & 0xF);
	int i = 0;

	// Fast loading must be off while debug-recording so each data command gets recorded.
	bool fastLoad = !debugRecording_ && end > 0;
	// Don't read past the stall address.
	if (currentList->pc < currentList->stall && currentList->pc + end * 4 >= currentList->stall) {
		fastLoad = false;
	}

	if (fastLoad) {
		while ((src[i] >> 24) == GE_CMD_VIEWMATRIXDATA) {
			const u32 newVal = src[i] << 8;
			if (dst[i] != newVal) {
				// Only flush and dirty when a value actually changes. The view
				// matrix also affects the cached culling planes.
				Flush();
				dst[i] = newVal;
				gstate_c.Dirty(DIRTY_VIEWMATRIX | DIRTY_CULL_PLANES);
			}
			if (++i >= end) {
				break;
			}
		}
	}

	// Store the advanced write index: start index plus elements consumed.
	const int count = i;
	gstate.viewmtxnum = (GE_CMD_VIEWMATRIXNUMBER << 24) | ((op & 0xF) + count);

	// Skip over the loaded data, it's done now.
	UpdatePC(currentList->pc, currentList->pc + count * 4);
	currentList->pc += count * 4;
}
1589
1590
void GPUCommonHW::Execute_ViewMtxData(u32 op, u32 diff) {
	// Note: it's uncommon to get here now - Execute_ViewMtxNum batches most
	// matrix data directly.
	int index = gstate.viewmtxnum & 0x00FFFFFF;
	const u32 shifted = op << 8;  // payload occupies the top 24 bits
	u32 *matrix = (u32 *)gstate.viewMatrix;
	if (index < 12) {
		if (matrix[index] != shifted) {
			Flush();
			matrix[index] = shifted;
			// The view matrix also feeds the cached culling planes.
			gstate_c.Dirty(DIRTY_VIEWMATRIX | DIRTY_CULL_PLANES);
		}
	}
	index++;
	gstate.viewmtxnum = (GE_CMD_VIEWMATRIXNUMBER << 24) | (index & 0x00FFFFFF);
	gstate.viewmtxdata = GE_CMD_VIEWMATRIXDATA << 24;
}
1603
1604
void GPUCommonHW::Execute_ProjMtxNum(u32 op, u32 diff) {
	// Sets the write index for projection matrix uploads. With no active
	// display list, all we can do is store the index.
	if (!currentList) {
		gstate.projmtxnum = (GE_CMD_PROJMATRIXNUMBER << 24) | (op & 0xF);
		return;
	}

	// This is almost always followed by GE_CMD_PROJMATRIXDATA.
	// Fast path: batch-load the upcoming PROJMATRIXDATA commands directly from
	// the display list instead of dispatching them one by one.
	const u32_le *src = (const u32_le *)Memory::GetPointerUnchecked(currentList->pc + 4);
	u32 *dst = (u32 *)(gstate.projMatrix + (op & 0xF));
	// Remaining elements of the full 4x4 (16-entry) matrix. Since (op & 0xF)
	// is at most 15, end is always >= 1 here (no end > 0 check needed).
	const int end = 16 - (op & 0xF);
	int i = 0;

	// Fast loading must be off while debug-recording so each data command gets recorded.
	bool fastLoad = !debugRecording_;
	// Don't read past the stall address.
	if (currentList->pc < currentList->stall && currentList->pc + end * 4 >= currentList->stall) {
		fastLoad = false;
	}

	if (fastLoad) {
		while ((src[i] >> 24) == GE_CMD_PROJMATRIXDATA) {
			const u32 newVal = src[i] << 8;
			if (dst[i] != newVal) {
				// Only flush and dirty when a value actually changes. The
				// projection matrix also affects the cached culling planes.
				Flush();
				dst[i] = newVal;
				gstate_c.Dirty(DIRTY_PROJMATRIX | DIRTY_CULL_PLANES);
			}
			if (++i >= end) {
				break;
			}
		}
	}

	// Store the advanced write index: start index plus elements consumed.
	const int count = i;
	gstate.projmtxnum = (GE_CMD_PROJMATRIXNUMBER << 24) | ((op & 0xF) + count);

	// Skip over the loaded data, it's done now.
	UpdatePC(currentList->pc, currentList->pc + count * 4);
	currentList->pc += count * 4;
}
1642
1643
void GPUCommonHW::Execute_ProjMtxData(u32 op, u32 diff) {
	// Note: it's uncommon to get here now - Execute_ProjMtxNum batches most
	// matrix data directly.
	int index = gstate.projmtxnum & 0x00FFFFFF;
	const u32 shifted = op << 8;  // payload occupies the top 24 bits
	u32 *matrix = (u32 *)gstate.projMatrix;
	if (index < 16 && matrix[index] != shifted) {
		Flush();
		matrix[index] = shifted;
		// The projection matrix also feeds the cached culling planes.
		gstate_c.Dirty(DIRTY_PROJMATRIX | DIRTY_CULL_PLANES);
	}
	index++;
	// Unlike the other matrix uploads, the projection write index does not
	// advance past 16.
	if (index <= 16)
		gstate.projmtxnum = (GE_CMD_PROJMATRIXNUMBER << 24) | (index & 0x00FFFFFF);
	gstate.projmtxdata = GE_CMD_PROJMATRIXDATA << 24;
}
1657
1658
void GPUCommonHW::Execute_TgenMtxNum(u32 op, u32 diff) {
	// Sets the write index for texgen matrix uploads. With no active display
	// list, all we can do is store the index.
	if (!currentList) {
		gstate.texmtxnum = (GE_CMD_TGENMATRIXNUMBER << 24) | (op & 0xF);
		return;
	}

	// This is almost always followed by GE_CMD_TGENMATRIXDATA.
	// Fast path: batch-load the upcoming TGENMATRIXDATA commands directly from
	// the display list instead of dispatching them one by one.
	const u32_le *src = (const u32_le *)Memory::GetPointerUnchecked(currentList->pc + 4);
	u32 *dst = (u32 *)(gstate.tgenMatrix + (op & 0xF));
	// Remaining elements of the 4x3 (12-entry) matrix from the start index.
	const int end = 12 - (op & 0xF);
	int i = 0;

	// Fast loading must be off while debug-recording so each data command gets recorded.
	bool fastLoad = !debugRecording_ && end > 0;
	// Don't read past the stall address.
	if (currentList->pc < currentList->stall && currentList->pc + end * 4 >= currentList->stall) {
		fastLoad = false;
	}

	if (fastLoad) {
		while ((src[i] >> 24) == GE_CMD_TGENMATRIXDATA) {
			const u32 newVal = src[i] << 8;
			if (dst[i] != newVal) {
				// Only flush and dirty when a value actually changes.
				Flush();
				dst[i] = newVal;
				// We check the matrix to see if we need projection.
				gstate_c.Dirty(DIRTY_TEXMATRIX | DIRTY_FRAGMENTSHADER_STATE);
			}
			if (++i >= end) {
				break;
			}
		}
	}

	// Store the advanced write index: start index plus elements consumed.
	const int count = i;
	gstate.texmtxnum = (GE_CMD_TGENMATRIXNUMBER << 24) | ((op & 0xF) + count);

	// Skip over the loaded data, it's done now.
	UpdatePC(currentList->pc, currentList->pc + count * 4);
	currentList->pc += count * 4;
}
1697
1698
void GPUCommonHW::Execute_TgenMtxData(u32 op, u32 diff) {
	// Note: it's uncommon to get here now - Execute_TgenMtxNum batches most
	// matrix data directly.
	int index = gstate.texmtxnum & 0x00FFFFFF;
	const u32 shifted = op << 8;  // payload occupies the top 24 bits
	u32 *matrix = (u32 *)gstate.tgenMatrix;
	if (index < 12) {
		if (matrix[index] != shifted) {
			Flush();
			matrix[index] = shifted;
			// We check the matrix to see if we need projection, hence the
			// fragment shader state is dirtied too.
			gstate_c.Dirty(DIRTY_TEXMATRIX | DIRTY_FRAGMENTSHADER_STATE);
		}
	}
	index++;
	gstate.texmtxnum = (GE_CMD_TGENMATRIXNUMBER << 24) | (index & 0x00FFFFFF);
	gstate.texmtxdata = GE_CMD_TGENMATRIXDATA << 24;
}
1711
1712
void GPUCommonHW::Execute_BoneMtxNum(u32 op, u32 diff) {
	// Sets the write index for bone matrix uploads (8 matrices x 12 entries,
	// so the index is 7 bits). With no active display list, just store it.
	if (!currentList) {
		gstate.boneMatrixNumber = (GE_CMD_BONEMATRIXNUMBER << 24) | (op & 0x7F);
		return;
	}

	// This is almost always followed by GE_CMD_BONEMATRIXDATA.
	// Fast path: batch-load the upcoming BONEMATRIXDATA commands directly from
	// the display list instead of dispatching them one by one.
	const u32_le *src = (const u32_le *)Memory::GetPointerUnchecked(currentList->pc + 4);
	u32 *dst = (u32 *)(gstate.boneMatrix + (op & 0x7F));
	// Remaining entries across all 8 bone matrices from the start index.
	const int end = 12 * 8 - (op & 0x7F);
	int i = 0;

	// Fast loading must be off while debug-recording so each data command gets recorded.
	bool fastLoad = !debugRecording_ && end > 0;
	// Don't read past the stall address.
	if (currentList->pc < currentList->stall && currentList->pc + end * 4 >= currentList->stall) {
		fastLoad = false;
	}

	if (fastLoad) {
		// If we can't use software skinning, we have to flush and dirty.
		if (!g_Config.bSoftwareSkinning) {
			while ((src[i] >> 24) == GE_CMD_BONEMATRIXDATA) {
				const u32 newVal = src[i] << 8;
				if (dst[i] != newVal) {
					// Hardware skinning: changed bone data invalidates queued draws.
					Flush();
					dst[i] = newVal;
				}
				if (++i >= end) {
					break;
				}
			}

			// Dirty one DIRTY_BONEMATRIXn bit per 12-entry matrix touched.
			const unsigned int numPlusCount = (op & 0x7F) + i;
			for (unsigned int num = op & 0x7F; num < numPlusCount; num += 12) {
				gstate_c.Dirty(DIRTY_BONEMATRIX0 << (num / 12));
			}
		} else {
			// Software skinning: bones are applied on the CPU at decode time, so
			// no flush is needed - just copy the data in.
			while ((src[i] >> 24) == GE_CMD_BONEMATRIXDATA) {
				dst[i] = src[i] << 8;
				if (++i >= end) {
					break;
				}
			}

			// Defer the dirtying until draw time instead of dirtying immediately.
			const unsigned int numPlusCount = (op & 0x7F) + i;
			for (unsigned int num = op & 0x7F; num < numPlusCount; num += 12) {
				gstate_c.deferredVertTypeDirty |= DIRTY_BONEMATRIX0 << (num / 12);
			}
		}
	}

	// Store the advanced write index: start index plus elements consumed.
	const int count = i;
	gstate.boneMatrixNumber = (GE_CMD_BONEMATRIXNUMBER << 24) | ((op & 0x7F) + count);

	// Skip over the loaded data, it's done now.
	UpdatePC(currentList->pc, currentList->pc + count * 4);
	currentList->pc += count * 4;
}
1769
1770
void GPUCommonHW::Execute_BoneMtxData(u32 op, u32 diff) {
	// Note: it's uncommon to get here now - Execute_BoneMtxNum batches most
	// matrix data directly.
	int index = gstate.boneMatrixNumber & 0x00FFFFFF;
	const u32 shifted = op << 8;  // payload occupies the top 24 bits
	u32 *matrix = (u32 *)gstate.boneMatrix;
	if (index < 96 && matrix[index] != shifted) {
		if (g_Config.bSoftwareSkinning) {
			// Bone matrices should NOT flush when software skinning is enabled!
			// Defer the dirtying until the next draw instead.
			gstate_c.deferredVertTypeDirty |= DIRTY_BONEMATRIX0 << (index / 12);
		} else {
			Flush();
			gstate_c.Dirty(DIRTY_BONEMATRIX0 << (index / 12));
		}
		matrix[index] = shifted;
	}
	index++;
	gstate.boneMatrixNumber = (GE_CMD_BONEMATRIXNUMBER << 24) | (index & 0x00FFFFFF);
	gstate.boneMatrixData = GE_CMD_BONEMATRIXDATA << 24;
}
1788
1789
void GPUCommonHW::Execute_TexFlush(u32 op, u32 diff) {
	// Games call this when they need the effect of drawing to be visible to texturing.
	// And for a bunch of other reasons, but either way, this is what we need to do.
	// It's possible we could also use this as a hint for the texture cache somehow.
	framebufferManager_->DiscardFramebufferCopy();
}
1795
1796
u32 GPUCommonHW::DrawSync(int mode) {
	// Finish any queued software-depth work before running the generic sync logic.
	drawEngineCommon_->FlushQueuedDepth();
	return GPUCommon::DrawSync(mode);
}
1800
1801
int GPUCommonHW::ListSync(int listid, int mode) {
	// Finish any queued software-depth work before running the generic sync logic.
	drawEngineCommon_->FlushQueuedDepth();
	return GPUCommon::ListSync(listid, mode);
}
1805
1806
size_t GPUCommonHW::FormatGPUStatsCommon(char *buffer, size_t size) {
	// Formats the shared GPU statistics into buffer (for the debug stats overlay).
	// Returns snprintf's result, i.e. the length that would have been written.
	float vertexAverageCycles = gpuStats.numVertsSubmitted > 0 ? (float)gpuStats.vertexGPUCycles / (float)gpuStats.numVertsSubmitted : 0.0f;
	// NOTE: The arguments are positional - they must stay in the exact order of
	// the format specifiers in the string above them.
	return snprintf(buffer, size,
		"DL processing time: %0.2f ms, %d drawsync, %d listsync\n"
		"Draw: %d (%d dec, %d culled), flushes %d, clears %d, bbox jumps %d (%d updates)\n"
		"Vertices: %d dec: %d drawn: %d\n"
		"FBOs active: %d (evaluations: %d, created %d)\n"
		"Textures: %d, dec: %d, invalidated: %d, hashed: %d kB, clut %d\n"
		"readbacks %d (%d non-block), upload %d (cached %d), depal %d\n"
		"block transfers: %d\n"
		"replacer: tracks %d references, %d unique textures\n"
		"Cpy: depth %d, color %d, reint %d, blend %d, self %d\n"
		"GPU cycles: %d (%0.1f per vertex)\n"
		"Z-rast: %0.2f+%0.2f+%0.2f (total %0.2f/%0.2f) ms\n"
		"Z-rast: %d prim, %d nopix, %d small, %d earlysize, %d zcull, %d box\n%s",
		// Display list processing / syncs
		gpuStats.msProcessingDisplayLists * 1000.0f,
		gpuStats.numDrawSyncs,
		gpuStats.numListSyncs,
		// Draw calls
		gpuStats.numDrawCalls,
		gpuStats.numVertexDecodes,
		gpuStats.numCulledDraws,
		gpuStats.numFlushes,
		gpuStats.numClears,
		gpuStats.numBBOXJumps,
		gpuStats.numPlaneUpdates,
		// Vertices
		gpuStats.numVertsSubmitted,
		gpuStats.numVertsDecoded,
		gpuStats.numUncachedVertsDrawn,
		// Framebuffers
		(int)framebufferManager_->NumVFBs(),
		gpuStats.numFramebufferEvaluations,
		gpuStats.numFBOsCreated,
		// Textures
		(int)textureCache_->NumLoadedTextures(),
		gpuStats.numTexturesDecoded,
		gpuStats.numTextureInvalidations,
		gpuStats.numTextureDataBytesHashed / 1024,
		gpuStats.numClutTextures,
		// Readbacks / uploads / depal
		gpuStats.numBlockingReadbacks,
		gpuStats.numReadbacks,
		gpuStats.numUploads,
		gpuStats.numCachedUploads,
		gpuStats.numDepal,
		gpuStats.numBlockTransfers,
		// Texture replacement
		gpuStats.numReplacerTrackedTex,
		gpuStats.numCachedReplacedTextures,
		// Framebuffer copies
		gpuStats.numDepthCopies,
		gpuStats.numColorCopies,
		gpuStats.numReinterpretCopies,
		gpuStats.numCopiesForShaderBlend,
		gpuStats.numCopiesForSelfTex,
		// Estimated GPU cycles
		gpuStats.vertexGPUCycles + gpuStats.otherGPUCycles,
		vertexAverageCycles,
		// Depth rasterizer timing (seconds -> ms)
		gpuStats.msPrepareDepth * 1000.0,
		gpuStats.msCullDepth * 1000.0,
		gpuStats.msRasterizeDepth * 1000.0,
		(gpuStats.msPrepareDepth + gpuStats.msCullDepth + gpuStats.msRasterizeDepth) * 1000.0,
		gpuStats.msRasterTimeAvailable * 1000.0,
		// Depth rasterizer counters
		gpuStats.numDepthRasterPrims,
		gpuStats.numDepthRasterNoPixels,
		gpuStats.numDepthRasterTooSmall,
		gpuStats.numDepthRasterEarlySize,
		gpuStats.numDepthRasterZCulled,
		gpuStats.numDepthEarlyBoxCulled,
		debugRecording_ ? "(debug-recording)" : ""
	);
}
1871
1872