CoCalc -- GPUState.cpp

GitHub Repository: hrydgard/ppsspp
Path: blob/master/GPU/GPUState.cpp
3185 views
1
// Copyright (c) 2012- PPSSPP Project.
2

3
// This program is free software: you can redistribute it and/or modify
4
// it under the terms of the GNU General Public License as published by
5
// the Free Software Foundation, version 2.0 or later versions.
6

7
// This program is distributed in the hope that it will be useful,
8
// but WITHOUT ANY WARRANTY; without even the implied warranty of
9
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10
// GNU General Public License 2.0 for more details.
11

12
// A copy of the GPL 2.0 should have been included with the program.
13
// If not, see http://www.gnu.org/licenses/
14

15
// Official git repository and contact information can be found at
16
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
17

18
#include "ppsspp_config.h"
19
#include "Common/Common.h"
20
#include "Common/Math/SIMDHeaders.h"
21
#include "Common/Serialize/Serializer.h"
22
#include "Common/Serialize/SerializeFuncs.h"
23
#include "Core/MemMap.h"
24
#include "GPU/ge_constants.h"
25
#include "GPU/GPUCommon.h"
26
#include "GPU/GPUState.h"
27

28
// This must be aligned so that the matrices within are aligned.
29
alignas(16) GPUgstate gstate;
30
// Let's align this one too for good measure.
31
alignas(16) GPUStateCache gstate_c;
32

33
// For save state compatibility.
34
static int savedContextVersion = 1;
35

36
struct CmdRange {
37
	u8 start;
38
	u8 end;
39
};
40

41
// Command ranges whose cmdmem words are copied by GPUgstate::Save and read
// back by GPUgstate::Restore, in this order. Every bound is inclusive; the
// "Skip" comments list the gaps that are intentionally left out.
static const CmdRange contextCmdRanges[] = {
	{ 0x00, 0x02 },
	// Skip: {0x03, 0x0F},
	{ 0x10, 0x10 },
	// Skip: {0x11, 0x11},
	{ 0x12, 0x28 },
	// Skip: {0x29, 0x2B},
	{ 0x2C, 0x33 },
	// Skip: {0x34, 0x35},
	{ 0x36, 0x38 },
	// Skip: {0x39, 0x41},
	{ 0x42, 0x4D },
	// Skip: {0x4E, 0x4F},
	{ 0x50, 0x51 },
	// Skip: {0x52, 0x52},
	{ 0x53, 0x58 },
	// Skip: {0x59, 0x5A},
	{ 0x5B, 0xB5 },
	// Skip: {0xB6, 0xB7},
	{ 0xB8, 0xC3 },
	// Skip: {0xC4, 0xC4},
	{ 0xC5, 0xD0 },
	// Skip: {0xD1, 0xD1},
	{ 0xD2, 0xE9 },
	// Skip: {0xEA, 0xEA},
	{ 0xEB, 0xEC },
	// Skip: {0xED, 0xED},
	{ 0xEE, 0xEE },
	// Skip: {0xEF, 0xEF},
	{ 0xF0, 0xF6 },
	// Skip: {0xF7, 0xF7},
	{ 0xF8, 0xF9 },
	// Skip: {0xFA, 0xFF},
};
75

76
// Appends one matrix to a saved context as a run of command words.
//
// cmds:    write position inside the context buffer.
// type:    which matrix to store; GE_MTX_BONE0 stores all eight bone matrices
//          back to back, 12 words apart.
// sz:      number of data words the write position advances by after the header.
// numcmd:  command id placed (shifted into the top byte) in the header word.
// datacmd: command id handed to GetMatrix24, shifted into the top byte.
//
// Returns the write position after the header word and sz data words. When no
// gpu is available nothing is written and cmds is returned unchanged.
static u32_le *SaveMatrix(u32_le *cmds, GEMatrixType type, int sz, int numcmd, int datacmd) {
	if (!gpu)
		return cmds;

	// Header: the "matrix number" command with a zero argument.
	*cmds++ = numcmd << 24;

	// This saves the CPU-visible values, not the actual used ones, which may differ.
	// Note that Restore overwrites both values.
	const int dataHeader = datacmd << 24;
	if (type != GE_MTX_BONE0) {
		gpu->GetMatrix24(type, cmds, dataHeader);
	} else {
		for (int bone = 0; bone < 8; ++bone)
			gpu->GetMatrix24(GEMatrixType(GE_MTX_BONE0 + bone), cmds + bone * 12, dataHeader);
	}

	return cmds + sz;
}
93

94
// Reads one matrix back from a saved context.
//
// cmds: position of the matrix's header word inside the context buffer.
// mtx:  destination for the decoded floats.
// sz:   number of data words to decode.
//
// Returns the read position just past the consumed words.
static const u32_le *LoadMatrix(const u32_le *cmds, float *mtx, int sz) {
	// The first word is the matrix-number reset command; it carries no data.
	const u32_le *src = cmds + 1;
	for (int remaining = sz; remaining > 0; --remaining)
		*mtx++ = getFloat24(*src++);

	return src;
}
103

104
void GPUgstate::Reset() {
105
	memset(gstate.cmdmem, 0, sizeof(gstate.cmdmem));
106
	for (int i = 0; i < 256; i++) {
107
		gstate.cmdmem[i] = i << 24;
108
	}
109

110
	// Lighting is not enabled by default, matrices are zero initialized.
111
	memset(gstate.worldMatrix, 0, sizeof(gstate.worldMatrix));
112
	memset(gstate.viewMatrix, 0, sizeof(gstate.viewMatrix));
113
	memset(gstate.projMatrix, 0, sizeof(gstate.projMatrix));
114
	memset(gstate.tgenMatrix, 0, sizeof(gstate.tgenMatrix));
115
	memset(gstate.boneMatrix, 0, sizeof(gstate.boneMatrix));
116

117
	savedContextVersion = 1;
118

119
	gstate_c.Dirty(DIRTY_CULL_PLANES);
120
}
121

122
// Serializes this GE state into the context buffer at ptr.
//
// Layout written here: ptr[5..7] receive the vertex, index and offset
// addresses from gstate_c. Starting at ptr[17], every command word covered by
// contextCmdRanges follows in table order, and after that comes the matrix
// data in one of two formats selected by savedContextVersion.
// No buffer size is checked here; the caller must provide enough room.
//
// NOTE(review): SaveMatrix writes nothing when gpu is null, whereas Restore
// always consumes the matrix words - presumably Save is never reached without
// a gpu; confirm.
void GPUgstate::Save(u32_le *ptr) {
	// Not sure what the first 10 values are, exactly, but these seem right.
	ptr[5] = gstate_c.vertexAddr;
	ptr[6] = gstate_c.indexAddr;
	ptr[7] = gstate_c.offsetAddr;

	// Command values start 17 ints in.
	u32_le *cmds = ptr + 17;
	for (const CmdRange &range : contextCmdRanges) {
		for (int n = range.start; n <= range.end; ++n) {
			// We'll run ReapplyGfxState after this to process dirtying.
			*cmds++ = cmdmem[n];
		}
	}

	if (savedContextVersion == 0) {
		// Legacy layout: optional loadclut word, the five matrix-number
		// registers, then the raw float matrices copied byte for byte.
		if (Memory::IsValidAddress(getClutAddress()))
			*cmds++ = loadclut;

		// Seems like it actually writes commands to load the matrices and then reset the counts.
		*cmds++ = boneMatrixNumber;
		*cmds++ = worldmtxnum;
		*cmds++ = viewmtxnum;
		*cmds++ = projmtxnum;
		*cmds++ = texmtxnum;

		u8 *out = (u8 *)cmds;
		memcpy(out, boneMatrix, sizeof(boneMatrix));
		out += sizeof(boneMatrix);
		memcpy(out, worldMatrix, sizeof(worldMatrix));
		out += sizeof(worldMatrix);
		memcpy(out, viewMatrix, sizeof(viewMatrix));
		out += sizeof(viewMatrix);
		memcpy(out, projMatrix, sizeof(projMatrix));
		out += sizeof(projMatrix);
		memcpy(out, tgenMatrix, sizeof(tgenMatrix));
		return;
	}

	// Current layout: each matrix as a header word plus its data words...
	cmds = SaveMatrix(cmds, GE_MTX_BONE0, ARRAY_SIZE(boneMatrix), GE_CMD_BONEMATRIXNUMBER, GE_CMD_BONEMATRIXDATA);
	cmds = SaveMatrix(cmds, GE_MTX_WORLD, ARRAY_SIZE(worldMatrix), GE_CMD_WORLDMATRIXNUMBER, GE_CMD_WORLDMATRIXDATA);
	cmds = SaveMatrix(cmds, GE_MTX_VIEW, ARRAY_SIZE(viewMatrix), GE_CMD_VIEWMATRIXNUMBER, GE_CMD_VIEWMATRIXDATA);
	cmds = SaveMatrix(cmds, GE_MTX_PROJECTION, ARRAY_SIZE(projMatrix), GE_CMD_PROJMATRIXNUMBER, GE_CMD_PROJMATRIXDATA);
	cmds = SaveMatrix(cmds, GE_MTX_TEXGEN, ARRAY_SIZE(tgenMatrix), GE_CMD_TGENMATRIXNUMBER, GE_CMD_TGENMATRIXDATA);

	// ...followed by the matrix-number registers (command byte plus the low
	// index bits only) and a terminating END command.
	*cmds++ = boneMatrixNumber & 0xFF00007F;
	*cmds++ = worldmtxnum & 0xFF00000F;
	*cmds++ = viewmtxnum & 0xFF00000F;
	*cmds++ = projmtxnum & 0xFF00000F;
	*cmds++ = texmtxnum & 0xFF00000F;
	*cmds++ = GE_CMD_END << 24;
}
169

170
// Copies 12 words from emulated memory at addr into boneMatrix, starting at
// the index held in the low 7 bits of boneMatrixNumber. Each word is shifted
// left by 8 bits on the way. Afterwards the global gstate.boneMatrixNumber is
// rewritten with the index advanced by 12.
//
// addr is dereferenced through Memory::GetPointerUnchecked, so it is not
// validated here.
// NOTE(review): (index & 0x7F) may be as large as 127 and 12 words are written
// from there; presumably callers guarantee the destination stays inside
// boneMatrix - confirm.
void GPUgstate::FastLoadBoneMatrix(u32 addr) {
	const u32_le *words = (const u32_le *)Memory::GetPointerUnchecked(addr);
	u32 index = boneMatrixNumber;
	u32 *out = (u32 *)(boneMatrix + (index & 0x7F));

#ifdef _M_SSE
	// Load all three rows (unaligned source) before storing anything.
	const __m128i r0 = _mm_slli_epi32(_mm_loadu_si128((const __m128i *)words), 8);
	const __m128i r1 = _mm_slli_epi32(_mm_loadu_si128((const __m128i *)(words + 4)), 8);
	const __m128i r2 = _mm_slli_epi32(_mm_loadu_si128((const __m128i *)(words + 8)), 8);
	// Aligned stores are only used when the start index is a multiple of 4 words.
	const bool alignedDest = (index & 0x3) == 0;
	if (alignedDest) {
		_mm_store_si128((__m128i *)out, r0);
		_mm_store_si128((__m128i *)(out + 4), r1);
		_mm_store_si128((__m128i *)(out + 8), r2);
	} else {
		_mm_storeu_si128((__m128i *)out, r0);
		_mm_storeu_si128((__m128i *)(out + 4), r1);
		_mm_storeu_si128((__m128i *)(out + 8), r2);
	}
#elif PPSSPP_ARCH(ARM_NEON)
	const uint32x4_t r0 = vshlq_n_u32(vld1q_u32(words), 8);
	const uint32x4_t r1 = vshlq_n_u32(vld1q_u32(words + 4), 8);
	const uint32x4_t r2 = vshlq_n_u32(vld1q_u32(words + 8), 8);
	vst1q_u32(out, r0);
	vst1q_u32(out + 4, r1);
	vst1q_u32(out + 8, r2);
#else
	// Portable fallback: one word at a time.
	for (int w = 0; w < 12; w++) {
		out[w] = words[w] << 8;
	}
#endif

	// Advance the index, keeping the command id in the top byte.
	index += 12;
	gstate.boneMatrixNumber = (GE_CMD_BONEMATRIXNUMBER << 24) | (index & 0x00FFFFFF);
}
204

205
void GPUgstate::Restore(const u32_le *ptr) {
206
	// Not sure what the first 10 values are, exactly, but these seem right.
207
	gstate_c.vertexAddr = ptr[5];
208
	gstate_c.indexAddr = ptr[6];
209
	gstate_c.offsetAddr = ptr[7];
210

211
	// Command values start 17 ints in.
212
	const u32_le *cmds = ptr + 17;
213
	for (size_t i = 0; i < ARRAY_SIZE(contextCmdRanges); ++i) {
214
		for (int n = contextCmdRanges[i].start; n <= contextCmdRanges[i].end; ++n) {
215
			cmdmem[n] = *cmds++;
216
		}
217
	}
218

219
	if (savedContextVersion == 0) {
220
		if (Memory::IsValidAddress(getClutAddress()))
221
			loadclut = *cmds++;
222
		boneMatrixNumber = *cmds++;
223
		worldmtxnum = *cmds++;
224
		viewmtxnum = *cmds++;
225
		projmtxnum = *cmds++;
226
		texmtxnum = *cmds++;
227

228
		u8 *matrices = (u8 *)cmds;
229
		memcpy(boneMatrix, matrices, sizeof(boneMatrix)); matrices += sizeof(boneMatrix);
230
		memcpy(worldMatrix, matrices, sizeof(worldMatrix)); matrices += sizeof(worldMatrix);
231
		memcpy(viewMatrix, matrices, sizeof(viewMatrix)); matrices += sizeof(viewMatrix);
232
		memcpy(projMatrix, matrices, sizeof(projMatrix)); matrices += sizeof(projMatrix);
233
		memcpy(tgenMatrix, matrices, sizeof(tgenMatrix)); matrices += sizeof(tgenMatrix);
234
	} else {
235
		cmds = LoadMatrix(cmds, boneMatrix, ARRAY_SIZE(boneMatrix));
236
		cmds = LoadMatrix(cmds, worldMatrix, ARRAY_SIZE(worldMatrix));
237
		cmds = LoadMatrix(cmds, viewMatrix, ARRAY_SIZE(viewMatrix));
238
		cmds = LoadMatrix(cmds, projMatrix, ARRAY_SIZE(projMatrix));
239
		cmds = LoadMatrix(cmds, tgenMatrix, ARRAY_SIZE(tgenMatrix));
240

241
		boneMatrixNumber = (*cmds++) & 0xFF00007F;
242
		worldmtxnum = (*cmds++) & 0xFF00000F;
243
		viewmtxnum = (*cmds++) & 0xFF00000F;
244
		projmtxnum = (*cmds++) & 0xFF00000F;
245
		texmtxnum = (*cmds++) & 0xFF00000F;
246
	}
247

248
	if (gpu)
249
		gpu->ResetMatrices();
250

251
	gstate_c.Dirty(DIRTY_CULL_PLANES);
252
}
253

254
// Returns true when the weight field of vertType is anything other than
// GE_VTYPE_WEIGHT_NONE.
bool vertTypeIsSkinningEnabled(u32 vertType) {
	const u32 weightBits = vertType & GE_VTYPE_WEIGHT_MASK;
	return weightBits != GE_VTYPE_WEIGHT_NONE;
}
257

258
struct GPUStateCache_v0 {
259
	u32 vertexAddr;
260
	u32 indexAddr;
261

262
	u32 offsetAddr;
263

264
	bool textureChanged;
265
	bool textureFullAlpha;
266
	bool vertexFullAlpha;
267
	bool framebufChanged;
268

269
	int skipDrawReason;
270

271
	UVScale uv;
272
	bool flipTexture;
273
};
274

275
void GPUStateCache::Reset() {
276
	memset(&gstate_c, 0, sizeof(gstate_c));
277
}
278

279
// Serializes or deserializes the state cache through p, depending on p's mode.
//
// The section is versioned 0..5. Fields that no longer exist are still passed
// through Do() via throwaway locals so the stream layout of older versions is
// honored. The order of the Do() calls below defines that layout and must not
// be changed.
void GPUStateCache::DoState(PointerWrap &p) {
	auto s = p.Section("GPUStateCache", 0, 5);
	if (!s) {
		// Old state, this was not versioned: it is one raw GPUStateCache_v0.
		GPUStateCache_v0 legacy;
		Do(p, legacy);

		vertexAddr = legacy.vertexAddr;
		indexAddr = legacy.indexAddr;
		offsetAddr = legacy.offsetAddr;
		gstate_c.Dirty(DIRTY_TEXTURE_IMAGE | DIRTY_TEXTURE_PARAMS);
		textureFullAlpha = legacy.textureFullAlpha;
		vertexFullAlpha = legacy.vertexFullAlpha;
		skipDrawReason = legacy.skipDrawReason;
		uv = legacy.uv;

		savedContextVersion = 0;
	} else {
		Do(p, vertexAddr);
		Do(p, indexAddr);
		Do(p, offsetAddr);

		uint8_t legacyTextureChanged = 0;
		Do(p, legacyTextureChanged);  // legacy
		gstate_c.Dirty(DIRTY_TEXTURE_IMAGE | DIRTY_TEXTURE_PARAMS);
		Do(p, textureFullAlpha);
		Do(p, vertexFullAlpha);
		bool legacyFramebufChanged = false;  // legacy
		Do(p, legacyFramebufChanged);

		Do(p, skipDrawReason);

		Do(p, uv);

		bool legacyFlipTexture = false;
		Do(p, legacyFlipTexture);  // legacy
	}

	// needShaderTexClamp and bgraTexture don't need to be saved.

	if (s >= 3) {
		bool legacyTextureSimpleAlpha = false;
		Do(p, legacyTextureSimpleAlpha);
	}

	if (s < 2) {
		// Versions before 2 stored light data here; pass it through scratch
		// buffers and discard it.
		float scratch12[12];
		float scratch4[4];
		// lightpos, lightdir, lightattr, lightcol0, lightcol1, lightcol2
		for (int field = 0; field < 6; ++field)
			Do(p, scratch12);
		// lightangle, lightspot
		for (int field = 0; field < 2; ++field)
			Do(p, scratch4);
	}

	Do(p, morphWeights);

	Do(p, curTextureWidth);
	Do(p, curTextureHeight);
	Do(p, actualTextureHeight);
	// curTextureXOffset and curTextureYOffset don't need to be saved.  Well, the above don't either...

	Do(p, vpWidth);
	Do(p, vpHeight);
	if (s == 4) {
		// Only version 4 carried this extra float.
		float legacyDepth = 1.0f;
		Do(p, legacyDepth);
	}

	Do(p, curRTWidth);
	Do(p, curRTHeight);

	// curRTBufferWidth, curRTBufferHeight, and cutRTOffsetX don't need to be saved.
	if (s < 5) {
		// States older than version 5 imply the legacy saved-context layout.
		savedContextVersion = 0;
	} else {
		Do(p, savedContextVersion);
	}

	if (p.GetMode() == PointerWrap::MODE_READ)
		gstate_c.Dirty(DIRTY_CULL_PLANES);
}
364

365
// Display names for the 32 use-flag bits, indexed by bit number.
// Bits without a name are reported as "N/A".
static const char *const g_gpuUseFlagNames[32] = {
	"GPU_USE_DUALSOURCE_BLEND",             // bit 0
	"GPU_USE_LIGHT_UBERSHADER",             // bit 1
	"GPU_USE_FRAGMENT_TEST_CACHE",          // bit 2
	"GPU_USE_VS_RANGE_CULLING",             // bit 3
	"GPU_USE_BLEND_MINMAX",                 // bit 4
	"GPU_USE_LOGIC_OP",                     // bit 5
	"GPU_USE_FRAGMENT_UBERSHADER",          // bit 6
	"GPU_USE_TEXTURE_NPOT",                 // bit 7
	"GPU_USE_ANISOTROPY",                   // bit 8
	"GPU_USE_CLEAR_RAM_HACK",               // bit 9
	"GPU_USE_INSTANCE_RENDERING",           // bit 10
	"GPU_USE_VERTEX_TEXTURE_FETCH",         // bit 11
	"GPU_USE_TEXTURE_FLOAT",                // bit 12
	"GPU_USE_16BIT_FORMATS",                // bit 13
	"GPU_USE_DEPTH_CLAMP",                  // bit 14
	"GPU_USE_TEXTURE_LOD_CONTROL",          // bit 15
	"GPU_USE_DEPTH_TEXTURE",                // bit 16
	"GPU_USE_ACCURATE_DEPTH",               // bit 17
	"GPU_USE_GS_CULLING",                   // bit 18
	"N/A",                                  // bit 19
	"GPU_USE_FRAMEBUFFER_FETCH",            // bit 20
	"GPU_SCALE_DEPTH_FROM_24BIT_TO_16BIT",  // bit 21
	"GPU_ROUND_FRAGMENT_DEPTH_TO_16BIT",    // bit 22
	"GPU_ROUND_DEPTH_TO_16BIT",             // bit 23
	"GPU_USE_CLIP_DISTANCE",                // bit 24
	"GPU_USE_CULL_DISTANCE",                // bit 25
	"N/A",                                  // bit 26
	"N/A",                                  // bit 27
	"N/A",                                  // bit 28
	"GPU_USE_VIRTUAL_REALITY",              // bit 29
	"GPU_USE_SINGLE_PASS_STEREO",           // bit 30
	"GPU_USE_SIMPLE_STEREO_PERSPECTIVE",    // bit 31
};
399

400
const char *GpuUseFlagToString(int useFlag) {
401
	if ((u32)useFlag < 32) {
402
		return g_gpuUseFlagNames[useFlag];
403
	} else {
404
		return "N/A";
405
	}
406
}
407

408
bool GPUStateCache::SetUseFlags(const u32 newFlags) {
409
	if (newFlags != useFlags_) {
410
		if (useFlags_ != 0 && newFlags != 0) {
411
			INFO_LOG(Log::G3D, "Shader useflags changed from %08x to %08x:", useFlags_, newFlags);
412
			for (int i = 0; i < 32; i++) {
413
				const int mask = 1 << i;
414
				bool oldVal = (useFlags_ & mask) != 0;
415
				bool newVal = (newFlags & mask) != 0;
416
				if (oldVal != newVal) {
417
					INFO_LOG(Log::G3D, "%s changed from %d to %d", g_gpuUseFlagNames[i], (int)oldVal, (int)newVal);
418
				}
419
			}
420
			useFlagsChanged = true;
421
		}
422
		useFlags_ = newFlags;
423
	}
424
	return useFlagsChanged;
425
}
426

427
Product

Resources

Company