Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
hrydgard
GitHub Repository: hrydgard/ppsspp
Path: blob/master/GPU/Software/Lighting.cpp
3187 views
1
// Copyright (c) 2013- PPSSPP Project.
2
3
// This program is free software: you can redistribute it and/or modify
4
// it under the terms of the GNU General Public License as published by
5
// the Free Software Foundation, version 2.0 or later versions.
6
7
// This program is distributed in the hope that it will be useful,
8
// but WITHOUT ANY WARRANTY; without even the implied warranty of
9
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10
// GNU General Public License 2.0 for more details.
11
12
// A copy of the GPL 2.0 should have been included with the program.
13
// If not, see http://www.gnu.org/licenses/
14
15
// Official git repository and contact information can be found at
16
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
17
18
#include "ppsspp_config.h"
19
#include <cmath>
20
#include "Common/Common.h"
21
#include "Common/CPUDetect.h"
22
#include "Common/Math/SIMDHeaders.h"
23
#include "GPU/GPUState.h"
24
#include "GPU/Software/Lighting.h"
25
26
#if PPSSPP_ARCH(SSE2)
27
// For the SSE4 stuff.
28
#include <smmintrin.h>
29
#endif
30
31
namespace Lighting {
32
33
// Fetches the three components of one light's vector from a register array that
// stores three consecutive float24 words per light, widened to 32-bit floats.
//   lparams: packed register words (three per light).
//   light:   index of the light whose triple is read.
// NOTE(review): the SIMD paths load four words starting at 3 * light, so for
// light 3 they touch one word past the 12 entries named in the signature -
// presumably harmless given what follows in memory; confirm against the caller.
static inline Vec3f GetLightVec(const u32 lparams[12], int light) {
	const u32 *triple = &lparams[3 * light];
#if defined(_M_SSE) && !PPSSPP_ARCH(X86)
	// Shifting the 24-bit payload up by 8 gives the bit pattern reinterpreted as float below.
	const __m128i packed = _mm_loadu_si128((__m128i *)triple);
	const __m128i widened = _mm_slli_epi32(packed, 8);
	return _mm_castsi128_ps(widened);
#elif PPSSPP_ARCH(ARM64_NEON)
	const uint32x4_t packed = vld1q_u32((uint32_t *)triple);
	const uint32x4_t widened = vshlq_n_u32(packed, 8);
	return vreinterpretq_f32_u32(widened);
#else
	const float x = getFloat24(triple[0]);
	const float y = getFloat24(triple[1]);
	const float z = getFloat24(triple[2]);
	return Vec3<float>(x, y, z);
#endif
}
46
47
// Power helper used by the lighting math, with these special cases:
//  - an exponent <= 0 always produces 1,
//  - a base that is not strictly positive (zero, negative, NaN) is returned as-is,
//  - otherwise the result is pow(v, e).
static inline float pspLightPow(float v, float e) {
	const bool exponentNotPositive = e <= 0.0f;
	if (exponentNotPositive)
		return 1.0f;

	// Negative stays negative, so the original value is simply handed back.
	if (!(v > 0.0f))
		return v;

	return pow(v, e);
}
57
58
// Turns expanded color channels into the factor form used by the lighting math:
// every lane becomes (channel * 2) + the matching lane of `ones`.
// Callers in this file pass a vector with 1 in every lane as `ones`.
static inline Vec4<int> LightColorFactor(const Vec4<int> &expanded, const Vec4<int> &ones) {
#if defined(_M_SSE) && !PPSSPP_ARCH(X86)
	const __m128i doubled = _mm_slli_epi32(expanded.ivec, 1);
	return _mm_add_epi32(doubled, ones.ivec);
#elif PPSSPP_ARCH(ARM64_NEON)
	const int32x4_t doubled = vshlq_n_s32(expanded.ivec, 1);
	return vaddq_s32(doubled, ones.ivec);
#else
	Vec4<int> doubled = expanded * 2;
	return doubled + ones;
#endif
}
67
68
// Convenience overload: expands a packed RGBA color into per-channel ints first,
// then converts it to factor form via the Vec4<int> overload.
static inline Vec4<int> LightColorFactor(uint32_t c, const Vec4<int> &ones) {
	const Vec4<int> expanded = Vec4<int>::FromRGBA(c);
	return LightColorFactor(expanded, ones);
}
71
72
static inline bool IsLargerThanHalf(const Vec4<int> &v) {
73
#if defined(_M_SSE) && !PPSSPP_ARCH(X86)
74
__m128i add23 = _mm_add_epi32(v.ivec, _mm_shuffle_epi32(v.ivec, _MM_SHUFFLE(3, 2, 3, 2)));
75
__m128i add1 = _mm_add_epi32(add23, _mm_shuffle_epi32(add23, _MM_SHUFFLE(1, 1, 1, 1)));
76
return _mm_cvtsi128_si32(add1) > 4;
77
#elif PPSSPP_ARCH(ARM64_NEON)
78
int32x2_t add02 = vpmax_s32(vget_low_s32(v.ivec), vget_high_s32(v.ivec));
79
int32x2_t add1 = vpmax_s32(add02, add02);
80
return vget_lane_s32(add1, 0) > 4;
81
#else
82
bool larger = false;
83
for (int i = 0; i < 3; ++i)
84
larger = v[i] > 1;
85
return larger;
86
#endif
87
}
88
89
// Captures the lighting configuration from gstate into *state so that Process()
// can light vertices without re-reading and re-decoding registers each time.
//   state:     output; per-light and material data are written here.
//   hasColor0: when false, the material-update bits are ignored and no material
//              channel takes its color from the vertex.
void ComputeState(State *state, bool hasColor0) {
	const Vec4<int> ones = Vec4<int>::AssignToAll(1);

	// Which lighting terms does at least one enabled light contribute to?
	bool ambientInUse = false;
	bool diffuseInUse = false;
	bool specularInUse = false;
	bool positionalInUse = false;

	for (int light = 0; light < 4; ++light) {
		auto &lstate = state->lights[light];
		lstate.enabled = gstate.isLightChanEnabled(light);
		if (!lstate.enabled)
			continue;

		lstate.poweredDiffuse = gstate.isUsingPoweredDiffuseLight(light);
		lstate.specular = gstate.isUsingSpecularLight(light);

		lstate.ambientColorFactor = LightColorFactor(gstate.getLightAmbientColor(light), ones);
		lstate.ambient = IsLargerThanHalf(lstate.ambientColorFactor);
		if (lstate.ambient)
			ambientInUse = true;

		lstate.diffuseColorFactor = LightColorFactor(gstate.getDiffuseColor(light), ones);
		lstate.diffuse = IsLargerThanHalf(lstate.diffuseColorFactor);
		if (lstate.diffuse)
			diffuseInUse = true;

		// The specular color is only decoded when the light type uses specular at all.
		if (lstate.specular) {
			lstate.specularColorFactor = LightColorFactor(gstate.getSpecularColor(light), ones);
			lstate.specular = IsLargerThanHalf(lstate.specularColorFactor);
			if (lstate.specular)
				specularInUse = true;
		}

		// Doesn't actually need to be on if nothing will affect it.
		const bool contributes = lstate.specular || lstate.ambient || lstate.diffuse;
		if (!contributes) {
			lstate.enabled = false;
			continue;
		}

		lstate.pos = GetLightVec(gstate.lpos, light);
		lstate.directional = gstate.isDirectionalLight(light);
		if (!lstate.directional) {
			// Only non-directional lights use attenuation (and the vertex position).
			lstate.att = GetLightVec(gstate.latt, light);
			positionalInUse = true;
		} else {
			lstate.pos.NormalizeOr001();
		}

		lstate.spot = gstate.isSpotLight(light);
		if (lstate.spot) {
			lstate.spotDir = GetLightVec(gstate.ldir, light);
			lstate.spotDir.Normalize();

			// A negative NaN cutoff is replaced by 0; other values are kept as read.
			lstate.spotCutoff = getFloat24(gstate.lcutoff[light]);
			const bool negativeNaNCutoff = std::isnan(lstate.spotCutoff) && std::signbit(lstate.spotCutoff);
			if (negativeNaNCutoff)
				lstate.spotCutoff = 0.0f;

			// Exponent: non-positive clamps to 0, NaN maps to 0 or infinity by sign.
			lstate.spotExp = getFloat24(gstate.lconv[light]);
			if (lstate.spotExp <= 0.0f) {
				lstate.spotExp = 0.0f;
			} else if (std::isnan(lstate.spotExp)) {
				lstate.spotExp = std::signbit(lstate.spotExp) ? 0.0f : INFINITY;
			}
		}
	}

	// Bits 1, 2 and 4 select the vertex color for ambient, diffuse and specular.
	const int materialupdate = hasColor0 ? (gstate.materialupdate & 7) : 0;
	state->colorForAmbient = (materialupdate & 1) != 0;
	state->colorForDiffuse = (materialupdate & 2) != 0;
	state->colorForSpecular = (materialupdate & 4) != 0;

	if (!state->colorForAmbient) {
		state->material.ambientColorFactor = LightColorFactor(gstate.getMaterialAmbientRGBA(), ones);
		// A negligible material ambient cancels the ambient term of every light.
		if (!IsLargerThanHalf(state->material.ambientColorFactor) && ambientInUse) {
			for (int idx = 0; idx < 4; ++idx)
				state->lights[idx].ambient = false;
		}
	}

	if (diffuseInUse && !state->colorForDiffuse) {
		state->material.diffuseColorFactor = LightColorFactor(gstate.getMaterialDiffuse(), ones);
		if (!IsLargerThanHalf(state->material.diffuseColorFactor)) {
			diffuseInUse = false;
			for (int idx = 0; idx < 4; ++idx)
				state->lights[idx].diffuse = false;
		}
	}

	if (specularInUse && !state->colorForSpecular) {
		state->material.specularColorFactor = LightColorFactor(gstate.getMaterialSpecular(), ones);
		if (!IsLargerThanHalf(state->material.specularColorFactor)) {
			specularInUse = false;
			for (int idx = 0; idx < 4; ++idx)
				state->lights[idx].specular = false;
		}
	}

	if (diffuseInUse || specularInUse) {
		// Same sanitizing as the spot exponent above.
		state->specularExp = gstate.getMaterialSpecularCoef();
		if (state->specularExp <= 0.0f) {
			state->specularExp = 0.0f;
		} else if (std::isnan(state->specularExp)) {
			state->specularExp = std::signbit(state->specularExp) ? 0.0f : INFINITY;
		}
	}

	state->baseAmbientColorFactor = LightColorFactor(gstate.getAmbientRGBA(), ones);

	// Specular either goes to the secondary color or is added into the primary one.
	const bool secondaryColor = gstate.isUsingSecondaryColor();
	state->setColor1 = secondaryColor && specularInUse;
	state->addColor1 = !secondaryColor && specularInUse;

	state->usesWorldPos = positionalInUse;
	state->usesWorldNormal = gstate.getUVGenMode() == GE_TEXMAP_ENVIRONMENT_MAP || diffuseInUse || specularInUse;
}
195
196
// Produces one texture coordinate for environment mapping from the given light:
// dot(normalized light position, worldnormal), then (d + 1) / 2.
// `vertex` is accepted but not read here.
static inline float GenerateLightCoord(VertexData &vertex, const WorldCoords &worldnormal, int light) {
	// TODO: Should specular lighting affect this, too? Doesn't in GLES.
	Vec3<float> lightPos = GetLightVec(gstate.lpos, light);
	// A zero-length light position behaves as (0, 0, 1), i.e. Dot({0, 0, 1}, worldnormal).
	Vec3<float> lightDir = lightPos.NormalizedOr001(cpu_info.bSSE4_1);
	const float cosine = Dot(lightDir, worldnormal);

	return (cosine + 1.0f) / 2.0f;
}
204
205
// Fills in the vertex's s/t texture coordinates from the two lights selected for
// UV generation. Used for environment mapping, which must work even when
// lighting itself is disabled altogether.
void GenerateLightST(VertexData &vertex, const WorldCoords &worldnormal) {
	const int lightForS = gstate.getUVLS0();
	vertex.texturecoords.s() = GenerateLightCoord(vertex, worldnormal, lightForS);

	const int lightForT = gstate.getUVLS1();
	vertex.texturecoords.t() = GenerateLightCoord(vertex, worldnormal, lightForT);
}
211
212
#if defined(_M_SSE)
213
#if defined(__GNUC__) || defined(__clang__) || defined(__INTEL_COMPILER)
214
[[gnu::target("sse4.1")]]
215
#endif
216
static inline int LightCeilSSE4(float f) {
217
__m128 v = _mm_set_ss(f);
218
// This isn't terribly fast, but seems to be better than calling ceilf().
219
return _mm_cvt_ss2si(_mm_ceil_ss(v, v));
220
}
221
222
#if defined(__GNUC__) || defined(__clang__) || defined(__INTEL_COMPILER)
223
[[gnu::target("sse4.1")]]
224
#endif
225
static inline __m128i LightColorScaleBy512SSE4(__m128i factor, __m128i color, __m128i scale) {
226
// We can use 16-bit multiply here (faster than 32-bit multiply) since our top bits are zero.
227
__m128i result18 = _mm_madd_epi16(factor, color);
228
// But now with 18 bits, we need a full multiply.
229
__m128i multiplied = _mm_mullo_epi32(result18, scale);
230
return _mm_srai_epi32(multiplied, 10 + 9);
231
}
232
#endif
233
234
// Rounds f up and returns it as an int, picking the fastest available method:
// the SSE4.1 helper when useSSE4 is set on SSE builds, a NEON convert on ARM64,
// and ceilf() everywhere else.
template <bool useSSE4>
static inline int LightCeil(float f) {
#if defined(_M_SSE)
	if (useSSE4)
		return LightCeilSSE4(f);
	return static_cast<int>(ceilf(f));
#elif PPSSPP_ARCH(ARM64_NEON)
	return vcvtps_s32_f32(f);
#else
	return static_cast<int>(ceilf(f));
#endif
}
244
245
template <bool useSSE4>
246
static Vec4<int> LightColorScaleBy512(const Vec4<int> &factor, const Vec4<int> &color, int scale) {
247
// We multiply s9 * s9 * s9, resulting in s27, then shift off 19 to get 8-bit.
248
// The reason all factors are s9 is to account for rounding.
249
// Also note that all values are positive, so can be treated as unsigned.
250
#if defined(_M_SSE) && !PPSSPP_ARCH(X86)
251
if (useSSE4)
252
return LightColorScaleBy512SSE4(factor.ivec, color.ivec, _mm_set1_epi32(scale));
253
#elif PPSSPP_ARCH(ARM64_NEON)
254
int32x4_t multiplied = vmulq_n_s32(vmulq_s32(factor.ivec, color.ivec), scale);
255
return vshrq_n_s32(multiplied, 10 + 9);
256
#endif
257
return (factor * color * scale) >> (10 + 9);
258
}
259
260
// Accumulates src into sum, lane by lane (sum is modified in place).
static inline void LightColorSum(Vec4<int> &sum, const Vec4<int> &src) {
#if defined(_M_SSE) && !PPSSPP_ARCH(X86)
	const __m128i total = _mm_add_epi32(sum.ivec, src.ivec);
	sum.ivec = total;
#elif PPSSPP_ARCH(ARM64_NEON)
	const int32x4_t total = vaddq_s32(sum.ivec, src.ivec);
	sum.ivec = total;
#else
	sum += src;
#endif
}
269
270
static inline float Dot33(const Vec3f &a, const Vec3f &b) {
271
#if defined(_M_SSE)
272
__m128 v = _mm_mul_ps(SAFE_M128(a.vec), SAFE_M128(b.vec)); // [X, Y, Z, W]
273
__m128 shuf = _mm_shuffle_ps(v, v, _MM_SHUFFLE(3, 2, 0, 1)); // [Y, X, Z, W]
274
__m128 sums = _mm_add_ps(v, shuf); // [X + Y, X + Y, Z + Z, W + W]
275
shuf = _mm_movehl_ps(shuf, shuf); // [Z, W, Z, W]
276
return _mm_cvtss_f32(_mm_add_ss(sums, shuf)); // X + Y + Z
277
#elif PPSSPP_ARCH(ARM64_NEON)
278
float32x4_t multipled = vsetq_lane_f32(0.0f, vmulq_f32(a.vec, b.vec), 3);
279
float32x2_t add1 = vget_low_f32(vpaddq_f32(multipled, multipled));
280
float32x2_t add2 = vpadd_f32(add1, add1);
281
return vget_lane_f32(add2, 0);
282
#else
283
return Dot(a, b);
284
#endif
285
}
286
287
template <bool useSSE4>
288
static void ProcessSIMD(VertexData &vertex, const WorldCoords &worldpos, const WorldCoords &worldnormal, const State &state) {
289
// Lighting blending rounds using the half offset method (like alpha blend.)
290
Vec4<int> colorFactor;
291
if (state.colorForAmbient || state.colorForDiffuse || state.colorForSpecular) {
292
const Vec4<int> ones = Vec4<int>::AssignToAll(1);
293
colorFactor = LightColorFactor(vertex.color0, ones);
294
}
295
296
Vec4<int> mec = Vec4<int>::FromRGBA(gstate.getMaterialEmissive());
297
298
Vec4<int> mac = state.colorForAmbient ? colorFactor : state.material.ambientColorFactor;
299
Vec4<int> ambient = (mac * state.baseAmbientColorFactor) >> 10;
300
301
Vec4<int> final_color = mec + ambient;
302
Vec4<int> specular_color = Vec4<int>::AssignToAll(0);
303
304
for (unsigned int light = 0; light < 4; ++light) {
305
const auto &lstate = state.lights[light];
306
if (!lstate.enabled)
307
continue;
308
309
// L = vector from vertex to light source
310
// TODO: Should transfer the light positions to world/view space for these calculations?
311
Vec3<float> L = lstate.pos;
312
float attspot = 1.0f;
313
if (!lstate.directional) {
314
L -= worldpos;
315
// TODO: Should this normalize (0, 0, 0) to (0, 0, 1)?
316
float d = L.NormalizeOr001();
317
318
float att = 1.0f / Dot33(lstate.att, Vec3f(1.0f, d, d * d));
319
if (!(att > 0.0f))
320
att = 0.0f;
321
else if (att > 1.0f)
322
att = 1.0f;
323
attspot = att;
324
}
325
326
if (lstate.spot) {
327
float rawSpot = Dot33(lstate.spotDir, L);
328
if (std::isnan(rawSpot))
329
rawSpot = std::signbit(rawSpot) ? 0.0f : 1.0f;
330
331
float spot = 1.0f;
332
if (rawSpot >= lstate.spotCutoff) {
333
spot = pspLightPow(rawSpot, lstate.spotExp);
334
if (std::isnan(spot))
335
spot = 0.0f;
336
} else {
337
spot = 0.0f;
338
}
339
340
attspot *= spot;
341
}
342
343
// ambient lighting
344
if (lstate.ambient) {
345
int attspot512 = (int)LightCeil<useSSE4>(256 * 2 * attspot + 1);
346
if (attspot512 > 512)
347
attspot512 = 512;
348
Vec4<int> lambient = LightColorScaleBy512<useSSE4>(lstate.ambientColorFactor, mac, attspot512);
349
LightColorSum(final_color, lambient);
350
}
351
352
// diffuse lighting
353
float diffuse_factor;
354
if (lstate.diffuse || lstate.specular) {
355
diffuse_factor = Dot33(L, worldnormal);
356
if (lstate.poweredDiffuse) {
357
diffuse_factor = pspLightPow(diffuse_factor, state.specularExp);
358
}
359
}
360
361
if (lstate.diffuse && diffuse_factor > 0.0f) {
362
int diffuse_attspot = (int)LightCeil<useSSE4>(256 * 2 * attspot * diffuse_factor + 1);
363
if (diffuse_attspot > 512)
364
diffuse_attspot = 512;
365
Vec4<int> mdc = state.colorForDiffuse ? colorFactor : state.material.diffuseColorFactor;
366
Vec4<int> ldiffuse = LightColorScaleBy512<useSSE4>(lstate.diffuseColorFactor, mdc, diffuse_attspot);
367
LightColorSum(final_color, ldiffuse);
368
}
369
370
if (lstate.specular && diffuse_factor >= 0.0f) {
371
Vec3<float> H = L + Vec3<float>(0.f, 0.f, 1.f);
372
373
float specular_factor = Dot33(H.NormalizedOr001(useSSE4), worldnormal);
374
specular_factor = pspLightPow(specular_factor, state.specularExp);
375
376
if (specular_factor > 0.0f) {
377
int specular_attspot = (int)LightCeil<useSSE4>(256 * 2 * attspot * specular_factor + 1);
378
if (specular_attspot > 512)
379
specular_attspot = 512;
380
381
Vec4<int> msc = state.colorForSpecular ? colorFactor : state.material.specularColorFactor;
382
Vec4<int> lspecular = LightColorScaleBy512<useSSE4>(lstate.specularColorFactor, msc, specular_attspot);
383
LightColorSum(specular_color, lspecular);
384
}
385
}
386
}
387
388
// Note: these are all naturally clamped by ToRGBA/toRGB.
389
if (state.setColor1) {
390
vertex.color0 = final_color.ToRGBA();
391
vertex.color1 = specular_color.rgb().ToRGB();
392
} else if (state.addColor1) {
393
vertex.color0 = (final_color + specular_color).ToRGBA();
394
} else {
395
vertex.color0 = final_color.ToRGBA();
396
}
397
}
398
399
// Public entry point for lighting a vertex. On SSE builds the SSE4.1
// specialization is used when the CPU reports support for it; every other case
// uses the generic specialization.
void Process(VertexData &vertex, const WorldCoords &worldpos, const WorldCoords &worldnormal, const State &state) {
#if defined(_M_SSE)
	if (cpu_info.bSSE4_1) {
		ProcessSIMD<true>(vertex, worldpos, worldnormal, state);
	} else {
		ProcessSIMD<false>(vertex, worldpos, worldnormal, state);
	}
#else
	ProcessSIMD<false>(vertex, worldpos, worldnormal, state);
#endif
}
408
409
} // namespace
410
411