Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
hrydgard
GitHub Repository: hrydgard/ppsspp
Path: blob/master/GPU/Common/IndexGenerator.cpp
3187 views
1
// Copyright (c) 2012- PPSSPP Project.
2
3
// This program is free software: you can redistribute it and/or modify
4
// it under the terms of the GNU General Public License as published by
5
// the Free Software Foundation, version 2.0 or later versions.
6
7
// This program is distributed in the hope that it will be useful,
8
// but WITHOUT ANY WARRANTY; without even the implied warranty of
9
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10
// GNU General Public License 2.0 for more details.
11
12
// A copy of the GPL 2.0 should have been included with the program.
13
// If not, see http://www.gnu.org/licenses/
14
15
// Official git repository and contact information can be found at
16
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
17
18
#include <cstring>
19
20
#include "ppsspp_config.h"
21
22
#include "Common/Math/SIMDHeaders.h"
23
#include "GPU/Common/IndexGenerator.h"
24
25
// Points don't need indexing...
26
// Primitive type of the generated index list for each input primitive type.
// NOTE(review): presumably indexed by the GE primitive enum value (points, lines,
// line strip, triangles, triangle strip, triangle fan, rectangles) - confirm
// against the enum declaration.
const u8 IndexGenerator::indexedPrimitiveType[7] = {
	GE_PRIM_POINTS,
	GE_PRIM_LINES, GE_PRIM_LINES,
	GE_PRIM_TRIANGLES, GE_PRIM_TRIANGLES, GE_PRIM_TRIANGLES,
	GE_PRIM_RECTANGLES,
};
35
36
void IndexGenerator::AddPrim(int prim, int vertexCount, int indexOffset, bool clockwise) {
37
switch (prim) {
38
case GE_PRIM_POINTS: AddPoints(vertexCount, indexOffset); break;
39
case GE_PRIM_LINES: AddLineList(vertexCount, indexOffset); break;
40
case GE_PRIM_LINE_STRIP: AddLineStrip(vertexCount, indexOffset); break;
41
case GE_PRIM_TRIANGLES: AddList(vertexCount, indexOffset, clockwise); break;
42
case GE_PRIM_TRIANGLE_STRIP: AddStrip(vertexCount, indexOffset, clockwise); break;
43
case GE_PRIM_TRIANGLE_FAN: AddFan(vertexCount, indexOffset, clockwise); break;
44
case GE_PRIM_RECTANGLES: AddRectangles(vertexCount, indexOffset); break; // Same
45
}
46
}
47
48
void IndexGenerator::AddPoints(int numVerts, int indexOffset) {
49
u16 *outInds = inds_;
50
for (int i = 0; i < numVerts; i++)
51
*outInds++ = indexOffset + i;
52
inds_ = outInds;
53
}
54
55
void IndexGenerator::AddList(int numVerts, int indexOffset, bool clockwise) {
56
u16 *outInds = inds_;
57
const int v1 = clockwise ? 1 : 2;
58
const int v2 = clockwise ? 2 : 1;
59
for (int i = 0; i < numVerts; i += 3) {
60
*outInds++ = indexOffset + i;
61
*outInds++ = indexOffset + i + v1;
62
*outInds++ = indexOffset + i + v2;
63
}
64
inds_ = outInds;
65
}
66
67
// Index offsets for the first eight triangles of a triangle strip, clockwise
// variant. Row k holds the three offsets of triangle k; odd rows have their
// first two entries swapped relative to even rows. 16-byte aligned so it can be
// read with aligned 128-bit loads.
alignas(16) static const u16 offsets_clockwise[24] = {
	0,     0 + 1, 0 + 2,
	1 + 1, 1,     1 + 2,
	2,     2 + 1, 2 + 2,
	3 + 1, 3,     3 + 2,
	4,     4 + 1, 4 + 2,
	5 + 1, 5,     5 + 2,
	6,     6 + 1, 6 + 2,
	7 + 1, 7,     7 + 2,
};
77
78
alignas(16) static const uint16_t offsets_counter_clockwise[24] = {
79
0, (u16)(0 + 2), (u16)(0 + 1),
80
1, (u16)(1 + 1), (u16)(1 + 2),
81
2, (u16)(2 + 2), (u16)(2 + 1),
82
3, (u16)(3 + 1), (u16)(3 + 2),
83
4, (u16)(4 + 2), (u16)(4 + 1),
84
5, (u16)(5 + 1), (u16)(5 + 2),
85
6, (u16)(6 + 2), (u16)(6 + 1),
86
7, (u16)(7 + 1), (u16)(7 + 2),
87
};
88
89
void IndexGenerator::AddStrip(int numVerts, int indexOffset, bool clockwise) {
90
int numTris = numVerts - 2;
91
if (numTris <= 0) {
92
return;
93
}
94
#ifdef _M_SSE
95
// In an SSE2 register we can fit 8 16-bit integers.
96
// However, we need to output a multiple of 3 indices.
97
// The first such multiple is 24, which means we'll generate 24 indices per cycle,
98
// which corresponds to 8 triangles. That's pretty cool.
99
100
// We allow ourselves to write some extra indices to avoid the fallback loop.
101
// That's alright as we're appending to a buffer - they will get overwritten anyway.
102
__m128i ibase8 = _mm_set1_epi16(indexOffset);
103
const __m128i *offsets = (const __m128i *)(clockwise ? offsets_clockwise : offsets_counter_clockwise);
104
__m128i *dst = (__m128i *)inds_;
105
__m128i offsets0 = _mm_add_epi16(ibase8, _mm_load_si128(offsets));
106
// A single store is always enough for two triangles, which is a very common case.
107
_mm_storeu_si128(dst, offsets0);
108
if (numTris > 2) {
109
__m128i offsets1 = _mm_add_epi16(ibase8, _mm_load_si128(offsets + 1));
110
_mm_storeu_si128(dst + 1, offsets1);
111
if (numTris > 5) {
112
__m128i offsets2 = _mm_add_epi16(ibase8, _mm_load_si128(offsets + 2));
113
_mm_storeu_si128(dst + 2, offsets2);
114
__m128i increment = _mm_set1_epi16(8);
115
int numChunks = (numTris + 7) >> 3;
116
for (int i = 1; i < numChunks; i++) {
117
dst += 3;
118
offsets0 = _mm_add_epi16(offsets0, increment);
119
offsets1 = _mm_add_epi16(offsets1, increment);
120
offsets2 = _mm_add_epi16(offsets2, increment);
121
_mm_storeu_si128(dst, offsets0);
122
_mm_storeu_si128(dst + 1, offsets1);
123
_mm_storeu_si128(dst + 2, offsets2);
124
}
125
}
126
}
127
inds_ += numTris * 3;
128
// wind doesn't need to be updated, an even number of triangles have been drawn.
129
#elif PPSSPP_ARCH(ARM_NEON)
130
uint16x8_t ibase8 = vdupq_n_u16(indexOffset);
131
const u16 *offsets = clockwise ? offsets_clockwise : offsets_counter_clockwise;
132
u16 *dst = inds_;
133
uint16x8_t offsets0 = vaddq_u16(ibase8, vld1q_u16(offsets));
134
vst1q_u16(dst, offsets0);
135
if (numTris > 2) {
136
uint16x8_t offsets1 = vaddq_u16(ibase8, vld1q_u16(offsets + 8));
137
vst1q_u16(dst + 8, offsets1);
138
if (numTris > 5) {
139
uint16x8_t offsets2 = vaddq_u16(ibase8, vld1q_u16(offsets + 16));
140
vst1q_u16(dst + 16, offsets2);
141
uint16x8_t increment = vdupq_n_u16(8);
142
int numChunks = (numTris + 7) >> 3;
143
for (int i = 1; i < numChunks; i++) {
144
dst += 3 * 8;
145
offsets0 = vaddq_u16(offsets0, increment);
146
offsets1 = vaddq_u16(offsets1, increment);
147
offsets2 = vaddq_u16(offsets2, increment);
148
vst1q_u16(dst, offsets0);
149
vst1q_u16(dst + 8, offsets1);
150
vst1q_u16(dst + 16, offsets2);
151
}
152
}
153
}
154
inds_ += numTris * 3;
155
#else
156
// Slow fallback loop.
157
int wind = clockwise ? 1 : 2;
158
int ibase = indexOffset;
159
size_t numPairs = numTris / 2;
160
u16 *outInds = inds_;
161
while (numPairs > 0) {
162
*outInds++ = ibase;
163
*outInds++ = ibase + wind;
164
*outInds++ = ibase + (wind ^ 3);
165
*outInds++ = ibase + 1;
166
*outInds++ = ibase + 1 + (wind ^ 3);
167
*outInds++ = ibase + 1 + wind;
168
ibase += 2;
169
numPairs--;
170
}
171
if (numTris & 1) {
172
*outInds++ = ibase;
173
*outInds++ = ibase + wind;
174
wind ^= 3; // toggle between 1 and 2
175
*outInds++ = ibase + wind;
176
}
177
inds_ = outInds;
178
#endif
179
}
180
181
// God of War uses this for text. Otherwise rare, not much reason to optimize.
182
void IndexGenerator::AddFan(int numVerts, int indexOffset, bool clockwise) {
183
const int numTris = numVerts - 2;
184
u16 *outInds = inds_;
185
const int v1 = clockwise ? 1 : 2;
186
const int v2 = clockwise ? 2 : 1;
187
for (int i = 0; i < numTris; i++) {
188
*outInds++ = indexOffset;
189
*outInds++ = indexOffset + i + v1;
190
*outInds++ = indexOffset + i + v2;
191
}
192
inds_ = outInds;
193
}
194
195
//Lines
196
void IndexGenerator::AddLineList(int numVerts, int indexOffset) {
197
u16 *outInds = inds_;
198
numVerts &= ~1;
199
for (int i = 0; i < numVerts; i += 2) {
200
*outInds++ = indexOffset + i;
201
*outInds++ = indexOffset + i + 1;
202
}
203
inds_ = outInds;
204
}
205
206
void IndexGenerator::AddLineStrip(int numVerts, int indexOffset) {
207
const int numLines = numVerts - 1;
208
u16 *outInds = inds_;
209
for (int i = 0; i < numLines; i++) {
210
*outInds++ = indexOffset + i;
211
*outInds++ = indexOffset + i + 1;
212
}
213
inds_ = outInds;
214
}
215
216
void IndexGenerator::AddRectangles(int numVerts, int indexOffset) {
217
u16 *outInds = inds_;
218
//rectangles always need 2 vertices, disregard the last one if there's an odd number
219
numVerts = numVerts & ~1;
220
for (int i = 0; i < numVerts; i += 2) {
221
*outInds++ = indexOffset + i;
222
*outInds++ = indexOffset + i + 1;
223
}
224
inds_ = outInds;
225
}
226
227
template <class ITypeLE>
228
void IndexGenerator::TranslatePoints(int numInds, const ITypeLE *inds, int indexOffset) {
229
u16 *outInds = inds_;
230
for (int i = 0; i < numInds; i++)
231
*outInds++ = indexOffset + inds[i];
232
inds_ = outInds;
233
}
234
235
template <class ITypeLE>
236
void IndexGenerator::TranslateLineList(int numInds, const ITypeLE *inds, int indexOffset) {
237
u16 *outInds = inds_;
238
numInds = numInds & ~1;
239
for (int i = 0; i < numInds; i += 2) {
240
*outInds++ = indexOffset + inds[i];
241
*outInds++ = indexOffset + inds[i + 1];
242
}
243
inds_ = outInds;
244
}
245
246
template <class ITypeLE>
247
void IndexGenerator::TranslateLineStrip(int numInds, const ITypeLE *inds, int indexOffset) {
248
int numLines = numInds - 1;
249
u16 *outInds = inds_;
250
for (int i = 0; i < numLines; i++) {
251
*outInds++ = indexOffset + inds[i];
252
*outInds++ = indexOffset + inds[i + 1];
253
}
254
inds_ = outInds;
255
}
256
257
template <class ITypeLE>
258
void IndexGenerator::TranslateList(int numInds, const ITypeLE *inds, int indexOffset, bool clockwise) {
259
// We only bother doing this minor optimization in triangle list, since it's by far the most
260
// common operation that can benefit.
261
if (sizeof(ITypeLE) == sizeof(inds_[0]) && indexOffset == 0 && clockwise) {
262
memcpy(inds_, inds, numInds * sizeof(ITypeLE));
263
inds_ += numInds;
264
} else {
265
u16 *outInds = inds_;
266
int numTris = numInds / 3; // Round to whole triangles
267
numInds = numTris * 3;
268
const int v1 = clockwise ? 1 : 2;
269
const int v2 = clockwise ? 2 : 1;
270
// TODO: This can actually be SIMD-d, although will need complex shuffles if clockwise.
271
for (int i = 0; i < numInds; i += 3) {
272
*outInds++ = indexOffset + inds[i];
273
*outInds++ = indexOffset + inds[i + v1];
274
*outInds++ = indexOffset + inds[i + v2];
275
}
276
inds_ = outInds;
277
}
278
}
279
280
template <class ITypeLE>
281
void IndexGenerator::TranslateStrip(int numInds, const ITypeLE *inds, int indexOffset, bool clockwise) {
282
int wind = clockwise ? 1 : 2;
283
int numTris = numInds - 2;
284
u16 *outInds = inds_;
285
for (int i = 0; i < numTris; i++) {
286
*outInds++ = indexOffset + inds[i];
287
*outInds++ = indexOffset + inds[i + wind];
288
wind ^= 3; // Toggle between 1 and 2
289
*outInds++ = indexOffset + inds[i + wind];
290
}
291
inds_ = outInds;
292
}
293
294
template <class ITypeLE>
295
void IndexGenerator::TranslateFan(int numInds, const ITypeLE *inds, int indexOffset, bool clockwise) {
296
if (numInds <= 0) return;
297
int numTris = numInds - 2;
298
u16 *outInds = inds_;
299
const int v1 = clockwise ? 1 : 2;
300
const int v2 = clockwise ? 2 : 1;
301
for (int i = 0; i < numTris; i++) {
302
*outInds++ = indexOffset + inds[0];
303
*outInds++ = indexOffset + inds[i + v1];
304
*outInds++ = indexOffset + inds[i + v2];
305
}
306
inds_ = outInds;
307
}
308
309
template <class ITypeLE>
310
inline void IndexGenerator::TranslateRectangles(int numInds, const ITypeLE *inds, int indexOffset) {
311
u16 *outInds = inds_;
312
//rectangles always need 2 vertices, disregard the last one if there's an odd number
313
numInds = numInds & ~1;
314
for (int i = 0; i < numInds; i += 2) {
315
*outInds++ = indexOffset + inds[i];
316
*outInds++ = indexOffset + inds[i+1];
317
}
318
inds_ = outInds;
319
}
320
321
// Could template this too, but would have to define in header.
322
void IndexGenerator::TranslatePrim(int prim, int numInds, const u8 *inds, int indexOffset, bool clockwise) {
323
switch (prim) {
324
case GE_PRIM_POINTS: TranslatePoints<u8>(numInds, inds, indexOffset); break;
325
case GE_PRIM_LINES: TranslateLineList<u8>(numInds, inds, indexOffset); break;
326
case GE_PRIM_LINE_STRIP: TranslateLineStrip<u8>(numInds, inds, indexOffset); break;
327
case GE_PRIM_TRIANGLES: TranslateList<u8>(numInds, inds, indexOffset, clockwise); break;
328
case GE_PRIM_TRIANGLE_STRIP: TranslateStrip<u8>(numInds, inds, indexOffset, clockwise); break;
329
case GE_PRIM_TRIANGLE_FAN: TranslateFan<u8>(numInds, inds, indexOffset, clockwise); break;
330
case GE_PRIM_RECTANGLES: TranslateRectangles<u8>(numInds, inds, indexOffset); break; // Same
331
}
332
}
333
334
void IndexGenerator::TranslatePrim(int prim, int numInds, const u16_le *inds, int indexOffset, bool clockwise) {
335
switch (prim) {
336
case GE_PRIM_POINTS: TranslatePoints<u16_le>(numInds, inds, indexOffset); break;
337
case GE_PRIM_LINES: TranslateLineList<u16_le>(numInds, inds, indexOffset); break;
338
case GE_PRIM_LINE_STRIP: TranslateLineStrip<u16_le>(numInds, inds, indexOffset); break;
339
case GE_PRIM_TRIANGLES: TranslateList<u16_le>(numInds, inds, indexOffset, clockwise); break;
340
case GE_PRIM_TRIANGLE_STRIP: TranslateStrip<u16_le>(numInds, inds, indexOffset, clockwise); break;
341
case GE_PRIM_TRIANGLE_FAN: TranslateFan<u16_le>(numInds, inds, indexOffset, clockwise); break;
342
case GE_PRIM_RECTANGLES: TranslateRectangles<u16_le>(numInds, inds, indexOffset); break; // Same
343
}
344
}
345
346
void IndexGenerator::TranslatePrim(int prim, int numInds, const u32_le *inds, int indexOffset, bool clockwise) {
347
switch (prim) {
348
case GE_PRIM_POINTS: TranslatePoints<u32_le>(numInds, inds, indexOffset); break;
349
case GE_PRIM_LINES: TranslateLineList<u32_le>(numInds, inds, indexOffset); break;
350
case GE_PRIM_LINE_STRIP: TranslateLineStrip<u32_le>(numInds, inds, indexOffset); break;
351
case GE_PRIM_TRIANGLES: TranslateList<u32_le>(numInds, inds, indexOffset, clockwise); break;
352
case GE_PRIM_TRIANGLE_STRIP: TranslateStrip<u32_le>(numInds, inds, indexOffset, clockwise); break;
353
case GE_PRIM_TRIANGLE_FAN: TranslateFan<u32_le>(numInds, inds, indexOffset, clockwise); break;
354
case GE_PRIM_RECTANGLES: TranslateRectangles<u32_le>(numInds, inds, indexOffset); break; // Same
355
}
356
}
357
358