#include <cstring>
#include "ppsspp_config.h"
#include "Common/Math/SIMDHeaders.h"
#include "GPU/Common/IndexGenerator.h"
const u8 IndexGenerator::indexedPrimitiveType[7] = {
GE_PRIM_POINTS,
GE_PRIM_LINES,
GE_PRIM_LINES,
GE_PRIM_TRIANGLES,
GE_PRIM_TRIANGLES,
GE_PRIM_TRIANGLES,
GE_PRIM_RECTANGLES,
};
void IndexGenerator::AddPrim(int prim, int vertexCount, int indexOffset, bool clockwise) {
switch (prim) {
case GE_PRIM_POINTS: AddPoints(vertexCount, indexOffset); break;
case GE_PRIM_LINES: AddLineList(vertexCount, indexOffset); break;
case GE_PRIM_LINE_STRIP: AddLineStrip(vertexCount, indexOffset); break;
case GE_PRIM_TRIANGLES: AddList(vertexCount, indexOffset, clockwise); break;
case GE_PRIM_TRIANGLE_STRIP: AddStrip(vertexCount, indexOffset, clockwise); break;
case GE_PRIM_TRIANGLE_FAN: AddFan(vertexCount, indexOffset, clockwise); break;
case GE_PRIM_RECTANGLES: AddRectangles(vertexCount, indexOffset); break;
}
}
void IndexGenerator::AddPoints(int numVerts, int indexOffset) {
u16 *outInds = inds_;
for (int i = 0; i < numVerts; i++)
*outInds++ = indexOffset + i;
inds_ = outInds;
}
void IndexGenerator::AddList(int numVerts, int indexOffset, bool clockwise) {
u16 *outInds = inds_;
const int v1 = clockwise ? 1 : 2;
const int v2 = clockwise ? 2 : 1;
for (int i = 0; i < numVerts; i += 3) {
*outInds++ = indexOffset + i;
*outInds++ = indexOffset + i + v1;
*outInds++ = indexOffset + i + v2;
}
inds_ = outInds;
}
alignas(16) static const u16 offsets_clockwise[24] = {
0, (u16)(0 + 1), (u16)(0 + 2),
(u16)(1 + 1), 1, (u16)(1 + 2),
2, (u16)(2 + 1), (u16)(2 + 2),
(u16)(3 + 1), 3, (u16)(3 + 2),
4, (u16)(4 + 1), (u16)(4 + 2),
(u16)(5 + 1), 5, (u16)(5 + 2),
6, (u16)(6 + 1), (u16)(6 + 2),
(u16)(7 + 1), 7, (u16)(7 + 2),
};
alignas(16) static const uint16_t offsets_counter_clockwise[24] = {
0, (u16)(0 + 2), (u16)(0 + 1),
1, (u16)(1 + 1), (u16)(1 + 2),
2, (u16)(2 + 2), (u16)(2 + 1),
3, (u16)(3 + 1), (u16)(3 + 2),
4, (u16)(4 + 2), (u16)(4 + 1),
5, (u16)(5 + 1), (u16)(5 + 2),
6, (u16)(6 + 2), (u16)(6 + 1),
7, (u16)(7 + 1), (u16)(7 + 2),
};
void IndexGenerator::AddStrip(int numVerts, int indexOffset, bool clockwise) {
int numTris = numVerts - 2;
if (numTris <= 0) {
return;
}
#ifdef _M_SSE
__m128i ibase8 = _mm_set1_epi16(indexOffset);
const __m128i *offsets = (const __m128i *)(clockwise ? offsets_clockwise : offsets_counter_clockwise);
__m128i *dst = (__m128i *)inds_;
__m128i offsets0 = _mm_add_epi16(ibase8, _mm_load_si128(offsets));
_mm_storeu_si128(dst, offsets0);
if (numTris > 2) {
__m128i offsets1 = _mm_add_epi16(ibase8, _mm_load_si128(offsets + 1));
_mm_storeu_si128(dst + 1, offsets1);
if (numTris > 5) {
__m128i offsets2 = _mm_add_epi16(ibase8, _mm_load_si128(offsets + 2));
_mm_storeu_si128(dst + 2, offsets2);
__m128i increment = _mm_set1_epi16(8);
int numChunks = (numTris + 7) >> 3;
for (int i = 1; i < numChunks; i++) {
dst += 3;
offsets0 = _mm_add_epi16(offsets0, increment);
offsets1 = _mm_add_epi16(offsets1, increment);
offsets2 = _mm_add_epi16(offsets2, increment);
_mm_storeu_si128(dst, offsets0);
_mm_storeu_si128(dst + 1, offsets1);
_mm_storeu_si128(dst + 2, offsets2);
}
}
}
inds_ += numTris * 3;
#elif PPSSPP_ARCH(ARM_NEON)
uint16x8_t ibase8 = vdupq_n_u16(indexOffset);
const u16 *offsets = clockwise ? offsets_clockwise : offsets_counter_clockwise;
u16 *dst = inds_;
uint16x8_t offsets0 = vaddq_u16(ibase8, vld1q_u16(offsets));
vst1q_u16(dst, offsets0);
if (numTris > 2) {
uint16x8_t offsets1 = vaddq_u16(ibase8, vld1q_u16(offsets + 8));
vst1q_u16(dst + 8, offsets1);
if (numTris > 5) {
uint16x8_t offsets2 = vaddq_u16(ibase8, vld1q_u16(offsets + 16));
vst1q_u16(dst + 16, offsets2);
uint16x8_t increment = vdupq_n_u16(8);
int numChunks = (numTris + 7) >> 3;
for (int i = 1; i < numChunks; i++) {
dst += 3 * 8;
offsets0 = vaddq_u16(offsets0, increment);
offsets1 = vaddq_u16(offsets1, increment);
offsets2 = vaddq_u16(offsets2, increment);
vst1q_u16(dst, offsets0);
vst1q_u16(dst + 8, offsets1);
vst1q_u16(dst + 16, offsets2);
}
}
}
inds_ += numTris * 3;
#else
int wind = clockwise ? 1 : 2;
int ibase = indexOffset;
size_t numPairs = numTris / 2;
u16 *outInds = inds_;
while (numPairs > 0) {
*outInds++ = ibase;
*outInds++ = ibase + wind;
*outInds++ = ibase + (wind ^ 3);
*outInds++ = ibase + 1;
*outInds++ = ibase + 1 + (wind ^ 3);
*outInds++ = ibase + 1 + wind;
ibase += 2;
numPairs--;
}
if (numTris & 1) {
*outInds++ = ibase;
*outInds++ = ibase + wind;
wind ^= 3;
*outInds++ = ibase + wind;
}
inds_ = outInds;
#endif
}
void IndexGenerator::AddFan(int numVerts, int indexOffset, bool clockwise) {
const int numTris = numVerts - 2;
u16 *outInds = inds_;
const int v1 = clockwise ? 1 : 2;
const int v2 = clockwise ? 2 : 1;
for (int i = 0; i < numTris; i++) {
*outInds++ = indexOffset;
*outInds++ = indexOffset + i + v1;
*outInds++ = indexOffset + i + v2;
}
inds_ = outInds;
}
void IndexGenerator::AddLineList(int numVerts, int indexOffset) {
u16 *outInds = inds_;
numVerts &= ~1;
for (int i = 0; i < numVerts; i += 2) {
*outInds++ = indexOffset + i;
*outInds++ = indexOffset + i + 1;
}
inds_ = outInds;
}
void IndexGenerator::AddLineStrip(int numVerts, int indexOffset) {
const int numLines = numVerts - 1;
u16 *outInds = inds_;
for (int i = 0; i < numLines; i++) {
*outInds++ = indexOffset + i;
*outInds++ = indexOffset + i + 1;
}
inds_ = outInds;
}
void IndexGenerator::AddRectangles(int numVerts, int indexOffset) {
u16 *outInds = inds_;
numVerts = numVerts & ~1;
for (int i = 0; i < numVerts; i += 2) {
*outInds++ = indexOffset + i;
*outInds++ = indexOffset + i + 1;
}
inds_ = outInds;
}
template <class ITypeLE>
void IndexGenerator::TranslatePoints(int numInds, const ITypeLE *inds, int indexOffset) {
u16 *outInds = inds_;
for (int i = 0; i < numInds; i++)
*outInds++ = indexOffset + inds[i];
inds_ = outInds;
}
template <class ITypeLE>
void IndexGenerator::TranslateLineList(int numInds, const ITypeLE *inds, int indexOffset) {
u16 *outInds = inds_;
numInds = numInds & ~1;
for (int i = 0; i < numInds; i += 2) {
*outInds++ = indexOffset + inds[i];
*outInds++ = indexOffset + inds[i + 1];
}
inds_ = outInds;
}
template <class ITypeLE>
void IndexGenerator::TranslateLineStrip(int numInds, const ITypeLE *inds, int indexOffset) {
int numLines = numInds - 1;
u16 *outInds = inds_;
for (int i = 0; i < numLines; i++) {
*outInds++ = indexOffset + inds[i];
*outInds++ = indexOffset + inds[i + 1];
}
inds_ = outInds;
}
template <class ITypeLE>
void IndexGenerator::TranslateList(int numInds, const ITypeLE *inds, int indexOffset, bool clockwise) {
if (sizeof(ITypeLE) == sizeof(inds_[0]) && indexOffset == 0 && clockwise) {
memcpy(inds_, inds, numInds * sizeof(ITypeLE));
inds_ += numInds;
} else {
u16 *outInds = inds_;
int numTris = numInds / 3;
numInds = numTris * 3;
const int v1 = clockwise ? 1 : 2;
const int v2 = clockwise ? 2 : 1;
for (int i = 0; i < numInds; i += 3) {
*outInds++ = indexOffset + inds[i];
*outInds++ = indexOffset + inds[i + v1];
*outInds++ = indexOffset + inds[i + v2];
}
inds_ = outInds;
}
}
template <class ITypeLE>
void IndexGenerator::TranslateStrip(int numInds, const ITypeLE *inds, int indexOffset, bool clockwise) {
int wind = clockwise ? 1 : 2;
int numTris = numInds - 2;
u16 *outInds = inds_;
for (int i = 0; i < numTris; i++) {
*outInds++ = indexOffset + inds[i];
*outInds++ = indexOffset + inds[i + wind];
wind ^= 3;
*outInds++ = indexOffset + inds[i + wind];
}
inds_ = outInds;
}
template <class ITypeLE>
void IndexGenerator::TranslateFan(int numInds, const ITypeLE *inds, int indexOffset, bool clockwise) {
if (numInds <= 0) return;
int numTris = numInds - 2;
u16 *outInds = inds_;
const int v1 = clockwise ? 1 : 2;
const int v2 = clockwise ? 2 : 1;
for (int i = 0; i < numTris; i++) {
*outInds++ = indexOffset + inds[0];
*outInds++ = indexOffset + inds[i + v1];
*outInds++ = indexOffset + inds[i + v2];
}
inds_ = outInds;
}
template <class ITypeLE>
inline void IndexGenerator::TranslateRectangles(int numInds, const ITypeLE *inds, int indexOffset) {
u16 *outInds = inds_;
numInds = numInds & ~1;
for (int i = 0; i < numInds; i += 2) {
*outInds++ = indexOffset + inds[i];
*outInds++ = indexOffset + inds[i+1];
}
inds_ = outInds;
}
void IndexGenerator::TranslatePrim(int prim, int numInds, const u8 *inds, int indexOffset, bool clockwise) {
switch (prim) {
case GE_PRIM_POINTS: TranslatePoints<u8>(numInds, inds, indexOffset); break;
case GE_PRIM_LINES: TranslateLineList<u8>(numInds, inds, indexOffset); break;
case GE_PRIM_LINE_STRIP: TranslateLineStrip<u8>(numInds, inds, indexOffset); break;
case GE_PRIM_TRIANGLES: TranslateList<u8>(numInds, inds, indexOffset, clockwise); break;
case GE_PRIM_TRIANGLE_STRIP: TranslateStrip<u8>(numInds, inds, indexOffset, clockwise); break;
case GE_PRIM_TRIANGLE_FAN: TranslateFan<u8>(numInds, inds, indexOffset, clockwise); break;
case GE_PRIM_RECTANGLES: TranslateRectangles<u8>(numInds, inds, indexOffset); break;
}
}
void IndexGenerator::TranslatePrim(int prim, int numInds, const u16_le *inds, int indexOffset, bool clockwise) {
switch (prim) {
case GE_PRIM_POINTS: TranslatePoints<u16_le>(numInds, inds, indexOffset); break;
case GE_PRIM_LINES: TranslateLineList<u16_le>(numInds, inds, indexOffset); break;
case GE_PRIM_LINE_STRIP: TranslateLineStrip<u16_le>(numInds, inds, indexOffset); break;
case GE_PRIM_TRIANGLES: TranslateList<u16_le>(numInds, inds, indexOffset, clockwise); break;
case GE_PRIM_TRIANGLE_STRIP: TranslateStrip<u16_le>(numInds, inds, indexOffset, clockwise); break;
case GE_PRIM_TRIANGLE_FAN: TranslateFan<u16_le>(numInds, inds, indexOffset, clockwise); break;
case GE_PRIM_RECTANGLES: TranslateRectangles<u16_le>(numInds, inds, indexOffset); break;
}
}
void IndexGenerator::TranslatePrim(int prim, int numInds, const u32_le *inds, int indexOffset, bool clockwise) {
switch (prim) {
case GE_PRIM_POINTS: TranslatePoints<u32_le>(numInds, inds, indexOffset); break;
case GE_PRIM_LINES: TranslateLineList<u32_le>(numInds, inds, indexOffset); break;
case GE_PRIM_LINE_STRIP: TranslateLineStrip<u32_le>(numInds, inds, indexOffset); break;
case GE_PRIM_TRIANGLES: TranslateList<u32_le>(numInds, inds, indexOffset, clockwise); break;
case GE_PRIM_TRIANGLE_STRIP: TranslateStrip<u32_le>(numInds, inds, indexOffset, clockwise); break;
case GE_PRIM_TRIANGLE_FAN: TranslateFan<u32_le>(numInds, inds, indexOffset, clockwise); break;
case GE_PRIM_RECTANGLES: TranslateRectangles<u32_le>(numInds, inds, indexOffset); break;
}
}