#pragma once
#include "ppsspp_config.h"
#include <cstdint>
#include <string>
#include <unordered_map>
#include <vector>
#include "Common/Common.h"
#include "Common/Math/SIMDHeaders.h"
#if PPSSPP_ARCH(ARM64_NEON)
#if defined(_MSC_VER) && PPSSPP_ARCH(ARM64)
#include <arm64_neon.h>
#else
#include <arm_neon.h>
#endif
#endif
#if PPSSPP_ARCH(ARM)
#include "Common/ArmEmitter.h"
#elif PPSSPP_ARCH(ARM64_NEON)
#include "Common/Arm64Emitter.h"
#elif PPSSPP_ARCH(X86) || PPSSPP_ARCH(AMD64)
#include "Common/x64Emitter.h"
#elif PPSSPP_ARCH(MIPS)
#include "Common/MipsEmitter.h"
#elif PPSSPP_ARCH(RISCV64)
#include "Common/RiscVEmitter.h"
#elif PPSSPP_ARCH(LOONGARCH64)
#include "Common/LoongArch64Emitter.h"
#else
#include "Common/FakeEmitter.h"
#endif
#include "GPU/Math3D.h"
namespace Rasterizer {
#if PPSSPP_ARCH(ARM)
typedef ArmGen::ARMXCodeBlock BaseCodeBlock;
#elif PPSSPP_ARCH(ARM64_NEON)
typedef Arm64Gen::ARM64CodeBlock BaseCodeBlock;
#elif PPSSPP_ARCH(X86) || PPSSPP_ARCH(AMD64)
typedef Gen::XCodeBlock BaseCodeBlock;
#elif PPSSPP_ARCH(MIPS)
typedef MIPSGen::MIPSCodeBlock BaseCodeBlock;
#elif PPSSPP_ARCH(RISCV64)
typedef RiscVGen::RiscVCodeBlock BaseCodeBlock;
#elif PPSSPP_ARCH(LOONGARCH64)
typedef LoongArch64Gen::LoongArch64CodeBlock BaseCodeBlock;
#else
typedef FakeGen::FakeXCodeBlock BaseCodeBlock;
#endif
#if PPSSPP_ARCH(ARM64_NEON)
typedef int32x4_t Vec4IntArg;
typedef int32x4_t Vec4IntResult;
typedef float32x4_t Vec4FloatArg;
static inline Vec4IntArg ToVec4IntArg(const Math3D::Vec4<int> &a) { return vld1q_s32(a.AsArray()); }
static inline Vec4IntArg ToVec4IntArg(const Vec4IntResult &a) { return a; }
static inline Vec4IntResult ToVec4IntResult(const Math3D::Vec4<int> &a) { return vld1q_s32(a.AsArray()); }
static inline Vec4FloatArg ToVec4FloatArg(const Math3D::Vec4<float> &a) { return vld1q_f32(a.AsArray()); }
#elif PPSSPP_ARCH(X86) || PPSSPP_ARCH(AMD64)
typedef __m128i Vec4IntArg;
typedef __m128i Vec4IntResult;
typedef __m128 Vec4FloatArg;
static inline Vec4IntArg ToVec4IntArg(const Math3D::Vec4<int> &a) { return a.ivec; }
static inline Vec4IntArg ToVec4IntArg(const Vec4IntResult &a) { return a; }
static inline Vec4IntResult ToVec4IntResult(const Math3D::Vec4<int> &a) { return a.ivec; }
static inline Vec4FloatArg ToVec4FloatArg(const Math3D::Vec4<float> &a) { return a.vec; }
#else
typedef const Math3D::Vec4<int> &Vec4IntArg;
typedef Math3D::Vec4<int> Vec4IntResult;
typedef const Math3D::Vec4<float> &Vec4FloatArg;
static inline Vec4IntArg ToVec4IntArg(const Math3D::Vec4<int> &a) { return a; }
static inline Vec4IntResult ToVec4IntResult(const Math3D::Vec4<int> &a) { return a; }
static inline Vec4FloatArg ToVec4FloatArg(const Math3D::Vec4<float> &a) { return a; }
#endif
#if PPSSPP_ARCH(AMD64) && PPSSPP_PLATFORM(WINDOWS) && (defined(_MSC_VER) || defined(__clang__) || defined(__INTEL_COMPILER))
#define SOFTRAST_CALL __vectorcall
#else
#define SOFTRAST_CALL
#endif
struct RegCache {
enum Purpose {
FLAG_GEN = 0x0100,
FLAG_TEMP = 0x1000,
VEC_ZERO = 0x0000,
VEC_RESULT = 0x0001,
VEC_RESULT1 = 0x0002,
VEC_U1 = 0x0003,
VEC_V1 = 0x0004,
VEC_INDEX = 0x0005,
VEC_INDEX1 = 0x0006,
GEN_SRC_ALPHA = 0x0100,
GEN_ID = 0x0101,
GEN_STENCIL = 0x0103,
GEN_COLOR_OFF = 0x0104,
GEN_DEPTH_OFF = 0x0105,
GEN_RESULT = 0x0106,
GEN_SHIFTVAL = 0x0107,
GEN_ARG_X = 0x0180,
GEN_ARG_Y = 0x0181,
GEN_ARG_Z = 0x0182,
GEN_ARG_FOG = 0x0183,
GEN_ARG_ID = 0x0184,
GEN_ARG_U = 0x0185,
GEN_ARG_V = 0x0186,
GEN_ARG_TEXPTR = 0x0187,
GEN_ARG_BUFW = 0x0188,
GEN_ARG_LEVEL = 0x0189,
GEN_ARG_TEXPTR_PTR = 0x018A,
GEN_ARG_BUFW_PTR = 0x018B,
GEN_ARG_LEVELFRAC = 0x018C,
VEC_ARG_COLOR = 0x0080,
VEC_ARG_MASK = 0x0081,
VEC_ARG_U = 0x0082,
VEC_ARG_V = 0x0083,
VEC_ARG_S = 0x0084,
VEC_ARG_T = 0x0085,
VEC_FRAC = 0x0086,
VEC_TEMP0 = 0x1000,
VEC_TEMP1 = 0x1001,
VEC_TEMP2 = 0x1002,
VEC_TEMP3 = 0x1003,
VEC_TEMP4 = 0x1004,
VEC_TEMP5 = 0x1005,
GEN_TEMP0 = 0x1100,
GEN_TEMP1 = 0x1101,
GEN_TEMP2 = 0x1102,
GEN_TEMP3 = 0x1103,
GEN_TEMP4 = 0x1104,
GEN_TEMP5 = 0x1105,
GEN_TEMP_HELPER = 0x1106,
VEC_INVALID = 0xFEFF,
GEN_INVALID = 0xFFFF,
};
#if PPSSPP_ARCH(ARM)
typedef ArmGen::ARMReg Reg;
static constexpr Reg REG_INVALID_VALUE = ArmGen::INVALID_REG;
#elif PPSSPP_ARCH(ARM64_NEON)
typedef Arm64Gen::ARM64Reg Reg;
static constexpr Reg REG_INVALID_VALUE = Arm64Gen::INVALID_REG;
#elif PPSSPP_ARCH(X86) || PPSSPP_ARCH(AMD64)
typedef Gen::X64Reg Reg;
static constexpr Reg REG_INVALID_VALUE = Gen::INVALID_REG;
#elif PPSSPP_ARCH(MIPS)
typedef MIPSGen::MIPSReg Reg;
static constexpr Reg REG_INVALID_VALUE = MIPSGen::INVALID_REG;
#elif PPSSPP_ARCH(RISCV64)
typedef RiscVGen::RiscVReg Reg;
static constexpr Reg REG_INVALID_VALUE = RiscVGen::INVALID_REG;
#elif PPSSPP_ARCH(LOONGARCH64)
typedef LoongArch64Gen::LoongArch64Reg Reg;
static constexpr Reg REG_INVALID_VALUE = LoongArch64Gen::INVALID_REG;
#else
typedef int Reg;
static constexpr Reg REG_INVALID_VALUE = -1;
#endif
struct RegStatus {
Reg reg;
Purpose purpose;
uint8_t locked = 0;
bool forceRetained = false;
bool everLocked = false;
};
void SetupABI(const std::vector<Purpose> &args, bool forceRetain = true);
void Reset(bool validate);
void Add(Reg r, Purpose p);
void Change(Purpose history, Purpose destiny);
void Release(Reg &r, Purpose p);
void Unlock(Reg &r, Purpose p);
bool Has(Purpose p);
Reg Find(Purpose p);
Reg Alloc(Purpose p);
void ForceRetain(Purpose p);
void ForceRelease(Purpose p);
void GrabReg(Reg r, Purpose p, bool &needsSwap, Reg swapReg, Purpose swapPurpose);
bool ChangeReg(Reg r, Purpose p);
bool UsedReg(Reg r, Purpose flag);
private:
RegStatus *FindReg(Reg r, Purpose p);
std::vector<RegStatus> regs;
};
class CodeBlock : public BaseCodeBlock {
public:
virtual std::string DescribeCodePtr(const u8 *ptr);
virtual void Clear();
protected:
CodeBlock(int size);
RegCache::Reg GetZeroVec();
void Describe(const std::string &message);
int WriteProlog(int extraStack, const std::vector<RegCache::Reg> &vec, const std::vector<RegCache::Reg> &gen);
const u8 *WriteFinalizedEpilog();
void WriteSimpleConst16x8(const u8 *&ptr, uint8_t value);
void WriteSimpleConst8x16(const u8 *&ptr, uint16_t value);
void WriteSimpleConst4x32(const u8 *&ptr, uint32_t value);
void WriteDynamicConst16x8(const u8 *&ptr, uint8_t value);
void WriteDynamicConst8x16(const u8 *&ptr, uint16_t value);
void WriteDynamicConst4x32(const u8 *&ptr, uint32_t value);
#if PPSSPP_ARCH(ARM64_NEON)
Arm64Gen::ARM64FloatEmitter fp;
#endif
std::unordered_map<const u8 *, std::string> descriptions_;
Rasterizer::RegCache regCache_;
private:
u8 *lastPrologStart_ = nullptr;
u8 *lastPrologEnd_ = nullptr;
int savedStack_;
int firstVecStack_;
std::vector<RegCache::Reg> prologVec_;
std::vector<RegCache::Reg> prologGen_;
};
};