#include "ppsspp_config.h"
#if PPSSPP_ARCH(ARM64) || (PPSSPP_PLATFORM(WINDOWS) && !defined(__LIBRETRO__))
#ifndef offsetof
#include <cstddef>
#endif
#include "Core/MIPS/ARM64/Arm64IRJit.h"
#include "Core/MIPS/ARM64/Arm64IRRegCache.h"
#define CONDITIONAL_DISABLE {}
#define DISABLE { CompIR_Generic(inst); return; }
#define INVALIDOP { _assert_msg_(false, "Invalid IR inst %d", (int)inst.op); CompIR_Generic(inst); return; }
namespace MIPSComp {
using namespace Arm64Gen;
using namespace Arm64IRJitConstants;
void Arm64JitBackend::CompIR_FArith(IRInst inst) {
CONDITIONAL_DISABLE;
switch (inst.op) {
case IROp::FAdd:
regs_.Map(inst);
fp_.FADD(regs_.F(inst.dest), regs_.F(inst.src1), regs_.F(inst.src2));
break;
case IROp::FSub:
regs_.Map(inst);
fp_.FSUB(regs_.F(inst.dest), regs_.F(inst.src1), regs_.F(inst.src2));
break;
case IROp::FMul:
regs_.Map(inst);
fp_.FMUL(regs_.F(inst.dest), regs_.F(inst.src1), regs_.F(inst.src2));
break;
case IROp::FDiv:
regs_.Map(inst);
fp_.FDIV(regs_.F(inst.dest), regs_.F(inst.src1), regs_.F(inst.src2));
break;
case IROp::FSqrt:
regs_.Map(inst);
fp_.FSQRT(regs_.F(inst.dest), regs_.F(inst.src1));
break;
case IROp::FNeg:
regs_.Map(inst);
fp_.FNEG(regs_.F(inst.dest), regs_.F(inst.src1));
break;
default:
INVALIDOP;
break;
}
}
void Arm64JitBackend::CompIR_FAssign(IRInst inst) {
CONDITIONAL_DISABLE;
switch (inst.op) {
case IROp::FMov:
if (inst.dest != inst.src1) {
regs_.Map(inst);
fp_.FMOV(regs_.F(inst.dest), regs_.F(inst.src1));
}
break;
case IROp::FAbs:
regs_.Map(inst);
fp_.FABS(regs_.F(inst.dest), regs_.F(inst.src1));
break;
case IROp::FSign:
regs_.Map(inst);
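// Compare src1 against 0.0 up front; the EQ result is used at the end to force a 0.0 output.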
fp_.FCMP(regs_.F(inst.src1));
fp_.MOVI2FDUP(EncodeRegToDouble(SCRATCHF1), 1.0f);
fp_.MVNI(32, EncodeRegToDouble(SCRATCHF2), 0x80, 24);
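// SCRATCHF1 = 1.0f in every lane, SCRATCHF2 = 0x7FFFFFFF (all bits except the sign, via MVNI of 0x80 << 24).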
if (inst.dest != inst.src1)
fp_.FMOV(regs_.FD(inst.dest), regs_.FD(inst.src1));
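// BIT copies the non-sign bits of 1.0f into dest, keeping dest's own sign bit: result is +/-1.0f.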
fp_.BIT(regs_.FD(inst.dest), EncodeRegToDouble(SCRATCHF1), EncodeRegToDouble(SCRATCHF2));
fp_.MOVI2FDUP(EncodeRegToDouble(SCRATCHF1), 0.0f);
fp_.FCSEL(regs_.F(inst.dest), SCRATCHF1, regs_.F(inst.dest), CC_EQ);
break;
default:
INVALIDOP;
break;
}
}
void Arm64JitBackend::CompIR_FCompare(IRInst inst) {
CONDITIONAL_DISABLE;
constexpr IRReg IRREG_VFPU_CC = IRREG_VFPU_CTRL_BASE + VFPU_CTRL_CC;
switch (inst.op) {
case IROp::FCmp:
switch (inst.dest) {
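// After FCMP, an unordered (NaN) result sets C and V. The Ordered modes pick condition codes
// that read as false for unordered results, while the Unordered modes read as true.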
case IRFpCompareMode::False:
regs_.SetGPRImm(IRREG_FPCOND, 0);
break;
case IRFpCompareMode::EitherUnordered:
regs_.MapWithExtra(inst, { { 'G', IRREG_FPCOND, 1, MIPSMap::NOINIT } });
fp_.FCMP(regs_.F(inst.src1), regs_.F(inst.src2));
CSET(regs_.R(IRREG_FPCOND), CC_VS);
break;
case IRFpCompareMode::EqualOrdered:
regs_.MapWithExtra(inst, { { 'G', IRREG_FPCOND, 1, MIPSMap::NOINIT } });
fp_.FCMP(regs_.F(inst.src1), regs_.F(inst.src2));
CSET(regs_.R(IRREG_FPCOND), CC_EQ);
break;
case IRFpCompareMode::EqualUnordered:
regs_.MapWithExtra(inst, { { 'G', IRREG_FPCOND, 1, MIPSMap::NOINIT } });
fp_.FCMP(regs_.F(inst.src1), regs_.F(inst.src2));
CSET(regs_.R(IRREG_FPCOND), CC_EQ);
CSINC(regs_.R(IRREG_FPCOND), regs_.R(IRREG_FPCOND), WZR, CC_VC);
break;
case IRFpCompareMode::LessEqualOrdered:
regs_.MapWithExtra(inst, { { 'G', IRREG_FPCOND, 1, MIPSMap::NOINIT } });
fp_.FCMP(regs_.F(inst.src1), regs_.F(inst.src2));
CSET(regs_.R(IRREG_FPCOND), CC_LS);
break;
case IRFpCompareMode::LessEqualUnordered:
regs_.MapWithExtra(inst, { { 'G', IRREG_FPCOND, 1, MIPSMap::NOINIT } });
fp_.FCMP(regs_.F(inst.src1), regs_.F(inst.src2));
CSET(regs_.R(IRREG_FPCOND), CC_LE);
break;
case IRFpCompareMode::LessOrdered:
regs_.MapWithExtra(inst, { { 'G', IRREG_FPCOND, 1, MIPSMap::NOINIT } });
fp_.FCMP(regs_.F(inst.src1), regs_.F(inst.src2));
CSET(regs_.R(IRREG_FPCOND), CC_LO);
break;
case IRFpCompareMode::LessUnordered:
regs_.MapWithExtra(inst, { { 'G', IRREG_FPCOND, 1, MIPSMap::NOINIT } });
fp_.FCMP(regs_.F(inst.src1), regs_.F(inst.src2));
CSET(regs_.R(IRREG_FPCOND), CC_LT);
break;
default:
_assert_msg_(false, "Unexpected IRFpCompareMode %d", inst.dest);
}
break;
case IROp::FCmovVfpuCC:
regs_.MapWithExtra(inst, { { 'G', IRREG_VFPU_CC, 1, MIPSMap::INIT } });
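// Test the VFPU CC bit selected by the low 4 bits of src2; bit 7 of src2 flips the sense of the move.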
TSTI2R(regs_.R(IRREG_VFPU_CC), 1ULL << (inst.src2 & 0xF));
if ((inst.src2 >> 7) & 1) {
fp_.FCSEL(regs_.F(inst.dest), regs_.F(inst.dest), regs_.F(inst.src1), CC_EQ);
} else {
fp_.FCSEL(regs_.F(inst.dest), regs_.F(inst.dest), regs_.F(inst.src1), CC_NEQ);
}
break;
case IROp::FCmpVfpuBit:
regs_.MapGPR(IRREG_VFPU_CC, MIPSMap::DIRTY);
switch (VCondition(inst.dest & 0xF)) {
case VC_EQ:
regs_.Map(inst);
fp_.FCMP(regs_.F(inst.src1), regs_.F(inst.src2));
CSET(SCRATCH1, CC_EQ);
break;
case VC_NE:
regs_.Map(inst);
fp_.FCMP(regs_.F(inst.src1), regs_.F(inst.src2));
CSET(SCRATCH1, CC_NEQ);
break;
case VC_LT:
regs_.Map(inst);
fp_.FCMP(regs_.F(inst.src1), regs_.F(inst.src2));
CSET(SCRATCH1, CC_LO);
break;
case VC_LE:
regs_.Map(inst);
fp_.FCMP(regs_.F(inst.src1), regs_.F(inst.src2));
CSET(SCRATCH1, CC_LS);
break;
case VC_GT:
regs_.Map(inst);
fp_.FCMP(regs_.F(inst.src1), regs_.F(inst.src2));
CSET(SCRATCH1, CC_GT);
break;
case VC_GE:
regs_.Map(inst);
fp_.FCMP(regs_.F(inst.src1), regs_.F(inst.src2));
CSET(SCRATCH1, CC_GE);
break;
case VC_EZ:
regs_.MapFPR(inst.src1);
fp_.FCMP(regs_.F(inst.src1));
CSET(SCRATCH1, CC_EQ);
break;
case VC_NZ:
regs_.MapFPR(inst.src1);
fp_.FCMP(regs_.F(inst.src1));
CSET(SCRATCH1, CC_NEQ);
break;
case VC_EN:
regs_.MapFPR(inst.src1);
fp_.FCMP(regs_.F(inst.src1));
CSET(SCRATCH1, CC_VS);
break;
case VC_NN:
regs_.MapFPR(inst.src1);
fp_.FCMP(regs_.F(inst.src1));
CSET(SCRATCH1, CC_VC);
break;
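// The EI/NI/ES/NS checks below compare |src1| against +infinity (bit pattern 0x7F800000).
// HS/LO treat an unordered (NaN) compare as "set", so ES/NS also catch NaNs; GE/LT do not.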
case VC_EI:
regs_.MapFPR(inst.src1);
MOVI2R(SCRATCH1, 0x7F800000);
fp_.FMOV(SCRATCHF2, SCRATCH1);
fp_.FABS(SCRATCHF1, regs_.F(inst.src1));
fp_.FCMP(SCRATCHF1, SCRATCHF2);
CSET(SCRATCH1, CC_GE);
break;
case VC_NI:
regs_.MapFPR(inst.src1);
MOVI2R(SCRATCH1, 0x7F800000);
fp_.FMOV(SCRATCHF2, SCRATCH1);
fp_.FABS(SCRATCHF1, regs_.F(inst.src1));
fp_.FCMP(SCRATCHF1, SCRATCHF2);
CSET(SCRATCH1, CC_LT);
break;
case VC_ES:
regs_.MapFPR(inst.src1);
MOVI2R(SCRATCH1, 0x7F800000);
fp_.FMOV(SCRATCHF2, SCRATCH1);
fp_.FABS(SCRATCHF1, regs_.F(inst.src1));
fp_.FCMP(SCRATCHF1, SCRATCHF2);
CSET(SCRATCH1, CC_HS);
break;
case VC_NS:
regs_.MapFPR(inst.src1);
MOVI2R(SCRATCH1, 0x7F800000);
fp_.FMOV(SCRATCHF2, SCRATCH1);
fp_.FABS(SCRATCHF1, regs_.F(inst.src1));
fp_.FCMP(SCRATCHF1, SCRATCHF2);
CSET(SCRATCH1, CC_LO);
break;
case VC_TR:
MOVI2R(SCRATCH1, 1);
break;
case VC_FL:
MOVI2R(SCRATCH1, 0);
break;
}
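// Insert the single result bit into VFPU_CC at bit position (dest >> 4).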
BFI(regs_.R(IRREG_VFPU_CC), SCRATCH1, inst.dest >> 4, 1);
break;
case IROp::FCmpVfpuAggregate:
regs_.MapGPR(IRREG_VFPU_CC, MIPSMap::DIRTY);
if (inst.dest == 1) {
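// A one-bit mask: "any" (bit 4) and "all" (bit 5) both equal bit 0, so just replicate it.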
BFI(regs_.R(IRREG_VFPU_CC), regs_.R(IRREG_VFPU_CC), 4, 1);
BFI(regs_.R(IRREG_VFPU_CC), regs_.R(IRREG_VFPU_CC), 5, 1);
} else {
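// "Any" = (CC & mask) != 0, "all" = no mask bit is clear in CC; insert them at bits 4 and 5.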
MOVI2R(SCRATCH1, inst.dest);
TST(regs_.R(IRREG_VFPU_CC), SCRATCH1);
CSET(SCRATCH2, CC_NEQ);
BICS(WZR, SCRATCH1, regs_.R(IRREG_VFPU_CC));
CSET(SCRATCH1, CC_EQ);
BFI(regs_.R(IRREG_VFPU_CC), SCRATCH2, 4, 1);
BFI(regs_.R(IRREG_VFPU_CC), SCRATCH1, 5, 1);
}
break;
default:
INVALIDOP;
break;
}
}
void Arm64JitBackend::CompIR_FCondAssign(IRInst inst) {
CONDITIONAL_DISABLE;
regs_.Map(inst);
fp_.FCMP(regs_.F(inst.src1), regs_.F(inst.src2));
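// A NaN operand makes the compare unordered (V set); branch to the bit-pattern fallback below.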
FixupBranch unordered = B(CC_VS);
switch (inst.op) {
case IROp::FMin:
fp_.FMIN(regs_.F(inst.dest), regs_.F(inst.src1), regs_.F(inst.src2));
break;
case IROp::FMax:
fp_.FMAX(regs_.F(inst.dest), regs_.F(inst.src1), regs_.F(inst.src2));
break;
default:
INVALIDOP;
break;
}
FixupBranch orderedDone = B();
SetJumpTarget(unordered);
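// Unordered fallback (a NaN is involved): pick by comparing the raw bit patterns instead.
// Signed-integer SMIN/SMAX gives that ordering except when both sign bits are set, where it
// reverses; CMLT builds an all-ones mask for that case so BIT can swap in the other pick.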
fp_.AND(EncodeRegToDouble(SCRATCHF1), regs_.FD(inst.src1), regs_.FD(inst.src2));
fp_.CMLT(32, EncodeRegToDouble(SCRATCHF1), EncodeRegToDouble(SCRATCHF1));
switch (inst.op) {
case IROp::FMin:
fp_.SMAX(32, EncodeRegToDouble(SCRATCHF2), regs_.FD(inst.src1), regs_.FD(inst.src2));
fp_.SMIN(32, regs_.FD(inst.dest), regs_.FD(inst.src1), regs_.FD(inst.src2));
break;
case IROp::FMax:
fp_.SMIN(32, EncodeRegToDouble(SCRATCHF2), regs_.FD(inst.src1), regs_.FD(inst.src2));
fp_.SMAX(32, regs_.FD(inst.dest), regs_.FD(inst.src1), regs_.FD(inst.src2));
break;
default:
INVALIDOP;
break;
}
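// Where both inputs were negative (mask is all ones), replace dest with the opposite pick.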
fp_.BIT(regs_.FD(inst.dest), EncodeRegToDouble(SCRATCHF2), EncodeRegToDouble(SCRATCHF1));
SetJumpTarget(orderedDone);
}
void Arm64JitBackend::CompIR_FCvt(IRInst inst) {
CONDITIONAL_DISABLE;
switch (inst.op) {
case IROp::FCvtWS:
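// Rounds using the currently applied rounding mode: currentRoundingFunc_ takes the value in S0
// and returns the converted result in S0.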
regs_.Map(inst);
fp_.FMOV(S0, regs_.F(inst.src1));
MOVP2R(SCRATCH1_64, &currentRoundingFunc_);
LDR(INDEX_UNSIGNED, SCRATCH1_64, SCRATCH1_64, 0);
BLR(SCRATCH1_64);
fp_.FMOV(regs_.F(inst.dest), S0);
break;
case IROp::FCvtSW:
regs_.Map(inst);
fp_.SCVTF(regs_.F(inst.dest), regs_.F(inst.src1));
break;
case IROp::FCvtScaledWS:
if (IRRoundMode(inst.src2 >> 6) == IRRoundMode::CAST_1) {
regs_.Map(inst);
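// Truncate with the scale folded into FCVTZS; a NaN input (unordered self-compare) selects
// the 0x7FFFFFFF pattern in SCRATCHF2 instead.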
fp_.MVNI(32, EncodeRegToDouble(SCRATCHF2), 0x80, 24);
fp_.FCMP(regs_.F(inst.src1), regs_.F(inst.src1));
fp_.FCVTZS(regs_.F(inst.dest), regs_.F(inst.src1), inst.src2 & 0x1F);
fp_.FCSEL(regs_.F(inst.dest), regs_.F(inst.dest), SCRATCHF2, CC_VC);
} else {
RoundingMode rm;
switch (IRRoundMode(inst.src2 >> 6)) {
case IRRoundMode::RINT_0: rm = RoundingMode::ROUND_N; break;
case IRRoundMode::CEIL_2: rm = RoundingMode::ROUND_P; break;
case IRRoundMode::FLOOR_3: rm = RoundingMode::ROUND_M; break;
default:
_assert_msg_(false, "Invalid rounding mode for FCvtScaledWS");
return;
}
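// Scale by 2^fbits, convert with the requested rounding mode, and substitute 0x7FFFFFFF for NaN inputs.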
regs_.Map(inst);
fp_.MOVI2F(SCRATCHF1, (float)(1UL << (inst.src2 & 0x1F)), SCRATCH1);
fp_.MVNI(32, EncodeRegToDouble(SCRATCHF2), 0x80, 24);
fp_.FCMP(regs_.F(inst.src1), regs_.F(inst.src1));
fp_.FMUL(regs_.F(inst.dest), regs_.F(inst.src1), SCRATCHF1);
fp_.FCVTS(regs_.F(inst.dest), regs_.F(inst.dest), rm);
fp_.FCSEL(regs_.F(inst.dest), regs_.F(inst.dest), SCRATCHF2, CC_VC);
}
break;
case IROp::FCvtScaledSW:
regs_.Map(inst);
fp_.SCVTF(regs_.F(inst.dest), regs_.F(inst.src1), inst.src2 & 0x1F);
break;
default:
INVALIDOP;
break;
}
}
void Arm64JitBackend::CompIR_FRound(IRInst inst) {
CONDITIONAL_DISABLE;
regs_.Map(inst);
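// SCRATCHF1 = 0x7FFFFFFF; the self-compare flags a NaN input so FCSEL can return the saturated value.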
fp_.MVNI(32, EncodeRegToDouble(SCRATCHF1), 0x80, 24);
fp_.FCMP(regs_.F(inst.src1), regs_.F(inst.src1));
switch (inst.op) {
case IROp::FRound:
fp_.FCVTS(regs_.F(inst.dest), regs_.F(inst.src1), ROUND_N);
break;
case IROp::FTrunc:
fp_.FCVTS(regs_.F(inst.dest), regs_.F(inst.src1), ROUND_Z);
break;
case IROp::FCeil:
fp_.FCVTS(regs_.F(inst.dest), regs_.F(inst.src1), ROUND_P);
break;
case IROp::FFloor:
fp_.FCVTS(regs_.F(inst.dest), regs_.F(inst.src1), ROUND_M);
break;
default:
INVALIDOP;
break;
}
fp_.FCSEL(regs_.F(inst.dest), regs_.F(inst.dest), SCRATCHF1, CC_VC);
}
void Arm64JitBackend::CompIR_FSat(IRInst inst) {
CONDITIONAL_DISABLE;
switch (inst.op) {
case IROp::FSat0_1:
regs_.Map(inst);
fp_.MOVI2F(SCRATCHF1, 1.0f);
fp_.MOVI2F(SCRATCHF2, 0.0f);
fp_.FMIN(regs_.F(inst.dest), regs_.F(inst.src1), SCRATCHF1);
fp_.FMAX(regs_.F(inst.dest), regs_.F(inst.dest), SCRATCHF2);
break;
case IROp::FSatMinus1_1:
regs_.Map(inst);
fp_.MOVI2F(SCRATCHF1, 1.0f);
fp_.FNEG(SCRATCHF2, SCRATCHF1);
fp_.FMIN(regs_.F(inst.dest), regs_.F(inst.src1), SCRATCHF1);
fp_.FMAX(regs_.F(inst.dest), regs_.F(inst.dest), SCRATCHF2);
break;
default:
INVALIDOP;
break;
}
}
void Arm64JitBackend::CompIR_FSpecial(IRInst inst) {
CONDITIONAL_DISABLE;
auto callFuncF_F = [&](float (*func)(float)) {
regs_.FlushBeforeCall();
WriteDebugProfilerStatus(IRProfilerStatus::MATH_HELPER);
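// Get the argument into S0 whether it's currently mapped to a register lane or still in the context.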
if (regs_.IsFPRMapped(inst.src1)) {
int lane = regs_.GetFPRLane(inst.src1);
if (lane == 0)
fp_.FMOV(S0, regs_.F(inst.src1));
else
fp_.DUP(32, Q0, regs_.F(inst.src1), lane);
} else {
int offset = offsetof(MIPSState, f) + inst.src1 * 4;
fp_.LDR(32, INDEX_UNSIGNED, S0, CTXREG, offset);
}
QuickCallFunction(SCRATCH2_64, func);
regs_.MapFPR(inst.dest, MIPSMap::NOINIT);
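// If the dest happened to map to S0, the result is already in place.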
if (regs_.F(inst.dest) != S0) {
fp_.FMOV(regs_.F(inst.dest), S0);
}
WriteDebugProfilerStatus(IRProfilerStatus::IN_JIT);
};
switch (inst.op) {
case IROp::FSin:
callFuncF_F(&vfpu_sin);
break;
case IROp::FCos:
callFuncF_F(&vfpu_cos);
break;
case IROp::FRSqrt:
regs_.Map(inst);
fp_.MOVI2F(SCRATCHF1, 1.0f);
fp_.FSQRT(regs_.F(inst.dest), regs_.F(inst.src1));
fp_.FDIV(regs_.F(inst.dest), SCRATCHF1, regs_.F(inst.dest));
break;
case IROp::FRecip:
regs_.Map(inst);
fp_.MOVI2F(SCRATCHF1, 1.0f);
fp_.FDIV(regs_.F(inst.dest), SCRATCHF1, regs_.F(inst.src1));
break;
case IROp::FAsin:
callFuncF_F(&vfpu_asin);
break;
default:
INVALIDOP;
break;
}
}
void Arm64JitBackend::CompIR_RoundingMode(IRInst inst) {
CONDITIONAL_DISABLE;
switch (inst.op) {
case IROp::RestoreRoundingMode:
RestoreRoundingMode();
break;
case IROp::ApplyRoundingMode:
ApplyRoundingMode();
break;
case IROp::UpdateRoundingMode:
UpdateRoundingMode();
break;
default:
INVALIDOP;
break;
}
}
}  // namespace MIPSComp
#endif