#include "ppsspp_config.h"
#if PPSSPP_ARCH(AMD64)
#include "Common/x64Emitter.h"
#include "Common/CPUDetect.h"
#include "Common/LogReporting.h"
#include "Common/Math/SIMDHeaders.h"
#include "GPU/GPUState.h"
#include "GPU/Software/DrawPixel.h"
#include "GPU/Software/SoftGpu.h"
#include "GPU/ge_constants.h"
using namespace Gen;
namespace Rasterizer {
// Emits a single-pixel draw function specialized for the given PixelFuncID.
//
// Stages are emitted in this fixed order: depth range, color clamp, alpha test, fog,
// color test, stencil/depth test, depth write, alpha blend, dither, color write.
// Each stage is chained with `success && ...`, so once one stage reports failure
// the remaining stages are not emitted.
//
// Returns the pointer from WriteFinalizedEpilog() cast to SingleFunc on success.
// On failure, logs the ID description, resets the register cache, rewinds the code
// pointer to the position recorded after the constant pool, and returns nullptr.
SingleFunc PixelJitCache::CompileSingle(const PixelFuncID &id) {
// Tell the register cache which purpose each incoming argument has, in this order.
regCache_.SetupABI({
RegCache::GEN_ARG_X,
RegCache::GEN_ARG_Y,
RegCache::GEN_ARG_Z,
RegCache::GEN_ARG_FOG,
RegCache::VEC_ARG_COLOR,
RegCache::GEN_ARG_ID,
});
BeginWrite(64);
Describe("Init");
WriteConstantPool(id);
// Position after the constant pool, 16-byte aligned; the failure path rewinds to it.
const u8 *resetPos = AlignCode16();
// NOTE(review): EndWrite() is called here before the function body is emitted, and
// again on the failure path below - confirm the intended pairing with BeginWrite()
// and WriteFinalizedEpilog(), whose implementations are not visible here.
EndWrite();
bool success = true;
#if PPSSPP_PLATFORM(WINDOWS)
// On Windows the ID argument is asserted NOT to be in a register; GetPixelID()
// loads it from the stack using stackIDOffset_ instead.
_assert_(!regCache_.Has(RegCache::GEN_ARG_ID));
int stackSpace = 0;
// Only the stencil-test-mask variant passes XMM6-XMM15 and R12-R15 to the prolog
// (presumably so they are preserved and usable as extra registers - confirm in WriteProlog).
if (id.hasStencilTestMask)
stackSpace = WriteProlog(0, { XMM6, XMM7, XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15 }, { R12, R13, R14, R15 });
else
stackSpace = WriteProlog(0, {}, {});
// RSP-relative offset of the ID argument: prolog stack space plus 8 + 8 + four
// pointer-sized slots (presumably return address, padding and Win64 home space - TODO confirm).
stackIDOffset_ = stackSpace + 8 + 8 + 4 * PTRBITS / 8;
#else
// Elsewhere the ID argument is asserted to be in a register, so no stack offset is needed.
_assert_(regCache_.Has(RegCache::GEN_ARG_ID));
WriteProlog(0, {}, {});
stackIDOffset_ = -1;
#endif
success = success && Jit_ApplyDepthRange(id);
Describe("ClampColor");
// Narrow the color: 32-bit lanes -> 16-bit with signed saturation, then 16-bit -> 8-bit
// with unsigned saturation, leaving clamped 8-bit channels in the low lanes.
X64Reg argColorReg = regCache_.Find(RegCache::VEC_ARG_COLOR);
PACKSSDW(argColorReg, R(argColorReg));
PACKUSWB(argColorReg, R(argColorReg));
regCache_.Unlock(argColorReg, RegCache::VEC_ARG_COLOR);
// Later stages consult this flag to know whether the color is 8-bit or 16-bit per channel.
colorIs16Bit_ = false;
success = success && Jit_AlphaTest(id);
success = success && Jit_ApplyFog(id);
success = success && Jit_ColorTest(id);
// Clear mode skips both stencil and depth tests; otherwise pick the combined or depth-only test.
if (id.stencilTest && !id.clearMode)
success = success && Jit_StencilAndDepthTest(id);
else if (!id.clearMode)
success = success && Jit_DepthTest(id);
success = success && Jit_WriteDepth(id);
success = success && Jit_AlphaBlend(id);
success = success && Jit_Dither(id);
success = success && Jit_WriteColor(id);
// Every discard branch recorded by Discard() lands here, after the color write.
for (auto &fixup : discards_) {
SetJumpTarget(fixup);
}
discards_.clear();
if (regCache_.Has(RegCache::GEN_ARG_ID))
regCache_.ForceRelease(RegCache::GEN_ARG_ID);
if (!success) {
ERROR_LOG_REPORT(Log::G3D, "Could not compile pixel func: %s", DescribePixelFuncID(id).c_str());
regCache_.Reset(false);
EndWrite();
// Throw away whatever was emitted for this function.
ResetCodePtr(GetOffset(resetPos));
return nullptr;
}
const u8 *start = WriteFinalizedEpilog();
regCache_.Reset(true);
return (SingleFunc)start;
}
// Returns a register holding the PixelFuncID pointer.
//
// Lookup order: the argument register (GEN_ARG_ID) if the cache has it, then an
// existing GEN_ID register, and finally a newly allocated GEN_ID register loaded
// from the stack slot at stackIDOffset_ (which must have been set, i.e. not -1).
// Callers in this file hand the register back through UnlockPixelID().
RegCache::Reg PixelJitCache::GetPixelID() {
	if (regCache_.Has(RegCache::GEN_ARG_ID)) {
		return regCache_.Find(RegCache::GEN_ARG_ID);
	}
	if (regCache_.Has(RegCache::GEN_ID)) {
		return regCache_.Find(RegCache::GEN_ID);
	}

	// Not in any register yet: allocate one and reload the pointer from the stack.
	X64Reg loadedIdReg = regCache_.Alloc(RegCache::GEN_ID);
	_assert_(stackIDOffset_ != -1);
	MOV(PTRBITS, R(loadedIdReg), MDisp(RSP, stackIDOffset_));
	return loadedIdReg;
}
// Unlocks a register obtained from GetPixelID(), using whichever purpose
// GetPixelID() would have served it under: GEN_ARG_ID when the cache has the
// argument register, GEN_ID otherwise.
void PixelJitCache::UnlockPixelID(RegCache::Reg &r) {
	const bool idIsArgument = regCache_.Has(RegCache::GEN_ARG_ID);
	regCache_.Unlock(r, idIsArgument ? RegCache::GEN_ARG_ID : RegCache::GEN_ID);
}
// Returns a register holding the address of the destination color pixel,
// i.e. fb.data + (y * stride + x) * bytesPerPixel, emitting the computation the
// first time and returning the cached GEN_COLOR_OFF register afterwards.
// Callers in this file unlock the result with RegCache::GEN_COLOR_OFF.
//
// Two strategies are used:
//  - Standard stride without dithering: the X and Y argument registers are consumed.
//    Y's register becomes the color pointer and X's register becomes the depth
//    pointer (GEN_DEPTH_OFF) when depth is needed, otherwise X is released.
//  - Otherwise: X and Y are left intact and a fresh register is allocated.
RegCache::Reg PixelJitCache::GetColorOff(const PixelFuncID &id) {
if (!regCache_.Has(RegCache::GEN_COLOR_OFF)) {
Describe("GetColorOff");
if (id.useStandardStride && !id.dithering) {
// Depth offset is needed if depth is written, or tested here (not ALWAYS and not handled by early Z).
bool loadDepthOff = id.depthWrite || (id.DepthTestFunc() != GE_COMP_ALWAYS && !id.earlyZChecks);
X64Reg depthTemp = INVALID_REG;
X64Reg argYReg = regCache_.Find(RegCache::GEN_ARG_Y);
X64Reg argXReg = regCache_.Find(RegCache::GEN_ARG_X);
// Pixel index = (y << 9) + x, i.e. a stride of 512 pixels. This overwrites both argument registers.
SHL(32, R(argYReg), Imm8(9));
ADD(32, R(argXReg), R(argYReg));
if (loadDepthOff) {
_assert_msg_(Accessible(&fb.data, &depthbuf.data), "fb.data and depthbuf.data too far apart: %p %p (fb=%08x d=%08x)", fb.data, depthbuf.data, gstate.getFrameBufAddress(), gstate.getDepthBufAddress());
depthTemp = regCache_.Alloc(RegCache::GEN_DEPTH_OFF);
if (RipAccessible(&fb.data) && RipAccessible(&depthbuf.data)) {
MOV(PTRBITS, R(argYReg), M(&fb.data));
} else {
// Keep &fb.data in depthTemp so depthbuf.data can be reached relative to it below.
MOV(PTRBITS, R(depthTemp), ImmPtr(&fb.data));
MOV(PTRBITS, R(argYReg), MatR(depthTemp));
}
} else {
if (RipAccessible(&fb.data)) {
MOV(PTRBITS, R(argYReg), M(&fb.data));
} else {
MOV(PTRBITS, R(argYReg), ImmPtr(&fb.data));
MOV(PTRBITS, R(argYReg), MatR(argYReg));
}
}
// argYReg = framebuffer base + index * bytes per pixel (4 for 8888, 2 for the other formats).
LEA(PTRBITS, argYReg, MComplex(argYReg, argXReg, id.FBFormat() == GE_FORMAT_8888 ? 4 : 2, 0));
// Re-label the Y register as the color pointer and force-retain it; the original
// Y value was overwritten above, so it cannot be recomputed.
regCache_.Unlock(argYReg, RegCache::GEN_ARG_Y);
regCache_.Change(RegCache::GEN_ARG_Y, RegCache::GEN_COLOR_OFF);
regCache_.ForceRetain(RegCache::GEN_COLOR_OFF);
if (loadDepthOff) {
if (RipAccessible(&fb.data) && RipAccessible(&depthbuf.data)) {
MOV(PTRBITS, R(depthTemp), M(&depthbuf.data));
} else {
MOV(PTRBITS, R(depthTemp), MAccessibleDisp(depthTemp, &fb.data, &depthbuf.data));
}
// argXReg = depth buffer base + index * 2 (16-bit depth values).
LEA(PTRBITS, argXReg, MComplex(depthTemp, argXReg, 2, 0));
// The temporary held GEN_DEPTH_OFF only while computing; the X register takes over that purpose.
regCache_.Release(depthTemp, RegCache::GEN_DEPTH_OFF);
regCache_.Unlock(argXReg, RegCache::GEN_ARG_X);
regCache_.Change(RegCache::GEN_ARG_X, RegCache::GEN_DEPTH_OFF);
regCache_.ForceRetain(RegCache::GEN_DEPTH_OFF);
} else {
// X has been folded into the index and is no longer needed.
regCache_.Unlock(argXReg, RegCache::GEN_ARG_X);
regCache_.ForceRelease(RegCache::GEN_ARG_X);
}
return regCache_.Find(RegCache::GEN_COLOR_OFF);
}
// General path: compute into a new register, preserving the X/Y arguments.
X64Reg argYReg = regCache_.Find(RegCache::GEN_ARG_Y);
X64Reg r = regCache_.Alloc(RegCache::GEN_COLOR_OFF);
if (id.useStandardStride) {
MOV(32, R(r), R(argYReg));
SHL(32, R(r), Imm8(9));
} else {
// Load the 16-bit framebuffer stride from the ID, via a register if one already
// holds it, otherwise straight from the ID's stack slot using r as scratch.
if (regCache_.Has(RegCache::GEN_ARG_ID) || regCache_.Has(RegCache::GEN_ID)) {
X64Reg idReg = GetPixelID();
MOVZX(32, 16, r, MDisp(idReg, offsetof(PixelFuncID, cached.framebufStride)));
UnlockPixelID(idReg);
} else {
_assert_(stackIDOffset_ != -1);
MOV(PTRBITS, R(r), MDisp(RSP, stackIDOffset_));
MOVZX(32, 16, r, MDisp(r, offsetof(PixelFuncID, cached.framebufStride)));
}
IMUL(32, r, R(argYReg));
}
regCache_.Unlock(argYReg, RegCache::GEN_ARG_Y);
X64Reg argXReg = regCache_.Find(RegCache::GEN_ARG_X);
ADD(32, R(r), R(argXReg));
regCache_.Unlock(argXReg, RegCache::GEN_ARG_X);
// Load the framebuffer base pointer into a helper register, then scale and add the index.
X64Reg temp = regCache_.Alloc(RegCache::GEN_TEMP_HELPER);
if (RipAccessible(&fb.data)) {
MOV(PTRBITS, R(temp), M(&fb.data));
} else {
MOV(PTRBITS, R(temp), ImmPtr(&fb.data));
MOV(PTRBITS, R(temp), MatR(temp));
}
LEA(PTRBITS, r, MComplex(temp, r, id.FBFormat() == GE_FORMAT_8888 ? 4 : 2, 0));
regCache_.Release(temp, RegCache::GEN_TEMP_HELPER);
return r;
}
return regCache_.Find(RegCache::GEN_COLOR_OFF);
}
// Returns a register holding the address of the destination depth value,
// i.e. depthbuf.data + (y * stride + x) * 2, computing it on first use.
// Callers in this file unlock the result with RegCache::GEN_DEPTH_OFF.
//
// With standard stride and no dithering, the work is delegated to GetColorOff(),
// which is expected to have set up GEN_DEPTH_OFF alongside the color pointer.
RegCache::Reg PixelJitCache::GetDepthOff(const PixelFuncID &id) {
	// Already computed earlier: reuse it.
	if (regCache_.Has(RegCache::GEN_DEPTH_OFF)) {
		return regCache_.Find(RegCache::GEN_DEPTH_OFF);
	}

	const bool computedWithColor = id.useStandardStride && !id.dithering;
	if (computedWithColor) {
		// Only the side effect matters here, so drop the color pointer lock right away.
		X64Reg colorPtrReg = GetColorOff(id);
		regCache_.Unlock(colorPtrReg, RegCache::GEN_COLOR_OFF);
		return regCache_.Find(RegCache::GEN_DEPTH_OFF);
	}

	Describe("GetDepthOff");
	X64Reg yReg = regCache_.Find(RegCache::GEN_ARG_Y);
	X64Reg depthPtrReg = regCache_.Alloc(RegCache::GEN_DEPTH_OFF);
	if (!id.useStandardStride) {
		// Fetch the 16-bit depth stride from the ID, then multiply by y.
		const bool idInRegister = regCache_.Has(RegCache::GEN_ARG_ID) || regCache_.Has(RegCache::GEN_ID);
		if (idInRegister) {
			X64Reg pixelIdReg = GetPixelID();
			MOVZX(32, 16, depthPtrReg, MDisp(pixelIdReg, offsetof(PixelFuncID, cached.depthbufStride)));
			UnlockPixelID(pixelIdReg);
		} else {
			// No register holds the ID; go through its stack slot using the result register as scratch.
			_assert_(stackIDOffset_ != -1);
			MOV(PTRBITS, R(depthPtrReg), MDisp(RSP, stackIDOffset_));
			MOVZX(32, 16, depthPtrReg, MDisp(depthPtrReg, offsetof(PixelFuncID, cached.depthbufStride)));
		}
		IMUL(32, depthPtrReg, R(yReg));
	} else {
		// Standard stride: y * 512.
		MOV(32, R(depthPtrReg), R(yReg));
		SHL(32, R(depthPtrReg), Imm8(9));
	}
	regCache_.Unlock(yReg, RegCache::GEN_ARG_Y);

	// Add x to get the pixel index.
	X64Reg xReg = regCache_.Find(RegCache::GEN_ARG_X);
	ADD(32, R(depthPtrReg), R(xReg));
	regCache_.Unlock(xReg, RegCache::GEN_ARG_X);

	// Base pointer + index * 2 bytes per depth value.
	X64Reg baseReg = regCache_.Alloc(RegCache::GEN_TEMP_HELPER);
	if (RipAccessible(&depthbuf.data)) {
		MOV(PTRBITS, R(baseReg), M(&depthbuf.data));
	} else {
		MOV(PTRBITS, R(baseReg), ImmPtr(&depthbuf.data));
		MOV(PTRBITS, R(baseReg), MatR(baseReg));
	}
	LEA(PTRBITS, depthPtrReg, MComplex(baseReg, depthPtrReg, 2, 0));
	regCache_.Release(baseReg, RegCache::GEN_TEMP_HELPER);
	return depthPtrReg;
}
// Loads the destination pixel's stencil value into a new GEN_STENCIL register,
// expanded to a full 8-bit value, and returns that register.
// Returns INVALID_REG for the 565 format, for which no stencil load is emitted.
RegCache::Reg PixelJitCache::GetDestStencil(const PixelFuncID &id) {
	const auto format = id.FBFormat();
	if (format == GE_FORMAT_565) {
		return INVALID_REG;
	}

	X64Reg pixelPtrReg = GetColorOff(id);
	Describe("GetDestStencil");
	X64Reg resultReg = regCache_.Alloc(RegCache::GEN_STENCIL);

	switch (format) {
	case GE_FORMAT_8888:
		// Stencil is the top byte of the 32-bit pixel.
		MOVZX(32, 8, resultReg, MDisp(pixelPtrReg, 3));
		break;

	case GE_FORMAT_5551:
		// Arithmetic shift replicates the top bit of the high byte across all 8 bits.
		MOVZX(32, 8, resultReg, MDisp(pixelPtrReg, 1));
		SAR(8, R(resultReg), Imm8(7));
		break;

	case GE_FORMAT_4444: {
		// Take the high nibble of the high byte and duplicate it into both nibbles.
		MOVZX(32, 8, resultReg, MDisp(pixelPtrReg, 1));
		SHR(32, R(resultReg), Imm8(4));
		X64Reg nibbleReg = regCache_.Alloc(RegCache::GEN_TEMP_HELPER);
		MOV(32, R(nibbleReg), R(resultReg));
		SHL(32, R(nibbleReg), Imm8(4));
		OR(32, R(resultReg), R(nibbleReg));
		regCache_.Release(nibbleReg, RegCache::GEN_TEMP_HELPER);
		break;
	}

	default:
		break;
	}

	regCache_.Unlock(pixelPtrReg, RegCache::GEN_COLOR_OFF);
	return resultReg;
}
void PixelJitCache::Discard() {
discards_.push_back(J(true));
}
// Emits a jump taken when condition `cc` holds and records it in discards_;
// CompileSingle() later points every recorded jump at the end of the function.
void PixelJitCache::Discard(Gen::CCFlags cc) {
	FixupBranch pendingJump = J_CC(cc, true);
	discards_.push_back(pendingJump);
}
// Writes the constants used by the alpha blend code.
// The `id` parameter is not used by this function.
void PixelJitCache::WriteConstantPool(const PixelFuncID &id) {
	// Going by the constant names these are 11.4 fixed-point values: a half (0.5)
	// and 255, the latter used to invert blend factors.
	constexpr int kBlendHalf = 1 << 3;
	constexpr int kBlendInvert = 0xFF << 4;

	WriteSimpleConst8x16(constBlendHalf_11_4s_, kBlendHalf);
	WriteSimpleConst8x16(constBlendInvert_11_4s_, kBlendInvert);
}
bool PixelJitCache::Jit_ApplyDepthRange(const PixelFuncID &id) {
if (id.applyDepthRange && !id.earlyZChecks) {
Describe("ApplyDepthR");
X64Reg argZReg = regCache_.Find(RegCache::GEN_ARG_Z);
X64Reg idReg = GetPixelID();
CMP(32, R(argZReg), MDisp(idReg, offsetof(PixelFuncID, cached.minz)));
Discard(CC_L);
CMP(32, R(argZReg), MDisp(idReg, offsetof(PixelFuncID, cached.maxz)));
Discard(CC_G);
UnlockPixelID(idReg);
regCache_.Unlock(argZReg, RegCache::GEN_ARG_Z);
}
if (id.clearMode && !id.DepthClear())
regCache_.ForceRelease(RegCache::GEN_ARG_Z);
else if (!id.clearMode && !id.depthWrite && (id.DepthTestFunc() == GE_COMP_ALWAYS || id.earlyZChecks))
regCache_.ForceRelease(RegCache::GEN_ARG_Z);
return true;
}
bool PixelJitCache::Jit_AlphaTest(const PixelFuncID &id) {
Describe("AlphaTest");
switch (id.AlphaTestFunc()) {
case GE_COMP_NEVER:
Discard();
return true;
case GE_COMP_ALWAYS:
return true;
default:
break;
}
X64Reg alphaReg;
if (regCache_.Has(RegCache::GEN_SRC_ALPHA)) {
alphaReg = regCache_.Find(RegCache::GEN_SRC_ALPHA);
} else {
alphaReg = regCache_.Alloc(RegCache::GEN_SRC_ALPHA);
_assert_(!colorIs16Bit_);
X64Reg argColorReg = regCache_.Find(RegCache::VEC_ARG_COLOR);
MOVD_xmm(R(alphaReg), argColorReg);
regCache_.Unlock(argColorReg, RegCache::VEC_ARG_COLOR);
SHR(32, R(alphaReg), Imm8(24));
}
if (id.hasAlphaTestMask) {
X64Reg idReg = GetPixelID();
X64Reg maskedReg = regCache_.Alloc(RegCache::GEN_TEMP0);
MOVZX(32, 8, maskedReg, MDisp(idReg, offsetof(PixelFuncID, cached.alphaTestMask)));
UnlockPixelID(idReg);
AND(32, R(maskedReg), R(alphaReg));
regCache_.Unlock(alphaReg, RegCache::GEN_SRC_ALPHA);
alphaReg = maskedReg;
}
CMP(8, R(alphaReg), Imm8(id.alphaTestRef));
if (id.hasAlphaTestMask)
regCache_.Release(alphaReg, RegCache::GEN_TEMP0);
else
regCache_.Unlock(alphaReg, RegCache::GEN_SRC_ALPHA);
switch (id.AlphaTestFunc()) {
case GE_COMP_NEVER:
case GE_COMP_ALWAYS:
break;
case GE_COMP_EQUAL:
Discard(CC_NE);
break;
case GE_COMP_NOTEQUAL:
Discard(CC_E);
break;
case GE_COMP_LESS:
Discard(CC_AE);
break;
case GE_COMP_LEQUAL:
Discard(CC_A);
break;
case GE_COMP_GREATER:
Discard(CC_BE);
break;
case GE_COMP_GEQUAL:
Discard(CC_B);
break;
}
return true;
}
// Emits the color test. Nothing is emitted when the color test is off or in clear mode.
// Unlike the alpha test, the comparison function, mask and reference are all read
// from the ID at runtime, so the generated code branches on the function value.
// Always returns true.
bool PixelJitCache::Jit_ColorTest(const PixelFuncID &id) {
if (!id.colorTest || id.clearMode)
return true;
Describe("ColorTest");
X64Reg idReg = GetPixelID();
X64Reg funcReg = regCache_.Alloc(RegCache::GEN_TEMP0);
X64Reg maskReg = regCache_.Alloc(RegCache::GEN_TEMP1);
X64Reg refReg = regCache_.Alloc(RegCache::GEN_TEMP2);
MOV(32, R(maskReg), MDisp(idReg, offsetof(PixelFuncID, cached.colorTestMask)));
MOV(32, R(refReg), MDisp(idReg, offsetof(PixelFuncID, cached.colorTestRef)));
X64Reg argColorReg = regCache_.Find(RegCache::VEC_ARG_COLOR);
// The test works on packed 8-bit channels, so pack (with unsigned saturation) if the color was expanded.
if (colorIs16Bit_) {
PACKUSWB(argColorReg, R(argColorReg));
colorIs16Bit_ = false;
}
// funcReg is borrowed as scratch here: maskReg becomes (mask & color).
MOVD_xmm(R(funcReg), argColorReg);
AND(32, R(maskReg), R(funcReg));
regCache_.Unlock(argColorReg, RegCache::VEC_ARG_COLOR);
// Now load the actual comparison function.
MOVZX(32, 8, funcReg, MDisp(idReg, offsetof(PixelFuncID, cached.colorTestFunc)));
UnlockPixelID(idReg);
CMP(8, R(funcReg), Imm8(GE_COMP_ALWAYS));
// Function values below GE_COMP_ALWAYS discard unconditionally
// (presumably only GE_COMP_NEVER - the enum values are not visible here).
Discard(CC_B);
// ALWAYS: skip the comparison entirely.
FixupBranch skip = J_CC(CC_E);
CMP(8, R(funcReg), Imm8(GE_COMP_EQUAL));
FixupBranch doEqual = J_CC(CC_E);
regCache_.Release(funcReg, RegCache::GEN_TEMP0);
// Any other function is handled as NOTEQUAL: discard when ref equals the masked color.
// NOTE(review): refReg is compared unmasked - presumably cached.colorTestRef is stored pre-masked; confirm.
CMP(32, R(refReg), R(maskReg));
Discard(CC_E);
FixupBranch skip2 = J();
// EQUAL: discard when ref differs from the masked color.
SetJumpTarget(doEqual);
CMP(32, R(refReg), R(maskReg));
Discard(CC_NE);
regCache_.Release(maskReg, RegCache::GEN_TEMP1);
regCache_.Release(refReg, RegCache::GEN_TEMP2);
SetJumpTarget(skip);
SetJumpTarget(skip2);
return true;
}
// Emits fog blending of the source color toward cached.fogColor.
// Per 16-bit channel the emitted math is:
//   color = (color * fog + fogColor * (255 - fog) + 255) >> 8
// Alpha (word lane 3) is saved beforehand and reinserted afterwards, so it is not fogged.
// When fog is disabled, only the fog argument register is released.
// Leaves the color expanded to 16 bits per channel (colorIs16Bit_ = true). Always returns true.
bool PixelJitCache::Jit_ApplyFog(const PixelFuncID &id) {
if (!id.applyFog) {
regCache_.ForceRelease(RegCache::GEN_ARG_FOG);
return true;
}
Describe("ApplyFog");
// Load the fog color and zero-extend its bytes to 16-bit lanes.
X64Reg fogColorReg = regCache_.Alloc(RegCache::VEC_TEMP1);
X64Reg idReg = GetPixelID();
if (cpu_info.bSSE4_1) {
PMOVZXBW(fogColorReg, MDisp(idReg, offsetof(PixelFuncID, cached.fogColor)));
} else {
// Without SSE4.1, interleave with a zero vector to get the same zero-extension.
X64Reg zeroReg = GetZeroVec();
MOVD_xmm(fogColorReg, MDisp(idReg, offsetof(PixelFuncID, cached.fogColor)));
PUNPCKLBW(fogColorReg, R(zeroReg));
regCache_.Unlock(zeroReg, RegCache::VEC_ZERO);
}
UnlockPixelID(idReg);
// Build 0x00FF (255) in every 16-bit lane: all ones, shifted right by 8.
X64Reg invertReg = regCache_.Alloc(RegCache::VEC_TEMP2);
PCMPEQW(invertReg, R(invertReg));
PSRLW(invertReg, 8);
// Expand the source color to 16-bit lanes too, unless it already is.
X64Reg argColorReg = regCache_.Find(RegCache::VEC_ARG_COLOR);
if (!colorIs16Bit_) {
if (cpu_info.bSSE4_1) {
PMOVZXBW(argColorReg, R(argColorReg));
} else {
X64Reg zeroReg = GetZeroVec();
PUNPCKLBW(argColorReg, R(zeroReg));
regCache_.Unlock(zeroReg, RegCache::VEC_ZERO);
}
colorIs16Bit_ = true;
}
// Keep the unfogged alpha (word lane 3) in a general register so it can be restored below.
X64Reg alphaReg;
if (regCache_.Has(RegCache::GEN_SRC_ALPHA)) {
alphaReg = regCache_.Find(RegCache::GEN_SRC_ALPHA);
} else {
alphaReg = regCache_.Alloc(RegCache::GEN_SRC_ALPHA);
PEXTRW(alphaReg, argColorReg, 3);
}
// Broadcast the fog factor's low 16 bits across the four low word lanes.
X64Reg fogMultReg = regCache_.Alloc(RegCache::VEC_TEMP3);
X64Reg argFogReg = regCache_.Find(RegCache::GEN_ARG_FOG);
MOVD_xmm(fogMultReg, R(argFogReg));
PSHUFLW(fogMultReg, R(fogMultReg), _MM_SHUFFLE(0, 0, 0, 0));
regCache_.Unlock(argFogReg, RegCache::GEN_ARG_FOG);
// The fog argument has been copied to a vector register; free the general register.
regCache_.ForceRelease(RegCache::GEN_ARG_FOG);
// color * fog
PMULLW(argColorReg, R(fogMultReg));
// + 255 (added now, while invertReg still holds the plain 255s)
PADDW(argColorReg, R(invertReg));
// invertReg = 255 - fog
PSUBW(invertReg, R(fogMultReg));
// fogColor * (255 - fog)
PMULLW(fogColorReg, R(invertReg));
// Sum both products.
PADDW(argColorReg, R(fogColorReg));
regCache_.Release(fogColorReg, RegCache::VEC_TEMP1);
regCache_.Release(invertReg, RegCache::VEC_TEMP2);
regCache_.Release(fogMultReg, RegCache::VEC_TEMP3);
// Divide by 256.
PSRLW(argColorReg, 8);
// Restore the saved alpha into lane 3.
PINSRW(argColorReg, R(alphaReg), 3);
regCache_.Unlock(argColorReg, RegCache::VEC_ARG_COLOR);
regCache_.Unlock(alphaReg, RegCache::GEN_SRC_ALPHA);
return true;
}
bool PixelJitCache::Jit_StencilAndDepthTest(const PixelFuncID &id) {
_assert_(!id.clearMode && id.stencilTest);
X64Reg stencilReg = GetDestStencil(id);
Describe("StencilAndDepth");
X64Reg maskedReg = stencilReg;
if (id.hasStencilTestMask && stencilReg != INVALID_REG) {
X64Reg idReg = GetPixelID();
maskedReg = regCache_.Alloc(RegCache::GEN_TEMP0);
MOV(32, R(maskedReg), R(stencilReg));
AND(8, R(maskedReg), MDisp(idReg, offsetof(PixelFuncID, cached.stencilTestMask)));
UnlockPixelID(idReg);
}
bool success = true;
success = success && Jit_StencilTest(id, stencilReg, maskedReg);
if (maskedReg != stencilReg)
regCache_.Release(maskedReg, RegCache::GEN_TEMP0);
if (stencilReg == INVALID_REG) {
return success && Jit_DepthTest(id);
}
success = success && Jit_DepthTestForStencil(id, stencilReg);
success = success && Jit_ApplyStencilOp(id, id.ZPass(), stencilReg);
regCache_.Unlock(stencilReg, RegCache::GEN_STENCIL);
regCache_.ForceRetain(RegCache::GEN_STENCIL);
return success;
}
// Emits the stencil test and its failure path.
//
// stencilReg: the unmasked destination stencil value, or INVALID_REG when the
//             format has no stencil (the stencil is then treated as a constant 0).
// maskedReg:  the value actually compared against id.stencilTestRef.
//
// Where the outcome is known at compile time no compare is emitted. Otherwise a
// compare is emitted, followed by a branch over the failure path when the test passes.
// The failure path applies the SFail op, writes stencil only, and discards.
// Returns false if a helper reported failure.
bool PixelJitCache::Jit_StencilTest(const PixelFuncID &id, RegCache::Reg stencilReg, RegCache::Reg maskedReg) {
Describe("StencilTest");
bool hasFixedResult = false;
bool fixedResult = false;
FixupBranch toPass;
if (stencilReg == INVALID_REG) {
// No stencil register: evaluate the test at compile time against a stencil value of 0.
hasFixedResult = true;
switch (id.StencilTestFunc()) {
case GE_COMP_NEVER: fixedResult = false; break;
case GE_COMP_ALWAYS: fixedResult = true; break;
case GE_COMP_EQUAL: fixedResult = id.stencilTestRef == 0; break;
case GE_COMP_NOTEQUAL: fixedResult = id.stencilTestRef != 0; break;
case GE_COMP_LESS: fixedResult = false; break;
case GE_COMP_LEQUAL: fixedResult = id.stencilTestRef == 0; break;
case GE_COMP_GREATER: fixedResult = id.stencilTestRef != 0; break;
case GE_COMP_GEQUAL: fixedResult = true; break;
}
} else if (id.StencilTestFunc() == GE_COMP_ALWAYS) {
// Always passes: no compare needed.
hasFixedResult = true;
fixedResult = true;
} else {
// The compare is (stencil, ref) while the test functions are phrased as (ref, stencil),
// so the branch conditions below are mirrored: e.g. LESS passes when stencil is above ref.
CMP(8, R(maskedReg), Imm8(id.stencilTestRef));
switch (id.StencilTestFunc()) {
case GE_COMP_NEVER:
hasFixedResult = true;
fixedResult = false;
break;
case GE_COMP_ALWAYS:
// Handled by the branch above.
_assert_(false);
break;
case GE_COMP_EQUAL:
toPass = J_CC(CC_E);
break;
case GE_COMP_NOTEQUAL:
toPass = J_CC(CC_NE);
break;
case GE_COMP_LESS:
toPass = J_CC(CC_A);
break;
case GE_COMP_LEQUAL:
toPass = J_CC(CC_AE);
break;
case GE_COMP_GREATER:
toPass = J_CC(CC_B);
break;
case GE_COMP_GEQUAL:
toPass = J_CC(CC_BE);
break;
}
}
// Known failure with no stencil to update: just discard.
if (hasFixedResult && !fixedResult && stencilReg == INVALID_REG) {
Discard();
return true;
}
// Remember which registers existed before emitting the conditionally executed failure path.
bool hadColorOffReg = regCache_.Has(RegCache::GEN_COLOR_OFF);
bool hadIdReg = regCache_.Has(RegCache::GEN_ID);
bool success = true;
// Emit the failure path unless the test is known to pass.
if (stencilReg != INVALID_REG && (!hasFixedResult || !fixedResult)) {
success = success && Jit_ApplyStencilOp(id, id.SFail(), stencilReg);
success = success && Jit_WriteStencilOnly(id, stencilReg);
Discard();
}
// Registers first allocated inside the failure path are marked GEN_INVALID
// (presumably because the passing path jumps over the code that loads them - confirm).
if (!hadColorOffReg && regCache_.Has(RegCache::GEN_COLOR_OFF))
regCache_.Change(RegCache::GEN_COLOR_OFF, RegCache::GEN_INVALID);
if (!hadIdReg && regCache_.Has(RegCache::GEN_ID))
regCache_.Change(RegCache::GEN_ID, RegCache::GEN_INVALID);
// toPass was only emitted when the result is not fixed.
if (!hasFixedResult)
SetJumpTarget(toPass);
return success;
}
// Emits the depth test for the stencil-enabled path. Unlike Jit_DepthTest(), a
// failing pixel first gets the ZFail stencil op applied and written before it is discarded.
// Nothing is emitted when the depth function is ALWAYS or early Z checks are in effect.
// Returns false if a helper reported failure.
bool PixelJitCache::Jit_DepthTestForStencil(const PixelFuncID &id, RegCache::Reg stencilReg) {
if (id.DepthTestFunc() == GE_COMP_ALWAYS || id.earlyZChecks)
return true;
X64Reg depthOffReg = GetDepthOff(id);
Describe("DepthTestStencil");
X64Reg argZReg = regCache_.Find(RegCache::GEN_ARG_Z);
// Compare the incoming 16-bit z against the stored depth value.
CMP(16, R(argZReg), MatR(depthOffReg));
regCache_.Unlock(depthOffReg, RegCache::GEN_DEPTH_OFF);
regCache_.Unlock(argZReg, RegCache::GEN_ARG_Z);
// `skip` is taken when the depth test PASSES, jumping over the failure path (unsigned compares).
FixupBranch skip;
switch (id.DepthTestFunc()) {
case GE_COMP_NEVER:
// Comparing a register with itself always yields "equal", so this branch is never taken.
CMP(32, R(RAX), R(RAX));
skip = J_CC(CC_NE);
break;
case GE_COMP_ALWAYS:
// Already returned above for ALWAYS; if reached, this branch would always be taken.
CMP(32, R(RAX), R(RAX));
skip = J_CC(CC_E);
break;
case GE_COMP_EQUAL:
skip = J_CC(CC_E);
break;
case GE_COMP_NOTEQUAL:
skip = J_CC(CC_NE);
break;
case GE_COMP_LESS:
skip = J_CC(CC_B);
break;
case GE_COMP_LEQUAL:
skip = J_CC(CC_BE);
break;
case GE_COMP_GREATER:
skip = J_CC(CC_A);
break;
case GE_COMP_GEQUAL:
skip = J_CC(CC_AE);
break;
}
// Remember which registers existed before emitting the conditionally executed failure path.
bool hadColorOffReg = regCache_.Has(RegCache::GEN_COLOR_OFF);
bool hadIdReg = regCache_.Has(RegCache::GEN_ID);
bool success = true;
// Failure path: update stencil with the ZFail op, write it back, discard the pixel.
success = success && Jit_ApplyStencilOp(id, id.ZFail(), stencilReg);
success = success && Jit_WriteStencilOnly(id, stencilReg);
Discard();
// Registers first allocated inside the failure path are marked GEN_INVALID
// (presumably because the passing path jumps over the code that loads them - confirm).
if (!hadColorOffReg && regCache_.Has(RegCache::GEN_COLOR_OFF))
regCache_.Change(RegCache::GEN_COLOR_OFF, RegCache::GEN_INVALID);
if (!hadIdReg && regCache_.Has(RegCache::GEN_ID))
regCache_.Change(RegCache::GEN_ID, RegCache::GEN_INVALID);
SetJumpTarget(skip);
// Z is only needed further if depth is going to be written.
if (!id.depthWrite)
regCache_.ForceRelease(RegCache::GEN_ARG_Z);
return success;
}
// Emits code applying stencil operation `op` to the value in stencilReg (in place).
// stencilReg must be valid (asserted). INCR and DECR depend on the framebuffer
// format; both emit nothing for 565. Always returns true.
bool PixelJitCache::Jit_ApplyStencilOp(const PixelFuncID &id, GEStencilOp op, RegCache::Reg stencilReg) {
_assert_(stencilReg != INVALID_REG);
Describe("ApplyStencil");
FixupBranch skip;
switch (op) {
case GE_STENCILOP_KEEP:
break;
case GE_STENCILOP_ZERO:
XOR(32, R(stencilReg), R(stencilReg));
break;
case GE_STENCILOP_REPLACE:
if (id.hasStencilTestMask) {
// With a test mask, the replacement value is read from cached.stencilRef at runtime.
X64Reg idReg = GetPixelID();
MOVZX(32, 8, stencilReg, MDisp(idReg, offsetof(PixelFuncID, cached.stencilRef)));
UnlockPixelID(idReg);
} else {
// Without a mask, the test reference is baked in as an immediate.
MOV(8, R(stencilReg), Imm8(id.stencilTestRef));
}
break;
case GE_STENCILOP_INVERT:
NOT(8, R(stencilReg));
break;
case GE_STENCILOP_INCR:
switch (id.fbFormat) {
case GE_FORMAT_565:
break;
case GE_FORMAT_5551:
// Incrementing here always yields all ones.
MOV(8, R(stencilReg), Imm8(0xFF));
break;
case GE_FORMAT_4444:
// Add one to both nibbles (0x11) unless the value is already 0xF0 or above.
CMP(8, R(stencilReg), Imm8(0xF0));
skip = J_CC(CC_AE);
ADD(8, R(stencilReg), Imm8(0x11));
SetJumpTarget(skip);
break;
case GE_FORMAT_8888:
// Add one unless already at 0xFF.
CMP(8, R(stencilReg), Imm8(0xFF));
skip = J_CC(CC_E);
ADD(8, R(stencilReg), Imm8(0x01));
SetJumpTarget(skip);
break;
}
break;
case GE_STENCILOP_DECR:
switch (id.fbFormat) {
case GE_FORMAT_565:
break;
case GE_FORMAT_5551:
// Decrementing here always yields zero.
XOR(32, R(stencilReg), R(stencilReg));
break;
case GE_FORMAT_4444:
// Subtract one from both nibbles (0x11) unless the value is below 0x11.
CMP(8, R(stencilReg), Imm8(0x11));
skip = J_CC(CC_B);
SUB(8, R(stencilReg), Imm8(0x11));
SetJumpTarget(skip);
break;
case GE_FORMAT_8888:
// Subtract one unless already at zero.
CMP(8, R(stencilReg), Imm8(0x00));
skip = J_CC(CC_E);
SUB(8, R(stencilReg), Imm8(0x01));
SetJumpTarget(skip);
break;
}
break;
}
return true;
}
// Emits code storing only the stencil bits from stencilReg into the destination
// pixel, leaving its color bits untouched. Nothing is stored for 565.
// stencilReg must be valid (asserted) and is modified by the emitted code for some
// formats. Always returns true.
bool PixelJitCache::Jit_WriteStencilOnly(const PixelFuncID &id, RegCache::Reg stencilReg) {
_assert_(stencilReg != INVALID_REG);
X64Reg colorOffReg = GetColorOff(id);
Describe("WriteStencil");
if (id.applyColorWriteMask) {
// Masked path. maskReg holds the bits to KEEP from the destination byte:
//   stencil = ~(~stencil | mask) = stencil & ~mask
//   dest    = (dest & mask) | stencil
X64Reg idReg = GetPixelID();
X64Reg maskReg = regCache_.Alloc(RegCache::GEN_TEMP5);
switch (id.fbFormat) {
case GE_FORMAT_565:
break;
case GE_FORMAT_5551:
// Second byte of the write mask; OR in 0x7F so the low 7 (non-stencil) bits are always kept.
MOVZX(32, 8, maskReg, MDisp(idReg, offsetof(PixelFuncID, cached.colorWriteMask) + 1));
OR(8, R(maskReg), Imm8(0x7F));
NOT(32, R(stencilReg));
OR(32, R(stencilReg), R(maskReg));
NOT(32, R(stencilReg));
AND(8, MDisp(colorOffReg, 1), R(maskReg));
OR(8, MDisp(colorOffReg, 1), R(stencilReg));
break;
case GE_FORMAT_4444:
// Second byte of the write mask; OR in 0x0F so the low nibble (non-stencil) is always kept.
MOVZX(32, 8, maskReg, MDisp(idReg, offsetof(PixelFuncID, cached.colorWriteMask) + 1));
OR(8, R(maskReg), Imm8(0x0F));
NOT(32, R(stencilReg));
OR(32, R(stencilReg), R(maskReg));
NOT(32, R(stencilReg));
AND(8, MDisp(colorOffReg, 1), R(maskReg));
OR(8, MDisp(colorOffReg, 1), R(stencilReg));
break;
case GE_FORMAT_8888:
// Fourth byte of the write mask applies to the whole stencil byte.
MOVZX(32, 8, maskReg, MDisp(idReg, offsetof(PixelFuncID, cached.colorWriteMask) + 3));
NOT(32, R(stencilReg));
OR(32, R(stencilReg), R(maskReg));
NOT(32, R(stencilReg));
AND(8, MDisp(colorOffReg, 3), R(maskReg));
OR(8, MDisp(colorOffReg, 3), R(stencilReg));
break;
}
regCache_.Release(maskReg, RegCache::GEN_TEMP5);
UnlockPixelID(idReg);
} else {
// Unmasked path: replace exactly the stencil bits of the destination.
switch (id.fbFormat) {
case GE_FORMAT_565:
break;
case GE_FORMAT_5551:
// Stencil is the top bit of the high byte.
AND(8, R(stencilReg), Imm8(0x80));
AND(8, MDisp(colorOffReg, 1), Imm8(0x7F));
OR(8, MDisp(colorOffReg, 1), R(stencilReg));
break;
case GE_FORMAT_4444:
// Stencil is the top nibble of the high byte.
AND(8, MDisp(colorOffReg, 1), Imm8(0x0F));
AND(8, R(stencilReg), Imm8(0xF0));
OR(8, MDisp(colorOffReg, 1), R(stencilReg));
break;
case GE_FORMAT_8888:
// Stencil is the whole top byte.
MOV(8, MDisp(colorOffReg, 3), R(stencilReg));
break;
}
}
regCache_.Unlock(colorOffReg, RegCache::GEN_COLOR_OFF);
return true;
}
bool PixelJitCache::Jit_DepthTest(const PixelFuncID &id) {
if (id.DepthTestFunc() == GE_COMP_ALWAYS || id.earlyZChecks)
return true;
if (id.DepthTestFunc() == GE_COMP_NEVER) {
Discard();
}
X64Reg depthOffReg = GetDepthOff(id);
Describe("DepthTest");
X64Reg argZReg = regCache_.Find(RegCache::GEN_ARG_Z);
CMP(16, R(argZReg), MatR(depthOffReg));
regCache_.Unlock(depthOffReg, RegCache::GEN_DEPTH_OFF);
regCache_.Unlock(argZReg, RegCache::GEN_ARG_Z);
switch (id.DepthTestFunc()) {
case GE_COMP_NEVER:
case GE_COMP_ALWAYS:
break;
case GE_COMP_EQUAL:
Discard(CC_NE);
break;
case GE_COMP_NOTEQUAL:
Discard(CC_E);
break;
case GE_COMP_LESS:
Discard(CC_AE);
break;
case GE_COMP_LEQUAL:
Discard(CC_A);
break;
case GE_COMP_GREATER:
Discard(CC_BE);
break;
case GE_COMP_GEQUAL:
Discard(CC_B);
break;
}
if (!id.depthWrite)
regCache_.ForceRelease(RegCache::GEN_ARG_Z);
return true;
}
bool PixelJitCache::Jit_WriteDepth(const PixelFuncID &id) {
if (id.depthWrite) {
X64Reg depthOffReg = GetDepthOff(id);
Describe("WriteDepth");
X64Reg argZReg = regCache_.Find(RegCache::GEN_ARG_Z);
MOV(16, MatR(depthOffReg), R(argZReg));
regCache_.Unlock(depthOffReg, RegCache::GEN_DEPTH_OFF);
regCache_.Unlock(argZReg, RegCache::GEN_ARG_Z);
regCache_.ForceRelease(RegCache::GEN_ARG_Z);
}
if (regCache_.Has(RegCache::GEN_DEPTH_OFF)) {
regCache_.ForceRelease(RegCache::GEN_DEPTH_OFF);
}
return true;
}
// Emits alpha blending of the source color (VEC_ARG_COLOR) with the destination pixel.
// Nothing is emitted when id.alphaBlend is off.
//
// Three steps:
//  1. Load the destination color into a vector register (converted to 8888 for 16-bit formats).
//  2. If the equation uses factors, expand both colors to 16-bit lanes and multiply by the factors.
//  3. Apply the blend equation, leaving the result in the source color register.
// Returns false if a helper reported failure.
bool PixelJitCache::Jit_AlphaBlend(const PixelFuncID &id) {
if (!id.alphaBlend)
return true;
PixelBlendState blendState;
ComputePixelBlendState(blendState, id);
bool success = true;
// Step 1: destination color.
X64Reg dstReg = regCache_.Alloc(RegCache::VEC_TEMP0);
if (!blendState.readsDstPixel) {
// The destination is not read; GetColorOff() is still called
// (presumably to keep register state consistent across variants - confirm) and dst is zeroed.
X64Reg colorOff = GetColorOff(id);
regCache_.Unlock(colorOff, RegCache::GEN_COLOR_OFF);
PXOR(dstReg, R(dstReg));
} else if (id.FBFormat() == GE_FORMAT_8888) {
X64Reg colorOff = GetColorOff(id);
Describe("AlphaBlend");
MOVD_xmm(dstReg, MatR(colorOff));
regCache_.Unlock(colorOff, RegCache::GEN_COLOR_OFF);
} else {
// 16-bit formats: load into a general register, convert, then move to the vector register.
X64Reg colorOff = GetColorOff(id);
Describe("AlphaBlend");
X64Reg dstGenReg = regCache_.Alloc(RegCache::GEN_TEMP0);
MOVZX(32, 16, dstGenReg, MatR(colorOff));
regCache_.Unlock(colorOff, RegCache::GEN_COLOR_OFF);
X64Reg temp1Reg = regCache_.Alloc(RegCache::GEN_TEMP1);
X64Reg temp2Reg = regCache_.Alloc(RegCache::GEN_TEMP2);
switch (id.fbFormat) {
case GE_FORMAT_565:
success = success && Jit_ConvertFrom565(id, dstGenReg, temp1Reg, temp2Reg);
break;
case GE_FORMAT_5551:
success = success && Jit_ConvertFrom5551(id, dstGenReg, temp1Reg, temp2Reg, blendState.usesDstAlpha);
break;
case GE_FORMAT_4444:
success = success && Jit_ConvertFrom4444(id, dstGenReg, temp1Reg, temp2Reg, blendState.usesDstAlpha);
break;
case GE_FORMAT_8888:
// Handled by the branch above.
break;
}
Describe("AlphaBlend");
MOVD_xmm(dstReg, R(dstGenReg));
regCache_.Release(dstGenReg, RegCache::GEN_TEMP0);
regCache_.Release(temp1Reg, RegCache::GEN_TEMP1);
regCache_.Release(temp2Reg, RegCache::GEN_TEMP2);
}
// Step 2: factors.
X64Reg argColorReg = regCache_.Find(RegCache::VEC_ARG_COLOR);
if (blendState.usesFactors) {
X64Reg srcFactorReg = regCache_.Alloc(RegCache::VEC_TEMP1);
X64Reg dstFactorReg = regCache_.Alloc(RegCache::VEC_TEMP2);
// Zero-extend both colors to 16-bit lanes (source only if not already expanded).
if (cpu_info.bSSE4_1) {
if (!colorIs16Bit_)
PMOVZXBW(argColorReg, R(argColorReg));
PMOVZXBW(dstReg, R(dstReg));
} else {
X64Reg zeroReg = GetZeroVec();
if (!colorIs16Bit_)
PUNPCKLBW(argColorReg, R(zeroReg));
PUNPCKLBW(dstReg, R(zeroReg));
regCache_.Unlock(zeroReg, RegCache::VEC_ZERO);
}
colorIs16Bit_ = true;
// ZERO and ONE factors need no multiplication.
bool multiplySrc = id.AlphaBlendSrc() != PixelBlendFactor::ZERO && id.AlphaBlendSrc() != PixelBlendFactor::ONE;
bool multiplyDst = id.AlphaBlendDst() != PixelBlendFactor::ZERO && id.AlphaBlendDst() != PixelBlendFactor::ONE;
// Shift colors left by 4 when they will be multiplied or read as a factor, matching
// the scale of the factor values (which are also shifted left by 4).
if (multiplySrc || blendState.srcColorAsFactor)
PSLLW(argColorReg, 4);
if (multiplyDst || blendState.dstColorAsFactor || blendState.usesDstAlpha)
PSLLW(dstReg, 4);
// Only compute factors that are not the known ZERO/ONE values (and anything ordered after ZERO).
if (id.AlphaBlendSrc() < PixelBlendFactor::ZERO)
success = success && Jit_BlendFactor(id, srcFactorReg, dstReg, id.AlphaBlendSrc());
if (id.AlphaBlendDst() < PixelBlendFactor::ZERO)
success = success && Jit_DstBlendFactor(id, srcFactorReg, dstFactorReg, dstReg);
X64Reg halfReg = INVALID_REG;
if (multiplySrc || multiplyDst) {
// Loaded once since it may be used up to four times below.
halfReg = regCache_.Alloc(RegCache::VEC_TEMP3);
MOVDQA(halfReg, M(constBlendHalf_11_4s_));
}
if (multiplySrc) {
// OR the half constant into both operands, then keep the high 16 bits of the
// product: (color << 4) * (factor << 4) >> 16 gives roughly color * factor / 256.
POR(srcFactorReg, R(halfReg));
POR(argColorReg, R(halfReg));
PMULHUW(argColorReg, R(srcFactorReg));
} else if (id.AlphaBlendSrc() == PixelBlendFactor::ZERO) {
PXOR(argColorReg, R(argColorReg));
} else if (id.AlphaBlendSrc() == PixelBlendFactor::ONE) {
// Undo the shift applied above when the color was only shifted for use as a factor.
if (blendState.srcColorAsFactor)
PSRLW(argColorReg, 4);
}
if (multiplyDst) {
POR(dstFactorReg, R(halfReg));
POR(dstReg, R(halfReg));
PMULHUW(dstReg, R(dstFactorReg));
} else if (id.AlphaBlendDst() == PixelBlendFactor::ZERO) {
// For add/subtract the dst term is skipped entirely in step 3; only the
// reverse subtract reads dstReg, so it must actually be zero there.
if (id.AlphaBlendEq() == GE_BLENDMODE_MUL_AND_SUBTRACT_REVERSE)
PXOR(dstReg, R(dstReg));
} else if (id.AlphaBlendDst() == PixelBlendFactor::ONE) {
// Undo the shift applied above.
if (blendState.dstColorAsFactor || blendState.usesDstAlpha)
PSRLW(dstReg, 4);
}
regCache_.Release(srcFactorReg, RegCache::VEC_TEMP1);
regCache_.Release(dstFactorReg, RegCache::VEC_TEMP2);
if (halfReg != INVALID_REG)
regCache_.Release(halfReg, RegCache::VEC_TEMP3);
} else if (colorIs16Bit_) {
// No factors: the equations below then work on 8-bit lanes, so pack the color back down.
PACKUSWB(argColorReg, R(argColorReg));
colorIs16Bit_ = false;
}
// Step 3: blend equation. Add/subtract use saturating 16-bit ops; min/max/absdiff use 8-bit ops.
X64Reg tempReg = regCache_.Alloc(RegCache::VEC_TEMP1);
switch (id.AlphaBlendEq()) {
case GE_BLENDMODE_MUL_AND_ADD:
if (id.AlphaBlendDst() != PixelBlendFactor::ZERO)
PADDUSW(argColorReg, R(dstReg));
break;
case GE_BLENDMODE_MUL_AND_SUBTRACT:
if (id.AlphaBlendDst() != PixelBlendFactor::ZERO)
PSUBUSW(argColorReg, R(dstReg));
break;
case GE_BLENDMODE_MUL_AND_SUBTRACT_REVERSE:
// result = dst - src. AVX's three-operand form avoids the extra moves.
if (cpu_info.bAVX) {
VPSUBUSW(128, argColorReg, dstReg, R(argColorReg));
} else {
MOVDQA(tempReg, R(argColorReg));
MOVDQA(argColorReg, R(dstReg));
PSUBUSW(argColorReg, R(tempReg));
}
break;
case GE_BLENDMODE_MIN:
PMINUB(argColorReg, R(dstReg));
break;
case GE_BLENDMODE_MAX:
PMAXUB(argColorReg, R(dstReg));
break;
case GE_BLENDMODE_ABSDIFF:
// |src - dst| via two saturating subtractions: one of them is zero, the other the result.
MOVDQA(tempReg, R(dstReg));
PSUBUSB(tempReg, R(argColorReg));
PSUBUSB(argColorReg, R(dstReg));
POR(argColorReg, R(tempReg));
break;
}
regCache_.Release(dstReg, RegCache::VEC_TEMP0);
regCache_.Release(tempReg, RegCache::VEC_TEMP1);
regCache_.Unlock(argColorReg, RegCache::VEC_ARG_COLOR);
return success;
}
// Emits code computing blend factor `factor` into factorReg, as 16-bit lanes on the
// same left-shifted-by-4 scale as the colors prepared by Jit_AlphaBlend().
//
// factorReg: destination vector register for the factor.
// dstReg:    the "other" color - the expanded destination color when this is called
//            for the source factor.
// The source color must already be expanded to 16-bit lanes (asserted).
// The FIX case (and any unlisted value) reads cached.alphaBlendSrc from the ID.
// Always returns true.
bool PixelJitCache::Jit_BlendFactor(const PixelFuncID &id, RegCache::Reg factorReg, RegCache::Reg dstReg, PixelBlendFactor factor) {
X64Reg idReg = INVALID_REG;
X64Reg tempReg = INVALID_REG;
X64Reg argColorReg = regCache_.Find(RegCache::VEC_ARG_COLOR);
_assert_(colorIs16Bit_);
// Inverse factors are computed as (invert constant - value), so preload the constant for them.
switch (factor) {
case PixelBlendFactor::INVOTHERCOLOR:
case PixelBlendFactor::INVSRCALPHA:
case PixelBlendFactor::INVDSTALPHA:
case PixelBlendFactor::DOUBLEINVSRCALPHA:
case PixelBlendFactor::DOUBLEINVDSTALPHA:
MOVDQA(factorReg, M(constBlendInvert_11_4s_));
break;
default:
break;
}
switch (factor) {
case PixelBlendFactor::OTHERCOLOR:
MOVDQA(factorReg, R(dstReg));
break;
case PixelBlendFactor::INVOTHERCOLOR:
PSUBUSW(factorReg, R(dstReg));
break;
case PixelBlendFactor::SRCALPHA:
// Broadcast word lane 3 (alpha) across the four low lanes.
PSHUFLW(factorReg, R(argColorReg), _MM_SHUFFLE(3, 3, 3, 3));
break;
case PixelBlendFactor::INVSRCALPHA:
tempReg = regCache_.Alloc(RegCache::VEC_TEMP3);
PSHUFLW(tempReg, R(argColorReg), _MM_SHUFFLE(3, 3, 3, 3));
PSUBUSW(factorReg, R(tempReg));
break;
case PixelBlendFactor::DSTALPHA:
PSHUFLW(factorReg, R(dstReg), _MM_SHUFFLE(3, 3, 3, 3));
break;
case PixelBlendFactor::INVDSTALPHA:
tempReg = regCache_.Alloc(RegCache::VEC_TEMP3);
PSHUFLW(tempReg, R(dstReg), _MM_SHUFFLE(3, 3, 3, 3));
PSUBUSW(factorReg, R(tempReg));
break;
case PixelBlendFactor::DOUBLESRCALPHA:
// Doubling is a left shift by one.
PSHUFLW(factorReg, R(argColorReg), _MM_SHUFFLE(3, 3, 3, 3));
PSLLW(factorReg, 1);
break;
case PixelBlendFactor::DOUBLEINVSRCALPHA:
// The saturating subtract clamps at zero when the doubled alpha exceeds the invert constant.
tempReg = regCache_.Alloc(RegCache::VEC_TEMP3);
PSHUFLW(tempReg, R(argColorReg), _MM_SHUFFLE(3, 3, 3, 3));
PSLLW(tempReg, 1);
PSUBUSW(factorReg, R(tempReg));
break;
case PixelBlendFactor::DOUBLEDSTALPHA:
PSHUFLW(factorReg, R(dstReg), _MM_SHUFFLE(3, 3, 3, 3));
PSLLW(factorReg, 1);
break;
case PixelBlendFactor::DOUBLEINVDSTALPHA:
tempReg = regCache_.Alloc(RegCache::VEC_TEMP3);
PSHUFLW(tempReg, R(dstReg), _MM_SHUFFLE(3, 3, 3, 3));
PSLLW(tempReg, 1);
PSUBUSW(factorReg, R(tempReg));
break;
case PixelBlendFactor::ZERO:
PXOR(factorReg, R(factorReg));
break;
case PixelBlendFactor::ONE:
// All ones -> 0xFF00 per word -> 0x0FF0 per word, i.e. 255 shifted left by 4.
PCMPEQD(factorReg, R(factorReg));
PSLLW(factorReg, 8);
PSRLW(factorReg, 4);
break;
case PixelBlendFactor::FIX:
default:
// Fixed factor: load its bytes from the ID, zero-extend to 16-bit lanes, shift to the factor scale.
idReg = GetPixelID();
if (cpu_info.bSSE4_1) {
PMOVZXBW(factorReg, MDisp(idReg, offsetof(PixelFuncID, cached.alphaBlendSrc)));
} else {
X64Reg zeroReg = GetZeroVec();
MOVD_xmm(factorReg, MDisp(idReg, offsetof(PixelFuncID, cached.alphaBlendSrc)));
PUNPCKLBW(factorReg, R(zeroReg));
regCache_.Unlock(zeroReg, RegCache::VEC_ZERO);
}
PSLLW(factorReg, 4);
break;
}
// Hand back whatever was acquired above.
if (idReg != INVALID_REG)
UnlockPixelID(idReg);
if (tempReg != INVALID_REG)
regCache_.Release(tempReg, RegCache::VEC_TEMP3);
regCache_.Unlock(argColorReg, RegCache::VEC_ARG_COLOR);
return true;
}
// Emits code computing the destination blend factor (id.AlphaBlendDst()) into dstFactorReg.
//
// srcFactorReg: the source factor register; reused where the dst factor equals the
//               src factor or is its inverse (per blendState.dstFactorIsInverse).
// dstReg:       the expanded destination color.
// For the dst factor, OTHERCOLOR refers to the source color. The FIX case (and any
// unlisted value) reads cached.alphaBlendDst from the ID.
// The source color must already be expanded to 16-bit lanes (asserted).
// Returns false if Jit_BlendFactor() reported failure.
bool PixelJitCache::Jit_DstBlendFactor(const PixelFuncID &id, RegCache::Reg srcFactorReg, RegCache::Reg dstFactorReg, RegCache::Reg dstReg) {
bool success = true;
X64Reg idReg = INVALID_REG;
X64Reg argColorReg = regCache_.Find(RegCache::VEC_ARG_COLOR);
_assert_(colorIs16Bit_);
PixelBlendState blendState;
ComputePixelBlendState(blendState, id);
switch (id.AlphaBlendDst()) {
case PixelBlendFactor::OTHERCOLOR:
MOVDQA(dstFactorReg, R(argColorReg));
break;
case PixelBlendFactor::INVOTHERCOLOR:
// invert constant - source color
MOVDQA(dstFactorReg, M(constBlendInvert_11_4s_));
PSUBUSW(dstFactorReg, R(argColorReg));
break;
case PixelBlendFactor::SRCALPHA:
case PixelBlendFactor::INVSRCALPHA:
case PixelBlendFactor::DSTALPHA:
case PixelBlendFactor::INVDSTALPHA:
case PixelBlendFactor::DOUBLESRCALPHA:
case PixelBlendFactor::DOUBLEINVSRCALPHA:
case PixelBlendFactor::DOUBLEDSTALPHA:
case PixelBlendFactor::DOUBLEINVDSTALPHA:
case PixelBlendFactor::ZERO:
case PixelBlendFactor::ONE:
// These are computed the same way for src and dst, so try to derive from the src factor first.
if (id.AlphaBlendSrc() == id.AlphaBlendDst()) {
// Same factor: copy it.
MOVDQA(dstFactorReg, R(srcFactorReg));
} else if (blendState.dstFactorIsInverse) {
// Inverse of the src factor: invert constant - src factor.
MOVDQA(dstFactorReg, M(constBlendInvert_11_4s_));
PSUBUSW(dstFactorReg, R(srcFactorReg));
} else {
// Unrelated: compute from scratch.
success = success && Jit_BlendFactor(id, dstFactorReg, dstReg, id.AlphaBlendDst());
}
break;
case PixelBlendFactor::FIX:
default:
// Fixed factor: load its bytes from the ID, zero-extend to 16-bit lanes, shift to the factor scale.
idReg = GetPixelID();
if (cpu_info.bSSE4_1) {
PMOVZXBW(dstFactorReg, MDisp(idReg, offsetof(PixelFuncID, cached.alphaBlendDst)));
} else {
X64Reg zeroReg = GetZeroVec();
MOVD_xmm(dstFactorReg, MDisp(idReg, offsetof(PixelFuncID, cached.alphaBlendDst)));
PUNPCKLBW(dstFactorReg, R(zeroReg));
regCache_.Unlock(zeroReg, RegCache::VEC_ZERO);
}
PSLLW(dstFactorReg, 4);
break;
}
if (idReg != INVALID_REG)
UnlockPixelID(idReg);
regCache_.Unlock(argColorReg, RegCache::VEC_ARG_COLOR);
return success;
}
// Emits code that adds a dither matrix entry, selected by (x & 3, y & 3), to the color.
// Emits nothing and returns true when id.dithering is not set.
//
// Register cache effects: GEN_COLOR_OFF is obtained and force-retained, while GEN_ARG_Y
// and GEN_ARG_X are force-released (the X register is modified in place first.)
// Afterwards the color is held in 16-bit lanes (colorIs16Bit_ is set.)  Always returns true.
bool PixelJitCache::Jit_Dither(const PixelFuncID &id) {
if (!id.dithering)
return true;
Describe("Dither");
X64Reg valueReg = regCache_.Alloc(RegCache::GEN_TEMP0);
// valueReg = y & 3, the row within the dither matrix (x is combined in below.)
X64Reg argYReg = regCache_.Find(RegCache::GEN_ARG_Y);
MOV(32, R(valueReg), R(argYReg));
AND(32, R(valueReg), Imm8(3));
// Grab the color offset now and retain it, since the x and y args are released below.
X64Reg colorOffReg = GetColorOff(id);
// NOTE(review): presumably GetColorOff() can change the current description - confirm.
Describe("Dither");
regCache_.Unlock(colorOffReg, RegCache::GEN_COLOR_OFF);
regCache_.ForceRetain(RegCache::GEN_COLOR_OFF);
// Done with y, release it so the register can be reused.
regCache_.Unlock(argYReg, RegCache::GEN_ARG_Y);
regCache_.ForceRelease(RegCache::GEN_ARG_Y);
// This clobbers the x arg in place (x &= 3); it's released once the lookup is done.
X64Reg argXReg = regCache_.Find(RegCache::GEN_ARG_X);
AND(32, R(argXReg), Imm32(3));
// valueReg = x + y * 4 + offsetof(ditherMatrix), i.e. the byte offset from the PixelFuncID pointer.
LEA(32, valueReg, MComplex(argXReg, valueReg, 4, offsetof(PixelFuncID, cached.ditherMatrix)));
if (regCache_.Has(RegCache::GEN_ARG_ID) || regCache_.Has(RegCache::GEN_ID)) {
// The ID pointer is in a register: load the matrix entry as a sign-extended byte.
X64Reg idReg = GetPixelID();
MOVSX(32, 8, valueReg, MRegSum(idReg, valueReg));
UnlockPixelID(idReg);
} else {
// Otherwise read the ID pointer from the stack, reusing the x register to hold it.
_assert_(stackIDOffset_ != -1);
MOV(PTRBITS, R(argXReg), MDisp(RSP, stackIDOffset_));
MOVSX(32, 8, valueReg, MRegSum(argXReg, valueReg));
}
regCache_.Unlock(argXReg, RegCache::GEN_ARG_X);
regCache_.ForceRelease(RegCache::GEN_ARG_X);
// Move the dither value into a vector register so it can be added to the color.
X64Reg vecValueReg = regCache_.Alloc(RegCache::VEC_TEMP0);
MOVD_xmm(vecValueReg, R(valueReg));
regCache_.Release(valueReg, RegCache::GEN_TEMP0);
// Broadcast 16-bit lane 0 into lanes 0-2, and copy lane 2 into lane 3.
// Lane 2 is zero since MOVD clears everything above the low 32 bits, so lane 3 gets no dither.
PSHUFLW(vecValueReg, R(vecValueReg), _MM_SHUFFLE(2, 0, 0, 0));
X64Reg argColorReg = regCache_.Find(RegCache::VEC_ARG_COLOR);
if (!colorIs16Bit_) {
// Zero-extend the 8-bit color channels to 16-bit lanes before the 16-bit add.
if (cpu_info.bSSE4_1) {
PMOVZXBW(argColorReg, R(argColorReg));
} else {
X64Reg zeroReg = GetZeroVec();
PUNPCKLBW(argColorReg, R(zeroReg));
regCache_.Unlock(zeroReg, RegCache::VEC_ZERO);
}
colorIs16Bit_ = true;
}
// Signed saturating 16-bit add of the dither value (which was sign-extended, so may be negative.)
PADDSW(argColorReg, R(vecValueReg));
regCache_.Release(vecValueReg, RegCache::VEC_TEMP0);
regCache_.Unlock(argColorReg, RegCache::VEC_ARG_COLOR);
return true;
}
// Emits the final framebuffer write for the pixel.
//
// Packs the color back to 8 bits per channel if needed, converts it to the framebuffer
// format (id.fbFormat), merges stencil from GEN_STENCIL when that register exists, applies
// the logic op and color write mask when enabled, and stores to the address in GEN_COLOR_OFF.
// In clear mode without a color clear, nothing or only stencil (via Jit_WriteStencilOnly) is written.
//
// Force-releases VEC_ARG_COLOR, GEN_COLOR_OFF, GEN_ARG_X/GEN_ARG_Y (if still held), and GEN_STENCIL
// (if held.)  Returns false if any of the helper emitters it calls reports failure.
bool PixelJitCache::Jit_WriteColor(const PixelFuncID &id) {
X64Reg colorOff = GetColorOff(id);
Describe("WriteColor");
if (regCache_.Has(RegCache::GEN_ARG_X)) {
// x and y aren't used past this point, so free them, but keep GEN_COLOR_OFF retained.
regCache_.ForceRelease(RegCache::GEN_ARG_X);
regCache_.ForceRelease(RegCache::GEN_ARG_Y);
regCache_.ForceRetain(RegCache::GEN_COLOR_OFF);
}
// Pack 16-bit lanes back down to 8-bit with unsigned saturation.
X64Reg argColorReg = regCache_.Find(RegCache::VEC_ARG_COLOR);
if (colorIs16Bit_) {
PACKUSWB(argColorReg, R(argColorReg));
colorIs16Bit_ = false;
}
if (id.clearMode) {
bool drawingDone = false;
// Clearing neither color nor stencil leaves nothing to write here.
if (!id.ColorClear() && !id.StencilClear())
drawingDone = true;
// Without a color clear, 565 is also treated as nothing to write.
// NOTE(review): presumably because 565 has no stencil bits to clear - confirm.
if (!id.ColorClear() && id.FBFormat() == GE_FORMAT_565)
drawingDone = true;
bool success = true;
if (!id.ColorClear() && !drawingDone) {
// Stencil-only clear: hand the alpha byte (top 8 bits of the color) to Jit_WriteStencilOnly.
X64Reg alphaReg;
if (regCache_.Has(RegCache::GEN_SRC_ALPHA)) {
alphaReg = regCache_.Find(RegCache::GEN_SRC_ALPHA);
} else {
alphaReg = regCache_.Alloc(RegCache::GEN_SRC_ALPHA);
MOVD_xmm(R(alphaReg), argColorReg);
SHR(32, R(alphaReg), Imm8(24));
}
success = Jit_WriteStencilOnly(id, alphaReg);
regCache_.Release(alphaReg, RegCache::GEN_SRC_ALPHA);
drawingDone = true;
}
if (drawingDone) {
// Release what the normal path would have released, and skip the color write.
regCache_.Unlock(argColorReg, RegCache::VEC_ARG_COLOR);
regCache_.ForceRelease(RegCache::VEC_ARG_COLOR);
regCache_.Unlock(colorOff, RegCache::GEN_COLOR_OFF);
regCache_.ForceRelease(RegCache::GEN_COLOR_OFF);
return success;
}
// Otherwise color is being cleared (with or without stencil), continue as a normal write.
}
// Move the packed 32-bit color to a general purpose register; the vector copy is done with.
X64Reg colorReg = regCache_.Alloc(RegCache::GEN_TEMP0);
MOVD_xmm(R(colorReg), argColorReg);
regCache_.Unlock(argColorReg, RegCache::VEC_ARG_COLOR);
regCache_.ForceRelease(RegCache::VEC_ARG_COLOR);
X64Reg stencilReg = INVALID_REG;
if (regCache_.Has(RegCache::GEN_STENCIL))
stencilReg = regCache_.Find(RegCache::GEN_STENCIL);
X64Reg temp1Reg = regCache_.Alloc(RegCache::GEN_TEMP1);
X64Reg temp2Reg = regCache_.Alloc(RegCache::GEN_TEMP2);
// convertAlpha: keep the color's alpha through format conversion (clear mode with stencil clear.)
bool convertAlpha = id.clearMode && id.StencilClear();
// writeAlpha: the alpha/stencil bits of the destination are being replaced.
bool writeAlpha = convertAlpha || stencilReg != INVALID_REG;
// Destination bits to preserve when there's no write mask register (0 = overwrite everything.)
uint32_t fixedKeepMask = 0x00000000;
bool success = true;
// Step 1: convert the color to the framebuffer format, and move stencil to its bit position.
switch (id.fbFormat) {
case GE_FORMAT_565:
success = success && Jit_ConvertTo565(id, colorReg, temp1Reg, temp2Reg);
break;
case GE_FORMAT_5551:
success = success && Jit_ConvertTo5551(id, colorReg, temp1Reg, temp2Reg, convertAlpha);
if (stencilReg != INVALID_REG) {
// Keep only the top bit of the 8-bit stencil, placed at bit 15.
SHR(32, R(stencilReg), Imm8(7));
SHL(32, R(stencilReg), Imm8(15));
} else if (!writeAlpha) {
fixedKeepMask = 0x8000;
}
break;
case GE_FORMAT_4444:
success = success && Jit_ConvertTo4444(id, colorReg, temp1Reg, temp2Reg, convertAlpha);
if (stencilReg != INVALID_REG) {
// Keep only the top 4 bits of the 8-bit stencil, placed at bits 12-15.
SHR(32, R(stencilReg), Imm8(4));
SHL(32, R(stencilReg), Imm8(12));
} else if (!writeAlpha) {
fixedKeepMask = 0xF000;
}
break;
case GE_FORMAT_8888:
if (stencilReg != INVALID_REG) {
// Move stencil to the top byte, and clear that byte in the color to make room.
SHL(32, R(stencilReg), Imm8(24));
AND(32, R(colorReg), Imm32(0x00FFFFFF));
} else if (!writeAlpha) {
fixedKeepMask = 0xFF000000;
}
break;
}
// Step 2: load the write mask (cached.colorWriteMask from the PixelFuncID) if it applies.
// As used below, set bits in the mask are destination bits that are kept.
Describe("WriteColor");
X64Reg maskReg = INVALID_REG;
if (id.applyColorWriteMask) {
maskReg = regCache_.Alloc(RegCache::GEN_TEMP3);
if (regCache_.Has(RegCache::GEN_ARG_ID) || regCache_.Has(RegCache::GEN_ID)) {
X64Reg idReg = GetPixelID();
MOV(32, R(maskReg), MDisp(idReg, offsetof(PixelFuncID, cached.colorWriteMask)));
UnlockPixelID(idReg);
} else {
// No ID register: read the ID pointer from the stack, using maskReg to hold it temporarily.
_assert_(stackIDOffset_ != -1);
MOV(PTRBITS, R(maskReg), MDisp(RSP, stackIDOffset_));
MOV(32, R(maskReg), MDisp(maskReg, offsetof(PixelFuncID, cached.colorWriteMask)));
}
}
// temp2 isn't needed from here on, release it before the logic op allocates more registers.
regCache_.Release(temp2Reg, RegCache::GEN_TEMP2);
// Step 3: apply the logic op (which also merges stencil), or just merge stencil.
// Logic op paths that write memory themselves jump past step 4 using skipStandardWrites_.
skipStandardWrites_.clear();
if (id.applyLogicOp) {
success = success && Jit_ApplyLogicOp(id, colorReg, maskReg);
} else if (stencilReg != INVALID_REG) {
OR(32, R(colorReg), R(stencilReg));
}
// Step 4: the standard write, applying the write mask or fixed keep mask.
Describe("WriteColor");
switch (id.fbFormat) {
case GE_FORMAT_565:
case GE_FORMAT_5551:
case GE_FORMAT_4444:
if (maskReg != INVALID_REG) {
// Keep only masked bits in memory, clear those same bits in the color, then combine.
AND(16, MatR(colorOff), R(maskReg));
if (cpu_info.bBMI1) {
ANDN(32, colorReg, maskReg, R(colorReg));
} else {
NOT(32, R(maskReg));
AND(32, R(colorReg), R(maskReg));
}
OR(16, MatR(colorOff), R(colorReg));
} else if (fixedKeepMask == 0) {
MOV(16, MatR(colorOff), R(colorReg));
} else {
// Preserve only the kept bits in memory, then OR in the color.
AND(16, MatR(colorOff), Imm16((uint16_t)fixedKeepMask));
OR(16, MatR(colorOff), R(colorReg));
}
break;
case GE_FORMAT_8888:
if (maskReg != INVALID_REG) {
// Same as the 16-bit case, using 32-bit memory operations.
AND(32, MatR(colorOff), R(maskReg));
if (cpu_info.bBMI1) {
ANDN(32, colorReg, maskReg, R(colorReg));
} else {
NOT(32, R(maskReg));
AND(32, R(colorReg), R(maskReg));
}
OR(32, MatR(colorOff), R(colorReg));
} else if (fixedKeepMask == 0) {
MOV(32, MatR(colorOff), R(colorReg));
} else if (fixedKeepMask == 0xFF000000) {
// Write just the low 24 bits as a 16-bit store plus an 8-bit store, leaving the top byte untouched.
MOV(16, MatR(colorOff), R(colorReg));
SHR(32, R(colorReg), Imm8(16));
MOV(8, MDisp(colorOff, 2), R(colorReg));
} else {
AND(32, MatR(colorOff), Imm32(fixedKeepMask));
OR(32, MatR(colorOff), R(colorReg));
}
break;
}
// Logic op paths that already wrote memory land here, after the standard write.
for (FixupBranch &fixup : skipStandardWrites_)
SetJumpTarget(fixup);
skipStandardWrites_.clear();
regCache_.Unlock(colorOff, RegCache::GEN_COLOR_OFF);
regCache_.ForceRelease(RegCache::GEN_COLOR_OFF);
regCache_.Release(colorReg, RegCache::GEN_TEMP0);
regCache_.Release(temp1Reg, RegCache::GEN_TEMP1);
if (maskReg != INVALID_REG)
regCache_.Release(maskReg, RegCache::GEN_TEMP3);
if (stencilReg != INVALID_REG) {
regCache_.Unlock(stencilReg, RegCache::GEN_STENCIL);
regCache_.ForceRelease(RegCache::GEN_STENCIL);
}
return success;
}
// Emits code that applies the logic op, selected at runtime, to the pixel being written.
//
// The logic op is read from cached.logicOp in the PixelFuncID and used as an index into
// a 16 entry jump table emitted inline.  Each entry ends in one of three ways:
//  - it leaves the final value in colorReg and jumps to the end of this code ("finishes"),
//    so the caller's standard write stores it;
//  - it updates memory at GEN_COLOR_OFF itself and adds a branch to skipStandardWrites_,
//    which the caller must resolve after its standard write; or
//  - it discards the pixel (NOOP with no stencil to write.)
//
// colorReg: the color, already converted to the framebuffer format.
// maskReg: the color write mask or INVALID_REG.  Set bits are destination bits to keep.
//          Several entries modify maskReg, which is fine as only one entry ever executes.
//
// When GEN_STENCIL is present, the stencil bits are merged as part of each entry.
// Always returns true.
bool PixelJitCache::Jit_ApplyLogicOp(const PixelFuncID &id, RegCache::Reg colorReg, RegCache::Reg maskReg) {
	Describe("LogicOp");

	// Load the logic op index, either through the ID register or the ID pointer on the stack.
	X64Reg logicOpReg = regCache_.Alloc(RegCache::GEN_TEMP4);
	if (regCache_.Has(RegCache::GEN_ARG_ID) || regCache_.Has(RegCache::GEN_ID)) {
		X64Reg idReg = GetPixelID();
		MOVZX(32, 8, logicOpReg, MDisp(idReg, offsetof(PixelFuncID, cached.logicOp)));
		UnlockPixelID(idReg);
	} else {
		_assert_(stackIDOffset_ != -1);
		MOV(PTRBITS, R(logicOpReg), MDisp(RSP, stackIDOffset_));
		MOVZX(32, 8, logicOpReg, MDisp(logicOpReg, offsetof(PixelFuncID, cached.logicOp)));
	}

	X64Reg stencilReg = INVALID_REG;
	if (regCache_.Has(RegCache::GEN_STENCIL))
		stencilReg = regCache_.Find(RegCache::GEN_STENCIL);

	X64Reg colorOff = regCache_.Find(RegCache::GEN_COLOR_OFF);
	X64Reg temp1Reg = regCache_.Alloc(RegCache::GEN_TEMP5);

	// Width of memory operations, and the stencil bits (and their complement) per format.
	int bits = id.fbFormat == GE_FORMAT_8888 ? 32 : 16;
	OpArg stencilMask, notStencilMask;
	switch (id.fbFormat) {
	case GE_FORMAT_565:
		stencilMask = Imm16(0);
		notStencilMask = Imm16(0xFFFF);
		break;
	case GE_FORMAT_5551:
		stencilMask = Imm16(0x8000);
		notStencilMask = Imm16(0x7FFF);
		break;
	case GE_FORMAT_4444:
		stencilMask = Imm16(0xF000);
		notStencilMask = Imm16(0x0FFF);
		break;
	case GE_FORMAT_8888:
		stencilMask = Imm32(0xFF000000);
		notStencilMask = Imm32(0x00FFFFFF);
		break;
	}

	// Branches from entries that leave their result in colorReg for the standard write.
	std::vector<FixupBranch> finishes;
	finishes.reserve(11);

	// The entries and table are emitted inline, so jump over them to the dispatch below.
	FixupBranch skipTable = J(true);
	const u8 *tableValues[16]{};

	// CLEAR: result is 0.
	tableValues[GE_LOGIC_CLEAR] = GetCodePointer();
	if (stencilReg != INVALID_REG) {
		// Zero color plus stencil is just the stencil.
		MOV(32, R(colorReg), R(stencilReg));
		finishes.push_back(J(true));
	} else if (maskReg != INVALID_REG) {
		// Keep only the masked bits in memory.
		AND(bits, MatR(colorOff), R(maskReg));
		skipStandardWrites_.push_back(J(true));
	} else {
		// Keep only the stencil bits in memory.
		AND(bits, MatR(colorOff), stencilMask);
		skipStandardWrites_.push_back(J(true));
	}

	// AND: result is src & dst, done as an AND directly into memory.
	tableValues[GE_LOGIC_AND] = GetCodePointer();
	if (stencilReg != INVALID_REG && maskReg != INVALID_REG) {
		// Set the masked bits in the color so the AND keeps them as they are in memory.
		OR(32, R(colorReg), R(maskReg));
		OR(32, R(colorReg), R(stencilReg));
		// Set the unmasked stencil bits in memory so the AND leaves our stencil there.
		NOT(32, R(maskReg));
		AND(bits, R(maskReg), stencilMask);
		OR(bits, MatR(colorOff), R(maskReg));
	} else if (stencilReg != INVALID_REG) {
		OR(32, R(colorReg), R(stencilReg));
		// Set all stencil bits in memory so the AND leaves our stencil there.
		OR(bits, MatR(colorOff), stencilMask);
	} else if (maskReg != INVALID_REG) {
		// Set the masked bits in the color so the AND keeps them as they are in memory.
		OR(32, R(colorReg), R(maskReg));
	} else {
		// Set the stencil bits in the color so the AND keeps the existing stencil.
		if (stencilMask.GetImmValue() != 0)
			OR(bits, R(colorReg), stencilMask);
	}
	AND(bits, MatR(colorOff), R(colorReg));
	skipStandardWrites_.push_back(J(true));

	// AND_REVERSE: result is src & ~dst.
	tableValues[GE_LOGIC_AND_REVERSE] = GetCodePointer();
	MOV(bits, R(temp1Reg), MatR(colorOff));
	if (cpu_info.bBMI1) {
		// ANDN computes ~temp1 & color in one instruction.
		ANDN(32, colorReg, temp1Reg, R(colorReg));
	} else {
		NOT(32, R(temp1Reg));
		AND(32, R(colorReg), R(temp1Reg));
	}
	if (stencilReg != INVALID_REG) {
		OR(32, R(colorReg), R(stencilReg));
	}
	finishes.push_back(J(true));

	// COPY: result is src, a plain write.
	tableValues[GE_LOGIC_COPY] = GetCodePointer();
	if (stencilReg != INVALID_REG) {
		OR(32, R(colorReg), R(stencilReg));
	}
	finishes.push_back(J(true));

	// AND_INVERTED: result is ~src & dst, done as an AND directly into memory.
	tableValues[GE_LOGIC_AND_INVERTED] = GetCodePointer();
	if (stencilReg != INVALID_REG) {
		// Set the stencil bits first, so they are zero after inverting and stencil can be ORed in.
		OR(bits, R(colorReg), stencilMask);
		NOT(32, R(colorReg));
		OR(32, R(colorReg), R(stencilReg));
		if (maskReg != INVALID_REG) {
			// Set the masked bits in the color so the AND keeps them as they are in memory.
			OR(32, R(colorReg), R(maskReg));
			// Set the unmasked stencil bits in memory so the AND leaves our stencil there.
			NOT(32, R(maskReg));
			AND(bits, R(maskReg), stencilMask);
			OR(bits, MatR(colorOff), R(maskReg));
		} else {
			// Set all stencil bits in memory so the AND leaves our stencil there.
			OR(bits, MatR(colorOff), stencilMask);
		}
	} else if (maskReg != INVALID_REG) {
		NOT(32, R(colorReg));
		// Set the masked bits in the color so the AND keeps them as they are in memory.
		OR(32, R(colorReg), R(maskReg));
	} else {
		NOT(32, R(colorReg));
		// Set the stencil bits so the AND keeps the existing stencil.  Only done for 8888 here.
		if (id.FBFormat() == GE_FORMAT_8888)
			OR(bits, R(colorReg), stencilMask);
	}
	AND(bits, MatR(colorOff), R(colorReg));
	skipStandardWrites_.push_back(J(true));

	// NOOP: result is dst, so only stencil (if any) gets written.
	tableValues[GE_LOGIC_NOOP] = GetCodePointer();
	if (stencilReg != INVALID_REG && maskReg != INVALID_REG) {
		// Remove the masked bits from the stencil value (maskReg is restored afterward.)
		if (cpu_info.bBMI1) {
			ANDN(32, stencilReg, maskReg, R(stencilReg));
		} else {
			NOT(32, R(maskReg));
			AND(32, R(stencilReg), R(maskReg));
			NOT(32, R(maskReg));
		}
		// Clear only the unmasked stencil bits in memory, then set ours.
		OR(bits, R(maskReg), notStencilMask);
		AND(bits, MatR(colorOff), R(maskReg));
		OR(bits, MatR(colorOff), R(stencilReg));
		skipStandardWrites_.push_back(J(true));
	} else if (stencilReg != INVALID_REG) {
		// Replace just the stencil bits in memory.
		AND(bits, MatR(colorOff), notStencilMask);
		OR(bits, MatR(colorOff), R(stencilReg));
		skipStandardWrites_.push_back(J(true));
	} else {
		// Nothing would change, so skip the pixel entirely.
		Discard();
	}

	// XOR: result is src ^ dst.
	tableValues[GE_LOGIC_XOR] = GetCodePointer();
	XOR(bits, R(colorReg), MatR(colorOff));
	if (stencilReg != INVALID_REG) {
		// Drop whatever the op left in the stencil bits, and put our stencil there.
		AND(bits, R(colorReg), notStencilMask);
		OR(32, R(colorReg), R(stencilReg));
	} else if (maskReg == INVALID_REG && stencilMask.GetImmValue() != 0) {
		// With no mask applied later, clear the stencil bits the op may have set.
		AND(bits, R(colorReg), notStencilMask);
	}
	finishes.push_back(J(true));

	// OR: result is src | dst, done as an OR directly into memory.
	tableValues[GE_LOGIC_OR] = GetCodePointer();
	if (stencilReg != INVALID_REG && maskReg != INVALID_REG) {
		OR(32, R(colorReg), R(stencilReg));
		// Clear the masked bits from the color so the OR leaves them alone (maskReg is restored.)
		if (cpu_info.bBMI1) {
			ANDN(32, colorReg, maskReg, R(colorReg));
		} else {
			NOT(32, R(maskReg));
			AND(32, R(colorReg), R(maskReg));
			NOT(32, R(maskReg));
		}
		// Clear the unmasked stencil bits in memory so the OR sets exactly our stencil.
		OR(bits, R(maskReg), notStencilMask);
		AND(bits, MatR(colorOff), R(maskReg));
	} else if (stencilReg != INVALID_REG) {
		OR(32, R(colorReg), R(stencilReg));
		// Clear the stencil bits in memory so the OR sets exactly our stencil.
		AND(bits, MatR(colorOff), notStencilMask);
	} else if (maskReg != INVALID_REG) {
		// Clear the masked bits from the color so the OR leaves them alone.
		if (cpu_info.bBMI1) {
			ANDN(32, colorReg, maskReg, R(colorReg));
		} else {
			NOT(32, R(maskReg));
			AND(32, R(colorReg), R(maskReg));
		}
	} else if (id.FBFormat() == GE_FORMAT_8888) {
		// Clear the stencil bits from the color so the OR keeps the existing stencil.
		AND(bits, R(colorReg), notStencilMask);
	}
	OR(bits, MatR(colorOff), R(colorReg));
	skipStandardWrites_.push_back(J(true));

	// NOR: result is ~(src | dst).
	tableValues[GE_LOGIC_NOR] = GetCodePointer();
	OR(bits, R(colorReg), MatR(colorOff));
	NOT(32, R(colorReg));
	if (stencilReg != INVALID_REG) {
		AND(bits, R(colorReg), notStencilMask);
		OR(32, R(colorReg), R(stencilReg));
	} else if (maskReg == INVALID_REG && stencilMask.GetImmValue() != 0) {
		AND(bits, R(colorReg), notStencilMask);
	}
	finishes.push_back(J(true));

	// EQUIV: result is ~(src ^ dst).
	tableValues[GE_LOGIC_EQUIV] = GetCodePointer();
	XOR(bits, R(colorReg), MatR(colorOff));
	NOT(32, R(colorReg));
	if (stencilReg != INVALID_REG) {
		AND(bits, R(colorReg), notStencilMask);
		OR(32, R(colorReg), R(stencilReg));
	} else if (maskReg == INVALID_REG && stencilMask.GetImmValue() != 0) {
		AND(bits, R(colorReg), notStencilMask);
	}
	finishes.push_back(J(true));

	// INVERTED: result is ~dst, the source color is not used.
	tableValues[GE_LOGIC_INVERTED] = GetCodePointer();
	MOV(bits, R(colorReg), MatR(colorOff));
	NOT(32, R(colorReg));
	if (stencilReg != INVALID_REG) {
		AND(bits, R(colorReg), notStencilMask);
		OR(32, R(colorReg), R(stencilReg));
	} else if (maskReg == INVALID_REG && stencilMask.GetImmValue() != 0) {
		AND(bits, R(colorReg), notStencilMask);
	}
	finishes.push_back(J(true));

	// OR_REVERSE: result is src | ~dst.
	tableValues[GE_LOGIC_OR_REVERSE] = GetCodePointer();
	MOV(bits, R(temp1Reg), MatR(colorOff));
	NOT(32, R(temp1Reg));
	OR(32, R(colorReg), R(temp1Reg));
	if (stencilReg != INVALID_REG) {
		AND(bits, R(colorReg), notStencilMask);
		OR(32, R(colorReg), R(stencilReg));
	} else if (maskReg == INVALID_REG && stencilMask.GetImmValue() != 0) {
		AND(bits, R(colorReg), notStencilMask);
	}
	finishes.push_back(J(true));

	// COPY_INVERTED: result is ~src.
	tableValues[GE_LOGIC_COPY_INVERTED] = GetCodePointer();
	NOT(32, R(colorReg));
	if (stencilReg != INVALID_REG) {
		AND(bits, R(colorReg), notStencilMask);
		OR(32, R(colorReg), R(stencilReg));
	} else if (maskReg == INVALID_REG && stencilMask.GetImmValue() != 0) {
		AND(bits, R(colorReg), notStencilMask);
	}
	finishes.push_back(J(true));

	// OR_INVERTED: result is ~src | dst, done as an OR directly into memory.
	tableValues[GE_LOGIC_OR_INVERTED] = GetCodePointer();
	NOT(32, R(colorReg));
	if (stencilReg != INVALID_REG && maskReg != INVALID_REG) {
		AND(bits, R(colorReg), notStencilMask);
		OR(32, R(colorReg), R(stencilReg));
		// Clear the masked bits from the color so the OR leaves them alone (maskReg is restored.)
		if (cpu_info.bBMI1) {
			ANDN(32, colorReg, maskReg, R(colorReg));
		} else {
			NOT(32, R(maskReg));
			AND(32, R(colorReg), R(maskReg));
			NOT(32, R(maskReg));
		}
		// Clear the unmasked stencil bits in memory so the OR sets exactly our stencil.
		OR(bits, R(maskReg), notStencilMask);
		AND(bits, MatR(colorOff), R(maskReg));
	} else if (stencilReg != INVALID_REG) {
		AND(bits, R(colorReg), notStencilMask);
		OR(32, R(colorReg), R(stencilReg));
		AND(bits, MatR(colorOff), notStencilMask);
	} else if (maskReg != INVALID_REG) {
		// Clear the masked bits from the color so the OR leaves them alone.
		NOT(32, R(maskReg));
		AND(32, R(colorReg), R(maskReg));
	} else if (id.FBFormat() == GE_FORMAT_8888) {
		AND(bits, R(colorReg), notStencilMask);
	}
	OR(bits, MatR(colorOff), R(colorReg));
	skipStandardWrites_.push_back(J(true));

	// NAND: result is ~(src & dst).
	tableValues[GE_LOGIC_NAND] = GetCodePointer();
	// The AND must target colorReg: temp1Reg holds nothing useful on this path, and the
	// NOT below has to invert the combined value, not just the source color.
	AND(bits, R(colorReg), MatR(colorOff));
	NOT(32, R(colorReg));
	if (stencilReg != INVALID_REG) {
		AND(bits, R(colorReg), notStencilMask);
		OR(32, R(colorReg), R(stencilReg));
	} else if (maskReg == INVALID_REG && stencilMask.GetImmValue() != 0) {
		AND(bits, R(colorReg), notStencilMask);
	}
	finishes.push_back(J(true));

	// SET: result is all ones.
	tableValues[GE_LOGIC_SET] = GetCodePointer();
	if (stencilReg != INVALID_REG && maskReg != INVALID_REG) {
		// All color bits set plus our stencil, the standard write applies the mask.
		OR(32, R(colorReg), R(stencilReg));
		OR(bits, R(colorReg), notStencilMask);
		finishes.push_back(J(true));
	} else if (stencilReg != INVALID_REG) {
		// Set every non-stencil bit in stencilReg, and store that directly.
		OR(bits, R(stencilReg), notStencilMask);
		MOV(bits, MatR(colorOff), R(stencilReg));
		skipStandardWrites_.push_back(J(true));
	} else if (maskReg != INVALID_REG) {
		// Set all the unmasked bits in memory.
		NOT(32, R(maskReg));
		OR(bits, MatR(colorOff), R(maskReg));
		skipStandardWrites_.push_back(J(true));
	} else {
		// Set all the non-stencil bits in memory.
		OR(bits, MatR(colorOff), notStencilMask);
		skipStandardWrites_.push_back(J(true));
	}

	// Emit the table of entry addresses, 8 bytes each, right after the entries.
	const u8 *tablePtr = GetCodePointer();
	for (int i = 0; i < 16; ++i) {
		Write64((uintptr_t)tableValues[i]);
	}

	// Dispatch: jump to the entry for the logic op loaded above.
	SetJumpTarget(skipTable);
	LEA(64, temp1Reg, M(tablePtr));
	JMPptr(MComplex(temp1Reg, logicOpReg, 8, 0));

	// Entries that left their result in colorReg continue here, into the caller's standard write.
	for (FixupBranch &fixup : finishes)
		SetJumpTarget(fixup);

	regCache_.Unlock(colorOff, RegCache::GEN_COLOR_OFF);
	regCache_.Release(logicOpReg, RegCache::GEN_TEMP4);
	regCache_.Release(temp1Reg, RegCache::GEN_TEMP5);
	if (stencilReg != INVALID_REG)
		regCache_.Unlock(stencilReg, RegCache::GEN_STENCIL);

	return true;
}
// Emits code converting the color in colorReg from 8 bits per channel to 565, in place.
// Takes the top 5 bits of the low byte, top 6 of the second byte, and top 5 of the third.
// temp1Reg is always clobbered, temp2Reg only without fast BMI2.  Always returns true.
bool PixelJitCache::Jit_ConvertTo565(const PixelFuncID &id, RegCache::Reg colorReg, RegCache::Reg temp1Reg, RegCache::Reg temp2Reg) {
	Describe("ConvertTo565");

	// Source bits to keep (5, 6, and 5 from the top of each of the three low bytes.)
	constexpr uint32_t kKeptSourceBits = 0x00F8FCF8;
	// Where each channel lives within the 16-bit result.
	constexpr uint16_t kLowChannel = 0x001F;
	constexpr uint16_t kMidChannel = 0x07E0;
	constexpr uint16_t kHighChannel = 0xF800;

	if (!cpu_info.bBMI2_fast) {
		// Low channel: bits 3-7 move down to 0-4.
		MOV(32, R(temp1Reg), R(colorReg));
		SHR(32, R(temp1Reg), Imm8(3));
		AND(16, R(temp1Reg), Imm16(kLowChannel));

		// Middle channel: bits 10-15 move down to 5-10.
		MOV(32, R(temp2Reg), R(colorReg));
		SHR(32, R(temp2Reg), Imm8(5));
		AND(16, R(temp2Reg), Imm16(kMidChannel));
		OR(32, R(temp1Reg), R(temp2Reg));

		// High channel: bits 19-23 move down to 11-15, then merge the other two.
		SHR(32, R(colorReg), Imm8(8));
		AND(16, R(colorReg), Imm16(kHighChannel));
		OR(32, R(colorReg), R(temp1Reg));
	} else {
		// PEXT gathers exactly the kept bits into the low bits of the result.
		MOV(32, R(temp1Reg), Imm32(kKeptSourceBits));
		PEXT(32, colorReg, colorReg, R(temp1Reg));
	}

	return true;
}
// Emits code converting the color in colorReg from 8 bits per channel to 5551, in place.
// Takes the top 5 bits of each of the three low bytes.  When keepAlpha is set, the top bit
// of the fourth byte becomes bit 15, otherwise the fast BMI2 path leaves bit 15 clear.
// temp1Reg is always clobbered, temp2Reg only without fast BMI2.  Always returns true.
bool PixelJitCache::Jit_ConvertTo5551(const PixelFuncID &id, RegCache::Reg colorReg, RegCache::Reg temp1Reg, RegCache::Reg temp2Reg, bool keepAlpha) {
	Describe("ConvertTo5551");

	// Where each color channel lives within the 16-bit result.
	constexpr uint16_t kLowChannel = 0x001F;
	constexpr uint16_t kMidChannel = 0x03E0;
	constexpr uint16_t kHighChannel = 0x7C00;

	if (!cpu_info.bBMI2_fast) {
		// Low channel: bits 3-7 move down to 0-4.
		MOV(32, R(temp1Reg), R(colorReg));
		SHR(32, R(temp1Reg), Imm8(3));
		AND(16, R(temp1Reg), Imm16(kLowChannel));

		// Middle channel: bits 11-15 move down to 5-9.
		MOV(32, R(temp2Reg), R(colorReg));
		SHR(32, R(temp2Reg), Imm8(6));
		AND(16, R(temp2Reg), Imm16(kMidChannel));
		OR(32, R(temp1Reg), R(temp2Reg));

		if (keepAlpha) {
			// Save bit 31 as bit 15 in temp2, before colorReg gets shifted.
			MOV(32, R(temp2Reg), R(colorReg));
			SHR(32, R(temp2Reg), Imm8(31));
			SHL(32, R(temp2Reg), Imm8(15));
		}

		// High channel: bits 19-23 move down to 10-14, then merge the other two.
		SHR(32, R(colorReg), Imm8(9));
		AND(16, R(colorReg), Imm16(kHighChannel));
		OR(32, R(colorReg), R(temp1Reg));

		if (keepAlpha)
			OR(32, R(colorReg), R(temp2Reg));
	} else {
		// PEXT gathers exactly the kept bits, including bit 31 only when keeping alpha.
		const uint32_t keptSourceBits = keepAlpha ? 0x80F8F8F8 : 0x00F8F8F8;
		MOV(32, R(temp1Reg), Imm32(keptSourceBits));
		PEXT(32, colorReg, colorReg, R(temp1Reg));
	}

	return true;
}
// Emits code converting the color in colorReg from 8 bits per channel to 4444, in place.
// Takes the top 4 bits of each of the three low bytes.  When keepAlpha is set, the top
// 4 bits of the fourth byte become bits 12-15, otherwise the fast BMI2 path leaves them clear.
// temp1Reg is always clobbered, temp2Reg only without fast BMI2.  Always returns true.
bool PixelJitCache::Jit_ConvertTo4444(const PixelFuncID &id, RegCache::Reg colorReg, RegCache::Reg temp1Reg, RegCache::Reg temp2Reg, bool keepAlpha) {
	Describe("ConvertTo4444");

	// Where each color channel lives within the 16-bit result.
	constexpr uint16_t kLowChannel = 0x000F;
	constexpr uint16_t kMidChannel = 0x00F0;
	constexpr uint16_t kHighChannel = 0x0F00;

	if (!cpu_info.bBMI2_fast) {
		// Low channel: bits 4-7 move down to 0-3.
		MOV(32, R(temp1Reg), R(colorReg));
		SHR(32, R(temp1Reg), Imm8(4));
		AND(16, R(temp1Reg), Imm16(kLowChannel));

		// Middle channel: bits 12-15 move down to 4-7.
		MOV(32, R(temp2Reg), R(colorReg));
		SHR(32, R(temp2Reg), Imm8(8));
		AND(16, R(temp2Reg), Imm16(kMidChannel));
		OR(32, R(temp1Reg), R(temp2Reg));

		if (keepAlpha) {
			// Save bits 28-31 as bits 12-15 in temp2, before colorReg gets shifted.
			MOV(32, R(temp2Reg), R(colorReg));
			SHR(32, R(temp2Reg), Imm8(28));
			SHL(32, R(temp2Reg), Imm8(12));
		}

		// High channel: bits 20-23 move down to 8-11, then merge the other two.
		SHR(32, R(colorReg), Imm8(12));
		AND(16, R(colorReg), Imm16(kHighChannel));
		OR(32, R(colorReg), R(temp1Reg));

		if (keepAlpha)
			OR(32, R(colorReg), R(temp2Reg));
	} else {
		// PEXT gathers the top nibble of each byte, skipping the fourth unless keeping alpha.
		const uint32_t keptSourceBits = keepAlpha ? 0xF0F0F0F0 : 0x00F0F0F0;
		MOV(32, R(temp1Reg), Imm32(keptSourceBits));
		PEXT(32, colorReg, colorReg, R(temp1Reg));
	}

	return true;
}
// Emits code expanding the 565 color in colorReg to 8 bits per channel, in place.
// Each channel is moved to the top of its byte, and the channel's own top bits are
// repeated in the low bits that remain (3 bits for the 5-bit channels, 2 for the 6-bit one.)
// The fourth byte of the result is not written by the bit operations here.
// Clobbers temp1Reg and temp2Reg.  Always returns true.
bool PixelJitCache::Jit_ConvertFrom565(const PixelFuncID &id, RegCache::Reg colorReg, RegCache::Reg temp1Reg, RegCache::Reg temp2Reg) {
	Describe("ConvertFrom565");

	// Low bits of each result byte, which get filled by repeating the channel's top bits.
	constexpr uint32_t kFillBits = 0x00070307;

	if (!cpu_info.bBMI2_fast) {
		// Low channel: bits 0-4 move up to 3-7 (kept in temp1.)
		MOV(32, R(temp1Reg), R(colorReg));
		AND(16, R(temp1Reg), Imm16(0x001F));
		SHL(32, R(temp1Reg), Imm8(3));

		// High channel: bits 11-15 move up to 19-23, merged into temp1.
		MOV(32, R(temp2Reg), R(colorReg));
		AND(16, R(temp2Reg), Imm16(0xF800));
		SHL(32, R(temp2Reg), Imm8(8));
		OR(32, R(temp1Reg), R(temp2Reg));

		// Copy both 5-bit channels to temp2, shifted up 1 so one shift below suits all channels.
		OR(32, R(temp2Reg), R(temp1Reg));
		SHL(32, R(temp2Reg), Imm8(1));

		// Middle channel: bits 5-10 move up to 10-15.
		AND(16, R(colorReg), Imm16(0x07E0));
		SHL(32, R(colorReg), Imm8(5));
		// temp2 gets it for the fill bits, and colorReg takes the other two channels.
		OR(32, R(temp2Reg), R(colorReg));
		OR(32, R(colorReg), R(temp1Reg));

		// Shift the top bits of each channel down into the fill positions, and merge.
		SHR(32, R(temp2Reg), Imm8(6));
		AND(32, R(temp2Reg), Imm32(kFillBits));
		OR(32, R(colorReg), R(temp2Reg));
	} else {
		// Deposit each channel at the top of its byte (kept in temp1.)
		MOV(32, R(temp1Reg), Imm32(0x00F8FCF8));
		PDEP(32, temp1Reg, colorReg, R(temp1Reg));

		// Gather the top bits of each channel from the 565 value...
		MOV(32, R(temp2Reg), Imm32(0x0000E61C));
		PEXT(32, colorReg, colorReg, R(temp2Reg));
		// ...and deposit them into the fill positions.
		MOV(32, R(temp2Reg), Imm32(kFillBits));
		PDEP(32, colorReg, colorReg, R(temp2Reg));

		OR(32, R(colorReg), R(temp1Reg));
	}

	return true;
}
// Emits code expanding the 5551 color in colorReg to 8 bits per channel, in place.
// Each 5-bit channel is moved to the top of its byte, with its top 3 bits repeated in
// the low 3 bits.  When keepAlpha is set, bit 15 is replicated across the whole fourth
// byte; otherwise the bit operations here don't write the fourth byte.
// Clobbers temp1Reg and temp2Reg.  Always returns true.
bool PixelJitCache::Jit_ConvertFrom5551(const PixelFuncID &id, RegCache::Reg colorReg, RegCache::Reg temp1Reg, RegCache::Reg temp2Reg, bool keepAlpha) {
	Describe("ConvertFrom5551");

	// Low 3 bits of each color byte, filled by repeating the channel's top bits.
	constexpr uint32_t kFillBits = 0x00070707;

	if (!cpu_info.bBMI2_fast) {
		// Low channel: bits 0-4 move up to 3-7 (kept in temp1.)
		MOV(32, R(temp1Reg), R(colorReg));
		AND(16, R(temp1Reg), Imm16(0x001F));
		SHL(32, R(temp1Reg), Imm8(3));

		// Middle channel: bits 5-9 move up to 11-15, merged into temp1.
		MOV(32, R(temp2Reg), R(colorReg));
		AND(16, R(temp2Reg), Imm16(0x03E0));
		SHL(32, R(temp2Reg), Imm8(6));
		OR(32, R(temp1Reg), R(temp2Reg));

		if (keepAlpha) {
			// Take the high channel along with the alpha bit, moving alpha to bit 31.
			AND(16, R(colorReg), Imm16(0xFC00));
			SHL(32, R(colorReg), Imm8(16));
			// The arithmetic shift fills the fourth byte with copies of the alpha bit.
			SAR(32, R(colorReg), Imm8(7));
		} else {
			// High channel only: bits 10-14 move up to 19-23.
			AND(16, R(colorReg), Imm16(0x7C00));
			SHL(32, R(colorReg), Imm8(9));
		}

		// Merge the channels, with a copy in temp1 to produce the fill bits from.
		OR(32, R(colorReg), R(temp1Reg));
		OR(32, R(temp1Reg), R(colorReg));
		SHR(32, R(temp1Reg), Imm8(5));
		// The mask leaves alpha out of the fill.
		AND(32, R(temp1Reg), Imm32(kFillBits));
		OR(32, R(colorReg), R(temp1Reg));
	} else {
		// Deposit each channel at the top of its byte, and the alpha bit (if kept) at bit 24.
		const uint32_t depositBits = keepAlpha ? 0x01F8F8F8 : 0x00F8F8F8;
		MOV(32, R(temp1Reg), Imm32(depositBits));
		PDEP(32, colorReg, colorReg, R(temp1Reg));

		// Produce the fill bits from the top bits of each channel.
		MOV(32, R(temp2Reg), R(colorReg));
		SHR(32, R(temp2Reg), Imm8(5));
		AND(32, R(temp2Reg), Imm32(kFillBits));

		if (keepAlpha) {
			// Move alpha up to bit 31, then shift back arithmetically to fill the fourth byte.
			SHL(32, R(colorReg), Imm8(7));
			SAR(32, R(colorReg), Imm8(7));
		}

		OR(32, R(colorReg), R(temp2Reg));
	}

	return true;
}
// Emits code expanding the 4444 color in colorReg to 8 bits per channel, in place.
// Each 4-bit channel is duplicated into both nibbles of its byte.  The fourth nibble
// is only included when keepAlpha is set.
// temp1Reg is always clobbered, temp2Reg only without fast BMI2.  Always returns true.
bool PixelJitCache::Jit_ConvertFrom4444(const PixelFuncID &id, RegCache::Reg colorReg, RegCache::Reg temp1Reg, RegCache::Reg temp2Reg, bool keepAlpha) {
	Describe("ConvertFrom4444");

	if (!cpu_info.bBMI2_fast) {
		// Low channel: bits 0-3 move up to 4-7 (kept in temp1.)
		MOV(32, R(temp1Reg), R(colorReg));
		AND(16, R(temp1Reg), Imm16(0x000F));
		SHL(32, R(temp1Reg), Imm8(4));

		// Second channel: bits 4-7 move up to 12-15.
		MOV(32, R(temp2Reg), R(colorReg));
		AND(16, R(temp2Reg), Imm16(0x00F0));
		SHL(32, R(temp2Reg), Imm8(8));
		OR(32, R(temp1Reg), R(temp2Reg));

		// Third channel: bits 8-11 move up to 20-23.
		MOV(32, R(temp2Reg), R(colorReg));
		AND(16, R(temp2Reg), Imm16(0x0F00));
		SHL(32, R(temp2Reg), Imm8(12));
		OR(32, R(temp1Reg), R(temp2Reg));

		if (!keepAlpha) {
			// Without alpha, colorReg is simply replaced by the three channels.
			MOV(32, R(colorReg), R(temp1Reg));
		} else {
			// Alpha: bits 12-15 move up to 28-31, then merge with the other channels.
			AND(16, R(colorReg), Imm16(0xF000));
			SHL(32, R(colorReg), Imm8(16));
			OR(32, R(colorReg), R(temp1Reg));
			// And keep temp1 as a full copy for the duplication below.
			OR(32, R(temp1Reg), R(colorReg));
		}

		// Duplicate each high nibble into the low nibble below it.
		SHR(32, R(temp1Reg), Imm8(4));
		OR(32, R(colorReg), R(temp1Reg));
	} else {
		// Deposit each nibble into the high nibble of its byte.
		const uint32_t depositBits = keepAlpha ? 0xF0F0F0F0 : 0x00F0F0F0;
		MOV(32, R(temp1Reg), Imm32(depositBits));
		PDEP(32, colorReg, colorReg, R(temp1Reg));

		// Duplicate each high nibble into the low nibble below it.
		MOV(32, R(temp1Reg), R(colorReg));
		SHR(32, R(temp1Reg), Imm8(4));
		OR(32, R(colorReg), R(temp1Reg));
	}

	return true;
}
};
#endif