Path: blob/master/Core/MIPS/LoongArch64/LoongArch64Jit.cpp
// Copyright (c) 2025- PPSSPP Project.

// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0 or later versions.

// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.

// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/

// Official git repository and contact information can be found at
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.

#include <cstddef>
#include "Core/MemMap.h"
#include "Core/MIPS/MIPSTables.h"
#include "Core/MIPS/LoongArch64/LoongArch64Jit.h"
#include "Core/MIPS/LoongArch64/LoongArch64RegCache.h"

#include <algorithm>  // for std::min
#include <map>
#include <vector>

namespace MIPSComp {

using namespace LoongArch64Gen;
using namespace LoongArch64JitConstants;

// Needs space for a LI and J which might both be 32-bit offsets.
static constexpr int MIN_BLOCK_NORMAL_LEN = 16;
static constexpr int MIN_BLOCK_EXIT_LEN = 8;

LoongArch64JitBackend::LoongArch64JitBackend(JitOptions &jitopt, IRBlockCache &blocks)
	: IRNativeBackend(blocks), jo(jitopt), regs_(&jo) {
	// Automatically disable incompatible options.
	// Pointerify only works when the low 32 bits of the memory base are zero.
	if (((intptr_t)Memory::base & 0x00000000FFFFFFFFUL) != 0) {
		jo.enablePointerify = false;
	}
	jo.optimizeForInterpreter = false;

	// Since we store the offset, this is as big as it can be.
	// We could shift off one bit to double it, would need to change LoongArch64Asm.
	AllocCodeSpace(1024 * 1024 * 16);

	regs_.Init(this);
}

LoongArch64JitBackend::~LoongArch64JitBackend() {
}

static void NoBlockExits() {
	_assert_msg_(false, "Never exited block, invalid IR?");
}

bool LoongArch64JitBackend::CompileBlock(IRBlockCache *irBlockCache, int block_num) {
	if (GetSpaceLeft() < 0x800)
		return false;

	IRBlock *block = irBlockCache->GetBlock(block_num);
	BeginWrite(std::min(GetSpaceLeft(), (size_t)block->GetNumIRInstructions() * 32));

	u32 startPC = block->GetOriginalStart();
	bool wroteCheckedOffset = false;
	if (jo.enableBlocklink && !jo.useBackJump) {
		SetBlockCheckedOffset(block_num, (int)GetOffset(GetCodePointer()));
		wroteCheckedOffset = true;

		WriteDebugPC(startPC);

		FixupBranch normalEntry = BGE(DOWNCOUNTREG, R_ZERO);
		LI(SCRATCH1, startPC);
		QuickJ(R_RA, outerLoopPCInSCRATCH1_);
		SetJumpTarget(normalEntry);
	}

	// Don't worry, the codespace isn't large enough to overflow offsets.
	const u8 *blockStart = GetCodePointer();
	block->SetNativeOffset((int)GetOffset(blockStart));
	compilingBlockNum_ = block_num;

	regs_.Start(irBlockCache, block_num);

	std::vector<const u8 *> addresses;
	const IRInst *instructions = irBlockCache->GetBlockInstructionPtr(*block);
	for (int i = 0; i < block->GetNumIRInstructions(); ++i) {
		const IRInst &inst = instructions[i];
		regs_.SetIRIndex(i);
		addresses.push_back(GetCodePtr());

		CompileIRInst(inst);

		if (jo.Disabled(JitDisable::REGALLOC_GPR) || jo.Disabled(JitDisable::REGALLOC_FPR))
			regs_.FlushAll(jo.Disabled(JitDisable::REGALLOC_GPR), jo.Disabled(JitDisable::REGALLOC_FPR));

		// Safety check, in case we get a bunch of really large jit ops without a lot of branching.
		if (GetSpaceLeft() < 0x800) {
			compilingBlockNum_ = -1;
			return false;
		}
	}

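	// Exits are emitted by the exit ops in the IR itself (compiled in the loop above),
	// typically through WriteConstExit() or a jump to the dispatcher.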
	// We should've written an exit above. If we didn't, bad things will happen.
	// Only check if debug stats are enabled - needlessly wastes jit space.
	if (DebugStatsEnabled()) {
		QuickCallFunction(&NoBlockExits, SCRATCH2);
		QuickJ(R_RA, hooks_.crashHandler);
	}

	int len = (int)GetOffset(GetCodePointer()) - block->GetNativeOffset();
	if (len < MIN_BLOCK_NORMAL_LEN) {
		// We need at least 16 bytes to invalidate blocks with, but larger doesn't need to align.
		ReserveCodeSpace(MIN_BLOCK_NORMAL_LEN - len);
	}

	if (!wroteCheckedOffset) {
		// Always record this, even if block link disabled - it's used for size calc.
		SetBlockCheckedOffset(block_num, (int)GetOffset(GetCodePointer()));
	}

	if (jo.enableBlocklink && jo.useBackJump) {
		WriteDebugPC(startPC);

		// Most blocks shouldn't be >= 256KB, so usually we can just BGE.
		if (BranchInRange(blockStart)) {
			BGE(DOWNCOUNTREG, R_ZERO, blockStart);
		} else {
			FixupBranch skip = BLT(DOWNCOUNTREG, R_ZERO);
			B(blockStart);
			SetJumpTarget(skip);
		}
		LI(SCRATCH1, startPC);
		QuickJ(R_RA, outerLoopPCInSCRATCH1_);
	}

	if (logBlocks_ > 0) {
		--logBlocks_;

		std::map<const u8 *, int> addressesLookup;
		for (int i = 0; i < (int)addresses.size(); ++i)
			addressesLookup[addresses[i]] = i;

		INFO_LOG(Log::JIT, "=============== LoongArch64 (%08x, %d bytes) ===============", startPC, len);
		const IRInst *instructions = irBlockCache->GetBlockInstructionPtr(*block);
		for (const u8 *p = blockStart; p < GetCodePointer(); ) {
			auto it = addressesLookup.find(p);
			if (it != addressesLookup.end()) {
				const IRInst &inst = instructions[it->second];

				char temp[512];
				DisassembleIR(temp, sizeof(temp), inst);
				INFO_LOG(Log::JIT, "IR: #%d %s", it->second, temp);
			}

			auto next = std::next(it);
			const u8 *nextp = next == addressesLookup.end() ? GetCodePointer() : next->first;

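			// Everything from p up to nextp is the native code emitted for this IR op.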
#if PPSSPP_ARCH(LOONGARCH64)
			auto lines = DisassembleLA64(p, (int)(nextp - p));
			for (const auto &line : lines)
				INFO_LOG(Log::JIT, "LA: %s", line.c_str());
#endif
			p = nextp;
		}
	}

	EndWrite();
	FlushIcache();
	compilingBlockNum_ = -1;

	return true;
}

// Emits an exit to the given PC. If the target block is already compiled and linking is
// enabled, jump straight to its checked entry; otherwise go through the dispatcher with
// the PC in SCRATCH1.
void LoongArch64JitBackend::WriteConstExit(uint32_t pc) {
	int block_num = blocks_.GetBlockNumberFromStartAddress(pc);
	const IRNativeBlock *nativeBlock = GetNativeBlock(block_num);

	int exitStart = (int)GetOffset(GetCodePointer());
	if (block_num >= 0 && jo.enableBlocklink && nativeBlock && nativeBlock->checkedOffset != 0) {
		QuickJ(SCRATCH1, GetBasePtr() + nativeBlock->checkedOffset);
	} else {
		LI(SCRATCH1, pc);
		QuickJ(R_RA, dispatcherPCInSCRATCH1_);
	}

	if (jo.enableBlocklink) {
		// In case of compression or early link, make sure it's large enough.
		int len = (int)GetOffset(GetCodePointer()) - exitStart;
		if (len < MIN_BLOCK_EXIT_LEN) {
			ReserveCodeSpace(MIN_BLOCK_EXIT_LEN - len);
			len = MIN_BLOCK_EXIT_LEN;
		}

		AddLinkableExit(compilingBlockNum_, pc, exitStart, len);
	}
}

void LoongArch64JitBackend::OverwriteExit(int srcOffset, int len, int block_num) {
	_dbg_assert_(len >= MIN_BLOCK_EXIT_LEN);

	const IRNativeBlock *nativeBlock = GetNativeBlock(block_num);
	if (nativeBlock) {
		u8 *writable = GetWritablePtrFromCodePtr(GetBasePtr()) + srcOffset;
		if (PlatformIsWXExclusive()) {
			ProtectMemoryPages(writable, len, MEM_PROT_READ | MEM_PROT_WRITE);
		}

		LoongArch64Emitter emitter(GetBasePtr() + srcOffset, writable);
		emitter.QuickJ(SCRATCH1, GetBasePtr() + nativeBlock->checkedOffset);
		int bytesWritten = (int)(emitter.GetWritableCodePtr() - writable);
		if (bytesWritten < len)
			emitter.ReserveCodeSpace(len - bytesWritten);
		emitter.FlushIcache();

		if (PlatformIsWXExclusive()) {
			ProtectMemoryPages(writable, len, MEM_PROT_READ | MEM_PROT_EXEC);
		}
	}
}

void LoongArch64JitBackend::CompIR_Generic(IRInst inst) {
	// If we got here, we're going the slow way.
	uint64_t value;
	memcpy(&value, &inst, sizeof(inst));

	FlushAll();
	LI(R4, value);  // Pass the packed IR inst as the first argument (a0).
	SaveStaticRegisters();
	WriteDebugProfilerStatus(IRProfilerStatus::IR_INTERPRET);
	QuickCallFunction(&DoIRInst, SCRATCH2);
	WriteDebugProfilerStatus(IRProfilerStatus::IN_JIT);
	LoadStaticRegisters();

	// We only need to check the return value if it's a potential exit.
	if ((GetIRMeta(inst.op)->flags & IRFLAG_EXIT) != 0) {
		MOVE(SCRATCH1, R4);

		if (BranchZeroInRange(dispatcherPCInSCRATCH1_)) {
			BNEZ(R4, dispatcherPCInSCRATCH1_);
		} else {
			FixupBranch skip = BEQZ(R4);
			QuickJ(R_RA, dispatcherPCInSCRATCH1_);
			SetJumpTarget(skip);
		}
	}
}

void LoongArch64JitBackend::CompIR_Interpret(IRInst inst) {
	MIPSOpcode op(inst.constant);

	// IR protects us against this being a branching instruction (well, hopefully.)
	FlushAll();
	SaveStaticRegisters();
	WriteDebugProfilerStatus(IRProfilerStatus::INTERPRET);
	if (DebugStatsEnabled()) {
		LI(R4, MIPSGetName(op));
		QuickCallFunction(&NotifyMIPSInterpret, SCRATCH2);
	}
	LI(R4, (int32_t)inst.constant);
	QuickCallFunction((const u8 *)MIPSGetInterpretFunc(op), SCRATCH2);
	WriteDebugProfilerStatus(IRProfilerStatus::IN_JIT);
	LoadStaticRegisters();
}

void LoongArch64JitBackend::FlushAll() {
	regs_.FlushAll();
}

bool LoongArch64JitBackend::DescribeCodePtr(const u8 *ptr, std::string &name) const {
	// Used in disassembly viewer.
	// Don't use spaces; profilers get confused or truncate them.
	if (ptr == dispatcherPCInSCRATCH1_) {
		name = "dispatcherPCInSCRATCH1";
	} else if (ptr == outerLoopPCInSCRATCH1_) {
		name = "outerLoopPCInSCRATCH1";
	} else if (ptr == dispatcherNoCheck_) {
		name = "dispatcherNoCheck";
	} else if (ptr == saveStaticRegisters_) {
		name = "saveStaticRegisters";
	} else if (ptr == loadStaticRegisters_) {
		name = "loadStaticRegisters";
	} else if (ptr == applyRoundingMode_) {
		name = "applyRoundingMode";
	} else if (ptr >= GetBasePtr() && ptr < GetBasePtr() + jitStartOffset_) {
		name = "fixedCode";
	} else {
		return IRNativeBackend::DescribeCodePtr(ptr, name);
	}
	return true;
}

void LoongArch64JitBackend::ClearAllBlocks() {
	ClearCodeSpace(jitStartOffset_);
	FlushIcacheSection(region + jitStartOffset_, region + region_size - jitStartOffset_);
	EraseAllLinks(-1);
}

void LoongArch64JitBackend::InvalidateBlock(IRBlockCache *irBlockCache, int block_num) {
	IRBlock *block = irBlockCache->GetBlock(block_num);
	int offset = block->GetNativeOffset();
	u8 *writable = GetWritablePtrFromCodePtr(GetBasePtr()) + offset;

	// Overwrite the block with a jump to compile it again.
	u32 pc = block->GetOriginalStart();
	if (pc != 0) {
		// Hopefully we always have at least 16 bytes, which should be all we need.
		if (PlatformIsWXExclusive()) {
			ProtectMemoryPages(writable, MIN_BLOCK_NORMAL_LEN, MEM_PROT_READ | MEM_PROT_WRITE);
		}

		LoongArch64Emitter emitter(GetBasePtr() + offset, writable);
		// We sign extend to ensure it will fit in 32 bits and an 8-byte LI.
		// TODO: May need to change if dispatcher doesn't reload PC.
		emitter.LI(SCRATCH1, (int32_t)pc);
		emitter.QuickJ(R_RA, dispatcherPCInSCRATCH1_);
		int bytesWritten = (int)(emitter.GetWritableCodePtr() - writable);
		if (bytesWritten < MIN_BLOCK_NORMAL_LEN)
			emitter.ReserveCodeSpace(MIN_BLOCK_NORMAL_LEN - bytesWritten);
		emitter.FlushIcache();

		if (PlatformIsWXExclusive()) {
			ProtectMemoryPages(writable, MIN_BLOCK_NORMAL_LEN, MEM_PROT_READ | MEM_PROT_EXEC);
		}
	}

	EraseAllLinks(block_num);
}

void LoongArch64JitBackend::RestoreRoundingMode(bool force) {
	MOVGR2FCSR(FCSR3, R_ZERO);  // 0 = RNE - Round Nearest Even
}

void LoongArch64JitBackend::ApplyRoundingMode(bool force) {
	QuickCallFunction(applyRoundingMode_);
}

void LoongArch64JitBackend::MovFromPC(LoongArch64Reg r) {
	LD_WU(r, CTXREG, offsetof(MIPSState, pc));
}

void LoongArch64JitBackend::MovToPC(LoongArch64Reg r) {
	ST_W(r, CTXREG, offsetof(MIPSState, pc));
}

void LoongArch64JitBackend::WriteDebugPC(uint32_t pc) {
	if (hooks_.profilerPC) {
		int offset = (const u8 *)hooks_.profilerPC - GetBasePtr();
		LI(SCRATCH2, hooks_.profilerPC);
		LI(R_RA, (int32_t)pc);
		ST_W(R_RA, SCRATCH2, 0);
	}
}

void LoongArch64JitBackend::WriteDebugPC(LoongArch64Reg r) {
	if (hooks_.profilerPC) {
		int offset = (const u8 *)hooks_.profilerPC - GetBasePtr();
		LI(SCRATCH2, hooks_.profilerPC);
		ST_W(r, SCRATCH2, 0);
	}
}

void LoongArch64JitBackend::WriteDebugProfilerStatus(IRProfilerStatus status) {
	if (hooks_.profilerPC) {
		int offset = (const u8 *)hooks_.profilerStatus - GetBasePtr();
		LI(SCRATCH2, hooks_.profilerStatus);
		LI(R_RA, (int)status);
		ST_W(R_RA, SCRATCH2, 0);
	}
}

void LoongArch64JitBackend::SaveStaticRegisters() {
	if (jo.useStaticAlloc) {
		QuickCallFunction(saveStaticRegisters_);
	} else {
		// Inline the single operation
		ST_W(DOWNCOUNTREG, CTXREG, offsetof(MIPSState, downcount));
	}
}

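// Counterpart to SaveStaticRegisters(): reloads the statically allocated registers
// (or just the downcount) after a call out to C++ code.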
void LoongArch64JitBackend::LoadStaticRegisters() {
	if (jo.useStaticAlloc) {
		QuickCallFunction(loadStaticRegisters_);
	} else {
		LD_W(DOWNCOUNTREG, CTXREG, offsetof(MIPSState, downcount));
	}
}

void LoongArch64JitBackend::NormalizeSrc1(IRInst inst, LoongArch64Reg *reg, LoongArch64Reg tempReg, bool allowOverlap) {
	*reg = NormalizeR(inst.src1, allowOverlap ? 0 : inst.dest, tempReg);
}

void LoongArch64JitBackend::NormalizeSrc12(IRInst inst, LoongArch64Reg *lhs, LoongArch64Reg *rhs, LoongArch64Reg lhsTempReg, LoongArch64Reg rhsTempReg, bool allowOverlap) {
	*lhs = NormalizeR(inst.src1, allowOverlap ? 0 : inst.dest, lhsTempReg);
	*rhs = NormalizeR(inst.src2, allowOverlap ? 0 : inst.dest, rhsTempReg);
}

LoongArch64Reg LoongArch64JitBackend::NormalizeR(IRReg rs, IRReg rd, LoongArch64Reg tempReg) {
	// For proper compare, we must sign extend so they both match or don't match.
	// But don't change pointers, in case one is SP (happens in LittleBigPlanet.)
	if (regs_.IsGPRImm(rs) && regs_.GetGPRImm(rs) == 0) {
		return R_ZERO;
	} else if (regs_.IsGPRMappedAsPointer(rs) || rs == rd) {
		return regs_.Normalize32(rs, tempReg);
	} else {
		return regs_.Normalize32(rs);
	}
}

} // namespace MIPSComp