Path: blob/master/Core/MIPS/LoongArch64/LoongArch64RegCache.cpp
// Copyright (c) 2023- PPSSPP Project.

// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0 or later versions.

// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.

// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/

// Official git repository and contact information can be found at
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.

#ifndef offsetof
#include <cstddef>
#endif

#include "Common/CPUDetect.h"
#include "Core/MIPS/IR/IRInst.h"
#include "Core/MIPS/IR/IRAnalysis.h"
#include "Core/MIPS/LoongArch64/LoongArch64RegCache.h"
#include "Core/MIPS/JitCommon/JitState.h"
#include "Core/Reporting.h"

using namespace LoongArch64Gen;
using namespace LoongArch64JitConstants;

LoongArch64RegCache::LoongArch64RegCache(MIPSComp::JitOptions *jo)
	: IRNativeRegCacheBase(jo) {
	// The V(LSX) regs overlap F regs, so we just use one slot.
	config_.totalNativeRegs = NUM_LAGPR + NUM_LAFPR;
	// F regs are used for both FPU and Vec, so we don't need VREGs.
	config_.mapUseVRegs = false;
	config_.mapFPUSIMD = true;
}

void LoongArch64RegCache::Init(LoongArch64Emitter *emitter) {
	emit_ = emitter;
}

void LoongArch64RegCache::SetupInitialRegs() {
	IRNativeRegCacheBase::SetupInitialRegs();

	// Treat R_ZERO a bit specially, but it's basically static alloc too.
	nrInitial_[R_ZERO].mipsReg = MIPS_REG_ZERO;
	nrInitial_[R_ZERO].normalized32 = true;

	// Since we also have a fixed zero, mark it as a static allocation.
	mrInitial_[MIPS_REG_ZERO].loc = MIPSLoc::REG_IMM;
	mrInitial_[MIPS_REG_ZERO].nReg = R_ZERO;
	mrInitial_[MIPS_REG_ZERO].imm = 0;
	mrInitial_[MIPS_REG_ZERO].isStatic = true;
}

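// Note on the ordering below: in the LoongArch64 LP64 ABI, R22 (fp/s9) and
// R23-R31 (s0-s8) are callee-saved, so values cached in them survive calls
// out of the jit. That's why R22-R27 lead the dynamic order and back the
// static allocations in GetStaticAllocations().
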
const int *LoongArch64RegCache::GetAllocationOrder(MIPSLoc type, MIPSMap flags, int &count, int &base) const {
	base = R0;

	if (type == MIPSLoc::REG) {
		// R22-R26 (Also R27) are most suitable for static allocation. Those that are chosen
		// for static allocation are left out of the static-alloc order below.
		static const int allocationOrder[] = {
			R22, R23, R24, R25, R26, R27, R4, R5, R6, R7, R8, R9, R10, R11, R14, R15, R16, R17, R18, R19, R20,
		};
		static const int allocationOrderStaticAlloc[] = {
			R4, R5, R6, R7, R8, R9, R10, R11, R14, R15, R16, R17, R18, R19, R20,
		};

		if (jo_->useStaticAlloc) {
			count = ARRAY_SIZE(allocationOrderStaticAlloc);
			return allocationOrderStaticAlloc;
		} else {
			count = ARRAY_SIZE(allocationOrder);
			return allocationOrder;
		}
	} else if (type == MIPSLoc::FREG) {
		static const int allocationOrder[] = {
			F24, F25, F26, F27, F28, F29, F30, F31,
			F0, F1, F2, F3, F4, F5, F6, F7,
			F10, F11, F12, F13, F14, F15, F16, F17, F18, F19, F20, F21, F22, F23,
		};

		count = ARRAY_SIZE(allocationOrder);
		return allocationOrder;
	} else {
		_assert_msg_(false, "Allocation order not yet implemented");
		count = 0;
		return nullptr;
	}
}

const LoongArch64RegCache::StaticAllocation *LoongArch64RegCache::GetStaticAllocations(int &count) const {
	static const StaticAllocation allocs[] = {
		{ MIPS_REG_SP, R22, MIPSLoc::REG, true },
		{ MIPS_REG_V0, R23, MIPSLoc::REG },
		{ MIPS_REG_V1, R24, MIPSLoc::REG },
		{ MIPS_REG_A0, R25, MIPSLoc::REG },
		{ MIPS_REG_A1, R26, MIPSLoc::REG },
		{ MIPS_REG_RA, R27, MIPSLoc::REG },
	};

	if (jo_->useStaticAlloc) {
		count = ARRAY_SIZE(allocs);
		return allocs;
	}
	return IRNativeRegCacheBase::GetStaticAllocations(count);
}

void LoongArch64RegCache::EmitLoadStaticRegisters() {
	int count;
	const StaticAllocation *allocs = GetStaticAllocations(count);
	for (int i = 0; i < count; i++) {
		int offset = GetMipsRegOffset(allocs[i].mr);
		if (allocs[i].pointerified && jo_->enablePointerify) {
			emit_->LD_WU((LoongArch64Reg)allocs[i].nr, CTXREG, offset);
			emit_->ADD_D((LoongArch64Reg)allocs[i].nr, (LoongArch64Reg)allocs[i].nr, MEMBASEREG);
		} else {
			emit_->LD_W((LoongArch64Reg)allocs[i].nr, CTXREG, offset);
		}
	}
}

void LoongArch64RegCache::EmitSaveStaticRegisters() {
	int count;
	const StaticAllocation *allocs = GetStaticAllocations(count);
	// This only needs to run once (by Asm) so checks don't need to be fast.
	for (int i = 0; i < count; i++) {
		int offset = GetMipsRegOffset(allocs[i].mr);
		emit_->ST_W((LoongArch64Reg)allocs[i].nr, CTXREG, offset);
	}
}

void LoongArch64RegCache::FlushBeforeCall() {
	// These registers are not preserved by function calls:
	// a0-a7 (R4-R11) and t0-t8 (R12-R20), plus fa0-fa7 (F0-F7) and ft0-ft15 (F8-F23).
	for (int i = 4; i <= 20; ++i) {
		FlushNativeReg(R0 + i);
	}
	for (int i = 0; i <= 23; ++i) {
		FlushNativeReg(F0 + i);
	}
}

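// A GPR is "normalized32" when the 64-bit register holds the sign-extended
// form of its low 32 bits. LoongArch64's 32-bit ALU ops write results in
// exactly that form; in particular ADDI.W rd, rj, 0 re-normalizes a register
// (the same trick as RISC-V's sext.w), which is what Normalize32() emits.
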
bool LoongArch64RegCache::IsNormalized32(IRReg mipsReg) {
	_dbg_assert_(IsValidGPR(mipsReg));
	if (mr[mipsReg].loc == MIPSLoc::REG || mr[mipsReg].loc == MIPSLoc::REG_IMM) {
		return nr[mr[mipsReg].nReg].normalized32;
	}
	return false;
}

LoongArch64Gen::LoongArch64Reg LoongArch64RegCache::Normalize32(IRReg mipsReg, LoongArch64Gen::LoongArch64Reg destReg) {
	_dbg_assert_(IsValidGPR(mipsReg));
	_dbg_assert_(destReg == INVALID_REG || (destReg > R0 && destReg <= R31));

	LoongArch64Reg reg = (LoongArch64Reg)mr[mipsReg].nReg;

	switch (mr[mipsReg].loc) {
	case MIPSLoc::IMM:
	case MIPSLoc::MEM:
		_assert_msg_(false, "Cannot normalize an imm or mem");
		return INVALID_REG;

	case MIPSLoc::REG:
	case MIPSLoc::REG_IMM:
		if (!nr[mr[mipsReg].nReg].normalized32) {
			if (destReg == INVALID_REG) {
				emit_->ADDI_W((LoongArch64Reg)mr[mipsReg].nReg, (LoongArch64Reg)mr[mipsReg].nReg, 0);
				nr[mr[mipsReg].nReg].normalized32 = true;
				nr[mr[mipsReg].nReg].pointerified = false;
			} else {
				emit_->ADDI_W(destReg, (LoongArch64Reg)mr[mipsReg].nReg, 0);
			}
		} else if (destReg != INVALID_REG) {
			emit_->ADDI_W(destReg, (LoongArch64Reg)mr[mipsReg].nReg, 0);
		}
		break;

	case MIPSLoc::REG_AS_PTR:
		_dbg_assert_(nr[mr[mipsReg].nReg].normalized32 == false);
		if (destReg == INVALID_REG) {
			// If we can pointerify, ADDI_W will be enough.
			if (!jo_->enablePointerify)
				AdjustNativeRegAsPtr(mr[mipsReg].nReg, false);
			emit_->ADDI_W((LoongArch64Reg)mr[mipsReg].nReg, (LoongArch64Reg)mr[mipsReg].nReg, 0);
			mr[mipsReg].loc = MIPSLoc::REG;
			nr[mr[mipsReg].nReg].normalized32 = true;
			nr[mr[mipsReg].nReg].pointerified = false;
		} else if (!jo_->enablePointerify) {
			emit_->SUB_D(destReg, (LoongArch64Reg)mr[mipsReg].nReg, MEMBASEREG);
			emit_->ADDI_W(destReg, destReg, 0);
		} else {
			emit_->ADDI_W(destReg, (LoongArch64Reg)mr[mipsReg].nReg, 0);
		}
		break;

	default:
		_assert_msg_(false, "Should not normalize32 floats");
		break;
	}

	return destReg == INVALID_REG ? reg : destReg;
}

LoongArch64Reg LoongArch64RegCache::TryMapTempImm(IRReg r) {
	_dbg_assert_(IsValidGPR(r));
	// If already mapped, no need for a temporary.
	if (IsGPRMapped(r)) {
		return R(r);
	}

	if (mr[r].loc == MIPSLoc::IMM) {
		if (mr[r].imm == 0) {
			return R_ZERO;
		}

		// Try our luck - check for an exact match in another LoongArch reg.
		for (int i = 0; i < TOTAL_MAPPABLE_IRREGS; ++i) {
			if (mr[i].loc == MIPSLoc::REG_IMM && mr[i].imm == mr[r].imm) {
				// Awesome, let's just use this reg.
				return (LoongArch64Reg)mr[i].nReg;
			}
		}
	}

	return INVALID_REG;
}

LoongArch64Reg LoongArch64RegCache::GetAndLockTempGPR() {
	LoongArch64Reg reg = (LoongArch64Reg)AllocateReg(MIPSLoc::REG, MIPSMap::INIT);
	if (reg != INVALID_REG) {
		nr[reg].tempLockIRIndex = irIndex_;
	}
	return reg;
}

LoongArch64Reg LoongArch64RegCache::MapWithFPRTemp(const IRInst &inst) {
	return (LoongArch64Reg)MapWithTemp(inst, MIPSLoc::FREG);
}

LoongArch64Reg LoongArch64RegCache::MapGPR(IRReg mipsReg, MIPSMap mapFlags) {
	_dbg_assert_(IsValidGPR(mipsReg));

	// Okay, not mapped, so we need to allocate an LA register.
	IRNativeReg nreg = MapNativeReg(MIPSLoc::REG, mipsReg, 1, mapFlags);
	return (LoongArch64Reg)nreg;
}

LoongArch64Reg LoongArch64RegCache::MapGPRAsPointer(IRReg reg) {
	return (LoongArch64Reg)MapNativeRegAsPointer(reg);
}

LoongArch64Reg LoongArch64RegCache::MapFPR(IRReg mipsReg, MIPSMap mapFlags) {
	_dbg_assert_(IsValidFPR(mipsReg));
	_dbg_assert_(mr[mipsReg + 32].loc == MIPSLoc::MEM || mr[mipsReg + 32].loc == MIPSLoc::FREG);

	IRNativeReg nreg = MapNativeReg(MIPSLoc::FREG, mipsReg + 32, 1, mapFlags);
	if (nreg != -1)
		return (LoongArch64Reg)nreg;
	return INVALID_REG;
}

LoongArch64Reg LoongArch64RegCache::MapVec4(IRReg first, MIPSMap mapFlags) {
	_dbg_assert_(IsValidFPR(first));
	_dbg_assert_((first & 3) == 0);
	_dbg_assert_(mr[first + 32].loc == MIPSLoc::MEM || mr[first + 32].loc == MIPSLoc::FREG);

	IRNativeReg nreg = MapNativeReg(MIPSLoc::FREG, first + 32, 4, mapFlags);
	if (nreg != -1)
		return EncodeRegToV((LoongArch64Reg)nreg);
	return INVALID_REG;
}

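// A "pointerified" register holds MEMBASEREG plus the PSP address, i.e. a
// direct host pointer. state == true converts a guest address into that form;
// state == false subtracts the base back off. Under MASKED_PSP_MEMORY the
// shift pair first masks the address to 0x3FFFFFFF, which destroys the
// original value - hence the dirty asserts.
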
void LoongArch64RegCache::AdjustNativeRegAsPtr(IRNativeReg nreg, bool state) {
	LoongArch64Reg r = (LoongArch64Reg)(R0 + nreg);
	_assert_(r >= R0 && r <= R31);
	if (state) {
#ifdef MASKED_PSP_MEMORY
		// This destroys the value...
		_dbg_assert_(!nr[nreg].isDirty);
		emit_->SLLI_W(r, r, 2);
		emit_->SRLI_W(r, r, 2);
		emit_->ADD_D(r, r, MEMBASEREG);
#else
		// Clear the top bits to be safe.
		emit_->SLLI_D(r, r, 32);
		emit_->SRLI_D(r, r, 32);
		emit_->ADD_D(r, r, MEMBASEREG);
#endif
		nr[nreg].normalized32 = false;
	} else {
#ifdef MASKED_PSP_MEMORY
		_dbg_assert_(!nr[nreg].isDirty);
#endif
		emit_->SUB_D(r, r, MEMBASEREG);
		nr[nreg].normalized32 = false;
	}
}

bool LoongArch64RegCache::IsNativeRegCompatible(IRNativeReg nreg, MIPSLoc type, MIPSMap flags, int lanes) {
	// No special flags, skip the check for a little speed.
	return IRNativeRegCacheBase::IsNativeRegCompatible(nreg, type, flags, lanes);
}

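// Lane counts here encode the access width: 1 is a single 32-bit GPR/FPR
// slot, 2 is a 64-bit pair (only LO/HI for GPRs, or two adjacent floats),
// and 4 is a full Vec4 moved through the LSX view of the same FPR.
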
void LoongArch64RegCache::LoadNativeReg(IRNativeReg nreg, IRReg first, int lanes) {
	LoongArch64Reg r = (LoongArch64Reg)(R0 + nreg);
	_dbg_assert_(r > R0);
	_dbg_assert_(first != MIPS_REG_ZERO);
	if (r <= R31) {
		_assert_(lanes == 1 || (lanes == 2 && first == IRREG_LO));
		if (lanes == 1)
			emit_->LD_W(r, CTXREG, GetMipsRegOffset(first));
		else if (lanes == 2)
			emit_->LD_D(r, CTXREG, GetMipsRegOffset(first));
		else
			_assert_(false);
		nr[nreg].normalized32 = true;
	} else {
		_dbg_assert_(r >= F0 && r <= F31);
		_assert_msg_(mr[first].loc == MIPSLoc::FREG, "Cannot load this type: %d", (int)mr[first].loc);
		if (lanes == 1)
			emit_->FLD_S(r, CTXREG, GetMipsRegOffset(first));
		else if (lanes == 2)
			emit_->FLD_D(r, CTXREG, GetMipsRegOffset(first));
		else if (lanes == 4)
			emit_->VLD(EncodeRegToV(r), CTXREG, GetMipsRegOffset(first));
		else
			_assert_(false);
	}
}

void LoongArch64RegCache::StoreNativeReg(IRNativeReg nreg, IRReg first, int lanes) {
	LoongArch64Reg r = (LoongArch64Reg)(R0 + nreg);
	_dbg_assert_(r > R0);
	_dbg_assert_(first != MIPS_REG_ZERO);
	if (r <= R31) {
		_assert_(lanes == 1 || (lanes == 2 && first == IRREG_LO));
		_assert_(mr[first].loc == MIPSLoc::REG || mr[first].loc == MIPSLoc::REG_IMM);
		if (lanes == 1)
			emit_->ST_W(r, CTXREG, GetMipsRegOffset(first));
		else if (lanes == 2)
			emit_->ST_D(r, CTXREG, GetMipsRegOffset(first));
		else
			_assert_(false);
	} else {
		_dbg_assert_(r >= F0 && r <= F31);
		_assert_msg_(mr[first].loc == MIPSLoc::FREG, "Cannot store this type: %d", (int)mr[first].loc);
		if (lanes == 1)
			emit_->FST_S(r, CTXREG, GetMipsRegOffset(first));
		else if (lanes == 2)
			emit_->FST_D(r, CTXREG, GetMipsRegOffset(first));
		else if (lanes == 4)
			emit_->VST(EncodeRegToV(r), CTXREG, GetMipsRegOffset(first));
		else
			_assert_(false);
	}
}

void LoongArch64RegCache::SetNativeRegValue(IRNativeReg nreg, uint32_t imm) {
	LoongArch64Reg r = (LoongArch64Reg)(R0 + nreg);
	if (r == R_ZERO && imm == 0)
		return;
	_dbg_assert_(r > R0 && r <= R31);
	emit_->LI(r, (int32_t)imm);

	// We always use 32-bit immediates, so this is normalized now.
	nr[nreg].normalized32 = true;
}

void LoongArch64RegCache::StoreRegValue(IRReg mreg, uint32_t imm) {
	_assert_(IsValidGPRNoZero(mreg));
	// Try to optimize using a different reg.
	LoongArch64Reg storeReg = INVALID_REG;

	// Zero is super easy.
	if (imm == 0) {
		storeReg = R_ZERO;
	} else {
		// Could we get lucky? Check for an exact match in another LoongArch reg.
		for (int i = 0; i < TOTAL_MAPPABLE_IRREGS; ++i) {
			if (mr[i].loc == MIPSLoc::REG_IMM && mr[i].imm == imm) {
				// Awesome, let's just store this reg.
				storeReg = (LoongArch64Reg)mr[i].nReg;
				break;
			}
		}

		if (storeReg == INVALID_REG) {
			emit_->LI(SCRATCH1, imm);
			storeReg = SCRATCH1;
		}
	}

	emit_->ST_W(storeReg, CTXREG, GetMipsRegOffset(mreg));
}

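// Transfers change how many lanes a value occupies in place: splitting a
// Vec4 into singles, or gathering singles into a Vec4, without a full
// flush/reload through memory when a cheaper shuffle exists. Unsupported
// shapes fall through to the base implementation.
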
bool LoongArch64RegCache::TransferNativeReg(IRNativeReg nreg, IRNativeReg dest, MIPSLoc type, IRReg first, int lanes, MIPSMap flags) {
	bool allowed = !mr[nr[nreg].mipsReg].isStatic;
	// There's currently no support for non-FREGs here.
	allowed = allowed && type == MIPSLoc::FREG;

	if (dest == -1)
		dest = nreg;

	if (allowed && (flags == MIPSMap::INIT || flags == MIPSMap::DIRTY)) {
		// Alright, changing lane count (possibly including lane position).
		IRReg oldfirst = nr[nreg].mipsReg;
		int oldlanes = 0;
		while (mr[oldfirst + oldlanes].nReg == nreg)
			oldlanes++;
		_assert_msg_(oldlanes != 0, "TransferNativeReg encountered nreg mismatch");
		_assert_msg_(oldlanes != lanes, "TransferNativeReg transfer to same lanecount, misaligned?");

		if (lanes == 1 && TransferVecTo1(nreg, dest, first, oldlanes))
			return true;
		if (oldlanes == 1 && Transfer1ToVec(nreg, dest, first, lanes))
			return true;
	}

	return IRNativeRegCacheBase::TransferNativeReg(nreg, dest, type, first, lanes, flags);
}

bool LoongArch64RegCache::TransferVecTo1(IRNativeReg nreg, IRNativeReg dest, IRReg first, int oldlanes) {
	IRReg oldfirst = nr[nreg].mipsReg;

	// Is it worth preserving any of the old regs?
	int numKept = 0;
	for (int i = 0; i < oldlanes; ++i) {
		// Skip whichever one this is extracting.
		if (oldfirst + i == first)
			continue;
		// If 0 isn't being transferred, it's easy to keep in its original reg.
		if (i == 0 && dest != nreg) {
			numKept++;
			continue;
		}

		IRNativeReg freeReg = FindFreeReg(MIPSLoc::FREG, MIPSMap::INIT);
		if (freeReg != -1 && IsRegRead(MIPSLoc::FREG, oldfirst + i)) {
			// If there's one free, use it. Don't modify nreg, though.
			emit_->VREPLVEI_W(FromNativeReg(freeReg), FromNativeReg(nreg), i);

			// Update accounting.
			nr[freeReg].isDirty = nr[nreg].isDirty;
			nr[freeReg].mipsReg = oldfirst + i;
			mr[oldfirst + i].lane = -1;
			mr[oldfirst + i].nReg = freeReg;
			numKept++;
		}
	}

	// Unless all other lanes were kept, store.
	if (nr[nreg].isDirty && numKept < oldlanes - 1) {
		StoreNativeReg(nreg, oldfirst, oldlanes);
		// Set false even for regs that were split out, since they were flushed too.
		for (int i = 0; i < oldlanes; ++i) {
			if (mr[oldfirst + i].nReg != -1)
				nr[mr[oldfirst + i].nReg].isDirty = false;
		}
	}

	// Next, shuffle the desired element into first place.
	if (mr[first].lane > 0) {
		emit_->VREPLVEI_W(FromNativeReg(dest), FromNativeReg(nreg), mr[first].lane);
	} else if (mr[first].lane <= 0 && dest != nreg) {
		emit_->VREPLVEI_W(FromNativeReg(dest), FromNativeReg(nreg), 0);
	}

	// Now update accounting.
	for (int i = 0; i < oldlanes; ++i) {
		auto &mreg = mr[oldfirst + i];
		if (oldfirst + i == first) {
			mreg.lane = -1;
			mreg.nReg = dest;
		} else if (mreg.nReg == nreg && i == 0 && nreg != dest) {
			// Still in the same register, but no longer a vec.
			mreg.lane = -1;
		} else if (mreg.nReg == nreg) {
			// No longer in a register.
			mreg.nReg = -1;
			mreg.lane = -1;
			mreg.loc = MIPSLoc::MEM;
		}
	}

	if (dest != nreg) {
		nr[dest].isDirty = nr[nreg].isDirty;
		if (oldfirst == first) {
			nr[nreg].mipsReg = -1;
			nr[nreg].isDirty = false;
		}
	}
	nr[dest].mipsReg = first;

	return true;
}

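// In Transfer1ToVec below, bit i of blendMask is set when lane i is NOT
// currently in a register and must be blended in from the context. For
// example, blendMask == 0b0110 means y and z come from memory while x and w
// are live in FPRs. Each reachable mask (0b1111 can't happen since
// numInRegs != 0) gets a hand-scheduled LSX sequence; anything that can't be
// handled returns false so the caller falls back.
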
bool LoongArch64RegCache::Transfer1ToVec(IRNativeReg nreg, IRNativeReg dest, IRReg first, int lanes) {
	LoongArch64Reg destReg = FromNativeReg(dest);
	LoongArch64Reg cur[4]{};
	int numInRegs = 0;
	u8 blendMask = 0;
	for (int i = 0; i < lanes; ++i) {
		if (mr[first + i].lane != -1 || (i != 0 && mr[first + i].spillLockIRIndex >= irIndex_)) {
			// Can't do it, either double mapped or overlapping vec.
			return false;
		}

		if (mr[first + i].nReg == -1) {
			cur[i] = INVALID_REG;
			blendMask |= 1 << i;
		} else {
			cur[i] = FromNativeReg(mr[first + i].nReg);
			numInRegs++;
		}
	}

	// Shouldn't happen, this should only get called to transfer one in a reg.
	if (numInRegs == 0)
		return false;

	// If everything's currently in a reg, move it into this reg.
	if (lanes == 4) {
		// Go with an exhaustive approach, only 15 possibilities...
		if (blendMask == 0) {
			// y = yw##, x = xz##, dest = xyzw.
			emit_->VILVL_W(EncodeRegToV(cur[1]), EncodeRegToV(cur[3]), EncodeRegToV(cur[1]));
			emit_->VILVL_W(EncodeRegToV(cur[0]), EncodeRegToV(cur[2]), EncodeRegToV(cur[0]));
			emit_->VILVL_W(EncodeRegToV(destReg), EncodeRegToV(cur[1]), EncodeRegToV(cur[0]));
		} else if (blendMask == 0b0001) {
			// y = yw##, w = x###, w = xz##, dest = xyzw.
			emit_->VILVL_W(EncodeRegToV(cur[1]), EncodeRegToV(cur[3]), EncodeRegToV(cur[1]));
			emit_->FLD_S(SCRATCHF1, CTXREG, GetMipsRegOffset(first + 0));
			emit_->VEXTRINS_W(EncodeRegToV(cur[3]), EncodeRegToV(SCRATCHF1), 0);
			emit_->VILVL_W(EncodeRegToV(cur[3]), EncodeRegToV(cur[2]), EncodeRegToV(cur[3]));
			emit_->VILVL_W(EncodeRegToV(destReg), EncodeRegToV(cur[1]), EncodeRegToV(cur[3]));
		} else if (blendMask == 0b0010) {
			// x = xz##, z = y###, z = yw##, dest = xyzw.
			emit_->VILVL_W(EncodeRegToV(cur[0]), EncodeRegToV(cur[2]), EncodeRegToV(cur[0]));
			emit_->FLD_S(SCRATCHF1, CTXREG, GetMipsRegOffset(first + 1));
			emit_->VEXTRINS_W(EncodeRegToV(cur[2]), EncodeRegToV(SCRATCHF1), 0);
			emit_->VILVL_W(EncodeRegToV(cur[2]), EncodeRegToV(cur[3]), EncodeRegToV(cur[2]));
			emit_->VILVL_W(EncodeRegToV(destReg), EncodeRegToV(cur[2]), EncodeRegToV(cur[0]));
		} else if (blendMask == 0b0011 && (first & 1) == 0) {
			// z = zw##, w = xy##, dest = xyzw. Mixed lane sizes.
			emit_->VILVL_W(EncodeRegToV(cur[2]), EncodeRegToV(cur[3]), EncodeRegToV(cur[2]));
			emit_->FLD_D(SCRATCHF1, CTXREG, GetMipsRegOffset(first + 0));
			emit_->VEXTRINS_D(EncodeRegToV(cur[3]), EncodeRegToV(SCRATCHF1), 0);
			emit_->VILVL_D(EncodeRegToV(destReg), EncodeRegToV(cur[2]), EncodeRegToV(cur[3]));
		} else if (blendMask == 0b0100) {
			// y = yw##, w = z###, x = xz##, dest = xyzw.
			emit_->VILVL_W(EncodeRegToV(cur[1]), EncodeRegToV(cur[3]), EncodeRegToV(cur[1]));
			emit_->FLD_S(SCRATCHF1, CTXREG, GetMipsRegOffset(first + 2));
			emit_->VEXTRINS_W(EncodeRegToV(cur[3]), EncodeRegToV(SCRATCHF1), 0);
			emit_->VILVL_W(EncodeRegToV(cur[0]), EncodeRegToV(cur[3]), EncodeRegToV(cur[0]));
			emit_->VILVL_W(EncodeRegToV(destReg), EncodeRegToV(cur[1]), EncodeRegToV(cur[0]));
		} else if (blendMask == 0b0101 && (first & 3) == 0) {
			// y = yw##, w = x#z#, w = xz##, dest = xyzw.
			emit_->VILVL_W(EncodeRegToV(cur[1]), EncodeRegToV(cur[3]), EncodeRegToV(cur[1]));
			emit_->VLD(EncodeRegToV(cur[3]), CTXREG, GetMipsRegOffset(first));
			emit_->VPICKEV_W(EncodeRegToV(cur[3]), EncodeRegToV(cur[3]), EncodeRegToV(cur[3]));
			emit_->VILVL_W(EncodeRegToV(destReg), EncodeRegToV(cur[1]), EncodeRegToV(cur[3]));
		} else if (blendMask == 0b0110 && (first & 3) == 0) {
			if (destReg == cur[0]) {
				// w = wx##, dest = #yz#, dest = xyz#, dest = xyzw.
				emit_->VILVL_W(EncodeRegToV(cur[3]), EncodeRegToV(cur[0]), EncodeRegToV(cur[3]));
				emit_->VLD(EncodeRegToV(destReg), CTXREG, GetMipsRegOffset(first));
				emit_->VEXTRINS_W(EncodeRegToV(destReg), EncodeRegToV(cur[3]), 1);
				emit_->VEXTRINS_W(EncodeRegToV(destReg), EncodeRegToV(cur[3]), (3 << 4));
			} else {
				// Assumes destReg may equal cur[3].
				// x = xw##, dest = #yz#, dest = xyz#, dest = xyzw.
				emit_->VILVL_W(EncodeRegToV(cur[0]), EncodeRegToV(cur[3]), EncodeRegToV(cur[0]));
				emit_->VLD(EncodeRegToV(destReg), CTXREG, GetMipsRegOffset(first));
				emit_->VEXTRINS_W(EncodeRegToV(destReg), EncodeRegToV(cur[0]), 0);
				emit_->VEXTRINS_W(EncodeRegToV(destReg), EncodeRegToV(cur[0]), (3 << 4 | 1));
			}
		} else if (blendMask == 0b0111 && (first & 3) == 0 && destReg != cur[3]) {
			// dest = xyz#, dest = xyzw.
			emit_->VLD(EncodeRegToV(destReg), CTXREG, GetMipsRegOffset(first));
			emit_->VEXTRINS_W(EncodeRegToV(destReg), EncodeRegToV(cur[3]), (3 << 4));
		} else if (blendMask == 0b1000) {
			// x = xz##, z = w###, y = yw##, dest = xyzw.
			emit_->VILVL_W(EncodeRegToV(cur[0]), EncodeRegToV(cur[2]), EncodeRegToV(cur[0]));
			emit_->FLD_S(SCRATCHF1, CTXREG, GetMipsRegOffset(first + 3));
			emit_->VEXTRINS_W(EncodeRegToV(cur[2]), EncodeRegToV(SCRATCHF1), 0);
			emit_->VILVL_W(EncodeRegToV(cur[1]), EncodeRegToV(cur[2]), EncodeRegToV(cur[1]));
			emit_->VILVL_W(EncodeRegToV(destReg), EncodeRegToV(cur[1]), EncodeRegToV(cur[0]));
		} else if (blendMask == 0b1001 && (first & 3) == 0) {
			if (destReg == cur[1]) {
				// w = zy##, dest = x##w, dest = xy#w, dest = xyzw.
				emit_->VILVL_W(EncodeRegToV(cur[2]), EncodeRegToV(cur[1]), EncodeRegToV(cur[2]));
				emit_->VLD(EncodeRegToV(destReg), CTXREG, GetMipsRegOffset(first));
				emit_->VEXTRINS_W(EncodeRegToV(destReg), EncodeRegToV(cur[2]), (1 << 4 | 1));
				emit_->VEXTRINS_W(EncodeRegToV(destReg), EncodeRegToV(cur[2]), (2 << 4));
			} else {
				// Assumes destReg may equal cur[2].
				// y = yz##, dest = x##w, dest = xy#w, dest = xyzw.
				emit_->VILVL_W(EncodeRegToV(cur[1]), EncodeRegToV(cur[2]), EncodeRegToV(cur[1]));
				emit_->VLD(EncodeRegToV(destReg), CTXREG, GetMipsRegOffset(first));
				emit_->VEXTRINS_W(EncodeRegToV(destReg), EncodeRegToV(cur[1]), (1 << 4));
				emit_->VEXTRINS_W(EncodeRegToV(destReg), EncodeRegToV(cur[1]), (2 << 4 | 1));
			}
		} else if (blendMask == 0b1010 && (first & 3) == 0) {
			// x = xz##, z = #y#w, z = yw##, dest = xyzw.
			emit_->VILVL_W(EncodeRegToV(cur[0]), EncodeRegToV(cur[2]), EncodeRegToV(cur[0]));
			emit_->VLD(EncodeRegToV(cur[2]), CTXREG, GetMipsRegOffset(first));
			emit_->VPICKOD_W(EncodeRegToV(cur[2]), EncodeRegToV(cur[2]), EncodeRegToV(cur[2]));
			emit_->VILVL_W(EncodeRegToV(destReg), EncodeRegToV(cur[2]), EncodeRegToV(cur[0]));
		} else if (blendMask == 0b1011 && (first & 3) == 0 && destReg != cur[2]) {
			// dest = xy#w, dest = xyzw.
			emit_->VLD(EncodeRegToV(destReg), CTXREG, GetMipsRegOffset(first));
			emit_->VEXTRINS_W(EncodeRegToV(destReg), EncodeRegToV(cur[2]), (2 << 4));
		} else if (blendMask == 0b1100 && (first & 1) == 0) {
			// x = xy##, y = zw##, dest = xyzw. Mixed lane sizes.
			emit_->VILVL_W(EncodeRegToV(cur[0]), EncodeRegToV(cur[1]), EncodeRegToV(cur[0]));
			emit_->FLD_D(SCRATCHF1, CTXREG, GetMipsRegOffset(first + 2));
			emit_->VEXTRINS_D(EncodeRegToV(cur[1]), EncodeRegToV(SCRATCHF1), 0);
			emit_->VILVL_D(EncodeRegToV(destReg), EncodeRegToV(cur[1]), EncodeRegToV(cur[0]));
		} else if (blendMask == 0b1101 && (first & 3) == 0 && destReg != cur[1]) {
			// dest = x#zw, dest = xyzw.
			emit_->VLD(EncodeRegToV(destReg), CTXREG, GetMipsRegOffset(first));
			emit_->VEXTRINS_W(EncodeRegToV(destReg), EncodeRegToV(cur[1]), (1 << 4));
		} else if (blendMask == 0b1110 && (first & 3) == 0 && destReg != cur[0]) {
			// dest = #yzw, dest = xyzw.
			emit_->VLD(EncodeRegToV(destReg), CTXREG, GetMipsRegOffset(first));
			emit_->VEXTRINS_W(EncodeRegToV(destReg), EncodeRegToV(cur[0]), 0);
		} else if (blendMask == 0b1110 && (first & 3) == 0) {
			// If dest == cur[0] (which may be common), we need a temp...
			IRNativeReg freeReg = FindFreeReg(MIPSLoc::FREG, MIPSMap::INIT);
			// Very unfortunate.
			if (freeReg == INVALID_REG)
				return false;

			// free = x###, dest = #yzw, dest = xyzw.
			emit_->VREPLVEI_W(EncodeRegToV(FromNativeReg(freeReg)), EncodeRegToV(cur[0]), 0);
			emit_->VLD(EncodeRegToV(destReg), CTXREG, GetMipsRegOffset(first));
			emit_->VEXTRINS_W(EncodeRegToV(destReg), EncodeRegToV(FromNativeReg(freeReg)), 0);
		} else {
			return false;
		}
	} else {
		return false;
	}

	mr[first].lane = 0;
	for (int i = 0; i < lanes; ++i) {
		if (mr[first + i].nReg != -1) {
			// If this was dirty, the combined reg is now dirty.
			if (nr[mr[first + i].nReg].isDirty)
				nr[dest].isDirty = true;

			// Throw away the other register we're no longer using.
			if (i != 0)
				DiscardNativeReg(mr[first + i].nReg);
		}

		// And set it as using the new one.
		mr[first + i].lane = i;
		mr[first + i].loc = MIPSLoc::FREG;
		mr[first + i].nReg = dest;
	}

	if (dest != nreg) {
		nr[dest].mipsReg = first;
		nr[nreg].mipsReg = -1;
		nr[nreg].isDirty = false;
	}

	return true;
}

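// The accessors below return the currently mapped native register for an
// IRReg; they assume the reg is already mapped and don't map on demand.
// A sketch of a typical call site (names are illustrative, not from this
// file):
//   regs_.MapGPR(inst.src1, MIPSMap::INIT);  // ensure it's in a register
//   LoongArch64Reg rs = regs_.R(inst.src1);  // then fetch the mapping
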
LoongArch64Reg LoongArch64RegCache::R(IRReg mipsReg) {
	_dbg_assert_(IsValidGPR(mipsReg));
	_dbg_assert_(mr[mipsReg].loc == MIPSLoc::REG || mr[mipsReg].loc == MIPSLoc::REG_IMM);
	if (mr[mipsReg].loc == MIPSLoc::REG || mr[mipsReg].loc == MIPSLoc::REG_IMM) {
		return (LoongArch64Reg)mr[mipsReg].nReg;
	} else {
		ERROR_LOG_REPORT(Log::JIT, "Reg %i not in LoongArch64 reg", mipsReg);
		return INVALID_REG; // BAAAD
	}
}

LoongArch64Reg LoongArch64RegCache::RPtr(IRReg mipsReg) {
	_dbg_assert_(IsValidGPR(mipsReg));
	_dbg_assert_(mr[mipsReg].loc == MIPSLoc::REG || mr[mipsReg].loc == MIPSLoc::REG_IMM || mr[mipsReg].loc == MIPSLoc::REG_AS_PTR);
	if (mr[mipsReg].loc == MIPSLoc::REG_AS_PTR) {
		return (LoongArch64Reg)mr[mipsReg].nReg;
	} else if (mr[mipsReg].loc == MIPSLoc::REG || mr[mipsReg].loc == MIPSLoc::REG_IMM) {
		int la = mr[mipsReg].nReg;
		_dbg_assert_(nr[la].pointerified);
		if (nr[la].pointerified) {
			return (LoongArch64Reg)mr[mipsReg].nReg;
		} else {
			ERROR_LOG(Log::JIT, "Tried to use a non-pointer register as a pointer");
			return INVALID_REG;
		}
	} else {
		ERROR_LOG_REPORT(Log::JIT, "Reg %i not in LoongArch64 reg", mipsReg);
		return INVALID_REG; // BAAAD
	}
}

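// F() returns the plain float view of the mapping; V() re-encodes the same
// physical register for LSX vector instructions, since the V regs overlap
// the F regs (see the constructor comment).
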
reg", mipsReg);681return INVALID_REG; // BAAAD682}683}684685LoongArch64Reg LoongArch64RegCache::RPtr(IRReg mipsReg) {686_dbg_assert_(IsValidGPR(mipsReg));687_dbg_assert_(mr[mipsReg].loc == MIPSLoc::REG || mr[mipsReg].loc == MIPSLoc::REG_IMM || mr[mipsReg].loc == MIPSLoc::REG_AS_PTR);688if (mr[mipsReg].loc == MIPSLoc::REG_AS_PTR) {689return (LoongArch64Reg)mr[mipsReg].nReg;690} else if (mr[mipsReg].loc == MIPSLoc::REG || mr[mipsReg].loc == MIPSLoc::REG_IMM) {691int la = mr[mipsReg].nReg;692_dbg_assert_(nr[la].pointerified);693if (nr[la].pointerified) {694return (LoongArch64Reg)mr[mipsReg].nReg;695} else {696ERROR_LOG(Log::JIT, "Tried to use a non-pointer register as a pointer");697return INVALID_REG;698}699} else {700ERROR_LOG_REPORT(Log::JIT, "Reg %i not in LoongArch64 reg", mipsReg);701return INVALID_REG; // BAAAD702}703}704705LoongArch64Reg LoongArch64RegCache::F(IRReg mipsReg) {706_dbg_assert_(IsValidFPR(mipsReg));707_dbg_assert_(mr[mipsReg + 32].loc == MIPSLoc::FREG);708if (mr[mipsReg + 32].loc == MIPSLoc::FREG) {709return (LoongArch64Reg)mr[mipsReg + 32].nReg;710} else {711ERROR_LOG_REPORT(Log::JIT, "Reg %i not in LoongArch64 reg", mipsReg);712return INVALID_REG; // BAAAD713}714}715716LoongArch64Reg LoongArch64RegCache::V(IRReg mipsReg) {717return EncodeRegToV(F(mipsReg));718}719720721