Source: hrydgard/ppsspp — blob/master/Core/MIPS/LoongArch64/LoongArch64CompFPU.cpp
1
// Copyright (c) 2023- PPSSPP Project.
2
3
// This program is free software: you can redistribute it and/or modify
4
// it under the terms of the GNU General Public License as published by
5
// the Free Software Foundation, version 2.0 or later versions.
6
7
// This program is distributed in the hope that it will be useful,
8
// but WITHOUT ANY WARRANTY; without even the implied warranty of
9
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10
// GNU General Public License 2.0 for more details.
11
12
// A copy of the GPL 2.0 should have been included with the program.
13
// If not, see http://www.gnu.org/licenses/
14
15
// Official git repository and contact information can be found at
16
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
17
18
#include "Core/MemMap.h"
19
#include "Core/MIPS/LoongArch64/LoongArch64Jit.h"
20
#include "Core/MIPS/LoongArch64/LoongArch64RegCache.h"
21
22
// This file contains compilation for floating point related instructions.
23
//
24
// All functions should have CONDITIONAL_DISABLE, so we can narrow things down to a file quickly.
25
// Currently known non working ones should have DISABLE. No flags because that's in IR already.
26
27
// Per-file kill switches for narrowing down JIT bugs (see the note above.)
// Swap in the commented-out definition to force every op in this file through the IR interpreter.
// #define CONDITIONAL_DISABLE { CompIR_Generic(inst); return; }
#define CONDITIONAL_DISABLE {}
// Forces a single op through the generic (IR interpreter) path.
#define DISABLE { CompIR_Generic(inst); return; }
// Asserts on an op this handler should never receive, then falls back to the generic path.
#define INVALIDOP { _assert_msg_(false, "Invalid IR inst %d", (int)inst.op); CompIR_Generic(inst); return; }
31
32
namespace MIPSComp {
33
34
using namespace LoongArch64Gen;
35
using namespace LoongArch64JitConstants;
36
37
// Emits single-precision arithmetic ops. Each op maps its operands to FPRs and
// emits exactly one LoongArch float instruction.
void LoongArch64JitBackend::CompIR_FArith(IRInst inst) {
	CONDITIONAL_DISABLE;

	if (inst.op == IROp::FAdd) {
		regs_.Map(inst);
		FADD_S(regs_.F(inst.dest), regs_.F(inst.src1), regs_.F(inst.src2));
	} else if (inst.op == IROp::FSub) {
		regs_.Map(inst);
		FSUB_S(regs_.F(inst.dest), regs_.F(inst.src1), regs_.F(inst.src2));
	} else if (inst.op == IROp::FMul) {
		regs_.Map(inst);
		// We'll assume everyone will make it such that 0 * infinity = NAN properly.
		// See blame on this comment if that proves untrue.
		FMUL_S(regs_.F(inst.dest), regs_.F(inst.src1), regs_.F(inst.src2));
	} else if (inst.op == IROp::FDiv) {
		regs_.Map(inst);
		FDIV_S(regs_.F(inst.dest), regs_.F(inst.src1), regs_.F(inst.src2));
	} else if (inst.op == IROp::FSqrt) {
		regs_.Map(inst);
		FSQRT_S(regs_.F(inst.dest), regs_.F(inst.src1));
	} else if (inst.op == IROp::FNeg) {
		regs_.Map(inst);
		FNEG_S(regs_.F(inst.dest), regs_.F(inst.src1));
	} else {
		INVALIDOP;
	}
}
78
79
// Emits FMin/FMax. The fast path uses hardware FMIN/FMAX; if either operand is a
// NAN (unordered compare), falls back to an integer comparison of the raw float
// bits to pick an operand, matching PSP behavior.
void LoongArch64JitBackend::CompIR_FCondAssign(IRInst inst) {
	CONDITIONAL_DISABLE;

	// First, detect the unordered (NAN) case and branch to the slow path.
	regs_.Map(inst);
	FCMP_COND_S(FCC0, regs_.F(inst.src1), regs_.F(inst.src2), LoongArch64Fcond::CUN);
	MOVCF2GR(SCRATCH1, FCC0);
	FixupBranch unordered = BNEZ(SCRATCH1);

	// Ordered fast path: the hardware op does what we want.
	switch (inst.op) {
	case IROp::FMin:
		FMIN_S(regs_.F(inst.dest), regs_.F(inst.src1), regs_.F(inst.src2));
		break;

	case IROp::FMax:
		FMAX_S(regs_.F(inst.dest), regs_.F(inst.src1), regs_.F(inst.src2));
		break;

	default:
		INVALIDOP;
		break;
	}

	FixupBranch ordererDone = B();
	SetJumpTarget(unordered);

	// Slow path: compare the raw bit patterns as integers.
	MOVFR2GR_S(SCRATCH1, regs_.F(inst.src1));
	MOVFR2GR_S(SCRATCH2, regs_.F(inst.src2));

	// If both are negative, we flip the comparison (not two's complement.)
	// We cheat and use RA...
	AND(R_RA, SCRATCH1, SCRATCH2);
	SRLI_W(R_RA, R_RA, 31);

	// isSrc1LowerReg = (src1 bits < src2 bits), signed compare.
	LoongArch64Reg isSrc1LowerReg = regs_.GetAndLockTempGPR();
	SLT(isSrc1LowerReg, SCRATCH1, SCRATCH2);
	// Flip the flag (to reverse the min/max) based on if both were negative.
	XOR(isSrc1LowerReg, isSrc1LowerReg, R_RA);
	FixupBranch useSrc1;
	switch (inst.op) {
	case IROp::FMin:
		useSrc1 = BNEZ(isSrc1LowerReg);
		break;

	case IROp::FMax:
		useSrc1 = BEQZ(isSrc1LowerReg);
		break;

	default:
		INVALIDOP;
		break;
	}
	// Fall through: src2 is the winner, so overwrite SCRATCH1 with its bits.
	MOVE(SCRATCH1, SCRATCH2);
	SetJumpTarget(useSrc1);

	// Move the chosen raw bits into the destination FPR.
	MOVGR2FR_W(regs_.F(inst.dest), SCRATCH1);

	SetJumpTarget(ordererDone);
}
137
138
// Emits float register moves and simple unary transforms (FMov, FAbs, FSign).
void LoongArch64JitBackend::CompIR_FAssign(IRInst inst) {
	CONDITIONAL_DISABLE;

	switch (inst.op) {
	case IROp::FMov:
		// A self-move is a no-op; skip mapping entirely in that case.
		if (inst.dest != inst.src1) {
			regs_.Map(inst);
			FMOV_S(regs_.F(inst.dest), regs_.F(inst.src1));
		}
		break;

	case IROp::FAbs:
		regs_.Map(inst);
		FABS_S(regs_.F(inst.dest), regs_.F(inst.src1));
		break;

	case IROp::FSign:
	{
		// Produces 0.0 (preserving sign of zero via the OR below being skipped),
		// +1.0, or -1.0 depending on the input's class and sign bit.
		regs_.Map(inst);
		// Check if it's negative zero, either 0x20/0x200 is zero.
		FCLASS_S(SCRATCHF1, regs_.F(inst.src1));
		MOVFR2GR_S(SCRATCH1, SCRATCHF1);
		ANDI(SCRATCH1, SCRATCH1, 0x220);
		SLTUI(SCRATCH1, SCRATCH1, 1);
		// Okay, it's zero if zero, 1 otherwise. Convert 1 to a constant 1.0.
		// Probably non-zero is the common case, so we make that the straight line.
		FixupBranch skipOne = BEQZ(SCRATCH1);
		// Load the raw bit pattern of 1.0f into the GPR.
		LI(SCRATCH1, 1.0f);

		// Now we just need the sign from it.
		MOVFR2GR_S(SCRATCH2, regs_.F(inst.src1));
		// Use a wall to isolate the sign, and combine.
		SRAI_W(SCRATCH2, SCRATCH2, 31);
		SLLI_W(SCRATCH2, SCRATCH2, 31);
		OR(SCRATCH1, SCRATCH1, SCRATCH2);

		SetJumpTarget(skipOne);
		// SCRATCH1 now holds the bits of 0.0, 1.0, or -1.0 — move to the dest FPR.
		MOVGR2FR_W(regs_.F(inst.dest), SCRATCH1);
		break;
	}

	default:
		INVALIDOP;
		break;
	}
}
184
185
// Emits float-to-int rounding ops (round/trunc/ceil/floor). Each IR op maps
// directly onto one FTINT variant; NAN inputs are patched to INT_MAX afterward.
void LoongArch64JitBackend::CompIR_FRound(IRInst inst) {
	CONDITIONAL_DISABLE;

	regs_.Map(inst);
	// FTINT* instruction will convert NAN to zero, tested on 3A6000.
	// So stash the INT_MAX bit pattern and remember whether the input was a NAN.
	QuickFLI(32, SCRATCHF1, (uint32_t)0x7fffffffl, SCRATCH1);
	FCMP_COND_S(FCC0, regs_.F(inst.src1), regs_.F(inst.src1), LoongArch64Fcond::CUN);

	if (inst.op == IROp::FRound) {
		FTINTRNE_W_S(regs_.F(inst.dest), regs_.F(inst.src1));
	} else if (inst.op == IROp::FTrunc) {
		FTINTRZ_W_S(regs_.F(inst.dest), regs_.F(inst.src1));
	} else if (inst.op == IROp::FCeil) {
		FTINTRP_W_S(regs_.F(inst.dest), regs_.F(inst.src1));
	} else if (inst.op == IROp::FFloor) {
		FTINTRM_W_S(regs_.F(inst.dest), regs_.F(inst.src1));
	} else {
		INVALIDOP;
	}

	// Switch to INT_MAX if it was NAN.
	FSEL(regs_.F(inst.dest), regs_.F(inst.dest), SCRATCHF1, FCC0);
}
218
219
// Emits int<->float conversions, optionally scaled by a power of two
// (src2 low 5 bits = scale shift, src2 >> 6 = rounding mode for FCvtScaledWS.)
void LoongArch64JitBackend::CompIR_FCvt(IRInst inst) {
	CONDITIONAL_DISABLE;

	switch (inst.op) {
	case IROp::FCvtWS:
		CompIR_Generic(inst);
		break;

	case IROp::FCvtSW:
		regs_.Map(inst);
		FFINT_S_W(regs_.F(inst.dest), regs_.F(inst.src1));
		break;

	case IROp::FCvtScaledWS:
		regs_.Map(inst);
		// Prepare the NAN result (INT_MAX bits) in SCRATCHF2 - FTINT* would turn a NAN into zero.
		// BUGFIX: this previously loaded into SCRATCHF1 and was immediately clobbered by
		// the multiplier below, leaving the FSEL at the end reading uninitialized SCRATCHF2.
		QuickFLI(32, SCRATCHF2, (uint32_t)(0x7FFFFFFF), SCRATCH1);
		// Prepare the multiplier.
		QuickFLI(32, SCRATCHF1, (float)(1UL << (inst.src2 & 0x1F)), SCRATCH1);

		// Detect NAN on the unscaled source, then scale into dest.
		FCMP_COND_S(FCC0, regs_.F(inst.src1), regs_.F(inst.src1), LoongArch64Fcond::CUN);
		FMUL_S(regs_.F(inst.dest), regs_.F(inst.src1), SCRATCHF1);

		// Only the FTINT rounding variant differs per mode.
		switch (inst.src2 >> 6) {
		case 0: // RNE
			FTINTRNE_W_S(regs_.F(inst.dest), regs_.F(inst.dest));
			break;
		case 1: // RZ
			FTINTRZ_W_S(regs_.F(inst.dest), regs_.F(inst.dest));
			break;
		case 2: // RP
			FTINTRP_W_S(regs_.F(inst.dest), regs_.F(inst.dest));
			break;
		case 3: // RM
			FTINTRM_W_S(regs_.F(inst.dest), regs_.F(inst.dest));
			break;
		default:
			_assert_msg_(false, "Invalid rounding mode for FCvtScaledWS");
		}

		// Switch to INT_MAX if the source was a NAN.
		FSEL(regs_.F(inst.dest), regs_.F(inst.dest), SCRATCHF2, FCC0);
		break;

	case IROp::FCvtScaledSW:
		regs_.Map(inst);
		FFINT_S_W(regs_.F(inst.dest), regs_.F(inst.src1));

		// Pre-divide so we can avoid any actual divide.
		QuickFLI(32, SCRATCHF1, 1.0f / (1UL << (inst.src2 & 0x1F)), SCRATCH1);
		FMUL_S(regs_.F(inst.dest), regs_.F(inst.dest), SCRATCHF1);
		break;

	default:
		INVALIDOP;
		break;
	}
}
284
285
// Emits saturation (clamp) ops: clamp src1 to [0, 1] or [-1, 1] via FMIN then FMAX.
void LoongArch64JitBackend::CompIR_FSat(IRInst inst) {
	CONDITIONAL_DISABLE;

	if (inst.op == IROp::FSat0_1) {
		regs_.Map(inst);
		// Upper bound +1.0 in SCRATCHF1, lower bound +0.0 in SCRATCHF2.
		QuickFLI(32, SCRATCHF1, (float)1.0f, SCRATCH1);
		// Check whether FMAX takes the larger of the two zeros, which is what we want.
		QuickFLI(32, SCRATCHF2, (float)0.0f, SCRATCH1);

		FMIN_S(regs_.F(inst.dest), regs_.F(inst.src1), SCRATCHF1);
		FMAX_S(regs_.F(inst.dest), regs_.F(inst.dest), SCRATCHF2);
	} else if (inst.op == IROp::FSatMinus1_1) {
		regs_.Map(inst);
		// Load +1.0, then negate it to get the -1.0 lower bound.
		QuickFLI(32, SCRATCHF1, (float)1.0f, SCRATCH1);
		FNEG_S(SCRATCHF2, SCRATCHF1);

		FMIN_S(regs_.F(inst.dest), regs_.F(inst.src1), SCRATCHF1);
		FMAX_S(regs_.F(inst.dest), regs_.F(inst.dest), SCRATCHF2);
	} else {
		INVALIDOP;
	}
}
313
314
// Emits float comparisons: the FPU condition flag (FCmp), VFPU conditional moves
// (FCmovVfpuCC), per-bit VFPU compares (FCmpVfpuBit), and the VFPU any/all
// aggregate bits (FCmpVfpuAggregate.)
void LoongArch64JitBackend::CompIR_FCompare(IRInst inst) {
	CONDITIONAL_DISABLE;

	constexpr IRReg IRREG_VFPU_CC = IRREG_VFPU_CTRL_BASE + VFPU_CTRL_CC;

	switch (inst.op) {
	case IROp::FCmp:
		// inst.dest selects the compare mode; each mode maps to one FCMP condition,
		// with the result copied from FCC0 into the FPCOND guest register.
		switch (inst.dest) {
		case IRFpCompareMode::False:
			regs_.SetGPRImm(IRREG_FPCOND, 0);
			break;

		case IRFpCompareMode::EitherUnordered:
			regs_.MapWithExtra(inst, { { 'G', IRREG_FPCOND, 1, MIPSMap::NOINIT } });
			FCMP_COND_S(FCC0, regs_.F(inst.src1), regs_.F(inst.src2), LoongArch64Fcond::CUN);
			MOVCF2GR(regs_.R(IRREG_FPCOND), FCC0);
			regs_.MarkGPRDirty(IRREG_FPCOND, true);
			break;

		case IRFpCompareMode::EqualOrdered:
			regs_.MapWithExtra(inst, { { 'G', IRREG_FPCOND, 1, MIPSMap::NOINIT } });
			FCMP_COND_S(FCC0, regs_.F(inst.src1), regs_.F(inst.src2), LoongArch64Fcond::CEQ);
			MOVCF2GR(regs_.R(IRREG_FPCOND), FCC0);
			regs_.MarkGPRDirty(IRREG_FPCOND, true);
			break;

		case IRFpCompareMode::EqualUnordered:
			regs_.MapWithExtra(inst, { { 'G', IRREG_FPCOND, 1, MIPSMap::NOINIT } });
			FCMP_COND_S(FCC0, regs_.F(inst.src1), regs_.F(inst.src2), LoongArch64Fcond::CUEQ);
			MOVCF2GR(regs_.R(IRREG_FPCOND), FCC0);
			regs_.MarkGPRDirty(IRREG_FPCOND, true);
			break;

		case IRFpCompareMode::LessEqualOrdered:
			regs_.MapWithExtra(inst, { { 'G', IRREG_FPCOND, 1, MIPSMap::NOINIT } });
			FCMP_COND_S(FCC0, regs_.F(inst.src1), regs_.F(inst.src2), LoongArch64Fcond::CLE);
			MOVCF2GR(regs_.R(IRREG_FPCOND), FCC0);
			regs_.MarkGPRDirty(IRREG_FPCOND, true);
			break;

		case IRFpCompareMode::LessEqualUnordered:
			regs_.MapWithExtra(inst, { { 'G', IRREG_FPCOND, 1, MIPSMap::NOINIT } });
			FCMP_COND_S(FCC0, regs_.F(inst.src1), regs_.F(inst.src2), LoongArch64Fcond::CULE);
			MOVCF2GR(regs_.R(IRREG_FPCOND), FCC0);
			regs_.MarkGPRDirty(IRREG_FPCOND, true);
			break;

		case IRFpCompareMode::LessOrdered:
			regs_.MapWithExtra(inst, { { 'G', IRREG_FPCOND, 1, MIPSMap::NOINIT } });
			FCMP_COND_S(FCC0, regs_.F(inst.src1), regs_.F(inst.src2), LoongArch64Fcond::CLT);
			MOVCF2GR(regs_.R(IRREG_FPCOND), FCC0);
			regs_.MarkGPRDirty(IRREG_FPCOND, true);
			break;

		case IRFpCompareMode::LessUnordered:
			regs_.MapWithExtra(inst, { { 'G', IRREG_FPCOND, 1, MIPSMap::NOINIT } });
			FCMP_COND_S(FCC0, regs_.F(inst.src1), regs_.F(inst.src2), LoongArch64Fcond::CULT);
			MOVCF2GR(regs_.R(IRREG_FPCOND), FCC0);
			regs_.MarkGPRDirty(IRREG_FPCOND, true);
			break;

		default:
			_assert_msg_(false, "Unexpected IRFpCompareMode %d", inst.dest);
		}
		break;

	case IROp::FCmovVfpuCC:
		// Conditionally move src1 into dest based on one bit of the VFPU CC register.
		// src2 low nibble selects the bit; src2 bit 7 selects the desired polarity.
		regs_.MapWithExtra(inst, { { 'G', IRREG_VFPU_CC, 1, MIPSMap::INIT } });
		if ((inst.src2 & 0xF) == 0) {
			ANDI(SCRATCH1, regs_.R(IRREG_VFPU_CC), 1);
		} else {
			// Extract the selected single bit into SCRATCH1.
			BSTRPICK_D(SCRATCH1, regs_.R(IRREG_VFPU_CC), inst.src2 & 0xF, inst.src2 & 0xF);
		}
		if ((inst.src2 >> 7) & 1) {
			FixupBranch skip = BEQZ(SCRATCH1);
			FMOV_S(regs_.F(inst.dest), regs_.F(inst.src1));
			SetJumpTarget(skip);
		} else {
			FixupBranch skip = BNEZ(SCRATCH1);
			FMOV_S(regs_.F(inst.dest), regs_.F(inst.src1));
			SetJumpTarget(skip);
		}
		break;

	case IROp::FCmpVfpuBit:
		// Compute one comparison result into SCRATCH1 (0 or 1), then merge it into
		// bit (inst.dest >> 4) of the VFPU CC register.
		regs_.MapGPR(IRREG_VFPU_CC, MIPSMap::DIRTY);

		switch (VCondition(inst.dest & 0xF)) {
		case VC_EQ:
			regs_.Map(inst);
			FCMP_COND_S(FCC0, regs_.F(inst.src1), regs_.F(inst.src2), LoongArch64Fcond::CEQ);
			MOVCF2GR(SCRATCH1, FCC0);
			break;
		case VC_NE:
			regs_.Map(inst);
			FCMP_COND_S(FCC0, regs_.F(inst.src1), regs_.F(inst.src2), LoongArch64Fcond::CNE);
			MOVCF2GR(SCRATCH1, FCC0);
			break;
		case VC_LT:
			regs_.Map(inst);
			FCMP_COND_S(FCC0, regs_.F(inst.src1), regs_.F(inst.src2), LoongArch64Fcond::CLT);
			MOVCF2GR(SCRATCH1, FCC0);
			break;
		case VC_LE:
			regs_.Map(inst);
			FCMP_COND_S(FCC0, regs_.F(inst.src1), regs_.F(inst.src2), LoongArch64Fcond::CLE);
			MOVCF2GR(SCRATCH1, FCC0);
			break;
		case VC_GT:
			// GT/GE are emitted as LT/LE with the operands swapped.
			regs_.Map(inst);
			FCMP_COND_S(FCC0, regs_.F(inst.src2), regs_.F(inst.src1), LoongArch64Fcond::CLT);
			MOVCF2GR(SCRATCH1, FCC0);
			break;
		case VC_GE:
			regs_.Map(inst);
			FCMP_COND_S(FCC0, regs_.F(inst.src2), regs_.F(inst.src1), LoongArch64Fcond::CLE);
			MOVCF2GR(SCRATCH1, FCC0);
			break;
		case VC_EZ:
		case VC_NZ:
			// Unary class tests below use FCLASS and check bit patterns;
			// inst.dest bit 2 selects the negated variant (SLTUI vs SLTU.)
			regs_.MapFPR(inst.src1);
			// Zero is either 0x20 or 0x200.
			FCLASS_S(SCRATCHF1, regs_.F(inst.src1));
			MOVFR2GR_S(SCRATCH1, SCRATCHF1);
			ANDI(SCRATCH1, SCRATCH1, 0x220);
			if ((inst.dest & 4) == 0)
				SLTU(SCRATCH1, R_ZERO, SCRATCH1);
			else
				SLTUI(SCRATCH1, SCRATCH1, 1);
			break;
		case VC_EN:
		case VC_NN:
			regs_.MapFPR(inst.src1);
			// NAN is either 0x1 or 0x2.
			FCLASS_S(SCRATCHF1, regs_.F(inst.src1));
			MOVFR2GR_S(SCRATCH1, SCRATCHF1);
			ANDI(SCRATCH1, SCRATCH1, 0x3);
			if ((inst.dest & 4) == 0)
				SLTU(SCRATCH1, R_ZERO, SCRATCH1);
			else
				SLTUI(SCRATCH1, SCRATCH1, 1);
			break;
		case VC_EI:
		case VC_NI:
			regs_.MapFPR(inst.src1);
			// Infinity is either 0x40 or 0x04.
			FCLASS_S(SCRATCHF1, regs_.F(inst.src1));
			MOVFR2GR_S(SCRATCH1, SCRATCHF1);
			ANDI(SCRATCH1, SCRATCH1, 0x44);
			if ((inst.dest & 4) == 0)
				SLTU(SCRATCH1, R_ZERO, SCRATCH1);
			else
				SLTUI(SCRATCH1, SCRATCH1, 1);
			break;
		case VC_ES:
		case VC_NS:
			regs_.MapFPR(inst.src1);
			// Infinity is either 0x40 or 0x04, NAN is either 0x1 or 0x2.
			FCLASS_S(SCRATCHF1, regs_.F(inst.src1));
			MOVFR2GR_S(SCRATCH1, SCRATCHF1);
			ANDI(SCRATCH1, SCRATCH1, 0x47);
			if ((inst.dest & 4) == 0)
				SLTU(SCRATCH1, R_ZERO, SCRATCH1);
			else
				SLTUI(SCRATCH1, SCRATCH1, 1);
			break;
		case VC_TR:
			LI(SCRATCH1, 1);
			break;
		case VC_FL:
			LI(SCRATCH1, 0);
			break;
		}

		// Clear the target CC bit, then OR in the freshly computed one.
		// NOTE(review): ~(1 << n) is wider than LoongArch's 12-bit zero-extended ANDI
		// immediate — presumably the emitter handles or asserts on this; verify.
		ANDI(regs_.R(IRREG_VFPU_CC), regs_.R(IRREG_VFPU_CC), ~(1 << (inst.dest >> 4)));
		if ((inst.dest >> 4) != 0)
			SLLI_D(SCRATCH1, SCRATCH1, inst.dest >> 4);
		OR(regs_.R(IRREG_VFPU_CC), regs_.R(IRREG_VFPU_CC), SCRATCH1);
		break;

	case IROp::FCmpVfpuAggregate:
		// Recompute the "any" (bit 4) and "all" (bit 5) aggregate bits of VFPU CC
		// over the lanes selected by the inst.dest mask.
		regs_.MapGPR(IRREG_VFPU_CC, MIPSMap::DIRTY);
		if (inst.dest == 1) {
			// Single-lane case: any == all == that lane's bit.
			ANDI(SCRATCH1, regs_.R(IRREG_VFPU_CC), inst.dest);
			// Negate so 1 becomes all bits set and zero stays zero, then mask to 0x30.
			SUB_D(SCRATCH1, R_ZERO, SCRATCH1);
			ANDI(SCRATCH1, SCRATCH1, 0x30);

			// Reject the old any/all bits and replace them with our own.
			ANDI(regs_.R(IRREG_VFPU_CC), regs_.R(IRREG_VFPU_CC), ~0x30);
			OR(regs_.R(IRREG_VFPU_CC), regs_.R(IRREG_VFPU_CC), SCRATCH1);
		} else {
			ANDI(SCRATCH1, regs_.R(IRREG_VFPU_CC), inst.dest);
			// If no masked bit is set, both aggregate bits are zero — skip ahead.
			FixupBranch skipZero = BEQZ(SCRATCH1);

			// To compare to inst.dest for "all", let's simply subtract it and compare to zero.
			ADDI_D(SCRATCH1, SCRATCH1, -inst.dest);
			SLTUI(SCRATCH1, SCRATCH1, 1);
			// Now we combine with the "any" bit.
			SLLI_D(SCRATCH1, SCRATCH1, 5);
			ORI(SCRATCH1, SCRATCH1, 0x10);

			SetJumpTarget(skipZero);

			// Reject the old any/all bits and replace them with our own.
			ANDI(regs_.R(IRREG_VFPU_CC), regs_.R(IRREG_VFPU_CC), ~0x30);
			OR(regs_.R(IRREG_VFPU_CC), regs_.R(IRREG_VFPU_CC), SCRATCH1);
		}
		break;

	default:
		INVALIDOP;
		break;
	}
}
529
530
// Emits rounding-mode management ops by delegating to the backend helpers.
void LoongArch64JitBackend::CompIR_RoundingMode(IRInst inst) {
	CONDITIONAL_DISABLE;

	if (inst.op == IROp::RestoreRoundingMode) {
		RestoreRoundingMode();
	} else if (inst.op == IROp::ApplyRoundingMode) {
		ApplyRoundingMode();
	} else if (inst.op == IROp::UpdateRoundingMode) {
		// Do nothing, we don't use any instructions that need updating the rounding mode.
	} else {
		INVALIDOP;
	}
}
551
552
// Emits special float functions. Sin/cos/asin call out to C helpers (vfpu_*);
// rsqrt and recip use the native LoongArch instructions.
void LoongArch64JitBackend::CompIR_FSpecial(IRInst inst) {
	CONDITIONAL_DISABLE;

	// Calls a float(float) helper: marshals src1 into F0 (the argument register),
	// makes the call, and moves the F0 result into the dest FPR.
	auto callFuncF_F = [&](float (*func)(float)) {
		regs_.FlushBeforeCall();
		WriteDebugProfilerStatus(IRProfilerStatus::MATH_HELPER);

		// It might be in a non-volatile register.
		// TODO: May have to handle a transfer if SIMD here.
		if (regs_.IsFPRMapped(inst.src1)) {
			int lane = regs_.GetFPRLane(inst.src1);
			if (lane == 0)
				FMOV_S(F0, regs_.F(inst.src1));
			else
				// Not lane 0 of the vector register — splat the wanted lane into V0.
				VREPLVEI_W(V0, regs_.V(inst.src1), lane);
		} else {
			// Not mapped: load straight from the guest float array in MIPSState.
			int offset = offsetof(MIPSState, f) + inst.src1 * 4;
			FLD_S(F0, CTXREG, offset);
		}
		QuickCallFunction(func, SCRATCH1);

		regs_.MapFPR(inst.dest, MIPSMap::NOINIT);
		// If it's already F0, we're done - MapReg doesn't actually overwrite the reg in that case.
		if (regs_.F(inst.dest) != F0) {
			FMOV_S(regs_.F(inst.dest), F0);
		}

		WriteDebugProfilerStatus(IRProfilerStatus::IN_JIT);
	};

	switch (inst.op) {
	case IROp::FSin:
		callFuncF_F(&vfpu_sin);
		break;

	case IROp::FCos:
		callFuncF_F(&vfpu_cos);
		break;

	case IROp::FRSqrt:
		regs_.Map(inst);
		FRSQRT_S(regs_.F(inst.dest), regs_.F(inst.src1));
		break;

	case IROp::FRecip:
		regs_.Map(inst);
		FRECIP_S(regs_.F(inst.dest), regs_.F(inst.src1));
		break;

	case IROp::FAsin:
		callFuncF_F(&vfpu_asin);
		break;

	default:
		INVALIDOP;
		break;
	}
}
610
611
} // namespace MIPSComp
612