// Copyright (c) 2023- PPSSPP Project.

// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0 or later versions.

// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.

// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/

// Official git repository and contact information can be found at
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.

#include <algorithm>
#include <vector>
#include "Core/MemMap.h"
#include "Core/MIPS/LoongArch64/LoongArch64Jit.h"
#include "Core/MIPS/LoongArch64/LoongArch64RegCache.h"

// This file contains compilation for vector instructions.
//
// All functions should have CONDITIONAL_DISABLE, so we can narrow things down to a file quickly.
// Currently known non-working ones should have DISABLE. No flags because that's in IR already.

// #define CONDITIONAL_DISABLE { CompIR_Generic(inst); return; }
#define CONDITIONAL_DISABLE {}
#define DISABLE { CompIR_Generic(inst); return; }
#define INVALIDOP { _assert_msg_(false, "Invalid IR inst %d", (int)inst.op); CompIR_Generic(inst); return; }

namespace MIPSComp {

using namespace LoongArch64Gen;
using namespace LoongArch64JitConstants;

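// Returns true when the IR register ranges [r1, r1 + l1) and [r2, r2 + l2) overlap.
// The cases below use this to detect aliasing between vector operands.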
static bool Overlap(IRReg r1, int l1, IRReg r2, int l2) {
	return r1 < r2 + l2 && r1 + l1 > r2;
}

void LoongArch64JitBackend::CompIR_VecAssign(IRInst inst) {
	CONDITIONAL_DISABLE;

	switch (inst.op) {
	case IROp::Vec4Init:
		regs_.Map(inst);

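		// With LSX, build the constant directly in the 128-bit register: zero it via
		// VREPLGR2VR_D from R_ZERO, or broadcast/insert the 1.0f / -1.0f bit pattern
		// that LI loads into SCRATCH1. Without LSX, each of the four lanes is written
		// individually through the scalar FPRs.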
		switch ((Vec4Init)inst.src1) {
		case Vec4Init::AllZERO:
			if (cpu_info.LOONGARCH_LSX)
				VREPLGR2VR_D(regs_.V(inst.dest), R_ZERO);
			else
				for (int i = 0; i < 4; ++i)
					MOVGR2FR_W(regs_.F(inst.dest + i), R_ZERO);
			break;

		case Vec4Init::AllONE:
			LI(SCRATCH1, 1.0f);
			if (cpu_info.LOONGARCH_LSX) {
				VREPLGR2VR_W(regs_.V(inst.dest), SCRATCH1);
			} else {
				MOVGR2FR_W(regs_.F(inst.dest), SCRATCH1);
				for (int i = 1; i < 4; ++i)
					FMOV_S(regs_.F(inst.dest + i), regs_.F(inst.dest));
			}
			break;

		case Vec4Init::AllMinusONE:
			LI(SCRATCH1, -1.0f);
			if (cpu_info.LOONGARCH_LSX) {
				VREPLGR2VR_W(regs_.V(inst.dest), SCRATCH1);
			} else {
				MOVGR2FR_W(regs_.F(inst.dest), SCRATCH1);
				for (int i = 1; i < 4; ++i)
					FMOV_S(regs_.F(inst.dest + i), regs_.F(inst.dest));
			}
			break;

		case Vec4Init::Set_1000:
			LI(SCRATCH1, 1.0f);
			if (cpu_info.LOONGARCH_LSX) {
				VREPLGR2VR_D(regs_.V(inst.dest), R_ZERO);
				VINSGR2VR_W(regs_.V(inst.dest), SCRATCH1, 0);
			} else {
				for (int i = 0; i < 4; ++i) {
					if (i == 0) {
						MOVGR2FR_W(regs_.F(inst.dest + i), SCRATCH1);
					} else {
						MOVGR2FR_W(regs_.F(inst.dest + i), R_ZERO);
					}
				}
			}
			break;

		case Vec4Init::Set_0100:
			LI(SCRATCH1, 1.0f);
			if (cpu_info.LOONGARCH_LSX) {
				VREPLGR2VR_D(regs_.V(inst.dest), R_ZERO);
				VINSGR2VR_W(regs_.V(inst.dest), SCRATCH1, 1);
			} else {
				for (int i = 0; i < 4; ++i) {
					if (i == 1) {
						MOVGR2FR_W(regs_.F(inst.dest + i), SCRATCH1);
					} else {
						MOVGR2FR_W(regs_.F(inst.dest + i), R_ZERO);
					}
				}
			}
			break;

		case Vec4Init::Set_0010:
			LI(SCRATCH1, 1.0f);
			if (cpu_info.LOONGARCH_LSX) {
				VREPLGR2VR_D(regs_.V(inst.dest), R_ZERO);
				VINSGR2VR_W(regs_.V(inst.dest), SCRATCH1, 2);
			} else {
				for (int i = 0; i < 4; ++i) {
					if (i == 2) {
						MOVGR2FR_W(regs_.F(inst.dest + i), SCRATCH1);
					} else {
						MOVGR2FR_W(regs_.F(inst.dest + i), R_ZERO);
					}
				}
			}
			break;

		case Vec4Init::Set_0001:
			LI(SCRATCH1, 1.0f);
			if (cpu_info.LOONGARCH_LSX) {
				VREPLGR2VR_D(regs_.V(inst.dest), R_ZERO);
				VINSGR2VR_W(regs_.V(inst.dest), SCRATCH1, 3);
			} else {
				for (int i = 0; i < 4; ++i) {
					if (i == 3) {
						MOVGR2FR_W(regs_.F(inst.dest + i), SCRATCH1);
					} else {
						MOVGR2FR_W(regs_.F(inst.dest + i), R_ZERO);
					}
				}
			}
			break;
		}
		break;

	case IROp::Vec4Shuffle:
		if (cpu_info.LOONGARCH_LSX) {
			regs_.Map(inst);
			if (regs_.GetFPRLaneCount(inst.src1) == 1 && (inst.src1 & 3) == 0 && inst.src2 == 0) {
				// This is a broadcast. If dest == src1, this won't clear it.
				regs_.SpillLockFPR(inst.src1);
				regs_.MapVec4(inst.dest, MIPSMap::NOINIT);
			} else {
				regs_.Map(inst);
			}

			VSHUF4I_W(regs_.V(inst.dest), regs_.V(inst.src1), inst.src2);
		} else {
			if (inst.dest == inst.src1) {
				regs_.Map(inst);
				// Try to find the least swaps needed to move in place, never worse than 6 FMOVs.
				// Would be better with a vmerge and vector regs.
				int state[4]{ 0, 1, 2, 3 };
				int goal[4]{ (inst.src2 >> 0) & 3, (inst.src2 >> 2) & 3, (inst.src2 >> 4) & 3, (inst.src2 >> 6) & 3 };

				static constexpr int NOT_FOUND = 4;
				auto findIndex = [](int *arr, int val, int start = 0) {
					return (int)(std::find(arr + start, arr + 4, val) - arr);
				};
				auto moveChained = [&](const std::vector<int> &lanes, bool rotate) {
					int firstState = state[lanes.front()];
					if (rotate)
						FMOV_S(SCRATCHF1, regs_.F(inst.dest + lanes.front()));
					for (size_t i = 1; i < lanes.size(); ++i) {
						FMOV_S(regs_.F(inst.dest + lanes[i - 1]), regs_.F(inst.dest + lanes[i]));
						state[lanes[i - 1]] = state[lanes[i]];
					}
					if (rotate) {
						FMOV_S(regs_.F(inst.dest + lanes.back()), SCRATCHF1);
						state[lanes.back()] = firstState;
					}
				};

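				// Resolve the permutation greedily: if the value currently in lane i is
				// still needed by a later goal lane, rotate a chain through SCRATCHF1,
				// otherwise a plain copy chain suffices. For example, goal { 1, 0, 2, 3 }
				// hits the neededBy == foundIn case at i == 0 and becomes one rotated
				// swap of lanes 0 and 1.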
				for (int i = 0; i < 4; ++i) {
					// Overlap, so if they match, nothing to do.
					if (goal[i] == state[i])
						continue;

					int neededBy = findIndex(goal, state[i], i + 1);
					int foundIn = findIndex(state, goal[i], 0);
					_assert_(foundIn != NOT_FOUND);

					if (neededBy == NOT_FOUND || neededBy == foundIn) {
						moveChained({ i, foundIn }, neededBy == foundIn);
						continue;
					}

					// Maybe we can avoid a swap and move the next thing into place.
					int neededByDepth2 = findIndex(goal, state[neededBy], i + 1);
					if (neededByDepth2 == NOT_FOUND || neededByDepth2 == foundIn) {
						moveChained({ neededBy, i, foundIn }, neededByDepth2 == foundIn);
						continue;
					}

					// Since we only have 4 items, this is as deep as the chain could go.
					int neededByDepth3 = findIndex(goal, state[neededByDepth2], i + 1);
					moveChained({ neededByDepth2, neededBy, i, foundIn }, neededByDepth3 == foundIn);
				}
			} else {
				regs_.Map(inst);
				for (int i = 0; i < 4; ++i) {
					int lane = (inst.src2 >> (i * 2)) & 3;
					FMOV_S(regs_.F(inst.dest + i), regs_.F(inst.src1 + lane));
				}
			}
		}
		break;

	case IROp::Vec4Blend:
		regs_.Map(inst);
		if (cpu_info.LOONGARCH_LSX) {
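			// inst.constant is a 4-bit lane mask: a set bit takes the lane from src2, a
			// clear bit keeps src1. Arrange things so `src` holds the register whose
			// lanes still need copying into dest; if dest aliases src2, invert the mask
			// so the src1 lanes are the ones copied.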
			IRReg src = inst.src1;
			uint8_t imm = inst.constant;
			if (inst.dest == inst.src1) {
				src = inst.src2;
			} else if (inst.dest == inst.src2) {
				imm = ~imm;
			} else {
				VOR_V(regs_.V(inst.dest), regs_.V(src), regs_.V(src));
				src = inst.src2;
			}

			for (int i = 0; i < 4; ++i)
				if (imm & (1 << i))
					VEXTRINS_W(regs_.V(inst.dest), regs_.V(src), (i << 4) | i);
		} else {
			for (int i = 0; i < 4; ++i) {
				int which = (inst.constant >> i) & 1;
				IRReg srcReg = which ? inst.src2 : inst.src1;
				if (inst.dest != srcReg)
					FMOV_S(regs_.F(inst.dest + i), regs_.F(srcReg + i));
			}
		}
		break;

	case IROp::Vec4Mov:
		if (inst.dest != inst.src1) {
			regs_.Map(inst);
			if (cpu_info.LOONGARCH_LSX)
				VOR_V(regs_.V(inst.dest), regs_.V(inst.src1), regs_.V(inst.src1));
			else
				for (int i = 0; i < 4; ++i)
					FMOV_S(regs_.F(inst.dest + i), regs_.F(inst.src1 + i));
		}
		break;

	default:
		INVALIDOP;
		break;
	}
}

void LoongArch64JitBackend::CompIR_VecArith(IRInst inst) {
	CONDITIONAL_DISABLE;

	switch (inst.op) {
	case IROp::Vec4Add:
		regs_.Map(inst);
		if (cpu_info.LOONGARCH_LSX)
			VFADD_S(regs_.V(inst.dest), regs_.V(inst.src1), regs_.V(inst.src2));
		else
			for (int i = 0; i < 4; ++i)
				FADD_S(regs_.F(inst.dest + i), regs_.F(inst.src1 + i), regs_.F(inst.src2 + i));
		break;

	case IROp::Vec4Sub:
		regs_.Map(inst);
		if (cpu_info.LOONGARCH_LSX)
			VFSUB_S(regs_.V(inst.dest), regs_.V(inst.src1), regs_.V(inst.src2));
		else
			for (int i = 0; i < 4; ++i)
				FSUB_S(regs_.F(inst.dest + i), regs_.F(inst.src1 + i), regs_.F(inst.src2 + i));
		break;

	case IROp::Vec4Mul:
		regs_.Map(inst);
		if (cpu_info.LOONGARCH_LSX)
			VFMUL_S(regs_.V(inst.dest), regs_.V(inst.src1), regs_.V(inst.src2));
		else
			for (int i = 0; i < 4; ++i)
				FMUL_S(regs_.F(inst.dest + i), regs_.F(inst.src1 + i), regs_.F(inst.src2 + i));
		break;

	case IROp::Vec4Div:
		regs_.Map(inst);
		if (cpu_info.LOONGARCH_LSX)
			VFDIV_S(regs_.V(inst.dest), regs_.V(inst.src1), regs_.V(inst.src2));
		else
			for (int i = 0; i < 4; ++i)
				FDIV_S(regs_.F(inst.dest + i), regs_.F(inst.src1 + i), regs_.F(inst.src2 + i));
		break;

	case IROp::Vec4Scale:
		regs_.Map(inst);
		if (cpu_info.LOONGARCH_LSX) {
			if (Overlap(inst.dest, 4, inst.src2, 1) || Overlap(inst.src1, 4, inst.src2, 1))
				DISABLE;

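			// Broadcast lane 0 of src2 across its whole vector register (VSHUF4I_W with
			// imm 0 replicates element 0). This clobbers src2's other lanes, which is
			// why the overlap check above bails out, then multiply lane-wise.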
			VSHUF4I_W(regs_.V(inst.src2), regs_.V(inst.src2), 0);
			VFMUL_S(regs_.V(inst.dest), regs_.V(inst.src1), regs_.V(inst.src2));
		} else {
			if (Overlap(inst.src2, 1, inst.dest, 3)) {
				// We have to handle overlap, doing dest == src2 last.
				for (int i = 0; i < 4; ++i) {
					if (inst.src2 != inst.dest + i)
						FMUL_S(regs_.F(inst.dest + i), regs_.F(inst.src1 + i), regs_.F(inst.src2));
				}
				for (int i = 0; i < 4; ++i) {
					if (inst.src2 == inst.dest + i)
						FMUL_S(regs_.F(inst.dest + i), regs_.F(inst.src1 + i), regs_.F(inst.src2));
				}
			} else {
				for (int i = 0; i < 4; ++i)
					FMUL_S(regs_.F(inst.dest + i), regs_.F(inst.src1 + i), regs_.F(inst.src2));
			}
		}
		break;

	case IROp::Vec4Neg:
		regs_.Map(inst);
		if (cpu_info.LOONGARCH_LSX)
			VBITREVI_W(regs_.V(inst.dest), regs_.V(inst.src1), 31);
		else
			for (int i = 0; i < 4; ++i)
				FNEG_S(regs_.F(inst.dest + i), regs_.F(inst.src1 + i));
		break;

	case IROp::Vec4Abs:
		regs_.Map(inst);
		if (cpu_info.LOONGARCH_LSX)
			VBITCLRI_W(regs_.V(inst.dest), regs_.V(inst.src1), 31);
		else
			for (int i = 0; i < 4; ++i)
				FABS_S(regs_.F(inst.dest + i), regs_.F(inst.src1 + i));
		break;

	default:
		INVALIDOP;
		break;
	}
}

void LoongArch64JitBackend::CompIR_VecHoriz(IRInst inst) {
	CONDITIONAL_DISABLE;

	switch (inst.op) {
	case IROp::Vec4Dot:
		regs_.Map(inst);
		if (cpu_info.LOONGARCH_LSX) {
			if (Overlap(inst.dest, 1, inst.src1, 4) || Overlap(inst.dest, 1, inst.src2, 4))
				DISABLE;

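			// Multiply lane-wise, then reduce horizontally: swap adjacent pairs
			// (swizzle 1, 0, 3, 2) and add, then pull the upper 64 bits down with
			// VEXTRINS_D and add again, leaving the dot product in lane 0 of dest.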
			VFMUL_S(regs_.V(inst.dest), regs_.V(inst.src1), regs_.V(inst.src2));
			VOR_V(EncodeRegToV(SCRATCHF1), regs_.V(inst.dest), regs_.V(inst.dest));
			VSHUF4I_W(EncodeRegToV(SCRATCHF1), regs_.V(inst.dest), VFPU_SWIZZLE(1, 0, 3, 2));
			VFADD_S(regs_.V(inst.dest), regs_.V(inst.dest), EncodeRegToV(SCRATCHF1));
			VEXTRINS_D(EncodeRegToV(SCRATCHF1), regs_.V(inst.dest), 1);
			// Do we need to care about the upper 96 bits?
			VFADD_S(regs_.V(inst.dest), regs_.V(inst.dest), EncodeRegToV(SCRATCHF1));
		} else {
			if (Overlap(inst.dest, 1, inst.src1, 4) || Overlap(inst.dest, 1, inst.src2, 4)) {
				// This means inst.dest overlaps one of src1 or src2. We have to do that one first.
				// Technically this may impact -0.0 and such, but dots accurately need to be aligned anyway.
				for (int i = 0; i < 4; ++i) {
					if (inst.dest == inst.src1 + i || inst.dest == inst.src2 + i)
						FMUL_S(regs_.F(inst.dest), regs_.F(inst.src1 + i), regs_.F(inst.src2 + i));
				}
				for (int i = 0; i < 4; ++i) {
					if (inst.dest != inst.src1 + i && inst.dest != inst.src2 + i)
						FMADD_S(regs_.F(inst.dest), regs_.F(inst.src1 + i), regs_.F(inst.src2 + i), regs_.F(inst.dest));
				}
			} else {
				FMUL_S(regs_.F(inst.dest), regs_.F(inst.src1), regs_.F(inst.src2));
				for (int i = 1; i < 4; ++i)
					FMADD_S(regs_.F(inst.dest), regs_.F(inst.src1 + i), regs_.F(inst.src2 + i), regs_.F(inst.dest));
			}
		}
		break;

	default:
		INVALIDOP;
		break;
	}
}

void LoongArch64JitBackend::CompIR_VecPack(IRInst inst) {
	CONDITIONAL_DISABLE;

	switch (inst.op) {
	case IROp::Vec2Unpack16To31:
	case IROp::Vec2Pack31To16:
		CompIR_Generic(inst);
		break;

	case IROp::Vec4Pack32To8:
		if (cpu_info.LOONGARCH_LSX) {
			if (Overlap(inst.dest, 1, inst.src1, 4))
				DISABLE;

			regs_.Map(inst);
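			// Keep the top byte of each 32-bit lane (shift right by 24), then narrow
			// twice with VPICKEV_B so the four bytes end up packed into the low 32 bits
			// of dest.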
			VSRLI_W(EncodeRegToV(SCRATCHF1), regs_.V(inst.src1), 24);
			VPICKEV_B(EncodeRegToV(SCRATCHF1), EncodeRegToV(SCRATCHF1), EncodeRegToV(SCRATCHF1));
			VPICKEV_B(regs_.V(inst.dest), EncodeRegToV(SCRATCHF1), EncodeRegToV(SCRATCHF1));
		} else {
			CompIR_Generic(inst);
		}
		break;

	case IROp::Vec4Unpack8To32:
		if (cpu_info.LOONGARCH_LSX) {
			if (Overlap(inst.dest, 1, inst.src1, 4))
				DISABLE;

			regs_.Map(inst);
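			// Zero-extend the four source bytes to 16 and then 32 bits, then shift each
			// lane left by 24 so every byte ends up in the top 8 bits of its lane.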
			VSLLWIL_HU_BU(regs_.V(inst.dest), regs_.V(inst.src1), 0);
			VSLLWIL_WU_HU(regs_.V(inst.dest), regs_.V(inst.dest), 0);
			VSLLI_W(regs_.V(inst.dest), regs_.V(inst.dest), 24);
		} else {
			regs_.Map(inst);
			MOVFR2GR_S(SCRATCH2, regs_.F(inst.src1));
			for (int i = 0; i < 4; ++i) {
				// Mask using walls.
				if (i != 0) {
					SRLI_D(SCRATCH1, SCRATCH2, i * 8);
					SLLI_D(SCRATCH1, SCRATCH1, 24);
				} else {
					SLLI_D(SCRATCH1, SCRATCH2, 24);
				}
				MOVGR2FR_W(regs_.F(inst.dest + i), SCRATCH1);
			}
		}
		break;

	case IROp::Vec2Unpack16To32:
		// TODO: This works for now, but may need to handle aliasing for vectors.
		if (cpu_info.LOONGARCH_LSX) {
			CompIR_Generic(inst);
			break;
		}
		regs_.Map(inst);
		MOVFR2GR_S(SCRATCH2, regs_.F(inst.src1));
		SLLI_D(SCRATCH1, SCRATCH2, 16);
		MOVGR2FR_W(regs_.F(inst.dest), SCRATCH1);
		SRLI_D(SCRATCH1, SCRATCH2, 16);
		SLLI_D(SCRATCH1, SCRATCH1, 16);
		MOVGR2FR_W(regs_.F(inst.dest + 1), SCRATCH1);
		break;

	case IROp::Vec4DuplicateUpperBitsAndShift1:
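		// Both paths compute (x | x >> 8 | x >> 16 | x >> 24) >> 1 per 32-bit lane,
		// i.e. duplicate the upper bits downward, then shift right by one.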
		regs_.Map(inst);
		if (cpu_info.LOONGARCH_LSX) {
			VSRLI_W(EncodeRegToV(SCRATCHF1), regs_.V(inst.src1), 16);
			VOR_V(EncodeRegToV(SCRATCHF1), EncodeRegToV(SCRATCHF1), regs_.V(inst.src1));
			VSRLI_W(regs_.V(inst.dest), EncodeRegToV(SCRATCHF1), 8);
			VOR_V(regs_.V(inst.dest), regs_.V(inst.dest), EncodeRegToV(SCRATCHF1));
			VSRLI_W(regs_.V(inst.dest), regs_.V(inst.dest), 1);
		} else {
			for (int i = 0; i < 4; i++) {
				MOVFR2GR_S(SCRATCH1, regs_.F(inst.src1 + i));
				SRLI_W(SCRATCH2, SCRATCH1, 8);
				OR(SCRATCH1, SCRATCH1, SCRATCH2);
				SRLI_W(SCRATCH2, SCRATCH1, 16);
				OR(SCRATCH1, SCRATCH1, SCRATCH2);
				SRLI_W(SCRATCH1, SCRATCH1, 1);
				MOVGR2FR_W(regs_.F(inst.dest + i), SCRATCH1);
			}
		}
		break;

	case IROp::Vec4Pack31To8:
		// TODO: This works for now, but may need to handle aliasing for vectors.
		if (cpu_info.LOONGARCH_LSX) {
			if (Overlap(inst.dest, 1, inst.src1, 4))
				DISABLE;

			regs_.Map(inst);
			VSRLI_W(EncodeRegToV(SCRATCHF1), regs_.V(inst.src1), 23);
			VPICKEV_B(EncodeRegToV(SCRATCHF1), EncodeRegToV(SCRATCHF1), EncodeRegToV(SCRATCHF1));
			VPICKEV_B(regs_.V(inst.dest), EncodeRegToV(SCRATCHF1), EncodeRegToV(SCRATCHF1));
		} else {
			regs_.Map(inst);
			for (int i = 0; i < 4; ++i) {
				MOVFR2GR_S(SCRATCH1, regs_.F(inst.src1 + i));
				SRLI_D(SCRATCH1, SCRATCH1, 23);
				if (i == 0) {
					ANDI(SCRATCH2, SCRATCH1, 0xFF);
				} else {
					ANDI(SCRATCH1, SCRATCH1, 0xFF);
					SLLI_D(SCRATCH1, SCRATCH1, 8 * i);
					OR(SCRATCH2, SCRATCH2, SCRATCH1);
				}
			}
			MOVGR2FR_W(regs_.F(inst.dest), SCRATCH2);
		}
		break;

	case IROp::Vec2Pack32To16:
		// TODO: This works for now, but may need to handle aliasing for vectors.
		if (cpu_info.LOONGARCH_LSX) {
			CompIR_Generic(inst);
			break;
		}
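		// Scalar path: keep the top 16 bits of each of the two 32-bit lanes, producing
		// (src1[1] & 0xFFFF0000) | (src1[0] >> 16) in dest.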
		regs_.Map(inst);
		MOVFR2GR_S(SCRATCH1, regs_.F(inst.src1));
		MOVFR2GR_S(SCRATCH2, regs_.F(inst.src1 + 1));
		// Keep in mind, this was sign-extended, so we have to zero the upper.
		SLLI_D(SCRATCH1, SCRATCH1, 32);
		// Now we just set (SCRATCH2 & 0xFFFF0000) | SCRATCH1.
		SRLI_D(SCRATCH1, SCRATCH1, 48);
		// Use a wall to mask. We can ignore the upper 32 here.
		SRLI_D(SCRATCH2, SCRATCH2, 16);
		SLLI_D(SCRATCH2, SCRATCH2, 16);
		OR(SCRATCH1, SCRATCH1, SCRATCH2);
		// Okay, to the floating point register.
		MOVGR2FR_W(regs_.F(inst.dest), SCRATCH1);
		break;

	default:
		INVALIDOP;
		break;
	}
}

void LoongArch64JitBackend::CompIR_VecClamp(IRInst inst) {
	CONDITIONAL_DISABLE;

	switch (inst.op) {
	case IROp::Vec4ClampToZero:
		regs_.Map(inst);
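		// Clamp negatives to zero by treating the float bits as signed integers: any
		// value with the sign bit set compares below 0, so a signed max against zero
		// (LSX) or masking with the inverted sign (scalar SRAI/ORN/AND) yields +0.0,
		// while non-negative values pass through unchanged.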
		if (cpu_info.LOONGARCH_LSX) {
			VREPLGR2VR_D(EncodeRegToV(SCRATCHF1), R_ZERO);
			VMAX_W(regs_.V(inst.dest), regs_.V(inst.src1), EncodeRegToV(SCRATCHF1));
		} else {
			for (int i = 0; i < 4; i++) {
				MOVFR2GR_S(SCRATCH1, regs_.F(inst.src1 + i));
				SRAI_W(SCRATCH2, SCRATCH1, 31);
				ORN(SCRATCH2, R_ZERO, SCRATCH2);
				AND(SCRATCH1, SCRATCH1, SCRATCH2);
				MOVGR2FR_W(regs_.F(inst.dest + i), SCRATCH1);
			}
		}
		break;

	case IROp::Vec2ClampToZero:
		CompIR_Generic(inst);
		break;

	default:
		INVALIDOP;
		break;
	}
}

} // namespace MIPSComp