Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
hrydgard
GitHub Repository: hrydgard/ppsspp
Path: blob/master/Core/MIPS/MIPSIntVFPU.cpp
3186 views
1
// Copyright (c) 2012- PPSSPP Project.
2
3
// This program is free software: you can redistribute it and/or modify
4
// it under the terms of the GNU General Public License as published by
5
// the Free Software Foundation, version 2.0 or later versions.
6
7
// This program is distributed in the hope that it will be useful,
8
// but WITHOUT ANY WARRANTY; without even the implied warranty of
9
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10
// GNU General Public License 2.0 for more details.
11
12
// A copy of the GPL 2.0 should have been included with the program.
13
// If not, see http://www.gnu.org/licenses/
14
15
// Official git repository and contact information can be found at
16
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
17
18
// TODO: Test and maybe fix: https://code.google.com/p/jpcsp/source/detail?r=3082#
19
20
#include <algorithm>
#include <cmath>
#include <cstring>
#include <limits>
23
24
#include "Common/Data/Convert/SmallDataConvert.h"
25
#include "Common/Math/math_util.h"
26
27
#include "Core/Compatibility.h"
28
#include "Core/Core.h"
29
#include "Core/MemMap.h"
30
#include "Core/Reporting.h"
31
#include "Core/System.h"
32
33
#include "Core/MIPS/MIPS.h"
34
#include "Core/MIPS/MIPSInt.h"
35
#include "Core/MIPS/MIPSTables.h"
36
#include "Core/MIPS/MIPSVFPUUtils.h"
37
38
// Shorthand accessors for the interpreter state reached through currentMIPS.
// V() and VI() go through the voffset[] table before indexing.
#define R(i)   (currentMIPS->r[i])
#define V(i)   (currentMIPS->v[voffset[i]])
#define VI(i)  (currentMIPS->vi[voffset[i]])
#define FI(i)  (currentMIPS->fi[i])
#define FsI(i) (currentMIPS->fs[i])
#define PC     (currentMIPS->pc)

// 5-bit instruction field extractors. Each one expects a variable named
// `op` to be in scope at the point of use.
#define _RS   ((op>>21) & 0x1F)
#define _RT   ((op>>16) & 0x1F)
#define _RD   ((op>>11) & 0x1F)
#define _FS   ((op>>11) & 0x1F)
#define _FT   ((op>>16) & 0x1F)
#define _FD   ((op>>6 ) & 0x1F)
#define _POS  ((op>>6 ) & 0x1F)
#define _SIZE ((op>>11) & 0x1F)

// The hi/lo members of the current CPU state.
#define HI currentMIPS->hi
#define LO currentMIPS->lo
56
57
// Fallback single-precision math constants, used only when the platform
// headers did not already provide M_LOG2E. Note that M_PI is always
// redefined inside this fallback, while M_PI_2 is kept if already present.
#ifndef M_LOG2E
#define M_E        2.71828182845904523536f
#define M_LOG2E    1.44269504088896340736f
#define M_LOG10E   0.434294481903251827651f
#define M_LN2      0.693147180559945309417f
#define M_LN10     2.30258509299404568402f
#undef M_PI
#define M_PI       3.14159265358979323846f
#ifndef M_PI_2
#define M_PI_2     1.57079632679489661923f
#endif
#define M_PI_4     0.785398163397448309616f
#define M_1_PI     0.318309886183790671538f
#define M_2_PI     0.636619772367581343076f
#define M_2_SQRTPI 1.12837916709551257390f
#define M_SQRT2    1.41421356237309504880f
#define M_SQRT1_2  0.707106781186547524401f
#endif
75
76
// Compile-time switches for the vfpu_* helper implementations.
// When false, Int_Vmmul only uses vfpu_dot() if the MoreAccurateVMMUL compat flag is set.
static const bool USE_VFPU_DOT = false;
// When false, vsqrt/vrsq in Int_VV2Op use sqrtf() instead of vfpu_sqrt()/vfpu_rsqrt().
static const bool USE_VFPU_SQRT = false;
78
79
union FloatBits {
80
float f[4];
81
u32 u[4];
82
int i[4];
83
};
84
85
// Maximum of f and cst with well-defined corner cases:
//  - a NaN in f is returned unchanged (the comparison below is false for NaN),
//  - when both compare equal, cst is returned, so its sign wins for +/-0.
// std::max does not promise either of these.
inline float nanmax(float f, float cst)
{
	if (f <= cst)
		return cst;
	return f;
}
91
92
// Minimum of f and cst with well-defined corner cases:
//  - a NaN in f is returned unchanged (the comparison below is false for NaN),
//  - when both compare equal, cst is returned, so its sign wins for +/-0.
inline float nanmin(float f, float cst)
{
	if (f >= cst)
		return cst;
	return f;
}
97
98
// Preserves NaN in first param, takes sign of equal value in others.
99
inline float nanclamp(float f, float lower, float upper)
100
{
101
return nanmin(nanmax(f, lower), upper);
102
}
103
104
static void ApplyPrefixST(float *r, u32 data, VectorSize size, float invalid = 0.0f) {
105
// Check for no prefix.
106
if (data == 0xe4)
107
return;
108
109
int n = GetNumVectorElements(size);
110
float origV[4]{ invalid, invalid, invalid, invalid };
111
static const float constantArray[8] = {0.f, 1.f, 2.f, 0.5f, 3.f, 1.f/3.f, 0.25f, 1.f/6.f};
112
113
for (int i = 0; i < n; i++) {
114
origV[i] = r[i];
115
}
116
117
for (int i = 0; i < n; i++) {
118
int regnum = (data >> (i*2)) & 3;
119
int abs = (data >> (8+i)) & 1;
120
int negate = (data >> (16+i)) & 1;
121
int constants = (data >> (12+i)) & 1;
122
123
if (!constants) {
124
if (regnum >= n) {
125
// We mostly handle this now, but still worth reporting.
126
ERROR_LOG_REPORT(Log::CPU, "Invalid VFPU swizzle: %08x: %i / %d at PC = %08x (%s)", data, regnum, n, currentMIPS->pc, MIPSDisasmAt(currentMIPS->pc).c_str());
127
}
128
r[i] = origV[regnum];
129
if (abs)
130
((u32 *)r)[i] = ((u32 *)r)[i] & 0x7FFFFFFF;
131
} else {
132
r[i] = constantArray[regnum + (abs<<2)];
133
}
134
135
if (negate)
136
((u32 *)r)[i] = ((u32 *)r)[i] ^ 0x80000000;
137
}
138
}
139
140
inline void ApplySwizzleS(float *v, VectorSize size, float invalid = 0.0f)
141
{
142
ApplyPrefixST(v, currentMIPS->vfpuCtrl[VFPU_CTRL_SPREFIX], size, invalid);
143
}
144
145
inline void ApplySwizzleT(float *v, VectorSize size, float invalid = 0.0f)
146
{
147
ApplyPrefixST(v, currentMIPS->vfpuCtrl[VFPU_CTRL_TPREFIX], size, invalid);
148
}
149
150
void ApplyPrefixD(float *v, VectorSize size, bool onlyWriteMask = false)
151
{
152
u32 data = currentMIPS->vfpuCtrl[VFPU_CTRL_DPREFIX];
153
if (!data || onlyWriteMask)
154
return;
155
int n = GetNumVectorElements(size);
156
for (int i = 0; i < n; i++)
157
{
158
int sat = (data >> (i * 2)) & 3;
159
if (sat == 1)
160
v[i] = vfpu_clamp(v[i], 0.0f, 1.0f);
161
else if (sat == 3)
162
v[i] = vfpu_clamp(v[i], -1.0f, 1.0f);
163
}
164
}
165
166
static void RetainInvalidSwizzleST(float *d, VectorSize sz) {
167
// Somehow it's like a supernan, maybe wires through to zero?
168
// Doesn't apply to all ops.
169
int sPrefix = currentMIPS->vfpuCtrl[VFPU_CTRL_SPREFIX];
170
int tPrefix = currentMIPS->vfpuCtrl[VFPU_CTRL_TPREFIX];
171
int n = GetNumVectorElements(sz);
172
173
// TODO: We can probably do some faster check of sPrefix and tPrefix to skip over this loop.
174
for (int i = 0; i < n; i++) {
175
int swizzleS = (sPrefix >> (i + i)) & 3;
176
int swizzleT = (tPrefix >> (i + i)) & 3;
177
int constS = (sPrefix >> (12 + i)) & 1;
178
int constT = (tPrefix >> (12 + i)) & 1;
179
if ((swizzleS >= n && !constS) || (swizzleT >= n && !constT))
180
d[i] = 0.0f;
181
}
182
}
183
184
void EatPrefixes()
185
{
186
currentMIPS->vfpuCtrl[VFPU_CTRL_SPREFIX] = 0xe4; // passthru
187
currentMIPS->vfpuCtrl[VFPU_CTRL_TPREFIX] = 0xe4; // passthru
188
currentMIPS->vfpuCtrl[VFPU_CTRL_DPREFIX] = 0;
189
}
190
191
namespace MIPSInt
192
{
193
// vpfxs / vpfxt / vpfxd: latches the prefix immediate from the opcode into
// the control register selected by bits 24-25. S and T keep the low 20 bits
// of the opcode, the D prefix only keeps the low 12.
void Int_VPFX(MIPSOpcode op)
{
	const int regnum = (op >> 24) & 3;
	const int immediate = op & 0x000FFFFF;
	const int data = (regnum == VFPU_CTRL_DPREFIX) ? (immediate & 0x00000FFF) : immediate;

	currentMIPS->vfpuCtrl[VFPU_CTRL_SPREFIX + regnum] = data;
	PC += 4;
}
202
203
// Quad-vector loads and stores, selected by the top 6 opcode bits:
//   53 lvl.q / lvr.q   54 lv.q   61 svl.q / svr.q   62 sv.q
// The effective address is R(rs) plus the sign-extended immediate with its
// two low bits cleared; those bits instead carry the high register bit
// (bit 0) and the left/right selector (bit 1).
void Int_SVQ(MIPSOpcode op)
{
	const int imm = SignExtend16ToS32(op & 0xFFFC);
	const int rs = _RS;
	const int vt = (((op >> 16) & 0x1f)) | ((op&1) << 5);
	const u32 addr = R(rs) + imm;

	switch (op >> 26)
	{
	case 53: //lvl.q/lvr.q
		{
			if ((addr & 0x3) != 0)
			{
				_dbg_assert_msg_( 0, "Misaligned lvX.q at %08x (pc = %08x)", addr, PC);
			}
			// Start from the current register contents; only part is replaced.
			float quad[4];
			ReadVector(quad, V_Quad, vt);
			const int offset = (addr >> 2) & 3;
			const bool isLeft = (op & 2) == 0;
			if (isLeft)
			{
				// LVL: fill from the top lane downwards, walking memory backwards.
				for (int i = 0; i <= offset; i++)
				{
					quad[3 - i] = Memory::Read_Float(addr - 4 * i);
				}
			}
			else
			{
				// LVR: fill from lane 0 upwards, walking memory forwards.
				for (int i = 0; i <= 3 - offset; i++)
				{
					quad[i] = Memory::Read_Float(addr + 4 * i);
				}
			}
			WriteVector(quad, V_Quad, vt);
		}
		break;

	case 54: //lv.q
		{
			if ((addr & 0xF) != 0)
			{
				_dbg_assert_msg_( 0, "Misaligned lv.q at %08x (pc = %08x)", addr, PC);
			}
#ifndef COMMON_BIG_ENDIAN
			// Read straight out of emulated memory; skipped if the range is invalid.
			const float *src = reinterpret_cast<const float *>(Memory::GetPointerRange(addr, 16));
			if (src)
				WriteVector(src, V_Quad, vt);
#else
			float lvqd[4];

			lvqd[0] = Memory::Read_Float(addr);
			lvqd[1] = Memory::Read_Float(addr + 4);
			lvqd[2] = Memory::Read_Float(addr + 8);
			lvqd[3] = Memory::Read_Float(addr + 12);

			WriteVector(lvqd, V_Quad, vt);
#endif
		}
		break;

	case 61: // svl.q/svr.q
		{
			if ((addr & 0x3) != 0)
			{
				_dbg_assert_msg_( 0, "Misaligned svX.q at %08x (pc = %08x)", addr, PC);
			}
			float quad[4];
			ReadVector(quad, V_Quad, vt);
			const int offset = (addr >> 2) & 3;
			const bool isLeft = (op & 2) == 0;
			if (isLeft)
			{
				// SVL: store from the top lane downwards, walking memory backwards.
				for (int i = 0; i <= offset; i++)
				{
					Memory::Write_Float(quad[3 - i], addr - i * 4);
				}
			}
			else
			{
				// SVR: store from lane 0 upwards, walking memory forwards.
				for (int i = 0; i <= 3 - offset; i++)
				{
					Memory::Write_Float(quad[i], addr + 4 * i);
				}
			}
		}
		break;

	case 62: //sv.q
		{
			if ((addr & 0xF) != 0)
			{
				_dbg_assert_msg_( 0, "Misaligned sv.q at %08x (pc = %08x)", addr, PC);
			}
#ifndef COMMON_BIG_ENDIAN
			// Write straight into emulated memory; skipped if the range is invalid.
			float *dst = reinterpret_cast<float *>(Memory::GetPointerWriteRange(addr, 16));
			if (dst)
				ReadVector(dst, V_Quad, vt);
#else
			float svqd[4];
			ReadVector(svqd, V_Quad, vt);

			Memory::Write_Float(svqd[0], addr);
			Memory::Write_Float(svqd[1], addr + 4);
			Memory::Write_Float(svqd[2], addr + 8);
			Memory::Write_Float(svqd[3], addr + 12);
#endif
		}
		break;

	default:
		_dbg_assert_msg_(false,"Trying to interpret VQ instruction that can't be interpreted");
		break;
	}
	PC += 4;
}
319
320
// vmidt / vmzero / vmone: writes an identity, all-zero or all-one matrix.
// If the S prefix has any abs or negate bit set (mask 0xF0F00), the last row
// of the source table is regenerated through the prefix logic, with the
// swizzle bits replaced by the constants that row would normally hold.
void Int_VMatrixInit(MIPSOpcode op) {
	static const float idt[16] = {
		1,0,0,0,
		0,1,0,0,
		0,0,1,0,
		0,0,0,1,
	};
	static const float zero[16] = {
		0,0,0,0,
		0,0,0,0,
		0,0,0,0,
		0,0,0,0,
	};
	static const float one[16] = {
		1,1,1,1,
		1,1,1,1,
		1,1,1,1,
		1,1,1,1,
	};

	const int vd = _VD;
	const MatrixSize sz = GetMtxSize(op);
	const u32 initKind = (op >> 16) & 0xF;

	const float *m;
	switch (initKind) {
	case 3: // vmidt
		m = idt;
		break;
	case 6: // vmzero
		m = zero;
		break;
	case 7: // vmone
		m = one;
		break;
	default:
		_dbg_assert_msg_(false,"Trying to interpret instruction that can't be interpreted");
		PC += 4;
		EatPrefixes();
		return;
	}

	const bool sPrefixHasAbsOrNegate = (currentMIPS->vfpuCtrl[VFPU_CTRL_SPREFIX] & 0xF0F00) != 0;
	if (!sPrefixHasAbsOrNegate) {
		// Write mask applies to the final (maybe transposed) row.  Sat causes hang.
		WriteMatrix(m, sz, vd);
	} else {
		// The S prefix generates constants, but only for the final (possibly transposed) row.
		float prefixed[16];
		memcpy(prefixed, m, sizeof(prefixed));

		const int off = GetMatrixSide(sz) - 1;
		// ONE on the diagonal position of the last row, ZERO elsewhere.
		auto identityConst = [off](int lane) {
			return off == lane ? VFPUConst::ONE : VFPUConst::ZERO;
		};

		const u32 sprefixRemove = VFPU_ANY_SWIZZLE();
		u32 sprefixAdd = 0;
		switch (initKind) {
		case 3:
			sprefixAdd = VFPU_MAKE_CONSTANTS(identityConst(0), identityConst(1), identityConst(2), identityConst(3));
			break;
		case 6:
			sprefixAdd = VFPU_MAKE_CONSTANTS(VFPUConst::ZERO, VFPUConst::ZERO, VFPUConst::ZERO, VFPUConst::ZERO);
			break;
		case 7:
			sprefixAdd = VFPU_MAKE_CONSTANTS(VFPUConst::ONE, VFPUConst::ONE, VFPUConst::ONE, VFPUConst::ONE);
			break;
		default:
			_dbg_assert_msg_( 0, "Unknown matrix init op");
			break;
		}

		float *lastRow = &prefixed[off * 4];
		ApplyPrefixST(lastRow, VFPURewritePrefix(VFPU_CTRL_SPREFIX, sprefixRemove, sprefixAdd), V_Quad);
		WriteMatrix(prefixed, sz, vd);
	}

	PC += 4;
	EatPrefixes();
}
391
392
// vzero / vone: fills the destination vector with 0.0 or 1.0.
// The value is produced through the S prefix machinery: the swizzle bits are
// replaced by a "constant" selection, so abs/negate bits of a pending S
// prefix still take effect. The D prefix is applied afterwards.
void Int_VVectorInit(MIPSOpcode op)
{
	int vd = _VD;
	VectorSize sz = GetVecSize(op);
	// Zero-initialized: ApplyPrefixST snapshots the incoming lanes before
	// overwriting them, so they must not be indeterminate.
	float d[4]{};

	VFPUConst constant = VFPUConst::ZERO;
	switch ((op >> 16) & 0xF) {
	case 6: constant = VFPUConst::ZERO; break;  //vzero
	case 7: constant = VFPUConst::ONE; break;   //vone
	default:
		_dbg_assert_msg_( 0, "Trying to interpret instruction that can't be interpreted");
		PC += 4;
		EatPrefixes();
		return;
	}

	// The S prefix generates constants, but negate is still respected.
	u32 sprefixRemove = VFPU_ANY_SWIZZLE();
	u32 sprefixAdd = VFPU_MAKE_CONSTANTS(constant, constant, constant, constant);
	ApplyPrefixST(d, VFPURewritePrefix(VFPU_CTRL_SPREFIX, sprefixRemove, sprefixAdd), sz);

	ApplyPrefixD(d, sz);
	WriteVector(d, sz, vd);

	EatPrefixes();
	PC += 4;
}
420
421
// viim / vfim: loads a 16-bit immediate into a single VFPU register.
// Type 6 (viim) converts the sign-extended integer to float, type 7 (vfim)
// expands the immediate as a half-precision float.
void Int_Viim(MIPSOpcode op) {
	const int vt = _VT;
	const s32 imm = SignExtend16ToS32(op & 0xFFFF);
	const u16 uimm16 = (op&0xFFFF);
	const int type = (op >> 23) & 7;

	float value;
	switch (type) {
	case 6: // viim
		value = (float)imm;
		break;
	case 7: // vfim
		value = Float16ToFloat32((u16)uimm16);
		break;
	default:
		_dbg_assert_msg_( 0, "Invalid Viim opcode type %d", type);
		value = 0;
		break;
	}

	ApplyPrefixD(&value, V_Single);
	WriteVector(&value, V_Single, vt);
	PC += 4;
	EatPrefixes();
}
441
442
// vidt: writes one row of an identity matrix into vd. Which lane gets the
// 1.0 is derived from the low bits of the destination register number.
// As with vzero/vone, the values are produced as S-prefix constants so a
// pending abs/negate still applies; the D prefix is applied afterwards.
void Int_Vidt(MIPSOpcode op) {
	int vd = _VD;
	VectorSize sz = GetVecSize(op);
	// Zero-initialized: ApplyPrefixST snapshots the incoming lanes before
	// overwriting them, so they must not be indeterminate.
	float f[4]{};

	// The S prefix generates constants, but negate is still respected.
	int offmask = sz == V_Quad || sz == V_Triple ? 3 : 1;
	int off = vd & offmask;
	// If it's a pair, the identity starts in a different position.
	VFPUConst constX = off == (0 & offmask) ? VFPUConst::ONE : VFPUConst::ZERO;
	VFPUConst constY = off == (1 & offmask) ? VFPUConst::ONE : VFPUConst::ZERO;
	VFPUConst constZ = off == (2 & offmask) ? VFPUConst::ONE : VFPUConst::ZERO;
	VFPUConst constW = off == (3 & offmask) ? VFPUConst::ONE : VFPUConst::ZERO;

	u32 sprefixRemove = VFPU_ANY_SWIZZLE();
	u32 sprefixAdd = VFPU_MAKE_CONSTANTS(constX, constY, constZ, constW);
	ApplyPrefixST(f, VFPURewritePrefix(VFPU_CTRL_SPREFIX, sprefixRemove, sprefixAdd), sz);

	ApplyPrefixD(f, sz);
	WriteVector(f, sz, vd);
	PC += 4;
	EatPrefixes();
}
465
466
// The test really needs some work.
467
// vmmul: matrix multiply. Every output element d[a][b] is the dot product of
// s-row b and t-row a (rows are stored with a stride of 4 floats).
// S/T prefixes are applied only right before the very last dot product, and
// that last product always runs over all 4 lanes. The D prefix is rewritten
// so that only the final element of the last row is affected.
void Int_Vmmul(MIPSOpcode op) {
	float s[16]{}, t[16]{}, d[16];

	const int vd = _VD;
	const int vs = _VS;
	const int vt = _VT;
	const MatrixSize sz = GetMtxSize(op);
	const int n = GetMatrixSide(sz);
	const int last = n - 1;

	ReadMatrix(s, sz, vs);
	ReadMatrix(t, sz, vt);

	// TODO: Always use the more accurate path in interpreter?
	const bool useAccurateDot = USE_VFPU_DOT || PSP_CoreParameter().compat.flags().MoreAccurateVMMUL;

	for (int a = 0; a < n; a++) {
		float *tRow = &t[a * 4];
		for (int b = 0; b < n; b++) {
			float *sRow = &s[b * 4];
			const bool isFinalDot = (a == last) && (b == last);

			union { float f; uint32_t u; } sum = { 0.0f };
			if (isFinalDot) {
				// S and T prefixes work on the final (or maybe first, in reverse?) dot.
				ApplySwizzleS(sRow, V_Quad);
				ApplySwizzleT(tRow, V_Quad);
			}

			if (useAccurateDot) {
				sum.f = vfpu_dot(sRow, tRow);
				if (my_isnan(sum.f)) {
					// Replace any NaN with one fixed NaN bit pattern.
					sum.u = 0x7f800001;
				} else if ((sum.u & 0x7F800000) == 0) {
					// Zero exponent: keep only the sign bit.
					sum.u &= 0xFF800000;
				}
			} else {
				// The final dot covers all four lanes, the others only n.
				const int terms = isFinalDot ? 4 : n;
				for (int c = 0; c < terms; c++) {
					sum.f += sRow[c] * tRow[c];
				}
			}

			d[a * 4 + b] = sum.f;
		}
	}

	// The D prefix applies ONLY to the final element, but sat does work.
	const u32 dPrefix = currentMIPS->vfpuCtrl[VFPU_CTRL_DPREFIX];
	const u32 lastmask = (dPrefix & (1 << 8)) << (n - 1);
	const u32 lastsat = (dPrefix & 3) << (n + n - 2);
	currentMIPS->vfpuCtrl[VFPU_CTRL_DPREFIX] = lastmask | lastsat;
	ApplyPrefixD(&d[4 * last], V_Quad, false);
	WriteMatrix(d, sz, vd);
	PC += 4;
	EatPrefixes();
}
522
523
// vmscl: scales matrix s by the scalar in vt. All rows but the last are
// multiplied directly by t[0]; the last row goes through the S, T and D
// prefixes, with the T prefix swizzle forced to broadcast a single lane.
void Int_Vmscl(MIPSOpcode op) {
	float s[16]{}, t[4]{}, d[16];

	const int vd = _VD;
	const int vs = _VS;
	const int vt = _VT;
	const MatrixSize sz = GetMtxSize(op);
	const int n = GetMatrixSide(sz);
	const int last = n - 1;

	ReadMatrix(s, sz, vs);
	ReadVector(t, V_Single, vt);

	// Plain scaling for every row except the last one.
	for (int row = 0; row < last; row++) {
		for (int col = 0; col < n; col++) {
			const int idx = row * 4 + col;
			d[idx] = s[idx] * t[0];
		}
	}

	float *lastS = &s[last * 4];
	float *lastD = &d[last * 4];

	// S prefix applies to the last row.
	ApplySwizzleS(lastS, V_Quad);

	// T prefix applies only for the last row, and is used per element.
	// This is like vscl, but instead of zzzz it uses xxxx.
	const int tlane = (vt >> 5) & 3;
	t[tlane] = t[0];
	const u32 tprefixRemove = VFPU_ANY_SWIZZLE();
	const u32 tprefixAdd = VFPU_SWIZZLE(tlane, tlane, tlane, tlane);
	ApplyPrefixST(t, VFPURewritePrefix(VFPU_CTRL_TPREFIX, tprefixRemove, tprefixAdd), V_Quad);

	for (int col = 0; col < n; col++) {
		lastD[col] = lastS[col] * t[col];
	}

	// The D prefix is applied to the last row.
	ApplyPrefixD(lastD, V_Quad);
	WriteMatrix(d, sz, vd);
	PC += 4;
	EatPrefixes();
}
561
562
// vmmov: copies matrix vs to vd. Only the last row is run through the S and
// D prefixes before the write.
void Int_Vmmov(MIPSOpcode op) {
	float s[16]{};
	const int vd = _VD;
	const int vs = _VS;
	const MatrixSize sz = GetMtxSize(op);
	ReadMatrix(s, sz, vs);

	// S and D prefixes are applied to the last row.
	float *lastRow = &s[(GetMatrixSide(sz) - 1) * 4];
	ApplySwizzleS(lastRow, V_Quad);
	ApplyPrefixD(lastRow, V_Quad);

	WriteMatrix(s, sz, vd);
	PC += 4;
	EatPrefixes();
}
576
577
// vflush (and related nop encodings): does nothing except advance the PC and,
// for most encodings, consume any pending prefixes.
void Int_Vflush(MIPSOpcode op)
{
	VERBOSE_LOG(Log::CPU, "vflush");
	PC += 4;

	// Anything with 0xFC000000 is a nop, but only 0xFFFF0000 retains prefixes.
	const bool retainsPrefixes = (op & 0xFFFF0000) == 0xFFFF0000;
	if (!retainsPrefixes)
		EatPrefixes();
}
585
586
// Two-operand vector ops (vd = f(vs)) selected by bits 16-20 of the opcode:
// vmov, vabs, vneg, vsat0, vsat1, vrcp, vrsq, vsin, vcos, vexp2, vlog2,
// vsqrt, vasin, vnrcp, vnsin and vrexp2.
//
// Three stages: apply the S prefix (in an op-specific way), compute each
// lane, then apply the D prefix (again op-specific).
void Int_VV2Op(MIPSOpcode op) {
	float s[4], d[4];
	const int vd = _VD;
	const int vs = _VS;
	const int optype = (op >> 16) & 0x1f;
	const VectorSize sz = GetVecSize(op);
	const u32 n = GetNumVectorElements(sz);
	ReadVector(s, sz, vs);

	// Ops 16-23, 24, 26 and 28 only apply prefixes to the last element.
	const bool lastElementOnly = (optype >= 16 && optype <= 23) || optype == 24 || optype == 26 || optype == 28;

	// Some of these are prefix hacks (affects constants, etc.)
	if (optype == 1) {
		// vabs: force the abs bits on.
		ApplyPrefixST(s, VFPURewritePrefix(VFPU_CTRL_SPREFIX, 0, VFPU_ABS(1, 1, 1, 1)), sz);
	} else if (optype == 2) {
		// vneg: force the negate bits on.
		ApplyPrefixST(s, VFPURewritePrefix(VFPU_CTRL_SPREFIX, 0, VFPU_NEGATE(1, 1, 1, 1)), sz);
	} else if (optype >= 16 && optype <= 23) {
		// Similar to vdiv.  Some of the behavior using the invalid constant is iffy.
		ApplySwizzleS(&s[n - 1], V_Single, INFINITY);
	} else if (optype == 24 || optype == 26) {
		// Similar to above, but also ignores negate.
		ApplyPrefixST(&s[n - 1], VFPURewritePrefix(VFPU_CTRL_SPREFIX, VFPU_NEGATE(1, 0, 0, 0), 0), V_Single, -INFINITY);
	} else if (optype == 28) {
		// Similar to above, but also ignores negate.
		ApplyPrefixST(&s[n - 1], VFPURewritePrefix(VFPU_CTRL_SPREFIX, VFPU_NEGATE(1, 0, 0, 0), 0), V_Single, INFINITY);
	} else {
		ApplySwizzleS(s, sz);
	}

	for (int i = 0; i < (int)n; i++) {
		const float in = s[i];
		switch (optype) {
		case 0: // vmov
		case 1: // vabs (prefix)
		case 2: // vneg (prefix)
			d[i] = in;
			break;
		// vsat0 changes -0.0 to +0.0, both retain NAN.
		case 4: // vsat0
			if (in <= 0)
				d[i] = 0;
			else if (in > 1.0f)
				d[i] = 1.0f;
			else
				d[i] = in;
			break;
		case 5: // vsat1
			if (in < -1.0f)
				d[i] = -1.0f;
			else if (in > 1.0f)
				d[i] = 1.0f;
			else
				d[i] = in;
			break;
		case 16: // vrcp
			d[i] = vfpu_rcp(in);
			break;
		case 17: // vrsq
			d[i] = USE_VFPU_SQRT ? vfpu_rsqrt(in) : 1.0f / sqrtf(in);
			break;
		case 18: // vsin
			d[i] = vfpu_sin(in);
			break;
		case 19: // vcos
			d[i] = vfpu_cos(in);
			break;
		case 20: // vexp2
			d[i] = vfpu_exp2(in);
			break;
		case 21: // vlog2
			d[i] = vfpu_log2(in);
			break;
		case 22: // vsqrt
			d[i] = USE_VFPU_SQRT ? vfpu_sqrt(in) : fabsf(sqrtf(in));
			break;
		case 23: // vasin
			d[i] = vfpu_asin(in);
			break;
		case 24: // vnrcp
			d[i] = -vfpu_rcp(in);
			break;
		case 26: // vnsin
			d[i] = -vfpu_sin(in);
			break;
		case 28: // vrexp2
			d[i] = vfpu_rexp2(in);
			break;
		default:
			_dbg_assert_msg_( false, "Invalid VV2Op op type %d", optype);
			break;
		}
	}

	// vsat1 is a prefix hack, so 0:1 doesn't apply.  Others don't process sat at all.
	if (optype == 5) {
		ApplyPrefixD(d, sz, true);
	} else {
		if (lastElementOnly) {
			// Only the last element gets the mask applied.
			const u32 dPrefix = currentMIPS->vfpuCtrl[VFPU_CTRL_DPREFIX];
			const u32 lastmask = (dPrefix & (1 << 8)) << (n - 1);
			const u32 lastsat = (dPrefix & 3) << (n + n - 2);
			currentMIPS->vfpuCtrl[VFPU_CTRL_DPREFIX] = lastmask | lastsat;
		}
		ApplyPrefixD(d, sz);
	}

	WriteVector(d, sz, vd);
	PC += 4;
	EatPrefixes();
}
682
683
// vocp: one's complement, d = 1 - s per lane.
// Implemented as t + s where s is forced negative through the S prefix and
// t is forced to the constant ONE through the T prefix, so other pending
// prefix bits (abs/negate on T, swizzle on S) still have an effect.
void Int_Vocp(MIPSOpcode op) {
	// t is zero-initialized: ApplyPrefixST snapshots the incoming lanes
	// before overwriting them, so they must not be indeterminate.
	float s[4], t[4]{}, d[4];
	int vd = _VD;
	int vs = _VS;
	VectorSize sz = GetVecSize(op);
	ReadVector(s, sz, vs);

	// S prefix forces the negate flags.
	u32 sprefixAdd = VFPU_NEGATE(1, 1, 1, 1);
	ApplyPrefixST(s, VFPURewritePrefix(VFPU_CTRL_SPREFIX, 0, sprefixAdd), sz);

	// T prefix forces constants on and regnum to 1.
	// That means negate still works, and abs activates a different constant.
	u32 tprefixRemove = VFPU_ANY_SWIZZLE();
	u32 tprefixAdd = VFPU_MAKE_CONSTANTS(VFPUConst::ONE, VFPUConst::ONE, VFPUConst::ONE, VFPUConst::ONE);
	ApplyPrefixST(t, VFPURewritePrefix(VFPU_CTRL_TPREFIX, tprefixRemove, tprefixAdd), sz);

	for (int i = 0; i < GetNumVectorElements(sz); i++) {
		// Always positive NaN.  Note that s is always negated from the registers.
		d[i] = my_isnan(s[i]) ? fabsf(s[i]) : t[i] + s[i];
	}
	// Lanes with an out-of-range swizzle are forced to zero.
	RetainInvalidSwizzleST(d, sz);
	ApplyPrefixD(d, sz);
	WriteVector(d, sz, vd);
	PC += 4;
	EatPrefixes();
}
710
711
// vsocp: for each input lane x, produces the pair (1 - x, x), each clamped
// to [0, 1]. The output is therefore twice as wide as the input (Quad if the
// doubled size is not valid).
// Implemented as t + s, where the S prefix is forced to swizzle xxyy with
// negate on the even lanes, and the T prefix is forced to constants 1,0,1,0.
void Int_Vsocp(MIPSOpcode op) {
	// s and t are zero-initialized: the prefixes are applied at the wider
	// output size, and ApplyPrefixST snapshots every lane of that size before
	// overwriting it, including lanes ReadVector did not fill.
	float s[4]{}, t[4]{}, d[4];
	int vd = _VD;
	int vs = _VS;
	VectorSize sz = GetVecSize(op);
	VectorSize outSize = GetDoubleVectorSizeSafe(sz);
	if (outSize == V_Invalid)
		outSize = V_Quad;
	ReadVector(s, sz, vs);

	// S prefix forces negate in even/odd and xxyy swizzle.
	// abs works, and applies to final position (not source.)
	u32 sprefixRemove = VFPU_ANY_SWIZZLE() | VFPU_NEGATE(1, 1, 1, 1);
	u32 sprefixAdd = VFPU_SWIZZLE(0, 0, 1, 1) | VFPU_NEGATE(1, 0, 1, 0);
	ApplyPrefixST(s, VFPURewritePrefix(VFPU_CTRL_SPREFIX, sprefixRemove, sprefixAdd), outSize);

	// T prefix forces constants on and regnum to 1, 0, 1, 0.
	// That means negate still works, and abs activates a different constant.
	u32 tprefixRemove = VFPU_ANY_SWIZZLE();
	u32 tprefixAdd = VFPU_MAKE_CONSTANTS(VFPUConst::ONE, VFPUConst::ZERO, VFPUConst::ONE, VFPUConst::ZERO);
	ApplyPrefixST(t, VFPURewritePrefix(VFPU_CTRL_TPREFIX, tprefixRemove, tprefixAdd), outSize);

	// Essentially D prefix saturation is forced.
	d[0] = nanclamp(t[0] + s[0], 0.0f, 1.0f);
	d[1] = nanclamp(t[1] + s[1], 0.0f, 1.0f);
	if (outSize == V_Quad) {
		d[2] = nanclamp(t[2] + s[2], 0.0f, 1.0f);
		d[3] = nanclamp(t[3] + s[3], 0.0f, 1.0f);
	}
	// onlyWriteMask: the D prefix saturation is deliberately not applied.
	ApplyPrefixD(d, sz, true);
	WriteVector(d, outSize, vd);
	PC += 4;
	EatPrefixes();
}
745
746
// vsgn: per-lane sign, d = -1, 0 or +1.
// Computed from the sign/zero-ness of (s - t), where t is the constant ZERO
// generated through the T prefix (so abs/negate on T can change what s is
// compared against).
void Int_Vsgn(MIPSOpcode op) {
	// t is zero-initialized: ApplyPrefixST snapshots the incoming lanes
	// before overwriting them, so they must not be indeterminate.
	float s[4], t[4]{}, d[4];
	int vd = _VD;
	int vs = _VS;
	VectorSize sz = GetVecSize(op);
	ReadVector(s, sz, vs);

	// Not sure who would do this, but using abs/neg allows a compare against 3 or -3.
	u32 tprefixRemove = VFPU_ANY_SWIZZLE();
	u32 tprefixAdd = VFPU_MAKE_CONSTANTS(VFPUConst::ZERO, VFPUConst::ZERO, VFPUConst::ZERO, VFPUConst::ZERO);
	// Generate all four lanes of t: the lanes beyond the vector size are
	// copied into s just below, so they must hold real values.
	ApplyPrefixST(t, VFPURewritePrefix(VFPU_CTRL_TPREFIX, tprefixRemove, tprefixAdd), V_Quad);

	int n = GetNumVectorElements(sz);
	if (n < 4) {
		// Compare with a swizzled value out of bounds always produces 0.
		memcpy(&s[n], &t[n], sizeof(float) * (4 - n));
	}
	ApplySwizzleS(s, V_Quad);

	for (int i = 0; i < n; i++) {
		float diff = s[i] - t[i];
		// To handle NaNs correctly, we do this with integer hackery
		u32 val;
		memcpy(&val, &diff, sizeof(u32));
		if (val == 0 || val == 0x80000000)
			d[i] = 0.0f;   // +0.0 or -0.0
		else if ((val >> 31) == 0)
			d[i] = 1.0f;   // sign bit clear (including positive NaN)
		else
			d[i] = -1.0f;  // sign bit set (including negative NaN)
	}
	ApplyPrefixD(d, sz);
	WriteVector(d, sz, vd);
	PC += 4;
	EatPrefixes();
}
782
783
inline int round_vfpu_n(double param) {
784
// return floorf(param);
785
return (int)round_ieee_754(param);
786
}
787
788
// vf2in / vf2iz / vf2iu / vf2id: float to fixed-point integer conversion.
// Each lane is multiplied by 2^imm, saturated to the signed 32-bit range and
// rounded according to bits 21-25 of the opcode (16 = nearest, 17 = toward
// zero, 18 = up, 19 = down). NaN inputs produce 0x7FFFFFFF.
void Int_Vf2i(MIPSOpcode op) {
	float s[4];
	int d[4];
	const int vd = _VD;
	const int vs = _VS;
	const int imm = (op >> 16) & 0x1f;
	const float mult = (float)(1UL << imm);
	const int roundMode = (op >> 21) & 0x1f;
	const VectorSize sz = GetVecSize(op);
	ReadVector(s, sz, vs);
	// Negate, abs, and constants apply as you'd expect to the bits.
	ApplySwizzleS(s, sz);

	const int n = GetNumVectorElements(sz);
	for (int i = 0; i < n; i++) {
		int result;
		if (my_isnan(s[i])) {
			result = 0x7FFFFFFF;
		} else {
			const double sv = s[i] * mult; // (float)0x7fffffff == (float)0x80000000
			// Cap/floor it to 0x7fffffff / 0x80000000
			if (sv > (double)0x7fffffff) {
				result = 0x7fffffff;
			} else if (sv <= (double)(int)0x80000000) {
				result = 0x80000000;
			} else {
				switch (roundMode)
				{
				case 16: // n
					result = (int)round_vfpu_n(sv);
					break;
				case 17: // z
					result = s[i]>=0 ? (int)floor(sv) : (int)ceil(sv);
					break;
				case 18: // u
					result = (int)ceil(sv);
					break;
				case 19: // d
					result = (int)floor(sv);
					break;
				default:
					result = 0x7FFFFFFF;
					break;
				}
			}
		}
		d[i] = result;
	}

	// Does not apply sat, but does apply mask.
	float *out = reinterpret_cast<float *>(d);
	ApplyPrefixD(out, sz, true);
	WriteVector(out, sz, vd);
	PC += 4;
	EatPrefixes();
}
827
828
// vi2f: fixed-point integer to float conversion. Each lane is read as a
// signed integer, converted to float and divided by 2^imm.
void Int_Vi2f(MIPSOpcode op) {
	int s[4];
	float d[4];
	const int vd = _VD;
	const int vs = _VS;
	const int imm = (op >> 16) & 0x1f;
	const float mult = 1.0f/(float)(1UL << imm);
	const VectorSize sz = GetVecSize(op);

	float *in = reinterpret_cast<float *>(s);
	ReadVector(in, sz, vs);
	// Negate, abs, and constants apply as you'd expect to the bits.
	ApplySwizzleS(in, sz);

	const int n = GetNumVectorElements(sz);
	for (int i = 0; i < n; i++) {
		d[i] = (float)s[i] * mult;
	}

	// Sat and mask apply normally.
	ApplyPrefixD(d, sz);
	WriteVector(d, sz, vd);
	PC += 4;
	EatPrefixes();
}
848
849
// vh2f: expands packed half-precision floats. Every 32-bit source lane holds
// two halves (low 16 bits first). A single-lane source yields a pair; every
// other size yields a quad built from the first two source lanes.
void Int_Vh2f(MIPSOpcode op) {
	u32 s[4];
	float d[4];
	const int vd = _VD;
	const int vs = _VS;
	const VectorSize sz = GetVecSize(op);

	float *in = reinterpret_cast<float *>(s);
	ReadVector(in, sz, vs);
	ApplySwizzleS(in, sz);

	VectorSize outsize;
	d[0] = ExpandHalf(s[0] & 0xFFFF);
	d[1] = ExpandHalf(s[0] >> 16);
	if (sz == V_Single) {
		outsize = V_Pair;
	} else {
		// All other sizes are treated the same.
		outsize = V_Quad;
		d[2] = ExpandHalf(s[1] & 0xFFFF);
		d[3] = ExpandHalf(s[1] >> 16);
	}

	ApplyPrefixD(d, outsize);
	WriteVector(d, outsize, vd);
	PC += 4;
	EatPrefixes();
}
880
881
// vf2h: packs floats into half-precision, two per 32-bit output lane (first
// value in the low 16 bits). Single/pair sources produce one output lane,
// triple/quad sources produce two.
void Int_Vf2h(MIPSOpcode op) {
	float s[4]{};
	u32 d[4];
	const int vd = _VD;
	const int vs = _VS;
	const VectorSize sz = GetVecSize(op);
	ReadVector(s, sz, vs);
	// Swizzle can cause V_Single to properly write both components.
	ApplySwizzleS(s, V_Quad);
	// Negate should not actually apply to invalid swizzle.
	RetainInvalidSwizzleST(s, V_Quad);

	// Packs two floats as halves into one word: lo in bits 0-15, hi in 16-31.
	auto packHalves = [](float lo, float hi) -> u32 {
		return ShrinkToHalf(lo) | ((u32)ShrinkToHalf(hi) << 16);
	};

	VectorSize outsize = V_Single;
	switch (sz) {
	case V_Single:
	case V_Pair:
		outsize = V_Single;
		d[0] = packHalves(s[0], s[1]);
		break;

	case V_Triple:
	case V_Quad:
		outsize = V_Pair;
		d[0] = packHalves(s[0], s[1]);
		d[1] = packHalves(s[2], s[3]);
		break;

	default:
		ERROR_LOG_REPORT(Log::CPU, "vf2h with invalid elements");
		break;
	}

	float *out = reinterpret_cast<float *>(d);
	ApplyPrefixD(out, outsize);
	WriteVector(out, outsize, vd);
	PC += 4;
	EatPrefixes();
}
916
917
// vuc2i / vc2i / vus2i / vs2i: unpacks 8-bit or 16-bit fields of the source
// into full 32-bit lanes. Bits 16-17 of the opcode select the variant.
void Int_Vx2i(MIPSOpcode op) {
	u32 s[4], d[4]{};
	const int vd = _VD;
	const int vs = _VS;
	VectorSize sz = GetVecSize(op);
	VectorSize oz = sz;

	float *in = reinterpret_cast<float *>(s);
	ReadVector(in, sz, vs);
	ApplySwizzleS(in, sz);

	// TODO: Similar to colorconv, invalid swizzle seems to reuse last output.
	switch ((op >> 16) & 3) {
	case 0:  // vuc2i
		// Quad is the only option.
		// This converts 8-bit unsigned to 31-bit signed, swizzling to saturate.
		// Similar to 5-bit to 8-bit color swizzling, but clamping to INT_MAX.
		for (int i = 0; i < 4; i++) {
			const u32 byte = (s[0] >> (i * 8)) & 0xFF;
			// Replicate the byte into all four byte positions, then drop to 31 bits.
			d[i] = (u32)((u32)byte * 0x01010101UL) >> 1;
		}
		oz = V_Quad;
		break;

	case 1:  // vc2i
		// Quad is the only option
		// Unlike vuc2i, the source and destination are signed so there is no shift.
		// It lacks the swizzle because of negative values.
		for (int i = 0; i < 4; i++) {
			// Byte i of the source lands in the top byte of lane i.
			d[i] = ((s[0] >> (i * 8)) & 0xFF) << 24;
		}
		oz = V_Quad;
		break;

	case 2:  // vus2i
		// Note: for some reason, this skips swizzle such that 0xFFFF -> 0x7FFF8000 unlike vuc2i.
		// Triple and quad inputs are handled like pairs.
		if (sz == V_Quad || sz == V_Triple)
			sz = V_Pair;
		if (sz == V_Pair || sz == V_Single) {
			oz = (sz == V_Pair) ? V_Quad : V_Pair;
			const int count = GetNumVectorElements(sz);
			for (int i = 0; i < count; i++) {
				const u32 value = s[i];
				d[i * 2] = (value & 0xFFFF) << 15;
				d[i * 2 + 1] = (value & 0xFFFF0000) >> 1;
			}
		} else {
			oz = V_Pair;
			ERROR_LOG_REPORT(Log::CPU, "vus2i with more than 2 elements");
		}
		break;

	case 3:  // vs2i
		// Triple and quad inputs are handled like pairs.
		if (sz == V_Quad || sz == V_Triple)
			sz = V_Pair;
		if (sz == V_Pair || sz == V_Single) {
			oz = (sz == V_Pair) ? V_Quad : V_Pair;
			const int count = GetNumVectorElements(sz);
			for (int i = 0; i < count; i++) {
				const u32 value = s[i];
				d[i * 2] = (value & 0xFFFF) << 16;
				d[i * 2 + 1] = value & 0xFFFF0000;
			}
		} else {
			oz = V_Pair;
			ERROR_LOG_REPORT(Log::CPU, "vs2i with more than 2 elements");
		}
		break;

	default:
		_dbg_assert_msg_( false, "Trying to interpret instruction that can't be interpreted");
		break;
	}

	// Saturation does in fact apply.
	float *out = reinterpret_cast<float *>(d);
	ApplyPrefixD(out, oz);
	WriteVector(out, oz, vd);
	PC += 4;
	EatPrefixes();
}
1020
1021
// vi2uc / vi2c / vi2us / vi2s: packs the high bits of 32-bit integer lanes
// into 8-bit or 16-bit fields. Bits 16-17 of the opcode select the variant.
// The unsigned variants clamp negative inputs to zero first.
void Int_Vi2x(MIPSOpcode op) {
	int s[4]{};
	u32 d[2]{};
	const int vd = _VD;
	const int vs = _VS;
	const VectorSize sz = GetVecSize(op);
	VectorSize oz;

	float *in = reinterpret_cast<float *>(s);
	ReadVector(in, sz, vs);
	// Negate, const, etc. apply as expected.
	ApplySwizzleS(in, V_Quad);

	// Output size for the 16-bit variants: two source lanes per output lane.
	auto halvedSize = [sz]() -> VectorSize {
		switch (sz) {
		case V_Quad:
		case V_Triple:
			return V_Pair;
		case V_Pair:
		case V_Single:
			return V_Single;
		default:
			_dbg_assert_msg_( false, "Trying to interpret instruction that can't be interpreted");
			return V_Single;
		}
	};

	// TODO: Similar to colorconv, invalid swizzle seems to reuse last output.
	switch ((op >> 16) & 3) {
	case 0: //vi2uc
		for (int i = 0; i < 4; i++) {
			int v = s[i];
			if (v < 0)
				v = 0;
			v >>= 23;
			d[0] |= ((u32)v & 0xFF) << (i * 8);
		}
		oz = V_Single;
		break;

	case 1: //vi2c
		for (int i = 0; i < 4; i++) {
			const u32 v = s[i];
			d[0] |= (v >> 24) << (i * 8);
		}
		oz = V_Single;
		break;

	case 2: //vi2us
		{
			const int pairs = (GetNumVectorElements(sz) + 1) / 2;
			for (int i = 0; i < pairs; i++) {
				int low = s[i * 2];
				int high = s[i * 2 + 1];
				if (low < 0)
					low = 0;
				if (high < 0)
					high = 0;
				low >>= 15;
				high >>= 15;
				d[i] = low | (high << 16);
			}
			oz = halvedSize();
		}
		break;

	case 3: //vi2s
		{
			const int pairs = (GetNumVectorElements(sz) + 1) / 2;
			for (int i = 0; i < pairs; i++) {
				u32 low = s[i * 2];
				u32 high = s[i * 2 + 1];
				low >>= 16;
				high >>= 16;
				d[i] = low | (high << 16);
			}
			oz = halvedSize();
		}
		break;

	default:
		_dbg_assert_msg_( 0, "Trying to interpret instruction that can't be interpreted");
		oz = V_Single;
		break;
	}

	// D prefix applies as expected.
	float *out = reinterpret_cast<float *>(d);
	ApplyPrefixD(out, oz);
	WriteVector(out, oz, vd);
	PC += 4;
	EatPrefixes();
}
1109
1110
// Interprets the color conversion ops: each of the four 32-bit lanes read from vs is
// reduced to a 16-bit color (4444, 5551 or 565, selected by bits 16-17 of the opcode),
// and the four results are packed into two 32-bit words written to vd.
// Sub-op 0 has no case below, so it produces zero for every lane.
void Int_ColorConv(MIPSOpcode op)
{
	int vd = _VD;
	int vs = _VS;
	u32 s[4];
	VectorSize isz = GetVecSize(op);
	// The source is always read as a full quad, regardless of the encoded size.
	VectorSize sz = V_Quad;
	ReadVector(reinterpret_cast<float *>(s), sz, vs);
	ApplySwizzleS(reinterpret_cast<float *>(s), sz);
	u16 colors[4];
	const u32 format = (op >> 16) & 3;
	// TODO: Invalid swizzle values almost seem to use the last value converted in a
	// previous execution of these ops.  It's a bit odd.
	for (int i = 0; i < 4; i++)
	{
		u32 in = s[i];
		u16 col = 0;
		switch (format)
		{
		case 1:  // 4444
			{
				int a = ((in >> 24) & 0xFF) >> 4;
				int b = ((in >> 16) & 0xFF) >> 4;
				int g = ((in >> 8) & 0xFF) >> 4;
				int r = ((in) & 0xFF) >> 4;
				col = (a << 12) | (b << 8) | (g << 4) | (r);
				break;
			}
		case 2:  // 5551
			{
				int a = ((in >> 24) & 0xFF) >> 7;
				int b = ((in >> 16) & 0xFF) >> 3;
				int g = ((in >> 8) & 0xFF) >> 3;
				int r = ((in) & 0xFF) >> 3;
				col = (a << 15) | (b << 10) | (g << 5) | (r);
				break;
			}
		case 3:  // 565
			{
				int b = ((in >> 16) & 0xFF) >> 3;
				int g = ((in >> 8) & 0xFF) >> 2;
				int r = ((in) & 0xFF) >> 3;
				col = (b << 11) | (g << 5) | (r);
				break;
			}
		default:
			// No conversion: col stays zero.
			break;
		}
		colors[i] = col;
	}
	// Widen to u32 before shifting: a u16 is promoted to signed int, and shifting a value
	// with bit 15 set left by 16 would move a one into the sign bit.
	u32 ov[2] = {
		(u32)colors[0] | ((u32)colors[1] << 16),
		(u32)colors[2] | ((u32)colors[3] << 16),
	};
	// The D prefix is applied to both words, but only one is written for a single-sized op.
	ApplyPrefixD(reinterpret_cast<float *>(ov), V_Pair);
	WriteVector((const float *)ov, isz == V_Single ? V_Single : V_Pair, vd);
	PC += 4;
	EatPrefixes();
}
1163
1164
// Interprets vdot: writes the four-lane dot product of vs and vt to the single register vd.
// Both inputs are zero-initialized before being read at the encoded size.
void Int_VDot(MIPSOpcode op) {
	const int vd = _VD;
	const int vs = _VS;
	const int vt = _VT;
	const VectorSize sz = GetVecSize(op);

	float lhs[4]{}, rhs[4]{};
	union { float f; uint32_t u; } result;

	// Prefixes are applied across all four lanes, not just the lanes that were read.
	ReadVector(lhs, sz, vs);
	ApplySwizzleS(lhs, V_Quad);
	ReadVector(rhs, sz, vt);
	ApplySwizzleT(rhs, V_Quad);

	if (USE_VFPU_DOT) {
		result.f = vfpu_dot(lhs, rhs);
		if (my_isnan(result.f)) {
			// Any NAN result is replaced with this fixed bit pattern.
			result.u = 0x7f800001;
		} else if ((result.u & 0x7F800000) == 0) {
			// Zero exponent: keep only the sign, clearing the mantissa.
			result.u &= 0xFF800000;
		}
	} else {
		result.f = 0.0f;
		for (int lane = 0; lane < 4; ++lane) {
			result.f += lhs[lane] * rhs[lane];
		}
	}

	ApplyPrefixD(&result.f, V_Single);
	WriteVector(&result.f, V_Single, vd);
	PC += 4;
	EatPrefixes();
}
1195
1196
// Interprets vhdp: a dot product like vdot, except that the S prefix is rewritten so the
// last element of the encoded size is replaced with the constant one.
void Int_VHdp(MIPSOpcode op) {
	const int vd = _VD;
	const int vs = _VS;
	const int vt = _VT;
	const VectorSize sz = GetVecSize(op);

	float s[4]{}, t[4]{};
	ReadVector(s, sz, vs);
	ReadVector(t, sz, vt);
	ApplySwizzleT(t, V_Quad);

	// S prefix forces constant 1 for the last element (w for quad.)
	// Otherwise it is the same as vdot.
	u32 sprefixRemove;
	u32 sprefixAdd;
	switch (sz) {
	case V_Quad:
		sprefixRemove = VFPU_SWIZZLE(0, 0, 0, 3);
		sprefixAdd = VFPU_MAKE_CONSTANTS(VFPUConst::NONE, VFPUConst::NONE, VFPUConst::NONE, VFPUConst::ONE);
		break;
	case V_Triple:
		sprefixRemove = VFPU_SWIZZLE(0, 0, 3, 0);
		sprefixAdd = VFPU_MAKE_CONSTANTS(VFPUConst::NONE, VFPUConst::NONE, VFPUConst::ONE, VFPUConst::NONE);
		break;
	case V_Pair:
		sprefixRemove = VFPU_SWIZZLE(0, 3, 0, 0);
		sprefixAdd = VFPU_MAKE_CONSTANTS(VFPUConst::NONE, VFPUConst::ONE, VFPUConst::NONE, VFPUConst::NONE);
		break;
	default:
		// Single (and anything else) forces the first lane.
		sprefixRemove = VFPU_SWIZZLE(3, 0, 0, 0);
		sprefixAdd = VFPU_MAKE_CONSTANTS(VFPUConst::ONE, VFPUConst::NONE, VFPUConst::NONE, VFPUConst::NONE);
		break;
	}
	ApplyPrefixST(s, VFPURewritePrefix(VFPU_CTRL_SPREFIX, sprefixRemove, sprefixAdd), V_Quad);

	float sum = 0.0f;
	if (USE_VFPU_DOT) {
		sum = vfpu_dot(s, t);
	} else {
		for (int lane = 0; lane < 4; ++lane) {
			sum += s[lane] * t[lane];
		}
	}

	// A NAN result has its sign cleared; anything else passes through unchanged.
	float d = my_isnan(sum) ? fabsf(sum) : sum;
	ApplyPrefixD(&d, V_Single);
	WriteVector(&d, V_Single, vd);
	PC += 4;
	EatPrefixes();
}
1240
1241
// Interprets vbfy1 / vbfy2 (selected by bit 16 of the opcode).
// vs is read twice: once as S with forced negate flags, once as T with a forced swizzle,
// and the two are added lane by lane.
void Int_Vbfy(MIPSOpcode op) {
	float s[4]{}, t[4]{}, d[4];
	int vd = _VD;
	int vs = _VS;
	VectorSize sz = GetVecSize(op);
	ReadVector(s, sz, vs);
	ReadVector(t, sz, vs);

	if (op & 0x10000) {
		// vbfy2
		// S prefix forces the negate flags (so z and w are negative.)
		u32 sprefixAdd = VFPU_NEGATE(0, 0, 1, 1);
		ApplyPrefixST(s, VFPURewritePrefix(VFPU_CTRL_SPREFIX, 0, sprefixAdd), sz);

		// T prefix forces swizzle (zwxy.)
		// That means negate still works, but constants are a bit weird.
		u32 tprefixRemove = VFPU_ANY_SWIZZLE();
		u32 tprefixAdd = VFPU_SWIZZLE(2, 3, 0, 1);
		ApplyPrefixST(t, VFPURewritePrefix(VFPU_CTRL_TPREFIX, tprefixRemove, tprefixAdd), sz);

		// Other sizes don't seem completely predictable.
		if (sz != V_Quad) {
			ERROR_LOG_REPORT_ONCE(vbfy2, Log::CPU, "vfby2 with incorrect size");
		}
	} else {
		// vbfy1
		// S prefix forces the negate flags (so y and w are negative.)
		u32 sprefixAdd = VFPU_NEGATE(0, 1, 0, 1);
		ApplyPrefixST(s, VFPURewritePrefix(VFPU_CTRL_SPREFIX, 0, sprefixAdd), sz);

		// T prefix forces swizzle (yxwz.)
		// That means negate still works, but constants are a bit weird.
		u32 tprefixRemove = VFPU_ANY_SWIZZLE();
		u32 tprefixAdd = VFPU_SWIZZLE(1, 0, 3, 2);
		ApplyPrefixST(t, VFPURewritePrefix(VFPU_CTRL_TPREFIX, tprefixRemove, tprefixAdd), sz);

		if (sz != V_Quad && sz != V_Pair) {
			// Uses its own report-once identifier so this site is tracked separately
			// from the vbfy2 report above.
			ERROR_LOG_REPORT_ONCE(vbfy1, Log::CPU, "vfby1 with incorrect size");
		}
	}

	// All four lanes are summed; s and t were zero-initialized before the reads above.
	for (int i = 0; i < 4; i++) {
		d[i] = s[i] + t[i];
	}

	ApplyPrefixD(d, sz);
	WriteVector(d, sz, vd);
	PC += 4;
	EatPrefixes();
}
1292
1293
// Interprets vsrt1: compares vs against a yxwz-swizzled copy of itself and writes
// min, max, min, max for lanes x, y, z, w.
void Int_Vsrt1(MIPSOpcode op) {
	// Zero-initialized because all four lanes are compared below, even when sz is
	// smaller than a quad and the reads have not filled every lane.
	float s[4]{}, t[4]{}, d[4];
	int vd = _VD;
	int vs = _VS;
	VectorSize sz = GetVecSize(op);
	ReadVector(s, sz, vs);
	ApplySwizzleS(s, sz);
	ReadVector(t, sz, vs);

	// T is force swizzled to yxwz from S.
	u32 tprefixRemove = VFPU_SWIZZLE(3, 3, 3, 3);
	u32 tprefixAdd = VFPU_SWIZZLE(1, 0, 3, 2);
	ApplyPrefixST(t, VFPURewritePrefix(VFPU_CTRL_TPREFIX, tprefixRemove, tprefixAdd), sz);

	// TODO: May mishandle NAN / negative zero / etc.
	d[0] = std::min(s[0], t[0]);
	d[1] = std::max(s[1], t[1]);
	d[2] = std::min(s[2], t[2]);
	d[3] = std::max(s[3], t[3]);
	RetainInvalidSwizzleST(d, sz);
	ApplyPrefixD(d, sz);
	WriteVector(d, sz, vd);
	PC += 4;
	EatPrefixes();
}
1318
1319
// Interprets vsrt2: compares vs against a wzyx-swizzled copy of itself and writes
// min, min, max, max for lanes x, y, z, w.
void Int_Vsrt2(MIPSOpcode op) {
	// Zero-initialized because all four lanes are compared below, even when sz is
	// smaller than a quad and the reads have not filled every lane.
	float s[4]{}, t[4]{}, d[4];
	int vd = _VD;
	int vs = _VS;
	VectorSize sz = GetVecSize(op);
	ReadVector(s, sz, vs);
	ApplySwizzleS(s, sz);
	ReadVector(t, sz, vs);

	// T is force swizzled to wzyx from S.
	u32 tprefixRemove = VFPU_SWIZZLE(3, 3, 3, 3);
	u32 tprefixAdd = VFPU_SWIZZLE(3, 2, 1, 0);
	ApplyPrefixST(t, VFPURewritePrefix(VFPU_CTRL_TPREFIX, tprefixRemove, tprefixAdd), sz);

	// TODO: May mishandle NAN / negative zero / etc.
	d[0] = std::min(s[0], t[0]);
	d[1] = std::min(s[1], t[1]);
	d[2] = std::max(s[2], t[2]);
	d[3] = std::max(s[3], t[3]);
	RetainInvalidSwizzleST(d, sz);
	ApplyPrefixD(d, sz);
	WriteVector(d, sz, vd);
	PC += 4;
	EatPrefixes();
}
1344
1345
// Interprets vsrt3: compares vs against a yxwz-swizzled copy of itself and writes
// max, min, max, min for lanes x, y, z, w.
void Int_Vsrt3(MIPSOpcode op) {
	// Zero-initialized because all four lanes are compared below, even when sz is
	// smaller than a quad and the reads have not filled every lane.
	float s[4]{}, t[4]{}, d[4];
	int vd = _VD;
	int vs = _VS;
	VectorSize sz = GetVecSize(op);
	ReadVector(s, sz, vs);
	ApplySwizzleS(s, sz);
	ReadVector(t, sz, vs);

	// T is force swizzled to yxwz from S.
	u32 tprefixRemove = VFPU_SWIZZLE(3, 3, 3, 3);
	u32 tprefixAdd = VFPU_SWIZZLE(1, 0, 3, 2);
	ApplyPrefixST(t, VFPURewritePrefix(VFPU_CTRL_TPREFIX, tprefixRemove, tprefixAdd), sz);

	// TODO: May mishandle NAN / negative zero / etc.
	d[0] = std::max(s[0], t[0]);
	d[1] = std::min(s[1], t[1]);
	d[2] = std::max(s[2], t[2]);
	d[3] = std::min(s[3], t[3]);
	RetainInvalidSwizzleST(d, sz);
	ApplyPrefixD(d, sz);
	WriteVector(d, sz, vd);
	PC += 4;
	EatPrefixes();
}
1370
1371
// Interprets vsrt4: compares vs against a wzyx-swizzled copy of itself and writes
// max, max, min, min for lanes x, y, z, w.
void Int_Vsrt4(MIPSOpcode op) {
	// Zero-initialized because all four lanes are compared below, even when sz is
	// smaller than a quad and the reads have not filled every lane.
	float s[4]{}, t[4]{}, d[4];
	int vd = _VD;
	int vs = _VS;
	VectorSize sz = GetVecSize(op);
	ReadVector(s, sz, vs);
	ApplySwizzleS(s, sz);
	ReadVector(t, sz, vs);

	// T is force swizzled to wzyx from S.
	u32 tprefixRemove = VFPU_SWIZZLE(3, 3, 3, 3);
	u32 tprefixAdd = VFPU_SWIZZLE(3, 2, 1, 0);
	ApplyPrefixST(t, VFPURewritePrefix(VFPU_CTRL_TPREFIX, tprefixRemove, tprefixAdd), sz);

	// TODO: May mishandle NAN / negative zero / etc.
	d[0] = std::max(s[0], t[0]);
	d[1] = std::max(s[1], t[1]);
	d[2] = std::min(s[2], t[2]);
	d[3] = std::min(s[3], t[3]);
	RetainInvalidSwizzleST(d, sz);
	ApplyPrefixD(d, sz);
	WriteVector(d, sz, vd);
	PC += 4;
	EatPrefixes();
}
1396
1397
// Interprets vcrs ("half a cross product"): multiplies a force-swizzled vs by a
// force-swizzled vt lane by lane and writes the products to vd.
void Int_Vcrs(MIPSOpcode op) {
	const int vd = _VD;
	const int vs = _VS;
	const int vt = _VT;
	const VectorSize sz = GetVecSize(op);

	float lhs[4]{}, rhs[4]{}, product[4];
	ReadVector(lhs, sz, vs);
	ReadVector(rhs, sz, vt);

	// S prefix forces swizzle (yzx?.)
	// That means negate still works, but constants are a bit weird.
	const u32 sRemove = VFPU_SWIZZLE(3, 3, 3, 0);
	const u32 sAdd = VFPU_SWIZZLE(1, 2, 0, 0);
	ApplyPrefixST(lhs, VFPURewritePrefix(VFPU_CTRL_SPREFIX, sRemove, sAdd), sz);

	// T prefix forces swizzle (zxy?.)
	const u32 tRemove = VFPU_SWIZZLE(3, 3, 3, 0);
	const u32 tAdd = VFPU_SWIZZLE(2, 0, 1, 0);
	ApplyPrefixST(rhs, VFPURewritePrefix(VFPU_CTRL_TPREFIX, tRemove, tAdd), sz);

	// All four lanes are multiplied; the write below is limited to sz.
	for (int lane = 0; lane < 4; ++lane) {
		product[lane] = lhs[lane] * rhs[lane];
	}

	ApplyPrefixD(product, sz);
	WriteVector(product, sz, vd);
	PC += 4;
	EatPrefixes();
}
1427
1428
// Interprets vdet: computes s.x * t.y - s.y * t.x (plus any z/w products that prefixes
// introduce) and writes the single result to vd.
void Int_Vdet(MIPSOpcode op) {
	const int vd = _VD;
	const int vs = _VS;
	const int vt = _VT;
	const VectorSize sz = GetVecSize(op);

	float s[4]{}, t[4]{};
	// This is normally V_Pair.  Unfilled s/t values are treated as zero.
	ReadVector(s, sz, vs);
	ApplySwizzleS(s, V_Quad);
	ReadVector(t, sz, vt);

	// T prefix forces swizzle for x and y (yx??.)
	// That means negate still works, but constants are a bit weird.
	// Note: there is no forced negation here.
	const u32 tRemove = VFPU_SWIZZLE(3, 3, 0, 0);
	const u32 tAdd = VFPU_SWIZZLE(1, 0, 0, 0);
	ApplyPrefixST(t, VFPURewritePrefix(VFPU_CTRL_TPREFIX, tRemove, tAdd), V_Quad);

	float det;
	if (USE_VFPU_DOT) {
		// Negating s.y turns the dot product into the subtraction.
		s[1] = -s[1];
		det = vfpu_dot(s, t);
	} else {
		det = s[0] * t[0] - s[1] * t[1];
		det += s[2] * t[2] + s[3] * t[3];
	}

	ApplyPrefixD(&det, V_Single);
	WriteVector(&det, V_Single, vd);
	PC += 4;
	EatPrefixes();
}
1459
1460
// Interprets vfad: sums the lanes of vs by taking a dot product with a T operand whose
// prefix is rewritten to the constant one in every lane, and writes the result to vd.
void Int_Vfad(MIPSOpcode op) {
	const int vd = _VD;
	const int vs = _VS;
	const VectorSize sz = GetVecSize(op);

	float s[4]{}, ones[4]{};
	ReadVector(s, sz, vs);
	ApplySwizzleS(s, V_Quad);

	// T prefix generates constants, but abs can change the constant.
	const u32 tRemove = VFPU_ANY_SWIZZLE();
	const u32 tAdd = VFPU_MAKE_CONSTANTS(VFPUConst::ONE, VFPUConst::ONE, VFPUConst::ONE, VFPUConst::ONE);
	ApplyPrefixST(ones, VFPURewritePrefix(VFPU_CTRL_TPREFIX, tRemove, tAdd), V_Quad);

	float d;
	if (USE_VFPU_DOT) {
		d = vfpu_dot(s, ones);
	} else {
		d = 0.0f;
		for (int lane = 0; lane < 4; ++lane) {
			d += s[lane] * ones[lane];
		}
	}

	ApplyPrefixD(&d, V_Single);
	WriteVector(&d, V_Single, vd);
	PC += 4;
	EatPrefixes();
}
1487
1488
// Interprets vavg: takes a dot product of vs with a T operand whose prefix is rewritten to
// a per-size constant (ZERO, HALF, THIRD or FOURTH), and writes the result to vd.
void Int_Vavg(MIPSOpcode op) {
	const int vd = _VD;
	const int vs = _VS;
	const VectorSize sz = GetVecSize(op);

	float s[4]{}, weights[4]{};
	ReadVector(s, sz, vs);
	ApplySwizzleS(s, V_Quad);

	// T prefix generates constants, but supports negate.
	const u32 tRemove = VFPU_ANY_SWIZZLE() | VFPU_ABS(1, 1, 1, 1);
	u32 tAdd;
	switch (sz) {
	case V_Single:
		tAdd = VFPU_MAKE_CONSTANTS(VFPUConst::ZERO, VFPUConst::ZERO, VFPUConst::ZERO, VFPUConst::ZERO);
		break;
	case V_Pair:
		tAdd = VFPU_MAKE_CONSTANTS(VFPUConst::HALF, VFPUConst::HALF, VFPUConst::HALF, VFPUConst::HALF);
		break;
	case V_Triple:
		tAdd = VFPU_MAKE_CONSTANTS(VFPUConst::THIRD, VFPUConst::THIRD, VFPUConst::THIRD, VFPUConst::THIRD);
		break;
	case V_Quad:
		tAdd = VFPU_MAKE_CONSTANTS(VFPUConst::FOURTH, VFPUConst::FOURTH, VFPUConst::FOURTH, VFPUConst::FOURTH);
		break;
	default:
		tAdd = 0;
		break;
	}
	ApplyPrefixST(weights, VFPURewritePrefix(VFPU_CTRL_TPREFIX, tRemove, tAdd), V_Quad);

	float d;
	if (USE_VFPU_DOT) {
		d = vfpu_dot(s, weights);
	} else {
		d = 0.0f;
		for (int lane = 0; lane < 4; ++lane) {
			d += s[lane] * weights[lane];
		}
	}

	ApplyPrefixD(&d, V_Single);
	WriteVector(&d, V_Single, vd);
	PC += 4;
	EatPrefixes();
}
1525
1526
// Interprets vscl: multiplies every lane of vs by the single register vt and writes the
// result to vd.
void Int_VScl(MIPSOpcode op) {
	// t is zero-initialized because only one lane is assigned below before the whole
	// array is handed to ApplyPrefixST as a quad.
	float s[4], t[4]{}, d[4];
	int vd = _VD;
	int vs = _VS;
	int vt = _VT;
	VectorSize sz = GetVecSize(op);
	ReadVector(s, sz, vs);
	ApplySwizzleS(s, sz);

	// T prefix forces swizzle (zzzz for some reason, so we force V_Quad.)
	// That means negate still works, but constants are a bit weird.
	int tlane = (vt >> 5) & 3;
	t[tlane] = V(vt);
	u32 tprefixRemove = VFPU_ANY_SWIZZLE();
	// Every output lane of T is taken from the one lane that was filled in.
	u32 tprefixAdd = VFPU_SWIZZLE(tlane, tlane, tlane, tlane);
	ApplyPrefixST(t, VFPURewritePrefix(VFPU_CTRL_TPREFIX, tprefixRemove, tprefixAdd), V_Quad);

	int n = GetNumVectorElements(sz);
	for (int i = 0; i < n; i++) {
		d[i] = s[i] * t[i];
	}
	ApplyPrefixD(d, sz);
	WriteVector(d, sz, vd);
	PC += 4;
	EatPrefixes();
}
1552
1553
// Interprets vrnds: seeds the VFPU random number state (starting at the RCX0 control
// register) from the integer value of register vd, after applying the S prefix to it.
void Int_Vrnds(MIPSOpcode op) {
	const int vd = _VD;
	int seedBits = VI(vd);

	// Swizzles apply a constant value, constants/abs/neg work to vary the seed.
	ApplySwizzleS(reinterpret_cast<float *>(&seedBits), V_Single);

	vrnd_init(uint32_t(seedBits), currentMIPS->vfpuCtrl + VFPU_CTRL_RCX0);
	PC += 4;
	EatPrefixes();
}
1562
1563
// Interprets vrndi / vrndf1 / vrndf2 (selected by bits 16-20 of the opcode): fills vd
// with generated random values, either raw or forced into a float range.
void Int_VrndX(MIPSOpcode op) {
	const int vd = _VD;
	const VectorSize sz = GetVecSize(op);
	const u32 n = GetNumVectorElements(sz);
	const u32 variant = (op >> 16) & 0x1f;

	FloatBits out;
	// Values are written in backwards order.
	for (int lane = n - 1; lane >= 0; --lane) {
		switch (variant) {
		case 1:  // vrndi
			out.u[lane] = vrnd_generate(currentMIPS->vfpuCtrl + VFPU_CTRL_RCX0);
			break;
		case 2:  // vrndf1 (>= 1, < 2)
			out.u[lane] = 0x3F800000 | (vrnd_generate(currentMIPS->vfpuCtrl + VFPU_CTRL_RCX0) & 0x007FFFFF);
			break;
		case 3:  // vrndf2 (>= 2, < 4)
			out.u[lane] = 0x40000000 | (vrnd_generate(currentMIPS->vfpuCtrl + VFPU_CTRL_RCX0) & 0x007FFFFF);
			break;
		default:
			_dbg_assert_msg_(false,"Trying to interpret instruction that can't be interpreted");
		}
	}

	// D prefix is broken and applies to the last element only (mask and sat.)
	// The x-lane mask bit and saturation bits are moved to the last lane's position, and
	// everything else is dropped before applying the prefix.
	auto &dprefix = currentMIPS->vfpuCtrl[VFPU_CTRL_DPREFIX];
	const u32 lastmask = (dprefix & (1 << 8)) << (n - 1);
	const u32 lastsat = (dprefix & 3) << (n + n - 2);
	dprefix = lastmask | lastsat;

	ApplyPrefixD(out.f, sz);
	WriteVector(out.f, sz, vd);
	PC += 4;
	EatPrefixes();
}
1586
1587
// Generates one line of a rotation matrix around one of the three axes
1588
void Int_Vrot(MIPSOpcode op) {
1589
float d[4]{};
1590
int vd = _VD;
1591
int vs = _VS;
1592
int imm = (op >> 16) & 0x1f;
1593
VectorSize sz = GetVecSize(op);
1594
bool negSin = (imm & 0x10) != 0;
1595
int sineLane = (imm >> 2) & 3;
1596
int cosineLane = imm & 3;
1597
1598
float sine, cosine;
1599
if (currentMIPS->vfpuCtrl[VFPU_CTRL_SPREFIX] == 0x000E4) {
1600
vfpu_sincos(V(vs), sine, cosine);
1601
if (negSin)
1602
sine = -sine;
1603
} else {
1604
// Swizzle on S is a bit odd here, but generally only applies to sine.
1605
float s[4]{};
1606
ReadVector(s, V_Single, vs);
1607
u32 sprefixRemove = VFPU_NEGATE(1, 0, 0, 0);
1608
// We apply negSin later, not here. This handles zero a bit better.
1609
u32 sprefixAdd = VFPU_NEGATE(0, 0, 0, 0);
1610
ApplyPrefixST(s, VFPURewritePrefix(VFPU_CTRL_SPREFIX, sprefixRemove, sprefixAdd), V_Single);
1611
1612
// Cosine ignores all prefixes, so take the original.
1613
cosine = vfpu_cos(V(vs));
1614
sine = vfpu_sin(s[0]);
1615
1616
if (negSin)
1617
sine = -sine;
1618
RetainInvalidSwizzleST(&sine, V_Single);
1619
}
1620
1621
if (sineLane == cosineLane) {
1622
for (int i = 0; i < 4; i++)
1623
d[i] = sine;
1624
} else {
1625
d[sineLane] = sine;
1626
}
1627
1628
if (((vd >> 2) & 7) == ((vs >> 2) & 7)) {
1629
u8 dregs[4]{};
1630
GetVectorRegs(dregs, sz, vd);
1631
// Calculate cosine based on sine/zero result.
1632
bool written = false;
1633
for (int i = 0; i < 4; i++) {
1634
if (vs == dregs[i]) {
1635
d[cosineLane] = vfpu_cos(d[i]);
1636
written = true;
1637
break;
1638
}
1639
}
1640
if (!written)
1641
d[cosineLane] = cosine;
1642
} else {
1643
d[cosineLane] = cosine;
1644
}
1645
1646
// D prefix works, just not for the cosine lane.
1647
uint32_t dprefixRemove = (3 << cosineLane) | (1 << (8 + cosineLane));
1648
currentMIPS->vfpuCtrl[VFPU_CTRL_DPREFIX] &= 0xFFFFF ^ dprefixRemove;
1649
ApplyPrefixD(d, sz);
1650
WriteVector(d, sz, vd);
1651
PC += 4;
1652
EatPrefixes();
1653
}
1654
1655
// Interprets the matrix * vector transform ops.  Bits 23-24 of the opcode ("ins") give the
// matrix/output size minus one, while the encoded vector size (n) says how many lanes of
// vt are real.  When ins >= n, the missing lane at index ins is treated as one.
// Rows 0..ins-1 are computed without prefixes; the final row goes through the S/T prefixes.
void Int_Vtfm(MIPSOpcode op) {
	const int vd = _VD;
	const int vs = _VS;
	const int vt = _VT;
	const int ins = (op >> 23) & 3;

	const VectorSize sz = (VectorSize)(ins + 1);
	const MatrixSize msz = (MatrixSize)(ins + 1);
	const int n = GetNumVectorElements(GetVecSize(op));
	const int tn = std::min(n, ins + 1);

	float mat[16]{}, vec[4]{};
	FloatBits out;
	ReadMatrix(mat, msz, vs);
	ReadVector(vec, sz, vt);

	if (USE_VFPU_DOT) {
		// Build the effective T operand: real lanes, then a one at index ins, zero elsewhere.
		float padded[4];
		for (int i = 0; i < 4; i++) {
			if (i < tn) {
				padded[i] = vec[i];
			} else {
				padded[i] = (i == ins) ? 1.0f : 0.0f;
			}
		}

		for (int row = 0; row < ins; row++) {
			out.f[row] = vfpu_dot(&mat[row * 4], padded);

			if (my_isnan(out.f[row])) {
				// Any NAN result is replaced with this fixed bit pattern.
				out.u[row] = 0x7f800001;
			} else if ((out.u[row] & 0x7F800000) == 0) {
				// Zero exponent: keep only the sign, clearing the mantissa.
				out.u[row] &= 0xFF800000;
			}
		}
	} else {
		for (int row = 0; row < ins; row++) {
			out.f[row] = mat[row * 4] * vec[0];
			for (int k = 1; k < tn; k++) {
				out.f[row] += mat[row * 4 + k] * vec[k];
			}
			if (ins >= n) {
				// Implicit one in lane ins.
				out.f[row] += mat[row * 4 + ins];
			}
		}
	}

	// S and T prefixes apply for the final row only.
	// The T prefix is used to apply zero/one constants, but abs still changes it.
	ApplySwizzleS(&mat[ins * 4], V_Quad);
	VFPUConst constX = VFPUConst::NONE;
	VFPUConst constY = n < 2 ? VFPUConst::ZERO : VFPUConst::NONE;
	VFPUConst constZ = n < 3 ? VFPUConst::ZERO : VFPUConst::NONE;
	VFPUConst constW = n < 4 ? VFPUConst::ZERO : VFPUConst::NONE;
	if (ins >= n) {
		switch (ins) {
		case 1: constY = VFPUConst::ONE; break;
		case 2: constZ = VFPUConst::ONE; break;
		case 3: constW = VFPUConst::ONE; break;
		default: break;
		}
	}
	const u32 tRemove = VFPU_SWIZZLE(0, n < 2 ? 3 : 0, n < 3 ? 3 : 0, n < 4 ? 3 : 0);
	const u32 tAdd = VFPU_MAKE_CONSTANTS(constX, constY, constZ, constW);
	ApplyPrefixST(vec, VFPURewritePrefix(VFPU_CTRL_TPREFIX, tRemove, tAdd), V_Quad);

	// Really this is the operation all rows probably use (with constant wiring.)
	if (USE_VFPU_DOT) {
		out.f[ins] = vfpu_dot(&mat[ins * 4], vec);

		if (my_isnan(out.f[ins])) {
			out.u[ins] = 0x7f800001;
		} else if ((out.u[ins] & 0x7F800000) == 0) {
			out.u[ins] &= 0xFF800000;
		}
	} else {
		out.f[ins] = mat[ins * 4] * vec[0];
		for (int k = 1; k < 4; k++) {
			out.f[ins] += mat[ins * 4 + k] * vec[k];
		}
	}

	// D prefix applies to the last element only.
	auto &dprefix = currentMIPS->vfpuCtrl[VFPU_CTRL_DPREFIX];
	const u32 lastmask = (dprefix & (1 << 8)) << ins;
	const u32 lastsat = (dprefix & 3) << (ins + ins);
	dprefix = lastmask | lastsat;

	ApplyPrefixD(out.f, sz);
	WriteVector(out.f, sz, vd);
	PC += 4;
	EatPrefixes();
}
1749
1750
// Interprets lv.s / sv.s: loads or stores one 32-bit VFPU register at R(rs) + imm.
// The low two bits of the instruction are not part of the offset; they supply bits 5-6
// of the VFPU register number.
void Int_SV(MIPSOpcode op)
{
	const s32 offset = SignExtend16ToS32(op & 0xFFFC);
	const int vt = ((op >> 16) & 0x1f) | ((op & 3) << 5);
	const int rs = _RS;
	const u32 addr = R(rs) + offset;

	switch (op >> 26)
	{
	case 50:  //lv.s
		VI(vt) = Memory::Read_U32(addr);
		break;

	case 58:  //sv.s
		Memory::Write_U32(VI(vt), addr);
		break;

	default:
		_dbg_assert_msg_(false,"Trying to interpret instruction that can't be interpreted");
		break;
	}

	PC += 4;
}
1771
1772
1773
// Interprets mfv / mfvc / mtv / mtvc: moves a 32-bit value between CPU register rt and
// either a VFPU register (imm below 128) or a VFPU control register (imm 128 and up).
void Int_Mftv(MIPSOpcode op)
{
	const int imm = op & 0xFF;
	const int rt = _RT;
	const bool isVfpuReg = imm < 128;
	const bool isCtrlReg = !isVfpuReg && imm < 128 + VFPU_CTRL_MAX;

	switch ((op >> 21) & 0x1f)
	{
	case 3:  //mfv / mfvc
		// rt = 0, imm = 255 appears to be used as a CPU interlock by some games.
		if (rt == 0)
			break;
		if (isVfpuReg) {
			R(rt) = VI(imm);
		} else if (isCtrlReg) {  //mfvc
			R(rt) = currentMIPS->vfpuCtrl[imm - 128];
		} else {
			//ERROR - maybe need to make this value too an "interlock" value?
			_dbg_assert_msg_(false,"mfv - invalid register");
		}
		break;

	case 7:  //mtv
		if (isVfpuReg) {
			VI(imm) = R(rt);
		} else if (isCtrlReg) {  //mtvc
			// The write only happens when a mask is available for this control register.
			u32 mask;
			if (GetVFPUCtrlMask(imm - 128, &mask)) {
				currentMIPS->vfpuCtrl[imm - 128] = R(rt) & mask;
			}
		} else {
			//ERROR
			_dbg_assert_msg_(false,"mtv - invalid register");
		}
		break;

	default:
		_dbg_assert_msg_(false,"Trying to interpret instruction that can't be interpreted");
		break;
	}

	PC += 4;
}
1813
1814
// Interprets vmfvc: copies a VFPU control register (index in bits 8-14 of the opcode)
// into VFPU register vd.  An out-of-range index writes zero instead.
void Int_Vmfvc(MIPSOpcode op) {
	const int vd = _VD;
	const int ctrl = (op >> 8) & 0x7F;

	if (ctrl >= VFPU_CTRL_MAX) {
		VI(vd) = 0;
	} else {
		VI(vd) = currentMIPS->vfpuCtrl[ctrl];
	}
	PC += 4;
}
1824
1825
// Interprets vmtvc: copies VFPU register vs into a VFPU control register (index in the
// low 7 bits of the opcode), masked by GetVFPUCtrlMask.  Out-of-range indices, and
// registers for which no mask is returned, are left untouched.
void Int_Vmtvc(MIPSOpcode op) {
	const int vs = _VS;
	const int ctrl = op & 0x7F;

	u32 mask;
	if (ctrl < VFPU_CTRL_MAX && GetVFPUCtrlMask(ctrl, &mask)) {
		currentMIPS->vfpuCtrl[ctrl] = VI(vs) & mask;
	}
	PC += 4;
}
1836
1837
// Interprets vcst: fills vd with the constant from cst_constants selected by bits 16-20
// of the opcode.
void Int_Vcst(MIPSOpcode op)
{
	const int vd = _VD;
	const int constIndex = (op >> 16) & 0x1f;
	const VectorSize sz = GetVecSize(op);

	// Every lane carries the same constant; the write below is limited to sz.
	const float value = cst_constants[constIndex];
	float lanes[4] = { value, value, value, value };

	ApplyPrefixD(lanes, sz);
	WriteVector(lanes, sz, vd);
	PC += 4;
	EatPrefixes();
}
1850
1851
// Interprets vcmp: evaluates the condition in the low 4 bits of the opcode on each lane of
// vs (and vt) and stores the results in the VFPU CC control register.
// Bits 0..n-1 receive the per-lane results, bit 4 the OR and bit 5 the AND of them.
void Int_Vcmp(MIPSOpcode op)
{
	const int vs = _VS;
	const int vt = _VT;
	const int cond = op & 0xf;
	const VectorSize sz = GetVecSize(op);
	const int n = GetNumVectorElements(sz);

	float s[4];
	float t[4];
	ReadVector(s, sz, vs);
	ApplySwizzleS(s, sz);
	ReadVector(t, sz, vt);
	ApplySwizzleT(t, sz);

	int laneBits = 0;
	int anyTrue = 0;
	int allTrue = 1;
	int affected = (1 << 4) | (1 << 5);  // 4 and 5
	for (int i = 0; i < n; ++i)
	{
		const float a = s[i];
		const float b = t[i];
		int c;
		// These set c to 0 or 1, nothing else.
		switch (cond)
		{
		case VC_FL: c = 0; break;
		case VC_EQ: c = a == b; break;
		case VC_LT: c = a < b; break;
		case VC_LE: c = a <= b; break;

		case VC_TR: c = 1; break;
		case VC_NE: c = a != b; break;
		case VC_GE: c = a >= b; break;
		case VC_GT: c = a > b; break;

		case VC_EZ: c = a == 0.0f || a == -0.0f; break;
		case VC_EN: c = my_isnan(a); break;
		case VC_EI: c = my_isinf(a); break;
		case VC_ES: c = my_isnanorinf(a); break;   // Tekken Dark Resurrection

		case VC_NZ: c = a != 0; break;
		case VC_NN: c = !my_isnan(a); break;
		case VC_NI: c = !my_isinf(a); break;
		case VC_NS: c = !(my_isnanorinf(a)); break;   // How about t[i] ?

		default:
			// Unknown condition: leave CC untouched and just step past the instruction.
			_dbg_assert_msg_(false,"Unsupported vcmp condition code %d", cond);
			PC += 4;
			EatPrefixes();
			return;
		}
		laneBits |= (c << i);
		anyTrue |= c;
		allTrue &= c;
		affected |= 1 << i;
	}

	// Use masking to only change the affected bits
	const int updated = (laneBits | (anyTrue << 4) | (allTrue << 5)) & affected;
	currentMIPS->vfpuCtrl[VFPU_CTRL_CC] =
		(currentMIPS->vfpuCtrl[VFPU_CTRL_CC] & ~affected) | updated;
	PC += 4;
	EatPrefixes();
}
1912
1913
// Interprets vmin / vmax (selected by bits 23-24 of the opcode): lane-wise minimum or
// maximum of vs and vt, written to vd.  When either input is NAN or INF, the raw bit
// patterns are compared as integers instead of comparing the float values.
void Int_Vminmax(MIPSOpcode op) {
	const int vt = _VT;
	const int vs = _VS;
	const int vd = _VD;
	const int cond = op&15;
	const VectorSize sz = GetVecSize(op);
	const int lanes = GetNumVectorElements(sz);

	FloatBits s, t, d;
	ReadVector(s.f, sz, vs);
	ApplySwizzleS(s.f, sz);
	ReadVector(t.f, sz, vt);
	ApplySwizzleT(t.f, sz);

	// If both are zero, take t's sign.
	// Otherwise: -NAN < -INF < real < INF < NAN (higher mantissa is farther from 0.)

	switch ((op >> 23) & 3) {
	case 2:  // vmin
		for (int i = 0; i < lanes; ++i) {
			const bool special = my_isnanorinf(s.f[i]) || my_isnanorinf(t.f[i]);
			if (!special) {
				d.f[i] = std::min(t.f[i], s.f[i]);
				continue;
			}
			// If both are negative, we flip the comparison (not two's compliment.)
			if (s.i[i] < 0 && t.i[i] < 0) {
				// If at least one side is NAN, we take the highest mantissa bits.
				d.i[i] = std::max(t.i[i], s.i[i]);
			} else {
				// Otherwise, we take the lowest value (negative or lowest mantissa.)
				d.i[i] = std::min(t.i[i], s.i[i]);
			}
		}
		break;

	case 3:  // vmax
		for (int i = 0; i < lanes; ++i) {
			// This is the same logic as vmin, just reversed.
			const bool special = my_isnanorinf(s.f[i]) || my_isnanorinf(t.f[i]);
			if (!special) {
				d.f[i] = std::max(t.f[i], s.f[i]);
				continue;
			}
			if (s.i[i] < 0 && t.i[i] < 0) {
				d.i[i] = std::min(t.i[i], s.i[i]);
			} else {
				d.i[i] = std::max(t.i[i], s.i[i]);
			}
		}
		break;

	default:
		// Not a min/max encoding: nothing is written, just step past the instruction.
		_dbg_assert_msg_(false,"unknown min/max op %d", cond);
		PC += 4;
		EatPrefixes();
		return;
	}

	RetainInvalidSwizzleST(d.f, sz);
	ApplyPrefixD(d.f, sz);
	WriteVector(d.f, sz, vd);
	PC += 4;
	EatPrefixes();
}
1973
1974
// Interprets vscmp: writes -1.0f, 0.0f or 1.0f to each lane of vd depending on whether the
// lane of vs is less than, equal to, or greater than the lane of vt.
void Int_Vscmp(MIPSOpcode op) {
	FloatBits s, t, d;
	int vt = _VT;
	int vs = _VS;
	int vd = _VD;
	VectorSize sz = GetVecSize(op);
	ReadVector(s.f, sz, vs);
	ApplySwizzleS(s.f, sz);
	ReadVector(t.f, sz, vt);
	ApplySwizzleT(t.f, sz);
	int n = GetNumVectorElements(sz);
	for (int i = 0; i < n ; i++) {
		float a = s.f[i] - t.f[i];
		if (my_isnan(a)) {
			// NAN/INF are treated as just larger numbers, as in vmin/vmax.
			// Convert each sign-magnitude bit pattern to a signed integer that sorts the
			// same way.
			int sMagnitude = s.u[i] & 0x7FFFFFFF;
			int tMagnitude = t.u[i] & 0x7FFFFFFF;
			int sValue = s.i[i] < 0 ? -sMagnitude : sMagnitude;
			int tValue = t.i[i] < 0 ? -tMagnitude : tMagnitude;
			// Compare directly rather than taking the sign of (sValue - tValue): that
			// difference overflows int when the operands have opposite signs and large
			// magnitudes (e.g. a positive NAN against a negative number).
			d.f[i] = (float)((tValue < sValue) - (sValue < tValue));
		} else {
			d.f[i] = (float)((0.0f < a) - (a < 0.0f));
		}
	}
	RetainInvalidSwizzleST(d.f, sz);
	ApplyPrefixD(d.f, sz);
	WriteVector(d.f, sz, vd);
	PC += 4;
	EatPrefixes();
}
2003
2004
// Interprets vsge: each lane of vd becomes 1.0f when the lane of vs is greater than or
// equal to the lane of vt, and 0.0f otherwise (including when either input is NAN).
void Int_Vsge(MIPSOpcode op) {
	const int vt = _VT;
	const int vs = _VS;
	const int vd = _VD;
	const VectorSize sz = GetVecSize(op);
	const int lanes = GetNumVectorElements(sz);

	float s[4], t[4], d[4];
	ReadVector(s, sz, vs);
	ApplySwizzleS(s, sz);
	ReadVector(t, sz, vt);
	ApplySwizzleT(t, sz);

	for (int i = 0; i < lanes; ++i) {
		const bool unordered = my_isnan(s[i]) || my_isnan(t[i]);
		if (unordered) {
			d[i] = 0.0f;
		} else if (s[i] >= t[i]) {
			d[i] = 1.0f;
		} else {
			d[i] = 0.0f;
		}
	}

	RetainInvalidSwizzleST(d, sz);
	// The clamp cannot matter, so skip it.
	ApplyPrefixD(d, sz, true);
	WriteVector(d, sz, vd);
	PC += 4;
	EatPrefixes();
}
2028
2029
// Interprets vslt: each lane of vd becomes 1.0f when the lane of vs is less than the lane
// of vt, and 0.0f otherwise (including when either input is NAN).
void Int_Vslt(MIPSOpcode op) {
	const int vt = _VT;
	const int vs = _VS;
	const int vd = _VD;
	const VectorSize sz = GetVecSize(op);
	const int lanes = GetNumVectorElements(sz);

	float s[4], t[4], d[4];
	ReadVector(s, sz, vs);
	ApplySwizzleS(s, sz);
	ReadVector(t, sz, vt);
	ApplySwizzleT(t, sz);

	for (int i = 0; i < lanes; ++i) {
		const bool unordered = my_isnan(s[i]) || my_isnan(t[i]);
		if (unordered) {
			d[i] = 0.0f;
		} else if (s[i] < t[i]) {
			d[i] = 1.0f;
		} else {
			d[i] = 0.0f;
		}
	}

	RetainInvalidSwizzleST(d, sz);
	// The clamp cannot matter, so skip it.
	ApplyPrefixD(d, sz, true);
	WriteVector(d, sz, vd);
	PC += 4;
	EatPrefixes();
}
2053
2054
2055
// Interprets vcmovt / vcmovf: conditionally copies lanes of vs into vd based on bits of
// the VFPU CC register.  imm3 (bits 16-18) below 6 tests that one CC bit for the whole
// vector; imm3 == 6 tests CC bit i for lane i.  Bit 19 (tf) inverts the test.
void Int_Vcmov(MIPSOpcode op) {
	const int vs = _VS;
	const int vd = _VD;
	const int tf = (op >> 19) & 1;
	const int imm3 = (op >> 16) & 7;
	const VectorSize sz = GetVecSize(op);
	const int lanes = GetNumVectorElements(sz);

	float src[4];
	float dst[4];
	ReadVector(src, sz, vs);
	ApplySwizzleS(src, sz);
	// Not only is D read (as T), but the T prefix applies to it.
	ReadVector(dst, sz, vd);
	ApplySwizzleT(dst, sz);

	const int CC = currentMIPS->vfpuCtrl[VFPU_CTRL_CC];
	// The CC bit value that triggers a move: 1 when tf is clear, 0 when tf is set.
	const int wanted = tf ? 0 : 1;

	if (imm3 < 6) {
		if (((CC >> imm3) & 1) == wanted) {
			for (int i = 0; i < lanes; ++i)
				dst[i] = src[i];
		}
	} else if (imm3 == 6) {
		for (int i = 0; i < lanes; ++i) {
			if (((CC >> i) & 1) == wanted)
				dst[i] = src[i];
		}
	} else {
		ERROR_LOG_REPORT(Log::CPU, "Bad Imm3 in cmov: %d", imm3);
	}

	ApplyPrefixD(dst, sz);
	WriteVector(dst, sz, vd);
	PC += 4;
	EatPrefixes();
}
2090
2091
// Interprets the lane-wise three-operand arithmetic ops vadd, vsub, vdiv and vmul.
// The operation is decoded from the top 6 bits of the opcode plus bits 23-25; an
// unrecognized encoding asserts and then falls back to optype 0 (vadd).
void Int_VecDo3(MIPSOpcode op) {
	const int vd = _VD;
	const int vs = _VS;
	const int vt = _VT;
	const VectorSize sz = GetVecSize(op);

	int optype = 0;
	bool recognized = false;
	const u32 subOp = (op >> 23) & 7;
	switch (op >> 26) {
	case 24:  //VFPU0
		if (subOp == 0 || subOp == 1 || subOp == 7) {
			optype = (int)subOp;
			recognized = true;
		}
		break;
	case 25:  //VFPU1
		if (subOp == 0) {
			optype = 8;
			recognized = true;
		}
		break;
	default:
		break;
	}
	if (!recognized) {
		_dbg_assert_msg_( 0, "Trying to interpret instruction that can't be interpreted");
	}

	const u32 n = GetNumVectorElements(sz);
	float s[4], t[4];
	FloatBits d;
	ReadVector(s, sz, vs);
	ReadVector(t, sz, vt);
	if (optype == 7) {
		// The prefix handling of S/T is a bit odd, probably the HW doesn't do it in parallel.
		// The X prefix is applied to the last element in sz.
		// TODO: This doesn't match exactly for a swizzle past x in some cases...
		ApplySwizzleS(&s[n - 1], V_Single, -INFINITY);
		ApplySwizzleT(&t[n - 1], V_Single, -INFINITY);
	} else {
		ApplySwizzleS(s, sz);
		ApplySwizzleT(t, sz);
	}

	for (int i = 0; i < (int)n; ++i) {
		switch (optype) {
		case 0: d.f[i] = s[i] + t[i]; break;  //vadd
		case 1: d.f[i] = s[i] - t[i]; break;  //vsub
		case 7: d.f[i] = s[i] / t[i]; break;  //vdiv
		case 8: d.f[i] = s[i] * t[i]; break;  //vmul
		}

		if (USE_VFPU_DOT) {
			if (my_isnan(d.f[i])) {
				// Keeps the sign and exponent of the NAN and forces the mantissa to 1.
				d.u[i] = (d.u[i] & 0xff800001) | 1;
			} else if ((d.u[i] & 0x7F800000) == 0) {
				// Zero exponent: keep only the sign, clearing the mantissa.
				d.u[i] &= 0xFF800000;
			}
		}
	}

	if (optype == 7) {
		// For vdiv only, the D prefix only applies mask (and like S/T, x applied to last.)
		auto &dprefix = currentMIPS->vfpuCtrl[VFPU_CTRL_DPREFIX];
		const u32 lastmask = (dprefix & (1 << 8)) << (n - 1);
		const u32 lastsat = (dprefix & 3) << (n + n - 2);
		dprefix = lastmask | lastsat;
	} else {
		RetainInvalidSwizzleST(d.f, sz);
	}
	ApplyPrefixD(d.f, sz);
	WriteVector(d.f, sz, vd);
	PC += 4;
	EatPrefixes();
}
2166
2167
// Interprets vcrsp.t (triple size) and vqmul.q (quad size). Pair and single sizes
// produce the degenerate results noted in their cases. The T prefix is rewritten with
// a forced swizzle/negate before the last lane is computed, and the D prefix is applied
// to the last lane only.
void Int_CrossQuat(MIPSOpcode op) {
	const int vd = _VD;
	const int vs = _VS;
	const int vt = _VT;
	const VectorSize sz = GetVecSize(op);
	const u32 n = GetNumVectorElements(sz);

	// s and t start zero-filled; d is only written by the case that matches sz.
	float s[4]{}, t[4]{}, d[4];
	ReadVector(s, sz, vs);
	ReadVector(t, sz, vt);

	// Bits stripped from the T prefix before the forced swizzle/negate is added back in.
	const u32 tClearBits = VFPU_ANY_SWIZZLE() | VFPU_NEGATE(1, 1, 1, 1);

	// T prefix forces swizzle and negate, can be used to have weird constants.
	// Rewrites and applies the T prefix at quad width, then applies the S swizzle at quad width.
	auto forceTThenSwizzleS = [&](u32 tForcedBits) {
		ApplyPrefixST(t, VFPURewritePrefix(VFPU_CTRL_TPREFIX, tClearBits, tForcedBits), V_Quad);
		ApplySwizzleS(s, V_Quad);
	};

	switch (sz) {
	case V_Triple:  // vcrsp.t
	{
		// The first two lanes use the unprefixed inputs.
		if (USE_VFPU_DOT) {
			float t0[4] = { 0.0f, t[2], -t[1], 0.0f };
			float t1[4] = { -t[2], 0.0f, t[0], 0.0f };
			d[0] = vfpu_dot(s, t0);
			d[1] = vfpu_dot(s, t1);
		} else {
			d[0] = s[1] * t[2] - s[2] * t[1];
			d[1] = s[2] * t[0] - s[0] * t[2];
		}

		forceTThenSwizzleS(VFPU_SWIZZLE(1, 0, 3, 2) | VFPU_NEGATE(0, 1, 0, 0));
		if (USE_VFPU_DOT) {
			// TODO: But flush any infs to 0? This seems sketchy.
			for (int lane = 0; lane < 4; ++lane) {
				if (my_isinf(s[lane]))
					s[lane] = 0.0f;
				if (my_isinf(t[lane]))
					t[lane] = 0.0f;
			}
			d[2] = vfpu_dot(s, t);
		} else {
			d[2] = s[0] * t[0] + s[1] * t[1] + s[2] * t[2] + s[3] * t[3];
		}
		break;
	}

	case V_Quad:  // vqmul.q
	{
		// The first three lanes use the unprefixed inputs.
		if (USE_VFPU_DOT) {
			float t0[4] = { t[3], t[2], -t[1], t[0] };
			float t1[4] = { -t[2], t[3], t[0], t[1] };
			float t2[4] = { t[1], -t[0], t[3], t[2] };
			d[0] = vfpu_dot(s, t0);
			d[1] = vfpu_dot(s, t1);
			d[2] = vfpu_dot(s, t2);
		} else {
			d[0] = s[0] * t[3] + s[1] * t[2] - s[2] * t[1] + s[3] * t[0];
			d[1] = -s[0] * t[2] + s[1] * t[3] + s[2] * t[0] + s[3] * t[1];
			d[2] = s[0] * t[1] - s[1] * t[0] + s[2] * t[3] + s[3] * t[2];
		}

		// sz is V_Quad in this case, so the quad-width S swizzle matches the size.
		forceTThenSwizzleS(VFPU_SWIZZLE(0, 1, 2, 3) | VFPU_NEGATE(1, 1, 1, 0));
		if (USE_VFPU_DOT) {
			d[3] = vfpu_dot(s, t);
		} else {
			d[3] = s[0] * t[0] + s[1] * t[1] + s[2] * t[2] + s[3] * t[3];
		}
		break;
	}

	case V_Pair:
		// t swizzles invalid so the multiply is always zero.
		d[0] = 0;

		forceTThenSwizzleS(VFPU_SWIZZLE(0, 0, 0, 0) | VFPU_NEGATE(0, 0, 0, 0));
		// It's possible to populate a value by swizzling s[2].
		d[1] = s[2] * t[2];
		break;

	case V_Single:
		// t swizzles invalid so the multiply is always zero.
		d[0] = 0;
		break;

	default:
		ERROR_LOG_REPORT(Log::CPU, "vcrsp/vqmul with invalid elements");
		break;
	}

	auto &dprefix = currentMIPS->vfpuCtrl[VFPU_CTRL_DPREFIX];
	if (sz == V_Single) {
		// Single always seems to write out zero.
		dprefix = 0;
	} else {
		// D prefix applies to the last element only (mask and sat) for pair and larger:
		// move the x-lane mask bit (bit 8) and sat field (bits 0-1) to the last lane.
		const u32 lastmask = (dprefix & (1 << 8)) << (n - 1);
		const u32 lastsat = (dprefix & 3) << (n + n - 2);
		dprefix = lastmask | lastsat;
		ApplyPrefixD(d, sz);
	}

	WriteVector(d, sz, vd);
	PC += 4;
	EatPrefixes();
}
2273
2274
// Vector log binary (extract exponent): the first lane of vd receives the unbiased
// exponent of the first lane of vs as a float. Remaining lanes are copied from vs.
void Int_Vlgb(MIPSOpcode op) {
	const int vd = _VD;
	const int vs = _VS;
	const VectorSize sz = GetVecSize(op);

	FloatBits d, s;
	ReadVector(s.f, sz, vs);
	ApplySwizzleS(s.f, sz);

	const int biasedExp = (s.u[0] & 0x7F800000) >> 23;
	switch (biasedExp) {
	case 0xFF:
		// All-ones exponent (inf/NaN): the input passes through.
		d.f[0] = s.f[0];
		break;
	case 0:
		// Zero exponent (zero/denormal): result is negative infinity.
		d.f[0] = -INFINITY;
		break;
	default:
		// Normal number: remove the exponent bias of 127.
		d.f[0] = (float)(biasedExp - 127);
		break;
	}

	// If sz is greater than V_Single, the rest are copied unchanged.
	const int count = GetNumVectorElements(sz);
	for (int lane = 1; lane < count; ++lane) {
		d.u[lane] = s.u[lane];
	}

	RetainInvalidSwizzleST(d.f, sz);
	ApplyPrefixD(d.f, sz);
	WriteVector(d.f, sz, vd);
	PC += 4;
	EatPrefixes();
}
2304
2305
// vwbn: rewrites the first lane of vs so its exponent field becomes the 8-bit immediate
// taken from bits 16-23 of the opcode, shifting the mantissa by the exponent difference
// (limited to 4 bits). Remaining lanes are copied from vs.
void Int_Vwbn(MIPSOpcode op) {
	const int vd = _VD;
	const int vs = _VS;
	const VectorSize sz = GetVecSize(op);
	const u32 newExp = (u8)((op >> 16) & 0xFF);
	const u32 newExpBits = newExp << 23;

	FloatBits d, s;
	ReadVector(s.f, sz, vs);
	ApplySwizzleS(s.f, sz);

	const u32 raw = s.u[0];
	const u32 oldExp = (raw >> 23) & 0xFF;
	if (oldExp == 0 || oldExp == 0xFF) {
		// Zero or all-ones exponent: the new exponent bits are simply OR'd into the input.
		d.u[0] = raw | newExpBits;
	} else {
		// Mantissa with the implicit leading one made explicit.
		u32 mantissa = (raw & 0x007FFFFF) | 0x00800000;
		// Only the low 4 bits of the exponent difference are used as the shift amount.
		if (newExp > oldExp) {
			mantissa >>= (newExp - oldExp) & 0xF;
		} else {
			mantissa <<= (oldExp - newExp) & 0xF;
		}
		// Reassemble: original sign, shifted mantissa (23 bits), new exponent.
		d.u[0] = (raw & 0x80000000) | (mantissa & 0x007FFFFF) | newExpBits;
	}

	// If sz is greater than V_Single, the rest are copied unchanged.
	const int count = GetNumVectorElements(sz);
	for (int lane = 1; lane < count; ++lane) {
		d.u[lane] = s.u[lane];
	}

	RetainInvalidSwizzleST(d.f, sz);
	ApplyPrefixD(d.f, sz);
	WriteVector(d.f, sz, vd);
	PC += 4;
	EatPrefixes();
}
2342
2343
// vsbn: replaces the exponent field of the first lane of vs with 127 plus the first
// lane of vt read as an integer (only the low 8 bits of the sum are kept). Inputs whose
// exponent is zero or all-ones pass through. Remaining lanes are copied from vs.
void Int_Vsbn(MIPSOpcode op) {
	FloatBits d, s, t;
	int vd = _VD;
	int vs = _VS;
	int vt = _VT;
	VectorSize sz = GetVecSize(op);

	ReadVector(s.f, sz, vs);
	ApplySwizzleS(s.f, sz);
	ReadVector(t.f, sz, vt);
	ApplySwizzleT(t.f, sz);
	// Swizzle does apply to the value read as an integer.
	// The register may hold any bit pattern (e.g. 0x7FFFFFFF), so the bias is added in
	// unsigned arithmetic: a signed 127 + t.i[0] could overflow, which is undefined
	// behavior. The low 8 bits of the sum are the same either way.
	const u8 exp = (u8)(127u + t.u[0]);

	// Simply replace the exponent bits.
	const u32 prev = s.u[0] & 0x7F800000;
	if (prev != 0 && prev != 0x7F800000) {
		d.u[0] = (s.u[0] & ~0x7F800000) | (exp << 23);
	} else {
		// Zero or all-ones exponent: leave the value untouched.
		d.u[0] = s.u[0];
	}

	// If sz is greater than V_Single, the rest are copied unchanged.
	const int count = GetNumVectorElements(sz);
	for (int i = 1; i < count; ++i) {
		d.u[i] = s.u[i];
	}

	ApplyPrefixD(d.f, sz);
	WriteVector(d.f, sz, vd);
	PC += 4;
	EatPrefixes();
}
2375
2376
// Vector scale by zero (set exp to 0 to extract mantissa): the first lane of vd keeps
// the mantissa of the first lane of vs with the exponent field forced to 127.
// Remaining lanes are copied from vs.
void Int_Vsbz(MIPSOpcode op) {
	const int vd = _VD;
	const int vs = _VS;
	const VectorSize sz = GetVecSize(op);

	FloatBits d, s;
	ReadVector(s.f, sz, vs);
	ApplySwizzleS(s.f, sz);

	// NAN and denormals pass through (as does anything else with a zero exponent field).
	const bool passThrough = my_isnan(s.f[0]) || (s.u[0] & 0x7F800000) == 0;
	// Otherwise keep only the 23 mantissa bits and install exponent 127; the sign bit is dropped.
	d.u[0] = passThrough ? s.u[0] : ((127 << 23) | (s.u[0] & 0x007FFFFF));

	// If sz is greater than V_Single, the rest are copied unchanged.
	const int count = GetNumVectorElements(sz);
	for (int lane = 1; lane < count; ++lane) {
		d.u[lane] = s.u[lane];
	}

	ApplyPrefixD(d.f, sz);
	WriteVector(d.f, sz, vd);
	PC += 4;
	EatPrefixes();
}
2403
}
2404
2405