Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
hrydgard
GitHub Repository: hrydgard/ppsspp
Path: blob/master/Core/MIPS/IR/IRInterpreter.cpp
3187 views
1
#include <algorithm>
2
#include <cmath>
3
4
#include "ppsspp_config.h"
5
#include "Common/BitSet.h"
6
#include "Common/BitScan.h"
7
#include "Common/Common.h"
8
#include "Common/Data/Convert/SmallDataConvert.h"
9
#include "Common/Math/math_util.h"
10
#include "Common/Math/SIMDHeaders.h"
11
#include "Core/Core.h"
12
#include "Core/CoreTiming.h"
13
#include "Core/Debugger/Breakpoints.h"
14
#include "Core/HLE/HLE.h"
15
#include "Core/HLE/ReplaceTables.h"
16
#include "Core/MemMap.h"
17
#include "Core/MIPS/MIPS.h"
18
#include "Core/MIPS/MIPSTables.h"
19
#include "Core/MIPS/MIPSVFPUUtils.h"
20
#include "Core/MIPS/IR/IRInst.h"
21
#include "Core/MIPS/IR/IRInterpreter.h"
22
#include "Core/System.h"
23
#include "Core/MIPS/MIPSTracer.h"
24
25
#ifdef mips
26
// Why do MIPS compilers define something so generic? Try to keep defined, at least...
27
#undef mips
28
#define mips mips
29
#endif
30
31
// Constant rows used by IROp::Vec4Init (VFPU vector-init instructions such as
// vzero/vone and the unit vectors). 16-byte alignment permits aligned SIMD loads.
alignas(16) static const float vec4InitValues[8][4] = {
	{ 0.0f, 0.0f, 0.0f, 0.0f },
	{ 1.0f, 1.0f, 1.0f, 1.0f },
	{ -1.0f, -1.0f, -1.0f, -1.0f },
	{ 1.0f, 0.0f, 0.0f, 0.0f },
	{ 0.0f, 1.0f, 0.0f, 0.0f },
	{ 0.0f, 0.0f, 1.0f, 0.0f },
	{ 0.0f, 0.0f, 0.0f, 1.0f },
	// NOTE: declared [8][4] but only 7 rows are initialized; the 8th row is
	// implicitly zero-filled by aggregate initialization.
};
40
41
// 0x80000000 in every lane: XOR against a float vector flips the sign bits
// (used by the SSE path of IROp::Vec4Neg below).
alignas(16) static const uint32_t signBits[4] = {
	0x80000000, 0x80000000, 0x80000000, 0x80000000,
};
44
45
// 0x7FFFFFFF in every lane: AND against a float vector clears the sign bits,
// computing absolute value (used by the SSE path of IROp::Vec4Abs below).
alignas(16) static const uint32_t noSignMask[4] = {
	0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF,
};
48
49
// Keeps only the low byte of each 32-bit lane. Not referenced in this part of
// the file — presumably used by a SIMD pack/unpack path further down; verify.
alignas(16) static const uint32_t lowBytesMask[4] = {
	0x000000FF, 0x000000FF, 0x000000FF, 0x000000FF,
};
52
53
// Executes a breakpoint check at pc. Returns non-zero when execution should
// stop (the breakpoint fired or the core was already halted), 0 to continue.
u32 IRRunBreakpoint(u32 pc) {
	// If we're resuming past this exact breakpoint, let it through once.
	const uint32_t resumePC = g_breakpoints.CheckSkipFirst();
	if (resumePC == pc || resumePC == currentMIPS->pc)
		return 0;

	// If the core is no longer running (something else already stopped it),
	// report a hit without re-triggering.
	const bool alreadyStopped = coreState != CORE_RUNNING_CPU && coreState != CORE_NEXTFRAME;
	if (alreadyStopped)
		return 1;

	g_breakpoints.ExecBreakPoint(pc);
	return coreState == CORE_RUNNING_CPU ? 0 : 1;
}
66
67
// Executes a memory-access check for the access at addr made by the op at pc.
// Returns non-zero when execution should stop, 0 to continue.
u32 IRRunMemCheck(u32 pc, u32 addr) {
	// If we're resuming past this exact check, let it through once.
	const uint32_t resumePC = g_breakpoints.CheckSkipFirst();
	if (resumePC == pc || resumePC == currentMIPS->pc)
		return 0;

	// If the core is no longer running (something else already stopped it),
	// report a hit without re-triggering.
	const bool alreadyStopped = coreState != CORE_RUNNING_CPU && coreState != CORE_NEXTFRAME;
	if (alreadyStopped)
		return 1;

	g_breakpoints.ExecOpMemCheck(addr, pc);
	return coreState == CORE_RUNNING_CPU ? 0 : 1;
}
80
81
// Applies the guest's FCR31 rounding mode and flush-to-zero (FTZ) flag to the
// host FPU control register, so interpreted float ops round like the PSP.
// Only the x86 (MXCSR) and non-Windows ARM64 (FPCR) paths are implemented;
// other targets silently keep host defaults.
void IRApplyRounding(MIPSState *mips) {
	// Bit 24 = FTZ, bits [1:0] = rounding mode; everything else is ignored here.
	u32 fcr1Bits = mips->fcr31 & 0x01000003;
	// If these are 0, we just leave things as they are.
	if (fcr1Bits) {
		int rmode = fcr1Bits & 3;
		bool ftz = (fcr1Bits & 0x01000000) != 0;
#if PPSSPP_ARCH(SSE2)
		// Clear MXCSR rounding-control bits [14:13] before inserting the new mode.
		u32 csr = _mm_getcsr() & ~0x6000;
		// Translate the rounding mode bits to X86, the same way as in Asm.cpp.
		// MIPS and x86 agree on 0 (nearest) and 1↔3 swap via XOR with 2 when odd.
		if (rmode & 1) {
			rmode ^= 2;
		}
		csr |= rmode << 13;

		if (ftz) {
			// Flush to zero
			csr |= 0x8000;
		}
		_mm_setcsr(csr);
#elif PPSSPP_ARCH(ARM64) && !PPSSPP_PLATFORM(WINDOWS)
		// On ARM64 we need to use inline assembly for a portable solution.
		// Unfortunately we don't have this possibility on Windows with MSVC, so ifdeffed out above.
		// Note that in the JIT, for fcvts, we use specific conversions. We could use the FCVTS variants
		// directly through inline assembly.
		u64 fpcr;  // not really 64-bit, just to match the register size.
		asm volatile ("mrs %0, fpcr" : "=r" (fpcr));

		// Translate MIPS to ARM rounding mode (MIPS: RN, RZ, RP, RM -> ARM RMode encoding).
		static const u8 lookup[4] = {0, 3, 1, 2};

		fpcr &= ~(3 << 22);  // Clear RMode bits [23:22]
		fpcr |= (lookup[rmode] << 22);

		if (ftz) {
			// Bit 24 is the FPCR FZ (flush-to-zero) bit.
			fpcr |= 1 << 24;
		}
		// Write back the modified FPCR
		asm volatile ("msr fpcr, %0" : : "r" (fpcr));
#endif
	}
}
122
123
// Restores the host FPU to round-to-nearest with FTZ off, undoing whatever
// IRApplyRounding() set. Paired with the same platform coverage as above.
void IRRestoreRounding() {
#if PPSSPP_ARCH(SSE2)
	// TODO: We should avoid this if we didn't apply rounding in the first place.
	// In the meantime, clear out FTZ and rounding mode bits.
	u32 csr = _mm_getcsr();
	// Bits [15:13] of MXCSR: FTZ (15) plus rounding control (14:13).
	csr &= ~(7 << 13);
	_mm_setcsr(csr);
#elif PPSSPP_ARCH(ARM64) && !PPSSPP_PLATFORM(WINDOWS)
	u64 fpcr;  // not really 64-bit, just to match the register size.
	asm volatile ("mrs %0, fpcr" : "=r" (fpcr));
	fpcr &= ~(7 << 22);  // Clear bits [23:22] (rounding mode) and bit 24 (FTZ)
	// Write back the modified FPCR
	asm volatile ("msr fpcr, %0" : : "r" (fpcr));
#endif
}
138
139
// We cannot use NEON on ARM32 here until we make it a hard dependency. We can, however, on ARM64.
140
u32 IRInterpret(MIPSState *mips, const IRInst *inst) {
141
while (true) {
142
switch (inst->op) {
143
case IROp::SetConst:
144
mips->r[inst->dest] = inst->constant;
145
break;
146
case IROp::SetConstF:
147
memcpy(&mips->f[inst->dest], &inst->constant, 4);
148
break;
149
case IROp::Add:
150
mips->r[inst->dest] = mips->r[inst->src1] + mips->r[inst->src2];
151
break;
152
case IROp::Sub:
153
mips->r[inst->dest] = mips->r[inst->src1] - mips->r[inst->src2];
154
break;
155
case IROp::And:
156
mips->r[inst->dest] = mips->r[inst->src1] & mips->r[inst->src2];
157
break;
158
case IROp::Or:
159
mips->r[inst->dest] = mips->r[inst->src1] | mips->r[inst->src2];
160
break;
161
case IROp::Xor:
162
mips->r[inst->dest] = mips->r[inst->src1] ^ mips->r[inst->src2];
163
break;
164
case IROp::Mov:
165
mips->r[inst->dest] = mips->r[inst->src1];
166
break;
167
case IROp::AddConst:
168
mips->r[inst->dest] = mips->r[inst->src1] + inst->constant;
169
break;
170
case IROp::OptAddConst: // For this one, it's worth having a "unary" variant of the above that only needs to read one register param.
171
mips->r[inst->dest] += inst->constant;
172
break;
173
case IROp::SubConst:
174
mips->r[inst->dest] = mips->r[inst->src1] - inst->constant;
175
break;
176
case IROp::AndConst:
177
mips->r[inst->dest] = mips->r[inst->src1] & inst->constant;
178
break;
179
case IROp::OptAndConst: // For this one, it's worth having a "unary" variant of the above that only needs to read one register param.
180
mips->r[inst->dest] &= inst->constant;
181
break;
182
case IROp::OrConst:
183
mips->r[inst->dest] = mips->r[inst->src1] | inst->constant;
184
break;
185
case IROp::OptOrConst:
186
mips->r[inst->dest] |= inst->constant;
187
break;
188
case IROp::XorConst:
189
mips->r[inst->dest] = mips->r[inst->src1] ^ inst->constant;
190
break;
191
case IROp::Neg:
192
mips->r[inst->dest] = (u32)(-(s32)mips->r[inst->src1]);
193
break;
194
case IROp::Not:
195
mips->r[inst->dest] = ~mips->r[inst->src1];
196
break;
197
case IROp::Ext8to32:
198
mips->r[inst->dest] = SignExtend8ToU32(mips->r[inst->src1]);
199
break;
200
case IROp::Ext16to32:
201
mips->r[inst->dest] = SignExtend16ToU32(mips->r[inst->src1]);
202
break;
203
case IROp::ReverseBits:
204
mips->r[inst->dest] = ReverseBits32(mips->r[inst->src1]);
205
break;
206
207
case IROp::Load8:
208
mips->r[inst->dest] = Memory::ReadUnchecked_U8(mips->r[inst->src1] + inst->constant);
209
break;
210
case IROp::Load8Ext:
211
mips->r[inst->dest] = SignExtend8ToU32(Memory::ReadUnchecked_U8(mips->r[inst->src1] + inst->constant));
212
break;
213
case IROp::Load16:
214
mips->r[inst->dest] = Memory::ReadUnchecked_U16(mips->r[inst->src1] + inst->constant);
215
break;
216
case IROp::Load16Ext:
217
mips->r[inst->dest] = SignExtend16ToU32(Memory::ReadUnchecked_U16(mips->r[inst->src1] + inst->constant));
218
break;
219
case IROp::Load32:
220
mips->r[inst->dest] = Memory::ReadUnchecked_U32(mips->r[inst->src1] + inst->constant);
221
break;
222
case IROp::Load32Left:
223
{
224
u32 addr = mips->r[inst->src1] + inst->constant;
225
u32 shift = (addr & 3) * 8;
226
u32 mem = Memory::ReadUnchecked_U32(addr & 0xfffffffc);
227
u32 destMask = 0x00ffffff >> shift;
228
mips->r[inst->dest] = (mips->r[inst->dest] & destMask) | (mem << (24 - shift));
229
break;
230
}
231
case IROp::Load32Right:
232
{
233
u32 addr = mips->r[inst->src1] + inst->constant;
234
u32 shift = (addr & 3) * 8;
235
u32 mem = Memory::ReadUnchecked_U32(addr & 0xfffffffc);
236
u32 destMask = 0xffffff00 << (24 - shift);
237
mips->r[inst->dest] = (mips->r[inst->dest] & destMask) | (mem >> shift);
238
break;
239
}
240
case IROp::Load32Linked:
241
if (inst->dest != MIPS_REG_ZERO)
242
mips->r[inst->dest] = Memory::ReadUnchecked_U32(mips->r[inst->src1] + inst->constant);
243
mips->llBit = 1;
244
break;
245
case IROp::LoadFloat:
246
mips->f[inst->dest] = Memory::ReadUnchecked_Float(mips->r[inst->src1] + inst->constant);
247
break;
248
249
case IROp::Store8:
250
Memory::WriteUnchecked_U8(mips->r[inst->src3], mips->r[inst->src1] + inst->constant);
251
break;
252
case IROp::Store16:
253
Memory::WriteUnchecked_U16(mips->r[inst->src3], mips->r[inst->src1] + inst->constant);
254
break;
255
case IROp::Store32:
256
Memory::WriteUnchecked_U32(mips->r[inst->src3], mips->r[inst->src1] + inst->constant);
257
break;
258
case IROp::Store32Left:
259
{
260
u32 addr = mips->r[inst->src1] + inst->constant;
261
u32 shift = (addr & 3) * 8;
262
u32 mem = Memory::ReadUnchecked_U32(addr & 0xfffffffc);
263
u32 memMask = 0xffffff00 << shift;
264
u32 result = (mips->r[inst->src3] >> (24 - shift)) | (mem & memMask);
265
Memory::WriteUnchecked_U32(result, addr & 0xfffffffc);
266
break;
267
}
268
case IROp::Store32Right:
269
{
270
u32 addr = mips->r[inst->src1] + inst->constant;
271
u32 shift = (addr & 3) * 8;
272
u32 mem = Memory::ReadUnchecked_U32(addr & 0xfffffffc);
273
u32 memMask = 0x00ffffff >> (24 - shift);
274
u32 result = (mips->r[inst->src3] << shift) | (mem & memMask);
275
Memory::WriteUnchecked_U32(result, addr & 0xfffffffc);
276
break;
277
}
278
case IROp::Store32Conditional:
279
if (mips->llBit) {
280
Memory::WriteUnchecked_U32(mips->r[inst->src3], mips->r[inst->src1] + inst->constant);
281
if (inst->dest != MIPS_REG_ZERO) {
282
mips->r[inst->dest] = 1;
283
}
284
} else if (inst->dest != MIPS_REG_ZERO) {
285
mips->r[inst->dest] = 0;
286
}
287
break;
288
case IROp::StoreFloat:
289
Memory::WriteUnchecked_Float(mips->f[inst->src3], mips->r[inst->src1] + inst->constant);
290
break;
291
292
case IROp::LoadVec4:
293
{
294
u32 base = mips->r[inst->src1] + inst->constant;
295
// This compiles to a nice SSE load/store on x86, and hopefully similar on ARM.
296
memcpy(&mips->f[inst->dest], Memory::GetPointerUnchecked(base), 4 * 4);
297
break;
298
}
299
case IROp::StoreVec4:
300
{
301
u32 base = mips->r[inst->src1] + inst->constant;
302
memcpy((float *)Memory::GetPointerUnchecked(base), &mips->f[inst->dest], 4 * 4);
303
break;
304
}
305
306
case IROp::Vec4Init:
307
{
308
memcpy(&mips->f[inst->dest], vec4InitValues[inst->src1], 4 * sizeof(float));
309
break;
310
}
311
312
case IROp::Vec4Shuffle:
313
{
314
// Can't use the SSE shuffle here because it takes an immediate. pshufb with a table would work though,
315
// or a big switch - there are only 256 shuffles possible (4^4)
316
float temp[4];
317
for (int i = 0; i < 4; i++)
318
temp[i] = mips->f[inst->src1 + ((inst->src2 >> (i * 2)) & 3)];
319
const int dest = inst->dest;
320
for (int i = 0; i < 4; i++)
321
mips->f[dest + i] = temp[i];
322
break;
323
}
324
325
case IROp::Vec4Blend:
326
{
327
const int dest = inst->dest;
328
const int src1 = inst->src1;
329
const int src2 = inst->src2;
330
const int constant = inst->constant;
331
// 90% of calls to this is inst->constant == 7 or inst->constant == 8. Some are 1 and 4, others very rare.
332
// Could use _mm_blendv_ps (SSE4+BMI), vbslq_f32 (ARM), __riscv_vmerge_vvm (RISC-V)
333
for (int i = 0; i < 4; i++)
334
mips->f[dest + i] = ((constant >> i) & 1) ? mips->f[src2 + i] : mips->f[src1 + i];
335
break;
336
}
337
338
case IROp::Vec4Mov:
339
{
340
#if defined(_M_SSE)
341
_mm_store_ps(&mips->f[inst->dest], _mm_load_ps(&mips->f[inst->src1]));
342
#elif PPSSPP_ARCH(ARM_NEON)
343
vst1q_f32(&mips->f[inst->dest], vld1q_f32(&mips->f[inst->src1]));
344
#else
345
memcpy(&mips->f[inst->dest], &mips->f[inst->src1], 4 * sizeof(float));
346
#endif
347
break;
348
}
349
350
case IROp::Vec4Add:
351
{
352
#if defined(_M_SSE)
353
_mm_store_ps(&mips->f[inst->dest], _mm_add_ps(_mm_load_ps(&mips->f[inst->src1]), _mm_load_ps(&mips->f[inst->src2])));
354
#elif PPSSPP_ARCH(ARM_NEON)
355
vst1q_f32(&mips->f[inst->dest], vaddq_f32(vld1q_f32(&mips->f[inst->src1]), vld1q_f32(&mips->f[inst->src2])));
356
#else
357
for (int i = 0; i < 4; i++)
358
mips->f[inst->dest + i] = mips->f[inst->src1 + i] + mips->f[inst->src2 + i];
359
#endif
360
break;
361
}
362
363
case IROp::Vec4Sub:
364
{
365
#if defined(_M_SSE)
366
_mm_store_ps(&mips->f[inst->dest], _mm_sub_ps(_mm_load_ps(&mips->f[inst->src1]), _mm_load_ps(&mips->f[inst->src2])));
367
#elif PPSSPP_ARCH(ARM_NEON)
368
vst1q_f32(&mips->f[inst->dest], vsubq_f32(vld1q_f32(&mips->f[inst->src1]), vld1q_f32(&mips->f[inst->src2])));
369
#else
370
for (int i = 0; i < 4; i++)
371
mips->f[inst->dest + i] = mips->f[inst->src1 + i] - mips->f[inst->src2 + i];
372
#endif
373
break;
374
}
375
376
case IROp::Vec4Mul:
377
{
378
#if defined(_M_SSE)
379
_mm_store_ps(&mips->f[inst->dest], _mm_mul_ps(_mm_load_ps(&mips->f[inst->src1]), _mm_load_ps(&mips->f[inst->src2])));
380
#elif PPSSPP_ARCH(ARM_NEON)
381
vst1q_f32(&mips->f[inst->dest], vmulq_f32(vld1q_f32(&mips->f[inst->src1]), vld1q_f32(&mips->f[inst->src2])));
382
#else
383
for (int i = 0; i < 4; i++)
384
mips->f[inst->dest + i] = mips->f[inst->src1 + i] * mips->f[inst->src2 + i];
385
#endif
386
break;
387
}
388
389
case IROp::Vec4Div:
390
{
391
#if defined(_M_SSE)
392
_mm_store_ps(&mips->f[inst->dest], _mm_div_ps(_mm_load_ps(&mips->f[inst->src1]), _mm_load_ps(&mips->f[inst->src2])));
393
#elif PPSSPP_ARCH(ARM64_NEON)
394
vst1q_f32(&mips->f[inst->dest], vdivq_f32(vld1q_f32(&mips->f[inst->src1]), vld1q_f32(&mips->f[inst->src2])));
395
#else
396
for (int i = 0; i < 4; i++)
397
mips->f[inst->dest + i] = mips->f[inst->src1 + i] / mips->f[inst->src2 + i];
398
#endif
399
break;
400
}
401
402
case IROp::Vec4Scale:
403
{
404
#if defined(_M_SSE)
405
_mm_store_ps(&mips->f[inst->dest], _mm_mul_ps(_mm_load_ps(&mips->f[inst->src1]), _mm_set1_ps(mips->f[inst->src2])));
406
#elif PPSSPP_ARCH(ARM_NEON)
407
vst1q_f32(&mips->f[inst->dest], vmulq_lane_f32(vld1q_f32(&mips->f[inst->src1]), vdup_n_f32(mips->f[inst->src2]), 0));
408
#else
409
const float factor = mips->f[inst->src2];
410
for (int i = 0; i < 4; i++)
411
mips->f[inst->dest + i] = mips->f[inst->src1 + i] * factor;
412
#endif
413
break;
414
}
415
416
case IROp::Vec4Neg:
417
{
418
#if defined(_M_SSE)
419
_mm_store_ps(&mips->f[inst->dest], _mm_xor_ps(_mm_load_ps(&mips->f[inst->src1]), _mm_load_ps((const float *)signBits)));
420
#elif PPSSPP_ARCH(ARM_NEON)
421
vst1q_f32(&mips->f[inst->dest], vnegq_f32(vld1q_f32(&mips->f[inst->src1])));
422
#else
423
for (int i = 0; i < 4; i++)
424
mips->f[inst->dest + i] = -mips->f[inst->src1 + i];
425
#endif
426
break;
427
}
428
429
case IROp::Vec4Abs:
430
{
431
#if defined(_M_SSE)
432
_mm_store_ps(&mips->f[inst->dest], _mm_and_ps(_mm_load_ps(&mips->f[inst->src1]), _mm_load_ps((const float *)noSignMask)));
433
#elif PPSSPP_ARCH(ARM_NEON)
434
vst1q_f32(&mips->f[inst->dest], vabsq_f32(vld1q_f32(&mips->f[inst->src1])));
435
#else
436
for (int i = 0; i < 4; i++)
437
mips->f[inst->dest + i] = fabsf(mips->f[inst->src1 + i]);
438
#endif
439
break;
440
}
441
442
case IROp::Vec2Unpack16To31:
443
{
444
const int dest = inst->dest;
445
const int src1 = inst->src1;
446
mips->fi[dest] = (mips->fi[src1] << 16) >> 1;
447
mips->fi[dest + 1] = (mips->fi[src1] & 0xFFFF0000) >> 1;
448
break;
449
}
450
451
case IROp::Vec2Unpack16To32:
452
{
453
const int dest = inst->dest;
454
const int src1 = inst->src1;
455
mips->fi[dest] = (mips->fi[src1] << 16);
456
mips->fi[dest + 1] = (mips->fi[src1] & 0xFFFF0000);
457
break;
458
}
459
460
case IROp::Vec4Unpack8To32:
461
{
462
#if defined(_M_SSE)
463
__m128i src = _mm_cvtsi32_si128(mips->fi[inst->src1]);
464
src = _mm_unpacklo_epi8(src, _mm_setzero_si128());
465
src = _mm_unpacklo_epi16(src, _mm_setzero_si128());
466
_mm_store_si128((__m128i *)&mips->fi[inst->dest], _mm_slli_epi32(src, 24));
467
#elif PPSSPP_ARCH(ARM_NEON) && 0 // Untested
468
const uint8x8_t value = (uint8x8_t)vdup_n_u32(mips->fi[inst->src1]);
469
const uint16x8_t value16 = vmovl_u8(value);
470
const uint32x4_t value32 = vshll_n_u16(vget_low_u16(value16), 24);
471
vst1q_u32(&mips->fi[inst->dest], value32);
472
#else
473
mips->fi[inst->dest] = (mips->fi[inst->src1] << 24);
474
mips->fi[inst->dest + 1] = (mips->fi[inst->src1] << 16) & 0xFF000000;
475
mips->fi[inst->dest + 2] = (mips->fi[inst->src1] << 8) & 0xFF000000;
476
mips->fi[inst->dest + 3] = (mips->fi[inst->src1]) & 0xFF000000;
477
#endif
478
break;
479
}
480
481
case IROp::Vec2Pack32To16:
482
{
483
u32 val = mips->fi[inst->src1] >> 16;
484
mips->fi[inst->dest] = (mips->fi[inst->src1 + 1] & 0xFFFF0000) | val;
485
break;
486
}
487
488
case IROp::Vec2Pack31To16:
489
{
490
// Used in Tekken 6
491
492
u32 val = (mips->fi[inst->src1] >> 15) & 0xFFFF;
493
val |= (mips->fi[inst->src1 + 1] << 1) & 0xFFFF0000;
494
mips->fi[inst->dest] = val;
495
break;
496
}
497
498
case IROp::Vec4Pack32To8:
499
{
500
// Removed previous SSE code due to the need for unsigned 16-bit pack, which I'm too lazy to work around the lack of in SSE2.
501
// pshufb or SSE4 instructions can be used instead.
502
u32 val = mips->fi[inst->src1] >> 24;
503
val |= (mips->fi[inst->src1 + 1] >> 16) & 0xFF00;
504
val |= (mips->fi[inst->src1 + 2] >> 8) & 0xFF0000;
505
val |= (mips->fi[inst->src1 + 3]) & 0xFF000000;
506
mips->fi[inst->dest] = val;
507
break;
508
}
509
510
case IROp::Vec4Pack31To8:
511
{
512
// Used in Tekken 6
513
514
// Removed previous SSE code due to the need for unsigned 16-bit pack, which I'm too lazy to work around the lack of in SSE2.
515
// pshufb or SSE4 instructions can be used instead.
516
#if PPSSPP_ARCH(ARM_NEON) && 0
517
// Untested
518
uint32x4_t value = vld1q_u32(&mips->fi[inst->src1]);
519
value = vshlq_n_u32(value, 1);
520
uint32x2_t halved = vshrn_n_u32(value, 8);
521
uint32x2_t halvedAgain = vshrn_n_u32(vcombine_u32(halved, vdup_n_u32(0)), 8);
522
mips->fi[inst->dest] = vget_lane_u32(halvedAgain, 0);
523
#else
524
u32 val = (mips->fi[inst->src1] >> 23) & 0xFF;
525
val |= (mips->fi[inst->src1 + 1] >> 15) & 0xFF00;
526
val |= (mips->fi[inst->src1 + 2] >> 7) & 0xFF0000;
527
val |= (mips->fi[inst->src1 + 3] << 1) & 0xFF000000;
528
mips->fi[inst->dest] = val;
529
#endif
530
break;
531
}
532
533
case IROp::Vec2ClampToZero:
534
{
535
for (int i = 0; i < 2; i++) {
536
u32 val = mips->fi[inst->src1 + i];
537
mips->fi[inst->dest + i] = (int)val >= 0 ? val : 0;
538
}
539
break;
540
}
541
542
case IROp::Vec4ClampToZero:
543
{
544
#if defined(_M_SSE)
545
// Trickery: Expand the sign bit, and use andnot to zero negative values.
546
__m128i val = _mm_load_si128((const __m128i *)&mips->fi[inst->src1]);
547
__m128i mask = _mm_srai_epi32(val, 31);
548
val = _mm_andnot_si128(mask, val);
549
_mm_store_si128((__m128i *)&mips->fi[inst->dest], val);
550
#else
551
const int src1 = inst->src1;
552
const int dest = inst->dest;
553
for (int i = 0; i < 4; i++) {
554
u32 val = mips->fi[src1 + i];
555
mips->fi[dest + i] = (int)val >= 0 ? val : 0;
556
}
557
#endif
558
break;
559
}
560
561
case IROp::Vec4DuplicateUpperBitsAndShift1: // For vuc2i, the weird one.
562
{
563
const int src1 = inst->src1;
564
const int dest = inst->dest;
565
for (int i = 0; i < 4; i++) {
566
u32 val = mips->fi[src1 + i];
567
val = val | (val >> 8);
568
val = val | (val >> 16);
569
val >>= 1;
570
mips->fi[dest + i] = val;
571
}
572
break;
573
}
574
575
case IROp::FCmpVfpuBit:
576
{
577
const int op = inst->dest & 0xF;
578
const int bit = inst->dest >> 4;
579
int result = 0;
580
switch (op) {
581
case VC_EQ: result = mips->f[inst->src1] == mips->f[inst->src2]; break;
582
case VC_NE: result = mips->f[inst->src1] != mips->f[inst->src2]; break;
583
case VC_LT: result = mips->f[inst->src1] < mips->f[inst->src2]; break;
584
case VC_LE: result = mips->f[inst->src1] <= mips->f[inst->src2]; break;
585
case VC_GT: result = mips->f[inst->src1] > mips->f[inst->src2]; break;
586
case VC_GE: result = mips->f[inst->src1] >= mips->f[inst->src2]; break;
587
case VC_EZ: result = mips->f[inst->src1] == 0.0f; break;
588
case VC_NZ: result = mips->f[inst->src1] != 0.0f; break;
589
case VC_EN: result = my_isnan(mips->f[inst->src1]); break;
590
case VC_NN: result = !my_isnan(mips->f[inst->src1]); break;
591
case VC_EI: result = my_isinf(mips->f[inst->src1]); break;
592
case VC_NI: result = !my_isinf(mips->f[inst->src1]); break;
593
case VC_ES: result = my_isnanorinf(mips->f[inst->src1]); break;
594
case VC_NS: result = !my_isnanorinf(mips->f[inst->src1]); break;
595
case VC_TR: result = 1; break;
596
case VC_FL: result = 0; break;
597
default:
598
result = 0;
599
}
600
if (result != 0) {
601
mips->vfpuCtrl[VFPU_CTRL_CC] |= (1 << bit);
602
} else {
603
mips->vfpuCtrl[VFPU_CTRL_CC] &= ~(1 << bit);
604
}
605
break;
606
}
607
608
case IROp::FCmpVfpuAggregate:
609
{
610
const u32 mask = inst->dest;
611
const u32 cc = mips->vfpuCtrl[VFPU_CTRL_CC];
612
int anyBit = (cc & mask) ? 0x10 : 0x00;
613
int allBit = (cc & mask) == mask ? 0x20 : 0x00;
614
mips->vfpuCtrl[VFPU_CTRL_CC] = (cc & ~0x30) | anyBit | allBit;
615
break;
616
}
617
618
case IROp::FCmovVfpuCC:
619
if (((mips->vfpuCtrl[VFPU_CTRL_CC] >> (inst->src2 & 0xf)) & 1) == ((u32)inst->src2 >> 7)) {
620
mips->f[inst->dest] = mips->f[inst->src1];
621
}
622
break;
623
624
case IROp::Vec4Dot:
625
{
626
// Not quickly implementable on all platforms, unfortunately.
627
// Though, this is still pretty fast compared to one split into multiple IR instructions.
628
// This might be good though: https://gist.github.com/rikusalminen/3040241
629
float dot = mips->f[inst->src1] * mips->f[inst->src2];
630
for (int i = 1; i < 4; i++)
631
dot += mips->f[inst->src1 + i] * mips->f[inst->src2 + i];
632
mips->f[inst->dest] = dot;
633
break;
634
}
635
636
case IROp::FSin:
637
mips->f[inst->dest] = vfpu_sin(mips->f[inst->src1]);
638
break;
639
case IROp::FCos:
640
mips->f[inst->dest] = vfpu_cos(mips->f[inst->src1]);
641
break;
642
case IROp::FRSqrt:
643
mips->f[inst->dest] = 1.0f / sqrtf(mips->f[inst->src1]);
644
break;
645
case IROp::FRecip:
646
mips->f[inst->dest] = 1.0f / mips->f[inst->src1];
647
break;
648
case IROp::FAsin:
649
mips->f[inst->dest] = vfpu_asin(mips->f[inst->src1]);
650
break;
651
652
case IROp::ShlImm:
653
mips->r[inst->dest] = mips->r[inst->src1] << (int)inst->src2;
654
break;
655
case IROp::ShrImm:
656
mips->r[inst->dest] = mips->r[inst->src1] >> (int)inst->src2;
657
break;
658
case IROp::SarImm:
659
mips->r[inst->dest] = (s32)mips->r[inst->src1] >> (int)inst->src2;
660
break;
661
case IROp::RorImm:
662
{
663
u32 x = mips->r[inst->src1];
664
int sa = inst->src2;
665
mips->r[inst->dest] = (x >> sa) | (x << (32 - sa));
666
}
667
break;
668
669
case IROp::Shl:
670
mips->r[inst->dest] = mips->r[inst->src1] << (mips->r[inst->src2] & 31);
671
break;
672
case IROp::Shr:
673
mips->r[inst->dest] = mips->r[inst->src1] >> (mips->r[inst->src2] & 31);
674
break;
675
case IROp::Sar:
676
mips->r[inst->dest] = (s32)mips->r[inst->src1] >> (mips->r[inst->src2] & 31);
677
break;
678
case IROp::Ror:
679
{
680
u32 x = mips->r[inst->src1];
681
int sa = mips->r[inst->src2] & 31;
682
mips->r[inst->dest] = (x >> sa) | (x << (32 - sa));
683
break;
684
}
685
686
case IROp::Clz:
687
{
688
mips->r[inst->dest] = clz32(mips->r[inst->src1]);
689
break;
690
}
691
692
case IROp::Slt:
693
mips->r[inst->dest] = (s32)mips->r[inst->src1] < (s32)mips->r[inst->src2];
694
break;
695
696
case IROp::SltU:
697
mips->r[inst->dest] = mips->r[inst->src1] < mips->r[inst->src2];
698
break;
699
700
case IROp::SltConst:
701
mips->r[inst->dest] = (s32)mips->r[inst->src1] < (s32)inst->constant;
702
break;
703
704
case IROp::SltUConst:
705
mips->r[inst->dest] = mips->r[inst->src1] < inst->constant;
706
break;
707
708
case IROp::MovZ:
709
if (mips->r[inst->src1] == 0)
710
mips->r[inst->dest] = mips->r[inst->src2];
711
break;
712
case IROp::MovNZ:
713
if (mips->r[inst->src1] != 0)
714
mips->r[inst->dest] = mips->r[inst->src2];
715
break;
716
717
case IROp::Max:
718
mips->r[inst->dest] = (s32)mips->r[inst->src1] > (s32)mips->r[inst->src2] ? mips->r[inst->src1] : mips->r[inst->src2];
719
break;
720
case IROp::Min:
721
mips->r[inst->dest] = (s32)mips->r[inst->src1] < (s32)mips->r[inst->src2] ? mips->r[inst->src1] : mips->r[inst->src2];
722
break;
723
724
case IROp::MtLo:
725
mips->lo = mips->r[inst->src1];
726
break;
727
case IROp::MtHi:
728
mips->hi = mips->r[inst->src1];
729
break;
730
case IROp::MfLo:
731
mips->r[inst->dest] = mips->lo;
732
break;
733
case IROp::MfHi:
734
mips->r[inst->dest] = mips->hi;
735
break;
736
737
case IROp::Mult:
738
{
739
s64 result = (s64)(s32)mips->r[inst->src1] * (s64)(s32)mips->r[inst->src2];
740
memcpy(&mips->lo, &result, 8);
741
break;
742
}
743
case IROp::MultU:
744
{
745
u64 result = (u64)mips->r[inst->src1] * (u64)mips->r[inst->src2];
746
memcpy(&mips->lo, &result, 8);
747
break;
748
}
749
case IROp::Madd:
750
{
751
s64 result;
752
memcpy(&result, &mips->lo, 8);
753
result += (s64)(s32)mips->r[inst->src1] * (s64)(s32)mips->r[inst->src2];
754
memcpy(&mips->lo, &result, 8);
755
break;
756
}
757
case IROp::MaddU:
758
{
759
s64 result;
760
memcpy(&result, &mips->lo, 8);
761
result += (u64)mips->r[inst->src1] * (u64)mips->r[inst->src2];
762
memcpy(&mips->lo, &result, 8);
763
break;
764
}
765
case IROp::Msub:
766
{
767
s64 result;
768
memcpy(&result, &mips->lo, 8);
769
result -= (s64)(s32)mips->r[inst->src1] * (s64)(s32)mips->r[inst->src2];
770
memcpy(&mips->lo, &result, 8);
771
break;
772
}
773
case IROp::MsubU:
774
{
775
s64 result;
776
memcpy(&result, &mips->lo, 8);
777
result -= (u64)mips->r[inst->src1] * (u64)mips->r[inst->src2];
778
memcpy(&mips->lo, &result, 8);
779
break;
780
}
781
782
case IROp::Div:
783
{
784
s32 numerator = (s32)mips->r[inst->src1];
785
s32 denominator = (s32)mips->r[inst->src2];
786
if (numerator == (s32)0x80000000 && denominator == -1) {
787
mips->lo = 0x80000000;
788
mips->hi = -1;
789
} else if (denominator != 0) {
790
mips->lo = (u32)(numerator / denominator);
791
mips->hi = (u32)(numerator % denominator);
792
} else {
793
mips->lo = numerator < 0 ? 1 : -1;
794
mips->hi = numerator;
795
}
796
break;
797
}
798
case IROp::DivU:
799
{
800
u32 numerator = mips->r[inst->src1];
801
u32 denominator = mips->r[inst->src2];
802
if (denominator != 0) {
803
mips->lo = numerator / denominator;
804
mips->hi = numerator % denominator;
805
} else {
806
mips->lo = numerator <= 0xFFFF ? 0xFFFF : -1;
807
mips->hi = numerator;
808
}
809
break;
810
}
811
812
case IROp::BSwap16:
813
{
814
u32 x = mips->r[inst->src1];
815
// Don't think we can beat this with intrinsics.
816
mips->r[inst->dest] = ((x & 0xFF00FF00) >> 8) | ((x & 0x00FF00FF) << 8);
817
break;
818
}
819
case IROp::BSwap32:
820
{
821
mips->r[inst->dest] = swap32(mips->r[inst->src1]);
822
break;
823
}
824
825
case IROp::FAdd:
826
mips->f[inst->dest] = mips->f[inst->src1] + mips->f[inst->src2];
827
break;
828
case IROp::FSub:
829
mips->f[inst->dest] = mips->f[inst->src1] - mips->f[inst->src2];
830
break;
831
case IROp::FMul:
832
#if 1
833
{
834
float a = mips->f[inst->src1];
835
float b = mips->f[inst->src2];
836
if ((b == 0.0f && my_isinf(a)) || (a == 0.0f && my_isinf(b))) {
837
mips->fi[inst->dest] = 0x7fc00000;
838
} else {
839
mips->f[inst->dest] = a * b;
840
}
841
}
842
break;
843
#else
844
// Not sure if faster since it needs to load the operands twice? But the code is simpler.
845
{
846
// Takes care of negative zero by masking away the top bit, which also makes the inf check shorter.
847
u32 a = mips->fi[inst->src1] & 0x7FFFFFFF;
848
u32 b = mips->fi[inst->src2] & 0x7FFFFFFF;
849
if ((a == 0 && b == 0x7F800000) || (b == 0 && a == 0x7F800000)) {
850
mips->fi[inst->dest] = 0x7fc00000;
851
} else {
852
mips->f[inst->dest] = mips->f[inst->src1] * mips->f[inst->src2];
853
}
854
break;
855
}
856
#endif
857
case IROp::FDiv:
858
mips->f[inst->dest] = mips->f[inst->src1] / mips->f[inst->src2];
859
break;
860
case IROp::FMin:
861
if (my_isnan(mips->f[inst->src1]) || my_isnan(mips->f[inst->src2])) {
862
// See interpreter for this logic: this is for vmin, we're comparing mantissa+exp.
863
if (mips->fs[inst->src1] < 0 && mips->fs[inst->src2] < 0) {
864
mips->fs[inst->dest] = std::max(mips->fs[inst->src1], mips->fs[inst->src2]);
865
} else {
866
mips->fs[inst->dest] = std::min(mips->fs[inst->src1], mips->fs[inst->src2]);
867
}
868
} else {
869
mips->f[inst->dest] = std::min(mips->f[inst->src1], mips->f[inst->src2]);
870
}
871
break;
872
case IROp::FMax:
873
if (my_isnan(mips->f[inst->src1]) || my_isnan(mips->f[inst->src2])) {
874
// See interpreter for this logic: this is for vmax, we're comparing mantissa+exp.
875
if (mips->fs[inst->src1] < 0 && mips->fs[inst->src2] < 0) {
876
mips->fs[inst->dest] = std::min(mips->fs[inst->src1], mips->fs[inst->src2]);
877
} else {
878
mips->fs[inst->dest] = std::max(mips->fs[inst->src1], mips->fs[inst->src2]);
879
}
880
} else {
881
mips->f[inst->dest] = std::max(mips->f[inst->src1], mips->f[inst->src2]);
882
}
883
break;
884
885
case IROp::FMov:
886
mips->f[inst->dest] = mips->f[inst->src1];
887
break;
888
case IROp::FAbs:
889
mips->f[inst->dest] = fabsf(mips->f[inst->src1]);
890
break;
891
case IROp::FSqrt:
892
mips->f[inst->dest] = sqrtf(mips->f[inst->src1]);
893
break;
894
case IROp::FNeg:
895
mips->f[inst->dest] = -mips->f[inst->src1];
896
break;
897
case IROp::FSat0_1:
898
// We have to do this carefully to handle NAN and -0.0f.
899
mips->f[inst->dest] = vfpu_clamp(mips->f[inst->src1], 0.0f, 1.0f);
900
break;
901
case IROp::FSatMinus1_1:
902
mips->f[inst->dest] = vfpu_clamp(mips->f[inst->src1], -1.0f, 1.0f);
903
break;
904
905
case IROp::FSign:
906
{
907
// Bitwise trickery
908
u32 val;
909
memcpy(&val, &mips->f[inst->src1], sizeof(u32));
910
if (val == 0 || val == 0x80000000)
911
mips->f[inst->dest] = 0.0f;
912
else if ((val >> 31) == 0)
913
mips->f[inst->dest] = 1.0f;
914
else
915
mips->f[inst->dest] = -1.0f;
916
break;
917
}
918
919
case IROp::FpCondFromReg:
920
mips->fpcond = mips->r[inst->dest];
921
break;
922
case IROp::FpCondToReg:
923
mips->r[inst->dest] = mips->fpcond;
924
break;
925
case IROp::FpCtrlFromReg:
926
mips->fcr31 = mips->r[inst->src1] & 0x0181FFFF;
927
// Extract the new fpcond value.
928
// TODO: Is it really helping us to keep it separate?
929
mips->fpcond = (mips->fcr31 >> 23) & 1;
930
break;
931
case IROp::FpCtrlToReg:
932
// Update the fpcond bit first.
933
mips->fcr31 = (mips->fcr31 & ~(1 << 23)) | ((mips->fpcond & 1) << 23);
934
mips->r[inst->dest] = mips->fcr31;
935
break;
936
case IROp::VfpuCtrlToReg:
937
mips->r[inst->dest] = mips->vfpuCtrl[inst->src1];
938
break;
939
case IROp::FRound:
940
{
941
float value = mips->f[inst->src1];
942
if (my_isnanorinf(value)) {
943
mips->fi[inst->dest] = my_isinf(value) && value < 0.0f ? -2147483648LL : 2147483647LL;
944
break;
945
} else {
946
mips->fs[inst->dest] = (int)round_ieee_754(value);
947
}
948
break;
949
}
950
case IROp::FTrunc:
951
{
952
float value = mips->f[inst->src1];
953
if (my_isnanorinf(value)) {
954
mips->fi[inst->dest] = my_isinf(value) && value < 0.0f ? -2147483648LL : 2147483647LL;
955
break;
956
} else {
957
if (value >= 0.0f) {
958
mips->fs[inst->dest] = (int)floorf(value);
959
// Overflow, but it was positive.
960
if (mips->fs[inst->dest] == -2147483648LL) {
961
mips->fs[inst->dest] = 2147483647LL;
962
}
963
} else {
964
// Overflow happens to be the right value anyway.
965
mips->fs[inst->dest] = (int)ceilf(value);
966
}
967
break;
968
}
969
}
970
case IROp::FCeil:
971
{
972
float value = mips->f[inst->src1];
973
if (my_isnanorinf(value)) {
974
mips->fi[inst->dest] = my_isinf(value) && value < 0.0f ? -2147483648LL : 2147483647LL;
975
break;
976
} else {
977
mips->fs[inst->dest] = (int)ceilf(value);
978
}
979
break;
980
}
981
case IROp::FFloor:
982
{
983
float value = mips->f[inst->src1];
984
if (my_isnanorinf(value)) {
985
mips->fi[inst->dest] = my_isinf(value) && value < 0.0f ? -2147483648LL : 2147483647LL;
986
break;
987
} else {
988
mips->fs[inst->dest] = (int)floorf(value);
989
}
990
break;
991
}
992
case IROp::FCmp:
993
switch (inst->dest) {
994
case IRFpCompareMode::False:
995
mips->fpcond = 0;
996
break;
997
case IRFpCompareMode::EitherUnordered:
998
{
999
float a = mips->f[inst->src1];
1000
float b = mips->f[inst->src2];
1001
mips->fpcond = !(a > b || a < b || a == b);
1002
break;
1003
}
1004
case IRFpCompareMode::EqualOrdered:
1005
mips->fpcond = mips->f[inst->src1] == mips->f[inst->src2];
1006
break;
1007
case IRFpCompareMode::EqualUnordered:
1008
mips->fpcond = mips->f[inst->src1] == mips->f[inst->src2] || my_isnan(mips->f[inst->src1]) || my_isnan(mips->f[inst->src2]);
1009
break;
1010
case IRFpCompareMode::LessEqualOrdered:
1011
mips->fpcond = mips->f[inst->src1] <= mips->f[inst->src2];
1012
break;
1013
case IRFpCompareMode::LessEqualUnordered:
1014
mips->fpcond = !(mips->f[inst->src1] > mips->f[inst->src2]);
1015
break;
1016
case IRFpCompareMode::LessOrdered:
1017
mips->fpcond = mips->f[inst->src1] < mips->f[inst->src2];
1018
break;
1019
case IRFpCompareMode::LessUnordered:
1020
mips->fpcond = !(mips->f[inst->src1] >= mips->f[inst->src2]);
1021
break;
1022
}
1023
break;
1024
1025
case IROp::FCvtSW:
1026
mips->f[inst->dest] = (float)mips->fs[inst->src1];
1027
break;
1028
case IROp::FCvtWS:
1029
{
1030
float src = mips->f[inst->src1];
1031
if (my_isnanorinf(src)) {
1032
mips->fs[inst->dest] = my_isinf(src) && src < 0.0f ? -2147483648LL : 2147483647LL;
1033
break;
1034
}
1035
// TODO: Inline assembly to use here would be better.
1036
switch (IRRoundMode(mips->fcr31 & 3)) {
1037
case IRRoundMode::RINT_0: mips->fs[inst->dest] = (int)round_ieee_754(src); break;
1038
case IRRoundMode::CAST_1: mips->fs[inst->dest] = (int)src; break;
1039
case IRRoundMode::CEIL_2: mips->fs[inst->dest] = (int)ceilf(src); break;
1040
case IRRoundMode::FLOOR_3: mips->fs[inst->dest] = (int)floorf(src); break;
1041
}
1042
break; //cvt.w.s
1043
}
1044
case IROp::FCvtScaledSW:
1045
mips->f[inst->dest] = (float)mips->fs[inst->src1] * (1.0f / (1UL << (inst->src2 & 0x1F)));
1046
break;
1047
case IROp::FCvtScaledWS:
1048
{
1049
float src = mips->f[inst->src1];
1050
if (my_isnan(src)) {
1051
// TODO: True for negatives too?
1052
mips->fs[inst->dest] = 2147483647L;
1053
break;
1054
}
1055
1056
float mult = (float)(1UL << (inst->src2 & 0x1F));
1057
double sv = src * mult; // (float)0x7fffffff == (float)0x80000000
1058
// Cap/floor it to 0x7fffffff / 0x80000000
1059
if (sv > (double)0x7fffffff) {
1060
mips->fs[inst->dest] = 0x7fffffff;
1061
} else if (sv <= (double)(int)0x80000000) {
1062
mips->fs[inst->dest] = 0x80000000;
1063
} else {
1064
switch (IRRoundMode(inst->src2 >> 6)) {
1065
case IRRoundMode::RINT_0: mips->fs[inst->dest] = (int)round_ieee_754(sv); break;
1066
case IRRoundMode::CAST_1: mips->fs[inst->dest] = src >= 0 ? (int)floor(sv) : (int)ceil(sv); break;
1067
case IRRoundMode::CEIL_2: mips->fs[inst->dest] = (int)ceil(sv); break;
1068
case IRRoundMode::FLOOR_3: mips->fs[inst->dest] = (int)floor(sv); break;
1069
}
1070
}
1071
break;
1072
}
1073
1074
case IROp::FMovFromGPR:
1075
memcpy(&mips->f[inst->dest], &mips->r[inst->src1], 4);
1076
break;
1077
case IROp::OptFCvtSWFromGPR:
1078
mips->f[inst->dest] = (float)(int)mips->r[inst->src1];
1079
break;
1080
case IROp::FMovToGPR:
1081
memcpy(&mips->r[inst->dest], &mips->f[inst->src1], 4);
1082
break;
1083
case IROp::OptFMovToGPRShr8:
1084
{
1085
u32 temp;
1086
memcpy(&temp, &mips->f[inst->src1], 4);
1087
mips->r[inst->dest] = temp >> 8;
1088
break;
1089
}
1090
1091
case IROp::ExitToConst:
1092
return inst->constant;
1093
1094
case IROp::ExitToReg:
1095
return mips->r[inst->src1];
1096
1097
case IROp::ExitToConstIfEq:
1098
if (mips->r[inst->src1] == mips->r[inst->src2])
1099
return inst->constant;
1100
break;
1101
case IROp::ExitToConstIfNeq:
1102
if (mips->r[inst->src1] != mips->r[inst->src2])
1103
return inst->constant;
1104
break;
1105
case IROp::ExitToConstIfGtZ:
1106
if ((s32)mips->r[inst->src1] > 0)
1107
return inst->constant;
1108
break;
1109
case IROp::ExitToConstIfGeZ:
1110
if ((s32)mips->r[inst->src1] >= 0)
1111
return inst->constant;
1112
break;
1113
case IROp::ExitToConstIfLtZ:
1114
if ((s32)mips->r[inst->src1] < 0)
1115
return inst->constant;
1116
break;
1117
case IROp::ExitToConstIfLeZ:
1118
if ((s32)mips->r[inst->src1] <= 0)
1119
return inst->constant;
1120
break;
1121
1122
case IROp::Downcount:
1123
mips->downcount -= (int)inst->constant;
1124
break;
1125
1126
case IROp::SetPC:
1127
mips->pc = mips->r[inst->src1];
1128
break;
1129
1130
case IROp::SetPCConst:
1131
mips->pc = inst->constant;
1132
break;
1133
1134
case IROp::Syscall:
1135
// IROp::SetPC was (hopefully) executed before.
1136
{
1137
MIPSOpcode op(inst->constant);
1138
CallSyscall(op);
1139
if (coreState != CORE_RUNNING_CPU)
1140
CoreTiming::ForceCheck();
1141
break;
1142
}
1143
1144
case IROp::ExitToPC:
1145
return mips->pc;
1146
1147
case IROp::Interpret: // SLOW fallback. Can be made faster. Ideally should be removed but may be useful for debugging.
1148
{
1149
MIPSOpcode op(inst->constant);
1150
MIPSInterpret(op);
1151
break;
1152
}
1153
1154
case IROp::CallReplacement:
1155
{
1156
int funcIndex = inst->constant;
1157
const ReplacementTableEntry *f = GetReplacementFunc(funcIndex);
1158
int cycles = f->replaceFunc();
1159
mips->r[inst->dest] = cycles < 0 ? -1 : 0;
1160
mips->downcount -= cycles < 0 ? -cycles : cycles;
1161
break;
1162
}
1163
1164
case IROp::SetCtrlVFPU:
1165
mips->vfpuCtrl[inst->dest] = inst->constant;
1166
break;
1167
1168
case IROp::SetCtrlVFPUReg:
1169
mips->vfpuCtrl[inst->dest] = mips->r[inst->src1];
1170
break;
1171
1172
case IROp::SetCtrlVFPUFReg:
1173
memcpy(&mips->vfpuCtrl[inst->dest], &mips->f[inst->src1], 4);
1174
break;
1175
1176
case IROp::ApplyRoundingMode:
1177
IRApplyRounding(mips);
1178
break;
1179
case IROp::RestoreRoundingMode:
1180
IRRestoreRounding();
1181
break;
1182
case IROp::UpdateRoundingMode:
1183
// TODO: Implement
1184
break;
1185
1186
case IROp::Break:
1187
Core_BreakException(mips->pc);
1188
return mips->pc + 4;
1189
1190
case IROp::Breakpoint:
1191
if (IRRunBreakpoint(inst->constant)) {
1192
CoreTiming::ForceCheck();
1193
return mips->pc;
1194
}
1195
break;
1196
1197
case IROp::MemoryCheck:
1198
if (IRRunMemCheck(mips->pc + inst->dest, mips->r[inst->src1] + inst->constant)) {
1199
CoreTiming::ForceCheck();
1200
return mips->pc;
1201
}
1202
break;
1203
1204
case IROp::ValidateAddress8:
1205
if (RunValidateAddress<1>(mips->pc, mips->r[inst->src1] + inst->constant, inst->src2)) {
1206
CoreTiming::ForceCheck();
1207
return mips->pc;
1208
}
1209
break;
1210
case IROp::ValidateAddress16:
1211
if (RunValidateAddress<2>(mips->pc, mips->r[inst->src1] + inst->constant, inst->src2)) {
1212
CoreTiming::ForceCheck();
1213
return mips->pc;
1214
}
1215
break;
1216
case IROp::ValidateAddress32:
1217
if (RunValidateAddress<4>(mips->pc, mips->r[inst->src1] + inst->constant, inst->src2)) {
1218
CoreTiming::ForceCheck();
1219
return mips->pc;
1220
}
1221
break;
1222
case IROp::ValidateAddress128:
1223
if (RunValidateAddress<16>(mips->pc, mips->r[inst->src1] + inst->constant, inst->src2)) {
1224
CoreTiming::ForceCheck();
1225
return mips->pc;
1226
}
1227
break;
1228
case IROp::LogIRBlock:
1229
if (mipsTracer.tracing_enabled) {
1230
mipsTracer.executed_blocks.push_back(inst->constant);
1231
}
1232
break;
1233
1234
case IROp::Nop: // TODO: This shouldn't crash, but for now we should not emit nops, so...
1235
case IROp::Bad:
1236
default:
1237
Crash();
1238
break;
1239
// Unimplemented IR op. Bad.
1240
}
1241
1242
#ifdef _DEBUG
1243
if (mips->r[0] != 0)
1244
Crash();
1245
#endif
1246
inst++;
1247
}
1248
1249
// We should not reach here anymore.
1250
return 0;
1251
}
1252
1253