Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
hrydgard
GitHub Repository: hrydgard/ppsspp
Path: blob/master/Core/MIPS/IR/IRInterpreter.cpp
3187 views
1
#include <algorithm>
2
#include <cmath>
3
4
#include "ppsspp_config.h"
5
#include "Common/BitSet.h"
6
#include "Common/BitScan.h"
7
#include "Common/Common.h"
8
#include "Common/Data/Convert/SmallDataConvert.h"
9
#include "Common/Math/math_util.h"
10
#include "Common/Math/SIMDHeaders.h"
11
#include "Core/Core.h"
12
#include "Core/CoreTiming.h"
13
#include "Core/Debugger/Breakpoints.h"
14
#include "Core/HLE/HLE.h"
15
#include "Core/HLE/ReplaceTables.h"
16
#include "Core/MemMap.h"
17
#include "Core/MIPS/MIPS.h"
18
#include "Core/MIPS/MIPSTables.h"
19
#include "Core/MIPS/MIPSVFPUUtils.h"
20
#include "Core/MIPS/IR/IRInst.h"
21
#include "Core/MIPS/IR/IRInterpreter.h"
22
#include "Core/System.h"
23
#include "Core/MIPS/MIPSTracer.h"
24
25
#ifdef mips
26
// Why do MIPS compilers define something so generic? Try to keep defined, at least...
27
#undef mips
28
#define mips mips
29
#endif
30
31
// Constant rows used by IROp::Vec4Init (VFPU vector-init instructions such as
// vzero/vone and the unit vectors). 16-byte alignment permits aligned SIMD loads.
alignas(16) static const float vec4InitValues[8][4] = {
	{ 0.0f, 0.0f, 0.0f, 0.0f },
	{ 1.0f, 1.0f, 1.0f, 1.0f },
	{ -1.0f, -1.0f, -1.0f, -1.0f },
	{ 1.0f, 0.0f, 0.0f, 0.0f },
	{ 0.0f, 1.0f, 0.0f, 0.0f },
	{ 0.0f, 0.0f, 1.0f, 0.0f },
	{ 0.0f, 0.0f, 0.0f, 1.0f },
	// NOTE: declared [8][4] but only 7 rows are initialized; the 8th row is
	// implicitly zero-filled by aggregate initialization.
};
40
41
// 0x80000000 in every lane: XOR against a float vector flips the sign bits
// (used by the SSE path of IROp::Vec4Neg below).
alignas(16) static const uint32_t signBits[4] = {
	0x80000000, 0x80000000, 0x80000000, 0x80000000,
};
44
45
// 0x7FFFFFFF in every lane: AND against a float vector clears the sign bits,
// computing absolute value (used by the SSE path of IROp::Vec4Abs below).
alignas(16) static const uint32_t noSignMask[4] = {
	0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF,
};
48
49
// Keeps only the low byte of each 32-bit lane. Not referenced in this part of
// the file — presumably used by a SIMD pack/unpack path further down; verify.
alignas(16) static const uint32_t lowBytesMask[4] = {
	0x000000FF, 0x000000FF, 0x000000FF, 0x000000FF,
};
52
53
// Executes a breakpoint check at pc. Returns non-zero when execution should
// stop (the breakpoint fired or the core was already halted), 0 to continue.
u32 IRRunBreakpoint(u32 pc) {
	// If we're resuming past this exact breakpoint, let it through once.
	const uint32_t resumePC = g_breakpoints.CheckSkipFirst();
	if (resumePC == pc || resumePC == currentMIPS->pc)
		return 0;

	// If the core is no longer running (something else already stopped it),
	// report a hit without re-triggering.
	const bool alreadyStopped = coreState != CORE_RUNNING_CPU && coreState != CORE_NEXTFRAME;
	if (alreadyStopped)
		return 1;

	g_breakpoints.ExecBreakPoint(pc);
	return coreState == CORE_RUNNING_CPU ? 0 : 1;
}
66
67
// Executes a memory-access check for the access at addr made by the op at pc.
// Returns non-zero when execution should stop, 0 to continue.
u32 IRRunMemCheck(u32 pc, u32 addr) {
	// If we're resuming past this exact check, let it through once.
	const uint32_t resumePC = g_breakpoints.CheckSkipFirst();
	if (resumePC == pc || resumePC == currentMIPS->pc)
		return 0;

	// If the core is no longer running (something else already stopped it),
	// report a hit without re-triggering.
	const bool alreadyStopped = coreState != CORE_RUNNING_CPU && coreState != CORE_NEXTFRAME;
	if (alreadyStopped)
		return 1;

	g_breakpoints.ExecOpMemCheck(addr, pc);
	return coreState == CORE_RUNNING_CPU ? 0 : 1;
}
80
81
// Applies the guest's FCR31 rounding mode and flush-to-zero (FTZ) flag to the
// host FPU control register, so interpreted float ops round like the PSP.
// Only the x86 (MXCSR) and non-Windows ARM64 (FPCR) paths are implemented;
// other targets silently keep host defaults.
void IRApplyRounding(MIPSState *mips) {
	// Bit 24 = FTZ, bits [1:0] = rounding mode; everything else is ignored here.
	u32 fcr1Bits = mips->fcr31 & 0x01000003;
	// If these are 0, we just leave things as they are.
	if (fcr1Bits) {
		int rmode = fcr1Bits & 3;
		bool ftz = (fcr1Bits & 0x01000000) != 0;
#if PPSSPP_ARCH(SSE2)
		// Clear MXCSR rounding-control bits [14:13] before inserting the new mode.
		u32 csr = _mm_getcsr() & ~0x6000;
		// Translate the rounding mode bits to X86, the same way as in Asm.cpp.
		// MIPS and x86 agree on 0 (nearest) and 1↔3 swap via XOR with 2 when odd.
		if (rmode & 1) {
			rmode ^= 2;
		}
		csr |= rmode << 13;

		if (ftz) {
			// Flush to zero
			csr |= 0x8000;
		}
		_mm_setcsr(csr);
#elif PPSSPP_ARCH(ARM64) && !PPSSPP_PLATFORM(WINDOWS)
		// On ARM64 we need to use inline assembly for a portable solution.
		// Unfortunately we don't have this possibility on Windows with MSVC, so ifdeffed out above.
		// Note that in the JIT, for fcvts, we use specific conversions. We could use the FCVTS variants
		// directly through inline assembly.
		u64 fpcr;  // not really 64-bit, just to match the register size.
		asm volatile ("mrs %0, fpcr" : "=r" (fpcr));

		// Translate MIPS to ARM rounding mode (MIPS: RN, RZ, RP, RM -> ARM RMode encoding).
		static const u8 lookup[4] = {0, 3, 1, 2};

		fpcr &= ~(3 << 22);  // Clear RMode bits [23:22]
		fpcr |= (lookup[rmode] << 22);

		if (ftz) {
			// Bit 24 is the FPCR FZ (flush-to-zero) bit.
			fpcr |= 1 << 24;
		}
		// Write back the modified FPCR
		asm volatile ("msr fpcr, %0" : : "r" (fpcr));
#endif
	}
}
122
123
// Restores the host FPU to round-to-nearest with FTZ off, undoing whatever
// IRApplyRounding() set. Paired with the same platform coverage as above.
void IRRestoreRounding() {
#if PPSSPP_ARCH(SSE2)
	// TODO: We should avoid this if we didn't apply rounding in the first place.
	// In the meantime, clear out FTZ and rounding mode bits.
	u32 csr = _mm_getcsr();
	// Bits [15:13] of MXCSR: FTZ (15) plus rounding control (14:13).
	csr &= ~(7 << 13);
	_mm_setcsr(csr);
#elif PPSSPP_ARCH(ARM64) && !PPSSPP_PLATFORM(WINDOWS)
	u64 fpcr;  // not really 64-bit, just to match the register size.
	asm volatile ("mrs %0, fpcr" : "=r" (fpcr));
	fpcr &= ~(7 << 22);  // Clear bits [23:22] (rounding mode) and bit 24 (FTZ)
	// Write back the modified FPCR
	asm volatile ("msr fpcr, %0" : : "r" (fpcr));
#endif
}
138
139
// We cannot use NEON on ARM32 here until we make it a hard dependency. We can, however, on ARM64.
140
u32 IRInterpret(MIPSState *mips, const IRInst *inst) {
141
while (true) {
142
switch (inst->op) {
143
case IROp::SetConst:
144
mips->r[inst->dest] = inst->constant;
145
break;
146
case IROp::SetConstF:
147
memcpy(&mips->f[inst->dest], &inst->constant, 4);
148
break;
149
case IROp::Add:
150
mips->r[inst->dest] = mips->r[inst->src1] + mips->r[inst->src2];
151
break;
152
case IROp::Sub:
153
mips->r[inst->dest] = mips->r[inst->src1] - mips->r[inst->src2];
154
break;
155
case IROp::And:
156
mips->r[inst->dest] = mips->r[inst->src1] & mips->r[inst->src2];
157
break;
158
case IROp::Or:
159
mips->r[inst->dest] = mips->r[inst->src1] | mips->r[inst->src2];
160
break;
161
case IROp::Xor:
162
mips->r[inst->dest] = mips->r[inst->src1] ^ mips->r[inst->src2];
163
break;
164
case IROp::Mov:
165
mips->r[inst->dest] = mips->r[inst->src1];
166
break;
167
case IROp::AddConst:
168
mips->r[inst->dest] = mips->r[inst->src1] + inst->constant;
169
break;
170
case IROp::OptAddConst: // For this one, it's worth having a "unary" variant of the above that only needs to read one register param.
171
mips->r[inst->dest] += inst->constant;
172
break;
173
case IROp::SubConst:
174
mips->r[inst->dest] = mips->r[inst->src1] - inst->constant;
175
break;
176
case IROp::AndConst:
177
mips->r[inst->dest] = mips->r[inst->src1] & inst->constant;
178
break;
179
case IROp::OptAndConst: // For this one, it's worth having a "unary" variant of the above that only needs to read one register param.
180
mips->r[inst->dest] &= inst->constant;
181
break;
182
case IROp::OrConst:
183
mips->r[inst->dest] = mips->r[inst->src1] | inst->constant;
184
break;
185
case IROp::OptOrConst:
186
mips->r[inst->dest] |= inst->constant;
187
break;
188
case IROp::XorConst:
189
mips->r[inst->dest] = mips->r[inst->src1] ^ inst->constant;
190
break;
191
case IROp::Neg:
192
mips->r[inst->dest] = (u32)(-(s32)mips->r[inst->src1]);
193
break;
194
case IROp::Not:
195
mips->r[inst->dest] = ~mips->r[inst->src1];
196
break;
197
case IROp::Ext8to32:
198
mips->r[inst->dest] = SignExtend8ToU32(mips->r[inst->src1]);
199
break;
200
case IROp::Ext16to32:
201
mips->r[inst->dest] = SignExtend16ToU32(mips->r[inst->src1]);
202
break;
203
case IROp::ReverseBits:
204
mips->r[inst->dest] = ReverseBits32(mips->r[inst->src1]);
205
break;
206
207
case IROp::Load8:
208
mips->r[inst->dest] = Memory::ReadUnchecked_U8(mips->r[inst->src1] + inst->constant);
209
break;
210
case IROp::Load8Ext:
211
mips->r[inst->dest] = SignExtend8ToU32(Memory::ReadUnchecked_U8(mips->r[inst->src1] + inst->constant));
212
break;
213
case IROp::Load16:
214
mips->r[inst->dest] = Memory::ReadUnchecked_U16(mips->r[inst->src1] + inst->constant);
215
break;
216
case IROp::Load16Ext:
217
mips->r[inst->dest] = SignExtend16ToU32(Memory::ReadUnchecked_U16(mips->r[inst->src1] + inst->constant));
218
break;
219
case IROp::Load32:
220
mips->r[inst->dest] = Memory::ReadUnchecked_U32(mips->r[inst->src1] + inst->constant);
221
break;
222
case IROp::Load32Left:
223
{
224
u32 addr = mips->r[inst->src1] + inst->constant;
225
u32 shift = (addr & 3) * 8;
226
u32 mem = Memory::ReadUnchecked_U32(addr & 0xfffffffc);
227
u32 destMask = 0x00ffffff >> shift;
228
mips->r[inst->dest] = (mips->r[inst->dest] & destMask) | (mem << (24 - shift));
229
break;
230
}
231
case IROp::Load32Right:
232
{
233
u32 addr = mips->r[inst->src1] + inst->constant;
234
u32 shift = (addr & 3) * 8;
235
u32 mem = Memory::ReadUnchecked_U32(addr & 0xfffffffc);
236
u32 destMask = 0xffffff00 << (24 - shift);
237
mips->r[inst->dest] = (mips->r[inst->dest] & destMask) | (mem >> shift);
238
break;
239
}
240
case IROp::Load32Linked:
241
if (inst->dest != MIPS_REG_ZERO)
242
mips->r[inst->dest] = Memory::ReadUnchecked_U32(mips->r[inst->src1] + inst->constant);
243
mips->llBit = 1;
244
break;
245
case IROp::LoadFloat:
246
mips->f[inst->dest] = Memory::ReadUnchecked_Float(mips->r[inst->src1] + inst->constant);
247
break;
248
249
case IROp::Store8:
250
Memory::WriteUnchecked_U8(mips->r[inst->src3], mips->r[inst->src1] + inst->constant);
251
break;
252
case IROp::Store16:
253
Memory::WriteUnchecked_U16(mips->r[inst->src3], mips->r[inst->src1] + inst->constant);
254
break;
255
case IROp::Store32:
256
Memory::WriteUnchecked_U32(mips->r[inst->src3], mips->r[inst->src1] + inst->constant);
257
break;
258
case IROp::Store32Left:
259
{
260
u32 addr = mips->r[inst->src1] + inst->constant;
261
u32 shift = (addr & 3) * 8;
262
u32 mem = Memory::ReadUnchecked_U32(addr & 0xfffffffc);
263
u32 memMask = 0xffffff00 << shift;
264
u32 result = (mips->r[inst->src3] >> (24 - shift)) | (mem & memMask);
265
Memory::WriteUnchecked_U32(result, addr & 0xfffffffc);
266
break;
267
}
268
case IROp::Store32Right:
269
{
270
u32 addr = mips->r[inst->src1] + inst->constant;
271
u32 shift = (addr & 3) * 8;
272
u32 mem = Memory::ReadUnchecked_U32(addr & 0xfffffffc);
273
u32 memMask = 0x00ffffff >> (24 - shift);
274
u32 result = (mips->r[inst->src3] << shift) | (mem & memMask);
275
Memory::WriteUnchecked_U32(result, addr & 0xfffffffc);
276
break;
277
}
278
case IROp::Store32Conditional:
279
if (mips->llBit) {
280
Memory::WriteUnchecked_U32(mips->r[inst->src3], mips->r[inst->src1] + inst->constant);
281
if (inst->dest != MIPS_REG_ZERO) {
282
mips->r[inst->dest] = 1;
283
}
284
} else if (inst->dest != MIPS_REG_ZERO) {
285
mips->r[inst->dest] = 0;
286
}
287
break;
288
case IROp::StoreFloat:
289
Memory::WriteUnchecked_Float(mips->f[inst->src3], mips->r[inst->src1] + inst->constant);
290
break;
291
292
case IROp::LoadVec4:
293
{
294
u32 base = mips->r[inst->src1] + inst->constant;
295
// This compiles to a nice SSE load/store on x86, and hopefully similar on ARM.
296
memcpy(&mips->f[inst->dest], Memory::GetPointerUnchecked(base), 4 * 4);
297
break;
298
}
299
case IROp::StoreVec4:
300
{
301
u32 base = mips->r[inst->src1] + inst->constant;
302
memcpy((float *)Memory::GetPointerUnchecked(base), &mips->f[inst->dest], 4 * 4);
303
break;
304
}
305
306
case IROp::Vec4Init:
307
{
308
memcpy(&mips->f[inst->dest], vec4InitValues[inst->src1], 4 * sizeof(float));
309
break;
310
}
311
312
case IROp::Vec4Shuffle:
313
{
314
// Can't use the SSE shuffle here because it takes an immediate. pshufb with a table would work though,
315
// or a big switch - there are only 256 shuffles possible (4^4)
316
float temp[4];
317
for (int i = 0; i < 4; i++)
318
temp[i] = mips->f[inst->src1 + ((inst->src2 >> (i * 2)) & 3)];
319
const int dest = inst->dest;
320
for (int i = 0; i < 4; i++)
321
mips->f[dest + i] = temp[i];
322
break;
323
}
324
325
case IROp::Vec4Blend:
326
{
327
const int dest = inst->dest;
328
const int src1 = inst->src1;
329
const int src2 = inst->src2;
330
const int constant = inst->constant;
331
// 90% of calls to this is inst->constant == 7 or inst->constant == 8. Some are 1 and 4, others very rare.
332
// Could use _mm_blendv_ps (SSE4+BMI), vbslq_f32 (ARM), __riscv_vmerge_vvm (RISC-V)
333
for (int i = 0; i < 4; i++)
334
mips->f[dest + i] = ((constant >> i) & 1) ? mips->f[src2 + i] : mips->f[src1 + i];
335
break;
336
}
337
338
case IROp::Vec4Mov:
339
{
340
#if defined(_M_SSE)
341
_mm_store_ps(&mips->f[inst->dest], _mm_load_ps(&mips->f[inst->src1]));
342
#elif PPSSPP_ARCH(ARM_NEON)
343
vst1q_f32(&mips->f[inst->dest], vld1q_f32(&mips->f[inst->src1]));
344
#else
345
memcpy(&mips->f[inst->dest], &mips->f[inst->src1], 4 * sizeof(float));
346
#endif
347
break;
348
}
349
350
case IROp::Vec4Add:
351
{
352
#if defined(_M_SSE)
353
_mm_store_ps(&mips->f[inst->dest], _mm_add_ps(_mm_load_ps(&mips->f[inst->src1]), _mm_load_ps(&mips->f[inst->src2])));
354
#elif PPSSPP_ARCH(ARM_NEON)
355
vst1q_f32(&mips->f[inst->dest], vaddq_f32(vld1q_f32(&mips->f[inst->src1]), vld1q_f32(&mips->f[inst->src2])));
356
#else
357
for (int i = 0; i < 4; i++)
358
mips->f[inst->dest + i] = mips->f[inst->src1 + i] + mips->f[inst->src2 + i];
359
#endif
360
break;
361
}
362
363
case IROp::Vec4Sub:
364
{
365
#if defined(_M_SSE)
366
_mm_store_ps(&mips->f[inst->dest], _mm_sub_ps(_mm_load_ps(&mips->f[inst->src1]), _mm_load_ps(&mips->f[inst->src2])));
367
#elif PPSSPP_ARCH(ARM_NEON)
368
vst1q_f32(&mips->f[inst->dest], vsubq_f32(vld1q_f32(&mips->f[inst->src1]), vld1q_f32(&mips->f[inst->src2])));
369
#else
370
for (int i = 0; i < 4; i++)
371
mips->f[inst->dest + i] = mips->f[inst->src1 + i] - mips->f[inst->src2 + i];
372
#endif
373
break;
374
}
375
376
case IROp::Vec4Mul:
377
{
378
#if defined(_M_SSE)
379
_mm_store_ps(&mips->f[inst->dest], _mm_mul_ps(_mm_load_ps(&mips->f[inst->src1]), _mm_load_ps(&mips->f[inst->src2])));
380
#elif PPSSPP_ARCH(ARM_NEON)
381
vst1q_f32(&mips->f[inst->dest], vmulq_f32(vld1q_f32(&mips->f[inst->src1]), vld1q_f32(&mips->f[inst->src2])));
382
#else
383
for (int i = 0; i < 4; i++)
384
mips->f[inst->dest + i] = mips->f[inst->src1 + i] * mips->f[inst->src2 + i];
385
#endif
386
break;
387
}
388
389
case IROp::Vec4Div:
390
{
391
#if defined(_M_SSE)
392
_mm_store_ps(&mips->f[inst->dest], _mm_div_ps(_mm_load_ps(&mips->f[inst->src1]), _mm_load_ps(&mips->f[inst->src2])));
393
#elif PPSSPP_ARCH(ARM64_NEON)
394
vst1q_f32(&mips->f[inst->dest], vdivq_f32(vld1q_f32(&mips->f[inst->src1]), vld1q_f32(&mips->f[inst->src2])));
395
#else
396
for (int i = 0; i < 4; i++)
397
mips->f[inst->dest + i] = mips->f[inst->src1 + i] / mips->f[inst->src2 + i];
398
#endif
399
break;
400
}
401
402
case IROp::Vec4Scale:
403
{
404
#if defined(_M_SSE)
405
_mm_store_ps(&mips->f[inst->dest], _mm_mul_ps(_mm_load_ps(&mips->f[inst->src1]), _mm_set1_ps(mips->f[inst->src2])));
406
#elif PPSSPP_ARCH(ARM_NEON)
407
vst1q_f32(&mips->f[inst->dest], vmulq_lane_f32(vld1q_f32(&mips->f[inst->src1]), vdup_n_f32(mips->f[inst->src2]), 0));
408
#else
409
const float factor = mips->f[inst->src2];
410
for (int i = 0; i < 4; i++)
411
mips->f[inst->dest + i] = mips->f[inst->src1 + i] * factor;
412
#endif
413
break;
414
}
415
416
case IROp::Vec4Neg:
417
{
418
#if defined(_M_SSE)
419
_mm_store_ps(&mips->f[inst->dest], _mm_xor_ps(_mm_load_ps(&mips->f[inst->src1]), _mm_load_ps((const float *)signBits)));
420
#elif PPSSPP_ARCH(ARM_NEON)
421
vst1q_f32(&mips->f[inst->dest], vnegq_f32(vld1q_f32(&mips->f[inst->src1])));
422
#else
423
for (int i = 0; i < 4; i++)
424
mips->f[inst->dest + i] = -mips->f[inst->src1 + i];
425
#endif
426
break;
427
}
428
429
case IROp::Vec4Abs:
430
{
431
#if defined(_M_SSE)
432
_mm_store_ps(&mips->f[inst->dest], _mm_and_ps(_mm_load_ps(&mips->f[inst->src1]), _mm_load_ps((const float *)noSignMask)));
433
#elif PPSSPP_ARCH(ARM_NEON)
434
vst1q_f32(&mips->f[inst->dest], vabsq_f32(vld1q_f32(&mips->f[inst->src1])));
435
#else
436
for (int i = 0; i < 4; i++)
437
mips->f[inst->dest + i] = fabsf(mips->f[inst->src1 + i]);
438
#endif
439
break;
440
}
441
442
case IROp::Vec2Unpack16To31:
443
{
444
const int dest = inst->dest;
445
const int src1 = inst->src1;
446
mips->fi[dest] = (mips->fi[src1] << 16) >> 1;
447
mips->fi[dest + 1] = (mips->fi[src1] & 0xFFFF0000) >> 1;
448
break;
449
}
450
451
case IROp::Vec2Unpack16To32:
452
{
453
const int dest = inst->dest;
454
const int src1 = inst->src1;
455
mips->fi[dest] = (mips->fi[src1] << 16);
456
mips->fi[dest + 1] = (mips->fi[src1] & 0xFFFF0000);
457
break;
458
}
459
460
case IROp::Vec4Unpack8To32:
461
{
462
#if defined(_M_SSE)
463
__m128i src = _mm_cvtsi32_si128(mips->fi[inst->src1]);
464
src = _mm_unpacklo_epi8(src, _mm_setzero_si128());
465
src = _mm_unpacklo_epi16(src, _mm_setzero_si128());
466
_mm_store_si128((__m128i *)&mips->fi[inst->dest], _mm_slli_epi32(src, 24));
467
#elif PPSSPP_ARCH(ARM_NEON) && 0 // Untested
468
const uint8x8_t value = (uint8x8_t)vdup_n_u32(mips->fi[inst->src1]);
469
const uint16x8_t value16 = vmovl_u8(value);
470
const uint32x4_t value32 = vshll_n_u16(vget_low_u16(value16), 24);
471
vst1q_u32(&mips->fi[inst->dest], value32);
472
#else
473
mips->fi[inst->dest] = (mips->fi[inst->src1] << 24);
474
mips->fi[inst->dest + 1] = (mips->fi[inst->src1] << 16) & 0xFF000000;
475
mips->fi[inst->dest + 2] = (mips->fi[inst->src1] << 8) & 0xFF000000;
476
mips->fi[inst->dest + 3] = (mips->fi[inst->src1]) & 0xFF000000;
477
#endif
478
break;
479
}
480
481
case IROp::Vec2Pack32To16:
482
{
483
u32 val = mips->fi[inst->src1] >> 16;
484
mips->fi[inst->dest] = (mips->fi[inst->src1 + 1] & 0xFFFF0000) | val;
485
break;
486
}
487
488
case IROp::Vec2Pack31To16:
489
{
490
// Used in Tekken 6
491
492
u32 val = (mips->fi[inst->src1] >> 15) & 0xFFFF;
493
val |= (mips->fi[inst->src1 + 1] << 1) & 0xFFFF0000;
494
mips->fi[inst->dest] = val;
495
break;
496
}
497
498
case IROp::Vec4Pack32To8:
499
{
500
// Removed previous SSE code due to the need for unsigned 16-bit pack, which I'm too lazy to work around the lack of in SSE2.
501
// pshufb or SSE4 instructions can be used instead.
502
u32 val = mips->fi[inst->src1] >> 24;
503
val |= (mips->fi[inst->src1 + 1] >> 16) & 0xFF00;
504
val |= (mips->fi[inst->src1 + 2] >> 8) & 0xFF0000;
505
val |= (mips->fi[inst->src1 + 3]) & 0xFF000000;
506
mips->fi[inst->dest] = val;
507
break;
508
}
509
510
case IROp::Vec4Pack31To8:
511
{
512
// Used in Tekken 6
513
514
// Removed previous SSE code due to the need for unsigned 16-bit pack, which I'm too lazy to work around the lack of in SSE2.
515
// pshufb or SSE4 instructions can be used instead.
516
#if PPSSPP_ARCH(ARM_NEON) && 0
517
// Untested
518
uint32x4_t value = vld1q_u32(&mips->fi[inst->src1]);
519
value = vshlq_n_u32(value, 1);
520
uint32x2_t halved = vshrn_n_u32(value, 8);
521
uint32x2_t halvedAgain = vshrn_n_u32(vcombine_u32(halved, vdup_n_u32(0)), 8);
522
mips->fi[inst->dest] = vget_lane_u32(halvedAgain, 0);
523
#else
524
u32 val = (mips->fi[inst->src1] >> 23) & 0xFF;
525
val |= (mips->fi[inst->src1 + 1] >> 15) & 0xFF00;
526
val |= (mips->fi[inst->src1 + 2] >> 7) & 0xFF0000;
527
val |= (mips->fi[inst->src1 + 3] << 1) & 0xFF000000;
528
mips->fi[inst->dest] = val;
529
#endif
530
break;
531
}
532
533
case IROp::Vec2ClampToZero:
534
{
535
for (int i = 0; i < 2; i++) {
536
u32 val = mips->fi[inst->src1 + i];
537
mips->fi[inst->dest + i] = (int)val >= 0 ? val : 0;
538
}
539
break;
540
}
541
542
case IROp::Vec4ClampToZero:
543
{
544
#if defined(_M_SSE)
545
// Trickery: Expand the sign bit, and use andnot to zero negative values.
546
__m128i val = _mm_load_si128((const __m128i *)&mips->fi[inst->src1]);
547
__m128i mask = _mm_srai_epi32(val, 31);
548
val = _mm_andnot_si128(mask, val);
549
_mm_store_si128((__m128i *)&mips->fi[inst->dest], val);
550
#else
551
const int src1 = inst->src1;
552
const int dest = inst->dest;
553
for (int i = 0; i < 4; i++) {
554
u32 val = mips->fi[src1 + i];
555
mips->fi[dest + i] = (int)val >= 0 ? val : 0;
556
}
557
#endif
558
break;
559
}
560
561
case IROp::Vec4DuplicateUpperBitsAndShift1: // For vuc2i, the weird one.
562
{
563
const int src1 = inst->src1;
564
const int dest = inst->dest;
565
for (int i = 0; i < 4; i++) {
566
u32 val = mips->fi[src1 + i];
567
val = val | (val >> 8);
568
val = val | (val >> 16);
569
val >>= 1;
570
mips->fi[dest + i] = val;
571
}
572
break;
573
}
574
575
case IROp::FCmpVfpuBit:
576
{
577
const int op = inst->dest & 0xF;
578
const int bit = inst->dest >> 4;
579
int result = 0;
580
switch (op) {
581
case VC_EQ: result = mips->f[inst->src1] == mips->f[inst->src2]; break;
582
case VC_NE: result = mips->f[inst->src1] != mips->f[inst->src2]; break;
583
case VC_LT: result = mips->f[inst->src1] < mips->f[inst->src2]; break;
584
case VC_LE: result = mips->f[inst->src1] <= mips->f[inst->src2]; break;
585
case VC_GT: result = mips->f[inst->src1] > mips->f[inst->src2]; break;
586
case VC_GE: result = mips->f[inst->src1] >= mips->f[inst->src2]; break;
587
case VC_EZ: result = mips->f[inst->src1] == 0.0f; break;
588
case VC_NZ: result = mips->f[inst->src1] != 0.0f; break;
589
case VC_EN: result = my_isnan(mips->f[inst->src1]); break;
590
case VC_NN: result = !my_isnan(mips->f[inst->src1]); break;
591
case VC_EI: result = my_isinf(mips->f[inst->src1]); break;
592
case VC_NI: result = !my_isinf(mips->f[inst->src1]); break;
593
case VC_ES: result = my_isnanorinf(mips->f[inst->src1]); break;
594
case VC_NS: result = !my_isnanorinf(mips->f[inst->src1]); break;
595
case VC_TR: result = 1; break;
596
case VC_FL: result = 0; break;
597
default:
598
result = 0;
599
}
600
if (result != 0) {
601
mips->vfpuCtrl[VFPU_CTRL_CC] |= (1 << bit);
602
} else {
603
mips->vfpuCtrl[VFPU_CTRL_CC] &= ~(1 << bit);
604
}
605
break;
606
}
607
608
case IROp::FCmpVfpuAggregate:
609
{
610
const u32 mask = inst->dest;
611
const u32 cc = mips->vfpuCtrl[VFPU_CTRL_CC];
612
int anyBit = (cc & mask) ? 0x10 : 0x00;
613
int allBit = (cc & mask) == mask ? 0x20 : 0x00;
614
mips->vfpuCtrl[VFPU_CTRL_CC] = (cc & ~0x30) | anyBit | allBit;
615
break;
616
}
617
618
case IROp::FCmovVfpuCC:
619
if (((mips->vfpuCtrl[VFPU_CTRL_CC] >> (inst->src2 & 0xf)) & 1) == ((u32)inst->src2 >> 7)) {
620
mips->f[inst->dest] = mips->f[inst->src1];
621
}
622
break;
623
624
case IROp::Vec4Dot:
625
{
626
// Not quickly implementable on all platforms, unfortunately.
627
// Though, this is still pretty fast compared to one split into multiple IR instructions.
628
// This might be good though: https://gist.github.com/rikusalminen/3040241
629
float dot = mips->f[inst->src1] * mips->f[inst->src2];
630
for (int i = 1; i < 4; i++)
631
dot += mips->f[inst->src1 + i] * mips->f[inst->src2 + i];
632
mips->f[inst->dest] = dot;
633
break;
634
}
635
636
case IROp::FSin:
637
mips->f[inst->dest] = vfpu_sin(mips->f[inst->src1]);
638
break;
639
case IROp::FCos:
640
mips->f[inst->dest] = vfpu_cos(mips->f[inst->src1]);
641
break;
642
case IROp::FRSqrt:
643
mips->f[inst->dest] = 1.0f / sqrtf(mips->f[inst->src1]);
644
break;
645
case IROp::FRecip:
646
mips->f[inst->dest] = 1.0f / mips->f[inst->src1];
647
break;
648
case IROp::FAsin:
649
mips->f[inst->dest] = vfpu_asin(mips->f[inst->src1]);
650
break;
651
652
case IROp::ShlImm:
653
mips->r[inst->dest] = mips->r[inst->src1] << (int)inst->src2;
654
break;
655
case IROp::ShrImm:
656
mips->r[inst->dest] = mips->r[inst->src1] >> (int)inst->src2;
657
break;
658
case IROp::SarImm:
659
mips->r[inst->dest] = (s32)mips->r[inst->src1] >> (int)inst->src2;
660
break;
661
case IROp::RorImm:
662
{
663
u32 x = mips->r[inst->src1];
664
int sa = inst->src2;
665
mips->r[inst->dest] = (x >> sa) | (x << (32 - sa));
666
}
667
break;
668
669
case IROp::Shl:
670
mips->r[inst->dest] = mips->r[inst->src1] << (mips->r[inst->src2] & 31);
671
break;
672
case IROp::Shr:
673
mips->r[inst->dest] = mips->r[inst->src1] >> (mips->r[inst->src2] & 31);
674
break;
675
case IROp::Sar:
676
mips->r[inst->dest] = (s32)mips->r[inst->src1] >> (mips->r[inst->src2] & 31);
677
break;
678
case IROp::Ror:
679
{
680
u32 x = mips->r[inst->src1];
681
int sa = mips->r[inst->src2] & 31;
682
mips->r[inst->dest] = (x >> sa) | (x << (32 - sa));
683
break;
684
}
685
686
case IROp::Clz:
687
{
688
mips->r[inst->dest] = clz32(mips->r[inst->src1]);
689
break;
690
}
691
692
case IROp::Slt:
693
mips->r[inst->dest] = (s32)mips->r[inst->src1] < (s32)mips->r[inst->src2];
694
break;
695
696
case IROp::SltU:
697
mips->r[inst->dest] = mips->r[inst->src1] < mips->r[inst->src2];
698
break;
699
700
case IROp::SltConst:
701
mips->r[inst->dest] = (s32)mips->r[inst->src1] < (s32)inst->constant;
702
break;
703
704
case IROp::SltUConst:
705
mips->r[inst->dest] = mips->r[inst->src1] < inst->constant;
706
break;
707
708
case IROp::MovZ:
709
if (mips->r[inst->src1] == 0)
710
mips->r[inst->dest] = mips->r[inst->src2];
711
break;
712
case IROp::MovNZ:
713
if (mips->r[inst->src1] != 0)
714
mips->r[inst->dest] = mips->r[inst->src2];
715
break;
716
717
case IROp::Max:
718
mips->r[inst->dest] = (s32)mips->r[inst->src1] > (s32)mips->r[inst->src2] ? mips->r[inst->src1] : mips->r[inst->src2];
719
break;
720
case IROp::Min:
721
mips->r[inst->dest] = (s32)mips->r[inst->src1] < (s32)mips->r[inst->src2] ? mips->r[inst->src1] : mips->r[inst->src2];
722
break;
723
724
case IROp::MtLo:
725
mips->lo = mips->r[inst->src1];
726
break;
727
case IROp::MtHi:
728
mips->hi = mips->r[inst->src1];
729
break;
730
case IROp::MfLo:
731
mips->r[inst->dest] = mips->lo;
732
break;
733
case IROp::MfHi:
734
mips->r[inst->dest] = mips->hi;
735
break;
736
737
case IROp::Mult:
738
{
739
s64 result = (s64)(s32)mips->r[inst->src1] * (s64)(s32)mips->r[inst->src2];
740
memcpy(&mips->lo, &result, 8);
741
break;
742
}
743
case IROp::MultU:
744
{
745
u64 result = (u64)mips->r[inst->src1] * (u64)mips->r[inst->src2];
746
memcpy(&mips->lo, &result, 8);
747
break;
748
}
749
case IROp::Madd:
750
{
751
s64 result;
752
memcpy(&result, &mips->lo, 8);
753
result += (s64)(s32)mips->r[inst->src1] * (s64)(s32)mips->r[inst->src2];
754
memcpy(&mips->lo, &result, 8);
755
break;
756
}
757
case IROp::MaddU:
758
{
759
s64 result;
760
memcpy(&result, &mips->lo, 8);
761
result += (u64)mips->r[inst->src1] * (u64)mips->r[inst->src2];
762
memcpy(&mips->lo, &result, 8);
763
break;
764
}
765
case IROp::Msub:
766
{
767
s64 result;
768
memcpy(&result, &mips->lo, 8);
769
result -= (s64)(s32)mips->r[inst->src1] * (s64)(s32)mips->r[inst->src2];
770
memcpy(&mips->lo, &result, 8);
771
break;
772
}
773
case IROp::MsubU:
774
{
775
s64 result;
776
memcpy(&result, &mips->lo, 8);
777
result -= (u64)mips->r[inst->src1] * (u64)mips->r[inst->src2];
778
memcpy(&mips->lo, &result, 8);
779
break;
780
}
781
782
case IROp::Div:
783
{
784
s32 numerator = (s32)mips->r[inst->src1];
785
s32 denominator = (s32)mips->r[inst->src2];
786
if (numerator == (s32)0x80000000 && denominator == -1) {
787
mips->lo = 0x80000000;
788
mips->hi = -1;
789
} else if (denominator != 0) {
790
mips->lo = (u32)(numerator / denominator);
791
mips->hi = (u32)(numerator % denominator);
792
} else {
793
mips->lo = numerator < 0 ? 1 : -1;
794
mips->hi = numerator;
795
}
796
break;
797
}
798
case IROp::DivU:
799
{
800
u32 numerator = mips->r[inst->src1];
801
u32 denominator = mips->r[inst->src2];
802
if (denominator != 0) {
803
mips->lo = numerator / denominator;
804
mips->hi = numerator % denominator;
805
} else {
806
mips->lo = numerator <= 0xFFFF ? 0xFFFF : -1;
807
mips->hi = numerator;
808
}
809
break;
810
}
811
812
case IROp::BSwap16:
813
{
814
u32 x = mips->r[inst->src1];
815
// Don't think we can beat this with intrinsics.
816
mips->r[inst->dest] = ((x & 0xFF00FF00) >> 8) | ((x & 0x00FF00FF) << 8);
817
break;
818
}
819
case IROp::BSwap32:
820
{
821
mips->r[inst->dest] = swap32(mips->r[inst->src1]);
822
break;
823
}
824
825
case IROp::FAdd:
826
mips->f[inst->dest] = mips->f[inst->src1] + mips->f[inst->src2];
827
break;
828
case IROp::FSub:
829
mips->f[inst->dest] = mips->f[inst->src1] - mips->f[inst->src2];
830
break;
831
case IROp::FMul:
832
#if 1
833
{
834
float a = mips->f[inst->src1];
835
float b = mips->f[inst->src2];
836
if ((b == 0.0f && my_isinf(a)) || (a == 0.0f && my_isinf(b))) {
837
mips->fi[inst->dest] = 0x7fc00000;
838
} else {
839
mips->f[inst->dest] = a * b;
840
}
841
}
842
break;
843
#else
844
// Not sure if faster since it needs to load the operands twice? But the code is simpler.
845
{
846
// Takes care of negative zero by masking away the top bit, which also makes the inf check shorter.
847
u32 a = mips->fi[inst->src1] & 0x7FFFFFFF;
848
u32 b = mips->fi[inst->src2] & 0x7FFFFFFF;
849
if ((a == 0 && b == 0x7F800000) || (b == 0 && a == 0x7F800000)) {
850
mips->fi[inst->dest] = 0x7fc00000;
851
} else {
852
mips->f[inst->dest] = mips->f[inst->src1] * mips->f[inst->src2];
853
}
854
break;
855
}
856
#endif
857
case IROp::FDiv:
858
mips->f[inst->dest] = mips->f[inst->src1] / mips->f[inst->src2];
859
break;
860
case IROp::FMin:
861
if (my_isnan(mips->f[inst->src1]) || my_isnan(mips->f[inst->src2])) {
862
// See interpreter for this logic: this is for vmin, we're comparing mantissa+exp.
863
if (mips->fs[inst->src1] < 0 && mips->fs[inst->src2] < 0) {
864
mips->fs[inst->dest] = std::max(mips->fs[inst->src1], mips->fs[inst->src2]);
865
} else {
866
mips->fs[inst->dest] = std::min(mips->fs[inst->src1], mips->fs[inst->src2]);
867
}
868
} else {
869
mips->f[inst->dest] = std::min(mips->f[inst->src1], mips->f[inst->src2]);
870
}
871
break;
872
case IROp::FMax:
873
if (my_isnan(mips->f[inst->src1]) || my_isnan(mips->f[inst->src2])) {
874
// See interpreter for this logic: this is for vmax, we're comparing mantissa+exp.
875
if (mips->fs[inst->src1] < 0 && mips->fs[inst->src2] < 0) {
876
mips->fs[inst->dest] = std::min(mips->fs[inst->src1], mips->fs[inst->src2]);
877
} else {
878
mips->fs[inst->dest] = std::max(mips->fs[inst->src1], mips->fs[inst->src2]);
879
}
880
} else {
881
mips->f[inst->dest] = std::max(mips->f[inst->src1], mips->f[inst->src2]);
882
}
883
break;
884
885
case IROp::FMov:
886
mips->f[inst->dest] = mips->f[inst->src1];
887
break;
888
case IROp::FAbs:
889
mips->f[inst->dest] = fabsf(mips->f[inst->src1]);
890
break;
891
case IROp::FSqrt:
892
mips->f[inst->dest] = sqrtf(mips->f[inst->src1]);
893
break;
894
case IROp::FNeg:
895
mips->f[inst->dest] = -mips->f[inst->src1];
896
break;
897
case IROp::FSat0_1:
898
// We have to do this carefully to handle NAN and -0.0f.
899
mips->f[inst->dest] = vfpu_clamp(mips->f[inst->src1], 0.0f, 1.0f);
900
break;
901
case IROp::FSatMinus1_1:
902
mips->f[inst->dest] = vfpu_clamp(mips->f[inst->src1], -1.0f, 1.0f);
903
break;
904
905
case IROp::FSign:
906
{
907
// Bitwise trickery
908
u32 val;
909
memcpy(&val, &mips->f[inst->src1], sizeof(u32));
910
if (val == 0 || val == 0x80000000)
911
mips->f[inst->dest] = 0.0f;
912
else if ((val >> 31) == 0)
913
mips->f[inst->dest] = 1.0f;
914
else
915
mips->f[inst->dest] = -1.0f;
916
break;
917
}
918
919
case IROp::FpCondFromReg:
920
mips->fpcond = mips->r[inst->dest];
921
break;
922
case IROp::FpCondToReg:
923
mips->r[inst->dest] = mips->fpcond;
924
break;
925
case IROp::FpCtrlFromReg:
926
mips->fcr31 = mips->r[inst->src1] & 0x0181FFFF;
927
// Extract the new fpcond value.
928
// TODO: Is it really helping us to keep it separate?
929
mips->fpcond = (mips->fcr31 >> 23) & 1;
930
break;
931
case IROp::FpCtrlToReg:
932
// Update the fpcond bit first.
933
mips->fcr31 = (mips->fcr31 & ~(1 << 23)) | ((mips->fpcond & 1) << 23);
934
mips->r[inst->dest] = mips->fcr31;
935
break;
936
case IROp::VfpuCtrlToReg:
937
mips->r[inst->dest] = mips->vfpuCtrl[inst->src1];
938
break;
939
case IROp::FRound:
940
{
941
float value = mips->f[inst->src1];
942
if (my_isnanorinf(value)) {
943
mips->fi[inst->dest] = my_isinf(value) && value < 0.0f ? -2147483648LL : 2147483647LL;
944
break;
945
} else {
946
mips->fs[inst->dest] = (int)round_ieee_754(value);
947
}
948
break;
949
}
950
case IROp::FTrunc:
951
{
952
float value = mips->f[inst->src1];
953
if (my_isnanorinf(value)) {
954
mips->fi[inst->dest] = my_isinf(value) && value < 0.0f ? -2147483648LL : 2147483647LL;
955
break;
956
} else {
957
if (value >= 0.0f) {
958
mips->fs[inst->dest] = (int)floorf(value);
959
// Overflow, but it was positive.
960
if (mips->fs[inst->dest] == -2147483648LL) {
961
mips->fs[inst->dest] = 2147483647LL;
962
}
963
} else {
964
// Overflow happens to be the right value anyway.
965
mips->fs[inst->dest] = (int)ceilf(value);
966
}
967
break;
968
}
969
}
970
case IROp::FCeil:
971
{
972
float value = mips->f[inst->src1];
973
if (my_isnanorinf(value)) {
974
mips->fi[inst->dest] = my_isinf(value) && value < 0.0f ? -2147483648LL : 2147483647LL;
975
break;
976
} else {
977
mips->fs[inst->dest] = (int)ceilf(value);
978
}
979
break;
980
}
981
case IROp::FFloor:
982
{
983
float value = mips->f[inst->src1];
984
if (my_isnanorinf(value)) {
985
mips->fi[inst->dest] = my_isinf(value) && value < 0.0f ? -2147483648LL : 2147483647LL;
986
break;
987
} else {
988
mips->fs[inst->dest] = (int)floorf(value);
989
}
990
break;
991
}
992
case IROp::FCmp:
993
switch (inst->dest) {
994
case IRFpCompareMode::False:
995
mips->fpcond = 0;
996
break;
997
case IRFpCompareMode::EitherUnordered:
998
{
999
float a = mips->f[inst->src1];
1000
float b = mips->f[inst->src2];
1001
mips->fpcond = !(a > b || a < b || a == b);
1002
break;
1003
}
1004
case IRFpCompareMode::EqualOrdered:
1005
mips->fpcond = mips->f[inst->src1] == mips->f[inst->src2];
1006
break;
1007
case IRFpCompareMode::EqualUnordered:
1008
mips->fpcond = mips->f[inst->src1] == mips->f[inst->src2] || my_isnan(mips->f[inst->src1]) || my_isnan(mips->f[inst->src2]);
1009
break;
1010
case IRFpCompareMode::LessEqualOrdered:
1011
mips->fpcond = mips->f[inst->src1] <= mips->f[inst->src2];
1012
break;
1013
case IRFpCompareMode::LessEqualUnordered:
1014
mips->fpcond = !(mips->f[inst->src1] > mips->f[inst->src2]);
1015
break;
1016
case IRFpCompareMode::LessOrdered:
1017
mips->fpcond = mips->f[inst->src1] < mips->f[inst->src2];
1018
break;
1019
case IRFpCompareMode::LessUnordered:
1020
mips->fpcond = !(mips->f[inst->src1] >= mips->f[inst->src2]);
1021
break;
1022
}
1023
break;
1024
1025
case IROp::FCvtSW:
1026
mips->f[inst->dest] = (float)mips->fs[inst->src1];
1027
break;
1028
case IROp::FCvtWS:
1029
{
1030
float src = mips->f[inst->src1];
1031
if (my_isnanorinf(src)) {
1032
mips->fs[inst->dest] = my_isinf(src) && src < 0.0f ? -2147483648LL : 2147483647LL;
1033
break;
1034
}
1035
// TODO: Inline assembly to use here would be better.
1036
switch (IRRoundMode(mips->fcr31 & 3)) {
1037
case IRRoundMode::RINT_0: mips->fs[inst->dest] = (int)round_ieee_754(src); break;
1038
case IRRoundMode::CAST_1: mips->fs[inst->dest] = (int)src; break;
1039
case IRRoundMode::CEIL_2: mips->fs[inst->dest] = (int)ceilf(src); break;
1040
case IRRoundMode::FLOOR_3: mips->fs[inst->dest] = (int)floorf(src); break;
1041
}
1042
break; //cvt.w.s
1043
}
1044
case IROp::FCvtScaledSW:
1045
mips->f[inst->dest] = (float)mips->fs[inst->src1] * (1.0f / (1UL << (inst->src2 & 0x1F)));
1046
break;
1047
case IROp::FCvtScaledWS:
1048
{
1049
float src = mips->f[inst->src1];
1050
if (my_isnan(src)) {
1051
// TODO: True for negatives too?
1052
mips->fs[inst->dest] = 2147483647L;
1053
break;
1054
}
1055
1056
float mult = (float)(1UL << (inst->src2 & 0x1F));
1057
double sv = src * mult; // (float)0x7fffffff == (float)0x80000000
1058
// Cap/floor it to 0x7fffffff / 0x80000000
1059
if (sv > (double)0x7fffffff) {
1060
mips->fs[inst->dest] = 0x7fffffff;
1061
} else if (sv <= (double)(int)0x80000000) {
1062
mips->fs[inst->dest] = 0x80000000;
1063
} else {
1064
switch (IRRoundMode(inst->src2 >> 6)) {
1065
case IRRoundMode::RINT_0: mips->fs[inst->dest] = (int)round_ieee_754(sv); break;
1066
case IRRoundMode::CAST_1: mips->fs[inst->dest] = src >= 0 ? (int)floor(sv) : (int)ceil(sv); break;
1067
case IRRoundMode::CEIL_2: mips->fs[inst->dest] = (int)ceil(sv); break;
1068
case IRRoundMode::FLOOR_3: mips->fs[inst->dest] = (int)floor(sv); break;
1069
}
1070
}
1071
break;
1072
}
1073
1074
case IROp::FMovFromGPR:
1075
memcpy(&mips->f[inst->dest], &mips->r[inst->src1], 4);
1076
break;
1077
case IROp::OptFCvtSWFromGPR:
1078
mips->f[inst->dest] = (float)(int)mips->r[inst->src1];
1079
break;
1080
case IROp::FMovToGPR:
1081
memcpy(&mips->r[inst->dest], &mips->f[inst->src1], 4);
1082
break;
1083
case IROp::OptFMovToGPRShr8:
1084
{
1085
u32 temp;
1086
memcpy(&temp, &mips->f[inst->src1], 4);
1087
mips->r[inst->dest] = temp >> 8;
1088
break;
1089
}
1090
1091
case IROp::ExitToConst:
1092
return inst->constant;
1093
1094
case IROp::ExitToReg:
1095
return mips->r[inst->src1];
1096
1097
case IROp::ExitToConstIfEq:
1098
if (mips->r[inst->src1] == mips->r[inst->src2])
1099
return inst->constant;
1100
break;
1101
case IROp::ExitToConstIfNeq:
1102
if (mips->r[inst->src1] != mips->r[inst->src2])
1103
return inst->constant;
1104
break;
1105
case IROp::ExitToConstIfGtZ:
1106
if ((s32)mips->r[inst->src1] > 0)
1107
return inst->constant;
1108
break;
1109
case IROp::ExitToConstIfGeZ:
1110
if ((s32)mips->r[inst->src1] >= 0)
1111
return inst->constant;
1112
break;
1113
case IROp::ExitToConstIfLtZ:
1114
if ((s32)mips->r[inst->src1] < 0)
1115
return inst->constant;
1116
break;
1117
case IROp::ExitToConstIfLeZ:
1118
if ((s32)mips->r[inst->src1] <= 0)
1119
return inst->constant;
1120
break;
1121
1122
case IROp::Downcount:
1123
mips->downcount -= (int)inst->constant;
1124
break;
1125
1126
case IROp::SetPC:
1127
mips->pc = mips->r[inst->src1];
1128
break;
1129
1130
case IROp::SetPCConst:
1131
mips->pc = inst->constant;
1132
break;
1133
1134
case IROp::Syscall:
1135
// IROp::SetPC was (hopefully) executed before.
1136
{
1137
MIPSOpcode op(inst->constant);
1138
CallSyscall(op);
1139
if (coreState != CORE_RUNNING_CPU)
1140
CoreTiming::ForceCheck();
1141
break;
1142
}
1143
1144
case IROp::ExitToPC:
1145
return mips->pc;
1146
1147
case IROp::Interpret: // SLOW fallback. Can be made faster. Ideally should be removed but may be useful for debugging.
1148
{
1149
MIPSOpcode op(inst->constant);
1150
MIPSInterpret(op);
1151
break;
1152
}
1153
1154
case IROp::CallReplacement:
1155
{
1156
int funcIndex = inst->constant;
1157
const ReplacementTableEntry *f = GetReplacementFunc(funcIndex);
1158
int cycles = f->replaceFunc();
1159
mips->r[inst->dest] = cycles < 0 ? -1 : 0;
1160
mips->downcount -= cycles < 0 ? -cycles : cycles;
1161
break;
1162
}
1163
1164
case IROp::SetCtrlVFPU:
1165
mips->vfpuCtrl[inst->dest] = inst->constant;
1166
break;
1167
1168
case IROp::SetCtrlVFPUReg:
1169
mips->vfpuCtrl[inst->dest] = mips->r[inst->src1];
1170
break;
1171
1172
case IROp::SetCtrlVFPUFReg:
1173
memcpy(&mips->vfpuCtrl[inst->dest], &mips->f[inst->src1], 4);
1174
break;
1175
1176
case IROp::ApplyRoundingMode:
1177
IRApplyRounding(mips);
1178
break;
1179
case IROp::RestoreRoundingMode:
1180
IRRestoreRounding();
1181
break;
1182
case IROp::UpdateRoundingMode:
1183
// TODO: Implement
1184
break;
1185
1186
case IROp::Break:
1187
Core_BreakException(mips->pc);
1188
return mips->pc + 4;
1189
1190
case IROp::Breakpoint:
1191
if (IRRunBreakpoint(inst->constant)) {
1192
CoreTiming::ForceCheck();
1193
return mips->pc;
1194
}
1195
break;
1196
1197
case IROp::MemoryCheck:
1198
if (IRRunMemCheck(mips->pc + inst->dest, mips->r[inst->src1] + inst->constant)) {
1199
CoreTiming::ForceCheck();
1200
return mips->pc;
1201
}
1202
break;
1203
1204
case IROp::ValidateAddress8:
1205
if (RunValidateAddress<1>(mips->pc, mips->r[inst->src1] + inst->constant, inst->src2)) {
1206
CoreTiming::ForceCheck();
1207
return mips->pc;
1208
}
1209
break;
1210
case IROp::ValidateAddress16:
1211
if (RunValidateAddress<2>(mips->pc, mips->r[inst->src1] + inst->constant, inst->src2)) {
1212
CoreTiming::ForceCheck();
1213
return mips->pc;
1214
}
1215
break;
1216
case IROp::ValidateAddress32:
1217
if (RunValidateAddress<4>(mips->pc, mips->r[inst->src1] + inst->constant, inst->src2)) {
1218
CoreTiming::ForceCheck();
1219
return mips->pc;
1220
}
1221
break;
1222
case IROp::ValidateAddress128:
1223
if (RunValidateAddress<16>(mips->pc, mips->r[inst->src1] + inst->constant, inst->src2)) {
1224
CoreTiming::ForceCheck();
1225
return mips->pc;
1226
}
1227
break;
1228
case IROp::LogIRBlock:
1229
if (mipsTracer.tracing_enabled) {
1230
mipsTracer.executed_blocks.push_back(inst->constant);
1231
}
1232
break;
1233
1234
case IROp::Nop: // TODO: This shouldn't crash, but for now we should not emit nops, so...
1235
case IROp::Bad:
1236
default:
1237
Crash();
1238
break;
1239
// Unimplemented IR op. Bad.
1240
}
1241
1242
#ifdef _DEBUG
1243
if (mips->r[0] != 0)
1244
Crash();
1245
#endif
1246
inst++;
1247
}
1248
1249
// We should not reach here anymore.
1250
return 0;
1251
}
1252
1253