// Copyright (c) 2012- PPSSPP Project.

// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0 or later versions.

// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.

// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/

// Official git repository and contact information can be found at
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.

#include "ppsspp_config.h"
#include <set>
#include <algorithm>

#include "ext/xxhash.h"
#include "Common/Profiler/Profiler.h"

#include "Common/Log.h"
#include "Common/Serialize/Serializer.h"
#include "Common/StringUtils.h"

#include "Core/Config.h"
#include "Core/Core.h"
#include "Core/CoreTiming.h"
#include "Core/HLE/sceKernelMemory.h"
#include "Core/MemMap.h"
#include "Core/MIPS/MIPS.h"
#include "Core/MIPS/MIPSCodeUtils.h"
#include "Core/MIPS/MIPSInt.h"
#include "Core/MIPS/MIPSTables.h"
#include "Core/MIPS/IR/IRRegCache.h"
#include "Core/MIPS/IR/IRInterpreter.h"
#include "Core/MIPS/IR/IRJit.h"
#include "Core/MIPS/IR/IRNativeCommon.h"
#include "Core/MIPS/JitCommon/JitCommon.h"
#include "Core/Reporting.h"
#include "Common/TimeUtil.h"
#include "Core/MIPS/MIPSTracer.h"

namespace MIPSComp {

IRJit::IRJit(MIPSState *mipsState, bool actualJit) : frontend_(mipsState->HasDefaultPrefix()), mips_(mipsState), blocks_(actualJit) {
	// u32 size = 128 * 1024;
	InitIR();

	compileToNative_ = actualJit;

	// If this IRJit instance will be used to drive a "JIT using IR", don't optimize for interpretation.
	jo.optimizeForInterpreter = !actualJit;

	IROptions opts{};
	opts.disableFlags = g_Config.uJitDisableFlags;
#if PPSSPP_ARCH(RISCV64)
	// Assume RISC-V always has very slow unaligned memory accesses.
	opts.unalignedLoadStore = false;
	opts.unalignedLoadStoreVec4 = true;
	opts.preferVec4 = cpu_info.RiscV_V;
#elif PPSSPP_ARCH(ARM) || PPSSPP_ARCH(ARM64)
	opts.unalignedLoadStore = (opts.disableFlags & (uint32_t)JitDisable::LSU_UNALIGNED) == 0;
	opts.unalignedLoadStoreVec4 = true;
	opts.preferVec4 = true;
#else
	opts.unalignedLoadStore = (opts.disableFlags & (uint32_t)JitDisable::LSU_UNALIGNED) == 0;
	// TODO: Could allow on x86 pretty easily...
	opts.unalignedLoadStoreVec4 = false;
	opts.preferVec4 = true;
#endif
	opts.optimizeForInterpreter = jo.optimizeForInterpreter;
	frontend_.SetOptions(opts);
}

IRJit::~IRJit() {
}

void IRJit::DoState(PointerWrap &p) {
	frontend_.DoState(p);
}

void IRJit::UpdateFCR31() {
}

void IRJit::ClearCache() {
	INFO_LOG(Log::JIT, "IRJit: Clearing the block cache!");
	blocks_.Clear();
}

void IRJit::InvalidateCacheAt(u32 em_address, int length) {
	std::vector<int> numbers = blocks_.FindInvalidatedBlockNumbers(em_address, length);
	if (numbers.empty()) {
		return;
	}

	DEBUG_LOG(Log::JIT, "Invalidating IR block cache at %08x (%d bytes): %d blocks", em_address, length, (int)numbers.size());

	for (int block_num : numbers) {
		auto block = blocks_.GetBlock(block_num);
		// TODO: We are invalidating a lot of blocks that are already invalid (Yu-Gi-Oh!).
		// INFO_LOG(Log::JIT, "Block at %08x invalidated: valid: %d", block->GetOriginalStart(), block->IsValid());
		// If we're a native JIT (IR->JIT, not just IR interpreter), we write native offsets into the blocks.
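		// The "cookie" is the 24-bit payload we store in the block's emuhack opcode:
		// the native code offset when compiling to native, otherwise the IR arena offset.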
		int cookie = compileToNative_ ? block->GetNativeOffset() : block->GetIRArenaOffset();
		blocks_.RemoveBlockFromPageLookup(block_num);
		block->Destroy(cookie);
	}
}

void IRJit::Compile(u32 em_address) {
	_dbg_assert_(compilerEnabled_);

	PROFILE_THIS_SCOPE("jitc");

	std::vector<IRInst> instructions;
	u32 mipsBytes;
	if (!CompileBlock(em_address, instructions, mipsBytes)) {
		// Ran out of block numbers - need to reset.
		ERROR_LOG(Log::JIT, "Ran out of block numbers, clearing cache");
		ClearCache();
		CompileBlock(em_address, instructions, mipsBytes);
	}

	if (frontend_.CheckRounding(em_address)) {
		// Our assumptions are all wrong so it's clean-slate time.
		ClearCache();
		CompileBlock(em_address, instructions, mipsBytes);
	}
}

// WARNING! This can be called from IRInterpret / the JIT, through the function preload stuff!
bool IRJit::CompileBlock(u32 em_address, std::vector<IRInst> &instructions, u32 &mipsBytes) {
	_dbg_assert_(compilerEnabled_);

	frontend_.DoJit(em_address, instructions, mipsBytes);
	_dbg_assert_(!instructions.empty());

	int block_num = blocks_.AllocateBlock(em_address, mipsBytes, instructions);
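	// AllocateBlock returns -1 when the IR arena is full; this mask check also rejects
	// any block number too large to encode in the emuhack's 24-bit payload.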
	if ((block_num & ~MIPS_EMUHACK_VALUE_MASK) != 0) {
		WARN_LOG(Log::JIT, "Failed to allocate block for %08x (%d instructions)", em_address, (int)instructions.size());
		// Out of block numbers. Caller will handle.
		return false;
	}

	IRBlock *b = blocks_.GetBlock(block_num);
	if (mipsTracer.tracing_enabled) {
		// Hash, then only update page stats, don't link yet.
		// TODO: Should we always hash? Then we can reuse blocks.
		b->UpdateHash();
	}

	if (!CompileNativeBlock(&blocks_, block_num))
		return false;

	if (mipsTracer.tracing_enabled) {
		mipsTracer.prepare_block(b, blocks_);
	}

	// Updates stats, also patches the first MIPS instruction into an emuhack if 'preload == false'.
	blocks_.FinalizeBlock(block_num);
	FinalizeNativeBlock(&blocks_, block_num);
	return true;
}

void IRJit::RunLoopUntil(u64 globalticks) {
	PROFILE_THIS_SCOPE("jit");

	// ApplyRoundingMode(true);
	// IR Dispatcher

	while (true) {
		// RestoreRoundingMode(true);
		CoreTiming::Advance();
		// ApplyRoundingMode(true);
		if (coreState != 0) {
			break;
		}

		MIPSState *mips = mips_;
#ifdef _DEBUG
		compilerEnabled_ = false;
#endif
		while (mips->downcount >= 0) {
			u32 inst = Memory::ReadUnchecked_U32(mips->pc);
			u32 opcode = inst & 0xFF000000;
			if (opcode == MIPS_EMUHACK_OPCODE) {
				u32 offset = inst & 0x00FFFFFF; // Alternatively, inst - opcode
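				// The low 24 bits of the emuhack opcode hold the block's offset into the IR arena.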
				const IRInst *instPtr = blocks_.GetArenaPtr() + offset;
				// The first op is always a downcount (except when breakpoints are in use), handled here to save one dispatch inside IRInterpret.
				// This branch is very branch-predictor-friendly, so this still beats the dispatch.
				if (instPtr->op == IROp::Downcount) {
					mips->downcount -= instPtr->constant;
					instPtr++;
				}
#ifdef IR_PROFILING
				IRBlock *block = blocks_.GetBlock(blocks_.GetBlockNumFromIRArenaOffset(offset));
				Instant start = Instant::Now();
				mips->pc = IRInterpret(mips, instPtr);
				int64_t elapsedNanos = start.ElapsedNanos();
				block->profileStats_.executions += 1;
				block->profileStats_.totalNanos += elapsedNanos;
#else
				mips->pc = IRInterpret(mips, instPtr);
#endif
				// Note: this will "jump to zero" on a badly constructed block missing exits.
				if (!Memory::IsValid4AlignedAddress(mips->pc)) {
					int blockNum = blocks_.GetBlockNumFromIRArenaOffset(offset);
					IRBlock *block = blocks_.GetBlockUnchecked(blockNum);
					Core_ExecException(mips->pc, block->GetOriginalStart(), ExecExceptionType::JUMP);
					break;
				}
			} else {
				// RestoreRoundingMode(true);
#ifdef _DEBUG
				compilerEnabled_ = true;
#endif
				Compile(mips->pc);
#ifdef _DEBUG
				compilerEnabled_ = false;
#endif
				// ApplyRoundingMode(true);
			}
		}
#ifdef _DEBUG
		compilerEnabled_ = true;
#endif
	}

	// RestoreRoundingMode(true);
}

bool IRJit::DescribeCodePtr(const u8 *ptr, std::string &name) {
	// Used in the native disassembly viewer.
	return false;
}

void IRJit::LinkBlock(u8 *exitPoint, const u8 *checkedEntry) {
	Crash();
}

void IRJit::UnlinkBlock(u8 *checkedEntry, u32 originalAddress) {
	Crash();
}

void IRBlockCache::Clear() {
	for (int i = 0; i < (int)blocks_.size(); ++i) {
		int cookie = compileToNative_ ? blocks_[i].GetNativeOffset() : blocks_[i].GetIRArenaOffset();
		blocks_[i].Destroy(cookie);
	}
	blocks_.clear();
	byPage_.clear();
	arena_.clear();
	arena_.shrink_to_fit();
}

IRBlockCache::IRBlockCache(bool compileToNative) : compileToNative_(compileToNative) {}

int IRBlockCache::AllocateBlock(int emAddr, u32 origSize, const std::vector<IRInst> &insts) {
	// We have 24 bits to represent offsets with.
	const u32 MAX_ARENA_SIZE = 0x1000000 - 1;
	int offset = (int)arena_.size();
	if (offset >= (int)MAX_ARENA_SIZE) {
		WARN_LOG(Log::JIT, "Filled JIT arena, restarting");
		return -1;
	}
	// TODO: Use memcpy.
	for (int i = 0; i < (int)insts.size(); i++) {
		arena_.push_back(insts[i]);
	}
	int newBlockIndex = (int)blocks_.size();
	blocks_.push_back(IRBlock(emAddr, origSize, offset, (u32)insts.size()));
	return newBlockIndex;
}

int IRBlockCache::GetBlockNumFromIRArenaOffset(int offset) const {
	// Block offsets are always in ascending order (we don't go back and replace them when invalidated), so we can binary search.
	int low = 0;
	int high = (int)blocks_.size() - 1;
	int found = -1;
	while (low <= high) {
		int mid = low + (high - low) / 2;
		const int blockOffset = blocks_[mid].GetIRArenaOffset();
		if (blockOffset == offset) {
			found = mid;
			break;
		}
		if (blockOffset < offset) {
			low = mid + 1;
		} else {
			high = mid - 1;
		}
	}

#ifndef _DEBUG
	// Release builds trust the binary search result.
	return found;
#else
	// Debug builds cross-check it against a linear scan.
	// TODO: Optimize if we need to call this often.
	for (int i = 0; i < (int)blocks_.size(); i++) {
		if (blocks_[i].GetIRArenaOffset() == offset) {
			_dbg_assert_(i == found);
			return i;
		}
	}
#endif
	_dbg_assert_(found == -1);
	return -1;
}

std::vector<int> IRBlockCache::FindInvalidatedBlockNumbers(u32 address, u32 lengthInBytes) {
	u32 startPage = AddressToPage(address);
	u32 endPage = AddressToPage(address + lengthInBytes);

	std::vector<int> found;
	for (u32 page = startPage; page <= endPage; ++page) {
		const auto iter = byPage_.find(page);
		if (iter == byPage_.end())
			continue;

		const std::vector<int> &blocksInPage = iter->second;
		for (int i : blocksInPage) {
			if (blocks_[i].OverlapsRange(address, lengthInBytes)) {
				// We now try to remove these during invalidation.
				found.push_back(i);
			}
		}
	}

	return found;
}

void IRBlockCache::FinalizeBlock(int blockIndex) {
	// TODO: What's different about preload blocks?
	IRBlock &block = blocks_[blockIndex];
	int cookie = compileToNative_ ? block.GetNativeOffset() : block.GetIRArenaOffset();
	block.Finalize(cookie);

	u32 startAddr, size;
	block.GetRange(&startAddr, &size);

	u32 startPage = AddressToPage(startAddr);
	u32 endPage = AddressToPage(startAddr + size);

	for (u32 page = startPage; page <= endPage; ++page) {
		byPage_[page].push_back(blockIndex);
	}
}

// Call this before Destroy()ing the block, while its address range is still valid.
void IRBlockCache::RemoveBlockFromPageLookup(int blockIndex) {
	// We need to remove the block from the byPage lookup.
	IRBlock &block = blocks_[blockIndex];

	u32 startAddr, size;
	block.GetRange(&startAddr, &size);

	u32 startPage = AddressToPage(startAddr);
	u32 endPage = AddressToPage(startAddr + size);

	for (u32 page = startPage; page <= endPage; ++page) {
		auto iter = std::find(byPage_[page].begin(), byPage_[page].end(), blockIndex);
		if (iter != byPage_[page].end()) {
			byPage_[page].erase(iter);
		} else if (block.IsValid()) {
			// If it was previously invalidated, we don't care, hence the above check.
			WARN_LOG(Log::JIT, "RemoveBlock: Block at %08x was not found where expected in byPage table.", startAddr);
		}
	}

	// Additionally, we'd like to zap the block in the IR arena.
	// However, this breaks if calling sceKernelIcacheClearAll(), since as soon as we return, we'll be executing garbage.
	/*
	IRInst bad{ IROp::Bad };
	for (int off = block.GetIRArenaOffset(); off < (int)(block.GetIRArenaOffset() + block.GetNumIRInstructions()); off++) {
		arena_[off] = bad;
	}
	*/
}

u32 IRBlockCache::AddressToPage(u32 addr) const {
	// Use relatively small pages since basic blocks are typically small.
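	// Pages are 1 KB (addr >> 10). The 0x3FFFFFFF mask appears intended to fold the
	// mirrored/uncached address views onto the same page (it matches OverlapsRange below).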
	return (addr & 0x3FFFFFFF) >> 10;
}

int IRBlockCache::FindPreloadBlock(u32 em_address) {
	u32 page = AddressToPage(em_address);
	auto iter = byPage_.find(page);
	if (iter == byPage_.end())
		return -1;

	const std::vector<int> &blocksInPage = iter->second;
	for (int i : blocksInPage) {
		if (blocks_[i].GetOriginalStart() == em_address) {
			if (blocks_[i].HashMatches()) {
				return i;
			}
		}
	}

	return -1;
}

int IRBlockCache::FindByCookie(int cookie) {
	if (blocks_.empty())
		return -1;

	// TODO: Maybe a flag to determine native offset mode?
	if (!compileToNative_) {
		return GetBlockNumFromIRArenaOffset(cookie);
	}

	// TODO: This could also use a binary search.
	for (int i = 0; i < GetNumBlocks(); ++i) {
		int offset = blocks_[i].GetNativeOffset();
		if (offset == cookie)
			return i;
	}
	return -1;
}

std::vector<u32> IRBlockCache::SaveAndClearEmuHackOps() {
	std::vector<u32> result;
	result.resize(blocks_.size());

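	// A nonzero entry means that block's first op was restored to the original MIPS instruction.
	// Note: block number 0 is indistinguishable from "not saved" here.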
	for (int number = 0; number < (int)blocks_.size(); ++number) {
		IRBlock &b = blocks_[number];
		int cookie = compileToNative_ ? b.GetNativeOffset() : b.GetIRArenaOffset();
		if (b.IsValid() && b.RestoreOriginalFirstOp(cookie)) {
			result[number] = number;
		} else {
			result[number] = 0;
		}
	}

	return result;
}

void IRBlockCache::RestoreSavedEmuHackOps(const std::vector<u32> &saved) {
	if ((int)blocks_.size() != (int)saved.size()) {
		ERROR_LOG(Log::JIT, "RestoreSavedEmuHackOps: Wrong saved block size.");
		return;
	}

	for (int number = 0; number < (int)blocks_.size(); ++number) {
		IRBlock &b = blocks_[number];
		// Only write it back if we restored it.
		if (b.IsValid() && saved[number] != 0 && b.HasOriginalFirstOp()) {
			int cookie = compileToNative_ ? b.GetNativeOffset() : b.GetIRArenaOffset();
			b.Finalize(cookie);
		}
	}
}

JitBlockDebugInfo IRBlockCache::GetBlockDebugInfo(int blockNum) const {
	const IRBlock &ir = blocks_[blockNum];
	JitBlockDebugInfo debugInfo{};
	uint32_t start, size;
	ir.GetRange(&start, &size);
	debugInfo.originalAddress = start; // TODO

	debugInfo.origDisasm.reserve(size / 4);
	for (u32 addr = start; addr < start + size; addr += 4) {
		char temp[256];
		MIPSDisAsm(Memory::Read_Instruction(addr), addr, temp, sizeof(temp), true);
		std::string mipsDis = temp;
		debugInfo.origDisasm.push_back(mipsDis);
	}

	debugInfo.irDisasm.reserve(ir.GetNumIRInstructions());
	const IRInst *instructions = GetBlockInstructionPtr(ir);
	for (int i = 0; i < ir.GetNumIRInstructions(); i++) {
		IRInst inst = instructions[i];
		char buffer[256];
		DisassembleIR(buffer, sizeof(buffer), inst);
		debugInfo.irDisasm.push_back(buffer);
	}
	return debugInfo;
}

void IRBlockCache::ComputeStats(BlockCacheStats &bcStats) const {
	double totalBloat = 0.0;
	double maxBloat = 0.0;
	double minBloat = 1000000000.0;
	for (const auto &b : blocks_) {
		// Count each IR instruction as 4 bytes (like a MIPS op) rather than sizeof(IRInst), so bloat compares instruction counts.
		double codeSize = (double)b.GetNumIRInstructions() * 4;
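		// Example: a block of 6 IR instructions decoded from 16 bytes of MIPS
		// (4 instructions) has bloat (6 * 4) / 16 = 1.5.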
		if (codeSize == 0)
			continue;
		u32 origAddr, mipsBytes;
		b.GetRange(&origAddr, &mipsBytes);
		double origSize = (double)mipsBytes;
		double bloat = codeSize / origSize;
		if (bloat < minBloat) {
			minBloat = bloat;
			bcStats.minBloatBlock = origAddr;
		}
		if (bloat > maxBloat) {
			maxBloat = bloat;
			bcStats.maxBloatBlock = origAddr;
		}
		totalBloat += bloat;
	}
	bcStats.numBlocks = (int)blocks_.size();
	bcStats.minBloat = minBloat;
	bcStats.maxBloat = maxBloat;
	bcStats.avgBloat = totalBloat / (double)blocks_.size();
}

int IRBlockCache::GetBlockNumberFromStartAddress(u32 em_address, bool realBlocksOnly) const {
	u32 page = AddressToPage(em_address);

	const auto iter = byPage_.find(page);
	if (iter == byPage_.end())
		return -1;

	const std::vector<int> &blocksInPage = iter->second;
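	// Prefer a valid block, but fall back to an invalidated one with the same start
	// address. (realBlocksOnly is currently unused here.)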
	int best = -1;
	for (int i : blocksInPage) {
		if (blocks_[i].GetOriginalStart() == em_address) {
			best = i;
			if (blocks_[i].IsValid()) {
				return i;
			}
		}
	}
	return best;
}

bool IRBlock::HasOriginalFirstOp() const {
	return Memory::ReadUnchecked_U32(origAddr_) == origFirstOpcode_.encoding;
}

bool IRBlock::RestoreOriginalFirstOp(int cookie) {
	const u32 emuhack = MIPS_EMUHACK_OPCODE | cookie;
	if (Memory::ReadUnchecked_U32(origAddr_) == emuhack) {
		Memory::Write_Opcode_JIT(origAddr_, origFirstOpcode_);
		return true;
	}
	return false;
}

void IRBlock::Finalize(int cookie) {
	// Check it wasn't invalidated, in case this is after preload.
	// TODO: Allow reusing blocks when the code matches hash_ again, instead.
	if (origAddr_) {
		origFirstOpcode_ = Memory::Read_Opcode_JIT(origAddr_);
		MIPSOpcode opcode = MIPSOpcode(MIPS_EMUHACK_OPCODE | cookie);
		Memory::Write_Opcode_JIT(origAddr_, opcode);
	} else {
		WARN_LOG(Log::JIT, "Finalizing invalid block (cookie: %d)", cookie);
	}
}

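// Undoes Finalize: if our emuhack opcode is still in place at origAddr_, restore the
// original first instruction, then mark the block invalid so we don't try to clear it again.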
void IRBlock::Destroy(int cookie) {
	if (origAddr_) {
		MIPSOpcode opcode = MIPSOpcode(MIPS_EMUHACK_OPCODE | cookie);
		u32 memOp = Memory::ReadUnchecked_U32(origAddr_);
		if (memOp == opcode.encoding) {
			Memory::Write_Opcode_JIT(origAddr_, origFirstOpcode_);
		} else {
			// NOTE: This is not an error. Just interesting to log.
			DEBUG_LOG(Log::JIT, "IRBlock::Destroy: Note: Block at %08x was overwritten - checked for %08x, got %08x when restoring the MIPS op to %08x", origAddr_, opcode.encoding, memOp, origFirstOpcode_.encoding);
		}
		// TODO: Also wipe the block in the IR opcode arena.
		// Let's mark this invalid so we don't try to clear it again.
		origAddr_ = 0;
	}
}

u64 IRBlock::CalculateHash() const {
	if (origAddr_) {
		// This is unfortunate. In case there are emuhacks, we have to make a copy.
		// If we could hash while reading we could avoid this.
		std::vector<u32> buffer;
		buffer.resize(origSize_ / 4);
		size_t pos = 0;
		for (u32 off = 0; off < origSize_; off += 4) {
			// Let's actually hash the replacement, if any.
			MIPSOpcode instr = Memory::ReadUnchecked_Instruction(origAddr_ + off, false);
			buffer[pos++] = instr.encoding;
		}
		return XXH3_64bits(&buffer[0], origSize_);
	}

	return 0;
}

bool IRBlock::OverlapsRange(u32 addr, u32 size) const {
	addr &= 0x3FFFFFFF;
	u32 origAddr = origAddr_ & 0x3FFFFFFF;
	return addr + size > origAddr && addr < origAddr + origSize_;
}
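
// Maps an emuhack opcode back to the original MIPS instruction, looking up the
// owning block via the 24-bit cookie.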
MIPSOpcode IRJit::GetOriginalOp(MIPSOpcode op) {
	IRBlock *b = blocks_.GetBlock(blocks_.FindByCookie(op.encoding & 0xFFFFFF));
	if (b) {
		return b->GetOriginalFirstOp();
	}
	return op;
}

} // namespace MIPSComp