Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
hrydgard
GitHub Repository: hrydgard/ppsspp
Path: blob/master/GPU/Common/FramebufferManagerCommon.cpp
3186 views
1
// Copyright (c) 2012- PPSSPP Project.
2
3
// This program is free software: you can redistribute it and/or modify
4
// it under the terms of the GNU General Public License as published by
5
// the Free Software Foundation, version 2.0 or later versions.
6
7
// This program is distributed in the hope that it will be useful,
8
// but WITHOUT ANY WARRANTY; without even the implied warranty of
9
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10
// GNU General Public License 2.0 for more details.
11
12
// A copy of the GPL 2.0 should have been included with the program.
13
// If not, see http://www.gnu.org/licenses/
14
15
// Official git repository and contact information can be found at
16
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
17
18
#include <algorithm>
19
#include <sstream>
20
#include <cmath>
21
22
#include "Common/GPU/thin3d.h"
23
#include "Common/Data/Collections/TinySet.h"
24
#include "Common/Data/Convert/ColorConv.h"
25
#include "Common/LogReporting.h"
26
#include "Common/System/Display.h"
27
#include "Common/VR/PPSSPPVR.h"
28
#include "Common/CommonTypes.h"
29
#include "Common/StringUtils.h"
30
#include "Core/Config.h"
31
#include "Core/ConfigValues.h"
32
#include "Core/Core.h"
33
#include "Core/CoreParameter.h"
34
#include "Core/Debugger/MemBlockInfo.h"
35
#include "GPU/Common/DrawEngineCommon.h"
36
#include "GPU/Common/FramebufferManagerCommon.h"
37
#include "GPU/Common/PresentationCommon.h"
38
#include "GPU/Common/TextureCacheCommon.h"
39
#include "GPU/Common/ReinterpretFramebuffer.h"
40
#include "GPU/GPUCommon.h"
41
#include "GPU/GPUState.h"
42
43
static size_t FormatFramebufferName(const VirtualFramebuffer *vfb, char *tag, size_t len) {
44
return snprintf(tag, len, "FB_%08x_%08x_%dx%d_%s", vfb->fb_address, vfb->z_address, vfb->bufferWidth, vfb->bufferHeight, GeBufferFormatToString(vfb->fb_format));
45
}
46
47
// Keeps the draw context, constructs the 2D draw helper from it, and allocates the
// presentation object (released again in the destructor).
FramebufferManagerCommon::FramebufferManagerCommon(Draw::DrawContext *draw)
	: draw_(draw),
	  draw2D_(draw_)
{
	presentation_ = new PresentationCommon(draw);
}
51
52
// Releases everything this manager owns: all tracked virtual framebuffers (both lists),
// the temporary FBOs, the presentation object and the conversion buffer.
FramebufferManagerCommon::~FramebufferManagerCommon() {
	DeviceLost();
	DecimateFBOs();

	// The regular virtual framebuffers.
	for (VirtualFramebuffer *vfb : vfbs_) {
		DestroyFramebuf(vfb);
	}
	vfbs_.clear();

	// Temporary FBOs only hold a reference that needs releasing.
	for (auto &entry : tempFBOs_) {
		entry.second.fbo->Release();
	}
	tempFBOs_.clear();

	// Do the same for ReadFramebuffersToMemory's VFBs
	for (auto readbackVfb : bvfbs_) {
		DestroyFramebuf(readbackVfb);
	}
	bvfbs_.clear();

	delete presentation_;
	delete[] convBuf_;
}
75
76
void FramebufferManagerCommon::Init(int msaaLevel) {
77
// We may need to override the render size if the shader is upscaling or SSAA.
78
NotifyDisplayResized();
79
NotifyRenderResized(msaaLevel);
80
}
81
82
// Returns true if we need to stop the render thread
83
bool FramebufferManagerCommon::UpdateRenderSize(int msaaLevel) {
84
const bool newRender = renderWidth_ != (float)PSP_CoreParameter().renderWidth || renderHeight_ != (float)PSP_CoreParameter().renderHeight || msaaLevel_ != msaaLevel;
85
86
int effectiveBloomHack = g_Config.iBloomHack;
87
if (PSP_CoreParameter().compat.flags().ForceLowerResolutionForEffectsOn) {
88
effectiveBloomHack = 3;
89
} else if (PSP_CoreParameter().compat.flags().ForceLowerResolutionForEffectsOff) {
90
effectiveBloomHack = 0;
91
}
92
93
bool newBuffered = !g_Config.bSkipBufferEffects;
94
const bool newSettings = bloomHack_ != effectiveBloomHack || useBufferedRendering_ != newBuffered;
95
96
renderWidth_ = (float)PSP_CoreParameter().renderWidth;
97
renderHeight_ = (float)PSP_CoreParameter().renderHeight;
98
renderScaleFactor_ = (float)PSP_CoreParameter().renderScaleFactor;
99
msaaLevel_ = msaaLevel;
100
101
bloomHack_ = effectiveBloomHack;
102
useBufferedRendering_ = newBuffered;
103
104
presentation_->UpdateRenderSize(renderWidth_, renderHeight_);
105
106
// If just switching TO buffered rendering, no need to pause the threads. In fact this causes problems due to the open backbuffer renderpass.
107
if (!useBufferedRendering_ && newBuffered) {
108
return false;
109
}
110
return newRender || newSettings;
111
}
112
113
void FramebufferManagerCommon::CheckPostShaders() {
114
if (updatePostShaders_) {
115
presentation_->UpdatePostShader();
116
updatePostShaders_ = false;
117
}
118
}
119
120
void FramebufferManagerCommon::BeginFrame() {
121
DecimateFBOs();
122
presentation_->BeginFrame();
123
currentRenderVfb_ = nullptr;
124
}
125
126
bool FramebufferManagerCommon::PresentedThisFrame() const {
127
return presentation_->PresentedThisFrame();
128
}
129
130
// Stores the address, stride and format of the buffer to display. The address is masked
// with 0x3FFFFFFF first; if the result is a VRAM address, the 0x041FFFFF mask is used instead.
void FramebufferManagerCommon::SetDisplayFramebuffer(u32 framebuf, u32 stride, GEBufferFormat format) {
	u32 maskedAddr = framebuf & 0x3FFFFFFF;
	if (Memory::IsVRAMAddress(maskedAddr)) {
		maskedAddr = framebuf & 0x041FFFFF;
	}

	displayFramebufPtr_ = maskedAddr;
	displayStride_ = stride;
	displayFormat_ = format;
}
137
138
// Returns the tracked framebuffer whose color address equals addr (after masking), or
// nullptr if there is none. When several share the address, the one with the highest
// last_frame_render wins; on a tie the earliest entry in the list is kept.
VirtualFramebuffer *FramebufferManagerCommon::GetVFBAt(u32 addr) const {
	u32 wanted = addr & 0x3FFFFFFF;
	if (Memory::IsVRAMAddress(wanted)) {
		wanted &= 0x041FFFFF;
	}

	VirtualFramebuffer *best = nullptr;
	for (auto candidate : vfbs_) {
		if (candidate->fb_address != wanted) {
			continue;
		}
		// Could check w too but whatever (actually, might very well make sense to do so, depending on context).
		if (best == nullptr || candidate->last_frame_render > best->last_frame_render) {
			best = candidate;
		}
	}
	return best;
}
153
154
// Returns the tracked framebuffer matching addr (after masking), stride and format exactly,
// or nullptr if there is none. Among several matches the one with the highest colorBindSeq
// is returned; on a tie the earliest entry in the list is kept.
VirtualFramebuffer *FramebufferManagerCommon::GetExactVFB(u32 addr, int stride, GEBufferFormat format) const {
	u32 wanted = addr & 0x3FFFFFFF;
	if (Memory::IsVRAMAddress(wanted)) {
		wanted &= 0x041FFFFF;
	}

	VirtualFramebuffer *latest = nullptr;
	for (auto candidate : vfbs_) {
		const bool exact = candidate->fb_address == wanted && candidate->fb_stride == stride && candidate->fb_format == format;
		if (!exact) {
			continue;
		}
		if (latest == nullptr || candidate->colorBindSeq > latest->colorBindSeq) {
			latest = candidate;
		}
	}
	return latest;
}
172
173
// Finds the most recently color-bound framebuffer at addr (after masking) whose stride in
// bytes equals that of the requested stride/format. If it has a different color format than
// requested, the result of ResolveFramebufferColorToFormat() is returned instead.
// Returns nullptr if nothing matches.
VirtualFramebuffer *FramebufferManagerCommon::ResolveVFB(u32 addr, int stride, GEBufferFormat format) {
	u32 wanted = addr & 0x3FFFFFFF;
	if (Memory::IsVRAMAddress(wanted)) {
		wanted &= 0x041FFFFF;
	}

	// Find the newest one matching addr and stride.
	VirtualFramebuffer *latest = nullptr;
	for (auto candidate : vfbs_) {
		if (candidate->fb_address != wanted) {
			continue;
		}
		if (candidate->FbStrideInBytes() != stride * BufferFormatBytesPerPixel(format)) {
			continue;
		}
		if (latest == nullptr || candidate->colorBindSeq > latest->colorBindSeq) {
			latest = candidate;
		}
	}

	if (latest == nullptr || latest->fb_format == format) {
		// Either nothing found, or the format already matches - no conversion needed.
		return latest;
	}

	WARN_LOG_ONCE(resolvevfb, Log::G3D, "ResolveVFB: Resolving from %s to %s at %08x/%d", GeBufferFormatToString(latest->fb_format), GeBufferFormatToString(format), wanted, stride);
	return ResolveFramebufferColorToFormat(latest, format);
}
198
199
// Exact-match lookup using the address, stride and format stored by SetDisplayFramebuffer.
VirtualFramebuffer *FramebufferManagerCommon::GetDisplayVFB() {
	VirtualFramebuffer *displayVfb = GetExactVFB(displayFramebufPtr_, displayStride_, displayFormat_);
	return displayVfb;
}
202
203
// Heuristics to figure out the size of FBO to create.
204
// TODO: Possibly differentiate on whether through mode is used (since in through mode, viewport is meaningless?)
205
void FramebufferManagerCommon::EstimateDrawingSize(u32 fb_address, int fb_stride, GEBufferFormat fb_format, int viewport_width, int viewport_height, int region_width, int region_height, int scissor_width, int scissor_height, int &drawing_width, int &drawing_height) {
206
static const int MAX_FRAMEBUF_HEIGHT = 512;
207
208
// Games don't always set any of these. Take the greatest parameter that looks valid based on stride.
209
if (viewport_width > 4 && viewport_width <= fb_stride && viewport_height > 0) {
210
drawing_width = viewport_width;
211
drawing_height = viewport_height;
212
// Some games specify a viewport with 0.5, but don't have VRAM for 273. 480x272 is the buffer size.
213
if (viewport_width == 481 && region_width == 480 && viewport_height == 273 && region_height == 272) {
214
drawing_width = 480;
215
drawing_height = 272;
216
}
217
// Sometimes region is set larger than the VRAM for the framebuffer.
218
// However, in one game it's correctly set as a larger height (see #7277) with the same width.
219
// A bit of a hack, but we try to handle that unusual case here.
220
if (region_width <= fb_stride && (region_width > drawing_width || (region_width == drawing_width && region_height > drawing_height)) && region_height <= MAX_FRAMEBUF_HEIGHT) {
221
drawing_width = region_width;
222
drawing_height = std::max(drawing_height, region_height);
223
}
224
// Scissor is often set to a subsection of the framebuffer, so we pay the least attention to it.
225
if (scissor_width <= fb_stride && scissor_width > drawing_width && scissor_height <= MAX_FRAMEBUF_HEIGHT) {
226
drawing_width = scissor_width;
227
drawing_height = std::max(drawing_height, scissor_height);
228
}
229
} else {
230
// If viewport wasn't valid, let's just take the greatest anything regardless of stride.
231
drawing_width = std::min(std::max(region_width, scissor_width), fb_stride);
232
drawing_height = std::max(region_height, scissor_height);
233
}
234
235
if (scissor_width == 481 && region_width == 480 && scissor_height == 273 && region_height == 272) {
236
drawing_width = 480;
237
drawing_height = 272;
238
}
239
240
// Assume no buffer is > 512 tall, it couldn't be textured or displayed fully if so.
241
if (drawing_height >= MAX_FRAMEBUF_HEIGHT) {
242
if (region_height < MAX_FRAMEBUF_HEIGHT) {
243
drawing_height = region_height;
244
} else if (scissor_height < MAX_FRAMEBUF_HEIGHT) {
245
drawing_height = scissor_height;
246
}
247
}
248
249
if (viewport_width != region_width) {
250
// The majority of the time, these are equal. If not, let's check what we know.
251
u32 nearest_address = 0xFFFFFFFF;
252
for (auto vfb : vfbs_) {
253
const u32 other_address = vfb->fb_address;
254
if (other_address > fb_address && other_address < nearest_address) {
255
nearest_address = other_address;
256
}
257
}
258
259
// Unless the game is using overlapping buffers, the next buffer should be far enough away.
260
// This catches some cases where we can know this.
261
// Hmm. The problem is that we could only catch it for the first of two buffers...
262
const u32 bpp = BufferFormatBytesPerPixel(fb_format);
263
int avail_height = (nearest_address - fb_address) / (fb_stride * bpp);
264
if (avail_height < drawing_height && avail_height == region_height) {
265
drawing_width = std::min(region_width, fb_stride);
266
drawing_height = avail_height;
267
}
268
269
// Some games draw buffers interleaved, with a high stride/region/scissor but default viewport.
270
if (fb_stride == 1024 && region_width == 1024 && scissor_width == 1024) {
271
drawing_width = 1024;
272
}
273
}
274
275
bool margin = false;
276
// Let's check if we're in a stride gap of a full-size framebuffer.
277
for (auto vfb : vfbs_) {
278
if (fb_address == vfb->fb_address) {
279
continue;
280
}
281
if (vfb->fb_stride != 512) {
282
continue;
283
}
284
285
int vfb_stride_in_bytes = BufferFormatBytesPerPixel(vfb->fb_format) * vfb->fb_stride;
286
int stride_in_bytes = BufferFormatBytesPerPixel(fb_format) * fb_stride;
287
if (stride_in_bytes != vfb_stride_in_bytes) {
288
// Mismatching stride in bytes, not interesting
289
continue;
290
}
291
292
if (fb_address > vfb->fb_address && fb_address < vfb->fb_address + vfb_stride_in_bytes) {
293
// Candidate!
294
if (vfb->height == drawing_height) {
295
// Might have a margin texture! Fix the drawing width if it's too large.
296
int width_in_bytes = vfb->fb_address + vfb_stride_in_bytes - fb_address;
297
int width_in_pixels = width_in_bytes / BufferFormatBytesPerPixel(fb_format);
298
299
// Final check
300
if (width_in_pixels <= 32) {
301
drawing_width = std::min(drawing_width, width_in_pixels);
302
margin = true;
303
// Don't really need to keep looking.
304
break;
305
}
306
}
307
}
308
}
309
310
DEBUG_LOG(Log::G3D, "Est: %08x V: %ix%i, R: %ix%i, S: %ix%i, STR: %i, THR:%i, Z:%08x = %ix%i %s", fb_address, viewport_width,viewport_height, region_width, region_height, scissor_width, scissor_height, fb_stride, gstate.isModeThrough(), gstate.isDepthWriteEnabled() ? gstate.getDepthBufAddress() : 0, drawing_width, drawing_height, margin ? " (margin!)" : "");
311
}
312
313
// Fills *params with everything the framebuffer heuristics need from the given GPU state:
// color/depth buffer addresses and strides, color format, clear/depth-write/draw/blend
// flags, and the viewport, region and scissor extents.
void GetFramebufferHeuristicInputs(FramebufferHeuristicParams *params, const GPUgstate &gstate) {
	// GetFramebufferHeuristicInputs is only called from rendering, and thus, it's VRAM.
	params->fb_address = gstate.getFrameBufRawAddress() | 0x04000000;
	params->fb_stride = gstate.FrameBufStride();

	params->z_address = gstate.getDepthBufRawAddress() | 0x04000000;
	params->z_stride = gstate.DepthBufStride();

	if (params->z_address == params->fb_address) {
		// Probably indicates that the game doesn't care about Z for this VFB.
		// Let's avoid matching it for Z copies and other shenanigans.
		params->z_address = 0;
		params->z_stride = 0;
	}

	// Note: the format comes from the cached state (gstate_c), not from the gstate argument.
	params->fb_format = gstate_c.framebufFormat;

	params->isClearingDepth = gstate.isModeClear() && gstate.isClearModeDepthMask();
	// Technically, it may write depth later, but we're trying to detect it only when it's really true.
	if (gstate.isModeClear()) {
		// Not quite seeing how this makes sense..
		params->isWritingDepth = !gstate.isClearModeDepthMask() && gstate.isDepthWriteEnabled();
	} else {
		params->isWritingDepth = gstate.isDepthWriteEnabled();
	}
	params->isDrawing = !gstate.isModeClear() || !gstate.isClearModeColorMask() || !gstate.isClearModeAlphaMask();
	params->isModeThrough = gstate.isModeThrough();
	const bool alphaBlending = gstate.isAlphaBlendEnabled();
	const bool logicOpBlending = gstate.isLogicOpEnabled() && gstate.getLogicOp() != GE_LOGIC_CLEAR && gstate.getLogicOp() != GE_LOGIC_COPY;
	params->isBlending = alphaBlending || logicOpBlending;

	// Viewport-X1 and Y1 are not the upper left corner, but half the width/height. A bit confusing.
	float vpx = gstate.getViewportXScale();
	float vpy = gstate.getViewportYScale();

	// Work around problem in F1 Grand Prix, where it draws in through mode with a bogus viewport.
	// We set bad values to 0 which causes the framebuffer size heuristic to rely on the other parameters instead.
	// The magnitude is tested (not just the signed value) because the sign is dropped with
	// fabsf() below anyway: a huge negative scale or -inf would otherwise slip through and
	// make the float-to-int conversion overflow, which is undefined behavior.
	if (std::isnan(vpx) || fabsf(vpx) > 10000000.0f) {
		vpx = 0.f;
	}
	if (std::isnan(vpy) || fabsf(vpy) > 10000000.0f) {
		vpy = 0.f;
	}
	params->viewportWidth = (int)(fabsf(vpx) * 2.0f);
	params->viewportHeight = (int)(fabsf(vpy) * 2.0f);

	// Region and scissor store inclusive end coordinates, hence the +1 for sizes/exclusive ends.
	params->regionWidth = gstate.getRegionX2() + 1;
	params->regionHeight = gstate.getRegionY2() + 1;

	params->scissorLeft = gstate.getScissorX1();
	params->scissorTop = gstate.getScissorY1();
	params->scissorRight = gstate.getScissorX2() + 1;
	params->scissorBottom = gstate.getScissorY2() + 1;

	// Anything but 0x100 for the region rate is unexpected - report it once.
	if (gstate.getRegionRateX() != 0x100 || gstate.getRegionRateY() != 0x100) {
		WARN_LOG_REPORT_ONCE(regionRate, Log::G3D, "Drawing region rate add non-zero: %04x, %04x of %04x, %04x", gstate.getRegionRateX(), gstate.getRegionRateY(), gstate.getRegionX2(), gstate.getRegionY2());
	}
}
370
371
static void ApplyKillzoneFramebufferSplit(FramebufferHeuristicParams *params, int *drawing_width);
372
373
// Selects the virtual framebuffer that drawing with the given parameters should target,
// creating a new one if nothing suitable is tracked yet. The result becomes the current
// render target, the render target size/offset fields in gstate_c are updated, and the
// chosen framebuffer is returned.
VirtualFramebuffer *FramebufferManagerCommon::DoSetRenderFrameBuffer(FramebufferHeuristicParams &params, u32 skipDrawReason) {
	gstate_c.Clean(DIRTY_FRAMEBUF);

	// Collect all parameters. This whole function has really become a cesspool of heuristics...
	// but it appears that's what it takes, unless we emulate VRAM layout more accurately somehow.

	// As there are no clear "framebuffer width" and "framebuffer height" registers,
	// we need to infer the size of the current framebuffer somehow.
	int drawing_width, drawing_height;
	EstimateDrawingSize(params.fb_address, std::max(params.fb_stride, (u16)4), params.fb_format, params.viewportWidth, params.viewportHeight, params.regionWidth, params.regionHeight, params.scissorRight, params.scissorBottom, drawing_width, drawing_height);

	if (params.fb_address == params.z_address) {
		// Most likely Z will not be used in this pass, as that would wreak havoc (undefined behavior for sure)
		// We probably don't need to do anything about that, but let's log it.
		WARN_LOG_ONCE(color_equal_z, Log::G3D, "Framebuffer bound with color addr == z addr, likely will not use Z in this pass: %08x", params.fb_address);
	}

	// Compatibility hack for Killzone, see issue #6207.
	if (PSP_CoreParameter().compat.flags().SplitFramebufferMargin && params.fb_format == GE_FORMAT_8888) {
		ApplyKillzoneFramebufferSplit(&params, &drawing_width);
	} else {
		gstate_c.SetCurRTOffset(0, 0);
	}

	// Find a matching framebuffer. Two kinds of matches are tracked: a regular one (same
	// address, or an X offset into an existing buffer) and a "large vertical offset" one.
	VirtualFramebuffer *matchedVfb = nullptr;
	VirtualFramebuffer *verticalOffsetVfb = nullptr;
	int yOffset;

	for (auto candidate : vfbs_) {
		const u32 bpp = BufferFormatBytesPerPixel(candidate->fb_format);

		const bool sameAddress = params.fb_address == candidate->fb_address;
		const bool sameFormat = params.fb_format == candidate->fb_format;
		const bool sameStride = params.fb_stride == candidate->fb_stride;

		if (sameAddress && sameFormat && sameStride) {
			// Direct hit. The first one found is kept.
			if (!matchedVfb) {
				matchedVfb = candidate;
			}
			continue;
		}

		// Offset matching can be switched off through compat flags, and needs equal stride and format.
		if (PSP_CoreParameter().compat.flags().DisallowFramebufferAtOffset || PSP_CoreParameter().compat.flags().SplitFramebufferMargin) {
			continue;
		}
		if (!sameStride || !sameFormat) {
			continue;
		}

		const u32 firstLineEnd = candidate->fb_address + candidate->fb_stride * bpp;

		if (!matchedVfb && params.fb_address > candidate->fb_address && params.fb_address < firstLineEnd) {
			// The address lies inside the first line of the candidate.
			const int xOffset = (params.fb_address - candidate->fb_address) / bpp;
			if (xOffset < params.fb_stride && candidate->height >= drawing_height) {
				// Pretty certainly a pure render-to-X-offset.
				WARN_LOG_REPORT_ONCE(renderoffset, Log::FrameBuf, "Rendering to framebuffer offset at %08x +%dx%d (stride %d)", candidate->fb_address, xOffset, 0, candidate->fb_stride);
				matchedVfb = candidate;
				gstate_c.SetCurRTOffset(xOffset, 0);
				matchedVfb->width = std::max((int)matchedVfb->width, xOffset + drawing_width);
				// To prevent the newSize code from being confused.
				drawing_width += xOffset;
				break;
			}
		} else if (PSP_CoreParameter().compat.flags().FramebufferAllowLargeVerticalOffset &&
			params.fb_address > candidate->fb_address && candidate->fb_stride > 0 && (params.fb_address - candidate->fb_address) % candidate->FbStrideInBytes() == 0 &&
			params.fb_address != 0x04088000 && candidate->fb_address != 0x04000000) {  // Heuristic to avoid merging the main framebuffers.
			yOffset = (params.fb_address - candidate->fb_address) / candidate->FbStrideInBytes();
			if (yOffset <= candidate->bufferHeight) {  // note: v->height is misdetected as 256 instead of 272 here in tokimeki. Note that 272 is just the height of the upper part, it's supersampling vertically.
				verticalOffsetVfb = candidate;
				break;
			}
		}
	}

	VirtualFramebuffer *vfb = nullptr;
	if (verticalOffsetVfb) {
		// These are prioritized over normal VFBs matches, to ensure things work even if the higher-address one
		// is created first. Only enabled under compat flag.
		vfb = verticalOffsetVfb;
		WARN_LOG_REPORT_ONCE(tokimeki, Log::FrameBuf, "Detected FBO at Y offset %d of %08x: %08x", yOffset, verticalOffsetVfb->fb_address, params.fb_address);
		gstate_c.SetCurRTOffset(0, yOffset);
		vfb->height = std::max((int)vfb->height, yOffset + drawing_height);
		drawing_height += yOffset;
		// TODO: We can allow X/Y overlaps too, but haven't seen any so safer to not.
	} else if (matchedVfb) {
		vfb = matchedVfb;
		if (vfb->z_address == 0 && vfb->z_stride == 0 && params.z_stride != 0) {
			// Got one that was created by CreateRAMFramebuffer. Since it has no depth buffer,
			// we just recreate it immediately.
			ResizeFramebufFBO(vfb, vfb->width, vfb->height, true);
		}

		// Keep track, but this isn't really used.
		vfb->z_stride = params.z_stride;
		// Heuristic: In throughmode, a higher height could be used. Let's avoid shrinking the buffer.
		const bool onlyGrow = params.isModeThrough && (int)vfb->width <= params.fb_stride;
		if (onlyGrow) {
			vfb->width = std::max((int)vfb->width, drawing_width);
			vfb->height = std::max((int)vfb->height, drawing_height);
		} else {
			vfb->width = drawing_width;
			vfb->height = drawing_height;
		}
	}

	if (vfb) {
		bool resized = false;
		const bool sizeDiffers = drawing_width != vfb->bufferWidth || drawing_height != vfb->bufferHeight;
		if (!sizeDiffers) {
			// It's not different, let's keep track of that too.
			vfb->lastFrameNewSize = gpuStats.numFlips;
		} else if (vfb->width > vfb->bufferWidth || vfb->height > vfb->bufferHeight) {
			// Even if it's not newly wrong, if this is larger we need to resize up.
			ResizeFramebufFBO(vfb, vfb->width, vfb->height);
			resized = true;
		} else if (vfb->newWidth != drawing_width || vfb->newHeight != drawing_height) {
			// If it's newly wrong, or changing every frame, just keep track.
			vfb->newWidth = drawing_width;
			vfb->newHeight = drawing_height;
			vfb->lastFrameNewSize = gpuStats.numFlips;
		} else if (vfb->lastFrameNewSize + FBO_OLD_AGE < gpuStats.numFlips) {
			// Okay, it's changed for a while (and stayed that way.) Let's start over.
			// But only if we really need to, to avoid blinking.
			const bool widerThanStride = vfb->bufferWidth > params.fb_stride;
			const bool widthOff = vfb->newWidth > vfb->bufferWidth || vfb->newWidth * 2 < vfb->bufferWidth;
			const bool heightOff = vfb->newHeight > vfb->bufferHeight || vfb->newHeight * 2 < vfb->bufferHeight;
			const bool needsRecreate = widerThanStride || widthOff || heightOff;

			// Whether we resize or not, change the size parameters so we stop detecting a resize.
			// It might be larger if all drawing has been in throughmode.
			vfb->width = drawing_width;
			vfb->height = drawing_height;

			if (needsRecreate) {
				ResizeFramebufFBO(vfb, vfb->width, vfb->height, true);
				resized = true;
				// Let's discard this information, might be wrong now.
				vfb->safeWidth = 0;
				vfb->safeHeight = 0;
			}
		}

		if (!resized && renderScaleFactor_ != 1 && vfb->renderScaleFactor == 1) {
			// Might be time to change this framebuffer - have we used depth?
			if ((vfb->usageFlags & FB_USAGE_COLOR_MIXED_DEPTH) && !PSP_CoreParameter().compat.flags().ForceLowerResolutionForEffectsOn) {
				ResizeFramebufFBO(vfb, vfb->width, vfb->height, true);
				_assert_(vfb->renderScaleFactor != 1);
			}
		}
	}

	if (!vfb) {
		// None found? Create one.
		gstate_c.usingDepth = false;  // reset depth buffer tracking

		vfb = new VirtualFramebuffer{};
		vfb->fbo = nullptr;
		vfb->fb_address = params.fb_address;
		vfb->fb_stride = params.fb_stride;
		vfb->z_address = params.z_address;
		vfb->z_stride = params.z_stride;

		// The other width/height parameters are set in ResizeFramebufFBO below.
		vfb->width = drawing_width;
		vfb->height = drawing_height;
		vfb->newWidth = drawing_width;
		vfb->newHeight = drawing_height;
		vfb->lastFrameNewSize = gpuStats.numFlips;
		vfb->fb_format = params.fb_format;
		vfb->usageFlags = FB_USAGE_RENDER_COLOR;

		// Extend the tracked end of the color range if this buffer reaches beyond it.
		u32 colorByteSize = vfb->BufferByteSize(RASTER_COLOR);
		if (Memory::IsVRAMAddress(params.fb_address) && params.fb_address + colorByteSize > framebufColorRangeEnd_) {
			framebufColorRangeEnd_ = params.fb_address + colorByteSize;
		}

		// This is where we actually create the framebuffer. The true is "force".
		ResizeFramebufFBO(vfb, drawing_width, drawing_height, true);
		NotifyRenderFramebufferCreated(vfb);

		// Note that we do not even think about depth right now. That'll be handled
		// on the first depth access, which will call SetDepthFramebuffer.

		CopyToColorFromOverlappingFramebuffers(vfb);
		SetColorUpdated(vfb, skipDrawReason);

		INFO_LOG(Log::FrameBuf, "Creating FBO for %08x (z: %08x) : %d x %d x %s", vfb->fb_address, vfb->z_address, vfb->width, vfb->height, GeBufferFormatToString(vfb->fb_format));

		vfb->last_frame_render = gpuStats.numFlips;
		frameLastFramebufUsed_ = gpuStats.numFlips;
		vfbs_.push_back(vfb);
		currentRenderVfb_ = vfb;

		// Assume that if we're clearing right when switching to a new framebuffer, we don't need to upload.
		if (useBufferedRendering_ && params.isDrawing && vfb->fb_stride > 0) {
			gpu->PerformWriteColorFromMemory(params.fb_address, colorByteSize);
			// Alpha was already done by PerformWriteColorFromMemory.
			PerformWriteStencilFromMemory(params.fb_address, colorByteSize, WriteStencil::STENCIL_IS_ZERO | WriteStencil::IGNORE_ALPHA);
			// TODO: Is it worth trying to upload the depth buffer (only if it wasn't copied above..?)
		}

		DiscardFramebufferCopy();
	} else if (vfb != currentRenderVfb_) {
		// We already have it! Use it as a render target.
		DEBUG_LOG(Log::FrameBuf, "Switching render target to FBO for %08x: %d x %d x %d ", vfb->fb_address, vfb->width, vfb->height, vfb->fb_format);
		vfb->usageFlags |= FB_USAGE_RENDER_COLOR;
		vfb->last_frame_render = gpuStats.numFlips;
		frameLastFramebufUsed_ = gpuStats.numFlips;
		vfb->dirtyAfterDisplay = true;
		if ((skipDrawReason & SKIPDRAW_SKIPFRAME) == 0) {
			vfb->reallyDirtyAfterDisplay = true;
		}

		VirtualFramebuffer *previous = currentRenderVfb_;
		currentRenderVfb_ = vfb;
		NotifyRenderFramebufferSwitched(previous, vfb, params.isClearingDepth);
		CopyToColorFromOverlappingFramebuffers(vfb);
		gstate_c.usingDepth = false;  // reset depth buffer tracking

		DiscardFramebufferCopy();
	} else {
		// Something changed, but we still got the same framebuffer we were already rendering to.
		// Might not be a lot to do here, we check in NotifyRenderFramebufferUpdated
		vfb->last_frame_render = gpuStats.numFlips;
		frameLastFramebufUsed_ = gpuStats.numFlips;
		vfb->dirtyAfterDisplay = true;
		if ((skipDrawReason & SKIPDRAW_SKIPFRAME) == 0) {
			vfb->reallyDirtyAfterDisplay = true;
		}
		NotifyRenderFramebufferUpdated(vfb);
	}

	vfb->colorBindSeq = GetBindSeqCount();

	// Publish the render target dimensions to the cached GPU state.
	gstate_c.curRTWidth = vfb->width;
	gstate_c.curRTHeight = vfb->height;
	gstate_c.curRTRenderWidth = vfb->renderWidth;
	gstate_c.curRTRenderHeight = vfb->renderHeight;
	return vfb;
}
603
604
// Called on the first use of depth in a render pass.
605
void FramebufferManagerCommon::SetDepthFrameBuffer(bool isClearingDepth) {
606
if (!currentRenderVfb_) {
607
return;
608
}
609
610
// First time use of this framebuffer's depth buffer.
611
bool newlyUsingDepth = (currentRenderVfb_->usageFlags & FB_USAGE_RENDER_DEPTH) == 0;
612
currentRenderVfb_->usageFlags |= FB_USAGE_RENDER_DEPTH;
613
614
uint32_t boundDepthBuffer = gstate.getDepthBufRawAddress() | 0x04000000;
615
uint32_t boundDepthStride = gstate.DepthBufStride();
616
if (currentRenderVfb_->z_address != boundDepthBuffer || currentRenderVfb_->z_stride != boundDepthStride) {
617
if (currentRenderVfb_->fb_address == boundDepthBuffer) {
618
// Disallow setting depth buffer to the same address as the color buffer, usually means it's not used.
619
WARN_LOG_N_TIMES(z_reassign, 5, Log::FrameBuf, "Ignoring color matching depth buffer at %08x", boundDepthBuffer);
620
boundDepthBuffer = 0;
621
boundDepthStride = 0;
622
}
623
WARN_LOG_N_TIMES(z_reassign, 5, Log::FrameBuf, "Framebuffer at %08x/%d has switched associated depth buffer from %08x to %08x, updating.",
624
currentRenderVfb_->fb_address, currentRenderVfb_->fb_stride, currentRenderVfb_->z_address, boundDepthBuffer);
625
626
// Technically, here we should copy away the depth buffer to another framebuffer that uses that z_address, or maybe
627
// even write it back to RAM. However, this is rare. Silent Hill is one example, see #16126.
628
currentRenderVfb_->z_address = boundDepthBuffer;
629
// Update the stride in case it changed.
630
currentRenderVfb_->z_stride = boundDepthStride;
631
632
if (currentRenderVfb_->fbo) {
633
char tag[128];
634
FormatFramebufferName(currentRenderVfb_, tag, sizeof(tag));
635
currentRenderVfb_->fbo->UpdateTag(tag);
636
}
637
}
638
639
// If this first draw call is anything other than a clear, "resolve" the depth buffer,
640
// by copying from any overlapping buffers with fresher content.
641
if (!isClearingDepth && useBufferedRendering_) {
642
CopyToDepthFromOverlappingFramebuffers(currentRenderVfb_);
643
644
// Need to upload the first line of depth buffers, for Burnout Dominator lens flares. See issue #11100 and comments to #16081.
645
// Might make this more generic and upload the whole depth buffer if we find it's needed for something.
646
if (newlyUsingDepth && draw_->GetDeviceCaps().fragmentShaderDepthWriteSupported) {
647
// Sanity check the depth buffer pointer.
648
if (Memory::IsValidRange(currentRenderVfb_->z_address, currentRenderVfb_->width * 2)) {
649
const u16 *src = (const u16 *)Memory::GetPointerUnchecked(currentRenderVfb_->z_address);
650
DrawPixels(currentRenderVfb_, 0, 0, (const u8 *)src, GE_FORMAT_DEPTH16, currentRenderVfb_->z_stride, currentRenderVfb_->width, currentRenderVfb_->height, RASTER_DEPTH, "Depth Upload");
651
}
652
}
653
}
654
655
currentRenderVfb_->depthBindSeq = GetBindSeqCount();
656
}
657
658
struct CopySource {
659
VirtualFramebuffer *vfb;
660
RasterChannel channel;
661
int xOffset;
662
int yOffset;
663
664
int seq() const {
665
return channel == RASTER_DEPTH ? vfb->depthBindSeq : vfb->colorBindSeq;
666
}
667
668
bool operator < (const CopySource &other) const {
669
return seq() < other.seq();
670
}
671
};
672
673
// Not sure if it's more profitable to always do these copies with raster (which may screw up early-Z due to explicit depth buffer write)
674
// or to use image copies when possible (which may make it easier for the driver to preserve early-Z, but on the other hand, will cost additional memory
675
// bandwidth on tilers due to the load operation, which we might otherwise be able to skip).
676
// Refreshes the depth buffer of `dest` from another tracked framebuffer that holds newer
// data for the same memory, if there is one.
//
// Two kinds of sources are considered:
//  - a 565 color buffer whose address/stride equal dest's depth address/stride (color->depth), and
//  - another framebuffer sharing dest's depth address/stride (depth->depth).
// Only the last candidate after sorting is applied. All render state is dirtied on exit,
// whether or not a copy happened.
void FramebufferManagerCommon::CopyToDepthFromOverlappingFramebuffers(VirtualFramebuffer *dest) {
	std::vector<CopySource> candidates;
	for (VirtualFramebuffer *other : vfbs_) {
		if (other == dest) {
			continue;
		}

		const bool colorAliasesDepth = other->fb_address == dest->z_address &&
			other->fb_stride == dest->z_stride &&
			other->fb_format == GE_FORMAT_565;
		if (colorAliasesDepth) {
			// Only take it if the color buffer was bound more recently than our depth.
			if (other->colorBindSeq > dest->depthBindSeq) {
				candidates.push_back(CopySource{ other, RASTER_COLOR, 0, 0 });
			}
			continue;
		}

		const bool sharesDepthMemory = other->z_address == dest->z_address && other->z_stride == dest->z_stride;
		if (sharesDepthMemory && other->depthBindSeq > dest->depthBindSeq) {
			candidates.push_back(CopySource{ other, RASTER_DEPTH, 0, 0 });
		}
		// TODO: Do more detailed overlap checks here.
	}

	std::sort(candidates.begin(), candidates.end());

	// TODO: A full copy will overwrite anything else. So we can eliminate
	// anything that comes before such a copy.

	// For now, let's just do the last thing, if there are multiple.
	if (!candidates.empty()) {
		draw_->Invalidate(InvalidationFlags::CACHED_RENDER_STATE);

		const CopySource &chosen = candidates.back();
		if (chosen.channel == RASTER_DEPTH) {
			// Good old depth->depth copy.
			BlitFramebufferDepth(chosen.vfb, dest);
			gpuStats.numDepthCopies++;
			dest->last_frame_depth_updated = gpuStats.numFlips;
		} else if (chosen.channel == RASTER_COLOR && draw_->GetDeviceCaps().fragmentShaderDepthWriteSupported) {
			VirtualFramebuffer *colorSrc = chosen.vfb;
			if (colorSrc->fb_format != GE_FORMAT_565) {
				WARN_LOG_ONCE(not565, Log::FrameBuf, "fb_format of buffer at %08x not 565 as expected", colorSrc->fb_address);
			}

			// Really hate to do this, but tracking the depth swizzle state across multiple
			// copies is not easy.
			const Draw2DShader shader = PSP_CoreParameter().compat.flags().DeswizzleDepth
				? DRAW2D_565_TO_DEPTH_DESWIZZLE
				: DRAW2D_565_TO_DEPTH;

			gpuStats.numReinterpretCopies++;
			colorSrc->usageFlags |= FB_USAGE_COLOR_MIXED_DEPTH;
			dest->usageFlags |= FB_USAGE_COLOR_MIXED_DEPTH;

			// Copying color to depth. Note that the source's render size is used for both rectangles.
			BlitUsingRaster(
				colorSrc->fbo, 0.0f, 0.0f, colorSrc->renderWidth, colorSrc->renderHeight,
				dest->fbo, 0.0f, 0.0f, colorSrc->renderWidth, colorSrc->renderHeight,
				false, dest->renderScaleFactor, Get2DPipeline(shader), "565_to_depth");
		}
	}

	gstate_c.Dirty(DIRTY_ALL_RENDER_STATE);
}
738
739
// Can't easily dynamically create these strings, we just pass along the pointer.
//
// Pass names for reinterpret blits, indexed as [source format][destination format].
// Judging by the names, rows and columns are ordered 565, 5551, 4444, 8888.
// Both the characters and the table entries themselves are const, so the table is read-only.
static const char * const reinterpretStrings[4][4] = {
	{
		"self_reinterpret_565",
		"reinterpret_565_to_5551",
		"reinterpret_565_to_4444",
		"reinterpret_565_to_8888",
	},
	{
		"reinterpret_5551_to_565",
		"self_reinterpret_5551",
		"reinterpret_5551_to_4444",
		"reinterpret_5551_to_8888",
	},
	{
		"reinterpret_4444_to_565",
		"reinterpret_4444_to_5551",
		"self_reinterpret_4444",
		"reinterpret_4444_to_8888",
	},
	{
		"reinterpret_8888_to_565",
		"reinterpret_8888_to_5551",
		"reinterpret_8888_to_4444",
		"self_reinterpret_8888",
	},
};
766
767
// Call this after the target has been bound for rendering. For color, raster is probably always going to win over blits/copies.
768
void FramebufferManagerCommon::CopyToColorFromOverlappingFramebuffers(VirtualFramebuffer *dst) {
769
if (!useBufferedRendering_) {
770
return;
771
}
772
773
std::vector<CopySource> sources;
774
for (auto src : vfbs_) {
775
// Discard old and equal potential inputs.
776
if (src == dst || src->colorBindSeq < dst->colorBindSeq) {
777
continue;
778
}
779
780
if (src->fb_address == dst->fb_address && src->fb_stride == dst->fb_stride) {
781
// Another render target at the exact same location but gotta be a different format or a different stride, otherwise
782
// it would be the same, and should have been detected in DoSetRenderFrameBuffer.
783
if (src->fb_format != dst->fb_format) {
784
// This will result in reinterpret later, if both formats are 16-bit.
785
sources.push_back(CopySource{ src, RASTER_COLOR, 0, 0 });
786
} else {
787
// This shouldn't happen anymore. I think when it happened last, we still had
788
// lax stride checking when video was incoming, and a resize happened causing a duplicate.
789
}
790
} else if (src->fb_stride == dst->fb_stride && src->fb_format == dst->fb_format) {
791
u32 bytesPerPixel = BufferFormatBytesPerPixel(src->fb_format);
792
793
u32 strideInBytes = src->fb_stride * bytesPerPixel; // Same for both src and dest
794
795
u32 srcColorStart = src->fb_address;
796
u32 srcFirstLineEnd = src->fb_address + strideInBytes;
797
u32 srcColorEnd = strideInBytes * src->height;
798
799
u32 dstColorStart = dst->fb_address;
800
u32 dstFirstLineEnd = dst->fb_address + strideInBytes;
801
u32 dstColorEnd = strideInBytes * dst->height;
802
803
// Initially we'll only allow pure horizontal and vertical overlap,
804
// to reduce the risk for false positives. We can allow diagonal overlap too if needed
805
// in the future.
806
807
// Check for potential vertical overlap, like in Juiced 2.
808
int xOffset = 0;
809
int yOffset = 0;
810
811
// TODO: Get rid of the compatibility flag check.
812
if ((dstColorStart - srcColorStart) % strideInBytes == 0
813
&& PSP_CoreParameter().compat.flags().AllowLargeFBTextureOffsets) {
814
// Buffers are aligned.
815
yOffset = ((int)dstColorStart - (int)srcColorStart) / strideInBytes;
816
if (yOffset <= -(int)src->height) {
817
// Not overlapping
818
continue;
819
} else if (yOffset >= dst->height) {
820
// Not overlapping
821
continue;
822
}
823
} else {
824
// Buffers not stride-aligned - ignoring for now.
825
// This is where we'll add the horizontal offset for GoW.
826
continue;
827
}
828
sources.push_back(CopySource{ src, RASTER_COLOR, xOffset, yOffset });
829
} else if (src->fb_address == dst->fb_address && src->FbStrideInBytes() == dst->FbStrideInBytes()) {
830
if (src->fb_stride == dst->fb_stride * 2) {
831
// Reinterpret from 16-bit to 32-bit.
832
sources.push_back(CopySource{ src, RASTER_COLOR, 0, 0 });
833
} else if (src->fb_stride * 2 == dst->fb_stride) {
834
// Reinterpret from 32-bit to 16-bit.
835
sources.push_back(CopySource{ src, RASTER_COLOR, 0, 0 });
836
} else {
837
// 16-to-16 reinterpret, should have been caught above already.
838
_assert_msg_(false, "Reinterpret: Shouldn't get here");
839
}
840
}
841
}
842
843
std::sort(sources.begin(), sources.end());
844
845
draw_->Invalidate(InvalidationFlags::CACHED_RENDER_STATE);
846
847
bool tookActions = false;
848
849
// TODO: Only do the latest one.
850
for (const CopySource &source : sources) {
851
VirtualFramebuffer *src = source.vfb;
852
853
// Copy a rectangle from the original to the new buffer.
854
// Yes, we mean to look at src->width/height for the dest rectangle.
855
856
// TODO: Try to bound the blit using gstate_c.vertBounds like depal does.
857
858
int srcWidth = src->width * src->renderScaleFactor;
859
int srcHeight = src->height * src->renderScaleFactor;
860
int dstWidth = src->width * dst->renderScaleFactor;
861
int dstHeight = src->height * dst->renderScaleFactor;
862
863
int dstX1 = -source.xOffset * dst->renderScaleFactor;
864
int dstY1 = -source.yOffset * dst->renderScaleFactor;
865
int dstX2 = dstX1 + dstWidth;
866
int dstY2 = dstY1 + dstHeight;
867
868
if (source.channel == RASTER_COLOR) {
869
Draw2DPipeline *pipeline = nullptr;
870
const char *pass_name = "N/A";
871
float scaleFactorX = 1.0f;
872
if (src->fb_format == dst->fb_format) {
873
gpuStats.numColorCopies++;
874
pipeline = Get2DPipeline(DRAW2D_COPY_COLOR);
875
pass_name = "copy_color";
876
} else {
877
if (PSP_CoreParameter().compat.flags().BlueToAlpha) {
878
WARN_LOG_ONCE(bta, Log::FrameBuf, "WARNING: Reinterpret encountered with BlueToAlpha on");
879
}
880
881
// Reinterpret!
882
WARN_LOG_N_TIMES(reint, 5, Log::FrameBuf, "Reinterpret detected from %08x_%s to %08x_%s",
883
src->fb_address, GeBufferFormatToString(src->fb_format),
884
dst->fb_address, GeBufferFormatToString(dst->fb_format));
885
886
pipeline = GetReinterpretPipeline(src->fb_format, dst->fb_format, &scaleFactorX);
887
dstX1 *= scaleFactorX;
888
dstX2 *= scaleFactorX;
889
890
pass_name = reinterpretStrings[(int)src->fb_format][(int)dst->fb_format];
891
892
gpuStats.numReinterpretCopies++;
893
}
894
895
if (pipeline) {
896
tookActions = true;
897
// OK we have the pipeline, now just do the blit.
898
BlitUsingRaster(src->fbo, 0.0f, 0.0f, srcWidth, srcHeight,
899
dst->fbo, dstX1, dstY1, dstX2, dstY2, false, dst->renderScaleFactor, pipeline, pass_name);
900
}
901
902
if (scaleFactorX == 1.0f && dst->z_address == src->z_address && dst->z_stride == src->z_stride) {
903
// We should also copy the depth buffer in this case!
904
BlitFramebufferDepth(src, dst, true);
905
}
906
}
907
}
908
909
if (currentRenderVfb_ && dst != currentRenderVfb_ && tookActions) {
910
// Will probably just change the name of the current renderpass, since one was started by the reinterpret itself.
911
draw_->BindFramebufferAsRenderTarget(currentRenderVfb_->fbo, { Draw::RPAction::KEEP, Draw::RPAction::KEEP, Draw::RPAction::KEEP }, "After Reinterpret");
912
}
913
914
shaderManager_->DirtyLastShader();
915
textureCache_->ForgetLastTexture();
916
}
917
918
// Returns the 2D pipeline used to convert color data from format `from` to format `to`,
// creating and caching it in reinterpretFromTo_ on first use.
//
// *scaleFactorX receives the factor the caller applies to destination X coordinates:
// 0.5 for 16-bit -> 32-bit, 2.0 for 32-bit -> 16-bit, 1.0 otherwise.
// If the formats are equal, the plain color copy pipeline is returned instead.
Draw2DPipeline *FramebufferManagerCommon::GetReinterpretPipeline(GEBufferFormat from, GEBufferFormat to, float *scaleFactorX) {
	if (from == to) {
		*scaleFactorX = 1.0f;
		return Get2DPipeline(DRAW2D_COPY_COLOR);
	}

	const bool from16 = IsBufferFormat16Bit(from);
	const bool to16 = IsBufferFormat16Bit(to);
	if (from16 == to16) {
		// Same bit depth, coordinates map 1:1.
		*scaleFactorX = 1.0f;
	} else if (from16) {
		// We halve the X coordinates in the destination framebuffer.
		// The shader will collect two pixels worth of input data and merge into one.
		*scaleFactorX = 0.5f;
	} else {
		// We double the X coordinates in the destination framebuffer.
		// The shader will sample and depending on the X coordinate & 1, use the upper or lower bits.
		*scaleFactorX = 2.0f;
	}

	const int fromIndex = (int)from;
	const int toIndex = (int)to;
	Draw2DPipeline *cached = reinterpretFromTo_[fromIndex][toIndex];
	if (cached) {
		return cached;
	}

	// Not built yet: generate the shader for this format pair and remember the pipeline.
	Draw2DPipeline *created = draw2D_.Create2DPipeline([=](ShaderWriter &shaderWriter) -> Draw2DPipelineInfo {
		return GenerateReinterpretFragmentShader(shaderWriter, from, to);
	});
	reinterpretFromTo_[fromIndex][toIndex] = created;
	return created;
}
945
946
// Releases the FBO owned by `v`, clears every manager pointer that still refers to it,
// and deletes the VirtualFramebuffer. The caller is responsible for removing `v` from vfbs_.
void FramebufferManagerCommon::DestroyFramebuf(VirtualFramebuffer *v) {
	// Notify the texture cache of both the color and depth buffers.
	textureCache_->NotifyFramebuffer(v, NOTIFY_FB_DESTROYED);

	if (v->fbo != nullptr) {
		v->fbo->Release();
		v->fbo = nullptr;
	}

	// Wipe some pointers, so nothing is left dangling once `v` is gone.
	DiscardFramebufferCopy();
	if (v == currentRenderVfb_) {
		currentRenderVfb_ = nullptr;
	}
	if (v == displayFramebuf_) {
		displayFramebuf_ = nullptr;
	}
	if (v == prevDisplayFramebuf_) {
		prevDisplayFramebuf_ = nullptr;
	}
	if (v == prevPrevDisplayFramebuf_) {
		prevPrevDisplayFramebuf_ = nullptr;
	}

	delete v;
}
967
968
// Copies the depth contents of `src` into `dst`, if both refer to the same depth address
// and `src` has depth rendered at least as recently as `dst` was last updated.
//
// Unless allowSizeMismatch is set, the heights must match and the widths must either match
// or be the 480/512 pair. The copy goes through a raster pass, an image copy or a blit,
// depending on what the device caps report.
void FramebufferManagerCommon::BlitFramebufferDepth(VirtualFramebuffer *src, VirtualFramebuffer *dst, bool allowSizeMismatch) {
	_dbg_assert_(src && dst);

	_dbg_assert_(src != dst);

	// Check that the depth address is even the same before actually blitting.
	const bool sameDepthBuffer = src->z_address == dst->z_address && src->z_stride != 0 && dst->z_stride != 0;
	if (!sameDepthBuffer) {
		return;
	}

	if (!allowSizeMismatch) {
		const bool compatibleWidth = src->width == dst->width ||
			(src->width == 512 && dst->width == 480) ||
			(src->width == 480 && dst->width == 512);
		if (!compatibleWidth || src->height != dst->height) {
			return;
		}
	}

	if (!src->fbo || !dst->fbo || !useBufferedRendering_) {
		return;
	}

	// Copy depth value from the previously bound framebuffer to the current one.
	// If depth wasn't updated, then we're at least "two degrees" away from the data.
	// This is an optimization: it probably doesn't need to be copied in this case.
	const bool srcHasNewerDepth = src->last_frame_depth_render != 0 && src->last_frame_depth_render >= dst->last_frame_depth_updated;
	if (!srcHasNewerDepth) {
		return;
	}

	const auto &caps = draw_->GetDeviceCaps();
	const bool useCopy = caps.framebufferSeparateDepthCopySupported || (!caps.framebufferDepthBlitSupported && caps.framebufferCopySupported);
	const bool useBlit = caps.framebufferDepthBlitSupported;
	bool useRaster = caps.fragmentShaderDepthWriteSupported && caps.textureDepthSupported;

	// If multisampling, we want to copy depth properly so we get all the samples, to avoid aliased edges.
	// Can be seen in the fire in Jeanne D'arc, for example.
	const bool bothMultisampled = src->fbo->MultiSampleLevel() > 0 && dst->fbo->MultiSampleLevel() > 0;
	if (bothMultisampled && useCopy) {
		useRaster = false;
	}

	// Only the area covered by both render targets is transferred.
	int w = std::min(src->renderWidth, dst->renderWidth);
	int h = std::min(src->renderHeight, dst->renderHeight);

	// Some GPUs can copy depth but only if stencil gets to come along for the ride. We only want to use this if there is no blit functionality.
	if (useRaster) {
		BlitUsingRaster(src->fbo, 0, 0, w, h, dst->fbo, 0, 0, w, h, false, dst->renderScaleFactor, Get2DPipeline(Draw2DShader::DRAW2D_COPY_DEPTH), "BlitDepthRaster");
	} else if (useCopy) {
		draw_->CopyFramebufferImage(src->fbo, 0, 0, 0, 0, dst->fbo, 0, 0, 0, 0, w, h, 1, Draw::Aspect::DEPTH_BIT, "CopyFramebufferDepth");
		RebindFramebuffer("After BlitFramebufferDepth");
	} else if (useBlit) {
		// We'll accept whether we get a separate depth blit or not...
		draw_->BlitFramebuffer(src->fbo, 0, 0, w, h, dst->fbo, 0, 0, w, h, Draw::Aspect::DEPTH_BIT, Draw::FB_BLIT_NEAREST, "BlitFramebufferDepth");
		RebindFramebuffer("After BlitFramebufferDepth");
	}

	draw_->Invalidate(InvalidationFlags::CACHED_RENDER_STATE);
}
1018
1019
// Called for a newly created render framebuffer: handles the previously current target,
// tells the texture cache about the new one, and refreshes size-dependent dirty state.
void FramebufferManagerCommon::NotifyRenderFramebufferCreated(VirtualFramebuffer *vfb) {
	if (useBufferedRendering_) {
		// Give the framebuffer we are leaving a chance to be saved to memory.
		if (currentRenderVfb_) {
			DownloadFramebufferOnSwitch(currentRenderVfb_);
		}
	} else {
		// Let's ignore rendering to targets that have not (yet) been displayed.
		gstate_c.skipDrawReason |= SKIPDRAW_NON_DISPLAYED_FB;
	}

	textureCache_->NotifyFramebuffer(vfb, NOTIFY_FB_CREATED);

	NotifyRenderFramebufferUpdated(vfb);
}
1031
1032
// Marks the render state that depends on the render target's size as dirty, when the size
// recorded in gstate_c differs from that of `vfb`.
void FramebufferManagerCommon::NotifyRenderFramebufferUpdated(VirtualFramebuffer *vfb) {
	// Logical size of the target.
	const bool sizeChanged = gstate_c.curRTWidth != vfb->width || gstate_c.curRTHeight != vfb->height;
	if (sizeChanged) {
		gstate_c.Dirty(DIRTY_PROJTHROUGHMATRIX | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_CULLRANGE);
	}

	// Actual (scaled) render size of the target.
	const bool renderSizeChanged = gstate_c.curRTRenderWidth != vfb->renderWidth || gstate_c.curRTRenderHeight != vfb->renderHeight;
	if (renderSizeChanged) {
		gstate_c.Dirty(DIRTY_PROJMATRIX);
		gstate_c.Dirty(DIRTY_PROJTHROUGHMATRIX);
	}
}
1041
1042
// Saves the first rendered frame of `vfb` to memory, once, when we switch away from it.
//
// Some games will draw to some memory once, and use it as a render-to-texture later.
// To support this, we save the first frame to memory when we have a safe w/h.
// Saving each frame would be slow.
void FramebufferManagerCommon::DownloadFramebufferOnSwitch(VirtualFramebuffer *vfb) {
	if (!vfb) {
		return;
	}

	const bool hasSafeArea = vfb->safeWidth > 0 && vfb->safeHeight > 0;
	const bool firstFrameSaved = (vfb->usageFlags & FB_USAGE_FIRST_FRAME_SAVED) != 0;
	if (!hasSafeArea || firstFrameSaved || vfb->memoryUpdated) {
		return;
	}

	// Readbacks can be turned off by the readback mode or by a compat flag.
	if (GetSkipGPUReadbackMode() != SkipGPUReadbackMode::NO_SKIP) {
		return;
	}
	if (PSP_CoreParameter().compat.flags().DisableFirstFrameReadback) {
		return;
	}

	// TODO: This type of download could be made async, for less stutter on framebuffer creation.
	ReadFramebufferToMemory(vfb, 0, 0, vfb->safeWidth, vfb->safeHeight, RASTER_COLOR, Draw::ReadbackMode::BLOCK);
	vfb->usageFlags = (vfb->usageFlags | FB_USAGE_DOWNLOAD | FB_USAGE_FIRST_FRAME_SAVED) & ~FB_USAGE_DOWNLOAD_CLEAR;
	// Reset the safe area now that it has been consumed.
	vfb->safeWidth = 0;
	vfb->safeHeight = 0;
}
1057
1058
bool FramebufferManagerCommon::ShouldDownloadFramebufferColor(const VirtualFramebuffer *vfb) {
1059
// Dangan Ronpa hack
1060
return PSP_CoreParameter().compat.flags().Force04154000Download && vfb->fb_address == 0x04154000;
1061
}
1062
1063
bool FramebufferManagerCommon::ShouldDownloadFramebufferDepth(const VirtualFramebuffer *vfb) {
1064
// Download depth buffer if compat flag set (previously used for Syphon Filter lens flares, now used for nothing)
1065
if (!PSP_CoreParameter().compat.flags().ReadbackDepth || GetSkipGPUReadbackMode() != SkipGPUReadbackMode::NO_SKIP) {
1066
return false;
1067
}
1068
return (vfb->usageFlags & FB_USAGE_RENDER_DEPTH) != 0 && vfb->width >= 480 && vfb->height >= 272;
1069
}
1070
1071
// Called when rendering moves from `prevVfb` (may be null) to `vfb`.
//
// Performs any pending readbacks of the previous framebuffer, then either binds the new
// framebuffer as the render target (buffered rendering) or updates the skip-draw state
// (non-buffered), and finally notifies the texture cache.
// Note: isClearingDepth is not used by this implementation.
void FramebufferManagerCommon::NotifyRenderFramebufferSwitched(VirtualFramebuffer *prevVfb, VirtualFramebuffer *vfb, bool isClearingDepth) {
	if (prevVfb) {
		const bool forcedColorDownload = ShouldDownloadFramebufferColor(prevVfb) && !prevVfb->memoryUpdated;
		if (!forcedColorDownload) {
			DownloadFramebufferOnSwitch(prevVfb);
		} else {
			// NOTE: This path is ONLY for the Dangan Ronpa hack, see ShouldDownloadFramebufferColor
			ReadFramebufferToMemory(prevVfb, 0, 0, prevVfb->width, prevVfb->height, RASTER_COLOR, Draw::ReadbackMode::OLD_DATA_OK);
			prevVfb->usageFlags = (prevVfb->usageFlags | FB_USAGE_DOWNLOAD | FB_USAGE_FIRST_FRAME_SAVED) & ~FB_USAGE_DOWNLOAD_CLEAR;
		}

		if (ShouldDownloadFramebufferDepth(prevVfb)) {
			ReadFramebufferToMemory(prevVfb, 0, 0, prevVfb->width, prevVfb->height, RasterChannel::RASTER_DEPTH, Draw::ReadbackMode::BLOCK);
		}
	}

	textureCache_->ForgetLastTexture();
	shaderManager_->DirtyLastShader();

	if (!useBufferedRendering_) {
		if (vfb->fbo) {
			// This should only happen very briefly when toggling useBufferedRendering_.
			textureCache_->NotifyFramebuffer(vfb, NOTIFY_FB_DESTROYED);
			vfb->fbo->Release();
			vfb->fbo = nullptr;
		}

		// Let's ignore rendering to targets that have not (yet) been displayed.
		if (vfb->usageFlags & FB_USAGE_DISPLAYED_FRAMEBUFFER) {
			gstate_c.skipDrawReason &= ~SKIPDRAW_NON_DISPLAYED_FB;
		} else {
			gstate_c.skipDrawReason |= SKIPDRAW_NON_DISPLAYED_FB;
		}
	} else if (!vfb->fbo) {
		// This should only happen very briefly when toggling useBufferedRendering_.
		ResizeFramebufFBO(vfb, vfb->width, vfb->height, true);
	} else {
		shaderManager_->DirtyLastShader();

		// Color and stencil are always kept. Depth is cleared once if it was flagged as invalidated.
		Draw::RPAction depthAction = Draw::RPAction::KEEP;
		float clearDepth = 0.0f;
		if (vfb->usageFlags & FB_USAGE_INVALIDATE_DEPTH) {
			depthAction = Draw::RPAction::CLEAR;
			clearDepth = GetDepthScaleFactors(gstate_c.UseFlags()).Offset();
			vfb->usageFlags &= ~FB_USAGE_INVALIDATE_DEPTH;
		}
		draw_->BindFramebufferAsRenderTarget(vfb->fbo, {Draw::RPAction::KEEP, depthAction, Draw::RPAction::KEEP, 0, clearDepth}, "FBSwitch");
	}

	textureCache_->NotifyFramebuffer(vfb, NOTIFY_FB_UPDATED);

	NotifyRenderFramebufferUpdated(vfb);
}
1123
1124
// Records that formatted image data (stride and format known) is being written at `addr`.
//
// Note: UpdateFromMemory() is still called later.
// This is a special case where we have extra information prior to the invalidation,
// because it's called from sceJpeg, sceMpeg, scePsmf etc.
//
// If a matching framebuffer exists it is marked as rendered this frame, and widened when the
// incoming stride is larger than its current one.
void FramebufferManagerCommon::PerformWriteFormattedFromMemory(u32 addr, int size, int stride, GEBufferFormat fmt) {
	// TODO: Could possibly be at an offset...
	// Also, stride needs better handling.
	VirtualFramebuffer *target = ResolveVFB(addr, stride, fmt);
	if (!target) {
		return;
	}

	// Let's count this as a "render". This will also force us to use the correct format.
	target->last_frame_render = gpuStats.numFlips;
	target->colorBindSeq = GetBindSeqCount();

	if (target->fb_stride >= stride) {
		// Already wide enough, nothing to resize.
		return;
	}

	INFO_LOG(Log::FrameBuf, "Changing stride for %08x from %d to %d", addr, target->fb_stride, stride);
	const int bytesPerPixel = BufferFormatBytesPerPixel(fmt);
	// The new height is however many full rows of the new stride fit in `size` bytes.
	ResizeFramebufFBO(target, stride, size / (bytesPerPixel * stride));
	// Resizing may change the viewport/etc.
	gstate_c.Dirty(DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_CULLRANGE);
	target->fb_stride = stride;
	// This might be a bit wider than necessary, but we'll redetect on next render.
	target->width = stride;
}
1149
1150
// Refreshes every framebuffer whose color address equals `addr` from emulated memory.
//
// With buffered rendering and a live FBO, the memory contents are drawn into the framebuffer.
// Otherwise the framebuffer is destroyed and removed from the list.
// Note: `size` is not used by this implementation.
void FramebufferManagerCommon::UpdateFromMemory(u32 addr, int size) {
	// Take off the uncached flag from the address. Not to be confused with the start of VRAM.
	addr &= 0x3FFFFFFF;
	if (Memory::IsVRAMAddress(addr)) {
		addr &= 0x041FFFFF;
	}

	// TODO: Could go through all FBOs, but probably not important?
	// TODO: Could also check for inner changes, but video is most important.
	// TODO: This shouldn't care if it's a display framebuf or not, should work exactly the same.
	const bool isDisplayBuf = addr == CurrentDisplayFramebufAddr() || addr == PrevDisplayFramebufAddr();

	// TODO: Deleting the FBO is a heavy hammer solution, so let's only do it if it'd help.
	if (!Memory::IsValidAddress(displayFramebufPtr_)) {
		return;
	}

	size_t index = 0;
	while (index < vfbs_.size()) {
		VirtualFramebuffer *vfb = vfbs_[index];
		if (vfb->fb_address != addr) {
			++index;
			continue;
		}

		FlushBeforeCopy();

		if (useBufferedRendering_ && vfb->fbo) {
			// If we're not rendering to it, format may be wrong. Use displayFormat_ instead.
			// TODO: This doesn't seem quite right anymore.
			const bool notRecentlyRendered = vfb->last_frame_render + 1 < gpuStats.numFlips;
			const GEBufferFormat fmt = (notRecentlyRendered && isDisplayBuf) ? displayFormat_ : vfb->fb_format;

			DrawPixels(vfb, 0, 0, Memory::GetPointerUnchecked(addr), fmt, vfb->fb_stride, vfb->width, vfb->height, RASTER_COLOR, "UpdateFromMemory_DrawPixels");
			SetColorUpdated(vfb, gstate_c.skipDrawReason);
			++index;
		} else {
			INFO_LOG(Log::FrameBuf, "Invalidating FBO for %08x (%dx%d %s)", vfb->fb_address, vfb->width, vfb->height, GeBufferFormatToString(vfb->fb_format));
			DestroyFramebuf(vfb);
			// The next element slides into this slot, so the index stays put.
			vfbs_.erase(vfbs_.begin() + index);
		}
	}

	RebindFramebuffer("RebindFramebuffer - UpdateFromMemory");

	// TODO: Necessary?
	gstate_c.Dirty(DIRTY_FRAGMENTSHADER_STATE);
}
1190
1191
// Uploads a block of pixels from memory as a texture and draws it at (dstX, dstY).
//
// With buffered rendering and a framebuffer that has an FBO, the pixels are drawn into that
// framebuffer. Otherwise they go straight to the back buffer (color only).
// `tag` is used as the render pass name when binding the framebuffer.
void FramebufferManagerCommon::DrawPixels(VirtualFramebuffer *vfb, int dstX, int dstY, const u8 *srcPixels, GEBufferFormat srcPixelFormat, int srcStride, int width, int height, RasterChannel channel, const char *tag) {
	textureCache_->ForgetLastTexture();
	shaderManager_->DirtyLastShader();

	float u0 = 0.0f, u1 = 1.0f;
	float v0 = 0.0f, v1 = 1.0f;

	const bool isDepth = channel == RASTER_DEPTH;
	const bool drawToVfb = useBufferedRendering_ && vfb && vfb->fbo;

	DrawTextureFlags flags;
	if (drawToVfb) {
		// Depth is never filtered; color only when the compat flag asks for nearest.
		if (isDepth || PSP_CoreParameter().compat.flags().NearestFilteringOnFramebufferCreate) {
			flags = DRAWTEX_NEAREST;
		} else {
			flags = DRAWTEX_LINEAR;
		}
		draw_->BindFramebufferAsRenderTarget(vfb->fbo, { Draw::RPAction::KEEP, Draw::RPAction::KEEP, Draw::RPAction::KEEP }, tag);
		SetViewport2D(0, 0, vfb->renderWidth, vfb->renderHeight);
		draw_->SetScissorRect(0, 0, vfb->renderWidth, vfb->renderHeight);
	} else {
		_dbg_assert_(channel == RASTER_COLOR);
		// We are drawing directly to the back buffer so need to flip.
		// Should more of this be handled by the presentation engine?
		if (needBackBufferYSwap_) {
			std::swap(v0, v1);
		}
		flags = g_Config.iDisplayFilter == SCALE_LINEAR ? DRAWTEX_LINEAR : DRAWTEX_NEAREST;
		flags = flags | DRAWTEX_TO_BACKBUFFER;

		FRect frame = GetScreenFrame(pixelWidth_, pixelHeight_);
		FRect outputRect;
		CalculateDisplayOutputRect(&outputRect, 480.0f, 272.0f, frame, ROTATION_LOCKED_HORIZONTAL);
		SetViewport2D(outputRect.x, outputRect.y, outputRect.w, outputRect.h);
		draw_->SetScissorRect(0, 0, pixelWidth_, pixelHeight_);
	}

	if (isDepth) {
		_dbg_assert_(srcPixelFormat == GE_FORMAT_DEPTH16);
		flags = flags | DRAWTEX_DEPTH;
		if (vfb) {
			vfb->usageFlags |= FB_USAGE_COLOR_MIXED_DEPTH;
		}
	}

	Draw::Texture *pixelsTex = MakePixelTexture(srcPixels, srcPixelFormat, srcStride, width, height);
	if (!pixelsTex) {
		// No texture could be made from the pixels, so there is nothing to draw.
		return;
	}

	draw_->BindTextures(0, 1, &pixelsTex, Draw::TextureBindFlags::VULKAN_BIND_ARRAY);

	// TODO: Replace with draw2D_.Blit() directly.
	DrawActiveTexture(dstX, dstY, width, height,
		vfb ? vfb->bufferWidth : g_display.pixel_xres,
		vfb ? vfb->bufferHeight : g_display.pixel_yres,
		u0, v0, u1, v1, ROTATION_LOCKED_HORIZONTAL, flags);

	draw_->Invalidate(InvalidationFlags::CACHED_RENDER_STATE);

	gstate_c.Dirty(DIRTY_ALL_RENDER_STATE);
}
1244
1245
bool FramebufferManagerCommon::BindFramebufferAsColorTexture(int stage, VirtualFramebuffer *framebuffer, int flags, int layer) {
1246
if (!framebuffer->fbo || !useBufferedRendering_) {
1247
draw_->BindTexture(stage, nullptr);
1248
gstate_c.skipDrawReason |= SKIPDRAW_BAD_FB_TEXTURE;
1249
return false;
1250
}
1251
1252
// currentRenderVfb_ will always be set when this is called, except from the GE debugger.
1253
// Let's just not bother with the copy in that case.
1254
bool skipCopy = !(flags & BINDFBCOLOR_MAY_COPY);
1255
1256
// Currently rendering to this framebuffer. Need to make a copy.
1257
if (!skipCopy && framebuffer == currentRenderVfb_) {
1258
// Self-texturing, need a copy currently (some backends can potentially support it though).
1259
WARN_LOG_ONCE(selfTextureCopy, Log::G3D, "Attempting to texture from current render target (src=%08x / target=%08x / flags=%d), making a copy", framebuffer->fb_address, currentRenderVfb_->fb_address, flags);
1260
// TODO: Maybe merge with bvfbs_? Not sure if those could be packing, and they're created at a different size.
1261
if (currentFramebufferCopy_ && (flags & BINDFBCOLOR_UNCACHED) == 0) {
1262
// We have a copy already that hasn't been invalidated, let's keep using it.
1263
draw_->BindFramebufferAsTexture(currentFramebufferCopy_, stage, Draw::Aspect::COLOR_BIT, layer);
1264
return true;
1265
}
1266
1267
Draw::Framebuffer *renderCopy = GetTempFBO(TempFBO::COPY, framebuffer->renderWidth, framebuffer->renderHeight);
1268
if (renderCopy) {
1269
VirtualFramebuffer copyInfo = *framebuffer;
1270
copyInfo.fbo = renderCopy;
1271
1272
bool partial = false;
1273
CopyFramebufferForColorTexture(&copyInfo, framebuffer, flags, layer, &partial);
1274
RebindFramebuffer("After BindFramebufferAsColorTexture");
1275
draw_->BindFramebufferAsTexture(renderCopy, stage, Draw::Aspect::COLOR_BIT, layer);
1276
1277
// Only cache the copy if it wasn't a partial copy.
1278
// TODO: Improve on this.
1279
if (!partial && (flags & BINDFBCOLOR_UNCACHED) == 0) {
1280
currentFramebufferCopy_ = renderCopy;
1281
}
1282
gpuStats.numCopiesForSelfTex++;
1283
} else {
1284
// Failed to get temp FBO? Weird.
1285
draw_->BindFramebufferAsTexture(framebuffer->fbo, stage, Draw::Aspect::COLOR_BIT, layer);
1286
}
1287
return true;
1288
} else if (framebuffer != currentRenderVfb_ || (flags & BINDFBCOLOR_FORCE_SELF) != 0) {
1289
draw_->BindFramebufferAsTexture(framebuffer->fbo, stage, Draw::Aspect::COLOR_BIT, layer);
1290
return true;
1291
} else {
1292
// Here it's an error because for some reason skipCopy is true. That shouldn't really happen.
1293
ERROR_LOG_REPORT_ONCE(selfTextureFail, Log::G3D, "Attempting to texture from target (src=%08x / target=%08x / flags=%d)", framebuffer->fb_address, currentRenderVfb_->fb_address, flags);
1294
// To do this safely in Vulkan, we need to use input attachments.
1295
// Actually if the texture region and render regions don't overlap, this is safe, but we need
1296
// to transition to GENERAL image layout which will take some trickery.
1297
// Badness on D3D11 to bind the currently rendered-to framebuffer as a texture.
1298
draw_->BindTexture(stage, nullptr);
1299
gstate_c.skipDrawReason |= SKIPDRAW_BAD_FB_TEXTURE;
1300
return false;
1301
}
1302
}
1303
1304
// Copies the color contents of `src` into `dst` so they can be sampled as a texture.
//
// By default the whole drawn area of `src` is copied. With BINDFBCOLOR_MAY_COPY_WITH_UV set and
// valid vertex UV bounds, only the bounded region is copied. *partial is set to true when less
// than the full drawn area was copied. Note: `layer` is not used by this implementation.
void FramebufferManagerCommon::CopyFramebufferForColorTexture(VirtualFramebuffer *dst, VirtualFramebuffer *src, int flags, int layer, bool *partial) {
	int copyX = 0;
	int copyY = 0;
	int copyW = src->drawnWidth;
	int copyH = src->drawnHeight;

	*partial = false;

	// If max is not > min, we probably could not detect it. Skip.
	// See the vertex decoder, where this is updated.
	// TODO: We're currently not hitting this path in Dante. See #17032
	const bool uvCopyRequested = (flags & BINDFBCOLOR_MAY_COPY_WITH_UV) == BINDFBCOLOR_MAY_COPY_WITH_UV;
	if (uvCopyRequested && gstate_c.vertBounds.maxU > gstate_c.vertBounds.minU) {
		copyX = std::max(gstate_c.vertBounds.minU, (u16)0);
		copyY = std::max(gstate_c.vertBounds.minV, (u16)0);
		copyW = std::min(gstate_c.vertBounds.maxU, src->drawnWidth) - copyX;
		copyH = std::min(gstate_c.vertBounds.maxV, src->drawnHeight) - copyY;

		// If we bound a framebuffer, apply the byte offset as pixels to the copy too.
		if (flags & BINDFBCOLOR_APPLY_TEX_OFFSET) {
			copyX += gstate_c.curTextureXOffset;
			copyY += gstate_c.curTextureYOffset;
		}

		// We'll have to reapply these next time since we cropped to UV.
		gstate_c.Dirty(DIRTY_TEXTURE_PARAMS);
	}

	// Nothing to do if the rectangle is empty or starts outside the drawn area.
	const bool startsInside = copyX < src->drawnWidth && copyY < src->drawnHeight;
	if (!startsInside || copyW <= 0 || copyH <= 0) {
		return;
	}

	const bool coversEverything = copyX == 0 && copyY == 0 && copyW >= src->drawnWidth && copyH >= src->drawnHeight;
	if (!coversEverything) {
		*partial = true;
	}
	BlitFramebuffer(dst, copyX, copyY, src, copyX, copyY, copyW, copyH, 0, RASTER_COLOR, "CopyFBForColorTexture");
}
1338
1339
Draw::Texture *FramebufferManagerCommon::MakePixelTexture(const u8 *srcPixels, GEBufferFormat srcPixelFormat, int srcStride, int width, int height) {
1340
Draw::DataFormat depthFormat = Draw::DataFormat::UNDEFINED;
1341
1342
int bpp = BufferFormatBytesPerPixel(srcPixelFormat);
1343
int srcStrideInBytes = srcStride * bpp;
1344
int widthInBytes = width * bpp;
1345
1346
// Compute hash of contents.
1347
uint64_t imageHash;
1348
if (widthInBytes == srcStrideInBytes) {
1349
imageHash = XXH3_64bits(srcPixels, widthInBytes * height);
1350
} else {
1351
XXH3_state_t *hashState = XXH3_createState();
1352
XXH3_64bits_reset(hashState);
1353
for (int y = 0; y < height; y++) {
1354
XXH3_64bits_update(hashState, srcPixels + srcStrideInBytes * y, widthInBytes);
1355
}
1356
imageHash = XXH3_64bits_digest(hashState);
1357
XXH3_freeState(hashState);
1358
}
1359
1360
Draw::DataFormat texFormat = preferredPixelsFormat_;
1361
1362
if (srcPixelFormat == GE_FORMAT_DEPTH16) {
1363
if ((draw_->GetDataFormatSupport(Draw::DataFormat::R16_UNORM) & Draw::FMT_TEXTURE) != 0) {
1364
texFormat = Draw::DataFormat::R16_UNORM;
1365
} else if ((draw_->GetDataFormatSupport(Draw::DataFormat::R8_UNORM) & Draw::FMT_TEXTURE) != 0) {
1366
// This could be improved by using specific draw shaders to pack full precision in two channels.
1367
// However, not really worth the trouble until we find a game that requires it.
1368
texFormat = Draw::DataFormat::R8_UNORM;
1369
} else {
1370
// No usable single channel format. Can't be bothered.
1371
return nullptr;
1372
}
1373
} else if (srcPixelFormat == GE_FORMAT_565) {
1374
// Check for supported matching formats.
1375
// This mainly benefits the redundant copies in God of War on low-end platforms.
1376
if ((draw_->GetDataFormatSupport(Draw::DataFormat::B5G6R5_UNORM_PACK16) & Draw::FMT_TEXTURE) != 0) {
1377
texFormat = Draw::DataFormat::B5G6R5_UNORM_PACK16;
1378
} else if ((draw_->GetDataFormatSupport(Draw::DataFormat::R5G6B5_UNORM_PACK16) & Draw::FMT_TEXTURE) != 0) {
1379
texFormat = Draw::DataFormat::R5G6B5_UNORM_PACK16;
1380
}
1381
}
1382
1383
// TODO: We can just change the texture format and flip some bits around instead of this.
1384
// Could share code with the texture cache perhaps.
1385
auto generateTexture = [&](uint8_t *data, const uint8_t *initData, uint32_t w, uint32_t h, uint32_t d, uint32_t byteStride, uint32_t sliceByteStride) {
1386
for (int y = 0; y < height; y++) {
1387
const u16_le *src16 = (const u16_le *)srcPixels + srcStride * y;
1388
const u32_le *src32 = (const u32_le *)srcPixels + srcStride * y;
1389
u32 *dst = (u32 *)(data + byteStride * y);
1390
u16 *dst16 = (u16 *)(data + byteStride * y);
1391
u8 *dst8 = (u8 *)(data + byteStride * y);
1392
switch (srcPixelFormat) {
1393
case GE_FORMAT_565:
1394
if (texFormat == Draw::DataFormat::B5G6R5_UNORM_PACK16) {
1395
memcpy(dst16, src16, w * sizeof(uint16_t));
1396
} else if (texFormat == Draw::DataFormat::R5G6B5_UNORM_PACK16) {
1397
ConvertRGB565ToBGR565(dst16, src16, width); // Fast!
1398
} else if (texFormat == Draw::DataFormat::B8G8R8A8_UNORM) {
1399
ConvertRGB565ToBGRA8888(dst, src16, width);
1400
} else {
1401
ConvertRGB565ToRGBA8888(dst, src16, width);
1402
}
1403
break;
1404
1405
case GE_FORMAT_5551:
1406
if (texFormat == Draw::DataFormat::B8G8R8A8_UNORM)
1407
ConvertRGBA5551ToBGRA8888(dst, src16, width);
1408
else
1409
ConvertRGBA5551ToRGBA8888(dst, src16, width);
1410
break;
1411
1412
case GE_FORMAT_4444:
1413
if (texFormat == Draw::DataFormat::B8G8R8A8_UNORM)
1414
ConvertRGBA4444ToBGRA8888(dst, src16, width);
1415
else
1416
ConvertRGBA4444ToRGBA8888(dst, src16, width);
1417
break;
1418
1419
case GE_FORMAT_8888:
1420
if (texFormat == Draw::DataFormat::B8G8R8A8_UNORM)
1421
ConvertRGBA8888ToBGRA8888(dst, src32, width);
1422
// This means use original pointer as-is. May avoid or optimize a copy.
1423
else if (srcStride == width)
1424
return false;
1425
else
1426
memcpy(dst, src32, width * 4);
1427
break;
1428
1429
case GE_FORMAT_DEPTH16:
1430
// TODO: Must take the depth range into account, unless it's already 0-1.
1431
// TODO: Depending on the color buffer format used with this depth buffer, we need
1432
// to do one of two different swizzle operations. However, for the only use of this so far,
1433
// the Burnout lens flare trickery, swizzle doesn't matter since it's just a 0, 7fff, 0, 7fff pattern
1434
// which comes out the same.
1435
if (texFormat == Draw::DataFormat::R16_UNORM) {
1436
// We just use this format straight.
1437
memcpy(dst16, src16, w * 2);
1438
} else if (texFormat == Draw::DataFormat::R8_UNORM) {
1439
// We fall back to R8_UNORM. Precision is enough for most cases of depth clearing and initialization we've seen,
1440
// but hardly ideal.
1441
for (int i = 0; i < width; i++) {
1442
dst8[i] = src16[i] >> 8;
1443
}
1444
}
1445
break;
1446
1447
case GE_FORMAT_INVALID:
1448
case GE_FORMAT_CLUT8:
1449
// Bad
1450
break;
1451
}
1452
}
1453
return true;
1454
};
1455
1456
int frameNumber = draw_->GetFrameCount();
1457
1458
// First look for an exact match (including contents hash) that we can re-use.
1459
for (auto &iter : drawPixelsCache_) {
1460
if (iter.contentsHash == imageHash && iter.tex->Width() == width && iter.tex->Height() == height && iter.tex->Format() == texFormat) {
1461
iter.frameNumber = frameNumber;
1462
gpuStats.numCachedUploads++;
1463
return iter.tex;
1464
}
1465
}
1466
1467
// Then, look for an alternative one that's not been used recently that we can overwrite.
1468
for (auto &iter : drawPixelsCache_) {
1469
if (iter.frameNumber >= frameNumber - 3 || iter.tex->Width() != width || iter.tex->Height() != height || iter.tex->Format() != texFormat) {
1470
continue;
1471
}
1472
1473
// OK, current one seems good, let's use it (and mark it used).
1474
gpuStats.numUploads++;
1475
draw_->UpdateTextureLevels(iter.tex, &srcPixels, generateTexture, 1);
1476
// NOTE: numFlips is no good - this is called every frame when paused sometimes!
1477
iter.frameNumber = frameNumber;
1478
// We need to update the hash for future matching.
1479
iter.contentsHash = imageHash;
1480
return iter.tex;
1481
}
1482
1483
// Note: For depth, we create an R16_UNORM texture, that'll be just fine for uploading depth through a shader,
1484
// and likely more efficient.
1485
Draw::TextureDesc desc{
1486
Draw::TextureType::LINEAR2D,
1487
texFormat,
1488
width,
1489
height,
1490
1,
1491
1,
1492
false,
1493
Draw::TextureSwizzle::DEFAULT,
1494
"DrawPixels",
1495
{ (uint8_t *)srcPixels },
1496
generateTexture,
1497
};
1498
1499
// Hot Shots Golf (#12355) does tons of these in a frame in some situations! So creating textures
1500
// better be fast. So does God of War, a lot of the time, a bit unclear what it's doing.
1501
Draw::Texture *tex = draw_->CreateTexture(desc);
1502
if (!tex) {
1503
ERROR_LOG(Log::G3D, "Failed to create DrawPixels texture");
1504
}
1505
// We don't need to count here, already counted by numUploads by the caller.
1506
1507
// INFO_LOG(Log::G3D, "Creating drawPixelsCache texture: %dx%d", tex->Width(), tex->Height());
1508
1509
DrawPixelsEntry entry{ tex, imageHash, frameNumber };
1510
drawPixelsCache_.push_back(entry);
1511
gpuStats.numUploads++;
1512
return tex;
1513
}
1514
1515
bool FramebufferManagerCommon::DrawFramebufferToOutput(const u8 *srcPixels, int srcStride, GEBufferFormat srcPixelFormat) {
1516
textureCache_->ForgetLastTexture();
1517
shaderManager_->DirtyLastShader();
1518
1519
float u0 = 0.0f, u1 = 480.0f / 512.0f;
1520
float v0 = 0.0f, v1 = 1.0f;
1521
Draw::Texture *pixelsTex = MakePixelTexture(srcPixels, srcPixelFormat, srcStride, 512, 272);
1522
if (!pixelsTex)
1523
return false;
1524
1525
int uvRotation = useBufferedRendering_ ? g_Config.iInternalScreenRotation : ROTATION_LOCKED_HORIZONTAL;
1526
OutputFlags flags = g_Config.iDisplayFilter == SCALE_LINEAR ? OutputFlags::LINEAR : OutputFlags::NEAREST;
1527
if (needBackBufferYSwap_) {
1528
flags |= OutputFlags::BACKBUFFER_FLIPPED;
1529
}
1530
// CopyToOutput reverses these, probably to match "up".
1531
if (GetGPUBackend() == GPUBackend::DIRECT3D11) {
1532
flags |= OutputFlags::POSITION_FLIPPED;
1533
}
1534
1535
presentation_->UpdateUniforms(textureCache_->VideoIsPlaying());
1536
presentation_->SourceTexture(pixelsTex, 512, 272);
1537
presentation_->CopyToOutput(flags, uvRotation, u0, v0, u1, v1);
1538
1539
// PresentationCommon sets all kinds of state, we can't rely on anything.
1540
gstate_c.Dirty(DIRTY_ALL);
1541
1542
DiscardFramebufferCopy();
1543
currentRenderVfb_ = nullptr;
1544
1545
return true;
1546
}
1547
1548
void FramebufferManagerCommon::SetViewport2D(int x, int y, int w, int h) {
1549
Draw::Viewport viewport{ (float)x, (float)y, (float)w, (float)h, 0.0f, 1.0f };
1550
draw_->SetViewport(viewport);
1551
}
1552
1553
void FramebufferManagerCommon::CopyDisplayToOutput(bool reallyDirty) {
1554
DownloadFramebufferOnSwitch(currentRenderVfb_);
1555
shaderManager_->DirtyLastShader();
1556
1557
if (displayFramebufPtr_ == 0) {
1558
if (GetUIState() != UISTATE_PAUSEMENU) {
1559
if (Core_IsStepping())
1560
VERBOSE_LOG(Log::FrameBuf, "Display disabled, displaying only black");
1561
else
1562
DEBUG_LOG(Log::FrameBuf, "Display disabled, displaying only black");
1563
}
1564
// No framebuffer to display! Clear to black.
1565
if (useBufferedRendering_) {
1566
draw_->BindFramebufferAsRenderTarget(nullptr, { Draw::RPAction::CLEAR, Draw::RPAction::CLEAR, Draw::RPAction::CLEAR }, "CopyDisplayToOutput");
1567
}
1568
gstate_c.Dirty(DIRTY_VIEWPORTSCISSOR_STATE);
1569
presentation_->NotifyPresent();
1570
return;
1571
}
1572
1573
u32 offsetX = 0;
1574
u32 offsetY = 0;
1575
1576
// If it's not really dirty, we're probably frameskipping. Use the last working one.
1577
u32 fbaddr = reallyDirty ? displayFramebufPtr_ : prevDisplayFramebufPtr_;
1578
prevDisplayFramebufPtr_ = fbaddr;
1579
1580
VirtualFramebuffer *vfb = ResolveVFB(fbaddr, displayStride_, displayFormat_);
1581
if (!vfb) {
1582
// Let's search for a framebuf within this range. Note that we also look for
1583
// "framebuffers" sitting in RAM (created from block transfer or similar) so we only take off the kernel
1584
// and uncached bits of the address when comparing.
1585
const u32 addr = fbaddr;
1586
for (auto v : vfbs_) {
1587
const u32 v_addr = v->fb_address;
1588
const u32 v_size = v->BufferByteSize(RASTER_COLOR);
1589
1590
if (v->fb_format != displayFormat_ || v->fb_stride != displayStride_) {
1591
// Displaying a buffer of the wrong format or stride is nonsense, ignore it.
1592
continue;
1593
}
1594
1595
if (addr >= v_addr && addr < v_addr + v_size) {
1596
const u32 dstBpp = BufferFormatBytesPerPixel(v->fb_format);
1597
const u32 v_offsetX = ((addr - v_addr) / dstBpp) % v->fb_stride;
1598
const u32 v_offsetY = ((addr - v_addr) / dstBpp) / v->fb_stride;
1599
// We have enough space there for the display, right?
1600
if (v_offsetX + 480 > (u32)v->fb_stride || v->bufferHeight < v_offsetY + 272) {
1601
continue;
1602
}
1603
// Check for the closest one.
1604
if (offsetY == 0 || offsetY > v_offsetY) {
1605
offsetX = v_offsetX;
1606
offsetY = v_offsetY;
1607
vfb = v;
1608
}
1609
}
1610
}
1611
1612
if (vfb) {
1613
// Okay, we found one above.
1614
// Log should be "Displaying from framebuf" but not worth changing the report.
1615
INFO_LOG(Log::FrameBuf, "Rendering from framebuf with offset %08x -> %08x+%dx%d", addr, vfb->fb_address, offsetX, offsetY);
1616
}
1617
}
1618
1619
// Reject too-tiny framebuffers to display (Godfather, see issue #16915).
1620
if (vfb && vfb->height < 64) {
1621
vfb = nullptr;
1622
}
1623
1624
if (!vfb) {
1625
if (Memory::IsValidAddress(fbaddr)) {
1626
// The game is displaying something directly from RAM. In GTA, it's decoded video.
1627
// If successful, this effectively calls presentation_->NotifyPresent();
1628
if (!DrawFramebufferToOutput(Memory::GetPointerUnchecked(fbaddr), displayStride_, displayFormat_)) {
1629
if (useBufferedRendering_) {
1630
// Bind and clear the backbuffer. This should be the first time during the frame that it's bound.
1631
draw_->BindFramebufferAsRenderTarget(nullptr, { Draw::RPAction::CLEAR, Draw::RPAction::CLEAR, Draw::RPAction::CLEAR }, "CopyDisplayToOutput_DrawError");
1632
}
1633
presentation_->NotifyPresent();
1634
}
1635
return;
1636
} else {
1637
DEBUG_LOG(Log::FrameBuf, "Found no FBO to display! displayFBPtr = %08x", fbaddr);
1638
// No framebuffer to display! Clear to black.
1639
if (useBufferedRendering_) {
1640
// Bind and clear the backbuffer. This should be the first time during the frame that it's bound.
1641
draw_->BindFramebufferAsRenderTarget(nullptr, { Draw::RPAction::CLEAR, Draw::RPAction::CLEAR, Draw::RPAction::CLEAR }, "CopyDisplayToOutput_NoFBO");
1642
} // For non-buffered rendering, every frame is cleared anyway.
1643
gstate_c.Dirty(DIRTY_VIEWPORTSCISSOR_STATE);
1644
presentation_->NotifyPresent();
1645
return;
1646
}
1647
}
1648
1649
vfb->usageFlags |= FB_USAGE_DISPLAYED_FRAMEBUFFER;
1650
vfb->last_frame_displayed = gpuStats.numFlips;
1651
vfb->dirtyAfterDisplay = false;
1652
vfb->reallyDirtyAfterDisplay = false;
1653
1654
if (prevDisplayFramebuf_ != displayFramebuf_) {
1655
prevPrevDisplayFramebuf_ = prevDisplayFramebuf_;
1656
}
1657
if (displayFramebuf_ != vfb) {
1658
prevDisplayFramebuf_ = displayFramebuf_;
1659
}
1660
displayFramebuf_ = vfb;
1661
1662
if (vfb->fbo) {
1663
if (GetUIState() != UISTATE_PAUSEMENU) {
1664
if (Core_IsStepping())
1665
VERBOSE_LOG(Log::FrameBuf, "Displaying FBO %08x", vfb->fb_address);
1666
else
1667
DEBUG_LOG(Log::FrameBuf, "Displaying FBO %08x", vfb->fb_address);
1668
}
1669
1670
float u0 = offsetX / (float)vfb->bufferWidth;
1671
float v0 = offsetY / (float)vfb->bufferHeight;
1672
float u1 = (480.0f + offsetX) / (float)vfb->bufferWidth;
1673
float v1 = (272.0f + offsetY) / (float)vfb->bufferHeight;
1674
1675
//clip the VR framebuffer to keep the aspect ratio
1676
if (IsVREnabled() && !IsFlatVRGame() && !IsGameVRScene()) {
1677
float aspect = 272.0f / 480.0f * (IsImmersiveVRMode() ? 2.0f : 1.0f);
1678
float clipY = 272.0f * (1.0f - aspect) / 2.0f;
1679
v0 = (clipY + offsetY) / (float)vfb->bufferHeight;
1680
v1 = (272.0f - clipY + offsetY) / (float)vfb->bufferHeight;
1681
1682
//zoom inside
1683
float zoom = IsImmersiveVRMode() ? 0.4f : 0.1f;
1684
u0 += zoom / aspect;
1685
u1 -= zoom / aspect;
1686
v0 += zoom;
1687
v1 -= zoom;
1688
}
1689
1690
textureCache_->ForgetLastTexture();
1691
1692
int uvRotation = useBufferedRendering_ ? g_Config.iInternalScreenRotation : ROTATION_LOCKED_HORIZONTAL;
1693
OutputFlags flags = g_Config.iDisplayFilter == SCALE_LINEAR ? OutputFlags::LINEAR : OutputFlags::NEAREST;
1694
if (needBackBufferYSwap_) {
1695
flags |= OutputFlags::BACKBUFFER_FLIPPED;
1696
}
1697
// DrawActiveTexture reverses these, probably to match "up".
1698
if (GetGPUBackend() == GPUBackend::DIRECT3D11) {
1699
flags |= OutputFlags::POSITION_FLIPPED;
1700
}
1701
1702
int actualWidth = (vfb->bufferWidth * vfb->renderWidth) / vfb->width;
1703
int actualHeight = (vfb->bufferHeight * vfb->renderHeight) / vfb->height;
1704
presentation_->UpdateUniforms(textureCache_->VideoIsPlaying());
1705
presentation_->SourceFramebuffer(vfb->fbo, actualWidth, actualHeight);
1706
presentation_->CopyToOutput(flags, uvRotation, u0, v0, u1, v1);
1707
} else if (useBufferedRendering_) {
1708
WARN_LOG(Log::FrameBuf, "Using buffered rendering, and current VFB lacks an FBO: %08x", vfb->fb_address);
1709
} else {
1710
// This is OK because here we're in "skip buffered" mode, so even if we haven't presented
1711
// we will have a render target.
1712
presentation_->NotifyPresent();
1713
}
1714
1715
// This may get called mid-draw if the game uses an immediate flip.
1716
// PresentationCommon sets all kinds of state, we can't rely on anything.
1717
gstate_c.Dirty(DIRTY_ALL);
1718
DiscardFramebufferCopy();
1719
currentRenderVfb_ = nullptr;
1720
}
1721
1722
void FramebufferManagerCommon::DecimateFBOs() {
1723
DiscardFramebufferCopy();
1724
currentRenderVfb_ = nullptr;
1725
1726
for (auto iter : fbosToDelete_) {
1727
iter->Release();
1728
}
1729
fbosToDelete_.clear();
1730
1731
for (size_t i = 0; i < vfbs_.size(); ++i) {
1732
VirtualFramebuffer *vfb = vfbs_[i];
1733
int age = frameLastFramebufUsed_ - std::max(vfb->last_frame_render, vfb->last_frame_used);
1734
1735
if (ShouldDownloadFramebufferColor(vfb) && age == 0 && !vfb->memoryUpdated) {
1736
ReadFramebufferToMemory(vfb, 0, 0, vfb->width, vfb->height, RASTER_COLOR, Draw::ReadbackMode::BLOCK);
1737
vfb->usageFlags = (vfb->usageFlags | FB_USAGE_DOWNLOAD | FB_USAGE_FIRST_FRAME_SAVED) & ~FB_USAGE_DOWNLOAD_CLEAR;
1738
}
1739
1740
// Let's also "decimate" the usageFlags.
1741
UpdateFramebufUsage(vfb);
1742
1743
if (vfb != displayFramebuf_ && vfb != prevDisplayFramebuf_ && vfb != prevPrevDisplayFramebuf_) {
1744
if (age > FBO_OLD_AGE) {
1745
INFO_LOG(Log::FrameBuf, "Decimating FBO for %08x (%ix%i %s), age %i", vfb->fb_address, vfb->width, vfb->height, GeBufferFormatToString(vfb->fb_format), age);
1746
DestroyFramebuf(vfb);
1747
vfbs_.erase(vfbs_.begin() + i--);
1748
}
1749
}
1750
}
1751
1752
for (auto it = tempFBOs_.begin(); it != tempFBOs_.end(); ) {
1753
int age = frameLastFramebufUsed_ - it->second.last_frame_used;
1754
if (age > FBO_OLD_AGE) {
1755
it->second.fbo->Release();
1756
it = tempFBOs_.erase(it);
1757
} else {
1758
++it;
1759
}
1760
}
1761
1762
// Do the same for ReadFramebuffersToMemory's VFBs
1763
for (size_t i = 0; i < bvfbs_.size(); ++i) {
1764
VirtualFramebuffer *vfb = bvfbs_[i];
1765
int age = frameLastFramebufUsed_ - vfb->last_frame_render;
1766
if (age > FBO_OLD_AGE) {
1767
INFO_LOG(Log::FrameBuf, "Decimating FBO for %08x (%dx%d %s), age %i", vfb->fb_address, vfb->width, vfb->height, GeBufferFormatToString(vfb->fb_format), age);
1768
DestroyFramebuf(vfb);
1769
bvfbs_.erase(bvfbs_.begin() + i--);
1770
}
1771
}
1772
1773
// And DrawPixels cached textures.
1774
1775
for (auto it = drawPixelsCache_.begin(); it != drawPixelsCache_.end(); ) {
1776
int age = draw_->GetFrameCount() - it->frameNumber;
1777
if (age > 10) {
1778
// INFO_LOG(Log::G3D, "Releasing drawPixelsCache texture: %dx%d", it->tex->Width(), it->tex->Height());
1779
it->tex->Release();
1780
it->tex = nullptr;
1781
it = drawPixelsCache_.erase(it);
1782
} else {
1783
++it;
1784
}
1785
}
1786
}
1787
1788
// Requires width/height to be set already.
1789
void FramebufferManagerCommon::ResizeFramebufFBO(VirtualFramebuffer *vfb, int w, int h, bool force, bool skipCopy) {
1790
_dbg_assert_(w > 0);
1791
_dbg_assert_(h > 0);
1792
VirtualFramebuffer old = *vfb;
1793
1794
int oldWidth = vfb->bufferWidth;
1795
int oldHeight = vfb->bufferHeight;
1796
1797
if (force) {
1798
vfb->bufferWidth = w;
1799
vfb->bufferHeight = h;
1800
} else {
1801
if (vfb->bufferWidth >= w && vfb->bufferHeight >= h) {
1802
return;
1803
}
1804
1805
// In case it gets thin and wide, don't resize down either side.
1806
vfb->bufferWidth = std::max((int)vfb->bufferWidth, w);
1807
vfb->bufferHeight = std::max((int)vfb->bufferHeight, h);
1808
}
1809
1810
bool force1x = false;
1811
switch (bloomHack_) {
1812
case 1:
1813
force1x = vfb->bufferWidth <= 128 || vfb->bufferHeight <= 64;
1814
break;
1815
case 2:
1816
force1x = vfb->bufferWidth <= 256 || vfb->bufferHeight <= 128;
1817
break;
1818
case 3:
1819
force1x = vfb->bufferWidth < 480 || vfb->bufferWidth > 800 || vfb->bufferHeight < 272; // GOW uses 864x272
1820
break;
1821
}
1822
1823
if ((vfb->usageFlags & FB_USAGE_COLOR_MIXED_DEPTH) && !PSP_CoreParameter().compat.flags().ForceLowerResolutionForEffectsOn) {
1824
force1x = false;
1825
}
1826
if (PSP_CoreParameter().compat.flags().Force04154000Download && vfb->fb_address == 0x04154000) {
1827
force1x = true;
1828
}
1829
1830
if (force1x && g_Config.iInternalResolution != 1) {
1831
vfb->renderScaleFactor = 1;
1832
vfb->renderWidth = vfb->bufferWidth;
1833
vfb->renderHeight = vfb->bufferHeight;
1834
} else {
1835
vfb->renderScaleFactor = renderScaleFactor_;
1836
vfb->renderWidth = (u16)(vfb->bufferWidth * renderScaleFactor_);
1837
vfb->renderHeight = (u16)(vfb->bufferHeight * renderScaleFactor_);
1838
}
1839
1840
bool creating = old.bufferWidth == 0;
1841
if (creating) {
1842
INFO_LOG(Log::FrameBuf, "Creating %s FBO at %08x/%08x stride=%d %dx%d (force=%d)", GeBufferFormatToString(vfb->fb_format), vfb->fb_address, vfb->z_address, vfb->fb_stride, vfb->bufferWidth, vfb->bufferHeight, (int)force);
1843
} else {
1844
INFO_LOG(Log::FrameBuf, "Resizing %s FBO at %08x/%08x stride=%d from %dx%d to %dx%d (force=%d, skipCopy=%d)", GeBufferFormatToString(vfb->fb_format), vfb->fb_address, vfb->z_address, vfb->fb_stride, old.bufferWidth, old.bufferHeight, vfb->bufferWidth, vfb->bufferHeight, (int)force, (int)skipCopy);
1845
}
1846
1847
// During hardware rendering, we always render at full color depth even if the game wouldn't on real hardware.
1848
// It's not worth the trouble trying to support lower bit-depth rendering, just
1849
// more cases to test that nobody will ever use.
1850
1851
textureCache_->ForgetLastTexture();
1852
1853
if (!useBufferedRendering_) {
1854
if (vfb->fbo) {
1855
vfb->fbo->Release();
1856
vfb->fbo = nullptr;
1857
}
1858
return;
1859
}
1860
if (!old.fbo && vfb->last_frame_failed != 0 && vfb->last_frame_failed - gpuStats.numFlips < 63) {
1861
// Don't constantly retry FBOs which failed to create.
1862
return;
1863
}
1864
1865
shaderManager_->DirtyLastShader();
1866
char tag[128];
1867
size_t len = FormatFramebufferName(vfb, tag, sizeof(tag));
1868
1869
gpuStats.numFBOsCreated++;
1870
1871
vfb->fbo = draw_->CreateFramebuffer({ vfb->renderWidth, vfb->renderHeight, 1, GetFramebufferLayers(), msaaLevel_, true, tag });
1872
if (Memory::IsVRAMAddress(vfb->fb_address) && vfb->fb_stride != 0) {
1873
NotifyMemInfo(MemBlockFlags::ALLOC, vfb->fb_address, vfb->BufferByteSize(RASTER_COLOR), tag, len);
1874
}
1875
if (Memory::IsVRAMAddress(vfb->z_address) && vfb->z_stride != 0) {
1876
char buf[128];
1877
size_t len = snprintf(buf, sizeof(buf), "Z_%s", tag);
1878
NotifyMemInfo(MemBlockFlags::ALLOC, vfb->z_address, vfb->z_stride * vfb->height * sizeof(uint16_t), buf, len);
1879
}
1880
if (old.fbo) {
1881
INFO_LOG(Log::FrameBuf, "Resizing FBO for %08x : %dx%dx%s", vfb->fb_address, w, h, GeBufferFormatToString(vfb->fb_format));
1882
if (vfb->fbo) {
1883
draw_->BindFramebufferAsRenderTarget(vfb->fbo, { Draw::RPAction::CLEAR, Draw::RPAction::CLEAR, Draw::RPAction::CLEAR }, "ResizeFramebufFBO");
1884
if (!skipCopy) {
1885
BlitFramebuffer(vfb, 0, 0, &old, 0, 0, std::min((u16)oldWidth, std::min(vfb->bufferWidth, vfb->width)), std::min((u16)oldHeight, std::min(vfb->height, vfb->bufferHeight)), 0, RASTER_COLOR, "BlitColor_ResizeFramebufFBO");
1886
}
1887
if (vfb->usageFlags & FB_USAGE_RENDER_DEPTH) {
1888
BlitFramebuffer(vfb, 0, 0, &old, 0, 0, std::min((u16)oldWidth, std::min(vfb->bufferWidth, vfb->width)), std::min((u16)oldHeight, std::min(vfb->height, vfb->bufferHeight)), 0, RASTER_DEPTH, "BlitDepth_ResizeFramebufFBO");
1889
}
1890
}
1891
fbosToDelete_.push_back(old.fbo);
1892
draw_->BindFramebufferAsRenderTarget(vfb->fbo, { Draw::RPAction::KEEP, Draw::RPAction::KEEP, Draw::RPAction::KEEP }, "ResizeFramebufFBO");
1893
} else {
1894
draw_->BindFramebufferAsRenderTarget(vfb->fbo, { Draw::RPAction::CLEAR, Draw::RPAction::CLEAR, Draw::RPAction::CLEAR }, "ResizeFramebufFBO");
1895
}
1896
DiscardFramebufferCopy();
1897
currentRenderVfb_ = vfb;
1898
1899
if (!vfb->fbo) {
1900
ERROR_LOG(Log::FrameBuf, "Error creating FBO during resize! %dx%d", vfb->renderWidth, vfb->renderHeight);
1901
vfb->last_frame_failed = gpuStats.numFlips;
1902
}
1903
}
1904
1905
struct CopyCandidate {
1906
VirtualFramebuffer *vfb = nullptr;
1907
int y = 0;
1908
int h = 0;
1909
1910
std::string ToString(RasterChannel channel) const {
1911
return StringFromFormat("%08x %s %dx%d y=%d h=%d", vfb->Address(channel), GeBufferFormatToString(vfb->Format(channel)), vfb->width, vfb->height, y, h);
1912
}
1913
};
1914
1915
// Picks one candidate out of the set, or returns nullptr if the set is empty.
// Candidates are visited in order; a later candidate replaces the current pick when it
// starts at a lower row (y), or otherwise when its bind sequence for the channel is higher.
// basePtr is currently unused.
static const CopyCandidate *GetBestCopyCandidate(const TinySet<CopyCandidate, 4> &candidates, uint32_t basePtr, RasterChannel channel) {
	const CopyCandidate *winner = nullptr;

	const size_t count = candidates.size();
	for (size_t idx = 0; idx < count; ++idx) {
		const CopyCandidate &contender = candidates[idx];

		// Heuristics inherited from the old algorithm: lower y offsets first, then bind sequence.
		if (winner && contender.y >= winner->y && contender.vfb->BindSeq(channel) <= winner->vfb->BindSeq(channel)) {
			// Neither criterion favors the contender, keep the current pick.
			continue;
		}
		winner = &contender;
	}
	return winner;
}
1939
1940
// This is called from detected memcopies and framebuffer initialization from VRAM. Not block transfers.
1941
// Also with specialized flags from some replacement functions. Only those will currently request depth copies!
1942
// NOTE: This is very tricky because there's no information about color depth here, so we'll have to make guesses
1943
// about what underlying framebuffer is the most likely to be the relevant ones. For src, we can probably prioritize recent
1944
// ones. For dst, less clear.
1945
bool FramebufferManagerCommon::NotifyFramebufferCopy(u32 src, u32 dst, int size, GPUCopyFlag flags, u32 skipDrawReason) {
1946
if (size == 0) {
1947
return false;
1948
}
1949
1950
dst &= 0x3FFFFFFF;
1951
src &= 0x3FFFFFFF;
1952
1953
if (Memory::IsVRAMAddress(dst))
1954
dst &= 0x041FFFFF;
1955
if (Memory::IsVRAMAddress(src))
1956
src &= 0x041FFFFF;
1957
1958
// TODO: Merge the below into FindTransferFramebuffer.
1959
// Or at least this should be like the other ones, gathering possible candidates
1960
// with the ability to list them out for debugging.
1961
1962
bool ignoreDstBuffer = flags & GPUCopyFlag::FORCE_DST_MATCH_MEM;
1963
bool ignoreSrcBuffer = flags & (GPUCopyFlag::FORCE_SRC_MATCH_MEM | GPUCopyFlag::MEMSET);
1964
1965
// TODO: In the future we should probably check both channels. Currently depth is only on request.
1966
RasterChannel channel = (flags & GPUCopyFlag::DEPTH_REQUESTED) ? RASTER_DEPTH : RASTER_COLOR;
1967
1968
TinySet<CopyCandidate, 4> srcCandidates;
1969
TinySet<CopyCandidate, 4> dstCandidates;
1970
1971
// TODO: These two loops should be merged into one utility function, similar to what's done with rectangle copies.
1972
1973
// First find candidates for the source.
1974
// We only look at the color channel for now.
1975
for (auto vfb : vfbs_) {
1976
if (vfb->fb_stride == 0 || ignoreSrcBuffer) {
1977
continue;
1978
}
1979
1980
// We only remove the kernel and uncached bits when comparing.
1981
const u32 vfb_address = vfb->Address(channel);
1982
const u32 vfb_size = vfb->BufferByteSize(channel);
1983
const u32 vfb_byteStride = vfb->BufferByteStride(channel);
1984
const int vfb_byteWidth = vfb->BufferByteWidth(channel);
1985
1986
CopyCandidate srcCandidate;
1987
srcCandidate.vfb = vfb;
1988
1989
// Special path for depth for now.
1990
if (channel == RASTER_DEPTH) {
1991
if (src == vfb->z_address && size == vfb->z_stride * 2 * vfb->height) {
1992
srcCandidate.y = 0;
1993
srcCandidate.h = vfb->height;
1994
srcCandidates.push_back(srcCandidate);
1995
}
1996
continue;
1997
}
1998
1999
if (src >= vfb_address && (src + size <= vfb_address + vfb_size || src == vfb_address)) {
2000
// Heuristic originally from dest below, but just as valid looking for the source.
2001
// Fixes a misdetection in Brothers in Arms: D-Day, issue #18512.
2002
if (vfb_address == dst && ((size == 0x44000 && vfb_size == 0x88000) || (size == 0x88000 && vfb_size == 0x44000))) {
2003
// Not likely to be a correct color format copy for this buffer. Ignore it, there will either be RAM
2004
// that can be displayed from, or another matching buffer with the right format if rendering is going on.
2005
// If we had scoring here, we should strongly penalize this target instead of ignoring it.
2006
WARN_LOG_N_TIMES(notify_copy_2x, 5, Log::FrameBuf, "Framebuffer size %08x conspicuously not matching copy size %08x for source in NotifyFramebufferCopy. Ignoring.", size, vfb_size);
2007
continue;
2008
}
2009
2010
if ((u32)size > vfb_size + 0x1000 && vfb->fb_format != GE_FORMAT_8888 && vfb->last_frame_render < gpuStats.numFlips) {
2011
// Seems likely we are looking at a potential copy of 32-bit pixels (like video) to an old 16-bit buffer,
2012
// which is very likely simply the wrong target, so skip it. See issue #17740 where this happens in Naruto Ultimate Ninja Heroes 2.
2013
// Probably no point to give it a bad score and let it pass to sorting, as we're pretty sure here.
2014
WARN_LOG_N_TIMES(notify_copy_2x, 5, Log::FrameBuf, "Framebuffer size %08x too small for %08x bytes of data and also 16-bit (%s), and not rendered to this frame. Ignoring.", vfb_size, size, GeBufferFormatToString(vfb->fb_format));
2015
continue;
2016
}
2017
2018
const u32 offset = src - vfb_address;
2019
const u32 yOffset = offset / vfb_byteStride;
2020
if ((offset % vfb_byteStride) == 0 && (size == vfb_byteWidth || (size % vfb_byteStride) == 0)) {
2021
srcCandidate.y = yOffset;
2022
srcCandidate.h = size == vfb_byteWidth ? 1 : std::min((u32)size / vfb_byteStride, (u32)vfb->height);
2023
} else if ((offset % vfb_byteStride) == 0 && size == vfb->fb_stride) {
2024
// Valkyrie Profile reads 512 bytes at a time, rather than 2048. So, let's whitelist fb_stride also.
2025
srcCandidate.y = yOffset;
2026
srcCandidate.h = 1;
2027
} else if (yOffset == 0 && (vfb->usageFlags & FB_USAGE_CLUT)) {
2028
// Okay, last try - it might be a clut.
2029
srcCandidate.y = yOffset;
2030
srcCandidate.h = 1;
2031
} else {
2032
continue;
2033
}
2034
srcCandidates.push_back(srcCandidate);
2035
}
2036
}
2037
2038
for (auto vfb : vfbs_) {
2039
if (vfb->fb_stride == 0 || ignoreDstBuffer) {
2040
continue;
2041
}
2042
2043
// We only remove the kernel and uncached bits when comparing.
2044
const u32 vfb_address = vfb->Address(channel);
2045
const u32 vfb_size = vfb->BufferByteSize(channel);
2046
const u32 vfb_byteStride = vfb->BufferByteStride(channel);
2047
const int vfb_byteWidth = vfb->BufferByteWidth(channel);
2048
2049
// Heuristic to try to prevent potential glitches with video playback.
2050
if (vfb_address == dst && ((size == 0x44000 && vfb_size == 0x88000) || (size == 0x88000 && vfb_size == 0x44000))) {
2051
// Not likely to be a correct color format copy for this buffer. Ignore it, there will either be RAM
2052
// that can be displayed from, or another matching buffer with the right format if rendering is going on.
2053
// If we had scoring here, we should strongly penalize this target instead of ignoring it.
2054
WARN_LOG_N_TIMES(notify_copy_2x, 5, Log::FrameBuf, "Framebuffer size %08x conspicuously not matching copy size %08x for dest in NotifyFramebufferCopy. Ignoring.", size, vfb_size);
2055
continue;
2056
}
2057
2058
CopyCandidate dstCandidate;
2059
dstCandidate.vfb = vfb;
2060
2061
// Special path for depth for now.
2062
if (channel == RASTER_DEPTH) {
2063
// Let's assume exact matches only for simplicity.
2064
if (dst == vfb->z_address && size == vfb->z_stride * 2 * vfb->height) {
2065
dstCandidate.y = 0;
2066
dstCandidate.h = vfb->height;
2067
dstCandidates.push_back(dstCandidate);
2068
}
2069
continue;
2070
}
2071
2072
if (!ignoreDstBuffer && dst >= vfb_address && (dst + size <= vfb_address + vfb_size || dst == vfb_address)) {
2073
const u32 offset = dst - vfb_address;
2074
const u32 yOffset = offset / vfb_byteStride;
2075
if ((offset % vfb_byteStride) == 0 && (size <= vfb_byteWidth || (size % vfb_byteStride) == 0)) {
2076
dstCandidate.y = yOffset;
2077
dstCandidate.h = (size <= vfb_byteWidth) ? 1 : std::min((u32)size / vfb_byteStride, (u32)vfb->height);
2078
dstCandidates.push_back(dstCandidate);
2079
}
2080
}
2081
}
2082
2083
// For now fill in these old variables from the candidates to reduce the initial diff.
2084
VirtualFramebuffer *dstBuffer = nullptr;
2085
VirtualFramebuffer *srcBuffer = nullptr;
2086
int srcY;
2087
int srcH;
2088
int dstY;
2089
int dstH;
2090
2091
const CopyCandidate *bestSrc = GetBestCopyCandidate(srcCandidates, src, channel);
2092
if (bestSrc) {
2093
srcBuffer = bestSrc->vfb;
2094
srcY = bestSrc->y;
2095
srcH = bestSrc->h;
2096
}
2097
const CopyCandidate *bestDst = GetBestCopyCandidate(dstCandidates, dst, channel);
2098
if (bestDst) {
2099
dstBuffer = bestDst->vfb;
2100
dstY = bestDst->y;
2101
dstH = bestDst->h;
2102
}
2103
2104
if (srcCandidates.size() > 1) {
2105
if (Reporting::ShouldLogNTimes("mulblock", 5)) {
2106
std::string log;
2107
for (size_t i = 0; i < srcCandidates.size(); i++) {
2108
log += " - " + srcCandidates[i].ToString(channel);
2109
if (bestSrc && srcCandidates[i].vfb == bestSrc->vfb) {
2110
log += " * \n";
2111
} else {
2112
log += "\n";
2113
}
2114
}
2115
WARN_LOG(Log::FrameBuf, "Copy: Multiple src vfb candidates for (src: %08x, size: %d):\n%s (%s)", src, size, log.c_str(), RasterChannelToString(channel));
2116
}
2117
}
2118
2119
if (dstCandidates.size() > 1) {
2120
if (Reporting::ShouldLogNTimes("mulblock", 5)) {
2121
std::string log;
2122
for (size_t i = 0; i < dstCandidates.size(); i++) {
2123
log += " - " + dstCandidates[i].ToString(channel);
2124
if (bestDst && dstCandidates[i].vfb == bestDst->vfb) {
2125
log += " * \n";
2126
} else {
2127
log += "\n";
2128
}
2129
}
2130
WARN_LOG(Log::FrameBuf, "Copy: Multiple dst vfb candidates for (dst: %08x, size: %d):\n%s (%s)", src, size, log.c_str(), RasterChannelToString(channel));
2131
}
2132
}
2133
2134
if (!useBufferedRendering_) {
2135
// If we're copying into a recently used display buf, it's probably destined for the screen.
2136
if (channel == RASTER_DEPTH || srcBuffer || (dstBuffer != displayFramebuf_ && dstBuffer != prevDisplayFramebuf_)) {
2137
return false;
2138
}
2139
}
2140
2141
if (!dstBuffer && srcBuffer && channel != RASTER_DEPTH) {
2142
// Note - if we're here, we're in a memcpy, not a block transfer. Not allowing IntraVRAMBlockTransferAllowCreateFB.
2143
// Technically, that makes BlockTransferAllowCreateFB a bit of a misnomer.
2144
bool allowCreateFB = (PSP_CoreParameter().compat.flags().BlockTransferAllowCreateFB || GetSkipGPUReadbackMode() == SkipGPUReadbackMode::COPY_TO_TEXTURE);
2145
if (allowCreateFB && !(flags & GPUCopyFlag::DISALLOW_CREATE_VFB)) {
2146
dstBuffer = CreateRAMFramebuffer(dst, srcBuffer->width, srcBuffer->height, srcBuffer->fb_stride, srcBuffer->fb_format);
2147
dstY = 0;
2148
}
2149
}
2150
if (dstBuffer) {
2151
dstBuffer->last_frame_used = gpuStats.numFlips;
2152
if (channel == RASTER_DEPTH && !srcBuffer)
2153
dstBuffer->usageFlags |= FB_USAGE_COLOR_MIXED_DEPTH;
2154
}
2155
if (srcBuffer && channel == RASTER_DEPTH && !dstBuffer)
2156
srcBuffer->usageFlags |= FB_USAGE_COLOR_MIXED_DEPTH;
2157
2158
if (dstBuffer && srcBuffer) {
2159
if (srcBuffer == dstBuffer) {
2160
WARN_LOG_ONCE(dstsrccpy, Log::FrameBuf, "Intra-buffer memcpy (not supported) %08x -> %08x (size: %x)", src, dst, size);
2161
} else {
2162
WARN_LOG_ONCE(dstnotsrccpy, Log::FrameBuf, "Inter-buffer memcpy %08x -> %08x (size: %x)", src, dst, size);
2163
// Just do the blit!
2164
BlitFramebuffer(dstBuffer, 0, dstY, srcBuffer, 0, srcY, srcBuffer->width, srcH, 0, channel, "Blit_InterBufferMemcpy");
2165
SetColorUpdated(dstBuffer, skipDrawReason);
2166
RebindFramebuffer("RebindFramebuffer - Inter-buffer memcpy");
2167
}
2168
return false;
2169
} else if (dstBuffer) {
2170
if (flags & GPUCopyFlag::MEMSET) {
2171
gpuStats.numClears++;
2172
}
2173
WARN_LOG_N_TIMES(btucpy, 5, Log::FrameBuf, "Memcpy fbo upload %08x -> %08x (size: %x)", src, dst, size);
2174
FlushBeforeCopy();
2175
2176
// TODO: Hot Shots Golf makes a lot of these during the "meter", to copy back the image to the screen, it copies line by line.
2177
// We could collect these in a buffer and flush on the next draw, or something like that, to avoid that. The line copies cause
2178
// awkward visual artefacts.
2179
const u8 *srcBase = Memory::GetPointerUnchecked(src);
2180
GEBufferFormat srcFormat = channel == RASTER_DEPTH ? GE_FORMAT_DEPTH16 : dstBuffer->fb_format;
2181
int srcStride = channel == RASTER_DEPTH ? dstBuffer->z_stride : dstBuffer->fb_stride;
2182
DrawPixels(dstBuffer, 0, dstY, srcBase, srcFormat, srcStride, dstBuffer->width, dstH, channel, "MemcpyFboUpload_DrawPixels");
2183
SetColorUpdated(dstBuffer, skipDrawReason);
2184
RebindFramebuffer("RebindFramebuffer - Memcpy fbo upload");
2185
// This is a memcpy, let's still copy just in case.
2186
return false;
2187
} else if (srcBuffer) {
2188
WARN_LOG_N_TIMES(btdcpy, 5, Log::FrameBuf, "Memcpy fbo download %08x -> %08x", src, dst);
2189
FlushBeforeCopy();
2190
// TODO: In Hot Shots Golf, check if we can do a readback to a framebuffer here.
2191
// Again we have the problem though that it's doing a lot of small copies here, one for each line.
2192
if (srcH == 0 || srcY + srcH > srcBuffer->bufferHeight) {
2193
WARN_LOG_ONCE(btdcpyheight, Log::FrameBuf, "Memcpy fbo download %08x -> %08x skipped, %d+%d is taller than %d", src, dst, srcY, srcH, srcBuffer->bufferHeight);
2194
} else if (GetSkipGPUReadbackMode() == SkipGPUReadbackMode::NO_SKIP && (!srcBuffer->memoryUpdated || channel == RASTER_DEPTH)) {
2195
ReadFramebufferToMemory(srcBuffer, 0, srcY, srcBuffer->width, srcH, channel, Draw::ReadbackMode::BLOCK);
2196
srcBuffer->usageFlags = (srcBuffer->usageFlags | FB_USAGE_DOWNLOAD) & ~FB_USAGE_DOWNLOAD_CLEAR;
2197
}
2198
return false;
2199
} else {
2200
return false;
2201
}
2202
}
2203
2204
std::string BlockTransferRect::ToString() const {
2205
int bpp = BufferFormatBytesPerPixel(channel == RASTER_DEPTH ? GE_FORMAT_DEPTH16 : vfb->fb_format);
2206
return StringFromFormat("%s %08x/%d/%s seq:%d %d,%d %dx%d", RasterChannelToString(channel), vfb->fb_address, vfb->FbStrideInBytes(), GeBufferFormatToString(vfb->fb_format), vfb->colorBindSeq, x_bytes / bpp, y, w_bytes / bpp, h);
2207
}
2208
2209
// This is used when looking for framebuffers for a block transfer.
2210
// The only known game to block transfer depth buffers is Iron Man, see #16530, so
2211
// we have a compat flag and pretty limited functionality for that.
2212
bool FramebufferManagerCommon::FindTransferFramebuffer(u32 basePtr, int stride_pixels, int x_pixels, int y, int w_pixels, int h, int bpp, bool destination, BlockTransferRect *rect) {
2213
basePtr &= 0x3FFFFFFF;
2214
if (Memory::IsVRAMAddress(basePtr))
2215
basePtr &= 0x041FFFFF;
2216
rect->vfb = nullptr;
2217
2218
if (!stride_pixels) {
2219
WARN_LOG(Log::FrameBuf, "Zero stride in FindTransferFrameBuffer, ignoring");
2220
return false;
2221
}
2222
2223
const u32 byteStride = stride_pixels * bpp;
2224
int x_bytes = x_pixels * bpp;
2225
int w_bytes = w_pixels * bpp;
2226
2227
TinySet<BlockTransferRect, 4> candidates;
2228
2229
// We work entirely in bytes when we do the matching, because games don't consistently use bpps that match
2230
// that of their buffers. Then after matching we try to map the copy to the simplest operation that does
2231
// what we need.
2232
2233
// We are only looking at color for now, have not found any block transfers of depth data (although it's plausible).
2234
2235
for (auto vfb : vfbs_) {
2236
BlockTransferRect candidate{ vfb, RASTER_COLOR };
2237
2238
// Two cases so far of games depending on depth copies: Iron Man in issue #16530 (buffer->buffer)
2239
// and also #17878 where a game does ram->buffer to an auto-swizzling (|0x600000) address,
2240
// to initialize Z with a pre-rendered depth buffer.
2241
if (vfb->z_address == basePtr && vfb->BufferByteStride(RASTER_DEPTH) == byteStride && PSP_CoreParameter().compat.flags().BlockTransferDepth) {
2242
WARN_LOG_N_TIMES(z_xfer, 5, Log::FrameBuf, "FindTransferFramebuffer: found matching depth buffer, %08x (dest=%d, bpp=%d)", basePtr, (int)destination, bpp);
2243
candidate.channel = RASTER_DEPTH;
2244
candidate.x_bytes = x_pixels * bpp;
2245
candidate.w_bytes = w_pixels * bpp;
2246
candidate.y = y;
2247
candidate.h = h;
2248
candidates.push_back(candidate);
2249
continue;
2250
}
2251
2252
const u32 vfb_address = vfb->fb_address;
2253
const u32 vfb_size = vfb->BufferByteSize(RASTER_COLOR);
2254
2255
if (basePtr < vfb_address || basePtr >= vfb_address + vfb_size) {
2256
continue;
2257
}
2258
2259
const u32 vfb_bpp = BufferFormatBytesPerPixel(vfb->fb_format);
2260
const u32 vfb_byteStride = vfb->FbStrideInBytes();
2261
const u32 vfb_byteWidth = vfb->WidthInBytes();
2262
2263
candidate.w_bytes = w_pixels * bpp;
2264
candidate.h = h;
2265
2266
const u32 byteOffset = basePtr - vfb_address;
2267
const int memXOffset = byteOffset % byteStride;
2268
const int memYOffset = byteOffset / byteStride;
2269
2270
// Some games use mismatching bitdepths. But make sure the stride matches.
2271
// If it doesn't, generally this means we detected the framebuffer with too large a height.
2272
// Use bufferHeight in case of buffers that resize up and down often per frame (Valkyrie Profile.)
2273
2274
// If it's outside the vfb by a single pixel, we currently disregard it.
2275
if (memYOffset > vfb->bufferHeight - h) {
2276
continue;
2277
}
2278
2279
if (byteOffset == vfb->WidthInBytes() && w_bytes < vfb->FbStrideInBytes()) {
2280
// Looks like we're in a margin texture of the vfb, which is not the vfb itself.
2281
// Ignore the match.
2282
continue;
2283
}
2284
2285
if (vfb_byteStride != byteStride) {
2286
// Grand Knights History occasionally copies with a mismatching stride but a full line at a time.
2287
// That's why we multiply by height, not width - this copy is a rectangle with the wrong stride but a line with the correct one.
2288
// Makes it hard to detect the wrong transfers in e.g. God of War.
2289
if (w_pixels != stride_pixels || (byteStride * h != vfb_byteStride && byteStride * h != vfb_byteWidth)) {
2290
if (destination) {
2291
// However, some other games write cluts to framebuffers.
2292
// Let's catch this and upload. Otherwise reject the match.
2293
bool match = (vfb->usageFlags & FB_USAGE_CLUT) != 0;
2294
if (match) {
2295
candidate.w_bytes = byteStride * h;
2296
h = 1;
2297
} else {
2298
continue;
2299
}
2300
} else {
2301
continue;
2302
}
2303
} else {
2304
// This is the Grand Knights History case.
2305
candidate.w_bytes = byteStride * h;
2306
candidate.h = 1;
2307
}
2308
} else {
2309
candidate.w_bytes = w_bytes;
2310
candidate.h = h;
2311
}
2312
2313
candidate.x_bytes = x_bytes + memXOffset;
2314
candidate.y = y + memYOffset;
2315
candidate.vfb = vfb;
2316
candidates.push_back(candidate);
2317
}
2318
2319
const BlockTransferRect *best = nullptr;
2320
// Sort candidates by just recency for now, we might add other.
2321
for (size_t i = 0; i < candidates.size(); i++) {
2322
const BlockTransferRect *candidate = &candidates[i];
2323
2324
bool better = !best;
2325
if (!better) {
2326
if (candidate->channel == best->channel) {
2327
better = candidate->vfb->BindSeq(candidate->channel) > best->vfb->BindSeq(candidate->channel);
2328
} else {
2329
// Prefer depth over color if the address match is perfect.
2330
if (candidate->channel == RASTER_DEPTH && best->channel == RASTER_COLOR && candidate->vfb->z_address == basePtr) {
2331
better = true;
2332
}
2333
}
2334
}
2335
2336
if ((candidate->vfb->usageFlags & FB_USAGE_CLUT) && candidate->x_bytes == 0 && candidate->y == 0 && destination) {
2337
// Hack to prioritize copies to clut buffers.
2338
best = candidate;
2339
break;
2340
}
2341
if (better) {
2342
best = candidate;
2343
}
2344
}
2345
2346
if (candidates.size() > 1) {
2347
if (Reporting::ShouldLogNTimes("mulblock", 5)) {
2348
std::string log;
2349
for (size_t i = 0; i < candidates.size(); i++) {
2350
log += " - " + candidates[i].ToString() + "\n";
2351
}
2352
WARN_LOG(Log::FrameBuf, "Multiple framebuffer candidates for %08x/%d/%d %d,%d %dx%d (dest = %d):\n%s", basePtr, stride_pixels, bpp, x_pixels, y, w_pixels, h, (int)destination, log.c_str());
2353
}
2354
}
2355
2356
if (best) {
2357
*rect = *best;
2358
return true;
2359
} else {
2360
if (Memory::IsVRAMAddress(basePtr) && destination && h >= 128) {
2361
WARN_LOG_N_TIMES(nocands, 5, Log::FrameBuf, "Didn't find a destination candidate for %08x/%d/%d %d,%d %dx%d", basePtr, stride_pixels, bpp, x_pixels, y, w_pixels, h);
2362
}
2363
return false;
2364
}
2365
}
2366
2367
// Creates a new VirtualFramebuffer (with a backing FBO) at fbAddress, registers it in vfbs_ and returns it.
// Passing GE_FORMAT_DEPTH16 creates a depth target: the color address is set to an invalid value and the
// buffer is addressed through z_address instead. Any other format creates a color target.
VirtualFramebuffer *FramebufferManagerCommon::CreateRAMFramebuffer(uint32_t fbAddress, int width, int height, int stride, GEBufferFormat format) {
	INFO_LOG(Log::FrameBuf, "Creating RAM framebuffer at %08x (%dx%d, stride %d, fb_format %d)", fbAddress, width, height, stride, format);

	RasterChannel channel = format == GE_FORMAT_DEPTH16 ? RASTER_DEPTH : RASTER_COLOR;

	// A target for the destination is missing - so just create one!
	// Make sure this one would be found by FindTransferFramebuffer so we wouldn't
	// create a new one each frame.
	VirtualFramebuffer *vfb = new VirtualFramebuffer{};
	vfb->fbo = nullptr;
	uint32_t mask = Memory::IsVRAMAddress(fbAddress) ? 0x041FFFFF : 0x3FFFFFFF;
	if (format == GE_FORMAT_DEPTH16) {
		vfb->fb_address = 0xFFFFFFFF;  // Invalid address
		vfb->fb_stride = 0;
		// Store the masked address, same as for color below: FindTransferFramebuffer masks the transfer
		// pointer before comparing it against z_address, so an unmasked value could never match.
		vfb->z_address = fbAddress & mask;  // marks that if anyone tries to render with depth to this framebuffer, it should be dropped and recreated.
		vfb->z_stride = stride;
		vfb->width = width;
	} else {
		vfb->fb_address = fbAddress & mask;  // NOTE - not necessarily in VRAM!
		vfb->fb_stride = stride;
		vfb->z_address = 0;
		vfb->z_stride = 0;
		vfb->width = std::max(width, stride);
	}
	vfb->height = height;
	vfb->newWidth = vfb->width;
	vfb->newHeight = vfb->height;
	vfb->lastFrameNewSize = gpuStats.numFlips;
	vfb->renderScaleFactor = renderScaleFactor_;
	vfb->renderWidth = (u16)(vfb->width * renderScaleFactor_);
	vfb->renderHeight = (u16)(vfb->height * renderScaleFactor_);
	vfb->bufferWidth = vfb->width;
	vfb->bufferHeight = vfb->height;
	// A depth target still needs some color format; 8888 is used as the placeholder.
	vfb->fb_format = format == GE_FORMAT_DEPTH16 ? GE_FORMAT_8888 : format;
	vfb->usageFlags = format == GE_FORMAT_DEPTH16 ? FB_USAGE_RENDER_DEPTH : FB_USAGE_RENDER_COLOR;
	if (format != GE_FORMAT_DEPTH16) {
		SetColorUpdated(vfb, 0);
	}
	char name[64];
	snprintf(name, sizeof(name), "%08x_%s_RAM", vfb->Address(channel), RasterChannelToString(channel));
	textureCache_->NotifyFramebuffer(vfb, NOTIFY_FB_CREATED);
	bool createDepthBuffer = format == GE_FORMAT_DEPTH16;
	vfb->fbo = draw_->CreateFramebuffer({ vfb->renderWidth, vfb->renderHeight, 1, GetFramebufferLayers(), 0, createDepthBuffer, name });
	vfbs_.push_back(vfb);

	// Extend the tracked address range so that later transfers touching this buffer get checked.
	u32 byteSize = vfb->BufferByteSize(channel);
	if (fbAddress + byteSize > framebufColorRangeEnd_) {
		framebufColorRangeEnd_ = fbAddress + byteSize;
	}

	return vfb;
}
2419
2420
// 1:1 pixel size buffers, we resize buffers to these before we read them back.
2421
// TODO: We shouldn't keep whole VirtualFramebuffer structs for these - the fbo and last_frame_render is enough.
2422
VirtualFramebuffer *FramebufferManagerCommon::FindDownloadTempBuffer(VirtualFramebuffer *vfb, RasterChannel channel) {
2423
// For now we'll keep these on the same struct as the ones that can get displayed
2424
// (and blatantly copy work already done above while at it).
2425
VirtualFramebuffer *nvfb = nullptr;
2426
2427
// We maintain a separate vector of framebuffer objects for blitting.
2428
for (VirtualFramebuffer *v : bvfbs_) {
2429
if (v->Address(channel) == vfb->Address(channel) && v->Format(channel) == vfb->Format(channel)) {
2430
if (v->bufferWidth == vfb->bufferWidth && v->bufferHeight == vfb->bufferHeight) {
2431
nvfb = v;
2432
if (channel == RASTER_COLOR) {
2433
v->fb_stride = vfb->fb_stride;
2434
} else {
2435
v->z_stride = vfb->z_stride;
2436
}
2437
v->width = vfb->width;
2438
v->height = vfb->height;
2439
break;
2440
}
2441
}
2442
}
2443
2444
// Create a new fbo if none was found for the size
2445
if (!nvfb) {
2446
nvfb = new VirtualFramebuffer{};
2447
nvfb->fbo = nullptr;
2448
nvfb->fb_address = channel == RASTER_COLOR ? vfb->fb_address : 0;
2449
nvfb->fb_stride = channel == RASTER_COLOR ? vfb->fb_stride : 0;
2450
nvfb->z_address = channel == RASTER_DEPTH ? vfb->z_address : 0;
2451
nvfb->z_stride = channel == RASTER_DEPTH ? vfb->z_stride : 0;
2452
nvfb->width = vfb->width;
2453
nvfb->height = vfb->height;
2454
nvfb->renderWidth = vfb->bufferWidth;
2455
nvfb->renderHeight = vfb->bufferHeight;
2456
nvfb->renderScaleFactor = 1; // For readbacks we resize to the original size, of course.
2457
nvfb->bufferWidth = vfb->bufferWidth;
2458
nvfb->bufferHeight = vfb->bufferHeight;
2459
nvfb->fb_format = vfb->fb_format;
2460
nvfb->drawnWidth = vfb->drawnWidth;
2461
nvfb->drawnHeight = vfb->drawnHeight;
2462
2463
char name[64];
2464
snprintf(name, sizeof(name), "download_temp_%08x_%s", vfb->Address(channel), RasterChannelToString(channel));
2465
2466
// We always create a color-only framebuffer here - readbacks of depth convert to color while translating the values.
2467
nvfb->fbo = draw_->CreateFramebuffer({ nvfb->bufferWidth, nvfb->bufferHeight, 1, 1, 0, false, name });
2468
if (!nvfb->fbo) {
2469
ERROR_LOG(Log::FrameBuf, "Error creating FBO! %d x %d", nvfb->renderWidth, nvfb->renderHeight);
2470
delete nvfb;
2471
return nullptr;
2472
}
2473
bvfbs_.push_back(nvfb);
2474
} else {
2475
UpdateDownloadTempBuffer(nvfb);
2476
}
2477
2478
nvfb->usageFlags |= FB_USAGE_RENDER_COLOR;
2479
nvfb->last_frame_render = gpuStats.numFlips;
2480
nvfb->dirtyAfterDisplay = true;
2481
2482
return nvfb;
2483
}
2484
2485
void FramebufferManagerCommon::ApplyClearToMemory(int x1, int y1, int x2, int y2, u32 clearColor) {
2486
if (currentRenderVfb_) {
2487
if ((currentRenderVfb_->usageFlags & FB_USAGE_DOWNLOAD_CLEAR) != 0) {
2488
// Already zeroed in memory.
2489
return;
2490
}
2491
}
2492
2493
if (!Memory::IsValidAddress(gstate.getFrameBufAddress())) {
2494
return;
2495
}
2496
2497
u8 *addr = Memory::GetPointerWriteUnchecked(gstate.getFrameBufAddress());
2498
const int bpp = BufferFormatBytesPerPixel(gstate_c.framebufFormat);
2499
2500
u32 clearBits = clearColor;
2501
if (bpp == 2) {
2502
u16 clear16 = 0;
2503
switch (gstate_c.framebufFormat) {
2504
case GE_FORMAT_565: clear16 = RGBA8888toRGB565(clearColor); break;
2505
case GE_FORMAT_5551: clear16 = RGBA8888toRGBA5551(clearColor); break;
2506
case GE_FORMAT_4444: clear16 = RGBA8888toRGBA4444(clearColor); break;
2507
default: _dbg_assert_(0); break;
2508
}
2509
clearBits = clear16 | (clear16 << 16);
2510
}
2511
2512
const bool singleByteClear = (clearBits >> 16) == (clearBits & 0xFFFF) && (clearBits >> 24) == (clearBits & 0xFF);
2513
const int stride = gstate.FrameBufStride();
2514
const int width = x2 - x1;
2515
2516
const int byteStride = stride * bpp;
2517
const int byteWidth = width * bpp;
2518
for (int y = y1; y < y2; ++y) {
2519
NotifyMemInfo(MemBlockFlags::WRITE, gstate.getFrameBufAddress() + x1 * bpp + y * byteStride, byteWidth, "FramebufferClear");
2520
}
2521
2522
// Can use memset for simple cases. Often alpha is different and gums up the works.
2523
if (singleByteClear) {
2524
addr += x1 * bpp;
2525
for (int y = y1; y < y2; ++y) {
2526
memset(addr + y * byteStride, clearBits, byteWidth);
2527
}
2528
} else {
2529
// This will most often be true - rarely is the width not aligned.
2530
// TODO: We should really use non-temporal stores here to avoid the cache,
2531
// as it's unlikely that these bytes will be read.
2532
if ((width & 3) == 0 && (x1 & 3) == 0) {
2533
u64 val64 = clearBits | ((u64)clearBits << 32);
2534
int xstride = 8 / bpp;
2535
2536
u64 *addr64 = (u64 *)addr;
2537
const int stride64 = stride / xstride;
2538
const int x1_64 = x1 / xstride;
2539
const int x2_64 = x2 / xstride;
2540
for (int y = y1; y < y2; ++y) {
2541
for (int x = x1_64; x < x2_64; ++x) {
2542
addr64[y * stride64 + x] = val64;
2543
}
2544
}
2545
} else if (bpp == 4) {
2546
u32 *addr32 = (u32 *)addr;
2547
for (int y = y1; y < y2; ++y) {
2548
for (int x = x1; x < x2; ++x) {
2549
addr32[y * stride + x] = clearBits;
2550
}
2551
}
2552
} else if (bpp == 2) {
2553
u16 *addr16 = (u16 *)addr;
2554
for (int y = y1; y < y2; ++y) {
2555
for (int x = x1; x < x2; ++x) {
2556
addr16[y * stride + x] = (u16)clearBits;
2557
}
2558
}
2559
}
2560
}
2561
2562
if (currentRenderVfb_) {
2563
// The current content is in memory now, so update the flag.
2564
if (x1 == 0 && y1 == 0 && x2 >= currentRenderVfb_->width && y2 >= currentRenderVfb_->height) {
2565
currentRenderVfb_->usageFlags |= FB_USAGE_DOWNLOAD_CLEAR;
2566
currentRenderVfb_->memoryUpdated = true;
2567
}
2568
}
2569
}
2570
2571
// Called before a block transfer is carried out in emulated memory. Looks up framebuffers matching the
// source and destination and, where possible, performs the transfer on the GPU side (blit, depth upload
// or readback).
// Returns true when the memory copy should be skipped, false when it should still be performed.
// Always returns false when buffered rendering is off.
bool FramebufferManagerCommon::NotifyBlockTransferBefore(u32 dstBasePtr, int dstStride, int dstX, int dstY, u32 srcBasePtr, int srcStride, int srcX, int srcY, int width, int height, int bpp, u32 skipDrawReason) {
	if (!useBufferedRendering_) {
		return false;
	}

	// Skip checking if there's no framebuffers in that area. Make a special exception for obvious transfers to depth buffer, see issue #17878
	bool dstDepthSwizzle = Memory::IsVRAMAddress(dstBasePtr) && ((dstBasePtr & 0x600000) == 0x600000);

	if (!dstDepthSwizzle && !MayIntersectFramebufferColor(srcBasePtr) && !MayIntersectFramebufferColor(dstBasePtr)) {
		return false;
	}

	BlockTransferRect dstRect{};
	BlockTransferRect srcRect{};

	// These modify the X/Y/W/H parameters depending on the memory offset of the base pointers from the actual buffers.
	bool srcBuffer = FindTransferFramebuffer(srcBasePtr, srcStride, srcX, srcY, width, height, bpp, false, &srcRect);
	bool dstBuffer = FindTransferFramebuffer(dstBasePtr, dstStride, dstX, dstY, width, height, bpp, true, &dstRect);

	// dstRect.vfb is null when no destination was found, so only inspect it when dstBuffer is set.
	if (dstBuffer && srcRect.channel == RASTER_DEPTH) {
		// Ignore the found buffer if it's not 16-bit - we create a new more suitable one instead.
		if (dstRect.channel == RASTER_COLOR && dstRect.vfb->fb_format == GE_FORMAT_8888) {
			dstBuffer = false;
		}
	}

	if (srcBuffer && !dstBuffer) {
		// In here, we can't read from dstRect.
		if (PSP_CoreParameter().compat.flags().BlockTransferAllowCreateFB ||
			GetSkipGPUReadbackMode() == SkipGPUReadbackMode::COPY_TO_TEXTURE ||
			(PSP_CoreParameter().compat.flags().IntraVRAMBlockTransferAllowCreateFB &&
				Memory::IsVRAMAddress(srcRect.vfb->fb_address) && Memory::IsVRAMAddress(dstBasePtr))) {
			GEBufferFormat ramFormat;
			// Try to guess the appropriate format. We only know the bpp from the block transfer command (16 or 32 bit).
			if (srcRect.channel == RASTER_COLOR) {
				if (bpp == 4) {
					// Only one possibility unless it's doing split pixel tricks (which we could detect through stride maybe).
					ramFormat = GE_FORMAT_8888;
				} else if (srcRect.vfb->fb_format != GE_FORMAT_8888) {
					// We guess that the game will interpret the data the same as it was in the source of the copy.
					// Seems like a likely good guess, and works in Test Drive Unlimited.
					ramFormat = srcRect.vfb->fb_format;
				} else {
					// No info left - just fall back to something. But this is definitely split pixel tricks.
					ramFormat = GE_FORMAT_5551;
				}
				dstRect.vfb = CreateRAMFramebuffer(dstBasePtr, width, height, dstStride, ramFormat);
			} else {
				dstRect.vfb = CreateRAMFramebuffer(dstBasePtr, width, height, dstStride, GE_FORMAT_DEPTH16);
				dstRect.x_bytes = 0;
				dstRect.w_bytes = 2 * width;
				dstRect.y = 0;
				dstRect.h = height;
				dstRect.channel = RASTER_DEPTH;
			}
			dstBuffer = true;
		}
	}

	if (dstBuffer) {
		dstRect.vfb->last_frame_used = gpuStats.numFlips;
		// Mark the destination as fresh.
		if (dstRect.channel == RASTER_COLOR) {
			dstRect.vfb->colorBindSeq = GetBindSeqCount();
		} else {
			dstRect.vfb->depthBindSeq = GetBindSeqCount();
		}
	}

	if (dstBuffer && srcBuffer) {
		if (srcRect.vfb && srcRect.vfb == dstRect.vfb && srcRect.channel == dstRect.channel) {
			// Transfer within the same buffer.
			// This is a simple case because there will be no format conversion or similar shenanigans needed.
			// However, the BPP might still mismatch, but in such a case we can convert the coordinates.
			if (srcX == dstX && srcY == dstY) {
				// Ignore, nothing to do. Tales of Phantasia X does this by accident.
				// Returning true to also skip the memory copy.
				return true;
			}

			int buffer_bpp = BufferFormatBytesPerPixel(srcRect.vfb->Format(srcRect.channel));

			if (bpp != buffer_bpp) {
				WARN_LOG_ONCE(intrabpp, Log::G3D, "Mismatched transfer bpp in intra-buffer block transfer. Was %d, expected %d.", bpp, buffer_bpp);
				// We just switch to using the buffer's bpp, since we've already converted the rectangle to byte offsets.
				bpp = buffer_bpp;
			}

			WARN_LOG_N_TIMES(dstsrc, 5, Log::G3D, "Intra-buffer block transfer %dx%d %dbpp from %08x (x:%d y:%d stride:%d) -> %08x (x:%d y:%d stride:%d)",
				width, height, bpp,
				srcBasePtr, srcRect.x_bytes / bpp, srcRect.y, srcStride,
				dstBasePtr, dstRect.x_bytes / bpp, dstRect.y, dstStride);
			FlushBeforeCopy();
			// Some backends can handle blitting within a framebuffer. Others will just have to deal with it or ignore it, apparently.
			BlitFramebuffer(dstRect.vfb, dstX, dstY, srcRect.vfb, srcX, srcY, dstRect.w_bytes / bpp, dstRect.h, bpp, dstRect.channel, "Blit_IntraBufferBlockTransfer");
			RebindFramebuffer("rebind after intra block transfer");
			SetColorUpdated(dstRect.vfb, skipDrawReason);
			return true;  // Skip the memory copy.
		}

		// Straightforward blit between two same-format framebuffers.
		if (srcRect.vfb && srcRect.channel == dstRect.channel && srcRect.vfb->Format(srcRect.channel) == dstRect.vfb->Format(dstRect.channel)) {
			WARN_LOG_N_TIMES(dstnotsrc, 5, Log::G3D, "Inter-buffer %s block transfer %dx%d %dbpp from %08x (x:%d y:%d stride:%d %s) -> %08x (x:%d y:%d stride:%d %s)",
				RasterChannelToString(srcRect.channel),
				width, height, bpp,
				srcBasePtr, srcRect.x_bytes / bpp, srcRect.y, srcStride, GeBufferFormatToString(srcRect.vfb->fb_format),
				dstBasePtr, dstRect.x_bytes / bpp, dstRect.y, dstStride, GeBufferFormatToString(dstRect.vfb->fb_format));

			// Straight blit will do, but check the bpp, we might need to convert coordinates differently.
			int buffer_bpp = BufferFormatBytesPerPixel(srcRect.vfb->Format(srcRect.channel));
			if (bpp != buffer_bpp) {
				WARN_LOG_ONCE(intrabpp, Log::G3D, "Mismatched transfer bpp in inter-buffer block transfer. Was %d, expected %d.", bpp, buffer_bpp);
				// We just switch to using the buffer's bpp, since we've already converted the rectangle to byte offsets.
				bpp = buffer_bpp;
			}
			FlushBeforeCopy();
			BlitFramebuffer(dstRect.vfb, dstRect.x_bytes / bpp, dstRect.y, srcRect.vfb, srcRect.x_bytes / bpp, srcRect.y, srcRect.w_bytes / bpp, height, bpp, srcRect.channel, "Blit_InterBufferBlockTransfer");
			RebindFramebuffer("RebindFramebuffer - Inter-buffer block transfer");
			SetColorUpdated(dstRect.vfb, skipDrawReason);
			return true;
		}

		// Getting to the more complex cases. Have not actually seen much of these yet.
		WARN_LOG_N_TIMES(blockformat, 5, Log::G3D, "Mismatched buffer formats in block transfer: %s->%s (%dx%d)",
			GeBufferFormatToString(srcRect.vfb->Format(srcRect.channel)), GeBufferFormatToString(dstRect.vfb->Format(dstRect.channel)),
			width, height);

		// TODO

		// No need to actually do the memory copy behind, probably.
		return true;

	} else if (dstBuffer) {
		// Handle depth uploads directly here, and let's not bother copying the data. This is compat-flag-gated for now,
		// may generalize it when I remove the compat flag.
		if (dstRect.channel == RASTER_DEPTH) {
			WARN_LOG_ONCE(btud, Log::G3D, "Block transfer upload %08x -> %08x (%dx%d %d,%d bpp=%d %s)", srcBasePtr, dstBasePtr, width, height, dstX, dstY, bpp, RasterChannelToString(dstRect.channel));
			FlushBeforeCopy();
			const u8 *srcBase = Memory::GetPointerUnchecked(srcBasePtr) + (srcX + srcY * srcStride) * bpp;
			// Depth pixels are 2 bytes each, so the stride and width are converted from bytes to 16-bit units.
			DrawPixels(dstRect.vfb, dstX, dstY, srcBase, dstRect.vfb->Format(dstRect.channel), srcStride * bpp / 2, (int)(dstRect.w_bytes / 2), dstRect.h, dstRect.channel, "BlockTransferCopy_DrawPixelsDepth");
			RebindFramebuffer("RebindFramebuffer - UploadDepth");
			return true;
		}

		// Here we should just draw the pixels into the buffer. Return false to copy the memory first.
		// NotifyBlockTransferAfter will take care of the rest.
		return false;
	} else if (srcBuffer) {
		if (width == 48 && height == 48 && srcY == 224 && srcX == 432 && PSP_CoreParameter().compat.flags().TacticsOgreEliminateDebugReadback) {
			return false;
		}

		WARN_LOG_N_TIMES(btd, 10, Log::G3D, "Block transfer readback %dx%d %dbpp from %08x (x:%d y:%d stride:%d) -> %08x (x:%d y:%d stride:%d)",
			width, height, bpp,
			srcBasePtr, srcRect.x_bytes / bpp, srcRect.y, srcStride,
			dstBasePtr, dstRect.x_bytes / bpp, dstRect.y, dstStride);
		FlushBeforeCopy();
		if (GetSkipGPUReadbackMode() == SkipGPUReadbackMode::NO_SKIP && !srcRect.vfb->memoryUpdated) {
			const int srcBpp = BufferFormatBytesPerPixel(srcRect.vfb->fb_format);
			// Ratio between the transfer's pixel size and the buffer's, used to rescale x coordinates.
			const float srcXFactor = (float)bpp / srcBpp;
			const bool tooTall = srcY + srcRect.h > srcRect.vfb->bufferHeight;
			if (srcRect.h <= 0 || (tooTall && srcY != 0)) {
				WARN_LOG_ONCE(btdheight, Log::G3D, "Block transfer download %08x -> %08x skipped, %d+%d is taller than %d", srcBasePtr, dstBasePtr, srcRect.y, srcRect.h, srcRect.vfb->bufferHeight);
			} else {
				if (tooTall) {
					WARN_LOG_ONCE(btdheight, Log::G3D, "Block transfer download %08x -> %08x dangerous, %d+%d is taller than %d", srcBasePtr, dstBasePtr, srcRect.y, srcRect.h, srcRect.vfb->bufferHeight);
				}
				// NOTE(review): the width passed here is w_bytes scaled by srcXFactor, while the upload path in
				// NotifyBlockTransferAfter divides w_bytes by bpp before scaling - confirm which unit
				// ReadFramebufferToMemory expects before changing this.
				ReadFramebufferToMemory(srcRect.vfb, static_cast<int>(srcX * srcXFactor), srcY, static_cast<int>(srcRect.w_bytes * srcXFactor), srcRect.h, RASTER_COLOR, Draw::ReadbackMode::BLOCK);
				srcRect.vfb->usageFlags = (srcRect.vfb->usageFlags | FB_USAGE_DOWNLOAD) & ~FB_USAGE_DOWNLOAD_CLEAR;
			}
		}
		return false;  // Let the bit copy happen
	} else {
		return false;
	}
}
2751
2752
// Returns the readback-skipping mode to use. The ForceEnableGPUReadback compat flag takes
// precedence and yields NO_SKIP; otherwise the configured iSkipGPUReadbackMode value is returned.
SkipGPUReadbackMode FramebufferManagerCommon::GetSkipGPUReadbackMode() {
	const bool forceReadback = PSP_CoreParameter().compat.flags().ForceEnableGPUReadback;
	return forceReadback ? SkipGPUReadbackMode::NO_SKIP : static_cast<SkipGPUReadbackMode>(g_Config.iSkipGPUReadbackMode);
}
2759
2760
// Called after a block transfer has been carried out in emulated memory. If the destination is a known
// framebuffer and the source is not, the transferred pixels are uploaded into that framebuffer.
// Without buffered rendering, a full-screen transfer to a display buffer is drawn to the output directly.
void FramebufferManagerCommon::NotifyBlockTransferAfter(u32 dstBasePtr, int dstStride, int dstX, int dstY, u32 srcBasePtr, int srcStride, int srcX, int srcY, int width, int height, int bpp, u32 skipDrawReason) {
	// If it's a block transfer direct to the screen, and we're not using buffers, draw immediately.
	// We may still do a partial block draw below if this doesn't pass.
	const bool fullScreenDirect = !useBufferedRendering_ && dstStride >= 480 && width >= 480 && height == 272;
	if (fullScreenDirect) {
		const bool hitsPrevDisplay = PrevDisplayFramebufAddr() == dstBasePtr;
		const bool hitsCurDisplay = CurrentDisplayFramebufAddr() == dstBasePtr;
		if (hitsPrevDisplay || hitsCurDisplay) {
			FlushBeforeCopy();
			DrawFramebufferToOutput(Memory::GetPointerUnchecked(dstBasePtr), dstStride, displayFormat_);
			return;
		}
	}

	// Nothing to do unless one of the two addresses may touch a framebuffer.
	if (!MayIntersectFramebufferColor(srcBasePtr) && !MayIntersectFramebufferColor(dstBasePtr)) {
		return;
	}

	// TODO: Figure out how we can avoid repeating the search here.

	BlockTransferRect dstRect{};
	BlockTransferRect srcRect{};

	// These modify the X/Y/W/H parameters depending on the memory offset of the base pointers from the actual buffers.
	const bool srcFound = FindTransferFramebuffer(srcBasePtr, srcStride, srcX, srcY, width, height, bpp, false, &srcRect);
	const bool dstFound = FindTransferFramebuffer(dstBasePtr, dstStride, dstX, dstY, width, height, bpp, true, &dstRect);

	// A few games use this INSTEAD of actually drawing the video image to the screen, they just blast it to
	// the backbuffer. Detect this and have the framebuffermanager draw the pixels.
	if ((!useBufferedRendering_ && currentRenderVfb_ != dstRect.vfb) || dstRect.vfb == nullptr) {
		return;
	}

	// Only a pure upload (known destination, unknown source) is handled here.
	if (!dstFound || srcFound) {
		return;
	}

	WARN_LOG_ONCE(btu, Log::G3D, "Block transfer upload %08x -> %08x (%dx%d %d,%d bpp=%d)", srcBasePtr, dstBasePtr, width, height, dstX, dstY, bpp);
	FlushBeforeCopy();
	const u8 *srcPixels = Memory::GetPointerUnchecked(srcBasePtr) + (srcX + srcY * srcStride) * bpp;

	VirtualFramebuffer *target = dstRect.vfb;
	const int targetBpp = BufferFormatBytesPerPixel(target->fb_format);
	// Ratio between the transfer's pixel size and the target's, used to rescale x coordinates and widths.
	const float xScale = (float)bpp / targetBpp;
	const int uploadWidth = dstRect.w_bytes / bpp;
	const int uploadHeight = dstRect.h;

	if (uploadWidth > target->width || uploadHeight > target->height) {
		// The buffer isn't big enough, and we have a clear hint of size. Resize.
		// This happens in Valkyrie Profile when uploading video at the ending.
		// Also happens to the CLUT framebuffer in the Burnout Dominator lens flare effect. See #16075
		ResizeFramebufFBO(target, uploadWidth, uploadHeight, false, true);
		// Make sure we don't flop back and forth.
		target->newWidth = std::max(uploadWidth, (int)target->width);
		target->newHeight = std::max(uploadHeight, (int)target->height);
		target->lastFrameNewSize = gpuStats.numFlips;
		// Resizing may change the viewport/etc.
		gstate_c.Dirty(DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_CULLRANGE);
	}

	DrawPixels(target, static_cast<int>(dstX * xScale), dstY, srcPixels, target->fb_format, static_cast<int>(srcStride * xScale), static_cast<int>(uploadWidth * xScale), uploadHeight, RASTER_COLOR, "BlockTransferCopy_DrawPixels");
	SetColorUpdated(target, skipDrawReason);
	RebindFramebuffer("RebindFramebuffer - NotifyBlockTransferAfter");
}
2814
2815
// Grows the safe size of the current render framebuffer to at least w x h, capped at the
// buffer's own dimensions. The safe size never shrinks here. No-op without a current framebuffer.
void FramebufferManagerCommon::SetSafeSize(u16 w, u16 h) {
	VirtualFramebuffer *target = currentRenderVfb_;
	if (!target) {
		return;
	}

	const u16 grownWidth = std::max(target->safeWidth, w);
	const u16 grownHeight = std::max(target->safeHeight, h);
	target->safeWidth = std::min(target->bufferWidth, grownWidth);
	target->safeHeight = std::min(target->bufferHeight, grownHeight);
}
2822
2823
void FramebufferManagerCommon::NotifyDisplayResized() {
2824
pixelWidth_ = PSP_CoreParameter().pixelWidth;
2825
pixelHeight_ = PSP_CoreParameter().pixelHeight;
2826
presentation_->UpdateDisplaySize(pixelWidth_, pixelHeight_);
2827
2828
INFO_LOG(Log::G3D, "FramebufferManagerCommon::NotifyDisplayResized: %dx%d", pixelWidth_, pixelHeight_);
2829
2830
// No drawing is allowed here. This includes anything that might potentially touch a command buffer, like creating images!
2831
// So we need to defer the post processing initialization.
2832
updatePostShaders_ = true;
2833
}
2834
2835
void FramebufferManagerCommon::NotifyRenderResized(int msaaLevel) {
2836
gstate_c.skipDrawReason &= ~SKIPDRAW_NON_DISPLAYED_FB;
2837
2838
int w, h, scaleFactor;
2839
presentation_->CalculateRenderResolution(&w, &h, &scaleFactor, &postShaderIsUpscalingFilter_, &postShaderIsSupersampling_);
2840
PSP_CoreParameter().renderWidth = w;
2841
PSP_CoreParameter().renderHeight = h;
2842
PSP_CoreParameter().renderScaleFactor = scaleFactor;
2843
2844
if (UpdateRenderSize(msaaLevel)) {
2845
draw_->StopThreads();
2846
DestroyAllFBOs();
2847
draw_->StartThreads();
2848
}
2849
2850
// No drawing is allowed here. This includes anything that might potentially touch a command buffer, like creating images!
2851
// So we need to defer the post processing initialization.
2852
updatePostShaders_ = true;
2853
}
2854
2855
void FramebufferManagerCommon::NotifyConfigChanged() {
2856
updatePostShaders_ = true;
2857
}
2858
2859
void FramebufferManagerCommon::DestroyAllFBOs() {
2860
DiscardFramebufferCopy();
2861
currentRenderVfb_ = nullptr;
2862
displayFramebuf_ = nullptr;
2863
prevDisplayFramebuf_ = nullptr;
2864
prevPrevDisplayFramebuf_ = nullptr;
2865
2866
for (VirtualFramebuffer *vfb : vfbs_) {
2867
INFO_LOG(Log::FrameBuf, "Destroying FBO for %08x : %i x %i x %i", vfb->fb_address, vfb->width, vfb->height, vfb->fb_format);
2868
DestroyFramebuf(vfb);
2869
}
2870
vfbs_.clear();
2871
2872
for (VirtualFramebuffer *vfb : bvfbs_) {
2873
DestroyFramebuf(vfb);
2874
}
2875
bvfbs_.clear();
2876
2877
for (auto &tempFB : tempFBOs_) {
2878
tempFB.second.fbo->Release();
2879
}
2880
tempFBOs_.clear();
2881
2882
for (auto &iter : fbosToDelete_) {
2883
iter->Release();
2884
}
2885
fbosToDelete_.clear();
2886
2887
for (auto &iter : drawPixelsCache_) {
2888
iter.tex->Release();
2889
}
2890
drawPixelsCache_.clear();
2891
}
2892
2893
// Short lowercase label for a temp FBO reason; empty string for unlisted values.
static const char *TempFBOReasonToString(TempFBO reason) {
	const char *label = "";
	switch (reason) {
	case TempFBO::DEPAL:
		label = "depal";
		break;
	case TempFBO::BLIT:
		label = "blit";
		break;
	case TempFBO::COPY:
		label = "copy";
		break;
	case TempFBO::STENCIL:
		label = "stencil";
		break;
	default:
		break;
	}
	return label;
}
2903
2904
// Returns a temporary framebuffer for the given purpose and size, creating and caching
// it on first use. Returns nullptr if the framebuffer could not be created.
Draw::Framebuffer *FramebufferManagerCommon::GetTempFBO(TempFBO reason, u16 w, u16 h) {
	// Cache key layout: reason in bits 48+, width in bits 16..31, height in bits 0..15.
	const u64 key = ((u64)reason << 48) | ((u32)w << 16) | h;

	auto cached = tempFBOs_.find(key);
	if (cached != tempFBOs_.end()) {
		// Refresh the last-used stamp on a cache hit.
		cached->second.last_frame_used = gpuStats.numFlips;
		return cached->second.fbo;
	}

	// Only the stencil temp buffers get a depth/stencil attachment.
	bool needsDepthStencil = reason == TempFBO::STENCIL;

	char name[128];
	snprintf(name, sizeof(name), "tempfbo_%s_%dx%d", TempFBOReasonToString(reason), w / renderScaleFactor_, h / renderScaleFactor_);

	Draw::Framebuffer *created = draw_->CreateFramebuffer({ w, h, 1, GetFramebufferLayers(), 0, needsDepthStencil, name });
	if (created) {
		const TempFBOInfo info = { created, gpuStats.numFlips };
		tempFBOs_[key] = info;
	}
	return created;
}
2925
2926
// Clears usage flags on vfb whose corresponding "last frame" stamp lies more than
// FBO_OLD_USAGE_FLAG behind frameLastFramebufUsed_.
void FramebufferManagerCommon::UpdateFramebufUsage(VirtualFramebuffer *vfb) const {
	auto expireIfOld = [&](u16 flag, int lastFrame) {
		if (!(vfb->usageFlags & flag)) {
			// Flag isn't set, nothing to expire.
			return;
		}
		const int framesSince = frameLastFramebufUsed_ - lastFrame;
		if (framesSince > FBO_OLD_USAGE_FLAG) {
			vfb->usageFlags &= ~flag;
		}
	};

	expireIfOld(FB_USAGE_DISPLAYED_FRAMEBUFFER, vfb->last_frame_displayed);
	expireIfOld(FB_USAGE_TEXTURE, vfb->last_frame_used);
	expireIfOld(FB_USAGE_RENDER_COLOR, vfb->last_frame_render);
	expireIfOld(FB_USAGE_CLUT, vfb->last_frame_clut);
}
2941
2942
void FramebufferManagerCommon::ClearAllDepthBuffers() {
2943
for (auto vfb : vfbs_) {
2944
vfb->usageFlags |= FB_USAGE_INVALIDATE_DEPTH;
2945
}
2946
}
2947
2948
// We might also want to implement an asynchronous callback-style version of this. Would probably
2949
// only be possible to implement optimally on Vulkan, but on GL and D3D11 we could do pixel buffers
2950
// and read on the next frame, then call the callback.
2951
//
2952
// The main use cases for this are:
2953
// * GE debugging(in practice async will not matter because it will stall anyway.)
2954
// * Video file recording(would probably be great if it was async.)
2955
// * Screenshots(benefit slightly from async.)
2956
// * Save state screenshots(could probably be async but need to manage the stall.)
2957
bool FramebufferManagerCommon::GetFramebuffer(u32 fb_address, int fb_stride, GEBufferFormat format, GPUDebugBuffer &buffer, int maxScaleFactor) {
2958
VirtualFramebuffer *vfb = currentRenderVfb_;
2959
if (!vfb || vfb->fb_address != fb_address) {
2960
vfb = ResolveVFB(fb_address, fb_stride, format);
2961
}
2962
2963
if (!vfb) {
2964
if (!Memory::IsValidAddress(fb_address))
2965
return false;
2966
// If there's no vfb and we're drawing there, must be memory?
2967
buffer = GPUDebugBuffer(Memory::GetPointerWriteUnchecked(fb_address), fb_stride, 512, format);
2968
return true;
2969
}
2970
2971
int w = vfb->renderWidth, h = vfb->renderHeight;
2972
2973
Draw::Framebuffer *bound = nullptr;
2974
2975
if (vfb->fbo) {
2976
if (maxScaleFactor > 0 && vfb->renderWidth > vfb->width * maxScaleFactor) {
2977
w = vfb->width * maxScaleFactor;
2978
h = vfb->height * maxScaleFactor;
2979
2980
Draw::Framebuffer *tempFBO = GetTempFBO(TempFBO::COPY, w, h);
2981
VirtualFramebuffer tempVfb = *vfb;
2982
tempVfb.fbo = tempFBO;
2983
tempVfb.bufferWidth = vfb->width;
2984
tempVfb.bufferHeight = vfb->height;
2985
tempVfb.renderWidth = w;
2986
tempVfb.renderHeight = h;
2987
tempVfb.renderScaleFactor = maxScaleFactor;
2988
BlitFramebuffer(&tempVfb, 0, 0, vfb, 0, 0, vfb->width, vfb->height, 0, RASTER_COLOR, "Blit_GetFramebuffer");
2989
2990
bound = tempFBO;
2991
} else {
2992
bound = vfb->fbo;
2993
}
2994
}
2995
2996
if (!useBufferedRendering_) {
2997
// Safety check.
2998
w = std::min(w, PSP_CoreParameter().pixelWidth);
2999
h = std::min(h, PSP_CoreParameter().pixelHeight);
3000
}
3001
3002
// TODO: Maybe should handle flipY inside CopyFramebufferToMemorySync somehow?
3003
bool flipY = (GetGPUBackend() == GPUBackend::OPENGL && !useBufferedRendering_) ? true : false;
3004
buffer.Allocate(w, h, GE_FORMAT_8888, flipY);
3005
bool retval = draw_->CopyFramebufferToMemory(bound, Draw::Aspect::COLOR_BIT, 0, 0, w, h, Draw::DataFormat::R8G8B8A8_UNORM, buffer.GetData(), w, Draw::ReadbackMode::BLOCK, "GetFramebuffer");
3006
3007
// Don't need to increment gpu stats for readback count here, this is a debugger-only function.
3008
3009
// After a readback we'll have flushed and started over, need to dirty a bunch of things to be safe.
3010
gstate_c.Dirty(DIRTY_TEXTURE_IMAGE | DIRTY_TEXTURE_PARAMS);
3011
// We may have blitted to a temp FBO.
3012
RebindFramebuffer("RebindFramebuffer - GetFramebuffer");
3013
return retval;
3014
}
3015
3016
// Fills `buffer` with the depth contents of the current render framebuffer, or of the
// framebuffer found at fb_address when nothing is currently bound. Without any matching
// framebuffer, `buffer` is pointed at emulated memory at z_address instead.
// Returns false if z_address is invalid or both readback attempts fail.
bool FramebufferManagerCommon::GetDepthbuffer(u32 fb_address, int fb_stride, u32 z_address, int z_stride, GPUDebugBuffer &buffer) {
	VirtualFramebuffer *vfb = currentRenderVfb_ ? currentRenderVfb_ : GetVFBAt(fb_address);

	if (!vfb) {
		if (!Memory::IsValidAddress(z_address))
			return false;
		// If there's no vfb and we're drawing there, must be memory?
		buffer = GPUDebugBuffer(Memory::GetPointerWriteUnchecked(z_address), z_stride, 512, GPU_DBG_FORMAT_16BIT);
		return true;
	}

	int w = vfb->renderWidth;
	int h = vfb->renderHeight;
	if (!useBufferedRendering_) {
		// Safety check.
		w = std::min(w, PSP_CoreParameter().pixelWidth);
		h = std::min(h, PSP_CoreParameter().pixelHeight);
	}

	const bool flipY = GetGPUBackend() == GPUBackend::OPENGL && !useBufferedRendering_;

	// Old code
	const auto floatFormat = gstate_c.Use(GPU_SCALE_DEPTH_FROM_24BIT_TO_16BIT) ? GPU_DBG_FORMAT_FLOAT_DIV_256 : GPU_DBG_FORMAT_FLOAT;
	buffer.Allocate(w, h, floatFormat, flipY);

	// No need to free on failure, that's the caller's job (it likely will reuse a buffer.)
	bool success = draw_->CopyFramebufferToMemory(vfb->fbo, Draw::Aspect::DEPTH_BIT, 0, 0, w, h, Draw::DataFormat::D32F, buffer.GetData(), w, Draw::ReadbackMode::BLOCK, "GetDepthBuffer");
	if (!success) {
		// Try ReadbackDepthbufferSync, in case GLES. This path produces 16-bit values.
		buffer.Allocate(w, h, GPU_DBG_FORMAT_16BIT, flipY);
		success = ReadbackDepthbuffer(vfb->fbo, 0, 0, w, h, (uint16_t *)buffer.GetData(), w, w, h, Draw::ReadbackMode::BLOCK);
	}

	// After a readback we'll have flushed and started over, need to dirty a bunch of things to be safe.
	gstate_c.Dirty(DIRTY_TEXTURE_IMAGE | DIRTY_TEXTURE_PARAMS);
	// That may have unbound the framebuffer, rebind to avoid crashes when debugging.
	RebindFramebuffer("RebindFramebuffer - GetDepthbuffer");
	return success;
}
3060
3061
// Fills `buffer` with the stencil contents of the current render framebuffer, or of the
// framebuffer found at fb_address when nothing is currently bound. Without any matching
// framebuffer, `buffer` is pointed at emulated memory at fb_address as 8888 data.
// Returns false if fb_address is invalid or both readback attempts fail.
bool FramebufferManagerCommon::GetStencilbuffer(u32 fb_address, int fb_stride, GPUDebugBuffer &buffer) {
	VirtualFramebuffer *vfb = currentRenderVfb_ ? currentRenderVfb_ : GetVFBAt(fb_address);

	if (!vfb) {
		if (!Memory::IsValidAddress(fb_address))
			return false;
		// If there's no vfb and we're drawing there, must be memory?
		// TODO: Actually get the stencil.
		buffer = GPUDebugBuffer(Memory::GetPointerWrite(fb_address), fb_stride, 512, GPU_DBG_FORMAT_8888);
		return true;
	}

	int w = vfb->renderWidth;
	int h = vfb->renderHeight;
	if (!useBufferedRendering_) {
		// Safety check.
		w = std::min(w, PSP_CoreParameter().pixelWidth);
		h = std::min(h, PSP_CoreParameter().pixelHeight);
	}

	const bool flipY = GetGPUBackend() == GPUBackend::OPENGL && !useBufferedRendering_;

	// No need to free on failure, the caller/destructor will do that. Usually this is a reused buffer, anyway.
	buffer.Allocate(w, h, GPU_DBG_FORMAT_8BIT, flipY);

	bool success = draw_->CopyFramebufferToMemory(vfb->fbo, Draw::Aspect::STENCIL_BIT, 0, 0, w,h, Draw::DataFormat::S8, buffer.GetData(), w, Draw::ReadbackMode::BLOCK, "GetStencilbuffer");
	if (!success) {
		// Direct copy failed, fall back to the dedicated stencil readback helper.
		success = ReadbackStencilbuffer(vfb->fbo, 0, 0, w, h, buffer.GetData(), w, Draw::ReadbackMode::BLOCK);
	}

	// That may have unbound the framebuffer, rebind to avoid crashes when debugging.
	RebindFramebuffer("RebindFramebuffer - GetStencilbuffer");
	return success;
}
3095
3096
// Reads back the framebuffer identified by nullptr on the draw context into `buffer`,
// as BGRA if that is the preferred readback format and as RGBA otherwise.
// NOTE(review): nullptr presumably denotes the backbuffer - confirm against the Draw API.
bool GetOutputFramebuffer(Draw::DrawContext *draw, GPUDebugBuffer &buffer) {
	int w, h;
	draw->GetFramebufferDimensions(nullptr, &w, &h);

	// Ignore preferred formats other than BGRA.
	const bool useBGRA = draw->PreferredFramebufferReadbackFormat(nullptr) == Draw::DataFormat::B8G8R8A8_UNORM;
	const Draw::DataFormat fmt = useBGRA ? Draw::DataFormat::B8G8R8A8_UNORM : Draw::DataFormat::R8G8B8A8_UNORM;

	const bool flipped = g_Config.iGPUBackend == (int)GPUBackend::OPENGL;

	buffer.Allocate(w, h, useBGRA ? GPU_DBG_FORMAT_8888_BGRA : GPU_DBG_FORMAT_8888, flipped);
	return draw->CopyFramebufferToMemory(nullptr, Draw::Aspect::COLOR_BIT, 0, 0, w, h, fmt, buffer.GetData(), w, Draw::ReadbackMode::BLOCK, "GetOutputFramebuffer");
}
3109
3110
// Member wrapper around the free GetOutputFramebuffer() that restores the render
// target binding afterwards.
bool FramebufferManagerCommon::GetOutputFramebuffer(GPUDebugBuffer &buffer) {
	const bool success = ::GetOutputFramebuffer(draw_, buffer);
	// That may have unbound the framebuffer, rebind to avoid crashes when debugging.
	RebindFramebuffer("RebindFramebuffer - GetOutputFramebuffer");
	return success;
}
3116
3117
// This reads a channel of a framebuffer into emulated PSP VRAM, taking care of scaling down as needed.
3118
//
3119
// Color conversion is currently done on CPU but should theoretically be done on GPU.
3120
// (Except using the GPU might cause problems because of various implementations'
3121
// dithering behavior and games that expect exact colors like Danganronpa, so we
3122
// can't entirely be rid of the CPU path.) -- unknown
3123
void FramebufferManagerCommon::ReadbackFramebuffer(VirtualFramebuffer *vfb, int x, int y, int w, int h, RasterChannel channel, Draw::ReadbackMode mode) {
3124
if (w <= 0 || h <= 0) {
3125
ERROR_LOG(Log::FrameBuf, "Bad inputs to ReadbackFramebufferSync: %d %d %d %d", x, y, w, h);
3126
return;
3127
}
3128
3129
// Note that ReadbackDepthBufferSync can stretch on its own while converting data format, so we don't need to downscale in that case.
3130
if (vfb->renderScaleFactor == 1 || channel == RASTER_DEPTH) {
3131
// No need to stretch-blit
3132
} else {
3133
VirtualFramebuffer *nvfb = FindDownloadTempBuffer(vfb, channel);
3134
if (nvfb) {
3135
BlitFramebuffer(nvfb, x, y, vfb, x, y, w, h, 0, channel, "Blit_ReadFramebufferToMemory");
3136
vfb = nvfb;
3137
}
3138
}
3139
3140
const u32 fb_address = channel == RASTER_COLOR ? vfb->fb_address : vfb->z_address;
3141
3142
Draw::DataFormat destFormat = channel == RASTER_COLOR ? GEFormatToThin3D(vfb->fb_format) : GEFormatToThin3D(GE_FORMAT_DEPTH16);
3143
const int dstBpp = (int)DataFormatSizeInBytes(destFormat);
3144
3145
int stride = channel == RASTER_COLOR ? vfb->fb_stride : vfb->z_stride;
3146
3147
const int dstByteOffset = (y * stride + x) * dstBpp;
3148
// Leave the gap between the end of the last line and the full stride.
3149
// This is only used for the NotifyMemInfo range.
3150
const int dstSize = ((h - 1) * stride + w) * dstBpp;
3151
3152
if (!Memory::IsValidRange(fb_address + dstByteOffset, dstSize)) {
3153
ERROR_LOG_REPORT(Log::G3D, "ReadbackFramebufferSync would write outside of memory, ignoring");
3154
return;
3155
}
3156
3157
u8 *destPtr = Memory::GetPointerWriteUnchecked(fb_address + dstByteOffset);
3158
3159
// We always need to convert from the framebuffer native format.
3160
// Right now that's always 8888.
3161
DEBUG_LOG(Log::FrameBuf, "Reading framebuffer to mem, fb_address = %08x, ptr=%p", fb_address, destPtr);
3162
3163
if (channel == RASTER_DEPTH) {
3164
_assert_msg_(vfb && vfb->z_address != 0 && vfb->z_stride != 0, "Depth buffer invalid");
3165
ReadbackDepthbuffer(vfb->fbo,
3166
x * vfb->renderScaleFactor, y * vfb->renderScaleFactor,
3167
w * vfb->renderScaleFactor, h * vfb->renderScaleFactor, (uint16_t *)destPtr, stride, w, h, mode);
3168
} else {
3169
draw_->CopyFramebufferToMemory(vfb->fbo, channel == RASTER_COLOR ? Draw::Aspect::COLOR_BIT : Draw::Aspect::DEPTH_BIT, x, y, w, h, destFormat, destPtr, stride, mode, "ReadbackFramebufferSync");
3170
}
3171
3172
char tag[128];
3173
size_t len = snprintf(tag, sizeof(tag), "FramebufferPack/%08x_%08x_%dx%d_%s", vfb->fb_address, vfb->z_address, w, h, GeBufferFormatToString(vfb->fb_format));
3174
NotifyMemInfo(MemBlockFlags::WRITE, fb_address + dstByteOffset, dstSize, tag, len);
3175
3176
if (mode == Draw::ReadbackMode::BLOCK) {
3177
gpuStats.numBlockingReadbacks++;
3178
} else {
3179
gpuStats.numReadbacks++;
3180
}
3181
}
3182
3183
// Reads the given rectangle of fbo into `pixels` as S8 data through the draw context.
// NOTE(review): this requests the DEPTH_BIT aspect together with an S8 destination format,
// whereas GetStencilbuffer() uses STENCIL_BIT for its S8 read - confirm this is intentional.
bool FramebufferManagerCommon::ReadbackStencilbuffer(Draw::Framebuffer *fbo, int x, int y, int w, int h, uint8_t *pixels, int pixelsStride, Draw::ReadbackMode mode) {
	const bool success = draw_->CopyFramebufferToMemory(fbo, Draw::Aspect::DEPTH_BIT, x, y, w, h, Draw::DataFormat::S8, pixels, pixelsStride, mode, "ReadbackStencilbufferSync");
	return success;
}
3186
3187
// Downloads a rectangle of a framebuffer channel into emulated memory and updates the
// framebuffer's download bookkeeping. Once a game is seen copying subranges of the same
// buffer more than a few times in one frame, every later call reads the whole buffer.
void FramebufferManagerCommon::ReadFramebufferToMemory(VirtualFramebuffer *vfb, int x, int y, int w, int h, RasterChannel channel, Draw::ReadbackMode mode) {
	if (!vfb || !vfb->fbo) {
		return;
	}

	// Clamp to bufferWidth. Sometimes block transfers can cause this to hit.
	if (x + w >= vfb->bufferWidth) {
		w = vfb->bufferWidth - x;
	}

	if (gameUsesSequentialCopies_) {
		// Ignore the x/y/etc., read the entire thing. See below.
		x = 0;
		y = 0;
		w = vfb->width;
		h = vfb->height;
		vfb->memoryUpdated = true;
		vfb->usageFlags |= FB_USAGE_DOWNLOAD;
	} else if (x == 0 && y == 0 && w == vfb->width && h == vfb->height) {
		// Mark it as fully downloaded until next render to it.
		if (channel == RASTER_COLOR)
			vfb->memoryUpdated = true;
		vfb->usageFlags |= FB_USAGE_DOWNLOAD;
	} else {
		// Let's try to set the flag eventually, if the game copies a lot.
		// Some games (like Grand Knights History) copy subranges very frequently.
		const static int FREQUENT_SEQUENTIAL_COPIES = 3;
		// Function-local statics: the counter restarts whenever the frame or the buffer changes.
		static int lastCopyFrame = 0;
		static u32 lastCopyBuffer = 0;
		static int copyCount = 0;

		const bool sameFrameAndBuffer = lastCopyFrame == gpuStats.numFlips && lastCopyBuffer == vfb->fb_address;
		if (!sameFrameAndBuffer) {
			lastCopyFrame = gpuStats.numFlips;
			lastCopyBuffer = vfb->fb_address;
			copyCount = 0;
		}
		if (++copyCount > FREQUENT_SEQUENTIAL_COPIES) {
			gameUsesSequentialCopies_ = true;
		}
	}

	// This handles any required stretching internally.
	ReadbackFramebuffer(vfb, x, y, w, h, channel, mode);

	draw_->Invalidate(InvalidationFlags::CACHED_RENDER_STATE);
	textureCache_->ForgetLastTexture();
	RebindFramebuffer("RebindFramebuffer - ReadFramebufferToMemory");
}
3233
3234
void FramebufferManagerCommon::FlushBeforeCopy() {
3235
drawEngine_->FlushQueuedDepth();
3236
// Flush anything not yet drawn before blitting, downloading, or uploading.
3237
// This might be a stalled list, or unflushed before a block transfer, etc.
3238
// Only bother if any draws are pending.
3239
if (drawEngine_->GetNumDrawCalls() > 0) {
3240
// TODO: It's really bad that we are calling SetRenderFramebuffer here with
3241
// all the irrelevant state checking it'll use to decide what to do. Should
3242
// do something more focused here.
3243
bool changed;
3244
SetRenderFrameBuffer(gstate_c.IsDirty(DIRTY_FRAMEBUF), gstate_c.skipDrawReason, &changed);
3245
drawEngine_->Flush();
3246
}
3247
}
3248
3249
// TODO: Replace with with depal, reading the palette from the texture on the GPU directly.
3250
void FramebufferManagerCommon::DownloadFramebufferForClut(u32 fb_address, u32 loadBytes) {
3251
VirtualFramebuffer *vfb = GetVFBAt(fb_address);
3252
if (vfb && vfb->fb_stride != 0) {
3253
const u32 bpp = BufferFormatBytesPerPixel(vfb->fb_format);
3254
int x = 0;
3255
int y = 0;
3256
int pixels = loadBytes / bpp;
3257
// The height will be 1 for each stride or part thereof.
3258
int w = std::min(pixels % vfb->fb_stride, (int)vfb->width);
3259
int h = std::min((pixels + vfb->fb_stride - 1) / vfb->fb_stride, (int)vfb->height);
3260
3261
if (w == 0 || h > 1) {
3262
// Exactly aligned, or more than one row.
3263
w = std::min(vfb->fb_stride, vfb->width);
3264
}
3265
3266
// We might still have a pending draw to the fb in question, flush if so.
3267
FlushBeforeCopy();
3268
3269
// No need to download if we already have it.
3270
if (w > 0 && h > 0 && !vfb->memoryUpdated && vfb->clutUpdatedBytes < loadBytes) {
3271
// We intentionally don't try to optimize into a full download here - we don't want to over download.
3272
3273
// CLUT framebuffers are often incorrectly estimated in size.
3274
if (x == 0 && y == 0 && w == vfb->width && h == vfb->height) {
3275
vfb->memoryUpdated = true;
3276
}
3277
vfb->clutUpdatedBytes = loadBytes;
3278
3279
// This function now handles scaling down internally.
3280
ReadbackFramebuffer(vfb, x, y, w, h, RASTER_COLOR, Draw::ReadbackMode::BLOCK);
3281
3282
textureCache_->ForgetLastTexture();
3283
RebindFramebuffer("RebindFramebuffer - DownloadFramebufferForClut");
3284
}
3285
}
3286
}
3287
3288
void FramebufferManagerCommon::RebindFramebuffer(const char *tag) {
3289
draw_->Invalidate(InvalidationFlags::CACHED_RENDER_STATE);
3290
shaderManager_->DirtyLastShader();
3291
// Needed for D3D11 to run validation clean. I don't think it's actually an issue.
3292
// textureCache_->ForgetLastTexture();
3293
if (currentRenderVfb_ && currentRenderVfb_->fbo) {
3294
draw_->BindFramebufferAsRenderTarget(currentRenderVfb_->fbo, { Draw::RPAction::KEEP, Draw::RPAction::KEEP, Draw::RPAction::KEEP }, tag);
3295
} else {
3296
// This can happen (like it does in Parappa) when a frame starts with copies instead of rendering.
3297
// Let's do nothing and assume it'll take care of itself.
3298
}
3299
}
3300
3301
std::vector<const VirtualFramebuffer *> FramebufferManagerCommon::GetFramebufferList() const {
3302
std::vector<const VirtualFramebuffer *> list;
3303
for (auto vfb : vfbs_) {
3304
list.push_back(vfb);
3305
}
3306
return list;
3307
}
3308
3309
// Calls Release() on *obj if it is non-null and leaves the pointer null either way.
template <typename T>
static void DoRelease(T *&obj) {
	if (obj != nullptr) {
		obj->Release();
		obj = nullptr;
	}
}
3315
3316
void FramebufferManagerCommon::ReleasePipelines() {
3317
for (int i = 0; i < ARRAY_SIZE(reinterpretFromTo_); i++) {
3318
for (int j = 0; j < ARRAY_SIZE(reinterpretFromTo_); j++) {
3319
DoRelease(reinterpretFromTo_[i][j]);
3320
}
3321
}
3322
DoRelease(stencilWriteSampler_);
3323
DoRelease(stencilWritePipeline_);
3324
DoRelease(stencilReadbackSampler_);
3325
DoRelease(stencilReadbackPipeline_);
3326
DoRelease(depthReadbackSampler_);
3327
DoRelease(depthReadbackPipeline_);
3328
DoRelease(draw2DPipelineCopyColor_);
3329
DoRelease(draw2DPipelineColorRect2Lin_);
3330
DoRelease(draw2DPipelineCopyDepth_);
3331
DoRelease(draw2DPipelineEncodeDepth_);
3332
DoRelease(draw2DPipeline565ToDepth_);
3333
DoRelease(draw2DPipeline565ToDepthDeswizzle_);
3334
}
3335
3336
void FramebufferManagerCommon::DeviceLost() {
3337
DestroyAllFBOs();
3338
3339
presentation_->DeviceLost();
3340
draw2D_.DeviceLost();
3341
3342
ReleasePipelines();
3343
3344
draw_ = nullptr;
3345
}
3346
3347
// Adopts the new draw context and forwards it to the 2D and presentation helpers.
void FramebufferManagerCommon::DeviceRestore(Draw::DrawContext *draw) {
	draw_ = draw;
	draw2D_.DeviceRestore(draw);
	presentation_->DeviceRestore(draw);
}
3352
3353
void FramebufferManagerCommon::DrawActiveTexture(float x, float y, float w, float h, float destW, float destH, float u0, float v0, float u1, float v1, int uvRotation, int flags) {
3354
// Will be drawn as a strip.
3355
Draw2DVertex coord[4] = {
3356
{x, y, u0, v0},
3357
{x + w, y, u1, v0},
3358
{x + w, y + h, u1, v1},
3359
{x, y + h, u0, v1},
3360
};
3361
3362
if (uvRotation != ROTATION_LOCKED_HORIZONTAL) {
3363
float temp[8];
3364
int rotation = 0;
3365
switch (uvRotation) {
3366
case ROTATION_LOCKED_HORIZONTAL180: rotation = 2; break;
3367
case ROTATION_LOCKED_VERTICAL: rotation = 1; break;
3368
case ROTATION_LOCKED_VERTICAL180: rotation = 3; break;
3369
}
3370
for (int i = 0; i < 4; i++) {
3371
temp[i * 2] = coord[((i + rotation) & 3)].u;
3372
temp[i * 2 + 1] = coord[((i + rotation) & 3)].v;
3373
}
3374
3375
for (int i = 0; i < 4; i++) {
3376
coord[i].u = temp[i * 2];
3377
coord[i].v = temp[i * 2 + 1];
3378
}
3379
}
3380
3381
const float invDestW = 2.0f / destW;
3382
const float invDestH = 2.0f / destH;
3383
for (int i = 0; i < 4; i++) {
3384
coord[i].x = coord[i].x * invDestW - 1.0f;
3385
coord[i].y = coord[i].y * invDestH - 1.0f;
3386
}
3387
3388
if ((flags & DRAWTEX_TO_BACKBUFFER) && g_display.rotation != DisplayRotation::ROTATE_0) {
3389
for (int i = 0; i < 4; i++) {
3390
// backwards notation, should fix that...
3391
Lin::Vec3 pos = Lin::Vec3(coord[i].x, coord[i].y, 0.0);
3392
pos = pos * g_display.rot_matrix;
3393
coord[i].x = pos.x;
3394
coord[i].y = pos.y;
3395
}
3396
}
3397
3398
// Rearrange to strip form.
3399
std::swap(coord[2], coord[3]);
3400
3401
draw2D_.DrawStrip2D(nullptr, coord, 4, (flags & DRAWTEX_LINEAR) != 0, Get2DPipeline((flags & DRAWTEX_DEPTH) ? DRAW2D_ENCODE_R16_TO_DEPTH : DRAW2D_COPY_COLOR));
3402
3403
gstate_c.Dirty(DIRTY_ALL_RENDER_STATE);
3404
}
3405
3406
// Copies a w x h rectangle (in unscaled framebuffer pixels) of one channel from src to dst,
// using an image copy, a framebuffer blit or a raster draw depending on device caps and state.
// A nonzero `bpp` that differs from a side's format rescales that side's X coordinates.
// Does nothing for depth when fragment shader depth writes are unsupported, when the
// rectangle is clipped away entirely, or when source and destination are identical.
void FramebufferManagerCommon::BlitFramebuffer(VirtualFramebuffer *dst, int dstX, int dstY, VirtualFramebuffer *src, int srcX, int srcY, int w, int h, int bpp, RasterChannel channel, const char *tag) {
	if (!dst->fbo || !src->fbo || !useBufferedRendering_) {
		// This can happen if they recently switched from non-buffered.
		if (useBufferedRendering_) {
			// Just bind the back buffer for rendering, forget about doing anything else as we're in a weird state.
			draw_->BindFramebufferAsRenderTarget(nullptr, { Draw::RPAction::KEEP, Draw::RPAction::KEEP, Draw::RPAction::KEEP }, "BlitFramebuffer");
		}
		return;
	}

	if (channel == RASTER_DEPTH && !draw_->GetDeviceCaps().fragmentShaderDepthWriteSupported) {
		// Can't do anything :(
		return;
	}

	// Perform a little bit of clipping first.
	// Block transfer coords are unsigned so I don't think we need to clip on the left side.. Although there are
	// other uses for BlitFramebuffer.
	if (dstX + w > dst->bufferWidth) {
		w -= dstX + w - dst->bufferWidth;
	}
	if (dstY + h > dst->bufferHeight) {
		h -= dstY + h - dst->bufferHeight;
	}
	if (srcX + w > src->bufferWidth) {
		w -= srcX + w - src->bufferWidth;
	}
	if (srcY + h > src->bufferHeight) {
		h -= srcY + h - src->bufferHeight;
	}

	if (w <= 0 || h <= 0) {
		// The whole rectangle got clipped.
		return;
	}

	// Only color can use the hardware blit/copy paths; depth always goes through a raster draw.
	bool useBlit = channel == RASTER_COLOR && draw_->GetDeviceCaps().framebufferBlitSupported;
	bool useCopy = channel == RASTER_COLOR && draw_->GetDeviceCaps().framebufferCopySupported;
	if (dst == currentRenderVfb_ || dst->fbo->MultiSampleLevel() != 0 || src->fbo->MultiSampleLevel() != 0) {
		// If already bound, using either a blit or a copy is unlikely to be an optimization.
		// So we're gonna use a raster draw instead. Also multisampling has problems with copies currently.
		useBlit = false;
		useCopy = false;
	}

	// Source rectangle in render (scaled) pixels.
	float srcXFactor = src->renderScaleFactor;
	float srcYFactor = src->renderScaleFactor;
	const int srcBpp = BufferFormatBytesPerPixel(src->Format(channel));
	if (srcBpp != bpp && bpp != 0) {
		// If we do this, we're kinda in nonsense territory since the actual formats won't match (unless intentionally blitting black or white).
		srcXFactor = (srcXFactor * bpp) / srcBpp;
	}
	int srcX1 = srcX * srcXFactor;
	int srcX2 = (srcX + w) * srcXFactor;
	int srcY1 = srcY * srcYFactor;
	int srcY2 = (srcY + h) * srcYFactor;

	// Destination rectangle in render (scaled) pixels.
	float dstXFactor = dst->renderScaleFactor;
	float dstYFactor = dst->renderScaleFactor;
	const int dstBpp = BufferFormatBytesPerPixel(dst->Format(channel));
	if (dstBpp != bpp && bpp != 0) {
		// If we do this, we're kinda in nonsense territory since the actual formats won't match (unless intentionally blitting black or white).
		dstXFactor = (dstXFactor * bpp) / dstBpp;
	}
	int dstX1 = dstX * dstXFactor;
	int dstX2 = (dstX + w) * dstXFactor;
	int dstY1 = dstY * dstYFactor;
	int dstY2 = (dstY + h) * dstYFactor;

	if (src == dst && srcX == dstX && srcY == dstY) {
		// Let's just skip a copy where the destination is equal to the source.
		WARN_LOG_REPORT_ONCE(blitSame, Log::G3D, "Skipped blit with equal dst and src");
		return;
	}

	const auto aspect = channel == RASTER_COLOR ? Draw::Aspect::COLOR_BIT : Draw::Aspect::DEPTH_BIT;

	if (useCopy) {
		// glBlitFramebuffer can clip, but glCopyImageSubData is more restricted.
		// In case the src goes outside, we just skip the optimization in that case.
		const bool sameSize = dstX2 - dstX1 == srcX2 - srcX1 && dstY2 - dstY1 == srcY2 - srcY1;
		const bool srcInsideBounds = srcX2 <= src->renderWidth && srcY2 <= src->renderHeight;
		const bool dstInsideBounds = dstX2 <= dst->renderWidth && dstY2 <= dst->renderHeight;
		const bool xOverlap = src == dst && srcX2 > dstX1 && srcX1 < dstX2;
		const bool yOverlap = src == dst && srcY2 > dstY1 && srcY1 < dstY2;
		if (sameSize && srcInsideBounds && dstInsideBounds && !(xOverlap && yOverlap)) {
			// Note: the copy path returns without touching the dirty/invalidate state below.
			draw_->CopyFramebufferImage(src->fbo, 0, srcX1, srcY1, 0, dst->fbo, 0, dstX1, dstY1, 0, dstX2 - dstX1, dstY2 - dstY1, 1,
				aspect, tag);
			return;
		}
	}

	if (useBlit) {
		draw_->BlitFramebuffer(src->fbo, srcX1, srcY1, srcX2, srcY2, dst->fbo, dstX1, dstY1, dstX2, dstY2,
			aspect, Draw::FB_BLIT_NEAREST, tag);
	} else {
		Draw2DPipeline *pipeline = Get2DPipeline(channel == RASTER_COLOR ? DRAW2D_COPY_COLOR : DRAW2D_COPY_DEPTH);
		Draw::Framebuffer *rasterSource = src->fbo;
		if (src == dst) {
			// Can't sample from the framebuffer being rendered to, so go through a temp FBO.
			// NOTE(review): tempFBO is not null-checked although GetTempFBO can return nullptr
			// and BlitUsingRaster dereferences its destination - confirm this cannot fail here.
			Draw::Framebuffer *tempFBO = GetTempFBO(TempFBO::BLIT, src->renderWidth, src->renderHeight);
			BlitUsingRaster(src->fbo, srcX1, srcY1, srcX2, srcY2, tempFBO, dstX1, dstY1, dstX2, dstY2, false, dst->renderScaleFactor, pipeline, tag);
			rasterSource = tempFBO;
		}
		BlitUsingRaster(rasterSource, srcX1, srcY1, srcX2, srcY2, dst->fbo, dstX1, dstY1, dstX2, dstY2, false, dst->renderScaleFactor, pipeline, tag);
	}

	draw_->Invalidate(InvalidationFlags::CACHED_RENDER_STATE);

	gstate_c.Dirty(DIRTY_ALL_RENDER_STATE);
}
3514
3515
// The input is raw pixel coordinates, scale not taken into account.
3516
void FramebufferManagerCommon::BlitUsingRaster(
3517
Draw::Framebuffer *src, float srcX1, float srcY1, float srcX2, float srcY2,
3518
Draw::Framebuffer *dest, float destX1, float destY1, float destX2, float destY2,
3519
bool linearFilter,
3520
int scaleFactor,
3521
Draw2DPipeline *pipeline, const char *tag) {
3522
3523
if (pipeline->info.writeChannel == RASTER_DEPTH) {
3524
_dbg_assert_(draw_->GetDeviceCaps().fragmentShaderDepthWriteSupported);
3525
}
3526
3527
int destW, destH, srcW, srcH;
3528
draw_->GetFramebufferDimensions(src, &srcW, &srcH);
3529
draw_->GetFramebufferDimensions(dest, &destW, &destH);
3530
3531
// Unbind the texture first to avoid the D3D11 hazard check (can't set render target to things bound as textures and vice versa, not even temporarily).
3532
draw_->BindTexture(0, nullptr);
3533
// This will get optimized away in case it's already bound (in VK and GL at least..)
3534
draw_->BindFramebufferAsRenderTarget(dest, { Draw::RPAction::KEEP, Draw::RPAction::KEEP, Draw::RPAction::KEEP }, tag ? tag : "BlitUsingRaster");
3535
draw_->BindFramebufferAsTexture(src, 0, pipeline->info.readChannel == RASTER_COLOR ? Draw::Aspect::COLOR_BIT : Draw::Aspect::DEPTH_BIT, Draw::ALL_LAYERS);
3536
3537
if (destX1 == 0.0f && destY1 == 0.0f && destX2 >= destW && destY2 >= destH) {
3538
// We overwrite the whole channel of the framebuffer, so we can invalidate the current contents.
3539
draw_->InvalidateFramebuffer(Draw::FB_INVALIDATION_LOAD, pipeline->info.writeChannel == RASTER_COLOR ? Draw::Aspect::COLOR_BIT : Draw::Aspect::DEPTH_BIT);
3540
}
3541
3542
Draw::Viewport viewport{ 0.0f, 0.0f, (float)dest->Width(), (float)dest->Height(), 0.0f, 1.0f };
3543
draw_->SetViewport(viewport);
3544
draw_->SetScissorRect(0, 0, (int)dest->Width(), (int)dest->Height());
3545
3546
draw2D_.Blit(pipeline, srcX1, srcY1, srcX2, srcY2, destX1, destY1, destX2, destY2, (float)srcW, (float)srcH, (float)destW, (float)destH, linearFilter, scaleFactor);
3547
3548
gstate_c.Dirty(DIRTY_ALL_RENDER_STATE);
3549
}
3550
3551
int FramebufferManagerCommon::GetFramebufferLayers() const {
3552
int layers = 1;
3553
if (gstate_c.Use(GPU_USE_SINGLE_PASS_STEREO)) {
3554
layers = 2;
3555
}
3556
return layers;
3557
}
3558
3559
// Returns a framebuffer at the same address and byte stride as `src` but with color format
// `newFormat`, creating a clone (with widths rescaled by the bytes-per-pixel ratio) if none
// exists yet. The result is then refreshed from overlapping framebuffers and marked as the
// most recently bound color buffer.
VirtualFramebuffer *FramebufferManagerCommon::ResolveFramebufferColorToFormat(VirtualFramebuffer *src, GEBufferFormat newFormat) {
	// Look for an identical framebuffer with the new format
	_dbg_assert_(src->fb_format != newFormat);

	VirtualFramebuffer *resolved = nullptr;
	for (auto candidate : vfbs_) {
		if (candidate == src) {
			continue;
		}

		// Sanity check for things that shouldn't exist.
		if (candidate->fb_address == src->fb_address && candidate->fb_format == src->fb_format && candidate->fb_stride == src->fb_stride) {
			_dbg_assert_msg_(false, "illegal clone of src found");
		}

		const bool sameMemory = candidate->fb_address == src->fb_address && candidate->FbStrideInBytes() == src->FbStrideInBytes();
		if (sameMemory && candidate->fb_format == newFormat) {
			resolved = candidate;
			break;
		}
	}

	if (!resolved) {
		// Create a clone!
		resolved = new VirtualFramebuffer();
		*resolved = *src;  // Copies everything, but watch out! Can't copy fbo.

		// Adjust width by bpp.
		float widthFactor = (float)BufferFormatBytesPerPixel(resolved->fb_format) / (float)BufferFormatBytesPerPixel(newFormat);

		resolved->width *= widthFactor;
		resolved->bufferWidth *= widthFactor;
		resolved->renderWidth *= widthFactor;
		resolved->drawnWidth *= widthFactor;
		resolved->newWidth *= widthFactor;
		resolved->safeWidth *= widthFactor;

		resolved->fb_format = newFormat;
		// stride stays the same since it's in pixels.

		WARN_LOG(Log::FrameBuf, "Creating %s clone of %08x/%08x/%s (%dx%d -> %dx%d)", GeBufferFormatToString(newFormat), src->fb_address, src->z_address, GeBufferFormatToString(src->fb_format), src->width, src->height, resolved->width, resolved->height);

		// The clone gets its own FBO, replacing the pointer copied from src above.
		char tag[128];
		FormatFramebufferName(resolved, tag, sizeof(tag));
		resolved->fbo = draw_->CreateFramebuffer({ resolved->renderWidth, resolved->renderHeight, 1, GetFramebufferLayers(), 0, true, tag });
		vfbs_.push_back(resolved);
	}

	// OK, now resolve it so we can texture from it.
	// This will do any necessary reinterprets.
	CopyToColorFromOverlappingFramebuffers(resolved);
	// Now we consider the resolved one the latest at the address (though really, we could make them equivalent?).
	resolved->colorBindSeq = GetBindSeqCount();
	return resolved;
}
3613
3614
// Killzone-specific heuristic: decides whether the current draw targets the 32 pixel wide
// margin to the right of the 480 pixel wide surface. If so, the render target offset is set
// to -480 and the params' addresses are advanced past the surface; otherwise the offset is
// reset. *drawing_width receives 32 or 480 accordingly.
static void ApplyKillzoneFramebufferSplit(FramebufferHeuristicParams *params, int *drawing_width) {
	// Detect whether we're rendering to the margin.
	bool margin = false;
	if ((params->scissorRight - params->scissorLeft) == 32) {
		// Title screen has this easy case. It also uses non-through verts, so lucky for us that we have this.
		margin = true;
	} else if (params->scissorRight == 480) {
		margin = false;
	} else {
		// Go deep, look at the vertices. Killzone-specific, of course.
		if ((gstate.vertType & 0xFFFFFF) == 0x00800102) {  // through, u16, s16
			// Three u16 values starting at the vertex address are inspected below. Make sure
			// the guest pointer is valid before dereferencing it; if not, assume no margin.
			const u32 bytesNeeded = (u32)(3 * sizeof(u16));
			if (Memory::IsValidRange(gstate_c.vertexAddr, bytesNeeded)) {
				const u16 *vdata = (const u16 *)Memory::GetPointerUnchecked(gstate_c.vertexAddr);
				int v0PosU = vdata[0];
				int v0PosX = vdata[2];
				if (v0PosX >= 480 && v0PosU < 480) {
					// Texturing from surface, writing to margin
					margin = true;
				}
			}
		}

		// TODO: Implement this for Burnout Dominator. It has to handle self-reads inside
		// the margin framebuffer though, so framebuffer copies are still needed, just smaller.
		// It uses 0x0080019f (through, float texcoords, ABGR 8888 colors, float positions).
	}

	if (margin) {
		gstate_c.SetCurRTOffset(-480, 0);
		// Modify the fb_address and z_address too to avoid matching below.
		params->fb_address += 480 * 4;
		params->z_address += 480 * 2;
		*drawing_width = 32;
	} else {
		gstate_c.SetCurRTOffset(0, 0);
		*drawing_width = 480;
	}
}
3651
3652