Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
hrydgard
GitHub Repository: hrydgard/ppsspp
Path: blob/master/Core/HW/GranularMixer.cpp
3658 views
1
// Copyright 2008 Dolphin Emulator Project
2
// SPDX-License-Identifier: GPL-2.0-or-later
3
4
#include "Core/HW/GranularMixer.h"
5
6
#include <chrono>
7
#include <algorithm>
8
#include <cmath>
9
#include <cstring>
10
11
#include "Common/CommonTypes.h"
12
#include "Common/Log.h"
13
#include "Common/Math/math_util.h"
14
#include "Common/Swap.h"
15
#include "Core/HW/Display.h"
16
#include "Core/Core.h"
17
#include "Core/System.h"
18
#include "Core/Util/AudioFormat.h" // for clamp_u16
19
20
// Window applied to each granule as it is queued (see Enqueue), one weight per sample.
// Something like a gaussian: the weights rise smoothly from ~0 to ~1 over the first
// half of the table and the second half is the mirror image of the first.
// Spot checks show that entries i and i + 128 add up to 1 at the printed precision,
// which fits how Mix sums two granules offset by half a granule without further scaling.
static const float g_GranuleWindow[256] = {
	0.0000016272f, 0.0000050749f, 0.0000113187f, 0.0000216492f, 0.0000377350f, 0.0000616906f,
	0.0000961509f, 0.0001443499f, 0.0002102045f, 0.0002984010f, 0.0004144844f, 0.0005649486f,
	0.0007573262f, 0.0010002765f, 0.0013036694f, 0.0016786636f, 0.0021377783f, 0.0026949534f,
	0.0033656000f, 0.0041666352f, 0.0051165029f, 0.0062351752f, 0.0075441359f, 0.0090663409f,
	0.0108261579f, 0.0128492811f, 0.0151626215f, 0.0177941726f, 0.0207728499f, 0.0241283062f,
	0.0278907219f, 0.0320905724f, 0.0367583739f, 0.0419244083f, 0.0476184323f, 0.0538693708f,
	0.0607049996f, 0.0681516192f, 0.0762337261f, 0.0849736833f, 0.0943913952f, 0.1045039915f,
	0.1153255250f, 0.1268666867f, 0.1391345431f, 0.1521323012f, 0.1658591025f, 0.1803098534f,
	0.1954750915f, 0.2113408944f, 0.2278888303f, 0.2450959552f, 0.2629348550f, 0.2813737361f,
	0.3003765625f, 0.3199032396f, 0.3399098438f, 0.3603488941f, 0.3811696664f, 0.4023185434f,
	0.4237393998f, 0.4453740162f, 0.4671625177f, 0.4890438330f, 0.5109561670f, 0.5328374823f,
	0.5546259838f, 0.5762606002f, 0.5976814566f, 0.6188303336f, 0.6396511059f, 0.6600901562f,
	0.6800967604f, 0.6996234375f, 0.7186262639f, 0.7370651450f, 0.7549040448f, 0.7721111697f,
	0.7886591056f, 0.8045249085f, 0.8196901466f, 0.8341408975f, 0.8478676988f, 0.8608654569f,
	0.8731333133f, 0.8846744750f, 0.8954960085f, 0.9056086048f, 0.9150263167f, 0.9237662739f,
	0.9318483808f, 0.9392950004f, 0.9461306292f, 0.9523815677f, 0.9580755917f, 0.9632416261f,
	0.9679094276f, 0.9721092781f, 0.9758716938f, 0.9792271501f, 0.9822058274f, 0.9848373785f,
	0.9871507189f, 0.9891738421f, 0.9909336591f, 0.9924558641f, 0.9937648248f, 0.9948834971f,
	0.9958333648f, 0.9966344000f, 0.9973050466f, 0.9978622217f, 0.9983213364f, 0.9986963306f,
	0.9989997235f, 0.9992426738f, 0.9994350514f, 0.9995855156f, 0.9997015990f, 0.9997897955f,
	0.9998556501f, 0.9999038491f, 0.9999383094f, 0.9999622650f, 0.9999783508f, 0.9999886813f,
	0.9999949251f, 0.9999983728f, 0.9999983728f, 0.9999949251f, 0.9999886813f, 0.9999783508f,
	0.9999622650f, 0.9999383094f, 0.9999038491f, 0.9998556501f, 0.9997897955f, 0.9997015990f,
	0.9995855156f, 0.9994350514f, 0.9992426738f, 0.9989997235f, 0.9986963306f, 0.9983213364f,
	0.9978622217f, 0.9973050466f, 0.9966344000f, 0.9958333648f, 0.9948834971f, 0.9937648248f,
	0.9924558641f, 0.9909336591f, 0.9891738421f, 0.9871507189f, 0.9848373785f, 0.9822058274f,
	0.9792271501f, 0.9758716938f, 0.9721092781f, 0.9679094276f, 0.9632416261f, 0.9580755917f,
	0.9523815677f, 0.9461306292f, 0.9392950004f, 0.9318483808f, 0.9237662739f, 0.9150263167f,
	0.9056086048f, 0.8954960085f, 0.8846744750f, 0.8731333133f, 0.8608654569f, 0.8478676988f,
	0.8341408975f, 0.8196901466f, 0.8045249085f, 0.7886591056f, 0.7721111697f, 0.7549040448f,
	0.7370651450f, 0.7186262639f, 0.6996234375f, 0.6800967604f, 0.6600901562f, 0.6396511059f,
	0.6188303336f, 0.5976814566f, 0.5762606002f, 0.5546259838f, 0.5328374823f, 0.5109561670f,
	0.4890438330f, 0.4671625177f, 0.4453740162f, 0.4237393998f, 0.4023185434f, 0.3811696664f,
	0.3603488941f, 0.3399098438f, 0.3199032396f, 0.3003765625f, 0.2813737361f, 0.2629348550f,
	0.2450959552f, 0.2278888303f, 0.2113408944f, 0.1954750915f, 0.1803098534f, 0.1658591025f,
	0.1521323012f, 0.1391345431f, 0.1268666867f, 0.1153255250f, 0.1045039915f, 0.0943913952f,
	0.0849736833f, 0.0762337261f, 0.0681516192f, 0.0607049996f, 0.0538693708f, 0.0476184323f,
	0.0419244083f, 0.0367583739f, 0.0320905724f, 0.0278907219f, 0.0241283062f, 0.0207728499f,
	0.0177941726f, 0.0151626215f, 0.0128492811f, 0.0108261579f, 0.0090663409f, 0.0075441359f,
	0.0062351752f, 0.0051165029f, 0.0041666352f, 0.0033656000f, 0.0026949534f, 0.0021377783f,
	0.0016786636f, 0.0013036694f, 0.0010002765f, 0.0007573262f, 0.0005649486f, 0.0004144844f,
	0.0002984010f, 0.0002102045f, 0.0001443499f, 0.0000961509f, 0.0000616906f, 0.0000377350f,
	0.0000216492f, 0.0000113187f, 0.0000050749f, 0.0000016272f
};
66
67
// Converts a float sample to s16, saturating to the symmetric range [-32767, 32767].
// In-range values are truncated toward zero by the cast.
// NaN is mapped to 0 (silence): it fails every ordered comparison, so without the
// explicit check it would reach the float-to-integer cast, which is undefined behavior.
inline s16 clampfloat_s16(float f) {
	if (f >= 32767.0f) return 32767;
	// Strictly inside the range; this comparison is false for NaN as well.
	if (f > -32767.0f) return static_cast<s16>(f);
	// Only values <= -32767 and NaN get this far, so the NaN test stays off the common path.
	return std::isnan(f) ? 0 : -32767;
}
72
73
// Constructor. Performs no setup of its own here; it only records in the audio log
// that a mixer instance was created.
GranularMixer::GranularMixer()
{
	INFO_LOG(Log::Audio, "Mixer is initialized");
}
76
77
// Executed from sound stream thread
78
void GranularMixer::Mix(s16 *samples, u32 num_samples, int outSampleRate, float fpsEstimate) {
79
_dbg_assert_(samples);
80
if (!samples)
81
return;
82
memset(samples, 0, num_samples * 2 * sizeof(s16));
83
frameTimeEstimate_ = 1.0f / fpsEstimate;
84
85
smoothedReadSize_ = smoothedReadSize_ == 0 ? num_samples : (smoothedReadSize_ * 0.95f + num_samples * 0.05f);
86
87
constexpr u32 INDEX_HALF = 0x80000000;
88
constexpr double FADE_IN_RC = 0.008;
89
constexpr double FADE_OUT_RC = 0.064;
90
91
// We need at least a double because the index jump has 24 bits of fractional precision.
92
const double out_sample_rate = outSampleRate;
93
double inSampleRate = 44100;
94
95
const double emulation_speed = 1.0f; // TODO: Change when we're in slow-motion mode etc.
96
if (0 < emulation_speed && emulation_speed != 1.0)
97
inSampleRate *= emulation_speed;
98
99
const double base = static_cast<double>(1 << GRANULE_FRAC_BITS);
100
const u32 index_jump = std::lround(base * inSampleRate / out_sample_rate);
101
102
// These fade in / out multiplier are tuned to match a constant
103
// fade speed regardless of the input or the output sample rate.
104
const float fade_in_mul = -std::expm1(-1.0 / (out_sample_rate * FADE_IN_RC));
105
const float fade_out_mul = -std::expm1(-1.0 / (out_sample_rate * FADE_OUT_RC));
106
107
// Calculate the ideal length of the granule queue.
108
// NOTE: We must have enough room here for 20fps games, generating all their audio
109
// in a burst each frame (since we can't force real clock sync). That means 16*3 = 48 or rather 50ms.
110
// However, in case of faster framerates, we should apply some pressure to reduce this. And if real clock sync
111
// is on, we should also be able to get away with a shorter buffer here.
112
// const u32 buffer_size_ms = frameTimeEstimate_ * 44100.0f;
113
const u32 buffer_size_samples = smoothedReadSize_ * 4 + std::llround(frameTimeEstimate_ * inSampleRate);
114
queuedSamplesTarget_ = buffer_size_samples;
115
116
// Limit the possible queue sizes to any number between 4 and 64.
117
const u32 buffer_size_granules =
118
std::clamp((buffer_size_samples) / (GRANULE_SIZE >> 1), static_cast<u32>(4),
119
static_cast<u32>(MAX_GRANULE_QUEUE_SIZE));
120
121
if (buffer_size_granules != m_granule_queue_size.load(std::memory_order_relaxed)) {
122
INFO_LOG(Log::Audio, "Granule buffer size changed to %d", buffer_size_granules);
123
}
124
125
m_granule_queue_size.store(buffer_size_granules, std::memory_order_relaxed);
126
127
int actualQueueSize = m_queue_head - m_queue_tail;
128
if (smoothedQueueSize_ == 0) {
129
smoothedQueueSize_ = actualQueueSize;
130
} else {
131
constexpr float factor = 0.95f;
132
smoothedQueueSize_ = factor * smoothedQueueSize_ + (1.0f - factor) * (float)actualQueueSize;
133
}
134
if (actualQueueSize < queuedGranulesMin_) {
135
queuedGranulesMin_ = actualQueueSize;
136
}
137
if (actualQueueSize > queuedGranulesMax_) {
138
queuedGranulesMax_ = actualQueueSize;
139
}
140
141
// TODO: The performance of this could be greatly enhanced with SIMD but it won't be easy
142
// due to wrapping of various buffers.
143
bool queue_looping = m_queue_looping.load(std::memory_order_relaxed);
144
while (num_samples-- > 0) {
145
// The indexes for the front and back buffers are offset by 50% of the granule size.
146
// We use the modular nature of 32-bit integers to wrap around the granule size.
147
m_current_index += index_jump;
148
const u32 front_index = m_current_index;
149
const u32 back_index = m_current_index + INDEX_HALF;
150
151
// If either index is less than the index jump, that means we reached
152
// the end of the of the buffer and need to load the next granule.
153
if (front_index < index_jump)
154
Dequeue(&m_front);
155
else if (back_index < index_jump)
156
Dequeue(&m_back);
157
158
// The Granules are pre-windowed, so we can just add them together. A bit of accidental wrapping doesn't matter
159
// either since the tails are so weak.
160
const u32 ft = front_index >> GRANULE_FRAC_BITS;
161
const u32 bt = back_index >> GRANULE_FRAC_BITS;
162
const StereoPair s0 = m_front[(ft - 2) & GRANULE_MASK] + m_back[(bt - 2) & GRANULE_MASK];
163
const StereoPair s1 = m_front[(ft - 1) & GRANULE_MASK] + m_back[(bt - 1) & GRANULE_MASK];
164
const StereoPair s2 = m_front[(ft + 0) & GRANULE_MASK] + m_back[(bt + 0) & GRANULE_MASK];
165
const StereoPair s3 = m_front[(ft + 1) & GRANULE_MASK] + m_back[(bt + 1) & GRANULE_MASK];
166
const StereoPair s4 = m_front[(ft + 2) & GRANULE_MASK] + m_back[(bt + 2) & GRANULE_MASK];
167
const StereoPair s5 = m_front[(ft + 3) & GRANULE_MASK] + m_back[(bt + 3) & GRANULE_MASK];
168
169
// Probably an overkill interpolator, but let's go with it for now.
170
// Polynomial Interpolators for High-Quality Resampling of
171
// Over Sampled Audio by Olli Niemitalo, October 2001.
172
// Page 43 -- 6-point, 3rd-order Hermite:
173
// https://yehar.com/blog/wp-content/uploads/2009/08/deip.pdf
174
const u32 t_frac = m_current_index & ((1 << GRANULE_FRAC_BITS) - 1);
175
const float t1 = t_frac / static_cast<float>(1 << GRANULE_FRAC_BITS);
176
const float t2 = t1 * t1;
177
const float t3 = t2 * t1;
178
StereoPair sample = (
179
s0 * ((+0.0f + 1.0f * t1 - 2.0f * t2 + 1.0f * t3) * (1.0f / 12.0f)) +
180
s1 * ((+0.0f - 8.0f * t1 + 15.0f * t2 - 7.0f * t3) * (1.0f / 12.0f)) +
181
s2 * ((+3.0f + 0.0f * t1 - 7.0f * t2 + 4.0f * t3) * (1.0f / 3.0f)) +
182
s3 * ((+0.0f + 2.0f * t1 + 5.0f * t2 - 4.0f * t3) * (1.0f / 3.0f)) +
183
s4 * ((+0.0f - 1.0f * t1 - 6.0f * t2 + 7.0f * t3) * (1.0f / 12.0f)) +
184
s5 * ((+0.0f + 0.0f * t1 + 1.0f * t2 - 1.0f * t3) * (1.0f / 12.0f))
185
);
186
187
// Update the looping flag occasionally.
188
if (!(num_samples & 31)) {
189
queue_looping = m_queue_looping.load(std::memory_order_relaxed);
190
}
191
192
// Apply Fade In / Fade Out depending on if we are looping
193
if (queue_looping)
194
m_fade_volume += fade_out_mul * (0.0f - m_fade_volume);
195
else
196
m_fade_volume += fade_in_mul * (1.0f - m_fade_volume);
197
198
samples[0] = (int16_t)clamp_value(sample.l * m_fade_volume, -32767.0f, 32767.0f);
199
samples[1] = (int16_t)clamp_value(sample.r * m_fade_volume, -32767.0f, 32767.0f);
200
201
samples += 2;
202
}
203
}
204
205
void GranularMixer::PushSamples(const s32 *samples, u32 num_samples, float volume) {
206
// TODO: This can be massively sped up. Although hardly likely to be a bottleneck.
207
while (num_samples-- > 0) {
208
const s16 l = clampfloat_s16(samples[0] * volume);
209
const s16 r = clampfloat_s16(samples[1] * volume);
210
samples += 2;
211
212
m_next_buffer[m_next_buffer_index] = StereoPair(l, r);
213
m_next_buffer_index = (m_next_buffer_index + 1) & GRANULE_MASK;
214
215
// The granules overlap by 50%, so we need to enqueue the
216
// next buffer every time we fill half of the samples.
217
if (m_next_buffer_index == 0 || m_next_buffer_index == m_next_buffer.size() / 2) {
218
Enqueue();
219
}
220
}
221
}
222
223
void GranularMixer::Enqueue() {
224
const u32 head = m_queue_head.load(std::memory_order_acquire);
225
226
// Check if we run out of space in the circular queue. (rare)
227
u32 next_head = head + 1;
228
if ((next_head & GRANULE_QUEUE_MASK) == (m_queue_tail.load(std::memory_order_acquire) & GRANULE_QUEUE_MASK)) {
229
WARN_LOG(Log::Audio,
230
"Granule Queue has completely filled and audio samples are being dropped. "
231
"This should not happen unless the audio backend has stopped requesting audio.");
232
return;
233
}
234
235
// The compiler (at least MSVC) fails at optimizing this loop using SIMD instructions.
236
const u32 start_index = m_next_buffer_index;
237
238
const u32 maskedHead = head & GRANULE_QUEUE_MASK;
239
for (u32 i = 0; i < GRANULE_SIZE; ++i) {
240
m_queue[maskedHead][i] = m_next_buffer[(i + start_index) & GRANULE_MASK] * g_GranuleWindow[i];
241
}
242
243
m_queue_head.store(next_head, std::memory_order_release);
244
m_queue_looping.store(false, std::memory_order_relaxed);
245
}
246
247
// Copies the granule at the queue tail into `*granule` and advances the tail.
// Called from Mix whenever the front or back play index wraps around.
//
// - If more granules are queued than the current target size, the tail first jumps
//   forward to roughly half the target behind the head (counted as an overrun).
// - If this read takes the last queued granule, the tail is either moved back to
//   replay recent granules (gap filling enabled and the core is running; counted as
//   an underrun and flagged as looping), or `*granule` is zero-filled and the tail
//   is left where it is.
void GranularMixer::Dequeue(Granule *granule) {
	const u32 granule_queue_size = m_granule_queue_size.load(std::memory_order_relaxed);
	const u32 head = m_queue_head.load(std::memory_order_acquire);
	u32 tail = m_queue_tail.load(std::memory_order_acquire);

	// Checks to see if the queue has gotten too long.
	if ((head - tail) > granule_queue_size) {
		// Jump the playhead to half the queue size behind the head.
		const u32 gap = (granule_queue_size >> 1) + 1;
		tail = (head - gap);
		overruns_++;
	}

	// Checks to see if the queue is empty.
	u32 next_tail = tail + 1;

	// NOTE: A disabled experiment used to sit here that occasionally repeated a single
	// block to keep the queue reasonably sized. It was the only user of a local copy of
	// the looping flag, so that unused (sequentially consistent) load is gone as well.
	if (next_tail == head) {
		// Only fill gaps when running to prevent stutter on pause.
		CoreState state = coreState;
		const bool is_running = state == CORE_RUNNING_CPU || state == CORE_RUNNING_GE;
		if (g_Config.bFillAudioGaps && is_running) {
			// Jump the playhead to half the queue size behind the head.
			// This will repeat a few past granules I guess? They still contain sensible data.
			// This provides smoother audio playback than suddenly stopping.
			const u32 gap = std::max<u32>(2, granule_queue_size >> 1) - 1;
			next_tail = head - gap;
			underruns_++;
			m_queue_looping.store(true, std::memory_order_relaxed);
		} else {
			// Send a zero granule.
			std::fill(granule->begin(), granule->end(), StereoPair{ 0.0f, 0.0f });
			m_queue_looping.store(false, std::memory_order_relaxed);
			return;
		}
	}

	*granule = m_queue[tail & GRANULE_QUEUE_MASK];
	m_queue_tail.store(next_tail, std::memory_order_release);
}
291
292
// Copies the mixer's current statistics into `*stats`, then restarts the min/max
// queue depth tracking so the next call reports a fresh interval.
void GranularMixer::GetStats(GranularStats *stats) {
	GranularStats &out = *stats;

	// Queue depth, measured in granules.
	out.queuedGranulesMin = queuedGranulesMin_;
	out.queuedGranulesMax = queuedGranulesMax_;
	out.smoothedQueuedGranules = smoothedQueueSize_;
	out.targetQueueSize = m_granule_queue_size.load(std::memory_order_relaxed);
	out.maxQueuedGranules = MAX_GRANULE_QUEUE_SIZE;

	// Playback state and error counters.
	out.fadeVolume = m_fade_volume;
	out.looping = m_queue_looping.load();
	out.overruns = overruns_;
	out.underruns = underruns_;

	// Inputs used by Mix to size the queue.
	out.smoothedReadSize = smoothedReadSize_;
	out.frameTimeEstimate = frameTimeEstimate_;
	out.queuedSamplesTarget = queuedSamplesTarget_;

	// Reset the extremes so Mix starts tracking them again from scratch.
	queuedGranulesMin_ = 10000;
	queuedGranulesMax_ = 0;
}
308
309