CoCalc -- StereoResampler.cpp

GitHub Repository: hrydgard/ppsspp
Path: blob/master/Core/HW/StereoResampler.cpp
³¹⁸⁶ views
1
// Copyright (c) 2015- PPSSPP Project and Dolphin Project.
2

3
// This program is free software: you can redistribute it and/or modify
4
// it under the terms of the GNU General Public License as published by
5
// the Free Software Foundation, version 2.0 or later versions.
6

7
// This program is distributed in the hope that it will be useful,
8
// but WITHOUT ANY WARRANTY; without even the implied warranty of
9
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10
// GNU General Public License 2.0 for more details.
11

12
// A copy of the GPL 2.0 should have been included with the program.
13
// If not, see http://www.gnu.org/licenses/
14

15
// Official git repository and contact information can be found at
16
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
17

18
// Adapted from Dolphin.
19

20
// 16 bit Stereo
21

22
// These must be powers of 2.
23
#define MAX_BUFSIZE_DEFAULT (4096) // 2*64ms - had to double it for nVidia Shield which has huge buffers
24
#define MAX_BUFSIZE_EXTRA   (8192)
25

26
#define TARGET_BUFSIZE_MARGIN 512
27

28
#define TARGET_BUFSIZE_DEFAULT 1680 // 40 ms
29
#define TARGET_BUFSIZE_EXTRA 3360 // 80 ms
30

31
#define MAX_FREQ_SHIFT  600.0f  // how far off can we be from 44100 Hz
32
#define CONTROL_FACTOR  0.2f // in freq_shift per fifo size offset
33
#define CONTROL_AVG     32.0f
34

35
#include "ppsspp_config.h"
36
#include <algorithm>
37
#include <cstring>
38
#include <atomic>
39

40
#include "Common/Common.h"
41
#include "Common/System/System.h"
42
#include "Common/Log.h"
43
#include "Common/Math/SIMDHeaders.h"
44
#include "Common/Math/CrossSIMD.h"
45
#include "Common/TimeUtil.h"
46
#include "Core/Config.h"
47
#include "Core/ConfigValues.h"
48
#include "Core/HW/StereoResampler.h"
49
#include "Core/Util/AudioFormat.h"  // for clamp_u8
50
#include "Core/System.h"
51

52
// Sets up the default buffer sizing, allocates the ring buffer, and adapts the
// input sample rate to displays that refresh at "almost 60 Hz".
StereoResampler::StereoResampler() noexcept
	: maxBufsize_(MAX_BUFSIZE_DEFAULT),
	  targetBufsize_(TARGET_BUFSIZE_DEFAULT) {
	// Always allocate for the largest configuration (two int16 values per frame,
	// zero-initialized) so the buffering mode can change later without reallocating.
	buffer_ = new int16_t[MAX_BUFSIZE_EXTRA * 2]();

	// Some Android devices are v-synced to non-60Hz framerates. We simply timestretch audio to fit.
	// TODO: should only do this if auto frameskip is off?
	const float displayHz = System_GetPropertyFloat(SYSPROP_DISPLAY_REFRESH_RATE);

	// Only adjust when the refresh rate is near 60 Hz without being exactly 60 Hz.
	const bool nearSixty = displayHz > 50.0f && displayHz < 70.0f;
	if (nearSixty && displayHz != 60.0f) {
		const int scaledRate = (int)(44100 * (displayHz / 60.0f));
		INFO_LOG(Log::Audio, "StereoResampler: Adjusting target sample rate to %dHz", scaledRate);
		inputSampleRateHz_ = scaledRate;
	}

	UpdateBufferSize();
}
71

72
// Releases the ring buffer allocated with new[] in the constructor.
StereoResampler::~StereoResampler() {
	delete[] buffer_;
	// Null the pointer so a stale access after destruction does not see a freed address.
	buffer_ = nullptr;
}
76

77
void StereoResampler::UpdateBufferSize() {
78
	if (g_Config.bExtraAudioBuffering) {
79
		maxBufsize_ = MAX_BUFSIZE_EXTRA;
80
		targetBufsize_ = TARGET_BUFSIZE_EXTRA;
81
	} else {
82
		maxBufsize_ = MAX_BUFSIZE_DEFAULT;
83
		targetBufsize_ = TARGET_BUFSIZE_DEFAULT;
84

85
		int systemBufsize = System_GetPropertyInt(SYSPROP_AUDIO_FRAMES_PER_BUFFER);
86
		if (systemBufsize > 0 && targetBufsize_ < systemBufsize + TARGET_BUFSIZE_MARGIN) {
87
			targetBufsize_ = std::min(4096, systemBufsize + TARGET_BUFSIZE_MARGIN);
88
			if (targetBufsize_ * 2 > MAX_BUFSIZE_DEFAULT)
89
				maxBufsize_ = MAX_BUFSIZE_EXTRA;
90
		}
91
	}
92
}
93

94
// factor is a 0.12-bit fixed point number.
95
template<bool multiply>
96
inline void ClampBufferToS16(s16 *out, const s32 *in, size_t size, int factor) {
97
	if (multiply) {
98
		// Let's SIMD later. Unfortunately for s16 operations, SSE2 is very different and odd
99
		// so CrossSIMD won't be very useful.
100
		// LLVM autovec does an okay job with this on ARM64, it turns out.
101
		for (size_t i = 0; i < size; i++) {
102
			out[i] = clamp_s16((in[i] * factor) >> 12);
103
		}
104
	} else {
105
#ifdef _M_SSE
106
		// Size will always be 16-byte aligned as the hwBlockSize is.
107
		while (size >= 8) {
108
			__m128i in1 = _mm_loadu_si128((__m128i *)in);
109
			__m128i in2 = _mm_loadu_si128((__m128i *)(in + 4));
110
			__m128i packed = _mm_packs_epi32(in1, in2);  // pack with signed saturation, perfect.
111
			_mm_storeu_si128((__m128i *)out, packed);
112
			out += 8;
113
			in += 8;
114
			size -= 8;
115
		}
116
#elif PPSSPP_ARCH(ARM_NEON)
117
		// Dynamic shifts can only be left, but it's signed - negate to shift right.
118
		while (size >= 8) {
119
			int32x4_t in1 = vld1q_s32(in);
120
			int32x4_t in2 = vld1q_s32(in + 4);
121
			int16x4_t packed1 = vqmovn_s32(in1);
122
			int16x4_t packed2 = vqmovn_s32(in2);
123
			vst1_s16(out, packed1);
124
			vst1_s16(out + 4, packed2);
125
			out += 8;
126
			in += 8;
127
			size -= 8;
128
		}
129
#endif
130
		// This does the remainder if SIMD was used, otherwise it does it all.
131
		for (size_t i = 0; i < size; i++) {
132
			out[i] = clamp_s16(in[i]);
133
		}
134
	}
135
}
136

137
// Converts `size` 32-bit samples to 16-bit with saturation, applying `volume`,
// a 12-bit multiplier where 4096 means unity gain.
inline void ClampBufferToS16WithVolume(s16 *out, const s32 *in, size_t size, int volume) {
	// Muted (or negative) volume: the output is plain silence.
	if (volume <= 0) {
		memset(out, 0, size * sizeof(s16));
		return;
	}

	// Below unity: scale each sample. The last parameter to ClampBufferToS16 is
	// a 12-bit multiplier, not a shift.
	if (volume < 4096) {
		ClampBufferToS16<true>(out, in, size, volume);
		return;
	}

	// Unity or above: no scaling, just clamp.
	ClampBufferToS16<false>(out, in, size, 0);
}
147

148
void StereoResampler::Clear() {
149
	memset(buffer_, 0, maxBufsize_ * 2 * sizeof(int16_t));
150
}
151

152
// Linearly interpolates between two 16-bit samples.
//
// s1:   the current sample.
// s2:   the next sample.
// frac: 0.16 fixed-point position between them (0 yields s1, 0xFFFF is just short of s2).
//
// Returns the interpolated value, clamped to [-32767, 32767].
inline int16_t MixSingleSample(int16_t s1, int16_t s2, uint16_t frac) {
	// (s2 - s1) can reach +/-65535 and frac can reach 65535, so the product does not
	// fit in 32 bits. Do the multiply in 64 bits to avoid signed overflow on
	// full-scale transitions.
	const int64_t step = (static_cast<int64_t>(s2 - s1) * frac) >> 16;
	const int32_t value = s1 + static_cast<int32_t>(step);
	if (value < -32767)
		return -32767;
	else if (value > 32767)
		return 32767;
	else
		return (int16_t)value;
}
161

162
// Executed from sound stream thread, pulling sound out of the buffer.
163
void StereoResampler::Mix(s16 *samples, unsigned int numSamples, bool consider_framelimit, int sample_rate) {
164
	if (!samples)
165
		return;
166

167
	unsigned int currentSample;
168

169
	// Cache access in non-volatile variable
170
	// This is the only function changing the read value, so it's safe to
171
	// cache it locally although it's written here.
172
	// The writing pointer will be modified outside, but it will only increase,
173
	// so we will just ignore new written data while interpolating (until it wraps...).
174
	// Without this cache, the compiler wouldn't be allowed to optimize the
175
	// interpolation loop.
176
	u32 indexR = indexR_.load();
177
	u32 indexW = indexW_.load();
178

179
	const int INDEX_MASK = (maxBufsize_ * 2 - 1);
180

181
	// This is only for debug visualization, not used for anything.
182
	lastBufSize_ = ((indexW - indexR) & INDEX_MASK) / 2;
183

184
	// Drift prevention mechanism.
185
	float numLeft = (float)(((indexW - indexR) & INDEX_MASK) / 2);
186
	// If we had to discard samples the last frame due to underrun,
187
	// apply an adjustment here. Otherwise we'll overestimate how many
188
	// samples we need.
189
	numLeft -= droppedSamples_;
190
	droppedSamples_ = 0;
191

192
	// numLeftI_ here becomes a lowpass filtered version of numLeft.
193
	numLeftI_ = (numLeft + numLeftI_ * (CONTROL_AVG - 1.0f)) / CONTROL_AVG;
194

195
	// Here we try to keep the buffer size around m_lowwatermark (which is
196
	// really now more like desired_buffer_size) by adjusting the speed.
197
	// Note that the speed of adjustment here does not take the buffer size into
198
	// account. Since this is called once per "output frame", the frame size
199
	// will affect how fast this algorithm reacts, which can't be a good thing.
200
	float offset = (numLeftI_ - (float)targetBufsize_) * CONTROL_FACTOR;
201
	if (offset > MAX_FREQ_SHIFT) offset = MAX_FREQ_SHIFT;
202
	if (offset < -MAX_FREQ_SHIFT) offset = -MAX_FREQ_SHIFT;
203

204
	outputSampleRateHz_ = (float)(inputSampleRateHz_ + offset);
205
	const u32 ratio = (u32)(65536.0 * outputSampleRateHz_ / (double)sample_rate);
206
	ratio_ = ratio;
207
	// TODO: consider a higher-quality resampling algorithm.
208
	// TODO: Add a fast path for 1:1.
209
	u32 frac = frac_;
210
	for (currentSample = 0; currentSample < numSamples * 2; currentSample += 2) {
211
		if (((indexW - indexR) & INDEX_MASK) <= 2) {
212
			// Ran out!
213
			// int missing = numSamples * 2 - currentSample;
214
			// ILOG("Resampler underrun: %d (numSamples: %d, currentSample: %d)", missing, numSamples, currentSample / 2);
215
			underrunCount_++;
216
			break;
217
		}
218
		u32 indexR2 = indexR + 2; //next sample
219
		s16 l1 = buffer_[indexR & INDEX_MASK]; //current
220
		s16 r1 = buffer_[(indexR + 1) & INDEX_MASK]; //current
221
		s16 l2 = buffer_[indexR2 & INDEX_MASK]; //next
222
		s16 r2 = buffer_[(indexR2 + 1) & INDEX_MASK]; //next
223
		samples[currentSample] = MixSingleSample(l1, l2, (u16)frac);
224
		samples[currentSample + 1] = MixSingleSample(r1, r2, (u16)frac);
225
		frac += ratio;
226
		indexR += 2 * (frac >> 16);
227
		frac &= 0xffff;
228
	}
229
	frac_ = frac;
230

231
	// Let's not count the underrun padding here.
232
	outputSampleCount_ += currentSample / 2;
233

234
	// Padding with the last value to reduce clicking
235
	short s[2];
236
	s[0] = clamp_s16(buffer_[(indexR - 1) & INDEX_MASK]);
237
	s[1] = clamp_s16(buffer_[(indexR - 2) & INDEX_MASK]);
238
	for (; currentSample < numSamples * 2; currentSample += 2) {
239
		samples[currentSample] = s[0];
240
		samples[currentSample + 1] = s[1];
241
	}
242

243
	// Flush cached variable
244
	indexR_.store(indexR);
245
}
246

247
// Executes on the emulator thread, pushing sound into the buffer.
248
void StereoResampler::PushSamples(const s32 *samples, unsigned int numSamples, float multiplier) {
249
	inputSampleCount_ += numSamples;
250

251
	UpdateBufferSize();
252
	const int INDEX_MASK = (maxBufsize_ * 2 - 1);
253
	// Cache access in non-volatile variable
254
	// indexR isn't allowed to cache in the audio throttling loop as it
255
	// needs to get updates to not deadlock.
256
	u32 indexW = indexW_.load();
257

258
	u32 cap = maxBufsize_ * 2;
259
	// If fast-forwarding, no need to fill up the entire buffer, just screws up timing after releasing the fast-forward button.
260
	if (PSP_CoreParameter().fastForward) {
261
		cap = targetBufsize_ * 2;
262
	}
263

264
	// Check if we have enough free space
265
	// indexW == indexR_ results in empty buffer, so indexR must always be smaller than indexW
266
	if (numSamples * 2 + ((indexW - indexR_.load()) & INDEX_MASK) >= cap) {
267
		if (!PSP_CoreParameter().fastForward) {
268
			overrunCount_++;
269
		}
270
		// TODO: "Timestretch" by doing a windowed overlap with existing buffer content?
271
		return;
272
	}
273

274
	// 12-bit volume.
275
	int volume = (int)(multiplier * 4096.0f);
276

277
	// Check if we need to roll over to the start of the buffer during the copy.
278
	unsigned int indexW_left_samples = maxBufsize_ * 2 - (indexW & INDEX_MASK);
279
	if (numSamples * 2 > indexW_left_samples) {
280
		ClampBufferToS16WithVolume(&buffer_[indexW & INDEX_MASK], samples, indexW_left_samples, volume);
281
		ClampBufferToS16WithVolume(&buffer_[0], samples + indexW_left_samples, numSamples * 2 - indexW_left_samples, volume);
282
	} else {
283
		ClampBufferToS16WithVolume(&buffer_[indexW & INDEX_MASK], samples, numSamples * 2, volume);
284
	}
285

286
	indexW_ += numSamples * 2;
287
	lastPushSize_ = numSamples;
288
}
289

290
void StereoResampler::GetAudioDebugStats(char *buf, size_t bufSize) {
291
	double elapsed = time_now_d() - startTime_;
292

293
	double effective_input_sample_rate = (double)inputSampleCount_ / elapsed;
294
	double effective_output_sample_rate = (double)outputSampleCount_ / elapsed;
295

296
	double bufferLatencyMs = 1000.0 * (double)lastBufSize_ / (double)inputSampleRateHz_;
297
	snprintf(buf, bufSize,
298
		"Audio buffer: %d/%d (%0.1fms, target: %d)\n"
299
		"Filtered: %0.2f\n"
300
		"Underruns: %d\n"
301
		"Overruns: %d\n"
302
		"Sample rate: %d (input: %d)\n"
303
		"Effective input sample rate: %0.2f\n"
304
		"Effective output sample rate: %0.2f\n"
305
		"Push size: %d\n"
306
		"Ratio: %0.6f\n",
307
		lastBufSize_,
308
		maxBufsize_,
309
		bufferLatencyMs,
310
		targetBufsize_,
311
		numLeftI_,
312
		underrunCountTotal_,
313
		overrunCountTotal_,
314
		(int)outputSampleRateHz_,
315
		inputSampleRateHz_,
316
		effective_input_sample_rate,
317
		effective_output_sample_rate,
318
		lastPushSize_,
319
		(float)ratio_ / 65536.0f);
320
	underrunCountTotal_ += underrunCount_;
321
	overrunCountTotal_ += overrunCount_;
322
	underrunCount_ = 0;
323
	overrunCount_ = 0;
324

325
	// Use this to remove the bias from the startup.
326
	// if (elapsed > 3.0) {
327
		//ResetStatCounters();
328
	// }
329
}
330

331
void StereoResampler::ResetStatCounters() {
332
	underrunCount_ = 0;
333
	overrunCount_ = 0;
334
	underrunCountTotal_ = 0;
335
	overrunCountTotal_ = 0;
336
	inputSampleCount_ = 0;
337
	outputSampleCount_ = 0;
338
	startTime_ = time_now_d();
339
}
340

341
Product

Resources

Company