CoCalc -- WASAPIContext.cpp

GitHub Repository: hrydgard/ppsspp
Path: blob/master/Windows/WASAPIContext.cpp
3185 views
1
#include <windows.h>
2
#include <mmdeviceapi.h>
3
#include <functiondiscoverykeys_devpkey.h>
4
#include <audioclient.h>
5
#include <avrt.h>
6
#include <comdef.h>
7
#include <atomic>
8
#include <thread>
9
#include <vector>
10
#include <string_view>
11
#include <wrl/client.h>
12

13
#include "Common/Data/Encoding/Utf8.h"
14
#include "Common/Log.h"
15
#include "WASAPIContext.h"
16

17
using Microsoft::WRL::ComPtr;
18

19
// We must have one of these already...
20
static inline s16 ClampFloatToS16(float f) {
21
	f *= 32768.0f;
22
	if (f >= 32767) {
23
		return 32767;
24
	} else if (f < -32767) {
25
		return -32767;
26
	} else {
27
		return (s16)(s32)f;
28
	}
29
}
30

31
void BuildStereoFloatFormat(const WAVEFORMATEXTENSIBLE *original, WAVEFORMATEXTENSIBLE *output) {
32
	// Zero‑init all fields first.
33
	ZeroMemory(output, sizeof(WAVEFORMATEXTENSIBLE));
34

35
	// Fill the WAVEFORMATEX base part.
36
	output->Format.wFormatTag = WAVE_FORMAT_EXTENSIBLE;
37
	output->Format.nChannels = 2;
38
	output->Format.nSamplesPerSec = original->Format.nSamplesPerSec;
39
	output->Format.wBitsPerSample = 32;                                 // 32‑bit float
40
	output->Format.nBlockAlign = output->Format.nChannels *
41
		output->Format.wBitsPerSample / 8;
42
	output->Format.nAvgBytesPerSec = output->Format.nSamplesPerSec *
43
		output->Format.nBlockAlign;
44
	output->Format.cbSize = sizeof(WAVEFORMATEXTENSIBLE) - sizeof(WAVEFORMATEX);
45

46
	// Fill the extensible fields.
47
	output->Samples.wValidBitsPerSample = 32;
48
	output->dwChannelMask = SPEAKER_FRONT_LEFT | SPEAKER_FRONT_RIGHT;
49
	output->SubFormat = KSDATAFORMAT_SUBTYPE_IEEE_FLOAT;
50
}
51

52
// Creates the MMDevice enumerator and registers this context's notification client
// with it. If the enumerator can't be created, enumerator_ is left null and no
// callback is registered.
WASAPIContext::WASAPIContext() : notificationClient_(this) {
	const HRESULT created = CoCreateInstance(__uuidof(MMDeviceEnumerator), nullptr, CLSCTX_ALL, IID_PPV_ARGS(&enumerator_));
	if (SUCCEEDED(created)) {
		enumerator_->RegisterEndpointNotificationCallback(&notificationClient_);
	} else {
		// Bad! Make sure the pointer is null so the other methods can detect the failure.
		enumerator_ = nullptr;
	}
}
61

62
// Stops playback, unregisters the notification callback and frees the conversion
// buffer. All of this is skipped when the enumerator was never created.
WASAPIContext::~WASAPIContext() {
	if (enumerator_) {
		Stop();
		enumerator_->UnregisterEndpointNotificationCallback(&notificationClient_);
		delete[] tempBuf_;
	}
	// Otherwise: without an enumerator, nothing can have been happening.
}
71

72
// Maps a WAVEFORMATEX description onto the sample formats the audio loop can write.
// Returns AudioFormat::S16 for 16-bit integer PCM (plain or extensible),
// AudioFormat::Float for extensible IEEE float, and AudioFormat::Unhandled
// (after logging a warning) for everything else, including a null pointer.
WASAPIContext::AudioFormat WASAPIContext::Classify(const WAVEFORMATEX *format) {
	if (format) {
		// wBitsPerSample is a bit count, so 16-bit PCM is 16 here (not 2 bytes).
		const bool is16Bit = format->wBitsPerSample == 16;
		if (format->wFormatTag == WAVE_FORMAT_PCM && is16Bit) {
			return AudioFormat::S16;
		}
		if (format->wFormatTag == WAVE_FORMAT_EXTENSIBLE) {
			const WAVEFORMATEXTENSIBLE *ex = reinterpret_cast<const WAVEFORMATEXTENSIBLE *>(format);
			if (ex->SubFormat == KSDATAFORMAT_SUBTYPE_IEEE_FLOAT) {
				return AudioFormat::Float;
			}
			if (ex->SubFormat == KSDATAFORMAT_SUBTYPE_PCM && is16Bit) {
				return AudioFormat::S16;
			}
		}
	}
	WARN_LOG(Log::Audio, "Unhandled output format!");
	return AudioFormat::Unhandled;
}
85

86
void WASAPIContext::EnumerateDevices(std::vector<AudioDeviceDesc> *output, bool captureDevices) {
87
	ComPtr<IMMDeviceCollection> collection;
88
	enumerator_->EnumAudioEndpoints(captureDevices ? eCapture : eRender, DEVICE_STATE_ACTIVE, &collection);
89

90
	if (!collection) {
91
		ERROR_LOG(Log::Audio, "Failed to enumerate devices");
92
		return;
93
	}
94

95
	UINT count = 0;
96
	collection->GetCount(&count);
97

98
	for (UINT i = 0; i < count; ++i) {
99
		ComPtr<IMMDevice> device;
100
		collection->Item(i, &device);
101

102
		ComPtr<IPropertyStore> props;
103
		device->OpenPropertyStore(STGM_READ, &props);
104

105
		PROPVARIANT nameProp;
106
		PropVariantInit(&nameProp);
107
		props->GetValue(PKEY_Device_FriendlyName, &nameProp);
108

109
		LPWSTR id_str = 0;
110
		if (SUCCEEDED(device->GetId(&id_str))) {
111
			AudioDeviceDesc desc;
112
			desc.name = ConvertWStringToUTF8(nameProp.pwszVal);
113
			desc.uniqueId = ConvertWStringToUTF8(id_str);
114
			output->push_back(desc);
115
			CoTaskMemFree(id_str);
116
		}
117

118
		PropVariantClear(&nameProp);
119
	}
120
}
121

122
bool WASAPIContext::InitOutputDevice(std::string_view uniqueId, LatencyMode latencyMode, bool *revertedToDefault) {
123
	Stop();
124

125
	*revertedToDefault = false;
126

127
	ComPtr<IMMDevice> device;
128
	if (uniqueId.empty()) {
129
		// Use the default device.
130
		if (FAILED(enumerator_->GetDefaultAudioEndpoint(eRender, eConsole, &device))) {
131
			return false;
132
		}
133
	} else {
134
		// Use whatever device.
135
		std::wstring wId = ConvertUTF8ToWString(uniqueId);
136
		if (FAILED(enumerator_->GetDevice(wId.c_str(), &device))) {
137
			// Fallback to default device
138
			INFO_LOG(Log::Audio, "Falling back to default device...\n");
139
			*revertedToDefault = true;
140
			if (FAILED(enumerator_->GetDefaultAudioEndpoint(eRender, eConsole, &device))) {
141
				return false;
142
			}
143
		}
144
	}
145

146
	deviceId_ = uniqueId;
147

148
	HRESULT hr = E_FAIL;
149
	// Try IAudioClient3 first if not in "safe" mode. It's probably safe anyway, but still, let's use the legacy client as a safe fallback option.
150
	if (false && latencyMode != LatencyMode::Safe) {
151
		hr = device->Activate(__uuidof(IAudioClient3), CLSCTX_ALL, nullptr, (void**)&audioClient3_);
152
	}
153

154
	// Get rid of any old tempBuf_.
155
	delete[] tempBuf_;
156
	tempBuf_ = nullptr;
157

158
	if (SUCCEEDED(hr)) {
159
		audioClient3_->GetMixFormat(&format_);
160
		// We only use AudioClient3 if we got the format we wanted (stereo float).
161
		if (format_->nChannels != 2 || Classify(format_) != AudioFormat::Float) {
162
			// Let's fall back to the old path. The docs seem to be wrong, if you try to create an
163
			// AudioClient3 with low latency audio with AUTOCONVERTPCM, you get the error 0x88890021.
164
			audioClient3_.Reset();
165
			// Fall through to AudioClient creation below.
166
		} else {
167
			audioClient3_->GetSharedModeEnginePeriod(format_, &defaultPeriodFrames, &fundamentalPeriodFrames, &minPeriodFrames, &maxPeriodFrames);
168

169
			INFO_LOG(Log::Audio, "default: %d fundamental: %d min: %d max: %d\n", (int)defaultPeriodFrames, (int)fundamentalPeriodFrames, (int)minPeriodFrames, (int)maxPeriodFrames);
170
			INFO_LOG(Log::Audio, "initializing with %d frame period at %d Hz, meaning %0.1fms\n", (int)minPeriodFrames, (int)format_->nSamplesPerSec, FramesToMs(minPeriodFrames, format_->nSamplesPerSec));
171

172
			audioEvent_ = CreateEvent(nullptr, FALSE, FALSE, nullptr);
173
			HRESULT result = audioClient3_->InitializeSharedAudioStream(
174
				AUDCLNT_STREAMFLAGS_EVENTCALLBACK,
175
				minPeriodFrames,
176
				format_,
177
				nullptr
178
			);
179
			if (FAILED(result)) {
180
				WARN_LOG(Log::Audio, "Error initializing AudioClient3 shared audio stream: %08lx", result);
181
				audioClient3_.Reset();
182
				return false;
183
			}
184
			actualPeriodFrames_ = minPeriodFrames;
185

186
			audioClient3_->GetBufferSize(&reportedBufferSize_);
187
			audioClient3_->SetEventHandle(audioEvent_);
188
			audioClient3_->GetService(IID_PPV_ARGS(&renderClient_));
189
		}
190
	}
191

192
	if (!audioClient3_) {
193
		// Fallback to IAudioClient (older OS)
194
		device->Activate(__uuidof(IAudioClient), CLSCTX_ALL, nullptr, (void**)&audioClient_);
195

196
		audioClient_->GetMixFormat(&format_);
197

198
		// If there are too many channels, try asking for a 2-channel output format.
199
		DWORD extraStreamFlags = 0;
200
		const AudioFormat fmt = Classify(format_);
201

202
		bool createBuffer = false;
203
		if (fmt == AudioFormat::Float) {
204
			if (format_->nChannels != 2) {
205
				INFO_LOG(Log::Audio, "Got %d channels, asking for stereo instead", format_->nChannels);
206
				WAVEFORMATEXTENSIBLE stereo;
207
				BuildStereoFloatFormat((const WAVEFORMATEXTENSIBLE *)format_, &stereo);
208

209
				WAVEFORMATEX *closestMatch = nullptr;
210
				const HRESULT result = audioClient_->IsFormatSupported(AUDCLNT_SHAREMODE_SHARED, (const WAVEFORMATEX *)&stereo, &closestMatch);
211
				if (result == S_OK) {
212
					// We got the format! Use it and set as current.
213
					_dbg_assert_(!closestMatch);
214
					format_ = (WAVEFORMATEX *)CoTaskMemAlloc(sizeof(WAVEFORMATEXTENSIBLE));
215
					memcpy(format_, &stereo, sizeof(WAVEFORMATEX) + stereo.Format.cbSize);
216
					extraStreamFlags = AUDCLNT_STREAMFLAGS_AUTOCONVERTPCM | AUDCLNT_STREAMFLAGS_SRC_DEFAULT_QUALITY;
217
					INFO_LOG(Log::Audio, "Successfully asked for two channels");
218
				} else if (result == S_FALSE) {
219
					// We got another format. Meh, let's just use what we got.
220
					if (closestMatch) {
221
						WARN_LOG(Log::Audio, "Didn't get the format we wanted, but got: %d ch=%d", closestMatch->nSamplesPerSec, closestMatch->nChannels);
222
						CoTaskMemFree(closestMatch);
223
					} else {
224
						WARN_LOG(Log::Audio, "Failed to fall back to two channels. Using workarounds.");
225
					}
226
					createBuffer = true;
227
				} else {
228
					WARN_LOG(Log::Audio, "Got other error %08x", result);
229
					_dbg_assert_(!closestMatch);
230
				}
231
			} else {
232
				// All good, nothing to convert.
233
			}
234
		} else {
235
			// Some other format.
236
			WARN_LOG(Log::Audio, "Format not float, applying conversion.");
237
			createBuffer = true;
238
		}
239

240
		// Get engine period info
241
		REFERENCE_TIME defaultPeriod = 0, minPeriod = 0;
242
		audioClient_->GetDevicePeriod(&defaultPeriod, &minPeriod);
243

244
		audioEvent_ = CreateEvent(nullptr, FALSE, FALSE, nullptr);
245

246
		const REFERENCE_TIME duration = minPeriod;
247
		HRESULT hr = audioClient_->Initialize(
248
			AUDCLNT_SHAREMODE_SHARED,
249
			AUDCLNT_STREAMFLAGS_EVENTCALLBACK | extraStreamFlags,
250
			duration,  // This is a minimum, the result might be larger. We use GetBufferSize to check.
251
			0,  // ref duration, always 0 in shared mode.
252
			format_,
253
			nullptr
254
		);
255

256
		if (FAILED(hr)) {
257
			WARN_LOG(Log::Audio, "ERROR: Failed to initialize audio with all attempted buffer sizes\n");
258
			audioClient_.Reset();
259
			return false;
260
		}
261

262
		audioClient_->GetBufferSize(&reportedBufferSize_);
263
		actualPeriodFrames_ = reportedBufferSize_;  // we don't have a better estimate.
264
		audioClient_->SetEventHandle(audioEvent_);
265
		audioClient_->GetService(IID_PPV_ARGS(&renderClient_));
266

267
		if (createBuffer) {
268
			tempBuf_ = new float[reportedBufferSize_ * 2];
269
		}
270
	}
271

272
	latencyMode_ = latencyMode;
273

274
	Start();
275

276
	return true;
277
}
278

279
void WASAPIContext::Start() {
280
	running_ = true;
281
	audioThread_ = std::thread([this]() { AudioLoop(); });
282
}
283

284
void WASAPIContext::Stop() {
285
	running_ = false;
286
	if (audioClient_) audioClient_->Stop();
287
	if (audioEvent_) SetEvent(audioEvent_);
288
	if (audioThread_.joinable()) audioThread_.join();
289

290
	renderClient_.Reset();
291
	audioClient_.Reset();
292
	if (audioEvent_) {
293
		CloseHandle(audioEvent_);
294
		audioEvent_ = nullptr;
295
	}
296
	if (format_) {
297
		CoTaskMemFree(format_);
298
		format_ = nullptr;
299
	}
300
}
301

302
void WASAPIContext::FrameUpdate(bool allowAutoChange) {
303
	if (deviceId_.empty() && defaultDeviceChanged_ && allowAutoChange) {
304
		defaultDeviceChanged_ = false;
305
		Stop();
306
		Start();
307
	}
308
}
309

310
void WASAPIContext::AudioLoop() {
311
	DWORD taskID = 0;
312
	HANDLE mmcssHandle = nullptr;
313
	if (latencyMode_ == LatencyMode::Aggressive) {
314
		mmcssHandle = AvSetMmThreadCharacteristics(L"Pro Audio", &taskID);
315
	}
316

317
	UINT32 available;
318
	if (audioClient3_) {
319
		audioClient3_->Start();
320
		audioClient3_->GetBufferSize(&available);
321
	} else {
322
		audioClient_->Start();
323
		audioClient_->GetBufferSize(&available);
324
	}
325

326
	AudioFormat format = Classify(format_);
327
	const int nChannels = format_->nChannels;
328

329
	while (running_) {
330
		const DWORD waitResult = WaitForSingleObject(audioEvent_, INFINITE);
331
		if (waitResult != WAIT_OBJECT_0) {
332
			// Something bad happened.
333
			break;
334
		}
335

336
		UINT32 padding = 0;
337
		if (audioClient3_) {
338
			audioClient3_->GetCurrentPadding(&padding);
339
		} else {
340
			audioClient_->GetCurrentPadding(&padding);
341
		}
342

343
		const UINT32 framesToWrite = available - padding;
344
		BYTE* buffer = nullptr;
345
		if (framesToWrite > 0 && SUCCEEDED(renderClient_->GetBuffer(framesToWrite, &buffer))) {
346
			if (!tempBuf_) {
347
				// Mix directly to the output buffer, avoiding a copy.
348
				callback_(reinterpret_cast<float *>(buffer), framesToWrite, format_->nSamplesPerSec, userdata_);
349
			} else {
350
				// We decided previously that we need conversion, so mix to our temp buffer...
351
				callback_(tempBuf_, framesToWrite, format_->nSamplesPerSec, userdata_);
352
				// .. and convert according to format (we support multi-channel float and s16)
353
				if (format == AudioFormat::S16) {
354
					// Need to convert.
355
					s16 *dest = reinterpret_cast<s16 *>(buffer);
356
					for (UINT32 i = 0; i < framesToWrite; i++) {
357
						if (nChannels == 1) {
358
							// Maybe some bluetooth speakers? Mixdown.
359
							float sum = 0.5f * (tempBuf_[i * 2] + tempBuf_[i * 2 + 1]);
360
							dest[i] = ClampFloatToS16(sum);
361
						} else {
362
							dest[i * nChannels] = ClampFloatToS16(tempBuf_[i * 2]);
363
							dest[i * nChannels + 1] = ClampFloatToS16(tempBuf_[i * 2 + 1]);
364
							// Zero other channels.
365
							for (int j = 2; j < nChannels; j++) {
366
								dest[i * nChannels + j] = 0;
367
							}
368
						}
369
					}
370
				} else if (format == AudioFormat::Float) {
371
					// We have a non-2 number of channels (since we're in the tempBuf_ 'if'), so we contract/expand.
372
					float *dest = reinterpret_cast<float *>(buffer);
373
					for (UINT32 i = 0; i < framesToWrite; i++) {
374
						if (nChannels == 1) {
375
							// Maybe some bluetooth speakers? Mixdown.
376
							dest[i] = 0.5f * (tempBuf_[i * 2] + tempBuf_[i * 2 + 1]);
377
						} else {
378
							dest[i * nChannels] = tempBuf_[i * 2];
379
							dest[i * nChannels + 1] = tempBuf_[i * 2 + 1];
380
							// Zero other channels.
381
							for (int j = 2; j < nChannels; j++) {
382
								dest[i * nChannels + j] = 0;
383
							}
384
						}
385
					}
386
				}
387
			}
388

389
			renderClient_->ReleaseBuffer(framesToWrite, 0);
390
		}
391

392
		// In the old mode, we just estimate the "actualPeriodFrames_" from the framesToWrite.
393
		if (audioClient_ && framesToWrite < actualPeriodFrames_) {
394
			actualPeriodFrames_ = framesToWrite;
395
		}
396
	}
397

398
	if (audioClient3_) {
399
		audioClient3_->Stop();
400
	} else {
401
		audioClient_->Stop();
402
	}
403

404
	if (mmcssHandle) {
405
		AvRevertMmThreadCharacteristics(mmcssHandle);
406
	}
407
}
408

409
void WASAPIContext::DescribeOutputFormat(char *buffer, size_t bufferSize) const {
410
	const int numChannels = format_->nChannels;
411
	const int sampleBits = format_->wBitsPerSample;
412
	const int sampleRateHz = format_->nSamplesPerSec;
413
	const char *fmt = "N/A";
414
	if (format_->wFormatTag == WAVE_FORMAT_EXTENSIBLE) {
415
		const WAVEFORMATEXTENSIBLE *ex = (const WAVEFORMATEXTENSIBLE *)format_;
416
		if (ex->SubFormat == KSDATAFORMAT_SUBTYPE_IEEE_FLOAT) {
417
			fmt = "float";
418
		} else {
419
			fmt = "PCM";
420
		}
421
	} else {
422
		fmt = "PCM";  // probably
423
	}
424
	snprintf(buffer, bufferSize, "%d Hz %s %d-bit, %d ch%s", sampleRateHz, fmt, sampleBits, numChannels, audioClient3_ ? " (ac3)" : " (ac)");
425
}
426

427
Product

Resources

Company