Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
hrydgard
GitHub Repository: hrydgard/ppsspp
Path: blob/master/Windows/WASAPIContext.cpp
3185 views
1
#include <windows.h>
2
#include <mmdeviceapi.h>
3
#include <functiondiscoverykeys_devpkey.h>
4
#include <audioclient.h>
5
#include <avrt.h>
6
#include <comdef.h>
7
#include <atomic>
8
#include <thread>
9
#include <vector>
10
#include <string_view>
11
#include <wrl/client.h>
12
13
#include "Common/Data/Encoding/Utf8.h"
14
#include "Common/Log.h"
15
#include "WASAPIContext.h"
16
17
using Microsoft::WRL::ComPtr;
18
19
// We must have one of these already...
20
static inline s16 ClampFloatToS16(float f) {
21
f *= 32768.0f;
22
if (f >= 32767) {
23
return 32767;
24
} else if (f < -32767) {
25
return -32767;
26
} else {
27
return (s16)(s32)f;
28
}
29
}
30
31
// Builds a 2-channel, 32-bit IEEE-float WAVEFORMATEXTENSIBLE, keeping the
// sample rate from |original|. Used to ask WASAPI for a stereo float stream.
void BuildStereoFloatFormat(const WAVEFORMATEXTENSIBLE *original, WAVEFORMATEXTENSIBLE *output) {
	// Start from a clean slate so no stale fields survive.
	ZeroMemory(output, sizeof(WAVEFORMATEXTENSIBLE));

	// Base WAVEFORMATEX portion.
	WAVEFORMATEX &base = output->Format;
	base.wFormatTag = WAVE_FORMAT_EXTENSIBLE;
	base.nChannels = 2;
	base.nSamplesPerSec = original->Format.nSamplesPerSec;
	base.wBitsPerSample = 32;  // 32-bit float samples.
	base.nBlockAlign = base.nChannels * base.wBitsPerSample / 8;
	base.nAvgBytesPerSec = base.nSamplesPerSec * base.nBlockAlign;
	base.cbSize = sizeof(WAVEFORMATEXTENSIBLE) - sizeof(WAVEFORMATEX);

	// Extensible portion: front left/right float.
	output->Samples.wValidBitsPerSample = 32;
	output->dwChannelMask = SPEAKER_FRONT_LEFT | SPEAKER_FRONT_RIGHT;
	output->SubFormat = KSDATAFORMAT_SUBTYPE_IEEE_FLOAT;
}
51
52
// Creates the device enumerator and registers for endpoint notifications.
// On failure, enumerator_ is left null and the context stays inert.
WASAPIContext::WASAPIContext() : notificationClient_(this) {
	const HRESULT hr = CoCreateInstance(__uuidof(MMDeviceEnumerator), nullptr, CLSCTX_ALL, IID_PPV_ARGS(&enumerator_));
	if (SUCCEEDED(hr)) {
		enumerator_->RegisterEndpointNotificationCallback(&notificationClient_);
	} else {
		// Bad! Null out so the destructor knows nothing was set up.
		enumerator_ = nullptr;
	}
}
61
62
// Stops playback, unregisters notifications and frees the conversion buffer.
WASAPIContext::~WASAPIContext() {
	if (enumerator_) {
		Stop();
		enumerator_->UnregisterEndpointNotificationCallback(&notificationClient_);
		delete[] tempBuf_;
	}
	// A null enumerator_ means construction failed - nothing can have been happening.
}
71
72
// Classifies a WAVEFORMATEX into one of the sample layouts the mixer supports.
// Returns Unhandled for anything we can't write into directly.
WASAPIContext::AudioFormat WASAPIContext::Classify(const WAVEFORMATEX *format) {
	// Fix: wBitsPerSample counts bits, so 16-bit PCM reports 16, not 2.
	// The previous check (== 2) could never match, so plain S16 devices were
	// misclassified as Unhandled.
	if (format->wFormatTag == WAVE_FORMAT_PCM && format->wBitsPerSample == 16) {
		return AudioFormat::S16;
	} else if (format->wFormatTag == WAVE_FORMAT_EXTENSIBLE) {
		const WAVEFORMATEXTENSIBLE *ex = (const WAVEFORMATEXTENSIBLE *)format;
		if (ex->SubFormat == KSDATAFORMAT_SUBTYPE_IEEE_FLOAT) {
			return AudioFormat::Float;
		}
		// Extensible but not float (e.g. PCM subformat) falls through to Unhandled.
	} else {
		WARN_LOG(Log::Audio, "Unhandled output format!");
	}
	return AudioFormat::Unhandled;
}
85
86
// Appends all active render (or capture) endpoints to |output|, with their
// friendly name and unique device ID string.
void WASAPIContext::EnumerateDevices(std::vector<AudioDeviceDesc> *output, bool captureDevices) {
	ComPtr<IMMDeviceCollection> collection;
	enumerator_->EnumAudioEndpoints(captureDevices ? eCapture : eRender, DEVICE_STATE_ACTIVE, &collection);

	if (!collection) {
		ERROR_LOG(Log::Audio, "Failed to enumerate devices");
		return;
	}

	UINT count = 0;
	collection->GetCount(&count);

	for (UINT i = 0; i < count; ++i) {
		ComPtr<IMMDevice> device;
		collection->Item(i, &device);

		ComPtr<IPropertyStore> props;
		device->OpenPropertyStore(STGM_READ, &props);

		// PROPVARIANT must be initialized before use and cleared after.
		PROPVARIANT nameProp;
		PropVariantInit(&nameProp);
		// NOTE(review): props may be null if OpenPropertyStore failed, and
		// nameProp.pwszVal may be null if GetValue fails — verify these cases
		// can't occur for DEVICE_STATE_ACTIVE endpoints.
		props->GetValue(PKEY_Device_FriendlyName, &nameProp);

		// Only add the device if we can get its unique ID; the ID string is
		// allocated by COM and must be freed with CoTaskMemFree.
		LPWSTR id_str = 0;
		if (SUCCEEDED(device->GetId(&id_str))) {
			AudioDeviceDesc desc;
			desc.name = ConvertWStringToUTF8(nameProp.pwszVal);
			desc.uniqueId = ConvertWStringToUTF8(id_str);
			output->push_back(desc);
			CoTaskMemFree(id_str);
		}

		PropVariantClear(&nameProp);
	}
}
121
122
// (Re)initializes audio output on the device identified by |uniqueId| (empty =
// system default). Tries the low-latency IAudioClient3 path first (currently
// disabled, see below), then falls back to plain IAudioClient. Sets
// *revertedToDefault when the requested device wasn't found and the default
// was used instead. Returns false if no client could be initialized.
bool WASAPIContext::InitOutputDevice(std::string_view uniqueId, LatencyMode latencyMode, bool *revertedToDefault) {
	// Tear down any previous stream/thread before re-initializing.
	Stop();

	*revertedToDefault = false;

	ComPtr<IMMDevice> device;
	if (uniqueId.empty()) {
		// Use the default device.
		if (FAILED(enumerator_->GetDefaultAudioEndpoint(eRender, eConsole, &device))) {
			return false;
		}
	} else {
		// Use whatever device.
		std::wstring wId = ConvertUTF8ToWString(uniqueId);
		if (FAILED(enumerator_->GetDevice(wId.c_str(), &device))) {
			// Fallback to default device
			INFO_LOG(Log::Audio, "Falling back to default device...\n");
			*revertedToDefault = true;
			if (FAILED(enumerator_->GetDefaultAudioEndpoint(eRender, eConsole, &device))) {
				return false;
			}
		}
	}

	deviceId_ = uniqueId;

	HRESULT hr = E_FAIL;
	// Try IAudioClient3 first if not in "safe" mode. It's probably safe anyway, but still, let's use the legacy client as a safe fallback option.
	// NOTE(review): the 'false &&' hard-disables the IAudioClient3 path, so hr
	// stays E_FAIL and we always take the legacy branch — looks like a temporary
	// kill-switch; confirm whether it should be removed.
	if (false && latencyMode != LatencyMode::Safe) {
		hr = device->Activate(__uuidof(IAudioClient3), CLSCTX_ALL, nullptr, (void**)&audioClient3_);
	}

	// Get rid of any old tempBuf_.
	delete[] tempBuf_;
	tempBuf_ = nullptr;

	if (SUCCEEDED(hr)) {
		audioClient3_->GetMixFormat(&format_);
		// We only use AudioClient3 if we got the format we wanted (stereo float).
		if (format_->nChannels != 2 || Classify(format_) != AudioFormat::Float) {
			// Let's fall back to the old path. The docs seem to be wrong, if you try to create an
			// AudioClient3 with low latency audio with AUTOCONVERTPCM, you get the error 0x88890021.
			audioClient3_.Reset();
			// Fall through to AudioClient creation below.
		} else {
			// Query the engine's supported shared-mode periods and initialize at
			// the minimum for lowest latency.
			audioClient3_->GetSharedModeEnginePeriod(format_, &defaultPeriodFrames, &fundamentalPeriodFrames, &minPeriodFrames, &maxPeriodFrames);

			INFO_LOG(Log::Audio, "default: %d fundamental: %d min: %d max: %d\n", (int)defaultPeriodFrames, (int)fundamentalPeriodFrames, (int)minPeriodFrames, (int)maxPeriodFrames);
			INFO_LOG(Log::Audio, "initializing with %d frame period at %d Hz, meaning %0.1fms\n", (int)minPeriodFrames, (int)format_->nSamplesPerSec, FramesToMs(minPeriodFrames, format_->nSamplesPerSec));

			// Auto-reset event, signaled by WASAPI each time a buffer is ready.
			audioEvent_ = CreateEvent(nullptr, FALSE, FALSE, nullptr);
			HRESULT result = audioClient3_->InitializeSharedAudioStream(
				AUDCLNT_STREAMFLAGS_EVENTCALLBACK,
				minPeriodFrames,
				format_,
				nullptr
			);
			if (FAILED(result)) {
				WARN_LOG(Log::Audio, "Error initializing AudioClient3 shared audio stream: %08lx", result);
				audioClient3_.Reset();
				return false;
			}
			actualPeriodFrames_ = minPeriodFrames;

			audioClient3_->GetBufferSize(&reportedBufferSize_);
			audioClient3_->SetEventHandle(audioEvent_);
			audioClient3_->GetService(IID_PPV_ARGS(&renderClient_));
		}
	}

	if (!audioClient3_) {
		// Fallback to IAudioClient (older OS)
		// NOTE(review): Activate's HRESULT is not checked here; a failure would
		// leave audioClient_ null and crash on the next call — confirm.
		device->Activate(__uuidof(IAudioClient), CLSCTX_ALL, nullptr, (void**)&audioClient_);

		audioClient_->GetMixFormat(&format_);

		// If there are too many channels, try asking for a 2-channel output format.
		DWORD extraStreamFlags = 0;
		const AudioFormat fmt = Classify(format_);

		// createBuffer: set when the mix format isn't stereo float, so AudioLoop
		// must mix into tempBuf_ and convert on the way out.
		bool createBuffer = false;
		if (fmt == AudioFormat::Float) {
			if (format_->nChannels != 2) {
				INFO_LOG(Log::Audio, "Got %d channels, asking for stereo instead", format_->nChannels);
				WAVEFORMATEXTENSIBLE stereo;
				BuildStereoFloatFormat((const WAVEFORMATEXTENSIBLE *)format_, &stereo);

				WAVEFORMATEX *closestMatch = nullptr;
				const HRESULT result = audioClient_->IsFormatSupported(AUDCLNT_SHAREMODE_SHARED, (const WAVEFORMATEX *)&stereo, &closestMatch);
				if (result == S_OK) {
					// We got the format! Use it and set as current.
					// NOTE(review): the previous mix-format pointer in format_ is
					// overwritten without CoTaskMemFree — looks like a small leak; confirm.
					_dbg_assert_(!closestMatch);
					format_ = (WAVEFORMATEX *)CoTaskMemAlloc(sizeof(WAVEFORMATEXTENSIBLE));
					memcpy(format_, &stereo, sizeof(WAVEFORMATEX) + stereo.Format.cbSize);
					extraStreamFlags = AUDCLNT_STREAMFLAGS_AUTOCONVERTPCM | AUDCLNT_STREAMFLAGS_SRC_DEFAULT_QUALITY;
					INFO_LOG(Log::Audio, "Successfully asked for two channels");
				} else if (result == S_FALSE) {
					// We got another format. Meh, let's just use what we got.
					if (closestMatch) {
						WARN_LOG(Log::Audio, "Didn't get the format we wanted, but got: %d ch=%d", closestMatch->nSamplesPerSec, closestMatch->nChannels);
						CoTaskMemFree(closestMatch);
					} else {
						WARN_LOG(Log::Audio, "Failed to fall back to two channels. Using workarounds.");
					}
					createBuffer = true;
				} else {
					WARN_LOG(Log::Audio, "Got other error %08x", result);
					_dbg_assert_(!closestMatch);
				}
			} else {
				// All good, nothing to convert.
			}
		} else {
			// Some other format.
			WARN_LOG(Log::Audio, "Format not float, applying conversion.");
			createBuffer = true;
		}

		// Get engine period info
		REFERENCE_TIME defaultPeriod = 0, minPeriod = 0;
		audioClient_->GetDevicePeriod(&defaultPeriod, &minPeriod);

		// Auto-reset event, signaled by WASAPI each time a buffer is ready.
		audioEvent_ = CreateEvent(nullptr, FALSE, FALSE, nullptr);

		const REFERENCE_TIME duration = minPeriod;
		HRESULT hr = audioClient_->Initialize(
			AUDCLNT_SHAREMODE_SHARED,
			AUDCLNT_STREAMFLAGS_EVENTCALLBACK | extraStreamFlags,
			duration, // This is a minimum, the result might be larger. We use GetBufferSize to check.
			0, // ref duration, always 0 in shared mode.
			format_,
			nullptr
		);

		if (FAILED(hr)) {
			WARN_LOG(Log::Audio, "ERROR: Failed to initialize audio with all attempted buffer sizes\n");
			audioClient_.Reset();
			return false;
		}

		audioClient_->GetBufferSize(&reportedBufferSize_);
		actualPeriodFrames_ = reportedBufferSize_; // we don't have a better estimate.
		audioClient_->SetEventHandle(audioEvent_);
		audioClient_->GetService(IID_PPV_ARGS(&renderClient_));

		if (createBuffer) {
			// Stereo float staging buffer, one full WASAPI buffer's worth of frames.
			tempBuf_ = new float[reportedBufferSize_ * 2];
		}
	}

	latencyMode_ = latencyMode;

	// Launch the render thread.
	Start();

	return true;
}
278
279
void WASAPIContext::Start() {
280
running_ = true;
281
audioThread_ = std::thread([this]() { AudioLoop(); });
282
}
283
284
// Stops playback, joins the render thread, and releases all per-stream state
// (clients, event handle, mix format). Safe to call when nothing is running.
void WASAPIContext::Stop() {
	// Tell the render thread to exit, stop whichever client is active, then
	// wake the thread so it notices promptly.
	running_ = false;
	if (audioClient_) audioClient_->Stop();
	// Fix: the IAudioClient3 path was never stopped or released here, leaving a
	// stale client behind that also made InitOutputDevice skip re-initialization.
	if (audioClient3_) audioClient3_->Stop();
	if (audioEvent_) SetEvent(audioEvent_);
	if (audioThread_.joinable()) audioThread_.join();

	renderClient_.Reset();
	audioClient_.Reset();
	audioClient3_.Reset();
	if (audioEvent_) {
		CloseHandle(audioEvent_);
		audioEvent_ = nullptr;
	}
	if (format_) {
		// The mix format was allocated by WASAPI (GetMixFormat) or CoTaskMemAlloc.
		CoTaskMemFree(format_);
		format_ = nullptr;
	}
}
301
302
// Called regularly from the host; handles following the system default device
// when it changes (flagged by the endpoint notification client).
void WASAPIContext::FrameUpdate(bool allowAutoChange) {
	// Only auto-switch when we're tracking the default device (deviceId_ empty).
	if (deviceId_.empty() && defaultDeviceChanged_ && allowAutoChange) {
		defaultDeviceChanged_ = false;
		// NOTE(review): Stop() releases the clients and format_, while Start()
		// only spawns the thread without re-initializing the device — verify
		// whether this should re-run device initialization instead.
		Stop();
		Start();
	}
}
309
310
// Render thread body. Waits for WASAPI's buffer-ready event, asks callback_
// for audio, and writes it into the endpoint buffer — directly when the mix
// format is stereo float, otherwise via tempBuf_ with channel/format conversion.
void WASAPIContext::AudioLoop() {
	DWORD taskID = 0;
	HANDLE mmcssHandle = nullptr;
	if (latencyMode_ == LatencyMode::Aggressive) {
		// Register with MMCSS so the scheduler treats this as a pro-audio thread.
		mmcssHandle = AvSetMmThreadCharacteristics(L"Pro Audio", &taskID);
	}

	// Total endpoint buffer size in frames; framesToWrite each wakeup is this
	// minus the current padding.
	UINT32 available;
	if (audioClient3_) {
		audioClient3_->Start();
		audioClient3_->GetBufferSize(&available);
	} else {
		audioClient_->Start();
		audioClient_->GetBufferSize(&available);
	}

	AudioFormat format = Classify(format_);
	const int nChannels = format_->nChannels;

	while (running_) {
		const DWORD waitResult = WaitForSingleObject(audioEvent_, INFINITE);
		if (waitResult != WAIT_OBJECT_0) {
			// Something bad happened.
			break;
		}

		// Frames already queued and not yet played.
		UINT32 padding = 0;
		if (audioClient3_) {
			audioClient3_->GetCurrentPadding(&padding);
		} else {
			audioClient_->GetCurrentPadding(&padding);
		}

		const UINT32 framesToWrite = available - padding;
		BYTE* buffer = nullptr;
		if (framesToWrite > 0 && SUCCEEDED(renderClient_->GetBuffer(framesToWrite, &buffer))) {
			if (!tempBuf_) {
				// Mix directly to the output buffer, avoiding a copy.
				callback_(reinterpret_cast<float *>(buffer), framesToWrite, format_->nSamplesPerSec, userdata_);
			} else {
				// We decided previously that we need conversion, so mix to our temp buffer...
				// (tempBuf_ always holds interleaved stereo float frames.)
				callback_(tempBuf_, framesToWrite, format_->nSamplesPerSec, userdata_);
				// .. and convert according to format (we support multi-channel float and s16)
				if (format == AudioFormat::S16) {
					// Need to convert.
					s16 *dest = reinterpret_cast<s16 *>(buffer);
					for (UINT32 i = 0; i < framesToWrite; i++) {
						if (nChannels == 1) {
							// Maybe some bluetooth speakers? Mixdown.
							float sum = 0.5f * (tempBuf_[i * 2] + tempBuf_[i * 2 + 1]);
							dest[i] = ClampFloatToS16(sum);
						} else {
							// Stereo into the first two channels of each frame.
							dest[i * nChannels] = ClampFloatToS16(tempBuf_[i * 2]);
							dest[i * nChannels + 1] = ClampFloatToS16(tempBuf_[i * 2 + 1]);
							// Zero other channels.
							for (int j = 2; j < nChannels; j++) {
								dest[i * nChannels + j] = 0;
							}
						}
					}
				} else if (format == AudioFormat::Float) {
					// We have a non-2 number of channels (since we're in the tempBuf_ 'if'), so we contract/expand.
					float *dest = reinterpret_cast<float *>(buffer);
					for (UINT32 i = 0; i < framesToWrite; i++) {
						if (nChannels == 1) {
							// Maybe some bluetooth speakers? Mixdown.
							dest[i] = 0.5f * (tempBuf_[i * 2] + tempBuf_[i * 2 + 1]);
						} else {
							dest[i * nChannels] = tempBuf_[i * 2];
							dest[i * nChannels + 1] = tempBuf_[i * 2 + 1];
							// Zero other channels.
							for (int j = 2; j < nChannels; j++) {
								dest[i * nChannels + j] = 0;
							}
						}
					}
				}
			}

			renderClient_->ReleaseBuffer(framesToWrite, 0);
		}

		// In the old mode, we just estimate the "actualPeriodFrames_" from the framesToWrite.
		if (audioClient_ && framesToWrite < actualPeriodFrames_) {
			actualPeriodFrames_ = framesToWrite;
		}
	}

	// Mirror the Start() calls above before exiting the thread.
	if (audioClient3_) {
		audioClient3_->Stop();
	} else {
		audioClient_->Stop();
	}

	if (mmcssHandle) {
		AvRevertMmThreadCharacteristics(mmcssHandle);
	}
}
408
409
// Writes a short human-readable description of the current output format
// (rate, sample format, bit depth, channels, and which client is in use).
void WASAPIContext::DescribeOutputFormat(char *buffer, size_t bufferSize) const {
	// Assume PCM unless the extensible subformat says IEEE float.
	const char *sampleFormat = "PCM";  // probably, for the non-extensible case
	if (format_->wFormatTag == WAVE_FORMAT_EXTENSIBLE) {
		const WAVEFORMATEXTENSIBLE *ex = (const WAVEFORMATEXTENSIBLE *)format_;
		sampleFormat = (ex->SubFormat == KSDATAFORMAT_SUBTYPE_IEEE_FLOAT) ? "float" : "PCM";
	}
	snprintf(buffer, bufferSize, "%d Hz %s %d-bit, %d ch%s",
		(int)format_->nSamplesPerSec, sampleFormat, (int)format_->wBitsPerSample,
		(int)format_->nChannels, audioClient3_ ? " (ac3)" : " (ac)");
}
426
427