Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
Download
52868 views
1
/*
2
* Copyright (c) 2012 Justin Ruggles <[email protected]>
3
*
4
* Triangular with Noise Shaping is based on opusfile.
5
* Copyright (c) 1994-2012 by the Xiph.Org Foundation and contributors
6
*
7
* This file is part of FFmpeg.
8
*
9
* FFmpeg is free software; you can redistribute it and/or
10
* modify it under the terms of the GNU Lesser General Public
11
* License as published by the Free Software Foundation; either
12
* version 2.1 of the License, or (at your option) any later version.
13
*
14
* FFmpeg is distributed in the hope that it will be useful,
15
* but WITHOUT ANY WARRANTY; without even the implied warranty of
16
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17
* Lesser General Public License for more details.
18
*
19
* You should have received a copy of the GNU Lesser General Public
20
* License along with FFmpeg; if not, write to the Free Software
21
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22
*/
23
24
/**
25
* @file
26
* Dithered Audio Sample Quantization
27
*
28
* Converts from dbl, flt, or s32 to s16 using dithering.
29
*/
30
31
#include <math.h>
32
#include <stdint.h>
33
34
#include "libavutil/attributes.h"
35
#include "libavutil/common.h"
36
#include "libavutil/lfg.h"
37
#include "libavutil/mem.h"
38
#include "libavutil/samplefmt.h"
39
#include "audio_convert.h"
40
#include "dither.h"
41
#include "internal.h"
42
43
typedef struct DitherState {
44
int mute;
45
unsigned int seed;
46
AVLFG lfg;
47
float *noise_buf;
48
int noise_buf_size;
49
int noise_buf_ptr;
50
float dither_a[4];
51
float dither_b[4];
52
} DitherState;
53
54
struct DitherContext {
55
DitherDSPContext ddsp;
56
enum AVResampleDitherMethod method;
57
int apply_map;
58
ChannelMapInfo *ch_map_info;
59
60
int mute_dither_threshold; // threshold for disabling dither
61
int mute_reset_threshold; // threshold for resetting noise shaping
62
const float *ns_coef_b; // noise shaping coeffs
63
const float *ns_coef_a; // noise shaping coeffs
64
65
int channels;
66
DitherState *state; // dither states for each channel
67
68
AudioData *flt_data; // input data in fltp
69
AudioData *s16_data; // dithered output in s16p
70
AudioConvert *ac_in; // converter for input to fltp
71
AudioConvert *ac_out; // converter for s16p to s16 (if needed)
72
73
void (*quantize)(int16_t *dst, const float *src, float *dither, int len);
74
int samples_align;
75
};
76
77
/* Length of silence, in seconds, from which the mute thresholds are derived. */
#define MUTE_THRESHOLD_SEC 0.000333

/* Scale factor used when producing 16-bit output.
   It is a little below full scale, so the signal is attenuated slightly
   to avoid clipping. */
#define S16_SCALE 32753.0f

/* Scale factor that maps lfg output from INT_MIN/INT_MAX to -0.5/0.5. */
#define LFG_SCALE (1.0f / (2.0f * INT32_MAX))
86
87
/* Noise shaping filter coefficients.
   The "48" tables are selected for a 48000 Hz sample rate and the "44" tables
   otherwise; "b" weights the quantization error history and "a" weights the
   shaped error history. */

static const float ns_48_coef_b[4] = {
     2.2374f,
    -0.7339f,
    -0.1251f,
    -0.6033f
};

static const float ns_48_coef_a[4] = {
     0.9030f,
     0.0116f,
    -0.5853f,
    -0.2571f
};

static const float ns_44_coef_b[4] = {
     2.2061f,
    -0.4707f,
    -0.2534f,
    -0.6213f
};

static const float ns_44_coef_a[4] = {
     1.0587f,
     0.0676f,
    -0.6054f,
    -0.2738f
};
104
105
static void dither_int_to_float_rectangular_c(float *dst, int *src, int len)
106
{
107
int i;
108
for (i = 0; i < len; i++)
109
dst[i] = src[i] * LFG_SCALE;
110
}
111
112
static void dither_int_to_float_triangular_c(float *dst, int *src0, int len)
113
{
114
int i;
115
int *src1 = src0 + len;
116
117
for (i = 0; i < len; i++) {
118
float r = src0[i] * LFG_SCALE;
119
r += src1[i] * LFG_SCALE;
120
dst[i] = r;
121
}
122
}
123
124
static void quantize_c(int16_t *dst, const float *src, float *dither, int len)
125
{
126
int i;
127
for (i = 0; i < len; i++)
128
dst[i] = av_clip_int16(lrintf(src[i] * S16_SCALE + dither[i]));
129
}
130
131
#define SQRT_1_6 0.40824829046386301723f

/**
 * High-pass filter a noise buffer in place.
 *
 * Each output value is computed from the current value and the two values
 * that follow it, so the last two values of the buffer are left unmodified.
 *
 * @param src  noise buffer, filtered in place
 * @param len  number of values in the buffer
 */
static void dither_highpass_filter(float *src, int len)
{
    const int last = len - 2;
    int pos        = 0;

    /* filter is from libswresample in FFmpeg */
    while (pos < last) {
        const float cur   = src[pos];
        const float next1 = src[pos + 1];
        const float next2 = src[pos + 2];

        src[pos] = (-cur + 2 * next1 - next2) * SQRT_1_6;
        pos++;
    }
}
141
142
/**
 * (Re)generate the dither noise buffer of one channel.
 *
 * Any existing buffer is freed first. The new buffer holds min_samples
 * rounded up to a multiple of 16, plus 16 samples of padding; only the
 * rounded-up count is recorded in noise_buf_size. For every method except
 * rectangular, twice as many raw random values are generated because two are
 * consumed per noise sample.
 *
 * @param c            dither context (method and DSP functions are read)
 * @param state        channel state whose noise buffer is replaced
 * @param min_samples  minimum number of usable noise samples needed
 * @return 0 on success, AVERROR(ENOMEM) if the buffer cannot be allocated;
 *         on failure the state is left with no buffer and a size of 0
 */
static int generate_dither_noise(DitherContext *c, DitherState *state,
                                 int min_samples)
{
    int i;
    int nb_samples  = FFALIGN(min_samples, 16) + 16;
    int buf_samples = nb_samples *
                      (c->method == AV_RESAMPLE_DITHER_RECTANGULAR ? 1 : 2);
    unsigned int *noise_buf_ui;

    /* reset size/pointer before allocating so a failure leaves a
       consistent, empty state */
    av_freep(&state->noise_buf);
    state->noise_buf_size = state->noise_buf_ptr = 0;

    state->noise_buf = av_malloc(buf_samples * sizeof(*state->noise_buf));
    if (!state->noise_buf)
        return AVERROR(ENOMEM);
    state->noise_buf_size = FFALIGN(min_samples, 16);
    noise_buf_ui          = (unsigned int *)state->noise_buf;

    /* fill the buffer with raw random integers */
    av_lfg_init(&state->lfg, state->seed);
    for (i = 0; i < buf_samples; i++)
        noise_buf_ui[i] = av_lfg_get(&state->lfg);

    /* convert the integers to float noise in place; the conversion functions
       take a signed int pointer, so cast explicitly instead of relying on an
       implicit unsigned/signed pointer conversion */
    c->ddsp.dither_int_to_float(state->noise_buf, (int *)noise_buf_ui,
                                nb_samples);

    if (c->method == AV_RESAMPLE_DITHER_TRIANGULAR_HP)
        dither_highpass_filter(state->noise_buf, nb_samples);

    return 0;
}
171
172
/**
 * Quantize one channel to int16 using triangular dither with noise shaping.
 *
 * Dither is left out once more than mute_dither_threshold zero-valued input
 * samples have been seen in a row, and the shaped error history is cleared at
 * the start of a call when more than mute_reset_threshold have been seen.
 *
 * @param c           dither context (thresholds and filter coefficients)
 * @param state       channel state (noise buffer, filter history, mute count)
 * @param dst         output samples
 * @param src         input float samples
 * @param nb_samples  number of samples to process
 */
static void quantize_triangular_ns(DitherContext *c, DitherState *state,
                                   int16_t *dst, const float *src,
                                   int nb_samples)
{
    int i, j;
    float *dither = &state->noise_buf[state->noise_buf_ptr];
    /* The mute counter is only ever compared against the two thresholds, so
       it is allowed to stop growing once it has passed the larger one. */
    const int mute_limit = FFMAX(c->mute_dither_threshold,
                                 c->mute_reset_threshold);

    if (state->mute > c->mute_reset_threshold)
        memset(state->dither_a, 0, sizeof(state->dither_a));

    for (i = 0; i < nb_samples; i++) {
        float err    = 0;
        float sample = src[i] * S16_SCALE;

        /* shaped error from the filter history */
        for (j = 0; j < 4; j++) {
            err += c->ns_coef_b[j] * state->dither_b[j] -
                   c->ns_coef_a[j] * state->dither_a[j];
        }
        /* shift the history by one sample */
        for (j = 3; j > 0; j--) {
            state->dither_a[j] = state->dither_a[j - 1];
            state->dither_b[j] = state->dither_b[j - 1];
        }
        state->dither_a[0] = err;
        sample -= err;

        if (state->mute > c->mute_dither_threshold) {
            /* long enough silence: quantize without dither */
            dst[i]             = av_clip_int16(lrintf(sample));
            state->dither_b[0] = 0;
        } else {
            dst[i]             = av_clip_int16(lrintf(sample + dither[i]));
            state->dither_b[0] = av_clipf(dst[i] - sample, -1.5f, 1.5f);
        }

        /* Count consecutive zero input samples. The count saturates just
           above mute_limit instead of growing without bound, which would
           eventually overflow a signed int during very long silence. */
        if (src[i])
            state->mute = 0;
        else if (state->mute <= mute_limit)
            state->mute++;
    }
}
210
211
/**
 * Quantize all channels from float to int16 with dithering.
 *
 * For each channel the noise buffer is regenerated if it is too small, or
 * rewound to its start if too little unused noise remains, before the
 * samples are quantized.
 *
 * @param c           dither context
 * @param dst         output sample pointers, one per channel
 * @param src         input float sample pointers, one per channel
 * @param channels    number of channels to process
 * @param nb_samples  number of samples per channel
 * @return 0 on success, a negative error code if noise generation fails
 */
static int convert_samples(DitherContext *c, int16_t **dst, float * const *src,
                           int channels, int nb_samples)
{
    const int padded_samples = FFALIGN(nb_samples, 16);
    int ch;

    for (ch = 0; ch < channels; ch++) {
        DitherState *st     = &c->state[ch];
        const int remaining = st->noise_buf_size - st->noise_buf_ptr;

        if (st->noise_buf_size < padded_samples) {
            int err = generate_dither_noise(c, st, nb_samples);
            if (err < 0)
                return err;
        } else if (remaining < padded_samples) {
            /* not enough unused noise left: reuse the buffer from the start */
            st->noise_buf_ptr = 0;
        }

        if (c->method != AV_RESAMPLE_DITHER_TRIANGULAR_NS) {
            float *noise = &st->noise_buf[st->noise_buf_ptr];

            c->quantize(dst[ch], src[ch], noise,
                        FFALIGN(nb_samples, c->samples_align));
        } else {
            quantize_triangular_ns(c, st, dst[ch], src[ch], nb_samples);
        }

        st->noise_buf_ptr += padded_samples;
    }

    return 0;
}
241
242
/**
 * Convert audio data to dithered 16-bit samples.
 *
 * The input is converted to planar float (and/or channel-mapped) if needed,
 * quantized to planar s16 with dithering, and interleaved into dst when dst
 * is packed s16.
 *
 * When dst is planar s16 it is written to directly: c->s16_data points to
 * dst for the duration of the call only and is reset to NULL on every return
 * path, so the context never keeps a pointer to a buffer it does not own.
 *
 * @param c    dither context
 * @param dst  destination audio data (s16 or s16p)
 * @param src  source audio data
 * @return 0 on success, a negative error code on failure
 */
int ff_convert_dither(DitherContext *c, AudioData *dst, AudioData *src)
{
    int ret;
    AudioData *flt_data = NULL;
    /* non-zero when c->s16_data merely borrows the caller's dst */
    const int borrowed_dst = dst->sample_fmt == AV_SAMPLE_FMT_S16P;

    /* output directly to dst if it is planar */
    if (borrowed_dst) {
        c->s16_data = dst;
    } else {
        /* make sure s16_data is large enough for the output */
        ret = ff_audio_data_realloc(c->s16_data, src->nb_samples);
        if (ret < 0)
            return ret;
    }

    if (src->sample_fmt != AV_SAMPLE_FMT_FLTP || c->apply_map) {
        /* make sure flt_data is large enough for the input */
        ret = ff_audio_data_realloc(c->flt_data, src->nb_samples);
        if (ret < 0)
            goto end;
        flt_data = c->flt_data;
    }

    if (src->sample_fmt != AV_SAMPLE_FMT_FLTP) {
        /* convert input samples to fltp and scale to s16 range */
        ret = ff_audio_convert(c->ac_in, flt_data, src);
        if (ret < 0)
            goto end;
    } else if (c->apply_map) {
        ret = ff_audio_data_copy(flt_data, src, c->ch_map_info);
        if (ret < 0)
            goto end;
    } else {
        /* input is already fltp with no mapping: use it as-is */
        flt_data = src;
    }

    /* check alignment and padding constraints */
    if (c->method != AV_RESAMPLE_DITHER_TRIANGULAR_NS) {
        int ptr_align     = FFMIN(flt_data->ptr_align,     c->s16_data->ptr_align);
        int samples_align = FFMIN(flt_data->samples_align, c->s16_data->samples_align);
        int aligned_len   = FFALIGN(src->nb_samples, c->ddsp.samples_align);

        if (!(ptr_align % c->ddsp.ptr_align) && samples_align >= aligned_len) {
            c->quantize      = c->ddsp.quantize;
            c->samples_align = c->ddsp.samples_align;
        } else {
            /* buffers do not meet the DSP function's requirements */
            c->quantize      = quantize_c;
            c->samples_align = 1;
        }
    }

    ret = convert_samples(c, (int16_t **)c->s16_data->data,
                          (float * const *)flt_data->data, src->channels,
                          src->nb_samples);
    if (ret < 0)
        goto end;

    c->s16_data->nb_samples = src->nb_samples;

    /* interleave output to dst if needed */
    if (dst->sample_fmt == AV_SAMPLE_FMT_S16) {
        ret = ff_audio_convert(c->ac_out, dst, c->s16_data);
        if (ret < 0)
            goto end;
    }

    ret = 0;

end:
    /* drop the borrowed pointer on success and on failure alike, so that
       ff_dither_free() can never free the caller's buffer */
    if (borrowed_dst)
        c->s16_data = NULL;

    return ret;
}
311
312
/**
 * Free a dither context and everything it holds.
 *
 * Does nothing if *cp is NULL; otherwise *cp is set to NULL once the context
 * has been freed.
 *
 * @param cp  pointer to the context pointer
 */
void ff_dither_free(DitherContext **cp)
{
    DitherContext *ctx = *cp;
    int i;

    if (ctx) {
        ff_audio_data_free(&ctx->flt_data);
        ff_audio_data_free(&ctx->s16_data);
        ff_audio_convert_free(&ctx->ac_in);
        ff_audio_convert_free(&ctx->ac_out);

        /* per-channel noise buffers first, then the state array itself */
        for (i = 0; i < ctx->channels; i++)
            av_free(ctx->state[i].noise_buf);
        av_free(ctx->state);

        av_freep(cp);
    }
}
328
329
/**
 * Set up the DSP context with the C implementations, then let the x86
 * initialization run when building for x86.
 *
 * @param ddsp    DSP context to fill in
 * @param method  dither method; selects the noise conversion function
 */
static av_cold void dither_init(DitherDSPContext *ddsp,
                                enum AVResampleDitherMethod method)
{
    /* the C functions have no alignment requirements */
    ddsp->ptr_align     = 1;
    ddsp->samples_align = 1;
    ddsp->quantize      = quantize_c;

    ddsp->dither_int_to_float = method == AV_RESAMPLE_DITHER_RECTANGULAR
                                ? dither_int_to_float_rectangular_c
                                : dither_int_to_float_triangular_c;

    if (ARCH_X86)
        ff_dither_init_x86(ddsp, method);
}
344
345
/**
 * Allocate and initialize a dither context.
 *
 * Only s16/s16p output is supported, and the input must use more than 2
 * bytes per sample. If triangular_ns is requested for a sample rate other
 * than 48000 or 44100 Hz, avr->dither_method is changed to triangular_hp and
 * a warning is logged.
 *
 * @param avr          resample context (dither method, channel map, logging)
 * @param out_fmt      output sample format
 * @param in_fmt       input sample format
 * @param channels     number of channels
 * @param sample_rate  sample rate in Hz
 * @param apply_map    non-zero to apply the channel map from avr
 * @return newly allocated context, or NULL if the formats are unsupported or
 *         any allocation (including the initial dither noise) fails
 */
DitherContext *ff_dither_alloc(AVAudioResampleContext *avr,
                               enum AVSampleFormat out_fmt,
                               enum AVSampleFormat in_fmt,
                               int channels, int sample_rate, int apply_map)
{
    AVLFG seed_gen;
    DitherContext *c;
    int ch;

    if (av_get_packed_sample_fmt(out_fmt) != AV_SAMPLE_FMT_S16 ||
        av_get_bytes_per_sample(in_fmt) <= 2) {
        av_log(avr, AV_LOG_ERROR, "dithering %s to %s is not supported\n",
               av_get_sample_fmt_name(in_fmt), av_get_sample_fmt_name(out_fmt));
        return NULL;
    }

    c = av_mallocz(sizeof(*c));
    if (!c)
        return NULL;

    c->apply_map = apply_map;
    if (apply_map)
        c->ch_map_info = &avr->ch_map_info;

    if (avr->dither_method == AV_RESAMPLE_DITHER_TRIANGULAR_NS &&
        sample_rate != 48000 && sample_rate != 44100) {
        av_log(avr, AV_LOG_WARNING, "sample rate must be 48000 or 44100 Hz "
               "for triangular_ns dither. using triangular_hp instead.\n");
        avr->dither_method = AV_RESAMPLE_DITHER_TRIANGULAR_HP;
    }
    c->method = avr->dither_method;
    dither_init(&c->ddsp, c->method);

    if (c->method == AV_RESAMPLE_DITHER_TRIANGULAR_NS) {
        if (sample_rate == 48000) {
            c->ns_coef_b = ns_48_coef_b;
            c->ns_coef_a = ns_48_coef_a;
        } else {
            c->ns_coef_b = ns_44_coef_b;
            c->ns_coef_a = ns_44_coef_a;
        }
    }

    /* Either s16 or s16p output format is allowed, but s16p is used
       internally, so we need to use a temp buffer and interleave if the output
       format is s16 */
    if (out_fmt != AV_SAMPLE_FMT_S16P) {
        c->s16_data = ff_audio_data_alloc(channels, 1024, AV_SAMPLE_FMT_S16P,
                                          "dither s16 buffer");
        if (!c->s16_data)
            goto fail;

        c->ac_out = ff_audio_convert_alloc(avr, out_fmt, AV_SAMPLE_FMT_S16P,
                                           channels, sample_rate, 0);
        if (!c->ac_out)
            goto fail;
    }

    if (in_fmt != AV_SAMPLE_FMT_FLTP || c->apply_map) {
        c->flt_data = ff_audio_data_alloc(channels, 1024, AV_SAMPLE_FMT_FLTP,
                                          "dither flt buffer");
        if (!c->flt_data)
            goto fail;
    }
    if (in_fmt != AV_SAMPLE_FMT_FLTP) {
        c->ac_in = ff_audio_convert_alloc(avr, AV_SAMPLE_FMT_FLTP, in_fmt,
                                          channels, sample_rate, c->apply_map);
        if (!c->ac_in)
            goto fail;
    }

    c->state = av_mallocz(channels * sizeof(*c->state));
    if (!c->state)
        goto fail;
    c->channels = channels;

    /* calculate thresholds for turning off dithering during periods of
       silence to avoid replacing digital silence with quiet dither noise */
    c->mute_dither_threshold = lrintf(sample_rate * MUTE_THRESHOLD_SEC);
    c->mute_reset_threshold  = c->mute_dither_threshold * 4;

    /* initialize dither states */
    av_lfg_init(&seed_gen, 0xC0FFEE);
    for (ch = 0; ch < channels; ch++) {
        DitherState *state = &c->state[ch];

        /* start above the reset threshold, i.e. as if preceded by silence */
        state->mute = c->mute_reset_threshold + 1;
        state->seed = av_lfg_get(&seed_gen);
        /* do not hand back a context whose noise buffers could not be
           allocated */
        if (generate_dither_noise(c, state, FFMAX(32768, sample_rate / 2)) < 0)
            goto fail;
    }

    return c;

fail:
    ff_dither_free(&c);
    return NULL;
}
441
442