CoCalc -- af

05. Matplotlib / ffmpeg-3.0 / libavfilter / af_amix.c
⁵⁸⁷²⁶ views
1
/*
 * Audio Mix Filter
 * Copyright (c) 2012 Justin Ruggles <[email protected]>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
21

22
/**
 * @file
 * Audio Mix Filter
 *
 * Mixes audio from multiple sources into a single output. The channel layout,
 * sample rate, and sample format will be the same for all inputs and the
 * output.
 */
30

31
#include "libavutil/attributes.h"
32
#include "libavutil/audio_fifo.h"
33
#include "libavutil/avassert.h"
34
#include "libavutil/avstring.h"
35
#include "libavutil/channel_layout.h"
36
#include "libavutil/common.h"
37
#include "libavutil/float_dsp.h"
38
#include "libavutil/mathematics.h"
39
#include "libavutil/opt.h"
40
#include "libavutil/samplefmt.h"
41

42
#include "audio.h"
43
#include "avfilter.h"
44
#include "formats.h"
45
#include "internal.h"
46

47
/* Bit flags kept per input in MixContext.input_state. */
#define INPUT_ON       1    /**< input is active */
#define INPUT_EOF      2    /**< input has reached EOF (may still be active) */

/* Values of the "duration" option: which input(s) decide when mixing stops. */
#define DURATION_LONGEST  0
#define DURATION_SHORTEST 1
#define DURATION_FIRST    2
53

54

55
/**
 * Size and timestamp of one frame queued in a FrameList.
 */
typedef struct FrameInfo {
    int nb_samples;         /**< samples of this frame not yet consumed */
    int64_t pts;            /**< timestamp of the first remaining sample */
    struct FrameInfo *next; /**< next entry in the list, NULL for the last one */
} FrameInfo;
60

61
/**
62
 * Linked list used to store timestamps and frame sizes of all frames in the
63
 * FIFO for the first input.
64
 *
65
 * This is needed to keep timestamps synchronized for the case where multiple
66
 * input frames are pushed to the filter for processing before a frame is
67
 * requested by the output link.
68
 */
69
typedef struct FrameList {
70
    int nb_frames;
71
    int nb_samples;
72
    FrameInfo *list;
73
    FrameInfo *end;
74
} FrameList;
75

76
/**
 * Free every entry of a frame list and reset its counters.
 *
 * The FrameList structure itself is not freed.
 *
 * @param frame_list list to empty; may be NULL, in which case nothing happens
 */
static void frame_list_clear(FrameList *frame_list)
{
    if (frame_list) {
        while (frame_list->list) {
            FrameInfo *info = frame_list->list;
            frame_list->list = info->next;
            av_free(info);
        }
        frame_list->nb_frames  = 0;
        frame_list->nb_samples = 0;
        frame_list->end        = NULL;
    }
}
89

90
/**
 * Get the number of samples remaining in the oldest queued frame.
 *
 * @param frame_list list to inspect (must not be NULL)
 * @return remaining sample count of the head entry, or 0 if the list is empty
 */
static int frame_list_next_frame_size(FrameList *frame_list)
{
    if (!frame_list->list)
        return 0;
    return frame_list->list->nb_samples;
}
96

97
/**
 * Get the timestamp of the oldest queued frame.
 *
 * @param frame_list list to inspect (must not be NULL)
 * @return pts of the head entry, or AV_NOPTS_VALUE if the list is empty
 */
static int64_t frame_list_next_pts(FrameList *frame_list)
{
    if (!frame_list->list)
        return AV_NOPTS_VALUE;
    return frame_list->list->pts;
}
103

104
/**
 * Drop a number of samples from the front of the frame list.
 *
 * Entries that are consumed completely are freed. An entry that is only
 * partially consumed is shortened and its pts is advanced by the number of
 * samples removed from it.
 *
 * @param frame_list list to update (must not be NULL)
 * @param nb_samples number of samples to remove; if it covers everything that
 *                   is queued, the list is simply cleared
 */
static void frame_list_remove_samples(FrameList *frame_list, int nb_samples)
{
    if (nb_samples >= frame_list->nb_samples) {
        frame_list_clear(frame_list);
    } else {
        int samples = nb_samples;
        while (samples > 0) {
            FrameInfo *info = frame_list->list;
            /* fewer samples were requested than are queued, so an entry must exist */
            av_assert0(info);
            if (info->nb_samples <= samples) {
                /* head entry fully consumed: unlink and free it */
                samples -= info->nb_samples;
                frame_list->list = info->next;
                if (!frame_list->list)
                    frame_list->end = NULL;
                frame_list->nb_frames--;
                frame_list->nb_samples -= info->nb_samples;
                av_free(info);
            } else {
                /* head entry partially consumed: shrink it in place */
                info->nb_samples       -= samples;
                info->pts              += samples;
                frame_list->nb_samples -= samples;
                samples = 0;
            }
        }
    }
}
130

131
/**
 * Append a new entry to the end of the frame list.
 *
 * @param frame_list list to append to (must not be NULL)
 * @param nb_samples number of samples in the frame
 * @param pts        timestamp of the frame
 * @return 0 on success, AVERROR(ENOMEM) if the entry could not be allocated
 */
static int frame_list_add_frame(FrameList *frame_list, int nb_samples, int64_t pts)
{
    FrameInfo *info = av_malloc(sizeof(*info));
    if (!info)
        return AVERROR(ENOMEM);
    info->nb_samples = nb_samples;
    info->pts        = pts;
    info->next       = NULL;

    if (!frame_list->list) {
        /* first entry becomes both head and tail */
        frame_list->list = info;
        frame_list->end  = info;
    } else {
        av_assert0(frame_list->end);
        frame_list->end->next = info;
        frame_list->end       = info;
    }
    frame_list->nb_frames++;
    frame_list->nb_samples += nb_samples;

    return 0;
}
153

154

155
typedef struct MixContext {
156
    const AVClass *class;       /**< class for AVOptions */
157
    AVFloatDSPContext *fdsp;
158

159
    int nb_inputs;              /**< number of inputs */
160
    int active_inputs;          /**< number of input currently active */
161
    int duration_mode;          /**< mode for determining duration */
162
    float dropout_transition;   /**< transition time when an input drops out */
163

164
    int nb_channels;            /**< number of channels */
165
    int sample_rate;            /**< sample rate */
166
    int planar;
167
    AVAudioFifo **fifos;        /**< audio fifo for each input */
168
    uint8_t *input_state;       /**< current state of each input */
169
    float *input_scale;         /**< mixing scale factor for each input */
170
    float scale_norm;           /**< normalization factor for all inputs */
171
    int64_t next_pts;           /**< calculated pts for next output frame */
172
    FrameList *frame_list;      /**< list of frame info for the first input */
173
} MixContext;
174

175
#define OFFSET(x) offsetof(MixContext, x)
176
#define A AV_OPT_FLAG_AUDIO_PARAM
177
#define F AV_OPT_FLAG_FILTERING_PARAM
178
static const AVOption amix_options[] = {
179
    { "inputs", "Number of inputs.",
180
            OFFSET(nb_inputs), AV_OPT_TYPE_INT, { .i64 = 2 }, 1, 32, A|F },
181
    { "duration", "How to determine the end-of-stream.",
182
            OFFSET(duration_mode), AV_OPT_TYPE_INT, { .i64 = DURATION_LONGEST }, 0,  2, A|F, "duration" },
183
        { "longest",  "Duration of longest input.",  0, AV_OPT_TYPE_CONST, { .i64 = DURATION_LONGEST  }, INT_MIN, INT_MAX, A|F, "duration" },
184
        { "shortest", "Duration of shortest input.", 0, AV_OPT_TYPE_CONST, { .i64 = DURATION_SHORTEST }, INT_MIN, INT_MAX, A|F, "duration" },
185
        { "first",    "Duration of first input.",    0, AV_OPT_TYPE_CONST, { .i64 = DURATION_FIRST    }, INT_MIN, INT_MAX, A|F, "duration" },
186
    { "dropout_transition", "Transition time, in seconds, for volume "
187
                            "renormalization when an input stream ends.",
188
            OFFSET(dropout_transition), AV_OPT_TYPE_FLOAT, { .dbl = 2.0 }, 0, INT_MAX, A|F },
189
    { NULL }
190
};
191

192
AVFILTER_DEFINE_CLASS(amix);
193

194
/**
195
 * Update the scaling factors to apply to each input during mixing.
196
 *
197
 * This balances the full volume range between active inputs and handles
198
 * volume transitions when EOF is encountered on an input but mixing continues
199
 * with the remaining inputs.
200
 */
201
static void calculate_scales(MixContext *s, int nb_samples)
202
{
203
    int i;
204

205
    if (s->scale_norm > s->active_inputs) {
206
        s->scale_norm -= nb_samples / (s->dropout_transition * s->sample_rate);
207
        s->scale_norm = FFMAX(s->scale_norm, s->active_inputs);
208
    }
209

210
    for (i = 0; i < s->nb_inputs; i++) {
211
        if (s->input_state[i] & INPUT_ON)
212
            s->input_scale[i] = 1.0f / s->scale_norm;
213
        else
214
            s->input_scale[i] = 0.0f;
215
    }
216
}
217

218
/**
 * Configure the output link and allocate the per-input mixing state.
 *
 * Sets the output time base to 1/sample_rate, allocates the frame list, one
 * audio FIFO per input, the input state flags (all inputs start as INPUT_ON)
 * and the input scale factors, then computes the initial scales.
 *
 * Allocations are stored in the context as they are made, so on failure the
 * ones made so far remain there for uninit() to release.
 *
 * @param outlink output link being configured
 * @return 0 on success, AVERROR(ENOMEM) on allocation failure
 */
static int config_output(AVFilterLink *outlink)
{
    AVFilterContext *ctx = outlink->src;
    MixContext *s      = ctx->priv;
    int i;
    char buf[64];

    s->planar          = av_sample_fmt_is_planar(outlink->format);
    s->sample_rate     = outlink->sample_rate;
    outlink->time_base = (AVRational){ 1, outlink->sample_rate };
    s->next_pts        = AV_NOPTS_VALUE;

    s->frame_list = av_mallocz(sizeof(*s->frame_list));
    if (!s->frame_list)
        return AVERROR(ENOMEM);

    s->fifos = av_mallocz_array(s->nb_inputs, sizeof(*s->fifos));
    if (!s->fifos)
        return AVERROR(ENOMEM);

    s->nb_channels = av_get_channel_layout_nb_channels(outlink->channel_layout);
    for (i = 0; i < s->nb_inputs; i++) {
        s->fifos[i] = av_audio_fifo_alloc(outlink->format, s->nb_channels, 1024);
        if (!s->fifos[i])
            return AVERROR(ENOMEM);
    }

    /* one state byte per input, every input initially active */
    s->input_state = av_malloc(s->nb_inputs);
    if (!s->input_state)
        return AVERROR(ENOMEM);
    memset(s->input_state, INPUT_ON, s->nb_inputs);
    s->active_inputs = s->nb_inputs;

    s->input_scale = av_mallocz_array(s->nb_inputs, sizeof(*s->input_scale));
    if (!s->input_scale)
        return AVERROR(ENOMEM);
    s->scale_norm = s->active_inputs;
    calculate_scales(s, 0);

    av_get_channel_layout_string(buf, sizeof(buf), -1, outlink->channel_layout);

    av_log(ctx, AV_LOG_VERBOSE,
           "inputs:%d fmt:%s srate:%d cl:%s\n", s->nb_inputs,
           av_get_sample_fmt_name(outlink->format), outlink->sample_rate, buf);

    return 0;
}
265

266
static int calc_active_inputs(MixContext *s);
267

268
/**
269
 * Read samples from the input FIFOs, mix, and write to the output link.
270
 */
271
static int output_frame(AVFilterLink *outlink)
272
{
273
    AVFilterContext *ctx = outlink->src;
274
    MixContext      *s = ctx->priv;
275
    AVFrame *out_buf, *in_buf;
276
    int nb_samples, ns, ret, i;
277

278
    ret = calc_active_inputs(s);
279
    if (ret < 0)
280
        return ret;
281

282
    if (s->input_state[0] & INPUT_ON) {
283
        /* first input live: use the corresponding frame size */
284
        nb_samples = frame_list_next_frame_size(s->frame_list);
285
        for (i = 1; i < s->nb_inputs; i++) {
286
            if (s->input_state[i] & INPUT_ON) {
287
                ns = av_audio_fifo_size(s->fifos[i]);
288
                if (ns < nb_samples) {
289
                    if (!(s->input_state[i] & INPUT_EOF))
290
                        /* unclosed input with not enough samples */
291
                        return 0;
292
                    /* closed input to drain */
293
                    nb_samples = ns;
294
                }
295
            }
296
        }
297
    } else {
298
        /* first input closed: use the available samples */
299
        nb_samples = INT_MAX;
300
        for (i = 1; i < s->nb_inputs; i++) {
301
            if (s->input_state[i] & INPUT_ON) {
302
                ns = av_audio_fifo_size(s->fifos[i]);
303
                nb_samples = FFMIN(nb_samples, ns);
304
            }
305
        }
306
        if (nb_samples == INT_MAX)
307
            return AVERROR_EOF;
308
    }
309

310
    s->next_pts = frame_list_next_pts(s->frame_list);
311
    frame_list_remove_samples(s->frame_list, nb_samples);
312

313
    calculate_scales(s, nb_samples);
314

315
    out_buf = ff_get_audio_buffer(outlink, nb_samples);
316
    if (!out_buf)
317
        return AVERROR(ENOMEM);
318

319
    in_buf = ff_get_audio_buffer(outlink, nb_samples);
320
    if (!in_buf) {
321
        av_frame_free(&out_buf);
322
        return AVERROR(ENOMEM);
323
    }
324

325
    for (i = 0; i < s->nb_inputs; i++) {
326
        if (s->input_state[i] & INPUT_ON) {
327
            int planes, plane_size, p;
328

329
            av_audio_fifo_read(s->fifos[i], (void **)in_buf->extended_data,
330
                               nb_samples);
331

332
            planes     = s->planar ? s->nb_channels : 1;
333
            plane_size = nb_samples * (s->planar ? 1 : s->nb_channels);
334
            plane_size = FFALIGN(plane_size, 16);
335

336
            for (p = 0; p < planes; p++) {
337
                s->fdsp->vector_fmac_scalar((float *)out_buf->extended_data[p],
338
                                           (float *) in_buf->extended_data[p],
339
                                           s->input_scale[i], plane_size);
340
            }
341
        }
342
    }
343
    av_frame_free(&in_buf);
344

345
    out_buf->pts = s->next_pts;
346
    if (s->next_pts != AV_NOPTS_VALUE)
347
        s->next_pts += nb_samples;
348

349
    return ff_filter_frame(outlink, out_buf);
350
}
351

352
/**
353
 * Requests a frame, if needed, from each input link other than the first.
354
 */
355
static int request_samples(AVFilterContext *ctx, int min_samples)
356
{
357
    MixContext *s = ctx->priv;
358
    int i, ret;
359

360
    av_assert0(s->nb_inputs > 1);
361

362
    for (i = 1; i < s->nb_inputs; i++) {
363
        ret = 0;
364
        if (!(s->input_state[i] & INPUT_ON))
365
            continue;
366
        if (av_audio_fifo_size(s->fifos[i]) >= min_samples)
367
            continue;
368
        ret = ff_request_frame(ctx->inputs[i]);
369
        if (ret == AVERROR_EOF) {
370
            s->input_state[i] |= INPUT_EOF;
371
            if (av_audio_fifo_size(s->fifos[i]) == 0) {
372
                s->input_state[i] = 0;
373
                continue;
374
            }
375
        } else if (ret < 0)
376
            return ret;
377
    }
378
    return output_frame(ctx->outputs[0]);
379
}
380

381
/**
382
 * Calculates the number of active inputs and determines EOF based on the
383
 * duration option.
384
 *
385
 * @return 0 if mixing should continue, or AVERROR_EOF if mixing should stop.
386
 */
387
static int calc_active_inputs(MixContext *s)
388
{
389
    int i;
390
    int active_inputs = 0;
391
    for (i = 0; i < s->nb_inputs; i++)
392
        active_inputs += !!(s->input_state[i] & INPUT_ON);
393
    s->active_inputs = active_inputs;
394

395
    if (!active_inputs ||
396
        (s->duration_mode == DURATION_FIRST && !(s->input_state[0] & INPUT_ON)) ||
397
        (s->duration_mode == DURATION_SHORTEST && active_inputs != s->nb_inputs))
398
        return AVERROR_EOF;
399
    return 0;
400
}
401

402
/**
 * Output pad request_frame callback.
 *
 * If the first input is no longer active, any amount of data from the other
 * inputs is acceptable. Otherwise a frame from the first input must be
 * queued first; once one is, the other inputs are asked for as many samples
 * as that frame holds.
 *
 * @param outlink output link requesting a frame
 * @return 0 or a positive value on success, AVERROR_EOF when mixing is
 *         finished, or another negative AVERROR code on failure
 */
static int request_frame(AVFilterLink *outlink)
{
    AVFilterContext *ctx = outlink->src;
    MixContext      *s = ctx->priv;
    int ret;
    int wanted_samples;

    ret = calc_active_inputs(s);
    if (ret < 0)
        return ret;

    if (!(s->input_state[0] & INPUT_ON))
        return request_samples(ctx, 1);

    if (s->frame_list->nb_frames == 0) {
        ret = ff_request_frame(ctx->inputs[0]);
        if (ret == AVERROR_EOF) {
            /* first input finished: turn it off and mix what remains */
            s->input_state[0] = 0;
            if (s->nb_inputs == 1)
                return AVERROR_EOF;
            return output_frame(ctx->outputs[0]);
        }
        return ret;
    }
    av_assert0(s->frame_list->nb_frames > 0);

    wanted_samples = frame_list_next_frame_size(s->frame_list);

    return request_samples(ctx, wanted_samples);
}
432

433
/**
 * Input pad filter_frame callback: queue an incoming frame and try to mix.
 *
 * The frame's samples are appended to the FIFO of the input they arrived on.
 * For the first input, the frame size and its pts (rescaled to the output
 * time base) are also recorded in the frame list. The frame is always freed
 * before returning.
 *
 * @param inlink input link the frame arrived on
 * @param buf    frame to consume
 * @return the result of output_frame() on success, AVERROR(EINVAL) if the
 *         link is not one of the filter's inputs, or a negative AVERROR code
 *         if the frame could not be queued
 */
static int filter_frame(AVFilterLink *inlink, AVFrame *buf)
{
    AVFilterContext  *ctx = inlink->dst;
    MixContext       *s = ctx->priv;
    AVFilterLink *outlink = ctx->outputs[0];
    int i, ret = 0;

    /* find the index of the input this frame belongs to */
    for (i = 0; i < ctx->nb_inputs; i++)
        if (ctx->inputs[i] == inlink)
            break;
    if (i >= ctx->nb_inputs) {
        av_log(ctx, AV_LOG_ERROR, "unknown input link\n");
        ret = AVERROR(EINVAL);
        goto fail;
    }

    if (i == 0) {
        int64_t pts = av_rescale_q(buf->pts, inlink->time_base,
                                   outlink->time_base);
        ret = frame_list_add_frame(s->frame_list, buf->nb_samples, pts);
        if (ret < 0)
            goto fail;
    }

    ret = av_audio_fifo_write(s->fifos[i], (void **)buf->extended_data,
                              buf->nb_samples);
    /* a failed write must not be ignored: the samples were not queued */
    if (ret < 0)
        goto fail;

    av_frame_free(&buf);
    return output_frame(outlink);

fail:
    av_frame_free(&buf);

    return ret;
}
468

469
/**
 * Filter init callback: create one audio input pad per requested input and
 * allocate the float DSP context.
 *
 * Pads are named "input0", "input1", ...; each name is heap-allocated and
 * released in uninit().
 *
 * @param ctx filter context
 * @return 0 on success, a negative AVERROR code on failure
 */
static av_cold int init(AVFilterContext *ctx)
{
    MixContext *s = ctx->priv;
    int i, ret;

    for (i = 0; i < s->nb_inputs; i++) {
        char name[32];
        AVFilterPad pad = { 0 };

        snprintf(name, sizeof(name), "input%d", i);
        pad.type           = AVMEDIA_TYPE_AUDIO;
        pad.name           = av_strdup(name);
        if (!pad.name)
            return AVERROR(ENOMEM);
        pad.filter_frame   = filter_frame;

        /* if the pad could not be inserted, the name is still ours to free */
        if ((ret = ff_insert_inpad(ctx, i, &pad)) < 0) {
            av_freep(&pad.name);
            return ret;
        }
    }

    s->fdsp = avpriv_float_dsp_alloc(0);
    if (!s->fdsp)
        return AVERROR(ENOMEM);

    return 0;
}
494

495
/**
 * Filter uninit callback: release everything owned by the mix context.
 *
 * Frees the per-input FIFOs, the frame list and its entries, the input state
 * and scale arrays, the float DSP context and the input pad names.
 *
 * @param ctx filter context
 */
static av_cold void uninit(AVFilterContext *ctx)
{
    int i;
    MixContext *s = ctx->priv;

    if (s->fifos) {
        for (i = 0; i < s->nb_inputs; i++)
            av_audio_fifo_free(s->fifos[i]);
        av_freep(&s->fifos);
    }
    /* frame_list_clear() accepts NULL, so this is safe before configuration */
    frame_list_clear(s->frame_list);
    av_freep(&s->frame_list);
    av_freep(&s->input_state);
    av_freep(&s->input_scale);
    av_freep(&s->fdsp);

    for (i = 0; i < ctx->nb_inputs; i++)
        av_freep(&ctx->input_pads[i].name);
}
514

515
/**
 * Filter query_formats callback.
 *
 * Builds a format list containing AV_SAMPLE_FMT_FLT and AV_SAMPLE_FMT_FLTP
 * and passes it, together with the lists returned by
 * ff_all_channel_layouts() and ff_all_samplerates(), to the corresponding
 * ff_set_common_*() helpers.
 *
 * @param ctx filter context
 * @return 0 on success, a negative AVERROR code on failure
 */
static int query_formats(AVFilterContext *ctx)
{
    AVFilterFormats *formats = NULL;
    AVFilterChannelLayouts *layouts;
    int ret;

    layouts = ff_all_channel_layouts();
    if (!layouts) {
        ret = AVERROR(ENOMEM);
        goto fail;
    }

    if ((ret = ff_add_format(&formats, AV_SAMPLE_FMT_FLT ))          < 0 ||
        (ret = ff_add_format(&formats, AV_SAMPLE_FMT_FLTP))          < 0 ||
        (ret = ff_set_common_formats        (ctx, formats))          < 0 ||
        (ret = ff_set_common_channel_layouts(ctx, layouts))          < 0 ||
        (ret = ff_set_common_samplerates(ctx, ff_all_samplerates())) < 0)
        goto fail;
    return 0;
fail:
    /* NOTE(review): this assumes layouts is still owned here on every failure
     * path; confirm against the ownership rules of the ff_set_common_*()
     * helpers */
    if (layouts)
        av_freep(&layouts->channel_layouts);
    av_freep(&layouts);
    return ret;
}
540

541
static const AVFilterPad avfilter_af_amix_outputs[] = {
542
    {
543
        .name          = "default",
544
        .type          = AVMEDIA_TYPE_AUDIO,
545
        .config_props  = config_output,
546
        .request_frame = request_frame
547
    },
548
    { NULL }
549
};
550

551
AVFilter ff_af_amix = {
552
    .name           = "amix",
553
    .description    = NULL_IF_CONFIG_SMALL("Audio mixing."),
554
    .priv_size      = sizeof(MixContext),
555
    .priv_class     = &amix_class,
556
    .init           = init,
557
    .uninit         = uninit,
558
    .query_formats  = query_formats,
559
    .inputs         = NULL,
560
    .outputs        = avfilter_af_amix_outputs,
561
    .flags          = AVFILTER_FLAG_DYNAMIC_INPUTS,
562
};
563

564
Product

Resources

Company