CoCalc -- af_silencedetect.c

05. Matplotlib / ffmpeg-3.0 / libavfilter / af_silencedetect.c
⁵²⁸⁶⁸ views
1
/*
2
 * Copyright (c) 2012 Clément Bœsch <u pkh me>
3
 *
4
 * This file is part of FFmpeg.
5
 *
6
 * FFmpeg is free software; you can redistribute it and/or
7
 * modify it under the terms of the GNU Lesser General Public
8
 * License as published by the Free Software Foundation; either
9
 * version 2.1 of the License, or (at your option) any later version.
10
 *
11
 * FFmpeg is distributed in the hope that it will be useful,
12
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
 * Lesser General Public License for more details.
15
 *
16
 * You should have received a copy of the GNU Lesser General Public
17
 * License along with FFmpeg; if not, write to the Free Software
18
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19
 */
20

21
/**
22
 * @file
23
 * Audio silence detector
24
 */
25

26
#include <float.h> /* DBL_MAX */
27

28
#include "libavutil/opt.h"
29
#include "libavutil/timestamp.h"
30
#include "audio.h"
31
#include "formats.h"
32
#include "avfilter.h"
33
#include "internal.h"
34

35
typedef struct SilenceDetectContext {
36
    const AVClass *class;
37
    double noise;               ///< noise amplitude ratio
38
    double duration;            ///< minimum duration of silence until notification
39
    int64_t nb_null_samples;    ///< current number of continuous zero samples
40
    int64_t start;              ///< if silence is detected, this value contains the time of the first zero sample
41
    int last_sample_rate;       ///< last sample rate to check for sample rate changes
42

43
    void (*silencedetect)(struct SilenceDetectContext *s, AVFrame *insamples,
44
                          int nb_samples, int64_t nb_samples_notify,
45
                          AVRational time_base);
46
} SilenceDetectContext;
47

48
#define OFFSET(x) offsetof(SilenceDetectContext, x)
49
#define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_AUDIO_PARAM
50
static const AVOption silencedetect_options[] = {
51
    { "n",         "set noise tolerance",              OFFSET(noise),     AV_OPT_TYPE_DOUBLE, {.dbl=0.001},          0, DBL_MAX,  FLAGS },
52
    { "noise",     "set noise tolerance",              OFFSET(noise),     AV_OPT_TYPE_DOUBLE, {.dbl=0.001},          0, DBL_MAX,  FLAGS },
53
    { "d",         "set minimum duration in seconds",  OFFSET(duration),  AV_OPT_TYPE_DOUBLE, {.dbl=2.},             0, 24*60*60, FLAGS },
54
    { "duration",  "set minimum duration in seconds",  OFFSET(duration),  AV_OPT_TYPE_DOUBLE, {.dbl=2.},             0, 24*60*60, FLAGS },
55
    { NULL }
56
};
57

58
/* Presumably expands to the silencedetect_class AVClass (built around
 * silencedetect_options) that ff_af_silencedetect points at via .priv_class
 * -- confirm against the macro definition. */
AVFILTER_DEFINE_CLASS(silencedetect);
59

60
/**
 * Fetch the value stored under a key in a frame's metadata dictionary.
 *
 * @param insamples frame whose metadata is searched
 * @param key       metadata key to look up
 * @return the entry's value pointer (no copy is made), or NULL when there is
 *         no matching entry or the entry has no value
 */
static char *get_metadata_val(AVFrame *insamples, const char *key)
{
    AVDictionaryEntry *entry = av_dict_get(insamples->metadata, key, NULL, 0);

    if (!entry)
        return NULL;
    return entry->value;
}
65

66
static av_always_inline void update(SilenceDetectContext *s, AVFrame *insamples,
67
                                    int is_silence, int64_t nb_samples_notify,
68
                                    AVRational time_base)
69
{
70
    if (is_silence) {
71
        if (!s->start) {
72
            s->nb_null_samples++;
73
            if (s->nb_null_samples >= nb_samples_notify) {
74
                s->start = insamples->pts - (int64_t)(s->duration / av_q2d(time_base) + .5);
75
                av_dict_set(&insamples->metadata, "lavfi.silence_start",
76
                            av_ts2timestr(s->start, &time_base), 0);
77
                av_log(s, AV_LOG_INFO, "silence_start: %s\n",
78
                       get_metadata_val(insamples, "lavfi.silence_start"));
79
            }
80
        }
81
    } else {
82
        if (s->start) {
83
            av_dict_set(&insamples->metadata, "lavfi.silence_end",
84
                        av_ts2timestr(insamples->pts, &time_base), 0);
85
            av_dict_set(&insamples->metadata, "lavfi.silence_duration",
86
                        av_ts2timestr(insamples->pts - s->start, &time_base), 0);
87
            av_log(s, AV_LOG_INFO,
88
                   "silence_end: %s | silence_duration: %s\n",
89
                   get_metadata_val(insamples, "lavfi.silence_end"),
90
                   get_metadata_val(insamples, "lavfi.silence_duration"));
91
        }
92
        s->nb_null_samples = s->start = 0;
93
    }
94
}
95

96
#define SILENCE_DETECT(name, type)                                               \
97
static void silencedetect_##name(SilenceDetectContext *s, AVFrame *insamples,    \
98
                                 int nb_samples, int64_t nb_samples_notify,      \
99
                                 AVRational time_base)                           \
100
{                                                                                \
101
    const type *p = (const type *)insamples->data[0];                            \
102
    const type noise = s->noise;                                                 \
103
    int i;                                                                       \
104
                                                                                 \
105
    for (i = 0; i < nb_samples; i++, p++)                                        \
106
        update(s, insamples, *p < noise && *p > -noise,                          \
107
               nb_samples_notify, time_base);                                    \
108
}
109

110
SILENCE_DETECT(dbl, double)
111
SILENCE_DETECT(flt, float)
112
SILENCE_DETECT(s32, int32_t)
113
SILENCE_DETECT(s16, int16_t)
114

115
/**
 * Select the detection routine matching the negotiated input sample format.
 *
 * For the integer formats the noise ratio is also scaled to the format's
 * maximum sample value, so it can be compared against raw samples. Any
 * format other than the four handled here leaves the state untouched.
 *
 * @param inlink input link being configured
 * @return always 0
 */
static int config_input(AVFilterLink *inlink)
{
    SilenceDetectContext *s = inlink->dst->priv;

    if (inlink->format == AV_SAMPLE_FMT_DBL) {
        s->silencedetect = silencedetect_dbl;
    } else if (inlink->format == AV_SAMPLE_FMT_FLT) {
        s->silencedetect = silencedetect_flt;
    } else if (inlink->format == AV_SAMPLE_FMT_S32) {
        s->noise *= INT32_MAX;
        s->silencedetect = silencedetect_s32;
    } else if (inlink->format == AV_SAMPLE_FMT_S16) {
        s->noise *= INT16_MAX;
        s->silencedetect = silencedetect_s16;
    }

    return 0;
}
135

136
/**
 * Run silence detection over one input frame and pass the frame on.
 *
 * The frame's samples are treated as one sequence of
 * nb_samples * channels values; the notification threshold is expressed in
 * the same unit (sample rate * duration * channels).
 *
 * @param inlink    input link the frame arrived on
 * @param insamples frame to analyse; forwarded to the first output
 * @return the result of ff_filter_frame()
 */
static int filter_frame(AVFilterLink *inlink, AVFrame *insamples)
{
    AVFilterContext *ctx         = inlink->dst;
    SilenceDetectContext *s      = ctx->priv;
    const int channels           = inlink->channels;
    const int rate               = inlink->sample_rate;
    const int total_samples      = insamples->nb_samples * channels;
    const int64_t notify_samples = rate * s->duration * channels;

    if (s->last_sample_rate != rate) {
        // scale number of null samples to the new sample rate
        if (s->last_sample_rate)
            s->nb_null_samples = rate * s->nb_null_samples / s->last_sample_rate;
        s->last_sample_rate = rate;
    }

    // TODO: document metadata
    s->silencedetect(s, insamples, total_samples, notify_samples,
                     inlink->time_base);

    return ff_filter_frame(ctx->outputs[0], insamples);
}
155

156
/**
 * Declare what the filter accepts: all channel layouts, the four sample
 * formats that have a detection routine, and all sample rates.
 *
 * @param ctx filter context
 * @return AVERROR(ENOMEM) if a list cannot be created, a negative value
 *         passed through from a failing ff_set_common_*() call, otherwise
 *         the result of ff_set_common_samplerates()
 */
static int query_formats(AVFilterContext *ctx)
{
    static const enum AVSampleFormat sample_fmts[] = {
        AV_SAMPLE_FMT_DBL,
        AV_SAMPLE_FMT_FLT,
        AV_SAMPLE_FMT_S32,
        AV_SAMPLE_FMT_S16,
        AV_SAMPLE_FMT_NONE
    };
    AVFilterChannelLayouts *layouts;
    AVFilterFormats *formats;
    int ret;

    if (!(layouts = ff_all_channel_layouts()))
        return AVERROR(ENOMEM);
    if ((ret = ff_set_common_channel_layouts(ctx, layouts)) < 0)
        return ret;

    if (!(formats = ff_make_format_list(sample_fmts)))
        return AVERROR(ENOMEM);
    if ((ret = ff_set_common_formats(ctx, formats)) < 0)
        return ret;

    if (!(formats = ff_all_samplerates()))
        return AVERROR(ENOMEM);
    return ff_set_common_samplerates(ctx, formats);
}
188

189
static const AVFilterPad silencedetect_inputs[] = {
190
    {
191
        .name         = "default",
192
        .type         = AVMEDIA_TYPE_AUDIO,
193
        .config_props = config_input,
194
        .filter_frame = filter_frame,
195
    },
196
    { NULL }
197
};
198

199
static const AVFilterPad silencedetect_outputs[] = {
200
    {
201
        .name = "default",
202
        .type = AVMEDIA_TYPE_AUDIO,
203
    },
204
    { NULL }
205
};
206

207
AVFilter ff_af_silencedetect = {
208
    .name          = "silencedetect",
209
    .description   = NULL_IF_CONFIG_SMALL("Detect silence."),
210
    .priv_size     = sizeof(SilenceDetectContext),
211
    .query_formats = query_formats,
212
    .inputs        = silencedetect_inputs,
213
    .outputs       = silencedetect_outputs,
214
    .priv_class    = &silencedetect_class,
215
};
216

217
Product

Resources

Company