Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
Download
52868 views
1
/*
 * Copyright (c) 2012 Clément Bœsch <u pkh me>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
20
21
/**
 * @file
 * Audio silence detector
 */
25
26
#include <float.h> /* DBL_MAX */
27
28
#include "libavutil/opt.h"
29
#include "libavutil/timestamp.h"
30
#include "audio.h"
31
#include "formats.h"
32
#include "avfilter.h"
33
#include "internal.h"
34
35
typedef struct SilenceDetectContext {
36
const AVClass *class;
37
double noise; ///< noise amplitude ratio
38
double duration; ///< minimum duration of silence until notification
39
int64_t nb_null_samples; ///< current number of continuous zero samples
40
int64_t start; ///< if silence is detected, this value contains the time of the first zero sample
41
int last_sample_rate; ///< last sample rate to check for sample rate changes
42
43
void (*silencedetect)(struct SilenceDetectContext *s, AVFrame *insamples,
44
int nb_samples, int64_t nb_samples_notify,
45
AVRational time_base);
46
} SilenceDetectContext;
47
48
#define OFFSET(x) offsetof(SilenceDetectContext, x)
49
#define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_AUDIO_PARAM
50
static const AVOption silencedetect_options[] = {
51
{ "n", "set noise tolerance", OFFSET(noise), AV_OPT_TYPE_DOUBLE, {.dbl=0.001}, 0, DBL_MAX, FLAGS },
52
{ "noise", "set noise tolerance", OFFSET(noise), AV_OPT_TYPE_DOUBLE, {.dbl=0.001}, 0, DBL_MAX, FLAGS },
53
{ "d", "set minimum duration in seconds", OFFSET(duration), AV_OPT_TYPE_DOUBLE, {.dbl=2.}, 0, 24*60*60, FLAGS },
54
{ "duration", "set minimum duration in seconds", OFFSET(duration), AV_OPT_TYPE_DOUBLE, {.dbl=2.}, 0, 24*60*60, FLAGS },
55
{ NULL }
56
};
57
58
AVFILTER_DEFINE_CLASS(silencedetect);
59
60
/**
 * Fetch the value stored under a key in a frame's metadata dictionary.
 *
 * @param insamples frame whose metadata is queried
 * @param key       key passed to av_dict_get() (with flags 0)
 * @return the entry's value pointer, or NULL when no entry is found
 */
static char *get_metadata_val(AVFrame *insamples, const char *key)
{
    AVDictionaryEntry *entry = av_dict_get(insamples->metadata, key, NULL, 0);

    if (!entry)
        return NULL;
    return entry->value;
}
65
66
static av_always_inline void update(SilenceDetectContext *s, AVFrame *insamples,
67
int is_silence, int64_t nb_samples_notify,
68
AVRational time_base)
69
{
70
if (is_silence) {
71
if (!s->start) {
72
s->nb_null_samples++;
73
if (s->nb_null_samples >= nb_samples_notify) {
74
s->start = insamples->pts - (int64_t)(s->duration / av_q2d(time_base) + .5);
75
av_dict_set(&insamples->metadata, "lavfi.silence_start",
76
av_ts2timestr(s->start, &time_base), 0);
77
av_log(s, AV_LOG_INFO, "silence_start: %s\n",
78
get_metadata_val(insamples, "lavfi.silence_start"));
79
}
80
}
81
} else {
82
if (s->start) {
83
av_dict_set(&insamples->metadata, "lavfi.silence_end",
84
av_ts2timestr(insamples->pts, &time_base), 0);
85
av_dict_set(&insamples->metadata, "lavfi.silence_duration",
86
av_ts2timestr(insamples->pts - s->start, &time_base), 0);
87
av_log(s, AV_LOG_INFO,
88
"silence_end: %s | silence_duration: %s\n",
89
get_metadata_val(insamples, "lavfi.silence_end"),
90
get_metadata_val(insamples, "lavfi.silence_duration"));
91
}
92
s->nb_null_samples = s->start = 0;
93
}
94
}
95
96
#define SILENCE_DETECT(name, type) \
97
static void silencedetect_##name(SilenceDetectContext *s, AVFrame *insamples, \
98
int nb_samples, int64_t nb_samples_notify, \
99
AVRational time_base) \
100
{ \
101
const type *p = (const type *)insamples->data[0]; \
102
const type noise = s->noise; \
103
int i; \
104
\
105
for (i = 0; i < nb_samples; i++, p++) \
106
update(s, insamples, *p < noise && *p > -noise, \
107
nb_samples_notify, time_base); \
108
}
109
110
SILENCE_DETECT(dbl, double)
111
SILENCE_DETECT(flt, float)
112
SILENCE_DETECT(s32, int32_t)
113
SILENCE_DETECT(s16, int16_t)
114
115
/**
 * Input link configuration: select the scanning routine that matches the
 * negotiated sample format and bring the noise threshold into that
 * format's value range.
 *
 * @param inlink configured input link
 * @return 0 on success, AVERROR_BUG if the link carries a sample format
 *         this filter has no scanner for
 */
static int config_input(AVFilterLink *inlink)
{
    AVFilterContext *ctx = inlink->dst;
    SilenceDetectContext *s = ctx->priv;

    switch (inlink->format) {
    case AV_SAMPLE_FMT_DBL:
        s->silencedetect = silencedetect_dbl;
        break;
    case AV_SAMPLE_FMT_FLT:
        s->silencedetect = silencedetect_flt;
        break;
    case AV_SAMPLE_FMT_S32:
        /*
         * The option is an amplitude ratio; integer formats need it in
         * sample units.
         * NOTE(review): noise is rescaled in place, which assumes this
         * callback runs once per filter instance -- confirm.
         */
        s->noise *= INT32_MAX;
        s->silencedetect = silencedetect_s32;
        break;
    case AV_SAMPLE_FMT_S16:
        s->noise *= INT16_MAX;
        s->silencedetect = silencedetect_s16;
        break;
    default:
        /*
         * Without this, an unexpected format would return success with
         * s->silencedetect unset and filter_frame() would call through it.
         */
        av_log(ctx, AV_LOG_ERROR, "Unsupported sample format %d\n",
               inlink->format);
        return AVERROR_BUG;
    }

    return 0;
}
135
136
/**
 * Scan one incoming audio frame for silence and forward it unchanged
 * (apart from any metadata added) to the first output.
 *
 * update() may attach "lavfi.silence_start", "lavfi.silence_end" and
 * "lavfi.silence_duration" entries to the frame's metadata.
 *
 * @param inlink    input link the frame arrived on
 * @param insamples frame to analyse; passed on to ff_filter_frame()
 * @return the result of ff_filter_frame()
 */
static int filter_frame(AVFilterLink *inlink, AVFrame *insamples)
{
    AVFilterContext *ctx      = inlink->dst;
    SilenceDetectContext *s   = ctx->priv;
    const int channels        = inlink->channels;
    const int sample_rate     = inlink->sample_rate;
    /* number of sample values to scan, counting every channel */
    const int total_samples   = insamples->nb_samples * channels;
    /* silent sample values needed before a silence interval is reported */
    const int64_t notify_at   = sample_rate * s->duration * channels;

    /* keep the running silent-sample count meaningful across a rate change */
    if (s->last_sample_rate && s->last_sample_rate != sample_rate)
        s->nb_null_samples = sample_rate * s->nb_null_samples / s->last_sample_rate;
    s->last_sample_rate = sample_rate;

    s->silencedetect(s, insamples, total_samples, notify_at,
                     inlink->time_base);

    return ff_filter_frame(ctx->outputs[0], insamples);
}
155
156
/**
 * Declare what the filter accepts: every channel layout, every sample
 * rate, and the four sample formats that have a scanning routine.
 *
 * @param ctx filter instance
 * @return a negative AVERROR code on failure (ENOMEM when a list cannot be
 *         allocated), otherwise the result of ff_set_common_samplerates()
 */
static int query_formats(AVFilterContext *ctx)
{
    static const enum AVSampleFormat sample_fmts[] = {
        AV_SAMPLE_FMT_DBL,
        AV_SAMPLE_FMT_FLT,
        AV_SAMPLE_FMT_S32,
        AV_SAMPLE_FMT_S16,
        AV_SAMPLE_FMT_NONE
    };
    AVFilterChannelLayouts *all_layouts;
    AVFilterFormats *fmt_list, *rate_list;
    int err;

    /* channel layouts: anything goes */
    if (!(all_layouts = ff_all_channel_layouts()))
        return AVERROR(ENOMEM);
    if ((err = ff_set_common_channel_layouts(ctx, all_layouts)) < 0)
        return err;

    /* sample formats: only those listed above */
    if (!(fmt_list = ff_make_format_list(sample_fmts)))
        return AVERROR(ENOMEM);
    if ((err = ff_set_common_formats(ctx, fmt_list)) < 0)
        return err;

    /* sample rates: anything goes */
    if (!(rate_list = ff_all_samplerates()))
        return AVERROR(ENOMEM);
    return ff_set_common_samplerates(ctx, rate_list);
}
188
189
static const AVFilterPad silencedetect_inputs[] = {
190
{
191
.name = "default",
192
.type = AVMEDIA_TYPE_AUDIO,
193
.config_props = config_input,
194
.filter_frame = filter_frame,
195
},
196
{ NULL }
197
};
198
199
static const AVFilterPad silencedetect_outputs[] = {
200
{
201
.name = "default",
202
.type = AVMEDIA_TYPE_AUDIO,
203
},
204
{ NULL }
205
};
206
207
AVFilter ff_af_silencedetect = {
208
.name = "silencedetect",
209
.description = NULL_IF_CONFIG_SMALL("Detect silence."),
210
.priv_size = sizeof(SilenceDetectContext),
211
.query_formats = query_formats,
212
.inputs = silencedetect_inputs,
213
.outputs = silencedetect_outputs,
214
.priv_class = &silencedetect_class,
215
};
216
217