Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
Download
52868 views
1
/*
2
* AAC decoder
3
* Copyright (c) 2005-2006 Oded Shimon ( ods15 ods15 dyndns org )
4
* Copyright (c) 2006-2007 Maxim Gavrilov ( maxim.gavrilov gmail com )
5
* Copyright (c) 2008-2013 Alex Converse <[email protected]>
6
*
7
* AAC LATM decoder
8
* Copyright (c) 2008-2010 Paul Kendall <[email protected]>
9
* Copyright (c) 2010 Janne Grunau <[email protected]>
10
*
11
* This file is part of FFmpeg.
12
*
13
* FFmpeg is free software; you can redistribute it and/or
14
* modify it under the terms of the GNU Lesser General Public
15
* License as published by the Free Software Foundation; either
16
* version 2.1 of the License, or (at your option) any later version.
17
*
18
* FFmpeg is distributed in the hope that it will be useful,
19
* but WITHOUT ANY WARRANTY; without even the implied warranty of
20
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21
* Lesser General Public License for more details.
22
*
23
* You should have received a copy of the GNU Lesser General Public
24
* License along with FFmpeg; if not, write to the Free Software
25
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
26
*/
27
28
/**
 * @file
 * AAC decoder
 * @author Oded Shimon  ( ods15 ods15 dyndns org )
 * @author Maxim Gavrilov ( maxim.gavrilov gmail com )
 */
34
35
/*
 * Configuration switches for this translation unit: floating-point FFT on,
 * 32-bit fixed-point FFT off, fixed-point decoder off. They must be defined
 * before the headers below are included.
 */
#define FFT_FLOAT    1
#define FFT_FIXED_32 0
#define USE_FIXED    0
38
39
#include "libavutil/float_dsp.h"
40
#include "libavutil/opt.h"
41
#include "avcodec.h"
42
#include "internal.h"
43
#include "get_bits.h"
44
#include "fft.h"
45
#include "imdct15.h"
46
#include "lpc.h"
47
#include "kbdwin.h"
48
#include "sinewin.h"
49
50
#include "aac.h"
51
#include "aactab.h"
52
#include "aacdectab.h"
53
#include "cbrt_tablegen.h"
54
#include "sbr.h"
55
#include "aacsbr.h"
56
#include "mpeg4audio.h"
57
#include "aacadtsdec.h"
58
#include "profiles.h"
59
#include "libavutil/intfloat.h"
60
61
#include <errno.h>
62
#include <math.h>
63
#include <stdint.h>
64
#include <string.h>
65
66
#if ARCH_ARM
67
# include "arm/aac.h"
68
#elif ARCH_MIPS
69
# include "mips/aacdec_mips.h"
70
#endif
71
72
/**
 * Put one predictor state back to its initial values:
 * r0/r1 and cor0/cor1 are cleared, var0/var1 are set to 1.
 *
 * @param ps predictor state to reset
 */
static av_always_inline void reset_predict_state(PredictorState *ps)
{
    ps->r0   = 0.0f;
    ps->r1   = 0.0f;
    ps->cor0 = 0.0f;
    ps->cor1 = 0.0f;
    ps->var0 = 1.0f;
    ps->var1 = 1.0f;
}
81
82
#ifndef VMUL2
/**
 * Write two scaled table entries to dst.
 *
 * idx packs two 4-bit table indices: bits 0-3 select the first value and
 * bits 4-7 the second. Each looked-up value is multiplied by *scale.
 *
 * @param dst   output position; two floats are written
 * @param v     lookup table (indices 0..15 are read)
 * @param idx   packed pair of 4-bit indices
 * @param scale pointer to the common scale factor
 * @return dst advanced by two elements
 */
static inline float *VMUL2(float *dst, const float *v, unsigned idx,
                           const float *scale)
{
    float s = *scale;
    *dst++ = v[idx    & 15] * s;
    *dst++ = v[idx>>4 & 15] * s;
    return dst;
}
#endif
92
93
#ifndef VMUL4
/**
 * Write four scaled table entries to dst.
 *
 * idx packs four 2-bit table indices, lowest pair first (bits 0-1, 2-3,
 * 4-5, 6-7). Each looked-up value is multiplied by *scale.
 *
 * @param dst   output position; four floats are written
 * @param v     lookup table (indices 0..3 are read)
 * @param idx   packed quadruple of 2-bit indices
 * @param scale pointer to the common scale factor
 * @return dst advanced by four elements
 */
static inline float *VMUL4(float *dst, const float *v, unsigned idx,
                           const float *scale)
{
    float s = *scale;
    *dst++ = v[idx    & 3] * s;
    *dst++ = v[idx>>2 & 3] * s;
    *dst++ = v[idx>>4 & 3] * s;
    *dst++ = v[idx>>6 & 3] * s;
    return dst;
}
#endif
105
106
#ifndef VMUL2S
107
static inline float *VMUL2S(float *dst, const float *v, unsigned idx,
108
unsigned sign, const float *scale)
109
{
110
union av_intfloat32 s0, s1;
111
112
s0.f = s1.f = *scale;
113
s0.i ^= sign >> 1 << 31;
114
s1.i ^= sign << 31;
115
116
*dst++ = v[idx & 15] * s0.f;
117
*dst++ = v[idx>>4 & 15] * s1.f;
118
119
return dst;
120
}
121
#endif
122
123
#ifndef VMUL4S
124
static inline float *VMUL4S(float *dst, const float *v, unsigned idx,
125
unsigned sign, const float *scale)
126
{
127
unsigned nz = idx >> 12;
128
union av_intfloat32 s = { .f = *scale };
129
union av_intfloat32 t;
130
131
t.i = s.i ^ (sign & 1U<<31);
132
*dst++ = v[idx & 3] * t.f;
133
134
sign <<= nz & 1; nz >>= 1;
135
t.i = s.i ^ (sign & 1U<<31);
136
*dst++ = v[idx>>2 & 3] * t.f;
137
138
sign <<= nz & 1; nz >>= 1;
139
t.i = s.i ^ (sign & 1U<<31);
140
*dst++ = v[idx>>4 & 3] * t.f;
141
142
sign <<= nz & 1;
143
t.i = s.i ^ (sign & 1U<<31);
144
*dst++ = v[idx>>6 & 3] * t.f;
145
146
return dst;
147
}
148
#endif
149
150
static av_always_inline float flt16_round(float pf)
151
{
152
union av_intfloat32 tmp;
153
tmp.f = pf;
154
tmp.i = (tmp.i + 0x00008000U) & 0xFFFF0000U;
155
return tmp.f;
156
}
157
158
static av_always_inline float flt16_even(float pf)
159
{
160
union av_intfloat32 tmp;
161
tmp.f = pf;
162
tmp.i = (tmp.i + 0x00007FFFU + (tmp.i & 0x00010000U >> 16)) & 0xFFFF0000U;
163
return tmp.f;
164
}
165
166
static av_always_inline float flt16_trunc(float pf)
167
{
168
union av_intfloat32 pun;
169
pun.f = pf;
170
pun.i &= 0xFFFF0000U;
171
return pun.f;
172
}
173
174
/**
 * Run one step of the predictor for a single coefficient.
 *
 * A predicted value is computed from the stored state; if output_enable is
 * set it is added to *coef. The state (r0/r1, cor0/cor1, var0/var1) is then
 * updated from the resulting coefficient regardless of output_enable.
 *
 * @param ps            predictor state, read and updated
 * @param coef          coefficient to predict; modified only if output_enable
 * @param output_enable non-zero to add the prediction to *coef
 */
static av_always_inline void predict(PredictorState *ps, float *coef,
                                     int output_enable)
{
    const float a     = 0.953125; // 61.0 / 64
    const float alpha = 0.90625;  // 29.0 / 32
    float e0, e1;
    float pv;
    float k1, k2;
    float   r0 = ps->r0,     r1 = ps->r1;
    float cor0 = ps->cor0, cor1 = ps->cor1;
    float var0 = ps->var0, var1 = ps->var1;

    /* Coefficients are forced to zero while the variance is not above 1. */
    k1 = var0 > 1 ? cor0 * flt16_even(a / var0) : 0;
    k2 = var1 > 1 ? cor1 * flt16_even(a / var1) : 0;

    pv = flt16_round(k1 * r0 + k2 * r1);
    if (output_enable)
        *coef += pv;

    e0 = *coef;
    e1 = e0 - k1 * r0;

    /* All state is stored truncated to the upper 16 bits of a float. */
    ps->cor1 = flt16_trunc(alpha * cor1 + r1 * e1);
    ps->var1 = flt16_trunc(alpha * var1 + 0.5f * (r1 * r1 + e1 * e1));
    ps->cor0 = flt16_trunc(alpha * cor0 + r0 * e0);
    ps->var0 = flt16_trunc(alpha * var0 + 0.5f * (r0 * r0 + e0 * e0));

    ps->r1 = flt16_trunc(a * (r0 - k1 * e0));
    ps->r0 = flt16_trunc(a * e0);
}
204
205
/**
206
* Apply dependent channel coupling (applied before IMDCT).
207
*
208
* @param index index into coupling gain array
209
*/
210
static void apply_dependent_coupling(AACContext *ac,
211
SingleChannelElement *target,
212
ChannelElement *cce, int index)
213
{
214
IndividualChannelStream *ics = &cce->ch[0].ics;
215
const uint16_t *offsets = ics->swb_offset;
216
float *dest = target->coeffs;
217
const float *src = cce->ch[0].coeffs;
218
int g, i, group, k, idx = 0;
219
if (ac->oc[1].m4ac.object_type == AOT_AAC_LTP) {
220
av_log(ac->avctx, AV_LOG_ERROR,
221
"Dependent coupling is not supported together with LTP\n");
222
return;
223
}
224
for (g = 0; g < ics->num_window_groups; g++) {
225
for (i = 0; i < ics->max_sfb; i++, idx++) {
226
if (cce->ch[0].band_type[idx] != ZERO_BT) {
227
const float gain = cce->coup.gain[index][idx];
228
for (group = 0; group < ics->group_len[g]; group++) {
229
for (k = offsets[i]; k < offsets[i + 1]; k++) {
230
// FIXME: SIMDify
231
dest[group * 128 + k] += gain * src[group * 128 + k];
232
}
233
}
234
}
235
}
236
dest += ics->group_len[g] * 128;
237
src += ics->group_len[g] * 128;
238
}
239
}
240
241
/**
242
* Apply independent channel coupling (applied after IMDCT).
243
*
244
* @param index index into coupling gain array
245
*/
246
static void apply_independent_coupling(AACContext *ac,
247
SingleChannelElement *target,
248
ChannelElement *cce, int index)
249
{
250
int i;
251
const float gain = cce->coup.gain[index][0];
252
const float *src = cce->ch[0].ret;
253
float *dest = target->ret;
254
const int len = 1024 << (ac->oc[1].m4ac.sbr == 1);
255
256
for (i = 0; i < len; i++)
257
dest[i] += gain * src[i];
258
}
259
260
#include "aacdec_template.c"
261
262
#define LOAS_SYNC_WORD 0x2b7 ///< 11 bits LOAS sync word
263
264
struct LATMContext {
265
AACContext aac_ctx; ///< containing AACContext
266
int initialized; ///< initialized after a valid extradata was seen
267
268
// parser data
269
int audio_mux_version_A; ///< LATM syntax version
270
int frame_length_type; ///< 0/1 variable/fixed frame length
271
int frame_length; ///< frame length for fixed frame length
272
};
273
274
/**
 * Read a variable-width LATM value: a 2-bit field gives the number of
 * following bytes minus one, then that many bytes (8 to 32 bits) are read
 * as the value.
 *
 * @param b bitstream reader positioned at the 2-bit length field
 * @return the value read
 */
static inline uint32_t latm_get_value(GetBitContext *b)
{
    int length = get_bits(b, 2);

    return get_bits_long(b, (length+1)*8);
}
280
281
static int latm_decode_audio_specific_config(struct LATMContext *latmctx,
282
GetBitContext *gb, int asclen)
283
{
284
AACContext *ac = &latmctx->aac_ctx;
285
AVCodecContext *avctx = ac->avctx;
286
MPEG4AudioConfig m4ac = { 0 };
287
int config_start_bit = get_bits_count(gb);
288
int sync_extension = 0;
289
int bits_consumed, esize;
290
291
if (asclen) {
292
sync_extension = 1;
293
asclen = FFMIN(asclen, get_bits_left(gb));
294
} else
295
asclen = get_bits_left(gb);
296
297
if (config_start_bit % 8) {
298
avpriv_request_sample(latmctx->aac_ctx.avctx,
299
"Non-byte-aligned audio-specific config");
300
return AVERROR_PATCHWELCOME;
301
}
302
if (asclen <= 0)
303
return AVERROR_INVALIDDATA;
304
bits_consumed = decode_audio_specific_config(NULL, avctx, &m4ac,
305
gb->buffer + (config_start_bit / 8),
306
asclen, sync_extension);
307
308
if (bits_consumed < 0)
309
return AVERROR_INVALIDDATA;
310
311
if (!latmctx->initialized ||
312
ac->oc[1].m4ac.sample_rate != m4ac.sample_rate ||
313
ac->oc[1].m4ac.chan_config != m4ac.chan_config) {
314
315
if(latmctx->initialized) {
316
av_log(avctx, AV_LOG_INFO, "audio config changed\n");
317
} else {
318
av_log(avctx, AV_LOG_DEBUG, "initializing latmctx\n");
319
}
320
latmctx->initialized = 0;
321
322
esize = (bits_consumed+7) / 8;
323
324
if (avctx->extradata_size < esize) {
325
av_free(avctx->extradata);
326
avctx->extradata = av_malloc(esize + AV_INPUT_BUFFER_PADDING_SIZE);
327
if (!avctx->extradata)
328
return AVERROR(ENOMEM);
329
}
330
331
avctx->extradata_size = esize;
332
memcpy(avctx->extradata, gb->buffer + (config_start_bit/8), esize);
333
memset(avctx->extradata+esize, 0, AV_INPUT_BUFFER_PADDING_SIZE);
334
}
335
skip_bits_long(gb, bits_consumed);
336
337
return bits_consumed;
338
}
339
340
static int read_stream_mux_config(struct LATMContext *latmctx,
341
GetBitContext *gb)
342
{
343
int ret, audio_mux_version = get_bits(gb, 1);
344
345
latmctx->audio_mux_version_A = 0;
346
if (audio_mux_version)
347
latmctx->audio_mux_version_A = get_bits(gb, 1);
348
349
if (!latmctx->audio_mux_version_A) {
350
351
if (audio_mux_version)
352
latm_get_value(gb); // taraFullness
353
354
skip_bits(gb, 1); // allStreamSameTimeFraming
355
skip_bits(gb, 6); // numSubFrames
356
// numPrograms
357
if (get_bits(gb, 4)) { // numPrograms
358
avpriv_request_sample(latmctx->aac_ctx.avctx, "Multiple programs");
359
return AVERROR_PATCHWELCOME;
360
}
361
362
// for each program (which there is only one in DVB)
363
364
// for each layer (which there is only one in DVB)
365
if (get_bits(gb, 3)) { // numLayer
366
avpriv_request_sample(latmctx->aac_ctx.avctx, "Multiple layers");
367
return AVERROR_PATCHWELCOME;
368
}
369
370
// for all but first stream: use_same_config = get_bits(gb, 1);
371
if (!audio_mux_version) {
372
if ((ret = latm_decode_audio_specific_config(latmctx, gb, 0)) < 0)
373
return ret;
374
} else {
375
int ascLen = latm_get_value(gb);
376
if ((ret = latm_decode_audio_specific_config(latmctx, gb, ascLen)) < 0)
377
return ret;
378
ascLen -= ret;
379
skip_bits_long(gb, ascLen);
380
}
381
382
latmctx->frame_length_type = get_bits(gb, 3);
383
switch (latmctx->frame_length_type) {
384
case 0:
385
skip_bits(gb, 8); // latmBufferFullness
386
break;
387
case 1:
388
latmctx->frame_length = get_bits(gb, 9);
389
break;
390
case 3:
391
case 4:
392
case 5:
393
skip_bits(gb, 6); // CELP frame length table index
394
break;
395
case 6:
396
case 7:
397
skip_bits(gb, 1); // HVXC frame length table index
398
break;
399
}
400
401
if (get_bits(gb, 1)) { // other data
402
if (audio_mux_version) {
403
latm_get_value(gb); // other_data_bits
404
} else {
405
int esc;
406
do {
407
esc = get_bits(gb, 1);
408
skip_bits(gb, 8);
409
} while (esc);
410
}
411
}
412
413
if (get_bits(gb, 1)) // crc present
414
skip_bits(gb, 8); // config_crc
415
}
416
417
return 0;
418
}
419
420
/**
 * Read the payload length information for the current frame.
 *
 * For frame_length_type 0 the length is the sum of a run of bytes that ends
 * with the first byte different from 255. For type 1 the stored fixed
 * frame_length is returned. For types 3, 5 and 7 a 2-bit field is skipped.
 *
 * @param ctx LATM decoder context
 * @param gb  bitstream reader
 * @return the length value (0 for types other than 0 and 1), or
 *         AVERROR_INVALIDDATA if the input ends inside the length field
 */
static int read_payload_length_info(struct LATMContext *ctx, GetBitContext *gb)
{
    uint8_t tmp;

    if (ctx->frame_length_type == 0) {
        int mux_slot_length = 0;
        do {
            /* bound the escape loop by the data actually available */
            if (get_bits_left(gb) < 8)
                return AVERROR_INVALIDDATA;
            tmp = get_bits(gb, 8);
            mux_slot_length += tmp;
        } while (tmp == 255);
        return mux_slot_length;
    } else if (ctx->frame_length_type == 1) {
        return ctx->frame_length;
    } else if (ctx->frame_length_type == 3 ||
               ctx->frame_length_type == 5 ||
               ctx->frame_length_type == 7) {
        skip_bits(gb, 2);          // mux_slot_length_coded
    }
    return 0;
}

/**
 * Parse the header part of an AudioMuxElement: the optional stream mux
 * configuration and, for audio_mux_version_A == 0, the payload length,
 * which is checked against the number of bits left in the buffer.
 *
 * @param latmctx LATM decoder context
 * @param gb      bitstream reader positioned at the start of the element
 * @return 0 on success, AVERROR(EAGAIN) if the element reuses a previous
 *         configuration but no extradata is available yet, or another
 *         negative AVERROR code on failure
 */
static int read_audio_mux_element(struct LATMContext *latmctx,
                                  GetBitContext *gb)
{
    int err;
    uint8_t use_same_mux = get_bits(gb, 1);
    if (!use_same_mux) {
        if ((err = read_stream_mux_config(latmctx, gb)) < 0)
            return err;
    } else if (!latmctx->aac_ctx.avctx->extradata) {
        av_log(latmctx->aac_ctx.avctx, AV_LOG_DEBUG,
               "no decoder config found\n");
        return AVERROR(EAGAIN);
    }
    if (latmctx->audio_mux_version_A == 0) {
        int mux_slot_length_bytes = read_payload_length_info(latmctx, gb);
        /* 8LL: do the bit-count arithmetic in 64 bits so that a large or
         * negative (error) length cannot overflow int */
        if (mux_slot_length_bytes < 0 ||
            mux_slot_length_bytes * 8LL > get_bits_left(gb)) {
            av_log(latmctx->aac_ctx.avctx, AV_LOG_ERROR, "incomplete frame\n");
            return AVERROR_INVALIDDATA;
        } else if (mux_slot_length_bytes * 8LL + 256 < get_bits_left(gb)) {
            /* here the product is known to fit in int, so %d is correct */
            av_log(latmctx->aac_ctx.avctx, AV_LOG_ERROR,
                   "frame length mismatch %d << %d\n",
                   mux_slot_length_bytes * 8, get_bits_left(gb));
            return AVERROR_INVALIDDATA;
        }
    }
    return 0;
}
468
469
470
/**
 * Decode one LOAS/LATM packet.
 *
 * Checks the 11-bit LOAS sync word, reads the 13-bit length, parses the
 * audio mux element, (re)initializes the AAC decoder from avctx->extradata
 * when needed, and then hands the payload to the AAC frame decoder selected
 * by the configured object type.
 *
 * @param avctx         codec context; priv_data is a struct LATMContext
 * @param out           output frame, passed through to the AAC decoder
 * @param got_frame_ptr set to 0 here if no configuration is available yet;
 *                      otherwise passed through to the AAC frame decoder
 * @param avpkt         input packet
 * @return the signalled mux length plus 3 header bytes on success,
 *         avpkt->size if the packet was dropped for lack of configuration,
 *         or a negative AVERROR code
 */
static int latm_decode_frame(AVCodecContext *avctx, void *out,
                             int *got_frame_ptr, AVPacket *avpkt)
{
    struct LATMContext *latmctx = avctx->priv_data;
    int                 muxlength, err;
    GetBitContext       gb;

    if ((err = init_get_bits8(&gb, avpkt->data, avpkt->size)) < 0)
        return err;

    // check for LOAS sync word
    if (get_bits(&gb, 11) != LOAS_SYNC_WORD)
        return AVERROR_INVALIDDATA;

    muxlength = get_bits(&gb, 13) + 3;
    // not enough data, the parser should have sorted this out
    if (muxlength > avpkt->size)
        return AVERROR_INVALIDDATA;

    if ((err = read_audio_mux_element(latmctx, &gb)) < 0)
        return err;

    if (!latmctx->initialized) {
        if (!avctx->extradata) {
            *got_frame_ptr = 0;
            return avpkt->size;
        } else {
            /* save the current output configuration so it can be restored
             * if parsing the new extradata fails */
            push_output_configuration(&latmctx->aac_ctx);
            if ((err = decode_audio_specific_config(
                    &latmctx->aac_ctx, avctx, &latmctx->aac_ctx.oc[1].m4ac,
                    avctx->extradata, avctx->extradata_size*8LL, 1)) < 0) {
                pop_output_configuration(&latmctx->aac_ctx);
                return err;
            }
            latmctx->initialized = 1;
        }
    }

    if (show_bits(&gb, 12) == 0xfff) {
        av_log(latmctx->aac_ctx.avctx, AV_LOG_ERROR,
               "ADTS header detected, probably as result of configuration "
               "misparsing\n");
        return AVERROR_INVALIDDATA;
    }

    switch (latmctx->aac_ctx.oc[1].m4ac.object_type) {
    case AOT_ER_AAC_LC:
    case AOT_ER_AAC_LTP:
    case AOT_ER_AAC_LD:
    case AOT_ER_AAC_ELD:
        err = aac_decode_er_frame(avctx, out, got_frame_ptr, &gb);
        break;
    default:
        err = aac_decode_frame_int(avctx, out, got_frame_ptr, &gb, avpkt);
    }
    if (err < 0)
        return err;

    return muxlength;
}
530
531
/**
 * Initialize the LATM decoder by initializing the embedded AAC decoder.
 * If extradata was supplied, the LATM context is marked initialized exactly
 * when aac_decode_init() returned 0.
 *
 * @param avctx codec context; priv_data is a struct LATMContext
 * @return the return value of aac_decode_init()
 */
static av_cold int latm_decode_init(AVCodecContext *avctx)
{
    struct LATMContext *latmctx = avctx->priv_data;
    int ret = aac_decode_init(avctx);

    if (avctx->extradata_size > 0)
        latmctx->initialized = !ret;

    return ret;
}
541
542
AVCodec ff_aac_decoder = {
543
.name = "aac",
544
.long_name = NULL_IF_CONFIG_SMALL("AAC (Advanced Audio Coding)"),
545
.type = AVMEDIA_TYPE_AUDIO,
546
.id = AV_CODEC_ID_AAC,
547
.priv_data_size = sizeof(AACContext),
548
.init = aac_decode_init,
549
.close = aac_decode_close,
550
.decode = aac_decode_frame,
551
.sample_fmts = (const enum AVSampleFormat[]) {
552
AV_SAMPLE_FMT_FLTP, AV_SAMPLE_FMT_NONE
553
},
554
.capabilities = AV_CODEC_CAP_CHANNEL_CONF | AV_CODEC_CAP_DR1,
555
.caps_internal = FF_CODEC_CAP_INIT_THREADSAFE,
556
.channel_layouts = aac_channel_layout,
557
.flush = flush,
558
.priv_class = &aac_decoder_class,
559
.profiles = NULL_IF_CONFIG_SMALL(ff_aac_profiles),
560
};
561
562
/*
    Note: This decoder filter is intended to decode LATM streams carried in
    MPEG transport streams that contain only one program.
    For more complex LATM demuxing, a separate LATM demuxer should be used.
*/
567
AVCodec ff_aac_latm_decoder = {
568
.name = "aac_latm",
569
.long_name = NULL_IF_CONFIG_SMALL("AAC LATM (Advanced Audio Coding LATM syntax)"),
570
.type = AVMEDIA_TYPE_AUDIO,
571
.id = AV_CODEC_ID_AAC_LATM,
572
.priv_data_size = sizeof(struct LATMContext),
573
.init = latm_decode_init,
574
.close = aac_decode_close,
575
.decode = latm_decode_frame,
576
.sample_fmts = (const enum AVSampleFormat[]) {
577
AV_SAMPLE_FMT_FLTP, AV_SAMPLE_FMT_NONE
578
},
579
.capabilities = AV_CODEC_CAP_CHANNEL_CONF | AV_CODEC_CAP_DR1,
580
.caps_internal = FF_CODEC_CAP_INIT_THREADSAFE,
581
.channel_layouts = aac_channel_layout,
582
.flush = flush,
583
.profiles = NULL_IF_CONFIG_SMALL(ff_aac_profiles),
584
};
585
586