/*1* AAC definitions and structures2* Copyright (c) 2005-2006 Oded Shimon ( ods15 ods15 dyndns org )3* Copyright (c) 2006-2007 Maxim Gavrilov ( maxim.gavrilov gmail com )4*5* This file is part of FFmpeg.6*7* FFmpeg is free software; you can redistribute it and/or8* modify it under the terms of the GNU Lesser General Public9* License as published by the Free Software Foundation; either10* version 2.1 of the License, or (at your option) any later version.11*12* FFmpeg is distributed in the hope that it will be useful,13* but WITHOUT ANY WARRANTY; without even the implied warranty of14* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU15* Lesser General Public License for more details.16*17* You should have received a copy of the GNU Lesser General Public18* License along with FFmpeg; if not, write to the Free Software19* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA20*/2122/**23* @file24* AAC definitions and structures25* @author Oded Shimon ( ods15 ods15 dyndns org )26* @author Maxim Gavrilov ( maxim.gavrilov gmail com )27*/2829#ifndef AVCODEC_AAC_H30#define AVCODEC_AAC_H313233#include "aac_defines.h"34#include "libavutil/float_dsp.h"35#include "libavutil/fixed_dsp.h"36#include "avcodec.h"37#if !USE_FIXED38#include "imdct15.h"39#endif40#include "fft.h"41#include "mpeg4audio.h"42#include "sbr.h"4344#include <stdint.h>4546#define MAX_CHANNELS 6447#define MAX_ELEM_ID 164849#define TNS_MAX_ORDER 2050#define MAX_LTP_LONG_SFB 405152#define CLIP_AVOIDANCE_FACTOR 0.95f5354enum RawDataBlockType {55TYPE_SCE,56TYPE_CPE,57TYPE_CCE,58TYPE_LFE,59TYPE_DSE,60TYPE_PCE,61TYPE_FIL,62TYPE_END,63};6465enum ExtensionPayloadID {66EXT_FILL,67EXT_FILL_DATA,68EXT_DATA_ELEMENT,69EXT_DYNAMIC_RANGE = 0xb,70EXT_SBR_DATA = 0xd,71EXT_SBR_DATA_CRC = 0xe,72};7374enum WindowSequence {75ONLY_LONG_SEQUENCE,76LONG_START_SEQUENCE,77EIGHT_SHORT_SEQUENCE,78LONG_STOP_SEQUENCE,79};8081enum BandType {82ZERO_BT = 0, ///< Scalefactors and spectral data are all zero.83FIRST_PAIR_BT = 5, ///< This and later band types encode two values (rather than four) with one code word.84ESC_BT = 11, ///< Spectral data are coded with an escape sequence.85RESERVED_BT = 12, ///< Band types following are encoded differently from others.86NOISE_BT = 13, ///< Spectral data are scaled white noise not coded in the bitstream.87INTENSITY_BT2 = 14, ///< Scalefactor data are intensity stereo positions (out of phase).88INTENSITY_BT = 15, ///< Scalefactor data are intensity stereo positions (in phase).89};9091#define IS_CODEBOOK_UNSIGNED(x) (((x) - 1) & 10)9293enum ChannelPosition {94AAC_CHANNEL_OFF = 0,95AAC_CHANNEL_FRONT = 1,96AAC_CHANNEL_SIDE = 2,97AAC_CHANNEL_BACK = 3,98AAC_CHANNEL_LFE = 4,99AAC_CHANNEL_CC = 5,100};101102/**103* The point during decoding at which channel coupling is applied.104*/105enum CouplingPoint {106BEFORE_TNS,107BETWEEN_TNS_AND_IMDCT,108AFTER_IMDCT = 3,109};110111/**112* Output configuration status113*/114enum OCStatus {115OC_NONE, ///< Output unconfigured116OC_TRIAL_PCE, ///< Output configuration under trial specified by an inband PCE117OC_TRIAL_FRAME, ///< Output configuration under trial specified by a frame header118OC_GLOBAL_HDR, ///< Output configuration set in a global header but not yet locked119OC_LOCKED, ///< Output configuration locked in place120};121122typedef struct OutputConfiguration {123MPEG4AudioConfig m4ac;124uint8_t layout_map[MAX_ELEM_ID*4][3];125int layout_map_tags;126int channels;127uint64_t channel_layout;128enum OCStatus status;129} OutputConfiguration;130131/**132* Predictor State133*/134typedef struct PredictorState {135AAC_FLOAT cor0;136AAC_FLOAT cor1;137AAC_FLOAT var0;138AAC_FLOAT var1;139AAC_FLOAT r0;140AAC_FLOAT r1;141AAC_FLOAT k1;142AAC_FLOAT x_est;143} PredictorState;144145#define MAX_PREDICTORS 672146147#define SCALE_DIV_512 36 ///< scalefactor difference that corresponds to scale difference in 512 times148#define SCALE_ONE_POS 140 ///< scalefactor index that corresponds to scale=1.0149#define SCALE_MAX_POS 255 ///< scalefactor index maximum value150#define SCALE_MAX_DIFF 60 ///< maximum scalefactor difference allowed by standard151#define SCALE_DIFF_ZERO 60 ///< codebook index corresponding to zero scalefactor indices difference152153#define POW_SF2_ZERO 200 ///< ff_aac_pow2sf_tab index corresponding to pow(2, 0);154155#define NOISE_PRE 256 ///< preamble for NOISE_BT, put in bitstream with the first noise band156#define NOISE_PRE_BITS 9 ///< length of preamble157#define NOISE_OFFSET 90 ///< subtracted from global gain, used as offset for the preamble158159/**160* Long Term Prediction161*/162typedef struct LongTermPrediction {163int8_t present;164int16_t lag;165int coef_idx;166INTFLOAT coef;167int8_t used[MAX_LTP_LONG_SFB];168} LongTermPrediction;169170/**171* Individual Channel Stream172*/173typedef struct IndividualChannelStream {174uint8_t max_sfb; ///< number of scalefactor bands per group175enum WindowSequence window_sequence[2];176uint8_t use_kb_window[2]; ///< If set, use Kaiser-Bessel window, otherwise use a sine window.177int num_window_groups;178uint8_t group_len[8];179LongTermPrediction ltp;180const uint16_t *swb_offset; ///< table of offsets to the lowest spectral coefficient of a scalefactor band, sfb, for a particular window181const uint8_t *swb_sizes; ///< table of scalefactor band sizes for a particular window182int num_swb; ///< number of scalefactor window bands183int num_windows;184int tns_max_bands;185int predictor_present;186int predictor_initialized;187int predictor_reset_group;188int predictor_reset_count[31]; ///< used by encoder to count prediction resets189uint8_t prediction_used[41];190uint8_t window_clipping[8]; ///< set if a certain window is near clipping191float clip_avoidance_factor; ///< set if any window is near clipping to the necessary atennuation factor to avoid it192} IndividualChannelStream;193194/**195* Temporal Noise Shaping196*/197typedef struct TemporalNoiseShaping {198int present;199int n_filt[8];200int length[8][4];201int direction[8][4];202int order[8][4];203int coef_idx[8][4][TNS_MAX_ORDER];204INTFLOAT coef[8][4][TNS_MAX_ORDER];205} TemporalNoiseShaping;206207/**208* Dynamic Range Control - decoded from the bitstream but not processed further.209*/210typedef struct DynamicRangeControl {211int pce_instance_tag; ///< Indicates with which program the DRC info is associated.212int dyn_rng_sgn[17]; ///< DRC sign information; 0 - positive, 1 - negative213int dyn_rng_ctl[17]; ///< DRC magnitude information214int exclude_mask[MAX_CHANNELS]; ///< Channels to be excluded from DRC processing.215int band_incr; ///< Number of DRC bands greater than 1 having DRC info.216int interpolation_scheme; ///< Indicates the interpolation scheme used in the SBR QMF domain.217int band_top[17]; ///< Indicates the top of the i-th DRC band in units of 4 spectral lines.218int prog_ref_level; /**< A reference level for the long-term program audio level for all219* channels combined.220*/221} DynamicRangeControl;222223typedef struct Pulse {224int num_pulse;225int start;226int pos[4];227int amp[4];228} Pulse;229230/**231* coupling parameters232*/233typedef struct ChannelCoupling {234enum CouplingPoint coupling_point; ///< The point during decoding at which coupling is applied.235int num_coupled; ///< number of target elements236enum RawDataBlockType type[8]; ///< Type of channel element to be coupled - SCE or CPE.237int id_select[8]; ///< element id238int ch_select[8]; /**< [0] shared list of gains; [1] list of gains for right channel;239* [2] list of gains for left channel; [3] lists of gains for both channels240*/241INTFLOAT gain[16][120];242} ChannelCoupling;243244/**245* Single Channel Element - used for both SCE and LFE elements.246*/247typedef struct SingleChannelElement {248IndividualChannelStream ics;249TemporalNoiseShaping tns;250Pulse pulse;251enum BandType band_type[128]; ///< band types252enum BandType band_alt[128]; ///< alternative band type (used by encoder)253int band_type_run_end[120]; ///< band type run end points254INTFLOAT sf[120]; ///< scalefactors255int sf_idx[128]; ///< scalefactor indices (used by encoder)256uint8_t zeroes[128]; ///< band is not coded (used by encoder)257uint8_t can_pns[128]; ///< band is allowed to PNS (informative)258float is_ener[128]; ///< Intensity stereo pos (used by encoder)259float pns_ener[128]; ///< Noise energy values (used by encoder)260DECLARE_ALIGNED(32, INTFLOAT, pcoeffs)[1024]; ///< coefficients for IMDCT, pristine261DECLARE_ALIGNED(32, INTFLOAT, coeffs)[1024]; ///< coefficients for IMDCT, maybe processed262DECLARE_ALIGNED(32, INTFLOAT, saved)[1536]; ///< overlap263DECLARE_ALIGNED(32, INTFLOAT, ret_buf)[2048]; ///< PCM output buffer264DECLARE_ALIGNED(16, INTFLOAT, ltp_state)[3072]; ///< time signal for LTP265DECLARE_ALIGNED(32, AAC_FLOAT, lcoeffs)[1024]; ///< MDCT of LTP coefficients (used by encoder)266DECLARE_ALIGNED(32, AAC_FLOAT, prcoeffs)[1024]; ///< Main prediction coefs (used by encoder)267PredictorState predictor_state[MAX_PREDICTORS];268INTFLOAT *ret; ///< PCM output269} SingleChannelElement;270271/**272* channel element - generic struct for SCE/CPE/CCE/LFE273*/274typedef struct ChannelElement {275int present;276// CPE specific277int common_window; ///< Set if channels share a common 'IndividualChannelStream' in bitstream.278int ms_mode; ///< Signals mid/side stereo flags coding mode (used by encoder)279uint8_t is_mode; ///< Set if any bands have been encoded using intensity stereo (used by encoder)280uint8_t ms_mask[128]; ///< Set if mid/side stereo is used for each scalefactor window band281uint8_t is_mask[128]; ///< Set if intensity stereo is used (used by encoder)282// shared283SingleChannelElement ch[2];284// CCE specific285ChannelCoupling coup;286SpectralBandReplication sbr;287} ChannelElement;288289/**290* main AAC context291*/292struct AACContext {293AVClass *class;294AVCodecContext *avctx;295AVFrame *frame;296297int is_saved; ///< Set if elements have stored overlap from previous frame.298DynamicRangeControl che_drc;299300/**301* @name Channel element related data302* @{303*/304ChannelElement *che[4][MAX_ELEM_ID];305ChannelElement *tag_che_map[4][MAX_ELEM_ID];306int tags_mapped;307int warned_remapping_once;308/** @} */309310/**311* @name temporary aligned temporary buffers312* (We do not want to have these on the stack.)313* @{314*/315DECLARE_ALIGNED(32, INTFLOAT, buf_mdct)[1024];316/** @} */317318/**319* @name Computed / set up during initialization320* @{321*/322FFTContext mdct;323FFTContext mdct_small;324FFTContext mdct_ld;325FFTContext mdct_ltp;326#if USE_FIXED327AVFixedDSPContext *fdsp;328#else329IMDCT15Context *mdct480;330AVFloatDSPContext *fdsp;331#endif /* USE_FIXED */332int random_state;333/** @} */334335/**336* @name Members used for output337* @{338*/339SingleChannelElement *output_element[MAX_CHANNELS]; ///< Points to each SingleChannelElement340/** @} */341342343/**344* @name Japanese DTV specific extension345* @{346*/347int force_dmono_mode;///< 0->not dmono, 1->use first channel, 2->use second channel348int dmono_mode; ///< 0->not dmono, 1->use first channel, 2->use second channel349/** @} */350351DECLARE_ALIGNED(32, INTFLOAT, temp)[128];352353OutputConfiguration oc[2];354int warned_num_aac_frames;355356/* aacdec functions pointers */357void (*imdct_and_windowing)(AACContext *ac, SingleChannelElement *sce);358void (*apply_ltp)(AACContext *ac, SingleChannelElement *sce);359void (*apply_tns)(INTFLOAT coef[1024], TemporalNoiseShaping *tns,360IndividualChannelStream *ics, int decode);361void (*windowing_and_mdct_ltp)(AACContext *ac, INTFLOAT *out,362INTFLOAT *in, IndividualChannelStream *ics);363void (*update_ltp)(AACContext *ac, SingleChannelElement *sce);364void (*vector_pow43)(int *coefs, int len);365void (*subband_scale)(int *dst, int *src, int scale, int offset, int len);366367};368369void ff_aacdec_init_mips(AACContext *c);370371#endif /* AVCODEC_AAC_H */372373374