author     Lynne <dev@lynne.ee>    2024-05-16 11:36:12 +0200
committer  Lynne <dev@lynne.ee>    2024-06-02 18:34:45 +0200
commit     eee5fa08083c1df6d0210bf215b658bc3017f98d (patch)
tree       8c222da326d48f19c395d7f631042f03d3dcb726
parent     23b45d7e20b0f60c8c5a00c631b95aa0f9e19448 (diff)
download   ffmpeg-eee5fa08083c1df6d0210bf215b658bc3017f98d.tar.gz
aacdec: add a decoder for AAC USAC (xHE-AAC)
This commit adds a decoder for the frequency-domain part of USAC.

What works:
- Mono
- Stereo (no prediction)
- Stereo (mid/side coding)
- Stereo (complex prediction)

What's left:
- SBR
- Speech coding

Known issues:
- Desync with certain sequences
- Preroll crossover missing (shouldn't matter, bitrate adaptation only)
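For orientation, a minimal per-coefficient sketch (not part of this patch; the function and variable names are illustrative) of how complex-prediction stereo works in ISO/IEC 23003-3: the decoder predicts the side signal from the real downmix spectrum and an estimated imaginary part using a per-band complex coefficient alpha, adds the coded residual, then applies the usual mid/side upmix.

/* Hedged sketch of complex-prediction upmixing; pred_dir handling and the
 * per-band storage of alpha are simplified away. */
static void cplx_pred_upmix_sketch(float *left, float *right,
                                   const float *dmix_re, const float *dmix_im,
                                   const float *residual,
                                   float alpha_re, float alpha_im, int len)
{
    for (int i = 0; i < len; i++) {
        /* side = residual + alpha * (complex) downmix */
        float side = residual[i] + alpha_re * dmix_re[i] + alpha_im * dmix_im[i];
        left[i]  = dmix_re[i] + side;
        right[i] = dmix_re[i] - side;
    }
}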
-rw-r--r--   libavcodec/aac/Makefile                  3
-rw-r--r--   libavcodec/aac/aacdec.c                188
-rw-r--r--   libavcodec/aac/aacdec.h                187
-rw-r--r--   libavcodec/aac/aacdec_ac.c             208
-rw-r--r--   libavcodec/aac/aacdec_ac.h              54
-rw-r--r--   libavcodec/aac/aacdec_dsp_template.c     4
-rw-r--r--   libavcodec/aac/aacdec_latm.h            14
-rw-r--r--   libavcodec/aac/aacdec_lpd.c            198
-rw-r--r--   libavcodec/aac/aacdec_lpd.h             33
-rw-r--r--   libavcodec/aac/aacdec_usac.c          1608
-rw-r--r--   libavcodec/aac/aacdec_usac.h            37
-rw-r--r--   libavcodec/aactab.c                     42
-rw-r--r--   libavcodec/aactab.h                     10
13 files changed, 2510 insertions, 76 deletions
diff --git a/libavcodec/aac/Makefile b/libavcodec/aac/Makefile
index c3e525d373..70b1dca274 100644
--- a/libavcodec/aac/Makefile
+++ b/libavcodec/aac/Makefile
@@ -2,6 +2,7 @@ clean::
$(RM) $(CLEANSUFFIXES:%=libavcodec/aac/%)
OBJS-$(CONFIG_AAC_DECODER) += aac/aacdec.o aac/aacdec_tab.o \
- aac/aacdec_float.o
+ aac/aacdec_float.o aac/aacdec_usac.o \
+ aac/aacdec_ac.o aac/aacdec_lpd.o
OBJS-$(CONFIG_AAC_FIXED_DECODER) += aac/aacdec.o aac/aacdec_tab.o \
aac/aacdec_fixed.o
diff --git a/libavcodec/aac/aacdec.c b/libavcodec/aac/aacdec.c
index 6f37ac5361..2b8322fc68 100644
--- a/libavcodec/aac/aacdec.c
+++ b/libavcodec/aac/aacdec.c
@@ -40,6 +40,7 @@
#include "aacdec.h"
#include "aacdec_tab.h"
+#include "aacdec_usac.h"
#include "libavcodec/aac.h"
#include "libavcodec/aac_defines.h"
@@ -535,6 +536,8 @@ static av_cold void flush(AVCodecContext *avctx)
}
}
}
+
+ ff_aac_usac_reset_state(ac, &ac->oc[1]);
}
/**
@@ -993,13 +996,14 @@ static int decode_eld_specific_config(AACDecContext *ac, AVCodecContext *avctx,
*/
static int decode_audio_specific_config_gb(AACDecContext *ac,
AVCodecContext *avctx,
- MPEG4AudioConfig *m4ac,
+ OutputConfiguration *oc,
GetBitContext *gb,
int get_bit_alignment,
int sync_extension)
{
int i, ret;
GetBitContext gbc = *gb;
+ MPEG4AudioConfig *m4ac = &oc->m4ac;
MPEG4AudioConfig m4ac_bak = *m4ac;
if ((i = ff_mpeg4audio_get_config_gb(m4ac, &gbc, sync_extension, avctx)) < 0) {
@@ -1033,14 +1037,22 @@ static int decode_audio_specific_config_gb(AACDecContext *ac,
case AOT_ER_AAC_LC:
case AOT_ER_AAC_LD:
if ((ret = decode_ga_specific_config(ac, avctx, gb, get_bit_alignment,
- m4ac, m4ac->chan_config)) < 0)
+ &oc->m4ac, m4ac->chan_config)) < 0)
return ret;
break;
case AOT_ER_AAC_ELD:
if ((ret = decode_eld_specific_config(ac, avctx, gb,
- m4ac, m4ac->chan_config)) < 0)
+ &oc->m4ac, m4ac->chan_config)) < 0)
+ return ret;
+ break;
+#if CONFIG_AAC_DECODER
+ case AOT_USAC_NOSBR: /* fallthrough */
+ case AOT_USAC:
+ if ((ret = ff_aac_usac_config_decode(ac, avctx, gb,
+ oc, m4ac->chan_config)) < 0)
return ret;
break;
+#endif
default:
avpriv_report_missing_feature(avctx,
"Audio object type %s%d",
@@ -1060,7 +1072,7 @@ static int decode_audio_specific_config_gb(AACDecContext *ac,
static int decode_audio_specific_config(AACDecContext *ac,
AVCodecContext *avctx,
- MPEG4AudioConfig *m4ac,
+ OutputConfiguration *oc,
const uint8_t *data, int64_t bit_size,
int sync_extension)
{
@@ -1080,7 +1092,7 @@ static int decode_audio_specific_config(AACDecContext *ac,
if ((ret = init_get_bits(&gb, data, bit_size)) < 0)
return ret;
- return decode_audio_specific_config_gb(ac, avctx, m4ac, &gb, 0,
+ return decode_audio_specific_config_gb(ac, avctx, oc, &gb, 0,
sync_extension);
}
@@ -1104,6 +1116,15 @@ static av_cold int decode_close(AVCodecContext *avctx)
{
AACDecContext *ac = avctx->priv_data;
+ for (int i = 0; i < 2; i++) {
+ OutputConfiguration *oc = &ac->oc[i];
+ AACUSACConfig *usac = &oc->usac;
+ for (int j = 0; j < usac->nb_elems; j++) {
+ AACUsacElemConfig *ec = &usac->elems[j];
+ av_freep(&ec->ext.pl_data);
+ }
+ }
+
for (int type = 0; type < FF_ARRAY_ELEMS(ac->che); type++) {
for (int i = 0; i < MAX_ELEM_ID; i++) {
if (ac->che[type][i]) {
@@ -1181,7 +1202,7 @@ av_cold int ff_aac_decode_init(AVCodecContext *avctx)
ac->oc[1].m4ac.sample_rate = avctx->sample_rate;
if (avctx->extradata_size > 0) {
- if ((ret = decode_audio_specific_config(ac, ac->avctx, &ac->oc[1].m4ac,
+ if ((ret = decode_audio_specific_config(ac, ac->avctx, &ac->oc[1],
avctx->extradata,
avctx->extradata_size * 8LL,
1)) < 0)
@@ -1549,9 +1570,16 @@ static int decode_pulses(Pulse *pulse, GetBitContext *gb,
int ff_aac_decode_tns(AACDecContext *ac, TemporalNoiseShaping *tns,
GetBitContext *gb, const IndividualChannelStream *ics)
{
+ int tns_max_order = INT32_MAX;
+ const int is_usac = ac->oc[1].m4ac.object_type == AOT_USAC ||
+ ac->oc[1].m4ac.object_type == AOT_USAC_NOSBR;
int w, filt, i, coef_len, coef_res, coef_compress;
const int is8 = ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE;
- const int tns_max_order = is8 ? 7 : ac->oc[1].m4ac.object_type == AOT_AAC_MAIN ? 20 : 12;
+
+ /* USAC doesn't seem to have a limit */
+ if (!is_usac)
+ tns_max_order = is8 ? 7 : ac->oc[1].m4ac.object_type == AOT_AAC_MAIN ? 20 : 12;
+
for (w = 0; w < ics->num_windows; w++) {
if ((tns->n_filt[w] = get_bits(gb, 2 - is8))) {
coef_res = get_bits1(gb);
@@ -1560,7 +1588,12 @@ int ff_aac_decode_tns(AACDecContext *ac, TemporalNoiseShaping *tns,
int tmp2_idx;
tns->length[w][filt] = get_bits(gb, 6 - 2 * is8);
- if ((tns->order[w][filt] = get_bits(gb, 5 - 2 * is8)) > tns_max_order) {
+ if (is_usac)
+ tns->order[w][filt] = get_bits(gb, 4 - is8);
+ else
+ tns->order[w][filt] = get_bits(gb, 5 - (2 * is8));
+
+ if (tns->order[w][filt] > tns_max_order) {
av_log(ac->avctx, AV_LOG_ERROR,
"TNS filter order %d is greater than maximum %d.\n",
tns->order[w][filt], tns_max_order);
@@ -1598,6 +1631,7 @@ static void decode_mid_side_stereo(ChannelElement *cpe, GetBitContext *gb,
{
int idx;
int max_idx = cpe->ch[0].ics.num_window_groups * cpe->ch[0].ics.max_sfb;
+ cpe->max_sfb_ste = cpe->ch[0].ics.max_sfb;
if (ms_present == 1) {
for (idx = 0; idx < max_idx; idx++)
cpe->ms_mask[idx] = get_bits1(gb);
@@ -2182,42 +2216,19 @@ static int aac_decode_er_frame(AVCodecContext *avctx, AVFrame *frame,
return 0;
}
-static int aac_decode_frame_int(AVCodecContext *avctx, AVFrame *frame,
- int *got_frame_ptr, GetBitContext *gb,
- const AVPacket *avpkt)
+static int decode_frame_ga(AVCodecContext *avctx, AACDecContext *ac,
+ GetBitContext *gb, int *got_frame_ptr)
{
- AACDecContext *ac = avctx->priv_data;
- ChannelElement *che = NULL, *che_prev = NULL;
+ int err;
+ int is_dmono;
+ int elem_id;
enum RawDataBlockType elem_type, che_prev_type = TYPE_END;
- int err, elem_id;
- int samples = 0, multiplier, audio_found = 0, pce_found = 0;
- int is_dmono, sce_count = 0;
- int payload_alignment;
uint8_t che_presence[4][MAX_ELEM_ID] = {{0}};
+ ChannelElement *che = NULL, *che_prev = NULL;
+ int samples = 0, multiplier, audio_found = 0, pce_found = 0, sce_count = 0;
+ AVFrame *frame = ac->frame;
- ac->frame = frame;
-
- if (show_bits(gb, 12) == 0xfff) {
- if ((err = parse_adts_frame_header(ac, gb)) < 0) {
- av_log(avctx, AV_LOG_ERROR, "Error decoding AAC frame header.\n");
- goto fail;
- }
- if (ac->oc[1].m4ac.sampling_index > 12) {
- av_log(ac->avctx, AV_LOG_ERROR, "invalid sampling rate index %d\n", ac->oc[1].m4ac.sampling_index);
- err = AVERROR_INVALIDDATA;
- goto fail;
- }
- }
-
- if ((err = frame_configure_elements(avctx)) < 0)
- goto fail;
-
- // The AV_PROFILE_AAC_* defines are all object_type - 1
- // This may lead to an undefined profile being signaled
- ac->avctx->profile = ac->oc[1].m4ac.object_type - 1;
-
- payload_alignment = get_bits_count(gb);
- ac->tags_mapped = 0;
+ int payload_alignment = get_bits_count(gb);
// parse
while ((elem_type = get_bits(gb, 3)) != TYPE_END) {
elem_id = get_bits(gb, 4);
@@ -2225,28 +2236,23 @@ static int aac_decode_frame_int(AVCodecContext *avctx, AVFrame *frame,
if (avctx->debug & FF_DEBUG_STARTCODE)
av_log(avctx, AV_LOG_DEBUG, "Elem type:%x id:%x\n", elem_type, elem_id);
- if (!avctx->ch_layout.nb_channels && elem_type != TYPE_PCE) {
- err = AVERROR_INVALIDDATA;
- goto fail;
- }
+ if (!avctx->ch_layout.nb_channels && elem_type != TYPE_PCE)
+ return AVERROR_INVALIDDATA;
if (elem_type < TYPE_DSE) {
if (che_presence[elem_type][elem_id]) {
int error = che_presence[elem_type][elem_id] > 1;
av_log(ac->avctx, error ? AV_LOG_ERROR : AV_LOG_DEBUG, "channel element %d.%d duplicate\n",
elem_type, elem_id);
- if (error) {
- err = AVERROR_INVALIDDATA;
- goto fail;
- }
+ if (error)
+ return AVERROR_INVALIDDATA;
}
che_presence[elem_type][elem_id]++;
if (!(che=ff_aac_get_che(ac, elem_type, elem_id))) {
av_log(ac->avctx, AV_LOG_ERROR, "channel element %d.%d is not allocated\n",
elem_type, elem_id);
- err = AVERROR_INVALIDDATA;
- goto fail;
+ return AVERROR_INVALIDDATA;
}
samples = ac->oc[1].m4ac.frame_length_short ? 960 : 1024;
che->present = 1;
@@ -2283,10 +2289,8 @@ static int aac_decode_frame_int(AVCodecContext *avctx, AVFrame *frame,
int tags;
int pushed = push_output_configuration(ac);
- if (pce_found && !pushed) {
- err = AVERROR_INVALIDDATA;
- goto fail;
- }
+ if (pce_found && !pushed)
+ return AVERROR_INVALIDDATA;
tags = decode_pce(avctx, &ac->oc[1].m4ac, layout_map, gb,
payload_alignment);
@@ -2312,8 +2316,7 @@ static int aac_decode_frame_int(AVCodecContext *avctx, AVFrame *frame,
elem_id += get_bits(gb, 8) - 1;
if (get_bits_left(gb) < 8 * elem_id) {
av_log(avctx, AV_LOG_ERROR, "TYPE_FIL: "overread_err);
- err = AVERROR_INVALIDDATA;
- goto fail;
+ return AVERROR_INVALIDDATA;
}
err = 0;
while (elem_id > 0) {
@@ -2337,19 +2340,16 @@ static int aac_decode_frame_int(AVCodecContext *avctx, AVFrame *frame,
}
if (err)
- goto fail;
+ return err;
if (get_bits_left(gb) < 3) {
av_log(avctx, AV_LOG_ERROR, overread_err);
- err = AVERROR_INVALIDDATA;
- goto fail;
+ return AVERROR_INVALIDDATA;
}
}
- if (!avctx->ch_layout.nb_channels) {
- *got_frame_ptr = 0;
+ if (!avctx->ch_layout.nb_channels)
return 0;
- }
multiplier = (ac->oc[1].m4ac.sbr == 1) ? ac->oc[1].m4ac.ext_sample_rate > ac->oc[1].m4ac.sample_rate : 0;
samples <<= multiplier;
@@ -2364,16 +2364,17 @@ static int aac_decode_frame_int(AVCodecContext *avctx, AVFrame *frame,
if (!ac->frame->data[0] && samples) {
av_log(avctx, AV_LOG_ERROR, "no frame data found\n");
- err = AVERROR_INVALIDDATA;
- goto fail;
+ return AVERROR_INVALIDDATA;
}
if (samples) {
ac->frame->nb_samples = samples;
ac->frame->sample_rate = avctx->sample_rate;
- } else
+ *got_frame_ptr = 1;
+ } else {
av_frame_unref(ac->frame);
- *got_frame_ptr = !!samples;
+ *got_frame_ptr = 0;
+ }
/* for dual-mono audio (SCE + SCE) */
is_dmono = ac->dmono_mode && sce_count == 2 &&
@@ -2387,6 +2388,59 @@ static int aac_decode_frame_int(AVCodecContext *avctx, AVFrame *frame,
}
return 0;
+}
+
+static int aac_decode_frame_int(AVCodecContext *avctx, AVFrame *frame,
+ int *got_frame_ptr, GetBitContext *gb,
+ const AVPacket *avpkt)
+{
+ int err;
+ AACDecContext *ac = avctx->priv_data;
+
+ ac->frame = frame;
+ *got_frame_ptr = 0;
+
+ if (show_bits(gb, 12) == 0xfff) {
+ if ((err = parse_adts_frame_header(ac, gb)) < 0) {
+ av_log(avctx, AV_LOG_ERROR, "Error decoding AAC frame header.\n");
+ goto fail;
+ }
+ if (ac->oc[1].m4ac.sampling_index > 12) {
+ av_log(ac->avctx, AV_LOG_ERROR, "invalid sampling rate index %d\n", ac->oc[1].m4ac.sampling_index);
+ err = AVERROR_INVALIDDATA;
+ goto fail;
+ }
+ }
+
+ if ((err = frame_configure_elements(avctx)) < 0)
+ goto fail;
+
+ // The AV_PROFILE_AAC_* defines are all object_type - 1
+ // This may lead to an undefined profile being signaled
+ ac->avctx->profile = ac->oc[1].m4ac.object_type - 1;
+
+ ac->tags_mapped = 0;
+
+ if ((ac->oc[1].m4ac.object_type == AOT_USAC) ||
+ (ac->oc[1].m4ac.object_type == AOT_USAC_NOSBR)) {
+ if (ac->is_fixed) {
+ avpriv_report_missing_feature(ac->avctx,
+ "AAC USAC fixed-point decoding");
+ return AVERROR_PATCHWELCOME;
+ }
+#if CONFIG_AAC_DECODER
+ err = ff_aac_usac_decode_frame(avctx, ac, gb, got_frame_ptr);
+ if (err < 0)
+ goto fail;
+#endif
+ } else {
+ err = decode_frame_ga(avctx, ac, gb, got_frame_ptr);
+ if (err < 0)
+ goto fail;
+ }
+
+ return err;
+
fail:
pop_output_configuration(ac);
return err;
@@ -2414,7 +2468,7 @@ static int aac_decode_frame(AVCodecContext *avctx, AVFrame *frame,
if (new_extradata) {
/* discard previous configuration */
ac->oc[1].status = OC_NONE;
- err = decode_audio_specific_config(ac, ac->avctx, &ac->oc[1].m4ac,
+ err = decode_audio_specific_config(ac, ac->avctx, &ac->oc[1],
new_extradata,
new_extradata_size * 8LL, 1);
if (err < 0) {
diff --git a/libavcodec/aac/aacdec.h b/libavcodec/aac/aacdec.h
index 8d1eb74066..ee21a94007 100644
--- a/libavcodec/aac/aacdec.h
+++ b/libavcodec/aac/aacdec.h
@@ -42,6 +42,8 @@
#include "libavcodec/avcodec.h"
#include "libavcodec/mpeg4audio.h"
+#include "aacdec_ac.h"
+
typedef struct AACDecContext AACDecContext;
/**
@@ -69,6 +71,32 @@ enum CouplingPoint {
AFTER_IMDCT = 3,
};
+enum AACUsacElem {
+ ID_USAC_SCE = 0,
+ ID_USAC_CPE = 1,
+ ID_USAC_LFE = 2,
+ ID_USAC_EXT = 3,
+};
+
+enum ExtensionHeaderType {
+ ID_CONFIG_EXT_FILL = 0,
+ ID_CONFIG_EXT_LOUDNESS_INFO = 2,
+ ID_CONFIG_EXT_STREAM_ID = 7,
+};
+
+enum AACUsacExtension {
+ ID_EXT_ELE_FILL,
+ ID_EXT_ELE_MPEGS,
+ ID_EXT_ELE_SAOC,
+ ID_EXT_ELE_AUDIOPREROLL,
+ ID_EXT_ELE_UNI_DRC,
+};
+
+enum AACUSACLoudnessExt {
+ UNIDRCLOUDEXT_TERM = 0x0,
+ UNIDRCLOUDEXT_EQ = 0x1,
+};
+
// Supposed to be equal to AAC_RENAME() in case of USE_FIXED.
#define RENAME_FIXED(name) name ## _fixed
@@ -93,6 +121,40 @@ typedef struct LongTermPrediction {
int8_t used[MAX_LTP_LONG_SFB];
} LongTermPrediction;
+/* Per channel core mode */
+typedef struct AACUsacElemData {
+ uint8_t core_mode;
+ uint8_t scale_factor_grouping;
+
+ /* Timewarping ratio */
+#define NUM_TW_NODES 16
+ uint8_t tw_ratio[NUM_TW_NODES];
+
+ struct {
+ uint8_t acelp_core_mode : 3;
+ uint8_t lpd_mode : 5;
+
+ uint8_t bpf_control_info : 1;
+ uint8_t core_mode_last : 1;
+ uint8_t fac_data_present : 1;
+
+ int last_lpd_mode;
+ } ldp;
+
+ struct {
+ unsigned int seed;
+ uint8_t level : 3;
+ uint8_t offset : 5;
+ } noise;
+
+ struct {
+ uint8_t gain;
+ uint32_t kv[8 /* (1024 / 16) / 8 */][8];
+ } fac;
+
+ AACArithState ac;
+} AACUsacElemData;
+
/**
* Individual Channel Stream
*/
@@ -145,11 +207,13 @@ typedef struct ChannelCoupling {
*/
typedef struct SingleChannelElement {
IndividualChannelStream ics;
+ AACUsacElemData ue; ///< USAC element data
TemporalNoiseShaping tns;
enum BandType band_type[128]; ///< band types
int sfo[128]; ///< scalefactor offsets
INTFLOAT_UNION(sf, [128]); ///< scalefactors (8 windows * 16 sfb max)
INTFLOAT_ALIGNED_UNION(32, coeffs, 1024); ///< coefficients for IMDCT, maybe processed
+ INTFLOAT_ALIGNED_UNION(32, prev_coeffs, 1024); ///< unscaled previous contents of coeffs[] for USAC
INTFLOAT_ALIGNED_UNION(32, saved, 1536); ///< overlap
INTFLOAT_ALIGNED_UNION(32, ret_buf, 2048); ///< PCM output buffer
INTFLOAT_ALIGNED_UNION(16, ltp_state, 3072); ///< time signal for LTP
@@ -163,25 +227,148 @@ typedef struct SingleChannelElement {
};
} SingleChannelElement;
+typedef struct AACUsacStereo {
+ uint8_t common_window;
+ uint8_t common_tw;
+
+ uint8_t ms_mask_mode;
+ uint8_t config_idx;
+
+ /* Complex prediction */
+ uint8_t use_prev_frame;
+ uint8_t pred_dir;
+ uint8_t complex_coef;
+
+ uint8_t pred_used[128];
+
+ INTFLOAT_ALIGNED_UNION(32, alpha_q_re, 1024);
+ INTFLOAT_ALIGNED_UNION(32, alpha_q_im, 1024);
+ INTFLOAT_ALIGNED_UNION(32, prev_alpha_q_re, 1024);
+ INTFLOAT_ALIGNED_UNION(32, prev_alpha_q_im, 1024);
+
+ INTFLOAT_ALIGNED_UNION(32, dmix_re, 1024);
+ INTFLOAT_ALIGNED_UNION(32, prev_dmix_re, 1024); /* Recalculated on every frame */
+ INTFLOAT_ALIGNED_UNION(32, dmix_im, 1024); /* Final prediction data */
+} AACUsacStereo;
+
/**
* channel element - generic struct for SCE/CPE/CCE/LFE
*/
typedef struct ChannelElement {
int present;
// CPE specific
+ uint8_t max_sfb_ste; ///< (USAC) Maximum of both max_sfb values
uint8_t ms_mask[128]; ///< Set if mid/side stereo is used for each scalefactor window band
// shared
SingleChannelElement ch[2];
// CCE specific
ChannelCoupling coup;
+ // USAC stereo coupling data
+ AACUsacStereo us;
} ChannelElement;
+typedef struct AACUSACLoudnessInfo {
+ uint8_t drc_set_id : 6;
+ uint8_t downmix_id : 7;
+ struct {
+ uint16_t lvl : 12;
+ uint8_t present : 1;
+ } sample_peak;
+
+ struct {
+ uint16_t lvl : 12;
+ uint8_t measurement : 4;
+ uint8_t reliability : 2;
+ uint8_t present : 1;
+ } true_peak;
+
+ uint8_t nb_measurements : 4;
+ struct {
+ uint8_t method_def : 4;
+ uint8_t method_val;
+ uint8_t measurement : 4;
+ uint8_t reliability : 2;
+ } measurements[16];
+} AACUSACLoudnessInfo;
+
+typedef struct AACUsacElemConfig {
+ enum AACUsacElem type;
+
+ uint8_t tw_mdct : 1;
+ uint8_t noise_fill : 1;
+
+ uint8_t stereo_config_index;
+
+ struct {
+ int ratio;
+
+ uint8_t harmonic_sbr : 1; /* harmonicSBR */
+ uint8_t bs_intertes : 1; /* bs_interTes */
+ uint8_t bs_pvc : 1; /* bs_pvc */
+
+ struct {
+ uint8_t start_freq; /* dflt_start_freq */
+ uint8_t stop_freq; /* dflt_stop_freq */
+
+ uint8_t freq_scale; /* dflt_freq_scale */
+ uint8_t alter_scale : 1; /* dflt_alter_scale */
+ uint8_t noise_scale; /* dflt_noise_scale */
+
+ uint8_t limiter_bands; /* dflt_limiter_bands */
+ uint8_t limiter_gains; /* dflt_limiter_gains */
+ uint8_t interpol_freq : 1; /* dflt_interpol_freq */
+ uint8_t smoothing_mode : 1; /* dflt_smoothing_mode */
+ } dflt;
+ } sbr;
+
+ struct {
+ uint8_t freq_res; /* bsFreqRes */
+ uint8_t fixed_gain; /* bsFixedGainDMX */
+ uint8_t temp_shape_config; /* bsTempShapeConfig */
+ uint8_t decorr_config; /* bsDecorrConfig */
+ uint8_t high_rate_mode : 1; /* bsHighRateMode */
+ uint8_t phase_coding : 1; /* bsPhaseCoding */
+
+ uint8_t otts_bands_phase; /* bsOttBandsPhase */
+ uint8_t residual_coding; /* bsResidualCoding */
+ uint8_t residual_bands; /* bsResidualBands */
+ uint8_t pseudo_lr : 1; /* bsPseudoLr */
+ uint8_t env_quant_mode : 1; /* bsEnvQuantMode */
+ } mps;
+
+ struct {
+ enum AACUsacExtension type;
+ uint8_t payload_frag;
+ uint32_t default_len;
+ uint32_t pl_data_offset;
+ uint8_t *pl_data;
+ } ext;
+} AACUsacElemConfig;
+
+typedef struct AACUSACConfig {
+ uint8_t core_sbr_frame_len_idx; /* coreSbrFrameLengthIndex */
+ uint8_t rate_idx;
+ uint16_t core_frame_len;
+ uint16_t stream_identifier;
+
+ AACUsacElemConfig elems[64];
+ int nb_elems;
+
+ struct {
+ uint8_t nb_album;
+ AACUSACLoudnessInfo album_info[64];
+ uint8_t nb_info;
+ AACUSACLoudnessInfo info[64];
+ } loudness;
+} AACUSACConfig;
+
typedef struct OutputConfiguration {
MPEG4AudioConfig m4ac;
uint8_t layout_map[MAX_ELEM_ID*4][3];
int layout_map_tags;
AVChannelLayout ch_layout;
enum OCStatus status;
+ AACUSACConfig usac;
} OutputConfiguration;
/**
diff --git a/libavcodec/aac/aacdec_ac.c b/libavcodec/aac/aacdec_ac.c
new file mode 100644
index 0000000000..7e5077cd19
--- /dev/null
+++ b/libavcodec/aac/aacdec_ac.c
@@ -0,0 +1,208 @@
+/*
+ * AAC definitions and structures
+ * Copyright (c) 2024 Lynne
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavcodec/aactab.h"
+#include "aacdec_ac.h"
+
+uint32_t ff_aac_ac_map_process(AACArithState *state, int reset, int N)
+{
+ float ratio;
+ if (reset) {
+ memset(state->last, 0, sizeof(state->last));
+ state->last_len = N;
+ } else if (state->last_len != N) {
+ int i;
+ uint8_t last[512 /* 2048 / 4 */];
+ memcpy(last, state->last, sizeof(last));
+
+ ratio = state->last_len / (float)N;
+ for (i = 0; i < N/2; i++) {
+ int k = (int)(i * ratio);
+ state->last[i] = last[k];
+ }
+
+ for (; i < FF_ARRAY_ELEMS(state->last); i++)
+ state->last[i] = 0;
+
+ state->last_len = N;
+ }
+
+ state->cur[3] = 0;
+ state->cur[2] = 0;
+ state->cur[1] = 0;
+ state->cur[0] = 1;
+
+ state->state_pre = state->last[0] << 12;
+ return state->last[0] << 12;
+}
+
+uint32_t ff_aac_ac_get_context(AACArithState *state, uint32_t c, int i, int N)
+{
+ c = state->state_pre >> 8;
+ c = c + (state->last[i + 1] << 8);
+ c = (c << 4);
+ c += state->cur[1];
+
+ state->state_pre = c;
+
+ if (i > 3 &&
+ ((state->cur[3] + state->cur[2] + state->cur[1]) < 5))
+ return c + 0x10000;
+
+ return c;
+}
+
+uint32_t ff_aac_ac_get_pk(uint32_t c)
+{
+ int i_min = -1;
+ int i, j;
+ int i_max = FF_ARRAY_ELEMS(ff_aac_ac_lookup_m) - 1;
+ while ((i_max - i_min) > 1) {
+ i = i_min + ((i_max - i_min) / 2);
+ j = ff_aac_ac_hash_m[i];
+ if (c < (j >> 8))
+ i_max = i;
+ else if (c > (j >> 8))
+ i_min = i;
+ else
+ return (j & 0xFF);
+ }
+ return ff_aac_ac_lookup_m[i_max];
+}
+
+void ff_aac_ac_update_context(AACArithState *state, int idx,
+ uint16_t a, uint16_t b)
+{
+ state->cur[0] = a + b + 1;
+ if (state->cur[0] > 0xF)
+ state->cur[0] = 0xF;
+
+ state->cur[3] = state->cur[2];
+ state->cur[2] = state->cur[1];
+ state->cur[1] = state->cur[0];
+
+ state->last[idx] = state->cur[0];
+}
+
+/* Initialize AC */
+void ff_aac_ac_init(AACArith *ac, GetBitContext *gb)
+{
+ ac->low = 0;
+ ac->high = UINT16_MAX;
+ ac->val = get_bits(gb, 16);
+}
+
+uint16_t ff_aac_ac_decode(AACArith *ac, GetBitContext *gb,
+ const uint16_t *cdf, uint16_t cdf_len)
+{
+ int val = ac->val;
+ int low = ac->low;
+ int high = ac->high;
+
+ int sym;
+ int rng = high - low + 1;
+ int c = ((((int)(val - low + 1)) << 14) - ((int)1));
+
+ const uint16_t *p = cdf - 1;
+
+ /* One for each possible CDF length in the spec */
+ switch (cdf_len) {
+ case 2:
+ if ((p[1] * rng) > c)
+ p += 1;
+ break;
+ case 4:
+ if ((p[2] * rng) > c)
+ p += 2;
+ if ((p[1] * rng) > c)
+ p += 1;
+ break;
+ case 17:
+ /* First check if the current probability is even met at all */
+ if ((p[1] * rng) <= c)
+ break;
+ p += 1;
+ for (int i = 8; i >= 1; i >>= 1)
+ if ((p[i] * rng) > c)
+ p += i;
+ break;
+ case 27:
+ if ((p[16] * rng) > c)
+ p += 16;
+ if ((p[8] * rng) > c)
+ p += 8;
+ if (p != (cdf - 1 + 24))
+ if ((p[4] * rng) > c)
+ p += 4;
+ if ((p[2] * rng) > c)
+ p += 2;
+
+ if (p != (cdf - 1 + 24 + 2))
+ if ((p[1] * rng) > c)
+ p += 1;
+ break;
+ default:
+ /* This should never happen */
+ av_assert2(0);
+ }
+
+ sym = (int)((ptrdiff_t)(p - cdf)) + 1;
+ if (sym)
+ high = low + ((rng * cdf[sym - 1]) >> 14) - 1;
+ low += (rng * cdf[sym]) >> 14;
+
+ /* This loop could be done faster */
+ while (1) {
+ if (high < 32768) {
+ ;
+ } else if (low >= 32768) {
+ val -= 32768;
+ low -= 32768;
+ high -= 32768;
+ } else if (low >= 16384 && high < 49152) {
+ val -= 16384;
+ low -= 16384;
+ high -= 16384;
+ } else {
+ break;
+ }
+ low += low;
+ high += high + 1;
+ val = (val << 1) | get_bits1(gb);
+ };
+
+ ac->low = low;
+ ac->high = high;
+ ac->val = val;
+
+ return sym;
+}
+
+void ff_aac_ac_finish(AACArithState *state, int offset, int N)
+{
+ int i;
+
+ for (i = offset; i < N/2; i++)
+ state->last[i] = 1;
+
+ for (; i < FF_ARRAY_ELEMS(state->last); i++)
+ state->last[i] = 0;
+}
diff --git a/libavcodec/aac/aacdec_ac.h b/libavcodec/aac/aacdec_ac.h
new file mode 100644
index 0000000000..0b98c0f0d9
--- /dev/null
+++ b/libavcodec/aac/aacdec_ac.h
@@ -0,0 +1,54 @@
+/*
+ * AAC definitions and structures
+ * Copyright (c) 2024 Lynne
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_AAC_AACDEC_AC_H
+#define AVCODEC_AAC_AACDEC_AC_H
+
+#include "libavcodec/get_bits.h"
+
+typedef struct AACArithState {
+ uint8_t last[512 /* 2048 / 4 */];
+ int last_len;
+ uint8_t cur[4];
+ uint16_t state_pre;
+} AACArithState;
+
+typedef struct AACArith {
+ uint16_t low;
+ uint16_t high;
+ uint16_t val;
+} AACArith;
+
+#define FF_AAC_AC_ESCAPE 16
+
+uint32_t ff_aac_ac_map_process(AACArithState *state, int reset, int len);
+uint32_t ff_aac_ac_get_context(AACArithState *state, uint32_t old_c, int idx, int len);
+uint32_t ff_aac_ac_get_pk(uint32_t c);
+
+void ff_aac_ac_update_context(AACArithState *state, int idx, uint16_t a, uint16_t b);
+void ff_aac_ac_init(AACArith *ac, GetBitContext *gb);
+
+uint16_t ff_aac_ac_decode(AACArith *ac, GetBitContext *gb,
+ const uint16_t *cdf, uint16_t cdf_len);
+
+void ff_aac_ac_finish(AACArithState *state, int offset, int nb);
+
+#endif /* AVCODEC_AAC_AACDEC_AC_H */
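For reviewers, a rough sketch of how the arithmetic-coder API above is meant to be driven. It mirrors decode_spectrum_and_dequant_ac() in aacdec_usac.c further down in this patch, but drops the escape-symbol handling, the LSB refinement and the 14-bit rewind, so it is illustrative only; it assumes the ff_aac_ac_msb_cdfs tables added by this patch are visible.

#include "libavcodec/get_bits.h"
#include "aacdec_ac.h"

/* One AACArithState persists per channel across frames; one AACArith
 * instance lives for the duration of a single spectrum. */
static void decode_msb_pairs_sketch(AACArithState *state, GetBitContext *gb,
                                    int reset, int len, int N)
{
    AACArith ac;
    uint32_t c;

    ff_aac_ac_init(&ac, gb);                    /* reads the initial 16-bit value */
    c = ff_aac_ac_map_process(state, reset, N); /* rescale last frame's context to N */

    for (int i = 0; i < len / 2; i++) {
        c = ff_aac_ac_get_context(state, c, i, N);
        uint32_t pki = ff_aac_ac_get_pk(c);
        int m = ff_aac_ac_decode(&ac, gb, ff_aac_ac_msb_cdfs[pki],
                                 FF_ARRAY_ELEMS(ff_aac_ac_msb_cdfs[pki]));
        int b = m >> 2, a = m & 3;              /* two 2-bit MSBs of the coefficient pair */
        ff_aac_ac_update_context(state, i, a, b);
    }

    ff_aac_ac_finish(state, len / 2, N);
}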
diff --git a/libavcodec/aac/aacdec_dsp_template.c b/libavcodec/aac/aacdec_dsp_template.c
index 59a69d88f3..8d31af22f8 100644
--- a/libavcodec/aac/aacdec_dsp_template.c
+++ b/libavcodec/aac/aacdec_dsp_template.c
@@ -88,8 +88,8 @@ static void AAC_RENAME(apply_mid_side_stereo)(AACDecContext *ac, ChannelElement
INTFLOAT *ch1 = cpe->ch[1].AAC_RENAME(coeffs);
const uint16_t *offsets = ics->swb_offset;
for (int g = 0; g < ics->num_window_groups; g++) {
- for (int sfb = 0; sfb < ics->max_sfb; sfb++) {
- const int idx = g*ics->max_sfb + sfb;
+ for (int sfb = 0; sfb < cpe->max_sfb_ste; sfb++) {
+ const int idx = g*cpe->max_sfb_ste + sfb;
if (cpe->ms_mask[idx] &&
cpe->ch[0].band_type[idx] < NOISE_BT &&
cpe->ch[1].band_type[idx] < NOISE_BT) {
diff --git a/libavcodec/aac/aacdec_latm.h b/libavcodec/aac/aacdec_latm.h
index e40a2fe1a7..047c11e0fb 100644
--- a/libavcodec/aac/aacdec_latm.h
+++ b/libavcodec/aac/aacdec_latm.h
@@ -56,7 +56,8 @@ static int latm_decode_audio_specific_config(struct LATMContext *latmctx,
{
AACDecContext *ac = &latmctx->aac_ctx;
AVCodecContext *avctx = ac->avctx;
- MPEG4AudioConfig m4ac = { 0 };
+ OutputConfiguration oc = { 0 };
+ MPEG4AudioConfig *m4ac = &oc.m4ac;
GetBitContext gbc;
int config_start_bit = get_bits_count(gb);
int sync_extension = 0;
@@ -76,7 +77,7 @@ static int latm_decode_audio_specific_config(struct LATMContext *latmctx,
if (get_bits_left(gb) <= 0)
return AVERROR_INVALIDDATA;
- bits_consumed = decode_audio_specific_config_gb(NULL, avctx, &m4ac,
+ bits_consumed = decode_audio_specific_config_gb(NULL, avctx, &oc,
&gbc, config_start_bit,
sync_extension);
@@ -88,11 +89,12 @@ static int latm_decode_audio_specific_config(struct LATMContext *latmctx,
asclen = bits_consumed;
if (!latmctx->initialized ||
- ac->oc[1].m4ac.sample_rate != m4ac.sample_rate ||
- ac->oc[1].m4ac.chan_config != m4ac.chan_config) {
+ ac->oc[1].m4ac.sample_rate != m4ac->sample_rate ||
+ ac->oc[1].m4ac.chan_config != m4ac->chan_config) {
if (latmctx->initialized) {
- av_log(avctx, AV_LOG_INFO, "audio config changed (sample_rate=%d, chan_config=%d)\n", m4ac.sample_rate, m4ac.chan_config);
+ av_log(avctx, AV_LOG_INFO, "audio config changed (sample_rate=%d, chan_config=%d)\n",
+ m4ac->sample_rate, m4ac->chan_config);
} else {
av_log(avctx, AV_LOG_DEBUG, "initializing latmctx\n");
}
@@ -280,7 +282,7 @@ static int latm_decode_frame(AVCodecContext *avctx, AVFrame *out,
} else {
push_output_configuration(&latmctx->aac_ctx);
if ((err = decode_audio_specific_config(
- &latmctx->aac_ctx, avctx, &latmctx->aac_ctx.oc[1].m4ac,
+ &latmctx->aac_ctx, avctx, &latmctx->aac_ctx.oc[1],
avctx->extradata, avctx->extradata_size*8LL, 1)) < 0) {
pop_output_configuration(&latmctx->aac_ctx);
return err;
diff --git a/libavcodec/aac/aacdec_lpd.c b/libavcodec/aac/aacdec_lpd.c
new file mode 100644
index 0000000000..796edd2ab5
--- /dev/null
+++ b/libavcodec/aac/aacdec_lpd.c
@@ -0,0 +1,198 @@
+/*
+ * Copyright (c) 2024 Lynne <dev@lynne.ee>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "aacdec_lpd.h"
+#include "aacdec_usac.h"
+#include "libavcodec/unary.h"
+
+const uint8_t ff_aac_lpd_mode_tab[32][4] = {
+ { 0, 0, 0, 0 },
+ { 1, 0, 0, 0 },
+ { 0, 1, 0, 0 },
+ { 1, 1, 0, 0 },
+ { 0, 0, 1, 0 },
+ { 1, 0, 1, 0 },
+ { 0, 1, 1, 0 },
+ { 1, 1, 1, 0 },
+ { 0, 0, 0, 1 },
+ { 1, 0, 0, 1 },
+ { 0, 1, 0, 1 },
+ { 1, 1, 0, 1 },
+ { 0, 0, 1, 1 },
+ { 1, 0, 1, 1 },
+ { 0, 1, 1, 1 },
+ { 1, 1, 1, 1 },
+ { 2, 2, 0, 0 },
+ { 2, 2, 1, 0 },
+ { 2, 2, 0, 1 },
+ { 2, 2, 1, 1 },
+ { 0, 0, 2, 2 },
+ { 1, 0, 2, 2 },
+ { 0, 1, 2, 2 },
+ { 1, 1, 2, 2 },
+ { 2, 2, 2, 2 },
+ { 3, 3, 3, 3 },
+ /* Larger values are reserved, but permit them for resilience */
+ { 0, 0, 0, 0 },
+ { 0, 0, 0, 0 },
+ { 0, 0, 0, 0 },
+ { 0, 0, 0, 0 },
+ { 0, 0, 0, 0 },
+ { 0, 0, 0, 0 },
+};
+
+static void parse_qn(GetBitContext *gb, int *qn, int nk_mode, int no_qn)
+{
+ if (nk_mode == 1) {
+ for (int k = 0; k < no_qn; k++) {
+ qn[k] = get_unary(gb, 0, INT32_MAX); // TODO: find proper ranges
+ if (qn[k])
+ qn[k]++;
+ }
+ return;
+ }
+
+ for (int k = 0; k < no_qn; k++)
+ qn[k] = get_bits(gb, 2) + 2;
+
+ if (nk_mode == 2) {
+ for (int k = 0; k < no_qn; k++) {
+ if (qn[k] > 4) {
+ qn[k] = get_unary(gb, 0, INT32_MAX);
+ if (qn[k])
+ qn[k] += 4;
+ }
+ }
+ return;
+ }
+
+ for (int k = 0; k < no_qn; k++) {
+ if (qn[k] > 4) {
+ int qn_ext = get_unary(gb, 0, INT32_MAX);
+ switch (qn_ext) {
+ case 0: qn[k] = 5; break;
+ case 1: qn[k] = 6; break;
+ case 2: qn[k] = 0; break;
+ default: qn[k] = qn_ext + 4; break;
+ }
+ }
+ }
+}
+
+static int parse_codebook_idx(GetBitContext *gb, uint32_t *kv,
+ int nk_mode, int no_qn)
+{
+ int idx, n, nk;
+
+ int qn[2];
+ parse_qn(gb, qn, nk_mode, no_qn);
+
+ for (int k = 0; k < no_qn; k++) {
+ if (qn[k] > 4) {
+ nk = (qn[k] - 3) / 2;
+ n = qn[k] - nk*2;
+ } else {
+ nk = 0;
+ n = qn[k];
+ }
+ }
+
+ idx = get_bits(gb, 4*n);
+
+ if (nk > 0)
+ for (int i = 0; i < 8; i++)
+ kv[i] = get_bits(gb, nk);
+
+ return 0;
+}
+
+int ff_aac_parse_fac_data(AACUsacElemData *ce, GetBitContext *gb,
+ int use_gain, int len)
+{
+ int ret;
+ if (use_gain)
+ ce->fac.gain = get_bits(gb, 7);
+
+ for (int i = 0; i < len/8; i++) {
+ ret = parse_codebook_idx(gb, ce->fac.kv[i], 1, 1);
+ if (ret < 0)
+ return ret;
+ }
+
+ return 0;
+}
+
+int ff_aac_ldp_parse_channel_stream(AACDecContext *ac, AACUSACConfig *usac,
+ AACUsacElemData *ce, GetBitContext *gb)
+{
+ int k;
+ const uint8_t *mod;
+ int first_ldp_flag;
+ int first_tcx_flag;
+
+ ce->ldp.acelp_core_mode = get_bits(gb, 3);
+ ce->ldp.lpd_mode = get_bits(gb, 5);
+
+ ce->ldp.bpf_control_info = get_bits1(gb);
+ ce->ldp.core_mode_last = get_bits1(gb);
+ ce->ldp.fac_data_present = get_bits1(gb);
+
+ mod = ff_aac_lpd_mode_tab[ce->ldp.lpd_mode];
+
+ first_ldp_flag = !ce->ldp.core_mode_last;
+ first_tcx_flag = 1;
+ if (first_ldp_flag)
+ ce->ldp.last_lpd_mode = -1; /* last_lpd_mode is a **STATEFUL** value */
+
+ k = 0;
+ while (k < 4) {
+ if (!k) {
+ if (ce->ldp.core_mode_last && ce->ldp.fac_data_present)
+ ff_aac_parse_fac_data(ce, gb, 0, usac->core_frame_len/8);
+ } else {
+ if ((!ce->ldp.last_lpd_mode && mod[k] > 0) ||
+ (ce->ldp.last_lpd_mode && !mod[k]))
+ ff_aac_parse_fac_data(ce, gb, 0, usac->core_frame_len/8);
+ }
+ if (!mod[k]) {
+// parse_acelp_coding();
+ ce->ldp.last_lpd_mode = 0;
+ k++;
+ } else {
+// parse_tcx_coding();
+ ce->ldp.last_lpd_mode = mod[k];
+ k += (1 << (mod[k] - 1));
+ first_tcx_flag = 0;
+ }
+ }
+
+// parse_lpc_data(first_lpd_flag);
+
+ if (!ce->ldp.core_mode_last && ce->ldp.fac_data_present) {
+ uint16_t len_8 = usac->core_frame_len / 8;
+ uint16_t len_16 = usac->core_frame_len / 16;
+ uint16_t fac_len = get_bits1(gb) /* short_fac_flag */ ? len_8 : len_16;
+ int ret = ff_aac_parse_fac_data(ce, gb, 1, fac_len);
+ if (ret < 0)
+ return ret;
+ }
+
+ return 0;
+}
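As a side note for reviewers, the 5-bit lpd_mode indexes ff_aac_lpd_mode_tab above to give one coding decision per LPD subframe: 0 selects ACELP, while values 1-3 select a TCX block spanning 1, 2 or 4 subframes, which is why the skeleton loop advances k by 1 << (mod[k] - 1). A tiny illustrative walk of that schedule (not part of the patch; the table is passed in to keep the sketch self-contained):

#include <stdio.h>
#include <stdint.h>

/* Walks the four LPD subframes for one lpd_mode value and prints the
 * resulting ACELP/TCX schedule; no bitstream parsing involved. */
static void print_lpd_schedule(const uint8_t mode_tab[32][4], uint8_t lpd_mode)
{
    const uint8_t *mod = mode_tab[lpd_mode & 31];
    int k = 0;
    while (k < 4) {
        if (!mod[k]) {
            printf("subframe %d: ACELP\n", k);
            k++;
        } else {
            int span = 1 << (mod[k] - 1);
            printf("subframes %d..%d: TCX over %d subframe(s)\n",
                   k, k + span - 1, span);
            k += span;
        }
    }
}

For example, print_lpd_schedule(ff_aac_lpd_mode_tab, 25) reports a single TCX block covering all four subframes.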
diff --git a/libavcodec/aac/aacdec_lpd.h b/libavcodec/aac/aacdec_lpd.h
new file mode 100644
index 0000000000..924ff75e52
--- /dev/null
+++ b/libavcodec/aac/aacdec_lpd.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2024 Lynne <dev@lynne.ee>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_AAC_AACDEC_LPD_H
+#define AVCODEC_AAC_AACDEC_LPD_H
+
+#include "aacdec.h"
+#include "libavcodec/get_bits.h"
+
+int ff_aac_parse_fac_data(AACUsacElemData *ce, GetBitContext *gb,
+ int use_gain, int len);
+
+int ff_aac_ldp_parse_channel_stream(AACDecContext *ac, AACUSACConfig *usac,
+ AACUsacElemData *ce, GetBitContext *gb);
+
+#endif /* AVCODEC_AAC_AACDEC_LPD_H */
diff --git a/libavcodec/aac/aacdec_usac.c b/libavcodec/aac/aacdec_usac.c
new file mode 100644
index 0000000000..c3c9137a2e
--- /dev/null
+++ b/libavcodec/aac/aacdec_usac.c
@@ -0,0 +1,1608 @@
+/*
+ * Copyright (c) 2024 Lynne <dev@lynne.ee>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "aacdec_usac.h"
+#include "aacdec_tab.h"
+#include "aacdec_lpd.h"
+#include "aacdec_ac.h"
+
+#include "libavcodec/aactab.h"
+#include "libavutil/mem.h"
+#include "libavcodec/mpeg4audio.h"
+#include "libavcodec/unary.h"
+
+/* Number of scalefactor bands per complex prediction band, equal to 2. */
+#define SFB_PER_PRED_BAND 2
+
+static inline uint32_t get_escaped_value(GetBitContext *gb, int nb1, int nb2, int nb3)
+{
+ uint32_t val = get_bits(gb, nb1), val2;
+ if (val < ((1 << nb1) - 1))
+ return val;
+
+ val += val2 = get_bits(gb, nb2);
+ if (val2 == ((1 << nb2) - 1))
+ val += get_bits(gb, nb3);
+
+ return val;
+}
+
+/* ISO/IEC 23003-3, Table 74 — bsOutputChannelPos */
+static const enum AVChannel usac_ch_pos_to_av[64] = {
+ [0] = AV_CHAN_FRONT_LEFT,
+ [1] = AV_CHAN_FRONT_RIGHT,
+ [2] = AV_CHAN_FRONT_CENTER,
+ [3] = AV_CHAN_LOW_FREQUENCY,
+ [4] = AV_CHAN_SIDE_LEFT, // +110 degrees, Ls|LS|kAudioChannelLabel_LeftSurround
+ [5] = AV_CHAN_SIDE_RIGHT, // -110 degrees, Rs|RS|kAudioChannelLabel_RightSurround
+ [6] = AV_CHAN_FRONT_LEFT_OF_CENTER,
+ [7] = AV_CHAN_FRONT_RIGHT_OF_CENTER,
+ [8] = AV_CHAN_BACK_LEFT, // +135 degrees, Lsr|BL|kAudioChannelLabel_RearSurroundLeft
+ [9] = AV_CHAN_BACK_RIGHT, // -135 degrees, Rsr|BR|kAudioChannelLabel_RearSurroundRight
+ [10] = AV_CHAN_BACK_CENTER,
+ [11] = AV_CHAN_SURROUND_DIRECT_LEFT,
+ [12] = AV_CHAN_SURROUND_DIRECT_RIGHT,
+ [13] = AV_CHAN_SIDE_SURROUND_LEFT, // +90 degrees, Lss|SL|kAudioChannelLabel_LeftSideSurround
+ [14] = AV_CHAN_SIDE_SURROUND_RIGHT, // -90 degrees, Rss|SR|kAudioChannelLabel_RightSideSurround
+ [15] = AV_CHAN_WIDE_LEFT, // +60 degrees, Lw|FLw|kAudioChannelLabel_LeftWide
+ [16] = AV_CHAN_WIDE_RIGHT, // -60 degrees, Rw|FRw|kAudioChannelLabel_RightWide
+ [17] = AV_CHAN_TOP_FRONT_LEFT,
+ [18] = AV_CHAN_TOP_FRONT_RIGHT,
+ [19] = AV_CHAN_TOP_FRONT_CENTER,
+ [20] = AV_CHAN_TOP_BACK_LEFT,
+ [21] = AV_CHAN_TOP_BACK_RIGHT,
+ [22] = AV_CHAN_TOP_BACK_CENTER,
+ [23] = AV_CHAN_TOP_SIDE_LEFT,
+ [24] = AV_CHAN_TOP_SIDE_RIGHT,
+ [25] = AV_CHAN_TOP_CENTER,
+ [26] = AV_CHAN_LOW_FREQUENCY_2,
+ [27] = AV_CHAN_BOTTOM_FRONT_LEFT,
+ [28] = AV_CHAN_BOTTOM_FRONT_RIGHT,
+ [29] = AV_CHAN_BOTTOM_FRONT_CENTER,
+ [30] = AV_CHAN_TOP_SURROUND_LEFT, ///< +110 degrees, Lvs, TpLS
+ [31] = AV_CHAN_TOP_SURROUND_RIGHT, ///< -110 degrees, Rvs, TpRS
+};
+
+static int decode_loudness_info(AACDecContext *ac, AACUSACLoudnessInfo *info,
+ GetBitContext *gb)
+{
+ info->drc_set_id = get_bits(gb, 6);
+ info->downmix_id = get_bits(gb, 7);
+
+ if ((info->sample_peak.present = get_bits1(gb))) /* samplePeakLevelPresent */
+ info->sample_peak.lvl = get_bits(gb, 12);
+
+ if ((info->true_peak.present = get_bits1(gb))) { /* truePeakLevelPresent */
+ info->true_peak.lvl = get_bits(gb, 12);
+ info->true_peak.measurement = get_bits(gb, 4);
+ info->true_peak.reliability = get_bits(gb, 2);
+ }
+
+ info->nb_measurements = get_bits(gb, 4);
+ for (int i = 0; i < info->nb_measurements; i++) {
+ info->measurements[i].method_def = get_bits(gb, 4);
+ info->measurements[i].method_val = get_unary(gb, 0, 8);
+ info->measurements[i].measurement = get_bits(gb, 4);
+ info->measurements[i].reliability = get_bits(gb, 2);
+ }
+
+ return 0;
+}
+
+static int decode_loudness_set(AACDecContext *ac, AACUSACConfig *usac,
+ GetBitContext *gb)
+{
+ int ret;
+
+ usac->loudness.nb_album = get_bits(gb, 6); /* loudnessInfoAlbumCount */
+ usac->loudness.nb_info = get_bits(gb, 6); /* loudnessInfoCount */
+
+ for (int i = 0; i < usac->loudness.nb_album; i++) {
+ ret = decode_loudness_info(ac, &usac->loudness.album_info[i], gb);
+ if (ret < 0)
+ return ret;
+ }
+
+ for (int i = 0; i < usac->loudness.nb_info; i++) {
+ ret = decode_loudness_info(ac, &usac->loudness.info[i], gb);
+ if (ret < 0)
+ return ret;
+ }
+
+ if (get_bits1(gb)) { /* loudnessInfoSetExtPresent */
+ enum AACUSACLoudnessExt type;
+ while ((type = get_bits(gb, 4)) != UNIDRCLOUDEXT_TERM) {
+ uint8_t size_bits = get_bits(gb, 4) + 4;
+ uint8_t bit_size = get_bits(gb, size_bits) + 1;
+ switch (type) {
+ case UNIDRCLOUDEXT_EQ:
+ avpriv_report_missing_feature(ac->avctx, "loudnessInfoV1");
+ return AVERROR_PATCHWELCOME;
+ default:
+ for (int i = 0; i < bit_size; i++)
+ skip_bits1(gb);
+ }
+ }
+ }
+
+ return 0;
+}
+
+static void decode_usac_sbr_data(AACUsacElemConfig *e, GetBitContext *gb)
+{
+ uint8_t header_extra1;
+ uint8_t header_extra2;
+
+ e->sbr.harmonic_sbr = get_bits1(gb); /* harmonicSBR */
+ e->sbr.bs_intertes = get_bits1(gb); /* bs_interTes */
+ e->sbr.bs_pvc = get_bits1(gb); /* bs_pvc */
+
+ e->sbr.dflt.start_freq = get_bits(gb, 4); /* dflt_start_freq */
+ e->sbr.dflt.stop_freq = get_bits(gb, 4); /* dflt_stop_freq */
+
+ header_extra1 = get_bits1(gb); /* dflt_header_extra1 */
+ header_extra2 = get_bits1(gb); /* dflt_header_extra2 */
+
+ e->sbr.dflt.freq_scale = 2;
+ e->sbr.dflt.alter_scale = 1;
+ e->sbr.dflt.noise_scale = 2;
+ if (header_extra1) {
+ e->sbr.dflt.freq_scale = get_bits(gb, 2); /* dflt_freq_scale */
+ e->sbr.dflt.alter_scale = get_bits1(gb); /* dflt_alter_scale */
+ e->sbr.dflt.noise_scale = get_bits(gb, 2); /* dflt_noise_scale */
+ }
+
+ e->sbr.dflt.limiter_bands = 2;
+ e->sbr.dflt.limiter_gains = 2;
+ e->sbr.dflt.interpol_freq = 1;
+ e->sbr.dflt.smoothing_mode = 1;
+ if (header_extra2) {
+ e->sbr.dflt.limiter_bands = get_bits(gb, 2); /* dflt_limiter_bands */
+ e->sbr.dflt.limiter_gains = get_bits(gb, 2); /* dflt_limiter_gains */
+ e->sbr.dflt.interpol_freq = get_bits1(gb); /* dflt_interpol_freq */
+ e->sbr.dflt.smoothing_mode = get_bits1(gb); /* dflt_smoothing_mode */
+ }
+}
+
+static void decode_usac_element_core(AACUsacElemConfig *e,
+ GetBitContext *gb,
+ int sbr_ratio)
+{
+ e->tw_mdct = get_bits1(gb); /* tw_mdct */
+ e->noise_fill = get_bits1(gb);
+ e->sbr.ratio = sbr_ratio;
+}
+
+static void decode_usac_element_pair(AACUsacElemConfig *e, GetBitContext *gb)
+{
+ e->stereo_config_index = 0;
+ if (e->sbr.ratio) {
+ decode_usac_sbr_data(e, gb);
+ e->stereo_config_index = get_bits(gb, 2);
+ }
+ if (e->stereo_config_index) {
+ e->mps.freq_res = get_bits(gb, 3); /* bsFreqRes */
+ e->mps.fixed_gain = get_bits(gb, 3); /* bsFixedGainDMX */
+ e->mps.temp_shape_config = get_bits(gb, 2); /* bsTempShapeConfig */
+ e->mps.decorr_config = get_bits(gb, 2); /* bsDecorrConfig */
+ e->mps.high_rate_mode = get_bits1(gb); /* bsHighRateMode */
+ e->mps.phase_coding = get_bits1(gb); /* bsPhaseCoding */
+
+ if (get_bits1(gb)) /* bsOttBandsPhasePresent */
+ e->mps.otts_bands_phase = get_bits(gb, 5); /* bsOttBandsPhase */
+
+ e->mps.residual_coding = e->stereo_config_index >= 2; /* bsResidualCoding */
+ if (e->mps.residual_coding) {
+ e->mps.residual_bands = get_bits(gb, 5); /* bsResidualBands */
+ e->mps.pseudo_lr = get_bits1(gb); /* bsPseudoLr */
+ }
+ if (e->mps.temp_shape_config == 2)
+ e->mps.env_quant_mode = get_bits1(gb); /* bsEnvQuantMode */
+ }
+}
+
+static int decode_usac_extension(AACDecContext *ac, AACUsacElemConfig *e,
+ GetBitContext *gb)
+{
+ int len = 0, ext_config_len;
+
+ e->ext.type = get_escaped_value(gb, 4, 8, 16); /* usacExtElementType */
+ ext_config_len = get_escaped_value(gb, 4, 8, 16); /* usacExtElementConfigLength */
+
+ if (get_bits1(gb)) /* usacExtElementDefaultLengthPresent */
+ len = get_escaped_value(gb, 8, 16, 0) + 1;
+
+ e->ext.default_len = len;
+ e->ext.payload_frag = get_bits1(gb); /* usacExtElementPayloadFrag */
+
+ av_log(ac->avctx, AV_LOG_DEBUG, "Extension present: type %i, len %i\n",
+ e->ext.type, ext_config_len);
+
+ switch (e->ext.type) {
+#if 0 /* Skip unsupported values */
+ case ID_EXT_ELE_MPEGS:
+ break;
+ case ID_EXT_ELE_SAOC:
+ break;
+ case ID_EXT_ELE_UNI_DRC:
+ break;
+#endif
+ case ID_EXT_ELE_FILL:
+ break; /* This is what the spec does */
+ case ID_EXT_ELE_AUDIOPREROLL:
+ /* No configuration needed - fallthrough (len should be 0) */
+ default:
+ skip_bits(gb, 8*ext_config_len);
+ break;
+ };
+
+ return 0;
+}
+
+int ff_aac_usac_reset_state(AACDecContext *ac, OutputConfiguration *oc)
+{
+ AACUSACConfig *usac = &oc->usac;
+ int elem_id[3 /* SCE, CPE, LFE */] = { 0, 0, 0 };
+
+ ChannelElement *che;
+ enum RawDataBlockType type;
+ int id, ch;
+
+ /* Initialize state */
+ for (int i = 0; i < usac->nb_elems; i++) {
+ AACUsacElemConfig *e = &usac->elems[i];
+ if (e->type != ID_USAC_SCE && e->type != ID_USAC_CPE)
+ continue;
+
+ if (e->type == ID_USAC_SCE) {
+ ch = 1;
+ type = TYPE_SCE;
+ id = elem_id[0]++;
+ } else {
+ ch = 2;
+ type = TYPE_CPE;
+ id = elem_id[1]++;
+ }
+
+ che = ff_aac_get_che(ac, type, id);
+ if (che) {
+ AACUsacStereo *us = &che->us;
+ memset(us, 0, sizeof(*us));
+
+ for (int j = 0; j < ch; j++) {
+ SingleChannelElement *sce = &che->ch[j];
+ AACUsacElemData *ue = &sce->ue;
+
+ memset(ue, 0, sizeof(*ue));
+
+ if (!j)
+ ue->noise.seed = 0x3039;
+ else
+ ue->noise.seed = 0x10932;
+ }
+ }
+ }
+
+ return 0;
+}
+
+/* UsacConfig */
+int ff_aac_usac_config_decode(AACDecContext *ac, AVCodecContext *avctx,
+ GetBitContext *gb, OutputConfiguration *oc,
+ int channel_config)
+{
+ int ret, idx;
+ uint8_t freq_idx;
+ uint8_t channel_config_idx;
+ int nb_elements;
+ int samplerate;
+ int sbr_ratio;
+ MPEG4AudioConfig *m4ac = &oc->m4ac;
+ AACUSACConfig *usac = &oc->usac;
+ int elem_id[3 /* SCE, CPE, LFE */];
+
+ uint8_t layout_map[MAX_ELEM_ID*4][3];
+
+ memset(usac, 0, sizeof(*usac));
+
+ freq_idx = get_bits(gb, 5); /* usacSamplingFrequencyIndex */
+ if (freq_idx == 0x1f) {
+ samplerate = get_bits(gb, 24); /* usacSamplingFrequency */
+
+ /* Try to match up an index for the custom sample rate.
+ * TODO: not sure if correct */
+ for (idx = 0; idx < /* FF_ARRAY_ELEMS(ff_aac_usac_samplerate) */ 32; idx++) {
+ if (ff_aac_usac_samplerate[idx] >= samplerate)
+ break;
+ }
+ idx = FFMIN(idx, /* FF_ARRAY_ELEMS(ff_aac_usac_samplerate) */ 32 - 1);
+ usac->rate_idx = idx;
+ } else {
+ samplerate = ff_aac_usac_samplerate[freq_idx];
+ if (samplerate < 0)
+ return AVERROR(EINVAL);
+ usac->rate_idx = freq_idx;
+ }
+
+ m4ac->sample_rate = avctx->sample_rate = samplerate;
+
+ usac->core_sbr_frame_len_idx = get_bits(gb, 3); /* coreSbrFrameLengthIndex */
+ m4ac->frame_length_short = usac->core_sbr_frame_len_idx == 0 ||
+ usac->core_sbr_frame_len_idx == 2;
+
+ usac->core_frame_len = (usac->core_sbr_frame_len_idx == 0 ||
+ usac->core_sbr_frame_len_idx == 2) ? 768 : 1024;
+
+ sbr_ratio = usac->core_sbr_frame_len_idx == 2 ? 2 :
+ usac->core_sbr_frame_len_idx == 3 ? 3 :
+ usac->core_sbr_frame_len_idx == 4 ? 1 :
+ 0;
+
+ channel_config_idx = get_bits(gb, 5); /* channelConfigurationIndex */
+ if (!channel_config_idx) {
+ /* UsacChannelConfig() */
+ uint8_t nb_channels = get_escaped_value(gb, 5, 8, 16); /* numOutChannels */
+ if (nb_channels >= 64)
+ return AVERROR(EINVAL);
+
+ av_channel_layout_uninit(&ac->oc[1].ch_layout);
+
+ ret = av_channel_layout_custom_init(&ac->oc[1].ch_layout, nb_channels);
+ if (ret < 0)
+ return ret;
+
+ for (int i = 0; i < nb_channels; i++) {
+ AVChannelCustom *cm = &ac->oc[1].ch_layout.u.map[i];
+ cm->id = usac_ch_pos_to_av[get_bits(gb, 5)]; /* bsOutputChannelPos */
+ if (cm->id == AV_CHAN_NONE)
+ cm->id = AV_CHAN_UNKNOWN;
+ }
+
+ ret = av_channel_layout_retype(&ac->oc[1].ch_layout,
+ AV_CHANNEL_ORDER_NATIVE,
+ AV_CHANNEL_LAYOUT_RETYPE_FLAG_CANONICAL);
+ if (ret < 0)
+ return ret;
+
+ ret = av_channel_layout_copy(&avctx->ch_layout, &ac->oc[1].ch_layout);
+ if (ret < 0)
+ return ret;
+ } else {
+ if ((ret = ff_aac_set_default_channel_config(ac, avctx, layout_map,
+ &nb_elements, channel_config_idx)))
+ return ret;
+ }
+
+ /* UsacDecoderConfig */
+ elem_id[0] = elem_id[1] = elem_id[2] = 0;
+ usac->nb_elems = get_escaped_value(gb, 4, 8, 16) + 1;
+
+ for (int i = 0; i < usac->nb_elems; i++) {
+ AACUsacElemConfig *e = &usac->elems[i];
+ memset(e, 0, sizeof(*e));
+
+ e->type = get_bits(gb, 2); /* usacElementType */
+ av_log(ac->avctx, AV_LOG_DEBUG, "Element present: idx %i, type %i\n",
+ i, e->type);
+
+ switch (e->type) {
+ case ID_USAC_SCE: /* SCE */
+ /* UsacCoreConfig */
+ decode_usac_element_core(e, gb, sbr_ratio);
+ if (e->sbr.ratio > 0)
+ decode_usac_sbr_data(e, gb);
+ layout_map[i][0] = TYPE_SCE;
+ layout_map[i][1] = i;
+ layout_map[i][2] = AAC_CHANNEL_FRONT;
+ elem_id[0]++;
+
+ break;
+ case ID_USAC_CPE: /* UsacChannelPairElementConf */
+ /* UsacCoreConfig */
+ decode_usac_element_core(e, gb, sbr_ratio);
+ decode_usac_element_pair(e, gb);
+ layout_map[i][0] = TYPE_CPE;
+ layout_map[i][1] = i;
+ layout_map[i][2] = AAC_CHANNEL_FRONT;
+ elem_id[1]++;
+
+ break;
+ case ID_USAC_LFE: /* LFE */
+ /* LFE has no need for any configuration */
+ e->tw_mdct = 0;
+ e->noise_fill = 0;
+ elem_id[2]++;
+ break;
+ case ID_USAC_EXT: /* EXT */
+ ret = decode_usac_extension(ac, e, gb);
+ if (ret < 0)
+ return ret;
+ break;
+ };
+ }
+
+ ret = ff_aac_output_configure(ac, layout_map, elem_id[0] + elem_id[1] + elem_id[2], OC_GLOBAL_HDR, 0);
+ if (ret < 0) {
+ av_log(avctx, AV_LOG_ERROR, "Unable to parse channel config!\n");
+ return ret;
+ }
+
+ if (get_bits1(gb)) { /* usacConfigExtensionPresent */
+ int invalid;
+ int nb_extensions = get_escaped_value(gb, 2, 4, 8) + 1; /* numConfigExtensions */
+ for (int i = 0; i < nb_extensions; i++) {
+ int type = get_escaped_value(gb, 4, 8, 16);
+ int len = get_escaped_value(gb, 4, 8, 16);
+ switch (type) {
+ case ID_CONFIG_EXT_LOUDNESS_INFO:
+ ret = decode_loudness_set(ac, usac, gb);
+ if (ret < 0)
+ return ret;
+ break;
+ case ID_CONFIG_EXT_STREAM_ID:
+ usac->stream_identifier = get_bits(gb, 16);
+ break;
+ case ID_CONFIG_EXT_FILL: /* fallthrough */
+ invalid = 0;
+ while (len--) {
+ if (get_bits(gb, 8) != 0xA5)
+ invalid++;
+ }
+ if (invalid)
+ av_log(avctx, AV_LOG_WARNING, "Invalid fill bytes: %i\n",
+ invalid);
+ break;
+ default:
+ while (len--)
+ skip_bits(gb, 8);
+ break;
+ }
+ }
+ }
+
+ ret = ff_aac_usac_reset_state(ac, oc);
+ if (ret < 0)
+ return ret;
+
+ return 0;
+}
+
+static int decode_usac_scale_factors(AACDecContext *ac,
+ SingleChannelElement *sce,
+ GetBitContext *gb, uint8_t global_gain)
+{
+ IndividualChannelStream *ics = &sce->ics;
+
+ /* Decode all scalefactors. */
+ int offset_sf = global_gain;
+ for (int g = 0; g < ics->num_window_groups; g++) {
+ for (int sfb = 0; sfb < ics->max_sfb; sfb++) {
+ /* First coefficient is just the global gain */
+ if (!g && !sfb) {
+ /* The canonical representation of quantized scalefactors
+ * in the spec is with 100 subtracted. */
+ sce->sfo[0] = offset_sf - 100;
+ continue;
+ }
+
+ offset_sf += get_vlc2(gb, ff_vlc_scalefactors, 7, 3) - SCALE_DIFF_ZERO;
+ if (offset_sf > 255U) {
+ av_log(ac->avctx, AV_LOG_ERROR,
+ "Scalefactor (%d) out of range.\n", offset_sf);
+ return AVERROR_INVALIDDATA;
+ }
+
+ sce->sfo[g*ics->max_sfb + sfb] = offset_sf - 100;
+ }
+ }
+
+ return 0;
+}
+
+/**
+ * Decode and dequantize arithmetically coded, uniformly quantized value
+ *
+ * @param coef array of dequantized, scaled spectral data
+ * @param sf array of scalefactors or intensity stereo positions
+ *
+ * @return Returns error status. 0 - OK, !0 - error
+ */
+static int decode_spectrum_and_dequant_ac(AACDecContext *s, float coef[1024],
+ GetBitContext *gb, const float sf[120],
+ AACArithState *state, int reset,
+ uint16_t len, uint16_t N)
+{
+ AACArith ac;
+ int i, a, b;
+ uint32_t c;
+
+ int gb_count;
+ GetBitContext gb2;
+
+ ff_aac_ac_init(&ac, gb);
+ c = ff_aac_ac_map_process(state, reset, N);
+
+ /* Backup reader for rolling back by 14 bits at the end */
+ gb2 = (GetBitContext)*gb;
+ gb_count = get_bits_count(&gb2);
+
+ for (i = 0; i < len/2; i++) {
+ /* MSB */
+ int lvl, esc_nb, m;
+ c = ff_aac_ac_get_context(state, c, i, N);
+ for (lvl=esc_nb=0;;) {
+ uint32_t pki = ff_aac_ac_get_pk(c + (esc_nb << 17));
+ m = ff_aac_ac_decode(&ac, &gb2, ff_aac_ac_msb_cdfs[pki],
+ FF_ARRAY_ELEMS(ff_aac_ac_msb_cdfs[pki]));
+ if (m < FF_AAC_AC_ESCAPE)
+ break;
+ lvl++;
+
+ /* Cargo-culted value. */
+ if (lvl > 23)
+ return AVERROR(EINVAL);
+
+ if ((esc_nb = lvl) > 7)
+ esc_nb = 7;
+ }
+
+ b = m >> 2;
+ a = m - (b << 2);
+
+ /* ARITH_STOP detection */
+ if (!m) {
+ if (esc_nb)
+ break;
+ a = b = 0;
+ }
+
+ /* LSB */
+ for (int l = lvl; l > 0; l--) {
+ int lsbidx = !a ? 1 : (!b ? 0 : 2);
+ uint8_t r = ff_aac_ac_decode(&ac, &gb2, ff_aac_ac_lsb_cdfs[lsbidx],
+ FF_ARRAY_ELEMS(ff_aac_ac_lsb_cdfs[lsbidx]));
+ a = (a << 1) | (r & 1);
+ b = (b << 1) | ((r >> 1) & 1);
+ }
+
+ /* Dequantize coeffs here */
+ coef[2*i + 0] = a * cbrt(a);
+ coef[2*i + 1] = b * cbrt(b);
+ ff_aac_ac_update_context(state, i, a, b);
+ }
+
+ if (len > 1) {
+ /* "Rewind" bitstream back by 14 bits */
+ int gb_count2 = get_bits_count(&gb2);
+ skip_bits(gb, gb_count2 - gb_count - 14);
+ } else {
+ *gb = gb2;
+ }
+
+ ff_aac_ac_finish(state, i, N);
+
+ for (; i < N/2; i++) {
+ coef[2*i + 0] = 0;
+ coef[2*i + 1] = 0;
+ }
+
+ /* Signs */
+ for (i = 0; i < len; i++) {
+ if (coef[i]) {
+ if (!get_bits1(gb)) /* s */
+ coef[i] *= -1;
+ }
+ }
+
+ return 0;
+}
+
+static int decode_usac_stereo_cplx(AACDecContext *ac, AACUsacStereo *us,
+ ChannelElement *cpe, GetBitContext *gb,
+ int num_window_groups, int indep_flag)
+{
+ int delta_code_time;
+ IndividualChannelStream *ics = &cpe->ch[0].ics;
+
+ if (!get_bits1(gb)) { /* cplx_pred_all */
+ for (int g = 0; g < num_window_groups; g++) {
+ for (int sfb = 0; sfb < cpe->max_sfb_ste; sfb += SFB_PER_PRED_BAND) {
+ const uint8_t val = get_bits1(gb);
+ us->pred_used[g*cpe->max_sfb_ste + sfb] = val;
+ if ((sfb + 1) < cpe->max_sfb_ste)
+ us->pred_used[g*cpe->max_sfb_ste + sfb + 1] = val;
+ }
+ }
+ } else {
+ for (int g = 0; g < num_window_groups; g++)
+ for (int sfb = 0; sfb < cpe->max_sfb_ste; sfb++)
+ us->pred_used[g*cpe->max_sfb_ste + sfb] = 1;
+ }
+
+ us->pred_dir = get_bits1(gb);
+ us->complex_coef = get_bits1(gb);
+
+ us->use_prev_frame = 0;
+ if (us->complex_coef && !indep_flag)
+ us->use_prev_frame = get_bits1(gb);
+
+ delta_code_time = 0;
+ if (!indep_flag)
+ delta_code_time = get_bits1(gb);
+
+ /* TODO: shouldn't be needed */
+ for (int g = 0; g < num_window_groups; g++) {
+ for (int sfb = 0; sfb < cpe->max_sfb_ste; sfb += SFB_PER_PRED_BAND) {
+ float last_alpha_q_re = 0;
+ float last_alpha_q_im = 0;
+ if (delta_code_time) {
+ if (g) {
+ last_alpha_q_re = us->prev_alpha_q_re[(g - 1)*cpe->max_sfb_ste + sfb];
+ last_alpha_q_im = us->prev_alpha_q_im[(g - 1)*cpe->max_sfb_ste + sfb];
+ } else if ((ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) &&
+ ics->window_sequence[1] == EIGHT_SHORT_SEQUENCE ||
+ ics->window_sequence[1] == EIGHT_SHORT_SEQUENCE) {
+ /* The spec doesn't explicitly mention this, but nothing
+ * else makes sense here. */
+ last_alpha_q_re = us->prev_alpha_q_re[7*cpe->max_sfb_ste + sfb];
+ last_alpha_q_im = us->prev_alpha_q_im[7*cpe->max_sfb_ste + sfb];
+ } else {
+ last_alpha_q_re = us->prev_alpha_q_re[g*cpe->max_sfb_ste + sfb];
+ last_alpha_q_im = us->prev_alpha_q_im[g*cpe->max_sfb_ste + sfb];
+ }
+ } else {
+ if (sfb) {
+ last_alpha_q_re = us->alpha_q_re[g*cpe->max_sfb_ste + sfb - 1];
+ last_alpha_q_im = us->alpha_q_im[g*cpe->max_sfb_ste + sfb - 1];
+ }
+ }
+
+ if (us->pred_used[g*cpe->max_sfb_ste + sfb]) {
+ int val = -get_vlc2(gb, ff_vlc_scalefactors, 7, 3) + 60;
+ last_alpha_q_re += val * 0.1f;
+ if (us->complex_coef) {
+ val = -get_vlc2(gb, ff_vlc_scalefactors, 7, 3) + 60;
+ last_alpha_q_im += val * 0.1f;
+ }
+ us->alpha_q_re[g*cpe->max_sfb_ste + sfb] = last_alpha_q_re;
+ us->alpha_q_im[g*cpe->max_sfb_ste + sfb] = last_alpha_q_im;
+ } else {
+ us->alpha_q_re[g*cpe->max_sfb_ste + sfb] = 0;
+ us->alpha_q_im[g*cpe->max_sfb_ste + sfb] = 0;
+ }
+
+ if ((sfb + 1) < cpe->max_sfb_ste) {
+ us->alpha_q_re[g*cpe->max_sfb_ste + sfb + 1] =
+ us->alpha_q_re[g*cpe->max_sfb_ste + sfb];
+ us->alpha_q_im[g*cpe->max_sfb_ste + sfb + 1] =
+ us->alpha_q_im[g*cpe->max_sfb_ste + sfb];
+ }
+ }
+ }
+
+ return 0;
+}
+
+static int setup_sce(AACDecContext *ac, SingleChannelElement *sce,
+ AACUSACConfig *usac)
+{
+ AACUsacElemData *ue = &sce->ue;
+ IndividualChannelStream *ics = &sce->ics;
+
+ /* Setup window parameters */
+ if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
+ if (usac->core_frame_len == 768) {
+ ics->swb_offset = ff_swb_offset_96[usac->rate_idx];
+ ics->num_swb = ff_aac_num_swb_96[usac->rate_idx];
+ } else {
+ ics->swb_offset = ff_swb_offset_128[usac->rate_idx];
+ ics->num_swb = ff_aac_num_swb_128[usac->rate_idx];
+ }
+ ics->tns_max_bands = ff_tns_max_bands_128[usac->rate_idx];
+
+ /* Setup scalefactor grouping. 7 bit mask. */
+ ics->num_window_groups = 0;
+ for (int j = 0; j < 7; j++) {
+ ics->group_len[j] = 1;
+ if (ue->scale_factor_grouping & (1 << (6 - j)))
+ ics->group_len[ics->num_window_groups] += 1;
+ else
+ ics->num_window_groups++;
+ }
+
+ ics->group_len[7] = 1;
+ ics->num_window_groups++;
+ ics->num_windows = 8;
+ } else {
+ if (usac->core_frame_len == 768) {
+ ics->swb_offset = ff_swb_offset_768[usac->rate_idx];
+ ics->num_swb = ff_aac_num_swb_768[usac->rate_idx];
+ } else {
+ ics->swb_offset = ff_swb_offset_1024[usac->rate_idx];
+ ics->num_swb = ff_aac_num_swb_1024[usac->rate_idx];
+ }
+ ics->tns_max_bands = ff_tns_max_bands_1024[usac->rate_idx];
+
+ ics->group_len[0] = 1;
+ ics->num_window_groups = 1;
+ ics->num_windows = 1;
+ }
+
+ if (ics->max_sfb > ics->num_swb) {
+ av_log(ac->avctx, AV_LOG_ERROR,
+ "Number of scalefactor bands in group (%d) "
+ "exceeds limit (%d).\n",
+ ics->max_sfb, ics->num_swb);
+ return AVERROR(EINVAL);
+ }
+
+ /* Just some defaults for the band types */
+ for (int i = 0; i < FF_ARRAY_ELEMS(sce->band_type); i++)
+ sce->band_type[i] = ESC_BT;
+
+ return 0;
+}
+
+static int decode_usac_stereo_info(AACDecContext *ac, AACUSACConfig *usac,
+ AACUsacElemConfig *ec, ChannelElement *cpe,
+ GetBitContext *gb, int indep_flag)
+{
+ int ret, tns_active;
+
+ AACUsacStereo *us = &cpe->us;
+ SingleChannelElement *sce1 = &cpe->ch[0];
+ SingleChannelElement *sce2 = &cpe->ch[1];
+ IndividualChannelStream *ics1 = &sce1->ics;
+ IndividualChannelStream *ics2 = &sce2->ics;
+ AACUsacElemData *ue1 = &sce1->ue;
+ AACUsacElemData *ue2 = &sce2->ue;
+
+ us->common_window = 0;
+ us->common_tw = 0;
+
+ if (!(!ue1->core_mode && !ue2->core_mode))
+ return 0;
+
+ tns_active = get_bits1(gb);
+ us->common_window = get_bits1(gb);
+
+ if (us->common_window) {
+ /* ics_info() */
+ ics1->window_sequence[1] = ics1->window_sequence[0];
+ ics2->window_sequence[1] = ics2->window_sequence[0];
+ ics1->window_sequence[0] = ics2->window_sequence[0] = get_bits(gb, 2);
+
+ ics1->use_kb_window[1] = ics1->use_kb_window[0];
+ ics2->use_kb_window[1] = ics2->use_kb_window[0];
+ ics1->use_kb_window[0] = ics2->use_kb_window[0] = get_bits1(gb);
+
+ if (ics1->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
+ ics1->max_sfb = ics2->max_sfb = get_bits(gb, 4);
+ ue1->scale_factor_grouping = ue2->scale_factor_grouping = get_bits(gb, 7);
+ } else {
+ ics1->max_sfb = ics2->max_sfb = get_bits(gb, 6);
+ }
+
+ if (!get_bits1(gb)) { /* common_max_sfb */
+ if (ics2->window_sequence[0] == EIGHT_SHORT_SEQUENCE)
+ ics2->max_sfb = get_bits(gb, 4);
+ else
+ ics2->max_sfb = get_bits(gb, 6);
+ }
+
+ ret = setup_sce(ac, sce1, usac);
+ if (ret < 0)
+ return ret;
+
+ ret = setup_sce(ac, sce2, usac);
+ if (ret < 0)
+ return ret;
+
+ cpe->max_sfb_ste = FFMAX(ics1->max_sfb, ics2->max_sfb);
+
+ us->ms_mask_mode = get_bits(gb, 2); /* ms_mask_present */
+ memset(cpe->ms_mask, 0, sizeof(cpe->ms_mask));
+ if (us->ms_mask_mode == 1) {
+ for (int g = 0; g < ics1->num_window_groups; g++)
+ for (int sfb = 0; sfb < cpe->max_sfb_ste; sfb++)
+ cpe->ms_mask[g*cpe->max_sfb_ste + sfb] = get_bits1(gb);
+ } else if (us->ms_mask_mode == 2) {
+ memset(cpe->ms_mask, 0xFF, sizeof(cpe->ms_mask));
+ } else if ((us->ms_mask_mode == 3) && !ec->stereo_config_index) {
+ ret = decode_usac_stereo_cplx(ac, us, cpe, gb,
+ ics1->num_window_groups, indep_flag);
+ if (ret < 0)
+ return ret;
+ }
+ }
+
+ if (ec->tw_mdct) {
+ us->common_tw = get_bits1(gb);
+ avpriv_report_missing_feature(ac->avctx,
+ "AAC USAC timewarping");
+ return AVERROR_PATCHWELCOME;
+ }
+
+ sce1->tns.present = sce2->tns.present = 0;
+ if (tns_active) {
+ av_unused int tns_on_lr;
+ int common_tns = 0;
+ if (us->common_window)
+ common_tns = get_bits1(gb);
+
+ tns_on_lr = get_bits1(gb);
+ if (common_tns) {
+ ret = ff_aac_decode_tns(ac, &sce1->tns, gb, ics1);
+ if (ret < 0)
+ return ret;
+ memcpy(&sce2->tns, &sce1->tns, sizeof(sce1->tns));
+ sce2->tns.present = 0;
+ sce1->tns.present = 0;
+ } else {
+ if (get_bits1(gb)) {
+ sce2->tns.present = 1;
+ sce1->tns.present = 1;
+ } else {
+ sce2->tns.present = get_bits1(gb);
+ sce1->tns.present = !sce2->tns.present;
+ }
+ }
+ }
+
+ return 0;
+}
+
+/* 7.2.4 Generation of random signs for spectral noise filling
+ * The spec defines this function exactly; this version is equivalent,
+ * just written to be slightly faster. */
+static inline float noise_random_sign(unsigned int *seed)
+{
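+    /* Linear congruential step from the spec: seed' = seed*69069 + 5;
+     * bit 16 of the updated seed selects the sign. */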
+ unsigned int new_seed = *seed = ((*seed) * 69069) + 5;
+ if (((new_seed) & 0x10000) > 0)
+ return -1.f;
+ return +1.f;
+}
+
+static void apply_noise_fill(AACDecContext *ac, SingleChannelElement *sce,
+ AACUsacElemData *ue)
+{
+ float *coef;
+ IndividualChannelStream *ics = &sce->ics;
+
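+    /* The 3-bit noise level maps to an amplitude of 2^((level - 14) / 3);
+     * the 5-bit offset adjusts the scalefactor of bands that were quantized
+     * entirely to zero. */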
+    float noise_val = pow(2, (ue->noise.level - 14) / 3.0f);
+ int noise_offset = ue->noise.offset - 16;
+ int band_off;
+
+ band_off = ff_usac_noise_fill_start_offset[ac->oc[1].m4ac.frame_length_short]
+ [ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE];
+
+ coef = sce->coeffs;
+ for (int g = 0; g < ics->num_window_groups; g++) {
+ unsigned g_len = ics->group_len[g];
+
+ for (int sfb = 0; sfb < ics->max_sfb; sfb++) {
+ float *cb = coef + ics->swb_offset[sfb];
+ int cb_len = ics->swb_offset[sfb + 1] - ics->swb_offset[sfb];
+ int band_quantized_to_zero = 1;
+
+ if (ics->swb_offset[sfb] < band_off)
+ continue;
+
+ for (int group = 0; group < (unsigned)g_len; group++, cb += 128) {
+ for (int z = 0; z < cb_len; z++) {
+ if (cb[z] == 0)
+ cb[z] = noise_random_sign(&sce->ue.noise.seed) * noise_val;
+ else
+ band_quantized_to_zero = 0;
+ }
+ }
+
+ if (band_quantized_to_zero)
+ sce->sf[g*ics->max_sfb + sfb] += noise_offset;
+ }
+ coef += g_len << 7;
+ }
+}
+
+static void spectrum_scale(AACDecContext *ac, SingleChannelElement *sce,
+ AACUsacElemData *ue)
+{
+ IndividualChannelStream *ics = &sce->ics;
+ float *coef;
+
+ /* Synthesise noise */
+ if (ue->noise.level)
+ apply_noise_fill(ac, sce, ue);
+
+ /* Apply scalefactors */
+ coef = sce->coeffs;
+ for (int g = 0; g < ics->num_window_groups; g++) {
+ unsigned g_len = ics->group_len[g];
+
+ for (int sfb = 0; sfb < ics->max_sfb; sfb++) {
+ float *cb = coef + ics->swb_offset[sfb];
+ int cb_len = ics->swb_offset[sfb + 1] - ics->swb_offset[sfb];
+ float sf = sce->sf[g*ics->max_sfb + sfb];
+
+ for (int group = 0; group < (unsigned)g_len; group++, cb += 128)
+ ac->fdsp->vector_fmul_scalar(cb, cb, sf, cb_len);
+ }
+ coef += g_len << 7;
+ }
+}
+
+static void complex_stereo_downmix_prev(AACDecContext *ac, ChannelElement *cpe,
+ float *dmix_re)
+{
+ IndividualChannelStream *ics = &cpe->ch[0].ics;
+ int sign = !cpe->us.pred_dir ? +1 : -1;
+    float *coef1 = cpe->ch[0].prev_coeffs;
+    float *coef2 = cpe->ch[1].prev_coeffs;
+
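+    /* Real part of the previous frame's downmix: (L + R)/2, or (L - R)/2 when
+     * the prediction direction is reversed. */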
+ for (int g = 0; g < ics->num_window_groups; g++) {
+ unsigned g_len = ics->group_len[g];
+ for (int sfb = 0; sfb < cpe->max_sfb_ste; sfb++) {
+ int off = ics->swb_offset[sfb];
+ int cb_len = ics->swb_offset[sfb + 1] - off;
+
+ float *c1 = coef1 + off;
+ float *c2 = coef2 + off;
+ float *dm = dmix_re + off;
+
+ for (int group = 0; group < (unsigned)g_len;
+ group++, c1 += 128, c2 += 128, dm += 128) {
+ for (int z = 0; z < cb_len; z++)
+ dm[z] = 0.5*(c1[z] + sign*c2[z]);
+ }
+ }
+
+ coef1 += g_len << 7;
+ coef2 += g_len << 7;
+ dmix_re += g_len << 7;
+ }
+}
+
+static void complex_stereo_downmix_cur(AACDecContext *ac, ChannelElement *cpe,
+ float *dmix_re)
+{
+ AACUsacStereo *us = &cpe->us;
+ IndividualChannelStream *ics = &cpe->ch[0].ics;
+ int sign = !cpe->us.pred_dir ? +1 : -1;
+ float *coef1 = cpe->ch[0].coeffs;
+ float *coef2 = cpe->ch[1].coeffs;
+
+ for (int g = 0; g < ics->num_window_groups; g++) {
+ unsigned g_len = ics->group_len[g];
+ for (int sfb = 0; sfb < cpe->max_sfb_ste; sfb++) {
+ int off = ics->swb_offset[sfb];
+ int cb_len = ics->swb_offset[sfb + 1] - off;
+
+ float *c1 = coef1 + off;
+ float *c2 = coef2 + off;
+ float *dm = dmix_re + off;
+
+ if (us->pred_used[g*cpe->max_sfb_ste + sfb]) {
+ for (int group = 0; group < (unsigned)g_len;
+ group++, c1 += 128, c2 += 128, dm += 128) {
+ for (int z = 0; z < cb_len; z++)
+ dm[z] = 0.5*(c1[z] + sign*c2[z]);
+ }
+ } else {
+ for (int group = 0; group < (unsigned)g_len;
+ group++, c1 += 128, c2 += 128, dm += 128) {
+ for (int z = 0; z < cb_len; z++)
+ dm[z] = c1[z];
+ }
+ }
+ }
+
+ coef1 += g_len << 7;
+ coef2 += g_len << 7;
+ dmix_re += g_len << 7;
+ }
+}
+
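+/* Estimate the MDST (imaginary) part of the downmix by running a 7-tap
+ * symmetric FIR filter over its MDCT (real) coefficients. The three bins at
+ * either edge mirror the spectrum, and even/odd bins can be scaled by
+ * separate sign factors. */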
+static void complex_stereo_interpolate_imag(float *im, float *re, const float f[7],
+ int len, int factor_even, int factor_odd)
+{
+ int i = 0;
+ float s;
+
+ s = f[6]*re[2] + f[5]*re[1] + f[4]*re[0] +
+ f[3]*re[0] +
+ f[2]*re[1] + f[1]*re[2] + f[0]*re[3];
+ im[i] += s*factor_even;
+
+ i = 1;
+ s = f[6]*re[1] + f[5]*re[0] + f[4]*re[0] +
+ f[3]*re[1] +
+ f[2]*re[2] + f[1]*re[3] + f[0]*re[4];
+ im[i] += s*factor_odd;
+
+ i = 2;
+ s = f[6]*re[0] + f[5]*re[0] + f[4]*re[1] +
+ f[3]*re[2] +
+ f[2]*re[3] + f[1]*re[4] + f[0]*re[5];
+
+ im[i] += s*factor_even;
+ for (i = 3; i < len - 4; i += 2) {
+ s = f[6]*re[i-3] + f[5]*re[i-2] + f[4]*re[i-1] +
+ f[3]*re[i] +
+ f[2]*re[i+1] + f[1]*re[i+2] + f[0]*re[i+3];
+ im[i+0] += s*factor_odd;
+
+ s = f[6]*re[i-2] + f[5]*re[i-1] + f[4]*re[i] +
+ f[3]*re[i+1] +
+ f[2]*re[i+2] + f[1]*re[i+3] + f[0]*re[i+4];
+ im[i+1] += s*factor_even;
+ }
+
+ i = len - 3;
+ s = f[6]*re[i-3] + f[5]*re[i-2] + f[4]*re[i-1] +
+ f[3]*re[i] +
+ f[2]*re[i+1] + f[1]*re[i+2] + f[0]*re[i+2];
+ im[i] += s*factor_odd;
+
+ i = len - 2;
+ s = f[6]*re[i-3] + f[5]*re[i-2] + f[4]*re[i-1] +
+ f[3]*re[i] +
+ f[2]*re[i+1] + f[1]*re[i+1] + f[0]*re[i];
+ im[i] += s*factor_even;
+
+ i = len - 1;
+ s = f[6]*re[i-3] + f[5]*re[i-2] + f[4]*re[i-1] +
+ f[3]*re[i] +
+ f[2]*re[i] + f[1]*re[i-1] + f[0]*re[i-2];
+ im[i] += s*factor_odd;
+}
+
+static void apply_complex_stereo(AACDecContext *ac, ChannelElement *cpe)
+{
+ AACUsacStereo *us = &cpe->us;
+ IndividualChannelStream *ics = &cpe->ch[0].ics;
+ float *coef1 = cpe->ch[0].coeffs;
+ float *coef2 = cpe->ch[1].coeffs;
+ float *dmix_im = us->dmix_im;
+
+ for (int g = 0; g < ics->num_window_groups; g++) {
+ unsigned g_len = ics->group_len[g];
+ for (int sfb = 0; sfb < cpe->max_sfb_ste; sfb++) {
+ int off = ics->swb_offset[sfb];
+ int cb_len = ics->swb_offset[sfb + 1] - off;
+
+ float *c1 = coef1 + off;
+ float *c2 = coef2 + off;
+ float *dm_im = dmix_im + off;
+ float alpha_re = us->alpha_q_re[g*cpe->max_sfb_ste + sfb];
+ float alpha_im = us->alpha_q_im[g*cpe->max_sfb_ste + sfb];
+
+ if (!us->pred_used[g*cpe->max_sfb_ste + sfb])
+ continue;
+
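+            /* Undo the prediction: with pred_dir == 0, ch0 carries the downmix
+             * and ch1 the residual, so side = res - alpha_re*dmix_re -
+             * alpha_im*dmix_im, then L = dmix + side and R = dmix - side.
+             * With pred_dir == 1 the prediction runs from the side channel
+             * instead. */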
+ if (!cpe->us.pred_dir) {
+ for (int group = 0; group < (unsigned)g_len;
+ group++, c1 += 128, c2 += 128, dm_im += 128) {
+ for (int z = 0; z < cb_len; z++) {
+ float side;
+ side = c2[z] - alpha_re*c1[z] - alpha_im*dm_im[z];
+ c2[z] = c1[z] - side;
+ c1[z] = c1[z] + side;
+ }
+ }
+ } else {
+ for (int group = 0; group < (unsigned)g_len;
+ group++, c1 += 128, c2 += 128, dm_im += 128) {
+ for (int z = 0; z < cb_len; z++) {
+ float mid;
+ mid = c2[z] - alpha_re*c1[z] - alpha_im*dm_im[z];
+ c2[z] = mid - c1[z];
+ c1[z] = mid + c1[z];
+ }
+ }
+ }
+ }
+
+ coef1 += g_len << 7;
+ coef2 += g_len << 7;
+ dmix_im += g_len << 7;
+ }
+}
+
+static const float *complex_stereo_get_filter(ChannelElement *cpe, int is_prev)
+{
+ int win, shape;
+ if (!is_prev) {
+ switch (cpe->ch[0].ics.window_sequence[0]) {
+ default:
+ case ONLY_LONG_SEQUENCE:
+ case EIGHT_SHORT_SEQUENCE:
+ win = 0;
+ break;
+ case LONG_START_SEQUENCE:
+ win = 1;
+ break;
+ case LONG_STOP_SEQUENCE:
+ win = 2;
+ break;
+ }
+
+ if (cpe->ch[0].ics.use_kb_window[0] == 0 &&
+ cpe->ch[0].ics.use_kb_window[1] == 0)
+ shape = 0;
+ else if (cpe->ch[0].ics.use_kb_window[0] == 1 &&
+ cpe->ch[0].ics.use_kb_window[1] == 1)
+ shape = 1;
+ else if (cpe->ch[0].ics.use_kb_window[0] == 0 &&
+ cpe->ch[0].ics.use_kb_window[1] == 1)
+ shape = 2;
+        else
+            shape = 3;
+    } else {
+        win = cpe->ch[0].ics.window_sequence[0] == LONG_STOP_SEQUENCE;
+        shape = cpe->ch[0].ics.use_kb_window[1];
+
+        return ff_aac_usac_mdst_filt_prev[win][shape];
+    }
+
+    return ff_aac_usac_mdst_filt_cur[win][shape];
+}
+
+static void spectrum_decode(AACDecContext *ac, AACUSACConfig *usac,
+ ChannelElement *cpe, int nb_channels)
+{
+ AACUsacStereo *us = &cpe->us;
+
+ for (int ch = 0; ch < nb_channels; ch++) {
+ SingleChannelElement *sce = &cpe->ch[ch];
+ AACUsacElemData *ue = &sce->ue;
+
+ spectrum_scale(ac, sce, ue);
+ }
+
+ if (nb_channels > 1 && us->common_window) {
+ if (us->ms_mask_mode == 3) {
+ const float *filt;
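+            /* Complex prediction: build the current and previous downmix,
+             * estimate their imaginary (MDST) part by filtering, then undo the
+             * prediction to recover L/R. */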
+ complex_stereo_downmix_cur(ac, cpe, us->dmix_re);
+ complex_stereo_downmix_prev(ac, cpe, us->prev_dmix_re);
+
+ filt = complex_stereo_get_filter(cpe, 0);
+ complex_stereo_interpolate_imag(us->dmix_im, us->dmix_re, filt,
+ usac->core_frame_len, 1, 1);
+ if (us->use_prev_frame) {
+ filt = complex_stereo_get_filter(cpe, 1);
+ complex_stereo_interpolate_imag(us->dmix_im, us->prev_dmix_re, filt,
+ usac->core_frame_len, -1, 1);
+ }
+
+ apply_complex_stereo(ac, cpe);
+ } else if (us->ms_mask_mode > 0) {
+ ac->dsp.apply_mid_side_stereo(ac, cpe);
+ }
+ }
+
+    /* Save coefficients and alpha values for complex prediction in the next frame */
+ if (nb_channels > 1) {
+ AACUsacStereo *us = &cpe->us;
+ for (int ch = 0; ch < nb_channels; ch++) {
+ SingleChannelElement *sce = &cpe->ch[ch];
+ memcpy(sce->prev_coeffs, sce->coeffs, sizeof(sce->coeffs));
+ }
+ memcpy(us->prev_alpha_q_re, us->alpha_q_re, sizeof(us->alpha_q_re));
+ memcpy(us->prev_alpha_q_im, us->alpha_q_im, sizeof(us->alpha_q_im));
+ }
+
+ for (int ch = 0; ch < nb_channels; ch++) {
+ SingleChannelElement *sce = &cpe->ch[ch];
+
+ /* Apply TNS */
+ if (sce->tns.present)
+ ac->dsp.apply_tns(sce->coeffs, &sce->tns, &sce->ics, 1);
+
+ ac->oc[1].m4ac.frame_length_short ? ac->dsp.imdct_and_windowing_768(ac, sce) :
+ ac->dsp.imdct_and_windowing(ac, sce);
+ }
+}
+
+static int decode_usac_core_coder(AACDecContext *ac, AACUSACConfig *usac,
+ AACUsacElemConfig *ec, ChannelElement *che,
+ GetBitContext *gb, int indep_flag, int nb_channels)
+{
+ int ret;
+ int arith_reset_flag;
+ AACUsacStereo *us = &che->us;
+
+ /* Local symbols */
+ uint8_t global_gain;
+
+ us->common_window = 0;
+ che->ch[0].tns.present = che->ch[1].tns.present = 0;
+
+ for (int ch = 0; ch < nb_channels; ch++) {
+ SingleChannelElement *sce = &che->ch[ch];
+ AACUsacElemData *ue = &sce->ue;
+
+ ue->core_mode = get_bits1(gb);
+ }
+
+ if (nb_channels == 2) {
+ ret = decode_usac_stereo_info(ac, usac, ec, che, gb, indep_flag);
+ if (ret)
+ return ret;
+ }
+
+ for (int ch = 0; ch < nb_channels; ch++) {
+ SingleChannelElement *sce = &che->ch[ch];
+ IndividualChannelStream *ics = &sce->ics;
+ AACUsacElemData *ue = &sce->ue;
+
+        if (ue->core_mode) { /* lpd_channel_stream */
+            ret = ff_aac_ldp_parse_channel_stream(ac, usac, ue, gb);
+            if (ret < 0)
+                return ret;
+            continue;
+        }
+
+ if ((nb_channels == 1) ||
+ (che->ch[0].ue.core_mode != che->ch[1].ue.core_mode))
+ sce->tns.present = get_bits1(gb);
+
+ /* fd_channel_stream */
+ global_gain = get_bits(gb, 8);
+
+ ue->noise.level = 0;
+ if (ec->noise_fill) {
+ ue->noise.level = get_bits(gb, 3);
+ ue->noise.offset = get_bits(gb, 5);
+ }
+
+ if (!us->common_window) {
+ /* ics_info() */
+ ics->window_sequence[1] = ics->window_sequence[0];
+ ics->window_sequence[0] = get_bits(gb, 2);
+ ics->use_kb_window[1] = ics->use_kb_window[0];
+ ics->use_kb_window[0] = get_bits1(gb);
+ if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
+ ics->max_sfb = get_bits(gb, 4);
+ ue->scale_factor_grouping = get_bits(gb, 7);
+ } else {
+ ics->max_sfb = get_bits(gb, 6);
+ }
+
+ ret = setup_sce(ac, sce, usac);
+ if (ret < 0)
+ return ret;
+ }
+
+ if (ec->tw_mdct && !us->common_tw) {
+ /* tw_data() */
+ if (get_bits1(gb)) { /* tw_data_present */
+ /* Time warping is not supported in baseline profile streams. */
+ avpriv_report_missing_feature(ac->avctx,
+ "AAC USAC timewarping");
+ return AVERROR_PATCHWELCOME;
+ }
+ }
+
+ ret = decode_usac_scale_factors(ac, sce, gb, global_gain);
+ if (ret < 0)
+ return ret;
+
+ ac->dsp.dequant_scalefactors(sce);
+
+ if (sce->tns.present) {
+ ret = ff_aac_decode_tns(ac, &sce->tns, gb, ics);
+ if (ret < 0)
+ return ret;
+ }
+
+ /* ac_spectral_data */
+ arith_reset_flag = indep_flag;
+ if (!arith_reset_flag)
+ arith_reset_flag = get_bits1(gb);
+
+ /* Decode coeffs */
+ memset(&sce->coeffs[0], 0, 1024*sizeof(float));
+ for (int win = 0; win < ics->num_windows; win++) {
+ int lg = ics->swb_offset[ics->max_sfb];
+ int N;
+ if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE)
+ N = usac->core_frame_len / 8;
+ else
+ N = usac->core_frame_len;
+
+ ret = decode_spectrum_and_dequant_ac(ac, sce->coeffs + win*128, gb,
+ sce->sf, &ue->ac,
+ arith_reset_flag && (win == 0),
+ lg, N);
+ if (ret < 0)
+ return ret;
+ }
+
+ if (get_bits1(gb)) { /* fac_data_present */
+ const uint16_t len_8 = usac->core_frame_len / 8;
+ const uint16_t len_16 = usac->core_frame_len / 16;
+ const uint16_t fac_len = ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE ? len_8 : len_16;
+ ret = ff_aac_parse_fac_data(ue, gb, 1, fac_len);
+ if (ret < 0)
+ return ret;
+ }
+ }
+
+ spectrum_decode(ac, usac, che, nb_channels);
+
+ return 0;
+}
+
+static int parse_audio_preroll(AACDecContext *ac, GetBitContext *gb)
+{
+ int ret = 0;
+ GetBitContext gbc;
+ OutputConfiguration *oc = &ac->oc[1];
+ MPEG4AudioConfig *m4ac = &oc->m4ac;
+ MPEG4AudioConfig m4ac_bak = oc->m4ac;
+ uint8_t temp_data[512];
+ uint8_t *tmp_buf = temp_data;
+ size_t tmp_buf_size = sizeof(temp_data);
+
+ av_unused int crossfade;
+ int num_preroll_frames;
+
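+    /* Configuration length in bytes; an escaped value (4 bits, then +4, then +8) */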
+ int config_len = get_escaped_value(gb, 4, 4, 8);
+
+ /* Implementations are free to pad the config to any length, so use a
+ * different reader for this. */
+ gbc = *gb;
+ ret = ff_aac_usac_config_decode(ac, ac->avctx, &gbc, oc, m4ac->chan_config);
+ if (ret < 0) {
+ *m4ac = m4ac_bak;
+ return ret;
+ } else {
+ ac->oc[1].m4ac.chan_config = 0;
+ }
+
+    /* 7.18.3.3 Bitrate adaptation
+     * If the configuration transmitted in the preroll is identical to the
+     * current one, there is nothing new to decode and the preroll frames
+     * can be skipped. */
+ if (!memcmp(m4ac, &m4ac_bak, sizeof(m4ac_bak)))
+ return 0;
+
+ skip_bits_long(gb, config_len*8);
+
+ crossfade = get_bits1(gb); /* applyCrossfade */
+ skip_bits1(gb); /* reserved */
+ num_preroll_frames = get_escaped_value(gb, 2, 4, 0); /* numPreRollFrames */
+
+ for (int i = 0; i < num_preroll_frames; i++) {
+ int got_frame_ptr = 0;
+ int au_len = get_escaped_value(gb, 16, 16, 0);
+
+ if (au_len*8 > tmp_buf_size) {
+ uint8_t *tmp2;
+ tmp_buf = tmp_buf == temp_data ? NULL : tmp_buf;
+            tmp2 = av_realloc(tmp_buf, au_len*8);
+ if (!tmp2) {
+ if (tmp_buf != temp_data)
+ av_free(tmp_buf);
+ return AVERROR(ENOMEM);
+ }
+ tmp_buf = tmp2;
+ }
+
+ /* Byte alignment is not guaranteed. */
+ for (int i = 0; i < au_len; i++)
+ tmp_buf[i] = get_bits(gb, 8);
+
+ ret = init_get_bits8(&gbc, tmp_buf, au_len);
+ if (ret < 0)
+ break;
+
+ ret = ff_aac_usac_decode_frame(ac->avctx, ac, &gbc, &got_frame_ptr);
+ if (ret < 0)
+ break;
+ }
+
+ if (tmp_buf != temp_data)
+ av_free(tmp_buf);
+
+ return 0;
+}
+
+static int parse_ext_ele(AACDecContext *ac, AACUsacElemConfig *e,
+ GetBitContext *gb)
+{
+ uint8_t *tmp;
+ uint8_t pl_frag_start = 1;
+ uint8_t pl_frag_end = 1;
+ uint32_t len;
+
+ if (!get_bits1(gb)) /* usacExtElementPresent */
+ return 0;
+
+ if (get_bits1(gb)) { /* usacExtElementUseDefaultLength */
+ len = e->ext.default_len;
+ } else {
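+        /* 8-bit length, with a 16-bit escape when the first byte is 255 */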
+ len = get_bits(gb, 8); /* usacExtElementPayloadLength */
+ if (len == 255)
+ len += get_bits(gb, 16) - 2;
+ }
+
+ if (!len)
+ return 0;
+
+ if (e->ext.payload_frag) {
+ pl_frag_start = get_bits1(gb); /* usacExtElementStart */
+ pl_frag_end = get_bits1(gb); /* usacExtElementStop */
+ }
+
+ if (pl_frag_start)
+ e->ext.pl_data_offset = 0;
+
+    /* If the extension both starts and ends in this packet, we can parse it
+     * directly from the bitstream; otherwise buffer the payload fragments
+     * until the final one arrives. */
+ if (!(pl_frag_start && pl_frag_end)) {
+ tmp = av_realloc(e->ext.pl_data, e->ext.pl_data_offset + len);
+ if (!tmp) {
+            av_freep(&e->ext.pl_data);
+ return AVERROR(ENOMEM);
+ }
+ e->ext.pl_data = tmp;
+
+        /* Read the payload out into the buffer */
+ for (int i = 0; i < len; i++)
+ e->ext.pl_data[e->ext.pl_data_offset + i] = get_bits(gb, 8);
+ }
+
+ e->ext.pl_data_offset += len;
+
+ if (pl_frag_end) {
+ int ret = 0;
+ int start_bits = get_bits_count(gb);
+ const int pl_len = e->ext.pl_data_offset;
+ GetBitContext *gb2 = gb;
+ GetBitContext gbc;
+ if (!(pl_frag_start && pl_frag_end)) {
+ ret = init_get_bits8(&gbc, e->ext.pl_data, pl_len);
+ if (ret < 0)
+ return ret;
+
+ gb2 = &gbc;
+ }
+
+ switch (e->ext.type) {
+ case ID_EXT_ELE_FILL:
+ /* Filler elements have no usable payload */
+ break;
+ case ID_EXT_ELE_AUDIOPREROLL:
+ ret = parse_audio_preroll(ac, gb2);
+ break;
+ default:
+ /* This should never happen */
+ av_assert0(0);
+ }
+ av_freep(&e->ext.pl_data);
+ if (ret < 0)
+ return ret;
+
+ skip_bits_long(gb, pl_len*8 - (get_bits_count(gb) - start_bits));
+ }
+
+ return 0;
+}
+
+int ff_aac_usac_decode_frame(AVCodecContext *avctx, AACDecContext *ac,
+ GetBitContext *gb, int *got_frame_ptr)
+{
+ int ret, nb_ch_el, is_dmono = 0;
+ int indep_flag, samples = 0;
+ int audio_found = 0, sce_count = 0;
+ AVFrame *frame = ac->frame;
+
+ ff_aac_output_configure(ac, ac->oc[1].layout_map, ac->oc[1].layout_map_tags,
+ ac->oc[1].status, 0);
+
+ indep_flag = get_bits1(gb);
+
+ nb_ch_el = 0;
+ for (int i = 0; i < ac->oc[1].usac.nb_elems; i++) {
+ AACUsacElemConfig *e = &ac->oc[1].usac.elems[i];
+ ChannelElement *che;
+
+ switch (e->type) {
+ case ID_USAC_LFE:
+ /* Fallthrough */
+ case ID_USAC_SCE:
+ che = ff_aac_get_che(ac, TYPE_SCE, nb_ch_el++);
+ if (!che) {
+ av_log(ac->avctx, AV_LOG_ERROR,
+ "channel element %d.%d is not allocated\n",
+ TYPE_SCE, nb_ch_el - 1);
+ return AVERROR_INVALIDDATA;
+ }
+
+ ret = decode_usac_core_coder(ac, &ac->oc[1].usac, e, che, gb,
+ indep_flag, 1);
+ if (ret < 0)
+ return ret;
+
+ sce_count++;
+ audio_found = 1;
+ che->present = 1;
+ samples = ac->oc[1].m4ac.frame_length_short ? 768 : 1024;
+ break;
+ case ID_USAC_CPE:
+ che = ff_aac_get_che(ac, TYPE_CPE, nb_ch_el++);
+ if (!che) {
+ av_log(ac->avctx, AV_LOG_ERROR,
+ "channel element %d.%d is not allocated\n",
+                       TYPE_CPE, nb_ch_el - 1);
+ return AVERROR_INVALIDDATA;
+ }
+
+ ret = decode_usac_core_coder(ac, &ac->oc[1].usac, e, che, gb,
+ indep_flag, 2);
+ if (ret < 0)
+ return ret;
+
+ audio_found = 1;
+ che->present = 1;
+ samples = ac->oc[1].m4ac.frame_length_short ? 768 : 1024;
+ break;
+ case ID_USAC_EXT:
+ ret = parse_ext_ele(ac, e, gb);
+ if (ret < 0)
+ return ret;
+ break;
+ }
+ }
+
+ if (ac->oc[1].status && audio_found) {
+ avctx->sample_rate = ac->oc[1].m4ac.sample_rate;
+ avctx->frame_size = samples;
+ ac->oc[1].status = OC_LOCKED;
+ }
+
+ if (!frame->data[0] && samples) {
+ av_log(avctx, AV_LOG_ERROR, "no frame data found\n");
+ return AVERROR_INVALIDDATA;
+ }
+
+ if (samples) {
+ frame->nb_samples = samples;
+ frame->sample_rate = avctx->sample_rate;
+ frame->flags = indep_flag ? AV_FRAME_FLAG_KEY : 0x0;
+ *got_frame_ptr = 1;
+ } else {
+ av_frame_unref(ac->frame);
+ frame->flags = indep_flag ? AV_FRAME_FLAG_KEY : 0x0;
+ *got_frame_ptr = 0;
+ }
+
+ /* for dual-mono audio (SCE + SCE) */
+ is_dmono = ac->dmono_mode && sce_count == 2 &&
+ !av_channel_layout_compare(&ac->oc[1].ch_layout,
+ &(AVChannelLayout)AV_CHANNEL_LAYOUT_STEREO);
+ if (is_dmono) {
+ if (ac->dmono_mode == 1)
+ frame->data[1] = frame->data[0];
+ else if (ac->dmono_mode == 2)
+ frame->data[0] = frame->data[1];
+ }
+
+ return 0;
+}
diff --git a/libavcodec/aac/aacdec_usac.h b/libavcodec/aac/aacdec_usac.h
new file mode 100644
index 0000000000..4116a2073a
--- /dev/null
+++ b/libavcodec/aac/aacdec_usac.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2024 Lynne <dev@lynne.ee>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_AAC_AACDEC_USAC_H
+#define AVCODEC_AAC_AACDEC_USAC_H
+
+#include "aacdec.h"
+
+#include "libavcodec/get_bits.h"
+
+int ff_aac_usac_config_decode(AACDecContext *ac, AVCodecContext *avctx,
+ GetBitContext *gb, OutputConfiguration *oc,
+ int channel_config);
+
+int ff_aac_usac_reset_state(AACDecContext *ac, OutputConfiguration *oc);
+
+int ff_aac_usac_decode_frame(AVCodecContext *avctx, AACDecContext *ac,
+ GetBitContext *gb, int *got_frame_ptr);
+
+#endif /* AVCODEC_AAC_AACDEC_USAC_H */
diff --git a/libavcodec/aactab.c b/libavcodec/aactab.c
index 18afa69bad..7b040531aa 100644
--- a/libavcodec/aactab.c
+++ b/libavcodec/aactab.c
@@ -1998,6 +1998,11 @@ const uint8_t ff_tns_max_bands_128[] = {
};
// @}
+const uint8_t ff_usac_noise_fill_start_offset[2][2] = {
+ { 160, 20 },
+ { 120, 15 },
+};
+
const DECLARE_ALIGNED(32, float, ff_aac_eld_window_512)[1920] = {
0.00338834, 0.00567745, 0.00847677, 0.01172641,
0.01532555, 0.01917664, 0.02318809, 0.02729259,
@@ -3895,3 +3900,40 @@ DECLARE_ALIGNED(16, const float, ff_aac_deemph_weights)[16] = {
0,
USAC_EMPH_COEFF,
};
+
+const int ff_aac_usac_samplerate[32] = {
+ 96000, 88200, 64000, 48000, 44100, 32000, 24000, 22050,
+ 16000, 12000, 11025, 8000, 7350, -1, -1, 57600,
+ 51200, 40000, 38400, 34150, 28800, 25600, 20000, 19200,
+ 17075, 14400, 12800, 9600, -1, -1, -1, -1,
+};
+
+/* Window type (only long+eight, start/stop/stopstart), sine+sine, kbd+kbd, sine+kbd, kbd+sine */
+const float ff_aac_usac_mdst_filt_cur[4 /* Window */][4 /* Shape */][7] =
+{
+ { { 0.000000, 0.000000, 0.500000, 0.000000, -0.500000, 0.000000, 0.000000 },
+ { 0.091497, 0.000000, 0.581427, 0.000000, -0.581427, 0.000000, -0.091497 },
+ { 0.045748, 0.057238, 0.540714, 0.000000, -0.540714, -0.057238, -0.045748 },
+ { 0.045748, -0.057238, 0.540714, 0.000000, -0.540714, 0.057238, -0.045748 } },
+ { { 0.102658, 0.103791, 0.567149, 0.000000, -0.567149, -0.103791, -0.102658 },
+ { 0.150512, 0.047969, 0.608574, 0.000000, -0.608574, -0.047969, -0.150512 },
+ { 0.104763, 0.105207, 0.567861, 0.000000, -0.567861, -0.105207, -0.104763 },
+ { 0.148406, 0.046553, 0.607863, 0.000000, -0.607863, -0.046553, -0.148406 } },
+ { { 0.102658, -0.103791, 0.567149, 0.000000, -0.567149, 0.103791, -0.102658 },
+ { 0.150512, -0.047969, 0.608574, 0.000000, -0.608574, 0.047969, -0.150512 },
+ { 0.148406, -0.046553, 0.607863, 0.000000, -0.607863, 0.046553, -0.148406 },
+ { 0.104763, -0.105207, 0.567861, 0.000000, -0.567861, 0.105207, -0.104763 } },
+ { { 0.205316, 0.000000, 0.634298, 0.000000, -0.634298, 0.000000, -0.205316 },
+ { 0.209526, 0.000000, 0.635722, 0.000000, -0.635722, 0.000000, -0.209526 },
+ { 0.207421, 0.001416, 0.635010, 0.000000, -0.635010, -0.001416, -0.207421 },
+ { 0.207421, -0.001416, 0.635010, 0.000000, -0.635010, 0.001416, -0.207421 } }
+};
+
+/* Window type (everything/longstop+stopstart), sine or kbd */
+const float ff_aac_usac_mdst_filt_prev[2 /* Window */][2 /* sine/kbd */][7] =
+{
+ { { 0.000000, 0.106103, 0.250000, 0.318310, 0.250000, 0.106103, 0.000000 },
+ { 0.059509, 0.123714, 0.186579, 0.213077, 0.186579, 0.123714, 0.059509 } },
+ { { 0.038498, 0.039212, 0.039645, 0.039790, 0.039645, 0.039212, 0.038498 },
+ { 0.026142, 0.026413, 0.026577, 0.026631, 0.026577, 0.026413, 0.026142 } }
+};
diff --git a/libavcodec/aactab.h b/libavcodec/aactab.h
index 481fc57d93..8dbb2098c5 100644
--- a/libavcodec/aactab.h
+++ b/libavcodec/aactab.h
@@ -115,4 +115,14 @@ extern const uint8_t ff_tns_max_bands_512 [13];
extern const uint8_t ff_tns_max_bands_480 [13];
extern const uint8_t ff_tns_max_bands_128 [13];
+/* [x][y], x == 1 -> frame len is 768 samples, y == 1 -> is eight_short */
+extern const uint8_t ff_usac_noise_fill_start_offset[2][2];
+
+extern const int ff_aac_usac_samplerate[32];
+
+/* Window type (only long+eight, start/stop/stopstart), sine+sine, kbd+kbd, sine+kbd, kbd+sine */
+extern const float ff_aac_usac_mdst_filt_cur[4 /* Window */][4 /* Shape */][7];
+/* Window type (everything/longstop+stopstart), sine or kbd */
+extern const float ff_aac_usac_mdst_filt_prev[2 /* Window */][2 /* sine/kbd */][7];
+
#endif /* AVCODEC_AACTAB_H */