diff options
author | foo86 <foobaz86@gmail.com> | 2016-01-16 11:54:38 +0300 |
---|---|---|
committer | Hendrik Leppkes <h.leppkes@gmail.com> | 2016-01-31 17:09:38 +0100 |
commit | ae5b2c52501d5009fe712334428138a9b758849b (patch) | |
tree | 8e30d705d98efe3b249ff3a57eb01789c3ff4c4f | |
parent | 0930b2dd1f01213ca1f08aff3a9b8b0d5515cede (diff) | |
download | ffmpeg-ae5b2c52501d5009fe712334428138a9b758849b.tar.gz |
avcodec/dca: add new decoder based on libdcadec
-rw-r--r-- | Changelog | 1 | ||||
-rwxr-xr-x | configure | 1 | ||||
-rw-r--r-- | libavcodec/Makefile | 3 | ||||
-rw-r--r-- | libavcodec/aarch64/Makefile | 4 | ||||
-rw-r--r-- | libavcodec/allcodecs.c | 2 | ||||
-rw-r--r-- | libavcodec/arm/Makefile | 6 | ||||
-rw-r--r-- | libavcodec/dca_core.c | 2603 | ||||
-rw-r--r-- | libavcodec/dca_core.h | 206 | ||||
-rw-r--r-- | libavcodec/dca_exss.c | 514 | ||||
-rw-r--r-- | libavcodec/dca_exss.h | 92 | ||||
-rw-r--r-- | libavcodec/dca_xll.c | 1499 | ||||
-rw-r--r-- | libavcodec/dca_xll.h | 149 | ||||
-rw-r--r-- | libavcodec/dcadec.c | 417 | ||||
-rw-r--r-- | libavcodec/dcadec.h | 80 | ||||
-rw-r--r-- | libavcodec/dcadsp.c | 413 | ||||
-rw-r--r-- | libavcodec/dcadsp.h | 91 | ||||
-rw-r--r-- | libavcodec/version.h | 2 | ||||
-rw-r--r-- | libavcodec/x86/Makefile | 4 | ||||
-rw-r--r-- | tests/checkasm/Makefile | 2 | ||||
-rw-r--r-- | tests/checkasm/checkasm.c | 4 | ||||
-rw-r--r-- | tests/fate/acodec.mak | 4 | ||||
-rw-r--r-- | tests/fate/audio.mak | 4 |
22 files changed, 6085 insertions, 16 deletions
@@ -61,6 +61,7 @@ version <next>: - support for dvaudio in wav and avi - libaacplus and libvo-aacenc support removed - Cineform HD decoder +- new DCA decoder with full support for DTS-HD extensions version 2.8: @@ -2271,6 +2271,7 @@ comfortnoise_encoder_select="lpc" cook_decoder_select="audiodsp mdct sinewin" cscd_decoder_select="lzo" cscd_decoder_suggest="zlib" +dca_decoder_select="mdct" dds_decoder_select="texturedsp" dirac_decoder_select="dirac_parse dwt golomb videodsp mpegvideoenc" dnxhd_decoder_select="blockdsp idctdsp" diff --git a/libavcodec/Makefile b/libavcodec/Makefile index 1ad2e936db..a89fb11cea 100644 --- a/libavcodec/Makefile +++ b/libavcodec/Makefile @@ -222,6 +222,9 @@ OBJS-$(CONFIG_COMFORTNOISE_ENCODER) += cngenc.o OBJS-$(CONFIG_CPIA_DECODER) += cpia.o OBJS-$(CONFIG_CSCD_DECODER) += cscd.o OBJS-$(CONFIG_CYUV_DECODER) += cyuv.o +OBJS-$(CONFIG_DCA_DECODER) += dcadec.o dca.o dcadata.o \ + dca_core.o dca_exss.o dca_xll.o \ + dcadsp.o dcadct.o synth_filter.o OBJS-$(CONFIG_DCA_ENCODER) += dcaenc.o dca.o dcadata.o OBJS-$(CONFIG_DDS_DECODER) += dds.o OBJS-$(CONFIG_DIRAC_DECODER) += diracdec.o dirac.o diracdsp.o \ diff --git a/libavcodec/aarch64/Makefile b/libavcodec/aarch64/Makefile index 803f55b4cf..fd89035cb0 100644 --- a/libavcodec/aarch64/Makefile +++ b/libavcodec/aarch64/Makefile @@ -1,4 +1,4 @@ -#OBJS-$(CONFIG_DCA_DECODER) += aarch64/synth_filter_init.o +OBJS-$(CONFIG_DCA_DECODER) += aarch64/synth_filter_init.o OBJS-$(CONFIG_FFT) += aarch64/fft_init_aarch64.o OBJS-$(CONFIG_FMTCONVERT) += aarch64/fmtconvert_init.o OBJS-$(CONFIG_H264CHROMA) += aarch64/h264chroma_init_aarch64.o @@ -17,7 +17,7 @@ OBJS-$(CONFIG_VORBIS_DECODER) += aarch64/vorbisdsp_init.o ARMV8-OBJS-$(CONFIG_VIDEODSP) += aarch64/videodsp.o -#NEON-OBJS-$(CONFIG_DCA_DECODER) += aarch64/synth_filter_neon.o +NEON-OBJS-$(CONFIG_DCA_DECODER) += aarch64/synth_filter_neon.o NEON-OBJS-$(CONFIG_FFT) += aarch64/fft_neon.o NEON-OBJS-$(CONFIG_FMTCONVERT) += aarch64/fmtconvert_neon.o 
NEON-OBJS-$(CONFIG_H264CHROMA) += aarch64/h264cmc_neon.o diff --git a/libavcodec/allcodecs.c b/libavcodec/allcodecs.c index b17472933d..c7c1af5834 100644 --- a/libavcodec/allcodecs.c +++ b/libavcodec/allcodecs.c @@ -391,7 +391,7 @@ void avcodec_register_all(void) REGISTER_DECODER(BINKAUDIO_RDFT, binkaudio_rdft); REGISTER_DECODER(BMV_AUDIO, bmv_audio); REGISTER_DECODER(COOK, cook); - REGISTER_ENCODER(DCA, dca); + REGISTER_ENCDEC (DCA, dca); REGISTER_DECODER(DSD_LSBF, dsd_lsbf); REGISTER_DECODER(DSD_MSBF, dsd_msbf); REGISTER_DECODER(DSD_LSBF_PLANAR, dsd_lsbf_planar); diff --git a/libavcodec/arm/Makefile b/libavcodec/arm/Makefile index b2f5a5aec5..179c403bc1 100644 --- a/libavcodec/arm/Makefile +++ b/libavcodec/arm/Makefile @@ -36,7 +36,7 @@ OBJS-$(CONFIG_VP8DSP) += arm/vp8dsp_init_arm.o # decoders/encoders OBJS-$(CONFIG_AAC_DECODER) += arm/aacpsdsp_init_arm.o \ arm/sbrdsp_init_arm.o -#OBJS-$(CONFIG_DCA_DECODER) += arm/synth_filter_init_arm.o +OBJS-$(CONFIG_DCA_DECODER) += arm/synth_filter_init_arm.o OBJS-$(CONFIG_HEVC_DECODER) += arm/hevcdsp_init_arm.o OBJS-$(CONFIG_MLP_DECODER) += arm/mlpdsp_init_arm.o OBJS-$(CONFIG_RV40_DECODER) += arm/rv40dsp_init_arm.o @@ -87,7 +87,7 @@ VFP-OBJS-$(CONFIG_FMTCONVERT) += arm/fmtconvert_vfp.o VFP-OBJS-$(CONFIG_MDCT) += arm/mdct_vfp.o # decoders/encoders -#VFP-OBJS-$(CONFIG_DCA_DECODER) += arm/synth_filter_vfp.o +VFP-OBJS-$(CONFIG_DCA_DECODER) += arm/synth_filter_vfp.o # NEON optimizations @@ -126,7 +126,7 @@ NEON-OBJS-$(CONFIG_VP8DSP) += arm/vp8dsp_init_neon.o \ NEON-OBJS-$(CONFIG_AAC_DECODER) += arm/aacpsdsp_neon.o \ arm/sbrdsp_neon.o NEON-OBJS-$(CONFIG_LLAUDDSP) += arm/lossless_audiodsp_neon.o -#NEON-OBJS-$(CONFIG_DCA_DECODER) += arm/synth_filter_neon.o +NEON-OBJS-$(CONFIG_DCA_DECODER) += arm/synth_filter_neon.o NEON-OBJS-$(CONFIG_HEVC_DECODER) += arm/hevcdsp_init_neon.o \ arm/hevcdsp_deblock_neon.o \ arm/hevcdsp_idct_neon.o \ diff --git a/libavcodec/dca_core.c b/libavcodec/dca_core.c new file mode 100644 index 
0000000000..94f0f3dcf0 --- /dev/null +++ b/libavcodec/dca_core.c @@ -0,0 +1,2603 @@ +/* + * Copyright (C) 2016 foo86 + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "dcadec.h" +#include "dcadata.h" +#include "dcahuff.h" +#include "dcamath.h" +#include "dca_syncwords.h" + +#if ARCH_ARM +#include "arm/dca.h" +#endif + +enum HeaderType { + HEADER_CORE, + HEADER_XCH, + HEADER_XXCH +}; + +enum AudioMode { + AMODE_MONO, // Mode 0: A (mono) + AMODE_MONO_DUAL, // Mode 1: A + B (dual mono) + AMODE_STEREO, // Mode 2: L + R (stereo) + AMODE_STEREO_SUMDIFF, // Mode 3: (L+R) + (L-R) (sum-diff) + AMODE_STEREO_TOTAL, // Mode 4: LT + RT (left and right total) + AMODE_3F, // Mode 5: C + L + R + AMODE_2F1R, // Mode 6: L + R + S + AMODE_3F1R, // Mode 7: C + L + R + S + AMODE_2F2R, // Mode 8: L + R + SL + SR + AMODE_3F2R, // Mode 9: C + L + R + SL + SR + + AMODE_COUNT +}; + +enum ExtAudioType { + EXT_AUDIO_XCH = 0, + EXT_AUDIO_X96 = 2, + EXT_AUDIO_XXCH = 6 +}; + +enum LFEFlag { + LFE_FLAG_NONE, + LFE_FLAG_128, + LFE_FLAG_64, + LFE_FLAG_INVALID +}; + +static const int8_t prm_ch_to_spkr_map[AMODE_COUNT][5] = { + { DCA_SPEAKER_C, -1, -1, -1, -1 }, + { DCA_SPEAKER_L, DCA_SPEAKER_R, -1, -1, -1 }, + { DCA_SPEAKER_L, DCA_SPEAKER_R, -1, -1, -1 }, + { 
DCA_SPEAKER_L, DCA_SPEAKER_R, -1, -1, -1 }, + { DCA_SPEAKER_L, DCA_SPEAKER_R, -1, -1, -1 }, + { DCA_SPEAKER_C, DCA_SPEAKER_L, DCA_SPEAKER_R , -1, -1 }, + { DCA_SPEAKER_L, DCA_SPEAKER_R, DCA_SPEAKER_Cs, -1, -1 }, + { DCA_SPEAKER_C, DCA_SPEAKER_L, DCA_SPEAKER_R , DCA_SPEAKER_Cs, -1 }, + { DCA_SPEAKER_L, DCA_SPEAKER_R, DCA_SPEAKER_Ls, DCA_SPEAKER_Rs, -1 }, + { DCA_SPEAKER_C, DCA_SPEAKER_L, DCA_SPEAKER_R, DCA_SPEAKER_Ls, DCA_SPEAKER_Rs } +}; + +static const uint8_t audio_mode_ch_mask[AMODE_COUNT] = { + DCA_SPEAKER_LAYOUT_MONO, + DCA_SPEAKER_LAYOUT_STEREO, + DCA_SPEAKER_LAYOUT_STEREO, + DCA_SPEAKER_LAYOUT_STEREO, + DCA_SPEAKER_LAYOUT_STEREO, + DCA_SPEAKER_LAYOUT_3_0, + DCA_SPEAKER_LAYOUT_2_1, + DCA_SPEAKER_LAYOUT_3_1, + DCA_SPEAKER_LAYOUT_2_2, + DCA_SPEAKER_LAYOUT_5POINT0 +}; + +static const uint8_t block_code_nbits[7] = { + 7, 10, 12, 13, 15, 17, 19 +}; + +static const uint8_t quant_index_sel_nbits[DCA_CODE_BOOKS] = { + 1, 2, 2, 2, 2, 3, 3, 3, 3, 3 +}; + +static const uint8_t quant_index_group_size[DCA_CODE_BOOKS] = { + 1, 3, 3, 3, 3, 7, 7, 7, 7, 7 +}; + +typedef struct DCAVLC { + int offset; ///< Code values offset + int max_depth; ///< Parameter for get_vlc2() + VLC vlc[7]; ///< Actual codes +} DCAVLC; + +static DCAVLC vlc_bit_allocation; +static DCAVLC vlc_transition_mode; +static DCAVLC vlc_scale_factor; +static DCAVLC vlc_quant_index[DCA_CODE_BOOKS]; + +static av_cold void dca_init_vlcs(void) +{ + static VLC_TYPE dca_table[23622][2]; + static int vlcs_initialized = 0; + int i, j, k; + + if (vlcs_initialized) + return; + +#define DCA_INIT_VLC(vlc, a, b, c, d) \ + do { \ + vlc.table = &dca_table[ff_dca_vlc_offs[k]]; \ + vlc.table_allocated = ff_dca_vlc_offs[k + 1] - ff_dca_vlc_offs[k]; \ + init_vlc(&vlc, a, b, c, 1, 1, d, 2, 2, INIT_VLC_USE_NEW_STATIC); \ + } while (0) + + vlc_bit_allocation.offset = 1; + vlc_bit_allocation.max_depth = 2; + for (i = 0, k = 0; i < 5; i++, k++) + DCA_INIT_VLC(vlc_bit_allocation.vlc[i], bitalloc_12_vlc_bits[i], 12, + 
bitalloc_12_bits[i], bitalloc_12_codes[i]); + + vlc_scale_factor.offset = -64; + vlc_scale_factor.max_depth = 2; + for (i = 0; i < 5; i++, k++) + DCA_INIT_VLC(vlc_scale_factor.vlc[i], SCALES_VLC_BITS, 129, + scales_bits[i], scales_codes[i]); + + vlc_transition_mode.offset = 0; + vlc_transition_mode.max_depth = 1; + for (i = 0; i < 4; i++, k++) + DCA_INIT_VLC(vlc_transition_mode.vlc[i], tmode_vlc_bits[i], 4, + tmode_bits[i], tmode_codes[i]); + + for (i = 0; i < DCA_CODE_BOOKS; i++) { + vlc_quant_index[i].offset = bitalloc_offsets[i]; + vlc_quant_index[i].max_depth = 1 + (i > 4); + for (j = 0; j < quant_index_group_size[i]; j++, k++) + DCA_INIT_VLC(vlc_quant_index[i].vlc[j], bitalloc_maxbits[i][j], + bitalloc_sizes[i], bitalloc_bits[i][j], bitalloc_codes[i][j]); + } + + vlcs_initialized = 1; +} + +static int get_vlc(GetBitContext *s, DCAVLC *v, int i) +{ + return get_vlc2(s, v->vlc[i].table, v->vlc[i].bits, v->max_depth) + v->offset; +} + +static void get_array(GetBitContext *s, int32_t *array, int size, int n) +{ + int i; + + for (i = 0; i < size; i++) + array[i] = get_sbits(s, n); +} + +// 5.3.1 - Bit stream header +static int parse_frame_header(DCACoreDecoder *s) +{ + int normal_frame, pcmr_index; + + // Frame type + normal_frame = get_bits1(&s->gb); + + // Deficit sample count + if (get_bits(&s->gb, 5) != DCA_PCMBLOCK_SAMPLES - 1) { + av_log(s->avctx, AV_LOG_ERROR, "Deficit samples are not supported\n"); + return normal_frame ? AVERROR_INVALIDDATA : AVERROR_PATCHWELCOME; + } + + // CRC present flag + s->crc_present = get_bits1(&s->gb); + + // Number of PCM sample blocks + s->npcmblocks = get_bits(&s->gb, 7) + 1; + if (s->npcmblocks & (DCA_SUBBAND_SAMPLES - 1)) { + av_log(s->avctx, AV_LOG_ERROR, "Unsupported number of PCM sample blocks (%d)\n", s->npcmblocks); + return (s->npcmblocks < 6 || normal_frame) ? 
AVERROR_INVALIDDATA : AVERROR_PATCHWELCOME; + } + + // Primary frame byte size + s->frame_size = get_bits(&s->gb, 14) + 1; + if (s->frame_size < 96) { + av_log(s->avctx, AV_LOG_ERROR, "Invalid core frame size (%d bytes)\n", s->frame_size); + return AVERROR_INVALIDDATA; + } + + // Audio channel arrangement + s->audio_mode = get_bits(&s->gb, 6); + if (s->audio_mode >= AMODE_COUNT) { + av_log(s->avctx, AV_LOG_ERROR, "Unsupported audio channel arrangement (%d)\n", s->audio_mode); + return AVERROR_PATCHWELCOME; + } + + // Core audio sampling frequency + s->sample_rate = avpriv_dca_sample_rates[get_bits(&s->gb, 4)]; + if (!s->sample_rate) { + av_log(s->avctx, AV_LOG_ERROR, "Invalid core audio sampling frequency\n"); + return AVERROR_INVALIDDATA; + } + + // Transmission bit rate + s->bit_rate = ff_dca_bit_rates[get_bits(&s->gb, 5)]; + + // Reserved field + skip_bits1(&s->gb); + + // Embedded dynamic range flag + s->drc_present = get_bits1(&s->gb); + + // Embedded time stamp flag + s->ts_present = get_bits1(&s->gb); + + // Auxiliary data flag + s->aux_present = get_bits1(&s->gb); + + // HDCD mastering flag + skip_bits1(&s->gb); + + // Extension audio descriptor flag + s->ext_audio_type = get_bits(&s->gb, 3); + + // Extended coding flag + s->ext_audio_present = get_bits1(&s->gb); + + // Audio sync word insertion flag + s->sync_ssf = get_bits1(&s->gb); + + // Low frequency effects flag + s->lfe_present = get_bits(&s->gb, 2); + if (s->lfe_present == LFE_FLAG_INVALID) { + av_log(s->avctx, AV_LOG_ERROR, "Invalid low frequency effects flag\n"); + return AVERROR_INVALIDDATA; + } + + // Predictor history flag switch + s->predictor_history = get_bits1(&s->gb); + + // Header CRC check bytes + if (s->crc_present) + skip_bits(&s->gb, 16); + + // Multirate interpolator switch + s->filter_perfect = get_bits1(&s->gb); + + // Encoder software revision + skip_bits(&s->gb, 4); + + // Copy history + skip_bits(&s->gb, 2); + + // Source PCM resolution + s->source_pcm_res = 
ff_dca_bits_per_sample[pcmr_index = get_bits(&s->gb, 3)]; + if (!s->source_pcm_res) { + av_log(s->avctx, AV_LOG_ERROR, "Invalid source PCM resolution\n"); + return AVERROR_INVALIDDATA; + } + s->es_format = pcmr_index & 1; + + // Front sum/difference flag + s->sumdiff_front = get_bits1(&s->gb); + + // Surround sum/difference flag + s->sumdiff_surround = get_bits1(&s->gb); + + // Dialog normalization / unspecified + skip_bits(&s->gb, 4); + + return 0; +} + +// 5.3.2 - Primary audio coding header +static int parse_coding_header(DCACoreDecoder *s, enum HeaderType header, int xch_base) +{ + int n, ch, nchannels, header_size = 0, header_pos = get_bits_count(&s->gb); + unsigned int mask, index; + + if (get_bits_left(&s->gb) < 0) + return AVERROR_INVALIDDATA; + + switch (header) { + case HEADER_CORE: + // Number of subframes + s->nsubframes = get_bits(&s->gb, 4) + 1; + + // Number of primary audio channels + s->nchannels = get_bits(&s->gb, 3) + 1; + if (s->nchannels != ff_dca_channels[s->audio_mode]) { + av_log(s->avctx, AV_LOG_ERROR, "Invalid number of primary audio channels (%d) for audio channel arrangement (%d)\n", s->nchannels, s->audio_mode); + return AVERROR_INVALIDDATA; + } + av_assert1(s->nchannels <= DCA_CHANNELS - 2); + + s->ch_mask = audio_mode_ch_mask[s->audio_mode]; + + // Add LFE channel if present + if (s->lfe_present) + s->ch_mask |= DCA_SPEAKER_MASK_LFE1; + break; + + case HEADER_XCH: + s->nchannels = ff_dca_channels[s->audio_mode] + 1; + av_assert1(s->nchannels <= DCA_CHANNELS - 1); + s->ch_mask |= DCA_SPEAKER_MASK_Cs; + break; + + case HEADER_XXCH: + // Channel set header length + header_size = get_bits(&s->gb, 7) + 1; + + // Check CRC + if (s->xxch_crc_present + && (s->avctx->err_recognition & (AV_EF_CRCCHECK | AV_EF_CAREFUL)) + && ff_dca_check_crc(&s->gb, header_pos, header_pos + header_size * 8)) { + av_log(s->avctx, AV_LOG_ERROR, "Invalid XXCH channel set header checksum\n"); + return AVERROR_INVALIDDATA; + } + + // Number of channels in a channel 
set + nchannels = get_bits(&s->gb, 3) + 1; + if (nchannels > DCA_XXCH_CHANNELS_MAX) { + avpriv_request_sample(s->avctx, "%d XXCH channels", nchannels); + return AVERROR_PATCHWELCOME; + } + s->nchannels = ff_dca_channels[s->audio_mode] + nchannels; + av_assert1(s->nchannels <= DCA_CHANNELS); + + // Loudspeaker layout mask + mask = get_bits_long(&s->gb, s->xxch_mask_nbits - DCA_SPEAKER_Cs); + s->xxch_spkr_mask = mask << DCA_SPEAKER_Cs; + + if (av_popcount(s->xxch_spkr_mask) != nchannels) { + av_log(s->avctx, AV_LOG_ERROR, "Invalid XXCH speaker layout mask (%#x)\n", s->xxch_spkr_mask); + return AVERROR_INVALIDDATA; + } + + if (s->xxch_core_mask & s->xxch_spkr_mask) { + av_log(s->avctx, AV_LOG_ERROR, "XXCH speaker layout mask (%#x) overlaps with core (%#x)\n", s->xxch_spkr_mask, s->xxch_core_mask); + return AVERROR_INVALIDDATA; + } + + // Combine core and XXCH masks together + s->ch_mask = s->xxch_core_mask | s->xxch_spkr_mask; + + // Downmix coefficients present in stream + if (get_bits1(&s->gb)) { + int *coeff_ptr = s->xxch_dmix_coeff; + + // Downmix already performed by encoder + s->xxch_dmix_embedded = get_bits1(&s->gb); + + // Downmix scale factor + index = get_bits(&s->gb, 6) * 4 - FF_DCA_DMIXTABLE_OFFSET - 3; + if (index >= FF_DCA_INV_DMIXTABLE_SIZE) { + av_log(s->avctx, AV_LOG_ERROR, "Invalid XXCH downmix scale index (%d)\n", index); + return AVERROR_INVALIDDATA; + } + s->xxch_dmix_scale_inv = ff_dca_inv_dmixtable[index]; + + // Downmix channel mapping mask + for (ch = 0; ch < nchannels; ch++) { + mask = get_bits_long(&s->gb, s->xxch_mask_nbits); + if ((mask & s->xxch_core_mask) != mask) { + av_log(s->avctx, AV_LOG_ERROR, "Invalid XXCH downmix channel mapping mask (%#x)\n", mask); + return AVERROR_INVALIDDATA; + } + s->xxch_dmix_mask[ch] = mask; + } + + // Downmix coefficients + for (ch = 0; ch < nchannels; ch++) { + for (n = 0; n < s->xxch_mask_nbits; n++) { + if (s->xxch_dmix_mask[ch] & (1U << n)) { + int code = get_bits(&s->gb, 7); + int sign = (code >> 6) - 
1; + if (code &= 63) { + index = code * 4 - 3; + if (index >= FF_DCA_DMIXTABLE_SIZE) { + av_log(s->avctx, AV_LOG_ERROR, "Invalid XXCH downmix coefficient index (%d)\n", index); + return AVERROR_INVALIDDATA; + } + *coeff_ptr++ = (ff_dca_dmixtable[index] ^ sign) - sign; + } else { + *coeff_ptr++ = 0; + } + } + } + } + } else { + s->xxch_dmix_embedded = 0; + } + + break; + } + + // Subband activity count + for (ch = xch_base; ch < s->nchannels; ch++) { + s->nsubbands[ch] = get_bits(&s->gb, 5) + 2; + if (s->nsubbands[ch] > DCA_SUBBANDS) { + av_log(s->avctx, AV_LOG_ERROR, "Invalid subband activity count\n"); + return AVERROR_INVALIDDATA; + } + } + + // High frequency VQ start subband + for (ch = xch_base; ch < s->nchannels; ch++) + s->subband_vq_start[ch] = get_bits(&s->gb, 5) + 1; + + // Joint intensity coding index + for (ch = xch_base; ch < s->nchannels; ch++) { + if ((n = get_bits(&s->gb, 3)) && header == HEADER_XXCH) + n += xch_base - 1; + if (n > s->nchannels) { + av_log(s->avctx, AV_LOG_ERROR, "Invalid joint intensity coding index\n"); + return AVERROR_INVALIDDATA; + } + s->joint_intensity_index[ch] = n; + } + + // Transient mode code book + for (ch = xch_base; ch < s->nchannels; ch++) + s->transition_mode_sel[ch] = get_bits(&s->gb, 2); + + // Scale factor code book + for (ch = xch_base; ch < s->nchannels; ch++) { + s->scale_factor_sel[ch] = get_bits(&s->gb, 3); + if (s->scale_factor_sel[ch] == 7) { + av_log(s->avctx, AV_LOG_ERROR, "Invalid scale factor code book\n"); + return AVERROR_INVALIDDATA; + } + } + + // Bit allocation quantizer select + for (ch = xch_base; ch < s->nchannels; ch++) { + s->bit_allocation_sel[ch] = get_bits(&s->gb, 3); + if (s->bit_allocation_sel[ch] == 7) { + av_log(s->avctx, AV_LOG_ERROR, "Invalid bit allocation quantizer select\n"); + return AVERROR_INVALIDDATA; + } + } + + // Quantization index codebook select + for (n = 0; n < DCA_CODE_BOOKS; n++) + for (ch = xch_base; ch < s->nchannels; ch++) + s->quant_index_sel[ch][n] = 
get_bits(&s->gb, quant_index_sel_nbits[n]); + + // Scale factor adjustment index + for (n = 0; n < DCA_CODE_BOOKS; n++) + for (ch = xch_base; ch < s->nchannels; ch++) + if (s->quant_index_sel[ch][n] < quant_index_group_size[n]) + s->scale_factor_adj[ch][n] = ff_dca_scale_factor_adj[get_bits(&s->gb, 2)]; + + if (header == HEADER_XXCH) { + // Reserved + // Byte align + // CRC16 of channel set header + if (ff_dca_seek_bits(&s->gb, header_pos + header_size * 8)) { + av_log(s->avctx, AV_LOG_ERROR, "Read past end of XXCH channel set header\n"); + return AVERROR_INVALIDDATA; + } + } else { + // Audio header CRC check word + if (s->crc_present) + skip_bits(&s->gb, 16); + } + + return 0; +} + +static inline int parse_scale(DCACoreDecoder *s, int *scale_index, int sel) +{ + const uint32_t *scale_table; + unsigned int scale_size; + + // Select the root square table + if (sel > 5) { + scale_table = ff_dca_scale_factor_quant7; + scale_size = FF_ARRAY_ELEMS(ff_dca_scale_factor_quant7); + } else { + scale_table = ff_dca_scale_factor_quant6; + scale_size = FF_ARRAY_ELEMS(ff_dca_scale_factor_quant6); + } + + // If Huffman code was used, the difference of scales was encoded + if (sel < 5) + *scale_index += get_vlc(&s->gb, &vlc_scale_factor, sel); + else + *scale_index = get_bits(&s->gb, sel + 1); + + // Look up scale factor from the root square table + if ((unsigned int)*scale_index >= scale_size) { + av_log(s->avctx, AV_LOG_ERROR, "Invalid scale factor index\n"); + return AVERROR_INVALIDDATA; + } + + return scale_table[*scale_index]; +} + +static inline int parse_joint_scale(DCACoreDecoder *s, int sel) +{ + int scale_index; + + // Absolute value was encoded even when Huffman code was used + if (sel < 5) + scale_index = get_vlc(&s->gb, &vlc_scale_factor, sel); + else + scale_index = get_bits(&s->gb, sel + 1); + + // Bias by 64 + scale_index += 64; + + // Look up joint scale factor + if ((unsigned int)scale_index >= FF_ARRAY_ELEMS(ff_dca_joint_scale_factors)) { + av_log(s->avctx, 
AV_LOG_ERROR, "Invalid joint scale factor index\n"); + return AVERROR_INVALIDDATA; + } + + return ff_dca_joint_scale_factors[scale_index]; +} + +// 5.4.1 - Primary audio coding side information +static int parse_subframe_header(DCACoreDecoder *s, int sf, + enum HeaderType header, int xch_base) +{ + int ch, band, ret; + + if (get_bits_left(&s->gb) < 0) + return AVERROR_INVALIDDATA; + + if (header == HEADER_CORE) { + // Subsubframe count + s->nsubsubframes[sf] = get_bits(&s->gb, 2) + 1; + + // Partial subsubframe sample count + skip_bits(&s->gb, 3); + } + + // Prediction mode + for (ch = xch_base; ch < s->nchannels; ch++) + for (band = 0; band < s->nsubbands[ch]; band++) + s->prediction_mode[ch][band] = get_bits1(&s->gb); + + // Prediction coefficients VQ address + for (ch = xch_base; ch < s->nchannels; ch++) + for (band = 0; band < s->nsubbands[ch]; band++) + if (s->prediction_mode[ch][band]) + s->prediction_vq_index[ch][band] = get_bits(&s->gb, 12); + + // Bit allocation index + for (ch = xch_base; ch < s->nchannels; ch++) { + int sel = s->bit_allocation_sel[ch]; + + for (band = 0; band < s->subband_vq_start[ch]; band++) { + int abits; + + if (sel < 5) + abits = get_vlc(&s->gb, &vlc_bit_allocation, sel); + else + abits = get_bits(&s->gb, sel - 1); + + if (abits > DCA_ABITS_MAX) { + av_log(s->avctx, AV_LOG_ERROR, "Invalid bit allocation index\n"); + return AVERROR_INVALIDDATA; + } + + s->bit_allocation[ch][band] = abits; + } + } + + // Transition mode + for (ch = xch_base; ch < s->nchannels; ch++) { + // Clear transition mode for all subbands + memset(s->transition_mode[sf][ch], 0, sizeof(s->transition_mode[0][0])); + + // Transient possible only if more than one subsubframe + if (s->nsubsubframes[sf] > 1) { + int sel = s->transition_mode_sel[ch]; + for (band = 0; band < s->subband_vq_start[ch]; band++) + if (s->bit_allocation[ch][band]) + s->transition_mode[sf][ch][band] = get_vlc(&s->gb, &vlc_transition_mode, sel); + } + } + + // Scale factors + for (ch = 
xch_base; ch < s->nchannels; ch++) { + int sel = s->scale_factor_sel[ch]; + int scale_index = 0; + + // Extract scales for subbands up to VQ + for (band = 0; band < s->subband_vq_start[ch]; band++) { + if (s->bit_allocation[ch][band]) { + if ((ret = parse_scale(s, &scale_index, sel)) < 0) + return ret; + s->scale_factors[ch][band][0] = ret; + if (s->transition_mode[sf][ch][band]) { + if ((ret = parse_scale(s, &scale_index, sel)) < 0) + return ret; + s->scale_factors[ch][band][1] = ret; + } + } else { + s->scale_factors[ch][band][0] = 0; + } + } + + // High frequency VQ subbands + for (band = s->subband_vq_start[ch]; band < s->nsubbands[ch]; band++) { + if ((ret = parse_scale(s, &scale_index, sel)) < 0) + return ret; + s->scale_factors[ch][band][0] = ret; + } + } + + // Joint subband codebook select + for (ch = xch_base; ch < s->nchannels; ch++) { + if (s->joint_intensity_index[ch]) { + s->joint_scale_sel[ch] = get_bits(&s->gb, 3); + if (s->joint_scale_sel[ch] == 7) { + av_log(s->avctx, AV_LOG_ERROR, "Invalid joint scale factor code book\n"); + return AVERROR_INVALIDDATA; + } + } + } + + // Scale factors for joint subband coding + for (ch = xch_base; ch < s->nchannels; ch++) { + int src_ch = s->joint_intensity_index[ch] - 1; + if (src_ch >= 0) { + int sel = s->joint_scale_sel[ch]; + for (band = s->nsubbands[ch]; band < s->nsubbands[src_ch]; band++) { + if ((ret = parse_joint_scale(s, sel)) < 0) + return ret; + s->joint_scale_factors[ch][band] = ret; + } + } + } + + // Dynamic range coefficient + if (s->drc_present && header == HEADER_CORE) + skip_bits(&s->gb, 8); + + // Side information CRC check word + if (s->crc_present) + skip_bits(&s->gb, 16); + + return 0; +} + +#ifndef decode_blockcodes +static inline int decode_blockcodes(int code1, int code2, int levels, int32_t *audio) +{ + int offset = (levels - 1) / 2; + int n, div; + + for (n = 0; n < DCA_SUBBAND_SAMPLES / 2; n++) { + div = FASTDIV(code1, levels); + audio[n] = code1 - div * levels - offset; + code1 = 
div; + } + for (; n < DCA_SUBBAND_SAMPLES; n++) { + div = FASTDIV(code2, levels); + audio[n] = code2 - div * levels - offset; + code2 = div; + } + + return code1 | code2; +} +#endif + +static inline int parse_block_codes(DCACoreDecoder *s, int32_t *audio, int abits) +{ + // Extract block code indices from the bit stream + int code1 = get_bits(&s->gb, block_code_nbits[abits - 1]); + int code2 = get_bits(&s->gb, block_code_nbits[abits - 1]); + int levels = ff_dca_quant_levels[abits]; + + // Look up samples from the block code book + if (decode_blockcodes(code1, code2, levels, audio)) { + av_log(s->avctx, AV_LOG_ERROR, "Failed to decode block code(s)\n"); + return AVERROR_INVALIDDATA; + } + + return 0; +} + +static inline int parse_huffman_codes(DCACoreDecoder *s, int32_t *audio, int abits, int sel) +{ + int i; + + // Extract Huffman codes from the bit stream + for (i = 0; i < DCA_SUBBAND_SAMPLES; i++) + audio[i] = get_vlc(&s->gb, &vlc_quant_index[abits - 1], sel); + + return 1; +} + +static inline int extract_audio(DCACoreDecoder *s, int32_t *audio, int abits, int ch) +{ + av_assert1(abits >= 0 && abits <= DCA_ABITS_MAX); + + if (abits == 0) { + // No bits allocated + memset(audio, 0, DCA_SUBBAND_SAMPLES * sizeof(*audio)); + return 0; + } + + if (abits <= DCA_CODE_BOOKS) { + int sel = s->quant_index_sel[ch][abits - 1]; + if (sel < quant_index_group_size[abits - 1]) { + // Huffman codes + return parse_huffman_codes(s, audio, abits, sel); + } + if (abits <= 7) { + // Block codes + return parse_block_codes(s, audio, abits); + } + } + + // No further encoding + get_array(&s->gb, audio, DCA_SUBBAND_SAMPLES, abits - 3); + return 0; +} + +static inline void dequantize(int32_t *output, const int32_t *input, + int32_t step_size, int32_t scale, int residual) +{ + // Account for quantizer step size + int64_t step_scale = (int64_t)step_size * scale; + int n, shift = 0; + + // Limit scale factor resolution to 22 bits + if (step_scale > (1 << 23)) { + shift = av_log2(step_scale >> 
23) + 1; + step_scale >>= shift; + } + + // Scale the samples + if (residual) { + for (n = 0; n < DCA_SUBBAND_SAMPLES; n++) + output[n] += clip23(norm__(input[n] * step_scale, 22 - shift)); + } else { + for (n = 0; n < DCA_SUBBAND_SAMPLES; n++) + output[n] = clip23(norm__(input[n] * step_scale, 22 - shift)); + } +} + +static inline void inverse_adpcm(int32_t **subband_samples, + const int16_t *vq_index, + const int8_t *prediction_mode, + int sb_start, int sb_end, + int ofs, int len) +{ + int i, j, k; + + for (i = sb_start; i < sb_end; i++) { + if (prediction_mode[i]) { + const int16_t *coeff = ff_dca_adpcm_vb[vq_index[i]]; + int32_t *ptr = subband_samples[i] + ofs; + for (j = 0; j < len; j++) { + int64_t err = 0; + for (k = 0; k < DCA_ADPCM_COEFFS; k++) + err += (int64_t)ptr[j - k - 1] * coeff[k]; + ptr[j] = clip23(ptr[j] + clip23(norm13(err))); + } + } + } +} + +// 5.5 - Primary audio data arrays +static int parse_subframe_audio(DCACoreDecoder *s, int sf, enum HeaderType header, + int xch_base, int *sub_pos, int *lfe_pos) +{ + int32_t audio[16], scale; + int n, ssf, ofs, ch, band; + + // Check number of subband samples in this subframe + int nsamples = s->nsubsubframes[sf] * DCA_SUBBAND_SAMPLES; + if (*sub_pos + nsamples > s->npcmblocks) { + av_log(s->avctx, AV_LOG_ERROR, "Subband sample buffer overflow\n"); + return AVERROR_INVALIDDATA; + } + + if (get_bits_left(&s->gb) < 0) + return AVERROR_INVALIDDATA; + + // VQ encoded subbands + for (ch = xch_base; ch < s->nchannels; ch++) { + int32_t vq_index[DCA_SUBBANDS]; + + for (band = s->subband_vq_start[ch]; band < s->nsubbands[ch]; band++) + // Extract the VQ address from the bit stream + vq_index[band] = get_bits(&s->gb, 10); + + if (s->subband_vq_start[ch] < s->nsubbands[ch]) { + s->dcadsp->decode_hf(s->subband_samples[ch], vq_index, + ff_dca_high_freq_vq, s->scale_factors[ch], + s->subband_vq_start[ch], s->nsubbands[ch], + *sub_pos, nsamples); + } + } + + // Low frequency effect data + if (s->lfe_present && header 
== HEADER_CORE) { + unsigned int index; + + // Determine number of LFE samples in this subframe + int nlfesamples = 2 * s->lfe_present * s->nsubsubframes[sf]; + av_assert1((unsigned int)nlfesamples <= FF_ARRAY_ELEMS(audio)); + + // Extract LFE samples from the bit stream + get_array(&s->gb, audio, nlfesamples, 8); + + // Extract scale factor index from the bit stream + index = get_bits(&s->gb, 8); + if (index >= FF_ARRAY_ELEMS(ff_dca_scale_factor_quant7)) { + av_log(s->avctx, AV_LOG_ERROR, "Invalid LFE scale factor index\n"); + return AVERROR_INVALIDDATA; + } + + // Look up the 7-bit root square quantization table + scale = ff_dca_scale_factor_quant7[index]; + + // Account for quantizer step size which is 0.035 + scale = mul23(4697620 /* 0.035 * (1 << 27) */, scale); + + // Scale and take the LFE samples + for (n = 0, ofs = *lfe_pos; n < nlfesamples; n++, ofs++) + s->lfe_samples[ofs] = clip23(audio[n] * scale >> 4); + + // Advance LFE sample pointer for the next subframe + *lfe_pos = ofs; + } + + // Audio data + for (ssf = 0, ofs = *sub_pos; ssf < s->nsubsubframes[sf]; ssf++) { + for (ch = xch_base; ch < s->nchannels; ch++) { + if (get_bits_left(&s->gb) < 0) + return AVERROR_INVALIDDATA; + + // Not high frequency VQ subbands + for (band = 0; band < s->subband_vq_start[ch]; band++) { + int ret, trans_ssf, abits = s->bit_allocation[ch][band]; + int32_t step_size; + + // Extract bits from the bit stream + if ((ret = extract_audio(s, audio, abits, ch)) < 0) + return ret; + + // Select quantization step size table and look up + // quantization step size + if (s->bit_rate == 3) + step_size = ff_dca_lossless_quant[abits]; + else + step_size = ff_dca_lossy_quant[abits]; + + // Identify transient location + trans_ssf = s->transition_mode[sf][ch][band]; + + // Determine proper scale factor + if (trans_ssf == 0 || ssf < trans_ssf) + scale = s->scale_factors[ch][band][0]; + else + scale = s->scale_factors[ch][band][1]; + + // Adjust scale factor when SEL indicates Huffman code 
+ if (ret > 0) { + int64_t adj = s->scale_factor_adj[ch][abits - 1]; + scale = clip23(adj * scale >> 22); + } + + dequantize(s->subband_samples[ch][band] + ofs, + audio, step_size, scale, 0); + } + } + + // DSYNC + if ((ssf == s->nsubsubframes[sf] - 1 || s->sync_ssf) && get_bits(&s->gb, 16) != 0xffff) { + av_log(s->avctx, AV_LOG_ERROR, "DSYNC check failed\n"); + return AVERROR_INVALIDDATA; + } + + ofs += DCA_SUBBAND_SAMPLES; + } + + // Inverse ADPCM + for (ch = xch_base; ch < s->nchannels; ch++) { + inverse_adpcm(s->subband_samples[ch], s->prediction_vq_index[ch], + s->prediction_mode[ch], 0, s->nsubbands[ch], + *sub_pos, nsamples); + } + + // Joint subband coding + for (ch = xch_base; ch < s->nchannels; ch++) { + int src_ch = s->joint_intensity_index[ch] - 1; + if (src_ch >= 0) { + s->dcadsp->decode_joint(s->subband_samples[ch], s->subband_samples[src_ch], + s->joint_scale_factors[ch], s->nsubbands[ch], + s->nsubbands[src_ch], *sub_pos, nsamples); + } + } + + // Advance subband sample pointer for the next subframe + *sub_pos = ofs; + return 0; +} + +static void erase_adpcm_history(DCACoreDecoder *s) +{ + int ch, band; + + // Erase ADPCM history from previous frame if + // predictor history switch was disabled + for (ch = 0; ch < DCA_CHANNELS; ch++) + for (band = 0; band < DCA_SUBBANDS; band++) + AV_ZERO128(s->subband_samples[ch][band] - DCA_ADPCM_COEFFS); +} + +static int alloc_sample_buffer(DCACoreDecoder *s) +{ + int nchsamples = DCA_ADPCM_COEFFS + s->npcmblocks; + int nframesamples = nchsamples * DCA_CHANNELS * DCA_SUBBANDS; + int nlfesamples = DCA_LFE_HISTORY + s->npcmblocks / 2; + unsigned int size = s->subband_size; + int ch, band; + + // Reallocate subband sample buffer + av_fast_mallocz(&s->subband_buffer, &s->subband_size, + (nframesamples + nlfesamples) * sizeof(int32_t)); + if (!s->subband_buffer) + return AVERROR(ENOMEM); + + if (size != s->subband_size) { + for (ch = 0; ch < DCA_CHANNELS; ch++) + for (band = 0; band < DCA_SUBBANDS; band++) + 
s->subband_samples[ch][band] = s->subband_buffer + + (ch * DCA_SUBBANDS + band) * nchsamples + DCA_ADPCM_COEFFS; + s->lfe_samples = s->subband_buffer + nframesamples; + } + + if (!s->predictor_history) + erase_adpcm_history(s); + + return 0; +} + +static int parse_frame_data(DCACoreDecoder *s, enum HeaderType header, int xch_base) +{ + int sf, ch, ret, band, sub_pos, lfe_pos; + + if ((ret = parse_coding_header(s, header, xch_base)) < 0) + return ret; + + for (sf = 0, sub_pos = 0, lfe_pos = DCA_LFE_HISTORY; sf < s->nsubframes; sf++) { + if ((ret = parse_subframe_header(s, sf, header, xch_base)) < 0) + return ret; + if ((ret = parse_subframe_audio(s, sf, header, xch_base, &sub_pos, &lfe_pos)) < 0) + return ret; + } + + for (ch = xch_base; ch < s->nchannels; ch++) { + // Determine number of active subbands for this channel + int nsubbands = s->nsubbands[ch]; + if (s->joint_intensity_index[ch]) + nsubbands = FFMAX(nsubbands, s->nsubbands[s->joint_intensity_index[ch] - 1]); + + // Update history for ADPCM + for (band = 0; band < nsubbands; band++) { + int32_t *samples = s->subband_samples[ch][band] - DCA_ADPCM_COEFFS; + AV_COPY128(samples, samples + s->npcmblocks); + } + + // Clear inactive subbands + for (; band < DCA_SUBBANDS; band++) { + int32_t *samples = s->subband_samples[ch][band] - DCA_ADPCM_COEFFS; + memset(samples, 0, (DCA_ADPCM_COEFFS + s->npcmblocks) * sizeof(int32_t)); + } + } + + return 0; +} + +static int parse_xch_frame(DCACoreDecoder *s) +{ + int ret; + + if (s->ch_mask & DCA_SPEAKER_MASK_Cs) { + av_log(s->avctx, AV_LOG_ERROR, "XCH with Cs speaker already present\n"); + return AVERROR_INVALIDDATA; + } + + if ((ret = parse_frame_data(s, HEADER_XCH, s->nchannels)) < 0) + return ret; + + // Seek to the end of core frame, don't trust XCH frame size + if (ff_dca_seek_bits(&s->gb, s->frame_size * 8)) { + av_log(s->avctx, AV_LOG_ERROR, "Read past end of XCH frame\n"); + return AVERROR_INVALIDDATA; + } + + return 0; +} + +static int 
parse_xxch_frame(DCACoreDecoder *s) +{ + int xxch_nchsets, xxch_frame_size; + int ret, mask, header_size, header_pos = get_bits_count(&s->gb); + + // XXCH sync word + if (get_bits_long(&s->gb, 32) != DCA_SYNCWORD_XXCH) { + av_log(s->avctx, AV_LOG_ERROR, "Invalid XXCH sync word\n"); + return AVERROR_INVALIDDATA; + } + + // XXCH frame header length + header_size = get_bits(&s->gb, 6) + 1; + + // Check XXCH frame header CRC + if ((s->avctx->err_recognition & (AV_EF_CRCCHECK | AV_EF_CAREFUL)) + && ff_dca_check_crc(&s->gb, header_pos + 32, header_pos + header_size * 8)) { + av_log(s->avctx, AV_LOG_ERROR, "Invalid XXCH frame header checksum\n"); + return AVERROR_INVALIDDATA; + } + + // CRC presence flag for channel set header + s->xxch_crc_present = get_bits1(&s->gb); + + // Number of bits for loudspeaker mask + s->xxch_mask_nbits = get_bits(&s->gb, 5) + 1; + if (s->xxch_mask_nbits <= DCA_SPEAKER_Cs) { + av_log(s->avctx, AV_LOG_ERROR, "Invalid number of bits for XXCH speaker mask (%d)\n", s->xxch_mask_nbits); + return AVERROR_INVALIDDATA; + } + + // Number of channel sets + xxch_nchsets = get_bits(&s->gb, 2) + 1; + if (xxch_nchsets > 1) { + avpriv_request_sample(s->avctx, "%d XXCH channel sets", xxch_nchsets); + return AVERROR_PATCHWELCOME; + } + + // Channel set 0 data byte size + xxch_frame_size = get_bits(&s->gb, 14) + 1; + + // Core loudspeaker activity mask + s->xxch_core_mask = get_bits_long(&s->gb, s->xxch_mask_nbits); + + // Validate the core mask + mask = s->ch_mask; + + if ((mask & DCA_SPEAKER_MASK_Ls) && (s->xxch_core_mask & DCA_SPEAKER_MASK_Lss)) + mask = (mask & ~DCA_SPEAKER_MASK_Ls) | DCA_SPEAKER_MASK_Lss; + + if ((mask & DCA_SPEAKER_MASK_Rs) && (s->xxch_core_mask & DCA_SPEAKER_MASK_Rss)) + mask = (mask & ~DCA_SPEAKER_MASK_Rs) | DCA_SPEAKER_MASK_Rss; + + if (mask != s->xxch_core_mask) { + av_log(s->avctx, AV_LOG_ERROR, "XXCH core speaker activity mask (%#x) disagrees with core (%#x)\n", s->xxch_core_mask, mask); + return AVERROR_INVALIDDATA; + } + + // 
Reserved + // Byte align + // CRC16 of XXCH frame header + if (ff_dca_seek_bits(&s->gb, header_pos + header_size * 8)) { + av_log(s->avctx, AV_LOG_ERROR, "Read past end of XXCH frame header\n"); + return AVERROR_INVALIDDATA; + } + + // Parse XXCH channel set 0 + if ((ret = parse_frame_data(s, HEADER_XXCH, s->nchannels)) < 0) + return ret; + + if (ff_dca_seek_bits(&s->gb, header_pos + header_size * 8 + xxch_frame_size * 8)) { + av_log(s->avctx, AV_LOG_ERROR, "Read past end of XXCH channel set\n"); + return AVERROR_INVALIDDATA; + } + + return 0; +} + +static int parse_xbr_subframe(DCACoreDecoder *s, int xbr_base_ch, int xbr_nchannels, + int *xbr_nsubbands, int xbr_transition_mode, int sf, int *sub_pos) +{ + int xbr_nabits[DCA_CHANNELS]; + int xbr_bit_allocation[DCA_CHANNELS][DCA_SUBBANDS]; + int xbr_scale_nbits[DCA_CHANNELS]; + int32_t xbr_scale_factors[DCA_CHANNELS][DCA_SUBBANDS][2]; + int ssf, ch, band, ofs; + + // Check number of subband samples in this subframe + if (*sub_pos + s->nsubsubframes[sf] * DCA_SUBBAND_SAMPLES > s->npcmblocks) { + av_log(s->avctx, AV_LOG_ERROR, "Subband sample buffer overflow\n"); + return AVERROR_INVALIDDATA; + } + + if (get_bits_left(&s->gb) < 0) + return AVERROR_INVALIDDATA; + + // Number of bits for XBR bit allocation index + for (ch = xbr_base_ch; ch < xbr_nchannels; ch++) + xbr_nabits[ch] = get_bits(&s->gb, 2) + 2; + + // XBR bit allocation index + for (ch = xbr_base_ch; ch < xbr_nchannels; ch++) { + for (band = 0; band < xbr_nsubbands[ch]; band++) { + xbr_bit_allocation[ch][band] = get_bits(&s->gb, xbr_nabits[ch]); + if (xbr_bit_allocation[ch][band] > DCA_ABITS_MAX) { + av_log(s->avctx, AV_LOG_ERROR, "Invalid XBR bit allocation index\n"); + return AVERROR_INVALIDDATA; + } + } + } + + // Number of bits for scale indices + for (ch = xbr_base_ch; ch < xbr_nchannels; ch++) { + xbr_scale_nbits[ch] = get_bits(&s->gb, 3); + if (!xbr_scale_nbits[ch]) { + av_log(s->avctx, AV_LOG_ERROR, "Invalid number of bits for XBR scale factor 
index\n"); + return AVERROR_INVALIDDATA; + } + } + + // XBR scale factors + for (ch = xbr_base_ch; ch < xbr_nchannels; ch++) { + const uint32_t *scale_table; + int scale_size; + + // Select the root square table + if (s->scale_factor_sel[ch] > 5) { + scale_table = ff_dca_scale_factor_quant7; + scale_size = FF_ARRAY_ELEMS(ff_dca_scale_factor_quant7); + } else { + scale_table = ff_dca_scale_factor_quant6; + scale_size = FF_ARRAY_ELEMS(ff_dca_scale_factor_quant6); + } + + // Parse scale factor indices and look up scale factors from the root + // square table + for (band = 0; band < xbr_nsubbands[ch]; band++) { + if (xbr_bit_allocation[ch][band]) { + int scale_index = get_bits(&s->gb, xbr_scale_nbits[ch]); + if (scale_index >= scale_size) { + av_log(s->avctx, AV_LOG_ERROR, "Invalid XBR scale factor index\n"); + return AVERROR_INVALIDDATA; + } + xbr_scale_factors[ch][band][0] = scale_table[scale_index]; + if (xbr_transition_mode && s->transition_mode[sf][ch][band]) { + scale_index = get_bits(&s->gb, xbr_scale_nbits[ch]); + if (scale_index >= scale_size) { + av_log(s->avctx, AV_LOG_ERROR, "Invalid XBR scale factor index\n"); + return AVERROR_INVALIDDATA; + } + xbr_scale_factors[ch][band][1] = scale_table[scale_index]; + } + } + } + } + + // Audio data + for (ssf = 0, ofs = *sub_pos; ssf < s->nsubsubframes[sf]; ssf++) { + for (ch = xbr_base_ch; ch < xbr_nchannels; ch++) { + if (get_bits_left(&s->gb) < 0) + return AVERROR_INVALIDDATA; + + for (band = 0; band < xbr_nsubbands[ch]; band++) { + int ret, trans_ssf, abits = xbr_bit_allocation[ch][band]; + int32_t audio[DCA_SUBBAND_SAMPLES], step_size, scale; + + // Extract bits from the bit stream + if (abits > 7) { + // No further encoding + get_array(&s->gb, audio, DCA_SUBBAND_SAMPLES, abits - 3); + } else if (abits > 0) { + // Block codes + if ((ret = parse_block_codes(s, audio, abits)) < 0) + return ret; + } else { + // No bits allocated + continue; + } + + // Look up quantization step size + step_size = 
ff_dca_lossless_quant[abits]; + + // Identify transient location + if (xbr_transition_mode) + trans_ssf = s->transition_mode[sf][ch][band]; + else + trans_ssf = 0; + + // Determine proper scale factor + if (trans_ssf == 0 || ssf < trans_ssf) + scale = xbr_scale_factors[ch][band][0]; + else + scale = xbr_scale_factors[ch][band][1]; + + dequantize(s->subband_samples[ch][band] + ofs, + audio, step_size, scale, 1); + } + } + + // DSYNC + if ((ssf == s->nsubsubframes[sf] - 1 || s->sync_ssf) && get_bits(&s->gb, 16) != 0xffff) { + av_log(s->avctx, AV_LOG_ERROR, "XBR-DSYNC check failed\n"); + return AVERROR_INVALIDDATA; + } + + ofs += DCA_SUBBAND_SAMPLES; + } + + // Advance subband sample pointer for the next subframe + *sub_pos = ofs; + return 0; +} + +static int parse_xbr_frame(DCACoreDecoder *s) +{ + int xbr_frame_size[DCA_EXSS_CHSETS_MAX]; + int xbr_nchannels[DCA_EXSS_CHSETS_MAX]; + int xbr_nsubbands[DCA_EXSS_CHSETS_MAX * DCA_EXSS_CHANNELS_MAX]; + int xbr_nchsets, xbr_transition_mode, xbr_band_nbits, xbr_base_ch; + int i, ch1, ch2, ret, header_size, header_pos = get_bits_count(&s->gb); + + // XBR sync word + if (get_bits_long(&s->gb, 32) != DCA_SYNCWORD_XBR) { + av_log(s->avctx, AV_LOG_ERROR, "Invalid XBR sync word\n"); + return AVERROR_INVALIDDATA; + } + + // XBR frame header length + header_size = get_bits(&s->gb, 6) + 1; + + // Check XBR frame header CRC + if ((s->avctx->err_recognition & (AV_EF_CRCCHECK | AV_EF_CAREFUL)) + && ff_dca_check_crc(&s->gb, header_pos + 32, header_pos + header_size * 8)) { + av_log(s->avctx, AV_LOG_ERROR, "Invalid XBR frame header checksum\n"); + return AVERROR_INVALIDDATA; + } + + // Number of channel sets + xbr_nchsets = get_bits(&s->gb, 2) + 1; + + // Channel set data byte size + for (i = 0; i < xbr_nchsets; i++) + xbr_frame_size[i] = get_bits(&s->gb, 14) + 1; + + // Transition mode flag + xbr_transition_mode = get_bits1(&s->gb); + + // Channel set headers + for (i = 0, ch2 = 0; i < xbr_nchsets; i++) { + xbr_nchannels[i] = 
get_bits(&s->gb, 3) + 1; + xbr_band_nbits = get_bits(&s->gb, 2) + 5; + for (ch1 = 0; ch1 < xbr_nchannels[i]; ch1++, ch2++) { + xbr_nsubbands[ch2] = get_bits(&s->gb, xbr_band_nbits) + 1; + if (xbr_nsubbands[ch2] > DCA_SUBBANDS) { + av_log(s->avctx, AV_LOG_ERROR, "Invalid number of active XBR subbands (%d)\n", xbr_nsubbands[ch2]); + return AVERROR_INVALIDDATA; + } + } + } + + // Reserved + // Byte align + // CRC16 of XBR frame header + if (ff_dca_seek_bits(&s->gb, header_pos + header_size * 8)) { + av_log(s->avctx, AV_LOG_ERROR, "Read past end of XBR frame header\n"); + return AVERROR_INVALIDDATA; + } + + // Channel set data + for (i = 0, xbr_base_ch = 0; i < xbr_nchsets; i++) { + header_pos = get_bits_count(&s->gb); + + if (xbr_base_ch + xbr_nchannels[i] <= s->nchannels) { + int sf, sub_pos; + + for (sf = 0, sub_pos = 0; sf < s->nsubframes; sf++) { + if ((ret = parse_xbr_subframe(s, xbr_base_ch, + xbr_base_ch + xbr_nchannels[i], + xbr_nsubbands, xbr_transition_mode, + sf, &sub_pos)) < 0) + return ret; + } + } + + xbr_base_ch += xbr_nchannels[i]; + + if (ff_dca_seek_bits(&s->gb, header_pos + xbr_frame_size[i] * 8)) { + av_log(s->avctx, AV_LOG_ERROR, "Read past end of XBR channel set\n"); + return AVERROR_INVALIDDATA; + } + } + + return 0; +} + +// Modified ISO/IEC 9899 linear congruential generator +// Returns pseudorandom integer in range [-2^30, 2^30 - 1] +static int rand_x96(DCACoreDecoder *s) +{ + s->x96_rand = 1103515245U * s->x96_rand + 12345U; + return (s->x96_rand & 0x7fffffff) - 0x40000000; +} + +static int parse_x96_subframe_audio(DCACoreDecoder *s, int sf, int xch_base, int *sub_pos) +{ + int n, ssf, ch, band, ofs; + + // Check number of subband samples in this subframe + int nsamples = s->nsubsubframes[sf] * DCA_SUBBAND_SAMPLES; + if (*sub_pos + nsamples > s->npcmblocks) { + av_log(s->avctx, AV_LOG_ERROR, "Subband sample buffer overflow\n"); + return AVERROR_INVALIDDATA; + } + + if (get_bits_left(&s->gb) < 0) + return AVERROR_INVALIDDATA; + + // VQ 
encoded or unallocated subbands + for (ch = xch_base; ch < s->x96_nchannels; ch++) { + for (band = s->x96_subband_start; band < s->nsubbands[ch]; band++) { + // Get the sample pointer and scale factor + int32_t *samples = s->x96_subband_samples[ch][band] + *sub_pos; + int32_t scale = s->scale_factors[ch][band >> 1][band & 1]; + + switch (s->bit_allocation[ch][band]) { + case 0: // No bits allocated for subband + if (scale <= 1) + memset(samples, 0, nsamples * sizeof(int32_t)); + else for (n = 0; n < nsamples; n++) + // Generate scaled random samples + samples[n] = mul31(rand_x96(s), scale); + break; + + case 1: // VQ encoded subband + for (ssf = 0; ssf < (s->nsubsubframes[sf] + 1) / 2; ssf++) { + // Extract the VQ address from the bit stream and look up + // the VQ code book for up to 16 subband samples + const int8_t *vq_samples = ff_dca_high_freq_vq[get_bits(&s->gb, 10)]; + // Scale and take the samples + for (n = 0; n < FFMIN(nsamples - ssf * 16, 16); n++) + *samples++ = clip23(vq_samples[n] * scale + (1 << 3) >> 4); + } + break; + } + } + } + + // Audio data + for (ssf = 0, ofs = *sub_pos; ssf < s->nsubsubframes[sf]; ssf++) { + for (ch = xch_base; ch < s->x96_nchannels; ch++) { + if (get_bits_left(&s->gb) < 0) + return AVERROR_INVALIDDATA; + + for (band = s->x96_subband_start; band < s->nsubbands[ch]; band++) { + int ret, abits = s->bit_allocation[ch][band] - 1; + int32_t audio[DCA_SUBBAND_SAMPLES], step_size, scale; + + // Not VQ encoded or unallocated subbands + if (abits < 1) + continue; + + // Extract bits from the bit stream + if ((ret = extract_audio(s, audio, abits, ch)) < 0) + return ret; + + // Select quantization step size table and look up quantization + // step size + if (s->bit_rate == 3) + step_size = ff_dca_lossless_quant[abits]; + else + step_size = ff_dca_lossy_quant[abits]; + + // Get the scale factor + scale = s->scale_factors[ch][band >> 1][band & 1]; + + dequantize(s->x96_subband_samples[ch][band] + ofs, + audio, step_size, scale, 0); + } + 
} + + // DSYNC + if ((ssf == s->nsubsubframes[sf] - 1 || s->sync_ssf) && get_bits(&s->gb, 16) != 0xffff) { + av_log(s->avctx, AV_LOG_ERROR, "X96-DSYNC check failed\n"); + return AVERROR_INVALIDDATA; + } + + ofs += DCA_SUBBAND_SAMPLES; + } + + // Inverse ADPCM + for (ch = xch_base; ch < s->x96_nchannels; ch++) { + inverse_adpcm(s->x96_subband_samples[ch], s->prediction_vq_index[ch], + s->prediction_mode[ch], s->x96_subband_start, s->nsubbands[ch], + *sub_pos, nsamples); + } + + // Joint subband coding + for (ch = xch_base; ch < s->x96_nchannels; ch++) { + int src_ch = s->joint_intensity_index[ch] - 1; + if (src_ch >= 0) { + s->dcadsp->decode_joint(s->x96_subband_samples[ch], s->x96_subband_samples[src_ch], + s->joint_scale_factors[ch], s->nsubbands[ch], + s->nsubbands[src_ch], *sub_pos, nsamples); + } + } + + // Advance subband sample pointer for the next subframe + *sub_pos = ofs; + return 0; +} + +static void erase_x96_adpcm_history(DCACoreDecoder *s) +{ + int ch, band; + + // Erase ADPCM history from previous frame if + // predictor history switch was disabled + for (ch = 0; ch < DCA_CHANNELS; ch++) + for (band = 0; band < DCA_SUBBANDS_X96; band++) + AV_ZERO128(s->x96_subband_samples[ch][band] - DCA_ADPCM_COEFFS); +} + +static int alloc_x96_sample_buffer(DCACoreDecoder *s) +{ + int nchsamples = DCA_ADPCM_COEFFS + s->npcmblocks; + int nframesamples = nchsamples * DCA_CHANNELS * DCA_SUBBANDS_X96; + unsigned int size = s->x96_subband_size; + int ch, band; + + // Reallocate subband sample buffer + av_fast_mallocz(&s->x96_subband_buffer, &s->x96_subband_size, + nframesamples * sizeof(int32_t)); + if (!s->x96_subband_buffer) + return AVERROR(ENOMEM); + + if (size != s->x96_subband_size) { + for (ch = 0; ch < DCA_CHANNELS; ch++) + for (band = 0; band < DCA_SUBBANDS_X96; band++) + s->x96_subband_samples[ch][band] = s->x96_subband_buffer + + (ch * DCA_SUBBANDS_X96 + band) * nchsamples + DCA_ADPCM_COEFFS; + } + + if (!s->predictor_history) + erase_x96_adpcm_history(s); + + 
return 0; +} + +static int parse_x96_subframe_header(DCACoreDecoder *s, int xch_base) +{ + int ch, band, ret; + + if (get_bits_left(&s->gb) < 0) + return AVERROR_INVALIDDATA; + + // Prediction mode + for (ch = xch_base; ch < s->x96_nchannels; ch++) + for (band = s->x96_subband_start; band < s->nsubbands[ch]; band++) + s->prediction_mode[ch][band] = get_bits1(&s->gb); + + // Prediction coefficients VQ address + for (ch = xch_base; ch < s->x96_nchannels; ch++) + for (band = s->x96_subband_start; band < s->nsubbands[ch]; band++) + if (s->prediction_mode[ch][band]) + s->prediction_vq_index[ch][band] = get_bits(&s->gb, 12); + + // Bit allocation index + for (ch = xch_base; ch < s->x96_nchannels; ch++) { + int sel = s->bit_allocation_sel[ch]; + int abits = 0; + + for (band = s->x96_subband_start; band < s->nsubbands[ch]; band++) { + // If Huffman code was used, the difference of abits was encoded + if (sel < 7) + abits += get_vlc(&s->gb, &vlc_quant_index[5 + 2 * s->x96_high_res], sel); + else + abits = get_bits(&s->gb, 3 + s->x96_high_res); + + if (abits < 0 || abits > 7 + 8 * s->x96_high_res) { + av_log(s->avctx, AV_LOG_ERROR, "Invalid X96 bit allocation index\n"); + return AVERROR_INVALIDDATA; + } + + s->bit_allocation[ch][band] = abits; + } + } + + // Scale factors + for (ch = xch_base; ch < s->x96_nchannels; ch++) { + int sel = s->scale_factor_sel[ch]; + int scale_index = 0; + + // Extract scales for subbands which are transmitted even for + // unallocated subbands + for (band = s->x96_subband_start; band < s->nsubbands[ch]; band++) { + if ((ret = parse_scale(s, &scale_index, sel)) < 0) + return ret; + s->scale_factors[ch][band >> 1][band & 1] = ret; + } + } + + // Joint subband codebook select + for (ch = xch_base; ch < s->x96_nchannels; ch++) { + if (s->joint_intensity_index[ch]) { + s->joint_scale_sel[ch] = get_bits(&s->gb, 3); + if (s->joint_scale_sel[ch] == 7) { + av_log(s->avctx, AV_LOG_ERROR, "Invalid X96 joint scale factor code book\n"); + return 
AVERROR_INVALIDDATA; + } + } + } + + // Scale factors for joint subband coding + for (ch = xch_base; ch < s->x96_nchannels; ch++) { + int src_ch = s->joint_intensity_index[ch] - 1; + if (src_ch >= 0) { + int sel = s->joint_scale_sel[ch]; + for (band = s->nsubbands[ch]; band < s->nsubbands[src_ch]; band++) { + if ((ret = parse_joint_scale(s, sel)) < 0) + return ret; + s->joint_scale_factors[ch][band] = ret; + } + } + } + + // Side information CRC check word + if (s->crc_present) + skip_bits(&s->gb, 16); + + return 0; +} + +static int parse_x96_coding_header(DCACoreDecoder *s, int exss, int xch_base) +{ + int n, ch, header_size = 0, header_pos = get_bits_count(&s->gb); + + if (get_bits_left(&s->gb) < 0) + return AVERROR_INVALIDDATA; + + if (exss) { + // Channel set header length + header_size = get_bits(&s->gb, 7) + 1; + + // Check CRC + if (s->x96_crc_present + && (s->avctx->err_recognition & (AV_EF_CRCCHECK | AV_EF_CAREFUL)) + && ff_dca_check_crc(&s->gb, header_pos, header_pos + header_size * 8)) { + av_log(s->avctx, AV_LOG_ERROR, "Invalid X96 channel set header checksum\n"); + return AVERROR_INVALIDDATA; + } + } + + // High resolution flag + s->x96_high_res = get_bits1(&s->gb); + + // First encoded subband + if (s->x96_rev_no < 8) { + s->x96_subband_start = get_bits(&s->gb, 5); + if (s->x96_subband_start > 27) { + av_log(s->avctx, AV_LOG_ERROR, "Invalid X96 subband start index (%d)\n", s->x96_subband_start); + return AVERROR_INVALIDDATA; + } + } else { + s->x96_subband_start = DCA_SUBBANDS; + } + + // Subband activity count + for (ch = xch_base; ch < s->x96_nchannels; ch++) { + s->nsubbands[ch] = get_bits(&s->gb, 6) + 1; + if (s->nsubbands[ch] < DCA_SUBBANDS) { + av_log(s->avctx, AV_LOG_ERROR, "Invalid X96 subband activity count (%d)\n", s->nsubbands[ch]); + return AVERROR_INVALIDDATA; + } + } + + // Joint intensity coding index + for (ch = xch_base; ch < s->x96_nchannels; ch++) { + if ((n = get_bits(&s->gb, 3)) && xch_base) + n += xch_base - 1; + if (n > 
s->x96_nchannels) { + av_log(s->avctx, AV_LOG_ERROR, "Invalid X96 joint intensity coding index\n"); + return AVERROR_INVALIDDATA; + } + s->joint_intensity_index[ch] = n; + } + + // Scale factor code book + for (ch = xch_base; ch < s->x96_nchannels; ch++) { + s->scale_factor_sel[ch] = get_bits(&s->gb, 3); + if (s->scale_factor_sel[ch] >= 6) { + av_log(s->avctx, AV_LOG_ERROR, "Invalid X96 scale factor code book\n"); + return AVERROR_INVALIDDATA; + } + } + + // Bit allocation quantizer select + for (ch = xch_base; ch < s->x96_nchannels; ch++) + s->bit_allocation_sel[ch] = get_bits(&s->gb, 3); + + // Quantization index codebook select + for (n = 0; n < 6 + 4 * s->x96_high_res; n++) + for (ch = xch_base; ch < s->x96_nchannels; ch++) + s->quant_index_sel[ch][n] = get_bits(&s->gb, quant_index_sel_nbits[n]); + + if (exss) { + // Reserved + // Byte align + // CRC16 of channel set header + if (ff_dca_seek_bits(&s->gb, header_pos + header_size * 8)) { + av_log(s->avctx, AV_LOG_ERROR, "Read past end of X96 channel set header\n"); + return AVERROR_INVALIDDATA; + } + } else { + if (s->crc_present) + skip_bits(&s->gb, 16); + } + + return 0; +} + +static int parse_x96_frame_data(DCACoreDecoder *s, int exss, int xch_base) +{ + int sf, ch, ret, band, sub_pos; + + if ((ret = parse_x96_coding_header(s, exss, xch_base)) < 0) + return ret; + + for (sf = 0, sub_pos = 0; sf < s->nsubframes; sf++) { + if ((ret = parse_x96_subframe_header(s, xch_base)) < 0) + return ret; + if ((ret = parse_x96_subframe_audio(s, sf, xch_base, &sub_pos)) < 0) + return ret; + } + + for (ch = xch_base; ch < s->x96_nchannels; ch++) { + // Determine number of active subbands for this channel + int nsubbands = s->nsubbands[ch]; + if (s->joint_intensity_index[ch]) + nsubbands = FFMAX(nsubbands, s->nsubbands[s->joint_intensity_index[ch] - 1]); + + // Update history for ADPCM and clear inactive subbands + for (band = 0; band < DCA_SUBBANDS_X96; band++) { + int32_t *samples = s->x96_subband_samples[ch][band] - 
DCA_ADPCM_COEFFS; + if (band >= s->x96_subband_start && band < nsubbands) + AV_COPY128(samples, samples + s->npcmblocks); + else + memset(samples, 0, (DCA_ADPCM_COEFFS + s->npcmblocks) * sizeof(int32_t)); + } + } + + return 0; +} + +static int parse_x96_frame(DCACoreDecoder *s) +{ + int ret; + + // Revision number + s->x96_rev_no = get_bits(&s->gb, 4); + if (s->x96_rev_no < 1 || s->x96_rev_no > 8) { + av_log(s->avctx, AV_LOG_ERROR, "Invalid X96 revision (%d)\n", s->x96_rev_no); + return AVERROR_INVALIDDATA; + } + + s->x96_crc_present = 0; + s->x96_nchannels = s->nchannels; + + if ((ret = alloc_x96_sample_buffer(s)) < 0) + return ret; + + if ((ret = parse_x96_frame_data(s, 0, 0)) < 0) + return ret; + + // Seek to the end of core frame + if (ff_dca_seek_bits(&s->gb, s->frame_size * 8)) { + av_log(s->avctx, AV_LOG_ERROR, "Read past end of X96 frame\n"); + return AVERROR_INVALIDDATA; + } + + return 0; +} + +static int parse_x96_frame_exss(DCACoreDecoder *s) +{ + int x96_frame_size[DCA_EXSS_CHSETS_MAX]; + int x96_nchannels[DCA_EXSS_CHSETS_MAX]; + int x96_nchsets, x96_base_ch; + int i, ret, header_size, header_pos = get_bits_count(&s->gb); + + // X96 sync word + if (get_bits_long(&s->gb, 32) != DCA_SYNCWORD_X96) { + av_log(s->avctx, AV_LOG_ERROR, "Invalid X96 sync word\n"); + return AVERROR_INVALIDDATA; + } + + // X96 frame header length + header_size = get_bits(&s->gb, 6) + 1; + + // Check X96 frame header CRC + if ((s->avctx->err_recognition & (AV_EF_CRCCHECK | AV_EF_CAREFUL)) + && ff_dca_check_crc(&s->gb, header_pos + 32, header_pos + header_size * 8)) { + av_log(s->avctx, AV_LOG_ERROR, "Invalid X96 frame header checksum\n"); + return AVERROR_INVALIDDATA; + } + + // Revision number + s->x96_rev_no = get_bits(&s->gb, 4); + if (s->x96_rev_no < 1 || s->x96_rev_no > 8) { + av_log(s->avctx, AV_LOG_ERROR, "Invalid X96 revision (%d)\n", s->x96_rev_no); + return AVERROR_INVALIDDATA; + } + + // CRC presence flag for channel set header + s->x96_crc_present = get_bits1(&s->gb); 
+ + // Number of channel sets + x96_nchsets = get_bits(&s->gb, 2) + 1; + + // Channel set data byte size + for (i = 0; i < x96_nchsets; i++) + x96_frame_size[i] = get_bits(&s->gb, 12) + 1; + + // Number of channels in channel set + for (i = 0; i < x96_nchsets; i++) + x96_nchannels[i] = get_bits(&s->gb, 3) + 1; + + // Reserved + // Byte align + // CRC16 of X96 frame header + if (ff_dca_seek_bits(&s->gb, header_pos + header_size * 8)) { + av_log(s->avctx, AV_LOG_ERROR, "Read past end of X96 frame header\n"); + return AVERROR_INVALIDDATA; + } + + if ((ret = alloc_x96_sample_buffer(s)) < 0) + return ret; + + // Channel set data + for (i = 0, x96_base_ch = 0; i < x96_nchsets; i++) { + header_pos = get_bits_count(&s->gb); + + if (x96_base_ch + x96_nchannels[i] <= s->nchannels) { + s->x96_nchannels = x96_base_ch + x96_nchannels[i]; + if ((ret = parse_x96_frame_data(s, 1, x96_base_ch)) < 0) + return ret; + } + + x96_base_ch += x96_nchannels[i]; + + if (ff_dca_seek_bits(&s->gb, header_pos + x96_frame_size[i] * 8)) { + av_log(s->avctx, AV_LOG_ERROR, "Read past end of X96 channel set\n"); + return AVERROR_INVALIDDATA; + } + } + + return 0; +} + +static int parse_aux_data(DCACoreDecoder *s) +{ + int aux_pos; + + if (get_bits_left(&s->gb) < 0) + return AVERROR_INVALIDDATA; + + // Auxiliary data byte count (can't be trusted) + skip_bits(&s->gb, 6); + + // 4-byte align + skip_bits_long(&s->gb, -get_bits_count(&s->gb) & 31); + + // Auxiliary data sync word + if (get_bits_long(&s->gb, 32) != DCA_SYNCWORD_REV1AUX) { + av_log(s->avctx, AV_LOG_ERROR, "Invalid auxiliary data sync word\n"); + return AVERROR_INVALIDDATA; + } + + aux_pos = get_bits_count(&s->gb); + + // Auxiliary decode time stamp flag + if (get_bits1(&s->gb)) + skip_bits_long(&s->gb, 47); + + // Auxiliary dynamic downmix flag + if (s->prim_dmix_embedded = get_bits1(&s->gb)) { + int i, m, n; + + // Auxiliary primary channel downmix type + s->prim_dmix_type = get_bits(&s->gb, 3); + if (s->prim_dmix_type >= 
DCA_DMIX_TYPE_COUNT) {
+            av_log(s->avctx, AV_LOG_ERROR, "Invalid primary channel set downmix type\n");
+            return AVERROR_INVALIDDATA;
+        }
+
+        // Size of downmix coefficients matrix
+        m = ff_dca_dmix_primary_nch[s->prim_dmix_type];
+        n = ff_dca_channels[s->audio_mode] + !!s->lfe_present;
+
+        // Dynamic downmix code coefficients
+        for (i = 0; i < m * n; i++) {
+            int code = get_bits(&s->gb, 9);
+            int sign = (code >> 8) - 1;
+            unsigned int index = code & 0xff;
+            if (index >= FF_DCA_DMIXTABLE_SIZE) {
+                av_log(s->avctx, AV_LOG_ERROR, "Invalid downmix coefficient index\n");
+                return AVERROR_INVALIDDATA;
+            }
+            s->prim_dmix_coeff[i] = (ff_dca_dmixtable[index] ^ sign) - sign;
+        }
+    }
+
+    // Byte align
+    skip_bits(&s->gb, -get_bits_count(&s->gb) & 7);
+
+    // CRC16 of auxiliary data
+    skip_bits(&s->gb, 16);
+
+    // Check CRC
+    if ((s->avctx->err_recognition & (AV_EF_CRCCHECK | AV_EF_CAREFUL))
+        && ff_dca_check_crc(&s->gb, aux_pos, get_bits_count(&s->gb))) {
+        av_log(s->avctx, AV_LOG_ERROR, "Invalid auxiliary data checksum\n");
+        return AVERROR_INVALIDDATA;
+    }
+
+    return 0;
+}
+
+/**
+ * Parse the optional information that follows the core audio data.
+ *
+ * Skips the 32-bit time code stamp when ts_present is set, parses the
+ * auxiliary data when aux_present is set, and, when ext_audio_present is set
+ * and core_only is not requested, searches the remainder of the core frame
+ * for the sync word of the extension selected by ext_audio_type.
+ *
+ * On a successful search one of xch_pos, x96_pos or xxch_pos is set to a
+ * non-zero bit position. xch_pos and x96_pos point just past the header
+ * fields that were read to validate the candidate, xxch_pos points at the
+ * sync word itself.
+ *
+ * @param s core decoder context, s->gb positioned after the core audio data
+ * @return 0 on success (including recoverable errors when AV_EF_EXPLODE is
+ *         not set), negative AVERROR code otherwise
+ */
+static int parse_optional_info(DCACoreDecoder *s)
+{
+    DCAContext *dca = s->avctx->priv_data;
+    // Stays negative when no auxiliary data is parsed, so that the embedded
+    // downmix flag is cleared below in that case as well
+    int ret = -1;
+
+    // Time code stamp
+    if (s->ts_present)
+        skip_bits_long(&s->gb, 32);
+
+    // Auxiliary data
+    if (s->aux_present && (ret = parse_aux_data(s)) < 0
+        && (s->avctx->err_recognition & AV_EF_EXPLODE))
+        return ret;
+
+    if (ret < 0)
+        s->prim_dmix_embedded = 0;
+
+    // Core extensions
+    if (s->ext_audio_present && !dca->core_only) {
+        int sync_pos = FFMIN(s->frame_size / 4, s->gb.size_in_bits / 32) - 1;
+        int last_pos = get_bits_count(&s->gb) / 32;
+        int size, dist;
+
+        // Search for extension sync words aligned on 4-byte boundary. Search
+        // must be done backwards from the end of core frame to work around
+        // sync word aliasing issues.
+        switch (s->ext_audio_type) {
+        case EXT_AUDIO_XCH:
+            if (dca->request_channel_layout)
+                break;
+
+            // The distance between XCH sync word and end of the core frame
+            // must be equal to XCH frame size. Off by one error is allowed for
+            // compatibility with legacy bitstreams. Minimum XCH frame size is
+            // 96 bytes. AMODE and PCHS are further checked to reduce
+            // probability of alias sync detection.
+            for (; sync_pos >= last_pos; sync_pos--) {
+                if (AV_RB32(s->gb.buffer + sync_pos * 4) == DCA_SYNCWORD_XCH) {
+                    s->gb.index = (sync_pos + 1) * 32;
+                    size = get_bits(&s->gb, 10) + 1;
+                    dist = s->frame_size - sync_pos * 4;
+                    if (size >= 96
+                        && (size == dist || size - 1 == dist)
+                        && get_bits(&s->gb, 7) == 0x08) {
+                        s->xch_pos = get_bits_count(&s->gb);
+                        break;
+                    }
+                }
+            }
+
+            // Only complain if the search above actually failed. xch_pos is
+            // reset to 0 by ff_dca_core_parse() and is non-zero once found.
+            if (!s->xch_pos) {
+                av_log(s->avctx, AV_LOG_ERROR, "XCH sync word not found\n");
+                if (s->avctx->err_recognition & AV_EF_EXPLODE)
+                    return AVERROR_INVALIDDATA;
+            }
+            break;
+
+        case EXT_AUDIO_X96:
+            // The distance between X96 sync word and end of the core frame
+            // must be equal to X96 frame size. Minimum X96 frame size is 96
+            // bytes.
+            for (; sync_pos >= last_pos; sync_pos--) {
+                if (AV_RB32(s->gb.buffer + sync_pos * 4) == DCA_SYNCWORD_X96) {
+                    s->gb.index = (sync_pos + 1) * 32;
+                    size = get_bits(&s->gb, 12) + 1;
+                    dist = s->frame_size - sync_pos * 4;
+                    if (size >= 96 && size == dist) {
+                        s->x96_pos = get_bits_count(&s->gb);
+                        break;
+                    }
+                }
+            }
+
+            // Only complain if the search above actually failed
+            if (!s->x96_pos) {
+                av_log(s->avctx, AV_LOG_ERROR, "X96 sync word not found\n");
+                if (s->avctx->err_recognition & AV_EF_EXPLODE)
+                    return AVERROR_INVALIDDATA;
+            }
+            break;
+
+        case EXT_AUDIO_XXCH:
+            if (dca->request_channel_layout)
+                break;
+
+            // XXCH frame header CRC must be valid. Minimum XXCH frame header
+            // size is 11 bytes.
+            for (; sync_pos >= last_pos; sync_pos--) {
+                if (AV_RB32(s->gb.buffer + sync_pos * 4) == DCA_SYNCWORD_XXCH) {
+                    s->gb.index = (sync_pos + 1) * 32;
+                    size = get_bits(&s->gb, 6) + 1;
+                    if (size >= 11 &&
+                        !ff_dca_check_crc(&s->gb, (sync_pos + 1) * 32,
+                                          sync_pos * 32 + size * 8)) {
+                        s->xxch_pos = sync_pos * 32;
+                        break;
+                    }
+                }
+            }
+
+            // Only complain if the search above actually failed. A found
+            // position is never 0 here: last_pos is at least 1 because the
+            // 32-bit core sync word has already been consumed.
+            if (!s->xxch_pos) {
+                av_log(s->avctx, AV_LOG_ERROR, "XXCH sync word not found\n");
+                if (s->avctx->err_recognition & AV_EF_EXPLODE)
+                    return AVERROR_INVALIDDATA;
+            }
+            break;
+        }
+    }
+
+    return 0;
+}
+
+int ff_dca_core_parse(DCACoreDecoder *s, uint8_t *data, int size)
+{
+    int ret;
+
+    s->ext_audio_mask = 0;
+    s->xch_pos = s->xxch_pos = s->x96_pos = 0;
+
+    if ((ret = init_get_bits8(&s->gb, data, size)) < 0)
+        return ret;
+
+    skip_bits_long(&s->gb, 32);
+    if ((ret = parse_frame_header(s)) < 0)
+        return ret;
+    if ((ret = alloc_sample_buffer(s)) < 0)
+        return ret;
+    if ((ret = parse_frame_data(s, HEADER_CORE, 0)) < 0)
+        return ret;
+    if ((ret = parse_optional_info(s)) < 0)
+        return ret;
+
+    // Workaround for DTS in WAV
+    if (s->frame_size > size && s->frame_size < size + 4) {
+        av_log(s->avctx, AV_LOG_DEBUG, "Working around excessive core frame size (%d > %d)\n", s->frame_size, size);
+        s->frame_size = size;
+    }
+
+    if (ff_dca_seek_bits(&s->gb, s->frame_size * 8)) {
+        av_log(s->avctx, AV_LOG_ERROR, "Read past end of core frame\n");
+        if (s->avctx->err_recognition & AV_EF_EXPLODE)
+            return AVERROR_INVALIDDATA;
+    }
+
+    return 0;
+}
+
+int ff_dca_core_parse_exss(DCACoreDecoder *s, uint8_t *data, DCAExssAsset *asset)
+{
+    AVCodecContext *avctx = s->avctx;
+    DCAContext *dca = avctx->priv_data;
+    GetBitContext gb = s->gb;
+    int exss_mask = asset ?
asset->extension_mask : 0; + int ret = 0, ext = 0; + + // Parse (X)XCH unless downmixing + if (!dca->request_channel_layout) { + if (exss_mask & DCA_EXSS_XXCH) { + if ((ret = init_get_bits8(&s->gb, data + asset->xxch_offset, asset->xxch_size)) < 0) + return ret; + ret = parse_xxch_frame(s); + ext = DCA_EXSS_XXCH; + } else if (s->xxch_pos) { + s->gb.index = s->xxch_pos; + ret = parse_xxch_frame(s); + ext = DCA_CSS_XXCH; + } else if (s->xch_pos) { + s->gb.index = s->xch_pos; + ret = parse_xch_frame(s); + ext = DCA_CSS_XCH; + } + + // Revert to primary channel set in case (X)XCH parsing fails + if (ret < 0) { + if (avctx->err_recognition & AV_EF_EXPLODE) + return ret; + s->nchannels = ff_dca_channels[s->audio_mode]; + s->ch_mask = audio_mode_ch_mask[s->audio_mode]; + if (s->lfe_present) + s->ch_mask |= DCA_SPEAKER_MASK_LFE1; + } else { + s->ext_audio_mask |= ext; + } + } + + // Parse XBR + if (exss_mask & DCA_EXSS_XBR) { + if ((ret = init_get_bits8(&s->gb, data + asset->xbr_offset, asset->xbr_size)) < 0) + return ret; + if ((ret = parse_xbr_frame(s)) < 0) { + if (avctx->err_recognition & AV_EF_EXPLODE) + return ret; + } else { + s->ext_audio_mask |= DCA_EXSS_XBR; + } + } + + // Parse X96 unless decoding XLL + if (!(dca->packet & DCA_PACKET_XLL)) { + if (exss_mask & DCA_EXSS_X96) { + if ((ret = init_get_bits8(&s->gb, data + asset->x96_offset, asset->x96_size)) < 0) + return ret; + if ((ret = parse_x96_frame_exss(s)) < 0) { + if (ret == AVERROR(ENOMEM) || (avctx->err_recognition & AV_EF_EXPLODE)) + return ret; + } else { + s->ext_audio_mask |= DCA_EXSS_X96; + } + } else if (s->x96_pos) { + s->gb = gb; + s->gb.index = s->x96_pos; + if ((ret = parse_x96_frame(s)) < 0) { + if (ret == AVERROR(ENOMEM) || (avctx->err_recognition & AV_EF_EXPLODE)) + return ret; + } else { + s->ext_audio_mask |= DCA_CSS_X96; + } + } + } + + return 0; +} + +static int map_prm_ch_to_spkr(DCACoreDecoder *s, int ch) +{ + int pos, spkr; + + // Try to map this channel to core first + pos = 
ff_dca_channels[s->audio_mode]; + if (ch < pos) { + spkr = prm_ch_to_spkr_map[s->audio_mode][ch]; + if (s->ext_audio_mask & (DCA_CSS_XXCH | DCA_EXSS_XXCH)) { + if (s->xxch_core_mask & (1U << spkr)) + return spkr; + if (spkr == DCA_SPEAKER_Ls && (s->xxch_core_mask & DCA_SPEAKER_MASK_Lss)) + return DCA_SPEAKER_Lss; + if (spkr == DCA_SPEAKER_Rs && (s->xxch_core_mask & DCA_SPEAKER_MASK_Rss)) + return DCA_SPEAKER_Rss; + return -1; + } + return spkr; + } + + // Then XCH + if ((s->ext_audio_mask & DCA_CSS_XCH) && ch == pos) + return DCA_SPEAKER_Cs; + + // Then XXCH + if (s->ext_audio_mask & (DCA_CSS_XXCH | DCA_EXSS_XXCH)) { + for (spkr = DCA_SPEAKER_Cs; spkr < s->xxch_mask_nbits; spkr++) + if (s->xxch_spkr_mask & (1U << spkr)) + if (pos++ == ch) + return spkr; + } + + // No mapping + return -1; +} + +static void erase_dsp_history(DCACoreDecoder *s) +{ + memset(s->dcadsp_data, 0, sizeof(s->dcadsp_data)); + s->output_history_lfe_fixed = 0; + s->output_history_lfe_float = 0; +} + +static void set_filter_mode(DCACoreDecoder *s, int mode) +{ + if (s->filter_mode != mode) { + erase_dsp_history(s); + s->filter_mode = mode; + } +} + +int ff_dca_core_filter_fixed(DCACoreDecoder *s, int x96_synth) +{ + int n, ch, spkr, nsamples, x96_nchannels = 0; + const int32_t *filter_coeff; + int32_t *ptr; + + // Externally set x96_synth flag implies that X96 synthesis should be + // enabled, yet actual X96 subband data should be discarded. This is a + // special case for lossless residual decoder that ignores X96 data if + // present. 
+ if (!x96_synth && (s->ext_audio_mask & (DCA_CSS_X96 | DCA_EXSS_X96))) { + x96_nchannels = s->x96_nchannels; + x96_synth = 1; + } + if (x96_synth < 0) + x96_synth = 0; + + s->output_rate = s->sample_rate << x96_synth; + s->npcmsamples = nsamples = (s->npcmblocks * DCA_PCMBLOCK_SAMPLES) << x96_synth; + + // Reallocate PCM output buffer + av_fast_malloc(&s->output_buffer, &s->output_size, + nsamples * av_popcount(s->ch_mask) * sizeof(int32_t)); + if (!s->output_buffer) + return AVERROR(ENOMEM); + + ptr = (int32_t *)s->output_buffer; + for (spkr = 0; spkr < DCA_SPEAKER_COUNT; spkr++) { + if (s->ch_mask & (1U << spkr)) { + s->output_samples[spkr] = ptr; + ptr += nsamples; + } else { + s->output_samples[spkr] = NULL; + } + } + + // Handle change of filtering mode + set_filter_mode(s, x96_synth | DCA_FILTER_MODE_FIXED); + + // Select filter + if (x96_synth) + filter_coeff = ff_dca_fir_64bands_fixed; + else if (s->filter_perfect) + filter_coeff = ff_dca_fir_32bands_perfect_fixed; + else + filter_coeff = ff_dca_fir_32bands_nonperfect_fixed; + + // Filter primary channels + for (ch = 0; ch < s->nchannels; ch++) { + // Map this primary channel to speaker + spkr = map_prm_ch_to_spkr(s, ch); + if (spkr < 0) + return AVERROR(EINVAL); + + // Filter bank reconstruction + s->dcadsp->sub_qmf_fixed[x96_synth]( + &s->synth, + &s->dcadct, + s->output_samples[spkr], + s->subband_samples[ch], + ch < x96_nchannels ? 
s->x96_subband_samples[ch] : NULL, + s->dcadsp_data[ch].u.fix.hist1, + &s->dcadsp_data[ch].offset, + s->dcadsp_data[ch].u.fix.hist2, + filter_coeff, + s->npcmblocks); + } + + // Filter LFE channel + if (s->lfe_present) { + int32_t *samples = s->output_samples[DCA_SPEAKER_LFE1]; + int nlfesamples = s->npcmblocks >> 1; + + // Check LFF + if (s->lfe_present == LFE_FLAG_128) { + av_log(s->avctx, AV_LOG_ERROR, "Fixed point mode doesn't support LFF=1\n"); + return AVERROR(EINVAL); + } + + // Offset intermediate buffer for X96 + if (x96_synth) + samples += nsamples / 2; + + // Interpolate LFE channel + s->dcadsp->lfe_fir_fixed(samples, s->lfe_samples + DCA_LFE_HISTORY, + ff_dca_lfe_fir_64_fixed, s->npcmblocks); + + if (x96_synth) { + // Filter 96 kHz oversampled LFE PCM to attenuate high frequency + // (47.6 - 48.0 kHz) components of interpolation image + s->dcadsp->lfe_x96_fixed(s->output_samples[DCA_SPEAKER_LFE1], + samples, &s->output_history_lfe_fixed, + nsamples / 2); + + } + + // Update LFE history + for (n = DCA_LFE_HISTORY - 1; n >= 0; n--) + s->lfe_samples[n] = s->lfe_samples[nlfesamples + n]; + } + + return 0; +} + +static int filter_frame_fixed(DCACoreDecoder *s, AVFrame *frame) +{ + AVCodecContext *avctx = s->avctx; + DCAContext *dca = avctx->priv_data; + int i, n, ch, ret, spkr, nsamples; + + // Don't filter twice when falling back from XLL + if (!(dca->packet & DCA_PACKET_XLL) && (ret = ff_dca_core_filter_fixed(s, 0)) < 0) + return ret; + + avctx->sample_rate = s->output_rate; + avctx->sample_fmt = AV_SAMPLE_FMT_S32P; + avctx->bits_per_raw_sample = 24; + + frame->nb_samples = nsamples = s->npcmsamples; + if ((ret = ff_get_buffer(avctx, frame, 0)) < 0) + return ret; + + // Undo embedded XCH downmix + if (s->es_format && (s->ext_audio_mask & DCA_CSS_XCH) + && s->audio_mode >= AMODE_2F2R) { + s->dcadsp->dmix_sub_xch(s->output_samples[DCA_SPEAKER_Ls], + s->output_samples[DCA_SPEAKER_Rs], + s->output_samples[DCA_SPEAKER_Cs], + nsamples); + + } + + // Undo 
embedded XXCH downmix + if ((s->ext_audio_mask & (DCA_CSS_XXCH | DCA_EXSS_XXCH)) + && s->xxch_dmix_embedded) { + int scale_inv = s->xxch_dmix_scale_inv; + int *coeff_ptr = s->xxch_dmix_coeff; + int xch_base = ff_dca_channels[s->audio_mode]; + av_assert1(s->nchannels - xch_base <= DCA_XXCH_CHANNELS_MAX); + + // Undo embedded core downmix pre-scaling + for (spkr = 0; spkr < s->xxch_mask_nbits; spkr++) { + if (s->xxch_core_mask & (1U << spkr)) { + s->dcadsp->dmix_scale_inv(s->output_samples[spkr], + scale_inv, nsamples); + } + } + + // Undo downmix + for (ch = xch_base; ch < s->nchannels; ch++) { + int src_spkr = map_prm_ch_to_spkr(s, ch); + if (src_spkr < 0) + return AVERROR(EINVAL); + for (spkr = 0; spkr < s->xxch_mask_nbits; spkr++) { + if (s->xxch_dmix_mask[ch - xch_base] & (1U << spkr)) { + int coeff = mul16(*coeff_ptr++, scale_inv); + if (coeff) { + s->dcadsp->dmix_sub(s->output_samples[spkr ], + s->output_samples[src_spkr], + coeff, nsamples); + } + } + } + } + } + + if (!(s->ext_audio_mask & (DCA_CSS_XXCH | DCA_CSS_XCH | DCA_EXSS_XXCH))) { + // Front sum/difference decoding + if ((s->sumdiff_front && s->audio_mode > AMODE_MONO) + || s->audio_mode == AMODE_STEREO_SUMDIFF) { + s->fixed_dsp->butterflies_fixed(s->output_samples[DCA_SPEAKER_L], + s->output_samples[DCA_SPEAKER_R], + nsamples); + } + + // Surround sum/difference decoding + if (s->sumdiff_surround && s->audio_mode >= AMODE_2F2R) { + s->fixed_dsp->butterflies_fixed(s->output_samples[DCA_SPEAKER_Ls], + s->output_samples[DCA_SPEAKER_Rs], + nsamples); + } + } + + // Downmix primary channel set to stereo + if (s->request_mask != s->ch_mask) { + ff_dca_downmix_to_stereo_fixed(s->dcadsp, + s->output_samples, + s->prim_dmix_coeff, + nsamples, s->ch_mask); + } + + for (i = 0; i < avctx->channels; i++) { + int32_t *samples = s->output_samples[s->ch_remap[i]]; + int32_t *plane = (int32_t *)frame->extended_data[i]; + for (n = 0; n < nsamples; n++) + plane[n] = clip23(samples[n]) * (1 << 8); + } + + return 0; +} + 
+/**
+ * Produce one output frame in floating point mode (planar float).
+ *
+ * Synthesis writes directly into the frame planes for speakers that are
+ * part of the output layout; speakers present in ch_mask but not in the
+ * output get scratch space from s->output_buffer. After synthesis the
+ * embedded XCH/XXCH downmixes are undone, sum/difference decoding is
+ * applied when no (X)XCH extension is active, and a stereo downmix is
+ * performed if request_mask differs from ch_mask.
+ *
+ * @return 0 on success, a negative error code otherwise
+ */
+static int filter_frame_float(DCACoreDecoder *s, AVFrame *frame)
+{
+    AVCodecContext *avctx = s->avctx;
+    int x96_nchannels = 0, x96_synth = 0;
+    int i, n, ch, ret, spkr, nsamples, nchannels;
+    float *output_samples[DCA_SPEAKER_COUNT] = { NULL }, *ptr;
+    const float *filter_coeff;
+
+    if (s->ext_audio_mask & (DCA_CSS_X96 | DCA_EXSS_X96)) {
+        x96_nchannels = s->x96_nchannels;
+        x96_synth = 1;
+    }
+
+    avctx->sample_rate = s->sample_rate << x96_synth;
+    avctx->sample_fmt = AV_SAMPLE_FMT_FLTP;
+    avctx->bits_per_raw_sample = 0;
+
+    frame->nb_samples = nsamples = (s->npcmblocks * DCA_PCMBLOCK_SAMPLES) << x96_synth;
+    if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
+        return ret;
+
+    // Build reverse speaker to channel mapping
+    for (i = 0; i < avctx->channels; i++)
+        output_samples[s->ch_remap[i]] = (float *)frame->extended_data[i];
+
+    // Allocate space for extra channels
+    // (speakers in ch_mask that have no plane in the output frame)
+    nchannels = av_popcount(s->ch_mask) - avctx->channels;
+    if (nchannels > 0) {
+        av_fast_malloc(&s->output_buffer, &s->output_size,
+                       nsamples * nchannels * sizeof(float));
+        if (!s->output_buffer)
+            return AVERROR(ENOMEM);
+
+        ptr = (float *)s->output_buffer;
+        for (spkr = 0; spkr < DCA_SPEAKER_COUNT; spkr++) {
+            if (!(s->ch_mask & (1U << spkr)))
+                continue;
+            if (output_samples[spkr])
+                continue;
+            output_samples[spkr] = ptr;
+            ptr += nsamples;
+        }
+    }
+
+    // Handle change of filtering mode
+    set_filter_mode(s, x96_synth);
+
+    // Select filter
+    if (x96_synth)
+        filter_coeff = ff_dca_fir_64bands;
+    else if (s->filter_perfect)
+        filter_coeff = ff_dca_fir_32bands_perfect;
+    else
+        filter_coeff = ff_dca_fir_32bands_nonperfect;
+
+    // Filter primary channels
+    for (ch = 0; ch < s->nchannels; ch++) {
+        // Map this primary channel to speaker
+        spkr = map_prm_ch_to_spkr(s, ch);
+        if (spkr < 0)
+            return AVERROR(EINVAL);
+
+        // Filter bank reconstruction
+        s->dcadsp->sub_qmf_float[x96_synth](
+            &s->synth,
+            &s->imdct[x96_synth],
+            output_samples[spkr],
+            s->subband_samples[ch],
+            ch < x96_nchannels ? s->x96_subband_samples[ch] : NULL,
+            s->dcadsp_data[ch].u.flt.hist1,
+            &s->dcadsp_data[ch].offset,
+            s->dcadsp_data[ch].u.flt.hist2,
+            filter_coeff,
+            s->npcmblocks,
+            1.0f / (1 << (17 - x96_synth)));
+    }
+
+    // Filter LFE channel
+    if (s->lfe_present) {
+        int dec_select = (s->lfe_present == LFE_FLAG_128);
+        float *samples = output_samples[DCA_SPEAKER_LFE1];
+        int nlfesamples = s->npcmblocks >> (dec_select + 1);
+
+        // Offset intermediate buffer for X96
+        // (the interpolated LFE is written to the second half of the LFE
+        // plane and then filtered back to its start below)
+        if (x96_synth)
+            samples += nsamples / 2;
+
+        // Select filter
+        if (dec_select)
+            filter_coeff = ff_dca_lfe_fir_128;
+        else
+            filter_coeff = ff_dca_lfe_fir_64;
+
+        // Interpolate LFE channel
+        s->dcadsp->lfe_fir_float[dec_select](
+            samples, s->lfe_samples + DCA_LFE_HISTORY,
+            filter_coeff, s->npcmblocks);
+
+        if (x96_synth) {
+            // Filter 96 kHz oversampled LFE PCM to attenuate high frequency
+            // (47.6 - 48.0 kHz) components of interpolation image
+            s->dcadsp->lfe_x96_float(output_samples[DCA_SPEAKER_LFE1],
+                                     samples, &s->output_history_lfe_float,
+                                     nsamples / 2);
+        }
+
+        // Update LFE history
+        for (n = DCA_LFE_HISTORY - 1; n >= 0; n--)
+            s->lfe_samples[n] = s->lfe_samples[nlfesamples + n];
+    }
+
+    // Undo embedded XCH downmix
+    if (s->es_format && (s->ext_audio_mask & DCA_CSS_XCH)
+        && s->audio_mode >= AMODE_2F2R) {
+        s->float_dsp->vector_fmac_scalar(output_samples[DCA_SPEAKER_Ls],
+                                         output_samples[DCA_SPEAKER_Cs],
+                                         -M_SQRT1_2, nsamples);
+        s->float_dsp->vector_fmac_scalar(output_samples[DCA_SPEAKER_Rs],
+                                         output_samples[DCA_SPEAKER_Cs],
+                                         -M_SQRT1_2, nsamples);
+    }
+
+    // Undo embedded XXCH downmix
+    if ((s->ext_audio_mask & (DCA_CSS_XXCH | DCA_EXSS_XXCH))
+        && s->xxch_dmix_embedded) {
+        float scale_inv = s->xxch_dmix_scale_inv * (1.0f / (1 << 16));
+        int *coeff_ptr  = s->xxch_dmix_coeff;
+        int xch_base    = ff_dca_channels[s->audio_mode];
+        av_assert1(s->nchannels - xch_base <= DCA_XXCH_CHANNELS_MAX);
+
+        // Undo downmix
+        // (coeff_ptr advances once per set bit of each channel's mask, so
+        // coefficients are consumed in channel-major, speaker-minor order)
+        for (ch = xch_base; ch < s->nchannels; ch++) {
+            int src_spkr = map_prm_ch_to_spkr(s, ch);
+            if (src_spkr < 0)
+                return AVERROR(EINVAL);
+            for (spkr = 0; spkr < s->xxch_mask_nbits; spkr++) {
+                if (s->xxch_dmix_mask[ch - xch_base] & (1U << spkr)) {
+                    int coeff = *coeff_ptr++;
+                    if (coeff) {
+                        s->float_dsp->vector_fmac_scalar(output_samples[    spkr],
+                                                         output_samples[src_spkr],
+                                                         coeff * (-1.0f / (1 << 15)),
+                                                         nsamples);
+                    }
+                }
+            }
+        }
+
+        // Undo embedded core downmix pre-scaling
+        for (spkr = 0; spkr < s->xxch_mask_nbits; spkr++) {
+            if (s->xxch_core_mask & (1U << spkr)) {
+                s->float_dsp->vector_fmul_scalar(output_samples[spkr],
+                                                 output_samples[spkr],
+                                                 scale_inv, nsamples);
+            }
+        }
+    }
+
+    if (!(s->ext_audio_mask & (DCA_CSS_XXCH | DCA_CSS_XCH | DCA_EXSS_XXCH))) {
+        // Front sum/difference decoding
+        if ((s->sumdiff_front && s->audio_mode > AMODE_MONO)
+            || s->audio_mode == AMODE_STEREO_SUMDIFF) {
+            s->float_dsp->butterflies_float(output_samples[DCA_SPEAKER_L],
+                                            output_samples[DCA_SPEAKER_R],
+                                            nsamples);
+        }
+
+        // Surround sum/difference decoding
+        if (s->sumdiff_surround && s->audio_mode >= AMODE_2F2R) {
+            s->float_dsp->butterflies_float(output_samples[DCA_SPEAKER_Ls],
+                                            output_samples[DCA_SPEAKER_Rs],
+                                            nsamples);
+        }
+    }
+
+    // Downmix primary channel set to stereo
+    if (s->request_mask != s->ch_mask) {
+        ff_dca_downmix_to_stereo_float(s->float_dsp, output_samples,
+                                       s->prim_dmix_coeff,
+                                       nsamples, s->ch_mask);
+    }
+
+    return 0;
+}
+
+/**
+ * Filter the parsed core frame into an output AVFrame and update the
+ * codec context (channel layout, profile, bit rate) and the frame's
+ * matrix encoding side data.
+ *
+ * The fixed point path is used when AV_CODEC_FLAG_BITEXACT is set or when
+ * the packet carries an extension substream whose first asset contains
+ * XLL; otherwise the floating point path is used.
+ *
+ * @return 0 on success, AVERROR(EINVAL) if the channel layout cannot be
+ *         set, or the negative error code of the filtering stage
+ */
+int ff_dca_core_filter_frame(DCACoreDecoder *s, AVFrame *frame)
+{
+    AVCodecContext *avctx = s->avctx;
+    DCAContext *dca = avctx->priv_data;
+    DCAExssAsset *asset = &dca->exss.assets[0];
+    enum AVMatrixEncoding matrix_encoding;
+    int ret;
+
+    // Handle downmixing to stereo request
+    // (honoured only with embedded LoRo or LtRt downmix coefficients)
+    if (dca->request_channel_layout == DCA_SPEAKER_LAYOUT_STEREO
+        && s->audio_mode > AMODE_MONO && s->prim_dmix_embedded
+        && (s->prim_dmix_type == DCA_DMIX_TYPE_LoRo ||
+            s->prim_dmix_type == DCA_DMIX_TYPE_LtRt))
+        s->request_mask = DCA_SPEAKER_LAYOUT_STEREO;
+    else
+        s->request_mask = s->ch_mask;
+    if (!ff_dca_set_channel_layout(avctx, s->ch_remap, s->request_mask))
+        return AVERROR(EINVAL);
+
+    // Force fixed point mode when falling back from XLL
+    if ((avctx->flags & AV_CODEC_FLAG_BITEXACT) || ((dca->packet & DCA_PACKET_EXSS)
+                                                    && (asset->extension_mask & DCA_EXSS_XLL)))
+        ret = filter_frame_fixed(s, frame);
+    else
+        ret = filter_frame_float(s, frame);
+    if (ret < 0)
+        return ret;
+
+    // Set profile, bit rate, etc
+    if (s->ext_audio_mask & DCA_EXSS_MASK)
+        avctx->profile = FF_PROFILE_DTS_HD_HRA;
+    else if (s->ext_audio_mask & (DCA_CSS_XXCH | DCA_CSS_XCH))
+        avctx->profile = FF_PROFILE_DTS_ES;
+    else if (s->ext_audio_mask & DCA_CSS_X96)
+        avctx->profile = FF_PROFILE_DTS_96_24;
+    else
+        avctx->profile = FF_PROFILE_DTS;
+
+    // NOTE(review): bit_rate values of 3 and below are treated as "no
+    // usable rate" - confirm their meaning against parse_frame_header()
+    if (s->bit_rate > 3 && !(s->ext_audio_mask & DCA_EXSS_MASK))
+        avctx->bit_rate = s->bit_rate;
+    else
+        avctx->bit_rate = 0;
+
+    if (s->audio_mode == AMODE_STEREO_TOTAL || (s->request_mask != s->ch_mask &&
+                                                s->prim_dmix_type == DCA_DMIX_TYPE_LtRt))
+        matrix_encoding = AV_MATRIX_ENCODING_DOLBY;
+    else
+        matrix_encoding = AV_MATRIX_ENCODING_NONE;
+    if ((ret = ff_side_data_update_matrix_encoding(frame, matrix_encoding)) < 0)
+        return ret;
+
+    return 0;
+}
+
+/**
+ * Reset decoder history: ADPCM and LFE history of the core (only when the
+ * subband buffer exists), X96 ADPCM history (only when the X96 buffer
+ * exists), and the DSP filter history.
+ */
+av_cold void ff_dca_core_flush(DCACoreDecoder *s)
+{
+    if (s->subband_buffer) {
+        erase_adpcm_history(s);
+        memset(s->lfe_samples, 0, DCA_LFE_HISTORY * sizeof(int32_t));
+    }
+
+    if (s->x96_subband_buffer)
+        erase_x96_adpcm_history(s);
+
+    erase_dsp_history(s);
+}
+
+/**
+ * One-time initialisation of the core decoder: VLC tables, float and
+ * fixed DSP contexts, the DCT, two MDCT contexts (initialised with 6 and
+ * 7 as the size argument) and the synthesis filter.
+ *
+ * Nothing allocated here is released on failure in this function.
+ *
+ * @return 0 on success, -1 if an allocation or MDCT initialisation fails
+ */
+av_cold int ff_dca_core_init(DCACoreDecoder *s)
+{
+    dca_init_vlcs();
+
+    if (!(s->float_dsp = avpriv_float_dsp_alloc(0)))
+        return -1;
+    if (!(s->fixed_dsp = avpriv_alloc_fixed_dsp(0)))
+        return -1;
+
+    ff_dcadct_init(&s->dcadct);
+    if (ff_mdct_init(&s->imdct[0], 6, 1, 1.0) < 0)
+        return -1;
+    if (ff_mdct_init(&s->imdct[1], 7, 1, 1.0) < 0)
+        return -1;
+    ff_synth_filter_init(&s->synth);
+
+    s->x96_rand = 1;
+    return 0;
+}
+
+av_cold void ff_dca_core_close(DCACoreDecoder *s) +{ + av_freep(&s->float_dsp); + av_freep(&s->fixed_dsp); + + ff_mdct_end(&s->imdct[0]); +
ff_mdct_end(&s->imdct[1]); + + av_freep(&s->subband_buffer); + s->subband_size = 0; + + av_freep(&s->x96_subband_buffer); + s->x96_subband_size = 0; + + av_freep(&s->output_buffer); + s->output_size = 0; +} diff --git a/libavcodec/dca_core.h b/libavcodec/dca_core.h new file mode 100644 index 0000000000..112b72ba41 --- /dev/null +++ b/libavcodec/dca_core.h @@ -0,0 +1,206 @@ +/* + * Copyright (C) 2016 foo86 + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVCODEC_DCA_CORE_H +#define AVCODEC_DCA_CORE_H + +#include "libavutil/common.h" +#include "libavutil/float_dsp.h" +#include "libavutil/fixed_dsp.h" +#include "libavutil/mem.h" + +#include "avcodec.h" +#include "internal.h" +#include "get_bits.h" +#include "dca.h" +#include "dca_exss.h" +#include "dcadsp.h" +#include "dcadct.h" +#include "fft.h" +#include "synth_filter.h" + +#define DCA_CHANNELS 7 +#define DCA_SUBBANDS 32 +#define DCA_SUBBANDS_X96 64 +#define DCA_SUBFRAMES 16 +#define DCA_SUBBAND_SAMPLES 8 +#define DCA_PCMBLOCK_SAMPLES 32 +#define DCA_ADPCM_COEFFS 4 +#define DCA_LFE_HISTORY 8 +#define DCA_CODE_BOOKS 10 +#define DCA_ABITS_MAX 26 + +#define DCA_CORE_CHANNELS_MAX 6 +#define DCA_DMIX_CHANNELS_MAX 4 +#define DCA_XXCH_CHANNELS_MAX 2 +#define DCA_EXSS_CHANNELS_MAX 8 +#define 
DCA_EXSS_CHSETS_MAX 4 + +#define DCA_FILTER_MODE_X96 0x01 +#define DCA_FILTER_MODE_FIXED 0x02 + +typedef struct DCADSPData { + union { + struct { + DECLARE_ALIGNED(32, float, hist1)[1024]; + DECLARE_ALIGNED(32, float, hist2)[64]; + } flt; + struct { + DECLARE_ALIGNED(32, int32_t, hist1)[1024]; + DECLARE_ALIGNED(32, int32_t, hist2)[64]; + } fix; + } u; + int offset; +} DCADSPData; + +typedef struct DCACoreDecoder { + AVCodecContext *avctx; + GetBitContext gb; + + // Bit stream header + int crc_present; ///< CRC present flag + int npcmblocks; ///< Number of PCM sample blocks + int frame_size; ///< Primary frame byte size + int audio_mode; ///< Audio channel arrangement + int sample_rate; ///< Core audio sampling frequency + int bit_rate; ///< Transmission bit rate + int drc_present; ///< Embedded dynamic range flag + int ts_present; ///< Embedded time stamp flag + int aux_present; ///< Auxiliary data flag + int ext_audio_type; ///< Extension audio descriptor flag + int ext_audio_present; ///< Extended coding flag + int sync_ssf; ///< Audio sync word insertion flag + int lfe_present; ///< Low frequency effects flag + int predictor_history; ///< Predictor history flag switch + int filter_perfect; ///< Multirate interpolator switch + int source_pcm_res; ///< Source PCM resolution + int es_format; ///< Extended surround (ES) mastering flag + int sumdiff_front; ///< Front sum/difference flag + int sumdiff_surround; ///< Surround sum/difference flag + + // Primary audio coding header + int nsubframes; ///< Number of subframes + int nchannels; ///< Number of primary audio channels (incl. extension channels) + int ch_mask; ///< Speaker layout mask (incl. 
LFE and extension channels) + int8_t nsubbands[DCA_CHANNELS]; ///< Subband activity count + int8_t subband_vq_start[DCA_CHANNELS]; ///< High frequency VQ start subband + int8_t joint_intensity_index[DCA_CHANNELS]; ///< Joint intensity coding index + int8_t transition_mode_sel[DCA_CHANNELS]; ///< Transient mode code book + int8_t scale_factor_sel[DCA_CHANNELS]; ///< Scale factor code book + int8_t bit_allocation_sel[DCA_CHANNELS]; ///< Bit allocation quantizer select + int8_t quant_index_sel[DCA_CHANNELS][DCA_CODE_BOOKS]; ///< Quantization index codebook select + int32_t scale_factor_adj[DCA_CHANNELS][DCA_CODE_BOOKS]; ///< Scale factor adjustment + + // Primary audio coding side information + int8_t nsubsubframes[DCA_SUBFRAMES]; ///< Subsubframe count for each subframe + int8_t prediction_mode[DCA_CHANNELS][DCA_SUBBANDS_X96]; ///< Prediction mode + int16_t prediction_vq_index[DCA_CHANNELS][DCA_SUBBANDS_X96]; ///< Prediction coefficients VQ address + int8_t bit_allocation[DCA_CHANNELS][DCA_SUBBANDS_X96]; ///< Bit allocation index + int8_t transition_mode[DCA_SUBFRAMES][DCA_CHANNELS][DCA_SUBBANDS]; ///< Transition mode + int32_t scale_factors[DCA_CHANNELS][DCA_SUBBANDS][2]; ///< Scale factors (2x for transients and X96) + int8_t joint_scale_sel[DCA_CHANNELS]; ///< Joint subband codebook select + int32_t joint_scale_factors[DCA_CHANNELS][DCA_SUBBANDS_X96]; ///< Scale factors for joint subband coding + + // Auxiliary data + int prim_dmix_embedded; ///< Auxiliary dynamic downmix flag + int prim_dmix_type; ///< Auxiliary primary channel downmix type + int prim_dmix_coeff[DCA_DMIX_CHANNELS_MAX * DCA_CORE_CHANNELS_MAX]; ///< Dynamic downmix code coefficients + + // Core extensions + int ext_audio_mask; ///< Bit mask of fully decoded core extensions + + // XCH extension data + int xch_pos; ///< Bit position of XCH frame in core substream + + // XXCH extension data + int xxch_crc_present; ///< CRC presence flag for XXCH channel set header + int xxch_mask_nbits; ///< Number of 
bits for loudspeaker mask + int xxch_core_mask; ///< Core loudspeaker activity mask + int xxch_spkr_mask; ///< Loudspeaker layout mask + int xxch_dmix_embedded; ///< Downmix already performed by encoder + int xxch_dmix_scale_inv; ///< Downmix scale factor + int xxch_dmix_mask[DCA_XXCH_CHANNELS_MAX]; ///< Downmix channel mapping mask + int xxch_dmix_coeff[DCA_XXCH_CHANNELS_MAX * DCA_CORE_CHANNELS_MAX]; ///< Downmix coefficients + int xxch_pos; ///< Bit position of XXCH frame in core substream + + // X96 extension data + int x96_rev_no; ///< X96 revision number + int x96_crc_present; ///< CRC presence flag for X96 channel set header + int x96_nchannels; ///< Number of primary channels in X96 extension + int x96_high_res; ///< X96 high resolution flag + int x96_subband_start; ///< First encoded subband in X96 extension + int x96_rand; ///< Random seed for generating samples for unallocated X96 subbands + int x96_pos; ///< Bit position of X96 frame in core substream + + // Sample buffers + unsigned int x96_subband_size; + int32_t *x96_subband_buffer; ///< X96 subband sample buffer base + int32_t *x96_subband_samples[DCA_CHANNELS][DCA_SUBBANDS_X96]; ///< X96 subband samples + + unsigned int subband_size; + int32_t *subband_buffer; ///< Subband sample buffer base + int32_t *subband_samples[DCA_CHANNELS][DCA_SUBBANDS]; ///< Subband samples + int32_t *lfe_samples; ///< Decimated LFE samples + + // DSP contexts + DCADSPData dcadsp_data[DCA_CHANNELS]; ///< FIR history buffers + DCADSPContext *dcadsp; + DCADCTContext dcadct; + FFTContext imdct[2]; + SynthFilterContext synth; + AVFloatDSPContext *float_dsp; + AVFixedDSPContext *fixed_dsp; + + // PCM output data + unsigned int output_size; + void *output_buffer; ///< PCM output buffer base + int32_t *output_samples[DCA_SPEAKER_COUNT]; ///< PCM output for fixed point mode + int32_t output_history_lfe_fixed; ///< LFE PCM history for X96 filter + float output_history_lfe_float; ///< LFE PCM history for X96 filter + + int 
ch_remap[DCA_SPEAKER_COUNT]; ///< Channel to speaker map + int request_mask; ///< Requested channel layout (for stereo downmix) + + int npcmsamples; ///< Number of PCM samples per channel + int output_rate; ///< Output sample rate (1x or 2x header rate) + + int filter_mode; ///< Previous filtering mode for detecting changes +} DCACoreDecoder; + +static inline int ff_dca_core_map_spkr(DCACoreDecoder *core, int spkr) +{ + if (core->ch_mask & (1U << spkr)) + return spkr; + if (spkr == DCA_SPEAKER_Lss && (core->ch_mask & DCA_SPEAKER_MASK_Ls)) + return DCA_SPEAKER_Ls; + if (spkr == DCA_SPEAKER_Rss && (core->ch_mask & DCA_SPEAKER_MASK_Rs)) + return DCA_SPEAKER_Rs; + return -1; +} + +int ff_dca_core_parse(DCACoreDecoder *s, uint8_t *data, int size); +int ff_dca_core_parse_exss(DCACoreDecoder *s, uint8_t *data, DCAExssAsset *asset); +int ff_dca_core_filter_fixed(DCACoreDecoder *s, int x96_synth); +int ff_dca_core_filter_frame(DCACoreDecoder *s, AVFrame *frame); +av_cold void ff_dca_core_flush(DCACoreDecoder *s); +av_cold int ff_dca_core_init(DCACoreDecoder *s); +av_cold void ff_dca_core_close(DCACoreDecoder *s); + +#endif diff --git a/libavcodec/dca_exss.c b/libavcodec/dca_exss.c new file mode 100644 index 0000000000..4579f2350f --- /dev/null +++ b/libavcodec/dca_exss.c @@ -0,0 +1,514 @@ +/* + * Copyright (C) 2016 foo86 + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "dcadec.h" +#include "dcadata.h" + +static int count_chs_for_mask(int mask) +{ + return av_popcount(mask) + av_popcount(mask & 0xae66); +} + +static void parse_xll_parameters(DCAExssParser *s, DCAExssAsset *asset) +{ + // Size of XLL data in extension substream + asset->xll_size = get_bits(&s->gb, s->exss_size_nbits) + 1; + + // XLL sync word present flag + if (asset->xll_sync_present = get_bits1(&s->gb)) { + int xll_delay_nbits; + + // Peak bit rate smoothing buffer size + skip_bits(&s->gb, 4); + + // Number of bits for XLL decoding delay + xll_delay_nbits = get_bits(&s->gb, 5) + 1; + + // Initial XLL decoding delay in frames + asset->xll_delay_nframes = get_bits_long(&s->gb, xll_delay_nbits); + + // Number of bytes offset to XLL sync + asset->xll_sync_offset = get_bits(&s->gb, s->exss_size_nbits); + } else { + asset->xll_delay_nframes = 0; + asset->xll_sync_offset = 0; + } +} + +static void parse_lbr_parameters(DCAExssParser *s, DCAExssAsset *asset) +{ + // Size of LBR component in extension substream + asset->lbr_size = get_bits(&s->gb, 14) + 1; + + // LBR sync word present flag + if (get_bits1(&s->gb)) + // LBR sync distance + skip_bits(&s->gb, 2); +} + +static int parse_descriptor(DCAExssParser *s, DCAExssAsset *asset) +{ + int i, j, drc_present, descr_size, descr_pos = get_bits_count(&s->gb); + + // Size of audio asset descriptor in bytes + descr_size = get_bits(&s->gb, 9) + 1; + + // Audio asset identifier + asset->asset_index = get_bits(&s->gb, 3); + + // + // Per stream static metadata + // + + if (s->static_fields_present) { + // Asset type descriptor presence + if (get_bits1(&s->gb)) + // Asset type descriptor + skip_bits(&s->gb, 4); + + // Language descriptor presence + if (get_bits1(&s->gb)) + // Language descriptor + 
skip_bits(&s->gb, 24); + + // Additional textual information presence + if (get_bits1(&s->gb)) { + // Byte size of additional text info + int text_size = get_bits(&s->gb, 10) + 1; + + // Sanity check available size + if (get_bits_left(&s->gb) < text_size * 8) + return AVERROR_INVALIDDATA; + + // Additional textual information string + skip_bits_long(&s->gb, text_size * 8); + } + + // PCM bit resolution + asset->pcm_bit_res = get_bits(&s->gb, 5) + 1; + + // Maximum sample rate + asset->max_sample_rate = ff_dca_sampling_freqs[get_bits(&s->gb, 4)]; + + // Total number of channels + asset->nchannels_total = get_bits(&s->gb, 8) + 1; + + // One to one map channel to speakers + if (asset->one_to_one_map_ch_to_spkr = get_bits1(&s->gb)) { + int spkr_mask_nbits = 0; + int spkr_remap_nsets; + int nspeakers[8]; + + // Embedded stereo flag + if (asset->nchannels_total > 2) + asset->embedded_stereo = get_bits1(&s->gb); + + // Embedded 6 channels flag + if (asset->nchannels_total > 6) + asset->embedded_6ch = get_bits1(&s->gb); + + // Speaker mask enabled flag + if (asset->spkr_mask_enabled = get_bits1(&s->gb)) { + // Number of bits for speaker activity mask + spkr_mask_nbits = (get_bits(&s->gb, 2) + 1) << 2; + + // Loudspeaker activity mask + asset->spkr_mask = get_bits(&s->gb, spkr_mask_nbits); + } + + // Number of speaker remapping sets + if ((spkr_remap_nsets = get_bits(&s->gb, 3)) && !spkr_mask_nbits) { + av_log(s->avctx, AV_LOG_ERROR, "Speaker mask disabled yet there are remapping sets\n"); + return AVERROR_INVALIDDATA; + } + + // Standard loudspeaker layout mask + for (i = 0; i < spkr_remap_nsets; i++) + nspeakers[i] = count_chs_for_mask(get_bits(&s->gb, spkr_mask_nbits)); + + for (i = 0; i < spkr_remap_nsets; i++) { + // Number of channels to be decoded for speaker remapping + int nch_for_remaps = get_bits(&s->gb, 5) + 1; + + for (j = 0; j < nspeakers[i]; j++) { + // Decoded channels to output speaker mapping mask + int remap_ch_mask = get_bits_long(&s->gb, 
nch_for_remaps); + + // Loudspeaker remapping codes + skip_bits_long(&s->gb, av_popcount(remap_ch_mask) * 5); + } + } + } else { + asset->embedded_stereo = 0; + asset->embedded_6ch = 0; + asset->spkr_mask_enabled = 0; + asset->spkr_mask = 0; + + // Representation type + asset->representation_type = get_bits(&s->gb, 3); + } + } + + // + // DRC, DNC and mixing metadata + // + + // Dynamic range coefficient presence flag + drc_present = get_bits1(&s->gb); + + // Code for dynamic range coefficient + if (drc_present) + skip_bits(&s->gb, 8); + + // Dialog normalization presence flag + if (get_bits1(&s->gb)) + // Dialog normalization code + skip_bits(&s->gb, 5); + + // DRC for stereo downmix + if (drc_present && asset->embedded_stereo) + skip_bits(&s->gb, 8); + + // Mixing metadata presence flag + if (s->mix_metadata_enabled && get_bits1(&s->gb)) { + int nchannels_dmix; + + // External mixing flag + skip_bits1(&s->gb); + + // Post mixing / replacement gain adjustment + skip_bits(&s->gb, 6); + + // DRC prior to mixing + if (get_bits(&s->gb, 2) == 3) + // Custom code for mixing DRC + skip_bits(&s->gb, 8); + else + // Limit for mixing DRC + skip_bits(&s->gb, 3); + + // Scaling type for channels of main audio + // Scaling parameters of main audio + if (get_bits1(&s->gb)) + for (i = 0; i < s->nmixoutconfigs; i++) + skip_bits_long(&s->gb, 6 * s->nmixoutchs[i]); + else + skip_bits_long(&s->gb, 6 * s->nmixoutconfigs); + + nchannels_dmix = asset->nchannels_total; + if (asset->embedded_6ch) + nchannels_dmix += 6; + if (asset->embedded_stereo) + nchannels_dmix += 2; + + for (i = 0; i < s->nmixoutconfigs; i++) { + if (!s->nmixoutchs[i]) { + av_log(s->avctx, AV_LOG_ERROR, "Invalid speaker layout mask for mixing configuration\n"); + return AVERROR_INVALIDDATA; + } + for (j = 0; j < nchannels_dmix; j++) { + // Mix output mask + int mix_map_mask = get_bits(&s->gb, s->nmixoutchs[i]); + + // Mixing coefficients + skip_bits_long(&s->gb, av_popcount(mix_map_mask) * 6); + } + } + } + + // + 
// Decoder navigation data + // + + // Coding mode for the asset + asset->coding_mode = get_bits(&s->gb, 2); + + // Coding components used in asset + switch (asset->coding_mode) { + case 0: // Coding mode that may contain multiple coding components + asset->extension_mask = get_bits(&s->gb, 12); + + if (asset->extension_mask & DCA_EXSS_CORE) { + // Size of core component in extension substream + asset->core_size = get_bits(&s->gb, 14) + 1; + // Core sync word present flag + if (get_bits1(&s->gb)) + // Core sync distance + skip_bits(&s->gb, 2); + } + + if (asset->extension_mask & DCA_EXSS_XBR) + // Size of XBR extension in extension substream + asset->xbr_size = get_bits(&s->gb, 14) + 1; + + if (asset->extension_mask & DCA_EXSS_XXCH) + // Size of XXCH extension in extension substream + asset->xxch_size = get_bits(&s->gb, 14) + 1; + + if (asset->extension_mask & DCA_EXSS_X96) + // Size of X96 extension in extension substream + asset->x96_size = get_bits(&s->gb, 12) + 1; + + if (asset->extension_mask & DCA_EXSS_LBR) + parse_lbr_parameters(s, asset); + + if (asset->extension_mask & DCA_EXSS_XLL) + parse_xll_parameters(s, asset); + + if (asset->extension_mask & DCA_EXSS_RSV1) + skip_bits(&s->gb, 16); + + if (asset->extension_mask & DCA_EXSS_RSV2) + skip_bits(&s->gb, 16); + break; + + case 1: // Loss-less coding mode without CBR component + asset->extension_mask = DCA_EXSS_XLL; + parse_xll_parameters(s, asset); + break; + + case 2: // Low bit rate mode + asset->extension_mask = DCA_EXSS_LBR; + parse_lbr_parameters(s, asset); + break; + + case 3: // Auxiliary coding mode + asset->extension_mask = 0; + + // Size of auxiliary coded data + skip_bits(&s->gb, 14); + + // Auxiliary codec identification + skip_bits(&s->gb, 8); + + // Aux sync word present flag + if (get_bits1(&s->gb)) + // Aux sync distance + skip_bits(&s->gb, 3); + break; + } + + if (asset->extension_mask & DCA_EXSS_XLL) + // DTS-HD stream ID + asset->hd_stream_id = get_bits(&s->gb, 3); + + // One to one mixing 
flag + // Per channel main audio scaling flag + // Main audio scaling codes + // Decode asset in secondary decoder flag + // Revision 2 DRC metadata + // Reserved + // Zero pad + if (ff_dca_seek_bits(&s->gb, descr_pos + descr_size * 8)) { + av_log(s->avctx, AV_LOG_ERROR, "Read past end of EXSS asset descriptor\n"); + return AVERROR_INVALIDDATA; + } + + return 0; +} + +static int set_exss_offsets(DCAExssAsset *asset) +{ + int offs = asset->asset_offset; + int size = asset->asset_size; + + if (asset->extension_mask & DCA_EXSS_CORE) { + asset->core_offset = offs; + if (asset->core_size > size) + return AVERROR_INVALIDDATA; + offs += asset->core_size; + size -= asset->core_size; + } + + if (asset->extension_mask & DCA_EXSS_XBR) { + asset->xbr_offset = offs; + if (asset->xbr_size > size) + return AVERROR_INVALIDDATA; + offs += asset->xbr_size; + size -= asset->xbr_size; + } + + if (asset->extension_mask & DCA_EXSS_XXCH) { + asset->xxch_offset = offs; + if (asset->xxch_size > size) + return AVERROR_INVALIDDATA; + offs += asset->xxch_size; + size -= asset->xxch_size; + } + + if (asset->extension_mask & DCA_EXSS_X96) { + asset->x96_offset = offs; + if (asset->x96_size > size) + return AVERROR_INVALIDDATA; + offs += asset->x96_size; + size -= asset->x96_size; + } + + if (asset->extension_mask & DCA_EXSS_LBR) { + asset->lbr_offset = offs; + if (asset->lbr_size > size) + return AVERROR_INVALIDDATA; + offs += asset->lbr_size; + size -= asset->lbr_size; + } + + if (asset->extension_mask & DCA_EXSS_XLL) { + asset->xll_offset = offs; + if (asset->xll_size > size) + return AVERROR_INVALIDDATA; + offs += asset->xll_size; + size -= asset->xll_size; + } + + return 0; +} + +int ff_dca_exss_parse(DCAExssParser *s, uint8_t *data, int size) +{ + int i, ret, offset, wide_hdr, header_size; + + if ((ret = init_get_bits8(&s->gb, data, size)) < 0) + return ret; + + // Extension substream sync word + skip_bits_long(&s->gb, 32); + + // User defined bits + skip_bits(&s->gb, 8); + + // Extension 
substream index + s->exss_index = get_bits(&s->gb, 2); + + // Flag indicating short or long header size + wide_hdr = get_bits1(&s->gb); + + // Extension substream header length + header_size = get_bits(&s->gb, 8 + 4 * wide_hdr) + 1; + + // Check CRC + if ((s->avctx->err_recognition & (AV_EF_CRCCHECK | AV_EF_CAREFUL)) + && ff_dca_check_crc(&s->gb, 32 + 8, header_size * 8)) { + av_log(s->avctx, AV_LOG_ERROR, "Invalid EXSS header checksum\n"); + return AVERROR_INVALIDDATA; + } + + s->exss_size_nbits = 16 + 4 * wide_hdr; + + // Number of bytes of extension substream + s->exss_size = get_bits(&s->gb, s->exss_size_nbits) + 1; + if (s->exss_size > size) { + av_log(s->avctx, AV_LOG_ERROR, "Packet too short for EXSS frame\n"); + return AVERROR_INVALIDDATA; + } + + // Per stream static fields presence flag + if (s->static_fields_present = get_bits1(&s->gb)) { + int active_exss_mask[8]; + + // Reference clock code + skip_bits(&s->gb, 2); + + // Extension substream frame duration + skip_bits(&s->gb, 3); + + // Timecode presence flag + if (get_bits1(&s->gb)) + // Timecode data + skip_bits_long(&s->gb, 36); + + // Number of defined audio presentations + s->npresents = get_bits(&s->gb, 3) + 1; + if (s->npresents > 1) { + avpriv_request_sample(s->avctx, "%d audio presentations", s->npresents); + return AVERROR_PATCHWELCOME; + } + + // Number of audio assets in extension substream + s->nassets = get_bits(&s->gb, 3) + 1; + if (s->nassets > 1) { + avpriv_request_sample(s->avctx, "%d audio assets", s->nassets); + return AVERROR_PATCHWELCOME; + } + + // Active extension substream mask for audio presentation + for (i = 0; i < s->npresents; i++) + active_exss_mask[i] = get_bits(&s->gb, s->exss_index + 1); + + // Active audio asset mask + for (i = 0; i < s->npresents; i++) + skip_bits_long(&s->gb, av_popcount(active_exss_mask[i]) * 8); + + // Mixing metadata enable flag + if (s->mix_metadata_enabled = get_bits1(&s->gb)) { + int spkr_mask_nbits; + + // Mixing metadata adjustment level + 
skip_bits(&s->gb, 2); + + // Number of bits for mixer output speaker activity mask + spkr_mask_nbits = (get_bits(&s->gb, 2) + 1) << 2; + + // Number of mixing configurations + s->nmixoutconfigs = get_bits(&s->gb, 2) + 1; + + // Speaker layout mask for mixer output channels + for (i = 0; i < s->nmixoutconfigs; i++) + s->nmixoutchs[i] = count_chs_for_mask(get_bits(&s->gb, spkr_mask_nbits)); + } + } else { + s->npresents = 1; + s->nassets = 1; + } + + // Size of encoded asset data in bytes + offset = header_size; + for (i = 0; i < s->nassets; i++) { + s->assets[i].asset_offset = offset; + s->assets[i].asset_size = get_bits(&s->gb, s->exss_size_nbits) + 1; + offset += s->assets[i].asset_size; + if (offset > s->exss_size) { + av_log(s->avctx, AV_LOG_ERROR, "EXSS asset out of bounds\n"); + return AVERROR_INVALIDDATA; + } + } + + // Audio asset descriptor + for (i = 0; i < s->nassets; i++) { + if ((ret = parse_descriptor(s, &s->assets[i])) < 0) + return ret; + if ((ret = set_exss_offsets(&s->assets[i])) < 0) { + av_log(s->avctx, AV_LOG_ERROR, "Invalid extension size in EXSS asset descriptor\n"); + return ret; + } + } + + // Backward compatible core present + // Backward compatible core substream index + // Backward compatible core asset index + // Reserved + // Byte align + // CRC16 of extension substream header + if (ff_dca_seek_bits(&s->gb, header_size * 8)) { + av_log(s->avctx, AV_LOG_ERROR, "Read past end of EXSS header\n"); + return AVERROR_INVALIDDATA; + } + + return 0; +} diff --git a/libavcodec/dca_exss.h b/libavcodec/dca_exss.h new file mode 100644 index 0000000000..323063aafb --- /dev/null +++ b/libavcodec/dca_exss.h @@ -0,0 +1,92 @@ +/* + * Copyright (C) 2016 foo86 + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. 
+ * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVCODEC_DCA_EXSS_H +#define AVCODEC_DCA_EXSS_H + +#include "libavutil/common.h" + +#include "avcodec.h" +#include "get_bits.h" + +typedef struct DCAExssAsset { + int asset_offset; ///< Offset to asset data from start of substream + int asset_size; ///< Size of encoded asset data + int asset_index; ///< Audio asset identifier + + int pcm_bit_res; ///< PCM bit resolution + int max_sample_rate; ///< Maximum sample rate + int nchannels_total; ///< Total number of channels + int one_to_one_map_ch_to_spkr; ///< One to one channel to speaker mapping flag + int embedded_stereo; ///< Embedded stereo flag + int embedded_6ch; ///< Embedded 6 channels flag + int spkr_mask_enabled; ///< Speaker mask enabled flag + int spkr_mask; ///< Loudspeaker activity mask + int representation_type; ///< Representation type + + int coding_mode; ///< Coding mode for the asset + int extension_mask; ///< Coding components used in asset + + int core_offset; ///< Offset to core component from start of substream + int core_size; ///< Size of core component in extension substream + + int xbr_offset; ///< Offset to XBR extension from start of substream + int xbr_size; ///< Size of XBR extension in extension substream + + int xxch_offset; ///< Offset to XXCH extension from start of substream + int xxch_size; ///< Size of XXCH extension in extension substream + + int x96_offset; ///< Offset to X96 extension from start of substream + int x96_size; ///< Size of X96 extension in extension substream + + int lbr_offset; ///< 
Offset to LBR component from start of substream + int lbr_size; ///< Size of LBR component in extension substream + + int xll_offset; ///< Offset to XLL data from start of substream + int xll_size; ///< Size of XLL data in extension substream + int xll_sync_present; ///< XLL sync word present flag + int xll_delay_nframes; ///< Initial XLL decoding delay in frames + int xll_sync_offset; ///< Number of bytes offset to XLL sync + + int hd_stream_id; ///< DTS-HD stream ID +} DCAExssAsset; + +typedef struct DCAExssParser { + AVCodecContext *avctx; + GetBitContext gb; + + int exss_index; ///< Extension substream index + int exss_size_nbits; ///< Number of bits for extension substream size + int exss_size; ///< Number of bytes of extension substream + + int static_fields_present; ///< Per stream static fields presence flag + int npresents; ///< Number of defined audio presentations + int nassets; ///< Number of audio assets in extension substream + + int mix_metadata_enabled; ///< Mixing metadata enable flag + int nmixoutconfigs; ///< Number of mixing configurations + int nmixoutchs[4]; ///< Speaker layout mask for mixer output channels + + DCAExssAsset assets[1]; ///< Audio asset descriptors +} DCAExssParser; + +int ff_dca_exss_parse(DCAExssParser *s, uint8_t *data, int size); + +#endif diff --git a/libavcodec/dca_xll.c b/libavcodec/dca_xll.c new file mode 100644 index 0000000000..cd1af81dcc --- /dev/null +++ b/libavcodec/dca_xll.c @@ -0,0 +1,1499 @@ +/* + * Copyright (C) 2016 foo86 + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "dcadec.h" +#include "dcadata.h" +#include "dcamath.h" +#include "dca_syncwords.h" +#include "unary.h" + +static int get_linear(GetBitContext *gb, int n) +{ + unsigned int v = get_bits_long(gb, n); + return (v >> 1) ^ -(v & 1); +} + +static int get_rice_un(GetBitContext *gb, int k) +{ + unsigned int v = get_unary(gb, 1, 128); + return (v << k) | get_bits_long(gb, k); +} + +static int get_rice(GetBitContext *gb, int k) +{ + unsigned int v = get_rice_un(gb, k); + return (v >> 1) ^ -(v & 1); +} + +static void get_array(GetBitContext *gb, int32_t *array, int size, int n) +{ + int i; + + for (i = 0; i < size; i++) + array[i] = get_bits(gb, n); +} + +static void get_linear_array(GetBitContext *gb, int32_t *array, int size, int n) +{ + int i; + + if (n == 0) + memset(array, 0, sizeof(*array) * size); + else for (i = 0; i < size; i++) + array[i] = get_linear(gb, n); +} + +static void get_rice_array(GetBitContext *gb, int32_t *array, int size, int k) +{ + int i; + + for (i = 0; i < size; i++) + array[i] = get_rice(gb, k); +} + +static int parse_dmix_coeffs(DCAXllDecoder *s, DCAXllChSet *c) +{ + // Size of downmix coefficient matrix + int m = c->primary_chset ? 
ff_dca_dmix_primary_nch[c->dmix_type] : c->hier_ofs; + int i, j, *coeff_ptr = c->dmix_coeff; + + for (i = 0; i < m; i++) { + int code, sign, coeff, scale, scale_inv = 0; + unsigned int index; + + // Downmix scale (only for non-primary channel sets) + if (!c->primary_chset) { + code = get_bits(&s->gb, 9); + sign = (code >> 8) - 1; + index = (code & 0xff) - FF_DCA_DMIXTABLE_OFFSET; + if (index >= FF_DCA_INV_DMIXTABLE_SIZE) { + av_log(s->avctx, AV_LOG_ERROR, "Invalid XLL downmix scale index\n"); + return AVERROR_INVALIDDATA; + } + scale = ff_dca_dmixtable[index + FF_DCA_DMIXTABLE_OFFSET]; + scale_inv = ff_dca_inv_dmixtable[index]; + c->dmix_scale[i] = (scale ^ sign) - sign; + c->dmix_scale_inv[i] = (scale_inv ^ sign) - sign; + } + + // Downmix coefficients + for (j = 0; j < c->nchannels; j++) { + code = get_bits(&s->gb, 9); + sign = (code >> 8) - 1; + index = code & 0xff; + if (index >= FF_DCA_DMIXTABLE_SIZE) { + av_log(s->avctx, AV_LOG_ERROR, "Invalid XLL downmix coefficient index\n"); + return AVERROR_INVALIDDATA; + } + coeff = ff_dca_dmixtable[index]; + if (!c->primary_chset) + // Multiply by |InvDmixScale| to get |UndoDmixScale| + coeff = mul16(scale_inv, coeff); + *coeff_ptr++ = (coeff ^ sign) - sign; + } + } + + return 0; +} + +static int chs_parse_header(DCAXllDecoder *s, DCAXllChSet *c, DCAExssAsset *asset) +{ + int i, j, k, ret, band, header_size, header_pos = get_bits_count(&s->gb); + DCAXllChSet *p = &s->chset[0]; + DCAXllBand *b; + + // Size of channel set sub-header + header_size = get_bits(&s->gb, 10) + 1; + + // Check CRC + if ((s->avctx->err_recognition & (AV_EF_CRCCHECK | AV_EF_CAREFUL)) + && ff_dca_check_crc(&s->gb, header_pos, header_pos + header_size * 8)) { + av_log(s->avctx, AV_LOG_ERROR, "Invalid XLL sub-header checksum\n"); + return AVERROR_INVALIDDATA; + } + + // Number of channels in the channel set + c->nchannels = get_bits(&s->gb, 4) + 1; + if (c->nchannels > DCA_XLL_CHANNELS_MAX) { + avpriv_request_sample(s->avctx, "%d XLL channels", 
c->nchannels); + return AVERROR_PATCHWELCOME; + } + + // Residual type + c->residual_encode = get_bits(&s->gb, c->nchannels); + + // PCM bit resolution + c->pcm_bit_res = get_bits(&s->gb, 5) + 1; + + // Storage unit width + c->storage_bit_res = get_bits(&s->gb, 5) + 1; + if (c->storage_bit_res != 16 && c->storage_bit_res != 24) { + avpriv_request_sample(s->avctx, "%d-bit XLL storage resolution", c->storage_bit_res); + return AVERROR_PATCHWELCOME; + } + + if (c->pcm_bit_res > c->storage_bit_res) { + av_log(s->avctx, AV_LOG_ERROR, "Invalid PCM bit resolution for XLL channel set (%d > %d)\n", c->pcm_bit_res, c->storage_bit_res); + return AVERROR_INVALIDDATA; + } + + // Original sampling frequency + c->freq = ff_dca_sampling_freqs[get_bits(&s->gb, 4)]; + if (c->freq > 192000) { + avpriv_request_sample(s->avctx, "%d Hz XLL sampling frequency", c->freq); + return AVERROR_PATCHWELCOME; + } + + // Sampling frequency modifier + if (get_bits(&s->gb, 2)) { + avpriv_request_sample(s->avctx, "XLL sampling frequency modifier"); + return AVERROR_PATCHWELCOME; + } + + // Which replacement set this channel set is member of + if (get_bits(&s->gb, 2)) { + avpriv_request_sample(s->avctx, "XLL replacement set"); + return AVERROR_PATCHWELCOME; + } + + if (asset->one_to_one_map_ch_to_spkr) { + // Primary channel set flag + c->primary_chset = get_bits1(&s->gb); + if (c->primary_chset != (c == p)) { + av_log(s->avctx, AV_LOG_ERROR, "The first (and only) XLL channel set must be primary\n"); + return AVERROR_INVALIDDATA; + } + + // Downmix coefficients present in stream + c->dmix_coeffs_present = get_bits1(&s->gb); + + // Downmix already performed by encoder + c->dmix_embedded = c->dmix_coeffs_present && get_bits1(&s->gb); + + // Downmix type + if (c->dmix_coeffs_present && c->primary_chset) { + c->dmix_type = get_bits(&s->gb, 3); + if (c->dmix_type >= DCA_DMIX_TYPE_COUNT) { + av_log(s->avctx, AV_LOG_ERROR, "Invalid XLL primary channel set downmix type\n"); + return AVERROR_INVALIDDATA; + } 
+ } + + // Whether the channel set is part of a hierarchy + c->hier_chset = get_bits1(&s->gb); + if (!c->hier_chset && s->nchsets != 1) { + avpriv_request_sample(s->avctx, "XLL channel set outside of hierarchy"); + return AVERROR_PATCHWELCOME; + } + + // Downmix coefficients + if (c->dmix_coeffs_present && (ret = parse_dmix_coeffs(s, c)) < 0) + return ret; + + // Channel mask enabled + if (!get_bits1(&s->gb)) { + avpriv_request_sample(s->avctx, "Disabled XLL channel mask"); + return AVERROR_PATCHWELCOME; + } + + // Channel mask for set + c->ch_mask = get_bits_long(&s->gb, s->ch_mask_nbits); + if (av_popcount(c->ch_mask) != c->nchannels) { + av_log(s->avctx, AV_LOG_ERROR, "Invalid XLL channel mask\n"); + return AVERROR_INVALIDDATA; + } + + // Build the channel to speaker map + for (i = 0, j = 0; i < s->ch_mask_nbits; i++) + if (c->ch_mask & (1U << i)) + c->ch_remap[j++] = i; + } else { + // Mapping coeffs present flag + if (c->nchannels != 2 || s->nchsets != 1 || get_bits1(&s->gb)) { + avpriv_request_sample(s->avctx, "Custom XLL channel to speaker mapping"); + return AVERROR_PATCHWELCOME; + } + + // Setup for LtRt decoding + c->primary_chset = 1; + c->dmix_coeffs_present = 0; + c->dmix_embedded = 0; + c->hier_chset = 0; + c->ch_mask = DCA_SPEAKER_LAYOUT_STEREO; + c->ch_remap[0] = DCA_SPEAKER_L; + c->ch_remap[1] = DCA_SPEAKER_R; + } + + if (c->freq > 96000) { + // Extra frequency bands flag + if (get_bits1(&s->gb)) { + avpriv_request_sample(s->avctx, "Extra XLL frequency bands"); + return AVERROR_PATCHWELCOME; + } + c->nfreqbands = 2; + } else { + c->nfreqbands = 1; + } + + // Set the sampling frequency to that of the first frequency band. + // Frequency will be doubled again after bands assembly. 
+ c->freq >>= c->nfreqbands - 1; + + // Verify that all channel sets have the same audio characteristics + if (c != p && (c->nfreqbands != p->nfreqbands || c->freq != p->freq + || c->pcm_bit_res != p->pcm_bit_res + || c->storage_bit_res != p->storage_bit_res)) { + avpriv_request_sample(s->avctx, "Different XLL audio characteristics"); + return AVERROR_PATCHWELCOME; + } + + // Determine number of bits to read bit allocation coding parameter + if (c->storage_bit_res > 16) + c->nabits = 5; + else if (c->storage_bit_res > 8) + c->nabits = 4; + else + c->nabits = 3; + + // Account for embedded downmix and decimator saturation + if ((s->nchsets > 1 || c->nfreqbands > 1) && c->nabits < 5) + c->nabits++; + + for (band = 0, b = c->bands; band < c->nfreqbands; band++, b++) { + // Pairwise channel decorrelation + if ((b->decor_enabled = get_bits1(&s->gb)) && c->nchannels > 1) { + int ch_nbits = av_ceil_log2(c->nchannels); + + // Original channel order + for (i = 0; i < c->nchannels; i++) { + b->orig_order[i] = get_bits(&s->gb, ch_nbits); + if (b->orig_order[i] >= c->nchannels) { + av_log(s->avctx, AV_LOG_ERROR, "Invalid XLL original channel order\n"); + return AVERROR_INVALIDDATA; + } + } + + // Pairwise channel coefficients + for (i = 0; i < c->nchannels / 2; i++) + b->decor_coeff[i] = get_bits1(&s->gb) ? 
get_linear(&s->gb, 7) : 0; + } else { + for (i = 0; i < c->nchannels; i++) + b->orig_order[i] = i; + for (i = 0; i < c->nchannels / 2; i++) + b->decor_coeff[i] = 0; + } + + // Adaptive predictor order + b->highest_pred_order = 0; + for (i = 0; i < c->nchannels; i++) { + b->adapt_pred_order[i] = get_bits(&s->gb, 4); + if (b->adapt_pred_order[i] > b->highest_pred_order) + b->highest_pred_order = b->adapt_pred_order[i]; + } + if (b->highest_pred_order > s->nsegsamples) { + av_log(s->avctx, AV_LOG_ERROR, "Invalid XLL adaptive predicition order\n"); + return AVERROR_INVALIDDATA; + } + + // Fixed predictor order + for (i = 0; i < c->nchannels; i++) + b->fixed_pred_order[i] = b->adapt_pred_order[i] ? 0 : get_bits(&s->gb, 2); + + // Adaptive predictor quantized reflection coefficients + for (i = 0; i < c->nchannels; i++) { + for (j = 0; j < b->adapt_pred_order[i]; j++) { + k = get_linear(&s->gb, 8); + if (k == -128) { + av_log(s->avctx, AV_LOG_ERROR, "Invalid XLL reflection coefficient index\n"); + return AVERROR_INVALIDDATA; + } + if (k < 0) + b->adapt_refl_coeff[i][j] = -(int)ff_dca_xll_refl_coeff[-k]; + else + b->adapt_refl_coeff[i][j] = (int)ff_dca_xll_refl_coeff[ k]; + } + } + + // Downmix performed by encoder in extension frequency band + b->dmix_embedded = c->dmix_embedded && (band == 0 || get_bits1(&s->gb)); + + // MSB/LSB split flag in extension frequency band + if ((band == 0 && s->scalable_lsbs) || (band != 0 && get_bits1(&s->gb))) { + // Size of LSB section in any segment + b->lsb_section_size = get_bits_long(&s->gb, s->seg_size_nbits); + if (b->lsb_section_size < 0 || b->lsb_section_size > s->frame_size) { + av_log(s->avctx, AV_LOG_ERROR, "Invalid LSB section size\n"); + return AVERROR_INVALIDDATA; + } + + // Account for optional CRC bytes after LSB section + if (b->lsb_section_size && (s->band_crc_present > 2 || + (band == 0 && s->band_crc_present > 1))) + b->lsb_section_size += 2; + + // Number of bits to represent the samples in LSB part + for (i = 0; i < 
c->nchannels; i++) { + b->nscalablelsbs[i] = get_bits(&s->gb, 4); + if (b->nscalablelsbs[i] && !b->lsb_section_size) { + av_log(s->avctx, AV_LOG_ERROR, "LSB section missing with non-zero LSB width\n"); + return AVERROR_INVALIDDATA; + } + } + } else { + b->lsb_section_size = 0; + for (i = 0; i < c->nchannels; i++) + b->nscalablelsbs[i] = 0; + } + + // Scalable resolution flag in extension frequency band + if ((band == 0 && s->scalable_lsbs) || (band != 0 && get_bits1(&s->gb))) { + // Number of bits discarded by authoring + for (i = 0; i < c->nchannels; i++) + b->bit_width_adjust[i] = get_bits(&s->gb, 4); + } else { + for (i = 0; i < c->nchannels; i++) + b->bit_width_adjust[i] = 0; + } + } + + // Reserved + // Byte align + // CRC16 of channel set sub-header + if (ff_dca_seek_bits(&s->gb, header_pos + header_size * 8)) { + av_log(s->avctx, AV_LOG_ERROR, "Read past end of XLL sub-header\n"); + return AVERROR_INVALIDDATA; + } + + return 0; +} + +static int chs_alloc_msb_band_data(DCAXllDecoder *s, DCAXllChSet *c) +{ + int ndecisamples = c->nfreqbands > 1 ? 
DCA_XLL_DECI_HISTORY_MAX : 0; + int nchsamples = s->nframesamples + ndecisamples; + int i, j, nsamples = nchsamples * c->nchannels * c->nfreqbands; + int32_t *ptr; + + // Reallocate MSB sample buffer + av_fast_malloc(&c->sample_buffer[0], &c->sample_size[0], nsamples * sizeof(int32_t)); + if (!c->sample_buffer[0]) + return AVERROR(ENOMEM); + + ptr = c->sample_buffer[0] + ndecisamples; + for (i = 0; i < c->nfreqbands; i++) { + for (j = 0; j < c->nchannels; j++) { + c->bands[i].msb_sample_buffer[j] = ptr; + ptr += nchsamples; + } + } + + return 0; +} + +static int chs_alloc_lsb_band_data(DCAXllDecoder *s, DCAXllChSet *c) +{ + int i, j, nsamples = 0; + int32_t *ptr; + + // Determine number of frequency bands that have MSB/LSB split + for (i = 0; i < c->nfreqbands; i++) + if (c->bands[i].lsb_section_size) + nsamples += s->nframesamples * c->nchannels; + if (!nsamples) + return 0; + + // Reallocate LSB sample buffer + av_fast_malloc(&c->sample_buffer[1], &c->sample_size[1], nsamples * sizeof(int32_t)); + if (!c->sample_buffer[1]) + return AVERROR(ENOMEM); + + ptr = c->sample_buffer[1]; + for (i = 0; i < c->nfreqbands; i++) { + if (c->bands[i].lsb_section_size) { + for (j = 0; j < c->nchannels; j++) { + c->bands[i].lsb_sample_buffer[j] = ptr; + ptr += s->nframesamples; + } + } else { + for (j = 0; j < c->nchannels; j++) + c->bands[i].lsb_sample_buffer[j] = NULL; + } + } + + return 0; +} + +static int chs_parse_band_data(DCAXllDecoder *s, DCAXllChSet *c, int band, int seg, int band_data_end) +{ + DCAXllBand *b = &c->bands[band]; + int i, j, k; + + // Start unpacking MSB portion of the segment + if (!(seg && get_bits1(&s->gb))) { + // Unpack segment type + // 0 - distinct coding parameters for each channel + // 1 - common coding parameters for all channels + c->seg_common = get_bits1(&s->gb); + + // Determine number of coding parameters encoded in segment + k = c->seg_common ? 
1 : c->nchannels; + + // Unpack Rice coding parameters + for (i = 0; i < k; i++) { + // Unpack Rice coding flag + // 0 - linear code, 1 - Rice code + c->rice_code_flag[i] = get_bits1(&s->gb); + if (!c->seg_common && c->rice_code_flag[i]) { + // Unpack Hybrid Rice coding flag + // 0 - Rice code, 1 - Hybrid Rice code + if (get_bits1(&s->gb)) + // Unpack binary code length for isolated samples + c->bitalloc_hybrid_linear[i] = get_bits(&s->gb, c->nabits) + 1; + else + // 0 indicates no Hybrid Rice coding + c->bitalloc_hybrid_linear[i] = 0; + } else { + // 0 indicates no Hybrid Rice coding + c->bitalloc_hybrid_linear[i] = 0; + } + } + + // Unpack coding parameters + for (i = 0; i < k; i++) { + if (seg == 0) { + // Unpack coding parameter for part A of segment 0 + c->bitalloc_part_a[i] = get_bits(&s->gb, c->nabits); + + // Adjust for the linear code + if (!c->rice_code_flag[i] && c->bitalloc_part_a[i]) + c->bitalloc_part_a[i]++; + + if (!c->seg_common) + c->nsamples_part_a[i] = b->adapt_pred_order[i]; + else + c->nsamples_part_a[i] = b->highest_pred_order; + } else { + c->bitalloc_part_a[i] = 0; + c->nsamples_part_a[i] = 0; + } + + // Unpack coding parameter for part B of segment + c->bitalloc_part_b[i] = get_bits(&s->gb, c->nabits); + + // Adjust for the linear code + if (!c->rice_code_flag[i] && c->bitalloc_part_b[i]) + c->bitalloc_part_b[i]++; + } + } + + // Unpack entropy codes + for (i = 0; i < c->nchannels; i++) { + int32_t *part_a, *part_b; + int nsamples_part_b; + + // Select index of coding parameters + k = c->seg_common ? 
0 : i; + + // Slice the segment into parts A and B + part_a = b->msb_sample_buffer[i] + seg * s->nsegsamples; + part_b = part_a + c->nsamples_part_a[k]; + nsamples_part_b = s->nsegsamples - c->nsamples_part_a[k]; + + if (get_bits_left(&s->gb) < 0) + return AVERROR_INVALIDDATA; + + if (!c->rice_code_flag[k]) { + // Linear codes + // Unpack all residuals of part A of segment 0 + get_linear_array(&s->gb, part_a, c->nsamples_part_a[k], + c->bitalloc_part_a[k]); + + // Unpack all residuals of part B of segment 0 and others + get_linear_array(&s->gb, part_b, nsamples_part_b, + c->bitalloc_part_b[k]); + } else { + // Rice codes + // Unpack all residuals of part A of segment 0 + get_rice_array(&s->gb, part_a, c->nsamples_part_a[k], + c->bitalloc_part_a[k]); + + if (c->bitalloc_hybrid_linear[k]) { + // Hybrid Rice codes + // Unpack the number of isolated samples + int nisosamples = get_bits(&s->gb, s->nsegsamples_log2); + + // Set all locations to 0 + memset(part_b, 0, sizeof(*part_b) * nsamples_part_b); + + // Extract the locations of isolated samples and flag by -1 + for (j = 0; j < nisosamples; j++) { + int loc = get_bits(&s->gb, s->nsegsamples_log2); + if (loc >= nsamples_part_b) { + av_log(s->avctx, AV_LOG_ERROR, "Invalid isolated sample location\n"); + return AVERROR_INVALIDDATA; + } + part_b[loc] = -1; + } + + // Unpack all residuals of part B of segment 0 and others + for (j = 0; j < nsamples_part_b; j++) { + if (part_b[j]) + part_b[j] = get_linear(&s->gb, c->bitalloc_hybrid_linear[k]); + else + part_b[j] = get_rice(&s->gb, c->bitalloc_part_b[k]); + } + } else { + // Rice codes + // Unpack all residuals of part B of segment 0 and others + get_rice_array(&s->gb, part_b, nsamples_part_b, c->bitalloc_part_b[k]); + } + } + } + + // Unpack decimator history for frequency band 1 + if (seg == 0 && band == 1) { + int nbits = get_bits(&s->gb, 5) + 1; + for (i = 0; i < c->nchannels; i++) + for (j = 1; j < DCA_XLL_DECI_HISTORY_MAX; j++) + c->deci_history[i][j] = 
get_sbits_long(&s->gb, nbits); + } + + // Start unpacking LSB portion of the segment + if (b->lsb_section_size) { + // Skip to the start of LSB portion + if (ff_dca_seek_bits(&s->gb, band_data_end - b->lsb_section_size * 8)) { + av_log(s->avctx, AV_LOG_ERROR, "Read past end of XLL band data\n"); + return AVERROR_INVALIDDATA; + } + + // Unpack all LSB parts of residuals of this segment + for (i = 0; i < c->nchannels; i++) { + if (b->nscalablelsbs[i]) { + get_array(&s->gb, + b->lsb_sample_buffer[i] + seg * s->nsegsamples, + s->nsegsamples, b->nscalablelsbs[i]); + } + } + } + + // Skip to the end of band data + if (ff_dca_seek_bits(&s->gb, band_data_end)) { + av_log(s->avctx, AV_LOG_ERROR, "Read past end of XLL band data\n"); + return AVERROR_INVALIDDATA; + } + + return 0; +} + +static void av_cold chs_clear_band_data(DCAXllDecoder *s, DCAXllChSet *c, int band, int seg) +{ + DCAXllBand *b = &c->bands[band]; + int i, offset, nsamples; + + if (seg < 0) { + offset = 0; + nsamples = s->nframesamples; + } else { + offset = seg * s->nsegsamples; + nsamples = s->nsegsamples; + } + + for (i = 0; i < c->nchannels; i++) { + memset(b->msb_sample_buffer[i] + offset, 0, nsamples * sizeof(int32_t)); + if (b->lsb_section_size) + memset(b->lsb_sample_buffer[i] + offset, 0, nsamples * sizeof(int32_t)); + } + + if (seg <= 0 && band) + memset(c->deci_history, 0, sizeof(c->deci_history)); + + if (seg < 0) { + memset(b->nscalablelsbs, 0, sizeof(b->nscalablelsbs)); + memset(b->bit_width_adjust, 0, sizeof(b->bit_width_adjust)); + } +} + +static void chs_filter_band_data(DCAXllDecoder *s, DCAXllChSet *c, int band) +{ + DCAXllBand *b = &c->bands[band]; + int nsamples = s->nframesamples; + int i, j, k; + + // Inverse adaptive or fixed prediction + for (i = 0; i < c->nchannels; i++) { + int32_t *buf = b->msb_sample_buffer[i]; + int order = b->adapt_pred_order[i]; + if (order > 0) { + int coeff[DCA_XLL_ADAPT_PRED_ORDER_MAX]; + // Conversion from reflection coefficients to direct form 
coefficients + for (j = 0; j < order; j++) { + int rc = b->adapt_refl_coeff[i][j]; + for (k = 0; k < (j + 1) / 2; k++) { + int tmp1 = coeff[ k ]; + int tmp2 = coeff[j - k - 1]; + coeff[ k ] = tmp1 + mul16(rc, tmp2); + coeff[j - k - 1] = tmp2 + mul16(rc, tmp1); + } + coeff[j] = rc; + } + // Inverse adaptive prediction + for (j = 0; j < nsamples - order; j++) { + int64_t err = 0; + for (k = 0; k < order; k++) + err += (int64_t)buf[j + k] * coeff[order - k - 1]; + buf[j + k] -= clip23(norm16(err)); + } + } else { + // Inverse fixed coefficient prediction + for (j = 0; j < b->fixed_pred_order[i]; j++) + for (k = 1; k < nsamples; k++) + buf[k] += buf[k - 1]; + } + } + + // Inverse pairwise channel decorrellation + if (b->decor_enabled) { + int32_t *tmp[DCA_XLL_CHANNELS_MAX]; + + for (i = 0; i < c->nchannels / 2; i++) { + int coeff = b->decor_coeff[i]; + if (coeff) { + s->dcadsp->decor(b->msb_sample_buffer[i * 2 + 1], + b->msb_sample_buffer[i * 2 ], + coeff, nsamples); + } + } + + // Reorder channel pointers to the original order + for (i = 0; i < c->nchannels; i++) + tmp[i] = b->msb_sample_buffer[i]; + + for (i = 0; i < c->nchannels; i++) + b->msb_sample_buffer[b->orig_order[i]] = tmp[i]; + } + + // Map output channel pointers for frequency band 0 + if (c->nfreqbands == 1) + for (i = 0; i < c->nchannels; i++) + s->output_samples[c->ch_remap[i]] = b->msb_sample_buffer[i]; +} + +static int chs_get_lsb_width(DCAXllDecoder *s, DCAXllChSet *c, int band, int ch) +{ + int adj = c->bands[band].bit_width_adjust[ch]; + int shift = c->bands[band].nscalablelsbs[ch]; + + if (s->fixed_lsb_width) + shift = s->fixed_lsb_width; + else if (shift && adj) + shift += adj - 1; + else + shift += adj; + + return shift; +} + +static void chs_assemble_msbs_lsbs(DCAXllDecoder *s, DCAXllChSet *c, int band) +{ + DCAXllBand *b = &c->bands[band]; + int n, ch, nsamples = s->nframesamples; + + for (ch = 0; ch < c->nchannels; ch++) { + int shift = chs_get_lsb_width(s, c, band, ch); + if (shift) { + 
int32_t *msb = b->msb_sample_buffer[ch]; + if (b->nscalablelsbs[ch]) { + int32_t *lsb = b->lsb_sample_buffer[ch]; + int adj = b->bit_width_adjust[ch]; + for (n = 0; n < nsamples; n++) + msb[n] = msb[n] * (1 << shift) + (lsb[n] << adj); + } else { + for (n = 0; n < nsamples; n++) + msb[n] = msb[n] * (1 << shift); + } + } + } +} + +static int chs_assemble_freq_bands(DCAXllDecoder *s, DCAXllChSet *c) +{ + int ch, nsamples = s->nframesamples; + int32_t *ptr; + + av_assert1(c->nfreqbands > 1); + + // Reallocate frequency band assembly buffer + av_fast_malloc(&c->sample_buffer[2], &c->sample_size[2], + 2 * nsamples * c->nchannels * sizeof(int32_t)); + if (!c->sample_buffer[2]) + return AVERROR(ENOMEM); + + // Assemble frequency bands 0 and 1 + ptr = c->sample_buffer[2]; + for (ch = 0; ch < c->nchannels; ch++) { + int32_t *band0 = c->bands[0].msb_sample_buffer[ch]; + int32_t *band1 = c->bands[1].msb_sample_buffer[ch]; + + // Copy decimator history + memcpy(band0 - DCA_XLL_DECI_HISTORY_MAX, + c->deci_history[ch], sizeof(c->deci_history[0])); + + // Filter + s->dcadsp->assemble_freq_bands(ptr, band0, band1, + ff_dca_xll_band_coeff, + nsamples); + + // Remap output channel pointer to assembly buffer + s->output_samples[c->ch_remap[ch]] = ptr; + ptr += nsamples * 2; + } + + return 0; +} + +static int parse_common_header(DCAXllDecoder *s) +{ + int stream_ver, header_size, frame_size_nbits, nframesegs_log2; + + // XLL extension sync word + if (get_bits_long(&s->gb, 32) != DCA_SYNCWORD_XLL) { + av_log(s->avctx, AV_LOG_VERBOSE, "Invalid XLL sync word\n"); + return AVERROR(EAGAIN); + } + + // Version number + stream_ver = get_bits(&s->gb, 4) + 1; + if (stream_ver > 1) { + avpriv_request_sample(s->avctx, "XLL stream version %d", stream_ver); + return AVERROR_PATCHWELCOME; + } + + // Lossless frame header length + header_size = get_bits(&s->gb, 8) + 1; + + // Check CRC + if ((s->avctx->err_recognition & (AV_EF_CRCCHECK | AV_EF_CAREFUL)) + && ff_dca_check_crc(&s->gb, 32, header_size 
* 8)) { + av_log(s->avctx, AV_LOG_ERROR, "Invalid XLL common header checksum\n"); + return AVERROR_INVALIDDATA; + } + + // Number of bits used to read frame size + frame_size_nbits = get_bits(&s->gb, 5) + 1; + + // Number of bytes in a lossless frame + s->frame_size = get_bits_long(&s->gb, frame_size_nbits); + if (s->frame_size < 0 || s->frame_size >= DCA_XLL_PBR_BUFFER_MAX) { + av_log(s->avctx, AV_LOG_ERROR, "Invalid XLL frame size (%d bytes)\n", s->frame_size); + return AVERROR_INVALIDDATA; + } + s->frame_size++; + + // Number of channels sets per frame + s->nchsets = get_bits(&s->gb, 4) + 1; + if (s->nchsets > DCA_XLL_CHSETS_MAX) { + avpriv_request_sample(s->avctx, "%d XLL channel sets", s->nchsets); + return AVERROR_PATCHWELCOME; + } + + // Number of segments per frame + nframesegs_log2 = get_bits(&s->gb, 4); + s->nframesegs = 1 << nframesegs_log2; + if (s->nframesegs > 1024) { + av_log(s->avctx, AV_LOG_ERROR, "Too many segments per XLL frame\n"); + return AVERROR_INVALIDDATA; + } + + // Samples in segment per one frequency band for the first channel set + // Maximum value is 256 for sampling frequencies <= 48 kHz + // Maximum value is 512 for sampling frequencies > 48 kHz + s->nsegsamples_log2 = get_bits(&s->gb, 4); + if (!s->nsegsamples_log2) { + av_log(s->avctx, AV_LOG_ERROR, "Too few samples per XLL segment\n"); + return AVERROR_INVALIDDATA; + } + s->nsegsamples = 1 << s->nsegsamples_log2; + if (s->nsegsamples > 512) { + av_log(s->avctx, AV_LOG_ERROR, "Too many samples per XLL segment\n"); + return AVERROR_INVALIDDATA; + } + + // Samples in frame per one frequency band for the first channel set + s->nframesamples_log2 = s->nsegsamples_log2 + nframesegs_log2; + s->nframesamples = 1 << s->nframesamples_log2; + if (s->nframesamples > 65536) { + av_log(s->avctx, AV_LOG_ERROR, "Too many samples per XLL frame\n"); + return AVERROR_INVALIDDATA; + } + + // Number of bits used to read segment size + s->seg_size_nbits = get_bits(&s->gb, 5) + 1; + + // Presence of 
CRC16 within each frequency band + // 0 - No CRC16 within band + // 1 - CRC16 placed at the end of MSB0 + // 2 - CRC16 placed at the end of MSB0 and LSB0 + // 3 - CRC16 placed at the end of MSB0 and LSB0 and other frequency bands + s->band_crc_present = get_bits(&s->gb, 2); + + // MSB/LSB split flag + s->scalable_lsbs = get_bits1(&s->gb); + + // Channel position mask + s->ch_mask_nbits = get_bits(&s->gb, 5) + 1; + + // Fixed LSB width + if (s->scalable_lsbs) + s->fixed_lsb_width = get_bits(&s->gb, 4); + else + s->fixed_lsb_width = 0; + + // Reserved + // Byte align + // Header CRC16 protection + if (ff_dca_seek_bits(&s->gb, header_size * 8)) { + av_log(s->avctx, AV_LOG_ERROR, "Read past end of XLL common header\n"); + return AVERROR_INVALIDDATA; + } + + return 0; +} + +static int is_hier_dmix_chset(DCAXllChSet *c) +{ + return !c->primary_chset && c->dmix_embedded && c->hier_chset; +} + +static DCAXllChSet *find_next_hier_dmix_chset(DCAXllDecoder *s, DCAXllChSet *c) +{ + if (c->hier_chset) + while (++c < &s->chset[s->nchsets]) + if (is_hier_dmix_chset(c)) + return c; + + return NULL; +} + +static void prescale_down_mix(DCAXllChSet *c, DCAXllChSet *o) +{ + int i, j, *coeff_ptr = c->dmix_coeff; + + for (i = 0; i < c->hier_ofs; i++) { + int scale = o->dmix_scale[i]; + int scale_inv = o->dmix_scale_inv[i]; + c->dmix_scale[i] = mul15(c->dmix_scale[i], scale); + c->dmix_scale_inv[i] = mul16(c->dmix_scale_inv[i], scale_inv); + for (j = 0; j < c->nchannels; j++) { + int coeff = mul16(*coeff_ptr, scale_inv); + *coeff_ptr++ = mul15(coeff, o->dmix_scale[c->hier_ofs + j]); + } + } +} + +static int parse_sub_headers(DCAXllDecoder *s, DCAExssAsset *asset) +{ + DCAContext *dca = s->avctx->priv_data; + DCAXllChSet *c; + int i, ret; + + // Parse channel set headers + s->nfreqbands = 0; + s->nchannels = 0; + s->nreschsets = 0; + for (i = 0, c = s->chset; i < s->nchsets; i++, c++) { + c->hier_ofs = s->nchannels; + if ((ret = chs_parse_header(s, c, asset)) < 0) + return ret; + if 
(c->nfreqbands > s->nfreqbands) + s->nfreqbands = c->nfreqbands; + if (c->hier_chset) + s->nchannels += c->nchannels; + if (c->residual_encode != (1 << c->nchannels) - 1) + s->nreschsets++; + } + + // Pre-scale downmixing coefficients for all non-primary channel sets + for (i = s->nchsets - 1, c = &s->chset[i]; i > 0; i--, c--) { + if (is_hier_dmix_chset(c)) { + DCAXllChSet *o = find_next_hier_dmix_chset(s, c); + if (o) + prescale_down_mix(c, o); + } + } + + // Determine number of active channel sets to decode + switch (dca->request_channel_layout) { + case DCA_SPEAKER_LAYOUT_STEREO: + s->nactivechsets = 1; + break; + case DCA_SPEAKER_LAYOUT_5POINT0: + case DCA_SPEAKER_LAYOUT_5POINT1: + s->nactivechsets = (s->chset[0].nchannels < 5 && s->nchsets > 1) ? 2 : 1; + break; + default: + s->nactivechsets = s->nchsets; + break; + } + + return 0; +} + +static int parse_navi_table(DCAXllDecoder *s) +{ + int chs, seg, band, navi_nb, navi_pos, *navi_ptr; + DCAXllChSet *c; + + // Determine size of NAVI table + navi_nb = s->nfreqbands * s->nframesegs * s->nchsets; + if (navi_nb > 1024) { + av_log(s->avctx, AV_LOG_ERROR, "Too many NAVI entries (%d)\n", navi_nb); + return AVERROR_INVALIDDATA; + } + + // Reallocate NAVI table + av_fast_malloc(&s->navi, &s->navi_size, navi_nb * sizeof(*s->navi)); + if (!s->navi) + return AVERROR(ENOMEM); + + // Parse NAVI + navi_pos = get_bits_count(&s->gb); + navi_ptr = s->navi; + for (band = 0; band < s->nfreqbands; band++) { + for (seg = 0; seg < s->nframesegs; seg++) { + for (chs = 0, c = s->chset; chs < s->nchsets; chs++, c++) { + int size = 0; + if (c->nfreqbands > band) { + size = get_bits_long(&s->gb, s->seg_size_nbits); + if (size < 0 || size >= s->frame_size) { + av_log(s->avctx, AV_LOG_ERROR, "Invalid NAVI segment size (%d bytes)\n", size); + return AVERROR_INVALIDDATA; + } + size++; + } + *navi_ptr++ = size; + } + } + } + + // Byte align + // CRC16 + skip_bits(&s->gb, -get_bits_count(&s->gb) & 7); + skip_bits(&s->gb, 16); + + // Check 
CRC + if ((s->avctx->err_recognition & (AV_EF_CRCCHECK | AV_EF_CAREFUL)) + && ff_dca_check_crc(&s->gb, navi_pos, get_bits_count(&s->gb))) { + av_log(s->avctx, AV_LOG_ERROR, "Invalid NAVI checksum\n"); + return AVERROR_INVALIDDATA; + } + + return 0; +} + +static int parse_band_data(DCAXllDecoder *s) +{ + int ret, chs, seg, band, navi_pos, *navi_ptr; + DCAXllChSet *c; + + for (chs = 0, c = s->chset; chs < s->nactivechsets; chs++, c++) { + if ((ret = chs_alloc_msb_band_data(s, c)) < 0) + return ret; + if ((ret = chs_alloc_lsb_band_data(s, c)) < 0) + return ret; + } + + navi_pos = get_bits_count(&s->gb); + navi_ptr = s->navi; + for (band = 0; band < s->nfreqbands; band++) { + for (seg = 0; seg < s->nframesegs; seg++) { + for (chs = 0, c = s->chset; chs < s->nchsets; chs++, c++) { + if (c->nfreqbands > band) { + navi_pos += *navi_ptr * 8; + if (navi_pos > s->gb.size_in_bits) { + av_log(s->avctx, AV_LOG_ERROR, "Invalid NAVI position\n"); + return AVERROR_INVALIDDATA; + } + if (chs < s->nactivechsets && + (ret = chs_parse_band_data(s, c, band, seg, navi_pos)) < 0) { + if (s->avctx->err_recognition & AV_EF_EXPLODE) + return ret; + chs_clear_band_data(s, c, band, seg); + } + s->gb.index = navi_pos; + } + navi_ptr++; + } + } + } + + return 0; +} + +static int parse_frame(DCAXllDecoder *s, uint8_t *data, int size, DCAExssAsset *asset) +{ + int ret; + + if ((ret = init_get_bits8(&s->gb, data, size)) < 0) + return ret; + if ((ret = parse_common_header(s)) < 0) + return ret; + if ((ret = parse_sub_headers(s, asset)) < 0) + return ret; + if ((ret = parse_navi_table(s)) < 0) + return ret; + if ((ret = parse_band_data(s)) < 0) + return ret; + if (ff_dca_seek_bits(&s->gb, s->frame_size * 8)) { + av_log(s->avctx, AV_LOG_ERROR, "Read past end of XLL frame\n"); + return AVERROR_INVALIDDATA; + } + return ret; +} + +static void clear_pbr(DCAXllDecoder *s) +{ + s->pbr_length = 0; + s->pbr_delay = 0; +} + +static int copy_to_pbr(DCAXllDecoder *s, uint8_t *data, int size, int delay) +{ + if 
(size > DCA_XLL_PBR_BUFFER_MAX) + return AVERROR(ENOSPC); + + if (!s->pbr_buffer && !(s->pbr_buffer = av_malloc(DCA_XLL_PBR_BUFFER_MAX + DCA_BUFFER_PADDING_SIZE))) + return AVERROR(ENOMEM); + + memcpy(s->pbr_buffer, data, size); + s->pbr_length = size; + s->pbr_delay = delay; + return 0; +} + +static int parse_frame_no_pbr(DCAXllDecoder *s, uint8_t *data, int size, DCAExssAsset *asset) +{ + int ret = parse_frame(s, data, size, asset); + + // If XLL packet data didn't start with a sync word, we must have jumped + // right into the middle of PBR smoothing period + if (ret == AVERROR(EAGAIN) && asset->xll_sync_present && asset->xll_sync_offset < size) { + // Skip to the next sync word in this packet + data += asset->xll_sync_offset; + size -= asset->xll_sync_offset; + + // If decoding delay is set, put the frame into PBR buffer and return + // failure code. Higher level decoder is expected to switch to lossy + // core decoding or mute its output until decoding delay expires. + if (asset->xll_delay_nframes > 0) { + if ((ret = copy_to_pbr(s, data, size, asset->xll_delay_nframes)) < 0) + return ret; + return AVERROR(EAGAIN); + } + + // No decoding delay, just parse the frame in place + ret = parse_frame(s, data, size, asset); + } + + if (ret < 0) + return ret; + + if (s->frame_size > size) + return AVERROR(EINVAL); + + // If the XLL decoder didn't consume full packet, start PBR smoothing period + if (s->frame_size < size) + if ((ret = copy_to_pbr(s, data + s->frame_size, size - s->frame_size, 0)) < 0) + return ret; + + return 0; +} + +static int parse_frame_pbr(DCAXllDecoder *s, uint8_t *data, int size, DCAExssAsset *asset) +{ + int ret; + + if (size > DCA_XLL_PBR_BUFFER_MAX - s->pbr_length) { + ret = AVERROR(ENOSPC); + goto fail; + } + + memcpy(s->pbr_buffer + s->pbr_length, data, size); + s->pbr_length += size; + + // Respect decoding delay after synchronization error + if (s->pbr_delay > 0 && --s->pbr_delay) + return AVERROR(EAGAIN); + + if ((ret = parse_frame(s, 
s->pbr_buffer, s->pbr_length, asset)) < 0) + goto fail; + + if (s->frame_size > s->pbr_length) { + ret = AVERROR(EINVAL); + goto fail; + } + + if (s->frame_size == s->pbr_length) { + // End of PBR smoothing period + clear_pbr(s); + } else { + s->pbr_length -= s->frame_size; + memmove(s->pbr_buffer, s->pbr_buffer + s->frame_size, s->pbr_length); + } + + return 0; + +fail: + // For now, throw out all PBR state on failure. + // Perhaps we can be smarter and try to resync somehow. + clear_pbr(s); + return ret; +} + +int ff_dca_xll_parse(DCAXllDecoder *s, uint8_t *data, DCAExssAsset *asset) +{ + int ret; + + if (s->hd_stream_id != asset->hd_stream_id) { + clear_pbr(s); + s->hd_stream_id = asset->hd_stream_id; + } + + if (s->pbr_length) + ret = parse_frame_pbr(s, data + asset->xll_offset, asset->xll_size, asset); + else + ret = parse_frame_no_pbr(s, data + asset->xll_offset, asset->xll_size, asset); + + return ret; +} + +static void undo_down_mix(DCAXllDecoder *s, DCAXllChSet *o, int band) +{ + int i, j, k, nchannels = 0, *coeff_ptr = o->dmix_coeff; + DCAXllChSet *c; + + for (i = 0, c = s->chset; i < s->nactivechsets; i++, c++) { + if (!c->hier_chset) + continue; + + av_assert1(band < c->nfreqbands); + for (j = 0; j < c->nchannels; j++) { + for (k = 0; k < o->nchannels; k++) { + int coeff = *coeff_ptr++; + if (coeff) { + s->dcadsp->dmix_sub(c->bands[band].msb_sample_buffer[j], + o->bands[band].msb_sample_buffer[k], + coeff, s->nframesamples); + if (band) + s->dcadsp->dmix_sub(c->deci_history[j], + o->deci_history[k], + coeff, DCA_XLL_DECI_HISTORY_MAX); + } + } + } + + nchannels += c->nchannels; + if (nchannels >= o->hier_ofs) + break; + } +} + +static void scale_down_mix(DCAXllDecoder *s, DCAXllChSet *o, int band) +{ + int i, j, nchannels = 0; + DCAXllChSet *c; + + for (i = 0, c = s->chset; i < s->nactivechsets; i++, c++) { + if (!c->hier_chset) + continue; + + av_assert1(band < c->nfreqbands); + for (j = 0; j < c->nchannels; j++) { + int scale = 
o->dmix_scale[nchannels++]; + if (scale != (1 << 15)) { + s->dcadsp->dmix_scale(c->bands[band].msb_sample_buffer[j], + scale, s->nframesamples); + if (band) + s->dcadsp->dmix_scale(c->deci_history[j], + scale, DCA_XLL_DECI_HISTORY_MAX); + } + } + + if (nchannels >= o->hier_ofs) + break; + } +} + +// Clear all band data and replace non-residual encoded channels with lossy +// counterparts +static void av_cold force_lossy_output(DCAXllDecoder *s, DCAXllChSet *c) +{ + DCAContext *dca = s->avctx->priv_data; + int band, ch; + + for (band = 0; band < c->nfreqbands; band++) + chs_clear_band_data(s, c, band, -1); + + for (ch = 0; ch < c->nchannels; ch++) { + if (!(c->residual_encode & (1 << ch))) + continue; + if (ff_dca_core_map_spkr(&dca->core, c->ch_remap[ch]) < 0) + continue; + c->residual_encode &= ~(1 << ch); + } +} + +static int combine_residual_frame(DCAXllDecoder *s, DCAXllChSet *c) +{ + DCAContext *dca = s->avctx->priv_data; + int ch, nsamples = s->nframesamples; + DCAXllChSet *o; + + // Verify that core is compatible + if (!(dca->packet & DCA_PACKET_CORE)) { + av_log(s->avctx, AV_LOG_ERROR, "Residual encoded channels are present without core\n"); + return AVERROR(EINVAL); + } + + if (c->freq != dca->core.output_rate) { + av_log(s->avctx, AV_LOG_WARNING, "Sample rate mismatch between core (%d Hz) and XLL (%d Hz)\n", dca->core.output_rate, c->freq); + return AVERROR_INVALIDDATA; + } + + if (nsamples != dca->core.npcmsamples) { + av_log(s->avctx, AV_LOG_WARNING, "Number of samples per frame mismatch between core (%d) and XLL (%d)\n", dca->core.npcmsamples, nsamples); + return AVERROR_INVALIDDATA; + } + + // See if this channel set is downmixed and find the next channel set in + // hierarchy. If downmixed, undo core pre-scaling before combining with + // residual (residual is not scaled). 
+ o = find_next_hier_dmix_chset(s, c); + + // Reduce core bit width and combine with residual + for (ch = 0; ch < c->nchannels; ch++) { + int n, spkr, shift, round; + int32_t *src, *dst; + + if (c->residual_encode & (1 << ch)) + continue; + + // Map this channel to core speaker + spkr = ff_dca_core_map_spkr(&dca->core, c->ch_remap[ch]); + if (spkr < 0) { + av_log(s->avctx, AV_LOG_WARNING, "Residual encoded channel (%d) references unavailable core channel\n", c->ch_remap[ch]); + return AVERROR_INVALIDDATA; + } + + // Account for LSB width + shift = 24 - c->pcm_bit_res + chs_get_lsb_width(s, c, 0, ch); + if (shift > 24) { + av_log(s->avctx, AV_LOG_WARNING, "Invalid core shift (%d bits)\n", shift); + return AVERROR_INVALIDDATA; + } + + round = shift > 0 ? 1 << (shift - 1) : 0; + + src = dca->core.output_samples[spkr]; + dst = c->bands[0].msb_sample_buffer[ch]; + if (o) { + // Undo embedded core downmix pre-scaling + int scale_inv = o->dmix_scale_inv[c->hier_ofs + ch]; + for (n = 0; n < nsamples; n++) + dst[n] += clip23((mul16(src[n], scale_inv) + round) >> shift); + } else { + // No downmix scaling + for (n = 0; n < nsamples; n++) + dst[n] += (src[n] + round) >> shift; + } + } + + return 0; +} + +int ff_dca_xll_filter_frame(DCAXllDecoder *s, AVFrame *frame) +{ + AVCodecContext *avctx = s->avctx; + DCAContext *dca = avctx->priv_data; + DCAExssAsset *asset = &dca->exss.assets[0]; + DCAXllChSet *p = &s->chset[0], *c; + enum AVMatrixEncoding matrix_encoding = AV_MATRIX_ENCODING_NONE; + int i, j, k, ret, shift, nsamples, request_mask; + int ch_remap[DCA_SPEAKER_COUNT]; + + // Force lossy downmixed output during recovery + if (dca->packet & DCA_PACKET_RECOVERY) { + for (i = 0, c = s->chset; i < s->nchsets; i++, c++) { + if (i < s->nactivechsets) + force_lossy_output(s, c); + + if (!c->primary_chset) + c->dmix_embedded = 0; + } + + s->scalable_lsbs = 0; + s->fixed_lsb_width = 0; + } + + // Filter frequency bands for active channel sets + s->output_mask = 0; + for (i = 0, c = 
s->chset; i < s->nactivechsets; i++, c++) { + chs_filter_band_data(s, c, 0); + + if (c->residual_encode != (1 << c->nchannels) - 1 + && (ret = combine_residual_frame(s, c)) < 0) + return ret; + + if (s->scalable_lsbs) + chs_assemble_msbs_lsbs(s, c, 0); + + if (c->nfreqbands > 1) { + chs_filter_band_data(s, c, 1); + chs_assemble_msbs_lsbs(s, c, 1); + } + + s->output_mask |= c->ch_mask; + } + + // Undo hierarchical downmix and/or apply scaling + for (i = 1, c = &s->chset[1]; i < s->nchsets; i++, c++) { + if (!is_hier_dmix_chset(c)) + continue; + + if (i >= s->nactivechsets) { + for (j = 0; j < c->nfreqbands; j++) + if (c->bands[j].dmix_embedded) + scale_down_mix(s, c, j); + break; + } + + for (j = 0; j < c->nfreqbands; j++) + if (c->bands[j].dmix_embedded) + undo_down_mix(s, c, j); + } + + // Assemble frequency bands for active channel sets + if (s->nfreqbands > 1) { + for (i = 0; i < s->nactivechsets; i++) + if ((ret = chs_assemble_freq_bands(s, &s->chset[i])) < 0) + return ret; + } + + // Normalize to regular 5.1 layout if downmixing + if (dca->request_channel_layout) { + if (s->output_mask & DCA_SPEAKER_MASK_Lss) { + s->output_samples[DCA_SPEAKER_Ls] = s->output_samples[DCA_SPEAKER_Lss]; + s->output_mask = (s->output_mask & ~DCA_SPEAKER_MASK_Lss) | DCA_SPEAKER_MASK_Ls; + } + if (s->output_mask & DCA_SPEAKER_MASK_Rss) { + s->output_samples[DCA_SPEAKER_Rs] = s->output_samples[DCA_SPEAKER_Rss]; + s->output_mask = (s->output_mask & ~DCA_SPEAKER_MASK_Rss) | DCA_SPEAKER_MASK_Rs; + } + } + + // Handle downmixing to stereo request + if (dca->request_channel_layout == DCA_SPEAKER_LAYOUT_STEREO + && DCA_HAS_STEREO(s->output_mask) && p->dmix_embedded + && (p->dmix_type == DCA_DMIX_TYPE_LoRo || + p->dmix_type == DCA_DMIX_TYPE_LtRt)) + request_mask = DCA_SPEAKER_LAYOUT_STEREO; + else + request_mask = s->output_mask; + if (!ff_dca_set_channel_layout(avctx, ch_remap, request_mask)) + return AVERROR(EINVAL); + + avctx->sample_rate = p->freq << (s->nfreqbands - 1); + + switch 
(p->storage_bit_res) { + case 16: + avctx->sample_fmt = AV_SAMPLE_FMT_S16P; + break; + case 24: + avctx->sample_fmt = AV_SAMPLE_FMT_S32P; + break; + default: + return AVERROR(EINVAL); + } + + avctx->bits_per_raw_sample = p->storage_bit_res; + avctx->profile = FF_PROFILE_DTS_HD_MA; + avctx->bit_rate = 0; + + frame->nb_samples = nsamples = s->nframesamples << (s->nfreqbands - 1); + if ((ret = ff_get_buffer(avctx, frame, 0)) < 0) + return ret; + + // Downmix primary channel set to stereo + if (request_mask != s->output_mask) { + ff_dca_downmix_to_stereo_fixed(s->dcadsp, s->output_samples, + p->dmix_coeff, nsamples, + s->output_mask); + } + + shift = p->storage_bit_res - p->pcm_bit_res; + for (i = 0; i < avctx->channels; i++) { + int32_t *samples = s->output_samples[ch_remap[i]]; + if (frame->format == AV_SAMPLE_FMT_S16P) { + int16_t *plane = (int16_t *)frame->extended_data[i]; + for (k = 0; k < nsamples; k++) + plane[k] = av_clip_int16(samples[k] * (1 << shift)); + } else { + int32_t *plane = (int32_t *)frame->extended_data[i]; + for (k = 0; k < nsamples; k++) + plane[k] = clip23(samples[k] * (1 << shift)) * (1 << 8); + } + } + + if (!asset->one_to_one_map_ch_to_spkr) { + if (asset->representation_type == DCA_REPR_TYPE_LtRt) + matrix_encoding = AV_MATRIX_ENCODING_DOLBY; + else if (asset->representation_type == DCA_REPR_TYPE_LhRh) + matrix_encoding = AV_MATRIX_ENCODING_DOLBYHEADPHONE; + } else if (request_mask != s->output_mask && p->dmix_type == DCA_DMIX_TYPE_LtRt) { + matrix_encoding = AV_MATRIX_ENCODING_DOLBY; + } + if ((ret = ff_side_data_update_matrix_encoding(frame, matrix_encoding)) < 0) + return ret; + + return 0; +} + +av_cold void ff_dca_xll_flush(DCAXllDecoder *s) +{ + clear_pbr(s); +} + +av_cold void ff_dca_xll_close(DCAXllDecoder *s) +{ + DCAXllChSet *c; + int i, j; + + for (i = 0, c = s->chset; i < DCA_XLL_CHSETS_MAX; i++, c++) { + for (j = 0; j < DCA_XLL_SAMPLE_BUFFERS_MAX; j++) { + av_freep(&c->sample_buffer[j]); + c->sample_size[j] = 0; + } + } + + 
av_freep(&s->navi); + s->navi_size = 0; + + av_freep(&s->pbr_buffer); + clear_pbr(s); +} diff --git a/libavcodec/dca_xll.h b/libavcodec/dca_xll.h new file mode 100644 index 0000000000..bc0aa65b1a --- /dev/null +++ b/libavcodec/dca_xll.h @@ -0,0 +1,149 @@ +/* + * Copyright (C) 2016 foo86 + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVCODEC_DCA_XLL_H +#define AVCODEC_DCA_XLL_H + +#include "libavutil/common.h" +#include "libavutil/mem.h" + +#include "avcodec.h" +#include "internal.h" +#include "get_bits.h" +#include "dca.h" +#include "dcadsp.h" +#include "dca_exss.h" + +#define DCA_XLL_CHSETS_MAX 3 +#define DCA_XLL_CHANNELS_MAX 8 +#define DCA_XLL_BANDS_MAX 2 +#define DCA_XLL_ADAPT_PRED_ORDER_MAX 16 +#define DCA_XLL_DECI_HISTORY_MAX 8 +#define DCA_XLL_DMIX_SCALES_MAX ((DCA_XLL_CHSETS_MAX - 1) * DCA_XLL_CHANNELS_MAX) +#define DCA_XLL_DMIX_COEFFS_MAX (DCA_XLL_DMIX_SCALES_MAX * DCA_XLL_CHANNELS_MAX) +#define DCA_XLL_PBR_BUFFER_MAX (240 << 10) +#define DCA_XLL_SAMPLE_BUFFERS_MAX 3 + +typedef struct DCAXllBand { + int decor_enabled; ///< Pairwise channel decorrelation flag + int orig_order[DCA_XLL_CHANNELS_MAX]; ///< Original channel order + int decor_coeff[DCA_XLL_CHANNELS_MAX / 2]; ///< Pairwise channel coefficients + + int 
adapt_pred_order[DCA_XLL_CHANNELS_MAX]; ///< Adaptive predictor order + int highest_pred_order; ///< Highest adaptive predictor order + int fixed_pred_order[DCA_XLL_CHANNELS_MAX]; ///< Fixed predictor order + int adapt_refl_coeff[DCA_XLL_CHANNELS_MAX][DCA_XLL_ADAPT_PRED_ORDER_MAX]; ///< Adaptive predictor reflection coefficients + + int dmix_embedded; ///< Downmix performed by encoder in frequency band + + int lsb_section_size; ///< Size of LSB section in any segment + int nscalablelsbs[DCA_XLL_CHANNELS_MAX]; ///< Number of bits to represent the samples in LSB part + int bit_width_adjust[DCA_XLL_CHANNELS_MAX]; ///< Number of bits discarded by authoring + + int32_t *msb_sample_buffer[DCA_XLL_CHANNELS_MAX]; ///< MSB sample buffer pointers + int32_t *lsb_sample_buffer[DCA_XLL_CHANNELS_MAX]; ///< LSB sample buffer pointers or NULL +} DCAXllBand; + +typedef struct DCAXllChSet { + // Channel set header + int nchannels; ///< Number of channels in the channel set (N) + int residual_encode; ///< Residual encoding mask (0 - residual, 1 - full channel) + int pcm_bit_res; ///< PCM bit resolution (variable) + int storage_bit_res; ///< Storage bit resolution (16 or 24) + int freq; ///< Original sampling frequency (max. 
96000 Hz) + + int primary_chset; ///< Primary channel set flag + int dmix_coeffs_present; ///< Downmix coefficients present in stream + int dmix_embedded; ///< Downmix already performed by encoder + int dmix_type; ///< Primary channel set downmix type + int hier_chset; ///< Whether the channel set is part of a hierarchy + int hier_ofs; ///< Number of preceding channels in a hierarchy (M) + int dmix_coeff[DCA_XLL_DMIX_COEFFS_MAX]; ///< Downmixing coefficients + int dmix_scale[DCA_XLL_DMIX_SCALES_MAX]; ///< Downmixing scales + int dmix_scale_inv[DCA_XLL_DMIX_SCALES_MAX]; ///< Inverse downmixing scales + int ch_mask; ///< Channel mask for set + int ch_remap[DCA_XLL_CHANNELS_MAX]; ///< Channel to speaker map + + int nfreqbands; ///< Number of frequency bands (1 or 2) + int nabits; ///< Number of bits to read bit allocation coding parameter + + DCAXllBand bands[DCA_XLL_BANDS_MAX]; ///< Frequency bands + + // Frequency band coding parameters + int seg_common; ///< Segment type + int rice_code_flag[DCA_XLL_CHANNELS_MAX]; ///< Rice coding flag + int bitalloc_hybrid_linear[DCA_XLL_CHANNELS_MAX]; ///< Binary code length for isolated samples + int bitalloc_part_a[DCA_XLL_CHANNELS_MAX]; ///< Coding parameter for part A of segment + int bitalloc_part_b[DCA_XLL_CHANNELS_MAX]; ///< Coding parameter for part B of segment + int nsamples_part_a[DCA_XLL_CHANNELS_MAX]; ///< Number of samples in part A of segment + + // Decimator history + DECLARE_ALIGNED(32, int32_t, deci_history)[DCA_XLL_CHANNELS_MAX][DCA_XLL_DECI_HISTORY_MAX]; ///< Decimator history for frequency band 1 + + // Sample buffers + unsigned int sample_size[DCA_XLL_SAMPLE_BUFFERS_MAX]; + int32_t *sample_buffer[DCA_XLL_SAMPLE_BUFFERS_MAX]; +} DCAXllChSet; + +typedef struct DCAXllDecoder { + AVCodecContext *avctx; + GetBitContext gb; + + int frame_size; ///< Number of bytes in a lossless frame + int nchsets; ///< Number of channels sets per frame + int nframesegs; ///< Number of segments per frame + int nsegsamples_log2; 
///< log2(nsegsamples) + int nsegsamples; ///< Samples in segment per one frequency band + int nframesamples_log2; ///< log2(nframesamples) + int nframesamples; ///< Samples in frame per one frequency band + int seg_size_nbits; ///< Number of bits used to read segment size + int band_crc_present; ///< Presence of CRC16 within each frequency band + int scalable_lsbs; ///< MSB/LSB split flag + int ch_mask_nbits; ///< Number of bits used to read channel mask + int fixed_lsb_width; ///< Fixed LSB width + + DCAXllChSet chset[DCA_XLL_CHSETS_MAX]; ///< Channel sets + + int *navi; ///< NAVI table + unsigned int navi_size; + + int nfreqbands; ///< Highest number of frequency bands + int nchannels; ///< Total number of channels in a hierarchy + int nreschsets; ///< Number of channel sets that have residual encoded channels + int nactivechsets; ///< Number of active channel sets to decode + + int hd_stream_id; ///< Previous DTS-HD stream ID for detecting changes + + uint8_t *pbr_buffer; ///< Peak bit rate (PBR) smoothing buffer + int pbr_length; ///< Length in bytes of data currently buffered + int pbr_delay; ///< Delay in frames before decoding buffered data + + DCADSPContext *dcadsp; + + int output_mask; + int32_t *output_samples[DCA_SPEAKER_COUNT]; +} DCAXllDecoder; + +int ff_dca_xll_parse(DCAXllDecoder *s, uint8_t *data, DCAExssAsset *asset); +int ff_dca_xll_filter_frame(DCAXllDecoder *s, AVFrame *frame); +av_cold void ff_dca_xll_flush(DCAXllDecoder *s); +av_cold void ff_dca_xll_close(DCAXllDecoder *s); + +#endif diff --git a/libavcodec/dcadec.c b/libavcodec/dcadec.c new file mode 100644 index 0000000000..f3c397250c --- /dev/null +++ b/libavcodec/dcadec.c @@ -0,0 +1,417 @@ +/* + * Copyright (C) 2016 foo86 + * + * This file is part of FFmpeg. 
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/opt.h"
+#include "libavutil/channel_layout.h"
+
+#include "dcadec.h"
+#include "dcamath.h"
+#include "dca_syncwords.h"
+#include "profiles.h"
+
+#define MIN_PACKET_SIZE     16
+#define MAX_PACKET_SIZE     0x104000
+
+/**
+ * Publish the output channel layout for a decoded DCA speaker mask.
+ *
+ * Fills ch_remap[] with DCA speaker indices in output channel order and
+ * stores the resulting layout and channel count in avctx.
+ *
+ * @param avctx     codec context; request_channel_layout is read,
+ *                  channel_layout and channels are written
+ * @param ch_remap  output table, one DCA speaker index per output channel
+ * @param dca_mask  bit mask of DCA speakers present in the decoded audio
+ * @return number of output channels
+ */
+int ff_dca_set_channel_layout(AVCodecContext *avctx, int *ch_remap, int dca_mask)
+{
+    // DCA speaker index -> output (WAV order) channel bit, normal layouts
+    static const uint8_t dca2wav_norm[28] = {
+         2,  0, 1, 9, 10,  3,  8,  4,  5,  9, 10, 6, 7, 12,
+        13, 14, 3, 6,  7, 11, 12, 14, 16, 15, 17, 8, 4,  5,
+    };
+
+    // Same mapping, used for the two "wide" 7.x layouts
+    static const uint8_t dca2wav_wide[28] = {
+         2,  0, 1, 4,  5,  3,  8,  4,  5,  9, 10, 6, 7, 12,
+        13, 14, 3, 9, 10, 11, 12, 14, 16, 15, 17, 8, 4,  5,
+    };
+
+    int dca_ch, wav_ch, nchannels = 0;
+
+    if (avctx->request_channel_layout & AV_CH_LAYOUT_NATIVE) {
+        // Native order requested: expose the DCA mask unchanged
+        for (dca_ch = 0; dca_ch < DCA_SPEAKER_COUNT; dca_ch++)
+            if (dca_mask & (1U << dca_ch))
+                ch_remap[nchannels++] = dca_ch;
+        avctx->channel_layout = dca_mask;
+    } else {
+        int wav_mask = 0;
+        int wav_map[18];
+        const uint8_t *dca2wav;
+        if (dca_mask == DCA_SPEAKER_LAYOUT_7POINT0_WIDE ||
+            dca_mask == DCA_SPEAKER_LAYOUT_7POINT1_WIDE)
+            dca2wav = dca2wav_wide;
+        else
+            dca2wav = dca2wav_norm;
+        // Several DCA speakers may map onto the same output bit;
+        // the lowest numbered DCA speaker wins
+        for (dca_ch = 0; dca_ch < 28; dca_ch++) {
+            if (dca_mask & (1 << dca_ch)) {
+                wav_ch = dca2wav[dca_ch];
+                if (!(wav_mask & (1 << wav_ch))) {
+                    wav_map[wav_ch] = dca_ch;
+                    wav_mask |= 1 << wav_ch;
+                }
+            }
+        }
+        for (wav_ch = 0; wav_ch < 18; wav_ch++)
+            if (wav_mask & (1 << wav_ch))
+                ch_remap[nchannels++] = wav_map[wav_ch];
+        avctx->channel_layout = wav_mask;
+    }
+
+    avctx->channels = nchannels;
+    return nchannels;
+}
+
+/**
+ * Table driven CRC-16 (polynomial 0x1021, initial value 0xffff) processed
+ * one nibble at a time, high nibble first.
+ */
+static uint16_t crc16(const uint8_t *data, int size)
+{
+    static const uint16_t crctab[16] = {
+        0x0000, 0x1021, 0x2042, 0x3063, 0x4084, 0x50a5, 0x60c6, 0x70e7,
+        0x8108, 0x9129, 0xa14a, 0xb16b, 0xc18c, 0xd1ad, 0xe1ce, 0xf1ef,
+    };
+
+    uint16_t res = 0xffff;
+    int i;
+
+    for (i = 0; i < size; i++) {
+        res = (res << 4) ^ crctab[(data[i] >> 4) ^ (res >> 12)];
+        res = (res << 4) ^ crctab[(data[i] & 15) ^ (res >> 12)];
+    }
+
+    return res;
+}
+
+/**
+ * Verify the CRC of the bit range [p1, p2) of a bitstream reader buffer.
+ *
+ * Both positions are bit offsets; they must be byte aligned, lie inside the
+ * buffer and span at least 16 bits.
+ *
+ * @return 0 if the CRC over the range evaluates to zero, -1 otherwise or if
+ *         the range is invalid
+ */
+int ff_dca_check_crc(GetBitContext *s, int p1, int p2)
+{
+    if (((p1 | p2) & 7) || p1 < 0 || p2 > s->size_in_bits || p2 - p1 < 16)
+        return -1;
+    if (crc16(s->buffer + p1 / 8, (p2 - p1) / 8))
+        return -1;
+    return 0;
+}
+
+/**
+ * Downmix fixed point planar samples to stereo in place.
+ *
+ * @param dcadsp    DSP function table
+ * @param samples   per speaker sample buffers, indexed by DCA speaker
+ * @param coeff_l   left channel coefficients, one per speaker set in
+ *                  ch_mask in ascending speaker order; the right channel
+ *                  coefficients follow immediately after them
+ * @param nsamples  number of samples per channel
+ * @param ch_mask   DCA speaker mask, must contain a stereo pair
+ */
+void ff_dca_downmix_to_stereo_fixed(DCADSPContext *dcadsp, int32_t **samples,
+                                    int *coeff_l, int nsamples, int ch_mask)
+{
+    int pos, spkr, max_spkr = av_log2(ch_mask);
+    int *coeff_r = coeff_l + av_popcount(ch_mask);
+
+    av_assert0(DCA_HAS_STEREO(ch_mask));
+
+    // Scale left and right channels
+    // NOTE(review): pos presumably skips the centre coefficient when the
+    // centre speaker is present - confirm against DCA_SPEAKER_MASK_C
+    pos = (ch_mask & DCA_SPEAKER_MASK_C);
+    dcadsp->dmix_scale(samples[DCA_SPEAKER_L], coeff_l[pos    ], nsamples);
+    dcadsp->dmix_scale(samples[DCA_SPEAKER_R], coeff_r[pos + 1], nsamples);
+
+    // Downmix remaining channels
+    for (spkr = 0; spkr <= max_spkr; spkr++) {
+        if (!(ch_mask & (1U << spkr)))
+            continue;
+
+        if (*coeff_l && spkr != DCA_SPEAKER_L)
+            dcadsp->dmix_add(samples[DCA_SPEAKER_L], samples[spkr],
+                             *coeff_l, nsamples);
+
+        if (*coeff_r && spkr != DCA_SPEAKER_R)
+            dcadsp->dmix_add(samples[DCA_SPEAKER_R], samples[spkr],
+                             *coeff_r, nsamples);
+
+        coeff_l++;
+        coeff_r++;
+    }
+}
+
+/**
+ * Floating point counterpart of ff_dca_downmix_to_stereo_fixed().
+ *
+ * The integer coefficients are converted to float by scaling with 2^-15.
+ */
+void ff_dca_downmix_to_stereo_float(AVFloatDSPContext *fdsp, float **samples,
+                                    int *coeff_l, int nsamples, int ch_mask)
+{
+    int pos, spkr, max_spkr = av_log2(ch_mask);
+    int *coeff_r = coeff_l + av_popcount(ch_mask);
+    const float scale = 1.0f / (1 << 15);
+
+    av_assert0(DCA_HAS_STEREO(ch_mask));
+
+    // Scale left and right channels
+    pos = (ch_mask & DCA_SPEAKER_MASK_C);
+    fdsp->vector_fmul_scalar(samples[DCA_SPEAKER_L], samples[DCA_SPEAKER_L],
+                             coeff_l[pos    ] * scale, nsamples);
+    fdsp->vector_fmul_scalar(samples[DCA_SPEAKER_R], samples[DCA_SPEAKER_R],
+                             coeff_r[pos + 1] * scale, nsamples);
+
+    // Downmix remaining channels
+    for (spkr = 0; spkr <= max_spkr; spkr++) {
+        if (!(ch_mask & (1U << spkr)))
+            continue;
+
+        if (*coeff_l && spkr != DCA_SPEAKER_L)
+            fdsp->vector_fmac_scalar(samples[DCA_SPEAKER_L], samples[spkr],
+                                     *coeff_l * scale, nsamples);
+
+        if (*coeff_r && spkr != DCA_SPEAKER_R)
+            fdsp->vector_fmac_scalar(samples[DCA_SPEAKER_R], samples[spkr],
+                                     *coeff_r * scale, nsamples);
+
+        coeff_l++;
+        coeff_r++;
+    }
+}
+
+/**
+ * Copy or convert a packet starting with a DCA sync word into dst.
+ *
+ * Big endian core and substream data is copied as is, the other core
+ * variants are passed to avpriv_dca_convert_bitstream().
+ *
+ * @return number of bytes stored in dst, or a negative AVERROR code if src
+ *         does not start with a known sync word or does not fit into dst
+ */
+static int convert_bitstream(const uint8_t *src, int src_size, uint8_t *dst, int max_size)
+{
+    switch (AV_RB32(src)) {
+    case DCA_SYNCWORD_CORE_BE:
+    case DCA_SYNCWORD_SUBSTREAM:
+        // Never write past the destination, whatever the caller passes
+        if (src_size > max_size)
+            return AVERROR_INVALIDDATA;
+        memcpy(dst, src, src_size);
+        return src_size;
+    case DCA_SYNCWORD_CORE_LE:
+    case DCA_SYNCWORD_CORE_14B_BE:
+    case DCA_SYNCWORD_CORE_14B_LE:
+        return avpriv_dca_convert_bitstream(src, src_size, dst, max_size);
+    default:
+        return AVERROR_INVALIDDATA;
+    }
+}
+
+/**
+ * Decode one packet: locate the sync word, parse the core and extension
+ * sub-streams that are present, then filter either the XLL or the core
+ * output into the frame.
+ *
+ * @return avpkt->size on success, a negative AVERROR code on failure
+ */
+static int dcadec_decode_frame(AVCodecContext *avctx, void *data,
+                               int *got_frame_ptr, AVPacket *avpkt)
+{
+    DCAContext *s = avctx->priv_data;
+    AVFrame *frame = data;
+    uint8_t *input = avpkt->data;
+    int input_size = avpkt->size;
+    int i, ret, prev_packet = s->packet;
+
+    if (input_size < MIN_PACKET_SIZE || input_size > MAX_PACKET_SIZE) {
+        av_log(avctx, AV_LOG_ERROR, "Invalid packet size\n");
+        return AVERROR_INVALIDDATA;
+    }
+
+    av_fast_malloc(&s->buffer, &s->buffer_size,
+                   FFALIGN(input_size, 4096) + DCA_BUFFER_PADDING_SIZE);
+    if (!s->buffer)
+        return AVERROR(ENOMEM);
+
+    // Scan forward byte by byte until a known sync word is found
+    for (i = 0, ret = AVERROR_INVALIDDATA; i < input_size - MIN_PACKET_SIZE + 1 && ret < 0; i++)
+        ret = convert_bitstream(input + i, input_size - i, s->buffer, s->buffer_size);
+
+    if (ret < 0) {
+        av_log(avctx, AV_LOG_ERROR, "Not a valid DCA frame\n");
+        return ret;
+    }
+
+    // av_fast_malloc() does not clear memory and the buffer is reused between
+    // packets: zero the padding so that parsers reading past the end of the
+    // payload never see stale bytes from an earlier packet
+    memset(s->buffer + ret, 0,
+           FFMIN(DCA_BUFFER_PADDING_SIZE, s->buffer_size - ret));
+
+    input      = s->buffer;
+    input_size = ret;
+
+    s->packet = 0;
+
+    // Parse backward compatible core sub-stream
+    if (AV_RB32(input) == DCA_SYNCWORD_CORE_BE) {
+        int frame_size;
+
+        if ((ret = ff_dca_core_parse(&s->core, input, input_size)) < 0) {
+            s->core_residual_valid = 0;
+            return ret;
+        }
+
+        s->packet |= DCA_PACKET_CORE;
+
+        // EXSS data must be aligned on 4-byte boundary
+        frame_size = FFALIGN(s->core.frame_size, 4);
+        if (input_size - 4 > frame_size) {
+            input      += frame_size;
+            input_size -= frame_size;
+        }
+    }
+
+    if (!s->core_only) {
+        DCAExssAsset *asset = NULL;
+
+        // Parse extension sub-stream (EXSS)
+        if (AV_RB32(input) == DCA_SYNCWORD_SUBSTREAM) {
+            if ((ret = ff_dca_exss_parse(&s->exss, input, input_size)) < 0) {
+                if (avctx->err_recognition & AV_EF_EXPLODE)
+                    return ret;
+            } else {
+                s->packet |= DCA_PACKET_EXSS;
+                asset = &s->exss.assets[0];
+            }
+        }
+
+        // Parse XLL component in EXSS
+        if (asset && (asset->extension_mask & DCA_EXSS_XLL)) {
+            if ((ret = ff_dca_xll_parse(&s->xll, input, asset)) < 0) {
+                // Conceal XLL synchronization error
+                if (ret == AVERROR(EAGAIN)
+                    && (prev_packet & DCA_PACKET_XLL)
+                    && (s->packet & DCA_PACKET_CORE))
+                    s->packet |= DCA_PACKET_XLL | DCA_PACKET_RECOVERY;
+                else if (ret == AVERROR(ENOMEM) || (avctx->err_recognition & AV_EF_EXPLODE))
+                    return ret;
+            } else {
+                s->packet |= DCA_PACKET_XLL;
+            }
+        }
+
+        // Parse core extensions in EXSS or backward compatible core sub-stream
+        if ((s->packet & DCA_PACKET_CORE)
+            && (ret = ff_dca_core_parse_exss(&s->core, input, asset)) < 0)
+            return ret;
+    }
+
+    // Filter the frame
+    if (s->packet & DCA_PACKET_XLL) {
+        if (s->packet & DCA_PACKET_CORE) {
+            int x96_synth = -1;
+
+            // Enable X96 synthesis if needed
+            if (s->xll.chset[0].freq == 96000 && s->core.sample_rate == 48000)
+                x96_synth = 1;
+
+            if ((ret = ff_dca_core_filter_fixed(&s->core, x96_synth)) < 0) {
+                s->core_residual_valid = 0;
+                return ret;
+            }
+
+            // Force lossy downmixed output on the first core frame filtered.
+            // This prevents audible clicks when seeking and is consistent with
+            // what reference decoder does when there are multiple channel sets.
+            if (!s->core_residual_valid) {
+                if (s->xll.nreschsets > 0 && s->xll.nchsets > 1)
+                    s->packet |= DCA_PACKET_RECOVERY;
+                s->core_residual_valid = 1;
+            }
+        }
+
+        if ((ret = ff_dca_xll_filter_frame(&s->xll, frame)) < 0) {
+            // Fall back to core unless hard error
+            if (!(s->packet & DCA_PACKET_CORE))
+                return ret;
+            if (ret != AVERROR_INVALIDDATA || (avctx->err_recognition & AV_EF_EXPLODE))
+                return ret;
+            if ((ret = ff_dca_core_filter_frame(&s->core, frame)) < 0) {
+                s->core_residual_valid = 0;
+                return ret;
+            }
+        }
+    } else if (s->packet & DCA_PACKET_CORE) {
+        if ((ret = ff_dca_core_filter_frame(&s->core, frame)) < 0) {
+            s->core_residual_valid = 0;
+            return ret;
+        }
+        s->core_residual_valid = !!(s->core.filter_mode & DCA_FILTER_MODE_FIXED);
+    } else {
+        return AVERROR_INVALIDDATA;
+    }
+
+    *got_frame_ptr = 1;
+
+    return avpkt->size;
+}
+
+/** Drop decoder history; the next core frame is not valid for residual decoding. */
+static av_cold void dcadec_flush(AVCodecContext *avctx)
+{
+    DCAContext *s = avctx->priv_data;
+
+    ff_dca_core_flush(&s->core);
+    ff_dca_xll_flush(&s->xll);
+
+    s->core_residual_valid = 0;
+}
+
+/** Release the sub-decoders and the packet buffer. */
+static av_cold int dcadec_close(AVCodecContext *avctx)
+{
+    DCAContext *s = avctx->priv_data;
+
+    ff_dca_core_close(&s->core);
+    ff_dca_xll_close(&s->xll);
+
+    av_freep(&s->buffer);
+    s->buffer_size = 0;
+
+    return 0;
+}
+
+/**
+ * Wire up the sub-decoder contexts, initialise the DSP table and translate
+ * the requested channel layout into a DCA speaker layout.
+ */
+static av_cold int dcadec_init(AVCodecContext *avctx)
+{
+    DCAContext *s = avctx->priv_data;
+
+    s->avctx = avctx;
+    s->core.avctx = avctx;
+    s->exss.avctx = avctx;
+    s->xll.avctx = avctx;
+
+    if (ff_dca_core_init(&s->core) < 0)
+        return AVERROR(ENOMEM);
+
+    ff_dcadsp_init(&s->dcadsp);
+    s->core.dcadsp = &s->dcadsp;
+    s->xll.dcadsp = &s->dcadsp;
+
+    // Unsupported requests only produce a warning and leave
+    // request_channel_layout untouched
+    switch (avctx->request_channel_layout & ~AV_CH_LAYOUT_NATIVE) {
+    case 0:
+        s->request_channel_layout = 0;
+        break;
+    case AV_CH_LAYOUT_STEREO:
+    case AV_CH_LAYOUT_STEREO_DOWNMIX:
+        s->request_channel_layout = DCA_SPEAKER_LAYOUT_STEREO;
+        break;
+    case AV_CH_LAYOUT_5POINT0:
+        s->request_channel_layout = DCA_SPEAKER_LAYOUT_5POINT0;
+        break;
+    case AV_CH_LAYOUT_5POINT1:
+        s->request_channel_layout = DCA_SPEAKER_LAYOUT_5POINT1;
+        break;
+    default:
+        av_log(avctx, AV_LOG_WARNING, "Invalid request_channel_layout\n");
+        break;
+    }
+
+    avctx->sample_fmt = AV_SAMPLE_FMT_S32P;
+    avctx->bits_per_raw_sample = 24;
+
+    return 0;
+}
+
+#define OFFSET(x) offsetof(DCAContext, x)
+#define PARAM AV_OPT_FLAG_AUDIO_PARAM | AV_OPT_FLAG_DECODING_PARAM
+
+static const AVOption dcadec_options[] = {
+    { "core_only", "Decode core only without extensions", OFFSET(core_only), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, PARAM },
+    { NULL }
+};
+
+static const AVClass dcadec_class = {
+    .class_name = "DCA decoder",
+    .item_name  = av_default_item_name,
+    .option     = dcadec_options,
+    .version    = LIBAVUTIL_VERSION_INT,
+    .category   = AV_CLASS_CATEGORY_DECODER,
+};
+
+AVCodec ff_dca_decoder = {
+    .name           = "dca",
+    .long_name      = NULL_IF_CONFIG_SMALL("DCA (DTS Coherent Acoustics)"),
+    .type           = AVMEDIA_TYPE_AUDIO,
+    .id             = AV_CODEC_ID_DTS,
+    .priv_data_size = sizeof(DCAContext),
+    .init           = dcadec_init,
+    .decode         = dcadec_decode_frame,
+    .close          = dcadec_close,
+    .flush          = dcadec_flush,
+    .capabilities   = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_CHANNEL_CONF,
+    .sample_fmts    = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_S16P, AV_SAMPLE_FMT_S32P,
+                                                      AV_SAMPLE_FMT_FLTP, AV_SAMPLE_FMT_NONE },
+    .priv_class     = &dcadec_class,
+    .profiles       = NULL_IF_CONFIG_SMALL(ff_dca_profiles),
+    .caps_internal  = FF_CODEC_CAP_INIT_CLEANUP,
+};
diff --git a/libavcodec/dcadec.h b/libavcodec/dcadec.h
new file mode 100644
index 0000000000..6726121d3a
--- /dev/null
+++ b/libavcodec/dcadec.h
@@ -0,0 +1,80 @@
+/*
+ * Copyright (C) 2016 foo86
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_DCADEC_H
+#define AVCODEC_DCADEC_H
+
+#include "libavutil/common.h"
+#include "libavutil/float_dsp.h"
+
+#include "avcodec.h"
+#include "get_bits.h"
+#include "dca.h"
+#include "dcadsp.h"
+#include "dca_core.h"
+#include "dca_exss.h"
+#include "dca_xll.h"
+
+// Extra bytes allocated behind the converted packet data
+#define DCA_BUFFER_PADDING_SIZE     1024
+
+// Bit flags stored in DCAContext.packet
+#define DCA_PACKET_CORE         0x01
+#define DCA_PACKET_EXSS         0x02
+#define DCA_PACKET_XLL          0x04
+#define DCA_PACKET_RECOVERY     0x08
+
+typedef struct DCAContext {
+    const AVClass   *class;       ///< class for AVOptions
+    AVCodecContext  *avctx;
+
+    DCACoreDecoder core;  ///< Core decoder context
+    DCAExssParser  exss;  ///< EXSS parser context
+    DCAXllDecoder  xll;   ///< XLL decoder context
+
+    DCADSPContext  dcadsp;
+
+    uint8_t         *buffer;      ///< Packet buffer
+    unsigned int    buffer_size;
+
+    int     packet; ///< Packet flags
+
+    int     core_residual_valid;    ///< Core valid for residual decoding
+
+    int     request_channel_layout; ///< Converted from avctx.request_channel_layout
+    int     core_only;              ///< Core only decoding flag
+} DCAContext;
+
+/* Fill ch_remap and set the channel layout/count on avctx; returns the channel count. */
+int ff_dca_set_channel_layout(AVCodecContext *avctx, int *ch_remap, int dca_mask);
+
+/* CRC check over the bit range [p1, p2); returns 0 on success, -1 otherwise. */
+int ff_dca_check_crc(GetBitContext *s, int p1, int p2);
+
+/* In-place stereo downmix of planar samples, fixed and floating point variants. */
+void ff_dca_downmix_to_stereo_fixed(DCADSPContext *dcadsp, int32_t **samples,
+                                    int *coeff_l, int nsamples, int ch_mask);
+void ff_dca_downmix_to_stereo_float(AVFloatDSPContext *fdsp, float **samples,
+                                    int *coeff_l, int nsamples, int ch_mask);
+
+/**
+ * Move the bit reader forward to absolute bit position p.
+ *
+ * The position is accepted only if it lies between the current read index
+ * and the end of the buffer (both inclusive); otherwise the reader is left
+ * untouched.
+ *
+ * @return 0 on success, -1 if p is out of range
+ */
+static inline int ff_dca_seek_bits(GetBitContext *s, int p)
+{
+    if (p >= s->index && p <= s->size_in_bits) {
+        s->index = p;
+        return 0;
+    }
+    return -1;
+}
+
+#endif
diff --git a/libavcodec/dcadsp.c b/libavcodec/dcadsp.c
new file mode 100644
index 0000000000..cee3d608b3
--- /dev/null
+++ b/libavcodec/dcadsp.c
@@ -0,0 +1,413 @@
+/*
+ * Copyright (C) 2016 foo86
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/mem.h"
+
+#include "dcadsp.h"
+#include "dcamath.h"
+
+/* Reconstruct vector quantised high frequency subbands [sb_start, sb_end). */
+static void decode_hf_c(int32_t **dst,
+                        const int32_t *vq_index,
+                        const int8_t hf_vq[1024][32],
+                        int32_t scale_factors[32][2],
+                        intptr_t sb_start, intptr_t sb_end,
+                        intptr_t ofs, intptr_t len)
+{
+    int i, j;
+
+    for (i = sb_start; i < sb_end; i++) {
+        const int8_t *coeff = hf_vq[vq_index[i]];
+        int32_t scale = scale_factors[i][0];
+        for (j = 0; j < len; j++)
+            dst[i][j + ofs] = clip23(coeff[j] * scale + (1 << 3) >> 4);
+    }
+}
+
+/* Derive joint coded subbands of dst from src scaled per subband. */
+static void decode_joint_c(int32_t **dst, int32_t **src,
+                           const int32_t *scale_factors,
+                           intptr_t sb_start, intptr_t sb_end,
+                           intptr_t ofs, intptr_t len)
+{
+    int i, j;
+
+    for (i = sb_start; i < sb_end; i++) {
+        int32_t scale = scale_factors[i];
+        for (j = 0; j < len; j++)
+            dst[i][j + ofs] = clip23(mul17(src[i][j + ofs], scale));
+    }
+}
+
+/*
+ * LFE interpolation FIR, floating point. dec_select 0 produces 64 output
+ * samples per decimated sample using 8 taps, dec_select 1 produces 128
+ * using 4 taps. History is read from lfe_samples[-k].
+ */
+static void lfe_fir_float_c(float *pcm_samples, int32_t *lfe_samples,
+                            const float *filter_coeff, intptr_t npcmblocks,
+                            int dec_select)
+{
+    // Select decimation factor
+    int factor = 64 << dec_select;
+    int ncoeffs = 8 >> dec_select;
+    int nlfesamples = npcmblocks >> (dec_select + 1);
+    int i, j, k;
+
+    for (i = 0; i < nlfesamples; i++) {
+        // One decimated sample generates 64 or 128 interpolated ones
+        for (j = 0; j < factor / 2; j++) {
+            float a = 0;
+            float b = 0;
+
+            for (k = 0; k < ncoeffs; k++) {
+                a += filter_coeff[      j * ncoeffs + k] * lfe_samples[-k];
+                b += filter_coeff[255 - j * ncoeffs - k] * lfe_samples[-k];
+            }
+
+            pcm_samples[             j] = a;
+            pcm_samples[factor / 2 + j] = b;
+        }
+
+        lfe_samples++;
+        pcm_samples += factor;
+    }
+}
+
+static void lfe_fir1_float_c(float *pcm_samples, int32_t *lfe_samples,
+                             const float *filter_coeff, intptr_t npcmblocks)
+{
+    lfe_fir_float_c(pcm_samples, lfe_samples, filter_coeff, npcmblocks, 0);
+}
+
+static void lfe_fir2_float_c(float *pcm_samples, int32_t *lfe_samples,
+                             const float *filter_coeff, intptr_t npcmblocks)
+{
+    lfe_fir_float_c(pcm_samples, lfe_samples, filter_coeff, npcmblocks, 1);
+}
+
+/* 2x linear interpolation of the LFE channel; *hist carries the last input sample. */
+static void lfe_x96_float_c(float *dst, const float *src,
+                            float *hist, intptr_t len)
+{
+    float prev = *hist;
+    int i;
+
+    for (i = 0; i < len; i++) {
+        float a = 0.25f * src[i] + 0.75f * prev;
+        float b = 0.75f * src[i] + 0.25f * prev;
+        prev = src[i];
+        *dst++ = a;
+        *dst++ = b;
+    }
+
+    *hist = prev;
+}
+
+/* 32 band synthesis, floating point; subband_samples_hi is not used here. */
+static void sub_qmf32_float_c(SynthFilterContext *synth,
+                              FFTContext *imdct,
+                              float *pcm_samples,
+                              int32_t **subband_samples_lo,
+                              int32_t **subband_samples_hi,
+                              float *hist1, int *offset, float *hist2,
+                              const float *filter_coeff, intptr_t npcmblocks,
+                              float scale)
+{
+    LOCAL_ALIGNED(32, float, input, [32]);
+    int i, j;
+
+    for (j = 0; j < npcmblocks; j++) {
+        // Load in one sample from each subband
+        for (i = 0; i < 32; i++) {
+            if ((i - 1) & 2)
+                input[i] = -subband_samples_lo[i][j];
+            else
+                input[i] =  subband_samples_lo[i][j];
+        }
+
+        // One subband sample generates 32 interpolated ones
+        synth->synth_filter_float(imdct, hist1, offset,
+                                  hist2, filter_coeff,
+                                  pcm_samples, input, scale);
+        pcm_samples += 32;
+    }
+}
+
+/* 64 band synthesis, floating point; the upper 32 bands are zero without subband_samples_hi. */
+static void sub_qmf64_float_c(SynthFilterContext *synth,
+                              FFTContext *imdct,
+                              float *pcm_samples,
+                              int32_t **subband_samples_lo,
+                              int32_t **subband_samples_hi,
+                              float *hist1, int *offset, float *hist2,
+                              const float *filter_coeff, intptr_t npcmblocks,
+                              float scale)
+{
+    LOCAL_ALIGNED(32, float, input, [64]);
+    int i, j;
+
+    if (!subband_samples_hi)
+        memset(&input[32], 0, sizeof(input[0]) * 32);
+
+    for (j = 0; j < npcmblocks; j++) {
+        // Load in one sample from each subband
+        if (subband_samples_hi) {
+            // Full 64 subbands, first 32 are residual coded
+            for (i = 0; i < 32; i++) {
+                if ((i - 1) & 2)
+                    input[i] = -subband_samples_lo[i][j] - subband_samples_hi[i][j];
+                else
+                    input[i] =  subband_samples_lo[i][j] + subband_samples_hi[i][j];
+            }
+            for (i = 32; i < 64; i++) {
+                if ((i - 1) & 2)
+                    input[i] = -subband_samples_hi[i][j];
+                else
+                    input[i] =  subband_samples_hi[i][j];
+            }
+        } else {
+            // Only first 32 subbands
+            for (i = 0; i < 32; i++) {
+                if ((i - 1) & 2)
+                    input[i] = -subband_samples_lo[i][j];
+                else
+                    input[i] =  subband_samples_lo[i][j];
+            }
+        }
+
+        // One subband sample generates 64 interpolated ones
+        synth->synth_filter_float_64(imdct, hist1, offset,
+                                     hist2, filter_coeff,
+                                     pcm_samples, input, scale);
+        pcm_samples += 64;
+    }
+}
+
+/* LFE interpolation FIR, fixed point: 64 output samples per decimated sample. */
+static void lfe_fir_fixed_c(int32_t *pcm_samples, int32_t *lfe_samples,
+                            const int32_t *filter_coeff, intptr_t npcmblocks)
+{
+    // Select decimation factor
+    int nlfesamples = npcmblocks >> 1;
+    int i, j, k;
+
+    for (i = 0; i < nlfesamples; i++) {
+        // One decimated sample generates 64 interpolated ones
+        for (j = 0; j < 32; j++) {
+            int64_t a = 0;
+            int64_t b = 0;
+
+            for (k = 0; k < 8; k++) {
+                a += (int64_t)filter_coeff[      j * 8 + k] * lfe_samples[-k];
+                b += (int64_t)filter_coeff[255 - j * 8 - k] * lfe_samples[-k];
+            }
+
+            pcm_samples[     j] = clip23(norm23(a));
+            pcm_samples[32 + j] = clip23(norm23(b));
+        }
+
+        lfe_samples++;
+        pcm_samples += 64;
+    }
+}
+
+/* Fixed point counterpart of lfe_x96_float_c(). */
+static void lfe_x96_fixed_c(int32_t *dst, const int32_t *src,
+                            int32_t *hist, intptr_t len)
+{
+    int32_t prev = *hist;
+    int i;
+
+    for (i = 0; i < len; i++) {
+        int64_t a = INT64_C(2097471) * src[i] + INT64_C(6291137) * prev;
+        int64_t b = INT64_C(6291137) * src[i] + INT64_C(2097471) * prev;
+        prev = src[i];
+        *dst++ = clip23(norm23(a));
+        *dst++ = clip23(norm23(b));
+    }
+
+    *hist = prev;
+}
+
+/* 32 band synthesis, fixed point; subband_samples_hi is not used here. */
+static void sub_qmf32_fixed_c(SynthFilterContext *synth,
+                              DCADCTContext *imdct,
+                              int32_t *pcm_samples,
+                              int32_t **subband_samples_lo,
+                              int32_t **subband_samples_hi,
+                              int32_t *hist1, int *offset, int32_t *hist2,
+                              const int32_t *filter_coeff, intptr_t npcmblocks)
+{
+    LOCAL_ALIGNED(32, int32_t, input, [32]);
+    int i, j;
+
+    for (j = 0; j < npcmblocks; j++) {
+        // Load in one sample from each subband
+        for (i = 0; i < 32; i++)
+            input[i] = subband_samples_lo[i][j];
+
+        // One subband sample generates 32 interpolated ones
+        synth->synth_filter_fixed(imdct, hist1, offset,
+                                  hist2, filter_coeff,
+                                  pcm_samples, input);
+        pcm_samples += 32;
+    }
+}
+
+/* 64 band synthesis, fixed point; the upper 32 bands are zero without subband_samples_hi. */
+static void sub_qmf64_fixed_c(SynthFilterContext *synth,
+                              DCADCTContext *imdct,
+                              int32_t *pcm_samples,
+                              int32_t **subband_samples_lo,
+                              int32_t **subband_samples_hi,
+                              int32_t *hist1, int *offset, int32_t *hist2,
+                              const int32_t *filter_coeff, intptr_t npcmblocks)
+{
+    LOCAL_ALIGNED(32, int32_t, input, [64]);
+    int i, j;
+
+    if (!subband_samples_hi)
+        memset(&input[32], 0, sizeof(input[0]) * 32);
+
+    for (j = 0; j < npcmblocks; j++) {
+        // Load in one sample from each subband
+        if (subband_samples_hi) {
+            // Full 64 subbands, first 32 are residual coded
+            for (i =  0; i < 32; i++)
+                input[i] = subband_samples_lo[i][j] + subband_samples_hi[i][j];
+            for (i = 32; i < 64; i++)
+                input[i] = subband_samples_hi[i][j];
+        } else {
+            // Only first 32 subbands
+            for (i = 0; i < 32; i++)
+                input[i] = subband_samples_lo[i][j];
+        }
+
+        // One subband sample generates 64 interpolated ones
+        synth->synth_filter_fixed_64(imdct, hist1, offset,
+                                     hist2, filter_coeff,
+                                     pcm_samples, input);
+        pcm_samples += 64;
+    }
+}
+
+/*
+ * dst += (src * coeff + 4) >> 3.
+ * The product is formed in 64 bits so that the result does not depend on the
+ * width of intptr_t, and the accumulation wraps around as unsigned instead
+ * of overflowing a signed integer on damaged streams.
+ */
+static void decor_c(int32_t *dst, const int32_t *src, intptr_t coeff, intptr_t len)
+{
+    int i;
+
+    for (i = 0; i < len; i++)
+        dst[i] += (uint32_t)(((int64_t)src[i] * coeff + (1 << 2)) >> 3);
+}
+
+/*
+ * Remove the Cs contribution (scaled by the constant below) from two
+ * channels. Accumulation wraps around as unsigned to avoid signed overflow.
+ */
+static void dmix_sub_xch_c(int32_t *dst1, int32_t *dst2,
+                           const int32_t *src, intptr_t len)
+{
+    int i;
+
+    for (i = 0; i < len; i++) {
+        int32_t cs = mul23(src[i], 5931520 /* M_SQRT1_2 * (1 << 23) */);
+        dst1[i] -= (uint32_t)cs;
+        dst2[i] -= (uint32_t)cs;
+    }
+}
+
+/* dst -= mul15(src, coeff), wrapping on overflow. */
+static void dmix_sub_c(int32_t *dst, const int32_t *src, intptr_t coeff, intptr_t len)
+{
+    int i;
+
+    for (i = 0; i < len; i++)
+        dst[i] -= (uint32_t)mul15(src[i], coeff);
+}
+
+/* dst += mul15(src, coeff), wrapping on overflow. */
+static void dmix_add_c(int32_t *dst, const int32_t *src, intptr_t coeff, intptr_t len)
+{
+    int i;
+
+    for (i = 0; i < len; i++)
+        dst[i] += (uint32_t)mul15(src[i], coeff);
+}
+
+/* Scale dst in place with mul15(). */
+static void dmix_scale_c(int32_t *dst, intptr_t scale, intptr_t len)
+{
+    int i;
+
+    for (i = 0; i < len; i++)
+        dst[i] = mul15(dst[i], scale);
+}
+
+/* Scale dst in place with mul16(). */
+static void dmix_scale_inv_c(int32_t *dst, intptr_t scale_inv, intptr_t len)
+{
+    int i;
+
+    for (i = 0; i < len; i++)
+        dst[i] = mul16(dst[i], scale_inv);
+}
+
+/* dst -= mul22(src, coeff), wrapping on overflow. */
+static void filter0(int32_t *dst, const int32_t *src, int32_t coeff, intptr_t len)
+{
+    int i;
+
+    for (i = 0; i < len; i++)
+        dst[i] -= (uint32_t)mul22(src[i], coeff);
+}
+
+/* dst -= mul23(src, coeff), wrapping on overflow. */
+static void filter1(int32_t *dst, const int32_t *src, int32_t coeff, intptr_t len)
+{
+    int i;
+
+    for (i = 0; i < len; i++)
+        dst[i] -= (uint32_t)mul23(src[i], coeff);
+}
+
+/*
+ * Combine two frequency bands into one interleaved output of 2 * len
+ * samples. Both inputs are modified in place, and src0 is stepped back by
+ * one sample on each of the 8 filter1 passes, so the caller must provide
+ * valid history in front of it.
+ */
+static void assemble_freq_bands_c(int32_t *dst, int32_t *src0, int32_t *src1,
+                                  const int32_t *coeff, intptr_t len)
+{
+    int i;
+
+    filter0(src0, src1, coeff[0], len);
+    filter0(src1, src0, coeff[1], len);
+    filter0(src0, src1, coeff[2], len);
+    filter0(src1, src0, coeff[3], len);
+
+    for (i = 0; i < 8; i++, src0--) {
+        filter1(src0, src1, coeff[i +  4], len);
+        filter1(src1, src0, coeff[i + 12], len);
+        filter1(src0, src1, coeff[i +  4], len);
+    }
+
+    for (i = 0; i < len; i++) {
+        *dst++ = *src1++;
+        *dst++ = *++src0;
+    }
+}
+
+/* Install the C reference implementations into the DSP function table. */
+av_cold void ff_dcadsp_init(DCADSPContext *s)
+{
+    s->decode_hf = decode_hf_c;
+    s->decode_joint = decode_joint_c;
+
+    s->lfe_fir_float[0] = lfe_fir1_float_c;
+    s->lfe_fir_float[1] = lfe_fir2_float_c;
+    s->lfe_x96_float = lfe_x96_float_c;
+    s->sub_qmf_float[0] = sub_qmf32_float_c;
+    s->sub_qmf_float[1] = sub_qmf64_float_c;
+
+    s->lfe_fir_fixed = lfe_fir_fixed_c;
+    s->lfe_x96_fixed = lfe_x96_fixed_c;
+    s->sub_qmf_fixed[0] = sub_qmf32_fixed_c;
+    s->sub_qmf_fixed[1] = sub_qmf64_fixed_c;
+
+    s->decor = decor_c;
+
+    s->dmix_sub_xch = dmix_sub_xch_c;
+    s->dmix_sub = dmix_sub_c;
+    s->dmix_add = dmix_add_c;
+    s->dmix_scale = dmix_scale_c;
+    s->dmix_scale_inv = dmix_scale_inv_c;
+
+    s->assemble_freq_bands = assemble_freq_bands_c;
+}
diff --git a/libavcodec/dcadsp.h b/libavcodec/dcadsp.h
new file mode 100644
index 0000000000..d8acf37ab2
--- /dev/null
+++ b/libavcodec/dcadsp.h
@@ -0,0 +1,91 @@
+/*
+ * Copyright (C) 2016 foo86
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_DCADSP_H
+#define AVCODEC_DCADSP_H
+
+#include "libavutil/common.h"
+
+#include "fft.h"
+#include "dcadct.h"
+#include "synth_filter.h"
+
+/* Function table of the DCA decoder DSP routines, filled by ff_dcadsp_init(). */
+typedef struct DCADSPContext {
+    void (*decode_hf)(int32_t **dst,
+                      const int32_t *vq_index,
+                      const int8_t hf_vq[1024][32],
+                      int32_t scale_factors[32][2],
+                      intptr_t sb_start, intptr_t sb_end,
+                      intptr_t ofs, intptr_t len);
+
+    void (*decode_joint)(int32_t **dst, int32_t **src,
+                         const int32_t *scale_factors,
+                         intptr_t sb_start, intptr_t sb_end,
+                         intptr_t ofs, intptr_t len);
+
+    void (*lfe_fir_float[2])(float *pcm_samples, int32_t *lfe_samples,
+                             const float *filter_coeff, intptr_t npcmblocks);
+
+    void (*lfe_x96_float)(float *dst, const float *src,
+                          float *hist, intptr_t len);
+
+    void (*sub_qmf_float[2])(SynthFilterContext *synth,
+                             FFTContext *imdct,
+                             float *pcm_samples,
+                             int32_t **subband_samples_lo,
+                             int32_t **subband_samples_hi,
+                             float *hist1, int *offset, float *hist2,
+                             const float *filter_coeff, intptr_t npcmblocks,
+                             float scale);
+
+    void (*lfe_fir_fixed)(int32_t *pcm_samples, int32_t *lfe_samples,
+                          const int32_t *filter_coeff, intptr_t npcmblocks);
+
+    void (*lfe_x96_fixed)(int32_t *dst, const int32_t *src,
+                          int32_t *hist, intptr_t len);
+
+    void (*sub_qmf_fixed[2])(SynthFilterContext *synth,
+                             DCADCTContext *imdct,
+                             int32_t *pcm_samples,
+                             int32_t **subband_samples_lo,
+                             int32_t **subband_samples_hi,
+                             int32_t *hist1, int *offset, int32_t *hist2,
+                             const int32_t *filter_coeff, intptr_t npcmblocks);
+
+    void (*decor)(int32_t *dst, const int32_t *src, intptr_t coeff, intptr_t len);
+
+    void (*dmix_sub_xch)(int32_t *dst1, int32_t *dst2,
+                         const int32_t *src, intptr_t len);
+
+    void (*dmix_sub)(int32_t *dst, const int32_t *src, intptr_t coeff, intptr_t len);
+
+    void (*dmix_add)(int32_t *dst, const int32_t *src, intptr_t coeff, intptr_t len);
+
+    void (*dmix_scale)(int32_t *dst, intptr_t scale, intptr_t len);
+
+    void (*dmix_scale_inv)(int32_t *dst, intptr_t scale_inv, intptr_t len);
+
+    void (*assemble_freq_bands)(int32_t *dst, int32_t *src0, int32_t *src1,
+                                const int32_t *coeff, intptr_t len);
+} DCADSPContext;
+
+av_cold void ff_dcadsp_init(DCADSPContext *s);
+
+#endif
diff --git a/libavcodec/version.h b/libavcodec/version.h
index 5740137bb8..02063c8ae7 100644
--- a/libavcodec/version.h
+++ b/libavcodec/version.h
@@ -30,7 +30,7 @@
 #define LIBAVCODEC_VERSION_MAJOR 57
 #define LIBAVCODEC_VERSION_MINOR 24
-#define LIBAVCODEC_VERSION_MICRO 100
+#define LIBAVCODEC_VERSION_MICRO 101
 #define LIBAVCODEC_VERSION_INT AV_VERSION_INT(LIBAVCODEC_VERSION_MAJOR, \
                                               LIBAVCODEC_VERSION_MINOR, \
diff --git a/libavcodec/x86/Makefile b/libavcodec/x86/Makefile
index eec98cb7a0..ce06b908d4 100644
--- a/libavcodec/x86/Makefile
+++ b/libavcodec/x86/Makefile
@@ -44,7 +44,7 @@ OBJS-$(CONFIG_ADPCM_G722_ENCODER) += x86/g722dsp_init.o
 OBJS-$(CONFIG_ALAC_DECODER) += x86/alacdsp_init.o
 OBJS-$(CONFIG_APNG_DECODER) += x86/pngdsp_init.o
 OBJS-$(CONFIG_CAVS_DECODER) += x86/cavsdsp.o
-#OBJS-$(CONFIG_DCA_DECODER) += x86/synth_filter_init.o +OBJS-$(CONFIG_DCA_DECODER) += x86/synth_filter_init.o OBJS-$(CONFIG_DNXHD_ENCODER) += x86/dnxhdenc_init.o OBJS-$(CONFIG_HEVC_DECODER) += x86/hevcdsp_init.o OBJS-$(CONFIG_JPEG2000_DECODER) += x86/jpeg2000dsp_init.o @@ -132,7 +132,7 @@ YASM-OBJS-$(CONFIG_ADPCM_G722_DECODER) += x86/g722dsp.o YASM-OBJS-$(CONFIG_ADPCM_G722_ENCODER) += x86/g722dsp.o YASM-OBJS-$(CONFIG_ALAC_DECODER) += x86/alacdsp.o YASM-OBJS-$(CONFIG_APNG_DECODER) += x86/pngdsp.o -#YASM-OBJS-$(CONFIG_DCA_DECODER) += x86/synth_filter.o +YASM-OBJS-$(CONFIG_DCA_DECODER) += x86/synth_filter.o YASM-OBJS-$(CONFIG_DIRAC_DECODER) += x86/diracdsp_mmx.o x86/diracdsp_yasm.o \ x86/dwt_yasm.o YASM-OBJS-$(CONFIG_DNXHD_ENCODER) += x86/dnxhdenc.o diff --git a/tests/checkasm/Makefile b/tests/checkasm/Makefile index 14a11d64c3..07fe5bc92f 100644 --- a/tests/checkasm/Makefile +++ b/tests/checkasm/Makefile @@ -1,7 +1,7 @@ # libavcodec tests AVCODECOBJS-$(CONFIG_ALAC_DECODER) += alacdsp.o AVCODECOBJS-$(CONFIG_BSWAPDSP) += bswapdsp.o -#AVCODECOBJS-$(CONFIG_DCA_DECODER) += synth_filter.o +AVCODECOBJS-$(CONFIG_DCA_DECODER) += synth_filter.o AVCODECOBJS-$(CONFIG_FLACDSP) += flacdsp.o AVCODECOBJS-$(CONFIG_FMTCONVERT) += fmtconvert.o AVCODECOBJS-$(CONFIG_H264PRED) += h264pred.o diff --git a/tests/checkasm/checkasm.c b/tests/checkasm/checkasm.c index f7d1331317..49fd2af379 100644 --- a/tests/checkasm/checkasm.c +++ b/tests/checkasm/checkasm.c @@ -71,9 +71,9 @@ static const struct { #if CONFIG_BSWAPDSP { "bswapdsp", checkasm_check_bswapdsp }, #endif -/* #if CONFIG_DCA_DECODER + #if CONFIG_DCA_DECODER { "synth_filter", checkasm_check_synth_filter }, - #endif*/ + #endif #if CONFIG_FLACDSP { "flacdsp", checkasm_check_flacdsp }, #endif diff --git a/tests/fate/acodec.mak b/tests/fate/acodec.mak index 62b1bc1f09..e0f23208e2 100644 --- a/tests/fate/acodec.mak +++ b/tests/fate/acodec.mak @@ -99,14 +99,14 @@ FATE_ACODEC-$(call ENCDEC, ALAC, MOV) += fate-acodec-alac fate-acodec-alac: 
FMT = mov fate-acodec-alac: CODEC = alac -compression_level 1 -#FATE_ACODEC-$(call ENCDEC, DCA, DTS) += fate-acodec-dca +FATE_ACODEC-$(call ENCDEC, DCA, DTS) += fate-acodec-dca fate-acodec-dca: tests/data/asynth-44100-2.wav fate-acodec-dca: SRC = tests/data/asynth-44100-2.wav fate-acodec-dca: CMD = md5 -i $(TARGET_PATH)/$(SRC) -c:a dca -strict -2 -f dts -flags +bitexact fate-acodec-dca: CMP = oneline fate-acodec-dca: REF = 7ffdefdf47069289990755c79387cc90 -#FATE_ACODEC-$(call ENCDEC, DCA, WAV) += fate-acodec-dca2 +FATE_ACODEC-$(call ENCDEC, DCA, WAV) += fate-acodec-dca2 fate-acodec-dca2: CMD = enc_dec_pcm dts wav s16le $(SRC) -c:a dca -strict -2 -flags +bitexact fate-acodec-dca2: REF = $(SRC) fate-acodec-dca2: CMP = stddev diff --git a/tests/fate/audio.mak b/tests/fate/audio.mak index 686b7dfad7..93c19a0320 100644 --- a/tests/fate/audio.mak +++ b/tests/fate/audio.mak @@ -21,7 +21,7 @@ fate-dca-core: CMD = pcm -i $(TARGET_SAMPLES)/dts/dts.ts fate-dca-core: CMP = oneoff fate-dca-core: REF = $(SAMPLES)/dts/dts.pcm -#FATE_SAMPLES_AUDIO-$(CONFIG_DCA_DECODER) += $(FATE_DCA-yes) +FATE_SAMPLES_AUDIO-$(CONFIG_DCA_DECODER) += $(FATE_DCA-yes) fate-dca: $(FATE_DCA-yes) FATE_SAMPLES_AUDIO-$(call DEMDEC, DSICIN, DSICINAUDIO) += fate-delphine-cin-audio @@ -31,7 +31,7 @@ FATE_SAMPLES_AUDIO-$(call DEMDEC, DSS, DSS_SP) += fate-dss-lp fate-dss-sp fate-dss-lp: CMD = framecrc -i $(TARGET_SAMPLES)/dss/lp.dss -frames 30 fate-dss-sp: CMD = framecrc -i $(TARGET_SAMPLES)/dss/sp.dss -frames 30 -#FATE_SAMPLES_AUDIO-$(call DEMDEC, DTS, DCA) += fate-dts_es +FATE_SAMPLES_AUDIO-$(call DEMDEC, DTS, DCA) += fate-dts_es fate-dts_es: CMD = pcm -i $(TARGET_SAMPLES)/dts/dts_es.dts fate-dts_es: CMP = oneoff fate-dts_es: REF = $(SAMPLES)/dts/dts_es_2.pcm |