diff options
author | James Almer <jamrial@gmail.com> | 2023-11-25 11:22:28 -0300 |
---|---|---|
committer | James Almer <jamrial@gmail.com> | 2023-12-18 15:20:59 -0300 |
commit | 4ee05182b7cccfa6928dcb0a45c2b50b7d9ea39b (patch) | |
tree | b49dff7515b344514546386f04e0f0ba970439f8 /libavformat/iamf_parse.c | |
parent | 7f9308380f0f2e299ed8d2295b026cdd4ce057d2 (diff) | |
download | ffmpeg-4ee05182b7cccfa6928dcb0a45c2b50b7d9ea39b.tar.gz |
avformat: Immersive Audio Model and Formats demuxer
Signed-off-by: James Almer <jamrial@gmail.com>
Diffstat (limited to 'libavformat/iamf_parse.c')
-rw-r--r-- | libavformat/iamf_parse.c | 1105 |
1 files changed, 1105 insertions, 0 deletions
diff --git a/libavformat/iamf_parse.c b/libavformat/iamf_parse.c new file mode 100644 index 0000000000..feb670180e --- /dev/null +++ b/libavformat/iamf_parse.c @@ -0,0 +1,1105 @@ +/* + * Immersive Audio Model and Formats parsing + * Copyright (c) 2023 James Almer <jamrial@gmail.com> + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "libavutil/avassert.h" +#include "libavutil/common.h" +#include "libavutil/iamf.h" +#include "libavutil/intreadwrite.h" +#include "libavutil/log.h" +#include "libavcodec/get_bits.h" +#include "libavcodec/flac.h" +#include "libavcodec/mpeg4audio.h" +#include "libavcodec/put_bits.h" +#include "avio_internal.h" +#include "iamf_parse.h" +#include "isom.h" + +static int opus_decoder_config(IAMFCodecConfig *codec_config, + AVIOContext *pb, int len) +{ + int left = len - avio_tell(pb); + + if (left < 11) + return AVERROR_INVALIDDATA; + + codec_config->extradata = av_malloc(left + 8); + if (!codec_config->extradata) + return AVERROR(ENOMEM); + + AV_WB32(codec_config->extradata, MKBETAG('O','p','u','s')); + AV_WB32(codec_config->extradata + 4, MKBETAG('H','e','a','d')); + codec_config->extradata_size = avio_read(pb, codec_config->extradata + 8, left); + if (codec_config->extradata_size < left) + return AVERROR_INVALIDDATA; + + codec_config->extradata_size += 8; + codec_config->sample_rate = 48000; + + return 0; +} + +static int aac_decoder_config(IAMFCodecConfig *codec_config, + AVIOContext *pb, int len, void *logctx) +{ + MPEG4AudioConfig cfg = { 0 }; + int object_type_id, codec_id, stream_type; + int ret, tag, left; + + tag = avio_r8(pb); + if (tag != MP4DecConfigDescrTag) + return AVERROR_INVALIDDATA; + + object_type_id = avio_r8(pb); + if (object_type_id != 0x40) + return AVERROR_INVALIDDATA; + + stream_type = avio_r8(pb); + if (((stream_type >> 2) != 5) || ((stream_type >> 1) & 1)) + return AVERROR_INVALIDDATA; + + avio_skip(pb, 3); // buffer size db + avio_skip(pb, 4); // rc_max_rate + avio_skip(pb, 4); // avg bitrate + + codec_id = ff_codec_get_id(ff_mp4_obj_type, object_type_id); + if (codec_id && codec_id != codec_config->codec_id) + return AVERROR_INVALIDDATA; + + tag = avio_r8(pb); + if (tag != MP4DecSpecificDescrTag) + return AVERROR_INVALIDDATA; + + left = len - avio_tell(pb); + if (left <= 0) + return AVERROR_INVALIDDATA; + + codec_config->extradata = av_malloc(left); + if (!codec_config->extradata) + return AVERROR(ENOMEM); + + codec_config->extradata_size = avio_read(pb, codec_config->extradata, left); + if (codec_config->extradata_size < left) + return AVERROR_INVALIDDATA; + + ret = avpriv_mpeg4audio_get_config2(&cfg, codec_config->extradata, + codec_config->extradata_size, 1, logctx); + if (ret < 0) + return ret; + + codec_config->sample_rate = cfg.sample_rate; + + return 0; +} + +static int flac_decoder_config(IAMFCodecConfig *codec_config, + AVIOContext *pb, int len) +{ + int left; + + avio_skip(pb, 4); // METADATA_BLOCK_HEADER + + left = len - avio_tell(pb); + if (left < FLAC_STREAMINFO_SIZE) + return AVERROR_INVALIDDATA; + + codec_config->extradata = av_malloc(left); + if (!codec_config->extradata) + return AVERROR(ENOMEM); + + codec_config->extradata_size = avio_read(pb, codec_config->extradata, left); + if (codec_config->extradata_size < left) + return AVERROR_INVALIDDATA; + + codec_config->sample_rate = AV_RB24(codec_config->extradata + 10) >> 4; + + return 0; +} + +static int ipcm_decoder_config(IAMFCodecConfig *codec_config, + AVIOContext *pb, int len) +{ + static const enum AVSampleFormat sample_fmt[2][3] = { + { AV_CODEC_ID_PCM_S16BE, AV_CODEC_ID_PCM_S24BE, AV_CODEC_ID_PCM_S32BE }, + { AV_CODEC_ID_PCM_S16LE, AV_CODEC_ID_PCM_S24LE, AV_CODEC_ID_PCM_S32LE }, + }; + int sample_format = avio_r8(pb); // 0 = BE, 1 = LE + int sample_size = (avio_r8(pb) / 8 - 2); // 16, 24, 32 + if (sample_format > 1 || sample_size > 2) + return AVERROR_INVALIDDATA; + + codec_config->codec_id = sample_fmt[sample_format][sample_size]; + codec_config->sample_rate = avio_rb32(pb); + + if (len - avio_tell(pb)) + return AVERROR_INVALIDDATA; + + return 0; +} + +static int codec_config_obu(void *s, IAMFContext *c, AVIOContext *pb, int len) +{ + IAMFCodecConfig **tmp, *codec_config = NULL; + FFIOContext b; + AVIOContext *pbc; + uint8_t *buf; + enum AVCodecID avcodec_id; + unsigned codec_config_id, nb_samples, codec_id; + int16_t seek_preroll; + int ret; + + buf = av_malloc(len); + if (!buf) + return AVERROR(ENOMEM); + + ret = avio_read(pb, buf, len); + if (ret != len) { + if (ret >= 0) + ret = AVERROR_INVALIDDATA; + goto fail; + } + + ffio_init_context(&b, buf, len, 0, NULL, NULL, NULL, NULL); + pbc = &b.pub; + + codec_config_id = ffio_read_leb(pbc); + codec_id = avio_rb32(pbc); + nb_samples = ffio_read_leb(pbc); + seek_preroll = avio_rb16(pbc); + + switch(codec_id) { + case MKBETAG('O','p','u','s'): + avcodec_id = AV_CODEC_ID_OPUS; + break; + case MKBETAG('m','p','4','a'): + avcodec_id = AV_CODEC_ID_AAC; + break; + case MKBETAG('f','L','a','C'): + avcodec_id = AV_CODEC_ID_FLAC; + break; + default: + avcodec_id = AV_CODEC_ID_NONE; + break; + } + + for (int i = 0; i < c->nb_codec_configs; i++) + if (c->codec_configs[i]->codec_config_id == codec_config_id) { + ret = AVERROR_INVALIDDATA; + goto fail; + } + + tmp = av_realloc_array(c->codec_configs, c->nb_codec_configs + 1, sizeof(*c->codec_configs)); + if (!tmp) { + ret = AVERROR(ENOMEM); + goto fail; + } + c->codec_configs = tmp; + + codec_config = av_mallocz(sizeof(*codec_config)); + if (!codec_config) { + ret = AVERROR(ENOMEM); + goto fail; + } + + codec_config->codec_config_id = codec_config_id; + codec_config->codec_id = avcodec_id; + codec_config->nb_samples = nb_samples; + codec_config->seek_preroll = seek_preroll; + + switch(codec_id) { + case MKBETAG('O','p','u','s'): + ret = opus_decoder_config(codec_config, pbc, len); + break; + case MKBETAG('m','p','4','a'): + ret = aac_decoder_config(codec_config, pbc, len, s); + break; + case MKBETAG('f','L','a','C'): + ret = flac_decoder_config(codec_config, pbc, len); + break; + case MKBETAG('i','p','c','m'): + ret = ipcm_decoder_config(codec_config, pbc, len); + break; + default: + break; + } + if (ret < 0) + goto fail; + + c->codec_configs[c->nb_codec_configs++] = codec_config; + + len -= avio_tell(pbc); + if (len) + av_log(s, AV_LOG_WARNING, "Underread in codec_config_obu. %d bytes left at the end\n", len); + + ret = 0; +fail: + av_free(buf); + if (ret < 0) { + if (codec_config) + av_free(codec_config->extradata); + av_free(codec_config); + } + return ret; +} + +static int update_extradata(AVCodecParameters *codecpar) +{ + GetBitContext gb; + PutBitContext pb; + int ret; + + switch(codecpar->codec_id) { + case AV_CODEC_ID_OPUS: + AV_WB8(codecpar->extradata + 9, codecpar->ch_layout.nb_channels); + break; + case AV_CODEC_ID_AAC: { + uint8_t buf[5]; + + init_put_bits(&pb, buf, sizeof(buf)); + ret = init_get_bits8(&gb, codecpar->extradata, codecpar->extradata_size); + if (ret < 0) + return ret; + + ret = get_bits(&gb, 5); + put_bits(&pb, 5, ret); + if (ret == AOT_ESCAPE) // violates section 3.11.2, but better check for it + put_bits(&pb, 6, get_bits(&gb, 6)); + ret = get_bits(&gb, 4); + put_bits(&pb, 4, ret); + if (ret == 0x0f) + put_bits(&pb, 24, get_bits(&gb, 24)); + + skip_bits(&gb, 4); + put_bits(&pb, 4, codecpar->ch_layout.nb_channels); // set channel config + ret = put_bits_left(&pb); + put_bits(&pb, ret, get_bits(&gb, ret)); + flush_put_bits(&pb); + + memcpy(codecpar->extradata, buf, sizeof(buf)); + break; + } + case AV_CODEC_ID_FLAC: { + uint8_t buf[13]; + + init_put_bits(&pb, buf, sizeof(buf)); + ret = init_get_bits8(&gb, codecpar->extradata, codecpar->extradata_size); + if (ret < 0) + return ret; + + put_bits32(&pb, get_bits_long(&gb, 32)); // min/max blocksize + put_bits64(&pb, 48, get_bits64(&gb, 48)); // min/max framesize + put_bits(&pb, 20, get_bits(&gb, 20)); // samplerate + skip_bits(&gb, 3); + put_bits(&pb, 3, codecpar->ch_layout.nb_channels - 1); + ret = put_bits_left(&pb); + put_bits(&pb, ret, get_bits(&gb, ret)); + flush_put_bits(&pb); + + memcpy(codecpar->extradata, buf, sizeof(buf)); + break; + } + } + + return 0; +} + +static int scalable_channel_layout_config(void *s, AVIOContext *pb, + IAMFAudioElement *audio_element, + const IAMFCodecConfig *codec_config) +{ + int nb_layers, k = 0; + + nb_layers = avio_r8(pb) >> 5; // get_bits(&gb, 3); + // skip_bits(&gb, 5); //reserved + + if (nb_layers > 6) + return AVERROR_INVALIDDATA; + + for (int i = 0; i < nb_layers; i++) { + AVIAMFLayer *layer; + int loudspeaker_layout, output_gain_is_present_flag; + int substream_count, coupled_substream_count; + int ret, byte = avio_r8(pb); + + layer = av_iamf_audio_element_add_layer(audio_element->element); + if (!layer) + return AVERROR(ENOMEM); + + loudspeaker_layout = byte >> 4; // get_bits(&gb, 4); + output_gain_is_present_flag = (byte >> 3) & 1; //get_bits1(&gb); + if ((byte >> 2) & 1) + layer->flags |= AV_IAMF_LAYER_FLAG_RECON_GAIN; + substream_count = avio_r8(pb); + coupled_substream_count = avio_r8(pb); + + if (output_gain_is_present_flag) { + layer->output_gain_flags = avio_r8(pb) >> 2; // get_bits(&gb, 6); + layer->output_gain = av_make_q(sign_extend(avio_rb16(pb), 16), 1 << 8); + } + + if (loudspeaker_layout < 10) + av_channel_layout_copy(&layer->ch_layout, &ff_iamf_scalable_ch_layouts[loudspeaker_layout]); + else + layer->ch_layout = (AVChannelLayout){ .order = AV_CHANNEL_ORDER_UNSPEC, + .nb_channels = substream_count + + coupled_substream_count }; + + for (int j = 0; j < substream_count; j++) { + IAMFSubStream *substream = &audio_element->substreams[k++]; + + substream->codecpar->ch_layout = coupled_substream_count-- > 0 ? (AVChannelLayout)AV_CHANNEL_LAYOUT_STEREO : + (AVChannelLayout)AV_CHANNEL_LAYOUT_MONO; + + ret = update_extradata(substream->codecpar); + if (ret < 0) + return ret; + } + + } + + return 0; +} + +static int ambisonics_config(void *s, AVIOContext *pb, + IAMFAudioElement *audio_element, + const IAMFCodecConfig *codec_config) +{ + AVIAMFLayer *layer; + unsigned ambisonics_mode; + int output_channel_count, substream_count, order; + int ret; + + ambisonics_mode = ffio_read_leb(pb); + if (ambisonics_mode > 1) + return 0; + + output_channel_count = avio_r8(pb); // C + substream_count = avio_r8(pb); // N + if (audio_element->nb_substreams != substream_count) + return AVERROR_INVALIDDATA; + + order = floor(sqrt(output_channel_count - 1)); + /* incomplete order - some harmonics are missing */ + if ((order + 1) * (order + 1) != output_channel_count) + return AVERROR_INVALIDDATA; + + layer = av_iamf_audio_element_add_layer(audio_element->element); + if (!layer) + return AVERROR(ENOMEM); + + layer->ambisonics_mode = ambisonics_mode; + if (ambisonics_mode == 0) { + for (int i = 0; i < substream_count; i++) { + IAMFSubStream *substream = &audio_element->substreams[i]; + + substream->codecpar->ch_layout = (AVChannelLayout)AV_CHANNEL_LAYOUT_MONO; + + ret = update_extradata(substream->codecpar); + if (ret < 0) + return ret; + } + + layer->ch_layout.order = AV_CHANNEL_ORDER_CUSTOM; + layer->ch_layout.nb_channels = output_channel_count; + layer->ch_layout.u.map = av_calloc(output_channel_count, sizeof(*layer->ch_layout.u.map)); + if (!layer->ch_layout.u.map) + return AVERROR(ENOMEM); + + for (int i = 0; i < output_channel_count; i++) + layer->ch_layout.u.map[i].id = avio_r8(pb) + AV_CHAN_AMBISONIC_BASE; + } else { + int coupled_substream_count = avio_r8(pb); // M + int nb_demixing_matrix = substream_count + coupled_substream_count; + int demixing_matrix_size = nb_demixing_matrix * output_channel_count; + + layer->ch_layout = (AVChannelLayout){ .order = AV_CHANNEL_ORDER_AMBISONIC, .nb_channels = output_channel_count }; + layer->demixing_matrix = av_malloc_array(demixing_matrix_size, sizeof(*layer->demixing_matrix)); + if (!layer->demixing_matrix) + return AVERROR(ENOMEM); + + for (int i = 0; i < demixing_matrix_size; i++) + layer->demixing_matrix[i] = av_make_q(sign_extend(avio_rb16(pb), 16), 1 << 8); + + for (int i = 0; i < substream_count; i++) { + IAMFSubStream *substream = &audio_element->substreams[i]; + + substream->codecpar->ch_layout = coupled_substream_count-- > 0 ? (AVChannelLayout)AV_CHANNEL_LAYOUT_STEREO : + (AVChannelLayout)AV_CHANNEL_LAYOUT_MONO; + + + ret = update_extradata(substream->codecpar); + if (ret < 0) + return ret; + } + } + + return 0; +} + +static int param_parse(void *s, IAMFContext *c, AVIOContext *pb, + unsigned int type, + const IAMFAudioElement *audio_element, + AVIAMFParamDefinition **out_param_definition) +{ + IAMFParamDefinition *param_definition = NULL; + AVIAMFParamDefinition *param; + unsigned int parameter_id, parameter_rate, mode; + unsigned int duration = 0, constant_subblock_duration = 0, nb_subblocks = 0; + size_t param_size; + + parameter_id = ffio_read_leb(pb); + + for (int i = 0; i < c->nb_param_definitions; i++) + if (c->param_definitions[i]->param->parameter_id == parameter_id) { + param_definition = c->param_definitions[i]; + break; + } + + parameter_rate = ffio_read_leb(pb); + mode = avio_r8(pb) >> 7; + + if (mode == 0) { + duration = ffio_read_leb(pb); + if (!duration) + return AVERROR_INVALIDDATA; + constant_subblock_duration = ffio_read_leb(pb); + if (constant_subblock_duration == 0) + nb_subblocks = ffio_read_leb(pb); + else + nb_subblocks = duration / constant_subblock_duration; + } + + param = av_iamf_param_definition_alloc(type, nb_subblocks, ¶m_size); + if (!param) + return AVERROR(ENOMEM); + + for (int i = 0; i < nb_subblocks; i++) { + void *subblock = av_iamf_param_definition_get_subblock(param, i); + unsigned int subblock_duration = constant_subblock_duration; + + if (constant_subblock_duration == 0) + subblock_duration = ffio_read_leb(pb); + + switch (type) { + case AV_IAMF_PARAMETER_DEFINITION_MIX_GAIN: { + AVIAMFMixGain *mix = subblock; + mix->subblock_duration = subblock_duration; + break; + } + case AV_IAMF_PARAMETER_DEFINITION_DEMIXING: { + AVIAMFDemixingInfo *demix = subblock; + demix->subblock_duration = subblock_duration; + // DefaultDemixingInfoParameterData + av_assert0(audio_element); + demix->dmixp_mode = avio_r8(pb) >> 5; + audio_element->element->default_w = avio_r8(pb) >> 4; + break; + } + case AV_IAMF_PARAMETER_DEFINITION_RECON_GAIN: { + AVIAMFReconGain *recon = subblock; + recon->subblock_duration = subblock_duration; + break; + } + default: + av_free(param); + return AVERROR_INVALIDDATA; + } + } + + param->parameter_id = parameter_id; + param->parameter_rate = parameter_rate; + param->duration = duration; + param->constant_subblock_duration = constant_subblock_duration; + param->nb_subblocks = nb_subblocks; + + if (param_definition) { + if (param_definition->param_size != param_size || memcmp(param_definition->param, param, param_size)) { + av_log(s, AV_LOG_ERROR, "Incosistent parameters for parameter_id %u\n", parameter_id); + av_free(param); + return AVERROR_INVALIDDATA; + } + } else { + IAMFParamDefinition **tmp = av_realloc_array(c->param_definitions, c->nb_param_definitions + 1, + sizeof(*c->param_definitions)); + if (!tmp) { + av_free(param); + return AVERROR(ENOMEM); + } + c->param_definitions = tmp; + + param_definition = av_mallocz(sizeof(*param_definition)); + if (!param_definition) { + av_free(param); + return AVERROR(ENOMEM); + } + param_definition->param = param; + param_definition->mode = !mode; + param_definition->param_size = param_size; + param_definition->audio_element = audio_element; + + c->param_definitions[c->nb_param_definitions++] = param_definition; + } + + av_assert0(out_param_definition); + *out_param_definition = param; + + return 0; +} + +static IAMFCodecConfig *get_codec_config(IAMFContext *c, unsigned int codec_config_id) +{ + for (int i = 0; i < c->nb_codec_configs; i++) { + if (c->codec_configs[i]->codec_config_id == codec_config_id) + return c->codec_configs[i]; + } + + return NULL; +} + +static int audio_element_obu(void *s, IAMFContext *c, AVIOContext *pb, int len) +{ + const IAMFCodecConfig *codec_config; + AVIAMFAudioElement *element; + IAMFAudioElement **tmp, *audio_element = NULL; + FFIOContext b; + AVIOContext *pbc; + uint8_t *buf; + unsigned audio_element_id, codec_config_id, num_parameters; + int audio_element_type, ret; + + buf = av_malloc(len); + if (!buf) + return AVERROR(ENOMEM); + + ret = avio_read(pb, buf, len); + if (ret != len) { + if (ret >= 0) + ret = AVERROR_INVALIDDATA; + goto fail; + } + + ffio_init_context(&b, buf, len, 0, NULL, NULL, NULL, NULL); + pbc = &b.pub; + + audio_element_id = ffio_read_leb(pbc); + + for (int i = 0; i < c->nb_audio_elements; i++) + if (c->audio_elements[i]->audio_element_id == audio_element_id) { + av_log(s, AV_LOG_ERROR, "Duplicate audio_element_id %d\n", audio_element_id); + ret = AVERROR_INVALIDDATA; + goto fail; + } + + audio_element_type = avio_r8(pbc) >> 5; + codec_config_id = ffio_read_leb(pbc); + + codec_config = get_codec_config(c, codec_config_id); + if (!codec_config) { + av_log(s, AV_LOG_ERROR, "Non existant codec config id %d referenced in an audio element\n", codec_config_id); + ret = AVERROR_INVALIDDATA; + goto fail; + } + + if (codec_config->codec_id == AV_CODEC_ID_NONE) { + av_log(s, AV_LOG_DEBUG, "Unknown codec id referenced in an audio element. Ignoring\n"); + ret = 0; + goto fail; + } + + tmp = av_realloc_array(c->audio_elements, c->nb_audio_elements + 1, sizeof(*c->audio_elements)); + if (!tmp) { + ret = AVERROR(ENOMEM); + goto fail; + } + c->audio_elements = tmp; + + audio_element = av_mallocz(sizeof(*audio_element)); + if (!audio_element) { + ret = AVERROR(ENOMEM); + goto fail; + } + + audio_element->nb_substreams = ffio_read_leb(pbc); + audio_element->codec_config_id = codec_config_id; + audio_element->audio_element_id = audio_element_id; + audio_element->substreams = av_calloc(audio_element->nb_substreams, sizeof(*audio_element->substreams)); + if (!audio_element->substreams) { + ret = AVERROR(ENOMEM); + goto fail; + } + + element = audio_element->element = av_iamf_audio_element_alloc(); + if (!element) { + ret = AVERROR(ENOMEM); + goto fail; + } + + element->audio_element_type = audio_element_type; + + for (int i = 0; i < audio_element->nb_substreams; i++) { + IAMFSubStream *substream = &audio_element->substreams[i]; + + substream->codecpar = avcodec_parameters_alloc(); + if (!substream->codecpar) { + ret = AVERROR(ENOMEM); + goto fail; + } + + substream->audio_substream_id = ffio_read_leb(pbc); + + substream->codecpar->codec_type = AVMEDIA_TYPE_AUDIO; + substream->codecpar->codec_id = codec_config->codec_id; + substream->codecpar->frame_size = codec_config->nb_samples; + substream->codecpar->sample_rate = codec_config->sample_rate; + substream->codecpar->seek_preroll = codec_config->seek_preroll; + + switch(substream->codecpar->codec_id) { + case AV_CODEC_ID_AAC: + case AV_CODEC_ID_FLAC: + case AV_CODEC_ID_OPUS: + substream->codecpar->extradata = av_malloc(codec_config->extradata_size + AV_INPUT_BUFFER_PADDING_SIZE); + if (!substream->codecpar->extradata) { + ret = AVERROR(ENOMEM); + goto fail; + } + memcpy(substream->codecpar->extradata, codec_config->extradata, codec_config->extradata_size); + memset(substream->codecpar->extradata + codec_config->extradata_size, 0, AV_INPUT_BUFFER_PADDING_SIZE); + substream->codecpar->extradata_size = codec_config->extradata_size; + break; + } + } + + num_parameters = ffio_read_leb(pbc); + if (num_parameters && audio_element_type != 0) { + av_log(s, AV_LOG_ERROR, "Audio Element parameter count %u is invalid" + " for Scene representations\n", num_parameters); + ret = AVERROR_INVALIDDATA; + goto fail; + } + + for (int i = 0; i < num_parameters; i++) { + unsigned type; + + type = ffio_read_leb(pbc); + if (type == AV_IAMF_PARAMETER_DEFINITION_MIX_GAIN) + ret = AVERROR_INVALIDDATA; + else if (type == AV_IAMF_PARAMETER_DEFINITION_DEMIXING) + ret = param_parse(s, c, pbc, type, audio_element, &element->demixing_info); + else if (type == AV_IAMF_PARAMETER_DEFINITION_RECON_GAIN) + ret = param_parse(s, c, pbc, type, audio_element, &element->recon_gain_info); + else { + unsigned param_definition_size = ffio_read_leb(pbc); + avio_skip(pbc, param_definition_size); + } + if (ret < 0) + goto fail; + } + + if (audio_element_type == AV_IAMF_AUDIO_ELEMENT_TYPE_CHANNEL) { + ret = scalable_channel_layout_config(s, pbc, audio_element, codec_config); + if (ret < 0) + goto fail; + } else if (audio_element_type == AV_IAMF_AUDIO_ELEMENT_TYPE_SCENE) { + ret = ambisonics_config(s, pbc, audio_element, codec_config); + if (ret < 0) + goto fail; + } else { + unsigned audio_element_config_size = ffio_read_leb(pbc); + avio_skip(pbc, audio_element_config_size); + } + + c->audio_elements[c->nb_audio_elements++] = audio_element; + + len -= avio_tell(pbc); + if (len) + av_log(s, AV_LOG_WARNING, "Underread in audio_element_obu. %d bytes left at the end\n", len); + + ret = 0; +fail: + av_free(buf); + if (ret < 0) + ff_iamf_free_audio_element(&audio_element); + return ret; +} + +static int label_string(AVIOContext *pb, char **label) +{ + uint8_t buf[128]; + + avio_get_str(pb, sizeof(buf), buf, sizeof(buf)); + + if (pb->error) + return pb->error; + if (pb->eof_reached) + return AVERROR_INVALIDDATA; + *label = av_strdup(buf); + if (!*label) + return AVERROR(ENOMEM); + + return 0; +} + +static int mix_presentation_obu(void *s, IAMFContext *c, AVIOContext *pb, int len) +{ + AVIAMFMixPresentation *mix; + IAMFMixPresentation **tmp, *mix_presentation = NULL; + FFIOContext b; + AVIOContext *pbc; + uint8_t *buf; + unsigned mix_presentation_id; + int ret; + + buf = av_malloc(len); + if (!buf) + return AVERROR(ENOMEM); + + ret = avio_read(pb, buf, len); + if (ret != len) { + if (ret >= 0) + ret = AVERROR_INVALIDDATA; + goto fail; + } + + ffio_init_context(&b, buf, len, 0, NULL, NULL, NULL, NULL); + pbc = &b.pub; + + mix_presentation_id = ffio_read_leb(pbc); + + for (int i = 0; i < c->nb_mix_presentations; i++) + if (c->mix_presentations[i]->mix_presentation_id == mix_presentation_id) { + av_log(s, AV_LOG_ERROR, "Duplicate mix_presentation_id %d\n", mix_presentation_id); + ret = AVERROR_INVALIDDATA; + goto fail; + } + + tmp = av_realloc_array(c->mix_presentations, c->nb_mix_presentations + 1, sizeof(*c->mix_presentations)); + if (!tmp) { + ret = AVERROR(ENOMEM); + goto fail; + } + c->mix_presentations = tmp; + + mix_presentation = av_mallocz(sizeof(*mix_presentation)); + if (!mix_presentation) { + ret = AVERROR(ENOMEM); + goto fail; + } + + mix_presentation->mix_presentation_id = mix_presentation_id; + mix = mix_presentation->mix = av_iamf_mix_presentation_alloc(); + if (!mix) { + ret = AVERROR(ENOMEM); + goto fail; + } + + mix_presentation->count_label = ffio_read_leb(pbc); + mix_presentation->language_label = av_calloc(mix_presentation->count_label, + sizeof(*mix_presentation->language_label)); + if (!mix_presentation->language_label) { + ret = AVERROR(ENOMEM); + goto fail; + } + + for (int i = 0; i < mix_presentation->count_label; i++) { + ret = label_string(pbc, &mix_presentation->language_label[i]); + if (ret < 0) + goto fail; + } + + for (int i = 0; i < mix_presentation->count_label; i++) { + char *annotation = NULL; + ret = label_string(pbc, &annotation); + if (ret < 0) + goto fail; + ret = av_dict_set(&mix->annotations, mix_presentation->language_label[i], annotation, + AV_DICT_DONT_STRDUP_VAL | AV_DICT_DONT_OVERWRITE); + if (ret < 0) + goto fail; + } + + mix->nb_submixes = ffio_read_leb(pbc); + mix->submixes = av_calloc(mix->nb_submixes, sizeof(*mix->submixes)); + if (!mix->submixes) { + ret = AVERROR(ENOMEM); + goto fail; + } + + for (int i = 0; i < mix->nb_submixes; i++) { + AVIAMFSubmix *sub_mix; + + sub_mix = mix->submixes[i] = av_mallocz(sizeof(*sub_mix)); + if (!sub_mix) { + ret = AVERROR(ENOMEM); + goto fail; + } + + sub_mix->nb_elements = ffio_read_leb(pbc); + sub_mix->elements = av_calloc(sub_mix->nb_elements, sizeof(*sub_mix->elements)); + if (!sub_mix->elements) { + ret = AVERROR(ENOMEM); + goto fail; + } + + for (int j = 0; j < sub_mix->nb_elements; j++) { + AVIAMFSubmixElement *submix_element; + IAMFAudioElement *audio_element = NULL; + unsigned int rendering_config_extension_size; + + submix_element = sub_mix->elements[j] = av_mallocz(sizeof(*submix_element)); + if (!submix_element) { + ret = AVERROR(ENOMEM); + goto fail; + } + + submix_element->audio_element_id = ffio_read_leb(pbc); + + for (int k = 0; k < c->nb_audio_elements; k++) + if (c->audio_elements[k]->audio_element_id == submix_element->audio_element_id) { + audio_element = c->audio_elements[k]; + break; + } + + if (!audio_element) { + av_log(s, AV_LOG_ERROR, "Invalid Audio Element with id %u referenced by Mix Parameters %u\n", + submix_element->audio_element_id, mix_presentation_id); + ret = AVERROR_INVALIDDATA; + goto fail; + } + + for (int k = 0; k < mix_presentation->count_label; k++) { + char *annotation = NULL; + ret = label_string(pbc, &annotation); + if (ret < 0) + goto fail; + ret = av_dict_set(&submix_element->annotations, mix_presentation->language_label[k], annotation, + AV_DICT_DONT_STRDUP_VAL | AV_DICT_DONT_OVERWRITE); + if (ret < 0) + goto fail; + } + + submix_element->headphones_rendering_mode = avio_r8(pbc) >> 6; + + rendering_config_extension_size = ffio_read_leb(pbc); + avio_skip(pbc, rendering_config_extension_size); + + ret = param_parse(s, c, pbc, AV_IAMF_PARAMETER_DEFINITION_MIX_GAIN, + NULL, + &submix_element->element_mix_config); + if (ret < 0) + goto fail; + submix_element->default_mix_gain = av_make_q(sign_extend(avio_rb16(pbc), 16), 1 << 8); + } + + ret = param_parse(s, c, pbc, AV_IAMF_PARAMETER_DEFINITION_MIX_GAIN, NULL, &sub_mix->output_mix_config); + if (ret < 0) + goto fail; + sub_mix->default_mix_gain = av_make_q(sign_extend(avio_rb16(pbc), 16), 1 << 8); + + sub_mix->nb_layouts = ffio_read_leb(pbc); + sub_mix->layouts = av_calloc(sub_mix->nb_layouts, sizeof(*sub_mix->layouts)); + if (!sub_mix->layouts) { + ret = AVERROR(ENOMEM); + goto fail; + } + + for (int j = 0; j < sub_mix->nb_layouts; j++) { + AVIAMFSubmixLayout *submix_layout; + int info_type; + int byte = avio_r8(pbc); + + submix_layout = sub_mix->layouts[j] = av_mallocz(sizeof(*submix_layout)); + if (!submix_layout) { + ret = AVERROR(ENOMEM); + goto fail; + } + + submix_layout->layout_type = byte >> 6; + if (submix_layout->layout_type < AV_IAMF_SUBMIX_LAYOUT_TYPE_LOUDSPEAKERS && + submix_layout->layout_type > AV_IAMF_SUBMIX_LAYOUT_TYPE_BINAURAL) { + av_log(s, AV_LOG_ERROR, "Invalid Layout type %u in a submix from Mix Presentation %u\n", + submix_layout->layout_type, mix_presentation_id); + ret = AVERROR_INVALIDDATA; + goto fail; + } + if (submix_layout->layout_type == 2) { + int sound_system; + sound_system = (byte >> 2) & 0xF; + av_channel_layout_copy(&submix_layout->sound_system, &ff_iamf_sound_system_map[sound_system].layout); + } + + info_type = avio_r8(pbc); + submix_layout->integrated_loudness = av_make_q(sign_extend(avio_rb16(pbc), 16), 1 << 8); + submix_layout->digital_peak = av_make_q(sign_extend(avio_rb16(pbc), 16), 1 << 8); + + if (info_type & 1) + submix_layout->true_peak = av_make_q(sign_extend(avio_rb16(pbc), 16), 1 << 8); + if (info_type & 2) { + unsigned int num_anchored_loudness = avio_r8(pbc); + + for (int k = 0; k < num_anchored_loudness; k++) { + unsigned int anchor_element = avio_r8(pbc); + AVRational anchored_loudness = av_make_q(sign_extend(avio_rb16(pbc), 16), 1 << 8); + if (anchor_element == IAMF_ANCHOR_ELEMENT_DIALOGUE) + submix_layout->dialogue_anchored_loudness = anchored_loudness; + else if (anchor_element <= IAMF_ANCHOR_ELEMENT_ALBUM) + submix_layout->album_anchored_loudness = anchored_loudness; + else + av_log(s, AV_LOG_DEBUG, "Unknown anchor_element. Ignoring\n"); + } + } + + if (info_type & 0xFC) { + unsigned int info_type_size = ffio_read_leb(pbc); + avio_skip(pbc, info_type_size); + } + } + } + + c->mix_presentations[c->nb_mix_presentations++] = mix_presentation; + + len -= avio_tell(pbc); + if (len) + av_log(s, AV_LOG_WARNING, "Underread in mix_presentation_obu. %d bytes left at the end\n", len); + + ret = 0; +fail: + av_free(buf); + if (ret < 0) + ff_iamf_free_mix_presentation(&mix_presentation); + return ret; +} + +int ff_iamf_parse_obu_header(const uint8_t *buf, int buf_size, + unsigned *obu_size, int *start_pos, enum IAMF_OBU_Type *type, + unsigned *skip_samples, unsigned *discard_padding) +{ + GetBitContext gb; + int ret, extension_flag, trimming, start; + unsigned skip = 0, discard = 0; + unsigned size; + + ret = init_get_bits8(&gb, buf, FFMIN(buf_size, MAX_IAMF_OBU_HEADER_SIZE)); + if (ret < 0) + return ret; + + *type = get_bits(&gb, 5); + /*redundant =*/ get_bits1(&gb); + trimming = get_bits1(&gb); + extension_flag = get_bits1(&gb); + + *obu_size = get_leb(&gb); + if (*obu_size > INT_MAX) + return AVERROR_INVALIDDATA; + + start = get_bits_count(&gb) / 8; + + if (trimming) { + discard = get_leb(&gb); // num_samples_to_trim_at_end + skip = get_leb(&gb); // num_samples_to_trim_at_start + } + + if (skip_samples) + *skip_samples = skip; + if (discard_padding) + *discard_padding = discard; + + if (extension_flag) { + unsigned int extension_bytes; + extension_bytes = get_leb(&gb); + if (extension_bytes > INT_MAX / 8) + return AVERROR_INVALIDDATA; + skip_bits_long(&gb, extension_bytes * 8); + } + + if (get_bits_left(&gb) < 0) + return AVERROR_INVALIDDATA; + + size = *obu_size + start; + if (size > INT_MAX) + return AVERROR_INVALIDDATA; + + *obu_size -= get_bits_count(&gb) / 8 - start; + *start_pos = size - *obu_size; + + return size; +} + +int ff_iamfdec_read_descriptors(IAMFContext *c, AVIOContext *pb, + int max_size, void *log_ctx) +{ + uint8_t header[MAX_IAMF_OBU_HEADER_SIZE + AV_INPUT_BUFFER_PADDING_SIZE]; + int ret; + + while (1) { + unsigned obu_size; + enum IAMF_OBU_Type type; + int start_pos, len, size; + + if ((ret = ffio_ensure_seekback(pb, FFMIN(MAX_IAMF_OBU_HEADER_SIZE, max_size))) < 0) + return ret; + size = avio_read(pb, header, FFMIN(MAX_IAMF_OBU_HEADER_SIZE, max_size)); + if (size < 0) + return size; + + len = ff_iamf_parse_obu_header(header, size, &obu_size, &start_pos, &type, NULL, NULL); + if (len < 0 || obu_size > max_size) { + av_log(log_ctx, AV_LOG_ERROR, "Failed to read obu header\n"); + avio_seek(pb, -size, SEEK_CUR); + return len; + } + + if (type >= IAMF_OBU_IA_PARAMETER_BLOCK && type < IAMF_OBU_IA_SEQUENCE_HEADER) { + avio_seek(pb, -size, SEEK_CUR); + break; + } + + avio_seek(pb, -(size - start_pos), SEEK_CUR); + switch (type) { + case IAMF_OBU_IA_CODEC_CONFIG: + ret = codec_config_obu(log_ctx, c, pb, obu_size); + break; + case IAMF_OBU_IA_AUDIO_ELEMENT: + ret = audio_element_obu(log_ctx, c, pb, obu_size); + break; + case IAMF_OBU_IA_MIX_PRESENTATION: + ret = mix_presentation_obu(log_ctx, c, pb, obu_size); + break; + case IAMF_OBU_IA_TEMPORAL_DELIMITER: + break; + default: { + int64_t offset = avio_skip(pb, obu_size); + if (offset < 0) + ret = offset; + break; + } + } + if (ret < 0) { + av_log(log_ctx, AV_LOG_ERROR, "Failed to read obu type %d\n", type); + return ret; + } + max_size -= obu_size + start_pos; + if (max_size < 0) + return AVERROR_INVALIDDATA; + if (!max_size) + break; + } + + return 0; +} |