aboutsummaryrefslogtreecommitdiffstats
path: root/libavformat/iamf_parse.c
diff options
context:
space:
mode:
authorJames Almer <jamrial@gmail.com>2025-06-16 21:33:26 -0300
committerJames Almer <jamrial@gmail.com>2025-06-24 14:41:43 -0300
commitcd2461e627cc13cc43d68a87e6ebf90c4a7d2850 (patch)
treeaa314269d44439fab39e628792b441be300346d3 /libavformat/iamf_parse.c
parente5f23a3c5efe6f82c3f71e3d139af9162ffa9869 (diff)
downloadffmpeg-cd2461e627cc13cc43d68a87e6ebf90c4a7d2850.tar.gz
avformat/iamf: fix setting channel layout for Scalable layers
The way streams are coded in an IAMF struct follows a scalable model where the channel layouts for each layer may not match the channel order our API can represent in a Native order layout. For example, an audio element may have six coded streams in the form of two stereo streams, followed by two mono streams, and then by another two stereo streams, for a total of 10 channels, and define for them four scalable layers with loudspeaker_layout values "Stereo", "5.1ch", "5.1.2ch", and "5.1.4ch". The first layer references the first stream, and each following layer will reference all previous streams plus extra ones. In this case, the "5.1ch" layer will reference four streams (the first two stereo and the two mono) to encompass six channels, which does not match our native layout 5.1(side) given that FC and LFE come after FL+FR but before SL+SR, and here, they are at the end. For this reason, we need to build Custom order layouts that properly represent what we're exporting. ---- Before: Stream group #0:0[0x12c]: IAMF Audio Element: Layer 0: stereo Stream #0:0[0x0]: Audio: opus, 48000 Hz, stereo, fltp (default) Layer 1: 5.1(side) Stream #0:0[0x0]: Audio: opus, 48000 Hz, stereo, fltp (default) Stream #0:1[0x1]: Audio: opus, 48000 Hz, stereo, fltp (dependent) Stream #0:2[0x2]: Audio: opus, 48000 Hz, mono, fltp (dependent) Stream #0:3[0x3]: Audio: opus, 48000 Hz, mono, fltp (dependent) Layer 2: 5.1.2 Stream #0:0[0x0]: Audio: opus, 48000 Hz, stereo, fltp (default) Stream #0:1[0x1]: Audio: opus, 48000 Hz, stereo, fltp (dependent) Stream #0:2[0x2]: Audio: opus, 48000 Hz, mono, fltp (dependent) Stream #0:3[0x3]: Audio: opus, 48000 Hz, mono, fltp (dependent) Stream #0:4[0x4]: Audio: opus, 48000 Hz, stereo, fltp (dependent) Layer 3: 5.1.4 Stream #0:0[0x0]: Audio: opus, 48000 Hz, stereo, fltp (default) Stream #0:1[0x1]: Audio: opus, 48000 Hz, stereo, fltp (dependent) Stream #0:2[0x2]: Audio: opus, 48000 Hz, mono, fltp (dependent) Stream #0:3[0x3]: Audio: opus, 48000 Hz, mono, 
fltp (dependent) Stream #0:4[0x4]: Audio: opus, 48000 Hz, stereo, fltp (dependent) Stream #0:5[0x5]: Audio: opus, 48000 Hz, stereo, fltp (dependent) ---- After: Stream group #0:0[0x12c]: IAMF Audio Element: Layer 0: stereo Stream #0:0[0x0]: Audio: opus, 48000 Hz, stereo, fltp (default) Layer 1: 6 channels (FL+FR+SL+SR+FC+LFE) Stream #0:0[0x0]: Audio: opus, 48000 Hz, stereo, fltp (default) Stream #0:1[0x1]: Audio: opus, 48000 Hz, stereo, fltp (dependent) Stream #0:2[0x2]: Audio: opus, 48000 Hz, mono, fltp (dependent) Stream #0:3[0x3]: Audio: opus, 48000 Hz, mono, fltp (dependent) Layer 2: 8 channels (FL+FR+SL+SR+FC+LFE+TFL+TFR) Stream #0:0[0x0]: Audio: opus, 48000 Hz, stereo, fltp (default) Stream #0:1[0x1]: Audio: opus, 48000 Hz, stereo, fltp (dependent) Stream #0:2[0x2]: Audio: opus, 48000 Hz, mono, fltp (dependent) Stream #0:3[0x3]: Audio: opus, 48000 Hz, mono, fltp (dependent) Stream #0:4[0x4]: Audio: opus, 48000 Hz, stereo, fltp (dependent) Layer 3: 10 channels (FL+FR+SL+SR+FC+LFE+TFL+TFR+TBL+TBR) Stream #0:0[0x0]: Audio: opus, 48000 Hz, stereo, fltp (default) Stream #0:1[0x1]: Audio: opus, 48000 Hz, stereo, fltp (dependent) Stream #0:2[0x2]: Audio: opus, 48000 Hz, mono, fltp (dependent) Stream #0:3[0x3]: Audio: opus, 48000 Hz, mono, fltp (dependent) Stream #0:4[0x4]: Audio: opus, 48000 Hz, stereo, fltp (dependent) Stream #0:5[0x5]: Audio: opus, 48000 Hz, stereo, fltp (dependent) Signed-off-by: James Almer <jamrial@gmail.com>
Diffstat (limited to 'libavformat/iamf_parse.c')
-rw-r--r--libavformat/iamf_parse.c77
1 files changed, 70 insertions, 7 deletions
diff --git a/libavformat/iamf_parse.c b/libavformat/iamf_parse.c
index 756671f7bf..73e98200e3 100644
--- a/libavformat/iamf_parse.c
+++ b/libavformat/iamf_parse.c
@@ -364,7 +364,8 @@ static int scalable_channel_layout_config(void *s, AVIOContext *pb,
return AVERROR(ENOMEM);
audio_element->nb_layers = nb_layers;
- for (int i = 0; i < nb_layers; i++) {
+ for (int i = 0, n = 0; i < nb_layers; i++) {
+ AVChannelLayout ch_layout = { 0 };
AVIAMFLayer *layer;
int loudspeaker_layout, output_gain_is_present_flag;
int substream_count, coupled_substream_count;
@@ -394,12 +395,16 @@ static int scalable_channel_layout_config(void *s, AVIOContext *pb,
if (!i && loudspeaker_layout == 15)
expanded_loudspeaker_layout = avio_r8(pb);
- if (expanded_loudspeaker_layout > 0 && expanded_loudspeaker_layout < 13)
- av_channel_layout_copy(&layer->ch_layout, &ff_iamf_expanded_scalable_ch_layouts[expanded_loudspeaker_layout]);
- else if (loudspeaker_layout < 10)
- av_channel_layout_copy(&layer->ch_layout, &ff_iamf_scalable_ch_layouts[loudspeaker_layout]);
- else
- layer->ch_layout = (AVChannelLayout){ .order = AV_CHANNEL_ORDER_UNSPEC,
+ if (expanded_loudspeaker_layout > 0 && expanded_loudspeaker_layout < 13) {
+ av_channel_layout_copy(&ch_layout, &ff_iamf_expanded_scalable_ch_layouts[expanded_loudspeaker_layout]);
+ if (i)
+ ch_layout.u.mask &= ~av_channel_layout_subset(&audio_element->element->layers[i-1]->ch_layout, UINT64_MAX);
+ } else if (loudspeaker_layout < 10) {
+ av_channel_layout_copy(&ch_layout, &ff_iamf_scalable_ch_layouts[loudspeaker_layout]);
+ if (i)
+ ch_layout.u.mask &= ~av_channel_layout_subset(&audio_element->element->layers[i-1]->ch_layout, UINT64_MAX);
+ } else
+ ch_layout = (AVChannelLayout){ .order = AV_CHANNEL_ORDER_UNSPEC,
.nb_channels = substream_count +
coupled_substream_count };
@@ -414,6 +419,64 @@ static int scalable_channel_layout_config(void *s, AVIOContext *pb,
return ret;
}
+ if (ch_layout.order == AV_CHANNEL_ORDER_NATIVE) {
+ ret = av_channel_layout_custom_init(&layer->ch_layout, ch_layout.nb_channels);
+ if (ret < 0)
+ return ret;
+
+ for (int j = 0; j < n; j++)
+ layer->ch_layout.u.map[j].id = av_channel_layout_channel_from_index(&audio_element->element->layers[i-1]->ch_layout, j);
+
+ coupled_substream_count = audio_element->layers[i].coupled_substream_count;
+ while (coupled_substream_count--) {
+ if (ch_layout.u.mask & AV_CH_LAYOUT_STEREO) {
+ layer->ch_layout.u.map[n++].id = AV_CHAN_FRONT_LEFT;
+ layer->ch_layout.u.map[n++].id = AV_CHAN_FRONT_RIGHT;
+ ch_layout.u.mask &= ~AV_CH_LAYOUT_STEREO;
+ } else if (ch_layout.u.mask & (AV_CH_FRONT_LEFT_OF_CENTER|AV_CH_FRONT_RIGHT_OF_CENTER)) {
+ layer->ch_layout.u.map[n++].id = AV_CHAN_FRONT_LEFT_OF_CENTER;
+ layer->ch_layout.u.map[n++].id = AV_CHAN_FRONT_RIGHT_OF_CENTER;
+ ch_layout.u.mask &= ~(AV_CH_FRONT_LEFT_OF_CENTER|AV_CH_FRONT_RIGHT_OF_CENTER);
+ } else if (ch_layout.u.mask & (AV_CH_SIDE_LEFT|AV_CH_SIDE_RIGHT)) {
+ layer->ch_layout.u.map[n++].id = AV_CHAN_SIDE_LEFT;
+ layer->ch_layout.u.map[n++].id = AV_CHAN_SIDE_RIGHT;
+ ch_layout.u.mask &= ~(AV_CH_SIDE_LEFT|AV_CH_SIDE_RIGHT);
+ } else if (ch_layout.u.mask & (AV_CH_BACK_LEFT|AV_CH_BACK_RIGHT)) {
+ layer->ch_layout.u.map[n++].id = AV_CHAN_BACK_LEFT;
+ layer->ch_layout.u.map[n++].id = AV_CHAN_BACK_RIGHT;
+ ch_layout.u.mask &= ~(AV_CH_BACK_LEFT|AV_CH_BACK_RIGHT);
+ } else if (ch_layout.u.mask & (AV_CH_TOP_FRONT_LEFT|AV_CH_TOP_FRONT_RIGHT)) {
+ layer->ch_layout.u.map[n++].id = AV_CHAN_TOP_FRONT_LEFT;
+ layer->ch_layout.u.map[n++].id = AV_CHAN_TOP_FRONT_RIGHT;
+ ch_layout.u.mask &= ~(AV_CH_TOP_FRONT_LEFT|AV_CH_TOP_FRONT_RIGHT);
+ } else if (ch_layout.u.mask & (AV_CH_TOP_SIDE_LEFT|AV_CH_TOP_SIDE_RIGHT)) {
+ layer->ch_layout.u.map[n++].id = AV_CHAN_TOP_SIDE_LEFT;
+ layer->ch_layout.u.map[n++].id = AV_CHAN_TOP_SIDE_RIGHT;
+ ch_layout.u.mask &= ~(AV_CH_TOP_SIDE_LEFT|AV_CH_TOP_SIDE_RIGHT);
+ } else if (ch_layout.u.mask & (AV_CH_TOP_BACK_LEFT|AV_CH_TOP_BACK_RIGHT)) {
+ layer->ch_layout.u.map[n++].id = AV_CHAN_TOP_BACK_LEFT;
+ layer->ch_layout.u.map[n++].id = AV_CHAN_TOP_BACK_RIGHT;
+ ch_layout.u.mask &= ~(AV_CH_TOP_BACK_LEFT|AV_CH_TOP_BACK_RIGHT);
+ }
+ }
+
+ substream_count -= audio_element->layers[i].coupled_substream_count;
+ while (substream_count--) {
+ if (ch_layout.u.mask & AV_CH_FRONT_CENTER) {
+ layer->ch_layout.u.map[n++].id = AV_CHAN_FRONT_CENTER;
+ ch_layout.u.mask &= ~AV_CH_FRONT_CENTER;
+ }
+ if (ch_layout.u.mask & AV_CH_LOW_FREQUENCY) {
+ layer->ch_layout.u.map[n++].id = AV_CHAN_LOW_FREQUENCY;
+ ch_layout.u.mask &= ~AV_CH_LOW_FREQUENCY;
+ }
+ }
+
+ ret = av_channel_layout_retype(&layer->ch_layout, AV_CHANNEL_ORDER_NATIVE, 0);
+ if (ret < 0 && ret != AVERROR(ENOSYS))
+ return ret;
+ } else // AV_CHANNEL_ORDER_UNSPEC
+ av_channel_layout_copy(&layer->ch_layout, &ch_layout);
}
return 0;