avformat/iamf: fix setting channel layout for Scalable layers

The way streams are coded in an IAMF struct follows a scalable model where the channel layouts for each layer may not match the channel order our API can represent in a Native order layout. For example, an audio element may have six coded streams in the form of two stereo streams, followed by two mono streams, and then by another two stereo streams, for a total of 10 channels, and define for them four scalable layers with loudspeaker_layout values "Stereo", "5.1ch", "5.1.2ch", and "5.1.4ch". The first layer references the first stream, and each following layer will reference all previous streams plus extra ones. In this case, the "5.1ch" layer will reference four streams (the first two stereo and the two mono) to encompass six channels, which does not match our native layout 5.1(side) given that FC and LFE come after FL+FR but before SL+SR, and here, they are at the end. For this reason, we need to build Custom order layouts that properly represent what we're exporting. ---- Before: Stream group #0:0[0x12c]: IAMF Audio Element: Layer 0: stereo Stream #0:0[0x0]: Audio: opus, 48000 Hz, stereo, fltp (default) Layer 1: 5.1(side) Stream #0:0[0x0]: Audio: opus, 48000 Hz, stereo, fltp (default) Stream #0:1[0x1]: Audio: opus, 48000 Hz, stereo, fltp (dependent) Stream #0:2[0x2]: Audio: opus, 48000 Hz, mono, fltp (dependent) Stream #0:3[0x3]: Audio: opus, 48000 Hz, mono, fltp (dependent) Layer 2: 5.1.2 Stream #0:0[0x0]: Audio: opus, 48000 Hz, stereo, fltp (default) Stream #0:1[0x1]: Audio: opus, 48000 Hz, stereo, fltp (dependent) Stream #0:2[0x2]: Audio: opus, 48000 Hz, mono, fltp (dependent) Stream #0:3[0x3]: Audio: opus, 48000 Hz, mono, fltp (dependent) Stream #0:4[0x4]: Audio: opus, 48000 Hz, stereo, fltp (dependent) Layer 3: 5.1.4 Stream #0:0[0x0]: Audio: opus, 48000 Hz, stereo, fltp (default) Stream #0:1[0x1]: Audio: opus, 48000 Hz, stereo, fltp (dependent) Stream #0:2[0x2]: Audio: opus, 48000 Hz, mono, fltp (dependent) Stream #0:3[0x3]: Audio: opus, 48000 Hz, mono, 
fltp (dependent) Stream #0:4[0x4]: Audio: opus, 48000 Hz, stereo, fltp (dependent) Stream #0:5[0x5]: Audio: opus, 48000 Hz, stereo, fltp (dependent) ---- After: Stream group #0:0[0x12c]: IAMF Audio Element: Layer 0: stereo Stream #0:0[0x0]: Audio: opus, 48000 Hz, stereo, fltp (default) Layer 1: 6 channels (FL+FR+SL+SR+FC+LFE) Stream #0:0[0x0]: Audio: opus, 48000 Hz, stereo, fltp (default) Stream #0:1[0x1]: Audio: opus, 48000 Hz, stereo, fltp (dependent) Stream #0:2[0x2]: Audio: opus, 48000 Hz, mono, fltp (dependent) Stream #0:3[0x3]: Audio: opus, 48000 Hz, mono, fltp (dependent) Layer 2: 8 channels (FL+FR+SL+SR+FC+LFE+TFL+TFR) Stream #0:0[0x0]: Audio: opus, 48000 Hz, stereo, fltp (default) Stream #0:1[0x1]: Audio: opus, 48000 Hz, stereo, fltp (dependent) Stream #0:2[0x2]: Audio: opus, 48000 Hz, mono, fltp (dependent) Stream #0:3[0x3]: Audio: opus, 48000 Hz, mono, fltp (dependent) Stream #0:4[0x4]: Audio: opus, 48000 Hz, stereo, fltp (dependent) Layer 3: 10 channels (FL+FR+SL+SR+FC+LFE+TFL+TFR+TBL+TBR) Stream #0:0[0x0]: Audio: opus, 48000 Hz, stereo, fltp (default) Stream #0:1[0x1]: Audio: opus, 48000 Hz, stereo, fltp (dependent) Stream #0:2[0x2]: Audio: opus, 48000 Hz, mono, fltp (dependent) Stream #0:3[0x3]: Audio: opus, 48000 Hz, mono, fltp (dependent) Stream #0:4[0x4]: Audio: opus, 48000 Hz, stereo, fltp (dependent) Stream #0:5[0x5]: Audio: opus, 48000 Hz, stereo, fltp (dependent) Signed-off-by: James Almer <jamrial@gmail.com>
author: James Almer <jamrial@gmail.com> 2025-06-16 21:33:26 -0300
committer: James Almer <jamrial@gmail.com> 2025-06-24 14:41:43 -0300
commit: cd2461e627cc13cc43d68a87e6ebf90c4a7d2850 (patch)
tree: aa314269d44439fab39e628792b441be300346d3 /libavformat/iamf_parse.c
parent: e5f23a3c5efe6f82c3f71e3d139af9162ffa9869 (diff)
download: ffmpeg-cd2461e627cc13cc43d68a87e6ebf90c4a7d2850.tar.gz
1 files changed, 70 insertions, 7 deletions
diff --git a/libavformat/iamf_parse.c b/libavformat/iamf_parse.c
index 756671f7bf..73e98200e3 100644
--- a/libavformat/iamf_parse.c
+++ b/libavformat/iamf_parse.c
@@ -364,7 +364,8 @@ static int scalable_channel_layout_config(void *s, AVIOContext *pb,
         return AVERROR(ENOMEM);
 
     audio_element->nb_layers = nb_layers;
-    for (int i = 0; i < nb_layers; i++) {
+    for (int i = 0, n = 0; i < nb_layers; i++) {
+        AVChannelLayout ch_layout = { 0 };
         AVIAMFLayer *layer;
         int loudspeaker_layout, output_gain_is_present_flag;
         int substream_count, coupled_substream_count;
@@ -394,12 +395,16 @@ static int scalable_channel_layout_config(void *s, AVIOContext *pb,
 
         if (!i && loudspeaker_layout == 15)
             expanded_loudspeaker_layout = avio_r8(pb);
-        if (expanded_loudspeaker_layout > 0 && expanded_loudspeaker_layout < 13)
-            av_channel_layout_copy(&layer->ch_layout, &ff_iamf_expanded_scalable_ch_layouts[expanded_loudspeaker_layout]);
-        else if (loudspeaker_layout < 10)
-            av_channel_layout_copy(&layer->ch_layout, &ff_iamf_scalable_ch_layouts[loudspeaker_layout]);
-        else
-            layer->ch_layout = (AVChannelLayout){ .order = AV_CHANNEL_ORDER_UNSPEC,
+        if (expanded_loudspeaker_layout > 0 && expanded_loudspeaker_layout < 13) {
+            av_channel_layout_copy(&ch_layout, &ff_iamf_expanded_scalable_ch_layouts[expanded_loudspeaker_layout]);
+            if (i)
+                ch_layout.u.mask &= ~av_channel_layout_subset(&audio_element->element->layers[i-1]->ch_layout, UINT64_MAX);
+        } else if (loudspeaker_layout < 10) {
+            av_channel_layout_copy(&ch_layout, &ff_iamf_scalable_ch_layouts[loudspeaker_layout]);
+            if (i)
+                ch_layout.u.mask &= ~av_channel_layout_subset(&audio_element->element->layers[i-1]->ch_layout, UINT64_MAX);
+        } else
+            ch_layout = (AVChannelLayout){ .order = AV_CHANNEL_ORDER_UNSPEC,
                                                           .nb_channels = substream_count +
                                                                          coupled_substream_count };
 
@@ -414,6 +419,64 @@ static int scalable_channel_layout_config(void *s, AVIOContext *pb,
                 return ret;
         }
 
+        if (ch_layout.order == AV_CHANNEL_ORDER_NATIVE) {
+            ret = av_channel_layout_custom_init(&layer->ch_layout, ch_layout.nb_channels);
+            if (ret < 0)
+                return ret;
+
+            for (int j = 0; j < n; j++)
+                layer->ch_layout.u.map[j].id = av_channel_layout_channel_from_index(&audio_element->element->layers[i-1]->ch_layout, j);
+
+            coupled_substream_count = audio_element->layers[i].coupled_substream_count;
+            while (coupled_substream_count--) {
+                if (ch_layout.u.mask & AV_CH_LAYOUT_STEREO) {
+                    layer->ch_layout.u.map[n++].id = AV_CHAN_FRONT_LEFT;
+                    layer->ch_layout.u.map[n++].id = AV_CHAN_FRONT_RIGHT;
+                    ch_layout.u.mask &= ~AV_CH_LAYOUT_STEREO;
+                } else if (ch_layout.u.mask & (AV_CH_FRONT_LEFT_OF_CENTER|AV_CH_FRONT_RIGHT_OF_CENTER)) {
+                    layer->ch_layout.u.map[n++].id = AV_CHAN_FRONT_LEFT_OF_CENTER;
+                    layer->ch_layout.u.map[n++].id = AV_CHAN_FRONT_RIGHT_OF_CENTER;
+                    ch_layout.u.mask &= ~(AV_CH_FRONT_LEFT_OF_CENTER|AV_CH_FRONT_RIGHT_OF_CENTER);
+                } else if (ch_layout.u.mask & (AV_CH_SIDE_LEFT|AV_CH_SIDE_RIGHT)) {
+                    layer->ch_layout.u.map[n++].id = AV_CHAN_SIDE_LEFT;
+                    layer->ch_layout.u.map[n++].id = AV_CHAN_SIDE_RIGHT;
+                    ch_layout.u.mask &= ~(AV_CH_SIDE_LEFT|AV_CH_SIDE_RIGHT);
+                } else if (ch_layout.u.mask & (AV_CH_BACK_LEFT|AV_CH_BACK_RIGHT)) {
+                    layer->ch_layout.u.map[n++].id = AV_CHAN_BACK_LEFT;
+                    layer->ch_layout.u.map[n++].id = AV_CHAN_BACK_RIGHT;
+                    ch_layout.u.mask &= ~(AV_CH_BACK_LEFT|AV_CH_BACK_RIGHT);
+                } else if (ch_layout.u.mask & (AV_CH_TOP_FRONT_LEFT|AV_CH_TOP_FRONT_RIGHT)) {
+                    layer->ch_layout.u.map[n++].id = AV_CHAN_TOP_FRONT_LEFT;
+                    layer->ch_layout.u.map[n++].id = AV_CHAN_TOP_FRONT_RIGHT;
+                    ch_layout.u.mask &= ~(AV_CH_TOP_FRONT_LEFT|AV_CH_TOP_FRONT_RIGHT);
+                } else if (ch_layout.u.mask & (AV_CH_TOP_SIDE_LEFT|AV_CH_TOP_SIDE_RIGHT)) {
+                    layer->ch_layout.u.map[n++].id = AV_CHAN_TOP_SIDE_LEFT;
+                    layer->ch_layout.u.map[n++].id = AV_CHAN_TOP_SIDE_RIGHT;
+                    ch_layout.u.mask &= ~(AV_CH_TOP_SIDE_LEFT|AV_CH_TOP_SIDE_RIGHT);
+                } else if (ch_layout.u.mask & (AV_CH_TOP_BACK_LEFT|AV_CH_TOP_BACK_RIGHT)) {
+                    layer->ch_layout.u.map[n++].id = AV_CHAN_TOP_BACK_LEFT;
+                    layer->ch_layout.u.map[n++].id = AV_CHAN_TOP_BACK_RIGHT;
+                    ch_layout.u.mask &= ~(AV_CH_TOP_BACK_LEFT|AV_CH_TOP_BACK_RIGHT);
+                }
+            }
+
+            substream_count -= audio_element->layers[i].coupled_substream_count;
+            while (substream_count--) {
+                if (ch_layout.u.mask & AV_CH_FRONT_CENTER) {
+                    layer->ch_layout.u.map[n++].id = AV_CHAN_FRONT_CENTER;
+                    ch_layout.u.mask &= ~AV_CH_FRONT_CENTER;
+                }
+                if (ch_layout.u.mask & AV_CH_LOW_FREQUENCY) {
+                    layer->ch_layout.u.map[n++].id = AV_CHAN_LOW_FREQUENCY;
+                    ch_layout.u.mask &= ~AV_CH_LOW_FREQUENCY;
+                }
+            }
+
+            ret = av_channel_layout_retype(&layer->ch_layout, AV_CHANNEL_ORDER_NATIVE, 0);
+            if (ret < 0 && ret != AVERROR(ENOSYS))
+                return ret;
+        } else // AV_CHANNEL_ORDER_UNSPEC
+            av_channel_layout_copy(&layer->ch_layout, &ch_layout);
     }
 
     return 0;
author	James Almer <jamrial@gmail.com>	2025-06-16 21:33:26 -0300
committer	James Almer <jamrial@gmail.com>	2025-06-24 14:41:43 -0300
commit	cd2461e627cc13cc43d68a87e6ebf90c4a7d2850 (patch)
tree	aa314269d44439fab39e628792b441be300346d3 /libavformat/iamf_parse.c
parent	e5f23a3c5efe6f82c3f71e3d139af9162ffa9869 (diff)
download	ffmpeg-cd2461e627cc13cc43d68a87e6ebf90c4a7d2850.tar.gz