fftools/ffmpeg: add support for multiview video

This extends the syntax for specifying input streams in -map and complex filtergraph labels, to allow selecting a view by view ID, index, or position. The corresponding decoder is then set up to decode the appropriate view and send frames for that view to the correct filtergraph input(s).
author: Anton Khirnov <anton@khirnov.net> 2024-08-10 18:36:49 +0200
committer: Anton Khirnov <anton@khirnov.net> 2024-09-23 17:15:02 +0200
commit: da420ac6e1b7d710c9e5ec97531a94ae1596de3c (patch)
tree: 0ae241b07ac970106ae5a2101902cac71d9a84c8
parent: 68c198fae2851bc68bb3e600424753d31a9be2ce (diff)
download: ffmpeg-da420ac6e1b7d710c9e5ec97531a94ae1596de3c.tar.gz
9 files changed, 610 insertions, 55 deletions
diff --git a/doc/ffmpeg.texi b/doc/ffmpeg.texi
index 842e92ad1a..34007f7ea2 100644
--- a/doc/ffmpeg.texi
+++ b/doc/ffmpeg.texi
@@ -1799,7 +1799,7 @@ Set the size of the canvas used to render subtitles.
 @section Advanced options
 
 @table @option
-@item -map [-]@var{input_file_id}[:@var{stream_specifier}][?] | @var{[linklabel]} (@emph{output})
+@item -map [-]@var{input_file_id}[:@var{stream_specifier}][:@var{view_specifier}][?] | @var{[linklabel]} (@emph{output})
 
 Create one or more streams in the output file. This option has two forms for
 specifying the data source(s): the first selects one or more streams from some
@@ -1814,6 +1814,26 @@ only those streams that match the specifier are used (see the
 A @code{-} character before the stream identifier creates a "negative" mapping.
 It disables matching streams from already created mappings.
 
+An optional @var{view_specifier} may be given after the stream specifier, which
+for multiview video specifies the view to be used. The view specifier may have
+one of the following formats:
+@table @option
+@item view:@var{view_id}
+select a view by its ID; @var{view_id} may be set to @code{all} to use all the
+views interleaved into one stream;
+
+@item vidx:@var{view_idx}
+select a view by its index; i.e. 0 is the base view, 1 is the first non-base
+view, etc.
+
+@item vpos:@var{position}
+select a view by its display position; @var{position} may be @code{left} or
+@code{right}
+@end table
+The default for transcoding is to only use the base view, i.e. the equivalent of
+@code{vidx:0}. For streamcopy, view specifiers are not supported and all views
+are always copied.
+
 A trailing @code{?} after the stream index will allow the map to be
 optional: if the map matches no streams the map will be ignored instead
 of failing. Note the map will still fail if an invalid input file index
@@ -2206,11 +2226,15 @@ distinguished by the format of the corresponding link label:
 @item
 To connect an input stream, use @code{[file_index:stream_specifier]} (i.e. the
 same syntax as @option{-map}). If @var{stream_specifier} matches multiple
-streams, the first one will be used.
+streams, the first one will be used. For multiview video, the stream specifier
+may be followed by the view specifier, see documentation for the @option{-map}
+option for its syntax.
 
 @item
 To connect a loopback decoder use [dec:@var{dec_idx}], where @var{dec_idx} is
-the index of the loopback decoder to be connected to given input.
+the index of the loopback decoder to be connected to given input. For multiview
+video, the decoder index may be followed by the view specifier, see
+documentation for the @option{-map} option for its syntax.
 
 @item
 To connect an output from another complex filtergraph, use its link label. E.g
diff --git a/fftools/cmdutils.c b/fftools/cmdutils.c
index 8d06358cac..9beed94ead 100644
--- a/fftools/cmdutils.c
+++ b/fftools/cmdutils.c
@@ -988,7 +988,7 @@ FILE *get_preset_file(char *filename, size_t filename_size,
     return f;
 }
 
-static int cmdutils_isalnum(char c)
+int cmdutils_isalnum(char c)
 {
     return (c >= '0' && c <= '9') ||
            (c >= 'A' && c <= 'Z') ||
diff --git a/fftools/cmdutils.h b/fftools/cmdutils.h
index e74aa2a98d..316b6a8c64 100644
--- a/fftools/cmdutils.h
+++ b/fftools/cmdutils.h
@@ -543,4 +543,6 @@ void remove_avoptions(AVDictionary **a, AVDictionary *b);
 /* Check if any keys exist in dictionary m */
 int check_avoptions(AVDictionary *m);
 
+int cmdutils_isalnum(char c);
+
 #endif /* FFTOOLS_CMDUTILS_H */
diff --git a/fftools/ffmpeg.h b/fftools/ffmpeg.h
index f4a10b2a66..733d551fa4 100644
--- a/fftools/ffmpeg.h
+++ b/fftools/ffmpeg.h
@@ -112,12 +112,32 @@ typedef struct HWDevice {
     AVBufferRef *device_ref;
 } HWDevice;
 
+enum ViewSpecifierType { // tells how ViewSpecifier.val is to be interpreted
+    // no specifier given
+    VIEW_SPECIFIER_TYPE_NONE = 0,
+    // val is view index; index 0 is the base view
+    VIEW_SPECIFIER_TYPE_IDX,
+    // val is view ID
+    VIEW_SPECIFIER_TYPE_ID,
+    // specify view by its position, val is AV_STEREO3D_VIEW_LEFT/RIGHT
+    VIEW_SPECIFIER_TYPE_POS,
+    // use all views, val is ignored
+    VIEW_SPECIFIER_TYPE_ALL,
+};
+
+typedef struct ViewSpecifier { // selects view(s) of a multiview video stream
+    enum ViewSpecifierType type; // how val is to be interpreted
+    unsigned               val;  // view index, ID or position, depending on type
+} ViewSpecifier;
+
 /* select an input stream for an output stream */
 typedef struct StreamMap {
     int disabled;           /* 1 is this mapping is disabled by a negative map */
     int file_index;
     int stream_index;
     char *linklabel;       /* name of an output link, for mapping lavfi outputs */
+
+    ViewSpecifier vs;
 } StreamMap;
 
 typedef struct OptionsContext {
@@ -318,6 +338,10 @@ typedef struct OutputFilterOptions {
     const AVRational         *frame_rates;
     const enum AVColorSpace  *color_spaces;
     const enum AVColorRange  *color_ranges;
+
+    // for simple filtergraphs only, view specifier passed
+    // along to the decoder
+    const ViewSpecifier *vs;
 } OutputFilterOptions;
 
 typedef struct InputFilter {
@@ -817,7 +841,21 @@ void dec_free(Decoder **pdec);
  *
  * @param opts filtergraph input options, to be filled by this function
  */
-int dec_filter_add(Decoder *dec, InputFilter *ifilter, InputFilterOptions *opts);
+int dec_filter_add(Decoder *dec, InputFilter *ifilter, InputFilterOptions *opts,
+                   const ViewSpecifier *vs, SchedulerNode *src);
+
+/*
+ * For multiview video, request output of the view(s) determined by vs.
+ * May be called multiple times.
+ *
+ * If this function is never called, only the base view is output. If it is
+ * called at least once, only the views requested are output.
+ *
+ * @param src set to the scheduler node from which the frames corresponding
+ *            to vs will originate
+ */
+int dec_request_view(Decoder *dec, const ViewSpecifier *vs,
+                     SchedulerNode *src);
 
 int enc_alloc(Encoder **penc, const AVCodec *codec,
               Scheduler *sch, unsigned sch_idx);
@@ -847,7 +885,8 @@ void ifile_close(InputFile **f);
 
 int ist_output_add(InputStream *ist, OutputStream *ost);
 int ist_filter_add(InputStream *ist, InputFilter *ifilter, int is_simple,
-                   InputFilterOptions *opts);
+                   const ViewSpecifier *vs, InputFilterOptions *opts,
+                   SchedulerNode *src);
 
 /**
  * Find an unused input stream of given type.
@@ -875,6 +914,8 @@ void opt_match_per_stream_int64(void *logctx, const SpecifierOptList *sol,
 void opt_match_per_stream_dbl(void *logctx, const SpecifierOptList *sol,
                               AVFormatContext *fc, AVStream *st, double *out);
 
+int view_specifier_parse(const char **pspec, ViewSpecifier *vs);
+
 int muxer_thread(void *arg);
 int encoder_thread(void *arg);
 
diff --git a/fftools/ffmpeg_dec.c b/fftools/ffmpeg_dec.c
index 54f7223f0f..2723a0312e 100644
--- a/fftools/ffmpeg_dec.c
+++ b/fftools/ffmpeg_dec.c
@@ -16,6 +16,8 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
+#include <stdbit.h>
+
 #include "libavutil/avassert.h"
 #include "libavutil/avstring.h"
 #include "libavutil/dict.h"
@@ -25,6 +27,7 @@
 #include "libavutil/opt.h"
 #include "libavutil/pixdesc.h"
 #include "libavutil/pixfmt.h"
+#include "libavutil/stereo3d.h"
 #include "libavutil/time.h"
 #include "libavutil/timestamp.h"
 
@@ -39,6 +42,7 @@ typedef struct DecoderPriv {
     AVCodecContext     *dec_ctx;
 
     AVFrame            *frame;
+    AVFrame            *frame_tmp_ref;
     AVPacket           *pkt;
 
     // override output video sample aspect ratio with this value
@@ -77,6 +81,23 @@ typedef struct DecoderPriv {
     char                log_name[32];
     char               *parent_name;
 
+    // user specified decoder multiview options manually
+    int                 multiview_user_config;
+
+    struct {
+        ViewSpecifier   vs;
+        unsigned        out_idx;
+    }                  *views_requested;
+    int              nb_views_requested;
+
+    /* A map of view ID to decoder outputs.
+     * MUST NOT be accessed outside of get_format()/get_buffer() */
+    struct {
+        unsigned        id;
+        uintptr_t       out_mask;
+    }                  *view_map;
+    int              nb_view_map;
+
     struct {
         AVDictionary       *opts;
         const AVCodec      *codec;
@@ -106,6 +127,7 @@ void dec_free(Decoder **pdec)
     avcodec_free_context(&dp->dec_ctx);
 
     av_frame_free(&dp->frame);
+    av_frame_free(&dp->frame_tmp_ref);
     av_packet_free(&dp->pkt);
 
     av_dict_free(&dp->standalone_init.opts);
@@ -116,6 +138,9 @@ void dec_free(Decoder **pdec)
 
     av_freep(&dp->parent_name);
 
+    av_freep(&dp->views_requested);
+    av_freep(&dp->view_map);
+
     av_freep(pdec);
 }
 
@@ -357,7 +382,8 @@ fail:
     return err;
 }
 
-static int video_frame_process(DecoderPriv *dp, AVFrame *frame)
+static int video_frame_process(DecoderPriv *dp, AVFrame *frame,
+                               unsigned *outputs_mask)
 {
 #if FFMPEG_OPT_TOP
     if (dp->flags & DECODER_FLAG_TOP_FIELD_FIRST) {
@@ -419,6 +445,9 @@ static int video_frame_process(DecoderPriv *dp, AVFrame *frame)
         }
     }
 
+    if (frame->opaque)
+        *outputs_mask = (uintptr_t)frame->opaque;
+
     return 0;
 }
 
@@ -715,6 +744,7 @@ static int packet_decode(DecoderPriv *dp, AVPacket *pkt, AVFrame *frame)
 
     while (1) {
         FrameData *fd;
+        unsigned outputs_mask = 1;
 
         av_frame_unref(frame);
 
@@ -763,7 +793,7 @@ static int packet_decode(DecoderPriv *dp, AVPacket *pkt, AVFrame *frame)
 
             audio_ts_process(dp, frame);
         } else {
-            ret = video_frame_process(dp, frame);
+            ret = video_frame_process(dp, frame, &outputs_mask);
             if (ret < 0) {
                 av_log(dp, AV_LOG_FATAL,
                        "Error while processing the decoded data\n");
@@ -773,10 +803,28 @@ static int packet_decode(DecoderPriv *dp, AVPacket *pkt, AVFrame *frame)
 
         dp->dec.frames_decoded++;
 
-        ret = sch_dec_send(dp->sch, dp->sch_idx, 0, frame);
-        if (ret < 0) {
-            av_frame_unref(frame);
-            return ret == AVERROR_EOF ? AVERROR_EXIT : ret;
+        for (int i = 0; i < stdc_count_ones(outputs_mask); i++) {
+            AVFrame *to_send = frame;
+            int pos;
+
+            av_assert0(outputs_mask);
+            pos = stdc_trailing_zeros(outputs_mask);
+            outputs_mask &= ~(1U << pos);
+
+            // this is not the last output and sch_dec_send() consumes the frame
+            // given to it, so make a temporary reference
+            if (outputs_mask) {
+                to_send = dp->frame_tmp_ref;
+                ret = av_frame_ref(to_send, frame);
+                if (ret < 0)
+                    return ret;
+            }
+
+            ret = sch_dec_send(dp->sch, dp->sch_idx, pos, to_send);
+            if (ret < 0) {
+                av_frame_unref(to_send);
+                return ret == AVERROR_EOF ? AVERROR_EXIT : ret;
+            }
         }
     }
 }
@@ -975,10 +1023,307 @@ finish:
     return ret;
 }
 
+// Request the view(s) selected by vs; sets *src to the decoder output used.
+int dec_request_view(Decoder *d, const ViewSpecifier *vs, SchedulerNode *src)
+{
+    DecoderPriv *dp = dp_from_dec(d);
+    unsigned out_idx = 0;
+    int ret;
+
+    if (dp->multiview_user_config) {
+        if (!vs || vs->type == VIEW_SPECIFIER_TYPE_NONE) {
+            *src = SCH_DEC_OUT(dp->sch_idx, 0);
+            return 0;
+        }
+
+        av_log(dp, AV_LOG_ERROR,
+               "Manually selecting views with -view_ids cannot be combined "
+               "with view selection via stream specifiers. It is strongly "
+               "recommended you always use stream specifiers only.\n");
+        return AVERROR(EINVAL);
+    }
+
+    // when multiview_user_config is not set, NONE specifier is treated
+    // as requesting the base view
+    vs = (vs && vs->type != VIEW_SPECIFIER_TYPE_NONE) ? vs :
+         &(ViewSpecifier){ .type = VIEW_SPECIFIER_TYPE_IDX, .val = 0 };
+
+    // check if the specifier matches an already-existing one
+    for (int i = 0; i < dp->nb_views_requested; i++) {
+        const ViewSpecifier *vs1 = &dp->views_requested[i].vs;
+
+        if (vs->type == vs1->type &&
+            (vs->type == VIEW_SPECIFIER_TYPE_ALL || vs->val == vs1->val)) {
+            *src = SCH_DEC_OUT(dp->sch_idx, dp->views_requested[i].out_idx);
+            return 0;
+        }
+    }
+
+    // outputs are addressed by a bitmask that is stored in view_map[].out_mask
+    // but read back as unsigned in packet_decode(); the narrower type limits us
+    if (dp->nb_views_requested >= sizeof(unsigned) * 8 ||
+        dp->nb_views_requested >= sizeof(dp->view_map[0].out_mask) * 8) {
+        av_log(dp, AV_LOG_ERROR, "Too many view specifiers\n");
+        return AVERROR(ENOSYS);
+    }
+
+    ret = GROW_ARRAY(dp->views_requested, dp->nb_views_requested);
+    if (ret < 0)
+        return ret;
+
+    if (dp->nb_views_requested > 1) { // the first request keeps output 0
+        ret = sch_add_dec_output(dp->sch, dp->sch_idx);
+        if (ret < 0)
+            return ret;
+        out_idx = ret;
+    }
+
+    dp->views_requested[dp->nb_views_requested - 1].out_idx = out_idx;
+    dp->views_requested[dp->nb_views_requested - 1].vs      = *vs;
+
+    *src = SCH_DEC_OUT(dp->sch_idx, out_idx);
+
+    return 0;
+}
+
+static int multiview_setup(DecoderPriv *dp, AVCodecContext *dec_ctx)
+{
+    unsigned views_wanted = 0; // bit N set: view_ids_av[N] is to be decoded
+
+    unsigned nb_view_ids_av, nb_view_ids;
+    unsigned *view_ids_av = NULL, *view_pos_av = NULL;
+    int      *view_ids    = NULL;
+    int ret;
+    // Rebuild dp->view_map and the decoder's "view_ids" option from
+    // dp->views_requested; do nothing if no view/only view index 0 was requested
+    if (!dp->nb_views_requested ||
+        (dp->nb_views_requested == 1                               &&
+         dp->views_requested[0].vs.type == VIEW_SPECIFIER_TYPE_IDX &&
+         dp->views_requested[0].vs.val  == 0))
+        return 0;
+
+    av_freep(&dp->view_map);
+    dp->nb_view_map = 0;
+
+    // retrieve views available in current CVS (coded video sequence)
+    ret = av_opt_get_array_size(dec_ctx, "view_ids_available",
+                                AV_OPT_SEARCH_CHILDREN, &nb_view_ids_av);
+    if (ret < 0) {
+        av_log(dp, AV_LOG_ERROR,
+               "Multiview decoding requested, but decoder '%s' does not "
+               "support it\n", dec_ctx->codec->name);
+        return AVERROR(ENOSYS);
+    }
+
+    if (nb_view_ids_av) {
+        unsigned nb_view_pos_av;
+        // views_wanted needs one bit per available view
+        if (nb_view_ids_av >= sizeof(views_wanted) * 8) {
+            av_log(dp, AV_LOG_ERROR, "Too many views in video: %u\n", nb_view_ids_av);
+            ret = AVERROR(ENOSYS);
+            goto fail;
+        }
+
+        view_ids_av = av_calloc(nb_view_ids_av, sizeof(*view_ids_av));
+        if (!view_ids_av) {
+            ret = AVERROR(ENOMEM);
+            goto fail;
+        }
+
+        ret = av_opt_get_array(dec_ctx, "view_ids_available",
+                               AV_OPT_SEARCH_CHILDREN, 0, nb_view_ids_av,
+                               AV_OPT_TYPE_UINT, view_ids_av);
+        if (ret < 0)
+            goto fail;
+        // positions are only used if reported for every available view
+        ret = av_opt_get_array_size(dec_ctx, "view_pos_available",
+                                    AV_OPT_SEARCH_CHILDREN, &nb_view_pos_av);
+        if (ret >= 0 && nb_view_pos_av == nb_view_ids_av) {
+            view_pos_av = av_calloc(nb_view_ids_av, sizeof(*view_pos_av));
+            if (!view_pos_av) {
+                ret = AVERROR(ENOMEM);
+                goto fail;
+            }
+
+            ret = av_opt_get_array(dec_ctx, "view_pos_available",
+                                   AV_OPT_SEARCH_CHILDREN, 0, nb_view_ids_av,
+                                   AV_OPT_TYPE_UINT, view_pos_av);
+            if (ret < 0)
+                goto fail;
+        }
+    } else {
+        // assume there is a single view with ID=0
+        nb_view_ids_av = 1;
+        view_ids_av = av_calloc(nb_view_ids_av, sizeof(*view_ids_av));
+        view_pos_av = av_calloc(nb_view_ids_av, sizeof(*view_pos_av));
+        if (!view_ids_av || !view_pos_av) {
+            ret = AVERROR(ENOMEM);
+            goto fail;
+        }
+        view_pos_av[0] = AV_STEREO3D_VIEW_UNSPEC;
+    }
+
+    dp->view_map = av_calloc(nb_view_ids_av, sizeof(*dp->view_map));
+    if (!dp->view_map) {
+        ret = AVERROR(ENOMEM);
+        goto fail;
+    }
+    dp->nb_view_map = nb_view_ids_av;
+
+    for (int i = 0; i < dp->nb_view_map; i++)
+        dp->view_map[i].id = view_ids_av[i];
+
+    // figure out which views should go to which output
+    for (int i = 0; i < dp->nb_views_requested; i++) {
+        const ViewSpecifier *vs = &dp->views_requested[i].vs;
+        // bit i of a view's out_mask means: send that view to request i
+        switch (vs->type) {
+        case VIEW_SPECIFIER_TYPE_IDX:
+            if (vs->val >= nb_view_ids_av) {
+                av_log(dp, exit_on_error ? AV_LOG_ERROR : AV_LOG_WARNING,
+                       "View with index %u requested, but only %u views available "
+                       "in current video sequence (more views may or may not be "
+                       "available in later sequences).\n",
+                       vs->val, nb_view_ids_av);
+                if (exit_on_error) {
+                    ret = AVERROR(EINVAL);
+                    goto fail;
+                }
+
+                continue;
+            }
+            views_wanted                   |= 1U   << vs->val;
+            dp->view_map[vs->val].out_mask |= 1ULL << i;
+            // NOTE(review): presumably out_idx of request i is i - confirm
+            break;
+        case VIEW_SPECIFIER_TYPE_ID: {
+            int view_idx = -1;
+
+            for (unsigned j = 0; j < nb_view_ids_av; j++) {
+                if (view_ids_av[j] == vs->val) {
+                    view_idx = j;
+                    break;
+                }
+            }
+            if (view_idx < 0) {
+                av_log(dp, exit_on_error ? AV_LOG_ERROR : AV_LOG_WARNING,
+                       "View with ID %u requested, but is not available "
+                       "in the video sequence\n", vs->val);
+                if (exit_on_error) {
+                    ret = AVERROR(EINVAL);
+                    goto fail;
+                }
+
+                continue;
+            }
+            views_wanted                    |= 1U   << view_idx;
+            dp->view_map[view_idx].out_mask |= 1ULL << i;
+
+            break;
+            }
+        case VIEW_SPECIFIER_TYPE_POS: {
+            int view_idx = -1;
+
+            for (unsigned j = 0; view_pos_av && j < nb_view_ids_av; j++) {
+                if (view_pos_av[j] == vs->val) {
+                    view_idx = j;
+                    break;
+                }
+            }
+            if (view_idx < 0) {
+                av_log(dp, exit_on_error ? AV_LOG_ERROR : AV_LOG_WARNING,
+                       "View position '%s' requested, but is not available "
+                       "in the video sequence\n", av_stereo3d_view_name(vs->val));
+                if (exit_on_error) {
+                    ret = AVERROR(EINVAL);
+                    goto fail;
+                }
+
+                continue;
+            }
+            views_wanted                    |= 1U   << view_idx;
+            dp->view_map[view_idx].out_mask |= 1ULL << i;
+
+            break;
+            }
+        case VIEW_SPECIFIER_TYPE_ALL:
+            views_wanted |= (1U << nb_view_ids_av) - 1;
+
+            for (int j = 0; j < dp->nb_view_map; j++)
+                dp->view_map[j].out_mask |= 1ULL << i;
+
+            break;
+        }
+    }
+    if (!views_wanted) {
+        av_log(dp, AV_LOG_ERROR, "No views were selected for decoding\n");
+        ret = AVERROR(EINVAL);
+        goto fail;
+    }
+
+    // signal to decoder which views we want
+    nb_view_ids = stdc_count_ones(views_wanted);
+    view_ids = av_malloc_array(nb_view_ids, sizeof(*view_ids));
+    if (!view_ids) {
+        ret = AVERROR(ENOMEM);
+        goto fail;
+    }
+    // turn the set bits of views_wanted back into view IDs
+    for (unsigned i = 0; i < nb_view_ids; i++) {
+        int pos;
+
+        av_assert0(views_wanted);
+        pos = stdc_trailing_zeros(views_wanted);
+        views_wanted &= ~(1U << pos);
+
+        view_ids[i] = view_ids_av[pos];
+    }
+
+    // unset view_ids in case we set it earlier
+    av_opt_set(dec_ctx, "view_ids", NULL, AV_OPT_SEARCH_CHILDREN);
+
+    ret = av_opt_set_array(dec_ctx, "view_ids", AV_OPT_SEARCH_CHILDREN,
+                           0, nb_view_ids, AV_OPT_TYPE_INT, view_ids);
+    if (ret < 0)
+        goto fail;
+    // packet_decode() needs this when a frame goes to more than one output
+    if (!dp->frame_tmp_ref) {
+        dp->frame_tmp_ref = av_frame_alloc();
+        if (!dp->frame_tmp_ref) {
+            ret = AVERROR(ENOMEM);
+            goto fail;
+        }
+    }
+    // the cleanup below is reached on success as well
+fail:
+    av_freep(&view_ids_av);
+    av_freep(&view_pos_av);
+    av_freep(&view_ids);
+
+    return ret;
+}
+
+static void multiview_check_manual(DecoderPriv *dp, const AVDictionary *dec_opts)
+{
+    if (!av_dict_get(dec_opts, "view_ids", NULL, 0))
+        return; // view_ids not among the decoder options: nothing to record
+    av_log(dp, AV_LOG_WARNING, "Manually selecting views with -view_ids "
+           "is not recommended, use view specifiers instead\n");
+    dp->multiview_user_config = 1; // dec_request_view() checks this flag
+}
+
 static enum AVPixelFormat get_format(AVCodecContext *s, const enum AVPixelFormat *pix_fmts)
 {
     DecoderPriv  *dp = s->opaque;
     const enum AVPixelFormat *p;
+    int ret;
+
+    ret = multiview_setup(dp, s);
+    if (ret < 0) {
+        av_log(dp, AV_LOG_ERROR, "Error setting up multiview decoding: %s\n",
+               av_err2str(ret));
+        return AV_PIX_FMT_NONE;
+    }
 
     for (p = pix_fmts; *p != AV_PIX_FMT_NONE; p++) {
         const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(*p);
@@ -1009,6 +1354,26 @@ static enum AVPixelFormat get_format(AVCodecContext *s, const enum AVPixelFormat
     return *p;
 }
 
+static int get_buffer(AVCodecContext *dec_ctx, AVFrame *frame, int flags)
+{
+    DecoderPriv *dp = dec_ctx->opaque;
+    const AVFrameSideData *sd;
+    int view_id = 0, idx = 0;
+
+    // multiview only: tag the frame with the output mask mapped to its view ID
+    if (dp->nb_view_map) {
+        sd = av_frame_get_side_data(frame, AV_FRAME_DATA_VIEW_ID);
+        if (sd) // frames without view ID side data count as view 0
+            view_id = *(int*)sd->data;
+        while (idx < dp->nb_view_map && dp->view_map[idx].id != view_id)
+            idx++;
+        if (idx < dp->nb_view_map) // opaque is left untouched for unmapped IDs
+            frame->opaque = (void*)dp->view_map[idx].out_mask;
+    }
+
+    return avcodec_default_get_buffer2(dec_ctx, frame, flags);
+}
+
 static HWDevice *hw_device_match_by_codec(const AVCodec *codec)
 {
     const AVCodecHWConfig *config;
@@ -1202,6 +1567,7 @@ static int dec_open(DecoderPriv *dp, AVDictionary **dec_opts,
 
     dp->dec_ctx->opaque                = dp;
     dp->dec_ctx->get_format            = get_format;
+    dp->dec_ctx->get_buffer2           = get_buffer;
     dp->dec_ctx->pkt_timebase          = o->time_base;
 
     if (!av_dict_get(*dec_opts, "threads", NULL, 0))
@@ -1291,6 +1657,8 @@ int dec_init(Decoder **pdec, Scheduler *sch,
     if (ret < 0)
         return ret;
 
+    multiview_check_manual(dp, *dec_opts);
+
     ret = dec_open(dp, dec_opts, o, param_out);
     if (ret < 0)
         goto fail;
@@ -1363,6 +1731,8 @@ int dec_create(const OptionsContext *o, const char *arg, Scheduler *sch)
     if (ret < 0)
         return ret;
 
+    multiview_check_manual(dp, dp->standalone_init.opts);
+
     if (o->codec_names.nb_opt) {
         const char *name = o->codec_names.opt[o->codec_names.nb_opt - 1].u.str;
         dp->standalone_init.codec = avcodec_find_decoder_by_name(name);
@@ -1375,7 +1745,8 @@ int dec_create(const OptionsContext *o, const char *arg, Scheduler *sch)
     return 0;
 }
 
-int dec_filter_add(Decoder *d, InputFilter *ifilter, InputFilterOptions *opts)
+int dec_filter_add(Decoder *d, InputFilter *ifilter, InputFilterOptions *opts,
+                   const ViewSpecifier *vs, SchedulerNode *src)
 {
     DecoderPriv *dp = dp_from_dec(d);
     char name[16];
@@ -1385,5 +1756,5 @@ int dec_filter_add(Decoder *d, InputFilter *ifilter, InputFilterOptions *opts)
     if (!opts->name)
         return AVERROR(ENOMEM);
 
-    return dp->sch_idx;
+    return dec_request_view(d, vs, src);
 }
diff --git a/fftools/ffmpeg_demux.c b/fftools/ffmpeg_demux.c
index 0104c75c8b..13aef15eab 100644
--- a/fftools/ffmpeg_demux.c
+++ b/fftools/ffmpeg_demux.c
@@ -874,7 +874,8 @@ void ifile_close(InputFile **pf)
     av_freep(pf);
 }
 
-static int ist_use(InputStream *ist, int decoding_needed)
+static int ist_use(InputStream *ist, int decoding_needed,
+                   const ViewSpecifier *vs, SchedulerNode *src)
 {
     Demuxer      *d = demuxer_from_ifile(ist->file);
     DemuxStream *ds = ds_from_ist(ist);
@@ -961,15 +962,26 @@ static int ist_use(InputStream *ist, int decoding_needed)
         d->have_audio_dec |= is_audio;
     }
 
+    if (decoding_needed && ist->par->codec_type == AVMEDIA_TYPE_VIDEO) {
+        ret = dec_request_view(ist->decoder, vs, src);
+        if (ret < 0)
+            return ret;
+    } else {
+        *src = decoding_needed                             ?
+               SCH_DEC_OUT(ds->sch_idx_dec, 0)             :
+               SCH_DSTREAM(d->f.index, ds->sch_idx_stream);
+    }
+
     return 0;
 }
 
 int ist_output_add(InputStream *ist, OutputStream *ost)
 {
     DemuxStream *ds = ds_from_ist(ist);
+    SchedulerNode src;
     int ret;
 
-    ret = ist_use(ist, ost->enc ? DECODING_FOR_OST : 0);
+    ret = ist_use(ist, ost->enc ? DECODING_FOR_OST : 0, NULL, &src);
     if (ret < 0)
         return ret;
 
@@ -983,14 +995,16 @@ int ist_output_add(InputStream *ist, OutputStream *ost)
 }
 
 int ist_filter_add(InputStream *ist, InputFilter *ifilter, int is_simple,
-                   InputFilterOptions *opts)
+                   const ViewSpecifier *vs, InputFilterOptions *opts,
+                   SchedulerNode *src)
 {
     Demuxer      *d = demuxer_from_ifile(ist->file);
     DemuxStream *ds = ds_from_ist(ist);
     int64_t tsoffset = 0;
     int ret;
 
-    ret = ist_use(ist, is_simple ? DECODING_FOR_OST : DECODING_FOR_FILTER);
+    ret = ist_use(ist, is_simple ? DECODING_FOR_OST : DECODING_FOR_FILTER,
+                  vs, src);
     if (ret < 0)
         return ret;
 
@@ -1074,7 +1088,7 @@ int ist_filter_add(InputStream *ist, InputFilter *ifilter, int is_simple,
     opts->flags |= IFILTER_FLAG_AUTOROTATE * !!(ds->autorotate) |
                    IFILTER_FLAG_REINIT     * !!(ds->reinit_filters);
 
-    return ds->sch_idx_dec;
+    return 0;
 }
 
 static int choose_decoder(const OptionsContext *o, void *logctx,
diff --git a/fftools/ffmpeg_filter.c b/fftools/ffmpeg_filter.c
index c620f34dc3..7ec328e04e 100644
--- a/fftools/ffmpeg_filter.c
+++ b/fftools/ffmpeg_filter.c
@@ -659,11 +659,13 @@ static OutputFilter *ofilter_alloc(FilterGraph *fg, enum AVMediaType type)
     return ofilter;
 }
 
-static int ifilter_bind_ist(InputFilter *ifilter, InputStream *ist)
+static int ifilter_bind_ist(InputFilter *ifilter, InputStream *ist,
+                            const ViewSpecifier *vs)
 {
     InputFilterPriv *ifp = ifp_from_ifilter(ifilter);
     FilterGraphPriv *fgp = fgp_from_fg(ifilter->graph);
-    int ret, dec_idx;
+    SchedulerNode src;
+    int ret;
 
     av_assert0(!ifp->bound);
     ifp->bound = 1;
@@ -681,13 +683,13 @@ static int ifilter_bind_ist(InputFilter *ifilter, InputStream *ist)
     if (!ifp->opts.fallback)
         return AVERROR(ENOMEM);
 
-    dec_idx = ist_filter_add(ist, ifilter, filtergraph_is_simple(ifilter->graph),
-                             &ifp->opts);
-    if (dec_idx < 0)
-        return dec_idx;
+    ret = ist_filter_add(ist, ifilter, filtergraph_is_simple(ifilter->graph),
+                         vs, &ifp->opts, &src);
+    if (ret < 0)
+        return ret;
 
-    ret = sch_connect(fgp->sch, SCH_DEC_OUT(dec_idx, 0),
-                                SCH_FILTER_IN(fgp->sch_idx, ifp->index));
+    ret = sch_connect(fgp->sch,
+                      src, SCH_FILTER_IN(fgp->sch_idx, ifp->index));
     if (ret < 0)
         return ret;
 
@@ -712,10 +714,12 @@ static int ifilter_bind_ist(InputFilter *ifilter, InputStream *ist)
     return 0;
 }
 
-static int ifilter_bind_dec(InputFilterPriv *ifp, Decoder *dec)
+static int ifilter_bind_dec(InputFilterPriv *ifp, Decoder *dec,
+                            const ViewSpecifier *vs)
 {
     FilterGraphPriv *fgp = fgp_from_fg(ifp->ifilter.graph);
-    int ret, dec_idx;
+    SchedulerNode src;
+    int ret;
 
     av_assert0(!ifp->bound);
     ifp->bound = 1;
@@ -728,12 +732,11 @@ static int ifilter_bind_dec(InputFilterPriv *ifp, Decoder *dec)
 
     ifp->type_src = ifp->type;
 
-    dec_idx = dec_filter_add(dec, &ifp->ifilter, &ifp->opts);
-    if (dec_idx < 0)
-        return dec_idx;
+    ret = dec_filter_add(dec, &ifp->ifilter, &ifp->opts, vs, &src);
+    if (ret < 0)
+        return ret;
 
-    ret = sch_connect(fgp->sch, SCH_DEC_OUT(dec_idx, 0),
-                                SCH_FILTER_IN(fgp->sch_idx, ifp->index));
+    ret = sch_connect(fgp->sch, src, SCH_FILTER_IN(fgp->sch_idx, ifp->index));
     if (ret < 0)
         return ret;
 
@@ -1216,7 +1219,7 @@ int init_simple_filtergraph(InputStream *ist, OutputStream *ost,
 
     ost->filter = fg->outputs[0];
 
-    ret = ifilter_bind_ist(fg->inputs[0], ist);
+    ret = ifilter_bind_ist(fg->inputs[0], ist, opts->vs);
     if (ret < 0)
         return ret;
 
@@ -1240,28 +1243,38 @@ static int fg_complex_bind_input(FilterGraph *fg, InputFilter *ifilter)
     InputFilterPriv *ifp = ifp_from_ifilter(ifilter);
     InputStream *ist = NULL;
     enum AVMediaType type = ifp->type;
+    ViewSpecifier vs = { .type = VIEW_SPECIFIER_TYPE_NONE };
+    const char *spec;
+    char *p;
     int i, ret;
 
     if (ifp->linklabel && !strncmp(ifp->linklabel, "dec:", 4)) {
         // bind to a standalone decoder
         int dec_idx;
 
-        dec_idx = strtol(ifp->linklabel + 4, NULL, 0);
+        dec_idx = strtol(ifp->linklabel + 4, &p, 0);
         if (dec_idx < 0 || dec_idx >= nb_decoders) {
             av_log(fg, AV_LOG_ERROR, "Invalid decoder index %d in filtergraph description %s\n",
                    dec_idx, fgp->graph_desc);
             return AVERROR(EINVAL);
         }
 
-        ret = ifilter_bind_dec(ifp, decoders[dec_idx]);
+        if (type == AVMEDIA_TYPE_VIDEO) {
+            spec = *p == ':' ? p + 1 : p;
+            ret = view_specifier_parse(&spec, &vs);
+            if (ret < 0)
+                return ret;
+        }
+
+        ret = ifilter_bind_dec(ifp, decoders[dec_idx], &vs);
         if (ret < 0)
             av_log(fg, AV_LOG_ERROR, "Error binding a decoder to filtergraph input %s\n",
                    ifilter->name);
         return ret;
     } else if (ifp->linklabel) {
+        StreamSpecifier ss;
         AVFormatContext *s;
         AVStream       *st = NULL;
-        char *p;
         int file_idx;
 
         // try finding an unbound filtergraph output with this label
@@ -1298,17 +1311,33 @@ static int fg_complex_bind_input(FilterGraph *fg, InputFilter *ifilter)
         }
         s = input_files[file_idx]->ctx;
 
+        ret = stream_specifier_parse(&ss, *p == ':' ? p + 1 : p, 1, fg);
+        if (ret < 0) {
+            av_log(fg, AV_LOG_ERROR, "Invalid stream specifier: %s\n", p);
+            return ret;
+        }
+
+        if (type == AVMEDIA_TYPE_VIDEO) {
+            spec = ss.remainder ? ss.remainder : "";
+            ret = view_specifier_parse(&spec, &vs);
+            if (ret < 0) {
+                stream_specifier_uninit(&ss);
+                return ret;
+            }
+        }
+
         for (i = 0; i < s->nb_streams; i++) {
             enum AVMediaType stream_type = s->streams[i]->codecpar->codec_type;
             if (stream_type != type &&
                 !(stream_type == AVMEDIA_TYPE_SUBTITLE &&
                   type == AVMEDIA_TYPE_VIDEO /* sub2video hack */))
                 continue;
-            if (check_stream_specifier(s, s->streams[i], *p == ':' ? p + 1 : p) == 1) {
+            if (stream_specifier_match(&ss, s, s->streams[i], fg)) {
                 st = s->streams[i];
                 break;
             }
         }
+        stream_specifier_uninit(&ss);
         if (!st) {
             av_log(fg, AV_LOG_FATAL, "Stream specifier '%s' in filtergraph description %s "
                    "matches no streams.\n", p, fgp->graph_desc);
@@ -1333,7 +1362,7 @@ static int fg_complex_bind_input(FilterGraph *fg, InputFilter *ifilter)
     }
     av_assert0(ist);
 
-    ret = ifilter_bind_ist(ifilter, ist);
+    ret = ifilter_bind_ist(ifilter, ist, &vs);
     if (ret < 0) {
         av_log(fg, AV_LOG_ERROR,
                "Error binding an input stream to complex filtergraph input %s.\n",
diff --git a/fftools/ffmpeg_mux_init.c b/fftools/ffmpeg_mux_init.c
index db79929aaf..8afb018de8 100644
--- a/fftools/ffmpeg_mux_init.c
+++ b/fftools/ffmpeg_mux_init.c
@@ -918,7 +918,8 @@ static int
 ost_bind_filter(const Muxer *mux, MuxStream *ms, OutputFilter *ofilter,
                 const OptionsContext *o, char *filters,
                 AVRational enc_tb, enum VideoSyncMethod vsync_method,
-                int keep_pix_fmt, int autoscale, int threads_manual)
+                int keep_pix_fmt, int autoscale, int threads_manual,
+                const ViewSpecifier *vs)
 {
     OutputStream       *ost = &ms->ost;
     AVCodecContext *enc_ctx = ost->enc_ctx;
@@ -944,6 +945,7 @@ ost_bind_filter(const Muxer *mux, MuxStream *ms, OutputFilter *ofilter,
         .trim_duration_us = mux->of.recording_time,
         .ts_offset        = mux->of.start_time == AV_NOPTS_VALUE ?
                             0 : mux->of.start_time,
+        .vs               = vs,
 
         .flags = OFILTER_FLAG_DISABLE_CONVERT * !!keep_pix_fmt |
                  OFILTER_FLAG_AUTOSCALE       * !!autoscale    |
@@ -1138,7 +1140,7 @@ fail:
 }
 
 static int ost_add(Muxer *mux, const OptionsContext *o, enum AVMediaType type,
-                   InputStream *ist, OutputFilter *ofilter,
+                   InputStream *ist, OutputFilter *ofilter, const ViewSpecifier *vs,
                    OutputStream **post)
 {
     AVFormatContext *oc = mux->fc;
@@ -1497,7 +1499,7 @@ static int ost_add(Muxer *mux, const OptionsContext *o, enum AVMediaType type,
     if (ost->enc &&
         (type == AVMEDIA_TYPE_VIDEO || type == AVMEDIA_TYPE_AUDIO)) {
         ret = ost_bind_filter(mux, ms, ofilter, o, filters, enc_tb, vsync_method,
-                              keep_pix_fmt, autoscale, threads_manual);
+                              keep_pix_fmt, autoscale, threads_manual, vs);
         if (ret < 0)
             goto fail;
     } else if (ost->ist) {
@@ -1599,7 +1601,7 @@ static int map_auto_video(Muxer *mux, const OptionsContext *o)
        }
     }
     if (best_ist)
-        return ost_add(mux, o, AVMEDIA_TYPE_VIDEO, best_ist, NULL, NULL);
+        return ost_add(mux, o, AVMEDIA_TYPE_VIDEO, best_ist, NULL, NULL, NULL);
 
     return 0;
 }
@@ -1643,7 +1645,7 @@ static int map_auto_audio(Muxer *mux, const OptionsContext *o)
        }
     }
     if (best_ist)
-        return ost_add(mux, o, AVMEDIA_TYPE_AUDIO, best_ist, NULL, NULL);
+        return ost_add(mux, o, AVMEDIA_TYPE_AUDIO, best_ist, NULL, NULL, NULL);
 
     return 0;
 }
@@ -1680,7 +1682,7 @@ static int map_auto_subtitle(Muxer *mux, const OptionsContext *o)
                 input_descriptor && output_descriptor &&
                 (!input_descriptor->props ||
                  !output_descriptor->props)) {
-                return ost_add(mux, o, AVMEDIA_TYPE_SUBTITLE, ist, NULL, NULL);
+                return ost_add(mux, o, AVMEDIA_TYPE_SUBTITLE, ist, NULL, NULL, NULL);
             }
         }
 
@@ -1701,7 +1703,7 @@ static int map_auto_data(Muxer *mux, const OptionsContext *o)
             continue;
         if (ist->st->codecpar->codec_type == AVMEDIA_TYPE_DATA &&
             ist->st->codecpar->codec_id == codec_id) {
-            int ret = ost_add(mux, o, AVMEDIA_TYPE_DATA, ist, NULL, NULL);
+            int ret = ost_add(mux, o, AVMEDIA_TYPE_DATA, ist, NULL, NULL, NULL);
             if (ret < 0)
                 return ret;
         }
@@ -1743,10 +1745,13 @@ loop_end:
         av_log(mux, AV_LOG_VERBOSE, "Creating output stream from an explicitly "
                "mapped complex filtergraph %d, output [%s]\n", fg->index, map->linklabel);
 
-        ret = ost_add(mux, o, ofilter->type, NULL, ofilter, NULL);
+        ret = ost_add(mux, o, ofilter->type, NULL, ofilter, NULL, NULL);
         if (ret < 0)
             return ret;
     } else {
+        const ViewSpecifier *vs = map->vs.type == VIEW_SPECIFIER_TYPE_NONE ?
+                                  NULL : &map->vs;
+
         ist = input_files[map->file_index]->streams[map->stream_index];
         if (ist->user_set_discard == AVDISCARD_ALL) {
             av_log(mux, AV_LOG_FATAL, "Stream #%d:%d is disabled and cannot be mapped.\n",
@@ -1777,7 +1782,14 @@ loop_end:
             return 0;
         }
 
-        ret = ost_add(mux, o, ist->st->codecpar->codec_type, ist, NULL, NULL);
+        if (vs && ist->st->codecpar->codec_type != AVMEDIA_TYPE_VIDEO) {
+            av_log(mux, AV_LOG_ERROR,
+                   "View specifier given for mapping a %s input stream\n",
+                   av_get_media_type_string(ist->st->codecpar->codec_type));
+            return AVERROR(EINVAL);
+        }
+
+        ret = ost_add(mux, o, ist->st->codecpar->codec_type, ist, NULL, vs, NULL);
         if (ret < 0)
             return ret;
     }
@@ -1847,7 +1859,7 @@ read_fail:
             return AVERROR(ENOMEM);
         }
 
-        err = ost_add(mux, o, AVMEDIA_TYPE_ATTACHMENT, NULL, NULL, &ost);
+        err = ost_add(mux, o, AVMEDIA_TYPE_ATTACHMENT, NULL, NULL, NULL, &ost);
         if (err < 0) {
             av_free(attachment_filename);
             av_freep(&attachment);
@@ -1902,7 +1914,7 @@ static int create_streams(Muxer *mux, const OptionsContext *o)
                        av_get_media_type_string(ofilter->type));
             av_log(mux, AV_LOG_VERBOSE, "\n");
 
-            ret = ost_add(mux, o, ofilter->type, NULL, ofilter, NULL);
+            ret = ost_add(mux, o, ofilter->type, NULL, ofilter, NULL, NULL);
             if (ret < 0)
                 return ret;
         }
diff --git a/fftools/ffmpeg_opt.c b/fftools/ffmpeg_opt.c
index 1aa187f706..f639a1cf0a 100644
--- a/fftools/ffmpeg_opt.c
+++ b/fftools/ffmpeg_opt.c
@@ -46,6 +46,7 @@
 #include "libavutil/mem.h"
 #include "libavutil/opt.h"
 #include "libavutil/parseutils.h"
+#include "libavutil/stereo3d.h"
 
 HWDevice *filter_hw_device;
 
@@ -228,6 +229,82 @@ OPT_MATCH_PER_STREAM(int,   int,          OPT_TYPE_INT,    i);
 OPT_MATCH_PER_STREAM(int64, int64_t,      OPT_TYPE_INT64,  i64);
 OPT_MATCH_PER_STREAM(dbl,   double,       OPT_TYPE_DOUBLE, dbl);
 
+/**
+ * Parse an optional view specifier at the start of *pspec.
+ *
+ * Recognized forms are "view:<id>", "view:all", "vidx:<index>",
+ * "vpos:left" and "vpos:right". Numeric values are read with strtoul() in
+ * base 0, so decimal, 0x-prefixed hex and 0-prefixed octal are accepted.
+ *
+ * A string that starts with none of the known prefixes is not an error:
+ * vs->type is set to VIEW_SPECIFIER_TYPE_NONE and *pspec is left untouched.
+ *
+ * @param pspec in: string to parse; out: on a successful match, advanced past
+ *              the consumed view specifier. Left unchanged when nothing
+ *              matched or on error.
+ * @param vs    receives the parsed specifier; its contents should not be
+ *              relied upon when an error is returned
+ * @return 0 on success (including when no view specifier is present),
+ *         AVERROR(EINVAL) on a malformed view specifier
+ */
+int view_specifier_parse(const char **pspec, ViewSpecifier *vs)
+{
+    const char *spec = *pspec;
+    char *endptr;
+
+    vs->type = VIEW_SPECIFIER_TYPE_NONE;
+
+    if (!strncmp(spec, "view:", 5)) {
+        spec += 5;
+
+        // require a token boundary after "all", as for the vpos keywords
+        // below: otherwise e.g. "view:allx" would parse as "all" followed by
+        // trailing text, which not every caller checks for
+        if (!strncmp(spec, "all", 3) && !cmdutils_isalnum(spec[3])) {
+            spec += 3;
+            vs->type = VIEW_SPECIFIER_TYPE_ALL;
+        } else {
+            vs->type = VIEW_SPECIFIER_TYPE_ID;
+            vs->val  = strtoul(spec, &endptr, 0);
+            // endptr == spec means strtoul() consumed no digits at all
+            if (endptr == spec) {
+                av_log(NULL, AV_LOG_ERROR, "Invalid view ID: %s\n", spec);
+                return AVERROR(EINVAL);
+            }
+            spec = endptr;
+        }
+    } else if (!strncmp(spec, "vidx:", 5)) {
+        spec += 5;
+        vs->type = VIEW_SPECIFIER_TYPE_IDX;
+        vs->val  = strtoul(spec, &endptr, 0);
+        if (endptr == spec) {
+            av_log(NULL, AV_LOG_ERROR, "Invalid view index: %s\n", spec);
+            return AVERROR(EINVAL);
+        }
+        spec = endptr;
+    } else if (!strncmp(spec, "vpos:", 5)) {
+        spec += 5;
+        vs->type = VIEW_SPECIFIER_TYPE_POS;
+
+        if (!strncmp(spec, "left", 4) && !cmdutils_isalnum(spec[4])) {
+            spec += 4;
+            vs->val = AV_STEREO3D_VIEW_LEFT;
+        } else if (!strncmp(spec, "right", 5) && !cmdutils_isalnum(spec[5])) {
+            spec += 5;
+            vs->val = AV_STEREO3D_VIEW_RIGHT;
+        } else {
+            av_log(NULL, AV_LOG_ERROR, "Invalid view position: %s\n", spec);
+            return AVERROR(EINVAL);
+        }
+    } else
+        return 0;
+
+    // only commit the advanced position once a specifier was fully parsed
+    *pspec = spec;
+
+    return 0;
+}
+
 int parse_and_set_vsync(const char *arg, int *vsync_var, int file_idx, int st_idx, int is_global)
 {
     if      (!av_strcasecmp(arg, "cfr"))         *vsync_var = VSYNC_CFR;
@@ -452,6 +506,7 @@ static int opt_map(void *optctx, const char *opt, const char *arg)
             goto fail;
         }
     } else {
+        ViewSpecifier vs;
         char *endptr;
 
         file_idx = strtol(arg, &endptr, 0);
@@ -468,12 +523,18 @@ static int opt_map(void *optctx, const char *opt, const char *arg)
             goto fail;
         }
 
-        if (ss.remainder) {
-            if (!strcmp(ss.remainder, "?"))
+        arg = ss.remainder ? ss.remainder : "";
+
+        ret = view_specifier_parse(&arg, &vs);
+        if (ret < 0)
+            goto fail;
+
+        if (*arg) {
+            if (!strcmp(arg, "?"))
                 allow_unused = 1;
             else {
-                av_log(NULL, AV_LOG_ERROR, "Trailing garbage after stream specifier: %s\n",
-                       ss.remainder);
+                av_log(NULL, AV_LOG_ERROR,
+                       "Trailing garbage after stream specifier: %s\n", arg);
                 ret = AVERROR(EINVAL);
                 goto fail;
             }
@@ -509,6 +570,7 @@ static int opt_map(void *optctx, const char *opt, const char *arg)
 
                 m->file_index   = file_idx;
                 m->stream_index = i;
+                m->vs           = vs;
             }
     }
author	Anton Khirnov <anton@khirnov.net>	2024-08-10 18:36:49 +0200
committer	Anton Khirnov <anton@khirnov.net>	2024-09-23 17:15:02 +0200
commit	da420ac6e1b7d710c9e5ec97531a94ae1596de3c (patch)
tree	0ae241b07ac970106ae5a2101902cac71d9a84c8
parent	68c198fae2851bc68bb3e600424753d31a9be2ce (diff)
download	ffmpeg-da420ac6e1b7d710c9e5ec97531a94ae1596de3c.tar.gz