libavfilter/vf_dnn_detect: Add input pad

Add an input pad to get the model input resolution. Detection models always have a fixed input size, and the output coordinates are based on the input resolution, so we need to get the input size to map the coordinates to our real output frames. Signed-off-by: Wenbin Chen <[email protected]> Reviewed-by: Guo Yejun <[email protected]>
author: Wenbin Chen <[email protected]> 2023-12-12 10:33:32 +0800
committer: Guo Yejun <[email protected]> 2023-12-16 21:50:37 +0800
commit: da02836b9d204ca002d973ef7b1e6f60a2316cb1 (patch)
tree: 06d6ef515f417ecd0c9c7527b64fc4a01776111a
parent: 22652b576c2a0670d341648c68ca469ebe08f1a1 (diff)
2 files changed, 45 insertions, 7 deletions
diff --git a/libavfilter/dnn/dnn_backend_openvino.c b/libavfilter/dnn/dnn_backend_openvino.c
index 089e028818..671a995c70 100644
--- a/libavfilter/dnn/dnn_backend_openvino.c
+++ b/libavfilter/dnn/dnn_backend_openvino.c
@@ -1073,9 +1073,15 @@ static int get_input_ov(void *model, DNNData *input, const char *input_name)
         return AVERROR(ENOSYS);
     }
 
-    input->channels = dims[1];
-    input->height   = input_resizable ? -1 : dims[2];
-    input->width    = input_resizable ? -1 : dims[3];
+    if (dims[1] <= 3) { // NCHW: dims[1] is small enough to be treated as the channel count
+        input->channels = dims[1];
+        input->height   = input_resizable ? -1 : dims[2]; // -1 is reported when the input is resizable
+        input->width    = input_resizable ? -1 : dims[3];
+    } else { // NHWC: otherwise dims[1] is taken as the height and channels come last
+        input->height   = input_resizable ? -1 : dims[1];
+        input->width    = input_resizable ? -1 : dims[2];
+        input->channels = dims[3];
+    }
     input->dt       = precision_to_datatype(precision);
 
     return 0;
@@ -1105,9 +1111,15 @@ static int get_input_ov(void *model, DNNData *input, const char *input_name)
                 return DNN_GENERIC_ERROR;
             }
 
-            input->channels = dims.dims[1];
-            input->height   = input_resizable ? -1 : dims.dims[2];
-            input->width    = input_resizable ? -1 : dims.dims[3];
+            if (dims.dims[1] <= 3) { // NCHW: dims.dims[1] is treated as the channel count
+                input->channels = dims.dims[1];
+                input->height   = input_resizable ? -1 : dims.dims[2]; // -1 when the input is resizable
+                input->width    = input_resizable ? -1 : dims.dims[3];
+            } else { // NHWC: otherwise dims.dims[1] is taken as the height, channels come last
+                input->height   = input_resizable ? -1 : dims.dims[1];
+                input->width    = input_resizable ? -1 : dims.dims[2];
+                input->channels = dims.dims[3];
+            }
             input->dt       = precision_to_datatype(precision);
             return 0;
         }
diff --git a/libavfilter/vf_dnn_detect.c b/libavfilter/vf_dnn_detect.c
index 5862100b86..35c0508c50 100644
--- a/libavfilter/vf_dnn_detect.c
+++ b/libavfilter/vf_dnn_detect.c
@@ -705,13 +705,39 @@ static av_cold void dnn_detect_uninit(AVFilterContext *context)
     free_detect_labels(ctx);
 }
 
+static int config_input(AVFilterLink *inlink)
+{
+    /* Store the size detections are scaled against, taken from the model input. */
+    DnnDetectContext *ctx = inlink->dst->priv;
+    DNNData model_input;
+    int ret = ff_dnn_get_input(&ctx->dnnctx, &model_input);
+
+    if (ret != 0) {
+        av_log(ctx, AV_LOG_ERROR, "could not get input from the model\n");
+        return ret;
+    }
+
+    /* A model dimension of -1 falls back to the size of the incoming link. */
+    ctx->scale_width  = model_input.width  == -1 ? inlink->w : model_input.width;
+    ctx->scale_height = model_input.height == -1 ? inlink->h : model_input.height;
+    return 0;
+}
+
+static const AVFilterPad dnn_detect_inputs[] = {
+    {
+        .name         = "default",
+        .type         = AVMEDIA_TYPE_VIDEO,
+        .config_props = config_input, // stores the model input size in the filter context
+    },
+};
+
 const AVFilter ff_vf_dnn_detect = {
     .name          = "dnn_detect",
     .description   = NULL_IF_CONFIG_SMALL("Apply DNN detect filter to the input."),
     .priv_size     = sizeof(DnnDetectContext),
     .init          = dnn_detect_init,
     .uninit        = dnn_detect_uninit,
-    FILTER_INPUTS(ff_video_default_filterpad),
+    FILTER_INPUTS(dnn_detect_inputs),
     FILTER_OUTPUTS(ff_video_default_filterpad),
     FILTER_PIXFMTS_ARRAY(pix_fmts),
     .priv_class    = &dnn_detect_class,
author	Wenbin Chen <[email protected]>	2023-12-12 10:33:32 +0800
committer	Guo Yejun <[email protected]>	2023-12-16 21:50:37 +0800
commit	da02836b9d204ca002d973ef7b1e6f60a2316cb1 (patch)
tree	06d6ef515f417ecd0c9c7527b64fc4a01776111a
parent	22652b576c2a0670d341648c68ca469ebe08f1a1 (diff)