diff options
author | Zhao Zhili <zhilizhao@tencent.com> | 2024-05-08 00:08:08 +0800 |
---|---|---|
committer | Guo Yejun <yejun.guo@intel.com> | 2024-05-18 19:44:50 +0800 |
commit | 8c21f1e3b71ab6f82c8127a3a0b0b10d13c91caf (patch) | |
tree | 5eb51cde793766a43ef6ddb1eec70e1531d63067 /libavfilter | |
parent | 115c96b9bd53e775f425f23d5b73fa0a9dedbd08 (diff) | |
download | ffmpeg-8c21f1e3b71ab6f82c8127a3a0b0b10d13c91caf.tar.gz |
avfilter/dnn: Refactor DNN parameter configuration system
This patch tries to resolve multiple issues related to parameter
configuration:
Firstly, each DNN filter duplicates DNN_COMMON_OPTIONS, which should
be the common options of the backend.
Secondly, backend options are hidden behind the scenes. The user sees
only a single AV_OPT_TYPE_STRING option, backend_configs, which is parsed
by each backend, so the help message does not show which options each
backend supports.
Thirdly, the DNN backends duplicate DNN_BACKEND_COMMON_OPTIONS.
Last but not least, passing backend options via AV_OPT_TYPE_STRING
makes it hard, if not impossible, to pass AV_OPT_TYPE_BINARY options to a backend.
This patch puts the common backend options and each backend's options inside
DnnContext to reduce code duplication, make the options user-friendly, and
make them easy to extend for future use cases.
For example,
./ffmpeg -h filter=dnn_processing
dnn_processing AVOptions:
dnn_backend <int> ..FV....... DNN backend (from INT_MIN to INT_MAX) (default tensorflow)
tensorflow 1 ..FV....... tensorflow backend flag
openvino 2 ..FV....... openvino backend flag
torch 3 ..FV....... torch backend flag
dnn_base AVOptions:
model <string> ..F........ path to model file
input <string> ..F........ input name of the model
output <string> ..F........ output name of the model
backend_configs <string> ..F.......P backend configs (deprecated)
options <string> ..F.......P backend configs (deprecated)
nireq <int> ..F........ number of request (from 0 to INT_MAX) (default 0)
async <boolean> ..F........ use DNN async inference (default true)
device <string> ..F........ device to run model
dnn_tensorflow AVOptions:
sess_config <string> ..F........ config for SessionOptions
dnn_openvino AVOptions:
batch_size <int> ..F........ batch size per request (from 1 to 1000) (default 1)
input_resizable <boolean> ..F........ can input be resizable or not (default false)
layout <int> ..F........ input layout of model (from 0 to 2) (default none)
none 0 ..F........ none
nchw 1 ..F........ nchw
nhwc 2 ..F........ nhwc
scale <float> ..F........ Add scale preprocess operation. Divide each element of input by specified value. (from INT_MIN to INT_MAX) (default 0)
mean <float> ..F........ Add mean preprocess operation. Subtract specified value from each element of input. (from INT_MIN to INT_MAX) (default 0)
dnn_th AVOptions:
optimize <int> ..F........ turn on graph executor optimization (from 0 to 1) (default 0)
Signed-off-by: Zhao Zhili <zhilizhao@tencent.com>
Reviewed-by: Wenbin Chen <wenbin.chen@intel.com>
Reviewed-by: Guo Yejun <yejun.guo@intel.com>
Diffstat (limited to 'libavfilter')
-rw-r--r-- | libavfilter/dnn/dnn_backend_common.h | 13 | ||||
-rw-r--r-- | libavfilter/dnn/dnn_backend_openvino.c | 146 | ||||
-rw-r--r-- | libavfilter/dnn/dnn_backend_tf.c | 82 | ||||
-rw-r--r-- | libavfilter/dnn/dnn_backend_torch.cpp | 67 | ||||
-rw-r--r-- | libavfilter/dnn/dnn_interface.c | 90 | ||||
-rw-r--r-- | libavfilter/dnn_filter_common.c | 38 | ||||
-rw-r--r-- | libavfilter/dnn_filter_common.h | 39 | ||||
-rw-r--r-- | libavfilter/dnn_interface.h | 67 | ||||
-rw-r--r-- | libavfilter/vf_derain.c | 6 | ||||
-rw-r--r-- | libavfilter/vf_dnn_classify.c | 4 | ||||
-rw-r--r-- | libavfilter/vf_dnn_detect.c | 4 | ||||
-rw-r--r-- | libavfilter/vf_dnn_processing.c | 4 | ||||
-rw-r--r-- | libavfilter/vf_sr.c | 6 |
13 files changed, 337 insertions, 229 deletions
diff --git a/libavfilter/dnn/dnn_backend_common.h b/libavfilter/dnn/dnn_backend_common.h index 42c67c7040..9f5d37b3e0 100644 --- a/libavfilter/dnn/dnn_backend_common.h +++ b/libavfilter/dnn/dnn_backend_common.h @@ -28,9 +28,16 @@ #include "../dnn_interface.h" #include "libavutil/thread.h" -#define DNN_BACKEND_COMMON_OPTIONS \ - { "nireq", "number of request", OFFSET(options.nireq), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, FLAGS }, \ - { "async", "use DNN async inference", OFFSET(options.async), AV_OPT_TYPE_BOOL, { .i64 = 1 }, 0, 1, FLAGS }, +#define DNN_DEFINE_CLASS_EXT(name, desc, options) \ + { \ + .class_name = desc, \ + .item_name = av_default_item_name, \ + .option = options, \ + .version = LIBAVUTIL_VERSION_INT, \ + .category = AV_CLASS_CATEGORY_FILTER, \ + } +#define DNN_DEFINE_CLASS(fname) \ + DNN_DEFINE_CLASS_EXT(fname, #fname, fname##_options) // one task for one function call from dnn interface typedef struct TaskItem { diff --git a/libavfilter/dnn/dnn_backend_openvino.c b/libavfilter/dnn/dnn_backend_openvino.c index 374f21b7a1..c4b0682f11 100644 --- a/libavfilter/dnn/dnn_backend_openvino.c +++ b/libavfilter/dnn/dnn_backend_openvino.c @@ -40,24 +40,8 @@ #endif #include "dnn_backend_common.h" -typedef struct OVOptions{ - char *device_type; - int nireq; - uint8_t async; - int batch_size; - int input_resizable; - DNNLayout layout; - float scale; - float mean; -} OVOptions; - -typedef struct OVContext { - const AVClass *class; - OVOptions options; -} OVContext; - typedef struct OVModel{ - OVContext ctx; + DnnContext *ctx; DNNModel *model; #if HAVE_OPENVINO2 ov_core_t *core; @@ -98,24 +82,20 @@ typedef struct OVRequestItem { generated_string = generated_string ? 
av_asprintf("%s %s", generated_string, iterate_string) : \ av_asprintf("%s", iterate_string); -#define OFFSET(x) offsetof(OVContext, x) +#define OFFSET(x) offsetof(OVOptions, x) #define FLAGS AV_OPT_FLAG_FILTERING_PARAM static const AVOption dnn_openvino_options[] = { - { "device", "device to run model", OFFSET(options.device_type), AV_OPT_TYPE_STRING, { .str = "CPU" }, 0, 0, FLAGS }, - DNN_BACKEND_COMMON_OPTIONS - { "batch_size", "batch size per request", OFFSET(options.batch_size), AV_OPT_TYPE_INT, { .i64 = 1 }, 1, 1000, FLAGS}, - { "input_resizable", "can input be resizable or not", OFFSET(options.input_resizable), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, FLAGS }, - { "layout", "input layout of model", OFFSET(options.layout), AV_OPT_TYPE_INT, { .i64 = DL_NONE}, DL_NONE, DL_NHWC, FLAGS, .unit = "layout" }, + { "batch_size", "batch size per request", OFFSET(batch_size), AV_OPT_TYPE_INT, { .i64 = 1 }, 1, 1000, FLAGS}, + { "input_resizable", "can input be resizable or not", OFFSET(input_resizable), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, FLAGS }, + { "layout", "input layout of model", OFFSET(layout), AV_OPT_TYPE_INT, { .i64 = DL_NONE}, DL_NONE, DL_NHWC, FLAGS, .unit = "layout" }, { "none", "none", 0, AV_OPT_TYPE_CONST, { .i64 = DL_NONE }, 0, 0, FLAGS, .unit = "layout"}, { "nchw", "nchw", 0, AV_OPT_TYPE_CONST, { .i64 = DL_NCHW }, 0, 0, FLAGS, .unit = "layout"}, { "nhwc", "nhwc", 0, AV_OPT_TYPE_CONST, { .i64 = DL_NHWC }, 0, 0, FLAGS, .unit = "layout"}, - { "scale", "Add scale preprocess operation. Divide each element of input by specified value.", OFFSET(options.scale), AV_OPT_TYPE_FLOAT, { .dbl = 0 }, INT_MIN, INT_MAX, FLAGS}, - { "mean", "Add mean preprocess operation. Subtract specified value from each element of input.", OFFSET(options.mean), AV_OPT_TYPE_FLOAT, { .dbl = 0 }, INT_MIN, INT_MAX, FLAGS}, + { "scale", "Add scale preprocess operation. 
Divide each element of input by specified value.", OFFSET(scale), AV_OPT_TYPE_FLOAT, { .dbl = 0 }, INT_MIN, INT_MAX, FLAGS}, + { "mean", "Add mean preprocess operation. Subtract specified value from each element of input.", OFFSET(mean), AV_OPT_TYPE_FLOAT, { .dbl = 0 }, INT_MIN, INT_MAX, FLAGS}, { NULL } }; -AVFILTER_DEFINE_CLASS(dnn_openvino); - #if HAVE_OPENVINO2 static const struct { ov_status_e status; @@ -199,7 +179,7 @@ static int fill_model_input_ov(OVModel *ov_model, OVRequestItem *request) DNNData input; LastLevelTaskItem *lltask; TaskItem *task; - OVContext *ctx = &ov_model->ctx; + DnnContext *ctx = ov_model->ctx; #if HAVE_OPENVINO2 int64_t* dims; ov_status_e status; @@ -292,7 +272,7 @@ static int fill_model_input_ov(OVModel *ov_model, OVRequestItem *request) input.scale = 1; input.mean = 0; - for (int i = 0; i < ctx->options.batch_size; ++i) { + for (int i = 0; i < ctx->ov_option.batch_size; ++i) { lltask = ff_queue_pop_front(ov_model->lltask_queue); if (!lltask) { break; @@ -360,7 +340,7 @@ static void infer_completion_callback(void *args) OVModel *ov_model = task->model; SafeQueue *requestq = ov_model->request_queue; DNNData *outputs; - OVContext *ctx = &ov_model->ctx; + DnnContext *ctx = ov_model->ctx; #if HAVE_OPENVINO2 size_t* dims; ov_status_e status; @@ -410,9 +390,9 @@ static void infer_completion_callback(void *args) outputs[i].dims[2] = output_shape.rank > 1 ? dims[output_shape.rank - 2] : 1; outputs[i].dims[3] = output_shape.rank > 0 ? 
dims[output_shape.rank - 1] : 1; av_assert0(request->lltask_count <= dims[0]); - outputs[i].layout = ctx->options.layout; - outputs[i].scale = ctx->options.scale; - outputs[i].mean = ctx->options.mean; + outputs[i].layout = ctx->ov_option.layout; + outputs[i].scale = ctx->ov_option.scale; + outputs[i].mean = ctx->ov_option.mean; ov_shape_free(&output_shape); ov_tensor_free(output_tensor); output_tensor = NULL; @@ -452,9 +432,9 @@ static void infer_completion_callback(void *args) output.dims[i] = dims.dims[i]; av_assert0(request->lltask_count <= dims.dims[0]); output.dt = precision_to_datatype(precision); - output.layout = ctx->options.layout; - output.scale = ctx->options.scale; - output.mean = ctx->options.mean; + output.layout = ctx->ov_option.layout; + output.scale = ctx->ov_option.scale; + output.mean = ctx->ov_option.mean; outputs = &output; #endif @@ -590,7 +570,6 @@ static void dnn_free_model_ov(DNNModel **model) av_free(ov_model->all_output_names); av_free(ov_model->all_input_names); #endif - av_opt_free(&ov_model->ctx); av_freep(&ov_model); av_freep(model); } @@ -599,7 +578,7 @@ static void dnn_free_model_ov(DNNModel **model) static int init_model_ov(OVModel *ov_model, const char *input_name, const char **output_names, int nb_outputs) { int ret = 0; - OVContext *ctx = &ov_model->ctx; + DnnContext *ctx = ov_model->ctx; #if HAVE_OPENVINO2 ov_status_e status; ov_preprocess_input_tensor_info_t* input_tensor_info = NULL; @@ -610,7 +589,7 @@ static int init_model_ov(OVModel *ov_model, const char *input_name, const char * ov_layout_t* NCHW_layout = NULL; const char* NHWC_desc = "NHWC"; const char* NCHW_desc = "NCHW"; - const char* device = ctx->options.device_type; + const char* device = ctx->device ? 
ctx->device : "CPU"; #else IEStatusCode status; ie_available_devices_t a_dev; @@ -618,17 +597,17 @@ static int init_model_ov(OVModel *ov_model, const char *input_name, const char * char *all_dev_names = NULL; #endif // We scale pixel by default when do frame processing. - if (fabsf(ctx->options.scale) < 1e-6f) - ctx->options.scale = ov_model->model->func_type == DFT_PROCESS_FRAME ? 255 : 1; + if (fabsf(ctx->ov_option.scale) < 1e-6f) + ctx->ov_option.scale = ov_model->model->func_type == DFT_PROCESS_FRAME ? 255 : 1; // batch size - if (ctx->options.batch_size <= 0) { - ctx->options.batch_size = 1; + if (ctx->ov_option.batch_size <= 0) { + ctx->ov_option.batch_size = 1; } #if HAVE_OPENVINO2 - if (ctx->options.batch_size > 1) { + if (ctx->ov_option.batch_size > 1) { avpriv_report_missing_feature(ctx, "Do not support batch_size > 1 for now," "change batch_size to 1.\n"); - ctx->options.batch_size = 1; + ctx->ov_option.batch_size = 1; } status = ov_preprocess_prepostprocessor_create(ov_model->ov_model, &ov_model->preprocess); @@ -677,9 +656,9 @@ static int init_model_ov(OVModel *ov_model, const char *input_name, const char * ret = ov2_map_error(status, NULL); goto err; } - if (ctx->options.layout == DL_NCHW) + if (ctx->ov_option.layout == DL_NCHW) status = ov_preprocess_input_model_info_set_layout(input_model_info, NCHW_layout); - else if (ctx->options.layout == DL_NHWC) + else if (ctx->ov_option.layout == DL_NHWC) status = ov_preprocess_input_model_info_set_layout(input_model_info, NHWC_layout); if (status != OK) { av_log(ctx, AV_LOG_ERROR, "Failed to get set input model layout\n"); @@ -725,7 +704,7 @@ static int init_model_ov(OVModel *ov_model, const char *input_name, const char * } if (ov_model->model->func_type != DFT_PROCESS_FRAME) status |= ov_preprocess_output_set_element_type(output_tensor_info, F32); - else if (fabsf(ctx->options.scale - 1) > 1e-6f || fabsf(ctx->options.mean) > 1e-6f) + else if (fabsf(ctx->ov_option.scale - 1) > 1e-6f || 
fabsf(ctx->ov_option.mean) > 1e-6f) status |= ov_preprocess_output_set_element_type(output_tensor_info, F32); else status |= ov_preprocess_output_set_element_type(output_tensor_info, U8); @@ -740,7 +719,7 @@ static int init_model_ov(OVModel *ov_model, const char *input_name, const char * ov_model->output_info = NULL; } // set preprocess steps. - if (fabsf(ctx->options.scale - 1) > 1e-6f || fabsf(ctx->options.mean) > 1e-6f) { + if (fabsf(ctx->ov_option.scale - 1) > 1e-6f || fabsf(ctx->ov_option.mean) > 1e-6f) { ov_preprocess_preprocess_steps_t* input_process_steps = NULL; status = ov_preprocess_input_info_get_preprocess_steps(ov_model->input_info, &input_process_steps); if (status != OK) { @@ -749,8 +728,8 @@ static int init_model_ov(OVModel *ov_model, const char *input_name, const char * goto err; } status = ov_preprocess_preprocess_steps_convert_element_type(input_process_steps, F32); - status |= ov_preprocess_preprocess_steps_mean(input_process_steps, ctx->options.mean); - status |= ov_preprocess_preprocess_steps_scale(input_process_steps, ctx->options.scale); + status |= ov_preprocess_preprocess_steps_mean(input_process_steps, ctx->ov_option.mean); + status |= ov_preprocess_preprocess_steps_scale(input_process_steps, ctx->ov_option.scale); if (status != OK) { av_log(ctx, AV_LOG_ERROR, "Failed to set preprocess steps\n"); ov_preprocess_preprocess_steps_free(input_process_steps); @@ -824,7 +803,7 @@ static int init_model_ov(OVModel *ov_model, const char *input_name, const char * ov_layout_free(NCHW_layout); ov_layout_free(NHWC_layout); #else - if (ctx->options.batch_size > 1) { + if (ctx->ov_option.batch_size > 1) { input_shapes_t input_shapes; status = ie_network_get_input_shapes(ov_model->network, &input_shapes); if (status != OK) { @@ -832,7 +811,7 @@ static int init_model_ov(OVModel *ov_model, const char *input_name, const char * goto err; } for (int i = 0; i < input_shapes.shape_num; i++) - input_shapes.shapes[i].shape.dims[0] = ctx->options.batch_size; + 
input_shapes.shapes[i].shape.dims[0] = ctx->ov_option.batch_size; status = ie_network_reshape(ov_model->network, input_shapes); ie_network_input_shapes_free(&input_shapes); if (status != OK) { @@ -882,7 +861,7 @@ static int init_model_ov(OVModel *ov_model, const char *input_name, const char * } } - status = ie_core_load_network(ov_model->core, ov_model->network, ctx->options.device_type, &config, &ov_model->exe_network); + status = ie_core_load_network(ov_model->core, ov_model->network, ctx->device, &config, &ov_model->exe_network); if (status != OK) { av_log(ctx, AV_LOG_ERROR, "Failed to load OpenVINO model network\n"); status = ie_core_get_available_devices(ov_model->core, &a_dev); @@ -895,15 +874,15 @@ static int init_model_ov(OVModel *ov_model, const char *input_name, const char * APPEND_STRING(all_dev_names, a_dev.devices[i]) } av_log(ctx, AV_LOG_ERROR,"device %s may not be supported, all available devices are: \"%s\"\n", - ctx->options.device_type, all_dev_names); + ctx->device, all_dev_names); ret = AVERROR(ENODEV); goto err; } #endif // create infer_requests for async execution - if (ctx->options.nireq <= 0) { + if (ctx->nireq <= 0) { // the default value is a rough estimation - ctx->options.nireq = av_cpu_count() / 2 + 1; + ctx->nireq = av_cpu_count() / 2 + 1; } ov_model->request_queue = ff_safe_queue_create(); @@ -912,7 +891,7 @@ static int init_model_ov(OVModel *ov_model, const char *input_name, const char * goto err; } - for (int i = 0; i < ctx->options.nireq; i++) { + for (int i = 0; i < ctx->nireq; i++) { OVRequestItem *item = av_mallocz(sizeof(*item)); if (!item) { ret = AVERROR(ENOMEM); @@ -945,7 +924,7 @@ static int init_model_ov(OVModel *ov_model, const char *input_name, const char * } #endif - item->lltasks = av_malloc_array(ctx->options.batch_size, sizeof(*item->lltasks)); + item->lltasks = av_malloc_array(ctx->ov_option.batch_size, sizeof(*item->lltasks)); if (!item->lltasks) { ret = AVERROR(ENOMEM); goto err; @@ -994,7 +973,7 @@ static int 
execute_model_ov(OVRequestItem *request, Queue *inferenceq) LastLevelTaskItem *lltask; int ret = 0; TaskItem *task; - OVContext *ctx; + DnnContext *ctx; OVModel *ov_model; if (ff_queue_size(inferenceq) == 0) { @@ -1010,7 +989,7 @@ static int execute_model_ov(OVRequestItem *request, Queue *inferenceq) lltask = ff_queue_peek_front(inferenceq); task = lltask->task; ov_model = task->model; - ctx = &ov_model->ctx; + ctx = ov_model->ctx; ret = fill_model_input_ov(ov_model, request); if (ret != 0) { @@ -1084,8 +1063,8 @@ err: static int get_input_ov(void *model, DNNData *input, const char *input_name) { OVModel *ov_model = model; - OVContext *ctx = &ov_model->ctx; - int input_resizable = ctx->options.input_resizable; + DnnContext *ctx = ov_model->ctx; + int input_resizable = ctx->ov_option.input_resizable; #if HAVE_OPENVINO2 ov_shape_t input_shape = {0}; @@ -1291,7 +1270,7 @@ static int get_output_ov(void *model, const char *input_name, int input_width, i #endif int ret; OVModel *ov_model = model; - OVContext *ctx = &ov_model->ctx; + DnnContext *ctx = ov_model->ctx; TaskItem task; OVRequestItem *request; DNNExecBaseParams exec_params = { @@ -1308,7 +1287,7 @@ static int get_output_ov(void *model, const char *input_name, int input_width, i } #if HAVE_OPENVINO2 - if (ctx->options.input_resizable) { + if (ctx->ov_option.input_resizable) { status = ov_partial_shape_create(4, dims, &partial_shape); if (status != OK) { av_log(ctx, AV_LOG_ERROR, "Failed to create partial shape.\n"); @@ -1339,7 +1318,7 @@ static int get_output_ov(void *model, const char *input_name, int input_width, i if (!ov_model->compiled_model) { #else - if (ctx->options.input_resizable) { + if (ctx->ov_option.input_resizable) { status = ie_network_get_input_shapes(ov_model->network, &input_shapes); input_shapes.shapes->shape.dims[2] = input_height; input_shapes.shapes->shape.dims[3] = input_width; @@ -1386,11 +1365,10 @@ err: return ret; } -static DNNModel *dnn_load_model_ov(const char *model_filename, 
DNNFunctionType func_type, const char *options, AVFilterContext *filter_ctx) +static DNNModel *dnn_load_model_ov(DnnContext *ctx, DNNFunctionType func_type, AVFilterContext *filter_ctx) { DNNModel *model = NULL; OVModel *ov_model = NULL; - OVContext *ctx = NULL; #if HAVE_OPENVINO2 ov_core_t* core = NULL; ov_model_t* ovmodel = NULL; @@ -1411,17 +1389,9 @@ static DNNModel *dnn_load_model_ov(const char *model_filename, DNNFunctionType f av_freep(&model); return NULL; } + ov_model->ctx = ctx; model->model = ov_model; ov_model->model = model; - ov_model->ctx.class = &dnn_openvino_class; - ctx = &ov_model->ctx; - - //parse options - av_opt_set_defaults(ctx); - if (av_opt_set_from_string(ctx, options, NULL, "=", "&") < 0) { - av_log(ctx, AV_LOG_ERROR, "Failed to parse options \"%s\"\n", options); - goto err; - } #if HAVE_OPENVINO2 status = ov_core_create(&core); @@ -1430,13 +1400,13 @@ static DNNModel *dnn_load_model_ov(const char *model_filename, DNNFunctionType f } ov_model->core = core; - status = ov_core_read_model(core, model_filename, NULL, &ovmodel); + status = ov_core_read_model(core, ctx->model_filename, NULL, &ovmodel); if (status != OK) { ov_version_t ver; status = ov_get_openvino_version(&ver); av_log(NULL, AV_LOG_ERROR, "Failed to read the network from model file %s,\n" "Please check if the model version matches the runtime OpenVINO Version:\n", - model_filename); + ctx->model_filename); if (status == OK) { av_log(NULL, AV_LOG_ERROR, "BuildNumber: %s\n", ver.buildNumber); } @@ -1452,13 +1422,13 @@ static DNNModel *dnn_load_model_ov(const char *model_filename, DNNFunctionType f if (status != OK) goto err; - status = ie_core_read_network(ov_model->core, model_filename, NULL, &ov_model->network); + status = ie_core_read_network(ov_model->core, ctx->model_filename, NULL, &ov_model->network); if (status != OK) { ie_version_t ver; ver = ie_c_api_version(); av_log(ctx, AV_LOG_ERROR, "Failed to read the network from model file %s,\n" "Please check if the model 
version matches the runtime OpenVINO %s\n", - model_filename, ver.api_version); + ctx->model_filename, ver.api_version); ie_version_free(&ver); goto err; } @@ -1496,7 +1466,6 @@ static DNNModel *dnn_load_model_ov(const char *model_filename, DNNFunctionType f model->get_input = &get_input_ov; model->get_output = &get_output_ov; - model->options = options; model->filter_ctx = filter_ctx; model->func_type = func_type; @@ -1510,7 +1479,7 @@ err: static int dnn_execute_model_ov(const DNNModel *model, DNNExecBaseParams *exec_params) { OVModel *ov_model = model->model; - OVContext *ctx = &ov_model->ctx; + DnnContext *ctx = ov_model->ctx; OVRequestItem *request; TaskItem *task; int ret; @@ -1539,7 +1508,7 @@ static int dnn_execute_model_ov(const DNNModel *model, DNNExecBaseParams *exec_p return AVERROR(ENOMEM); } - ret = ff_dnn_fill_task(task, exec_params, ov_model, ctx->options.async, 1); + ret = ff_dnn_fill_task(task, exec_params, ov_model, ctx->async, 1); if (ret != 0) { av_freep(&task); return ret; @@ -1557,8 +1526,8 @@ static int dnn_execute_model_ov(const DNNModel *model, DNNExecBaseParams *exec_p return ret; } - if (ctx->options.async) { - while (ff_queue_size(ov_model->lltask_queue) >= ctx->options.batch_size) { + if (ctx->async) { + while (ff_queue_size(ov_model->lltask_queue) >= ctx->ov_option.batch_size) { request = ff_safe_queue_pop_front(ov_model->request_queue); if (!request) { av_log(ctx, AV_LOG_ERROR, "unable to get infer request.\n"); @@ -1581,7 +1550,7 @@ static int dnn_execute_model_ov(const DNNModel *model, DNNExecBaseParams *exec_p return AVERROR(ENOSYS); } - if (ctx->options.batch_size > 1) { + if (ctx->ov_option.batch_size > 1) { avpriv_report_missing_feature(ctx, "batch mode for sync execution"); return AVERROR(ENOSYS); } @@ -1604,7 +1573,7 @@ static DNNAsyncStatusType dnn_get_result_ov(const DNNModel *model, AVFrame **in, static int dnn_flush_ov(const DNNModel *model) { OVModel *ov_model = model->model; - OVContext *ctx = &ov_model->ctx; + 
DnnContext *ctx = ov_model->ctx; OVRequestItem *request; #if HAVE_OPENVINO2 ov_status_e status; @@ -1652,6 +1621,7 @@ static int dnn_flush_ov(const DNNModel *model) } const DNNModule ff_dnn_backend_openvino = { + .clazz = DNN_DEFINE_CLASS(dnn_openvino), .load_model = dnn_load_model_ov, .execute_model = dnn_execute_model_ov, .get_result = dnn_get_result_ov, diff --git a/libavfilter/dnn/dnn_backend_tf.c b/libavfilter/dnn/dnn_backend_tf.c index 2ed17c3c87..d24591b90b 100644 --- a/libavfilter/dnn/dnn_backend_tf.c +++ b/libavfilter/dnn/dnn_backend_tf.c @@ -36,19 +36,8 @@ #include "safe_queue.h" #include <tensorflow/c/c_api.h> -typedef struct TFOptions{ - char *sess_config; - uint8_t async; - uint32_t nireq; -} TFOptions; - -typedef struct TFContext { - const AVClass *class; - TFOptions options; -} TFContext; - -typedef struct TFModel{ - TFContext ctx; +typedef struct TFModel { + DnnContext *ctx; DNNModel *model; TF_Graph *graph; TF_Session *session; @@ -76,15 +65,13 @@ typedef struct TFRequestItem { DNNAsyncExecModule exec_module; } TFRequestItem; -#define OFFSET(x) offsetof(TFContext, x) +#define OFFSET(x) offsetof(TFOptions, x) #define FLAGS AV_OPT_FLAG_FILTERING_PARAM static const AVOption dnn_tensorflow_options[] = { - { "sess_config", "config for SessionOptions", OFFSET(options.sess_config), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, FLAGS }, - DNN_BACKEND_COMMON_OPTIONS + { "sess_config", "config for SessionOptions", OFFSET(sess_config), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, FLAGS }, { NULL } }; -AVFILTER_DEFINE_CLASS(dnn_tensorflow); static int execute_model_tf(TFRequestItem *request, Queue *lltask_queue); static void infer_completion_callback(void *args); @@ -160,7 +147,7 @@ static int tf_start_inference(void *args) TFModel *tf_model = task->model; if (!request) { - av_log(&tf_model->ctx, AV_LOG_ERROR, "TFRequestItem is NULL\n"); + av_log(tf_model->ctx, AV_LOG_ERROR, "TFRequestItem is NULL\n"); return AVERROR(EINVAL); } @@ -170,7 +157,7 @@ static int 
tf_start_inference(void *args) task->nb_output, NULL, 0, NULL, request->status); if (TF_GetCode(request->status) != TF_OK) { - av_log(&tf_model->ctx, AV_LOG_ERROR, "%s", TF_Message(request->status)); + av_log(tf_model->ctx, AV_LOG_ERROR, "%s", TF_Message(request->status)); return DNN_GENERIC_ERROR; } return 0; @@ -198,7 +185,7 @@ static inline void destroy_request_item(TFRequestItem **arg) { static int extract_lltask_from_task(TaskItem *task, Queue *lltask_queue) { TFModel *tf_model = task->model; - TFContext *ctx = &tf_model->ctx; + DnnContext *ctx = tf_model->ctx; LastLevelTaskItem *lltask = av_malloc(sizeof(*lltask)); if (!lltask) { av_log(ctx, AV_LOG_ERROR, "Unable to allocate space for LastLevelTaskItem\n"); @@ -278,7 +265,7 @@ static TF_Tensor *allocate_input_tensor(const DNNData *input) static int get_input_tf(void *model, DNNData *input, const char *input_name) { TFModel *tf_model = model; - TFContext *ctx = &tf_model->ctx; + DnnContext *ctx = tf_model->ctx; TF_Status *status; TF_DataType dt; int64_t dims[4]; @@ -328,7 +315,7 @@ static int get_output_tf(void *model, const char *input_name, int input_width, i { int ret; TFModel *tf_model = model; - TFContext *ctx = &tf_model->ctx; + DnnContext *ctx = tf_model->ctx; TaskItem task; TFRequestItem *request; DNNExecBaseParams exec_params = { @@ -399,7 +386,7 @@ static int hex_to_data(uint8_t *data, const char *p) static int load_tf_model(TFModel *tf_model, const char *model_filename) { - TFContext *ctx = &tf_model->ctx; + DnnContext *ctx = tf_model->ctx; TF_Buffer *graph_def; TF_ImportGraphDefOptions *graph_opts; TF_SessionOptions *sess_opts; @@ -408,7 +395,7 @@ static int load_tf_model(TFModel *tf_model, const char *model_filename) int sess_config_length = 0; // prepare the sess config data - if (tf_model->ctx.options.sess_config != NULL) { + if (ctx->tf_option.sess_config != NULL) { const char *config; /* tf_model->ctx.options.sess_config is hex to present the serialized proto @@ -416,11 +403,11 @@ static int 
load_tf_model(TFModel *tf_model, const char *model_filename) proto in a python script, tools/python/tf_sess_config.py is a script example to generate the configs of sess_config. */ - if (strncmp(tf_model->ctx.options.sess_config, "0x", 2) != 0) { + if (strncmp(ctx->tf_option.sess_config, "0x", 2) != 0) { av_log(ctx, AV_LOG_ERROR, "sess_config should start with '0x'\n"); return AVERROR(EINVAL); } - config = tf_model->ctx.options.sess_config + 2; + config = ctx->tf_option.sess_config + 2; sess_config_length = hex_to_data(NULL, config); sess_config = av_mallocz(sess_config_length + AV_INPUT_BUFFER_PADDING_SIZE); @@ -461,7 +448,7 @@ static int load_tf_model(TFModel *tf_model, const char *model_filename) if (TF_GetCode(tf_model->status) != TF_OK) { TF_DeleteSessionOptions(sess_opts); av_log(ctx, AV_LOG_ERROR, "Failed to set config for sess options with %s\n", - tf_model->ctx.options.sess_config); + ctx->tf_option.sess_config); return DNN_GENERIC_ERROR; } } @@ -529,15 +516,14 @@ static void dnn_free_model_tf(DNNModel **model) TF_DeleteStatus(tf_model->status); } av_freep(&tf_model); - av_freep(model); + av_freep(&model); } } -static DNNModel *dnn_load_model_tf(const char *model_filename, DNNFunctionType func_type, const char *options, AVFilterContext *filter_ctx) +static DNNModel *dnn_load_model_tf(DnnContext *ctx, DNNFunctionType func_type, AVFilterContext *filter_ctx) { DNNModel *model = NULL; TFModel *tf_model = NULL; - TFContext *ctx = NULL; model = av_mallocz(sizeof(DNNModel)); if (!model){ @@ -551,23 +537,15 @@ static DNNModel *dnn_load_model_tf(const char *model_filename, DNNFunctionType f } model->model = tf_model; tf_model->model = model; - ctx = &tf_model->ctx; - ctx->class = &dnn_tensorflow_class; - - //parse options - av_opt_set_defaults(ctx); - if (av_opt_set_from_string(ctx, options, NULL, "=", "&") < 0) { - av_log(ctx, AV_LOG_ERROR, "Failed to parse options \"%s\"\n", options); - goto err; - } + tf_model->ctx = ctx; - if (load_tf_model(tf_model, 
model_filename) != 0){ - av_log(ctx, AV_LOG_ERROR, "Failed to load TensorFlow model: \"%s\"\n", model_filename); + if (load_tf_model(tf_model, ctx->model_filename) != 0){ + av_log(ctx, AV_LOG_ERROR, "Failed to load TensorFlow model: \"%s\"\n", ctx->model_filename); goto err; } - if (ctx->options.nireq <= 0) { - ctx->options.nireq = av_cpu_count() / 2 + 1; + if (ctx->nireq <= 0) { + ctx->nireq = av_cpu_count() / 2 + 1; } #if !HAVE_PTHREAD_CANCEL @@ -582,7 +560,7 @@ static DNNModel *dnn_load_model_tf(const char *model_filename, DNNFunctionType f goto err; } - for (int i = 0; i < ctx->options.nireq; i++) { + for (int i = 0; i < ctx->nireq; i++) { TFRequestItem *item = av_mallocz(sizeof(*item)); if (!item) { goto err; @@ -617,7 +595,6 @@ static DNNModel *dnn_load_model_tf(const char *model_filename, DNNFunctionType f model->get_input = &get_input_tf; model->get_output = &get_output_tf; - model->options = options; model->filter_ctx = filter_ctx; model->func_type = func_type; @@ -632,7 +609,7 @@ static int fill_model_input_tf(TFModel *tf_model, TFRequestItem *request) { LastLevelTaskItem *lltask; TaskItem *task; TFInferRequest *infer_request = NULL; - TFContext *ctx = &tf_model->ctx; + DnnContext *ctx = tf_model->ctx; int ret = 0; lltask = ff_queue_pop_front(tf_model->lltask_queue); @@ -728,7 +705,7 @@ static void infer_completion_callback(void *args) { DNNData *outputs; TFInferRequest *infer_request = request->infer_request; TFModel *tf_model = task->model; - TFContext *ctx = &tf_model->ctx; + DnnContext *ctx = tf_model->ctx; outputs = av_calloc(task->nb_output, sizeof(*outputs)); if (!outputs) { @@ -787,7 +764,7 @@ err: static int execute_model_tf(TFRequestItem *request, Queue *lltask_queue) { TFModel *tf_model; - TFContext *ctx; + DnnContext *ctx; LastLevelTaskItem *lltask; TaskItem *task; int ret = 0; @@ -800,7 +777,7 @@ static int execute_model_tf(TFRequestItem *request, Queue *lltask_queue) lltask = ff_queue_peek_front(lltask_queue); task = lltask->task; tf_model = 
task->model; - ctx = &tf_model->ctx; + ctx = tf_model->ctx; ret = fill_model_input_tf(tf_model, request); if (ret != 0) { @@ -833,7 +810,7 @@ err: static int dnn_execute_model_tf(const DNNModel *model, DNNExecBaseParams *exec_params) { TFModel *tf_model = model->model; - TFContext *ctx = &tf_model->ctx; + DnnContext *ctx = tf_model->ctx; TaskItem *task; TFRequestItem *request; int ret = 0; @@ -849,7 +826,7 @@ static int dnn_execute_model_tf(const DNNModel *model, DNNExecBaseParams *exec_p return AVERROR(ENOMEM); } - ret = ff_dnn_fill_task(task, exec_params, tf_model, ctx->options.async, 1); + ret = ff_dnn_fill_task(task, exec_params, tf_model, ctx->async, 1); if (ret != 0) { av_log(ctx, AV_LOG_ERROR, "Fill task with invalid parameter(s).\n"); av_freep(&task); @@ -887,7 +864,7 @@ static DNNAsyncStatusType dnn_get_result_tf(const DNNModel *model, AVFrame **in, static int dnn_flush_tf(const DNNModel *model) { TFModel *tf_model = model->model; - TFContext *ctx = &tf_model->ctx; + DnnContext *ctx = tf_model->ctx; TFRequestItem *request; int ret; @@ -915,6 +892,7 @@ static int dnn_flush_tf(const DNNModel *model) } const DNNModule ff_dnn_backend_tf = { + .clazz = DNN_DEFINE_CLASS(dnn_tensorflow), .load_model = dnn_load_model_tf, .execute_model = dnn_execute_model_tf, .get_result = dnn_get_result_tf, diff --git a/libavfilter/dnn/dnn_backend_torch.cpp b/libavfilter/dnn/dnn_backend_torch.cpp index ae55893a50..abdef1f178 100644 --- a/libavfilter/dnn/dnn_backend_torch.cpp +++ b/libavfilter/dnn/dnn_backend_torch.cpp @@ -36,18 +36,8 @@ extern "C" { #include "safe_queue.h" } -typedef struct THOptions{ - char *device_name; - int optimize; -} THOptions; - -typedef struct THContext { - const AVClass *c_class; - THOptions options; -} THContext; - typedef struct THModel { - THContext ctx; + DnnContext *ctx; DNNModel *model; torch::jit::Module *jit_model; SafeQueue *request_queue; @@ -67,20 +57,17 @@ typedef struct THRequestItem { } THRequestItem; -#define OFFSET(x) offsetof(THContext, 
x) +#define OFFSET(x) offsetof(THOptions, x) #define FLAGS AV_OPT_FLAG_FILTERING_PARAM static const AVOption dnn_th_options[] = { - { "device", "device to run model", OFFSET(options.device_name), AV_OPT_TYPE_STRING, { .str = "cpu" }, 0, 0, FLAGS }, - { "optimize", "turn on graph executor optimization", OFFSET(options.optimize), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, FLAGS}, + { "optimize", "turn on graph executor optimization", OFFSET(optimize), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, FLAGS}, { NULL } }; -AVFILTER_DEFINE_CLASS(dnn_th); - static int extract_lltask_from_task(TaskItem *task, Queue *lltask_queue) { THModel *th_model = (THModel *)task->model; - THContext *ctx = &th_model->ctx; + DnnContext *ctx = th_model->ctx; LastLevelTaskItem *lltask = (LastLevelTaskItem *)av_malloc(sizeof(*lltask)); if (!lltask) { av_log(ctx, AV_LOG_ERROR, "Failed to allocate memory for LastLevelTaskItem\n"); @@ -153,7 +140,6 @@ static void dnn_free_model_th(DNNModel **model) } ff_queue_destroy(th_model->task_queue); delete th_model->jit_model; - av_opt_free(&th_model->ctx); av_freep(&th_model); av_freep(model); } @@ -181,7 +167,7 @@ static int fill_model_input_th(THModel *th_model, THRequestItem *request) TaskItem *task = NULL; THInferRequest *infer_request = NULL; DNNData input = { 0 }; - THContext *ctx = &th_model->ctx; + DnnContext *ctx = th_model->ctx; int ret, width_idx, height_idx, channel_idx; lltask = (LastLevelTaskItem *)ff_queue_pop_front(th_model->lltask_queue); @@ -241,7 +227,7 @@ static int th_start_inference(void *args) LastLevelTaskItem *lltask = NULL; TaskItem *task = NULL; THModel *th_model = NULL; - THContext *ctx = NULL; + DnnContext *ctx = NULL; std::vector<torch::jit::IValue> inputs; torch::NoGradGuard no_grad; @@ -253,9 +239,9 @@ static int th_start_inference(void *args) lltask = request->lltask; task = lltask->task; th_model = (THModel *)task->model; - ctx = &th_model->ctx; + ctx = th_model->ctx; - if (ctx->options.optimize) + if (ctx->torch_option.optimize) 
torch::jit::setGraphExecutorOptimize(true); else torch::jit::setGraphExecutorOptimize(false); @@ -292,7 +278,7 @@ static void infer_completion_callback(void *args) { outputs.dims[2] = sizes.at(2); // H outputs.dims[3] = sizes.at(3); // W } else { - avpriv_report_missing_feature(&th_model->ctx, "Support of this kind of model"); + avpriv_report_missing_feature(th_model->ctx, "Support of this kind of model"); goto err; } @@ -304,7 +290,7 @@ static void infer_completion_callback(void *args) { if (th_model->model->frame_post_proc != NULL) { th_model->model->frame_post_proc(task->out_frame, &outputs, th_model->model->filter_ctx); } else { - ff_proc_from_dnn_to_frame(task->out_frame, &outputs, &th_model->ctx); + ff_proc_from_dnn_to_frame(task->out_frame, &outputs, th_model->ctx); } } else { task->out_frame->width = outputs.dims[dnn_get_width_idx_by_layout(outputs.layout)]; @@ -312,7 +298,7 @@ static void infer_completion_callback(void *args) { } break; default: - avpriv_report_missing_feature(&th_model->ctx, "model function type %d", th_model->model->func_type); + avpriv_report_missing_feature(th_model->ctx, "model function type %d", th_model->model->func_type); goto err; } task->inference_done++; @@ -322,7 +308,7 @@ err: if (ff_safe_queue_push_back(th_model->request_queue, request) < 0) { destroy_request_item(&request); - av_log(&th_model->ctx, AV_LOG_ERROR, "Unable to push back request_queue when failed to start inference.\n"); + av_log(th_model->ctx, AV_LOG_ERROR, "Unable to push back request_queue when failed to start inference.\n"); } } @@ -352,7 +338,7 @@ static int execute_model_th(THRequestItem *request, Queue *lltask_queue) goto err; } if (task->async) { - avpriv_report_missing_feature(&th_model->ctx, "LibTorch async"); + avpriv_report_missing_feature(th_model->ctx, "LibTorch async"); } else { ret = th_start_inference((void *)(request)); if (ret != 0) { @@ -375,7 +361,7 @@ static int get_output_th(void *model, const char *input_name, int input_width, i { int ret 
= 0; THModel *th_model = (THModel*) model; - THContext *ctx = &th_model->ctx; + DnnContext *ctx = th_model->ctx; TaskItem task = { 0 }; THRequestItem *request = NULL; DNNExecBaseParams exec_params = { @@ -424,12 +410,12 @@ static THInferRequest *th_create_inference_request(void) return request; } -static DNNModel *dnn_load_model_th(const char *model_filename, DNNFunctionType func_type, const char *options, AVFilterContext *filter_ctx) +static DNNModel *dnn_load_model_th(DnnContext *ctx, DNNFunctionType func_type, AVFilterContext *filter_ctx) { DNNModel *model = NULL; THModel *th_model = NULL; THRequestItem *item = NULL; - THContext *ctx; + const char *device_name = ctx->device ? ctx->device : "cpu"; model = (DNNModel *)av_mallocz(sizeof(DNNModel)); if (!model) { @@ -443,24 +429,17 @@ static DNNModel *dnn_load_model_th(const char *model_filename, DNNFunctionType f } th_model->model = model; model->model = th_model; - th_model->ctx.c_class = &dnn_th_class; - ctx = &th_model->ctx; - //parse options - av_opt_set_defaults(ctx); - if (av_opt_set_from_string(ctx, options, NULL, "=", "&") < 0) { - av_log(ctx, AV_LOG_ERROR, "Failed to parse options \"%s\"\n", options); - return NULL; - } + th_model->ctx = ctx; - c10::Device device = c10::Device(ctx->options.device_name); + c10::Device device = c10::Device(device_name); if (!device.is_cpu()) { - av_log(ctx, AV_LOG_ERROR, "Not supported device:\"%s\"\n", ctx->options.device_name); + av_log(ctx, AV_LOG_ERROR, "Not supported device:\"%s\"\n", device_name); goto fail; } try { th_model->jit_model = new torch::jit::Module; - (*th_model->jit_model) = torch::jit::load(model_filename); + (*th_model->jit_model) = torch::jit::load(ctx->model_filename); } catch (const c10::Error& e) { av_log(ctx, AV_LOG_ERROR, "Failed to load torch model\n"); goto fail; @@ -502,7 +481,6 @@ static DNNModel *dnn_load_model_th(const char *model_filename, DNNFunctionType f model->get_input = &get_input_th; model->get_output = &get_output_th; - 
model->options = NULL; model->filter_ctx = filter_ctx; model->func_type = func_type; return model; @@ -519,7 +497,7 @@ fail: static int dnn_execute_model_th(const DNNModel *model, DNNExecBaseParams *exec_params) { THModel *th_model = (THModel *)model->model; - THContext *ctx = &th_model->ctx; + DnnContext *ctx = th_model->ctx; TaskItem *task; THRequestItem *request; int ret = 0; @@ -582,7 +560,7 @@ static int dnn_flush_th(const DNNModel *model) request = (THRequestItem *)ff_safe_queue_pop_front(th_model->request_queue); if (!request) { - av_log(&th_model->ctx, AV_LOG_ERROR, "unable to get infer request.\n"); + av_log(th_model->ctx, AV_LOG_ERROR, "unable to get infer request.\n"); return AVERROR(EINVAL); } @@ -590,6 +568,7 @@ static int dnn_flush_th(const DNNModel *model) } extern const DNNModule ff_dnn_backend_torch = { + .clazz = DNN_DEFINE_CLASS(dnn_th), .load_model = dnn_load_model_th, .execute_model = dnn_execute_model_th, .get_result = dnn_get_result_th, diff --git a/libavfilter/dnn/dnn_interface.c b/libavfilter/dnn/dnn_interface.c index b9f71aea53..e7453f1bb1 100644 --- a/libavfilter/dnn/dnn_interface.c +++ b/libavfilter/dnn/dnn_interface.c @@ -24,12 +24,61 @@ */ #include "../dnn_interface.h" +#include "libavutil/avassert.h" #include "libavutil/mem.h" +#include "libavutil/opt.h" +#include "libavfilter/internal.h" extern const DNNModule ff_dnn_backend_openvino; extern const DNNModule ff_dnn_backend_tf; extern const DNNModule ff_dnn_backend_torch; +#define OFFSET(x) offsetof(DnnContext, x) +#define FLAGS AV_OPT_FLAG_FILTERING_PARAM +static const AVOption dnn_base_options[] = { + {"model", "path to model file", + OFFSET(model_filename), AV_OPT_TYPE_STRING, {.str = NULL}, 0, 0, FLAGS}, + {"input", "input name of the model", + OFFSET(model_inputname), AV_OPT_TYPE_STRING, {.str = NULL}, 0, 0, FLAGS}, + {"output", "output name of the model", + OFFSET(model_outputnames_string), AV_OPT_TYPE_STRING, {.str = NULL}, 0, 0, FLAGS}, + {"backend_configs", "backend configs 
(deprecated)", + OFFSET(backend_options), AV_OPT_TYPE_STRING, {.str = NULL}, 0, 0, FLAGS | AV_OPT_FLAG_DEPRECATED}, + {"options", "backend configs (deprecated)", + OFFSET(backend_options), AV_OPT_TYPE_STRING, {.str = NULL}, 0, 0, FLAGS | AV_OPT_FLAG_DEPRECATED}, + {"nireq", "number of request", + OFFSET(nireq), AV_OPT_TYPE_INT, {.i64 = 0}, 0, INT_MAX, FLAGS}, + {"async", "use DNN async inference", + OFFSET(async), AV_OPT_TYPE_BOOL, {.i64 = 1}, 0, 1, FLAGS}, + {"device", "device to run model", + OFFSET(device), AV_OPT_TYPE_STRING, {.str = NULL}, 0, 0, FLAGS}, + {NULL} +}; + +AVFILTER_DEFINE_CLASS(dnn_base); + +typedef struct DnnBackendInfo { + const size_t offset; + union { + const AVClass *class; + const DNNModule *module; + }; +} DnnBackendInfo; + +static const DnnBackendInfo dnn_backend_info_list[] = { + {0, .class = &dnn_base_class}, + // Must keep the same order as in DNNOptions, so offset value in incremental order +#if CONFIG_LIBTENSORFLOW + {offsetof(DnnContext, tf_option), .module = &ff_dnn_backend_tf}, +#endif +#if CONFIG_LIBOPENVINO + {offsetof(DnnContext, ov_option), .module = &ff_dnn_backend_openvino}, +#endif +#if CONFIG_LIBTORCH + {offsetof(DnnContext, torch_option), .module = &ff_dnn_backend_torch}, +#endif +}; + const DNNModule *ff_get_dnn_module(DNNBackendType backend_type, void *log_ctx) { switch(backend_type){ @@ -52,3 +101,44 @@ const DNNModule *ff_get_dnn_module(DNNBackendType backend_type, void *log_ctx) return NULL; } } + +void ff_dnn_init_child_class(DnnContext *ctx) +{ + for (int i = 0; i < FF_ARRAY_ELEMS(dnn_backend_info_list); i++) { + const AVClass **ptr = (const AVClass **) ((char *) ctx + dnn_backend_info_list[i].offset); + *ptr = dnn_backend_info_list[i].class; + } +} + +void *ff_dnn_child_next(DnnContext *obj, void *prev) { + size_t pre_offset; + + if (!prev) { + av_assert0(obj->clazz); + return obj; + } + + pre_offset = (char *)prev - (char *)obj; + for (int i = 0; i < FF_ARRAY_ELEMS(dnn_backend_info_list) - 1; i++) { + if 
(dnn_backend_info_list[i].offset == pre_offset) { + const AVClass **ptr = (const AVClass **) ((char *) obj + dnn_backend_info_list[i + 1].offset); + av_assert0(*ptr); + return ptr; + } + } + + return NULL; +} + +const AVClass *ff_dnn_child_class_iterate(void **iter) +{ + uintptr_t i = (uintptr_t) *iter; + + if (i < FF_ARRAY_ELEMS(dnn_backend_info_list)) { + *iter = (void *)(i + 1); + return dnn_backend_info_list[i].class; + } + + return NULL; +} + diff --git a/libavfilter/dnn_filter_common.c b/libavfilter/dnn_filter_common.c index 5e76b9ba45..860ca7591f 100644 --- a/libavfilter/dnn_filter_common.c +++ b/libavfilter/dnn_filter_common.c @@ -19,6 +19,7 @@ #include "dnn_filter_common.h" #include "libavutil/avstring.h" #include "libavutil/mem.h" +#include "libavutil/opt.h" #define MAX_SUPPORTED_OUTPUTS_NB 4 @@ -52,6 +53,23 @@ static char **separate_output_names(const char *expr, const char *val_sep, int * return parsed_vals; } +typedef struct DnnFilterBase { + const AVClass *class; + DnnContext dnnctx; +} DnnFilterBase; + +int ff_dnn_filter_init_child_class(AVFilterContext *filter) { + DnnFilterBase *base = filter->priv; + ff_dnn_init_child_class(&base->dnnctx); + return 0; +} + +void *ff_dnn_filter_child_next(void *obj, void *prev) +{ + DnnFilterBase *base = obj; + return ff_dnn_child_next(&base->dnnctx, prev); +} + int ff_dnn_init(DnnContext *ctx, DNNFunctionType func_type, AVFilterContext *filter_ctx) { DNNBackendType backend = ctx->backend_type; @@ -91,7 +109,25 @@ int ff_dnn_init(DnnContext *ctx, DNNFunctionType func_type, AVFilterContext *fil return AVERROR(EINVAL); } - ctx->model = (ctx->dnn_module->load_model)(ctx->model_filename, func_type, ctx->backend_options, filter_ctx); + if (ctx->backend_options) { + void *child = NULL; + + av_log(filter_ctx, AV_LOG_WARNING, + "backend_configs is deprecated, please set backend options directly\n"); + while (child = ff_dnn_child_next(ctx, child)) { + if (*(const AVClass **)child == &ctx->dnn_module->clazz) { + int ret = 
av_opt_set_from_string(child, ctx->backend_options, + NULL, "=", "&"); + if (ret < 0) { + av_log(filter_ctx, AV_LOG_ERROR, "failed to parse options \"%s\"\n", + ctx->backend_options); + return ret; + } + } + } + } + + ctx->model = (ctx->dnn_module->load_model)(ctx, func_type, filter_ctx); if (!ctx->model) { av_log(filter_ctx, AV_LOG_ERROR, "could not load DNN model\n"); return AVERROR(EINVAL); diff --git a/libavfilter/dnn_filter_common.h b/libavfilter/dnn_filter_common.h index 30871ee381..b52b55a90d 100644 --- a/libavfilter/dnn_filter_common.h +++ b/libavfilter/dnn_filter_common.h @@ -26,28 +26,23 @@ #include "dnn_interface.h" -typedef struct DnnContext { - char *model_filename; - DNNBackendType backend_type; - char *model_inputname; - char *model_outputnames_string; - char *backend_options; - int async; - - char **model_outputnames; - uint32_t nb_outputs; - const DNNModule *dnn_module; - DNNModel *model; -} DnnContext; - -#define DNN_COMMON_OPTIONS \ - { "model", "path to model file", OFFSET(model_filename), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, FLAGS },\ - { "input", "input name of the model", OFFSET(model_inputname), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, FLAGS },\ - { "output", "output name of the model", OFFSET(model_outputnames_string), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, FLAGS },\ - { "backend_configs", "backend configs", OFFSET(backend_options), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, FLAGS },\ - { "options", "backend configs (deprecated, use backend_configs)", OFFSET(backend_options), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, FLAGS | AV_OPT_FLAG_DEPRECATED},\ - { "async", "use DNN async inference (ignored, use backend_configs='async=1')", OFFSET(async), AV_OPT_TYPE_BOOL, { .i64 = 1}, 0, 1, FLAGS}, - +#define AVFILTER_DNN_DEFINE_CLASS_EXT(name, desc, options) \ + static const AVClass name##_class = { \ + .class_name = desc, \ + .item_name = av_default_item_name, \ + .option = options, \ + .version = LIBAVUTIL_VERSION_INT, \ + .category = 
AV_CLASS_CATEGORY_FILTER, \ + .child_next = ff_dnn_filter_child_next, \ + .child_class_iterate = ff_dnn_child_class_iterate, \ + } + +#define AVFILTER_DNN_DEFINE_CLASS(fname) \ + AVFILTER_DNN_DEFINE_CLASS_EXT(fname, #fname, fname##_options) + +void *ff_dnn_filter_child_next(void *obj, void *prev); + +int ff_dnn_filter_init_child_class(AVFilterContext *filter); int ff_dnn_init(DnnContext *ctx, DNNFunctionType func_type, AVFilterContext *filter_ctx); int ff_dnn_set_frame_proc(DnnContext *ctx, FramePrePostProc pre_proc, FramePrePostProc post_proc); diff --git a/libavfilter/dnn_interface.h b/libavfilter/dnn_interface.h index 63f492e690..4e544486cc 100644 --- a/libavfilter/dnn_interface.h +++ b/libavfilter/dnn_interface.h @@ -93,8 +93,6 @@ typedef int (*ClassifyPostProc)(AVFrame *frame, DNNData *output, uint32_t bbox_i typedef struct DNNModel{ // Stores model that can be different for different backends. void *model; - // Stores options when the model is executed by the backend - const char *options; // Stores FilterContext used for the interaction between AVFrame and DNNData AVFilterContext *filter_ctx; // Stores function type of the model @@ -117,10 +115,65 @@ typedef struct DNNModel{ ClassifyPostProc classify_post_proc; } DNNModel; +typedef struct TFOptions{ + const AVClass *clazz; + + char *sess_config; +} TFOptions; + +typedef struct OVOptions { + const AVClass *clazz; + + int batch_size; + int input_resizable; + DNNLayout layout; + float scale; + float mean; +} OVOptions; + +typedef struct THOptions { + const AVClass *clazz; + int optimize; +} THOptions; + +typedef struct DNNModule DNNModule; + +typedef struct DnnContext { + const AVClass *clazz; + + DNNModel *model; + + char *model_filename; + DNNBackendType backend_type; + char *model_inputname; + char *model_outputnames_string; + char *backend_options; + int async; + + char **model_outputnames; + uint32_t nb_outputs; + const DNNModule *dnn_module; + + int nireq; + char *device; + +#if CONFIG_LIBTENSORFLOW + 
TFOptions tf_option; +#endif + +#if CONFIG_LIBOPENVINO + OVOptions ov_option; +#endif +#if CONFIG_LIBTORCH + THOptions torch_option; +#endif +} DnnContext; + // Stores pointers to functions for loading, executing, freeing DNN models for one of the backends. -typedef struct DNNModule{ +struct DNNModule { + const AVClass clazz; // Loads model and parameters from given file. Returns NULL if it is not possible. - DNNModel *(*load_model)(const char *model_filename, DNNFunctionType func_type, const char *options, AVFilterContext *filter_ctx); + DNNModel *(*load_model)(DnnContext *ctx, DNNFunctionType func_type, AVFilterContext *filter_ctx); // Executes model with specified input and output. Returns the error code otherwise. int (*execute_model)(const DNNModel *model, DNNExecBaseParams *exec_params); // Retrieve inference result. @@ -129,11 +182,15 @@ typedef struct DNNModule{ int (*flush)(const DNNModel *model); // Frees memory allocated for model. void (*free_model)(DNNModel **model); -} DNNModule; +}; // Initializes DNNModule depending on chosen backend. const DNNModule *ff_get_dnn_module(DNNBackendType backend_type, void *log_ctx); +void ff_dnn_init_child_class(DnnContext *ctx); +void *ff_dnn_child_next(DnnContext *obj, void *prev); +const AVClass *ff_dnn_child_class_iterate(void **iter); + static inline int dnn_get_width_idx_by_layout(DNNLayout layout) { return layout == DL_NHWC ? 
2 : 3; diff --git a/libavfilter/vf_derain.c b/libavfilter/vf_derain.c index c8848dd7ba..7f665b73ab 100644 --- a/libavfilter/vf_derain.c +++ b/libavfilter/vf_derain.c @@ -46,13 +46,10 @@ static const AVOption derain_options[] = { #if (CONFIG_LIBTENSORFLOW == 1) { "tensorflow", "tensorflow backend flag", 0, AV_OPT_TYPE_CONST, { .i64 = 1 }, 0, 0, FLAGS, .unit = "backend" }, #endif - { "model", "path to model file", OFFSET(dnnctx.model_filename), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, FLAGS }, - { "input", "input name of the model", OFFSET(dnnctx.model_inputname), AV_OPT_TYPE_STRING, { .str = "x" }, 0, 0, FLAGS }, - { "output", "output name of the model", OFFSET(dnnctx.model_outputnames_string), AV_OPT_TYPE_STRING, { .str = "y" }, 0, 0, FLAGS }, { NULL } }; -AVFILTER_DEFINE_CLASS(derain); +AVFILTER_DNN_DEFINE_CLASS(derain); static int filter_frame(AVFilterLink *inlink, AVFrame *in) { @@ -113,6 +110,7 @@ const AVFilter ff_vf_derain = { .name = "derain", .description = NULL_IF_CONFIG_SMALL("Apply derain filter to the input."), .priv_size = sizeof(DRContext), + .preinit = ff_dnn_filter_init_child_class, .init = init, .uninit = uninit, FILTER_INPUTS(derain_inputs), diff --git a/libavfilter/vf_dnn_classify.c b/libavfilter/vf_dnn_classify.c index 1f8f227e3a..965779a8ab 100644 --- a/libavfilter/vf_dnn_classify.c +++ b/libavfilter/vf_dnn_classify.c @@ -50,14 +50,13 @@ static const AVOption dnn_classify_options[] = { #if (CONFIG_LIBOPENVINO == 1) { "openvino", "openvino backend flag", 0, AV_OPT_TYPE_CONST, { .i64 = DNN_OV }, 0, 0, FLAGS, .unit = "backend" }, #endif - DNN_COMMON_OPTIONS { "confidence", "threshold of confidence", OFFSET2(confidence), AV_OPT_TYPE_FLOAT, { .dbl = 0.5 }, 0, 1, FLAGS}, { "labels", "path to labels file", OFFSET2(labels_filename), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, FLAGS }, { "target", "which one to be classified", OFFSET2(target), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, FLAGS }, { NULL } }; -AVFILTER_DEFINE_CLASS(dnn_classify); 
+AVFILTER_DNN_DEFINE_CLASS(dnn_classify); static int dnn_classify_post_proc(AVFrame *frame, DNNData *output, uint32_t bbox_index, AVFilterContext *filter_ctx) { @@ -299,6 +298,7 @@ const AVFilter ff_vf_dnn_classify = { .name = "dnn_classify", .description = NULL_IF_CONFIG_SMALL("Apply DNN classify filter to the input."), .priv_size = sizeof(DnnClassifyContext), + .preinit = ff_dnn_filter_init_child_class, .init = dnn_classify_init, .uninit = dnn_classify_uninit, FILTER_INPUTS(ff_video_default_filterpad), diff --git a/libavfilter/vf_dnn_detect.c b/libavfilter/vf_dnn_detect.c index bacea3ef29..926966368a 100644 --- a/libavfilter/vf_dnn_detect.c +++ b/libavfilter/vf_dnn_detect.c @@ -70,7 +70,6 @@ static const AVOption dnn_detect_options[] = { #if (CONFIG_LIBOPENVINO == 1) { "openvino", "openvino backend flag", 0, AV_OPT_TYPE_CONST, { .i64 = DNN_OV }, 0, 0, FLAGS, .unit = "backend" }, #endif - DNN_COMMON_OPTIONS { "confidence", "threshold of confidence", OFFSET2(confidence), AV_OPT_TYPE_FLOAT, { .dbl = 0.5 }, 0, 1, FLAGS}, { "labels", "path to labels file", OFFSET2(labels_filename), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, FLAGS }, { "model_type", "DNN detection model type", OFFSET2(model_type), AV_OPT_TYPE_INT, { .i64 = DDMT_SSD }, INT_MIN, INT_MAX, FLAGS, .unit = "model_type" }, @@ -85,7 +84,7 @@ static const AVOption dnn_detect_options[] = { { NULL } }; -AVFILTER_DEFINE_CLASS(dnn_detect); +AVFILTER_DNN_DEFINE_CLASS(dnn_detect); static inline float sigmoid(float x) { return 1.f / (1.f + exp(-x)); @@ -851,6 +850,7 @@ const AVFilter ff_vf_dnn_detect = { .name = "dnn_detect", .description = NULL_IF_CONFIG_SMALL("Apply DNN detect filter to the input."), .priv_size = sizeof(DnnDetectContext), + .preinit = ff_dnn_filter_init_child_class, .init = dnn_detect_init, .uninit = dnn_detect_uninit, FILTER_INPUTS(dnn_detect_inputs), diff --git a/libavfilter/vf_dnn_processing.c b/libavfilter/vf_dnn_processing.c index fdac31665e..9a1dd2a356 100644 --- 
a/libavfilter/vf_dnn_processing.c +++ b/libavfilter/vf_dnn_processing.c @@ -54,11 +54,10 @@ static const AVOption dnn_processing_options[] = { #if (CONFIG_LIBTORCH == 1) { "torch", "torch backend flag", 0, AV_OPT_TYPE_CONST, { .i64 = DNN_TH }, 0, 0, FLAGS, "backend" }, #endif - DNN_COMMON_OPTIONS { NULL } }; -AVFILTER_DEFINE_CLASS(dnn_processing); +AVFILTER_DNN_DEFINE_CLASS(dnn_processing); static av_cold int init(AVFilterContext *context) { @@ -373,6 +372,7 @@ const AVFilter ff_vf_dnn_processing = { .name = "dnn_processing", .description = NULL_IF_CONFIG_SMALL("Apply DNN processing filter to the input."), .priv_size = sizeof(DnnProcessingContext), + .preinit = ff_dnn_filter_init_child_class, .init = init, .uninit = uninit, FILTER_INPUTS(dnn_processing_inputs), diff --git a/libavfilter/vf_sr.c b/libavfilter/vf_sr.c index 60683b5209..f14c0c0cd3 100644 --- a/libavfilter/vf_sr.c +++ b/libavfilter/vf_sr.c @@ -50,13 +50,10 @@ static const AVOption sr_options[] = { { "tensorflow", "tensorflow backend flag", 0, AV_OPT_TYPE_CONST, { .i64 = 1 }, 0, 0, FLAGS, .unit = "backend" }, #endif { "scale_factor", "scale factor for SRCNN model", OFFSET(scale_factor), AV_OPT_TYPE_INT, { .i64 = 2 }, 2, 4, FLAGS }, - { "model", "path to model file specifying network architecture and its parameters", OFFSET(dnnctx.model_filename), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, FLAGS }, - { "input", "input name of the model", OFFSET(dnnctx.model_inputname), AV_OPT_TYPE_STRING, { .str = "x" }, 0, 0, FLAGS }, - { "output", "output name of the model", OFFSET(dnnctx.model_outputnames_string), AV_OPT_TYPE_STRING, { .str = "y" }, 0, 0, FLAGS }, { NULL } }; -AVFILTER_DEFINE_CLASS(sr); +AVFILTER_DNN_DEFINE_CLASS(sr); static av_cold int init(AVFilterContext *context) { @@ -192,6 +189,7 @@ const AVFilter ff_vf_sr = { .name = "sr", .description = NULL_IF_CONFIG_SMALL("Apply DNN-based image super resolution to the input."), .priv_size = sizeof(SRContext), + .preinit = ff_dnn_filter_init_child_class, .init = 
init, .uninit = uninit, FILTER_INPUTS(sr_inputs), |