aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMark Thompson <sw@jkqxz.net>2018-03-19 22:52:30 +0000
committerMark Thompson <sw@jkqxz.net>2018-03-22 23:12:47 +0000
commit2a1542d105dc3161516e34eef77bcd64aa72cab4 (patch)
tree2f32963bfb6d8a00b9136b8a4f169922840a905e
parentb78d55b2e63e410abe744932fda9358633743a9e (diff)
downloadffmpeg-2a1542d105dc3161516e34eef77bcd64aa72cab4.tar.gz
lavfi/opencl: Derive global work size from plane image sizes
Add a new function to find the global work size given the output image and the required block alignment, then use it in the overlay, program and unsharp filters. Fixes the overlay and unsharp filters applying the kernel to locations outside the frame when subsampled planes are present.
-rw-r--r--libavfilter/opencl.c64
-rw-r--r--libavfilter/opencl.h8
-rw-r--r--libavfilter/vf_overlay_opencl.c6
-rw-r--r--libavfilter/vf_program_opencl.c8
-rw-r--r--libavfilter/vf_unsharp_opencl.c16
5 files changed, 87 insertions, 15 deletions
diff --git a/libavfilter/opencl.c b/libavfilter/opencl.c
index 37afc41f8b..ae61667380 100644
--- a/libavfilter/opencl.c
+++ b/libavfilter/opencl.c
@@ -22,6 +22,7 @@
#include "libavutil/hwcontext.h"
#include "libavutil/hwcontext_opencl.h"
#include "libavutil/mem.h"
+#include "libavutil/pixdesc.h"
#include "avfilter.h"
#include "formats.h"
@@ -276,3 +277,66 @@ fail:
av_freep(&src);
return err;
}
+
+int ff_opencl_filter_work_size_from_image(AVFilterContext *avctx,
+ size_t *work_size,
+ AVFrame *frame, int plane,
+ int block_alignment)
+{
+ cl_mem image;
+ cl_mem_object_type type;
+ size_t width, height;
+ cl_int cle;
+
+ if (frame->format != AV_PIX_FMT_OPENCL) {
+ av_log(avctx, AV_LOG_ERROR, "Invalid frame format %s, "
+ "opencl required.\n", av_get_pix_fmt_name(frame->format));
+ return AVERROR(EINVAL);
+ }
+
+ image = (cl_mem)frame->data[plane];
+ if (!image) {
+ av_log(avctx, AV_LOG_ERROR, "Plane %d required but not set.\n",
+ plane);
+ return AVERROR(EINVAL);
+ }
+
+ cle = clGetMemObjectInfo(image, CL_MEM_TYPE, sizeof(type),
+ &type, NULL);
+ if (cle != CL_SUCCESS) {
+ av_log(avctx, AV_LOG_ERROR, "Failed to query object type of "
+ "plane %d: %d.\n", plane, cle);
+ return AVERROR_UNKNOWN;
+ }
+ if (type != CL_MEM_OBJECT_IMAGE2D) {
+ av_log(avctx, AV_LOG_ERROR, "Plane %d is not a 2D image.\n",
+ plane);
+ return AVERROR(EINVAL);
+ }
+
+ cle = clGetImageInfo(image, CL_IMAGE_WIDTH, sizeof(size_t),
+ &width, NULL);
+ if (cle != CL_SUCCESS) {
+ av_log(avctx, AV_LOG_ERROR, "Failed to query plane %d width: %d.\n",
+ plane, cle);
+ return AVERROR_UNKNOWN;
+ }
+
+ cle = clGetImageInfo(image, CL_IMAGE_HEIGHT, sizeof(size_t),
+ &height, NULL);
+ if (cle != CL_SUCCESS) {
+ av_log(avctx, AV_LOG_ERROR, "Failed to query plane %d height: %d.\n",
+ plane, cle);
+ return AVERROR_UNKNOWN;
+ }
+
+ if (block_alignment) {
+ width = FFALIGN(width, block_alignment);
+ height = FFALIGN(height, block_alignment);
+ }
+
+ work_size[0] = width;
+ work_size[1] = height;
+
+ return 0;
+}
diff --git a/libavfilter/opencl.h b/libavfilter/opencl.h
index 4d740c18ab..45fe2a2e27 100644
--- a/libavfilter/opencl.h
+++ b/libavfilter/opencl.h
@@ -84,4 +84,12 @@ int ff_opencl_filter_load_program(AVFilterContext *avctx,
int ff_opencl_filter_load_program_from_file(AVFilterContext *avctx,
const char *filename);
+/**
+ * Find the work size needed needed for a given plane of an image.
+ */
+int ff_opencl_filter_work_size_from_image(AVFilterContext *avctx,
+ size_t *work_size,
+ AVFrame *frame, int plane,
+ int block_alignment);
+
#endif /* AVFILTER_OPENCL_H */
diff --git a/libavfilter/vf_overlay_opencl.c b/libavfilter/vf_overlay_opencl.c
index ee8381dfee..16e10f4371 100644
--- a/libavfilter/vf_overlay_opencl.c
+++ b/libavfilter/vf_overlay_opencl.c
@@ -216,8 +216,10 @@ static int overlay_opencl_blend(FFFrameSync *fs)
goto fail_kernel_arg;
}
- global_work[0] = output->width;
- global_work[1] = output->height;
+ err = ff_opencl_filter_work_size_from_image(avctx, global_work,
+ output, plane, 0);
+ if (err < 0)
+ goto fail;
cle = clEnqueueNDRangeKernel(ctx->command_queue, ctx->kernel, 2, NULL,
global_work, NULL, 0, NULL, NULL);
diff --git a/libavfilter/vf_program_opencl.c b/libavfilter/vf_program_opencl.c
index 4ee9668236..0bcf188ac7 100644
--- a/libavfilter/vf_program_opencl.c
+++ b/libavfilter/vf_program_opencl.c
@@ -142,10 +142,10 @@ static int program_opencl_run(AVFilterContext *avctx)
}
}
- cle = clGetImageInfo(dst, CL_IMAGE_WIDTH, sizeof(size_t),
- &global_work[0], NULL);
- cle = clGetImageInfo(dst, CL_IMAGE_HEIGHT, sizeof(size_t),
- &global_work[1], NULL);
+ err = ff_opencl_filter_work_size_from_image(avctx, global_work,
+ output, plane, 0);
+ if (err < 0)
+ goto fail;
av_log(avctx, AV_LOG_DEBUG, "Run kernel on plane %d "
"(%zux%zu).\n", plane, global_work[0], global_work[1]);
diff --git a/libavfilter/vf_unsharp_opencl.c b/libavfilter/vf_unsharp_opencl.c
index 6a453c014b..19c91857cb 100644
--- a/libavfilter/vf_unsharp_opencl.c
+++ b/libavfilter/vf_unsharp_opencl.c
@@ -320,15 +320,13 @@ static int unsharp_opencl_filter_frame(AVFilterLink *inlink, AVFrame *input)
}
}
- if (ctx->global) {
- global_work[0] = output->width;
- global_work[1] = output->height;
- } else {
- global_work[0] = FFALIGN(output->width, 16);
- global_work[1] = FFALIGN(output->height, 16);
- local_work[0] = 16;
- local_work[1] = 16;
- }
+ err = ff_opencl_filter_work_size_from_image(avctx, global_work, output, p,
+ ctx->global ? 0 : 16);
+ if (err < 0)
+ goto fail;
+
+ local_work[0] = 16;
+ local_work[1] = 16;
av_log(avctx, AV_LOG_DEBUG, "Run kernel on plane %d "
"(%"SIZE_SPECIFIER"x%"SIZE_SPECIFIER").\n",