aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDerek Buitenhuis <derek.buitenhuis@gmail.com>2016-02-24 15:23:16 +0000
committerDerek Buitenhuis <derek.buitenhuis@gmail.com>2016-02-24 15:23:16 +0000
commit6992276acaaee32b33bd5f6e2f0d89588c4ae59a (patch)
treecd5d035d3293ae6076de0b6cc0a41e132e64539d
parent63c3e35332838fca19b7ffdae13233b0f3759402 (diff)
parentad884d100259e55cb51a4239cd8a4fd5154c2073 (diff)
downloadffmpeg-6992276acaaee32b33bd5f6e2f0d89588c4ae59a.tar.gz
Merge commit 'ad884d100259e55cb51a4239cd8a4fd5154c2073'
* commit 'ad884d100259e55cb51a4239cd8a4fd5154c2073': hwcontext: add a CUDA implementation Merged-by: Derek Buitenhuis <derek.buitenhuis@gmail.com>
-rw-r--r--doc/APIchanges2
-rw-r--r--libavutil/Makefile2
-rw-r--r--libavutil/hwcontext.c3
-rw-r--r--libavutil/hwcontext.h1
-rw-r--r--libavutil/hwcontext_cuda.c270
-rw-r--r--libavutil/hwcontext_cuda.h46
-rw-r--r--libavutil/hwcontext_internal.h1
7 files changed, 325 insertions, 0 deletions
diff --git a/doc/APIchanges b/doc/APIchanges
index f6061c2918..5af5ab10a1 100644
--- a/doc/APIchanges
+++ b/doc/APIchanges
@@ -25,6 +25,8 @@ API changes, most recent first:
xxxxxxx buffer.h - Add av_buffer_pool_init2().
xxxxxxx hwcontext.h - Add a new installed header hwcontext.h with a new API
for handling hwaccel frames.
+ xxxxxxx hwcontext_cuda.h - Add a new installed header hwcontext_cuda.h with
+ CUDA-specific hwcontext definitions.
xxxxxxx hwcontext_vdpau.h - Add a new installed header hwcontext_vdpau.h with
VDPAU-specific hwcontext definitions.
xxxxxxx pixfmt.h - Add AV_PIX_FMT_CUDA.
diff --git a/libavutil/Makefile b/libavutil/Makefile
index e6b503aa8c..542a80cd1e 100644
--- a/libavutil/Makefile
+++ b/libavutil/Makefile
@@ -32,6 +32,7 @@ HEADERS = adler32.h \
hash.h \
hmac.h \
hwcontext.h \
+ hwcontext_cuda.h \
hwcontext_vdpau.h \
imgutils.h \
intfloat.h \
@@ -151,6 +152,7 @@ OBJS-$(!HAVE_ATOMICS_NATIVE) += atomic.o \
OBJS-$(CONFIG_LZO) += lzo.o
OBJS-$(CONFIG_OPENCL) += opencl.o opencl_internal.o
+OBJS-$(CONFIG_CUDA) += hwcontext_cuda.o
OBJS-$(CONFIG_VDPAU) += hwcontext_vdpau.o
OBJS += $(COMPAT_OBJS:%=../compat/%)
diff --git a/libavutil/hwcontext.c b/libavutil/hwcontext.c
index 08a0859752..3eada29742 100644
--- a/libavutil/hwcontext.c
+++ b/libavutil/hwcontext.c
@@ -29,6 +29,9 @@
#include "pixfmt.h"
static const HWContextType *hw_table[] = {
+#if CONFIG_CUDA
+ &ff_hwcontext_type_cuda,
+#endif
#if CONFIG_VDPAU
&ff_hwcontext_type_vdpau,
#endif
diff --git a/libavutil/hwcontext.h b/libavutil/hwcontext.h
index 07123687d2..f46a39fd3d 100644
--- a/libavutil/hwcontext.h
+++ b/libavutil/hwcontext.h
@@ -26,6 +26,7 @@
enum AVHWDeviceType {
AV_HWDEVICE_TYPE_VDPAU,
+ AV_HWDEVICE_TYPE_CUDA,
};
typedef struct AVHWDeviceInternal AVHWDeviceInternal;
diff --git a/libavutil/hwcontext_cuda.c b/libavutil/hwcontext_cuda.c
new file mode 100644
index 0000000000..2c5980d619
--- /dev/null
+++ b/libavutil/hwcontext_cuda.c
@@ -0,0 +1,270 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "buffer.h"
+#include "common.h"
+#include "hwcontext.h"
+#include "hwcontext_internal.h"
+#include "hwcontext_cuda.h"
+#include "mem.h"
+#include "pixdesc.h"
+#include "pixfmt.h"
+
+/**
+ * Private per-frames-context state, stored in AVHWFramesContext.internal->priv.
+ *
+ * Both fields are filled by av_pix_fmt_get_chroma_sub_sample() in
+ * cuda_frames_init() for the context's sw_format.
+ */
+typedef struct CUDAFramesContext {
+    /* chroma shift along the width, as reported for sw_format */
+    int shift_width;
+    /* chroma shift along the height; the transfer functions use it to
+     * derive the row count of every plane after the first */
+    int shift_height;
+} CUDAFramesContext;
+
+/*
+ * Software pixel formats accepted as AVHWFramesContext.sw_format.
+ * cuda_frames_init() rejects any format not listed here, and both
+ * cuda_frames_init() and cuda_get_buffer() switch over exactly these
+ * values, so a new entry needs a matching case in both functions.
+ */
+static const enum AVPixelFormat supported_formats[] = {
+ AV_PIX_FMT_NV12,
+ AV_PIX_FMT_YUV420P,
+ AV_PIX_FMT_YUV444P,
+};
+
+/**
+ * AVBuffer free callback for buffers created by cuda_pool_alloc().
+ *
+ * @param opaque the AVHWFramesContext that was passed to av_buffer_create()
+ * @param data   the device allocation to release; it is a CUdeviceptr that
+ *               is carried around as a uint8_t pointer
+ *
+ * A free callback has no way to report failure, so errors are only logged.
+ */
+static void cuda_buffer_free(void *opaque, uint8_t *data)
+{
+    AVHWFramesContext     *ctx = opaque;
+    AVCUDADeviceContext *hwctx = ctx->device_ctx->hwctx;
+
+    CUcontext dummy;
+    CUresult  push_err;
+
+    push_err = cuCtxPushCurrent(hwctx->cuda_ctx);
+    if (push_err != CUDA_SUCCESS)
+        av_log(ctx, AV_LOG_ERROR, "Error setting current CUDA context\n");
+
+    /* The free is still attempted after a failed push, so the best-effort
+     * behaviour of this callback is kept. */
+    if (cuMemFree((CUdeviceptr)data) != CUDA_SUCCESS)
+        av_log(ctx, AV_LOG_ERROR, "Error freeing CUDA device memory\n");
+
+    /* Pop only what was pushed here. An unconditional pop after a failed
+     * push would remove a context this function did not put on the
+     * calling thread's context stack. */
+    if (push_err == CUDA_SUCCESS)
+        cuCtxPopCurrent(&dummy);
+}
+
+/**
+ * Buffer-pool allocation callback: allocate one frame buffer in device memory.
+ *
+ * @param opaque the AVHWFramesContext given to av_buffer_pool_init2()
+ * @param size   number of bytes to allocate with cuMemAlloc()
+ * @return a new AVBufferRef whose data pointer is the CUdeviceptr and whose
+ *         free callback is cuda_buffer_free(), or NULL on failure
+ */
+static AVBufferRef *cuda_pool_alloc(void *opaque, int size)
+{
+    AVHWFramesContext     *ctx = opaque;
+    AVCUDADeviceContext *hwctx = ctx->device_ctx->hwctx;
+
+    AVBufferRef *ret = NULL;
+    CUcontext dummy = NULL;
+    CUdeviceptr data;
+    CUresult err;
+
+    err = cuCtxPushCurrent(hwctx->cuda_ctx);
+    if (err != CUDA_SUCCESS) {
+        av_log(ctx, AV_LOG_ERROR, "Error setting current CUDA context\n");
+        return NULL;
+    }
+
+    err = cuMemAlloc(&data, size);
+    if (err != CUDA_SUCCESS) {
+        /* Log the failure, like the context-push error above does. */
+        av_log(ctx, AV_LOG_ERROR,
+               "Error allocating %d bytes of CUDA device memory\n", size);
+        goto end;
+    }
+
+    ret = av_buffer_create((uint8_t*)data, size, cuda_buffer_free, ctx, 0);
+    if (!ret)
+        cuMemFree(data); /* no AVBufferRef owns the allocation, release it here */
+
+end:
+    /* Reached on success and on failure: undo the push made above. */
+    cuCtxPopCurrent(&dummy);
+    return ret;
+}
+
+/**
+ * frames_init callback: validate sw_format, record its chroma shifts and,
+ * when the caller supplied no pool, create the internal buffer pool.
+ *
+ * @param ctx frames context being initialised
+ * @return 0 on success;
+ *         AVERROR(ENOSYS) if sw_format is not in supported_formats;
+ *         AVERROR(ENOMEM) if the internal pool cannot be created;
+ *         AVERROR_BUG if a supported format has no buffer-size rule below
+ */
+static int cuda_frames_init(AVHWFramesContext *ctx)
+{
+    CUDAFramesContext *priv = ctx->internal->priv;
+    int i;
+
+    for (i = 0; i < FF_ARRAY_ELEMS(supported_formats); i++) {
+        if (ctx->sw_format == supported_formats[i])
+            break;
+    }
+    if (i == FF_ARRAY_ELEMS(supported_formats)) {
+        av_log(ctx, AV_LOG_ERROR, "Pixel format '%s' is not supported\n",
+               av_get_pix_fmt_name(ctx->sw_format));
+        return AVERROR(ENOSYS);
+    }
+
+    av_pix_fmt_get_chroma_sub_sample(ctx->sw_format, &priv->shift_width, &priv->shift_height);
+
+    if (!ctx->pool) {
+        int size;
+
+        /* One contiguous allocation holds all planes of a frame. */
+        switch (ctx->sw_format) {
+        case AV_PIX_FMT_NV12:
+        case AV_PIX_FMT_YUV420P:
+            size = ctx->width * ctx->height * 3 / 2;
+            break;
+        case AV_PIX_FMT_YUV444P:
+            size = ctx->width * ctx->height * 3;
+            break;
+        default:
+            /* Not reachable while this switch covers supported_formats.
+             * It keeps size from being read uninitialized if a format is
+             * added to the list without a matching case here. */
+            return AVERROR_BUG;
+        }
+
+        ctx->internal->pool_internal = av_buffer_pool_init2(size, ctx, cuda_pool_alloc, NULL);
+        if (!ctx->internal->pool_internal)
+            return AVERROR(ENOMEM);
+    }
+
+    return 0;
+}
+
+/**
+ * frames_get_buffer callback: take one buffer from ctx->pool and describe
+ * its plane layout in the given frame.
+ *
+ * All planes live back to back in the single pooled buffer, with no row
+ * padding: the first plane uses ctx->width as its linesize.
+ *
+ * @return 0 on success, AVERROR(ENOMEM) if the pool yields no buffer,
+ *         AVERROR_BUG (after unreferencing the frame) for a sw_format
+ *         that has no layout rule here
+ */
+static int cuda_get_buffer(AVHWFramesContext *ctx, AVFrame *frame)
+{
+    const enum AVPixelFormat fmt = ctx->sw_format;
+    uint8_t *base;
+    int plane0_size;
+
+    frame->buf[0] = av_buffer_pool_get(ctx->pool);
+    if (!frame->buf[0])
+        return AVERROR(ENOMEM);
+
+    if (fmt != AV_PIX_FMT_NV12 &&
+        fmt != AV_PIX_FMT_YUV420P &&
+        fmt != AV_PIX_FMT_YUV444P) {
+        av_frame_unref(frame);
+        return AVERROR_BUG;
+    }
+
+    base        = frame->buf[0]->data;
+    plane0_size = ctx->width * ctx->height;
+
+    /* The first plane is laid out the same way for every format. */
+    frame->data[0]     = base;
+    frame->linesize[0] = ctx->width;
+
+    if (fmt == AV_PIX_FMT_NV12) {
+        /* a single second plane directly after the first */
+        frame->data[1]     = base + plane0_size;
+        frame->linesize[1] = ctx->width;
+    } else if (fmt == AV_PIX_FMT_YUV420P) {
+        /* data[2] is placed before data[1] in memory */
+        frame->data[2]     = base + plane0_size;
+        frame->data[1]     = frame->data[2] + plane0_size / 4;
+        frame->linesize[1] = ctx->width / 2;
+        frame->linesize[2] = ctx->width / 2;
+    } else {
+        /* AV_PIX_FMT_YUV444P: three planes of equal size */
+        frame->data[1]     = base + plane0_size;
+        frame->data[2]     = frame->data[1] + plane0_size;
+        frame->linesize[1] = ctx->width;
+        frame->linesize[2] = ctx->width;
+    }
+
+    frame->format = AV_PIX_FMT_CUDA;
+    frame->width  = ctx->width;
+    frame->height = ctx->height;
+
+    return 0;
+}
+
+/**
+ * transfer_get_formats callback: report the formats usable for transfers.
+ *
+ * The result is the same for either direction: a newly allocated list
+ * holding ctx->sw_format followed by the AV_PIX_FMT_NONE terminator.
+ *
+ * @param ctx     frames context whose sw_format is reported
+ * @param dir     transfer direction (not consulted)
+ * @param formats on success, receives the list allocated with av_malloc_array()
+ * @return 0 on success, AVERROR(ENOMEM) if the list cannot be allocated
+ */
+static int cuda_transfer_get_formats(AVHWFramesContext *ctx,
+                                     enum AVHWFrameTransferDirection dir,
+                                     enum AVPixelFormat **formats)
+{
+    enum AVPixelFormat *list = av_malloc_array(2, sizeof(*list));
+
+    if (list) {
+        list[0]  = ctx->sw_format;
+        list[1]  = AV_PIX_FMT_NONE;
+        *formats = list;
+        return 0;
+    }
+
+    return AVERROR(ENOMEM);
+}
+
+/**
+ * transfer_data_from callback: download a CUDA frame into host memory.
+ *
+ * Each plane with a non-NULL src->data[i] is copied with cuMemcpy2D().
+ * Every row copies the smaller of the two linesizes, and planes after the
+ * first have their row count reduced by the chroma height shift.
+ *
+ * @param ctx frames context that owns src
+ * @param dst destination frame; its data pointers are used as host memory
+ * @param src source frame; its data pointers are used as device memory
+ * @return 0 on success, AVERROR_UNKNOWN if the CUDA context cannot be made
+ *         current or a plane copy fails
+ */
+static int cuda_transfer_data_from(AVHWFramesContext *ctx, AVFrame *dst,
+                                   const AVFrame *src)
+{
+    CUDAFramesContext           *priv = ctx->internal->priv;
+    AVCUDADeviceContext *device_hwctx = ctx->device_ctx->hwctx;
+
+    CUcontext dummy;
+    CUresult err;
+    int ret = 0;
+    int i;
+
+    err = cuCtxPushCurrent(device_hwctx->cuda_ctx);
+    if (err != CUDA_SUCCESS)
+        return AVERROR_UNKNOWN;
+
+    for (i = 0; i < FF_ARRAY_ELEMS(src->data) && src->data[i]; i++) {
+        CUDA_MEMCPY2D cpy = {
+            .srcMemoryType = CU_MEMORYTYPE_DEVICE,
+            .dstMemoryType = CU_MEMORYTYPE_HOST,
+            .srcDevice     = (CUdeviceptr)src->data[i],
+            .dstHost       = dst->data[i],
+            .srcPitch      = src->linesize[i],
+            .dstPitch      = dst->linesize[i],
+            .WidthInBytes  = FFMIN(src->linesize[i], dst->linesize[i]),
+            .Height        = src->height >> (i ? priv->shift_height : 0),
+        };
+
+        err = cuMemcpy2D(&cpy);
+        if (err != CUDA_SUCCESS) {
+            av_log(ctx, AV_LOG_ERROR, "Error transferring the data from the CUDA frame\n");
+            ret = AVERROR_UNKNOWN;
+            /* Leave the loop rather than return, so that the context
+             * pushed above is always popped. */
+            break;
+        }
+    }
+
+    cuCtxPopCurrent(&dummy);
+
+    return ret;
+}
+
+/**
+ * transfer_data_to callback: upload a host-memory frame into a CUDA frame.
+ *
+ * Each plane with a non-NULL src->data[i] is copied with cuMemcpy2D().
+ * Every row copies the smaller of the two linesizes, and planes after the
+ * first have their row count reduced by the chroma height shift.
+ *
+ * @param ctx frames context that owns dst
+ * @param dst destination frame; its data pointers are used as device memory
+ * @param src source frame; its data pointers are used as host memory
+ * @return 0 on success, AVERROR_UNKNOWN if the CUDA context cannot be made
+ *         current or a plane copy fails
+ */
+static int cuda_transfer_data_to(AVHWFramesContext *ctx, AVFrame *dst,
+                                 const AVFrame *src)
+{
+    CUDAFramesContext           *priv = ctx->internal->priv;
+    AVCUDADeviceContext *device_hwctx = ctx->device_ctx->hwctx;
+
+    CUcontext dummy;
+    CUresult err;
+    int ret = 0;
+    int i;
+
+    err = cuCtxPushCurrent(device_hwctx->cuda_ctx);
+    if (err != CUDA_SUCCESS)
+        return AVERROR_UNKNOWN;
+
+    for (i = 0; i < FF_ARRAY_ELEMS(src->data) && src->data[i]; i++) {
+        CUDA_MEMCPY2D cpy = {
+            .srcMemoryType = CU_MEMORYTYPE_HOST,
+            .dstMemoryType = CU_MEMORYTYPE_DEVICE,
+            .srcHost       = src->data[i],
+            .dstDevice     = (CUdeviceptr)dst->data[i],
+            .srcPitch      = src->linesize[i],
+            .dstPitch      = dst->linesize[i],
+            .WidthInBytes  = FFMIN(src->linesize[i], dst->linesize[i]),
+            .Height        = src->height >> (i ? priv->shift_height : 0),
+        };
+
+        err = cuMemcpy2D(&cpy);
+        if (err != CUDA_SUCCESS) {
+            /* This is the upload direction, so the message says "to". */
+            av_log(ctx, AV_LOG_ERROR, "Error transferring the data to the CUDA frame\n");
+            ret = AVERROR_UNKNOWN;
+            /* Leave the loop rather than return, so that the context
+             * pushed above is always popped. */
+            break;
+        }
+    }
+
+    cuCtxPopCurrent(&dummy);
+
+    return ret;
+}
+
+/*
+ * HWContextType descriptor for the CUDA backend. It is declared in
+ * hwcontext_internal.h and listed in hw_table[] in hwcontext.c when
+ * CONFIG_CUDA is set.
+ */
+const HWContextType ff_hwcontext_type_cuda = {
+ .type = AV_HWDEVICE_TYPE_CUDA,
+ .name = "CUDA",
+
+ /* sizes of the device hwctx struct and of the private frames state */
+ .device_hwctx_size = sizeof(AVCUDADeviceContext),
+ .frames_priv_size = sizeof(CUDAFramesContext),
+
+ .frames_init = cuda_frames_init,
+ .frames_get_buffer = cuda_get_buffer,
+ .transfer_get_formats = cuda_transfer_get_formats,
+ .transfer_data_to = cuda_transfer_data_to,
+ .transfer_data_from = cuda_transfer_data_from,
+
+ /* list terminated by AV_PIX_FMT_NONE; AV_PIX_FMT_CUDA is its only entry */
+ .pix_fmts = (const enum AVPixelFormat[]){ AV_PIX_FMT_CUDA, AV_PIX_FMT_NONE },
+};
diff --git a/libavutil/hwcontext_cuda.h b/libavutil/hwcontext_cuda.h
new file mode 100644
index 0000000000..23a77cee73
--- /dev/null
+++ b/libavutil/hwcontext_cuda.h
@@ -0,0 +1,46 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+
+#ifndef AVUTIL_HWCONTEXT_CUDA_H
+#define AVUTIL_HWCONTEXT_CUDA_H
+
+#include <cuda.h>
+
+#include "pixfmt.h"
+
+/**
+ * @file
+ * An API-specific header for AV_HWDEVICE_TYPE_CUDA.
+ *
+ * This API supports dynamic frame pools. AVHWFramesContext.pool must return
+ * AVBufferRefs whose data pointer is a CUdeviceptr.
+ */
+
+/**
+ * This struct is allocated as AVHWDeviceContext.hwctx
+ */
+typedef struct AVCUDADeviceContext {
+ /*
+  * CUDA driver context used for this device. hwcontext_cuda.c only reads
+  * this field: it pushes it with cuCtxPushCurrent() around each
+  * allocation, free and transfer, and pops it afterwards.
+  * NOTE(review): nothing in hwcontext_cuda.c assigns it, so it is
+  * presumably supplied by the API user - confirm.
+  */
+ CUcontext cuda_ctx;
+} AVCUDADeviceContext;
+
+/**
+ * AVHWFramesContext.hwctx is currently not used
+ */
+
+#endif /* AVUTIL_HWCONTEXT_CUDA_H */
diff --git a/libavutil/hwcontext_internal.h b/libavutil/hwcontext_internal.h
index b8076c5b28..cf24ce5eaf 100644
--- a/libavutil/hwcontext_internal.h
+++ b/libavutil/hwcontext_internal.h
@@ -86,6 +86,7 @@ struct AVHWFramesInternal {
AVBufferPool *pool_internal;
};
+extern const HWContextType ff_hwcontext_type_cuda;
extern const HWContextType ff_hwcontext_type_vdpau;
#endif /* AVUTIL_HWCONTEXT_INTERNAL_H */