aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Diego de Souza <ddesouza@nvidia.com> 2025-01-08 10:30:41 +0100
committer: Timo Rothenpieler <timo@rothenpieler.org> 2025-02-02 20:01:56 +0100
commit: 30e6effff94c6f4310aa2db571917bb2952f4d9e (patch)
tree: 0aa508b71868f1a3dbc8b4bc65676d4d2de2bc2f
parent: 7454a07d583ad92dd2cafeff8afcb385df64c560 (diff)
download: ffmpeg-30e6effff94c6f4310aa2db571917bb2952f4d9e.tar.gz
avcodec/nvdec: add 4:2:2 decoding and 10-bit support
This commit adds support for 4:2:2 decoding for HEVC and H.264 on NVIDIA Blackwell GPUs. Additionally, it supports 10-bit decoding for H.264 on Blackwell GPUs.

Signed-off-by: Diego de Souza <ddesouza@nvidia.com>
Signed-off-by: Timo Rothenpieler <timo@rothenpieler.org>
-rw-r--r-- libavcodec/h264_slice.c | 3
-rw-r--r-- libavcodec/hevc/hevcdec.c | 6
-rw-r--r-- libavcodec/nvdec.c | 44
-rw-r--r-- libavcodec/nvdec.h | 5
-rw-r--r-- libavcodec/nvdec_h264.c | 2
5 files changed, 51 insertions, 9 deletions
diff --git a/libavcodec/h264_slice.c b/libavcodec/h264_slice.c
index 5108fa0921..cb22b76730 100644
--- a/libavcodec/h264_slice.c
+++ b/libavcodec/h264_slice.c
@@ -808,6 +808,9 @@ static enum AVPixelFormat get_pixel_format(H264Context *h, int force_callback)
#if CONFIG_H264_VULKAN_HWACCEL
*fmt++ = AV_PIX_FMT_VULKAN;
#endif
+#if CONFIG_H264_NVDEC_HWACCEL
+ *fmt++ = AV_PIX_FMT_CUDA;
+#endif
if (CHROMA444(h)) {
if (h->avctx->colorspace == AVCOL_SPC_RGB) {
*fmt++ = AV_PIX_FMT_GBRP10;
diff --git a/libavcodec/hevc/hevcdec.c b/libavcodec/hevc/hevcdec.c
index 7d3e844945..e9c045f7a1 100644
--- a/libavcodec/hevc/hevcdec.c
+++ b/libavcodec/hevc/hevcdec.c
@@ -627,6 +627,9 @@ static enum AVPixelFormat get_format(HEVCContext *s, const HEVCSPS *sps)
#if CONFIG_HEVC_VULKAN_HWACCEL
*fmt++ = AV_PIX_FMT_VULKAN;
#endif
+#if CONFIG_HEVC_NVDEC_HWACCEL
+ *fmt++ = AV_PIX_FMT_CUDA;
+#endif
break;
case AV_PIX_FMT_YUV444P10:
#if CONFIG_HEVC_VIDEOTOOLBOX_HWACCEL
@@ -655,6 +658,9 @@ static enum AVPixelFormat get_format(HEVCContext *s, const HEVCSPS *sps)
#if CONFIG_HEVC_VULKAN_HWACCEL
*fmt++ = AV_PIX_FMT_VULKAN;
#endif
+#if CONFIG_HEVC_NVDEC_HWACCEL
+ *fmt++ = AV_PIX_FMT_CUDA;
+#endif
break;
}
diff --git a/libavcodec/nvdec.c b/libavcodec/nvdec.c
index db9d353c61..99351661ea 100644
--- a/libavcodec/nvdec.c
+++ b/libavcodec/nvdec.c
@@ -375,13 +375,27 @@ int ff_nvdec_decode_init(AVCodecContext *avctx)
switch (sw_desc->comp[0].depth) {
case 8:
- output_format = chroma_444 ? cudaVideoSurfaceFormat_YUV444 :
- cudaVideoSurfaceFormat_NV12;
+ if (chroma_444) {
+ output_format = cudaVideoSurfaceFormat_YUV444;
+#ifdef NVDEC_HAVE_422_SUPPORT
+ } else if (cuvid_chroma_format == cudaVideoChromaFormat_422) {
+ output_format = cudaVideoSurfaceFormat_NV16;
+#endif
+ } else {
+ output_format = cudaVideoSurfaceFormat_NV12;
+ }
break;
case 10:
case 12:
- output_format = chroma_444 ? cudaVideoSurfaceFormat_YUV444_16Bit :
- cudaVideoSurfaceFormat_P016;
+ if (chroma_444) {
+ output_format = cudaVideoSurfaceFormat_YUV444_16Bit;
+#ifdef NVDEC_HAVE_422_SUPPORT
+ } else if (cuvid_chroma_format == cudaVideoChromaFormat_422) {
+ output_format = cudaVideoSurfaceFormat_P216;
+#endif
+ } else {
+ output_format = cudaVideoSurfaceFormat_P016;
+ }
break;
default:
av_log(avctx, AV_LOG_ERROR, "Unsupported bit depth\n");
@@ -729,13 +743,27 @@ int ff_nvdec_frame_params(AVCodecContext *avctx,
switch (sw_desc->comp[0].depth) {
case 8:
- frames_ctx->sw_format = chroma_444 ? AV_PIX_FMT_YUV444P : AV_PIX_FMT_NV12;
+ if (chroma_444) {
+ frames_ctx->sw_format = AV_PIX_FMT_YUV444P;
+#ifdef NVDEC_HAVE_422_SUPPORT
+ } else if (cuvid_chroma_format == cudaVideoChromaFormat_422) {
+ frames_ctx->sw_format = AV_PIX_FMT_NV16;
+#endif
+ } else {
+ frames_ctx->sw_format = AV_PIX_FMT_NV12;
+ }
break;
case 10:
- frames_ctx->sw_format = chroma_444 ? AV_PIX_FMT_YUV444P16 : AV_PIX_FMT_P010;
- break;
case 12:
- frames_ctx->sw_format = chroma_444 ? AV_PIX_FMT_YUV444P16 : AV_PIX_FMT_P016;
+ if (chroma_444) {
+ frames_ctx->sw_format = AV_PIX_FMT_YUV444P16;
+#ifdef NVDEC_HAVE_422_SUPPORT
+ } else if (cuvid_chroma_format == cudaVideoChromaFormat_422) {
+ frames_ctx->sw_format = AV_PIX_FMT_P216LE;
+#endif
+ } else {
+ frames_ctx->sw_format = AV_PIX_FMT_P016LE;
+ }
break;
default:
return AVERROR(EINVAL);
diff --git a/libavcodec/nvdec.h b/libavcodec/nvdec.h
index 5e22f672d1..2e80c0dc1e 100644
--- a/libavcodec/nvdec.h
+++ b/libavcodec/nvdec.h
@@ -41,6 +41,11 @@
((major) < 8 || ((major) == 8 && (minor) <= 0))
#endif
+// SDK 13.0 compile time feature checks
+#if NVDECAPI_CHECK_VERSION(13, 0)
+#define NVDEC_HAVE_422_SUPPORT
+#endif
+
typedef struct NVDECFrame {
unsigned int idx;
unsigned int ref_idx;
diff --git a/libavcodec/nvdec_h264.c b/libavcodec/nvdec_h264.c
index 9adbc521ec..1ae3dfd032 100644
--- a/libavcodec/nvdec_h264.c
+++ b/libavcodec/nvdec_h264.c
@@ -97,7 +97,7 @@ static int nvdec_h264_start_frame(AVCodecContext *avctx,
.num_ref_idx_l1_active_minus1 = pps->ref_count[1] - 1,
.weighted_pred_flag = pps->weighted_pred,
.weighted_bipred_idc = pps->weighted_bipred_idc,
- .pic_init_qp_minus26 = pps->init_qp - 26,
+ .pic_init_qp_minus26 = pps->init_qp - 26 - 6 * (sps->bit_depth_luma - 8),
.deblocking_filter_control_present_flag = pps->deblocking_filter_parameters_present,
.redundant_pic_cnt_present_flag = pps->redundant_pic_cnt_present,
.transform_8x8_mode_flag = pps->transform_8x8_mode,