diff options
author | Timo Rothenpieler <timo@rothenpieler.org> | 2016-10-10 12:47:15 +0200 |
---|---|---|
committer | Timo Rothenpieler <timo@rothenpieler.org> | 2016-11-22 10:34:27 +0100 |
commit | 5c02d2827bef81ba3f3582d67ec7693067985f9e (patch) | |
tree | 67b743809995fa8b724acf009a93cce4ef6ac79a | |
parent | d316b21dba227fa2a2bce0daf02a62f11821227f (diff) | |
download | ffmpeg-5c02d2827bef81ba3f3582d67ec7693067985f9e.tar.gz |
compat/cuda: add dynamic loader
-rw-r--r-- | compat/cuda/dynlink_cuda.h | 97 | ||||
-rw-r--r-- | compat/cuda/dynlink_cuviddec.h | 808 | ||||
-rw-r--r-- | compat/cuda/dynlink_loader.h | 254 | ||||
-rw-r--r-- | compat/cuda/dynlink_nvcuvid.h | 316 |
4 files changed, 1475 insertions, 0 deletions
diff --git a/compat/cuda/dynlink_cuda.h b/compat/cuda/dynlink_cuda.h new file mode 100644 index 0000000000..418bbd008a --- /dev/null +++ b/compat/cuda/dynlink_cuda.h @@ -0,0 +1,97 @@ +/* + * This copyright notice applies to this header file only: + * + * Copyright (c) 2016 + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the software, and to permit persons to whom the + * software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */
+
+#if !defined(AV_COMPAT_DYNLINK_CUDA_H) && !defined(CUDA_VERSION) /* also skipped when CUDA_VERSION is already defined; presumably so a real cuda.h takes precedence - confirm */
+#define AV_COMPAT_DYNLINK_CUDA_H
+
+#include <stddef.h>
+
+#define CUDA_VERSION 7050 /* version value seen by headers that test CUDA_VERSION (dynlink_cuviddec.h checks >= 3020) */
+
+#if defined(_WIN32) || defined(__CYGWIN__)
+#define CUDAAPI __stdcall
+#else
+#define CUDAAPI
+#endif
+
+#define CU_CTX_SCHED_BLOCKING_SYNC 4
+
+typedef int CUdevice;
+typedef void* CUarray;
+typedef void* CUcontext;
+#if defined(__x86_64) || defined(AMD64) || defined(_M_AMD64) /* NOTE(review): only x86-64 selects the 64-bit CUdeviceptr; other 64-bit targets fall through to unsigned int - confirm against the driver ABI and keep in step with __CUVID_DEVPTR64 */
+typedef unsigned long long CUdeviceptr;
+#else
+typedef unsigned int CUdeviceptr;
+#endif
+
+typedef enum cudaError_enum {
+    CUDA_SUCCESS = 0
+} CUresult;
+
+typedef enum CUmemorytype_enum {
+    CU_MEMORYTYPE_HOST = 1,
+    CU_MEMORYTYPE_DEVICE = 2
+} CUmemorytype;
+
+typedef struct CUDA_MEMCPY2D_st {
+    size_t srcXInBytes;
+    size_t srcY;
+    CUmemorytype srcMemoryType;
+    const void *srcHost;
+    CUdeviceptr srcDevice;
+    CUarray srcArray;
+    size_t srcPitch;
+
+    size_t dstXInBytes;
+    size_t dstY;
+    CUmemorytype dstMemoryType;
+    void *dstHost;
+    CUdeviceptr dstDevice;
+    CUarray dstArray;
+    size_t dstPitch;
+
+    size_t WidthInBytes;
+    size_t Height;
+} CUDA_MEMCPY2D;
+
+typedef CUresult CUDAAPI tcuInit(unsigned int Flags); /* the t* names below are function types (not pointers), one per driver entry point */
+typedef CUresult CUDAAPI tcuDeviceGetCount(int *count);
+typedef CUresult CUDAAPI tcuDeviceGet(CUdevice *device, int ordinal);
+typedef CUresult CUDAAPI tcuDeviceGetName(char *name, int len, CUdevice dev);
+typedef CUresult CUDAAPI tcuDeviceComputeCapability(int *major, int *minor, CUdevice dev);
+typedef CUresult CUDAAPI tcuCtxCreate_v2(CUcontext *pctx, unsigned int flags, CUdevice dev);
+typedef CUresult CUDAAPI tcuCtxPushCurrent_v2(CUcontext ctx); /* context is passed by value, as in the driver's cuCtxPushCurrent(CUcontext ctx); only Create/Pop return one through a pointer */
+typedef CUresult CUDAAPI tcuCtxPopCurrent_v2(CUcontext *pctx);
+typedef CUresult CUDAAPI tcuCtxDestroy_v2(CUcontext ctx);
+typedef CUresult CUDAAPI tcuMemAlloc_v2(CUdeviceptr *dptr, size_t bytesize);
+typedef CUresult CUDAAPI tcuMemFree_v2(CUdeviceptr dptr);
+typedef CUresult CUDAAPI tcuMemcpy2D_v2(const CUDA_MEMCPY2D *pcopy);
+typedef CUresult CUDAAPI tcuGetErrorName(CUresult
error, const char** pstr); +typedef CUresult CUDAAPI tcuGetErrorString(CUresult error, const char** pstr); + +#endif diff --git a/compat/cuda/dynlink_cuviddec.h b/compat/cuda/dynlink_cuviddec.h new file mode 100644 index 0000000000..17207bc457 --- /dev/null +++ b/compat/cuda/dynlink_cuviddec.h @@ -0,0 +1,808 @@ +/* + * This copyright notice applies to this header file only: + * + * Copyright (c) 2010-2016 NVIDIA Corporation + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the software, and to permit persons to whom the + * software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +/** + * \file cuviddec.h + * NvCuvid API provides Video Decoding interface to NVIDIA GPU devices. + * \date 2015-2016 + * This file contains constants, structure definitions and function prototypes used for decoding. 
+ */ + +#if !defined(__CUDA_VIDEO_H__) +#define __CUDA_VIDEO_H__ + +#if defined(__x86_64) || defined(AMD64) || defined(_M_AMD64) +#if (CUDA_VERSION >= 3020) && (!defined(CUDA_FORCE_API_VERSION) || (CUDA_FORCE_API_VERSION >= 3020)) +#define __CUVID_DEVPTR64 /* NOTE(review): 64-bit device pointers are selected for x86-64 only - confirm for other 64-bit targets */ +#endif +#endif + +#if defined(__cplusplus) +extern "C" { +#endif /* __cplusplus */ + +typedef void *CUvideodecoder; +typedef struct _CUcontextlock_st *CUvideoctxlock; + +/** + * \addtogroup VIDEO_DECODER Video Decoder + * @{ + */ + +/*! + * \enum cudaVideoCodec + * Video Codec Enums + */ +typedef enum cudaVideoCodec_enum { + cudaVideoCodec_MPEG1=0, /**< MPEG1 */ + cudaVideoCodec_MPEG2, /**< MPEG2 */ + cudaVideoCodec_MPEG4, /**< MPEG4 */ + cudaVideoCodec_VC1, /**< VC1 */ + cudaVideoCodec_H264, /**< H264 */ + cudaVideoCodec_JPEG, /**< JPEG */ + cudaVideoCodec_H264_SVC, /**< H264-SVC */ + cudaVideoCodec_H264_MVC, /**< H264-MVC */ + cudaVideoCodec_HEVC, /**< HEVC */ + cudaVideoCodec_VP8, /**< VP8 */ + cudaVideoCodec_VP9, /**< VP9 */ + cudaVideoCodec_NumCodecs, /**< Max Codecs (count of the compressed codec entries above) */ + // Uncompressed YUV + cudaVideoCodec_YUV420 = (('I'<<24)|('Y'<<16)|('U'<<8)|('V')), /**< Y,U,V (4:2:0) */ + cudaVideoCodec_YV12 = (('Y'<<24)|('V'<<16)|('1'<<8)|('2')), /**< Y,V,U (4:2:0) */ + cudaVideoCodec_NV12 = (('N'<<24)|('V'<<16)|('1'<<8)|('2')), /**< Y,UV (4:2:0) */ + cudaVideoCodec_YUYV = (('Y'<<24)|('U'<<16)|('Y'<<8)|('V')), /**< YUYV/YUY2 (4:2:2) */ + cudaVideoCodec_UYVY = (('U'<<24)|('Y'<<16)|('V'<<8)|('Y')) /**< UYVY (4:2:2) */ +} cudaVideoCodec; + +/*! + * \enum cudaVideoSurfaceFormat + * Video Surface Formats Enums + */ +typedef enum cudaVideoSurfaceFormat_enum { + cudaVideoSurfaceFormat_NV12=0 /**< NV12 (currently the only supported output format) */ +} cudaVideoSurfaceFormat; + +/*! 
+ * \enum cudaVideoDeinterlaceMode + * Deinterlacing Modes Enums + */ +typedef enum cudaVideoDeinterlaceMode_enum { + cudaVideoDeinterlaceMode_Weave=0, /**< Weave both fields (no deinterlacing) */ + cudaVideoDeinterlaceMode_Bob, /**< Drop one field */ + cudaVideoDeinterlaceMode_Adaptive /**< Adaptive deinterlacing */ +} cudaVideoDeinterlaceMode; + +/*! + * \enum cudaVideoChromaFormat + * Chroma Formats Enums + */ +typedef enum cudaVideoChromaFormat_enum { + cudaVideoChromaFormat_Monochrome=0, /**< MonoChrome */ + cudaVideoChromaFormat_420, /**< 4:2:0 */ + cudaVideoChromaFormat_422, /**< 4:2:2 */ + cudaVideoChromaFormat_444 /**< 4:4:4 */ +} cudaVideoChromaFormat; + +/*! + * \enum cudaVideoCreateFlags + * Decoder Flags Enums + */ +typedef enum cudaVideoCreateFlags_enum { + cudaVideoCreate_Default = 0x00, /**< Default operation mode: use dedicated video engines */ + cudaVideoCreate_PreferCUDA = 0x01, /**< Use a CUDA-based decoder if faster than dedicated engines (requires a valid vidLock object for multi-threading) */ + cudaVideoCreate_PreferDXVA = 0x02, /**< Go through DXVA internally if possible (requires D3D9 interop) */ + cudaVideoCreate_PreferCUVID = 0x04 /**< Use dedicated video engines directly */ +} cudaVideoCreateFlags; + +/*! 
+ * \struct CUVIDDECODECREATEINFO + * Struct used in create decoder + */ +typedef struct _CUVIDDECODECREATEINFO +{ + unsigned long ulWidth; /**< Coded Sequence Width */ + unsigned long ulHeight; /**< Coded Sequence Height */ + unsigned long ulNumDecodeSurfaces; /**< Maximum number of internal decode surfaces */ + cudaVideoCodec CodecType; /**< cudaVideoCodec_XXX */ + cudaVideoChromaFormat ChromaFormat; /**< cudaVideoChromaFormat_XXX (only 4:2:0 is currently supported) */ + unsigned long ulCreationFlags; /**< Decoder creation flags (cudaVideoCreate_XXX values from cudaVideoCreateFlags) */ + unsigned long bitDepthMinus8; /**< NOTE(review): presumably the bit depth of the coded sequence minus 8, per the field name - confirm against NVIDIA documentation */ + unsigned long Reserved1[4]; /**< Reserved for future use - set to zero */ + /** + * area of the frame that should be displayed + */ + struct { + short left; + short top; + short right; + short bottom; + } display_area; + + cudaVideoSurfaceFormat OutputFormat; /**< cudaVideoSurfaceFormat_XXX */ + cudaVideoDeinterlaceMode DeinterlaceMode; /**< cudaVideoDeinterlaceMode_XXX */ + unsigned long ulTargetWidth; /**< Post-processed Output Width (Should be aligned to 2) */ + unsigned long ulTargetHeight; /**< Post-processed Output Height (Should be aligned to 2) */ + unsigned long ulNumOutputSurfaces; /**< Maximum number of output surfaces simultaneously mapped */ + CUvideoctxlock vidLock; /**< If non-NULL, context lock used for synchronizing ownership of the cuda context */ + /** + * target rectangle in the output frame (for aspect ratio conversion) + * if a null rectangle is specified, {0,0,ulTargetWidth,ulTargetHeight} will be used + */ + struct { + short left; + short top; + short right; + short bottom; + } target_rect; + unsigned long Reserved2[5]; /**< Reserved for future use - set to zero */ +} CUVIDDECODECREATEINFO; + +/*! 
+ * \struct CUVIDH264DPBENTRY + * H.264 DPB Entry + */ +typedef struct _CUVIDH264DPBENTRY +{ + int PicIdx; /**< picture index of reference frame */ + int FrameIdx; /**< frame_num(short-term) or LongTermFrameIdx(long-term) */ + int is_long_term; /**< 0=short term reference, 1=long term reference */ + int not_existing; /**< non-existing reference frame (corresponding PicIdx should be set to -1) */ + int used_for_reference; /**< 0=unused, 1=top_field, 2=bottom_field, 3=both_fields */ + int FieldOrderCnt[2]; /**< field order count of top and bottom fields */ +} CUVIDH264DPBENTRY; + +/*! + * \struct CUVIDH264MVCEXT + * H.264 MVC Picture Parameters Ext + */ +typedef struct _CUVIDH264MVCEXT +{ + int num_views_minus1; + int view_id; + unsigned char inter_view_flag; + unsigned char num_inter_view_refs_l0; + unsigned char num_inter_view_refs_l1; + unsigned char MVCReserved8Bits; + int InterViewRefsL0[16]; + int InterViewRefsL1[16]; +} CUVIDH264MVCEXT; + +/*! + * \struct CUVIDH264SVCEXT + * H.264 SVC Picture Parameters Ext + */ +typedef struct _CUVIDH264SVCEXT +{ + unsigned char profile_idc; + unsigned char level_idc; + unsigned char DQId; + unsigned char DQIdMax; + unsigned char disable_inter_layer_deblocking_filter_idc; + unsigned char ref_layer_chroma_phase_y_plus1; + signed char inter_layer_slice_alpha_c0_offset_div2; + signed char inter_layer_slice_beta_offset_div2; + + unsigned short DPBEntryValidFlag; + unsigned char inter_layer_deblocking_filter_control_present_flag; + unsigned char extended_spatial_scalability_idc; + unsigned char adaptive_tcoeff_level_prediction_flag; + unsigned char slice_header_restriction_flag; + unsigned char chroma_phase_x_plus1_flag; + unsigned char chroma_phase_y_plus1; + + unsigned char tcoeff_level_prediction_flag; + unsigned char constrained_intra_resampling_flag; + unsigned char ref_layer_chroma_phase_x_plus1_flag; + unsigned char store_ref_base_pic_flag; + unsigned char Reserved8BitsA; + unsigned char Reserved8BitsB; + // For the 4 
scaled_ref_layer_XX fields below, + // if (extended_spatial_scalability_idc == 1), SPS field, G.7.3.2.1.4, add prefix "seq_" + // if (extended_spatial_scalability_idc == 2), SLH field, G.7.3.3.4, + short scaled_ref_layer_left_offset; + short scaled_ref_layer_top_offset; + short scaled_ref_layer_right_offset; + short scaled_ref_layer_bottom_offset; + unsigned short Reserved16Bits; + struct _CUVIDPICPARAMS *pNextLayer; /**< Points to the picparams for the next layer to be decoded. Linked list ends at the target layer. */ + int bRefBaseLayer; /**< whether to store ref base pic */ +} CUVIDH264SVCEXT; + +/*! + * \struct CUVIDH264PICPARAMS + * H.264 Picture Parameters + */ +typedef struct _CUVIDH264PICPARAMS +{ + // SPS + int log2_max_frame_num_minus4; + int pic_order_cnt_type; + int log2_max_pic_order_cnt_lsb_minus4; + int delta_pic_order_always_zero_flag; + int frame_mbs_only_flag; + int direct_8x8_inference_flag; + int num_ref_frames; // NOTE: shall meet level 4.1 restrictions + unsigned char residual_colour_transform_flag; + unsigned char bit_depth_luma_minus8; // Must be 0 (only 8-bit supported) + unsigned char bit_depth_chroma_minus8; // Must be 0 (only 8-bit supported) + unsigned char qpprime_y_zero_transform_bypass_flag; + // PPS + int entropy_coding_mode_flag; + int pic_order_present_flag; + int num_ref_idx_l0_active_minus1; + int num_ref_idx_l1_active_minus1; + int weighted_pred_flag; + int weighted_bipred_idc; + int pic_init_qp_minus26; + int deblocking_filter_control_present_flag; + int redundant_pic_cnt_present_flag; + int transform_8x8_mode_flag; + int MbaffFrameFlag; + int constrained_intra_pred_flag; + int chroma_qp_index_offset; + int second_chroma_qp_index_offset; + int ref_pic_flag; + int frame_num; + int CurrFieldOrderCnt[2]; + // DPB + CUVIDH264DPBENTRY dpb[16]; // List of reference frames within the DPB + // Quantization Matrices (raster-order) + unsigned char WeightScale4x4[6][16]; + unsigned char WeightScale8x8[2][64]; + // FMO/ASO + unsigned char 
fmo_aso_enable; + unsigned char num_slice_groups_minus1; + unsigned char slice_group_map_type; + signed char pic_init_qs_minus26; + unsigned int slice_group_change_rate_minus1; + union + { + unsigned long long slice_group_map_addr; + const unsigned char *pMb2SliceGroupMap; + } fmo; + unsigned int Reserved[12]; + // SVC/MVC + union + { + CUVIDH264MVCEXT mvcext; + CUVIDH264SVCEXT svcext; + } svcmvc; +} CUVIDH264PICPARAMS; + + +/*! + * \struct CUVIDMPEG2PICPARAMS + * MPEG-2 Picture Parameters + */ +typedef struct _CUVIDMPEG2PICPARAMS +{ + int ForwardRefIdx; // Picture index of forward reference (P/B-frames) + int BackwardRefIdx; // Picture index of backward reference (B-frames) + int picture_coding_type; + int full_pel_forward_vector; + int full_pel_backward_vector; + int f_code[2][2]; + int intra_dc_precision; + int frame_pred_frame_dct; + int concealment_motion_vectors; + int q_scale_type; + int intra_vlc_format; + int alternate_scan; + int top_field_first; + // Quantization matrices (raster order) + unsigned char QuantMatrixIntra[64]; + unsigned char QuantMatrixInter[64]; +} CUVIDMPEG2PICPARAMS; + +//////////////////////////////////////////////////////////////////////////////////////////////// +// +// MPEG-4 Picture Parameters +// + +// MPEG-4 has VOP types instead of Picture types +#define I_VOP 0 +#define P_VOP 1 +#define B_VOP 2 +#define S_VOP 3 + +/*! 
+ * \struct CUVIDMPEG4PICPARAMS + * MPEG-4 Picture Parameters + */ +typedef struct _CUVIDMPEG4PICPARAMS +{ + int ForwardRefIdx; // Picture index of forward reference (P/B-frames) + int BackwardRefIdx; // Picture index of backward reference (B-frames) + // VOL + int video_object_layer_width; + int video_object_layer_height; + int vop_time_increment_bitcount; + int top_field_first; + int resync_marker_disable; + int quant_type; + int quarter_sample; + int short_video_header; + int divx_flags; + // VOP + int vop_coding_type; + int vop_coded; + int vop_rounding_type; + int alternate_vertical_scan_flag; + int interlaced; + int vop_fcode_forward; + int vop_fcode_backward; + int trd[2]; + int trb[2]; + // Quantization matrices (raster order) + unsigned char QuantMatrixIntra[64]; + unsigned char QuantMatrixInter[64]; + int gmc_enabled; +} CUVIDMPEG4PICPARAMS; + +/*! + * \struct CUVIDVC1PICPARAMS + * VC1 Picture Parameters + */ +typedef struct _CUVIDVC1PICPARAMS +{ + int ForwardRefIdx; /**< Picture index of forward reference (P/B-frames) */ + int BackwardRefIdx; /**< Picture index of backward reference (B-frames) */ + int FrameWidth; /**< Actual frame width */ + int FrameHeight; /**< Actual frame height */ + // PICTURE + int intra_pic_flag; /**< Set to 1 for I,BI frames */ + int ref_pic_flag; /**< Set to 1 for I,P frames */ + int progressive_fcm; /**< Progressive frame */ + // SEQUENCE + int profile; + int postprocflag; + int pulldown; + int interlace; + int tfcntrflag; + int finterpflag; + int psf; + int multires; + int syncmarker; + int rangered; + int maxbframes; + // ENTRYPOINT + int panscan_flag; + int refdist_flag; + int extended_mv; + int dquant; + int vstransform; + int loopfilter; + int fastuvmc; + int overlap; + int quantizer; + int extended_dmv; + int range_mapy_flag; + int range_mapy; + int range_mapuv_flag; + int range_mapuv; + int rangeredfrm; // range reduction state +} CUVIDVC1PICPARAMS; + +/*! 
+ * \struct CUVIDJPEGPICPARAMS + * JPEG Picture Parameters + */ +typedef struct _CUVIDJPEGPICPARAMS +{ + int Reserved; +} CUVIDJPEGPICPARAMS; + + + /*! + * \struct CUVIDHEVCPICPARAMS + * HEVC Picture Parameters + */ +typedef struct _CUVIDHEVCPICPARAMS +{ + // sps + int pic_width_in_luma_samples; + int pic_height_in_luma_samples; + unsigned char log2_min_luma_coding_block_size_minus3; + unsigned char log2_diff_max_min_luma_coding_block_size; + unsigned char log2_min_transform_block_size_minus2; + unsigned char log2_diff_max_min_transform_block_size; + unsigned char pcm_enabled_flag; + unsigned char log2_min_pcm_luma_coding_block_size_minus3; + unsigned char log2_diff_max_min_pcm_luma_coding_block_size; + unsigned char pcm_sample_bit_depth_luma_minus1; + + unsigned char pcm_sample_bit_depth_chroma_minus1; + unsigned char pcm_loop_filter_disabled_flag; + unsigned char strong_intra_smoothing_enabled_flag; + unsigned char max_transform_hierarchy_depth_intra; + unsigned char max_transform_hierarchy_depth_inter; + unsigned char amp_enabled_flag; + unsigned char separate_colour_plane_flag; + unsigned char log2_max_pic_order_cnt_lsb_minus4; + + unsigned char num_short_term_ref_pic_sets; + unsigned char long_term_ref_pics_present_flag; + unsigned char num_long_term_ref_pics_sps; + unsigned char sps_temporal_mvp_enabled_flag; + unsigned char sample_adaptive_offset_enabled_flag; + unsigned char scaling_list_enable_flag; + unsigned char IrapPicFlag; + unsigned char IdrPicFlag; + + unsigned char bit_depth_luma_minus8; + unsigned char bit_depth_chroma_minus8; + unsigned char reserved1[14]; + + // pps + unsigned char dependent_slice_segments_enabled_flag; + unsigned char slice_segment_header_extension_present_flag; + unsigned char sign_data_hiding_enabled_flag; + unsigned char cu_qp_delta_enabled_flag; + unsigned char diff_cu_qp_delta_depth; + signed char init_qp_minus26; + signed char pps_cb_qp_offset; + signed char pps_cr_qp_offset; + + unsigned char constrained_intra_pred_flag; 
+ unsigned char weighted_pred_flag; + unsigned char weighted_bipred_flag; + unsigned char transform_skip_enabled_flag; + unsigned char transquant_bypass_enabled_flag; + unsigned char entropy_coding_sync_enabled_flag; + unsigned char log2_parallel_merge_level_minus2; + unsigned char num_extra_slice_header_bits; + + unsigned char loop_filter_across_tiles_enabled_flag; + unsigned char loop_filter_across_slices_enabled_flag; + unsigned char output_flag_present_flag; + unsigned char num_ref_idx_l0_default_active_minus1; + unsigned char num_ref_idx_l1_default_active_minus1; + unsigned char lists_modification_present_flag; + unsigned char cabac_init_present_flag; + unsigned char pps_slice_chroma_qp_offsets_present_flag; + + unsigned char deblocking_filter_override_enabled_flag; + unsigned char pps_deblocking_filter_disabled_flag; + signed char pps_beta_offset_div2; + signed char pps_tc_offset_div2; + unsigned char tiles_enabled_flag; + unsigned char uniform_spacing_flag; + unsigned char num_tile_columns_minus1; + unsigned char num_tile_rows_minus1; + + unsigned short column_width_minus1[21]; + unsigned short row_height_minus1[21]; + unsigned int reserved3[15]; + + // RefPicSets + int NumBitsForShortTermRPSInSlice; + int NumDeltaPocsOfRefRpsIdx; + int NumPocTotalCurr; + int NumPocStCurrBefore; + int NumPocStCurrAfter; + int NumPocLtCurr; + int CurrPicOrderCntVal; + int RefPicIdx[16]; // [refpic] Indices of valid reference pictures (-1 if unused for reference) + int PicOrderCntVal[16]; // [refpic] + unsigned char IsLongTerm[16]; // [refpic] 0=not a long-term reference, 1=long-term reference + unsigned char RefPicSetStCurrBefore[8]; // [0..NumPocStCurrBefore-1] -> refpic (0..15) + unsigned char RefPicSetStCurrAfter[8]; // [0..NumPocStCurrAfter-1] -> refpic (0..15) + unsigned char RefPicSetLtCurr[8]; // [0..NumPocLtCurr-1] -> refpic (0..15) + unsigned char RefPicSetInterLayer0[8]; + unsigned char RefPicSetInterLayer1[8]; + unsigned int reserved4[12]; + + // scaling lists 
(diag order) + unsigned char ScalingList4x4[6][16]; // [matrixId][i] + unsigned char ScalingList8x8[6][64]; // [matrixId][i] + unsigned char ScalingList16x16[6][64]; // [matrixId][i] + unsigned char ScalingList32x32[2][64]; // [matrixId][i] + unsigned char ScalingListDCCoeff16x16[6]; // [matrixId] + unsigned char ScalingListDCCoeff32x32[2]; // [matrixId] +} CUVIDHEVCPICPARAMS; + + +/*! + * \struct CUVIDVP8PICPARAMS + * VP8 Picture Parameters + */ +typedef struct _CUVIDVP8PICPARAMS +{ + int width; + int height; + unsigned int first_partition_size; + //Frame Indexes + unsigned char LastRefIdx; + unsigned char GoldenRefIdx; + unsigned char AltRefIdx; + union { + struct { + unsigned char frame_type : 1; /**< 0 = KEYFRAME, 1 = INTERFRAME */ + unsigned char version : 3; + unsigned char show_frame : 1; + unsigned char update_mb_segmentation_data : 1; /**< Must be 0 if segmentation is not enabled */ + unsigned char Reserved2Bits : 2; + }; + unsigned char wFrameTagFlags; + } tagflags; + unsigned char Reserved1[4]; + unsigned int Reserved2[3]; +} CUVIDVP8PICPARAMS; + +/*! 
+ * \struct CUVIDVP9PICPARAMS + * VP9 Picture Parameters + */ +typedef struct _CUVIDVP9PICPARAMS +{ + unsigned int width; + unsigned int height; + + //Frame Indices + unsigned char LastRefIdx; + unsigned char GoldenRefIdx; + unsigned char AltRefIdx; + unsigned char colorSpace; + + unsigned short profile : 3; + unsigned short frameContextIdx : 2; + unsigned short frameType : 1; + unsigned short showFrame : 1; + unsigned short errorResilient : 1; + unsigned short frameParallelDecoding : 1; + unsigned short subSamplingX : 1; + unsigned short subSamplingY : 1; + unsigned short intraOnly : 1; + unsigned short allow_high_precision_mv : 1; + unsigned short refreshEntropyProbs : 1; + unsigned short reserved2Bits : 2; + + unsigned short reserved16Bits; + + unsigned char refFrameSignBias[4]; + + unsigned char bitDepthMinus8Luma; + unsigned char bitDepthMinus8Chroma; + unsigned char loopFilterLevel; + unsigned char loopFilterSharpness; + + unsigned char modeRefLfEnabled; + unsigned char log2_tile_columns; + unsigned char log2_tile_rows; + + unsigned char segmentEnabled : 1; + unsigned char segmentMapUpdate : 1; + unsigned char segmentMapTemporalUpdate : 1; + unsigned char segmentFeatureMode : 1; + unsigned char reserved4Bits : 4; + + + unsigned char segmentFeatureEnable[8][4]; + short segmentFeatureData[8][4]; + unsigned char mb_segment_tree_probs[7]; + unsigned char segment_pred_probs[3]; + unsigned char reservedSegment16Bits[2]; + + int qpYAc; + int qpYDc; + int qpChDc; + int qpChAc; + + unsigned int activeRefIdx[3]; + unsigned int resetFrameContext; + unsigned int mcomp_filter_type; + unsigned int mbRefLfDelta[4]; + unsigned int mbModeLfDelta[2]; + unsigned int frameTagSize; + unsigned int offsetToDctParts; + unsigned int reserved128Bits[4]; + +} CUVIDVP9PICPARAMS; + + +/*! 
+ * \struct CUVIDPICPARAMS + * Picture Parameters for Decoding + */ +typedef struct _CUVIDPICPARAMS +{ + int PicWidthInMbs; /**< Coded Frame Size */ + int FrameHeightInMbs; /**< Coded Frame Height */ + int CurrPicIdx; /**< Output index of the current picture */ + int field_pic_flag; /**< 0=frame picture, 1=field picture */ + int bottom_field_flag; /**< 0=top field, 1=bottom field (ignored if field_pic_flag=0) */ + int second_field; /**< Second field of a complementary field pair */ + // Bitstream data + unsigned int nBitstreamDataLen; /**< Number of bytes in bitstream data buffer */ + const unsigned char *pBitstreamData; /**< Ptr to bitstream data for this picture (slice-layer) */ + unsigned int nNumSlices; /**< Number of slices in this picture */ + const unsigned int *pSliceDataOffsets; /**< nNumSlices entries, contains offset of each slice within the bitstream data buffer */ + int ref_pic_flag; /**< This picture is a reference picture */ + int intra_pic_flag; /**< This picture is entirely intra coded */ + unsigned int Reserved[30]; /**< Reserved for future use */ + // Codec-specific data + union { + CUVIDMPEG2PICPARAMS mpeg2; /**< Also used for MPEG-1 */ + CUVIDH264PICPARAMS h264; + CUVIDVC1PICPARAMS vc1; + CUVIDMPEG4PICPARAMS mpeg4; + CUVIDJPEGPICPARAMS jpeg; + CUVIDHEVCPICPARAMS hevc; + CUVIDVP8PICPARAMS vp8; + CUVIDVP9PICPARAMS vp9; + unsigned int CodecReserved[1024]; + } CodecSpecific; +} CUVIDPICPARAMS; + + +/*! 
+ * \struct CUVIDPROCPARAMS + * Picture Parameters for Postprocessing + */ +typedef struct _CUVIDPROCPARAMS +{ + int progressive_frame; /**< Input is progressive (deinterlace_mode will be ignored) */ + int second_field; /**< Output the second field (ignored if deinterlace mode is Weave) */ + int top_field_first; /**< Input frame is top field first (1st field is top, 2nd field is bottom) */ + int unpaired_field; /**< Input only contains one field (2nd field is invalid) */ + // The fields below are used for raw YUV input + unsigned int reserved_flags; /**< Reserved for future use (set to zero) */ + unsigned int reserved_zero; /**< Reserved (set to zero) */ + unsigned long long raw_input_dptr; /**< Input CUdeviceptr for raw YUV extensions */ + unsigned int raw_input_pitch; /**< pitch in bytes of raw YUV input (should be aligned appropriately) */ + unsigned int raw_input_format; /**< Reserved for future use (set to zero) */ + unsigned long long raw_output_dptr; /**< Reserved for future use (set to zero) */ + unsigned int raw_output_pitch; /**< Reserved for future use (set to zero) */ + unsigned int Reserved[48]; + void *Reserved3[3]; +} CUVIDPROCPARAMS; + + +/** + * + * In order to minimize decode latencies, there should be always at least 2 pictures in the decode + * queue at any time, in order to make sure that all decode engines are always busy. + * + * Overall data flow: + * - cuvidCreateDecoder(...) + * For each picture: + * - cuvidDecodePicture(N) + * - cuvidMapVideoFrame(N-4) + * - do some processing in cuda + * - cuvidUnmapVideoFrame(N-4) + * - cuvidDecodePicture(N+1) + * - cuvidMapVideoFrame(N-3) + * ... + * - cuvidDestroyDecoder(...) + * + * NOTE: + * - When the cuda context is created from a D3D device, the D3D device must also be created + * with the D3DCREATE_MULTITHREADED flag. 
+ * - There is a limit to how many pictures can be mapped simultaneously (ulNumOutputSurfaces) + * - cuvidDecodePicture may block the calling thread if there are too many pictures pending + * in the decode queue + */ + +/** + * \fn CUresult CUDAAPI cuvidCreateDecoder(CUvideodecoder *phDecoder, CUVIDDECODECREATEINFO *pdci) + * Create the decoder object + */ +typedef CUresult CUDAAPI tcuvidCreateDecoder(CUvideodecoder *phDecoder, CUVIDDECODECREATEINFO *pdci); + +/** + * \fn CUresult CUDAAPI cuvidDestroyDecoder(CUvideodecoder hDecoder) + * Destroy the decoder object + */ +typedef CUresult CUDAAPI tcuvidDestroyDecoder(CUvideodecoder hDecoder); + +/** + * \fn CUresult CUDAAPI cuvidDecodePicture(CUvideodecoder hDecoder, CUVIDPICPARAMS *pPicParams) + * Decode a single picture (field or frame) + */ +typedef CUresult CUDAAPI tcuvidDecodePicture(CUvideodecoder hDecoder, CUVIDPICPARAMS *pPicParams); + + +#if !defined(__CUVID_DEVPTR64) || defined(__CUVID_INTERNAL) +/** + * \fn CUresult CUDAAPI cuvidMapVideoFrame(CUvideodecoder hDecoder, int nPicIdx, unsigned int *pDevPtr, unsigned int *pPitch, CUVIDPROCPARAMS *pVPP); + * Post-process and map a video frame for use in cuda + */ +typedef CUresult CUDAAPI tcuvidMapVideoFrame(CUvideodecoder hDecoder, int nPicIdx, + unsigned int *pDevPtr, unsigned int *pPitch, + CUVIDPROCPARAMS *pVPP); + +/** + * \fn CUresult CUDAAPI cuvidUnmapVideoFrame(CUvideodecoder hDecoder, unsigned int DevPtr) + * Unmap a previously mapped video frame + */ +typedef CUresult CUDAAPI tcuvidUnmapVideoFrame(CUvideodecoder hDecoder, unsigned int DevPtr); +#endif + +#if defined(WIN64) || defined(_WIN64) || defined(__x86_64) || defined(AMD64) || defined(_M_AMD64) +/** + * \fn CUresult CUDAAPI cuvidMapVideoFrame64(CUvideodecoder hDecoder, int nPicIdx, unsigned long long *pDevPtr, unsigned int *pPitch, CUVIDPROCPARAMS *pVPP); + * map a video frame + */ +typedef CUresult CUDAAPI tcuvidMapVideoFrame64(CUvideodecoder hDecoder, int nPicIdx, unsigned long long *pDevPtr, + 
unsigned int *pPitch, CUVIDPROCPARAMS *pVPP); + +/** + * \fn CUresult CUDAAPI cuvidUnmapVideoFrame64(CUvideodecoder hDecoder, unsigned long long DevPtr); + * Unmap a previously mapped video frame + */ +typedef CUresult CUDAAPI tcuvidUnmapVideoFrame64(CUvideodecoder hDecoder, unsigned long long DevPtr); + +#if defined(__CUVID_DEVPTR64) && !defined(__CUVID_INTERNAL) +#define tcuvidMapVideoFrame tcuvidMapVideoFrame64 +#define tcuvidUnmapVideoFrame tcuvidUnmapVideoFrame64 +#endif +#endif + + +/** + * + * Context-locking: to facilitate multi-threaded implementations, the following 4 functions + * provide a simple mutex-style host synchronization. If a non-NULL context is specified + * in CUVIDDECODECREATEINFO, the codec library will acquire the mutex associated with the given + * context before making any cuda calls. + * A multi-threaded application could create a lock associated with a context handle so that + * multiple threads can safely share the same cuda context: + * - use cuCtxPopCurrent immediately after context creation in order to create a 'floating' context + * that can be passed to cuvidCtxLockCreate. + * - When using a floating context, all cuda calls should only be made within a cuvidCtxLock/cuvidCtxUnlock section. + * + * NOTE: This is a safer alternative to cuCtxPushCurrent and cuCtxPopCurrent, and is not related to video + * decoder in any way (implemented as a critical section associated with cuCtx{Push|Pop}Current calls). 
+*/ + +/** + * \fn CUresult CUDAAPI cuvidCtxLockCreate(CUvideoctxlock *pLock, CUcontext ctx) + */ +typedef CUresult CUDAAPI tcuvidCtxLockCreate(CUvideoctxlock *pLock, CUcontext ctx); + +/** + * \fn CUresult CUDAAPI cuvidCtxLockDestroy(CUvideoctxlock lck) + */ +typedef CUresult CUDAAPI tcuvidCtxLockDestroy(CUvideoctxlock lck); + +/** + * \fn CUresult CUDAAPI cuvidCtxLock(CUvideoctxlock lck, unsigned int reserved_flags) + */ +typedef CUresult CUDAAPI tcuvidCtxLock(CUvideoctxlock lck, unsigned int reserved_flags); + +/** + * \fn CUresult CUDAAPI cuvidCtxUnlock(CUvideoctxlock lck, unsigned int reserved_flags) + */ +typedef CUresult CUDAAPI tcuvidCtxUnlock(CUvideoctxlock lck, unsigned int reserved_flags); + +/** @} */ /* End VIDEO_DECODER */ + +#if defined(__cplusplus) +} +#endif /* __cplusplus */ + +#endif // __CUDA_VIDEO_H__ diff --git a/compat/cuda/dynlink_loader.h b/compat/cuda/dynlink_loader.h new file mode 100644 index 0000000000..33f23af1e2 --- /dev/null +++ b/compat/cuda/dynlink_loader.h @@ -0,0 +1,254 @@ +/* + * This copyright notice applies to this header file only: + * + * Copyright (c) 2016 + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the software, and to permit persons to whom the + * software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef AV_COMPAT_CUDA_DYNLINK_LOADER_H +#define AV_COMPAT_CUDA_DYNLINK_LOADER_H + +#include "compat/cuda/dynlink_cuda.h" +#include "compat/cuda/dynlink_nvcuvid.h" +#include "compat/nvenc/nvEncodeAPI.h" +#include "compat/w32dlfcn.h" + +#include "libavutil/log.h" +#include "libavutil/error.h" + +#if defined(_WIN32) +# define LIB_HANDLE HMODULE +#else +# define LIB_HANDLE void* +#endif + +#if defined(_WIN32) || defined(__CYGWIN__) +# define CUDA_LIBNAME "nvcuda.dll" +# define NVCUVID_LIBNAME "nvcuvid.dll" +# if ARCH_X86_64 +# define NVENC_LIBNAME "nvEncodeAPI64.dll" +# else +# define NVENC_LIBNAME "nvEncodeAPI.dll" +# endif +#else +# define CUDA_LIBNAME "libcuda.so.1" +# define NVCUVID_LIBNAME "libnvcuvid.so.1" +# define NVENC_LIBNAME "libnvidia-encode.so.1" +#endif + +#define LOAD_LIBRARY(l, path) \ + do { \ + if (!((l) = dlopen(path, RTLD_LAZY))) { \ + av_log(NULL, AV_LOG_ERROR, "Cannot load %s\n", path); \ + ret = AVERROR_UNKNOWN; \ + goto error; \ + } \ + av_log(NULL, AV_LOG_TRACE, "Loaded lib: %s\n", path); \ + } while (0) + +#define LOAD_SYMBOL(fun, symbol) \ + do { \ + if (!((f->fun) = dlsym(f->lib, symbol))) { \ + av_log(NULL, AV_LOG_ERROR, "Cannot load %s\n", symbol); \ + ret = AVERROR_UNKNOWN; \ + goto error; \ + } \ + av_log(NULL, AV_LOG_TRACE, "Loaded sym: %s\n", symbol); \ + } while (0) + +#define GENERIC_LOAD_FUNC_PREAMBLE(T, n, N) \ + T *f; \ + int ret; \ + \ + n##_free_functions(functions); \ + \ + f = *functions = av_mallocz(sizeof(*f)); \ + if (!f) \ + return AVERROR(ENOMEM); \ + \ + LOAD_LIBRARY(f->lib, N); + +#define GENERIC_LOAD_FUNC_FINALE(n) \ + return 0; \ +error: \ + n##_free_functions(functions); \ + return ret; + +#define GENERIC_FREE_FUNC() \ + if (!functions) \ 
+ return; \ + if (*functions && (*functions)->lib) \ + dlclose((*functions)->lib); \ + av_freep(functions); + +#ifdef AV_COMPAT_DYNLINK_CUDA_H +typedef struct CudaFunctions { + tcuInit *cuInit; + tcuDeviceGetCount *cuDeviceGetCount; + tcuDeviceGet *cuDeviceGet; + tcuDeviceGetName *cuDeviceGetName; + tcuDeviceComputeCapability *cuDeviceComputeCapability; + tcuCtxCreate_v2 *cuCtxCreate; + tcuCtxPushCurrent_v2 *cuCtxPushCurrent; + tcuCtxPopCurrent_v2 *cuCtxPopCurrent; + tcuCtxDestroy_v2 *cuCtxDestroy; + tcuMemAlloc_v2 *cuMemAlloc; + tcuMemFree_v2 *cuMemFree; + tcuMemcpy2D_v2 *cuMemcpy2D; + tcuGetErrorName *cuGetErrorName; + tcuGetErrorString *cuGetErrorString; + + LIB_HANDLE lib; +} CudaFunctions; +#else +typedef struct CudaFunctions CudaFunctions; +#endif + +typedef struct CuvidFunctions { + tcuvidCreateDecoder *cuvidCreateDecoder; + tcuvidDestroyDecoder *cuvidDestroyDecoder; + tcuvidDecodePicture *cuvidDecodePicture; + tcuvidMapVideoFrame *cuvidMapVideoFrame; + tcuvidUnmapVideoFrame *cuvidUnmapVideoFrame; + tcuvidCtxLockCreate *cuvidCtxLockCreate; + tcuvidCtxLockDestroy *cuvidCtxLockDestroy; + tcuvidCtxLock *cuvidCtxLock; + tcuvidCtxUnlock *cuvidCtxUnlock; + + tcuvidCreateVideoSource *cuvidCreateVideoSource; + tcuvidCreateVideoSourceW *cuvidCreateVideoSourceW; + tcuvidDestroyVideoSource *cuvidDestroyVideoSource; + tcuvidSetVideoSourceState *cuvidSetVideoSourceState; + tcuvidGetVideoSourceState *cuvidGetVideoSourceState; + tcuvidGetSourceVideoFormat *cuvidGetSourceVideoFormat; + tcuvidGetSourceAudioFormat *cuvidGetSourceAudioFormat; + tcuvidCreateVideoParser *cuvidCreateVideoParser; + tcuvidParseVideoData *cuvidParseVideoData; + tcuvidDestroyVideoParser *cuvidDestroyVideoParser; + + LIB_HANDLE lib; +} CuvidFunctions; + +typedef struct NvencFunctions { + NVENCSTATUS (NVENCAPI *NvEncodeAPICreateInstance)(NV_ENCODE_API_FUNCTION_LIST *functionList); + NVENCSTATUS (NVENCAPI *NvEncodeAPIGetMaxSupportedVersion)(uint32_t* version); + + LIB_HANDLE lib; +} NvencFunctions; + 
+#ifdef AV_COMPAT_DYNLINK_CUDA_H +static inline void cuda_free_functions(CudaFunctions **functions) +{ + GENERIC_FREE_FUNC(); +} +#endif + +static inline void cuvid_free_functions(CuvidFunctions **functions) +{ + GENERIC_FREE_FUNC(); +} + +static inline void nvenc_free_functions(NvencFunctions **functions) +{ + GENERIC_FREE_FUNC(); +} + +#ifdef AV_COMPAT_DYNLINK_CUDA_H +static inline int cuda_load_functions(CudaFunctions **functions) +{ + GENERIC_LOAD_FUNC_PREAMBLE(CudaFunctions, cuda, CUDA_LIBNAME); + + LOAD_SYMBOL(cuInit, "cuInit"); + LOAD_SYMBOL(cuDeviceGetCount, "cuDeviceGetCount"); + LOAD_SYMBOL(cuDeviceGet, "cuDeviceGet"); + LOAD_SYMBOL(cuDeviceGetName, "cuDeviceGetName"); + LOAD_SYMBOL(cuDeviceComputeCapability, "cuDeviceComputeCapability"); + LOAD_SYMBOL(cuCtxCreate, "cuCtxCreate_v2"); + LOAD_SYMBOL(cuCtxPushCurrent, "cuCtxPushCurrent_v2"); + LOAD_SYMBOL(cuCtxPopCurrent, "cuCtxPopCurrent_v2"); + LOAD_SYMBOL(cuCtxDestroy, "cuCtxDestroy_v2"); + LOAD_SYMBOL(cuMemAlloc, "cuMemAlloc_v2"); + LOAD_SYMBOL(cuMemFree, "cuMemFree_v2"); + LOAD_SYMBOL(cuMemcpy2D, "cuMemcpy2D_v2"); + LOAD_SYMBOL(cuGetErrorName, "cuGetErrorName"); + LOAD_SYMBOL(cuGetErrorString, "cuGetErrorString"); + + GENERIC_LOAD_FUNC_FINALE(cuda); +} +#endif + +static inline int cuvid_load_functions(CuvidFunctions **functions) +{ + GENERIC_LOAD_FUNC_PREAMBLE(CuvidFunctions, cuvid, NVCUVID_LIBNAME); + + LOAD_SYMBOL(cuvidCreateDecoder, "cuvidCreateDecoder"); + LOAD_SYMBOL(cuvidDestroyDecoder, "cuvidDestroyDecoder"); + LOAD_SYMBOL(cuvidDecodePicture, "cuvidDecodePicture"); +#ifdef __CUVID_DEVPTR64 + LOAD_SYMBOL(cuvidMapVideoFrame, "cuvidMapVideoFrame64"); + LOAD_SYMBOL(cuvidUnmapVideoFrame, "cuvidUnmapVideoFrame64"); +#else + LOAD_SYMBOL(cuvidMapVideoFrame, "cuvidMapVideoFrame"); + LOAD_SYMBOL(cuvidUnmapVideoFrame, "cuvidUnmapVideoFrame"); +#endif + LOAD_SYMBOL(cuvidCtxLockCreate, "cuvidCtxLockCreate"); + LOAD_SYMBOL(cuvidCtxLockDestroy, "cuvidCtxLockDestroy"); + LOAD_SYMBOL(cuvidCtxLock, 
"cuvidCtxLock"); + LOAD_SYMBOL(cuvidCtxUnlock, "cuvidCtxUnlock"); + + LOAD_SYMBOL(cuvidCreateVideoSource, "cuvidCreateVideoSource"); + LOAD_SYMBOL(cuvidCreateVideoSourceW, "cuvidCreateVideoSourceW"); + LOAD_SYMBOL(cuvidDestroyVideoSource, "cuvidDestroyVideoSource"); + LOAD_SYMBOL(cuvidSetVideoSourceState, "cuvidSetVideoSourceState"); + LOAD_SYMBOL(cuvidGetVideoSourceState, "cuvidGetVideoSourceState"); + LOAD_SYMBOL(cuvidGetSourceVideoFormat, "cuvidGetSourceVideoFormat"); + LOAD_SYMBOL(cuvidGetSourceAudioFormat, "cuvidGetSourceAudioFormat"); + LOAD_SYMBOL(cuvidCreateVideoParser, "cuvidCreateVideoParser"); + LOAD_SYMBOL(cuvidParseVideoData, "cuvidParseVideoData"); + LOAD_SYMBOL(cuvidDestroyVideoParser, "cuvidDestroyVideoParser"); + + GENERIC_LOAD_FUNC_FINALE(cuvid); +} + +static inline int nvenc_load_functions(NvencFunctions **functions) +{ + GENERIC_LOAD_FUNC_PREAMBLE(NvencFunctions, nvenc, NVENC_LIBNAME); + + LOAD_SYMBOL(NvEncodeAPICreateInstance, "NvEncodeAPICreateInstance"); + LOAD_SYMBOL(NvEncodeAPIGetMaxSupportedVersion, "NvEncodeAPIGetMaxSupportedVersion"); + + GENERIC_LOAD_FUNC_FINALE(nvenc); +} + +#undef GENERIC_LOAD_FUNC_PREAMBLE +#undef LOAD_LIBRARY +#undef LOAD_SYMBOL +#undef GENERIC_LOAD_FUNC_FINALE +#undef GENERIC_FREE_FUNC +#undef CUDA_LIBNAME +#undef NVCUVID_LIBNAME +#undef NVENC_LIBNAME +#undef LIB_HANDLE + +#endif + diff --git a/compat/cuda/dynlink_nvcuvid.h b/compat/cuda/dynlink_nvcuvid.h new file mode 100644 index 0000000000..6c197e0787 --- /dev/null +++ b/compat/cuda/dynlink_nvcuvid.h @@ -0,0 +1,316 @@ +/* + * This copyright notice applies to this header file only: + * + * Copyright (c) 2010-2016 NVIDIA Corporation + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * 
copies of the software, and to permit persons to whom the + * software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +/** + * \file nvcuvid.h + * NvCuvid API provides Video Decoding interface to NVIDIA GPU devices. + * \date 2015-2015 + * This file contains the interface constants, structure definitions and function prototypes. + */ + +#if !defined(__NVCUVID_H__) +#define __NVCUVID_H__ + +#include "compat/cuda/dynlink_cuviddec.h" + +#if defined(__cplusplus) +extern "C" { +#endif /* __cplusplus */ + +//////////////////////////////////////////////////////////////////////////////////////////////// +// +// High-level helper APIs for video sources +// + +typedef void *CUvideosource; +typedef void *CUvideoparser; +typedef long long CUvideotimestamp; + +/** + * \addtogroup VIDEO_PARSER Video Parser + * @{ + */ + +/*! + * \enum cudaVideoState + * Video Source State + */ +typedef enum { + cudaVideoState_Error = -1, /**< Error state (invalid source) */ + cudaVideoState_Stopped = 0, /**< Source is stopped (or reached end-of-stream) */ + cudaVideoState_Started = 1 /**< Source is running and delivering data */ +} cudaVideoState; + +/*! 
+ * \enum cudaAudioCodec + * Audio compression + */ +typedef enum { + cudaAudioCodec_MPEG1=0, /**< MPEG-1 Audio */ + cudaAudioCodec_MPEG2, /**< MPEG-2 Audio */ + cudaAudioCodec_MP3, /**< MPEG-1 Layer III Audio */ + cudaAudioCodec_AC3, /**< Dolby Digital (AC3) Audio */ + cudaAudioCodec_LPCM /**< PCM Audio */ +} cudaAudioCodec; + +/*! + * \struct CUVIDEOFORMAT + * Video format + */ +typedef struct +{ + cudaVideoCodec codec; /**< Compression format */ + /** + * frame rate = numerator / denominator (for example: 30000/1001) + */ + struct { + unsigned int numerator; /**< frame rate numerator (0 = unspecified or variable frame rate) */ + unsigned int denominator; /**< frame rate denominator (0 = unspecified or variable frame rate) */ + } frame_rate; + unsigned char progressive_sequence; /**< 0=interlaced, 1=progressive */ + unsigned char bit_depth_luma_minus8; /**< high bit depth Luma */ + unsigned char bit_depth_chroma_minus8; /**< high bit depth Chroma */ + unsigned char reserved1; /**< Reserved for future use */ + unsigned int coded_width; /**< coded frame width */ + unsigned int coded_height; /**< coded frame height */ + /** + * area of the frame that should be displayed + * typical example: + * coded_width = 1920, coded_height = 1088 + * display_area = { 0,0,1920,1080 } + */ + struct { + int left; /**< left position of display rect */ + int top; /**< top position of display rect */ + int right; /**< right position of display rect */ + int bottom; /**< bottom position of display rect */ + } display_area; + cudaVideoChromaFormat chroma_format; /**< Chroma format */ + unsigned int bitrate; /**< video bitrate (bps, 0=unknown) */ + /** + * Display Aspect Ratio = x:y (4:3, 16:9, etc) + */ + struct { + int x; + int y; + } display_aspect_ratio; + /** + * Video Signal Description + */ + struct { + unsigned char video_format : 3; + unsigned char video_full_range_flag : 1; + unsigned char reserved_zero_bits : 4; + unsigned char color_primaries; + unsigned char 
transfer_characteristics; + unsigned char matrix_coefficients; + } video_signal_description; + unsigned int seqhdr_data_length; /**< Additional bytes following (CUVIDEOFORMATEX) */ +} CUVIDEOFORMAT; + +/*! + * \struct CUVIDEOFORMATEX + * Video format including raw sequence header information + */ +typedef struct +{ + CUVIDEOFORMAT format; + unsigned char raw_seqhdr_data[1024]; +} CUVIDEOFORMATEX; + +/*! + * \struct CUAUDIOFORMAT + * Audio Formats + */ +typedef struct +{ + cudaAudioCodec codec; /**< Compression format */ + unsigned int channels; /**< number of audio channels */ + unsigned int samplespersec; /**< sampling frequency */ + unsigned int bitrate; /**< For uncompressed, can also be used to determine bits per sample */ + unsigned int reserved1; /**< Reserved for future use */ + unsigned int reserved2; /**< Reserved for future use */ +} CUAUDIOFORMAT; + + +/*! + * \enum CUvideopacketflags + * Data packet flags + */ +typedef enum { + CUVID_PKT_ENDOFSTREAM = 0x01, /**< Set when this is the last packet for this stream */ + CUVID_PKT_TIMESTAMP = 0x02, /**< Timestamp is valid */ + CUVID_PKT_DISCONTINUITY = 0x04 /**< Set when a discontinuity has to be signalled */ +} CUvideopacketflags; + +/*! + * \struct CUVIDSOURCEDATAPACKET + * Data Packet + */ +typedef struct _CUVIDSOURCEDATAPACKET +{ + unsigned long flags; /**< Combination of CUVID_PKT_XXX flags */ + unsigned long payload_size; /**< number of bytes in the payload (may be zero if EOS flag is set) */ + const unsigned char *payload; /**< Pointer to packet payload data (may be NULL if EOS flag is set) */ + CUvideotimestamp timestamp; /**< Presentation timestamp (10MHz clock), only valid if CUVID_PKT_TIMESTAMP flag is set */ +} CUVIDSOURCEDATAPACKET; + +// Callback for packet delivery +typedef int (CUDAAPI *PFNVIDSOURCECALLBACK)(void *, CUVIDSOURCEDATAPACKET *); + +/*! 
+ * \struct CUVIDSOURCEPARAMS + * Source Params + */ +typedef struct _CUVIDSOURCEPARAMS +{ + unsigned int ulClockRate; /**< Timestamp units in Hz (0=default=10000000Hz) */ + unsigned int uReserved1[7]; /**< Reserved for future use - set to zero */ + void *pUserData; /**< Parameter passed in to the data handlers */ + PFNVIDSOURCECALLBACK pfnVideoDataHandler; /**< Called to deliver video packets */ + PFNVIDSOURCECALLBACK pfnAudioDataHandler; /**< Called to deliver audio packets */ + void *pvReserved2[8]; /**< Reserved for future use - set to NULL */ +} CUVIDSOURCEPARAMS; + +/*! + * \enum CUvideosourceformat_flags + * CUvideosourceformat_flags + */ +typedef enum { + CUVID_FMT_EXTFORMATINFO = 0x100 /**< Return extended format structure (CUVIDEOFORMATEX) */ +} CUvideosourceformat_flags; + +#if !defined(__APPLE__) +/** + * \fn CUresult CUDAAPI cuvidCreateVideoSource(CUvideosource *pObj, const char *pszFileName, CUVIDSOURCEPARAMS *pParams) + * Create Video Source + */ +typedef CUresult CUDAAPI tcuvidCreateVideoSource(CUvideosource *pObj, const char *pszFileName, CUVIDSOURCEPARAMS *pParams); + +/** + * \fn CUresult CUDAAPI cuvidCreateVideoSourceW(CUvideosource *pObj, const wchar_t *pwszFileName, CUVIDSOURCEPARAMS *pParams) + * Create Video Source + */ +typedef CUresult CUDAAPI tcuvidCreateVideoSourceW(CUvideosource *pObj, const wchar_t *pwszFileName, CUVIDSOURCEPARAMS *pParams); + +/** + * \fn CUresult CUDAAPI cuvidDestroyVideoSource(CUvideosource obj) + * Destroy Video Source + */ +typedef CUresult CUDAAPI tcuvidDestroyVideoSource(CUvideosource obj); + +/** + * \fn CUresult CUDAAPI cuvidSetVideoSourceState(CUvideosource obj, cudaVideoState state) + * Set Video Source state + */ +typedef CUresult CUDAAPI tcuvidSetVideoSourceState(CUvideosource obj, cudaVideoState state); + +/** + * \fn cudaVideoState CUDAAPI cuvidGetVideoSourceState(CUvideosource obj) + * Get Video Source state + */ +typedef cudaVideoState CUDAAPI tcuvidGetVideoSourceState(CUvideosource obj); + +/** + * 
\fn CUresult CUDAAPI cuvidGetSourceVideoFormat(CUvideosource obj, CUVIDEOFORMAT *pvidfmt, unsigned int flags) + * Get Video Source Format + */ +typedef CUresult CUDAAPI tcuvidGetSourceVideoFormat(CUvideosource obj, CUVIDEOFORMAT *pvidfmt, unsigned int flags); + +/** + * \fn CUresult CUDAAPI cuvidGetSourceAudioFormat(CUvideosource obj, CUAUDIOFORMAT *paudfmt, unsigned int flags) + * Get Audio Source Format + */ +typedef CUresult CUDAAPI tcuvidGetSourceAudioFormat(CUvideosource obj, CUAUDIOFORMAT *paudfmt, unsigned int flags); + +#endif + +/** + * \struct CUVIDPARSERDISPINFO + */ +typedef struct _CUVIDPARSERDISPINFO +{ + int picture_index; /**< */ + int progressive_frame; /**< */ + int top_field_first; /**< */ + int repeat_first_field; /**< Number of additional fields (1=ivtc, 2=frame doubling, 4=frame tripling, -1=unpaired field) */ + CUvideotimestamp timestamp; /**< */ +} CUVIDPARSERDISPINFO; + +// +// Parser callbacks +// The parser will call these synchronously from within cuvidParseVideoData(), whenever a picture is ready to +// be decoded and/or displayed. 
+// +typedef int (CUDAAPI *PFNVIDSEQUENCECALLBACK)(void *, CUVIDEOFORMAT *); +typedef int (CUDAAPI *PFNVIDDECODECALLBACK)(void *, CUVIDPICPARAMS *); +typedef int (CUDAAPI *PFNVIDDISPLAYCALLBACK)(void *, CUVIDPARSERDISPINFO *); + +/** + * \struct CUVIDPARSERPARAMS + */ +typedef struct _CUVIDPARSERPARAMS +{ + cudaVideoCodec CodecType; /**< cudaVideoCodec_XXX */ + unsigned int ulMaxNumDecodeSurfaces; /**< Max # of decode surfaces (parser will cycle through these) */ + unsigned int ulClockRate; /**< Timestamp units in Hz (0=default=10000000Hz) */ + unsigned int ulErrorThreshold; /**< % Error threshold (0-100) for calling pfnDecodePicture (100=always call pfnDecodePicture even if picture bitstream is fully corrupted) */ + unsigned int ulMaxDisplayDelay; /**< Max display queue delay (improves pipelining of decode with display) - 0=no delay (recommended values: 2..4) */ + unsigned int uReserved1[5]; /**< Reserved for future use - set to 0 */ + void *pUserData; /**< User data for callbacks */ + PFNVIDSEQUENCECALLBACK pfnSequenceCallback; /**< Called before decoding frames and/or whenever there is a format change */ + PFNVIDDECODECALLBACK pfnDecodePicture; /**< Called when a picture is ready to be decoded (decode order) */ + PFNVIDDISPLAYCALLBACK pfnDisplayPicture; /**< Called whenever a picture is ready to be displayed (display order) */ + void *pvReserved2[7]; /**< Reserved for future use - set to NULL */ + CUVIDEOFORMATEX *pExtVideoInfo; /**< [Optional] sequence header data from system layer */ +} CUVIDPARSERPARAMS; + +/** + * \fn CUresult CUDAAPI cuvidCreateVideoParser(CUvideoparser *pObj, CUVIDPARSERPARAMS *pParams) + */ +typedef CUresult CUDAAPI tcuvidCreateVideoParser(CUvideoparser *pObj, CUVIDPARSERPARAMS *pParams); + +/** + * \fn CUresult CUDAAPI cuvidParseVideoData(CUvideoparser obj, CUVIDSOURCEDATAPACKET *pPacket) + */ +typedef CUresult CUDAAPI tcuvidParseVideoData(CUvideoparser obj, CUVIDSOURCEDATAPACKET *pPacket); + +/** + * \fn CUresult CUDAAPI 
cuvidDestroyVideoParser(CUvideoparser obj) + */ +typedef CUresult CUDAAPI tcuvidDestroyVideoParser(CUvideoparser obj); + +/** @} */ /* END VIDEO_PARSER */ +//////////////////////////////////////////////////////////////////////////////////////////////// + +#if defined(__cplusplus) +} +#endif /* __cplusplus */ + +#endif // __NVCUVID_H__ + + |