From 369122dd775c906be06b9c96f03eaedaedb91658 Mon Sep 17 00:00:00 2001 From: NVIDIA Corporation <> Date: Sun, 4 Jan 2009 23:55:27 +0000 Subject: Add VDPAU hardware accelerated decoding for H264 which can be used by video players. Original patch by NVIDIA corporation. Originally committed as revision 16431 to svn://svn.ffmpeg.org/ffmpeg/trunk --- configure | 1 + libavcodec/Makefile | 1 + libavcodec/allcodecs.c | 1 + libavcodec/avcodec.h | 7 ++ libavcodec/h264.c | 34 +++++++- libavcodec/h264_parser.c | 3 +- libavcodec/imgconvert.c | 3 + libavcodec/mpegvideo.c | 6 +- libavcodec/vdpau_internal.h | 31 ++++++++ libavcodec/vdpau_render.h | 86 +++++++++++++++++++++ libavcodec/vdpauvideo.c | 183 ++++++++++++++++++++++++++++++++++++++++++++ libavutil/avutil.h | 1 + 12 files changed, 353 insertions(+), 4 deletions(-) create mode 100644 libavcodec/vdpau_internal.h create mode 100644 libavcodec/vdpau_render.h create mode 100644 libavcodec/vdpauvideo.c diff --git a/configure b/configure index c8be8a6e21..6d9e33d498 100755 --- a/configure +++ b/configure @@ -975,6 +975,7 @@ h261_encoder_select="aandct" h263_encoder_select="aandct" h263p_encoder_select="aandct" h264_decoder_select="golomb" +h264_vdpau_decoder_deps="vdpau" imc_decoder_select="fft mdct" jpegls_decoder_select="golomb" jpegls_encoder_select="golomb" diff --git a/libavcodec/Makefile b/libavcodec/Makefile index e4217d6af4..0ad34314cb 100644 --- a/libavcodec/Makefile +++ b/libavcodec/Makefile @@ -100,6 +100,7 @@ OBJS-$(CONFIG_H263_ENCODER) += mpegvideo_enc.o motion_est.o ratecontr OBJS-$(CONFIG_H263P_ENCODER) += mpegvideo_enc.o motion_est.o ratecontrol.o h263.o mpeg12data.o mpegvideo.o error_resilience.o OBJS-$(CONFIG_H264_DECODER) += h264.o h264idct.o h264pred.o h264_parser.o cabac.o mpegvideo.o error_resilience.o OBJS-$(CONFIG_H264_ENCODER) += h264enc.o h264dspenc.o +OBJS-$(CONFIG_H264_VDPAU_DECODER) += vdpauvideo.o OBJS-$(CONFIG_HUFFYUV_DECODER) += huffyuv.o OBJS-$(CONFIG_HUFFYUV_ENCODER) += huffyuv.o 
OBJS-$(CONFIG_IDCIN_DECODER) += idcinvideo.o diff --git a/libavcodec/allcodecs.c b/libavcodec/allcodecs.c index 1fdda8e052..d1bfff9ca4 100644 --- a/libavcodec/allcodecs.c +++ b/libavcodec/allcodecs.c @@ -88,6 +88,7 @@ void avcodec_register_all(void) REGISTER_DECODER (H263I, h263i); REGISTER_ENCODER (H263P, h263p); REGISTER_DECODER (H264, h264); + REGISTER_DECODER (H264_VDPAU, h264_vdpau); REGISTER_ENCDEC (HUFFYUV, huffyuv); REGISTER_DECODER (IDCIN, idcin); REGISTER_DECODER (INDEO2, indeo2); diff --git a/libavcodec/avcodec.h b/libavcodec/avcodec.h index 7c68a926f6..40dc4ac42e 100644 --- a/libavcodec/avcodec.h +++ b/libavcodec/avcodec.h @@ -191,6 +191,9 @@ enum CodecID { CODEC_ID_TGV, CODEC_ID_TGQ, + /* "codecs" for HW decoding with VDPAU */ + CODEC_ID_H264_VDPAU= 0x9000, + /* various PCM "codecs" */ CODEC_ID_PCM_S16LE= 0x10000, CODEC_ID_PCM_S16BE, @@ -527,6 +530,10 @@ typedef struct RcOverride{ * This can be used to prevent truncation of the last audio samples. */ #define CODEC_CAP_SMALL_LAST_FRAME 0x0040 +/** + * Codec can export data for HW decoding (VDPAU). + */ +#define CODEC_CAP_HWACCEL_VDPAU 0x0080 //The following defines may change, don't expect compatibility if you use them. 
#define MB_TYPE_INTRA4x4 0x0001
diff --git a/libavcodec/h264.c b/libavcodec/h264.c
index 8a7550f2a6..814c85ab92 100644
--- a/libavcodec/h264.c
+++ b/libavcodec/h264.c
@@ -33,6 +33,7 @@
 #include "h264_parser.h"
 #include "golomb.h"
 #include "rectangle.h"
+#include "vdpau_internal.h"
 
 #include "cabac.h"
 #ifdef ARCH_X86
@@ -2188,6 +2189,8 @@ static av_cold int decode_init(AVCodecContext *avctx){
 
     if(avctx->codec_id == CODEC_ID_SVQ3)
         avctx->pix_fmt= PIX_FMT_YUVJ420P;
+    else if(avctx->codec_id == CODEC_ID_H264_VDPAU)
+        avctx->pix_fmt= PIX_FMT_VDPAU_H264; /* data[0] then carries a vdpau_render_state */
     else
         avctx->pix_fmt= PIX_FMT_YUV420P;
 
@@ -7289,6 +7292,8 @@ static void execute_decode_slices(H264Context *h, int context_count){
     H264Context *hx;
     int i;
 
+    if(avctx->codec_id == CODEC_ID_H264_VDPAU)
+        return; /* VDPAU: decode_slice() is never run, slices were queued in decode_nal_units() */
     if(context_count == 1) {
         decode_slice(avctx, &h);
     } else {
@@ -7416,8 +7421,14 @@ static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size){
                && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
                && (avctx->skip_frame < AVDISCARD_BIDIR || hx->slice_type_nos!=FF_B_TYPE)
                && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==FF_I_TYPE)
-               && avctx->skip_frame < AVDISCARD_ALL)
+               && avctx->skip_frame < AVDISCARD_ALL){
+                if(ENABLE_H264_VDPAU_DECODER && avctx->codec_id == CODEC_ID_H264_VDPAU){
+                    static const uint8_t start_code[] = {0x00, 0x00, 0x01}; /* queued in front of every slice NAL */
+                    ff_VDPAU_h264_add_data_chunk(h, start_code, sizeof(start_code));
+                    ff_VDPAU_h264_add_data_chunk(h, &buf[buf_index - consumed], consumed );
+                }else
                     context_count++;
+            }
             break;
         case NAL_DPA:
             init_get_bits(&hx->s.gb, ptr, bit_length);
@@ -7620,6 +7631,9 @@ static int decode_frame(AVCodecContext *avctx,
         h->prev_frame_num_offset= h->frame_num_offset;
         h->prev_frame_num= h->frame_num;
 
+        if (ENABLE_H264_VDPAU_DECODER && avctx->codec_id == CODEC_ID_H264_VDPAU)
+            ff_VDPAU_h264_picture_complete(h);
+
         /*
          * FIXME: Error handling code does not seem to support interlaced
          * when slices span multiple rows
@@ -7632,7 +7646,7 @@ static int decode_frame(AVCodecContext *avctx,
          * past end by one (callers fault) and resync_mb_y != 0
          * causes problems for the first MB line, too.
          */
-        if (!FIELD_PICTURE)
+        if (avctx->codec_id != CODEC_ID_H264_VDPAU && !FIELD_PICTURE) /* "!id == X" would parse as "(!id) == X" and never be true */
             ff_er_frame_end(s);
 
         MPV_frame_end(s);
@@ -8005,4 +8019,20 @@ AVCodec h264_decoder = {
     .long_name = NULL_IF_CONFIG_SMALL("H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10"),
 };
 
+#ifdef CONFIG_H264_VDPAU_DECODER
+AVCodec h264_vdpau_decoder = {
+    "h264_vdpau",
+    CODEC_TYPE_VIDEO,
+    CODEC_ID_H264_VDPAU,
+    sizeof(H264Context),
+    decode_init,
+    NULL,
+    decode_end,
+    decode_frame,
+    CODEC_CAP_DR1 | CODEC_CAP_DELAY | CODEC_CAP_HWACCEL_VDPAU,
+    .flush= flush_dpb,
+    .long_name = NULL_IF_CONFIG_SMALL("H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10 (VDPAU acceleration)"),
+};
+#endif
+
 #include "svq3.c"
diff --git a/libavcodec/h264_parser.c b/libavcodec/h264_parser.c
index ddb79a3094..cfa04c4460 100644
--- a/libavcodec/h264_parser.c
+++ b/libavcodec/h264_parser.c
@@ -161,7 +161,8 @@ static void close(AVCodecParserContext *s)
 
 
 AVCodecParser h264_parser = {
-    { CODEC_ID_H264 },
+    { CODEC_ID_H264,
+      CODEC_ID_H264_VDPAU },
     sizeof(H264Context),
     NULL,
     h264_parse,
diff --git a/libavcodec/imgconvert.c b/libavcodec/imgconvert.c
index f5d9f85937..952ddcb566 100644
--- a/libavcodec/imgconvert.c
+++ b/libavcodec/imgconvert.c
@@ -267,6 +267,9 @@ static const PixFmtInfo pix_fmt_info[PIX_FMT_NB] = {
     [PIX_FMT_XVMC_MPEG2_IDCT] = {
         .name = "xvmcidct",
     },
+    [PIX_FMT_VDPAU_H264] = {
+        .name = "vdpau_h264",
+    },
     [PIX_FMT_UYYVYY411] = {
         .name = "uyyvyy411",
         .nb_channels = 1,
diff --git a/libavcodec/mpegvideo.c b/libavcodec/mpegvideo.c
index 6560d6b5a1..43a6e85a11 100644
--- a/libavcodec/mpegvideo.c
+++ b/libavcodec/mpegvideo.c
@@ -957,7 +957,11 @@ void MPV_frame_end(MpegEncContext *s)
         XVMC_field_end(s);
     }else
 #endif
-    if(s->unrestricted_mv && s->current_picture.reference && !s->intra_only && !(s->flags&CODEC_FLAG_EMU_EDGE)) {
+    if(!(s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU) /* draw_edges() is skipped for VDPAU codecs */
+       && s->unrestricted_mv
+       && s->current_picture.reference
+       && !s->intra_only
+       && !(s->flags&CODEC_FLAG_EMU_EDGE)) {
             s->dsp.draw_edges(s->current_picture.data[0], s->linesize , s->h_edge_pos , s->v_edge_pos , EDGE_WIDTH );
             s->dsp.draw_edges(s->current_picture.data[1], s->uvlinesize, s->h_edge_pos>>1, s->v_edge_pos>>1, EDGE_WIDTH/2);
             s->dsp.draw_edges(s->current_picture.data[2], s->uvlinesize, s->h_edge_pos>>1, s->v_edge_pos>>1, EDGE_WIDTH/2);
diff --git a/libavcodec/vdpau_internal.h b/libavcodec/vdpau_internal.h
new file mode 100644
index 0000000000..915944e147
--- /dev/null
+++ b/libavcodec/vdpau_internal.h
@@ -0,0 +1,31 @@
+/*
+ * Video Decode and Presentation API for UNIX (VDPAU) is used for
+ * HW decode acceleration for MPEG-1/2, H.264 and VC-1.
+ *
+ * Copyright (C) 2008 NVIDIA.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_VDPAU_INTERNAL_H
+#define AVCODEC_VDPAU_INTERNAL_H
+/* NOTE(review): no includes here - the includer must already have H264Context and uint8_t declared. */
+void ff_VDPAU_h264_add_data_chunk(H264Context *h, const uint8_t *buf,
+                                    int buf_size);
+void ff_VDPAU_h264_picture_complete(H264Context *h);
+
+#endif /* AVCODEC_VDPAU_INTERNAL_H */
diff --git a/libavcodec/vdpau_render.h b/libavcodec/vdpau_render.h
new file mode 100644
index 0000000000..4fc28b3b42
--- /dev/null
+++ b/libavcodec/vdpau_render.h
@@ -0,0 +1,86 @@
+/*
+ * Video Decode and Presentation API for UNIX (VDPAU) is used for
+ * HW decode acceleration for MPEG-1/2, H.264 and VC-1.
+ *
+ * Copyright (C) 2008 NVIDIA.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_VDPAU_RENDER_H
+#define AVCODEC_VDPAU_RENDER_H
+
+/**
+ * \defgroup Decoder VDPAU Decoder and Renderer
+ *
+ * VDPAU HW acceleration has two modules
+ * - VDPAU Decoding
+ * - VDPAU Presentation
+ *
+ * VDPAU decoding module parses all headers using FFmpeg
+ * parsing mechanism and uses VDPAU for the actual decoding.
+ *
+ * As per the current implementation, the actual decoding
+ * and rendering (API calls) are done as part of VDPAU
+ * presentation (vo_vdpau.c) module.
+ *
+ * @{
+ * \defgroup VDPAU_Decoding VDPAU Decoding
+ * \ingroup Decoder
+ * @{
+ */
+
+#include "vdpau/vdpau.h"
+#include "vdpau/vdpau_x11.h"
+
+/**
+ * \brief The videoSurface is used for render.
+ */
+#define FF_VDPAU_STATE_USED_FOR_RENDER 1
+
+/**
+ * \brief The videoSurface is needed for reference/prediction,
+ * codec manipulates this.
+ */
+#define FF_VDPAU_STATE_USED_FOR_REFERENCE 2
+
+/**
+ * \brief This structure is used as a CALL-BACK between the ffmpeg
+ * decoder (vd_) and presentation (vo_) module.
+ * It describes one video frame: the surface, the picture
+ * parameters and the bitstream information that are passed
+ * between the ffmpeg decoder and its clients.
+ */
+struct vdpau_render_state {
+    VdpVideoSurface surface; ///< used as rendered surface, never changed.
+
+    int state; ///< Holds FF_VDPAU_STATE_* values
+
+    /** Picture Parameter information for all supported codecs */
+    union _VdpPictureInfo {
+        VdpPictureInfoH264 h264;
+    } info;
+
+    /** Describe size/location of the compressed video data */
+    int bitstreamBuffersAlloced; ///< size argument handed to av_fast_realloc() for bitstreamBuffers
+    int bitstreamBuffersUsed; ///< number of entries of bitstreamBuffers in use for the current picture
+    VdpBitstreamBuffer *bitstreamBuffers;
+};
+
+/* @}*/
+
+#endif /* AVCODEC_VDPAU_RENDER_H */
diff --git a/libavcodec/vdpauvideo.c b/libavcodec/vdpauvideo.c
new file mode 100644
index 0000000000..0ac05eb89c
--- /dev/null
+++ b/libavcodec/vdpauvideo.c
@@ -0,0 +1,183 @@
+/*
+ * Video Decode and Presentation API for UNIX (VDPAU) is used for
+ * HW decode acceleration for MPEG-1/2, H.264 and VC-1.
+ *
+ * Copyright (c) 2008 NVIDIA.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <limits.h>
+#include "avcodec.h"
+#include "h264.h"
+
+#undef NDEBUG
+#include <assert.h>
+
+#include "vdpau_render.h"
+#include "vdpau_internal.h"
+
+/**
+ * \addtogroup VDPAU_Decoding
+ *
+ * @{
+ */
+/** Fill info.h264.referenceFrames[] of the current picture from h->short_ref, then h->long_ref; unused slots are invalidated. */
+static void VDPAU_h264_set_reference_frames(H264Context *h)
+{
+    MpegEncContext * s = &h->s;
+    struct vdpau_render_state * render, * render_ref;
+    VdpReferenceFrameH264 * rf, * rf2;
+    Picture * pic;
+    int i, list, pic_frame_idx;
+
+    render = (struct vdpau_render_state*)s->current_picture_ptr->data[0];
+    assert(render);
+
+    rf = &render->info.h264.referenceFrames[0]; /* next free slot */
+#define H264_RF_COUNT FF_ARRAY_ELEMS(render->info.h264.referenceFrames)
+
+    for (list = 0; list < 2; ++list) {
+        Picture **lp = list ? h->long_ref : h->short_ref;
+        int ls = list ? h->long_ref_count : h->short_ref_count;
+
+        for (i = 0; i < ls; ++i) {
+            pic = lp[i];
+            if (!pic || !pic->reference)
+                continue;
+            pic_frame_idx = pic->long_ref ? pic->pic_id : pic->frame_num;
+
+            render_ref = (struct vdpau_render_state*)pic->data[0];
+            assert(render_ref);
+
+            rf2 = &render->info.h264.referenceFrames[0];
+            while (rf2 != rf) { /* look for a slot already describing this picture */
+                if (
+                    (rf2->surface == render_ref->surface)
+                    && (rf2->is_long_term == pic->long_ref)
+                    && (rf2->frame_idx == pic_frame_idx)
+                )
+                    break;
+                ++rf2;
+            }
+            if (rf2 != rf) { /* found: only merge the field reference flags */
+                rf2->top_is_reference |= (pic->reference & PICT_TOP_FIELD) ? VDP_TRUE : VDP_FALSE;
+                rf2->bottom_is_reference |= (pic->reference & PICT_BOTTOM_FIELD) ? VDP_TRUE : VDP_FALSE;
+                continue;
+            }
+
+            if (rf >= &render->info.h264.referenceFrames[H264_RF_COUNT]) /* table full: ignore further pictures */
+                continue;
+
+            rf->surface = render_ref->surface;
+            rf->is_long_term = pic->long_ref;
+            rf->top_is_reference = (pic->reference & PICT_TOP_FIELD) ? VDP_TRUE : VDP_FALSE;
+            rf->bottom_is_reference = (pic->reference & PICT_BOTTOM_FIELD) ? VDP_TRUE : VDP_FALSE;
+            rf->field_order_cnt[0] = pic->field_poc[0];
+            rf->field_order_cnt[1] = pic->field_poc[1];
+            rf->frame_idx = pic_frame_idx;
+
+            ++rf;
+        }
+    }
+
+    for (; rf < &render->info.h264.referenceFrames[H264_RF_COUNT]; ++rf) {
+        rf->surface = VDP_INVALID_HANDLE;
+        rf->is_long_term = 0;
+        rf->top_is_reference = 0;
+        rf->bottom_is_reference = 0;
+        rf->field_order_cnt[0] = 0;
+        rf->field_order_cnt[1] = 0;
+        rf->frame_idx = 0;
+    }
+}
+/** Queue buf (buf_size bytes) as one more VdpBitstreamBuffer of the current picture; the data is referenced, not copied. */
+void ff_VDPAU_h264_add_data_chunk(H264Context *h, const uint8_t *buf, int buf_size)
+{
+    MpegEncContext * s = &h->s;
+    struct vdpau_render_state * render;
+    VdpBitstreamBuffer * buffers;
+
+    render = (struct vdpau_render_state*)s->current_picture_ptr->data[0];
+    assert(render);
+
+    if (!render->bitstreamBuffersUsed) /* first chunk of a picture: set up its reference list */
+        VDPAU_h264_set_reference_frames(h);
+
+    buffers = av_fast_realloc(render->bitstreamBuffers, &render->bitstreamBuffersAlloced,
+                              sizeof(*buffers)*(render->bitstreamBuffersUsed + 1));
+    if (!buffers)
+        return; /* allocation failed: drop this chunk instead of dereferencing NULL; the previous array pointer is kept */
+    render->bitstreamBuffers = buffers;
+    render->bitstreamBuffers[render->bitstreamBuffersUsed].struct_version = VDP_BITSTREAM_BUFFER_VERSION;
+    render->bitstreamBuffers[render->bitstreamBuffersUsed].bitstream = buf;
+    render->bitstreamBuffers[render->bitstreamBuffersUsed].bitstream_bytes = buf_size;
+    render->bitstreamBuffersUsed++;
+}
+/** Copy slice count, SPS/PPS fields and scaling lists into info.h264, call ff_draw_horiz_band() and reset the chunk counter. */
+void ff_VDPAU_h264_picture_complete(H264Context *h)
+{
+    MpegEncContext * s = &h->s;
+    struct vdpau_render_state * render;
+
+    render = (struct vdpau_render_state*)s->current_picture_ptr->data[0];
+    assert(render);
+
+    render->info.h264.slice_count = h->slice_num;
+    if (render->info.h264.slice_count < 1) /* no slice seen: nothing else is filled in */
+        return;
+
+    for (int i = 0; i < 2; ++i) {
+        int foc = s->current_picture_ptr->field_poc[i];
+        if (foc == INT_MAX) /* INT_MAX is exported as 0 */
+            foc = 0;
+        render->info.h264.field_order_cnt[i] = foc;
+    }
+
+    render->info.h264.is_reference = s->current_picture_ptr->reference ? VDP_TRUE : VDP_FALSE;
+    render->info.h264.frame_num = h->frame_num;
+    render->info.h264.field_pic_flag = s->picture_structure != PICT_FRAME;
+    render->info.h264.bottom_field_flag = s->picture_structure == PICT_BOTTOM_FIELD;
+    render->info.h264.num_ref_frames = h->sps.ref_frame_count;
+    render->info.h264.mb_adaptive_frame_field_flag = h->sps.mb_aff;
+    render->info.h264.constrained_intra_pred_flag = h->pps.constrained_intra_pred;
+    render->info.h264.weighted_pred_flag = h->pps.weighted_pred;
+    render->info.h264.weighted_bipred_idc = h->pps.weighted_bipred_idc;
+    render->info.h264.frame_mbs_only_flag = h->sps.frame_mbs_only_flag;
+    render->info.h264.transform_8x8_mode_flag = h->pps.transform_8x8_mode;
+    render->info.h264.chroma_qp_index_offset = h->pps.chroma_qp_index_offset[0];
+    render->info.h264.second_chroma_qp_index_offset = h->pps.chroma_qp_index_offset[1];
+    render->info.h264.pic_init_qp_minus26 = h->pps.init_qp - 26;
+    render->info.h264.num_ref_idx_l0_active_minus1 = h->pps.ref_count[0] - 1;
+    render->info.h264.num_ref_idx_l1_active_minus1 = h->pps.ref_count[1] - 1;
+    render->info.h264.log2_max_frame_num_minus4 = h->sps.log2_max_frame_num - 4;
+    render->info.h264.pic_order_cnt_type = h->sps.poc_type;
+    render->info.h264.log2_max_pic_order_cnt_lsb_minus4 = h->sps.log2_max_poc_lsb - 4;
+    render->info.h264.delta_pic_order_always_zero_flag = h->sps.delta_pic_order_always_zero_flag;
+    render->info.h264.direct_8x8_inference_flag = h->sps.direct_8x8_inference_flag;
+    render->info.h264.entropy_coding_mode_flag = h->pps.cabac;
+    render->info.h264.pic_order_present_flag = h->pps.pic_order_present;
+    render->info.h264.deblocking_filter_control_present_flag = h->pps.deblocking_filter_parameters_present;
+    render->info.h264.redundant_pic_cnt_present_flag = h->pps.redundant_pic_cnt_present;
+    memcpy(render->info.h264.scaling_lists_4x4, h->pps.scaling_matrix4, sizeof(render->info.h264.scaling_lists_4x4));
+    memcpy(render->info.h264.scaling_lists_8x8, h->pps.scaling_matrix8, sizeof(render->info.h264.scaling_lists_8x8));
+
+    ff_draw_horiz_band(s, 0, s->avctx->height);
+    render->bitstreamBuffersUsed = 0; /* the next picture starts with an empty chunk list */
+}
+
+/* @}*/
diff --git a/libavutil/avutil.h b/libavutil/avutil.h
index a21a8a475f..7366718579 100644
--- a/libavutil/avutil.h
+++ b/libavutil/avutil.h
@@ -121,6 +121,7 @@ enum PixelFormat {
     PIX_FMT_YUV440P, ///< Planar YUV 4:4:0 (1 Cr & Cb sample per 1x2 Y samples)
     PIX_FMT_YUVJ440P, ///< Planar YUV 4:4:0 full scale (jpeg)
     PIX_FMT_YUVA420P, ///< Planar YUV 4:2:0, 20bpp, (1 Cr & Cb sample per 2x2 Y & A samples)
+    PIX_FMT_VDPAU_H264,///< H264 HW decoding with VDPAU, data[0] contains a vdpau_render_state struct which contains the bitstream of the slices as well as various fields extracted from headers
     PIX_FMT_NB, ///< number of pixel formats, DO NOT USE THIS if you want to link with shared libav* because the number of formats might differ between versions
 };
 
-- 
cgit v1.2.3