diff options
author | Oskar Arvidsson <oskar@irock.se> | 2011-03-29 17:48:59 +0200 |
---|---|---|
committer | Ronald S. Bultje <rsbultje@gmail.com> | 2011-05-10 07:24:36 -0400 |
commit | 19a0729b4cfacfd90b8ee84ab0c093ff7e397e65 (patch) | |
tree | 63279dbdeba8d3fd96b5c971a224341ec32f7100 /libavcodec/x86/dsputil_mmx.c | |
parent | fcc0224e4fbd44ae268903185b0cf83560b13555 (diff) | |
download | ffmpeg-19a0729b4cfacfd90b8ee84ab0c093ff7e397e65.tar.gz |
Adds 8-, 9- and 10-bit versions of some of the functions used by the h264 decoder.
This patch lets e.g. dsputil_init chose dsp functions with respect to
the bit depth to decode. The naming scheme of bit depth dependent
functions is <base name>_<bit depth>[_<prefix>] (i.e. the old
clear_blocks_c is now named clear_blocks_8_c).
Note: Some of the functions for high bit depth is not dependent on the
bit depth, but only on the pixel size. This leaves some room for
optimizing binary size.
Preparatory patch for high bit depth h264 decoding support.
Signed-off-by: Ronald S. Bultje <rsbultje@gmail.com>
Diffstat (limited to 'libavcodec/x86/dsputil_mmx.c')
-rw-r--r-- | libavcodec/x86/dsputil_mmx.c | 32 |
1 files changed, 32 insertions, 0 deletions
diff --git a/libavcodec/x86/dsputil_mmx.c b/libavcodec/x86/dsputil_mmx.c index f98e6ae0fa..a0cb11aa40 100644 --- a/libavcodec/x86/dsputil_mmx.c +++ b/libavcodec/x86/dsputil_mmx.c @@ -2418,6 +2418,7 @@ float ff_scalarproduct_float_sse(const float *v1, const float *v2, int order); void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) { int mm_flags = av_get_cpu_flags(); + const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8; if (avctx->dsp_mask) { if (avctx->dsp_mask & AV_CPU_FLAG_FORCE) @@ -2499,6 +2500,7 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) c->put_pixels_clamped = ff_put_pixels_clamped_mmx; c->put_signed_pixels_clamped = ff_put_signed_pixels_clamped_mmx; c->add_pixels_clamped = ff_add_pixels_clamped_mmx; + if (!high_bit_depth) { c->clear_block = clear_block_mmx; c->clear_blocks = clear_blocks_mmx; if ((mm_flags & AV_CPU_FLAG_SSE) && @@ -2507,6 +2509,7 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) c->clear_block = clear_block_sse; c->clear_blocks = clear_blocks_sse; } + } #define SET_HPEL_FUNCS(PFX, IDX, SIZE, CPU) \ c->PFX ## _pixels_tab[IDX][0] = PFX ## _pixels ## SIZE ## _ ## CPU; \ @@ -2514,6 +2517,7 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) c->PFX ## _pixels_tab[IDX][2] = PFX ## _pixels ## SIZE ## _y2_ ## CPU; \ c->PFX ## _pixels_tab[IDX][3] = PFX ## _pixels ## SIZE ## _xy2_ ## CPU + if (!high_bit_depth) { SET_HPEL_FUNCS(put, 0, 16, mmx); SET_HPEL_FUNCS(put_no_rnd, 0, 16, mmx); SET_HPEL_FUNCS(avg, 0, 16, mmx); @@ -2522,17 +2526,20 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) SET_HPEL_FUNCS(put_no_rnd, 1, 8, mmx); SET_HPEL_FUNCS(avg, 1, 8, mmx); SET_HPEL_FUNCS(avg_no_rnd, 1, 8, mmx); + } #if ARCH_X86_32 || !HAVE_YASM c->gmc= gmc_mmx; #endif #if ARCH_X86_32 && HAVE_YASM + if (!high_bit_depth) c->emulated_edge_mc = emulated_edge_mc_mmx; #endif c->add_bytes= add_bytes_mmx; c->add_bytes_l2= add_bytes_l2_mmx; + if (!high_bit_depth) c->draw_edges = draw_edges_mmx; if (CONFIG_H263_DECODER || CONFIG_H263_ENCODER) { @@ -2541,8 +2548,10 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) } #if HAVE_YASM + if (!high_bit_depth) { c->put_h264_chroma_pixels_tab[0]= ff_put_h264_chroma_mc8_mmx_rnd; c->put_h264_chroma_pixels_tab[1]= ff_put_h264_chroma_mc4_mmx; + } c->put_rv40_chroma_pixels_tab[0]= ff_put_rv40_chroma_mc8_mmx; c->put_rv40_chroma_pixels_tab[1]= ff_put_rv40_chroma_mc4_mmx; @@ -2551,6 +2560,7 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) if (mm_flags & AV_CPU_FLAG_MMX2) { c->prefetch = prefetch_mmx2; + if (!high_bit_depth) { c->put_pixels_tab[0][1] = put_pixels16_x2_mmx2; c->put_pixels_tab[0][2] = put_pixels16_y2_mmx2; @@ -2564,14 +2574,17 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) c->avg_pixels_tab[1][0] = avg_pixels8_mmx2; c->avg_pixels_tab[1][1] = avg_pixels8_x2_mmx2; c->avg_pixels_tab[1][2] = avg_pixels8_y2_mmx2; + } if(!(avctx->flags & CODEC_FLAG_BITEXACT)){ + if (!high_bit_depth) { c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_mmx2; c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_mmx2; c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_mmx2; c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_mmx2; c->avg_pixels_tab[0][3] = avg_pixels16_xy2_mmx2; c->avg_pixels_tab[1][3] = avg_pixels8_xy2_mmx2; + } if (CONFIG_VP3_DECODER && HAVE_YASM) { c->vp3_v_loop_filter= ff_vp3_v_loop_filter_mmx2; @@ -2613,12 +2626,14 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) SET_QPEL_FUNCS(avg_qpel, 0, 16, mmx2); SET_QPEL_FUNCS(avg_qpel, 1, 8, mmx2); + if (!high_bit_depth) { SET_QPEL_FUNCS(put_h264_qpel, 0, 16, mmx2); SET_QPEL_FUNCS(put_h264_qpel, 1, 8, mmx2); SET_QPEL_FUNCS(put_h264_qpel, 2, 4, mmx2); SET_QPEL_FUNCS(avg_h264_qpel, 0, 16, mmx2); SET_QPEL_FUNCS(avg_h264_qpel, 1, 8, mmx2); SET_QPEL_FUNCS(avg_h264_qpel, 2, 4, mmx2); + } SET_QPEL_FUNCS(put_2tap_qpel, 0, 16, mmx2); SET_QPEL_FUNCS(put_2tap_qpel, 1, 8, mmx2); @@ -2629,10 +2644,12 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) c->avg_rv40_chroma_pixels_tab[0]= ff_avg_rv40_chroma_mc8_mmx2; c->avg_rv40_chroma_pixels_tab[1]= ff_avg_rv40_chroma_mc4_mmx2; + if (!high_bit_depth) { c->avg_h264_chroma_pixels_tab[0]= ff_avg_h264_chroma_mc8_mmx2_rnd; c->avg_h264_chroma_pixels_tab[1]= ff_avg_h264_chroma_mc4_mmx2; c->avg_h264_chroma_pixels_tab[2]= ff_avg_h264_chroma_mc2_mmx2; c->put_h264_chroma_pixels_tab[2]= ff_put_h264_chroma_mc2_mmx2; + } c->add_hfyu_median_prediction = ff_add_hfyu_median_prediction_mmx2; #endif @@ -2645,6 +2662,7 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) } else if (mm_flags & AV_CPU_FLAG_3DNOW) { c->prefetch = prefetch_3dnow; + if (!high_bit_depth) { c->put_pixels_tab[0][1] = put_pixels16_x2_3dnow; c->put_pixels_tab[0][2] = put_pixels16_y2_3dnow; @@ -2667,6 +2685,7 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) c->avg_pixels_tab[0][3] = avg_pixels16_xy2_3dnow; c->avg_pixels_tab[1][3] = avg_pixels8_xy2_3dnow; } + } if (CONFIG_VP3_DECODER && (avctx->codec_id == CODEC_ID_VP3 || avctx->codec_id == CODEC_ID_THEORA)) { @@ -2681,12 +2700,14 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) SET_QPEL_FUNCS(avg_qpel, 0, 16, 3dnow); SET_QPEL_FUNCS(avg_qpel, 1, 8, 3dnow); + if (!high_bit_depth) { SET_QPEL_FUNCS(put_h264_qpel, 0, 16, 3dnow); SET_QPEL_FUNCS(put_h264_qpel, 1, 8, 3dnow); SET_QPEL_FUNCS(put_h264_qpel, 2, 4, 3dnow); SET_QPEL_FUNCS(avg_h264_qpel, 0, 16, 3dnow); SET_QPEL_FUNCS(avg_h264_qpel, 1, 8, 3dnow); SET_QPEL_FUNCS(avg_h264_qpel, 2, 4, 3dnow); + } SET_QPEL_FUNCS(put_2tap_qpel, 0, 16, 3dnow); SET_QPEL_FUNCS(put_2tap_qpel, 1, 8, 3dnow); @@ -2694,8 +2715,10 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) SET_QPEL_FUNCS(avg_2tap_qpel, 1, 8, 3dnow); #if HAVE_YASM + if (!high_bit_depth) { c->avg_h264_chroma_pixels_tab[0]= ff_avg_h264_chroma_mc8_3dnow_rnd; c->avg_h264_chroma_pixels_tab[1]= ff_avg_h264_chroma_mc4_3dnow; + } c->avg_rv40_chroma_pixels_tab[0]= ff_avg_rv40_chroma_mc8_3dnow; c->avg_rv40_chroma_pixels_tab[1]= ff_avg_rv40_chroma_mc4_3dnow; @@ -2710,12 +2733,15 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) c->avg_h264_qpel_pixels_tab[1][x+y*4] = avg_h264_qpel8_mc##x##y##_##CPU; if((mm_flags & AV_CPU_FLAG_SSE2) && !(mm_flags & AV_CPU_FLAG_3DNOW)){ // these functions are slower than mmx on AMD, but faster on Intel + if (!high_bit_depth) { c->put_pixels_tab[0][0] = put_pixels16_sse2; c->put_no_rnd_pixels_tab[0][0] = put_pixels16_sse2; c->avg_pixels_tab[0][0] = avg_pixels16_sse2; H264_QPEL_FUNCS(0, 0, sse2); + } } if(mm_flags & AV_CPU_FLAG_SSE2){ + if (!high_bit_depth) { H264_QPEL_FUNCS(0, 1, sse2); H264_QPEL_FUNCS(0, 2, sse2); H264_QPEL_FUNCS(0, 3, sse2); @@ -2728,9 +2754,11 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) H264_QPEL_FUNCS(3, 1, sse2); H264_QPEL_FUNCS(3, 2, sse2); H264_QPEL_FUNCS(3, 3, sse2); + } } #if HAVE_SSSE3 if(mm_flags & AV_CPU_FLAG_SSSE3){ + if (!high_bit_depth) { H264_QPEL_FUNCS(1, 0, ssse3); H264_QPEL_FUNCS(1, 1, ssse3); H264_QPEL_FUNCS(1, 2, ssse3); @@ -2743,12 +2771,15 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) H264_QPEL_FUNCS(3, 1, ssse3); H264_QPEL_FUNCS(3, 2, ssse3); H264_QPEL_FUNCS(3, 3, ssse3); + } c->add_png_paeth_prediction= add_png_paeth_prediction_ssse3; #if HAVE_YASM + if (!high_bit_depth) { c->put_h264_chroma_pixels_tab[0]= ff_put_h264_chroma_mc8_ssse3_rnd; c->avg_h264_chroma_pixels_tab[0]= ff_avg_h264_chroma_mc8_ssse3_rnd; c->put_h264_chroma_pixels_tab[1]= ff_put_h264_chroma_mc4_ssse3; c->avg_h264_chroma_pixels_tab[1]= ff_avg_h264_chroma_mc4_ssse3; + } c->add_hfyu_left_prediction = ff_add_hfyu_left_prediction_ssse3; if (mm_flags & AV_CPU_FLAG_SSE4) // not really sse4, just slow on Conroe c->add_hfyu_left_prediction = ff_add_hfyu_left_prediction_sse4; @@ -2805,6 +2836,7 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) } } + if (!high_bit_depth) c->emulated_edge_mc = emulated_edge_mc_sse; c->gmc= gmc_sse; #endif |