diff options
author | Daniel Kang <daniel.d.kang@gmail.com> | 2011-07-02 22:18:39 -0400 |
---|---|---|
committer | Ronald S. Bultje <rsbultje@gmail.com> | 2011-07-03 07:43:38 -0700 |
commit | 9bfa5363da21a5ba7bed174a82c86f2a089fc917 (patch) | |
tree | 99da032075f99024c444431c0bf1d90c92f94606 /libavcodec/x86/dsputil_mmx.c | |
parent | 10dde477c77e0ac0fecda49fdb1dc71329aa7513 (diff) | |
download | ffmpeg-9bfa5363da21a5ba7bed174a82c86f2a089fc917.tar.gz |
H.264: Add x86 assembly for 10-bit H.264 qpel functions.
Mainly ported from 8-bit H.264 qpel.
Some code ported from x264. LGPL ok by author.
Signed-off-by: Ronald S. Bultje <rsbultje@gmail.com>
Diffstat (limited to 'libavcodec/x86/dsputil_mmx.c')
-rw-r--r-- | libavcodec/x86/dsputil_mmx.c | 136 |
1 files changed, 86 insertions, 50 deletions
diff --git a/libavcodec/x86/dsputil_mmx.c b/libavcodec/x86/dsputil_mmx.c index 03c094533f..d6eed82dd8 100644 --- a/libavcodec/x86/dsputil_mmx.c +++ b/libavcodec/x86/dsputil_mmx.c @@ -2627,44 +2627,56 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_exact_mmx2; } -#define SET_QPEL_FUNCS(PFX, IDX, SIZE, CPU) \ - c->PFX ## _pixels_tab[IDX][ 0] = PFX ## SIZE ## _mc00_ ## CPU; \ - c->PFX ## _pixels_tab[IDX][ 1] = PFX ## SIZE ## _mc10_ ## CPU; \ - c->PFX ## _pixels_tab[IDX][ 2] = PFX ## SIZE ## _mc20_ ## CPU; \ - c->PFX ## _pixels_tab[IDX][ 3] = PFX ## SIZE ## _mc30_ ## CPU; \ - c->PFX ## _pixels_tab[IDX][ 4] = PFX ## SIZE ## _mc01_ ## CPU; \ - c->PFX ## _pixels_tab[IDX][ 5] = PFX ## SIZE ## _mc11_ ## CPU; \ - c->PFX ## _pixels_tab[IDX][ 6] = PFX ## SIZE ## _mc21_ ## CPU; \ - c->PFX ## _pixels_tab[IDX][ 7] = PFX ## SIZE ## _mc31_ ## CPU; \ - c->PFX ## _pixels_tab[IDX][ 8] = PFX ## SIZE ## _mc02_ ## CPU; \ - c->PFX ## _pixels_tab[IDX][ 9] = PFX ## SIZE ## _mc12_ ## CPU; \ - c->PFX ## _pixels_tab[IDX][10] = PFX ## SIZE ## _mc22_ ## CPU; \ - c->PFX ## _pixels_tab[IDX][11] = PFX ## SIZE ## _mc32_ ## CPU; \ - c->PFX ## _pixels_tab[IDX][12] = PFX ## SIZE ## _mc03_ ## CPU; \ - c->PFX ## _pixels_tab[IDX][13] = PFX ## SIZE ## _mc13_ ## CPU; \ - c->PFX ## _pixels_tab[IDX][14] = PFX ## SIZE ## _mc23_ ## CPU; \ - c->PFX ## _pixels_tab[IDX][15] = PFX ## SIZE ## _mc33_ ## CPU - - SET_QPEL_FUNCS(put_qpel, 0, 16, mmx2); - SET_QPEL_FUNCS(put_qpel, 1, 8, mmx2); - SET_QPEL_FUNCS(put_no_rnd_qpel, 0, 16, mmx2); - SET_QPEL_FUNCS(put_no_rnd_qpel, 1, 8, mmx2); - SET_QPEL_FUNCS(avg_qpel, 0, 16, mmx2); - SET_QPEL_FUNCS(avg_qpel, 1, 8, mmx2); +#define SET_QPEL_FUNCS(PFX, IDX, SIZE, CPU, PREFIX) \ + c->PFX ## _pixels_tab[IDX][ 0] = PREFIX ## PFX ## SIZE ## _mc00_ ## CPU; \ + c->PFX ## _pixels_tab[IDX][ 1] = PREFIX ## PFX ## SIZE ## _mc10_ ## CPU; \ + c->PFX ## _pixels_tab[IDX][ 2] = PREFIX ## PFX ## SIZE ## _mc20_ ## CPU; \ + c->PFX ## _pixels_tab[IDX][ 3] = PREFIX ## PFX ## SIZE ## _mc30_ ## CPU; \ + c->PFX ## _pixels_tab[IDX][ 4] = PREFIX ## PFX ## SIZE ## _mc01_ ## CPU; \ + c->PFX ## _pixels_tab[IDX][ 5] = PREFIX ## PFX ## SIZE ## _mc11_ ## CPU; \ + c->PFX ## _pixels_tab[IDX][ 6] = PREFIX ## PFX ## SIZE ## _mc21_ ## CPU; \ + c->PFX ## _pixels_tab[IDX][ 7] = PREFIX ## PFX ## SIZE ## _mc31_ ## CPU; \ + c->PFX ## _pixels_tab[IDX][ 8] = PREFIX ## PFX ## SIZE ## _mc02_ ## CPU; \ + c->PFX ## _pixels_tab[IDX][ 9] = PREFIX ## PFX ## SIZE ## _mc12_ ## CPU; \ + c->PFX ## _pixels_tab[IDX][10] = PREFIX ## PFX ## SIZE ## _mc22_ ## CPU; \ + c->PFX ## _pixels_tab[IDX][11] = PREFIX ## PFX ## SIZE ## _mc32_ ## CPU; \ + c->PFX ## _pixels_tab[IDX][12] = PREFIX ## PFX ## SIZE ## _mc03_ ## CPU; \ + c->PFX ## _pixels_tab[IDX][13] = PREFIX ## PFX ## SIZE ## _mc13_ ## CPU; \ + c->PFX ## _pixels_tab[IDX][14] = PREFIX ## PFX ## SIZE ## _mc23_ ## CPU; \ + c->PFX ## _pixels_tab[IDX][15] = PREFIX ## PFX ## SIZE ## _mc33_ ## CPU + + SET_QPEL_FUNCS(put_qpel, 0, 16, mmx2, ); + SET_QPEL_FUNCS(put_qpel, 1, 8, mmx2, ); + SET_QPEL_FUNCS(put_no_rnd_qpel, 0, 16, mmx2, ); + SET_QPEL_FUNCS(put_no_rnd_qpel, 1, 8, mmx2, ); + SET_QPEL_FUNCS(avg_qpel, 0, 16, mmx2, ); + SET_QPEL_FUNCS(avg_qpel, 1, 8, mmx2, ); if (!high_bit_depth) { - SET_QPEL_FUNCS(put_h264_qpel, 0, 16, mmx2); - SET_QPEL_FUNCS(put_h264_qpel, 1, 8, mmx2); - SET_QPEL_FUNCS(put_h264_qpel, 2, 4, mmx2); - SET_QPEL_FUNCS(avg_h264_qpel, 0, 16, mmx2); - SET_QPEL_FUNCS(avg_h264_qpel, 1, 8, mmx2); - SET_QPEL_FUNCS(avg_h264_qpel, 2, 4, mmx2); + SET_QPEL_FUNCS(put_h264_qpel, 0, 16, mmx2, ); + SET_QPEL_FUNCS(put_h264_qpel, 1, 8, mmx2, ); + SET_QPEL_FUNCS(put_h264_qpel, 2, 4, mmx2, ); + SET_QPEL_FUNCS(avg_h264_qpel, 0, 16, mmx2, ); + SET_QPEL_FUNCS(avg_h264_qpel, 1, 8, mmx2, ); + SET_QPEL_FUNCS(avg_h264_qpel, 2, 4, mmx2, ); } +#if HAVE_YASM + else if (bit_depth == 10) { +#if !ARCH_X86_64 + SET_QPEL_FUNCS(avg_h264_qpel, 0, 16, 10_mmxext, ff_); + SET_QPEL_FUNCS(put_h264_qpel, 0, 16, 10_mmxext, ff_); + SET_QPEL_FUNCS(put_h264_qpel, 1, 8, 10_mmxext, ff_); + SET_QPEL_FUNCS(avg_h264_qpel, 1, 8, 10_mmxext, ff_); +#endif + SET_QPEL_FUNCS(put_h264_qpel, 2, 4, 10_mmxext, ff_); + SET_QPEL_FUNCS(avg_h264_qpel, 2, 4, 10_mmxext, ff_); + } +#endif - SET_QPEL_FUNCS(put_2tap_qpel, 0, 16, mmx2); - SET_QPEL_FUNCS(put_2tap_qpel, 1, 8, mmx2); - SET_QPEL_FUNCS(avg_2tap_qpel, 0, 16, mmx2); - SET_QPEL_FUNCS(avg_2tap_qpel, 1, 8, mmx2); + SET_QPEL_FUNCS(put_2tap_qpel, 0, 16, mmx2, ); + SET_QPEL_FUNCS(put_2tap_qpel, 1, 8, mmx2, ); + SET_QPEL_FUNCS(avg_2tap_qpel, 0, 16, mmx2, ); + SET_QPEL_FUNCS(avg_2tap_qpel, 1, 8, mmx2, ); #if HAVE_YASM c->avg_rv40_chroma_pixels_tab[0]= ff_avg_rv40_chroma_mc8_mmx2; @@ -2725,26 +2737,26 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_exact_3dnow; } - SET_QPEL_FUNCS(put_qpel, 0, 16, 3dnow); - SET_QPEL_FUNCS(put_qpel, 1, 8, 3dnow); - SET_QPEL_FUNCS(put_no_rnd_qpel, 0, 16, 3dnow); - SET_QPEL_FUNCS(put_no_rnd_qpel, 1, 8, 3dnow); - SET_QPEL_FUNCS(avg_qpel, 0, 16, 3dnow); - SET_QPEL_FUNCS(avg_qpel, 1, 8, 3dnow); + SET_QPEL_FUNCS(put_qpel, 0, 16, 3dnow, ); + SET_QPEL_FUNCS(put_qpel, 1, 8, 3dnow, ); + SET_QPEL_FUNCS(put_no_rnd_qpel, 0, 16, 3dnow, ); + SET_QPEL_FUNCS(put_no_rnd_qpel, 1, 8, 3dnow, ); + SET_QPEL_FUNCS(avg_qpel, 0, 16, 3dnow, ); + SET_QPEL_FUNCS(avg_qpel, 1, 8, 3dnow, ); if (!high_bit_depth) { - SET_QPEL_FUNCS(put_h264_qpel, 0, 16, 3dnow); - SET_QPEL_FUNCS(put_h264_qpel, 1, 8, 3dnow); - SET_QPEL_FUNCS(put_h264_qpel, 2, 4, 3dnow); - SET_QPEL_FUNCS(avg_h264_qpel, 0, 16, 3dnow); - SET_QPEL_FUNCS(avg_h264_qpel, 1, 8, 3dnow); - SET_QPEL_FUNCS(avg_h264_qpel, 2, 4, 3dnow); + SET_QPEL_FUNCS(put_h264_qpel, 0, 16, 3dnow, ); + SET_QPEL_FUNCS(put_h264_qpel, 1, 8, 3dnow, ); + SET_QPEL_FUNCS(put_h264_qpel, 2, 4, 3dnow, ); + SET_QPEL_FUNCS(avg_h264_qpel, 0, 16, 3dnow, ); + SET_QPEL_FUNCS(avg_h264_qpel, 1, 8, 3dnow, ); + SET_QPEL_FUNCS(avg_h264_qpel, 2, 4, 3dnow, ); } - SET_QPEL_FUNCS(put_2tap_qpel, 0, 16, 3dnow); - SET_QPEL_FUNCS(put_2tap_qpel, 1, 8, 3dnow); - SET_QPEL_FUNCS(avg_2tap_qpel, 0, 16, 3dnow); - SET_QPEL_FUNCS(avg_2tap_qpel, 1, 8, 3dnow); + SET_QPEL_FUNCS(put_2tap_qpel, 0, 16, 3dnow, ); + SET_QPEL_FUNCS(put_2tap_qpel, 1, 8, 3dnow, ); + SET_QPEL_FUNCS(avg_2tap_qpel, 0, 16, 3dnow, ); + SET_QPEL_FUNCS(avg_2tap_qpel, 1, 8, 3dnow, ); #if HAVE_YASM if (!high_bit_depth) { @@ -2788,7 +2800,20 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) H264_QPEL_FUNCS(3, 3, sse2); } #if HAVE_YASM +#define H264_QPEL_FUNCS_10(x, y, CPU)\ + c->put_h264_qpel_pixels_tab[0][x+y*4] = ff_put_h264_qpel16_mc##x##y##_10_##CPU;\ + c->put_h264_qpel_pixels_tab[1][x+y*4] = ff_put_h264_qpel8_mc##x##y##_10_##CPU;\ + c->avg_h264_qpel_pixels_tab[0][x+y*4] = ff_avg_h264_qpel16_mc##x##y##_10_##CPU;\ + c->avg_h264_qpel_pixels_tab[1][x+y*4] = ff_avg_h264_qpel8_mc##x##y##_10_##CPU; if (bit_depth == 10) { + SET_QPEL_FUNCS(put_h264_qpel, 0, 16, 10_sse2, ff_); + SET_QPEL_FUNCS(put_h264_qpel, 1, 8, 10_sse2, ff_); + SET_QPEL_FUNCS(avg_h264_qpel, 0, 16, 10_sse2, ff_); + SET_QPEL_FUNCS(avg_h264_qpel, 1, 8, 10_sse2, ff_); + H264_QPEL_FUNCS_10(1, 0, sse2_cache64) + H264_QPEL_FUNCS_10(2, 0, sse2_cache64) + H264_QPEL_FUNCS_10(3, 0, sse2_cache64) + c->put_h264_chroma_pixels_tab[0]= ff_put_h264_chroma_mc8_10_sse2; c->avg_h264_chroma_pixels_tab[0]= ff_avg_h264_chroma_mc8_10_sse2; } @@ -2810,6 +2835,11 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) H264_QPEL_FUNCS(3, 2, ssse3); H264_QPEL_FUNCS(3, 3, ssse3); } + else if (bit_depth == 10) { + H264_QPEL_FUNCS_10(1, 0, ssse3_cache64) + H264_QPEL_FUNCS_10(2, 0, ssse3_cache64) + H264_QPEL_FUNCS_10(3, 0, ssse3_cache64) + } c->add_png_paeth_prediction= add_png_paeth_prediction_ssse3; #if HAVE_YASM if (!high_bit_depth) { @@ -2906,6 +2936,12 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) #if HAVE_AVX && HAVE_YASM if (mm_flags & AV_CPU_FLAG_AVX) { if (bit_depth == 10) { + //AVX implies !cache64. + //TODO: Port cache(32|64) detection from x264. + H264_QPEL_FUNCS_10(1, 0, sse2) + H264_QPEL_FUNCS_10(2, 0, sse2) + H264_QPEL_FUNCS_10(3, 0, sse2) + c->put_h264_chroma_pixels_tab[0]= ff_put_h264_chroma_mc8_10_avx; c->avg_h264_chroma_pixels_tab[0]= ff_avg_h264_chroma_mc8_10_avx; } |