diff options
author | Michael Niedermayer <michaelni@gmx.at> | 2013-01-24 14:44:34 +0100 |
---|---|---|
committer | Michael Niedermayer <michaelni@gmx.at> | 2013-01-24 15:47:47 +0100 |
commit | fc13a89654b5878050ac07e36254c5e712aa8820 (patch) | |
tree | 73e35d94880aae72b785ef9d1079c442a552777b /libavcodec/x86/h264_qpel.c | |
parent | 70b0aeba001c61d52965debda4875e610bb2435e (diff) | |
parent | e9d817351b28e62868528476971afe4cde1fa280 (diff) | |
download | ffmpeg-fc13a89654b5878050ac07e36254c5e712aa8820.tar.gz |
Merge remote-tracking branch 'qatar/master'
* qatar/master:
dsputil: Separate h264 qpel
Conflicts:
libavcodec/dsputil_template.c
libavcodec/h264.c
Merged-by: Michael Niedermayer <michaelni@gmx.at>
Diffstat (limited to 'libavcodec/x86/h264_qpel.c')
-rw-r--r-- | libavcodec/x86/h264_qpel.c | 126 |
1 files changed, 126 insertions, 0 deletions
diff --git a/libavcodec/x86/h264_qpel.c b/libavcodec/x86/h264_qpel.c index 171ef2aa02..2033763f9e 100644 --- a/libavcodec/x86/h264_qpel.c +++ b/libavcodec/x86/h264_qpel.c @@ -22,6 +22,7 @@ #include "libavutil/cpu.h" #include "libavutil/x86/asm.h" #include "libavcodec/dsputil.h" +#include "libavcodec/h264qpel.h" #include "libavcodec/mpegvideo.h" #include "dsputil_mmx.h" @@ -490,3 +491,128 @@ QPEL16(mmxext) #endif #endif /* HAVE_YASM */ + +#define SET_QPEL_FUNCS(PFX, IDX, SIZE, CPU, PREFIX) \ + do { \ + c->PFX ## _pixels_tab[IDX][ 0] = PREFIX ## PFX ## SIZE ## _mc00_ ## CPU; \ + c->PFX ## _pixels_tab[IDX][ 1] = PREFIX ## PFX ## SIZE ## _mc10_ ## CPU; \ + c->PFX ## _pixels_tab[IDX][ 2] = PREFIX ## PFX ## SIZE ## _mc20_ ## CPU; \ + c->PFX ## _pixels_tab[IDX][ 3] = PREFIX ## PFX ## SIZE ## _mc30_ ## CPU; \ + c->PFX ## _pixels_tab[IDX][ 4] = PREFIX ## PFX ## SIZE ## _mc01_ ## CPU; \ + c->PFX ## _pixels_tab[IDX][ 5] = PREFIX ## PFX ## SIZE ## _mc11_ ## CPU; \ + c->PFX ## _pixels_tab[IDX][ 6] = PREFIX ## PFX ## SIZE ## _mc21_ ## CPU; \ + c->PFX ## _pixels_tab[IDX][ 7] = PREFIX ## PFX ## SIZE ## _mc31_ ## CPU; \ + c->PFX ## _pixels_tab[IDX][ 8] = PREFIX ## PFX ## SIZE ## _mc02_ ## CPU; \ + c->PFX ## _pixels_tab[IDX][ 9] = PREFIX ## PFX ## SIZE ## _mc12_ ## CPU; \ + c->PFX ## _pixels_tab[IDX][10] = PREFIX ## PFX ## SIZE ## _mc22_ ## CPU; \ + c->PFX ## _pixels_tab[IDX][11] = PREFIX ## PFX ## SIZE ## _mc32_ ## CPU; \ + c->PFX ## _pixels_tab[IDX][12] = PREFIX ## PFX ## SIZE ## _mc03_ ## CPU; \ + c->PFX ## _pixels_tab[IDX][13] = PREFIX ## PFX ## SIZE ## _mc13_ ## CPU; \ + c->PFX ## _pixels_tab[IDX][14] = PREFIX ## PFX ## SIZE ## _mc23_ ## CPU; \ + c->PFX ## _pixels_tab[IDX][15] = PREFIX ## PFX ## SIZE ## _mc33_ ## CPU; \ + } while (0) + +#define H264_QPEL_FUNCS(x, y, CPU) \ + do { \ + c->put_h264_qpel_pixels_tab[0][x + y * 4] = put_h264_qpel16_mc ## x ## y ## _ ## CPU; \ + c->put_h264_qpel_pixels_tab[1][x + y * 4] = put_h264_qpel8_mc ## x ## y ## _ ## CPU; \ + c->avg_h264_qpel_pixels_tab[0][x + y * 4] = avg_h264_qpel16_mc ## x ## y ## _ ## CPU; \ + c->avg_h264_qpel_pixels_tab[1][x + y * 4] = avg_h264_qpel8_mc ## x ## y ## _ ## CPU; \ + } while (0) + +#define H264_QPEL_FUNCS_10(x, y, CPU) \ + do { \ + c->put_h264_qpel_pixels_tab[0][x + y * 4] = ff_put_h264_qpel16_mc ## x ## y ## _10_ ## CPU; \ + c->put_h264_qpel_pixels_tab[1][x + y * 4] = ff_put_h264_qpel8_mc ## x ## y ## _10_ ## CPU; \ + c->avg_h264_qpel_pixels_tab[0][x + y * 4] = ff_avg_h264_qpel16_mc ## x ## y ## _10_ ## CPU; \ + c->avg_h264_qpel_pixels_tab[1][x + y * 4] = ff_avg_h264_qpel8_mc ## x ## y ## _10_ ## CPU; \ + } while (0) + +void ff_h264qpel_init_x86(H264QpelContext *c, int bit_depth) +{ + int high_bit_depth = bit_depth > 8; + int mm_flags = av_get_cpu_flags(); + +#if HAVE_MMXEXT_EXTERNAL + if (!high_bit_depth) { + SET_QPEL_FUNCS(put_h264_qpel, 0, 16, mmxext, ); + SET_QPEL_FUNCS(put_h264_qpel, 1, 8, mmxext, ); + SET_QPEL_FUNCS(put_h264_qpel, 2, 4, mmxext, ); + SET_QPEL_FUNCS(avg_h264_qpel, 0, 16, mmxext, ); + SET_QPEL_FUNCS(avg_h264_qpel, 1, 8, mmxext, ); + SET_QPEL_FUNCS(avg_h264_qpel, 2, 4, mmxext, ); + } else if (bit_depth == 10) { +#if !ARCH_X86_64 + SET_QPEL_FUNCS(avg_h264_qpel, 0, 16, 10_mmxext, ff_); + SET_QPEL_FUNCS(put_h264_qpel, 0, 16, 10_mmxext, ff_); + SET_QPEL_FUNCS(put_h264_qpel, 1, 8, 10_mmxext, ff_); + SET_QPEL_FUNCS(avg_h264_qpel, 1, 8, 10_mmxext, ff_); +#endif + SET_QPEL_FUNCS(put_h264_qpel, 2, 4, 10_mmxext, ff_); + SET_QPEL_FUNCS(avg_h264_qpel, 2, 4, 10_mmxext, ff_); + } +#endif + +#if HAVE_SSE2_EXTERNAL + if (!(mm_flags & AV_CPU_FLAG_SSE2SLOW) && !high_bit_depth) { + // these functions are slower than mmx on AMD, but faster on Intel + H264_QPEL_FUNCS(0, 0, sse2); + } + + if (!high_bit_depth) { + H264_QPEL_FUNCS(0, 1, sse2); + H264_QPEL_FUNCS(0, 2, sse2); + H264_QPEL_FUNCS(0, 3, sse2); + H264_QPEL_FUNCS(1, 1, sse2); + H264_QPEL_FUNCS(1, 2, sse2); + H264_QPEL_FUNCS(1, 3, sse2); + H264_QPEL_FUNCS(2, 1, sse2); + H264_QPEL_FUNCS(2, 2, sse2); + H264_QPEL_FUNCS(2, 3, sse2); + H264_QPEL_FUNCS(3, 1, sse2); + H264_QPEL_FUNCS(3, 2, sse2); + H264_QPEL_FUNCS(3, 3, sse2); + } + + if (bit_depth == 10) { + SET_QPEL_FUNCS(put_h264_qpel, 0, 16, 10_sse2, ff_); + SET_QPEL_FUNCS(put_h264_qpel, 1, 8, 10_sse2, ff_); + SET_QPEL_FUNCS(avg_h264_qpel, 0, 16, 10_sse2, ff_); + SET_QPEL_FUNCS(avg_h264_qpel, 1, 8, 10_sse2, ff_); + H264_QPEL_FUNCS_10(1, 0, sse2_cache64); + H264_QPEL_FUNCS_10(2, 0, sse2_cache64); + H264_QPEL_FUNCS_10(3, 0, sse2_cache64); + } +#endif + +#if HAVE_SSSE3_EXTERNAL + if (!high_bit_depth) { + H264_QPEL_FUNCS(1, 0, ssse3); + H264_QPEL_FUNCS(1, 1, ssse3); + H264_QPEL_FUNCS(1, 2, ssse3); + H264_QPEL_FUNCS(1, 3, ssse3); + H264_QPEL_FUNCS(2, 0, ssse3); + H264_QPEL_FUNCS(2, 1, ssse3); + H264_QPEL_FUNCS(2, 2, ssse3); + H264_QPEL_FUNCS(2, 3, ssse3); + H264_QPEL_FUNCS(3, 0, ssse3); + H264_QPEL_FUNCS(3, 1, ssse3); + H264_QPEL_FUNCS(3, 2, ssse3); + H264_QPEL_FUNCS(3, 3, ssse3); + } + + if (bit_depth == 10) { + H264_QPEL_FUNCS_10(1, 0, ssse3_cache64); + H264_QPEL_FUNCS_10(2, 0, ssse3_cache64); + H264_QPEL_FUNCS_10(3, 0, ssse3_cache64); + } +#endif + +#if HAVE_AVX_EXTERNAL + if (bit_depth == 10) { + H264_QPEL_FUNCS_10(1, 0, sse2); + H264_QPEL_FUNCS_10(2, 0, sse2); + H264_QPEL_FUNCS_10(3, 0, sse2); + } +#endif +} |