diff options
author | Michael Niedermayer <michaelni@gmx.at> | 2013-01-25 14:14:21 +0100 |
---|---|---|
committer | Michael Niedermayer <michaelni@gmx.at> | 2013-01-25 14:14:21 +0100 |
commit | e9125dd5563484a067ef75b6fce4fbef235c5c6e (patch) | |
tree | 76003b3c06225cca688438f1aedc7d2a539174c8 | |
parent | d07b0d992736e01006caa64b87d996f902d357db (diff) | |
parent | 2c10e2a2f62477efaef5b641974594f7df4ca339 (diff) | |
download | ffmpeg-e9125dd5563484a067ef75b6fce4fbef235c5c6e.tar.gz |
Merge commit '2c10e2a2f62477efaef5b641974594f7df4ca339'
* commit '2c10e2a2f62477efaef5b641974594f7df4ca339':
build: Make the H.264 parser select h264qpel
x86: h264qpel: add cpu flag checks for init function
Conflicts:
libavcodec/x86/h264_qpel.c
Merged-by: Michael Niedermayer <michaelni@gmx.at>
-rwxr-xr-x | configure | 2 | ||||
-rw-r--r-- | libavcodec/x86/h264_qpel.c | 153 |
2 files changed, 76 insertions, 79 deletions
@@ -1824,7 +1824,7 @@ wmv3_vdpau_decoder_select="vc1_vdpau_decoder" wmv3_vdpau_hwaccel_select="vc1_vdpau_hwaccel" # parsers -h264_parser_select="error_resilience golomb h264dsp h264pred mpegvideo" +h264_parser_select="error_resilience golomb h264dsp h264pred h264qpel mpegvideo" mpeg4video_parser_select="error_resilience mpegvideo" mpegvideo_parser_select="error_resilience mpegvideo" vc1_parser_select="error_resilience mpegvideo" diff --git a/libavcodec/x86/h264_qpel.c b/libavcodec/x86/h264_qpel.c index 3cf8f42fb7..d6c008ee2a 100644 --- a/libavcodec/x86/h264_qpel.c +++ b/libavcodec/x86/h264_qpel.c @@ -21,6 +21,7 @@ #include "libavutil/cpu.h" #include "libavutil/x86/asm.h" +#include "libavutil/x86/cpu.h" #include "libavcodec/dsputil.h" #include "libavcodec/h264qpel.h" #include "libavcodec/mpegvideo.h" @@ -530,95 +531,91 @@ QPEL16(mmxext) void ff_h264qpel_init_x86(H264QpelContext *c, int bit_depth) { +#if HAVE_YASM int high_bit_depth = bit_depth > 8; int mm_flags = av_get_cpu_flags(); -#if HAVE_MMXEXT_EXTERNAL - if (!(mm_flags & AV_CPU_FLAG_MMXEXT)) - return; - if (!high_bit_depth) { - SET_QPEL_FUNCS(put_h264_qpel, 0, 16, mmxext, ); - SET_QPEL_FUNCS(put_h264_qpel, 1, 8, mmxext, ); - SET_QPEL_FUNCS(put_h264_qpel, 2, 4, mmxext, ); - SET_QPEL_FUNCS(avg_h264_qpel, 0, 16, mmxext, ); - SET_QPEL_FUNCS(avg_h264_qpel, 1, 8, mmxext, ); - SET_QPEL_FUNCS(avg_h264_qpel, 2, 4, mmxext, ); - } else if (bit_depth == 10) { + if (EXTERNAL_MMXEXT(mm_flags)) { + if (!high_bit_depth) { + SET_QPEL_FUNCS(put_h264_qpel, 0, 16, mmxext, ); + SET_QPEL_FUNCS(put_h264_qpel, 1, 8, mmxext, ); + SET_QPEL_FUNCS(put_h264_qpel, 2, 4, mmxext, ); + SET_QPEL_FUNCS(avg_h264_qpel, 0, 16, mmxext, ); + SET_QPEL_FUNCS(avg_h264_qpel, 1, 8, mmxext, ); + SET_QPEL_FUNCS(avg_h264_qpel, 2, 4, mmxext, ); + } else if (bit_depth == 10) { #if !ARCH_X86_64 - SET_QPEL_FUNCS(avg_h264_qpel, 0, 16, 10_mmxext, ff_); - SET_QPEL_FUNCS(put_h264_qpel, 0, 16, 10_mmxext, ff_); - SET_QPEL_FUNCS(put_h264_qpel, 1, 8, 10_mmxext, ff_); - SET_QPEL_FUNCS(avg_h264_qpel, 1, 8, 10_mmxext, ff_); -#endif - SET_QPEL_FUNCS(put_h264_qpel, 2, 4, 10_mmxext, ff_); - SET_QPEL_FUNCS(avg_h264_qpel, 2, 4, 10_mmxext, ff_); - } + SET_QPEL_FUNCS(avg_h264_qpel, 0, 16, 10_mmxext, ff_); + SET_QPEL_FUNCS(put_h264_qpel, 0, 16, 10_mmxext, ff_); + SET_QPEL_FUNCS(put_h264_qpel, 1, 8, 10_mmxext, ff_); + SET_QPEL_FUNCS(avg_h264_qpel, 1, 8, 10_mmxext, ff_); #endif - -#if HAVE_SSE2_EXTERNAL - if (!(mm_flags & AV_CPU_FLAG_SSE2)) - return; - if (!(mm_flags & AV_CPU_FLAG_SSE2SLOW) && !high_bit_depth) { - // these functions are slower than mmx on AMD, but faster on Intel - H264_QPEL_FUNCS(0, 0, sse2); - } - - if (!high_bit_depth) { - H264_QPEL_FUNCS(0, 1, sse2); - H264_QPEL_FUNCS(0, 2, sse2); - H264_QPEL_FUNCS(0, 3, sse2); - H264_QPEL_FUNCS(1, 1, sse2); - H264_QPEL_FUNCS(1, 2, sse2); - H264_QPEL_FUNCS(1, 3, sse2); - H264_QPEL_FUNCS(2, 1, sse2); - H264_QPEL_FUNCS(2, 2, sse2); - H264_QPEL_FUNCS(2, 3, sse2); - H264_QPEL_FUNCS(3, 1, sse2); - H264_QPEL_FUNCS(3, 2, sse2); - H264_QPEL_FUNCS(3, 3, sse2); + SET_QPEL_FUNCS(put_h264_qpel, 2, 4, 10_mmxext, ff_); + SET_QPEL_FUNCS(avg_h264_qpel, 2, 4, 10_mmxext, ff_); + } } - if (bit_depth == 10) { - SET_QPEL_FUNCS(put_h264_qpel, 0, 16, 10_sse2, ff_); - SET_QPEL_FUNCS(put_h264_qpel, 1, 8, 10_sse2, ff_); - SET_QPEL_FUNCS(avg_h264_qpel, 0, 16, 10_sse2, ff_); - SET_QPEL_FUNCS(avg_h264_qpel, 1, 8, 10_sse2, ff_); - H264_QPEL_FUNCS_10(1, 0, sse2_cache64); - H264_QPEL_FUNCS_10(2, 0, sse2_cache64); - H264_QPEL_FUNCS_10(3, 0, sse2_cache64); + if (EXTERNAL_SSE2(mm_flags)) { + if (!(mm_flags & AV_CPU_FLAG_SSE2SLOW) && !high_bit_depth) { + // these functions are slower than mmx on AMD, but faster on Intel + H264_QPEL_FUNCS(0, 0, sse2); + } + + if (!high_bit_depth) { + H264_QPEL_FUNCS(0, 1, sse2); + H264_QPEL_FUNCS(0, 2, sse2); + H264_QPEL_FUNCS(0, 3, sse2); + H264_QPEL_FUNCS(1, 1, sse2); + H264_QPEL_FUNCS(1, 2, sse2); + H264_QPEL_FUNCS(1, 3, sse2); + H264_QPEL_FUNCS(2, 1, sse2); + H264_QPEL_FUNCS(2, 2, sse2); + H264_QPEL_FUNCS(2, 3, sse2); + H264_QPEL_FUNCS(3, 1, sse2); + H264_QPEL_FUNCS(3, 2, sse2); + H264_QPEL_FUNCS(3, 3, sse2); + } + + if (bit_depth == 10) { + SET_QPEL_FUNCS(put_h264_qpel, 0, 16, 10_sse2, ff_); + SET_QPEL_FUNCS(put_h264_qpel, 1, 8, 10_sse2, ff_); + SET_QPEL_FUNCS(avg_h264_qpel, 0, 16, 10_sse2, ff_); + SET_QPEL_FUNCS(avg_h264_qpel, 1, 8, 10_sse2, ff_); + H264_QPEL_FUNCS_10(1, 0, sse2_cache64); + H264_QPEL_FUNCS_10(2, 0, sse2_cache64); + H264_QPEL_FUNCS_10(3, 0, sse2_cache64); + } } -#endif -#if HAVE_SSSE3_EXTERNAL - if (!(mm_flags & AV_CPU_FLAG_SSSE3)) - return; - if (!high_bit_depth) { - H264_QPEL_FUNCS(1, 0, ssse3); - H264_QPEL_FUNCS(1, 1, ssse3); - H264_QPEL_FUNCS(1, 2, ssse3); - H264_QPEL_FUNCS(1, 3, ssse3); - H264_QPEL_FUNCS(2, 0, ssse3); - H264_QPEL_FUNCS(2, 1, ssse3); - H264_QPEL_FUNCS(2, 2, ssse3); - H264_QPEL_FUNCS(2, 3, ssse3); - H264_QPEL_FUNCS(3, 0, ssse3); - H264_QPEL_FUNCS(3, 1, ssse3); - H264_QPEL_FUNCS(3, 2, ssse3); - H264_QPEL_FUNCS(3, 3, ssse3); + if (EXTERNAL_SSSE3(mm_flags)) { + if (!high_bit_depth) { + H264_QPEL_FUNCS(1, 0, ssse3); + H264_QPEL_FUNCS(1, 1, ssse3); + H264_QPEL_FUNCS(1, 2, ssse3); + H264_QPEL_FUNCS(1, 3, ssse3); + H264_QPEL_FUNCS(2, 0, ssse3); + H264_QPEL_FUNCS(2, 1, ssse3); + H264_QPEL_FUNCS(2, 2, ssse3); + H264_QPEL_FUNCS(2, 3, ssse3); + H264_QPEL_FUNCS(3, 0, ssse3); + H264_QPEL_FUNCS(3, 1, ssse3); + H264_QPEL_FUNCS(3, 2, ssse3); + H264_QPEL_FUNCS(3, 3, ssse3); + } + + if (bit_depth == 10) { + H264_QPEL_FUNCS_10(1, 0, ssse3_cache64); + H264_QPEL_FUNCS_10(2, 0, ssse3_cache64); + H264_QPEL_FUNCS_10(3, 0, ssse3_cache64); + } } - if (bit_depth == 10) { - H264_QPEL_FUNCS_10(1, 0, ssse3_cache64); - H264_QPEL_FUNCS_10(2, 0, ssse3_cache64); - H264_QPEL_FUNCS_10(3, 0, ssse3_cache64); - } -#endif - -#if HAVE_AVX_EXTERNAL - if (bit_depth == 10) { - H264_QPEL_FUNCS_10(1, 0, sse2); - H264_QPEL_FUNCS_10(2, 0, sse2); - H264_QPEL_FUNCS_10(3, 0, sse2); + if (EXTERNAL_AVX(mm_flags)) { + if (bit_depth == 10) { + H264_QPEL_FUNCS_10(1, 0, sse2); + H264_QPEL_FUNCS_10(2, 0, sse2); + H264_QPEL_FUNCS_10(3, 0, sse2); + } } #endif } |