diff options
author | Clément Bœsch <u@pkh.me> | 2014-01-30 19:01:30 +0100 |
---|---|---|
committer | Clément Bœsch <u@pkh.me> | 2014-01-30 19:34:13 +0100 |
commit | c5dd73b8902f3a938a88696d0631a1c9d5ed7d03 (patch) | |
tree | c6071ab41110ce9c128938e3ea196b0da4d32d59 /libavcodec/x86/vp9dsp_init.c | |
parent | 6dc9d2cf4741203aeac479e8dc1ebe021abf9006 (diff) | |
download | ffmpeg-c5dd73b8902f3a938a88696d0631a1c9d5ed7d03.tar.gz |
x86/vp9lpf: add ff_vp9_loop_filter_h_{48,84}_16_{sse2,ssse3,avx}().
5.40s → 5.30s overall decode time with -threads 1 on ped1080p.webm
(i7 920, ssse3)
Diffstat (limited to 'libavcodec/x86/vp9dsp_init.c')
-rw-r--r-- | libavcodec/x86/vp9dsp_init.c | 36 |
1 file changed, 22 insertions, 14 deletions
```diff
diff --git a/libavcodec/x86/vp9dsp_init.c b/libavcodec/x86/vp9dsp_init.c
index ced23ceffa..15baaff646 100644
--- a/libavcodec/x86/vp9dsp_init.c
+++ b/libavcodec/x86/vp9dsp_init.c
@@ -187,6 +187,12 @@ void ff_vp9_loop_filter_h_##size1##_##size2##_##opt(uint8_t *dst, ptrdiff_t stri
 lpf_funcs(16, 16, sse2);
 lpf_funcs(16, 16, ssse3);
 lpf_funcs(16, 16, avx);
+lpf_funcs(84, 16, sse2);
+lpf_funcs(84, 16, ssse3);
+lpf_funcs(84, 16, avx);
+lpf_funcs(48, 16, sse2);
+lpf_funcs(48, 16, ssse3);
+lpf_funcs(48, 16, avx);
 lpf_funcs(88, 16, sse2);
 lpf_funcs(88, 16, ssse3);
 lpf_funcs(88, 16, avx);
@@ -224,6 +230,19 @@ av_cold void ff_vp9dsp_init_x86(VP9DSPContext *dsp)
     init_subpel2(idx, 0, 1, v, type, opt); \
     init_subpel2(idx, 1, 0, h, type, opt)
 
+#define init_lpf(opt) do { \
+    if (ARCH_X86_64) { \
+        dsp->loop_filter_16[0] = ff_vp9_loop_filter_h_16_16_##opt; \
+        dsp->loop_filter_16[1] = ff_vp9_loop_filter_v_16_16_##opt; \
+        dsp->loop_filter_mix2[0][1][0] = ff_vp9_loop_filter_h_48_16_##opt; \
+        dsp->loop_filter_mix2[0][1][1] = ff_vp9_loop_filter_v_48_16_##opt; \
+        dsp->loop_filter_mix2[1][0][0] = ff_vp9_loop_filter_h_84_16_##opt; \
+        dsp->loop_filter_mix2[1][0][1] = ff_vp9_loop_filter_v_84_16_##opt; \
+        dsp->loop_filter_mix2[1][1][0] = ff_vp9_loop_filter_h_88_16_##opt; \
+        dsp->loop_filter_mix2[1][1][1] = ff_vp9_loop_filter_v_88_16_##opt; \
+    } \
+} while (0)
+
     if (EXTERNAL_MMX(cpu_flags)) {
         init_fpel(4, 0, 4, put, mmx);
         init_fpel(3, 0, 8, put, mmx);
@@ -248,12 +267,7 @@ av_cold void ff_vp9dsp_init_x86(VP9DSPContext *dsp)
         init_fpel(2, 1, 16, avg, sse2);
         init_fpel(1, 1, 32, avg, sse2);
         init_fpel(0, 1, 64, avg, sse2);
-        if (ARCH_X86_64) {
-            dsp->loop_filter_mix2[1][1][0] = ff_vp9_loop_filter_h_88_16_sse2;
-            dsp->loop_filter_mix2[1][1][1] = ff_vp9_loop_filter_v_88_16_sse2;
-            dsp->loop_filter_16[0] = ff_vp9_loop_filter_h_16_16_sse2;
-            dsp->loop_filter_16[1] = ff_vp9_loop_filter_v_16_16_sse2;
-        }
+        init_lpf(sse2);
     }
 
     if (EXTERNAL_SSSE3(cpu_flags)) {
@@ -276,11 +290,8 @@ av_cold void ff_vp9dsp_init_x86(VP9DSPContext *dsp)
             dsp->itxfm_add[TX_32X32][ADST_DCT] =
             dsp->itxfm_add[TX_32X32][DCT_ADST] =
             dsp->itxfm_add[TX_32X32][DCT_DCT] = ff_vp9_idct_idct_32x32_add_ssse3;
-            dsp->loop_filter_mix2[1][1][0] = ff_vp9_loop_filter_h_88_16_ssse3;
-            dsp->loop_filter_mix2[1][1][1] = ff_vp9_loop_filter_v_88_16_ssse3;
-            dsp->loop_filter_16[0] = ff_vp9_loop_filter_h_16_16_ssse3;
-            dsp->loop_filter_16[1] = ff_vp9_loop_filter_v_16_16_ssse3;
         }
+        init_lpf(ssse3);
     }
 
     if (EXTERNAL_AVX(cpu_flags)) {
@@ -297,11 +308,8 @@ av_cold void ff_vp9dsp_init_x86(VP9DSPContext *dsp)
             dsp->itxfm_add[TX_32X32][ADST_DCT] =
             dsp->itxfm_add[TX_32X32][DCT_ADST] =
             dsp->itxfm_add[TX_32X32][DCT_DCT] = ff_vp9_idct_idct_32x32_add_avx;
-            dsp->loop_filter_mix2[1][1][0] = ff_vp9_loop_filter_h_88_16_avx;
-            dsp->loop_filter_mix2[1][1][1] = ff_vp9_loop_filter_v_88_16_avx;
-            dsp->loop_filter_16[0] = ff_vp9_loop_filter_h_16_16_avx;
-            dsp->loop_filter_16[1] = ff_vp9_loop_filter_v_16_16_avx;
         }
+        init_lpf(avx);
     }
 
 #undef init_fpel
```