diff options
author | James Almer <jamrial@gmail.com> | 2014-01-28 04:59:45 -0300 |
---|---|---|
committer | Clément Bœsch <clement@stupeflix.com> | 2014-01-28 09:30:55 +0100 |
commit | 644c32ea4b8092e2bb19083df1f3d7ea9f277b78 (patch) | |
tree | a348114ef9a6079efc6142c76262c065ec9b33e2 | |
parent | 222c46c531089dab3009a0e1e7938d51af2b494e (diff) | |
download | ffmpeg-644c32ea4b8092e2bb19083df1f3d7ea9f277b78.tar.gz |
x86/vp9lpf: add ff_vp9_loop_filter_[vh]_88_16_sse2()
Similar gains to the ssse3 version once again.
Signed-off-by: James Almer <jamrial@gmail.com>
-rw-r--r-- | libavcodec/x86/vp9dsp_init.c | 3 |
-rw-r--r-- | libavcodec/x86/vp9lpf.asm | 20 |
2 files changed, 20 insertions, 3 deletions
diff --git a/libavcodec/x86/vp9dsp_init.c b/libavcodec/x86/vp9dsp_init.c index a6ea075be8..ced23ceffa 100644 --- a/libavcodec/x86/vp9dsp_init.c +++ b/libavcodec/x86/vp9dsp_init.c @@ -187,6 +187,7 @@ void ff_vp9_loop_filter_h_##size1##_##size2##_##opt(uint8_t *dst, ptrdiff_t stri lpf_funcs(16, 16, sse2); lpf_funcs(16, 16, ssse3); lpf_funcs(16, 16, avx); +lpf_funcs(88, 16, sse2); lpf_funcs(88, 16, ssse3); lpf_funcs(88, 16, avx); @@ -248,6 +249,8 @@ av_cold void ff_vp9dsp_init_x86(VP9DSPContext *dsp) init_fpel(1, 1, 32, avg, sse2); init_fpel(0, 1, 64, avg, sse2); if (ARCH_X86_64) { + dsp->loop_filter_mix2[1][1][0] = ff_vp9_loop_filter_h_88_16_sse2; + dsp->loop_filter_mix2[1][1][1] = ff_vp9_loop_filter_v_88_16_sse2; dsp->loop_filter_16[0] = ff_vp9_loop_filter_h_16_16_sse2; dsp->loop_filter_16[1] = ff_vp9_loop_filter_v_16_16_sse2; } diff --git a/libavcodec/x86/vp9lpf.asm b/libavcodec/x86/vp9lpf.asm index b374884449..d187b28ce6 100644 --- a/libavcodec/x86/vp9lpf.asm +++ b/libavcodec/x86/vp9lpf.asm @@ -304,6 +304,17 @@ SECTION .text %define Q7 dst2q + strideq %endmacro +%macro SPLATB_MASK 2 +%if cpuflag(ssse3) + pshufb %1, %2 +%else + punpcklbw %1, %1 + punpcklqdq %1, %1 + pshuflw %1, %1, 0 + pshufhw %1, %1, 0x55 +%endif +%endmacro + %macro LOOPFILTER 2 ; %1=v/h %2=size1 lea mstrideq, [strideq] neg mstrideq @@ -394,11 +405,13 @@ SECTION .text SPLATB_REG m2, I, m0 ; I I I I ... SPLATB_REG m3, E, m0 ; E E E E ... %elif %2 == 88 +%if cpuflag(ssse3) mova m0, [mask_mix] +%endif movd m2, Id movd m3, Ed - pshufb m2, m0 - pshufb m3, m0 + SPLATB_MASK m2, m0 + SPLATB_MASK m3, m0 %endif mova m0, [pb_80] pxor m2, m0 @@ -456,7 +469,7 @@ SECTION .text SPLATB_REG m7, H, m0 ; H H H H ... %else movd m7, Hd - pshufb m7, [mask_mix] + SPLATB_MASK m7, [mask_mix] %endif pxor m7, m8 pxor m4, m8 @@ -760,6 +773,7 @@ LPF_16_16_VH sse2 LPF_16_16_VH ssse3 LPF_16_16_VH avx +LPF_88_16_VH sse2 LPF_88_16_VH ssse3 LPF_88_16_VH avx |