diff options
-rw-r--r-- | libavcodec/x86/vp8dsp.asm | 85 |
1 files changed, 52 insertions, 33 deletions
diff --git a/libavcodec/x86/vp8dsp.asm b/libavcodec/x86/vp8dsp.asm index 4aa901e27f..3bdc8f7ed9 100644 --- a/libavcodec/x86/vp8dsp.asm +++ b/libavcodec/x86/vp8dsp.asm @@ -1360,22 +1360,29 @@ cglobal vp8_luma_dc_wht_mmx, 2,3 movd [%7+%9*2], m%4 %endmacro -%macro SPLATB_REG 3-4 +%macro SPLATB_REG_MMX 2-3 movd %1, %2 -%ifidn %3, ssse3 - pshufb %1, %4 -%else punpcklbw %1, %1 -%if mmsize == 16 ; sse2 - pshuflw %1, %1, 0x0 - punpcklqdq %1, %1 -%elifidn %3, mmx punpcklwd %1, %1 punpckldq %1, %1 -%else ; mmxext +%endmacro + +%macro SPLATB_REG_MMXEXT 2-3 + movd %1, %2 + punpcklbw %1, %1 pshufw %1, %1, 0x0 -%endif -%endif +%endmacro + +%macro SPLATB_REG_SSE2 2-3 + movd %1, %2 + punpcklbw %1, %1 + pshuflw %1, %1, 0x0 + punpcklqdq %1, %1 +%endmacro + +%macro SPLATB_REG_SSSE3 3 + movd %1, %2 + pshufb %1, %3 %endmacro %macro SIMPLE_LOOPFILTER 3 @@ -1387,10 +1394,10 @@ cglobal vp8_%2_loop_filter_simple_%1, 3, %3 %if mmsize == 8 ; mmx/mmxext mov r3, 2 %endif -%ifidn %1, ssse3 +%ifnidn %1, sse2 && mmsize == 16 pxor m0, m0 %endif - SPLATB_REG m7, r2, %1, m0 ; splat "flim" into register + SPLATB_REG m7, r2, m0 ; splat "flim" into register ; set up indexes to address 4 rows mov r2, r1 @@ -1529,13 +1536,17 @@ cglobal vp8_%2_loop_filter_simple_%1, 3, %3 %endmacro INIT_MMX +%define SPLATB_REG SPLATB_REG_MMX SIMPLE_LOOPFILTER mmx, v, 4 SIMPLE_LOOPFILTER mmx, h, 6 +%define SPLATB_REG SPLATB_REG_MMXEXT SIMPLE_LOOPFILTER mmxext, v, 4 SIMPLE_LOOPFILTER mmxext, h, 6 INIT_XMM +%define SPLATB_REG SPLATB_REG_SSE2 SIMPLE_LOOPFILTER sse2, v, 3 SIMPLE_LOOPFILTER sse2, h, 6 +%define SPLATB_REG SPLATB_REG_SSSE3 SIMPLE_LOOPFILTER ssse3, v, 3 SIMPLE_LOOPFILTER ssse3, h, 6 @@ -1573,15 +1584,15 @@ cglobal vp8_%2_loop_filter16y_inner_%1, 5, %3, %5 %define stack_reg hev_thr_reg %endif -%ifidn %1, ssse3 +%ifnidn %1, sse2 && mmsize == 16 pxor m7, m7 %endif %ifndef m8 ; mmx/mmxext or sse2 on x86-32 ; splat function arguments - SPLATB_REG m0, E_reg, %1, m7 ; E - SPLATB_REG m1, I_reg, %1, m7 ; I - SPLATB_REG m2, hev_thr_reg, %1, m7 ; hev_thresh + SPLATB_REG m0, E_reg, m7 ; E + SPLATB_REG m1, I_reg, m7 ; I + SPLATB_REG m2, hev_thr_reg, m7 ; hev_thresh ; align stack mov stack_reg, rsp ; backup stack pointer @@ -1614,9 +1625,9 @@ cglobal vp8_%2_loop_filter16y_inner_%1, 5, %3, %5 %define q0backup m8 ; splat function arguments - SPLATB_REG flim_E, E_reg, %1, m7 ; E - SPLATB_REG flim_I, I_reg, %1, m7 ; I - SPLATB_REG hev_thr, hev_thr_reg, %1, m7 ; hev_thresh + SPLATB_REG flim_E, E_reg, m7 ; E + SPLATB_REG flim_I, I_reg, m7 ; I + SPLATB_REG hev_thr, hev_thr_reg, m7 ; hev_thresh %endif %if mmsize == 8 && %4 == 16 ; mmx/mmxext @@ -2028,17 +2039,20 @@ cglobal vp8_%2_loop_filter16y_inner_%1, 5, %3, %5 %endmacro INIT_MMX +%define SPLATB_REG SPLATB_REG_MMX INNER_LOOPFILTER mmx, v, 6, 16, 0 INNER_LOOPFILTER mmx, h, 6, 16, 0 -INNER_LOOPFILTER mmxext, v, 6, 16, 0 -INNER_LOOPFILTER mmxext, h, 6, 16, 0 - INNER_LOOPFILTER mmx, v, 6, 8, 0 INNER_LOOPFILTER mmx, h, 6, 8, 0 + +%define SPLATB_REG SPLATB_REG_MMXEXT +INNER_LOOPFILTER mmxext, v, 6, 16, 0 +INNER_LOOPFILTER mmxext, h, 6, 16, 0 INNER_LOOPFILTER mmxext, v, 6, 8, 0 INNER_LOOPFILTER mmxext, h, 6, 8, 0 INIT_XMM +%define SPLATB_REG SPLATB_REG_SSE2 INNER_LOOPFILTER sse2, v, 5, 16, 13 %ifdef m8 INNER_LOOPFILTER sse2, h, 5, 16, 13 @@ -2048,6 +2062,7 @@ INNER_LOOPFILTER sse2, h, 6, 16, 13 INNER_LOOPFILTER sse2, v, 6, 8, 13 INNER_LOOPFILTER sse2, h, 6, 8, 13 +%define SPLATB_REG SPLATB_REG_SSSE3 INNER_LOOPFILTER ssse3, v, 5, 16, 13 %ifdef m8 INNER_LOOPFILTER ssse3, h, 5, 16, 13 @@ -2152,15 +2167,15 @@ cglobal vp8_%2_loop_filter16y_mbedge_%1, 5, %3, %5 %define stack_reg hev_thr_reg %endif -%ifidn %1, ssse3 +%ifnidn %1, sse2 && mmsize == 16 pxor m7, m7 %endif %ifndef m8 ; mmx/mmxext or sse2 on x86-32 ; splat function arguments - SPLATB_REG m0, E_reg, %1, m7 ; E - SPLATB_REG m1, I_reg, %1, m7 ; I - SPLATB_REG m2, hev_thr_reg, %1, m7 ; hev_thresh + SPLATB_REG m0, E_reg, m7 ; E + SPLATB_REG m1, I_reg, m7 ; I + SPLATB_REG m2, hev_thr_reg, m7 ; hev_thresh ; align stack mov stack_reg, rsp ; backup stack pointer @@ -2200,9 +2215,9 @@ cglobal vp8_%2_loop_filter16y_mbedge_%1, 5, %3, %5 %define lim_sign m15 ; splat function arguments - SPLATB_REG flim_E, E_reg, %1, m7 ; E - SPLATB_REG flim_I, I_reg, %1, m7 ; I - SPLATB_REG hev_thr, hev_thr_reg, %1, m7 ; hev_thresh + SPLATB_REG flim_E, E_reg, m7 ; E + SPLATB_REG flim_I, I_reg, m7 ; I + SPLATB_REG hev_thr, hev_thr_reg, m7 ; hev_thresh %endif %if mmsize == 8 && %4 == 16 ; mmx/mmxext @@ -2696,17 +2711,20 @@ cglobal vp8_%2_loop_filter16y_mbedge_%1, 5, %3, %5 %endmacro INIT_MMX +%define SPLATB_REG SPLATB_REG_MMX MBEDGE_LOOPFILTER mmx, v, 6, 16, 0 MBEDGE_LOOPFILTER mmx, h, 6, 16, 0 -MBEDGE_LOOPFILTER mmxext, v, 6, 16, 0 -MBEDGE_LOOPFILTER mmxext, h, 6, 16, 0 - MBEDGE_LOOPFILTER mmx, v, 6, 8, 0 MBEDGE_LOOPFILTER mmx, h, 6, 8, 0 + +%define SPLATB_REG SPLATB_REG_MMXEXT +MBEDGE_LOOPFILTER mmxext, v, 6, 16, 0 +MBEDGE_LOOPFILTER mmxext, h, 6, 16, 0 MBEDGE_LOOPFILTER mmxext, v, 6, 8, 0 MBEDGE_LOOPFILTER mmxext, h, 6, 8, 0 INIT_XMM +%define SPLATB_REG SPLATB_REG_SSE2 MBEDGE_LOOPFILTER sse2, v, 5, 16, 16 %ifdef m8 MBEDGE_LOOPFILTER sse2, h, 5, 16, 16 @@ -2716,6 +2734,7 @@ MBEDGE_LOOPFILTER sse2, h, 6, 16, 16 MBEDGE_LOOPFILTER sse2, v, 6, 8, 16 MBEDGE_LOOPFILTER sse2, h, 6, 8, 16 +%define SPLATB_REG SPLATB_REG_SSSE3 MBEDGE_LOOPFILTER ssse3, v, 5, 16, 16 %ifdef m8 MBEDGE_LOOPFILTER ssse3, h, 5, 16, 16 |