aboutsummaryrefslogtreecommitdiffstats
path: root/libavcodec/x86/vp9itxfm.asm
diff options
context:
space:
mode:
author Clément Bœsch <u@pkh.me> 2014-01-14 08:09:48 +0100
committer Clément Bœsch <clement@stupeflix.com> 2014-01-15 15:54:03 +0100
commit 8b4190da9382434758e390370b1752583bf4ce3a (patch)
tree da0db2f11156c6fabd71ea752f7203a055d78bf2 /libavcodec/x86/vp9itxfm.asm
parent 53e6977c07a8720cf4f785ef23686bf34b5cec57 (diff)
download ffmpeg-8b4190da9382434758e390370b1752583bf4ce3a.tar.gz
vp9/x86: add AVX for itxfm and lpf.
4412 decicycles in ff_vp9_loop_filter_h_16_16_ssse3, 4193462 runs, 842 skips
3600 decicycles in ff_vp9_loop_filter_h_16_16_avx, 4193621 runs, 683 skips

3010 decicycles in ff_vp9_loop_filter_v_16_16_ssse3, 4193528 runs, 776 skips
2678 decicycles in ff_vp9_loop_filter_v_16_16_avx, 4193742 runs, 562 skips

23025 decicycles in ff_vp9_idct_idct_32x32_add_ssse3, 2096871 runs, 281 skips
19943 decicycles in ff_vp9_idct_idct_32x32_add_avx, 2096815 runs, 337 skips

4675 decicycles in ff_vp9_idct_idct_16x16_add_ssse3, 4194018 runs, 286 skips
3980 decicycles in ff_vp9_idct_idct_16x16_add_avx, 4194022 runs, 282 skips

967 decicycles in ff_vp9_idct_idct_8x8_add_ssse3, 16776972 runs, 244 skips
887 decicycles in ff_vp9_idct_idct_8x8_add_avx, 16777002 runs, 214 skips
Diffstat (limited to 'libavcodec/x86/vp9itxfm.asm')
-rw-r--r-- libavcodec/x86/vp9itxfm.asm 21
1 files changed, 18 insertions, 3 deletions
diff --git a/libavcodec/x86/vp9itxfm.asm b/libavcodec/x86/vp9itxfm.asm
index fe04b81a26..33c0bc790b 100644
--- a/libavcodec/x86/vp9itxfm.asm
+++ b/libavcodec/x86/vp9itxfm.asm
@@ -289,7 +289,8 @@ cglobal vp9_idct_idct_4x4_add, 4,4,0, dst, stride, block, eob
VP9_STORE_2X 10, 11, 6, 7, 4
%endmacro
-INIT_XMM ssse3
+%macro VP9_IDCT_IDCT_8x8_ADD_XMM 1
+INIT_XMM %1
cglobal vp9_idct_idct_8x8_add, 4,4,13, dst, stride, block, eob
mova m12, [pw_11585x2] ; often used
@@ -376,6 +377,10 @@ cglobal vp9_idct_idct_8x8_add, 4,4,13, dst, stride, block, eob
mova [blockq+112], m4
VP9_IDCT8_WRITEOUT
RET
+%endmacro
+
+VP9_IDCT_IDCT_8x8_ADD_XMM ssse3
+VP9_IDCT_IDCT_8x8_ADD_XMM avx
;---------------------------------------------------------------------------------------------
; void vp9_idct_idct_16x16_add_<opt>(uint8_t *dst, ptrdiff_t stride, int16_t *block, int eob);
@@ -655,7 +660,8 @@ cglobal vp9_idct_idct_8x8_add, 4,4,13, dst, stride, block, eob
mova [dstq+%7], m%4
%endmacro
-INIT_XMM ssse3
+%macro VP9_IDCT_IDCT_16x16_ADD_XMM 1
+INIT_XMM %1
cglobal vp9_idct_idct_16x16_add, 4, 5, 16, 512, dst, stride, block, eob
; 2x2=eob=3, 4x4=eob=10
cmp eobd, 38
@@ -724,6 +730,10 @@ cglobal vp9_idct_idct_16x16_add, 4, 5, 16, 512, dst, stride, block, eob
; use that to zero out block coefficients
ZERO_BLOCK blockq, 32, 16, m0
RET
+%endmacro
+
+VP9_IDCT_IDCT_16x16_ADD_XMM ssse3
+VP9_IDCT_IDCT_16x16_ADD_XMM avx
;---------------------------------------------------------------------------------------------
; void vp9_idct_idct_32x32_add_<opt>(uint8_t *dst, ptrdiff_t stride, int16_t *block, int eob);
@@ -1102,7 +1112,8 @@ cglobal vp9_idct_idct_16x16_add, 4, 5, 16, 512, dst, stride, block, eob
%endif
%endmacro
-INIT_XMM ssse3
+%macro VP9_IDCT_IDCT_32x32_ADD_XMM 1
+INIT_XMM %1
cglobal vp9_idct_idct_32x32_add, 4, 8, 16, 2048, dst, stride, block, eob
cmp eobd, 135
jg .idctfull
@@ -1213,5 +1224,9 @@ cglobal vp9_idct_idct_32x32_add, 4, 8, 16, 2048, dst, stride, block, eob
; use that to zero out block coefficients
ZERO_BLOCK blockq, 64, 32, m7
RET
+%endmacro
+
+VP9_IDCT_IDCT_32x32_ADD_XMM ssse3
+VP9_IDCT_IDCT_32x32_ADD_XMM avx
%endif ; x86-64