about summary refs log tree commit diff stats
path: root/libavcodec/x86/vp9itxfm.asm
diff options
context:
space:
mode:
author Ronald S. Bultje <rsbultje@gmail.com> 2015-10-06 11:03:45 -0400
committer Ronald S. Bultje <rsbultje@gmail.com> 2015-10-13 11:05:58 -0400
commit 6b579cf547a75a0cbda5cb7f10eab9ca07522b0a (patch)
tree 6dc311271278288af98f9d886d39684c62949355 /libavcodec/x86/vp9itxfm.asm
parent 1c3be32533e506d66b5a8eb7b93b12d4442146fb (diff)
download ffmpeg-6b579cf547a75a0cbda5cb7f10eab9ca07522b0a.tar.gz
vp9: add 10bpp simd (mmxext/ssse3) for idct_idct_4x4.
Diffstat (limited to 'libavcodec/x86/vp9itxfm.asm')
-rw-r--r-- libavcodec/x86/vp9itxfm.asm 50
1 files changed, 1 insertions, 49 deletions
diff --git a/libavcodec/x86/vp9itxfm.asm b/libavcodec/x86/vp9itxfm.asm
index c564f276cf..200f15e790 100644
--- a/libavcodec/x86/vp9itxfm.asm
+++ b/libavcodec/x86/vp9itxfm.asm
@@ -71,8 +71,6 @@ pw_13377x2: times 8 dw 13377*2
pw_m13377_13377: times 4 dw -13377, 13377
pw_13377_0: times 4 dw 13377, 0
-pd_8192: times 4 dd 8192
-
cextern pw_8
cextern pw_16
cextern pw_32
@@ -80,38 +78,10 @@ cextern pw_512
cextern pw_1024
cextern pw_2048
cextern pw_m1
+cextern pd_8192
SECTION .text
-; (a*x + b*y + round) >> shift
-%macro VP9_MULSUB_2W_2X 5 ; dst1, dst2/src, round, coefs1, coefs2
- pmaddwd m%1, m%2, %4
- pmaddwd m%2, %5
- paddd m%1, %3
- paddd m%2, %3
- psrad m%1, 14
- psrad m%2, 14
-%endmacro
-
-%macro VP9_MULSUB_2W_4X 7 ; dst1, dst2, coef1, coef2, rnd, tmp1/src, tmp2
- VP9_MULSUB_2W_2X %7, %6, %5, [pw_m%3_%4], [pw_%4_%3]
- VP9_MULSUB_2W_2X %1, %2, %5, [pw_m%3_%4], [pw_%4_%3]
- packssdw m%1, m%7
- packssdw m%2, m%6
-%endmacro
-
-%macro VP9_UNPACK_MULSUB_2W_4X 7-9 ; dst1, dst2, (src1, src2,) coef1, coef2, rnd, tmp1, tmp2
-%if %0 == 7
- punpckhwd m%6, m%2, m%1
- punpcklwd m%2, m%1
- VP9_MULSUB_2W_4X %1, %2, %3, %4, %5, %6, %7
-%else
- punpckhwd m%8, m%4, m%3
- punpcklwd m%2, m%4, m%3
- VP9_MULSUB_2W_4X %1, %2, %5, %6, %7, %8, %9
-%endif
-%endmacro
-
%macro VP9_UNPACK_MULSUB_2D_4X 6 ; dst1 [src1], dst2 [src2], dst3, dst4, mul1, mul2
punpckhwd m%4, m%2, m%1
punpcklwd m%2, m%1
@@ -191,24 +161,6 @@ cglobal vp9_iwht_iwht_4x4_add, 3, 3, 0, dst, stride, block, eob
; void vp9_idct_idct_4x4_add_<opt>(uint8_t *dst, ptrdiff_t stride, int16_t *block, int eob);
;-------------------------------------------------------------------------------------------
-%macro VP9_IDCT4_1D_FINALIZE 0
- SUMSUB_BA w, 3, 2, 4 ; m3=t3+t0, m2=-t3+t0
- SUMSUB_BA w, 1, 0, 4 ; m1=t2+t1, m0=-t2+t1
- SWAP 0, 3, 2 ; 3102 -> 0123
-%endmacro
-
-%macro VP9_IDCT4_1D 0
-%if cpuflag(ssse3)
- SUMSUB_BA w, 2, 0, 4 ; m2=IN(0)+IN(2) m0=IN(0)-IN(2)
- pmulhrsw m2, m6 ; m2=t0
- pmulhrsw m0, m6 ; m0=t1
-%else ; <= sse2
- VP9_UNPACK_MULSUB_2W_4X 0, 2, 11585, 11585, m7, 4, 5 ; m0=t1, m1=t0
-%endif
- VP9_UNPACK_MULSUB_2W_4X 1, 3, 15137, 6270, m7, 4, 5 ; m1=t2, m3=t3
- VP9_IDCT4_1D_FINALIZE
-%endmacro
-
; 2x2 top left corner
%macro VP9_IDCT4_2x2_1D 0
pmulhrsw m0, m5 ; m0=t1