diff options
author | Ronald S. Bultje <rsbultje@gmail.com> | 2015-10-06 11:03:45 -0400 |
---|---|---|
committer | Ronald S. Bultje <rsbultje@gmail.com> | 2015-10-13 11:05:58 -0400 |
commit | 6b579cf547a75a0cbda5cb7f10eab9ca07522b0a (patch) | |
tree | 6dc311271278288af98f9d886d39684c62949355 /libavcodec/x86/vp9itxfm.asm | |
parent | 1c3be32533e506d66b5a8eb7b93b12d4442146fb (diff) | |
download | ffmpeg-6b579cf547a75a0cbda5cb7f10eab9ca07522b0a.tar.gz |
vp9: add 10bpp simd (mmxext/ssse3) for idct_idct_4x4.
Diffstat (limited to 'libavcodec/x86/vp9itxfm.asm')
-rw-r--r-- | libavcodec/x86/vp9itxfm.asm | 50 |
1 files changed, 1 insertions, 49 deletions
```diff
diff --git a/libavcodec/x86/vp9itxfm.asm b/libavcodec/x86/vp9itxfm.asm
index c564f276cf..200f15e790 100644
--- a/libavcodec/x86/vp9itxfm.asm
+++ b/libavcodec/x86/vp9itxfm.asm
@@ -71,8 +71,6 @@ pw_13377x2: times 8 dw 13377*2
 pw_m13377_13377: times 4 dw -13377, 13377
 pw_13377_0: times 4 dw 13377, 0
 
-pd_8192: times 4 dd 8192
-
 cextern pw_8
 cextern pw_16
 cextern pw_32
@@ -80,38 +78,10 @@ cextern pw_512
 cextern pw_1024
 cextern pw_2048
 cextern pw_m1
+cextern pd_8192
 
 SECTION .text
 
-; (a*x + b*y + round) >> shift
-%macro VP9_MULSUB_2W_2X 5 ; dst1, dst2/src, round, coefs1, coefs2
-    pmaddwd m%1, m%2, %4
-    pmaddwd m%2, %5
-    paddd m%1, %3
-    paddd m%2, %3
-    psrad m%1, 14
-    psrad m%2, 14
-%endmacro
-
-%macro VP9_MULSUB_2W_4X 7 ; dst1, dst2, coef1, coef2, rnd, tmp1/src, tmp2
-    VP9_MULSUB_2W_2X %7, %6, %5, [pw_m%3_%4], [pw_%4_%3]
-    VP9_MULSUB_2W_2X %1, %2, %5, [pw_m%3_%4], [pw_%4_%3]
-    packssdw m%1, m%7
-    packssdw m%2, m%6
-%endmacro
-
-%macro VP9_UNPACK_MULSUB_2W_4X 7-9 ; dst1, dst2, (src1, src2,) coef1, coef2, rnd, tmp1, tmp2
-%if %0 == 7
-    punpckhwd m%6, m%2, m%1
-    punpcklwd m%2, m%1
-    VP9_MULSUB_2W_4X %1, %2, %3, %4, %5, %6, %7
-%else
-    punpckhwd m%8, m%4, m%3
-    punpcklwd m%2, m%4, m%3
-    VP9_MULSUB_2W_4X %1, %2, %5, %6, %7, %8, %9
-%endif
-%endmacro
-
 %macro VP9_UNPACK_MULSUB_2D_4X 6 ; dst1 [src1], dst2 [src2], dst3, dst4, mul1, mul2
     punpckhwd m%4, m%2, m%1
     punpcklwd m%2, m%1
@@ -191,24 +161,6 @@ cglobal vp9_iwht_iwht_4x4_add, 3, 3, 0, dst, stride, block, eob
 ; void vp9_idct_idct_4x4_add_<opt>(uint8_t *dst, ptrdiff_t stride, int16_t *block, int eob);
 ;-------------------------------------------------------------------------------------------
 
-%macro VP9_IDCT4_1D_FINALIZE 0
-    SUMSUB_BA w, 3, 2, 4 ; m3=t3+t0, m2=-t3+t0
-    SUMSUB_BA w, 1, 0, 4 ; m1=t2+t1, m0=-t2+t1
-    SWAP 0, 3, 2 ; 3102 -> 0123
-%endmacro
-
-%macro VP9_IDCT4_1D 0
-%if cpuflag(ssse3)
-    SUMSUB_BA w, 2, 0, 4 ; m2=IN(0)+IN(2) m0=IN(0)-IN(2)
-    pmulhrsw m2, m6 ; m2=t0
-    pmulhrsw m0, m6 ; m0=t1
-%else ; <= sse2
-    VP9_UNPACK_MULSUB_2W_4X 0, 2, 11585, 11585, m7, 4, 5 ; m0=t1, m1=t0
-%endif
-    VP9_UNPACK_MULSUB_2W_4X 1, 3, 15137, 6270, m7, 4, 5 ; m1=t2, m3=t3
-    VP9_IDCT4_1D_FINALIZE
-%endmacro
-
 ; 2x2 top left corner
 %macro VP9_IDCT4_2x2_1D 0
     pmulhrsw m0, m5 ; m0=t1
```