about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Henrik Gramner <henrik@gramner.com>, 2017-09-17 22:52:13 -0300
committer: James Almer <jamrial@gmail.com>, 2017-09-18 23:24:55 -0300
commit: 18821e3ba1baa8e0fe037e11c77459ebc73f7e37 (patch)
tree: 0245e666aa1daed56c171e7ee3e50f39b6e89af3
parent: 3ffd3b7f5f13080cdba7e8d6b5d9dd7c33ff2345 (diff)
download: ffmpeg-18821e3ba1baa8e0fe037e11c77459ebc73f7e37.tar.gz
x86/exrdsp: optimize ff_reorder_pixels_avx2()
Tested with "checkasm --test=exrdsp -bench"

Before:
reorder_pixels_c: 5187.8
reorder_pixels_sse2: 377.0
reorder_pixels_avx2: 331.3

After:
reorder_pixels_c: 5181.5
reorder_pixels_sse2: 377.0
reorder_pixels_avx2: 313.8

Signed-off-by: James Almer <jamrial@gmail.com>
-rw-r--r-- libavcodec/x86/exrdsp.asm | 13
1 file changed, 6 insertions, 7 deletions
diff --git a/libavcodec/x86/exrdsp.asm b/libavcodec/x86/exrdsp.asm
index b91a7be20d..06c629e59e 100644
--- a/libavcodec/x86/exrdsp.asm
+++ b/libavcodec/x86/exrdsp.asm
@@ -39,16 +39,15 @@ cglobal reorder_pixels, 3,4,3, dst, src1, size, src2
neg sizeq ; size = offset for dst, src1, src2
.loop:
-%if cpuflag(avx2)
- vpermq m0, [src1q + sizeq], 0xd8; load first part
- vpermq m1, [src2q + sizeq], 0xd8; load second part
-%else
mova m0, [src1q+sizeq] ; load first part
movu m1, [src2q+sizeq] ; load second part
-%endif
SBUTTERFLY bw, 0, 1, 2 ; interleaved
- mova [dstq+2*sizeq ], m0 ; copy to dst
- mova [dstq+2*sizeq+mmsize], m1
+ mova [dstq+2*sizeq ], xm0 ; copy to dst
+ mova [dstq+2*sizeq+16], xm1
+%if cpuflag(avx2)
+ vperm2i128 m0, m0, m1, q0301
+ mova [dstq+2*sizeq+32], m0
+%endif
add sizeq, mmsize
jl .loop
RET