diff options
author | Timothy Gu <timothygu99@gmail.com> | 2015-11-01 03:12:45 -0800 |
---|---|---|
committer | Timothy Gu <timothygu99@gmail.com> | 2015-11-07 14:31:34 -0800 |
commit | 4b80b895a9db60fb6ab2fb64d8c7c9faabbdda79 (patch) | |
tree | e0c29e4c2cb500cf00f0df8c45e55c67b9657ce8 /libavcodec/x86/pixblockdsp.asm | |
parent | 7cea3430a56fb0ff6ef60f08620fd3875e7bfeb6 (diff) | |
download | ffmpeg-4b80b895a9db60fb6ab2fb64d8c7c9faabbdda79.tar.gz |
pixblockdsp: x86: Condense diff_pixels_* to a shared macro
Reviewed-by: Ronald S. Bultje <rsbultje@gmail.com>
Reviewed-by: James Almer <jamrial@gmail.com>
Diffstat (limited to 'libavcodec/x86/pixblockdsp.asm')
-rw-r--r-- | libavcodec/x86/pixblockdsp.asm | 64 |
1 files changed, 30 insertions, 34 deletions
```diff
diff --git a/libavcodec/x86/pixblockdsp.asm b/libavcodec/x86/pixblockdsp.asm
index 7c5377b2bb..2864d0c977 100644
--- a/libavcodec/x86/pixblockdsp.asm
+++ b/libavcodec/x86/pixblockdsp.asm
@@ -80,54 +80,50 @@ cglobal get_pixels, 3, 4, 5
     mova       [r0+0x70], m3
     RET
 
-INIT_MMX mmx
 ; void ff_diff_pixels_mmx(int16_t *block, const uint8_t *s1, const uint8_t *s2,
 ;                         int stride);
-cglobal diff_pixels, 4,5
-    movsxdifnidn r3, r3d
-    pxor         m7, m7
-    add          r0,  128
-    mov          r4, -128
-.loop:
-    mova         m0, [r1]
-    mova         m2, [r2]
-    mova         m1, m0
-    mova         m3, m2
-    punpcklbw    m0, m7
-    punpckhbw    m1, m7
-    punpcklbw    m2, m7
-    punpckhbw    m3, m7
-    psubw        m0, m2
-    psubw        m1, m3
-    mova         [r0+r4+0], m0
-    mova         [r0+r4+8], m1
-    add          r1, r3
-    add          r2, r3
-    add          r4, 16
-    jne .loop
-    REP_RET
-
-INIT_XMM sse2
-cglobal diff_pixels, 4, 5, 5
+%macro DIFF_PIXELS 0
+cglobal diff_pixels, 4,5,5
     movsxdifnidn r3, r3d
     pxor         m4, m4
     add          r0,  128
     mov          r4, -128
 .loop:
-    movh         m0, [r1]
-    movh         m2, [r2]
-    movh         m1, [r1+r3]
-    movh         m3, [r2+r3]
+    movq         m0, [r1]
+    movq         m2, [r2]
+%if mmsize == 8
+    movq         m1, m0
+    movq         m3, m2
+    punpcklbw    m0, m4
+    punpckhbw    m1, m4
+    punpcklbw    m2, m4
+    punpckhbw    m3, m4
+%else
+    movq         m1, [r1+r3]
+    movq         m3, [r2+r3]
     punpcklbw    m0, m4
     punpcklbw    m1, m4
     punpcklbw    m2, m4
     punpcklbw    m3, m4
+%endif
     psubw        m0, m2
     psubw        m1, m3
-    mova         [r0+r4+0 ], m0
-    mova         [r0+r4+16], m1
+    mova         [r0+r4+0], m0
+    mova         [r0+r4+mmsize], m1
+%if mmsize == 8
+    add          r1, r3
+    add          r2, r3
+%else
     lea          r1, [r1+r3*2]
     lea          r2, [r2+r3*2]
-    add          r4, 32
+%endif
+    add          r4, 2 * mmsize
     jne .loop
     RET
+%endmacro
+
+INIT_MMX mmx
+DIFF_PIXELS
+
+INIT_XMM sse2
+DIFF_PIXELS
```