diff options
author | Ronald S. Bultje <rsbultje@gmail.com> | 2013-10-26 08:24:09 -0400 |
---|---|---|
committer | Michael Niedermayer <michaelni@gmx.at> | 2013-10-27 15:02:48 +0100 |
commit | 960490c0b20dd5f9a6c329bd14023b9598082fda (patch) | |
tree | f83967da07421a901e5d45de8e56736026b275c9 /libavcodec | |
parent | cd86eb265f36a79d2996f44ba7ec7e7acbc99f81 (diff) | |
download | ffmpeg-960490c0b20dd5f9a6c329bd14023b9598082fda.tar.gz |
avcodec/x86/videodsp: Small speedups in ff_emulated_edge_mc x86 SIMD.
Don't use word-size multiplications when size == 2. When SIMD
instructions are in use (size >= 8), complete the leftover 4-byte sets
using movd instead of mov. Both of these changes lead to minor speedups.
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
Diffstat (limited to 'libavcodec')
-rw-r--r-- | libavcodec/x86/videodsp.asm | 34 |
1 file changed, 17 insertions, 17 deletions
```diff
diff --git a/libavcodec/x86/videodsp.asm b/libavcodec/x86/videodsp.asm
index 85a41b5b6d..1ac02574d6 100644
--- a/libavcodec/x86/videodsp.asm
+++ b/libavcodec/x86/videodsp.asm
@@ -344,10 +344,6 @@ VERTICAL_EXTEND 16, 22
 ; obviously not the same on both sides.
 
 %macro READ_V_PIXEL 2
-%if %1 == 2
-    movzx          valw, byte %2
-    imul           valw, 0x0101
-%else
     movzx          vald, byte %2
     imul           vald, 0x01010101
 %if %1 >= 8
@@ -356,13 +352,15 @@ VERTICAL_EXTEND 16, 22
     pshufd           m0, m0, q0000
 %else
     punpckldq        m0, m0
-%endif
-%endif ; %1 >= 8
-%endif
+%endif ; mmsize == 16
+%endif ; %1 > 16
 %endmacro ; READ_V_PIXEL
 
 %macro WRITE_V_PIXEL 2
 %assign %%off 0
+
+%if %1 >= 8
+
 %rep %1/mmsize
     movu     [%2+%%off], m0
 %assign %%off %%off+mmsize
@@ -378,27 +376,29 @@ VERTICAL_EXTEND 16, 22
 %assign %%off %%off+8
 %endif
 %endif ; %1-%%off >= 8
-%endif
+%endif ; mmsize == 16
 
 %if %1-%%off >= 4
 %if %1 > 8 && %1-%%off > 4
     movq      [%2+%1-8], m0
 %assign %%off %1
-%elif %1 >= 8 && %1-%%off >= 4
-    movd     [%2+%%off], m0
-%assign %%off %%off+4
 %else
-    mov      [%2+%%off], vald
+    movd     [%2+%%off], m0
 %assign %%off %%off+4
 %endif
 %endif ; %1-%%off >= 4
 
-%if %1-%%off >= 2
-%if %1 >= 8
-    movd      [%2+%1-4], m0
-%else
+%else ; %1 < 8
+
+%rep %1/4
+    mov      [%2+%%off], vald
+%assign %%off %%off+4
+%endrep ; %1/4
+
+%endif ; %1 >=/< 8
+
+%if %1-%%off == 2
     mov      [%2+%%off], valw
-%endif
 %endif ; (%1-%%off)/2
 %endmacro ; WRITE_V_PIXEL
 
```