diff options
author | James Almer <jamrial@gmail.com> | 2014-04-16 20:15:35 -0300 |
---|---|---|
committer | Michael Niedermayer <michaelni@gmx.at> | 2014-04-17 14:15:09 +0200 |
commit | 76ed71a72bffb45027923e4da5f6fc6a97bfb218 (patch) | |
tree | 975fdcf1b1c1165b9eed695c45b5efc942a294d4 | |
parent | 443261cbbdaac2eaba5fada318fa596bd5ab3e4a (diff) | |
download | ffmpeg-76ed71a72bffb45027923e4da5f6fc6a97bfb218.tar.gz |
x86: move horizontal add macros to x86util
Also port relevant AVX2/XOP optimizations from x264 with permission
to relicense to LGPL from the corresponding authors
Signed-off-by: James Almer <jamrial@gmail.com>
Reviewed-by: "Ronald S. Bultje" <rsbultje@gmail.com>
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
-rw-r--r-- | libavcodec/x86/h264_intrapred_10bit.asm | 16 | ||||
-rw-r--r-- | libavutil/x86/x86util.asm | 33 |
2 files changed, 33 insertions, 16 deletions
diff --git a/libavcodec/x86/h264_intrapred_10bit.asm b/libavcodec/x86/h264_intrapred_10bit.asm
index 40f1c9f053..9dee577e1d 100644
--- a/libavcodec/x86/h264_intrapred_10bit.asm
+++ b/libavcodec/x86/h264_intrapred_10bit.asm
@@ -171,22 +171,6 @@ PRED4x4_HD
 ;-----------------------------------------------------------------------------
 ; void ff_pred4x4_dc(pixel *src, const pixel *topright, int stride)
 ;-----------------------------------------------------------------------------
-%macro HADDD 2 ; sum junk
-%if mmsize == 16
-    movhlps %2, %1
-    paddd %1, %2
-    pshuflw %2, %1, 0xE
-    paddd %1, %2
-%else
-    pshufw %2, %1, 0xE
-    paddd %1, %2
-%endif
-%endmacro
-
-%macro HADDW 2
-    pmaddwd %1, [pw_1]
-    HADDD %1, %2
-%endmacro
 
 INIT_MMX mmxext
 cglobal pred4x4_dc_10, 3, 3
diff --git a/libavutil/x86/x86util.asm b/libavutil/x86/x86util.asm
index df58cadf63..67d7905132 100644
--- a/libavutil/x86/x86util.asm
+++ b/libavutil/x86/x86util.asm
@@ -273,6 +273,39 @@
 %endif
 %endmacro
 
+%macro HADDD 2 ; sum junk
+%if sizeof%1 == 32
+%define %2 xmm%2
+    vextracti128 %2, %1, 1
+%define %1 xmm%1
+    paddd %1, %2
+%endif
+%if mmsize >= 16
+%if cpuflag(xop) && sizeof%1 == 16
+    vphadddq %1, %1
+%endif
+    movhlps %2, %1
+    paddd %1, %2
+%endif
+%if notcpuflag(xop) || sizeof%1 != 16
+    PSHUFLW %2, %1, q0032
+    paddd %1, %2
+%endif
+%undef %1
+%undef %2
+%endmacro
+
+%macro HADDW 2 ; reg, tmp
+%if cpuflag(xop) && sizeof%1 == 16
+    vphaddwq %1, %1
+    movhlps %2, %1
+    paddd %1, %2
+%else
+    pmaddwd %1, [pw_1]
+    HADDD %1, %2
+%endif
+%endmacro
+
 %macro PALIGNR 4-5
 %if cpuflag(ssse3)
 %if %0==5