diff options
author | James Almer <jamrial@gmail.com> | 2014-02-24 02:34:38 -0300 |
---|---|---|
committer | Michael Niedermayer <michaelni@gmx.at> | 2014-02-24 08:30:19 +0100 |
commit | 3f3d748cab3805dcc48599a8747976e18da3ab68 (patch) | |
tree | decb3fc4b90870da4562337616d8764b862731f3 | |
parent | 6c6e4dd139159a7dbf1b85f583804b6334ad88c1 (diff) | |
download | ffmpeg-3f3d748cab3805dcc48599a8747976e18da3ab68.tar.gz |
x86: Move XOP emulation to x86util
The emulation needs to support the case where the first argument
(the destination) is the same as the fourth (the addend). To achieve
this, a fifth argument that serves as a temporary may be needed.
Emulation that takes an extra operand no longer obeys the original
instruction semantics, so it can't live in x86inc; move it to x86util.
Signed-off-by: James Almer <jamrial@gmail.com>
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
-rw-r--r-- | libavcodec/x86/flacdsp.asm | 8 | ||||
-rw-r--r-- | libavutil/x86/x86inc.asm | 19 | ||||
-rw-r--r-- | libavutil/x86/x86util.asm | 19 |
3 files changed, 23 insertions, 23 deletions
diff --git a/libavcodec/x86/flacdsp.asm b/libavcodec/x86/flacdsp.asm
index 1a83cd8f8f..37ee87b163 100644
--- a/libavcodec/x86/flacdsp.asm
+++ b/libavcodec/x86/flacdsp.asm
@@ -44,21 +44,21 @@ ALIGN 16
     test jq, jq
     jz .end_order
 .loop_order:
-    pmacsdql m2, m0, m1, m2
+    PMACSDQL m2, m0, m1, m2, m0
     movd m0, [decodedq+jq*4]
-    pmacsdql m3, m1, m0, m3
+    PMACSDQL m3, m1, m0, m3, m1
     movd m1, [coeffsq+jq*4]
     inc jq
     jl .loop_order
 .end_order:
-    pmacsdql m2, m0, m1, m2
+    PMACSDQL m2, m0, m1, m2, m0
     psrlq m2, m4
     movd m0, [decodedq]
     paddd m0, m2
     movd [decodedq], m0
     sub lend, 2
     jl .ret
-    pmacsdql m3, m1, m0, m3
+    PMACSDQL m3, m1, m0, m3, m1
     psrlq m3, m4
     movd m1, [decodedq+4]
     paddd m1, m3
diff --git a/libavutil/x86/x86inc.asm b/libavutil/x86/x86inc.asm
index 88cae0cb48..a7f9f54367 100644
--- a/libavutil/x86/x86inc.asm
+++ b/libavutil/x86/x86inc.asm
@@ -1407,25 +1407,6 @@ AVX_INSTR pfmul, 1, 0, 1
 %undef i
 %undef j
 
-%macro FMA_INSTR 3
-    %macro %1 4-7 %1, %2, %3
-        %if cpuflag(xop)
-            v%5 %1, %2, %3, %4
-        %elifidn %1, %4
-            %6 %2, %3
-            %7 %1, %2
-        %else
-            %6 %1, %2, %3
-            %7 %1, %4
-        %endif
-    %endmacro
-%endmacro
-
-FMA_INSTR pmacsdd, pmulld, paddd
-FMA_INSTR pmacsww, pmullw, paddw
-FMA_INSTR pmacsdql, pmuldq, paddq
-FMA_INSTR pmadcswd, pmaddwd, paddd
-
 ; tzcnt is equivalent to "rep bsf" and is backwards-compatible with bsf.
 ; This lets us use tzcnt without bumping the yasm version requirement yet.
 %define tzcnt rep bsf
diff --git a/libavutil/x86/x86util.asm b/libavutil/x86/x86util.asm
index 59e5df248e..df58cadf63 100644
--- a/libavutil/x86/x86util.asm
+++ b/libavutil/x86/x86util.asm
@@ -666,6 +666,25 @@
     %endif
 %endmacro
 
+%macro PMA_EMU 4
+    %macro %1 5-8 %2, %3, %4
+        %if cpuflag(xop)
+            v%6 %1, %2, %3, %4
+        %elifidn %1, %4
+            %7 %5, %2, %3
+            %8 %1, %4, %5
+        %else
+            %7 %1, %2, %3
+            %8 %1, %4
+        %endif
+    %endmacro
+%endmacro
+
+PMA_EMU PMACSWW, pmacsww, pmullw, paddw
+PMA_EMU PMACSDD, pmacsdd, pmulld, paddd ; sse4 emulation
+PMA_EMU PMACSDQL, pmacsdql, pmuldq, paddq ; sse4 emulation
+PMA_EMU PMADCSWD, pmadcswd, pmaddwd, paddd
+
 ; Wrapper for non-FMA version of fmaddps
 %macro FMULADD_PS 5
     %if cpuflag(fma3) || cpuflag(fma4)