aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Michael Niedermayer <michaelni@gmx.at> 2015-02-06 01:57:23 +0100
committer: Michael Niedermayer <michaelni@gmx.at> 2015-02-12 17:10:36 +0100
commit: 3572eaaf02f9611ed6f5c4ccba7e75befaf43ecf (patch)
tree: 66d7e79d6558dbd4cc9b51df8f592851cd73fae8
parent: 25da8d84a46ef262a979d62ca9c58432dcdb2ccf (diff)
download: ffmpeg-3572eaaf02f9611ed6f5c4ccba7e75befaf43ecf.tar.gz
avcodec/x86/lossless_audiodsp: Move order&8 fallback into C code
This is simpler and more robust, and fixes mismatched XMM save/restore.

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
(cherry picked from commit f1214763af1abf5d7f49b98f88c06e13b98932a6)
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
-rw-r--r-- libavcodec/x86/lossless_audiodsp.asm | 8
-rw-r--r-- libavcodec/x86/lossless_audiodsp_init.c | 28
2 files changed, 26 insertions, 10 deletions
diff --git a/libavcodec/x86/lossless_audiodsp.asm b/libavcodec/x86/lossless_audiodsp.asm
index 5dff835902..084ed9a093 100644
--- a/libavcodec/x86/lossless_audiodsp.asm
+++ b/libavcodec/x86/lossless_audiodsp.asm
@@ -26,12 +26,6 @@ SECTION_TEXT
; int ff_scalarproduct_and_madd_int16(int16_t *v1, int16_t *v2, int16_t *v3,
; int order, int mul)
cglobal scalarproduct_and_madd_int16, 4,4,8, v1, v2, v3, order, mul
-%if mmsize == 16
- test orderq, 8
- jnz scalarproduct_and_madd_int16_fallback
-%else
- scalarproduct_and_madd_int16_fallback
-%endif
shl orderq, 1
movd m7, mulm
%if mmsize == 16
@@ -123,8 +117,6 @@ align 16
; int order, int mul)
INIT_XMM ssse3
cglobal scalarproduct_and_madd_int16, 4,4,10, v1, v2, v3, order, mul
- test orderq, 8
- jnz scalarproduct_and_madd_int16_fallback
shl orderq, 1
movd m7, mulm
pshuflw m7, m7, 0
diff --git a/libavcodec/x86/lossless_audiodsp_init.c b/libavcodec/x86/lossless_audiodsp_init.c
index 4879dff1de..2c13e1e31c 100644
--- a/libavcodec/x86/lossless_audiodsp_init.c
+++ b/libavcodec/x86/lossless_audiodsp_init.c
@@ -31,6 +31,30 @@ int32_t ff_scalarproduct_and_madd_int16_ssse3(int16_t *v1, const int16_t *v2,
const int16_t *v3,
int order, int mul);
+static int32_t scalarproduct_and_madd_int16_sse2(int16_t *v1, const int16_t *v2,
+ const int16_t *v3,
+ int order, int mul)
+{
+#if HAVE_SSE2_EXTERNAL
+ if (order & 8)
+ return ff_scalarproduct_and_madd_int16_mmxext(v1, v2, v3, order, mul);
+ else
+ return ff_scalarproduct_and_madd_int16_sse2(v1, v2, v3, order, mul);
+#endif
+}
+
+static int32_t scalarproduct_and_madd_int16_ssse3(int16_t *v1, const int16_t *v2,
+ const int16_t *v3,
+ int order, int mul)
+{
+#if HAVE_SSSE3_EXTERNAL
+ if (order & 8)
+ return ff_scalarproduct_and_madd_int16_mmxext(v1, v2, v3, order, mul);
+ else
+ return ff_scalarproduct_and_madd_int16_ssse3(v1, v2, v3, order, mul);
+#endif
+}
+
av_cold void ff_llauddsp_init_x86(LLAudDSPContext *c)
{
int cpu_flags = av_get_cpu_flags();
@@ -39,9 +63,9 @@ av_cold void ff_llauddsp_init_x86(LLAudDSPContext *c)
c->scalarproduct_and_madd_int16 = ff_scalarproduct_and_madd_int16_mmxext;
if (EXTERNAL_SSE2(cpu_flags))
- c->scalarproduct_and_madd_int16 = ff_scalarproduct_and_madd_int16_sse2;
+ c->scalarproduct_and_madd_int16 = scalarproduct_and_madd_int16_sse2;
if (EXTERNAL_SSSE3(cpu_flags) &&
!(cpu_flags & (AV_CPU_FLAG_SSE42 | AV_CPU_FLAG_3DNOW))) // cachesplit
- c->scalarproduct_and_madd_int16 = ff_scalarproduct_and_madd_int16_ssse3;
+ c->scalarproduct_and_madd_int16 = scalarproduct_and_madd_int16_ssse3;
}