diff options
author | Michael Niedermayer <michaelni@gmx.at> | 2015-02-06 01:57:23 +0100 |
---|---|---|
committer | Michael Niedermayer <michaelni@gmx.at> | 2015-02-12 17:10:36 +0100 |
commit | 3572eaaf02f9611ed6f5c4ccba7e75befaf43ecf (patch) | |
tree | 66d7e79d6558dbd4cc9b51df8f592851cd73fae8 | |
parent | 25da8d84a46ef262a979d62ca9c58432dcdb2ccf (diff) | |
download | ffmpeg-3572eaaf02f9611ed6f5c4ccba7e75befaf43ecf.tar.gz |
avcodec/x86/lossless_audiodsp: Move order&8 fallback into C code
This is simpler and more robust, and fixes mismatched XMM save/restore.
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
(cherry picked from commit f1214763af1abf5d7f49b98f88c06e13b98932a6)
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
-rw-r--r-- | libavcodec/x86/lossless_audiodsp.asm | 8 | ||||
-rw-r--r-- | libavcodec/x86/lossless_audiodsp_init.c | 28 |
2 files changed, 26 insertions, 10 deletions
```diff
diff --git a/libavcodec/x86/lossless_audiodsp.asm b/libavcodec/x86/lossless_audiodsp.asm
index 5dff835902..084ed9a093 100644
--- a/libavcodec/x86/lossless_audiodsp.asm
+++ b/libavcodec/x86/lossless_audiodsp.asm
@@ -26,12 +26,6 @@ SECTION_TEXT
 ; int ff_scalarproduct_and_madd_int16(int16_t *v1, int16_t *v2, int16_t *v3,
 ;                                     int order, int mul)
 cglobal scalarproduct_and_madd_int16, 4,4,8, v1, v2, v3, order, mul
-%if mmsize == 16
-    test orderq, 8
-    jnz scalarproduct_and_madd_int16_fallback
-%else
-    scalarproduct_and_madd_int16_fallback
-%endif
     shl orderq, 1
     movd m7, mulm
 %if mmsize == 16
@@ -123,8 +117,6 @@ align 16
 ;                                     int order, int mul)
 INIT_XMM ssse3
 cglobal scalarproduct_and_madd_int16, 4,4,10, v1, v2, v3, order, mul
-    test orderq, 8
-    jnz scalarproduct_and_madd_int16_fallback
     shl orderq, 1
     movd m7, mulm
     pshuflw m7, m7, 0
diff --git a/libavcodec/x86/lossless_audiodsp_init.c b/libavcodec/x86/lossless_audiodsp_init.c
index 4879dff1de..2c13e1e31c 100644
--- a/libavcodec/x86/lossless_audiodsp_init.c
+++ b/libavcodec/x86/lossless_audiodsp_init.c
@@ -31,6 +31,30 @@ int32_t ff_scalarproduct_and_madd_int16_ssse3(int16_t *v1, const int16_t *v2,
                                               const int16_t *v3,
                                               int order, int mul);
 
+static int32_t scalarproduct_and_madd_int16_sse2(int16_t *v1, const int16_t *v2,
+                                                 const int16_t *v3,
+                                                 int order, int mul)
+{
+#if HAVE_SSE2_EXTERNAL
+    if (order & 8)
+        return ff_scalarproduct_and_madd_int16_mmxext(v1, v2, v3, order, mul);
+    else
+        return ff_scalarproduct_and_madd_int16_sse2(v1, v2, v3, order, mul);
+#endif
+}
+
+static int32_t scalarproduct_and_madd_int16_ssse3(int16_t *v1, const int16_t *v2,
+                                                  const int16_t *v3,
+                                                  int order, int mul)
+{
+#if HAVE_SSSE3_EXTERNAL
+    if (order & 8)
+        return ff_scalarproduct_and_madd_int16_mmxext(v1, v2, v3, order, mul);
+    else
+        return ff_scalarproduct_and_madd_int16_ssse3(v1, v2, v3, order, mul);
+#endif
+}
+
 av_cold void ff_llauddsp_init_x86(LLAudDSPContext *c)
 {
     int cpu_flags = av_get_cpu_flags();
@@ -39,9 +63,9 @@ av_cold void ff_llauddsp_init_x86(LLAudDSPContext *c)
         c->scalarproduct_and_madd_int16 = ff_scalarproduct_and_madd_int16_mmxext;
 
     if (EXTERNAL_SSE2(cpu_flags))
-        c->scalarproduct_and_madd_int16 = ff_scalarproduct_and_madd_int16_sse2;
+        c->scalarproduct_and_madd_int16 = scalarproduct_and_madd_int16_sse2;
 
     if (EXTERNAL_SSSE3(cpu_flags) &&
         !(cpu_flags & (AV_CPU_FLAG_SSE42 | AV_CPU_FLAG_3DNOW))) // cachesplit
-        c->scalarproduct_and_madd_int16 = ff_scalarproduct_and_madd_int16_ssse3;
+        c->scalarproduct_and_madd_int16 = scalarproduct_and_madd_int16_ssse3;
 }
```