diff options
author | Michael Niedermayer <michaelni@gmx.at> | 2015-02-06 01:57:23 +0100 |
---|---|---|
committer | Michael Niedermayer <michaelni@gmx.at> | 2015-02-06 02:18:54 +0100 |
commit | f1214763af1abf5d7f49b98f88c06e13b98932a6 (patch) | |
tree | cb9cf187a3b8295934b215369fe11f2404ff21f6 | |
parent | d41b66a1a216956964077b383e6e234d518f17d7 (diff) | |
download | ffmpeg-f1214763af1abf5d7f49b98f88c06e13b98932a6.tar.gz |
avcodec/x86/lossless_audiodsp: Move order&8 fallback into C code
This is simpler and more robust, and fixes XMM save/restore
mismatches
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
-rw-r--r-- | libavcodec/x86/lossless_audiodsp.asm | 8 | ||||
-rw-r--r-- | libavcodec/x86/lossless_audiodsp_init.c | 28 |
2 files changed, 26 insertions, 10 deletions
diff --git a/libavcodec/x86/lossless_audiodsp.asm b/libavcodec/x86/lossless_audiodsp.asm index 5dff835902..084ed9a093 100644 --- a/libavcodec/x86/lossless_audiodsp.asm +++ b/libavcodec/x86/lossless_audiodsp.asm @@ -26,12 +26,6 @@ SECTION_TEXT ; int ff_scalarproduct_and_madd_int16(int16_t *v1, int16_t *v2, int16_t *v3, ; int order, int mul) cglobal scalarproduct_and_madd_int16, 4,4,8, v1, v2, v3, order, mul -%if mmsize == 16 - test orderq, 8 - jnz scalarproduct_and_madd_int16_fallback -%else - scalarproduct_and_madd_int16_fallback -%endif shl orderq, 1 movd m7, mulm %if mmsize == 16 @@ -123,8 +117,6 @@ align 16 ; int order, int mul) INIT_XMM ssse3 cglobal scalarproduct_and_madd_int16, 4,4,10, v1, v2, v3, order, mul - test orderq, 8 - jnz scalarproduct_and_madd_int16_fallback shl orderq, 1 movd m7, mulm pshuflw m7, m7, 0 diff --git a/libavcodec/x86/lossless_audiodsp_init.c b/libavcodec/x86/lossless_audiodsp_init.c index 4879dff1de..2c13e1e31c 100644 --- a/libavcodec/x86/lossless_audiodsp_init.c +++ b/libavcodec/x86/lossless_audiodsp_init.c @@ -31,6 +31,30 @@ int32_t ff_scalarproduct_and_madd_int16_ssse3(int16_t *v1, const int16_t *v2, const int16_t *v3, int order, int mul); +static int32_t scalarproduct_and_madd_int16_sse2(int16_t *v1, const int16_t *v2, + const int16_t *v3, + int order, int mul) +{ +#if HAVE_SSE2_EXTERNAL + if (order & 8) + return ff_scalarproduct_and_madd_int16_mmxext(v1, v2, v3, order, mul); + else + return ff_scalarproduct_and_madd_int16_sse2(v1, v2, v3, order, mul); +#endif +} + +static int32_t scalarproduct_and_madd_int16_ssse3(int16_t *v1, const int16_t *v2, + const int16_t *v3, + int order, int mul) +{ +#if HAVE_SSSE3_EXTERNAL + if (order & 8) + return ff_scalarproduct_and_madd_int16_mmxext(v1, v2, v3, order, mul); + else + return ff_scalarproduct_and_madd_int16_ssse3(v1, v2, v3, order, mul); +#endif +} + av_cold void ff_llauddsp_init_x86(LLAudDSPContext *c) { int cpu_flags = av_get_cpu_flags(); @@ -39,9 +63,9 @@ av_cold void 
ff_llauddsp_init_x86(LLAudDSPContext *c) c->scalarproduct_and_madd_int16 = ff_scalarproduct_and_madd_int16_mmxext; if (EXTERNAL_SSE2(cpu_flags)) - c->scalarproduct_and_madd_int16 = ff_scalarproduct_and_madd_int16_sse2; + c->scalarproduct_and_madd_int16 = scalarproduct_and_madd_int16_sse2; if (EXTERNAL_SSSE3(cpu_flags) && !(cpu_flags & (AV_CPU_FLAG_SSE42 | AV_CPU_FLAG_3DNOW))) // cachesplit - c->scalarproduct_and_madd_int16 = ff_scalarproduct_and_madd_int16_ssse3; + c->scalarproduct_and_madd_int16 = scalarproduct_and_madd_int16_ssse3; } |