diff options
author | Michael Niedermayer <michaelni@gmx.at> | 2013-06-30 12:15:12 +0200 |
---|---|---|
committer | Michael Niedermayer <michaelni@gmx.at> | 2013-06-30 12:15:12 +0200 |
commit | 6e76e6a05a5685b904dc0d1cd610d81ffe43bbc5 (patch) | |
tree | 7c520597874b28e57afee76ffae5f0d1c40162e3 /libavutil | |
parent | a285079bc75a6e3b9aa27910351044b79bc0c490 (diff) | |
parent | b545179fdff1ccfbbb9d422e4e9720cb6c6d9191 (diff) | |
download | ffmpeg-6e76e6a05a5685b904dc0d1cd610d81ffe43bbc5.tar.gz |
Merge commit 'b545179fdff1ccfbbb9d422e4e9720cb6c6d9191'
* commit 'b545179fdff1ccfbbb9d422e4e9720cb6c6d9191':
x86: lpc: simd av_evaluate_lls
Conflicts:
libavutil/x86/lls.asm
Merged-by: Michael Niedermayer <michaelni@gmx.at>
Diffstat (limited to 'libavutil')
-rw-r--r-- | libavutil/x86/lls.asm | 37 | ||||
-rw-r--r-- | libavutil/x86/lls_init.c | 3 |
2 files changed, 40 insertions, 0 deletions
diff --git a/libavutil/x86/lls.asm b/libavutil/x86/lls.asm
index 59398b4867..ae18f3a737 100644
--- a/libavutil/x86/lls.asm
+++ b/libavutil/x86/lls.asm
@@ -196,3 +196,40 @@ cglobal update_lls, 3,6,8, ctx, var, count, i, j, count2
 .ret:
     REP_RET
 %endif
+
+INIT_XMM sse2
+cglobal evaluate_lls, 3,4,2, ctx, var, order, i ; 3 args: order is read below, so it must be loaded on x86_32 too
+    ; This function is often called on the same buffer as update_lls, but with
+    ; an offset. They can't both be aligned.
+    ; Load halves rather than movu to avoid store-forwarding stalls, since the
+    ; input was initialized immediately prior to this function using scalar math.
+    %define coefsq ctxq                         ; ctx register is reused as the coefficient pointer
+    mov     id, orderd                          ; i = order
+    imul    orderd, MAX_VARS                    ; order * MAX_VARS, an element offset scaled by 8 below
+    lea     coefsq, [ctxq + LLSModel.coeff + orderq*8] ; coefs = ctx->coeff + order*MAX_VARS doubles
+    movsd   m0, [varq]                          ; m0.lo = var[0]
+    movhpd  m0, [varq + 8]                      ; m0.hi = var[1]
+    mulpd   m0, [coefsq]                        ; m0 = var[0..1] * coefs[0..1]
+    lea     coefsq, [coefsq + iq*8]             ; advance both bases by `order` doubles so that
+    lea     varq, [varq + iq*8]                 ; a negative index can count up towards zero
+    neg     iq
+    add     iq, 2                               ; i = 2 - order (elements 0 and 1 are already done)
+.loop:
+    movsd   m1, [varq + iq*8]                   ; load the next pair of var[] as two halves
+    movhpd  m1, [varq + iq*8 + 8]
+    mulpd   m1, [coefsq + iq*8]
+    addpd   m0, m1                              ; accumulate two partial sums
+    add     iq, 2                               ; sets the flags tested by both jumps below
+    jl .loop                                    ; i < 0: at least two elements remain
+    jg .skip1                                   ; i > 0: nothing remains
+    movsd   m1, [varq + iq*8]                   ; i == 0: one element left; movsd clears m1.hi
+    mulsd   m1, [coefsq + iq*8]
+    addpd   m0, m1                              ; m1.hi is zero, so only the low sum changes
+.skip1:
+    movhlps m1, m0                              ; m1.lo = m0.hi
+    addsd   m0, m1                              ; m0.lo = sum of both partial sums
+%if ARCH_X86_32
+    movsd  r0m, m0                              ; spill the result over the argument stack slots
+    fld   qword r0m                             ; and reload it onto the x87 stack as the return value
+%endif
+    RET
diff --git a/libavutil/x86/lls_init.c b/libavutil/x86/lls_init.c
index 0c7b75d07c..eb570428e7 100644
--- a/libavutil/x86/lls_init.c
+++ b/libavutil/x86/lls_init.c
@@ -25,12 +25,15 @@
 
 void ff_update_lls_sse2(LLSModel *m, double *var);
 void ff_update_lls_avx(LLSModel *m, double *var);
+double ff_evaluate_lls_sse2(LLSModel *m, double *var, int order);
 
 av_cold void ff_init_lls_x86(LLSModel *m)
 {
     int cpu_flags = av_get_cpu_flags();
     if (EXTERNAL_SSE2(cpu_flags)) {
         m->update_lls = ff_update_lls_sse2;
+        if (m->indep_count >= 4) /* the SIMD version always consumes at least 4 elements */
+            m->evaluate_lls = ff_evaluate_lls_sse2;
     }
     if (EXTERNAL_AVX(cpu_flags) && 0) {
         m->update_lls = ff_update_lls_avx;