diff options
author | Christophe Gisquet <christophe.gisquet@gmail.com> | 2012-12-07 18:26:30 +0100 |
---|---|---|
committer | Michael Niedermayer <michaelni@gmx.at> | 2014-05-15 23:11:18 +0200 |
commit | d1310c591eb24202ecc28af4adb28cce690109e5 (patch) | |
tree | 6a36bacd687337295d79f667d0dcb7fb84c70b06 /libavcodec | |
parent | 7a4424e5ed3a60939126914ee5ddc32226a56cff (diff) | |
download | ffmpeg-d1310c591eb24202ecc28af4adb28cce690109e5.tar.gz |
x86: sbrdsp: implement SSE qmf_deint_neg
From 133 (unrolled av_intfloat32 C) to 59 cycles on Arrandale/Win64.
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
Diffstat (limited to 'libavcodec')
-rw-r--r-- | libavcodec/x86/sbrdsp.asm | 22 | ||||
-rw-r--r-- | libavcodec/x86/sbrdsp_init.c | 3 |
2 files changed, 25 insertions, 0 deletions
diff --git a/libavcodec/x86/sbrdsp.asm b/libavcodec/x86/sbrdsp.asm
index af774e1a5c..d556f27112 100644
--- a/libavcodec/x86/sbrdsp.asm
+++ b/libavcodec/x86/sbrdsp.asm
@@ -423,3 +423,25 @@ apply_noise_main:
     add count, mmsize
     jl .loop
     RET
+
+INIT_XMM sse
+cglobal sbr_qmf_deint_neg, 2,4,4,v,src,vrev,c
+%define COUNT 32*4
+%define OFFSET 32*4
+    mov cq, -COUNT
+    lea vrevq, [vq + OFFSET + COUNT]
+    add vq, OFFSET-mmsize
+    add srcq, 2*COUNT
+    mova m3, [ps_neg]
+.loop:
+    mova m0, [srcq + 2*cq + 0*mmsize]
+    mova m1, [srcq + 2*cq + 1*mmsize]
+    shufps m2, m0, m1, q2020
+    shufps m1, m0, q1313
+    xorps m2, m3
+    mova [vq], m1
+    mova [vrevq + cq], m2
+    sub vq, mmsize
+    add cq, mmsize
+    jl .loop
+    REP_RET
diff --git a/libavcodec/x86/sbrdsp_init.c b/libavcodec/x86/sbrdsp_init.c
index 2b912d0e9e..a2aca742cf 100644
--- a/libavcodec/x86/sbrdsp_init.c
+++ b/libavcodec/x86/sbrdsp_init.c
@@ -51,6 +51,8 @@ void ff_sbr_hf_apply_noise_3_sse2(float (*Y)[2], const float *s_m,
                                   const float *q_filt, int noise,
                                   int kx, int m_max);
 
+void ff_sbr_qmf_deint_neg_sse(float *v, const float *src);
+
 av_cold void ff_sbrdsp_init_x86(SBRDSPContext *s)
 {
     int cpu_flags = av_get_cpu_flags();
@@ -63,6 +65,7 @@ av_cold void ff_sbrdsp_init_x86(SBRDSPContext *s)
         s->hf_gen = ff_sbr_hf_gen_sse;
         s->qmf_post_shuffle = ff_sbr_qmf_post_shuffle_sse;
         s->qmf_deint_bfly = ff_sbr_qmf_deint_bfly_sse;
+        s->qmf_deint_neg = ff_sbr_qmf_deint_neg_sse;
     }
 
     if (EXTERNAL_SSE2(cpu_flags)) {