aboutsummaryrefslogtreecommitdiffstats
path: root/libavcodec/x86/sbrdsp_init.c
diff options
context:
space:
mode:
author: Christophe Gisquet <christophe.gisquet@gmail.com>, 2013-04-09 22:16:36 +0200
committer: Michael Niedermayer <michaelni@gmx.at>, 2013-04-19 13:19:45 +0200
commit: 76c7277385120741914168d02b57a7a1acf87a9b (patch)
tree: 1a0821b6a1b2aae976515ed3423a05ac9cd245f5 /libavcodec/x86/sbrdsp_init.c
parent: 380cfce2b2138a0513f7c054134458b3b4c92fd4 (diff)
download: ffmpeg-76c7277385120741914168d02b57a7a1acf87a9b.tar.gz
x86: sbrdsp: implement SSE2 hf_apply_noise
From 233 to 105 cycles on Arrandale and Win64. Replacing the multiplication by s_m[m] with a pand and a pxor using appropriate vectors is slower. Unrolling is a 15-cycle win. An SSE version was 4 cycles slower.
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
Diffstat (limited to 'libavcodec/x86/sbrdsp_init.c')
-rw-r--r-- libavcodec/x86/sbrdsp_init.c 17
1 files changed, 17 insertions, 0 deletions
diff --git a/libavcodec/x86/sbrdsp_init.c b/libavcodec/x86/sbrdsp_init.c
index f97479e9d7..0bc4a6183e 100644
--- a/libavcodec/x86/sbrdsp_init.c
+++ b/libavcodec/x86/sbrdsp_init.c
@@ -38,6 +38,19 @@ void ff_sbr_qmf_deint_bfly_sse(float *v, const float *src0, const float *src1);
void ff_sbr_qmf_deint_bfly_sse2(float *v, const float *src0, const float *src1);
void ff_sbr_qmf_pre_shuffle_sse2(float *z);
+void ff_sbr_hf_apply_noise_0_sse2(float (*Y)[2], const float *s_m,
+ const float *q_filt, int noise,
+ int kx, int m_max);
+void ff_sbr_hf_apply_noise_1_sse2(float (*Y)[2], const float *s_m,
+ const float *q_filt, int noise,
+ int kx, int m_max);
+void ff_sbr_hf_apply_noise_2_sse2(float (*Y)[2], const float *s_m,
+ const float *q_filt, int noise,
+ int kx, int m_max);
+void ff_sbr_hf_apply_noise_3_sse2(float (*Y)[2], const float *s_m,
+ const float *q_filt, int noise,
+ int kx, int m_max);
+
av_cold void ff_sbrdsp_init_x86(SBRDSPContext *s)
{
int mm_flags = av_get_cpu_flags();
@@ -55,5 +68,9 @@ av_cold void ff_sbrdsp_init_x86(SBRDSPContext *s)
if (EXTERNAL_SSE2(mm_flags)) {
s->qmf_deint_bfly = ff_sbr_qmf_deint_bfly_sse2;
s->qmf_pre_shuffle = ff_sbr_qmf_pre_shuffle_sse2;
+ s->hf_apply_noise[0] = ff_sbr_hf_apply_noise_0_sse2;
+ s->hf_apply_noise[1] = ff_sbr_hf_apply_noise_1_sse2;
+ s->hf_apply_noise[2] = ff_sbr_hf_apply_noise_2_sse2;
+ s->hf_apply_noise[3] = ff_sbr_hf_apply_noise_3_sse2;
}
}