diff options
author | Justin Ruggles <justin.ruggles@gmail.com> | 2012-05-29 17:03:56 -0400 |
---|---|---|
committer | Justin Ruggles <justin.ruggles@gmail.com> | 2012-06-18 11:24:10 -0400 |
commit | f61ce90caa909d131ea6ec205823568a38115529 (patch) | |
tree | eb999b6a087a5cce7a00c1209b0026ca8f84caa5 | |
parent | 29f7490c461431b5c00e496f3e0253c170b3924c (diff) | |
download | ffmpeg-f61ce90caa909d131ea6ec205823568a38115529.tar.gz |
lavr: add x86-optimized functions for mixing 1-to-2 s16p with flt coeffs
-rw-r--r-- | libavresample/x86/audio_mix.asm | 47 | ||||
-rw-r--r-- | libavresample/x86/audio_mix_init.c | 13 |
2 files changed, 60 insertions, 0 deletions
diff --git a/libavresample/x86/audio_mix.asm b/libavresample/x86/audio_mix.asm
index 2bc89cac92..4b0434dd6d 100644
--- a/libavresample/x86/audio_mix.asm
+++ b/libavresample/x86/audio_mix.asm
@@ -184,3 +184,50 @@ MIX_1_TO_2_FLTP_FLT
 INIT_YMM avx
 MIX_1_TO_2_FLTP_FLT
 %endif
+
+;-----------------------------------------------------------------------------
+; void ff_mix_1_to_2_s16p_flt(int16_t **src, float **matrix, int len,
+;                             int out_ch, int in_ch);
+;-----------------------------------------------------------------------------
+
+%macro MIX_1_TO_2_S16P_FLT 0
+cglobal mix_1_to_2_s16p_flt, 3,5,6, src0, matrix0, len, src1, matrix1
+    mov       src1q, [src0q+gprsize]
+    mov       src0q, [src0q]
+    sub       src1q, src0q
+    mov    matrix1q, [matrix0q+gprsize]
+    mov    matrix0q, [matrix0q]
+    VBROADCASTSS m4, [matrix0q]
+    VBROADCASTSS m5, [matrix1q]
+    ALIGN 16
+.loop:
+    mova         m0, [src0q]
+    S16_TO_S32_SX 0, 2
+    cvtdq2ps     m0, m0
+    cvtdq2ps     m2, m2
+    mulps        m1, m0, m5
+    mulps        m0, m0, m4
+    mulps        m3, m2, m5
+    mulps        m2, m2, m4
+    cvtps2dq     m0, m0
+    cvtps2dq     m1, m1
+    cvtps2dq     m2, m2
+    cvtps2dq     m3, m3
+    packssdw     m0, m2
+    packssdw     m1, m3
+    mova  [src0q      ], m0
+    mova  [src0q+src1q], m1
+    add       src0q, mmsize
+    sub        lend, mmsize/2
+    jg .loop
+    REP_RET
+%endmacro
+
+INIT_XMM sse2
+MIX_1_TO_2_S16P_FLT
+INIT_XMM sse4
+MIX_1_TO_2_S16P_FLT
+%if HAVE_AVX
+INIT_XMM avx
+MIX_1_TO_2_S16P_FLT
+%endif
diff --git a/libavresample/x86/audio_mix_init.c b/libavresample/x86/audio_mix_init.c
index aede260190..b8f3a90eef 100644
--- a/libavresample/x86/audio_mix_init.c
+++ b/libavresample/x86/audio_mix_init.c
@@ -40,6 +40,13 @@ extern void ff_mix_1_to_2_fltp_flt_sse(float **src, float **matrix, int len,
 extern void ff_mix_1_to_2_fltp_flt_avx(float **src, float **matrix, int len,
                                        int out_ch, int in_ch);
 
+extern void ff_mix_1_to_2_s16p_flt_sse2(int16_t **src, float **matrix, int len,
+                                        int out_ch, int in_ch);
+extern void ff_mix_1_to_2_s16p_flt_sse4(int16_t **src, float **matrix, int len,
+                                        int out_ch, int in_ch);
+extern void ff_mix_1_to_2_s16p_flt_avx (int16_t **src, float **matrix, int len,
+                                        int out_ch, int in_ch);
+
 av_cold void ff_audio_mix_init_x86(AudioMix *am)
 {
 #if HAVE_YASM
@@ -56,16 +63,22 @@ av_cold void ff_audio_mix_init_x86(AudioMix *am)
                               2, 1, 16, 8, "SSE2", ff_mix_2_to_1_s16p_flt_sse2);
         ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_Q8,
                               2, 1, 16, 8, "SSE2", ff_mix_2_to_1_s16p_q8_sse2);
+        ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_FLT,
+                              1, 2, 16, 8, "SSE2", ff_mix_1_to_2_s16p_flt_sse2);
     }
     if (mm_flags & AV_CPU_FLAG_SSE4 && HAVE_SSE) {
         ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_FLT,
                               2, 1, 16, 8, "SSE4", ff_mix_2_to_1_s16p_flt_sse4);
+        ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_FLT,
+                              1, 2, 16, 8, "SSE4", ff_mix_1_to_2_s16p_flt_sse4);
     }
     if (mm_flags & AV_CPU_FLAG_AVX && HAVE_AVX) {
         ff_audio_mix_set_func(am, AV_SAMPLE_FMT_FLTP, AV_MIX_COEFF_TYPE_FLT,
                               2, 1, 32, 16, "AVX", ff_mix_2_to_1_fltp_flt_avx);
         ff_audio_mix_set_func(am, AV_SAMPLE_FMT_FLTP, AV_MIX_COEFF_TYPE_FLT,
                               1, 2, 32, 8, "AVX", ff_mix_1_to_2_fltp_flt_avx);
+        ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_FLT,
+                              1, 2, 16, 8, "AVX", ff_mix_1_to_2_s16p_flt_avx);
     }
 #endif
 }