diff options
author | James Almer <jamrial@gmail.com> | 2024-06-01 11:26:48 -0300 |
---|---|---|
committer | James Almer <jamrial@gmail.com> | 2024-06-09 12:29:49 -0300 |
commit | 91b9af00586471c3aae37435b4f536015593e232 (patch) | |
tree | 9ce7fd0067227675a6b85b102057eec065ff2a2a /libavcodec/x86 | |
parent | 7a3369398f8b7869bec56fd2b96f22d5cc967e30 (diff) | |
download | ffmpeg-91b9af00586471c3aae37435b4f536015593e232.tar.gz |
x86/aacencdsp: add AVX version of quantize_bands
quant_bands_signed_c: 1928.0
quant_bands_signed_sse2: 406.0
quant_bands_signed_avx: 207.0
quant_bands_unsigned_c: 1702.0
quant_bands_unsigned_sse2: 404.0
quant_bands_unsigned_avx: 209.0
Signed-off-by: James Almer <jamrial@gmail.com>
Diffstat (limited to 'libavcodec/x86')
-rw-r--r-- | libavcodec/x86/aacencdsp.asm | 27 | ||||
-rw-r--r-- | libavcodec/x86/aacencdsp_init.c | 6 |
2 files changed, 30 insertions, 3 deletions
diff --git a/libavcodec/x86/aacencdsp.asm b/libavcodec/x86/aacencdsp.asm
index 0d3ba4b89d..99be2d87f5 100644
--- a/libavcodec/x86/aacencdsp.asm
+++ b/libavcodec/x86/aacencdsp.asm
@@ -53,8 +53,19 @@ cglobal abs_pow34, 3, 3, 3, out, in, size
 ;                           int size, int is_signed, int maxval, const float Q34,
 ;                           const float rounding)
 ;*******************************************************************
-INIT_XMM sse2
+%macro AAC_QUANTIZE_BANDS 0
 cglobal aac_quantize_bands, 5, 5, 6, out, in, scaled, size, is_signed, maxval, Q34, rounding
+%if mmsize == 32
+    vbroadcastss m0, Q34m
+    vbroadcastss m1, roundingm
+%if UNIX64 == 0
+    cvtsi2ss     xm3, dword maxvalm
+%else
+    cvtsi2ss     xm3, maxvald
+%endif
+    shufps       xm3, xm3, xm3, 0
+    vinsertf128  m3, m3, xm3, 1
+%else ; mmsize == 16
 %if UNIX64 == 0
     movss     m0, Q34m
     movss     m1, roundingm
@@ -65,9 +76,13 @@ cglobal aac_quantize_bands, 5, 5, 6, out, in, scaled, size, is_signed, maxval, Q
     shufps    m0, m0, 0
     shufps    m1, m1, 0
     shufps    m3, m3, 0
+%endif
     shl       is_signedd, 31
-    movd      m4, is_signedd
-    shufps    m4, m4, 0
+    movd      xm4, is_signedd
+    shufps    xm4, xm4, xm4, 0
+%if mmsize == 32
+    vinsertf128 m4, m4, xm4, 1
+%endif
     shl       sized, 2
     add       inq, sizeq
     add       outq, sizeq
@@ -84,3 +99,9 @@ cglobal aac_quantize_bands, 5, 5, 6, out, in, scaled, size, is_signed, maxval, Q
     add       sizeq, mmsize
     jl       .loop
     RET
+%endmacro
+
+INIT_XMM sse2
+AAC_QUANTIZE_BANDS
+INIT_YMM avx
+AAC_QUANTIZE_BANDS
diff --git a/libavcodec/x86/aacencdsp_init.c b/libavcodec/x86/aacencdsp_init.c
index e0d8dec4f8..cf17dbf91d 100644
--- a/libavcodec/x86/aacencdsp_init.c
+++ b/libavcodec/x86/aacencdsp_init.c
@@ -30,6 +30,9 @@ void ff_abs_pow34_sse(float *out, const float *in, const int size);
 void ff_aac_quantize_bands_sse2(int *out, const float *in, const float *scaled,
                                 int size, int is_signed, int maxval, const float Q34,
                                 const float rounding);
+void ff_aac_quantize_bands_avx(int *out, const float *in, const float *scaled,
+                               int size, int is_signed, int maxval, const float Q34,
+                               const float rounding);
 
 av_cold void ff_aacenc_dsp_init_x86(AACEncDSPContext *s)
 {
@@ -40,4 +43,7 @@ av_cold void ff_aacenc_dsp_init_x86(AACEncDSPContext *s)
 
     if (EXTERNAL_SSE2(cpu_flags))
         s->quant_bands = ff_aac_quantize_bands_sse2;
+
+    if (EXTERNAL_AVX_FAST(cpu_flags))
+        s->quant_bands = ff_aac_quantize_bands_avx;
 }