aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: James Almer <jamrial@gmail.com> 2024-06-01 11:26:48 -0300
committer: James Almer <jamrial@gmail.com> 2024-06-09 12:29:49 -0300
commit: 91b9af00586471c3aae37435b4f536015593e232 (patch)
tree: 9ce7fd0067227675a6b85b102057eec065ff2a2a
parent: 7a3369398f8b7869bec56fd2b96f22d5cc967e30 (diff)
download: ffmpeg-91b9af00586471c3aae37435b4f536015593e232.tar.gz
x86/aacencdsp: add AVX version of quantize_bands
quant_bands_signed_c: 1928.0
quant_bands_signed_sse2: 406.0
quant_bands_signed_avx: 207.0
quant_bands_unsigned_c: 1702.0
quant_bands_unsigned_sse2: 404.0
quant_bands_unsigned_avx: 209.0

Signed-off-by: James Almer <jamrial@gmail.com>
-rw-r--r-- libavcodec/aacenc.h 2
-rw-r--r-- libavcodec/x86/aacencdsp.asm 27
-rw-r--r-- libavcodec/x86/aacencdsp_init.c 6
-rw-r--r-- tests/checkasm/aacencdsp.c 4
4 files changed, 33 insertions, 6 deletions
diff --git a/libavcodec/aacenc.h b/libavcodec/aacenc.h
index d07960620e..ae15f91e06 100644
--- a/libavcodec/aacenc.h
+++ b/libavcodec/aacenc.h
@@ -242,7 +242,7 @@ typedef struct AACEncContext {
enum RawDataBlockType cur_type; ///< channel group type cur_channel belongs to
AudioFrameQueue afq;
- DECLARE_ALIGNED(16, int, qcoefs)[96]; ///< quantized coefficients
+ DECLARE_ALIGNED(32, int, qcoefs)[96]; ///< quantized coefficients
DECLARE_ALIGNED(32, float, scoefs)[1024]; ///< scaled coefficients
uint16_t quantize_band_cost_cache_generation;
diff --git a/libavcodec/x86/aacencdsp.asm b/libavcodec/x86/aacencdsp.asm
index 0d3ba4b89d..99be2d87f5 100644
--- a/libavcodec/x86/aacencdsp.asm
+++ b/libavcodec/x86/aacencdsp.asm
@@ -53,8 +53,19 @@ cglobal abs_pow34, 3, 3, 3, out, in, size
; int size, int is_signed, int maxval, const float Q34,
; const float rounding)
;*******************************************************************
-INIT_XMM sse2
+%macro AAC_QUANTIZE_BANDS 0
cglobal aac_quantize_bands, 5, 5, 6, out, in, scaled, size, is_signed, maxval, Q34, rounding
+%if mmsize == 32
+ vbroadcastss m0, Q34m
+ vbroadcastss m1, roundingm
+%if UNIX64 == 0
+ cvtsi2ss xm3, dword maxvalm
+%else
+ cvtsi2ss xm3, maxvald
+%endif
+ shufps xm3, xm3, xm3, 0
+ vinsertf128 m3, m3, xm3, 1
+%else ; mmsize == 16
%if UNIX64 == 0
movss m0, Q34m
movss m1, roundingm
@@ -65,9 +76,13 @@ cglobal aac_quantize_bands, 5, 5, 6, out, in, scaled, size, is_signed, maxval, Q
shufps m0, m0, 0
shufps m1, m1, 0
shufps m3, m3, 0
+%endif
shl is_signedd, 31
- movd m4, is_signedd
- shufps m4, m4, 0
+ movd xm4, is_signedd
+ shufps xm4, xm4, xm4, 0
+%if mmsize == 32
+ vinsertf128 m4, m4, xm4, 1
+%endif
shl sized, 2
add inq, sizeq
add outq, sizeq
@@ -84,3 +99,9 @@ cglobal aac_quantize_bands, 5, 5, 6, out, in, scaled, size, is_signed, maxval, Q
add sizeq, mmsize
jl .loop
RET
+%endmacro
+
+INIT_XMM sse2
+AAC_QUANTIZE_BANDS
+INIT_YMM avx
+AAC_QUANTIZE_BANDS
diff --git a/libavcodec/x86/aacencdsp_init.c b/libavcodec/x86/aacencdsp_init.c
index e0d8dec4f8..cf17dbf91d 100644
--- a/libavcodec/x86/aacencdsp_init.c
+++ b/libavcodec/x86/aacencdsp_init.c
@@ -30,6 +30,9 @@ void ff_abs_pow34_sse(float *out, const float *in, const int size);
void ff_aac_quantize_bands_sse2(int *out, const float *in, const float *scaled,
int size, int is_signed, int maxval, const float Q34,
const float rounding);
+void ff_aac_quantize_bands_avx(int *out, const float *in, const float *scaled,
+ int size, int is_signed, int maxval, const float Q34,
+ const float rounding);
av_cold void ff_aacenc_dsp_init_x86(AACEncDSPContext *s)
{
@@ -40,4 +43,7 @@ av_cold void ff_aacenc_dsp_init_x86(AACEncDSPContext *s)
if (EXTERNAL_SSE2(cpu_flags))
s->quant_bands = ff_aac_quantize_bands_sse2;
+
+ if (EXTERNAL_AVX_FAST(cpu_flags))
+ s->quant_bands = ff_aac_quantize_bands_avx;
}
diff --git a/tests/checkasm/aacencdsp.c b/tests/checkasm/aacencdsp.c
index 791dd30320..5308a2ac03 100644
--- a/tests/checkasm/aacencdsp.c
+++ b/tests/checkasm/aacencdsp.c
@@ -81,8 +81,8 @@ static void test_quant_bands(AACEncDSPContext *s)
for (int sign = 0; sign <= 1; sign++) {
if (check_func(s->quant_bands, "quant_bands_%s",
sign ? "signed" : "unsigned")) {
- LOCAL_ALIGNED_16(int, out, [BUF_SIZE]);
- LOCAL_ALIGNED_16(int, out2, [BUF_SIZE]);
+ LOCAL_ALIGNED_32(int, out, [BUF_SIZE]);
+ LOCAL_ALIGNED_32(int, out2, [BUF_SIZE]);
call_ref(out, in, scaled, BUF_SIZE, sign, maxval, q34, rounding);
call_new(out2, in, scaled, BUF_SIZE, sign, maxval, q34, rounding);