diff options
author | James Almer <jamrial@gmail.com> | 2024-05-11 23:57:57 -0300 |
---|---|---|
committer | James Almer <jamrial@gmail.com> | 2024-05-12 12:54:19 -0300 |
commit | c00c8679ed35ccf053c3d0602932dda3adac752e (patch) | |
tree | 2c33cddf7de77d66c9e78deace43abbe59a5e1b6 | |
parent | 479d26cea29e5b6c772878d5b7cd68e970a7a420 (diff) | |
download | ffmpeg-c00c8679ed35ccf053c3d0602932dda3adac752e.tar.gz |
x86/flacdsp: add an SSE4 version of lpc16
flac_lpc_16_13_c: 2841.3
flac_lpc_16_13_sse4: 2151.8
flac_lpc_16_16_c: 3382.8
flac_lpc_16_16_sse4: 2228.3
flac_lpc_16_29_c: 5800.3
flac_lpc_16_29_sse4: 3727.3
flac_lpc_16_32_c: 5972.8
flac_lpc_16_32_sse4: 4052.3
Signed-off-by: James Almer <jamrial@gmail.com>
-rw-r--r-- | libavcodec/x86/flacdsp.asm | 13 | ||||
-rw-r--r-- | libavcodec/x86/flacdsp_init.c | 3 |
2 files changed, 10 insertions, 6 deletions
diff --git a/libavcodec/x86/flacdsp.asm b/libavcodec/x86/flacdsp.asm
index 4b2fd65435..f38eb7db76 100644
--- a/libavcodec/x86/flacdsp.asm
+++ b/libavcodec/x86/flacdsp.asm
@@ -38,9 +38,9 @@ SECTION .text
 %endif
 %endmacro
 
-%macro LPC_32 1
+%macro LPC_32 3
 INIT_XMM %1
-cglobal flac_lpc_32, 5,6,5, decoded, coeffs, pred_order, qlevel, len, j
+cglobal flac_lpc_%2, 5,6,5, decoded, coeffs, pred_order, qlevel, len, j
     sub lend, pred_orderd
     jle .ret
     movsxdifnidn pred_orderq, pred_orderd
@@ -67,14 +67,14 @@ ALIGN 16
     jl .loop_order
 .end_order:
     PMACSDQL m2, m0, m1, m2, m0
-    psrlq m2, m4
+    %3 m2, m4
     movd m0, [decodedq]
     paddd m0, m2
     movd [decodedq], m0
     sub lend, 2
     jl .ret
     PMACSDQL m3, m1, m0, m3, m1
-    psrlq m3, m4
+    %3 m3, m4
     movd m1, [decodedq+4]
     paddd m1, m3
     movd [decodedq+4], m1
@@ -83,10 +83,11 @@ ALIGN 16
     RET
 %endmacro
 
+LPC_32 sse4, 16, psrad
+LPC_32 sse4, 32, psrlq
 %if HAVE_XOP_EXTERNAL
-LPC_32 xop
+LPC_32 xop, 32, psrlq
 %endif
-LPC_32 sse4
 
 ;----------------------------------------------------------------------------------
 ;void ff_flac_decorrelate_[lrm]s_16_sse2(uint8_t **out, int32_t **in, int channels,
diff --git a/libavcodec/x86/flacdsp_init.c b/libavcodec/x86/flacdsp_init.c
index 87daed7005..dee4bf88fc 100644
--- a/libavcodec/x86/flacdsp_init.c
+++ b/libavcodec/x86/flacdsp_init.c
@@ -23,6 +23,8 @@
 #include "libavutil/x86/cpu.h"
 #include "config.h"
 
+void ff_flac_lpc_16_sse4(int32_t *samples, const int coeffs[32], int order,
+                         int qlevel, int len);
 void ff_flac_lpc_32_sse4(int32_t *samples, const int coeffs[32], int order,
                          int qlevel, int len);
 void ff_flac_lpc_32_xop(int32_t *samples, const int coeffs[32], int order,
@@ -93,6 +95,7 @@ av_cold void ff_flacdsp_init_x86(FLACDSPContext *c, enum AVSampleFormat fmt, int
         }
     }
     if (EXTERNAL_SSE4(cpu_flags)) {
+        c->lpc16 = ff_flac_lpc_16_sse4;
         c->lpc32 = ff_flac_lpc_32_sse4;
     }
     if (EXTERNAL_AVX(cpu_flags)) {