diff options
author | Paul B Mahol <onemda@gmail.com> | 2022-09-03 23:41:38 +0200 |
---|---|---|
committer | Paul B Mahol <onemda@gmail.com> | 2022-09-05 12:27:49 +0200 |
commit | 1e202d89c95d99d69412854f4ee9d5e569c3da69 (patch) | |
tree | 96a97097bc8fdb053f809088a0dd7bdaa4334256 /libavcodec/x86 | |
parent | ea5b375e0e95b05c88ec728d5953c916b4925bef (diff) | |
download | ffmpeg-1e202d89c95d99d69412854f4ee9d5e569c3da69.tar.gz |
avcodec/x86/flacdsp: fix bug in decorrelation
Fixes #9297
Diffstat (limited to 'libavcodec/x86')
-rw-r--r-- | libavcodec/x86/flacdsp.asm | 23 | ||||
-rw-r--r-- | libavcodec/x86/flacdsp_init.c | 41 |
2 files changed, 44 insertions, 20 deletions
diff --git a/libavcodec/x86/flacdsp.asm b/libavcodec/x86/flacdsp.asm index 7138611526..6d755f4972 100644 --- a/libavcodec/x86/flacdsp.asm +++ b/libavcodec/x86/flacdsp.asm @@ -23,6 +23,10 @@ %include "libavutil/x86/x86util.asm" +SECTION_RODATA + +vector: db 0,1,4,5,8,9,12,13,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,0,1,4,5,8,9,12,13, + SECTION .text %macro PMACSDQL 5 @@ -89,6 +93,9 @@ LPC_32 sse4 ;---------------------------------------------------------------------------------- %macro FLAC_DECORRELATE_16 3-4 cglobal flac_decorrelate_%1_16, 2, 4, 4, out, in0, in1, len +%ifidn %1, indep2 + VBROADCASTI128 m2, [vector] +%endif %if ARCH_X86_32 mov lend, lenm %endif @@ -112,11 +119,17 @@ align 16 %endif %ifnidn %1, indep2 p%4d m2, m0, m1 + packssdw m%2, m%2 + packssdw m%3, m%3 + punpcklwd m%2, m%3 + psllw m%2, m3 +%else + pslld m%2, m3 + pslld m%3, m3 + pshufb m%2, m%2, m2 + pshufb m%3, m%3, m2 + punpcklwd m%2, m%3 %endif - packssdw m%2, m%2 - packssdw m%3, m%3 - punpcklwd m%2, m%3 - psllw m%2, m3 mova [outq + lenq], m%2 add lenq, 16 jl .loop @@ -292,7 +305,7 @@ align 16 REP_RET %endmacro -INIT_XMM sse2 +INIT_XMM ssse3 FLAC_DECORRELATE_16 indep2, 0, 1 ; Reuse stereo 16bits macro FLAC_DECORRELATE_INDEP 32, 2, 3, d FLAC_DECORRELATE_INDEP 16, 4, 3, w diff --git a/libavcodec/x86/flacdsp_init.c b/libavcodec/x86/flacdsp_init.c index 2deaf3117f..48e3e7c55c 100644 --- a/libavcodec/x86/flacdsp_init.c +++ b/libavcodec/x86/flacdsp_init.c @@ -34,7 +34,9 @@ void ff_flac_decorrelate_ls_##fmt##_##opt(uint8_t **out, int32_t **in, int chann void ff_flac_decorrelate_rs_##fmt##_##opt(uint8_t **out, int32_t **in, int channels, \ int len, int shift); \ void ff_flac_decorrelate_ms_##fmt##_##opt(uint8_t **out, int32_t **in, int channels, \ - int len, int shift); \ + int len, int shift); + +#define DECORRELATE_IFUNCS(fmt, opt) \ void ff_flac_decorrelate_indep2_##fmt##_##opt(uint8_t **out, int32_t **in, int channels, \ int len, int shift); \ void ff_flac_decorrelate_indep4_##fmt##_##opt(uint8_t **out, int32_t **in, int channels, \ @@ -48,6 +50,10 @@ DECORRELATE_FUNCS(16, sse2); DECORRELATE_FUNCS(16, avx); DECORRELATE_FUNCS(32, sse2); DECORRELATE_FUNCS(32, avx); +DECORRELATE_IFUNCS(16, ssse3); +DECORRELATE_IFUNCS(16, avx); +DECORRELATE_IFUNCS(32, ssse3); +DECORRELATE_IFUNCS(32, avx); av_cold void ff_flacdsp_init_x86(FLACDSPContext *c, enum AVSampleFormat fmt, int channels) { @@ -56,29 +62,34 @@ av_cold void ff_flacdsp_init_x86(FLACDSPContext *c, enum AVSampleFormat fmt, int if (EXTERNAL_SSE2(cpu_flags)) { if (fmt == AV_SAMPLE_FMT_S16) { + c->decorrelate[1] = ff_flac_decorrelate_ls_16_sse2; + c->decorrelate[2] = ff_flac_decorrelate_rs_16_sse2; + c->decorrelate[3] = ff_flac_decorrelate_ms_16_sse2; + } else if (fmt == AV_SAMPLE_FMT_S32) { + c->decorrelate[1] = ff_flac_decorrelate_ls_32_sse2; + c->decorrelate[2] = ff_flac_decorrelate_rs_32_sse2; + c->decorrelate[3] = ff_flac_decorrelate_ms_32_sse2; + } + } + if (EXTERNAL_SSSE3(cpu_flags)) { + if (fmt == AV_SAMPLE_FMT_S16) { if (channels == 2) - c->decorrelate[0] = ff_flac_decorrelate_indep2_16_sse2; + c->decorrelate[0] = ff_flac_decorrelate_indep2_16_ssse3; else if (channels == 4) - c->decorrelate[0] = ff_flac_decorrelate_indep4_16_sse2; + c->decorrelate[0] = ff_flac_decorrelate_indep4_16_ssse3; else if (channels == 6) - c->decorrelate[0] = ff_flac_decorrelate_indep6_16_sse2; + c->decorrelate[0] = ff_flac_decorrelate_indep6_16_ssse3; else if (ARCH_X86_64 && channels == 8) - c->decorrelate[0] = ff_flac_decorrelate_indep8_16_sse2; - c->decorrelate[1] = ff_flac_decorrelate_ls_16_sse2; - c->decorrelate[2] = ff_flac_decorrelate_rs_16_sse2; - c->decorrelate[3] = ff_flac_decorrelate_ms_16_sse2; + c->decorrelate[0] = ff_flac_decorrelate_indep8_16_ssse3; } else if (fmt == AV_SAMPLE_FMT_S32) { if (channels == 2) - c->decorrelate[0] = ff_flac_decorrelate_indep2_32_sse2; + c->decorrelate[0] = ff_flac_decorrelate_indep2_32_ssse3; else if (channels == 4) - c->decorrelate[0] = ff_flac_decorrelate_indep4_32_sse2; + c->decorrelate[0] = ff_flac_decorrelate_indep4_32_ssse3; else if (channels == 6) - c->decorrelate[0] = ff_flac_decorrelate_indep6_32_sse2; + c->decorrelate[0] = ff_flac_decorrelate_indep6_32_ssse3; else if (ARCH_X86_64 && channels == 8) - c->decorrelate[0] = ff_flac_decorrelate_indep8_32_sse2; - c->decorrelate[1] = ff_flac_decorrelate_ls_32_sse2; - c->decorrelate[2] = ff_flac_decorrelate_rs_32_sse2; - c->decorrelate[3] = ff_flac_decorrelate_ms_32_sse2; + c->decorrelate[0] = ff_flac_decorrelate_indep8_32_ssse3; } } if (EXTERNAL_SSE4(cpu_flags)) { |