diff options
author | Lynne <dev@lynne.ee> | 2021-01-09 03:19:18 +0100 |
---|---|---|
committer | Lynne <dev@lynne.ee> | 2021-01-14 01:44:20 +0100 |
commit | 9e05421dbe0c733dca2a39f8399db86acc7e82bc (patch) | |
tree | 246b41e3723eb779091bd535858de1e83eec767f /libavcodec/x86/ac3dsp.asm | |
parent | 238b2d4155d9779d770fccb3594076bb32742c82 (diff) | |
download | ffmpeg-9e05421dbe0c733dca2a39f8399db86acc7e82bc.tar.gz |
ac3enc_fixed: drop unnecessary fixed-point DSP code
Diffstat (limited to 'libavcodec/x86/ac3dsp.asm')
-rw-r--r-- | libavcodec/x86/ac3dsp.asm | 258 |
1 files changed, 0 insertions, 258 deletions
diff --git a/libavcodec/x86/ac3dsp.asm b/libavcodec/x86/ac3dsp.asm index 675ade3101..4ddaa94320 100644 --- a/libavcodec/x86/ac3dsp.asm +++ b/libavcodec/x86/ac3dsp.asm @@ -35,10 +35,6 @@ pw_bap_mul2: dw 5, 7, 0, 7, 5, 7, 0, 7 cextern pd_1 pd_151: times 4 dd 151 -; used in ff_apply_window_int16() -pb_revwords: SHUFFLE_MASK_W 7, 6, 5, 4, 3, 2, 1, 0 -pd_16384: times 4 dd 16384 - SECTION .text ;----------------------------------------------------------------------------- @@ -82,133 +78,6 @@ AC3_EXPONENT_MIN %undef LOOP_ALIGN ;----------------------------------------------------------------------------- -; int ff_ac3_max_msb_abs_int16(const int16_t *src, int len) -; -; This function uses 2 different methods to calculate a valid result. -; 1) logical 'or' of abs of each element -; This is used for ssse3 because of the pabsw instruction. -; It is also used for mmx because of the lack of min/max instructions. -; 2) calculate min/max for the array, then or(abs(min),abs(max)) -; This is used for mmxext and sse2 because they have pminsw/pmaxsw. -;----------------------------------------------------------------------------- - -; logical 'or' of 4 or 8 words in an mmx or xmm register into the low word -%macro OR_WORDS_HORIZ 2 ; src, tmp -%if cpuflag(sse2) - movhlps %2, %1 - por %1, %2 - pshuflw %2, %1, q0032 - por %1, %2 - pshuflw %2, %1, q0001 - por %1, %2 -%elif cpuflag(mmxext) - pshufw %2, %1, q0032 - por %1, %2 - pshufw %2, %1, q0001 - por %1, %2 -%else ; mmx - movq %2, %1 - psrlq %2, 32 - por %1, %2 - movq %2, %1 - psrlq %2, 16 - por %1, %2 -%endif -%endmacro - -%macro AC3_MAX_MSB_ABS_INT16 1 -cglobal ac3_max_msb_abs_int16, 2,2,5, src, len - pxor m2, m2 - pxor m3, m3 -.loop: -%ifidn %1, min_max - mova m0, [srcq] - mova m1, [srcq+mmsize] - pminsw m2, m0 - pminsw m2, m1 - pmaxsw m3, m0 - pmaxsw m3, m1 -%else ; or_abs -%if notcpuflag(ssse3) - mova m0, [srcq] - mova m1, [srcq+mmsize] - ABS2 m0, m1, m3, m4 -%else ; ssse3 - ; using memory args is faster for ssse3 - pabsw m0, [srcq] - pabsw m1, [srcq+mmsize] -%endif - por m2, m0 - por m2, m1 -%endif - add srcq, mmsize*2 - sub lend, mmsize - ja .loop -%ifidn %1, min_max - ABS2 m2, m3, m0, m1 - por m2, m3 -%endif - OR_WORDS_HORIZ m2, m0 - movd eax, m2 - and eax, 0xFFFF - RET -%endmacro - -INIT_MMX mmx -AC3_MAX_MSB_ABS_INT16 or_abs -INIT_MMX mmxext -AC3_MAX_MSB_ABS_INT16 min_max -INIT_XMM sse2 -AC3_MAX_MSB_ABS_INT16 min_max -INIT_XMM ssse3 -AC3_MAX_MSB_ABS_INT16 or_abs - -;----------------------------------------------------------------------------- -; macro used for ff_ac3_lshift_int16() and ff_ac3_rshift_int32() -;----------------------------------------------------------------------------- - -%macro AC3_SHIFT 3 ; l/r, 16/32, shift instruction, instruction set -cglobal ac3_%1shift_int%2, 3, 3, 5, src, len, shift - movd m0, shiftd -.loop: - mova m1, [srcq ] - mova m2, [srcq+mmsize ] - mova m3, [srcq+mmsize*2] - mova m4, [srcq+mmsize*3] - %3 m1, m0 - %3 m2, m0 - %3 m3, m0 - %3 m4, m0 - mova [srcq ], m1 - mova [srcq+mmsize ], m2 - mova [srcq+mmsize*2], m3 - mova [srcq+mmsize*3], m4 - add srcq, mmsize*4 - sub lend, mmsize*32/%2 - ja .loop -.end: - REP_RET -%endmacro - -;----------------------------------------------------------------------------- -; void ff_ac3_lshift_int16(int16_t *src, unsigned int len, unsigned int shift) -;----------------------------------------------------------------------------- - -INIT_MMX mmx -AC3_SHIFT l, 16, psllw -INIT_XMM sse2 -AC3_SHIFT l, 16, psllw - -;----------------------------------------------------------------------------- -; void ff_ac3_rshift_int32(int32_t *src, unsigned int len, unsigned int shift) -;----------------------------------------------------------------------------- - -INIT_MMX mmx -AC3_SHIFT r, 32, psrad -INIT_XMM sse2 -AC3_SHIFT r, 32, psrad - -;----------------------------------------------------------------------------- ; void ff_float_to_fixed24(int32_t *dst, const float *src, unsigned int len) ;----------------------------------------------------------------------------- @@ -423,130 +292,3 @@ AC3_EXTRACT_EXPONENTS INIT_XMM ssse3 AC3_EXTRACT_EXPONENTS %endif - -;----------------------------------------------------------------------------- -; void ff_apply_window_int16(int16_t *output, const int16_t *input, -; const int16_t *window, unsigned int len) -;----------------------------------------------------------------------------- - -%macro REVERSE_WORDS 1-2 -%if cpuflag(ssse3) && notcpuflag(atom) - pshufb %1, %2 -%elif cpuflag(sse2) - pshuflw %1, %1, 0x1B - pshufhw %1, %1, 0x1B - pshufd %1, %1, 0x4E -%elif cpuflag(mmxext) - pshufw %1, %1, 0x1B -%endif -%endmacro - -%macro MUL16FIXED 3 -%if cpuflag(ssse3) ; dst, src, unused -; dst = ((dst * src) + (1<<14)) >> 15 - pmulhrsw %1, %2 -%elif cpuflag(mmxext) ; dst, src, temp -; dst = (dst * src) >> 15 -; pmulhw cuts off the bottom bit, so we have to lshift by 1 and add it back -; in from the pmullw result. - mova %3, %1 - pmulhw %1, %2 - pmullw %3, %2 - psrlw %3, 15 - psllw %1, 1 - por %1, %3 -%endif -%endmacro - -%macro APPLY_WINDOW_INT16 1 ; %1 bitexact version -%if %1 -cglobal apply_window_int16, 4,5,6, output, input, window, offset, offset2 -%else -cglobal apply_window_int16_round, 4,5,6, output, input, window, offset, offset2 -%endif - lea offset2q, [offsetq-mmsize] -%if cpuflag(ssse3) && notcpuflag(atom) - mova m5, [pb_revwords] - ALIGN 16 -%elif %1 - mova m5, [pd_16384] -%endif -.loop: -%if cpuflag(ssse3) - ; This version does the 16x16->16 multiplication in-place without expanding - ; to 32-bit. The ssse3 version is bit-identical. - mova m0, [windowq+offset2q] - mova m1, [ inputq+offset2q] - pmulhrsw m1, m0 - REVERSE_WORDS m0, m5 - pmulhrsw m0, [ inputq+offsetq ] - mova [outputq+offset2q], m1 - mova [outputq+offsetq ], m0 -%elif %1 - ; This version expands 16-bit to 32-bit, multiplies by the window, - ; adds 16384 for rounding, right shifts 15, then repacks back to words to - ; save to the output. The window is reversed for the second half. - mova m3, [windowq+offset2q] - mova m4, [ inputq+offset2q] - pxor m0, m0 - punpcklwd m0, m3 - punpcklwd m1, m4 - pmaddwd m0, m1 - paddd m0, m5 - psrad m0, 15 - pxor m2, m2 - punpckhwd m2, m3 - punpckhwd m1, m4 - pmaddwd m2, m1 - paddd m2, m5 - psrad m2, 15 - packssdw m0, m2 - mova [outputq+offset2q], m0 - REVERSE_WORDS m3 - mova m4, [ inputq+offsetq] - pxor m0, m0 - punpcklwd m0, m3 - punpcklwd m1, m4 - pmaddwd m0, m1 - paddd m0, m5 - psrad m0, 15 - pxor m2, m2 - punpckhwd m2, m3 - punpckhwd m1, m4 - pmaddwd m2, m1 - paddd m2, m5 - psrad m2, 15 - packssdw m0, m2 - mova [outputq+offsetq], m0 -%else - ; This version does the 16x16->16 multiplication in-place without expanding - ; to 32-bit. The mmxext and sse2 versions do not use rounding, and - ; therefore are not bit-identical to the C version. - mova m0, [windowq+offset2q] - mova m1, [ inputq+offset2q] - mova m2, [ inputq+offsetq ] - MUL16FIXED m1, m0, m3 - REVERSE_WORDS m0 - MUL16FIXED m2, m0, m3 - mova [outputq+offset2q], m1 - mova [outputq+offsetq ], m2 -%endif - add offsetd, mmsize - sub offset2d, mmsize - jae .loop - REP_RET -%endmacro - -INIT_MMX mmxext -APPLY_WINDOW_INT16 0 -INIT_XMM sse2 -APPLY_WINDOW_INT16 0 - -INIT_MMX mmxext -APPLY_WINDOW_INT16 1 -INIT_XMM sse2 -APPLY_WINDOW_INT16 1 -INIT_XMM ssse3 -APPLY_WINDOW_INT16 1 -INIT_XMM ssse3, atom -APPLY_WINDOW_INT16 1 |