aboutsummaryrefslogtreecommitdiffstats
path: root/libavcodec/x86/dsputil.asm
diff options
context:
space:
mode:
authorDiego Biurrun <diego@biurrun.de>2014-03-26 04:41:28 -0700
committerDiego Biurrun <diego@biurrun.de>2014-04-04 19:08:05 +0200
commit57b5b84e208ad61ffdd74ad849bed212deb92bc5 (patch)
treec6e408f756b6b780579b676a9f286105a2c99a8e /libavcodec/x86/dsputil.asm
parentc2c5be57494e6117086771bca34c8cd4c72c8e99 (diff)
downloadffmpeg-57b5b84e208ad61ffdd74ad849bed212deb92bc5.tar.gz
x86: dsputil: Move ff_apply_window_int16_* bits to ac3dsp, where they belong
Diffstat (limited to 'libavcodec/x86/dsputil.asm')
-rw-r--r--libavcodec/x86/dsputil.asm130
1 files changed, 0 insertions, 130 deletions
diff --git a/libavcodec/x86/dsputil.asm b/libavcodec/x86/dsputil.asm
index 414d2124db..a8c8fdac2f 100644
--- a/libavcodec/x86/dsputil.asm
+++ b/libavcodec/x86/dsputil.asm
@@ -27,8 +27,6 @@ pb_zzzzzzzz77777777: times 8 db -1
pb_7: times 8 db 7
pb_zzzz3333zzzzbbbb: db -1,-1,-1,-1,3,3,3,3,-1,-1,-1,-1,11,11,11,11
pb_zz11zz55zz99zzdd: db -1,-1,1,1,-1,-1,5,5,-1,-1,9,9,-1,-1,13,13
-pb_revwords: SHUFFLE_MASK_W 7, 6, 5, 4, 3, 2, 1, 0
-pd_16384: times 4 dd 16384
pb_bswap32: db 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12
SECTION_TEXT
@@ -205,134 +203,6 @@ SCALARPRODUCT_LOOP 0
RET
-;-----------------------------------------------------------------------------
-; void ff_apply_window_int16(int16_t *output, const int16_t *input,
-; const int16_t *window, unsigned int len)
-;-----------------------------------------------------------------------------
-
-%macro REVERSE_WORDS 1-2
-%if cpuflag(ssse3) && notcpuflag(atom)
- pshufb %1, %2
-%elif cpuflag(sse2)
- pshuflw %1, %1, 0x1B
- pshufhw %1, %1, 0x1B
- pshufd %1, %1, 0x4E
-%elif cpuflag(mmxext)
- pshufw %1, %1, 0x1B
-%endif
-%endmacro
-
-%macro MUL16FIXED 3
-%if cpuflag(ssse3) ; dst, src, unused
-; dst = ((dst * src) + (1<<14)) >> 15
- pmulhrsw %1, %2
-%elif cpuflag(mmxext) ; dst, src, temp
-; dst = (dst * src) >> 15
-; pmulhw cuts off the bottom bit, so we have to lshift by 1 and add it back
-; in from the pmullw result.
- mova %3, %1
- pmulhw %1, %2
- pmullw %3, %2
- psrlw %3, 15
- psllw %1, 1
- por %1, %3
-%endif
-%endmacro
-
-%macro APPLY_WINDOW_INT16 1 ; %1 bitexact version
-%if %1
-cglobal apply_window_int16, 4,5,6, output, input, window, offset, offset2
-%else
-cglobal apply_window_int16_round, 4,5,6, output, input, window, offset, offset2
-%endif
- lea offset2q, [offsetq-mmsize]
-%if cpuflag(ssse3) && notcpuflag(atom)
- mova m5, [pb_revwords]
- ALIGN 16
-%elif %1
- mova m5, [pd_16384]
-%endif
-.loop:
-%if cpuflag(ssse3)
- ; This version does the 16x16->16 multiplication in-place without expanding
- ; to 32-bit. The ssse3 version is bit-identical.
- mova m0, [windowq+offset2q]
- mova m1, [ inputq+offset2q]
- pmulhrsw m1, m0
- REVERSE_WORDS m0, m5
- pmulhrsw m0, [ inputq+offsetq ]
- mova [outputq+offset2q], m1
- mova [outputq+offsetq ], m0
-%elif %1
- ; This version expands 16-bit to 32-bit, multiplies by the window,
- ; adds 16384 for rounding, right shifts 15, then repacks back to words to
- ; save to the output. The window is reversed for the second half.
- mova m3, [windowq+offset2q]
- mova m4, [ inputq+offset2q]
- pxor m0, m0
- punpcklwd m0, m3
- punpcklwd m1, m4
- pmaddwd m0, m1
- paddd m0, m5
- psrad m0, 15
- pxor m2, m2
- punpckhwd m2, m3
- punpckhwd m1, m4
- pmaddwd m2, m1
- paddd m2, m5
- psrad m2, 15
- packssdw m0, m2
- mova [outputq+offset2q], m0
- REVERSE_WORDS m3
- mova m4, [ inputq+offsetq]
- pxor m0, m0
- punpcklwd m0, m3
- punpcklwd m1, m4
- pmaddwd m0, m1
- paddd m0, m5
- psrad m0, 15
- pxor m2, m2
- punpckhwd m2, m3
- punpckhwd m1, m4
- pmaddwd m2, m1
- paddd m2, m5
- psrad m2, 15
- packssdw m0, m2
- mova [outputq+offsetq], m0
-%else
- ; This version does the 16x16->16 multiplication in-place without expanding
- ; to 32-bit. The mmxext and sse2 versions do not use rounding, and
- ; therefore are not bit-identical to the C version.
- mova m0, [windowq+offset2q]
- mova m1, [ inputq+offset2q]
- mova m2, [ inputq+offsetq ]
- MUL16FIXED m1, m0, m3
- REVERSE_WORDS m0
- MUL16FIXED m2, m0, m3
- mova [outputq+offset2q], m1
- mova [outputq+offsetq ], m2
-%endif
- add offsetd, mmsize
- sub offset2d, mmsize
- jae .loop
- REP_RET
-%endmacro
-
-INIT_MMX mmxext
-APPLY_WINDOW_INT16 0
-INIT_XMM sse2
-APPLY_WINDOW_INT16 0
-
-INIT_MMX mmxext
-APPLY_WINDOW_INT16 1
-INIT_XMM sse2
-APPLY_WINDOW_INT16 1
-INIT_XMM ssse3
-APPLY_WINDOW_INT16 1
-INIT_XMM ssse3, atom
-APPLY_WINDOW_INT16 1
-
-
; void ff_add_hfyu_median_prediction_mmxext(uint8_t *dst, const uint8_t *top,
; const uint8_t *diff, int w,
; int *left, int *left_top)