diff options
author | Michael Niedermayer <michaelni@gmx.at> | 2013-01-23 16:59:33 +0100 |
---|---|---|
committer | Michael Niedermayer <michaelni@gmx.at> | 2013-01-23 17:00:26 +0100 |
commit | b90ab2b99365a6c7f997abdb8ebfb0711c981249 (patch) | |
tree | 7d5ce123ef44b54d59531adabbf99d264f994c74 /libavcodec/x86/vorbisdsp.asm | |
parent | 516f30ed20828c04459cb2fd3b18a0bd2de4cdf0 (diff) | |
parent | 2e4bb99f4df7052b3e147ee898fcb4013a34d904 (diff) | |
download | ffmpeg-b90ab2b99365a6c7f997abdb8ebfb0711c981249.tar.gz |
Merge commit '2e4bb99f4df7052b3e147ee898fcb4013a34d904'
* commit '2e4bb99f4df7052b3e147ee898fcb4013a34d904':
vorbisdsp: convert x86 simd functions from inline asm to yasm.
Merged-by: Michael Niedermayer <michaelni@gmx.at>
Diffstat (limited to 'libavcodec/x86/vorbisdsp.asm')
-rw-r--r-- | libavcodec/x86/vorbisdsp.asm | 83 |
1 files changed, 83 insertions, 0 deletions
;******************************************************************************
;* Vorbis x86 optimizations
;* Copyright (C) 2006 Loren Merritt <lorenm@u.washington.edu>
;*
;* This file is part of FFmpeg.
;*
;* FFmpeg is free software; you can redistribute it and/or
;* modify it under the terms of the GNU Lesser General Public
;* License as published by the Free Software Foundation; either
;* version 2.1 of the License, or (at your option) any later version.
;*
;* FFmpeg is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
;* Lesser General Public License for more details.
;*
;* You should have received a copy of the GNU Lesser General Public
;* License along with FFmpeg; if not, write to the Free Software
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;******************************************************************************

%include "libavutil/x86/x86util.asm"

SECTION_RODATA

; 0x80000000 in each of four dwords: the sign bit of a 32-bit float per lane.
pdw_80000000:    times 4 dd 0x80000000

SECTION .text

%if ARCH_X86_32
;------------------------------------------------------------------------------
; vorbis_inverse_coupling, 3DNow! variant (only assembled when ARCH_X86_32)
;
; Arguments (as named in the cglobal line): mag, ang, block_size
;   mag, ang   - float arrays, updated in place
;   block_size - number of floats; two are handled per loop iteration
;
; For every element, with m = mag[i] and a = ang[i] as loaded:
;     t      = a with its sign bit flipped where m >= 0.0, else a unchanged
;     ang[i] = m + ((a >= 0.0) ? t : 0)
;     mag[i] = m - ((a >= 0.0) ? 0 : t)
;
; The loop tests its counter only after the body, so one pair is always
; processed.
; NOTE(review): this presumes callers pass a positive, even block_size - confirm.
;
; Registers: m0-m4 scratch, m5 = 0.0; femms is issued before returning.
;------------------------------------------------------------------------------
INIT_MMX 3dnow
cglobal vorbis_inverse_coupling, 3, 3, 6, mag, ang, block_size
    pxor            m5, m5                      ; m5 = 0.0 in both lanes
    lea             magq, [magq+block_sizeq*4]  ; bases point one past the end ...
    lea             angq, [angq+block_sizeq*4]
    neg             block_sizeq                 ; ... index runs from -block_size up to 0
.loop:
    mova            m0, [magq+block_sizeq*4]    ; m
    mova            m1, [angq+block_sizeq*4]    ; a
    mova            m2, m0
    mova            m3, m1
    pfcmpge         m2, m5                      ; m2 = (m >= 0.0) ? ~0 : 0
    pfcmpge         m3, m5                      ; m3 = (a >= 0.0) ? ~0 : 0
    pslld           m2, 31                      ; keep only the sign bit
    pxor            m1, m2                      ; t = a, sign flipped where m >= 0
    mova            m4, m3
    pand            m3, m1                      ; (a >= 0) ? t : 0
    pandn           m4, m1                      ; (a >= 0) ? 0 : t
    pfadd           m3, m0                      ; ang' = m + ((a >= 0) & t)
    pfsub           m0, m4                      ; mag' = m - ((a <  0) & t)
    mova            [angq+block_sizeq*4], m3
    mova            [magq+block_sizeq*4], m0
    add             block_sizeq, 2              ; two floats per iteration
    jl              .loop                       ; continue while the index is negative
    femms                                       ; leave MMX/3DNow! state before returning
    RET
%endif

;------------------------------------------------------------------------------
; vorbis_inverse_coupling, SSE variant
;
; Arguments (as named in the cglobal line): mag, ang, block_size
;   mag, ang   - float arrays, updated in place; accessed with the aligned
;                move (mova), four floats at a time
;   block_size - number of floats
;
; For every element, with m = mag[i] and a = ang[i] as loaded:
;     t      = a with its sign bit flipped where 0.0 <= m, else a unchanged
;     ang[i] = m + ((0.0 <= a) ? t : 0)
;     mag[i] = m - ((0.0 <= a) ? 0 : t)
;
; Both base pointers are advanced past the end of the data and a negated
; index counts up towards zero, so the add that steps the index also sets the
; flags for the loop branch. The counter is tested only after the body, so
; one group of four floats is always processed.
; NOTE(review): this presumes callers pass a positive block_size that is a
; multiple of 4 and 16-byte aligned buffers - confirm.
;
; Registers: m0-m4 scratch, m5 = sign-bit mask.
;------------------------------------------------------------------------------
INIT_XMM sse
cglobal vorbis_inverse_coupling, 3, 3, 6, mag, ang, block_size
    mova            m5, [pdw_80000000]          ; sign-bit mask in all four lanes
    lea             magq, [magq+block_sizeq*4]  ; bases point one past the end ...
    lea             angq, [angq+block_sizeq*4]
    neg             block_sizeq                 ; ... index runs from -block_size up to 0
align 16
.loop:
    mova            m0, [magq+block_sizeq*4]    ; m
    mova            m1, [angq+block_sizeq*4]    ; a
    xorps           m2, m2
    xorps           m3, m3
    cmpleps         m2, m0                      ; m2 = (0.0 <= m) ? ~0 : 0
    cmpleps         m3, m1                      ; m3 = (0.0 <= a) ? ~0 : 0
    andps           m2, m5                      ; keep only the sign bit
    xorps           m1, m2                      ; t = a, sign flipped where m >= 0
    mova            m4, m3
    andps           m3, m1                      ; (a >= 0) ? t : 0
    andnps          m4, m1                      ; (a >= 0) ? 0 : t
    addps           m3, m0                      ; ang' = m + ((a >= 0) & t)
    subps           m0, m4                      ; mag' = m - ((a <  0) & t)
    mova            [angq+block_sizeq*4], m3
    mova            [magq+block_sizeq*4], m0
    add             block_sizeq, 4              ; four floats per iteration
    jl              .loop                       ; continue while the index is negative
    RET