diff options
author | Ronald S. Bultje <rsbultje@gmail.com> | 2013-01-15 17:00:41 -0800 |
---|---|---|
committer | Ronald S. Bultje <rsbultje@gmail.com> | 2013-01-22 18:02:24 -0800 |
commit | 2e4bb99f4df7052b3e147ee898fcb4013a34d904 (patch) | |
tree | e4747b0767cf4b707fcf8730fd20da42f98310e7 /libavcodec/x86/vorbisdsp.asm | |
parent | 0ee8293a77a6afad161e91ce1d43c4a57ce33a6a (diff) | |
download | ffmpeg-2e4bb99f4df7052b3e147ee898fcb4013a34d904.tar.gz |
vorbisdsp: convert x86 simd functions from inline asm to yasm.
Diffstat (limited to 'libavcodec/x86/vorbisdsp.asm')
-rw-r--r-- | libavcodec/x86/vorbisdsp.asm | 83 |
1 files changed, 83 insertions, 0 deletions
diff --git a/libavcodec/x86/vorbisdsp.asm b/libavcodec/x86/vorbisdsp.asm new file mode 100644 index 0000000000..c54650eef5 --- /dev/null +++ b/libavcodec/x86/vorbisdsp.asm @@ -0,0 +1,83 @@ +;****************************************************************************** +;* Vorbis x86 optimizations +;* Copyright (C) 2006 Loren Merritt <lorenm@u.washington.edu> +;* +;* This file is part of Libav. +;* +;* Libav is free software; you can redistribute it and/or +;* modify it under the terms of the GNU Lesser General Public +;* License as published by the Free Software Foundation; either +;* version 2.1 of the License, or (at your option) any later version. +;* +;* Libav is distributed in the hope that it will be useful, +;* but WITHOUT ANY WARRANTY; without even the implied warranty of +;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;* Lesser General Public License for more details. +;* +;* You should have received a copy of the GNU Lesser General Public +;* License along with Libav; if not, write to the Free Software +;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +;****************************************************************************** + +%include "libavutil/x86/x86util.asm" + +SECTION_RODATA + +pdw_80000000: times 4 dd 0x80000000 + +SECTION .text + +%if ARCH_X86_32 +INIT_MMX 3dnow +cglobal vorbis_inverse_coupling, 3, 3, 6, mag, ang, block_size + pxor m7, m7 + lea magq, [magq+block_sizeq*4] + lea angq, [angq+block_sizeq*4] + neg block_sizeq +.loop: + mova m0, [magq+block_sizeq*4] + mova m1, [angq+block_sizeq*4] + mova m2, m0 + mova m3, m1 + pfcmpge m2, m7 ; m <= 0.0 + pfcmpge m3, m7 ; a <= 0.0 + pslld m2, 31 ; keep only the sign bit + pxor m1, m2 + mova m4, m3 + pand m3, m1 + pandn m4, m1 + pfadd m3, m0 ; a = m + ((a < 0) & (a ^ sign(m))) + pfsub m0, m4 ; m = m + ((a > 0) & (a ^ sign(m))) + mova [angq+block_sizeq*4], m3 + mova [magq+block_sizeq*4], m0 + add block_sizeq, 2 + jl .loop + femms + RET +%endif + +INIT_XMM sse +cglobal vorbis_inverse_coupling, 3, 4, 6, mag, ang, block_size, cntr + mova m5, [pdw_80000000] + xor cntrq, cntrq +align 16 +.loop: + mova m0, [magq+cntrq*4] + mova m1, [angq+cntrq*4] + xorps m2, m2 + xorps m3, m3 + cmpleps m2, m0 ; m <= 0.0 + cmpleps m3, m1 ; a <= 0.0 + andps m2, m5 ; keep only the sign bit + xorps m1, m2 + mova m4, m3 + andps m3, m1 + andnps m4, m1 + addps m3, m0 ; a = m + ((a < 0) & (a ^ sign(m))) + subps m0, m4 ; m = m + ((a > 0) & (a ^ sign(m))) + mova [angq+cntrq*4], m3 + mova [magq+cntrq*4], m0 + add cntrq, 4 + cmp cntrq, block_sizeq + jl .loop + RET |