diff options
author | Michael Niedermayer <michaelni@gmx.at> | 2014-06-22 17:58:28 +0200 |
---|---|---|
committer | Michael Niedermayer <michaelni@gmx.at> | 2014-06-22 17:58:28 +0200 |
commit | 99497b4683e5054bcdc5b6802a27d717df9e04f3 (patch) | |
tree | 130022374c1a92b72288272bd0927ae6ac7d825b /libavcodec/x86/audiodsp.asm | |
parent | 0dae193d3ecf5d0dc687f5ad708419bf7600de9a (diff) | |
parent | 9a9e2f1c8aa4539a261625145e5c1f46a8106ac2 (diff) | |
download | ffmpeg-99497b4683e5054bcdc5b6802a27d717df9e04f3.tar.gz |
Merge commit '9a9e2f1c8aa4539a261625145e5c1f46a8106ac2'
* commit '9a9e2f1c8aa4539a261625145e5c1f46a8106ac2':
dsputil: Split audio operations off into a separate context
Conflicts:
configure
libavcodec/takdec.c
libavcodec/x86/Makefile
libavcodec/x86/dsputil.asm
libavcodec/x86/dsputil_init.c
libavcodec/x86/dsputil_mmx.c
libavcodec/x86/dsputil_x86.h
Merged-by: Michael Niedermayer <michaelni@gmx.at>
Diffstat (limited to 'libavcodec/x86/audiodsp.asm')
-rw-r--r-- | libavcodec/x86/audiodsp.asm | 133 |
1 files changed, 133 insertions, 0 deletions
diff --git a/libavcodec/x86/audiodsp.asm b/libavcodec/x86/audiodsp.asm
new file mode 100644
index 0000000000..83f9bb6f45
--- /dev/null
+++ b/libavcodec/x86/audiodsp.asm
@@ -0,0 +1,133 @@
+;******************************************************************************
+;* optimized audio functions
+;* Copyright (c) 2008 Loren Merritt
+;*
+;* This file is part of FFmpeg.
+;*
+;* FFmpeg is free software; you can redistribute it and/or
+;* modify it under the terms of the GNU Lesser General Public
+;* License as published by the Free Software Foundation; either
+;* version 2.1 of the License, or (at your option) any later version.
+;*
+;* FFmpeg is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+;* Lesser General Public License for more details.
+;*
+;* You should have received a copy of the GNU Lesser General Public
+;* License along with FFmpeg; if not, write to the Free Software
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+;******************************************************************************
+
+%include "libavutil/x86/x86util.asm"
+
+SECTION_TEXT
+
+%macro SCALARPRODUCT 0
+; int ff_scalarproduct_int16(int16_t *v1, int16_t *v2, int order)
+cglobal scalarproduct_int16, 3,3,3, v1, v2, order
+    shl orderq, 1                         ; order *= 2: int16_t element count -> byte count. NOTE(review): operates on the full orderq although order is declared int; confirm the upper bits are defined
+    add v1q, orderq                       ; v1 now points one past the last element
+    add v2q, orderq                       ; same for v2
+    neg orderq                            ; negative byte offset from the end, counted up toward zero
+    pxor    m2, m2                        ; m2 = 0: running accumulator
+.loop:
+    movu    m0, [v1q + orderq]            ; first mmsize bytes of v1 for this iteration
+    movu    m1, [v1q + orderq + mmsize]   ; second mmsize bytes of v1
+    pmaddwd m0, [v2q + orderq]            ; multiply 16-bit lanes by v2 and add adjacent products into 32-bit lanes
+    pmaddwd m1, [v2q + orderq + mmsize]   ; same for the second half
+    paddd   m2, m0                        ; accumulate 32-bit partial sums
+    paddd   m2, m1
+    add     orderq, mmsize*2              ; advance by the 2*mmsize bytes consumed above
+    jl .loop                              ; continue while the offset is still negative
+    HADDD   m2, m0                        ; presumably reduces the dwords of m2 to one sum, m0 as scratch (macro from x86util.asm; confirm)
+    movd   eax, m2                        ; return value = low dword of m2
+%if mmsize == 8
+    emms                                  ; only assembled when mmsize == 8 (the INIT_MMX instantiation)
+%endif
+    RET
+%endmacro
+
+INIT_MMX mmxext
+SCALARPRODUCT
+INIT_XMM sse2
+SCALARPRODUCT
+
+
+;-----------------------------------------------------------------------------
+; void ff_vector_clip_int32(int32_t *dst, const int32_t *src, int32_t min,
+;                           int32_t max, unsigned int len)
+;-----------------------------------------------------------------------------
+
+; %1 = number of xmm registers used
+; %2 = number of inline load/process/store loops per asm loop
+; %3 = process 4*mmsize (%3=0) or 8*mmsize (%3=1) bytes per loop
+; %4 = CLIPD function takes min/max as float instead of int (CLIPD_SSE2)
+; %5 = suffix
+%macro VECTOR_CLIP_INT32 4-5
+cglobal vector_clip_int32%5, 5,5,%1, dst, src, min, max, len
+%if %4
+    cvtsi2ss  m4, minm                    ; %4 != 0: convert the integer min to float for the float-based CLIPD
+    cvtsi2ss  m5, maxm                    ; likewise for max
+%else
+    movd      m4, minm                    ; %4 == 0: keep min as an integer
+    movd      m5, maxm                    ; likewise for max
+%endif
+    SPLATD    m4                          ; presumably replicates the low dword across all lanes (macro from x86util.asm; confirm)
+    SPLATD    m5
+.loop:
+%assign %%i 0
+%rep %2
+    mova      m0,  [srcq+mmsize*(0+%%i)]  ; load four registers' worth of src
+    mova      m1,  [srcq+mmsize*(1+%%i)]
+    mova      m2,  [srcq+mmsize*(2+%%i)]
+    mova      m3,  [srcq+mmsize*(3+%%i)]
+%if %3
+    mova      m7,  [srcq+mmsize*(4+%%i)]  ; %3 = 1: four more registers (m7-m10; m4-m6 are taken by min, max and the CLIPD operand)
+    mova      m8,  [srcq+mmsize*(5+%%i)]
+    mova      m9,  [srcq+mmsize*(6+%%i)]
+    mova      m10, [srcq+mmsize*(7+%%i)]
+%endif
+    CLIPD  m0,  m4, m5, m6                ; CLIPD is %define'd per instantiation below; m4 = min, m5 = max, m6 presumably scratch (confirm)
+    CLIPD  m1,  m4, m5, m6
+    CLIPD  m2,  m4, m5, m6
+    CLIPD  m3,  m4, m5, m6
+%if %3
+    CLIPD  m7,  m4, m5, m6
+    CLIPD  m8,  m4, m5, m6
+    CLIPD  m9,  m4, m5, m6
+    CLIPD  m10, m4, m5, m6
+%endif
+    mova  [dstq+mmsize*(0+%%i)], m0       ; store the processed registers to dst at the same offsets
+    mova  [dstq+mmsize*(1+%%i)], m1
+    mova  [dstq+mmsize*(2+%%i)], m2
+    mova  [dstq+mmsize*(3+%%i)], m3
+%if %3
+    mova  [dstq+mmsize*(4+%%i)], m7
+    mova  [dstq+mmsize*(5+%%i)], m8
+    mova  [dstq+mmsize*(6+%%i)], m9
+    mova  [dstq+mmsize*(7+%%i)], m10
+%endif
+%assign %%i %%i+4*(%3+1)
+%endrep
+    add     srcq, mmsize*4*(%2+%3)        ; advance src. NOTE(review): equals the %2*4*(%3+1)*mmsize bytes handled above only for the (%2,%3) pairs used below: (1,0), (2,0), (1,1)
+    add     dstq, mmsize*4*(%2+%3)        ; advance dst by the same amount
+    sub     lend, mmsize*(%2+%3)          ; len is in int32_t elements: bytes advanced / 4
+    jg .loop                              ; loop while len > 0 (signed compare on the 32-bit counter)
+    REP_RET
+%endmacro
+
+INIT_MMX mmx
+%define CLIPD CLIPD_MMX
+VECTOR_CLIP_INT32 0, 1, 0, 0
+INIT_XMM sse2
+VECTOR_CLIP_INT32 6, 1, 0, 0, _int
+%define CLIPD CLIPD_SSE2
+VECTOR_CLIP_INT32 6, 2, 0, 1
+INIT_XMM sse4
+%define CLIPD CLIPD_SSE41
+%ifdef m8
+VECTOR_CLIP_INT32 11, 1, 1, 0
+%else
+VECTOR_CLIP_INT32 6, 1, 0, 0
+%endif |