author    | Diego Biurrun <diego@biurrun.de> | 2014-01-16 17:30:19 +0100
committer | Diego Biurrun <diego@biurrun.de> | 2014-06-22 06:20:15 -0700
commit    | 9a9e2f1c8aa4539a261625145e5c1f46a8106ac2 (patch)
tree      | 8df94d9ee621e07b5e5f9aad954cc68d92105e88 /libavcodec/x86/audiodsp.asm
parent    | ca1e36a8e4cd416142487071dbca734567bdaddf (diff)
download  | ffmpeg-9a9e2f1c8aa4539a261625145e5c1f46a8106ac2.tar.gz
dsputil: Split audio operations off into a separate context
Diffstat (limited to 'libavcodec/x86/audiodsp.asm')
-rw-r--r-- | libavcodec/x86/audiodsp.asm | 137
1 file changed, 137 insertions, 0 deletions
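The commit splits the audio routines out of the monolithic dsputil into their own context of function pointers: generic init code installs C fallbacks, and per-arch init (backed by files like the one below) overrides them when the CPU supports the relevant SIMD. A minimal C sketch of that pattern, assuming the context carries just the two functions implemented in this file (the struct and init names follow the usual ff_*dsp convention but are illustrative here, not copied from audiodsp.h):

```c
#include <stdint.h>

/* Sketch of an audio DSP context: a struct of function pointers that
 * codecs call through, so the asm below can be swapped in at init time. */
typedef struct AudioDSPContext {
    /* dot product of two int16 vectors, accumulated in 32 bits */
    int32_t (*scalarproduct_int16)(const int16_t *v1, const int16_t *v2,
                                   int order);
    /* clamp each int32 element of src into [min, max] */
    void (*vector_clip_int32)(int32_t *dst, const int32_t *src,
                              int32_t min, int32_t max, unsigned int len);
} AudioDSPContext;

/* generic init fills in the C fallbacks ... */
void ff_audiodsp_init(AudioDSPContext *c);
/* ... then x86 init replaces entries with the MMXEXT/SSE2/SSE4 versions
 * from audiodsp.asm when the detected CPU flags allow it */
void ff_audiodsp_init_x86(AudioDSPContext *c);
```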
diff --git a/libavcodec/x86/audiodsp.asm b/libavcodec/x86/audiodsp.asm
new file mode 100644
index 0000000000..f2e831df17
--- /dev/null
+++ b/libavcodec/x86/audiodsp.asm
@@ -0,0 +1,137 @@
+;******************************************************************************
+;* optimized audio functions
+;* Copyright (c) 2008 Loren Merritt
+;*
+;* This file is part of Libav.
+;*
+;* Libav is free software; you can redistribute it and/or
+;* modify it under the terms of the GNU Lesser General Public
+;* License as published by the Free Software Foundation; either
+;* version 2.1 of the License, or (at your option) any later version.
+;*
+;* Libav is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+;* Lesser General Public License for more details.
+;*
+;* You should have received a copy of the GNU Lesser General Public
+;* License along with Libav; if not, write to the Free Software
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+;******************************************************************************
+
+%include "libavutil/x86/x86util.asm"
+
+SECTION_TEXT
+
+%macro SCALARPRODUCT 0
+; int ff_scalarproduct_int16(int16_t *v1, int16_t *v2, int order)
+cglobal scalarproduct_int16, 3,3,3, v1, v2, order
+    shl orderq, 1
+    add v1q, orderq
+    add v2q, orderq
+    neg orderq
+    pxor    m2, m2
+.loop:
+    movu    m0, [v1q + orderq]
+    movu    m1, [v1q + orderq + mmsize]
+    pmaddwd m0, [v2q + orderq]
+    pmaddwd m1, [v2q + orderq + mmsize]
+    paddd   m2, m0
+    paddd   m2, m1
+    add     orderq, mmsize*2
+    jl .loop
+%if mmsize == 16
+    movhlps m0, m2
+    paddd   m2, m0
+    pshuflw m0, m2, 0x4e
+%else
+    pshufw  m0, m2, 0x4e
+%endif
+    paddd   m2, m0
+    movd    eax, m2
+    RET
+%endmacro
+
+INIT_MMX mmxext
+SCALARPRODUCT
+INIT_XMM sse2
+SCALARPRODUCT
+
+
+;-----------------------------------------------------------------------------
+; void ff_vector_clip_int32(int32_t *dst, const int32_t *src, int32_t min,
+;                           int32_t max, unsigned int len)
+;-----------------------------------------------------------------------------
+
+; %1 = number of xmm registers used
+; %2 = number of inline load/process/store loops per asm loop
+; %3 = process 4*mmsize (%3=0) or 8*mmsize (%3=1) bytes per loop
+; %4 = CLIPD function takes min/max as float instead of int (CLIPD_SSE2)
+; %5 = suffix
+%macro VECTOR_CLIP_INT32 4-5
+cglobal vector_clip_int32%5, 5,5,%1, dst, src, min, max, len
+%if %4
+    cvtsi2ss  m4, minm
+    cvtsi2ss  m5, maxm
+%else
+    movd      m4, minm
+    movd      m5, maxm
+%endif
+    SPLATD    m4
+    SPLATD    m5
+.loop:
+%assign %%i 1
+%rep %2
+    mova      m0,  [srcq+mmsize*0*%%i]
+    mova      m1,  [srcq+mmsize*1*%%i]
+    mova      m2,  [srcq+mmsize*2*%%i]
+    mova      m3,  [srcq+mmsize*3*%%i]
+%if %3
+    mova      m7,  [srcq+mmsize*4*%%i]
+    mova      m8,  [srcq+mmsize*5*%%i]
+    mova      m9,  [srcq+mmsize*6*%%i]
+    mova      m10, [srcq+mmsize*7*%%i]
+%endif
+    CLIPD     m0,  m4, m5, m6
+    CLIPD     m1,  m4, m5, m6
+    CLIPD     m2,  m4, m5, m6
+    CLIPD     m3,  m4, m5, m6
+%if %3
+    CLIPD     m7,  m4, m5, m6
+    CLIPD     m8,  m4, m5, m6
+    CLIPD     m9,  m4, m5, m6
+    CLIPD     m10, m4, m5, m6
+%endif
+    mova [dstq+mmsize*0*%%i], m0
+    mova [dstq+mmsize*1*%%i], m1
+    mova [dstq+mmsize*2*%%i], m2
+    mova [dstq+mmsize*3*%%i], m3
+%if %3
+    mova [dstq+mmsize*4*%%i], m7
+    mova [dstq+mmsize*5*%%i], m8
+    mova [dstq+mmsize*6*%%i], m9
+    mova [dstq+mmsize*7*%%i], m10
+%endif
+%assign %%i %%i+1
+%endrep
+    add srcq, mmsize*4*(%2+%3)
+    add dstq, mmsize*4*(%2+%3)
+    sub lend, mmsize*(%2+%3)
+    jg .loop
+    REP_RET
+%endmacro
+
+INIT_MMX mmx
+%define CLIPD CLIPD_MMX
+VECTOR_CLIP_INT32 0, 1, 0, 0
+INIT_XMM sse2
+VECTOR_CLIP_INT32 6, 1, 0, 0, _int
+%define CLIPD CLIPD_SSE2
+VECTOR_CLIP_INT32 6, 2, 0, 1
+INIT_XMM sse4
+%define CLIPD CLIPD_SSE41
+%ifdef m8
+VECTOR_CLIP_INT32 11, 1, 1, 0
+%else
+VECTOR_CLIP_INT32 6, 1, 0, 0
+%endif
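As a reading aid, here are plain-C reference versions of the two routines above (a sketch written from the signatures in the asm comments, not copied from Libav's C fallbacks). `scalarproduct_int16` maps onto `pmaddwd`, which multiplies pairs of int16 lanes and adds adjacent products into int32 lanes; the tail after the loop does the remaining horizontal reduction. `vector_clip_int32` is a per-element clamp; the macro unrolls it so the x86-64 SSE4 build (the `%ifdef m8` branch, where registers m8-m10 exist) can keep eight vectors in flight per iteration.

```c
#include <stdint.h>

/* Reference for ff_scalarproduct_int16(): sum of pairwise products of
 * two int16 vectors, accumulated in 32 bits. The asm walks both vectors
 * with a negative byte offset that counts up to zero, and assumes `order`
 * is a multiple of the elements handled per iteration (2*mmsize bytes). */
static int32_t scalarproduct_int16_ref(const int16_t *v1, const int16_t *v2,
                                       int order)
{
    int32_t sum = 0;
    while (order--)
        sum += *v1++ * *v2++;
    return sum;
}

/* Reference for ff_vector_clip_int32(): clamp every element of src into
 * [min, max]. The asm uses aligned loads/stores (mova), so callers must
 * provide suitably aligned buffers and a `len` matching the unroll factor. */
static void vector_clip_int32_ref(int32_t *dst, const int32_t *src,
                                  int32_t min, int32_t max, unsigned int len)
{
    do {
        int32_t v = *src++;
        *dst++ = v < min ? min : v > max ? max : v;
    } while (--len);
}
```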