diff options
author | Michael Niedermayer <michaelni@gmx.at> | 2012-08-29 17:46:10 +0200 |
---|---|---|
committer | Michael Niedermayer <michaelni@gmx.at> | 2012-08-29 18:04:34 +0200 |
commit | 1c66807636ed8da5cf81d75cc8bb2726c6d6bc21 (patch) | |
tree | dbb5d2fafe49a56ce5d608c74ae32929c08a4405 /libavcodec/x86/lpc.c | |
parent | 85c830331c36502144e1cc9cf8aa7bd177e1d79d (diff) | |
parent | d488c3bcbaf7ddda42597e014deb661a7e9e2112 (diff) | |
download | ffmpeg-1c66807636ed8da5cf81d75cc8bb2726c6d6bc21.tar.gz |
Merge commit 'd488c3bcbaf7ddda42597e014deb661a7e9e2112'
* commit 'd488c3bcbaf7ddda42597e014deb661a7e9e2112':
configure: support Bitrig OS
yuv2rgb: handle line widths that are not a multiple of 4.
graph2dot: Use the fallback getopt implementation if needed
tools: Include io.h for open/read/write/close if unistd.h doesn't exist
testprogs: Remove unused includes
qt-faststart: Use other seek/tell functions on MSVC than on mingw
ismindex: Include direct.h for _mkdir on windows
sdp: Use static const char arrays instead of pointers to strings
x86: avcodec: Drop silly "_mmx" suffixes from filenames
x86: avcodec: Drop silly "_sse" suffixes from filenames
sdp: Include profile-level-id for H264
utvideoenc: use ff_huff_gen_len_table
huffman: add ff_huff_gen_len_table
cllc: simplify/fix swapped data buffer allocation.
rtpdec_h264: Don't set the pixel format
h264: Check that the codec isn't null before accessing it
audio_frame_queue: Define af_queue_log_state before using it
Conflicts:
libavcodec/audio_frame_queue.c
libavcodec/h264.c
libavcodec/huffman.h
libavcodec/huffyuv.c
libavcodec/utvideoenc.c
libavcodec/x86/Makefile
Merged-by: Michael Niedermayer <michaelni@gmx.at>
Diffstat (limited to 'libavcodec/x86/lpc.c')
-rw-r--r-- | libavcodec/x86/lpc.c | 153 |
1 files changed, 153 insertions, 0 deletions
diff --git a/libavcodec/x86/lpc.c b/libavcodec/x86/lpc.c new file mode 100644 index 0000000000..e0e6f8ba8a --- /dev/null +++ b/libavcodec/x86/lpc.c @@ -0,0 +1,153 @@ +/* + * MMX optimized LPC DSP utils + * Copyright (c) 2007 Loren Merritt + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "libavutil/x86/asm.h" +#include "libavutil/cpu.h" +#include "libavcodec/lpc.h" + +#if HAVE_INLINE_ASM + +static void lpc_apply_welch_window_sse2(const int32_t *data, int len, + double *w_data) +{ + double c = 2.0 / (len-1.0); + int n2 = len>>1; + x86_reg i = -n2*sizeof(int32_t); + x86_reg j = n2*sizeof(int32_t); + __asm__ volatile( + "movsd %4, %%xmm7 \n\t" + "movapd "MANGLE(ff_pd_1)", %%xmm6 \n\t" + "movapd "MANGLE(ff_pd_2)", %%xmm5 \n\t" + "movlhps %%xmm7, %%xmm7 \n\t" + "subpd %%xmm5, %%xmm7 \n\t" + "addsd %%xmm6, %%xmm7 \n\t" + "test $1, %5 \n\t" + "jz 2f \n\t" +#define WELCH(MOVPD, offset)\ + "1: \n\t"\ + "movapd %%xmm7, %%xmm1 \n\t"\ + "mulpd %%xmm1, %%xmm1 \n\t"\ + "movapd %%xmm6, %%xmm0 \n\t"\ + "subpd %%xmm1, %%xmm0 \n\t"\ + "pshufd $0x4e, %%xmm0, %%xmm1 \n\t"\ + "cvtpi2pd (%3,%0), %%xmm2 \n\t"\ + "cvtpi2pd "#offset"*4(%3,%1), %%xmm3 \n\t"\ + "mulpd %%xmm0, %%xmm2 \n\t"\ + "mulpd %%xmm1, %%xmm3 \n\t"\ + "movapd %%xmm2, (%2,%0,2) \n\t"\ + MOVPD" %%xmm3, "#offset"*8(%2,%1,2) \n\t"\ + "subpd %%xmm5, %%xmm7 \n\t"\ + "sub $8, %1 \n\t"\ + "add $8, %0 \n\t"\ + "jl 1b \n\t"\ + + WELCH("movupd", -1) + "jmp 3f \n\t" + "2: \n\t" + WELCH("movapd", -2) + "3: \n\t" + :"+&r"(i), "+&r"(j) + :"r"(w_data+n2), "r"(data+n2), "m"(c), "r"(len) + XMM_CLOBBERS_ONLY("%xmm0", "%xmm1", "%xmm2", "%xmm3", + "%xmm5", "%xmm6", "%xmm7") + ); +#undef WELCH +} + +static void lpc_compute_autocorr_sse2(const double *data, int len, int lag, + double *autoc) +{ + int j; + + if((x86_reg)data & 15) + data++; + + for(j=0; j<lag; j+=2){ + x86_reg i = -len*sizeof(double); + if(j == lag-2) { + __asm__ volatile( + "movsd "MANGLE(ff_pd_1)", %%xmm0 \n\t" + "movsd "MANGLE(ff_pd_1)", %%xmm1 \n\t" + "movsd "MANGLE(ff_pd_1)", %%xmm2 \n\t" + "1: \n\t" + "movapd (%2,%0), %%xmm3 \n\t" + "movupd -8(%3,%0), %%xmm4 \n\t" + "movapd (%3,%0), %%xmm5 \n\t" + "mulpd %%xmm3, %%xmm4 \n\t" + "mulpd %%xmm3, %%xmm5 \n\t" + "mulpd -16(%3,%0), %%xmm3 \n\t" + "addpd %%xmm4, %%xmm1 \n\t" + "addpd %%xmm5, %%xmm0 \n\t" + "addpd %%xmm3, %%xmm2 \n\t" + "add $16, %0 \n\t" + "jl 1b \n\t" + "movhlps %%xmm0, %%xmm3 \n\t" + "movhlps %%xmm1, %%xmm4 \n\t" + "movhlps %%xmm2, %%xmm5 \n\t" + "addsd %%xmm3, %%xmm0 \n\t" + "addsd %%xmm4, %%xmm1 \n\t" + "addsd %%xmm5, %%xmm2 \n\t" + "movsd %%xmm0, (%1) \n\t" + "movsd %%xmm1, 8(%1) \n\t" + "movsd %%xmm2, 16(%1) \n\t" + :"+&r"(i) + :"r"(autoc+j), "r"(data+len), "r"(data+len-j) + :"memory" + ); + } else { + __asm__ volatile( + "movsd "MANGLE(ff_pd_1)", %%xmm0 \n\t" + "movsd "MANGLE(ff_pd_1)", %%xmm1 \n\t" + "1: \n\t" + "movapd (%3,%0), %%xmm3 \n\t" + "movupd -8(%4,%0), %%xmm4 \n\t" + "mulpd %%xmm3, %%xmm4 \n\t" + "mulpd (%4,%0), %%xmm3 \n\t" + "addpd %%xmm4, %%xmm1 \n\t" + "addpd %%xmm3, %%xmm0 \n\t" + "add $16, %0 \n\t" + "jl 1b \n\t" + "movhlps %%xmm0, %%xmm3 \n\t" + "movhlps %%xmm1, %%xmm4 \n\t" + "addsd %%xmm3, %%xmm0 \n\t" + "addsd %%xmm4, %%xmm1 \n\t" + "movsd %%xmm0, %1 \n\t" + "movsd %%xmm1, %2 \n\t" + :"+&r"(i), "=m"(autoc[j]), "=m"(autoc[j+1]) + :"r"(data+len), "r"(data+len-j) + ); + } + } +} + +#endif /* HAVE_INLINE_ASM */ + +av_cold void ff_lpc_init_x86(LPCContext *c) +{ + int mm_flags = av_get_cpu_flags(); + +#if HAVE_INLINE_ASM + if (mm_flags & (AV_CPU_FLAG_SSE2|AV_CPU_FLAG_SSE2SLOW)) { + c->lpc_apply_welch_window = lpc_apply_welch_window_sse2; + c->lpc_compute_autocorr = lpc_compute_autocorr_sse2; + } +#endif /* HAVE_INLINE_ASM */ +} |