diff options
author | Michael Niedermayer <michaelni@gmx.at> | 2012-12-06 14:33:38 +0100 |
---|---|---|
committer | Michael Niedermayer <michaelni@gmx.at> | 2012-12-06 14:33:38 +0100 |
commit | 15784c2bab56508565771cd04b5dda64d6717953 (patch) | |
tree | a0eade0816e7ea46a08a4cf7ffd2b25e6666c59b /libavutil/x86 | |
parent | 32aedebdc59d5b34ab7a9137855dcc602267e00f (diff) | |
parent | 9d5c62ba5b586c80af508b5914934b1c439f6652 (diff) | |
download | ffmpeg-15784c2bab56508565771cd04b5dda64d6717953.tar.gz |
Merge commit '9d5c62ba5b586c80af508b5914934b1c439f6652'
* commit '9d5c62ba5b586c80af508b5914934b1c439f6652':
lavu/opt: do not filter out the initial sign character except for flags
eval: treat dB as decibels instead of decibytes
float_dsp: add vector_dmul_scalar() to multiply a vector of doubles
Conflicts:
libavutil/eval.c
tests/ref/fate/eval
Merged-by: Michael Niedermayer <michaelni@gmx.at>
Diffstat (limited to 'libavutil/x86')
-rw-r--r-- | libavutil/x86/float_dsp.asm | 45 | ||||
-rw-r--r-- | libavutil/x86/float_dsp_init.c | 9 | ||||
-rw-r--r-- | libavutil/x86/x86util.asm | 11 |
3 files changed, 65 insertions, 0 deletions
diff --git a/libavutil/x86/float_dsp.asm b/libavutil/x86/float_dsp.asm
index a3200c64b6..365bafea00 100644
--- a/libavutil/x86/float_dsp.asm
+++ b/libavutil/x86/float_dsp.asm
@@ -120,3 +120,48 @@ cglobal vector_fmul_scalar, 4,4,3, dst, src, mul, len
 
 INIT_XMM sse
 VECTOR_FMUL_SCALAR
+
+;------------------------------------------------------------------------------
+; void ff_vector_dmul_scalar(double *dst, const double *src, double mul,
+;                            int len)
+;------------------------------------------------------------------------------
+
+%macro VECTOR_DMUL_SCALAR 0
+%if UNIX64
+cglobal vector_dmul_scalar, 3,3,3, dst, src, len
+%else
+cglobal vector_dmul_scalar, 4,4,3, dst, src, mul, len
+%endif
+%if ARCH_X86_32
+    VBROADCASTSD m0, mulm
+%else
+%if WIN64
+    movlhps xmm2, xmm2
+%if cpuflag(avx)
+    vinsertf128 ymm2, ymm2, xmm2, 1
+%endif
+    SWAP 0, 2
+%else
+    movlhps xmm0, xmm0
+%if cpuflag(avx)
+    vinsertf128 ymm0, ymm0, xmm0, 1
+%endif
+%endif
+%endif
+    lea lenq, [lend*8-2*mmsize]
+.loop:
+    mulpd m1, m0, [srcq+lenq ]
+    mulpd m2, m0, [srcq+lenq+mmsize]
+    mova [dstq+lenq ], m1
+    mova [dstq+lenq+mmsize], m2
+    sub lenq, 2*mmsize
+    jge .loop
+    REP_RET
+%endmacro
+
+INIT_XMM sse2
+VECTOR_DMUL_SCALAR
+%if HAVE_AVX_EXTERNAL
+INIT_YMM avx
+VECTOR_DMUL_SCALAR
+%endif
diff --git a/libavutil/x86/float_dsp_init.c b/libavutil/x86/float_dsp_init.c
index 259fda146e..e5112e7f8e 100644
--- a/libavutil/x86/float_dsp_init.c
+++ b/libavutil/x86/float_dsp_init.c
@@ -35,6 +35,11 @@ extern void ff_vector_fmac_scalar_avx(float *dst, const float *src, float mul,
 extern void ff_vector_fmul_scalar_sse(float *dst, const float *src, float mul,
                                       int len);
 
+extern void ff_vector_dmul_scalar_sse2(double *dst, const double *src,
+                                       double mul, int len);
+extern void ff_vector_dmul_scalar_avx(double *dst, const double *src,
+                                      double mul, int len);
+
 void ff_float_dsp_init_x86(AVFloatDSPContext *fdsp)
 {
     int mm_flags = av_get_cpu_flags();
@@ -44,8 +49,12 @@ void ff_float_dsp_init_x86(AVFloatDSPContext *fdsp)
         fdsp->vector_fmac_scalar = ff_vector_fmac_scalar_sse;
         fdsp->vector_fmul_scalar = ff_vector_fmul_scalar_sse;
     }
+    if (EXTERNAL_SSE2(mm_flags)) {
+        fdsp->vector_dmul_scalar = ff_vector_dmul_scalar_sse2;
+    }
     if (EXTERNAL_AVX(mm_flags)) {
         fdsp->vector_fmul = ff_vector_fmul_avx;
         fdsp->vector_fmac_scalar = ff_vector_fmac_scalar_avx;
+        fdsp->vector_dmul_scalar = ff_vector_dmul_scalar_avx;
     }
 }
diff --git a/libavutil/x86/x86util.asm b/libavutil/x86/x86util.asm
index c11df90386..00fa7b1827 100644
--- a/libavutil/x86/x86util.asm
+++ b/libavutil/x86/x86util.asm
@@ -631,6 +631,17 @@
 %endif
 %endmacro
 
+%macro VBROADCASTSD 2 ; dst xmm/ymm, src m64
+%if cpuflag(avx) && mmsize == 32
+    vbroadcastsd %1, %2
+%elif cpuflag(sse3)
+    movddup %1, %2
+%else ; sse2
+    movsd %1, %2
+    movlhps %1, %1
+%endif
+%endmacro
+
 %macro SHUFFLE_MASK_W 8
 %rep 8
 %if %1>=0x80 |