diff options
author | Michael Niedermayer <michaelni@gmx.at> | 2012-06-30 22:44:18 +0200 |
---|---|---|
committer | Michael Niedermayer <michaelni@gmx.at> | 2012-06-30 22:44:18 +0200 |
commit | 64b25938e90253432d28ffd7d971f085c560a523 (patch) | |
tree | 56ba6a39d7e8f14ebccdcc4db935164b5da624a2 /libavcodec/x86/fmtconvert.asm | |
parent | be24f85176d8e46c3154f1f51013f235b273183e (diff) | |
parent | ceabc13f129cd6344b1eebdbe10119083fe5520e (diff) | |
download | ffmpeg-64b25938e90253432d28ffd7d971f085c560a523.tar.gz |
Merge remote-tracking branch 'qatar/master'
* qatar/master:
dsputilenc_mmx: split assignment of ff_sse16_sse2 to SSE2 section.
dnxhdenc: add space between function argument type and comment.
x86: fmtconvert: add special asm for float_to_int16_interleave_misc_*
attributes: Add a definition of av_always_inline for MSVC
cmdutils: Pass the actual chosen encoder to filter_codec_opts
os_support: Add fallback definitions for stat flags
os_support: Rename the poll fallback function to ff_poll
network: Check for struct pollfd
os_support: Don't compare a negative number against socket descriptors
os_support: Include all the necessary headers for the win32 open function
x86: vc1: fix and enable optimised loop filter
Conflicts:
cmdutils.c
cmdutils.h
ffmpeg.c
ffplay.c
libavformat/os_support.c
Merged-by: Michael Niedermayer <michaelni@gmx.at>
Diffstat (limited to 'libavcodec/x86/fmtconvert.asm')
-rw-r--r-- | libavcodec/x86/fmtconvert.asm | 78 |
1 files changed, 78 insertions, 0 deletions
diff --git a/libavcodec/x86/fmtconvert.asm b/libavcodec/x86/fmtconvert.asm index 72bee55669..9499a9e3a7 100644 --- a/libavcodec/x86/fmtconvert.asm +++ b/libavcodec/x86/fmtconvert.asm @@ -115,6 +115,84 @@ FLOAT_TO_INT16 sse, 0 FLOAT_TO_INT16 3dnow, 0 %undef cvtps2pi +;------------------------------------------------------------------------------ +; void ff_float_to_int16_step(int16_t *dst, const float *src, long len, long step); +;------------------------------------------------------------------------------ +%macro FLOAT_TO_INT16_STEP 2 +cglobal float_to_int16_step_%1, 4,7,%2, dst, src, len, step, step3, v1, v2 + add lenq, lenq + lea srcq, [srcq+2*lenq] + lea step3q, [stepq*3] + neg lenq +.loop: +%ifidn %1, sse2 + cvtps2dq m0, [srcq+2*lenq ] + cvtps2dq m1, [srcq+2*lenq+16] + packssdw m0, m1 + movd v1d, m0 + psrldq m0, 4 + movd v2d, m0 + psrldq m0, 4 + mov [dstq], v1w + mov [dstq+stepq*4], v2w + shr v1d, 16 + shr v2d, 16 + mov [dstq+stepq*2], v1w + mov [dstq+step3q*2], v2w + lea dstq, [dstq+stepq*8] + movd v1d, m0 + psrldq m0, 4 + movd v2d, m0 + mov [dstq], v1w + mov [dstq+stepq*4], v2w + shr v1d, 16 + shr v2d, 16 + mov [dstq+stepq*2], v1w + mov [dstq+step3q*2], v2w + lea dstq, [dstq+stepq*8] +%else + cvtps2pi m0, [srcq+2*lenq ] + cvtps2pi m1, [srcq+2*lenq+ 8] + cvtps2pi m2, [srcq+2*lenq+16] + cvtps2pi m3, [srcq+2*lenq+24] + packssdw m0, m1 + packssdw m2, m3 + movd v1d, m0 + psrlq m0, 32 + movd v2d, m0 + mov [dstq], v1w + mov [dstq+stepq*4], v2w + shr v1d, 16 + shr v2d, 16 + mov [dstq+stepq*2], v1w + mov [dstq+step3q*2], v2w + lea dstq, [dstq+stepq*8] + movd v1d, m2 + psrlq m2, 32 + movd v2d, m2 + mov [dstq], v1w + mov [dstq+stepq*4], v2w + shr v1d, 16 + shr v2d, 16 + mov [dstq+stepq*2], v1w + mov [dstq+step3q*2], v2w + lea dstq, [dstq+stepq*8] +%endif + add lenq, 16 + js .loop +%ifnidn %1, sse2 + emms +%endif + REP_RET +%endmacro + +INIT_XMM +FLOAT_TO_INT16_STEP sse2, 2 +INIT_MMX +FLOAT_TO_INT16_STEP sse, 0 +%define cvtps2pi pf2id +FLOAT_TO_INT16_STEP 3dnow, 0 +%undef cvtps2pi ;------------------------------------------------------------------------------- ; void ff_float_to_int16_interleave2(int16_t *dst, const float **src, long len); |