diff options
author | Michael Niedermayer <michaelni@gmx.at> | 2011-05-19 05:12:45 +0200 |
---|---|---|
committer | Michael Niedermayer <michaelni@gmx.at> | 2011-05-19 06:00:31 +0200 |
commit | 75a37b57a59f6701d9443c5f7a0ceec108b27a18 (patch) | |
tree | 1eea866003f3d7385261dea40b5b8063e87f9b8a /libavcodec/x86/fmtconvert.asm | |
parent | 8529f9b36b7c1b8f2cb36ba2709983517c4b6458 (diff) | |
parent | 41e21e4db623ebd77f431a6f30cf21d62d9e1f33 (diff) | |
download | ffmpeg-75a37b57a59f6701d9443c5f7a0ceec108b27a18.tar.gz |
Merge remote-tracking branch 'qatar/master'
* qatar/master:
APIchanges: fill in date and commit for request_sample_fmt
Add floating-point sample format support to the ac3, eac3, dca, aac, and vorbis decoders.
Add support for request_sample_format in ffmpeg and ffplay.
Add APIchanges entry for request_sample_fmt.
Add request_sample_fmt field to AVCodecContext.
Add float_interleave() to FmtConvertContext with x86-optimized versions.
Remove unused make variable SEEK_REFFILE
fate: remove redundant aref and vref references
fate: remove do_ffmpeg_nocheck function
fate: do not collect -benchmark output
mpegaudiodec: remove decode_end() function
fate: run aref and vref as regular tests
mpegaudio: sanitise compute_antialias_* names
mpeg12: add slice-threading checks to slice-threading initializers.
h264: copy pixel_shift between slice threading contexts.
mdec: enable frame-level multithreading.
mdec.c: fix overread.
Conflicts:
libavcodec/aacdec.c
libavcodec/ac3dec.c
libavcodec/avcodec.h
libavcodec/dca.c
libavcodec/h264.c
libavcodec/mdec.c
libavcodec/mpeg12.c
libavcodec/options.c
libavcodec/version.h
libavcodec/vorbisdec.c
Merged-by: Michael Niedermayer <michaelni@gmx.at>
Diffstat (limited to 'libavcodec/x86/fmtconvert.asm')
-rw-r--r-- | libavcodec/x86/fmtconvert.asm | 141 |
1 files changed, 141 insertions, 0 deletions
diff --git a/libavcodec/x86/fmtconvert.asm b/libavcodec/x86/fmtconvert.asm index dc038dde73..171e52a165 100644 --- a/libavcodec/x86/fmtconvert.asm +++ b/libavcodec/x86/fmtconvert.asm @@ -20,6 +20,7 @@ ;****************************************************************************** %include "x86inc.asm" +%include "x86util.asm" section .text align=16 @@ -89,3 +90,143 @@ FLOAT_TO_INT16_INTERLEAVE6 3dnow %undef pswapd FLOAT_TO_INT16_INTERLEAVE6 3dn2 %undef cvtps2pi + +;----------------------------------------------------------------------------- +; void ff_float_interleave6(float *dst, const float **src, unsigned int len); +;----------------------------------------------------------------------------- + +%macro BUTTERFLYPS 3 + movaps m%3, m%1 + unpcklps m%1, m%2 + unpckhps m%3, m%2 + SWAP %2, %3 +%endmacro + +%macro FLOAT_INTERLEAVE6 2 +cglobal float_interleave6_%1, 2,7,%2, dst, src, src1, src2, src3, src4, src5 +%ifdef ARCH_X86_64 + %define lend r10d + mov lend, r2d +%else + %define lend dword r2m +%endif + mov src1q, [srcq+1*gprsize] + mov src2q, [srcq+2*gprsize] + mov src3q, [srcq+3*gprsize] + mov src4q, [srcq+4*gprsize] + mov src5q, [srcq+5*gprsize] + mov srcq, [srcq] + sub src1q, srcq + sub src2q, srcq + sub src3q, srcq + sub src4q, srcq + sub src5q, srcq +.loop: +%ifidn %1, sse + movaps m0, [srcq] + movaps m1, [srcq+src1q] + movaps m2, [srcq+src2q] + movaps m3, [srcq+src3q] + movaps m4, [srcq+src4q] + movaps m5, [srcq+src5q] + + BUTTERFLYPS 0, 1, 6 + BUTTERFLYPS 2, 3, 6 + BUTTERFLYPS 4, 5, 6 + + movaps m6, m4 + shufps m4, m0, 0xe4 + movlhps m0, m2 + movhlps m6, m2 + movaps [dstq ], m0 + movaps [dstq+16], m4 + movaps [dstq+32], m6 + + movaps m6, m5 + shufps m5, m1, 0xe4 + movlhps m1, m3 + movhlps m6, m3 + movaps [dstq+48], m1 + movaps [dstq+64], m5 + movaps [dstq+80], m6 +%else ; mmx + movq m0, [srcq] + movq m1, [srcq+src1q] + movq m2, [srcq+src2q] + movq m3, [srcq+src3q] + movq m4, [srcq+src4q] + movq m5, [srcq+src5q] + + SBUTTERFLY dq, 0, 1, 6 + SBUTTERFLY dq, 2, 3, 6 + SBUTTERFLY dq, 4, 5, 6 + movq [dstq ], m0 + movq [dstq+ 8], m2 + movq [dstq+16], m4 + movq [dstq+24], m1 + movq [dstq+32], m3 + movq [dstq+40], m5 +%endif + add srcq, mmsize + add dstq, mmsize*6 + sub lend, mmsize/4 + jg .loop +%ifidn %1, mmx + emms +%endif + REP_RET +%endmacro + +INIT_MMX +FLOAT_INTERLEAVE6 mmx, 0 +INIT_XMM +FLOAT_INTERLEAVE6 sse, 7 + +;----------------------------------------------------------------------------- +; void ff_float_interleave2(float *dst, const float **src, unsigned int len); +;----------------------------------------------------------------------------- + +%macro FLOAT_INTERLEAVE2 2 +cglobal float_interleave2_%1, 3,4,%2, dst, src, len, src1 + mov src1q, [srcq+gprsize] + mov srcq, [srcq ] + sub src1q, srcq +.loop + MOVPS m0, [srcq ] + MOVPS m1, [srcq+src1q ] + MOVPS m3, [srcq +mmsize] + MOVPS m4, [srcq+src1q+mmsize] + + MOVPS m2, m0 + PUNPCKLDQ m0, m1 + PUNPCKHDQ m2, m1 + + MOVPS m1, m3 + PUNPCKLDQ m3, m4 + PUNPCKHDQ m1, m4 + + MOVPS [dstq ], m0 + MOVPS [dstq+1*mmsize], m2 + MOVPS [dstq+2*mmsize], m3 + MOVPS [dstq+3*mmsize], m1 + + add srcq, mmsize*2 + add dstq, mmsize*4 + sub lend, mmsize/2 + jg .loop +%ifidn %1, mmx + emms +%endif + REP_RET +%endmacro + +INIT_MMX +%define MOVPS movq +%define PUNPCKLDQ punpckldq +%define PUNPCKHDQ punpckhdq +FLOAT_INTERLEAVE2 mmx, 0 +INIT_XMM +%define MOVPS movaps +%define PUNPCKLDQ unpcklps +%define PUNPCKHDQ unpckhps +FLOAT_INTERLEAVE2 sse, 5 |