diff options
author | Diego Biurrun <diego@biurrun.de> | 2012-08-02 00:55:34 +0200 |
---|---|---|
committer | Diego Biurrun <diego@biurrun.de> | 2012-11-02 17:05:29 +0100 |
commit | 0a7a94f2e53bcdb8ac5857eb8c67c16f6f1d0f2f (patch) | |
tree | b736ff407825a764a4e1d017ed6afefa802fb0f3 | |
parent | 9a07c1332cfe092b57b5758f22b686ca58806c60 (diff) | |
download | ffmpeg-0a7a94f2e53bcdb8ac5857eb8c67c16f6f1d0f2f.tar.gz |
x86: Refactor PSWAPD fallback implementations and port to cpuflags
-rw-r--r-- | libavcodec/x86/fft.asm | 16 | ||||
-rw-r--r-- | libavcodec/x86/fmtconvert.asm | 17 | ||||
-rw-r--r-- | libavutil/x86/x86util.asm | 12 |
3 files changed, 16 insertions, 29 deletions
diff --git a/libavcodec/x86/fft.asm b/libavcodec/x86/fft.asm index 8c69f1f771..111f3229b4 100644 --- a/libavcodec/x86/fft.asm +++ b/libavcodec/x86/fft.asm @@ -105,7 +105,8 @@ SECTION_TEXT pfadd %5, %4 ; {t6,t5} pxor %3, [ps_m1p1] ; {t8,t7} mova %6, %1 - PSWAPD %3, %3 + movd [r0+12], %3 + punpckhdq %3, [r0+8] pfadd %1, %5 ; {r0,i0} pfsub %6, %5 ; {r2,i2} mova %4, %2 @@ -498,19 +499,6 @@ fft8 %+ SUFFIX: %endmacro %if ARCH_X86_32 -%macro PSWAPD 2 -%if cpuflag(3dnowext) - pswapd %1, %2 -%elifidn %1, %2 - movd [r0+12], %1 - punpckhdq %1, [r0+8] -%else - movq %1, %2 - psrlq %1, 32 - punpckldq %1, %2 -%endif -%endmacro - INIT_MMX 3dnowext FFT48_3DNOW diff --git a/libavcodec/x86/fmtconvert.asm b/libavcodec/x86/fmtconvert.asm index fb183ce9a6..77b8bd7069 100644 --- a/libavcodec/x86/fmtconvert.asm +++ b/libavcodec/x86/fmtconvert.asm @@ -246,16 +246,6 @@ FLOAT_TO_INT16_INTERLEAVE2 INIT_XMM sse2 FLOAT_TO_INT16_INTERLEAVE2 - -%macro PSWAPD_SSE 2 - pshufw %1, %2, 0x4e -%endmacro -%macro PSWAPD_3DNOW 2 - movq %1, %2 - psrlq %1, 32 - punpckldq %1, %2 -%endmacro - %macro FLOAT_TO_INT16_INTERLEAVE6 0 ; void float_to_int16_interleave6_sse(int16_t *dst, const float **src, int len) cglobal float_to_int16_interleave6, 2, 8, 0, dst, src, src1, src2, src3, src4, src5, len @@ -285,11 +275,11 @@ cglobal float_to_int16_interleave6, 2, 8, 0, dst, src, src1, src2, src3, src4, s packssdw mm0, mm3 packssdw mm1, mm4 packssdw mm2, mm5 - pswapd mm3, mm0 + PSWAPD mm3, mm0 punpcklwd mm0, mm1 punpckhwd mm1, mm2 punpcklwd mm2, mm3 - pswapd mm3, mm0 + PSWAPD mm3, mm0 punpckldq mm0, mm2 punpckhdq mm2, mm1 punpckldq mm1, mm3 @@ -305,12 +295,9 @@ cglobal float_to_int16_interleave6, 2, 8, 0, dst, src, src1, src2, src3, src4, s %endmacro ; FLOAT_TO_INT16_INTERLEAVE6 INIT_MMX sse -%define pswapd PSWAPD_SSE FLOAT_TO_INT16_INTERLEAVE6 INIT_MMX 3dnow -%define pswapd PSWAPD_3DNOW FLOAT_TO_INT16_INTERLEAVE6 -%undef pswapd INIT_MMX 3dnowext FLOAT_TO_INT16_INTERLEAVE6 diff --git a/libavutil/x86/x86util.asm b/libavutil/x86/x86util.asm index ca0041acf7..9183d38595 100644 --- a/libavutil/x86/x86util.asm +++ b/libavutil/x86/x86util.asm @@ -319,6 +319,18 @@ %endif %endmacro +%macro PSWAPD 2 +%if cpuflag(mmxext) + pshufw %1, %2, q1032 +%elif cpuflag(3dnowext) + pswapd %1, %2 +%elif cpuflag(3dnow) + movq %1, %2 + psrlq %1, 32 + punpckldq %1, %2 +%endif +%endmacro + %macro DEINTB 5 ; mask, reg1, mask, reg2, optional src to fill masks from %ifnum %5 pand m%3, m%5, m%4 ; src .. y6 .. y4 |