about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Diego Biurrun <diego@biurrun.de> 2012-08-02 00:55:34 +0200
committer Diego Biurrun <diego@biurrun.de> 2012-11-02 17:05:29 +0100
commit 0a7a94f2e53bcdb8ac5857eb8c67c16f6f1d0f2f (patch)
tree b736ff407825a764a4e1d017ed6afefa802fb0f3
parent 9a07c1332cfe092b57b5758f22b686ca58806c60 (diff)
download ffmpeg-0a7a94f2e53bcdb8ac5857eb8c67c16f6f1d0f2f.tar.gz
x86: Refactor PSWAPD fallback implementations and port to cpuflags
-rw-r--r-- libavcodec/x86/fft.asm 16
-rw-r--r-- libavcodec/x86/fmtconvert.asm 17
-rw-r--r-- libavutil/x86/x86util.asm 12
3 files changed, 16 insertions, 29 deletions
diff --git a/libavcodec/x86/fft.asm b/libavcodec/x86/fft.asm
index 8c69f1f771..111f3229b4 100644
--- a/libavcodec/x86/fft.asm
+++ b/libavcodec/x86/fft.asm
@@ -105,7 +105,8 @@ SECTION_TEXT
pfadd %5, %4 ; {t6,t5}
pxor %3, [ps_m1p1] ; {t8,t7}
mova %6, %1
- PSWAPD %3, %3
+ movd [r0+12], %3
+ punpckhdq %3, [r0+8]
pfadd %1, %5 ; {r0,i0}
pfsub %6, %5 ; {r2,i2}
mova %4, %2
@@ -498,19 +499,6 @@ fft8 %+ SUFFIX:
%endmacro
%if ARCH_X86_32
-%macro PSWAPD 2
-%if cpuflag(3dnowext)
- pswapd %1, %2
-%elifidn %1, %2
- movd [r0+12], %1
- punpckhdq %1, [r0+8]
-%else
- movq %1, %2
- psrlq %1, 32
- punpckldq %1, %2
-%endif
-%endmacro
-
INIT_MMX 3dnowext
FFT48_3DNOW
diff --git a/libavcodec/x86/fmtconvert.asm b/libavcodec/x86/fmtconvert.asm
index fb183ce9a6..77b8bd7069 100644
--- a/libavcodec/x86/fmtconvert.asm
+++ b/libavcodec/x86/fmtconvert.asm
@@ -246,16 +246,6 @@ FLOAT_TO_INT16_INTERLEAVE2
INIT_XMM sse2
FLOAT_TO_INT16_INTERLEAVE2
-
-%macro PSWAPD_SSE 2
- pshufw %1, %2, 0x4e
-%endmacro
-%macro PSWAPD_3DNOW 2
- movq %1, %2
- psrlq %1, 32
- punpckldq %1, %2
-%endmacro
-
%macro FLOAT_TO_INT16_INTERLEAVE6 0
; void float_to_int16_interleave6_sse(int16_t *dst, const float **src, int len)
cglobal float_to_int16_interleave6, 2, 8, 0, dst, src, src1, src2, src3, src4, src5, len
@@ -285,11 +275,11 @@ cglobal float_to_int16_interleave6, 2, 8, 0, dst, src, src1, src2, src3, src4, s
packssdw mm0, mm3
packssdw mm1, mm4
packssdw mm2, mm5
- pswapd mm3, mm0
+ PSWAPD mm3, mm0
punpcklwd mm0, mm1
punpckhwd mm1, mm2
punpcklwd mm2, mm3
- pswapd mm3, mm0
+ PSWAPD mm3, mm0
punpckldq mm0, mm2
punpckhdq mm2, mm1
punpckldq mm1, mm3
@@ -305,12 +295,9 @@ cglobal float_to_int16_interleave6, 2, 8, 0, dst, src, src1, src2, src3, src4, s
%endmacro ; FLOAT_TO_INT16_INTERLEAVE6
INIT_MMX sse
-%define pswapd PSWAPD_SSE
FLOAT_TO_INT16_INTERLEAVE6
INIT_MMX 3dnow
-%define pswapd PSWAPD_3DNOW
FLOAT_TO_INT16_INTERLEAVE6
-%undef pswapd
INIT_MMX 3dnowext
FLOAT_TO_INT16_INTERLEAVE6
diff --git a/libavutil/x86/x86util.asm b/libavutil/x86/x86util.asm
index ca0041acf7..9183d38595 100644
--- a/libavutil/x86/x86util.asm
+++ b/libavutil/x86/x86util.asm
@@ -319,6 +319,18 @@
%endif
%endmacro
+%macro PSWAPD 2
+%if cpuflag(mmxext)
+ pshufw %1, %2, q1032
+%elif cpuflag(3dnowext)
+ pswapd %1, %2
+%elif cpuflag(3dnow)
+ movq %1, %2
+ psrlq %1, 32
+ punpckldq %1, %2
+%endif
+%endmacro
+
%macro DEINTB 5 ; mask, reg1, mask, reg2, optional src to fill masks from
%ifnum %5
pand m%3, m%5, m%4 ; src .. y6 .. y4