diff options
author | Ronald S. Bultje <rsbultje@gmail.com> | 2011-05-24 18:28:40 -0400 |
---|---|---|
committer | Michael Niedermayer <michaelni@gmx.at> | 2011-05-28 11:41:32 +0200 |
commit | 78046dadc3145a7afd16034ab1178033a053a03e (patch) | |
tree | 53dead8ca328d60eae3bbfe446045b5e89abb91b | |
parent | fc72ec727e8731d57ede82502081366921667486 (diff) | |
download | ffmpeg-78046dadc3145a7afd16034ab1178033a053a03e.tar.gz |
rgb2rgb: remove duplicate mmx/mmx2/3dnow/sse2 functions.
Many functions have such a prefix, but do not actually use any
instructions or features from that set, thus giving the false
impression that swscale is highly optimized for a particular
system, whereas in reality it is not.
-rw-r--r-- | libswscale/x86/rgb2rgb.c | 2 | ||||
-rw-r--r-- | libswscale/x86/rgb2rgb_template.c | 74 |
2 files changed, 41 insertions, 35 deletions
diff --git a/libswscale/x86/rgb2rgb.c b/libswscale/x86/rgb2rgb.c index 81b29f32ff..78b804e367 100644 --- a/libswscale/x86/rgb2rgb.c +++ b/libswscale/x86/rgb2rgb.c @@ -111,7 +111,7 @@ DECLARE_ASM_CONST(8, uint64_t, blue_15mask) = 0x0000001f0000001fULL; #undef COMPILE_TEMPLATE_SSE2 #undef COMPILE_TEMPLATE_AMD3DNOW #define COMPILE_TEMPLATE_MMX2 0 -#define COMPILE_TEMPLATE_SSE2 1 +#define COMPILE_TEMPLATE_SSE2 0 #define COMPILE_TEMPLATE_AMD3DNOW 1 #define RENAME(a) a ## _3DNOW #include "rgb2rgb_template.c" diff --git a/libswscale/x86/rgb2rgb_template.c b/libswscale/x86/rgb2rgb_template.c index 8e6ce8586a..c4245afb34 100644 --- a/libswscale/x86/rgb2rgb_template.c +++ b/libswscale/x86/rgb2rgb_template.c @@ -30,15 +30,8 @@ #undef MOVNTQ #undef EMMS #undef SFENCE -#undef MMREG_SIZE #undef PAVGB -#if COMPILE_TEMPLATE_SSE2 -#define MMREG_SIZE 16 -#else -#define MMREG_SIZE 8 -#endif - #if COMPILE_TEMPLATE_AMD3DNOW #define PREFETCH "prefetch" #define PAVGB "pavgusb" @@ -64,6 +57,10 @@ #define SFENCE " # nop" #endif +#if !COMPILE_TEMPLATE_SSE2 + +#if !COMPILE_TEMPLATE_AMD3DNOW + static inline void RENAME(rgb24tobgr32)(const uint8_t *src, uint8_t *dst, long src_size) { uint8_t *dest = dst; @@ -1513,7 +1510,9 @@ static inline void RENAME(yuy2toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t SFENCE" \n\t" :::"memory"); } +#endif /* !COMPILE_TEMPLATE_AMD3DNOW */ +#if COMPILE_TEMPLATE_MMX2 || COMPILE_TEMPLATE_AMD3DNOW static inline void RENAME(planar2x)(const uint8_t *src, uint8_t *dst, long srcWidth, long srcHeight, long srcStride, long dstStride) { long x,y; @@ -1530,7 +1529,6 @@ static inline void RENAME(planar2x)(const uint8_t *src, uint8_t *dst, long srcWi dst+= dstStride; for (y=1; y<srcHeight; y++) { -#if COMPILE_TEMPLATE_MMX2 || COMPILE_TEMPLATE_AMD3DNOW const x86_reg mmxSize= srcWidth&~15; __asm__ volatile( "mov %4, %%"REG_a" \n\t" @@ -1564,17 +1562,10 @@ static inline void RENAME(planar2x)(const uint8_t *src, uint8_t *dst, long srcWi "punpckhbw %%mm3, %%mm7 \n\t" "punpcklbw %%mm2, %%mm4 \n\t" "punpckhbw %%mm2, %%mm6 \n\t" -#if 1 MOVNTQ" %%mm5, (%2, %%"REG_a", 2) \n\t" MOVNTQ" %%mm7, 8(%2, %%"REG_a", 2) \n\t" MOVNTQ" %%mm4, (%3, %%"REG_a", 2) \n\t" MOVNTQ" %%mm6, 8(%3, %%"REG_a", 2) \n\t" -#else - "movq %%mm5, (%2, %%"REG_a", 2) \n\t" - "movq %%mm7, 8(%2, %%"REG_a", 2) \n\t" - "movq %%mm4, (%3, %%"REG_a", 2) \n\t" - "movq %%mm6, 8(%3, %%"REG_a", 2) \n\t" -#endif "add $8, %%"REG_a" \n\t" "movq -1(%0, %%"REG_a"), %%mm4 \n\t" "movq -1(%1, %%"REG_a"), %%mm5 \n\t" @@ -1584,12 +1575,6 @@ static inline void RENAME(planar2x)(const uint8_t *src, uint8_t *dst, long srcWi "g" (-mmxSize) : "%"REG_a ); -#else - const x86_reg mmxSize=1; - - dst[0 ]= (3*src[0] + src[srcStride])>>2; - dst[dstStride]= ( src[0] + 3*src[srcStride])>>2; -#endif for (x=mmxSize-1; x<srcWidth-1; x++) { dst[2*x +1]= (3*src[x+0] + src[x+srcStride+1])>>2; @@ -1605,7 +1590,6 @@ static inline void RENAME(planar2x)(const uint8_t *src, uint8_t *dst, long srcWi } // last line -#if 1 dst[0]= src[0]; for (x=0; x<srcWidth-1; x++) { @@ -1613,18 +1597,14 @@ static inline void RENAME(planar2x)(const uint8_t *src, uint8_t *dst, long srcWi dst[2*x+2]= ( src[x] + 3*src[x+1])>>2; } dst[2*srcWidth-1]= src[srcWidth-1]; -#else - for (x=0; x<srcWidth; x++) { - dst[2*x+0]= - dst[2*x+1]= src[x]; - } -#endif __asm__ volatile(EMMS" \n\t" SFENCE" \n\t" :::"memory"); } +#endif /* COMPILE_TEMPLATE_MMX2 || COMPILE_TEMPLATE_AMD3DNOW */ +#if !COMPILE_TEMPLATE_AMD3DNOW /** * Height should be a multiple of 2 and width should be a multiple of 16. * (If this is a problem for anyone then tell me, and I will fix it.) @@ -1728,6 +1708,7 @@ static inline void RENAME(uyvytoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t SFENCE" \n\t" :::"memory"); } +#endif /* !COMPILE_TEMPLATE_AMD3DNOW */ /** * Height should be a multiple of 2 and width should be a multiple of 2. @@ -1978,7 +1959,9 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_ rgb24toyv12_c(src, ydst, udst, vdst, width, height-y, lumStride, chromStride, srcStride); } +#endif /* !COMPILE_TEMPLATE_SSE2 */ +#if !COMPILE_TEMPLATE_AMD3DNOW static void RENAME(interleaveBytes)(const uint8_t *src1, const uint8_t *src2, uint8_t *dest, long width, long height, long src1Stride, long src2Stride, long dstStride) @@ -2048,7 +2031,10 @@ static void RENAME(interleaveBytes)(const uint8_t *src1, const uint8_t *src2, ui ::: "memory" ); } +#endif /* !COMPILE_TEMPLATE_AMD3DNOW */ +#if !COMPILE_TEMPLATE_SSE2 +#if !COMPILE_TEMPLATE_AMD3DNOW static inline void RENAME(vu9_to_vu12)(const uint8_t *src1, const uint8_t *src2, uint8_t *dst1, uint8_t *dst2, long width, long height, @@ -2228,6 +2214,7 @@ static inline void RENAME(yvu9_to_yuy2)(const uint8_t *src1, const uint8_t *src2 ::: "memory" ); } +#endif /* !COMPILE_TEMPLATE_AMD3DNOW */ static void RENAME(extract_even)(const uint8_t *src, uint8_t *dst, x86_reg count) { @@ -2266,6 +2253,7 @@ static void RENAME(extract_even)(const uint8_t *src, uint8_t *dst, x86_reg count } } +#if !COMPILE_TEMPLATE_AMD3DNOW static void RENAME(extract_even2)(const uint8_t *src, uint8_t *dst0, uint8_t *dst1, x86_reg count) { dst0+= count; @@ -2311,6 +2299,7 @@ static void RENAME(extract_even2)(const uint8_t *src, uint8_t *dst0, uint8_t *ds count++; } } +#endif /* !COMPILE_TEMPLATE_AMD3DNOW */ static void RENAME(extract_even2avg)(const uint8_t *src0, const uint8_t *src1, uint8_t *dst0, uint8_t *dst1, x86_reg count) { @@ -2365,6 +2354,7 @@ static void RENAME(extract_even2avg)(const uint8_t *src0, const uint8_t *src1, u } } +#if !COMPILE_TEMPLATE_AMD3DNOW static void RENAME(extract_odd2)(const uint8_t *src, uint8_t *dst0, uint8_t *dst1, x86_reg count) { dst0+= count; @@ -2411,6 +2401,7 @@ static void RENAME(extract_odd2)(const uint8_t *src, uint8_t *dst0, uint8_t *dst count++; } } +#endif /* !COMPILE_TEMPLATE_AMD3DNOW */ static void RENAME(extract_odd2avg)(const uint8_t *src0, const uint8_t *src1, uint8_t *dst0, uint8_t *dst1, x86_reg count) { @@ -2492,6 +2483,7 @@ static void RENAME(yuyvtoyuv420)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, co ); } +#if !COMPILE_TEMPLATE_AMD3DNOW static void RENAME(yuyvtoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src, long width, long height, long lumStride, long chromStride, long srcStride) @@ -2514,6 +2506,7 @@ static void RENAME(yuyvtoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, co ::: "memory" ); } +#endif /* !COMPILE_TEMPLATE_AMD3DNOW */ static void RENAME(uyvytoyuv420)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src, long width, long height, @@ -2540,6 +2533,7 @@ static void RENAME(uyvytoyuv420)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, co ); } +#if !COMPILE_TEMPLATE_AMD3DNOW static void RENAME(uyvytoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src, long width, long height, long lumStride, long chromStride, long srcStride) @@ -2562,9 +2556,13 @@ static void RENAME(uyvytoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, co ::: "memory" ); } +#endif /* !COMPILE_TEMPLATE_AMD3DNOW */ +#endif /* !COMPILE_TEMPLATE_SSE2 */ static inline void RENAME(rgb2rgb_init)(void) { +#if !COMPILE_TEMPLATE_SSE2 +#if !COMPILE_TEMPLATE_AMD3DNOW rgb15to16 = RENAME(rgb15to16); rgb15tobgr24 = RENAME(rgb15tobgr24); rgb15to32 = RENAME(rgb15to32); @@ -2588,14 +2586,22 @@ static inline void RENAME(rgb2rgb_init)(void) yuv422ptoyuy2 = RENAME(yuv422ptoyuy2); yuv422ptouyvy = RENAME(yuv422ptouyvy); yuy2toyv12 = RENAME(yuy2toyv12); - planar2x = RENAME(planar2x); - rgb24toyv12 = RENAME(rgb24toyv12); - interleaveBytes = RENAME(interleaveBytes); vu9_to_vu12 = RENAME(vu9_to_vu12); yvu9_to_yuy2 = RENAME(yvu9_to_yuy2); - - uyvytoyuv420 = RENAME(uyvytoyuv420); uyvytoyuv422 = RENAME(uyvytoyuv422); - yuyvtoyuv420 = RENAME(yuyvtoyuv420); yuyvtoyuv422 = RENAME(yuyvtoyuv422); +#endif /* !COMPILE_TEMPLATE_SSE2 */ + +#if COMPILE_TEMPLATE_MMX2 || COMPILE_TEMPLATE_AMD3DNOW + planar2x = RENAME(planar2x); +#endif /* COMPILE_TEMPLATE_MMX2 || COMPILE_TEMPLATE_AMD3DNOW */ + rgb24toyv12 = RENAME(rgb24toyv12); + + yuyvtoyuv420 = RENAME(yuyvtoyuv420); + uyvytoyuv420 = RENAME(uyvytoyuv420); +#endif /* COMPILE_TEMPLATE_SSE2 */ + +#if !COMPILE_TEMPLATE_AMD3DNOW + interleaveBytes = RENAME(interleaveBytes); +#endif /* !COMPILE_TEMPLATE_AMD3DNOW */ } |