author    | Aurelien Jacobs <aurel@gnuage.org> | 2004-10-21 11:55:20 +0000
committer | Aurelien Jacobs <aurel@gnuage.org> | 2004-10-21 11:55:20 +0000
commit    | 6e1c66bc06af5009b4aa5ab7e103a0e289b06310 (patch)
tree      | 1dc49306fbec0a4d7d35103bf56d99c7e023a75c /postproc/swscale.c
parent    | ed861c6bd24548f96019923c48b882e14070555d (diff)
download  | ffmpeg-6e1c66bc06af5009b4aa5ab7e103a0e289b06310.tar.gz
adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
Originally committed as revision 13721 to svn://svn.mplayerhq.hu/mplayer/trunk/postproc
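The conversion leaves the MMX/MMX2/SSE/3DNow! instruction sequences themselves untouched; what changes is how registers are named. Hard-coded 32-bit names such as %%eax and %%esi are replaced with REG_* string macros that expand to the register of the target word size, and the #ifdef ARCH_X86 guards are widened to also cover ARCH_X86_64. Those macros are defined elsewhere in the tree, not in this diff; the block below is a sketch of what they are assumed to look like, limited to the names swscale.c uses.

    /* Assumed REG_* register-name macros (defined outside this diff);
     * only the names used in swscale.c are shown here. */
    #if defined(ARCH_X86_64)
    #define REG_a "rax"
    #define REG_b "rbx"
    #define REG_c "rcx"
    #define REG_d "rdx"
    #define REG_S "rsi"
    #define REG_D "rdi"
    #else
    #define REG_a "eax"
    #define REG_b "ebx"
    #define REG_c "ecx"
    #define REG_d "edx"
    #define REG_S "esi"
    #define REG_D "edi"
    #endif

With C string pasting, "movq (%%"REG_d", %%"REG_a"), %%mm3\n\t" becomes "movq (%%edx, %%eax), %%mm3\n\t" in a 32-bit build and "movq (%%rdx, %%rax), %%mm3\n\t" on x86_64, so a single asm body assembles for both targets.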
Diffstat (limited to 'postproc/swscale.c')
-rw-r--r-- | postproc/swscale.c | 96
1 file changed, 48 insertions, 48 deletions
diff --git a/postproc/swscale.c b/postproc/swscale.c
index 4b7eec8cc8..6e9419789f 100644
--- a/postproc/swscale.c
+++ b/postproc/swscale.c
@@ -145,7 +145,7 @@ write special BGR->BGR scaler
 #define MIN(a,b) ((a) > (b) ? (b) : (a))
 #define MAX(a,b) ((a) < (b) ? (b) : (a))

-#ifdef ARCH_X86
+#if defined(ARCH_X86) || defined(ARCH_X86_64)
 static uint64_t attribute_used __attribute__((aligned(8))) bF8= 0xF8F8F8F8F8F8F8F8LL;
 static uint64_t attribute_used __attribute__((aligned(8))) bFC= 0xFCFCFCFCFCFCFCFCLL;
 static uint64_t __attribute__((aligned(8))) w10= 0x0010001000100010LL;
@@ -204,7 +204,7 @@ extern const uint8_t dither_8x8_32[8][8];
 extern const uint8_t dither_8x8_73[8][8];
 extern const uint8_t dither_8x8_220[8][8];

-#ifdef ARCH_X86
+#if defined(ARCH_X86) || defined(ARCH_X86_64)
 void in_asm_used_var_warning_killer()
 {
  volatile int i= bF8+bFC+w10+
@@ -679,7 +679,7 @@ static inline void yuv2packedXinC(SwsContext *c, int16_t *lumFilter, int16_t **l
 #endif //HAVE_ALTIVEC
 #endif //ARCH_POWERPC

-#ifdef ARCH_X86
+#if defined(ARCH_X86) || defined(ARCH_X86_64)

 #if (defined (HAVE_MMX) && !defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
 #define COMPILE_MMX
@@ -692,7 +692,7 @@ static inline void yuv2packedXinC(SwsContext *c, int16_t *lumFilter, int16_t **l
 #if (defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
 #define COMPILE_3DNOW
 #endif
-#endif //ARCH_X86
+#endif //ARCH_X86 || ARCH_X86_64

 #undef HAVE_MMX
 #undef HAVE_MMX2
@@ -716,7 +716,7 @@ static inline void yuv2packedXinC(SwsContext *c, int16_t *lumFilter, int16_t **l
 #endif
 #endif //ARCH_POWERPC

-#ifdef ARCH_X86
+#if defined(ARCH_X86) || defined(ARCH_X86_64)

 //X86 versions
 /*
@@ -758,7 +758,7 @@ static inline void yuv2packedXinC(SwsContext *c, int16_t *lumFilter, int16_t **l
 #include "swscale_template.c"
 #endif

-#endif //ARCH_X86
+#endif //ARCH_X86 || ARCH_X86_64

 // minor note: the HAVE_xyz is messed up after that line so don't use it

@@ -783,7 +783,7 @@ static inline void initFilter(int16_t **outFilter, int16_t **filterPos, int *out
     int minFilterSize;
     double *filter=NULL;
     double *filter2=NULL;
-#ifdef ARCH_X86
+#if defined(ARCH_X86) || defined(ARCH_X86_64)
     if(flags & SWS_CPU_CAPS_MMX)
         asm volatile("emms\n\t"::: "memory"); //FIXME this shouldnt be required but it IS (even for non mmx versions)
 #endif
@@ -1142,17 +1142,17 @@ static inline void initFilter(int16_t **outFilter, int16_t **filterPos, int *out
     free(filter);
 }

-#ifdef ARCH_X86
+#if defined(ARCH_X86) || defined(ARCH_X86_64)
 static void initMMX2HScaler(int dstW, int xInc, uint8_t *funnyCode, int16_t *filter, int32_t *filterPos, int numSplits)
 {
     uint8_t *fragmentA;
-    int imm8OfPShufW1A;
-    int imm8OfPShufW2A;
-    int fragmentLengthA;
+    long imm8OfPShufW1A;
+    long imm8OfPShufW2A;
+    long fragmentLengthA;
     uint8_t *fragmentB;
-    int imm8OfPShufW1B;
-    int imm8OfPShufW2B;
-    int fragmentLengthB;
+    long imm8OfPShufW1B;
+    long imm8OfPShufW2B;
+    long fragmentLengthB;
     int fragmentPos;

     int xpos, i;
@@ -1165,9 +1165,9 @@ static void initMMX2HScaler(int dstW, int xInc, uint8_t *funnyCode, int16_t *fil
         "jmp 9f                 \n\t"
     // Begin
         "0:                     \n\t"
-        "movq (%%edx, %%eax), %%mm3     \n\t"
-        "movd (%%ecx, %%esi), %%mm0     \n\t"
-        "movd 1(%%ecx, %%esi), %%mm1    \n\t"
+        "movq (%%"REG_d", %%"REG_a"), %%mm3\n\t"
+        "movd (%%"REG_c", %%"REG_S"), %%mm0\n\t"
+        "movd 1(%%"REG_c", %%"REG_S"), %%mm1\n\t"
         "punpcklbw %%mm7, %%mm1         \n\t"
         "punpcklbw %%mm7, %%mm0         \n\t"
         "pshufw $0xFF, %%mm1, %%mm1     \n\t"
@@ -1175,26 +1175,26 @@ static void initMMX2HScaler(int dstW, int xInc, uint8_t *funnyCode, int16_t *fil
         "pshufw $0xFF, %%mm0, %%mm0     \n\t"
         "2:                     \n\t"
         "psubw %%mm1, %%mm0             \n\t"
-        "movl 8(%%ebx, %%eax), %%esi    \n\t"
+        "mov 8(%%"REG_b", %%"REG_a"), %%"REG_S"\n\t"
         "pmullw %%mm3, %%mm0            \n\t"
         "psllw $7, %%mm1                \n\t"
         "paddw %%mm1, %%mm0             \n\t"
-        "movq %%mm0, (%%edi, %%eax)     \n\t"
+        "movq %%mm0, (%%"REG_D", %%"REG_a")\n\t"

-        "addl $8, %%eax                 \n\t"
+        "add $8, %%"REG_a"              \n\t"

     // End
         "9:                     \n\t"
//      "int $3\n\t"
-        "leal 0b, %0                    \n\t"
-        "leal 1b, %1                    \n\t"
-        "leal 2b, %2                    \n\t"
-        "decl %1                        \n\t"
-        "decl %2                        \n\t"
-        "subl %0, %1                    \n\t"
-        "subl %0, %2                    \n\t"
-        "leal 9b, %3                    \n\t"
-        "subl %0, %3                    \n\t"
+        "lea 0b, %0                     \n\t"
+        "lea 1b, %1                     \n\t"
+        "lea 2b, %2                     \n\t"
+        "dec %1                         \n\t"
+        "dec %2                         \n\t"
+        "sub %0, %1                     \n\t"
+        "sub %0, %2                     \n\t"
+        "lea 9b, %3                     \n\t"
+        "sub %0, %3                     \n\t"
         :"=r" (fragmentA),
         "=r" (imm8OfPShufW1A),
         "=r" (imm8OfPShufW2A),
@@ -1205,34 +1205,34 @@ static void initMMX2HScaler(int dstW, int xInc, uint8_t *funnyCode, int16_t *fil
         "jmp 9f                 \n\t"
     // Begin
         "0:                     \n\t"
-        "movq (%%edx, %%eax), %%mm3     \n\t"
-        "movd (%%ecx, %%esi), %%mm0     \n\t"
+        "movq (%%"REG_d", %%"REG_a"), %%mm3\n\t"
+        "movd (%%"REG_c", %%"REG_S"), %%mm0\n\t"
         "punpcklbw %%mm7, %%mm0         \n\t"
         "pshufw $0xFF, %%mm0, %%mm1     \n\t"
         "1:                     \n\t"
         "pshufw $0xFF, %%mm0, %%mm0     \n\t"
         "2:                     \n\t"
         "psubw %%mm1, %%mm0             \n\t"
-        "movl 8(%%ebx, %%eax), %%esi    \n\t"
+        "mov 8(%%"REG_b", %%"REG_a"), %%"REG_S"\n\t"
         "pmullw %%mm3, %%mm0            \n\t"
         "psllw $7, %%mm1                \n\t"
         "paddw %%mm1, %%mm0             \n\t"
-        "movq %%mm0, (%%edi, %%eax)     \n\t"
+        "movq %%mm0, (%%"REG_D", %%"REG_a")\n\t"

-        "addl $8, %%eax                 \n\t"
+        "add $8, %%"REG_a"              \n\t"

     // End
         "9:                     \n\t"
//      "int $3\n\t"
-        "leal 0b, %0                    \n\t"
-        "leal 1b, %1                    \n\t"
-        "leal 2b, %2                    \n\t"
-        "decl %1                        \n\t"
-        "decl %2                        \n\t"
-        "subl %0, %1                    \n\t"
-        "subl %0, %2                    \n\t"
-        "leal 9b, %3                    \n\t"
-        "subl %0, %3                    \n\t"
+        "lea 0b, %0                     \n\t"
+        "lea 1b, %1                     \n\t"
+        "lea 2b, %2                     \n\t"
+        "dec %1                         \n\t"
+        "dec %2                         \n\t"
+        "sub %0, %1                     \n\t"
+        "sub %0, %2                     \n\t"
+        "lea 9b, %3                     \n\t"
+        "sub %0, %3                     \n\t"
         :"=r" (fragmentB),
         "=r" (imm8OfPShufW1B),
         "=r" (imm8OfPShufW2B),
@@ -1313,7 +1313,7 @@ static void initMMX2HScaler(int dstW, int xInc, uint8_t *funnyCode, int16_t *fil
     }
     filterPos[i/2]= xpos>>16; // needed to jump to the next part
 }
-#endif // ARCH_X86
+#endif // ARCH_X86 || ARCH_X86_64

 static void globalInit(){
     // generating tables:
@@ -1327,7 +1327,7 @@ static void globalInit(){
 static SwsFunc getSwsFunc(int flags){

 #ifdef RUNTIME_CPUDETECT
-#ifdef ARCH_X86
+#if defined(ARCH_X86) || defined(ARCH_X86_64)
     // ordered per speed fasterst first
     if(flags & SWS_CPU_CAPS_MMX2)
         return swScale_MMX2;
@@ -1755,7 +1755,7 @@ SwsContext *sws_getContext(int srcW, int srcH, int origSrcFormat, int dstW, int
     int unscaled, needsDither;
     int srcFormat, dstFormat;
     SwsFilter dummyFilter= {NULL, NULL, NULL, NULL};
-#ifdef ARCH_X86
+#if defined(ARCH_X86) || defined(ARCH_X86_64)
     if(flags & SWS_CPU_CAPS_MMX)
         asm volatile("emms\n\t"::: "memory");
 #endif
@@ -1995,7 +1995,7 @@ SwsContext *sws_getContext(int srcW, int srcH, int origSrcFormat, int dstW, int
             (flags&SWS_BICUBLIN) ? (flags|SWS_BILINEAR) : flags,
             srcFilter->chrH, dstFilter->chrH, c->param);

-#ifdef ARCH_X86
+#if defined(ARCH_X86) || defined(ARCH_X86_64)
// can't downscale !!!
         if(c->canMMX2BeUsed && (flags & SWS_FAST_BILINEAR))
         {
@@ -2136,7 +2136,7 @@ SwsContext *sws_getContext(int srcW, int srcH, int origSrcFormat, int dstW, int
         }
         else
         {
-#ifdef ARCH_X86
+#if defined(ARCH_X86) || defined(ARCH_X86_64)
             MSG_V("SwScaler: using X86-Asm scaler for horizontal scaling\n");
 #else
             if(flags & SWS_FAST_BILINEAR)
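Two other parts of the pattern recur throughout the diff: size-suffixed mnemonics (movl, addl, decl, subl, leal) lose their suffix so the assembler derives the operand size from whatever register the REG_* macro names, and the variables that receive label addresses and fragment lengths through the "=r" outputs become long, because those values are pointer-sized on x86_64. The function below is not from the patch; it is a minimal, illustrative sketch, assuming the REG_* definitions above, of how one inline-asm body written this way builds as either 32- or 64-bit code.

    #include <stdint.h>

    /* Illustrative only: copy n bytes using an explicitly named index
     * register, the way the scaler's code fragments do.  The count is a
     * 'long' so it matches the pointer-sized register on x86_64.
     * Assumes the REG_a/REG_c string macros sketched above. */
    static void copy_bytes(uint8_t *dst, const uint8_t *src, long n)
    {
        __asm__ volatile(
            "xor %%"REG_a", %%"REG_a"       \n\t"  /* i = 0                    */
            "1:                             \n\t"
            "cmp %2, %%"REG_a"              \n\t"  /* i >= n ? -> done         */
            "jge 2f                         \n\t"
            "movb (%1, %%"REG_a"), %%cl     \n\t"  /* cl = src[i]              */
            "movb %%cl, (%0, %%"REG_a")     \n\t"  /* dst[i] = cl              */
            "add $1, %%"REG_a"              \n\t"  /* size inferred from REG_a */
            "jmp 1b                         \n\t"
            "2:                             \n\t"
            : /* no outputs */
            : "r"(dst), "r"(src), "r"(n)
            : REG_a, REG_c, "memory", "cc");
    }

Generic "r" constraints could let the compiler pick the registers instead, but initMMX2HScaler copies its asm fragments into a buffer and patches them at run time, so the registers they use have to be pinned down in the source; the REG_* macros keep that possible without duplicating the asm for each ABI.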