about | summary | refs | log | tree | commit | diff | stats
path: root/libswscale/x86/swscale.c
diff options
context:
space:
mode:
author: Vitor Sessak <vsessak@google.com> 2014-09-17 21:10:16 +0200
committer: Michael Niedermayer <michaelni@gmx.at> 2014-09-18 00:03:29 +0200
commit: 55d11d277bf52b0c7f88f45f1d3fd336fa8c431f (patch)
tree: 2420ddff6313b668c5c7fecff4b3628fdfbaef4f /libswscale/x86/swscale.c
parent: 41d82b85ab0ee8bb2931c1f783e30c38c2fb5206 (diff)
download: ffmpeg-55d11d277bf52b0c7f88f45f1d3fd336fa8c431f.tar.gz
swscale/x86: do not expect registers to be preserved across inline ASM blocks
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
Diffstat (limited to 'libswscale/x86/swscale.c')
-rw-r--r-- libswscale/x86/swscale.c | 83
1 files changed, 44 insertions, 39 deletions
diff --git a/libswscale/x86/swscale.c b/libswscale/x86/swscale.c
index c4c0e28e53..8ce87b3e37 100644
--- a/libswscale/x86/swscale.c
+++ b/libswscale/x86/swscale.c
@@ -205,36 +205,20 @@ static void yuv2yuvX_sse3(const int16_t *filter, int filterSize,
yuv2yuvX_mmxext(filter, filterSize, src, dest, dstW, dither, offset);
return;
}
- if (offset) {
- __asm__ volatile("movq (%0), %%xmm3\n\t"
- "movdqa %%xmm3, %%xmm4\n\t"
- "psrlq $24, %%xmm3\n\t"
- "psllq $40, %%xmm4\n\t"
- "por %%xmm4, %%xmm3\n\t"
- :: "r"(dither)
- );
- } else {
- __asm__ volatile("movq (%0), %%xmm3\n\t"
- :: "r"(dither)
- );
- }
filterSize--;
- __asm__ volatile(
- "pxor %%xmm0, %%xmm0\n\t"
- "punpcklbw %%xmm0, %%xmm3\n\t"
- "movd %0, %%xmm1\n\t"
- "punpcklwd %%xmm1, %%xmm1\n\t"
- "punpckldq %%xmm1, %%xmm1\n\t"
- "punpcklqdq %%xmm1, %%xmm1\n\t"
- "psllw $3, %%xmm1\n\t"
- "paddw %%xmm1, %%xmm3\n\t"
- "psraw $4, %%xmm3\n\t"
- ::"m"(filterSize)
- );
- __asm__ volatile(
- "movdqa %%xmm3, %%xmm4\n\t"
- "movdqa %%xmm3, %%xmm7\n\t"
- "movl %3, %%ecx\n\t"
+#define MAIN_FUNCTION \
+ "pxor %%xmm0, %%xmm0 \n\t" \
+ "punpcklbw %%xmm0, %%xmm3 \n\t" \
+ "movd %4, %%xmm1 \n\t" \
+ "punpcklwd %%xmm1, %%xmm1 \n\t" \
+ "punpckldq %%xmm1, %%xmm1 \n\t" \
+ "punpcklqdq %%xmm1, %%xmm1 \n\t" \
+ "psllw $3, %%xmm1 \n\t" \
+ "paddw %%xmm1, %%xmm3 \n\t" \
+ "psraw $4, %%xmm3 \n\t" \
+ "movdqa %%xmm3, %%xmm4 \n\t" \
+ "movdqa %%xmm3, %%xmm7 \n\t" \
+ "movl %3, %%ecx \n\t" \
"mov %0, %%"REG_d" \n\t"\
"mov (%%"REG_d"), %%"REG_S" \n\t"\
".p2align 4 \n\t" /* FIXME Unroll? */\
@@ -252,20 +236,41 @@ static void yuv2yuvX_sse3(const int16_t *filter, int filterSize,
" jnz 1b \n\t"\
"psraw $3, %%xmm3 \n\t"\
"psraw $3, %%xmm4 \n\t"\
- "packuswb %%xmm4, %%xmm3 \n\t"
- "movntdq %%xmm3, (%1, %%"REG_c")\n\t"
+ "packuswb %%xmm4, %%xmm3 \n\t"\
+ "movntdq %%xmm3, (%1, %%"REG_c")\n\t"\
"add $16, %%"REG_c" \n\t"\
"cmp %2, %%"REG_c" \n\t"\
- "movdqa %%xmm7, %%xmm3\n\t"
- "movdqa %%xmm7, %%xmm4\n\t"
+ "movdqa %%xmm7, %%xmm3 \n\t" \
+ "movdqa %%xmm7, %%xmm4 \n\t" \
"mov %0, %%"REG_d" \n\t"\
"mov (%%"REG_d"), %%"REG_S" \n\t"\
- "jb 1b \n\t"\
- :: "g" (filter),
- "r" (dest-offset), "g" ((x86_reg)(dstW+offset)), "m" (offset)
- : XMM_CLOBBERS("%xmm0" , "%xmm1" , "%xmm2" , "%xmm3" , "%xmm4" , "%xmm5" , "%xmm7" ,)
- "%"REG_d, "%"REG_S, "%"REG_c
- );
+ "jb 1b \n\t"
+
+ if (offset) {
+ __asm__ volatile(
+ "movq %5, %%xmm3 \n\t"
+ "movdqa %%xmm3, %%xmm4 \n\t"
+ "psrlq $24, %%xmm3 \n\t"
+ "psllq $40, %%xmm4 \n\t"
+ "por %%xmm4, %%xmm3 \n\t"
+ MAIN_FUNCTION
+ :: "g" (filter),
+ "r" (dest-offset), "g" ((x86_reg)(dstW+offset)), "m" (offset),
+ "m"(filterSize), "m"(((uint64_t *) dither)[0])
+ : XMM_CLOBBERS("%xmm0" , "%xmm1" , "%xmm2" , "%xmm3" , "%xmm4" , "%xmm5" , "%xmm7" ,)
+ "%"REG_d, "%"REG_S, "%"REG_c
+ );
+ } else {
+ __asm__ volatile(
+ "movq %5, %%xmm3 \n\t"
+ MAIN_FUNCTION
+ :: "g" (filter),
+ "r" (dest-offset), "g" ((x86_reg)(dstW+offset)), "m" (offset),
+ "m"(filterSize), "m"(((uint64_t *) dither)[0])
+ : XMM_CLOBBERS("%xmm0" , "%xmm1" , "%xmm2" , "%xmm3" , "%xmm4" , "%xmm5" , "%xmm7" ,)
+ "%"REG_d, "%"REG_S, "%"REG_c
+ );
+ }
}
#endif