author     Michael Niedermayer <michaelni@gmx.at>    2013-02-15 21:08:51 +0100
committer  Michael Niedermayer <michaelni@gmx.at>    2013-02-15 23:33:04 +0100
commit     5e947aeb5945efb34757103f32726041646f4a4d (patch)
tree       776722dcd66ac2a97f12276c3e1f01a773c23242 /libswscale
parent     5ad43af9a62cfd5422dc22f37dd2a2327fa75b7c (diff)
download   ffmpeg-5e947aeb5945efb34757103f32726041646f4a4d.tar.gz
sws/x86: improve rounding for yuv2yuvX
This tries to compensate for the errors introduced by
the rounding of pmulhw.
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
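
For context on why the added bias is 8*(filterSize-1): pmulhw keeps only the high 16 bits of the signed 32-bit product, i.e. it truncates toward negative infinity, so each filter tap contributes slightly less than its exact value and the accumulated output ends up biased low, on average by roughly half an LSB per tap. Adding 8 per tap to the dither words before the final ">> 4" is worth about half an LSB per tap after the shift, which cancels that expected loss. The scalar sketch below is illustrative only: plain C with hypothetical names, not the MMX/SSE inline asm, and not bit-exact with it.

    #include <stdint.h>

    /* What pmulhw computes per 16-bit lane: the high word of the signed
     * product. The ">> 16" truncates, so the result is biased low. */
    static int pmulhw_lane(int16_t a, int16_t b)
    {
        return ((int32_t)a * (int32_t)b) >> 16;
    }

    /* One output sample of a vertical scaling filter, with the rounding
     * compensation this patch adds: +8 per extra tap folded into the dither
     * before the ">> 4", i.e. about +0.5 per tap after the shift. */
    static uint8_t yuv2yuvX_scalar(const int16_t *coeff, const int16_t *src,
                                   int filterSize, int dither)
    {
        int acc = (dither + 8 * (filterSize - 1)) >> 4;  /* biased start value */
        for (int i = 0; i < filterSize; i++)
            acc += pmulhw_lane(coeff[i], src[i]);        /* each tap truncates */
        acc >>= 3;                                       /* final scaling      */
        return acc < 0 ? 0 : acc > 255 ? 255 : (uint8_t)acc; /* clamp to byte  */
    }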
Diffstat (limited to 'libswscale')
-rw-r--r--  libswscale/x86/swscale.c           | 12
-rw-r--r--  libswscale/x86/swscale_template.c  | 13
2 files changed, 23 insertions, 2 deletions
diff --git a/libswscale/x86/swscale.c b/libswscale/x86/swscale.c
index 02c454e08f..2f67b1b03f 100644
--- a/libswscale/x86/swscale.c
+++ b/libswscale/x86/swscale.c
@@ -226,10 +226,20 @@ static void yuv2yuvX_sse3(const int16_t *filter, int filterSize,
             :: "r"(dither)
         );
     }
+    filterSize--;
     __asm__ volatile(
         "pxor %%xmm0, %%xmm0\n\t"
         "punpcklbw %%xmm0, %%xmm3\n\t"
-        "psraw $4, %%xmm3\n\t"
+        "movd %0, %%xmm1\n\t"
+        "punpcklwd %%xmm1, %%xmm1\n\t"
+        "punpckldq %%xmm1, %%xmm1\n\t"
+        "punpcklqdq %%xmm1, %%xmm1\n\t"
+        "psllw $3, %%xmm1\n\t"
+        "paddw %%xmm1, %%xmm3\n\t"
+        "psraw $4, %%xmm3\n\t"
+        ::"m"(filterSize)
+        );
+    __asm__ volatile(
         "movdqa %%xmm3, %%xmm4\n\t"
         "movdqa %%xmm3, %%xmm7\n\t"
         "movl %3, %%ecx\n\t"
diff --git a/libswscale/x86/swscale_template.c b/libswscale/x86/swscale_template.c
index 62265db30f..f2567c1d8b 100644
--- a/libswscale/x86/swscale_template.c
+++ b/libswscale/x86/swscale_template.c
@@ -71,9 +71,20 @@ static void RENAME(yuv2yuvX)(const int16_t *filter, int filterSize,
                              const uint8_t *dither, int offset)
 {
     dither_8to16(dither, offset);
-    __asm__ volatile(\
+    filterSize--;
+    __asm__ volatile(
+        "movd %0, %%mm1\n\t"
+        "punpcklwd %%mm1, %%mm1\n\t"
+        "punpckldq %%mm1, %%mm1\n\t"
+        "psllw $3, %%mm1\n\t"
+        "paddw %%mm1, %%mm3\n\t"
+        "paddw %%mm1, %%mm4\n\t"
         "psraw $4, %%mm3\n\t"
         "psraw $4, %%mm4\n\t"
+        ::"m"(filterSize)
+        );
+
+    __asm__ volatile(\
         "movq %%mm3, %%mm6\n\t"
         "movq %%mm4, %%mm7\n\t"
         "movl %3, %%ecx\n\t"
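
For readers less familiar with the SIMD idiom: the new instruction run in the SSE3 hunk (movd / punpcklwd / punpckldq / punpcklqdq / psllw / paddw / psraw) is simply a broadcast of the decremented filterSize into every 16-bit lane, a multiply by 8 via the left shift, and the bias-plus-shift of the dither words. A rough SSE2 intrinsics equivalent, with a hypothetical helper name and ignoring the surrounding register allocation, could look like this:

    #include <emmintrin.h>

    /* Illustrative sketch of the added sequence, not libswscale code. */
    static __m128i bias_dither(__m128i dither_words, int filterSizeMinus1)
    {
        __m128i v = _mm_cvtsi32_si128(filterSizeMinus1);   /* movd       */
        v = _mm_unpacklo_epi16(v, v);                      /* punpcklwd  */
        v = _mm_unpacklo_epi32(v, v);                      /* punpckldq  */
        v = _mm_unpacklo_epi64(v, v);                      /* punpcklqdq */
        v = _mm_slli_epi16(v, 3);                          /* psllw $3   */
        return _mm_srai_epi16(_mm_add_epi16(dither_words, v), 4); /* paddw, psraw $4 */
    }

The MMX hunk in swscale_template.c does the same with 64-bit registers (so no punpcklqdq step) and applies the bias to both %%mm3 and %%mm4 before their ">> 4".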