diff options
author | Michael Niedermayer <michaelni@gmx.at> | 2002-05-17 01:04:14 +0000 |
---|---|---|
committer | Michael Niedermayer <michaelni@gmx.at> | 2002-05-17 01:04:14 +0000 |
commit | 607dce96c0225e30ae2e7f3b8de2d00b4f064805 (patch) | |
tree | 609116ed8b80a59e8c32b5632d516a51317544c3 /libavcodec/i386/dsputil_mmx_avg.h | |
parent | 59fe111e8122acc614ace5618e1a4ba7416e9875 (diff) | |
download | ffmpeg-607dce96c0225e30ae2e7f3b8de2d00b4f064805.tar.gz |
hopefully faster mmx2&3dnow MC
Originally committed as revision 506 to svn://svn.ffmpeg.org/ffmpeg/trunk
Diffstat (limited to 'libavcodec/i386/dsputil_mmx_avg.h')
-rw-r--r-- | libavcodec/i386/dsputil_mmx_avg.h | 498 |
1 files changed, 264 insertions, 234 deletions
diff --git a/libavcodec/i386/dsputil_mmx_avg.h b/libavcodec/i386/dsputil_mmx_avg.h index 830fe9f3b2..df773b72a0 100644 --- a/libavcodec/i386/dsputil_mmx_avg.h +++ b/libavcodec/i386/dsputil_mmx_avg.h @@ -1,6 +1,7 @@ /* * DSP utils : average functions are compiled twice for 3dnow/mmx2 * Copyright (c) 2000, 2001 Gerard Lantau. + * Copyright (c) 2002 Michael Niedermayer * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -17,271 +18,300 @@ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. * * MMX optimization by Nick Kurshev <nickols_k@mail.ru> + * mostly rewritten by Michael Niedermayer <michaelni@gmx.at> */ static void DEF(put_pixels_x2)(UINT8 *block, const UINT8 *pixels, int line_size, int h) { - int dh, hh; - UINT8 *p; - const UINT8 *pix; - p = block; - pix = pixels; - hh=h>>2; - dh=h&3; - while(hh--) { __asm __volatile( - "movq (%1), %%mm0\n\t" - "movq 1(%1), %%mm1\n\t" - "movq (%1, %2), %%mm2\n\t" - "movq 1(%1, %2), %%mm3\n\t" - "movq (%1, %2, 2), %%mm4\n\t" - "movq 1(%1, %2, 2), %%mm5\n\t" - "movq (%1, %3), %%mm6\n\t" - "movq 1(%1, %3), %%mm7\n\t" - PAVGB" %%mm1, %%mm0\n\t" - PAVGB" %%mm3, %%mm2\n\t" - PAVGB" %%mm5, %%mm4\n\t" - PAVGB" %%mm7, %%mm6\n\t" - "movq %%mm0, (%0)\n\t" - "movq %%mm2, (%0, %2)\n\t" - "movq %%mm4, (%0, %2, 2)\n\t" - "movq %%mm6, (%0, %3)\n\t" - ::"r"(p), "r"(pix), "r" (line_size), "r" (line_size*3) - :"memory"); - pix += line_size*4; p += line_size*4; - } - while(dh--) { + "xorl %%eax, %%eax \n\t" + ".balign 16 \n\t" + "1: \n\t" + "movq (%1, %%eax), %%mm0 \n\t" + "movq 1(%1, %%eax), %%mm1 \n\t" + "movq (%2, %%eax), %%mm2 \n\t" + "movq 1(%2, %%eax), %%mm3 \n\t" + PAVGB" %%mm1, %%mm0 \n\t" + PAVGB" %%mm3, %%mm2 \n\t" + "movq %%mm0, (%3, %%eax) \n\t" + "movq %%mm2, (%4, %%eax) \n\t" + "addl %5, %%eax \n\t" + "movq (%1, %%eax), %%mm0 \n\t" + "movq 1(%1, %%eax), %%mm1 \n\t" + "movq (%2, %%eax), %%mm2 \n\t" + "movq 1(%2, %%eax), %%mm3 \n\t" 
+ PAVGB" %%mm1, %%mm0 \n\t" + PAVGB" %%mm3, %%mm2 \n\t" + "movq %%mm0, (%3, %%eax) \n\t" + "movq %%mm2, (%4, %%eax) \n\t" + "addl %5, %%eax \n\t" + "subl $4, %0 \n\t" + " jnz 1b \n\t" + :"+g"(h) + :"r"(pixels), "r"(pixels+line_size), "r" (block), "r" (block+line_size), + "r"(line_size<<1) + :"%eax", "memory"); +} + +static void DEF(put_no_rnd_pixels_x2)(UINT8 *block, const UINT8 *pixels, int line_size, int h) +{ __asm __volatile( - "movq %1, %%mm0\n\t" - "movq 1%1, %%mm1\n\t" - PAVGB" %%mm1, %%mm0\n\t" - "movq %%mm0, %0\n\t" - :"=m"(*p) - :"m"(*pix) - :"memory"); - pix += line_size; p += line_size; - } + "xorl %%eax, %%eax \n\t" + "movq "MANGLE(mm_bone)", %%mm7 \n\t" + ".balign 16 \n\t" + "1: \n\t" + "movq (%1, %%eax), %%mm0 \n\t" + "movq 1(%1, %%eax), %%mm1 \n\t" + "movq (%2, %%eax), %%mm2 \n\t" + "movq 1(%2, %%eax), %%mm3 \n\t" + "psubusb %%mm7, %%mm0 \n\t" + "psubusb %%mm7, %%mm2 \n\t" + PAVGB" %%mm1, %%mm0 \n\t" + PAVGB" %%mm3, %%mm2 \n\t" + "movq %%mm0, (%3, %%eax) \n\t" + "movq %%mm2, (%4, %%eax) \n\t" + "addl %5, %%eax \n\t" + "movq (%1, %%eax), %%mm0 \n\t" + "movq 1(%1, %%eax), %%mm1 \n\t" + "movq (%2, %%eax), %%mm2 \n\t" + "movq 1(%2, %%eax), %%mm3 \n\t" + "psubusb %%mm7, %%mm0 \n\t" + "psubusb %%mm7, %%mm2 \n\t" + PAVGB" %%mm1, %%mm0 \n\t" + PAVGB" %%mm3, %%mm2 \n\t" + "movq %%mm0, (%3, %%eax) \n\t" + "movq %%mm2, (%4, %%eax) \n\t" + "addl %5, %%eax \n\t" + "subl $4, %0 \n\t" + " jnz 1b \n\t" + :"+g"(h) + :"r"(pixels), "r"(pixels+line_size), "r" (block), "r" (block+line_size), + "r"(line_size<<1) + :"%eax", "memory"); } static void DEF(put_pixels_y2)(UINT8 *block, const UINT8 *pixels, int line_size, int h) { - int dh, hh; - UINT8 *p; - const UINT8 *pix; - p = block; - pix = pixels; - - hh=h>>1; - dh=h&1; - while(hh--) { __asm __volatile( - "movq %2, %%mm0\n\t" - "movq %3, %%mm1\n\t" - "movq %4, %%mm2\n\t" - PAVGB" %%mm1, %%mm0\n\t" - PAVGB" %%mm2, %%mm1\n\t" - "movq %%mm0, %0\n\t" - "movq %%mm1, %1\n\t" - :"=m"(*p), "=m"(*(p+line_size)) - :"m"(*pix), 
"m"(*(pix+line_size)), - "m"(*(pix+line_size*2)) - :"memory"); - pix += line_size*2; - p += line_size*2; - } - if(dh) { + "xorl %%eax, %%eax \n\t" + "movq (%1), %%mm0 \n\t" + ".balign 16 \n\t" + "1: \n\t" + "movq (%2, %%eax), %%mm1 \n\t" + "movq (%3, %%eax), %%mm2 \n\t" + PAVGB" %%mm1, %%mm0 \n\t" + PAVGB" %%mm2, %%mm1 \n\t" + "movq %%mm0, (%4, %%eax) \n\t" + "movq %%mm1, (%5, %%eax) \n\t" + "addl %6, %%eax \n\t" + "movq (%2, %%eax), %%mm1 \n\t" + "movq (%3, %%eax), %%mm0 \n\t" + PAVGB" %%mm1, %%mm2 \n\t" + PAVGB" %%mm0, %%mm1 \n\t" + "movq %%mm2, (%4, %%eax) \n\t" + "movq %%mm1, (%5, %%eax) \n\t" + "addl %6, %%eax \n\t" + "subl $4, %0 \n\t" + " jnz 1b \n\t" + :"+g"(h) + :"r"(pixels), "r"(pixels+line_size), "r"(pixels+line_size*2), "r" (block), + "r" (block+line_size), "g"(line_size<<1) + :"%eax", "memory"); +} + +static void DEF(put_no_rnd_pixels_y2)(UINT8 *block, const UINT8 *pixels, int line_size, int h) +{ __asm __volatile( - "movq %1, %%mm0\n\t" - "movq %2, %%mm1\n\t" - PAVGB" %%mm1, %%mm0\n\t" - "movq %%mm0, %0\n\t" - :"=m"(*p) - :"m"(*pix), - "m"(*(pix+line_size)) - :"memory"); - } + "movq "MANGLE(mm_bone)", %%mm7 \n\t" + "xorl %%eax, %%eax \n\t" + "movq (%1), %%mm0 \n\t" + ".balign 16 \n\t" + "1: \n\t" + "movq (%2, %%eax), %%mm1 \n\t" + "movq (%3, %%eax), %%mm2 \n\t" + "psubusb %%mm7, %%mm1 \n\t" + PAVGB" %%mm1, %%mm0 \n\t" + PAVGB" %%mm2, %%mm1 \n\t" + "movq %%mm0, (%4, %%eax) \n\t" + "movq %%mm1, (%5, %%eax) \n\t" + "addl %6, %%eax \n\t" + "movq (%2, %%eax), %%mm1 \n\t" + "movq (%3, %%eax), %%mm0 \n\t" + "psubusb %%mm7, %%mm1 \n\t" + PAVGB" %%mm1, %%mm2 \n\t" + PAVGB" %%mm0, %%mm1 \n\t" + "movq %%mm2, (%4, %%eax) \n\t" + "movq %%mm1, (%5, %%eax) \n\t" + "addl %6, %%eax \n\t" + "subl $4, %0 \n\t" + " jnz 1b \n\t" + :"+g"(h) + :"r"(pixels), "r"(pixels+line_size), "r"(pixels+line_size*2), "r" (block), + "r" (block+line_size), "g"(line_size<<1) + :"%eax", "memory"); } static void DEF(avg_pixels)(UINT8 *block, const UINT8 *pixels, int line_size, int h) { - int 
dh, hh; - UINT8 *p; - const UINT8 *pix; - p = block; - pix = pixels; - hh=h>>2; - dh=h&3; - while(hh--) { __asm __volatile( - "movq (%0), %%mm0\n\t" - "movq (%1), %%mm1\n\t" - "movq (%0, %2), %%mm2\n\t" - "movq (%1, %2), %%mm3\n\t" - "movq (%0, %2, 2), %%mm4\n\t" - "movq (%1, %2, 2), %%mm5\n\t" - "movq (%0, %3), %%mm6\n\t" - "movq (%1, %3), %%mm7\n\t" - PAVGB" %%mm1, %%mm0\n\t" - PAVGB" %%mm3, %%mm2\n\t" - PAVGB" %%mm5, %%mm4\n\t" - PAVGB" %%mm7, %%mm6\n\t" - "movq %%mm0, (%0)\n\t" - "movq %%mm2, (%0, %2)\n\t" - "movq %%mm4, (%0, %2, 2)\n\t" - "movq %%mm6, (%0, %3)\n\t" - ::"r"(p), "r"(pix), "r" (line_size), "r" (line_size*3) - :"memory"); - pix += line_size*4; p += line_size*4; - } - while(dh--) { - __asm __volatile( - "movq %0, %%mm0\n\t" - "movq %1, %%mm1\n\t" - PAVGB" %%mm1, %%mm0\n\t" - "movq %%mm0, %0\n\t" - :"+m"(*p) - :"m"(*pix) - :"memory"); - pix += line_size; p += line_size; - } + "xorl %%eax, %%eax \n\t" + ".balign 16 \n\t" + "1: \n\t" + "movq (%1, %%eax), %%mm0 \n\t" + "movq (%2, %%eax), %%mm2 \n\t" + "movq (%3, %%eax), %%mm3 \n\t" + "movq (%4, %%eax), %%mm4 \n\t" + PAVGB" %%mm3, %%mm0 \n\t" + PAVGB" %%mm4, %%mm2 \n\t" + "movq %%mm0, (%3, %%eax) \n\t" + "movq %%mm2, (%4, %%eax) \n\t" + "addl %5, %%eax \n\t" + "movq (%1, %%eax), %%mm0 \n\t" + "movq (%2, %%eax), %%mm2 \n\t" + "movq (%3, %%eax), %%mm3 \n\t" + "movq (%4, %%eax), %%mm4 \n\t" + PAVGB" %%mm3, %%mm0 \n\t" + PAVGB" %%mm4, %%mm2 \n\t" + "movq %%mm0, (%3, %%eax) \n\t" + "movq %%mm2, (%4, %%eax) \n\t" + "addl %5, %%eax \n\t" + "subl $4, %0 \n\t" + " jnz 1b \n\t" + :"+g"(h) + :"r"(pixels), "r"(pixels+line_size), "r" (block), "r" (block+line_size), + "r"(line_size<<1) + :"%eax", "memory"); } -static void DEF(avg_pixels_x2)( UINT8 *block, const UINT8 *pixels, int line_size, int h) +static void DEF(avg_pixels_x2)(UINT8 *block, const UINT8 *pixels, int line_size, int h) { - int dh, hh; - UINT8 *p; - const UINT8 *pix; - p = block; - pix = pixels; - hh=h>>1; - dh=h&1; - while(hh--) { __asm __volatile( - 
"movq %2, %%mm2\n\t" - "movq 1%2, %%mm3\n\t" - "movq %3, %%mm4\n\t" - "movq 1%3, %%mm5\n\t" - "movq %0, %%mm0\n\t" - "movq %1, %%mm1\n\t" - PAVGB" %%mm3, %%mm2\n\t" - PAVGB" %%mm2, %%mm0\n\t" - PAVGB" %%mm5, %%mm4\n\t" - PAVGB" %%mm4, %%mm1\n\t" - "movq %%mm0, %0\n\t" - "movq %%mm1, %1\n\t" - :"+m"(*p), "+m"(*(p+line_size)) - :"m"(*pix), "m"(*(pix+line_size)) - :"memory"); - pix += line_size*2; - p += line_size*2; - } - if(dh) { - __asm __volatile( - "movq %1, %%mm1\n\t" - "movq 1%1, %%mm2\n\t" - "movq %0, %%mm0\n\t" - PAVGB" %%mm2, %%mm1\n\t" - PAVGB" %%mm1, %%mm0\n\t" - "movq %%mm0, %0\n\t" - :"+m"(*p) - :"m"(*pix) - :"memory"); - } + "xorl %%eax, %%eax \n\t" + ".balign 16 \n\t" + "1: \n\t" + "movq (%1, %%eax), %%mm0 \n\t" + "movq 1(%1, %%eax), %%mm1 \n\t" + "movq (%2, %%eax), %%mm2 \n\t" + "movq 1(%2, %%eax), %%mm3 \n\t" + PAVGB" %%mm1, %%mm0 \n\t" + PAVGB" %%mm3, %%mm2 \n\t" + "movq (%3, %%eax), %%mm3 \n\t" + "movq (%4, %%eax), %%mm4 \n\t" + PAVGB" %%mm3, %%mm0 \n\t" + PAVGB" %%mm4, %%mm2 \n\t" + "movq %%mm0, (%3, %%eax) \n\t" + "movq %%mm2, (%4, %%eax) \n\t" + "addl %5, %%eax \n\t" + "movq (%1, %%eax), %%mm0 \n\t" + "movq 1(%1, %%eax), %%mm1 \n\t" + "movq (%2, %%eax), %%mm2 \n\t" + "movq 1(%2, %%eax), %%mm3 \n\t" + PAVGB" %%mm1, %%mm0 \n\t" + PAVGB" %%mm3, %%mm2 \n\t" + "movq (%3, %%eax), %%mm3 \n\t" + "movq (%4, %%eax), %%mm4 \n\t" + PAVGB" %%mm3, %%mm0 \n\t" + PAVGB" %%mm4, %%mm2 \n\t" + "movq %%mm0, (%3, %%eax) \n\t" + "movq %%mm2, (%4, %%eax) \n\t" + "addl %5, %%eax \n\t" + "subl $4, %0 \n\t" + " jnz 1b \n\t" + :"+g"(h) + :"r"(pixels), "r"(pixels+line_size), "r" (block), "r" (block+line_size), + "r"(line_size<<1) + :"%eax", "memory"); } -static void DEF(avg_pixels_y2)( UINT8 *block, const UINT8 *pixels, int line_size, int h) +static void DEF(avg_pixels_y2)(UINT8 *block, const UINT8 *pixels, int line_size, int h) { - int dh, hh; - UINT8 *p; - const UINT8 *pix; - p = block; - pix = pixels; - hh=h>>1; - dh=h&1; - while(hh--) { __asm __volatile( - "movq %2, 
%%mm2\n\t" - "movq %3, %%mm3\n\t" - "movq %3, %%mm4\n\t" - "movq %4, %%mm5\n\t" - "movq %0, %%mm0\n\t" - "movq %1, %%mm1\n\t" - PAVGB" %%mm3, %%mm2\n\t" - PAVGB" %%mm2, %%mm0\n\t" - PAVGB" %%mm5, %%mm4\n\t" - PAVGB" %%mm4, %%mm1\n\t" - "movq %%mm0, %0\n\t" - "movq %%mm1, %1\n\t" - :"+m"(*p), "+m"(*(p+line_size)) - :"m"(*pix), "m"(*(pix+line_size)), "m"(*(pix+line_size*2)) - :"memory"); - pix += line_size*2; - p += line_size*2; - } - if(dh) { - __asm __volatile( - "movq %1, %%mm1\n\t" - "movq %2, %%mm2\n\t" - "movq %0, %%mm0\n\t" - PAVGB" %%mm2, %%mm1\n\t" - PAVGB" %%mm1, %%mm0\n\t" - "movq %%mm0, %0\n\t" - :"+m"(*p) - :"m"(*pix), "m"(*(pix+line_size)) - :"memory"); - } + "xorl %%eax, %%eax \n\t" + "movq (%1), %%mm0 \n\t" + ".balign 16 \n\t" + "1: \n\t" + "movq (%2, %%eax), %%mm1 \n\t" + "movq (%3, %%eax), %%mm2 \n\t" + PAVGB" %%mm1, %%mm0 \n\t" + PAVGB" %%mm2, %%mm1 \n\t" + "movq (%4, %%eax), %%mm3 \n\t" + "movq (%5, %%eax), %%mm4 \n\t" + PAVGB" %%mm3, %%mm0 \n\t" + PAVGB" %%mm4, %%mm1 \n\t" + "movq %%mm0, (%4, %%eax) \n\t" + "movq %%mm1, (%5, %%eax) \n\t" + "addl %6, %%eax \n\t" + "movq (%2, %%eax), %%mm1 \n\t" + "movq (%3, %%eax), %%mm0 \n\t" + PAVGB" %%mm1, %%mm2 \n\t" + PAVGB" %%mm0, %%mm1 \n\t" + "movq (%4, %%eax), %%mm3 \n\t" + "movq (%5, %%eax), %%mm4 \n\t" + PAVGB" %%mm3, %%mm2 \n\t" + PAVGB" %%mm4, %%mm1 \n\t" + "movq %%mm2, (%4, %%eax) \n\t" + "movq %%mm1, (%5, %%eax) \n\t" + "addl %6, %%eax \n\t" + "subl $4, %0 \n\t" + " jnz 1b \n\t" + :"+g"(h) + :"r"(pixels), "r"(pixels+line_size), "r"(pixels+line_size*2), "r" (block), + "r" (block+line_size), "g"(line_size<<1) + :"%eax", "memory"); } -static void DEF(avg_pixels_xy2)( UINT8 *block, const UINT8 *pixels, int line_size, int h) +// Note this is not correctly rounded, but this function is only used for b frames so it doesnt matter +static void DEF(avg_pixels_xy2)(UINT8 *block, const UINT8 *pixels, int line_size, int h) { - UINT8 *p; - const UINT8 *pix; - p = block; - pix = pixels; - __asm __volatile( - "pxor 
%%mm7, %%mm7\n\t" - "movq %0, %%mm6\n\t" - ::"m"(mm_wtwo)); - do { __asm __volatile( - "movq %1, %%mm0\n\t" - "movq %2, %%mm1\n\t" - "movq 1%1, %%mm4\n\t" - "movq 1%2, %%mm5\n\t" - "movq %%mm0, %%mm2\n\t" - "movq %%mm1, %%mm3\n\t" - "punpcklbw %%mm7, %%mm0\n\t" - "punpcklbw %%mm7, %%mm1\n\t" - "punpckhbw %%mm7, %%mm2\n\t" - "punpckhbw %%mm7, %%mm3\n\t" - "paddusw %%mm1, %%mm0\n\t" - "paddusw %%mm3, %%mm2\n\t" - "movq %%mm4, %%mm1\n\t" - "movq %%mm5, %%mm3\n\t" - "punpcklbw %%mm7, %%mm4\n\t" - "punpcklbw %%mm7, %%mm5\n\t" - "punpckhbw %%mm7, %%mm1\n\t" - "punpckhbw %%mm7, %%mm3\n\t" - "paddusw %%mm5, %%mm4\n\t" - "paddusw %%mm3, %%mm1\n\t" - "paddusw %%mm6, %%mm4\n\t" - "paddusw %%mm6, %%mm1\n\t" - "paddusw %%mm4, %%mm0\n\t" - "paddusw %%mm1, %%mm2\n\t" - "psrlw $2, %%mm0\n\t" - "psrlw $2, %%mm2\n\t" - "packuswb %%mm2, %%mm0\n\t" - PAVGB" %0, %%mm0\n\t" - "movq %%mm0, %0\n\t" - :"+m"(*p) - :"m"(*pix), - "m"(*(pix+line_size)) - :"memory"); - pix += line_size; - p += line_size ; - } while(--h); + "movq "MANGLE(mm_bone)", %%mm7 \n\t" + "xorl %%eax, %%eax \n\t" + "movq (%1), %%mm0 \n\t" + "movq 1(%1), %%mm1 \n\t" + PAVGB" %%mm1, %%mm0 \n\t" + ".balign 16 \n\t" + "1: \n\t" + "movq (%2, %%eax), %%mm1 \n\t" + "movq (%3, %%eax), %%mm2 \n\t" + "movq 1(%2, %%eax), %%mm3 \n\t" + "movq 1(%3, %%eax), %%mm4 \n\t" + "psubusb %%mm7, %%mm2 \n\t" + PAVGB" %%mm3, %%mm1 \n\t" + PAVGB" %%mm4, %%mm2 \n\t" + PAVGB" %%mm1, %%mm0 \n\t" + PAVGB" %%mm2, %%mm1 \n\t" + "movq (%4, %%eax), %%mm3 \n\t" + "movq (%5, %%eax), %%mm4 \n\t" + PAVGB" %%mm3, %%mm0 \n\t" + PAVGB" %%mm4, %%mm1 \n\t" + "movq %%mm0, (%4, %%eax) \n\t" + "movq %%mm1, (%5, %%eax) \n\t" + "addl %6, %%eax \n\t" + "movq (%2, %%eax), %%mm1 \n\t" + "movq (%3, %%eax), %%mm0 \n\t" + "movq 1(%2, %%eax), %%mm3 \n\t" + "movq 1(%3, %%eax), %%mm4 \n\t" + PAVGB" %%mm3, %%mm1 \n\t" + PAVGB" %%mm4, %%mm0 \n\t" + PAVGB" %%mm1, %%mm2 \n\t" + PAVGB" %%mm0, %%mm1 \n\t" + "movq (%4, %%eax), %%mm3 \n\t" + "movq (%5, %%eax), %%mm4 \n\t" + PAVGB" 
%%mm3, %%mm2 \n\t" + PAVGB" %%mm4, %%mm1 \n\t" + "movq %%mm2, (%4, %%eax) \n\t" + "movq %%mm1, (%5, %%eax) \n\t" + "addl %6, %%eax \n\t" + "subl $4, %0 \n\t" + " jnz 1b \n\t" + :"+g"(h) + :"r"(pixels), "r"(pixels+line_size), "r"(pixels+line_size*2), "r" (block), + "r" (block+line_size), "g"(line_size<<1) + :"%eax", "memory"); } +//Note: the sub* functions are not used + static void DEF(sub_pixels_x2)( DCTELEM *block, const UINT8 *pixels, int line_size, int h) { DCTELEM *p; |