author     Oskar Arvidsson <oskar@irock.se>         2011-03-29 17:48:51 +0200
committer  Ronald S. Bultje <rsbultje@gmail.com>    2011-05-10 07:24:25 -0400
commit     325eefa2ca14b487120b164bc232e6c650fb20f3 (patch)
tree       0a1fb6fecd42dbb9cd5b8c3c11737b4c353d4456
parent     15fb393be67c73f58d96c2c1bb187d037723835a (diff)
download   ffmpeg-325eefa2ca14b487120b164bc232e6c650fb20f3.tar.gz
Move some functions in dsputil.c into a new file dsputil_template.c.
The functions moved are used when decoding h264.
Preparatory patch for high bit depth h264 decoding support.
Signed-off-by: Ronald S. Bultje <rsbultje@gmail.com>
-rw-r--r--   libavcodec/dsputil.c            | 1228
-rw-r--r--   libavcodec/dsputil_template.c   | 1257
2 files changed, 1259 insertions, 1226 deletions
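
The only change to dsputil.c beyond the deletions is a new #include "dsputil_template.c" near the top, so the moved functions still end up in the same translation unit and behaviour is unchanged. Per the commit message, the point of isolating them in a template file is to let that file be compiled more than once later on, once per sample bit depth. The sketch below shows that instantiation pattern in a minimal, self-contained form; the macro and function names and the 10-bit variant are assumptions for illustration only, since in this patch the template is still included exactly once and only for 8-bit samples.

/* Hypothetical, minimal sketch of the "template .c" idea: one generic
 * function body instantiated for several sample widths through the
 * preprocessor.  The names below (DEF_PUT_PIXELS2, put_pixels2_8/_10)
 * are illustrative, not libavcodec API. */
#include <stdint.h>
#include <stdio.h>

#define DEF_PUT_PIXELS2(depth, pixel_t)                                  \
static void put_pixels2_##depth(pixel_t *dst, const pixel_t *src,        \
                                int line_size, int h)                    \
{                                                                        \
    for (int i = 0; i < h; i++) {   /* copy a 2-sample-wide block */     \
        dst[0] = src[0];                                                 \
        dst[1] = src[1];                                                 \
        dst += line_size;                                                \
        src += line_size;                                                \
    }                                                                    \
}

DEF_PUT_PIXELS2(8,  uint8_t)    /* emits put_pixels2_8()  */
DEF_PUT_PIXELS2(10, uint16_t)   /* emits put_pixels2_10(): 10-bit samples in 16-bit storage */

int main(void)
{
    uint8_t  src8[4]  = { 1, 2, 3, 4 },         dst8[4]  = { 0 };
    uint16_t src10[4] = { 513, 514, 700, 701 }, dst10[4] = { 0 };

    put_pixels2_8(dst8,   src8,  2, 2);
    put_pixels2_10(dst10, src10, 2, 2);

    printf("%d %d %d %d\n", dst8[0],  dst8[1],  dst8[2],  dst8[3]);
    printf("%d %d %d %d\n", dst10[0], dst10[1], dst10[2], dst10[3]);
    return 0;
}

Compiling the same body once per sample type is what would let later high-bit-depth work reuse these kernels instead of duplicating them by hand. The diff follows.
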
diff --git a/libavcodec/dsputil.c b/libavcodec/dsputil.c index 32472b1a74..078e172d00 100644 --- a/libavcodec/dsputil.c +++ b/libavcodec/dsputil.c @@ -43,6 +43,8 @@ uint8_t ff_cropTbl[256 + 2 * MAX_NEG_CROP] = {0, }; uint32_t ff_squareTbl[512] = {0, }; +#include "dsputil_template.c" + // 0x7f7f7f7f or 0x7f7f7f7f7f7f7f7f or whatever, depending on the cpu's native arithmetic size #define pb_7f (~0UL/255 * 0x7f) #define pb_80 (~0UL/255 * 0x80) @@ -296,110 +298,6 @@ static int sse16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) return s; } -/* draw the edges of width 'w' of an image of size width, height */ -//FIXME check that this is ok for mpeg4 interlaced -static void draw_edges_c(uint8_t *buf, int wrap, int width, int height, int w, int sides) -{ - uint8_t *ptr, *last_line; - int i; - - /* left and right */ - ptr = buf; - for(i=0;i<height;i++) { - memset(ptr - w, ptr[0], w); - memset(ptr + width, ptr[width-1], w); - ptr += wrap; - } - - /* top and bottom + corners */ - buf -= w; - last_line = buf + (height - 1) * wrap; - if (sides & EDGE_TOP) - for(i = 0; i < w; i++) - memcpy(buf - (i + 1) * wrap, buf, width + w + w); // top - if (sides & EDGE_BOTTOM) - for (i = 0; i < w; i++) - memcpy(last_line + (i + 1) * wrap, last_line, width + w + w); // bottom -} - -/** - * Copy a rectangular area of samples to a temporary buffer and replicate the border samples. - * @param buf destination buffer - * @param src source buffer - * @param linesize number of bytes between 2 vertically adjacent samples in both the source and destination buffers - * @param block_w width of block - * @param block_h height of block - * @param src_x x coordinate of the top left sample of the block in the source buffer - * @param src_y y coordinate of the top left sample of the block in the source buffer - * @param w width of the source buffer - * @param h height of the source buffer - */ -void ff_emulated_edge_mc(uint8_t *buf, const uint8_t *src, int linesize, int block_w, int block_h, - int src_x, int src_y, int w, int h){ - int x, y; - int start_y, start_x, end_y, end_x; - - if(src_y>= h){ - src+= (h-1-src_y)*linesize; - src_y=h-1; - }else if(src_y<=-block_h){ - src+= (1-block_h-src_y)*linesize; - src_y=1-block_h; - } - if(src_x>= w){ - src+= (w-1-src_x); - src_x=w-1; - }else if(src_x<=-block_w){ - src+= (1-block_w-src_x); - src_x=1-block_w; - } - - start_y= FFMAX(0, -src_y); - start_x= FFMAX(0, -src_x); - end_y= FFMIN(block_h, h-src_y); - end_x= FFMIN(block_w, w-src_x); - assert(start_y < end_y && block_h); - assert(start_x < end_x && block_w); - - w = end_x - start_x; - src += start_y*linesize + start_x; - buf += start_x; - - //top - for(y=0; y<start_y; y++){ - memcpy(buf, src, w); - buf += linesize; - } - - // copy existing part - for(; y<end_y; y++){ - memcpy(buf, src, w); - src += linesize; - buf += linesize; - } - - //bottom - src -= linesize; - for(; y<block_h; y++){ - memcpy(buf, src, w); - buf += linesize; - } - - buf -= block_h * linesize + start_x; - while (block_h--){ - //left - for(x=0; x<start_x; x++){ - buf[x] = buf[start_x]; - } - - //right - for(x=end_x; x<block_w; x++){ - buf[x] = buf[end_x - 1]; - } - buf += linesize; - } -} - static void get_pixels_c(DCTELEM *restrict block, const uint8_t *pixels, int line_size) { int i; @@ -591,36 +489,6 @@ static void add_pixels_clamped2_c(const DCTELEM *block, uint8_t *restrict pixels } } -static void add_pixels8_c(uint8_t *restrict pixels, DCTELEM *block, int line_size) -{ - int i; - for(i=0;i<8;i++) { - pixels[0] += block[0]; - pixels[1] += 
block[1]; - pixels[2] += block[2]; - pixels[3] += block[3]; - pixels[4] += block[4]; - pixels[5] += block[5]; - pixels[6] += block[6]; - pixels[7] += block[7]; - pixels += line_size; - block += 8; - } -} - -static void add_pixels4_c(uint8_t *restrict pixels, DCTELEM *block, int line_size) -{ - int i; - for(i=0;i<4;i++) { - pixels[0] += block[0]; - pixels[1] += block[1]; - pixels[2] += block[2]; - pixels[3] += block[3]; - pixels += line_size; - block += 4; - } -} - static int sum_abs_dctelem_c(DCTELEM *block) { int sum=0, i; @@ -665,539 +533,9 @@ static void scale_block_c(const uint8_t src[64]/*align 8*/, uint8_t *dst/*align } } -#if 0 - -#define PIXOP2(OPNAME, OP) \ -static void OPNAME ## _pixels(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ -{\ - int i;\ - for(i=0; i<h; i++){\ - OP(*((uint64_t*)block), AV_RN64(pixels));\ - pixels+=line_size;\ - block +=line_size;\ - }\ -}\ -\ -static void OPNAME ## _no_rnd_pixels_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ -{\ - int i;\ - for(i=0; i<h; i++){\ - const uint64_t a= AV_RN64(pixels );\ - const uint64_t b= AV_RN64(pixels+1);\ - OP(*((uint64_t*)block), (a&b) + (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\ - pixels+=line_size;\ - block +=line_size;\ - }\ -}\ -\ -static void OPNAME ## _pixels_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ -{\ - int i;\ - for(i=0; i<h; i++){\ - const uint64_t a= AV_RN64(pixels );\ - const uint64_t b= AV_RN64(pixels+1);\ - OP(*((uint64_t*)block), (a|b) - (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\ - pixels+=line_size;\ - block +=line_size;\ - }\ -}\ -\ -static void OPNAME ## _no_rnd_pixels_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ -{\ - int i;\ - for(i=0; i<h; i++){\ - const uint64_t a= AV_RN64(pixels );\ - const uint64_t b= AV_RN64(pixels+line_size);\ - OP(*((uint64_t*)block), (a&b) + (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\ - pixels+=line_size;\ - block +=line_size;\ - }\ -}\ -\ -static void OPNAME ## _pixels_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ -{\ - int i;\ - for(i=0; i<h; i++){\ - const uint64_t a= AV_RN64(pixels );\ - const uint64_t b= AV_RN64(pixels+line_size);\ - OP(*((uint64_t*)block), (a|b) - (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\ - pixels+=line_size;\ - block +=line_size;\ - }\ -}\ -\ -static void OPNAME ## _pixels_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ -{\ - int i;\ - const uint64_t a= AV_RN64(pixels );\ - const uint64_t b= AV_RN64(pixels+1);\ - uint64_t l0= (a&0x0303030303030303ULL)\ - + (b&0x0303030303030303ULL)\ - + 0x0202020202020202ULL;\ - uint64_t h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\ - + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\ - uint64_t l1,h1;\ -\ - pixels+=line_size;\ - for(i=0; i<h; i+=2){\ - uint64_t a= AV_RN64(pixels );\ - uint64_t b= AV_RN64(pixels+1);\ - l1= (a&0x0303030303030303ULL)\ - + (b&0x0303030303030303ULL);\ - h1= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\ - + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\ - OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\ - pixels+=line_size;\ - block +=line_size;\ - a= AV_RN64(pixels );\ - b= AV_RN64(pixels+1);\ - l0= (a&0x0303030303030303ULL)\ - + (b&0x0303030303030303ULL)\ - + 0x0202020202020202ULL;\ - h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\ - + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\ - OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\ - pixels+=line_size;\ - block +=line_size;\ - }\ -}\ -\ -static void OPNAME ## _no_rnd_pixels_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ -{\ - int i;\ - const uint64_t 
a= AV_RN64(pixels );\ - const uint64_t b= AV_RN64(pixels+1);\ - uint64_t l0= (a&0x0303030303030303ULL)\ - + (b&0x0303030303030303ULL)\ - + 0x0101010101010101ULL;\ - uint64_t h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\ - + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\ - uint64_t l1,h1;\ -\ - pixels+=line_size;\ - for(i=0; i<h; i+=2){\ - uint64_t a= AV_RN64(pixels );\ - uint64_t b= AV_RN64(pixels+1);\ - l1= (a&0x0303030303030303ULL)\ - + (b&0x0303030303030303ULL);\ - h1= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\ - + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\ - OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\ - pixels+=line_size;\ - block +=line_size;\ - a= AV_RN64(pixels );\ - b= AV_RN64(pixels+1);\ - l0= (a&0x0303030303030303ULL)\ - + (b&0x0303030303030303ULL)\ - + 0x0101010101010101ULL;\ - h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\ - + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\ - OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\ - pixels+=line_size;\ - block +=line_size;\ - }\ -}\ -\ -CALL_2X_PIXELS(OPNAME ## _pixels16_c , OPNAME ## _pixels_c , 8)\ -CALL_2X_PIXELS(OPNAME ## _pixels16_x2_c , OPNAME ## _pixels_x2_c , 8)\ -CALL_2X_PIXELS(OPNAME ## _pixels16_y2_c , OPNAME ## _pixels_y2_c , 8)\ -CALL_2X_PIXELS(OPNAME ## _pixels16_xy2_c, OPNAME ## _pixels_xy2_c, 8)\ -CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_x2_c , OPNAME ## _no_rnd_pixels_x2_c , 8)\ -CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_y2_c , OPNAME ## _no_rnd_pixels_y2_c , 8)\ -CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_xy2_c, OPNAME ## _no_rnd_pixels_xy2_c, 8) - -#define op_avg(a, b) a = ( ((a)|(b)) - ((((a)^(b))&0xFEFEFEFEFEFEFEFEULL)>>1) ) -#else // 64 bit variant - -#define PIXOP2(OPNAME, OP) \ -static void OPNAME ## _pixels2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ - int i;\ - for(i=0; i<h; i++){\ - OP(*((uint16_t*)(block )), AV_RN16(pixels ));\ - pixels+=line_size;\ - block +=line_size;\ - }\ -}\ -static void OPNAME ## _pixels4_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ - int i;\ - for(i=0; i<h; i++){\ - OP(*((uint32_t*)(block )), AV_RN32(pixels ));\ - pixels+=line_size;\ - block +=line_size;\ - }\ -}\ -static void OPNAME ## _pixels8_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ - int i;\ - for(i=0; i<h; i++){\ - OP(*((uint32_t*)(block )), AV_RN32(pixels ));\ - OP(*((uint32_t*)(block+4)), AV_RN32(pixels+4));\ - pixels+=line_size;\ - block +=line_size;\ - }\ -}\ -static inline void OPNAME ## _no_rnd_pixels8_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ - OPNAME ## _pixels8_c(block, pixels, line_size, h);\ -}\ -\ -static inline void OPNAME ## _no_rnd_pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \ - int src_stride1, int src_stride2, int h){\ - int i;\ - for(i=0; i<h; i++){\ - uint32_t a,b;\ - a= AV_RN32(&src1[i*src_stride1 ]);\ - b= AV_RN32(&src2[i*src_stride2 ]);\ - OP(*((uint32_t*)&dst[i*dst_stride ]), no_rnd_avg32(a, b));\ - a= AV_RN32(&src1[i*src_stride1+4]);\ - b= AV_RN32(&src2[i*src_stride2+4]);\ - OP(*((uint32_t*)&dst[i*dst_stride+4]), no_rnd_avg32(a, b));\ - }\ -}\ -\ -static inline void OPNAME ## _pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \ - int src_stride1, int src_stride2, int h){\ - int i;\ - for(i=0; i<h; i++){\ - uint32_t a,b;\ - a= AV_RN32(&src1[i*src_stride1 ]);\ - b= AV_RN32(&src2[i*src_stride2 ]);\ - OP(*((uint32_t*)&dst[i*dst_stride ]), rnd_avg32(a, b));\ - a= AV_RN32(&src1[i*src_stride1+4]);\ - b= AV_RN32(&src2[i*src_stride2+4]);\ - OP(*((uint32_t*)&dst[i*dst_stride+4]), 
rnd_avg32(a, b));\ - }\ -}\ -\ -static inline void OPNAME ## _pixels4_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \ - int src_stride1, int src_stride2, int h){\ - int i;\ - for(i=0; i<h; i++){\ - uint32_t a,b;\ - a= AV_RN32(&src1[i*src_stride1 ]);\ - b= AV_RN32(&src2[i*src_stride2 ]);\ - OP(*((uint32_t*)&dst[i*dst_stride ]), rnd_avg32(a, b));\ - }\ -}\ -\ -static inline void OPNAME ## _pixels2_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \ - int src_stride1, int src_stride2, int h){\ - int i;\ - for(i=0; i<h; i++){\ - uint32_t a,b;\ - a= AV_RN16(&src1[i*src_stride1 ]);\ - b= AV_RN16(&src2[i*src_stride2 ]);\ - OP(*((uint16_t*)&dst[i*dst_stride ]), rnd_avg32(a, b));\ - }\ -}\ -\ -static inline void OPNAME ## _pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \ - int src_stride1, int src_stride2, int h){\ - OPNAME ## _pixels8_l2(dst , src1 , src2 , dst_stride, src_stride1, src_stride2, h);\ - OPNAME ## _pixels8_l2(dst+8, src1+8, src2+8, dst_stride, src_stride1, src_stride2, h);\ -}\ -\ -static inline void OPNAME ## _no_rnd_pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \ - int src_stride1, int src_stride2, int h){\ - OPNAME ## _no_rnd_pixels8_l2(dst , src1 , src2 , dst_stride, src_stride1, src_stride2, h);\ - OPNAME ## _no_rnd_pixels8_l2(dst+8, src1+8, src2+8, dst_stride, src_stride1, src_stride2, h);\ -}\ -\ -static inline void OPNAME ## _no_rnd_pixels8_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ - OPNAME ## _no_rnd_pixels8_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\ -}\ -\ -static inline void OPNAME ## _pixels8_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ - OPNAME ## _pixels8_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\ -}\ -\ -static inline void OPNAME ## _no_rnd_pixels8_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ - OPNAME ## _no_rnd_pixels8_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\ -}\ -\ -static inline void OPNAME ## _pixels8_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ - OPNAME ## _pixels8_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\ -}\ -\ -static inline void OPNAME ## _pixels8_l4(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, const uint8_t *src3, const uint8_t *src4,\ - int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\ - int i;\ - for(i=0; i<h; i++){\ - uint32_t a, b, c, d, l0, l1, h0, h1;\ - a= AV_RN32(&src1[i*src_stride1]);\ - b= AV_RN32(&src2[i*src_stride2]);\ - c= AV_RN32(&src3[i*src_stride3]);\ - d= AV_RN32(&src4[i*src_stride4]);\ - l0= (a&0x03030303UL)\ - + (b&0x03030303UL)\ - + 0x02020202UL;\ - h0= ((a&0xFCFCFCFCUL)>>2)\ - + ((b&0xFCFCFCFCUL)>>2);\ - l1= (c&0x03030303UL)\ - + (d&0x03030303UL);\ - h1= ((c&0xFCFCFCFCUL)>>2)\ - + ((d&0xFCFCFCFCUL)>>2);\ - OP(*((uint32_t*)&dst[i*dst_stride]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ - a= AV_RN32(&src1[i*src_stride1+4]);\ - b= AV_RN32(&src2[i*src_stride2+4]);\ - c= AV_RN32(&src3[i*src_stride3+4]);\ - d= AV_RN32(&src4[i*src_stride4+4]);\ - l0= (a&0x03030303UL)\ - + (b&0x03030303UL)\ - + 0x02020202UL;\ - h0= ((a&0xFCFCFCFCUL)>>2)\ - + ((b&0xFCFCFCFCUL)>>2);\ - l1= (c&0x03030303UL)\ - + (d&0x03030303UL);\ - h1= ((c&0xFCFCFCFCUL)>>2)\ - + ((d&0xFCFCFCFCUL)>>2);\ - OP(*((uint32_t*)&dst[i*dst_stride+4]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ - }\ -}\ -\ -static inline 
void OPNAME ## _pixels4_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ - OPNAME ## _pixels4_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\ -}\ -\ -static inline void OPNAME ## _pixels4_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ - OPNAME ## _pixels4_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\ -}\ -\ -static inline void OPNAME ## _pixels2_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ - OPNAME ## _pixels2_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\ -}\ -\ -static inline void OPNAME ## _pixels2_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ - OPNAME ## _pixels2_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\ -}\ -\ -static inline void OPNAME ## _no_rnd_pixels8_l4(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, const uint8_t *src3, const uint8_t *src4,\ - int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\ - int i;\ - for(i=0; i<h; i++){\ - uint32_t a, b, c, d, l0, l1, h0, h1;\ - a= AV_RN32(&src1[i*src_stride1]);\ - b= AV_RN32(&src2[i*src_stride2]);\ - c= AV_RN32(&src3[i*src_stride3]);\ - d= AV_RN32(&src4[i*src_stride4]);\ - l0= (a&0x03030303UL)\ - + (b&0x03030303UL)\ - + 0x01010101UL;\ - h0= ((a&0xFCFCFCFCUL)>>2)\ - + ((b&0xFCFCFCFCUL)>>2);\ - l1= (c&0x03030303UL)\ - + (d&0x03030303UL);\ - h1= ((c&0xFCFCFCFCUL)>>2)\ - + ((d&0xFCFCFCFCUL)>>2);\ - OP(*((uint32_t*)&dst[i*dst_stride]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ - a= AV_RN32(&src1[i*src_stride1+4]);\ - b= AV_RN32(&src2[i*src_stride2+4]);\ - c= AV_RN32(&src3[i*src_stride3+4]);\ - d= AV_RN32(&src4[i*src_stride4+4]);\ - l0= (a&0x03030303UL)\ - + (b&0x03030303UL)\ - + 0x01010101UL;\ - h0= ((a&0xFCFCFCFCUL)>>2)\ - + ((b&0xFCFCFCFCUL)>>2);\ - l1= (c&0x03030303UL)\ - + (d&0x03030303UL);\ - h1= ((c&0xFCFCFCFCUL)>>2)\ - + ((d&0xFCFCFCFCUL)>>2);\ - OP(*((uint32_t*)&dst[i*dst_stride+4]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ - }\ -}\ -static inline void OPNAME ## _pixels16_l4(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, const uint8_t *src3, const uint8_t *src4,\ - int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\ - OPNAME ## _pixels8_l4(dst , src1 , src2 , src3 , src4 , dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\ - OPNAME ## _pixels8_l4(dst+8, src1+8, src2+8, src3+8, src4+8, dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\ -}\ -static inline void OPNAME ## _no_rnd_pixels16_l4(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, const uint8_t *src3, const uint8_t *src4,\ - int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\ - OPNAME ## _no_rnd_pixels8_l4(dst , src1 , src2 , src3 , src4 , dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\ - OPNAME ## _no_rnd_pixels8_l4(dst+8, src1+8, src2+8, src3+8, src4+8, dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\ -}\ -\ -static inline void OPNAME ## _pixels2_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ -{\ - int i, a0, b0, a1, b1;\ - a0= pixels[0];\ - b0= pixels[1] + 2;\ - a0 += b0;\ - b0 += pixels[2];\ -\ - pixels+=line_size;\ - for(i=0; i<h; i+=2){\ - a1= pixels[0];\ - b1= pixels[1];\ - a1 += b1;\ - b1 += pixels[2];\ -\ - block[0]= (a1+a0)>>2; /* FIXME non put */\ - block[1]= (b1+b0)>>2;\ -\ - pixels+=line_size;\ - block +=line_size;\ -\ - a0= pixels[0];\ - b0= pixels[1] + 2;\ - a0 
+= b0;\ - b0 += pixels[2];\ -\ - block[0]= (a1+a0)>>2;\ - block[1]= (b1+b0)>>2;\ - pixels+=line_size;\ - block +=line_size;\ - }\ -}\ -\ -static inline void OPNAME ## _pixels4_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ -{\ - int i;\ - const uint32_t a= AV_RN32(pixels );\ - const uint32_t b= AV_RN32(pixels+1);\ - uint32_t l0= (a&0x03030303UL)\ - + (b&0x03030303UL)\ - + 0x02020202UL;\ - uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\ - + ((b&0xFCFCFCFCUL)>>2);\ - uint32_t l1,h1;\ -\ - pixels+=line_size;\ - for(i=0; i<h; i+=2){\ - uint32_t a= AV_RN32(pixels );\ - uint32_t b= AV_RN32(pixels+1);\ - l1= (a&0x03030303UL)\ - + (b&0x03030303UL);\ - h1= ((a&0xFCFCFCFCUL)>>2)\ - + ((b&0xFCFCFCFCUL)>>2);\ - OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ - pixels+=line_size;\ - block +=line_size;\ - a= AV_RN32(pixels );\ - b= AV_RN32(pixels+1);\ - l0= (a&0x03030303UL)\ - + (b&0x03030303UL)\ - + 0x02020202UL;\ - h0= ((a&0xFCFCFCFCUL)>>2)\ - + ((b&0xFCFCFCFCUL)>>2);\ - OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ - pixels+=line_size;\ - block +=line_size;\ - }\ -}\ -\ -static inline void OPNAME ## _pixels8_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ -{\ - int j;\ - for(j=0; j<2; j++){\ - int i;\ - const uint32_t a= AV_RN32(pixels );\ - const uint32_t b= AV_RN32(pixels+1);\ - uint32_t l0= (a&0x03030303UL)\ - + (b&0x03030303UL)\ - + 0x02020202UL;\ - uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\ - + ((b&0xFCFCFCFCUL)>>2);\ - uint32_t l1,h1;\ -\ - pixels+=line_size;\ - for(i=0; i<h; i+=2){\ - uint32_t a= AV_RN32(pixels );\ - uint32_t b= AV_RN32(pixels+1);\ - l1= (a&0x03030303UL)\ - + (b&0x03030303UL);\ - h1= ((a&0xFCFCFCFCUL)>>2)\ - + ((b&0xFCFCFCFCUL)>>2);\ - OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ - pixels+=line_size;\ - block +=line_size;\ - a= AV_RN32(pixels );\ - b= AV_RN32(pixels+1);\ - l0= (a&0x03030303UL)\ - + (b&0x03030303UL)\ - + 0x02020202UL;\ - h0= ((a&0xFCFCFCFCUL)>>2)\ - + ((b&0xFCFCFCFCUL)>>2);\ - OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ - pixels+=line_size;\ - block +=line_size;\ - }\ - pixels+=4-line_size*(h+1);\ - block +=4-line_size*h;\ - }\ -}\ -\ -static inline void OPNAME ## _no_rnd_pixels8_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ -{\ - int j;\ - for(j=0; j<2; j++){\ - int i;\ - const uint32_t a= AV_RN32(pixels );\ - const uint32_t b= AV_RN32(pixels+1);\ - uint32_t l0= (a&0x03030303UL)\ - + (b&0x03030303UL)\ - + 0x01010101UL;\ - uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\ - + ((b&0xFCFCFCFCUL)>>2);\ - uint32_t l1,h1;\ -\ - pixels+=line_size;\ - for(i=0; i<h; i+=2){\ - uint32_t a= AV_RN32(pixels );\ - uint32_t b= AV_RN32(pixels+1);\ - l1= (a&0x03030303UL)\ - + (b&0x03030303UL);\ - h1= ((a&0xFCFCFCFCUL)>>2)\ - + ((b&0xFCFCFCFCUL)>>2);\ - OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ - pixels+=line_size;\ - block +=line_size;\ - a= AV_RN32(pixels );\ - b= AV_RN32(pixels+1);\ - l0= (a&0x03030303UL)\ - + (b&0x03030303UL)\ - + 0x01010101UL;\ - h0= ((a&0xFCFCFCFCUL)>>2)\ - + ((b&0xFCFCFCFCUL)>>2);\ - OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ - pixels+=line_size;\ - block +=line_size;\ - }\ - pixels+=4-line_size*(h+1);\ - block +=4-line_size*h;\ - }\ -}\ -\ -CALL_2X_PIXELS(OPNAME ## _pixels16_c , OPNAME ## _pixels8_c , 8)\ -CALL_2X_PIXELS(OPNAME ## _pixels16_x2_c , OPNAME ## _pixels8_x2_c , 8)\ -CALL_2X_PIXELS(OPNAME ## _pixels16_y2_c , OPNAME ## _pixels8_y2_c , 8)\ -CALL_2X_PIXELS(OPNAME ## _pixels16_xy2_c, OPNAME ## _pixels8_xy2_c, 8)\ 
-av_unused CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_c , OPNAME ## _pixels8_c , 8)\ -CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_x2_c , OPNAME ## _no_rnd_pixels8_x2_c , 8)\ -CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_y2_c , OPNAME ## _no_rnd_pixels8_y2_c , 8)\ -CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_xy2_c, OPNAME ## _no_rnd_pixels8_xy2_c, 8)\ - -#define op_avg(a, b) a = rnd_avg32(a, b) -#endif -#define op_put(a, b) a = b - -PIXOP2(avg, op_avg) -PIXOP2(put, op_put) -#undef op_avg -#undef op_put - -#define put_no_rnd_pixels8_c put_pixels8_c -#define put_no_rnd_pixels16_c put_pixels16_c - #define avg2(a,b) ((a+b+1)>>1) #define avg4(a,b,c,d) ((a+b+c+d+2)>>2) -static void put_no_rnd_pixels16_l2_c(uint8_t *dst, const uint8_t *a, const uint8_t *b, int stride, int h){ - put_no_rnd_pixels16_l2(dst, a, b, stride, stride, stride, h); -} - -static void put_no_rnd_pixels8_l2_c(uint8_t *dst, const uint8_t *a, const uint8_t *b, int stride, int h){ - put_no_rnd_pixels8_l2(dst, a, b, stride, stride, stride, h); -} - static void gmc1_c(uint8_t *dst, uint8_t *src, int stride, int h, int x16, int y16, int rounder) { const int A=(16-x16)*(16-y16); @@ -1494,115 +832,6 @@ static void put_tpel_pixels ## width ## _mc22_c(uint8_t *dst, const uint8_t *src void put_tpel_pixels_mc22_c(dst, src, stride, width, height);} #endif -#define H264_CHROMA_MC(OPNAME, OP)\ -static void OPNAME ## h264_chroma_mc2_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\ - const int A=(8-x)*(8-y);\ - const int B=( x)*(8-y);\ - const int C=(8-x)*( y);\ - const int D=( x)*( y);\ - int i;\ - \ - assert(x<8 && y<8 && x>=0 && y>=0);\ -\ - if(D){\ - for(i=0; i<h; i++){\ - OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\ - OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\ - dst+= stride;\ - src+= stride;\ - }\ - }else{\ - const int E= B+C;\ - const int step= C ? stride : 1;\ - for(i=0; i<h; i++){\ - OP(dst[0], (A*src[0] + E*src[step+0]));\ - OP(dst[1], (A*src[1] + E*src[step+1]));\ - dst+= stride;\ - src+= stride;\ - }\ - }\ -}\ -\ -static void OPNAME ## h264_chroma_mc4_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\ - const int A=(8-x)*(8-y);\ - const int B=( x)*(8-y);\ - const int C=(8-x)*( y);\ - const int D=( x)*( y);\ - int i;\ - \ - assert(x<8 && y<8 && x>=0 && y>=0);\ -\ - if(D){\ - for(i=0; i<h; i++){\ - OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\ - OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\ - OP(dst[2], (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3]));\ - OP(dst[3], (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4]));\ - dst+= stride;\ - src+= stride;\ - }\ - }else{\ - const int E= B+C;\ - const int step= C ? 
stride : 1;\ - for(i=0; i<h; i++){\ - OP(dst[0], (A*src[0] + E*src[step+0]));\ - OP(dst[1], (A*src[1] + E*src[step+1]));\ - OP(dst[2], (A*src[2] + E*src[step+2]));\ - OP(dst[3], (A*src[3] + E*src[step+3]));\ - dst+= stride;\ - src+= stride;\ - }\ - }\ -}\ -\ -static void OPNAME ## h264_chroma_mc8_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\ - const int A=(8-x)*(8-y);\ - const int B=( x)*(8-y);\ - const int C=(8-x)*( y);\ - const int D=( x)*( y);\ - int i;\ - \ - assert(x<8 && y<8 && x>=0 && y>=0);\ -\ - if(D){\ - for(i=0; i<h; i++){\ - OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\ - OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\ - OP(dst[2], (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3]));\ - OP(dst[3], (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4]));\ - OP(dst[4], (A*src[4] + B*src[5] + C*src[stride+4] + D*src[stride+5]));\ - OP(dst[5], (A*src[5] + B*src[6] + C*src[stride+5] + D*src[stride+6]));\ - OP(dst[6], (A*src[6] + B*src[7] + C*src[stride+6] + D*src[stride+7]));\ - OP(dst[7], (A*src[7] + B*src[8] + C*src[stride+7] + D*src[stride+8]));\ - dst+= stride;\ - src+= stride;\ - }\ - }else{\ - const int E= B+C;\ - const int step= C ? stride : 1;\ - for(i=0; i<h; i++){\ - OP(dst[0], (A*src[0] + E*src[step+0]));\ - OP(dst[1], (A*src[1] + E*src[step+1]));\ - OP(dst[2], (A*src[2] + E*src[step+2]));\ - OP(dst[3], (A*src[3] + E*src[step+3]));\ - OP(dst[4], (A*src[4] + E*src[step+4]));\ - OP(dst[5], (A*src[5] + E*src[step+5]));\ - OP(dst[6], (A*src[6] + E*src[step+6]));\ - OP(dst[7], (A*src[7] + E*src[step+7]));\ - dst+= stride;\ - src+= stride;\ - }\ - }\ -} - -#define op_avg(a, b) a = (((a)+(((b) + 32)>>6)+1)>>1) -#define op_put(a, b) a = (((b) + 32)>>6) - -H264_CHROMA_MC(put_ , op_put) -H264_CHROMA_MC(avg_ , op_avg) -#undef op_avg -#undef op_put - #define QPEL_MC(r, OPNAME, RND, OP) \ static void OPNAME ## mpeg4_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\ uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\ @@ -2100,433 +1329,6 @@ QPEL_MC(0, avg_ , _ , op_avg) #define put_no_rnd_qpel8_mc00_c ff_put_pixels8x8_c #define put_no_rnd_qpel16_mc00_c ff_put_pixels16x16_c -#define H264_LOWPASS(OPNAME, OP, OP2) \ -static av_unused void OPNAME ## h264_qpel2_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ - const int h=2;\ - uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\ - int i;\ - for(i=0; i<h; i++)\ - {\ - OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]));\ - OP(dst[1], (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]));\ - dst+=dstStride;\ - src+=srcStride;\ - }\ -}\ -\ -static av_unused void OPNAME ## h264_qpel2_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ - const int w=2;\ - uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\ - int i;\ - for(i=0; i<w; i++)\ - {\ - const int srcB= src[-2*srcStride];\ - const int srcA= src[-1*srcStride];\ - const int src0= src[0 *srcStride];\ - const int src1= src[1 *srcStride];\ - const int src2= src[2 *srcStride];\ - const int src3= src[3 *srcStride];\ - const int src4= src[4 *srcStride];\ - OP(dst[0*dstStride], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\ - OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\ - dst++;\ - src++;\ - }\ -}\ -\ -static av_unused void OPNAME ## h264_qpel2_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\ - const int h=2;\ - const int w=2;\ - uint8_t *cm = ff_cropTbl + 
MAX_NEG_CROP;\ - int i;\ - src -= 2*srcStride;\ - for(i=0; i<h+5; i++)\ - {\ - tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]);\ - tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]);\ - tmp+=tmpStride;\ - src+=srcStride;\ - }\ - tmp -= tmpStride*(h+5-2);\ - for(i=0; i<w; i++)\ - {\ - const int tmpB= tmp[-2*tmpStride];\ - const int tmpA= tmp[-1*tmpStride];\ - const int tmp0= tmp[0 *tmpStride];\ - const int tmp1= tmp[1 *tmpStride];\ - const int tmp2= tmp[2 *tmpStride];\ - const int tmp3= tmp[3 *tmpStride];\ - const int tmp4= tmp[4 *tmpStride];\ - OP2(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));\ - OP2(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));\ - dst++;\ - tmp++;\ - }\ -}\ -static void OPNAME ## h264_qpel4_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ - const int h=4;\ - uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\ - int i;\ - for(i=0; i<h; i++)\ - {\ - OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]));\ - OP(dst[1], (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]));\ - OP(dst[2], (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5]));\ - OP(dst[3], (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6]));\ - dst+=dstStride;\ - src+=srcStride;\ - }\ -}\ -\ -static void OPNAME ## h264_qpel4_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ - const int w=4;\ - uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\ - int i;\ - for(i=0; i<w; i++)\ - {\ - const int srcB= src[-2*srcStride];\ - const int srcA= src[-1*srcStride];\ - const int src0= src[0 *srcStride];\ - const int src1= src[1 *srcStride];\ - const int src2= src[2 *srcStride];\ - const int src3= src[3 *srcStride];\ - const int src4= src[4 *srcStride];\ - const int src5= src[5 *srcStride];\ - const int src6= src[6 *srcStride];\ - OP(dst[0*dstStride], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\ - OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\ - OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*5 + (src0+src5));\ - OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*5 + (src1+src6));\ - dst++;\ - src++;\ - }\ -}\ -\ -static void OPNAME ## h264_qpel4_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\ - const int h=4;\ - const int w=4;\ - uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\ - int i;\ - src -= 2*srcStride;\ - for(i=0; i<h+5; i++)\ - {\ - tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]);\ - tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]);\ - tmp[2]= (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5]);\ - tmp[3]= (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6]);\ - tmp+=tmpStride;\ - src+=srcStride;\ - }\ - tmp -= tmpStride*(h+5-2);\ - for(i=0; i<w; i++)\ - {\ - const int tmpB= tmp[-2*tmpStride];\ - const int tmpA= tmp[-1*tmpStride];\ - const int tmp0= tmp[0 *tmpStride];\ - const int tmp1= tmp[1 *tmpStride];\ - const int tmp2= tmp[2 *tmpStride];\ - const int tmp3= tmp[3 *tmpStride];\ - const int tmp4= tmp[4 *tmpStride];\ - const int tmp5= tmp[5 *tmpStride];\ - const int tmp6= tmp[6 *tmpStride];\ - OP2(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));\ - OP2(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));\ - OP2(dst[2*dstStride], (tmp2+tmp3)*20 - (tmp1+tmp4)*5 + (tmp0+tmp5));\ - OP2(dst[3*dstStride], (tmp3+tmp4)*20 - (tmp2+tmp5)*5 + (tmp1+tmp6));\ - dst++;\ - tmp++;\ - }\ -}\ -\ -static void OPNAME ## h264_qpel8_h_lowpass(uint8_t *dst, 
uint8_t *src, int dstStride, int srcStride){\ - const int h=8;\ - uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\ - int i;\ - for(i=0; i<h; i++)\ - {\ - OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3 ]));\ - OP(dst[1], (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4 ]));\ - OP(dst[2], (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5 ]));\ - OP(dst[3], (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6 ]));\ - OP(dst[4], (src[4]+src[5])*20 - (src[3 ]+src[6])*5 + (src[2 ]+src[7 ]));\ - OP(dst[5], (src[5]+src[6])*20 - (src[4 ]+src[7])*5 + (src[3 ]+src[8 ]));\ - OP(dst[6], (src[6]+src[7])*20 - (src[5 ]+src[8])*5 + (src[4 ]+src[9 ]));\ - OP(dst[7], (src[7]+src[8])*20 - (src[6 ]+src[9])*5 + (src[5 ]+src[10]));\ - dst+=dstStride;\ - src+=srcStride;\ - }\ -}\ -\ -static void OPNAME ## h264_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ - const int w=8;\ - uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\ - int i;\ - for(i=0; i<w; i++)\ - {\ - const int srcB= src[-2*srcStride];\ - const int srcA= src[-1*srcStride];\ - const int src0= src[0 *srcStride];\ - const int src1= src[1 *srcStride];\ - const int src2= src[2 *srcStride];\ - const int src3= src[3 *srcStride];\ - const int src4= src[4 *srcStride];\ - const int src5= src[5 *srcStride];\ - const int src6= src[6 *srcStride];\ - const int src7= src[7 *srcStride];\ - const int src8= src[8 *srcStride];\ - const int src9= src[9 *srcStride];\ - const int src10=src[10*srcStride];\ - OP(dst[0*dstStride], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\ - OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\ - OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*5 + (src0+src5));\ - OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*5 + (src1+src6));\ - OP(dst[4*dstStride], (src4+src5)*20 - (src3+src6)*5 + (src2+src7));\ - OP(dst[5*dstStride], (src5+src6)*20 - (src4+src7)*5 + (src3+src8));\ - OP(dst[6*dstStride], (src6+src7)*20 - (src5+src8)*5 + (src4+src9));\ - OP(dst[7*dstStride], (src7+src8)*20 - (src6+src9)*5 + (src5+src10));\ - dst++;\ - src++;\ - }\ -}\ -\ -static void OPNAME ## h264_qpel8_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\ - const int h=8;\ - const int w=8;\ - uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\ - int i;\ - src -= 2*srcStride;\ - for(i=0; i<h+5; i++)\ - {\ - tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3 ]);\ - tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4 ]);\ - tmp[2]= (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5 ]);\ - tmp[3]= (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6 ]);\ - tmp[4]= (src[4]+src[5])*20 - (src[3 ]+src[6])*5 + (src[2 ]+src[7 ]);\ - tmp[5]= (src[5]+src[6])*20 - (src[4 ]+src[7])*5 + (src[3 ]+src[8 ]);\ - tmp[6]= (src[6]+src[7])*20 - (src[5 ]+src[8])*5 + (src[4 ]+src[9 ]);\ - tmp[7]= (src[7]+src[8])*20 - (src[6 ]+src[9])*5 + (src[5 ]+src[10]);\ - tmp+=tmpStride;\ - src+=srcStride;\ - }\ - tmp -= tmpStride*(h+5-2);\ - for(i=0; i<w; i++)\ - {\ - const int tmpB= tmp[-2*tmpStride];\ - const int tmpA= tmp[-1*tmpStride];\ - const int tmp0= tmp[0 *tmpStride];\ - const int tmp1= tmp[1 *tmpStride];\ - const int tmp2= tmp[2 *tmpStride];\ - const int tmp3= tmp[3 *tmpStride];\ - const int tmp4= tmp[4 *tmpStride];\ - const int tmp5= tmp[5 *tmpStride];\ - const int tmp6= tmp[6 *tmpStride];\ - const int tmp7= tmp[7 *tmpStride];\ - const int tmp8= tmp[8 *tmpStride];\ - const int tmp9= tmp[9 *tmpStride];\ - const int tmp10=tmp[10*tmpStride];\ - 
OP2(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));\ - OP2(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));\ - OP2(dst[2*dstStride], (tmp2+tmp3)*20 - (tmp1+tmp4)*5 + (tmp0+tmp5));\ - OP2(dst[3*dstStride], (tmp3+tmp4)*20 - (tmp2+tmp5)*5 + (tmp1+tmp6));\ - OP2(dst[4*dstStride], (tmp4+tmp5)*20 - (tmp3+tmp6)*5 + (tmp2+tmp7));\ - OP2(dst[5*dstStride], (tmp5+tmp6)*20 - (tmp4+tmp7)*5 + (tmp3+tmp8));\ - OP2(dst[6*dstStride], (tmp6+tmp7)*20 - (tmp5+tmp8)*5 + (tmp4+tmp9));\ - OP2(dst[7*dstStride], (tmp7+tmp8)*20 - (tmp6+tmp9)*5 + (tmp5+tmp10));\ - dst++;\ - tmp++;\ - }\ -}\ -\ -static void OPNAME ## h264_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ - OPNAME ## h264_qpel8_v_lowpass(dst , src , dstStride, srcStride);\ - OPNAME ## h264_qpel8_v_lowpass(dst+8, src+8, dstStride, srcStride);\ - src += 8*srcStride;\ - dst += 8*dstStride;\ - OPNAME ## h264_qpel8_v_lowpass(dst , src , dstStride, srcStride);\ - OPNAME ## h264_qpel8_v_lowpass(dst+8, src+8, dstStride, srcStride);\ -}\ -\ -static void OPNAME ## h264_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ - OPNAME ## h264_qpel8_h_lowpass(dst , src , dstStride, srcStride);\ - OPNAME ## h264_qpel8_h_lowpass(dst+8, src+8, dstStride, srcStride);\ - src += 8*srcStride;\ - dst += 8*dstStride;\ - OPNAME ## h264_qpel8_h_lowpass(dst , src , dstStride, srcStride);\ - OPNAME ## h264_qpel8_h_lowpass(dst+8, src+8, dstStride, srcStride);\ -}\ -\ -static void OPNAME ## h264_qpel16_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\ - OPNAME ## h264_qpel8_hv_lowpass(dst , tmp , src , dstStride, tmpStride, srcStride);\ - OPNAME ## h264_qpel8_hv_lowpass(dst+8, tmp+8, src+8, dstStride, tmpStride, srcStride);\ - src += 8*srcStride;\ - dst += 8*dstStride;\ - OPNAME ## h264_qpel8_hv_lowpass(dst , tmp , src , dstStride, tmpStride, srcStride);\ - OPNAME ## h264_qpel8_hv_lowpass(dst+8, tmp+8, src+8, dstStride, tmpStride, srcStride);\ -}\ - -#define H264_MC(OPNAME, SIZE) \ -static av_unused void OPNAME ## h264_qpel ## SIZE ## _mc00_c (uint8_t *dst, uint8_t *src, int stride){\ - OPNAME ## pixels ## SIZE ## _c(dst, src, stride, SIZE);\ -}\ -\ -static void OPNAME ## h264_qpel ## SIZE ## _mc10_c(uint8_t *dst, uint8_t *src, int stride){\ - uint8_t half[SIZE*SIZE];\ - put_h264_qpel ## SIZE ## _h_lowpass(half, src, SIZE, stride);\ - OPNAME ## pixels ## SIZE ## _l2(dst, src, half, stride, stride, SIZE, SIZE);\ -}\ -\ -static void OPNAME ## h264_qpel ## SIZE ## _mc20_c(uint8_t *dst, uint8_t *src, int stride){\ - OPNAME ## h264_qpel ## SIZE ## _h_lowpass(dst, src, stride, stride);\ -}\ -\ -static void OPNAME ## h264_qpel ## SIZE ## _mc30_c(uint8_t *dst, uint8_t *src, int stride){\ - uint8_t half[SIZE*SIZE];\ - put_h264_qpel ## SIZE ## _h_lowpass(half, src, SIZE, stride);\ - OPNAME ## pixels ## SIZE ## _l2(dst, src+1, half, stride, stride, SIZE, SIZE);\ -}\ -\ -static void OPNAME ## h264_qpel ## SIZE ## _mc01_c(uint8_t *dst, uint8_t *src, int stride){\ - uint8_t full[SIZE*(SIZE+5)];\ - uint8_t * const full_mid= full + SIZE*2;\ - uint8_t half[SIZE*SIZE];\ - copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\ - put_h264_qpel ## SIZE ## _v_lowpass(half, full_mid, SIZE, SIZE);\ - OPNAME ## pixels ## SIZE ## _l2(dst, full_mid, half, stride, SIZE, SIZE, SIZE);\ -}\ -\ -static void OPNAME ## h264_qpel ## SIZE ## _mc02_c(uint8_t *dst, uint8_t *src, int stride){\ - uint8_t full[SIZE*(SIZE+5)];\ - uint8_t * const full_mid= full + SIZE*2;\ - 
copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\ - OPNAME ## h264_qpel ## SIZE ## _v_lowpass(dst, full_mid, stride, SIZE);\ -}\ -\ -static void OPNAME ## h264_qpel ## SIZE ## _mc03_c(uint8_t *dst, uint8_t *src, int stride){\ - uint8_t full[SIZE*(SIZE+5)];\ - uint8_t * const full_mid= full + SIZE*2;\ - uint8_t half[SIZE*SIZE];\ - copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\ - put_h264_qpel ## SIZE ## _v_lowpass(half, full_mid, SIZE, SIZE);\ - OPNAME ## pixels ## SIZE ## _l2(dst, full_mid+SIZE, half, stride, SIZE, SIZE, SIZE);\ -}\ -\ -static void OPNAME ## h264_qpel ## SIZE ## _mc11_c(uint8_t *dst, uint8_t *src, int stride){\ - uint8_t full[SIZE*(SIZE+5)];\ - uint8_t * const full_mid= full + SIZE*2;\ - uint8_t halfH[SIZE*SIZE];\ - uint8_t halfV[SIZE*SIZE];\ - put_h264_qpel ## SIZE ## _h_lowpass(halfH, src, SIZE, stride);\ - copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\ - put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\ - OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\ -}\ -\ -static void OPNAME ## h264_qpel ## SIZE ## _mc31_c(uint8_t *dst, uint8_t *src, int stride){\ - uint8_t full[SIZE*(SIZE+5)];\ - uint8_t * const full_mid= full + SIZE*2;\ - uint8_t halfH[SIZE*SIZE];\ - uint8_t halfV[SIZE*SIZE];\ - put_h264_qpel ## SIZE ## _h_lowpass(halfH, src, SIZE, stride);\ - copy_block ## SIZE (full, src - stride*2 + 1, SIZE, stride, SIZE + 5);\ - put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\ - OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\ -}\ -\ -static void OPNAME ## h264_qpel ## SIZE ## _mc13_c(uint8_t *dst, uint8_t *src, int stride){\ - uint8_t full[SIZE*(SIZE+5)];\ - uint8_t * const full_mid= full + SIZE*2;\ - uint8_t halfH[SIZE*SIZE];\ - uint8_t halfV[SIZE*SIZE];\ - put_h264_qpel ## SIZE ## _h_lowpass(halfH, src + stride, SIZE, stride);\ - copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\ - put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\ - OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\ -}\ -\ -static void OPNAME ## h264_qpel ## SIZE ## _mc33_c(uint8_t *dst, uint8_t *src, int stride){\ - uint8_t full[SIZE*(SIZE+5)];\ - uint8_t * const full_mid= full + SIZE*2;\ - uint8_t halfH[SIZE*SIZE];\ - uint8_t halfV[SIZE*SIZE];\ - put_h264_qpel ## SIZE ## _h_lowpass(halfH, src + stride, SIZE, stride);\ - copy_block ## SIZE (full, src - stride*2 + 1, SIZE, stride, SIZE + 5);\ - put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\ - OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\ -}\ -\ -static void OPNAME ## h264_qpel ## SIZE ## _mc22_c(uint8_t *dst, uint8_t *src, int stride){\ - int16_t tmp[SIZE*(SIZE+5)];\ - OPNAME ## h264_qpel ## SIZE ## _hv_lowpass(dst, tmp, src, stride, SIZE, stride);\ -}\ -\ -static void OPNAME ## h264_qpel ## SIZE ## _mc21_c(uint8_t *dst, uint8_t *src, int stride){\ - int16_t tmp[SIZE*(SIZE+5)];\ - uint8_t halfH[SIZE*SIZE];\ - uint8_t halfHV[SIZE*SIZE];\ - put_h264_qpel ## SIZE ## _h_lowpass(halfH, src, SIZE, stride);\ - put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\ - OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfHV, stride, SIZE, SIZE, SIZE);\ -}\ -\ -static void OPNAME ## h264_qpel ## SIZE ## _mc23_c(uint8_t *dst, uint8_t *src, int stride){\ - int16_t tmp[SIZE*(SIZE+5)];\ - uint8_t halfH[SIZE*SIZE];\ - uint8_t halfHV[SIZE*SIZE];\ - put_h264_qpel ## SIZE ## _h_lowpass(halfH, 
src + stride, SIZE, stride);\ - put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\ - OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfHV, stride, SIZE, SIZE, SIZE);\ -}\ -\ -static void OPNAME ## h264_qpel ## SIZE ## _mc12_c(uint8_t *dst, uint8_t *src, int stride){\ - uint8_t full[SIZE*(SIZE+5)];\ - uint8_t * const full_mid= full + SIZE*2;\ - int16_t tmp[SIZE*(SIZE+5)];\ - uint8_t halfV[SIZE*SIZE];\ - uint8_t halfHV[SIZE*SIZE];\ - copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\ - put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\ - put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\ - OPNAME ## pixels ## SIZE ## _l2(dst, halfV, halfHV, stride, SIZE, SIZE, SIZE);\ -}\ -\ -static void OPNAME ## h264_qpel ## SIZE ## _mc32_c(uint8_t *dst, uint8_t *src, int stride){\ - uint8_t full[SIZE*(SIZE+5)];\ - uint8_t * const full_mid= full + SIZE*2;\ - int16_t tmp[SIZE*(SIZE+5)];\ - uint8_t halfV[SIZE*SIZE];\ - uint8_t halfHV[SIZE*SIZE];\ - copy_block ## SIZE (full, src - stride*2 + 1, SIZE, stride, SIZE + 5);\ - put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\ - put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\ - OPNAME ## pixels ## SIZE ## _l2(dst, halfV, halfHV, stride, SIZE, SIZE, SIZE);\ -}\ - -#define op_avg(a, b) a = (((a)+cm[((b) + 16)>>5]+1)>>1) -//#define op_avg2(a, b) a = (((a)*w1+cm[((b) + 16)>>5]*w2 + o + 64)>>7) -#define op_put(a, b) a = cm[((b) + 16)>>5] -#define op2_avg(a, b) a = (((a)+cm[((b) + 512)>>10]+1)>>1) -#define op2_put(a, b) a = cm[((b) + 512)>>10] - -H264_LOWPASS(put_ , op_put, op2_put) -H264_LOWPASS(avg_ , op_avg, op2_avg) -H264_MC(put_, 2) -H264_MC(put_, 4) -H264_MC(put_, 8) -H264_MC(put_, 16) -H264_MC(avg_, 4) -H264_MC(avg_, 8) -H264_MC(avg_, 16) - -#undef op_avg -#undef op_put -#undef op2_avg -#undef op2_put - -#define put_h264_qpel8_mc00_c ff_put_pixels8x8_c -#define avg_h264_qpel8_mc00_c ff_avg_pixels8x8_c -#define put_h264_qpel16_mc00_c ff_put_pixels16x16_c -#define avg_h264_qpel16_mc00_c ff_avg_pixels16x16_c - static void wmv2_mspel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){ uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; int i; @@ -2545,19 +1347,6 @@ static void wmv2_mspel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int } } -void ff_put_pixels8x8_c(uint8_t *dst, uint8_t *src, int stride) { - put_pixels8_c(dst, src, stride, 8); -} -void ff_avg_pixels8x8_c(uint8_t *dst, uint8_t *src, int stride) { - avg_pixels8_c(dst, src, stride, 8); -} -void ff_put_pixels16x16_c(uint8_t *dst, uint8_t *src, int stride) { - put_pixels16_c(dst, src, stride, 16); -} -void ff_avg_pixels16x16_c(uint8_t *dst, uint8_t *src, int stride) { - avg_pixels16_c(dst, src, stride, 16); -} - #if CONFIG_RV40_DECODER static void put_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){ put_pixels16_xy2_c(dst, src, stride, 16); @@ -3115,19 +1904,6 @@ void ff_set_cmp(DSPContext* c, me_cmp_func *cmp, int type){ } } -static void clear_block_c(DCTELEM *block) -{ - memset(block, 0, sizeof(DCTELEM)*64); -} - -/** - * memset(blocks, 0, sizeof(DCTELEM)*6*64) - */ -static void clear_blocks_c(DCTELEM *blocks) -{ - memset(blocks, 0, sizeof(DCTELEM)*6*64); -} - static void add_bytes_c(uint8_t *dst, uint8_t *src, int w){ long i; for(i=0; i<=w-sizeof(long); i+=sizeof(long)){ diff --git a/libavcodec/dsputil_template.c b/libavcodec/dsputil_template.c new file mode 100644 index 0000000000..866817ce2e --- /dev/null +++ b/libavcodec/dsputil_template.c 
@@ -0,0 +1,1257 @@ +/* + * DSP utils + * Copyright (c) 2000, 2001 Fabrice Bellard + * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at> + * + * gmc & q-pel & 32/64 bit based MC by Michael Niedermayer <michaelni@gmx.at> + * + * This file is part of Libav. + * + * Libav is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * Libav is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with Libav; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/** + * @file + * DSP utils + */ + +#include "dsputil.h" + +/* draw the edges of width 'w' of an image of size width, height */ +//FIXME check that this is ok for mpeg4 interlaced +static void draw_edges_c(uint8_t *buf, int wrap, int width, int height, int w, int sides) +{ + uint8_t *ptr, *last_line; + int i; + + /* left and right */ + ptr = buf; + for(i=0;i<height;i++) { + memset(ptr - w, ptr[0], w); + memset(ptr + width, ptr[width-1], w); + ptr += wrap; + } + + /* top and bottom + corners */ + buf -= w; + last_line = buf + (height - 1) * wrap; + if (sides & EDGE_TOP) + for(i = 0; i < w; i++) + memcpy(buf - (i + 1) * wrap, buf, width + w + w); // top + if (sides & EDGE_BOTTOM) + for (i = 0; i < w; i++) + memcpy(last_line + (i + 1) * wrap, last_line, width + w + w); // bottom +} + +/** + * Copy a rectangular area of samples to a temporary buffer and replicate the border samples. 
+ * @param buf destination buffer + * @param src source buffer + * @param linesize number of bytes between 2 vertically adjacent samples in both the source and destination buffers + * @param block_w width of block + * @param block_h height of block + * @param src_x x coordinate of the top left sample of the block in the source buffer + * @param src_y y coordinate of the top left sample of the block in the source buffer + * @param w width of the source buffer + * @param h height of the source buffer + */ +void ff_emulated_edge_mc(uint8_t *buf, const uint8_t *src, int linesize, int block_w, int block_h, + int src_x, int src_y, int w, int h){ + int x, y; + int start_y, start_x, end_y, end_x; + + if(src_y>= h){ + src+= (h-1-src_y)*linesize; + src_y=h-1; + }else if(src_y<=-block_h){ + src+= (1-block_h-src_y)*linesize; + src_y=1-block_h; + } + if(src_x>= w){ + src+= (w-1-src_x); + src_x=w-1; + }else if(src_x<=-block_w){ + src+= (1-block_w-src_x); + src_x=1-block_w; + } + + start_y= FFMAX(0, -src_y); + start_x= FFMAX(0, -src_x); + end_y= FFMIN(block_h, h-src_y); + end_x= FFMIN(block_w, w-src_x); + assert(start_y < end_y && block_h); + assert(start_x < end_x && block_w); + + w = end_x - start_x; + src += start_y*linesize + start_x; + buf += start_x; + + //top + for(y=0; y<start_y; y++){ + memcpy(buf, src, w); + buf += linesize; + } + + // copy existing part + for(; y<end_y; y++){ + memcpy(buf, src, w); + src += linesize; + buf += linesize; + } + + //bottom + src -= linesize; + for(; y<block_h; y++){ + memcpy(buf, src, w); + buf += linesize; + } + + buf -= block_h * linesize + start_x; + while (block_h--){ + //left + for(x=0; x<start_x; x++){ + buf[x] = buf[start_x]; + } + + //right + for(x=end_x; x<block_w; x++){ + buf[x] = buf[end_x - 1]; + } + buf += linesize; + } +} + +static void add_pixels8_c(uint8_t *restrict pixels, DCTELEM *block, int line_size) +{ + int i; + for(i=0;i<8;i++) { + pixels[0] += block[0]; + pixels[1] += block[1]; + pixels[2] += block[2]; + pixels[3] += block[3]; + pixels[4] += block[4]; + pixels[5] += block[5]; + pixels[6] += block[6]; + pixels[7] += block[7]; + pixels += line_size; + block += 8; + } +} + +static void add_pixels4_c(uint8_t *restrict pixels, DCTELEM *block, int line_size) +{ + int i; + for(i=0;i<4;i++) { + pixels[0] += block[0]; + pixels[1] += block[1]; + pixels[2] += block[2]; + pixels[3] += block[3]; + pixels += line_size; + block += 4; + } +} + +#if 0 + +#define PIXOP2(OPNAME, OP) \ +static void OPNAME ## _pixels(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ +{\ + int i;\ + for(i=0; i<h; i++){\ + OP(*((uint64_t*)block), AV_RN64(pixels));\ + pixels+=line_size;\ + block +=line_size;\ + }\ +}\ +\ +static void OPNAME ## _no_rnd_pixels_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ +{\ + int i;\ + for(i=0; i<h; i++){\ + const uint64_t a= AV_RN64(pixels );\ + const uint64_t b= AV_RN64(pixels+1);\ + OP(*((uint64_t*)block), (a&b) + (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\ + pixels+=line_size;\ + block +=line_size;\ + }\ +}\ +\ +static void OPNAME ## _pixels_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ +{\ + int i;\ + for(i=0; i<h; i++){\ + const uint64_t a= AV_RN64(pixels );\ + const uint64_t b= AV_RN64(pixels+1);\ + OP(*((uint64_t*)block), (a|b) - (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\ + pixels+=line_size;\ + block +=line_size;\ + }\ +}\ +\ +static void OPNAME ## _no_rnd_pixels_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ +{\ + int i;\ + for(i=0; i<h; i++){\ + const uint64_t a= 
AV_RN64(pixels );\ + const uint64_t b= AV_RN64(pixels+line_size);\ + OP(*((uint64_t*)block), (a&b) + (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\ + pixels+=line_size;\ + block +=line_size;\ + }\ +}\ +\ +static void OPNAME ## _pixels_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ +{\ + int i;\ + for(i=0; i<h; i++){\ + const uint64_t a= AV_RN64(pixels );\ + const uint64_t b= AV_RN64(pixels+line_size);\ + OP(*((uint64_t*)block), (a|b) - (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\ + pixels+=line_size;\ + block +=line_size;\ + }\ +}\ +\ +static void OPNAME ## _pixels_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ +{\ + int i;\ + const uint64_t a= AV_RN64(pixels );\ + const uint64_t b= AV_RN64(pixels+1);\ + uint64_t l0= (a&0x0303030303030303ULL)\ + + (b&0x0303030303030303ULL)\ + + 0x0202020202020202ULL;\ + uint64_t h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\ + + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\ + uint64_t l1,h1;\ +\ + pixels+=line_size;\ + for(i=0; i<h; i+=2){\ + uint64_t a= AV_RN64(pixels );\ + uint64_t b= AV_RN64(pixels+1);\ + l1= (a&0x0303030303030303ULL)\ + + (b&0x0303030303030303ULL);\ + h1= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\ + + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\ + OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\ + pixels+=line_size;\ + block +=line_size;\ + a= AV_RN64(pixels );\ + b= AV_RN64(pixels+1);\ + l0= (a&0x0303030303030303ULL)\ + + (b&0x0303030303030303ULL)\ + + 0x0202020202020202ULL;\ + h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\ + + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\ + OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\ + pixels+=line_size;\ + block +=line_size;\ + }\ +}\ +\ +static void OPNAME ## _no_rnd_pixels_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ +{\ + int i;\ + const uint64_t a= AV_RN64(pixels );\ + const uint64_t b= AV_RN64(pixels+1);\ + uint64_t l0= (a&0x0303030303030303ULL)\ + + (b&0x0303030303030303ULL)\ + + 0x0101010101010101ULL;\ + uint64_t h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\ + + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\ + uint64_t l1,h1;\ +\ + pixels+=line_size;\ + for(i=0; i<h; i+=2){\ + uint64_t a= AV_RN64(pixels );\ + uint64_t b= AV_RN64(pixels+1);\ + l1= (a&0x0303030303030303ULL)\ + + (b&0x0303030303030303ULL);\ + h1= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\ + + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\ + OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\ + pixels+=line_size;\ + block +=line_size;\ + a= AV_RN64(pixels );\ + b= AV_RN64(pixels+1);\ + l0= (a&0x0303030303030303ULL)\ + + (b&0x0303030303030303ULL)\ + + 0x0101010101010101ULL;\ + h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\ + + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\ + OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\ + pixels+=line_size;\ + block +=line_size;\ + }\ +}\ +\ +CALL_2X_PIXELS(OPNAME ## _pixels16_c , OPNAME ## _pixels_c , 8)\ +CALL_2X_PIXELS(OPNAME ## _pixels16_x2_c , OPNAME ## _pixels_x2_c , 8)\ +CALL_2X_PIXELS(OPNAME ## _pixels16_y2_c , OPNAME ## _pixels_y2_c , 8)\ +CALL_2X_PIXELS(OPNAME ## _pixels16_xy2_c, OPNAME ## _pixels_xy2_c, 8)\ +CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_x2_c , OPNAME ## _no_rnd_pixels_x2_c , 8)\ +CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_y2_c , OPNAME ## _no_rnd_pixels_y2_c , 8)\ +CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_xy2_c, OPNAME ## _no_rnd_pixels_xy2_c, 8) + +#define op_avg(a, b) a = ( ((a)|(b)) - ((((a)^(b))&0xFEFEFEFEFEFEFEFEULL)>>1) ) +#else // 64 bit variant + +#define PIXOP2(OPNAME, OP) \ +static void OPNAME ## _pixels2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ + 
int i;\ + for(i=0; i<h; i++){\ + OP(*((uint16_t*)(block )), AV_RN16(pixels ));\ + pixels+=line_size;\ + block +=line_size;\ + }\ +}\ +static void OPNAME ## _pixels4_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ + int i;\ + for(i=0; i<h; i++){\ + OP(*((uint32_t*)(block )), AV_RN32(pixels ));\ + pixels+=line_size;\ + block +=line_size;\ + }\ +}\ +static void OPNAME ## _pixels8_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ + int i;\ + for(i=0; i<h; i++){\ + OP(*((uint32_t*)(block )), AV_RN32(pixels ));\ + OP(*((uint32_t*)(block+4)), AV_RN32(pixels+4));\ + pixels+=line_size;\ + block +=line_size;\ + }\ +}\ +static inline void OPNAME ## _no_rnd_pixels8_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ + OPNAME ## _pixels8_c(block, pixels, line_size, h);\ +}\ +\ +static inline void OPNAME ## _no_rnd_pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \ + int src_stride1, int src_stride2, int h){\ + int i;\ + for(i=0; i<h; i++){\ + uint32_t a,b;\ + a= AV_RN32(&src1[i*src_stride1 ]);\ + b= AV_RN32(&src2[i*src_stride2 ]);\ + OP(*((uint32_t*)&dst[i*dst_stride ]), no_rnd_avg32(a, b));\ + a= AV_RN32(&src1[i*src_stride1+4]);\ + b= AV_RN32(&src2[i*src_stride2+4]);\ + OP(*((uint32_t*)&dst[i*dst_stride+4]), no_rnd_avg32(a, b));\ + }\ +}\ +\ +static inline void OPNAME ## _pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \ + int src_stride1, int src_stride2, int h){\ + int i;\ + for(i=0; i<h; i++){\ + uint32_t a,b;\ + a= AV_RN32(&src1[i*src_stride1 ]);\ + b= AV_RN32(&src2[i*src_stride2 ]);\ + OP(*((uint32_t*)&dst[i*dst_stride ]), rnd_avg32(a, b));\ + a= AV_RN32(&src1[i*src_stride1+4]);\ + b= AV_RN32(&src2[i*src_stride2+4]);\ + OP(*((uint32_t*)&dst[i*dst_stride+4]), rnd_avg32(a, b));\ + }\ +}\ +\ +static inline void OPNAME ## _pixels4_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \ + int src_stride1, int src_stride2, int h){\ + int i;\ + for(i=0; i<h; i++){\ + uint32_t a,b;\ + a= AV_RN32(&src1[i*src_stride1 ]);\ + b= AV_RN32(&src2[i*src_stride2 ]);\ + OP(*((uint32_t*)&dst[i*dst_stride ]), rnd_avg32(a, b));\ + }\ +}\ +\ +static inline void OPNAME ## _pixels2_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \ + int src_stride1, int src_stride2, int h){\ + int i;\ + for(i=0; i<h; i++){\ + uint32_t a,b;\ + a= AV_RN16(&src1[i*src_stride1 ]);\ + b= AV_RN16(&src2[i*src_stride2 ]);\ + OP(*((uint16_t*)&dst[i*dst_stride ]), rnd_avg32(a, b));\ + }\ +}\ +\ +static inline void OPNAME ## _pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \ + int src_stride1, int src_stride2, int h){\ + OPNAME ## _pixels8_l2(dst , src1 , src2 , dst_stride, src_stride1, src_stride2, h);\ + OPNAME ## _pixels8_l2(dst+8, src1+8, src2+8, dst_stride, src_stride1, src_stride2, h);\ +}\ +\ +static inline void OPNAME ## _no_rnd_pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \ + int src_stride1, int src_stride2, int h){\ + OPNAME ## _no_rnd_pixels8_l2(dst , src1 , src2 , dst_stride, src_stride1, src_stride2, h);\ + OPNAME ## _no_rnd_pixels8_l2(dst+8, src1+8, src2+8, dst_stride, src_stride1, src_stride2, h);\ +}\ +\ +static inline void OPNAME ## _no_rnd_pixels8_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ + OPNAME ## _no_rnd_pixels8_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\ +}\ +\ +static inline void OPNAME ## _pixels8_x2_c(uint8_t *block, const uint8_t *pixels, 
int line_size, int h){\ + OPNAME ## _pixels8_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\ +}\ +\ +static inline void OPNAME ## _no_rnd_pixels8_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ + OPNAME ## _no_rnd_pixels8_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\ +}\ +\ +static inline void OPNAME ## _pixels8_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ + OPNAME ## _pixels8_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\ +}\ +\ +static inline void OPNAME ## _pixels8_l4(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, const uint8_t *src3, const uint8_t *src4,\ + int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\ + int i;\ + for(i=0; i<h; i++){\ + uint32_t a, b, c, d, l0, l1, h0, h1;\ + a= AV_RN32(&src1[i*src_stride1]);\ + b= AV_RN32(&src2[i*src_stride2]);\ + c= AV_RN32(&src3[i*src_stride3]);\ + d= AV_RN32(&src4[i*src_stride4]);\ + l0= (a&0x03030303UL)\ + + (b&0x03030303UL)\ + + 0x02020202UL;\ + h0= ((a&0xFCFCFCFCUL)>>2)\ + + ((b&0xFCFCFCFCUL)>>2);\ + l1= (c&0x03030303UL)\ + + (d&0x03030303UL);\ + h1= ((c&0xFCFCFCFCUL)>>2)\ + + ((d&0xFCFCFCFCUL)>>2);\ + OP(*((uint32_t*)&dst[i*dst_stride]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ + a= AV_RN32(&src1[i*src_stride1+4]);\ + b= AV_RN32(&src2[i*src_stride2+4]);\ + c= AV_RN32(&src3[i*src_stride3+4]);\ + d= AV_RN32(&src4[i*src_stride4+4]);\ + l0= (a&0x03030303UL)\ + + (b&0x03030303UL)\ + + 0x02020202UL;\ + h0= ((a&0xFCFCFCFCUL)>>2)\ + + ((b&0xFCFCFCFCUL)>>2);\ + l1= (c&0x03030303UL)\ + + (d&0x03030303UL);\ + h1= ((c&0xFCFCFCFCUL)>>2)\ + + ((d&0xFCFCFCFCUL)>>2);\ + OP(*((uint32_t*)&dst[i*dst_stride+4]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ + }\ +}\ +\ +static inline void OPNAME ## _pixels4_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ + OPNAME ## _pixels4_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\ +}\ +\ +static inline void OPNAME ## _pixels4_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ + OPNAME ## _pixels4_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\ +}\ +\ +static inline void OPNAME ## _pixels2_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ + OPNAME ## _pixels2_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\ +}\ +\ +static inline void OPNAME ## _pixels2_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ + OPNAME ## _pixels2_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\ +}\ +\ +static inline void OPNAME ## _no_rnd_pixels8_l4(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, const uint8_t *src3, const uint8_t *src4,\ + int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\ + int i;\ + for(i=0; i<h; i++){\ + uint32_t a, b, c, d, l0, l1, h0, h1;\ + a= AV_RN32(&src1[i*src_stride1]);\ + b= AV_RN32(&src2[i*src_stride2]);\ + c= AV_RN32(&src3[i*src_stride3]);\ + d= AV_RN32(&src4[i*src_stride4]);\ + l0= (a&0x03030303UL)\ + + (b&0x03030303UL)\ + + 0x01010101UL;\ + h0= ((a&0xFCFCFCFCUL)>>2)\ + + ((b&0xFCFCFCFCUL)>>2);\ + l1= (c&0x03030303UL)\ + + (d&0x03030303UL);\ + h1= ((c&0xFCFCFCFCUL)>>2)\ + + ((d&0xFCFCFCFCUL)>>2);\ + OP(*((uint32_t*)&dst[i*dst_stride]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ + a= AV_RN32(&src1[i*src_stride1+4]);\ + b= AV_RN32(&src2[i*src_stride2+4]);\ + c= AV_RN32(&src3[i*src_stride3+4]);\ + d= AV_RN32(&src4[i*src_stride4+4]);\ + l0= (a&0x03030303UL)\ + + 
(b&0x03030303UL)\ + + 0x01010101UL;\ + h0= ((a&0xFCFCFCFCUL)>>2)\ + + ((b&0xFCFCFCFCUL)>>2);\ + l1= (c&0x03030303UL)\ + + (d&0x03030303UL);\ + h1= ((c&0xFCFCFCFCUL)>>2)\ + + ((d&0xFCFCFCFCUL)>>2);\ + OP(*((uint32_t*)&dst[i*dst_stride+4]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ + }\ +}\ +static inline void OPNAME ## _pixels16_l4(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, const uint8_t *src3, const uint8_t *src4,\ + int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\ + OPNAME ## _pixels8_l4(dst , src1 , src2 , src3 , src4 , dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\ + OPNAME ## _pixels8_l4(dst+8, src1+8, src2+8, src3+8, src4+8, dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\ +}\ +static inline void OPNAME ## _no_rnd_pixels16_l4(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, const uint8_t *src3, const uint8_t *src4,\ + int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\ + OPNAME ## _no_rnd_pixels8_l4(dst , src1 , src2 , src3 , src4 , dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\ + OPNAME ## _no_rnd_pixels8_l4(dst+8, src1+8, src2+8, src3+8, src4+8, dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\ +}\ +\ +static inline void OPNAME ## _pixels2_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ +{\ + int i, a0, b0, a1, b1;\ + a0= pixels[0];\ + b0= pixels[1] + 2;\ + a0 += b0;\ + b0 += pixels[2];\ +\ + pixels+=line_size;\ + for(i=0; i<h; i+=2){\ + a1= pixels[0];\ + b1= pixels[1];\ + a1 += b1;\ + b1 += pixels[2];\ +\ + block[0]= (a1+a0)>>2; /* FIXME non put */\ + block[1]= (b1+b0)>>2;\ +\ + pixels+=line_size;\ + block +=line_size;\ +\ + a0= pixels[0];\ + b0= pixels[1] + 2;\ + a0 += b0;\ + b0 += pixels[2];\ +\ + block[0]= (a1+a0)>>2;\ + block[1]= (b1+b0)>>2;\ + pixels+=line_size;\ + block +=line_size;\ + }\ +}\ +\ +static inline void OPNAME ## _pixels4_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ +{\ + int i;\ + const uint32_t a= AV_RN32(pixels );\ + const uint32_t b= AV_RN32(pixels+1);\ + uint32_t l0= (a&0x03030303UL)\ + + (b&0x03030303UL)\ + + 0x02020202UL;\ + uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\ + + ((b&0xFCFCFCFCUL)>>2);\ + uint32_t l1,h1;\ +\ + pixels+=line_size;\ + for(i=0; i<h; i+=2){\ + uint32_t a= AV_RN32(pixels );\ + uint32_t b= AV_RN32(pixels+1);\ + l1= (a&0x03030303UL)\ + + (b&0x03030303UL);\ + h1= ((a&0xFCFCFCFCUL)>>2)\ + + ((b&0xFCFCFCFCUL)>>2);\ + OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ + pixels+=line_size;\ + block +=line_size;\ + a= AV_RN32(pixels );\ + b= AV_RN32(pixels+1);\ + l0= (a&0x03030303UL)\ + + (b&0x03030303UL)\ + + 0x02020202UL;\ + h0= ((a&0xFCFCFCFCUL)>>2)\ + + ((b&0xFCFCFCFCUL)>>2);\ + OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ + pixels+=line_size;\ + block +=line_size;\ + }\ +}\ +\ +static inline void OPNAME ## _pixels8_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ +{\ + int j;\ + for(j=0; j<2; j++){\ + int i;\ + const uint32_t a= AV_RN32(pixels );\ + const uint32_t b= AV_RN32(pixels+1);\ + uint32_t l0= (a&0x03030303UL)\ + + (b&0x03030303UL)\ + + 0x02020202UL;\ + uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\ + + ((b&0xFCFCFCFCUL)>>2);\ + uint32_t l1,h1;\ +\ + pixels+=line_size;\ + for(i=0; i<h; i+=2){\ + uint32_t a= AV_RN32(pixels );\ + uint32_t b= AV_RN32(pixels+1);\ + l1= (a&0x03030303UL)\ + + (b&0x03030303UL);\ + h1= ((a&0xFCFCFCFCUL)>>2)\ + + ((b&0xFCFCFCFCUL)>>2);\ + OP(*((uint32_t*)block), 
h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ + pixels+=line_size;\ + block +=line_size;\ + a= AV_RN32(pixels );\ + b= AV_RN32(pixels+1);\ + l0= (a&0x03030303UL)\ + + (b&0x03030303UL)\ + + 0x02020202UL;\ + h0= ((a&0xFCFCFCFCUL)>>2)\ + + ((b&0xFCFCFCFCUL)>>2);\ + OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ + pixels+=line_size;\ + block +=line_size;\ + }\ + pixels+=4-line_size*(h+1);\ + block +=4-line_size*h;\ + }\ +}\ +\ +static inline void OPNAME ## _no_rnd_pixels8_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ +{\ + int j;\ + for(j=0; j<2; j++){\ + int i;\ + const uint32_t a= AV_RN32(pixels );\ + const uint32_t b= AV_RN32(pixels+1);\ + uint32_t l0= (a&0x03030303UL)\ + + (b&0x03030303UL)\ + + 0x01010101UL;\ + uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\ + + ((b&0xFCFCFCFCUL)>>2);\ + uint32_t l1,h1;\ +\ + pixels+=line_size;\ + for(i=0; i<h; i+=2){\ + uint32_t a= AV_RN32(pixels );\ + uint32_t b= AV_RN32(pixels+1);\ + l1= (a&0x03030303UL)\ + + (b&0x03030303UL);\ + h1= ((a&0xFCFCFCFCUL)>>2)\ + + ((b&0xFCFCFCFCUL)>>2);\ + OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ + pixels+=line_size;\ + block +=line_size;\ + a= AV_RN32(pixels );\ + b= AV_RN32(pixels+1);\ + l0= (a&0x03030303UL)\ + + (b&0x03030303UL)\ + + 0x01010101UL;\ + h0= ((a&0xFCFCFCFCUL)>>2)\ + + ((b&0xFCFCFCFCUL)>>2);\ + OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ + pixels+=line_size;\ + block +=line_size;\ + }\ + pixels+=4-line_size*(h+1);\ + block +=4-line_size*h;\ + }\ +}\ +\ +CALL_2X_PIXELS(OPNAME ## _pixels16_c , OPNAME ## _pixels8_c , 8)\ +CALL_2X_PIXELS(OPNAME ## _pixels16_x2_c , OPNAME ## _pixels8_x2_c , 8)\ +CALL_2X_PIXELS(OPNAME ## _pixels16_y2_c , OPNAME ## _pixels8_y2_c , 8)\ +CALL_2X_PIXELS(OPNAME ## _pixels16_xy2_c, OPNAME ## _pixels8_xy2_c, 8)\ +av_unused CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_c , OPNAME ## _pixels8_c , 8)\ +CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_x2_c , OPNAME ## _no_rnd_pixels8_x2_c , 8)\ +CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_y2_c , OPNAME ## _no_rnd_pixels8_y2_c , 8)\ +CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_xy2_c, OPNAME ## _no_rnd_pixels8_xy2_c, 8)\ + +#define op_avg(a, b) a = rnd_avg32(a, b) +#endif + +#define op_put(a, b) a = b + +PIXOP2(avg, op_avg) +PIXOP2(put, op_put) +#undef op_avg +#undef op_put + +#define put_no_rnd_pixels8_c put_pixels8_c +#define put_no_rnd_pixels16_c put_pixels16_c + +static void put_no_rnd_pixels16_l2_c(uint8_t *dst, const uint8_t *a, const uint8_t *b, int stride, int h){ + put_no_rnd_pixels16_l2(dst, a, b, stride, stride, stride, h); +} + +static void put_no_rnd_pixels8_l2_c(uint8_t *dst, const uint8_t *a, const uint8_t *b, int stride, int h){ + put_no_rnd_pixels8_l2(dst, a, b, stride, stride, stride, h); +} + +#define H264_CHROMA_MC(OPNAME, OP)\ +static void OPNAME ## h264_chroma_mc2_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\ + const int A=(8-x)*(8-y);\ + const int B=( x)*(8-y);\ + const int C=(8-x)*( y);\ + const int D=( x)*( y);\ + int i;\ + \ + assert(x<8 && y<8 && x>=0 && y>=0);\ +\ + if(D){\ + for(i=0; i<h; i++){\ + OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\ + OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\ + dst+= stride;\ + src+= stride;\ + }\ + }else{\ + const int E= B+C;\ + const int step= C ? 
stride : 1;\ + for(i=0; i<h; i++){\ + OP(dst[0], (A*src[0] + E*src[step+0]));\ + OP(dst[1], (A*src[1] + E*src[step+1]));\ + dst+= stride;\ + src+= stride;\ + }\ + }\ +}\ +\ +static void OPNAME ## h264_chroma_mc4_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\ + const int A=(8-x)*(8-y);\ + const int B=( x)*(8-y);\ + const int C=(8-x)*( y);\ + const int D=( x)*( y);\ + int i;\ + \ + assert(x<8 && y<8 && x>=0 && y>=0);\ +\ + if(D){\ + for(i=0; i<h; i++){\ + OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\ + OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\ + OP(dst[2], (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3]));\ + OP(dst[3], (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4]));\ + dst+= stride;\ + src+= stride;\ + }\ + }else{\ + const int E= B+C;\ + const int step= C ? stride : 1;\ + for(i=0; i<h; i++){\ + OP(dst[0], (A*src[0] + E*src[step+0]));\ + OP(dst[1], (A*src[1] + E*src[step+1]));\ + OP(dst[2], (A*src[2] + E*src[step+2]));\ + OP(dst[3], (A*src[3] + E*src[step+3]));\ + dst+= stride;\ + src+= stride;\ + }\ + }\ +}\ +\ +static void OPNAME ## h264_chroma_mc8_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\ + const int A=(8-x)*(8-y);\ + const int B=( x)*(8-y);\ + const int C=(8-x)*( y);\ + const int D=( x)*( y);\ + int i;\ + \ + assert(x<8 && y<8 && x>=0 && y>=0);\ +\ + if(D){\ + for(i=0; i<h; i++){\ + OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\ + OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\ + OP(dst[2], (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3]));\ + OP(dst[3], (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4]));\ + OP(dst[4], (A*src[4] + B*src[5] + C*src[stride+4] + D*src[stride+5]));\ + OP(dst[5], (A*src[5] + B*src[6] + C*src[stride+5] + D*src[stride+6]));\ + OP(dst[6], (A*src[6] + B*src[7] + C*src[stride+6] + D*src[stride+7]));\ + OP(dst[7], (A*src[7] + B*src[8] + C*src[stride+7] + D*src[stride+8]));\ + dst+= stride;\ + src+= stride;\ + }\ + }else{\ + const int E= B+C;\ + const int step= C ? 
stride : 1;\ + for(i=0; i<h; i++){\ + OP(dst[0], (A*src[0] + E*src[step+0]));\ + OP(dst[1], (A*src[1] + E*src[step+1]));\ + OP(dst[2], (A*src[2] + E*src[step+2]));\ + OP(dst[3], (A*src[3] + E*src[step+3]));\ + OP(dst[4], (A*src[4] + E*src[step+4]));\ + OP(dst[5], (A*src[5] + E*src[step+5]));\ + OP(dst[6], (A*src[6] + E*src[step+6]));\ + OP(dst[7], (A*src[7] + E*src[step+7]));\ + dst+= stride;\ + src+= stride;\ + }\ + }\ +} + +#define op_avg(a, b) a = (((a)+(((b) + 32)>>6)+1)>>1) +#define op_put(a, b) a = (((b) + 32)>>6) + +H264_CHROMA_MC(put_ , op_put) +H264_CHROMA_MC(avg_ , op_avg) +#undef op_avg +#undef op_put + +#define H264_LOWPASS(OPNAME, OP, OP2) \ +static av_unused void OPNAME ## h264_qpel2_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ + const int h=2;\ + uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\ + int i;\ + for(i=0; i<h; i++)\ + {\ + OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]));\ + OP(dst[1], (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]));\ + dst+=dstStride;\ + src+=srcStride;\ + }\ +}\ +\ +static av_unused void OPNAME ## h264_qpel2_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ + const int w=2;\ + uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\ + int i;\ + for(i=0; i<w; i++)\ + {\ + const int srcB= src[-2*srcStride];\ + const int srcA= src[-1*srcStride];\ + const int src0= src[0 *srcStride];\ + const int src1= src[1 *srcStride];\ + const int src2= src[2 *srcStride];\ + const int src3= src[3 *srcStride];\ + const int src4= src[4 *srcStride];\ + OP(dst[0*dstStride], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\ + OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\ + dst++;\ + src++;\ + }\ +}\ +\ +static av_unused void OPNAME ## h264_qpel2_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\ + const int h=2;\ + const int w=2;\ + uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\ + int i;\ + src -= 2*srcStride;\ + for(i=0; i<h+5; i++)\ + {\ + tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]);\ + tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]);\ + tmp+=tmpStride;\ + src+=srcStride;\ + }\ + tmp -= tmpStride*(h+5-2);\ + for(i=0; i<w; i++)\ + {\ + const int tmpB= tmp[-2*tmpStride];\ + const int tmpA= tmp[-1*tmpStride];\ + const int tmp0= tmp[0 *tmpStride];\ + const int tmp1= tmp[1 *tmpStride];\ + const int tmp2= tmp[2 *tmpStride];\ + const int tmp3= tmp[3 *tmpStride];\ + const int tmp4= tmp[4 *tmpStride];\ + OP2(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));\ + OP2(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));\ + dst++;\ + tmp++;\ + }\ +}\ +static void OPNAME ## h264_qpel4_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ + const int h=4;\ + uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\ + int i;\ + for(i=0; i<h; i++)\ + {\ + OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]));\ + OP(dst[1], (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]));\ + OP(dst[2], (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5]));\ + OP(dst[3], (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6]));\ + dst+=dstStride;\ + src+=srcStride;\ + }\ +}\ +\ +static void OPNAME ## h264_qpel4_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ + const int w=4;\ + uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\ + int i;\ + for(i=0; i<w; i++)\ + {\ + const int srcB= src[-2*srcStride];\ + const int srcA= src[-1*srcStride];\ + const int src0= src[0 
*srcStride];\ + const int src1= src[1 *srcStride];\ + const int src2= src[2 *srcStride];\ + const int src3= src[3 *srcStride];\ + const int src4= src[4 *srcStride];\ + const int src5= src[5 *srcStride];\ + const int src6= src[6 *srcStride];\ + OP(dst[0*dstStride], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\ + OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\ + OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*5 + (src0+src5));\ + OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*5 + (src1+src6));\ + dst++;\ + src++;\ + }\ +}\ +\ +static void OPNAME ## h264_qpel4_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\ + const int h=4;\ + const int w=4;\ + uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\ + int i;\ + src -= 2*srcStride;\ + for(i=0; i<h+5; i++)\ + {\ + tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]);\ + tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]);\ + tmp[2]= (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5]);\ + tmp[3]= (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6]);\ + tmp+=tmpStride;\ + src+=srcStride;\ + }\ + tmp -= tmpStride*(h+5-2);\ + for(i=0; i<w; i++)\ + {\ + const int tmpB= tmp[-2*tmpStride];\ + const int tmpA= tmp[-1*tmpStride];\ + const int tmp0= tmp[0 *tmpStride];\ + const int tmp1= tmp[1 *tmpStride];\ + const int tmp2= tmp[2 *tmpStride];\ + const int tmp3= tmp[3 *tmpStride];\ + const int tmp4= tmp[4 *tmpStride];\ + const int tmp5= tmp[5 *tmpStride];\ + const int tmp6= tmp[6 *tmpStride];\ + OP2(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));\ + OP2(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));\ + OP2(dst[2*dstStride], (tmp2+tmp3)*20 - (tmp1+tmp4)*5 + (tmp0+tmp5));\ + OP2(dst[3*dstStride], (tmp3+tmp4)*20 - (tmp2+tmp5)*5 + (tmp1+tmp6));\ + dst++;\ + tmp++;\ + }\ +}\ +\ +static void OPNAME ## h264_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ + const int h=8;\ + uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\ + int i;\ + for(i=0; i<h; i++)\ + {\ + OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3 ]));\ + OP(dst[1], (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4 ]));\ + OP(dst[2], (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5 ]));\ + OP(dst[3], (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6 ]));\ + OP(dst[4], (src[4]+src[5])*20 - (src[3 ]+src[6])*5 + (src[2 ]+src[7 ]));\ + OP(dst[5], (src[5]+src[6])*20 - (src[4 ]+src[7])*5 + (src[3 ]+src[8 ]));\ + OP(dst[6], (src[6]+src[7])*20 - (src[5 ]+src[8])*5 + (src[4 ]+src[9 ]));\ + OP(dst[7], (src[7]+src[8])*20 - (src[6 ]+src[9])*5 + (src[5 ]+src[10]));\ + dst+=dstStride;\ + src+=srcStride;\ + }\ +}\ +\ +static void OPNAME ## h264_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ + const int w=8;\ + uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\ + int i;\ + for(i=0; i<w; i++)\ + {\ + const int srcB= src[-2*srcStride];\ + const int srcA= src[-1*srcStride];\ + const int src0= src[0 *srcStride];\ + const int src1= src[1 *srcStride];\ + const int src2= src[2 *srcStride];\ + const int src3= src[3 *srcStride];\ + const int src4= src[4 *srcStride];\ + const int src5= src[5 *srcStride];\ + const int src6= src[6 *srcStride];\ + const int src7= src[7 *srcStride];\ + const int src8= src[8 *srcStride];\ + const int src9= src[9 *srcStride];\ + const int src10=src[10*srcStride];\ + OP(dst[0*dstStride], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\ + OP(dst[1*dstStride], (src1+src2)*20 - 
(src0+src3)*5 + (srcA+src4));\ + OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*5 + (src0+src5));\ + OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*5 + (src1+src6));\ + OP(dst[4*dstStride], (src4+src5)*20 - (src3+src6)*5 + (src2+src7));\ + OP(dst[5*dstStride], (src5+src6)*20 - (src4+src7)*5 + (src3+src8));\ + OP(dst[6*dstStride], (src6+src7)*20 - (src5+src8)*5 + (src4+src9));\ + OP(dst[7*dstStride], (src7+src8)*20 - (src6+src9)*5 + (src5+src10));\ + dst++;\ + src++;\ + }\ +}\ +\ +static void OPNAME ## h264_qpel8_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\ + const int h=8;\ + const int w=8;\ + uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\ + int i;\ + src -= 2*srcStride;\ + for(i=0; i<h+5; i++)\ + {\ + tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3 ]);\ + tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4 ]);\ + tmp[2]= (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5 ]);\ + tmp[3]= (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6 ]);\ + tmp[4]= (src[4]+src[5])*20 - (src[3 ]+src[6])*5 + (src[2 ]+src[7 ]);\ + tmp[5]= (src[5]+src[6])*20 - (src[4 ]+src[7])*5 + (src[3 ]+src[8 ]);\ + tmp[6]= (src[6]+src[7])*20 - (src[5 ]+src[8])*5 + (src[4 ]+src[9 ]);\ + tmp[7]= (src[7]+src[8])*20 - (src[6 ]+src[9])*5 + (src[5 ]+src[10]);\ + tmp+=tmpStride;\ + src+=srcStride;\ + }\ + tmp -= tmpStride*(h+5-2);\ + for(i=0; i<w; i++)\ + {\ + const int tmpB= tmp[-2*tmpStride];\ + const int tmpA= tmp[-1*tmpStride];\ + const int tmp0= tmp[0 *tmpStride];\ + const int tmp1= tmp[1 *tmpStride];\ + const int tmp2= tmp[2 *tmpStride];\ + const int tmp3= tmp[3 *tmpStride];\ + const int tmp4= tmp[4 *tmpStride];\ + const int tmp5= tmp[5 *tmpStride];\ + const int tmp6= tmp[6 *tmpStride];\ + const int tmp7= tmp[7 *tmpStride];\ + const int tmp8= tmp[8 *tmpStride];\ + const int tmp9= tmp[9 *tmpStride];\ + const int tmp10=tmp[10*tmpStride];\ + OP2(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));\ + OP2(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));\ + OP2(dst[2*dstStride], (tmp2+tmp3)*20 - (tmp1+tmp4)*5 + (tmp0+tmp5));\ + OP2(dst[3*dstStride], (tmp3+tmp4)*20 - (tmp2+tmp5)*5 + (tmp1+tmp6));\ + OP2(dst[4*dstStride], (tmp4+tmp5)*20 - (tmp3+tmp6)*5 + (tmp2+tmp7));\ + OP2(dst[5*dstStride], (tmp5+tmp6)*20 - (tmp4+tmp7)*5 + (tmp3+tmp8));\ + OP2(dst[6*dstStride], (tmp6+tmp7)*20 - (tmp5+tmp8)*5 + (tmp4+tmp9));\ + OP2(dst[7*dstStride], (tmp7+tmp8)*20 - (tmp6+tmp9)*5 + (tmp5+tmp10));\ + dst++;\ + tmp++;\ + }\ +}\ +\ +static void OPNAME ## h264_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ + OPNAME ## h264_qpel8_v_lowpass(dst , src , dstStride, srcStride);\ + OPNAME ## h264_qpel8_v_lowpass(dst+8, src+8, dstStride, srcStride);\ + src += 8*srcStride;\ + dst += 8*dstStride;\ + OPNAME ## h264_qpel8_v_lowpass(dst , src , dstStride, srcStride);\ + OPNAME ## h264_qpel8_v_lowpass(dst+8, src+8, dstStride, srcStride);\ +}\ +\ +static void OPNAME ## h264_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ + OPNAME ## h264_qpel8_h_lowpass(dst , src , dstStride, srcStride);\ + OPNAME ## h264_qpel8_h_lowpass(dst+8, src+8, dstStride, srcStride);\ + src += 8*srcStride;\ + dst += 8*dstStride;\ + OPNAME ## h264_qpel8_h_lowpass(dst , src , dstStride, srcStride);\ + OPNAME ## h264_qpel8_h_lowpass(dst+8, src+8, dstStride, srcStride);\ +}\ +\ +static void OPNAME ## h264_qpel16_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\ 
+ OPNAME ## h264_qpel8_hv_lowpass(dst , tmp , src , dstStride, tmpStride, srcStride);\ + OPNAME ## h264_qpel8_hv_lowpass(dst+8, tmp+8, src+8, dstStride, tmpStride, srcStride);\ + src += 8*srcStride;\ + dst += 8*dstStride;\ + OPNAME ## h264_qpel8_hv_lowpass(dst , tmp , src , dstStride, tmpStride, srcStride);\ + OPNAME ## h264_qpel8_hv_lowpass(dst+8, tmp+8, src+8, dstStride, tmpStride, srcStride);\ +}\ + +#define H264_MC(OPNAME, SIZE) \ +static av_unused void OPNAME ## h264_qpel ## SIZE ## _mc00_c (uint8_t *dst, uint8_t *src, int stride){\ + OPNAME ## pixels ## SIZE ## _c(dst, src, stride, SIZE);\ +}\ +\ +static void OPNAME ## h264_qpel ## SIZE ## _mc10_c(uint8_t *dst, uint8_t *src, int stride){\ + uint8_t half[SIZE*SIZE];\ + put_h264_qpel ## SIZE ## _h_lowpass(half, src, SIZE, stride);\ + OPNAME ## pixels ## SIZE ## _l2(dst, src, half, stride, stride, SIZE, SIZE);\ +}\ +\ +static void OPNAME ## h264_qpel ## SIZE ## _mc20_c(uint8_t *dst, uint8_t *src, int stride){\ + OPNAME ## h264_qpel ## SIZE ## _h_lowpass(dst, src, stride, stride);\ +}\ +\ +static void OPNAME ## h264_qpel ## SIZE ## _mc30_c(uint8_t *dst, uint8_t *src, int stride){\ + uint8_t half[SIZE*SIZE];\ + put_h264_qpel ## SIZE ## _h_lowpass(half, src, SIZE, stride);\ + OPNAME ## pixels ## SIZE ## _l2(dst, src+1, half, stride, stride, SIZE, SIZE);\ +}\ +\ +static void OPNAME ## h264_qpel ## SIZE ## _mc01_c(uint8_t *dst, uint8_t *src, int stride){\ + uint8_t full[SIZE*(SIZE+5)];\ + uint8_t * const full_mid= full + SIZE*2;\ + uint8_t half[SIZE*SIZE];\ + copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\ + put_h264_qpel ## SIZE ## _v_lowpass(half, full_mid, SIZE, SIZE);\ + OPNAME ## pixels ## SIZE ## _l2(dst, full_mid, half, stride, SIZE, SIZE, SIZE);\ +}\ +\ +static void OPNAME ## h264_qpel ## SIZE ## _mc02_c(uint8_t *dst, uint8_t *src, int stride){\ + uint8_t full[SIZE*(SIZE+5)];\ + uint8_t * const full_mid= full + SIZE*2;\ + copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\ + OPNAME ## h264_qpel ## SIZE ## _v_lowpass(dst, full_mid, stride, SIZE);\ +}\ +\ +static void OPNAME ## h264_qpel ## SIZE ## _mc03_c(uint8_t *dst, uint8_t *src, int stride){\ + uint8_t full[SIZE*(SIZE+5)];\ + uint8_t * const full_mid= full + SIZE*2;\ + uint8_t half[SIZE*SIZE];\ + copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\ + put_h264_qpel ## SIZE ## _v_lowpass(half, full_mid, SIZE, SIZE);\ + OPNAME ## pixels ## SIZE ## _l2(dst, full_mid+SIZE, half, stride, SIZE, SIZE, SIZE);\ +}\ +\ +static void OPNAME ## h264_qpel ## SIZE ## _mc11_c(uint8_t *dst, uint8_t *src, int stride){\ + uint8_t full[SIZE*(SIZE+5)];\ + uint8_t * const full_mid= full + SIZE*2;\ + uint8_t halfH[SIZE*SIZE];\ + uint8_t halfV[SIZE*SIZE];\ + put_h264_qpel ## SIZE ## _h_lowpass(halfH, src, SIZE, stride);\ + copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\ + put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\ + OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\ +}\ +\ +static void OPNAME ## h264_qpel ## SIZE ## _mc31_c(uint8_t *dst, uint8_t *src, int stride){\ + uint8_t full[SIZE*(SIZE+5)];\ + uint8_t * const full_mid= full + SIZE*2;\ + uint8_t halfH[SIZE*SIZE];\ + uint8_t halfV[SIZE*SIZE];\ + put_h264_qpel ## SIZE ## _h_lowpass(halfH, src, SIZE, stride);\ + copy_block ## SIZE (full, src - stride*2 + 1, SIZE, stride, SIZE + 5);\ + put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\ + OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\ +}\ 
+\ +static void OPNAME ## h264_qpel ## SIZE ## _mc13_c(uint8_t *dst, uint8_t *src, int stride){\ + uint8_t full[SIZE*(SIZE+5)];\ + uint8_t * const full_mid= full + SIZE*2;\ + uint8_t halfH[SIZE*SIZE];\ + uint8_t halfV[SIZE*SIZE];\ + put_h264_qpel ## SIZE ## _h_lowpass(halfH, src + stride, SIZE, stride);\ + copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\ + put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\ + OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\ +}\ +\ +static void OPNAME ## h264_qpel ## SIZE ## _mc33_c(uint8_t *dst, uint8_t *src, int stride){\ + uint8_t full[SIZE*(SIZE+5)];\ + uint8_t * const full_mid= full + SIZE*2;\ + uint8_t halfH[SIZE*SIZE];\ + uint8_t halfV[SIZE*SIZE];\ + put_h264_qpel ## SIZE ## _h_lowpass(halfH, src + stride, SIZE, stride);\ + copy_block ## SIZE (full, src - stride*2 + 1, SIZE, stride, SIZE + 5);\ + put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\ + OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\ +}\ +\ +static void OPNAME ## h264_qpel ## SIZE ## _mc22_c(uint8_t *dst, uint8_t *src, int stride){\ + int16_t tmp[SIZE*(SIZE+5)];\ + OPNAME ## h264_qpel ## SIZE ## _hv_lowpass(dst, tmp, src, stride, SIZE, stride);\ +}\ +\ +static void OPNAME ## h264_qpel ## SIZE ## _mc21_c(uint8_t *dst, uint8_t *src, int stride){\ + int16_t tmp[SIZE*(SIZE+5)];\ + uint8_t halfH[SIZE*SIZE];\ + uint8_t halfHV[SIZE*SIZE];\ + put_h264_qpel ## SIZE ## _h_lowpass(halfH, src, SIZE, stride);\ + put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\ + OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfHV, stride, SIZE, SIZE, SIZE);\ +}\ +\ +static void OPNAME ## h264_qpel ## SIZE ## _mc23_c(uint8_t *dst, uint8_t *src, int stride){\ + int16_t tmp[SIZE*(SIZE+5)];\ + uint8_t halfH[SIZE*SIZE];\ + uint8_t halfHV[SIZE*SIZE];\ + put_h264_qpel ## SIZE ## _h_lowpass(halfH, src + stride, SIZE, stride);\ + put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\ + OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfHV, stride, SIZE, SIZE, SIZE);\ +}\ +\ +static void OPNAME ## h264_qpel ## SIZE ## _mc12_c(uint8_t *dst, uint8_t *src, int stride){\ + uint8_t full[SIZE*(SIZE+5)];\ + uint8_t * const full_mid= full + SIZE*2;\ + int16_t tmp[SIZE*(SIZE+5)];\ + uint8_t halfV[SIZE*SIZE];\ + uint8_t halfHV[SIZE*SIZE];\ + copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\ + put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\ + put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\ + OPNAME ## pixels ## SIZE ## _l2(dst, halfV, halfHV, stride, SIZE, SIZE, SIZE);\ +}\ +\ +static void OPNAME ## h264_qpel ## SIZE ## _mc32_c(uint8_t *dst, uint8_t *src, int stride){\ + uint8_t full[SIZE*(SIZE+5)];\ + uint8_t * const full_mid= full + SIZE*2;\ + int16_t tmp[SIZE*(SIZE+5)];\ + uint8_t halfV[SIZE*SIZE];\ + uint8_t halfHV[SIZE*SIZE];\ + copy_block ## SIZE (full, src - stride*2 + 1, SIZE, stride, SIZE + 5);\ + put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\ + put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\ + OPNAME ## pixels ## SIZE ## _l2(dst, halfV, halfHV, stride, SIZE, SIZE, SIZE);\ +}\ + +#define op_avg(a, b) a = (((a)+cm[((b) + 16)>>5]+1)>>1) +//#define op_avg2(a, b) a = (((a)*w1+cm[((b) + 16)>>5]*w2 + o + 64)>>7) +#define op_put(a, b) a = cm[((b) + 16)>>5] +#define op2_avg(a, b) a = (((a)+cm[((b) + 512)>>10]+1)>>1) +#define op2_put(a, b) a = cm[((b) + 512)>>10] + 
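/*
 * Illustrative sketch, not part of the patch: the H264_LOWPASS macros above
 * apply the H.264 6-tap filter (1, -5, 20, 20, -5, 1); op_put then rounds the
 * result with (v + 16) >> 5 (op2_put uses (v + 512) >> 10 after the second
 * filtering pass) and clips it through ff_cropTbl. Assuming 8-bit samples and
 * an explicit clamp standing in for the cropTbl lookup, a single horizontal
 * half-pel sample can be computed on its own as below (h264_half_pel_h is a
 * made-up name for this sketch only).
 */
#include <stdint.h>

int h264_half_pel_h(const uint8_t *src)
{
    /* src[-2] .. src[3] are the six full-pel neighbours of the half-pel position */
    int v = (src[0] + src[1]) * 20 - (src[-1] + src[2]) * 5 + (src[-2] + src[3]);
    v = (v + 16) >> 5;                      /* op_put rounding */
    return v < 0 ? 0 : v > 255 ? 255 : v;   /* what the cm[] (ff_cropTbl) lookup achieves */
}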
+H264_LOWPASS(put_ , op_put, op2_put)
+H264_LOWPASS(avg_ , op_avg, op2_avg)
+H264_MC(put_, 2)
+H264_MC(put_, 4)
+H264_MC(put_, 8)
+H264_MC(put_, 16)
+H264_MC(avg_, 4)
+H264_MC(avg_, 8)
+H264_MC(avg_, 16)
+
+#undef op_avg
+#undef op_put
+#undef op2_avg
+#undef op2_put
+
+#define put_h264_qpel8_mc00_c ff_put_pixels8x8_c
+#define avg_h264_qpel8_mc00_c ff_avg_pixels8x8_c
+#define put_h264_qpel16_mc00_c ff_put_pixels16x16_c
+#define avg_h264_qpel16_mc00_c ff_avg_pixels16x16_c
+
+void ff_put_pixels8x8_c(uint8_t *dst, uint8_t *src, int stride) {
+    put_pixels8_c(dst, src, stride, 8);
+}
+void ff_avg_pixels8x8_c(uint8_t *dst, uint8_t *src, int stride) {
+    avg_pixels8_c(dst, src, stride, 8);
+}
+void ff_put_pixels16x16_c(uint8_t *dst, uint8_t *src, int stride) {
+    put_pixels16_c(dst, src, stride, 16);
+}
+void ff_avg_pixels16x16_c(uint8_t *dst, uint8_t *src, int stride) {
+    avg_pixels16_c(dst, src, stride, 16);
+}
+
+static void clear_block_c(DCTELEM *block)
+{
+    memset(block, 0, sizeof(DCTELEM)*64);
+}
+
+/**
+ * memset(blocks, 0, sizeof(DCTELEM)*6*64)
+ */
+static void clear_blocks_c(DCTELEM *blocks)
+{
+    memset(blocks, 0, sizeof(DCTELEM)*6*64);
+}
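The averaging primitives used throughout this file (op_avg, rnd_avg32, no_rnd_avg32 and the 64-bit op_avg variant) rest on the packed byte-average identity (a + b + 1) >> 1 == (a | b) - (((a ^ b) & 0xFEFEFEFE) >> 1), evaluated on four or eight byte lanes at once without inter-byte carries. A minimal standalone check of that identity, written against plain C99 and using a hypothetical helper name rnd_avg_packed (not a function from the patch), is sketched below:

#include <stdint.h>
#include <stdio.h>

/* Packed per-byte rounded average, the same identity used by op_avg and
 * rnd_avg32 in the listing: a + b = 2*(a|b) - (a^b), hence
 * (a + b + 1) >> 1 == (a|b) - ((a^b) >> 1) in every byte lane; the 0xFE mask
 * drops the bit that would otherwise shift across a byte boundary. */
static uint32_t rnd_avg_packed(uint32_t a, uint32_t b)
{
    return (a | b) - (((a ^ b) & 0xFEFEFEFEU) >> 1);
}

int main(void)
{
    uint32_t a = 0xFF10807FU, b = 0x01FF8180U;
    uint32_t packed = rnd_avg_packed(a, b);

    for (int i = 0; i < 4; i++) {
        unsigned pa  = (a >> (8 * i)) & 0xFF;
        unsigned pb  = (b >> (8 * i)) & 0xFF;
        unsigned ref = (pa + pb + 1) >> 1;          /* scalar reference */
        unsigned got = (packed >> (8 * i)) & 0xFF;  /* byte lane of the packed result */
        printf("byte %d: scalar %3u  packed %3u\n", i, ref, got);
    }
    return 0;
}

The no_rnd variants use the complementary form (a & b) + (((a ^ b) & 0xFEFEFEFE) >> 1), which evaluates the truncating average (a + b) >> 1 per byte instead.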