diff options
author | Oskar Arvidsson <oskar@irock.se> | 2011-03-29 17:48:51 +0200 |
---|---|---|
committer | Michael Niedermayer <michaelni@gmx.at> | 2011-04-10 22:33:41 +0200 |
commit | 8dffcca579dcf179ea9f17026980198618276f70 (patch) | |
tree | 7f487aa2707ead2ec65d5d25539c6d8252a95e51 /libavcodec/dsputil.c | |
parent | 7bc8032b0768d06f18cac0fb0d16579aeced8b54 (diff) | |
download | ffmpeg-8dffcca579dcf179ea9f17026980198618276f70.tar.gz |
Move some functions in dsputil.c into a new file dsputil_internal.h.
The functions moved are used when decoding h264.
Preparatory patch for high bit depth h264 decoding support.
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
Diffstat (limited to 'libavcodec/dsputil.c')
-rw-r--r-- | libavcodec/dsputil.c | 1230 |
1 files changed, 2 insertions, 1228 deletions
diff --git a/libavcodec/dsputil.c b/libavcodec/dsputil.c index 036f9f7bf4..79a844bd19 100644 --- a/libavcodec/dsputil.c +++ b/libavcodec/dsputil.c @@ -43,6 +43,8 @@ uint8_t ff_cropTbl[256 + 2 * MAX_NEG_CROP] = {0, }; uint32_t ff_squareTbl[512] = {0, }; +#include "dsputil_internal.h" + // 0x7f7f7f7f or 0x7f7f7f7f7f7f7f7f or whatever, depending on the cpu's native arithmetic size #define pb_7f (~0UL/255 * 0x7f) #define pb_80 (~0UL/255 * 0x80) @@ -296,110 +298,6 @@ static int sse16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) return s; } -/* draw the edges of width 'w' of an image of size width, height */ -//FIXME check that this is ok for mpeg4 interlaced -static void draw_edges_c(uint8_t *buf, int wrap, int width, int height, int w, int sides) -{ - uint8_t *ptr, *last_line; - int i; - - /* left and right */ - ptr = buf; - for(i=0;i<height;i++) { - memset(ptr - w, ptr[0], w); - memset(ptr + width, ptr[width-1], w); - ptr += wrap; - } - - /* top and bottom + corners */ - buf -= w; - last_line = buf + (height - 1) * wrap; - if (sides & EDGE_TOP) - for(i = 0; i < w; i++) - memcpy(buf - (i + 1) * wrap, buf, width + w + w); // top - if (sides & EDGE_BOTTOM) - for (i = 0; i < w; i++) - memcpy(last_line + (i + 1) * wrap, last_line, width + w + w); // bottom -} - -/** - * Copy a rectangular area of samples to a temporary buffer and replicate the border samples. - * @param buf destination buffer - * @param src source buffer - * @param linesize number of bytes between 2 vertically adjacent samples in both the source and destination buffers - * @param block_w width of block - * @param block_h height of block - * @param src_x x coordinate of the top left sample of the block in the source buffer - * @param src_y y coordinate of the top left sample of the block in the source buffer - * @param w width of the source buffer - * @param h height of the source buffer - */ -void ff_emulated_edge_mc(uint8_t *buf, const uint8_t *src, int linesize, int block_w, int block_h, - int src_x, int src_y, int w, int h){ - int x, y; - int start_y, start_x, end_y, end_x; - - if(src_y>= h){ - src+= (h-1-src_y)*linesize; - src_y=h-1; - }else if(src_y<=-block_h){ - src+= (1-block_h-src_y)*linesize; - src_y=1-block_h; - } - if(src_x>= w){ - src+= (w-1-src_x); - src_x=w-1; - }else if(src_x<=-block_w){ - src+= (1-block_w-src_x); - src_x=1-block_w; - } - - start_y= FFMAX(0, -src_y); - start_x= FFMAX(0, -src_x); - end_y= FFMIN(block_h, h-src_y); - end_x= FFMIN(block_w, w-src_x); - assert(start_y < end_y && block_h); - assert(start_x < end_x && block_w); - - w = end_x - start_x; - src += start_y*linesize + start_x; - buf += start_x; - - //top - for(y=0; y<start_y; y++){ - memcpy(buf, src, w); - buf += linesize; - } - - // copy existing part - for(; y<end_y; y++){ - memcpy(buf, src, w); - src += linesize; - buf += linesize; - } - - //bottom - src -= linesize; - for(; y<block_h; y++){ - memcpy(buf, src, w); - buf += linesize; - } - - buf -= block_h * linesize + start_x; - while (block_h--){ - //left - for(x=0; x<start_x; x++){ - buf[x] = buf[start_x]; - } - - //right - for(x=end_x; x<block_w; x++){ - buf[x] = buf[end_x - 1]; - } - buf += linesize; - } -} - static void get_pixels_c(DCTELEM *restrict block, const uint8_t *pixels, int line_size) { int i; @@ -591,36 +489,6 @@ static void add_pixels_clamped2_c(const DCTELEM *block, uint8_t *restrict pixels } } -static void add_pixels8_c(uint8_t *restrict pixels, DCTELEM *block, int line_size) -{ - int i; - for(i=0;i<8;i++) { - pixels[0] += block[0]; - pixels[1] += block[1]; - pixels[2] += block[2]; - pixels[3] += block[3]; - pixels[4] += block[4]; - pixels[5] += block[5]; - pixels[6] += block[6]; - pixels[7] += block[7]; - pixels += line_size; - block += 8; - } -} - -static void add_pixels4_c(uint8_t *restrict pixels, DCTELEM *block, int line_size) -{ - int i; - for(i=0;i<4;i++) { - pixels[0] += block[0]; - pixels[1] += block[1]; - pixels[2] += block[2]; - pixels[3] += block[3]; - pixels += line_size; - block += 4; - } -} - static int sum_abs_dctelem_c(DCTELEM *block) { int sum=0, i; @@ -665,539 +533,9 @@ static void scale_block_c(const uint8_t src[64]/*align 8*/, uint8_t *dst/*align } } -#if 0 - -#define PIXOP2(OPNAME, OP) \ -static void OPNAME ## _pixels(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ -{\ - int i;\ - for(i=0; i<h; i++){\ - OP(*((uint64_t*)block), AV_RN64(pixels));\ - pixels+=line_size;\ - block +=line_size;\ - }\ -}\ -\ -static void OPNAME ## _no_rnd_pixels_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ -{\ - int i;\ - for(i=0; i<h; i++){\ - const uint64_t a= AV_RN64(pixels );\ - const uint64_t b= AV_RN64(pixels+1);\ - OP(*((uint64_t*)block), (a&b) + (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\ - pixels+=line_size;\ - block +=line_size;\ - }\ -}\ -\ -static void OPNAME ## _pixels_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ -{\ - int i;\ - for(i=0; i<h; i++){\ - const uint64_t a= AV_RN64(pixels );\ - const uint64_t b= AV_RN64(pixels+1);\ - OP(*((uint64_t*)block), (a|b) - (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\ - pixels+=line_size;\ - block +=line_size;\ - }\ -}\ -\ -static void OPNAME ## _no_rnd_pixels_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ -{\ - int i;\ - for(i=0; i<h; i++){\ - const uint64_t a= AV_RN64(pixels );\ - const uint64_t b= AV_RN64(pixels+line_size);\ - OP(*((uint64_t*)block), (a&b) + (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\ - pixels+=line_size;\ - block +=line_size;\ - }\ -}\ -\ -static void OPNAME ## _pixels_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ -{\ - int i;\ - for(i=0; i<h; i++){\ - const uint64_t a= AV_RN64(pixels );\ - const uint64_t b= AV_RN64(pixels+line_size);\ - OP(*((uint64_t*)block), (a|b) - (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\ - pixels+=line_size;\ - block +=line_size;\ - }\ -}\ -\ -static void OPNAME ## _pixels_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ -{\ - int i;\ - const uint64_t a= AV_RN64(pixels );\ - const uint64_t b= AV_RN64(pixels+1);\ - uint64_t l0= (a&0x0303030303030303ULL)\ - + (b&0x0303030303030303ULL)\ - + 0x0202020202020202ULL;\ - uint64_t h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\ - + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\ - uint64_t l1,h1;\ -\ - pixels+=line_size;\ - for(i=0; i<h; i+=2){\ - uint64_t a= AV_RN64(pixels );\ - uint64_t b= AV_RN64(pixels+1);\ - l1= (a&0x0303030303030303ULL)\ - + (b&0x0303030303030303ULL);\ - h1= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\ - + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\ - OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\ - pixels+=line_size;\ - block +=line_size;\ - a= AV_RN64(pixels );\ - b= AV_RN64(pixels+1);\ - l0= (a&0x0303030303030303ULL)\ - + (b&0x0303030303030303ULL)\ - + 0x0202020202020202ULL;\ - h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\ - + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\ - OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\ - pixels+=line_size;\ - block +=line_size;\ - }\ -}\ -\ -static void OPNAME ## _no_rnd_pixels_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ -{\ - int i;\ - const uint64_t a= AV_RN64(pixels );\ - const uint64_t b= AV_RN64(pixels+1);\ - uint64_t l0= (a&0x0303030303030303ULL)\ - + (b&0x0303030303030303ULL)\ - + 0x0101010101010101ULL;\ - uint64_t h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\ - + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\ - uint64_t l1,h1;\ -\ - pixels+=line_size;\ - for(i=0; i<h; i+=2){\ - uint64_t a= AV_RN64(pixels );\ - uint64_t b= AV_RN64(pixels+1);\ - l1= (a&0x0303030303030303ULL)\ - + (b&0x0303030303030303ULL);\ - h1= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\ - + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\ - OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\ - pixels+=line_size;\ - block +=line_size;\ - a= AV_RN64(pixels );\ - b= AV_RN64(pixels+1);\ - l0= (a&0x0303030303030303ULL)\ - + (b&0x0303030303030303ULL)\ - + 0x0101010101010101ULL;\ - h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\ - + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\ - OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\ - pixels+=line_size;\ - block +=line_size;\ - }\ -}\ -\ -CALL_2X_PIXELS(OPNAME ## _pixels16_c , OPNAME ## _pixels_c , 8)\ -CALL_2X_PIXELS(OPNAME ## _pixels16_x2_c , OPNAME ## _pixels_x2_c , 8)\ -CALL_2X_PIXELS(OPNAME ## _pixels16_y2_c , OPNAME ## _pixels_y2_c , 8)\ -CALL_2X_PIXELS(OPNAME ## _pixels16_xy2_c, OPNAME ## _pixels_xy2_c, 8)\ -CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_x2_c , OPNAME ## _no_rnd_pixels_x2_c , 8)\ -CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_y2_c , OPNAME ## _no_rnd_pixels_y2_c , 8)\ -CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_xy2_c, OPNAME ## _no_rnd_pixels_xy2_c, 8) - -#define op_avg(a, b) a = ( ((a)|(b)) - ((((a)^(b))&0xFEFEFEFEFEFEFEFEULL)>>1) ) -#else // 64 bit variant - -#define PIXOP2(OPNAME, OP) \ -static void OPNAME ## _pixels2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ - int i;\ - for(i=0; i<h; i++){\ - OP(*((uint16_t*)(block )), AV_RN16(pixels ));\ - pixels+=line_size;\ - block +=line_size;\ - }\ -}\ -static void OPNAME ## _pixels4_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ - int i;\ - for(i=0; i<h; i++){\ - OP(*((uint32_t*)(block )), AV_RN32(pixels ));\ - pixels+=line_size;\ - block +=line_size;\ - }\ -}\ -static void OPNAME ## _pixels8_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ - int i;\ - for(i=0; i<h; i++){\ - OP(*((uint32_t*)(block )), AV_RN32(pixels ));\ - OP(*((uint32_t*)(block+4)), AV_RN32(pixels+4));\ - pixels+=line_size;\ - block +=line_size;\ - }\ -}\ -static inline void OPNAME ## _no_rnd_pixels8_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ - OPNAME ## _pixels8_c(block, pixels, line_size, h);\ -}\ -\ -static inline void OPNAME ## _no_rnd_pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \ - int src_stride1, int src_stride2, int h){\ - int i;\ - for(i=0; i<h; i++){\ - uint32_t a,b;\ - a= AV_RN32(&src1[i*src_stride1 ]);\ - b= AV_RN32(&src2[i*src_stride2 ]);\ - OP(*((uint32_t*)&dst[i*dst_stride ]), no_rnd_avg32(a, b));\ - a= AV_RN32(&src1[i*src_stride1+4]);\ - b= AV_RN32(&src2[i*src_stride2+4]);\ - OP(*((uint32_t*)&dst[i*dst_stride+4]), no_rnd_avg32(a, b));\ - }\ -}\ -\ -static inline void OPNAME ## _pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \ - int src_stride1, int src_stride2, int h){\ - int i;\ - for(i=0; i<h; i++){\ - uint32_t a,b;\ - a= AV_RN32(&src1[i*src_stride1 ]);\ - b= AV_RN32(&src2[i*src_stride2 ]);\ - OP(*((uint32_t*)&dst[i*dst_stride ]), rnd_avg32(a, b));\ - a= AV_RN32(&src1[i*src_stride1+4]);\ - b= AV_RN32(&src2[i*src_stride2+4]);\ - OP(*((uint32_t*)&dst[i*dst_stride+4]), rnd_avg32(a, b));\ - }\ -}\ -\ -static inline void OPNAME ## _pixels4_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \ - int src_stride1, int src_stride2, int h){\ - int i;\ - for(i=0; i<h; i++){\ - uint32_t a,b;\ - a= AV_RN32(&src1[i*src_stride1 ]);\ - b= AV_RN32(&src2[i*src_stride2 ]);\ - OP(*((uint32_t*)&dst[i*dst_stride ]), rnd_avg32(a, b));\ - }\ -}\ -\ -static inline void OPNAME ## _pixels2_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \ - int src_stride1, int src_stride2, int h){\ - int i;\ - for(i=0; i<h; i++){\ - uint32_t a,b;\ - a= AV_RN16(&src1[i*src_stride1 ]);\ - b= AV_RN16(&src2[i*src_stride2 ]);\ - OP(*((uint16_t*)&dst[i*dst_stride ]), rnd_avg32(a, b));\ - }\ -}\ -\ -static inline void OPNAME ## _pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \ - int src_stride1, int src_stride2, int h){\ - OPNAME ## _pixels8_l2(dst , src1 , src2 , dst_stride, src_stride1, src_stride2, h);\ - OPNAME ## _pixels8_l2(dst+8, src1+8, src2+8, dst_stride, src_stride1, src_stride2, h);\ -}\ -\ -static inline void OPNAME ## _no_rnd_pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \ - int src_stride1, int src_stride2, int h){\ - OPNAME ## _no_rnd_pixels8_l2(dst , src1 , src2 , dst_stride, src_stride1, src_stride2, h);\ - OPNAME ## _no_rnd_pixels8_l2(dst+8, src1+8, src2+8, dst_stride, src_stride1, src_stride2, h);\ -}\ -\ -static inline void OPNAME ## _no_rnd_pixels8_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ - OPNAME ## _no_rnd_pixels8_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\ -}\ -\ -static inline void OPNAME ## _pixels8_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ - OPNAME ## _pixels8_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\ -}\ -\ -static inline void OPNAME ## _no_rnd_pixels8_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ - OPNAME ## _no_rnd_pixels8_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\ -}\ -\ -static inline void OPNAME ## _pixels8_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ - OPNAME ## _pixels8_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\ -}\ -\ -static inline void OPNAME ## _pixels8_l4(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, const uint8_t *src3, const uint8_t *src4,\ - int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\ - int i;\ - for(i=0; i<h; i++){\ - uint32_t a, b, c, d, l0, l1, h0, h1;\ - a= AV_RN32(&src1[i*src_stride1]);\ - b= AV_RN32(&src2[i*src_stride2]);\ - c= AV_RN32(&src3[i*src_stride3]);\ - d= AV_RN32(&src4[i*src_stride4]);\ - l0= (a&0x03030303UL)\ - + (b&0x03030303UL)\ - + 0x02020202UL;\ - h0= ((a&0xFCFCFCFCUL)>>2)\ - + ((b&0xFCFCFCFCUL)>>2);\ - l1= (c&0x03030303UL)\ - + (d&0x03030303UL);\ - h1= ((c&0xFCFCFCFCUL)>>2)\ - + ((d&0xFCFCFCFCUL)>>2);\ - OP(*((uint32_t*)&dst[i*dst_stride]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ - a= AV_RN32(&src1[i*src_stride1+4]);\ - b= AV_RN32(&src2[i*src_stride2+4]);\ - c= AV_RN32(&src3[i*src_stride3+4]);\ - d= AV_RN32(&src4[i*src_stride4+4]);\ - l0= (a&0x03030303UL)\ - + (b&0x03030303UL)\ - + 0x02020202UL;\ - h0= ((a&0xFCFCFCFCUL)>>2)\ - + ((b&0xFCFCFCFCUL)>>2);\ - l1= (c&0x03030303UL)\ - + (d&0x03030303UL);\ - h1= ((c&0xFCFCFCFCUL)>>2)\ - + ((d&0xFCFCFCFCUL)>>2);\ - OP(*((uint32_t*)&dst[i*dst_stride+4]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ - }\ -}\ -\ -static inline void OPNAME ## _pixels4_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ - OPNAME ## _pixels4_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\ -}\ -\ -static inline void OPNAME ## _pixels4_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ - OPNAME ## _pixels4_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\ -}\ -\ -static inline void OPNAME ## _pixels2_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ - OPNAME ## _pixels2_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\ -}\ -\ -static inline void OPNAME ## _pixels2_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ - OPNAME ## _pixels2_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\ -}\ -\ -static inline void OPNAME ## _no_rnd_pixels8_l4(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, const uint8_t *src3, const uint8_t *src4,\ - int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\ - int i;\ - for(i=0; i<h; i++){\ - uint32_t a, b, c, d, l0, l1, h0, h1;\ - a= AV_RN32(&src1[i*src_stride1]);\ - b= AV_RN32(&src2[i*src_stride2]);\ - c= AV_RN32(&src3[i*src_stride3]);\ - d= AV_RN32(&src4[i*src_stride4]);\ - l0= (a&0x03030303UL)\ - + (b&0x03030303UL)\ - + 0x01010101UL;\ - h0= ((a&0xFCFCFCFCUL)>>2)\ - + ((b&0xFCFCFCFCUL)>>2);\ - l1= (c&0x03030303UL)\ - + (d&0x03030303UL);\ - h1= ((c&0xFCFCFCFCUL)>>2)\ - + ((d&0xFCFCFCFCUL)>>2);\ - OP(*((uint32_t*)&dst[i*dst_stride]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ - a= AV_RN32(&src1[i*src_stride1+4]);\ - b= AV_RN32(&src2[i*src_stride2+4]);\ - c= AV_RN32(&src3[i*src_stride3+4]);\ - d= AV_RN32(&src4[i*src_stride4+4]);\ - l0= (a&0x03030303UL)\ - + (b&0x03030303UL)\ - + 0x01010101UL;\ - h0= ((a&0xFCFCFCFCUL)>>2)\ - + ((b&0xFCFCFCFCUL)>>2);\ - l1= (c&0x03030303UL)\ - + (d&0x03030303UL);\ - h1= ((c&0xFCFCFCFCUL)>>2)\ - + ((d&0xFCFCFCFCUL)>>2);\ - OP(*((uint32_t*)&dst[i*dst_stride+4]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ - }\ -}\ -static inline void OPNAME ## _pixels16_l4(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, const uint8_t *src3, const uint8_t *src4,\ - int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\ - OPNAME ## _pixels8_l4(dst , src1 , src2 , src3 , src4 , dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\ - OPNAME ## _pixels8_l4(dst+8, src1+8, src2+8, src3+8, src4+8, dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\ -}\ -static inline void OPNAME ## _no_rnd_pixels16_l4(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, const uint8_t *src3, const uint8_t *src4,\ - int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\ - OPNAME ## _no_rnd_pixels8_l4(dst , src1 , src2 , src3 , src4 , dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\ - OPNAME ## _no_rnd_pixels8_l4(dst+8, src1+8, src2+8, src3+8, src4+8, dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\ -}\ -\ -static inline void OPNAME ## _pixels2_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ -{\ - int i, a0, b0, a1, b1;\ - a0= pixels[0];\ - b0= pixels[1] + 2;\ - a0 += b0;\ - b0 += pixels[2];\ -\ - pixels+=line_size;\ - for(i=0; i<h; i+=2){\ - a1= pixels[0];\ - b1= pixels[1];\ - a1 += b1;\ - b1 += pixels[2];\ -\ - block[0]= (a1+a0)>>2; /* FIXME non put */\ - block[1]= (b1+b0)>>2;\ -\ - pixels+=line_size;\ - block +=line_size;\ -\ - a0= pixels[0];\ - b0= pixels[1] + 2;\ - a0 += b0;\ - b0 += pixels[2];\ -\ - block[0]= (a1+a0)>>2;\ - block[1]= (b1+b0)>>2;\ - pixels+=line_size;\ - block +=line_size;\ - }\ -}\ -\ -static inline void OPNAME ## _pixels4_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ -{\ - int i;\ - const uint32_t a= AV_RN32(pixels );\ - const uint32_t b= AV_RN32(pixels+1);\ - uint32_t l0= (a&0x03030303UL)\ - + (b&0x03030303UL)\ - + 0x02020202UL;\ - uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\ - + ((b&0xFCFCFCFCUL)>>2);\ - uint32_t l1,h1;\ -\ - pixels+=line_size;\ - for(i=0; i<h; i+=2){\ - uint32_t a= AV_RN32(pixels );\ - uint32_t b= AV_RN32(pixels+1);\ - l1= (a&0x03030303UL)\ - + (b&0x03030303UL);\ - h1= ((a&0xFCFCFCFCUL)>>2)\ - + ((b&0xFCFCFCFCUL)>>2);\ - OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ - pixels+=line_size;\ - block +=line_size;\ - a= AV_RN32(pixels );\ - b= AV_RN32(pixels+1);\ - l0= (a&0x03030303UL)\ - + (b&0x03030303UL)\ - + 0x02020202UL;\ - h0= ((a&0xFCFCFCFCUL)>>2)\ - + ((b&0xFCFCFCFCUL)>>2);\ - OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ - pixels+=line_size;\ - block +=line_size;\ - }\ -}\ -\ -static inline void OPNAME ## _pixels8_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ -{\ - int j;\ - for(j=0; j<2; j++){\ - int i;\ - const uint32_t a= AV_RN32(pixels );\ - const uint32_t b= AV_RN32(pixels+1);\ - uint32_t l0= (a&0x03030303UL)\ - + (b&0x03030303UL)\ - + 0x02020202UL;\ - uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\ - + ((b&0xFCFCFCFCUL)>>2);\ - uint32_t l1,h1;\ -\ - pixels+=line_size;\ - for(i=0; i<h; i+=2){\ - uint32_t a= AV_RN32(pixels );\ - uint32_t b= AV_RN32(pixels+1);\ - l1= (a&0x03030303UL)\ - + (b&0x03030303UL);\ - h1= ((a&0xFCFCFCFCUL)>>2)\ - + ((b&0xFCFCFCFCUL)>>2);\ - OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ - pixels+=line_size;\ - block +=line_size;\ - a= AV_RN32(pixels );\ - b= AV_RN32(pixels+1);\ - l0= (a&0x03030303UL)\ - + (b&0x03030303UL)\ - + 0x02020202UL;\ - h0= ((a&0xFCFCFCFCUL)>>2)\ - + ((b&0xFCFCFCFCUL)>>2);\ - OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ - pixels+=line_size;\ - block +=line_size;\ - }\ - pixels+=4-line_size*(h+1);\ - block +=4-line_size*h;\ - }\ -}\ -\ -static inline void OPNAME ## _no_rnd_pixels8_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ -{\ - int j;\ - for(j=0; j<2; j++){\ - int i;\ - const uint32_t a= AV_RN32(pixels );\ - const uint32_t b= AV_RN32(pixels+1);\ - uint32_t l0= (a&0x03030303UL)\ - + (b&0x03030303UL)\ - + 0x01010101UL;\ - uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\ - + ((b&0xFCFCFCFCUL)>>2);\ - uint32_t l1,h1;\ -\ - pixels+=line_size;\ - for(i=0; i<h; i+=2){\ - uint32_t a= AV_RN32(pixels );\ - uint32_t b= AV_RN32(pixels+1);\ - l1= (a&0x03030303UL)\ - + (b&0x03030303UL);\ - h1= ((a&0xFCFCFCFCUL)>>2)\ - + ((b&0xFCFCFCFCUL)>>2);\ - OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ - pixels+=line_size;\ - block +=line_size;\ - a= AV_RN32(pixels );\ - b= AV_RN32(pixels+1);\ - l0= (a&0x03030303UL)\ - + (b&0x03030303UL)\ - + 0x01010101UL;\ - h0= ((a&0xFCFCFCFCUL)>>2)\ - + ((b&0xFCFCFCFCUL)>>2);\ - OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ - pixels+=line_size;\ - block +=line_size;\ - }\ - pixels+=4-line_size*(h+1);\ - block +=4-line_size*h;\ - }\ -}\ -\ -CALL_2X_PIXELS(OPNAME ## _pixels16_c , OPNAME ## _pixels8_c , 8)\ -CALL_2X_PIXELS(OPNAME ## _pixels16_x2_c , OPNAME ## _pixels8_x2_c , 8)\ -CALL_2X_PIXELS(OPNAME ## _pixels16_y2_c , OPNAME ## _pixels8_y2_c , 8)\ -CALL_2X_PIXELS(OPNAME ## _pixels16_xy2_c, OPNAME ## _pixels8_xy2_c, 8)\ -av_unused CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_c , OPNAME ## _pixels8_c , 8)\ -CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_x2_c , OPNAME ## _no_rnd_pixels8_x2_c , 8)\ -CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_y2_c , OPNAME ## _no_rnd_pixels8_y2_c , 8)\ -CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_xy2_c, OPNAME ## _no_rnd_pixels8_xy2_c, 8)\ - -#define op_avg(a, b) a = rnd_avg32(a, b) -#endif -#define op_put(a, b) a = b - -PIXOP2(avg, op_avg) -PIXOP2(put, op_put) -#undef op_avg -#undef op_put - -#define put_no_rnd_pixels8_c put_pixels8_c -#define put_no_rnd_pixels16_c put_pixels16_c - #define avg2(a,b) ((a+b+1)>>1) #define avg4(a,b,c,d) ((a+b+c+d+2)>>2) -static void put_no_rnd_pixels16_l2_c(uint8_t *dst, const uint8_t *a, const uint8_t *b, int stride, int h){ - put_no_rnd_pixels16_l2(dst, a, b, stride, stride, stride, h); -} - -static void put_no_rnd_pixels8_l2_c(uint8_t *dst, const uint8_t *a, const uint8_t *b, int stride, int h){ - put_no_rnd_pixels8_l2(dst, a, b, stride, stride, stride, h); -} - static void gmc1_c(uint8_t *dst, uint8_t *src, int stride, int h, int x16, int y16, int rounder) { const int A=(16-x16)*(16-y16); @@ -1494,115 +832,6 @@ static void put_tpel_pixels ## width ## _mc22_c(uint8_t *dst, const uint8_t *src void put_tpel_pixels_mc22_c(dst, src, stride, width, height);} #endif -#define H264_CHROMA_MC(OPNAME, OP)\ -static void OPNAME ## h264_chroma_mc2_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\ - const int A=(8-x)*(8-y);\ - const int B=( x)*(8-y);\ - const int C=(8-x)*( y);\ - const int D=( x)*( y);\ - int i;\ - \ - assert(x<8 && y<8 && x>=0 && y>=0);\ -\ - if(D){\ - for(i=0; i<h; i++){\ - OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\ - OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\ - dst+= stride;\ - src+= stride;\ - }\ - }else{\ - const int E= B+C;\ - const int step= C ? stride : 1;\ - for(i=0; i<h; i++){\ - OP(dst[0], (A*src[0] + E*src[step+0]));\ - OP(dst[1], (A*src[1] + E*src[step+1]));\ - dst+= stride;\ - src+= stride;\ - }\ - }\ -}\ -\ -static void OPNAME ## h264_chroma_mc4_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\ - const int A=(8-x)*(8-y);\ - const int B=( x)*(8-y);\ - const int C=(8-x)*( y);\ - const int D=( x)*( y);\ - int i;\ - \ - assert(x<8 && y<8 && x>=0 && y>=0);\ -\ - if(D){\ - for(i=0; i<h; i++){\ - OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\ - OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\ - OP(dst[2], (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3]));\ - OP(dst[3], (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4]));\ - dst+= stride;\ - src+= stride;\ - }\ - }else{\ - const int E= B+C;\ - const int step= C ? stride : 1;\ - for(i=0; i<h; i++){\ - OP(dst[0], (A*src[0] + E*src[step+0]));\ - OP(dst[1], (A*src[1] + E*src[step+1]));\ - OP(dst[2], (A*src[2] + E*src[step+2]));\ - OP(dst[3], (A*src[3] + E*src[step+3]));\ - dst+= stride;\ - src+= stride;\ - }\ - }\ -}\ -\ -static void OPNAME ## h264_chroma_mc8_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\ - const int A=(8-x)*(8-y);\ - const int B=( x)*(8-y);\ - const int C=(8-x)*( y);\ - const int D=( x)*( y);\ - int i;\ - \ - assert(x<8 && y<8 && x>=0 && y>=0);\ -\ - if(D){\ - for(i=0; i<h; i++){\ - OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\ - OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\ - OP(dst[2], (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3]));\ - OP(dst[3], (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4]));\ - OP(dst[4], (A*src[4] + B*src[5] + C*src[stride+4] + D*src[stride+5]));\ - OP(dst[5], (A*src[5] + B*src[6] + C*src[stride+5] + D*src[stride+6]));\ - OP(dst[6], (A*src[6] + B*src[7] + C*src[stride+6] + D*src[stride+7]));\ - OP(dst[7], (A*src[7] + B*src[8] + C*src[stride+7] + D*src[stride+8]));\ - dst+= stride;\ - src+= stride;\ - }\ - }else{\ - const int E= B+C;\ - const int step= C ? stride : 1;\ - for(i=0; i<h; i++){\ - OP(dst[0], (A*src[0] + E*src[step+0]));\ - OP(dst[1], (A*src[1] + E*src[step+1]));\ - OP(dst[2], (A*src[2] + E*src[step+2]));\ - OP(dst[3], (A*src[3] + E*src[step+3]));\ - OP(dst[4], (A*src[4] + E*src[step+4]));\ - OP(dst[5], (A*src[5] + E*src[step+5]));\ - OP(dst[6], (A*src[6] + E*src[step+6]));\ - OP(dst[7], (A*src[7] + E*src[step+7]));\ - dst+= stride;\ - src+= stride;\ - }\ - }\ -} - -#define op_avg(a, b) a = (((a)+(((b) + 32)>>6)+1)>>1) -#define op_put(a, b) a = (((b) + 32)>>6) - -H264_CHROMA_MC(put_ , op_put) -H264_CHROMA_MC(avg_ , op_avg) -#undef op_avg -#undef op_put - #define QPEL_MC(r, OPNAME, RND, OP) \ static void OPNAME ## mpeg4_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\ uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\ @@ -2100,435 +1329,6 @@ QPEL_MC(0, avg_ , _ , op_avg) #define put_no_rnd_qpel8_mc00_c ff_put_pixels8x8_c #define put_no_rnd_qpel16_mc00_c ff_put_pixels16x16_c -#if 1 -#define H264_LOWPASS(OPNAME, OP, OP2) \ -static av_unused void OPNAME ## h264_qpel2_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ - const int h=2;\ - uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\ - int i;\ - for(i=0; i<h; i++)\ - {\ - OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]));\ - OP(dst[1], (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]));\ - dst+=dstStride;\ - src+=srcStride;\ - }\ -}\ -\ -static av_unused void OPNAME ## h264_qpel2_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ - const int w=2;\ - uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\ - int i;\ - for(i=0; i<w; i++)\ - {\ - const int srcB= src[-2*srcStride];\ - const int srcA= src[-1*srcStride];\ - const int src0= src[0 *srcStride];\ - const int src1= src[1 *srcStride];\ - const int src2= src[2 *srcStride];\ - const int src3= src[3 *srcStride];\ - const int src4= src[4 *srcStride];\ - OP(dst[0*dstStride], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\ - OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\ - dst++;\ - src++;\ - }\ -}\ -\ -static av_unused void OPNAME ## h264_qpel2_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\ - const int h=2;\ - const int w=2;\ - uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\ - int i;\ - src -= 2*srcStride;\ - for(i=0; i<h+5; i++)\ - {\ - tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]);\ - tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]);\ - tmp+=tmpStride;\ - src+=srcStride;\ - }\ - tmp -= tmpStride*(h+5-2);\ - for(i=0; i<w; i++)\ - {\ - const int tmpB= tmp[-2*tmpStride];\ - const int tmpA= tmp[-1*tmpStride];\ - const int tmp0= tmp[0 *tmpStride];\ - const int tmp1= tmp[1 *tmpStride];\ - const int tmp2= tmp[2 *tmpStride];\ - const int tmp3= tmp[3 *tmpStride];\ - const int tmp4= tmp[4 *tmpStride];\ - OP2(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));\ - OP2(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));\ - dst++;\ - tmp++;\ - }\ -}\ -static void OPNAME ## h264_qpel4_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ - const int h=4;\ - uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\ - int i;\ - for(i=0; i<h; i++)\ - {\ - OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]));\ - OP(dst[1], (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]));\ - OP(dst[2], (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5]));\ - OP(dst[3], (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6]));\ - dst+=dstStride;\ - src+=srcStride;\ - }\ -}\ -\ -static void OPNAME ## h264_qpel4_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ - const int w=4;\ - uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\ - int i;\ - for(i=0; i<w; i++)\ - {\ - const int srcB= src[-2*srcStride];\ - const int srcA= src[-1*srcStride];\ - const int src0= src[0 *srcStride];\ - const int src1= src[1 *srcStride];\ - const int src2= src[2 *srcStride];\ - const int src3= src[3 *srcStride];\ - const int src4= src[4 *srcStride];\ - const int src5= src[5 *srcStride];\ - const int src6= src[6 *srcStride];\ - OP(dst[0*dstStride], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\ - OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\ - OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*5 + (src0+src5));\ - OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*5 + (src1+src6));\ - dst++;\ - src++;\ - }\ -}\ -\ -static void OPNAME ## h264_qpel4_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\ - const int h=4;\ - const int w=4;\ - uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\ - int i;\ - src -= 2*srcStride;\ - for(i=0; i<h+5; i++)\ - {\ - tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]);\ - tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]);\ - tmp[2]= (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5]);\ - tmp[3]= (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6]);\ - tmp+=tmpStride;\ - src+=srcStride;\ - }\ - tmp -= tmpStride*(h+5-2);\ - for(i=0; i<w; i++)\ - {\ - const int tmpB= tmp[-2*tmpStride];\ - const int tmpA= tmp[-1*tmpStride];\ - const int tmp0= tmp[0 *tmpStride];\ - const int tmp1= tmp[1 *tmpStride];\ - const int tmp2= tmp[2 *tmpStride];\ - const int tmp3= tmp[3 *tmpStride];\ - const int tmp4= tmp[4 *tmpStride];\ - const int tmp5= tmp[5 *tmpStride];\ - const int tmp6= tmp[6 *tmpStride];\ - OP2(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));\ - OP2(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));\ - OP2(dst[2*dstStride], (tmp2+tmp3)*20 - (tmp1+tmp4)*5 + (tmp0+tmp5));\ - OP2(dst[3*dstStride], (tmp3+tmp4)*20 - (tmp2+tmp5)*5 + (tmp1+tmp6));\ - dst++;\ - tmp++;\ - }\ -}\ -\ -static void OPNAME ## h264_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ - const int h=8;\ - uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\ - int i;\ - for(i=0; i<h; i++)\ - {\ - OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3 ]));\ - OP(dst[1], (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4 ]));\ - OP(dst[2], (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5 ]));\ - OP(dst[3], (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6 ]));\ - OP(dst[4], (src[4]+src[5])*20 - (src[3 ]+src[6])*5 + (src[2 ]+src[7 ]));\ - OP(dst[5], (src[5]+src[6])*20 - (src[4 ]+src[7])*5 + (src[3 ]+src[8 ]));\ - OP(dst[6], (src[6]+src[7])*20 - (src[5 ]+src[8])*5 + (src[4 ]+src[9 ]));\ - OP(dst[7], (src[7]+src[8])*20 - (src[6 ]+src[9])*5 + (src[5 ]+src[10]));\ - dst+=dstStride;\ - src+=srcStride;\ - }\ -}\ -\ -static void OPNAME ## h264_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ - const int w=8;\ - uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\ - int i;\ - for(i=0; i<w; i++)\ - {\ - const int srcB= src[-2*srcStride];\ - const int srcA= src[-1*srcStride];\ - const int src0= src[0 *srcStride];\ - const int src1= src[1 *srcStride];\ - const int src2= src[2 *srcStride];\ - const int src3= src[3 *srcStride];\ - const int src4= src[4 *srcStride];\ - const int src5= src[5 *srcStride];\ - const int src6= src[6 *srcStride];\ - const int src7= src[7 *srcStride];\ - const int src8= src[8 *srcStride];\ - const int src9= src[9 *srcStride];\ - const int src10=src[10*srcStride];\ - OP(dst[0*dstStride], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\ - OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\ - OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*5 + (src0+src5));\ - OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*5 + (src1+src6));\ - OP(dst[4*dstStride], (src4+src5)*20 - (src3+src6)*5 + (src2+src7));\ - OP(dst[5*dstStride], (src5+src6)*20 - (src4+src7)*5 + (src3+src8));\ - OP(dst[6*dstStride], (src6+src7)*20 - (src5+src8)*5 + (src4+src9));\ - OP(dst[7*dstStride], (src7+src8)*20 - (src6+src9)*5 + (src5+src10));\ - dst++;\ - src++;\ - }\ -}\ -\ -static void OPNAME ## h264_qpel8_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\ - const int h=8;\ - const int w=8;\ - uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\ - int i;\ - src -= 2*srcStride;\ - for(i=0; i<h+5; i++)\ - {\ - tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3 ]);\ - tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4 ]);\ - tmp[2]= (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5 ]);\ - tmp[3]= (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6 ]);\ - tmp[4]= (src[4]+src[5])*20 - (src[3 ]+src[6])*5 + (src[2 ]+src[7 ]);\ - tmp[5]= (src[5]+src[6])*20 - (src[4 ]+src[7])*5 + (src[3 ]+src[8 ]);\ - tmp[6]= (src[6]+src[7])*20 - (src[5 ]+src[8])*5 + (src[4 ]+src[9 ]);\ - tmp[7]= (src[7]+src[8])*20 - (src[6 ]+src[9])*5 + (src[5 ]+src[10]);\ - tmp+=tmpStride;\ - src+=srcStride;\ - }\ - tmp -= tmpStride*(h+5-2);\ - for(i=0; i<w; i++)\ - {\ - const int tmpB= tmp[-2*tmpStride];\ - const int tmpA= tmp[-1*tmpStride];\ - const int tmp0= tmp[0 *tmpStride];\ - const int tmp1= tmp[1 *tmpStride];\ - const int tmp2= tmp[2 *tmpStride];\ - const int tmp3= tmp[3 *tmpStride];\ - const int tmp4= tmp[4 *tmpStride];\ - const int tmp5= tmp[5 *tmpStride];\ - const int tmp6= tmp[6 *tmpStride];\ - const int tmp7= tmp[7 *tmpStride];\ - const int tmp8= tmp[8 *tmpStride];\ - const int tmp9= tmp[9 *tmpStride];\ - const int tmp10=tmp[10*tmpStride];\ - OP2(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));\ - OP2(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));\ - OP2(dst[2*dstStride], (tmp2+tmp3)*20 - (tmp1+tmp4)*5 + (tmp0+tmp5));\ - OP2(dst[3*dstStride], (tmp3+tmp4)*20 - (tmp2+tmp5)*5 + (tmp1+tmp6));\ - OP2(dst[4*dstStride], (tmp4+tmp5)*20 - (tmp3+tmp6)*5 + (tmp2+tmp7));\ - OP2(dst[5*dstStride], (tmp5+tmp6)*20 - (tmp4+tmp7)*5 + (tmp3+tmp8));\ - OP2(dst[6*dstStride], (tmp6+tmp7)*20 - (tmp5+tmp8)*5 + (tmp4+tmp9));\ - OP2(dst[7*dstStride], (tmp7+tmp8)*20 - (tmp6+tmp9)*5 + (tmp5+tmp10));\ - dst++;\ - tmp++;\ - }\ -}\ -\ -static void OPNAME ## h264_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ - OPNAME ## h264_qpel8_v_lowpass(dst , src , dstStride, srcStride);\ - OPNAME ## h264_qpel8_v_lowpass(dst+8, src+8, dstStride, srcStride);\ - src += 8*srcStride;\ - dst += 8*dstStride;\ - OPNAME ## h264_qpel8_v_lowpass(dst , src , dstStride, srcStride);\ - OPNAME ## h264_qpel8_v_lowpass(dst+8, src+8, dstStride, srcStride);\ -}\ -\ -static void OPNAME ## h264_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ - OPNAME ## h264_qpel8_h_lowpass(dst , src , dstStride, srcStride);\ - OPNAME ## h264_qpel8_h_lowpass(dst+8, src+8, dstStride, srcStride);\ - src += 8*srcStride;\ - dst += 8*dstStride;\ - OPNAME ## h264_qpel8_h_lowpass(dst , src , dstStride, srcStride);\ - OPNAME ## h264_qpel8_h_lowpass(dst+8, src+8, dstStride, srcStride);\ -}\ -\ -static void OPNAME ## h264_qpel16_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\ - OPNAME ## h264_qpel8_hv_lowpass(dst , tmp , src , dstStride, tmpStride, srcStride);\ - OPNAME ## h264_qpel8_hv_lowpass(dst+8, tmp+8, src+8, dstStride, tmpStride, srcStride);\ - src += 8*srcStride;\ - dst += 8*dstStride;\ - OPNAME ## h264_qpel8_hv_lowpass(dst , tmp , src , dstStride, tmpStride, srcStride);\ - OPNAME ## h264_qpel8_hv_lowpass(dst+8, tmp+8, src+8, dstStride, tmpStride, srcStride);\ -}\ - -#define H264_MC(OPNAME, SIZE) \ -static av_unused void OPNAME ## h264_qpel ## SIZE ## _mc00_c (uint8_t *dst, uint8_t *src, int stride){\ - OPNAME ## pixels ## SIZE ## _c(dst, src, stride, SIZE);\ -}\ -\ -static void OPNAME ## h264_qpel ## SIZE ## _mc10_c(uint8_t *dst, uint8_t *src, int stride){\ - uint8_t half[SIZE*SIZE];\ - put_h264_qpel ## SIZE ## _h_lowpass(half, src, SIZE, stride);\ - OPNAME ## pixels ## SIZE ## _l2(dst, src, half, stride, stride, SIZE, SIZE);\ -}\ -\ -static void OPNAME ## h264_qpel ## SIZE ## _mc20_c(uint8_t *dst, uint8_t *src, int stride){\ - OPNAME ## h264_qpel ## SIZE ## _h_lowpass(dst, src, stride, stride);\ -}\ -\ -static void OPNAME ## h264_qpel ## SIZE ## _mc30_c(uint8_t *dst, uint8_t *src, int stride){\ - uint8_t half[SIZE*SIZE];\ - put_h264_qpel ## SIZE ## _h_lowpass(half, src, SIZE, stride);\ - OPNAME ## pixels ## SIZE ## _l2(dst, src+1, half, stride, stride, SIZE, SIZE);\ -}\ -\ -static void OPNAME ## h264_qpel ## SIZE ## _mc01_c(uint8_t *dst, uint8_t *src, int stride){\ - uint8_t full[SIZE*(SIZE+5)];\ - uint8_t * const full_mid= full + SIZE*2;\ - uint8_t half[SIZE*SIZE];\ - copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\ - put_h264_qpel ## SIZE ## _v_lowpass(half, full_mid, SIZE, SIZE);\ - OPNAME ## pixels ## SIZE ## _l2(dst, full_mid, half, stride, SIZE, SIZE, SIZE);\ -}\ -\ -static void OPNAME ## h264_qpel ## SIZE ## _mc02_c(uint8_t *dst, uint8_t *src, int stride){\ - uint8_t full[SIZE*(SIZE+5)];\ - uint8_t * const full_mid= full + SIZE*2;\ - copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\ - OPNAME ## h264_qpel ## SIZE ## _v_lowpass(dst, full_mid, stride, SIZE);\ -}\ -\ -static void OPNAME ## h264_qpel ## SIZE ## _mc03_c(uint8_t *dst, uint8_t *src, int stride){\ - uint8_t full[SIZE*(SIZE+5)];\ - uint8_t * const full_mid= full + SIZE*2;\ - uint8_t half[SIZE*SIZE];\ - copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\ - put_h264_qpel ## SIZE ## _v_lowpass(half, full_mid, SIZE, SIZE);\ - OPNAME ## pixels ## SIZE ## _l2(dst, full_mid+SIZE, half, stride, SIZE, SIZE, SIZE);\ -}\ -\ -static void OPNAME ## h264_qpel ## SIZE ## _mc11_c(uint8_t *dst, uint8_t *src, int stride){\ - uint8_t full[SIZE*(SIZE+5)];\ - uint8_t * const full_mid= full + SIZE*2;\ - uint8_t halfH[SIZE*SIZE];\ - uint8_t halfV[SIZE*SIZE];\ - put_h264_qpel ## SIZE ## _h_lowpass(halfH, src, SIZE, stride);\ - copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\ - put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\ - OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\ -}\ -\ -static void OPNAME ## h264_qpel ## SIZE ## _mc31_c(uint8_t *dst, uint8_t *src, int stride){\ - uint8_t full[SIZE*(SIZE+5)];\ - uint8_t * const full_mid= full + SIZE*2;\ - uint8_t halfH[SIZE*SIZE];\ - uint8_t halfV[SIZE*SIZE];\ - put_h264_qpel ## SIZE ## _h_lowpass(halfH, src, SIZE, stride);\ - copy_block ## SIZE (full, src - stride*2 + 1, SIZE, stride, SIZE + 5);\ - put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\ - OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\ -}\ -\ -static void OPNAME ## h264_qpel ## SIZE ## _mc13_c(uint8_t *dst, uint8_t *src, int stride){\ - uint8_t full[SIZE*(SIZE+5)];\ - uint8_t * const full_mid= full + SIZE*2;\ - uint8_t halfH[SIZE*SIZE];\ - uint8_t halfV[SIZE*SIZE];\ - put_h264_qpel ## SIZE ## _h_lowpass(halfH, src + stride, SIZE, stride);\ - copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\ - put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\ - OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\ -}\ -\ -static void OPNAME ## h264_qpel ## SIZE ## _mc33_c(uint8_t *dst, uint8_t *src, int stride){\ - uint8_t full[SIZE*(SIZE+5)];\ - uint8_t * const full_mid= full + SIZE*2;\ - uint8_t halfH[SIZE*SIZE];\ - uint8_t halfV[SIZE*SIZE];\ - put_h264_qpel ## SIZE ## _h_lowpass(halfH, src + stride, SIZE, stride);\ - copy_block ## SIZE (full, src - stride*2 + 1, SIZE, stride, SIZE + 5);\ - put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\ - OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\ -}\ -\ -static void OPNAME ## h264_qpel ## SIZE ## _mc22_c(uint8_t *dst, uint8_t *src, int stride){\ - int16_t tmp[SIZE*(SIZE+5)];\ - OPNAME ## h264_qpel ## SIZE ## _hv_lowpass(dst, tmp, src, stride, SIZE, stride);\ -}\ -\ -static void OPNAME ## h264_qpel ## SIZE ## _mc21_c(uint8_t *dst, uint8_t *src, int stride){\ - int16_t tmp[SIZE*(SIZE+5)];\ - uint8_t halfH[SIZE*SIZE];\ - uint8_t halfHV[SIZE*SIZE];\ - put_h264_qpel ## SIZE ## _h_lowpass(halfH, src, SIZE, stride);\ - put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\ - OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfHV, stride, SIZE, SIZE, SIZE);\ -}\ -\ -static void OPNAME ## h264_qpel ## SIZE ## _mc23_c(uint8_t *dst, uint8_t *src, int stride){\ - int16_t tmp[SIZE*(SIZE+5)];\ - uint8_t halfH[SIZE*SIZE];\ - uint8_t halfHV[SIZE*SIZE];\ - put_h264_qpel ## SIZE ## _h_lowpass(halfH, src + stride, SIZE, stride);\ - put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\ - OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfHV, stride, SIZE, SIZE, SIZE);\ -}\ -\ -static void OPNAME ## h264_qpel ## SIZE ## _mc12_c(uint8_t *dst, uint8_t *src, int stride){\ - uint8_t full[SIZE*(SIZE+5)];\ - uint8_t * const full_mid= full + SIZE*2;\ - int16_t tmp[SIZE*(SIZE+5)];\ - uint8_t halfV[SIZE*SIZE];\ - uint8_t halfHV[SIZE*SIZE];\ - copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\ - put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\ - put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\ - OPNAME ## pixels ## SIZE ## _l2(dst, halfV, halfHV, stride, SIZE, SIZE, SIZE);\ -}\ -\ -static void OPNAME ## h264_qpel ## SIZE ## _mc32_c(uint8_t *dst, uint8_t *src, int stride){\ - uint8_t full[SIZE*(SIZE+5)];\ - uint8_t * const full_mid= full + SIZE*2;\ - int16_t tmp[SIZE*(SIZE+5)];\ - uint8_t halfV[SIZE*SIZE];\ - uint8_t halfHV[SIZE*SIZE];\ - copy_block ## SIZE (full, src - stride*2 + 1, SIZE, stride, SIZE + 5);\ - put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\ - put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\ - OPNAME ## pixels ## SIZE ## _l2(dst, halfV, halfHV, stride, SIZE, SIZE, SIZE);\ -}\ - -#define op_avg(a, b) a = (((a)+cm[((b) + 16)>>5]+1)>>1) -//#define op_avg2(a, b) a = (((a)*w1+cm[((b) + 16)>>5]*w2 + o + 64)>>7) -#define op_put(a, b) a = cm[((b) + 16)>>5] -#define op2_avg(a, b) a = (((a)+cm[((b) + 512)>>10]+1)>>1) -#define op2_put(a, b) a = cm[((b) + 512)>>10] - -H264_LOWPASS(put_ , op_put, op2_put) -H264_LOWPASS(avg_ , op_avg, op2_avg) -H264_MC(put_, 2) -H264_MC(put_, 4) -H264_MC(put_, 8) -H264_MC(put_, 16) -H264_MC(avg_, 4) -H264_MC(avg_, 8) -H264_MC(avg_, 16) - -#undef op_avg -#undef op_put -#undef op2_avg -#undef op2_put -#endif - -#define put_h264_qpel8_mc00_c ff_put_pixels8x8_c -#define avg_h264_qpel8_mc00_c ff_avg_pixels8x8_c -#define put_h264_qpel16_mc00_c ff_put_pixels16x16_c -#define avg_h264_qpel16_mc00_c ff_avg_pixels16x16_c - static void wmv2_mspel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){ uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; int i; @@ -2547,19 +1347,6 @@ static void wmv2_mspel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int } } -void ff_put_pixels8x8_c(uint8_t *dst, uint8_t *src, int stride) { - put_pixels8_c(dst, src, stride, 8); -} -void ff_avg_pixels8x8_c(uint8_t *dst, uint8_t *src, int stride) { - avg_pixels8_c(dst, src, stride, 8); -} -void ff_put_pixels16x16_c(uint8_t *dst, uint8_t *src, int stride) { - put_pixels16_c(dst, src, stride, 16); -} -void ff_avg_pixels16x16_c(uint8_t *dst, uint8_t *src, int stride) { - avg_pixels16_c(dst, src, stride, 16); -} - #if CONFIG_RV40_DECODER static void put_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){ put_pixels16_xy2_c(dst, src, stride, 16); @@ -3117,19 +1904,6 @@ void ff_set_cmp(DSPContext* c, me_cmp_func *cmp, int type){ } } -static void clear_block_c(DCTELEM *block) -{ - memset(block, 0, sizeof(DCTELEM)*64); -} - -/** - * memset(blocks, 0, sizeof(DCTELEM)*6*64) - */ -static void clear_blocks_c(DCTELEM *blocks) -{ - memset(blocks, 0, sizeof(DCTELEM)*6*64); -} - static void add_bytes_c(uint8_t *dst, uint8_t *src, int w){ long i; for(i=0; i<=w-sizeof(long); i+=sizeof(long)){ |