diff options
author | Michael Niedermayer <michaelni@gmx.at> | 2003-04-04 14:42:28 +0000 |
---|---|---|
committer | Michael Niedermayer <michaelni@gmx.at> | 2003-04-04 14:42:28 +0000 |
commit | 0da71265d84b587c7159cd82708ca60ad050dd4c (patch) | |
tree | fc97766fbbea1ab3af9df1aa9e4f37f8b0aca02d /libavcodec/dsputil.c | |
parent | 6aafe463e5d1483b95ad259334c45d2741c92fb2 (diff) | |
download | ffmpeg-0da71265d84b587c7159cd82708ca60ad050dd4c.tar.gz |
H264 decoder & demuxer
Originally committed as revision 1732 to svn://svn.ffmpeg.org/ffmpeg/trunk
Diffstat (limited to 'libavcodec/dsputil.c')
-rw-r--r-- | libavcodec/dsputil.c | 502 |
1 files changed, 502 insertions, 0 deletions
diff --git a/libavcodec/dsputil.c b/libavcodec/dsputil.c index 5f4190f755..146a657dd6 100644 --- a/libavcodec/dsputil.c +++ b/libavcodec/dsputil.c @@ -466,6 +466,14 @@ CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_xy2_c, OPNAME ## _no_rnd_pixels_xy2_c, #else // 64 bit variant #define PIXOP2(OPNAME, OP) \ +static void OPNAME ## _pixels4_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ + int i;\ + for(i=0; i<h; i++){\ + OP(*((uint32_t*)(block )), LD32(pixels ));\ + pixels+=line_size;\ + block +=line_size;\ + }\ +}\ static void OPNAME ## _pixels8_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ int i;\ for(i=0; i<h; i++){\ @@ -507,6 +515,17 @@ static inline void OPNAME ## _pixels8_l2(uint8_t *dst, const uint8_t *src1, cons }\ }\ \ +static inline void OPNAME ## _pixels4_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \ + int src_stride1, int src_stride2, int h){\ + int i;\ + for(i=0; i<h; i++){\ + uint32_t a,b;\ + a= LD32(&src1[i*src_stride1 ]);\ + b= LD32(&src2[i*src_stride2 ]);\ + OP(*((uint32_t*)&dst[i*dst_stride ]), (a|b) - (((a^b)&0xFEFEFEFEUL)>>1));\ + }\ +}\ +\ static inline void OPNAME ## _pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \ int src_stride1, int src_stride2, int h){\ OPNAME ## _pixels8_l2(dst , src1 , src2 , dst_stride, src_stride1, src_stride2, h);\ @@ -800,6 +819,113 @@ static void gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy, oy += dyy; } } +#define H264_CHROMA_MC(OPNAME, OP)\ +static void OPNAME ## h264_chroma_mc2_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\ + const int A=(8-x)*(8-y);\ + const int B=( x)*(8-y);\ + const int C=(8-x)*( y);\ + const int D=( x)*( y);\ + int i;\ + \ + assert(x<8 && y<8 && x>=0 && y>=0);\ +\ + for(i=0; i<h; i++)\ + {\ + OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\ + OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\ + dst+= stride;\ + src+= stride;\ + }\ +}\ +\ +static void OPNAME ## h264_chroma_mc4_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\ + const int A=(8-x)*(8-y);\ + const int B=( x)*(8-y);\ + const int C=(8-x)*( y);\ + const int D=( x)*( y);\ + int i;\ + \ + assert(x<8 && y<8 && x>=0 && y>=0);\ +\ + for(i=0; i<h; i++)\ + {\ + OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\ + OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\ + OP(dst[2], (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3]));\ + OP(dst[3], (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4]));\ + dst+= stride;\ + src+= stride;\ + }\ +}\ +\ +static void OPNAME ## h264_chroma_mc8_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\ + const int A=(8-x)*(8-y);\ + const int B=( x)*(8-y);\ + const int C=(8-x)*( y);\ + const int D=( x)*( y);\ + int i;\ + \ + assert(x<8 && y<8 && x>=0 && y>=0);\ +\ + for(i=0; i<h; i++)\ + {\ + OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\ + OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\ + OP(dst[2], (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3]));\ + OP(dst[3], (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4]));\ + OP(dst[4], (A*src[4] + B*src[5] + C*src[stride+4] + D*src[stride+5]));\ + OP(dst[5], (A*src[5] + B*src[6] + C*src[stride+5] + D*src[stride+6]));\ + OP(dst[6], (A*src[6] + B*src[7] + C*src[stride+6] + D*src[stride+7]));\ + OP(dst[7], (A*src[7] + B*src[8] + C*src[stride+7] + D*src[stride+8]));\ + dst+= stride;\ + src+= stride;\ + }\ +} + +#define op_avg(a, b) a = (((a)+(((b) + 32)>>6)+1)>>1) +#define op_put(a, b) a = (((b) + 32)>>6) + +H264_CHROMA_MC(put_ , op_put) +H264_CHROMA_MC(avg_ , op_avg) +#undef op_avg +#undef op_put + +static inline void copy_block4(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h) +{ + int i; + for(i=0; i<h; i++) + { + ST32(dst , LD32(src )); + dst+=dstStride; + src+=srcStride; + } +} + +static inline void copy_block8(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h) +{ + int i; + for(i=0; i<h; i++) + { + ST32(dst , LD32(src )); + ST32(dst+4 , LD32(src+4 )); + dst+=dstStride; + src+=srcStride; + } +} + +static inline void copy_block16(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h) +{ + int i; + for(i=0; i<h; i++) + { + ST32(dst , LD32(src )); + ST32(dst+4 , LD32(src+4 )); + ST32(dst+8 , LD32(src+8 )); + ST32(dst+12, LD32(src+12)); + dst+=dstStride; + src+=srcStride; + } +} static inline void copy_block17(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h) { @@ -1327,6 +1453,368 @@ QPEL_MC(0, avg_ , _ , op_avg) #undef op_put #undef op_put_no_rnd +#if 1 +#define H264_LOWPASS(OPNAME, OP, OP2) \ +static void OPNAME ## h264_qpel4_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ + const int h=4;\ + uint8_t *cm = cropTbl + MAX_NEG_CROP;\ + int i;\ + for(i=0; i<h; i++)\ + {\ + OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]));\ + OP(dst[1], (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]));\ + OP(dst[2], (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5]));\ + OP(dst[3], (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6]));\ + dst+=dstStride;\ + src+=srcStride;\ + }\ +}\ +\ +static void OPNAME ## h264_qpel4_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ + const int w=4;\ + uint8_t *cm = cropTbl + MAX_NEG_CROP;\ + int i;\ + for(i=0; i<w; i++)\ + {\ + const int srcB= src[-2*srcStride];\ + const int srcA= src[-1*srcStride];\ + const int src0= src[0 *srcStride];\ + const int src1= src[1 *srcStride];\ + const int src2= src[2 *srcStride];\ + const int src3= src[3 *srcStride];\ + const int src4= src[4 *srcStride];\ + const int src5= src[5 *srcStride];\ + const int src6= src[6 *srcStride];\ + OP(dst[0*dstStride], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\ + OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\ + OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*5 + (src0+src5));\ + OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*5 + (src1+src6));\ + dst++;\ + src++;\ + }\ +}\ +\ +static void OPNAME ## h264_qpel4_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\ + const int h=4;\ + const int w=4;\ + uint8_t *cm = cropTbl + MAX_NEG_CROP;\ + int i;\ + src -= 2*srcStride;\ + for(i=0; i<h+5; i++)\ + {\ + tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]);\ + tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]);\ + tmp[2]= (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5]);\ + tmp[3]= (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6]);\ + tmp+=tmpStride;\ + src+=srcStride;\ + }\ + tmp -= tmpStride*(h+5-2);\ + for(i=0; i<w; i++)\ + {\ + const int tmpB= tmp[-2*tmpStride];\ + const int tmpA= tmp[-1*tmpStride];\ + const int tmp0= tmp[0 *tmpStride];\ + const int tmp1= tmp[1 *tmpStride];\ + const int tmp2= tmp[2 *tmpStride];\ + const int tmp3= tmp[3 *tmpStride];\ + const int tmp4= tmp[4 *tmpStride];\ + const int tmp5= tmp[5 *tmpStride];\ + const int tmp6= tmp[6 *tmpStride];\ + OP2(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));\ + OP2(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));\ + OP2(dst[2*dstStride], (tmp2+tmp3)*20 - (tmp1+tmp4)*5 + (tmp0+tmp5));\ + OP2(dst[3*dstStride], (tmp3+tmp4)*20 - (tmp2+tmp5)*5 + (tmp1+tmp6));\ + dst++;\ + tmp++;\ + }\ +}\ +\ +static void OPNAME ## h264_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ + const int h=8;\ + uint8_t *cm = cropTbl + MAX_NEG_CROP;\ + int i;\ + for(i=0; i<h; i++)\ + {\ + OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3 ]));\ + OP(dst[1], (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4 ]));\ + OP(dst[2], (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5 ]));\ + OP(dst[3], (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6 ]));\ + OP(dst[4], (src[4]+src[5])*20 - (src[3 ]+src[6])*5 + (src[2 ]+src[7 ]));\ + OP(dst[5], (src[5]+src[6])*20 - (src[4 ]+src[7])*5 + (src[3 ]+src[8 ]));\ + OP(dst[6], (src[6]+src[7])*20 - (src[5 ]+src[8])*5 + (src[4 ]+src[9 ]));\ + OP(dst[7], (src[7]+src[8])*20 - (src[6 ]+src[9])*5 + (src[5 ]+src[10]));\ + dst+=dstStride;\ + src+=srcStride;\ + }\ +}\ +\ +static void OPNAME ## h264_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ + const int w=8;\ + uint8_t *cm = cropTbl + MAX_NEG_CROP;\ + int i;\ + for(i=0; i<w; i++)\ + {\ + const int srcB= src[-2*srcStride];\ + const int srcA= src[-1*srcStride];\ + const int src0= src[0 *srcStride];\ + const int src1= src[1 *srcStride];\ + const int src2= src[2 *srcStride];\ + const int src3= src[3 *srcStride];\ + const int src4= src[4 *srcStride];\ + const int src5= src[5 *srcStride];\ + const int src6= src[6 *srcStride];\ + const int src7= src[7 *srcStride];\ + const int src8= src[8 *srcStride];\ + const int src9= src[9 *srcStride];\ + const int src10=src[10*srcStride];\ + OP(dst[0*dstStride], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\ + OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\ + OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*5 + (src0+src5));\ + OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*5 + (src1+src6));\ + OP(dst[4*dstStride], (src4+src5)*20 - (src3+src6)*5 + (src2+src7));\ + OP(dst[5*dstStride], (src5+src6)*20 - (src4+src7)*5 + (src3+src8));\ + OP(dst[6*dstStride], (src6+src7)*20 - (src5+src8)*5 + (src4+src9));\ + OP(dst[7*dstStride], (src7+src8)*20 - (src6+src9)*5 + (src5+src10));\ + dst++;\ + src++;\ + }\ +}\ +\ +static void OPNAME ## h264_qpel8_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\ + const int h=8;\ + const int w=8;\ + uint8_t *cm = cropTbl + MAX_NEG_CROP;\ + int i;\ + src -= 2*srcStride;\ + for(i=0; i<h+5; i++)\ + {\ + tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3 ]);\ + tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4 ]);\ + tmp[2]= (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5 ]);\ + tmp[3]= (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6 ]);\ + tmp[4]= (src[4]+src[5])*20 - (src[3 ]+src[6])*5 + (src[2 ]+src[7 ]);\ + tmp[5]= (src[5]+src[6])*20 - (src[4 ]+src[7])*5 + (src[3 ]+src[8 ]);\ + tmp[6]= (src[6]+src[7])*20 - (src[5 ]+src[8])*5 + (src[4 ]+src[9 ]);\ + tmp[7]= (src[7]+src[8])*20 - (src[6 ]+src[9])*5 + (src[5 ]+src[10]);\ + tmp+=tmpStride;\ + src+=srcStride;\ + }\ + tmp -= tmpStride*(h+5-2);\ + for(i=0; i<w; i++)\ + {\ + const int tmpB= tmp[-2*tmpStride];\ + const int tmpA= tmp[-1*tmpStride];\ + const int tmp0= tmp[0 *tmpStride];\ + const int tmp1= tmp[1 *tmpStride];\ + const int tmp2= tmp[2 *tmpStride];\ + const int tmp3= tmp[3 *tmpStride];\ + const int tmp4= tmp[4 *tmpStride];\ + const int tmp5= tmp[5 *tmpStride];\ + const int tmp6= tmp[6 *tmpStride];\ + const int tmp7= tmp[7 *tmpStride];\ + const int tmp8= tmp[8 *tmpStride];\ + const int tmp9= tmp[9 *tmpStride];\ + const int tmp10=tmp[10*tmpStride];\ + OP2(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));\ + OP2(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));\ + OP2(dst[2*dstStride], (tmp2+tmp3)*20 - (tmp1+tmp4)*5 + (tmp0+tmp5));\ + OP2(dst[3*dstStride], (tmp3+tmp4)*20 - (tmp2+tmp5)*5 + (tmp1+tmp6));\ + OP2(dst[4*dstStride], (tmp4+tmp5)*20 - (tmp3+tmp6)*5 + (tmp2+tmp7));\ + OP2(dst[5*dstStride], (tmp5+tmp6)*20 - (tmp4+tmp7)*5 + (tmp3+tmp8));\ + OP2(dst[6*dstStride], (tmp6+tmp7)*20 - (tmp5+tmp8)*5 + (tmp4+tmp9));\ + OP2(dst[7*dstStride], (tmp7+tmp8)*20 - (tmp6+tmp9)*5 + (tmp5+tmp10));\ + dst++;\ + tmp++;\ + }\ +}\ +\ +static void OPNAME ## h264_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ + OPNAME ## h264_qpel8_v_lowpass(dst , src , dstStride, srcStride);\ + OPNAME ## h264_qpel8_v_lowpass(dst+8, src+8, dstStride, srcStride);\ + src += 8*srcStride;\ + dst += 8*dstStride;\ + OPNAME ## h264_qpel8_v_lowpass(dst , src , dstStride, srcStride);\ + OPNAME ## h264_qpel8_v_lowpass(dst+8, src+8, dstStride, srcStride);\ +}\ +\ +static void OPNAME ## h264_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ + OPNAME ## h264_qpel8_h_lowpass(dst , src , dstStride, srcStride);\ + OPNAME ## h264_qpel8_h_lowpass(dst+8, src+8, dstStride, srcStride);\ + src += 8*srcStride;\ + dst += 8*dstStride;\ + OPNAME ## h264_qpel8_h_lowpass(dst , src , dstStride, srcStride);\ + OPNAME ## h264_qpel8_h_lowpass(dst+8, src+8, dstStride, srcStride);\ +}\ +\ +static void OPNAME ## h264_qpel16_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\ + OPNAME ## h264_qpel8_hv_lowpass(dst , tmp , src , dstStride, tmpStride, srcStride);\ + OPNAME ## h264_qpel8_hv_lowpass(dst+8, tmp+8, src+8, dstStride, tmpStride, srcStride);\ + src += 8*srcStride;\ + tmp += 8*tmpStride;\ + dst += 8*dstStride;\ + OPNAME ## h264_qpel8_hv_lowpass(dst , tmp , src , dstStride, tmpStride, srcStride);\ + OPNAME ## h264_qpel8_hv_lowpass(dst+8, tmp+8, src+8, dstStride, tmpStride, srcStride);\ +}\ + +#define H264_MC(OPNAME, SIZE) \ +static void OPNAME ## h264_qpel ## SIZE ## _mc00_c (uint8_t *dst, uint8_t *src, int stride){\ + OPNAME ## pixels ## SIZE ## _c(dst, src, stride, SIZE);\ +}\ +\ +static void OPNAME ## h264_qpel ## SIZE ## _mc10_c(uint8_t *dst, uint8_t *src, int stride){\ + uint8_t half[SIZE*SIZE];\ + put_h264_qpel ## SIZE ## _h_lowpass(half, src, SIZE, stride);\ + OPNAME ## pixels ## SIZE ## _l2(dst, src, half, stride, stride, SIZE, SIZE);\ +}\ +\ +static void OPNAME ## h264_qpel ## SIZE ## _mc20_c(uint8_t *dst, uint8_t *src, int stride){\ + OPNAME ## h264_qpel ## SIZE ## _h_lowpass(dst, src, stride, stride);\ +}\ +\ +static void OPNAME ## h264_qpel ## SIZE ## _mc30_c(uint8_t *dst, uint8_t *src, int stride){\ + uint8_t half[SIZE*SIZE];\ + put_h264_qpel ## SIZE ## _h_lowpass(half, src, SIZE, stride);\ + OPNAME ## pixels ## SIZE ## _l2(dst, src+1, half, stride, stride, SIZE, SIZE);\ +}\ +\ +static void OPNAME ## h264_qpel ## SIZE ## _mc01_c(uint8_t *dst, uint8_t *src, int stride){\ + uint8_t full[SIZE*(SIZE+5)];\ + uint8_t * const full_mid= full + SIZE*2;\ + uint8_t half[SIZE*SIZE];\ + copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\ + put_h264_qpel ## SIZE ## _v_lowpass(half, full_mid, SIZE, SIZE);\ + OPNAME ## pixels ## SIZE ## _l2(dst, full_mid, half, stride, SIZE, SIZE, SIZE);\ +}\ +\ +static void OPNAME ## h264_qpel ## SIZE ## _mc02_c(uint8_t *dst, uint8_t *src, int stride){\ + uint8_t full[SIZE*(SIZE+5)];\ + uint8_t * const full_mid= full + SIZE*2;\ + copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\ + OPNAME ## h264_qpel ## SIZE ## _v_lowpass(dst, full_mid, stride, SIZE);\ +}\ +\ +static void OPNAME ## h264_qpel ## SIZE ## _mc03_c(uint8_t *dst, uint8_t *src, int stride){\ + uint8_t full[SIZE*(SIZE+5)];\ + uint8_t * const full_mid= full + SIZE*2;\ + uint8_t half[SIZE*SIZE];\ + copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\ + put_h264_qpel ## SIZE ## _v_lowpass(half, full_mid, SIZE, SIZE);\ + OPNAME ## pixels ## SIZE ## _l2(dst, full_mid+SIZE, half, stride, SIZE, SIZE, SIZE);\ +}\ +\ +static void OPNAME ## h264_qpel ## SIZE ## _mc11_c(uint8_t *dst, uint8_t *src, int stride){\ + uint8_t full[SIZE*(SIZE+5)];\ + uint8_t * const full_mid= full + SIZE*2;\ + uint8_t halfH[SIZE*SIZE];\ + uint8_t halfV[SIZE*SIZE];\ + put_h264_qpel ## SIZE ## _h_lowpass(halfH, src, SIZE, stride);\ + copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\ + put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\ + OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\ +}\ +\ +static void OPNAME ## h264_qpel ## SIZE ## _mc31_c(uint8_t *dst, uint8_t *src, int stride){\ + uint8_t full[SIZE*(SIZE+5)];\ + uint8_t * const full_mid= full + SIZE*2;\ + uint8_t halfH[SIZE*SIZE];\ + uint8_t halfV[SIZE*SIZE];\ + put_h264_qpel ## SIZE ## _h_lowpass(halfH, src, SIZE, stride);\ + copy_block ## SIZE (full, src - stride*2 + 1, SIZE, stride, SIZE + 5);\ + put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\ + OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\ +}\ +\ +static void OPNAME ## h264_qpel ## SIZE ## _mc13_c(uint8_t *dst, uint8_t *src, int stride){\ + uint8_t full[SIZE*(SIZE+5)];\ + uint8_t * const full_mid= full + SIZE*2;\ + uint8_t halfH[SIZE*SIZE];\ + uint8_t halfV[SIZE*SIZE];\ + put_h264_qpel ## SIZE ## _h_lowpass(halfH, src + stride, SIZE, stride);\ + copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\ + put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\ + OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\ +}\ +\ +static void OPNAME ## h264_qpel ## SIZE ## _mc33_c(uint8_t *dst, uint8_t *src, int stride){\ + uint8_t full[SIZE*(SIZE+5)];\ + uint8_t * const full_mid= full + SIZE*2;\ + uint8_t halfH[SIZE*SIZE];\ + uint8_t halfV[SIZE*SIZE];\ + put_h264_qpel ## SIZE ## _h_lowpass(halfH, src + stride, SIZE, stride);\ + copy_block ## SIZE (full, src - stride*2 + 1, SIZE, stride, SIZE + 5);\ + put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\ + OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\ +}\ +\ +static void OPNAME ## h264_qpel ## SIZE ## _mc22_c(uint8_t *dst, uint8_t *src, int stride){\ + int16_t tmp[SIZE*(SIZE+5)];\ + OPNAME ## h264_qpel ## SIZE ## _hv_lowpass(dst, tmp, src, stride, SIZE, stride);\ +}\ +\ +static void OPNAME ## h264_qpel ## SIZE ## _mc21_c(uint8_t *dst, uint8_t *src, int stride){\ + int16_t tmp[SIZE*(SIZE+5)];\ + uint8_t halfH[SIZE*SIZE];\ + uint8_t halfHV[SIZE*SIZE];\ + put_h264_qpel ## SIZE ## _h_lowpass(halfH, src, SIZE, stride);\ + put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\ + OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfHV, stride, SIZE, SIZE, SIZE);\ +}\ +\ +static void OPNAME ## h264_qpel ## SIZE ## _mc23_c(uint8_t *dst, uint8_t *src, int stride){\ + int16_t tmp[SIZE*(SIZE+5)];\ + uint8_t halfH[SIZE*SIZE];\ + uint8_t halfHV[SIZE*SIZE];\ + put_h264_qpel ## SIZE ## _h_lowpass(halfH, src + stride, SIZE, stride);\ + put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\ + OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfHV, stride, SIZE, SIZE, SIZE);\ +}\ +\ +static void OPNAME ## h264_qpel ## SIZE ## _mc12_c(uint8_t *dst, uint8_t *src, int stride){\ + uint8_t full[SIZE*(SIZE+5)];\ + uint8_t * const full_mid= full + SIZE*2;\ + int16_t tmp[SIZE*(SIZE+5)];\ + uint8_t halfV[SIZE*SIZE];\ + uint8_t halfHV[SIZE*SIZE];\ + copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\ + put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\ + put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\ + OPNAME ## pixels ## SIZE ## _l2(dst, halfV, halfHV, stride, SIZE, SIZE, SIZE);\ +}\ +\ +static void OPNAME ## h264_qpel ## SIZE ## _mc32_c(uint8_t *dst, uint8_t *src, int stride){\ + uint8_t full[SIZE*(SIZE+5)];\ + uint8_t * const full_mid= full + SIZE*2;\ + int16_t tmp[SIZE*(SIZE+5)];\ + uint8_t halfV[SIZE*SIZE];\ + uint8_t halfHV[SIZE*SIZE];\ + copy_block ## SIZE (full, src - stride*2 + 1, SIZE, stride, SIZE + 5);\ + put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\ + put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\ + OPNAME ## pixels ## SIZE ## _l2(dst, halfV, halfHV, stride, SIZE, SIZE, SIZE);\ +}\ + +#define op_avg(a, b) a = (((a)+cm[((b) + 16)>>5]+1)>>1) +//#define op_avg2(a, b) a = (((a)*w1+cm[((b) + 16)>>5]*w2 + o + 64)>>7) +#define op_put(a, b) a = cm[((b) + 16)>>5] +#define op2_avg(a, b) a = (((a)+cm[((b) + 512)>>10]+1)>>1) +#define op2_put(a, b) a = cm[((b) + 512)>>10] + +H264_LOWPASS(put_ , op_put, op2_put) +H264_LOWPASS(avg_ , op_avg, op2_avg) +H264_MC(put_, 4) +H264_MC(put_, 8) +H264_MC(put_, 16) +H264_MC(avg_, 4) +H264_MC(avg_, 8) +H264_MC(avg_, 16) + +#undef op_avg +#undef op_put +#undef op2_avg +#undef op2_put +#endif + static void wmv2_mspel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){ uint8_t *cm = cropTbl + MAX_NEG_CROP; int i; @@ -2107,7 +2595,21 @@ void dsputil_init(DSPContext* c, AVCodecContext *avctx) dspfunc(avg_qpel, 1, 8); /* dspfunc(avg_no_rnd_qpel, 1, 8); */ + + dspfunc(put_h264_qpel, 0, 16); + dspfunc(put_h264_qpel, 1, 8); + dspfunc(put_h264_qpel, 2, 4); + dspfunc(avg_h264_qpel, 0, 16); + dspfunc(avg_h264_qpel, 1, 8); + dspfunc(avg_h264_qpel, 2, 4); + #undef dspfunc + c->put_h264_chroma_pixels_tab[0]= put_h264_chroma_mc8_c; + c->put_h264_chroma_pixels_tab[1]= put_h264_chroma_mc4_c; + c->put_h264_chroma_pixels_tab[2]= put_h264_chroma_mc2_c; + c->avg_h264_chroma_pixels_tab[0]= avg_h264_chroma_mc8_c; + c->avg_h264_chroma_pixels_tab[1]= avg_h264_chroma_mc4_c; + c->avg_h264_chroma_pixels_tab[2]= avg_h264_chroma_mc2_c; c->put_mspel_pixels_tab[0]= put_mspel8_mc00_c; c->put_mspel_pixels_tab[1]= put_mspel8_mc10_c; |